[
  {
    "path": ".git-blame-ignore-revs",
    "content": "# Exclude these commits from git blame (e.g. mass reformatting).\n# These are ignored by GitHub automatically.\n# To enable this locally, run:\n#\n#    git config blame.ignoreRevsFile .git-blame-ignore-revs\n\n3134e5f840c12c8f32613ce520101a047c89dcc2  # refactor(whitespace): rm temporary react fragments (#7161)\ned3f72bc75f3e3a9ae9e4d8cd38278f9c97e78b4  # refactor(whitespace): rm react fragment #7190\n7b927e79c25f4ddfd18a067f489e122acd2c89de  # chore(format): format files where `ruff` and `black` agree (#9339)\n"
  },
  {
    "path": ".github/CODEOWNERS",
    "content": "* @onyx-dot-app/onyx-core-team\n# Helm charts Owners\n/helm/ @justin-tahara\n\n# Web standards updates\n/web/STANDARDS.md @raunakab @Weves\n\n# Agent context files\n/CLAUDE.md @Weves\n/AGENTS.md @Weves\n\n# Beta cherry-pick workflow owners\n/.github/workflows/post-merge-beta-cherry-pick.yml @justin-tahara @jmelahman\n"
  },
  {
    "path": ".github/actionlint.yml",
    "content": "self-hosted-runner:\n  # Labels of self-hosted runner in array of strings.\n  labels:\n    - extras=ecr-cache\n    - extras=s3-cache\n    - hdd=256\n    - runs-on\n    - runner=1cpu-linux-arm64\n    - runner=1cpu-linux-x64\n    - runner=2cpu-linux-arm64\n    - runner=2cpu-linux-x64\n    - runner=4cpu-linux-arm64\n    - runner=4cpu-linux-x64\n    - runner=8cpu-linux-arm64\n    - runner=8cpu-linux-x64\n    - runner=16cpu-linux-arm64\n    - runner=16cpu-linux-x64\n    - ubuntu-slim # Currently in public preview\n    - volume=40gb\n    - volume=50gb\n\n# Configuration variables in array of strings defined in your repository or\n# organization. `null` means disabling configuration variables check.\n# Empty array means no configuration variable is allowed.\nconfig-variables: null\n\n# Configuration for file paths. The keys are glob patterns to match to file\n# paths relative to the repository root. The values are the configurations for\n# the file paths. Note that the path separator is always '/'.\n# The following configurations are available.\n#\n# \"ignore\" is an array of regular expression patterns. Matched error messages\n# are ignored. This is similar to the \"-ignore\" command line option.\npaths:\n  # Glob pattern relative to the repository root for matching files. The path separator is always '/'.\n  # This example configures any YAML file under the '.github/workflows/' directory.\n  .github/workflows/**/*.{yml,yaml}:\n    # TODO: These are real and should be fixed eventually.\n    ignore:\n      - 'shellcheck reported issue in this script: SC2038:.+'\n      - 'shellcheck reported issue in this script: SC2046:.+'\n      - 'shellcheck reported issue in this script: SC2086:.+'\n      - 'shellcheck reported issue in this script: SC2193:.+'\n"
  },
  {
    "path": ".github/actions/build-backend-image/action.yml",
    "content": "name: \"Build Backend Image\"\ndescription: \"Builds and pushes the backend Docker image with cache reuse\"\ninputs:\n  runs-on-ecr-cache:\n    description: \"ECR cache registry from runs-on/action\"\n    required: true\n  ref-name:\n    description: \"Git ref name used for cache suffix fallback\"\n    required: true\n  pr-number:\n    description: \"Optional PR number for cache suffix\"\n    required: false\n    default: \"\"\n  github-sha:\n    description: \"Commit SHA used for cache keys\"\n    required: true\n  run-id:\n    description: \"GitHub run ID used in output image tag\"\n    required: true\n  docker-username:\n    description: \"Docker Hub username\"\n    required: true\n  docker-token:\n    description: \"Docker Hub token\"\n    required: true\n  docker-no-cache:\n    description: \"Set to 'true' to disable docker build cache\"\n    required: false\n    default: \"false\"\nruns:\n  using: \"composite\"\n  steps:\n    - name: Format branch name for cache\n      id: format-branch\n      shell: bash\n      env:\n        PR_NUMBER: ${{ inputs.pr-number }}\n        REF_NAME: ${{ inputs.ref-name }}\n      run: |\n        if [ -n \"${PR_NUMBER}\" ]; then\n          CACHE_SUFFIX=\"${PR_NUMBER}\"\n        else\n          # shellcheck disable=SC2001\n          CACHE_SUFFIX=$(echo \"${REF_NAME}\" | sed 's/[^A-Za-z0-9._-]/-/g')\n        fi\n        echo \"cache-suffix=${CACHE_SUFFIX}\" >> \"$GITHUB_OUTPUT\"\n\n    - name: Set up Docker Buildx\n      uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n    - name: Login to Docker Hub\n      uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n      with:\n        username: ${{ inputs.docker-username }}\n        password: ${{ inputs.docker-token }}\n\n    - name: Build and push Backend Docker image\n      uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n      with:\n        context: ./backend\n        file: ./backend/Dockerfile\n        push: true\n        tags: ${{ inputs.runs-on-ecr-cache }}:nightly-llm-it-backend-${{ inputs.run-id }}\n        cache-from: |\n          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache-${{ inputs.github-sha }}\n          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}\n          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache\n          type=registry,ref=onyxdotapp/onyx-backend:latest\n        cache-to: |\n          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache-${{ inputs.github-sha }},mode=max\n          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max\n          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache,mode=max\n        no-cache: ${{ inputs.docker-no-cache == 'true' }}\n"
  },
  {
    "path": ".github/actions/build-integration-image/action.yml",
    "content": "name: \"Build Integration Image\"\ndescription: \"Builds and pushes the integration test image with docker bake\"\ninputs:\n  runs-on-ecr-cache:\n    description: \"ECR cache registry from runs-on/action\"\n    required: true\n  ref-name:\n    description: \"Git ref name used for cache suffix fallback\"\n    required: true\n  pr-number:\n    description: \"Optional PR number for cache suffix\"\n    required: false\n    default: \"\"\n  github-sha:\n    description: \"Commit SHA used for cache keys\"\n    required: true\n  run-id:\n    description: \"GitHub run ID used in output image tag\"\n    required: true\n  docker-username:\n    description: \"Docker Hub username\"\n    required: true\n  docker-token:\n    description: \"Docker Hub token\"\n    required: true\nruns:\n  using: \"composite\"\n  steps:\n    - name: Set up Docker Buildx\n      uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n    - name: Login to Docker Hub\n      uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n      with:\n        username: ${{ inputs.docker-username }}\n        password: ${{ inputs.docker-token }}\n\n    - name: Format branch name for cache\n      id: format-branch\n      shell: bash\n      env:\n        PR_NUMBER: ${{ inputs.pr-number }}\n        REF_NAME: ${{ inputs.ref-name }}\n      run: |\n        if [ -n \"${PR_NUMBER}\" ]; then\n          CACHE_SUFFIX=\"${PR_NUMBER}\"\n        else\n          # shellcheck disable=SC2001\n          CACHE_SUFFIX=$(echo \"${REF_NAME}\" | sed 's/[^A-Za-z0-9._-]/-/g')\n        fi\n        echo \"cache-suffix=${CACHE_SUFFIX}\" >> \"$GITHUB_OUTPUT\"\n\n    - name: Build and push integration test image with Docker Bake\n      shell: bash\n      env:\n        RUNS_ON_ECR_CACHE: ${{ inputs.runs-on-ecr-cache }}\n        INTEGRATION_REPOSITORY: ${{ inputs.runs-on-ecr-cache }}\n        TAG: nightly-llm-it-${{ inputs.run-id }}\n        CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}\n        HEAD_SHA: ${{ inputs.github-sha }}\n      run: |\n        docker buildx bake --push \\\n          --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \\\n          --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \\\n          --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \\\n          --set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \\\n          --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \\\n          --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \\\n          --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \\\n          --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \\\n          --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \\\n          --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \\\n          --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \\\n          --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \\\n          --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \\\n          integration\n"
  },
  {
    "path": ".github/actions/build-model-server-image/action.yml",
    "content": "name: \"Build Model Server Image\"\ndescription: \"Builds and pushes the model server Docker image with cache reuse\"\ninputs:\n  runs-on-ecr-cache:\n    description: \"ECR cache registry from runs-on/action\"\n    required: true\n  ref-name:\n    description: \"Git ref name used for cache suffix fallback\"\n    required: true\n  pr-number:\n    description: \"Optional PR number for cache suffix\"\n    required: false\n    default: \"\"\n  github-sha:\n    description: \"Commit SHA used for cache keys\"\n    required: true\n  run-id:\n    description: \"GitHub run ID used in output image tag\"\n    required: true\n  docker-username:\n    description: \"Docker Hub username\"\n    required: true\n  docker-token:\n    description: \"Docker Hub token\"\n    required: true\nruns:\n  using: \"composite\"\n  steps:\n    - name: Format branch name for cache\n      id: format-branch\n      shell: bash\n      env:\n        PR_NUMBER: ${{ inputs.pr-number }}\n        REF_NAME: ${{ inputs.ref-name }}\n      run: |\n        if [ -n \"${PR_NUMBER}\" ]; then\n          CACHE_SUFFIX=\"${PR_NUMBER}\"\n        else\n          # shellcheck disable=SC2001\n          CACHE_SUFFIX=$(echo \"${REF_NAME}\" | sed 's/[^A-Za-z0-9._-]/-/g')\n        fi\n        echo \"cache-suffix=${CACHE_SUFFIX}\" >> \"$GITHUB_OUTPUT\"\n\n    - name: Set up Docker Buildx\n      uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n    - name: Login to Docker Hub\n      uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n      with:\n        username: ${{ inputs.docker-username }}\n        password: ${{ inputs.docker-token }}\n\n    - name: Build and push Model Server Docker image\n      uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n      with:\n        context: ./backend\n        file: ./backend/Dockerfile.model_server\n        push: true\n        tags: ${{ inputs.runs-on-ecr-cache }}:nightly-llm-it-model-server-${{ inputs.run-id }}\n        cache-from: |\n          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache-${{ inputs.github-sha }}\n          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}\n          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache\n          type=registry,ref=onyxdotapp/onyx-model-server:latest\n        cache-to: |\n          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache-${{ inputs.github-sha }},mode=max\n          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max\n          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache,mode=max\n"
  },
  {
    "path": ".github/actions/run-nightly-provider-chat-test/action.yml",
    "content": "name: \"Run Nightly Provider Chat Test\"\ndescription: \"Starts required compose services and runs nightly provider integration test\"\ninputs:\n  provider:\n    description: \"Provider slug for NIGHTLY_LLM_PROVIDER\"\n    required: true\n  models:\n    description: \"Comma-separated model list for NIGHTLY_LLM_MODELS\"\n    required: true\n  provider-api-key:\n    description: \"API key for NIGHTLY_LLM_API_KEY\"\n    required: false\n    default: \"\"\n  strict:\n    description: \"String true/false for NIGHTLY_LLM_STRICT\"\n    required: true\n  api-base:\n    description: \"Optional NIGHTLY_LLM_API_BASE\"\n    required: false\n    default: \"\"\n  api-version:\n    description: \"Optional NIGHTLY_LLM_API_VERSION\"\n    required: false\n    default: \"\"\n  deployment-name:\n    description: \"Optional NIGHTLY_LLM_DEPLOYMENT_NAME\"\n    required: false\n    default: \"\"\n  custom-config-json:\n    description: \"Optional NIGHTLY_LLM_CUSTOM_CONFIG_JSON\"\n    required: false\n    default: \"\"\n  runs-on-ecr-cache:\n    description: \"ECR cache registry from runs-on/action\"\n    required: true\n  run-id:\n    description: \"GitHub run ID used in image tags\"\n    required: true\n  docker-username:\n    description: \"Docker Hub username\"\n    required: true\n  docker-token:\n    description: \"Docker Hub token\"\n    required: true\nruns:\n  using: \"composite\"\n  steps:\n    - name: Login to Docker Hub\n      uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n      with:\n        username: ${{ inputs.docker-username }}\n        password: ${{ inputs.docker-token }}\n\n    - name: Create .env file for Docker Compose\n      shell: bash\n      env:\n        ECR_CACHE: ${{ inputs.runs-on-ecr-cache }}\n        RUN_ID: ${{ inputs.run-id }}\n      run: |\n        cat <<EOF2 > deployment/docker_compose/.env\n        COMPOSE_PROFILES=s3-filestore\n        ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true\n        LICENSE_ENFORCEMENT_ENABLED=false\n        AUTH_TYPE=basic\n        POSTGRES_POOL_PRE_PING=true\n        POSTGRES_USE_NULL_POOL=true\n        REQUIRE_EMAIL_VERIFICATION=false\n        DISABLE_TELEMETRY=true\n        INTEGRATION_TESTS_MODE=true\n        AUTO_LLM_UPDATE_INTERVAL_SECONDS=10\n        AWS_REGION_NAME=us-west-2\n        ONYX_BACKEND_IMAGE=${ECR_CACHE}:nightly-llm-it-backend-${RUN_ID}\n        ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:nightly-llm-it-model-server-${RUN_ID}\n        EOF2\n\n    - name: Start Docker containers\n      shell: bash\n      run: |\n        cd deployment/docker_compose\n        docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait \\\n          relational_db \\\n          index \\\n          cache \\\n          minio \\\n          api_server \\\n          inference_model_server\n\n    - name: Run nightly provider integration test\n      uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3\n      env:\n        MODELS: ${{ inputs.models }}\n        NIGHTLY_LLM_PROVIDER: ${{ inputs.provider }}\n        NIGHTLY_LLM_API_KEY: ${{ inputs.provider-api-key }}\n        NIGHTLY_LLM_API_BASE: ${{ inputs.api-base }}\n        NIGHTLY_LLM_API_VERSION: ${{ inputs.api-version }}\n        NIGHTLY_LLM_DEPLOYMENT_NAME: ${{ inputs.deployment-name }}\n        NIGHTLY_LLM_CUSTOM_CONFIG_JSON: ${{ inputs.custom-config-json }}\n        NIGHTLY_LLM_STRICT: ${{ inputs.strict }}\n        RUNS_ON_ECR_CACHE: ${{ inputs.runs-on-ecr-cache }}\n        RUN_ID: ${{ inputs.run-id }}\n      with:\n        timeout_minutes: 20\n        max_attempts: 2\n        retry_wait_seconds: 10\n        command: |\n          docker run --rm --network onyx_default \\\n            --name test-runner \\\n            -e POSTGRES_HOST=relational_db \\\n            -e POSTGRES_USER=postgres \\\n            -e POSTGRES_PASSWORD=password \\\n            -e POSTGRES_DB=postgres \\\n            -e DB_READONLY_USER=db_readonly_user \\\n            -e DB_READONLY_PASSWORD=password \\\n            -e POSTGRES_POOL_PRE_PING=true \\\n            -e POSTGRES_USE_NULL_POOL=true \\\n            -e VESPA_HOST=index \\\n            -e REDIS_HOST=cache \\\n            -e API_SERVER_HOST=api_server \\\n            -e TEST_WEB_HOSTNAME=test-runner \\\n            -e AWS_REGION_NAME=us-west-2 \\\n            -e NIGHTLY_LLM_PROVIDER=\"${NIGHTLY_LLM_PROVIDER}\" \\\n            -e NIGHTLY_LLM_MODELS=\"${MODELS}\" \\\n            -e NIGHTLY_LLM_API_KEY=\"${NIGHTLY_LLM_API_KEY}\" \\\n            -e NIGHTLY_LLM_API_BASE=\"${NIGHTLY_LLM_API_BASE}\" \\\n            -e NIGHTLY_LLM_API_VERSION=\"${NIGHTLY_LLM_API_VERSION}\" \\\n            -e NIGHTLY_LLM_DEPLOYMENT_NAME=\"${NIGHTLY_LLM_DEPLOYMENT_NAME}\" \\\n            -e NIGHTLY_LLM_CUSTOM_CONFIG_JSON=\"${NIGHTLY_LLM_CUSTOM_CONFIG_JSON}\" \\\n            -e NIGHTLY_LLM_STRICT=\"${NIGHTLY_LLM_STRICT}\" \\\n            ${RUNS_ON_ECR_CACHE}:nightly-llm-it-${RUN_ID} \\\n            /app/tests/integration/tests/llm_workflows/test_nightly_provider_chat_workflow.py\n"
  },
  {
    "path": ".github/actions/setup-playwright/action.yml",
    "content": "name: \"Setup Playwright\"\ndescription: \"Sets up Playwright and system deps (assumes Python and Playwright are installed)\"\nruns:\n  using: \"composite\"\n  steps:\n    - name: Cache playwright cache\n      uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4\n      with:\n        path: ~/.cache/ms-playwright\n        key: ${{ runner.os }}-${{ runner.arch }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}\n        restore-keys: |\n          ${{ runner.os }}-${{ runner.arch }}-playwright-\n\n    - name: Install playwright\n      shell: bash\n      run: |\n        playwright install chromium --with-deps\n"
  },
  {
    "path": ".github/actions/setup-python-and-install-dependencies/action.yml",
    "content": "name: \"Setup Python and Install Dependencies\"\ndescription: \"Sets up Python with uv and installs deps\"\ninputs:\n  requirements:\n    description: \"Newline-separated list of requirement files to install (relative to repo root)\"\n    required: true\nruns:\n  using: \"composite\"\n  steps:\n    - name: Compute requirements hash\n      id: req-hash\n      shell: bash\n      env:\n        REQUIREMENTS: ${{ inputs.requirements }}\n      run: |\n        # Hash the contents of the specified requirement files\n        hash=\"\"\n        while IFS= read -r req; do\n          if [ -n \"$req\" ] && [ -f \"$req\" ]; then\n            hash=\"$hash$(sha256sum \"$req\")\"\n          fi\n        done <<< \"$REQUIREMENTS\"\n        echo \"hash=$(echo \"$hash\" | sha256sum | cut -d' ' -f1)\" >> \"$GITHUB_OUTPUT\"\n\n    # NOTE: This comes before Setup uv since clean-ups run in reverse chronological order\n    # such that Setup uv's prune-cache is able to prune the cache before we upload.\n    - name: Cache uv cache directory\n      uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4\n      with:\n        path: ~/.cache/uv\n        key: ${{ runner.os }}-uv-${{ steps.req-hash.outputs.hash }}\n        restore-keys: |\n          ${{ runner.os }}-uv-\n\n    - name: Setup uv\n      uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7\n      with:\n        version: \"0.9.9\"\n      # TODO: Enable caching once there is a uv.lock file checked in.\n      # with:\n      #   enable-cache: true\n\n    - name: Setup Python\n      uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5\n      with:\n        python-version: \"3.11\"\n\n    - name: Create virtual environment\n      shell: bash\n      env:\n        VENV_DIR: ${{ runner.temp }}/venv\n      run: | # zizmor: ignore[github-env]\n        uv venv \"$VENV_DIR\"\n        # Validate path before adding to GITHUB_PATH to prevent code injection\n        if [ -d \"$VENV_DIR/bin\" ]; then\n          realpath \"$VENV_DIR/bin\" >> \"$GITHUB_PATH\"\n        else\n          echo \"Error: $VENV_DIR/bin does not exist\"\n          exit 1\n        fi\n\n    - name: Install Python dependencies with uv\n      shell: bash\n      env:\n        REQUIREMENTS: ${{ inputs.requirements }}\n      run: |\n        # Build the uv pip install command with each requirement file as array elements\n        cmd=(\"uv\" \"pip\" \"install\")\n        while IFS= read -r req; do\n          # Skip empty lines\n          if [ -n \"$req\" ]; then\n            cmd+=(\"-r\" \"$req\")\n          fi\n        done <<< \"$REQUIREMENTS\"\n        echo \"Running: ${cmd[*]}\"\n        \"${cmd[@]}\"\n"
  },
  {
    "path": ".github/actions/slack-notify/action.yml",
    "content": "name: \"Slack Notify\"\ndescription: \"Sends a Slack notification for workflow events\"\ninputs:\n  webhook-url:\n    description: \"Slack webhook URL (can also use SLACK_WEBHOOK_URL env var)\"\n    required: false\n  details:\n    description: \"Additional message body content\"\n    required: false\n  failed-jobs:\n    description: \"Deprecated alias for details\"\n    required: false\n  mention:\n    description: \"GitHub username to resolve to a Slack @-mention. Replaces {mention} in details.\"\n    required: false\n  title:\n    description: \"Title for the notification\"\n    required: false\n    default: \"🚨 Workflow Failed\"\n  ref-name:\n    description: \"Git ref name (tag/branch)\"\n    required: false\nruns:\n  using: \"composite\"\n  steps:\n    - name: Send Slack notification\n      shell: bash\n      env:\n        SLACK_WEBHOOK_URL: ${{ inputs.webhook-url }}\n        DETAILS: ${{ inputs.details }}\n        FAILED_JOBS: ${{ inputs.failed-jobs }}\n        MENTION_USER: ${{ inputs.mention }}\n        TITLE: ${{ inputs.title }}\n        REF_NAME: ${{ inputs.ref-name }}\n        REPO: ${{ github.repository }}\n        WORKFLOW: ${{ github.workflow }}\n        RUN_NUMBER: ${{ github.run_number }}\n        RUN_ID: ${{ github.run_id }}\n        SERVER_URL: ${{ github.server_url }}\n        GITHUB_REF_NAME: ${{ github.ref_name }}\n      run: |\n        if [ -z \"$SLACK_WEBHOOK_URL\" ]; then\n          echo \"webhook-url input or SLACK_WEBHOOK_URL env var is not set, skipping notification\"\n          exit 0\n        fi\n\n        # Build workflow URL\n        WORKFLOW_URL=\"${SERVER_URL}/${REPO}/actions/runs/${RUN_ID}\"\n\n        # Use ref_name from input or fall back to github.ref_name\n        if [ -z \"$REF_NAME\" ]; then\n          REF_NAME=\"$GITHUB_REF_NAME\"\n        fi\n\n        if [ -z \"$DETAILS\" ]; then\n          DETAILS=\"$FAILED_JOBS\"\n        fi\n\n        # Resolve {mention} placeholder if a GitHub username was provided.\n        # Looks up the username in user-mappings.json (co-located with this action)\n        # and replaces {mention} with <@SLACK_ID> for a Slack @-mention.\n        # Falls back to the plain GitHub username if not found in the mapping.\n        if [ -n \"$MENTION_USER\" ]; then\n          MAPPINGS_FILE=\"${GITHUB_ACTION_PATH}/user-mappings.json\"\n          slack_id=\"$(jq -r --arg gh \"$MENTION_USER\" 'to_entries[] | select(.value | ascii_downcase == ($gh | ascii_downcase)) | .key' \"$MAPPINGS_FILE\" 2>/dev/null | head -1)\"\n\n          if [ -n \"$slack_id\" ]; then\n            mention_text=\"<@${slack_id}>\"\n          else\n            mention_text=\"${MENTION_USER}\"\n          fi\n\n          DETAILS=\"${DETAILS//\\{mention\\}/$mention_text}\"\n          TITLE=\"${TITLE//\\{mention\\}/}\"\n        else\n          DETAILS=\"${DETAILS//\\{mention\\}/}\"\n          TITLE=\"${TITLE//\\{mention\\}/}\"\n        fi\n\n        normalize_multiline() {\n          printf '%s' \"$1\" | awk 'BEGIN { ORS=\"\"; first=1 } { if (!first) printf \"\\\\n\"; printf \"%s\", $0; first=0 }'\n        }\n\n        DETAILS=\"$(normalize_multiline \"$DETAILS\")\"\n        REF_NAME=\"$(normalize_multiline \"$REF_NAME\")\"\n        TITLE=\"$(normalize_multiline \"$TITLE\")\"\n\n        # Escape JSON special characters\n        escape_json() {\n          local input=\"$1\"\n          # Escape backslashes first (but preserve \\n sequences)\n          # Protect \\n sequences temporarily\n          input=$(printf '%s' \"$input\" | sed 's/\\\\n/\\x01NL\\x01/g')\n          # Escape remaining backslashes\n          input=$(printf '%s' \"$input\" | sed 's/\\\\/\\\\\\\\/g')\n          # Restore \\n sequences (single backslash, will be correct in JSON)\n          input=$(printf '%s' \"$input\" | sed 's/\\x01NL\\x01/\\\\n/g')\n          # Escape quotes\n          printf '%s' \"$input\" | sed 's/\"/\\\\\"/g'\n        }\n\n        REF_NAME_ESC=$(escape_json \"$REF_NAME\")\n        DETAILS_ESC=$(escape_json \"$DETAILS\")\n        WORKFLOW_URL_ESC=$(escape_json \"$WORKFLOW_URL\")\n        TITLE_ESC=$(escape_json \"$TITLE\")\n\n        # Build JSON payload piece by piece\n        # Note: DETAILS_ESC already contains \\n sequences that should remain as \\n in JSON\n        PAYLOAD=\"{\"\n        PAYLOAD=\"${PAYLOAD}\\\"text\\\":\\\"${TITLE_ESC}\\\",\"\n        PAYLOAD=\"${PAYLOAD}\\\"blocks\\\":[{\"\n        PAYLOAD=\"${PAYLOAD}\\\"type\\\":\\\"header\\\",\"\n        PAYLOAD=\"${PAYLOAD}\\\"text\\\":{\\\"type\\\":\\\"plain_text\\\",\\\"text\\\":\\\"${TITLE_ESC}\\\"}\"\n        PAYLOAD=\"${PAYLOAD}},{\"\n        PAYLOAD=\"${PAYLOAD}\\\"type\\\":\\\"section\\\",\"\n        PAYLOAD=\"${PAYLOAD}\\\"fields\\\":[\"\n        if [ -n \"$REF_NAME\" ]; then\n          PAYLOAD=\"${PAYLOAD}{\\\"type\\\":\\\"mrkdwn\\\",\\\"text\\\":\\\"*Ref:*\\\\n${REF_NAME_ESC}\\\"},\"\n        fi\n        PAYLOAD=\"${PAYLOAD}{\\\"type\\\":\\\"mrkdwn\\\",\\\"text\\\":\\\"*Run ID:*\\\\n#${RUN_NUMBER}\\\"}\"\n        PAYLOAD=\"${PAYLOAD}]\"\n        PAYLOAD=\"${PAYLOAD}}\"\n        if [ -n \"$DETAILS\" ]; then\n          PAYLOAD=\"${PAYLOAD},{\"\n          PAYLOAD=\"${PAYLOAD}\\\"type\\\":\\\"section\\\",\"\n          PAYLOAD=\"${PAYLOAD}\\\"text\\\":{\\\"type\\\":\\\"mrkdwn\\\",\\\"text\\\":\\\"${DETAILS_ESC}\\\"}\"\n          PAYLOAD=\"${PAYLOAD}}\"\n        fi\n        PAYLOAD=\"${PAYLOAD},{\"\n        PAYLOAD=\"${PAYLOAD}\\\"type\\\":\\\"actions\\\",\"\n        PAYLOAD=\"${PAYLOAD}\\\"elements\\\":[{\"\n        PAYLOAD=\"${PAYLOAD}\\\"type\\\":\\\"button\\\",\"\n        PAYLOAD=\"${PAYLOAD}\\\"text\\\":{\\\"type\\\":\\\"plain_text\\\",\\\"text\\\":\\\"View Workflow Run\\\"},\"\n        PAYLOAD=\"${PAYLOAD}\\\"url\\\":\\\"${WORKFLOW_URL_ESC}\\\"\"\n        PAYLOAD=\"${PAYLOAD}}]\"\n        PAYLOAD=\"${PAYLOAD}}\"\n        PAYLOAD=\"${PAYLOAD}]\"\n        PAYLOAD=\"${PAYLOAD}}\"\n\n        curl -X POST -H 'Content-type: application/json' \\\n          --data \"$PAYLOAD\" \\\n          \"$SLACK_WEBHOOK_URL\"\n"
  },
  {
    "path": ".github/actions/slack-notify/user-mappings.json",
    "content": "{\n  \"U05SAGZPEA1\": \"yuhongsun96\",\n  \"U05SAH6UGUD\": \"Weves\",\n  \"U07PWEQB7A5\": \"evan-onyx\",\n  \"U07V1SM68KF\": \"joachim-danswer\",\n  \"U08JZ9N3QNN\": \"raunakab\",\n  \"U08L24NCLJE\": \"Subash-Mohan\",\n  \"U090B9M07B2\": \"wenxi-onyx\",\n  \"U094RASDP0Q\": \"duo-onyx\",\n  \"U096L8ZQ85B\": \"justin-tahara\",\n  \"U09AHV8UBQX\": \"jessicasingh7\",\n  \"U09KAL5T3C2\": \"nmgarza5\",\n  \"U09KPGVQ70R\": \"acaprau\",\n  \"U09QR8KTSJH\": \"rohoswagger\",\n  \"U09RB4NTXA4\": \"jmelahman\",\n  \"U0A6K9VCY6A\": \"Danelegend\",\n  \"U0AGC4KH71A\": \"Bo-Onyx\"\n}\n"
  },
  {
    "path": ".github/dependabot.yml",
    "content": "version: 2\nupdates:\n  - package-ecosystem: \"github-actions\"\n    directory: \"/\"\n    schedule:\n      interval: \"weekly\"\n    cooldown:\n      default-days: 7\n    open-pull-requests-limit: 3\n    assignees:\n      - \"jmelahman\"\n    labels:\n      - \"dependabot:actions\"\n  - package-ecosystem: \"pip\"\n    directory: \"/backend\"\n    schedule:\n      interval: \"weekly\"\n    cooldown:\n      default-days: 7\n    open-pull-requests-limit: 3\n    assignees:\n      - \"jmelahman\"\n    labels:\n      - \"dependabot:python\"\n"
  },
  {
    "path": ".github/pull_request_template.md",
    "content": "## Description\n\n<!--- Provide a brief description of the changes in this PR --->\n\n## How Has This Been Tested?\n\n<!--- Describe the tests you ran to verify your changes --->\n\n## Additional Options\n\n- [ ] [Optional] Please cherry-pick this PR to the latest release version.\n- [ ] [Optional] Override Linear Check\n"
  },
  {
    "path": ".github/runs-on.yml",
    "content": "_extend: .github-private\n"
  },
  {
    "path": ".github/workflows/deployment.yml",
    "content": "name: Build and Push Docker Images on Tag\n\non:\n  push:\n    tags:\n      - \"*\"\n  workflow_dispatch:\n\n# Set restrictive default permissions for all jobs. Jobs that need more permissions\n# should explicitly declare them.\npermissions:\n  # Required for OIDC authentication with AWS\n  id-token: write # zizmor: ignore[excessive-permissions]\n\nenv:\n  EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}\n\njobs:\n  # Determine which components to build based on the tag\n  determine-builds:\n    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.\n    runs-on: ubuntu-slim\n    timeout-minutes: 90\n    outputs:\n      build-desktop: ${{ steps.check.outputs.build-desktop }}\n      build-web: ${{ steps.check.outputs.build-web }}\n      build-web-cloud: ${{ steps.check.outputs.build-web-cloud }}\n      build-backend: ${{ steps.check.outputs.build-backend }}\n      build-backend-craft: ${{ steps.check.outputs.build-backend-craft }}\n      build-model-server: ${{ steps.check.outputs.build-model-server }}\n      is-cloud-tag: ${{ steps.check.outputs.is-cloud-tag }}\n      is-beta: ${{ steps.check.outputs.is-beta }}\n      is-beta-standalone: ${{ steps.check.outputs.is-beta-standalone }}\n      is-latest: ${{ steps.check.outputs.is-latest }}\n      is-test-run: ${{ steps.check.outputs.is-test-run }}\n      sanitized-tag: ${{ steps.check.outputs.sanitized-tag }}\n      short-sha: ${{ steps.check.outputs.short-sha }}\n    steps:\n      - name: Checkout (for git tags)\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n          fetch-depth: 0\n          fetch-tags: true\n\n      - name: Setup uv\n        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7\n        with:\n          version: \"0.9.9\"\n          enable-cache: false\n\n      - name: Check which components to build and version info\n        id: check\n        env:\n          EVENT_NAME: ${{ github.event_name }}\n        run: |\n          set -eo pipefail\n          TAG=\"${GITHUB_REF_NAME}\"\n          # Sanitize tag name by replacing slashes with hyphens (for Docker tag compatibility)\n          SANITIZED_TAG=$(echo \"$TAG\" | tr '/' '-')\n          SHORT_SHA=\"${GITHUB_SHA::7}\"\n\n          # Initialize all flags to false\n          IS_CLOUD=false\n          IS_NIGHTLY=false\n          IS_VERSION_TAG=false\n          IS_STABLE=false\n          IS_BETA=false\n          IS_BETA_STANDALONE=false\n          IS_LATEST=false\n          IS_PROD_TAG=false\n          IS_TEST_RUN=false\n          BUILD_DESKTOP=false\n          BUILD_WEB=false\n          BUILD_WEB_CLOUD=false\n          BUILD_BACKEND=true\n          BUILD_BACKEND_CRAFT=false\n          BUILD_MODEL_SERVER=true\n\n          # Determine tag type based on pattern matching (do regex checks once)\n          if [[ \"$TAG\" == *cloud* ]]; then\n            IS_CLOUD=true\n          fi\n          if [[ \"$TAG\" == nightly* ]]; then\n            IS_NIGHTLY=true\n          fi\n          if [[ \"$TAG\" =~ ^v[0-9]+\\.[0-9]+\\.[0-9]+ ]]; then\n            IS_VERSION_TAG=true\n          fi\n          if [[ \"$TAG\" =~ ^v[0-9]+\\.[0-9]+\\.[0-9]+$ ]]; then\n            IS_STABLE=true\n          fi\n          if [[ \"$TAG\" =~ ^v[0-9]+\\.[0-9]+\\.[0-9]+-beta(\\.[0-9]+)?$ ]]; then\n            IS_BETA=true\n          fi\n\n          # Determine what to build based on tag type\n          if [[ \"$IS_CLOUD\" == \"true\" ]]; then\n            BUILD_WEB_CLOUD=true\n          else\n            BUILD_WEB=true\n            # Only build desktop for semver tags (excluding beta)\n            if [[ \"$IS_VERSION_TAG\" == \"true\" ]] && [[ \"$IS_BETA\" != \"true\" ]]; then\n              BUILD_DESKTOP=true\n            fi\n          fi\n\n          # Standalone version checks (for backend/model-server - version excluding cloud tags)\n          if [[ \"$IS_BETA\" == \"true\" ]] && [[ \"$IS_CLOUD\" != \"true\" ]]; then\n            IS_BETA_STANDALONE=true\n          fi\n\n          # Determine if this tag should get the \"latest\" Docker tag.\n          # Only the highest semver stable tag (vX.Y.Z exactly) gets \"latest\".\n          if [[ \"$IS_STABLE\" == \"true\" ]]; then\n            HIGHEST_STABLE=$(uv run --no-sync --with onyx-devtools ods latest-stable-tag) || {\n              echo \"::error::Failed to determine highest stable tag via 'ods latest-stable-tag'\"\n              exit 1\n            }\n            if [[ \"$TAG\" == \"$HIGHEST_STABLE\" ]]; then\n              IS_LATEST=true\n            fi\n          fi\n\n          # Build craft-latest backend alongside the regular latest.\n          if [[ \"$IS_LATEST\" == \"true\" ]]; then\n            BUILD_BACKEND_CRAFT=true\n          fi\n\n          # Determine if this is a production tag\n          # Production tags are: version tags (v1.2.3*) or nightly tags\n          if [[ \"$IS_VERSION_TAG\" == \"true\" ]] || [[ \"$IS_NIGHTLY\" == \"true\" ]]; then\n            IS_PROD_TAG=true\n          fi\n\n          # Determine if this is a test run (workflow_dispatch on non-production ref)\n          if [[ \"$EVENT_NAME\" == \"workflow_dispatch\" ]] && [[ \"$IS_PROD_TAG\" != \"true\" ]]; then\n            IS_TEST_RUN=true\n          fi\n          {\n            echo \"build-desktop=$BUILD_DESKTOP\"\n            echo \"build-web=$BUILD_WEB\"\n            echo \"build-web-cloud=$BUILD_WEB_CLOUD\"\n            echo \"build-backend=$BUILD_BACKEND\"\n            echo \"build-backend-craft=$BUILD_BACKEND_CRAFT\"\n            echo \"build-model-server=$BUILD_MODEL_SERVER\"\n            echo \"is-cloud-tag=$IS_CLOUD\"\n            echo \"is-beta=$IS_BETA\"\n            echo \"is-beta-standalone=$IS_BETA_STANDALONE\"\n            echo \"is-latest=$IS_LATEST\"\n            echo \"is-test-run=$IS_TEST_RUN\"\n            echo \"sanitized-tag=$SANITIZED_TAG\"\n            echo \"short-sha=$SHORT_SHA\"\n          } >> \"$GITHUB_OUTPUT\"\n\n  check-version-tag:\n    runs-on: ubuntu-slim\n    timeout-minutes: 10\n    if: ${{ !startsWith(github.ref_name, 'nightly-latest') && github.event_name != 'workflow_dispatch' }}\n    steps:\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n          fetch-depth: 0\n\n      - name: Setup uv\n        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7\n        with:\n          version: \"0.9.9\"\n          # NOTE: This isn't caching much and zizmor suggests this could be poisoned, so disable.\n          enable-cache: false\n\n      - name: Validate tag is versioned correctly\n        run: |\n          uv run --no-sync --with release-tag tag --check\n\n  notify-slack-on-tag-check-failure:\n    needs:\n      - check-version-tag\n    if: always() && needs.check-version-tag.result == 'failure' && github.event_name != 'workflow_dispatch'\n    runs-on: ubuntu-slim\n    timeout-minutes: 10\n    environment: release\n    steps:\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Send Slack notification\n        uses: ./.github/actions/slack-notify\n        with:\n          webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}\n          failed-jobs: \"• check-version-tag\"\n          title: \"🚨 Version Tag Check Failed\"\n          ref-name: ${{ github.ref_name }}\n\n  # Create GitHub release first, before desktop builds start.\n  # This ensures all desktop matrix jobs upload to the same release instead of\n  # racing to create duplicate releases.\n  create-release:\n    needs: determine-builds\n    if: needs.determine-builds.outputs.build-desktop == 'true'\n    runs-on: ubuntu-slim\n    timeout-minutes: 10\n    permissions:\n      contents: write\n    outputs:\n      release-id: ${{ steps.create-release.outputs.id }}\n    steps:\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Determine release tag\n        id: release-tag\n        env:\n          IS_TEST_RUN: ${{ needs.determine-builds.outputs.is-test-run }}\n          SHORT_SHA: ${{ needs.determine-builds.outputs.short-sha }}\n        run: |\n          if [ \"${IS_TEST_RUN}\" == \"true\" ]; then\n            echo \"tag=v0.0.0-dev+${SHORT_SHA}\" >> \"$GITHUB_OUTPUT\"\n          else\n            echo \"tag=${GITHUB_REF_NAME}\" >> \"$GITHUB_OUTPUT\"\n          fi\n\n      - name: Create GitHub Release\n        id: create-release\n        uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631 # ratchet:softprops/action-gh-release@v2\n        with:\n          tag_name: ${{ steps.release-tag.outputs.tag }}\n          name: ${{ steps.release-tag.outputs.tag }}\n          body: \"See the assets to download this version and install.\"\n          draft: true\n          prerelease: false\n        env:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n\n  build-desktop:\n    needs:\n      - determine-builds\n      - create-release\n    if: needs.determine-builds.outputs.build-desktop == 'true'\n    permissions:\n      id-token: write\n      contents: write\n      actions: read\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          - platform: \"macos-latest\" # Build a universal image for macOS.\n            args: \"--target universal-apple-darwin\"\n          - platform: \"ubuntu-24.04\"\n            args: \"--bundles deb,rpm\"\n          - platform: \"ubuntu-24.04-arm\" # Only available in public repos.\n            args: \"--bundles deb,rpm\"\n          - platform: \"windows-latest\"\n            args: \"\"\n\n    runs-on: ${{ matrix.platform }}\n    timeout-minutes: 90\n    environment: release\n    steps:\n      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6.0.2\n        with:\n          # NOTE: persist-credentials is needed for tauri-action to upload assets to GitHub releases.\n          persist-credentials: true # zizmor: ignore[artipacked]\n\n      - name: Configure AWS credentials\n        if: startsWith(matrix.platform, 'macos-')\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        if: startsWith(matrix.platform, 'macos-')\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            APPLE_ID, deploy/apple-id\n            APPLE_PASSWORD, deploy/apple-password\n            APPLE_CERTIFICATE, deploy/apple-certificate\n            APPLE_CERTIFICATE_PASSWORD, deploy/apple-certificate-password\n            KEYCHAIN_PASSWORD, deploy/keychain-password\n            APPLE_TEAM_ID, deploy/apple-team-id\n          parse-json-secrets: true\n\n      - name: install dependencies (ubuntu only)\n        if: startsWith(matrix.platform, 'ubuntu-')\n        run: |\n          sudo apt-get update\n          sudo apt-get install -y \\\n            build-essential \\\n            libglib2.0-dev \\\n            libgirepository1.0-dev \\\n            libgtk-3-dev \\\n            libjavascriptcoregtk-4.1-dev \\\n            libwebkit2gtk-4.1-dev \\\n            libayatana-appindicator3-dev \\\n            gobject-introspection \\\n            pkg-config \\\n            curl \\\n            xdg-utils\n\n      - name: setup node\n        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v6.3.0\n        with:\n          node-version: 24\n          package-manager-cache: false\n\n      - name: install Rust stable\n        uses: dtolnay/rust-toolchain@6d9817901c499d6b02debbb57edb38d33daa680b # zizmor: ignore[impostor-commit]\n        with:\n          # Those targets are only used on macos runners so it's in an `if` to slightly speed up windows and linux builds.\n          targets: ${{ matrix.platform == 'macos-latest' && 'aarch64-apple-darwin,x86_64-apple-darwin' || '' }}\n\n      - name: install frontend dependencies\n        working-directory: ./desktop\n        run: npm install\n\n      - name: Inject version (Unix)\n        if: runner.os != 'Windows'\n        working-directory: ./desktop\n        env:\n          SHORT_SHA: ${{ needs.determine-builds.outputs.short-sha }}\n          IS_TEST_RUN: ${{ needs.determine-builds.outputs.is-test-run }}\n        run: |\n          if [ \"${IS_TEST_RUN}\" == \"true\" ]; then\n            VERSION=\"0.0.0-dev+${SHORT_SHA}\"\n          else\n            VERSION=\"${GITHUB_REF_NAME#v}\"\n          fi\n          echo \"Injecting version: $VERSION\"\n\n          # Update Cargo.toml\n          sed \"s/^version = .*/version = \\\"$VERSION\\\"/\" src-tauri/Cargo.toml > src-tauri/Cargo.toml.tmp\n          mv src-tauri/Cargo.toml.tmp src-tauri/Cargo.toml\n\n          # Update tauri.conf.json\n          jq --arg v \"$VERSION\" '.version = $v' src-tauri/tauri.conf.json > src-tauri/tauri.conf.json.tmp\n          mv src-tauri/tauri.conf.json.tmp src-tauri/tauri.conf.json\n\n          # Update package.json\n          jq --arg v \"$VERSION\" '.version = $v' package.json > package.json.tmp\n          mv package.json.tmp package.json\n\n          echo \"Versions set to: $VERSION\"\n\n      - name: Inject version (Windows)\n        if: runner.os == 'Windows'\n        working-directory: ./desktop\n        shell: pwsh\n        env:\n          IS_TEST_RUN: ${{ needs.determine-builds.outputs.is-test-run }}\n        run: |\n          # Windows MSI requires numeric-only build metadata, so we skip the SHA suffix\n          if ($env:IS_TEST_RUN -eq \"true\") {\n            $VERSION = \"0.0.0\"\n          } else {\n            # Strip 'v' prefix and any pre-release suffix (e.g., -beta.13) for MSI compatibility\n            $VERSION = \"$env:GITHUB_REF_NAME\" -replace '^v', '' -replace '-.*$', ''\n          }\n          Write-Host \"Injecting version: $VERSION\"\n\n          # Update Cargo.toml\n          $cargo = Get-Content src-tauri/Cargo.toml -Raw\n          $cargo = $cargo -replace '(?m)^version = .*', \"version = `\"$VERSION`\"\"\n          Set-Content src-tauri/Cargo.toml $cargo -NoNewline\n\n          # Update tauri.conf.json\n          $json = Get-Content src-tauri/tauri.conf.json | ConvertFrom-Json\n          $json.version = $VERSION\n          $json | ConvertTo-Json -Depth 100 | Set-Content src-tauri/tauri.conf.json\n\n          # Update package.json\n          $pkg = Get-Content package.json | ConvertFrom-Json\n          $pkg.version = $VERSION\n          $pkg | ConvertTo-Json -Depth 100 | Set-Content package.json\n\n          Write-Host \"Versions set to: $VERSION\"\n\n      - name: Import Apple Developer Certificate\n        if: startsWith(matrix.platform, 'macos-')\n        run: |\n          echo $APPLE_CERTIFICATE | base64 --decode > certificate.p12\n          security create-keychain -p \"$KEYCHAIN_PASSWORD\" build.keychain\n          security default-keychain -s build.keychain\n          security unlock-keychain -p \"$KEYCHAIN_PASSWORD\" build.keychain\n          security set-keychain-settings -t 3600 -u build.keychain\n          security import certificate.p12 -k build.keychain -P \"$APPLE_CERTIFICATE_PASSWORD\" -T /usr/bin/codesign\n          security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k \"$KEYCHAIN_PASSWORD\" build.keychain\n          security find-identity -v -p codesigning build.keychain\n\n      - name: Verify Certificate\n        if: startsWith(matrix.platform, 'macos-')\n        run: |\n          CERT_INFO=$(security find-identity -v -p codesigning build.keychain | grep -E \"(Developer ID Application|Apple Distribution|Apple Development)\" | head -n 1)\n          CERT_ID=$(echo \"$CERT_INFO\" | awk -F'\"' '{print $2}')\n          echo \"CERT_ID=$CERT_ID\" >> $GITHUB_ENV\n          echo \"Certificate imported.\"\n\n      - uses: tauri-apps/tauri-action@73fb865345c54760d875b94642314f8c0c894afa # ratchet:tauri-apps/tauri-action@action-v0.6.1\n        env:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          APPLE_ID: ${{ env.APPLE_ID }}\n          APPLE_PASSWORD: ${{ env.APPLE_PASSWORD }}\n          APPLE_SIGNING_IDENTITY: ${{ env.CERT_ID }}\n          APPLE_TEAM_ID: ${{ env.APPLE_TEAM_ID }}\n        with:\n          # Use the release created by the create-release job to avoid race conditions\n          # when multiple matrix jobs try to create/update the same release simultaneously\n          releaseId: ${{ needs.create-release.outputs.release-id }}\n          assetNamePattern: \"[name]_[arch][ext]\"\n          args: ${{ matrix.args }}\n\n  build-web-amd64:\n    needs: determine-builds\n    if: needs.determine-builds.outputs.build-web == 'true'\n    runs-on:\n      - runs-on\n      - runner=4cpu-linux-x64\n      - run-id=${{ github.run_id }}-web-amd64\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: onyxdotapp/onyx-web-server\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push AMD64\n        id: build\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./web\n          file: ./web/Dockerfile\n          platforms: linux/amd64\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            ONYX_VERSION=${{ github.ref_name }}\n            NODE_OPTIONS=--max-old-space-size=8192\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-amd64\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: |\n            type=inline\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-amd64,mode=max\n          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}\n\n  build-web-arm64:\n    needs: determine-builds\n    if: needs.determine-builds.outputs.build-web == 'true'\n    runs-on:\n      - runs-on\n      - runner=4cpu-linux-arm64\n      - run-id=${{ github.run_id }}-web-arm64\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: onyxdotapp/onyx-web-server\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push ARM64\n        id: build\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./web\n          file: ./web/Dockerfile\n          platforms: linux/arm64\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            ONYX_VERSION=${{ github.ref_name }}\n            NODE_OPTIONS=--max-old-space-size=8192\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-arm64\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: |\n            type=inline\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-arm64,mode=max\n          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}\n\n  merge-web:\n    needs:\n      - determine-builds\n      - build-web-amd64\n      - build-web-arm64\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-x64\n      - run-id=${{ github.run_id }}-merge-web\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    env:\n      REGISTRY_IMAGE: onyxdotapp/onyx-web-server\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n          tags: |\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('web-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'craft-latest' || '' }}\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta == 'true' && 'beta' || '' }}\n\n      - name: Create and push manifest\n        env:\n          IMAGE_REPO: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          AMD64_DIGEST: ${{ needs.build-web-amd64.outputs.digest }}\n          ARM64_DIGEST: ${{ needs.build-web-arm64.outputs.digest }}\n          META_TAGS: ${{ steps.meta.outputs.tags }}\n        run: |\n          IMAGES=\"${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}\"\n          docker buildx imagetools create \\\n            $(printf '%s\\n' \"${META_TAGS}\" | xargs -I {} echo -t {}) \\\n            $IMAGES\n\n  build-web-cloud-amd64:\n    needs: determine-builds\n    if: needs.determine-builds.outputs.build-web-cloud == 'true'\n    runs-on:\n      - runs-on\n      - runner=4cpu-linux-x64\n      - run-id=${{ github.run_id }}-web-cloud-amd64\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push AMD64\n        id: build\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./web\n          file: ./web/Dockerfile\n          platforms: linux/amd64\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            ONYX_VERSION=${{ github.ref_name }}\n            NEXT_PUBLIC_CLOUD_ENABLED=true\n            NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}\n            NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}\n            NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}\n            NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}\n            NEXT_PUBLIC_RECAPTCHA_SITE_KEY=${{ vars.NEXT_PUBLIC_RECAPTCHA_SITE_KEY }}\n            NEXT_PUBLIC_GTM_ENABLED=true\n            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true\n            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true\n            NODE_OPTIONS=--max-old-space-size=8192\n            SENTRY_RELEASE=${{ github.sha }}\n          secrets: |\n            sentry_auth_token=${{ secrets.SENTRY_AUTH_TOKEN }}\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-amd64\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: |\n            type=inline\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-amd64,mode=max\n          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}\n\n  build-web-cloud-arm64:\n    needs: determine-builds\n    if: needs.determine-builds.outputs.build-web-cloud == 'true'\n    runs-on:\n      - runs-on\n      - runner=4cpu-linux-arm64\n      - run-id=${{ github.run_id }}-web-cloud-arm64\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push ARM64\n        id: build\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./web\n          file: ./web/Dockerfile\n          platforms: linux/arm64\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            ONYX_VERSION=${{ github.ref_name }}\n            NEXT_PUBLIC_CLOUD_ENABLED=true\n            NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}\n            NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}\n            NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}\n            NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}\n            NEXT_PUBLIC_RECAPTCHA_SITE_KEY=${{ vars.NEXT_PUBLIC_RECAPTCHA_SITE_KEY }}\n            NEXT_PUBLIC_GTM_ENABLED=true\n            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true\n            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true\n            NODE_OPTIONS=--max-old-space-size=8192\n            SENTRY_RELEASE=${{ github.sha }}\n          secrets: |\n            sentry_auth_token=${{ secrets.SENTRY_AUTH_TOKEN }}\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-arm64\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: |\n            type=inline\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-arm64,mode=max\n          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}\n\n  merge-web-cloud:\n    needs:\n      - determine-builds\n      - build-web-cloud-amd64\n      - build-web-cloud-arm64\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-x64\n      - run-id=${{ github.run_id }}-merge-web-cloud\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    env:\n      REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n          tags: |\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('web-cloud-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}\n\n      - name: Create and push manifest\n        env:\n          IMAGE_REPO: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          AMD64_DIGEST: ${{ needs.build-web-cloud-amd64.outputs.digest }}\n          ARM64_DIGEST: ${{ needs.build-web-cloud-arm64.outputs.digest }}\n          META_TAGS: ${{ steps.meta.outputs.tags }}\n        run: |\n          IMAGES=\"${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}\"\n          docker buildx imagetools create \\\n            $(printf '%s\\n' \"${META_TAGS}\" | xargs -I {} echo -t {}) \\\n            $IMAGES\n\n  build-backend-amd64:\n    needs: determine-builds\n    if: needs.determine-builds.outputs.build-backend == 'true'\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-x64\n      - run-id=${{ github.run_id }}-backend-amd64\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push AMD64\n        id: build\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./backend\n          file: ./backend/Dockerfile\n          platforms: linux/amd64\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            ONYX_VERSION=${{ github.ref_name }}\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-amd64\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: |\n            type=inline\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-amd64,mode=max\n          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}\n\n  build-backend-arm64:\n    needs: determine-builds\n    if: needs.determine-builds.outputs.build-backend == 'true'\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-arm64\n      - run-id=${{ github.run_id }}-backend-arm64\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push ARM64\n        id: build\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./backend\n          file: ./backend/Dockerfile\n          platforms: linux/arm64\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            ONYX_VERSION=${{ github.ref_name }}\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-arm64\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: |\n            type=inline\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-arm64,mode=max\n          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}\n\n  merge-backend:\n    needs:\n      - determine-builds\n      - build-backend-amd64\n      - build-backend-arm64\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-x64\n      - run-id=${{ github.run_id }}-merge-backend\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    env:\n      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n          tags: |\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('backend-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}\n\n      - name: Create and push manifest\n        env:\n          IMAGE_REPO: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          AMD64_DIGEST: ${{ needs.build-backend-amd64.outputs.digest }}\n          ARM64_DIGEST: ${{ needs.build-backend-arm64.outputs.digest }}\n          META_TAGS: ${{ steps.meta.outputs.tags }}\n        run: |\n          IMAGES=\"${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}\"\n          docker buildx imagetools create \\\n            $(printf '%s\\n' \"${META_TAGS}\" | xargs -I {} echo -t {}) \\\n            $IMAGES\n\n  build-backend-craft-amd64:\n    needs: determine-builds\n    if: needs.determine-builds.outputs.build-backend-craft == 'true'\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-x64\n      - run-id=${{ github.run_id }}-backend-craft-amd64\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: onyxdotapp/onyx-backend\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push AMD64\n        id: build\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./backend\n          file: ./backend/Dockerfile\n          platforms: linux/amd64\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            ONYX_VERSION=${{ github.ref_name }}\n            ENABLE_CRAFT=true\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-craft-cache-amd64\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: |\n            type=inline\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-craft-cache-amd64,mode=max\n          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}\n\n  build-backend-craft-arm64:\n    needs: determine-builds\n    if: needs.determine-builds.outputs.build-backend-craft == 'true'\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-arm64\n      - run-id=${{ github.run_id }}-backend-craft-arm64\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: onyxdotapp/onyx-backend\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push ARM64\n        id: build\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./backend\n          file: ./backend/Dockerfile\n          platforms: linux/arm64\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            ONYX_VERSION=${{ github.ref_name }}\n            ENABLE_CRAFT=true\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-craft-cache-arm64\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: |\n            type=inline\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-craft-cache-arm64,mode=max\n          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}\n\n  merge-backend-craft:\n    needs:\n      - determine-builds\n      - build-backend-craft-amd64\n      - build-backend-craft-arm64\n    if: needs.determine-builds.outputs.build-backend-craft == 'true'\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-x64\n      - run-id=${{ github.run_id }}-merge-backend-craft\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    env:\n      REGISTRY_IMAGE: onyxdotapp/onyx-backend\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n          tags: |\n            type=raw,value=craft-latest\n\n      - name: Create and push manifest\n        env:\n          IMAGE_REPO: ${{ env.REGISTRY_IMAGE }}\n          AMD64_DIGEST: ${{ needs.build-backend-craft-amd64.outputs.digest }}\n          ARM64_DIGEST: ${{ needs.build-backend-craft-arm64.outputs.digest }}\n          META_TAGS: ${{ steps.meta.outputs.tags }}\n        run: |\n          IMAGES=\"${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}\"\n          docker buildx imagetools create \\\n            $(printf '%s\\n' \"${META_TAGS}\" | xargs -I {} echo -t {}) \\\n            $IMAGES\n\n  build-model-server-amd64:\n    needs: determine-builds\n    if: needs.determine-builds.outputs.build-model-server == 'true'\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-x64\n      - run-id=${{ github.run_id }}-model-server-amd64\n      - volume=40gb\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n        with:\n          buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push AMD64\n        id: build\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        env:\n          DEBUG: ${{ vars.DOCKER_DEBUG == 'true' && 1 || 0 }}\n        with:\n          context: ./backend\n          file: ./backend/Dockerfile.model_server\n          platforms: linux/amd64\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            ONYX_VERSION=${{ github.ref_name }}\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-amd64\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: |\n            type=inline\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-amd64,mode=max\n          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n          no-cache: ${{ env.EDGE_TAG != 'true' && vars.MODEL_SERVER_NO_CACHE == 'true' }}\n          provenance: false\n          sbom: false\n\n  build-model-server-arm64:\n    needs: determine-builds\n    if: needs.determine-builds.outputs.build-model-server == 'true'\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-arm64\n      - run-id=${{ github.run_id }}-model-server-arm64\n      - volume=40gb\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n        with:\n          buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push ARM64\n        id: build\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        env:\n          DEBUG: ${{ vars.DOCKER_DEBUG == 'true' && 1 || 0 }}\n        with:\n          context: ./backend\n          file: ./backend/Dockerfile.model_server\n          platforms: linux/arm64\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            ONYX_VERSION=${{ github.ref_name }}\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-arm64\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: |\n            type=inline\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-arm64,mode=max\n          outputs: type=image,name=${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n          no-cache: ${{ env.EDGE_TAG != 'true' && vars.MODEL_SERVER_NO_CACHE == 'true' }}\n          provenance: false\n          sbom: false\n\n  merge-model-server:\n    needs:\n      - determine-builds\n      - build-model-server-amd64\n      - build-model-server-arm64\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-x64\n      - run-id=${{ github.run_id }}-merge-model-server\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    env:\n      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n          tags: |\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('model-server-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'craft-latest' || '' }}\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}\n            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}\n\n      - name: Create and push manifest\n        env:\n          IMAGE_REPO: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}\n          AMD64_DIGEST: ${{ needs.build-model-server-amd64.outputs.digest }}\n          ARM64_DIGEST: ${{ needs.build-model-server-arm64.outputs.digest }}\n          META_TAGS: ${{ steps.meta.outputs.tags }}\n        run: |\n          IMAGES=\"${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}\"\n          docker buildx imagetools create \\\n            $(printf '%s\\n' \"${META_TAGS}\" | xargs -I {} echo -t {}) \\\n            $IMAGES\n\n  trivy-scan:\n    needs:\n      - determine-builds\n      - merge-web\n      - merge-web-cloud\n      - merge-backend\n      - merge-model-server\n    if: >-\n      always() && !cancelled() &&\n      (needs.merge-web.result == 'success' ||\n       needs.merge-web-cloud.result == 'success' ||\n       needs.merge-backend.result == 'success' ||\n       needs.merge-model-server.result == 'success')\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-arm64\n      - run-id=${{ github.run_id }}-trivy-scan-${{ matrix.component }}\n      - extras=ecr-cache\n    permissions:\n      security-events: write # needed for SARIF uploads\n    timeout-minutes: 10\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          - component: web\n            registry-image: onyxdotapp/onyx-web-server\n          - component: web-cloud\n            registry-image: onyxdotapp/onyx-web-server-cloud\n          - component: backend\n            registry-image: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}\n            trivyignore: backend/.trivyignore\n          - component: model-server\n            registry-image: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}\n    steps:\n      - name: Check if this scan should run\n        id: should-run\n        run: |\n          case \"$COMPONENT\" in\n            web) RESULT=\"$MERGE_WEB\" ;;\n            web-cloud) RESULT=\"$MERGE_WEB_CLOUD\" ;;\n            backend) RESULT=\"$MERGE_BACKEND\" ;;\n            model-server) RESULT=\"$MERGE_MODEL_SERVER\" ;;\n          esac\n          if [ \"$RESULT\" == \"success\" ]; then\n            echo \"run=true\" >> \"$GITHUB_OUTPUT\"\n          else\n            echo \"run=false\" >> \"$GITHUB_OUTPUT\"\n          fi\n        env:\n          COMPONENT: ${{ matrix.component }}\n          MERGE_WEB: ${{ needs.merge-web.result }}\n          MERGE_WEB_CLOUD: ${{ needs.merge-web-cloud.result }}\n          MERGE_BACKEND: ${{ needs.merge-backend.result }}\n          MERGE_MODEL_SERVER: ${{ needs.merge-model-server.result }}\n\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n        if: steps.should-run.outputs.run == 'true'\n\n      - name: Checkout\n        if: steps.should-run.outputs.run == 'true' && matrix.trivyignore != ''\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Determine scan image\n        if: steps.should-run.outputs.run == 'true'\n        id: scan-image\n        run: |\n          if [ \"$IS_TEST_RUN\" == \"true\" ]; then\n            echo \"image=${RUNS_ON_ECR_CACHE}:${TAG_PREFIX}-${SANITIZED_TAG}\" >> \"$GITHUB_OUTPUT\"\n          else\n            echo \"image=docker.io/${REGISTRY_IMAGE}:${REF_NAME}\" >> \"$GITHUB_OUTPUT\"\n          fi\n        env:\n          IS_TEST_RUN: ${{ needs.determine-builds.outputs.is-test-run }}\n          TAG_PREFIX: ${{ matrix.component }}\n          SANITIZED_TAG: ${{ needs.determine-builds.outputs.sanitized-tag }}\n          REGISTRY_IMAGE: ${{ matrix.registry-image }}\n          REF_NAME: ${{ github.ref_name }}\n\n      - name: Run Trivy vulnerability scanner\n        if: steps.should-run.outputs.run == 'true'\n        uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # ratchet:aquasecurity/trivy-action@v0.35.0\n        with:\n          image-ref: ${{ steps.scan-image.outputs.image }}\n          severity: CRITICAL,HIGH\n          format: \"sarif\"\n          output: \"trivy-results.sarif\"\n          trivyignores: ${{ matrix.trivyignore }}\n        env:\n          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}\n          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Upload Trivy scan results to GitHub Security tab\n        if: steps.should-run.outputs.run == 'true'\n        uses: github/codeql-action/upload-sarif@ba454b8ab46733eb6145342877cd148270bb77ab\n        with:\n          sarif_file: \"trivy-results.sarif\"\n\n  notify-slack-on-failure:\n    needs:\n      - determine-builds\n      - build-desktop\n      - build-web-amd64\n      - build-web-arm64\n      - merge-web\n      - build-web-cloud-amd64\n      - build-web-cloud-arm64\n      - merge-web-cloud\n      - build-backend-amd64\n      - build-backend-arm64\n      - merge-backend\n      - build-backend-craft-amd64\n      - build-backend-craft-arm64\n      - merge-backend-craft\n      - build-model-server-amd64\n      - build-model-server-arm64\n      - merge-model-server\n    if: always() && (needs.build-desktop.result == 'failure' || needs.build-web-amd64.result == 'failure' || needs.build-web-arm64.result == 'failure' || needs.merge-web.result == 'failure' || needs.build-web-cloud-amd64.result == 'failure' || needs.build-web-cloud-arm64.result == 'failure' || needs.merge-web-cloud.result == 'failure' || needs.build-backend-amd64.result == 'failure' || needs.build-backend-arm64.result == 'failure' || needs.merge-backend.result == 'failure' || (needs.determine-builds.outputs.build-backend-craft == 'true' && (needs.build-backend-craft-amd64.result == 'failure' || needs.build-backend-craft-arm64.result == 'failure' || needs.merge-backend-craft.result == 'failure')) || needs.build-model-server-amd64.result == 'failure' || needs.build-model-server-arm64.result == 'failure' || needs.merge-model-server.result == 'failure') && needs.determine-builds.outputs.is-test-run != 'true'\n    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.\n    runs-on: ubuntu-slim\n    timeout-minutes: 90\n    environment: release\n    steps:\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Determine failed jobs\n        id: failed-jobs\n        shell: bash\n        run: |\n          FAILED_JOBS=\"\"\n          if [ \"${NEEDS_BUILD_DESKTOP_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• build-desktop\\\\n\"\n          fi\n          if [ \"${NEEDS_BUILD_WEB_AMD64_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• build-web-amd64\\\\n\"\n          fi\n          if [ \"${NEEDS_BUILD_WEB_ARM64_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• build-web-arm64\\\\n\"\n          fi\n          if [ \"${NEEDS_MERGE_WEB_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• merge-web\\\\n\"\n          fi\n          if [ \"${NEEDS_BUILD_WEB_CLOUD_AMD64_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• build-web-cloud-amd64\\\\n\"\n          fi\n          if [ \"${NEEDS_BUILD_WEB_CLOUD_ARM64_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• build-web-cloud-arm64\\\\n\"\n          fi\n          if [ \"${NEEDS_MERGE_WEB_CLOUD_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• merge-web-cloud\\\\n\"\n          fi\n          if [ \"${NEEDS_BUILD_BACKEND_AMD64_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• build-backend-amd64\\\\n\"\n          fi\n          if [ \"${NEEDS_BUILD_BACKEND_ARM64_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• build-backend-arm64\\\\n\"\n          fi\n          if [ \"${NEEDS_MERGE_BACKEND_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• merge-backend\\\\n\"\n          fi\n          if [ \"${NEEDS_BUILD_MODEL_SERVER_AMD64_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• build-model-server-amd64\\\\n\"\n          fi\n          if [ \"${NEEDS_BUILD_MODEL_SERVER_ARM64_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• build-model-server-arm64\\\\n\"\n          fi\n          if [ \"${NEEDS_MERGE_MODEL_SERVER_RESULT}\" == \"failure\" ]; then\n            FAILED_JOBS=\"${FAILED_JOBS}• merge-model-server\\\\n\"\n          fi\n          # Remove trailing \\n and set output\n          FAILED_JOBS=$(printf '%s' \"$FAILED_JOBS\" | sed 's/\\\\n$//')\n          echo \"jobs=$FAILED_JOBS\" >> \"$GITHUB_OUTPUT\"\n        env:\n          NEEDS_BUILD_DESKTOP_RESULT: ${{ needs.build-desktop.result }}\n          NEEDS_BUILD_WEB_AMD64_RESULT: ${{ needs.build-web-amd64.result }}\n          NEEDS_BUILD_WEB_ARM64_RESULT: ${{ needs.build-web-arm64.result }}\n          NEEDS_MERGE_WEB_RESULT: ${{ needs.merge-web.result }}\n          NEEDS_BUILD_WEB_CLOUD_AMD64_RESULT: ${{ needs.build-web-cloud-amd64.result }}\n          NEEDS_BUILD_WEB_CLOUD_ARM64_RESULT: ${{ needs.build-web-cloud-arm64.result }}\n          NEEDS_MERGE_WEB_CLOUD_RESULT: ${{ needs.merge-web-cloud.result }}\n          NEEDS_BUILD_BACKEND_AMD64_RESULT: ${{ needs.build-backend-amd64.result }}\n          NEEDS_BUILD_BACKEND_ARM64_RESULT: ${{ needs.build-backend-arm64.result }}\n          NEEDS_MERGE_BACKEND_RESULT: ${{ needs.merge-backend.result }}\n          NEEDS_BUILD_MODEL_SERVER_AMD64_RESULT: ${{ needs.build-model-server-amd64.result }}\n          NEEDS_BUILD_MODEL_SERVER_ARM64_RESULT: ${{ needs.build-model-server-arm64.result }}\n          NEEDS_MERGE_MODEL_SERVER_RESULT: ${{ needs.merge-model-server.result }}\n\n      - name: Send Slack notification\n        uses: ./.github/actions/slack-notify\n        with:\n          webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}\n          failed-jobs: ${{ steps.failed-jobs.outputs.jobs }}\n          title: \"🚨 Deployment Workflow Failed\"\n          ref-name: ${{ github.ref_name }}\n"
  },
  {
    "path": ".github/workflows/docker-tag-beta.yml",
    "content": "# This workflow is set up to be manually triggered via the GitHub Action tab.\n# Given a version, it will tag those backend and webserver images as \"beta\".\n\nname: Tag Beta Version\n\non:\n  workflow_dispatch:\n    inputs:\n      version:\n        description: \"The version (ie v1.0.0-beta.0) to tag as beta\"\n        required: true\n\npermissions:\n  contents: read\n\njobs:\n  tag:\n    # See https://runs-on.com/runners/linux/\n    # use a lower powered instance since this just does i/o to docker hub\n    runs-on: [runs-on, runner=2cpu-linux-x64, \"run-id=${{ github.run_id }}-tag\"]\n    timeout-minutes: 45\n    steps:\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Enable Docker CLI experimental features\n        run: echo \"DOCKER_CLI_EXPERIMENTAL=enabled\" >> $GITHUB_ENV\n\n      - name: Pull, Tag and Push Web Server Image\n        env:\n          VERSION: ${{ github.event.inputs.version }}\n        run: |\n          docker buildx imagetools create -t onyxdotapp/onyx-web-server:beta onyxdotapp/onyx-web-server:${VERSION}\n\n      - name: Pull, Tag and Push API Server Image\n        env:\n          VERSION: ${{ github.event.inputs.version }}\n        run: |\n          docker buildx imagetools create -t onyxdotapp/onyx-backend:beta onyxdotapp/onyx-backend:${VERSION}\n\n      - name: Pull, Tag and Push Model Server Image\n        env:\n          VERSION: ${{ github.event.inputs.version }}\n        run: |\n          docker buildx imagetools create -t onyxdotapp/onyx-model-server:beta onyxdotapp/onyx-model-server:${VERSION}\n"
  },
  {
    "path": ".github/workflows/docker-tag-latest.yml",
    "content": "# This workflow is set up to be manually triggered via the GitHub Action tab.\n# Given a version, it will tag those backend and webserver images as \"latest\".\n\nname: Tag Latest Version\n\non:\n  workflow_dispatch:\n    inputs:\n      version:\n        description: \"The version (ie v0.0.1) to tag as latest\"\n        required: true\n\npermissions:\n  contents: read\n\njobs:\n  tag:\n    # See https://runs-on.com/runners/linux/\n    # use a lower powered instance since this just does i/o to docker hub\n    runs-on: [runs-on, runner=2cpu-linux-x64, \"run-id=${{ github.run_id }}-tag\"]\n    timeout-minutes: 45\n    steps:\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Enable Docker CLI experimental features\n        run: echo \"DOCKER_CLI_EXPERIMENTAL=enabled\" >> $GITHUB_ENV\n\n      - name: Pull, Tag and Push Web Server Image\n        env:\n          VERSION: ${{ github.event.inputs.version }}\n        run: |\n          docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${VERSION}\n\n      - name: Pull, Tag and Push API Server Image\n        env:\n          VERSION: ${{ github.event.inputs.version }}\n        run: |\n          docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${VERSION}\n\n      - name: Pull, Tag and Push Model Server Image\n        env:\n          VERSION: ${{ github.event.inputs.version }}\n        run: |\n          docker buildx imagetools create -t onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:${VERSION}\n"
  },
  {
    "path": ".github/workflows/helm-chart-releases.yml",
    "content": "name: Release Onyx Helm Charts\n\non:\n  push:\n    branches:\n      - main\n\npermissions: write-all\n\njobs:\n  release:\n    permissions:\n      contents: write\n    runs-on: ubuntu-latest\n    timeout-minutes: 45\n    steps:\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          fetch-depth: 0\n          persist-credentials: false\n\n      - name: Install Helm CLI\n        uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4\n        with:\n          version: v3.12.1\n\n      - name: Add required Helm repositories\n        run: |\n          helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx\n          helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts\n          helm repo add opensearch https://opensearch-project.github.io/helm-charts\n          helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts\n          helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts\n          helm repo add minio https://charts.min.io/\n          helm repo add code-interpreter https://onyx-dot-app.github.io/python-sandbox/\n          helm repo update\n\n      - name: Build chart dependencies\n        run: |\n          set -euo pipefail\n          for chart_dir in deployment/helm/charts/*; do\n            if [ -f \"$chart_dir/Chart.yaml\" ]; then\n              echo \"Building dependencies for $chart_dir\"\n              helm dependency build \"$chart_dir\"\n            fi\n          done\n\n      - name: Publish Helm charts to gh-pages\n        # NOTE: HEAD of https://github.com/stefanprodan/helm-gh-pages/pull/43\n        uses: stefanprodan/helm-gh-pages@ad32ad3b8720abfeaac83532fd1e9bdfca5bbe27 # zizmor: ignore[impostor-commit]\n        with:\n          token: ${{ secrets.GITHUB_TOKEN }}\n          charts_dir: deployment/helm/charts\n          branch: gh-pages\n          commit_username: ${{ github.actor }}\n          commit_email: ${{ github.actor }}@users.noreply.github.com\n"
  },
  {
    "path": ".github/workflows/merge-group.yml",
    "content": "name: Merge Group-Specific\n\non:\n  merge_group:\n\npermissions:\n  contents: read\n\njobs:\n  # This job immediately succeeds to satisfy branch protection rules on merge_group events.\n  # There is a similarly named \"required\" job in pr-integration-tests.yml which runs the actual\n  # integration tests. That job runs on both pull_request and merge_group events, and this job\n  # exists solely to provide a fast-passing check with the same name for branch protection.\n  # The actual tests remain enforced on presubmit (pull_request events).\n  required:\n    runs-on: ubuntu-latest\n    timeout-minutes: 45\n    steps:\n      - name: Success\n        run: echo \"Success\"\n  # This job immediately succeeds to satisfy branch protection rules on merge_group events.\n  # There is a similarly named \"playwright-required\" job in pr-playwright-tests.yml which runs\n  # the actual playwright tests. That job runs on both pull_request and merge_group events, and\n  # this job exists solely to provide a fast-passing check with the same name for branch protection.\n  # The actual tests remain enforced on presubmit (pull_request events).\n  playwright-required:\n    runs-on: ubuntu-latest\n    timeout-minutes: 45\n    steps:\n      - name: Success\n        run: echo \"Success\"\n"
  },
  {
    "path": ".github/workflows/nightly-close-stale-issues.yml",
    "content": "name: 'Nightly - Close stale issues and PRs'\non:\n  schedule:\n    - cron: '0 11 * * *' # Runs every day at 3 AM PST / 4 AM PDT / 11 AM UTC\n\npermissions:\n  # contents: write # only for delete-branch option\n  issues: write\n  pull-requests: write\n\njobs:\n  stale:\n    runs-on: ubuntu-latest\n    timeout-minutes: 45\n    steps:\n      - uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # ratchet:actions/stale@v10\n        with:\n          stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'\n          stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'\n          close-issue-message: 'This issue was closed because it has been stalled for 90 days with no activity.'\n          close-pr-message: 'This PR was closed because it has been stalled for 90 days with no activity.'\n          days-before-stale: 75\n#           days-before-close: 90  # uncomment after we test stale behavior\n"
  },
  {
    "path": ".github/workflows/nightly-llm-provider-chat.yml",
    "content": "name: Nightly LLM Provider Chat Tests\nconcurrency:\n  group: Nightly-LLM-Provider-Chat-${{ github.workflow }}-${{ github.ref_name }}\n  cancel-in-progress: true\n\non:\n  schedule:\n    # Runs daily at 10:30 UTC (2:30 AM PST / 3:30 AM PDT)\n    - cron: \"30 10 * * *\"\n  workflow_dispatch:\n\npermissions:\n  contents: read\n\njobs:\n  provider-chat-test:\n    uses: ./.github/workflows/reusable-nightly-llm-provider-chat.yml\n    secrets:\n      AWS_OIDC_ROLE_ARN: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n    permissions:\n      contents: read\n      id-token: write\n    with:\n      openai_models: ${{ vars.NIGHTLY_LLM_OPENAI_MODELS }}\n      anthropic_models: ${{ vars.NIGHTLY_LLM_ANTHROPIC_MODELS }}\n      bedrock_models: ${{ vars.NIGHTLY_LLM_BEDROCK_MODELS }}\n      vertex_ai_models: ${{ vars.NIGHTLY_LLM_VERTEX_AI_MODELS }}\n      azure_models: ${{ vars.NIGHTLY_LLM_AZURE_MODELS }}\n      azure_api_base: ${{ vars.NIGHTLY_LLM_AZURE_API_BASE }}\n      ollama_models: ${{ vars.NIGHTLY_LLM_OLLAMA_MODELS }}\n      openrouter_models: ${{ vars.NIGHTLY_LLM_OPENROUTER_MODELS }}\n      strict: true\n\n  notify-slack-on-failure:\n    needs: [provider-chat-test]\n    if: failure() && github.event_name == 'schedule'\n    runs-on: ubuntu-slim\n    environment: ci-protected\n    timeout-minutes: 5\n    steps:\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Send Slack notification\n        uses: ./.github/actions/slack-notify\n        with:\n          webhook-url: ${{ secrets.SLACK_WEBHOOK }}\n          failed-jobs: provider-chat-test\n          title: \"🚨 Scheduled LLM Provider Chat Tests failed!\"\n          ref-name: ${{ github.ref_name }}\n"
  },
  {
    "path": ".github/workflows/post-merge-beta-cherry-pick.yml",
    "content": "name: Post-Merge Beta Cherry-Pick\n\non:\n  pull_request_target:\n    types:\n      - closed\n\n# SECURITY NOTE:\n# This workflow intentionally uses pull_request_target so post-merge automation can\n# use base-repo credentials. Do not checkout PR head refs in this workflow\n# (e.g. github.event.pull_request.head.sha). Only trusted base refs are allowed.\npermissions:\n  contents: read\n\njobs:\n  resolve-cherry-pick-request:\n    if: >-\n      github.event.pull_request.merged == true\n      && github.event.pull_request.base.ref == 'main'\n      && github.event.pull_request.head.repo.full_name == github.repository\n    outputs:\n      should_cherrypick: ${{ steps.gate.outputs.should_cherrypick }}\n      pr_number: ${{ steps.gate.outputs.pr_number }}\n      merge_commit_sha: ${{ steps.gate.outputs.merge_commit_sha }}\n      merged_by: ${{ steps.gate.outputs.merged_by }}\n      gate_error: ${{ steps.gate.outputs.gate_error }}\n    runs-on: ubuntu-latest\n    timeout-minutes: 10\n    steps:\n      - name: Resolve merged PR and checkbox state\n        id: gate\n        env:\n          GH_TOKEN: ${{ github.token }}\n          PR_NUMBER: ${{ github.event.pull_request.number }}\n          # SECURITY: keep PR body in env/plain-text handling; avoid directly\n          # inlining github.event.pull_request.body into shell commands.\n          PR_BODY: ${{ github.event.pull_request.body }}\n          MERGE_COMMIT_SHA: ${{ github.event.pull_request.merge_commit_sha }}\n          MERGED_BY: ${{ github.event.pull_request.merged_by.login }}\n          # Explicit merger allowlist used because pull_request_target runs with\n          # the default GITHUB_TOKEN, which cannot reliably read org/team\n          # membership for this repository context.\n          ALLOWED_MERGERS: |\n            acaprau\n            bo-onyx\n            danelegend\n            duo-onyx\n            evan-onyx\n            jessicasingh7\n            jmelahman\n            joachim-danswer\n            justin-tahara\n            nmgarza5\n            raunakab\n            rohoswagger\n            subash-mohan\n            trial2onyx\n            wenxi-onyx\n            weves\n            yuhongsun96\n        run: |\n          echo \"pr_number=${PR_NUMBER}\" >> \"$GITHUB_OUTPUT\"\n          echo \"merged_by=${MERGED_BY}\" >> \"$GITHUB_OUTPUT\"\n\n          if ! echo \"${PR_BODY}\" | grep -qiE \"\\\\[x\\\\][[:space:]]*(\\\\[[^]]+\\\\][[:space:]]*)?Please cherry-pick this PR to the latest release version\"; then\n            echo \"should_cherrypick=false\" >> \"$GITHUB_OUTPUT\"\n            echo \"Cherry-pick checkbox not checked for PR #${PR_NUMBER}. Skipping.\"\n            exit 0\n          fi\n\n          # Keep should_cherrypick output before any possible exit 1 below so\n          # notify-slack can still gate on this output even if this job fails.\n          echo \"should_cherrypick=true\" >> \"$GITHUB_OUTPUT\"\n          echo \"Cherry-pick checkbox checked for PR #${PR_NUMBER}.\"\n\n          if [ -z \"${MERGE_COMMIT_SHA}\" ] || [ \"${MERGE_COMMIT_SHA}\" = \"null\" ]; then\n            echo \"gate_error=missing-merge-commit-sha\" >> \"$GITHUB_OUTPUT\"\n            echo \"::error::PR #${PR_NUMBER} requested cherry-pick, but merge_commit_sha is missing.\"\n            exit 1\n          fi\n\n          echo \"merge_commit_sha=${MERGE_COMMIT_SHA}\" >> \"$GITHUB_OUTPUT\"\n\n          normalized_merged_by=\"$(printf '%s' \"${MERGED_BY}\" | tr '[:upper:]' '[:lower:]')\"\n          normalized_allowed_mergers=\"$(printf '%s\\n' \"${ALLOWED_MERGERS}\" | tr '[:upper:]' '[:lower:]')\"\n          if ! printf '%s\\n' \"${normalized_allowed_mergers}\" | grep -Fxq \"${normalized_merged_by}\"; then\n            echo \"gate_error=not-allowed-merger\" >> \"$GITHUB_OUTPUT\"\n            echo \"::error::${MERGED_BY} is not in the explicit cherry-pick merger allowlist. Failing cherry-pick gate.\"\n            exit 1\n          fi\n\n          exit 0\n\n  cherry-pick-to-latest-release:\n    needs:\n      - resolve-cherry-pick-request\n    if: needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && needs.resolve-cherry-pick-request.result == 'success'\n    permissions:\n      contents: write\n      pull-requests: write\n    outputs:\n      cherry_pick_pr_url: ${{ steps.run_cherry_pick.outputs.pr_url }}\n      cherry_pick_reason: ${{ steps.run_cherry_pick.outputs.reason }}\n      cherry_pick_details: ${{ steps.run_cherry_pick.outputs.details }}\n    runs-on: ubuntu-latest\n    timeout-minutes: 45\n    steps:\n      - name: Checkout repository\n        # SECURITY: keep checkout pinned to trusted base branch; do not switch to PR head refs.\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          fetch-depth: 0\n          persist-credentials: true\n          ref: main\n\n      - name: Install the latest version of uv\n        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7\n        with:\n          enable-cache: false\n          version: \"0.9.9\"\n\n      - name: Configure git identity\n        run: |\n          git config user.name \"github-actions[bot]\"\n          git config user.email \"github-actions[bot]@users.noreply.github.com\"\n\n      - name: Create cherry-pick PR to latest release\n        id: run_cherry_pick\n        env:\n          GH_TOKEN: ${{ github.token }}\n          GITHUB_TOKEN: ${{ github.token }}\n          CHERRY_PICK_ASSIGNEE: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}\n          MERGE_COMMIT_SHA: ${{ needs.resolve-cherry-pick-request.outputs.merge_commit_sha }}\n        run: |\n          output_file=\"$(mktemp)\"\n          set +e\n          uv run --no-sync --with onyx-devtools ods cherry-pick \"${MERGE_COMMIT_SHA}\" --yes --no-verify 2>&1 | tee \"$output_file\"\n          pipe_statuses=(\"${PIPESTATUS[@]}\")\n          exit_code=\"${pipe_statuses[0]}\"\n          tee_exit=\"${pipe_statuses[1]:-0}\"\n          set -e\n          if [ \"${tee_exit}\" -ne 0 ]; then\n            echo \"status=failure\" >> \"$GITHUB_OUTPUT\"\n            echo \"reason=output-capture-failed\" >> \"$GITHUB_OUTPUT\"\n            echo \"::error::tee failed to capture cherry-pick output (exit ${tee_exit}); cannot classify result.\"\n            exit 1\n          fi\n\n          if [ \"${exit_code}\" -eq 0 ]; then\n            pr_url=\"$(sed -n 's/^.*PR created successfully: \\(https:\\/\\/github\\.com\\/[^[:space:]]\\+\\/pull\\/[0-9]\\+\\).*$/\\1/p' \"$output_file\" | tail -n 1)\"\n            echo \"status=success\" >> \"$GITHUB_OUTPUT\"\n            if [ -n \"${pr_url}\" ]; then\n              echo \"pr_url=${pr_url}\" >> \"$GITHUB_OUTPUT\"\n            fi\n            exit 0\n          fi\n\n          echo \"status=failure\" >> \"$GITHUB_OUTPUT\"\n\n          reason=\"command-failed\"\n          if grep -qiE \"merge conflict during cherry-pick|CONFLICT|could not apply|cherry-pick in progress with staged changes\" \"$output_file\"; then\n            reason=\"merge-conflict\"\n          fi\n          echo \"reason=${reason}\" >> \"$GITHUB_OUTPUT\"\n\n          {\n            echo \"details<<EOF\"\n            tail -n 40 \"$output_file\"\n            echo \"EOF\"\n          } >> \"$GITHUB_OUTPUT\"\n\n      - name: Mark workflow as failed if cherry-pick failed\n        if: steps.run_cherry_pick.outputs.status == 'failure'\n        env:\n          CHERRY_PICK_REASON: ${{ steps.run_cherry_pick.outputs.reason }}\n        run: |\n          echo \"::error::Automated cherry-pick failed (${CHERRY_PICK_REASON}).\"\n          exit 1\n\n  notify-slack-on-cherry-pick-success:\n    needs:\n      - resolve-cherry-pick-request\n      - cherry-pick-to-latest-release\n    if: needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && needs.resolve-cherry-pick-request.result == 'success' && needs.cherry-pick-to-latest-release.result == 'success'\n    runs-on: ubuntu-slim\n    environment: ci-protected\n    timeout-minutes: 10\n    steps:\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Fail if Slack webhook secret is missing\n        env:\n          CHERRY_PICK_PRS_WEBHOOK: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}\n        run: |\n          if [ -z \"${CHERRY_PICK_PRS_WEBHOOK}\" ]; then\n            echo \"::error::CHERRY_PICK_PRS_WEBHOOK is not configured.\"\n            exit 1\n          fi\n\n      - name: Build cherry-pick success summary\n        id: success-summary\n        env:\n          SOURCE_PR_NUMBER: ${{ needs.resolve-cherry-pick-request.outputs.pr_number }}\n          MERGE_COMMIT_SHA: ${{ needs.resolve-cherry-pick-request.outputs.merge_commit_sha }}\n          CHERRY_PICK_PR_URL: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_pr_url }}\n        run: |\n          source_pr_url=\"https://github.com/${GITHUB_REPOSITORY}/pull/${SOURCE_PR_NUMBER}\"\n          details=\"*Cherry-pick PR opened successfully.*\\\\n• author: {mention}\\\\n• source PR: ${source_pr_url}\"\n          if [ -n \"${CHERRY_PICK_PR_URL}\" ]; then\n            details=\"${details}\\\\n• cherry-pick PR: ${CHERRY_PICK_PR_URL}\"\n          fi\n          if [ -n \"${MERGE_COMMIT_SHA}\" ]; then\n            details=\"${details}\\\\n• merge SHA: ${MERGE_COMMIT_SHA}\"\n          fi\n\n          echo \"details=${details}\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Notify #cherry-pick-prs about cherry-pick success\n        uses: ./.github/actions/slack-notify\n        with:\n          webhook-url: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}\n          mention: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}\n          details: ${{ steps.success-summary.outputs.details }}\n          title: \"✅ Automated Cherry-Pick PR Opened\"\n          ref-name: ${{ github.event.pull_request.base.ref }}\n\n  notify-slack-on-cherry-pick-failure:\n    needs:\n      - resolve-cherry-pick-request\n      - cherry-pick-to-latest-release\n    if: always() && needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && (needs.resolve-cherry-pick-request.result == 'failure' || needs.cherry-pick-to-latest-release.result == 'failure')\n    runs-on: ubuntu-slim\n    environment: ci-protected\n    timeout-minutes: 10\n    steps:\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Fail if Slack webhook secret is missing\n        env:\n          CHERRY_PICK_PRS_WEBHOOK: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}\n        run: |\n          if [ -z \"${CHERRY_PICK_PRS_WEBHOOK}\" ]; then\n            echo \"::error::CHERRY_PICK_PRS_WEBHOOK is not configured.\"\n            exit 1\n          fi\n\n      - name: Build cherry-pick failure summary\n        id: failure-summary\n        env:\n          SOURCE_PR_NUMBER: ${{ needs.resolve-cherry-pick-request.outputs.pr_number }}\n          MERGE_COMMIT_SHA: ${{ needs.resolve-cherry-pick-request.outputs.merge_commit_sha }}\n          GATE_ERROR: ${{ needs.resolve-cherry-pick-request.outputs.gate_error }}\n          CHERRY_PICK_REASON: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_reason }}\n          CHERRY_PICK_DETAILS: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_details }}\n        run: |\n          source_pr_url=\"https://github.com/${GITHUB_REPOSITORY}/pull/${SOURCE_PR_NUMBER}\"\n\n          reason_text=\"cherry-pick command failed\"\n          if [ \"${GATE_ERROR}\" = \"missing-merge-commit-sha\" ]; then\n            reason_text=\"requested cherry-pick but merge commit SHA was missing\"\n          elif [ \"${GATE_ERROR}\" = \"not-allowed-merger\" ]; then\n            reason_text=\"merger is not in the explicit cherry-pick allowlist\"\n          elif [ \"${CHERRY_PICK_REASON}\" = \"output-capture-failed\" ]; then\n            reason_text=\"failed to capture cherry-pick output for classification\"\n          elif [ \"${CHERRY_PICK_REASON}\" = \"merge-conflict\" ]; then\n            reason_text=\"merge conflict during cherry-pick\"\n          fi\n\n          details_excerpt=\"$(printf '%s' \"${CHERRY_PICK_DETAILS}\" | tail -n 8 | tr '\\n' ' ' | sed \"s/[[:space:]]\\\\+/ /g\" | sed \"s/\\\"/'/g\" | cut -c1-350)\"\n          if [ -n \"${GATE_ERROR}\" ]; then\n            failed_job_label=\"resolve-cherry-pick-request\"\n          else\n            failed_job_label=\"cherry-pick-to-latest-release\"\n          fi\n          details=\"• author: {mention}\\\\n• ${failed_job_label}\\\\n• source PR: ${source_pr_url}\\\\n• reason: ${reason_text}\"\n          if [ -n \"${MERGE_COMMIT_SHA}\" ]; then\n            details=\"${details}\\\\n• merge SHA: ${MERGE_COMMIT_SHA}\"\n          fi\n          if [ -n \"${details_excerpt}\" ]; then\n            details=\"${details}\\\\n• excerpt: ${details_excerpt}\"\n          fi\n\n          echo \"details=${details}\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Notify #cherry-pick-prs about cherry-pick failure\n        uses: ./.github/actions/slack-notify\n        with:\n          webhook-url: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}\n          mention: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}\n          details: ${{ steps.failure-summary.outputs.details }}\n          title: \"🚨 Automated Cherry-Pick Failed\"\n          ref-name: ${{ github.event.pull_request.base.ref }}\n"
  },
  {
    "path": ".github/workflows/pr-database-tests.yml",
    "content": "name: Database Tests\nconcurrency:\n  group: Database-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  merge_group:\n  pull_request:\n    branches:\n      - main\n      - \"release/**\"\n  push:\n    tags:\n      - \"v*.*.*\"\n\npermissions:\n  contents: read\n\njobs:\n  database-tests:\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-arm64\n      - \"run-id=${{ github.run_id }}-database-tests\"\n    timeout-minutes: 45\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Setup Python and Install Dependencies\n        uses: ./.github/actions/setup-python-and-install-dependencies\n        with:\n          requirements: |\n            backend/requirements/default.txt\n            backend/requirements/dev.txt\n\n      - name: Generate OpenAPI schema and Python client\n        shell: bash\n        # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license\n        env:\n          LICENSE_ENFORCEMENT_ENABLED: \"false\"\n        run: |\n          ods openapi all\n\n      # needed for pulling external images otherwise, we hit the \"Unauthenticated users\" limit\n      # https://docs.docker.com/docker-hub/usage/\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Start Docker containers\n        working-directory: ./deployment/docker_compose\n        run: |\n          docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d \\\n            relational_db\n\n      - name: Run Database Tests\n        working-directory: ./backend\n        run: pytest -m alembic tests/integration/tests/migrations/\n"
  },
  {
    "path": ".github/workflows/pr-desktop-build.yml",
    "content": "name: Build Desktop App\nconcurrency:\n  group: Build-Desktop-App-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  merge_group:\n  pull_request:\n    branches:\n      - main\n      - \"release/**\"\n    paths:\n      - \"desktop/**\"\n      - \".github/workflows/pr-desktop-build.yml\"\n  push:\n    tags:\n      - \"v*.*.*\"\n\npermissions:\n  contents: read\n\njobs:\n  build-desktop:\n    name: Build Desktop (${{ matrix.platform }})\n    runs-on: ${{ matrix.os }}\n    timeout-minutes: 60\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          - platform: linux\n            os: ubuntu-latest\n            target: x86_64-unknown-linux-gnu\n            args: \"--bundles deb,rpm\"\n          # TODO: Fix and enable the macOS build.\n          #- platform: macos\n          #  os: macos-latest\n          #  target: universal-apple-darwin\n          #  args: \"--target universal-apple-darwin\"\n          # TODO: Fix and enable the Windows build.\n          #- platform: windows\n          #  os: windows-latest\n          #  target: x86_64-pc-windows-msvc\n          #  args: \"\"\n\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd\n        with:\n          persist-credentials: false\n\n      - name: Setup node\n        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f\n        with:\n          node-version: 24\n          cache: \"npm\" # zizmor: ignore[cache-poisoning]\n          cache-dependency-path: ./desktop/package-lock.json\n\n      - name: Setup Rust\n        uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9\n        with:\n          toolchain: stable\n          targets: ${{ matrix.target }}\n\n      - name: Cache Cargo registry and build\n        uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # zizmor: ignore[cache-poisoning]\n        with:\n          path: |\n            ~/.cargo/bin/\n            ~/.cargo/registry/index/\n            ~/.cargo/registry/cache/\n            ~/.cargo/git/db/\n            desktop/src-tauri/target/\n          key: ${{ runner.os }}-cargo-${{ hashFiles('desktop/src-tauri/Cargo.lock') }}\n          restore-keys: |\n            ${{ runner.os }}-cargo-\n\n      - name: Install Linux dependencies\n        if: matrix.platform == 'linux'\n        run: |\n          sudo apt-get update\n          sudo apt-get install -y \\\n            build-essential \\\n            libglib2.0-dev \\\n            libgirepository1.0-dev \\\n            libgtk-3-dev \\\n            libjavascriptcoregtk-4.1-dev \\\n            libwebkit2gtk-4.1-dev \\\n            libayatana-appindicator3-dev \\\n            gobject-introspection \\\n            pkg-config \\\n            curl \\\n            xdg-utils\n\n      - name: Install npm dependencies\n        working-directory: ./desktop\n        run: npm ci\n\n      - name: Build desktop app\n        working-directory: ./desktop\n        run: npx tauri build ${{ matrix.args }}\n        env:\n          TAURI_SIGNING_PRIVATE_KEY: \"\"\n          TAURI_SIGNING_PRIVATE_KEY_PASSWORD: \"\"\n\n      - name: Upload build artifacts\n        if: always()\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        with:\n          name: desktop-build-${{ matrix.platform }}-${{ github.run_id }}\n          path: |\n            desktop/src-tauri/target/release/bundle/\n          retention-days: 7\n          if-no-files-found: ignore\n"
  },
  {
    "path": ".github/workflows/pr-external-dependency-unit-tests.yml",
    "content": "name: External Dependency Unit Tests\nconcurrency:\n  group: External-Dependency-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  merge_group:\n  pull_request:\n    branches: [main]\n    paths:\n      - \"backend/**\"\n      - \"pyproject.toml\"\n      - \"uv.lock\"\n      - \".github/workflows/pr-external-dependency-unit-tests.yml\"\n      - \".github/actions/setup-python-and-install-dependencies/**\"\n      - \".github/actions/setup-playwright/**\"\n      - \"deployment/docker_compose/docker-compose.yml\"\n      - \"deployment/docker_compose/docker-compose.dev.yml\"\n  push:\n    tags:\n      - \"v*.*.*\"\n\npermissions:\n  contents: read\n\nenv:\n  # AWS credentials for S3-specific test\n  S3_AWS_ACCESS_KEY_ID_FOR_TEST: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}\n  S3_AWS_SECRET_ACCESS_KEY_FOR_TEST: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}\n\n  # MinIO\n  S3_ENDPOINT_URL: \"http://localhost:9004\"\n  S3_AWS_ACCESS_KEY_ID: \"minioadmin\"\n  S3_AWS_SECRET_ACCESS_KEY: \"minioadmin\"\n\n  # Confluence\n  CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}\n  CONFLUENCE_TEST_SPACE: ${{ vars.CONFLUENCE_TEST_SPACE }}\n  CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}\n  CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}\n  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}\n  CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}\n\n  # Jira\n  JIRA_ADMIN_API_TOKEN: ${{ secrets.JIRA_ADMIN_API_TOKEN }}\n\n  # LLMs\n  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}\n  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}\n  VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}\n  VERTEX_LOCATION: ${{ vars.VERTEX_LOCATION }}\n\n  # Code Interpreter\n  # TODO: debug why this is failing and enable\n  CODE_INTERPRETER_BASE_URL: http://localhost:8000\n\njobs:\n  discover-test-dirs:\n    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.\n    runs-on: ubuntu-slim\n    timeout-minutes: 45\n    outputs:\n      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Discover test directories\n        id: set-matrix\n        run: |\n          # Find all subdirectories in backend/tests/external_dependency_unit\n          dirs=$(find backend/tests/external_dependency_unit -mindepth 1 -maxdepth 1 -type d -exec basename {} \\; | sort | jq -R -s -c 'split(\"\\n\")[:-1]')\n          echo \"test-dirs=$dirs\" >> $GITHUB_OUTPUT\n\n  external-dependency-unit-tests:\n    needs: discover-test-dirs\n    # Use larger runner with more resources for Vespa\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-arm64\n      - ${{ format('run-id={0}-external-dependency-unit-tests-job-{1}', github.run_id, strategy['job-index']) }}\n      - extras=s3-cache\n    timeout-minutes: 45\n    strategy:\n      fail-fast: false\n      matrix:\n        test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}\n\n    env:\n      PYTHONPATH: ./backend\n      MODEL_SERVER_HOST: \"disabled\"\n      DISABLE_TELEMETRY: \"true\"\n\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Setup Python and Install Dependencies\n        uses: ./.github/actions/setup-python-and-install-dependencies\n        with:\n          requirements: |\n            backend/requirements/default.txt\n            backend/requirements/dev.txt\n            backend/requirements/ee.txt\n\n      - name: Setup Playwright\n        uses: ./.github/actions/setup-playwright\n\n      # needed for pulling Vespa, Redis, Postgres, and Minio images\n      # otherwise, we hit the \"Unauthenticated users\" limit\n      # https://docs.docker.com/docker-hub/usage/\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Create .env file for Docker Compose\n        run: |\n          cat <<EOF > deployment/docker_compose/.env\n          COMPOSE_PROFILES=s3-filestore,opensearch-enabled\n          DISABLE_TELEMETRY=true\n          OPENSEARCH_FOR_ONYX_ENABLED=true\n          EOF\n\n      - name: Set up Standard Dependencies\n        run: |\n          cd deployment/docker_compose\n          docker compose \\\n            -f docker-compose.yml \\\n            -f docker-compose.dev.yml \\\n            up -d \\\n            minio \\\n            relational_db \\\n            cache \\\n            index \\\n            opensearch \\\n            code-interpreter\n\n      - name: Run migrations\n        run: |\n          cd backend\n          # Run migrations to head\n          alembic upgrade head\n          alembic heads --verbose\n\n      - name: Run Tests for ${{ matrix.test-dir }}\n        shell: script -q -e -c \"bash --noprofile --norc -eo pipefail {0}\"\n        env:\n          TEST_DIR: ${{ matrix.test-dir }}\n        run: |\n          py.test \\\n            --durations=8 \\\n            -o junit_family=xunit2 \\\n            -xv \\\n            --ff \\\n            backend/tests/external_dependency_unit/${TEST_DIR}\n\n      - name: Collect Docker logs on failure\n        if: failure()\n        run: |\n          mkdir -p docker-logs\n          cd deployment/docker_compose\n\n          # Get list of running containers\n          containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)\n\n          # Collect logs from each container\n          for container in $containers; do\n            container_name=$(docker inspect --format='{{.Name}}' $container | sed 's/^\\///')\n            echo \"Collecting logs from $container_name...\"\n            docker logs $container > ../../docker-logs/${container_name}.log 2>&1\n          done\n\n          cd ../..\n          echo \"Docker logs collected in docker-logs directory\"\n\n      - name: Upload Docker logs\n        if: failure()\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        with:\n          name: docker-logs-${{ matrix.test-dir }}\n          path: docker-logs/\n          retention-days: 7\n"
  },
  {
    "path": ".github/workflows/pr-golang-tests.yml",
    "content": "name: Golang Tests\nconcurrency:\n  group: Golang-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  merge_group:\n  pull_request:\n    branches:\n      - main\n      - \"release/**\"\n  push:\n    tags:\n      - \"v*.*.*\"\n\npermissions: {}\n\nenv:\n  GO_VERSION: \"1.26\"\n\njobs:\n  detect-modules:\n    runs-on: ubuntu-latest\n    timeout-minutes: 10\n    outputs:\n      modules: ${{ steps.set-modules.outputs.modules }}\n    steps:\n      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd\n        with:\n          persist-credentials: false\n      - id: set-modules\n        run: echo \"modules=$(find . -name 'go.mod' -exec dirname {} \\; | jq -Rc '[.,inputs]')\" >> \"$GITHUB_OUTPUT\"\n\n  golang:\n    needs: detect-modules\n    runs-on: ubuntu-latest\n    timeout-minutes: 10\n    strategy:\n      matrix:\n        modules: ${{ fromJSON(needs.detect-modules.outputs.modules) }}\n    steps:\n      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n      - uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # zizmor: ignore[cache-poisoning]\n        with:\n          go-version: ${{ env.GO_VERSION }}\n          cache-dependency-path: \"**/go.sum\"\n\n      - run: go mod tidy\n        working-directory: ${{ matrix.modules }}\n      - run: git diff --exit-code go.mod go.sum\n        working-directory: ${{ matrix.modules }}\n\n      - run: go test ./...\n        working-directory: ${{ matrix.modules }}\n"
  },
  {
    "path": ".github/workflows/pr-helm-chart-testing.yml",
    "content": "name: Helm - Lint and Test Charts\nconcurrency:\n  group: Helm-Lint-and-Test-Charts-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  merge_group:\n  pull_request:\n    branches: [main]\n  push:\n    tags:\n      - \"v*.*.*\"\n  workflow_dispatch: # Allows manual triggering\n\npermissions:\n  contents: read\n\njobs:\n  helm-chart-check:\n    # See https://runs-on.com/runners/linux/\n    runs-on:\n      [\n        runs-on,\n        runner=8cpu-linux-x64,\n        hdd=256,\n        \"run-id=${{ github.run_id }}-helm-chart-check\",\n      ]\n    timeout-minutes: 45\n\n    # fetch-depth 0 is required for helm/chart-testing-action\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          fetch-depth: 0\n          persist-credentials: false\n\n      - name: Set up Helm\n        uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1\n        with:\n          version: v3.19.0\n\n      - name: Set up chart-testing\n        uses: helm/chart-testing-action@2e2940618cb426dce2999631d543b53cdcfc8527\n        with:\n          uv_version: \"0.9.9\"\n\n      # even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...\n      - name: Run chart-testing (list-changed)\n        id: list-changed\n        env:\n          DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}\n        run: |\n          echo \"default_branch: ${DEFAULT_BRANCH}\"\n          changed=$(ct list-changed --remote origin --target-branch ${DEFAULT_BRANCH} --chart-dirs deployment/helm/charts)\n          echo \"list-changed output: $changed\"\n          if [[ -n \"$changed\" ]]; then\n            echo \"changed=true\" >> \"$GITHUB_OUTPUT\"\n          fi\n\n      # uncomment to force run chart-testing\n      #     - name: Force run chart-testing (list-changed)\n      #       id: list-changed\n      #       run: echo \"changed=true\" >> $GITHUB_OUTPUT\n      # lint all charts if any changes were detected\n      - name: Run chart-testing (lint)\n        if: steps.list-changed.outputs.changed == 'true'\n        run: ct lint --config ct.yaml --all\n        # the following would lint only changed charts, but linting isn't expensive\n        # run: ct lint --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}\n\n      - name: Create kind cluster\n        if: steps.list-changed.outputs.changed == 'true'\n        uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc # ratchet:helm/kind-action@v1.14.0\n\n      - name: Pre-install cluster status check\n        if: steps.list-changed.outputs.changed == 'true'\n        run: |\n          echo \"=== Pre-install Cluster Status ===\"\n          kubectl get nodes -o wide\n          kubectl get pods --all-namespaces\n          kubectl get storageclass\n\n      - name: Add Helm repositories and update\n        if: steps.list-changed.outputs.changed == 'true'\n        run: |\n          echo \"=== Adding Helm repositories ===\"\n          helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx\n          helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts\n          helm repo add opensearch https://opensearch-project.github.io/helm-charts\n          helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts\n          helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts\n          helm repo add minio https://charts.min.io/\n          helm repo add code-interpreter https://onyx-dot-app.github.io/python-sandbox/\n          helm repo update\n\n      - name: Install Redis operator\n        if: steps.list-changed.outputs.changed == 'true'\n        shell: bash\n        run: |\n          echo \"=== Installing redis-operator CRDs ===\"\n          helm upgrade --install redis-operator ot-container-kit/redis-operator \\\n            --namespace redis-operator --create-namespace --wait --timeout 300s\n\n      - name: Pre-pull required images\n        if: steps.list-changed.outputs.changed == 'true'\n        run: |\n          echo \"=== Pre-pulling required images to avoid timeout ===\"\n          KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')\n          echo \"Kind cluster: $KIND_CLUSTER\"\n\n          IMAGES=(\n            \"ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0\"\n            \"quay.io/opstree/redis:v7.0.15\"\n            \"docker.io/onyxdotapp/onyx-web-server:latest\"\n          )\n\n          for image in \"${IMAGES[@]}\"; do\n            echo \"Pre-pulling $image\"\n            if docker pull \"$image\"; then\n              kind load docker-image \"$image\" --name \"$KIND_CLUSTER\" || echo \"Failed to load $image into kind\"\n            else\n              echo \"Failed to pull $image\"\n            fi\n          done\n\n          echo \"=== Images loaded into Kind cluster ===\"\n          docker exec \"$KIND_CLUSTER\"-control-plane crictl images | grep -E \"(cloudnative-pg|redis|onyx)\" || echo \"Some images may still be loading...\"\n\n      - name: Validate chart dependencies\n        if: steps.list-changed.outputs.changed == 'true'\n        run: |\n          echo \"=== Validating chart dependencies ===\"\n          cd deployment/helm/charts/onyx\n          helm dependency update\n          helm lint . --set auth.userauth.values.user_auth_secret=placeholder\n\n      - name: Run chart-testing (install) with enhanced monitoring\n        timeout-minutes: 25\n        if: steps.list-changed.outputs.changed == 'true'\n        run: |\n          echo \"=== Starting chart installation with monitoring ===\"\n\n          # Function to monitor cluster state\n          monitor_cluster() {\n            while true; do\n              echo \"=== Cluster Status Check at $(date) ===\"\n              # Only show non-running pods to reduce noise\n              NON_RUNNING_PODS=$(kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)\n              if [ \"$NON_RUNNING_PODS\" -gt 0 ]; then\n                echo \"Non-running pods:\"\n                kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded\n              else\n                echo \"All pods running successfully\"\n              fi\n              # Only show recent events if there are issues\n              RECENT_EVENTS=$(kubectl get events --sort-by=.lastTimestamp --all-namespaces --field-selector=type!=Normal 2>/dev/null | tail -5)\n              if [ -n \"$RECENT_EVENTS\" ]; then\n                echo \"Recent warnings/errors:\"\n                echo \"$RECENT_EVENTS\"\n              fi\n              sleep 60\n            done\n          }\n\n          # Start monitoring in background\n          monitor_cluster &\n          MONITOR_PID=$!\n\n          # Set up cleanup\n          cleanup() {\n            echo \"=== Cleaning up monitoring process ===\"\n            kill $MONITOR_PID 2>/dev/null || true\n            echo \"=== Final cluster state ===\"\n            kubectl get pods --all-namespaces\n            kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20\n          }\n\n          # Trap cleanup on exit\n          trap cleanup EXIT\n\n          # Run the actual installation with detailed logging\n          # Note that opensearch.enabled is true whereas others in this install\n          # are false. There is some work that needs to be done to get this\n          # entire step working in CI, enabling opensearch here is a small step\n          # in that direction. If this is causing issues, disabling it in this\n          # step should be ok in the short term.\n          echo \"=== Starting ct install ===\"\n          set +e\n          ct install --all \\\n            --helm-extra-set-args=\"\\\n              --set=nginx.enabled=false \\\n              --set=minio.enabled=false \\\n              --set=vespa.enabled=false \\\n              --set=opensearch.enabled=true \\\n              --set=auth.opensearch.enabled=true \\\n              --set=auth.userauth.values.user_auth_secret=test-secret \\\n              --set=slackbot.enabled=false \\\n              --set=postgresql.enabled=true \\\n              --set=postgresql.cluster.storage.storageClass=standard \\\n              --set=redis.enabled=true \\\n              --set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \\\n              --set=webserver.replicaCount=1 \\\n              --set=api.replicaCount=0 \\\n              --set=inferenceCapability.replicaCount=0 \\\n              --set=indexCapability.replicaCount=0 \\\n              --set=celery_beat.replicaCount=0 \\\n              --set=celery_worker_heavy.replicaCount=0 \\\n              --set=celery_worker_docfetching.replicaCount=0 \\\n              --set=celery_worker_docprocessing.replicaCount=0 \\\n              --set=celery_worker_light.replicaCount=0 \\\n              --set=celery_worker_monitoring.replicaCount=0 \\\n              --set=celery_worker_primary.replicaCount=0 \\\n              --set=celery_worker_user_file_processing.replicaCount=0 \\\n              --set=celery_worker_user_files_indexing.replicaCount=0\" \\\n            --helm-extra-args=\"--timeout 900s --debug\" \\\n            --debug --config ct.yaml\n          CT_EXIT=$?\n          set -e\n\n          if [[ $CT_EXIT -ne 0 ]]; then\n            echo \"ct install failed with exit code $CT_EXIT\"\n            exit $CT_EXIT\n          else\n            echo \"=== Installation completed successfully ===\"\n          fi\n\n          kubectl get pods --all-namespaces\n\n      - name: Post-install verification\n        if: steps.list-changed.outputs.changed == 'true'\n        run: |\n          echo \"=== Post-install verification ===\"\n          if ! kubectl cluster-info >/dev/null 2>&1; then\n            echo \"ERROR: Kubernetes cluster is not reachable after install\"\n            exit 1\n          fi\n          kubectl get pods --all-namespaces\n          kubectl get services --all-namespaces\n          # Only show issues if they exist\n          kubectl describe pods --all-namespaces | grep -A 5 -B 2 \"Failed\\|Error\\|Warning\" || echo \"No pod issues found\"\n\n      - name: Cleanup on failure\n        if: failure() && steps.list-changed.outputs.changed == 'true'\n        run: |\n          echo \"=== Cleanup on failure ===\"\n          if ! kubectl cluster-info >/dev/null 2>&1; then\n            echo \"Skipping failure cleanup: Kubernetes cluster is not reachable\"\n            exit 0\n          fi\n          echo \"=== Final cluster state ===\"\n          kubectl get pods --all-namespaces\n          kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10\n\n          echo \"=== Pod descriptions for debugging ===\"\n          kubectl describe pods --all-namespaces | grep -A 10 -B 3 \"Failed\\|Error\\|Warning\\|Pending\" || echo \"No problematic pods found\"\n\n          echo \"=== Recent logs for debugging ===\"\n          kubectl logs --all-namespaces --tail=50 | grep -i \"error\\|timeout\\|failed\\|pull\" || echo \"No error logs found\"\n\n          echo \"=== Helm releases ===\"\n          helm list --all-namespaces\n        # the following would install only changed charts, but we only have one chart so\n        # don't worry about that for now\n        # run: ct install --target-branch ${{ github.event.repository.default_branch }}\n"
  },
  {
    "path": ".github/workflows/pr-integration-tests.yml",
    "content": "name: Run Integration Tests v2\nconcurrency:\n  group: Run-Integration-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  merge_group:\n  pull_request:\n    branches:\n      - main\n      - \"release/**\"\n  push:\n    tags:\n      - \"v*.*.*\"\n\npermissions:\n  contents: read\n\nenv:\n  # Test Environment Variables\n  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}\n  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n  SLACK_BOT_TOKEN_TEST_SPACE: ${{ secrets.SLACK_BOT_TOKEN_TEST_SPACE }}\n  CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}\n  CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}\n  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}\n  CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}\n  JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}\n  JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}\n  JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}\n  JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}\n  PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}\n  PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}\n  PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}\n  PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}\n  EXA_API_KEY: ${{ secrets.EXA_API_KEY }}\n  GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN }}\n  GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC }}\n  GITHUB_ADMIN_EMAIL: ${{ secrets.ONYX_GITHUB_ADMIN_EMAIL }}\n  GITHUB_TEST_USER_1_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_1_EMAIL }}\n  GITHUB_TEST_USER_2_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_2_EMAIL }}\n\njobs:\n  discover-test-dirs:\n    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.\n    runs-on: ubuntu-slim\n    timeout-minutes: 45\n    outputs:\n      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}\n      editions: ${{ steps.set-editions.outputs.editions }}\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Discover test directories\n        id: set-matrix\n        run: |\n          # Find all leaf-level directories in both test directories\n          tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name \"__pycache__\" ! -name \"mcp\" ! -name \"no_vectordb\" -exec basename {} \\; | sort)\n          connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name \"__pycache__\" -exec basename {} \\; | sort)\n\n          # Create JSON array with directory info\n          all_dirs=\"\"\n          for dir in $tests_dirs; do\n            all_dirs=\"$all_dirs{\\\"path\\\":\\\"tests/$dir\\\",\\\"name\\\":\\\"tests-$dir\\\"},\"\n          done\n          for dir in $connector_dirs; do\n            all_dirs=\"$all_dirs{\\\"path\\\":\\\"connector_job_tests/$dir\\\",\\\"name\\\":\\\"connector-$dir\\\"},\"\n          done\n\n          # Remove trailing comma and wrap in array\n          all_dirs=\"[${all_dirs%,}]\"\n          echo \"test-dirs=$all_dirs\" >> $GITHUB_OUTPUT\n\n      - name: Determine editions to test\n        id: set-editions\n        run: |\n          # On PRs, only run EE tests. On merge_group and tags, run both EE and MIT.\n          if [ \"${{ github.event_name }}\" = \"pull_request\" ]; then\n            echo 'editions=[\"ee\"]' >> $GITHUB_OUTPUT\n          else\n            echo 'editions=[\"ee\",\"mit\"]' >> $GITHUB_OUTPUT\n          fi\n\n  build-backend-image:\n    runs-on:\n      [\n        runs-on,\n        runner=1cpu-linux-arm64,\n        \"run-id=${{ github.run_id }}-build-backend-image\",\n        \"extras=ecr-cache\",\n      ]\n    timeout-minutes: 45\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Format branch name for cache\n        id: format-branch\n        env:\n          PR_NUMBER: ${{ github.event.pull_request.number }}\n          REF_NAME: ${{ github.ref_name }}\n        run: |\n          if [ -n \"${PR_NUMBER}\" ]; then\n            CACHE_SUFFIX=\"${PR_NUMBER}\"\n          else\n            # shellcheck disable=SC2001\n            CACHE_SUFFIX=$(echo \"${REF_NAME}\" | sed 's/[^A-Za-z0-9._-]/-/g')\n          fi\n          echo \"cache-suffix=${CACHE_SUFFIX}\" >> $GITHUB_OUTPUT\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      # needed for pulling Vespa, Redis, Postgres, and Minio images\n      # otherwise, we hit the \"Unauthenticated users\" limit\n      # https://docs.docker.com/docker-hub/usage/\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Build and push Backend Docker image\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./backend\n          file: ./backend/Dockerfile\n          push: true\n          tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache\n            type=registry,ref=onyxdotapp/onyx-backend:latest\n          cache-to: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max\n          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}\n\n  build-model-server-image:\n    runs-on:\n      [\n        runs-on,\n        runner=1cpu-linux-arm64,\n        \"run-id=${{ github.run_id }}-build-model-server-image\",\n        \"extras=ecr-cache\",\n      ]\n    timeout-minutes: 45\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Format branch name for cache\n        id: format-branch\n        env:\n          PR_NUMBER: ${{ github.event.pull_request.number }}\n          REF_NAME: ${{ github.ref_name }}\n        run: |\n          if [ -n \"${PR_NUMBER}\" ]; then\n            CACHE_SUFFIX=\"${PR_NUMBER}\"\n          else\n            # shellcheck disable=SC2001\n            CACHE_SUFFIX=$(echo \"${REF_NAME}\" | sed 's/[^A-Za-z0-9._-]/-/g')\n          fi\n          echo \"cache-suffix=${CACHE_SUFFIX}\" >> $GITHUB_OUTPUT\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      # needed for pulling Vespa, Redis, Postgres, and Minio images\n      # otherwise, we hit the \"Unauthenticated users\" limit\n      # https://docs.docker.com/docker-hub/usage/\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Build and push Model Server Docker image\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./backend\n          file: ./backend/Dockerfile.model_server\n          push: true\n          tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache\n            type=registry,ref=onyxdotapp/onyx-model-server:latest\n          cache-to: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max\n\n  build-integration-image:\n    runs-on:\n      [\n        runs-on,\n        runner=2cpu-linux-arm64,\n        \"run-id=${{ github.run_id }}-build-integration-image\",\n        \"extras=ecr-cache\",\n      ]\n    timeout-minutes: 45\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      # needed for pulling openapitools/openapi-generator-cli\n      # otherwise, we hit the \"Unauthenticated users\" limit\n      # https://docs.docker.com/docker-hub/usage/\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Format branch name for cache\n        id: format-branch\n        env:\n          PR_NUMBER: ${{ github.event.pull_request.number }}\n          REF_NAME: ${{ github.ref_name }}\n        run: |\n          if [ -n \"${PR_NUMBER}\" ]; then\n            CACHE_SUFFIX=\"${PR_NUMBER}\"\n          else\n            # shellcheck disable=SC2001\n            CACHE_SUFFIX=$(echo \"${REF_NAME}\" | sed 's/[^A-Za-z0-9._-]/-/g')\n          fi\n          echo \"cache-suffix=${CACHE_SUFFIX}\" >> $GITHUB_OUTPUT\n\n      - name: Build and push integration test image with Docker Bake\n        env:\n          INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}\n          TAG: integration-test-${{ github.run_id }}\n          CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}\n          HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}\n        run: |\n          docker buildx bake --push \\\n            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \\\n            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \\\n            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \\\n            --set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \\\n            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \\\n            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \\\n            --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \\\n            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \\\n            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \\\n            --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \\\n            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \\\n            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \\\n            --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \\\n            integration\n\n  integration-tests:\n    needs:\n      [\n        discover-test-dirs,\n        build-backend-image,\n        build-model-server-image,\n        build-integration-image,\n      ]\n    runs-on:\n      - runs-on\n      - runner=4cpu-linux-arm64\n      - ${{ format('run-id={0}-integration-tests-{1}-job-{2}', github.run_id, matrix.edition, strategy['job-index']) }}\n      - extras=ecr-cache\n    timeout-minutes: 45\n\n    strategy:\n      fail-fast: false\n      matrix:\n        test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}\n        edition: ${{ fromJson(needs.discover-test-dirs.outputs.editions) }}\n\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      # needed for pulling Vespa, Redis, Postgres, and Minio images\n      # otherwise, we hit the \"Unauthenticated users\" limit\n      # https://docs.docker.com/docker-hub/usage/\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections\n      # NOTE: don't need web server for integration tests\n      - name: Create .env file for Docker Compose\n        env:\n          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}\n          RUN_ID: ${{ github.run_id }}\n          EDITION: ${{ matrix.edition }}\n        run: |\n          # Base config shared by both editions\n          cat <<EOF > deployment/docker_compose/.env\n          COMPOSE_PROFILES=s3-filestore\n          OPENSEARCH_FOR_ONYX_ENABLED=false\n          AUTH_TYPE=basic\n          POSTGRES_POOL_PRE_PING=true\n          POSTGRES_USE_NULL_POOL=true\n          REQUIRE_EMAIL_VERIFICATION=false\n          DISABLE_TELEMETRY=true\n          ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}\n          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}\n          INTEGRATION_TESTS_MODE=true\n          MCP_SERVER_ENABLED=true\n          AUTO_LLM_UPDATE_INTERVAL_SECONDS=10\n          EOF\n\n          # EE-only config\n          if [ \"$EDITION\" = \"ee\" ]; then\n            cat <<EOF >> deployment/docker_compose/.env\n          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true\n          # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license\n          LICENSE_ENFORCEMENT_ENABLED=false\n          CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001\n          EOF\n          fi\n\n      - name: Start Docker containers\n        run: |\n          cd deployment/docker_compose\n          docker compose -f docker-compose.yml -f docker-compose.dev.yml up \\\n            relational_db \\\n            index \\\n            cache \\\n            minio \\\n            api_server \\\n            inference_model_server \\\n            indexing_model_server \\\n            background \\\n            -d\n        id: start_docker\n\n      - name: Wait for services to be ready\n        run: |\n          echo \"Starting wait-for-service script...\"\n\n          wait_for_service() {\n            local url=$1\n            local label=$2\n            local timeout=${3:-300}  # default 5 minutes\n            local start_time\n            start_time=$(date +%s)\n\n            while true; do\n              local current_time\n              current_time=$(date +%s)\n              local elapsed_time=$((current_time - start_time))\n\n              if [ $elapsed_time -ge $timeout ]; then\n                echo \"Timeout reached. ${label} did not become ready in $timeout seconds.\"\n                exit 1\n              fi\n\n              local response\n              response=$(curl -s -o /dev/null -w \"%{http_code}\" \"$url\" || echo \"curl_error\")\n\n              if [ \"$response\" = \"200\" ]; then\n                echo \"${label} is ready!\"\n                break\n              elif [ \"$response\" = \"curl_error\" ]; then\n                echo \"Curl encountered an error while checking ${label}. Retrying in 5 seconds...\"\n              else\n                echo \"${label} not ready yet (HTTP status $response). Retrying in 5 seconds...\"\n              fi\n\n              sleep 5\n            done\n          }\n\n          wait_for_service \"http://localhost:8080/health\" \"API server\"\n          echo \"Finished waiting for services.\"\n\n      - name: Start Mock Services\n        run: |\n          cd backend/tests/integration/mock_services\n          docker compose -f docker-compose.mock-it-services.yml \\\n            -p mock-it-services-stack up -d\n\n      - name: Run Integration Tests (${{ matrix.edition }}) for ${{ matrix.test-dir.name }}\n        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3\n        with:\n          timeout_minutes: 20\n          max_attempts: 3\n          retry_wait_seconds: 10\n          command: |\n            echo \"Running ${{ matrix.edition }} integration tests for ${{ matrix.test-dir.path }}...\"\n            docker run --rm --network onyx_default \\\n              --name test-runner \\\n              -e POSTGRES_HOST=relational_db \\\n              -e POSTGRES_USER=postgres \\\n              -e POSTGRES_PASSWORD=password \\\n              -e POSTGRES_DB=postgres \\\n              -e DB_READONLY_USER=db_readonly_user \\\n              -e DB_READONLY_PASSWORD=password \\\n              -e POSTGRES_POOL_PRE_PING=true \\\n              -e POSTGRES_USE_NULL_POOL=true \\\n              -e VESPA_HOST=index \\\n              -e ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=false \\\n              -e REDIS_HOST=cache \\\n              -e API_SERVER_HOST=api_server \\\n              -e OPENAI_API_KEY=${OPENAI_API_KEY} \\\n              -e EXA_API_KEY=${EXA_API_KEY} \\\n              -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \\\n              -e SLACK_BOT_TOKEN_TEST_SPACE=${SLACK_BOT_TOKEN_TEST_SPACE} \\\n              -e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \\\n              -e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \\\n              -e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \\\n              -e CONFLUENCE_ACCESS_TOKEN_SCOPED=${CONFLUENCE_ACCESS_TOKEN_SCOPED} \\\n              -e JIRA_BASE_URL=${JIRA_BASE_URL} \\\n              -e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \\\n              -e JIRA_API_TOKEN=${JIRA_API_TOKEN} \\\n              -e JIRA_API_TOKEN_SCOPED=${JIRA_API_TOKEN_SCOPED} \\\n              -e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \\\n              -e PERM_SYNC_SHAREPOINT_PRIVATE_KEY=\"${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}\" \\\n              -e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \\\n              -e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \\\n              -e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN} \\\n              -e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC} \\\n              -e GITHUB_ADMIN_EMAIL=${GITHUB_ADMIN_EMAIL} \\\n              -e GITHUB_TEST_USER_1_EMAIL=${GITHUB_TEST_USER_1_EMAIL} \\\n              -e GITHUB_TEST_USER_2_EMAIL=${GITHUB_TEST_USER_2_EMAIL} \\\n              -e TEST_WEB_HOSTNAME=test-runner \\\n              -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \\\n              -e MOCK_CONNECTOR_SERVER_PORT=8001 \\\n              -e ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${{ matrix.edition == 'ee' && 'true' || 'false' }} \\\n              ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \\\n              /app/tests/integration/${{ matrix.test-dir.path }}\n\n      # ------------------------------------------------------------\n      # Always gather logs BEFORE \"down\":\n      - name: Dump API server logs\n        if: always()\n        run: |\n          cd deployment/docker_compose\n          docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true\n\n      - name: Dump all-container logs (optional)\n        if: always()\n        run: |\n          cd deployment/docker_compose\n          docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true\n\n      - name: Upload logs\n        if: always()\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        with:\n          name: docker-all-logs-${{ matrix.edition }}-${{ matrix.test-dir.name }}\n          path: ${{ github.workspace }}/docker-compose.log\n      # ------------------------------------------------------------\n\n  onyx-lite-tests:\n    needs: [build-backend-image, build-integration-image]\n    runs-on:\n      [\n        runs-on,\n        runner=4cpu-linux-arm64,\n        \"run-id=${{ github.run_id }}-onyx-lite-tests\",\n        \"extras=ecr-cache\",\n      ]\n    timeout-minutes: 45\n\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Create .env file for Onyx Lite Docker Compose\n        env:\n          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}\n          RUN_ID: ${{ github.run_id }}\n        run: |\n          cat <<EOF > deployment/docker_compose/.env\n          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true\n          LICENSE_ENFORCEMENT_ENABLED=false\n          AUTH_TYPE=basic\n          POSTGRES_POOL_PRE_PING=true\n          POSTGRES_USE_NULL_POOL=true\n          REQUIRE_EMAIL_VERIFICATION=false\n          DISABLE_TELEMETRY=true\n          ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}\n          INTEGRATION_TESTS_MODE=true\n          EOF\n\n      # Start only the services needed for Onyx Lite (Postgres + API server)\n      - name: Start Docker containers (onyx-lite)\n        run: |\n          cd deployment/docker_compose\n          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml up \\\n            relational_db \\\n            api_server \\\n            -d\n        id: start_docker_onyx_lite\n\n      - name: Wait for services to be ready\n        run: |\n          echo \"Starting wait-for-service script (onyx-lite)...\"\n          start_time=$(date +%s)\n          timeout=300\n          while true; do\n            current_time=$(date +%s)\n            elapsed_time=$((current_time - start_time))\n            if [ $elapsed_time -ge $timeout ]; then\n              echo \"Timeout reached. Service did not become ready in $timeout seconds.\"\n              exit 1\n            fi\n            response=$(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:8080/health || echo \"curl_error\")\n            if [ \"$response\" = \"200\" ]; then\n              echo \"API server is ready!\"\n              break\n            elif [ \"$response\" = \"curl_error\" ]; then\n              echo \"Curl encountered an error; retrying...\"\n            else\n              echo \"Service not ready yet (HTTP $response). Retrying in 5 seconds...\"\n            fi\n            sleep 5\n          done\n\n      - name: Run Onyx Lite Integration Tests\n        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3\n        with:\n          timeout_minutes: 20\n          max_attempts: 3\n          retry_wait_seconds: 10\n          command: |\n            echo \"Running onyx-lite integration tests...\"\n            docker run --rm --network onyx_default \\\n              --name test-runner \\\n              -e POSTGRES_HOST=relational_db \\\n              -e POSTGRES_USER=postgres \\\n              -e POSTGRES_PASSWORD=password \\\n              -e POSTGRES_DB=postgres \\\n              -e DB_READONLY_USER=db_readonly_user \\\n              -e DB_READONLY_PASSWORD=password \\\n              -e POSTGRES_POOL_PRE_PING=true \\\n              -e POSTGRES_USE_NULL_POOL=true \\\n              -e API_SERVER_HOST=api_server \\\n              -e OPENAI_API_KEY=${OPENAI_API_KEY} \\\n              -e TEST_WEB_HOSTNAME=test-runner \\\n              ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \\\n              /app/tests/integration/tests/no_vectordb\n\n      - name: Dump API server logs (onyx-lite)\n        if: always()\n        run: |\n          cd deployment/docker_compose\n          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml \\\n            logs --no-color api_server > $GITHUB_WORKSPACE/api_server_onyx_lite.log || true\n\n      - name: Dump all-container logs (onyx-lite)\n        if: always()\n        run: |\n          cd deployment/docker_compose\n          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml \\\n            logs --no-color > $GITHUB_WORKSPACE/docker-compose-onyx-lite.log || true\n\n      - name: Upload logs (onyx-lite)\n        if: always()\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        with:\n          name: docker-all-logs-onyx-lite\n          path: ${{ github.workspace }}/docker-compose-onyx-lite.log\n\n      - name: Stop Docker containers (onyx-lite)\n        if: always()\n        run: |\n          cd deployment/docker_compose\n          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml down -v\n\n  multitenant-tests:\n    needs:\n      [build-backend-image, build-model-server-image, build-integration-image]\n    runs-on:\n      [\n        runs-on,\n        runner=8cpu-linux-arm64,\n        \"run-id=${{ github.run_id }}-multitenant-tests\",\n        \"extras=ecr-cache\",\n      ]\n    timeout-minutes: 45\n\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Start Docker containers for multi-tenant tests\n        env:\n          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}\n          RUN_ID: ${{ github.run_id }}\n        run: |\n          cd deployment/docker_compose\n          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \\\n          LICENSE_ENFORCEMENT_ENABLED=false \\\n          MULTI_TENANT=true \\\n          AUTH_TYPE=cloud \\\n          REQUIRE_EMAIL_VERIFICATION=false \\\n          DISABLE_TELEMETRY=true \\\n          OPENAI_DEFAULT_API_KEY=${OPENAI_API_KEY} \\\n          ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \\\n          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \\\n          DEV_MODE=true \\\n          OPENSEARCH_FOR_ONYX_ENABLED=false \\\n          docker compose -f docker-compose.multitenant-dev.yml up \\\n            relational_db \\\n            index \\\n            cache \\\n            minio \\\n            api_server \\\n            inference_model_server \\\n            indexing_model_server \\\n            background \\\n            -d\n        id: start_docker_multi_tenant\n\n      - name: Wait for service to be ready (multi-tenant)\n        run: |\n          echo \"Starting wait-for-service script for multi-tenant...\"\n          docker logs -f onyx-api_server-1 &\n          start_time=$(date +%s)\n          timeout=300\n          while true; do\n            current_time=$(date +%s)\n            elapsed_time=$((current_time - start_time))\n            if [ $elapsed_time -ge $timeout ]; then\n              echo \"Timeout reached. Service did not become ready in 5 minutes.\"\n              exit 1\n            fi\n            response=$(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:8080/health || echo \"curl_error\")\n            if [ \"$response\" = \"200\" ]; then\n              echo \"Service is ready!\"\n              break\n            elif [ \"$response\" = \"curl_error\" ]; then\n              echo \"Curl encountered an error; retrying...\"\n            else\n              echo \"Service not ready yet (HTTP $response). Retrying in 5 seconds...\"\n            fi\n            sleep 5\n          done\n          echo \"Finished waiting for service.\"\n\n      - name: Run Multi-Tenant Integration Tests\n        env:\n          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}\n          RUN_ID: ${{ github.run_id }}\n        run: |\n          echo \"Running multi-tenant integration tests...\"\n          docker run --rm --network onyx_default \\\n            --name test-runner \\\n            -e POSTGRES_HOST=relational_db \\\n            -e POSTGRES_USER=postgres \\\n            -e POSTGRES_PASSWORD=password \\\n            -e DB_READONLY_USER=db_readonly_user \\\n            -e DB_READONLY_PASSWORD=password \\\n            -e POSTGRES_DB=postgres \\\n            -e POSTGRES_USE_NULL_POOL=true \\\n            -e VESPA_HOST=index \\\n            -e ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=false \\\n            -e REDIS_HOST=cache \\\n            -e API_SERVER_HOST=api_server \\\n            -e OPENAI_API_KEY=${OPENAI_API_KEY} \\\n            -e EXA_API_KEY=${EXA_API_KEY} \\\n            -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \\\n            -e SLACK_BOT_TOKEN_TEST_SPACE=${SLACK_BOT_TOKEN_TEST_SPACE} \\\n            -e TEST_WEB_HOSTNAME=test-runner \\\n            -e AUTH_TYPE=cloud \\\n            -e MULTI_TENANT=true \\\n            -e SKIP_RESET=true \\\n            -e REQUIRE_EMAIL_VERIFICATION=false \\\n            -e DISABLE_TELEMETRY=true \\\n            -e DEV_MODE=true \\\n            ${ECR_CACHE}:integration-test-${RUN_ID} \\\n            /app/tests/integration/multitenant_tests\n\n      - name: Dump API server logs (multi-tenant)\n        if: always()\n        run: |\n          cd deployment/docker_compose\n          docker compose -f docker-compose.multitenant-dev.yml logs --no-color api_server > $GITHUB_WORKSPACE/api_server_multitenant.log || true\n\n      - name: Dump all-container logs (multi-tenant)\n        if: always()\n        run: |\n          cd deployment/docker_compose\n          docker compose -f docker-compose.multitenant-dev.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose-multitenant.log || true\n\n      - name: Upload logs (multi-tenant)\n        if: always()\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        with:\n          name: docker-all-logs-multitenant\n          path: ${{ github.workspace }}/docker-compose-multitenant.log\n\n      - name: Stop multi-tenant Docker containers\n        if: always()\n        run: |\n          cd deployment/docker_compose\n          docker compose -f docker-compose.multitenant-dev.yml down -v\n\n  required:\n    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.\n    runs-on: ubuntu-slim\n    timeout-minutes: 45\n    needs: [integration-tests, onyx-lite-tests, multitenant-tests]\n    if: ${{ always() }}\n    steps:\n      - name: Check job status\n        if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}\n        run: exit 1\n"
  },
  {
    "path": ".github/workflows/pr-jest-tests.yml",
    "content": "name: Run Jest Tests\nconcurrency:\n  group: Run-Jest-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  merge_group:\n  pull_request:\n    branches:\n      - main\n      - \"release/**\"\n  push:\n    tags:\n      - \"v*.*.*\"\n\npermissions:\n  contents: read\n\njobs:\n  jest-tests:\n    name: Jest Tests\n    runs-on: ubuntu-latest\n    timeout-minutes: 45\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Setup node\n        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v4\n        with:\n          node-version: 22\n          cache: \"npm\" # zizmor: ignore[cache-poisoning] test-only workflow; no deploy artifacts\n          cache-dependency-path: ./web/package-lock.json\n\n      - name: Install node dependencies\n        working-directory: ./web\n        run: npm ci\n\n      - name: Run Jest tests\n        working-directory: ./web\n        run: npm test -- --ci --coverage --maxWorkers=50%\n\n      - name: Upload coverage reports\n        if: always()\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        with:\n          name: jest-coverage-${{ github.run_id }}\n          path: ./web/coverage\n          retention-days: 7\n"
  },
  {
    "path": ".github/workflows/pr-labeler.yml",
    "content": "name: PR Labeler\n\non:\n  pull_request:\n    branches:\n      - main\n    types:\n      - opened\n      - reopened\n      - synchronize\n      - edited\n\npermissions:\n  contents: read\n\njobs:\n  validate_pr_title:\n    runs-on: ubuntu-latest\n    timeout-minutes: 45\n    steps:\n      - name: Check PR title for Conventional Commits\n        env:\n          PR_TITLE: ${{ github.event.pull_request.title }}\n        run: |\n          echo \"PR Title: $PR_TITLE\"\n          if [[ ! \"$PR_TITLE\" =~ ^(feat|fix|docs|test|ci|refactor|perf|chore|revert|build)(\\(.+\\))?:\\ .+ ]]; then\n            echo \"::error::❌ Your PR title does not follow the Conventional Commits format.\n              This check ensures that all pull requests use clear, consistent titles that help automate changelogs and improve project history.\n\n              Please update your PR title to follow the Conventional Commits style.\n              Here is a link to a blog explaining the reason why we've included the Conventional Commits style into our PR titles: https://xfuture-blog.com/working-with-conventional-commits\n\n              **Here are some examples of valid PR titles:**\n              - feat: add user authentication\n              - fix(login): handle null password error\n              - docs(readme): update installation instructions\"\n            exit 1\n          fi\n"
  },
  {
    "path": ".github/workflows/pr-linear-check.yml",
    "content": "name: Ensure PR references Linear\nconcurrency:\n  group: Ensure-PR-references-Linear-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  pull_request:\n    types: [opened, edited, reopened, synchronize]\n\npermissions:\n  contents: read\n\njobs:\n  linear-check:\n    runs-on: ubuntu-latest\n    timeout-minutes: 45\n    steps:\n      - name: Check PR body for Linear link or override\n        env:\n          PR_BODY: ${{ github.event.pull_request.body }}\n        run: |\n          # Looking for \"https://linear.app\" in the body\n          if echo \"$PR_BODY\" | grep -qE \"https://linear\\.app\"; then\n            echo \"Found a Linear link. Check passed.\"\n            exit 0\n          fi\n\n          # Looking for a checked override: \"[x] Override Linear Check\"\n          if echo \"$PR_BODY\" | grep -q \"\\[x\\].*Override Linear Check\"; then\n            echo \"Override box is checked. Check passed.\"\n            exit 0\n          fi\n\n          # Otherwise, fail the run\n          echo \"No Linear link or override found in the PR description.\"\n          exit 1\n"
  },
  {
    "path": ".github/workflows/pr-playwright-tests.yml",
    "content": "name: Run Playwright Tests\nconcurrency:\n  group: Run-Playwright-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  merge_group:\n  pull_request:\n    branches:\n      - main\n      - \"release/**\"\n  push:\n    tags:\n      - \"v*.*.*\"\n    # TODO: Remove this if we enable merge-queues for release branches.\n    branches:\n      - \"release/**\"\n\npermissions:\n  contents: read\n\nenv:\n  # Test Environment Variables\n  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}\n  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}\n  GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}\n  EXA_API_KEY: ${{ secrets.EXA_API_KEY }}\n  FIRECRAWL_API_KEY: ${{ secrets.FIRECRAWL_API_KEY }}\n  GOOGLE_PSE_API_KEY: ${{ secrets.GOOGLE_PSE_API_KEY }}\n  GOOGLE_PSE_SEARCH_ENGINE_ID: ${{ secrets.GOOGLE_PSE_SEARCH_ENGINE_ID }}\n\n  # for federated slack tests\n  SLACK_CLIENT_ID: ${{ secrets.SLACK_CLIENT_ID }}\n  SLACK_CLIENT_SECRET: ${{ secrets.SLACK_CLIENT_SECRET }}\n\n  # for MCP Oauth tests\n  MCP_OAUTH_CLIENT_ID: ${{ secrets.MCP_OAUTH_CLIENT_ID }}\n  MCP_OAUTH_CLIENT_SECRET: ${{ secrets.MCP_OAUTH_CLIENT_SECRET }}\n  MCP_OAUTH_ISSUER: ${{ secrets.MCP_OAUTH_ISSUER }}\n  MCP_OAUTH_JWKS_URI: ${{ secrets.MCP_OAUTH_JWKS_URI }}\n  MCP_OAUTH_USERNAME: ${{ vars.MCP_OAUTH_USERNAME }}\n  MCP_OAUTH_PASSWORD: ${{ secrets.MCP_OAUTH_PASSWORD }}\n\n  # for MCP API Key tests\n  MCP_API_KEY: test-api-key-12345\n  MCP_API_KEY_TEST_PORT: 8005\n  MCP_API_KEY_TEST_URL: http://host.docker.internal:8005/mcp\n  MCP_API_KEY_SERVER_HOST: 0.0.0.0\n  MCP_API_KEY_SERVER_PUBLIC_HOST: host.docker.internal\n\n  MOCK_LLM_RESPONSE: true\n  MCP_TEST_SERVER_PORT: 8004\n  MCP_TEST_SERVER_URL: http://host.docker.internal:8004/mcp\n  MCP_TEST_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp\n  MCP_TEST_SERVER_BIND_HOST: 0.0.0.0\n  MCP_TEST_SERVER_PUBLIC_HOST: host.docker.internal\n  MCP_SERVER_HOST: 0.0.0.0\n  MCP_SERVER_PUBLIC_HOST: host.docker.internal\n  MCP_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp\n\n  # Visual regression S3 bucket (shared across all jobs)\n  PLAYWRIGHT_S3_BUCKET: onyx-playwright-artifacts\n\njobs:\n  build-web-image:\n    runs-on:\n      [\n        runs-on,\n        runner=4cpu-linux-arm64,\n        \"run-id=${{ github.run_id }}-build-web-image\",\n        \"extras=ecr-cache\",\n      ]\n    timeout-minutes: 45\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Format branch name for cache\n        id: format-branch\n        env:\n          PR_NUMBER: ${{ github.event.pull_request.number }}\n          REF_NAME: ${{ github.ref_name }}\n        run: |\n          if [ -n \"${PR_NUMBER}\" ]; then\n            CACHE_SUFFIX=\"${PR_NUMBER}\"\n          else\n            # shellcheck disable=SC2001\n            CACHE_SUFFIX=$(echo \"${REF_NAME}\" | sed 's/[^A-Za-z0-9._-]/-/g')\n          fi\n          echo \"cache-suffix=${CACHE_SUFFIX}\" >> $GITHUB_OUTPUT\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      # needed for pulling external images otherwise, we hit the \"Unauthenticated users\" limit\n      # https://docs.docker.com/docker-hub/usage/\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Build and push Web Docker image\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./web\n          file: ./web/Dockerfile\n          platforms: linux/arm64\n          tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}\n          push: true\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }}\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }}\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache\n            type=registry,ref=onyxdotapp/onyx-web-server:latest\n          cache-to: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache,mode=max\n          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}\n\n  build-backend-image:\n    runs-on:\n      [\n        runs-on,\n        runner=1cpu-linux-arm64,\n        \"run-id=${{ github.run_id }}-build-backend-image\",\n        \"extras=ecr-cache\",\n      ]\n    timeout-minutes: 45\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Format branch name for cache\n        id: format-branch\n        env:\n          PR_NUMBER: ${{ github.event.pull_request.number }}\n          REF_NAME: ${{ github.ref_name }}\n        run: |\n          if [ -n \"${PR_NUMBER}\" ]; then\n            CACHE_SUFFIX=\"${PR_NUMBER}\"\n          else\n            # shellcheck disable=SC2001\n            CACHE_SUFFIX=$(echo \"${REF_NAME}\" | sed 's/[^A-Za-z0-9._-]/-/g')\n          fi\n          echo \"cache-suffix=${CACHE_SUFFIX}\" >> $GITHUB_OUTPUT\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      # needed for pulling external images otherwise, we hit the \"Unauthenticated users\" limit\n      # https://docs.docker.com/docker-hub/usage/\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Build and push Backend Docker image\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./backend\n          file: ./backend/Dockerfile\n          platforms: linux/arm64\n          tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}\n          push: true\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache\n            type=registry,ref=onyxdotapp/onyx-backend:latest\n          cache-to: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max\n          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}\n\n  build-model-server-image:\n    runs-on:\n      [\n        runs-on,\n        runner=1cpu-linux-arm64,\n        \"run-id=${{ github.run_id }}-build-model-server-image\",\n        \"extras=ecr-cache\",\n      ]\n    timeout-minutes: 45\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Format branch name for cache\n        id: format-branch\n        env:\n          PR_NUMBER: ${{ github.event.pull_request.number }}\n          REF_NAME: ${{ github.ref_name }}\n        run: |\n          if [ -n \"${PR_NUMBER}\" ]; then\n            CACHE_SUFFIX=\"${PR_NUMBER}\"\n          else\n            # shellcheck disable=SC2001\n            CACHE_SUFFIX=$(echo \"${REF_NAME}\" | sed 's/[^A-Za-z0-9._-]/-/g')\n          fi\n          echo \"cache-suffix=${CACHE_SUFFIX}\" >> $GITHUB_OUTPUT\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      # needed for pulling external images otherwise, we hit the \"Unauthenticated users\" limit\n      # https://docs.docker.com/docker-hub/usage/\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Build and push Model Server Docker image\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./backend\n          file: ./backend/Dockerfile.model_server\n          platforms: linux/arm64\n          tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}\n          push: true\n          cache-from: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache\n            type=registry,ref=onyxdotapp/onyx-model-server:latest\n          cache-to: |\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max\n            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max\n          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}\n\n  playwright-tests:\n    needs: [build-web-image, build-backend-image, build-model-server-image]\n    name: Playwright Tests (${{ matrix.project }})\n    permissions:\n      id-token: write # Required for OIDC-based AWS credential exchange (S3 access)\n      contents: read\n    runs-on:\n      - runs-on\n      - runner=8cpu-linux-arm64\n      - \"run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}\"\n      - \"extras=ecr-cache\"\n      - volume=50gb\n    timeout-minutes: 45\n    strategy:\n      fail-fast: false\n      matrix:\n        project: [admin, exclusive]\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Setup node\n        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts\n        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v4\n        with:\n          node-version: 22\n          cache: \"npm\" # zizmor: ignore[cache-poisoning]\n          cache-dependency-path: ./web/package-lock.json\n\n      - name: Install node dependencies\n        working-directory: ./web\n        run: npm ci\n\n      - name: Cache playwright cache\n        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts\n        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4\n        with:\n          path: ~/.cache/ms-playwright\n          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}\n          restore-keys: |\n            ${{ runner.os }}-playwright-npm-\n\n      - name: Install playwright browsers\n        working-directory: ./web\n        run: npx playwright install --with-deps\n\n      - name: Create .env file for Docker Compose\n        env:\n          OPENAI_API_KEY_VALUE: ${{ env.OPENAI_API_KEY }}\n          EXA_API_KEY_VALUE: ${{ env.EXA_API_KEY }}\n          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}\n          RUN_ID: ${{ github.run_id }}\n        run: |\n          cat <<EOF > deployment/docker_compose/.env\n          COMPOSE_PROFILES=s3-filestore\n          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true\n          # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license\n          LICENSE_ENFORCEMENT_ENABLED=false\n          AUTH_TYPE=basic\n          INTEGRATION_TESTS_MODE=true\n          GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}\n          EXA_API_KEY=${EXA_API_KEY_VALUE}\n          REQUIRE_EMAIL_VERIFICATION=false\n          DISABLE_TELEMETRY=true\n          ONYX_BACKEND_IMAGE=${ECR_CACHE}:playwright-test-backend-${RUN_ID}\n          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:playwright-test-model-server-${RUN_ID}\n          ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}\n          EOF\n\n      # needed for pulling Vespa, Redis, Postgres, and Minio images\n      # otherwise, we hit the \"Unauthenticated users\" limit\n      # https://docs.docker.com/docker-hub/usage/\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Start Docker containers\n        run: |\n          cd deployment/docker_compose\n          docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml -f docker-compose.mcp-api-key-test.yml up -d\n        id: start_docker\n\n      - name: Wait for service to be ready\n        run: |\n          echo \"Starting wait-for-service script...\"\n\n          docker logs -f onyx-api_server-1 &\n\n          start_time=$(date +%s)\n          timeout=300  # 5 minutes in seconds\n\n          while true; do\n            current_time=$(date +%s)\n            elapsed_time=$((current_time - start_time))\n\n            if [ $elapsed_time -ge $timeout ]; then\n              echo \"Timeout reached. Service did not become ready in 5 minutes.\"\n              exit 1\n            fi\n\n            # Use curl with error handling to ignore specific exit code 56\n            response=$(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:8080/health || echo \"curl_error\")\n\n            if [ \"$response\" = \"200\" ]; then\n              echo \"Service is ready!\"\n              break\n            elif [ \"$response\" = \"curl_error\" ]; then\n              echo \"Curl encountered an error, possibly exit code 56. Continuing to retry...\"\n            else\n              echo \"Service not ready yet (HTTP status $response). Retrying in 5 seconds...\"\n            fi\n\n            sleep 5\n          done\n          echo \"Finished waiting for service.\"\n\n      - name: Wait for MCP OAuth mock server\n        run: |\n          echo \"Waiting for MCP OAuth mock server on port ${MCP_TEST_SERVER_PORT:-8004}...\"\n          start_time=$(date +%s)\n          timeout=120\n\n          while true; do\n            current_time=$(date +%s)\n            elapsed_time=$((current_time - start_time))\n\n            if [ $elapsed_time -ge $timeout ]; then\n              echo \"Timeout reached. MCP OAuth mock server did not become ready in ${timeout}s.\"\n              exit 1\n            fi\n\n            if curl -sf \"http://localhost:${MCP_TEST_SERVER_PORT:-8004}/healthz\" > /dev/null; then\n              echo \"MCP OAuth mock server is ready!\"\n              break\n            fi\n\n            sleep 3\n          done\n\n      - name: Wait for MCP API Key mock server\n        run: |\n          echo \"Waiting for MCP API Key mock server on port ${MCP_API_KEY_TEST_PORT:-8005}...\"\n          start_time=$(date +%s)\n          timeout=120\n\n          while true; do\n            current_time=$(date +%s)\n            elapsed_time=$((current_time - start_time))\n\n            if [ $elapsed_time -ge $timeout ]; then\n              echo \"Timeout reached. MCP API Key mock server did not become ready in ${timeout}s.\"\n              exit 1\n            fi\n\n            if curl -sf \"http://localhost:${MCP_API_KEY_TEST_PORT:-8005}/healthz\" > /dev/null; then\n              echo \"MCP API Key mock server is ready!\"\n              break\n            fi\n\n            sleep 3\n          done\n\n      - name: Wait for web server to be ready\n        run: |\n          echo \"Waiting for web server on port 3000...\"\n          start_time=$(date +%s)\n          timeout=120\n\n          while true; do\n            current_time=$(date +%s)\n            elapsed_time=$((current_time - start_time))\n\n            if [ $elapsed_time -ge $timeout ]; then\n              echo \"Timeout reached. Web server did not become ready in ${timeout}s.\"\n              exit 1\n            fi\n\n            if curl -sf \"http://localhost:3000/api/health\" > /dev/null 2>&1 || \\\n               curl -sf \"http://localhost:3000/\" > /dev/null 2>&1; then\n              echo \"Web server is ready!\"\n              break\n            fi\n\n            echo \"Web server not ready yet. Retrying in 3 seconds...\"\n            sleep 3\n          done\n\n      - name: Run Playwright tests\n        working-directory: ./web\n        env:\n          PROJECT: ${{ matrix.project }}\n        run: |\n          npx playwright test --project ${PROJECT}\n\n      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        if: always()\n        with:\n          # Includes test results and trace.zip files\n          name: playwright-test-results-${{ matrix.project }}-${{ github.run_id }}\n          path: ./web/output/playwright/\n          retention-days: 30\n\n      - name: Upload screenshots\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        if: always()\n        with:\n          name: playwright-screenshots-${{ matrix.project }}-${{ github.run_id }}\n          path: ./web/output/screenshots/\n          retention-days: 30\n\n      # --- Visual Regression Diff ---\n      - name: Configure AWS credentials\n        if: always()\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Install the latest version of uv\n        if: always()\n        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7\n        with:\n          enable-cache: false\n          version: \"0.9.9\"\n\n      - name: Determine baseline revision\n        if: always()\n        id: baseline-rev\n        env:\n          EVENT_NAME: ${{ github.event_name }}\n          BASE_REF: ${{ github.event.pull_request.base.ref }}\n          MERGE_GROUP_BASE_REF: ${{ github.event.merge_group.base_ref }}\n          GH_REF: ${{ github.ref }}\n          REF_NAME: ${{ github.ref_name }}\n        run: |\n          if [ \"${EVENT_NAME}\" = \"pull_request\" ]; then\n            # PRs compare against the base branch (e.g. main, release/2.5)\n            echo \"rev=${BASE_REF}\" >> \"$GITHUB_OUTPUT\"\n          elif [ \"${EVENT_NAME}\" = \"merge_group\" ]; then\n            # Merge queue compares against the target branch (e.g. refs/heads/main -> main)\n            echo \"rev=${MERGE_GROUP_BASE_REF#refs/heads/}\" >> \"$GITHUB_OUTPUT\"\n          elif [[ \"${GH_REF}\" == refs/tags/* ]]; then\n            # Tag builds compare against the tag name\n            echo \"rev=${REF_NAME}\" >> \"$GITHUB_OUTPUT\"\n          else\n            # Push builds (main, release/*) compare against the branch name\n            echo \"rev=${REF_NAME}\" >> \"$GITHUB_OUTPUT\"\n          fi\n\n      - name: Generate screenshot diff report\n        if: always()\n        env:\n          PROJECT: ${{ matrix.project }}\n          PLAYWRIGHT_S3_BUCKET: ${{ env.PLAYWRIGHT_S3_BUCKET }}\n          BASELINE_REV: ${{ steps.baseline-rev.outputs.rev }}\n        run: |\n          uv run --no-sync --with onyx-devtools ods screenshot-diff compare \\\n            --project \"${PROJECT}\" \\\n            --rev \"${BASELINE_REV}\"\n\n      - name: Upload visual diff report to S3\n        if: always()\n        env:\n          PROJECT: ${{ matrix.project }}\n          PR_NUMBER: ${{ github.event.pull_request.number }}\n          RUN_ID: ${{ github.run_id }}\n        run: |\n          SUMMARY_FILE=\"web/output/screenshot-diff/${PROJECT}/summary.json\"\n          if [ ! -f \"${SUMMARY_FILE}\" ]; then\n            echo \"No summary file found — skipping S3 upload.\"\n            exit 0\n          fi\n\n          HAS_DIFF=$(jq -r '.has_differences' \"${SUMMARY_FILE}\")\n          if [ \"${HAS_DIFF}\" != \"true\" ]; then\n            echo \"No visual differences for ${PROJECT} — skipping S3 upload.\"\n            exit 0\n          fi\n\n          aws s3 sync \"web/output/screenshot-diff/${PROJECT}/\" \\\n            \"s3://${PLAYWRIGHT_S3_BUCKET}/reports/pr-${PR_NUMBER}/${RUN_ID}/${PROJECT}/\"\n\n      - name: Upload visual diff summary\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        if: always()\n        with:\n          name: screenshot-diff-summary-${{ matrix.project }}\n          path: ./web/output/screenshot-diff/${{ matrix.project }}/summary.json\n          if-no-files-found: ignore\n          retention-days: 5\n\n      - name: Upload visual diff report artifact\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        if: always()\n        with:\n          name: screenshot-diff-report-${{ matrix.project }}-${{ github.run_id }}\n          path: ./web/output/screenshot-diff/${{ matrix.project }}/\n          if-no-files-found: ignore\n          retention-days: 30\n\n      - name: Update S3 baselines\n        if: >-\n          success() && (\n            github.ref == 'refs/heads/main' ||\n            startsWith(github.ref, 'refs/heads/release/') ||\n            startsWith(github.ref, 'refs/tags/v') ||\n            (\n              github.event_name == 'merge_group' && (\n                github.event.merge_group.base_ref == 'refs/heads/main' ||\n                startsWith(github.event.merge_group.base_ref, 'refs/heads/release/')\n              )\n            )\n          )\n        env:\n          PROJECT: ${{ matrix.project }}\n          PLAYWRIGHT_S3_BUCKET: ${{ env.PLAYWRIGHT_S3_BUCKET }}\n          BASELINE_REV: ${{ steps.baseline-rev.outputs.rev }}\n        run: |\n          if [ -d \"web/output/screenshots/\" ] && [ \"$(ls -A web/output/screenshots/)\" ]; then\n            uv run --no-sync --with onyx-devtools ods screenshot-diff upload-baselines \\\n              --project \"${PROJECT}\" \\\n              --rev \"${BASELINE_REV}\" \\\n              --delete\n          else\n            echo \"No screenshots to upload for ${PROJECT} — skipping baseline update.\"\n          fi\n\n      # save before stopping the containers so the logs can be captured\n      - name: Save Docker logs\n        if: success() || failure()\n        env:\n          WORKSPACE: ${{ github.workspace }}\n        run: |\n          cd deployment/docker_compose\n          docker compose logs > docker-compose.log\n          mv docker-compose.log ${WORKSPACE}/docker-compose.log\n\n      - name: Upload logs\n        if: success() || failure()\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        with:\n          name: docker-logs-${{ matrix.project }}-${{ github.run_id }}\n          path: ${{ github.workspace }}/docker-compose.log\n\n  playwright-tests-lite:\n    needs: [build-web-image, build-backend-image]\n    name: Playwright Tests (lite)\n    runs-on:\n      - runs-on\n      - runner=4cpu-linux-arm64\n      - \"run-id=${{ github.run_id }}-playwright-tests-lite\"\n      - \"extras=ecr-cache\"\n    timeout-minutes: 30\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Setup node\n        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts\n        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v4\n        with:\n          node-version: 22\n          cache: \"npm\" # zizmor: ignore[cache-poisoning]\n          cache-dependency-path: ./web/package-lock.json\n\n      - name: Install node dependencies\n        working-directory: ./web\n        run: npm ci\n\n      - name: Cache playwright cache\n        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts\n        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4\n        with:\n          path: ~/.cache/ms-playwright\n          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}\n          restore-keys: |\n            ${{ runner.os }}-playwright-npm-\n\n      - name: Install playwright browsers\n        working-directory: ./web\n        run: npx playwright install --with-deps\n\n      - name: Create .env file for Docker Compose\n        env:\n          OPENAI_API_KEY_VALUE: ${{ env.OPENAI_API_KEY }}\n          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}\n          RUN_ID: ${{ github.run_id }}\n        run: |\n          cat <<EOF > deployment/docker_compose/.env\n          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true\n          LICENSE_ENFORCEMENT_ENABLED=false\n          AUTH_TYPE=basic\n          INTEGRATION_TESTS_MODE=true\n          GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}\n          MOCK_LLM_RESPONSE=true\n          REQUIRE_EMAIL_VERIFICATION=false\n          DISABLE_TELEMETRY=true\n          ONYX_BACKEND_IMAGE=${ECR_CACHE}:playwright-test-backend-${RUN_ID}\n          ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}\n          EOF\n\n      # needed for pulling external images otherwise, we hit the \"Unauthenticated users\" limit\n      # https://docs.docker.com/docker-hub/usage/\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Start Docker containers (lite)\n        run: |\n          cd deployment/docker_compose\n          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml up -d\n        id: start_docker\n\n      - name: Run Playwright tests (lite)\n        working-directory: ./web\n        run: npx playwright test --project lite\n\n      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        if: always()\n        with:\n          name: playwright-test-results-lite-${{ github.run_id }}\n          path: ./web/output/playwright/\n          retention-days: 30\n\n      - name: Save Docker logs\n        if: success() || failure()\n        env:\n          WORKSPACE: ${{ github.workspace }}\n        run: |\n          cd deployment/docker_compose\n          docker compose logs > docker-compose.log\n          mv docker-compose.log ${WORKSPACE}/docker-compose.log\n\n      - name: Upload logs\n        if: success() || failure()\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        with:\n          name: docker-logs-lite-${{ github.run_id }}\n          path: ${{ github.workspace }}/docker-compose.log\n\n  # Post a single combined visual regression comment after all matrix jobs finish\n  visual-regression-comment:\n    needs: [playwright-tests]\n    if: >-\n      always() &&\n      github.event_name == 'pull_request' &&\n      needs.playwright-tests.result != 'cancelled'\n    runs-on: ubuntu-slim\n    timeout-minutes: 5\n    permissions:\n      pull-requests: write\n    steps:\n      - name: Download visual diff summaries\n        uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3\n        with:\n          pattern: screenshot-diff-summary-*\n          path: summaries/\n\n      - name: Post combined PR comment\n        env:\n          GH_TOKEN: ${{ github.token }}\n          PR_NUMBER: ${{ github.event.pull_request.number }}\n          RUN_ID: ${{ github.run_id }}\n          REPO: ${{ github.repository }}\n          S3_BUCKET: ${{ env.PLAYWRIGHT_S3_BUCKET }}\n        run: |\n          MARKER=\"<!-- visual-regression-report -->\"\n\n          # Build the markdown table from all summary files\n          TABLE_HEADER=\"| Project | Changed | Added | Removed | Unchanged | Report |\"\n          TABLE_DIVIDER=\"|---------|---------|-------|---------|-----------|--------|\"\n          TABLE_ROWS=\"\"\n          HAS_ANY_SUMMARY=false\n\n          for SUMMARY_DIR in summaries/screenshot-diff-summary-*/; do\n            SUMMARY_FILE=\"${SUMMARY_DIR}summary.json\"\n            if [ ! -f \"${SUMMARY_FILE}\" ]; then\n              continue\n            fi\n\n            HAS_ANY_SUMMARY=true\n            PROJECT=$(jq -r '.project' \"${SUMMARY_FILE}\")\n            CHANGED=$(jq -r '.changed' \"${SUMMARY_FILE}\")\n            ADDED=$(jq -r '.added' \"${SUMMARY_FILE}\")\n            REMOVED=$(jq -r '.removed' \"${SUMMARY_FILE}\")\n            UNCHANGED=$(jq -r '.unchanged' \"${SUMMARY_FILE}\")\n            TOTAL=$(jq -r '.total' \"${SUMMARY_FILE}\")\n            HAS_DIFF=$(jq -r '.has_differences' \"${SUMMARY_FILE}\")\n\n            if [ \"${TOTAL}\" = \"0\" ]; then\n              REPORT_LINK=\"_No screenshots_\"\n            elif [ \"${HAS_DIFF}\" = \"true\" ]; then\n              REPORT_URL=\"https://${S3_BUCKET}.s3.us-east-2.amazonaws.com/reports/pr-${PR_NUMBER}/${RUN_ID}/${PROJECT}/index.html\"\n              REPORT_LINK=\"[View Report](${REPORT_URL})\"\n            else\n              REPORT_LINK=\"✅ No changes\"\n            fi\n\n            TABLE_ROWS=\"${TABLE_ROWS}| \\`${PROJECT}\\` | ${CHANGED} | ${ADDED} | ${REMOVED} | ${UNCHANGED} | ${REPORT_LINK} |\\n\"\n          done\n\n          if [ \"${HAS_ANY_SUMMARY}\" = \"false\" ]; then\n            echo \"No visual diff summaries found — skipping PR comment.\"\n            exit 0\n          fi\n\n          BODY=$(printf '%s\\n' \\\n            \"${MARKER}\" \\\n            \"### 🖼️ Visual Regression Report\" \\\n            \"\" \\\n            \"${TABLE_HEADER}\" \\\n            \"${TABLE_DIVIDER}\" \\\n            \"$(printf '%b' \"${TABLE_ROWS}\")\")\n\n          # Upsert: find existing comment with the marker, or create a new one\n          EXISTING_COMMENT_ID=$(gh api \\\n            \"repos/${REPO}/issues/${PR_NUMBER}/comments\" \\\n            --jq \".[] | select(.body | startswith(\\\"${MARKER}\\\")) | .id\" \\\n            2>/dev/null | head -1)\n\n          if [ -n \"${EXISTING_COMMENT_ID}\" ]; then\n            gh api \\\n              --method PATCH \\\n              \"repos/${REPO}/issues/comments/${EXISTING_COMMENT_ID}\" \\\n              -f body=\"${BODY}\"\n          else\n            gh api \\\n              --method POST \\\n              \"repos/${REPO}/issues/${PR_NUMBER}/comments\" \\\n              -f body=\"${BODY}\"\n          fi\n\n  playwright-required:\n    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.\n    runs-on: ubuntu-slim\n    timeout-minutes: 45\n    needs: [playwright-tests, playwright-tests-lite]\n    if: ${{ always() }}\n    steps:\n      - name: Check job status\n        if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}\n        run: exit 1\n"
  },
  {
    "path": ".github/workflows/pr-python-checks.yml",
    "content": "name: Python Checks\nconcurrency:\n  group: Python-Checks-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  merge_group:\n  pull_request:\n    branches:\n      - main\n      - \"release/**\"\n  push:\n    tags:\n      - \"v*.*.*\"\n\npermissions:\n  contents: read\n\njobs:\n  mypy-check:\n    # See https://runs-on.com/runners/linux/\n    # Note: Mypy seems quite optimized for x64 compared to arm64.\n    # Similarly, mypy is single-threaded and incremental, so 2cpu is sufficient.\n    runs-on:\n      [\n        runs-on,\n        runner=2cpu-linux-x64,\n        \"run-id=${{ github.run_id }}-mypy-check\",\n        \"extras=s3-cache\",\n      ]\n    timeout-minutes: 45\n\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Setup Python and Install Dependencies\n        uses: ./.github/actions/setup-python-and-install-dependencies\n        with:\n          requirements: |\n            backend/requirements/default.txt\n            backend/requirements/dev.txt\n            backend/requirements/model_server.txt\n            backend/requirements/ee.txt\n\n      - name: Generate OpenAPI schema and Python client\n        shell: bash\n        # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license\n        env:\n          LICENSE_ENFORCEMENT_ENABLED: \"false\"\n        run: |\n          ods openapi all\n\n      - name: Cache mypy cache\n        if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}\n        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4\n        with:\n          path: .mypy_cache\n          key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'pyproject.toml') }}\n          restore-keys: |\n            mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-\n            mypy-${{ runner.os }}-\n\n      - name: Run MyPy\n        env:\n          MYPY_FORCE_COLOR: 1\n          TERM: xterm-256color\n        run: mypy .\n"
  },
  {
    "path": ".github/workflows/pr-python-connector-tests.yml",
    "content": "name: Connector Tests\nconcurrency:\n  group: Connector-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  merge_group:\n  pull_request:\n    branches: [main]\n    paths:\n      - \"backend/**\"\n      - \"pyproject.toml\"\n      - \"uv.lock\"\n      - \".github/workflows/pr-python-connector-tests.yml\"\n      - \".github/actions/setup-python-and-install-dependencies/**\"\n      - \".github/actions/setup-playwright/**\"\n  push:\n    tags:\n      - \"v*.*.*\"\n  schedule:\n    # This cron expression runs the job daily at 16:00 UTC (9am PT)\n    - cron: \"0 16 * * *\"\n\npermissions:\n  id-token: write # Required for OIDC-based AWS credential exchange\n  contents: read\n\nenv:\n  PYTHONPATH: ./backend\n  DISABLE_TELEMETRY: \"true\"\n  R2_ACCOUNT_ID_DAILY_CONNECTOR_TESTS: ${{ vars.R2_ACCOUNT_ID_DAILY_CONNECTOR_TESTS }}\n  CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}\n  CONFLUENCE_TEST_SPACE: ${{ vars.CONFLUENCE_TEST_SPACE }}\n  CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}\n  SF_USERNAME: ${{ vars.SF_USERNAME }}\n  IMAP_HOST: ${{ vars.IMAP_HOST }}\n  IMAP_USERNAME: ${{ vars.IMAP_USERNAME }}\n  IMAP_MAILBOXES: ${{ vars.IMAP_MAILBOXES }}\n  AIRTABLE_TEST_BASE_ID: ${{ vars.AIRTABLE_TEST_BASE_ID }}\n  AIRTABLE_TEST_TABLE_ID: ${{ vars.AIRTABLE_TEST_TABLE_ID }}\n  AIRTABLE_TEST_TABLE_NAME: ${{ vars.AIRTABLE_TEST_TABLE_NAME }}\n  SHAREPOINT_CLIENT_ID: ${{ vars.SHAREPOINT_CLIENT_ID }}\n  SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ vars.SHAREPOINT_CLIENT_DIRECTORY_ID }}\n  SHAREPOINT_SITE: ${{ vars.SHAREPOINT_SITE }}\n  BITBUCKET_EMAIL: ${{ vars.BITBUCKET_EMAIL }}\n\njobs:\n  connectors-check:\n    # See https://runs-on.com/runners/linux/\n    runs-on:\n      [\n        runs-on,\n        runner=8cpu-linux-x64,\n        \"run-id=${{ github.run_id }}-connectors-check\",\n        \"extras=s3-cache\",\n      ]\n    timeout-minutes: 45\n    environment: ci-protected\n\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Setup Python and Install Dependencies\n        uses: ./.github/actions/setup-python-and-install-dependencies\n        with:\n          requirements: |\n            backend/requirements/default.txt\n            backend/requirements/dev.txt\n\n      - name: Setup Playwright\n        uses: ./.github/actions/setup-playwright\n\n      - name: Detect Connector changes\n        id: changes\n        uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # ratchet:dorny/paths-filter@v3\n        with:\n          filters: |\n            hubspot:\n              - 'backend/onyx/connectors/hubspot/**'\n              - 'backend/tests/daily/connectors/hubspot/**'\n              - 'uv.lock'\n            salesforce:\n              - 'backend/onyx/connectors/salesforce/**'\n              - 'backend/tests/daily/connectors/salesforce/**'\n              - 'uv.lock'\n            github:\n              - 'backend/onyx/connectors/github/**'\n              - 'backend/tests/daily/connectors/github/**'\n              - 'uv.lock'\n            file_processing:\n              - 'backend/onyx/file_processing/**'\n              - 'uv.lock'\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # ratchet:aws-actions/configure-aws-credentials@v4\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get connector test secrets from AWS Secrets Manager\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # ratchet:aws-actions/aws-secretsmanager-get-secrets@v2\n        with:\n          parse-json-secrets: false\n          secret-ids: |\n            AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS, test/aws-access-key-id\n            AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS, test/aws-secret-access-key\n            R2_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS, test/r2-access-key-id\n            R2_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS, test/r2-secret-access-key\n            GCS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS, test/gcs-access-key-id\n            GCS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS, test/gcs-secret-access-key\n            CONFLUENCE_ACCESS_TOKEN, test/confluence-access-token\n            CONFLUENCE_ACCESS_TOKEN_SCOPED, test/confluence-access-token-scoped\n            JIRA_BASE_URL, test/jira-base-url\n            JIRA_USER_EMAIL, test/jira-user-email\n            JIRA_API_TOKEN, test/jira-api-token\n            JIRA_API_TOKEN_SCOPED, test/jira-api-token-scoped\n            GONG_ACCESS_KEY, test/gong-access-key\n            GONG_ACCESS_KEY_SECRET, test/gong-access-key-secret\n            GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR, test/google-drive-service-account-json\n            GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1, test/google-drive-oauth-creds-test-user-1\n            GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR, test/google-drive-oauth-creds\n            GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR, test/google-gmail-service-account-json\n            GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR, test/google-gmail-oauth-creds\n            SLAB_BOT_TOKEN, test/slab-bot-token\n            ZENDESK_SUBDOMAIN, test/zendesk-subdomain\n            ZENDESK_EMAIL, test/zendesk-email\n            ZENDESK_TOKEN, test/zendesk-token\n            SF_PASSWORD, test/sf-password\n            SF_SECURITY_TOKEN, test/sf-security-token\n            HUBSPOT_ACCESS_TOKEN, test/hubspot-access-token\n            IMAP_PASSWORD, test/imap-password\n            AIRTABLE_ACCESS_TOKEN, test/airtable-access-token\n            SHAREPOINT_CLIENT_SECRET, test/sharepoint-client-secret\n            PERM_SYNC_SHAREPOINT_CLIENT_ID, test/perm-sync-sharepoint-client-id\n            PERM_SYNC_SHAREPOINT_PRIVATE_KEY, test/perm-sync-sharepoint-private-key\n            PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD, test/perm-sync-sharepoint-cert-password\n            PERM_SYNC_SHAREPOINT_DIRECTORY_ID, test/perm-sync-sharepoint-directory-id\n            ACCESS_TOKEN_GITHUB, test/github-access-token\n            GITLAB_ACCESS_TOKEN, test/gitlab-access-token\n            GITBOOK_SPACE_ID, test/gitbook-space-id\n            GITBOOK_API_KEY, test/gitbook-api-key\n            NOTION_INTEGRATION_TOKEN, test/notion-integration-token\n            HIGHSPOT_KEY, test/highspot-key\n            HIGHSPOT_SECRET, test/highspot-secret\n            SLACK_BOT_TOKEN, test/slack-bot-token\n            DISCORD_CONNECTOR_BOT_TOKEN, test/discord-bot-token\n            TEAMS_APPLICATION_ID, test/teams-application-id\n            TEAMS_DIRECTORY_ID, test/teams-directory-id\n            TEAMS_SECRET, test/teams-secret\n            BITBUCKET_WORKSPACE, test/bitbucket-workspace\n            BITBUCKET_API_TOKEN, test/bitbucket-api-token\n            FIREFLIES_API_KEY, test/fireflies-api-key\n\n      - name: Run Tests (excluding HubSpot, Salesforce, GitHub, and Coda)\n        shell: script -q -e -c \"bash --noprofile --norc -eo pipefail {0}\"\n        run: |\n          py.test \\\n            -n 8 \\\n            --dist loadfile \\\n            --durations=8 \\\n            -o junit_family=xunit2 \\\n            -xv \\\n            --ff \\\n            backend/tests/daily/connectors \\\n            --ignore backend/tests/daily/connectors/hubspot \\\n            --ignore backend/tests/daily/connectors/salesforce \\\n            --ignore backend/tests/daily/connectors/github \\\n            --ignore backend/tests/daily/connectors/coda\n\n      - name: Run HubSpot Connector Tests\n        if: ${{ github.event_name == 'schedule' || steps.changes.outputs.hubspot == 'true' || steps.changes.outputs.file_processing == 'true' }}\n        shell: script -q -e -c \"bash --noprofile --norc -eo pipefail {0}\"\n        run: |\n          py.test \\\n            -n 8 \\\n            --dist loadfile \\\n            --durations=8 \\\n            -o junit_family=xunit2 \\\n            -xv \\\n            --ff \\\n            backend/tests/daily/connectors/hubspot\n\n      - name: Run Salesforce Connector Tests\n        if: ${{ github.event_name == 'schedule' || steps.changes.outputs.salesforce == 'true' || steps.changes.outputs.file_processing == 'true' }}\n        shell: script -q -e -c \"bash --noprofile --norc -eo pipefail {0}\"\n        run: |\n          py.test \\\n            -n 8 \\\n            --dist loadfile \\\n            --durations=8 \\\n            -o junit_family=xunit2 \\\n            -xv \\\n            --ff \\\n            backend/tests/daily/connectors/salesforce\n\n      - name: Run GitHub Connector Tests\n        if: ${{ github.event_name == 'schedule' || steps.changes.outputs.github == 'true' || steps.changes.outputs.file_processing == 'true' }}\n        shell: script -q -e -c \"bash --noprofile --norc -eo pipefail {0}\"\n        run: |\n          py.test \\\n            -n 8 \\\n            --dist loadfile \\\n            --durations=8 \\\n            -o junit_family=xunit2 \\\n            -xv \\\n            --ff \\\n            backend/tests/daily/connectors/github\n\n      - name: Alert on Failure\n        if: failure() && github.event_name == 'schedule'\n        env:\n          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}\n          REPO: ${{ github.repository }}\n          RUN_ID: ${{ github.run_id }}\n        run: |\n          curl -X POST \\\n            -H 'Content-type: application/json' \\\n            --data \"{\\\"text\\\":\\\"Scheduled Connector Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\\\"}\" \\\n            $SLACK_WEBHOOK\n"
  },
  {
    "path": ".github/workflows/pr-python-model-tests.yml",
    "content": "name: Model Server Tests\n\non:\n  schedule:\n    # This cron expression runs the job daily at 16:00 UTC (9am PT)\n    - cron: \"0 16 * * *\"\n  workflow_dispatch:\n\npermissions:\n  contents: read\n\nenv:\n  # Bedrock\n  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}\n  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}\n  AWS_REGION_NAME: ${{ vars.AWS_REGION_NAME }}\n\n  # API keys for testing\n  COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}\n  LITELLM_API_KEY: ${{ secrets.LITELLM_API_KEY }}\n  LITELLM_API_URL: ${{ secrets.LITELLM_API_URL }}\n  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}\n  AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}\n  AZURE_API_URL: ${{ vars.AZURE_API_URL }}\n\njobs:\n  model-check:\n    # See https://runs-on.com/runners/linux/\n    runs-on:\n      - runs-on\n      - runner=4cpu-linux-arm64\n      - \"run-id=${{ github.run_id }}-model-check\"\n      - \"extras=ecr-cache\"\n    environment: ci-protected\n    timeout-minutes: 45\n\n    env:\n      PYTHONPATH: ./backend\n\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Setup Python and Install Dependencies\n        uses: ./.github/actions/setup-python-and-install-dependencies\n        with:\n          requirements: |\n            backend/requirements/default.txt\n            backend/requirements/dev.txt\n\n      - name: Format branch name for cache\n        id: format-branch\n        env:\n          PR_NUMBER: ${{ github.event.pull_request.number }}\n          REF_NAME: ${{ github.ref_name }}\n        run: |\n          if [ -n \"${PR_NUMBER}\" ]; then\n            CACHE_SUFFIX=\"${PR_NUMBER}\"\n          else\n            # shellcheck disable=SC2001\n            CACHE_SUFFIX=$(echo \"${REF_NAME}\" | sed 's/[^A-Za-z0-9._-]/-/g')\n          fi\n          echo \"cache-suffix=${CACHE_SUFFIX}\" >> $GITHUB_OUTPUT\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_TOKEN }}\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f\n\n      - name: Build and load\n        uses: docker/bake-action@82490499d2e5613fcead7e128237ef0b0ea210f7 # ratchet:docker/bake-action@v7.0.0\n        env:\n          TAG: model-server-${{ github.run_id }}\n        with:\n          load: true\n          targets: model-server\n          set: |\n            model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}\n            model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}\n            model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache\n            model-server.cache-from=type=registry,ref=onyxdotapp/onyx-model-server:latest\n            model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max\n            model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max\n            model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max\n\n      - name: Start Docker containers\n        id: start_docker\n        env:\n          IMAGE_TAG: model-server-${{ github.run_id }}\n        run: |\n          cd deployment/docker_compose\n          docker compose \\\n            -f docker-compose.yml \\\n            -f docker-compose.dev.yml \\\n            up -d --wait \\\n            inference_model_server\n\n      - name: Run Tests\n        run: |\n          py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm\n          py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding\n\n      - name: Alert on Failure\n        if: failure() && github.event_name == 'schedule'\n        uses: ./.github/actions/slack-notify\n        with:\n          webhook-url: ${{ secrets.SLACK_WEBHOOK }}\n          failed-jobs: model-check\n          title: \"🚨 Scheduled Model Tests failed!\"\n          ref-name: ${{ github.ref_name }}\n\n      - name: Dump all-container logs (optional)\n        if: always()\n        run: |\n          cd deployment/docker_compose\n          docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true\n\n      - name: Upload logs\n        if: always()\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        with:\n          name: docker-all-logs\n          path: ${{ github.workspace }}/docker-compose.log\n"
  },
  {
    "path": ".github/workflows/pr-python-tests.yml",
    "content": "name: Python Unit Tests\nconcurrency:\n  group: Python-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  merge_group:\n  pull_request:\n    branches:\n      - main\n      - 'release/**'\n  push:\n    tags:\n      - \"v*.*.*\"\n\npermissions:\n  contents: read\n\njobs:\n  backend-check:\n    # See https://runs-on.com/runners/linux/\n    runs-on: [runs-on, runner=2cpu-linux-arm64, \"run-id=${{ github.run_id }}-backend-check\"]\n    timeout-minutes: 45\n\n\n    env:\n      PYTHONPATH: ./backend\n      REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}\n      DISABLE_TELEMETRY: \"true\"\n      # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license\n      LICENSE_ENFORCEMENT_ENABLED: \"false\"\n\n    steps:\n    - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n    - name: Checkout code\n      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n      with:\n        persist-credentials: false\n\n    - name: Setup Python and Install Dependencies\n      uses: ./.github/actions/setup-python-and-install-dependencies\n      with:\n        requirements: |\n          backend/requirements/default.txt\n          backend/requirements/dev.txt\n          backend/requirements/model_server.txt\n          backend/requirements/ee.txt\n\n    - name: Run Tests\n      shell: script -q -e -c \"bash --noprofile --norc -eo pipefail {0}\"\n      run: py.test -o junit_family=xunit2 -xv --ff backend/tests/unit\n"
  },
  {
    "path": ".github/workflows/pr-quality-checks.yml",
    "content": "name: Quality Checks PR\nconcurrency:\n  group: Quality-Checks-PR-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}\n  cancel-in-progress: true\n\non:\n  merge_group:\n  pull_request: null\n  push:\n    branches:\n      - main\n    tags:\n      - \"v*.*.*\"\n\npermissions:\n  contents: read\n\njobs:\n  quality-checks:\n    runs-on: ubuntu-latest\n    timeout-minutes: 45\n    steps:\n      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          fetch-depth: 0\n          persist-credentials: false\n      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # ratchet:actions/setup-python@v6\n        with:\n          python-version: \"3.11\"\n      - name: Setup Terraform\n        uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # ratchet:hashicorp/setup-terraform@v4.0.0\n      - name: Setup node\n        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v6\n        with: # zizmor: ignore[cache-poisoning]\n          node-version: 22\n          cache: \"npm\"\n          cache-dependency-path: ./web/package-lock.json\n      - name: Install node dependencies\n        working-directory: ./web\n        run: npm ci\n      - uses: j178/prek-action@0bb87d7f00b0c99306c8bcb8b8beba1eb581c037 # ratchet:j178/prek-action@v1\n        with:\n          prek-version: '0.3.4'\n          extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}\n      - name: Check Actions\n        uses: giner/check-actions@28d366c7cbbe235f9624a88aa31a628167eee28c # ratchet:giner/check-actions@v1.0.1\n        with:\n          check_permissions: false\n          check_versions: false\n"
  },
  {
    "path": ".github/workflows/preview.yml",
    "content": "name: Preview Deployment\nenv:\n  VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }}\n  VERCEL_PROJECT_ID: ${{ secrets.VERCEL_PROJECT_ID }}\n  VERCEL_CLI: vercel@50.14.1\non:\n  push:\n    branches-ignore:\n      - main\n    paths:\n      - \"web/**\"\npermissions:\n  contents: read\n  pull-requests: write\njobs:\n  Deploy-Preview:\n    runs-on: ubuntu-latest\n    timeout-minutes: 30\n    steps:\n      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd\n        with:\n          persist-credentials: false\n\n      - name: Setup node\n        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v4\n        with:\n          node-version: 22\n          cache: \"npm\"\n          cache-dependency-path: ./web/package-lock.json\n\n      - name: Pull Vercel Environment Information\n        run: npx --yes ${{ env.VERCEL_CLI }} pull --yes --environment=preview --token=${{ secrets.VERCEL_TOKEN }}\n\n      - name: Build Project Artifacts\n        run: npx --yes ${{ env.VERCEL_CLI }} build --token=${{ secrets.VERCEL_TOKEN }}\n\n      - name: Deploy Project Artifacts to Vercel\n        id: deploy\n        run: |\n          DEPLOYMENT_URL=$(npx --yes ${{ env.VERCEL_CLI }} deploy --prebuilt --token=${{ secrets.VERCEL_TOKEN }})\n          echo \"url=$DEPLOYMENT_URL\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Update PR comment with deployment URL\n        if: always() && steps.deploy.outputs.url\n        env:\n          GH_TOKEN: ${{ github.token }}\n          DEPLOYMENT_URL: ${{ steps.deploy.outputs.url }}\n        run: |\n          # Find the PR for this branch\n          PR_NUMBER=$(gh pr list --head \"$GITHUB_REF_NAME\" --json number --jq '.[0].number')\n          if [ -z \"$PR_NUMBER\" ]; then\n            echo \"No open PR found for branch $GITHUB_REF_NAME, skipping comment.\"\n            exit 0\n          fi\n\n          COMMENT_MARKER=\"<!-- preview-deployment -->\"\n          COMMENT_BODY=\"$COMMENT_MARKER\n          **Preview Deployment**\n\n          | Status | Preview | Commit | Updated |\n          | --- | --- | --- | --- |\n          | ✅ |  $DEPLOYMENT_URL | \\`${GITHUB_SHA::7}\\` | $(date -u '+%Y-%m-%d %H:%M:%S UTC') |\"\n\n          # Find existing comment by marker\n          EXISTING_COMMENT_ID=$(gh api \"repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments\" \\\n            --jq \".[] | select(.body | startswith(\\\"$COMMENT_MARKER\\\")) | .id\" | head -1)\n\n          if [ -n \"$EXISTING_COMMENT_ID\" ]; then\n            gh api \"repos/$GITHUB_REPOSITORY/issues/comments/$EXISTING_COMMENT_ID\" \\\n              --method PATCH --field body=\"$COMMENT_BODY\"\n          else\n            gh pr comment \"$PR_NUMBER\" --body \"$COMMENT_BODY\"\n          fi\n"
  },
  {
    "path": ".github/workflows/release-cli.yml",
    "content": "name: Release CLI\n\non:\n  push:\n    tags:\n      - \"cli/v*.*.*\"\n\njobs:\n  pypi:\n    runs-on: ubuntu-latest\n    environment:\n      name: release-cli\n    permissions:\n      id-token: write\n    timeout-minutes: 10\n    steps:\n      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n      - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7\n        with:\n          enable-cache: false\n          version: \"0.9.9\"\n      - run: |\n          for goos in linux windows darwin; do\n            for goarch in amd64 arm64; do\n              GOOS=\"$goos\" GOARCH=\"$goarch\" uv build --wheel\n            done\n          done\n        working-directory: cli\n      - run: uv publish\n        working-directory: cli\n\n  docker-amd64:\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-x64\n      - run-id=${{ github.run_id }}-cli-amd64\n      - extras=ecr-cache\n    environment: deploy\n    permissions:\n      id-token: write\n    timeout-minutes: 30\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: onyxdotapp/onyx-cli\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # ratchet:aws-actions/configure-aws-credentials@v6.0.0\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # ratchet:aws-actions/aws-secretsmanager-get-secrets@v2.0.10\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # ratchet:docker/setup-buildx-action@v4\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # ratchet:docker/login-action@v4\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push AMD64\n        id: build\n        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # ratchet:docker/build-push-action@v7\n        with:\n          context: ./cli\n          file: ./cli/Dockerfile\n          platforms: linux/amd64\n          cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: type=inline\n          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n\n  docker-arm64:\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-arm64\n      - run-id=${{ github.run_id }}-cli-arm64\n      - extras=ecr-cache\n    environment: deploy\n    permissions:\n      id-token: write\n    timeout-minutes: 30\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: onyxdotapp/onyx-cli\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # ratchet:aws-actions/configure-aws-credentials@v6.0.0\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # ratchet:aws-actions/aws-secretsmanager-get-secrets@v2.0.10\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # ratchet:docker/setup-buildx-action@v4\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # ratchet:docker/login-action@v4\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push ARM64\n        id: build\n        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # ratchet:docker/build-push-action@v7\n        with:\n          context: ./cli\n          file: ./cli/Dockerfile\n          platforms: linux/arm64\n          cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: type=inline\n          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n\n  merge-docker:\n    needs:\n      - docker-amd64\n      - docker-arm64\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-x64\n      - run-id=${{ github.run_id }}-cli-merge\n    environment: deploy\n    permissions:\n      id-token: write\n    timeout-minutes: 10\n    env:\n      REGISTRY_IMAGE: onyxdotapp/onyx-cli\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # ratchet:aws-actions/configure-aws-credentials@v6.0.0\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # ratchet:aws-actions/aws-secretsmanager-get-secrets@v2.0.10\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # ratchet:docker/setup-buildx-action@v4\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # ratchet:docker/login-action@v4\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Create and push manifest\n        env:\n          AMD64_DIGEST: ${{ needs.docker-amd64.outputs.digest }}\n          ARM64_DIGEST: ${{ needs.docker-arm64.outputs.digest }}\n          TAG: ${{ github.ref_name }}\n        run: |\n          SANITIZED_TAG=\"${TAG#cli/}\"\n          IMAGES=(\n            \"${REGISTRY_IMAGE}@${AMD64_DIGEST}\"\n            \"${REGISTRY_IMAGE}@${ARM64_DIGEST}\"\n          )\n\n          if [[ \"$TAG\" =~ ^cli/v[0-9]+\\.[0-9]+\\.[0-9]+$ ]]; then\n            docker buildx imagetools create \\\n              -t \"${REGISTRY_IMAGE}:${SANITIZED_TAG}\" \\\n              -t \"${REGISTRY_IMAGE}:latest\" \\\n              \"${IMAGES[@]}\"\n          else\n            docker buildx imagetools create \\\n              -t \"${REGISTRY_IMAGE}:${SANITIZED_TAG}\" \\\n              \"${IMAGES[@]}\"\n          fi\n"
  },
  {
    "path": ".github/workflows/release-devtools.yml",
    "content": "name: Release Devtools\n\non:\n  push:\n    tags:\n      - \"ods/v*.*.*\"\n\njobs:\n  pypi:\n    runs-on: ubuntu-latest\n    environment:\n      name: release-devtools\n    permissions:\n      id-token: write\n    timeout-minutes: 10\n    strategy:\n      matrix:\n        os-arch:\n          - { goos: \"linux\", goarch: \"amd64\" }\n          - { goos: \"linux\", goarch: \"arm64\" }\n          - { goos: \"windows\", goarch: \"amd64\" }\n          - { goos: \"windows\", goarch: \"arm64\" }\n          - { goos: \"darwin\", goarch: \"amd64\" }\n          - { goos: \"darwin\", goarch: \"arm64\" }\n    steps:\n      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n      - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7\n        with:\n          enable-cache: false\n          version: \"0.9.9\"\n      - run: |\n          GOOS=\"${{ matrix.os-arch.goos }}\" \\\n          GOARCH=\"${{ matrix.os-arch.goarch }}\" \\\n          uv build --wheel\n        working-directory: tools/ods\n      - run: uv publish\n        working-directory: tools/ods\n"
  },
  {
    "path": ".github/workflows/reusable-nightly-llm-provider-chat.yml",
    "content": "name: Reusable Nightly LLM Provider Chat Tests\n\non:\n  workflow_call:\n    inputs:\n      openai_models:\n        description: \"Comma-separated models for openai\"\n        required: false\n        default: \"\"\n        type: string\n      anthropic_models:\n        description: \"Comma-separated models for anthropic\"\n        required: false\n        default: \"\"\n        type: string\n      bedrock_models:\n        description: \"Comma-separated models for bedrock\"\n        required: false\n        default: \"\"\n        type: string\n      vertex_ai_models:\n        description: \"Comma-separated models for vertex_ai\"\n        required: false\n        default: \"\"\n        type: string\n      azure_models:\n        description: \"Comma-separated models for azure\"\n        required: false\n        default: \"\"\n        type: string\n      ollama_models:\n        description: \"Comma-separated models for ollama_chat\"\n        required: false\n        default: \"\"\n        type: string\n      openrouter_models:\n        description: \"Comma-separated models for openrouter\"\n        required: false\n        default: \"\"\n        type: string\n      azure_api_base:\n        description: \"API base for azure provider\"\n        required: false\n        default: \"\"\n        type: string\n      strict:\n        description: \"Default NIGHTLY_LLM_STRICT passed to tests\"\n        required: false\n        default: true\n        type: boolean\n    secrets:\n      AWS_OIDC_ROLE_ARN:\n        description: \"AWS role ARN for OIDC auth\"\n        required: true\n\npermissions:\n  contents: read\n  id-token: write\n\njobs:\n  build-backend-image:\n    runs-on:\n      [\n        runs-on,\n        runner=1cpu-linux-arm64,\n        \"run-id=${{ github.run_id }}-build-backend-image\",\n        \"extras=ecr-cache\",\n      ]\n    timeout-minutes: 45\n    environment: ci-protected\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, test/docker-username\n            DOCKER_TOKEN, test/docker-token\n\n      - name: Build backend image\n        uses: ./.github/actions/build-backend-image\n        with:\n          runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}\n          ref-name: ${{ github.ref_name }}\n          pr-number: ${{ github.event.pull_request.number }}\n          github-sha: ${{ github.sha }}\n          run-id: ${{ github.run_id }}\n          docker-username: ${{ env.DOCKER_USERNAME }}\n          docker-token: ${{ env.DOCKER_TOKEN }}\n          docker-no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' && 'true' || 'false' }}\n\n  build-model-server-image:\n    runs-on:\n      [\n        runs-on,\n        runner=1cpu-linux-arm64,\n        \"run-id=${{ github.run_id }}-build-model-server-image\",\n        \"extras=ecr-cache\",\n      ]\n    timeout-minutes: 45\n    environment: ci-protected\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, test/docker-username\n            DOCKER_TOKEN, test/docker-token\n\n      - name: Build model server image\n        uses: ./.github/actions/build-model-server-image\n        with:\n          runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}\n          ref-name: ${{ github.ref_name }}\n          pr-number: ${{ github.event.pull_request.number }}\n          github-sha: ${{ github.sha }}\n          run-id: ${{ github.run_id }}\n          docker-username: ${{ env.DOCKER_USERNAME }}\n          docker-token: ${{ env.DOCKER_TOKEN }}\n\n  build-integration-image:\n    runs-on:\n      [\n        runs-on,\n        runner=2cpu-linux-arm64,\n        \"run-id=${{ github.run_id }}-build-integration-image\",\n        \"extras=ecr-cache\",\n      ]\n    timeout-minutes: 45\n    environment: ci-protected\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, test/docker-username\n            DOCKER_TOKEN, test/docker-token\n\n      - name: Build integration image\n        uses: ./.github/actions/build-integration-image\n        with:\n          runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}\n          ref-name: ${{ github.ref_name }}\n          pr-number: ${{ github.event.pull_request.number }}\n          github-sha: ${{ github.sha }}\n          run-id: ${{ github.run_id }}\n          docker-username: ${{ env.DOCKER_USERNAME }}\n          docker-token: ${{ env.DOCKER_TOKEN }}\n\n  provider-chat-test:\n    needs:\n      [\n        build-backend-image,\n        build-model-server-image,\n        build-integration-image,\n      ]\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          - provider: openai\n            models: ${{ inputs.openai_models }}\n            api_key_env: OPENAI_API_KEY\n            custom_config_env: \"\"\n            api_base: \"\"\n            api_version: \"\"\n            deployment_name: \"\"\n            required: true\n          - provider: anthropic\n            models: ${{ inputs.anthropic_models }}\n            api_key_env: ANTHROPIC_API_KEY\n            custom_config_env: \"\"\n            api_base: \"\"\n            api_version: \"\"\n            deployment_name: \"\"\n            required: true\n          - provider: bedrock\n            models: ${{ inputs.bedrock_models }}\n            api_key_env: BEDROCK_API_KEY\n            custom_config_env: \"\"\n            api_base: \"\"\n            api_version: \"\"\n            deployment_name: \"\"\n            required: false\n          - provider: vertex_ai\n            models: ${{ inputs.vertex_ai_models }}\n            api_key_env: \"\"\n            custom_config_env: NIGHTLY_LLM_VERTEX_AI_CUSTOM_CONFIG_JSON\n            api_base: \"\"\n            api_version: \"\"\n            deployment_name: \"\"\n            required: false\n          - provider: azure\n            models: ${{ inputs.azure_models }}\n            api_key_env: AZURE_API_KEY\n            custom_config_env: \"\"\n            api_base: ${{ inputs.azure_api_base }}\n            api_version: \"2025-04-01-preview\"\n            deployment_name: \"\"\n            required: false\n          - provider: ollama_chat\n            models: ${{ inputs.ollama_models }}\n            api_key_env: OLLAMA_API_KEY\n            custom_config_env: \"\"\n            api_base: \"https://ollama.com\"\n            api_version: \"\"\n            deployment_name: \"\"\n            required: false\n          - provider: openrouter\n            models: ${{ inputs.openrouter_models }}\n            api_key_env: OPENROUTER_API_KEY\n            custom_config_env: \"\"\n            api_base: \"https://openrouter.ai/api/v1\"\n            api_version: \"\"\n            deployment_name: \"\"\n            required: false\n    runs-on:\n      - runs-on\n      - runner=4cpu-linux-arm64\n      - \"run-id=${{ github.run_id }}-nightly-${{ matrix.provider }}-provider-chat-test\"\n      - extras=ecr-cache\n    timeout-minutes: 45\n    environment: ci-protected\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          # Keep JSON values unparsed so vertex custom config is passed as raw JSON.\n          parse-json-secrets: false\n          secret-ids: |\n            DOCKER_USERNAME, test/docker-username\n            DOCKER_TOKEN, test/docker-token\n            OPENAI_API_KEY, test/openai-api-key\n            ANTHROPIC_API_KEY, test/anthropic-api-key\n            BEDROCK_API_KEY, test/bedrock-api-key\n            NIGHTLY_LLM_VERTEX_AI_CUSTOM_CONFIG_JSON, test/nightly-llm-vertex-ai-custom-config-json\n            AZURE_API_KEY, test/azure-api-key\n            OLLAMA_API_KEY, test/ollama-api-key\n            OPENROUTER_API_KEY, test/openrouter-api-key\n\n      - name: Run nightly provider chat test\n        uses: ./.github/actions/run-nightly-provider-chat-test\n        with:\n          provider: ${{ matrix.provider }}\n          models: ${{ matrix.models }}\n          provider-api-key: ${{ matrix.api_key_env && env[matrix.api_key_env] || '' }}\n          strict: ${{ inputs.strict && 'true' || 'false' }}\n          api-base: ${{ matrix.api_base }}\n          api-version: ${{ matrix.api_version }}\n          deployment-name: ${{ matrix.deployment_name }}\n          custom-config-json: ${{ matrix.custom_config_env && env[matrix.custom_config_env] || '' }}\n          runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}\n          run-id: ${{ github.run_id }}\n          docker-username: ${{ env.DOCKER_USERNAME }}\n          docker-token: ${{ env.DOCKER_TOKEN }}\n\n      - name: Dump API server logs\n        if: always()\n        run: |\n          cd deployment/docker_compose\n          docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true\n\n      - name: Dump all-container logs\n        if: always()\n        run: |\n          cd deployment/docker_compose\n          docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true\n\n      - name: Upload logs\n        if: always()\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f\n        with:\n          name: docker-all-logs-nightly-${{ matrix.provider }}-llm-provider\n          path: |\n            ${{ github.workspace }}/api_server.log\n            ${{ github.workspace }}/docker-compose.log\n\n      - name: Stop Docker containers\n        if: always()\n        run: |\n          cd deployment/docker_compose\n          docker compose down -v\n"
  },
  {
    "path": ".github/workflows/sandbox-deployment.yml",
    "content": "name: Build and Push Sandbox Image on Tag\n\non:\n  push:\n    tags:\n      - \"experimental-cc4a.*\"\n\n# Restrictive defaults; jobs declare what they need.\npermissions: {}\n\njobs:\n  check-sandbox-changes:\n    runs-on: ubuntu-slim\n    timeout-minutes: 10\n    permissions:\n      contents: read\n    outputs:\n      sandbox-changed: ${{ steps.check.outputs.sandbox-changed }}\n      new-version: ${{ steps.version.outputs.new-version }}\n    steps:\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n          fetch-depth: 0\n\n      - name: Check for sandbox-relevant file changes\n        id: check\n        run: |\n          # Get the previous tag to diff against\n          CURRENT_TAG=\"${GITHUB_REF_NAME}\"\n          PREVIOUS_TAG=$(git tag --sort=-creatordate | grep '^experimental-cc4a\\.' | grep -v \"^${CURRENT_TAG}$\" | head -n 1)\n\n          if [ -z \"$PREVIOUS_TAG\" ]; then\n            echo \"No previous experimental-cc4a tag found, building unconditionally\"\n            echo \"sandbox-changed=true\" >> \"$GITHUB_OUTPUT\"\n            exit 0\n          fi\n\n          echo \"Comparing ${PREVIOUS_TAG}..${CURRENT_TAG}\"\n\n          # Check if any sandbox-relevant files changed\n          SANDBOX_PATHS=(\n            \"backend/onyx/server/features/build/sandbox/\"\n          )\n\n          CHANGED=false\n          for path in \"${SANDBOX_PATHS[@]}\"; do\n            if git diff --name-only \"${PREVIOUS_TAG}..${CURRENT_TAG}\" -- \"$path\" | grep -q .; then\n              echo \"Changes detected in: $path\"\n              CHANGED=true\n              break\n            fi\n          done\n\n          echo \"sandbox-changed=$CHANGED\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Determine new sandbox version\n        id: version\n        if: steps.check.outputs.sandbox-changed == 'true'\n        run: |\n          # Query Docker Hub for the latest versioned tag\n          LATEST_TAG=$(curl -s \"https://hub.docker.com/v2/repositories/onyxdotapp/sandbox/tags?page_size=100\" \\\n            | jq -r '.results[].name' \\\n            | grep -E '^v[0-9]+\\.[0-9]+\\.[0-9]+$' \\\n            | sort -V \\\n            | tail -n 1)\n\n          if [ -z \"$LATEST_TAG\" ]; then\n            echo \"No existing version tags found on Docker Hub, starting at 0.1.1\"\n            NEW_VERSION=\"0.1.1\"\n          else\n            CURRENT_VERSION=\"${LATEST_TAG#v}\"\n            echo \"Latest version on Docker Hub: $CURRENT_VERSION\"\n\n            # Increment patch version\n            MAJOR=$(echo \"$CURRENT_VERSION\" | cut -d. -f1)\n            MINOR=$(echo \"$CURRENT_VERSION\" | cut -d. -f2)\n            PATCH=$(echo \"$CURRENT_VERSION\" | cut -d. -f3)\n            NEW_PATCH=$((PATCH + 1))\n            NEW_VERSION=\"${MAJOR}.${MINOR}.${NEW_PATCH}\"\n          fi\n\n          echo \"New version: $NEW_VERSION\"\n          echo \"new-version=$NEW_VERSION\" >> \"$GITHUB_OUTPUT\"\n\n  build-sandbox-amd64:\n    needs: check-sandbox-changes\n    if: needs.check-sandbox-changes.outputs.sandbox-changed == 'true'\n    runs-on:\n      - runs-on\n      - runner=4cpu-linux-x64\n      - run-id=${{ github.run_id }}-sandbox-amd64\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    permissions:\n      contents: read\n      id-token: write\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: onyxdotapp/sandbox\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push AMD64\n        id: build\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./backend/onyx/server/features/build/sandbox/kubernetes/docker\n          file: ./backend/onyx/server/features/build/sandbox/kubernetes/docker/Dockerfile\n          platforms: linux/amd64\n          labels: ${{ steps.meta.outputs.labels }}\n          cache-from: |\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: |\n            type=inline\n          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n\n  build-sandbox-arm64:\n    needs: check-sandbox-changes\n    if: needs.check-sandbox-changes.outputs.sandbox-changed == 'true'\n    runs-on:\n      - runs-on\n      - runner=4cpu-linux-arm64\n      - run-id=${{ github.run_id }}-sandbox-arm64\n      - extras=ecr-cache\n    timeout-minutes: 90\n    environment: release\n    permissions:\n      contents: read\n      id-token: write\n    outputs:\n      digest: ${{ steps.build.outputs.digest }}\n    env:\n      REGISTRY_IMAGE: onyxdotapp/sandbox\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Checkout\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          persist-credentials: false\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Build and push ARM64\n        id: build\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6\n        with:\n          context: ./backend/onyx/server/features/build/sandbox/kubernetes/docker\n          file: ./backend/onyx/server/features/build/sandbox/kubernetes/docker/Dockerfile\n          platforms: linux/arm64\n          labels: ${{ steps.meta.outputs.labels }}\n          cache-from: |\n            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest\n          cache-to: |\n            type=inline\n          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n\n  merge-sandbox:\n    needs:\n      - check-sandbox-changes\n      - build-sandbox-amd64\n      - build-sandbox-arm64\n    runs-on:\n      - runs-on\n      - runner=2cpu-linux-x64\n      - run-id=${{ github.run_id }}-merge-sandbox\n      - extras=ecr-cache\n    timeout-minutes: 30\n    environment: release\n    permissions:\n      id-token: write\n    env:\n      REGISTRY_IMAGE: onyxdotapp/sandbox\n    steps:\n      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7\n        with:\n          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}\n          aws-region: us-east-2\n\n      - name: Get AWS Secrets\n        uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802\n        with:\n          secret-ids: |\n            DOCKER_USERNAME, deploy/docker-username\n            DOCKER_TOKEN, deploy/docker-token\n          parse-json-secrets: true\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3\n\n      - name: Login to Docker Hub\n        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3\n        with:\n          username: ${{ env.DOCKER_USERNAME }}\n          password: ${{ env.DOCKER_TOKEN }}\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0\n        with:\n          images: ${{ env.REGISTRY_IMAGE }}\n          flavor: |\n            latest=false\n          tags: |\n            type=raw,value=v${{ needs.check-sandbox-changes.outputs.new-version }}\n            type=raw,value=latest\n\n      - name: Create and push manifest\n        env:\n          IMAGE_REPO: ${{ env.REGISTRY_IMAGE }}\n          AMD64_DIGEST: ${{ needs.build-sandbox-amd64.outputs.digest }}\n          ARM64_DIGEST: ${{ needs.build-sandbox-arm64.outputs.digest }}\n          META_TAGS: ${{ steps.meta.outputs.tags }}\n        run: |\n          IMAGES=\"${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}\"\n          docker buildx imagetools create \\\n            $(printf '%s\\n' \"${META_TAGS}\" | xargs -I {} echo -t {}) \\\n            $IMAGES\n"
  },
  {
    "path": ".github/workflows/storybook-deploy.yml",
    "content": "name: Storybook Deploy\nenv:\n  VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }}\n  VERCEL_PROJECT_ID: prj_sG49mVsA25UsxIPhN2pmBJlikJZM\n  VERCEL_CLI: vercel@50.14.1\n  VERCEL_TOKEN: ${{ secrets.VERCEL_TOKEN }}\n\nconcurrency:\n  group: storybook-deploy-production\n  cancel-in-progress: true\n\non:\n  workflow_dispatch:\n  push:\n    branches:\n      - main\n    paths:\n      - \"web/lib/opal/**\"\n      - \"web/src/refresh-components/**\"\n      - \"web/.storybook/**\"\n      - \"web/package.json\"\n      - \"web/package-lock.json\"\npermissions:\n  contents: read\njobs:\n  Deploy-Storybook:\n    runs-on: ubuntu-latest\n    environment: ci-protected\n    timeout-minutes: 30\n    steps:\n      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v4\n        with:\n          persist-credentials: false\n\n      - name: Setup node\n        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v4\n        with:\n          node-version: 22\n          cache: \"npm\"\n          cache-dependency-path: ./web/package-lock.json\n\n      - name: Install dependencies\n        working-directory: web\n        run: npm ci\n\n      - name: Build Storybook\n        working-directory: web\n        run: npm run storybook:build\n\n      - name: Deploy to Vercel (Production)\n        working-directory: web\n        run: npx --yes \"$VERCEL_CLI\" deploy storybook-static/ --prod --yes --token=\"$VERCEL_TOKEN\"\n\n  notify-slack-on-failure:\n    needs: Deploy-Storybook\n    if: always() && needs.Deploy-Storybook.result == 'failure'\n    runs-on: ubuntu-latest\n    environment: ci-protected\n    timeout-minutes: 10\n    steps:\n      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v4\n        with:\n          persist-credentials: false\n          sparse-checkout: .github/actions/slack-notify\n\n      - name: Send Slack notification\n        uses: ./.github/actions/slack-notify\n        with:\n          webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}\n          failed-jobs: \"• Deploy-Storybook\"\n          title: \"🚨 Storybook Deploy Failed\"\n"
  },
  {
    "path": ".github/workflows/sync_foss.yml",
    "content": "name: Sync FOSS Repo\n\non:\n  schedule:\n    # Run daily at 3am PT (11am UTC during PST)\n    - cron: '0 11 * * *'\n  workflow_dispatch:\n\njobs:\n  sync-foss:\n    runs-on: ubuntu-latest\n    environment: ci-protected\n    timeout-minutes: 45\n    permissions:\n      contents: read\n    steps:\n      - name: Checkout main Onyx repo\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          fetch-depth: 0\n          persist-credentials: false\n\n      - name: Install git-filter-repo\n        run: |\n          sudo apt-get update && sudo apt-get install -y git-filter-repo\n\n      - name: Configure SSH for deploy key\n        env:\n          FOSS_REPO_DEPLOY_KEY: ${{ secrets.FOSS_REPO_DEPLOY_KEY }}\n        run: |\n          mkdir -p ~/.ssh\n          echo \"$FOSS_REPO_DEPLOY_KEY\" > ~/.ssh/id_ed25519\n          chmod 600 ~/.ssh/id_ed25519\n          ssh-keyscan github.com >> ~/.ssh/known_hosts\n\n      - name: Set Git config\n        run: |\n          git config --global user.name \"onyx-bot\"\n          git config --global user.email \"bot@onyx.app\"\n\n      - name: Build FOSS version\n        run: bash backend/scripts/make_foss_repo.sh\n\n      - name: Push to FOSS repo\n        env:\n          FOSS_REPO_URL: git@github.com:onyx-dot-app/onyx-foss.git\n        run: |\n          cd /tmp/foss_repo\n          git remote add public \"$FOSS_REPO_URL\"\n          git push --force public main\n"
  },
  {
    "path": ".github/workflows/tag-nightly.yml",
    "content": "name: Nightly Tag Push\n\non:\n  schedule:\n    - cron: \"0 10 * * *\" # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC\n  workflow_dispatch:\n\npermissions:\n  contents: write # Allows pushing tags to the repository\n\njobs:\n  create-and-push-tag:\n    runs-on: ubuntu-slim\n    environment: ci-protected\n    timeout-minutes: 45\n\n    steps:\n      # actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes\n      # see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we\n      # implement here which needs an actual user's deploy key\n      - name: Checkout code\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6\n        with:\n          ssh-key: \"${{ secrets.DEPLOY_KEY }}\"\n          persist-credentials: true\n\n      - name: Set up Git user\n        run: |\n          git config user.name \"Onyx Bot [bot]\"\n          git config user.email \"onyx-bot[bot]@onyx.app\"\n\n      - name: Check for existing nightly tag\n        id: check_tag\n        run: |\n          if git tag --points-at HEAD --list \"nightly-latest*\" | grep -q .; then\n            echo \"A tag starting with 'nightly-latest' already exists on HEAD.\"\n            echo \"tag_exists=true\" >> $GITHUB_OUTPUT\n          else\n            echo \"No tag starting with 'nightly-latest' exists on HEAD.\"\n            echo \"tag_exists=false\" >> $GITHUB_OUTPUT\n          fi\n\n      # don't tag again if HEAD already has a nightly-latest tag on it\n      - name: Create Nightly Tag\n        if: steps.check_tag.outputs.tag_exists == 'false'\n        env:\n          DATE: ${{ github.run_id }}\n        run: |\n          TAG_NAME=\"nightly-latest-$(date +'%Y%m%d')\"\n          echo \"Creating tag: $TAG_NAME\"\n          git tag $TAG_NAME\n\n      - name: Push Tag\n        if: steps.check_tag.outputs.tag_exists == 'false'\n        run: |\n          TAG_NAME=\"nightly-latest-$(date +'%Y%m%d')\"\n          git push origin $TAG_NAME\n\n      - name: Send Slack notification\n        if: failure()\n        uses: ./.github/actions/slack-notify\n        with:\n          webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}\n          title: \"🚨 Nightly Tag Push Failed\"\n          ref-name: ${{ github.ref_name }}\n          failed-jobs: \"create-and-push-tag\"\n"
  },
  {
    "path": ".github/workflows/zizmor.yml",
    "content": "name: Run Zizmor\n\non:\n  push:\n    branches: [\"main\"]\n  pull_request:\n    branches: [\"**\"]\n    paths:\n      - \".github/**\"\n\npermissions: {}\n\njobs:\n  zizmor:\n    name: zizmor\n    runs-on: ubuntu-slim\n    timeout-minutes: 45\n    permissions:\n      security-events: write # needed for SARIF uploads\n    steps:\n      - name: Checkout repository\n        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6.0.2\n        with:\n          persist-credentials: false\n\n      - name: Install the latest version of uv\n        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7\n        with:\n          enable-cache: false\n          version: \"0.9.9\"\n\n      - name: Run zizmor\n        run: uv run --no-sync --with zizmor zizmor --format=sarif . > results.sarif\n        env:\n          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n\n      - name: Upload SARIF file\n        uses: github/codeql-action/upload-sarif@ba454b8ab46733eb6145342877cd148270bb77ab # ratchet:github/codeql-action/upload-sarif@codeql-bundle-v2.23.5\n        with:\n          sarif_file: results.sarif\n          category: zizmor\n"
  },
  {
    "path": ".gitignore",
    "content": "# editors\n.vscode/*\n!/.vscode/env_template.txt\n!/.vscode/env.web_template.txt\n!/.vscode/launch.json\n!/.vscode/tasks.template.jsonc\n.zed\n.cursor\n!/.cursor/mcp.json\n!/.cursor/skills/\n\n# macos\n.DS_store\n\n# python\n.venv\n.mypy_cache\n.idea\n\n# testing\n/web/test-results/\nbackend/onyx/agent_search/main/test_data.json\nbackend/tests/regression/answer_quality/test_data.json\nbackend/tests/regression/search_quality/eval-*\nbackend/tests/regression/search_quality/search_eval_config.yaml\nbackend/tests/regression/search_quality/*.json\nbackend/onyx/evals/data/\nbackend/onyx/evals/one_off/*.json\n*.log\n*.csv\n\n# secret files\n.env\njira_test_env\nsettings.json\n\n# others\n/deployment/data/nginx/app.conf\n/deployment/data/nginx/mcp.conf.inc\n/deployment/data/nginx/mcp_upstream.conf.inc\n*.sw?\n/backend/tests/regression/answer_quality/search_test_config.yaml\n*.egg-info\n\n# Local .terraform directories\n**/.terraform/*\n\n# Local .tfstate files\n*.tfstate\n*.tfstate.*\n\n# Local .terraform.lock.hcl file\n.terraform.lock.hcl\n\nnode_modules\n\n# MCP configs\n.playwright-mcp\n\n# plans\nplans/\n"
  },
  {
    "path": ".greptile/config.json",
    "content": "{\n    \"labels\": [],\n    \"comment\": \"\",\n    \"fixWithAI\": true,\n    \"hideFooter\": false,\n    \"strictness\": 3,\n    \"statusCheck\": true,\n    \"commentTypes\": [\n      \"logic\",\n      \"syntax\",\n      \"style\"\n    ],\n    \"instructions\": \"\",\n    \"disabledLabels\": [],\n    \"excludeAuthors\": [\n      \"dependabot[bot]\",\n      \"renovate[bot]\"\n    ],\n    \"ignoreKeywords\": \"\",\n    \"ignorePatterns\": \"\",\n    \"includeAuthors\": [],\n    \"summarySection\": {\n      \"included\": true,\n      \"collapsible\": false,\n      \"defaultOpen\": false\n    },\n    \"excludeBranches\": [],\n    \"fileChangeLimit\": 300,\n    \"includeBranches\": [],\n    \"includeKeywords\": \"\",\n    \"triggerOnUpdates\": true,\n    \"updateExistingSummaryComment\": true,\n    \"updateSummaryOnly\": false,\n    \"issuesTableSection\": {\n      \"included\": true,\n      \"collapsible\": false,\n      \"defaultOpen\": false\n    },\n    \"statusCommentsEnabled\": true,\n    \"confidenceScoreSection\": {\n      \"included\": true,\n      \"collapsible\": false\n    },\n    \"sequenceDiagramSection\": {\n      \"included\": true,\n      \"collapsible\": false,\n      \"defaultOpen\": false\n    },\n    \"shouldUpdateDescription\": false,\n    \"rules\": [\n      {\n        \"scope\": [\"web/**\"],\n        \"rule\": \"In Onyx's Next.js app, the `app/ee/admin/` directory is a filesystem convention for Enterprise Edition route overrides — it does NOT add an `/ee/` prefix to the URL. Both `app/admin/groups/page.tsx` and `app/ee/admin/groups/page.tsx` serve the same URL `/admin/groups`. Hardcoded `/admin/...` paths in router.push() calls are correct and do NOT break EE deployments. Do not flag hardcoded admin paths as bugs.\"\n      },\n      {\n        \"scope\": [\"web/**\"],\n        \"rule\": \"In Onyx, each API key creates a unique user row in the database with a unique `user_id` (UUID). There is a 1:1 mapping between API keys and their backing user records. Multiple API keys do NOT share the same `user_id`. Do not flag potential duplicate row IDs when using `user_id` from API key descriptors.\"\n      },\n      {\n        \"scope\": [\"backend/**/*.py\"],\n        \"rule\": \"Never raise HTTPException directly in business code. Use `raise OnyxError(OnyxErrorCode.XXX, \\\"message\\\")` from `onyx.error_handling.exceptions`. A global FastAPI exception handler converts OnyxError into structured JSON responses with {\\\"error_code\\\": \\\"...\\\", \\\"detail\\\": \\\"...\\\"}. Error codes are defined in `onyx.error_handling.error_codes.OnyxErrorCode`. For upstream errors with dynamic HTTP status codes, use `status_code_override`: `raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)`.\"\n      }\n    ]\n}\n"
  },
  {
    "path": ".greptile/files.json",
    "content": "[\n  {\n    \"scope\": [],\n    \"path\": \"contributing_guides/best_practices.md\",\n    \"description\": \"Best practices for contributing to the codebase\"\n  },\n  {\n    \"scope\": [\"web/**\"],\n    \"path\": \"web/AGENTS.md\",\n    \"description\": \"Frontend coding standards for the web directory\"\n  },\n  {\n    \"scope\": [\"web/**\"],\n    \"path\": \"web/tests/README.md\",\n    \"description\": \"Frontend testing guide and conventions\"\n  },\n  {\n    \"scope\": [\"web/**\"],\n    \"path\": \"web/CLAUDE.md\",\n    \"description\": \"Single source of truth for frontend coding standards\"\n  },\n  {\n    \"scope\": [\"web/**\"],\n    \"path\": \"web/lib/opal/README.md\",\n    \"description\": \"Opal component library usage guide\"\n  },\n  {\n    \"scope\": [\"backend/**\"],\n    \"path\": \"backend/tests/README.md\",\n    \"description\": \"Backend testing guide covering all 4 test types, fixtures, and conventions\"\n  },\n  {\n    \"scope\": [\"backend/onyx/connectors/**\"],\n    \"path\": \"backend/onyx/connectors/README.md\",\n    \"description\": \"Connector development guide covering design, interfaces, and required changes\"\n  },\n  {\n    \"scope\": [],\n    \"path\": \"CLAUDE.md\",\n    \"description\": \"Project instructions and coding standards\"\n  },\n  {\n    \"scope\": [],\n    \"path\": \"backend/alembic/README.md\",\n    \"description\": \"Migration guidance, including multi-tenant migration behavior\"\n  },\n  {\n    \"scope\": [],\n    \"path\": \"deployment/helm/charts/onyx/values-lite.yaml\",\n    \"description\": \"Lite deployment Helm values and service assumptions\"\n  },\n  {\n    \"scope\": [],\n    \"path\": \"deployment/docker_compose/docker-compose.onyx-lite.yml\",\n    \"description\": \"Lite deployment Docker Compose overlay and disabled service behavior\"\n  }\n]\n"
  },
  {
    "path": ".greptile/rules.md",
    "content": "# Greptile Review Rules\n\n## Type Annotations\n\nUse explicit type annotations for variables to enhance code clarity, especially when moving type hints around in the code.\n\n## Best Practices\n\nUse the \"Engineering Best Practices\" section of `CONTRIBUTING.md` as core review context. Prefer consistency with existing patterns, fix issues in code you touch, avoid tacking new features onto muddy interfaces, fail loudly instead of silently swallowing errors, keep code strictly typed, preserve clear state boundaries, remove duplicate or dead logic, break up overly long functions, avoid hidden import-time side effects, respect module boundaries, and favor correctness-by-construction over relying on callers to use an API correctly.\n\n## TODOs\n\nWhenever a TODO is added, there must always be an associated name or ticket with that TODO in the style of `TODO(name): ...` or `TODO(1234): ...`\n\n## Debugging Code\n\nRemove temporary debugging code before merging to production, especially tenant-specific debugging logs.\n\n## Hardcoded Booleans\n\nWhen hardcoding a boolean variable to a constant value, remove the variable entirely and clean up all places where it's used rather than just setting it to a constant.\n\n## Multi-tenant vs Single-tenant\n\nCode changes must consider both multi-tenant and single-tenant deployments. In multi-tenant mode, preserve tenant isolation, ensure tenant context is propagated correctly, and avoid assumptions that only hold for a single shared schema or globally shared state. In single-tenant mode, avoid introducing unnecessary tenant-specific requirements or cloud-only control-plane dependencies.\n\n## Nginx Routing — New Backend Routes\n\nWhenever a new backend route is added that does NOT start with `/api`, it must also be explicitly added to ALL nginx configs:\n\n- `deployment/helm/charts/onyx/templates/nginx-conf.yaml` (Helm/k8s)\n- `deployment/data/nginx/app.conf.template` (docker-compose dev)\n- `deployment/data/nginx/app.conf.template.prod` (docker-compose prod)\n- `deployment/data/nginx/app.conf.template.no-letsencrypt` (docker-compose no-letsencrypt)\n\nRoutes not starting with `/api` are not caught by the existing `^/(api|openapi\\.json)` location block and will fall through to `location /`, which proxies to the Next.js web server and returns an HTML 404. The new location block must be placed before the `/api` block. Examples of routes that need this treatment: `/scim`, `/mcp`.\n\n## Full vs Lite Deployments\n\nCode changes must consider both regular Onyx deployments and Onyx lite deployments. Lite deployments disable the vector DB, Redis, model servers, and background workers by default, use PostgreSQL-backed cache/auth/file storage, and rely on the API server to handle background work. Do not assume those services are available unless the code path is explicitly limited to full deployments.\n\n## SWR Cache Keys — Always Use SWR_KEYS Registry\n\nAll `useSWR()` calls and `mutate()` calls in the frontend must reference the centralized `SWR_KEYS` registry in `web/src/lib/swr-keys.ts` instead of inline endpoint strings or local string constants. Never write `useSWR(\"/api/some/endpoint\", ...)` or `mutate(\"/api/some/endpoint\")` — always use the corresponding `SWR_KEYS.someEndpoint` constant. If the endpoint does not yet exist in the registry, add it there first. This applies to all variants of an endpoint (e.g. query-string variants like `?get_editable=true` must also be registered as their own key).\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "default_install_hook_types:\n  - pre-commit\n  - post-checkout\n  - post-merge\n  - post-rewrite\nrepos:\n  - repo: https://github.com/astral-sh/uv-pre-commit\n    # From: https://github.com/astral-sh/uv-pre-commit/pull/53/commits/d30b4298e4fb63ce8609e29acdbcf4c9018a483c\n    rev: d30b4298e4fb63ce8609e29acdbcf4c9018a483c\n    hooks:\n      - id: uv-sync\n        args: [\"--locked\", \"--all-extras\"]\n      - id: uv-lock\n      - id: uv-export\n        name: uv-export default.txt\n        args:\n          [\n            \"--no-emit-project\",\n            \"--no-default-groups\",\n            \"--no-hashes\",\n            \"--extra\",\n            \"backend\",\n            \"-o\",\n            \"backend/requirements/default.txt\",\n          ]\n        files: ^(pyproject\\.toml|uv\\.lock|backend/requirements/.*\\.txt)$\n      - id: uv-export\n        name: uv-export dev.txt\n        args:\n          [\n            \"--no-emit-project\",\n            \"--no-default-groups\",\n            \"--no-hashes\",\n            \"--extra\",\n            \"dev\",\n            \"-o\",\n            \"backend/requirements/dev.txt\",\n          ]\n        files: ^(pyproject\\.toml|uv\\.lock|backend/requirements/.*\\.txt)$\n      - id: uv-export\n        name: uv-export ee.txt\n        args:\n          [\n            \"--no-emit-project\",\n            \"--no-default-groups\",\n            \"--no-hashes\",\n            \"--extra\",\n            \"ee\",\n            \"-o\",\n            \"backend/requirements/ee.txt\",\n          ]\n        files: ^(pyproject\\.toml|uv\\.lock|backend/requirements/.*\\.txt)$\n      - id: uv-export\n        name: uv-export model_server.txt\n        args:\n          [\n            \"--no-emit-project\",\n            \"--no-default-groups\",\n            \"--no-hashes\",\n            \"--extra\",\n            \"model_server\",\n            \"-o\",\n            \"backend/requirements/model_server.txt\",\n          ]\n        files: ^(pyproject\\.toml|uv\\.lock|backend/requirements/.*\\.txt)$\n      - id: uv-run\n        name: Check lazy imports\n        args: [\"--active\", \"--with=onyx-devtools\", \"ods\", \"check-lazy-imports\"]\n        pass_filenames: true\n        files: ^backend/(?!\\.venv/|scripts/).*\\.py$\n      # NOTE: This takes ~6s on a single, large module which is prohibitively slow.\n      # - id: uv-run\n      #   name: mypy\n      #   args: [\"--all-extras\", \"mypy\"]\n      #   pass_filenames: true\n      #   files: ^backend/.*\\.py$\n\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0\n    hooks:\n      - id: check-added-large-files\n        name: Check for added large files\n        args: [\"--maxkb=1500\"]\n\n  - repo: https://github.com/rhysd/actionlint\n    rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9\n    hooks:\n      - id: actionlint\n\n  - repo: https://github.com/psf/black\n    rev: 8a737e727ac5ab2f1d4cf5876720ed276dc8dc4b # frozen: 25.1.0\n    hooks:\n      - id: black\n        language_version: python3.11\n\n  # this is a fork which keeps compatibility with black\n  - repo: https://github.com/wimglenn/reorder-python-imports-black\n    rev: f55cd27f90f0cf0ee775002c2383ce1c7820013d # frozen: v3.14.0\n    hooks:\n      - id: reorder-python-imports\n        args: [\"--py311-plus\", \"--application-directories=backend/\"]\n        # need to ignore alembic files, since reorder-python-imports gets confused\n        # and thinks that alembic is a local package since there is a folder\n        # in the backend directory called `alembic`\n        exclude: ^backend/alembic/\n\n  # These settings will remove unused imports with side effects\n  # Note: The repo currently does not and should not have imports with side effects\n  - repo: https://github.com/PyCQA/autoflake\n    rev: 0544741e2b4a22b472d9d93e37d4ea9153820bb1 # frozen: v2.3.1\n    hooks:\n      - id: autoflake\n        args:\n          [\n            \"--remove-all-unused-imports\",\n            \"--remove-unused-variables\",\n            \"--in-place\",\n            \"--recursive\",\n          ]\n\n  - repo: https://github.com/golangci/golangci-lint\n    rev: 5d1e709b7be35cb2025444e19de266b056b7b7ee # frozen: v2.10.1\n    hooks:\n      - id: golangci-lint\n        language_version: \"1.26.1\"\n        entry: bash -c \"find . -name go.mod -not -path './.venv/*' -print0 | xargs -0 -I{} bash -c 'cd \\\"$(dirname {})\\\" && golangci-lint run ./...'\"\n\n  - repo: https://github.com/astral-sh/ruff-pre-commit\n    # Ruff version.\n    rev: 971923581912ef60a6b70dbf0c3e9a39563c9d47 # frozen: v0.11.4\n    hooks:\n      - id: ruff\n\n  - repo: https://github.com/pre-commit/mirrors-prettier\n    rev: ffb6a759a979008c0e6dff86e39f4745a2d9eac4 # frozen: v3.1.0\n    hooks:\n      - id: prettier\n        types_or: [html, css, javascript, ts, tsx]\n        language_version: system\n\n  - repo: https://github.com/sirwart/ripsecrets\n    rev: 7d94620933e79b8acaa0cd9e60e9864b07673d86 # frozen: v0.1.11\n    hooks:\n      - id: ripsecrets\n        args:\n          - --additional-pattern\n          - ^sk-[A-Za-z0-9_\\-]{20,}$\n\n  - repo: local\n    hooks:\n      - id: terraform-fmt\n        name: terraform fmt\n        entry: terraform fmt -recursive\n        language: system\n        pass_filenames: false\n        files: \\.tf$\n\n      - id: npm-install\n        name: npm install\n        description: \"Automatically run 'npm install' after a checkout, pull or rebase\"\n        language: system\n        entry: bash -c 'cd web && npm install --no-save'\n        pass_filenames: false\n        files: ^web/package(-lock)?\\.json$\n        stages: [post-checkout, post-merge, post-rewrite]\n      - id: npm-install-check\n        name: npm install --package-lock-only\n        description: \"Check the 'web/package-lock.json' is updated\"\n        language: system\n        entry: bash -c 'cd web && npm install --package-lock-only'\n        pass_filenames: false\n        files: ^web/package(-lock)?\\.json$\n\n      # Uses tsgo (TypeScript's native Go compiler) for ~10x faster type checking.\n      # This is a preview package - if it breaks:\n      #   1. Try updating: cd web && npm update @typescript/native-preview\n      #   2. Or fallback to tsc: replace 'tsgo' with 'tsc' below\n      - id: typescript-check\n        name: TypeScript type check\n        entry: bash -c 'cd web && npx tsgo --noEmit --project tsconfig.types.json'\n        language: system\n        pass_filenames: false\n        files: ^web/.*\\.(ts|tsx)$\n"
  },
  {
    "path": ".prettierignore",
    "content": "backend/tests/integration/tests/pruning/website\n"
  },
  {
    "path": ".vscode/env.web_template.txt",
    "content": "# Copy this file to .env.web in the .vscode folder.\n# Fill in the <REPLACE THIS> values as needed\n# Web Server specific environment variables\n# Minimal set needed for Next.js dev server\n\n# Auth\nAUTH_TYPE=basic\nDEV_MODE=true\n\n# Enable the full set of Danswer Enterprise Edition features.\n# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you\n# are using this for local testing/development).\nENABLE_PAID_ENTERPRISE_EDITION_FEATURES=false\n\n# Enable Onyx Craft\nENABLE_CRAFT=true\n"
  },
  {
    "path": ".vscode/env_template.txt",
    "content": "# Copy this file to .env in the .vscode folder.\n# Fill in the <REPLACE THIS> values as needed; it is recommended to set the\n# GEN_AI_API_KEY value to avoid having to set up an LLM in the UI.\n# Also check out onyx/backend/scripts/restart_containers.sh for a script to\n# restart the containers which Onyx relies on outside of VSCode/Cursor\n# processes.\n\n\nAUTH_TYPE=basic\n# Recommended for basic auth - used for signing password reset and verification tokens\n# Generate a secure value with: openssl rand -hex 32\nUSER_AUTH_SECRET=\"\"\nDEV_MODE=true\n\n\n# Always keep these on for Dev.\n# Logs model prompts, reasoning, and answer to stdout.\nLOG_ONYX_MODEL_INTERACTIONS=False\n# More verbose logging\nLOG_LEVEL=debug\n\n\n# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically).\nOAUTH_CLIENT_ID=<REPLACE THIS>\nOAUTH_CLIENT_SECRET=<REPLACE THIS>\nOPENID_CONFIG_URL=<REPLACE THIS>\nSAML_CONF_DIR=/<ABSOLUTE PATH TO ONYX>/onyx/backend/ee/onyx/configs/saml_config\n\n\n# Generally not useful for dev, we don't generally want to set up an SMTP server\n# for dev.\nREQUIRE_EMAIL_VERIFICATION=False\n\n\n# Set these so if you wipe the DB, you don't end up having to go through the UI\n# every time.\nGEN_AI_API_KEY=<REPLACE THIS>\nOPENAI_API_KEY=<REPLACE THIS>\n# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper.\nGEN_AI_MODEL_VERSION=gpt-4o\n\n\n# Python stuff\nPYTHONPATH=../backend\nPYTHONUNBUFFERED=1\n\n\n# Enable the full set of Danswer Enterprise Edition features.\n# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you\n# are using this for local testing/development).\nENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False\n\n\n# S3 File Store Configuration (MinIO for local development)\nS3_ENDPOINT_URL=http://localhost:9004\nS3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket\nS3_AWS_ACCESS_KEY_ID=minioadmin\nS3_AWS_SECRET_ACCESS_KEY=minioadmin\n\n\n# Show extra/uncommon connectors.\nSHOW_EXTRA_CONNECTORS=True\n\n\n# Local langsmith tracing\nLANGSMITH_TRACING=\"true\"\nLANGSMITH_ENDPOINT=\"https://api.smith.langchain.com\"\nLANGSMITH_API_KEY=<REPLACE_THIS>\nLANGSMITH_PROJECT=<REPLACE_THIS>\n\n\n# Local Confluence OAuth testing\n# OAUTH_CONFLUENCE_CLOUD_CLIENT_ID=<REPLACE_THIS>\n# OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET=<REPLACE_THIS>\n# NEXT_PUBLIC_TEST_ENV=True\n\n\n# OpenSearch\n# Arbitrary password is fine for local development.\nOPENSEARCH_INITIAL_ADMIN_PASSWORD=<REPLACE THIS>\n"
  },
  {
    "path": ".vscode/launch.json",
    "content": "{\n  // Use IntelliSense to learn about possible attributes.\n  // Hover to view descriptions of existing attributes.\n  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387\n  \"version\": \"0.2.0\",\n  \"compounds\": [\n    {\n      // Dummy entry used to label the group\n      \"name\": \"--- Compound ---\",\n      \"configurations\": [\"--- Individual ---\"],\n      \"presentation\": {\n        \"group\": \"1\"\n      }\n    },\n    {\n      \"name\": \"Run All Onyx Services\",\n      \"configurations\": [\n        \"Web Server\",\n        \"Model Server\",\n        \"API Server\",\n        \"MCP Server\",\n        \"Slack Bot\",\n        \"Celery primary\",\n        \"Celery light\",\n        \"Celery heavy\",\n        \"Celery docfetching\",\n        \"Celery docprocessing\",\n        \"Celery user_file_processing\",\n        \"Celery beat\"\n      ],\n      \"presentation\": {\n        \"group\": \"1\"\n      }\n    },\n    {\n      \"name\": \"Web / Model / API\",\n      \"configurations\": [\"Web Server\", \"Model Server\", \"API Server\"],\n      \"presentation\": {\n        \"group\": \"1\"\n      }\n    },\n    {\n      \"name\": \"Celery\",\n      \"configurations\": [\n        \"Celery primary\",\n        \"Celery light\",\n        \"Celery heavy\",\n        \"Celery kg_processing\",\n        \"Celery monitoring\",\n        \"Celery user_file_processing\",\n        \"Celery docfetching\",\n        \"Celery docprocessing\",\n        \"Celery beat\"\n      ],\n      \"presentation\": {\n        \"group\": \"1\"\n      },\n      \"stopAll\": true\n    }\n  ],\n  \"configurations\": [\n    {\n      // Dummy entry used to label the group\n      \"name\": \"--- Individual ---\",\n      \"type\": \"node\",\n      \"request\": \"launch\",\n      \"presentation\": {\n        \"group\": \"2\",\n        \"order\": 0\n      }\n    },\n    {\n      \"name\": \"Web Server\",\n      \"type\": \"node\",\n      \"request\": \"launch\",\n      \"cwd\": \"${workspaceRoot}/web\",\n      \"runtimeExecutable\": \"npm\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env.web\",\n      \"runtimeArgs\": [\"run\", \"dev\"],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"console\": \"integratedTerminal\",\n      \"consoleTitle\": \"Web Server Console\"\n    },\n    {\n      \"name\": \"Model Server\",\n      \"consoleName\": \"Model Server\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"uvicorn\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"DEBUG\",\n        \"PYTHONUNBUFFERED\": \"1\"\n      },\n      \"args\": [\"model_server.main:app\", \"--reload\", \"--port\", \"9000\"],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Model Server Console\"\n    },\n    {\n      \"name\": \"API Server\",\n      \"consoleName\": \"API Server\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"uvicorn\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"DEBUG\",\n        \"PYTHONUNBUFFERED\": \"1\"\n      },\n      \"args\": [\"onyx.main:app\", \"--reload\", \"--port\", \"8080\"],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"API Server Console\",\n      \"justMyCode\": false\n    },\n    {\n      \"name\": \"Slack Bot\",\n      \"consoleName\": \"Slack Bot\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"program\": \"onyx/onyxbot/slack/listener.py\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"DEBUG\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Slack Bot Console\"\n    },\n    {\n      \"name\": \"Discord Bot\",\n      \"consoleName\": \"Discord Bot\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"program\": \"onyx/onyxbot/discord/client.py\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"DEBUG\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Discord Bot Console\"\n    },\n    {\n      \"name\": \"MCP Server\",\n      \"consoleName\": \"MCP Server\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"uvicorn\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"MCP_SERVER_ENABLED\": \"true\",\n        \"MCP_SERVER_PORT\": \"8090\",\n        \"MCP_SERVER_CORS_ORIGINS\": \"http://localhost:*\",\n        \"LOG_LEVEL\": \"DEBUG\",\n        \"PYTHONUNBUFFERED\": \"1\"\n      },\n      \"args\": [\n        \"onyx.mcp_server.api:mcp_app\",\n        \"--reload\",\n        \"--port\",\n        \"8090\",\n        \"--timeout-graceful-shutdown\",\n        \"0\"\n      ],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"MCP Server Console\"\n    },\n    {\n      \"name\": \"Celery primary\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"celery\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"INFO\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"args\": [\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.primary\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=4\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=primary@%n\",\n        \"-Q\",\n        \"celery\"\n      ],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Celery primary Console\"\n    },\n    {\n      \"name\": \"Celery light\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"celery\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"INFO\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"args\": [\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.light\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=64\",\n        \"--prefetch-multiplier=8\",\n        \"--loglevel=INFO\",\n        \"--hostname=light@%n\",\n        \"-Q\",\n        \"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup,opensearch_migration\"\n      ],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Celery light Console\"\n    },\n    {\n      \"name\": \"Celery heavy\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"celery\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"INFO\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"args\": [\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.heavy\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=4\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=heavy@%n\",\n        \"-Q\",\n        \"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation\"\n      ],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Celery heavy Console\",\n      \"justMyCode\": false\n    },\n    {\n      \"name\": \"Celery kg_processing\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"celery\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"INFO\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"args\": [\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.kg_processing\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=2\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=kg_processing@%n\",\n        \"-Q\",\n        \"kg_processing\"\n      ],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Celery kg_processing Console\"\n    },\n    {\n      \"name\": \"Celery monitoring\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"celery\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"INFO\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"args\": [\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.monitoring\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=1\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=monitoring@%n\",\n        \"-Q\",\n        \"monitoring\"\n      ],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Celery monitoring Console\"\n    },\n    {\n      \"name\": \"Celery user_file_processing\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"celery\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"INFO\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"args\": [\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.user_file_processing\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=2\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=user_file_processing@%n\",\n        \"-Q\",\n        \"user_file_processing,user_file_project_sync,user_file_delete\"\n      ],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Celery user_file_processing Console\",\n      \"justMyCode\": false\n    },\n    {\n      \"name\": \"Celery docfetching\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"celery\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"DEBUG\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"args\": [\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.docfetching\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=docfetching@%n\",\n        \"-Q\",\n        \"connector_doc_fetching\"\n      ],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Celery docfetching Console\",\n      \"justMyCode\": false\n    },\n    {\n      \"name\": \"Celery docprocessing\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"celery\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"ENABLE_MULTIPASS_INDEXING\": \"false\",\n        \"LOG_LEVEL\": \"DEBUG\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"args\": [\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.docprocessing\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=docprocessing@%n\",\n        \"-Q\",\n        \"docprocessing\"\n      ],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Celery docprocessing Console\",\n      \"justMyCode\": false\n    },\n    {\n      \"name\": \"Celery beat\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"celery\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"DEBUG\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"args\": [\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.beat\",\n        \"beat\",\n        \"--loglevel=INFO\"\n      ],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Celery beat Console\"\n    },\n    {\n      \"name\": \"Pytest\",\n      \"consoleName\": \"Pytest\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"pytest\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"DEBUG\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"args\": [\n        \"-v\"\n        // Specify a specific module/test to run or provide nothing to run all tests\n        // \"tests/unit/onyx/llm/answering/test_prune_and_merge.py\"\n      ],\n      \"presentation\": {\n        \"group\": \"2\"\n      },\n      \"consoleTitle\": \"Pytest Console\"\n    },\n    {\n      // Dummy entry used to label the group\n      \"name\": \"--- Tasks ---\",\n      \"type\": \"node\",\n      \"request\": \"launch\",\n      \"presentation\": {\n        \"group\": \"3\",\n        \"order\": 0\n      }\n    },\n    {\n      \"name\": \"Clear and Restart External Volumes and Containers\",\n      \"type\": \"node\",\n      \"request\": \"launch\",\n      \"runtimeExecutable\": \"bash\",\n      \"runtimeArgs\": [\n        \"${workspaceFolder}/backend/scripts/restart_containers.sh\"\n      ],\n      \"cwd\": \"${workspaceFolder}\",\n      \"console\": \"integratedTerminal\",\n      \"presentation\": {\n        \"group\": \"3\"\n      }\n    },\n    {\n      \"name\": \"Eval CLI\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"program\": \"${workspaceFolder}/backend/onyx/evals/eval_cli.py\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"console\": \"integratedTerminal\",\n      \"justMyCode\": false,\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"presentation\": {\n        \"group\": \"3\"\n      },\n      \"env\": {\n        \"LOG_LEVEL\": \"INFO\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"args\": [\"--verbose\"],\n      \"consoleTitle\": \"Eval CLI Console\"\n    },\n    {\n      // Celery jobs launched through a single background script (legacy)\n      // Recommend using the \"Celery (all)\" compound launch instead.\n      \"name\": \"Background Jobs\",\n      \"consoleName\": \"Background Jobs\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"program\": \"scripts/dev_run_background_jobs.py\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"LOG_LEVEL\": \"DEBUG\",\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      }\n    },\n    {\n      \"name\": \"Install Python Requirements\",\n      \"type\": \"node\",\n      \"request\": \"launch\",\n      \"runtimeExecutable\": \"uv\",\n      \"runtimeArgs\": [\n        \"sync\",\n        \"--all-extras\"\n      ],\n      \"cwd\": \"${workspaceFolder}\",\n      \"console\": \"integratedTerminal\",\n      \"presentation\": {\n        \"group\": \"3\"\n      }\n    },\n    {\n      \"name\": \"Build Sandbox Templates\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"module\": \"onyx.server.features.build.sandbox.build_templates\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.vscode/.env\",\n      \"env\": {\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"console\": \"integratedTerminal\",\n      \"presentation\": {\n        \"group\": \"3\"\n      },\n      \"consoleTitle\": \"Build Sandbox Templates\"\n    },\n    {\n      // Dummy entry used to label the group\n      \"name\": \"--- Database ---\",\n      \"type\": \"node\",\n      \"request\": \"launch\",\n      \"presentation\": {\n        \"group\": \"4\",\n        \"order\": 0\n      }\n    },\n    {\n      \"name\": \"Restore seeded database dump\",\n      \"type\": \"node\",\n      \"request\": \"launch\",\n      \"runtimeExecutable\": \"uv\",\n      \"runtimeArgs\": [\n        \"run\",\n        \"--with\",\n        \"onyx-devtools\",\n        \"ods\",\n        \"db\",\n        \"restore\",\n        \"--fetch-seeded\",\n        \"--yes\"\n      ],\n      \"cwd\": \"${workspaceFolder}\",\n      \"console\": \"integratedTerminal\",\n      \"presentation\": {\n        \"group\": \"4\"\n      }\n    },\n    {\n      \"name\": \"Clean restore seeded database dump (destructive)\",\n      \"type\": \"node\",\n      \"request\": \"launch\",\n      \"runtimeExecutable\": \"uv\",\n      \"runtimeArgs\": [\n        \"run\",\n        \"--with\",\n        \"onyx-devtools\",\n        \"ods\",\n        \"db\",\n        \"restore\",\n        \"--fetch-seeded\",\n        \"--clean\",\n        \"--yes\"\n      ],\n      \"cwd\": \"${workspaceFolder}\",\n      \"console\": \"integratedTerminal\",\n      \"presentation\": {\n        \"group\": \"4\"\n      }\n    },\n    {\n      \"name\": \"Create database snapshot\",\n      \"type\": \"node\",\n      \"request\": \"launch\",\n      \"runtimeExecutable\": \"uv\",\n      \"runtimeArgs\": [\n        \"run\",\n        \"--with\",\n        \"onyx-devtools\",\n        \"ods\",\n        \"db\",\n        \"dump\",\n        \"backup.dump\"\n      ],\n      \"cwd\": \"${workspaceFolder}\",\n      \"console\": \"integratedTerminal\",\n      \"presentation\": {\n        \"group\": \"4\"\n      }\n    },\n    {\n      \"name\": \"Clean restore database snapshot (destructive)\",\n      \"type\": \"node\",\n      \"request\": \"launch\",\n      \"runtimeExecutable\": \"uv\",\n      \"runtimeArgs\": [\n        \"run\",\n        \"--with\",\n        \"onyx-devtools\",\n        \"ods\",\n        \"db\",\n        \"restore\",\n        \"--clean\",\n        \"--yes\",\n        \"backup.dump\"\n      ],\n      \"cwd\": \"${workspaceFolder}\",\n      \"console\": \"integratedTerminal\",\n      \"presentation\": {\n        \"group\": \"4\"\n      }\n    },\n    {\n      \"name\": \"Upgrade database to head revision\",\n      \"type\": \"node\",\n      \"request\": \"launch\",\n      \"runtimeExecutable\": \"uv\",\n      \"runtimeArgs\": [\n        \"run\",\n        \"--with\",\n        \"onyx-devtools\",\n        \"ods\",\n        \"db\",\n        \"upgrade\"\n      ],\n      \"cwd\": \"${workspaceFolder}\",\n      \"console\": \"integratedTerminal\",\n      \"presentation\": {\n        \"group\": \"4\"\n      }\n    },\n    {\n      // script to generate the openapi schema\n      \"name\": \"Onyx OpenAPI Schema Generator\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"program\": \"backend/scripts/onyx_openapi_schema.py\",\n      \"cwd\": \"${workspaceFolder}\",\n      \"envFile\": \"${workspaceFolder}/.env\",\n      \"env\": {\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \"backend\"\n      },\n      \"args\": [\"--filename\", \"backend/generated/openapi.json\", \"--generate-python-client\"]\n    },\n    {\n      // script to debug multi tenant db issues\n      \"name\": \"Onyx DB Manager (Top Chunks)\",\n      \"type\": \"debugpy\",\n      \"request\": \"launch\",\n      \"program\": \"scripts/debugging/onyx_db.py\",\n      \"cwd\": \"${workspaceFolder}/backend\",\n      \"envFile\": \"${workspaceFolder}/.env\",\n      \"env\": {\n        \"PYTHONUNBUFFERED\": \"1\",\n        \"PYTHONPATH\": \".\"\n      },\n      \"args\": [\n        \"--password\",\n        \"your_password_here\",\n        \"--port\",\n        \"5433\",\n        \"--report\",\n        \"top-chunks\",\n        \"--filename\",\n        \"generated/tenants_by_num_docs.csv\"\n      ]\n    },\n    {\n      \"name\": \"Debug React Web App in Chrome\",\n      \"type\": \"chrome\",\n      \"request\": \"launch\",\n      \"url\": \"http://localhost:3000\",\n      \"webRoot\": \"${workspaceFolder}/web\"\n    }\n  ]\n}\n"
  },
  {
    "path": ".vscode/tasks.template.jsonc",
    "content": "{\n    \"version\": \"2.0.0\",\n    \"tasks\": [\n        {\n            \"type\": \"austin\",\n            \"label\": \"Profile celery beat\",\n            \"envFile\": \"${workspaceFolder}/.env\",\n            \"options\": {\n              \"cwd\": \"${workspaceFolder}/backend\"\n            },\n            \"command\": [\n                \"sudo\",\n                \"-E\"\n            ],\n            \"args\": [\n              \"celery\",\n              \"-A\",\n              \"onyx.background.celery.versioned_apps.beat\",\n              \"beat\",\n              \"--loglevel=INFO\"\n            ]\n        },\n        {\n            \"type\": \"shell\",\n            \"label\": \"Generate Onyx OpenAPI Python client\",\n            \"cwd\": \"${workspaceFolder}/backend\",\n            \"envFile\": \"${workspaceFolder}/.env\",\n            \"options\": {\n              \"cwd\": \"${workspaceFolder}/backend\"\n            },\n            \"command\": [\n                \"openapi-generator\"\n            ],\n            \"args\": [\n                \"generate\",\n                \"-i\",\n                \"generated/openapi.json\",\n                \"-g\",\n                \"python\",\n                \"-o\",\n                \"generated/onyx_openapi_client\",\n                \"--package-name\",\n                \"onyx_openapi_client\",\n            ]\n        },\n        {\n            \"type\": \"shell\",\n            \"label\": \"Generate Typescript Fetch client (openapi-generator)\",\n            \"envFile\": \"${workspaceFolder}/.env\",\n            \"options\": {\n              \"cwd\": \"${workspaceFolder}\"\n            },\n            \"command\": [\n                \"openapi-generator\"\n            ],\n            \"args\": [\n                \"generate\",\n                \"-i\",\n                \"backend/generated/openapi.json\",\n                \"-g\",\n                \"typescript-fetch\",\n                \"-o\",\n                \"${workspaceFolder}/web/src/lib/generated/onyx_api\",\n                \"--additional-properties=disallowAdditionalPropertiesIfNotPresent=false,legacyDiscriminatorBehavior=false,supportsES6=true\",\n            ]\n        },\n        {\n            \"type\": \"shell\",\n            \"label\": \"Generate TypeScript Client (openapi-ts)\",\n            \"envFile\": \"${workspaceFolder}/.env\",\n            \"options\": {\n              \"cwd\": \"${workspaceFolder}/web\"\n            },\n            \"command\": [\n                \"npx\"\n            ],\n            \"args\": [\n                \"openapi-typescript\",\n                \"../backend/generated/openapi.json\",\n                \"--output\",\n                \"./src/lib/generated/onyx-schema.ts\",\n            ]\n        },\n        {\n            \"type\": \"shell\",\n            \"label\": \"Generate TypeScript Client (orval)\",\n            \"envFile\": \"${workspaceFolder}/.env\",\n            \"options\": {\n              \"cwd\": \"${workspaceFolder}/web\"\n            },\n            \"command\": [\n                \"npx\"\n            ],\n            \"args\": [\n            \t\"orval\",\n                \"--config\",\n                \"orval.config.js\",\n            ]\n        }\n    ]\n}\n"
  },
  {
    "path": "AGENTS.md",
    "content": "# PROJECT KNOWLEDGE BASE\n\nThis file provides guidance to AI agents when working with code in this repository.\n\n## KEY NOTES\n\n- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \\\n  to assume the python venv.\n- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.\n- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password\n  `a`. The app can be accessed at `http://localhost:3000`.\n- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to\n  make sure we see logs coming out from the relevant service.\n- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c \"<SQL>\"`\n- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`\n- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries\n  outside of those directories.\n\n## Project Overview\n\n**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.\n\n### Background Workers (Celery)\n\nOnyx uses Celery for asynchronous task processing with multiple specialized workers:\n\n#### Worker Types\n\n1. **Primary Worker** (`celery_app.py`)\n   - Coordinates core background tasks and system-wide operations\n   - Handles connector management, document sync, pruning, and periodic checks\n   - Runs with 4 threads concurrency\n   - Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync\n\n2. **Docfetching Worker** (`docfetching`)\n   - Fetches documents from external data sources (connectors)\n   - Spawns docprocessing tasks for each document batch\n   - Implements watchdog monitoring for stuck connectors\n   - Configurable concurrency (default from env)\n\n3. **Docprocessing Worker** (`docprocessing`)\n   - Processes fetched documents through the indexing pipeline:\n     - Upserts documents to PostgreSQL\n     - Chunks documents and adds contextual information\n     - Embeds chunks via model server\n     - Writes chunks to Vespa vector database\n     - Updates document metadata\n   - Configurable concurrency (default from env)\n\n4. **Light Worker** (`light`)\n   - Handles lightweight, fast operations\n   - Tasks: vespa operations, document permissions sync, external group sync\n   - Higher concurrency for quick tasks\n\n5. **Heavy Worker** (`heavy`)\n   - Handles resource-intensive operations\n   - Primary task: document pruning operations\n   - Runs with 4 threads concurrency\n\n6. **KG Processing Worker** (`kg_processing`)\n   - Handles Knowledge Graph processing and clustering\n   - Builds relationships between documents\n   - Runs clustering algorithms\n   - Configurable concurrency\n\n7. **Monitoring Worker** (`monitoring`)\n   - System health monitoring and metrics collection\n   - Monitors Celery queues, process memory, and system status\n   - Single thread (monitoring doesn't need parallelism)\n   - Cloud-specific monitoring tasks\n\n8. **User File Processing Worker** (`user_file_processing`)\n   - Processes user-uploaded files\n   - Handles user file indexing and project synchronization\n   - Configurable concurrency\n\n9. **Beat Worker** (`beat`)\n   - Celery's scheduler for periodic tasks\n   - Uses DynamicTenantScheduler for multi-tenant support\n   - Schedules tasks like:\n     - Indexing checks (every 15 seconds)\n     - Connector deletion checks (every 20 seconds)\n     - Vespa sync checks (every 20 seconds)\n     - Pruning checks (every 20 seconds)\n     - KG processing (every 60 seconds)\n     - Monitoring tasks (every 5 minutes)\n     - Cleanup tasks (hourly)\n\n#### Key Features\n\n- **Thread-based Workers**: All workers use thread pools (not processes) for stability\n- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a\n  middleware layer that automatically finds the appropriate tenant ID when sending tasks\n  via Celery Beat.\n- **Task Prioritization**: High, Medium, Low priority queues\n- **Monitoring**: Built-in heartbeat and liveness checking\n- **Failure Handling**: Automatic retry and failure recovery mechanisms\n- **Redis Coordination**: Inter-process communication via Redis\n- **PostgreSQL State**: Task state and metadata stored in PostgreSQL\n\n#### Important Notes\n\n**Defining Tasks**:\n\n- Always use `@shared_task` rather than `@celery_app`\n- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`\n- Never enqueue a task without an expiration. Always supply `expires=` when\n  sending tasks, either from the beat schedule or directly from another task. It\n  should never be acceptable to submit code which enqueues tasks without an\n  expiration, as doing so can lead to unbounded task queue growth.\n\n**Defining APIs**:\nWhen creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the\nfunction.\n\n**Testing Updates**:\nIf you make any updates to a celery worker and you want to test these changes, you will need\nto ask me to restart the celery worker. There is no auto-restart on code-change mechanism.\n\n**Task Time Limits**:\nSince all tasks are executed in thread pools, the time limit features of Celery are silently \ndisabled and won't work. Timeout logic must be implemented within the task itself.\n\n### Code Quality\n\n```bash\n# Install and run pre-commit hooks\npre-commit install\npre-commit run --all-files\n```\n\nNOTE: Always make sure everything is strictly typed (both in Python and Typescript).\n\n## Architecture Overview\n\n### Technology Stack\n\n- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery\n- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS\n- **Database**: PostgreSQL with Redis caching\n- **Search**: Vespa vector database\n- **Auth**: OAuth2, SAML, multi-provider support\n- **AI/ML**: LangChain, LiteLLM, multiple embedding models\n\n### Directory Structure\n\n```\nbackend/\n├── onyx/\n│   ├── auth/                    # Authentication & authorization\n│   ├── chat/                    # Chat functionality & LLM interactions\n│   ├── connectors/              # Data source connectors\n│   ├── db/                      # Database models & operations\n│   ├── document_index/          # Vespa integration\n│   ├── federated_connectors/    # External search connectors\n│   ├── llm/                     # LLM provider integrations\n│   └── server/                  # API endpoints & routers\n├── ee/                          # Enterprise Edition features\n├── alembic/                     # Database migrations\n└── tests/                       # Test suites\n\nweb/\n├── src/app/                     # Next.js app router pages\n├── src/components/              # Reusable React components\n└── src/lib/                     # Utilities & business logic\n```\n\n## Frontend Standards\n\nFrontend standards for the `web/` and `desktop/` projects live in `web/AGENTS.md`.\n\n## Database & Migrations\n\n### Running Migrations\n\n```bash\n# Standard migrations\nalembic upgrade head\n\n# Multi-tenant (Enterprise)\nalembic -n schema_private upgrade head\n```\n\n### Creating Migrations\n\n```bash\n# Create migration\nalembic revision -m \"description\"\n\n# Multi-tenant migration\nalembic -n schema_private revision -m \"description\"\n```\n\nWrite the migration manually and place it in the file that alembic creates when running the above command.\n\n## Testing Strategy\n\nFirst, you must activate the virtual environment with `source .venv/bin/activate`.\n\nThere are 4 main types of tests within Onyx:\n\n### Unit Tests\n\nThese should not assume any Onyx/external services are available to be called.\nInteractions with the outside world should be mocked using `unittest.mock`. Generally, only\nwrite these for complex, isolated modules e.g. `citation_processing.py`.\n\nTo run them:\n\n```bash\npytest -xv backend/tests/unit\n```\n\n### External Dependency Unit Tests\n\nThese tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,\nMinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).\n\nHowever, the actual Onyx containers are not running and with these tests we call the function to test directly.\nWe can also mock components/calls at will.\n\nThe goal with these tests are to minimize mocking while giving some flexibility to mock things that are flakey,\nneed strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called\nwith certain args, something that would be impossible with proper integration tests).\n\nA great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.\n\nTo run them:\n\n```bash\npython -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit\n```\n\n### Integration Tests\n\nStandard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot\nmock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal\nverification is necessary) over any other type of test.\n\nTests are parallelized at a directory level.\n\nWhen writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer (if one exists), calling the appropriate Manager\nclass in the utils over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than\ncalling the utilities directly (e.g. do NOT create admin users with\n`admin_user = UserManager.create(name=\"admin_user\")`, instead use the `admin_user` fixture).\n\nA great example of this type of test is `backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py`.\n\nTo run them:\n\n```bash\npython -m dotenv -f .vscode/.env run -- pytest backend/tests/integration\n```\n\n### Playwright (E2E) Tests\n\nThese tests are an even more complete version of the Integration Tests mentioned above. Has all services of Onyx\nrunning, _including_ the Web Server.\n\nUse these tests for anything that requires significant frontend <-> backend coordination.\n\nTests are located at `web/tests/e2e`. Tests are written in TypeScript.\n\nTo run them:\n\n```bash\nnpx playwright test <TEST_NAME>\n```\n\nFor shared fixtures, best practices, and detailed guidance, see `backend/tests/README.md`.\n\n## Logs\n\nWhen (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access\nto logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)\nwill be tailing their logs to this file.\n\n## Security Considerations\n\n- Never commit API keys or secrets to repository\n- Use encrypted credential storage for connector credentials\n- Follow RBAC patterns for new features\n- Implement proper input validation with Pydantic models\n- Use parameterized queries to prevent SQL injection\n\n## AI/LLM Integration\n\n- Multiple LLM providers supported via LiteLLM\n- Configurable models per feature (chat, search, embeddings)\n- Streaming support for real-time responses\n- Token management and rate limiting\n- Custom prompts and agent actions\n\n## Creating a Plan\n\nWhen creating a plan in the `plans` directory, make sure to include at least these elements:\n\n**Issues to Address**\nWhat the change is meant to do.\n\n**Important Notes**\nThings you come across in your research that are important to the implementation.\n\n**Implementation strategy**\nHow you are going to make the changes happen. High level approach.\n\n**Tests**\nWhat unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to\nverify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.\n\nDo NOT include these: _Timeline_, _Rollback plan_\n\nThis is a minimal list - feel free to include more. Do NOT write code as part of your plan.\nKeep it high level. You can reference certain files or functions though.\n\nBefore writing your plan, make sure to do research. Explore the relevant sections in the codebase.\n\n## Error Handling\n\n**Always raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.\nNever hardcode status codes or use `starlette.status` / `fastapi.status` constants directly.**\n\nA global FastAPI exception handler converts `OnyxError` into a JSON response with the standard\n`{\"error_code\": \"...\", \"detail\": \"...\"}` shape. This eliminates boilerplate and keeps error\nhandling consistent across the entire backend.\n\n```python\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\n\n# ✅ Good\nraise OnyxError(OnyxErrorCode.NOT_FOUND, \"Session not found\")\n\n# ✅ Good — no extra message needed\nraise OnyxError(OnyxErrorCode.UNAUTHENTICATED)\n\n# ✅ Good — upstream service with dynamic status code\nraise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)\n\n# ❌ Bad — using HTTPException directly\nraise HTTPException(status_code=404, detail=\"Session not found\")\n\n# ❌ Bad — starlette constant\nraise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=\"Access denied\")\n```\n\nAvailable error codes are defined in `backend/onyx/error_handling/error_codes.py`. If a new error\ncategory is needed, add it there first — do not invent ad-hoc codes.\n\n**Upstream service errors:** When forwarding errors from an upstream service where the HTTP\nstatus code is dynamic (comes from the upstream response), use `status_code_override`:\n\n```python\nraise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=e.response.status_code)\n```\n\n## Best Practices\n\nIn addition to the other content in this file, best practices for contributing\nto the codebase can be found in the \"Engineering Best Practices\" section of\n`CONTRIBUTING.md`. Understand its contents and follow them.\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing to Onyx\n\nHey there! We are so excited that you're interested in Onyx.\n\n## Table of Contents\n\n- [Contribution Opportunities](#contribution-opportunities)\n- [Contribution Process](#contribution-process)\n- [Development Setup](#development-setup)\n  - [Prerequisites](#prerequisites)\n  - [Backend: Python Requirements](#backend-python-requirements)\n  - [Frontend: Node Dependencies](#frontend-node-dependencies)\n  - [Formatting and Linting](#formatting-and-linting)\n- [Running the Application](#running-the-application)\n  - [VSCode Debugger (Recommended)](#vscode-debugger-recommended)\n  - [Manually Running for Development](#manually-running-for-development)\n  - [Running in Docker](#running-in-docker)\n- [macOS-Specific Notes](#macos-specific-notes)\n- [Engineering Best Practices](#engineering-best-practices)\n  - [Principles and Collaboration](#principles-and-collaboration)\n  - [Style and Maintainability](#style-and-maintainability)\n  - [Performance and Correctness](#performance-and-correctness)\n  - [Repository Conventions](#repository-conventions)\n- [Release Process](#release-process)\n- [Getting Help](#getting-help)\n- [Enterprise Edition Contributions](#enterprise-edition-contributions)\n\n---\n\n## Contribution Opportunities\n\nThe [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to look for and share contribution ideas.\n\nIf you have your own feature that you would like to build, please create an issue and community members can provide feedback and upvote if they feel a common need.\n\n---\n\n## Contribution Process\n\nTo contribute, please follow the\n[\"fork and pull request\"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.\n\n### 1. Get the feature or enhancement approved\n\nCreate a GitHub issue and see if there are upvotes. If you feel the feature is sufficiently value-additive and you would like approval to contribute it to the repo, tag [Yuhong](https://github.com/yuhongsun96) to review.\n\nIf you do not get a response within a week, feel free to email yuhong@onyx.app and include the issue in the message.\n\nNot all small features and enhancements will be accepted as there is a balance between feature richness and bloat. We strive to provide the best user experience possible so we have to be intentional about what we include in the app.\n\n### 2. Get the design approved\n\nThe Onyx team will either provide a design doc and PRD for the feature or request one from you, the contributor. The scope and detail of the design will depend on the individual feature.\n\n### 3. IP attribution for EE contributions\n\nIf you are contributing features to Onyx Enterprise Edition, you are required to sign the [IP Assignment Agreement](contributor_ip_assignment/EE_Contributor_IP_Assignment_Agreement.md).\n\n### 4. Review and testing\n\nYour features must pass all tests and all comments must be addressed prior to merging.\n\n### Implicit agreements\n\nIf we approve an issue, we are promising you the following:\n- Your work will receive timely attention and we will put aside other important items to ensure you are not blocked.\n- You will receive necessary coaching on eng quality, system design, etc. to ensure the feature is completed well.\n- The Onyx team will pull resources and bandwidth from design, PM, and engineering to ensure that you have all the resources to build the feature to the quality required for merging.\n\nBecause this is a large investment from our team, we ask that you:\n- Thoroughly read all the requirements of the design docs, engineering best practices, and try to minimize overhead for the Onyx team.\n- Complete the feature in a timely manner to reduce context switching and an ongoing resource pull from the Onyx team.\n\n---\n\n## Development Setup\n\nOnyx being a fully functional app, relies on some external software, specifically:\n\n- [Postgres](https://www.postgresql.org/) (Relational DB)\n- [OpenSearch](https://opensearch.org/) (Vector DB/Search Engine)\n- [Redis](https://redis.io/) (Cache)\n- [MinIO](https://min.io/) (File Store)\n- [Nginx](https://nginx.org/) (Not needed for development flows generally)\n\n> **Note:**\n> This guide provides instructions to build and run Onyx locally from source with Docker containers providing the above external software.\n> We believe this combination is easier for development purposes. If you prefer to use pre-built container images, see [Running in Docker](#running-in-docker) below.\n\n### Prerequisites\n\n- **Python 3.11** — If using a lower version, modifications will have to be made to the code. Higher versions may have library compatibility issues.\n- **Docker** — Required for running external services (Postgres, OpenSearch, Redis, MinIO).\n- **Node.js v22** — We recommend using [nvm](https://github.com/nvm-sh/nvm) to manage Node installations.\n\n### Backend: Python Requirements\n\nWe use [uv](https://docs.astral.sh/uv/) and recommend creating a [virtual environment](https://docs.astral.sh/uv/pip/environments/#using-a-virtual-environment).\n\n```bash\nuv venv .venv --python 3.11\nsource .venv/bin/activate\n```\n\n_For Windows, activate the virtual environment using Command Prompt:_\n\n```bash\n.venv\\Scripts\\activate\n```\n\nIf using PowerShell, the command slightly differs:\n\n```powershell\n.venv\\Scripts\\Activate.ps1\n```\n\nInstall the required Python dependencies:\n\n```bash\nuv sync --all-extras\n```\n\nInstall Playwright for Python (headless browser required by the Web Connector):\n\n```bash\nuv run playwright install\n```\n\n### Frontend: Node Dependencies\n\n```bash\nnvm install 22 && nvm use 22\nnode -v # verify your active version\n```\n\nNavigate to `onyx/web` and run:\n\n```bash\nnpm i\n```\n\n### Formatting and Linting\n\n#### Backend\n\nSet up pre-commit hooks (black / reorder-python-imports):\n\n```bash\nuv run pre-commit install\n```\n\nWe also use `mypy` for static type checking. Onyx is fully type-annotated, and we want to keep it that way! To run the mypy checks manually:\n\n```bash\nuv run mypy .  # from onyx/backend\n```\n\n#### Frontend\n\nWe use `prettier` for formatting. The desired version will be installed via `npm i` from the `onyx/web` directory. To run the formatter:\n\n```bash\nnpx prettier --write .  # from onyx/web\n```\n\nPre-commit will also run prettier automatically on files you've recently touched. If re-formatted, your commit will fail. Re-stage your changes and commit again.\n\n---\n\n## Running the Application\n\n### VSCode Debugger (Recommended)\n\nWe highly recommend using VSCode's debugger for development.\n\n#### Initial Setup\n\n1. Copy `.vscode/env_template.txt` to `.vscode/.env`\n2. Fill in the necessary environment variables in `.vscode/.env`\n\n#### Using the Debugger\n\nBefore starting, make sure the Docker Daemon is running.\n\n1. Open the Debug view in VSCode (Cmd+Shift+D on macOS)\n2. From the dropdown at the top, select \"Clear and Restart External Volumes and Containers\" and press the green play button\n3. From the dropdown at the top, select \"Run All Onyx Services\" and press the green play button\n4. Navigate to http://localhost:3000 in your browser to start using the app\n5. Set breakpoints by clicking to the left of line numbers to help debug while the app is running\n6. Use the debug toolbar to step through code, inspect variables, etc.\n\n> **Note:** \"Clear and Restart External Volumes and Containers\" will reset your Postgres and OpenSearch (relational-db and index). Only run this if you are okay with wiping your data.\n\n**Features:**\n- Hot reload is enabled for the web server and API servers\n- Python debugging is configured with debugpy\n- Environment variables are loaded from `.vscode/.env`\n- Console output is organized in the integrated terminal with labeled tabs\n\n### Manually Running for Development\n\n#### Docker containers for external software\n\nYou will need Docker installed to run these containers.\n\nNavigate to `onyx/deployment/docker_compose`, then start up Postgres/OpenSearch/Redis/MinIO with:\n\n```bash\ndocker compose -f docker-compose.yml -f docker-compose.dev.yml up -d index relational_db cache minio\n```\n\n(index refers to OpenSearch, relational_db refers to Postgres, and cache refers to Redis)\n\n#### Running Onyx locally\n\nTo start the frontend, navigate to `onyx/web` and run:\n\n```bash\nnpm run dev\n```\n\nNext, start the model server which runs the local NLP models. Navigate to `onyx/backend` and run:\n\n```bash\nuvicorn model_server.main:app --reload --port 9000\n```\n\n_For Windows (for compatibility with both PowerShell and Command Prompt):_\n\n```bash\npowershell -Command \"uvicorn model_server.main:app --reload --port 9000\"\n```\n\nThe first time running Onyx, you will need to run the DB migrations for Postgres. After the first time, this is no longer required unless the DB models change.\n\nNavigate to `onyx/backend` and with the venv active, run:\n\n```bash\nalembic upgrade head\n```\n\nNext, start the task queue which orchestrates the background jobs. Still in `onyx/backend`, run:\n\n```bash\npython ./scripts/dev_run_background_jobs.py\n```\n\nTo run the backend API server, navigate back to `onyx/backend` and run:\n\n```bash\nAUTH_TYPE=basic uvicorn onyx.main:app --reload --port 8080\n```\n\n_For Windows (for compatibility with both PowerShell and Command Prompt):_\n\n```bash\npowershell -Command \"\n    $env:AUTH_TYPE='basic'\n    uvicorn onyx.main:app --reload --port 8080\n\"\n```\n\n> **Note:** If you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.\n\n#### Wrapping up\n\nYou should now have 4 servers running:\n\n- Web server\n- Backend API\n- Model server\n- Background jobs\n\nNow, visit http://localhost:3000 in your browser. You should see the Onyx onboarding wizard where you can connect your external LLM provider to Onyx.\n\nYou've successfully set up a local Onyx instance!\n\n### Running in Docker\n\nYou can run the full Onyx application stack from pre-built images including all external software dependencies.\n\nNavigate to `onyx/deployment/docker_compose` and run:\n\n```bash\ndocker compose up -d\n```\n\nAfter Docker pulls and starts these containers, navigate to http://localhost:3000 to use Onyx.\n\nIf you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes:\n\n```bash\ndocker compose up -d --build\n```\n\n---\n\n## macOS-Specific Notes\n\n### Setting up Python\n\nEnsure [Homebrew](https://brew.sh/) is already set up, then install Python 3.11:\n\n```bash\nbrew install python@3.11\n```\n\nAdd Python 3.11 to your path by adding the following line to `~/.zshrc`:\n\n```\nexport PATH=\"$(brew --prefix)/opt/python@3.11/libexec/bin:$PATH\"\n```\n\n> **Note:** You will need to open a new terminal for the path change above to take effect.\n\n### Setting up Docker\n\nOn macOS, you will need to install [Docker Desktop](https://www.docker.com/products/docker-desktop/) and ensure it is running before continuing with the docker commands.\n\n### Formatting and Linting\n\nmacOS will likely require you to remove some quarantine attributes on some of the hooks for them to execute properly. After installing pre-commit, run the following command:\n\n```bash\nsudo xattr -r -d com.apple.quarantine ~/.cache/pre-commit\n```\n\n---\n\n## Engineering Best Practices\n\n> These are also what we adhere to as a team internally, we love to build in the open and to uplevel our community and each other through being transparent.\n\n### Principles and Collaboration\n\n- **Use 1-way vs 2-way doors.** For 2-way doors, move faster and iterate. For 1-way doors, be more deliberate.\n- **Consistency > being \"right.\"** Prefer consistent patterns across the codebase. If something is truly bad, fix it everywhere.\n- **Fix what you touch (selectively).**\n  - Don't feel obligated to fix every best-practice issue you notice.\n  - Don't introduce new bad practices.\n  - If your change touches code that violates best practices, fix it as part of the change.\n- **Don't tack features on.** When adding functionality, restructure logically as needed to avoid muddying interfaces and accumulating tech debt.\n\n### Style and Maintainability\n\n#### Comments and readability\nAdd clear comments:\n- At logical boundaries (e.g., interfaces) so the reader doesn't need to dig 10 layers deeper.\n- Wherever assumptions are made or something non-obvious/unexpected is done.\n- For complicated flows/functions.\n- Wherever it saves time (e.g., nontrivial regex patterns).\n\n#### Errors and exceptions\n- **Fail loudly** rather than silently skipping work.\n  - Example: raise and let exceptions propagate instead of silently dropping a document.\n- **Don't overuse `try/except`.**\n  - Put `try/except` at the correct logical level.\n  - Do not mask exceptions unless it is clearly appropriate.\n\n#### Typing\n- Everything should be **as strictly typed as possible**.\n- Use `cast` for annoying/loose-typed interfaces (e.g., results of `run_functions_tuples_in_parallel`).\n  - Only `cast` when the type checker sees `Any` or types are too loose.\n- Prefer types that are easy to read.\n  - Avoid dense types like `dict[tuple[str, str], list[list[float]]]`.\n  - Prefer domain models, e.g.:\n    - `EmbeddingModel(provider_name, model_name)` as a Pydantic model\n    - `dict[EmbeddingModel, list[EmbeddingVector]]`\n\n#### State, objects, and boundaries\n- Keep **clear logical boundaries** for state containers and objects.\n- A **config** object should never contain things like a `db_session`.\n- Avoid state containers that are overly nested, or huge + flat (use judgment).\n- Prefer **composition and functional style** over inheritance/OOP.\n- Prefer **no mutation** unless there's a strong reason.\n- State objects should be **intentional and explicit**, ideally nonmutating.\n- Use interfaces/objects to create clear separation of responsibility.\n- Prefer simplicity when there's no clear gain.\n  - Avoid overcomplicated mechanisms like semaphores.\n  - Prefer **hash maps (dicts)** over tree structures unless there's a strong reason.\n\n#### Naming\n- Name variables carefully and intentionally.\n- Prefer long, explicit names when undecided.\n- Avoid single-character variables except for small, self-contained utilities (or not at all).\n- Keep the same object/name consistent through the call stack and within functions when reasonable.\n  - Good: `for token in tokens:`\n  - Bad: `for msg in tokens:` (if iterating tokens)\n- Function names should bias toward **long + descriptive** for codebase search.\n  - IntelliSense can miss call sites; search works best with unique names.\n\n#### Correctness by construction\n- Prefer self-contained correctness — don't rely on callers to \"use it right\" if you can make misuse hard.\n- Avoid redundancies: if a function takes an arg, it shouldn't also take a state object that contains that same arg.\n- No dead code (unless there's a very good reason).\n- No commented-out code in main or feature branches (unless there's a very good reason).\n- No duplicate logic:\n  - Don't copy/paste into branches when shared logic can live above the conditional.\n  - If you're afraid to touch the original, you don't understand it well enough.\n  - LLMs often create subtle duplicate logic — review carefully and remove it.\n  - Avoid \"nearly identical\" objects that confuse when to use which.\n- Avoid extremely long functions with chained logic:\n  - Encapsulate steps into helpers for readability, even if not reused.\n  - \"Pythonic\" multi-step expressions are OK in moderation; don't trade clarity for cleverness.\n\n### Performance and Correctness\n\n- Avoid holding resources for extended periods (DB sessions, locks/semaphores).\n- Validate objects on creation and right before use.\n- Connector code (data to Onyx documents):\n  - Any in-memory structure that can grow without bound based on input must be periodically size-checked.\n  - If a connector is OOMing (often shows up as \"missing celery tasks\"), this is a top thing to check retroactively.\n- Async and event loops:\n  - Never introduce new async/event loop Python code, and try to make existing async code synchronous when possible if it makes sense.\n  - Writing async code without 100% understanding the code and having a concrete reason to do so is likely to introduce bugs and not add any meaningful performance gains.\n\n### Repository Conventions\n\n#### Where code lives\n- Pydantic + data models: `models.py` files.\n- DB interface functions (excluding lazy loading): `db/` directory.\n- LLM prompts: `prompts/` directory, roughly mirroring the code layout that uses them.\n- API routes: `server/` directory.\n\n#### Pydantic and modeling\n- Prefer **Pydantic** over dataclasses.\n- If absolutely required, use `allow_arbitrary_types`.\n\n#### Data conventions\n- Prefer explicit `None` over sentinel empty strings (usually; depends on intent).\n- Prefer explicit identifiers: use string enums instead of integer codes.\n- Avoid magic numbers (co-location is good when necessary). **Always avoid magic strings.**\n\n#### Logging\n- Log messages where they are created.\n- Don't propagate log messages around just to log them elsewhere.\n\n#### Encapsulation\n- Don't use private attributes/methods/properties from other classes/modules.\n- \"Private\" is private — respect that boundary.\n\n#### SQLAlchemy guidance\n- Lazy loading is often bad at scale, especially across multiple list relationships.\n- Be careful when accessing SQLAlchemy object attributes:\n  - It can help avoid redundant DB queries,\n  - but it can also fail if accessed outside an active session,\n  - and lazy loading can add hidden DB dependencies to otherwise \"simple\" functions.\n- Reference: https://www.reddit.com/r/SQLAlchemy/comments/138f248/joinedload_vs_selectinload/\n\n#### Trunk-based development and feature flags\n- **PRs should contain no more than 500 lines of real change.**\n- **Merge to main frequently.** Avoid long-lived feature branches — they create merge conflicts and integration pain.\n- **Use feature flags for incremental rollout.**\n  - Large features should be merged in small, shippable increments behind a flag.\n  - This allows continuous integration without exposing incomplete functionality.\n- **Keep flags short-lived.** Once a feature is fully rolled out, remove the flag and dead code paths promptly.\n- **Flag at the right level.** Prefer flagging at API/UI entry points rather than deep in business logic.\n- **Test both flag states.** Ensure the codebase works correctly with the flag on and off.\n\n#### Miscellaneous\n- Any TODOs you add in the code must be accompanied by either the name/username of the owner of that TODO, or an issue number for an issue referencing that piece of work.\n- Avoid module-level logic that runs on import, which leads to import-time side effects. Essentially every piece of meaningful logic should exist within some function that has to be explicitly invoked. Acceptable exceptions may include loading environment variables or setting up loggers.\n  - If you find yourself needing something like this, you may want that logic to exist in a file dedicated for manual execution (contains `if __name__ == \"__main__\":`) which should not be imported by anything else.\n- Do not conflate Python scripts you intend to run from the command line (contains `if __name__ == \"__main__\":`) with modules you intend to import from elsewhere. If for some unlikely reason they have to be the same file, any logic specific to executing the file (including imports) should be contained in the `if __name__ == \"__main__\":` block.\n  - Generally these executable files exist in `backend/scripts/`.\n\n---\n\n## Release Process\n\nOnyx loosely follows the SemVer versioning standard.\nA set of Docker containers will be pushed automatically to DockerHub with every tag.\nYou can see the containers [here](https://hub.docker.com/search?q=onyx%2F).\n\n---\n\n## Getting Help\n\nWe have support channels and generally interesting discussions on our [Discord](https://discord.gg/4NA5SbzrWb).\n\nSee you there!\n\n---\n\n## Enterprise Edition Contributions\n\nIf you are contributing features to Onyx Enterprise Edition (code under any `ee/` directory), you are required to sign the [IP Assignment Agreement](contributor_ip_assignment/EE_Contributor_IP_Assignment_Agreement.md) ([PDF version](contributor_ip_assignment/EE_Contributor_IP_Assignment_Agreement.pdf)).\n"
  },
  {
    "path": "LICENSE",
    "content": "Copyright (c) 2023-present DanswerAI, Inc.\n\nPortions of this software are licensed as follows:\n\n- All content that resides under \"ee\" directories of this repository is licensed under the Onyx Enterprise License. Each ee directory contains an identical copy of this license at its root:\n  - backend/ee/LICENSE\n  - web/src/app/ee/LICENSE\n  - web/src/ee/LICENSE\n- All third party components incorporated into the Onyx Software are licensed under the original license provided by the owner of the applicable component.\n- Content outside of the above mentioned directories or restrictions above is available under the \"MIT Expat\" license as defined below.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "<a name=\"readme-top\"></a>\n\n<h2 align=\"center\">\n    <a href=\"https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme\"> <img width=\"50%\" src=\"https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true\" /></a>\n</h2>\n\n<p align=\"center\">\n    <a href=\"https://discord.gg/TDJ59cGV2X\" target=\"_blank\">\n        <img src=\"https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white\" alt=\"Discord\" />\n    </a>\n    <a href=\"https://docs.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme\" target=\"_blank\">\n        <img src=\"https://img.shields.io/badge/docs-view-blue\" alt=\"Documentation\" />\n    </a>\n    <a href=\"https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme\" target=\"_blank\">\n        <img src=\"https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue\" alt=\"Documentation\" />\n    </a>\n    <a href=\"https://github.com/onyx-dot-app/onyx/blob/main/LICENSE\" target=\"_blank\">\n        <img src=\"https://img.shields.io/static/v1?label=license&message=MIT&color=blue\" alt=\"License\" />\n    </a>\n</p>\n\n<p align=\"center\">\n  <a href=\"https://trendshift.io/repositories/12516\" target=\"_blank\">\n    <img src=\"https://trendshift.io/api/badge/repositories/12516\" alt=\"onyx-dot-app/onyx | Trendshift\" style=\"width: 250px; height: 55px;\" />\n  </a>\n</p>\n\n# Onyx - The Open Source AI Platform\n\n**[Onyx](https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)** is the application layer for LLMs - bringing a feature-rich interface that can be easily hosted by anyone.\nOnyx enables LLMs through advanced capabilities like RAG, web search, code execution, file creation, deep research and more.\n\nConnect your applications with over 50+ indexing based connectors provided out of the box or via MCP.\n\n> [!TIP]\n> Deploy with a single command:\n> ```\n> curl -fsSL https://onyx.app/install_onyx.sh | bash\n> ```\n\n![Onyx Chat Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v3.0.0/Onyx.gif)\n\n---\n\n## ⭐ Features\n\n- **🔍 Agentic RAG:** Get best in class search and answer quality based on hybrid index + AI Agents for information retrieval\n  - Benchmark to release soon!\n- **🔬 Deep Research:** Get in depth reports with a multi-step research flow.\n  - Top of [leaderboard](https://github.com/onyx-dot-app/onyx_deep_research_bench) as of Feb 2026.\n- **🤖 Custom Agents:** Build AI Agents with unique instructions, knowledge, and actions.\n- **🌍 Web Search:** Browse the web to get up to date information.\n  - Supports Serper, Google PSE, Brave, SearXNG, and others.\n  - Comes with an in house web crawler and support for Firecrawl/Exa.\n- **📄 Artifacts:** Generate documents, graphics, and other downloadable artifacts.\n- **▶️ Actions & MCP:** Let Onyx agents interact with external applications, comes with flexible Auth options.\n- **💻 Code Execution:** Execute code in a sandbox to analyze data, render graphs, or modify files.\n- **🎙️ Voice Mode:** Chat with Onyx via text-to-speech and speech-to-text.\n- **🎨 Image Generation:** Generate images based on user prompts.\n\nOnyx supports all major LLM providers, both self-hosted (like Ollama, LiteLLM, vLLM, etc.) and proprietary (like Anthropic, OpenAI, Gemini, etc.).\n\nTo learn more - check out our [docs](https://docs.onyx.app/welcome?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)!\n\n---\n\n## 🚀 Deployment Modes\n\n> Onyx supports deployments in Docker, Kubernetes, Helm/Terraform and provides guides for major cloud providers.\n> Detailed deployment guides found [here](https://docs.onyx.app/deployment/overview).\n\nOnyx supports two separate deployment options: standard and lite.\n\n#### Onyx Lite\n\nThe Lite mode can be thought of as a lightweight Chat UI. It requires less resources (under 1GB memory) and runs a less complex stack.\nIt is great for users who want to test out Onyx quickly or for teams who are only interested in the Chat UI and Agents functionalities.\n\n#### Standard Onyx\n\nThe complete feature set of Onyx which is recommended for serious users and larger teams. Additional components not included in Lite mode:\n- Vector + Keyword index for RAG.\n- Background containers to run job queues and workers for syncing knowledge from connectors.\n- AI model inference servers to run deep learning models used during indexing and inference.\n- Performance optimizations for large scale use via in memory cache (Redis) and blob store (MinIO).\n\n> [!TIP]  \n> **To try Onyx for free without deploying, visit [Onyx Cloud](https://cloud.onyx.app/signup?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)**.\n\n---\n\n## 🏢 Onyx for Enterprise\n\nOnyx is built for teams of all sizes, from individual users to the largest global enterprises:\n- 👥 Collaboration: Share chats and agents with other members of your organization.\n- 🔐 Single Sign On: SSO via Google OAuth, OIDC, or SAML. Group syncing and user provisioning via SCIM.\n- 🛡️ Role Based Access Control: RBAC for sensitive resources like access to agents, actions, etc.\n- 📊 Analytics: Usage graphs broken down by teams, LLMs, or agents.\n- 🕵️ Query History: Audit usage to ensure safe adoption of AI in your organization.\n- 💻 Custom code: Run custom code to remove PII, reject sensitive queries, or to run custom analysis.\n- 🎨 Whitelabeling: Customize the look and feel of Onyx with custom naming, icons, banners, and more.\n\n## 📚 Licensing\n\nThere are two editions of Onyx:\n\n- Onyx Community Edition (CE) is available freely under the MIT license and covers all of the core features for Chat, RAG, Agents, and Actions.\n- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.\n\nFor feature details, check out [our website](https://www.onyx.app/pricing?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme).\n\n## 👪 Community\n\nJoin our open source community on **[Discord](https://discord.gg/TDJ59cGV2X)**!\n\n## 💡 Contributing\n\nLooking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.\n"
  },
  {
    "path": "backend/.dockerignore",
    "content": "**/__pycache__\nvenv/\nenv/\n*.egg-info\n.cache\n.git/\n.svn/\n.vscode/\n.idea/\n*.log\nlog/\n.env\nsecrets.yaml\nbuild/\ndist/\n.coverage\nhtmlcov/\nmodel_server/legacy/\n\n# Craft: demo_data directory should be unzipped at container startup, not copied\n**/demo_data/\n# Craft: templates/outputs/venv is created at container startup\n**/templates/outputs/venv\n"
  },
  {
    "path": "backend/.gitignore",
    "content": "__pycache__/\n.mypy_cache\n.idea/\nsite_crawls/\n.ipynb_checkpoints/\napi_keys.py\n*ipynb\n.env*\nvespa-app.zip\ndynamic_config_storage/\ncelerybeat-schedule*\nonyx/connectors/salesforce/data/\n.test.env\n/generated\n"
  },
  {
    "path": "backend/.trivyignore",
    "content": "# https://github.com/madler/zlib/issues/868\n# Pulled in with base Debian image, it's part of the contrib folder but unused\n# zlib1g is fine\n# Will be gone with Debian image upgrade\n# No impact in our settings\nCVE-2023-45853\n\n# krb5 related, worst case is denial of service by resource exhaustion\n# Accept the risk\nCVE-2024-26458\nCVE-2024-26461\nCVE-2024-26462\nCVE-2024-26458\nCVE-2024-26461\nCVE-2024-26462\nCVE-2024-26458\nCVE-2024-26461\nCVE-2024-26462\nCVE-2024-26458\nCVE-2024-26461\nCVE-2024-26462\n\n# Specific to Firefox which we do not use\n# No impact in our settings\nCVE-2024-0743\n\n# bind9 related, worst case is denial of service by CPU resource exhaustion\n# Accept the risk\nCVE-2023-50387\nCVE-2023-50868\nCVE-2023-50387\nCVE-2023-50868\n\n# libexpat1, XML parsing resource exhaustion\n# We don't parse any user provided XMLs\n# No impact in our settings\nCVE-2023-52425\nCVE-2024-28757\n\n# libharfbuzz0b, O(n^2) growth, worst case is denial of service\n# Accept the risk\nCVE-2023-25193\n"
  },
  {
    "path": "backend/Dockerfile",
    "content": "FROM python:3.11.7-slim-bookworm\n\nLABEL com.danswer.maintainer=\"founders@onyx.app\"\nLABEL com.danswer.description=\"This image is the web/frontend container of Onyx which \\\ncontains code for both the Community and Enterprise editions of Onyx. If you do not \\\nhave a contract or agreement with DanswerAI, you are not permitted to use the Enterprise \\\nEdition features outside of personal development or testing purposes. Please reach out to \\\nfounders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx\"\n\n# Build argument for Craft support (disabled by default)\n# Use --build-arg ENABLE_CRAFT=true to include Node.js and opencode CLI\nARG ENABLE_CRAFT=false\n\n# DO_NOT_TRACK is used to disable telemetry for Unstructured\nENV DANSWER_RUNNING_IN_DOCKER=\"true\" \\\n    DO_NOT_TRACK=\"true\" \\\n    PLAYWRIGHT_BROWSERS_PATH=\"/app/.cache/ms-playwright\"\n\n# Create non-root user for security best practices\nRUN groupadd -g 1001 onyx && \\\n    useradd -u 1001 -g onyx -m -s /bin/bash onyx && \\\n    mkdir -p /var/log/onyx && \\\n    chmod 755 /var/log/onyx && \\\n    chown onyx:onyx /var/log/onyx\n\nCOPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/\n\n# Install system dependencies\n# cmake needed for psycopg (postgres)\n# libpq-dev needed for psycopg (postgres)\n# curl included just for users' convenience\n# zip for Vespa step futher down\n# ca-certificates for HTTPS\nRUN apt-get update && \\\n    apt-get install -y \\\n        cmake \\\n        curl \\\n        zip \\\n        ca-certificates \\\n        libgnutls30 \\\n        libblkid1 \\\n        libmount1 \\\n        libsmartcols1 \\\n        libuuid1 \\\n        libxmlsec1-dev \\\n        pkg-config \\\n        gcc \\\n        nano \\\n        vim \\\n        # Install procps so kubernetes exec sessions can use ps aux for debugging\n        procps \\\n        libjemalloc2 \\\n        && \\\n    rm -rf /var/lib/apt/lists/* && \\\n    apt-get clean\n\n# Conditionally install Node.js 20 for Craft (required for Next.js)\n# Only installed when ENABLE_CRAFT=true\nRUN if [ \"$ENABLE_CRAFT\" = \"true\" ]; then \\\n        echo \"Installing Node.js 20 for Craft support...\" && \\\n        curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \\\n        apt-get install -y nodejs && \\\n        rm -rf /var/lib/apt/lists/*; \\\n    fi\n\n# Conditionally install opencode CLI for Craft agent functionality\n# Only installed when ENABLE_CRAFT=true\n# TODO: download a specific, versioned release of the opencode CLI\nRUN if [ \"$ENABLE_CRAFT\" = \"true\" ]; then \\\n        echo \"Installing opencode CLI for Craft support...\" && \\\n        curl -fsSL https://opencode.ai/install | bash; \\\n    fi\nENV PATH=\"/root/.opencode/bin:${PATH}\"\n\n# Install Python dependencies\n# Remove py which is pulled in by retry, py is not needed and is a CVE\nCOPY ./requirements/default.txt /tmp/requirements.txt\nCOPY ./requirements/ee.txt /tmp/ee-requirements.txt\nRUN uv pip install --system --no-cache-dir --upgrade \\\n        -r /tmp/requirements.txt \\\n        -r /tmp/ee-requirements.txt && \\\n    pip uninstall -y py && \\\n    playwright install chromium && \\\n    playwright install-deps chromium && \\\n    chown -R onyx:onyx /app && \\\n    ln -s /usr/local/bin/supervisord /usr/bin/supervisord && \\\n    # Cleanup for CVEs and size reduction\n    # https://github.com/tornadoweb/tornado/issues/3107\n    # xserver-common and xvfb included by playwright installation but not needed after\n    # perl-base is part of the base Python Debian image but not needed for Onyx functionality\n    # perl-base could only be removed with --allow-remove-essential\n    apt-get update && \\\n    apt-get remove -y --allow-remove-essential \\\n        perl-base \\\n        xserver-common \\\n        xvfb \\\n        cmake \\\n        libldap-2.5-0 \\\n        libxmlsec1-dev \\\n        pkg-config \\\n        gcc && \\\n    # Install here to avoid some packages being cleaned up above\n    apt-get install -y \\\n        libxmlsec1-openssl \\\n        # Install postgresql-client for easy manual tests\n        postgresql-client && \\\n    apt-get autoremove -y && \\\n    rm -rf /var/lib/apt/lists/* && \\\n    rm -rf ~/.cache/uv /tmp/*.txt && \\\n    rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key\n\n# Pre-downloading models for setups with limited egress\nRUN python -c \"from tokenizers import Tokenizer; \\\nTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')\"\n\n# Pre-downloading NLTK for setups with limited egress\nRUN python -c \"import nltk; \\\n    nltk.download('stopwords', quiet=True); \\\n    nltk.download('punkt_tab', quiet=True);\"\n# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed\n\n# Pre-downloading tiktoken for setups with limited egress\nRUN python -c \"import tiktoken; \\\ntiktoken.get_encoding('cl100k_base')\"\n\n# Set up application files\nWORKDIR /app\n\n# Enterprise Version Files\nCOPY --chown=onyx:onyx ./ee /app/ee\nCOPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf\n\n# Set up application files\nCOPY --chown=onyx:onyx ./onyx /app/onyx\nCOPY --chown=onyx:onyx ./shared_configs /app/shared_configs\nCOPY --chown=onyx:onyx ./alembic /app/alembic\nCOPY --chown=onyx:onyx ./alembic_tenants /app/alembic_tenants\nCOPY --chown=onyx:onyx ./alembic.ini /app/alembic.ini\nCOPY supervisord.conf /usr/etc/supervisord.conf\nCOPY --chown=onyx:onyx ./static /app/static\nCOPY --chown=onyx:onyx ./keys /app/keys\n\n# Escape hatch scripts\nCOPY --chown=onyx:onyx ./scripts/debugging /app/scripts/debugging\nCOPY --chown=onyx:onyx ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py\nCOPY --chown=onyx:onyx ./scripts/supervisord_entrypoint.sh /app/scripts/supervisord_entrypoint.sh\nCOPY --chown=onyx:onyx ./scripts/setup_craft_templates.sh /app/scripts/setup_craft_templates.sh\nCOPY --chown=onyx:onyx ./scripts/reencrypt_secrets.py /app/scripts/reencrypt_secrets.py\nRUN chmod +x /app/scripts/supervisord_entrypoint.sh /app/scripts/setup_craft_templates.sh\n\n# Run Craft template setup at build time when ENABLE_CRAFT=true\n# This pre-bakes demo data, Python venv, and npm dependencies into the image\nRUN if [ \"$ENABLE_CRAFT\" = \"true\" ]; then \\\n        echo \"Running Craft template setup at build time...\" && \\\n        ENABLE_CRAFT=true /app/scripts/setup_craft_templates.sh; \\\n    fi\n\n# Set Craft template paths to the in-image locations\n# These match the paths where setup_craft_templates.sh creates the templates\nENV OUTPUTS_TEMPLATE_PATH=/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs\nENV VENV_TEMPLATE_PATH=/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/venv\n\n# Put logo in assets\nCOPY --chown=onyx:onyx ./assets /app/assets\n\nENV PYTHONPATH=/app\n\n# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.\nARG ONYX_VERSION=0.0.0-dev\nENV ONYX_VERSION=${ONYX_VERSION}\n\n# Use jemalloc instead of glibc malloc to reduce memory fragmentation\n# in long-running Python processes (API server, Celery workers).\n# The soname is architecture-independent; the dynamic linker resolves\n# the correct path from standard library directories.\n# Placed after all RUN steps so build-time processes are unaffected.\nENV LD_PRELOAD=libjemalloc.so.2\n\n# Default command which does nothing\n# This container is used by api server and background which specify their own CMD\nCMD [\"tail\", \"-f\", \"/dev/null\"]\n"
  },
  {
    "path": "backend/Dockerfile.model_server",
    "content": "# Base stage with dependencies\nFROM python:3.11.7-slim-bookworm AS base\n\nENV DANSWER_RUNNING_IN_DOCKER=\"true\" \\\n    HF_HOME=/app/.cache/huggingface\n\nCOPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/\n\nRUN mkdir -p /app/.cache/huggingface\n\nCOPY ./requirements/model_server.txt /tmp/requirements.txt\nRUN uv pip install --system --no-cache-dir --upgrade \\\n        -r /tmp/requirements.txt && \\\n    rm -rf ~/.cache/uv /tmp/*.txt\n\n# Stage for downloading embedding models\nFROM base AS embedding-models\nRUN python -c \"from huggingface_hub import snapshot_download; \\\nsnapshot_download('nomic-ai/nomic-embed-text-v1');\"\n\n# Initialize SentenceTransformer to cache the custom architecture\nRUN python -c \"from sentence_transformers import SentenceTransformer; \\\nSentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);\"\n\n# Final stage - combine all downloads\nFROM base AS final\n\nLABEL com.danswer.maintainer=\"founders@onyx.app\"\nLABEL com.danswer.description=\"This image is for the Onyx model server which runs all of the \\\nAI models for Onyx. This container and all the code is MIT Licensed and free for all to use. \\\nYou can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details, \\\nvisit https://github.com/onyx-dot-app/onyx.\"\n\n# Create non-root user for security best practices\nRUN groupadd -g 1001 onyx && \\\n    useradd -u 1001 -g onyx -m -s /bin/bash onyx && \\\n    mkdir -p /var/log/onyx && \\\n    chmod 755 /var/log/onyx && \\\n    chown onyx:onyx /var/log/onyx\n\n# In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while\n# running Onyx, move the current contents of the cache folder to a temporary location to ensure\n# it's preserved in order to combine with the user's cache contents\nCOPY --chown=onyx:onyx --from=embedding-models /app/.cache/huggingface /app/.cache/temp_huggingface\n\nWORKDIR /app\n\n# Utils used by model server\nCOPY ./onyx/utils/logger.py /app/onyx/utils/logger.py\nCOPY ./onyx/utils/middleware.py /app/onyx/utils/middleware.py\nCOPY ./onyx/utils/tenant.py /app/onyx/utils/tenant.py\n\n# Place to fetch version information\nCOPY ./onyx/__init__.py /app/onyx/__init__.py\n\n# Shared between Onyx Backend and Model Server\nCOPY ./shared_configs /app/shared_configs\n\n# Model Server main code\nCOPY ./model_server /app/model_server\n\nENV PYTHONPATH=/app\n\n# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.\nARG ONYX_VERSION=0.0.0-dev\nENV ONYX_VERSION=${ONYX_VERSION}\n\nCMD [\"uvicorn\", \"model_server.main:app\", \"--host\", \"0.0.0.0\", \"--port\", \"9000\"]\n"
  },
  {
    "path": "backend/alembic/README.md",
    "content": "<!-- ONYX_METADATA={\"link\": \"https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md\"} -->\n\n# Alembic DB Migrations\n\nThese files are for creating/updating the tables in the Relational DB (Postgres).\nOnyx migrations use a generic single-database configuration with an async dbapi.\n\n## To generate new migrations:\n\nFrom onyx/backend, run:\n`alembic revision -m <DESCRIPTION_OF_MIGRATION>`\n\nNote: you cannot use the `--autogenerate` flag as the automatic schema parsing does not work.\n\nManually populate the upgrade and downgrade in your new migration.\n\nMore info can be found here: https://alembic.sqlalchemy.org/en/latest/autogenerate.html\n\n## Running migrations\n\nTo run all un-applied migrations:\n`alembic upgrade head`\n\nTo undo migrations:\n`alembic downgrade -X`\nwhere X is the number of migrations you want to undo from the current state\n\n### Multi-tenant migrations\n\nFor multi-tenant deployments, you can use additional options:\n\n**Upgrade all tenants:**\n```bash\nalembic -x upgrade_all_tenants=true upgrade head\n```\n\n**Upgrade specific schemas:**\n```bash\n# Single schema\nalembic -x schemas=tenant_12345678-1234-1234-1234-123456789012 upgrade head\n\n# Multiple schemas (comma-separated)\nalembic -x schemas=tenant_12345678-1234-1234-1234-123456789012,public,another_tenant upgrade head\n```\n\n**Upgrade tenants within an alphabetical range:**\n```bash\n# Upgrade tenants 100-200 when sorted alphabetically (positions 100 to 200)\nalembic -x upgrade_all_tenants=true -x tenant_range_start=100 -x tenant_range_end=200 upgrade head\n\n# Upgrade tenants starting from position 1000 alphabetically\nalembic -x upgrade_all_tenants=true -x tenant_range_start=1000 upgrade head\n\n# Upgrade first 500 tenants alphabetically\nalembic -x upgrade_all_tenants=true -x tenant_range_end=500 upgrade head\n```\n\n**Continue on error (for batch operations):**\n```bash\nalembic -x upgrade_all_tenants=true -x continue=true upgrade head\n```\n\nThe tenant range filtering works by:\n1. Sorting tenant IDs alphabetically\n2. Using 1-based position numbers (1st, 2nd, 3rd tenant, etc.)\n3. Filtering to the specified range of positions\n4. Non-tenant schemas (like 'public') are always included\n"
  },
  {
    "path": "backend/alembic/env.py",
    "content": "from typing import Any, Literal\nfrom onyx.db.engine.iam_auth import get_iam_auth_token\nfrom onyx.configs.app_configs import USE_IAM_AUTH\nfrom onyx.configs.app_configs import POSTGRES_HOST\nfrom onyx.configs.app_configs import POSTGRES_PORT\nfrom onyx.configs.app_configs import POSTGRES_USER\nfrom onyx.configs.app_configs import AWS_REGION_NAME\nfrom onyx.db.engine.sql_engine import build_connection_string\nfrom onyx.db.engine.tenant_utils import get_all_tenant_ids\nfrom sqlalchemy import event\nfrom sqlalchemy import pool\nfrom sqlalchemy import text\nfrom sqlalchemy.engine.base import Connection\nimport os\nimport ssl\nimport asyncio\nimport logging\nfrom logging.config import fileConfig\n\nfrom alembic import context\nfrom sqlalchemy.ext.asyncio import create_async_engine\nfrom sqlalchemy.sql.schema import SchemaItem\nfrom onyx.configs.constants import SSL_CERT_FILE\nfrom shared_configs.configs import (\n    MULTI_TENANT,\n    POSTGRES_DEFAULT_SCHEMA,\n    TENANT_ID_PREFIX,\n)\nfrom onyx.db.models import Base\nfrom celery.backends.database.session import ResultModelBase  # type: ignore\nfrom onyx.db.engine.sql_engine import SqlEngine\n\n# Make sure in alembic.ini [logger_root] level=INFO is set or most logging will be\n# hidden! (defaults to level=WARN)\n\n# Alembic Config object\nconfig = context.config\n\nif config.config_file_name is not None and config.attributes.get(\n    \"configure_logger\", True\n):\n    # disable_existing_loggers=False prevents breaking pytest's caplog fixture\n    # See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues\n    fileConfig(config.config_file_name, disable_existing_loggers=False)\n\ntarget_metadata = [Base.metadata, ResultModelBase.metadata]\n\nEXCLUDE_TABLES = {\"kombu_queue\", \"kombu_message\"}\n\nlogger = logging.getLogger(__name__)\n\nssl_context: ssl.SSLContext | None = None\nif USE_IAM_AUTH:\n    if not os.path.exists(SSL_CERT_FILE):\n        raise FileNotFoundError(f\"Expected {SSL_CERT_FILE} when USE_IAM_AUTH is true.\")\n    ssl_context = ssl.create_default_context(cafile=SSL_CERT_FILE)\n\n\ndef include_object(\n    object: SchemaItem,  # noqa: ARG001\n    name: str | None,\n    type_: Literal[\n        \"schema\",\n        \"table\",\n        \"column\",\n        \"index\",\n        \"unique_constraint\",\n        \"foreign_key_constraint\",\n    ],\n    reflected: bool,  # noqa: ARG001\n    compare_to: SchemaItem | None,  # noqa: ARG001\n) -> bool:\n    if type_ == \"table\" and name in EXCLUDE_TABLES:\n        return False\n    return True\n\n\ndef filter_tenants_by_range(\n    tenant_ids: list[str], start_range: int | None = None, end_range: int | None = None\n) -> list[str]:\n    \"\"\"\n    Filter tenant IDs by alphabetical position range.\n\n    Args:\n        tenant_ids: List of tenant IDs to filter\n        start_range: Starting position in alphabetically sorted list (1-based, inclusive)\n        end_range: Ending position in alphabetically sorted list (1-based, inclusive)\n\n    Returns:\n        Filtered list of tenant IDs in their original order\n    \"\"\"\n    if start_range is None and end_range is None:\n        return tenant_ids\n\n    # Separate tenant IDs from non-tenant schemas\n    tenant_schemas = [tid for tid in tenant_ids if tid.startswith(TENANT_ID_PREFIX)]\n    non_tenant_schemas = [\n        tid for tid in tenant_ids if not tid.startswith(TENANT_ID_PREFIX)\n    ]\n\n    # Sort tenant schemas alphabetically.\n    # NOTE: can cause missed schemas if a schema is created in between workers\n    # fetching of all tenant IDs. We accept this risk for now. Just re-running\n    # the migration will fix the issue.\n    sorted_tenant_schemas = sorted(tenant_schemas)\n\n    # Apply range filtering (0-based indexing)\n    start_idx = start_range if start_range is not None else 0\n    end_idx = end_range if end_range is not None else len(sorted_tenant_schemas)\n\n    # Ensure indices are within bounds\n    start_idx = max(0, start_idx)\n    end_idx = min(len(sorted_tenant_schemas), end_idx)\n\n    # Get the filtered tenant schemas\n    filtered_tenant_schemas = sorted_tenant_schemas[start_idx:end_idx]\n\n    # Combine with non-tenant schemas and preserve original order\n    filtered_tenants = []\n    for tenant_id in tenant_ids:\n        if tenant_id in filtered_tenant_schemas or tenant_id in non_tenant_schemas:\n            filtered_tenants.append(tenant_id)\n\n    return filtered_tenants\n\n\ndef get_schema_options() -> (\n    tuple[bool, bool, bool, int | None, int | None, list[str] | None]\n):\n    x_args_raw = context.get_x_argument()\n    x_args = {}\n    for arg in x_args_raw:\n        if \"=\" in arg:\n            key, value = arg.split(\"=\", 1)\n            x_args[key.strip()] = value.strip()\n        else:\n            raise ValueError(f\"Invalid argument: {arg}\")\n\n    create_schema = x_args.get(\"create_schema\", \"true\").lower() == \"true\"\n    upgrade_all_tenants = x_args.get(\"upgrade_all_tenants\", \"false\").lower() == \"true\"\n\n    # continue on error with individual tenant\n    # only applies to online migrations\n    continue_on_error = x_args.get(\"continue\", \"false\").lower() == \"true\"\n\n    # Tenant range filtering\n    tenant_range_start = None\n    tenant_range_end = None\n\n    if \"tenant_range_start\" in x_args:\n        try:\n            tenant_range_start = int(x_args[\"tenant_range_start\"])\n        except ValueError:\n            raise ValueError(\n                f\"Invalid tenant_range_start value: {x_args['tenant_range_start']}. Must be an integer.\"\n            )\n\n    if \"tenant_range_end\" in x_args:\n        try:\n            tenant_range_end = int(x_args[\"tenant_range_end\"])\n        except ValueError:\n            raise ValueError(\n                f\"Invalid tenant_range_end value: {x_args['tenant_range_end']}. Must be an integer.\"\n            )\n\n    # Validate range\n    if tenant_range_start is not None and tenant_range_end is not None:\n        if tenant_range_start > tenant_range_end:\n            raise ValueError(\n                f\"tenant_range_start ({tenant_range_start}) cannot be greater than tenant_range_end ({tenant_range_end})\"\n            )\n\n    # Specific schema names filtering (replaces both schema_name and the old tenant_ids approach)\n    schemas = None\n    if \"schemas\" in x_args:\n        schema_names_str = x_args[\"schemas\"].strip()\n        if schema_names_str:\n            # Split by comma and strip whitespace\n            schemas = [\n                name.strip() for name in schema_names_str.split(\",\") if name.strip()\n            ]\n            if schemas:\n                logger.info(f\"Specific schema names specified: {schemas}\")\n\n    # Validate that only one method is used at a time\n    range_filtering = tenant_range_start is not None or tenant_range_end is not None\n    specific_filtering = schemas is not None and len(schemas) > 0\n\n    if range_filtering and specific_filtering:\n        raise ValueError(\n            \"Cannot use both tenant range filtering (tenant_range_start/tenant_range_end) \"\n            \"and specific schema filtering (schemas) at the same time. \"\n            \"Please use only one filtering method.\"\n        )\n\n    if upgrade_all_tenants and specific_filtering:\n        raise ValueError(\n            \"Cannot use both upgrade_all_tenants=true and schemas at the same time. \"\n            \"Use either upgrade_all_tenants=true for all tenants, or schemas for specific schemas.\"\n        )\n\n    # If any filtering parameters are specified, we're not doing the default single schema migration\n    if range_filtering:\n        upgrade_all_tenants = True\n\n    # Validate multi-tenant requirements\n    if MULTI_TENANT and not upgrade_all_tenants and not specific_filtering:\n        raise ValueError(\n            \"In multi-tenant mode, you must specify either upgrade_all_tenants=true \"\n            \"or provide schemas. Cannot run default migration.\"\n        )\n\n    return (\n        create_schema,\n        upgrade_all_tenants,\n        continue_on_error,\n        tenant_range_start,\n        tenant_range_end,\n        schemas,\n    )\n\n\ndef do_run_migrations(\n    connection: Connection, schema_name: str, create_schema: bool\n) -> None:\n    if create_schema:\n        connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS \"{schema_name}\"'))\n\n    connection.execute(text(f'SET search_path TO \"{schema_name}\"'))\n\n    context.configure(\n        connection=connection,\n        target_metadata=target_metadata,  # type: ignore\n        include_object=include_object,\n        version_table_schema=schema_name,\n        include_schemas=True,\n        compare_type=True,\n        compare_server_default=True,\n        script_location=config.get_main_option(\"script_location\"),\n    )\n\n    with context.begin_transaction():\n        context.run_migrations()\n\n\ndef provide_iam_token_for_alembic(\n    dialect: Any,  # noqa: ARG001\n    conn_rec: Any,  # noqa: ARG001\n    cargs: Any,  # noqa: ARG001\n    cparams: Any,\n) -> None:\n    if USE_IAM_AUTH:\n        # Database connection settings\n        region = AWS_REGION_NAME\n        host = POSTGRES_HOST\n        port = POSTGRES_PORT\n        user = POSTGRES_USER\n\n        # Get IAM authentication token\n        token = get_iam_auth_token(host, port, user, region)\n\n        # For Alembic / SQLAlchemy in this context, set SSL and password\n        cparams[\"password\"] = token\n        cparams[\"ssl\"] = ssl_context\n\n\nasync def run_async_migrations() -> None:\n    (\n        create_schema,\n        upgrade_all_tenants,\n        continue_on_error,\n        tenant_range_start,\n        tenant_range_end,\n        schemas,\n    ) = get_schema_options()\n\n    if not schemas and not MULTI_TENANT:\n        schemas = [POSTGRES_DEFAULT_SCHEMA]\n\n    # without init_engine, subsequent engine calls fail hard intentionally\n    SqlEngine.init_engine(pool_size=20, max_overflow=5)\n\n    engine = create_async_engine(\n        build_connection_string(),\n        poolclass=pool.NullPool,\n    )\n\n    if USE_IAM_AUTH:\n\n        @event.listens_for(engine.sync_engine, \"do_connect\")\n        def event_provide_iam_token_for_alembic(\n            dialect: Any, conn_rec: Any, cargs: Any, cparams: Any\n        ) -> None:\n            provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)\n\n    if schemas:\n        # Use specific schema names directly without fetching all tenants\n        logger.info(f\"Migrating specific schema names: {schemas}\")\n\n        i_schema = 0\n        num_schemas = len(schemas)\n        for schema in schemas:\n            i_schema += 1\n            logger.info(\n                f\"Migrating schema: index={i_schema} num_schemas={num_schemas} schema={schema}\"\n            )\n            try:\n                async with engine.connect() as connection:\n                    await connection.run_sync(\n                        do_run_migrations,\n                        schema_name=schema,\n                        create_schema=create_schema,\n                    )\n                    await connection.commit()\n            except Exception as e:\n                logger.error(f\"Error migrating schema {schema}: {e}\")\n                if not continue_on_error:\n                    logger.error(\"--continue=true is not set, raising exception!\")\n                    raise\n\n                logger.warning(\"--continue=true is set, continuing to next schema.\")\n\n    elif upgrade_all_tenants:\n        tenant_schemas = get_all_tenant_ids()\n\n        filtered_tenant_schemas = filter_tenants_by_range(\n            tenant_schemas, tenant_range_start, tenant_range_end\n        )\n\n        if tenant_range_start is not None or tenant_range_end is not None:\n            logger.info(\n                f\"Filtering tenants by range: start={tenant_range_start}, end={tenant_range_end}\"\n            )\n            logger.info(\n                f\"Total tenants: {len(tenant_schemas)}, Filtered tenants: {len(filtered_tenant_schemas)}\"\n            )\n\n        i_tenant = 0\n        num_tenants = len(filtered_tenant_schemas)\n        for schema in filtered_tenant_schemas:\n            i_tenant += 1\n            logger.info(\n                f\"Migrating schema: index={i_tenant} num_tenants={num_tenants} schema={schema}\"\n            )\n            try:\n                async with engine.connect() as connection:\n                    await connection.run_sync(\n                        do_run_migrations,\n                        schema_name=schema,\n                        create_schema=create_schema,\n                    )\n                    await connection.commit()\n            except Exception as e:\n                logger.error(f\"Error migrating schema {schema}: {e}\")\n                if not continue_on_error:\n                    logger.error(\"--continue=true is not set, raising exception!\")\n                    raise\n\n                logger.warning(\"--continue=true is set, continuing to next schema.\")\n\n    else:\n        # This should not happen in the new design since we require either\n        # upgrade_all_tenants=true or schemas in multi-tenant mode\n        # and for non-multi-tenant mode, we should use schemas with the default schema\n        raise ValueError(\n            \"No migration target specified. Use either upgrade_all_tenants=true for all tenants or schemas for specific schemas.\"\n        )\n\n    await engine.dispose()\n\n\ndef run_migrations_offline() -> None:\n    \"\"\"\n    NOTE(rkuo): This generates a sql script that can be used to migrate the database ...\n    instead of migrating the db live via an open connection\n\n    Not clear on when this would be used by us or if it even works.\n\n    If it is offline, then why are there calls to the db engine?\n\n    This doesn't really get used when we migrate in the cloud.\"\"\"\n\n    logger.info(\"run_migrations_offline starting.\")\n\n    # without init_engine, subsequent engine calls fail hard intentionally\n    SqlEngine.init_engine(pool_size=20, max_overflow=5)\n\n    (\n        create_schema,\n        upgrade_all_tenants,\n        continue_on_error,\n        tenant_range_start,\n        tenant_range_end,\n        schemas,\n    ) = get_schema_options()\n    url = build_connection_string()\n\n    if schemas:\n        # Use specific schema names directly without fetching all tenants\n        logger.info(f\"Migrating specific schema names: {schemas}\")\n\n        for schema in schemas:\n            logger.info(f\"Migrating schema: {schema}\")\n            context.configure(\n                url=url,\n                target_metadata=target_metadata,  # type: ignore\n                literal_binds=True,\n                include_object=include_object,\n                version_table_schema=schema,\n                include_schemas=True,\n                script_location=config.get_main_option(\"script_location\"),\n                dialect_opts={\"paramstyle\": \"named\"},\n            )\n\n            with context.begin_transaction():\n                context.run_migrations()\n\n    elif upgrade_all_tenants:\n        engine = create_async_engine(url)\n\n        if USE_IAM_AUTH:\n\n            @event.listens_for(engine.sync_engine, \"do_connect\")\n            def event_provide_iam_token_for_alembic_offline(\n                dialect: Any, conn_rec: Any, cargs: Any, cparams: Any\n            ) -> None:\n                provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)\n\n        tenant_schemas = get_all_tenant_ids()\n        engine.sync_engine.dispose()\n\n        filtered_tenant_schemas = filter_tenants_by_range(\n            tenant_schemas, tenant_range_start, tenant_range_end\n        )\n\n        if tenant_range_start is not None or tenant_range_end is not None:\n            logger.info(\n                f\"Filtering tenants by range: start={tenant_range_start}, end={tenant_range_end}\"\n            )\n            logger.info(\n                f\"Total tenants: {len(tenant_schemas)}, Filtered tenants: {len(filtered_tenant_schemas)}\"\n            )\n\n        for schema in filtered_tenant_schemas:\n            logger.info(f\"Migrating schema: {schema}\")\n            context.configure(\n                url=url,\n                target_metadata=target_metadata,  # type: ignore\n                literal_binds=True,\n                include_object=include_object,\n                version_table_schema=schema,\n                include_schemas=True,\n                script_location=config.get_main_option(\"script_location\"),\n                dialect_opts={\"paramstyle\": \"named\"},\n            )\n\n            with context.begin_transaction():\n                context.run_migrations()\n    else:\n        # This should not happen in the new design\n        raise ValueError(\n            \"No migration target specified. Use either upgrade_all_tenants=true for all tenants or schemas for specific schemas.\"\n        )\n\n\ndef run_migrations_online() -> None:\n    \"\"\"Run migrations in 'online' mode.\n\n    Supports pytest-alembic by checking for a pre-configured connection\n    in context.config.attributes[\"connection\"]. If present, uses that\n    connection/engine directly instead of creating a new async engine.\n    \"\"\"\n    # Check if pytest-alembic is providing a connection/engine\n    connectable = context.config.attributes.get(\"connection\", None)\n\n    if connectable is not None:\n        # pytest-alembic is providing an engine - use it directly\n        logger.debug(\"run_migrations_online starting (pytest-alembic mode).\")\n\n        # For pytest-alembic, we use the default schema (public)\n        schema_name = context.config.attributes.get(\n            \"schema_name\", POSTGRES_DEFAULT_SCHEMA\n        )\n\n        # pytest-alembic passes an Engine, we need to get a connection from it\n        with connectable.connect() as connection:\n            # Set search path for the schema\n            connection.execute(text(f'SET search_path TO \"{schema_name}\"'))\n\n            context.configure(\n                connection=connection,\n                target_metadata=target_metadata,  # type: ignore\n                include_object=include_object,\n                version_table_schema=schema_name,\n                include_schemas=True,\n                compare_type=True,\n                compare_server_default=True,\n                script_location=config.get_main_option(\"script_location\"),\n            )\n\n            with context.begin_transaction():\n                context.run_migrations()\n\n            # Commit the transaction to ensure changes are visible to next migration\n            connection.commit()\n    else:\n        # Normal operation - use async migrations\n        logger.info(\"run_migrations_online starting.\")\n        asyncio.run(run_async_migrations())\n\n\nif context.is_offline_mode():\n    run_migrations_offline()\nelse:\n    run_migrations_online()\n"
  },
  {
    "path": "backend/alembic/run_multitenant_migrations.py",
    "content": "#!/usr/bin/env python3\n\"\"\"Parallel Alembic Migration Runner\n\nUpgrades tenant schemas to head in batched, parallel alembic subprocesses.\nEach subprocess handles a batch of schemas (via ``-x schemas=a,b,c``),\nreducing per-process overhead compared to one-schema-per-process.\n\nUsage examples::\n\n    # defaults: 6 workers, 50 schemas/batch\n    python alembic/run_multitenant_migrations.py\n\n    # custom settings\n    python alembic/run_multitenant_migrations.py -j 8 -b 100\n\"\"\"\n\nfrom __future__ import annotations\n\nimport argparse\nimport subprocess\nimport sys\nimport threading\nimport time\nfrom concurrent.futures import ThreadPoolExecutor, as_completed\nfrom typing import NamedTuple\n\nfrom alembic.config import Config\nfrom alembic.script import ScriptDirectory\n\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.engine.tenant_utils import get_all_tenant_ids\nfrom onyx.db.engine.tenant_utils import get_schemas_needing_migration\nfrom shared_configs.configs import TENANT_ID_PREFIX\n\n\n# ---------------------------------------------------------------------------\n# Data types\n# ---------------------------------------------------------------------------\n\n\nclass Args(NamedTuple):\n    jobs: int\n    batch_size: int\n\n\nclass BatchResult(NamedTuple):\n    schemas: list[str]\n    success: bool\n    output: str\n    elapsed_sec: float\n\n\n# ---------------------------------------------------------------------------\n# Core functions\n# ---------------------------------------------------------------------------\n\n\ndef run_alembic_for_batch(schemas: list[str]) -> BatchResult:\n    \"\"\"Run ``alembic upgrade head`` for a batch of schemas in one subprocess.\n\n    If the batch fails, it is automatically retried with ``-x continue=true``\n    so that the remaining schemas in the batch still get migrated.  The retry\n    output (which contains alembic's per-schema error messages) is returned\n    for diagnosis.\n    \"\"\"\n    csv = \",\".join(schemas)\n    base_cmd = [\"alembic\", \"-x\", f\"schemas={csv}\"]\n\n    start = time.monotonic()\n    result = subprocess.run(\n        [*base_cmd, \"upgrade\", \"head\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.STDOUT,\n        text=True,\n    )\n\n    if result.returncode == 0:\n        elapsed = time.monotonic() - start\n        return BatchResult(schemas, True, result.stdout or \"\", elapsed)\n\n    # At least one schema failed.  Print the initial error output, then\n    # re-run with continue=true so the remaining schemas still get migrated.\n    if result.stdout:\n        print(f\"Initial error output:\\n{result.stdout}\", file=sys.stderr, flush=True)\n    print(\n        f\"Batch failed (exit {result.returncode}), retrying with 'continue=true'...\",\n        file=sys.stderr,\n        flush=True,\n    )\n\n    retry = subprocess.run(\n        [*base_cmd, \"-x\", \"continue=true\", \"upgrade\", \"head\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.STDOUT,\n        text=True,\n    )\n    elapsed = time.monotonic() - start\n    return BatchResult(schemas, False, retry.stdout or \"\", elapsed)\n\n\ndef get_head_revision() -> str | None:\n    \"\"\"Get the head revision from the alembic script directory.\"\"\"\n    alembic_cfg = Config(\"alembic.ini\")\n    script = ScriptDirectory.from_config(alembic_cfg)\n    return script.get_current_head()\n\n\ndef run_migrations_parallel(\n    schemas: list[str],\n    max_workers: int,\n    batch_size: int,\n) -> bool:\n    \"\"\"Chunk *schemas* into batches and run them in parallel.\n\n    A background monitor thread prints a status line every 60 s listing\n    which batches are still in-flight, making it easy to spot hung tenants.\n    \"\"\"\n    batches = [schemas[i : i + batch_size] for i in range(0, len(schemas), batch_size)]\n    total_batches = len(batches)\n    print(\n        f\"{len(schemas)} schemas in {total_batches} batch(es) with {max_workers} workers (batch size: {batch_size})...\",\n        flush=True,\n    )\n    all_success = True\n\n    # Thread-safe tracking of in-flight batches for the monitor thread.\n    in_flight: dict[int, list[str]] = {}\n    prev_in_flight: set[int] = set()\n    lock = threading.Lock()\n    stop_event = threading.Event()\n\n    def _monitor() -> None:\n        \"\"\"Print a status line every 60 s listing batches still in-flight.\n\n        Only prints batches that were also present in the previous tick,\n        making it easy to spot batches that are stuck.\n        \"\"\"\n        nonlocal prev_in_flight\n        while not stop_event.wait(60):\n            with lock:\n                if not in_flight:\n                    prev_in_flight = set()\n                    continue\n                current = set(in_flight)\n                stuck = current & prev_in_flight\n                prev_in_flight = current\n\n                if not stuck:\n                    continue\n\n                schemas = [s for idx in sorted(stuck) for s in in_flight[idx]]\n                print(\n                    f\"⏳ batch(es) still running since last check \"\n                    f\"({', '.join(str(i + 1) for i in sorted(stuck))}): \"\n                    + \", \".join(schemas),\n                    flush=True,\n                )\n\n    monitor_thread = threading.Thread(target=_monitor, daemon=True)\n    monitor_thread.start()\n\n    try:\n        with ThreadPoolExecutor(max_workers=max_workers) as executor:\n\n            def _run(batch_idx: int, batch: list[str]) -> BatchResult:\n                with lock:\n                    in_flight[batch_idx] = batch\n                print(\n                    f\"Batch {batch_idx + 1}/{total_batches} started ({len(batch)} schemas): {', '.join(batch)}\",\n                    flush=True,\n                )\n                result = run_alembic_for_batch(batch)\n                with lock:\n                    in_flight.pop(batch_idx, None)\n                return result\n\n            future_to_idx = {\n                executor.submit(_run, i, b): i for i, b in enumerate(batches)\n            }\n\n            for future in as_completed(future_to_idx):\n                batch_idx = future_to_idx[future]\n                try:\n                    result = future.result()\n                    status = \"✓\" if result.success else \"✗\"\n\n                    print(\n                        f\"Batch {batch_idx + 1}/{total_batches} \"\n                        f\"{status} {len(result.schemas)} schemas \"\n                        f\"in {result.elapsed_sec:.1f}s\",\n                        flush=True,\n                    )\n\n                    if not result.success:\n                        # Print last 20 lines of retry output for diagnosis\n                        tail = result.output.strip().splitlines()[-20:]\n                        for line in tail:\n                            print(f\"    {line}\", flush=True)\n                        all_success = False\n\n                except Exception as e:\n                    print(\n                        f\"Batch {batch_idx + 1}/{total_batches} ✗ exception: {e}\",\n                        flush=True,\n                    )\n                    all_success = False\n    finally:\n        stop_event.set()\n        monitor_thread.join(timeout=2)\n\n    return all_success\n\n\n# ---------------------------------------------------------------------------\n# CLI\n# ---------------------------------------------------------------------------\n\n\ndef parse_args() -> Args:\n    parser = argparse.ArgumentParser(\n        description=\"Run alembic migrations for all tenant schemas in parallel\"\n    )\n    parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n        default=6,\n        metavar=\"N\",\n        help=\"Number of parallel alembic processes (default: 6)\",\n    )\n    parser.add_argument(\n        \"-b\",\n        \"--batch-size\",\n        type=int,\n        default=50,\n        metavar=\"N\",\n        help=\"Schemas per alembic process (default: 50)\",\n    )\n    args = parser.parse_args()\n    if args.jobs < 1:\n        parser.error(\"--jobs must be >= 1\")\n    if args.batch_size < 1:\n        parser.error(\"--batch-size must be >= 1\")\n    return Args(jobs=args.jobs, batch_size=args.batch_size)\n\n\ndef main() -> int:\n    args = parse_args()\n\n    head_rev = get_head_revision()\n    if head_rev is None:\n        print(\"Could not determine head revision.\", file=sys.stderr)\n        return 1\n\n    with SqlEngine.scoped_engine(pool_size=5, max_overflow=2):\n        tenant_ids = get_all_tenant_ids()\n        tenant_schemas = [tid for tid in tenant_ids if tid.startswith(TENANT_ID_PREFIX)]\n\n        if not tenant_schemas:\n            print(\n                \"No tenant schemas found. Is MULTI_TENANT=true set?\",\n                file=sys.stderr,\n            )\n            return 1\n\n        schemas_to_migrate = get_schemas_needing_migration(tenant_schemas, head_rev)\n\n    if not schemas_to_migrate:\n        print(\n            f\"All {len(tenant_schemas)} tenants are already at head revision ({head_rev}).\"\n        )\n        return 0\n\n    print(\n        f\"{len(schemas_to_migrate)}/{len(tenant_schemas)} tenants need migration (head: {head_rev}).\"\n    )\n\n    success = run_migrations_parallel(\n        schemas_to_migrate,\n        max_workers=args.jobs,\n        batch_size=args.batch_size,\n    )\n\n    print(f\"\\n{'All migrations successful' if success else 'Some migrations failed'}\")\n    return 0 if success else 1\n\n\nif __name__ == \"__main__\":\n    raise SystemExit(main())\n"
  },
  {
    "path": "backend/alembic/script.py.mako",
    "content": "\"\"\"${message}\n\nRevision ID: ${up_revision}\nRevises: ${down_revision | comma,n}\nCreate Date: ${create_date}\n\n\"\"\"\nfrom alembic import op\nimport sqlalchemy as sa\n${imports if imports else \"\"}\n\n# revision identifiers, used by Alembic.\nrevision = ${repr(up_revision)}\ndown_revision = ${repr(down_revision)}\nbranch_labels = ${repr(branch_labels)}\ndepends_on = ${repr(depends_on)}\n\n\ndef upgrade() -> None:\n    ${upgrades if upgrades else \"pass\"}\n\n\ndef downgrade() -> None:\n    ${downgrades if downgrades else \"pass\"}\n"
  },
  {
    "path": "backend/alembic/versions/01f8e6d95a33_populate_flow_mapping_data.py",
    "content": "\"\"\"Populate flow mapping data\n\nRevision ID: 01f8e6d95a33\nRevises: d5c86e2c6dc6\nCreate Date: 2026-01-31 17:37:10.485558\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"01f8e6d95a33\"\ndown_revision = \"d5c86e2c6dc6\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add each model config to the conversation flow, setting the global default if it exists\n    # Exclude models that are part of ImageGenerationConfig\n    op.execute(\n        \"\"\"\n        INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)\n        SELECT\n            'CHAT' AS llm_model_flow_type,\n            COALESCE(\n                (lp.is_default_provider IS TRUE AND lp.default_model_name = mc.name),\n                FALSE\n            ) AS is_default,\n            mc.id AS model_configuration_id\n        FROM model_configuration mc\n        LEFT JOIN llm_provider lp\n            ON lp.id = mc.llm_provider_id\n        WHERE NOT EXISTS (\n            SELECT 1 FROM image_generation_config igc\n            WHERE igc.model_configuration_id = mc.id\n        );\n        \"\"\"\n    )\n\n    # Add models with supports_image_input to the vision flow\n    op.execute(\n        \"\"\"\n        INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)\n        SELECT\n            'VISION' AS llm_model_flow_type,\n            COALESCE(\n                (lp.is_default_vision_provider IS TRUE AND lp.default_vision_model = mc.name),\n                FALSE\n            ) AS is_default,\n            mc.id AS model_configuration_id\n        FROM model_configuration mc\n        LEFT JOIN llm_provider lp\n            ON lp.id = mc.llm_provider_id\n        WHERE mc.supports_image_input IS TRUE;\n        \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    # Populate vision defaults from model_flow\n    op.execute(\n        \"\"\"\n        UPDATE llm_provider AS lp\n        SET\n            is_default_vision_provider = TRUE,\n            default_vision_model = mc.name\n        FROM llm_model_flow mf\n        JOIN model_configuration mc ON mc.id = mf.model_configuration_id\n        WHERE mf.llm_model_flow_type = 'VISION'\n          AND mf.is_default = TRUE\n          AND mc.llm_provider_id = lp.id;\n        \"\"\"\n    )\n\n    # Populate conversation defaults from model_flow\n    op.execute(\n        \"\"\"\n        UPDATE llm_provider AS lp\n        SET\n            is_default_provider = TRUE,\n            default_model_name = mc.name\n        FROM llm_model_flow mf\n        JOIN model_configuration mc ON mc.id = mf.model_configuration_id\n        WHERE mf.llm_model_flow_type = 'CHAT'\n          AND mf.is_default = TRUE\n          AND mc.llm_provider_id = lp.id;\n        \"\"\"\n    )\n\n    # For providers that have conversation flow mappings but aren't the default,\n    # we still need a default_model_name (it was NOT NULL originally)\n    # Pick the first visible model or any model for that provider\n    op.execute(\n        \"\"\"\n        UPDATE llm_provider AS lp\n        SET default_model_name = (\n            SELECT mc.name\n            FROM model_configuration mc\n            JOIN llm_model_flow mf ON mf.model_configuration_id = mc.id\n            WHERE mc.llm_provider_id = lp.id\n              AND mf.llm_model_flow_type = 'CHAT'\n            ORDER BY mc.is_visible DESC, mc.id ASC\n            LIMIT 1\n        )\n        WHERE lp.default_model_name IS NULL;\n        \"\"\"\n    )\n\n    # Delete all model_flow entries (reverse the inserts from upgrade)\n    op.execute(\"DELETE FROM llm_model_flow;\")\n"
  },
  {
    "path": "backend/alembic/versions/027381bce97c_add_shortcut_option_for_users.py",
    "content": "\"\"\"add shortcut option for users\n\nRevision ID: 027381bce97c\nRevises: 6fc7886d665d\nCreate Date: 2025-01-14 12:14:00.814390\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"027381bce97c\"\ndown_revision = \"6fc7886d665d\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"shortcut_enabled\", sa.Boolean(), nullable=False, server_default=\"false\"\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"shortcut_enabled\")\n"
  },
  {
    "path": "backend/alembic/versions/03bf8be6b53a_rework_kg_config.py",
    "content": "\"\"\"rework-kg-config\n\nRevision ID: 03bf8be6b53a\nRevises: 65bc6e0f8500\nCreate Date: 2025-06-16 10:52:34.815335\n\n\"\"\"\n\nimport json\n\n\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom sqlalchemy.dialects import postgresql\nfrom sqlalchemy import text\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"03bf8be6b53a\"\ndown_revision = \"65bc6e0f8500\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # get current config\n    current_configs = (\n        op.get_bind()\n        .execute(text(\"SELECT kg_variable_name, kg_variable_values FROM kg_config\"))\n        .all()\n    )\n    current_config_dict = {\n        config.kg_variable_name: (\n            config.kg_variable_values[0]\n            if config.kg_variable_name\n            not in (\"KG_VENDOR_DOMAINS\", \"KG_IGNORE_EMAIL_DOMAINS\")\n            else config.kg_variable_values\n        )\n        for config in current_configs\n        if config.kg_variable_values\n    }\n\n    # not using the KGConfigSettings model here in case it changes in the future\n    kg_config_settings = json.dumps(\n        {\n            \"KG_EXPOSED\": current_config_dict.get(\"KG_EXPOSED\", False),\n            \"KG_ENABLED\": current_config_dict.get(\"KG_ENABLED\", False),\n            \"KG_VENDOR\": current_config_dict.get(\"KG_VENDOR\", None),\n            \"KG_VENDOR_DOMAINS\": current_config_dict.get(\"KG_VENDOR_DOMAINS\", []),\n            \"KG_IGNORE_EMAIL_DOMAINS\": current_config_dict.get(\n                \"KG_IGNORE_EMAIL_DOMAINS\", []\n            ),\n            \"KG_COVERAGE_START\": current_config_dict.get(\n                \"KG_COVERAGE_START\",\n                (datetime.now() - timedelta(days=90)).strftime(\"%Y-%m-%d\"),\n            ),\n            \"KG_MAX_COVERAGE_DAYS\": current_config_dict.get(\"KG_MAX_COVERAGE_DAYS\", 90),\n            \"KG_MAX_PARENT_RECURSION_DEPTH\": current_config_dict.get(\n                \"KG_MAX_PARENT_RECURSION_DEPTH\", 2\n            ),\n            \"KG_BETA_PERSONA_ID\": current_config_dict.get(\"KG_BETA_PERSONA_ID\", None),\n        }\n    )\n    op.execute(\n        f\"INSERT INTO key_value_store (key, value) VALUES ('kg_config', '{kg_config_settings}')\"\n    )\n\n    # drop kg config table\n    op.drop_table(\"kg_config\")\n\n\ndef downgrade() -> None:\n    # get current config\n    current_config_dict = {\n        \"KG_EXPOSED\": False,\n        \"KG_ENABLED\": False,\n        \"KG_VENDOR\": [],\n        \"KG_VENDOR_DOMAINS\": [],\n        \"KG_IGNORE_EMAIL_DOMAINS\": [],\n        \"KG_COVERAGE_START\": (datetime.now() - timedelta(days=90)).strftime(\"%Y-%m-%d\"),\n        \"KG_MAX_COVERAGE_DAYS\": 90,\n        \"KG_MAX_PARENT_RECURSION_DEPTH\": 2,\n    }\n    current_configs = (\n        op.get_bind()\n        .execute(text(\"SELECT value FROM key_value_store WHERE key = 'kg_config'\"))\n        .one_or_none()\n    )\n    if current_configs is not None:\n        current_config_dict.update(current_configs[0])\n    insert_values = [\n        {\n            \"kg_variable_name\": name,\n            \"kg_variable_values\": (\n                [str(val).lower() if isinstance(val, bool) else str(val)]\n                if not isinstance(val, list)\n                else val\n            ),\n        }\n        for name, val in current_config_dict.items()\n    ]\n\n    op.create_table(\n        \"kg_config\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True, nullable=False, index=True),\n        sa.Column(\"kg_variable_name\", sa.String(), nullable=False, index=True),\n        sa.Column(\"kg_variable_values\", postgresql.ARRAY(sa.String()), nullable=False),\n        sa.UniqueConstraint(\"kg_variable_name\", name=\"uq_kg_config_variable_name\"),\n    )\n    op.bulk_insert(\n        sa.table(\n            \"kg_config\",\n            sa.column(\"kg_variable_name\", sa.String),\n            sa.column(\"kg_variable_values\", postgresql.ARRAY(sa.String)),\n        ),\n        insert_values,\n    )\n\n    op.execute(\"DELETE FROM key_value_store WHERE key = 'kg_config'\")\n"
  },
  {
    "path": "backend/alembic/versions/03d085c5c38d_backfill_account_type.py",
    "content": "\"\"\"backfill_account_type\n\nRevision ID: 03d085c5c38d\nRevises: 977e834c1427\nCreate Date: 2026-03-25 16:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"03d085c5c38d\"\ndown_revision = \"977e834c1427\"\nbranch_labels = None\ndepends_on = None\n\n_STANDARD = \"STANDARD\"\n_BOT = \"BOT\"\n_EXT_PERM_USER = \"EXT_PERM_USER\"\n_SERVICE_ACCOUNT = \"SERVICE_ACCOUNT\"\n_ANONYMOUS = \"ANONYMOUS\"\n\n# Well-known anonymous user UUID\nANONYMOUS_USER_ID = \"00000000-0000-0000-0000-000000000002\"\n\n# Email pattern for API key virtual users\nAPI_KEY_EMAIL_PATTERN = r\"API\\_KEY\\_\\_%\"\n\n# Reflect the table structure for use in DML\nuser_table = sa.table(\n    \"user\",\n    sa.column(\"id\", sa.Uuid),\n    sa.column(\"email\", sa.String),\n    sa.column(\"role\", sa.String),\n    sa.column(\"account_type\", sa.String),\n)\n\n\ndef upgrade() -> None:\n    # ------------------------------------------------------------------\n    # Step 1: Backfill account_type from role.\n    # Order matters — most-specific matches first so the final catch-all\n    # only touches rows that haven't been classified yet.\n    # ------------------------------------------------------------------\n\n    # 1a. API key virtual users → SERVICE_ACCOUNT\n    op.execute(\n        sa.update(user_table)\n        .where(\n            user_table.c.email.ilike(API_KEY_EMAIL_PATTERN),\n            user_table.c.account_type.is_(None),\n        )\n        .values(account_type=_SERVICE_ACCOUNT)\n    )\n\n    # 1b. Anonymous user → ANONYMOUS\n    op.execute(\n        sa.update(user_table)\n        .where(\n            user_table.c.id == ANONYMOUS_USER_ID,\n            user_table.c.account_type.is_(None),\n        )\n        .values(account_type=_ANONYMOUS)\n    )\n\n    # 1c. SLACK_USER role → BOT\n    op.execute(\n        sa.update(user_table)\n        .where(\n            user_table.c.role == \"SLACK_USER\",\n            user_table.c.account_type.is_(None),\n        )\n        .values(account_type=_BOT)\n    )\n\n    # 1d. EXT_PERM_USER role → EXT_PERM_USER\n    op.execute(\n        sa.update(user_table)\n        .where(\n            user_table.c.role == \"EXT_PERM_USER\",\n            user_table.c.account_type.is_(None),\n        )\n        .values(account_type=_EXT_PERM_USER)\n    )\n\n    # 1e. Everything else → STANDARD\n    op.execute(\n        sa.update(user_table)\n        .where(user_table.c.account_type.is_(None))\n        .values(account_type=_STANDARD)\n    )\n\n    # ------------------------------------------------------------------\n    # Step 2: Set account_type to NOT NULL now that every row is filled.\n    # ------------------------------------------------------------------\n    op.alter_column(\n        \"user\",\n        \"account_type\",\n        nullable=False,\n        server_default=\"STANDARD\",\n    )\n\n\ndef downgrade() -> None:\n    op.alter_column(\"user\", \"account_type\", nullable=True, server_default=None)\n    op.execute(sa.update(user_table).values(account_type=None))\n"
  },
  {
    "path": "backend/alembic/versions/03d710ccf29c_add_permission_sync_attempt_tables.py",
    "content": "\"\"\"add permission sync attempt tables\n\nRevision ID: 03d710ccf29c\nRevises: 96a5702df6aa\nCreate Date: 2025-09-11 13:30:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"03d710ccf29c\"  # Generate a new unique ID\ndown_revision = \"96a5702df6aa\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Create the permission sync status enum\n    permission_sync_status_enum = sa.Enum(\n        \"not_started\",\n        \"in_progress\",\n        \"success\",\n        \"canceled\",\n        \"failed\",\n        \"completed_with_errors\",\n        name=\"permissionsyncstatus\",\n        native_enum=False,\n    )\n\n    # Create doc_permission_sync_attempt table\n    op.create_table(\n        \"doc_permission_sync_attempt\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"connector_credential_pair_id\", sa.Integer(), nullable=False),\n        sa.Column(\"status\", permission_sync_status_enum, nullable=False),\n        sa.Column(\"total_docs_synced\", sa.Integer(), nullable=True),\n        sa.Column(\"docs_with_permission_errors\", sa.Integer(), nullable=True),\n        sa.Column(\"error_message\", sa.Text(), nullable=True),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"time_started\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\"time_finished\", sa.DateTime(timezone=True), nullable=True),\n        sa.ForeignKeyConstraint(\n            [\"connector_credential_pair_id\"],\n            [\"connector_credential_pair.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    # Create indexes for doc_permission_sync_attempt\n    op.create_index(\n        \"ix_doc_permission_sync_attempt_time_created\",\n        \"doc_permission_sync_attempt\",\n        [\"time_created\"],\n        unique=False,\n    )\n    op.create_index(\n        \"ix_permission_sync_attempt_latest_for_cc_pair\",\n        \"doc_permission_sync_attempt\",\n        [\"connector_credential_pair_id\", \"time_created\"],\n        unique=False,\n    )\n    op.create_index(\n        \"ix_permission_sync_attempt_status_time\",\n        \"doc_permission_sync_attempt\",\n        [\"status\", sa.text(\"time_finished DESC\")],\n        unique=False,\n    )\n\n    # Create external_group_permission_sync_attempt table\n    # connector_credential_pair_id is nullable - group syncs can be global (e.g., Confluence)\n    op.create_table(\n        \"external_group_permission_sync_attempt\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"connector_credential_pair_id\", sa.Integer(), nullable=True),\n        sa.Column(\"status\", permission_sync_status_enum, nullable=False),\n        sa.Column(\"total_users_processed\", sa.Integer(), nullable=True),\n        sa.Column(\"total_groups_processed\", sa.Integer(), nullable=True),\n        sa.Column(\"total_group_memberships_synced\", sa.Integer(), nullable=True),\n        sa.Column(\"error_message\", sa.Text(), nullable=True),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"time_started\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\"time_finished\", sa.DateTime(timezone=True), nullable=True),\n        sa.ForeignKeyConstraint(\n            [\"connector_credential_pair_id\"],\n            [\"connector_credential_pair.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    # Create indexes for external_group_permission_sync_attempt\n    op.create_index(\n        \"ix_external_group_permission_sync_attempt_time_created\",\n        \"external_group_permission_sync_attempt\",\n        [\"time_created\"],\n        unique=False,\n    )\n    op.create_index(\n        \"ix_group_sync_attempt_cc_pair_time\",\n        \"external_group_permission_sync_attempt\",\n        [\"connector_credential_pair_id\", \"time_created\"],\n        unique=False,\n    )\n    op.create_index(\n        \"ix_group_sync_attempt_status_time\",\n        \"external_group_permission_sync_attempt\",\n        [\"status\", sa.text(\"time_finished DESC\")],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    # Drop indexes\n    op.drop_index(\n        \"ix_group_sync_attempt_status_time\",\n        table_name=\"external_group_permission_sync_attempt\",\n    )\n    op.drop_index(\n        \"ix_group_sync_attempt_cc_pair_time\",\n        table_name=\"external_group_permission_sync_attempt\",\n    )\n    op.drop_index(\n        \"ix_external_group_permission_sync_attempt_time_created\",\n        table_name=\"external_group_permission_sync_attempt\",\n    )\n    op.drop_index(\n        \"ix_permission_sync_attempt_status_time\",\n        table_name=\"doc_permission_sync_attempt\",\n    )\n    op.drop_index(\n        \"ix_permission_sync_attempt_latest_for_cc_pair\",\n        table_name=\"doc_permission_sync_attempt\",\n    )\n    op.drop_index(\n        \"ix_doc_permission_sync_attempt_time_created\",\n        table_name=\"doc_permission_sync_attempt\",\n    )\n\n    # Drop tables\n    op.drop_table(\"external_group_permission_sync_attempt\")\n    op.drop_table(\"doc_permission_sync_attempt\")\n"
  },
  {
    "path": "backend/alembic/versions/0568ccf46a6b_add_thread_specific_model_selection.py",
    "content": "\"\"\"Add thread specific model selection\n\nRevision ID: 0568ccf46a6b\nRevises: e209dc5a8156\nCreate Date: 2024-06-19 14:25:36.376046\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"0568ccf46a6b\"\ndown_revision = \"e209dc5a8156\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_session\",\n        sa.Column(\"current_alternate_model\", sa.String(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_session\", \"current_alternate_model\")\n"
  },
  {
    "path": "backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py",
    "content": "\"\"\"add search doc relevance details\n\nRevision ID: 05c07bf07c00\nRevises: b896bbd0d5a7\nCreate Date: 2024-07-10 17:48:15.886653\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"05c07bf07c00\"\ndown_revision = \"b896bbd0d5a7\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"search_doc\",\n        sa.Column(\"is_relevant\", sa.Boolean(), nullable=True),\n    )\n    op.add_column(\n        \"search_doc\",\n        sa.Column(\"relevance_explanation\", sa.String(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"search_doc\", \"relevance_explanation\")\n    op.drop_column(\"search_doc\", \"is_relevant\")\n"
  },
  {
    "path": "backend/alembic/versions/07b98176f1de_code_interpreter_seed.py",
    "content": "\"\"\"code interpreter seed\n\nRevision ID: 07b98176f1de\nRevises: 7cb492013621\nCreate Date: 2026-02-23 15:55:07.606784\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"07b98176f1de\"\ndown_revision = \"7cb492013621\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Seed the single instance of code_interpreter_server\n    # NOTE: There should only exist at most and at minimum 1 code_interpreter_server row\n    op.execute(\n        sa.text(\"INSERT INTO code_interpreter_server (server_enabled) VALUES (true)\")\n    )\n\n\ndef downgrade() -> None:\n    op.execute(sa.text(\"DELETE FROM code_interpreter_server\"))\n"
  },
  {
    "path": "backend/alembic/versions/0816326d83aa_add_federated_connector_tables.py",
    "content": "\"\"\"add federated connector tables\n\nRevision ID: 0816326d83aa\nRevises: 12635f6655b7\nCreate Date: 2025-06-29 14:09:45.109518\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n\n# revision identifiers, used by Alembic.\nrevision = \"0816326d83aa\"\ndown_revision = \"12635f6655b7\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Create federated_connector table\n    op.create_table(\n        \"federated_connector\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"source\", sa.String(), nullable=False),\n        sa.Column(\"credentials\", sa.LargeBinary(), nullable=False),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    # Create federated_connector_oauth_token table\n    op.create_table(\n        \"federated_connector_oauth_token\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"federated_connector_id\", sa.Integer(), nullable=False),\n        sa.Column(\"user_id\", postgresql.UUID(as_uuid=True), nullable=False),\n        sa.Column(\"token\", sa.LargeBinary(), nullable=False),\n        sa.Column(\"expires_at\", sa.DateTime(), nullable=True),\n        sa.ForeignKeyConstraint(\n            [\"federated_connector_id\"], [\"federated_connector.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.ForeignKeyConstraint([\"user_id\"], [\"user.id\"], ondelete=\"CASCADE\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    # Create federated_connector__document_set table\n    op.create_table(\n        \"federated_connector__document_set\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"federated_connector_id\", sa.Integer(), nullable=False),\n        sa.Column(\"document_set_id\", sa.Integer(), nullable=False),\n        sa.Column(\"entities\", postgresql.JSONB(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"federated_connector_id\"], [\"federated_connector.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.ForeignKeyConstraint(\n            [\"document_set_id\"], [\"document_set.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\n            \"federated_connector_id\",\n            \"document_set_id\",\n            name=\"uq_federated_connector_document_set\",\n        ),\n    )\n\n\ndef downgrade() -> None:\n    # Drop tables in reverse order due to foreign key dependencies\n    op.drop_table(\"federated_connector__document_set\")\n    op.drop_table(\"federated_connector_oauth_token\")\n    op.drop_table(\"federated_connector\")\n"
  },
  {
    "path": "backend/alembic/versions/08a1eda20fe1_add_earliest_indexing_to_connector.py",
    "content": "\"\"\"add_indexing_start_to_connector\n\nRevision ID: 08a1eda20fe1\nRevises: 8a87bd6ec550\nCreate Date: 2024-07-23 11:12:39.462397\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"08a1eda20fe1\"\ndown_revision = \"8a87bd6ec550\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"connector\", sa.Column(\"indexing_start\", sa.DateTime(), nullable=True)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"connector\", \"indexing_start\")\n"
  },
  {
    "path": "backend/alembic/versions/09995b8811eb_add_theme_preference_to_user.py",
    "content": "\"\"\"add theme_preference to user\n\nRevision ID: 09995b8811eb\nRevises: 3d1cca026fe8\nCreate Date: 2025-10-24 08:58:50.246949\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom onyx.db.enums import ThemePreference\n\n\n# revision identifiers, used by Alembic.\nrevision = \"09995b8811eb\"\ndown_revision = \"3d1cca026fe8\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"theme_preference\",\n            sa.Enum(ThemePreference, native_enum=False),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"theme_preference\")\n"
  },
  {
    "path": "backend/alembic/versions/0a2b51deb0b8_add_starter_prompts.py",
    "content": "\"\"\"Add starter prompts\n\nRevision ID: 0a2b51deb0b8\nRevises: 5f4b8568a221\nCreate Date: 2024-03-02 23:23:49.960309\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"0a2b51deb0b8\"\ndown_revision = \"5f4b8568a221\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"persona\",\n        sa.Column(\n            \"starter_messages\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"persona\", \"starter_messages\")\n"
  },
  {
    "path": "backend/alembic/versions/0a98909f2757_enable_encrypted_fields.py",
    "content": "\"\"\"Enable Encrypted Fields\n\nRevision ID: 0a98909f2757\nRevises: 570282d33c49\nCreate Date: 2024-05-05 19:30:34.317972\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.sql import table\nfrom sqlalchemy.dialects import postgresql\nimport json\n\nfrom onyx.utils.encryption import encrypt_string_to_bytes\n\n# revision identifiers, used by Alembic.\nrevision = \"0a98909f2757\"\ndown_revision = \"570282d33c49\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    connection = op.get_bind()\n\n    op.alter_column(\"key_value_store\", \"value\", nullable=True)\n    op.add_column(\n        \"key_value_store\",\n        sa.Column(\n            \"encrypted_value\",\n            sa.LargeBinary,\n            nullable=True,\n        ),\n    )\n\n    # Need a temporary column to translate the JSONB to binary\n    op.add_column(\"credential\", sa.Column(\"temp_column\", sa.LargeBinary()))\n\n    creds_table = table(\n        \"credential\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"credential_json\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=False,\n        ),\n        sa.Column(\n            \"temp_column\",\n            sa.LargeBinary(),\n            nullable=False,\n        ),\n    )\n\n    results = connection.execute(sa.select(creds_table))\n\n    # This uses the MIT encrypt which does not actually encrypt the credentials\n    # In other words, this upgrade does not apply the encryption. Porting existing sensitive data\n    # and key rotation currently is not supported and will come out in the future\n    for row_id, creds, _ in results:\n        creds_binary = encrypt_string_to_bytes(json.dumps(creds))\n        connection.execute(\n            creds_table.update()\n            .where(creds_table.c.id == row_id)\n            .values(temp_column=creds_binary)\n        )\n\n    op.drop_column(\"credential\", \"credential_json\")\n    op.alter_column(\"credential\", \"temp_column\", new_column_name=\"credential_json\")\n\n    op.add_column(\"llm_provider\", sa.Column(\"temp_column\", sa.LargeBinary()))\n\n    llm_table = table(\n        \"llm_provider\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"api_key\",\n            sa.String(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"temp_column\",\n            sa.LargeBinary(),\n            nullable=False,\n        ),\n    )\n    results = connection.execute(sa.select(llm_table))\n\n    for row_id, api_key, _ in results:\n        llm_key = encrypt_string_to_bytes(api_key)\n        connection.execute(\n            llm_table.update()\n            .where(llm_table.c.id == row_id)\n            .values(temp_column=llm_key)\n        )\n\n    op.drop_column(\"llm_provider\", \"api_key\")\n    op.alter_column(\"llm_provider\", \"temp_column\", new_column_name=\"api_key\")\n\n\ndef downgrade() -> None:\n    # Some information loss but this is ok. Should not allow decryption via downgrade.\n    op.drop_column(\"credential\", \"credential_json\")\n    op.drop_column(\"llm_provider\", \"api_key\")\n\n    op.add_column(\"llm_provider\", sa.Column(\"api_key\", sa.String()))\n    op.add_column(\n        \"credential\",\n        sa.Column(\"credential_json\", postgresql.JSONB(astext_type=sa.Text())),\n    )\n\n    op.execute(\"DELETE FROM key_value_store WHERE value IS NULL\")\n    op.alter_column(\"key_value_store\", \"value\", nullable=False)\n    op.drop_column(\"key_value_store\", \"encrypted_value\")\n"
  },
  {
    "path": "backend/alembic/versions/0bb4558f35df_add_scim_username_to_scim_user_mapping.py",
    "content": "\"\"\"add scim_username to scim_user_mapping\n\nRevision ID: 0bb4558f35df\nRevises: 631fd2504136\nCreate Date: 2026-02-20 10:45:30.340188\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"0bb4558f35df\"\ndown_revision = \"631fd2504136\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"scim_user_mapping\",\n        sa.Column(\"scim_username\", sa.String(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"scim_user_mapping\", \"scim_username\")\n"
  },
  {
    "path": "backend/alembic/versions/0cd424f32b1d_user_file_data_preparation_and_backfill.py",
    "content": "\"\"\"Migration 2: User file data preparation and backfill\n\nRevision ID: 0cd424f32b1d\nRevises: 9b66d3156fc6\nCreate Date: 2025-09-22 09:44:42.727034\n\nThis migration populates the new columns added in migration 1.\nIt prepares data for the UUID transition and relationship migration.\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy import text\nimport logging\n\nlogger = logging.getLogger(\"alembic.runtime.migration\")\n\n# revision identifiers, used by Alembic.\nrevision = \"0cd424f32b1d\"\ndown_revision = \"9b66d3156fc6\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    \"\"\"Populate new columns with data.\"\"\"\n\n    bind = op.get_bind()\n    inspector = sa.inspect(bind)\n\n    # === Step 1: Populate user_file.new_id ===\n    user_file_columns = [col[\"name\"] for col in inspector.get_columns(\"user_file\")]\n    has_new_id = \"new_id\" in user_file_columns\n\n    if has_new_id:\n        logger.info(\"Populating user_file.new_id with UUIDs...\")\n\n        # Count rows needing UUIDs\n        null_count = bind.execute(\n            text(\"SELECT COUNT(*) FROM user_file WHERE new_id IS NULL\")\n        ).scalar_one()\n\n        if null_count > 0:\n            logger.info(f\"Generating UUIDs for {null_count} user_file records...\")\n\n            # Populate in batches to avoid long locks\n            batch_size = 10000\n            total_updated = 0\n\n            while True:\n                result = bind.execute(\n                    text(\n                        \"\"\"\n                    UPDATE user_file\n                    SET new_id = gen_random_uuid()\n                    WHERE new_id IS NULL\n                    AND id IN (\n                        SELECT id FROM user_file\n                        WHERE new_id IS NULL\n                        LIMIT :batch_size\n                    )\n                \"\"\"\n                    ),\n                    {\"batch_size\": batch_size},\n                )\n\n                updated = result.rowcount\n                total_updated += updated\n\n                if updated < batch_size:\n                    break\n\n                logger.info(f\"  Updated {total_updated}/{null_count} records...\")\n\n            logger.info(f\"Generated UUIDs for {total_updated} user_file records\")\n\n        # Verify all records have UUIDs\n        remaining_null = bind.execute(\n            text(\"SELECT COUNT(*) FROM user_file WHERE new_id IS NULL\")\n        ).scalar_one()\n\n        if remaining_null > 0:\n            raise Exception(\n                f\"Failed to populate all user_file.new_id values ({remaining_null} NULL)\"\n            )\n\n        # Lock down the column\n        op.alter_column(\"user_file\", \"new_id\", nullable=False)\n        op.alter_column(\"user_file\", \"new_id\", server_default=None)\n        logger.info(\"Locked down user_file.new_id column\")\n\n    # === Step 2: Populate persona__user_file.user_file_id_uuid ===\n    persona_user_file_columns = [\n        col[\"name\"] for col in inspector.get_columns(\"persona__user_file\")\n    ]\n\n    if has_new_id and \"user_file_id_uuid\" in persona_user_file_columns:\n        logger.info(\"Populating persona__user_file.user_file_id_uuid...\")\n\n        # Count rows needing update\n        null_count = bind.execute(\n            text(\n                \"\"\"\n            SELECT COUNT(*) FROM persona__user_file\n            WHERE user_file_id IS NOT NULL AND user_file_id_uuid IS NULL\n        \"\"\"\n            )\n        ).scalar_one()\n\n        if null_count > 0:\n            logger.info(f\"Updating {null_count} persona__user_file records...\")\n\n            # Update in batches\n            batch_size = 10000\n            total_updated = 0\n\n            while True:\n                result = bind.execute(\n                    text(\n                        \"\"\"\n                    UPDATE persona__user_file p\n                    SET user_file_id_uuid = uf.new_id\n                    FROM user_file uf\n                    WHERE p.user_file_id = uf.id\n                    AND p.user_file_id_uuid IS NULL\n                    AND p.persona_id IN (\n                        SELECT persona_id\n                        FROM persona__user_file\n                        WHERE user_file_id_uuid IS NULL\n                        LIMIT :batch_size\n                    )\n                \"\"\"\n                    ),\n                    {\"batch_size\": batch_size},\n                )\n\n                updated = result.rowcount\n                total_updated += updated\n\n                if updated < batch_size:\n                    break\n\n                logger.info(f\"  Updated {total_updated}/{null_count} records...\")\n\n            logger.info(f\"Updated {total_updated} persona__user_file records\")\n\n        # Verify all records are populated\n        remaining_null = bind.execute(\n            text(\n                \"\"\"\n            SELECT COUNT(*) FROM persona__user_file\n            WHERE user_file_id IS NOT NULL AND user_file_id_uuid IS NULL\n        \"\"\"\n            )\n        ).scalar_one()\n\n        if remaining_null > 0:\n            raise Exception(\n                f\"Failed to populate all persona__user_file.user_file_id_uuid values ({remaining_null} NULL)\"\n            )\n\n        op.alter_column(\"persona__user_file\", \"user_file_id_uuid\", nullable=False)\n        logger.info(\"Locked down persona__user_file.user_file_id_uuid column\")\n\n    # === Step 3: Create user_project records from chat_folder ===\n    if \"chat_folder\" in inspector.get_table_names():\n        logger.info(\"Creating user_project records from chat_folder...\")\n\n        result = bind.execute(\n            text(\n                \"\"\"\n            INSERT INTO user_project (user_id, name)\n            SELECT cf.user_id, cf.name\n            FROM chat_folder cf\n            WHERE NOT EXISTS (\n                SELECT 1\n                FROM user_project up\n                WHERE up.user_id = cf.user_id AND up.name = cf.name\n            )\n        \"\"\"\n            )\n        )\n\n        logger.info(f\"Created {result.rowcount} user_project records from chat_folder\")\n\n    # === Step 4: Populate chat_session.project_id ===\n    chat_session_columns = [\n        col[\"name\"] for col in inspector.get_columns(\"chat_session\")\n    ]\n\n    if \"folder_id\" in chat_session_columns and \"project_id\" in chat_session_columns:\n        logger.info(\"Populating chat_session.project_id...\")\n\n        # Count sessions needing update\n        null_count = bind.execute(\n            text(\n                \"\"\"\n            SELECT COUNT(*) FROM chat_session\n            WHERE project_id IS NULL AND folder_id IS NOT NULL\n        \"\"\"\n            )\n        ).scalar_one()\n\n        if null_count > 0:\n            logger.info(f\"Updating {null_count} chat_session records...\")\n\n            result = bind.execute(\n                text(\n                    \"\"\"\n                UPDATE chat_session cs\n                SET project_id = up.id\n                FROM chat_folder cf\n                JOIN user_project up ON up.user_id = cf.user_id AND up.name = cf.name\n                WHERE cs.folder_id = cf.id AND cs.project_id IS NULL\n            \"\"\"\n                )\n            )\n\n            logger.info(f\"Updated {result.rowcount} chat_session records\")\n\n        # Verify all records are populated\n        remaining_null = bind.execute(\n            text(\n                \"\"\"\n            SELECT COUNT(*) FROM chat_session\n            WHERE project_id IS NULL AND folder_id IS NOT NULL\n        \"\"\"\n            )\n        ).scalar_one()\n\n        if remaining_null > 0:\n            logger.warning(\n                f\"Warning: {remaining_null} chat_session records could not be mapped to projects\"\n            )\n\n    # === Step 5: Update plaintext FileRecord IDs/display names to UUID scheme ===\n    # Prior to UUID migration, plaintext cache files were stored with file_id like 'plain_text_<int_id>'.\n    # After migration, we use 'plaintext_<uuid>' (note the name change to 'plaintext_').\n    # This step remaps existing FileRecord rows to the new naming while preserving object_key/bucket.\n    logger.info(\"Updating plaintext FileRecord ids and display names to UUID scheme...\")\n\n    # Count legacy plaintext records that can be mapped to UUID user_file ids\n    count_query = text(\n        \"\"\"\n        SELECT COUNT(*)\n        FROM file_record fr\n        JOIN user_file uf ON fr.file_id = CONCAT('plaintext_', uf.id::text)\n        WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'\n        \"\"\"\n    )\n    legacy_count = bind.execute(count_query).scalar_one()\n\n    if legacy_count and legacy_count > 0:\n        logger.info(f\"Found {legacy_count} legacy plaintext file records to update\")\n\n        # Update display_name first for readability (safe regardless of rename)\n        bind.execute(\n            text(\n                \"\"\"\n                UPDATE file_record fr\n                SET display_name = CONCAT('Plaintext for user file ', uf.new_id::text)\n                FROM user_file uf\n                WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'\n                    AND fr.file_id = CONCAT('plaintext_', uf.id::text)\n                \"\"\"\n            )\n        )\n\n        # Remap file_id from 'plaintext_<int>' -> 'plaintext_<uuid>' using transitional new_id\n        # Use a single UPDATE ... WHERE file_id LIKE 'plain_text_%'\n        # and ensure it aligns to existing user_file ids to avoid renaming unrelated rows\n        result = bind.execute(\n            text(\n                \"\"\"\n                UPDATE file_record fr\n                SET file_id = CONCAT('plaintext_', uf.new_id::text)\n                FROM user_file uf\n                WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'\n                    AND fr.file_id = CONCAT('plaintext_', uf.id::text)\n                \"\"\"\n            )\n        )\n        logger.info(\n            f\"Updated {result.rowcount} plaintext file_record ids to UUID scheme\"\n        )\n\n    # === Step 6: Ensure document_id_migrated default TRUE and backfill existing FALSE ===\n    # New records should default to migrated=True so the migration task won't run for them.\n    # Existing rows that had a legacy document_id should be marked as not migrated to be processed.\n\n    # Backfill existing records: if document_id is not null, set to FALSE\n    bind.execute(\n        text(\n            \"\"\"\n            UPDATE user_file\n            SET document_id_migrated = FALSE\n            WHERE document_id IS NOT NULL\n            \"\"\"\n        )\n    )\n\n    # === Step 7: Backfill user_file.status from index_attempt ===\n    logger.info(\"Backfilling user_file.status from index_attempt...\")\n\n    # Update user_file status based on latest index attempt\n    # Using CTEs instead of temp tables for asyncpg compatibility\n    result = bind.execute(\n        text(\n            \"\"\"\n        WITH latest_attempt AS (\n            SELECT DISTINCT ON (ia.connector_credential_pair_id)\n                ia.connector_credential_pair_id,\n                ia.status\n            FROM index_attempt ia\n            ORDER BY ia.connector_credential_pair_id, ia.time_updated DESC\n        ),\n        uf_to_ccp AS (\n            SELECT DISTINCT uf.id AS uf_id, ccp.id AS cc_pair_id\n            FROM user_file uf\n            JOIN document_by_connector_credential_pair dcc\n                ON dcc.id = REPLACE(uf.document_id, 'USER_FILE_CONNECTOR__', 'FILE_CONNECTOR__')\n            JOIN connector_credential_pair ccp\n                ON ccp.connector_id = dcc.connector_id\n                AND ccp.credential_id = dcc.credential_id\n        )\n        UPDATE user_file uf\n        SET status = CASE\n            WHEN la.status IN ('NOT_STARTED', 'IN_PROGRESS') THEN 'PROCESSING'\n            WHEN la.status = 'SUCCESS' THEN 'COMPLETED'\n            ELSE 'FAILED'\n        END\n        FROM uf_to_ccp ufc\n        LEFT JOIN latest_attempt la\n            ON la.connector_credential_pair_id = ufc.cc_pair_id\n        WHERE uf.id = ufc.uf_id\n        AND uf.status = 'PROCESSING'\n    \"\"\"\n        )\n    )\n\n    logger.info(f\"Updated status for {result.rowcount} user_file records\")\n\n    logger.info(\"Migration 2 (data preparation) completed successfully\")\n\n\ndef downgrade() -> None:\n    \"\"\"Reset populated data to allow clean downgrade of schema.\"\"\"\n\n    bind = op.get_bind()\n    inspector = sa.inspect(bind)\n\n    logger.info(\"Starting downgrade of data preparation...\")\n\n    # Reset user_file columns to allow nulls before data removal\n    if \"user_file\" in inspector.get_table_names():\n        columns = [col[\"name\"] for col in inspector.get_columns(\"user_file\")]\n\n        if \"new_id\" in columns:\n            op.alter_column(\n                \"user_file\",\n                \"new_id\",\n                nullable=True,\n                server_default=sa.text(\"gen_random_uuid()\"),\n            )\n            # Optionally clear the data\n            # bind.execute(text(\"UPDATE user_file SET new_id = NULL\"))\n            logger.info(\"Reset user_file.new_id to nullable\")\n\n    # Reset persona__user_file.user_file_id_uuid\n    if \"persona__user_file\" in inspector.get_table_names():\n        columns = [col[\"name\"] for col in inspector.get_columns(\"persona__user_file\")]\n\n        if \"user_file_id_uuid\" in columns:\n            op.alter_column(\"persona__user_file\", \"user_file_id_uuid\", nullable=True)\n            # Optionally clear the data\n            # bind.execute(text(\"UPDATE persona__user_file SET user_file_id_uuid = NULL\"))\n            logger.info(\"Reset persona__user_file.user_file_id_uuid to nullable\")\n\n    # Note: We don't delete user_project records or reset chat_session.project_id\n    # as these might be in use and can be handled by the schema downgrade\n\n    # Reset user_file.status to default\n    if \"user_file\" in inspector.get_table_names():\n        columns = [col[\"name\"] for col in inspector.get_columns(\"user_file\")]\n        if \"status\" in columns:\n            bind.execute(text(\"UPDATE user_file SET status = 'PROCESSING'\"))\n            logger.info(\"Reset user_file.status to default\")\n\n    logger.info(\"Downgrade completed successfully\")\n"
  },
  {
    "path": "backend/alembic/versions/0ebb1d516877_add_ccpair_deletion_failure_message.py",
    "content": "\"\"\"add ccpair deletion failure message\n\nRevision ID: 0ebb1d516877\nRevises: 52a219fb5233\nCreate Date: 2024-09-10 15:03:48.233926\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"0ebb1d516877\"\ndown_revision = \"52a219fb5233\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\"deletion_failure_message\", sa.String(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"connector_credential_pair\", \"deletion_failure_message\")\n"
  },
  {
    "path": "backend/alembic/versions/0f7ff6d75b57_add_index_to_index_attempt_time_created.py",
    "content": "\"\"\"add index to index_attempt.time_created\n\nRevision ID: 0f7ff6d75b57\nRevises: 369644546676\nCreate Date: 2025-01-10 14:01:14.067144\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"0f7ff6d75b57\"\ndown_revision = \"fec3db967bf7\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_index(\n        op.f(\"ix_index_attempt_status\"),\n        \"index_attempt\",\n        [\"status\"],\n        unique=False,\n    )\n\n    op.create_index(\n        op.f(\"ix_index_attempt_time_created\"),\n        \"index_attempt\",\n        [\"time_created\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(op.f(\"ix_index_attempt_time_created\"), table_name=\"index_attempt\")\n\n    op.drop_index(op.f(\"ix_index_attempt_status\"), table_name=\"index_attempt\")\n"
  },
  {
    "path": "backend/alembic/versions/114a638452db_add_default_app_mode_to_user.py",
    "content": "\"\"\"add default_app_mode to user\n\nRevision ID: 114a638452db\nRevises: feead2911109\nCreate Date: 2026-02-09 18:57:08.274640\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"114a638452db\"\ndown_revision = \"feead2911109\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"default_app_mode\",\n            sa.String(),\n            nullable=False,\n            server_default=\"CHAT\",\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"default_app_mode\")\n"
  },
  {
    "path": "backend/alembic/versions/12635f6655b7_drive_canonical_ids.py",
    "content": "\"\"\"drive-canonical-ids\n\nRevision ID: 12635f6655b7\nRevises: 58c50ef19f08\nCreate Date: 2025-06-20 14:44:54.241159\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom urllib.parse import urlparse, urlunparse\nfrom httpx import HTTPStatusError\nimport httpx\nfrom onyx.db.search_settings import SearchSettings\nfrom onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client\nfrom onyx.document_index.vespa.shared_utils.utils import (\n    replace_invalid_doc_id_characters,\n)\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT\nfrom onyx.utils.logger import setup_logger\nimport os\n\nlogger = setup_logger()\n\n# revision identifiers, used by Alembic.\nrevision = \"12635f6655b7\"\ndown_revision = \"58c50ef19f08\"\nbranch_labels = None\ndepends_on = None\n\nSKIP_CANON_DRIVE_IDS = os.environ.get(\"SKIP_CANON_DRIVE_IDS\", \"true\").lower() == \"true\"\n\n\ndef active_search_settings() -> tuple[SearchSettings, SearchSettings | None]:\n    result = op.get_bind().execute(\n        sa.text(\n            \"\"\"\n        SELECT * FROM search_settings WHERE status = 'PRESENT' ORDER BY id DESC LIMIT 1\n        \"\"\"\n        )\n    )\n    search_settings_fetch = result.fetchall()\n    search_settings = (\n        SearchSettings(**search_settings_fetch[0]._asdict())\n        if search_settings_fetch\n        else None\n    )\n\n    result2 = op.get_bind().execute(\n        sa.text(\n            \"\"\"\n        SELECT * FROM search_settings WHERE status = 'FUTURE' ORDER BY id DESC LIMIT 1\n        \"\"\"\n        )\n    )\n    search_settings_future_fetch = result2.fetchall()\n    search_settings_future = (\n        SearchSettings(**search_settings_future_fetch[0]._asdict())\n        if search_settings_future_fetch\n        else None\n    )\n\n    if not isinstance(search_settings, SearchSettings):\n        raise RuntimeError(\n            \"current search settings is of type \" + str(type(search_settings))\n        )\n    if (\n        not isinstance(search_settings_future, SearchSettings)\n        and search_settings_future is not None\n    ):\n        raise RuntimeError(\n            \"future search settings is of type \" + str(type(search_settings_future))\n        )\n\n    return search_settings, search_settings_future\n\n\ndef normalize_google_drive_url(url: str) -> str:\n    \"\"\"Remove query parameters from Google Drive URLs to create canonical document IDs.\n    NOTE: copied from drive doc_conversion.py\n    \"\"\"\n    parsed_url = urlparse(url)\n    parsed_url = parsed_url._replace(query=\"\")\n    spl_path = parsed_url.path.split(\"/\")\n    if spl_path and (spl_path[-1] in [\"edit\", \"view\", \"preview\"]):\n        spl_path.pop()\n        parsed_url = parsed_url._replace(path=\"/\".join(spl_path))\n    # Remove query parameters and reconstruct URL\n    return urlunparse(parsed_url)\n\n\ndef get_google_drive_documents_from_database() -> list[dict]:\n    \"\"\"Get all Google Drive documents from the database.\"\"\"\n    bind = op.get_bind()\n    result = bind.execute(\n        sa.text(\n            \"\"\"\n            SELECT d.id\n            FROM document d\n            JOIN document_by_connector_credential_pair dcc ON d.id = dcc.id\n            JOIN connector_credential_pair cc ON dcc.connector_id = cc.connector_id\n                AND dcc.credential_id = cc.credential_id\n            JOIN connector c ON cc.connector_id = c.id\n            WHERE c.source = 'GOOGLE_DRIVE'\n        \"\"\"\n        )\n    )\n\n    documents = []\n    for row in result:\n        documents.append({\"document_id\": row.id})\n\n    return documents\n\n\ndef update_document_id_in_database(\n    old_doc_id: str, new_doc_id: str, index_name: str\n) -> None:\n    \"\"\"Update document IDs in all relevant database tables using copy-and-swap approach.\"\"\"\n    bind = op.get_bind()\n\n    # print(f\"Updating database tables for document {old_doc_id} -> {new_doc_id}\")\n\n    # Check if new document ID already exists\n    result = bind.execute(\n        sa.text(\"SELECT COUNT(*) FROM document WHERE id = :new_id\"),\n        {\"new_id\": new_doc_id},\n    )\n    row = result.fetchone()\n    if row and row[0] > 0:\n        # print(f\"Document with ID {new_doc_id} already exists, deleting old one\")\n        delete_document_from_db(old_doc_id, index_name)\n        return\n\n    # Step 1: Create a new document row with the new ID (copy all fields from old row)\n    # Use a conservative approach to handle columns that might not exist in all installations\n    try:\n        bind.execute(\n            sa.text(\n                \"\"\"\n                INSERT INTO document (id, from_ingestion_api, boost, hidden, semantic_id,\n                                    link, doc_updated_at, primary_owners, secondary_owners,\n                                    external_user_emails, external_user_group_ids, is_public,\n                                    chunk_count, last_modified, last_synced, kg_stage, kg_processing_time)\n                SELECT :new_id, from_ingestion_api, boost, hidden, semantic_id,\n                       link, doc_updated_at, primary_owners, secondary_owners,\n                       external_user_emails, external_user_group_ids, is_public,\n                       chunk_count, last_modified, last_synced, kg_stage, kg_processing_time\n                FROM document\n                WHERE id = :old_id\n            \"\"\"\n            ),\n            {\"new_id\": new_doc_id, \"old_id\": old_doc_id},\n        )\n        # print(f\"Successfully updated database tables for document {old_doc_id} -> {new_doc_id}\")\n    except Exception as e:\n        # If the full INSERT fails, try a more basic version with only core columns\n        logger.warning(f\"Full INSERT failed, trying basic version: {e}\")\n        bind.execute(\n            sa.text(\n                \"\"\"\n                INSERT INTO document (id, from_ingestion_api, boost, hidden, semantic_id,\n                                    link, doc_updated_at, primary_owners, secondary_owners)\n                SELECT :new_id, from_ingestion_api, boost, hidden, semantic_id,\n                       link, doc_updated_at, primary_owners, secondary_owners\n                FROM document\n                WHERE id = :old_id\n            \"\"\"\n            ),\n            {\"new_id\": new_doc_id, \"old_id\": old_doc_id},\n        )\n\n    # Step 2: Update all foreign key references to point to the new ID\n\n    # Update document_by_connector_credential_pair table\n    bind.execute(\n        sa.text(\n            \"UPDATE document_by_connector_credential_pair SET id = :new_id WHERE id = :old_id\"\n        ),\n        {\"new_id\": new_doc_id, \"old_id\": old_doc_id},\n    )\n    # print(f\"Successfully updated document_by_connector_credential_pair table for document {old_doc_id} -> {new_doc_id}\")\n\n    # Update search_doc table (stores search results for chat replay)\n    # This is critical for agent functionality\n    bind.execute(\n        sa.text(\n            \"UPDATE search_doc SET document_id = :new_id WHERE document_id = :old_id\"\n        ),\n        {\"new_id\": new_doc_id, \"old_id\": old_doc_id},\n    )\n    # print(f\"Successfully updated search_doc table for document {old_doc_id} -> {new_doc_id}\")\n    # Update document_retrieval_feedback table (user feedback on documents)\n    bind.execute(\n        sa.text(\n            \"UPDATE document_retrieval_feedback SET document_id = :new_id WHERE document_id = :old_id\"\n        ),\n        {\"new_id\": new_doc_id, \"old_id\": old_doc_id},\n    )\n    # print(f\"Successfully updated document_retrieval_feedback table for document {old_doc_id} -> {new_doc_id}\")\n    # Update document__tag table (document-tag relationships)\n    bind.execute(\n        sa.text(\n            \"UPDATE document__tag SET document_id = :new_id WHERE document_id = :old_id\"\n        ),\n        {\"new_id\": new_doc_id, \"old_id\": old_doc_id},\n    )\n    # print(f\"Successfully updated document__tag table for document {old_doc_id} -> {new_doc_id}\")\n    # Update user_file table (user uploaded files linked to documents)\n    bind.execute(\n        sa.text(\n            \"UPDATE user_file SET document_id = :new_id WHERE document_id = :old_id\"\n        ),\n        {\"new_id\": new_doc_id, \"old_id\": old_doc_id},\n    )\n    # print(f\"Successfully updated user_file table for document {old_doc_id} -> {new_doc_id}\")\n    # Update KG and chunk_stats tables (these may not exist in all installations)\n    try:\n        # Update kg_entity table\n        bind.execute(\n            sa.text(\n                \"UPDATE kg_entity SET document_id = :new_id WHERE document_id = :old_id\"\n            ),\n            {\"new_id\": new_doc_id, \"old_id\": old_doc_id},\n        )\n        # print(f\"Successfully updated kg_entity table for document {old_doc_id} -> {new_doc_id}\")\n        # Update kg_entity_extraction_staging table\n        bind.execute(\n            sa.text(\n                \"UPDATE kg_entity_extraction_staging SET document_id = :new_id WHERE document_id = :old_id\"\n            ),\n            {\"new_id\": new_doc_id, \"old_id\": old_doc_id},\n        )\n        # print(f\"Successfully updated kg_entity_extraction_staging table for document {old_doc_id} -> {new_doc_id}\")\n        # Update kg_relationship table\n        bind.execute(\n            sa.text(\n                \"UPDATE kg_relationship SET source_document = :new_id WHERE source_document = :old_id\"\n            ),\n            {\"new_id\": new_doc_id, \"old_id\": old_doc_id},\n        )\n        # print(f\"Successfully updated kg_relationship table for document {old_doc_id} -> {new_doc_id}\")\n        # Update kg_relationship_extraction_staging table\n        bind.execute(\n            sa.text(\n                \"UPDATE kg_relationship_extraction_staging SET source_document = :new_id WHERE source_document = :old_id\"\n            ),\n            {\"new_id\": new_doc_id, \"old_id\": old_doc_id},\n        )\n        # print(f\"Successfully updated kg_relationship_extraction_staging table for document {old_doc_id} -> {new_doc_id}\")\n        # Update chunk_stats table\n        bind.execute(\n            sa.text(\n                \"UPDATE chunk_stats SET document_id = :new_id WHERE document_id = :old_id\"\n            ),\n            {\"new_id\": new_doc_id, \"old_id\": old_doc_id},\n        )\n        # print(f\"Successfully updated chunk_stats table for document {old_doc_id} -> {new_doc_id}\")\n        # Update chunk_stats ID field which includes document_id\n        bind.execute(\n            sa.text(\n                \"\"\"\n                UPDATE chunk_stats\n                SET id = REPLACE(id, :old_id, :new_id)\n                WHERE id LIKE :old_id_pattern\n            \"\"\"\n            ),\n            {\n                \"new_id\": new_doc_id,\n                \"old_id\": old_doc_id,\n                \"old_id_pattern\": f\"{old_doc_id}__%\",\n            },\n        )\n        # print(f\"Successfully updated chunk_stats ID field for document {old_doc_id} -> {new_doc_id}\")\n    except Exception as e:\n        logger.warning(f\"Some KG/chunk tables may not exist or failed to update: {e}\")\n\n    # Step 3: Delete the old document row (this should now be safe since all FKs point to new row)\n    bind.execute(\n        sa.text(\"DELETE FROM document WHERE id = :old_id\"), {\"old_id\": old_doc_id}\n    )\n    # print(f\"Successfully deleted document {old_doc_id} from database\")\n\n\ndef _visit_chunks(\n    *,\n    http_client: httpx.Client,\n    index_name: str,\n    selection: str,\n    continuation: str | None = None,\n) -> tuple[list[dict], str | None]:\n    \"\"\"Helper that calls the /document/v1 visit API once and returns (docs, next_token).\"\"\"\n\n    # Use the same URL as the document API, but with visit-specific params\n    base_url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)\n\n    params: dict[str, str] = {\n        \"selection\": selection,\n        \"wantedDocumentCount\": \"1000\",\n    }\n    if continuation:\n        params[\"continuation\"] = continuation\n\n    # print(f\"Visiting chunks for selection '{selection}' with params {params}\")\n    resp = http_client.get(base_url, params=params, timeout=None)\n    # print(f\"Visited chunks for document {selection}\")\n    resp.raise_for_status()\n\n    payload = resp.json()\n    return payload.get(\"documents\", []), payload.get(\"continuation\")\n\n\ndef delete_document_chunks_from_vespa(index_name: str, doc_id: str) -> None:\n    \"\"\"Delete all chunks for *doc_id* from Vespa using continuation-token paging (no offset).\"\"\"\n\n    total_deleted = 0\n    # Use exact match instead of contains - Document Selector Language doesn't support contains\n    selection = f'{index_name}.document_id==\"{doc_id}\"'\n\n    with get_vespa_http_client() as http_client:\n        continuation: str | None = None\n        while True:\n            docs, continuation = _visit_chunks(\n                http_client=http_client,\n                index_name=index_name,\n                selection=selection,\n                continuation=continuation,\n            )\n\n            if not docs:\n                break\n\n            for doc in docs:\n                vespa_full_id = doc.get(\"id\")\n                if not vespa_full_id:\n                    continue\n\n                vespa_doc_uuid = vespa_full_id.split(\"::\")[-1]\n                delete_url = f\"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_doc_uuid}\"\n\n                try:\n                    resp = http_client.delete(delete_url)\n                    resp.raise_for_status()\n                    total_deleted += 1\n                except Exception as e:\n                    print(f\"Failed to delete chunk {vespa_doc_uuid}: {e}\")\n\n            if not continuation:\n                break\n\n\ndef update_document_id_in_vespa(\n    index_name: str, old_doc_id: str, new_doc_id: str\n) -> None:\n    \"\"\"Update all chunks' document_id field from *old_doc_id* to *new_doc_id* using continuation paging.\"\"\"\n\n    clean_new_doc_id = replace_invalid_doc_id_characters(new_doc_id)\n\n    # Use exact match instead of contains - Document Selector Language doesn't support contains\n    selection = f'{index_name}.document_id==\"{old_doc_id}\"'\n\n    with get_vespa_http_client() as http_client:\n        continuation: str | None = None\n        while True:\n            # print(f\"Visiting chunks for document {old_doc_id} -> {new_doc_id}\")\n            docs, continuation = _visit_chunks(\n                http_client=http_client,\n                index_name=index_name,\n                selection=selection,\n                continuation=continuation,\n            )\n\n            if not docs:\n                break\n\n            for doc in docs:\n                vespa_full_id = doc.get(\"id\")\n                if not vespa_full_id:\n                    continue\n\n                vespa_doc_uuid = vespa_full_id.split(\"::\")[-1]\n                vespa_url = f\"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_doc_uuid}\"\n\n                update_request = {\n                    \"fields\": {\"document_id\": {\"assign\": clean_new_doc_id}}\n                }\n\n                try:\n                    resp = http_client.put(vespa_url, json=update_request)\n                    resp.raise_for_status()\n                except Exception as e:\n                    print(f\"Failed to update chunk {vespa_doc_uuid}: {e}\")\n                    raise\n\n            if not continuation:\n                break\n\n\ndef delete_document_from_db(current_doc_id: str, index_name: str) -> None:\n    # Delete all foreign key references first, then delete the document\n    try:\n        bind = op.get_bind()\n\n        # Delete from agent-related tables first (order matters due to foreign keys)\n        # Delete from agent__sub_query__search_doc first since it references search_doc\n        bind.execute(\n            sa.text(\n                \"\"\"\n                DELETE FROM agent__sub_query__search_doc\n                WHERE search_doc_id IN (\n                    SELECT id FROM search_doc WHERE document_id = :doc_id\n                )\n                \"\"\"\n            ),\n            {\"doc_id\": current_doc_id},\n        )\n\n        # Delete from chat_message__search_doc\n        bind.execute(\n            sa.text(\n                \"\"\"\n                DELETE FROM chat_message__search_doc\n                WHERE search_doc_id IN (\n                    SELECT id FROM search_doc WHERE document_id = :doc_id\n                )\n                \"\"\"\n            ),\n            {\"doc_id\": current_doc_id},\n        )\n\n        # Now we can safely delete from search_doc\n        bind.execute(\n            sa.text(\"DELETE FROM search_doc WHERE document_id = :doc_id\"),\n            {\"doc_id\": current_doc_id},\n        )\n\n        # Delete from document_by_connector_credential_pair\n        bind.execute(\n            sa.text(\n                \"DELETE FROM document_by_connector_credential_pair WHERE id = :doc_id\"\n            ),\n            {\"doc_id\": current_doc_id},\n        )\n\n        # Delete from other tables that reference this document\n        bind.execute(\n            sa.text(\n                \"DELETE FROM document_retrieval_feedback WHERE document_id = :doc_id\"\n            ),\n            {\"doc_id\": current_doc_id},\n        )\n\n        bind.execute(\n            sa.text(\"DELETE FROM document__tag WHERE document_id = :doc_id\"),\n            {\"doc_id\": current_doc_id},\n        )\n\n        bind.execute(\n            sa.text(\"DELETE FROM user_file WHERE document_id = :doc_id\"),\n            {\"doc_id\": current_doc_id},\n        )\n\n        # Delete from KG tables if they exist\n        try:\n            bind.execute(\n                sa.text(\"DELETE FROM kg_entity WHERE document_id = :doc_id\"),\n                {\"doc_id\": current_doc_id},\n            )\n\n            bind.execute(\n                sa.text(\n                    \"DELETE FROM kg_entity_extraction_staging WHERE document_id = :doc_id\"\n                ),\n                {\"doc_id\": current_doc_id},\n            )\n\n            bind.execute(\n                sa.text(\"DELETE FROM kg_relationship WHERE source_document = :doc_id\"),\n                {\"doc_id\": current_doc_id},\n            )\n\n            bind.execute(\n                sa.text(\n                    \"DELETE FROM kg_relationship_extraction_staging WHERE source_document = :doc_id\"\n                ),\n                {\"doc_id\": current_doc_id},\n            )\n\n            bind.execute(\n                sa.text(\"DELETE FROM chunk_stats WHERE document_id = :doc_id\"),\n                {\"doc_id\": current_doc_id},\n            )\n\n            bind.execute(\n                sa.text(\"DELETE FROM chunk_stats WHERE id LIKE :doc_id_pattern\"),\n                {\"doc_id_pattern\": f\"{current_doc_id}__%\"},\n            )\n\n        except Exception as e:\n            logger.warning(\n                f\"Some KG/chunk tables may not exist or failed to delete from: {e}\"\n            )\n\n        # Finally delete the document itself\n        bind.execute(\n            sa.text(\"DELETE FROM document WHERE id = :doc_id\"),\n            {\"doc_id\": current_doc_id},\n        )\n\n        # Delete chunks from vespa\n        delete_document_chunks_from_vespa(index_name, current_doc_id)\n\n    except Exception as e:\n        print(f\"Failed to delete duplicate document {current_doc_id}: {e}\")\n        # Continue with other documents instead of failing the entire migration\n\n\ndef upgrade() -> None:\n    if SKIP_CANON_DRIVE_IDS:\n        return\n    current_search_settings, _ = active_search_settings()\n\n    # Get the index name\n    if hasattr(current_search_settings, \"index_name\"):\n        index_name = current_search_settings.index_name\n    else:\n        # Default index name if we can't get it from the document_index\n        index_name = \"danswer_index\"\n\n    # Get all Google Drive documents from the database (this is faster and more reliable)\n    gdrive_documents = get_google_drive_documents_from_database()\n\n    if not gdrive_documents:\n        return\n\n    # Track normalized document IDs to detect duplicates\n    all_normalized_doc_ids = set()\n    updated_count = 0\n\n    for doc_info in gdrive_documents:\n        current_doc_id = doc_info[\"document_id\"]\n        normalized_doc_id = normalize_google_drive_url(current_doc_id)\n\n        print(f\"Processing document {current_doc_id} -> {normalized_doc_id}\")\n        # Check for duplicates\n        if normalized_doc_id in all_normalized_doc_ids:\n            # print(f\"Deleting duplicate document {current_doc_id}\")\n            delete_document_from_db(current_doc_id, index_name)\n            continue\n\n        all_normalized_doc_ids.add(normalized_doc_id)\n\n        # If the document ID already doesn't have query parameters, skip it\n        if current_doc_id == normalized_doc_id:\n            # print(f\"Skipping document {current_doc_id} -> {normalized_doc_id} because it already has no query parameters\")\n            continue\n\n        try:\n            # Update both database and Vespa in order\n            # Database first to ensure consistency\n            update_document_id_in_database(\n                current_doc_id, normalized_doc_id, index_name\n            )\n\n            # For Vespa, we can now use the original document IDs since we're using contains matching\n            update_document_id_in_vespa(index_name, current_doc_id, normalized_doc_id)\n            updated_count += 1\n            # print(f\"Finished updating document {current_doc_id} -> {normalized_doc_id}\")\n        except Exception as e:\n            print(f\"Failed to update document {current_doc_id}: {e}\")\n\n            if isinstance(e, HTTPStatusError):\n                print(f\"HTTPStatusError: {e}\")\n                print(f\"Response: {e.response.text}\")\n                print(f\"Status: {e.response.status_code}\")\n                print(f\"Headers: {e.response.headers}\")\n                print(f\"Request: {e.request.url}\")\n                print(f\"Request headers: {e.request.headers}\")\n            # Note: Rollback is complex with copy-and-swap approach since the old document is already deleted\n            # In case of failure, manual intervention may be required\n            # Continue with other documents instead of failing the entire migration\n            continue\n\n    logger.info(f\"Migration complete. Updated {updated_count} Google Drive documents\")\n\n\ndef downgrade() -> None:\n    # this is a one way migration, so no downgrade.\n    # It wouldn't make sense to store the extra query parameters\n    # and duplicate documents to allow a reversal.\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/15326fcec57e_introduce_onyx_apis.py",
    "content": "\"\"\"Introduce Onyx APIs\n\nRevision ID: 15326fcec57e\nRevises: 77d07dffae64\nCreate Date: 2023-11-11 20:51:24.228999\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\nfrom onyx.configs.constants import DocumentSource\n\n# revision identifiers, used by Alembic.\nrevision = \"15326fcec57e\"\ndown_revision = \"77d07dffae64\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\"credential\", \"is_admin\", new_column_name=\"admin_public\")\n    op.add_column(\n        \"document\",\n        sa.Column(\"from_ingestion_api\", sa.Boolean(), nullable=True),\n    )\n    op.alter_column(\n        \"connector\",\n        \"source\",\n        type_=sa.String(length=50),\n        existing_type=sa.Enum(DocumentSource, native_enum=False),\n        existing_nullable=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"document\", \"from_ingestion_api\")\n    op.alter_column(\"credential\", \"admin_public\", new_column_name=\"is_admin\")\n"
  },
  {
    "path": "backend/alembic/versions/16c37a30adf2_user_file_relationship_migration.py",
    "content": "\"\"\"Migration 3: User file relationship migration\n\nRevision ID: 16c37a30adf2\nRevises: 0cd424f32b1d\nCreate Date: 2025-09-22 09:47:34.175596\n\nThis migration converts folder-based relationships to project-based relationships.\nIt migrates persona__user_folder to persona__user_file and populates project__user_file.\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy import text\nimport logging\n\nlogger = logging.getLogger(\"alembic.runtime.migration\")\n\n# revision identifiers, used by Alembic.\nrevision = \"16c37a30adf2\"\ndown_revision = \"0cd424f32b1d\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    \"\"\"Migrate folder-based relationships to project-based relationships.\"\"\"\n\n    bind = op.get_bind()\n    inspector = sa.inspect(bind)\n\n    # === Step 1: Migrate persona__user_folder to persona__user_file ===\n    table_names = inspector.get_table_names()\n\n    if \"persona__user_folder\" in table_names and \"user_file\" in table_names:\n        user_file_columns = [col[\"name\"] for col in inspector.get_columns(\"user_file\")]\n        has_new_id = \"new_id\" in user_file_columns\n\n        if has_new_id and \"folder_id\" in user_file_columns:\n            logger.info(\n                \"Migrating persona__user_folder relationships to persona__user_file...\"\n            )\n\n            # Count relationships to migrate (asyncpg-compatible)\n            count_query = text(\n                \"\"\"\n                SELECT COUNT(*)\n                FROM (\n                    SELECT DISTINCT puf.persona_id, uf.id\n                    FROM persona__user_folder puf\n                    JOIN user_file uf ON uf.folder_id = puf.user_folder_id\n                    WHERE NOT EXISTS (\n                        SELECT 1\n                        FROM persona__user_file p2\n                        WHERE p2.persona_id = puf.persona_id\n                        AND p2.user_file_id = uf.id\n                    )\n                ) AS distinct_pairs\n            \"\"\"\n            )\n            to_migrate = bind.execute(count_query).scalar_one()\n\n            if to_migrate > 0:\n                logger.info(f\"Creating {to_migrate} persona-file relationships...\")\n\n                # Migrate in batches to avoid memory issues\n                batch_size = 10000\n                total_inserted = 0\n\n                while True:\n                    # Insert batch directly using subquery (asyncpg compatible)\n                    result = bind.execute(\n                        text(\n                            \"\"\"\n                        INSERT INTO persona__user_file (persona_id, user_file_id, user_file_id_uuid)\n                        SELECT DISTINCT puf.persona_id, uf.id as file_id, uf.new_id\n                        FROM persona__user_folder puf\n                        JOIN user_file uf ON uf.folder_id = puf.user_folder_id\n                        WHERE NOT EXISTS (\n                            SELECT 1\n                            FROM persona__user_file p2\n                            WHERE p2.persona_id = puf.persona_id\n                            AND p2.user_file_id = uf.id\n                        )\n                        LIMIT :batch_size\n                    \"\"\"\n                        ),\n                        {\"batch_size\": batch_size},\n                    )\n\n                    inserted = result.rowcount\n                    total_inserted += inserted\n\n                    if inserted < batch_size:\n                        break\n\n                    logger.info(\n                        f\"  Migrated {total_inserted}/{to_migrate} relationships...\"\n                    )\n\n                logger.info(\n                    f\"Created {total_inserted} persona__user_file relationships\"\n                )\n\n    # === Step 2: Add foreign key for chat_session.project_id ===\n    chat_session_fks = inspector.get_foreign_keys(\"chat_session\")\n    fk_exists = any(\n        fk[\"name\"] == \"fk_chat_session_project_id\" for fk in chat_session_fks\n    )\n\n    if not fk_exists:\n        logger.info(\"Adding foreign key constraint for chat_session.project_id...\")\n        op.create_foreign_key(\n            \"fk_chat_session_project_id\",\n            \"chat_session\",\n            \"user_project\",\n            [\"project_id\"],\n            [\"id\"],\n        )\n        logger.info(\"Added foreign key constraint\")\n\n    # === Step 3: Populate project__user_file from user_file.folder_id ===\n    user_file_columns = [col[\"name\"] for col in inspector.get_columns(\"user_file\")]\n    has_new_id = \"new_id\" in user_file_columns\n\n    if has_new_id and \"folder_id\" in user_file_columns:\n        logger.info(\"Populating project__user_file from folder relationships...\")\n\n        # Count relationships to create\n        count_query = text(\n            \"\"\"\n            SELECT COUNT(*)\n            FROM user_file uf\n            WHERE uf.folder_id IS NOT NULL\n            AND NOT EXISTS (\n                SELECT 1\n                FROM project__user_file puf\n                WHERE puf.project_id = uf.folder_id\n                AND puf.user_file_id = uf.new_id\n            )\n        \"\"\"\n        )\n        to_create = bind.execute(count_query).scalar_one()\n\n        if to_create > 0:\n            logger.info(f\"Creating {to_create} project-file relationships...\")\n\n            # Insert in batches\n            batch_size = 10000\n            total_inserted = 0\n\n            while True:\n                result = bind.execute(\n                    text(\n                        \"\"\"\n                    INSERT INTO project__user_file (project_id, user_file_id)\n                    SELECT uf.folder_id, uf.new_id\n                    FROM user_file uf\n                    WHERE uf.folder_id IS NOT NULL\n                    AND NOT EXISTS (\n                        SELECT 1\n                        FROM project__user_file puf\n                        WHERE puf.project_id = uf.folder_id\n                        AND puf.user_file_id = uf.new_id\n                    )\n                    LIMIT :batch_size\n                    ON CONFLICT (project_id, user_file_id) DO NOTHING\n                \"\"\"\n                    ),\n                    {\"batch_size\": batch_size},\n                )\n\n                inserted = result.rowcount\n                total_inserted += inserted\n\n                if inserted < batch_size:\n                    break\n\n                logger.info(f\"  Created {total_inserted}/{to_create} relationships...\")\n\n            logger.info(f\"Created {total_inserted} project__user_file relationships\")\n\n    # === Step 4: Create index on chat_session.project_id ===\n    try:\n        indexes = [ix.get(\"name\") for ix in inspector.get_indexes(\"chat_session\")]\n    except Exception:\n        indexes = []\n\n    if \"ix_chat_session_project_id\" not in indexes:\n        logger.info(\"Creating index on chat_session.project_id...\")\n        op.create_index(\n            \"ix_chat_session_project_id\", \"chat_session\", [\"project_id\"], unique=False\n        )\n        logger.info(\"Created index\")\n\n    logger.info(\"Migration 3 (relationship migration) completed successfully\")\n\n\ndef downgrade() -> None:\n    \"\"\"Remove migrated relationships and constraints.\"\"\"\n\n    bind = op.get_bind()\n    inspector = sa.inspect(bind)\n\n    logger.info(\"Starting downgrade of relationship migration...\")\n\n    # Drop index on chat_session.project_id\n    try:\n        indexes = [ix.get(\"name\") for ix in inspector.get_indexes(\"chat_session\")]\n        if \"ix_chat_session_project_id\" in indexes:\n            op.drop_index(\"ix_chat_session_project_id\", \"chat_session\")\n            logger.info(\"Dropped index on chat_session.project_id\")\n    except Exception:\n        pass\n\n    # Drop foreign key constraint\n    try:\n        chat_session_fks = inspector.get_foreign_keys(\"chat_session\")\n        fk_exists = any(\n            fk[\"name\"] == \"fk_chat_session_project_id\" for fk in chat_session_fks\n        )\n        if fk_exists:\n            op.drop_constraint(\n                \"fk_chat_session_project_id\", \"chat_session\", type_=\"foreignkey\"\n            )\n            logger.info(\"Dropped foreign key constraint on chat_session.project_id\")\n    except Exception:\n        pass\n\n    # Clear project__user_file relationships (but keep the table for migration 1 to handle)\n    if \"project__user_file\" in inspector.get_table_names():\n        result = bind.execute(text(\"DELETE FROM project__user_file\"))\n        logger.info(f\"Cleared {result.rowcount} records from project__user_file\")\n\n    # Remove migrated persona__user_file relationships\n    # Only remove those that came from folder relationships\n    if all(\n        table in inspector.get_table_names()\n        for table in [\"persona__user_file\", \"persona__user_folder\", \"user_file\"]\n    ):\n        user_file_columns = [col[\"name\"] for col in inspector.get_columns(\"user_file\")]\n        if \"folder_id\" in user_file_columns:\n            result = bind.execute(\n                text(\n                    \"\"\"\n                DELETE FROM persona__user_file puf\n                WHERE EXISTS (\n                    SELECT 1\n                    FROM user_file uf\n                    JOIN persona__user_folder puf2\n                        ON puf2.user_folder_id = uf.folder_id\n                    WHERE puf.persona_id = puf2.persona_id\n                    AND puf.user_file_id = uf.id\n                )\n            \"\"\"\n                )\n            )\n            logger.info(\n                f\"Removed {result.rowcount} migrated persona__user_file relationships\"\n            )\n\n    logger.info(\"Downgrade completed successfully\")\n"
  },
  {
    "path": "backend/alembic/versions/173cae5bba26_port_config_store.py",
    "content": "\"\"\"Port Config Store\n\nRevision ID: 173cae5bba26\nRevises: e50154680a5c\nCreate Date: 2024-03-19 15:30:44.425436\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"173cae5bba26\"\ndown_revision = \"e50154680a5c\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"key_value_store\",\n        sa.Column(\"key\", sa.String(), nullable=False),\n        sa.Column(\"value\", postgresql.JSONB(astext_type=sa.Text()), nullable=False),\n        sa.PrimaryKeyConstraint(\"key\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"key_value_store\")\n"
  },
  {
    "path": "backend/alembic/versions/175ea04c7087_add_user_preferences.py",
    "content": "\"\"\"add_user_preferences\n\nRevision ID: 175ea04c7087\nRevises: d56ffa94ca32\nCreate Date: 2026-02-04 18:16:24.830873\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"175ea04c7087\"\ndown_revision = \"d56ffa94ca32\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\"user_preferences\", sa.Text(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"user_preferences\")\n"
  },
  {
    "path": "backend/alembic/versions/177de57c21c9_display_custom_llm_models.py",
    "content": "\"\"\"display custom llm models\n\nRevision ID: 177de57c21c9\nRevises: 4ee1287bd26a\nCreate Date: 2024-11-21 11:49:04.488677\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\nfrom sqlalchemy import and_\n\nrevision = \"177de57c21c9\"\ndown_revision = \"4ee1287bd26a\"\nbranch_labels = None\ndepends_on = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n    llm_provider = sa.table(\n        \"llm_provider\",\n        sa.column(\"id\", sa.Integer),\n        sa.column(\"provider\", sa.String),\n        sa.column(\"model_names\", postgresql.ARRAY(sa.String)),\n        sa.column(\"display_model_names\", postgresql.ARRAY(sa.String)),\n    )\n\n    excluded_providers = [\"openai\", \"bedrock\", \"anthropic\", \"azure\"]\n\n    providers_to_update = sa.select(\n        llm_provider.c.id,\n        llm_provider.c.model_names,\n        llm_provider.c.display_model_names,\n    ).where(\n        and_(\n            ~llm_provider.c.provider.in_(excluded_providers),\n            llm_provider.c.model_names.isnot(None),\n        )\n    )\n\n    results = conn.execute(providers_to_update).fetchall()\n\n    for provider_id, model_names, display_model_names in results:\n        if display_model_names is None:\n            display_model_names = []\n\n        combined_model_names = list(set(display_model_names + model_names))\n        update_stmt = (\n            llm_provider.update()\n            .where(llm_provider.c.id == provider_id)\n            .values(display_model_names=combined_model_names)\n        )\n        conn.execute(update_stmt)\n\n\ndef downgrade() -> None:\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/18b5b2524446_add_is_clarification_to_chat_message.py",
    "content": "\"\"\"add is_clarification to chat_message\n\nRevision ID: 18b5b2524446\nRevises: 87c52ec39f84\nCreate Date: 2025-01-16\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"18b5b2524446\"\ndown_revision = \"87c52ec39f84\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\n            \"is_clarification\", sa.Boolean(), nullable=False, server_default=\"false\"\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_message\", \"is_clarification\")\n"
  },
  {
    "path": "backend/alembic/versions/19c0ccb01687_migrate_to_contextual_rag_model.py",
    "content": "\"\"\"Migrate to contextual rag model\n\nRevision ID: 19c0ccb01687\nRevises: 9c54986124c6\nCreate Date: 2026-02-12 11:21:41.798037\n\n\"\"\"\n\nimport sqlalchemy as sa\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"19c0ccb01687\"\ndown_revision = \"9c54986124c6\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Widen the column to fit 'CONTEXTUAL_RAG' (15 chars); was varchar(10)\n    # when the table was created with only CHAT/VISION values.\n    op.alter_column(\n        \"llm_model_flow\",\n        \"llm_model_flow_type\",\n        type_=sa.String(length=20),\n        existing_type=sa.String(length=10),\n        existing_nullable=False,\n    )\n\n    # For every search_settings row that has contextual rag configured,\n    # create an llm_model_flow entry. is_default is TRUE if the row\n    # belongs to the PRESENT search settings, FALSE otherwise.\n    op.execute(\n        \"\"\"\n        INSERT INTO llm_model_flow (llm_model_flow_type, model_configuration_id, is_default)\n        SELECT DISTINCT\n            'CONTEXTUAL_RAG',\n            mc.id,\n            (ss.status = 'PRESENT')\n        FROM search_settings ss\n        JOIN llm_provider lp\n            ON lp.name = ss.contextual_rag_llm_provider\n        JOIN model_configuration mc\n            ON mc.llm_provider_id = lp.id\n            AND mc.name = ss.contextual_rag_llm_name\n        WHERE ss.enable_contextual_rag = TRUE\n            AND ss.contextual_rag_llm_name IS NOT NULL\n            AND ss.contextual_rag_llm_provider IS NOT NULL\n        ON CONFLICT (llm_model_flow_type, model_configuration_id)\n            DO UPDATE SET is_default = EXCLUDED.is_default\n            WHERE EXCLUDED.is_default = TRUE\n        \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    op.execute(\n        \"\"\"\n        DELETE FROM llm_model_flow\n        WHERE llm_model_flow_type = 'CONTEXTUAL_RAG'\n        \"\"\"\n    )\n\n    op.alter_column(\n        \"llm_model_flow\",\n        \"llm_model_flow_type\",\n        type_=sa.String(length=10),\n        existing_type=sa.String(length=20),\n        existing_nullable=False,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/1a03d2c2856b_add_indexes_to_document__tag.py",
    "content": "\"\"\"Add indexes to document__tag\n\nRevision ID: 1a03d2c2856b\nRevises: 9c00a2bccb83\nCreate Date: 2025-02-18 10:45:13.957807\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"1a03d2c2856b\"\ndown_revision = \"9c00a2bccb83\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_index(\n        op.f(\"ix_document__tag_tag_id\"),\n        \"document__tag\",\n        [\"tag_id\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(op.f(\"ix_document__tag_tag_id\"), table_name=\"document__tag\")\n"
  },
  {
    "path": "backend/alembic/versions/1b10e1fda030_add_additional_data_to_notifications.py",
    "content": "\"\"\"add additional data to notifications\n\nRevision ID: 1b10e1fda030\nRevises: 6756efa39ada\nCreate Date: 2024-10-15 19:26:44.071259\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"1b10e1fda030\"\ndown_revision = \"6756efa39ada\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"notification\", sa.Column(\"additional_data\", postgresql.JSONB(), nullable=True)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"notification\", \"additional_data\")\n"
  },
  {
    "path": "backend/alembic/versions/1b8206b29c5d_add_user_delete_cascades.py",
    "content": "\"\"\"add_user_delete_cascades\n\nRevision ID: 1b8206b29c5d\nRevises: 35e6853a51d5\nCreate Date: 2024-09-18 11:48:59.418726\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"1b8206b29c5d\"\ndown_revision = \"35e6853a51d5\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.drop_constraint(\"credential_user_id_fkey\", \"credential\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"credential_user_id_fkey\",\n        \"credential\",\n        \"user\",\n        [\"user_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    op.drop_constraint(\"chat_session_user_id_fkey\", \"chat_session\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"chat_session_user_id_fkey\",\n        \"chat_session\",\n        \"user\",\n        [\"user_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    op.drop_constraint(\"chat_folder_user_id_fkey\", \"chat_folder\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"chat_folder_user_id_fkey\",\n        \"chat_folder\",\n        \"user\",\n        [\"user_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    op.drop_constraint(\"prompt_user_id_fkey\", \"prompt\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"prompt_user_id_fkey\", \"prompt\", \"user\", [\"user_id\"], [\"id\"], ondelete=\"CASCADE\"\n    )\n\n    op.drop_constraint(\"notification_user_id_fkey\", \"notification\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"notification_user_id_fkey\",\n        \"notification\",\n        \"user\",\n        [\"user_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    op.drop_constraint(\"inputprompt_user_id_fkey\", \"inputprompt\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"inputprompt_user_id_fkey\",\n        \"inputprompt\",\n        \"user\",\n        [\"user_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n\ndef downgrade() -> None:\n    op.drop_constraint(\"credential_user_id_fkey\", \"credential\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"credential_user_id_fkey\", \"credential\", \"user\", [\"user_id\"], [\"id\"]\n    )\n\n    op.drop_constraint(\"chat_session_user_id_fkey\", \"chat_session\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"chat_session_user_id_fkey\", \"chat_session\", \"user\", [\"user_id\"], [\"id\"]\n    )\n\n    op.drop_constraint(\"chat_folder_user_id_fkey\", \"chat_folder\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"chat_folder_user_id_fkey\", \"chat_folder\", \"user\", [\"user_id\"], [\"id\"]\n    )\n\n    op.drop_constraint(\"prompt_user_id_fkey\", \"prompt\", type_=\"foreignkey\")\n    op.create_foreign_key(\"prompt_user_id_fkey\", \"prompt\", \"user\", [\"user_id\"], [\"id\"])\n\n    op.drop_constraint(\"notification_user_id_fkey\", \"notification\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"notification_user_id_fkey\", \"notification\", \"user\", [\"user_id\"], [\"id\"]\n    )\n\n    op.drop_constraint(\"inputprompt_user_id_fkey\", \"inputprompt\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"inputprompt_user_id_fkey\", \"inputprompt\", \"user\", [\"user_id\"], [\"id\"]\n    )\n"
  },
  {
    "path": "backend/alembic/versions/1d78c0ca7853_remove_voice_provider_deleted_column.py",
    "content": "\"\"\"remove voice_provider deleted column\n\nRevision ID: 1d78c0ca7853\nRevises: a3f8b2c1d4e5\nCreate Date: 2026-03-26 11:30:53.883127\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"1d78c0ca7853\"\ndown_revision = \"a3f8b2c1d4e5\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Hard-delete any soft-deleted rows before dropping the column\n    op.execute(\"DELETE FROM voice_provider WHERE deleted = true\")\n    op.drop_column(\"voice_provider\", \"deleted\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"voice_provider\",\n        sa.Column(\n            \"deleted\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.text(\"false\"),\n        ),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/1f2a3b4c5d6e_add_internet_search_and_content_providers.py",
    "content": "\"\"\"add internet search and content provider tables\n\nRevision ID: 1f2a3b4c5d6e\nRevises: 9drpiiw74ljy\nCreate Date: 2025-11-10 19:45:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n\n# revision identifiers, used by Alembic.\nrevision = \"1f2a3b4c5d6e\"\ndown_revision = \"9drpiiw74ljy\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"internet_search_provider\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\"name\", sa.String(), nullable=False, unique=True),\n        sa.Column(\"provider_type\", sa.String(), nullable=False),\n        sa.Column(\"api_key\", sa.LargeBinary(), nullable=True),\n        sa.Column(\"config\", postgresql.JSONB(astext_type=sa.Text()), nullable=True),\n        sa.Column(\n            \"is_active\", sa.Boolean(), nullable=False, server_default=sa.text(\"false\")\n        ),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            nullable=False,\n            server_default=sa.text(\"now()\"),\n        ),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            nullable=False,\n            server_default=sa.text(\"now()\"),\n        ),\n    )\n    op.create_index(\n        \"ix_internet_search_provider_is_active\",\n        \"internet_search_provider\",\n        [\"is_active\"],\n    )\n\n    op.create_table(\n        \"internet_content_provider\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\"name\", sa.String(), nullable=False, unique=True),\n        sa.Column(\"provider_type\", sa.String(), nullable=False),\n        sa.Column(\"api_key\", sa.LargeBinary(), nullable=True),\n        sa.Column(\"config\", postgresql.JSONB(astext_type=sa.Text()), nullable=True),\n        sa.Column(\n            \"is_active\", sa.Boolean(), nullable=False, server_default=sa.text(\"false\")\n        ),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            nullable=False,\n            server_default=sa.text(\"now()\"),\n        ),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            nullable=False,\n            server_default=sa.text(\"now()\"),\n        ),\n    )\n    op.create_index(\n        \"ix_internet_content_provider_is_active\",\n        \"internet_content_provider\",\n        [\"is_active\"],\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\n        \"ix_internet_content_provider_is_active\", table_name=\"internet_content_provider\"\n    )\n    op.drop_table(\"internet_content_provider\")\n    op.drop_index(\n        \"ix_internet_search_provider_is_active\", table_name=\"internet_search_provider\"\n    )\n    op.drop_table(\"internet_search_provider\")\n"
  },
  {
    "path": "backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py",
    "content": "\"\"\"embedding model -> search settings\n\nRevision ID: 1f60f60c3401\nRevises: f17bf3b0d9f1\nCreate Date: 2024-08-25 12:39:51.731632\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"1f60f60c3401\"\ndown_revision = \"f17bf3b0d9f1\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.drop_constraint(\n        \"index_attempt__embedding_model_fk\", \"index_attempt\", type_=\"foreignkey\"\n    )\n    # Rename the table\n    op.rename_table(\"embedding_model\", \"search_settings\")\n\n    # Add new columns\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"multipass_indexing\", sa.Boolean(), nullable=False, server_default=\"false\"\n        ),\n    )\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"multilingual_expansion\",\n            postgresql.ARRAY(sa.String()),\n            nullable=False,\n            server_default=\"{}\",\n        ),\n    )\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"disable_rerank_for_streaming\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=\"false\",\n        ),\n    )\n    op.add_column(\n        \"search_settings\", sa.Column(\"rerank_model_name\", sa.String(), nullable=True)\n    )\n    op.add_column(\n        \"search_settings\", sa.Column(\"rerank_provider_type\", sa.String(), nullable=True)\n    )\n    op.add_column(\n        \"search_settings\", sa.Column(\"rerank_api_key\", sa.String(), nullable=True)\n    )\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"num_rerank\",\n            sa.Integer(),\n            nullable=False,\n            server_default=str(20),\n        ),\n    )\n\n    # Add the new column as nullable initially\n    op.add_column(\n        \"index_attempt\", sa.Column(\"search_settings_id\", sa.Integer(), nullable=True)\n    )\n\n    # Populate the new column with data from the existing embedding_model_id\n    op.execute(\"UPDATE index_attempt SET search_settings_id = embedding_model_id\")\n\n    # Create the foreign key constraint\n    op.create_foreign_key(\n        \"fk_index_attempt_search_settings\",\n        \"index_attempt\",\n        \"search_settings\",\n        [\"search_settings_id\"],\n        [\"id\"],\n    )\n\n    # Make the new column non-nullable\n    op.alter_column(\"index_attempt\", \"search_settings_id\", nullable=False)\n\n    # Drop the old embedding_model_id column\n    op.drop_column(\"index_attempt\", \"embedding_model_id\")\n\n\ndef downgrade() -> None:\n    # Add back the embedding_model_id column\n    op.add_column(\n        \"index_attempt\", sa.Column(\"embedding_model_id\", sa.Integer(), nullable=True)\n    )\n\n    # Populate the old column with data from search_settings_id\n    op.execute(\"UPDATE index_attempt SET embedding_model_id = search_settings_id\")\n\n    # Make the old column non-nullable\n    op.alter_column(\"index_attempt\", \"embedding_model_id\", nullable=False)\n\n    # Drop the foreign key constraint\n    op.drop_constraint(\n        \"fk_index_attempt_search_settings\", \"index_attempt\", type_=\"foreignkey\"\n    )\n\n    # Drop the new search_settings_id column\n    op.drop_column(\"index_attempt\", \"search_settings_id\")\n\n    # Rename the table back\n    op.rename_table(\"search_settings\", \"embedding_model\")\n\n    # Remove added columns\n    op.drop_column(\"embedding_model\", \"num_rerank\")\n    op.drop_column(\"embedding_model\", \"rerank_api_key\")\n    op.drop_column(\"embedding_model\", \"rerank_provider_type\")\n    op.drop_column(\"embedding_model\", \"rerank_model_name\")\n    op.drop_column(\"embedding_model\", \"disable_rerank_for_streaming\")\n    op.drop_column(\"embedding_model\", \"multilingual_expansion\")\n    op.drop_column(\"embedding_model\", \"multipass_indexing\")\n\n    op.create_foreign_key(\n        \"index_attempt__embedding_model_fk\",\n        \"index_attempt\",\n        \"embedding_model\",\n        [\"embedding_model_id\"],\n        [\"id\"],\n    )\n"
  },
  {
    "path": "backend/alembic/versions/2020d417ec84_single_onyx_craft_migration.py",
    "content": "\"\"\"single onyx craft migration\n\nConsolidates all buildmode/onyx craft tables into a single migration.\n\nTables created:\n- build_session: User build sessions with status tracking\n- sandbox: User-owned containerized environments (one per user)\n- artifact: Build output files (web apps, documents, images)\n- snapshot: Sandbox filesystem snapshots\n- build_message: Conversation messages for build sessions\n\nExisting table modified:\n- connector_credential_pair: Added processing_mode column\n\nRevision ID: 2020d417ec84\nRevises: 41fa44bef321\nCreate Date: 2026-01-26 14:43:54.641405\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n\n# revision identifiers, used by Alembic.\nrevision = \"2020d417ec84\"\ndown_revision = \"41fa44bef321\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # ==========================================================================\n    # ENUMS\n    # ==========================================================================\n\n    # Build session status enum\n    build_session_status_enum = sa.Enum(\n        \"active\",\n        \"idle\",\n        name=\"buildsessionstatus\",\n        native_enum=False,\n    )\n\n    # Sandbox status enum\n    sandbox_status_enum = sa.Enum(\n        \"provisioning\",\n        \"running\",\n        \"idle\",\n        \"sleeping\",\n        \"terminated\",\n        \"failed\",\n        name=\"sandboxstatus\",\n        native_enum=False,\n    )\n\n    # Artifact type enum\n    artifact_type_enum = sa.Enum(\n        \"web_app\",\n        \"pptx\",\n        \"docx\",\n        \"markdown\",\n        \"excel\",\n        \"image\",\n        name=\"artifacttype\",\n        native_enum=False,\n    )\n\n    # ==========================================================================\n    # BUILD_SESSION TABLE\n    # ==========================================================================\n\n    op.create_table(\n        \"build_session\",\n        sa.Column(\"id\", postgresql.UUID(as_uuid=True), primary_key=True),\n        sa.Column(\n            \"user_id\",\n            postgresql.UUID(as_uuid=True),\n            sa.ForeignKey(\"user.id\", ondelete=\"CASCADE\"),\n            nullable=True,\n        ),\n        sa.Column(\"name\", sa.String(), nullable=True),\n        sa.Column(\n            \"status\",\n            build_session_status_enum,\n            nullable=False,\n            server_default=\"active\",\n        ),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"last_activity_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"nextjs_port\", sa.Integer(), nullable=True),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    op.create_index(\n        \"ix_build_session_user_created\",\n        \"build_session\",\n        [\"user_id\", sa.text(\"created_at DESC\")],\n        unique=False,\n    )\n    op.create_index(\n        \"ix_build_session_status\",\n        \"build_session\",\n        [\"status\"],\n        unique=False,\n    )\n\n    # ==========================================================================\n    # SANDBOX TABLE (user-owned, one per user)\n    # ==========================================================================\n\n    op.create_table(\n        \"sandbox\",\n        sa.Column(\"id\", postgresql.UUID(as_uuid=True), primary_key=True),\n        sa.Column(\n            \"user_id\",\n            postgresql.UUID(as_uuid=True),\n            sa.ForeignKey(\"user.id\", ondelete=\"CASCADE\"),\n            nullable=False,\n        ),\n        sa.Column(\"container_id\", sa.String(), nullable=True),\n        sa.Column(\n            \"status\",\n            sandbox_status_enum,\n            nullable=False,\n            server_default=\"provisioning\",\n        ),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"last_heartbeat\", sa.DateTime(timezone=True), nullable=True),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"user_id\", name=\"sandbox_user_id_key\"),\n    )\n\n    op.create_index(\n        \"ix_sandbox_status\",\n        \"sandbox\",\n        [\"status\"],\n        unique=False,\n    )\n    op.create_index(\n        \"ix_sandbox_container_id\",\n        \"sandbox\",\n        [\"container_id\"],\n        unique=False,\n    )\n\n    # ==========================================================================\n    # ARTIFACT TABLE\n    # ==========================================================================\n\n    op.create_table(\n        \"artifact\",\n        sa.Column(\"id\", postgresql.UUID(as_uuid=True), primary_key=True),\n        sa.Column(\n            \"session_id\",\n            postgresql.UUID(as_uuid=True),\n            sa.ForeignKey(\"build_session.id\", ondelete=\"CASCADE\"),\n            nullable=False,\n        ),\n        sa.Column(\"type\", artifact_type_enum, nullable=False),\n        sa.Column(\"path\", sa.String(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    op.create_index(\n        \"ix_artifact_session_created\",\n        \"artifact\",\n        [\"session_id\", sa.text(\"created_at DESC\")],\n        unique=False,\n    )\n    op.create_index(\n        \"ix_artifact_type\",\n        \"artifact\",\n        [\"type\"],\n        unique=False,\n    )\n\n    # ==========================================================================\n    # SNAPSHOT TABLE\n    # ==========================================================================\n\n    op.create_table(\n        \"snapshot\",\n        sa.Column(\"id\", postgresql.UUID(as_uuid=True), primary_key=True),\n        sa.Column(\n            \"session_id\",\n            postgresql.UUID(as_uuid=True),\n            sa.ForeignKey(\"build_session.id\", ondelete=\"CASCADE\"),\n            nullable=False,\n        ),\n        sa.Column(\"storage_path\", sa.String(), nullable=False),\n        sa.Column(\"size_bytes\", sa.BigInteger(), nullable=False, server_default=\"0\"),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    op.create_index(\n        \"ix_snapshot_session_created\",\n        \"snapshot\",\n        [\"session_id\", sa.text(\"created_at DESC\")],\n        unique=False,\n    )\n\n    # ==========================================================================\n    # BUILD_MESSAGE TABLE\n    # ==========================================================================\n\n    op.create_table(\n        \"build_message\",\n        sa.Column(\"id\", postgresql.UUID(as_uuid=True), primary_key=True),\n        sa.Column(\n            \"session_id\",\n            postgresql.UUID(as_uuid=True),\n            sa.ForeignKey(\"build_session.id\", ondelete=\"CASCADE\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"turn_index\",\n            sa.Integer(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"type\",\n            sa.Enum(\n                \"SYSTEM\",\n                \"USER\",\n                \"ASSISTANT\",\n                \"DANSWER\",\n                name=\"messagetype\",\n                create_type=False,\n                native_enum=False,\n            ),\n            nullable=False,\n        ),\n        sa.Column(\n            \"message_metadata\",\n            postgresql.JSONB(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    op.create_index(\n        \"ix_build_message_session_turn\",\n        \"build_message\",\n        [\"session_id\", \"turn_index\", sa.text(\"created_at ASC\")],\n        unique=False,\n    )\n\n    # ==========================================================================\n    # CONNECTOR_CREDENTIAL_PAIR MODIFICATION\n    # ==========================================================================\n\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"processing_mode\",\n            sa.String(),\n            nullable=False,\n            server_default=\"regular\",\n        ),\n    )\n\n\ndef downgrade() -> None:\n    # ==========================================================================\n    # CONNECTOR_CREDENTIAL_PAIR MODIFICATION\n    # ==========================================================================\n\n    op.drop_column(\"connector_credential_pair\", \"processing_mode\")\n\n    # ==========================================================================\n    # BUILD_MESSAGE TABLE\n    # ==========================================================================\n\n    op.drop_index(\"ix_build_message_session_turn\", table_name=\"build_message\")\n    op.drop_table(\"build_message\")\n\n    # ==========================================================================\n    # SNAPSHOT TABLE\n    # ==========================================================================\n\n    op.drop_index(\"ix_snapshot_session_created\", table_name=\"snapshot\")\n    op.drop_table(\"snapshot\")\n\n    # ==========================================================================\n    # ARTIFACT TABLE\n    # ==========================================================================\n\n    op.drop_index(\"ix_artifact_type\", table_name=\"artifact\")\n    op.drop_index(\"ix_artifact_session_created\", table_name=\"artifact\")\n    op.drop_table(\"artifact\")\n    sa.Enum(name=\"artifacttype\").drop(op.get_bind(), checkfirst=True)\n\n    # ==========================================================================\n    # SANDBOX TABLE\n    # ==========================================================================\n\n    op.drop_index(\"ix_sandbox_container_id\", table_name=\"sandbox\")\n    op.drop_index(\"ix_sandbox_status\", table_name=\"sandbox\")\n    op.drop_table(\"sandbox\")\n    sa.Enum(name=\"sandboxstatus\").drop(op.get_bind(), checkfirst=True)\n\n    # ==========================================================================\n    # BUILD_SESSION TABLE\n    # ==========================================================================\n\n    op.drop_index(\"ix_build_session_status\", table_name=\"build_session\")\n    op.drop_index(\"ix_build_session_user_created\", table_name=\"build_session\")\n    op.drop_table(\"build_session\")\n    sa.Enum(name=\"buildsessionstatus\").drop(op.get_bind(), checkfirst=True)\n"
  },
  {
    "path": "backend/alembic/versions/213fd978c6d8_notifications.py",
    "content": "\"\"\"notifications\n\nRevision ID: 213fd978c6d8\nRevises: 5fc1f54cc252\nCreate Date: 2024-08-10 11:13:36.070790\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"213fd978c6d8\"\ndown_revision = \"5fc1f54cc252\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"notification\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"notif_type\",\n            sa.String(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"user_id\",\n            sa.UUID(),\n            nullable=True,\n        ),\n        sa.Column(\"dismissed\", sa.Boolean(), nullable=False),\n        sa.Column(\"last_shown\", sa.DateTime(timezone=True), nullable=False),\n        sa.Column(\"first_shown\", sa.DateTime(timezone=True), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"notification\")\n"
  },
  {
    "path": "backend/alembic/versions/238b84885828_add_foreign_key_to_user__external_user_.py",
    "content": "\"\"\"Add foreign key to user__external_user_group_id\n\nRevision ID: 238b84885828\nRevises: a7688ab35c45\nCreate Date: 2025-05-19 17:15:33.424584\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"238b84885828\"\ndown_revision = \"a7688ab35c45\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # First, clean up any entries that don't have a valid cc_pair_id\n    op.execute(\n        \"\"\"\n        DELETE FROM user__external_user_group_id\n        WHERE cc_pair_id NOT IN (SELECT id FROM connector_credential_pair)\n        \"\"\"\n    )\n\n    # Add foreign key constraint with cascade delete\n    op.create_foreign_key(\n        \"fk_user__external_user_group_id_cc_pair_id\",\n        \"user__external_user_group_id\",\n        \"connector_credential_pair\",\n        [\"cc_pair_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n\ndef downgrade() -> None:\n    # Drop the foreign key constraint\n    op.drop_constraint(\n        \"fk_user__external_user_group_id_cc_pair_id\",\n        \"user__external_user_group_id\",\n        type_=\"foreignkey\",\n    )\n"
  },
  {
    "path": "backend/alembic/versions/23957775e5f5_remove_feedback_foreignkey_constraint.py",
    "content": "\"\"\"remove-feedback-foreignkey-constraint\n\nRevision ID: 23957775e5f5\nRevises: bc9771dccadf\nCreate Date: 2024-06-27 16:04:51.480437\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"23957775e5f5\"\ndown_revision = \"bc9771dccadf\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.drop_constraint(\n        \"chat_feedback__chat_message_fk\", \"chat_feedback\", type_=\"foreignkey\"\n    )\n    op.create_foreign_key(\n        \"chat_feedback__chat_message_fk\",\n        \"chat_feedback\",\n        \"chat_message\",\n        [\"chat_message_id\"],\n        [\"id\"],\n        ondelete=\"SET NULL\",\n    )\n    op.alter_column(\n        \"chat_feedback\", \"chat_message_id\", existing_type=sa.Integer(), nullable=True\n    )\n    op.drop_constraint(\n        \"document_retrieval_feedback__chat_message_fk\",\n        \"document_retrieval_feedback\",\n        type_=\"foreignkey\",\n    )\n    op.create_foreign_key(\n        \"document_retrieval_feedback__chat_message_fk\",\n        \"document_retrieval_feedback\",\n        \"chat_message\",\n        [\"chat_message_id\"],\n        [\"id\"],\n        ondelete=\"SET NULL\",\n    )\n    op.alter_column(\n        \"document_retrieval_feedback\",\n        \"chat_message_id\",\n        existing_type=sa.Integer(),\n        nullable=True,\n    )\n\n\ndef downgrade() -> None:\n    op.alter_column(\n        \"chat_feedback\", \"chat_message_id\", existing_type=sa.Integer(), nullable=False\n    )\n    op.drop_constraint(\n        \"chat_feedback__chat_message_fk\", \"chat_feedback\", type_=\"foreignkey\"\n    )\n    op.create_foreign_key(\n        \"chat_feedback__chat_message_fk\",\n        \"chat_feedback\",\n        \"chat_message\",\n        [\"chat_message_id\"],\n        [\"id\"],\n    )\n\n    op.alter_column(\n        \"document_retrieval_feedback\",\n        \"chat_message_id\",\n        existing_type=sa.Integer(),\n        nullable=False,\n    )\n    op.drop_constraint(\n        \"document_retrieval_feedback__chat_message_fk\",\n        \"document_retrieval_feedback\",\n        type_=\"foreignkey\",\n    )\n    op.create_foreign_key(\n        \"document_retrieval_feedback__chat_message_fk\",\n        \"document_retrieval_feedback\",\n        \"chat_message\",\n        [\"chat_message_id\"],\n        [\"id\"],\n    )\n"
  },
  {
    "path": "backend/alembic/versions/25a5501dc766_group_permissions_phase1.py",
    "content": "\"\"\"group_permissions_phase1\n\nRevision ID: 25a5501dc766\nRevises: b728689f45b1\nCreate Date: 2026-03-23 11:41:25.557442\n\n\"\"\"\n\nfrom alembic import op\nimport fastapi_users_db_sqlalchemy\nimport sqlalchemy as sa\n\nfrom onyx.db.enums import AccountType\nfrom onyx.db.enums import GrantSource\nfrom onyx.db.enums import Permission\n\n\n# revision identifiers, used by Alembic.\nrevision = \"25a5501dc766\"\ndown_revision = \"b728689f45b1\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # 1. Add account_type column to user table (nullable for now).\n    #    TODO(subash): backfill account_type for existing rows and add NOT NULL.\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"account_type\",\n            sa.Enum(AccountType, native_enum=False),\n            nullable=True,\n        ),\n    )\n\n    # 2. Add is_default column to user_group table\n    op.add_column(\n        \"user_group\",\n        sa.Column(\n            \"is_default\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.false(),\n        ),\n    )\n\n    # 3. Create permission_grant table\n    op.create_table(\n        \"permission_grant\",\n        sa.Column(\"id\", sa.Integer(), autoincrement=True, nullable=False),\n        sa.Column(\"group_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"permission\",\n            sa.Enum(Permission, native_enum=False),\n            nullable=False,\n        ),\n        sa.Column(\n            \"grant_source\",\n            sa.Enum(GrantSource, native_enum=False),\n            nullable=False,\n        ),\n        sa.Column(\n            \"granted_by\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.Column(\n            \"granted_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"is_deleted\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.false(),\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.ForeignKeyConstraint(\n            [\"group_id\"],\n            [\"user_group.id\"],\n            ondelete=\"CASCADE\",\n        ),\n        sa.ForeignKeyConstraint(\n            [\"granted_by\"],\n            [\"user.id\"],\n            ondelete=\"SET NULL\",\n        ),\n        sa.UniqueConstraint(\n            \"group_id\", \"permission\", name=\"uq_permission_grant_group_permission\"\n        ),\n    )\n\n    # 4. Index on user__user_group(user_id) — existing composite PK\n    #    has user_group_id as leading column; user-filtered queries need this\n    op.create_index(\n        \"ix_user__user_group_user_id\",\n        \"user__user_group\",\n        [\"user_id\"],\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\"ix_user__user_group_user_id\", table_name=\"user__user_group\")\n    op.drop_table(\"permission_grant\")\n    op.drop_column(\"user_group\", \"is_default\")\n    op.drop_column(\"user\", \"account_type\")\n"
  },
  {
    "path": "backend/alembic/versions/2664261bfaab_add_cache_store_table.py",
    "content": "\"\"\"add cache_store table\n\nRevision ID: 2664261bfaab\nRevises: 4a1e4b1c89d2\nCreate Date: 2026-02-27 00:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"2664261bfaab\"\ndown_revision = \"4a1e4b1c89d2\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"cache_store\",\n        sa.Column(\"key\", sa.String(), nullable=False),\n        sa.Column(\"value\", sa.LargeBinary(), nullable=True),\n        sa.Column(\"expires_at\", sa.DateTime(timezone=True), nullable=True),\n        sa.PrimaryKeyConstraint(\"key\"),\n    )\n    op.create_index(\n        \"ix_cache_store_expires\",\n        \"cache_store\",\n        [\"expires_at\"],\n        postgresql_where=sa.text(\"expires_at IS NOT NULL\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\"ix_cache_store_expires\", table_name=\"cache_store\")\n    op.drop_table(\"cache_store\")\n"
  },
  {
    "path": "backend/alembic/versions/2666d766cb9b_google_oauth2.py",
    "content": "\"\"\"Google OAuth2\n\nRevision ID: 2666d766cb9b\nRevises: 6d387b3196c2\nCreate Date: 2023-05-05 15:49:35.716016\n\n\"\"\"\n\nimport fastapi_users_db_sqlalchemy\nimport sqlalchemy as sa\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"2666d766cb9b\"\ndown_revision = \"6d387b3196c2\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"oauth_account\",\n        sa.Column(\"id\", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=False,\n        ),\n        sa.Column(\"oauth_name\", sa.String(length=100), nullable=False),\n        sa.Column(\"access_token\", sa.String(length=1024), nullable=False),\n        sa.Column(\"expires_at\", sa.Integer(), nullable=True),\n        sa.Column(\"refresh_token\", sa.String(length=1024), nullable=True),\n        sa.Column(\"account_id\", sa.String(length=320), nullable=False),\n        sa.Column(\"account_email\", sa.String(length=320), nullable=False),\n        sa.ForeignKeyConstraint([\"user_id\"], [\"user.id\"], ondelete=\"cascade\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_index(\n        op.f(\"ix_oauth_account_account_id\"),\n        \"oauth_account\",\n        [\"account_id\"],\n        unique=False,\n    )\n    op.create_index(\n        op.f(\"ix_oauth_account_oauth_name\"),\n        \"oauth_account\",\n        [\"oauth_name\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(op.f(\"ix_oauth_account_oauth_name\"), table_name=\"oauth_account\")\n    op.drop_index(op.f(\"ix_oauth_account_account_id\"), table_name=\"oauth_account\")\n    op.drop_table(\"oauth_account\")\n"
  },
  {
    "path": "backend/alembic/versions/26b931506ecb_default_chosen_assistants_to_none.py",
    "content": "\"\"\"default chosen assistants to none\n\nRevision ID: 26b931506ecb\nRevises: 2daa494a0851\nCreate Date: 2024-11-12 13:23:29.858995\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"26b931506ecb\"\ndown_revision = \"2daa494a0851\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\", sa.Column(\"chosen_assistants_new\", postgresql.JSONB(), nullable=True)\n    )\n\n    op.execute(\n        \"\"\"\n    UPDATE \"user\"\n    SET chosen_assistants_new =\n        CASE\n            WHEN chosen_assistants = '[-2, -1, 0]' THEN NULL\n            ELSE chosen_assistants\n        END\n    \"\"\"\n    )\n\n    op.drop_column(\"user\", \"chosen_assistants\")\n\n    op.alter_column(\n        \"user\", \"chosen_assistants_new\", new_column_name=\"chosen_assistants\"\n    )\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"chosen_assistants_old\",\n            postgresql.JSONB(),\n            nullable=False,\n            server_default=\"[-2, -1, 0]\",\n        ),\n    )\n\n    op.execute(\n        \"\"\"\n    UPDATE \"user\"\n    SET chosen_assistants_old =\n        CASE\n            WHEN chosen_assistants IS NULL THEN '[-2, -1, 0]'::jsonb\n            ELSE chosen_assistants\n        END\n    \"\"\"\n    )\n\n    op.drop_column(\"user\", \"chosen_assistants\")\n\n    op.alter_column(\n        \"user\", \"chosen_assistants_old\", new_column_name=\"chosen_assistants\"\n    )\n"
  },
  {
    "path": "backend/alembic/versions/27c6ecc08586_permission_framework.py",
    "content": "\"\"\"Permission Framework\n\nRevision ID: 27c6ecc08586\nRevises: 2666d766cb9b\nCreate Date: 2023-05-24 18:45:17.244495\n\n\"\"\"\n\nimport fastapi_users_db_sqlalchemy\nimport sqlalchemy as sa\nfrom alembic import op\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"27c6ecc08586\"\ndown_revision = \"2666d766cb9b\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.execute(\"TRUNCATE TABLE index_attempt\")\n    op.create_table(\n        \"connector\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\n            \"source\",\n            sa.Enum(\n                \"SLACK\",\n                \"WEB\",\n                \"GOOGLE_DRIVE\",\n                \"GITHUB\",\n                \"CONFLUENCE\",\n                name=\"documentsource\",\n                native_enum=False,\n            ),\n            nullable=False,\n        ),\n        sa.Column(\n            \"input_type\",\n            sa.Enum(\n                \"LOAD_STATE\",\n                \"POLL\",\n                \"EVENT\",\n                name=\"inputtype\",\n                native_enum=False,\n            ),\n            nullable=True,\n        ),\n        sa.Column(\n            \"connector_specific_config\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=False,\n        ),\n        sa.Column(\"refresh_freq\", sa.Integer(), nullable=True),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"disabled\", sa.Boolean(), nullable=False),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"credential\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"credential_json\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=False,\n        ),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.Column(\"public_doc\", sa.Boolean(), nullable=False),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"connector_credential_pair\",\n        sa.Column(\"connector_id\", sa.Integer(), nullable=False),\n        sa.Column(\"credential_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"connector_id\"],\n            [\"connector.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"credential_id\"],\n            [\"credential.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"connector_id\", \"credential_id\"),\n    )\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"connector_id\", sa.Integer(), nullable=True),\n    )\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"credential_id\", sa.Integer(), nullable=True),\n    )\n    op.create_foreign_key(\n        \"fk_index_attempt_credential_id\",\n        \"index_attempt\",\n        \"credential\",\n        [\"credential_id\"],\n        [\"id\"],\n    )\n    op.create_foreign_key(\n        \"fk_index_attempt_connector_id\",\n        \"index_attempt\",\n        \"connector\",\n        [\"connector_id\"],\n        [\"id\"],\n    )\n    op.drop_column(\"index_attempt\", \"connector_specific_config\")\n    op.drop_column(\"index_attempt\", \"source\")\n    op.drop_column(\"index_attempt\", \"input_type\")\n\n\ndef downgrade() -> None:\n    op.execute(\"TRUNCATE TABLE index_attempt\")\n    conn = op.get_bind()\n    inspector = sa.inspect(conn)\n    existing_columns = {col[\"name\"] for col in inspector.get_columns(\"index_attempt\")}\n\n    if \"input_type\" not in existing_columns:\n        op.add_column(\n            \"index_attempt\",\n            sa.Column(\"input_type\", sa.VARCHAR(), autoincrement=False, nullable=False),\n        )\n\n    if \"source\" not in existing_columns:\n        op.add_column(\n            \"index_attempt\",\n            sa.Column(\"source\", sa.VARCHAR(), autoincrement=False, nullable=False),\n        )\n\n    if \"connector_specific_config\" not in existing_columns:\n        op.add_column(\n            \"index_attempt\",\n            sa.Column(\n                \"connector_specific_config\",\n                postgresql.JSONB(astext_type=sa.Text()),\n                autoincrement=False,\n                nullable=False,\n            ),\n        )\n\n    # Check if the constraint exists before dropping\n    constraints = inspector.get_foreign_keys(\"index_attempt\")\n\n    if any(\n        constraint[\"name\"] == \"fk_index_attempt_credential_id\"\n        for constraint in constraints\n    ):\n        op.drop_constraint(\n            \"fk_index_attempt_credential_id\", \"index_attempt\", type_=\"foreignkey\"\n        )\n\n    if any(\n        constraint[\"name\"] == \"fk_index_attempt_connector_id\"\n        for constraint in constraints\n    ):\n        op.drop_constraint(\n            \"fk_index_attempt_connector_id\", \"index_attempt\", type_=\"foreignkey\"\n        )\n\n    if \"credential_id\" in existing_columns:\n        op.drop_column(\"index_attempt\", \"credential_id\")\n\n    if \"connector_id\" in existing_columns:\n        op.drop_column(\"index_attempt\", \"connector_id\")\n\n    op.execute(\"DROP TABLE IF EXISTS connector_credential_pair CASCADE\")\n    op.execute(\"DROP TABLE IF EXISTS credential CASCADE\")\n    op.execute(\"DROP TABLE IF EXISTS connector CASCADE\")\n"
  },
  {
    "path": "backend/alembic/versions/27fb147a843f_add_timestamps_to_user_table.py",
    "content": "\"\"\"add timestamps to user table\n\nRevision ID: 27fb147a843f\nRevises: b5c4d7e8f9a1\nCreate Date: 2026-03-08 17:18:40.828644\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"27fb147a843f\"\ndown_revision = \"b5c4d7e8f9a1\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n    )\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"updated_at\")\n    op.drop_column(\"user\", \"created_at\")\n"
  },
  {
    "path": "backend/alembic/versions/2955778aa44c_add_chunk_count_to_document.py",
    "content": "\"\"\"add chunk count to document\n\nRevision ID: 2955778aa44c\nRevises: c0aab6edb6dd\nCreate Date: 2025-01-04 11:39:43.268612\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"2955778aa44c\"\ndown_revision = \"c0aab6edb6dd\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\"document\", sa.Column(\"chunk_count\", sa.Integer(), nullable=True))\n\n\ndef downgrade() -> None:\n    op.drop_column(\"document\", \"chunk_count\")\n"
  },
  {
    "path": "backend/alembic/versions/2a391f840e85_add_last_refreshed_at_mcp_server.py",
    "content": "\"\"\"add last refreshed at mcp server\n\nRevision ID: 2a391f840e85\nRevises: 4cebcbc9b2ae\nCreate Date: 2025-12-06 15:19:59.766066\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembi.\nrevision = \"2a391f840e85\"\ndown_revision = \"4cebcbc9b2ae\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"mcp_server\",\n        sa.Column(\"last_refreshed_at\", sa.DateTime(timezone=True), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"mcp_server\", \"last_refreshed_at\")\n"
  },
  {
    "path": "backend/alembic/versions/2acdef638fc2_add_switchover_type_field.py",
    "content": "\"\"\"add switchover_type field and remove background_reindex_enabled\n\nRevision ID: 2acdef638fc2\nRevises: a4f23d6b71c8\nCreate Date: 2025-01-XX XX:XX:XX.XXXXXX\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\nfrom onyx.db.enums import SwitchoverType\n\n\n# revision identifiers, used by Alembic.\nrevision = \"2acdef638fc2\"\ndown_revision = \"a4f23d6b71c8\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add switchover_type column with default value of REINDEX\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"switchover_type\",\n            sa.Enum(SwitchoverType, native_enum=False),\n            nullable=False,\n            server_default=SwitchoverType.REINDEX.value,\n        ),\n    )\n\n    # Migrate existing data: set switchover_type based on background_reindex_enabled\n    # REINDEX where background_reindex_enabled=True, INSTANT where False\n    op.execute(\n        \"\"\"\n        UPDATE search_settings\n        SET switchover_type = CASE\n            WHEN background_reindex_enabled = true THEN 'REINDEX'\n            ELSE 'INSTANT'\n        END\n        \"\"\"\n    )\n\n    # Remove the background_reindex_enabled column (replaced by switchover_type)\n    op.drop_column(\"search_settings\", \"background_reindex_enabled\")\n\n\ndef downgrade() -> None:\n    # Re-add the background_reindex_enabled column with default value of True\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"background_reindex_enabled\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=\"true\",\n        ),\n    )\n    # Set background_reindex_enabled based on switchover_type\n    op.execute(\n        \"\"\"\n        UPDATE search_settings\n        SET background_reindex_enabled = CASE\n            WHEN switchover_type = 'INSTANT' THEN false\n            ELSE true\n        END\n        \"\"\"\n    )\n    # Remove the switchover_type column\n    op.drop_column(\"search_settings\", \"switchover_type\")\n"
  },
  {
    "path": "backend/alembic/versions/2b75d0a8ffcb_user_file_schema_cleanup.py",
    "content": "\"\"\"Migration 6: User file schema cleanup\n\nRevision ID: 2b75d0a8ffcb\nRevises: 3a78dba1080a\nCreate Date: 2025-09-22 10:09:26.375377\n\nThis migration removes legacy columns and tables after data migration is complete.\nIt should only be run after verifying all data has been successfully migrated.\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy import text\nimport logging\nimport fastapi_users_db_sqlalchemy\n\nlogger = logging.getLogger(\"alembic.runtime.migration\")\n\n# revision identifiers, used by Alembic.\nrevision = \"2b75d0a8ffcb\"\ndown_revision = \"3a78dba1080a\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    \"\"\"Remove legacy columns and tables.\"\"\"\n\n    bind = op.get_bind()\n    inspector = sa.inspect(bind)\n\n    logger.info(\"Starting schema cleanup...\")\n\n    # === Step 1: Verify data migration is complete ===\n    logger.info(\"Verifying data migration completion...\")\n\n    # Check if any chat sessions still have folder_id references\n    chat_session_columns = [\n        col[\"name\"] for col in inspector.get_columns(\"chat_session\")\n    ]\n    if \"folder_id\" in chat_session_columns:\n        orphaned_count = bind.execute(\n            text(\n                \"\"\"\n            SELECT COUNT(*) FROM chat_session\n            WHERE folder_id IS NOT NULL AND project_id IS NULL\n        \"\"\"\n            )\n        ).scalar_one()\n\n        if orphaned_count > 0:\n            logger.warning(\n                f\"WARNING: {orphaned_count} chat_session records still have folder_id without project_id. Proceeding anyway.\"\n            )\n\n    # === Step 2: Drop chat_session.folder_id ===\n    if \"folder_id\" in chat_session_columns:\n        logger.info(\"Dropping chat_session.folder_id...\")\n\n        # Drop foreign key constraint first\n        op.execute(\n            \"ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_chat_folder_fk\"\n        )\n        op.execute(\n            \"ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_folder_fk\"\n        )\n\n        # Drop the column\n        op.drop_column(\"chat_session\", \"folder_id\")\n        logger.info(\"Dropped chat_session.folder_id\")\n\n    # === Step 3: Drop persona__user_folder table ===\n    if \"persona__user_folder\" in inspector.get_table_names():\n        logger.info(\"Dropping persona__user_folder table...\")\n\n        # Check for any remaining data\n        remaining = bind.execute(\n            text(\"SELECT COUNT(*) FROM persona__user_folder\")\n        ).scalar_one()\n\n        if remaining > 0:\n            logger.warning(\n                f\"WARNING: Dropping persona__user_folder with {remaining} records\"\n            )\n\n        op.drop_table(\"persona__user_folder\")\n        logger.info(\"Dropped persona__user_folder table\")\n\n    # === Step 4: Drop chat_folder table ===\n    if \"chat_folder\" in inspector.get_table_names():\n        logger.info(\"Dropping chat_folder table...\")\n\n        # Check for any remaining data\n        remaining = bind.execute(text(\"SELECT COUNT(*) FROM chat_folder\")).scalar_one()\n\n        if remaining > 0:\n            logger.warning(f\"WARNING: Dropping chat_folder with {remaining} records\")\n\n        op.drop_table(\"chat_folder\")\n        logger.info(\"Dropped chat_folder table\")\n\n    # === Step 5: Drop user_file legacy columns ===\n    user_file_columns = [col[\"name\"] for col in inspector.get_columns(\"user_file\")]\n\n    # Drop folder_id\n    if \"folder_id\" in user_file_columns:\n        logger.info(\"Dropping user_file.folder_id...\")\n        op.drop_column(\"user_file\", \"folder_id\")\n        logger.info(\"Dropped user_file.folder_id\")\n\n    # Drop cc_pair_id (already handled in migration 5, but be sure)\n    if \"cc_pair_id\" in user_file_columns:\n        logger.info(\"Dropping user_file.cc_pair_id...\")\n\n        # Drop any remaining foreign key constraints\n        bind.execute(\n            text(\n                \"\"\"\n            DO $$\n            DECLARE r RECORD;\n            BEGIN\n              FOR r IN (\n                SELECT conname\n                FROM pg_constraint c\n                JOIN pg_class t ON c.conrelid = t.oid\n                WHERE c.contype = 'f'\n                  AND t.relname = 'user_file'\n                  AND EXISTS (\n                    SELECT 1 FROM pg_attribute a\n                    WHERE a.attrelid = t.oid\n                    AND a.attname = 'cc_pair_id'\n                  )\n              ) LOOP\n                EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);\n              END LOOP;\n            END$$;\n        \"\"\"\n            )\n        )\n\n        op.drop_column(\"user_file\", \"cc_pair_id\")\n        logger.info(\"Dropped user_file.cc_pair_id\")\n\n    # === Step 6: Clean up any remaining constraints ===\n    logger.info(\"Cleaning up remaining constraints...\")\n\n    # Drop any unique constraints on removed columns\n    op.execute(\n        \"ALTER TABLE user_file DROP CONSTRAINT IF EXISTS user_file_cc_pair_id_key\"\n    )\n\n    logger.info(\"Migration 6 (schema cleanup) completed successfully\")\n    logger.info(\"Legacy schema has been fully removed\")\n\n\ndef downgrade() -> None:\n    \"\"\"Recreate dropped columns and tables (structure only, no data).\"\"\"\n\n    bind = op.get_bind()\n    inspector = sa.inspect(bind)\n\n    logger.warning(\"Downgrading schema cleanup - recreating structure only, no data!\")\n\n    # Recreate user_file columns\n    if \"user_file\" in inspector.get_table_names():\n        columns = [col[\"name\"] for col in inspector.get_columns(\"user_file\")]\n\n        if \"cc_pair_id\" not in columns:\n            op.add_column(\n                \"user_file\", sa.Column(\"cc_pair_id\", sa.Integer(), nullable=True)\n            )\n\n        if \"folder_id\" not in columns:\n            op.add_column(\n                \"user_file\", sa.Column(\"folder_id\", sa.Integer(), nullable=True)\n            )\n\n    # Recreate persona__user_folder table\n    if \"persona__user_folder\" not in inspector.get_table_names():\n        op.create_table(\n            \"persona__user_folder\",\n            sa.Column(\"persona_id\", sa.Integer(), nullable=False),\n            sa.Column(\"user_folder_id\", sa.Integer(), nullable=False),\n            sa.PrimaryKeyConstraint(\"persona_id\", \"user_folder_id\"),\n            sa.ForeignKeyConstraint([\"persona_id\"], [\"persona.id\"]),\n            sa.ForeignKeyConstraint([\"user_folder_id\"], [\"user_project.id\"]),\n        )\n\n    # Recreate chat_folder table and related structures\n    if \"chat_folder\" not in inspector.get_table_names():\n        op.create_table(\n            \"chat_folder\",\n            sa.Column(\"id\", sa.Integer(), nullable=False),\n            sa.Column(\n                \"user_id\",\n                fastapi_users_db_sqlalchemy.generics.GUID(),\n                nullable=True,\n            ),\n            sa.Column(\"name\", sa.String(), nullable=True),\n            sa.Column(\"display_priority\", sa.Integer(), nullable=False),\n            sa.ForeignKeyConstraint(\n                [\"user_id\"],\n                [\"user.id\"],\n                name=\"chat_folder_user_id_fkey\",\n            ),\n            sa.PrimaryKeyConstraint(\"id\"),\n        )\n\n    # Add folder_id back to chat_session\n    if \"chat_session\" in inspector.get_table_names():\n        columns = [col[\"name\"] for col in inspector.get_columns(\"chat_session\")]\n        if \"folder_id\" not in columns:\n            op.add_column(\n                \"chat_session\", sa.Column(\"folder_id\", sa.Integer(), nullable=True)\n            )\n\n            # Add foreign key if chat_folder exists\n            if \"chat_folder\" in inspector.get_table_names():\n                op.create_foreign_key(\n                    \"chat_session_chat_folder_fk\",\n                    \"chat_session\",\n                    \"chat_folder\",\n                    [\"folder_id\"],\n                    [\"id\"],\n                )\n\n    logger.info(\"Downgrade completed - structure recreated but data is lost\")\n"
  },
  {
    "path": "backend/alembic/versions/2b90f3af54b8_usage_limits.py",
    "content": "\"\"\"usage_limits\n\nRevision ID: 2b90f3af54b8\nRevises: 9a0296d7421e\nCreate Date: 2026-01-03 16:55:30.449692\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"2b90f3af54b8\"\ndown_revision = \"9a0296d7421e\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"tenant_usage\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"window_start\", sa.DateTime(timezone=True), nullable=False, index=True\n        ),\n        sa.Column(\"llm_cost_cents\", sa.Float(), nullable=False, server_default=\"0.0\"),\n        sa.Column(\"chunks_indexed\", sa.Integer(), nullable=False, server_default=\"0\"),\n        sa.Column(\"api_calls\", sa.Integer(), nullable=False, server_default=\"0\"),\n        sa.Column(\n            \"non_streaming_api_calls\", sa.Integer(), nullable=False, server_default=\"0\"\n        ),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=True,\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"window_start\", name=\"uq_tenant_usage_window\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\"ix_tenant_usage_window_start\", table_name=\"tenant_usage\")\n    op.drop_table(\"tenant_usage\")\n"
  },
  {
    "path": "backend/alembic/versions/2c2430828bdf_add_unique_constraint_to_inputprompt_.py",
    "content": "\"\"\"add_unique_constraint_to_inputprompt_prompt_user_id\n\nRevision ID: 2c2430828bdf\nRevises: fb80bdd256de\nCreate Date: 2026-01-20 16:01:54.314805\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"2c2430828bdf\"\ndown_revision = \"fb80bdd256de\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Create unique constraint on (prompt, user_id) for user-owned prompts\n    # This ensures each user can only have one shortcut with a given name\n    op.create_unique_constraint(\n        \"uq_inputprompt_prompt_user_id\",\n        \"inputprompt\",\n        [\"prompt\", \"user_id\"],\n    )\n\n    # Create partial unique index for public prompts (where user_id IS NULL)\n    # PostgreSQL unique constraints don't enforce uniqueness for NULL values,\n    # so we need a partial index to ensure public prompt names are also unique\n    op.execute(\n        \"\"\"\n        CREATE UNIQUE INDEX uq_inputprompt_prompt_public\n        ON inputprompt (prompt)\n        WHERE user_id IS NULL\n        \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    op.execute(\"DROP INDEX IF EXISTS uq_inputprompt_prompt_public\")\n    op.drop_constraint(\"uq_inputprompt_prompt_user_id\", \"inputprompt\", type_=\"unique\")\n"
  },
  {
    "path": "backend/alembic/versions/2cdeff6d8c93_set_built_in_to_default.py",
    "content": "\"\"\"set built in to default\n\nRevision ID: 2cdeff6d8c93\nRevises: f5437cc136c5\nCreate Date: 2025-02-11 14:57:51.308775\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"2cdeff6d8c93\"\ndown_revision = \"f5437cc136c5\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Prior to this migration / point in the codebase history,\n    # built in personas were implicitly treated as default personas (with no option to change this)\n    # This migration makes that explicit\n    op.execute(\n        \"\"\"\n        UPDATE persona\n        SET is_default_persona = TRUE\n        WHERE builtin_persona = TRUE\n    \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/2d2304e27d8c_add_above_below_to_persona.py",
    "content": "\"\"\"Add Above Below to Persona\n\nRevision ID: 2d2304e27d8c\nRevises: 4b08d97e175a\nCreate Date: 2024-08-21 19:15:15.762948\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"2d2304e27d8c\"\ndown_revision = \"4b08d97e175a\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\"persona\", sa.Column(\"chunks_above\", sa.Integer(), nullable=True))\n    op.add_column(\"persona\", sa.Column(\"chunks_below\", sa.Integer(), nullable=True))\n\n    op.execute(\n        \"UPDATE persona SET chunks_above = 1, chunks_below = 1 WHERE chunks_above IS NULL AND chunks_below IS NULL\"\n    )\n\n    op.alter_column(\"persona\", \"chunks_above\", nullable=False)\n    op.alter_column(\"persona\", \"chunks_below\", nullable=False)\n\n\ndef downgrade() -> None:\n    op.drop_column(\"persona\", \"chunks_below\")\n    op.drop_column(\"persona\", \"chunks_above\")\n"
  },
  {
    "path": "backend/alembic/versions/2daa494a0851_add_group_sync_time.py",
    "content": "\"\"\"add-group-sync-time\n\nRevision ID: 2daa494a0851\nRevises: c0fd6e4da83a\nCreate Date: 2024-11-11 10:57:22.991157\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"2daa494a0851\"\ndown_revision = \"c0fd6e4da83a\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"last_time_external_group_sync\",\n            sa.DateTime(timezone=True),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"connector_credential_pair\", \"last_time_external_group_sync\")\n"
  },
  {
    "path": "backend/alembic/versions/2f80c6a2550f_add_chat_session_specific_temperature_.py",
    "content": "\"\"\"add chat session specific temperature override\n\nRevision ID: 2f80c6a2550f\nRevises: 33ea50e88f24\nCreate Date: 2025-01-31 10:30:27.289646\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"2f80c6a2550f\"\ndown_revision = \"33ea50e88f24\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_session\", sa.Column(\"temperature_override\", sa.Float(), nullable=True)\n    )\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"temperature_override_enabled\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.false(),\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_session\", \"temperature_override\")\n    op.drop_column(\"user\", \"temperature_override_enabled\")\n"
  },
  {
    "path": "backend/alembic/versions/2f95e36923e6_add_indexing_coordination.py",
    "content": "\"\"\"add_indexing_coordination\n\nRevision ID: 2f95e36923e6\nRevises: 0816326d83aa\nCreate Date: 2025-07-10 16:17:57.762182\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"2f95e36923e6\"\ndown_revision = \"0816326d83aa\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add database-based coordination fields (replacing Redis fencing)\n    op.add_column(\n        \"index_attempt\", sa.Column(\"celery_task_id\", sa.String(), nullable=True)\n    )\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\n            \"cancellation_requested\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=\"false\",\n        ),\n    )\n\n    # Add batch coordination fields (replacing FileStore state)\n    op.add_column(\n        \"index_attempt\", sa.Column(\"total_batches\", sa.Integer(), nullable=True)\n    )\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\n            \"completed_batches\", sa.Integer(), nullable=False, server_default=\"0\"\n        ),\n    )\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\n            \"total_failures_batch_level\",\n            sa.Integer(),\n            nullable=False,\n            server_default=\"0\",\n        ),\n    )\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"total_chunks\", sa.Integer(), nullable=False, server_default=\"0\"),\n    )\n\n    # Progress tracking for stall detection\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"last_progress_time\", sa.DateTime(timezone=True), nullable=True),\n    )\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\n            \"last_batches_completed_count\",\n            sa.Integer(),\n            nullable=False,\n            server_default=\"0\",\n        ),\n    )\n\n    # Heartbeat tracking for worker liveness detection\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\n            \"heartbeat_counter\", sa.Integer(), nullable=False, server_default=\"0\"\n        ),\n    )\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\n            \"last_heartbeat_value\", sa.Integer(), nullable=False, server_default=\"0\"\n        ),\n    )\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"last_heartbeat_time\", sa.DateTime(timezone=True), nullable=True),\n    )\n\n    # Add index for coordination queries\n    op.create_index(\n        \"ix_index_attempt_active_coordination\",\n        \"index_attempt\",\n        [\"connector_credential_pair_id\", \"search_settings_id\", \"status\"],\n    )\n\n\ndef downgrade() -> None:\n    # Remove the new index\n    op.drop_index(\"ix_index_attempt_active_coordination\", table_name=\"index_attempt\")\n\n    # Remove the new columns\n    op.drop_column(\"index_attempt\", \"last_batches_completed_count\")\n    op.drop_column(\"index_attempt\", \"last_progress_time\")\n    op.drop_column(\"index_attempt\", \"last_heartbeat_time\")\n    op.drop_column(\"index_attempt\", \"last_heartbeat_value\")\n    op.drop_column(\"index_attempt\", \"heartbeat_counter\")\n    op.drop_column(\"index_attempt\", \"total_chunks\")\n    op.drop_column(\"index_attempt\", \"total_failures_batch_level\")\n    op.drop_column(\"index_attempt\", \"completed_batches\")\n    op.drop_column(\"index_attempt\", \"total_batches\")\n    op.drop_column(\"index_attempt\", \"cancellation_requested\")\n    op.drop_column(\"index_attempt\", \"celery_task_id\")\n"
  },
  {
    "path": "backend/alembic/versions/30c1d5744104_persona_datetime_aware.py",
    "content": "\"\"\"Persona Datetime Aware\n\nRevision ID: 30c1d5744104\nRevises: 7f99be1cb9f5\nCreate Date: 2023-10-16 23:21:01.283424\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"30c1d5744104\"\ndown_revision = \"7f99be1cb9f5\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\"persona\", sa.Column(\"datetime_aware\", sa.Boolean(), nullable=True))\n    op.execute(\"UPDATE persona SET datetime_aware = TRUE\")\n    op.alter_column(\"persona\", \"datetime_aware\", nullable=False)\n    op.create_index(\n        \"_default_persona_name_idx\",\n        \"persona\",\n        [\"name\"],\n        unique=True,\n        postgresql_where=sa.text(\"default_persona = true\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\n        \"_default_persona_name_idx\",\n        table_name=\"persona\",\n        postgresql_where=sa.text(\"default_persona = true\"),\n    )\n    op.drop_column(\"persona\", \"datetime_aware\")\n"
  },
  {
    "path": "backend/alembic/versions/325975216eb3_add_icon_color_and_icon_shape_to_persona.py",
    "content": "\"\"\"Add icon_color and icon_shape to Persona\n\nRevision ID: 325975216eb3\nRevises: 91ffac7e65b3\nCreate Date: 2024-07-24 21:29:31.784562\n\n\"\"\"\n\nimport random\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.sql import table, column, select\n\n# revision identifiers, used by Alembic.\nrevision = \"325975216eb3\"\ndown_revision = \"91ffac7e65b3\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ncolorOptions = [\n    \"#FF6FBF\",\n    \"#6FB1FF\",\n    \"#B76FFF\",\n    \"#FFB56F\",\n    \"#6FFF8D\",\n    \"#FF6F6F\",\n    \"#6FFFFF\",\n]\n\n\n# Function to generate a random shape ensuring at least 3 of the middle 4 squares are filled\ndef generate_random_shape() -> int:\n    center_squares = [12, 10, 6, 14, 13, 11, 7, 15]\n    center_fill = random.choice(center_squares)\n    remaining_squares = [i for i in range(16) if not (center_fill & (1 << i))]\n    random.shuffle(remaining_squares)\n    for i in range(10 - bin(center_fill).count(\"1\")):\n        center_fill |= 1 << remaining_squares[i]\n    return center_fill\n\n\ndef upgrade() -> None:\n    op.add_column(\"persona\", sa.Column(\"icon_color\", sa.String(), nullable=True))\n    op.add_column(\"persona\", sa.Column(\"icon_shape\", sa.Integer(), nullable=True))\n    op.add_column(\"persona\", sa.Column(\"uploaded_image_id\", sa.String(), nullable=True))\n\n    persona = table(\n        \"persona\",\n        column(\"id\", sa.Integer),\n        column(\"icon_color\", sa.String),\n        column(\"icon_shape\", sa.Integer),\n    )\n\n    conn = op.get_bind()\n    personas = conn.execute(select(persona.c.id))\n\n    for persona_id in personas:\n        random_color = random.choice(colorOptions)\n        random_shape = generate_random_shape()\n        conn.execute(\n            persona.update()\n            .where(persona.c.id == persona_id[0])\n            .values(icon_color=random_color, icon_shape=random_shape)\n        )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"persona\", \"icon_shape\")\n    op.drop_column(\"persona\", \"uploaded_image_id\")\n    op.drop_column(\"persona\", \"icon_color\")\n"
  },
  {
    "path": "backend/alembic/versions/33cb72ea4d80_single_tool_call_per_message.py",
    "content": "\"\"\"single tool call per message\n\nRevision ID: 33cb72ea4d80\nRevises: 5b29123cd710\nCreate Date: 2024-11-01 12:51:01.535003\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"33cb72ea4d80\"\ndown_revision = \"5b29123cd710\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Step 1: Delete extraneous ToolCall entries\n    # Keep only the ToolCall with the smallest 'id' for each 'message_id'\n    op.execute(\n        sa.text(\n            \"\"\"\n            DELETE FROM tool_call\n            WHERE id NOT IN (\n                SELECT MIN(id)\n                FROM tool_call\n                WHERE message_id IS NOT NULL\n                GROUP BY message_id\n            );\n        \"\"\"\n        )\n    )\n\n    # Step 2: Add a unique constraint on message_id\n    op.create_unique_constraint(\n        constraint_name=\"uq_tool_call_message_id\",\n        table_name=\"tool_call\",\n        columns=[\"message_id\"],\n    )\n\n\ndef downgrade() -> None:\n    # Step 1: Drop the unique constraint on message_id\n    op.drop_constraint(\n        constraint_name=\"uq_tool_call_message_id\",\n        table_name=\"tool_call\",\n        type_=\"unique\",\n    )\n"
  },
  {
    "path": "backend/alembic/versions/33ea50e88f24_foreign_key_input_prompts.py",
    "content": "\"\"\"foreign key input prompts\n\nRevision ID: 33ea50e88f24\nRevises: a6df6b88ef81\nCreate Date: 2025-01-29 10:54:22.141765\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"33ea50e88f24\"\ndown_revision = \"a6df6b88ef81\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Safely drop constraints if exists\n    op.execute(\n        \"\"\"\n        ALTER TABLE inputprompt__user\n        DROP CONSTRAINT IF EXISTS inputprompt__user_input_prompt_id_fkey\n        \"\"\"\n    )\n    op.execute(\n        \"\"\"\n        ALTER TABLE inputprompt__user\n        DROP CONSTRAINT IF EXISTS inputprompt__user_user_id_fkey\n        \"\"\"\n    )\n\n    # Recreate with ON DELETE CASCADE\n    op.create_foreign_key(\n        \"inputprompt__user_input_prompt_id_fkey\",\n        \"inputprompt__user\",\n        \"inputprompt\",\n        [\"input_prompt_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    op.create_foreign_key(\n        \"inputprompt__user_user_id_fkey\",\n        \"inputprompt__user\",\n        \"user\",\n        [\"user_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n\ndef downgrade() -> None:\n    # Drop the new FKs with ondelete\n    op.drop_constraint(\n        \"inputprompt__user_input_prompt_id_fkey\",\n        \"inputprompt__user\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\n        \"inputprompt__user_user_id_fkey\",\n        \"inputprompt__user\",\n        type_=\"foreignkey\",\n    )\n\n    # Recreate them without cascading\n    op.create_foreign_key(\n        \"inputprompt__user_input_prompt_id_fkey\",\n        \"inputprompt__user\",\n        \"inputprompt\",\n        [\"input_prompt_id\"],\n        [\"id\"],\n    )\n    op.create_foreign_key(\n        \"inputprompt__user_user_id_fkey\",\n        \"inputprompt__user\",\n        \"user\",\n        [\"user_id\"],\n        [\"id\"],\n    )\n"
  },
  {
    "path": "backend/alembic/versions/351faebd379d_add_curator_fields.py",
    "content": "\"\"\"Add curator fields\n\nRevision ID: 351faebd379d\nRevises: ee3f4b47fad5\nCreate Date: 2024-08-15 22:37:08.397052\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"351faebd379d\"\ndown_revision = \"ee3f4b47fad5\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    # Add is_curator column to User__UserGroup table\n    op.add_column(\n        \"user__user_group\",\n        sa.Column(\"is_curator\", sa.Boolean(), nullable=False, server_default=\"false\"),\n    )\n\n    # Use batch mode to modify the enum type\n    with op.batch_alter_table(\"user\", schema=None) as batch_op:\n        batch_op.alter_column(  # type: ignore[attr-defined]\n            \"role\",\n            type_=sa.Enum(\n                \"BASIC\",\n                \"ADMIN\",\n                \"CURATOR\",\n                \"GLOBAL_CURATOR\",\n                name=\"userrole\",\n                native_enum=False,\n            ),\n            existing_type=sa.Enum(\"BASIC\", \"ADMIN\", name=\"userrole\", native_enum=False),\n            existing_nullable=False,\n        )\n    # Create the association table\n    op.create_table(\n        \"credential__user_group\",\n        sa.Column(\"credential_id\", sa.Integer(), nullable=False),\n        sa.Column(\"user_group_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"credential_id\"],\n            [\"credential.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_group_id\"],\n            [\"user_group.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"credential_id\", \"user_group_id\"),\n    )\n    op.add_column(\n        \"credential\",\n        sa.Column(\n            \"curator_public\", sa.Boolean(), nullable=False, server_default=\"false\"\n        ),\n    )\n\n\ndef downgrade() -> None:\n    # Update existing records to ensure they fit within the BASIC/ADMIN roles\n    op.execute(\n        \"UPDATE \\\"user\\\" SET role = 'ADMIN' WHERE role IN ('CURATOR', 'GLOBAL_CURATOR')\"\n    )\n\n    # Remove is_curator column from User__UserGroup table\n    op.drop_column(\"user__user_group\", \"is_curator\")\n\n    with op.batch_alter_table(\"user\", schema=None) as batch_op:\n        batch_op.alter_column(  # type: ignore[attr-defined]\n            \"role\",\n            type_=sa.Enum(\n                \"BASIC\", \"ADMIN\", name=\"userrole\", native_enum=False, length=20\n            ),\n            existing_type=sa.Enum(\n                \"BASIC\",\n                \"ADMIN\",\n                \"CURATOR\",\n                \"GLOBAL_CURATOR\",\n                name=\"userrole\",\n                native_enum=False,\n            ),\n            existing_nullable=False,\n        )\n    # Drop the association table\n    op.drop_table(\"credential__user_group\")\n    op.drop_column(\"credential\", \"curator_public\")\n"
  },
  {
    "path": "backend/alembic/versions/35e518e0ddf4_properly_cascade.py",
    "content": "\"\"\"properly_cascade\n\nRevision ID: 35e518e0ddf4\nRevises: 91a0a4d62b14\nCreate Date: 2024-09-20 21:24:04.891018\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"35e518e0ddf4\"\ndown_revision = \"91a0a4d62b14\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Update chat_message foreign key constraint\n    op.drop_constraint(\n        \"chat_message_chat_session_id_fkey\", \"chat_message\", type_=\"foreignkey\"\n    )\n    op.create_foreign_key(\n        \"chat_message_chat_session_id_fkey\",\n        \"chat_message\",\n        \"chat_session\",\n        [\"chat_session_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    # Update chat_message__search_doc foreign key constraints\n    op.drop_constraint(\n        \"chat_message__search_doc_chat_message_id_fkey\",\n        \"chat_message__search_doc\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\n        \"chat_message__search_doc_search_doc_id_fkey\",\n        \"chat_message__search_doc\",\n        type_=\"foreignkey\",\n    )\n\n    op.create_foreign_key(\n        \"chat_message__search_doc_chat_message_id_fkey\",\n        \"chat_message__search_doc\",\n        \"chat_message\",\n        [\"chat_message_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n    op.create_foreign_key(\n        \"chat_message__search_doc_search_doc_id_fkey\",\n        \"chat_message__search_doc\",\n        \"search_doc\",\n        [\"search_doc_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    # Add CASCADE delete for tool_call foreign key\n    op.drop_constraint(\"tool_call_message_id_fkey\", \"tool_call\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"tool_call_message_id_fkey\",\n        \"tool_call\",\n        \"chat_message\",\n        [\"message_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n\ndef downgrade() -> None:\n    # Revert chat_message foreign key constraint\n    op.drop_constraint(\n        \"chat_message_chat_session_id_fkey\", \"chat_message\", type_=\"foreignkey\"\n    )\n    op.create_foreign_key(\n        \"chat_message_chat_session_id_fkey\",\n        \"chat_message\",\n        \"chat_session\",\n        [\"chat_session_id\"],\n        [\"id\"],\n    )\n\n    # Revert chat_message__search_doc foreign key constraints\n    op.drop_constraint(\n        \"chat_message__search_doc_chat_message_id_fkey\",\n        \"chat_message__search_doc\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\n        \"chat_message__search_doc_search_doc_id_fkey\",\n        \"chat_message__search_doc\",\n        type_=\"foreignkey\",\n    )\n\n    op.create_foreign_key(\n        \"chat_message__search_doc_chat_message_id_fkey\",\n        \"chat_message__search_doc\",\n        \"chat_message\",\n        [\"chat_message_id\"],\n        [\"id\"],\n    )\n    op.create_foreign_key(\n        \"chat_message__search_doc_search_doc_id_fkey\",\n        \"chat_message__search_doc\",\n        \"search_doc\",\n        [\"search_doc_id\"],\n        [\"id\"],\n    )\n\n    # Revert tool_call foreign key constraint\n    op.drop_constraint(\"tool_call_message_id_fkey\", \"tool_call\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"tool_call_message_id_fkey\",\n        \"tool_call\",\n        \"chat_message\",\n        [\"message_id\"],\n        [\"id\"],\n    )\n"
  },
  {
    "path": "backend/alembic/versions/35e6853a51d5_server_default_chosen_assistants.py",
    "content": "\"\"\"server default chosen assistants\n\nRevision ID: 35e6853a51d5\nRevises: c99d76fcd298\nCreate Date: 2024-09-13 13:20:32.885317\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"35e6853a51d5\"\ndown_revision = \"c99d76fcd298\"\nbranch_labels = None\ndepends_on = None\n\nDEFAULT_ASSISTANTS = [-2, -1, 0]\n\n\ndef upgrade() -> None:\n    # Step 1: Update any NULL values to the default value\n    # This upgrades existing users without ordered assistant\n    # to have default assistants set to visible assistants which are\n    # accessible by them.\n    op.execute(\n        \"\"\"\n        UPDATE \"user\" u\n        SET chosen_assistants = (\n            SELECT jsonb_agg(\n                p.id ORDER BY\n                    COALESCE(p.display_priority, 2147483647) ASC,\n                    p.id ASC\n            )\n            FROM persona p\n            LEFT JOIN persona__user pu ON p.id = pu.persona_id AND pu.user_id = u.id\n            WHERE p.is_visible = true\n            AND (p.is_public = true OR pu.user_id IS NOT NULL)\n        )\n        WHERE chosen_assistants IS NULL\n        OR chosen_assistants = 'null'\n        OR jsonb_typeof(chosen_assistants) = 'null'\n        OR (jsonb_typeof(chosen_assistants) = 'string' AND chosen_assistants = '\"null\"')\n    \"\"\"\n    )\n\n    # Step 2: Alter the column to make it non-nullable\n    op.alter_column(\n        \"user\",\n        \"chosen_assistants\",\n        type_=postgresql.JSONB(astext_type=sa.Text()),\n        nullable=False,\n        server_default=sa.text(f\"'{DEFAULT_ASSISTANTS}'::jsonb\"),\n    )\n\n\ndef downgrade() -> None:\n    op.alter_column(\n        \"user\",\n        \"chosen_assistants\",\n        type_=postgresql.JSONB(astext_type=sa.Text()),\n        nullable=True,\n        server_default=None,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/369644546676_add_composite_index_for_index_attempt_.py",
    "content": "\"\"\"add composite index for index attempt time updated\n\nRevision ID: 369644546676\nRevises: 2955778aa44c\nCreate Date: 2025-01-08 15:38:17.224380\n\n\"\"\"\n\nfrom alembic import op\nfrom sqlalchemy import text\n\n# revision identifiers, used by Alembic.\nrevision = \"369644546676\"\ndown_revision = \"2955778aa44c\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_index(\n        \"ix_index_attempt_ccpair_search_settings_time_updated\",\n        \"index_attempt\",\n        [\n            \"connector_credential_pair_id\",\n            \"search_settings_id\",\n            text(\"time_updated DESC\"),\n        ],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\n        \"ix_index_attempt_ccpair_search_settings_time_updated\",\n        table_name=\"index_attempt\",\n    )\n"
  },
  {
    "path": "backend/alembic/versions/36e9220ab794_update_kg_trigger_functions.py",
    "content": "\"\"\"update_kg_trigger_functions\n\nRevision ID: 36e9220ab794\nRevises: c9e2cd766c29\nCreate Date: 2025-06-22 17:33:25.833733\n\n\"\"\"\n\nfrom alembic import op\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy import text\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\n# revision identifiers, used by Alembic.\nrevision = \"36e9220ab794\"\ndown_revision = \"c9e2cd766c29\"\nbranch_labels = None\ndepends_on = None\n\n\ndef _get_tenant_contextvar(session: Session) -> str:\n    \"\"\"Get the current schema for the migration\"\"\"\n    current_tenant = session.execute(text(\"SELECT current_schema()\")).scalar()\n    if isinstance(current_tenant, str):\n        return current_tenant\n    else:\n        raise ValueError(\"Current tenant is not a string\")\n\n\ndef upgrade() -> None:\n\n    bind = op.get_bind()\n    session = Session(bind=bind)\n\n    # Create kg_entity trigger to update kg_entity.name and its trigrams\n    tenant_id = _get_tenant_contextvar(session)\n    alphanum_pattern = r\"[^a-z0-9]+\"\n    truncate_length = 1000\n    function = \"update_kg_entity_name\"\n    op.execute(\n        text(\n            f\"\"\"\n            CREATE OR REPLACE FUNCTION \"{tenant_id}\".{function}()\n            RETURNS TRIGGER AS $$\n            DECLARE\n                name text;\n                cleaned_name text;\n            BEGIN\n                -- Set name to semantic_id if document_id is not NULL\n                IF NEW.document_id IS NOT NULL THEN\n                    SELECT lower(semantic_id) INTO name\n                    FROM \"{tenant_id}\".document\n                    WHERE id = NEW.document_id;\n                ELSE\n                    name = lower(NEW.name);\n                END IF;\n\n                -- Clean name and truncate if too long\n                cleaned_name = regexp_replace(\n                    name,\n                    '{alphanum_pattern}', '', 'g'\n                );\n                IF length(cleaned_name) > {truncate_length} THEN\n                    cleaned_name = left(cleaned_name, {truncate_length});\n                END IF;\n\n                -- Set name and name trigrams\n                NEW.name = name;\n                NEW.name_trigrams = {POSTGRES_DEFAULT_SCHEMA}.show_trgm(cleaned_name);\n                RETURN NEW;\n            END;\n            $$ LANGUAGE plpgsql;\n            \"\"\"\n        )\n    )\n    trigger = f\"{function}_trigger\"\n    op.execute(f'DROP TRIGGER IF EXISTS {trigger} ON \"{tenant_id}\".kg_entity')\n    op.execute(\n        f\"\"\"\n        CREATE TRIGGER {trigger}\n            BEFORE INSERT OR UPDATE OF name\n            ON \"{tenant_id}\".kg_entity\n            FOR EACH ROW\n            EXECUTE FUNCTION \"{tenant_id}\".{function}();\n        \"\"\"\n    )\n\n    # Create kg_entity trigger to update kg_entity.name and its trigrams\n    function = \"update_kg_entity_name_from_doc\"\n    op.execute(\n        text(\n            f\"\"\"\n            CREATE OR REPLACE FUNCTION \"{tenant_id}\".{function}()\n            RETURNS TRIGGER AS $$\n            DECLARE\n                doc_name text;\n                cleaned_name text;\n            BEGIN\n                doc_name = lower(NEW.semantic_id);\n\n                -- Clean name and truncate if too long\n                cleaned_name = regexp_replace(\n                    doc_name,\n                    '{alphanum_pattern}', '', 'g'\n                );\n                IF length(cleaned_name) > {truncate_length} THEN\n                    cleaned_name = left(cleaned_name, {truncate_length});\n                END IF;\n\n                -- Set name and name trigrams for all entities referencing this document\n                UPDATE \"{tenant_id}\".kg_entity\n                SET\n                    name = doc_name,\n                    name_trigrams = {POSTGRES_DEFAULT_SCHEMA}.show_trgm(cleaned_name)\n                WHERE document_id = NEW.id;\n                RETURN NEW;\n            END;\n            $$ LANGUAGE plpgsql;\n            \"\"\"\n        )\n    )\n    trigger = f\"{function}_trigger\"\n    op.execute(f'DROP TRIGGER IF EXISTS {trigger} ON \"{tenant_id}\".document')\n    op.execute(\n        f\"\"\"\n        CREATE TRIGGER {trigger}\n            AFTER UPDATE OF semantic_id\n            ON \"{tenant_id}\".document\n            FOR EACH ROW\n            EXECUTE FUNCTION \"{tenant_id}\".{function}();\n        \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/3781a5eb12cb_add_chunk_stats_table.py",
    "content": "\"\"\"add chunk stats table\n\nRevision ID: 3781a5eb12cb\nRevises: df46c75b714e\nCreate Date: 2025-03-10 10:02:30.586666\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"3781a5eb12cb\"\ndown_revision = \"df46c75b714e\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"chunk_stats\",\n        sa.Column(\"id\", sa.String(), primary_key=True, index=True),\n        sa.Column(\n            \"document_id\",\n            sa.String(),\n            sa.ForeignKey(\"document.id\"),\n            nullable=False,\n            index=True,\n        ),\n        sa.Column(\"chunk_in_doc_id\", sa.Integer(), nullable=False),\n        sa.Column(\"information_content_boost\", sa.Float(), nullable=True),\n        sa.Column(\n            \"last_modified\",\n            sa.DateTime(timezone=True),\n            nullable=False,\n            index=True,\n            server_default=sa.func.now(),\n        ),\n        sa.Column(\"last_synced\", sa.DateTime(timezone=True), nullable=True, index=True),\n        sa.UniqueConstraint(\n            \"document_id\", \"chunk_in_doc_id\", name=\"uq_chunk_stats_doc_chunk\"\n        ),\n    )\n\n    op.create_index(\n        \"ix_chunk_sync_status\", \"chunk_stats\", [\"last_modified\", \"last_synced\"]\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\"ix_chunk_sync_status\", table_name=\"chunk_stats\")\n    op.drop_table(\"chunk_stats\")\n"
  },
  {
    "path": "backend/alembic/versions/3879338f8ba1_add_tool_table.py",
    "content": "\"\"\"Add tool table\n\nRevision ID: 3879338f8ba1\nRevises: f1c6478c3fd8\nCreate Date: 2024-05-11 16:11:23.718084\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"3879338f8ba1\"\ndown_revision = \"f1c6478c3fd8\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"tool\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"description\", sa.Text(), nullable=True),\n        sa.Column(\"in_code_tool_id\", sa.String(), nullable=True),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"persona__tool\",\n        sa.Column(\"persona_id\", sa.Integer(), nullable=False),\n        sa.Column(\"tool_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"tool_id\"],\n            [\"tool.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"persona_id\", \"tool_id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"persona__tool\")\n    op.drop_table(\"tool\")\n"
  },
  {
    "path": "backend/alembic/versions/38eda64af7fe_add_chat_session_sharing.py",
    "content": "\"\"\"Add chat session sharing\n\nRevision ID: 38eda64af7fe\nRevises: 776b3bbe9092\nCreate Date: 2024-03-27 19:41:29.073594\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"38eda64af7fe\"\ndown_revision = \"776b3bbe9092\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_session\",\n        sa.Column(\n            \"shared_status\",\n            sa.Enum(\n                \"PUBLIC\",\n                \"PRIVATE\",\n                name=\"chatsessionsharedstatus\",\n                native_enum=False,\n            ),\n            nullable=True,\n        ),\n    )\n    op.execute(\"UPDATE chat_session SET shared_status='PRIVATE'\")\n    op.alter_column(\n        \"chat_session\",\n        \"shared_status\",\n        nullable=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_session\", \"shared_status\")\n"
  },
  {
    "path": "backend/alembic/versions/3934b1bc7b62_update_github_connector_repo_name_to_.py",
    "content": "\"\"\"Update GitHub connector repo_name to repositories\n\nRevision ID: 3934b1bc7b62\nRevises: b7c2b63c4a03\nCreate Date: 2025-03-05 10:50:30.516962\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nimport json\nimport logging\n\n# revision identifiers, used by Alembic.\nrevision = \"3934b1bc7b62\"\ndown_revision = \"b7c2b63c4a03\"\nbranch_labels = None\ndepends_on = None\n\nlogger = logging.getLogger(\"alembic.runtime.migration\")\n\n\ndef upgrade() -> None:\n    # Get all GitHub connectors\n    conn = op.get_bind()\n\n    # First get all GitHub connectors\n    github_connectors = conn.execute(\n        sa.text(\n            \"\"\"\n            SELECT id, connector_specific_config\n            FROM connector\n            WHERE source = 'GITHUB'\n            \"\"\"\n        )\n    ).fetchall()\n\n    # Update each connector's config\n    updated_count = 0\n    for connector_id, config in github_connectors:\n        try:\n            if not config:\n                logger.warning(f\"Connector {connector_id} has no config, skipping\")\n                continue\n\n            # Parse the config if it's a string\n            if isinstance(config, str):\n                config = json.loads(config)\n\n            if \"repo_name\" not in config:\n                continue\n\n            # Create new config with repositories instead of repo_name\n            new_config = dict(config)\n            repo_name_value = new_config.pop(\"repo_name\")\n            new_config[\"repositories\"] = repo_name_value\n\n            # Update the connector with the new config\n            conn.execute(\n                sa.text(\n                    \"\"\"\n                    UPDATE connector\n                    SET connector_specific_config = :new_config\n                    WHERE id = :connector_id\n                    \"\"\"\n                ),\n                {\"connector_id\": connector_id, \"new_config\": json.dumps(new_config)},\n            )\n            updated_count += 1\n        except Exception as e:\n            logger.error(f\"Error updating connector {connector_id}: {str(e)}\")\n\n\ndef downgrade() -> None:\n    # Get all GitHub connectors\n    conn = op.get_bind()\n\n    logger.debug(\n        \"Starting rollback of GitHub connectors from repositories to repo_name\"\n    )\n\n    github_connectors = conn.execute(\n        sa.text(\n            \"\"\"\n            SELECT id, connector_specific_config\n            FROM connector\n            WHERE source = 'GITHUB'\n            \"\"\"\n        )\n    ).fetchall()\n\n    logger.debug(f\"Found {len(github_connectors)} GitHub connectors to rollback\")\n\n    # Revert each GitHub connector to use repo_name instead of repositories\n    reverted_count = 0\n    for connector_id, config in github_connectors:\n        try:\n            if not config:\n                continue\n\n            # Parse the config if it's a string\n            if isinstance(config, str):\n                config = json.loads(config)\n\n            if \"repositories\" not in config:\n                continue\n\n            # Create new config with repo_name instead of repositories\n            new_config = dict(config)\n            repositories_value = new_config.pop(\"repositories\")\n            new_config[\"repo_name\"] = repositories_value\n\n            # Update the connector with the new config\n            conn.execute(\n                sa.text(\n                    \"\"\"\n                    UPDATE connector\n                    SET connector_specific_config = :new_config\n                    WHERE id = :connector_id\n                    \"\"\"\n                ),\n                {\"new_config\": json.dumps(new_config), \"connector_id\": connector_id},\n            )\n            reverted_count += 1\n        except Exception as e:\n            logger.error(f\"Error reverting connector {connector_id}: {str(e)}\")\n"
  },
  {
    "path": "backend/alembic/versions/3a7802814195_add_alternate_assistant_to_chat_message.py",
    "content": "\"\"\"add alternate assistant to chat message\n\nRevision ID: 3a7802814195\nRevises: 23957775e5f5\nCreate Date: 2024-06-05 11:18:49.966333\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"3a7802814195\"\ndown_revision = \"23957775e5f5\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_message\", sa.Column(\"alternate_assistant_id\", sa.Integer(), nullable=True)\n    )\n    op.create_foreign_key(\n        \"fk_chat_message_persona\",\n        \"chat_message\",\n        \"persona\",\n        [\"alternate_assistant_id\"],\n        [\"id\"],\n    )\n\n\ndef downgrade() -> None:\n    op.drop_constraint(\"fk_chat_message_persona\", \"chat_message\", type_=\"foreignkey\")\n    op.drop_column(\"chat_message\", \"alternate_assistant_id\")\n"
  },
  {
    "path": "backend/alembic/versions/3a78dba1080a_user_file_legacy_data_cleanup.py",
    "content": "\"\"\"Migration 5: User file legacy data cleanup\n\nRevision ID: 3a78dba1080a\nRevises: 7cc3fcc116c1\nCreate Date: 2025-09-22 10:04:27.986294\n\nThis migration removes legacy user-file documents and connector_credential_pairs.\nIt performs bulk deletions of obsolete data after the UUID migration.\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql as psql\nfrom sqlalchemy import text\nimport logging\nfrom typing import List\nimport uuid\n\nlogger = logging.getLogger(\"alembic.runtime.migration\")\n\n# revision identifiers, used by Alembic.\nrevision = \"3a78dba1080a\"\ndown_revision = \"7cc3fcc116c1\"\nbranch_labels = None\ndepends_on = None\n\n\ndef batch_delete(\n    bind: sa.engine.Connection,\n    table_name: str,\n    id_column: str,\n    ids: List[str | int | uuid.UUID],\n    batch_size: int = 1000,\n    id_type: str = \"int\",\n) -> int:\n    \"\"\"Delete records in batches to avoid memory issues and timeouts.\"\"\"\n    total_count = len(ids)\n    if total_count == 0:\n        return 0\n\n    logger.info(\n        f\"Starting batch deletion of {total_count} records from {table_name}...\"\n    )\n\n    # Determine appropriate ARRAY type\n    if id_type == \"uuid\":\n        array_type = psql.ARRAY(psql.UUID(as_uuid=True))\n    elif id_type == \"int\":\n        array_type = psql.ARRAY(sa.Integer())\n    else:\n        array_type = psql.ARRAY(sa.String())\n\n    total_deleted = 0\n    failed_batches = []\n\n    for i in range(0, total_count, batch_size):\n        batch_ids = ids[i : i + batch_size]\n        try:\n            stmt = text(\n                f\"DELETE FROM {table_name} WHERE {id_column} = ANY(:ids)\"\n            ).bindparams(sa.bindparam(\"ids\", value=batch_ids, type_=array_type))\n            result = bind.execute(stmt)\n            total_deleted += result.rowcount\n\n            # Log progress every 10 batches or at completion\n            batch_num = (i // batch_size) + 1\n            if batch_num % 10 == 0 or i + batch_size >= total_count:\n                logger.info(\n                    f\"  Deleted {min(i + batch_size, total_count)}/{total_count} records \"\n                    f\"({total_deleted} actual) from {table_name}\"\n                )\n        except Exception as e:\n            logger.error(f\"Failed to delete batch {(i // batch_size) + 1}: {e}\")\n            failed_batches.append((i, min(i + batch_size, total_count)))\n\n    if failed_batches:\n        logger.warning(\n            f\"Failed to delete {len(failed_batches)} batches from {table_name}. Total deleted: {total_deleted}/{total_count}\"\n        )\n        # Fail the migration to avoid silently succeeding on partial cleanup\n        raise RuntimeError(\n            f\"Batch deletion failed for {table_name}: \"\n            f\"{len(failed_batches)} failed batches out of \"\n            f\"{(total_count + batch_size - 1) // batch_size}.\"\n        )\n\n    return total_deleted\n\n\ndef upgrade() -> None:\n    \"\"\"Remove legacy user-file documents and connector_credential_pairs.\"\"\"\n\n    bind = op.get_bind()\n    inspector = sa.inspect(bind)\n\n    logger.info(\"Starting legacy data cleanup...\")\n\n    # === Step 1: Identify and delete user-file documents ===\n    logger.info(\"Identifying user-file documents to delete...\")\n\n    # Get document IDs to delete\n    doc_rows = bind.execute(\n        text(\n            \"\"\"\n        SELECT DISTINCT dcc.id AS document_id\n        FROM document_by_connector_credential_pair dcc\n        JOIN connector_credential_pair u\n          ON u.connector_id = dcc.connector_id\n         AND u.credential_id = dcc.credential_id\n        WHERE u.is_user_file IS TRUE\n    \"\"\"\n        )\n    ).fetchall()\n\n    doc_ids = [r[0] for r in doc_rows]\n\n    if doc_ids:\n        logger.info(f\"Found {len(doc_ids)} user-file documents to delete\")\n\n        # Delete dependent rows first\n        tables_to_clean = [\n            (\"document_retrieval_feedback\", \"document_id\"),\n            (\"document__tag\", \"document_id\"),\n            (\"chunk_stats\", \"document_id\"),\n        ]\n\n        for table_name, column_name in tables_to_clean:\n            if table_name in inspector.get_table_names():\n                # document_id is a string in these tables\n                deleted = batch_delete(\n                    bind, table_name, column_name, doc_ids, id_type=\"str\"\n                )\n                logger.info(f\"Deleted {deleted} records from {table_name}\")\n\n        # Delete document_by_connector_credential_pair entries\n        deleted = batch_delete(\n            bind, \"document_by_connector_credential_pair\", \"id\", doc_ids, id_type=\"str\"\n        )\n        logger.info(f\"Deleted {deleted} document_by_connector_credential_pair records\")\n\n        # Delete documents themselves\n        deleted = batch_delete(bind, \"document\", \"id\", doc_ids, id_type=\"str\")\n        logger.info(f\"Deleted {deleted} document records\")\n    else:\n        logger.info(\"No user-file documents found to delete\")\n\n    # === Step 2: Clean up user-file connector_credential_pairs ===\n    logger.info(\"Cleaning up user-file connector_credential_pairs...\")\n\n    # Get cc_pair IDs\n    cc_pair_rows = bind.execute(\n        text(\n            \"\"\"\n        SELECT id AS cc_pair_id\n        FROM connector_credential_pair\n        WHERE is_user_file IS TRUE\n    \"\"\"\n        )\n    ).fetchall()\n\n    cc_pair_ids = [r[0] for r in cc_pair_rows]\n\n    if cc_pair_ids:\n        logger.info(\n            f\"Found {len(cc_pair_ids)} user-file connector_credential_pairs to clean up\"\n        )\n\n        # Delete related records\n        # Clean child tables first to satisfy foreign key constraints,\n        # then the parent tables\n        tables_to_clean = [\n            (\"index_attempt_errors\", \"connector_credential_pair_id\"),\n            (\"index_attempt\", \"connector_credential_pair_id\"),\n            (\"background_error\", \"cc_pair_id\"),\n            (\"document_set__connector_credential_pair\", \"connector_credential_pair_id\"),\n            (\"user_group__connector_credential_pair\", \"cc_pair_id\"),\n        ]\n\n        for table_name, column_name in tables_to_clean:\n            if table_name in inspector.get_table_names():\n                deleted = batch_delete(\n                    bind, table_name, column_name, cc_pair_ids, id_type=\"int\"\n                )\n                logger.info(f\"Deleted {deleted} records from {table_name}\")\n\n    # === Step 3: Identify connectors and credentials to delete ===\n    logger.info(\"Identifying orphaned connectors and credentials...\")\n\n    # Get connectors used only by user-file cc_pairs\n    connector_rows = bind.execute(\n        text(\n            \"\"\"\n        SELECT DISTINCT ccp.connector_id\n        FROM connector_credential_pair ccp\n        WHERE ccp.is_user_file IS TRUE\n          AND ccp.connector_id != 0  -- Exclude system default\n          AND NOT EXISTS (\n            SELECT 1\n            FROM connector_credential_pair c2\n            WHERE c2.connector_id = ccp.connector_id\n              AND c2.is_user_file IS NOT TRUE\n          )\n    \"\"\"\n        )\n    ).fetchall()\n\n    userfile_only_connector_ids = [r[0] for r in connector_rows]\n\n    # Get credentials used only by user-file cc_pairs\n    credential_rows = bind.execute(\n        text(\n            \"\"\"\n        SELECT DISTINCT ccp.credential_id\n        FROM connector_credential_pair ccp\n        WHERE ccp.is_user_file IS TRUE\n          AND ccp.credential_id != 0  -- Exclude public/default\n          AND NOT EXISTS (\n            SELECT 1\n            FROM connector_credential_pair c2\n            WHERE c2.credential_id = ccp.credential_id\n              AND c2.is_user_file IS NOT TRUE\n          )\n    \"\"\"\n        )\n    ).fetchall()\n\n    userfile_only_credential_ids = [r[0] for r in credential_rows]\n\n    # === Step 4: Delete the cc_pairs themselves ===\n    if cc_pair_ids:\n        # Remove FK dependency from user_file first\n        bind.execute(\n            text(\n                \"\"\"\n            DO $$\n            DECLARE r RECORD;\n            BEGIN\n              FOR r IN (\n                SELECT conname\n                FROM pg_constraint c\n                JOIN pg_class t ON c.conrelid = t.oid\n                JOIN pg_class ft ON c.confrelid = ft.oid\n                WHERE c.contype = 'f'\n                  AND t.relname = 'user_file'\n                  AND ft.relname = 'connector_credential_pair'\n              ) LOOP\n                EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);\n              END LOOP;\n            END$$;\n        \"\"\"\n            )\n        )\n\n        # Delete cc_pairs\n        deleted = batch_delete(\n            bind, \"connector_credential_pair\", \"id\", cc_pair_ids, id_type=\"int\"\n        )\n        logger.info(f\"Deleted {deleted} connector_credential_pair records\")\n\n    # === Step 5: Delete orphaned connectors ===\n    if userfile_only_connector_ids:\n        deleted = batch_delete(\n            bind, \"connector\", \"id\", userfile_only_connector_ids, id_type=\"int\"\n        )\n        logger.info(f\"Deleted {deleted} orphaned connector records\")\n\n    # === Step 6: Delete orphaned credentials ===\n    if userfile_only_credential_ids:\n        # Clean up credential__user_group mappings first\n        deleted = batch_delete(\n            bind,\n            \"credential__user_group\",\n            \"credential_id\",\n            userfile_only_credential_ids,\n            id_type=\"int\",\n        )\n        logger.info(f\"Deleted {deleted} credential__user_group records\")\n\n        # Delete credentials\n        deleted = batch_delete(\n            bind, \"credential\", \"id\", userfile_only_credential_ids, id_type=\"int\"\n        )\n        logger.info(f\"Deleted {deleted} orphaned credential records\")\n\n    logger.info(\"Migration 5 (legacy data cleanup) completed successfully\")\n\n\ndef downgrade() -> None:\n    \"\"\"Cannot restore deleted data - requires backup restoration.\"\"\"\n\n    logger.error(\"CRITICAL: Downgrading data cleanup cannot restore deleted data!\")\n    logger.error(\"Data restoration requires backup files or database backup.\")\n\n    # raise NotImplementedError(\n    #     \"Downgrade of legacy data cleanup is not supported. \"\n    #     \"Deleted data must be restored from backups.\"\n    # )\n"
  },
  {
    "path": "backend/alembic/versions/3b25685ff73c_move_is_public_to_cc_pair.py",
    "content": "\"\"\"Move is_public to cc_pair\n\nRevision ID: 3b25685ff73c\nRevises: e0a68a81d434\nCreate Date: 2023-10-05 18:47:09.582849\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"3b25685ff73c\"\ndown_revision = \"e0a68a81d434\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\"is_public\", sa.Boolean(), nullable=True),\n    )\n    # fill in is_public for existing rows\n    op.execute(\n        \"UPDATE connector_credential_pair SET is_public = true WHERE is_public IS NULL\"\n    )\n    op.alter_column(\"connector_credential_pair\", \"is_public\", nullable=False)\n\n    op.add_column(\n        \"credential\",\n        sa.Column(\"is_admin\", sa.Boolean(), nullable=True),\n    )\n    op.execute(\"UPDATE credential SET is_admin = true WHERE is_admin IS NULL\")\n    op.alter_column(\"credential\", \"is_admin\", nullable=False)\n\n    op.drop_column(\"credential\", \"public_doc\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"credential\",\n        sa.Column(\"public_doc\", sa.Boolean(), nullable=True),\n    )\n    # setting public_doc to false for all existing rows to be safe\n    # NOTE: this is likely not the correct state of the world but it's the best we can do\n    op.execute(\"UPDATE credential SET public_doc = false WHERE public_doc IS NULL\")\n    op.alter_column(\"credential\", \"public_doc\", nullable=False)\n    op.drop_column(\"connector_credential_pair\", \"is_public\")\n    op.drop_column(\"credential\", \"is_admin\")\n"
  },
  {
    "path": "backend/alembic/versions/3bd4c84fe72f_improved_index.py",
    "content": "\"\"\"improved index\n\nRevision ID: 3bd4c84fe72f\nRevises: 8f43500ee275\nCreate Date: 2025-02-26 13:07:56.217791\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"3bd4c84fe72f\"\ndown_revision = \"8f43500ee275\"\nbranch_labels = None\ndepends_on = None\n\n\n# NOTE:\n# This migration addresses issues with the previous migration (8f43500ee275) which caused\n# an outage by creating an index without using CONCURRENTLY. This migration:\n#\n# 1. Creates more efficient full-text search capabilities using tsvector columns and GIN indexes\n# 2. Adds indexes to both chat_message and chat_session tables for comprehensive search\n# 3. Note: CONCURRENTLY was removed due to operational issues\n\n\ndef upgrade() -> None:\n    # First, drop any existing indexes to avoid conflicts\n    op.execute(\"DROP INDEX IF EXISTS idx_chat_message_tsv;\")\n    op.execute(\"DROP INDEX IF EXISTS idx_chat_session_desc_tsv;\")\n    op.execute(\"DROP INDEX IF EXISTS idx_chat_message_message_lower;\")\n\n    # Drop existing columns if they exist\n    op.execute(\"ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv;\")\n    op.execute(\"ALTER TABLE chat_session DROP COLUMN IF EXISTS description_tsv;\")\n\n    # Create a GIN index for full-text search on chat_message.message\n    op.execute(\n        \"\"\"\n        ALTER TABLE chat_message\n        ADD COLUMN message_tsv tsvector\n        GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;\n        \"\"\"\n    )\n\n    op.execute(\n        \"\"\"\n        CREATE INDEX IF NOT EXISTS idx_chat_message_tsv\n        ON chat_message\n        USING GIN (message_tsv)\n        \"\"\"\n    )\n\n    # Also add a stored tsvector column for chat_session.description\n    op.execute(\n        \"\"\"\n        ALTER TABLE chat_session\n        ADD COLUMN description_tsv tsvector\n        GENERATED ALWAYS AS (to_tsvector('english', coalesce(description, ''))) STORED;\n        \"\"\"\n    )\n\n    op.execute(\n        \"\"\"\n        CREATE INDEX IF NOT EXISTS idx_chat_session_desc_tsv\n        ON chat_session\n        USING GIN (description_tsv)\n        \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    # Drop the indexes first\n    op.execute(\"DROP INDEX IF EXISTS idx_chat_message_tsv;\")\n    op.execute(\"DROP INDEX IF EXISTS idx_chat_session_desc_tsv;\")\n\n    # Then drop the columns\n    op.execute(\"ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv;\")\n    op.execute(\"ALTER TABLE chat_session DROP COLUMN IF EXISTS description_tsv;\")\n\n    op.execute(\"DROP INDEX IF EXISTS idx_chat_message_message_lower;\")\n"
  },
  {
    "path": "backend/alembic/versions/3c5e35aa9af0_polling_document_count.py",
    "content": "\"\"\"Polling Document Count\n\nRevision ID: 3c5e35aa9af0\nRevises: 27c6ecc08586\nCreate Date: 2023-06-14 23:45:51.760440\n\n\"\"\"\n\nimport sqlalchemy as sa\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"3c5e35aa9af0\"\ndown_revision = \"27c6ecc08586\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"last_successful_index_time\",\n            sa.DateTime(timezone=True),\n            nullable=True,\n        ),\n    )\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"last_attempt_status\",\n            sa.Enum(\n                \"NOT_STARTED\",\n                \"IN_PROGRESS\",\n                \"SUCCESS\",\n                \"FAILED\",\n                name=\"indexingstatus\",\n                native_enum=False,\n            ),\n            nullable=False,\n        ),\n    )\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\"total_docs_indexed\", sa.Integer(), nullable=False),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"connector_credential_pair\", \"total_docs_indexed\")\n    op.drop_column(\"connector_credential_pair\", \"last_attempt_status\")\n    op.drop_column(\"connector_credential_pair\", \"last_successful_index_time\")\n"
  },
  {
    "path": "backend/alembic/versions/3c6531f32351_add_back_input_prompts.py",
    "content": "\"\"\"add back input prompts\n\nRevision ID: 3c6531f32351\nRevises: aeda5f2df4f6\nCreate Date: 2025-01-13 12:49:51.705235\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nimport fastapi_users_db_sqlalchemy\n\n# revision identifiers, used by Alembic.\nrevision = \"3c6531f32351\"\ndown_revision = \"aeda5f2df4f6\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"inputprompt\",\n        sa.Column(\"id\", sa.Integer(), autoincrement=True, nullable=False),\n        sa.Column(\"prompt\", sa.String(), nullable=False),\n        sa.Column(\"content\", sa.String(), nullable=False),\n        sa.Column(\"active\", sa.Boolean(), nullable=False),\n        sa.Column(\"is_public\", sa.Boolean(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"inputprompt__user\",\n        sa.Column(\"input_prompt_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"user_id\", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False\n        ),\n        sa.Column(\"disabled\", sa.Boolean(), nullable=False, default=False),\n        sa.ForeignKeyConstraint(\n            [\"input_prompt_id\"],\n            [\"inputprompt.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"input_prompt_id\", \"user_id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"inputprompt__user\")\n    op.drop_table(\"inputprompt\")\n"
  },
  {
    "path": "backend/alembic/versions/3c9a65f1207f_seed_exa_provider_from_env.py",
    "content": "\"\"\"seed_exa_provider_from_env\n\nRevision ID: 3c9a65f1207f\nRevises: 1f2a3b4c5d6e\nCreate Date: 2025-11-20 19:18:00.000000\n\n\"\"\"\n\nfrom __future__ import annotations\n\nimport os\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\nfrom dotenv import load_dotenv, find_dotenv\n\nfrom onyx.utils.encryption import encrypt_string_to_bytes\n\nrevision = \"3c9a65f1207f\"\ndown_revision = \"1f2a3b4c5d6e\"\nbranch_labels = None\ndepends_on = None\n\n\nEXA_PROVIDER_NAME = \"Exa\"\n\n\ndef _get_internet_search_table(metadata: sa.MetaData) -> sa.Table:\n    return sa.Table(\n        \"internet_search_provider\",\n        metadata,\n        sa.Column(\"id\", sa.Integer, primary_key=True),\n        sa.Column(\"name\", sa.String),\n        sa.Column(\"provider_type\", sa.String),\n        sa.Column(\"api_key\", sa.LargeBinary),\n        sa.Column(\"config\", postgresql.JSONB),\n        sa.Column(\"is_active\", sa.Boolean),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            nullable=False,\n            server_default=sa.text(\"now()\"),\n        ),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            nullable=False,\n            server_default=sa.text(\"now()\"),\n        ),\n    )\n\n\ndef upgrade() -> None:\n    load_dotenv(find_dotenv())\n\n    exa_api_key = os.environ.get(\"EXA_API_KEY\")\n    if not exa_api_key:\n        return\n\n    bind = op.get_bind()\n    metadata = sa.MetaData()\n    table = _get_internet_search_table(metadata)\n\n    existing = bind.execute(\n        sa.select(table.c.id).where(table.c.name == EXA_PROVIDER_NAME)\n    ).first()\n    if existing:\n        return\n\n    encrypted_key = encrypt_string_to_bytes(exa_api_key)\n\n    has_active_provider = bind.execute(\n        sa.select(table.c.id).where(table.c.is_active.is_(True))\n    ).first()\n\n    bind.execute(\n        table.insert().values(\n            name=EXA_PROVIDER_NAME,\n            provider_type=\"exa\",\n            api_key=encrypted_key,\n            config=None,\n            is_active=not bool(has_active_provider),\n        )\n    )\n\n\ndef downgrade() -> None:\n    return\n"
  },
  {
    "path": "backend/alembic/versions/3d1cca026fe8_add_oauth_config_and_user_tokens.py",
    "content": "\"\"\"add_oauth_config_and_user_tokens\n\nRevision ID: 3d1cca026fe8\nRevises: c8a93a2af083\nCreate Date: 2025-10-21 13:27:34.274721\n\n\"\"\"\n\nfrom alembic import op\nimport fastapi_users_db_sqlalchemy\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"3d1cca026fe8\"\ndown_revision = \"c8a93a2af083\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Create oauth_config table\n    op.create_table(\n        \"oauth_config\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"authorization_url\", sa.Text(), nullable=False),\n        sa.Column(\"token_url\", sa.Text(), nullable=False),\n        sa.Column(\"client_id\", sa.LargeBinary(), nullable=False),\n        sa.Column(\"client_secret\", sa.LargeBinary(), nullable=False),\n        sa.Column(\"scopes\", postgresql.JSONB(astext_type=sa.Text()), nullable=True),\n        sa.Column(\n            \"additional_params\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=True,\n        ),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"name\"),\n    )\n\n    # Create oauth_user_token table\n    op.create_table(\n        \"oauth_user_token\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"oauth_config_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=False,\n        ),\n        sa.Column(\"token_data\", sa.LargeBinary(), nullable=False),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"oauth_config_id\"], [\"oauth_config.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.ForeignKeyConstraint([\"user_id\"], [\"user.id\"], ondelete=\"CASCADE\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"oauth_config_id\", \"user_id\", name=\"uq_oauth_user_token\"),\n    )\n\n    # Create index on user_id for efficient user-based token lookups\n    # Note: unique constraint on (oauth_config_id, user_id) already creates\n    # an index for config-based lookups\n    op.create_index(\n        \"ix_oauth_user_token_user_id\",\n        \"oauth_user_token\",\n        [\"user_id\"],\n    )\n\n    # Add oauth_config_id column to tool table\n    op.add_column(\"tool\", sa.Column(\"oauth_config_id\", sa.Integer(), nullable=True))\n\n    # Create foreign key from tool to oauth_config\n    op.create_foreign_key(\n        \"tool_oauth_config_fk\",\n        \"tool\",\n        \"oauth_config\",\n        [\"oauth_config_id\"],\n        [\"id\"],\n        ondelete=\"SET NULL\",\n    )\n\n\ndef downgrade() -> None:\n    # Drop foreign key from tool to oauth_config\n    op.drop_constraint(\"tool_oauth_config_fk\", \"tool\", type_=\"foreignkey\")\n\n    # Drop oauth_config_id column from tool table\n    op.drop_column(\"tool\", \"oauth_config_id\")\n\n    # Drop index on user_id\n    op.drop_index(\"ix_oauth_user_token_user_id\", table_name=\"oauth_user_token\")\n\n    # Drop oauth_user_token table (will cascade delete tokens)\n    op.drop_table(\"oauth_user_token\")\n\n    # Drop oauth_config table\n    op.drop_table(\"oauth_config\")\n"
  },
  {
    "path": "backend/alembic/versions/3fc5d75723b3_add_doc_metadata_field_in_document_model.py",
    "content": "\"\"\"add_doc_metadata_field_in_document_model\n\nRevision ID: 3fc5d75723b3\nRevises: 2f95e36923e6\nCreate Date: 2025-07-28 18:45:37.985406\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"3fc5d75723b3\"\ndown_revision = \"2f95e36923e6\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"document\",\n        sa.Column(\n            \"doc_metadata\", postgresql.JSONB(astext_type=sa.Text()), nullable=True\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"document\", \"doc_metadata\")\n"
  },
  {
    "path": "backend/alembic/versions/401c1ac29467_add_tables_for_ui_based_llm_.py",
    "content": "\"\"\"Add tables for UI-based LLM configuration\n\nRevision ID: 401c1ac29467\nRevises: 703313b75876\nCreate Date: 2024-04-13 18:07:29.153817\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"401c1ac29467\"\ndown_revision = \"703313b75876\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"llm_provider\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"api_key\", sa.String(), nullable=True),\n        sa.Column(\"api_base\", sa.String(), nullable=True),\n        sa.Column(\"api_version\", sa.String(), nullable=True),\n        sa.Column(\n            \"custom_config\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=True,\n        ),\n        sa.Column(\"default_model_name\", sa.String(), nullable=False),\n        sa.Column(\"fast_default_model_name\", sa.String(), nullable=True),\n        sa.Column(\"is_default_provider\", sa.Boolean(), unique=True, nullable=True),\n        sa.Column(\"model_names\", postgresql.ARRAY(sa.String()), nullable=True),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"name\"),\n    )\n\n    op.add_column(\n        \"persona\",\n        sa.Column(\"llm_model_provider_override\", sa.String(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"persona\", \"llm_model_provider_override\")\n\n    op.drop_table(\"llm_provider\")\n"
  },
  {
    "path": "backend/alembic/versions/40926a4dab77_reset_userfile_document_id_migrated_.py",
    "content": "\"\"\"reset userfile document_id_migrated field\n\nRevision ID: 40926a4dab77\nRevises: 64bd5677aeb6\nCreate Date: 2025-10-06 16:10:32.898668\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"40926a4dab77\"\ndown_revision = \"64bd5677aeb6\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Set all existing records to not migrated\n    op.execute(\n        \"UPDATE user_file SET document_id_migrated = FALSE WHERE document_id_migrated IS DISTINCT FROM FALSE;\"\n    )\n\n\ndef downgrade() -> None:\n    # No-op\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/41fa44bef321_remove_default_prompt_shortcuts.py",
    "content": "\"\"\"remove default prompt shortcuts\n\nRevision ID: 41fa44bef321\nRevises: 2c2430828bdf\nCreate Date: 2025-01-21\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"41fa44bef321\"\ndown_revision = \"2c2430828bdf\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Delete any user associations for the default prompts first (foreign key constraint)\n    op.execute(\n        \"DELETE FROM inputprompt__user WHERE input_prompt_id IN (SELECT id FROM inputprompt WHERE id < 0)\"\n    )\n    # Delete the pre-seeded default prompt shortcuts (they have negative IDs)\n    op.execute(\"DELETE FROM inputprompt WHERE id < 0\")\n\n\ndef downgrade() -> None:\n    # We don't restore the default prompts on downgrade\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/43cbbb3f5e6a_rename_index_origin_to_index_recursively.py",
    "content": "\"\"\"Rename index_origin to index_recursively\n\nRevision ID: 1d6ad76d1f37\nRevises: e1392f05e840\nCreate Date: 2024-08-01 12:38:54.466081\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"1d6ad76d1f37\"\ndown_revision = \"e1392f05e840\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.execute(\n        \"\"\"\n        UPDATE connector\n        SET connector_specific_config = jsonb_set(\n            connector_specific_config,\n            '{index_recursively}',\n            'true'::jsonb\n        ) - 'index_origin'\n        WHERE connector_specific_config ? 'index_origin'\n    \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    op.execute(\n        \"\"\"\n        UPDATE connector\n        SET connector_specific_config = jsonb_set(\n            connector_specific_config,\n            '{index_origin}',\n            connector_specific_config->'index_recursively'\n        ) - 'index_recursively'\n        WHERE connector_specific_config ? 'index_recursively'\n    \"\"\"\n    )\n"
  },
  {
    "path": "backend/alembic/versions/44f856ae2a4a_add_cloud_embedding_model.py",
    "content": "\"\"\"add cloud embedding model and update embedding_model\n\nRevision ID: 44f856ae2a4a\nRevises: d716b0791ddd\nCreate Date: 2024-06-28 20:01:05.927647\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"44f856ae2a4a\"\ndown_revision = \"d716b0791ddd\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    # Create embedding_provider table\n    op.create_table(\n        \"embedding_provider\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"api_key\", sa.LargeBinary(), nullable=True),\n        sa.Column(\"default_model_id\", sa.Integer(), nullable=True),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"name\"),\n    )\n\n    # Add cloud_provider_id to embedding_model table\n    op.add_column(\n        \"embedding_model\", sa.Column(\"cloud_provider_id\", sa.Integer(), nullable=True)\n    )\n\n    # Add foreign key constraints\n    op.create_foreign_key(\n        \"fk_embedding_model_cloud_provider\",\n        \"embedding_model\",\n        \"embedding_provider\",\n        [\"cloud_provider_id\"],\n        [\"id\"],\n    )\n    op.create_foreign_key(\n        \"fk_embedding_provider_default_model\",\n        \"embedding_provider\",\n        \"embedding_model\",\n        [\"default_model_id\"],\n        [\"id\"],\n    )\n\n\ndef downgrade() -> None:\n    # Remove foreign key constraints\n    op.drop_constraint(\n        \"fk_embedding_model_cloud_provider\", \"embedding_model\", type_=\"foreignkey\"\n    )\n    op.drop_constraint(\n        \"fk_embedding_provider_default_model\", \"embedding_provider\", type_=\"foreignkey\"\n    )\n\n    # Remove cloud_provider_id column\n    op.drop_column(\"embedding_model\", \"cloud_provider_id\")\n\n    # Drop embedding_provider table\n    op.drop_table(\"embedding_provider\")\n"
  },
  {
    "path": "backend/alembic/versions/4505fd7302e1_added_is_internet_to_dbdoc.py",
    "content": "\"\"\"added is_internet to DBDoc\n\nRevision ID: 4505fd7302e1\nRevises: c18cdf4b497e\nCreate Date: 2024-06-18 20:46:09.095034\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"4505fd7302e1\"\ndown_revision = \"c18cdf4b497e\"\n\n\ndef upgrade() -> None:\n    op.add_column(\"search_doc\", sa.Column(\"is_internet\", sa.Boolean(), nullable=True))\n    op.add_column(\"tool\", sa.Column(\"display_name\", sa.String(), nullable=True))\n\n\ndef downgrade() -> None:\n    op.drop_column(\"tool\", \"display_name\")\n    op.drop_column(\"search_doc\", \"is_internet\")\n"
  },
  {
    "path": "backend/alembic/versions/465f78d9b7f9_larger_access_tokens_for_oauth.py",
    "content": "\"\"\"Larger Access Tokens for OAUTH\n\nRevision ID: 465f78d9b7f9\nRevises: 3c5e35aa9af0\nCreate Date: 2023-07-18 17:33:40.365034\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"465f78d9b7f9\"\ndown_revision = \"3c5e35aa9af0\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\"oauth_account\", \"access_token\", type_=sa.Text())\n\n\ndef downgrade() -> None:\n    op.alter_column(\"oauth_account\", \"access_token\", type_=sa.String(length=1024))\n"
  },
  {
    "path": "backend/alembic/versions/46625e4745d4_remove_native_enum.py",
    "content": "\"\"\"Remove Native Enum\n\nRevision ID: 46625e4745d4\nRevises: 9d97fecfab7f\nCreate Date: 2023-10-27 11:38:33.803145\n\n\"\"\"\n\nfrom alembic import op\nfrom sqlalchemy import String\n\n# revision identifiers, used by Alembic.\nrevision = \"46625e4745d4\"\ndown_revision = \"9d97fecfab7f\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    # At this point, we directly changed some previous migrations,\n    # https://github.com/onyx-dot-app/onyx/pull/637\n    # Due to using Postgres native Enums, it caused some complications for first time users.\n    # To remove those complications, all Enums are only handled application side moving forward.\n    # This migration exists to ensure that existing users don't run into upgrade issues.\n    op.alter_column(\"index_attempt\", \"status\", type_=String)\n    op.alter_column(\"connector_credential_pair\", \"last_attempt_status\", type_=String)\n    op.execute(\"DROP TYPE IF EXISTS indexingstatus\")\n\n\ndef downgrade() -> None:\n    # We don't want Native Enums, do nothing\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/46b7a812670f_fix_user__external_user_group_id_fk.py",
    "content": "\"\"\"fix_user__external_user_group_id_fk\n\nRevision ID: 46b7a812670f\nRevises: f32615f71aeb\nCreate Date: 2024-09-23 12:58:03.894038\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"46b7a812670f\"\ndown_revision = \"f32615f71aeb\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Drop the existing primary key\n    op.drop_constraint(\n        \"user__external_user_group_id_pkey\",\n        \"user__external_user_group_id\",\n        type_=\"primary\",\n    )\n\n    # Add the new composite primary key\n    op.create_primary_key(\n        \"user__external_user_group_id_pkey\",\n        \"user__external_user_group_id\",\n        [\"user_id\", \"external_user_group_id\", \"cc_pair_id\"],\n    )\n\n\ndef downgrade() -> None:\n    # Drop the composite primary key\n    op.drop_constraint(\n        \"user__external_user_group_id_pkey\",\n        \"user__external_user_group_id\",\n        type_=\"primary\",\n    )\n    # Delete all entries from the table\n    op.execute(\"DELETE FROM user__external_user_group_id\")\n\n    # Recreate the original primary key on user_id\n    op.create_primary_key(\n        \"user__external_user_group_id_pkey\", \"user__external_user_group_id\", [\"user_id\"]\n    )\n"
  },
  {
    "path": "backend/alembic/versions/4738e4b3bae1_pg_file_store.py",
    "content": "\"\"\"PG File Store\n\nRevision ID: 4738e4b3bae1\nRevises: e91df4e935ef\nCreate Date: 2024-03-20 18:53:32.461518\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"4738e4b3bae1\"\ndown_revision = \"e91df4e935ef\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"file_store\",\n        sa.Column(\"file_name\", sa.String(), nullable=False),\n        sa.Column(\"lobj_oid\", sa.Integer(), nullable=False),\n        sa.PrimaryKeyConstraint(\"file_name\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"file_store\")\n"
  },
  {
    "path": "backend/alembic/versions/473a1a7ca408_add_display_model_names_to_llm_provider.py",
    "content": "\"\"\"Add display_model_names to llm_provider\n\nRevision ID: 473a1a7ca408\nRevises: 325975216eb3\nCreate Date: 2024-07-25 14:31:02.002917\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"473a1a7ca408\"\ndown_revision = \"325975216eb3\"\nbranch_labels: None = None\ndepends_on: None = None\n\ndefault_models_by_provider = {\n    \"openai\": [\"gpt-4\", \"gpt-4o\", \"gpt-4o-mini\"],\n    \"bedrock\": [\n        \"meta.llama3-1-70b-instruct-v1:0\",\n        \"meta.llama3-1-8b-instruct-v1:0\",\n        \"anthropic.claude-3-opus-20240229-v1:0\",\n        \"mistral.mistral-large-2402-v1:0\",\n        \"anthropic.claude-3-5-sonnet-20240620-v1:0\",\n    ],\n    \"anthropic\": [\"claude-3-opus-20240229\", \"claude-3-5-sonnet-20240620\"],\n}\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"llm_provider\",\n        sa.Column(\"display_model_names\", postgresql.ARRAY(sa.String()), nullable=True),\n    )\n\n    connection = op.get_bind()\n    for provider, models in default_models_by_provider.items():\n        connection.execute(\n            sa.text(\n                \"UPDATE llm_provider SET display_model_names = :models WHERE provider = :provider\"\n            ),\n            {\"models\": models, \"provider\": provider},\n        )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"llm_provider\", \"display_model_names\")\n"
  },
  {
    "path": "backend/alembic/versions/47433d30de82_create_indexattempt_table.py",
    "content": "\"\"\"Create IndexAttempt table\n\nRevision ID: 47433d30de82\nRevises:\nCreate Date: 2023-05-04 00:55:32.971991\n\n\"\"\"\n\nimport sqlalchemy as sa\nfrom alembic import op\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"47433d30de82\"\ndown_revision: None = None\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"index_attempt\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        # String type since python enum will change often\n        sa.Column(\n            \"source\",\n            sa.String(),\n            nullable=False,\n        ),\n        # String type to easily accomodate new ways of pulling\n        # in documents\n        sa.Column(\n            \"input_type\",\n            sa.String(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"connector_specific_config\",\n            postgresql.JSONB(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=True,\n        ),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            server_onupdate=sa.text(\"now()\"),  # type: ignore\n            nullable=True,\n        ),\n        sa.Column(\n            \"status\",\n            sa.Enum(\n                \"NOT_STARTED\",\n                \"IN_PROGRESS\",\n                \"SUCCESS\",\n                \"FAILED\",\n                name=\"indexingstatus\",\n                native_enum=False,\n            ),\n            nullable=False,\n        ),\n        sa.Column(\"document_ids\", postgresql.ARRAY(sa.String()), nullable=True),\n        sa.Column(\"error_msg\", sa.String(), nullable=True),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"index_attempt\")\n"
  },
  {
    "path": "backend/alembic/versions/475fcefe8826_add_name_to_api_key.py",
    "content": "\"\"\"Add name to api_key\n\nRevision ID: 475fcefe8826\nRevises: ecab2b3f1a3b\nCreate Date: 2024-04-11 11:05:18.414438\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"475fcefe8826\"\ndown_revision = \"ecab2b3f1a3b\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\"api_key\", sa.Column(\"name\", sa.String(), nullable=True))\n\n\ndef downgrade() -> None:\n    op.drop_column(\"api_key\", \"name\")\n"
  },
  {
    "path": "backend/alembic/versions/4794bc13e484_update_prompt_length.py",
    "content": "\"\"\"update prompt length\n\nRevision ID: 4794bc13e484\nRevises: f7505c5b0284\nCreate Date: 2025-04-02 11:26:36.180328\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"4794bc13e484\"\ndown_revision = \"f7505c5b0284\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\n        \"prompt\",\n        \"system_prompt\",\n        existing_type=sa.TEXT(),\n        type_=sa.String(length=5000000),\n        existing_nullable=False,\n    )\n    op.alter_column(\n        \"prompt\",\n        \"task_prompt\",\n        existing_type=sa.TEXT(),\n        type_=sa.String(length=5000000),\n        existing_nullable=False,\n    )\n\n\ndef downgrade() -> None:\n    op.alter_column(\n        \"prompt\",\n        \"system_prompt\",\n        existing_type=sa.String(length=5000000),\n        type_=sa.TEXT(),\n        existing_nullable=False,\n    )\n    op.alter_column(\n        \"prompt\",\n        \"task_prompt\",\n        existing_type=sa.String(length=5000000),\n        type_=sa.TEXT(),\n        existing_nullable=False,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/47a07e1a38f1_fix_invalid_model_configurations_state.py",
    "content": "\"\"\"Fix invalid model-configurations state\n\nRevision ID: 47a07e1a38f1\nRevises: 7a70b7664e37\nCreate Date: 2025-04-23 15:39:43.159504\n\n\"\"\"\n\nfrom alembic import op\nfrom pydantic import BaseModel, ConfigDict\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\nfrom onyx.llm.well_known_providers.llm_provider_options import (\n    fetch_model_names_for_provider_as_set,\n    fetch_visible_model_names_for_provider_as_set,\n)\n\n\n# revision identifiers, used by Alembic.\nrevision = \"47a07e1a38f1\"\ndown_revision = \"7a70b7664e37\"\nbranch_labels = None\ndepends_on = None\n\n\nclass _SimpleModelConfiguration(BaseModel):\n    # Configure model to read from attributes\n    model_config = ConfigDict(from_attributes=True)\n\n    id: int\n    llm_provider_id: int\n    name: str\n    is_visible: bool\n    max_input_tokens: int | None\n\n\ndef upgrade() -> None:\n    llm_provider_table = sa.sql.table(\n        \"llm_provider\",\n        sa.column(\"id\", sa.Integer),\n        sa.column(\"provider\", sa.String),\n        sa.column(\"model_names\", postgresql.ARRAY(sa.String)),\n        sa.column(\"display_model_names\", postgresql.ARRAY(sa.String)),\n        sa.column(\"default_model_name\", sa.String),\n        sa.column(\"fast_default_model_name\", sa.String),\n    )\n    model_configuration_table = sa.sql.table(\n        \"model_configuration\",\n        sa.column(\"id\", sa.Integer),\n        sa.column(\"llm_provider_id\", sa.Integer),\n        sa.column(\"name\", sa.String),\n        sa.column(\"is_visible\", sa.Boolean),\n        sa.column(\"max_input_tokens\", sa.Integer),\n    )\n\n    connection = op.get_bind()\n\n    llm_providers = connection.execute(\n        sa.select(\n            llm_provider_table.c.id,\n            llm_provider_table.c.provider,\n        )\n    ).fetchall()\n\n    for llm_provider in llm_providers:\n        llm_provider_id, provider_name = llm_provider\n\n        default_models = fetch_model_names_for_provider_as_set(provider_name)\n        display_models = fetch_visible_model_names_for_provider_as_set(\n            provider_name=provider_name\n        )\n\n        # if `fetch_model_names_for_provider_as_set` returns `None`, then\n        # that means that `provider_name` is not a well-known llm provider.\n        if not default_models:\n            continue\n\n        if not display_models:\n            raise RuntimeError(\n                \"If `default_models` is non-None, `display_models` must be non-None too.\"\n            )\n\n        model_configurations = [\n            _SimpleModelConfiguration.model_validate(model_configuration)\n            for model_configuration in connection.execute(\n                sa.select(\n                    model_configuration_table.c.id,\n                    model_configuration_table.c.llm_provider_id,\n                    model_configuration_table.c.name,\n                    model_configuration_table.c.is_visible,\n                    model_configuration_table.c.max_input_tokens,\n                ).where(model_configuration_table.c.llm_provider_id == llm_provider_id)\n            ).fetchall()\n        ]\n\n        if model_configurations:\n            at_least_one_is_visible = any(\n                [\n                    model_configuration.is_visible\n                    for model_configuration in model_configurations\n                ]\n            )\n\n            # If there is at least one model which is public, this is a valid state.\n            # Therefore, don't touch it and move on to the next one.\n            if at_least_one_is_visible:\n                continue\n\n            existing_visible_model_names: set[str] = set(\n                [\n                    model_configuration.name\n                    for model_configuration in model_configurations\n                    if model_configuration.is_visible\n                ]\n            )\n\n            difference = display_models.difference(existing_visible_model_names)\n\n            for model_name in difference:\n                if not model_name:\n                    continue\n\n                insert_statement = postgresql.insert(model_configuration_table).values(\n                    llm_provider_id=llm_provider_id,\n                    name=model_name,\n                    is_visible=True,\n                    max_input_tokens=None,\n                )\n\n                connection.execute(\n                    insert_statement.on_conflict_do_update(\n                        index_elements=[\"llm_provider_id\", \"name\"],\n                        set_={\"is_visible\": insert_statement.excluded.is_visible},\n                    )\n                )\n        else:\n            for model_name in default_models:\n                connection.execute(\n                    model_configuration_table.insert().values(\n                        llm_provider_id=llm_provider_id,\n                        name=model_name,\n                        is_visible=model_name in display_models,\n                        max_input_tokens=None,\n                    )\n                )\n\n\ndef downgrade() -> None:\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/47e5bef3a1d7_add_persona_categories.py",
    "content": "\"\"\"add persona categories\n\nRevision ID: 47e5bef3a1d7\nRevises: dfbe9e93d3c7\nCreate Date: 2024-11-05 18:55:02.221064\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"47e5bef3a1d7\"\ndown_revision = \"dfbe9e93d3c7\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Create the persona_category table\n    op.create_table(\n        \"persona_category\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"description\", sa.String(), nullable=True),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"name\"),\n    )\n\n    # Add category_id to persona table\n    op.add_column(\"persona\", sa.Column(\"category_id\", sa.Integer(), nullable=True))\n    op.create_foreign_key(\n        \"fk_persona_category\",\n        \"persona\",\n        \"persona_category\",\n        [\"category_id\"],\n        [\"id\"],\n        ondelete=\"SET NULL\",\n    )\n\n\ndef downgrade() -> None:\n    op.drop_constraint(\"persona_category_id_fkey\", \"persona\", type_=\"foreignkey\")\n    op.drop_column(\"persona\", \"category_id\")\n    op.drop_table(\"persona_category\")\n"
  },
  {
    "path": "backend/alembic/versions/48d14957fe80_add_support_for_custom_tools.py",
    "content": "\"\"\"Add support for custom tools\n\nRevision ID: 48d14957fe80\nRevises: b85f02ec1308\nCreate Date: 2024-06-09 14:58:19.946509\n\n\"\"\"\n\nfrom alembic import op\nimport fastapi_users_db_sqlalchemy\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"48d14957fe80\"\ndown_revision = \"b85f02ec1308\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"tool\",\n        sa.Column(\n            \"openapi_schema\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=True,\n        ),\n    )\n    op.add_column(\n        \"tool\",\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n    )\n    op.create_foreign_key(\"tool_user_fk\", \"tool\", \"user\", [\"user_id\"], [\"id\"])\n\n    op.create_table(\n        \"tool_call\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\"tool_id\", sa.Integer(), nullable=False),\n        sa.Column(\"tool_name\", sa.String(), nullable=False),\n        sa.Column(\n            \"tool_arguments\", postgresql.JSONB(astext_type=sa.Text()), nullable=False\n        ),\n        sa.Column(\n            \"tool_result\", postgresql.JSONB(astext_type=sa.Text()), nullable=False\n        ),\n        sa.Column(\n            \"message_id\", sa.Integer(), sa.ForeignKey(\"chat_message.id\"), nullable=False\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"tool_call\")\n\n    op.drop_constraint(\"tool_user_fk\", \"tool\", type_=\"foreignkey\")\n    op.drop_column(\"tool\", \"user_id\")\n    op.drop_column(\"tool\", \"openapi_schema\")\n"
  },
  {
    "path": "backend/alembic/versions/495cb26ce93e_create_knowlege_graph_tables.py",
    "content": "\"\"\"create knowledge graph tables\n\nRevision ID: 495cb26ce93e\nRevises: ca04500b9ee8\nCreate Date: 2025-03-19 08:51:14.341989\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\nfrom sqlalchemy import text\nfrom datetime import datetime, timedelta\n\nfrom onyx.configs.app_configs import DB_READONLY_USER\nfrom onyx.configs.app_configs import DB_READONLY_PASSWORD\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\n\n# revision identifiers, used by Alembic.\nrevision = \"495cb26ce93e\"\ndown_revision = \"ca04500b9ee8\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n\n    # Create a new permission-less user to be later used for knowledge graph queries.\n    # The user will later get temporary read privileges for a specific view that will be\n    # ad hoc generated specific to a knowledge graph query.\n    #\n    # Note: in order for the migration to run, the DB_READONLY_USER and DB_READONLY_PASSWORD\n    # environment variables MUST be set. Otherwise, an exception will be raised.\n\n    if not MULTI_TENANT:\n        # Enable pg_trgm extension if not already enabled\n        op.execute(\"CREATE EXTENSION IF NOT EXISTS pg_trgm\")\n\n        # Create read-only db user here only in single tenant mode. For multi-tenant mode,\n        # the user is created in the alembic_tenants migration.\n        if not (DB_READONLY_USER and DB_READONLY_PASSWORD):\n            raise Exception(\"DB_READONLY_USER or DB_READONLY_PASSWORD is not set\")\n\n        op.execute(\n            text(\n                f\"\"\"\n                DO $$\n                BEGIN\n                    -- Check if the read-only user already exists\n                    IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN\n                        -- Create the read-only user with the specified password\n                        EXECUTE format('CREATE USER %I WITH PASSWORD %L', '{DB_READONLY_USER}', '{DB_READONLY_PASSWORD}');\n                        -- First revoke all privileges to ensure a clean slate\n                        EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{DB_READONLY_USER}');\n                        -- Grant only the CONNECT privilege to allow the user to connect to the database\n                        -- but not perform any operations without additional specific grants\n                        EXECUTE format('GRANT CONNECT ON DATABASE %I TO %I', current_database(), '{DB_READONLY_USER}');\n                    END IF;\n                END\n                $$;\n                \"\"\"\n            )\n        )\n\n    # Grant usage on current schema to readonly user\n    op.execute(\n        text(\n            f\"\"\"\n            DO $$\n            BEGIN\n                IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN\n                    EXECUTE format('GRANT USAGE ON SCHEMA %I TO %I', current_schema(), '{DB_READONLY_USER}');\n                END IF;\n            END\n            $$;\n            \"\"\"\n        )\n    )\n\n    op.execute(\"DROP TABLE IF EXISTS kg_config CASCADE\")\n    op.create_table(\n        \"kg_config\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True, nullable=False, index=True),\n        sa.Column(\"kg_variable_name\", sa.String(), nullable=False, index=True),\n        sa.Column(\"kg_variable_values\", postgresql.ARRAY(sa.String()), nullable=False),\n        sa.UniqueConstraint(\"kg_variable_name\", name=\"uq_kg_config_variable_name\"),\n    )\n\n    # Insert initial data into kg_config table\n    op.bulk_insert(\n        sa.table(\n            \"kg_config\",\n            sa.column(\"kg_variable_name\", sa.String),\n            sa.column(\"kg_variable_values\", postgresql.ARRAY(sa.String)),\n        ),\n        [\n            {\"kg_variable_name\": \"KG_EXPOSED\", \"kg_variable_values\": [\"false\"]},\n            {\"kg_variable_name\": \"KG_ENABLED\", \"kg_variable_values\": [\"false\"]},\n            {\"kg_variable_name\": \"KG_VENDOR\", \"kg_variable_values\": []},\n            {\"kg_variable_name\": \"KG_VENDOR_DOMAINS\", \"kg_variable_values\": []},\n            {\"kg_variable_name\": \"KG_IGNORE_EMAIL_DOMAINS\", \"kg_variable_values\": []},\n            {\n                \"kg_variable_name\": \"KG_EXTRACTION_IN_PROGRESS\",\n                \"kg_variable_values\": [\"false\"],\n            },\n            {\n                \"kg_variable_name\": \"KG_CLUSTERING_IN_PROGRESS\",\n                \"kg_variable_values\": [\"false\"],\n            },\n            {\n                \"kg_variable_name\": \"KG_COVERAGE_START\",\n                \"kg_variable_values\": [\n                    (datetime.now() - timedelta(days=90)).strftime(\"%Y-%m-%d\")\n                ],\n            },\n            {\"kg_variable_name\": \"KG_MAX_COVERAGE_DAYS\", \"kg_variable_values\": [\"90\"]},\n            {\n                \"kg_variable_name\": \"KG_MAX_PARENT_RECURSION_DEPTH\",\n                \"kg_variable_values\": [\"2\"],\n            },\n        ],\n    )\n\n    op.execute(\"DROP TABLE IF EXISTS kg_entity_type CASCADE\")\n    op.create_table(\n        \"kg_entity_type\",\n        sa.Column(\"id_name\", sa.String(), primary_key=True, nullable=False, index=True),\n        sa.Column(\"description\", sa.String(), nullable=True),\n        sa.Column(\"grounding\", sa.String(), nullable=False),\n        sa.Column(\n            \"attributes\",\n            postgresql.JSONB,\n            nullable=False,\n            server_default=\"{}\",\n        ),\n        sa.Column(\"occurrences\", sa.Integer(), server_default=\"1\", nullable=False),\n        sa.Column(\"active\", sa.Boolean(), nullable=False, default=False),\n        sa.Column(\"deep_extraction\", sa.Boolean(), nullable=False, default=False),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            onupdate=sa.text(\"now()\"),\n        ),\n        sa.Column(\n            \"time_created\", sa.DateTime(timezone=True), server_default=sa.text(\"now()\")\n        ),\n        sa.Column(\"grounded_source_name\", sa.String(), nullable=True),\n        sa.Column(\"entity_values\", postgresql.ARRAY(sa.String()), nullable=True),\n        sa.Column(\n            \"clustering\",\n            postgresql.JSONB,\n            nullable=False,\n            server_default=\"{}\",\n        ),\n    )\n\n    op.execute(\"DROP TABLE IF EXISTS kg_relationship_type CASCADE\")\n    # Create KGRelationshipType table\n    op.create_table(\n        \"kg_relationship_type\",\n        sa.Column(\"id_name\", sa.String(), primary_key=True, nullable=False, index=True),\n        sa.Column(\"name\", sa.String(), nullable=False, index=True),\n        sa.Column(\n            \"source_entity_type_id_name\", sa.String(), nullable=False, index=True\n        ),\n        sa.Column(\n            \"target_entity_type_id_name\", sa.String(), nullable=False, index=True\n        ),\n        sa.Column(\"definition\", sa.Boolean(), nullable=False, default=False),\n        sa.Column(\"occurrences\", sa.Integer(), server_default=\"1\", nullable=False),\n        sa.Column(\"type\", sa.String(), nullable=False, index=True),\n        sa.Column(\"active\", sa.Boolean(), nullable=False, default=True),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            onupdate=sa.text(\"now()\"),\n        ),\n        sa.Column(\n            \"time_created\", sa.DateTime(timezone=True), server_default=sa.text(\"now()\")\n        ),\n        sa.Column(\n            \"clustering\",\n            postgresql.JSONB,\n            nullable=False,\n            server_default=\"{}\",\n        ),\n        sa.ForeignKeyConstraint(\n            [\"source_entity_type_id_name\"], [\"kg_entity_type.id_name\"]\n        ),\n        sa.ForeignKeyConstraint(\n            [\"target_entity_type_id_name\"], [\"kg_entity_type.id_name\"]\n        ),\n    )\n\n    op.execute(\"DROP TABLE IF EXISTS kg_relationship_type_extraction_staging CASCADE\")\n    # Create KGRelationshipTypeExtractionStaging table\n    op.create_table(\n        \"kg_relationship_type_extraction_staging\",\n        sa.Column(\"id_name\", sa.String(), primary_key=True, nullable=False, index=True),\n        sa.Column(\"name\", sa.String(), nullable=False, index=True),\n        sa.Column(\n            \"source_entity_type_id_name\", sa.String(), nullable=False, index=True\n        ),\n        sa.Column(\n            \"target_entity_type_id_name\", sa.String(), nullable=False, index=True\n        ),\n        sa.Column(\"definition\", sa.Boolean(), nullable=False, default=False),\n        sa.Column(\"occurrences\", sa.Integer(), server_default=\"1\", nullable=False),\n        sa.Column(\"type\", sa.String(), nullable=False, index=True),\n        sa.Column(\"active\", sa.Boolean(), nullable=False, default=True),\n        sa.Column(\n            \"time_created\", sa.DateTime(timezone=True), server_default=sa.text(\"now()\")\n        ),\n        sa.Column(\n            \"clustering\",\n            postgresql.JSONB,\n            nullable=False,\n            server_default=\"{}\",\n        ),\n        sa.Column(\"transferred\", sa.Boolean(), nullable=False, server_default=\"false\"),\n        sa.ForeignKeyConstraint(\n            [\"source_entity_type_id_name\"], [\"kg_entity_type.id_name\"]\n        ),\n        sa.ForeignKeyConstraint(\n            [\"target_entity_type_id_name\"], [\"kg_entity_type.id_name\"]\n        ),\n    )\n\n    op.execute(\"DROP TABLE IF EXISTS kg_entity CASCADE\")\n\n    # Create KGEntity table\n    op.create_table(\n        \"kg_entity\",\n        sa.Column(\"id_name\", sa.String(), primary_key=True, nullable=False, index=True),\n        sa.Column(\"name\", sa.String(), nullable=False, index=True),\n        sa.Column(\"entity_class\", sa.String(), nullable=True, index=True),\n        sa.Column(\"entity_subtype\", sa.String(), nullable=True, index=True),\n        sa.Column(\"entity_key\", sa.String(), nullable=True, index=True),\n        sa.Column(\"name_trigrams\", postgresql.ARRAY(sa.String(3)), nullable=True),\n        sa.Column(\"document_id\", sa.String(), nullable=True, index=True),\n        sa.Column(\n            \"alternative_names\",\n            postgresql.ARRAY(sa.String()),\n            nullable=False,\n            server_default=\"{}\",\n        ),\n        sa.Column(\"entity_type_id_name\", sa.String(), nullable=False, index=True),\n        sa.Column(\"description\", sa.String(), nullable=True),\n        sa.Column(\n            \"keywords\",\n            postgresql.ARRAY(sa.String()),\n            nullable=False,\n            server_default=\"{}\",\n        ),\n        sa.Column(\"occurrences\", sa.Integer(), server_default=\"1\", nullable=False),\n        sa.Column(\n            \"acl\", postgresql.ARRAY(sa.String()), nullable=False, server_default=\"{}\"\n        ),\n        sa.Column(\"boosts\", postgresql.JSONB, nullable=False, server_default=\"{}\"),\n        sa.Column(\"attributes\", postgresql.JSONB, nullable=False, server_default=\"{}\"),\n        sa.Column(\"event_time\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            onupdate=sa.text(\"now()\"),\n        ),\n        sa.Column(\n            \"time_created\", sa.DateTime(timezone=True), server_default=sa.text(\"now()\")\n        ),\n        sa.ForeignKeyConstraint([\"entity_type_id_name\"], [\"kg_entity_type.id_name\"]),\n        sa.ForeignKeyConstraint([\"document_id\"], [\"document.id\"]),\n        sa.UniqueConstraint(\n            \"name\",\n            \"entity_type_id_name\",\n            \"document_id\",\n            name=\"uq_kg_entity_name_type_doc\",\n        ),\n    )\n    op.create_index(\"ix_entity_type_acl\", \"kg_entity\", [\"entity_type_id_name\", \"acl\"])\n    op.create_index(\n        \"ix_entity_name_search\", \"kg_entity\", [\"name\", \"entity_type_id_name\"]\n    )\n\n    op.execute(\"DROP TABLE IF EXISTS kg_entity_extraction_staging CASCADE\")\n    # Create KGEntityExtractionStaging table\n    op.create_table(\n        \"kg_entity_extraction_staging\",\n        sa.Column(\"id_name\", sa.String(), primary_key=True, nullable=False, index=True),\n        sa.Column(\"name\", sa.String(), nullable=False, index=True),\n        sa.Column(\"document_id\", sa.String(), nullable=True, index=True),\n        sa.Column(\n            \"alternative_names\",\n            postgresql.ARRAY(sa.String()),\n            nullable=False,\n            server_default=\"{}\",\n        ),\n        sa.Column(\"entity_type_id_name\", sa.String(), nullable=False, index=True),\n        sa.Column(\"description\", sa.String(), nullable=True),\n        sa.Column(\n            \"keywords\",\n            postgresql.ARRAY(sa.String()),\n            nullable=False,\n            server_default=\"{}\",\n        ),\n        sa.Column(\"occurrences\", sa.Integer(), server_default=\"1\", nullable=False),\n        sa.Column(\n            \"acl\", postgresql.ARRAY(sa.String()), nullable=False, server_default=\"{}\"\n        ),\n        sa.Column(\"boosts\", postgresql.JSONB, nullable=False, server_default=\"{}\"),\n        sa.Column(\"attributes\", postgresql.JSONB, nullable=False, server_default=\"{}\"),\n        sa.Column(\"transferred_id_name\", sa.String(), nullable=True, default=None),\n        sa.Column(\"entity_class\", sa.String(), nullable=True, index=True),\n        sa.Column(\"entity_key\", sa.String(), nullable=True, index=True),\n        sa.Column(\"entity_subtype\", sa.String(), nullable=True, index=True),\n        sa.Column(\"parent_key\", sa.String(), nullable=True, index=True),\n        sa.Column(\"event_time\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\n            \"time_created\", sa.DateTime(timezone=True), server_default=sa.text(\"now()\")\n        ),\n        sa.ForeignKeyConstraint([\"entity_type_id_name\"], [\"kg_entity_type.id_name\"]),\n        sa.ForeignKeyConstraint([\"document_id\"], [\"document.id\"]),\n    )\n    op.create_index(\n        \"ix_entity_extraction_staging_acl\",\n        \"kg_entity_extraction_staging\",\n        [\"entity_type_id_name\", \"acl\"],\n    )\n    op.create_index(\n        \"ix_entity_extraction_staging_name_search\",\n        \"kg_entity_extraction_staging\",\n        [\"name\", \"entity_type_id_name\"],\n    )\n\n    op.execute(\"DROP TABLE IF EXISTS kg_relationship CASCADE\")\n    # Create KGRelationship table\n    op.create_table(\n        \"kg_relationship\",\n        sa.Column(\"id_name\", sa.String(), nullable=False, index=True),\n        sa.Column(\"source_node\", sa.String(), nullable=False, index=True),\n        sa.Column(\"target_node\", sa.String(), nullable=False, index=True),\n        sa.Column(\"source_node_type\", sa.String(), nullable=False, index=True),\n        sa.Column(\"target_node_type\", sa.String(), nullable=False, index=True),\n        sa.Column(\"source_document\", sa.String(), nullable=True, index=True),\n        sa.Column(\"type\", sa.String(), nullable=False, index=True),\n        sa.Column(\"relationship_type_id_name\", sa.String(), nullable=False, index=True),\n        sa.Column(\"occurrences\", sa.Integer(), server_default=\"1\", nullable=False),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            onupdate=sa.text(\"now()\"),\n        ),\n        sa.Column(\n            \"time_created\", sa.DateTime(timezone=True), server_default=sa.text(\"now()\")\n        ),\n        sa.ForeignKeyConstraint([\"source_node\"], [\"kg_entity.id_name\"]),\n        sa.ForeignKeyConstraint([\"target_node\"], [\"kg_entity.id_name\"]),\n        sa.ForeignKeyConstraint([\"source_node_type\"], [\"kg_entity_type.id_name\"]),\n        sa.ForeignKeyConstraint([\"target_node_type\"], [\"kg_entity_type.id_name\"]),\n        sa.ForeignKeyConstraint([\"source_document\"], [\"document.id\"]),\n        sa.ForeignKeyConstraint(\n            [\"relationship_type_id_name\"], [\"kg_relationship_type.id_name\"]\n        ),\n        sa.UniqueConstraint(\n            \"source_node\",\n            \"target_node\",\n            \"type\",\n            name=\"uq_kg_relationship_source_target_type\",\n        ),\n        sa.PrimaryKeyConstraint(\"id_name\", \"source_document\"),\n    )\n    op.create_index(\n        \"ix_kg_relationship_nodes\", \"kg_relationship\", [\"source_node\", \"target_node\"]\n    )\n\n    op.execute(\"DROP TABLE IF EXISTS kg_relationship_extraction_staging CASCADE\")\n    # Create KGRelationshipExtractionStaging table\n    op.create_table(\n        \"kg_relationship_extraction_staging\",\n        sa.Column(\"id_name\", sa.String(), nullable=False, index=True),\n        sa.Column(\"source_node\", sa.String(), nullable=False, index=True),\n        sa.Column(\"target_node\", sa.String(), nullable=False, index=True),\n        sa.Column(\"source_node_type\", sa.String(), nullable=False, index=True),\n        sa.Column(\"target_node_type\", sa.String(), nullable=False, index=True),\n        sa.Column(\"source_document\", sa.String(), nullable=True, index=True),\n        sa.Column(\"type\", sa.String(), nullable=False, index=True),\n        sa.Column(\"relationship_type_id_name\", sa.String(), nullable=False, index=True),\n        sa.Column(\"occurrences\", sa.Integer(), server_default=\"1\", nullable=False),\n        sa.Column(\"transferred\", sa.Boolean(), nullable=False, server_default=\"false\"),\n        sa.Column(\n            \"time_created\", sa.DateTime(timezone=True), server_default=sa.text(\"now()\")\n        ),\n        sa.ForeignKeyConstraint(\n            [\"source_node\"], [\"kg_entity_extraction_staging.id_name\"]\n        ),\n        sa.ForeignKeyConstraint(\n            [\"target_node\"], [\"kg_entity_extraction_staging.id_name\"]\n        ),\n        sa.ForeignKeyConstraint([\"source_node_type\"], [\"kg_entity_type.id_name\"]),\n        sa.ForeignKeyConstraint([\"target_node_type\"], [\"kg_entity_type.id_name\"]),\n        sa.ForeignKeyConstraint([\"source_document\"], [\"document.id\"]),\n        sa.ForeignKeyConstraint(\n            [\"relationship_type_id_name\"],\n            [\"kg_relationship_type_extraction_staging.id_name\"],\n        ),\n        sa.UniqueConstraint(\n            \"source_node\",\n            \"target_node\",\n            \"type\",\n            name=\"uq_kg_relationship_extraction_staging_source_target_type\",\n        ),\n        sa.PrimaryKeyConstraint(\"id_name\", \"source_document\"),\n    )\n    op.create_index(\n        \"ix_kg_relationship_extraction_staging_nodes\",\n        \"kg_relationship_extraction_staging\",\n        [\"source_node\", \"target_node\"],\n    )\n\n    op.execute(\"DROP TABLE IF EXISTS kg_term CASCADE\")\n    # Create KGTerm table\n    op.create_table(\n        \"kg_term\",\n        sa.Column(\"id_term\", sa.String(), primary_key=True, nullable=False, index=True),\n        sa.Column(\n            \"entity_types\",\n            postgresql.ARRAY(sa.String()),\n            nullable=False,\n            server_default=\"{}\",\n        ),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            onupdate=sa.text(\"now()\"),\n        ),\n        sa.Column(\n            \"time_created\", sa.DateTime(timezone=True), server_default=sa.text(\"now()\")\n        ),\n    )\n    op.create_index(\"ix_search_term_entities\", \"kg_term\", [\"entity_types\"])\n    op.create_index(\"ix_search_term_term\", \"kg_term\", [\"id_term\"])\n\n    op.add_column(\n        \"document\",\n        sa.Column(\"kg_stage\", sa.String(), nullable=True, index=True),\n    )\n    op.add_column(\n        \"document\",\n        sa.Column(\"kg_processing_time\", sa.DateTime(timezone=True), nullable=True),\n    )\n    op.add_column(\n        \"connector\",\n        sa.Column(\n            \"kg_processing_enabled\",\n            sa.Boolean(),\n            nullable=True,\n            server_default=\"false\",\n        ),\n    )\n\n    op.add_column(\n        \"connector\",\n        sa.Column(\n            \"kg_coverage_days\",\n            sa.Integer(),\n            nullable=True,\n            server_default=None,\n        ),\n    )\n\n    # Create GIN index for clustering and normalization\n    op.execute(\n        \"CREATE INDEX IF NOT EXISTS idx_kg_entity_clustering_trigrams \"\n        f\"ON kg_entity USING GIN (name {POSTGRES_DEFAULT_SCHEMA}.gin_trgm_ops)\"\n    )\n    op.execute(\n        \"CREATE INDEX IF NOT EXISTS idx_kg_entity_normalization_trigrams ON kg_entity USING GIN (name_trigrams)\"\n    )\n\n    # Create kg_entity trigger to update kg_entity.name and its trigrams\n    alphanum_pattern = r\"[^a-z0-9]+\"\n    truncate_length = 1000\n    function = \"update_kg_entity_name\"\n    op.execute(\n        text(\n            f\"\"\"\n            CREATE OR REPLACE FUNCTION {function}()\n            RETURNS TRIGGER AS $$\n            DECLARE\n                name text;\n                cleaned_name text;\n            BEGIN\n                -- Set name to semantic_id if document_id is not NULL\n                IF NEW.document_id IS NOT NULL THEN\n                    SELECT lower(semantic_id) INTO name\n                    FROM document\n                    WHERE id = NEW.document_id;\n                ELSE\n                    name = lower(NEW.name);\n                END IF;\n\n                -- Clean name and truncate if too long\n                cleaned_name = regexp_replace(\n                    name,\n                    '{alphanum_pattern}', '', 'g'\n                );\n                IF length(cleaned_name) > {truncate_length} THEN\n                    cleaned_name = left(cleaned_name, {truncate_length});\n                END IF;\n\n                -- Set name and name trigrams\n                NEW.name = name;\n                NEW.name_trigrams = {POSTGRES_DEFAULT_SCHEMA}.show_trgm(cleaned_name);\n                RETURN NEW;\n            END;\n            $$ LANGUAGE plpgsql;\n            \"\"\"\n        )\n    )\n    trigger = f\"{function}_trigger\"\n    op.execute(f\"DROP TRIGGER IF EXISTS {trigger} ON kg_entity\")\n    op.execute(\n        f\"\"\"\n        CREATE TRIGGER {trigger}\n            BEFORE INSERT OR UPDATE OF name\n            ON kg_entity\n            FOR EACH ROW\n            EXECUTE FUNCTION {function}();\n        \"\"\"\n    )\n\n    # Create kg_entity trigger to update kg_entity.name and its trigrams\n    function = \"update_kg_entity_name_from_doc\"\n    op.execute(\n        text(\n            f\"\"\"\n            CREATE OR REPLACE FUNCTION {function}()\n            RETURNS TRIGGER AS $$\n            DECLARE\n                doc_name text;\n                cleaned_name text;\n            BEGIN\n                doc_name = lower(NEW.semantic_id);\n\n                -- Clean name and truncate if too long\n                cleaned_name = regexp_replace(\n                    doc_name,\n                    '{alphanum_pattern}', '', 'g'\n                );\n                IF length(cleaned_name) > {truncate_length} THEN\n                    cleaned_name = left(cleaned_name, {truncate_length});\n                END IF;\n\n                -- Set name and name trigrams for all entities referencing this document\n                UPDATE kg_entity\n                SET\n                    name = doc_name,\n                    name_trigrams = {POSTGRES_DEFAULT_SCHEMA}.show_trgm(cleaned_name)\n                WHERE document_id = NEW.id;\n                RETURN NEW;\n            END;\n            $$ LANGUAGE plpgsql;\n            \"\"\"\n        )\n    )\n    trigger = f\"{function}_trigger\"\n    op.execute(f\"DROP TRIGGER IF EXISTS {trigger} ON document\")\n    op.execute(\n        f\"\"\"\n        CREATE TRIGGER {trigger}\n            AFTER UPDATE OF semantic_id\n            ON document\n            FOR EACH ROW\n            EXECUTE FUNCTION {function}();\n        \"\"\"\n    )\n\n\ndef downgrade() -> None:\n\n    #  Drop all views that start with 'kg_'\n    op.execute(\n        \"\"\"\n                DO $$\n                DECLARE\n                    view_name text;\n                BEGIN\n                    FOR view_name IN\n                        SELECT c.relname\n                        FROM pg_catalog.pg_class c\n                        JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace\n                        WHERE c.relkind = 'v'\n                        AND n.nspname = current_schema()\n                        AND c.relname LIKE 'kg_relationships_with_access%'\n                    LOOP\n                        EXECUTE 'DROP VIEW IF EXISTS ' || quote_ident(view_name);\n                    END LOOP;\n                END $$;\n            \"\"\"\n    )\n\n    op.execute(\n        \"\"\"\n                DO $$\n                DECLARE\n                    view_name text;\n                BEGIN\n                    FOR view_name IN\n                        SELECT c.relname\n                        FROM pg_catalog.pg_class c\n                        JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace\n                        WHERE c.relkind = 'v'\n                        AND n.nspname = current_schema()\n                        AND c.relname LIKE 'allowed_docs%'\n                    LOOP\n                        EXECUTE 'DROP VIEW IF EXISTS ' || quote_ident(view_name);\n                    END LOOP;\n                END $$;\n            \"\"\"\n    )\n\n    for table, function in (\n        (\"kg_entity\", \"update_kg_entity_name\"),\n        (\"document\", \"update_kg_entity_name_from_doc\"),\n    ):\n        op.execute(f\"DROP TRIGGER IF EXISTS {function}_trigger ON {table}\")\n        op.execute(f\"DROP FUNCTION IF EXISTS {function}()\")\n\n    # Drop index\n    op.execute(\"DROP INDEX IF EXISTS idx_kg_entity_clustering_trigrams\")\n    op.execute(\"DROP INDEX IF EXISTS idx_kg_entity_normalization_trigrams\")\n\n    # Drop tables in reverse order of creation to handle dependencies\n    op.drop_table(\"kg_term\")\n    op.drop_table(\"kg_relationship\")\n    op.drop_table(\"kg_entity\")\n    op.drop_table(\"kg_relationship_type\")\n    op.drop_table(\"kg_relationship_extraction_staging\")\n    op.drop_table(\"kg_relationship_type_extraction_staging\")\n    op.drop_table(\"kg_entity_extraction_staging\")\n    op.drop_table(\"kg_entity_type\")\n    op.drop_column(\"connector\", \"kg_processing_enabled\")\n    op.drop_column(\"connector\", \"kg_coverage_days\")\n    op.drop_column(\"document\", \"kg_stage\")\n    op.drop_column(\"document\", \"kg_processing_time\")\n    op.drop_table(\"kg_config\")\n\n    # Revoke usage on current schema for the readonly user\n    op.execute(\n        text(\n            f\"\"\"\n            DO $$\n            BEGIN\n                IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN\n                    EXECUTE format('REVOKE ALL ON SCHEMA %I FROM %I', current_schema(), '{DB_READONLY_USER}');\n                END IF;\n            END\n            $$;\n            \"\"\"\n        )\n    )\n\n    if not MULTI_TENANT:\n        # Drop read-only db user here only in single tenant mode. For multi-tenant mode,\n        # the user is dropped in the alembic_tenants migration.\n\n        op.execute(\n            text(\n                f\"\"\"\n            DO $$\n            BEGIN\n                IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN\n                    -- First revoke all privileges from the database\n                    EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{DB_READONLY_USER}');\n                    -- Then drop the user\n                    EXECUTE format('DROP USER %I', '{DB_READONLY_USER}');\n                END IF;\n            END\n            $$;\n        \"\"\"\n            )\n        )\n        op.execute(text(\"DROP EXTENSION IF EXISTS pg_trgm\"))\n"
  },
  {
    "path": "backend/alembic/versions/4a1e4b1c89d2_add_indexing_to_userfilestatus.py",
    "content": "\"\"\"Add INDEXING to UserFileStatus\n\nRevision ID: 4a1e4b1c89d2\nRevises: 6b3b4083c5aa\nCreate Date: 2026-02-28 00:00:00.000000\n\n\"\"\"\n\nimport sqlalchemy as sa\nfrom alembic import op\n\nrevision = \"4a1e4b1c89d2\"\ndown_revision = \"6b3b4083c5aa\"\nbranch_labels = None\ndepends_on = None\n\nTABLE = \"user_file\"\nCOLUMN = \"status\"\nCONSTRAINT_NAME = \"ck_user_file_status\"\n\nOLD_VALUES = (\"PROCESSING\", \"COMPLETED\", \"FAILED\", \"CANCELED\", \"DELETING\")\nNEW_VALUES = (\"PROCESSING\", \"INDEXING\", \"COMPLETED\", \"FAILED\", \"CANCELED\", \"DELETING\")\n\n\ndef _drop_status_check_constraint() -> None:\n    \"\"\"Drop the existing CHECK constraint on user_file.status.\n\n    The constraint name is auto-generated by SQLAlchemy and unknown,\n    so we look it up via the inspector.\n    \"\"\"\n    inspector = sa.inspect(op.get_bind())\n    for constraint in inspector.get_check_constraints(TABLE):\n        if COLUMN in constraint.get(\"sqltext\", \"\"):\n            constraint_name = constraint[\"name\"]\n            if constraint_name is not None:\n                op.drop_constraint(constraint_name, TABLE, type_=\"check\")\n\n\ndef upgrade() -> None:\n    _drop_status_check_constraint()\n    in_clause = \", \".join(f\"'{v}'\" for v in NEW_VALUES)\n    op.create_check_constraint(CONSTRAINT_NAME, TABLE, f\"{COLUMN} IN ({in_clause})\")\n\n\ndef downgrade() -> None:\n    op.execute(\n        f\"UPDATE {TABLE} SET {COLUMN} = 'PROCESSING' WHERE {COLUMN} = 'INDEXING'\"\n    )\n    op.drop_constraint(CONSTRAINT_NAME, TABLE, type_=\"check\")\n    in_clause = \", \".join(f\"'{v}'\" for v in OLD_VALUES)\n    op.create_check_constraint(CONSTRAINT_NAME, TABLE, f\"{COLUMN} IN ({in_clause})\")\n"
  },
  {
    "path": "backend/alembic/versions/4a951134c801_moved_status_to_connector_credential_.py",
    "content": "\"\"\"Moved status to connector credential pair\n\nRevision ID: 4a951134c801\nRevises: 7477a5f5d728\nCreate Date: 2024-08-10 19:20:34.527559\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"4a951134c801\"\ndown_revision = \"7477a5f5d728\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"status\",\n            sa.Enum(\n                \"ACTIVE\",\n                \"PAUSED\",\n                \"DELETING\",\n                name=\"connectorcredentialpairstatus\",\n                native_enum=False,\n            ),\n            nullable=True,\n        ),\n    )\n\n    # Update status of connector_credential_pair based on connector's disabled status\n    op.execute(\n        \"\"\"\n        UPDATE connector_credential_pair\n        SET status = CASE\n            WHEN (\n                SELECT disabled\n                FROM connector\n                WHERE connector.id = connector_credential_pair.connector_id\n            ) = FALSE THEN 'ACTIVE'\n            ELSE 'PAUSED'\n        END\n        \"\"\"\n    )\n\n    # Make the status column not nullable after setting values\n    op.alter_column(\"connector_credential_pair\", \"status\", nullable=False)\n\n    op.drop_column(\"connector\", \"disabled\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"connector\",\n        sa.Column(\"disabled\", sa.BOOLEAN(), autoincrement=False, nullable=True),\n    )\n\n    # Update disabled status of connector based on connector_credential_pair's status\n    op.execute(\n        \"\"\"\n        UPDATE connector\n        SET disabled = CASE\n            WHEN EXISTS (\n                SELECT 1\n                FROM connector_credential_pair\n                WHERE connector_credential_pair.connector_id = connector.id\n                AND connector_credential_pair.status = 'ACTIVE'\n            ) THEN FALSE\n            ELSE TRUE\n        END\n        \"\"\"\n    )\n\n    # Make the disabled column not nullable after setting values\n    op.alter_column(\"connector\", \"disabled\", nullable=False)\n\n    op.drop_column(\"connector_credential_pair\", \"status\")\n"
  },
  {
    "path": "backend/alembic/versions/4b08d97e175a_change_default_prune_freq.py",
    "content": "\"\"\"change default prune_freq\n\nRevision ID: 4b08d97e175a\nRevises: d9ec13955951\nCreate Date: 2024-08-20 15:28:52.993827\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"4b08d97e175a\"\ndown_revision = \"d9ec13955951\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.execute(\n        \"\"\"\n        UPDATE connector\n        SET prune_freq = 2592000\n        WHERE prune_freq = 86400\n        \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    op.execute(\n        \"\"\"\n        UPDATE connector\n        SET prune_freq = 86400\n        WHERE prune_freq = 2592000\n        \"\"\"\n    )\n"
  },
  {
    "path": "backend/alembic/versions/4cebcbc9b2ae_add_tab_index_to_tool_call.py",
    "content": "\"\"\"add tab_index to tool_call\n\nRevision ID: 4cebcbc9b2ae\nRevises: a1b2c3d4e5f6\nCreate Date: 2025-12-16\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"4cebcbc9b2ae\"\ndown_revision = \"a1b2c3d4e5f6\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"tool_call\",\n        sa.Column(\"tab_index\", sa.Integer(), nullable=False, server_default=\"0\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"tool_call\", \"tab_index\")\n"
  },
  {
    "path": "backend/alembic/versions/4d58345da04a_lowercase_user_emails.py",
    "content": "\"\"\"lowercase_user_emails\n\nRevision ID: 4d58345da04a\nRevises: f1ca58b2f2ec\nCreate Date: 2025-01-29 07:48:46.784041\n\n\"\"\"\n\nimport logging\nfrom typing import cast\nfrom alembic import op\nfrom sqlalchemy.exc import IntegrityError\nfrom sqlalchemy.sql import text\n\n\n# revision identifiers, used by Alembic.\nrevision = \"4d58345da04a\"\ndown_revision = \"f1ca58b2f2ec\"\nbranch_labels = None\ndepends_on = None\n\nlogger = logging.getLogger(\"alembic.runtime.migration\")\n\n\ndef upgrade() -> None:\n    \"\"\"Conflicts on lowercasing will result in the uppercased email getting a\n    unique integer suffix when converted to lowercase.\"\"\"\n\n    connection = op.get_bind()\n\n    # Fetch all user emails that are not already lowercase\n    user_emails = connection.execute(\n        text('SELECT id, email FROM \"user\" WHERE email != LOWER(email)')\n    ).fetchall()\n\n    for user_id, email in user_emails:\n        email = cast(str, email)\n        username, domain = email.rsplit(\"@\", 1)\n        new_email = f\"{username.lower()}@{domain.lower()}\"\n        attempt = 1\n\n        while True:\n            try:\n                # Try updating the email\n                connection.execute(\n                    text('UPDATE \"user\" SET email = :new_email WHERE id = :user_id'),\n                    {\"new_email\": new_email, \"user_id\": user_id},\n                )\n                break  # Success, exit loop\n            except IntegrityError:\n                next_email = f\"{username.lower()}_{attempt}@{domain.lower()}\"\n                # Email conflict occurred, append `_1`, `_2`, etc., to the username\n                logger.warning(\n                    f\"Conflict while lowercasing email: old_email={email} conflicting_email={new_email} next_email={next_email}\"\n                )\n                new_email = next_email\n                attempt += 1\n\n\ndef downgrade() -> None:\n    # Cannot restore original case of emails\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/4ea2c93919c1_add_type_to_credentials.py",
    "content": "\"\"\"Add type to credentials\n\nRevision ID: 4ea2c93919c1\nRevises: 473a1a7ca408\nCreate Date: 2024-07-18 13:07:13.655895\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"4ea2c93919c1\"\ndown_revision = \"473a1a7ca408\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    # Add the new 'source' column to the 'credential' table\n    op.add_column(\n        \"credential\",\n        sa.Column(\n            \"source\",\n            sa.String(length=100),  # Use String instead of Enum\n            nullable=True,  # Initially allow NULL values\n        ),\n    )\n    op.add_column(\n        \"credential\",\n        sa.Column(\n            \"name\",\n            sa.String(),\n            nullable=True,\n        ),\n    )\n\n    # Create a temporary table that maps each credential to a single connector source.\n    # This is needed because a credential can be associated with multiple connectors,\n    # but we want to assign a single source to each credential.\n    # We use DISTINCT ON to ensure we only get one row per credential_id.\n    op.execute(\n        \"\"\"\n    CREATE TEMPORARY TABLE temp_connector_credential AS\n    SELECT DISTINCT ON (cc.credential_id)\n        cc.credential_id,\n        c.source AS connector_source\n    FROM connector_credential_pair cc\n    JOIN connector c ON cc.connector_id = c.id\n    \"\"\"\n    )\n\n    # Update the 'source' column in the 'credential' table\n    op.execute(\n        \"\"\"\n    UPDATE credential cred\n    SET source = COALESCE(\n        (SELECT connector_source\n         FROM temp_connector_credential temp\n         WHERE cred.id = temp.credential_id),\n        'NOT_APPLICABLE'\n    )\n    \"\"\"\n    )\n\n    # Drop the temporary table to avoid conflicts if migration runs again\n    # (e.g., during upgrade -> downgrade -> upgrade cycles in tests)\n    op.execute(\"DROP TABLE IF EXISTS temp_connector_credential\")\n\n    # If no exception was raised, alter the column\n    op.alter_column(\"credential\", \"source\", nullable=True)  # TODO modify\n    # # ### end Alembic commands ###\n\n\ndef downgrade() -> None:\n    op.drop_column(\"credential\", \"source\")\n    op.drop_column(\"credential\", \"name\")\n"
  },
  {
    "path": "backend/alembic/versions/4ee1287bd26a_add_multiple_slack_bot_support.py",
    "content": "\"\"\"add_multiple_slack_bot_support\n\nRevision ID: 4ee1287bd26a\nRevises: 47e5bef3a1d7\nCreate Date: 2024-11-06 13:15:53.302644\n\n\"\"\"\n\nfrom typing import cast\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.orm import Session\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.db.models import SlackBot\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"4ee1287bd26a\"\ndown_revision = \"47e5bef3a1d7\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    # Create new slack_bot table\n    op.create_table(\n        \"slack_bot\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"enabled\", sa.Boolean(), nullable=False, server_default=\"true\"),\n        sa.Column(\"bot_token\", sa.LargeBinary(), nullable=False),\n        sa.Column(\"app_token\", sa.LargeBinary(), nullable=False),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"bot_token\"),\n        sa.UniqueConstraint(\"app_token\"),\n    )\n\n    # # Create new slack_channel_config table\n    op.create_table(\n        \"slack_channel_config\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"slack_bot_id\", sa.Integer(), nullable=True),\n        sa.Column(\"persona_id\", sa.Integer(), nullable=True),\n        sa.Column(\"channel_config\", postgresql.JSONB(), nullable=False),\n        sa.Column(\"response_type\", sa.String(), nullable=False),\n        sa.Column(\n            \"enable_auto_filters\", sa.Boolean(), nullable=False, server_default=\"false\"\n        ),\n        sa.ForeignKeyConstraint(\n            [\"slack_bot_id\"],\n            [\"slack_bot.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    # Handle existing Slack bot tokens first\n    bot_token = None\n    app_token = None\n    first_row_id = None\n\n    try:\n        tokens = cast(dict, get_kv_store().load(\"slack_bot_tokens_config_key\"))\n    except Exception:\n        tokens = {}\n\n    bot_token = tokens.get(\"bot_token\")\n    app_token = tokens.get(\"app_token\")\n\n    if bot_token and app_token:\n        session = Session(bind=op.get_bind())\n        new_slack_bot = SlackBot(\n            name=\"Slack Bot (Migrated)\",\n            enabled=True,\n            bot_token=bot_token,\n            app_token=app_token,\n        )\n        session.add(new_slack_bot)\n        session.commit()\n        first_row_id = new_slack_bot.id\n\n    # Create a default bot if none exists\n    # This is in case there are no slack tokens but there are channels configured\n    op.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO slack_bot (name, enabled, bot_token, app_token)\n            SELECT 'Default Bot', true, '', ''\n            WHERE NOT EXISTS (SELECT 1 FROM slack_bot)\n            RETURNING id;\n            \"\"\"\n        )\n    )\n\n    # Get the bot ID to use (either from existing migration or newly created)\n    bot_id_query = sa.text(\n        \"\"\"\n        SELECT COALESCE(\n            :first_row_id,\n            (SELECT id FROM slack_bot ORDER BY id ASC LIMIT 1)\n        ) as bot_id;\n        \"\"\"\n    )\n    result = op.get_bind().execute(bot_id_query, {\"first_row_id\": first_row_id})\n    bot_id = result.scalar()\n\n    # CTE (Common Table Expression) that transforms the old slack_bot_config table data\n    # This splits up the channel_names into their own rows\n    channel_names_cte = \"\"\"\n        WITH channel_names AS (\n            SELECT\n                sbc.id as config_id,\n                sbc.persona_id,\n                sbc.response_type,\n                sbc.enable_auto_filters,\n                jsonb_array_elements_text(sbc.channel_config->'channel_names') as channel_name,\n                sbc.channel_config->>'respond_tag_only' as respond_tag_only,\n                sbc.channel_config->>'respond_to_bots' as respond_to_bots,\n                sbc.channel_config->'respond_member_group_list' as respond_member_group_list,\n                sbc.channel_config->'answer_filters' as answer_filters,\n                sbc.channel_config->'follow_up_tags' as follow_up_tags\n            FROM slack_bot_config sbc\n        )\n    \"\"\"\n\n    # Insert the channel names into the new slack_channel_config table\n    insert_statement = \"\"\"\n        INSERT INTO slack_channel_config (\n            slack_bot_id,\n            persona_id,\n            channel_config,\n            response_type,\n            enable_auto_filters\n        )\n        SELECT\n            :bot_id,\n            channel_name.persona_id,\n            jsonb_build_object(\n                'channel_name', channel_name.channel_name,\n                'respond_tag_only',\n                COALESCE((channel_name.respond_tag_only)::boolean, false),\n                'respond_to_bots',\n                COALESCE((channel_name.respond_to_bots)::boolean, false),\n                'respond_member_group_list',\n                COALESCE(channel_name.respond_member_group_list, '[]'::jsonb),\n                'answer_filters',\n                COALESCE(channel_name.answer_filters, '[]'::jsonb),\n                'follow_up_tags',\n                COALESCE(channel_name.follow_up_tags, '[]'::jsonb)\n            ),\n            channel_name.response_type,\n            channel_name.enable_auto_filters\n        FROM channel_names channel_name;\n    \"\"\"\n\n    op.execute(sa.text(channel_names_cte + insert_statement).bindparams(bot_id=bot_id))\n\n    # Clean up old tokens if they existed\n    try:\n        if bot_token and app_token:\n            get_kv_store().delete(\"slack_bot_tokens_config_key\")\n    except Exception:\n        pass\n    # Rename the table\n    op.rename_table(\n        \"slack_bot_config__standard_answer_category\",\n        \"slack_channel_config__standard_answer_category\",\n    )\n\n    # Rename the column\n    op.alter_column(\n        \"slack_channel_config__standard_answer_category\",\n        \"slack_bot_config_id\",\n        new_column_name=\"slack_channel_config_id\",\n    )\n\n    # Drop the table with CASCADE to handle dependent objects\n    op.execute(\"DROP TABLE slack_bot_config CASCADE\")\n\n\ndef downgrade() -> None:\n    # Recreate the old slack_bot_config table\n    op.create_table(\n        \"slack_bot_config\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"persona_id\", sa.Integer(), nullable=True),\n        sa.Column(\"channel_config\", postgresql.JSONB(), nullable=False),\n        sa.Column(\"response_type\", sa.String(), nullable=False),\n        sa.Column(\"enable_auto_filters\", sa.Boolean(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    # Migrate data back to the old format\n    # Group by persona_id to combine channel names back into arrays\n    op.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO slack_bot_config (\n                persona_id,\n                channel_config,\n                response_type,\n                enable_auto_filters\n            )\n            SELECT DISTINCT ON (persona_id)\n                persona_id,\n                jsonb_build_object(\n                    'channel_names', (\n                        SELECT jsonb_agg(c.channel_config->>'channel_name')\n                        FROM slack_channel_config c\n                        WHERE c.persona_id = scc.persona_id\n                    ),\n                    'respond_tag_only', (channel_config->>'respond_tag_only')::boolean,\n                    'respond_to_bots', (channel_config->>'respond_to_bots')::boolean,\n                    'respond_member_group_list', channel_config->'respond_member_group_list',\n                    'answer_filters', channel_config->'answer_filters',\n                    'follow_up_tags', channel_config->'follow_up_tags'\n                ),\n                response_type,\n                enable_auto_filters\n            FROM slack_channel_config scc\n            WHERE persona_id IS NOT NULL;\n            \"\"\"\n        )\n    )\n\n    # Rename the table back\n    op.rename_table(\n        \"slack_channel_config__standard_answer_category\",\n        \"slack_bot_config__standard_answer_category\",\n    )\n\n    # Rename the column back\n    op.alter_column(\n        \"slack_bot_config__standard_answer_category\",\n        \"slack_channel_config_id\",\n        new_column_name=\"slack_bot_config_id\",\n    )\n\n    # Try to save the first bot's tokens back to KV store\n    try:\n        first_bot = (\n            op.get_bind()\n            .execute(\n                sa.text(\n                    \"SELECT bot_token, app_token FROM slack_bot ORDER BY id LIMIT 1\"\n                )\n            )\n            .first()\n        )\n        if first_bot and first_bot.bot_token and first_bot.app_token:\n            tokens = {\n                \"bot_token\": first_bot.bot_token,\n                \"app_token\": first_bot.app_token,\n            }\n            get_kv_store().store(\"slack_bot_tokens_config_key\", tokens)\n    except Exception:\n        pass\n\n    # Drop the new tables in reverse order\n    op.drop_table(\"slack_channel_config\")\n    op.drop_table(\"slack_bot\")\n"
  },
  {
    "path": "backend/alembic/versions/4f8a2b3c1d9e_add_open_url_tool.py",
    "content": "\"\"\"add_open_url_tool\n\nRevision ID: 4f8a2b3c1d9e\nRevises: a852cbe15577\nCreate Date: 2025-11-24 12:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"4f8a2b3c1d9e\"\ndown_revision = \"a852cbe15577\"\nbranch_labels = None\ndepends_on = None\n\n\nOPEN_URL_TOOL = {\n    \"name\": \"OpenURLTool\",\n    \"display_name\": \"Open URL\",\n    \"description\": (\n        \"The Open URL Action allows the agent to fetch and read contents of web pages.\"\n    ),\n    \"in_code_tool_id\": \"OpenURLTool\",\n    \"enabled\": True,\n}\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n\n    # Check if tool already exists\n    existing = conn.execute(\n        sa.text(\"SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id\"),\n        {\"in_code_tool_id\": OPEN_URL_TOOL[\"in_code_tool_id\"]},\n    ).fetchone()\n\n    if existing:\n        tool_id = existing[0]\n        # Update existing tool\n        conn.execute(\n            sa.text(\n                \"\"\"\n                UPDATE tool\n                SET name = :name,\n                    display_name = :display_name,\n                    description = :description\n                WHERE in_code_tool_id = :in_code_tool_id\n                \"\"\"\n            ),\n            OPEN_URL_TOOL,\n        )\n    else:\n        # Insert new tool\n        conn.execute(\n            sa.text(\n                \"\"\"\n                INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)\n                VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)\n                \"\"\"\n            ),\n            OPEN_URL_TOOL,\n        )\n        # Get the newly inserted tool's id\n        result = conn.execute(\n            sa.text(\"SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id\"),\n            {\"in_code_tool_id\": OPEN_URL_TOOL[\"in_code_tool_id\"]},\n        ).fetchone()\n        tool_id = result[0]  # type: ignore\n\n    # Associate the tool with all existing personas\n    # Get all persona IDs\n    persona_ids = conn.execute(sa.text(\"SELECT id FROM persona\")).fetchall()\n\n    for (persona_id,) in persona_ids:\n        # Check if association already exists\n        exists = conn.execute(\n            sa.text(\n                \"\"\"\n                SELECT 1 FROM persona__tool\n                WHERE persona_id = :persona_id AND tool_id = :tool_id\n                \"\"\"\n            ),\n            {\"persona_id\": persona_id, \"tool_id\": tool_id},\n        ).fetchone()\n\n        if not exists:\n            conn.execute(\n                sa.text(\n                    \"\"\"\n                    INSERT INTO persona__tool (persona_id, tool_id)\n                    VALUES (:persona_id, :tool_id)\n                    \"\"\"\n                ),\n                {\"persona_id\": persona_id, \"tool_id\": tool_id},\n            )\n\n\ndef downgrade() -> None:\n    # We don't remove the tool on downgrade since it's fine to have it around.\n    # If we upgrade again, it will be a no-op.\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/503883791c39_add_effective_permissions.py",
    "content": "\"\"\"add_effective_permissions\n\nAdds a JSONB column `effective_permissions` to the user table to store\ndirectly granted permissions (e.g. [\"admin\"] or [\"basic\"]). Implied\npermissions are expanded at read time, not stored.\n\nBackfill: joins user__user_group → permission_grant to collect each\nuser's granted permissions into a JSON array. Users without group\nmemberships keep the default [].\n\nRevision ID: 503883791c39\nRevises: b4b7e1028dfd\nCreate Date: 2026-03-30 14:49:22.261748\n\n\"\"\"\n\nfrom collections.abc import Sequence\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n\n# revision identifiers, used by Alembic.\nrevision = \"503883791c39\"\ndown_revision = \"b4b7e1028dfd\"\nbranch_labels: str | None = None\ndepends_on: str | Sequence[str] | None = None\n\nuser_table = sa.table(\n    \"user\",\n    sa.column(\"id\", sa.Uuid),\n    sa.column(\"effective_permissions\", postgresql.JSONB),\n)\n\nuser_user_group = sa.table(\n    \"user__user_group\",\n    sa.column(\"user_id\", sa.Uuid),\n    sa.column(\"user_group_id\", sa.Integer),\n)\n\npermission_grant = sa.table(\n    \"permission_grant\",\n    sa.column(\"group_id\", sa.Integer),\n    sa.column(\"permission\", sa.String),\n    sa.column(\"is_deleted\", sa.Boolean),\n)\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"effective_permissions\",\n            postgresql.JSONB(),\n            nullable=False,\n            server_default=sa.text(\"'[]'::jsonb\"),\n        ),\n    )\n\n    conn = op.get_bind()\n\n    # Deduplicated permissions per user\n    deduped = (\n        sa.select(\n            user_user_group.c.user_id,\n            permission_grant.c.permission,\n        )\n        .select_from(\n            user_user_group.join(\n                permission_grant,\n                sa.and_(\n                    permission_grant.c.group_id == user_user_group.c.user_group_id,\n                    permission_grant.c.is_deleted == sa.false(),\n                ),\n            )\n        )\n        .distinct()\n        .subquery(\"deduped\")\n    )\n\n    # Aggregate into JSONB array per user (order is not guaranteed;\n    # consumers read this as a set so ordering does not matter)\n    perms_per_user = (\n        sa.select(\n            deduped.c.user_id,\n            sa.func.jsonb_agg(\n                deduped.c.permission,\n                type_=postgresql.JSONB,\n            ).label(\"perms\"),\n        )\n        .group_by(deduped.c.user_id)\n        .subquery(\"sub\")\n    )\n\n    conn.execute(\n        user_table.update()\n        .where(user_table.c.id == perms_per_user.c.user_id)\n        .values(effective_permissions=perms_per_user.c.perms)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"effective_permissions\")\n"
  },
  {
    "path": "backend/alembic/versions/505c488f6662_merge_default_assistants_into_unified.py",
    "content": "\"\"\"merge_default_assistants_into_unified\n\nRevision ID: 505c488f6662\nRevises: d09fc20a3c66\nCreate Date: 2025-09-09 19:00:56.816626\n\n\"\"\"\n\nimport json\nfrom typing import Any\nfrom typing import NamedTuple\nfrom uuid import UUID\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"505c488f6662\"\ndown_revision = \"d09fc20a3c66\"\nbranch_labels = None\ndepends_on = None\n\n# Constants for the unified assistant\nUNIFIED_ASSISTANT_NAME = \"Assistant\"\nUNIFIED_ASSISTANT_DESCRIPTION = (\n    \"Your AI assistant with search, web browsing, and image generation capabilities.\"\n)\nUNIFIED_ASSISTANT_NUM_CHUNKS = 25\nUNIFIED_ASSISTANT_DISPLAY_PRIORITY = 0\nUNIFIED_ASSISTANT_LLM_FILTER_EXTRACTION = True\nUNIFIED_ASSISTANT_LLM_RELEVANCE_FILTER = False\nUNIFIED_ASSISTANT_RECENCY_BIAS = \"AUTO\"  # NOTE: needs to be capitalized\nUNIFIED_ASSISTANT_CHUNKS_ABOVE = 0\nUNIFIED_ASSISTANT_CHUNKS_BELOW = 0\nUNIFIED_ASSISTANT_DATETIME_AWARE = True\n\n# NOTE: tool specific prompts are handled on the fly and automatically injected\n# into the prompt before passing to the LLM.\nDEFAULT_SYSTEM_PROMPT = \"\"\"\nYou are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the \\\nuser's intent, ask clarifying questions when needed, think step-by-step through complex problems, \\\nprovide clear and accurate answers, and proactively anticipate helpful follow-up information. Always \\\nprioritize being truthful, nuanced, insightful, and efficient.\nThe current date is [[CURRENT_DATETIME]]\n\nYou use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make \\\nyour responses more readable and engaging.\nYou use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, \\\nsymbols, etc.: '$$\\\\n[expression]\\\\n$$' for standalone cases and '\\\\( [expression] \\\\)' when inline.\nFor code you prefer to use Markdown and specify the language.\nYou can use Markdown horizontal rules (---) to separate sections of your responses.\nYou can use Markdown tables to format your responses for data, lists, and other structured information.\n\"\"\".strip()\n\n\nINSERT_DICT: dict[str, Any] = {\n    \"name\": UNIFIED_ASSISTANT_NAME,\n    \"description\": UNIFIED_ASSISTANT_DESCRIPTION,\n    \"system_prompt\": DEFAULT_SYSTEM_PROMPT,\n    \"num_chunks\": UNIFIED_ASSISTANT_NUM_CHUNKS,\n    \"display_priority\": UNIFIED_ASSISTANT_DISPLAY_PRIORITY,\n    \"llm_filter_extraction\": UNIFIED_ASSISTANT_LLM_FILTER_EXTRACTION,\n    \"llm_relevance_filter\": UNIFIED_ASSISTANT_LLM_RELEVANCE_FILTER,\n    \"recency_bias\": UNIFIED_ASSISTANT_RECENCY_BIAS,\n    \"chunks_above\": UNIFIED_ASSISTANT_CHUNKS_ABOVE,\n    \"chunks_below\": UNIFIED_ASSISTANT_CHUNKS_BELOW,\n    \"datetime_aware\": UNIFIED_ASSISTANT_DATETIME_AWARE,\n}\n\nGENERAL_ASSISTANT_ID = -1\nART_ASSISTANT_ID = -3\n\n\nclass UserRow(NamedTuple):\n    \"\"\"Typed representation of user row from database query.\"\"\"\n\n    id: UUID\n    chosen_assistants: list[int] | None\n    visible_assistants: list[int] | None\n    hidden_assistants: list[int] | None\n    pinned_assistants: list[int] | None\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n\n    # Step 1: Create or update the unified assistant (ID 0)\n    search_assistant = conn.execute(\n        sa.text(\"SELECT * FROM persona WHERE id = 0\")\n    ).fetchone()\n\n    if search_assistant:\n        # Update existing Search assistant to be the unified assistant\n        conn.execute(\n            sa.text(\n                \"\"\"\n                UPDATE persona\n                SET name = :name,\n                    description = :description,\n                    system_prompt = :system_prompt,\n                    num_chunks = :num_chunks,\n                    is_default_persona = true,\n                    is_visible = true,\n                    deleted = false,\n                    display_priority = :display_priority,\n                    llm_filter_extraction = :llm_filter_extraction,\n                    llm_relevance_filter = :llm_relevance_filter,\n                    recency_bias = :recency_bias,\n                    chunks_above = :chunks_above,\n                    chunks_below = :chunks_below,\n                    datetime_aware = :datetime_aware,\n                    starter_messages = null\n                WHERE id = 0\n            \"\"\"\n            ),\n            INSERT_DICT,\n        )\n    else:\n        # Create new unified assistant with ID 0\n        conn.execute(\n            sa.text(\n                \"\"\"\n                INSERT INTO persona (\n                    id, name, description, system_prompt, num_chunks,\n                    is_default_persona, is_visible, deleted, display_priority,\n                    llm_filter_extraction, llm_relevance_filter, recency_bias,\n                    chunks_above, chunks_below, datetime_aware, starter_messages,\n                    builtin_persona\n                ) VALUES (\n                    0, :name, :description, :system_prompt, :num_chunks,\n                    true, true, false, :display_priority, :llm_filter_extraction,\n                    :llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,\n                    :datetime_aware, null, true\n                )\n            \"\"\"\n            ),\n            INSERT_DICT,\n        )\n\n    # Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)\n    conn.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET deleted = true, is_visible = false, is_default_persona = false\n            WHERE builtin_persona = true AND id != 0\n        \"\"\"\n        )\n    )\n\n    # Step 3: Add all built-in tools to the unified assistant\n    # First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool\n    search_tool = conn.execute(\n        sa.text(\"SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'\")\n    ).fetchone()\n\n    if not search_tool:\n        raise ValueError(\n            \"SearchTool not found in database. Ensure tools migration has run first.\"\n        )\n\n    image_gen_tool = conn.execute(\n        sa.text(\"SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'\")\n    ).fetchone()\n\n    if not image_gen_tool:\n        raise ValueError(\n            \"ImageGenerationTool not found in database. Ensure tools migration has run first.\"\n        )\n\n    # WebSearchTool is optional - may not be configured\n    web_search_tool = conn.execute(\n        sa.text(\"SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'\")\n    ).fetchone()\n\n    # Clear existing tool associations for persona 0\n    conn.execute(sa.text(\"DELETE FROM persona__tool WHERE persona_id = 0\"))\n\n    # Add tools to the unified assistant\n    conn.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO persona__tool (persona_id, tool_id)\n            VALUES (0, :tool_id)\n            ON CONFLICT DO NOTHING\n        \"\"\"\n        ),\n        {\"tool_id\": search_tool[0]},\n    )\n\n    conn.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO persona__tool (persona_id, tool_id)\n            VALUES (0, :tool_id)\n            ON CONFLICT DO NOTHING\n        \"\"\"\n        ),\n        {\"tool_id\": image_gen_tool[0]},\n    )\n\n    if web_search_tool:\n        conn.execute(\n            sa.text(\n                \"\"\"\n                INSERT INTO persona__tool (persona_id, tool_id)\n                VALUES (0, :tool_id)\n                ON CONFLICT DO NOTHING\n            \"\"\"\n            ),\n            {\"tool_id\": web_search_tool[0]},\n        )\n\n    # Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant\n    conn.execute(\n        sa.text(\n            \"\"\"\n            UPDATE chat_session\n            SET persona_id = 0\n            WHERE persona_id IN (\n                SELECT id FROM persona WHERE builtin_persona = true AND id != 0\n            )\n        \"\"\"\n        )\n    )\n\n    # Step 5: Migrate user preferences - remove references to all builtin assistants\n    # First, get all builtin assistant IDs (except 0)\n    builtin_assistants_result = conn.execute(\n        sa.text(\n            \"\"\"\n            SELECT id FROM persona\n            WHERE builtin_persona = true AND id != 0\n        \"\"\"\n        )\n    ).fetchall()\n    builtin_assistant_ids = [row[0] for row in builtin_assistants_result]\n\n    # Get all users with preferences\n    users_result = conn.execute(\n        sa.text(\n            \"\"\"\n            SELECT id, chosen_assistants, visible_assistants,\n                   hidden_assistants, pinned_assistants\n            FROM \"user\"\n        \"\"\"\n        )\n    ).fetchall()\n\n    for user_row in users_result:\n        user = UserRow(*user_row)\n        user_id: UUID = user.id\n        updates: dict[str, Any] = {}\n\n        # Remove all builtin assistants from chosen_assistants\n        if user.chosen_assistants:\n            new_chosen: list[int] = [\n                assistant_id\n                for assistant_id in user.chosen_assistants\n                if assistant_id not in builtin_assistant_ids\n            ]\n            if new_chosen != user.chosen_assistants:\n                updates[\"chosen_assistants\"] = json.dumps(new_chosen)\n\n        # Remove all builtin assistants from visible_assistants\n        if user.visible_assistants:\n            new_visible: list[int] = [\n                assistant_id\n                for assistant_id in user.visible_assistants\n                if assistant_id not in builtin_assistant_ids\n            ]\n            if new_visible != user.visible_assistants:\n                updates[\"visible_assistants\"] = json.dumps(new_visible)\n\n        # Add all builtin assistants to hidden_assistants\n        if user.hidden_assistants:\n            new_hidden: list[int] = list(user.hidden_assistants)\n            for old_id in builtin_assistant_ids:\n                if old_id not in new_hidden:\n                    new_hidden.append(old_id)\n            if new_hidden != user.hidden_assistants:\n                updates[\"hidden_assistants\"] = json.dumps(new_hidden)\n        else:\n            updates[\"hidden_assistants\"] = json.dumps(builtin_assistant_ids)\n\n        # Remove all builtin assistants from pinned_assistants\n        if user.pinned_assistants:\n            new_pinned: list[int] = [\n                assistant_id\n                for assistant_id in user.pinned_assistants\n                if assistant_id not in builtin_assistant_ids\n            ]\n            if new_pinned != user.pinned_assistants:\n                updates[\"pinned_assistants\"] = json.dumps(new_pinned)\n\n        # Apply updates if any\n        if updates:\n            set_clause = \", \".join([f\"{k} = :{k}\" for k in updates.keys()])\n            updates[\"user_id\"] = str(user_id)  # Convert UUID to string for SQL\n            conn.execute(\n                sa.text(f'UPDATE \"user\" SET {set_clause} WHERE id = :user_id'),\n                updates,\n            )\n\n\ndef downgrade() -> None:\n    conn = op.get_bind()\n\n    # Only restore General (ID -1) and Art (ID -3) assistants\n    # Step 1: Keep Search assistant (ID 0) as default but restore original state\n    conn.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET is_default_persona = true,\n                is_visible = true,\n                deleted = false\n            WHERE id = 0\n        \"\"\"\n        )\n    )\n\n    # Step 2: Restore General assistant (ID -1)\n    conn.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET deleted = false,\n                is_visible = true,\n                is_default_persona = true\n            WHERE id = :general_assistant_id\n        \"\"\"\n        ),\n        {\"general_assistant_id\": GENERAL_ASSISTANT_ID},\n    )\n\n    # Step 3: Restore Art assistant (ID -3)\n    conn.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET deleted = false,\n                is_visible = true,\n                is_default_persona = true\n            WHERE id = :art_assistant_id\n        \"\"\"\n        ),\n        {\"art_assistant_id\": ART_ASSISTANT_ID},\n    )\n\n    # Note: We don't restore the original tool associations, names, or descriptions\n    # as those would require more complex logic to determine original state.\n    # We also cannot restore original chat session persona_ids as we don't\n    # have the original mappings.\n    # Other builtin assistants remain deleted as per the requirement.\n"
  },
  {
    "path": "backend/alembic/versions/50b683a8295c_add_additional_retrieval_controls_to_.py",
    "content": "\"\"\"Add additional retrieval controls to Persona\n\nRevision ID: 50b683a8295c\nRevises: 7da0ae5ad583\nCreate Date: 2023-11-27 17:23:29.668422\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"50b683a8295c\"\ndown_revision = \"7da0ae5ad583\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\"persona\", sa.Column(\"num_chunks\", sa.Integer(), nullable=True))\n    op.add_column(\n        \"persona\",\n        sa.Column(\"apply_llm_relevance_filter\", sa.Boolean(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"persona\", \"apply_llm_relevance_filter\")\n    op.drop_column(\"persona\", \"num_chunks\")\n"
  },
  {
    "path": "backend/alembic/versions/52a219fb5233_add_last_synced_and_last_modified_to_document_table.py",
    "content": "\"\"\"Add last synced and last modified to document table\n\nRevision ID: 52a219fb5233\nRevises: f7e58d357687\nCreate Date: 2024-08-28 17:40:46.077470\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.sql import func\n\n# revision identifiers, used by Alembic.\nrevision = \"52a219fb5233\"\ndown_revision = \"f7e58d357687\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # last modified represents the last time anything needing syncing to vespa changed\n    # including row metadata and the document itself. This obviously does not include\n    # the last_synced column.\n    op.add_column(\n        \"document\",\n        sa.Column(\n            \"last_modified\",\n            sa.DateTime(timezone=True),\n            nullable=False,\n            server_default=func.now(),\n        ),\n    )\n\n    # last synced represents the last time this document was synced to Vespa\n    op.add_column(\n        \"document\",\n        sa.Column(\"last_synced\", sa.DateTime(timezone=True), nullable=True),\n    )\n\n    # Set last_synced to the same value as last_modified for existing rows\n    op.execute(\n        \"\"\"\n        UPDATE document\n        SET last_synced = last_modified\n        \"\"\"\n    )\n\n    op.create_index(\n        op.f(\"ix_document_last_modified\"),\n        \"document\",\n        [\"last_modified\"],\n        unique=False,\n    )\n\n    op.create_index(\n        op.f(\"ix_document_last_synced\"),\n        \"document\",\n        [\"last_synced\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(op.f(\"ix_document_last_synced\"), table_name=\"document\")\n    op.drop_index(op.f(\"ix_document_last_modified\"), table_name=\"document\")\n    op.drop_column(\"document\", \"last_synced\")\n    op.drop_column(\"document\", \"last_modified\")\n"
  },
  {
    "path": "backend/alembic/versions/54a74a0417fc_danswerbot_onyxbot.py",
    "content": "\"\"\"danswerbot -> onyxbot\n\nRevision ID: 54a74a0417fc\nRevises: 94dc3d0236f8\nCreate Date: 2024-12-11 18:05:05.490737\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"54a74a0417fc\"\ndown_revision = \"94dc3d0236f8\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\"chat_session\", \"danswerbot_flow\", new_column_name=\"onyxbot_flow\")\n\n\ndef downgrade() -> None:\n    op.alter_column(\"chat_session\", \"onyxbot_flow\", new_column_name=\"danswerbot_flow\")\n"
  },
  {
    "path": "backend/alembic/versions/55546a7967ee_assistant_rework.py",
    "content": "\"\"\"assistant_rework\n\nRevision ID: 55546a7967ee\nRevises: 61ff3651add4\nCreate Date: 2024-09-18 17:00:23.755399\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n\n# revision identifiers, used by Alembic.\nrevision = \"55546a7967ee\"\ndown_revision = \"61ff3651add4\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Reworking persona and user tables for new assistant features\n    # keep track of user's chosen assistants separate from their `ordering`\n    op.add_column(\"persona\", sa.Column(\"builtin_persona\", sa.Boolean(), nullable=True))\n    op.execute(\"UPDATE persona SET builtin_persona = default_persona\")\n    op.alter_column(\"persona\", \"builtin_persona\", nullable=False)\n    op.drop_index(\"_default_persona_name_idx\", table_name=\"persona\")\n    op.create_index(\n        \"_builtin_persona_name_idx\",\n        \"persona\",\n        [\"name\"],\n        unique=True,\n        postgresql_where=sa.text(\"builtin_persona = true\"),\n    )\n\n    op.add_column(\n        \"user\", sa.Column(\"visible_assistants\", postgresql.JSONB(), nullable=True)\n    )\n    op.add_column(\n        \"user\", sa.Column(\"hidden_assistants\", postgresql.JSONB(), nullable=True)\n    )\n    op.execute(\n        \"UPDATE \\\"user\\\" SET visible_assistants = '[]'::jsonb, hidden_assistants = '[]'::jsonb\"\n    )\n    op.alter_column(\n        \"user\",\n        \"visible_assistants\",\n        nullable=False,\n        server_default=sa.text(\"'[]'::jsonb\"),\n    )\n    op.alter_column(\n        \"user\",\n        \"hidden_assistants\",\n        nullable=False,\n        server_default=sa.text(\"'[]'::jsonb\"),\n    )\n    op.drop_column(\"persona\", \"default_persona\")\n    op.add_column(\n        \"persona\", sa.Column(\"is_default_persona\", sa.Boolean(), nullable=True)\n    )\n\n\ndef downgrade() -> None:\n    # Reverting changes made in upgrade\n    op.drop_column(\"user\", \"hidden_assistants\")\n    op.drop_column(\"user\", \"visible_assistants\")\n    op.drop_index(\"_builtin_persona_name_idx\", table_name=\"persona\")\n\n    op.drop_column(\"persona\", \"is_default_persona\")\n    op.add_column(\"persona\", sa.Column(\"default_persona\", sa.Boolean(), nullable=True))\n    op.execute(\"UPDATE persona SET default_persona = builtin_persona\")\n    op.alter_column(\"persona\", \"default_persona\", nullable=False)\n    op.drop_column(\"persona\", \"builtin_persona\")\n    op.create_index(\n        \"_default_persona_name_idx\",\n        \"persona\",\n        [\"name\"],\n        unique=True,\n        postgresql_where=sa.text(\"default_persona = true\"),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/570282d33c49_track_onyxbot_explicitly.py",
    "content": "\"\"\"Track Onyxbot Explicitly\n\nRevision ID: 570282d33c49\nRevises: 7547d982db8f\nCreate Date: 2024-05-04 17:49:28.568109\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"570282d33c49\"\ndown_revision = \"7547d982db8f\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_session\", sa.Column(\"danswerbot_flow\", sa.Boolean(), nullable=True)\n    )\n    op.execute(\"UPDATE chat_session SET danswerbot_flow = one_shot\")\n    op.alter_column(\"chat_session\", \"danswerbot_flow\", nullable=False)\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_session\", \"danswerbot_flow\")\n"
  },
  {
    "path": "backend/alembic/versions/57122d037335_add_python_tool_on_default.py",
    "content": "\"\"\"add python tool on default\n\nRevision ID: 57122d037335\nRevises: c0c937d5c9e5\nCreate Date: 2026-02-27 10:10:40.124925\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"57122d037335\"\ndown_revision = \"c0c937d5c9e5\"\nbranch_labels = None\ndepends_on = None\n\n\nPYTHON_TOOL_NAME = \"python\"\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n\n    # Look up the PythonTool id\n    result = conn.execute(\n        sa.text(\"SELECT id FROM tool WHERE name = :name\"),\n        {\"name\": PYTHON_TOOL_NAME},\n    ).fetchone()\n\n    if not result:\n        return\n\n    tool_id = result[0]\n\n    # Attach to the default persona (id=0) if not already attached\n    conn.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO persona__tool (persona_id, tool_id)\n            VALUES (0, :tool_id)\n            ON CONFLICT DO NOTHING\n            \"\"\"\n        ),\n        {\"tool_id\": tool_id},\n    )\n\n\ndef downgrade() -> None:\n    conn = op.get_bind()\n\n    result = conn.execute(\n        sa.text(\"SELECT id FROM tool WHERE name = :name\"),\n        {\"name\": PYTHON_TOOL_NAME},\n    ).fetchone()\n\n    if not result:\n        return\n\n    conn.execute(\n        sa.text(\n            \"\"\"\n            DELETE FROM persona__tool\n            WHERE persona_id = 0 AND tool_id = :tool_id\n            \"\"\"\n        ),\n        {\"tool_id\": result[0]},\n    )\n"
  },
  {
    "path": "backend/alembic/versions/57b53544726e_add_document_set_tables.py",
    "content": "\"\"\"Add document set tables\n\nRevision ID: 57b53544726e\nRevises: 800f48024ae9\nCreate Date: 2023-09-20 16:59:39.097177\n\n\"\"\"\n\nfrom alembic import op\nimport fastapi_users_db_sqlalchemy\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"57b53544726e\"\ndown_revision = \"800f48024ae9\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"document_set\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"description\", sa.String(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.Column(\"is_up_to_date\", sa.Boolean(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"name\"),\n    )\n    op.create_table(\n        \"document_set__connector_credential_pair\",\n        sa.Column(\"document_set_id\", sa.Integer(), nullable=False),\n        sa.Column(\"connector_credential_pair_id\", sa.Integer(), nullable=False),\n        sa.Column(\"is_current\", sa.Boolean(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"connector_credential_pair_id\"],\n            [\"connector_credential_pair.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"document_set_id\"],\n            [\"document_set.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\n            \"document_set_id\", \"connector_credential_pair_id\", \"is_current\"\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"document_set__connector_credential_pair\")\n    op.drop_table(\"document_set\")\n"
  },
  {
    "path": "backend/alembic/versions/5809c0787398_add_chat_sessions.py",
    "content": "\"\"\"Add Chat Sessions\n\nRevision ID: 5809c0787398\nRevises: d929f0c1c6af\nCreate Date: 2023-09-04 15:29:44.002164\n\n\"\"\"\n\nimport fastapi_users_db_sqlalchemy\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"5809c0787398\"\ndown_revision = \"d929f0c1c6af\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"chat_session\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.Column(\"description\", sa.Text(), nullable=False),\n        sa.Column(\"deleted\", sa.Boolean(), nullable=False),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"chat_message\",\n        sa.Column(\"chat_session_id\", sa.Integer(), nullable=False),\n        sa.Column(\"message_number\", sa.Integer(), nullable=False),\n        sa.Column(\"edit_number\", sa.Integer(), nullable=False),\n        sa.Column(\"parent_edit_number\", sa.Integer(), nullable=True),\n        sa.Column(\"latest\", sa.Boolean(), nullable=False),\n        sa.Column(\"message\", sa.Text(), nullable=False),\n        sa.Column(\n            \"message_type\",\n            sa.Enum(\n                \"SYSTEM\",\n                \"USER\",\n                \"ASSISTANT\",\n                \"DANSWER\",\n                name=\"messagetype\",\n                native_enum=False,\n            ),\n            nullable=False,\n        ),\n        sa.Column(\n            \"time_sent\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"chat_session_id\"],\n            [\"chat_session.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"chat_session_id\", \"message_number\", \"edit_number\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"chat_message\")\n    op.drop_table(\"chat_session\")\n"
  },
  {
    "path": "backend/alembic/versions/58c50ef19f08_add_stale_column_to_user__external_user_.py",
    "content": "\"\"\"add stale column to external user group tables\n\nRevision ID: 58c50ef19f08\nRevises: 7b9b952abdf6\nCreate Date: 2025-06-25 14:08:14.162380\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"58c50ef19f08\"\ndown_revision = \"7b9b952abdf6\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add the stale column with default value False to user__external_user_group_id\n    op.add_column(\n        \"user__external_user_group_id\",\n        sa.Column(\"stale\", sa.Boolean(), nullable=False, server_default=\"false\"),\n    )\n\n    # Create index for efficient querying of stale rows by cc_pair_id\n    op.create_index(\n        \"ix_user__external_user_group_id_cc_pair_id_stale\",\n        \"user__external_user_group_id\",\n        [\"cc_pair_id\", \"stale\"],\n        unique=False,\n    )\n\n    # Create index for efficient querying of all stale rows\n    op.create_index(\n        \"ix_user__external_user_group_id_stale\",\n        \"user__external_user_group_id\",\n        [\"stale\"],\n        unique=False,\n    )\n\n    # Add the stale column with default value False to public_external_user_group\n    op.add_column(\n        \"public_external_user_group\",\n        sa.Column(\"stale\", sa.Boolean(), nullable=False, server_default=\"false\"),\n    )\n\n    # Create index for efficient querying of stale rows by cc_pair_id\n    op.create_index(\n        \"ix_public_external_user_group_cc_pair_id_stale\",\n        \"public_external_user_group\",\n        [\"cc_pair_id\", \"stale\"],\n        unique=False,\n    )\n\n    # Create index for efficient querying of all stale rows\n    op.create_index(\n        \"ix_public_external_user_group_stale\",\n        \"public_external_user_group\",\n        [\"stale\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    # Drop the indices for public_external_user_group first\n    op.drop_index(\n        \"ix_public_external_user_group_stale\", table_name=\"public_external_user_group\"\n    )\n    op.drop_index(\n        \"ix_public_external_user_group_cc_pair_id_stale\",\n        table_name=\"public_external_user_group\",\n    )\n\n    # Drop the stale column from public_external_user_group\n    op.drop_column(\"public_external_user_group\", \"stale\")\n\n    # Drop the indices for user__external_user_group_id\n    op.drop_index(\n        \"ix_user__external_user_group_id_stale\",\n        table_name=\"user__external_user_group_id\",\n    )\n    op.drop_index(\n        \"ix_user__external_user_group_id_cc_pair_id_stale\",\n        table_name=\"user__external_user_group_id\",\n    )\n\n    # Drop the stale column from user__external_user_group_id\n    op.drop_column(\"user__external_user_group_id\", \"stale\")\n"
  },
  {
    "path": "backend/alembic/versions/5ae8240accb3_add_research_agent_database_tables_and_.py",
    "content": "\"\"\"add research agent database tables and chat message research fields\n\nRevision ID: 5ae8240accb3\nRevises: b558f51620b4\nCreate Date: 2025-08-06 14:29:24.691388\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n\n# revision identifiers, used by Alembic.\nrevision = \"5ae8240accb3\"\ndown_revision = \"b558f51620b4\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add research_type and research_plan columns to chat_message table\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"research_type\", sa.String(), nullable=True),\n    )\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"research_plan\", postgresql.JSONB(), nullable=True),\n    )\n\n    # Create research_agent_iteration table\n    op.create_table(\n        \"research_agent_iteration\",\n        sa.Column(\"id\", sa.Integer(), autoincrement=True, nullable=False),\n        sa.Column(\n            \"primary_question_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"chat_message.id\", ondelete=\"CASCADE\"),\n            nullable=False,\n        ),\n        sa.Column(\"iteration_nr\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n        sa.Column(\"purpose\", sa.String(), nullable=True),\n        sa.Column(\"reasoning\", sa.String(), nullable=True),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\n            \"primary_question_id\",\n            \"iteration_nr\",\n            name=\"_research_agent_iteration_unique_constraint\",\n        ),\n    )\n\n    # Create research_agent_iteration_sub_step table\n    op.create_table(\n        \"research_agent_iteration_sub_step\",\n        sa.Column(\"id\", sa.Integer(), autoincrement=True, nullable=False),\n        sa.Column(\n            \"primary_question_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"chat_message.id\", ondelete=\"CASCADE\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"parent_question_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"research_agent_iteration_sub_step.id\", ondelete=\"CASCADE\"),\n            nullable=True,\n        ),\n        sa.Column(\"iteration_nr\", sa.Integer(), nullable=False),\n        sa.Column(\"iteration_sub_step_nr\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n        sa.Column(\"sub_step_instructions\", sa.String(), nullable=True),\n        sa.Column(\n            \"sub_step_tool_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"tool.id\"),\n            nullable=True,\n        ),\n        sa.Column(\"reasoning\", sa.String(), nullable=True),\n        sa.Column(\"sub_answer\", sa.String(), nullable=True),\n        sa.Column(\"cited_doc_results\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"claims\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"generated_images\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"additional_data\", postgresql.JSONB(), nullable=True),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.ForeignKeyConstraint(\n            [\"primary_question_id\", \"iteration_nr\"],\n            [\n                \"research_agent_iteration.primary_question_id\",\n                \"research_agent_iteration.iteration_nr\",\n            ],\n            ondelete=\"CASCADE\",\n        ),\n    )\n\n\ndef downgrade() -> None:\n    # Drop tables in reverse order\n    op.drop_table(\"research_agent_iteration_sub_step\")\n    op.drop_table(\"research_agent_iteration\")\n\n    # Remove columns from chat_message table\n    op.drop_column(\"chat_message\", \"research_plan\")\n    op.drop_column(\"chat_message\", \"research_type\")\n"
  },
  {
    "path": "backend/alembic/versions/5b29123cd710_nullable_search_settings_for_historic_.py",
    "content": "\"\"\"nullable search settings for historic index attempts\n\nRevision ID: 5b29123cd710\nRevises: 949b4a92a401\nCreate Date: 2024-10-30 19:37:59.630704\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"5b29123cd710\"\ndown_revision = \"949b4a92a401\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Drop the existing foreign key constraint\n    op.drop_constraint(\n        \"fk_index_attempt_search_settings\", \"index_attempt\", type_=\"foreignkey\"\n    )\n\n    # Modify the column to be nullable\n    op.alter_column(\n        \"index_attempt\", \"search_settings_id\", existing_type=sa.INTEGER(), nullable=True\n    )\n\n    # Add back the foreign key with ON DELETE SET NULL\n    op.create_foreign_key(\n        \"fk_index_attempt_search_settings\",\n        \"index_attempt\",\n        \"search_settings\",\n        [\"search_settings_id\"],\n        [\"id\"],\n        ondelete=\"SET NULL\",\n    )\n\n\ndef downgrade() -> None:\n    # Warning: This will delete all index attempts that don't have search settings\n    op.execute(\n        \"\"\"\n        DELETE FROM index_attempt\n        WHERE search_settings_id IS NULL\n    \"\"\"\n    )\n\n    # Drop foreign key constraint\n    op.drop_constraint(\n        \"fk_index_attempt_search_settings\", \"index_attempt\", type_=\"foreignkey\"\n    )\n\n    # Modify the column to be not nullable\n    op.alter_column(\n        \"index_attempt\",\n        \"search_settings_id\",\n        existing_type=sa.INTEGER(),\n        nullable=False,\n    )\n\n    # Add back the foreign key without ON DELETE SET NULL\n    op.create_foreign_key(\n        \"fk_index_attempt_search_settings\",\n        \"index_attempt\",\n        \"search_settings\",\n        [\"search_settings_id\"],\n        [\"id\"],\n    )\n"
  },
  {
    "path": "backend/alembic/versions/5c3dca366b35_backend_driven_notification_details.py",
    "content": "\"\"\"backend driven notification details\n\nRevision ID: 5c3dca366b35\nRevises: 9087b548dd69\nCreate Date: 2026-01-06 16:03:11.413724\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"5c3dca366b35\"\ndown_revision = \"9087b548dd69\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"notification\",\n        sa.Column(\n            \"title\", sa.String(), nullable=False, server_default=\"New Notification\"\n        ),\n    )\n    op.add_column(\n        \"notification\",\n        sa.Column(\"description\", sa.String(), nullable=True, server_default=\"\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"notification\", \"title\")\n    op.drop_column(\"notification\", \"description\")\n"
  },
  {
    "path": "backend/alembic/versions/5c448911b12f_add_content_type_to_userfile.py",
    "content": "\"\"\"Add content type to UserFile\n\nRevision ID: 5c448911b12f\nRevises: 47a07e1a38f1\nCreate Date: 2025-04-25 16:59:48.182672\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"5c448911b12f\"\ndown_revision = \"47a07e1a38f1\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\"user_file\", sa.Column(\"content_type\", sa.String(), nullable=True))\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user_file\", \"content_type\")\n"
  },
  {
    "path": "backend/alembic/versions/5c7fdadae813_match_any_keywords_flag_for_standard_.py",
    "content": "\"\"\"match_any_keywords flag for standard answers\n\nRevision ID: 5c7fdadae813\nRevises: efb35676026c\nCreate Date: 2024-09-13 18:52:59.256478\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"5c7fdadae813\"\ndown_revision = \"efb35676026c\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # ### commands auto generated by Alembic - please adjust! ###\n    op.add_column(\n        \"standard_answer\",\n        sa.Column(\n            \"match_any_keywords\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.false(),\n        ),\n    )\n    # ### end Alembic commands ###\n\n\ndef downgrade() -> None:\n    # ### commands auto generated by Alembic - please adjust! ###\n    op.drop_column(\"standard_answer\", \"match_any_keywords\")\n    # ### end Alembic commands ###\n"
  },
  {
    "path": "backend/alembic/versions/5d12a446f5c0_add_api_version_and_deployment_name_to_.py",
    "content": "\"\"\"add api_version and deployment_name to search settings\n\nRevision ID: 5d12a446f5c0\nRevises: e4334d5b33ba\nCreate Date: 2024-10-08 15:56:07.975636\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"5d12a446f5c0\"\ndown_revision = \"e4334d5b33ba\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"embedding_provider\", sa.Column(\"api_version\", sa.String(), nullable=True)\n    )\n    op.add_column(\n        \"embedding_provider\", sa.Column(\"deployment_name\", sa.String(), nullable=True)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"embedding_provider\", \"deployment_name\")\n    op.drop_column(\"embedding_provider\", \"api_version\")\n"
  },
  {
    "path": "backend/alembic/versions/5e1c073d48a3_add_personal_access_token_table.py",
    "content": "\"\"\"add_personal_access_token_table\n\nRevision ID: 5e1c073d48a3\nRevises: 09995b8811eb\nCreate Date: 2025-10-30 17:30:24.308521\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n\n# revision identifiers, used by Alembic.\nrevision = \"5e1c073d48a3\"\ndown_revision = \"09995b8811eb\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Create personal_access_token table\n    op.create_table(\n        \"personal_access_token\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"hashed_token\", sa.String(length=64), nullable=False),\n        sa.Column(\"token_display\", sa.String(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            postgresql.UUID(as_uuid=True),\n            nullable=False,\n        ),\n        sa.Column(\n            \"expires_at\",\n            sa.DateTime(timezone=True),\n            nullable=True,\n        ),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"last_used_at\",\n            sa.DateTime(timezone=True),\n            nullable=True,\n        ),\n        sa.Column(\n            \"is_revoked\",\n            sa.Boolean(),\n            server_default=sa.text(\"false\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n            ondelete=\"CASCADE\",\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"hashed_token\"),\n    )\n\n    # Create indexes\n    op.create_index(\n        \"ix_personal_access_token_expires_at\",\n        \"personal_access_token\",\n        [\"expires_at\"],\n        unique=False,\n    )\n    op.create_index(\n        \"ix_pat_user_created\",\n        \"personal_access_token\",\n        [\"user_id\", sa.text(\"created_at DESC\")],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    # Drop indexes first\n    op.drop_index(\"ix_pat_user_created\", table_name=\"personal_access_token\")\n    op.drop_index(\n        \"ix_personal_access_token_expires_at\", table_name=\"personal_access_token\"\n    )\n\n    # Drop table\n    op.drop_table(\"personal_access_token\")\n"
  },
  {
    "path": "backend/alembic/versions/5e6f7a8b9c0d_update_default_persona_prompt.py",
    "content": "\"\"\"update_default_persona_prompt\n\nRevision ID: 5e6f7a8b9c0d\nRevises: 4f8a2b3c1d9e\nCreate Date: 2025-11-30 12:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"5e6f7a8b9c0d\"\ndown_revision = \"4f8a2b3c1d9e\"\nbranch_labels = None\ndepends_on = None\n\n\nDEFAULT_PERSONA_ID = 0\n\n# ruff: noqa: E501, W605 start\nDEFAULT_SYSTEM_PROMPT = \"\"\"\nYou are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.\n\nThe current date is [[CURRENT_DATETIME]].{citation_reminder_or_empty}\n\n# Response Style\nYou use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.\nYou use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\\\n[expression]\\\\n$$' for standalone cases and '\\\\( [expression] \\\\)' when inline.\nFor code you prefer to use Markdown and specify the language.\nYou can use horizontal rules (---) to separate sections of your responses.\nYou can use Markdown tables to format your responses for data, lists, and other structured information.\n\"\"\".lstrip()\n# ruff: noqa: E501, W605 end\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n    conn.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET system_prompt = :system_prompt\n            WHERE id = :persona_id\n            \"\"\"\n        ),\n        {\"system_prompt\": DEFAULT_SYSTEM_PROMPT, \"persona_id\": DEFAULT_PERSONA_ID},\n    )\n\n\ndef downgrade() -> None:\n    # We don't revert the system prompt on downgrade since we don't know\n    # what the previous value was. The new prompt is a reasonable default.\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/5e84129c8be3_add_docs_indexed_column_to_index_.py",
    "content": "\"\"\"Add docs_indexed_column + time_started to index_attempt table\n\nRevision ID: 5e84129c8be3\nRevises: e6a4bbc13fe4\nCreate Date: 2023-08-10 21:43:09.069523\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"5e84129c8be3\"\ndown_revision = \"e6a4bbc13fe4\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"num_docs_indexed\", sa.Integer()),\n    )\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\n            \"time_started\",\n            sa.DateTime(timezone=True),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"index_attempt\", \"time_started\")\n    op.drop_column(\"index_attempt\", \"num_docs_indexed\")\n"
  },
  {
    "path": "backend/alembic/versions/5f4b8568a221_add_removed_documents_to_index_attempt.py",
    "content": "\"\"\"add removed documents to index_attempt\n\nRevision ID: 5f4b8568a221\nRevises: dbaa756c2ccf\nCreate Date: 2024-02-16 15:02:03.319907\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"5f4b8568a221\"\ndown_revision = \"8987770549c0\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"docs_removed_from_index\", sa.Integer()),\n    )\n    op.execute(\"UPDATE index_attempt SET docs_removed_from_index = 0\")\n\n\ndef downgrade() -> None:\n    op.drop_column(\"index_attempt\", \"docs_removed_from_index\")\n"
  },
  {
    "path": "backend/alembic/versions/5fc1f54cc252_hybrid_enum.py",
    "content": "\"\"\"hybrid-enum\n\nRevision ID: 5fc1f54cc252\nRevises: 1d6ad76d1f37\nCreate Date: 2024-08-06 15:35:40.278485\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"5fc1f54cc252\"\ndown_revision = \"1d6ad76d1f37\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.drop_column(\"persona\", \"search_type\")\n\n\ndef downgrade() -> None:\n    op.add_column(\"persona\", sa.Column(\"search_type\", sa.String(), nullable=True))\n    op.execute(\"UPDATE persona SET search_type = 'SEMANTIC'\")\n    op.alter_column(\"persona\", \"search_type\", nullable=False)\n"
  },
  {
    "path": "backend/alembic/versions/61ff3651add4_add_permission_syncing.py",
    "content": "\"\"\"Add Permission Syncing\n\nRevision ID: 61ff3651add4\nRevises: 1b8206b29c5d\nCreate Date: 2024-09-05 13:57:11.770413\n\n\"\"\"\n\nimport fastapi_users_db_sqlalchemy\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"61ff3651add4\"\ndown_revision = \"1b8206b29c5d\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Admin user who set up connectors will lose access to the docs temporarily\n    # only way currently to give back access is to rerun from beginning\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"access_type\",\n            sa.String(),\n            nullable=True,\n        ),\n    )\n    op.execute(\n        \"UPDATE connector_credential_pair SET access_type = 'PUBLIC' WHERE is_public = true\"\n    )\n    op.execute(\n        \"UPDATE connector_credential_pair SET access_type = 'PRIVATE' WHERE is_public = false\"\n    )\n    op.alter_column(\"connector_credential_pair\", \"access_type\", nullable=False)\n\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"auto_sync_options\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=True,\n        ),\n    )\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\"last_time_perm_sync\", sa.DateTime(timezone=True), nullable=True),\n    )\n    op.drop_column(\"connector_credential_pair\", \"is_public\")\n\n    op.add_column(\n        \"document\",\n        sa.Column(\"external_user_emails\", postgresql.ARRAY(sa.String()), nullable=True),\n    )\n    op.add_column(\n        \"document\",\n        sa.Column(\n            \"external_user_group_ids\", postgresql.ARRAY(sa.String()), nullable=True\n        ),\n    )\n    op.add_column(\n        \"document\",\n        sa.Column(\"is_public\", sa.Boolean(), nullable=True),\n    )\n\n    op.create_table(\n        \"user__external_user_group_id\",\n        sa.Column(\n            \"user_id\", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False\n        ),\n        sa.Column(\"external_user_group_id\", sa.String(), nullable=False),\n        sa.Column(\"cc_pair_id\", sa.Integer(), nullable=False),\n        sa.PrimaryKeyConstraint(\"user_id\"),\n    )\n\n    op.drop_column(\"external_permission\", \"user_id\")\n    op.drop_column(\"email_to_external_user_cache\", \"user_id\")\n    op.drop_table(\"permission_sync_run\")\n    op.drop_table(\"external_permission\")\n    op.drop_table(\"email_to_external_user_cache\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\"is_public\", sa.BOOLEAN(), nullable=True),\n    )\n    op.execute(\n        \"UPDATE connector_credential_pair SET is_public = (access_type = 'PUBLIC')\"\n    )\n    op.alter_column(\"connector_credential_pair\", \"is_public\", nullable=False)\n\n    op.drop_column(\"connector_credential_pair\", \"auto_sync_options\")\n    op.drop_column(\"connector_credential_pair\", \"access_type\")\n    op.drop_column(\"connector_credential_pair\", \"last_time_perm_sync\")\n    op.drop_column(\"document\", \"external_user_emails\")\n    op.drop_column(\"document\", \"external_user_group_ids\")\n    op.drop_column(\"document\", \"is_public\")\n\n    op.drop_table(\"user__external_user_group_id\")\n\n    # Drop the enum type at the end of the downgrade\n    op.create_table(\n        \"permission_sync_run\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"source_type\",\n            sa.String(),\n            nullable=False,\n        ),\n        sa.Column(\"update_type\", sa.String(), nullable=False),\n        sa.Column(\"cc_pair_id\", sa.Integer(), nullable=True),\n        sa.Column(\n            \"status\",\n            sa.String(),\n            nullable=False,\n        ),\n        sa.Column(\"error_msg\", sa.Text(), nullable=True),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"cc_pair_id\"],\n            [\"connector_credential_pair.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"external_permission\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"user_id\", sa.UUID(), nullable=True),\n        sa.Column(\"user_email\", sa.String(), nullable=False),\n        sa.Column(\n            \"source_type\",\n            sa.String(),\n            nullable=False,\n        ),\n        sa.Column(\"external_permission_group\", sa.String(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"email_to_external_user_cache\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"external_user_id\", sa.String(), nullable=False),\n        sa.Column(\"user_id\", sa.UUID(), nullable=True),\n        sa.Column(\"user_email\", sa.String(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/62c3a055a141_add_file_names_to_file_connector_config.py",
    "content": "\"\"\"add file names to file connector config\n\nRevision ID: 62c3a055a141\nRevises: 3fc5d75723b3\nCreate Date: 2025-07-30 17:01:24.417551\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nimport json\nimport os\nimport logging\n\n\n# revision identifiers, used by Alembic.\nrevision = \"62c3a055a141\"\ndown_revision = \"3fc5d75723b3\"\nbranch_labels = None\ndepends_on = None\n\nSKIP_FILE_NAME_MIGRATION = (\n    os.environ.get(\"SKIP_FILE_NAME_MIGRATION\", \"true\").lower() == \"true\"\n)\n\nlogger = logging.getLogger(\"alembic.runtime.migration\")\n\n\ndef upgrade() -> None:\n    if SKIP_FILE_NAME_MIGRATION:\n        logger.info(\n            \"Skipping file name migration. Hint: set SKIP_FILE_NAME_MIGRATION=false to run this migration\"\n        )\n        return\n    logger.info(\"Running file name migration\")\n    # Get connection\n    conn = op.get_bind()\n\n    # Get all FILE connectors with their configs\n    file_connectors = conn.execute(\n        sa.text(\n            \"\"\"\n            SELECT id, connector_specific_config\n            FROM connector\n            WHERE source = 'FILE'\n        \"\"\"\n        )\n    ).fetchall()\n\n    for connector_id, config in file_connectors:\n        # Parse config if it's a string\n        if isinstance(config, str):\n            config = json.loads(config)\n\n        # Get file_locations list\n        file_locations = config.get(\"file_locations\", [])\n\n        # Get display names for each file_id\n        file_names = []\n        for file_id in file_locations:\n            result = conn.execute(\n                sa.text(\n                    \"\"\"\n                    SELECT display_name\n                    FROM file_record\n                    WHERE file_id = :file_id\n                \"\"\"\n                ),\n                {\"file_id\": file_id},\n            ).fetchone()\n\n            if result:\n                file_names.append(result[0])\n            else:\n                file_names.append(file_id)  # Should not happen\n\n        # Add file_names to config\n        new_config = dict(config)\n        new_config[\"file_names\"] = file_names\n\n        # Update the connector\n        conn.execute(\n            sa.text(\n                \"\"\"\n                UPDATE connector\n                SET connector_specific_config = :new_config\n                WHERE id = :connector_id\n            \"\"\"\n            ),\n            {\"connector_id\": connector_id, \"new_config\": json.dumps(new_config)},\n        )\n\n\ndef downgrade() -> None:\n    # Get connection\n    conn = op.get_bind()\n\n    # Remove file_names from all FILE connectors\n    file_connectors = conn.execute(\n        sa.text(\n            \"\"\"\n            SELECT id, connector_specific_config\n            FROM connector\n            WHERE source = 'FILE'\n        \"\"\"\n        )\n    ).fetchall()\n\n    for connector_id, config in file_connectors:\n        # Parse config if it's a string\n        if isinstance(config, str):\n            config = json.loads(config)\n\n        # Remove file_names if it exists\n        if \"file_names\" in config:\n            new_config = dict(config)\n            del new_config[\"file_names\"]\n\n            # Update the connector\n            conn.execute(\n                sa.text(\n                    \"\"\"\n                    UPDATE connector\n                    SET connector_specific_config = :new_config\n                    WHERE id = :connector_id\n                \"\"\"\n                ),\n                {\n                    \"connector_id\": connector_id,\n                    \"new_config\": json.dumps(new_config),\n                },\n            )\n"
  },
  {
    "path": "backend/alembic/versions/631fd2504136_add_approx_chunk_count_in_vespa_to_.py",
    "content": "\"\"\"add approx_chunk_count_in_vespa to opensearch tenant migration\n\nRevision ID: 631fd2504136\nRevises: c7f2e1b4a9d3\nCreate Date: 2026-02-18 21:07:52.831215\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"631fd2504136\"\ndown_revision = \"c7f2e1b4a9d3\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"opensearch_tenant_migration_record\",\n        sa.Column(\n            \"approx_chunk_count_in_vespa\",\n            sa.Integer(),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"opensearch_tenant_migration_record\", \"approx_chunk_count_in_vespa\")\n"
  },
  {
    "path": "backend/alembic/versions/6436661d5b65_add_created_at_in_project_userfile.py",
    "content": "\"\"\"add_created_at_in_project_userfile\n\nRevision ID: 6436661d5b65\nRevises: c7e9f4a3b2d1\nCreate Date: 2025-11-24 11:50:24.536052\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"6436661d5b65\"\ndown_revision = \"c7e9f4a3b2d1\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add created_at column to project__user_file table\n    op.add_column(\n        \"project__user_file\",\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n    )\n    # Add composite index on (project_id, created_at DESC)\n    op.create_index(\n        \"ix_project__user_file_project_id_created_at\",\n        \"project__user_file\",\n        [\"project_id\", sa.text(\"created_at DESC\")],\n    )\n\n\ndef downgrade() -> None:\n    # Remove composite index on (project_id, created_at)\n    op.drop_index(\n        \"ix_project__user_file_project_id_created_at\", table_name=\"project__user_file\"\n    )\n    # Remove created_at column from project__user_file table\n    op.drop_column(\"project__user_file\", \"created_at\")\n"
  },
  {
    "path": "backend/alembic/versions/643a84a42a33_add_user_configured_names_to_llmprovider.py",
    "content": "\"\"\"Add user-configured names to LLMProvider\n\nRevision ID: 643a84a42a33\nRevises: 0a98909f2757\nCreate Date: 2024-05-07 14:54:55.493100\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"643a84a42a33\"\ndown_revision = \"0a98909f2757\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\"llm_provider\", sa.Column(\"provider\", sa.String(), nullable=True))\n    # move \"name\" -> \"provider\" to match the new schema\n    op.execute(\"UPDATE llm_provider SET provider = name\")\n    # pretty up display name\n    op.execute(\"UPDATE llm_provider SET name = 'OpenAI' WHERE name = 'openai'\")\n    op.execute(\"UPDATE llm_provider SET name = 'Anthropic' WHERE name = 'anthropic'\")\n    op.execute(\"UPDATE llm_provider SET name = 'Azure OpenAI' WHERE name = 'azure'\")\n    op.execute(\"UPDATE llm_provider SET name = 'AWS Bedrock' WHERE name = 'bedrock'\")\n\n    # update personas to use the new provider names\n    op.execute(\n        \"UPDATE persona SET llm_model_provider_override = 'OpenAI' WHERE llm_model_provider_override = 'openai'\"\n    )\n    op.execute(\n        \"UPDATE persona SET llm_model_provider_override = 'Anthropic' WHERE llm_model_provider_override = 'anthropic'\"\n    )\n    op.execute(\n        \"UPDATE persona SET llm_model_provider_override = 'Azure OpenAI' WHERE llm_model_provider_override = 'azure'\"\n    )\n    op.execute(\n        \"UPDATE persona SET llm_model_provider_override = 'AWS Bedrock' WHERE llm_model_provider_override = 'bedrock'\"\n    )\n\n\ndef downgrade() -> None:\n    op.execute(\"UPDATE llm_provider SET name = provider\")\n    op.drop_column(\"llm_provider\", \"provider\")\n"
  },
  {
    "path": "backend/alembic/versions/64bd5677aeb6_add_image_input_support_to_model_config.py",
    "content": "\"\"\"Add image input support to model config\n\nRevision ID: 64bd5677aeb6\nRevises: b30353be4eec\nCreate Date: 2025-09-28 15:48:12.003612\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"64bd5677aeb6\"\ndown_revision = \"b30353be4eec\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"model_configuration\",\n        sa.Column(\"supports_image_input\", sa.Boolean(), nullable=True),\n    )\n\n    # Seems to be left over from when model visibility was introduced and a nullable field.\n    # Set any null is_visible values to False\n    connection = op.get_bind()\n    connection.execute(\n        sa.text(\n            \"UPDATE model_configuration SET is_visible = false WHERE is_visible IS NULL\"\n        )\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"model_configuration\", \"supports_image_input\")\n"
  },
  {
    "path": "backend/alembic/versions/65bc6e0f8500_remove_kg_subtype_from_db.py",
    "content": "\"\"\"remove kg subtype from db\n\nRevision ID: 65bc6e0f8500\nRevises: cec7ec36c505\nCreate Date: 2025-06-13 10:04:27.705976\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"65bc6e0f8500\"\ndown_revision = \"cec7ec36c505\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.drop_column(\"kg_entity\", \"entity_class\")\n    op.drop_column(\"kg_entity\", \"entity_subtype\")\n    op.drop_column(\"kg_entity_extraction_staging\", \"entity_class\")\n    op.drop_column(\"kg_entity_extraction_staging\", \"entity_subtype\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"kg_entity_extraction_staging\",\n        sa.Column(\"entity_subtype\", sa.String(), nullable=True, index=True),\n    )\n    op.add_column(\n        \"kg_entity_extraction_staging\",\n        sa.Column(\"entity_class\", sa.String(), nullable=True, index=True),\n    )\n    op.add_column(\n        \"kg_entity\", sa.Column(\"entity_subtype\", sa.String(), nullable=True, index=True)\n    )\n    op.add_column(\n        \"kg_entity\", sa.Column(\"entity_class\", sa.String(), nullable=True, index=True)\n    )\n"
  },
  {
    "path": "backend/alembic/versions/6756efa39ada_id_uuid_for_chat_session.py",
    "content": "\"\"\"Migrate chat_session and chat_message tables to use UUID primary keys\n\nRevision ID: 6756efa39ada\nRevises: 5d12a446f5c0\nCreate Date: 2024-10-15 17:47:44.108537\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\nrevision = \"6756efa39ada\"\ndown_revision = \"5d12a446f5c0\"\nbranch_labels = None\ndepends_on = None\n\n\"\"\"\nThis script:\n1. Adds UUID columns to chat_session and chat_message\n2. Populates new columns with UUIDs\n3. Updates foreign key relationships\n4. Removes old integer ID columns\n\nNote: Downgrade will assign new integer IDs, not restore original ones.\n\"\"\"\n\n\ndef upgrade() -> None:\n    op.execute(\"CREATE EXTENSION IF NOT EXISTS pgcrypto;\")\n\n    op.add_column(\n        \"chat_session\",\n        sa.Column(\n            \"new_id\",\n            sa.UUID(as_uuid=True),\n            server_default=sa.text(\"gen_random_uuid()\"),\n            nullable=False,\n        ),\n    )\n\n    op.execute(\"UPDATE chat_session SET new_id = gen_random_uuid();\")\n\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"new_chat_session_id\", sa.UUID(as_uuid=True), nullable=True),\n    )\n\n    op.execute(\n        \"\"\"\n        UPDATE chat_message\n        SET new_chat_session_id = cs.new_id\n        FROM chat_session cs\n        WHERE chat_message.chat_session_id = cs.id;\n        \"\"\"\n    )\n\n    op.drop_constraint(\n        \"chat_message_chat_session_id_fkey\", \"chat_message\", type_=\"foreignkey\"\n    )\n\n    op.drop_column(\"chat_message\", \"chat_session_id\")\n    op.alter_column(\n        \"chat_message\", \"new_chat_session_id\", new_column_name=\"chat_session_id\"\n    )\n\n    op.drop_constraint(\"chat_session_pkey\", \"chat_session\", type_=\"primary\")\n    op.drop_column(\"chat_session\", \"id\")\n    op.alter_column(\"chat_session\", \"new_id\", new_column_name=\"id\")\n\n    op.create_primary_key(\"chat_session_pkey\", \"chat_session\", [\"id\"])\n\n    op.create_foreign_key(\n        \"chat_message_chat_session_id_fkey\",\n        \"chat_message\",\n        \"chat_session\",\n        [\"chat_session_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n\ndef downgrade() -> None:\n    op.drop_constraint(\n        \"chat_message_chat_session_id_fkey\", \"chat_message\", type_=\"foreignkey\"\n    )\n\n    op.add_column(\n        \"chat_session\",\n        sa.Column(\"old_id\", sa.Integer, autoincrement=True, nullable=True),\n    )\n\n    op.execute(\"CREATE SEQUENCE chat_session_old_id_seq OWNED BY chat_session.old_id;\")\n    op.execute(\n        \"ALTER TABLE chat_session ALTER COLUMN old_id SET DEFAULT nextval('chat_session_old_id_seq');\"\n    )\n\n    op.execute(\n        \"UPDATE chat_session SET old_id = nextval('chat_session_old_id_seq') WHERE old_id IS NULL;\"\n    )\n\n    op.alter_column(\"chat_session\", \"old_id\", nullable=False)\n\n    op.drop_constraint(\"chat_session_pkey\", \"chat_session\", type_=\"primary\")\n    op.create_primary_key(\"chat_session_pkey\", \"chat_session\", [\"old_id\"])\n\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"old_chat_session_id\", sa.Integer, nullable=True),\n    )\n\n    op.execute(\n        \"\"\"\n        UPDATE chat_message\n        SET old_chat_session_id = cs.old_id\n        FROM chat_session cs\n        WHERE chat_message.chat_session_id = cs.id;\n        \"\"\"\n    )\n\n    op.drop_column(\"chat_message\", \"chat_session_id\")\n    op.alter_column(\n        \"chat_message\", \"old_chat_session_id\", new_column_name=\"chat_session_id\"\n    )\n\n    op.create_foreign_key(\n        \"chat_message_chat_session_id_fkey\",\n        \"chat_message\",\n        \"chat_session\",\n        [\"chat_session_id\"],\n        [\"old_id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    op.drop_column(\"chat_session\", \"id\")\n    op.alter_column(\"chat_session\", \"old_id\", new_column_name=\"id\")\n\n    op.alter_column(\n        \"chat_session\",\n        \"id\",\n        type_=sa.Integer(),\n        existing_type=sa.Integer(),\n        existing_nullable=False,\n        existing_server_default=False,\n    )\n\n    # Rename the sequence\n    op.execute(\"ALTER SEQUENCE chat_session_old_id_seq RENAME TO chat_session_id_seq;\")\n\n    # Update the default value to use the renamed sequence\n    op.alter_column(\n        \"chat_session\",\n        \"id\",\n        server_default=sa.text(\"nextval('chat_session_id_seq'::regclass)\"),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/689433b0d8de_add_hook_and_hook_execution_log_tables.py",
    "content": "\"\"\"add_hook_and_hook_execution_log_tables\n\nRevision ID: 689433b0d8de\nRevises: 93a2e195e25c\nCreate Date: 2026-03-13 11:25:06.547474\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects.postgresql import UUID as PGUUID\n\n\n# revision identifiers, used by Alembic.\nrevision = \"689433b0d8de\"\ndown_revision = \"93a2e195e25c\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"hook\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\n            \"hook_point\",\n            sa.Enum(\"document_ingestion\", \"query_processing\", native_enum=False),\n            nullable=False,\n        ),\n        sa.Column(\"endpoint_url\", sa.Text(), nullable=True),\n        sa.Column(\"api_key\", sa.LargeBinary(), nullable=True),\n        sa.Column(\"is_reachable\", sa.Boolean(), nullable=True),\n        sa.Column(\n            \"fail_strategy\",\n            sa.Enum(\"hard\", \"soft\", native_enum=False),\n            nullable=False,\n        ),\n        sa.Column(\"timeout_seconds\", sa.Float(), nullable=False),\n        sa.Column(\n            \"is_active\", sa.Boolean(), nullable=False, server_default=sa.text(\"false\")\n        ),\n        sa.Column(\n            \"deleted\", sa.Boolean(), nullable=False, server_default=sa.text(\"false\")\n        ),\n        sa.Column(\"creator_id\", PGUUID(as_uuid=True), nullable=True),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint([\"creator_id\"], [\"user.id\"], ondelete=\"SET NULL\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_index(\n        \"ix_hook_one_non_deleted_per_point\",\n        \"hook\",\n        [\"hook_point\"],\n        unique=True,\n        postgresql_where=sa.text(\"deleted = false\"),\n    )\n\n    op.create_table(\n        \"hook_execution_log\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"hook_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"is_success\",\n            sa.Boolean(),\n            nullable=False,\n        ),\n        sa.Column(\"error_message\", sa.Text(), nullable=True),\n        sa.Column(\"status_code\", sa.Integer(), nullable=True),\n        sa.Column(\"duration_ms\", sa.Integer(), nullable=True),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint([\"hook_id\"], [\"hook.id\"], ondelete=\"CASCADE\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_index(\"ix_hook_execution_log_hook_id\", \"hook_execution_log\", [\"hook_id\"])\n    op.create_index(\n        \"ix_hook_execution_log_created_at\", \"hook_execution_log\", [\"created_at\"]\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\"ix_hook_execution_log_created_at\", table_name=\"hook_execution_log\")\n    op.drop_index(\"ix_hook_execution_log_hook_id\", table_name=\"hook_execution_log\")\n    op.drop_table(\"hook_execution_log\")\n\n    op.drop_index(\"ix_hook_one_non_deleted_per_point\", table_name=\"hook\")\n    op.drop_table(\"hook\")\n"
  },
  {
    "path": "backend/alembic/versions/699221885109_nullify_default_task_prompt.py",
    "content": "\"\"\"nullify_default_task_prompt\n\nRevision ID: 699221885109\nRevises: 7e490836d179\nCreate Date: 2025-12-30 10:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"699221885109\"\ndown_revision = \"7e490836d179\"\nbranch_labels = None\ndepends_on = None\n\nDEFAULT_PERSONA_ID = 0\n\n\ndef upgrade() -> None:\n    # Make task_prompt column nullable\n    # Note: The model had nullable=True but the DB column was NOT NULL until this point\n    op.alter_column(\n        \"persona\",\n        \"task_prompt\",\n        nullable=True,\n    )\n\n    # Set task_prompt to NULL for the default persona\n    conn = op.get_bind()\n    conn.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET task_prompt = NULL\n            WHERE id = :persona_id\n            \"\"\"\n        ),\n        {\"persona_id\": DEFAULT_PERSONA_ID},\n    )\n\n\ndef downgrade() -> None:\n    # Restore task_prompt to empty string for the default persona\n    conn = op.get_bind()\n    conn.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET task_prompt = ''\n            WHERE id = :persona_id AND task_prompt IS NULL\n            \"\"\"\n        ),\n        {\"persona_id\": DEFAULT_PERSONA_ID},\n    )\n\n    # Set any remaining NULL task_prompts to empty string before making non-nullable\n    conn.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET task_prompt = ''\n            WHERE task_prompt IS NULL\n            \"\"\"\n        )\n    )\n\n    # Revert task_prompt column to not nullable\n    op.alter_column(\n        \"persona\",\n        \"task_prompt\",\n        nullable=False,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/6a804aeb4830_duplicated_no_harm_user_file_migration.py",
    "content": "\"\"\"duplicated no-harm user file migration\n\nRevision ID: 6a804aeb4830\nRevises: 8e1ac4f39a9f\nCreate Date: 2025-04-01 07:26:10.539362\n\n\"\"\"\n\n# revision identifiers, used by Alembic.\nrevision = \"6a804aeb4830\"\ndown_revision = \"8e1ac4f39a9f\"\nbranch_labels = None\ndepends_on = None\n\n\n# Leaving this around only because some people might be on this migration\n# originally was a duplicate of the user files migration\ndef upgrade() -> None:\n    pass\n\n\ndef downgrade() -> None:\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/6b3b4083c5aa_persona_cleanup_and_featured.py",
    "content": "\"\"\"persona cleanup and featured\n\nRevision ID: 6b3b4083c5aa\nRevises: 57122d037335\nCreate Date: 2026-02-26 12:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"6b3b4083c5aa\"\ndown_revision = \"57122d037335\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add featured column with nullable=True first\n    op.add_column(\"persona\", sa.Column(\"featured\", sa.Boolean(), nullable=True))\n\n    # Migrate data from is_default_persona to featured\n    op.execute(\"UPDATE persona SET featured = is_default_persona\")\n\n    # Make featured non-nullable with default=False\n    op.alter_column(\n        \"persona\",\n        \"featured\",\n        existing_type=sa.Boolean(),\n        nullable=False,\n        server_default=sa.false(),\n    )\n\n    # Drop is_default_persona column\n    op.drop_column(\"persona\", \"is_default_persona\")\n\n    # Drop unused columns\n    op.drop_column(\"persona\", \"num_chunks\")\n    op.drop_column(\"persona\", \"chunks_above\")\n    op.drop_column(\"persona\", \"chunks_below\")\n    op.drop_column(\"persona\", \"llm_relevance_filter\")\n    op.drop_column(\"persona\", \"llm_filter_extraction\")\n    op.drop_column(\"persona\", \"recency_bias\")\n\n\ndef downgrade() -> None:\n    # Add back recency_bias column\n    op.add_column(\n        \"persona\",\n        sa.Column(\n            \"recency_bias\",\n            sa.VARCHAR(),\n            nullable=False,\n            server_default=\"base_decay\",\n        ),\n    )\n\n    # Add back llm_filter_extraction column\n    op.add_column(\n        \"persona\",\n        sa.Column(\n            \"llm_filter_extraction\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.false(),\n        ),\n    )\n\n    # Add back llm_relevance_filter column\n    op.add_column(\n        \"persona\",\n        sa.Column(\n            \"llm_relevance_filter\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.false(),\n        ),\n    )\n\n    # Add back chunks_below column\n    op.add_column(\n        \"persona\",\n        sa.Column(\"chunks_below\", sa.Integer(), nullable=False, server_default=\"0\"),\n    )\n\n    # Add back chunks_above column\n    op.add_column(\n        \"persona\",\n        sa.Column(\"chunks_above\", sa.Integer(), nullable=False, server_default=\"0\"),\n    )\n\n    # Add back num_chunks column\n    op.add_column(\"persona\", sa.Column(\"num_chunks\", sa.Float(), nullable=True))\n\n    # Add back is_default_persona column\n    op.add_column(\n        \"persona\",\n        sa.Column(\n            \"is_default_persona\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.false(),\n        ),\n    )\n\n    # Migrate data from featured to is_default_persona\n    op.execute(\"UPDATE persona SET is_default_persona = featured\")\n\n    # Drop featured column\n    op.drop_column(\"persona\", \"featured\")\n"
  },
  {
    "path": "backend/alembic/versions/6d387b3196c2_basic_auth.py",
    "content": "\"\"\"Basic Auth\n\nRevision ID: 6d387b3196c2\nRevises: 47433d30de82\nCreate Date: 2023-05-05 14:40:10.242502\n\n\"\"\"\n\nimport fastapi_users_db_sqlalchemy\nimport sqlalchemy as sa\nfrom alembic import op\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"6d387b3196c2\"\ndown_revision = \"47433d30de82\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"user\",\n        sa.Column(\"id\", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False),\n        sa.Column(\"email\", sa.String(length=320), nullable=False),\n        sa.Column(\"hashed_password\", sa.String(length=1024), nullable=False),\n        sa.Column(\"is_active\", sa.Boolean(), nullable=False),\n        sa.Column(\"is_superuser\", sa.Boolean(), nullable=False),\n        sa.Column(\"is_verified\", sa.Boolean(), nullable=False),\n        sa.Column(\n            \"role\",\n            sa.Enum(\"BASIC\", \"ADMIN\", name=\"userrole\", native_enum=False),\n            default=\"BASIC\",\n            nullable=False,\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_index(op.f(\"ix_user_email\"), \"user\", [\"email\"], unique=True)\n    op.create_table(\n        \"accesstoken\",\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=False,\n        ),\n        sa.Column(\"token\", sa.String(length=43), nullable=False),\n        sa.Column(\n            \"created_at\",\n            fastapi_users_db_sqlalchemy.generics.TIMESTAMPAware(timezone=True),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint([\"user_id\"], [\"user.id\"], ondelete=\"cascade\"),\n        sa.PrimaryKeyConstraint(\"token\"),\n    )\n    op.create_index(\n        op.f(\"ix_accesstoken_created_at\"),\n        \"accesstoken\",\n        [\"created_at\"],\n        unique=False,\n    )\n    op.alter_column(\n        \"index_attempt\",\n        \"time_created\",\n        existing_type=postgresql.TIMESTAMP(timezone=True),\n        nullable=False,\n        existing_server_default=sa.text(\"now()\"),  # type: ignore\n    )\n    op.alter_column(\n        \"index_attempt\",\n        \"time_updated\",\n        existing_type=postgresql.TIMESTAMP(timezone=True),\n        nullable=False,\n    )\n\n\ndef downgrade() -> None:\n    op.alter_column(\n        \"index_attempt\",\n        \"time_updated\",\n        existing_type=postgresql.TIMESTAMP(timezone=True),\n        nullable=True,\n    )\n    op.alter_column(\n        \"index_attempt\",\n        \"time_created\",\n        existing_type=postgresql.TIMESTAMP(timezone=True),\n        nullable=True,\n        existing_server_default=sa.text(\"now()\"),  # type: ignore\n    )\n    op.drop_index(op.f(\"ix_accesstoken_created_at\"), table_name=\"accesstoken\")\n    op.drop_table(\"accesstoken\")\n    op.drop_index(op.f(\"ix_user_email\"), table_name=\"user\")\n    op.drop_table(\"user\")\n"
  },
  {
    "path": "backend/alembic/versions/6d562f86c78b_remove_default_bot.py",
    "content": "\"\"\"remove default bot\n\nRevision ID: 6d562f86c78b\nRevises: 177de57c21c9\nCreate Date: 2024-11-22 11:51:29.331336\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"6d562f86c78b\"\ndown_revision = \"177de57c21c9\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.execute(\n        sa.text(\n            \"\"\"\n            DELETE FROM slack_bot\n            WHERE name = 'Default Bot'\n            AND bot_token = ''\n            AND app_token = ''\n            AND NOT EXISTS (\n                SELECT 1 FROM slack_channel_config\n                WHERE slack_channel_config.slack_bot_id = slack_bot.id\n            )\n            \"\"\"\n        )\n    )\n\n\ndef downgrade() -> None:\n    op.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO slack_bot (name, enabled, bot_token, app_token)\n            SELECT 'Default Bot', true, '', ''\n            WHERE NOT EXISTS (SELECT 1 FROM slack_bot)\n            RETURNING id;\n            \"\"\"\n        )\n    )\n"
  },
  {
    "path": "backend/alembic/versions/6f4f86aef280_add_queries_and_is_web_fetch_to_.py",
    "content": "\"\"\"add queries and is web fetch to iteration answer\n\nRevision ID: 6f4f86aef280\nRevises: 03d710ccf29c\nCreate Date: 2025-10-14 18:08:30.920123\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n\n# revision identifiers, used by Alembic.\nrevision = \"6f4f86aef280\"\ndown_revision = \"03d710ccf29c\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add is_web_fetch column\n    op.add_column(\n        \"research_agent_iteration_sub_step\",\n        sa.Column(\"is_web_fetch\", sa.Boolean(), nullable=True),\n    )\n\n    # Add queries column\n    op.add_column(\n        \"research_agent_iteration_sub_step\",\n        sa.Column(\"queries\", postgresql.JSONB(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"research_agent_iteration_sub_step\", \"queries\")\n    op.drop_column(\"research_agent_iteration_sub_step\", \"is_web_fetch\")\n"
  },
  {
    "path": "backend/alembic/versions/6fc7886d665d_make_categories_labels_and_many_to_many.py",
    "content": "\"\"\"make categories labels and many to many\n\nRevision ID: 6fc7886d665d\nRevises: 3c6531f32351\nCreate Date: 2025-01-13 18:12:18.029112\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"6fc7886d665d\"\ndown_revision = \"3c6531f32351\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Rename persona_category table to persona_label\n    op.rename_table(\"persona_category\", \"persona_label\")\n\n    # Create the new association table\n    op.create_table(\n        \"persona__persona_label\",\n        sa.Column(\"persona_id\", sa.Integer(), nullable=False),\n        sa.Column(\"persona_label_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"persona_label_id\"],\n            [\"persona_label.id\"],\n            ondelete=\"CASCADE\",\n        ),\n        sa.PrimaryKeyConstraint(\"persona_id\", \"persona_label_id\"),\n    )\n\n    # Copy existing relationships to the new table\n    op.execute(\n        \"\"\"\n        INSERT INTO persona__persona_label (persona_id, persona_label_id)\n        SELECT id, category_id FROM persona WHERE category_id IS NOT NULL\n    \"\"\"\n    )\n\n    # Remove the old category_id column from persona table\n    op.drop_column(\"persona\", \"category_id\")\n\n\ndef downgrade() -> None:\n    # Rename persona_label table back to persona_category\n    op.rename_table(\"persona_label\", \"persona_category\")\n\n    # Add back the category_id column to persona table\n    op.add_column(\"persona\", sa.Column(\"category_id\", sa.Integer(), nullable=True))\n    op.create_foreign_key(\n        \"persona_category_id_fkey\",\n        \"persona\",\n        \"persona_category\",\n        [\"category_id\"],\n        [\"id\"],\n    )\n\n    # Copy the first label relationship back to the persona table\n    op.execute(\n        \"\"\"\n        UPDATE persona\n        SET category_id = (\n            SELECT persona_label_id\n            FROM persona__persona_label\n            WHERE persona__persona_label.persona_id = persona.id\n            LIMIT 1\n        )\n    \"\"\"\n    )\n\n    # Drop the association table\n    op.drop_table(\"persona__persona_label\")\n"
  },
  {
    "path": "backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py",
    "content": "\"\"\"Add TokenRateLimit Tables\n\nRevision ID: 703313b75876\nRevises: fad14119fb92\nCreate Date: 2024-04-15 01:36:02.952809\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"703313b75876\"\ndown_revision = \"fad14119fb92\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"token_rate_limit\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"enabled\", sa.Boolean(), nullable=False),\n        sa.Column(\"token_budget\", sa.Integer(), nullable=False),\n        sa.Column(\"period_hours\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"scope\",\n            sa.String(length=10),\n            nullable=False,\n        ),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"token_rate_limit__user_group\",\n        sa.Column(\"rate_limit_id\", sa.Integer(), nullable=False),\n        sa.Column(\"user_group_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"rate_limit_id\"],\n            [\"token_rate_limit.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_group_id\"],\n            [\"user_group.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"rate_limit_id\", \"user_group_id\"),\n    )\n\n    # NOTE: rate limit settings used to be stored in the \"token_budget_settings\" key in the\n    # KeyValueStore. This will now be lost. The KV store works differently than it used to\n    # so the migration is fairly complicated and likely not worth it to support (pretty much\n    # nobody will have it set)\n\n\ndef downgrade() -> None:\n    op.drop_table(\"token_rate_limit__user_group\")\n    op.drop_table(\"token_rate_limit\")\n"
  },
  {
    "path": "backend/alembic/versions/70f00c45c0f2_more_descriptive_filestore.py",
    "content": "\"\"\"More Descriptive Filestore\n\nRevision ID: 70f00c45c0f2\nRevises: 3879338f8ba1\nCreate Date: 2024-05-17 17:51:41.926893\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"70f00c45c0f2\"\ndown_revision = \"3879338f8ba1\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\"file_store\", sa.Column(\"display_name\", sa.String(), nullable=True))\n    op.add_column(\n        \"file_store\",\n        sa.Column(\n            \"file_origin\",\n            sa.String(),\n            nullable=False,\n            server_default=\"connector\",  # Default to connector\n        ),\n    )\n    op.add_column(\n        \"file_store\",\n        sa.Column(\n            \"file_type\", sa.String(), nullable=False, server_default=\"text/plain\"\n        ),\n    )\n    op.add_column(\n        \"file_store\",\n        sa.Column(\n            \"file_metadata\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=True,\n        ),\n    )\n\n    op.execute(\n        \"\"\"\n        UPDATE file_store\n        SET file_origin = CASE\n            WHEN file_name LIKE 'chat__%' THEN 'chat_upload'\n            ELSE 'connector'\n        END,\n        file_name = CASE\n            WHEN file_name LIKE 'chat__%' THEN SUBSTR(file_name, 7)\n            ELSE file_name\n        END,\n        file_type = CASE\n            WHEN file_name LIKE 'chat__%' THEN 'image/png'\n            ELSE 'text/plain'\n        END\n    \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"file_store\", \"file_metadata\")\n    op.drop_column(\"file_store\", \"file_type\")\n    op.drop_column(\"file_store\", \"file_origin\")\n    op.drop_column(\"file_store\", \"display_name\")\n"
  },
  {
    "path": "backend/alembic/versions/7206234e012a_add_image_generation_config_table.py",
    "content": "\"\"\"add image generation config table\n\nRevision ID: 7206234e012a\nRevises: 699221885109\nCreate Date: 2025-12-21 00:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"7206234e012a\"\ndown_revision = \"699221885109\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"image_generation_config\",\n        sa.Column(\"image_provider_id\", sa.String(), primary_key=True),\n        sa.Column(\"model_configuration_id\", sa.Integer(), nullable=False),\n        sa.Column(\"is_default\", sa.Boolean(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"model_configuration_id\"],\n            [\"model_configuration.id\"],\n            ondelete=\"CASCADE\",\n        ),\n    )\n    op.create_index(\n        \"ix_image_generation_config_is_default\",\n        \"image_generation_config\",\n        [\"is_default\"],\n        unique=False,\n    )\n    op.create_index(\n        \"ix_image_generation_config_model_configuration_id\",\n        \"image_generation_config\",\n        [\"model_configuration_id\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\n        \"ix_image_generation_config_model_configuration_id\",\n        table_name=\"image_generation_config\",\n    )\n    op.drop_index(\n        \"ix_image_generation_config_is_default\", table_name=\"image_generation_config\"\n    )\n    op.drop_table(\"image_generation_config\")\n"
  },
  {
    "path": "backend/alembic/versions/72aa7de2e5cf_make_processing_mode_default_all_caps.py",
    "content": "\"\"\"make processing mode default all caps\n\nRevision ID: 72aa7de2e5cf\nRevises: 2020d417ec84\nCreate Date: 2026-01-26 18:58:47.705253\n\nThis migration fixes the ProcessingMode enum value mismatch:\n- SQLAlchemy's Enum with native_enum=False uses enum member NAMES as valid values\n- The original migration stored lowercase VALUES ('regular', 'file_system')\n- This converts existing data to uppercase NAMES ('REGULAR', 'FILE_SYSTEM')\n- Also drops any spurious native PostgreSQL enum type that may have been auto-created\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"72aa7de2e5cf\"\ndown_revision = \"2020d417ec84\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Convert existing lowercase values to uppercase to match enum member names\n    op.execute(\n        \"UPDATE connector_credential_pair SET processing_mode = 'REGULAR' WHERE processing_mode = 'regular'\"\n    )\n    op.execute(\n        \"UPDATE connector_credential_pair SET processing_mode = 'FILE_SYSTEM' WHERE processing_mode = 'file_system'\"\n    )\n\n    # Update the server default to use uppercase\n    op.alter_column(\n        \"connector_credential_pair\",\n        \"processing_mode\",\n        server_default=\"REGULAR\",\n    )\n\n\ndef downgrade() -> None:\n    # State prior to this was broken, so we don't want to revert back to it\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/72bdc9929a46_permission_auto_sync_framework.py",
    "content": "\"\"\"Permission Auto Sync Framework\n\nRevision ID: 72bdc9929a46\nRevises: 475fcefe8826\nCreate Date: 2024-04-14 21:15:28.659634\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"72bdc9929a46\"\ndown_revision = \"475fcefe8826\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"email_to_external_user_cache\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"external_user_id\", sa.String(), nullable=False),\n        sa.Column(\"user_id\", sa.UUID(), nullable=True),\n        sa.Column(\"user_email\", sa.String(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"external_permission\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"user_id\", sa.UUID(), nullable=True),\n        sa.Column(\"user_email\", sa.String(), nullable=False),\n        sa.Column(\n            \"source_type\",\n            sa.String(),\n            nullable=False,\n        ),\n        sa.Column(\"external_permission_group\", sa.String(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"permission_sync_run\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"source_type\",\n            sa.String(),\n            nullable=False,\n        ),\n        sa.Column(\"update_type\", sa.String(), nullable=False),\n        sa.Column(\"cc_pair_id\", sa.Integer(), nullable=True),\n        sa.Column(\n            \"status\",\n            sa.String(),\n            nullable=False,\n        ),\n        sa.Column(\"error_msg\", sa.Text(), nullable=True),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"cc_pair_id\"],\n            [\"connector_credential_pair.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"permission_sync_run\")\n    op.drop_table(\"external_permission\")\n    op.drop_table(\"email_to_external_user_cache\")\n"
  },
  {
    "path": "backend/alembic/versions/73e9983e5091_add_search_query_table.py",
    "content": "\"\"\"add_search_query_table\n\nRevision ID: 73e9983e5091\nRevises: d1b637d7050a\nCreate Date: 2026-01-14 14:16:52.837489\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"73e9983e5091\"\ndown_revision = \"d1b637d7050a\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"search_query\",\n        sa.Column(\"id\", postgresql.UUID(as_uuid=True), primary_key=True),\n        sa.Column(\n            \"user_id\",\n            postgresql.UUID(as_uuid=True),\n            sa.ForeignKey(\"user.id\"),\n            nullable=False,\n        ),\n        sa.Column(\"query\", sa.String(), nullable=False),\n        sa.Column(\"query_expansions\", postgresql.ARRAY(sa.String()), nullable=True),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            nullable=False,\n            server_default=sa.func.now(),\n        ),\n    )\n\n    op.create_index(\"ix_search_query_user_id\", \"search_query\", [\"user_id\"])\n    op.create_index(\"ix_search_query_created_at\", \"search_query\", [\"created_at\"])\n\n\ndef downgrade() -> None:\n    op.drop_index(\"ix_search_query_created_at\", table_name=\"search_query\")\n    op.drop_index(\"ix_search_query_user_id\", table_name=\"search_query\")\n    op.drop_table(\"search_query\")\n"
  },
  {
    "path": "backend/alembic/versions/7477a5f5d728_added_model_defaults_for_users.py",
    "content": "\"\"\"Added model defaults for users\n\nRevision ID: 7477a5f5d728\nRevises: 213fd978c6d8\nCreate Date: 2024-08-04 19:00:04.512634\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"7477a5f5d728\"\ndown_revision = \"213fd978c6d8\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\"user\", sa.Column(\"default_model\", sa.Text(), nullable=True))\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"default_model\")\n"
  },
  {
    "path": "backend/alembic/versions/7547d982db8f_chat_folders.py",
    "content": "\"\"\"Chat Folders\n\nRevision ID: 7547d982db8f\nRevises: ef7da92f7213\nCreate Date: 2024-05-02 15:18:56.573347\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nimport fastapi_users_db_sqlalchemy\n\n# revision identifiers, used by Alembic.\nrevision = \"7547d982db8f\"\ndown_revision = \"ef7da92f7213\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"chat_folder\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.Column(\"name\", sa.String(), nullable=True),\n        sa.Column(\"display_priority\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.add_column(\"chat_session\", sa.Column(\"folder_id\", sa.Integer(), nullable=True))\n    op.create_foreign_key(\n        \"chat_session_chat_folder_fk\",\n        \"chat_session\",\n        \"chat_folder\",\n        [\"folder_id\"],\n        [\"id\"],\n    )\n\n\ndef downgrade() -> None:\n    bind = op.get_bind()\n    inspector = sa.inspect(bind)\n\n    if \"chat_session\" in inspector.get_table_names():\n        chat_session_fks = {\n            fk.get(\"name\") for fk in inspector.get_foreign_keys(\"chat_session\")\n        }\n        if \"chat_session_chat_folder_fk\" in chat_session_fks:\n            op.drop_constraint(\n                \"chat_session_chat_folder_fk\", \"chat_session\", type_=\"foreignkey\"\n            )\n\n        chat_session_columns = {\n            col[\"name\"] for col in inspector.get_columns(\"chat_session\")\n        }\n        if \"folder_id\" in chat_session_columns:\n            op.drop_column(\"chat_session\", \"folder_id\")\n\n    if \"chat_folder\" in inspector.get_table_names():\n        op.drop_table(\"chat_folder\")\n"
  },
  {
    "path": "backend/alembic/versions/7616121f6e97_add_enterprise_fields_to_scim_user_mapping.py",
    "content": "\"\"\"add enterprise and name fields to scim_user_mapping\n\nRevision ID: 7616121f6e97\nRevises: 07b98176f1de\nCreate Date: 2026-02-23 12:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"7616121f6e97\"\ndown_revision = \"07b98176f1de\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"scim_user_mapping\",\n        sa.Column(\"department\", sa.String(), nullable=True),\n    )\n    op.add_column(\n        \"scim_user_mapping\",\n        sa.Column(\"manager\", sa.String(), nullable=True),\n    )\n    op.add_column(\n        \"scim_user_mapping\",\n        sa.Column(\"given_name\", sa.String(), nullable=True),\n    )\n    op.add_column(\n        \"scim_user_mapping\",\n        sa.Column(\"family_name\", sa.String(), nullable=True),\n    )\n    op.add_column(\n        \"scim_user_mapping\",\n        sa.Column(\"scim_emails_json\", sa.Text(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"scim_user_mapping\", \"scim_emails_json\")\n    op.drop_column(\"scim_user_mapping\", \"family_name\")\n    op.drop_column(\"scim_user_mapping\", \"given_name\")\n    op.drop_column(\"scim_user_mapping\", \"manager\")\n    op.drop_column(\"scim_user_mapping\", \"department\")\n"
  },
  {
    "path": "backend/alembic/versions/767f1c2a00eb_count_chat_tokens.py",
    "content": "\"\"\"Count Chat Tokens\n\nRevision ID: 767f1c2a00eb\nRevises: dba7f71618f5\nCreate Date: 2023-09-21 10:03:21.509899\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"767f1c2a00eb\"\ndown_revision = \"dba7f71618f5\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_message\", sa.Column(\"token_count\", sa.Integer(), nullable=False)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_message\", \"token_count\")\n"
  },
  {
    "path": "backend/alembic/versions/76b60d407dfb_cc_pair_name_not_unique.py",
    "content": "\"\"\"CC-Pair Name not Unique\n\nRevision ID: 76b60d407dfb\nRevises: b156fa702355\nCreate Date: 2023-12-22 21:42:10.018804\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"76b60d407dfb\"\ndown_revision = \"b156fa702355\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.execute(\"DELETE FROM connector_credential_pair WHERE name IS NULL\")\n    op.drop_constraint(\n        \"connector_credential_pair__name__key\",\n        \"connector_credential_pair\",\n        type_=\"unique\",\n    )\n    op.alter_column(\n        \"connector_credential_pair\", \"name\", existing_type=sa.String(), nullable=False\n    )\n\n\ndef downgrade() -> None:\n    op.create_unique_constraint(\n        \"connector_credential_pair__name__key\", \"connector_credential_pair\", [\"name\"]\n    )\n    op.alter_column(\n        \"connector_credential_pair\", \"name\", existing_type=sa.String(), nullable=True\n    )\n"
  },
  {
    "path": "backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py",
    "content": "\"\"\"Remove Remaining Enums\n\nRevision ID: 776b3bbe9092\nRevises: 4738e4b3bae1\nCreate Date: 2024-03-22 21:34:27.629444\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\nfrom onyx.db.models import IndexModelStatus\nfrom onyx.context.search.enums import RecencyBiasSetting, SearchType\n\n# revision identifiers, used by Alembic.\nrevision = \"776b3bbe9092\"\ndown_revision = \"4738e4b3bae1\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\n        \"persona\",\n        \"search_type\",\n        type_=sa.String,\n        existing_type=sa.Enum(SearchType, native_enum=False),\n        existing_nullable=False,\n    )\n    op.alter_column(\n        \"persona\",\n        \"recency_bias\",\n        type_=sa.String,\n        existing_type=sa.Enum(RecencyBiasSetting, native_enum=False),\n        existing_nullable=False,\n    )\n\n    # Because the indexmodelstatus enum does not have a mapping to a string type\n    # we need this workaround instead of directly changing the type\n    op.add_column(\"embedding_model\", sa.Column(\"temp_status\", sa.String))\n    op.execute(\"UPDATE embedding_model SET temp_status = status::text\")\n    op.drop_column(\"embedding_model\", \"status\")\n    op.alter_column(\"embedding_model\", \"temp_status\", new_column_name=\"status\")\n\n    op.execute(\"DROP TYPE IF EXISTS searchtype\")\n    op.execute(\"DROP TYPE IF EXISTS recencybiassetting\")\n    op.execute(\"DROP TYPE IF EXISTS indexmodelstatus\")\n\n\ndef downgrade() -> None:\n    op.alter_column(\n        \"persona\",\n        \"search_type\",\n        type_=sa.Enum(SearchType, native_enum=False),\n        existing_type=sa.String(length=50),\n        existing_nullable=False,\n    )\n    op.alter_column(\n        \"persona\",\n        \"recency_bias\",\n        type_=sa.Enum(RecencyBiasSetting, native_enum=False),\n        existing_type=sa.String(length=50),\n        existing_nullable=False,\n    )\n    op.alter_column(\n        \"embedding_model\",\n        \"status\",\n        type_=sa.Enum(IndexModelStatus, native_enum=False),\n        existing_type=sa.String(length=50),\n        existing_nullable=False,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/77d07dffae64_forcibly_remove_more_enum_types_from_.py",
    "content": "\"\"\"forcibly remove more enum types from postgres\n\nRevision ID: 77d07dffae64\nRevises: d61e513bef0a\nCreate Date: 2023-11-01 12:33:01.999617\n\n\"\"\"\n\nfrom alembic import op\nfrom sqlalchemy import String\n\n\n# revision identifiers, used by Alembic.\nrevision = \"77d07dffae64\"\ndown_revision = \"d61e513bef0a\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    # In a PR:\n    # https://github.com/onyx-dot-app/onyx/pull/397/files#diff-f05fb341f6373790b91852579631b64ca7645797a190837156a282b67e5b19c2\n    # we directly changed some previous migrations. This caused some users to have native enums\n    # while others wouldn't. This has caused some issues when adding new fields to these enums.\n    # This migration manually changes the enum types to ensure that nobody uses native enums.\n    op.alter_column(\"query_event\", \"selected_search_flow\", type_=String)\n    op.alter_column(\"query_event\", \"feedback\", type_=String)\n    op.alter_column(\"document_retrieval_feedback\", \"feedback\", type_=String)\n    op.execute(\"DROP TYPE IF EXISTS searchtype\")\n    op.execute(\"DROP TYPE IF EXISTS qafeedbacktype\")\n    op.execute(\"DROP TYPE IF EXISTS searchfeedbacktype\")\n\n\ndef downgrade() -> None:\n    # We don't want Native Enums, do nothing\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/78dbe7e38469_task_tracking.py",
    "content": "\"\"\"Task Tracking\n\nRevision ID: 78dbe7e38469\nRevises: 7ccea01261f6\nCreate Date: 2023-10-15 23:40:50.593262\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"78dbe7e38469\"\ndown_revision = \"7ccea01261f6\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"task_queue_jobs\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"task_id\", sa.String(), nullable=False),\n        sa.Column(\"task_name\", sa.String(), nullable=False),\n        sa.Column(\n            \"status\",\n            sa.Enum(\n                \"PENDING\",\n                \"STARTED\",\n                \"SUCCESS\",\n                \"FAILURE\",\n                name=\"taskstatus\",\n                native_enum=False,\n            ),\n            nullable=False,\n        ),\n        sa.Column(\"start_time\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\n            \"register_time\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"task_queue_jobs\")\n"
  },
  {
    "path": "backend/alembic/versions/78ebc66946a0_remove_reranking_from_search_settings.py",
    "content": "\"\"\"remove reranking from search_settings\n\nRevision ID: 78ebc66946a0\nRevises: 849b21c732f8\nCreate Date: 2026-01-28\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"78ebc66946a0\"\ndown_revision = \"849b21c732f8\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.drop_column(\"search_settings\", \"disable_rerank_for_streaming\")\n    op.drop_column(\"search_settings\", \"rerank_model_name\")\n    op.drop_column(\"search_settings\", \"rerank_provider_type\")\n    op.drop_column(\"search_settings\", \"rerank_api_key\")\n    op.drop_column(\"search_settings\", \"rerank_api_url\")\n    op.drop_column(\"search_settings\", \"num_rerank\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"disable_rerank_for_streaming\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=\"false\",\n        ),\n    )\n    op.add_column(\n        \"search_settings\", sa.Column(\"rerank_model_name\", sa.String(), nullable=True)\n    )\n    op.add_column(\n        \"search_settings\", sa.Column(\"rerank_provider_type\", sa.String(), nullable=True)\n    )\n    op.add_column(\n        \"search_settings\", sa.Column(\"rerank_api_key\", sa.String(), nullable=True)\n    )\n    op.add_column(\n        \"search_settings\", sa.Column(\"rerank_api_url\", sa.String(), nullable=True)\n    )\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"num_rerank\",\n            sa.Integer(),\n            nullable=False,\n            server_default=str(20),\n        ),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/795b20b85b4b_add_llm_group_permissions_control.py",
    "content": "\"\"\"add_llm_group_permissions_control\n\nRevision ID: 795b20b85b4b\nRevises: 05c07bf07c00\nCreate Date: 2024-07-19 11:54:35.701558\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\nrevision = \"795b20b85b4b\"\ndown_revision = \"05c07bf07c00\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"llm_provider__user_group\",\n        sa.Column(\"llm_provider_id\", sa.Integer(), nullable=False),\n        sa.Column(\"user_group_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"llm_provider_id\"],\n            [\"llm_provider.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_group_id\"],\n            [\"user_group.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"llm_provider_id\", \"user_group_id\"),\n    )\n    op.add_column(\n        \"llm_provider\",\n        sa.Column(\"is_public\", sa.Boolean(), nullable=False, server_default=\"true\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"llm_provider__user_group\")\n    op.drop_column(\"llm_provider\", \"is_public\")\n"
  },
  {
    "path": "backend/alembic/versions/797089dfb4d2_persona_start_date.py",
    "content": "\"\"\"persona_start_date\n\nRevision ID: 797089dfb4d2\nRevises: 55546a7967ee\nCreate Date: 2024-09-11 14:51:49.785835\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"797089dfb4d2\"\ndown_revision = \"55546a7967ee\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"persona\",\n        sa.Column(\"search_start_date\", sa.DateTime(timezone=True), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"persona\", \"search_start_date\")\n"
  },
  {
    "path": "backend/alembic/versions/79acd316403a_add_api_key_table.py",
    "content": "\"\"\"Add api_key table\n\nRevision ID: 79acd316403a\nRevises: 904e5138fffb\nCreate Date: 2024-01-11 17:56:37.934381\n\n\"\"\"\n\nfrom alembic import op\nimport fastapi_users_db_sqlalchemy\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"79acd316403a\"\ndown_revision = \"904e5138fffb\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"api_key\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"hashed_api_key\", sa.String(), nullable=False),\n        sa.Column(\"api_key_display\", sa.String(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"owner_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"api_key_display\"),\n        sa.UniqueConstraint(\"hashed_api_key\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"api_key\")\n"
  },
  {
    "path": "backend/alembic/versions/7a70b7664e37_add_model_configuration_table.py",
    "content": "\"\"\"Add model-configuration table\n\nRevision ID: 7a70b7664e37\nRevises: d961aca62eb3\nCreate Date: 2025-04-10 15:00:35.984669\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\nfrom onyx.llm.well_known_providers.llm_provider_options import (\n    fetch_model_names_for_provider_as_set,\n    fetch_visible_model_names_for_provider_as_set,\n)\n\n# revision identifiers, used by Alembic.\nrevision = \"7a70b7664e37\"\ndown_revision = \"d961aca62eb3\"\nbranch_labels = None\ndepends_on = None\n\n\ndef _resolve(\n    provider_name: str,\n    model_names: list[str] | None,\n    display_model_names: list[str] | None,\n    default_model_name: str,\n    fast_default_model_name: str | None,\n) -> set[tuple[str, bool]]:\n    models = set(model_names) if model_names else None\n    display_models = set(display_model_names) if display_model_names else None\n\n    # If both are defined, we need to make sure that `model_names` is a superset of `display_model_names`.\n    if models and display_models:\n        models = display_models.union(models)\n\n    # If only `model_names` is defined, then:\n    #   - If default-model-names are available for the `provider_name`, then set `display_model_names` to it\n    #     and set `model_names` to the union of those default-model-names with itself.\n    #   - If no default-model-names are available, then set `display_models` to `models`.\n    #\n    # This preserves the invariant that `display_models` is a subset of `models`.\n    elif models and not display_models:\n        visible_default_models = fetch_visible_model_names_for_provider_as_set(\n            provider_name=provider_name\n        )\n        if visible_default_models:\n            display_models = set(visible_default_models)\n            models = display_models.union(models)\n        else:\n            display_models = set(models)\n\n    # If only the `display_model_names` are defined, then set `models` to the union of `display_model_names`\n    # and the default-model-names for that provider.\n    #\n    # This will also preserve the invariant that `display_models` is a subset of `models`.\n    elif not models and display_models:\n        default_models = fetch_model_names_for_provider_as_set(\n            provider_name=provider_name\n        )\n        if default_models:\n            models = display_models.union(default_models)\n        else:\n            models = set(display_models)\n\n    # If neither are defined, then set `models` and `display_models` to the default-model-names for the given provider.\n    #\n    # This will also preserve the invariant that `display_models` is a subset of `models`.\n    else:\n        default_models = fetch_model_names_for_provider_as_set(\n            provider_name=provider_name\n        )\n        visible_default_models = fetch_visible_model_names_for_provider_as_set(\n            provider_name=provider_name\n        )\n\n        if default_models:\n            if not visible_default_models:\n                raise RuntimeError\n                raise RuntimeError(\n                    \"If `default_models` is non-None, `visible_default_models` must be non-None too.\"\n                )\n            models = default_models\n            display_models = visible_default_models\n\n        # This is not a well-known llm-provider; we can't provide any model suggestions.\n        # Therefore, we set to the empty set and continue\n        else:\n            models = set()\n            display_models = set()\n\n    # It is possible that `default_model_name` is not in `models` and is not in `display_models`.\n    # It is also possible that `fast_default_model_name` is not in `models` and is not in `display_models`.\n    models.add(default_model_name)\n    if fast_default_model_name:\n        models.add(fast_default_model_name)\n    display_models.add(default_model_name)\n    if fast_default_model_name:\n        display_models.add(fast_default_model_name)\n\n    return set([(model, model in display_models) for model in models])\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"model_configuration\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"llm_provider_id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"is_visible\", sa.Boolean(), nullable=False),\n        sa.Column(\"max_input_tokens\", sa.Integer(), nullable=True),\n        sa.ForeignKeyConstraint(\n            [\"llm_provider_id\"], [\"llm_provider.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"llm_provider_id\", \"name\"),\n    )\n\n    # Create temporary sqlalchemy references to tables for data migration\n    llm_provider_table = sa.sql.table(\n        \"llm_provider\",\n        sa.column(\"id\", sa.Integer),\n        sa.column(\"provider\", sa.Integer),\n        sa.column(\"model_names\", postgresql.ARRAY(sa.String)),\n        sa.column(\"display_model_names\", postgresql.ARRAY(sa.String)),\n        sa.column(\"default_model_name\", sa.String),\n        sa.column(\"fast_default_model_name\", sa.String),\n    )\n    model_configuration_table = sa.sql.table(\n        \"model_configuration\",\n        sa.column(\"id\", sa.Integer),\n        sa.column(\"llm_provider_id\", sa.Integer),\n        sa.column(\"name\", sa.String),\n        sa.column(\"is_visible\", sa.Boolean),\n        sa.column(\"max_input_tokens\", sa.Integer),\n    )\n    connection = op.get_bind()\n    llm_providers = connection.execute(\n        sa.select(\n            llm_provider_table.c.id,\n            llm_provider_table.c.provider,\n            llm_provider_table.c.model_names,\n            llm_provider_table.c.display_model_names,\n            llm_provider_table.c.default_model_name,\n            llm_provider_table.c.fast_default_model_name,\n        )\n    ).fetchall()\n\n    for llm_provider in llm_providers:\n        provider_id = llm_provider[0]\n        provider_name = llm_provider[1]\n        model_names = llm_provider[2]\n        display_model_names = llm_provider[3]\n        default_model_name = llm_provider[4]\n        fast_default_model_name = llm_provider[5]\n\n        model_configurations = _resolve(\n            provider_name=provider_name,\n            model_names=model_names,\n            display_model_names=display_model_names,\n            default_model_name=default_model_name,\n            fast_default_model_name=fast_default_model_name,\n        )\n\n        for model_name, is_visible in model_configurations:\n            connection.execute(\n                model_configuration_table.insert().values(\n                    llm_provider_id=provider_id,\n                    name=model_name,\n                    is_visible=is_visible,\n                    max_input_tokens=None,\n                )\n            )\n\n    op.drop_column(\"llm_provider\", \"model_names\")\n    op.drop_column(\"llm_provider\", \"display_model_names\")\n\n\ndef downgrade() -> None:\n    llm_provider = sa.table(\n        \"llm_provider\",\n        sa.column(\"id\", sa.Integer),\n        sa.column(\"model_names\", postgresql.ARRAY(sa.String)),\n        sa.column(\"display_model_names\", postgresql.ARRAY(sa.String)),\n    )\n\n    model_configuration = sa.table(\n        \"model_configuration\",\n        sa.column(\"id\", sa.Integer),\n        sa.column(\"llm_provider_id\", sa.Integer),\n        sa.column(\"name\", sa.String),\n        sa.column(\"is_visible\", sa.Boolean),\n        sa.column(\"max_input_tokens\", sa.Integer),\n    )\n    op.add_column(\n        \"llm_provider\",\n        sa.Column(\n            \"model_names\",\n            postgresql.ARRAY(sa.VARCHAR()),\n            autoincrement=False,\n            nullable=True,\n        ),\n    )\n    op.add_column(\n        \"llm_provider\",\n        sa.Column(\n            \"display_model_names\",\n            postgresql.ARRAY(sa.VARCHAR()),\n            autoincrement=False,\n            nullable=True,\n        ),\n    )\n\n    connection = op.get_bind()\n    provider_ids = connection.execute(sa.select(llm_provider.c.id)).fetchall()\n\n    for (provider_id,) in provider_ids:\n        # Get all models for this provider\n        models = connection.execute(\n            sa.select(\n                model_configuration.c.name, model_configuration.c.is_visible\n            ).where(model_configuration.c.llm_provider_id == provider_id)\n        ).fetchall()\n\n        all_models = [model[0] for model in models]\n        visible_models = [model[0] for model in models if model[1]]\n\n        # Update provider with arrays\n        op.execute(\n            llm_provider.update()\n            .where(llm_provider.c.id == provider_id)\n            .values(model_names=all_models, display_model_names=visible_models)\n        )\n\n    op.drop_table(\"model_configuration\")\n"
  },
  {
    "path": "backend/alembic/versions/7aea705850d5_added_slack_auto_filter.py",
    "content": "\"\"\"added slack_auto_filter\n\nRevision ID: 7aea705850d5\nRevises: 4505fd7302e1\nCreate Date: 2024-07-10 11:01:23.581015\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\nrevision = \"7aea705850d5\"\ndown_revision = \"4505fd7302e1\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"slack_bot_config\",\n        sa.Column(\"enable_auto_filters\", sa.Boolean(), nullable=True),\n    )\n    op.execute(\n        \"UPDATE slack_bot_config SET enable_auto_filters = FALSE WHERE enable_auto_filters IS NULL\"\n    )\n    op.alter_column(\n        \"slack_bot_config\",\n        \"enable_auto_filters\",\n        existing_type=sa.Boolean(),\n        nullable=False,\n        server_default=sa.false(),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"slack_bot_config\", \"enable_auto_filters\")\n"
  },
  {
    "path": "backend/alembic/versions/7b9b952abdf6_update_entities.py",
    "content": "\"\"\"update-entities\n\nRevision ID: 7b9b952abdf6\nRevises: 36e9220ab794\nCreate Date: 2025-06-23 20:24:08.139201\n\n\"\"\"\n\nimport json\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"7b9b952abdf6\"\ndown_revision = \"36e9220ab794\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n\n    # new entity type metadata_attribute_conversion\n    new_entity_type_conversion = {\n        \"LINEAR\": {\n            \"team\": {\"name\": \"team\", \"keep\": True, \"implication_property\": None},\n            \"state\": {\"name\": \"state\", \"keep\": True, \"implication_property\": None},\n            \"priority\": {\n                \"name\": \"priority\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"estimate\": {\n                \"name\": \"estimate\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"created_at\": {\n                \"name\": \"created_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"started_at\": {\n                \"name\": \"started_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"completed_at\": {\n                \"name\": \"completed_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"due_date\": {\n                \"name\": \"due_date\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"creator\": {\n                \"name\": \"creator\",\n                \"keep\": False,\n                \"implication_property\": {\n                    \"implied_entity_type\": \"from_email\",\n                    \"implied_relationship_name\": \"is_creator_of\",\n                },\n            },\n            \"assignee\": {\n                \"name\": \"assignee\",\n                \"keep\": False,\n                \"implication_property\": {\n                    \"implied_entity_type\": \"from_email\",\n                    \"implied_relationship_name\": \"is_assignee_of\",\n                },\n            },\n        },\n        \"JIRA\": {\n            \"issuetype\": {\n                \"name\": \"subtype\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"status\": {\"name\": \"status\", \"keep\": True, \"implication_property\": None},\n            \"priority\": {\n                \"name\": \"priority\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"project_name\": {\n                \"name\": \"project\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"created\": {\n                \"name\": \"created_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"updated\": {\n                \"name\": \"updated_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"resolution_date\": {\n                \"name\": \"completed_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"duedate\": {\"name\": \"due_date\", \"keep\": True, \"implication_property\": None},\n            \"reporter_email\": {\n                \"name\": \"creator\",\n                \"keep\": False,\n                \"implication_property\": {\n                    \"implied_entity_type\": \"from_email\",\n                    \"implied_relationship_name\": \"is_creator_of\",\n                },\n            },\n            \"assignee_email\": {\n                \"name\": \"assignee\",\n                \"keep\": False,\n                \"implication_property\": {\n                    \"implied_entity_type\": \"from_email\",\n                    \"implied_relationship_name\": \"is_assignee_of\",\n                },\n            },\n            \"key\": {\"name\": \"key\", \"keep\": True, \"implication_property\": None},\n            \"parent\": {\"name\": \"parent\", \"keep\": True, \"implication_property\": None},\n        },\n        \"GITHUB_PR\": {\n            \"repo\": {\"name\": \"repository\", \"keep\": True, \"implication_property\": None},\n            \"state\": {\"name\": \"state\", \"keep\": True, \"implication_property\": None},\n            \"num_commits\": {\n                \"name\": \"num_commits\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"num_files_changed\": {\n                \"name\": \"num_files_changed\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"labels\": {\"name\": \"labels\", \"keep\": True, \"implication_property\": None},\n            \"merged\": {\"name\": \"merged\", \"keep\": True, \"implication_property\": None},\n            \"merged_at\": {\n                \"name\": \"merged_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"closed_at\": {\n                \"name\": \"closed_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"created_at\": {\n                \"name\": \"created_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"updated_at\": {\n                \"name\": \"updated_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"user\": {\n                \"name\": \"creator\",\n                \"keep\": False,\n                \"implication_property\": {\n                    \"implied_entity_type\": \"from_email\",\n                    \"implied_relationship_name\": \"is_creator_of\",\n                },\n            },\n            \"assignees\": {\n                \"name\": \"assignees\",\n                \"keep\": False,\n                \"implication_property\": {\n                    \"implied_entity_type\": \"from_email\",\n                    \"implied_relationship_name\": \"is_assignee_of\",\n                },\n            },\n        },\n        \"GITHUB_ISSUE\": {\n            \"repo\": {\"name\": \"repository\", \"keep\": True, \"implication_property\": None},\n            \"state\": {\"name\": \"state\", \"keep\": True, \"implication_property\": None},\n            \"labels\": {\"name\": \"labels\", \"keep\": True, \"implication_property\": None},\n            \"closed_at\": {\n                \"name\": \"closed_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"created_at\": {\n                \"name\": \"created_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"updated_at\": {\n                \"name\": \"updated_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"user\": {\n                \"name\": \"creator\",\n                \"keep\": False,\n                \"implication_property\": {\n                    \"implied_entity_type\": \"from_email\",\n                    \"implied_relationship_name\": \"is_creator_of\",\n                },\n            },\n            \"assignees\": {\n                \"name\": \"assignees\",\n                \"keep\": False,\n                \"implication_property\": {\n                    \"implied_entity_type\": \"from_email\",\n                    \"implied_relationship_name\": \"is_assignee_of\",\n                },\n            },\n        },\n        \"FIREFLIES\": {},\n        \"ACCOUNT\": {},\n        \"OPPORTUNITY\": {\n            \"name\": {\"name\": \"name\", \"keep\": True, \"implication_property\": None},\n            \"stage_name\": {\"name\": \"stage\", \"keep\": True, \"implication_property\": None},\n            \"type\": {\"name\": \"type\", \"keep\": True, \"implication_property\": None},\n            \"amount\": {\"name\": \"amount\", \"keep\": True, \"implication_property\": None},\n            \"fiscal_year\": {\n                \"name\": \"fiscal_year\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"fiscal_quarter\": {\n                \"name\": \"fiscal_quarter\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"is_closed\": {\n                \"name\": \"is_closed\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"close_date\": {\n                \"name\": \"close_date\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"probability\": {\n                \"name\": \"close_probability\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"created_date\": {\n                \"name\": \"created_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"last_modified_date\": {\n                \"name\": \"updated_at\",\n                \"keep\": True,\n                \"implication_property\": None,\n            },\n            \"account\": {\n                \"name\": \"account\",\n                \"keep\": False,\n                \"implication_property\": {\n                    \"implied_entity_type\": \"ACCOUNT\",\n                    \"implied_relationship_name\": \"is_account_of\",\n                },\n            },\n        },\n        \"VENDOR\": {},\n        \"EMPLOYEE\": {},\n    }\n\n    current_entity_types = conn.execute(\n        sa.text(\"SELECT id_name, attributes from kg_entity_type\")\n    ).all()\n    for entity_type, attributes in current_entity_types:\n        # delete removed entity types\n        if entity_type not in new_entity_type_conversion:\n            op.execute(\n                sa.text(f\"DELETE FROM kg_entity_type WHERE id_name = '{entity_type}'\")\n            )\n            continue\n\n        # update entity type attributes\n        if \"metadata_attributes\" in attributes:\n            del attributes[\"metadata_attributes\"]\n        attributes[\"metadata_attribute_conversion\"] = new_entity_type_conversion[\n            entity_type\n        ]\n        attributes_str = json.dumps(attributes).replace(\"'\", \"''\")\n        op.execute(\n            sa.text(\n                f\"UPDATE kg_entity_type SET attributes = '{attributes_str}'WHERE id_name = '{entity_type}'\"\n            ),\n        )\n\n\ndef downgrade() -> None:\n    conn = op.get_bind()\n\n    current_entity_types = conn.execute(\n        sa.text(\"SELECT id_name, attributes from kg_entity_type\")\n    ).all()\n    for entity_type, attributes in current_entity_types:\n        conversion = {}\n        if \"metadata_attribute_conversion\" in attributes:\n            conversion = attributes.pop(\"metadata_attribute_conversion\")\n        attributes[\"metadata_attributes\"] = {\n            attr: prop[\"name\"] for attr, prop in conversion.items() if prop[\"keep\"]\n        }\n\n        attributes_str = json.dumps(attributes).replace(\"'\", \"''\")\n        op.execute(\n            sa.text(\n                f\"UPDATE kg_entity_type SET attributes = '{attributes_str}'WHERE id_name = '{entity_type}'\"\n            ),\n        )\n"
  },
  {
    "path": "backend/alembic/versions/7bd55f264e1b_add_display_name_to_model_configuration.py",
    "content": "\"\"\"Add display_name to model_configuration\n\nRevision ID: 7bd55f264e1b\nRevises: e8f0d2a38171\nCreate Date: 2025-12-04\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"7bd55f264e1b\"\ndown_revision = \"e8f0d2a38171\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"model_configuration\",\n        sa.Column(\"display_name\", sa.String(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"model_configuration\", \"display_name\")\n"
  },
  {
    "path": "backend/alembic/versions/7cb492013621_code_interpreter_server_model.py",
    "content": "\"\"\"code interpreter server model\n\nRevision ID: 7cb492013621\nRevises: 0bb4558f35df\nCreate Date: 2026-02-22 18:54:54.007265\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"7cb492013621\"\ndown_revision = \"0bb4558f35df\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"code_interpreter_server\",\n        sa.Column(\"id\", sa.Integer, primary_key=True),\n        sa.Column(\n            \"server_enabled\", sa.Boolean, nullable=False, server_default=sa.true()\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"code_interpreter_server\")\n"
  },
  {
    "path": "backend/alembic/versions/7cc3fcc116c1_user_file_uuid_primary_key_swap.py",
    "content": "\"\"\"Migration 4: User file UUID primary key swap\n\nRevision ID: 7cc3fcc116c1\nRevises: 16c37a30adf2\nCreate Date: 2025-09-22 09:54:38.292952\n\nThis migration performs the critical UUID primary key swap on user_file table.\nIt updates all foreign key references to use UUIDs instead of integers.\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql as psql\nimport logging\n\nlogger = logging.getLogger(\"alembic.runtime.migration\")\n\n# revision identifiers, used by Alembic.\nrevision = \"7cc3fcc116c1\"\ndown_revision = \"16c37a30adf2\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    \"\"\"Swap user_file primary key from integer to UUID.\"\"\"\n\n    bind = op.get_bind()\n    inspector = sa.inspect(bind)\n\n    # Verify we're in the expected state\n    user_file_columns = [col[\"name\"] for col in inspector.get_columns(\"user_file\")]\n    if \"new_id\" not in user_file_columns:\n        logger.warning(\n            \"user_file.new_id not found - migration may have already been applied\"\n        )\n        return\n\n    logger.info(\"Starting UUID primary key swap...\")\n\n    # === Step 1: Update persona__user_file foreign key to UUID ===\n    logger.info(\"Updating persona__user_file foreign key...\")\n\n    # Drop existing foreign key constraints\n    op.execute(\n        \"ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_user_file_id_uuid_fkey\"\n    )\n    op.execute(\n        \"ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_user_file_id_fkey\"\n    )\n\n    # Create new foreign key to user_file.new_id\n    op.create_foreign_key(\n        \"persona__user_file_user_file_id_fkey\",\n        \"persona__user_file\",\n        \"user_file\",\n        local_cols=[\"user_file_id_uuid\"],\n        remote_cols=[\"new_id\"],\n    )\n\n    # Drop the old integer column and rename UUID column\n    op.execute(\"ALTER TABLE persona__user_file DROP COLUMN IF EXISTS user_file_id\")\n    op.alter_column(\n        \"persona__user_file\",\n        \"user_file_id_uuid\",\n        new_column_name=\"user_file_id\",\n        existing_type=psql.UUID(as_uuid=True),\n        nullable=False,\n    )\n\n    # Recreate composite primary key\n    op.execute(\n        \"ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_pkey\"\n    )\n    op.execute(\n        \"ALTER TABLE persona__user_file ADD PRIMARY KEY (persona_id, user_file_id)\"\n    )\n\n    logger.info(\"Updated persona__user_file to use UUID foreign key\")\n\n    # === Step 2: Perform the primary key swap on user_file ===\n    logger.info(\"Swapping user_file primary key to UUID...\")\n\n    # Drop the primary key constraint\n    op.execute(\"ALTER TABLE user_file DROP CONSTRAINT IF EXISTS user_file_pkey\")\n\n    # Drop the old id column and rename new_id to id\n    op.execute(\"ALTER TABLE user_file DROP COLUMN IF EXISTS id\")\n    op.alter_column(\n        \"user_file\",\n        \"new_id\",\n        new_column_name=\"id\",\n        existing_type=psql.UUID(as_uuid=True),\n        nullable=False,\n    )\n\n    # Set default for new inserts\n    op.alter_column(\n        \"user_file\",\n        \"id\",\n        existing_type=psql.UUID(as_uuid=True),\n        server_default=sa.text(\"gen_random_uuid()\"),\n    )\n\n    # Create new primary key\n    op.execute(\"ALTER TABLE user_file ADD PRIMARY KEY (id)\")\n\n    logger.info(\"Swapped user_file primary key to UUID\")\n\n    # === Step 3: Update foreign key constraints ===\n    logger.info(\"Updating foreign key constraints...\")\n\n    # Recreate persona__user_file foreign key to point to user_file.id\n    # Drop existing FK first to break dependency on the unique constraint\n    op.execute(\n        \"ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_user_file_id_fkey\"\n    )\n    # Drop the unique constraint on (formerly) new_id BEFORE recreating the FK,\n    # so the FK will bind to the primary key instead of the unique index.\n    op.execute(\"ALTER TABLE user_file DROP CONSTRAINT IF EXISTS uq_user_file_new_id\")\n    # Now recreate FK to the primary key column\n    op.create_foreign_key(\n        \"persona__user_file_user_file_id_fkey\",\n        \"persona__user_file\",\n        \"user_file\",\n        local_cols=[\"user_file_id\"],\n        remote_cols=[\"id\"],\n    )\n\n    # Add foreign keys for project__user_file\n    existing_fks = inspector.get_foreign_keys(\"project__user_file\")\n\n    has_user_file_fk = any(\n        fk.get(\"referred_table\") == \"user_file\"\n        and fk.get(\"constrained_columns\") == [\"user_file_id\"]\n        for fk in existing_fks\n    )\n\n    if not has_user_file_fk:\n        op.create_foreign_key(\n            \"fk_project__user_file_user_file_id\",\n            \"project__user_file\",\n            \"user_file\",\n            [\"user_file_id\"],\n            [\"id\"],\n        )\n        logger.info(\"Added project__user_file -> user_file foreign key\")\n\n    has_project_fk = any(\n        fk.get(\"referred_table\") == \"user_project\"\n        and fk.get(\"constrained_columns\") == [\"project_id\"]\n        for fk in existing_fks\n    )\n\n    if not has_project_fk:\n        op.create_foreign_key(\n            \"fk_project__user_file_project_id\",\n            \"project__user_file\",\n            \"user_project\",\n            [\"project_id\"],\n            [\"id\"],\n        )\n        logger.info(\"Added project__user_file -> user_project foreign key\")\n\n    # === Step 4: Mark files for document_id migration ===\n    logger.info(\"Marking files for background document_id migration...\")\n\n    logger.info(\"Migration 4 (UUID primary key swap) completed successfully\")\n    logger.info(\n        \"NOTE: Background task will update document IDs in Vespa and search_doc\"\n    )\n\n\ndef downgrade() -> None:\n    \"\"\"Revert UUID primary key back to integer (data destructive!).\"\"\"\n\n    logger.error(\"CRITICAL: Downgrading UUID primary key swap is data destructive!\")\n    logger.error(\n        \"This will break all UUID-based references created after the migration.\"\n    )\n    logger.error(\"Only proceed if absolutely necessary and have backups.\")\n\n    bind = op.get_bind()\n    inspector = sa.inspect(bind)\n\n    # Capture existing primary key definitions so we can restore them after swaps\n    persona_pk = inspector.get_pk_constraint(\"persona__user_file\") or {}\n    persona_pk_name = persona_pk.get(\"name\")\n    persona_pk_cols = persona_pk.get(\"constrained_columns\") or []\n\n    project_pk = inspector.get_pk_constraint(\"project__user_file\") or {}\n    project_pk_name = project_pk.get(\"name\")\n    project_pk_cols = project_pk.get(\"constrained_columns\") or []\n\n    # Drop foreign keys that reference the UUID primary key\n    op.drop_constraint(\n        \"persona__user_file_user_file_id_fkey\",\n        \"persona__user_file\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\n        \"fk_project__user_file_user_file_id\",\n        \"project__user_file\",\n        type_=\"foreignkey\",\n    )\n\n    # Drop primary keys that rely on the UUID column so we can replace it\n    if persona_pk_name:\n        op.drop_constraint(persona_pk_name, \"persona__user_file\", type_=\"primary\")\n    if project_pk_name:\n        op.drop_constraint(project_pk_name, \"project__user_file\", type_=\"primary\")\n\n    # Rebuild integer IDs on user_file using a sequence-backed column\n    op.execute(\"CREATE SEQUENCE IF NOT EXISTS user_file_id_seq\")\n    op.add_column(\n        \"user_file\",\n        sa.Column(\n            \"id_int\",\n            sa.Integer(),\n            server_default=sa.text(\"nextval('user_file_id_seq')\"),\n            nullable=False,\n        ),\n    )\n    op.execute(\"ALTER SEQUENCE user_file_id_seq OWNED BY user_file.id_int\")\n\n    # Prepare integer foreign key columns on referencing tables\n    op.add_column(\n        \"persona__user_file\",\n        sa.Column(\"user_file_id_int\", sa.Integer(), nullable=True),\n    )\n    op.add_column(\n        \"project__user_file\",\n        sa.Column(\"user_file_id_int\", sa.Integer(), nullable=True),\n    )\n\n    # Populate the new integer foreign key columns by mapping from the UUID IDs\n    op.execute(\n        \"\"\"\n        UPDATE persona__user_file AS p\n        SET user_file_id_int = uf.id_int\n        FROM user_file AS uf\n        WHERE p.user_file_id = uf.id\n        \"\"\"\n    )\n    op.execute(\n        \"\"\"\n        UPDATE project__user_file AS p\n        SET user_file_id_int = uf.id_int\n        FROM user_file AS uf\n        WHERE p.user_file_id = uf.id\n        \"\"\"\n    )\n\n    op.alter_column(\n        \"persona__user_file\",\n        \"user_file_id_int\",\n        existing_type=sa.Integer(),\n        nullable=False,\n    )\n    op.alter_column(\n        \"project__user_file\",\n        \"user_file_id_int\",\n        existing_type=sa.Integer(),\n        nullable=False,\n    )\n\n    # Remove the UUID foreign key columns and rename the integer replacements\n    op.drop_column(\"persona__user_file\", \"user_file_id\")\n    op.alter_column(\n        \"persona__user_file\",\n        \"user_file_id_int\",\n        new_column_name=\"user_file_id\",\n        existing_type=sa.Integer(),\n        nullable=False,\n    )\n\n    op.drop_column(\"project__user_file\", \"user_file_id\")\n    op.alter_column(\n        \"project__user_file\",\n        \"user_file_id_int\",\n        new_column_name=\"user_file_id\",\n        existing_type=sa.Integer(),\n        nullable=False,\n    )\n\n    # Swap the user_file primary key back to the integer column\n    op.drop_constraint(\"user_file_pkey\", \"user_file\", type_=\"primary\")\n    op.drop_column(\"user_file\", \"id\")\n    op.alter_column(\n        \"user_file\",\n        \"id_int\",\n        new_column_name=\"id\",\n        existing_type=sa.Integer(),\n    )\n    op.alter_column(\n        \"user_file\",\n        \"id\",\n        existing_type=sa.Integer(),\n        nullable=False,\n        server_default=sa.text(\"nextval('user_file_id_seq')\"),\n    )\n    op.execute(\"ALTER SEQUENCE user_file_id_seq OWNED BY user_file.id\")\n    op.execute(\n        \"\"\"\n        SELECT setval(\n            'user_file_id_seq',\n            GREATEST(COALESCE(MAX(id), 1), 1),\n            MAX(id) IS NOT NULL\n        )\n        FROM user_file\n        \"\"\"\n    )\n    op.create_primary_key(\"user_file_pkey\", \"user_file\", [\"id\"])\n\n    # Restore primary keys on referencing tables\n    if persona_pk_cols:\n        op.create_primary_key(\n            \"persona__user_file_pkey\", \"persona__user_file\", persona_pk_cols\n        )\n    if project_pk_cols:\n        op.create_primary_key(\n            \"project__user_file_pkey\",\n            \"project__user_file\",\n            project_pk_cols,\n        )\n\n    # Recreate foreign keys pointing at the integer primary key\n    op.create_foreign_key(\n        \"persona__user_file_user_file_id_fkey\",\n        \"persona__user_file\",\n        \"user_file\",\n        [\"user_file_id\"],\n        [\"id\"],\n    )\n    op.create_foreign_key(\n        \"fk_project__user_file_user_file_id\",\n        \"project__user_file\",\n        \"user_file\",\n        [\"user_file_id\"],\n        [\"id\"],\n    )\n"
  },
  {
    "path": "backend/alembic/versions/7ccea01261f6_store_chat_retrieval_docs.py",
    "content": "\"\"\"Store Chat Retrieval Docs\n\nRevision ID: 7ccea01261f6\nRevises: a570b80a5f20\nCreate Date: 2023-10-15 10:39:23.317453\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"7ccea01261f6\"\ndown_revision = \"a570b80a5f20\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\n            \"reference_docs\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_message\", \"reference_docs\")\n"
  },
  {
    "path": "backend/alembic/versions/7da0ae5ad583_add_description_to_persona.py",
    "content": "\"\"\"Add description to persona\n\nRevision ID: 7da0ae5ad583\nRevises: e86866a9c78a\nCreate Date: 2023-11-27 00:16:19.959414\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"7da0ae5ad583\"\ndown_revision = \"e86866a9c78a\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\"persona\", sa.Column(\"description\", sa.String(), nullable=True))\n\n\ndef downgrade() -> None:\n    op.drop_column(\"persona\", \"description\")\n"
  },
  {
    "path": "backend/alembic/versions/7da543f5672f_add_slackbotconfig_table.py",
    "content": "\"\"\"Add SlackBotConfig table\n\nRevision ID: 7da543f5672f\nRevises: febe9eaa0644\nCreate Date: 2023-09-24 16:34:17.526128\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"7da543f5672f\"\ndown_revision = \"febe9eaa0644\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"slack_bot_config\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"persona_id\", sa.Integer(), nullable=True),\n        sa.Column(\n            \"channel_config\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"slack_bot_config\")\n"
  },
  {
    "path": "backend/alembic/versions/7e490836d179_nullify_default_system_prompt.py",
    "content": "\"\"\"nullify_default_system_prompt\n\nRevision ID: 7e490836d179\nRevises: c1d2e3f4a5b6\nCreate Date: 2025-12-29 16:54:36.635574\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"7e490836d179\"\ndown_revision = \"c1d2e3f4a5b6\"\nbranch_labels = None\ndepends_on = None\n\n\n# This is the default system prompt from the previous migration (87c52ec39f84)\n# ruff: noqa: E501, W605 start\nPREVIOUS_DEFAULT_SYSTEM_PROMPT = \"\"\"\nYou are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.\n\nThe current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]\n\n# Response Style\nYou use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.\nYou use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\\\n[expression]\\\\n$$' for standalone cases and '\\\\( [expression] \\\\)' when inline.\nFor code you prefer to use Markdown and specify the language.\nYou can use horizontal rules (---) to separate sections of your responses.\nYou can use Markdown tables to format your responses for data, lists, and other structured information.\n\"\"\".lstrip()\n# ruff: noqa: E501, W605 end\n\n\ndef upgrade() -> None:\n    # Make system_prompt column nullable (model already has nullable=True but DB doesn't)\n    op.alter_column(\n        \"persona\",\n        \"system_prompt\",\n        nullable=True,\n    )\n\n    # Set system_prompt to NULL where it matches the previous default\n    conn = op.get_bind()\n    conn.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET system_prompt = NULL\n            WHERE system_prompt = :previous_default\n            \"\"\"\n        ),\n        {\"previous_default\": PREVIOUS_DEFAULT_SYSTEM_PROMPT},\n    )\n\n\ndef downgrade() -> None:\n    # Restore the default system prompt for personas that have NULL\n    # Note: This may restore the prompt to personas that originally had NULL\n    # before this migration, but there's no way to distinguish them\n    conn = op.get_bind()\n    conn.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET system_prompt = :previous_default\n            WHERE system_prompt IS NULL\n            \"\"\"\n        ),\n        {\"previous_default\": PREVIOUS_DEFAULT_SYSTEM_PROMPT},\n    )\n\n    # Revert system_prompt column to not nullable\n    op.alter_column(\n        \"persona\",\n        \"system_prompt\",\n        nullable=False,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/7ed603b64d5a_add_mcp_server_and_connection_config_.py",
    "content": "\"\"\"add_mcp_server_and_connection_config_models\n\nRevision ID: 7ed603b64d5a\nRevises: b329d00a9ea6\nCreate Date: 2025-07-28 17:35:59.900680\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\nfrom onyx.db.enums import MCPAuthenticationType\n\n# revision identifiers, used by Alembic.\nrevision = \"7ed603b64d5a\"\ndown_revision = \"b329d00a9ea6\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    \"\"\"Create tables and columns for MCP Server support\"\"\"\n\n    # 1. MCP Server main table (no FK constraints yet to avoid circular refs)\n    op.create_table(\n        \"mcp_server\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\"owner\", sa.String(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"description\", sa.String(), nullable=True),\n        sa.Column(\"server_url\", sa.String(), nullable=False),\n        sa.Column(\n            \"auth_type\",\n            sa.Enum(\n                MCPAuthenticationType,\n                name=\"mcp_authentication_type\",\n                native_enum=False,\n            ),\n            nullable=False,\n        ),\n        sa.Column(\"admin_connection_config_id\", sa.Integer(), nullable=True),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n    )\n\n    # 2. MCP Connection Config table (can reference mcp_server now that it exists)\n    op.create_table(\n        \"mcp_connection_config\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\"mcp_server_id\", sa.Integer(), nullable=True),\n        sa.Column(\"user_email\", sa.String(), nullable=False, default=\"\"),\n        sa.Column(\"config\", sa.LargeBinary(), nullable=False),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"mcp_server_id\"], [\"mcp_server.id\"], ondelete=\"CASCADE\"\n        ),\n    )\n\n    # Helpful indexes\n    op.create_index(\n        \"ix_mcp_connection_config_server_user\",\n        \"mcp_connection_config\",\n        [\"mcp_server_id\", \"user_email\"],\n    )\n    op.create_index(\n        \"ix_mcp_connection_config_user_email\",\n        \"mcp_connection_config\",\n        [\"user_email\"],\n    )\n\n    # 3. Add the back-references from mcp_server to connection configs\n    op.create_foreign_key(\n        \"mcp_server_admin_config_fk\",\n        \"mcp_server\",\n        \"mcp_connection_config\",\n        [\"admin_connection_config_id\"],\n        [\"id\"],\n        ondelete=\"SET NULL\",\n    )\n\n    # 4. Association / access-control tables\n    op.create_table(\n        \"mcp_server__user\",\n        sa.Column(\"mcp_server_id\", sa.Integer(), primary_key=True),\n        sa.Column(\"user_id\", sa.UUID(), primary_key=True),\n        sa.ForeignKeyConstraint(\n            [\"mcp_server_id\"], [\"mcp_server.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.ForeignKeyConstraint([\"user_id\"], [\"user.id\"], ondelete=\"CASCADE\"),\n    )\n\n    op.create_table(\n        \"mcp_server__user_group\",\n        sa.Column(\"mcp_server_id\", sa.Integer(), primary_key=True),\n        sa.Column(\"user_group_id\", sa.Integer(), primary_key=True),\n        sa.ForeignKeyConstraint(\n            [\"mcp_server_id\"], [\"mcp_server.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.ForeignKeyConstraint([\"user_group_id\"], [\"user_group.id\"]),\n    )\n\n    # 5. Update existing `tool` table – allow tools to belong to an MCP server\n    op.add_column(\n        \"tool\",\n        sa.Column(\"mcp_server_id\", sa.Integer(), nullable=True),\n    )\n    # Add column for MCP tool input schema\n    op.add_column(\n        \"tool\",\n        sa.Column(\"mcp_input_schema\", postgresql.JSONB(), nullable=True),\n    )\n    op.create_foreign_key(\n        \"tool_mcp_server_fk\",\n        \"tool\",\n        \"mcp_server\",\n        [\"mcp_server_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    # 6. Update persona__tool foreign keys to cascade delete\n    # This ensures that when a tool is deleted (including via MCP server deletion),\n    # the corresponding persona__tool rows are also deleted\n    op.drop_constraint(\n        \"persona__tool_tool_id_fkey\", \"persona__tool\", type_=\"foreignkey\"\n    )\n    op.drop_constraint(\n        \"persona__tool_persona_id_fkey\", \"persona__tool\", type_=\"foreignkey\"\n    )\n\n    op.create_foreign_key(\n        \"persona__tool_persona_id_fkey\",\n        \"persona__tool\",\n        \"persona\",\n        [\"persona_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n    op.create_foreign_key(\n        \"persona__tool_tool_id_fkey\",\n        \"persona__tool\",\n        \"tool\",\n        [\"tool_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    # 7. Update research_agent_iteration_sub_step foreign key to SET NULL on delete\n    # This ensures that when a tool is deleted, the sub_step_tool_id is set to NULL\n    # instead of causing a foreign key constraint violation\n    op.drop_constraint(\n        \"research_agent_iteration_sub_step_sub_step_tool_id_fkey\",\n        \"research_agent_iteration_sub_step\",\n        type_=\"foreignkey\",\n    )\n    op.create_foreign_key(\n        \"research_agent_iteration_sub_step_sub_step_tool_id_fkey\",\n        \"research_agent_iteration_sub_step\",\n        \"tool\",\n        [\"sub_step_tool_id\"],\n        [\"id\"],\n        ondelete=\"SET NULL\",\n    )\n\n\ndef downgrade() -> None:\n    \"\"\"Drop all MCP-related tables / columns\"\"\"\n\n    # # # 1. Drop FK & columns from tool\n    # op.drop_constraint(\"tool_mcp_server_fk\", \"tool\", type_=\"foreignkey\")\n    op.execute(\"DELETE FROM tool WHERE mcp_server_id IS NOT NULL\")\n\n    op.drop_constraint(\n        \"research_agent_iteration_sub_step_sub_step_tool_id_fkey\",\n        \"research_agent_iteration_sub_step\",\n        type_=\"foreignkey\",\n    )\n    op.create_foreign_key(\n        \"research_agent_iteration_sub_step_sub_step_tool_id_fkey\",\n        \"research_agent_iteration_sub_step\",\n        \"tool\",\n        [\"sub_step_tool_id\"],\n        [\"id\"],\n    )\n\n    # Restore original persona__tool foreign keys (without CASCADE)\n    op.drop_constraint(\n        \"persona__tool_persona_id_fkey\", \"persona__tool\", type_=\"foreignkey\"\n    )\n    op.drop_constraint(\n        \"persona__tool_tool_id_fkey\", \"persona__tool\", type_=\"foreignkey\"\n    )\n\n    op.create_foreign_key(\n        \"persona__tool_persona_id_fkey\",\n        \"persona__tool\",\n        \"persona\",\n        [\"persona_id\"],\n        [\"id\"],\n    )\n    op.create_foreign_key(\n        \"persona__tool_tool_id_fkey\",\n        \"persona__tool\",\n        \"tool\",\n        [\"tool_id\"],\n        [\"id\"],\n    )\n    op.drop_column(\"tool\", \"mcp_input_schema\")\n    op.drop_column(\"tool\", \"mcp_server_id\")\n\n    # 2. Drop association tables\n    op.drop_table(\"mcp_server__user_group\")\n    op.drop_table(\"mcp_server__user\")\n\n    # 3. Drop FK from mcp_server to connection configs\n    op.drop_constraint(\"mcp_server_admin_config_fk\", \"mcp_server\", type_=\"foreignkey\")\n\n    # 4. Drop connection config indexes & table\n    op.drop_index(\n        \"ix_mcp_connection_config_user_email\", table_name=\"mcp_connection_config\"\n    )\n    op.drop_index(\n        \"ix_mcp_connection_config_server_user\", table_name=\"mcp_connection_config\"\n    )\n    op.drop_table(\"mcp_connection_config\")\n\n    # 5. Finally drop mcp_server table\n    op.drop_table(\"mcp_server\")\n"
  },
  {
    "path": "backend/alembic/versions/7f726bad5367_slack_followup.py",
    "content": "\"\"\"Slack Followup\n\nRevision ID: 7f726bad5367\nRevises: 79acd316403a\nCreate Date: 2024-01-15 00:19:55.991224\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"7f726bad5367\"\ndown_revision = \"79acd316403a\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_feedback\",\n        sa.Column(\"required_followup\", sa.Boolean(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_feedback\", \"required_followup\")\n"
  },
  {
    "path": "backend/alembic/versions/7f99be1cb9f5_add_index_for_getting_documents_just_by_.py",
    "content": "\"\"\"Add index for getting documents just by connector id / credential id\n\nRevision ID: 7f99be1cb9f5\nRevises: 78dbe7e38469\nCreate Date: 2023-10-15 22:48:15.487762\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"7f99be1cb9f5\"\ndown_revision = \"78dbe7e38469\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_index(\n        op.f(\n            \"ix_document_by_connector_credential_pair_pkey__connector_id__credential_id\"\n        ),\n        \"document_by_connector_credential_pair\",\n        [\"connector_id\", \"credential_id\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\n        op.f(\n            \"ix_document_by_connector_credential_pair_pkey__connector_id__credential_id\"\n        ),\n        table_name=\"document_by_connector_credential_pair\",\n    )\n"
  },
  {
    "path": "backend/alembic/versions/800f48024ae9_add_id_to_connectorcredentialpair.py",
    "content": "\"\"\"Add ID to ConnectorCredentialPair\n\nRevision ID: 800f48024ae9\nRevises: 767f1c2a00eb\nCreate Date: 2023-09-19 16:13:42.299715\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.schema import Sequence, CreateSequence\n\n# revision identifiers, used by Alembic.\nrevision = \"800f48024ae9\"\ndown_revision = \"767f1c2a00eb\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    sequence = Sequence(\"connector_credential_pair_id_seq\")\n    op.execute(CreateSequence(sequence))  # type: ignore\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"id\", sa.Integer(), nullable=True, server_default=sequence.next_value()\n        ),\n    )\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\"name\", sa.String(), nullable=True),\n    )\n\n    # fill in IDs for existing rows\n    op.execute(\n        \"UPDATE connector_credential_pair SET id = nextval('connector_credential_pair_id_seq') WHERE id IS NULL\"\n    )\n    op.alter_column(\"connector_credential_pair\", \"id\", nullable=False)\n\n    op.create_unique_constraint(\n        \"connector_credential_pair__name__key\", \"connector_credential_pair\", [\"name\"]\n    )\n    op.create_unique_constraint(\n        \"connector_credential_pair__id__key\", \"connector_credential_pair\", [\"id\"]\n    )\n\n\ndef downgrade() -> None:\n    op.drop_constraint(\n        \"connector_credential_pair__name__key\",\n        \"connector_credential_pair\",\n        type_=\"unique\",\n    )\n    op.drop_constraint(\n        \"connector_credential_pair__id__key\",\n        \"connector_credential_pair\",\n        type_=\"unique\",\n    )\n    op.drop_column(\"connector_credential_pair\", \"name\")\n    op.drop_column(\"connector_credential_pair\", \"id\")\n    op.execute(\"DROP SEQUENCE connector_credential_pair_id_seq\")\n"
  },
  {
    "path": "backend/alembic/versions/80696cf850ae_add_chat_session_to_query_event.py",
    "content": "\"\"\"Add chat session to query_event\n\nRevision ID: 80696cf850ae\nRevises: 15326fcec57e\nCreate Date: 2023-11-26 02:38:35.008070\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"80696cf850ae\"\ndown_revision = \"15326fcec57e\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"query_event\",\n        sa.Column(\"chat_session_id\", sa.Integer(), nullable=True),\n    )\n    op.create_foreign_key(\n        \"fk_query_event_chat_session_id\",\n        \"query_event\",\n        \"chat_session\",\n        [\"chat_session_id\"],\n        [\"id\"],\n    )\n\n\ndef downgrade() -> None:\n    op.drop_constraint(\n        \"fk_query_event_chat_session_id\", \"query_event\", type_=\"foreignkey\"\n    )\n    op.drop_column(\"query_event\", \"chat_session_id\")\n"
  },
  {
    "path": "backend/alembic/versions/8188861f4e92_csv_to_tabular_chat_file_type.py",
    "content": "\"\"\"csv to tabular chat file type\n\nRevision ID: 8188861f4e92\nRevises: d8cdfee5df80\nCreate Date: 2026-03-31 19:23:05.753184\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"8188861f4e92\"\ndown_revision = \"d8cdfee5df80\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.execute(\n        \"\"\"\n        UPDATE chat_message\n        SET files = (\n            SELECT jsonb_agg(\n                CASE\n                    WHEN elem->>'type' = 'csv'\n                    THEN jsonb_set(elem, '{type}', '\"tabular\"')\n                    ELSE elem\n                END\n            )\n            FROM jsonb_array_elements(files) AS elem\n        )\n        WHERE files::text LIKE '%\"type\": \"csv\"%'\n        \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    op.execute(\n        \"\"\"\n        UPDATE chat_message\n        SET files = (\n            SELECT jsonb_agg(\n                CASE\n                    WHEN elem->>'type' = 'tabular'\n                    THEN jsonb_set(elem, '{type}', '\"csv\"')\n                    ELSE elem\n                END\n            )\n            FROM jsonb_array_elements(files) AS elem\n        )\n        WHERE files::text LIKE '%\"type\": \"tabular\"%'\n        \"\"\"\n    )\n"
  },
  {
    "path": "backend/alembic/versions/81c22b1e2e78_hierarchy_nodes_v1.py",
    "content": "\"\"\"hierarchy_nodes_v1\n\nRevision ID: 81c22b1e2e78\nRevises: 72aa7de2e5cf\nCreate Date: 2026-01-13 18:10:01.021451\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\nfrom onyx.configs.constants import DocumentSource\n\n\n# revision identifiers, used by Alembic.\nrevision = \"81c22b1e2e78\"\ndown_revision = \"72aa7de2e5cf\"\nbranch_labels = None\ndepends_on = None\n\n\n# Human-readable display names for each source\nSOURCE_DISPLAY_NAMES: dict[str, str] = {\n    \"ingestion_api\": \"Ingestion API\",\n    \"slack\": \"Slack\",\n    \"web\": \"Web\",\n    \"google_drive\": \"Google Drive\",\n    \"gmail\": \"Gmail\",\n    \"requesttracker\": \"Request Tracker\",\n    \"github\": \"GitHub\",\n    \"gitbook\": \"GitBook\",\n    \"gitlab\": \"GitLab\",\n    \"guru\": \"Guru\",\n    \"bookstack\": \"BookStack\",\n    \"outline\": \"Outline\",\n    \"confluence\": \"Confluence\",\n    \"jira\": \"Jira\",\n    \"slab\": \"Slab\",\n    \"productboard\": \"Productboard\",\n    \"file\": \"File\",\n    \"coda\": \"Coda\",\n    \"notion\": \"Notion\",\n    \"zulip\": \"Zulip\",\n    \"linear\": \"Linear\",\n    \"hubspot\": \"HubSpot\",\n    \"document360\": \"Document360\",\n    \"gong\": \"Gong\",\n    \"google_sites\": \"Google Sites\",\n    \"zendesk\": \"Zendesk\",\n    \"loopio\": \"Loopio\",\n    \"dropbox\": \"Dropbox\",\n    \"sharepoint\": \"SharePoint\",\n    \"teams\": \"Teams\",\n    \"salesforce\": \"Salesforce\",\n    \"discourse\": \"Discourse\",\n    \"axero\": \"Axero\",\n    \"clickup\": \"ClickUp\",\n    \"mediawiki\": \"MediaWiki\",\n    \"wikipedia\": \"Wikipedia\",\n    \"asana\": \"Asana\",\n    \"s3\": \"S3\",\n    \"r2\": \"R2\",\n    \"google_cloud_storage\": \"Google Cloud Storage\",\n    \"oci_storage\": \"OCI Storage\",\n    \"xenforo\": \"XenForo\",\n    \"not_applicable\": \"Not Applicable\",\n    \"discord\": \"Discord\",\n    \"freshdesk\": \"Freshdesk\",\n    \"fireflies\": \"Fireflies\",\n    \"egnyte\": \"Egnyte\",\n    \"airtable\": \"Airtable\",\n    \"highspot\": \"Highspot\",\n    \"drupal_wiki\": \"Drupal Wiki\",\n    \"imap\": \"IMAP\",\n    \"bitbucket\": \"Bitbucket\",\n    \"testrail\": \"TestRail\",\n    \"mock_connector\": \"Mock Connector\",\n    \"user_file\": \"User File\",\n}\n\n\ndef upgrade() -> None:\n    # 1. Create hierarchy_node table\n    op.create_table(\n        \"hierarchy_node\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"raw_node_id\", sa.String(), nullable=False),\n        sa.Column(\"display_name\", sa.String(), nullable=False),\n        sa.Column(\"link\", sa.String(), nullable=True),\n        sa.Column(\"source\", sa.String(), nullable=False),\n        sa.Column(\"node_type\", sa.String(), nullable=False),\n        sa.Column(\"document_id\", sa.String(), nullable=True),\n        sa.Column(\"parent_id\", sa.Integer(), nullable=True),\n        # Permission fields - same pattern as Document table\n        sa.Column(\n            \"external_user_emails\",\n            postgresql.ARRAY(sa.String()),\n            nullable=True,\n        ),\n        sa.Column(\n            \"external_user_group_ids\",\n            postgresql.ARRAY(sa.String()),\n            nullable=True,\n        ),\n        sa.Column(\"is_public\", sa.Boolean(), nullable=False, server_default=\"false\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n        # When document is deleted, just unlink (node can exist without document)\n        sa.ForeignKeyConstraint([\"document_id\"], [\"document.id\"], ondelete=\"SET NULL\"),\n        # When parent node is deleted, orphan children (cleanup via pruning)\n        sa.ForeignKeyConstraint(\n            [\"parent_id\"], [\"hierarchy_node.id\"], ondelete=\"SET NULL\"\n        ),\n        sa.UniqueConstraint(\n            \"raw_node_id\", \"source\", name=\"uq_hierarchy_node_raw_id_source\"\n        ),\n    )\n    op.create_index(\"ix_hierarchy_node_parent_id\", \"hierarchy_node\", [\"parent_id\"])\n    op.create_index(\n        \"ix_hierarchy_node_source_type\", \"hierarchy_node\", [\"source\", \"node_type\"]\n    )\n\n    # Add partial unique index to ensure only one SOURCE-type node per source\n    # This prevents duplicate source root nodes from being created\n    # NOTE: node_type stores enum NAME ('SOURCE'), not value ('source')\n    op.execute(\n        sa.text(\n            \"\"\"\n            CREATE UNIQUE INDEX uq_hierarchy_node_one_source_per_type\n            ON hierarchy_node (source)\n            WHERE node_type = 'SOURCE'\n            \"\"\"\n        )\n    )\n\n    # 2. Create hierarchy_fetch_attempt table\n    op.create_table(\n        \"hierarchy_fetch_attempt\",\n        sa.Column(\"id\", postgresql.UUID(as_uuid=True), nullable=False),\n        sa.Column(\"connector_credential_pair_id\", sa.Integer(), nullable=False),\n        sa.Column(\"status\", sa.String(), nullable=False),\n        sa.Column(\"nodes_fetched\", sa.Integer(), nullable=True, server_default=\"0\"),\n        sa.Column(\"nodes_updated\", sa.Integer(), nullable=True, server_default=\"0\"),\n        sa.Column(\"error_msg\", sa.Text(), nullable=True),\n        sa.Column(\"full_exception_trace\", sa.Text(), nullable=True),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n        sa.Column(\"time_started\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.ForeignKeyConstraint(\n            [\"connector_credential_pair_id\"],\n            [\"connector_credential_pair.id\"],\n            ondelete=\"CASCADE\",\n        ),\n    )\n    op.create_index(\n        \"ix_hierarchy_fetch_attempt_status\", \"hierarchy_fetch_attempt\", [\"status\"]\n    )\n    op.create_index(\n        \"ix_hierarchy_fetch_attempt_time_created\",\n        \"hierarchy_fetch_attempt\",\n        [\"time_created\"],\n    )\n    op.create_index(\n        \"ix_hierarchy_fetch_attempt_cc_pair\",\n        \"hierarchy_fetch_attempt\",\n        [\"connector_credential_pair_id\"],\n    )\n\n    # 3. Insert SOURCE-type hierarchy nodes for each DocumentSource\n    # We insert these so every existing document can have a parent hierarchy node\n    # NOTE: SQLAlchemy's Enum with native_enum=False stores the enum NAME (e.g., 'GOOGLE_DRIVE'),\n    # not the VALUE (e.g., 'google_drive'). We must use .name for source and node_type columns.\n    # SOURCE nodes are always public since they're just categorical roots.\n    for source in DocumentSource:\n        source_name = (\n            source.name\n        )  # e.g., 'GOOGLE_DRIVE' - what SQLAlchemy stores/expects\n        source_value = source.value  # e.g., 'google_drive' - the raw_node_id\n        display_name = SOURCE_DISPLAY_NAMES.get(\n            source_value, source_value.replace(\"_\", \" \").title()\n        )\n        op.execute(\n            sa.text(\n                \"\"\"\n                INSERT INTO hierarchy_node (raw_node_id, display_name, source, node_type, parent_id, is_public)\n                VALUES (:raw_node_id, :display_name, :source, 'SOURCE', NULL, true)\n                ON CONFLICT (raw_node_id, source) DO NOTHING\n                \"\"\"\n            ).bindparams(\n                raw_node_id=source_value,  # Use .value for raw_node_id (human-readable identifier)\n                display_name=display_name,\n                source=source_name,  # Use .name for source column (SQLAlchemy enum storage)\n            )\n        )\n\n    # 4. Add parent_hierarchy_node_id column to document table\n    op.add_column(\n        \"document\",\n        sa.Column(\"parent_hierarchy_node_id\", sa.Integer(), nullable=True),\n    )\n    # When hierarchy node is deleted, just unlink the document (SET NULL)\n    op.create_foreign_key(\n        \"fk_document_parent_hierarchy_node\",\n        \"document\",\n        \"hierarchy_node\",\n        [\"parent_hierarchy_node_id\"],\n        [\"id\"],\n        ondelete=\"SET NULL\",\n    )\n    op.create_index(\n        \"ix_document_parent_hierarchy_node_id\",\n        \"document\",\n        [\"parent_hierarchy_node_id\"],\n    )\n\n    # 5. Set all existing documents' parent_hierarchy_node_id to their source's SOURCE node\n    # For documents with multiple connectors, we pick one source deterministically (MIN connector_id)\n    # NOTE: Both connector.source and hierarchy_node.source store enum NAMEs (e.g., 'GOOGLE_DRIVE')\n    # because SQLAlchemy Enum(native_enum=False) uses the enum name for storage.\n    op.execute(\n        sa.text(\n            \"\"\"\n            UPDATE document d\n            SET parent_hierarchy_node_id = hn.id\n            FROM (\n                -- Get the source for each document (pick MIN connector_id for determinism)\n                SELECT DISTINCT ON (dbcc.id)\n                    dbcc.id as doc_id,\n                    c.source as source\n                FROM document_by_connector_credential_pair dbcc\n                JOIN connector c ON dbcc.connector_id = c.id\n                ORDER BY dbcc.id, dbcc.connector_id\n            ) doc_source\n            JOIN hierarchy_node hn ON hn.source = doc_source.source AND hn.node_type = 'SOURCE'\n            WHERE d.id = doc_source.doc_id\n            \"\"\"\n        )\n    )\n\n    # Create the persona__hierarchy_node association table\n    op.create_table(\n        \"persona__hierarchy_node\",\n        sa.Column(\"persona_id\", sa.Integer(), nullable=False),\n        sa.Column(\"hierarchy_node_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n            ondelete=\"CASCADE\",\n        ),\n        sa.ForeignKeyConstraint(\n            [\"hierarchy_node_id\"],\n            [\"hierarchy_node.id\"],\n            ondelete=\"CASCADE\",\n        ),\n        sa.PrimaryKeyConstraint(\"persona_id\", \"hierarchy_node_id\"),\n    )\n\n    # Add index for efficient lookups\n    op.create_index(\n        \"ix_persona__hierarchy_node_hierarchy_node_id\",\n        \"persona__hierarchy_node\",\n        [\"hierarchy_node_id\"],\n    )\n\n    # Create the persona__document association table for attaching individual\n    # documents directly to assistants\n    op.create_table(\n        \"persona__document\",\n        sa.Column(\"persona_id\", sa.Integer(), nullable=False),\n        sa.Column(\"document_id\", sa.String(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n            ondelete=\"CASCADE\",\n        ),\n        sa.ForeignKeyConstraint(\n            [\"document_id\"],\n            [\"document.id\"],\n            ondelete=\"CASCADE\",\n        ),\n        sa.PrimaryKeyConstraint(\"persona_id\", \"document_id\"),\n    )\n\n    # Add index for efficient lookups by document_id\n    op.create_index(\n        \"ix_persona__document_document_id\",\n        \"persona__document\",\n        [\"document_id\"],\n    )\n\n    # 6. Add last_time_hierarchy_fetch column to connector_credential_pair table\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"last_time_hierarchy_fetch\", sa.DateTime(timezone=True), nullable=True\n        ),\n    )\n\n\ndef downgrade() -> None:\n    # Remove last_time_hierarchy_fetch from connector_credential_pair\n    op.drop_column(\"connector_credential_pair\", \"last_time_hierarchy_fetch\")\n\n    # Drop persona__document table\n    op.drop_index(\"ix_persona__document_document_id\", table_name=\"persona__document\")\n    op.drop_table(\"persona__document\")\n\n    # Drop persona__hierarchy_node table\n    op.drop_index(\n        \"ix_persona__hierarchy_node_hierarchy_node_id\",\n        table_name=\"persona__hierarchy_node\",\n    )\n    op.drop_table(\"persona__hierarchy_node\")\n\n    # Remove parent_hierarchy_node_id from document\n    op.drop_index(\"ix_document_parent_hierarchy_node_id\", table_name=\"document\")\n    op.drop_constraint(\n        \"fk_document_parent_hierarchy_node\", \"document\", type_=\"foreignkey\"\n    )\n    op.drop_column(\"document\", \"parent_hierarchy_node_id\")\n\n    # Drop hierarchy_fetch_attempt table\n    op.drop_index(\n        \"ix_hierarchy_fetch_attempt_cc_pair\", table_name=\"hierarchy_fetch_attempt\"\n    )\n    op.drop_index(\n        \"ix_hierarchy_fetch_attempt_time_created\", table_name=\"hierarchy_fetch_attempt\"\n    )\n    op.drop_index(\n        \"ix_hierarchy_fetch_attempt_status\", table_name=\"hierarchy_fetch_attempt\"\n    )\n    op.drop_table(\"hierarchy_fetch_attempt\")\n\n    # Drop hierarchy_node table\n    op.drop_index(\"uq_hierarchy_node_one_source_per_type\", table_name=\"hierarchy_node\")\n    op.drop_index(\"ix_hierarchy_node_source_type\", table_name=\"hierarchy_node\")\n    op.drop_index(\"ix_hierarchy_node_parent_id\", table_name=\"hierarchy_node\")\n    op.drop_table(\"hierarchy_node\")\n"
  },
  {
    "path": "backend/alembic/versions/8405ca81cc83_notifications_constraint.py",
    "content": "\"\"\"notifications constraint, sort index, and cleanup old notifications\n\nRevision ID: 8405ca81cc83\nRevises: a3c1a7904cd0\nCreate Date: 2026-01-07 16:43:44.855156\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"8405ca81cc83\"\ndown_revision = \"a3c1a7904cd0\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Create unique index for notification deduplication.\n    # This enables atomic ON CONFLICT DO NOTHING inserts in batch_create_notifications.\n    #\n    # Uses COALESCE to handle NULL additional_data (NULLs are normally distinct\n    # in unique constraints, but we want NULL == NULL for deduplication).\n    # The '{}' represents an empty JSONB object as the NULL replacement.\n\n    # Clean up legacy notifications first\n    op.execute(\"DELETE FROM notification WHERE title = 'New Notification'\")\n\n    op.execute(\n        \"\"\"\n        CREATE UNIQUE INDEX IF NOT EXISTS ix_notification_user_type_data\n        ON notification (user_id, notif_type, COALESCE(additional_data, '{}'::jsonb))\n        \"\"\"\n    )\n\n    # Create index for efficient notification sorting by user\n    # Covers: WHERE user_id = ? ORDER BY dismissed, first_shown DESC\n    op.execute(\n        \"\"\"\n        CREATE INDEX IF NOT EXISTS ix_notification_user_sort\n        ON notification (user_id, dismissed, first_shown DESC)\n        \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    op.execute(\"DROP INDEX IF EXISTS ix_notification_user_type_data\")\n    op.execute(\"DROP INDEX IF EXISTS ix_notification_user_sort\")\n"
  },
  {
    "path": "backend/alembic/versions/849b21c732f8_add_demo_data_enabled_to_build_session.py",
    "content": "\"\"\"add demo_data_enabled to build_session\n\nRevision ID: 849b21c732f8\nRevises: 81c22b1e2e78\nCreate Date: 2026-01-28 10:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"849b21c732f8\"\ndown_revision = \"81c22b1e2e78\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"build_session\",\n        sa.Column(\n            \"demo_data_enabled\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.text(\"true\"),\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"build_session\", \"demo_data_enabled\")\n"
  },
  {
    "path": "backend/alembic/versions/87c52ec39f84_update_default_system_prompt.py",
    "content": "\"\"\"update_default_system_prompt\n\nRevision ID: 87c52ec39f84\nRevises: 7bd55f264e1b\nCreate Date: 2025-12-05 15:54:06.002452\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"87c52ec39f84\"\ndown_revision = \"7bd55f264e1b\"\nbranch_labels = None\ndepends_on = None\n\n\nDEFAULT_PERSONA_ID = 0\n\n# ruff: noqa: E501, W605 start\nDEFAULT_SYSTEM_PROMPT = \"\"\"\nYou are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.\n\nThe current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]\n\n# Response Style\nYou use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.\nYou use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\\\n[expression]\\\\n$$' for standalone cases and '\\\\( [expression] \\\\)' when inline.\nFor code you prefer to use Markdown and specify the language.\nYou can use horizontal rules (---) to separate sections of your responses.\nYou can use Markdown tables to format your responses for data, lists, and other structured information.\n\"\"\".lstrip()\n# ruff: noqa: E501, W605 end\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n    conn.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET system_prompt = :system_prompt\n            WHERE id = :persona_id\n            \"\"\"\n        ),\n        {\"system_prompt\": DEFAULT_SYSTEM_PROMPT, \"persona_id\": DEFAULT_PERSONA_ID},\n    )\n\n\ndef downgrade() -> None:\n    # We don't revert the system prompt on downgrade since we don't know\n    # what the previous value was. The new prompt is a reasonable default.\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/8818cf73fa1a_drop_include_citations.py",
    "content": "\"\"\"drop include citations\n\nRevision ID: 8818cf73fa1a\nRevises: 7ed603b64d5a\nCreate Date: 2025-09-02 19:43:50.060680\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"8818cf73fa1a\"\ndown_revision = \"7ed603b64d5a\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.drop_column(\"prompt\", \"include_citations\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"prompt\",\n        sa.Column(\n            \"include_citations\",\n            sa.BOOLEAN(),\n            autoincrement=False,\n            nullable=True,\n        ),\n    )\n    # Set include_citations based on prompt name: FALSE for ImageGeneration, TRUE for others\n    op.execute(\n        sa.text(\n            \"UPDATE prompt SET include_citations = CASE WHEN name = 'ImageGeneration' THEN FALSE ELSE TRUE END\"\n        )\n    )\n"
  },
  {
    "path": "backend/alembic/versions/891cd83c87a8_add_is_visible_to_persona.py",
    "content": "\"\"\"Add is_visible to Persona\n\nRevision ID: 891cd83c87a8\nRevises: 76b60d407dfb\nCreate Date: 2023-12-21 11:55:54.132279\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"891cd83c87a8\"\ndown_revision = \"76b60d407dfb\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"persona\",\n        sa.Column(\"is_visible\", sa.Boolean(), nullable=True),\n    )\n    op.execute(\"UPDATE persona SET is_visible = true\")\n    op.alter_column(\"persona\", \"is_visible\", nullable=False)\n\n    op.add_column(\n        \"persona\",\n        sa.Column(\"display_priority\", sa.Integer(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"persona\", \"is_visible\")\n    op.drop_column(\"persona\", \"display_priority\")\n"
  },
  {
    "path": "backend/alembic/versions/8987770549c0_add_full_exception_stack_trace.py",
    "content": "\"\"\"Add full exception stack trace\n\nRevision ID: 8987770549c0\nRevises: ec3ec2eabf7b\nCreate Date: 2024-02-10 19:31:28.339135\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"8987770549c0\"\ndown_revision = \"ec3ec2eabf7b\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"index_attempt\", sa.Column(\"full_exception_trace\", sa.Text(), nullable=True)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"index_attempt\", \"full_exception_trace\")\n"
  },
  {
    "path": "backend/alembic/versions/8a87bd6ec550_associate_index_attempts_with_ccpair.py",
    "content": "\"\"\"associate index attempts with ccpair\n\nRevision ID: 8a87bd6ec550\nRevises: 4ea2c93919c1\nCreate Date: 2024-07-22 15:15:52.558451\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"8a87bd6ec550\"\ndown_revision = \"4ea2c93919c1\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    # Add the new connector_credential_pair_id column\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"connector_credential_pair_id\", sa.Integer(), nullable=True),\n    )\n\n    # Create a foreign key constraint to the connector_credential_pair table\n    op.create_foreign_key(\n        \"fk_index_attempt_connector_credential_pair_id\",\n        \"index_attempt\",\n        \"connector_credential_pair\",\n        [\"connector_credential_pair_id\"],\n        [\"id\"],\n    )\n\n    # Populate the new connector_credential_pair_id column using existing connector_id and credential_id\n    op.execute(\n        \"\"\"\n        UPDATE index_attempt ia\n        SET connector_credential_pair_id = (\n            SELECT id FROM connector_credential_pair ccp\n            WHERE\n                (ia.connector_id IS NULL OR ccp.connector_id = ia.connector_id)\n                AND (ia.credential_id IS NULL OR ccp.credential_id = ia.credential_id)\n            LIMIT 1\n        )\n        WHERE ia.connector_id IS NOT NULL OR ia.credential_id IS NOT NULL\n        \"\"\"\n    )\n\n    # For good measure\n    op.execute(\n        \"\"\"\n        DELETE FROM index_attempt\n        WHERE connector_credential_pair_id IS NULL\n        \"\"\"\n    )\n\n    # Make the new connector_credential_pair_id column non-nullable\n    op.alter_column(\"index_attempt\", \"connector_credential_pair_id\", nullable=False)\n\n    # Drop the old connector_id and credential_id columns\n    op.drop_column(\"index_attempt\", \"connector_id\")\n    op.drop_column(\"index_attempt\", \"credential_id\")\n\n    # Update the index to use connector_credential_pair_id\n    op.create_index(\n        \"ix_index_attempt_latest_for_connector_credential_pair\",\n        \"index_attempt\",\n        [\"connector_credential_pair_id\", \"time_created\"],\n    )\n\n\ndef downgrade() -> None:\n    # Add back the old connector_id and credential_id columns\n    op.add_column(\n        \"index_attempt\", sa.Column(\"connector_id\", sa.Integer(), nullable=True)\n    )\n    op.add_column(\n        \"index_attempt\", sa.Column(\"credential_id\", sa.Integer(), nullable=True)\n    )\n\n    # Populate the old connector_id and credential_id columns using the connector_credential_pair_id\n    op.execute(\n        \"\"\"\n        UPDATE index_attempt ia\n        SET connector_id = ccp.connector_id, credential_id = ccp.credential_id\n        FROM connector_credential_pair ccp\n        WHERE ia.connector_credential_pair_id = ccp.id\n        \"\"\"\n    )\n\n    # Make the old connector_id and credential_id columns non-nullable\n    op.alter_column(\"index_attempt\", \"connector_id\", nullable=False)\n    op.alter_column(\"index_attempt\", \"credential_id\", nullable=False)\n\n    # Drop the new connector_credential_pair_id column\n    op.drop_constraint(\n        \"fk_index_attempt_connector_credential_pair_id\",\n        \"index_attempt\",\n        type_=\"foreignkey\",\n    )\n    op.drop_column(\"index_attempt\", \"connector_credential_pair_id\")\n\n    op.create_index(\n        \"ix_index_attempt_latest_for_connector_credential_pair\",\n        \"index_attempt\",\n        [\"connector_id\", \"credential_id\", \"time_created\"],\n    )\n"
  },
  {
    "path": "backend/alembic/versions/8aabb57f3b49_restructure_document_indices.py",
    "content": "\"\"\"Restructure Document Indices\n\nRevision ID: 8aabb57f3b49\nRevises: 5e84129c8be3\nCreate Date: 2023-08-18 21:15:57.629515\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"8aabb57f3b49\"\ndown_revision = \"5e84129c8be3\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.drop_table(\"chunk\")\n    op.execute(\"DROP TYPE IF EXISTS documentstoretype\")\n\n\ndef downgrade() -> None:\n    op.create_table(\n        \"chunk\",\n        sa.Column(\"id\", sa.VARCHAR(), autoincrement=False, nullable=False),\n        sa.Column(\n            \"document_store_type\",\n            postgresql.ENUM(\"VECTOR\", \"KEYWORD\", name=\"documentstoretype\"),\n            autoincrement=False,\n            nullable=False,\n        ),\n        sa.Column(\"document_id\", sa.VARCHAR(), autoincrement=False, nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"document_id\"], [\"document.id\"], name=\"chunk_document_id_fkey\"\n        ),\n        sa.PrimaryKeyConstraint(\"id\", \"document_store_type\", name=\"chunk_pkey\"),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/8b5ce697290e_add_discord_bot_tables.py",
    "content": "\"\"\"Add Discord bot tables\n\nRevision ID: 8b5ce697290e\nRevises: a1b2c3d4e5f7\nCreate Date: 2025-01-14\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"8b5ce697290e\"\ndown_revision = \"a1b2c3d4e5f7\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    # DiscordBotConfig (singleton table - one per tenant)\n    op.create_table(\n        \"discord_bot_config\",\n        sa.Column(\n            \"id\",\n            sa.String(),\n            primary_key=True,\n            server_default=sa.text(\"'SINGLETON'\"),\n        ),\n        sa.Column(\"bot_token\", sa.LargeBinary(), nullable=False),  # EncryptedString\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n        sa.CheckConstraint(\"id = 'SINGLETON'\", name=\"ck_discord_bot_config_singleton\"),\n    )\n\n    # DiscordGuildConfig\n    op.create_table(\n        \"discord_guild_config\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\"guild_id\", sa.BigInteger(), nullable=True, unique=True),\n        sa.Column(\"guild_name\", sa.String(), nullable=True),\n        sa.Column(\"registration_key\", sa.String(), nullable=False, unique=True),\n        sa.Column(\"registered_at\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\n            \"default_persona_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"persona.id\", ondelete=\"SET NULL\"),\n            nullable=True,\n        ),\n        sa.Column(\n            \"enabled\", sa.Boolean(), server_default=sa.text(\"true\"), nullable=False\n        ),\n    )\n\n    # DiscordChannelConfig\n    op.create_table(\n        \"discord_channel_config\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\n            \"guild_config_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"discord_guild_config.id\", ondelete=\"CASCADE\"),\n            nullable=False,\n        ),\n        sa.Column(\"channel_id\", sa.BigInteger(), nullable=False),\n        sa.Column(\"channel_name\", sa.String(), nullable=False),\n        sa.Column(\n            \"channel_type\",\n            sa.String(20),\n            server_default=sa.text(\"'text'\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"is_private\",\n            sa.Boolean(),\n            server_default=sa.text(\"false\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"thread_only_mode\",\n            sa.Boolean(),\n            server_default=sa.text(\"false\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"require_bot_invocation\",\n            sa.Boolean(),\n            server_default=sa.text(\"true\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"persona_override_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"persona.id\", ondelete=\"SET NULL\"),\n            nullable=True,\n        ),\n        sa.Column(\n            \"enabled\", sa.Boolean(), server_default=sa.text(\"false\"), nullable=False\n        ),\n    )\n\n    # Unique constraint: one config per channel per guild\n    op.create_unique_constraint(\n        \"uq_discord_channel_guild_channel\",\n        \"discord_channel_config\",\n        [\"guild_config_id\", \"channel_id\"],\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"discord_channel_config\")\n    op.drop_table(\"discord_guild_config\")\n    op.drop_table(\"discord_bot_config\")\n"
  },
  {
    "path": "backend/alembic/versions/8e1ac4f39a9f_enable_contextual_retrieval.py",
    "content": "\"\"\"enable contextual retrieval\n\nRevision ID: 8e1ac4f39a9f\nRevises: 9aadf32dfeb4\nCreate Date: 2024-12-20 13:29:09.918661\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"8e1ac4f39a9f\"\ndown_revision = \"9aadf32dfeb4\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"enable_contextual_rag\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=\"false\",\n        ),\n    )\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"contextual_rag_llm_name\",\n            sa.String(),\n            nullable=True,\n        ),\n    )\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"contextual_rag_llm_provider\",\n            sa.String(),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"search_settings\", \"enable_contextual_rag\")\n    op.drop_column(\"search_settings\", \"contextual_rag_llm_name\")\n    op.drop_column(\"search_settings\", \"contextual_rag_llm_provider\")\n"
  },
  {
    "path": "backend/alembic/versions/8e26726b7683_chat_context_addition.py",
    "content": "\"\"\"Chat Context Addition\n\nRevision ID: 8e26726b7683\nRevises: 5809c0787398\nCreate Date: 2023-09-13 18:34:31.327944\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"8e26726b7683\"\ndown_revision = \"5809c0787398\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"persona\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"system_text\", sa.Text(), nullable=True),\n        sa.Column(\"tools_text\", sa.Text(), nullable=True),\n        sa.Column(\"hint_text\", sa.Text(), nullable=True),\n        sa.Column(\"default_persona\", sa.Boolean(), nullable=False),\n        sa.Column(\"deleted\", sa.Boolean(), nullable=False),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.add_column(\"chat_message\", sa.Column(\"persona_id\", sa.Integer(), nullable=True))\n    op.create_foreign_key(\n        \"fk_chat_message_persona_id\", \"chat_message\", \"persona\", [\"persona_id\"], [\"id\"]\n    )\n\n\ndef downgrade() -> None:\n    op.drop_constraint(\"fk_chat_message_persona_id\", \"chat_message\", type_=\"foreignkey\")\n    op.drop_column(\"chat_message\", \"persona_id\")\n    op.drop_table(\"persona\")\n"
  },
  {
    "path": "backend/alembic/versions/8f43500ee275_add_index.py",
    "content": "\"\"\"add index\n\nRevision ID: 8f43500ee275\nRevises: da42808081e3\nCreate Date: 2025-02-24 17:35:33.072714\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"8f43500ee275\"\ndown_revision = \"da42808081e3\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Create a basic index on the lowercase message column for direct text matching\n    # Limit to 1500 characters to stay well under the 2856 byte limit of btree version 4\n    # op.execute(\n    #     \"\"\"\n    #     CREATE INDEX idx_chat_message_message_lower\n    #     ON chat_message (LOWER(substring(message, 1, 1500)))\n    #     \"\"\"\n    # )\n    pass\n\n\ndef downgrade() -> None:\n    # Drop the index\n    op.execute(\"DROP INDEX IF EXISTS idx_chat_message_message_lower;\")\n"
  },
  {
    "path": "backend/alembic/versions/8ffcc2bcfc11_add_needs_persona_sync_to_user_file.py",
    "content": "\"\"\"add needs_persona_sync to user_file\n\nRevision ID: 8ffcc2bcfc11\nRevises: 7616121f6e97\nCreate Date: 2026-02-23 10:48:48.343826\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"8ffcc2bcfc11\"\ndown_revision = \"7616121f6e97\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user_file\",\n        sa.Column(\n            \"needs_persona_sync\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.text(\"false\"),\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user_file\", \"needs_persona_sync\")\n"
  },
  {
    "path": "backend/alembic/versions/904451035c9b_store_tool_details.py",
    "content": "\"\"\"Store Tool Details\n\nRevision ID: 904451035c9b\nRevises: 3b25685ff73c\nCreate Date: 2023-10-05 12:29:26.620000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"904451035c9b\"\ndown_revision = \"3b25685ff73c\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"persona\",\n        sa.Column(\"tools\", postgresql.JSONB(astext_type=sa.Text()), nullable=True),\n    )\n    op.drop_column(\"persona\", \"tools_text\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"persona\",\n        sa.Column(\"tools_text\", sa.TEXT(), autoincrement=False, nullable=True),\n    )\n    op.drop_column(\"persona\", \"tools\")\n"
  },
  {
    "path": "backend/alembic/versions/904e5138fffb_tags.py",
    "content": "\"\"\"Tags\n\nRevision ID: 904e5138fffb\nRevises: 891cd83c87a8\nCreate Date: 2024-01-01 10:44:43.733974\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"904e5138fffb\"\ndown_revision = \"891cd83c87a8\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"tag\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"tag_key\", sa.String(), nullable=False),\n        sa.Column(\"tag_value\", sa.String(), nullable=False),\n        sa.Column(\"source\", sa.String(), nullable=False),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\n            \"tag_key\", \"tag_value\", \"source\", name=\"_tag_key_value_source_uc\"\n        ),\n    )\n    op.create_table(\n        \"document__tag\",\n        sa.Column(\"document_id\", sa.String(), nullable=False),\n        sa.Column(\"tag_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"document_id\"],\n            [\"document.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"tag_id\"],\n            [\"tag.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"document_id\", \"tag_id\"),\n    )\n\n    op.add_column(\n        \"search_doc\",\n        sa.Column(\n            \"doc_metadata\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=True,\n        ),\n    )\n    op.execute(\"UPDATE search_doc SET doc_metadata = '{}' WHERE doc_metadata IS NULL\")\n    op.alter_column(\"search_doc\", \"doc_metadata\", nullable=False)\n\n\ndef downgrade() -> None:\n    op.drop_table(\"document__tag\")\n    op.drop_table(\"tag\")\n    op.drop_column(\"search_doc\", \"doc_metadata\")\n"
  },
  {
    "path": "backend/alembic/versions/9087b548dd69_seed_default_image_gen_config.py",
    "content": "\"\"\"seed_default_image_gen_config\n\nRevision ID: 9087b548dd69\nRevises: 2b90f3af54b8\nCreate Date: 2026-01-05 00:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"9087b548dd69\"\ndown_revision = \"2b90f3af54b8\"\nbranch_labels = None\ndepends_on = None\n\n# Constants for default image generation config\n# Source: web/src/app/admin/configuration/image-generation/constants.ts\nIMAGE_PROVIDER_ID = \"openai_gpt_image_1\"\nMODEL_NAME = \"gpt-image-1\"\nPROVIDER_NAME = \"openai\"\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n\n    # Check if image_generation_config table already has records\n    existing_configs = (\n        conn.execute(sa.text(\"SELECT COUNT(*) FROM image_generation_config\")).scalar()\n        or 0\n    )\n\n    if existing_configs > 0:\n        # Skip if configs already exist - user may have configured manually\n        return\n\n    # Find the first OpenAI LLM provider\n    openai_provider = conn.execute(\n        sa.text(\n            \"\"\"\n            SELECT id, api_key\n            FROM llm_provider\n            WHERE provider = :provider\n            ORDER BY id\n            LIMIT 1\n            \"\"\"\n        ),\n        {\"provider\": PROVIDER_NAME},\n    ).fetchone()\n\n    if not openai_provider:\n        # No OpenAI provider found - nothing to do\n        return\n\n    source_provider_id, api_key = openai_provider\n\n    # Create new LLM provider for image generation (clone only api_key)\n    result = conn.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO llm_provider (\n                name, provider, api_key, api_base, api_version,\n                deployment_name, default_model_name, is_public,\n                is_default_provider, is_default_vision_provider, is_auto_mode\n            )\n            VALUES (\n                :name, :provider, :api_key, NULL, NULL,\n                NULL, :default_model_name, :is_public,\n                NULL, NULL, :is_auto_mode\n            )\n            RETURNING id\n            \"\"\"\n        ),\n        {\n            \"name\": f\"Image Gen - {IMAGE_PROVIDER_ID}\",\n            \"provider\": PROVIDER_NAME,\n            \"api_key\": api_key,\n            \"default_model_name\": MODEL_NAME,\n            \"is_public\": True,\n            \"is_auto_mode\": False,\n        },\n    )\n    new_provider_id = result.scalar()\n\n    # Create model configuration\n    result = conn.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO model_configuration (\n                llm_provider_id, name, is_visible, max_input_tokens,\n                supports_image_input, display_name\n            )\n            VALUES (\n                :llm_provider_id, :name, :is_visible, :max_input_tokens,\n                :supports_image_input, :display_name\n            )\n            RETURNING id\n            \"\"\"\n        ),\n        {\n            \"llm_provider_id\": new_provider_id,\n            \"name\": MODEL_NAME,\n            \"is_visible\": True,\n            \"max_input_tokens\": None,\n            \"supports_image_input\": False,\n            \"display_name\": None,\n        },\n    )\n    model_config_id = result.scalar()\n\n    # Create image generation config\n    conn.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO image_generation_config (\n                image_provider_id, model_configuration_id, is_default\n            )\n            VALUES (\n                :image_provider_id, :model_configuration_id, :is_default\n            )\n            \"\"\"\n        ),\n        {\n            \"image_provider_id\": IMAGE_PROVIDER_ID,\n            \"model_configuration_id\": model_config_id,\n            \"is_default\": True,\n        },\n    )\n\n\ndef downgrade() -> None:\n    # We don't remove the config on downgrade since it's safe to keep around\n    # If we upgrade again, it will be a no-op due to the existing records check\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/90b409d06e50_add_chat_compression_fields.py",
    "content": "\"\"\"add_chat_compression_fields\n\nRevision ID: 90b409d06e50\nRevises: f220515df7b4\nCreate Date: 2026-01-26 09:13:09.635427\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"90b409d06e50\"\ndown_revision = \"f220515df7b4\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add last_summarized_message_id to chat_message\n    # This field marks a message as a summary and indicates the last message it covers.\n    # Summaries are branch-aware via their parent_message_id pointing to the branch.\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\n            \"last_summarized_message_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"chat_message.id\", ondelete=\"SET NULL\"),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_message\", \"last_summarized_message_id\")\n"
  },
  {
    "path": "backend/alembic/versions/90e3b9af7da4_tag_fix.py",
    "content": "\"\"\"tag-fix\n\nRevision ID: 90e3b9af7da4\nRevises: 62c3a055a141\nCreate Date: 2025-08-01 20:58:14.607624\n\n\"\"\"\n\nimport json\nimport logging\nimport os\n\nfrom typing import cast\nfrom typing import Generator\n\nfrom alembic import op\nimport sqlalchemy as sa\n\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT\nfrom onyx.db.search_settings import SearchSettings\nfrom onyx.configs.app_configs import AUTH_TYPE\nfrom onyx.configs.constants import AuthType\nfrom onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client\n\nlogger = logging.getLogger(\"alembic.runtime.migration\")\n\n\n# revision identifiers, used by Alembic.\nrevision = \"90e3b9af7da4\"\ndown_revision = \"62c3a055a141\"\nbranch_labels = None\ndepends_on = None\n\nSKIP_TAG_FIX = os.environ.get(\"SKIP_TAG_FIX\", \"true\").lower() == \"true\"\n\n# override for cloud\nif AUTH_TYPE == AuthType.CLOUD:\n    SKIP_TAG_FIX = True\n\n\ndef set_is_list_for_known_tags() -> None:\n    \"\"\"\n    Sets is_list to true for all tags that are known to be lists.\n    \"\"\"\n    LIST_METADATA: list[tuple[str, str]] = [\n        (\"CLICKUP\", \"tags\"),\n        (\"CONFLUENCE\", \"labels\"),\n        (\"DISCOURSE\", \"tags\"),\n        (\"FRESHDESK\", \"emails\"),\n        (\"GITHUB\", \"assignees\"),\n        (\"GITHUB\", \"labels\"),\n        (\"GURU\", \"tags\"),\n        (\"GURU\", \"folders\"),\n        (\"HUBSPOT\", \"associated_contact_ids\"),\n        (\"HUBSPOT\", \"associated_company_ids\"),\n        (\"HUBSPOT\", \"associated_deal_ids\"),\n        (\"HUBSPOT\", \"associated_ticket_ids\"),\n        (\"JIRA\", \"labels\"),\n        (\"MEDIAWIKI\", \"categories\"),\n        (\"ZENDESK\", \"labels\"),\n        (\"ZENDESK\", \"content_tags\"),\n    ]\n\n    bind = op.get_bind()\n    for source, key in LIST_METADATA:\n        bind.execute(\n            sa.text(\n                f\"\"\"\n                UPDATE tag\n                SET is_list = true\n                WHERE tag_key = '{key}'\n                AND source = '{source}'\n                \"\"\"\n            )\n        )\n\n\ndef set_is_list_for_list_tags() -> None:\n    \"\"\"\n    Sets is_list to true for all tags which have multiple values for a given\n    document, key, and source triplet. This only works if we remove old tags\n    from the database.\n    \"\"\"\n    bind = op.get_bind()\n    bind.execute(\n        sa.text(\n            \"\"\"\n            UPDATE tag\n            SET is_list = true\n            FROM (\n                SELECT DISTINCT tag.tag_key, tag.source\n                FROM tag\n                JOIN document__tag ON tag.id = document__tag.tag_id\n                GROUP BY tag.tag_key, tag.source, document__tag.document_id\n                HAVING count(*) > 1\n            ) AS list_tags\n            WHERE tag.tag_key = list_tags.tag_key\n            AND tag.source = list_tags.source\n            \"\"\"\n        )\n    )\n\n\ndef log_list_tags() -> None:\n    bind = op.get_bind()\n    result = bind.execute(\n        sa.text(\n            \"\"\"\n            SELECT DISTINCT source, tag_key\n            FROM tag\n            WHERE is_list\n            ORDER BY source, tag_key\n            \"\"\"\n        )\n    ).fetchall()\n    logger.info(\n        \"List tags:\\n\" + \"\\n\".join(f\"  {source}: {key}\" for source, key in result)\n    )\n\n\ndef remove_old_tags() -> None:\n    \"\"\"\n    Removes old tags from the database.\n    Previously, there was a bug where if a document got indexed with a tag and then\n    the document got reindexed, the old tag would not be removed.\n    This function removes those old tags by comparing it against the tags in vespa.\n    \"\"\"\n    current_search_settings, _ = active_search_settings()\n\n    # Get the index name\n    if hasattr(current_search_settings, \"index_name\"):\n        index_name = current_search_settings.index_name\n    else:\n        # Default index name if we can't get it from the document_index\n        index_name = \"danswer_index\"\n\n    for batch in _get_batch_documents_with_multiple_tags():\n        n_deleted = 0\n\n        for document_id in batch:\n            true_metadata = _get_vespa_metadata(document_id, index_name)\n            tags = _get_document_tags(document_id)\n\n            # identify document__tags to delete\n            to_delete: list[str] = []\n            for tag_id, tag_key, tag_value in tags:\n                true_val = true_metadata.get(tag_key, \"\")\n                if (isinstance(true_val, list) and tag_value not in true_val) or (\n                    isinstance(true_val, str) and tag_value != true_val\n                ):\n                    to_delete.append(str(tag_id))\n\n            if not to_delete:\n                continue\n\n            # delete old document__tags\n            bind = op.get_bind()\n            result = bind.execute(\n                sa.text(\n                    f\"\"\"\n                    DELETE FROM document__tag\n                    WHERE document_id = '{document_id}'\n                    AND tag_id IN ({\",\".join(to_delete)})\n                    \"\"\"\n                )\n            )\n            n_deleted += result.rowcount\n        logger.info(f\"Processed {len(batch)} documents and deleted {n_deleted} tags\")\n\n\ndef active_search_settings() -> tuple[SearchSettings, SearchSettings | None]:\n    result = op.get_bind().execute(\n        sa.text(\n            \"\"\"\n        SELECT * FROM search_settings WHERE status = 'PRESENT' ORDER BY id DESC LIMIT 1\n        \"\"\"\n        )\n    )\n    search_settings_fetch = result.fetchall()\n    search_settings = (\n        SearchSettings(**search_settings_fetch[0]._asdict())\n        if search_settings_fetch\n        else None\n    )\n\n    result2 = op.get_bind().execute(\n        sa.text(\n            \"\"\"\n        SELECT * FROM search_settings WHERE status = 'FUTURE' ORDER BY id DESC LIMIT 1\n        \"\"\"\n        )\n    )\n    search_settings_future_fetch = result2.fetchall()\n    search_settings_future = (\n        SearchSettings(**search_settings_future_fetch[0]._asdict())\n        if search_settings_future_fetch\n        else None\n    )\n\n    if not isinstance(search_settings, SearchSettings):\n        raise RuntimeError(\n            \"current search settings is of type \" + str(type(search_settings))\n        )\n    if (\n        not isinstance(search_settings_future, SearchSettings)\n        and search_settings_future is not None\n    ):\n        raise RuntimeError(\n            \"future search settings is of type \" + str(type(search_settings_future))\n        )\n\n    return search_settings, search_settings_future\n\n\ndef _get_batch_documents_with_multiple_tags(\n    batch_size: int = 128,\n) -> Generator[list[str], None, None]:\n    \"\"\"\n    Returns a list of document ids which contain a one to many tag.\n    The document may either contain a list metadata value, or may contain leftover\n    old tags from reindexing.\n    \"\"\"\n    offset_clause = \"\"\n    bind = op.get_bind()\n\n    while True:\n        batch = bind.execute(\n            sa.text(\n                f\"\"\"\n                SELECT DISTINCT document__tag.document_id\n                FROM tag\n                JOIN document__tag ON tag.id = document__tag.tag_id\n                GROUP BY tag.tag_key, tag.source, document__tag.document_id\n                HAVING count(*) > 1 {offset_clause}\n                ORDER BY document__tag.document_id\n                LIMIT {batch_size}\n                \"\"\"\n            )\n        ).fetchall()\n        if not batch:\n            break\n        doc_ids = [document_id for (document_id,) in batch]\n        yield doc_ids\n        offset_clause = f\"AND document__tag.document_id > '{doc_ids[-1]}'\"\n\n\ndef _get_vespa_metadata(\n    document_id: str, index_name: str\n) -> dict[str, str | list[str]]:\n    url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)\n\n    # Document-Selector language\n    selection = (\n        f\"{index_name}.document_id=='{document_id}' and {index_name}.chunk_id==0\"\n    )\n\n    params: dict[str, str | int] = {\n        \"selection\": selection,\n        \"wantedDocumentCount\": 1,\n        \"fieldSet\": f\"{index_name}:metadata\",\n    }\n\n    with get_vespa_http_client() as client:\n        resp = client.get(url, params=params)\n        resp.raise_for_status()\n\n    docs = resp.json().get(\"documents\", [])\n    if not docs:\n        raise RuntimeError(f\"No chunk-0 found for document {document_id}\")\n\n    # for some reason, metadata is a string\n    metadata = docs[0][\"fields\"][\"metadata\"]\n    return json.loads(metadata)\n\n\ndef _get_document_tags(document_id: str) -> list[tuple[int, str, str]]:\n    bind = op.get_bind()\n    result = bind.execute(\n        sa.text(\n            f\"\"\"\n            SELECT tag.id, tag.tag_key, tag.tag_value\n            FROM tag\n            JOIN document__tag ON tag.id = document__tag.tag_id\n            WHERE document__tag.document_id = '{document_id}'\n            \"\"\"\n        )\n    ).fetchall()\n    return cast(list[tuple[int, str, str]], result)\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"tag\",\n        sa.Column(\"is_list\", sa.Boolean(), nullable=False, server_default=\"false\"),\n    )\n    op.drop_constraint(\n        constraint_name=\"_tag_key_value_source_uc\",\n        table_name=\"tag\",\n        type_=\"unique\",\n    )\n    op.create_unique_constraint(\n        constraint_name=\"_tag_key_value_source_list_uc\",\n        table_name=\"tag\",\n        columns=[\"tag_key\", \"tag_value\", \"source\", \"is_list\"],\n    )\n    set_is_list_for_known_tags()\n\n    if SKIP_TAG_FIX:\n        logger.warning(\n            \"Skipping removal of old tags. \"\n            \"This can cause issues when using the knowledge graph, or \"\n            \"when filtering for documents by tags.\"\n        )\n        log_list_tags()\n        return\n\n    remove_old_tags()\n    set_is_list_for_list_tags()\n\n    # debug\n    log_list_tags()\n\n\ndef downgrade() -> None:\n    # the migration adds and populates the is_list column, and removes old bugged tags\n    # there isn't a point in adding back the bugged tags, so we just drop the column\n    op.drop_constraint(\n        constraint_name=\"_tag_key_value_source_list_uc\",\n        table_name=\"tag\",\n        type_=\"unique\",\n    )\n    op.create_unique_constraint(\n        constraint_name=\"_tag_key_value_source_uc\",\n        table_name=\"tag\",\n        columns=[\"tag_key\", \"tag_value\", \"source\"],\n    )\n    op.drop_column(\"tag\", \"is_list\")\n"
  },
  {
    "path": "backend/alembic/versions/91a0a4d62b14_milestone.py",
    "content": "\"\"\"Milestone\n\nRevision ID: 91a0a4d62b14\nRevises: dab04867cd88\nCreate Date: 2024-12-13 19:03:30.947551\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nimport fastapi_users_db_sqlalchemy\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"91a0a4d62b14\"\ndown_revision = \"dab04867cd88\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"milestone\",\n        sa.Column(\"id\", sa.UUID(), nullable=False),\n        sa.Column(\"tenant_id\", sa.String(), nullable=True),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.Column(\"event_type\", sa.String(), nullable=False),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"event_tracker\", postgresql.JSONB(), nullable=True),\n        sa.ForeignKeyConstraint([\"user_id\"], [\"user.id\"], ondelete=\"CASCADE\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"event_type\", name=\"uq_milestone_event_type\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"milestone\")\n"
  },
  {
    "path": "backend/alembic/versions/91fd3b470d1a_remove_documentsource_from_tag.py",
    "content": "\"\"\"Remove DocumentSource from Tag\n\nRevision ID: 91fd3b470d1a\nRevises: 173cae5bba26\nCreate Date: 2024-03-21 12:05:23.956734\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom onyx.configs.constants import DocumentSource\n\n# revision identifiers, used by Alembic.\nrevision = \"91fd3b470d1a\"\ndown_revision = \"173cae5bba26\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\n        \"tag\",\n        \"source\",\n        type_=sa.String(length=50),\n        existing_type=sa.Enum(DocumentSource, native_enum=False),\n        existing_nullable=False,\n    )\n\n\ndef downgrade() -> None:\n    op.alter_column(\n        \"tag\",\n        \"source\",\n        type_=sa.Enum(DocumentSource, native_enum=False),\n        existing_type=sa.String(length=50),\n        existing_nullable=False,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/91ffac7e65b3_add_expiry_time.py",
    "content": "\"\"\"add expiry time\n\nRevision ID: 91ffac7e65b3\nRevises: bc9771dccadf\nCreate Date: 2024-06-24 09:39:56.462242\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"91ffac7e65b3\"\ndown_revision = \"795b20b85b4b\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\", sa.Column(\"oidc_expiry\", sa.DateTime(timezone=True), nullable=True)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"oidc_expiry\")\n"
  },
  {
    "path": "backend/alembic/versions/93560ba1b118_add_web_ui_option_to_slack_config.py",
    "content": "\"\"\"add web ui option to slack config\n\nRevision ID: 93560ba1b118\nRevises: 6d562f86c78b\nCreate Date: 2024-11-24 06:36:17.490612\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"93560ba1b118\"\ndown_revision = \"6d562f86c78b\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add show_continue_in_web_ui with default False to all existing channel_configs\n    op.execute(\n        \"\"\"\n        UPDATE slack_channel_config\n        SET channel_config = channel_config || '{\"show_continue_in_web_ui\": false}'::jsonb\n        WHERE NOT channel_config ? 'show_continue_in_web_ui'\n        \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    # Remove show_continue_in_web_ui from all channel_configs\n    op.execute(\n        \"\"\"\n        UPDATE slack_channel_config\n        SET channel_config = channel_config - 'show_continue_in_web_ui'\n        \"\"\"\n    )\n"
  },
  {
    "path": "backend/alembic/versions/93a2e195e25c_add_voice_provider_and_user_voice_prefs.py",
    "content": "\"\"\"add_voice_provider_and_user_voice_prefs\n\nRevision ID: 93a2e195e25c\nRevises: 27fb147a843f\nCreate Date: 2026-02-23 15:16:39.507304\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy import column\nfrom sqlalchemy import true\nfrom sqlalchemy.dialects import postgresql\n\n\n# revision identifiers, used by Alembic.\nrevision = \"93a2e195e25c\"\ndown_revision = \"27fb147a843f\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Create voice_provider table\n    op.create_table(\n        \"voice_provider\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\"name\", sa.String(), unique=True, nullable=False),\n        sa.Column(\"provider_type\", sa.String(), nullable=False),\n        sa.Column(\"api_key\", sa.LargeBinary(), nullable=True),\n        sa.Column(\"api_base\", sa.String(), nullable=True),\n        sa.Column(\"custom_config\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"stt_model\", sa.String(), nullable=True),\n        sa.Column(\"tts_model\", sa.String(), nullable=True),\n        sa.Column(\"default_voice\", sa.String(), nullable=True),\n        sa.Column(\n            \"is_default_stt\", sa.Boolean(), nullable=False, server_default=\"false\"\n        ),\n        sa.Column(\n            \"is_default_tts\", sa.Boolean(), nullable=False, server_default=\"false\"\n        ),\n        sa.Column(\"deleted\", sa.Boolean(), nullable=False, server_default=\"false\"),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            onupdate=sa.func.now(),\n            nullable=False,\n        ),\n    )\n\n    # Add partial unique indexes to enforce only one default STT/TTS provider\n    op.create_index(\n        \"ix_voice_provider_one_default_stt\",\n        \"voice_provider\",\n        [\"is_default_stt\"],\n        unique=True,\n        postgresql_where=column(\"is_default_stt\") == true(),\n    )\n    op.create_index(\n        \"ix_voice_provider_one_default_tts\",\n        \"voice_provider\",\n        [\"is_default_tts\"],\n        unique=True,\n        postgresql_where=column(\"is_default_tts\") == true(),\n    )\n\n    # Add voice preference columns to user table\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"voice_auto_send\",\n            sa.Boolean(),\n            default=False,\n            nullable=False,\n            server_default=\"false\",\n        ),\n    )\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"voice_auto_playback\",\n            sa.Boolean(),\n            default=False,\n            nullable=False,\n            server_default=\"false\",\n        ),\n    )\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"voice_playback_speed\",\n            sa.Float(),\n            default=1.0,\n            nullable=False,\n            server_default=\"1.0\",\n        ),\n    )\n\n\ndef downgrade() -> None:\n    # Remove user voice preference columns\n    op.drop_column(\"user\", \"voice_playback_speed\")\n    op.drop_column(\"user\", \"voice_auto_playback\")\n    op.drop_column(\"user\", \"voice_auto_send\")\n\n    op.drop_index(\"ix_voice_provider_one_default_tts\", table_name=\"voice_provider\")\n    op.drop_index(\"ix_voice_provider_one_default_stt\", table_name=\"voice_provider\")\n\n    # Drop voice_provider table\n    op.drop_table(\"voice_provider\")\n"
  },
  {
    "path": "backend/alembic/versions/93c15d6a6fbb_add_chunk_error_and_vespa_count_columns_.py",
    "content": "\"\"\"add chunk error and vespa count columns to opensearch tenant migration\n\nRevision ID: 93c15d6a6fbb\nRevises: d3fd499c829c\nCreate Date: 2026-02-11 23:07:34.576725\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"93c15d6a6fbb\"\ndown_revision = \"d3fd499c829c\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"opensearch_tenant_migration_record\",\n        sa.Column(\n            \"total_chunks_errored\",\n            sa.Integer(),\n            nullable=False,\n            server_default=\"0\",\n        ),\n    )\n    op.add_column(\n        \"opensearch_tenant_migration_record\",\n        sa.Column(\n            \"total_chunks_in_vespa\",\n            sa.Integer(),\n            nullable=False,\n            server_default=\"0\",\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"opensearch_tenant_migration_record\", \"total_chunks_in_vespa\")\n    op.drop_column(\"opensearch_tenant_migration_record\", \"total_chunks_errored\")\n"
  },
  {
    "path": "backend/alembic/versions/949b4a92a401_remove_rt.py",
    "content": "\"\"\"remove rt\n\nRevision ID: 949b4a92a401\nRevises: 1b10e1fda030\nCreate Date: 2024-10-26 13:06:06.937969\n\n\"\"\"\n\nfrom alembic import op\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy import text\n\n# Import your models and constants\nfrom onyx.db.models import (\n    Connector,\n    ConnectorCredentialPair,\n    Credential,\n    IndexAttempt,\n)\n\n\n# revision identifiers, used by Alembic.\nrevision = \"949b4a92a401\"\ndown_revision = \"1b10e1fda030\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Deletes all RequestTracker connectors and associated data\n    bind = op.get_bind()\n    session = Session(bind=bind)\n\n    # Get connectors using raw SQL\n    result = bind.execute(\n        text(\"SELECT id FROM connector WHERE source = 'requesttracker'\")\n    )\n    connector_ids = [row[0] for row in result]\n\n    if connector_ids:\n        cc_pairs_to_delete = (\n            session.query(ConnectorCredentialPair)\n            .filter(ConnectorCredentialPair.connector_id.in_(connector_ids))\n            .all()\n        )\n\n        cc_pair_ids = [cc_pair.id for cc_pair in cc_pairs_to_delete]\n\n        if cc_pair_ids:\n            session.query(IndexAttempt).filter(\n                IndexAttempt.connector_credential_pair_id.in_(cc_pair_ids)\n            ).delete(synchronize_session=False)\n\n            session.query(ConnectorCredentialPair).filter(\n                ConnectorCredentialPair.id.in_(cc_pair_ids)\n            ).delete(synchronize_session=False)\n\n        credential_ids = [cc_pair.credential_id for cc_pair in cc_pairs_to_delete]\n        if credential_ids:\n            session.query(Credential).filter(Credential.id.in_(credential_ids)).delete(\n                synchronize_session=False\n            )\n\n        session.query(Connector).filter(Connector.id.in_(connector_ids)).delete(\n            synchronize_session=False\n        )\n\n    session.commit()\n\n\ndef downgrade() -> None:\n    # No-op downgrade as we cannot restore deleted data\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/94dc3d0236f8_make_document_set_description_optional.py",
    "content": "\"\"\"make document set description optional\n\nRevision ID: 94dc3d0236f8\nRevises: bf7a81109301\nCreate Date: 2024-12-11 11:26:10.616722\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"94dc3d0236f8\"\ndown_revision = \"bf7a81109301\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Make document_set.description column nullable\n    op.alter_column(\n        \"document_set\", \"description\", existing_type=sa.String(), nullable=True\n    )\n\n\ndef downgrade() -> None:\n    # Revert document_set.description column to non-nullable\n    op.alter_column(\n        \"document_set\", \"description\", existing_type=sa.String(), nullable=False\n    )\n"
  },
  {
    "path": "backend/alembic/versions/96a5702df6aa_mcp_tool_enabled.py",
    "content": "\"\"\"mcp_tool_enabled\n\nRevision ID: 96a5702df6aa\nRevises: 40926a4dab77\nCreate Date: 2025-10-09 12:10:21.733097\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"96a5702df6aa\"\ndown_revision = \"40926a4dab77\"\nbranch_labels = None\ndepends_on = None\n\n\nDELETE_DISABLED_TOOLS_SQL = \"DELETE FROM tool WHERE enabled = false\"\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"tool\",\n        sa.Column(\n            \"enabled\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.true(),\n        ),\n    )\n    op.create_index(\n        \"ix_tool_mcp_server_enabled\",\n        \"tool\",\n        [\"mcp_server_id\", \"enabled\"],\n    )\n    # Remove the server default so application controls defaulting\n    op.alter_column(\"tool\", \"enabled\", server_default=None)\n\n\ndef downgrade() -> None:\n    op.execute(DELETE_DISABLED_TOOLS_SQL)\n    op.drop_index(\"ix_tool_mcp_server_enabled\", table_name=\"tool\")\n    op.drop_column(\"tool\", \"enabled\")\n"
  },
  {
    "path": "backend/alembic/versions/977e834c1427_seed_default_groups.py",
    "content": "\"\"\"seed_default_groups\n\nRevision ID: 977e834c1427\nRevises: 8188861f4e92\nCreate Date: 2026-03-25 14:59:41.313091\n\n\"\"\"\n\nfrom typing import Any\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\n\n\n# revision identifiers, used by Alembic.\nrevision = \"977e834c1427\"\ndown_revision = \"8188861f4e92\"\nbranch_labels = None\ndepends_on = None\n\n# (group_name, permission_value)\nDEFAULT_GROUPS = [\n    (\"Admin\", \"admin\"),\n    (\"Basic\", \"basic\"),\n]\n\nCUSTOM_SUFFIX = \"(Custom)\"\n\nMAX_RENAME_ATTEMPTS = 100\n\n# Reflect table structures for use in DML\nuser_group_table = sa.table(\n    \"user_group\",\n    sa.column(\"id\", sa.Integer),\n    sa.column(\"name\", sa.String),\n    sa.column(\"is_up_to_date\", sa.Boolean),\n    sa.column(\"is_up_for_deletion\", sa.Boolean),\n    sa.column(\"is_default\", sa.Boolean),\n)\n\npermission_grant_table = sa.table(\n    \"permission_grant\",\n    sa.column(\"group_id\", sa.Integer),\n    sa.column(\"permission\", sa.String),\n    sa.column(\"grant_source\", sa.String),\n)\n\nuser__user_group_table = sa.table(\n    \"user__user_group\",\n    sa.column(\"user_group_id\", sa.Integer),\n    sa.column(\"user_id\", sa.Uuid),\n)\n\n\ndef _find_available_name(conn: sa.engine.Connection, base: str) -> str:\n    \"\"\"Return a name like 'Admin (Custom)' or 'Admin (Custom 2)' that is not taken.\"\"\"\n    candidate = f\"{base} {CUSTOM_SUFFIX}\"\n    attempt = 1\n    while attempt <= MAX_RENAME_ATTEMPTS:\n        exists: Any = conn.execute(\n            sa.select(sa.literal(1))\n            .select_from(user_group_table)\n            .where(user_group_table.c.name == candidate)\n            .limit(1)\n        ).fetchone()\n        if exists is None:\n            return candidate\n        attempt += 1\n        candidate = f\"{base} (Custom {attempt})\"\n    raise RuntimeError(\n        f\"Could not find an available name for group '{base}' \"\n        f\"after {MAX_RENAME_ATTEMPTS} attempts\"\n    )\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n\n    for group_name, permission_value in DEFAULT_GROUPS:\n        # Step 1: Rename ALL existing groups that clash with the canonical name.\n        conflicting = conn.execute(\n            sa.select(user_group_table.c.id, user_group_table.c.name).where(\n                user_group_table.c.name == group_name\n            )\n        ).fetchall()\n\n        for row_id, row_name in conflicting:\n            new_name = _find_available_name(conn, row_name)\n            op.execute(\n                sa.update(user_group_table)\n                .where(user_group_table.c.id == row_id)\n                .values(name=new_name, is_up_to_date=False)\n            )\n\n        # Step 2: Create a fresh default group.\n        result = conn.execute(\n            user_group_table.insert()\n            .values(\n                name=group_name,\n                is_up_to_date=True,\n                is_up_for_deletion=False,\n                is_default=True,\n            )\n            .returning(user_group_table.c.id)\n        ).fetchone()\n        assert result is not None\n        group_id = result[0]\n\n        # Step 3: Upsert permission grant.\n        op.execute(\n            pg_insert(permission_grant_table)\n            .values(\n                group_id=group_id,\n                permission=permission_value,\n                grant_source=\"SYSTEM\",\n            )\n            .on_conflict_do_nothing(index_elements=[\"group_id\", \"permission\"])\n        )\n\n\ndef downgrade() -> None:\n    # Remove the default groups created by this migration.\n    # First remove user-group memberships that reference default groups\n    # to avoid FK violations, then delete the groups themselves.\n    default_group_ids = sa.select(user_group_table.c.id).where(\n        user_group_table.c.is_default == True  # noqa: E712\n    )\n    conn = op.get_bind()\n    conn.execute(\n        sa.delete(user__user_group_table).where(\n            user__user_group_table.c.user_group_id.in_(default_group_ids)\n        )\n    )\n    conn.execute(\n        sa.delete(user_group_table).where(\n            user_group_table.c.is_default == True  # noqa: E712\n        )\n    )\n"
  },
  {
    "path": "backend/alembic/versions/97dbb53fa8c8_add_syncrecord.py",
    "content": "\"\"\"Add SyncRecord\n\nRevision ID: 97dbb53fa8c8\nRevises: 369644546676\nCreate Date: 2025-01-11 19:39:50.426302\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"97dbb53fa8c8\"\ndown_revision = \"be2ab2aa50ee\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"sync_record\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"entity_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"sync_type\",\n            sa.Enum(\n                \"DOCUMENT_SET\",\n                \"USER_GROUP\",\n                \"CONNECTOR_DELETION\",\n                name=\"synctype\",\n                native_enum=False,\n                length=40,\n            ),\n            nullable=False,\n        ),\n        sa.Column(\n            \"sync_status\",\n            sa.Enum(\n                \"IN_PROGRESS\",\n                \"SUCCESS\",\n                \"FAILED\",\n                \"CANCELED\",\n                name=\"syncstatus\",\n                native_enum=False,\n                length=40,\n            ),\n            nullable=False,\n        ),\n        sa.Column(\"num_docs_synced\", sa.Integer(), nullable=False),\n        sa.Column(\"sync_start_time\", sa.DateTime(timezone=True), nullable=False),\n        sa.Column(\"sync_end_time\", sa.DateTime(timezone=True), nullable=True),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    # Add index for fetch_latest_sync_record query\n    op.create_index(\n        \"ix_sync_record_entity_id_sync_type_sync_start_time\",\n        \"sync_record\",\n        [\"entity_id\", \"sync_type\", \"sync_start_time\"],\n    )\n\n    # Add index for cleanup_sync_records query\n    op.create_index(\n        \"ix_sync_record_entity_id_sync_type_sync_status\",\n        \"sync_record\",\n        [\"entity_id\", \"sync_type\", \"sync_status\"],\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\"ix_sync_record_entity_id_sync_type_sync_status\")\n    op.drop_index(\"ix_sync_record_entity_id_sync_type_sync_start_time\")\n    op.drop_table(\"sync_record\")\n"
  },
  {
    "path": "backend/alembic/versions/98a5008d8711_agent_tracking.py",
    "content": "\"\"\"agent_tracking\n\nRevision ID: 98a5008d8711\nRevises: 2f80c6a2550f\nCreate Date: 2025-01-29 17:00:00.000001\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\nfrom sqlalchemy.dialects.postgresql import UUID\n\n# revision identifiers, used by Alembic.\nrevision = \"98a5008d8711\"\ndown_revision = \"2f80c6a2550f\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"agent__search_metrics\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"user_id\", postgresql.UUID(as_uuid=True), nullable=True),\n        sa.Column(\"persona_id\", sa.Integer(), nullable=True),\n        sa.Column(\"agent_type\", sa.String(), nullable=False),\n        sa.Column(\"start_time\", sa.DateTime(timezone=True), nullable=False),\n        sa.Column(\"base_duration_s\", sa.Float(), nullable=False),\n        sa.Column(\"full_duration_s\", sa.Float(), nullable=False),\n        sa.Column(\"base_metrics\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"refined_metrics\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"all_metrics\", postgresql.JSONB(), nullable=True),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n        ),\n        sa.ForeignKeyConstraint([\"user_id\"], [\"user.id\"], ondelete=\"CASCADE\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    # Create sub_question table\n    op.create_table(\n        \"agent__sub_question\",\n        sa.Column(\"id\", sa.Integer, primary_key=True),\n        sa.Column(\"primary_question_id\", sa.Integer, sa.ForeignKey(\"chat_message.id\")),\n        sa.Column(\n            \"chat_session_id\", UUID(as_uuid=True), sa.ForeignKey(\"chat_session.id\")\n        ),\n        sa.Column(\"sub_question\", sa.Text),\n        sa.Column(\n            \"time_created\", sa.DateTime(timezone=True), server_default=sa.func.now()\n        ),\n        sa.Column(\"sub_answer\", sa.Text),\n        sa.Column(\"sub_question_doc_results\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"level\", sa.Integer(), nullable=False),\n        sa.Column(\"level_question_num\", sa.Integer(), nullable=False),\n    )\n\n    # Create sub_query table\n    op.create_table(\n        \"agent__sub_query\",\n        sa.Column(\"id\", sa.Integer, primary_key=True),\n        sa.Column(\n            \"parent_question_id\", sa.Integer, sa.ForeignKey(\"agent__sub_question.id\")\n        ),\n        sa.Column(\n            \"chat_session_id\", UUID(as_uuid=True), sa.ForeignKey(\"chat_session.id\")\n        ),\n        sa.Column(\"sub_query\", sa.Text),\n        sa.Column(\n            \"time_created\", sa.DateTime(timezone=True), server_default=sa.func.now()\n        ),\n    )\n\n    # Create sub_query__search_doc association table\n    op.create_table(\n        \"agent__sub_query__search_doc\",\n        sa.Column(\n            \"sub_query_id\",\n            sa.Integer,\n            sa.ForeignKey(\"agent__sub_query.id\"),\n            primary_key=True,\n        ),\n        sa.Column(\n            \"search_doc_id\",\n            sa.Integer,\n            sa.ForeignKey(\"search_doc.id\"),\n            primary_key=True,\n        ),\n    )\n\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\n            \"refined_answer_improvement\",\n            sa.Boolean(),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_message\", \"refined_answer_improvement\")\n    op.drop_table(\"agent__sub_query__search_doc\")\n    op.drop_table(\"agent__sub_query\")\n    op.drop_table(\"agent__sub_question\")\n    op.drop_table(\"agent__search_metrics\")\n"
  },
  {
    "path": "backend/alembic/versions/9a0296d7421e_add_is_auto_mode_to_llm_provider.py",
    "content": "\"\"\"add_is_auto_mode_to_llm_provider\n\nRevision ID: 9a0296d7421e\nRevises: 7206234e012a\nCreate Date: 2025-12-17 18:14:29.620981\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"9a0296d7421e\"\ndown_revision = \"7206234e012a\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"llm_provider\",\n        sa.Column(\n            \"is_auto_mode\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=\"false\",\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"llm_provider\", \"is_auto_mode\")\n"
  },
  {
    "path": "backend/alembic/versions/9aadf32dfeb4_add_user_files.py",
    "content": "\"\"\"add user files\n\nRevision ID: 9aadf32dfeb4\nRevises: 3781a5eb12cb\nCreate Date: 2025-01-26 16:08:21.551022\n\n\"\"\"\n\nimport sqlalchemy as sa\nimport datetime\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"9aadf32dfeb4\"\ndown_revision = \"3781a5eb12cb\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Create user_folder table without parent_id\n    op.create_table(\n        \"user_folder\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True, autoincrement=True),\n        sa.Column(\"user_id\", sa.UUID(), sa.ForeignKey(\"user.id\"), nullable=True),\n        sa.Column(\"name\", sa.String(length=255), nullable=True),\n        sa.Column(\"description\", sa.String(length=255), nullable=True),\n        sa.Column(\"display_priority\", sa.Integer(), nullable=True, default=0),\n        sa.Column(\n            \"created_at\", sa.DateTime(timezone=True), server_default=sa.func.now()\n        ),\n    )\n\n    # Create user_file table with folder_id instead of parent_folder_id\n    op.create_table(\n        \"user_file\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True, autoincrement=True),\n        sa.Column(\"user_id\", sa.UUID(), sa.ForeignKey(\"user.id\"), nullable=True),\n        sa.Column(\n            \"folder_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"user_folder.id\"),\n            nullable=True,\n        ),\n        sa.Column(\"link_url\", sa.String(), nullable=True),\n        sa.Column(\"token_count\", sa.Integer(), nullable=True),\n        sa.Column(\"file_type\", sa.String(), nullable=True),\n        sa.Column(\"file_id\", sa.String(length=255), nullable=False),\n        sa.Column(\"document_id\", sa.String(length=255), nullable=False),\n        sa.Column(\"name\", sa.String(length=255), nullable=False),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(),\n            default=datetime.datetime.utcnow,\n        ),\n        sa.Column(\n            \"cc_pair_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"connector_credential_pair.id\"),\n            nullable=True,\n            unique=True,\n        ),\n    )\n\n    # Create persona__user_file table\n    op.create_table(\n        \"persona__user_file\",\n        sa.Column(\n            \"persona_id\", sa.Integer(), sa.ForeignKey(\"persona.id\"), primary_key=True\n        ),\n        sa.Column(\n            \"user_file_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"user_file.id\"),\n            primary_key=True,\n        ),\n    )\n\n    # Create persona__user_folder table\n    op.create_table(\n        \"persona__user_folder\",\n        sa.Column(\n            \"persona_id\", sa.Integer(), sa.ForeignKey(\"persona.id\"), primary_key=True\n        ),\n        sa.Column(\n            \"user_folder_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"user_folder.id\"),\n            primary_key=True,\n        ),\n    )\n\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\"is_user_file\", sa.Boolean(), nullable=True, default=False),\n    )\n\n    # Update existing records to have is_user_file=False instead of NULL\n    op.execute(\n        \"UPDATE connector_credential_pair SET is_user_file = FALSE WHERE is_user_file IS NULL\"\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"connector_credential_pair\", \"is_user_file\")\n    # Drop the persona__user_folder table\n    op.drop_table(\"persona__user_folder\")\n    # Drop the persona__user_file table\n    op.drop_table(\"persona__user_file\")\n    # Drop the user_file table\n    op.drop_table(\"user_file\")\n    # Drop the user_folder table\n    op.drop_table(\"user_folder\")\n"
  },
  {
    "path": "backend/alembic/versions/9b66d3156fc6_user_file_schema_additions.py",
    "content": "\"\"\"Migration 1: User file schema additions\n\nRevision ID: 9b66d3156fc6\nRevises: b4ef3ae0bf6e\nCreate Date: 2025-09-22 09:42:06.086732\n\nThis migration adds new columns and tables without modifying existing data.\nIt is safe to run and can be easily rolled back.\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql as psql\nimport logging\n\nlogger = logging.getLogger(\"alembic.runtime.migration\")\n# revision identifiers, used by Alembic.\nrevision = \"9b66d3156fc6\"\ndown_revision = \"b4ef3ae0bf6e\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    \"\"\"Add new columns and tables without modifying existing data.\"\"\"\n\n    # Enable pgcrypto for UUID generation\n    op.execute(\"CREATE EXTENSION IF NOT EXISTS pgcrypto\")\n\n    bind = op.get_bind()\n    inspector = sa.inspect(bind)\n\n    # === USER_FILE: Add new columns ===\n    logger.info(\"Adding new columns to user_file table...\")\n\n    user_file_columns = [col[\"name\"] for col in inspector.get_columns(\"user_file\")]\n\n    # Check if ID is already UUID (in case of re-run after partial migration)\n    id_is_uuid = any(\n        col[\"name\"] == \"id\" and \"uuid\" in str(col[\"type\"]).lower()\n        for col in inspector.get_columns(\"user_file\")\n    )\n\n    # Add transitional UUID column only if ID is not already UUID\n    if \"new_id\" not in user_file_columns and not id_is_uuid:\n        op.add_column(\n            \"user_file\",\n            sa.Column(\n                \"new_id\",\n                psql.UUID(as_uuid=True),\n                nullable=True,\n                server_default=sa.text(\"gen_random_uuid()\"),\n            ),\n        )\n        op.create_unique_constraint(\"uq_user_file_new_id\", \"user_file\", [\"new_id\"])\n        logger.info(\"Added new_id column to user_file\")\n\n    # Add status column\n    if \"status\" not in user_file_columns:\n        op.add_column(\n            \"user_file\",\n            sa.Column(\n                \"status\",\n                sa.Enum(\n                    \"PROCESSING\",\n                    \"COMPLETED\",\n                    \"FAILED\",\n                    \"CANCELED\",\n                    name=\"userfilestatus\",\n                    native_enum=False,\n                ),\n                nullable=False,\n                server_default=\"PROCESSING\",\n            ),\n        )\n        logger.info(\"Added status column to user_file\")\n\n    # Add other tracking columns\n    if \"chunk_count\" not in user_file_columns:\n        op.add_column(\n            \"user_file\", sa.Column(\"chunk_count\", sa.Integer(), nullable=True)\n        )\n        logger.info(\"Added chunk_count column to user_file\")\n\n    if \"last_accessed_at\" not in user_file_columns:\n        op.add_column(\n            \"user_file\",\n            sa.Column(\"last_accessed_at\", sa.DateTime(timezone=True), nullable=True),\n        )\n        logger.info(\"Added last_accessed_at column to user_file\")\n\n    if \"needs_project_sync\" not in user_file_columns:\n        op.add_column(\n            \"user_file\",\n            sa.Column(\n                \"needs_project_sync\",\n                sa.Boolean(),\n                nullable=False,\n                server_default=sa.text(\"false\"),\n            ),\n        )\n        logger.info(\"Added needs_project_sync column to user_file\")\n\n    if \"last_project_sync_at\" not in user_file_columns:\n        op.add_column(\n            \"user_file\",\n            sa.Column(\n                \"last_project_sync_at\", sa.DateTime(timezone=True), nullable=True\n            ),\n        )\n        logger.info(\"Added last_project_sync_at column to user_file\")\n\n    if \"document_id_migrated\" not in user_file_columns:\n        op.add_column(\n            \"user_file\",\n            sa.Column(\n                \"document_id_migrated\",\n                sa.Boolean(),\n                nullable=False,\n                server_default=sa.text(\"true\"),\n            ),\n        )\n        logger.info(\"Added document_id_migrated column to user_file\")\n\n    # === USER_FOLDER -> USER_PROJECT rename ===\n    table_names = set(inspector.get_table_names())\n\n    if \"user_folder\" in table_names:\n        logger.info(\"Updating user_folder table...\")\n        # Make description nullable first\n        op.alter_column(\"user_folder\", \"description\", nullable=True)\n\n        # Rename table if user_project doesn't exist\n        if \"user_project\" not in table_names:\n            op.execute(\"ALTER TABLE user_folder RENAME TO user_project\")\n            logger.info(\"Renamed user_folder to user_project\")\n    elif \"user_project\" in table_names:\n        # If already renamed, ensure column nullability\n        project_cols = [col[\"name\"] for col in inspector.get_columns(\"user_project\")]\n        if \"description\" in project_cols:\n            op.alter_column(\"user_project\", \"description\", nullable=True)\n\n    # Add instructions column to user_project\n    inspector = sa.inspect(bind)  # Refresh after rename\n    if \"user_project\" in inspector.get_table_names():\n        project_columns = [col[\"name\"] for col in inspector.get_columns(\"user_project\")]\n        if \"instructions\" not in project_columns:\n            op.add_column(\n                \"user_project\",\n                sa.Column(\"instructions\", sa.String(), nullable=True),\n            )\n            logger.info(\"Added instructions column to user_project\")\n\n    # === CHAT_SESSION: Add project_id ===\n    chat_session_columns = [\n        col[\"name\"] for col in inspector.get_columns(\"chat_session\")\n    ]\n    if \"project_id\" not in chat_session_columns:\n        op.add_column(\n            \"chat_session\",\n            sa.Column(\"project_id\", sa.Integer(), nullable=True),\n        )\n        logger.info(\"Added project_id column to chat_session\")\n\n    # === PERSONA__USER_FILE: Add UUID column ===\n    persona_user_file_columns = [\n        col[\"name\"] for col in inspector.get_columns(\"persona__user_file\")\n    ]\n    if \"user_file_id_uuid\" not in persona_user_file_columns:\n        op.add_column(\n            \"persona__user_file\",\n            sa.Column(\"user_file_id_uuid\", psql.UUID(as_uuid=True), nullable=True),\n        )\n        logger.info(\"Added user_file_id_uuid column to persona__user_file\")\n\n    # === PROJECT__USER_FILE: Create new table ===\n    if \"project__user_file\" not in inspector.get_table_names():\n        op.create_table(\n            \"project__user_file\",\n            sa.Column(\"project_id\", sa.Integer(), nullable=False),\n            sa.Column(\"user_file_id\", psql.UUID(as_uuid=True), nullable=False),\n            sa.PrimaryKeyConstraint(\"project_id\", \"user_file_id\"),\n        )\n        logger.info(\"Created project__user_file table\")\n\n    # Only create the index if it doesn't exist\n    existing_indexes = [\n        ix[\"name\"] for ix in inspector.get_indexes(\"project__user_file\")\n    ]\n    if \"idx_project__user_file_user_file_id\" not in existing_indexes:\n        op.create_index(\n            \"idx_project__user_file_user_file_id\",\n            \"project__user_file\",\n            [\"user_file_id\"],\n        )\n        logger.info(\n            \"Created index idx_project__user_file_user_file_id on project__user_file\"\n        )\n\n    logger.info(\"Migration 1 (schema additions) completed successfully\")\n\n\ndef downgrade() -> None:\n    \"\"\"Remove added columns and tables.\"\"\"\n\n    bind = op.get_bind()\n    inspector = sa.inspect(bind)\n\n    logger.info(\"Starting downgrade of schema additions...\")\n\n    # Drop project__user_file table\n    if \"project__user_file\" in inspector.get_table_names():\n        # op.drop_index(\"idx_project__user_file_user_file_id\", \"project__user_file\")\n        op.drop_table(\"project__user_file\")\n        logger.info(\"Dropped project__user_file table\")\n\n    # Remove columns from persona__user_file\n    if \"persona__user_file\" in inspector.get_table_names():\n        columns = [col[\"name\"] for col in inspector.get_columns(\"persona__user_file\")]\n        if \"user_file_id_uuid\" in columns:\n            op.drop_column(\"persona__user_file\", \"user_file_id_uuid\")\n            logger.info(\"Dropped user_file_id_uuid from persona__user_file\")\n\n    # Remove columns from chat_session\n    if \"chat_session\" in inspector.get_table_names():\n        columns = [col[\"name\"] for col in inspector.get_columns(\"chat_session\")]\n        if \"project_id\" in columns:\n            op.drop_column(\"chat_session\", \"project_id\")\n            logger.info(\"Dropped project_id from chat_session\")\n\n    # Rename user_project back to user_folder and remove instructions\n    if \"user_project\" in inspector.get_table_names():\n        columns = [col[\"name\"] for col in inspector.get_columns(\"user_project\")]\n        if \"instructions\" in columns:\n            op.drop_column(\"user_project\", \"instructions\")\n        op.execute(\"ALTER TABLE user_project RENAME TO user_folder\")\n        # Update NULL descriptions to empty string before setting NOT NULL constraint\n        op.execute(\"UPDATE user_folder SET description = '' WHERE description IS NULL\")\n        op.alter_column(\"user_folder\", \"description\", nullable=False)\n        logger.info(\"Renamed user_project back to user_folder\")\n\n    # Remove columns from user_file\n    if \"user_file\" in inspector.get_table_names():\n        columns = [col[\"name\"] for col in inspector.get_columns(\"user_file\")]\n\n        columns_to_drop = [\n            \"document_id_migrated\",\n            \"last_project_sync_at\",\n            \"needs_project_sync\",\n            \"last_accessed_at\",\n            \"chunk_count\",\n            \"status\",\n        ]\n\n        for col in columns_to_drop:\n            if col in columns:\n                op.drop_column(\"user_file\", col)\n                logger.info(f\"Dropped {col} from user_file\")\n\n        if \"new_id\" in columns:\n            op.drop_constraint(\"uq_user_file_new_id\", \"user_file\", type_=\"unique\")\n            op.drop_column(\"user_file\", \"new_id\")\n            logger.info(\"Dropped new_id from user_file\")\n\n    # Drop enum type if no columns use it\n    bind.execute(sa.text(\"DROP TYPE IF EXISTS userfilestatus\"))\n\n    logger.info(\"Downgrade completed successfully\")\n"
  },
  {
    "path": "backend/alembic/versions/9c00a2bccb83_chat_message_agentic.py",
    "content": "\"\"\"chat_message_agentic\n\nRevision ID: 9c00a2bccb83\nRevises: b7a7eee5aa15\nCreate Date: 2025-02-17 11:15:43.081150\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"9c00a2bccb83\"\ndown_revision = \"b7a7eee5aa15\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # First add the column as nullable\n    op.add_column(\"chat_message\", sa.Column(\"is_agentic\", sa.Boolean(), nullable=True))\n\n    # Update existing rows based on presence of SubQuestions\n    op.execute(\n        \"\"\"\n        UPDATE chat_message\n        SET is_agentic = EXISTS (\n            SELECT 1\n            FROM agent__sub_question\n            WHERE agent__sub_question.primary_question_id = chat_message.id\n        )\n        WHERE is_agentic IS NULL\n    \"\"\"\n    )\n\n    # Make the column non-nullable with a default value of False\n    op.alter_column(\n        \"chat_message\", \"is_agentic\", nullable=False, server_default=sa.text(\"false\")\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_message\", \"is_agentic\")\n"
  },
  {
    "path": "backend/alembic/versions/9c54986124c6_add_scim_tables.py",
    "content": "\"\"\"add_scim_tables\n\nRevision ID: 9c54986124c6\nRevises: b51c6844d1df\nCreate Date: 2026-02-12 20:29:47.448614\n\n\"\"\"\n\nfrom alembic import op\nimport fastapi_users_db_sqlalchemy\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"9c54986124c6\"\ndown_revision = \"b51c6844d1df\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"scim_token\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"hashed_token\", sa.String(length=64), nullable=False),\n        sa.Column(\"token_display\", sa.String(), nullable=False),\n        sa.Column(\n            \"created_by_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"is_active\",\n            sa.Boolean(),\n            server_default=sa.text(\"true\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"last_used_at\", sa.DateTime(timezone=True), nullable=True),\n        sa.ForeignKeyConstraint([\"created_by_id\"], [\"user.id\"], ondelete=\"CASCADE\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"hashed_token\"),\n    )\n    op.create_table(\n        \"scim_group_mapping\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"external_id\", sa.String(), nullable=False),\n        sa.Column(\"user_group_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            onupdate=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_group_id\"], [\"user_group.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"user_group_id\"),\n    )\n    op.create_index(\n        op.f(\"ix_scim_group_mapping_external_id\"),\n        \"scim_group_mapping\",\n        [\"external_id\"],\n        unique=True,\n    )\n    op.create_table(\n        \"scim_user_mapping\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"external_id\", sa.String(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            onupdate=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint([\"user_id\"], [\"user.id\"], ondelete=\"CASCADE\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"user_id\"),\n    )\n    op.create_index(\n        op.f(\"ix_scim_user_mapping_external_id\"),\n        \"scim_user_mapping\",\n        [\"external_id\"],\n        unique=True,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\n        op.f(\"ix_scim_user_mapping_external_id\"),\n        table_name=\"scim_user_mapping\",\n    )\n    op.drop_table(\"scim_user_mapping\")\n    op.drop_index(\n        op.f(\"ix_scim_group_mapping_external_id\"),\n        table_name=\"scim_group_mapping\",\n    )\n    op.drop_table(\"scim_group_mapping\")\n    op.drop_table(\"scim_token\")\n"
  },
  {
    "path": "backend/alembic/versions/9cf5c00f72fe_add_creator_to_cc_pair.py",
    "content": "\"\"\"add creator to cc pair\n\nRevision ID: 9cf5c00f72fe\nRevises: 26b931506ecb\nCreate Date: 2024-11-12 15:16:42.682902\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"9cf5c00f72fe\"\ndown_revision = \"26b931506ecb\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"creator_id\",\n            sa.UUID(as_uuid=True),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"connector_credential_pair\", \"creator_id\")\n"
  },
  {
    "path": "backend/alembic/versions/9d1543a37106_add_processing_duration_seconds_to_chat_.py",
    "content": "\"\"\"add processing_duration_seconds to chat_message\n\nRevision ID: 9d1543a37106\nRevises: cbc03e08d0f3\nCreate Date: 2026-01-21 11:42:18.546188\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"9d1543a37106\"\ndown_revision = \"cbc03e08d0f3\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"processing_duration_seconds\", sa.Float(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_message\", \"processing_duration_seconds\")\n"
  },
  {
    "path": "backend/alembic/versions/9d97fecfab7f_added_retrieved_docs_to_query_event.py",
    "content": "\"\"\"Added retrieved docs to query event\n\nRevision ID: 9d97fecfab7f\nRevises: ffc707a226b4\nCreate Date: 2023-10-20 12:22:31.930449\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"9d97fecfab7f\"\ndown_revision = \"ffc707a226b4\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"query_event\",\n        sa.Column(\n            \"retrieved_document_ids\",\n            postgresql.ARRAY(sa.String()),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"query_event\", \"retrieved_document_ids\")\n"
  },
  {
    "path": "backend/alembic/versions/9drpiiw74ljy_add_config_to_federated_connector.py",
    "content": "\"\"\"add config to federated_connector\n\nRevision ID: 9drpiiw74ljy\nRevises: 2acdef638fc2\nCreate Date: 2025-11-03 12:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"9drpiiw74ljy\"\ndown_revision = \"2acdef638fc2\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    connection = op.get_bind()\n\n    # Check if column already exists in current schema\n    result = connection.execute(\n        sa.text(\n            \"\"\"\n            SELECT column_name\n            FROM information_schema.columns\n            WHERE table_schema = current_schema()\n            AND table_name = 'federated_connector'\n            AND column_name = 'config'\n            \"\"\"\n        )\n    )\n    column_exists = result.fetchone() is not None\n\n    # Add config column with default empty object (only if it doesn't exist)\n    if not column_exists:\n        op.add_column(\n            \"federated_connector\",\n            sa.Column(\n                \"config\", postgresql.JSONB(), nullable=False, server_default=\"{}\"\n            ),\n        )\n\n    # Data migration: Single bulk update for all Slack connectors\n    connection.execute(\n        sa.text(\n            \"\"\"\n            WITH connector_configs AS (\n                SELECT\n                    fc.id as connector_id,\n                    CASE\n                        WHEN fcds.entities->'channels' IS NOT NULL\n                            AND jsonb_typeof(fcds.entities->'channels') = 'array'\n                            AND jsonb_array_length(fcds.entities->'channels') > 0\n                        THEN\n                            jsonb_build_object(\n                                'channels', fcds.entities->'channels',\n                                'search_all_channels', false\n                            ) ||\n                            CASE\n                                WHEN fcds.entities->'include_dm' IS NOT NULL\n                                THEN jsonb_build_object('include_dm', fcds.entities->'include_dm')\n                                ELSE '{}'::jsonb\n                            END\n                        ELSE\n                            jsonb_build_object('search_all_channels', true) ||\n                            CASE\n                                WHEN fcds.entities->'include_dm' IS NOT NULL\n                                THEN jsonb_build_object('include_dm', fcds.entities->'include_dm')\n                                ELSE '{}'::jsonb\n                            END\n                    END as config\n                FROM federated_connector fc\n                LEFT JOIN LATERAL (\n                    SELECT entities\n                    FROM federated_connector__document_set\n                    WHERE federated_connector_id = fc.id\n                    AND entities IS NOT NULL\n                    ORDER BY id\n                    LIMIT 1\n                ) fcds ON true\n                WHERE fc.source = 'FEDERATED_SLACK'\n                AND fcds.entities IS NOT NULL\n            )\n            UPDATE federated_connector fc\n            SET config = cc.config\n            FROM connector_configs cc\n            WHERE fc.id = cc.connector_id\n            \"\"\"\n        )\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"federated_connector\", \"config\")\n"
  },
  {
    "path": "backend/alembic/versions/9f696734098f_combine_search_and_chat.py",
    "content": "\"\"\"Combine Search and Chat\n\nRevision ID: 9f696734098f\nRevises: a8c2065484e6\nCreate Date: 2024-11-27 15:32:19.694972\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"9f696734098f\"\ndown_revision = \"a8c2065484e6\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\"chat_session\", \"description\", nullable=True)\n    op.drop_column(\"chat_session\", \"one_shot\")\n    op.drop_column(\"slack_channel_config\", \"response_type\")\n\n\ndef downgrade() -> None:\n    op.execute(\"UPDATE chat_session SET description = '' WHERE description IS NULL\")\n    op.alter_column(\"chat_session\", \"description\", nullable=False)\n    op.add_column(\n        \"chat_session\",\n        sa.Column(\"one_shot\", sa.Boolean(), nullable=False, server_default=sa.false()),\n    )\n    op.add_column(\n        \"slack_channel_config\",\n        sa.Column(\n            \"response_type\", sa.String(), nullable=False, server_default=\"citations\"\n        ),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/a01bf2971c5d_update_default_tool_descriptions.py",
    "content": "\"\"\"update_default_tool_descriptions\n\nRevision ID: a01bf2971c5d\nRevises: 87c52ec39f84\nCreate Date: 2025-12-16 15:21:25.656375\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"a01bf2971c5d\"\ndown_revision = \"18b5b2524446\"\nbranch_labels = None\ndepends_on = None\n\n# new tool descriptions (12/2025)\nTOOL_DESCRIPTIONS = {\n    \"SearchTool\": \"The Search Action allows the agent to search through connected knowledge to help build an answer.\",\n    \"ImageGenerationTool\": (\n        \"The Image Generation Action allows the agent to use DALL-E 3 or GPT-IMAGE-1 to generate images. \"\n        \"The action will be used when the user asks the agent to generate an image.\"\n    ),\n    \"WebSearchTool\": (\n        \"The Web Search Action allows the agent to perform internet searches for up-to-date information.\"\n    ),\n    \"KnowledgeGraphTool\": (\n        \"The Knowledge Graph Search Action allows the agent to search the \"\n        \"Knowledge Graph for information. This tool can (for now) only be active in the KG Beta Agent, \"\n        \"and it requires the Knowledge Graph to be enabled.\"\n    ),\n    \"OktaProfileTool\": (\n        \"The Okta Profile Action allows the agent to fetch the current user's information from Okta. \"\n        \"This may include the user's name, email, phone number, address, and other details such as their \"\n        \"manager and direct reports.\"\n    ),\n}\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n    for tool_id, description in TOOL_DESCRIPTIONS.items():\n        conn.execute(\n            sa.text(\n                \"UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id\"\n            ),\n            {\"description\": description, \"tool_id\": tool_id},\n        )\n\n\ndef downgrade() -> None:\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/a1b2c3d4e5f6_add_license_table.py",
    "content": "\"\"\"add license table\n\nRevision ID: a1b2c3d4e5f6\nRevises: a01bf2971c5d\nCreate Date: 2025-12-04 10:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"a1b2c3d4e5f6\"\ndown_revision = \"a01bf2971c5d\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"license\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\"license_data\", sa.Text(), nullable=False),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n    )\n\n    # Singleton pattern - only ever one row in this table\n    op.create_index(\n        \"idx_license_singleton\",\n        \"license\",\n        [sa.text(\"(true)\")],\n        unique=True,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\"idx_license_singleton\", table_name=\"license\")\n    op.drop_table(\"license\")\n"
  },
  {
    "path": "backend/alembic/versions/a1b2c3d4e5f7_drop_agent_search_metrics_table.py",
    "content": "\"\"\"drop agent_search_metrics table\n\nRevision ID: a1b2c3d4e5f7\nRevises: 73e9983e5091\nCreate Date: 2026-01-17\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"a1b2c3d4e5f7\"\ndown_revision = \"73e9983e5091\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.drop_table(\"agent__search_metrics\")\n\n\ndef downgrade() -> None:\n    op.create_table(\n        \"agent__search_metrics\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"user_id\", sa.UUID(), nullable=True),\n        sa.Column(\"persona_id\", sa.Integer(), nullable=True),\n        sa.Column(\"agent_type\", sa.String(), nullable=False),\n        sa.Column(\"start_time\", sa.DateTime(timezone=True), nullable=False),\n        sa.Column(\"base_duration_s\", sa.Float(), nullable=False),\n        sa.Column(\"full_duration_s\", sa.Float(), nullable=False),\n        sa.Column(\"base_metrics\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"refined_metrics\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"all_metrics\", postgresql.JSONB(), nullable=True),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n            ondelete=\"CASCADE\",\n        ),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/a2b3c4d5e6f7_remove_fast_default_model_name.py",
    "content": "\"\"\"Remove fast_default_model_name from llm_provider\n\nRevision ID: a2b3c4d5e6f7\nRevises: 2a391f840e85\nCreate Date: 2024-12-17\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"a2b3c4d5e6f7\"\ndown_revision = \"2a391f840e85\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.drop_column(\"llm_provider\", \"fast_default_model_name\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"llm_provider\",\n        sa.Column(\"fast_default_model_name\", sa.String(), nullable=True),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/a3795dce87be_migration_confluence_to_be_explicit.py",
    "content": "\"\"\"migration confluence to be explicit\n\nRevision ID: a3795dce87be\nRevises: 1f60f60c3401\nCreate Date: 2024-09-01 13:52:12.006740\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\nfrom sqlalchemy.sql import table, column\n\nrevision = \"a3795dce87be\"\ndown_revision = \"1f60f60c3401\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef extract_confluence_keys_from_url(wiki_url: str) -> tuple[str, str, str, bool]:\n    from urllib.parse import urlparse\n\n    def _extract_confluence_keys_from_cloud_url(wiki_url: str) -> tuple[str, str, str]:\n        parsed_url = urlparse(wiki_url)\n        wiki_base = f\"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.split('/spaces')[0]}\"\n        path_parts = parsed_url.path.split(\"/\")\n        space = path_parts[3]\n        page_id = path_parts[5] if len(path_parts) > 5 else \"\"\n        return wiki_base, space, page_id\n\n    def _extract_confluence_keys_from_datacenter_url(\n        wiki_url: str,\n    ) -> tuple[str, str, str]:\n        DISPLAY = \"/display/\"\n        PAGE = \"/pages/\"\n        parsed_url = urlparse(wiki_url)\n        wiki_base = f\"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.split(DISPLAY)[0]}\"\n        space = DISPLAY.join(parsed_url.path.split(DISPLAY)[1:]).split(\"/\")[0]\n        page_id = \"\"\n        if (content := parsed_url.path.split(PAGE)) and len(content) > 1:\n            page_id = content[1]\n        return wiki_base, space, page_id\n\n    is_confluence_cloud = (\n        \".atlassian.net/wiki/spaces/\" in wiki_url\n        or \".jira.com/wiki/spaces/\" in wiki_url\n    )\n\n    if is_confluence_cloud:\n        wiki_base, space, page_id = _extract_confluence_keys_from_cloud_url(wiki_url)\n    else:\n        wiki_base, space, page_id = _extract_confluence_keys_from_datacenter_url(\n            wiki_url\n        )\n\n    return wiki_base, space, page_id, is_confluence_cloud\n\n\ndef reconstruct_confluence_url(\n    wiki_base: str, space: str, page_id: str, is_cloud: bool\n) -> str:\n    if is_cloud:\n        url = f\"{wiki_base}/spaces/{space}\"\n        if page_id:\n            url += f\"/pages/{page_id}\"\n    else:\n        url = f\"{wiki_base}/display/{space}\"\n        if page_id:\n            url += f\"/pages/{page_id}\"\n    return url\n\n\ndef upgrade() -> None:\n    connector = table(\n        \"connector\",\n        column(\"id\", sa.Integer),\n        column(\"source\", sa.String()),\n        column(\"input_type\", sa.String()),\n        column(\"connector_specific_config\", postgresql.JSONB),\n    )\n\n    # Fetch all Confluence connectors\n    connection = op.get_bind()\n    confluence_connectors = connection.execute(\n        sa.select(connector).where(\n            sa.and_(\n                connector.c.source == \"CONFLUENCE\", connector.c.input_type == \"POLL\"\n            )\n        )\n    ).fetchall()\n\n    for row in confluence_connectors:\n        config = row.connector_specific_config\n        wiki_page_url = config[\"wiki_page_url\"]\n        wiki_base, space, page_id, is_cloud = extract_confluence_keys_from_url(\n            wiki_page_url\n        )\n\n        new_config = {\n            \"wiki_base\": wiki_base,\n            \"space\": space,\n            \"page_id\": page_id,\n            \"is_cloud\": is_cloud,\n        }\n\n        for key, value in config.items():\n            if key not in [\"wiki_page_url\"]:\n                new_config[key] = value\n\n        op.execute(\n            connector.update()\n            .where(connector.c.id == row.id)\n            .values(connector_specific_config=new_config)\n        )\n\n\ndef downgrade() -> None:\n    connector = table(\n        \"connector\",\n        column(\"id\", sa.Integer),\n        column(\"source\", sa.String()),\n        column(\"input_type\", sa.String()),\n        column(\"connector_specific_config\", postgresql.JSONB),\n    )\n\n    confluence_connectors = (\n        op.get_bind()\n        .execute(\n            sa.select(connector).where(\n                connector.c.source == \"CONFLUENCE\", connector.c.input_type == \"POLL\"\n            )\n        )\n        .fetchall()\n    )\n\n    for row in confluence_connectors:\n        config = row.connector_specific_config\n        if all(key in config for key in [\"wiki_base\", \"space\", \"is_cloud\"]):\n            wiki_page_url = reconstruct_confluence_url(\n                config[\"wiki_base\"],\n                config[\"space\"],\n                config.get(\"page_id\", \"\"),\n                config[\"is_cloud\"],\n            )\n\n            new_config = {\"wiki_page_url\": wiki_page_url}\n            new_config.update(\n                {\n                    k: v\n                    for k, v in config.items()\n                    if k not in [\"wiki_base\", \"space\", \"page_id\", \"is_cloud\"]\n                }\n            )\n\n            op.execute(\n                connector.update()\n                .where(connector.c.id == row.id)\n                .values(connector_specific_config=new_config)\n            )\n"
  },
  {
    "path": "backend/alembic/versions/a3b8d9e2f1c4_make_scim_external_id_nullable.py",
    "content": "\"\"\"make scim_user_mapping.external_id nullable\n\nRevision ID: a3b8d9e2f1c4\nRevises: 2664261bfaab\nCreate Date: 2026-03-02\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"a3b8d9e2f1c4\"\ndown_revision = \"2664261bfaab\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\n        \"scim_user_mapping\",\n        \"external_id\",\n        nullable=True,\n    )\n\n\ndef downgrade() -> None:\n    # Delete any rows where external_id is NULL before re-applying NOT NULL\n    op.execute(\"DELETE FROM scim_user_mapping WHERE external_id IS NULL\")\n    op.alter_column(\n        \"scim_user_mapping\",\n        \"external_id\",\n        nullable=False,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/a3bfd0d64902_add_chosen_assistants_to_user_table.py",
    "content": "\"\"\"Add chosen_assistants to User table\n\nRevision ID: a3bfd0d64902\nRevises: ec85f2b3c544\nCreate Date: 2024-05-26 17:22:24.834741\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"a3bfd0d64902\"\ndown_revision = \"ec85f2b3c544\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\"chosen_assistants\", postgresql.ARRAY(sa.Integer()), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"chosen_assistants\")\n"
  },
  {
    "path": "backend/alembic/versions/a3c1a7904cd0_remove_userfile_related_deprecated_.py",
    "content": "\"\"\"remove userfile related deprecated fields\n\nRevision ID: a3c1a7904cd0\nRevises: 5c3dca366b35\nCreate Date: 2026-01-06 13:00:30.634396\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"a3c1a7904cd0\"\ndown_revision = \"5c3dca366b35\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.drop_column(\"user_file\", \"document_id\")\n    op.drop_column(\"user_file\", \"document_id_migrated\")\n    op.drop_column(\"connector_credential_pair\", \"is_user_file\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\"is_user_file\", sa.Boolean(), nullable=False, server_default=\"false\"),\n    )\n    op.add_column(\n        \"user_file\",\n        sa.Column(\"document_id\", sa.String(), nullable=True),\n    )\n    op.add_column(\n        \"user_file\",\n        sa.Column(\n            \"document_id_migrated\", sa.Boolean(), nullable=False, server_default=\"true\"\n        ),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/a3f8b2c1d4e5_add_preferred_response_id_to_chat_message.py",
    "content": "\"\"\"add preferred_response_id and model_display_name to chat_message\n\nRevision ID: a3f8b2c1d4e5\nCreate Date: 2026-03-22\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"a3f8b2c1d4e5\"\ndown_revision = \"25a5501dc766\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\n            \"preferred_response_id\",\n            sa.Integer(),\n            sa.ForeignKey(\"chat_message.id\", ondelete=\"SET NULL\"),\n            nullable=True,\n        ),\n    )\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"model_display_name\", sa.String(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_message\", \"model_display_name\")\n    op.drop_column(\"chat_message\", \"preferred_response_id\")\n"
  },
  {
    "path": "backend/alembic/versions/a4f23d6b71c8_add_llm_provider_persona_restrictions.py",
    "content": "\"\"\"add llm provider persona restrictions\n\nRevision ID: a4f23d6b71c8\nRevises: 5e1c073d48a3\nCreate Date: 2025-10-21 00:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"a4f23d6b71c8\"\ndown_revision = \"5e1c073d48a3\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"llm_provider__persona\",\n        sa.Column(\"llm_provider_id\", sa.Integer(), nullable=False),\n        sa.Column(\"persona_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"llm_provider_id\"], [\"llm_provider.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.ForeignKeyConstraint([\"persona_id\"], [\"persona.id\"], ondelete=\"CASCADE\"),\n        sa.PrimaryKeyConstraint(\"llm_provider_id\", \"persona_id\"),\n    )\n    op.create_index(\n        \"ix_llm_provider__persona_llm_provider_id\",\n        \"llm_provider__persona\",\n        [\"llm_provider_id\"],\n    )\n    op.create_index(\n        \"ix_llm_provider__persona_persona_id\",\n        \"llm_provider__persona\",\n        [\"persona_id\"],\n    )\n    op.create_index(\n        \"ix_llm_provider__persona_composite\",\n        \"llm_provider__persona\",\n        [\"persona_id\", \"llm_provider_id\"],\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\n        \"ix_llm_provider__persona_composite\",\n        table_name=\"llm_provider__persona\",\n    )\n    op.drop_index(\n        \"ix_llm_provider__persona_persona_id\",\n        table_name=\"llm_provider__persona\",\n    )\n    op.drop_index(\n        \"ix_llm_provider__persona_llm_provider_id\",\n        table_name=\"llm_provider__persona\",\n    )\n    op.drop_table(\"llm_provider__persona\")\n"
  },
  {
    "path": "backend/alembic/versions/a570b80a5f20_usergroup_tables.py",
    "content": "\"\"\"UserGroup tables\n\nRevision ID: a570b80a5f20\nRevises: 904451035c9b\nCreate Date: 2023-10-02 12:27:10.265725\n\n\"\"\"\n\nfrom alembic import op\nimport fastapi_users_db_sqlalchemy\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"a570b80a5f20\"\ndown_revision = \"904451035c9b\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"user_group\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"is_up_to_date\", sa.Boolean(), nullable=False),\n        sa.Column(\"is_up_for_deletion\", sa.Boolean(), nullable=False),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"name\"),\n    )\n    op.create_table(\n        \"user__user_group\",\n        sa.Column(\"user_group_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_group_id\"],\n            [\"user_group.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"user_group_id\", \"user_id\"),\n    )\n    op.create_table(\n        \"user_group__connector_credential_pair\",\n        sa.Column(\"user_group_id\", sa.Integer(), nullable=False),\n        sa.Column(\"cc_pair_id\", sa.Integer(), nullable=False),\n        sa.Column(\"is_current\", sa.Boolean(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"cc_pair_id\"],\n            [\"connector_credential_pair.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_group_id\"],\n            [\"user_group.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"user_group_id\", \"cc_pair_id\", \"is_current\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"user_group__connector_credential_pair\")\n    op.drop_table(\"user__user_group\")\n    op.drop_table(\"user_group\")\n"
  },
  {
    "path": "backend/alembic/versions/a6df6b88ef81_remove_recent_assistants.py",
    "content": "\"\"\"remove recent assistants\n\nRevision ID: a6df6b88ef81\nRevises: 4d58345da04a\nCreate Date: 2025-01-29 10:25:52.790407\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"a6df6b88ef81\"\ndown_revision = \"4d58345da04a\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.drop_column(\"user\", \"recent_assistants\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"recent_assistants\", postgresql.JSONB(), server_default=\"[]\", nullable=False\n        ),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/a7688ab35c45_add_public_external_user_group_table.py",
    "content": "\"\"\"Add public_external_user_group table\n\nRevision ID: a7688ab35c45\nRevises: 5c448911b12f\nCreate Date: 2025-05-06 20:55:12.747875\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"a7688ab35c45\"\ndown_revision = \"5c448911b12f\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"public_external_user_group\",\n        sa.Column(\"external_user_group_id\", sa.String(), nullable=False),\n        sa.Column(\"cc_pair_id\", sa.Integer(), nullable=False),\n        sa.PrimaryKeyConstraint(\"external_user_group_id\", \"cc_pair_id\"),\n        sa.ForeignKeyConstraint(\n            [\"cc_pair_id\"], [\"connector_credential_pair.id\"], ondelete=\"CASCADE\"\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"public_external_user_group\")\n"
  },
  {
    "path": "backend/alembic/versions/a852cbe15577_new_chat_history.py",
    "content": "\"\"\"New Chat History\n\nRevision ID: a852cbe15577\nRevises: 6436661d5b65\nCreate Date: 2025-11-08 15:16:37.781308\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"a852cbe15577\"\ndown_revision = \"6436661d5b65\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # 1. Drop old research/agent tables (CASCADE handles dependencies)\n    op.execute(\"DROP TABLE IF EXISTS research_agent_iteration_sub_step CASCADE\")\n    op.execute(\"DROP TABLE IF EXISTS research_agent_iteration CASCADE\")\n    op.execute(\"DROP TABLE IF EXISTS agent__sub_query__search_doc CASCADE\")\n    op.execute(\"DROP TABLE IF EXISTS agent__sub_query CASCADE\")\n    op.execute(\"DROP TABLE IF EXISTS agent__sub_question CASCADE\")\n\n    # 2. ChatMessage table changes\n    # Rename columns and add FKs\n    op.alter_column(\n        \"chat_message\", \"parent_message\", new_column_name=\"parent_message_id\"\n    )\n    op.create_foreign_key(\n        \"fk_chat_message_parent_message_id\",\n        \"chat_message\",\n        \"chat_message\",\n        [\"parent_message_id\"],\n        [\"id\"],\n    )\n    op.alter_column(\n        \"chat_message\",\n        \"latest_child_message\",\n        new_column_name=\"latest_child_message_id\",\n    )\n    op.create_foreign_key(\n        \"fk_chat_message_latest_child_message_id\",\n        \"chat_message\",\n        \"chat_message\",\n        [\"latest_child_message_id\"],\n        [\"id\"],\n    )\n\n    # Add new column\n    op.add_column(\n        \"chat_message\", sa.Column(\"reasoning_tokens\", sa.Text(), nullable=True)\n    )\n\n    # Drop old columns\n    op.drop_column(\"chat_message\", \"rephrased_query\")\n    op.drop_column(\"chat_message\", \"alternate_assistant_id\")\n    op.drop_column(\"chat_message\", \"overridden_model\")\n    op.drop_column(\"chat_message\", \"is_agentic\")\n    op.drop_column(\"chat_message\", \"refined_answer_improvement\")\n    op.drop_column(\"chat_message\", \"research_type\")\n    op.drop_column(\"chat_message\", \"research_plan\")\n    op.drop_column(\"chat_message\", \"research_answer_purpose\")\n\n    # 3. ToolCall table changes\n    # Drop the unique constraint first\n    op.drop_constraint(\"uq_tool_call_message_id\", \"tool_call\", type_=\"unique\")\n\n    # Delete orphaned tool_call rows (those without valid chat_message)\n    op.execute(\n        \"DELETE FROM tool_call WHERE message_id NOT IN (SELECT id FROM chat_message)\"\n    )\n\n    # Add chat_session_id as nullable first, populate, then make NOT NULL\n    op.add_column(\n        \"tool_call\",\n        sa.Column(\"chat_session_id\", postgresql.UUID(as_uuid=True), nullable=True),\n    )\n\n    # Populate chat_session_id from the related chat_message\n    op.execute(\n        \"\"\"\n        UPDATE tool_call\n        SET chat_session_id = chat_message.chat_session_id\n        FROM chat_message\n        WHERE tool_call.message_id = chat_message.id\n    \"\"\"\n    )\n\n    # Now make it NOT NULL and add FK\n    op.alter_column(\"tool_call\", \"chat_session_id\", nullable=False)\n    op.create_foreign_key(\n        \"fk_tool_call_chat_session_id\",\n        \"tool_call\",\n        \"chat_session\",\n        [\"chat_session_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    # Rename message_id and make nullable, recreate FK with CASCADE\n    op.drop_constraint(\"tool_call_message_id_fkey\", \"tool_call\", type_=\"foreignkey\")\n    op.alter_column(\n        \"tool_call\",\n        \"message_id\",\n        new_column_name=\"parent_chat_message_id\",\n        nullable=True,\n    )\n    op.create_foreign_key(\n        \"fk_tool_call_parent_chat_message_id\",\n        \"tool_call\",\n        \"chat_message\",\n        [\"parent_chat_message_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    # Add parent_tool_call_id with FK\n    op.add_column(\n        \"tool_call\", sa.Column(\"parent_tool_call_id\", sa.Integer(), nullable=True)\n    )\n    op.create_foreign_key(\n        \"fk_tool_call_parent_tool_call_id\",\n        \"tool_call\",\n        \"tool_call\",\n        [\"parent_tool_call_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    # Add other new columns\n    op.add_column(\n        \"tool_call\",\n        sa.Column(\"turn_number\", sa.Integer(), nullable=False, server_default=\"0\"),\n    )\n    op.add_column(\n        \"tool_call\",\n        sa.Column(\"tool_call_id\", sa.String(), nullable=False, server_default=\"\"),\n    )\n    op.add_column(\"tool_call\", sa.Column(\"reasoning_tokens\", sa.Text(), nullable=True))\n    op.add_column(\n        \"tool_call\",\n        sa.Column(\"tool_call_tokens\", sa.Integer(), nullable=False, server_default=\"0\"),\n    )\n    op.add_column(\n        \"tool_call\",\n        sa.Column(\"generated_images\", postgresql.JSONB(), nullable=True),\n    )\n\n    # Rename columns\n    op.alter_column(\n        \"tool_call\", \"tool_arguments\", new_column_name=\"tool_call_arguments\"\n    )\n    op.alter_column(\"tool_call\", \"tool_result\", new_column_name=\"tool_call_response\")\n\n    # Change tool_call_response type from JSONB to Text\n    op.execute(\n        \"\"\"\n        ALTER TABLE tool_call\n        ALTER COLUMN tool_call_response TYPE TEXT\n        USING tool_call_response::text\n    \"\"\"\n    )\n\n    # Drop old columns\n    op.drop_column(\"tool_call\", \"tool_name\")\n\n    # 4. Create new association table\n    op.create_table(\n        \"tool_call__search_doc\",\n        sa.Column(\"tool_call_id\", sa.Integer(), nullable=False),\n        sa.Column(\"search_doc_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint([\"tool_call_id\"], [\"tool_call.id\"], ondelete=\"CASCADE\"),\n        sa.ForeignKeyConstraint(\n            [\"search_doc_id\"], [\"search_doc.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.PrimaryKeyConstraint(\"tool_call_id\", \"search_doc_id\"),\n    )\n\n    # 5. Persona table change\n    op.add_column(\n        \"persona\",\n        sa.Column(\n            \"replace_base_system_prompt\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=\"false\",\n        ),\n    )\n\n\ndef downgrade() -> None:\n    # Reverse persona changes\n    op.drop_column(\"persona\", \"replace_base_system_prompt\")\n\n    # Drop new association table\n    op.drop_table(\"tool_call__search_doc\")\n\n    # Reverse ToolCall changes\n    op.add_column(\n        \"tool_call\",\n        sa.Column(\"tool_name\", sa.String(), nullable=False, server_default=\"\"),\n    )\n\n    # Change tool_call_response back to JSONB\n    op.execute(\n        \"\"\"\n        ALTER TABLE tool_call\n        ALTER COLUMN tool_call_response TYPE JSONB\n        USING tool_call_response::jsonb\n    \"\"\"\n    )\n\n    op.alter_column(\"tool_call\", \"tool_call_response\", new_column_name=\"tool_result\")\n    op.alter_column(\n        \"tool_call\", \"tool_call_arguments\", new_column_name=\"tool_arguments\"\n    )\n\n    op.drop_column(\"tool_call\", \"generated_images\")\n    op.drop_column(\"tool_call\", \"tool_call_tokens\")\n    op.drop_column(\"tool_call\", \"reasoning_tokens\")\n    op.drop_column(\"tool_call\", \"tool_call_id\")\n    op.drop_column(\"tool_call\", \"turn_number\")\n\n    op.drop_constraint(\n        \"fk_tool_call_parent_tool_call_id\", \"tool_call\", type_=\"foreignkey\"\n    )\n    op.drop_column(\"tool_call\", \"parent_tool_call_id\")\n\n    op.drop_constraint(\n        \"fk_tool_call_parent_chat_message_id\", \"tool_call\", type_=\"foreignkey\"\n    )\n    op.alter_column(\n        \"tool_call\",\n        \"parent_chat_message_id\",\n        new_column_name=\"message_id\",\n        nullable=False,\n    )\n    op.create_foreign_key(\n        \"tool_call_message_id_fkey\",\n        \"tool_call\",\n        \"chat_message\",\n        [\"message_id\"],\n        [\"id\"],\n    )\n\n    op.drop_constraint(\"fk_tool_call_chat_session_id\", \"tool_call\", type_=\"foreignkey\")\n    op.drop_column(\"tool_call\", \"chat_session_id\")\n\n    op.create_unique_constraint(\"uq_tool_call_message_id\", \"tool_call\", [\"message_id\"])\n\n    # Reverse ChatMessage changes\n    # Note: research_answer_purpose and research_type were originally String columns,\n    # not Enum types (see migrations 5ae8240accb3 and f8a9b2c3d4e5)\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"research_answer_purpose\", sa.String(), nullable=True),\n    )\n    op.add_column(\n        \"chat_message\", sa.Column(\"research_plan\", postgresql.JSONB(), nullable=True)\n    )\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"research_type\", sa.String(), nullable=True),\n    )\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"refined_answer_improvement\", sa.Boolean(), nullable=True),\n    )\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"is_agentic\", sa.Boolean(), nullable=False, server_default=\"false\"),\n    )\n    op.add_column(\n        \"chat_message\", sa.Column(\"overridden_model\", sa.String(), nullable=True)\n    )\n    op.add_column(\n        \"chat_message\", sa.Column(\"alternate_assistant_id\", sa.Integer(), nullable=True)\n    )\n    # Recreate the FK constraint that was implicitly dropped when the column was dropped\n    op.create_foreign_key(\n        \"fk_chat_message_persona\",\n        \"chat_message\",\n        \"persona\",\n        [\"alternate_assistant_id\"],\n        [\"id\"],\n    )\n    op.add_column(\n        \"chat_message\", sa.Column(\"rephrased_query\", sa.Text(), nullable=True)\n    )\n\n    op.drop_column(\"chat_message\", \"reasoning_tokens\")\n\n    op.drop_constraint(\n        \"fk_chat_message_latest_child_message_id\", \"chat_message\", type_=\"foreignkey\"\n    )\n    op.alter_column(\n        \"chat_message\",\n        \"latest_child_message_id\",\n        new_column_name=\"latest_child_message\",\n    )\n\n    op.drop_constraint(\n        \"fk_chat_message_parent_message_id\", \"chat_message\", type_=\"foreignkey\"\n    )\n    op.alter_column(\n        \"chat_message\", \"parent_message_id\", new_column_name=\"parent_message\"\n    )\n\n    # Recreate agent sub question and sub query tables\n    op.create_table(\n        \"agent__sub_question\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\"primary_question_id\", sa.Integer(), nullable=False),\n        sa.Column(\"chat_session_id\", postgresql.UUID(as_uuid=True), nullable=False),\n        sa.Column(\"sub_question\", sa.Text(), nullable=False),\n        sa.Column(\"level\", sa.Integer(), nullable=False),\n        sa.Column(\"level_question_num\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"sub_answer\", sa.Text(), nullable=False),\n        sa.Column(\"sub_question_doc_results\", postgresql.JSONB(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"primary_question_id\"], [\"chat_message.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.ForeignKeyConstraint([\"chat_session_id\"], [\"chat_session.id\"]),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    op.create_table(\n        \"agent__sub_query\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\"parent_question_id\", sa.Integer(), nullable=False),\n        sa.Column(\"chat_session_id\", postgresql.UUID(as_uuid=True), nullable=False),\n        sa.Column(\"sub_query\", sa.Text(), nullable=False),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"parent_question_id\"], [\"agent__sub_question.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.ForeignKeyConstraint([\"chat_session_id\"], [\"chat_session.id\"]),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    op.create_table(\n        \"agent__sub_query__search_doc\",\n        sa.Column(\"sub_query_id\", sa.Integer(), nullable=False),\n        sa.Column(\"search_doc_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"sub_query_id\"], [\"agent__sub_query.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.ForeignKeyConstraint([\"search_doc_id\"], [\"search_doc.id\"]),\n        sa.PrimaryKeyConstraint(\"sub_query_id\", \"search_doc_id\"),\n    )\n\n    # Recreate research agent tables\n    op.create_table(\n        \"research_agent_iteration\",\n        sa.Column(\"id\", sa.Integer(), autoincrement=True, nullable=False),\n        sa.Column(\"primary_question_id\", sa.Integer(), nullable=False),\n        sa.Column(\"iteration_nr\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"purpose\", sa.String(), nullable=True),\n        sa.Column(\"reasoning\", sa.String(), nullable=True),\n        sa.ForeignKeyConstraint(\n            [\"primary_question_id\"], [\"chat_message.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\n            \"primary_question_id\",\n            \"iteration_nr\",\n            name=\"_research_agent_iteration_unique_constraint\",\n        ),\n    )\n\n    op.create_table(\n        \"research_agent_iteration_sub_step\",\n        sa.Column(\"id\", sa.Integer(), autoincrement=True, nullable=False),\n        sa.Column(\"primary_question_id\", sa.Integer(), nullable=False),\n        sa.Column(\"iteration_nr\", sa.Integer(), nullable=False),\n        sa.Column(\"iteration_sub_step_nr\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"sub_step_instructions\", sa.String(), nullable=True),\n        sa.Column(\"sub_step_tool_id\", sa.Integer(), nullable=True),\n        sa.Column(\"reasoning\", sa.String(), nullable=True),\n        sa.Column(\"sub_answer\", sa.String(), nullable=True),\n        sa.Column(\"cited_doc_results\", postgresql.JSONB(), nullable=False),\n        sa.Column(\"claims\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"is_web_fetch\", sa.Boolean(), nullable=True),\n        sa.Column(\"queries\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"generated_images\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"additional_data\", postgresql.JSONB(), nullable=True),\n        sa.Column(\"file_ids\", postgresql.JSONB(), nullable=True),\n        sa.ForeignKeyConstraint(\n            [\"primary_question_id\", \"iteration_nr\"],\n            [\n                \"research_agent_iteration.primary_question_id\",\n                \"research_agent_iteration.iteration_nr\",\n            ],\n            ondelete=\"CASCADE\",\n        ),\n        sa.ForeignKeyConstraint([\"sub_step_tool_id\"], [\"tool.id\"], ondelete=\"SET NULL\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/a8c2065484e6_add_auto_scroll_to_user_model.py",
    "content": "\"\"\"add auto scroll to user model\n\nRevision ID: a8c2065484e6\nRevises: abe7378b8217\nCreate Date: 2024-11-22 17:34:09.690295\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"a8c2065484e6\"\ndown_revision = \"abe7378b8217\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\"auto_scroll\", sa.Boolean(), nullable=True, server_default=None),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"auto_scroll\")\n"
  },
  {
    "path": "backend/alembic/versions/abbfec3a5ac5_merge_prompt_into_persona.py",
    "content": "\"\"\"merge prompt into persona\n\nRevision ID: abbfec3a5ac5\nRevises: 8818cf73fa1a\nCreate Date: 2024-12-19 12:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"abbfec3a5ac5\"\ndown_revision = \"8818cf73fa1a\"\nbranch_labels = None\ndepends_on = None\n\n\nMAX_PROMPT_LENGTH = 5_000_000\n\n\ndef upgrade() -> None:\n    \"\"\"NOTE: Prompts without any Personas will just be lost.\"\"\"\n    # Step 1: Add new columns to persona table (only if they don't exist)\n\n    # Check if columns exist before adding them\n    connection = op.get_bind()\n    inspector = sa.inspect(connection)\n    existing_columns = [col[\"name\"] for col in inspector.get_columns(\"persona\")]\n\n    if \"system_prompt\" not in existing_columns:\n        op.add_column(\n            \"persona\",\n            sa.Column(\n                \"system_prompt\", sa.String(length=MAX_PROMPT_LENGTH), nullable=True\n            ),\n        )\n\n    if \"task_prompt\" not in existing_columns:\n        op.add_column(\n            \"persona\",\n            sa.Column(\n                \"task_prompt\", sa.String(length=MAX_PROMPT_LENGTH), nullable=True\n            ),\n        )\n\n    if \"datetime_aware\" not in existing_columns:\n        op.add_column(\n            \"persona\",\n            sa.Column(\n                \"datetime_aware\", sa.Boolean(), nullable=False, server_default=\"true\"\n            ),\n        )\n\n    # Step 2: Migrate data from prompt table to persona table (only if tables exist)\n    existing_tables = inspector.get_table_names()\n\n    if \"prompt\" in existing_tables and \"persona__prompt\" in existing_tables:\n        # For personas that have associated prompts, copy the prompt data\n        op.execute(\n            \"\"\"\n            UPDATE persona\n            SET\n                system_prompt = p.system_prompt,\n                task_prompt = p.task_prompt,\n                datetime_aware = p.datetime_aware\n            FROM (\n                -- Get the first prompt for each persona (in case there are multiple)\n                SELECT DISTINCT ON (pp.persona_id)\n                    pp.persona_id,\n                    pr.system_prompt,\n                    pr.task_prompt,\n                    pr.datetime_aware\n                FROM persona__prompt pp\n                JOIN prompt pr ON pp.prompt_id = pr.id\n            ) p\n            WHERE persona.id = p.persona_id\n        \"\"\"\n        )\n\n        # Step 3: Update chat_message references\n        # Since chat messages referenced prompt_id, we need to update them to use persona_id\n        # This is complex as we need to map from prompt_id to persona_id\n\n        # Check if chat_message has prompt_id column\n        chat_message_columns = [\n            col[\"name\"] for col in inspector.get_columns(\"chat_message\")\n        ]\n        if \"prompt_id\" in chat_message_columns:\n            op.execute(\n                \"\"\"\n                ALTER TABLE chat_message\n                DROP CONSTRAINT IF EXISTS chat_message__prompt_fk\n            \"\"\"\n            )\n            op.drop_column(\"chat_message\", \"prompt_id\")\n\n    # Step 4: Handle personas without prompts - set default values if needed (always run this)\n    op.execute(\n        \"\"\"\n        UPDATE persona\n        SET\n            system_prompt = COALESCE(system_prompt, ''),\n            task_prompt = COALESCE(task_prompt, '')\n        WHERE system_prompt IS NULL OR task_prompt IS NULL\n    \"\"\"\n    )\n\n    # Step 5: Drop the persona__prompt association table (if it exists)\n    if \"persona__prompt\" in existing_tables:\n        op.drop_table(\"persona__prompt\")\n\n    # Step 6: Drop the prompt table (if it exists)\n    if \"prompt\" in existing_tables:\n        op.drop_table(\"prompt\")\n\n    # Step 7: Make system_prompt and task_prompt non-nullable after migration (only if they exist)\n    op.alter_column(\n        \"persona\",\n        \"system_prompt\",\n        existing_type=sa.String(length=MAX_PROMPT_LENGTH),\n        nullable=False,\n        server_default=None,\n    )\n\n    op.alter_column(\n        \"persona\",\n        \"task_prompt\",\n        existing_type=sa.String(length=MAX_PROMPT_LENGTH),\n        nullable=False,\n        server_default=None,\n    )\n\n\ndef downgrade() -> None:\n    # Step 1: Recreate the prompt table\n    op.create_table(\n        \"prompt\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"user_id\", postgresql.UUID(as_uuid=True), nullable=True),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"description\", sa.String(), nullable=False),\n        sa.Column(\"system_prompt\", sa.String(length=MAX_PROMPT_LENGTH), nullable=False),\n        sa.Column(\"task_prompt\", sa.String(length=MAX_PROMPT_LENGTH), nullable=False),\n        sa.Column(\n            \"datetime_aware\", sa.Boolean(), nullable=False, server_default=\"true\"\n        ),\n        sa.Column(\n            \"default_prompt\", sa.Boolean(), nullable=False, server_default=\"false\"\n        ),\n        sa.Column(\"deleted\", sa.Boolean(), nullable=False, server_default=\"false\"),\n        sa.ForeignKeyConstraint([\"user_id\"], [\"user.id\"], ondelete=\"CASCADE\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    # Step 2: Recreate the persona__prompt association table\n    op.create_table(\n        \"persona__prompt\",\n        sa.Column(\"persona_id\", sa.Integer(), nullable=False),\n        sa.Column(\"prompt_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"prompt_id\"],\n            [\"prompt.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"persona_id\", \"prompt_id\"),\n    )\n\n    # Step 3: Migrate data back from persona to prompt table\n    op.execute(\n        \"\"\"\n        INSERT INTO prompt (\n            name,\n            description,\n            system_prompt,\n            task_prompt,\n            datetime_aware,\n            default_prompt,\n            deleted,\n            user_id\n        )\n        SELECT\n            CONCAT('Prompt for ', name),\n            description,\n            system_prompt,\n            task_prompt,\n            datetime_aware,\n            is_default_persona,\n            deleted,\n            user_id\n        FROM persona\n        WHERE system_prompt IS NOT NULL AND system_prompt != ''\n        RETURNING id, name\n        \"\"\"\n    )\n\n    # Step 4: Re-establish persona__prompt relationships\n    op.execute(\n        \"\"\"\n        INSERT INTO persona__prompt (persona_id, prompt_id)\n        SELECT\n            p.id as persona_id,\n            pr.id as prompt_id\n        FROM persona p\n        JOIN prompt pr ON pr.name = CONCAT('Prompt for ', p.name)\n        WHERE p.system_prompt IS NOT NULL AND p.system_prompt != ''\n    \"\"\"\n    )\n\n    # Step 5: Add prompt_id column back to chat_message\n    op.add_column(\"chat_message\", sa.Column(\"prompt_id\", sa.Integer(), nullable=True))\n\n    # Step 6: Re-establish foreign key constraint\n    op.create_foreign_key(\n        \"chat_message__prompt_fk\", \"chat_message\", \"prompt\", [\"prompt_id\"], [\"id\"]\n    )\n\n    # Step 7: Remove columns from persona table\n    op.drop_column(\"persona\", \"datetime_aware\")\n    op.drop_column(\"persona\", \"task_prompt\")\n    op.drop_column(\"persona\", \"system_prompt\")\n"
  },
  {
    "path": "backend/alembic/versions/abe7378b8217_add_indexing_trigger_to_cc_pair.py",
    "content": "\"\"\"add indexing trigger to cc_pair\n\nRevision ID: abe7378b8217\nRevises: 6d562f86c78b\nCreate Date: 2024-11-26 19:09:53.481171\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"abe7378b8217\"\ndown_revision = \"93560ba1b118\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"indexing_trigger\",\n            sa.Enum(\"UPDATE\", \"REINDEX\", name=\"indexingmode\", native_enum=False),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"connector_credential_pair\", \"indexing_trigger\")\n"
  },
  {
    "path": "backend/alembic/versions/ac5eaac849f9_add_last_pruned_to_connector_table.py",
    "content": "\"\"\"add last_pruned to the connector_credential_pair table\n\nRevision ID: ac5eaac849f9\nRevises: 52a219fb5233\nCreate Date: 2024-09-10 15:04:26.437118\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"ac5eaac849f9\"\ndown_revision = \"46b7a812670f\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # last pruned represents the last time the connector was pruned\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\"last_pruned\", sa.DateTime(timezone=True), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"connector_credential_pair\", \"last_pruned\")\n"
  },
  {
    "path": "backend/alembic/versions/acaab4ef4507_remove_inactive_ccpair_status_on_.py",
    "content": "\"\"\"remove inactive ccpair status on downgrade\n\nRevision ID: acaab4ef4507\nRevises: b388730a2899\nCreate Date: 2025-02-16 18:21:41.330212\n\n\"\"\"\n\nfrom alembic import op\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom sqlalchemy import update\n\n# revision identifiers, used by Alembic.\nrevision = \"acaab4ef4507\"\ndown_revision = \"b388730a2899\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    pass\n\n\ndef downgrade() -> None:\n    op.execute(\n        update(ConnectorCredentialPair)\n        .where(ConnectorCredentialPair.status == ConnectorCredentialPairStatus.INVALID)\n        .values(status=ConnectorCredentialPairStatus.ACTIVE)\n    )\n"
  },
  {
    "path": "backend/alembic/versions/ae62505e3acc_add_saml_accounts.py",
    "content": "\"\"\"Add SAML Accounts\n\nRevision ID: ae62505e3acc\nRevises: 7da543f5672f\nCreate Date: 2023-09-26 16:19:30.933183\n\n\"\"\"\n\nimport fastapi_users_db_sqlalchemy\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"ae62505e3acc\"\ndown_revision = \"7da543f5672f\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"saml\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=False,\n        ),\n        sa.Column(\"encrypted_cookie\", sa.Text(), nullable=False),\n        sa.Column(\"expires_at\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"encrypted_cookie\"),\n        sa.UniqueConstraint(\"user_id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"saml\")\n"
  },
  {
    "path": "backend/alembic/versions/aeda5f2df4f6_add_pinned_assistants.py",
    "content": "\"\"\"add pinned assistants\n\nRevision ID: aeda5f2df4f6\nRevises: c5eae4a75a1b\nCreate Date: 2025-01-09 16:04:10.770636\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"aeda5f2df4f6\"\ndown_revision = \"c5eae4a75a1b\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\", sa.Column(\"pinned_assistants\", postgresql.JSONB(), nullable=True)\n    )\n    op.execute('UPDATE \"user\" SET pinned_assistants = chosen_assistants')\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"pinned_assistants\")\n"
  },
  {
    "path": "backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py",
    "content": "\"\"\"Make 'last_attempt_status' nullable\n\nRevision ID: b082fec533f0\nRevises: df0c7ad8a076\nCreate Date: 2023-08-06 12:05:47.087325\n\n\"\"\"\n\nfrom alembic import op\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"b082fec533f0\"\ndown_revision = \"df0c7ad8a076\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\n        \"connector_credential_pair\",\n        \"last_attempt_status\",\n        existing_type=postgresql.ENUM(\n            \"NOT_STARTED\",\n            \"IN_PROGRESS\",\n            \"SUCCESS\",\n            \"FAILED\",\n            name=\"indexingstatus\",\n        ),\n        nullable=True,\n    )\n\n\ndef downgrade() -> None:\n    # First, update any null values to a default value\n    op.execute(\n        \"UPDATE connector_credential_pair SET last_attempt_status = 'NOT_STARTED' WHERE last_attempt_status IS NULL\"\n    )\n\n    # Then, make the column non-nullable\n    op.alter_column(\n        \"connector_credential_pair\",\n        \"last_attempt_status\",\n        existing_type=postgresql.ENUM(\n            \"NOT_STARTED\",\n            \"IN_PROGRESS\",\n            \"SUCCESS\",\n            \"FAILED\",\n            name=\"indexingstatus\",\n        ),\n        nullable=False,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/b156fa702355_chat_reworked.py",
    "content": "\"\"\"Chat Reworked\n\nRevision ID: b156fa702355\nRevises: baf71f781b9e\nCreate Date: 2023-12-12 00:57:41.823371\n\n\"\"\"\n\nimport fastapi_users_db_sqlalchemy\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\nfrom sqlalchemy.dialects.postgresql import ENUM\nfrom onyx.configs.constants import DocumentSource\n\n# revision identifiers, used by Alembic.\nrevision = \"b156fa702355\"\ndown_revision = \"baf71f781b9e\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\nsearchtype_enum = ENUM(\n    \"KEYWORD\", \"SEMANTIC\", \"HYBRID\", name=\"searchtype\", create_type=True\n)\nrecencybiassetting_enum = ENUM(\n    \"FAVOR_RECENT\",\n    \"BASE_DECAY\",\n    \"NO_DECAY\",\n    \"AUTO\",\n    name=\"recencybiassetting\",\n    create_type=True,\n)\n\n\ndef upgrade() -> None:\n    bind = op.get_bind()\n    searchtype_enum.create(bind)\n    recencybiassetting_enum.create(bind)\n\n    # This is irrecoverable, whatever\n    op.execute(\"DELETE FROM chat_feedback\")\n    op.execute(\"DELETE FROM document_retrieval_feedback\")\n\n    op.create_table(\n        \"search_doc\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"document_id\", sa.String(), nullable=False),\n        sa.Column(\"chunk_ind\", sa.Integer(), nullable=False),\n        sa.Column(\"semantic_id\", sa.String(), nullable=False),\n        sa.Column(\"link\", sa.String(), nullable=True),\n        sa.Column(\"blurb\", sa.String(), nullable=False),\n        sa.Column(\"boost\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"source_type\",\n            sa.Enum(DocumentSource, native=False),\n            nullable=False,\n        ),\n        sa.Column(\"hidden\", sa.Boolean(), nullable=False),\n        sa.Column(\"score\", sa.Float(), nullable=False),\n        sa.Column(\"match_highlights\", postgresql.ARRAY(sa.String()), nullable=False),\n        sa.Column(\"updated_at\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\"primary_owners\", postgresql.ARRAY(sa.String()), nullable=True),\n        sa.Column(\"secondary_owners\", postgresql.ARRAY(sa.String()), nullable=True),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"prompt\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"description\", sa.String(), nullable=False),\n        sa.Column(\"system_prompt\", sa.Text(), nullable=False),\n        sa.Column(\"task_prompt\", sa.Text(), nullable=False),\n        sa.Column(\"include_citations\", sa.Boolean(), nullable=False),\n        sa.Column(\"datetime_aware\", sa.Boolean(), nullable=False),\n        sa.Column(\"default_prompt\", sa.Boolean(), nullable=False),\n        sa.Column(\"deleted\", sa.Boolean(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"persona__prompt\",\n        sa.Column(\"persona_id\", sa.Integer(), nullable=False),\n        sa.Column(\"prompt_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"prompt_id\"],\n            [\"prompt.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"persona_id\", \"prompt_id\"),\n    )\n\n    # Changes to persona first so chat_sessions can have the right persona\n    # The empty persona will be overwritten on server startup\n    op.add_column(\n        \"persona\",\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n    )\n    op.add_column(\n        \"persona\",\n        sa.Column(\n            \"search_type\",\n            searchtype_enum,\n            nullable=True,\n        ),\n    )\n    op.execute(\"UPDATE persona SET search_type = 'HYBRID'\")\n    op.alter_column(\"persona\", \"search_type\", nullable=False)\n    op.add_column(\n        \"persona\",\n        sa.Column(\"llm_relevance_filter\", sa.Boolean(), nullable=True),\n    )\n    op.execute(\"UPDATE persona SET llm_relevance_filter = TRUE\")\n    op.alter_column(\"persona\", \"llm_relevance_filter\", nullable=False)\n    op.add_column(\n        \"persona\",\n        sa.Column(\"llm_filter_extraction\", sa.Boolean(), nullable=True),\n    )\n    op.execute(\"UPDATE persona SET llm_filter_extraction = TRUE\")\n    op.alter_column(\"persona\", \"llm_filter_extraction\", nullable=False)\n    op.add_column(\n        \"persona\",\n        sa.Column(\n            \"recency_bias\",\n            recencybiassetting_enum,\n            nullable=True,\n        ),\n    )\n    op.execute(\"UPDATE persona SET recency_bias = 'BASE_DECAY'\")\n    op.alter_column(\"persona\", \"recency_bias\", nullable=False)\n    op.alter_column(\"persona\", \"description\", existing_type=sa.VARCHAR(), nullable=True)\n    op.execute(\"UPDATE persona SET description = ''\")\n    op.alter_column(\"persona\", \"description\", nullable=False)\n    op.create_foreign_key(\"persona__user_fk\", \"persona\", \"user\", [\"user_id\"], [\"id\"])\n    op.drop_column(\"persona\", \"datetime_aware\")\n    op.drop_column(\"persona\", \"tools\")\n    op.drop_column(\"persona\", \"hint_text\")\n    op.drop_column(\"persona\", \"apply_llm_relevance_filter\")\n    op.drop_column(\"persona\", \"retrieval_enabled\")\n    op.drop_column(\"persona\", \"system_text\")\n\n    # Need to create a persona row so fk can work\n    result = bind.execute(sa.text(\"SELECT 1 FROM persona WHERE id = 0\"))\n    exists = result.fetchone()\n    if not exists:\n        op.execute(\n            sa.text(\n                \"\"\"\n                INSERT INTO persona (\n                    id, user_id, name, description, search_type, num_chunks,\n                    llm_relevance_filter, llm_filter_extraction, recency_bias,\n                    llm_model_version_override, default_persona, deleted\n                ) VALUES (\n                    0, NULL, '', '', 'HYBRID', NULL,\n                    TRUE, TRUE, 'BASE_DECAY', NULL, TRUE, FALSE\n                )\n                \"\"\"\n            )\n        )\n    delete_statement = sa.text(\n        \"\"\"\n        DELETE FROM persona\n        WHERE name = 'Danswer' AND default_persona = TRUE AND id != 0\n        \"\"\"\n    )\n\n    bind.execute(delete_statement)\n\n    op.add_column(\n        \"chat_feedback\",\n        sa.Column(\"chat_message_id\", sa.Integer(), nullable=False),\n    )\n    op.drop_constraint(\n        \"chat_feedback_chat_message_chat_session_id_chat_message_me_fkey\",\n        \"chat_feedback\",\n        type_=\"foreignkey\",\n    )\n    op.drop_column(\"chat_feedback\", \"chat_message_edit_number\")\n    op.drop_column(\"chat_feedback\", \"chat_message_chat_session_id\")\n    op.drop_column(\"chat_feedback\", \"chat_message_message_number\")\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\n            \"id\",\n            sa.Integer(),\n            primary_key=True,\n            autoincrement=True,\n            nullable=False,\n            unique=True,\n        ),\n    )\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"parent_message\", sa.Integer(), nullable=True),\n    )\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"latest_child_message\", sa.Integer(), nullable=True),\n    )\n    op.add_column(\n        \"chat_message\", sa.Column(\"rephrased_query\", sa.Text(), nullable=True)\n    )\n    op.add_column(\"chat_message\", sa.Column(\"prompt_id\", sa.Integer(), nullable=True))\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"citations\", postgresql.JSONB(astext_type=sa.Text()), nullable=True),\n    )\n    op.add_column(\"chat_message\", sa.Column(\"error\", sa.Text(), nullable=True))\n    op.drop_constraint(\"fk_chat_message_persona_id\", \"chat_message\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"chat_message__prompt_fk\", \"chat_message\", \"prompt\", [\"prompt_id\"], [\"id\"]\n    )\n    op.drop_column(\"chat_message\", \"parent_edit_number\")\n    op.drop_column(\"chat_message\", \"persona_id\")\n    op.drop_column(\"chat_message\", \"reference_docs\")\n    op.drop_column(\"chat_message\", \"edit_number\")\n    op.drop_column(\"chat_message\", \"latest\")\n    op.drop_column(\"chat_message\", \"message_number\")\n    op.add_column(\"chat_session\", sa.Column(\"one_shot\", sa.Boolean(), nullable=True))\n    op.execute(\"UPDATE chat_session SET one_shot = TRUE\")\n    op.alter_column(\"chat_session\", \"one_shot\", nullable=False)\n    op.alter_column(\n        \"chat_session\",\n        \"persona_id\",\n        existing_type=sa.INTEGER(),\n        nullable=True,\n    )\n    op.execute(\"UPDATE chat_session SET persona_id = 0\")\n    op.alter_column(\"chat_session\", \"persona_id\", nullable=False)\n    op.add_column(\n        \"document_retrieval_feedback\",\n        sa.Column(\"chat_message_id\", sa.Integer(), nullable=False),\n    )\n    op.drop_constraint(\n        \"document_retrieval_feedback_qa_event_id_fkey\",\n        \"document_retrieval_feedback\",\n        type_=\"foreignkey\",\n    )\n    op.create_foreign_key(\n        \"document_retrieval_feedback__chat_message_fk\",\n        \"document_retrieval_feedback\",\n        \"chat_message\",\n        [\"chat_message_id\"],\n        [\"id\"],\n    )\n    op.drop_column(\"document_retrieval_feedback\", \"qa_event_id\")\n\n    # Relation table must be created after the other tables are correct\n    op.create_table(\n        \"chat_message__search_doc\",\n        sa.Column(\"chat_message_id\", sa.Integer(), nullable=False),\n        sa.Column(\"search_doc_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"chat_message_id\"],\n            [\"chat_message.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"search_doc_id\"],\n            [\"search_doc.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"chat_message_id\", \"search_doc_id\"),\n    )\n\n    # Needs to be created after chat_message id field is added\n    op.create_foreign_key(\n        \"chat_feedback__chat_message_fk\",\n        \"chat_feedback\",\n        \"chat_message\",\n        [\"chat_message_id\"],\n        [\"id\"],\n    )\n\n    op.drop_table(\"query_event\")\n\n\ndef downgrade() -> None:\n    # NOTE: you will lose all chat history. This is to satisfy the non-nullable constraints\n    # below\n    op.execute(\"DELETE FROM chat_feedback\")\n    op.execute(\"DELETE FROM chat_message__search_doc\")\n    op.execute(\"DELETE FROM document_retrieval_feedback\")\n    op.execute(\"DELETE FROM document_retrieval_feedback\")\n    op.execute(\"DELETE FROM chat_message\")\n    op.execute(\"DELETE FROM chat_session\")\n\n    op.drop_constraint(\n        \"chat_feedback__chat_message_fk\", \"chat_feedback\", type_=\"foreignkey\"\n    )\n    op.drop_constraint(\n        \"document_retrieval_feedback__chat_message_fk\",\n        \"document_retrieval_feedback\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\"persona__user_fk\", \"persona\", type_=\"foreignkey\")\n    op.drop_constraint(\"chat_message__prompt_fk\", \"chat_message\", type_=\"foreignkey\")\n    op.drop_constraint(\n        \"chat_message__search_doc_chat_message_id_fkey\",\n        \"chat_message__search_doc\",\n        type_=\"foreignkey\",\n    )\n    op.add_column(\n        \"persona\",\n        sa.Column(\"system_text\", sa.TEXT(), autoincrement=False, nullable=True),\n    )\n    op.add_column(\n        \"persona\",\n        sa.Column(\n            \"retrieval_enabled\",\n            sa.BOOLEAN(),\n            autoincrement=False,\n            nullable=True,\n        ),\n    )\n    op.execute(\"UPDATE persona SET retrieval_enabled = TRUE\")\n    op.alter_column(\"persona\", \"retrieval_enabled\", nullable=False)\n    op.add_column(\n        \"persona\",\n        sa.Column(\n            \"apply_llm_relevance_filter\",\n            sa.BOOLEAN(),\n            autoincrement=False,\n            nullable=True,\n        ),\n    )\n    op.add_column(\n        \"persona\",\n        sa.Column(\"hint_text\", sa.TEXT(), autoincrement=False, nullable=True),\n    )\n    op.add_column(\n        \"persona\",\n        sa.Column(\n            \"tools\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            autoincrement=False,\n            nullable=True,\n        ),\n    )\n    op.add_column(\n        \"persona\",\n        sa.Column(\"datetime_aware\", sa.BOOLEAN(), autoincrement=False, nullable=True),\n    )\n    op.execute(\"UPDATE persona SET datetime_aware = TRUE\")\n    op.alter_column(\"persona\", \"datetime_aware\", nullable=False)\n    op.alter_column(\"persona\", \"description\", existing_type=sa.VARCHAR(), nullable=True)\n    op.drop_column(\"persona\", \"recency_bias\")\n    op.drop_column(\"persona\", \"llm_filter_extraction\")\n    op.drop_column(\"persona\", \"llm_relevance_filter\")\n    op.drop_column(\"persona\", \"search_type\")\n    op.drop_column(\"persona\", \"user_id\")\n    op.add_column(\n        \"document_retrieval_feedback\",\n        sa.Column(\"qa_event_id\", sa.INTEGER(), autoincrement=False, nullable=False),\n    )\n    op.drop_column(\"document_retrieval_feedback\", \"chat_message_id\")\n    op.alter_column(\n        \"chat_session\", \"persona_id\", existing_type=sa.INTEGER(), nullable=True\n    )\n    op.drop_column(\"chat_session\", \"one_shot\")\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\n            \"message_number\",\n            sa.INTEGER(),\n            autoincrement=False,\n            nullable=False,\n            primary_key=True,\n        ),\n    )\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"latest\", sa.BOOLEAN(), autoincrement=False, nullable=False),\n    )\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\n            \"edit_number\",\n            sa.INTEGER(),\n            autoincrement=False,\n            nullable=False,\n            primary_key=True,\n        ),\n    )\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\n            \"reference_docs\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            autoincrement=False,\n            nullable=True,\n        ),\n    )\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"persona_id\", sa.INTEGER(), autoincrement=False, nullable=True),\n    )\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\n            \"parent_edit_number\",\n            sa.INTEGER(),\n            autoincrement=False,\n            nullable=True,\n        ),\n    )\n    op.create_foreign_key(\n        \"fk_chat_message_persona_id\",\n        \"chat_message\",\n        \"persona\",\n        [\"persona_id\"],\n        [\"id\"],\n    )\n    op.drop_column(\"chat_message\", \"error\")\n    op.drop_column(\"chat_message\", \"citations\")\n    op.drop_column(\"chat_message\", \"prompt_id\")\n    op.drop_column(\"chat_message\", \"rephrased_query\")\n    op.drop_column(\"chat_message\", \"latest_child_message\")\n    op.drop_column(\"chat_message\", \"parent_message\")\n    op.drop_column(\"chat_message\", \"id\")\n    op.add_column(\n        \"chat_feedback\",\n        sa.Column(\n            \"chat_message_message_number\",\n            sa.INTEGER(),\n            autoincrement=False,\n            nullable=False,\n        ),\n    )\n    op.add_column(\n        \"chat_feedback\",\n        sa.Column(\n            \"chat_message_chat_session_id\",\n            sa.INTEGER(),\n            autoincrement=False,\n            nullable=False,\n            primary_key=True,\n        ),\n    )\n    op.add_column(\n        \"chat_feedback\",\n        sa.Column(\n            \"chat_message_edit_number\",\n            sa.INTEGER(),\n            autoincrement=False,\n            nullable=False,\n        ),\n    )\n    op.drop_column(\"chat_feedback\", \"chat_message_id\")\n    op.create_table(\n        \"query_event\",\n        sa.Column(\"id\", sa.INTEGER(), autoincrement=True, nullable=False),\n        sa.Column(\"query\", sa.VARCHAR(), autoincrement=False, nullable=False),\n        sa.Column(\n            \"selected_search_flow\",\n            sa.VARCHAR(),\n            autoincrement=False,\n            nullable=True,\n        ),\n        sa.Column(\"llm_answer\", sa.VARCHAR(), autoincrement=False, nullable=True),\n        sa.Column(\"feedback\", sa.VARCHAR(), autoincrement=False, nullable=True),\n        sa.Column(\"user_id\", sa.UUID(), autoincrement=False, nullable=True),\n        sa.Column(\n            \"time_created\",\n            postgresql.TIMESTAMP(timezone=True),\n            server_default=sa.text(\"now()\"),\n            autoincrement=False,\n            nullable=False,\n        ),\n        sa.Column(\n            \"retrieved_document_ids\",\n            postgresql.ARRAY(sa.VARCHAR()),\n            autoincrement=False,\n            nullable=True,\n        ),\n        sa.Column(\"chat_session_id\", sa.INTEGER(), autoincrement=False, nullable=True),\n        sa.ForeignKeyConstraint(\n            [\"chat_session_id\"],\n            [\"chat_session.id\"],\n            name=\"fk_query_event_chat_session_id\",\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"], [\"user.id\"], name=\"query_event_user_id_fkey\"\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=\"query_event_pkey\"),\n    )\n    op.drop_table(\"chat_message__search_doc\")\n    op.drop_table(\"persona__prompt\")\n    op.drop_table(\"prompt\")\n    op.drop_table(\"search_doc\")\n    op.create_unique_constraint(\n        \"uq_chat_message_combination\",\n        \"chat_message\",\n        [\"chat_session_id\", \"message_number\", \"edit_number\"],\n    )\n    op.create_foreign_key(\n        \"chat_feedback_chat_message_chat_session_id_chat_message_me_fkey\",\n        \"chat_feedback\",\n        \"chat_message\",\n        [\n            \"chat_message_chat_session_id\",\n            \"chat_message_message_number\",\n            \"chat_message_edit_number\",\n        ],\n        [\"chat_session_id\", \"message_number\", \"edit_number\"],\n    )\n    op.create_foreign_key(\n        \"document_retrieval_feedback_qa_event_id_fkey\",\n        \"document_retrieval_feedback\",\n        \"query_event\",\n        [\"qa_event_id\"],\n        [\"id\"],\n    )\n\n    op.execute(\"DROP TYPE IF EXISTS searchtype\")\n    op.execute(\"DROP TYPE IF EXISTS recencybiassetting\")\n    op.execute(\"DROP TYPE IF EXISTS documentsource\")\n"
  },
  {
    "path": "backend/alembic/versions/b30353be4eec_add_mcp_auth_performer.py",
    "content": "\"\"\"add_mcp_auth_performer\n\nRevision ID: b30353be4eec\nRevises: 2b75d0a8ffcb\nCreate Date: 2025-09-13 14:58:08.413534\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom onyx.db.enums import MCPAuthenticationPerformer, MCPTransport\n\n\n# revision identifiers, used by Alembic.\nrevision = \"b30353be4eec\"\ndown_revision = \"2b75d0a8ffcb\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    \"\"\"moving to a better way of handling auth performer and transport\"\"\"\n    # Add nullable column first for backward compatibility\n    op.add_column(\n        \"mcp_server\",\n        sa.Column(\n            \"auth_performer\",\n            sa.Enum(MCPAuthenticationPerformer, native_enum=False),\n            nullable=True,\n        ),\n    )\n\n    op.add_column(\n        \"mcp_server\",\n        sa.Column(\n            \"transport\",\n            sa.Enum(MCPTransport, native_enum=False),\n            nullable=True,\n        ),\n    )\n\n    # # Backfill values using existing data and inference rules\n    bind = op.get_bind()\n\n    # 1) OAUTH servers are always PER_USER\n    bind.execute(\n        sa.text(\n            \"\"\"\n        UPDATE mcp_server\n        SET auth_performer = 'PER_USER'\n        WHERE auth_type = 'OAUTH'\n        \"\"\"\n        )\n    )\n\n    # 2) If there is no admin connection config, mark as ADMIN (and not set yet)\n    bind.execute(\n        sa.text(\n            \"\"\"\n        UPDATE mcp_server\n        SET auth_performer = 'ADMIN'\n        WHERE admin_connection_config_id IS NULL\n          AND auth_performer IS NULL\n        \"\"\"\n        )\n    )\n\n    # 3) If there exists any user-specific connection config (user_email != ''), mark as PER_USER\n    bind.execute(\n        sa.text(\n            \"\"\"\n        UPDATE mcp_server AS ms\n        SET auth_performer = 'PER_USER'\n        FROM mcp_connection_config AS mcc\n        WHERE mcc.mcp_server_id = ms.id\n          AND COALESCE(mcc.user_email, '') <> ''\n          AND ms.auth_performer IS NULL\n        \"\"\"\n        )\n    )\n\n    # 4) Default any remaining nulls to ADMIN (covers API_TOKEN admin-managed and NONE)\n    bind.execute(\n        sa.text(\n            \"\"\"\n        UPDATE mcp_server\n        SET auth_performer = 'ADMIN'\n        WHERE auth_performer IS NULL\n        \"\"\"\n        )\n    )\n\n    # Finally, make the column non-nullable\n    op.alter_column(\n        \"mcp_server\",\n        \"auth_performer\",\n        existing_type=sa.Enum(MCPAuthenticationPerformer, native_enum=False),\n        nullable=False,\n    )\n\n    # Backfill transport for existing rows to STREAMABLE_HTTP, then make non-nullable\n    bind.execute(\n        sa.text(\n            \"\"\"\n        UPDATE mcp_server\n        SET transport = 'STREAMABLE_HTTP'\n        WHERE transport IS NULL\n        \"\"\"\n        )\n    )\n\n    op.alter_column(\n        \"mcp_server\",\n        \"transport\",\n        existing_type=sa.Enum(MCPTransport, native_enum=False),\n        nullable=False,\n    )\n\n\ndef downgrade() -> None:\n    \"\"\"remove cols\"\"\"\n    op.drop_column(\"mcp_server\", \"transport\")\n    op.drop_column(\"mcp_server\", \"auth_performer\")\n"
  },
  {
    "path": "backend/alembic/versions/b329d00a9ea6_adding_assistant_specific_user_.py",
    "content": "\"\"\"Adding assistant-specific user preferences\n\nRevision ID: b329d00a9ea6\nRevises: f9b8c7d6e5a4\nCreate Date: 2025-08-26 23:14:44.592985\n\n\"\"\"\n\nfrom alembic import op\nimport fastapi_users_db_sqlalchemy\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"b329d00a9ea6\"\ndown_revision = \"f9b8c7d6e5a4\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"assistant__user_specific_config\",\n        sa.Column(\"assistant_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=False,\n        ),\n        sa.Column(\"disabled_tool_ids\", postgresql.ARRAY(sa.Integer()), nullable=False),\n        sa.ForeignKeyConstraint([\"assistant_id\"], [\"persona.id\"], ondelete=\"CASCADE\"),\n        sa.ForeignKeyConstraint([\"user_id\"], [\"user.id\"], ondelete=\"CASCADE\"),\n        sa.PrimaryKeyConstraint(\"assistant_id\", \"user_id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"assistant__user_specific_config\")\n"
  },
  {
    "path": "backend/alembic/versions/b388730a2899_nullable_preferences.py",
    "content": "\"\"\"nullable preferences\n\nRevision ID: b388730a2899\nRevises: 1a03d2c2856b\nCreate Date: 2025-02-17 18:49:22.643902\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"b388730a2899\"\ndown_revision = \"1a03d2c2856b\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\"user\", \"temperature_override_enabled\", nullable=True)\n    op.alter_column(\"user\", \"auto_scroll\", nullable=True)\n\n\ndef downgrade() -> None:\n    # Ensure no null values before making columns non-nullable\n    op.execute(\n        'UPDATE \"user\" SET temperature_override_enabled = false WHERE temperature_override_enabled IS NULL'\n    )\n    op.execute('UPDATE \"user\" SET auto_scroll = false WHERE auto_scroll IS NULL')\n\n    op.alter_column(\"user\", \"temperature_override_enabled\", nullable=False)\n    op.alter_column(\"user\", \"auto_scroll\", nullable=False)\n"
  },
  {
    "path": "backend/alembic/versions/b4b7e1028dfd_grant_basic_to_existing_groups.py",
    "content": "\"\"\"grant_basic_to_existing_groups\n\nGrants the \"basic\" permission to all existing groups that don't already\nhave it. Every group should have at least \"basic\" so that its members\nget basic access when effective_permissions is backfilled.\n\nRevision ID: b4b7e1028dfd\nRevises: b7bcc991d722\nCreate Date: 2026-03-30 16:15:17.093498\n\n\"\"\"\n\nfrom collections.abc import Sequence\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"b4b7e1028dfd\"\ndown_revision = \"b7bcc991d722\"\nbranch_labels: str | None = None\ndepends_on: str | Sequence[str] | None = None\n\nuser_group = sa.table(\n    \"user_group\",\n    sa.column(\"id\", sa.Integer),\n    sa.column(\"is_default\", sa.Boolean),\n)\n\npermission_grant = sa.table(\n    \"permission_grant\",\n    sa.column(\"group_id\", sa.Integer),\n    sa.column(\"permission\", sa.String),\n    sa.column(\"grant_source\", sa.String),\n    sa.column(\"is_deleted\", sa.Boolean),\n)\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n\n    already_has_basic = (\n        sa.select(sa.literal(1))\n        .select_from(permission_grant)\n        .where(\n            permission_grant.c.group_id == user_group.c.id,\n            permission_grant.c.permission == \"basic\",\n        )\n        .exists()\n    )\n\n    groups_needing_basic = sa.select(\n        user_group.c.id,\n        sa.literal(\"basic\").label(\"permission\"),\n        sa.literal(\"SYSTEM\").label(\"grant_source\"),\n        sa.literal(False).label(\"is_deleted\"),\n    ).where(\n        user_group.c.is_default == sa.false(),\n        ~already_has_basic,\n    )\n\n    conn.execute(\n        permission_grant.insert().from_select(\n            [\"group_id\", \"permission\", \"grant_source\", \"is_deleted\"],\n            groups_needing_basic,\n        )\n    )\n\n\ndef downgrade() -> None:\n    conn = op.get_bind()\n\n    non_default_group_ids = sa.select(user_group.c.id).where(\n        user_group.c.is_default == sa.false()\n    )\n\n    conn.execute(\n        permission_grant.delete().where(\n            permission_grant.c.permission == \"basic\",\n            permission_grant.c.grant_source == \"SYSTEM\",\n            permission_grant.c.group_id.in_(non_default_group_ids),\n        )\n    )\n"
  },
  {
    "path": "backend/alembic/versions/b4ef3ae0bf6e_add_user_oauth_token_to_slack_bot.py",
    "content": "\"\"\"add_user_oauth_token_to_slack_bot\n\nRevision ID: b4ef3ae0bf6e\nRevises: 505c488f6662\nCreate Date: 2025-08-26 17:47:41.788462\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"b4ef3ae0bf6e\"\ndown_revision = \"505c488f6662\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add user_token column to slack_bot table\n    op.add_column(\"slack_bot\", sa.Column(\"user_token\", sa.LargeBinary(), nullable=True))\n\n\ndef downgrade() -> None:\n    # Remove user_token column from slack_bot table\n    op.drop_column(\"slack_bot\", \"user_token\")\n"
  },
  {
    "path": "backend/alembic/versions/b51c6844d1df_seed_memory_tool.py",
    "content": "\"\"\"seed_memory_tool and add enable_memory_tool to user\n\nRevision ID: b51c6844d1df\nRevises: 93c15d6a6fbb\nCreate Date: 2026-02-11 00:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"b51c6844d1df\"\ndown_revision = \"93c15d6a6fbb\"\nbranch_labels = None\ndepends_on = None\n\n\nMEMORY_TOOL = {\n    \"name\": \"MemoryTool\",\n    \"display_name\": \"Add Memory\",\n    \"description\": \"Save memories about the user for future conversations.\",\n    \"in_code_tool_id\": \"MemoryTool\",\n    \"enabled\": True,\n}\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n\n    existing = conn.execute(\n        sa.text(\n            \"SELECT in_code_tool_id FROM tool WHERE in_code_tool_id = :in_code_tool_id\"\n        ),\n        {\"in_code_tool_id\": MEMORY_TOOL[\"in_code_tool_id\"]},\n    ).fetchone()\n\n    if existing:\n        conn.execute(\n            sa.text(\n                \"\"\"\n                UPDATE tool\n                SET name = :name,\n                    display_name = :display_name,\n                    description = :description\n                WHERE in_code_tool_id = :in_code_tool_id\n                \"\"\"\n            ),\n            MEMORY_TOOL,\n        )\n    else:\n        conn.execute(\n            sa.text(\n                \"\"\"\n                INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)\n                VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)\n                \"\"\"\n            ),\n            MEMORY_TOOL,\n        )\n\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"enable_memory_tool\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.true(),\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"enable_memory_tool\")\n\n    conn = op.get_bind()\n    conn.execute(\n        sa.text(\"DELETE FROM tool WHERE in_code_tool_id = :in_code_tool_id\"),\n        {\"in_code_tool_id\": MEMORY_TOOL[\"in_code_tool_id\"]},\n    )\n"
  },
  {
    "path": "backend/alembic/versions/b558f51620b4_pause_finished_user_file_connectors.py",
    "content": "\"\"\"Pause finished user file connectors\n\nRevision ID: b558f51620b4\nRevises: 90e3b9af7da4\nCreate Date: 2025-08-15 17:17:02.456704\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"b558f51620b4\"\ndown_revision = \"90e3b9af7da4\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Set all user file connector credential pairs with ACTIVE status to PAUSED\n    # This ensures user files don't continue to run indexing tasks after processing\n    op.execute(\n        \"\"\"\n        UPDATE connector_credential_pair\n        SET status = 'PAUSED'\n        WHERE is_user_file = true\n        AND status = 'ACTIVE'\n        \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/b5c4d7e8f9a1_add_hierarchy_node_cc_pair_table.py",
    "content": "\"\"\"add hierarchy_node_by_connector_credential_pair table\n\nRevision ID: b5c4d7e8f9a1\nRevises: a3b8d9e2f1c4\nCreate Date: 2026-03-04\n\n\"\"\"\n\nimport sqlalchemy as sa\nfrom alembic import op\n\nrevision = \"b5c4d7e8f9a1\"\ndown_revision = \"a3b8d9e2f1c4\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"hierarchy_node_by_connector_credential_pair\",\n        sa.Column(\"hierarchy_node_id\", sa.Integer(), nullable=False),\n        sa.Column(\"connector_id\", sa.Integer(), nullable=False),\n        sa.Column(\"credential_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"hierarchy_node_id\"],\n            [\"hierarchy_node.id\"],\n            ondelete=\"CASCADE\",\n        ),\n        sa.ForeignKeyConstraint(\n            [\"connector_id\", \"credential_id\"],\n            [\n                \"connector_credential_pair.connector_id\",\n                \"connector_credential_pair.credential_id\",\n            ],\n            ondelete=\"CASCADE\",\n        ),\n        sa.PrimaryKeyConstraint(\"hierarchy_node_id\", \"connector_id\", \"credential_id\"),\n    )\n    op.create_index(\n        \"ix_hierarchy_node_cc_pair_connector_credential\",\n        \"hierarchy_node_by_connector_credential_pair\",\n        [\"connector_id\", \"credential_id\"],\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\n        \"ix_hierarchy_node_cc_pair_connector_credential\",\n        table_name=\"hierarchy_node_by_connector_credential_pair\",\n    )\n    op.drop_table(\"hierarchy_node_by_connector_credential_pair\")\n"
  },
  {
    "path": "backend/alembic/versions/b728689f45b1_rename_persona_is_visible_to_is_listed_.py",
    "content": "\"\"\"rename persona is_visible to is_listed and featured to is_featured\n\nRevision ID: b728689f45b1\nRevises: 689433b0d8de\nCreate Date: 2026-03-23 12:36:26.607305\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"b728689f45b1\"\ndown_revision = \"689433b0d8de\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\"persona\", \"is_visible\", new_column_name=\"is_listed\")\n    op.alter_column(\"persona\", \"featured\", new_column_name=\"is_featured\")\n\n\ndef downgrade() -> None:\n    op.alter_column(\"persona\", \"is_listed\", new_column_name=\"is_visible\")\n    op.alter_column(\"persona\", \"is_featured\", new_column_name=\"featured\")\n"
  },
  {
    "path": "backend/alembic/versions/b72ed7a5db0e_remove_description_from_starter_messages.py",
    "content": "\"\"\"remove description from starter messages\n\nRevision ID: b72ed7a5db0e\nRevises: 33cb72ea4d80\nCreate Date: 2024-11-03 15:55:28.944408\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"b72ed7a5db0e\"\ndown_revision = \"33cb72ea4d80\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET starter_messages = (\n                SELECT jsonb_agg(elem - 'description')\n                FROM jsonb_array_elements(starter_messages) elem\n            )\n            WHERE starter_messages IS NOT NULL\n              AND jsonb_typeof(starter_messages) = 'array'\n            \"\"\"\n        )\n    )\n\n\ndef downgrade() -> None:\n    op.execute(\n        sa.text(\n            \"\"\"\n            UPDATE persona\n            SET starter_messages = (\n                SELECT jsonb_agg(elem || '{\"description\": \"\"}')\n                FROM jsonb_array_elements(starter_messages) elem\n            )\n            WHERE starter_messages IS NOT NULL\n              AND jsonb_typeof(starter_messages) = 'array'\n            \"\"\"\n        )\n    )\n"
  },
  {
    "path": "backend/alembic/versions/b7a7eee5aa15_add_checkpointing_failure_handling.py",
    "content": "\"\"\"Add checkpointing/failure handling\n\nRevision ID: b7a7eee5aa15\nRevises: f39c5794c10a\nCreate Date: 2025-01-24 15:17:36.763172\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"b7a7eee5aa15\"\ndown_revision = \"f39c5794c10a\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"checkpoint_pointer\", sa.String(), nullable=True),\n    )\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"poll_range_start\", sa.DateTime(timezone=True), nullable=True),\n    )\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"poll_range_end\", sa.DateTime(timezone=True), nullable=True),\n    )\n\n    op.create_index(\n        \"ix_index_attempt_cc_pair_settings_poll\",\n        \"index_attempt\",\n        [\n            \"connector_credential_pair_id\",\n            \"search_settings_id\",\n            \"status\",\n            sa.text(\"time_updated DESC\"),\n        ],\n    )\n\n    # Drop the old IndexAttemptError table\n    op.drop_index(\"index_attempt_id\", table_name=\"index_attempt_errors\")\n    op.drop_table(\"index_attempt_errors\")\n\n    # Create the new version of the table\n    op.create_table(\n        \"index_attempt_errors\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\"index_attempt_id\", sa.Integer(), nullable=False),\n        sa.Column(\"connector_credential_pair_id\", sa.Integer(), nullable=False),\n        sa.Column(\"document_id\", sa.String(), nullable=True),\n        sa.Column(\"document_link\", sa.String(), nullable=True),\n        sa.Column(\"entity_id\", sa.String(), nullable=True),\n        sa.Column(\"failed_time_range_start\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\"failed_time_range_end\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\"failure_message\", sa.Text(), nullable=False),\n        sa.Column(\"is_resolved\", sa.Boolean(), nullable=False, default=False),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"index_attempt_id\"],\n            [\"index_attempt.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"connector_credential_pair_id\"],\n            [\"connector_credential_pair.id\"],\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.execute(\"SET lock_timeout = '5s'\")\n\n    # try a few times to drop the table, this has been observed to fail due to other locks\n    # blocking the drop\n    NUM_TRIES = 10\n    for i in range(NUM_TRIES):\n        try:\n            op.drop_table(\"index_attempt_errors\")\n            break\n        except Exception as e:\n            if i == NUM_TRIES - 1:\n                raise e\n            print(f\"Error dropping table: {e}. Retrying...\")\n\n    op.execute(\"SET lock_timeout = DEFAULT\")\n\n    # Recreate the old IndexAttemptError table\n    op.create_table(\n        \"index_attempt_errors\",\n        sa.Column(\"id\", sa.Integer(), primary_key=True),\n        sa.Column(\"index_attempt_id\", sa.Integer(), nullable=True),\n        sa.Column(\"batch\", sa.Integer(), nullable=True),\n        sa.Column(\"doc_summaries\", postgresql.JSONB(), nullable=False),\n        sa.Column(\"error_msg\", sa.Text(), nullable=True),\n        sa.Column(\"traceback\", sa.Text(), nullable=True),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n        ),\n        sa.ForeignKeyConstraint(\n            [\"index_attempt_id\"],\n            [\"index_attempt.id\"],\n        ),\n    )\n\n    op.create_index(\n        \"index_attempt_id\",\n        \"index_attempt_errors\",\n        [\"time_created\"],\n    )\n\n    op.drop_index(\"ix_index_attempt_cc_pair_settings_poll\")\n    op.drop_column(\"index_attempt\", \"checkpoint_pointer\")\n    op.drop_column(\"index_attempt\", \"poll_range_start\")\n    op.drop_column(\"index_attempt\", \"poll_range_end\")\n"
  },
  {
    "path": "backend/alembic/versions/b7bcc991d722_assign_users_to_default_groups.py",
    "content": "\"\"\"assign_users_to_default_groups\n\nRevision ID: b7bcc991d722\nRevises: 03d085c5c38d\nCreate Date: 2026-03-25 16:30:39.529301\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\n\n\n# revision identifiers, used by Alembic.\nrevision = \"b7bcc991d722\"\ndown_revision = \"03d085c5c38d\"\nbranch_labels = None\ndepends_on = None\n\n# The no-auth placeholder user must NOT be assigned to default groups.\n# A database trigger (migrate_no_auth_data_to_user) will try to DELETE this\n# user when the first real user registers; group membership rows would cause\n# an FK violation on that DELETE.\nNO_AUTH_PLACEHOLDER_USER_UUID = \"00000000-0000-0000-0000-000000000001\"\n\n# Reflect table structures for use in DML\nuser_group_table = sa.table(\n    \"user_group\",\n    sa.column(\"id\", sa.Integer),\n    sa.column(\"name\", sa.String),\n    sa.column(\"is_default\", sa.Boolean),\n)\n\nuser_table = sa.table(\n    \"user\",\n    sa.column(\"id\", sa.Uuid),\n    sa.column(\"role\", sa.String),\n    sa.column(\"account_type\", sa.String),\n    sa.column(\"is_active\", sa.Boolean),\n)\n\nuser__user_group_table = sa.table(\n    \"user__user_group\",\n    sa.column(\"user_group_id\", sa.Integer),\n    sa.column(\"user_id\", sa.Uuid),\n)\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n\n    # Look up default group IDs\n    admin_row = conn.execute(\n        sa.select(user_group_table.c.id).where(\n            user_group_table.c.name == \"Admin\",\n            user_group_table.c.is_default == True,  # noqa: E712\n        )\n    ).fetchone()\n\n    basic_row = conn.execute(\n        sa.select(user_group_table.c.id).where(\n            user_group_table.c.name == \"Basic\",\n            user_group_table.c.is_default == True,  # noqa: E712\n        )\n    ).fetchone()\n\n    if admin_row is None:\n        raise RuntimeError(\n            \"Default 'Admin' group not found. \"\n            \"Ensure migration 977e834c1427 (seed_default_groups) ran successfully.\"\n        )\n\n    if basic_row is None:\n        raise RuntimeError(\n            \"Default 'Basic' group not found. \"\n            \"Ensure migration 977e834c1427 (seed_default_groups) ran successfully.\"\n        )\n\n    # Users with role=admin → Admin group\n    # Include inactive users so reactivation doesn't require reconciliation.\n    # Exclude non-human account types (mirrors assign_user_to_default_groups logic).\n    admin_users = sa.select(\n        sa.literal(admin_row[0]).label(\"user_group_id\"),\n        user_table.c.id.label(\"user_id\"),\n    ).where(\n        user_table.c.role == \"ADMIN\",\n        user_table.c.account_type.notin_([\"BOT\", \"EXT_PERM_USER\", \"ANONYMOUS\"]),\n        user_table.c.id != NO_AUTH_PLACEHOLDER_USER_UUID,\n    )\n    op.execute(\n        pg_insert(user__user_group_table)\n        .from_select([\"user_group_id\", \"user_id\"], admin_users)\n        .on_conflict_do_nothing(index_elements=[\"user_group_id\", \"user_id\"])\n    )\n\n    # STANDARD users (non-admin) and SERVICE_ACCOUNT users (role=basic) → Basic group\n    # Include inactive users so reactivation doesn't require reconciliation.\n    basic_users = sa.select(\n        sa.literal(basic_row[0]).label(\"user_group_id\"),\n        user_table.c.id.label(\"user_id\"),\n    ).where(\n        user_table.c.account_type.notin_([\"BOT\", \"EXT_PERM_USER\", \"ANONYMOUS\"]),\n        user_table.c.id != NO_AUTH_PLACEHOLDER_USER_UUID,\n        sa.or_(\n            sa.and_(\n                user_table.c.account_type == \"STANDARD\",\n                user_table.c.role != \"ADMIN\",\n            ),\n            sa.and_(\n                user_table.c.account_type == \"SERVICE_ACCOUNT\",\n                user_table.c.role == \"BASIC\",\n            ),\n        ),\n    )\n    op.execute(\n        pg_insert(user__user_group_table)\n        .from_select([\"user_group_id\", \"user_id\"], basic_users)\n        .on_conflict_do_nothing(index_elements=[\"user_group_id\", \"user_id\"])\n    )\n\n\ndef downgrade() -> None:\n    # Group memberships are left in place — removing them risks\n    # deleting memberships that existed before this migration.\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/b7c2b63c4a03_add_background_reindex_enabled_field.py",
    "content": "\"\"\"add background_reindex_enabled field\n\nRevision ID: b7c2b63c4a03\nRevises: f11b408e39d3\nCreate Date: 2024-03-26 12:34:56.789012\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\nfrom onyx.db.enums import EmbeddingPrecision\n\n\n# revision identifiers, used by Alembic.\nrevision = \"b7c2b63c4a03\"\ndown_revision = \"f11b408e39d3\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add background_reindex_enabled column with default value of True\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"background_reindex_enabled\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=\"true\",\n        ),\n    )\n\n    # Add embedding_precision column with default value of FLOAT\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\n            \"embedding_precision\",\n            sa.Enum(EmbeddingPrecision, native_enum=False),\n            nullable=False,\n            server_default=EmbeddingPrecision.FLOAT.name,\n        ),\n    )\n\n    # Add reduced_dimension column with default value of None\n    op.add_column(\n        \"search_settings\",\n        sa.Column(\"reduced_dimension\", sa.Integer(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    # Remove the background_reindex_enabled column\n    op.drop_column(\"search_settings\", \"background_reindex_enabled\")\n    op.drop_column(\"search_settings\", \"embedding_precision\")\n    op.drop_column(\"search_settings\", \"reduced_dimension\")\n"
  },
  {
    "path": "backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py",
    "content": "\"\"\"adjust prompt length\n\nRevision ID: b7ec9b5b505f\nRevises: abbfec3a5ac5\nCreate Date: 2025-09-10 18:51:15.629197\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"b7ec9b5b505f\"\ndown_revision = \"abbfec3a5ac5\"\nbranch_labels = None\ndepends_on = None\n\n\nMAX_PROMPT_LENGTH = 5_000_000\n\n\ndef upgrade() -> None:\n    # NOTE: need to run this since the previous migration PREVIOUSLY set the length to 8000\n    op.alter_column(\n        \"persona\",\n        \"system_prompt\",\n        existing_type=sa.String(length=8000),\n        type_=sa.String(length=MAX_PROMPT_LENGTH),\n        existing_nullable=False,\n    )\n    op.alter_column(\n        \"persona\",\n        \"task_prompt\",\n        existing_type=sa.String(length=8000),\n        type_=sa.String(length=MAX_PROMPT_LENGTH),\n        existing_nullable=False,\n    )\n\n\ndef downgrade() -> None:\n    # Downgrade not necessary\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/b85f02ec1308_fix_file_type_migration.py",
    "content": "\"\"\"fix-file-type-migration\n\nRevision ID: b85f02ec1308\nRevises: a3bfd0d64902\nCreate Date: 2024-05-31 18:09:26.658164\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"b85f02ec1308\"\ndown_revision = \"a3bfd0d64902\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.execute(\n        \"\"\"\n        UPDATE file_store\n        SET file_origin = UPPER(file_origin)\n    \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    # Let's not break anything on purpose :)\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/b896bbd0d5a7_backfill_is_internet_data_to_false.py",
    "content": "\"\"\"backfill is_internet data to False\n\nRevision ID: b896bbd0d5a7\nRevises: 44f856ae2a4a\nCreate Date: 2024-07-16 15:21:05.718571\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"b896bbd0d5a7\"\ndown_revision = \"44f856ae2a4a\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.execute(\"UPDATE search_doc SET is_internet = FALSE WHERE is_internet IS NULL\")\n\n\ndef downgrade() -> None:\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/b8c9d0e1f2a3_drop_milestone_table.py",
    "content": "\"\"\"Drop milestone table\n\nRevision ID: b8c9d0e1f2a3\nRevises: a2b3c4d5e6f7\nCreate Date: 2025-12-18\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nimport fastapi_users_db_sqlalchemy\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"b8c9d0e1f2a3\"\ndown_revision = \"a2b3c4d5e6f7\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.drop_table(\"milestone\")\n\n\ndef downgrade() -> None:\n    op.create_table(\n        \"milestone\",\n        sa.Column(\"id\", sa.UUID(), nullable=False),\n        sa.Column(\"tenant_id\", sa.String(), nullable=True),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.Column(\"event_type\", sa.String(), nullable=False),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"event_tracker\", postgresql.JSONB(), nullable=True),\n        sa.ForeignKeyConstraint([\"user_id\"], [\"user.id\"], ondelete=\"CASCADE\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"event_type\", name=\"uq_milestone_event_type\"),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/ba98eba0f66a_add_support_for_litellm_proxy_in_.py",
    "content": "\"\"\"add support for litellm proxy in reranking\n\nRevision ID: ba98eba0f66a\nRevises: bceb1e139447\nCreate Date: 2024-09-06 10:36:04.507332\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"ba98eba0f66a\"\ndown_revision = \"bceb1e139447\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"search_settings\", sa.Column(\"rerank_api_url\", sa.String(), nullable=True)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"search_settings\", \"rerank_api_url\")\n"
  },
  {
    "path": "backend/alembic/versions/baf71f781b9e_add_llm_model_version_override_to_.py",
    "content": "\"\"\"Add llm_model_version_override to Persona\n\nRevision ID: baf71f781b9e\nRevises: 50b683a8295c\nCreate Date: 2023-12-06 21:56:50.286158\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"baf71f781b9e\"\ndown_revision = \"50b683a8295c\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"persona\",\n        sa.Column(\"llm_model_version_override\", sa.String(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"persona\", \"llm_model_version_override\")\n"
  },
  {
    "path": "backend/alembic/versions/bc9771dccadf_create_usage_reports_table.py",
    "content": "\"\"\"create usage reports table\n\nRevision ID: bc9771dccadf\nRevises: 0568ccf46a6b\nCreate Date: 2024-06-18 10:04:26.800282\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nimport fastapi_users_db_sqlalchemy\n\n# revision identifiers, used by Alembic.\nrevision = \"bc9771dccadf\"\ndown_revision = \"0568ccf46a6b\"\n\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"usage_reports\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"report_name\", sa.String(), nullable=False),\n        sa.Column(\n            \"requestor_user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"period_from\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\"period_to\", sa.DateTime(timezone=True), nullable=True),\n        sa.ForeignKeyConstraint(\n            [\"report_name\"],\n            [\"file_store.file_name\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"requestor_user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"usage_reports\")\n"
  },
  {
    "path": "backend/alembic/versions/bceb1e139447_add_base_url_to_cloudembeddingprovider.py",
    "content": "\"\"\"Add base_url to CloudEmbeddingProvider\n\nRevision ID: bceb1e139447\nRevises: a3795dce87be\nCreate Date: 2024-08-28 17:00:52.554580\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"bceb1e139447\"\ndown_revision = \"a3795dce87be\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"embedding_provider\", sa.Column(\"api_url\", sa.String(), nullable=True)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"embedding_provider\", \"api_url\")\n"
  },
  {
    "path": "backend/alembic/versions/bd2921608c3a_non_nullable_default_persona.py",
    "content": "\"\"\"non nullable default persona\n\nRevision ID: bd2921608c3a\nRevises: 797089dfb4d2\nCreate Date: 2024-09-20 10:28:37.992042\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"bd2921608c3a\"\ndown_revision = \"797089dfb4d2\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Set existing NULL values to False\n    op.execute(\n        \"UPDATE persona SET is_default_persona = FALSE WHERE is_default_persona IS NULL\"\n    )\n\n    # Alter the column to be not nullable with a default value of False\n    op.alter_column(\n        \"persona\",\n        \"is_default_persona\",\n        existing_type=sa.Boolean(),\n        nullable=False,\n        server_default=sa.text(\"false\"),\n    )\n\n\ndef downgrade() -> None:\n    # Revert the changes\n    op.alter_column(\n        \"persona\",\n        \"is_default_persona\",\n        existing_type=sa.Boolean(),\n        nullable=True,\n        server_default=None,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/bd7c3bf8beba_migrate_agent_responses_to_research_.py",
    "content": "\"\"\"migrate_agent_sub_questions_to_research_iterations\n\nRevision ID: bd7c3bf8beba\nRevises: f8a9b2c3d4e5\nCreate Date: 2025-08-18 11:33:27.098287\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"bd7c3bf8beba\"\ndown_revision = \"f8a9b2c3d4e5\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Get connection to execute raw SQL\n    connection = op.get_bind()\n\n    # First, insert data into research_agent_iteration table\n    # This creates one iteration record per primary_question_id using the earliest time_created\n    connection.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO research_agent_iteration (primary_question_id, created_at, iteration_nr, purpose, reasoning)\n            SELECT\n                primary_question_id,\n                MIN(time_created) as created_at,\n                1 as iteration_nr,\n                'Generating and researching subquestions' as purpose,\n                '(No previous reasoning)' as reasoning\n            FROM agent__sub_question\n            JOIN chat_message on agent__sub_question.primary_question_id = chat_message.id\n            WHERE primary_question_id IS NOT NULL\n                AND chat_message.is_agentic = true\n            GROUP BY primary_question_id\n            ON CONFLICT DO NOTHING;\n        \"\"\"\n        )\n    )\n\n    # Then, insert data into research_agent_iteration_sub_step table\n    # This migrates each sub-question as a sub-step\n    connection.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO research_agent_iteration_sub_step (\n                primary_question_id,\n                iteration_nr,\n                iteration_sub_step_nr,\n                created_at,\n                sub_step_instructions,\n                sub_step_tool_id,\n                sub_answer,\n                cited_doc_results\n            )\n            SELECT\n                primary_question_id,\n                1 as iteration_nr,\n                level_question_num as iteration_sub_step_nr,\n                time_created as created_at,\n                sub_question as sub_step_instructions,\n                1 as sub_step_tool_id,\n                sub_answer,\n                sub_question_doc_results as cited_doc_results\n            FROM agent__sub_question\n            JOIN chat_message on agent__sub_question.primary_question_id = chat_message.id\n            WHERE chat_message.is_agentic = true\n            AND primary_question_id IS NOT NULL\n            ON CONFLICT DO NOTHING;\n        \"\"\"\n        )\n    )\n\n    # Update chat_message records: set legacy agentic type and answer purpose for existing agentic messages\n    connection.execute(\n        sa.text(\n            \"\"\"\n            UPDATE chat_message\n            SET research_answer_purpose = 'ANSWER'\n            WHERE is_agentic = true\n            AND research_type IS NULL and\n                message_type = 'ASSISTANT';\n        \"\"\"\n        )\n    )\n    connection.execute(\n        sa.text(\n            \"\"\"\n            UPDATE chat_message\n            SET research_type = 'LEGACY_AGENTIC'\n            WHERE is_agentic = true\n            AND research_type IS NULL;\n        \"\"\"\n        )\n    )\n\n\ndef downgrade() -> None:\n    # Get connection to execute raw SQL\n    connection = op.get_bind()\n\n    # Note: This downgrade removes all research agent iteration data\n    # There's no way to perfectly restore the original agent__sub_question data\n    # if it was deleted after this migration\n\n    # Delete all research_agent_iteration_sub_step records that were migrated\n    connection.execute(\n        sa.text(\n            \"\"\"\n            DELETE FROM research_agent_iteration_sub_step\n            USING chat_message\n            WHERE research_agent_iteration_sub_step.primary_question_id = chat_message.id\n            AND chat_message.research_type = 'LEGACY_AGENTIC';\n        \"\"\"\n        )\n    )\n\n    # Delete all research_agent_iteration records that were migrated\n    connection.execute(\n        sa.text(\n            \"\"\"\n            DELETE FROM research_agent_iteration\n            USING chat_message\n            WHERE research_agent_iteration.primary_question_id = chat_message.id\n            AND chat_message.research_type = 'LEGACY_AGENTIC';\n        \"\"\"\n        )\n    )\n\n    # Revert chat_message updates: clear research fields for legacy agentic messages\n    connection.execute(\n        sa.text(\n            \"\"\"\n            UPDATE chat_message\n            SET research_type = NULL,\n                research_answer_purpose = NULL\n            WHERE is_agentic = true\n            AND research_type = 'LEGACY_AGENTIC'\n            AND message_type = 'ASSISTANT';\n        \"\"\"\n        )\n    )\n"
  },
  {
    "path": "backend/alembic/versions/be2ab2aa50ee_fix_capitalization.py",
    "content": "\"\"\"fix_capitalization\n\nRevision ID: be2ab2aa50ee\nRevises: 369644546676\nCreate Date: 2025-01-10 13:13:26.228960\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"be2ab2aa50ee\"\ndown_revision = \"369644546676\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.execute(\n        \"\"\"\n        UPDATE document\n        SET\n            external_user_group_ids = ARRAY(\n                SELECT LOWER(unnest(external_user_group_ids))\n            ),\n            last_modified = NOW()\n        WHERE\n            external_user_group_ids IS NOT NULL\n            AND external_user_group_ids::text[] <> ARRAY(\n                SELECT LOWER(unnest(external_user_group_ids))\n            )::text[]\n    \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    # No way to cleanly persist the bad state through an upgrade/downgrade\n    # cycle, so we just pass\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/be87a654d5af_persona_new_default_model_configuration_.py",
    "content": "\"\"\"Persona new default model configuration id column\n\nRevision ID: be87a654d5af\nRevises: e7f8a9b0c1d2\nCreate Date: 2026-01-30 11:14:17.306275\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"be87a654d5af\"\ndown_revision = \"e7f8a9b0c1d2\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"persona\",\n        sa.Column(\"default_model_configuration_id\", sa.Integer(), nullable=True),\n    )\n    op.create_foreign_key(\n        \"fk_persona_default_model_configuration_id\",\n        \"persona\",\n        \"model_configuration\",\n        [\"default_model_configuration_id\"],\n        [\"id\"],\n        ondelete=\"SET NULL\",\n    )\n\n\ndef downgrade() -> None:\n    op.drop_constraint(\n        \"fk_persona_default_model_configuration_id\", \"persona\", type_=\"foreignkey\"\n    )\n\n    op.drop_column(\"persona\", \"default_model_configuration_id\")\n"
  },
  {
    "path": "backend/alembic/versions/bf7a81109301_delete_input_prompts.py",
    "content": "\"\"\"delete_input_prompts\n\nRevision ID: bf7a81109301\nRevises: f7a894b06d02\nCreate Date: 2024-12-09 12:00:49.884228\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nimport fastapi_users_db_sqlalchemy\n\n\n# revision identifiers, used by Alembic.\nrevision = \"bf7a81109301\"\ndown_revision = \"f7a894b06d02\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.drop_table(\"inputprompt__user\")\n    op.drop_table(\"inputprompt\")\n\n\ndef downgrade() -> None:\n    op.create_table(\n        \"inputprompt\",\n        sa.Column(\"id\", sa.Integer(), autoincrement=True, nullable=False),\n        sa.Column(\"prompt\", sa.String(), nullable=False),\n        sa.Column(\"content\", sa.String(), nullable=False),\n        sa.Column(\"active\", sa.Boolean(), nullable=False),\n        sa.Column(\"is_public\", sa.Boolean(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"inputprompt__user\",\n        sa.Column(\"input_prompt_id\", sa.Integer(), nullable=False),\n        sa.Column(\"user_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"input_prompt_id\"],\n            [\"inputprompt.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"inputprompt.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"input_prompt_id\", \"user_id\"),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/c0aab6edb6dd_delete_workspace.py",
    "content": "\"\"\"delete workspace\n\nRevision ID: c0aab6edb6dd\nRevises: 35e518e0ddf4\nCreate Date: 2024-12-17 14:37:07.660631\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"c0aab6edb6dd\"\ndown_revision = \"35e518e0ddf4\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.execute(\n        \"\"\"\n    UPDATE connector\n    SET connector_specific_config = connector_specific_config - 'workspace'\n    WHERE source = 'SLACK'\n    \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    import json\n    from sqlalchemy import text\n    from slack_sdk import WebClient\n\n    conn = op.get_bind()\n\n    # Fetch all Slack credentials\n    creds_result = conn.execute(\n        text(\"SELECT id, credential_json FROM credential WHERE source = 'SLACK'\")\n    )\n    all_slack_creds = creds_result.fetchall()\n    if not all_slack_creds:\n        return\n\n    for cred_row in all_slack_creds:\n        credential_id, credential_json = cred_row\n\n        credential_json = (\n            credential_json.tobytes().decode(\"utf-8\")\n            if isinstance(credential_json, memoryview)\n            else credential_json.decode(\"utf-8\")\n        )\n        credential_data = json.loads(credential_json)\n        slack_bot_token = credential_data.get(\"slack_bot_token\")\n        if not slack_bot_token:\n            print(\n                f\"No slack_bot_token found for credential {credential_id}. \"\n                \"Your Slack connector will not function until you upgrade and provide a valid token.\"\n            )\n            continue\n\n        client = WebClient(token=slack_bot_token)\n        try:\n            auth_response = client.auth_test()\n            workspace = auth_response[\"url\"].split(\"//\")[1].split(\".\")[0]\n\n            # Update only the connectors linked to this credential\n            # (and which are Slack connectors).\n            op.execute(\n                f\"\"\"\n                UPDATE connector AS c\n                SET connector_specific_config = jsonb_set(\n                    connector_specific_config,\n                    '{{workspace}}',\n                    to_jsonb('{workspace}'::text)\n                )\n                FROM connector_credential_pair AS ccp\n                WHERE ccp.connector_id = c.id\n                  AND c.source = 'SLACK'\n                  AND ccp.credential_id = {credential_id}\n            \"\"\"\n            )\n        except Exception:\n            print(\n                f\"We were unable to get the workspace url for your Slack Connector with id {credential_id}.\"\n            )\n            print(\"This connector will no longer work until you upgrade.\")\n            continue\n"
  },
  {
    "path": "backend/alembic/versions/c0c937d5c9e5_llm_provider_deprecate_fields.py",
    "content": "\"\"\"llm provider deprecate fields\n\nRevision ID: c0c937d5c9e5\nRevises: 8ffcc2bcfc11\nCreate Date: 2026-02-25 17:35:46.125102\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"c0c937d5c9e5\"\ndown_revision = \"8ffcc2bcfc11\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Make default_model_name nullable (was NOT NULL)\n    op.alter_column(\n        \"llm_provider\",\n        \"default_model_name\",\n        existing_type=sa.String(),\n        nullable=True,\n    )\n\n    # Drop unique constraint on is_default_provider (defaults now tracked via LLMModelFlow)\n    op.drop_constraint(\n        \"llm_provider_is_default_provider_key\",\n        \"llm_provider\",\n        type_=\"unique\",\n    )\n\n    # Remove server_default from is_default_vision_provider (was server_default=false())\n    op.alter_column(\n        \"llm_provider\",\n        \"is_default_vision_provider\",\n        existing_type=sa.Boolean(),\n        server_default=None,\n    )\n\n\ndef downgrade() -> None:\n    # Restore default_model_name to NOT NULL (set empty string for any NULLs first)\n    op.execute(\n        \"UPDATE llm_provider SET default_model_name = '' WHERE default_model_name IS NULL\"\n    )\n    op.alter_column(\n        \"llm_provider\",\n        \"default_model_name\",\n        existing_type=sa.String(),\n        nullable=False,\n    )\n\n    # Restore unique constraint on is_default_provider\n    op.create_unique_constraint(\n        \"llm_provider_is_default_provider_key\",\n        \"llm_provider\",\n        [\"is_default_provider\"],\n    )\n\n    # Restore server_default for is_default_vision_provider\n    op.alter_column(\n        \"llm_provider\",\n        \"is_default_vision_provider\",\n        existing_type=sa.Boolean(),\n        server_default=sa.false(),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/c0fd6e4da83a_add_recent_assistants.py",
    "content": "\"\"\"add recent assistants\n\nRevision ID: c0fd6e4da83a\nRevises: b72ed7a5db0e\nCreate Date: 2024-11-03 17:28:54.916618\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"c0fd6e4da83a\"\ndown_revision = \"b72ed7a5db0e\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"recent_assistants\", postgresql.JSONB(), server_default=\"[]\", nullable=False\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"recent_assistants\")\n"
  },
  {
    "path": "backend/alembic/versions/c18cdf4b497e_add_standard_answer_tables.py",
    "content": "\"\"\"Add standard_answer tables\n\nRevision ID: c18cdf4b497e\nRevises: 3a7802814195\nCreate Date: 2024-06-06 15:15:02.000648\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"c18cdf4b497e\"\ndown_revision = \"3a7802814195\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"standard_answer\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"keyword\", sa.String(), nullable=False),\n        sa.Column(\"answer\", sa.String(), nullable=False),\n        sa.Column(\"active\", sa.Boolean(), nullable=False),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"keyword\"),\n    )\n    op.create_table(\n        \"standard_answer_category\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.UniqueConstraint(\"name\"),\n    )\n    op.create_table(\n        \"standard_answer__standard_answer_category\",\n        sa.Column(\"standard_answer_id\", sa.Integer(), nullable=False),\n        sa.Column(\"standard_answer_category_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"standard_answer_category_id\"],\n            [\"standard_answer_category.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"standard_answer_id\"],\n            [\"standard_answer.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"standard_answer_id\", \"standard_answer_category_id\"),\n    )\n    op.create_table(\n        \"slack_bot_config__standard_answer_category\",\n        sa.Column(\"slack_bot_config_id\", sa.Integer(), nullable=False),\n        sa.Column(\"standard_answer_category_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"slack_bot_config_id\"],\n            [\"slack_bot_config.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"standard_answer_category_id\"],\n            [\"standard_answer_category.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"slack_bot_config_id\", \"standard_answer_category_id\"),\n    )\n\n    op.add_column(\n        \"chat_session\", sa.Column(\"slack_thread_id\", sa.String(), nullable=True)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_session\", \"slack_thread_id\")\n\n    op.drop_table(\"slack_bot_config__standard_answer_category\")\n    op.drop_table(\"standard_answer__standard_answer_category\")\n    op.drop_table(\"standard_answer_category\")\n    op.drop_table(\"standard_answer\")\n"
  },
  {
    "path": "backend/alembic/versions/c1d2e3f4a5b6_add_deep_research_tool.py",
    "content": "\"\"\"add_deep_research_tool\n\nRevision ID: c1d2e3f4a5b6\nRevises: b8c9d0e1f2a3\nCreate Date: 2025-12-18 16:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"c1d2e3f4a5b6\"\ndown_revision = \"b8c9d0e1f2a3\"\nbranch_labels = None\ndepends_on = None\n\n\nDEEP_RESEARCH_TOOL = {\n    \"name\": \"ResearchAgent\",\n    \"display_name\": \"Research Agent\",\n    \"description\": \"The Research Agent is a sub-agent that conducts research on a specific topic.\",\n    \"in_code_tool_id\": \"ResearchAgent\",\n}\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n    conn.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)\n            VALUES (:name, :display_name, :description, :in_code_tool_id, false)\n            \"\"\"\n        ),\n        DEEP_RESEARCH_TOOL,\n    )\n\n\ndef downgrade() -> None:\n    conn = op.get_bind()\n    conn.execute(\n        sa.text(\n            \"\"\"\n            DELETE FROM tool\n            WHERE in_code_tool_id = :in_code_tool_id\n            \"\"\"\n        ),\n        {\"in_code_tool_id\": DEEP_RESEARCH_TOOL[\"in_code_tool_id\"]},\n    )\n"
  },
  {
    "path": "backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py",
    "content": "\"\"\"Add index_attempt_errors table\n\nRevision ID: c5b692fa265c\nRevises: 4a951134c801\nCreate Date: 2024-08-08 14:06:39.581972\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"c5b692fa265c\"\ndown_revision = \"4a951134c801\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"index_attempt_errors\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"index_attempt_id\", sa.Integer(), nullable=True),\n        sa.Column(\"batch\", sa.Integer(), nullable=True),\n        sa.Column(\n            \"doc_summaries\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=False,\n        ),\n        sa.Column(\"error_msg\", sa.Text(), nullable=True),\n        sa.Column(\"traceback\", sa.Text(), nullable=True),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"index_attempt_id\"],\n            [\"index_attempt.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_index(\n        \"index_attempt_id\",\n        \"index_attempt_errors\",\n        [\"time_created\"],\n        unique=False,\n    )\n    # ### end Alembic commands ###\n\n\ndef downgrade() -> None:\n    # ### commands auto generated by Alembic - please adjust! ###\n    op.drop_index(\"index_attempt_id\", table_name=\"index_attempt_errors\")\n    op.drop_table(\"index_attempt_errors\")\n    # ### end Alembic commands ###\n"
  },
  {
    "path": "backend/alembic/versions/c5eae4a75a1b_add_chat_message__standard_answer_table.py",
    "content": "\"\"\"Add chat_message__standard_answer table\n\nRevision ID: c5eae4a75a1b\nRevises: 0f7ff6d75b57\nCreate Date: 2025-01-15 14:08:49.688998\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"c5eae4a75a1b\"\ndown_revision = \"0f7ff6d75b57\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"chat_message__standard_answer\",\n        sa.Column(\"chat_message_id\", sa.Integer(), nullable=False),\n        sa.Column(\"standard_answer_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"chat_message_id\"],\n            [\"chat_message.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"standard_answer_id\"],\n            [\"standard_answer.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"chat_message_id\", \"standard_answer_id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"chat_message__standard_answer\")\n"
  },
  {
    "path": "backend/alembic/versions/c7bf5721733e_add_has_been_indexed_to_.py",
    "content": "\"\"\"Add has_been_indexed to DocumentByConnectorCredentialPair\n\nRevision ID: c7bf5721733e\nRevises: fec3db967bf7\nCreate Date: 2025-01-13 12:39:05.831693\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"c7bf5721733e\"\ndown_revision = \"027381bce97c\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # assume all existing rows have been indexed, no better approach\n    op.add_column(\n        \"document_by_connector_credential_pair\",\n        sa.Column(\"has_been_indexed\", sa.Boolean(), nullable=True),\n    )\n    op.execute(\n        \"UPDATE document_by_connector_credential_pair SET has_been_indexed = TRUE\"\n    )\n    op.alter_column(\n        \"document_by_connector_credential_pair\",\n        \"has_been_indexed\",\n        nullable=False,\n    )\n\n    # Add index to optimize get_document_counts_for_cc_pairs query pattern\n    op.create_index(\n        \"idx_document_cc_pair_counts\",\n        \"document_by_connector_credential_pair\",\n        [\"connector_id\", \"credential_id\", \"has_been_indexed\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    # Remove the index first before removing the column\n    op.drop_index(\n        \"idx_document_cc_pair_counts\",\n        table_name=\"document_by_connector_credential_pair\",\n    )\n    op.drop_column(\"document_by_connector_credential_pair\", \"has_been_indexed\")\n"
  },
  {
    "path": "backend/alembic/versions/c7e9f4a3b2d1_add_python_tool.py",
    "content": "\"\"\"add_python_tool\n\nRevision ID: c7e9f4a3b2d1\nRevises: 3c9a65f1207f\nCreate Date: 2025-11-08 00:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n\n# revision identifiers, used by Alembic.\nrevision = \"c7e9f4a3b2d1\"\ndown_revision = \"3c9a65f1207f\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    \"\"\"Add PythonTool to built-in tools\"\"\"\n    conn = op.get_bind()\n\n    conn.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)\n            VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)\n            \"\"\"\n        ),\n        {\n            \"name\": \"PythonTool\",\n            # in the UI, call it `Code Interpreter` since this is a well known term for this tool\n            \"display_name\": \"Code Interpreter\",\n            \"description\": (\n                \"The Code Interpreter Action allows the assistant to execute \"\n                \"Python code in a secure, isolated environment for data analysis, \"\n                \"computation, visualization, and file processing.\"\n            ),\n            \"in_code_tool_id\": \"PythonTool\",\n            \"enabled\": True,\n        },\n    )\n\n    # needed to store files generated by the python tool\n    op.add_column(\n        \"research_agent_iteration_sub_step\",\n        sa.Column(\n            \"file_ids\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    \"\"\"Remove PythonTool from built-in tools\"\"\"\n    conn = op.get_bind()\n\n    conn.execute(\n        sa.text(\n            \"\"\"\n            DELETE FROM tool\n            WHERE in_code_tool_id = :in_code_tool_id\n            \"\"\"\n        ),\n        {\n            \"in_code_tool_id\": \"PythonTool\",\n        },\n    )\n\n    op.drop_column(\"research_agent_iteration_sub_step\", \"file_ids\")\n"
  },
  {
    "path": "backend/alembic/versions/c7f2e1b4a9d3_add_sharing_scope_to_build_session.py",
    "content": "\"\"\"add sharing_scope to build_session\n\nRevision ID: c7f2e1b4a9d3\nRevises: 19c0ccb01687\nCreate Date: 2026-02-17 12:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\nrevision = \"c7f2e1b4a9d3\"\ndown_revision = \"19c0ccb01687\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"build_session\",\n        sa.Column(\n            \"sharing_scope\",\n            sa.String(),\n            nullable=False,\n            server_default=\"private\",\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"build_session\", \"sharing_scope\")\n"
  },
  {
    "path": "backend/alembic/versions/c8a93a2af083_personalization_user_info.py",
    "content": "\"\"\"personalization_user_info\n\nRevision ID: c8a93a2af083\nRevises: 6f4f86aef280\nCreate Date: 2025-10-14 15:59:03.577343\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n\n# revision identifiers, used by Alembic.\nrevision = \"c8a93a2af083\"\ndown_revision = \"6f4f86aef280\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\"personal_name\", sa.String(), nullable=True),\n    )\n    op.add_column(\n        \"user\",\n        sa.Column(\"personal_role\", sa.String(), nullable=True),\n    )\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"use_memories\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=sa.true(),\n        ),\n    )\n\n    op.create_table(\n        \"memory\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"user_id\", postgresql.UUID(as_uuid=True), nullable=False),\n        sa.Column(\"memory_text\", sa.Text(), nullable=False),\n        sa.Column(\"conversation_id\", postgresql.UUID(as_uuid=True), nullable=True),\n        sa.Column(\"message_id\", sa.Integer(), nullable=True),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n        sa.Column(\n            \"updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint([\"user_id\"], [\"user.id\"], ondelete=\"CASCADE\"),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    op.create_index(\"ix_memory_user_id\", \"memory\", [\"user_id\"])\n\n\ndef downgrade() -> None:\n    op.drop_index(\"ix_memory_user_id\", table_name=\"memory\")\n    op.drop_table(\"memory\")\n\n    op.drop_column(\"user\", \"use_memories\")\n    op.drop_column(\"user\", \"personal_role\")\n    op.drop_column(\"user\", \"personal_name\")\n"
  },
  {
    "path": "backend/alembic/versions/c99d76fcd298_add_nullable_to_persona_id_in_chat_.py",
    "content": "\"\"\"add nullable to persona id in Chat Session\n\nRevision ID: c99d76fcd298\nRevises: 5c7fdadae813\nCreate Date: 2024-07-09 19:27:01.579697\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"c99d76fcd298\"\ndown_revision = \"5c7fdadae813\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\n        \"chat_session\", \"persona_id\", existing_type=sa.INTEGER(), nullable=True\n    )\n\n\ndef downgrade() -> None:\n    # Delete chat messages and feedback first since they reference chat sessions\n    # Get chat messages from sessions with null persona_id\n    chat_messages_query = \"\"\"\n        SELECT id\n        FROM chat_message\n        WHERE chat_session_id IN (\n            SELECT id\n            FROM chat_session\n            WHERE persona_id IS NULL\n        )\n    \"\"\"\n\n    # Delete dependent records first\n    op.execute(\n        f\"\"\"\n        DELETE FROM document_retrieval_feedback\n        WHERE chat_message_id IN (\n            {chat_messages_query}\n        )\n    \"\"\"\n    )\n    op.execute(\n        f\"\"\"\n        DELETE FROM chat_message__search_doc\n        WHERE chat_message_id IN (\n            {chat_messages_query}\n        )\n    \"\"\"\n    )\n\n    # Delete chat messages\n    op.execute(\n        \"\"\"\n        DELETE FROM chat_message\n        WHERE chat_session_id IN (\n            SELECT id\n            FROM chat_session\n            WHERE persona_id IS NULL\n        )\n    \"\"\"\n    )\n\n    # Now we can safely delete the chat sessions\n    op.execute(\n        \"\"\"\n        DELETE FROM chat_session\n        WHERE persona_id IS NULL\n    \"\"\"\n    )\n\n    op.alter_column(\n        \"chat_session\",\n        \"persona_id\",\n        existing_type=sa.INTEGER(),\n        nullable=False,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/c9e2cd766c29_add_s3_file_store_table.py",
    "content": "\"\"\"modify_file_store_for_external_storage\n\nRevision ID: c9e2cd766c29\nRevises: 03bf8be6b53a\nCreate Date: 2025-06-13 14:02:09.867679\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy import text\nfrom typing import cast, Any\n\nfrom botocore.exceptions import ClientError\n\nfrom onyx.db._deprecated.pg_file_store import delete_lobj_by_id, read_lobj\nfrom onyx.file_store.file_store import get_s3_file_store\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n# revision identifiers, used by Alembic.\nrevision = \"c9e2cd766c29\"\ndown_revision = \"03bf8be6b53a\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    try:\n        # Modify existing file_store table to support external storage\n        op.rename_table(\"file_store\", \"file_record\")\n\n        # Make lobj_oid nullable (for external storage files)\n        op.alter_column(\"file_record\", \"lobj_oid\", nullable=True)\n\n        # Add external storage columns with generic names\n        op.add_column(\n            \"file_record\", sa.Column(\"bucket_name\", sa.String(), nullable=True)\n        )\n        op.add_column(\n            \"file_record\", sa.Column(\"object_key\", sa.String(), nullable=True)\n        )\n\n        # Add timestamps for tracking\n        op.add_column(\n            \"file_record\",\n            sa.Column(\n                \"created_at\",\n                sa.DateTime(timezone=True),\n                server_default=sa.func.now(),\n                nullable=False,\n            ),\n        )\n        op.add_column(\n            \"file_record\",\n            sa.Column(\n                \"updated_at\",\n                sa.DateTime(timezone=True),\n                server_default=sa.func.now(),\n                nullable=False,\n            ),\n        )\n\n        op.alter_column(\"file_record\", \"file_name\", new_column_name=\"file_id\")\n    except Exception as e:\n        if \"does not exist\" in str(e) or 'relation \"file_store\" does not exist' in str(\n            e\n        ):\n            print(\n                f\"Ran into error - {e}. Likely means we had a partial success in the past, continuing...\"\n            )\n        else:\n            raise\n\n    print(\n        \"External storage configured - migrating files from PostgreSQL to external storage...\"\n    )\n    # if we fail midway through this, we'll have a partial success. Running the migration\n    # again should allow us to continue.\n    _migrate_files_to_external_storage()\n    print(\"File migration completed successfully!\")\n\n    # Remove lobj_oid column\n    op.drop_column(\"file_record\", \"lobj_oid\")\n\n\ndef downgrade() -> None:\n    \"\"\"Revert schema changes and migrate files from external storage back to PostgreSQL large objects.\"\"\"\n\n    print(\n        \"Reverting to PostgreSQL-backed file store – migrating files from external storage …\"\n    )\n\n    # 1. Ensure `lobj_oid` exists on the current `file_record` table (nullable for now).\n    op.add_column(\"file_record\", sa.Column(\"lobj_oid\", sa.Integer(), nullable=True))\n\n    # 2. Move content from external storage back into PostgreSQL large objects (table is still\n    #    called `file_record` so application code continues to work during the copy).\n    try:\n        _migrate_files_to_postgres()\n    except Exception:\n        print(\"Error during downgrade migration, rolling back …\")\n        op.drop_column(\"file_record\", \"lobj_oid\")\n        raise\n\n    # 3. After migration every row should now have `lobj_oid` populated – mark NOT NULL.\n    op.alter_column(\"file_record\", \"lobj_oid\", nullable=False)\n\n    # 4. Remove columns that are only relevant to external storage.\n    op.drop_column(\"file_record\", \"updated_at\")\n    op.drop_column(\"file_record\", \"created_at\")\n    op.drop_column(\"file_record\", \"object_key\")\n    op.drop_column(\"file_record\", \"bucket_name\")\n\n    # 5. Rename `file_id` back to `file_name` (still on `file_record`).\n    op.alter_column(\"file_record\", \"file_id\", new_column_name=\"file_name\")\n\n    # 6. Finally, rename the table back to its original name expected by the legacy codebase.\n    op.rename_table(\"file_record\", \"file_store\")\n\n    print(\n        \"Downgrade migration completed – files are now stored inside PostgreSQL again.\"\n    )\n\n\n# -----------------------------------------------------------------------------\n# Helper: migrate from external storage (S3/MinIO) back into PostgreSQL large objects\n\n\ndef _migrate_files_to_postgres() -> None:\n    \"\"\"Move any files whose content lives in external S3-compatible storage back into PostgreSQL.\n\n    The logic mirrors *inverse* of `_migrate_files_to_external_storage` used on upgrade.\n    \"\"\"\n\n    # Obtain DB session from Alembic context\n    bind = op.get_bind()\n    session = Session(bind=bind)\n\n    # Fetch rows that have external storage pointers (bucket/object_key not NULL)\n    result = session.execute(\n        text(\n            \"SELECT file_id, bucket_name, object_key FROM file_record WHERE bucket_name IS NOT NULL AND object_key IS NOT NULL\"\n        )\n    )\n\n    files_to_migrate = [row[0] for row in result.fetchall()]\n    total_files = len(files_to_migrate)\n\n    if total_files == 0:\n        print(\"No files found in external storage to migrate back to PostgreSQL.\")\n        return\n\n    print(f\"Found {total_files} files to migrate back to PostgreSQL large objects.\")\n\n    _set_tenant_contextvar(session)\n    migrated_count = 0\n\n    # only create external store if we have files to migrate. This line\n    # makes it so we need to have S3/MinIO configured to run this migration.\n    external_store = get_s3_file_store()\n\n    for i, file_id in enumerate(files_to_migrate, 1):\n        print(f\"Migrating file {i}/{total_files}: {file_id}\")\n\n        # Read file content from external storage (always binary)\n        try:\n            file_io = external_store.read_file(\n                file_id=file_id, mode=\"b\", use_tempfile=True\n            )\n            file_io.seek(0)\n\n            # Import lazily to avoid circular deps at Alembic runtime\n            from onyx.db._deprecated.pg_file_store import (\n                create_populate_lobj,\n            )  # noqa: E402\n\n            # Create new Postgres large object and populate it\n            lobj_oid = create_populate_lobj(content=file_io, db_session=session)\n\n            # Update DB row: set lobj_oid, clear bucket/object_key\n            session.execute(\n                text(\n                    \"UPDATE file_record SET lobj_oid = :lobj_oid, bucket_name = NULL, object_key = NULL WHERE file_id = :file_id\"\n                ),\n                {\"lobj_oid\": lobj_oid, \"file_id\": file_id},\n            )\n        except ClientError as e:\n            if \"NoSuchKey\" in str(e):\n                print(\n                    f\"File {file_id} not found in external storage. Deleting from database.\"\n                )\n                session.execute(\n                    text(\"DELETE FROM file_record WHERE file_id = :file_id\"),\n                    {\"file_id\": file_id},\n                )\n            else:\n                raise\n\n        migrated_count += 1\n        print(f\"✓ Successfully migrated file {i}/{total_files}: {file_id}\")\n\n    # Flush the SQLAlchemy session so statements are sent to the DB, but **do not**\n    # commit the transaction.  The surrounding Alembic migration will commit once\n    # the *entire* downgrade succeeds.  This keeps the whole downgrade atomic and\n    # avoids leaving the database in a partially-migrated state if a later schema\n    # operation fails.\n    session.flush()\n\n    print(\n        f\"Migration back to PostgreSQL completed: {migrated_count} files staged for commit.\"\n    )\n\n\ndef _migrate_files_to_external_storage() -> None:\n    \"\"\"Migrate files from PostgreSQL large objects to external storage\"\"\"\n    # Get database session\n    bind = op.get_bind()\n    session = Session(bind=bind)\n    external_store = get_s3_file_store()\n\n    # Find all files currently stored in PostgreSQL (lobj_oid is not null)\n    result = session.execute(\n        text(\n            \"SELECT file_id FROM file_record WHERE lobj_oid IS NOT NULL AND bucket_name IS NULL AND object_key IS NULL\"\n        )\n    )\n\n    files_to_migrate = [row[0] for row in result.fetchall()]\n    total_files = len(files_to_migrate)\n\n    if total_files == 0:\n        print(\"No files found in PostgreSQL storage to migrate.\")\n        return\n\n    # might need to move this above the if statement when creating a new multi-tenant\n    # system. VERY extreme edge case.\n    external_store.initialize()\n    print(f\"Found {total_files} files to migrate from PostgreSQL to external storage.\")\n\n    _set_tenant_contextvar(session)\n    migrated_count = 0\n\n    for i, file_id in enumerate(files_to_migrate, 1):\n        print(f\"Migrating file {i}/{total_files}: {file_id}\")\n\n        # Read file record to get metadata\n        file_record = session.execute(\n            text(\"SELECT * FROM file_record WHERE file_id = :file_id\"),\n            {\"file_id\": file_id},\n        ).fetchone()\n\n        if file_record is None:\n            print(f\"File {file_id} not found in PostgreSQL storage.\")\n            continue\n\n        lobj_id = cast(int, file_record.lobj_oid)\n        file_metadata = cast(Any, file_record.file_metadata)\n\n        # Read file content from PostgreSQL\n        try:\n            file_content = read_lobj(\n                lobj_id, db_session=session, mode=\"b\", use_tempfile=True\n            )\n        except Exception as e:\n            if \"large object\" in str(e) and \"does not exist\" in str(e):\n                print(f\"File {file_id} not found in PostgreSQL storage.\")\n                continue\n            else:\n                raise\n\n        # Handle file_metadata type conversion\n        file_metadata = None\n        if file_metadata is not None:\n            if isinstance(file_metadata, dict):\n                file_metadata = file_metadata\n            else:\n                # Convert other types to dict if possible, otherwise None\n                try:\n                    file_metadata = dict(file_record.file_metadata)\n                except (TypeError, ValueError):\n                    file_metadata = None\n\n        # Save to external storage (this will handle the database record update and cleanup)\n        # NOTE: this WILL .commit() the transaction.\n        external_store.save_file(\n            file_id=file_id,\n            content=file_content,\n            display_name=file_record.display_name,\n            file_origin=file_record.file_origin,\n            file_type=file_record.file_type,\n            file_metadata=file_metadata,\n        )\n        delete_lobj_by_id(lobj_id, db_session=session)\n\n        migrated_count += 1\n        print(f\"✓ Successfully migrated file {i}/{total_files}: {file_id}\")\n\n    # See note above – flush but do **not** commit so the outer Alembic transaction\n    # controls atomicity.\n    session.flush()\n\n    print(\n        f\"Migration completed: {migrated_count} files staged for commit to external storage.\"\n    )\n\n\ndef _set_tenant_contextvar(session: Session) -> None:\n    \"\"\"Set the tenant contextvar to the default schema\"\"\"\n    current_tenant = session.execute(text(\"SELECT current_schema()\")).scalar()\n    print(f\"Migrating files for tenant: {current_tenant}\")\n    CURRENT_TENANT_ID_CONTEXTVAR.set(current_tenant)\n"
  },
  {
    "path": "backend/alembic/versions/ca04500b9ee8_add_cascade_deletes_to_agent_tables.py",
    "content": "\"\"\"add_cascade_deletes_to_agent_tables\n\nRevision ID: ca04500b9ee8\nRevises: 238b84885828\nCreate Date: 2025-05-30 16:03:51.112263\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"ca04500b9ee8\"\ndown_revision = \"238b84885828\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Drop existing foreign key constraints\n    op.drop_constraint(\n        \"agent__sub_question_primary_question_id_fkey\",\n        \"agent__sub_question\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\n        \"agent__sub_query_parent_question_id_fkey\",\n        \"agent__sub_query\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\n        \"chat_message__standard_answer_chat_message_id_fkey\",\n        \"chat_message__standard_answer\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\n        \"agent__sub_query__search_doc_sub_query_id_fkey\",\n        \"agent__sub_query__search_doc\",\n        type_=\"foreignkey\",\n    )\n\n    # Recreate foreign key constraints with CASCADE delete\n    op.create_foreign_key(\n        \"agent__sub_question_primary_question_id_fkey\",\n        \"agent__sub_question\",\n        \"chat_message\",\n        [\"primary_question_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n    op.create_foreign_key(\n        \"agent__sub_query_parent_question_id_fkey\",\n        \"agent__sub_query\",\n        \"agent__sub_question\",\n        [\"parent_question_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n    op.create_foreign_key(\n        \"chat_message__standard_answer_chat_message_id_fkey\",\n        \"chat_message__standard_answer\",\n        \"chat_message\",\n        [\"chat_message_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n    op.create_foreign_key(\n        \"agent__sub_query__search_doc_sub_query_id_fkey\",\n        \"agent__sub_query__search_doc\",\n        \"agent__sub_query\",\n        [\"sub_query_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n\ndef downgrade() -> None:\n    # Drop CASCADE foreign key constraints\n    op.drop_constraint(\n        \"agent__sub_question_primary_question_id_fkey\",\n        \"agent__sub_question\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\n        \"agent__sub_query_parent_question_id_fkey\",\n        \"agent__sub_query\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\n        \"chat_message__standard_answer_chat_message_id_fkey\",\n        \"chat_message__standard_answer\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\n        \"agent__sub_query__search_doc_sub_query_id_fkey\",\n        \"agent__sub_query__search_doc\",\n        type_=\"foreignkey\",\n    )\n\n    # Recreate foreign key constraints without CASCADE delete\n    op.create_foreign_key(\n        \"agent__sub_question_primary_question_id_fkey\",\n        \"agent__sub_question\",\n        \"chat_message\",\n        [\"primary_question_id\"],\n        [\"id\"],\n    )\n    op.create_foreign_key(\n        \"agent__sub_query_parent_question_id_fkey\",\n        \"agent__sub_query\",\n        \"agent__sub_question\",\n        [\"parent_question_id\"],\n        [\"id\"],\n    )\n    op.create_foreign_key(\n        \"chat_message__standard_answer_chat_message_id_fkey\",\n        \"chat_message__standard_answer\",\n        \"chat_message\",\n        [\"chat_message_id\"],\n        [\"id\"],\n    )\n    op.create_foreign_key(\n        \"agent__sub_query__search_doc_sub_query_id_fkey\",\n        \"agent__sub_query__search_doc\",\n        \"agent__sub_query\",\n        [\"sub_query_id\"],\n        [\"id\"],\n    )\n"
  },
  {
    "path": "backend/alembic/versions/cbc03e08d0f3_add_opensearch_migration_tables.py",
    "content": "\"\"\"add_opensearch_migration_tables\n\nRevision ID: cbc03e08d0f3\nRevises: be87a654d5af\nCreate Date: 2026-01-31 17:00:45.176604\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"cbc03e08d0f3\"\ndown_revision = \"be87a654d5af\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # 1. Create opensearch_document_migration_record table.\n    op.create_table(\n        \"opensearch_document_migration_record\",\n        sa.Column(\"document_id\", sa.String(), nullable=False),\n        sa.Column(\"status\", sa.String(), nullable=False, server_default=\"pending\"),\n        sa.Column(\"error_message\", sa.Text(), nullable=True),\n        sa.Column(\"attempts_count\", sa.Integer(), nullable=False, server_default=\"0\"),\n        sa.Column(\"last_attempt_at\", sa.DateTime(timezone=True), nullable=True),\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n        sa.PrimaryKeyConstraint(\"document_id\"),\n        sa.ForeignKeyConstraint(\n            [\"document_id\"],\n            [\"document.id\"],\n            ondelete=\"CASCADE\",\n        ),\n    )\n    # 2. Create indices.\n    op.create_index(\n        \"ix_opensearch_document_migration_record_status\",\n        \"opensearch_document_migration_record\",\n        [\"status\"],\n    )\n    op.create_index(\n        \"ix_opensearch_document_migration_record_attempts_count\",\n        \"opensearch_document_migration_record\",\n        [\"attempts_count\"],\n    )\n    op.create_index(\n        \"ix_opensearch_document_migration_record_created_at\",\n        \"opensearch_document_migration_record\",\n        [\"created_at\"],\n    )\n\n    # 3. Create opensearch_tenant_migration_record table (singleton).\n    op.create_table(\n        \"opensearch_tenant_migration_record\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"document_migration_record_table_population_status\",\n            sa.String(),\n            nullable=False,\n            server_default=\"pending\",\n        ),\n        sa.Column(\n            \"num_times_observed_no_additional_docs_to_populate_migration_table\",\n            sa.Integer(),\n            nullable=False,\n            server_default=\"0\",\n        ),\n        sa.Column(\n            \"overall_document_migration_status\",\n            sa.String(),\n            nullable=False,\n            server_default=\"pending\",\n        ),\n        sa.Column(\n            \"num_times_observed_no_additional_docs_to_migrate\",\n            sa.Integer(),\n            nullable=False,\n            server_default=\"0\",\n        ),\n        sa.Column(\n            \"last_updated_at\",\n            sa.DateTime(timezone=True),\n            server_default=sa.func.now(),\n            nullable=False,\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n    # 4. Create unique index on constant to enforce singleton pattern.\n    op.execute(\n        sa.text(\n            \"\"\"\n            CREATE UNIQUE INDEX idx_opensearch_tenant_migration_singleton\n            ON opensearch_tenant_migration_record ((true))\n            \"\"\"\n        )\n    )\n\n\ndef downgrade() -> None:\n    # Drop opensearch_tenant_migration_record.\n    op.drop_index(\n        \"idx_opensearch_tenant_migration_singleton\",\n        table_name=\"opensearch_tenant_migration_record\",\n    )\n    op.drop_table(\"opensearch_tenant_migration_record\")\n\n    # Drop opensearch_document_migration_record.\n    op.drop_index(\n        \"ix_opensearch_document_migration_record_created_at\",\n        table_name=\"opensearch_document_migration_record\",\n    )\n    op.drop_index(\n        \"ix_opensearch_document_migration_record_attempts_count\",\n        table_name=\"opensearch_document_migration_record\",\n    )\n    op.drop_index(\n        \"ix_opensearch_document_migration_record_status\",\n        table_name=\"opensearch_document_migration_record\",\n    )\n    op.drop_table(\"opensearch_document_migration_record\")\n"
  },
  {
    "path": "backend/alembic/versions/cec7ec36c505_kgentity_parent.py",
    "content": "\"\"\"kgentity_parent\n\nRevision ID: cec7ec36c505\nRevises: 495cb26ce93e\nCreate Date: 2025-06-07 20:07:46.400770\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"cec7ec36c505\"\ndown_revision = \"495cb26ce93e\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"kg_entity\",\n        sa.Column(\"parent_key\", sa.String(), nullable=True, index=True),\n    )\n    # NOTE: you will have to reindex the KG after this migration as the parent_key will be null\n\n\ndef downgrade() -> None:\n    op.drop_column(\"kg_entity\", \"parent_key\")\n"
  },
  {
    "path": "backend/alembic/versions/cf90764725d8_larger_refresh_tokens.py",
    "content": "\"\"\"larger refresh tokens\n\nRevision ID: cf90764725d8\nRevises: 4794bc13e484\nCreate Date: 2025-04-04 10:56:39.769294\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"cf90764725d8\"\ndown_revision = \"4794bc13e484\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\"oauth_account\", \"refresh_token\", type_=sa.Text())\n\n\ndef downgrade() -> None:\n    op.alter_column(\"oauth_account\", \"refresh_token\", type_=sa.String(length=1024))\n"
  },
  {
    "path": "backend/alembic/versions/d09fc20a3c66_seed_builtin_tools.py",
    "content": "\"\"\"seed_builtin_tools\n\nRevision ID: d09fc20a3c66\nRevises: b7ec9b5b505f\nCreate Date: 2025-09-09 19:32:16.824373\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"d09fc20a3c66\"\ndown_revision = \"b7ec9b5b505f\"\nbranch_labels = None\ndepends_on = None\n\n\n# Tool definitions - core tools that should always be seeded\n# Names/in_code_tool_id are the same as the class names in the tool_implementations package\nBUILT_IN_TOOLS = [\n    {\n        \"name\": \"SearchTool\",\n        \"display_name\": \"Internal Search\",\n        \"description\": \"The Search Action allows the Assistant to search through connected knowledge to help build an answer.\",\n        \"in_code_tool_id\": \"SearchTool\",\n    },\n    {\n        \"name\": \"ImageGenerationTool\",\n        \"display_name\": \"Image Generation\",\n        \"description\": (\n            \"The Image Generation Action allows the assistant to use DALL-E 3 or GPT-IMAGE-1 to generate images. \"\n            \"The action will be used when the user asks the assistant to generate an image.\"\n        ),\n        \"in_code_tool_id\": \"ImageGenerationTool\",\n    },\n    {\n        \"name\": \"WebSearchTool\",\n        \"display_name\": \"Web Search\",\n        \"description\": (\n            \"The Web Search Action allows the assistant to perform internet searches for up-to-date information.\"\n        ),\n        \"in_code_tool_id\": \"WebSearchTool\",\n    },\n    {\n        \"name\": \"KnowledgeGraphTool\",\n        \"display_name\": \"Knowledge Graph Search\",\n        \"description\": (\n            \"The Knowledge Graph Search Action allows the assistant to search the \"\n            \"Knowledge Graph for information. This tool can (for now) only be active in the KG Beta Assistant, \"\n            \"and it requires the Knowledge Graph to be enabled.\"\n        ),\n        \"in_code_tool_id\": \"KnowledgeGraphTool\",\n    },\n    {\n        \"name\": \"OktaProfileTool\",\n        \"display_name\": \"Okta Profile\",\n        \"description\": (\n            \"The Okta Profile Action allows the assistant to fetch the current user's information from Okta. \"\n            \"This may include the user's name, email, phone number, address, and other details such as their \"\n            \"manager and direct reports.\"\n        ),\n        \"in_code_tool_id\": \"OktaProfileTool\",\n    },\n]\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n\n    # Get existing tools to check what already exists\n    existing_tools = conn.execute(\n        sa.text(\"SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL\")\n    ).fetchall()\n    existing_tool_ids = {row[0] for row in existing_tools}\n\n    # Insert or update built-in tools\n    for tool in BUILT_IN_TOOLS:\n        in_code_id = tool[\"in_code_tool_id\"]\n\n        # Handle historical rename: InternetSearchTool -> WebSearchTool\n        if (\n            in_code_id == \"WebSearchTool\"\n            and \"WebSearchTool\" not in existing_tool_ids\n            and \"InternetSearchTool\" in existing_tool_ids\n        ):\n            # Rename the existing InternetSearchTool row in place and update fields\n            conn.execute(\n                sa.text(\n                    \"\"\"\n                    UPDATE tool\n                    SET name = :name,\n                        display_name = :display_name,\n                        description = :description,\n                        in_code_tool_id = :in_code_tool_id\n                    WHERE in_code_tool_id = 'InternetSearchTool'\n                    \"\"\"\n                ),\n                tool,\n            )\n            # Keep the local view of existing ids in sync to avoid duplicate insert\n            existing_tool_ids.discard(\"InternetSearchTool\")\n            existing_tool_ids.add(\"WebSearchTool\")\n            continue\n\n        if in_code_id in existing_tool_ids:\n            # Update existing tool\n            conn.execute(\n                sa.text(\n                    \"\"\"\n                    UPDATE tool\n                    SET name = :name,\n                        display_name = :display_name,\n                        description = :description\n                    WHERE in_code_tool_id = :in_code_tool_id\n                    \"\"\"\n                ),\n                tool,\n            )\n        else:\n            # Insert new tool\n            conn.execute(\n                sa.text(\n                    \"\"\"\n                    INSERT INTO tool (name, display_name, description, in_code_tool_id)\n                    VALUES (:name, :display_name, :description, :in_code_tool_id)\n                    \"\"\"\n                ),\n                tool,\n            )\n\n\ndef downgrade() -> None:\n    # We don't remove the tools on downgrade since it's totally fine to just\n    # have them around. If we upgrade again, it will be a no-op.\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/d1b637d7050a_sync_exa_api_key_to_content_provider.py",
    "content": "\"\"\"sync_exa_api_key_to_content_provider\n\nRevision ID: d1b637d7050a\nRevises: d25168c2beee\nCreate Date: 2026-01-09 15:54:15.646249\n\n\"\"\"\n\nfrom alembic import op\nfrom sqlalchemy import text\n\n\n# revision identifiers, used by Alembic.\nrevision = \"d1b637d7050a\"\ndown_revision = \"d25168c2beee\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Exa uses a shared API key between search and content providers.\n    # For existing Exa search providers with API keys, create the corresponding\n    # content provider if it doesn't exist yet.\n    connection = op.get_bind()\n\n    # Check if Exa search provider exists with an API key\n    result = connection.execute(\n        text(\n            \"\"\"\n            SELECT api_key FROM internet_search_provider\n            WHERE provider_type = 'exa' AND api_key IS NOT NULL\n            LIMIT 1\n            \"\"\"\n        )\n    )\n    row = result.fetchone()\n\n    if row:\n        api_key = row[0]\n        # Create Exa content provider with the shared key\n        connection.execute(\n            text(\n                \"\"\"\n                INSERT INTO internet_content_provider\n                (name, provider_type, api_key, is_active)\n                VALUES ('Exa', 'exa', :api_key, false)\n                ON CONFLICT (name) DO NOTHING\n                \"\"\"\n            ),\n            {\"api_key\": api_key},\n        )\n\n\ndef downgrade() -> None:\n    # Remove the Exa content provider that was created by this migration\n    connection = op.get_bind()\n    connection.execute(\n        text(\n            \"\"\"\n            DELETE FROM internet_content_provider\n            WHERE provider_type = 'exa'\n            \"\"\"\n        )\n    )\n"
  },
  {
    "path": "backend/alembic/versions/d25168c2beee_tool_name_consistency.py",
    "content": "\"\"\"tool_name_consistency\n\nRevision ID: d25168c2beee\nRevises: 8405ca81cc83\nCreate Date: 2026-01-11 17:54:40.135777\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"d25168c2beee\"\ndown_revision = \"8405ca81cc83\"\nbranch_labels = None\ndepends_on = None\n\n\n# Currently the seeded tools have the in_code_tool_id == name\nCURRENT_TOOL_NAME_MAPPING = [\n    \"SearchTool\",\n    \"WebSearchTool\",\n    \"ImageGenerationTool\",\n    \"PythonTool\",\n    \"OpenURLTool\",\n    \"KnowledgeGraphTool\",\n    \"ResearchAgent\",\n]\n\n# Mapping of in_code_tool_id -> name\n# These are the expected names that we want in the database\nEXPECTED_TOOL_NAME_MAPPING = {\n    \"SearchTool\": \"internal_search\",\n    \"WebSearchTool\": \"web_search\",\n    \"ImageGenerationTool\": \"generate_image\",\n    \"PythonTool\": \"python\",\n    \"OpenURLTool\": \"open_url\",\n    \"KnowledgeGraphTool\": \"run_kg_search\",\n    \"ResearchAgent\": \"research_agent\",\n}\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n\n    # Mapping of in_code_tool_id to the NAME constant from each tool class\n    # These match the .name property of each tool implementation\n    tool_name_mapping = EXPECTED_TOOL_NAME_MAPPING\n\n    # Update the name column for each tool based on its in_code_tool_id\n    for in_code_tool_id, expected_name in tool_name_mapping.items():\n        conn.execute(\n            sa.text(\n                \"\"\"\n                UPDATE tool\n                SET name = :expected_name\n                WHERE in_code_tool_id = :in_code_tool_id\n                \"\"\"\n            ),\n            {\n                \"expected_name\": expected_name,\n                \"in_code_tool_id\": in_code_tool_id,\n            },\n        )\n\n\ndef downgrade() -> None:\n    conn = op.get_bind()\n\n    # Reverse the migration by setting name back to in_code_tool_id\n    # This matches the original pattern where name was the class name\n    for in_code_tool_id in CURRENT_TOOL_NAME_MAPPING:\n        conn.execute(\n            sa.text(\n                \"\"\"\n                UPDATE tool\n                SET name = :current_name\n                WHERE in_code_tool_id = :in_code_tool_id\n                \"\"\"\n            ),\n            {\n                \"current_name\": in_code_tool_id,\n                \"in_code_tool_id\": in_code_tool_id,\n            },\n        )\n"
  },
  {
    "path": "backend/alembic/versions/d3fd499c829c_add_file_reader_tool.py",
    "content": "\"\"\"add_file_reader_tool\n\nRevision ID: d3fd499c829c\nRevises: 114a638452db\nCreate Date: 2026-02-07 19:28:22.452337\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"d3fd499c829c\"\ndown_revision = \"114a638452db\"\nbranch_labels = None\ndepends_on = None\n\nFILE_READER_TOOL = {\n    \"name\": \"read_file\",\n    \"display_name\": \"File Reader\",\n    \"description\": (\n        \"Read sections of user-uploaded files by character offset. \"\n        \"Useful for inspecting large files that cannot fit entirely in context.\"\n    ),\n    \"in_code_tool_id\": \"FileReaderTool\",\n    \"enabled\": True,\n}\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n\n    # Check if tool already exists\n    existing = conn.execute(\n        sa.text(\"SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id\"),\n        {\"in_code_tool_id\": FILE_READER_TOOL[\"in_code_tool_id\"]},\n    ).fetchone()\n\n    if existing:\n        # Update existing tool\n        conn.execute(\n            sa.text(\n                \"\"\"\n                UPDATE tool\n                SET name = :name,\n                    display_name = :display_name,\n                    description = :description\n                WHERE in_code_tool_id = :in_code_tool_id\n                \"\"\"\n            ),\n            FILE_READER_TOOL,\n        )\n        tool_id = existing[0]\n    else:\n        # Insert new tool\n        result = conn.execute(\n            sa.text(\n                \"\"\"\n                INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)\n                VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)\n                RETURNING id\n                \"\"\"\n            ),\n            FILE_READER_TOOL,\n        )\n        tool_id = result.scalar_one()\n\n    # Attach to the default persona (id=0) if not already attached\n    conn.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO persona__tool (persona_id, tool_id)\n            VALUES (0, :tool_id)\n            ON CONFLICT DO NOTHING\n            \"\"\"\n        ),\n        {\"tool_id\": tool_id},\n    )\n\n\ndef downgrade() -> None:\n    conn = op.get_bind()\n    in_code_tool_id = FILE_READER_TOOL[\"in_code_tool_id\"]\n\n    # Remove persona associations first (FK constraint)\n    conn.execute(\n        sa.text(\n            \"\"\"\n            DELETE FROM persona__tool\n            WHERE tool_id IN (\n                SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id\n            )\n            \"\"\"\n        ),\n        {\"in_code_tool_id\": in_code_tool_id},\n    )\n\n    conn.execute(\n        sa.text(\"DELETE FROM tool WHERE in_code_tool_id = :in_code_tool_id\"),\n        {\"in_code_tool_id\": in_code_tool_id},\n    )\n"
  },
  {
    "path": "backend/alembic/versions/d5645c915d0e_remove_deletion_attempt_table.py",
    "content": "\"\"\"Remove deletion_attempt table\n\nRevision ID: d5645c915d0e\nRevises: 8e26726b7683\nCreate Date: 2023-09-14 15:04:14.444909\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"d5645c915d0e\"\ndown_revision = \"8e26726b7683\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.drop_table(\"deletion_attempt\")\n\n    # Remove the DeletionStatus enum\n    op.execute(\"DROP TYPE IF EXISTS deletionstatus;\")\n\n\ndef downgrade() -> None:\n    op.create_table(\n        \"deletion_attempt\",\n        sa.Column(\"id\", sa.INTEGER(), autoincrement=True, nullable=False),\n        sa.Column(\"connector_id\", sa.INTEGER(), autoincrement=False, nullable=False),\n        sa.Column(\"credential_id\", sa.INTEGER(), autoincrement=False, nullable=False),\n        sa.Column(\n            \"status\",\n            postgresql.ENUM(\n                \"NOT_STARTED\",\n                \"IN_PROGRESS\",\n                \"SUCCESS\",\n                \"FAILED\",\n                name=\"deletionstatus\",\n            ),\n            autoincrement=False,\n            nullable=False,\n        ),\n        sa.Column(\n            \"num_docs_deleted\",\n            sa.INTEGER(),\n            autoincrement=False,\n            nullable=False,\n        ),\n        sa.Column(\"error_msg\", sa.VARCHAR(), autoincrement=False, nullable=True),\n        sa.Column(\n            \"time_created\",\n            postgresql.TIMESTAMP(timezone=True),\n            server_default=sa.text(\"now()\"),\n            autoincrement=False,\n            nullable=False,\n        ),\n        sa.Column(\n            \"time_updated\",\n            postgresql.TIMESTAMP(timezone=True),\n            server_default=sa.text(\"now()\"),\n            autoincrement=False,\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"connector_id\"],\n            [\"connector.id\"],\n            name=\"deletion_attempt_connector_id_fkey\",\n        ),\n        sa.ForeignKeyConstraint(\n            [\"credential_id\"],\n            [\"credential.id\"],\n            name=\"deletion_attempt_credential_id_fkey\",\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=\"deletion_attempt_pkey\"),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/d56ffa94ca32_add_file_content.py",
    "content": "\"\"\"add_file_content\n\nRevision ID: d56ffa94ca32\nRevises: 01f8e6d95a33\nCreate Date: 2026-02-06 15:29:34.192960\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"d56ffa94ca32\"\ndown_revision = \"01f8e6d95a33\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"file_content\",\n        sa.Column(\n            \"file_id\",\n            sa.String(),\n            sa.ForeignKey(\"file_record.file_id\", ondelete=\"CASCADE\"),\n            primary_key=True,\n        ),\n        sa.Column(\"lobj_oid\", sa.BigInteger(), nullable=False),\n        sa.Column(\"file_size\", sa.BigInteger(), nullable=False, server_default=\"0\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"file_content\")\n"
  },
  {
    "path": "backend/alembic/versions/d5c86e2c6dc6_add_cascade_delete_to_search_query_user_.py",
    "content": "\"\"\"add_cascade_delete_to_search_query_user_id\n\nRevision ID: d5c86e2c6dc6\nRevises: 90b409d06e50\nCreate Date: 2026-02-04 16:05:04.749804\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"d5c86e2c6dc6\"\ndown_revision = \"90b409d06e50\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.drop_constraint(\"search_query_user_id_fkey\", \"search_query\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"search_query_user_id_fkey\",\n        \"search_query\",\n        \"user\",\n        [\"user_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n\ndef downgrade() -> None:\n    op.drop_constraint(\"search_query_user_id_fkey\", \"search_query\", type_=\"foreignkey\")\n    op.create_foreign_key(\n        \"search_query_user_id_fkey\", \"search_query\", \"user\", [\"user_id\"], [\"id\"]\n    )\n"
  },
  {
    "path": "backend/alembic/versions/d61e513bef0a_add_total_docs_for_index_attempt.py",
    "content": "\"\"\"Add Total Docs for Index Attempt\n\nRevision ID: d61e513bef0a\nRevises: 46625e4745d4\nCreate Date: 2023-10-27 23:02:43.369964\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"d61e513bef0a\"\ndown_revision = \"46625e4745d4\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"new_docs_indexed\", sa.Integer(), nullable=True),\n    )\n    op.alter_column(\n        \"index_attempt\", \"num_docs_indexed\", new_column_name=\"total_docs_indexed\"\n    )\n\n\ndef downgrade() -> None:\n    op.alter_column(\n        \"index_attempt\", \"total_docs_indexed\", new_column_name=\"num_docs_indexed\"\n    )\n    op.drop_column(\"index_attempt\", \"new_docs_indexed\")\n"
  },
  {
    "path": "backend/alembic/versions/d7111c1238cd_remove_document_ids.py",
    "content": "\"\"\"Remove Document IDs\n\nRevision ID: d7111c1238cd\nRevises: 465f78d9b7f9\nCreate Date: 2023-07-29 15:06:25.126169\n\n\"\"\"\n\nimport sqlalchemy as sa\nfrom alembic import op\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"d7111c1238cd\"\ndown_revision = \"465f78d9b7f9\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.drop_column(\"index_attempt\", \"document_ids\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\n            \"document_ids\",\n            postgresql.ARRAY(sa.VARCHAR()),\n            autoincrement=False,\n            nullable=True,\n        ),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/d716b0791ddd_combined_slack_id_fields.py",
    "content": "\"\"\"combined slack id fields\n\nRevision ID: d716b0791ddd\nRevises: 7aea705850d5\nCreate Date: 2024-07-10 17:57:45.630550\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"d716b0791ddd\"\ndown_revision = \"7aea705850d5\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.execute(\n        \"\"\"\n    UPDATE slack_bot_config\n    SET channel_config = jsonb_set(\n        channel_config,\n        '{respond_member_group_list}',\n        coalesce(channel_config->'respond_team_member_list', '[]'::jsonb) ||\n        coalesce(channel_config->'respond_slack_group_list', '[]'::jsonb)\n    ) - 'respond_team_member_list' - 'respond_slack_group_list'\n    \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    op.execute(\n        \"\"\"\n    UPDATE slack_bot_config\n    SET channel_config = jsonb_set(\n        jsonb_set(\n            channel_config - 'respond_member_group_list',\n            '{respond_team_member_list}',\n            '[]'::jsonb\n        ),\n        '{respond_slack_group_list}',\n        '[]'::jsonb\n    )\n    \"\"\"\n    )\n"
  },
  {
    "path": "backend/alembic/versions/d8cdfee5df80_add_skipped_to_userfilestatus.py",
    "content": "\"\"\"add skipped to userfilestatus\n\nRevision ID: d8cdfee5df80\nRevises: 1d78c0ca7853\nCreate Date: 2026-04-01 10:47:12.593950\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"d8cdfee5df80\"\ndown_revision = \"1d78c0ca7853\"\nbranch_labels = None\ndepends_on = None\n\n\nTABLE = \"user_file\"\nCOLUMN = \"status\"\nCONSTRAINT_NAME = \"ck_user_file_status\"\n\nOLD_VALUES = (\"PROCESSING\", \"INDEXING\", \"COMPLETED\", \"FAILED\", \"CANCELED\", \"DELETING\")\nNEW_VALUES = (\n    \"PROCESSING\",\n    \"INDEXING\",\n    \"COMPLETED\",\n    \"SKIPPED\",\n    \"FAILED\",\n    \"CANCELED\",\n    \"DELETING\",\n)\n\n\ndef _drop_status_check_constraint() -> None:\n    inspector = sa.inspect(op.get_bind())\n    for constraint in inspector.get_check_constraints(TABLE):\n        if COLUMN in constraint.get(\"sqltext\", \"\"):\n            constraint_name = constraint[\"name\"]\n            if constraint_name is not None:\n                op.drop_constraint(constraint_name, TABLE, type_=\"check\")\n\n\ndef upgrade() -> None:\n    _drop_status_check_constraint()\n    in_clause = \", \".join(f\"'{v}'\" for v in NEW_VALUES)\n    op.create_check_constraint(CONSTRAINT_NAME, TABLE, f\"{COLUMN} IN ({in_clause})\")\n\n\ndef downgrade() -> None:\n    op.execute(f\"UPDATE {TABLE} SET {COLUMN} = 'COMPLETED' WHERE {COLUMN} = 'SKIPPED'\")\n    _drop_status_check_constraint()\n    in_clause = \", \".join(f\"'{v}'\" for v in OLD_VALUES)\n    op.create_check_constraint(CONSTRAINT_NAME, TABLE, f\"{COLUMN} IN ({in_clause})\")\n"
  },
  {
    "path": "backend/alembic/versions/d929f0c1c6af_feedback_feature.py",
    "content": "\"\"\"Feedback Feature\n\nRevision ID: d929f0c1c6af\nRevises: 8aabb57f3b49\nCreate Date: 2023-08-27 13:03:54.274987\n\n\"\"\"\n\nimport fastapi_users_db_sqlalchemy\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"d929f0c1c6af\"\ndown_revision = \"8aabb57f3b49\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"query_event\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"query\", sa.String(), nullable=False),\n        sa.Column(\n            \"selected_search_flow\",\n            sa.Enum(\"KEYWORD\", \"SEMANTIC\", name=\"searchtype\", native_enum=False),\n            nullable=True,\n        ),\n        sa.Column(\"llm_answer\", sa.String(), nullable=True),\n        sa.Column(\n            \"feedback\",\n            sa.Enum(\"LIKE\", \"DISLIKE\", name=\"qafeedbacktype\", native_enum=False),\n            nullable=True,\n        ),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"document_retrieval_feedback\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"qa_event_id\", sa.Integer(), nullable=False),\n        sa.Column(\"document_id\", sa.String(), nullable=False),\n        sa.Column(\"document_rank\", sa.Integer(), nullable=False),\n        sa.Column(\"clicked\", sa.Boolean(), nullable=False),\n        sa.Column(\n            \"feedback\",\n            sa.Enum(\n                \"ENDORSE\",\n                \"REJECT\",\n                \"HIDE\",\n                \"UNHIDE\",\n                name=\"searchfeedbacktype\",\n                native_enum=False,\n            ),\n            nullable=True,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"document_id\"],\n            [\"document.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"qa_event_id\"],\n            [\"query_event.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.add_column(\"document\", sa.Column(\"boost\", sa.Integer(), nullable=False))\n    op.add_column(\"document\", sa.Column(\"hidden\", sa.Boolean(), nullable=False))\n    op.add_column(\"document\", sa.Column(\"semantic_id\", sa.String(), nullable=False))\n    op.add_column(\"document\", sa.Column(\"link\", sa.String(), nullable=True))\n\n\ndef downgrade() -> None:\n    op.drop_column(\"document\", \"link\")\n    op.drop_column(\"document\", \"semantic_id\")\n    op.drop_column(\"document\", \"hidden\")\n    op.drop_column(\"document\", \"boost\")\n    op.drop_table(\"document_retrieval_feedback\")\n    op.drop_table(\"query_event\")\n"
  },
  {
    "path": "backend/alembic/versions/d961aca62eb3_update_status_length.py",
    "content": "\"\"\"Update status length\n\nRevision ID: d961aca62eb3\nRevises: cf90764725d8\nCreate Date: 2025-03-23 16:10:05.683965\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"d961aca62eb3\"\ndown_revision = \"cf90764725d8\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Drop the existing enum type constraint\n    op.execute(\"ALTER TABLE connector_credential_pair ALTER COLUMN status TYPE varchar\")\n\n    # Create new enum type with all values\n    op.execute(\n        \"ALTER TABLE connector_credential_pair ALTER COLUMN status TYPE VARCHAR(20) USING status::varchar(20)\"\n    )\n\n    # Update the enum type to include all possible values\n    op.alter_column(\n        \"connector_credential_pair\",\n        \"status\",\n        type_=sa.Enum(\n            \"SCHEDULED\",\n            \"INITIAL_INDEXING\",\n            \"ACTIVE\",\n            \"PAUSED\",\n            \"DELETING\",\n            \"INVALID\",\n            name=\"connectorcredentialpairstatus\",\n            native_enum=False,\n        ),\n        existing_type=sa.String(20),\n        nullable=False,\n    )\n\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"in_repeated_error_state\", sa.Boolean, default=False, server_default=\"false\"\n        ),\n    )\n\n\ndef downgrade() -> None:\n    # no need to convert back to the old enum type, since we're not using it anymore\n    op.drop_column(\"connector_credential_pair\", \"in_repeated_error_state\")\n"
  },
  {
    "path": "backend/alembic/versions/d9ec13955951_remove__dim_suffix_from_model_name.py",
    "content": "\"\"\"Remove _alt suffix from model_name\n\nRevision ID: d9ec13955951\nRevises: da4c21c69164\nCreate Date: 2024-08-20 16:31:32.955686\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"d9ec13955951\"\ndown_revision = \"da4c21c69164\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.execute(\n        \"\"\"\n        UPDATE embedding_model\n        SET model_name = regexp_replace(model_name, '__danswer_alt_index$', '')\n        WHERE model_name LIKE '%__danswer_alt_index'\n    \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    # We can't reliably add the __danswer_alt_index suffix back, so we'll leave this empty\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/da42808081e3_migrate_jira_connectors_to_new_format.py",
    "content": "\"\"\"migrate jira connectors to new format\n\nRevision ID: da42808081e3\nRevises: f13db29f3101\nCreate Date: 2025-02-24 11:24:54.396040\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nimport json\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.jira.utils import extract_jira_project\n\n\n# revision identifiers, used by Alembic.\nrevision = \"da42808081e3\"\ndown_revision = \"f13db29f3101\"\nbranch_labels = None\ndepends_on = None\n\n\nPRESERVED_CONFIG_KEYS = [\"comment_email_blacklist\", \"batch_size\", \"labels_to_skip\"]\n\n\ndef upgrade() -> None:\n    # Get all Jira connectors\n    conn = op.get_bind()\n\n    # First get all Jira connectors\n    jira_connectors = conn.execute(\n        sa.text(\n            \"\"\"\n            SELECT id, connector_specific_config\n            FROM connector\n            WHERE source = :source\n            \"\"\"\n        ),\n        {\"source\": DocumentSource.JIRA.value.upper()},\n    ).fetchall()\n\n    # Update each connector's config\n    for connector_id, old_config in jira_connectors:\n        if not old_config:\n            continue\n\n        # Extract project key from URL if it exists\n        new_config: dict[str, str | None] = {}\n        if project_url := old_config.get(\"jira_project_url\"):\n            # Parse the URL to get base and project\n            try:\n                jira_base, project_key = extract_jira_project(project_url)\n                new_config = {\"jira_base_url\": jira_base, \"project_key\": project_key}\n            except ValueError:\n                # If URL parsing fails, just use the URL as the base\n                new_config = {\n                    \"jira_base_url\": project_url.split(\"/projects/\")[0],\n                    \"project_key\": None,\n                }\n        else:\n            # For connectors without a project URL, we need admin intervention\n            # Mark these for review\n            print(\n                f\"WARNING: Jira connector {connector_id} has no project URL configured\"\n            )\n            continue\n        for old_key in PRESERVED_CONFIG_KEYS:\n            if old_key in old_config:\n                new_config[old_key] = old_config[old_key]\n\n        # Update the connector config\n        conn.execute(\n            sa.text(\n                \"\"\"\n                UPDATE connector\n                SET connector_specific_config = :new_config\n                WHERE id = :id\n                \"\"\"\n            ),\n            {\"id\": connector_id, \"new_config\": json.dumps(new_config)},\n        )\n\n\ndef downgrade() -> None:\n    # Get all Jira connectors\n    conn = op.get_bind()\n\n    # First get all Jira connectors\n    jira_connectors = conn.execute(\n        sa.text(\n            \"\"\"\n            SELECT id, connector_specific_config\n            FROM connector\n            WHERE source = :source\n            \"\"\"\n        ),\n        {\"source\": DocumentSource.JIRA.value.upper()},\n    ).fetchall()\n\n    # Update each connector's config back to the old format\n    for connector_id, new_config in jira_connectors:\n        if not new_config:\n            continue\n\n        old_config = {}\n        base_url = new_config.get(\"jira_base_url\")\n        project_key = new_config.get(\"project_key\")\n\n        if base_url and project_key:\n            old_config = {\"jira_project_url\": f\"{base_url}/projects/{project_key}\"}\n        elif base_url:\n            old_config = {\"jira_project_url\": base_url}\n        else:\n            continue\n\n        for old_key in PRESERVED_CONFIG_KEYS:\n            if old_key in new_config:\n                old_config[old_key] = new_config[old_key]\n\n        # Update the connector config\n        conn.execute(\n            sa.text(\n                \"\"\"\n                UPDATE connector\n                SET connector_specific_config = :old_config\n                WHERE id = :id\n                \"\"\"\n            ),\n            {\"id\": connector_id, \"old_config\": json.dumps(old_config)},\n        )\n"
  },
  {
    "path": "backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py",
    "content": "\"\"\"chosen_assistants changed to jsonb\n\nRevision ID: da4c21c69164\nRevises: c5b692fa265c\nCreate Date: 2024-08-18 19:06:47.291491\n\n\"\"\"\n\nimport json\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"da4c21c69164\"\ndown_revision = \"c5b692fa265c\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    conn = op.get_bind()\n    existing_ids_and_chosen_assistants = conn.execute(\n        sa.text('select id, chosen_assistants from \"user\"')\n    )\n    op.drop_column(\n        \"user\",\n        \"chosen_assistants\",\n    )\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"chosen_assistants\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=True,\n        ),\n    )\n    for id, chosen_assistants in existing_ids_and_chosen_assistants:\n        conn.execute(\n            sa.text(\n                'update \"user\" set chosen_assistants = :chosen_assistants where id = :id'\n            ),\n            {\"chosen_assistants\": json.dumps(chosen_assistants), \"id\": id},\n        )\n\n\ndef downgrade() -> None:\n    conn = op.get_bind()\n    existing_ids_and_chosen_assistants = conn.execute(\n        sa.text('select id, chosen_assistants from \"user\"')\n    )\n    op.drop_column(\n        \"user\",\n        \"chosen_assistants\",\n    )\n    op.add_column(\n        \"user\",\n        sa.Column(\"chosen_assistants\", postgresql.ARRAY(sa.Integer()), nullable=True),\n    )\n    for id, chosen_assistants in existing_ids_and_chosen_assistants:\n        conn.execute(\n            sa.text(\n                'update \"user\" set chosen_assistants = :chosen_assistants where id = :id'\n            ),\n            {\"chosen_assistants\": chosen_assistants, \"id\": id},\n        )\n"
  },
  {
    "path": "backend/alembic/versions/dab04867cd88_add_composite_index_to_document_by_.py",
    "content": "\"\"\"Add composite index to document_by_connector_credential_pair\n\nRevision ID: dab04867cd88\nRevises: 54a74a0417fc\nCreate Date: 2024-12-13 22:43:20.119990\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"dab04867cd88\"\ndown_revision = \"54a74a0417fc\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Composite index on (connector_id, credential_id)\n    op.create_index(\n        \"idx_document_cc_pair_connector_credential\",\n        \"document_by_connector_credential_pair\",\n        [\"connector_id\", \"credential_id\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\n        \"idx_document_cc_pair_connector_credential\",\n        table_name=\"document_by_connector_credential_pair\",\n    )\n"
  },
  {
    "path": "backend/alembic/versions/dba7f71618f5_onyx_custom_tool_flow.py",
    "content": "\"\"\"Onyx Custom Tool Flow\n\nRevision ID: dba7f71618f5\nRevises: d5645c915d0e\nCreate Date: 2023-09-18 15:18:37.370972\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"dba7f71618f5\"\ndown_revision = \"d5645c915d0e\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"persona\",\n        sa.Column(\"retrieval_enabled\", sa.Boolean(), nullable=True),\n    )\n    op.execute(\"UPDATE persona SET retrieval_enabled = true\")\n    op.alter_column(\"persona\", \"retrieval_enabled\", nullable=False)\n\n\ndef downgrade() -> None:\n    op.drop_column(\"persona\", \"retrieval_enabled\")\n"
  },
  {
    "path": "backend/alembic/versions/dbaa756c2ccf_embedding_models.py",
    "content": "\"\"\"Embedding Models\n\nRevision ID: dbaa756c2ccf\nRevises: 7f726bad5367\nCreate Date: 2024-01-25 17:12:31.813160\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy import table, column, String, Integer, Boolean\n\nfrom onyx.configs.model_configs import ASYM_PASSAGE_PREFIX\nfrom onyx.configs.model_configs import ASYM_QUERY_PREFIX\nfrom onyx.configs.model_configs import DOC_EMBEDDING_DIM\nfrom onyx.configs.model_configs import DOCUMENT_ENCODER_MODEL\nfrom onyx.configs.model_configs import NORMALIZE_EMBEDDINGS\nfrom onyx.configs.model_configs import OLD_DEFAULT_DOCUMENT_ENCODER_MODEL\nfrom onyx.configs.model_configs import OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM\nfrom onyx.configs.model_configs import OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.db.models import IndexModelStatus\nfrom onyx.db.search_settings import user_has_overridden_embedding_model\nfrom onyx.indexing.models import IndexingSetting\nfrom onyx.natural_language_processing.search_nlp_models import clean_model_name\n\n# revision identifiers, used by Alembic.\nrevision = \"dbaa756c2ccf\"\ndown_revision = \"7f726bad5367\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef _get_old_default_embedding_model() -> IndexingSetting:\n    is_overridden = user_has_overridden_embedding_model()\n    return IndexingSetting(\n        model_name=(\n            DOCUMENT_ENCODER_MODEL\n            if is_overridden\n            else OLD_DEFAULT_DOCUMENT_ENCODER_MODEL\n        ),\n        model_dim=(\n            DOC_EMBEDDING_DIM if is_overridden else OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM\n        ),\n        embedding_precision=(EmbeddingPrecision.FLOAT),\n        normalize=(\n            NORMALIZE_EMBEDDINGS\n            if is_overridden\n            else OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS\n        ),\n        query_prefix=(ASYM_QUERY_PREFIX if is_overridden else \"\"),\n        passage_prefix=(ASYM_PASSAGE_PREFIX if is_overridden else \"\"),\n        index_name=\"danswer_chunk\",\n        multipass_indexing=False,\n        enable_contextual_rag=False,\n        api_url=None,\n    )\n\n\ndef _get_new_default_embedding_model() -> IndexingSetting:\n    return IndexingSetting(\n        model_name=DOCUMENT_ENCODER_MODEL,\n        model_dim=DOC_EMBEDDING_DIM,\n        embedding_precision=(EmbeddingPrecision.BFLOAT16),\n        normalize=NORMALIZE_EMBEDDINGS,\n        query_prefix=ASYM_QUERY_PREFIX,\n        passage_prefix=ASYM_PASSAGE_PREFIX,\n        index_name=f\"danswer_chunk_{clean_model_name(DOCUMENT_ENCODER_MODEL)}\",\n        multipass_indexing=False,\n        enable_contextual_rag=False,\n        api_url=None,\n    )\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"embedding_model\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"model_name\", sa.String(), nullable=False),\n        sa.Column(\"model_dim\", sa.Integer(), nullable=False),\n        sa.Column(\"normalize\", sa.Boolean(), nullable=False),\n        sa.Column(\"query_prefix\", sa.String(), nullable=False),\n        sa.Column(\"passage_prefix\", sa.String(), nullable=False),\n        sa.Column(\"index_name\", sa.String(), nullable=False),\n        sa.Column(\n            \"status\",\n            sa.Enum(IndexModelStatus, native=False),\n            nullable=False,\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    # since all index attempts must be associated with an embedding model,\n    # need to put something in here to avoid nulls. On server startup,\n    # this value will be overriden\n    EmbeddingModel = table(\n        \"embedding_model\",\n        column(\"id\", Integer),\n        column(\"model_name\", String),\n        column(\"model_dim\", Integer),\n        column(\"normalize\", Boolean),\n        column(\"query_prefix\", String),\n        column(\"passage_prefix\", String),\n        column(\"index_name\", String),\n        column(\n            \"status\", sa.Enum(IndexModelStatus, name=\"indexmodelstatus\", native=False)\n        ),\n    )\n    # insert an embedding model row that corresponds to the embedding model\n    # the user selected via env variables before this change. This is needed since\n    # all index_attempts must be associated with an embedding model, so without this\n    # we will run into violations of non-null contraints\n    old_embedding_model = _get_old_default_embedding_model()\n    op.bulk_insert(\n        EmbeddingModel,\n        [\n            {\n                \"model_name\": old_embedding_model.model_name,\n                \"model_dim\": old_embedding_model.model_dim,\n                \"normalize\": old_embedding_model.normalize,\n                \"query_prefix\": old_embedding_model.query_prefix,\n                \"passage_prefix\": old_embedding_model.passage_prefix,\n                \"index_name\": old_embedding_model.index_name,\n                \"status\": IndexModelStatus.PRESENT,\n            }\n        ],\n    )\n    # if the user has not overridden the default embedding model via env variables,\n    # insert the new default model into the database to auto-upgrade them\n    if not user_has_overridden_embedding_model():\n        new_embedding_model = _get_new_default_embedding_model()\n        op.bulk_insert(\n            EmbeddingModel,\n            [\n                {\n                    \"model_name\": new_embedding_model.model_name,\n                    \"model_dim\": new_embedding_model.model_dim,\n                    \"normalize\": new_embedding_model.normalize,\n                    \"query_prefix\": new_embedding_model.query_prefix,\n                    \"passage_prefix\": new_embedding_model.passage_prefix,\n                    \"index_name\": new_embedding_model.index_name,\n                    \"status\": IndexModelStatus.FUTURE,\n                }\n            ],\n        )\n\n    op.add_column(\n        \"index_attempt\",\n        sa.Column(\"embedding_model_id\", sa.Integer(), nullable=True),\n    )\n    op.execute(\n        \"UPDATE index_attempt SET embedding_model_id=1 WHERE embedding_model_id IS NULL\"\n    )\n    op.alter_column(\n        \"index_attempt\",\n        \"embedding_model_id\",\n        existing_type=sa.Integer(),\n        nullable=False,\n    )\n    op.create_foreign_key(\n        \"index_attempt__embedding_model_fk\",\n        \"index_attempt\",\n        \"embedding_model\",\n        [\"embedding_model_id\"],\n        [\"id\"],\n    )\n    op.create_index(\n        \"ix_embedding_model_present_unique\",\n        \"embedding_model\",\n        [\"status\"],\n        unique=True,\n        postgresql_where=sa.text(\"status = 'PRESENT'\"),\n    )\n    op.create_index(\n        \"ix_embedding_model_future_unique\",\n        \"embedding_model\",\n        [\"status\"],\n        unique=True,\n        postgresql_where=sa.text(\"status = 'FUTURE'\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_constraint(\n        \"index_attempt__embedding_model_fk\", \"index_attempt\", type_=\"foreignkey\"\n    )\n    op.drop_column(\"index_attempt\", \"embedding_model_id\")\n    op.drop_table(\"embedding_model\")\n    op.execute(\"DROP TYPE IF EXISTS indexmodelstatus;\")\n"
  },
  {
    "path": "backend/alembic/versions/df0c7ad8a076_added_deletion_attempt_table.py",
    "content": "\"\"\"Added deletion_attempt table\n\nRevision ID: df0c7ad8a076\nRevises: d7111c1238cd\nCreate Date: 2023-08-05 13:35:39.609619\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"df0c7ad8a076\"\ndown_revision = \"d7111c1238cd\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.execute(\"DROP TABLE IF EXISTS document CASCADE\")\n    op.create_table(\n        \"document\",\n        sa.Column(\"id\", sa.String(), nullable=False),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.execute(\"DROP TABLE IF EXISTS chunk CASCADE\")\n    op.create_table(\n        \"chunk\",\n        sa.Column(\"id\", sa.String(), nullable=False),\n        sa.Column(\n            \"document_store_type\",\n            sa.Enum(\n                \"VECTOR\",\n                \"KEYWORD\",\n                name=\"documentstoretype\",\n                native_enum=False,\n            ),\n            nullable=False,\n        ),\n        sa.Column(\"document_id\", sa.String(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"document_id\"],\n            [\"document.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\", \"document_store_type\"),\n    )\n    op.execute(\"DROP TABLE IF EXISTS deletion_attempt CASCADE\")\n    op.create_table(\n        \"deletion_attempt\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"connector_id\", sa.Integer(), nullable=False),\n        sa.Column(\"credential_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"status\",\n            sa.Enum(\n                \"NOT_STARTED\",\n                \"IN_PROGRESS\",\n                \"SUCCESS\",\n                \"FAILED\",\n                name=\"deletionstatus\",\n                native_enum=False,\n            ),\n            nullable=False,\n        ),\n        sa.Column(\"num_docs_deleted\", sa.Integer(), nullable=False),\n        sa.Column(\"error_msg\", sa.String(), nullable=True),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\n            \"time_updated\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"connector_id\"],\n            [\"connector.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"credential_id\"],\n            [\"credential.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.execute(\"DROP TABLE IF EXISTS document_by_connector_credential_pair CASCADE\")\n    op.create_table(\n        \"document_by_connector_credential_pair\",\n        sa.Column(\"id\", sa.String(), nullable=False),\n        sa.Column(\"connector_id\", sa.Integer(), nullable=False),\n        sa.Column(\"credential_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"connector_id\"],\n            [\"connector.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"credential_id\"],\n            [\"credential.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"id\"],\n            [\"document.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\", \"connector_id\", \"credential_id\"),\n    )\n\n\ndef downgrade() -> None:\n    # upstream tables first\n    op.drop_table(\"document_by_connector_credential_pair\")\n    op.drop_table(\"deletion_attempt\")\n    op.drop_table(\"chunk\")\n\n    # Alembic op.drop_table() has no \"cascade\" flag – issue raw SQL\n    op.execute(\"DROP TABLE IF EXISTS document CASCADE\")\n"
  },
  {
    "path": "backend/alembic/versions/df46c75b714e_add_default_vision_provider_to_llm_.py",
    "content": "\"\"\"add_default_vision_provider_to_llm_provider\n\nRevision ID: df46c75b714e\nRevises: 3934b1bc7b62\nCreate Date: 2025-03-11 16:20:19.038945\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"df46c75b714e\"\ndown_revision = \"3934b1bc7b62\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"llm_provider\",\n        sa.Column(\n            \"is_default_vision_provider\",\n            sa.Boolean(),\n            nullable=True,\n            server_default=sa.false(),\n        ),\n    )\n    op.add_column(\n        \"llm_provider\", sa.Column(\"default_vision_model\", sa.String(), nullable=True)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"llm_provider\", \"default_vision_model\")\n    op.drop_column(\"llm_provider\", \"is_default_vision_provider\")\n"
  },
  {
    "path": "backend/alembic/versions/dfbe9e93d3c7_extended_role_for_non_web.py",
    "content": "\"\"\"extended_role_for_non_web\n\nRevision ID: dfbe9e93d3c7\nRevises: 9cf5c00f72fe\nCreate Date: 2024-11-16 07:54:18.727906\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"dfbe9e93d3c7\"\ndown_revision = \"9cf5c00f72fe\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.execute(\n        \"\"\"\n        UPDATE \"user\"\n        SET role = 'EXT_PERM_USER'\n        WHERE has_web_login = false\n    \"\"\"\n    )\n    op.drop_column(\"user\", \"has_web_login\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\"has_web_login\", sa.Boolean(), nullable=False, server_default=\"true\"),\n    )\n\n    op.execute(\n        \"\"\"\n        UPDATE \"user\"\n        SET has_web_login = false,\n            role = 'BASIC'\n        WHERE role IN ('SLACK_USER', 'EXT_PERM_USER')\n    \"\"\"\n    )\n"
  },
  {
    "path": "backend/alembic/versions/e0a68a81d434_add_chat_feedback.py",
    "content": "\"\"\"Add Chat Feedback\n\nRevision ID: e0a68a81d434\nRevises: ae62505e3acc\nCreate Date: 2023-10-04 20:22:33.380286\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"e0a68a81d434\"\ndown_revision = \"ae62505e3acc\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"chat_feedback\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"chat_message_chat_session_id\", sa.Integer(), nullable=False),\n        sa.Column(\"chat_message_message_number\", sa.Integer(), nullable=False),\n        sa.Column(\"chat_message_edit_number\", sa.Integer(), nullable=False),\n        sa.Column(\"is_positive\", sa.Boolean(), nullable=True),\n        sa.Column(\"feedback_text\", sa.Text(), nullable=True),\n        sa.ForeignKeyConstraint(\n            [\n                \"chat_message_chat_session_id\",\n                \"chat_message_message_number\",\n                \"chat_message_edit_number\",\n            ],\n            [\n                \"chat_message.chat_session_id\",\n                \"chat_message.message_number\",\n                \"chat_message.edit_number\",\n            ],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"chat_feedback\")\n"
  },
  {
    "path": "backend/alembic/versions/e1392f05e840_added_input_prompts.py",
    "content": "\"\"\"Added input prompts\n\nRevision ID: e1392f05e840\nRevises: 08a1eda20fe1\nCreate Date: 2024-07-13 19:09:22.556224\n\n\"\"\"\n\nimport fastapi_users_db_sqlalchemy\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"e1392f05e840\"\ndown_revision = \"08a1eda20fe1\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"inputprompt\",\n        sa.Column(\"id\", sa.Integer(), autoincrement=True, nullable=False),\n        sa.Column(\"prompt\", sa.String(), nullable=False),\n        sa.Column(\"content\", sa.String(), nullable=False),\n        sa.Column(\"active\", sa.Boolean(), nullable=False),\n        sa.Column(\"is_public\", sa.Boolean(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=True,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n    op.create_table(\n        \"inputprompt__user\",\n        sa.Column(\"input_prompt_id\", sa.Integer(), nullable=False),\n        sa.Column(\"user_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"input_prompt_id\"],\n            [\"inputprompt.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"inputprompt.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"input_prompt_id\", \"user_id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"inputprompt__user\")\n    op.drop_table(\"inputprompt\")\n"
  },
  {
    "path": "backend/alembic/versions/e209dc5a8156_added_prune_frequency.py",
    "content": "\"\"\"added-prune-frequency\n\nRevision ID: e209dc5a8156\nRevises: 48d14957fe80\nCreate Date: 2024-06-16 16:02:35.273231\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\nrevision = \"e209dc5a8156\"\ndown_revision = \"48d14957fe80\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\"connector\", sa.Column(\"prune_freq\", sa.Integer(), nullable=True))\n\n\ndef downgrade() -> None:\n    op.drop_column(\"connector\", \"prune_freq\")\n"
  },
  {
    "path": "backend/alembic/versions/e4334d5b33ba_add_deployment_name_to_llmprovider.py",
    "content": "\"\"\"add_deployment_name_to_llmprovider\n\nRevision ID: e4334d5b33ba\nRevises: ac5eaac849f9\nCreate Date: 2024-10-04 09:52:34.896867\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"e4334d5b33ba\"\ndown_revision = \"ac5eaac849f9\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"llm_provider\", sa.Column(\"deployment_name\", sa.String(), nullable=True)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"llm_provider\", \"deployment_name\")\n"
  },
  {
    "path": "backend/alembic/versions/e50154680a5c_no_source_enum.py",
    "content": "\"\"\"No Source Enum\n\nRevision ID: e50154680a5c\nRevises: fcd135795f21\nCreate Date: 2024-03-14 18:06:08.523106\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\nfrom onyx.configs.constants import DocumentSource\n\n# revision identifiers, used by Alembic.\nrevision = \"e50154680a5c\"\ndown_revision = \"fcd135795f21\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.alter_column(\n        \"search_doc\",\n        \"source_type\",\n        type_=sa.String(length=50),\n        existing_type=sa.Enum(DocumentSource, native_enum=False),\n        existing_nullable=False,\n    )\n    op.execute(\"DROP TYPE IF EXISTS documentsource\")\n\n\ndef downgrade() -> None:\n    op.alter_column(\n        \"search_doc\",\n        \"source_type\",\n        type_=sa.Enum(DocumentSource, native_enum=False),\n        existing_type=sa.String(length=50),\n        existing_nullable=False,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/e6a4bbc13fe4_add_index_for_retrieving_latest_index_.py",
    "content": "\"\"\"Add index for retrieving latest index_attempt\n\nRevision ID: e6a4bbc13fe4\nRevises: b082fec533f0\nCreate Date: 2023-08-10 12:37:23.335471\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"e6a4bbc13fe4\"\ndown_revision = \"b082fec533f0\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_index(\n        op.f(\"ix_index_attempt_latest_for_connector_credential_pair\"),\n        \"index_attempt\",\n        [\"connector_id\", \"credential_id\", \"time_created\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\n        op.f(\"ix_index_attempt_latest_for_connector_credential_pair\"),\n        table_name=\"index_attempt\",\n    )\n"
  },
  {
    "path": "backend/alembic/versions/e7f8a9b0c1d2_create_anonymous_user.py",
    "content": "\"\"\"create_anonymous_user\n\nThis migration creates a permanent anonymous user in the database.\nWhen anonymous access is enabled, unauthenticated requests will use this user\ninstead of returning user_id=NULL.\n\nRevision ID: e7f8a9b0c1d2\nRevises: f7ca3e2f45d9\nCreate Date: 2026-01-15 14:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"e7f8a9b0c1d2\"\ndown_revision = \"f7ca3e2f45d9\"\nbranch_labels = None\ndepends_on = None\n\n# Must match constants in onyx/configs/constants.py file\nANONYMOUS_USER_UUID = \"00000000-0000-0000-0000-000000000002\"\nANONYMOUS_USER_EMAIL = \"anonymous@onyx.app\"\n\n# Tables with user_id foreign key that may need migration\nTABLES_WITH_USER_ID = [\n    \"chat_session\",\n    \"credential\",\n    \"document_set\",\n    \"persona\",\n    \"tool\",\n    \"notification\",\n    \"inputprompt\",\n]\n\n\ndef _dedupe_null_notifications(connection: sa.Connection) -> None:\n    # Multiple NULL-owned notifications can exist because the unique index treats\n    # NULL user_id values as distinct. Before migrating them to the anonymous\n    # user, collapse duplicates and remove rows that would conflict with an\n    # already-existing anonymous notification.\n    result = connection.execute(\n        sa.text(\n            \"\"\"\n            WITH ranked_null_notifications AS (\n                SELECT\n                    id,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY notif_type, COALESCE(additional_data, '{}'::jsonb)\n                        ORDER BY first_shown DESC, last_shown DESC, id DESC\n                    ) AS row_num\n                FROM notification\n                WHERE user_id IS NULL\n            )\n            DELETE FROM notification\n            WHERE id IN (\n                SELECT id\n                FROM ranked_null_notifications\n                WHERE row_num > 1\n            )\n            \"\"\"\n        )\n    )\n    if result.rowcount > 0:\n        print(f\"Deleted {result.rowcount} duplicate NULL-owned notifications\")\n\n    result = connection.execute(\n        sa.text(\n            \"\"\"\n            DELETE FROM notification AS null_owned\n            USING notification AS anonymous_owned\n            WHERE null_owned.user_id IS NULL\n              AND anonymous_owned.user_id = :user_id\n              AND null_owned.notif_type = anonymous_owned.notif_type\n              AND COALESCE(null_owned.additional_data, '{}'::jsonb) =\n                  COALESCE(anonymous_owned.additional_data, '{}'::jsonb)\n            \"\"\"\n        ),\n        {\"user_id\": ANONYMOUS_USER_UUID},\n    )\n    if result.rowcount > 0:\n        print(\n            f\"Deleted {result.rowcount} NULL-owned notifications that conflict with existing anonymous-owned notifications\"\n        )\n\n\ndef upgrade() -> None:\n    \"\"\"\n    Create the anonymous user for anonymous access feature.\n    Also migrates any remaining user_id=NULL records to the anonymous user.\n    \"\"\"\n    connection = op.get_bind()\n\n    # Create the anonymous user (using ON CONFLICT to be idempotent)\n    connection.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO \"user\" (id, email, hashed_password, is_active, is_superuser, is_verified, role)\n            VALUES (:id, :email, :hashed_password, :is_active, :is_superuser, :is_verified, :role)\n            ON CONFLICT (id) DO NOTHING\n            \"\"\"\n        ),\n        {\n            \"id\": ANONYMOUS_USER_UUID,\n            \"email\": ANONYMOUS_USER_EMAIL,\n            \"hashed_password\": \"\",  # Empty password - user cannot log in directly\n            \"is_active\": True,  # Active so it can be used for anonymous access\n            \"is_superuser\": False,\n            \"is_verified\": True,  # Verified since no email verification needed\n            \"role\": \"LIMITED\",  # Anonymous users have limited role to restrict access\n        },\n    )\n\n    # Migrate any remaining user_id=NULL records to anonymous user\n    for table in TABLES_WITH_USER_ID:\n        # Dedup notifications outside the savepoint so deletions persist\n        # even if the subsequent UPDATE rolls back\n        if table == \"notification\":\n            _dedupe_null_notifications(connection)\n\n        with connection.begin_nested():\n            # Exclude public credential (id=0) which must remain user_id=NULL\n            # Exclude builtin tools (in_code_tool_id IS NOT NULL) which must remain user_id=NULL\n            # Exclude builtin personas (builtin_persona=True) which must remain user_id=NULL\n            # Exclude system input prompts (is_public=True with user_id=NULL) which must remain user_id=NULL\n            if table == \"credential\":\n                condition = \"user_id IS NULL AND id != 0\"\n            elif table == \"tool\":\n                condition = \"user_id IS NULL AND in_code_tool_id IS NULL\"\n            elif table == \"persona\":\n                condition = \"user_id IS NULL AND builtin_persona = false\"\n            elif table == \"inputprompt\":\n                condition = \"user_id IS NULL AND is_public = false\"\n            else:\n                condition = \"user_id IS NULL\"\n\n            result = connection.execute(\n                sa.text(\n                    f\"\"\"\n                    UPDATE \"{table}\"\n                    SET user_id = :user_id\n                    WHERE {condition}\n                    \"\"\"\n                ),\n                {\"user_id\": ANONYMOUS_USER_UUID},\n            )\n            if result.rowcount > 0:\n                print(f\"Updated {result.rowcount} rows in {table} to anonymous user\")\n\n\ndef downgrade() -> None:\n    \"\"\"\n    Set anonymous user's records back to NULL and delete the anonymous user.\n\n    Note: Duplicate NULL-owned notifications removed during upgrade are not restored.\n    \"\"\"\n    connection = op.get_bind()\n\n    # Set records back to NULL\n    for table in TABLES_WITH_USER_ID:\n        with connection.begin_nested():\n            connection.execute(\n                sa.text(\n                    f\"\"\"\n                    UPDATE \"{table}\"\n                    SET user_id = NULL\n                    WHERE user_id = :user_id\n                    \"\"\"\n                ),\n                {\"user_id\": ANONYMOUS_USER_UUID},\n            )\n\n    # Delete the anonymous user\n    connection.execute(\n        sa.text('DELETE FROM \"user\" WHERE id = :user_id'),\n        {\"user_id\": ANONYMOUS_USER_UUID},\n    )\n"
  },
  {
    "path": "backend/alembic/versions/e86866a9c78a_add_persona_to_chat_session.py",
    "content": "\"\"\"Add persona to chat_session\n\nRevision ID: e86866a9c78a\nRevises: 80696cf850ae\nCreate Date: 2023-11-26 02:51:47.657357\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"e86866a9c78a\"\ndown_revision = \"80696cf850ae\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\"chat_session\", sa.Column(\"persona_id\", sa.Integer(), nullable=True))\n    op.create_foreign_key(\n        \"fk_chat_session_persona_id\", \"chat_session\", \"persona\", [\"persona_id\"], [\"id\"]\n    )\n\n\ndef downgrade() -> None:\n    op.drop_constraint(\"fk_chat_session_persona_id\", \"chat_session\", type_=\"foreignkey\")\n    op.drop_column(\"chat_session\", \"persona_id\")\n"
  },
  {
    "path": "backend/alembic/versions/e8f0d2a38171_add_status_to_mcp_server_and_make_auth_.py",
    "content": "\"\"\"add status to mcp server and make auth fields nullable\n\nRevision ID: e8f0d2a38171\nRevises: ed9e44312505\nCreate Date: 2025-11-28 11:15:37.667340\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom onyx.db.enums import (\n    MCPTransport,\n    MCPAuthenticationType,\n    MCPAuthenticationPerformer,\n    MCPServerStatus,\n)\n\n# revision identifiers, used by Alembic.\nrevision = \"e8f0d2a38171\"\ndown_revision = \"ed9e44312505\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Make auth fields nullable\n    op.alter_column(\n        \"mcp_server\",\n        \"transport\",\n        existing_type=sa.Enum(MCPTransport, name=\"mcp_transport\", native_enum=False),\n        nullable=True,\n    )\n\n    op.alter_column(\n        \"mcp_server\",\n        \"auth_type\",\n        existing_type=sa.Enum(\n            MCPAuthenticationType, name=\"mcp_authentication_type\", native_enum=False\n        ),\n        nullable=True,\n    )\n\n    op.alter_column(\n        \"mcp_server\",\n        \"auth_performer\",\n        existing_type=sa.Enum(\n            MCPAuthenticationPerformer,\n            name=\"mcp_authentication_performer\",\n            native_enum=False,\n        ),\n        nullable=True,\n    )\n\n    # Add status column with default\n    op.add_column(\n        \"mcp_server\",\n        sa.Column(\n            \"status\",\n            sa.Enum(MCPServerStatus, name=\"mcp_server_status\", native_enum=False),\n            nullable=False,\n            server_default=\"CREATED\",\n        ),\n    )\n\n    # For existing records, mark status as CONNECTED\n    bind = op.get_bind()\n    bind.execute(\n        sa.text(\n            \"\"\"\n        UPDATE mcp_server\n        SET status = 'CONNECTED'\n        WHERE status != 'CONNECTED'\n        and admin_connection_config_id IS NOT NULL\n        \"\"\"\n        )\n    )\n\n\ndef downgrade() -> None:\n    # Remove status column\n    op.drop_column(\"mcp_server\", \"status\")\n\n    # Make auth fields non-nullable (set defaults first)\n    op.execute(\n        \"UPDATE mcp_server SET transport = 'STREAMABLE_HTTP' WHERE transport IS NULL\"\n    )\n    op.execute(\"UPDATE mcp_server SET auth_type = 'NONE' WHERE auth_type IS NULL\")\n    op.execute(\n        \"UPDATE mcp_server SET auth_performer = 'ADMIN' WHERE auth_performer IS NULL\"\n    )\n\n    op.alter_column(\n        \"mcp_server\",\n        \"transport\",\n        existing_type=sa.Enum(MCPTransport, name=\"mcp_transport\", native_enum=False),\n        nullable=False,\n    )\n    op.alter_column(\n        \"mcp_server\",\n        \"auth_type\",\n        existing_type=sa.Enum(\n            MCPAuthenticationType, name=\"mcp_authentication_type\", native_enum=False\n        ),\n        nullable=False,\n    )\n    op.alter_column(\n        \"mcp_server\",\n        \"auth_performer\",\n        existing_type=sa.Enum(\n            MCPAuthenticationPerformer,\n            name=\"mcp_authentication_performer\",\n            native_enum=False,\n        ),\n        nullable=False,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/e91df4e935ef_private_personas_documentsets.py",
    "content": "\"\"\"Private Personas DocumentSets\n\nRevision ID: e91df4e935ef\nRevises: 91fd3b470d1a\nCreate Date: 2024-03-17 11:47:24.675881\n\n\"\"\"\n\nimport fastapi_users_db_sqlalchemy\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"e91df4e935ef\"\ndown_revision = \"91fd3b470d1a\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"document_set__user\",\n        sa.Column(\"document_set_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"document_set_id\"],\n            [\"document_set.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"document_set_id\", \"user_id\"),\n    )\n    op.create_table(\n        \"persona__user\",\n        sa.Column(\"persona_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"user_id\",\n            fastapi_users_db_sqlalchemy.generics.GUID(),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"],\n            [\"user.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"persona_id\", \"user_id\"),\n    )\n    op.create_table(\n        \"document_set__user_group\",\n        sa.Column(\"document_set_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"user_group_id\",\n            sa.Integer(),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"document_set_id\"],\n            [\"document_set.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_group_id\"],\n            [\"user_group.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"document_set_id\", \"user_group_id\"),\n    )\n    op.create_table(\n        \"persona__user_group\",\n        sa.Column(\"persona_id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"user_group_id\",\n            sa.Integer(),\n            nullable=False,\n        ),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_group_id\"],\n            [\"user_group.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"persona_id\", \"user_group_id\"),\n    )\n\n    op.add_column(\n        \"document_set\",\n        sa.Column(\"is_public\", sa.Boolean(), nullable=True),\n    )\n    # fill in is_public for existing rows\n    op.execute(\"UPDATE document_set SET is_public = true WHERE is_public IS NULL\")\n    op.alter_column(\"document_set\", \"is_public\", nullable=False)\n\n    op.add_column(\n        \"persona\",\n        sa.Column(\"is_public\", sa.Boolean(), nullable=True),\n    )\n    # fill in is_public for existing rows\n    op.execute(\"UPDATE persona SET is_public = true WHERE is_public IS NULL\")\n    op.alter_column(\"persona\", \"is_public\", nullable=False)\n\n\ndef downgrade() -> None:\n    op.drop_column(\"persona\", \"is_public\")\n\n    op.drop_column(\"document_set\", \"is_public\")\n\n    op.drop_table(\"persona__user\")\n    op.drop_table(\"document_set__user\")\n    op.drop_table(\"persona__user_group\")\n    op.drop_table(\"document_set__user_group\")\n"
  },
  {
    "path": "backend/alembic/versions/eaa3b5593925_add_default_slack_channel_config.py",
    "content": "\"\"\"add default slack channel config\n\nRevision ID: eaa3b5593925\nRevises: 98a5008d8711\nCreate Date: 2025-02-03 18:07:56.552526\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"eaa3b5593925\"\ndown_revision = \"98a5008d8711\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add is_default column\n    op.add_column(\n        \"slack_channel_config\",\n        sa.Column(\"is_default\", sa.Boolean(), nullable=False, server_default=\"false\"),\n    )\n\n    op.create_index(\n        \"ix_slack_channel_config_slack_bot_id_default\",\n        \"slack_channel_config\",\n        [\"slack_bot_id\", \"is_default\"],\n        unique=True,\n        postgresql_where=sa.text(\"is_default IS TRUE\"),\n    )\n\n    # Create default channel configs for existing slack bots without one\n    conn = op.get_bind()\n    slack_bots = conn.execute(sa.text(\"SELECT id FROM slack_bot\")).fetchall()\n\n    for slack_bot in slack_bots:\n        slack_bot_id = slack_bot[0]\n        existing_default = conn.execute(\n            sa.text(\n                \"SELECT id FROM slack_channel_config WHERE slack_bot_id = :bot_id AND is_default = TRUE\"\n            ),\n            {\"bot_id\": slack_bot_id},\n        ).fetchone()\n\n        if not existing_default:\n            conn.execute(\n                sa.text(\n                    \"\"\"\n                    INSERT INTO slack_channel_config (\n                        slack_bot_id, persona_id, channel_config, enable_auto_filters, is_default\n                    ) VALUES (\n                        :bot_id, NULL,\n                        '{\"channel_name\": null, '\n                        '\"respond_member_group_list\": [], '\n                        '\"answer_filters\": [], '\n                        '\"follow_up_tags\": [], '\n                        '\"respond_tag_only\": true}',\n                        FALSE, TRUE\n                    )\n                \"\"\"\n                ),\n                {\"bot_id\": slack_bot_id},\n            )\n\n\ndef downgrade() -> None:\n    # Delete default slack channel configs\n    conn = op.get_bind()\n    conn.execute(sa.text(\"DELETE FROM slack_channel_config WHERE is_default = TRUE\"))\n\n    # Remove index\n    op.drop_index(\n        \"ix_slack_channel_config_slack_bot_id_default\",\n        table_name=\"slack_channel_config\",\n    )\n\n    # Remove is_default column\n    op.drop_column(\"slack_channel_config\", \"is_default\")\n"
  },
  {
    "path": "backend/alembic/versions/ec3ec2eabf7b_index_from_beginning.py",
    "content": "\"\"\"Index From Beginning\n\nRevision ID: ec3ec2eabf7b\nRevises: dbaa756c2ccf\nCreate Date: 2024-02-06 22:03:28.098158\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"ec3ec2eabf7b\"\ndown_revision = \"dbaa756c2ccf\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"index_attempt\", sa.Column(\"from_beginning\", sa.Boolean(), nullable=True)\n    )\n    op.execute(\"UPDATE index_attempt SET from_beginning = False\")\n    op.alter_column(\"index_attempt\", \"from_beginning\", nullable=False)\n\n\ndef downgrade() -> None:\n    op.drop_column(\"index_attempt\", \"from_beginning\")\n"
  },
  {
    "path": "backend/alembic/versions/ec85f2b3c544_remove_last_attempt_status_from_cc_pair.py",
    "content": "\"\"\"Remove Last Attempt Status from CC Pair\n\nRevision ID: ec85f2b3c544\nRevises: 3879338f8ba1\nCreate Date: 2024-05-23 21:39:46.126010\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"ec85f2b3c544\"\ndown_revision = \"70f00c45c0f2\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.drop_column(\"connector_credential_pair\", \"last_attempt_status\")\n\n\ndef downgrade() -> None:\n    op.add_column(\n        \"connector_credential_pair\",\n        sa.Column(\n            \"last_attempt_status\",\n            sa.VARCHAR(),\n            autoincrement=False,\n            nullable=True,\n        ),\n    )\n"
  },
  {
    "path": "backend/alembic/versions/ecab2b3f1a3b_add_overrides_to_the_chat_session.py",
    "content": "\"\"\"Add overrides to the chat session\n\nRevision ID: ecab2b3f1a3b\nRevises: 38eda64af7fe\nCreate Date: 2024-04-01 19:08:21.359102\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"ecab2b3f1a3b\"\ndown_revision = \"38eda64af7fe\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_session\",\n        sa.Column(\n            \"llm_override\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=True,\n        ),\n    )\n    op.add_column(\n        \"chat_session\",\n        sa.Column(\n            \"prompt_override\",\n            postgresql.JSONB(astext_type=sa.Text()),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_session\", \"prompt_override\")\n    op.drop_column(\"chat_session\", \"llm_override\")\n"
  },
  {
    "path": "backend/alembic/versions/ed9e44312505_add_icon_name_field.py",
    "content": "\"\"\"Add icon_name field\n\nRevision ID: ed9e44312505\nRevises: 5e6f7a8b9c0d\nCreate Date: 2025-12-03 16:35:07.828393\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"ed9e44312505\"\ndown_revision = \"5e6f7a8b9c0d\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add icon_name column\n    op.add_column(\"persona\", sa.Column(\"icon_name\", sa.String(), nullable=True))\n\n    # Remove old icon columns\n    op.drop_column(\"persona\", \"icon_shape\")\n    op.drop_column(\"persona\", \"icon_color\")\n\n\ndef downgrade() -> None:\n    # Re-add old icon columns\n    op.add_column(\"persona\", sa.Column(\"icon_color\", sa.String(), nullable=True))\n    op.add_column(\"persona\", sa.Column(\"icon_shape\", sa.Integer(), nullable=True))\n\n    # Remove icon_name column\n    op.drop_column(\"persona\", \"icon_name\")\n"
  },
  {
    "path": "backend/alembic/versions/ee3f4b47fad5_added_alternate_model_to_chat_message.py",
    "content": "\"\"\"Added alternate model to chat message\n\nRevision ID: ee3f4b47fad5\nRevises: 2d2304e27d8c\nCreate Date: 2024-08-12 00:11:50.915845\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"ee3f4b47fad5\"\ndown_revision = \"2d2304e27d8c\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"overridden_model\", sa.String(length=255), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_message\", \"overridden_model\")\n"
  },
  {
    "path": "backend/alembic/versions/ef7da92f7213_add_files_to_chatmessage.py",
    "content": "\"\"\"Add files to ChatMessage\n\nRevision ID: ef7da92f7213\nRevises: 401c1ac29467\nCreate Date: 2024-04-28 16:59:33.199153\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"ef7da92f7213\"\ndown_revision = \"401c1ac29467\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"files\", postgresql.JSONB(astext_type=sa.Text()), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_message\", \"files\")\n"
  },
  {
    "path": "backend/alembic/versions/efb35676026c_standard_answer_match_regex_flag.py",
    "content": "\"\"\"standard answer match_regex flag\n\nRevision ID: efb35676026c\nRevises: 0ebb1d516877\nCreate Date: 2024-09-11 13:55:46.101149\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"efb35676026c\"\ndown_revision = \"0ebb1d516877\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # ### commands auto generated by Alembic - please adjust! ###\n    op.add_column(\n        \"standard_answer\",\n        sa.Column(\n            \"match_regex\", sa.Boolean(), nullable=False, server_default=sa.false()\n        ),\n    )\n    # ### end Alembic commands ###\n\n\ndef downgrade() -> None:\n    # ### commands auto generated by Alembic - please adjust! ###\n    op.drop_column(\"standard_answer\", \"match_regex\")\n    # ### end Alembic commands ###\n"
  },
  {
    "path": "backend/alembic/versions/f11b408e39d3_force_lowercase_all_users.py",
    "content": "\"\"\"force lowercase all users\n\nRevision ID: f11b408e39d3\nRevises: 3bd4c84fe72f\nCreate Date: 2025-02-26 17:04:55.683500\n\n\"\"\"\n\n# revision identifiers, used by Alembic.\nrevision = \"f11b408e39d3\"\ndown_revision = \"3bd4c84fe72f\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # 1) Convert all existing user emails to lowercase\n    from alembic import op\n\n    op.execute(\n        \"\"\"\n        UPDATE \"user\"\n        SET email = LOWER(email)\n        \"\"\"\n    )\n\n    # 2) Add a check constraint to ensure emails are always lowercase\n    op.create_check_constraint(\"ensure_lowercase_email\", \"user\", \"email = LOWER(email)\")\n\n\ndef downgrade() -> None:\n    # Drop the check constraint\n    from alembic import op\n\n    op.drop_constraint(\"ensure_lowercase_email\", \"user\", type_=\"check\")\n"
  },
  {
    "path": "backend/alembic/versions/f13db29f3101_add_composite_index_for_last_modified_.py",
    "content": "\"\"\"Add composite index for last_modified and last_synced to document\n\nRevision ID: f13db29f3101\nRevises: b388730a2899\nCreate Date: 2025-02-18 22:48:11.511389\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"f13db29f3101\"\ndown_revision = \"acaab4ef4507\"\nbranch_labels: str | None = None\ndepends_on: str | None = None\n\n\ndef upgrade() -> None:\n    op.create_index(\n        \"ix_document_sync_status\",\n        \"document\",\n        [\"last_modified\", \"last_synced\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\"ix_document_sync_status\", table_name=\"document\")\n"
  },
  {
    "path": "backend/alembic/versions/f17bf3b0d9f1_embedding_provider_by_provider_type.py",
    "content": "\"\"\"embedding provider by provider type\n\nRevision ID: f17bf3b0d9f1\nRevises: 351faebd379d\nCreate Date: 2024-08-21 13:13:31.120460\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"f17bf3b0d9f1\"\ndown_revision = \"351faebd379d\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    # Add provider_type column to embedding_provider\n    op.add_column(\n        \"embedding_provider\",\n        sa.Column(\"provider_type\", sa.String(50), nullable=True),\n    )\n\n    # Update provider_type with existing name values\n    op.execute(\"UPDATE embedding_provider SET provider_type = UPPER(name)\")\n\n    # Make provider_type not nullable\n    op.alter_column(\"embedding_provider\", \"provider_type\", nullable=False)\n\n    # Drop the foreign key constraint in embedding_model table\n    op.drop_constraint(\n        \"fk_embedding_model_cloud_provider\", \"embedding_model\", type_=\"foreignkey\"\n    )\n\n    # Drop the existing primary key constraint\n    op.drop_constraint(\"embedding_provider_pkey\", \"embedding_provider\", type_=\"primary\")\n\n    # Create a new primary key constraint on provider_type\n    op.create_primary_key(\n        \"embedding_provider_pkey\", \"embedding_provider\", [\"provider_type\"]\n    )\n\n    # Add provider_type column to embedding_model\n    op.add_column(\n        \"embedding_model\",\n        sa.Column(\"provider_type\", sa.String(50), nullable=True),\n    )\n\n    # Update provider_type for existing embedding models\n    op.execute(\n        \"\"\"\n        UPDATE embedding_model\n        SET provider_type = (\n            SELECT provider_type\n            FROM embedding_provider\n            WHERE embedding_provider.id = embedding_model.cloud_provider_id\n        )\n    \"\"\"\n    )\n\n    # Drop the old id column from embedding_provider\n    op.drop_column(\"embedding_provider\", \"id\")\n\n    # Drop the name column from embedding_provider\n    op.drop_column(\"embedding_provider\", \"name\")\n\n    # Drop the default_model_id column from embedding_provider\n    op.drop_column(\"embedding_provider\", \"default_model_id\")\n\n    # Drop the old cloud_provider_id column from embedding_model\n    op.drop_column(\"embedding_model\", \"cloud_provider_id\")\n\n    # Create the new foreign key constraint\n    op.create_foreign_key(\n        \"fk_embedding_model_cloud_provider\",\n        \"embedding_model\",\n        \"embedding_provider\",\n        [\"provider_type\"],\n        [\"provider_type\"],\n    )\n\n\ndef downgrade() -> None:\n    # Drop the foreign key constraint in embedding_model table\n    op.drop_constraint(\n        \"fk_embedding_model_cloud_provider\", \"embedding_model\", type_=\"foreignkey\"\n    )\n\n    # Add back the cloud_provider_id column to embedding_model\n    op.add_column(\n        \"embedding_model\", sa.Column(\"cloud_provider_id\", sa.Integer(), nullable=True)\n    )\n    op.add_column(\"embedding_provider\", sa.Column(\"id\", sa.Integer(), nullable=True))\n\n    # Assign incrementing IDs to embedding providers\n    op.execute(\n        \"\"\"\n        CREATE SEQUENCE IF NOT EXISTS embedding_provider_id_seq;\"\"\"\n    )\n    op.execute(\n        \"\"\"\n        UPDATE embedding_provider SET id = nextval('embedding_provider_id_seq');\n    \"\"\"\n    )\n\n    # Update cloud_provider_id based on provider_type\n    op.execute(\n        \"\"\"\n        UPDATE embedding_model\n        SET cloud_provider_id = CASE\n            WHEN provider_type IS NULL THEN NULL\n            ELSE (\n                SELECT id\n                FROM embedding_provider\n                WHERE embedding_provider.provider_type = embedding_model.provider_type\n            )\n        END\n    \"\"\"\n    )\n\n    # Drop the provider_type column from embedding_model\n    op.drop_column(\"embedding_model\", \"provider_type\")\n\n    # Add back the columns to embedding_provider\n    op.add_column(\"embedding_provider\", sa.Column(\"name\", sa.String(50), nullable=True))\n    op.add_column(\n        \"embedding_provider\", sa.Column(\"default_model_id\", sa.Integer(), nullable=True)\n    )\n\n    # Drop the existing primary key constraint on provider_type\n    op.drop_constraint(\"embedding_provider_pkey\", \"embedding_provider\", type_=\"primary\")\n\n    # Create the original primary key constraint on id\n    op.create_primary_key(\"embedding_provider_pkey\", \"embedding_provider\", [\"id\"])\n\n    # Update name with existing provider_type values\n    op.execute(\n        \"\"\"\n        UPDATE embedding_provider\n        SET name = CASE\n            WHEN provider_type = 'OPENAI' THEN 'OpenAI'\n            WHEN provider_type = 'COHERE' THEN 'Cohere'\n            WHEN provider_type = 'GOOGLE' THEN 'Google'\n            WHEN provider_type = 'VOYAGE' THEN 'Voyage'\n            ELSE provider_type\n        END\n    \"\"\"\n    )\n\n    # Drop the provider_type column from embedding_provider\n    op.drop_column(\"embedding_provider\", \"provider_type\")\n\n    # Recreate the foreign key constraint in embedding_model table\n    op.create_foreign_key(\n        \"fk_embedding_model_cloud_provider\",\n        \"embedding_model\",\n        \"embedding_provider\",\n        [\"cloud_provider_id\"],\n        [\"id\"],\n    )\n\n    # Recreate the foreign key constraint in embedding_model table\n    op.create_foreign_key(\n        \"fk_embedding_provider_default_model\",\n        \"embedding_provider\",\n        \"embedding_model\",\n        [\"default_model_id\"],\n        [\"id\"],\n    )\n"
  },
  {
    "path": "backend/alembic/versions/f1c6478c3fd8_add_pre_defined_feedback.py",
    "content": "\"\"\"Add pre-defined feedback\n\nRevision ID: f1c6478c3fd8\nRevises: 643a84a42a33\nCreate Date: 2024-05-09 18:11:49.210667\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\nrevision = \"f1c6478c3fd8\"\ndown_revision = \"643a84a42a33\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"chat_feedback\",\n        sa.Column(\"predefined_feedback\", sa.String(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"chat_feedback\", \"predefined_feedback\")\n"
  },
  {
    "path": "backend/alembic/versions/f1ca58b2f2ec_add_passthrough_auth_to_tool.py",
    "content": "\"\"\"add passthrough auth to tool\n\nRevision ID: f1ca58b2f2ec\nRevises: c7bf5721733e\nCreate Date: 2024-03-19\n\n\"\"\"\n\nfrom typing import Sequence, Union\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision: str = \"f1ca58b2f2ec\"\ndown_revision: Union[str, None] = \"c7bf5721733e\"\nbranch_labels: Union[str, Sequence[str], None] = None\ndepends_on: Union[str, Sequence[str], None] = None\n\n\ndef upgrade() -> None:\n    # Add passthrough_auth column to tool table with default value of False\n    op.add_column(\n        \"tool\",\n        sa.Column(\n            \"passthrough_auth\", sa.Boolean(), nullable=False, server_default=sa.false()\n        ),\n    )\n\n\ndef downgrade() -> None:\n    # Remove passthrough_auth column from tool table\n    op.drop_column(\"tool\", \"passthrough_auth\")\n"
  },
  {
    "path": "backend/alembic/versions/f220515df7b4_add_flow_mapping_table.py",
    "content": "\"\"\"Add flow mapping table\n\nRevision ID: f220515df7b4\nRevises: cbc03e08d0f3\nCreate Date: 2026-01-30 12:21:24.955922\n\n\"\"\"\n\nfrom onyx.db.enums import LLMModelFlowType\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"f220515df7b4\"\ndown_revision = \"9d1543a37106\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"llm_model_flow\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\n            \"llm_model_flow_type\",\n            sa.Enum(LLMModelFlowType, name=\"llmmodelflowtype\", native_enum=False),\n            nullable=False,\n        ),\n        sa.Column(\n            \"is_default\", sa.Boolean(), nullable=False, server_default=sa.text(\"false\")\n        ),\n        sa.Column(\"model_configuration_id\", sa.Integer(), nullable=False),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.ForeignKeyConstraint(\n            [\"model_configuration_id\"], [\"model_configuration.id\"], ondelete=\"CASCADE\"\n        ),\n        sa.UniqueConstraint(\n            \"llm_model_flow_type\",\n            \"model_configuration_id\",\n            name=\"uq_model_config_per_llm_model_flow_type\",\n        ),\n    )\n\n    # Partial unique index so that there is at most one default for each flow type\n    op.create_index(\n        \"ix_one_default_per_llm_model_flow\",\n        \"llm_model_flow\",\n        [\"llm_model_flow_type\"],\n        unique=True,\n        postgresql_where=sa.text(\"is_default IS TRUE\"),\n    )\n\n\ndef downgrade() -> None:\n    # Drop the llm_model_flow table (index is dropped automatically with table)\n    op.drop_table(\"llm_model_flow\")\n"
  },
  {
    "path": "backend/alembic/versions/f32615f71aeb_add_custom_headers_to_tools.py",
    "content": "\"\"\"add custom headers to tools\n\nRevision ID: f32615f71aeb\nRevises: bd2921608c3a\nCreate Date: 2024-09-12 20:26:38.932377\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"f32615f71aeb\"\ndown_revision = \"bd2921608c3a\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"tool\", sa.Column(\"custom_headers\", postgresql.JSONB(), nullable=True)\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"tool\", \"custom_headers\")\n"
  },
  {
    "path": "backend/alembic/versions/f39c5794c10a_add_background_errors_table.py",
    "content": "\"\"\"Add background errors table\n\nRevision ID: f39c5794c10a\nRevises: 2cdeff6d8c93\nCreate Date: 2025-02-12 17:11:14.527876\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"f39c5794c10a\"\ndown_revision = \"2cdeff6d8c93\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"background_error\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"message\", sa.String(), nullable=False),\n        sa.Column(\n            \"time_created\",\n            sa.DateTime(timezone=True),\n            server_default=sa.text(\"now()\"),\n            nullable=False,\n        ),\n        sa.Column(\"cc_pair_id\", sa.Integer(), nullable=True),\n        sa.PrimaryKeyConstraint(\"id\"),\n        sa.ForeignKeyConstraint(\n            [\"cc_pair_id\"],\n            [\"connector_credential_pair.id\"],\n            ondelete=\"CASCADE\",\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"background_error\")\n"
  },
  {
    "path": "backend/alembic/versions/f5437cc136c5_delete_non_search_assistants.py",
    "content": "\"\"\"delete non-search assistants\n\nRevision ID: f5437cc136c5\nRevises: eaa3b5593925\nCreate Date: 2025-02-04 16:17:15.677256\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"f5437cc136c5\"\ndown_revision = \"eaa3b5593925\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    pass\n\n\ndef downgrade() -> None:\n    # Fix: split the statements into multiple op.execute() calls\n    op.execute(\n        \"\"\"\n        WITH personas_without_search AS (\n            SELECT p.id\n            FROM persona p\n            LEFT JOIN persona__tool pt ON p.id = pt.persona_id\n            LEFT JOIN tool t ON pt.tool_id = t.id\n            GROUP BY p.id\n            HAVING COUNT(CASE WHEN t.in_code_tool_id = 'run_search' THEN 1 END) = 0\n        )\n        UPDATE slack_channel_config\n        SET persona_id = NULL\n        WHERE is_default = TRUE AND persona_id IN (SELECT id FROM personas_without_search)\n        \"\"\"\n    )\n\n    op.execute(\n        \"\"\"\n        WITH personas_without_search AS (\n            SELECT p.id\n            FROM persona p\n            LEFT JOIN persona__tool pt ON p.id = pt.persona_id\n            LEFT JOIN tool t ON pt.tool_id = t.id\n            GROUP BY p.id\n            HAVING COUNT(CASE WHEN t.in_code_tool_id = 'run_search' THEN 1 END) = 0\n        )\n        DELETE FROM slack_channel_config\n        WHERE is_default = FALSE AND persona_id IN (SELECT id FROM personas_without_search)\n        \"\"\"\n    )\n"
  },
  {
    "path": "backend/alembic/versions/f71470ba9274_add_prompt_length_limit.py",
    "content": "\"\"\"add prompt length limit\n\nRevision ID: f71470ba9274\nRevises: 6a804aeb4830\nCreate Date: 2025-04-01 15:07:14.977435\n\n\"\"\"\n\n# revision identifiers, used by Alembic.\nrevision = \"f71470ba9274\"\ndown_revision = \"6a804aeb4830\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # op.alter_column(\n    #     \"prompt\",\n    #     \"system_prompt\",\n    #     existing_type=sa.TEXT(),\n    #     type_=sa.String(length=8000),\n    #     existing_nullable=False,\n    # )\n    # op.alter_column(\n    #     \"prompt\",\n    #     \"task_prompt\",\n    #     existing_type=sa.TEXT(),\n    #     type_=sa.String(length=8000),\n    #     existing_nullable=False,\n    # )\n    pass\n\n\ndef downgrade() -> None:\n    # op.alter_column(\n    #     \"prompt\",\n    #     \"system_prompt\",\n    #     existing_type=sa.String(length=8000),\n    #     type_=sa.TEXT(),\n    #     existing_nullable=False,\n    # )\n    # op.alter_column(\n    #     \"prompt\",\n    #     \"task_prompt\",\n    #     existing_type=sa.String(length=8000),\n    #     type_=sa.TEXT(),\n    #     existing_nullable=False,\n    # )\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/f7505c5b0284_updated_constraints_for_ccpairs.py",
    "content": "\"\"\"updated constraints for ccpairs\n\nRevision ID: f7505c5b0284\nRevises: f71470ba9274\nCreate Date: 2025-04-01 17:50:42.504818\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"f7505c5b0284\"\ndown_revision = \"f71470ba9274\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # 1) Drop the old foreign-key constraints\n    op.drop_constraint(\n        \"document_by_connector_credential_pair_connector_id_fkey\",\n        \"document_by_connector_credential_pair\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\n        \"document_by_connector_credential_pair_credential_id_fkey\",\n        \"document_by_connector_credential_pair\",\n        type_=\"foreignkey\",\n    )\n\n    # 2) Re-add them with ondelete='CASCADE'\n    op.create_foreign_key(\n        \"document_by_connector_credential_pair_connector_id_fkey\",\n        source_table=\"document_by_connector_credential_pair\",\n        referent_table=\"connector\",\n        local_cols=[\"connector_id\"],\n        remote_cols=[\"id\"],\n        ondelete=\"CASCADE\",\n    )\n    op.create_foreign_key(\n        \"document_by_connector_credential_pair_credential_id_fkey\",\n        source_table=\"document_by_connector_credential_pair\",\n        referent_table=\"credential\",\n        local_cols=[\"credential_id\"],\n        remote_cols=[\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n\ndef downgrade() -> None:\n    # Reverse the changes for rollback\n    op.drop_constraint(\n        \"document_by_connector_credential_pair_connector_id_fkey\",\n        \"document_by_connector_credential_pair\",\n        type_=\"foreignkey\",\n    )\n    op.drop_constraint(\n        \"document_by_connector_credential_pair_credential_id_fkey\",\n        \"document_by_connector_credential_pair\",\n        type_=\"foreignkey\",\n    )\n\n    # Recreate without CASCADE\n    op.create_foreign_key(\n        \"document_by_connector_credential_pair_connector_id_fkey\",\n        \"document_by_connector_credential_pair\",\n        \"connector\",\n        [\"connector_id\"],\n        [\"id\"],\n    )\n    op.create_foreign_key(\n        \"document_by_connector_credential_pair_credential_id_fkey\",\n        \"document_by_connector_credential_pair\",\n        \"credential\",\n        [\"credential_id\"],\n        [\"id\"],\n    )\n"
  },
  {
    "path": "backend/alembic/versions/f7a894b06d02_non_nullbale_slack_bot_id_in_channel_.py",
    "content": "\"\"\"non-nullbale slack bot id in channel config\n\nRevision ID: f7a894b06d02\nRevises: 9f696734098f\nCreate Date: 2024-12-06 12:55:42.845723\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"f7a894b06d02\"\ndown_revision = \"9f696734098f\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Delete all rows with null slack_bot_id\n    op.execute(\"DELETE FROM slack_channel_config WHERE slack_bot_id IS NULL\")\n\n    # Make slack_bot_id non-nullable\n    op.alter_column(\n        \"slack_channel_config\",\n        \"slack_bot_id\",\n        existing_type=sa.Integer(),\n        nullable=False,\n    )\n\n\ndef downgrade() -> None:\n    # Make slack_bot_id nullable again\n    op.alter_column(\n        \"slack_channel_config\",\n        \"slack_bot_id\",\n        existing_type=sa.Integer(),\n        nullable=True,\n    )\n"
  },
  {
    "path": "backend/alembic/versions/f7ca3e2f45d9_migrate_no_auth_data_to_placeholder.py",
    "content": "\"\"\"migrate_no_auth_data_to_placeholder\n\nThis migration handles the transition from AUTH_TYPE=disabled to requiring\nauthentication. It creates a placeholder user and assigns all data that was\ncreated without a user (user_id=NULL) to this placeholder.\n\nA database trigger is installed that automatically transfers all data from\nthe placeholder user to the first real user who registers, then drops itself.\n\nRevision ID: f7ca3e2f45d9\nRevises: 78ebc66946a0\nCreate Date: 2026-01-15 12:49:53.802741\n\n\"\"\"\n\nimport os\n\nfrom alembic import op\nimport sqlalchemy as sa\n\nfrom shared_configs.configs import MULTI_TENANT\n\n\n# revision identifiers, used by Alembic.\nrevision = \"f7ca3e2f45d9\"\ndown_revision = \"78ebc66946a0\"\nbranch_labels = None\ndepends_on = None\n\n# Must match constants in onyx/configs/constants.py file\nNO_AUTH_PLACEHOLDER_USER_UUID = \"00000000-0000-0000-0000-000000000001\"\nNO_AUTH_PLACEHOLDER_USER_EMAIL = \"no-auth-placeholder@onyx.app\"\n\n# Trigger and function names\nTRIGGER_NAME = \"trg_migrate_no_auth_data\"\nFUNCTION_NAME = \"migrate_no_auth_data_to_user\"\n\n# Trigger function that migrates data from placeholder to first real user\nMIGRATE_NO_AUTH_TRIGGER_FUNCTION = f\"\"\"\nCREATE OR REPLACE FUNCTION {FUNCTION_NAME}()\nRETURNS TRIGGER AS $$\nDECLARE\n    placeholder_uuid UUID := '00000000-0000-0000-0000-000000000001'::uuid;\n    anonymous_uuid UUID := '00000000-0000-0000-0000-000000000002'::uuid;\n    placeholder_row RECORD;\n    schema_name TEXT;\nBEGIN\n    -- Skip if this is the placeholder user being inserted\n    IF NEW.id = placeholder_uuid THEN\n        RETURN NULL;\n    END IF;\n\n    -- Skip if this is the anonymous user being inserted (not a real user)\n    IF NEW.id = anonymous_uuid THEN\n        RETURN NULL;\n    END IF;\n\n    -- Skip if the new user is not active\n    IF NEW.is_active = FALSE THEN\n        RETURN NULL;\n    END IF;\n\n    -- Get current schema for self-cleanup\n    schema_name := current_schema();\n\n    -- Try to lock the placeholder user row with FOR UPDATE SKIP LOCKED\n    -- This ensures only one concurrent transaction can proceed with migration\n    -- SKIP LOCKED means if another transaction has the lock, we skip (don't wait)\n    SELECT id INTO placeholder_row\n    FROM \"user\"\n    WHERE id = placeholder_uuid\n    FOR UPDATE SKIP LOCKED;\n\n    IF NOT FOUND THEN\n        -- Either placeholder doesn't exist or another transaction has it locked\n        -- Either way, drop the trigger and return without making admin\n        EXECUTE format('DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON %I.\"user\"', schema_name);\n        EXECUTE format('DROP FUNCTION IF EXISTS %I.{FUNCTION_NAME}()', schema_name);\n        RETURN NULL;\n    END IF;\n\n    -- We have exclusive lock on placeholder - proceed with migration\n    -- The INSERT has already completed (AFTER INSERT), so NEW.id exists in the table\n\n    -- Migrate chat_session\n    UPDATE \"chat_session\" SET user_id = NEW.id WHERE user_id = placeholder_uuid;\n\n    -- Migrate credential (exclude public credential id=0)\n    UPDATE \"credential\" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND id != 0;\n\n    -- Migrate document_set\n    UPDATE \"document_set\" SET user_id = NEW.id WHERE user_id = placeholder_uuid;\n\n    -- Migrate persona (exclude builtin personas)\n    UPDATE \"persona\" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND builtin_persona = FALSE;\n\n    -- Migrate tool (exclude builtin tools)\n    UPDATE \"tool\" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND in_code_tool_id IS NULL;\n\n    -- Migrate notification\n    UPDATE \"notification\" SET user_id = NEW.id WHERE user_id = placeholder_uuid;\n\n    -- Migrate inputprompt (exclude system/public prompts)\n    UPDATE \"inputprompt\" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND is_public = FALSE;\n\n    -- Make the new user an admin (they had admin access in no-auth mode)\n    -- In AFTER INSERT trigger, we must UPDATE the row since it already exists\n    UPDATE \"user\" SET role = 'ADMIN' WHERE id = NEW.id;\n\n    -- Delete the placeholder user (we hold the lock so this is safe)\n    DELETE FROM \"user\" WHERE id = placeholder_uuid;\n\n    -- Drop the trigger and function (self-cleanup)\n    EXECUTE format('DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON %I.\"user\"', schema_name);\n    EXECUTE format('DROP FUNCTION IF EXISTS %I.{FUNCTION_NAME}()', schema_name);\n\n    RETURN NULL;\nEND;\n$$ LANGUAGE plpgsql;\n\"\"\"\n\nMIGRATE_NO_AUTH_TRIGGER = f\"\"\"\nCREATE TRIGGER {TRIGGER_NAME}\nAFTER INSERT ON \"user\"\nFOR EACH ROW\nEXECUTE FUNCTION {FUNCTION_NAME}();\n\"\"\"\n\n\ndef upgrade() -> None:\n    \"\"\"\n    Create a placeholder user and assign all NULL user_id records to it.\n    Install a trigger that migrates data to the first real user and self-destructs.\n    Only runs if AUTH_TYPE is currently disabled/none.\n\n    Skipped in multi-tenant mode - each tenant starts fresh with no legacy data.\n    \"\"\"\n    # Skip in multi-tenant mode - this migration handles single-tenant\n    # AUTH_TYPE=disabled -> auth transitions only\n    if MULTI_TENANT:\n        return\n\n    # Only run if AUTH_TYPE is currently disabled/none\n    # If they've already switched to auth-enabled, NULL data is stale anyway\n    auth_type = (os.environ.get(\"AUTH_TYPE\") or \"\").lower()\n    if auth_type not in (\"disabled\", \"none\", \"\"):\n        print(f\"AUTH_TYPE is '{auth_type}', not disabled. Skipping migration.\")\n        return\n\n    connection = op.get_bind()\n\n    # Check if there are any NULL user_id records that need migration\n    tables_to_check = [\n        \"chat_session\",\n        \"credential\",\n        \"document_set\",\n        \"persona\",\n        \"tool\",\n        \"notification\",\n        \"inputprompt\",\n    ]\n\n    has_null_records = False\n    for table in tables_to_check:\n        try:\n            result = connection.execute(\n                sa.text(f'SELECT 1 FROM \"{table}\" WHERE user_id IS NULL LIMIT 1')\n            )\n            if result.fetchone():\n                has_null_records = True\n                break\n        except Exception:\n            # Table might not exist\n            pass\n\n    if not has_null_records:\n        return\n\n    # Create the placeholder user\n    connection.execute(\n        sa.text(\n            \"\"\"\n            INSERT INTO \"user\" (id, email, hashed_password, is_active, is_superuser, is_verified, role)\n            VALUES (:id, :email, :hashed_password, :is_active, :is_superuser, :is_verified, :role)\n            \"\"\"\n        ),\n        {\n            \"id\": NO_AUTH_PLACEHOLDER_USER_UUID,\n            \"email\": NO_AUTH_PLACEHOLDER_USER_EMAIL,\n            \"hashed_password\": \"\",  # Empty password - user cannot log in\n            \"is_active\": False,  # Inactive - user cannot log in\n            \"is_superuser\": False,\n            \"is_verified\": False,\n            \"role\": \"BASIC\",\n        },\n    )\n\n    # Assign NULL user_id records to the placeholder user\n    for table in tables_to_check:\n        try:\n            # Base condition for all tables\n            condition = \"user_id IS NULL\"\n            # Exclude public credential (id=0) which must remain user_id=NULL\n            if table == \"credential\":\n                condition += \" AND id != 0\"\n            # Exclude builtin tools (in_code_tool_id IS NOT NULL) which must remain user_id=NULL\n            elif table == \"tool\":\n                condition += \" AND in_code_tool_id IS NULL\"\n            # Exclude builtin personas which must remain user_id=NULL\n            elif table == \"persona\":\n                condition += \" AND builtin_persona = FALSE\"\n            # Exclude system/public input prompts which must remain user_id=NULL\n            elif table == \"inputprompt\":\n                condition += \" AND is_public = FALSE\"\n            result = connection.execute(\n                sa.text(\n                    f\"\"\"\n                    UPDATE \"{table}\"\n                    SET user_id = :user_id\n                    WHERE {condition}\n                    \"\"\"\n                ),\n                {\"user_id\": NO_AUTH_PLACEHOLDER_USER_UUID},\n            )\n            if result.rowcount > 0:\n                print(f\"Updated {result.rowcount} rows in {table}\")\n        except Exception as e:\n            print(f\"Skipping {table}: {e}\")\n\n    # Install the trigger function and trigger for automatic migration on first user registration\n    connection.execute(sa.text(MIGRATE_NO_AUTH_TRIGGER_FUNCTION))\n    connection.execute(sa.text(MIGRATE_NO_AUTH_TRIGGER))\n    print(\"Installed trigger for automatic data migration on first user registration\")\n\n\ndef downgrade() -> None:\n    \"\"\"\n    Drop trigger and function, set placeholder user's records back to NULL,\n    and delete the placeholder user.\n    \"\"\"\n    # Skip in multi-tenant mode for consistency with upgrade\n    if MULTI_TENANT:\n        return\n\n    connection = op.get_bind()\n\n    # Drop trigger and function if they exist (they may have already self-destructed)\n    connection.execute(sa.text(f'DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON \"user\"'))\n    connection.execute(sa.text(f\"DROP FUNCTION IF EXISTS {FUNCTION_NAME}()\"))\n\n    tables_to_update = [\n        \"chat_session\",\n        \"credential\",\n        \"document_set\",\n        \"persona\",\n        \"tool\",\n        \"notification\",\n        \"inputprompt\",\n    ]\n\n    # Set records back to NULL\n    for table in tables_to_update:\n        try:\n            connection.execute(\n                sa.text(\n                    f\"\"\"\n                    UPDATE \"{table}\"\n                    SET user_id = NULL\n                    WHERE user_id = :user_id\n                    \"\"\"\n                ),\n                {\"user_id\": NO_AUTH_PLACEHOLDER_USER_UUID},\n            )\n        except Exception:\n            pass\n\n    # Delete the placeholder user\n    connection.execute(\n        sa.text('DELETE FROM \"user\" WHERE id = :user_id'),\n        {\"user_id\": NO_AUTH_PLACEHOLDER_USER_UUID},\n    )\n"
  },
  {
    "path": "backend/alembic/versions/f7e58d357687_add_has_web_column_to_user.py",
    "content": "\"\"\"add has_web_login column to user\n\nRevision ID: f7e58d357687\nRevises: ba98eba0f66a\nCreate Date: 2024-09-07 20:20:54.522620\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"f7e58d357687\"\ndown_revision = \"ba98eba0f66a\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\"has_web_login\", sa.Boolean(), nullable=False, server_default=\"true\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"has_web_login\")\n"
  },
  {
    "path": "backend/alembic/versions/f8a9b2c3d4e5_add_research_answer_purpose_to_chat_message.py",
    "content": "\"\"\"add research_answer_purpose to chat_message\n\nRevision ID: f8a9b2c3d4e5\nRevises: 5ae8240accb3\nCreate Date: 2025-01-27 12:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"f8a9b2c3d4e5\"\ndown_revision = \"5ae8240accb3\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add research_answer_purpose column to chat_message table\n    op.add_column(\n        \"chat_message\",\n        sa.Column(\"research_answer_purpose\", sa.String(), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    # Remove research_answer_purpose column from chat_message table\n    op.drop_column(\"chat_message\", \"research_answer_purpose\")\n"
  },
  {
    "path": "backend/alembic/versions/f9b8c7d6e5a4_update_parent_question_id_foreign_key_to_research_agent_iteration.py",
    "content": "\"\"\"remove foreign key constraints from research_agent_iteration_sub_step\n\nRevision ID: f9b8c7d6e5a4\nRevises: bd7c3bf8beba\nCreate Date: 2025-01-27 12:00:00.000000\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"f9b8c7d6e5a4\"\ndown_revision = \"bd7c3bf8beba\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Drop the existing foreign key constraint for parent_question_id\n    op.drop_constraint(\n        \"research_agent_iteration_sub_step_parent_question_id_fkey\",\n        \"research_agent_iteration_sub_step\",\n        type_=\"foreignkey\",\n    )\n\n    # Drop the parent_question_id column entirely\n    op.drop_column(\"research_agent_iteration_sub_step\", \"parent_question_id\")\n\n    # Drop the foreign key constraint for primary_question_id to chat_message.id\n    # (keep the column as it's needed for the composite foreign key)\n    op.drop_constraint(\n        \"research_agent_iteration_sub_step_primary_question_id_fkey\",\n        \"research_agent_iteration_sub_step\",\n        type_=\"foreignkey\",\n    )\n\n\ndef downgrade() -> None:\n    # Restore the foreign key constraint for primary_question_id to chat_message.id\n    op.create_foreign_key(\n        \"research_agent_iteration_sub_step_primary_question_id_fkey\",\n        \"research_agent_iteration_sub_step\",\n        \"chat_message\",\n        [\"primary_question_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n\n    # Add back the parent_question_id column\n    op.add_column(\n        \"research_agent_iteration_sub_step\",\n        sa.Column(\n            \"parent_question_id\",\n            sa.Integer(),\n            nullable=True,\n        ),\n    )\n\n    # Restore the foreign key constraint pointing to research_agent_iteration_sub_step.id\n    op.create_foreign_key(\n        \"research_agent_iteration_sub_step_parent_question_id_fkey\",\n        \"research_agent_iteration_sub_step\",\n        \"research_agent_iteration_sub_step\",\n        [\"parent_question_id\"],\n        [\"id\"],\n        ondelete=\"CASCADE\",\n    )\n"
  },
  {
    "path": "backend/alembic/versions/fad14119fb92_delete_tags_with_wrong_enum.py",
    "content": "\"\"\"Delete Tags with wrong Enum\n\nRevision ID: fad14119fb92\nRevises: 72bdc9929a46\nCreate Date: 2024-04-25 17:05:09.695703\n\n\"\"\"\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"fad14119fb92\"\ndown_revision = \"72bdc9929a46\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    # Some documents may lose their tags but this is the only way as the enum\n    # mapping may have changed since tag switched to string (it will be reindexed anyway)\n    op.execute(\n        \"\"\"\n        DELETE FROM document__tag\n        WHERE tag_id IN (\n            SELECT id FROM tag\n            WHERE source ~ '^[0-9]+$'\n        )\n        \"\"\"\n    )\n\n    op.execute(\n        \"\"\"\n        DELETE FROM tag\n        WHERE source ~ '^[0-9]+$'\n        \"\"\"\n    )\n\n\ndef downgrade() -> None:\n    pass\n"
  },
  {
    "path": "backend/alembic/versions/fb80bdd256de_add_chat_background_to_user.py",
    "content": "\"\"\"add chat_background to user\n\nRevision ID: fb80bdd256de\nRevises: 8b5ce697290e\nCreate Date: 2026-01-16 16:15:59.222617\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"fb80bdd256de\"\ndown_revision = \"8b5ce697290e\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"user\",\n        sa.Column(\n            \"chat_background\",\n            sa.String(),\n            nullable=True,\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user\", \"chat_background\")\n"
  },
  {
    "path": "backend/alembic/versions/fcd135795f21_add_slack_bot_display_type.py",
    "content": "\"\"\"Add slack bot display type\n\nRevision ID: fcd135795f21\nRevises: 0a2b51deb0b8\nCreate Date: 2024-03-04 17:03:27.116284\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"fcd135795f21\"\ndown_revision = \"0a2b51deb0b8\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"slack_bot_config\",\n        sa.Column(\n            \"response_type\",\n            sa.Enum(\n                \"QUOTES\",\n                \"CITATIONS\",\n                name=\"slackbotresponsetype\",\n                native_enum=False,\n            ),\n            nullable=True,\n        ),\n    )\n    op.execute(\n        \"UPDATE slack_bot_config SET response_type = 'QUOTES' WHERE response_type IS NULL\"\n    )\n    op.alter_column(\"slack_bot_config\", \"response_type\", nullable=False)\n\n\ndef downgrade() -> None:\n    op.drop_column(\"slack_bot_config\", \"response_type\")\n"
  },
  {
    "path": "backend/alembic/versions/febe9eaa0644_add_document_set_persona_relationship_.py",
    "content": "\"\"\"Add document_set / persona relationship table\n\nRevision ID: febe9eaa0644\nRevises: 57b53544726e\nCreate Date: 2023-09-24 13:06:24.018610\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"febe9eaa0644\"\ndown_revision = \"57b53544726e\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"persona__document_set\",\n        sa.Column(\"persona_id\", sa.Integer(), nullable=False),\n        sa.Column(\"document_set_id\", sa.Integer(), nullable=False),\n        sa.ForeignKeyConstraint(\n            [\"document_set_id\"],\n            [\"document_set.id\"],\n        ),\n        sa.ForeignKeyConstraint(\n            [\"persona_id\"],\n            [\"persona.id\"],\n        ),\n        sa.PrimaryKeyConstraint(\"persona_id\", \"document_set_id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"persona__document_set\")\n"
  },
  {
    "path": "backend/alembic/versions/fec3db967bf7_add_time_updated_to_usergroup_and_.py",
    "content": "\"\"\"Add time_updated to UserGroup and DocumentSet\n\nRevision ID: fec3db967bf7\nRevises: 97dbb53fa8c8\nCreate Date: 2025-01-12 15:49:02.289100\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n# revision identifiers, used by Alembic.\nrevision = \"fec3db967bf7\"\ndown_revision = \"97dbb53fa8c8\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"document_set\",\n        sa.Column(\n            \"time_last_modified_by_user\",\n            sa.DateTime(timezone=True),\n            nullable=False,\n            server_default=sa.func.now(),\n        ),\n    )\n    op.add_column(\n        \"user_group\",\n        sa.Column(\n            \"time_last_modified_by_user\",\n            sa.DateTime(timezone=True),\n            nullable=False,\n            server_default=sa.func.now(),\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"user_group\", \"time_last_modified_by_user\")\n    op.drop_column(\"document_set\", \"time_last_modified_by_user\")\n"
  },
  {
    "path": "backend/alembic/versions/feead2911109_add_opensearch_tenant_migration_columns.py",
    "content": "\"\"\"add_opensearch_tenant_migration_columns\n\nRevision ID: feead2911109\nRevises: d56ffa94ca32\nCreate Date: 2026-02-10 17:46:34.029937\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\n# revision identifiers, used by Alembic.\nrevision = \"feead2911109\"\ndown_revision = \"175ea04c7087\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"opensearch_tenant_migration_record\",\n        sa.Column(\"vespa_visit_continuation_token\", sa.Text(), nullable=True),\n    )\n    op.add_column(\n        \"opensearch_tenant_migration_record\",\n        sa.Column(\n            \"total_chunks_migrated\",\n            sa.Integer(),\n            nullable=False,\n            server_default=\"0\",\n        ),\n    )\n    op.add_column(\n        \"opensearch_tenant_migration_record\",\n        sa.Column(\n            \"created_at\",\n            sa.DateTime(timezone=True),\n            nullable=False,\n            server_default=sa.func.now(),\n        ),\n    )\n    op.add_column(\n        \"opensearch_tenant_migration_record\",\n        sa.Column(\n            \"migration_completed_at\",\n            sa.DateTime(timezone=True),\n            nullable=True,\n        ),\n    )\n    op.add_column(\n        \"opensearch_tenant_migration_record\",\n        sa.Column(\n            \"enable_opensearch_retrieval\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=\"false\",\n        ),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"opensearch_tenant_migration_record\", \"enable_opensearch_retrieval\")\n    op.drop_column(\"opensearch_tenant_migration_record\", \"migration_completed_at\")\n    op.drop_column(\"opensearch_tenant_migration_record\", \"created_at\")\n    op.drop_column(\"opensearch_tenant_migration_record\", \"total_chunks_migrated\")\n    op.drop_column(\n        \"opensearch_tenant_migration_record\", \"vespa_visit_continuation_token\"\n    )\n"
  },
  {
    "path": "backend/alembic/versions/ffc707a226b4_basic_document_metadata.py",
    "content": "\"\"\"Basic Document Metadata\n\nRevision ID: ffc707a226b4\nRevises: 30c1d5744104\nCreate Date: 2023-10-18 16:52:25.967592\n\n\"\"\"\n\nfrom alembic import op\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n\n# revision identifiers, used by Alembic.\nrevision = \"ffc707a226b4\"\ndown_revision = \"30c1d5744104\"\nbranch_labels: None = None\ndepends_on: None = None\n\n\ndef upgrade() -> None:\n    op.add_column(\n        \"document\",\n        sa.Column(\"doc_updated_at\", sa.DateTime(timezone=True), nullable=True),\n    )\n    op.add_column(\n        \"document\",\n        sa.Column(\"primary_owners\", postgresql.ARRAY(sa.String()), nullable=True),\n    )\n    op.add_column(\n        \"document\",\n        sa.Column(\"secondary_owners\", postgresql.ARRAY(sa.String()), nullable=True),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_column(\"document\", \"secondary_owners\")\n    op.drop_column(\"document\", \"primary_owners\")\n    op.drop_column(\"document\", \"doc_updated_at\")\n"
  },
  {
    "path": "backend/alembic.ini",
    "content": "# A generic, single database configuration.\n\n[DEFAULT]\n# path to migration scripts\nscript_location = alembic\n\n# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s\n# Uncomment the line below if you want the files to be prepended with date and time\n# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s\n\n# sys.path path, will be prepended to sys.path if present.\n# defaults to the current working directory.\nprepend_sys_path = .\n\n# timezone to use when rendering the date within the migration file\n# as well as the filename.\n# If specified, requires the python-dateutil library that can be\n# installed by adding `alembic[tz]` to the pip requirements\n# string value is passed to dateutil.tz.gettz()\n# leave blank for localtime\n# timezone =\n\n# max length of characters to apply to the\n# \"slug\" field\n# truncate_slug_length = 40\n\n# set to 'true' to run the environment during\n# the 'revision' command, regardless of autogenerate\n# revision_environment = false\n\n# set to 'true' to allow .pyc and .pyo files without\n# a source .py file to be detected as revisions in the\n# versions/ directory\n# sourceless = false\n\n# version location specification; This defaults\n# to alembic/versions.  When using multiple version\n# directories, initial revisions must be specified with --version-path.\n# The path separator used here should be the separator specified by \"version_path_separator\" below.\n# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions\n\n# version path separator; As mentioned above, this is the character used to split\n# version_locations. The default within new alembic.ini files is \"os\", which uses os.pathsep.\n# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.\n# Valid values for version_path_separator are:\n#\n# version_path_separator = :\n# version_path_separator = ;\n# version_path_separator = space\nversion_path_separator = os  \n# Use os.pathsep. Default configuration used for new projects.\n\n# set to 'true' to search source files recursively\n# in each \"version_locations\" directory\n# new in Alembic version 1.10\n# recursive_version_locations = false\n\n# the output encoding used when revision files\n# are written from script.py.mako\n# output_encoding = utf-8\n\n# sqlalchemy.url = driver://user:pass@localhost/dbname\n\n\n[post_write_hooks]\n# post_write_hooks defines scripts or Python functions that are run\n# on newly generated revision scripts.  See the documentation for further\n# detail and examples\n\n# format using \"black\" - use the console_scripts runner, against the \"black\" entrypoint\nhooks = black\nblack.type = console_scripts\nblack.entrypoint = black\nblack.options = -l 79 REVISION_SCRIPT_FILENAME\n\n# Logging configuration\n[loggers]\nkeys = root,sqlalchemy,alembic\n\n[handlers]\nkeys = console\n\n[formatters]\nkeys = generic\n\n[logger_root]\nlevel = INFO\nhandlers = console\nqualname =\n\n[logger_sqlalchemy]\nlevel = WARN\nhandlers =\nqualname = sqlalchemy.engine\n\n[logger_alembic]\nlevel = INFO\nhandlers =\nqualname = alembic\n\n[handler_console]\nclass = StreamHandler\nargs = (sys.stderr,)\nlevel = NOTSET\nformatter = generic\n\n[formatter_generic]\nformat = %(levelname)-5.5s [%(name)s] %(message)s\ndatefmt = %H:%M:%S\n\n\n[alembic]\nscript_location = alembic\nversion_locations = %(script_location)s/versions\n\n[schema_private]\nscript_location = alembic_tenants\nversion_locations = %(script_location)s/versions\n"
  },
  {
    "path": "backend/alembic_tenants/README.md",
    "content": "These files are for public table migrations when operating with multi tenancy.\n\nIf you are not a Onyx developer, you can ignore this directory entirely.\n"
  },
  {
    "path": "backend/alembic_tenants/__init__.py",
    "content": ""
  },
  {
    "path": "backend/alembic_tenants/env.py",
    "content": "import asyncio\nfrom logging.config import fileConfig\nfrom typing import Literal\n\nfrom sqlalchemy import pool\nfrom sqlalchemy.engine import Connection\nfrom sqlalchemy.ext.asyncio import create_async_engine\nfrom sqlalchemy.schema import SchemaItem\n\nfrom alembic import context\nfrom onyx.db.engine.sql_engine import build_connection_string\nfrom onyx.db.models import PublicBase\n\n# this is the Alembic Config object, which provides\n# access to the values within the .ini file in use.\nconfig = context.config\n\n# Interpret the config file for Python logging.\n# This line sets up loggers basically.\nif config.config_file_name is not None and config.attributes.get(\n    \"configure_logger\", True\n):\n    # disable_existing_loggers=False prevents breaking pytest's caplog fixture\n    # See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues\n    fileConfig(config.config_file_name, disable_existing_loggers=False)\n\n# add your model's MetaData object here\n# for 'autogenerate' support\n# from myapp import mymodel\n# target_metadata = mymodel.Base.metadata\ntarget_metadata = [PublicBase.metadata]\n\n# other values from the config, defined by the needs of env.py,\n# can be acquired:\n# my_important_option = config.get_main_option(\"my_important_option\")\n# ... etc.\n\nEXCLUDE_TABLES = {\"kombu_queue\", \"kombu_message\"}\n\n\ndef include_object(\n    object: SchemaItem,  # noqa: ARG001\n    name: str | None,\n    type_: Literal[\n        \"schema\",\n        \"table\",\n        \"column\",\n        \"index\",\n        \"unique_constraint\",\n        \"foreign_key_constraint\",\n    ],\n    reflected: bool,  # noqa: ARG001\n    compare_to: SchemaItem | None,  # noqa: ARG001\n) -> bool:\n    if type_ == \"table\" and name in EXCLUDE_TABLES:\n        return False\n    return True\n\n\ndef run_migrations_offline() -> None:\n    \"\"\"Run migrations in 'offline' mode.\n\n    This configures the context with just a URL\n    and not an Engine, though an Engine is acceptable\n    here as well.  By skipping the Engine creation\n    we don't even need a DBAPI to be available.\n\n    Calls to context.execute() here emit the given string to the\n    script output.\n\n    \"\"\"\n    url = build_connection_string()\n    context.configure(\n        url=url,\n        target_metadata=target_metadata,  # type: ignore\n        literal_binds=True,\n        dialect_opts={\"paramstyle\": \"named\"},\n    )\n\n    with context.begin_transaction():\n        context.run_migrations()\n\n\ndef do_run_migrations(connection: Connection) -> None:\n    context.configure(\n        connection=connection,\n        target_metadata=target_metadata,  # type: ignore[arg-type]\n        include_object=include_object,\n    )\n\n    with context.begin_transaction():\n        context.run_migrations()\n\n\nasync def run_async_migrations() -> None:\n    \"\"\"In this scenario we need to create an Engine\n    and associate a connection with the context.\n\n    \"\"\"\n\n    connectable = create_async_engine(\n        build_connection_string(),\n        poolclass=pool.NullPool,\n    )\n\n    async with connectable.connect() as connection:\n        await connection.run_sync(do_run_migrations)\n\n    await connectable.dispose()\n\n\ndef run_migrations_online() -> None:\n    \"\"\"Run migrations in 'online' mode.\n\n    Supports pytest-alembic by checking for a pre-configured connection\n    in context.config.attributes[\"connection\"]. If present, uses that\n    connection/engine directly instead of creating a new async engine.\n    \"\"\"\n    # Check if pytest-alembic is providing a connection/engine\n    connectable = context.config.attributes.get(\"connection\", None)\n\n    if connectable is not None:\n        # pytest-alembic is providing an engine - use it directly\n        with connectable.connect() as connection:\n            do_run_migrations(connection)\n            # Commit to ensure changes are visible to next migration\n            connection.commit()\n    else:\n        # Normal operation - use async migrations\n        asyncio.run(run_async_migrations())\n\n\nif context.is_offline_mode():\n    run_migrations_offline()\nelse:\n    run_migrations_online()\n"
  },
  {
    "path": "backend/alembic_tenants/script.py.mako",
    "content": "\"\"\"${message}\n\nRevision ID: ${up_revision}\nRevises: ${down_revision | comma,n}\nCreate Date: ${create_date}\n\n\"\"\"\nfrom alembic import op\nimport sqlalchemy as sa\n${imports if imports else \"\"}\n\n# revision identifiers, used by Alembic.\nrevision = ${repr(up_revision)}\ndown_revision = ${repr(down_revision)}\nbranch_labels = ${repr(branch_labels)}\ndepends_on = ${repr(depends_on)}\n\n\ndef upgrade() -> None:\n    ${upgrades if upgrades else \"pass\"}\n\n\ndef downgrade() -> None:\n    ${downgrades if downgrades else \"pass\"}\n"
  },
  {
    "path": "backend/alembic_tenants/versions/14a83a331951_create_usertenantmapping_table.py",
    "content": "import sqlalchemy as sa\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision = \"14a83a331951\"\ndown_revision = None\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"user_tenant_mapping\",\n        sa.Column(\"email\", sa.String(), nullable=False),\n        sa.Column(\"tenant_id\", sa.String(), nullable=False),\n        sa.UniqueConstraint(\"email\", \"tenant_id\", name=\"uq_user_tenant\"),\n        sa.UniqueConstraint(\"email\", name=\"uq_email\"),\n        schema=\"public\",\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"user_tenant_mapping\", schema=\"public\")\n"
  },
  {
    "path": "backend/alembic_tenants/versions/34e3630c7f32_lowercase_multi_tenant_user_auth.py",
    "content": "\"\"\"lowercase multi-tenant user auth\n\nRevision ID: 34e3630c7f32\nRevises: a4f6ee863c47\nCreate Date: 2025-02-26 15:03:01.211894\n\n\"\"\"\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"34e3630c7f32\"\ndown_revision = \"a4f6ee863c47\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # 1) Convert all existing rows to lowercase\n    op.execute(\n        \"\"\"\n        UPDATE user_tenant_mapping\n        SET email = LOWER(email)\n        \"\"\"\n    )\n    # 2) Add a check constraint so that emails cannot be written in uppercase\n    op.create_check_constraint(\n        \"ensure_lowercase_email\",\n        \"user_tenant_mapping\",\n        \"email = LOWER(email)\",\n        schema=\"public\",\n    )\n\n\ndef downgrade() -> None:\n    # Drop the check constraint\n    op.drop_constraint(\n        \"ensure_lowercase_email\",\n        \"user_tenant_mapping\",\n        schema=\"public\",\n        type_=\"check\",\n    )\n"
  },
  {
    "path": "backend/alembic_tenants/versions/3b45e0018bf1_add_new_available_tenant_table.py",
    "content": "\"\"\"add new available tenant table\n\nRevision ID: 3b45e0018bf1\nRevises: ac842f85f932\nCreate Date: 2025-03-06 09:55:18.229910\n\n\"\"\"\n\nimport sqlalchemy as sa\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"3b45e0018bf1\"\ndown_revision = \"ac842f85f932\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Create new_available_tenant table\n    op.create_table(\n        \"available_tenant\",\n        sa.Column(\"tenant_id\", sa.String(), nullable=False),\n        sa.Column(\"alembic_version\", sa.String(), nullable=False),\n        sa.Column(\"date_created\", sa.DateTime(), nullable=False),\n        sa.PrimaryKeyConstraint(\"tenant_id\"),\n    )\n\n\ndef downgrade() -> None:\n    # Drop new_available_tenant table\n    op.drop_table(\"available_tenant\")\n"
  },
  {
    "path": "backend/alembic_tenants/versions/3b9f09038764_add_read_only_kg_user.py",
    "content": "\"\"\"add_db_readonly_user\n\nRevision ID: 3b9f09038764\nRevises: 3b45e0018bf1\nCreate Date: 2025-05-11 11:05:11.436977\n\n\"\"\"\n\nfrom sqlalchemy import text\n\nfrom alembic import op\nfrom onyx.configs.app_configs import DB_READONLY_PASSWORD\nfrom onyx.configs.app_configs import DB_READONLY_USER\n\n\n# revision identifiers, used by Alembic.\nrevision = \"3b9f09038764\"\ndown_revision = \"3b45e0018bf1\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Enable pg_trgm extension if not already enabled\n    op.execute(\"CREATE EXTENSION IF NOT EXISTS pg_trgm\")\n\n    # Create the read-only db user if it does not already exist.\n    if not (DB_READONLY_USER and DB_READONLY_PASSWORD):\n        raise Exception(\"DB_READONLY_USER or DB_READONLY_PASSWORD is not set\")\n\n    op.execute(\n        text(\n            f\"\"\"\n            DO $$\n            BEGIN\n                -- Check if the read-only user already exists\n                IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN\n                    -- Create the read-only user with the specified password\n                    EXECUTE format('CREATE USER %I WITH PASSWORD %L', '{DB_READONLY_USER}', '{DB_READONLY_PASSWORD}');\n                    -- First revoke all privileges to ensure a clean slate\n                    EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{DB_READONLY_USER}');\n                    -- Grant only the CONNECT privilege to allow the user to connect to the database\n                    -- but not perform any operations without additional specific grants\n                    EXECUTE format('GRANT CONNECT ON DATABASE %I TO %I', current_database(), '{DB_READONLY_USER}');\n                END IF;\n            END\n            $$;\n            \"\"\"\n        )\n    )\n\n\ndef downgrade() -> None:\n    op.execute(\n        text(\n            f\"\"\"\n        DO $$\n        BEGIN\n            IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN\n                -- First revoke all privileges from the database\n                EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{DB_READONLY_USER}');\n                -- Then revoke all privileges from the public schema\n                EXECUTE format('REVOKE ALL ON SCHEMA public FROM %I', '{DB_READONLY_USER}');\n                -- Then drop the user\n                EXECUTE format('DROP USER %I', '{DB_READONLY_USER}');\n            END IF;\n        END\n        $$;\n    \"\"\"\n        )\n    )\n    op.execute(text(\"DROP EXTENSION IF EXISTS pg_trgm\"))\n"
  },
  {
    "path": "backend/alembic_tenants/versions/a4f6ee863c47_mapping_for_anonymous_user_path.py",
    "content": "\"\"\"mapping for anonymous user path\n\nRevision ID: a4f6ee863c47\nRevises: 14a83a331951\nCreate Date: 2025-01-04 14:16:58.697451\n\n\"\"\"\n\nimport sqlalchemy as sa\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"a4f6ee863c47\"\ndown_revision = \"14a83a331951\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"tenant_anonymous_user_path\",\n        sa.Column(\"tenant_id\", sa.String(), primary_key=True, nullable=False),\n        sa.Column(\"anonymous_user_path\", sa.String(), nullable=False),\n        sa.PrimaryKeyConstraint(\"tenant_id\"),\n        sa.UniqueConstraint(\"anonymous_user_path\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"tenant_anonymous_user_path\")\n"
  },
  {
    "path": "backend/alembic_tenants/versions/ac842f85f932_new_column_user_tenant_mapping.py",
    "content": "\"\"\"new column user tenant mapping\n\nRevision ID: ac842f85f932\nRevises: 34e3630c7f32\nCreate Date: 2025-03-03 13:30:14.802874\n\n\"\"\"\n\nimport sqlalchemy as sa\n\nfrom alembic import op\n\n\n# revision identifiers, used by Alembic.\nrevision = \"ac842f85f932\"\ndown_revision = \"34e3630c7f32\"\nbranch_labels = None\ndepends_on = None\n\n\ndef upgrade() -> None:\n    # Add active column with default value of True\n    op.add_column(\n        \"user_tenant_mapping\",\n        sa.Column(\n            \"active\",\n            sa.Boolean(),\n            nullable=False,\n            server_default=\"true\",\n        ),\n        schema=\"public\",\n    )\n\n    op.drop_constraint(\"uq_email\", \"user_tenant_mapping\", schema=\"public\")\n\n    # Create a unique index for active=true records\n    # This ensures a user can only be active in one tenant at a time\n    op.execute(\n        \"CREATE UNIQUE INDEX uq_user_active_email_idx ON public.user_tenant_mapping (email) WHERE active = true\"\n    )\n\n\ndef downgrade() -> None:\n    # Drop the unique index for active=true records\n    op.execute(\"DROP INDEX IF EXISTS uq_user_active_email_idx\")\n\n    op.create_unique_constraint(\n        \"uq_email\", \"user_tenant_mapping\", [\"email\"], schema=\"public\"\n    )\n\n    # Remove the active column\n    op.drop_column(\"user_tenant_mapping\", \"active\", schema=\"public\")\n"
  },
  {
    "path": "backend/assets/.gitignore",
    "content": "*\n!.gitignore\n"
  },
  {
    "path": "backend/ee/LICENSE",
    "content": "The Onyx Enterprise License (the \"Enterprise License\")\nCopyright (c) 2023-present DanswerAI, Inc.\n\nWith regard to the Onyx Software:\n\nThis software and associated documentation files (the \"Software\") may only be\nused in production, if you (and any entity that you represent) have agreed to,\nand are in compliance with, the Onyx Subscription Terms of Service, available\nat https://www.onyx.app/legal/self-host (the \"Enterprise Terms\"), or other\nagreement governing the use of the Software, as agreed by you and DanswerAI,\nand otherwise have a valid Onyx Enterprise License for the\ncorrect number of user seats. Subject to the foregoing sentence, you are free to\nmodify this Software and publish patches to the Software. You agree that DanswerAI\nand/or its licensors (as applicable) retain all right, title and interest in and\nto all such modifications and/or patches, and all such modifications and/or\npatches may only be used, copied, modified, displayed, distributed, or otherwise\nexploited with a valid Onyx Enterprise License for the correct\nnumber of user seats. Notwithstanding the foregoing, you may copy and modify\nthe Software for development and testing purposes, without requiring a\nsubscription. You agree that DanswerAI and/or its licensors (as applicable) retain\nall right, title and interest in and to all such modifications. You are not\ngranted any other rights beyond what is expressly stated herein. Subject to the\nforegoing, it is forbidden to copy, merge, publish, distribute, sublicense,\nand/or sell the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n\nFor all third party components incorporated into the Onyx Software, those\ncomponents are licensed under the original license provided by the owner of the\napplicable component.\n"
  },
  {
    "path": "backend/ee/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/access/access.py",
    "content": "from sqlalchemy.orm import Session\n\nfrom ee.onyx.db.external_perm import fetch_external_groups_for_user\nfrom ee.onyx.db.external_perm import fetch_public_external_group_ids\nfrom ee.onyx.db.user_group import fetch_user_groups_for_documents\nfrom ee.onyx.db.user_group import fetch_user_groups_for_user\nfrom ee.onyx.external_permissions.sync_params import get_source_perm_sync_config\nfrom onyx.access.access import (\n    _get_access_for_documents as get_access_for_documents_without_groups,\n)\nfrom onyx.access.access import _get_acl_for_user as get_acl_for_user_without_groups\nfrom onyx.access.access import collect_user_file_access\nfrom onyx.access.models import DocumentAccess\nfrom onyx.access.utils import prefix_external_group\nfrom onyx.access.utils import prefix_user_group\nfrom onyx.db.document import get_document_sources\nfrom onyx.db.document import get_documents_by_ids\nfrom onyx.db.models import User\nfrom onyx.db.models import UserFile\nfrom onyx.db.user_file import fetch_user_files_with_access_relationships\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\ndef _get_access_for_document(\n    document_id: str,\n    db_session: Session,\n) -> DocumentAccess:\n    id_to_access = _get_access_for_documents([document_id], db_session)\n    if len(id_to_access) == 0:\n        return DocumentAccess.build(\n            user_emails=[],\n            user_groups=[],\n            external_user_emails=[],\n            external_user_group_ids=[],\n            is_public=False,\n        )\n\n    return next(iter(id_to_access.values()))\n\n\ndef _get_access_for_documents(\n    document_ids: list[str],\n    db_session: Session,\n) -> dict[str, DocumentAccess]:\n    non_ee_access_dict = get_access_for_documents_without_groups(\n        document_ids=document_ids,\n        db_session=db_session,\n    )\n    user_group_info: dict[str, list[str]] = {\n        document_id: group_names\n        for document_id, group_names in fetch_user_groups_for_documents(\n            db_session=db_session,\n            document_ids=document_ids,\n        )\n    }\n    documents = get_documents_by_ids(\n        db_session=db_session,\n        document_ids=document_ids,\n    )\n    doc_id_map = {doc.id: doc for doc in documents}\n\n    # Get all sources in one batch\n    doc_id_to_source_map = get_document_sources(\n        db_session=db_session,\n        document_ids=document_ids,\n    )\n\n    all_public_ext_u_group_ids = set(fetch_public_external_group_ids(db_session))\n\n    access_map = {}\n    for document_id, non_ee_access in non_ee_access_dict.items():\n        document = doc_id_map[document_id]\n        source = doc_id_to_source_map.get(document_id)\n        if source is None:\n            logger.error(f\"Document {document_id} has no source\")\n            continue\n\n        perm_sync_config = get_source_perm_sync_config(source)\n        is_only_censored = (\n            perm_sync_config\n            and perm_sync_config.censoring_config is not None\n            and perm_sync_config.doc_sync_config is None\n        )\n\n        ext_u_emails = (\n            set(document.external_user_emails)\n            if document.external_user_emails\n            else set()\n        )\n\n        ext_u_groups = (\n            set(document.external_user_group_ids)\n            if document.external_user_group_ids\n            else set()\n        )\n\n        # If the document is determined to be \"public\" externally (through a SYNC connector)\n        # then it's given the same access level as if it were marked public within Onyx\n        # If its censored, then it's public anywhere during the search and then permissions are\n        # applied after the search\n        is_public_anywhere = (\n            document.is_public\n            or non_ee_access.is_public\n            or is_only_censored\n            or any(u_group in all_public_ext_u_group_ids for u_group in ext_u_groups)\n        )\n\n        # To avoid collisions of group namings between connectors, they need to be prefixed\n        access_map[document_id] = DocumentAccess.build(\n            user_emails=list(non_ee_access.user_emails),\n            user_groups=user_group_info.get(document_id, []),\n            is_public=is_public_anywhere,\n            external_user_emails=list(ext_u_emails),\n            external_user_group_ids=list(ext_u_groups),\n        )\n    return access_map\n\n\ndef _collect_user_file_group_names(user_file: UserFile) -> set[str]:\n    \"\"\"Extract user-group names from the already-loaded Persona.groups\n    relationships on a UserFile (skipping deleted personas).\"\"\"\n    groups: set[str] = set()\n    for persona in user_file.assistants:\n        if persona.deleted:\n            continue\n        for group in persona.groups:\n            groups.add(group.name)\n    return groups\n\n\ndef get_access_for_user_files_impl(\n    user_file_ids: list[str],\n    db_session: Session,\n) -> dict[str, DocumentAccess]:\n    \"\"\"EE version: extends the MIT user file ACL with user group names\n    from personas shared via user groups.\n\n    Uses a single DB query (via fetch_user_files_with_access_relationships)\n    that eagerly loads both the MIT-needed and EE-needed relationships.\n\n    NOTE: is imported in onyx.access.access by `fetch_versioned_implementation`\n    DO NOT REMOVE.\"\"\"\n    user_files = fetch_user_files_with_access_relationships(\n        user_file_ids, db_session, eager_load_groups=True\n    )\n    return build_access_for_user_files_impl(user_files)\n\n\ndef build_access_for_user_files_impl(\n    user_files: list[UserFile],\n) -> dict[str, DocumentAccess]:\n    \"\"\"EE version: works on pre-loaded UserFile objects.\n    Expects Persona.groups to be eagerly loaded.\n\n    NOTE: is imported in onyx.access.access by `fetch_versioned_implementation`\n    DO NOT REMOVE.\"\"\"\n    result: dict[str, DocumentAccess] = {}\n    for user_file in user_files:\n        if user_file.user is None:\n            result[str(user_file.id)] = DocumentAccess.build(\n                user_emails=[],\n                user_groups=[],\n                is_public=True,\n                external_user_emails=[],\n                external_user_group_ids=[],\n            )\n            continue\n\n        emails, is_public = collect_user_file_access(user_file)\n        group_names = _collect_user_file_group_names(user_file)\n        result[str(user_file.id)] = DocumentAccess.build(\n            user_emails=list(emails),\n            user_groups=list(group_names),\n            is_public=is_public,\n            external_user_emails=[],\n            external_user_group_ids=[],\n        )\n    return result\n\n\ndef _get_acl_for_user(user: User, db_session: Session) -> set[str]:\n    \"\"\"Returns a list of ACL entries that the user has access to. This is meant to be\n    used downstream to filter out documents that the user does not have access to. The\n    user should have access to a document if at least one entry in the document's ACL\n    matches one entry in the returned set.\n\n    NOTE: is imported in onyx.access.access by `fetch_versioned_implementation`\n    DO NOT REMOVE.\"\"\"\n    is_anonymous = user.is_anonymous\n    db_user_groups = (\n        [] if is_anonymous else fetch_user_groups_for_user(db_session, user.id)\n    )\n    prefixed_user_groups = [\n        prefix_user_group(db_user_group.name) for db_user_group in db_user_groups\n    ]\n\n    db_external_groups = (\n        [] if is_anonymous else fetch_external_groups_for_user(db_session, user.id)\n    )\n    prefixed_external_groups = [\n        prefix_external_group(db_external_group.external_user_group_id)\n        for db_external_group in db_external_groups\n    ]\n\n    user_acl = set(prefixed_user_groups + prefixed_external_groups)\n    user_acl.update(get_acl_for_user_without_groups(user, db_session))\n\n    return user_acl\n"
  },
  {
    "path": "backend/ee/onyx/access/hierarchy_access.py",
    "content": "from sqlalchemy.orm import Session\n\nfrom ee.onyx.db.external_perm import fetch_external_groups_for_user\nfrom onyx.db.models import User\n\n\ndef _get_user_external_group_ids(db_session: Session, user: User) -> list[str]:\n    if not user:\n        return []\n    external_groups = fetch_external_groups_for_user(db_session, user.id)\n    return [external_group.external_user_group_id for external_group in external_groups]\n"
  },
  {
    "path": "backend/ee/onyx/auth/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/auth/users.py",
    "content": "import os\nfrom datetime import datetime\n\nimport jwt\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Request\nfrom fastapi import status\n\nfrom ee.onyx.configs.app_configs import SUPER_CLOUD_API_KEY\nfrom ee.onyx.configs.app_configs import SUPER_USERS\nfrom ee.onyx.server.seeding import get_seed_config\nfrom onyx.auth.users import current_admin_user\nfrom onyx.configs.app_configs import AUTH_TYPE\nfrom onyx.configs.app_configs import USER_AUTH_SECRET\nfrom onyx.db.models import User\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\ndef verify_auth_setting() -> None:\n    # All the Auth flows are valid for EE version, but warn about deprecated 'disabled'\n    raw_auth_type = (os.environ.get(\"AUTH_TYPE\") or \"\").lower()\n    if raw_auth_type == \"disabled\":\n        logger.warning(\n            \"AUTH_TYPE='disabled' is no longer supported. Using 'basic' instead. Please update your configuration.\"\n        )\n    logger.notice(f\"Using Auth Type: {AUTH_TYPE.value}\")\n\n\ndef get_default_admin_user_emails_() -> list[str]:\n    seed_config = get_seed_config()\n    if seed_config and seed_config.admin_user_emails:\n        return seed_config.admin_user_emails\n    return []\n\n\nasync def current_cloud_superuser(\n    request: Request,\n    user: User = Depends(current_admin_user),\n) -> User:\n    api_key = request.headers.get(\"Authorization\", \"\").replace(\"Bearer \", \"\")\n    if api_key != SUPER_CLOUD_API_KEY:\n        raise HTTPException(status_code=401, detail=\"Invalid API key\")\n\n    if user and user.email not in SUPER_USERS:\n        raise HTTPException(\n            status_code=status.HTTP_403_FORBIDDEN,\n            detail=\"Access denied. User must be a cloud superuser to perform this action.\",\n        )\n    return user\n\n\ndef generate_anonymous_user_jwt_token(tenant_id: str) -> str:\n    payload = {\n        \"tenant_id\": tenant_id,\n        # Token does not expire\n        \"iat\": datetime.utcnow(),  # Issued at time\n    }\n\n    return jwt.encode(payload, USER_AUTH_SECRET, algorithm=\"HS256\")\n\n\ndef decode_anonymous_user_jwt_token(token: str) -> dict:\n    return jwt.decode(token, USER_AUTH_SECRET, algorithms=[\"HS256\"])\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/apps/heavy.py",
    "content": "from onyx.background.celery.apps import app_base\nfrom onyx.background.celery.apps.heavy import celery_app\n\n\ncelery_app.autodiscover_tasks(\n    app_base.filter_task_modules(\n        [\n            \"ee.onyx.background.celery.tasks.doc_permission_syncing\",\n            \"ee.onyx.background.celery.tasks.external_group_syncing\",\n            \"ee.onyx.background.celery.tasks.cleanup\",\n            \"ee.onyx.background.celery.tasks.query_history\",\n        ]\n    )\n)\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/apps/light.py",
    "content": "from onyx.background.celery.apps import app_base\nfrom onyx.background.celery.apps.light import celery_app\n\ncelery_app.autodiscover_tasks(\n    app_base.filter_task_modules(\n        [\n            \"ee.onyx.background.celery.tasks.doc_permission_syncing\",\n            \"ee.onyx.background.celery.tasks.external_group_syncing\",\n        ]\n    )\n)\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/apps/monitoring.py",
    "content": "from onyx.background.celery.apps import app_base\nfrom onyx.background.celery.apps.monitoring import celery_app\n\ncelery_app.autodiscover_tasks(\n    app_base.filter_task_modules(\n        [\n            \"ee.onyx.background.celery.tasks.tenant_provisioning\",\n        ]\n    )\n)\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/apps/primary.py",
    "content": "from onyx.background.celery.apps import app_base\nfrom onyx.background.celery.apps.primary import celery_app\n\n\ncelery_app.autodiscover_tasks(\n    app_base.filter_task_modules(\n        [\n            \"ee.onyx.background.celery.tasks.hooks\",\n            \"ee.onyx.background.celery.tasks.doc_permission_syncing\",\n            \"ee.onyx.background.celery.tasks.external_group_syncing\",\n            \"ee.onyx.background.celery.tasks.cloud\",\n            \"ee.onyx.background.celery.tasks.ttl_management\",\n            \"ee.onyx.background.celery.tasks.usage_reporting\",\n        ]\n    )\n)\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/beat_schedule.py",
    "content": "from datetime import timedelta\nfrom typing import Any\n\nfrom ee.onyx.configs.app_configs import CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS\nfrom onyx.background.celery.tasks.beat_schedule import (\n    beat_cloud_tasks as base_beat_system_tasks,\n)\nfrom onyx.background.celery.tasks.beat_schedule import BEAT_EXPIRES_DEFAULT\nfrom onyx.background.celery.tasks.beat_schedule import (\n    beat_task_templates as base_beat_task_templates,\n)\nfrom onyx.background.celery.tasks.beat_schedule import generate_cloud_tasks\nfrom onyx.background.celery.tasks.beat_schedule import (\n    get_tasks_to_schedule as base_get_tasks_to_schedule,\n)\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom shared_configs.configs import MULTI_TENANT\n\nee_beat_system_tasks: list[dict] = []\n\nee_beat_task_templates: list[dict] = [\n    {\n        \"name\": \"autogenerate-usage-report\",\n        \"task\": OnyxCeleryTask.GENERATE_USAGE_REPORT_TASK,\n        \"schedule\": timedelta(days=30),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.MEDIUM,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": \"check-ttl-management\",\n        \"task\": OnyxCeleryTask.CHECK_TTL_MANAGEMENT_TASK,\n        \"schedule\": timedelta(hours=CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.MEDIUM,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": \"export-query-history-cleanup-task\",\n        \"task\": OnyxCeleryTask.EXPORT_QUERY_HISTORY_CLEANUP_TASK,\n        \"schedule\": timedelta(hours=1),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.MEDIUM,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n            \"queue\": OnyxCeleryQueues.CSV_GENERATION,\n        },\n    },\n]\n\nee_tasks_to_schedule: list[dict] = []\n\nif not MULTI_TENANT:\n    ee_tasks_to_schedule = [\n        {\n            \"name\": \"hook-execution-log-cleanup\",\n            \"task\": OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,\n            \"schedule\": timedelta(days=1),\n            \"options\": {\n                \"priority\": OnyxCeleryPriority.LOW,\n                \"expires\": BEAT_EXPIRES_DEFAULT,\n            },\n        },\n        {\n            \"name\": \"autogenerate-usage-report\",\n            \"task\": OnyxCeleryTask.GENERATE_USAGE_REPORT_TASK,\n            \"schedule\": timedelta(days=30),  # TODO: change this to config flag\n            \"options\": {\n                \"priority\": OnyxCeleryPriority.MEDIUM,\n                \"expires\": BEAT_EXPIRES_DEFAULT,\n            },\n        },\n        {\n            \"name\": \"check-ttl-management\",\n            \"task\": OnyxCeleryTask.CHECK_TTL_MANAGEMENT_TASK,\n            \"schedule\": timedelta(hours=CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS),\n            \"options\": {\n                \"priority\": OnyxCeleryPriority.MEDIUM,\n                \"expires\": BEAT_EXPIRES_DEFAULT,\n            },\n        },\n        {\n            \"name\": \"export-query-history-cleanup-task\",\n            \"task\": OnyxCeleryTask.EXPORT_QUERY_HISTORY_CLEANUP_TASK,\n            \"schedule\": timedelta(hours=1),\n            \"options\": {\n                \"priority\": OnyxCeleryPriority.MEDIUM,\n                \"expires\": BEAT_EXPIRES_DEFAULT,\n                \"queue\": OnyxCeleryQueues.CSV_GENERATION,\n            },\n        },\n    ]\n\n\ndef get_cloud_tasks_to_schedule(beat_multiplier: float) -> list[dict[str, Any]]:\n    beat_system_tasks = ee_beat_system_tasks + base_beat_system_tasks\n    beat_task_templates = ee_beat_task_templates + base_beat_task_templates\n    cloud_tasks = generate_cloud_tasks(\n        beat_system_tasks, beat_task_templates, beat_multiplier\n    )\n    return cloud_tasks\n\n\ndef get_tasks_to_schedule() -> list[dict[str, Any]]:\n    return ee_tasks_to_schedule + base_get_tasks_to_schedule()\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/cleanup/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/cleanup/tasks.py",
    "content": "from datetime import datetime\nfrom datetime import timedelta\n\nfrom celery import shared_task\n\nfrom ee.onyx.db.query_history import get_all_query_history_export_tasks\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.enums import TaskStatus\nfrom onyx.db.tasks import delete_task_with_id\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\n@shared_task(\n    name=OnyxCeleryTask.EXPORT_QUERY_HISTORY_CLEANUP_TASK,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT,\n)\ndef export_query_history_cleanup_task(*, tenant_id: str) -> None:\n    with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n        tasks = get_all_query_history_export_tasks(db_session=db_session)\n\n        for task in tasks:\n            if task.status == TaskStatus.SUCCESS:\n                delete_task_with_id(db_session=db_session, task_id=task.task_id)\n            elif task.status == TaskStatus.FAILURE:\n                if task.start_time:\n                    deadline = task.start_time + timedelta(hours=24)\n                    now = datetime.now()\n                    if now < deadline:\n                        continue\n\n                logger.error(\n                    f\"Task with {task.task_id=} failed; it is being deleted now\"\n                )\n                delete_task_with_id(db_session=db_session, task_id=task.task_id)\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/cloud/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/cloud/tasks.py",
    "content": "import time\n\nfrom celery import shared_task\nfrom celery import Task\nfrom celery.exceptions import SoftTimeLimitExceeded\nfrom redis.lock import Lock as RedisLock\n\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.tasks.beat_schedule import BEAT_EXPIRES_DEFAULT\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import ONYX_CLOUD_TENANT_ID\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.db.engine.tenant_utils import get_all_tenant_ids\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.redis.redis_pool import redis_lock_dump\nfrom shared_configs.configs import IGNORED_SYNCING_TENANT_LIST\n\n\n@shared_task(\n    name=OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,\n    ignore_result=True,\n    trail=False,\n    bind=True,\n)\ndef cloud_beat_task_generator(\n    self: Task,\n    task_name: str,\n    queue: str = OnyxCeleryTask.DEFAULT,\n    priority: int = OnyxCeleryPriority.MEDIUM,\n    expires: int = BEAT_EXPIRES_DEFAULT,\n) -> bool | None:\n    \"\"\"a lightweight task used to kick off individual beat tasks per tenant.\"\"\"\n    time_start = time.monotonic()\n\n    redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)\n\n    lock_beat: RedisLock = redis_client.lock(\n        f\"{OnyxRedisLocks.CLOUD_BEAT_TASK_GENERATOR_LOCK}:{task_name}\",\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n\n    # these tasks should never overlap\n    if not lock_beat.acquire(blocking=False):\n        return None\n\n    last_lock_time = time.monotonic()\n    tenant_ids: list[str] = []\n    num_processed_tenants = 0\n\n    try:\n        tenant_ids = get_all_tenant_ids()\n\n        # NOTE: for now, we are running tasks for gated tenants, since we want to allow\n        # connector deletion to run successfully. The new plan is to continously prune\n        # the gated tenants set, so we won't have a build up of old, unused gated tenants.\n        # Keeping this around in case we want to revert to the previous behavior.\n        # gated_tenants = get_gated_tenants()\n\n        for tenant_id in tenant_ids:\n            # Same comment here as the above NOTE\n            # if tenant_id in gated_tenants:\n            #     continue\n\n            current_time = time.monotonic()\n            if current_time - last_lock_time >= (CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4):\n                lock_beat.reacquire()\n                last_lock_time = current_time\n\n            # needed in the cloud\n            if IGNORED_SYNCING_TENANT_LIST and tenant_id in IGNORED_SYNCING_TENANT_LIST:\n                continue\n\n            self.app.send_task(\n                task_name,\n                kwargs=dict(\n                    tenant_id=tenant_id,\n                ),\n                queue=queue,\n                priority=priority,\n                expires=expires,\n                ignore_result=True,\n            )\n\n            num_processed_tenants += 1\n    except SoftTimeLimitExceeded:\n        task_logger.info(\n            \"Soft time limit exceeded, task is being terminated gracefully.\"\n        )\n    except Exception:\n        task_logger.exception(\"Unexpected exception during cloud_beat_task_generator\")\n    finally:\n        if not lock_beat.owned():\n            task_logger.error(\n                \"cloud_beat_task_generator - Lock not owned on completion\"\n            )\n            redis_lock_dump(lock_beat, redis_client)\n        else:\n            lock_beat.release()\n\n    time_elapsed = time.monotonic() - time_start\n    task_logger.info(\n        f\"cloud_beat_task_generator finished: \"\n        f\"task={task_name} \"\n        f\"num_processed_tenants={num_processed_tenants} \"\n        f\"num_tenants={len(tenant_ids)} \"\n        f\"elapsed={time_elapsed:.2f}\"\n    )\n    return True\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/doc_permission_syncing/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/doc_permission_syncing/tasks.py",
    "content": "import time\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom time import sleep\nfrom typing import Any\nfrom typing import cast\nfrom uuid import uuid4\n\nfrom celery import Celery\nfrom celery import shared_task\nfrom celery import Task\nfrom celery.exceptions import SoftTimeLimitExceeded\nfrom pydantic import ValidationError\nfrom redis import Redis\nfrom redis.exceptions import LockError\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\nfrom tenacity import retry\nfrom tenacity import retry_if_exception\nfrom tenacity import stop_after_delay\nfrom tenacity import wait_random_exponential\n\nfrom ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs\nfrom ee.onyx.db.document import upsert_document_external_perms\nfrom ee.onyx.external_permissions.sync_params import get_source_perm_sync_config\nfrom onyx.access.models import DocExternalAccess\nfrom onyx.access.models import ElementExternalAccess\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.celery_redis import celery_find_task\nfrom onyx.background.celery.celery_redis import celery_get_broker_client\nfrom onyx.background.celery.celery_redis import celery_get_queue_length\nfrom onyx.background.celery.celery_redis import celery_get_queued_task_ids\nfrom onyx.background.celery.celery_redis import celery_get_unacked_task_ids\nfrom onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT\nfrom onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT\nfrom onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.configs.constants import OnyxRedisSignals\nfrom onyx.connectors.factory import validate_ccpair_for_user\nfrom onyx.db.connector import mark_cc_pair_as_permissions_synced\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.document import get_document_ids_for_connector_credential_pair\nfrom onyx.db.document import get_documents_for_connector_credential_pair_limited_columns\nfrom onyx.db.document import upsert_document_by_connector_credential_pair\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import SyncStatus\nfrom onyx.db.enums import SyncType\nfrom onyx.db.hierarchy import (\n    update_hierarchy_node_permissions as db_update_hierarchy_node_permissions,\n)\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.permission_sync_attempt import complete_doc_permission_sync_attempt\nfrom onyx.db.permission_sync_attempt import create_doc_permission_sync_attempt\nfrom onyx.db.permission_sync_attempt import mark_doc_permission_sync_attempt_failed\nfrom onyx.db.permission_sync_attempt import (\n    mark_doc_permission_sync_attempt_in_progress,\n)\nfrom onyx.db.sync_record import insert_sync_record\nfrom onyx.db.sync_record import update_sync_record_status\nfrom onyx.db.users import batch_add_ext_perm_user_if_not_exists\nfrom onyx.db.utils import DocumentRow\nfrom onyx.db.utils import is_retryable_sqlalchemy_error\nfrom onyx.db.utils import SortOrder\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.redis.redis_connector import RedisConnector\nfrom onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync\nfrom onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSyncPayload\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.redis.redis_pool import get_redis_replica_client\nfrom onyx.redis.redis_pool import redis_lock_dump\nfrom onyx.server.runtime.onyx_runtime import OnyxRuntime\nfrom onyx.server.utils import make_short_id\nfrom onyx.utils.logger import doc_permission_sync_ctx\nfrom onyx.utils.logger import format_error_for_logging\nfrom onyx.utils.logger import LoggerContextVars\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.telemetry import optional_telemetry\nfrom onyx.utils.telemetry import RecordType\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n\nDOCUMENT_PERMISSIONS_UPDATE_MAX_RETRIES = 3\nDOCUMENT_PERMISSIONS_UPDATE_STOP_AFTER = 10 * 60\nDOCUMENT_PERMISSIONS_UPDATE_MAX_WAIT = 60\n\n\n# 5 seconds more than RetryDocumentIndex STOP_AFTER+MAX_WAIT\nLIGHT_SOFT_TIME_LIMIT = 105\nLIGHT_TIME_LIMIT = LIGHT_SOFT_TIME_LIMIT + 15\n\n\ndef _get_fence_validation_block_expiration() -> int:\n    \"\"\"\n    Compute the expiration time for the fence validation block signal.\n    Base expiration is 300 seconds, multiplied by the beat multiplier only in MULTI_TENANT mode.\n    \"\"\"\n    base_expiration = 300  # seconds\n\n    if not MULTI_TENANT:\n        return base_expiration\n\n    try:\n        beat_multiplier = OnyxRuntime.get_beat_multiplier()\n    except Exception:\n        beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT\n\n    return int(base_expiration * beat_multiplier)\n\n\n\"\"\"Jobs / utils for kicking off doc permissions sync tasks.\"\"\"\n\n\ndef _fail_doc_permission_sync_attempt(attempt_id: int, error_msg: str) -> None:\n    \"\"\"Helper to mark a doc permission sync attempt as failed with an error message.\"\"\"\n    with get_session_with_current_tenant() as db_session:\n        mark_doc_permission_sync_attempt_failed(\n            attempt_id, db_session, error_message=error_msg\n        )\n\n\ndef _is_external_doc_permissions_sync_due(cc_pair: ConnectorCredentialPair) -> bool:\n    \"\"\"Returns boolean indicating if external doc permissions sync is due.\"\"\"\n\n    if cc_pair.access_type != AccessType.SYNC:\n        return False\n\n    # skip doc permissions sync if not active\n    if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE:\n        return False\n\n    sync_config = get_source_perm_sync_config(cc_pair.connector.source)\n    if sync_config is None:\n        logger.error(f\"No sync config found for {cc_pair.connector.source}\")\n        return False\n\n    if sync_config.doc_sync_config is None:\n        logger.error(f\"No doc sync config found for {cc_pair.connector.source}\")\n        return False\n\n    # if indexing also does perm sync, don't start running doc_sync until at\n    # least one indexing is done\n    if (\n        sync_config.doc_sync_config.initial_index_should_sync\n        and cc_pair.last_successful_index_time is None\n    ):\n        return False\n\n    # If the last sync is None, it has never been run so we run the sync\n    last_perm_sync = cc_pair.last_time_perm_sync\n    if last_perm_sync is None:\n        return True\n\n    source_sync_period = sync_config.doc_sync_config.doc_sync_frequency\n    source_sync_period *= int(OnyxRuntime.get_doc_permission_sync_multiplier())\n\n    # If the last sync is greater than the full fetch period, we run the sync\n    next_sync = last_perm_sync + timedelta(seconds=source_sync_period)\n    if datetime.now(timezone.utc) >= next_sync:\n        return True\n\n    return False\n\n\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_DOC_PERMISSIONS_SYNC,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT,\n    bind=True,\n)\ndef check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None:\n    # TODO(rkuo): merge into check function after lookup table for fences is added\n\n    # we need to use celery's redis client to access its redis data\n    # (which lives on a different db number)\n    r = get_redis_client()\n    r_replica = get_redis_replica_client()\n\n    lock_beat: RedisLock = r.lock(\n        OnyxRedisLocks.CHECK_CONNECTOR_DOC_PERMISSIONS_SYNC_BEAT_LOCK,\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n\n    # these tasks should never overlap\n    if not lock_beat.acquire(blocking=False):\n        return None\n\n    try:\n        # get all cc pairs that need to be synced\n        cc_pair_ids_to_sync: list[int] = []\n        with get_session_with_current_tenant() as db_session:\n            cc_pairs = get_all_auto_sync_cc_pairs(db_session)\n\n            for cc_pair in cc_pairs:\n                if _is_external_doc_permissions_sync_due(cc_pair):\n                    cc_pair_ids_to_sync.append(cc_pair.id)\n\n        lock_beat.reacquire()\n        for cc_pair_id in cc_pair_ids_to_sync:\n            payload_id = try_creating_permissions_sync_task(\n                self.app, cc_pair_id, r, tenant_id\n            )\n            if not payload_id:\n                continue\n\n            task_logger.info(\n                f\"Permissions sync queued: cc_pair={cc_pair_id} id={payload_id}\"\n            )\n\n        # we want to run this less frequently than the overall task\n        lock_beat.reacquire()\n        if not r.exists(OnyxRedisSignals.BLOCK_VALIDATE_PERMISSION_SYNC_FENCES):\n            # clear any permission fences that don't have associated celery tasks in progress\n            # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),\n            # or be currently executing\n            try:\n                r_celery = celery_get_broker_client(self.app)\n                validate_permission_sync_fences(\n                    tenant_id, r, r_replica, r_celery, lock_beat\n                )\n            except Exception:\n                task_logger.exception(\n                    \"Exception while validating permission sync fences\"\n                )\n\n            r.set(\n                OnyxRedisSignals.BLOCK_VALIDATE_PERMISSION_SYNC_FENCES,\n                1,\n                ex=_get_fence_validation_block_expiration(),\n            )\n\n        # use a lookup table to find active fences. We still have to verify the fence\n        # exists since it is an optimization and not the source of truth.\n        lock_beat.reacquire()\n        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))\n        for key in keys:\n            key_bytes = cast(bytes, key)\n\n            if not r.exists(key_bytes):\n                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)\n                continue\n\n            key_str = key_bytes.decode(\"utf-8\")\n            if key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):\n                with get_session_with_current_tenant() as db_session:\n                    monitor_ccpair_permissions_taskset(\n                        tenant_id, key_bytes, r, db_session\n                    )\n        task_logger.info(f\"check_for_doc_permissions_sync finished: tenant={tenant_id}\")\n    except SoftTimeLimitExceeded:\n        task_logger.info(\n            \"Soft time limit exceeded, task is being terminated gracefully.\"\n        )\n    except Exception as e:\n        error_msg = format_error_for_logging(e)\n        task_logger.warning(\n            f\"Unexpected check_for_doc_permissions_sync exception: tenant={tenant_id} {error_msg}\"\n        )\n        task_logger.exception(\n            f\"Unexpected check_for_doc_permissions_sync exception: tenant={tenant_id}\"\n        )\n    finally:\n        if lock_beat.owned():\n            lock_beat.release()\n\n    return True\n\n\ndef try_creating_permissions_sync_task(\n    app: Celery,\n    cc_pair_id: int,\n    r: Redis,\n    tenant_id: str,\n) -> str | None:\n    \"\"\"Returns a randomized payload id on success.\n    Returns None if no syncing is required.\"\"\"\n    LOCK_TIMEOUT = 30\n\n    payload_id: str | None = None\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n\n    lock: RedisLock = r.lock(\n        DANSWER_REDIS_FUNCTION_LOCK_PREFIX + \"try_generate_permissions_sync_tasks\",\n        timeout=LOCK_TIMEOUT,\n    )\n\n    acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2)\n    if not acquired:\n        return None\n\n    try:\n        if redis_connector.permissions.fenced:\n            return None\n\n        if redis_connector.delete.fenced:\n            return None\n\n        if redis_connector.prune.fenced:\n            return None\n\n        redis_connector.permissions.generator_clear()\n        redis_connector.permissions.taskset_clear()\n\n        custom_task_id = f\"{redis_connector.permissions.generator_task_key}_{uuid4()}\"\n\n        # create before setting fence to avoid race condition where the monitoring\n        # task updates the sync record before it is created\n        try:\n            with get_session_with_current_tenant() as db_session:\n                insert_sync_record(\n                    db_session=db_session,\n                    entity_id=cc_pair_id,\n                    sync_type=SyncType.EXTERNAL_PERMISSIONS,\n                )\n        except Exception:\n            task_logger.exception(\"insert_sync_record exceptioned.\")\n\n        # set a basic fence to start\n        redis_connector.permissions.set_active()\n        payload = RedisConnectorPermissionSyncPayload(\n            id=make_short_id(),\n            submitted=datetime.now(timezone.utc),\n            started=None,\n            celery_task_id=None,\n        )\n        redis_connector.permissions.set_fence(payload)\n\n        result = app.send_task(\n            OnyxCeleryTask.CONNECTOR_PERMISSION_SYNC_GENERATOR_TASK,\n            kwargs=dict(\n                cc_pair_id=cc_pair_id,\n                tenant_id=tenant_id,\n            ),\n            queue=OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC,\n            task_id=custom_task_id,\n            priority=OnyxCeleryPriority.MEDIUM,\n        )\n\n        # fill in the celery task id\n        payload.celery_task_id = result.id\n        redis_connector.permissions.set_fence(payload)\n\n        payload_id = payload.id\n    except Exception as e:\n        error_msg = format_error_for_logging(e)\n        task_logger.warning(\n            f\"Unexpected try_creating_permissions_sync_task exception: cc_pair={cc_pair_id} {error_msg}\"\n        )\n        return None\n    finally:\n        if lock.owned():\n            lock.release()\n\n    task_logger.info(\n        f\"try_creating_permissions_sync_task finished: cc_pair={cc_pair_id} payload_id={payload_id}\"\n    )\n    return payload_id\n\n\n@shared_task(\n    name=OnyxCeleryTask.CONNECTOR_PERMISSION_SYNC_GENERATOR_TASK,\n    acks_late=False,\n    soft_time_limit=JOB_TIMEOUT,\n    track_started=True,\n    trail=False,\n    bind=True,\n)\ndef connector_permission_sync_generator_task(\n    self: Task,\n    cc_pair_id: int,\n    tenant_id: str,\n) -> None:\n    \"\"\"\n    Permission sync task that handles document permission syncing for a given connector credential pair\n    This task assumes that the task has already been properly fenced\n    \"\"\"\n\n    payload_id: str | None = None\n\n    LoggerContextVars.reset()\n\n    doc_permission_sync_ctx_dict = doc_permission_sync_ctx.get()\n    doc_permission_sync_ctx_dict[\"cc_pair_id\"] = cc_pair_id\n    doc_permission_sync_ctx_dict[\"request_id\"] = self.request.id\n    doc_permission_sync_ctx.set(doc_permission_sync_ctx_dict)\n\n    with get_session_with_current_tenant() as db_session:\n        attempt_id = create_doc_permission_sync_attempt(\n            connector_credential_pair_id=cc_pair_id,\n            db_session=db_session,\n        )\n        task_logger.info(\n            f\"Created doc permission sync attempt: {attempt_id} for cc_pair={cc_pair_id}\"\n        )\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n\n    r = get_redis_client()\n\n    # this wait is needed to avoid a race condition where\n    # the primary worker sends the task and it is immediately executed\n    # before the primary worker can finalize the fence\n    start = time.monotonic()\n    while True:\n        if time.monotonic() - start > CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT:\n            error_msg = (\n                f\"connector_permission_sync_generator_task - timed out waiting for fence to be ready: \"\n                f\"fence={redis_connector.permissions.fence_key}\"\n            )\n            _fail_doc_permission_sync_attempt(attempt_id, error_msg)\n            raise ValueError(error_msg)\n\n        if not redis_connector.permissions.fenced:  # The fence must exist\n            error_msg = f\"connector_permission_sync_generator_task - fence not found: fence={redis_connector.permissions.fence_key}\"\n            _fail_doc_permission_sync_attempt(attempt_id, error_msg)\n            raise ValueError(error_msg)\n\n        payload = redis_connector.permissions.payload  # The payload must exist\n        if not payload:\n            error_msg = (\n                \"connector_permission_sync_generator_task: payload invalid or not found\"\n            )\n            _fail_doc_permission_sync_attempt(attempt_id, error_msg)\n            raise ValueError(error_msg)\n\n        if payload.celery_task_id is None:\n            logger.info(\n                f\"connector_permission_sync_generator_task - Waiting for fence: fence={redis_connector.permissions.fence_key}\"\n            )\n            sleep(1)\n            continue\n\n        payload_id = payload.id\n\n        logger.info(\n            f\"connector_permission_sync_generator_task - Fence found, continuing...: \"\n            f\"fence={redis_connector.permissions.fence_key} \"\n            f\"payload_id={payload.id}\"\n        )\n        break\n\n    lock: RedisLock = r.lock(\n        OnyxRedisLocks.CONNECTOR_DOC_PERMISSIONS_SYNC_LOCK_PREFIX\n        + f\"_{redis_connector.cc_pair_id}\",\n        timeout=CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT,\n        thread_local=False,\n    )\n\n    acquired = lock.acquire(blocking=False)\n    if not acquired:\n        error_msg = (\n            f\"Permission sync task already running, exiting...: cc_pair={cc_pair_id}\"\n        )\n        task_logger.warning(error_msg)\n        _fail_doc_permission_sync_attempt(attempt_id, error_msg)\n        return None\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            cc_pair = get_connector_credential_pair_from_id(\n                db_session=db_session,\n                cc_pair_id=cc_pair_id,\n                eager_load_connector=True,\n                eager_load_credential=True,\n            )\n            if cc_pair is None:\n                raise ValueError(\n                    f\"No connector credential pair found for id: {cc_pair_id}\"\n                )\n\n            try:\n                created = validate_ccpair_for_user(\n                    cc_pair.connector.id,\n                    cc_pair.credential.id,\n                    cc_pair.access_type,\n                    db_session,\n                    enforce_creation=False,\n                )\n                if not created:\n                    task_logger.warning(\n                        f\"Unable to create connector credential pair for id: {cc_pair_id}\"\n                    )\n            except Exception:\n                task_logger.exception(\n                    f\"validate_ccpair_permissions_sync exceptioned: cc_pair={cc_pair_id}\"\n                )\n                # TODO: add some notification to the admins here\n                raise\n\n            source_type = cc_pair.connector.source\n            sync_config = get_source_perm_sync_config(source_type)\n            if sync_config is None:\n                error_msg = f\"No sync config found for {source_type}\"\n                logger.error(error_msg)\n                _fail_doc_permission_sync_attempt(attempt_id, error_msg)\n                return None\n\n            if sync_config.doc_sync_config is None:\n                if sync_config.censoring_config:\n                    error_msg = f\"Doc sync config is None but censoring config exists for {source_type}\"\n                    _fail_doc_permission_sync_attempt(attempt_id, error_msg)\n                    return None\n\n                raise ValueError(\n                    f\"No doc sync func found for {source_type} with cc_pair={cc_pair_id}\"\n                )\n\n            logger.info(f\"Syncing docs for {source_type} with cc_pair={cc_pair_id}\")\n\n            mark_doc_permission_sync_attempt_in_progress(attempt_id, db_session)\n\n            payload = redis_connector.permissions.payload\n            if not payload:\n                raise ValueError(f\"No fence payload found: cc_pair={cc_pair_id}\")\n\n            new_payload = RedisConnectorPermissionSyncPayload(\n                id=payload.id,\n                submitted=payload.submitted,\n                started=datetime.now(timezone.utc),\n                celery_task_id=payload.celery_task_id,\n            )\n            redis_connector.permissions.set_fence(new_payload)\n\n            callback = PermissionSyncCallback(\n                redis_connector, lock, r, timeout_seconds=JOB_TIMEOUT\n            )\n\n            # pass in the capability to fetch all existing docs for the cc_pair\n            # this is can be used to determine documents that are \"missing\" and thus\n            # should no longer be accessible. The decision as to whether we should find\n            # every document during the doc sync process is connector-specific.\n            def fetch_all_existing_docs_fn(\n                sort_order: SortOrder | None = None,\n            ) -> list[DocumentRow]:\n                result = get_documents_for_connector_credential_pair_limited_columns(\n                    db_session=db_session,\n                    connector_id=cc_pair.connector.id,\n                    credential_id=cc_pair.credential.id,\n                    sort_order=sort_order,\n                )\n                return list(result)\n\n            def fetch_all_existing_docs_ids_fn() -> list[str]:\n                result = get_document_ids_for_connector_credential_pair(\n                    db_session=db_session,\n                    connector_id=cc_pair.connector.id,\n                    credential_id=cc_pair.credential.id,\n                )\n                return result\n\n            doc_sync_func = sync_config.doc_sync_config.doc_sync_func\n            document_external_accesses = doc_sync_func(\n                cc_pair,\n                fetch_all_existing_docs_fn,\n                fetch_all_existing_docs_ids_fn,\n                callback,\n            )\n\n            task_logger.info(\n                f\"RedisConnector.permissions.generate_tasks starting. cc_pair={cc_pair_id}\"\n            )\n\n            tasks_generated = 0\n            docs_with_errors = 0\n            for doc_external_access in document_external_accesses:\n                if callback.should_stop():\n                    raise RuntimeError(\n                        f\"Permission sync task timed out or stop signal detected: \"\n                        f\"cc_pair={cc_pair_id} \"\n                        f\"tasks_generated={tasks_generated}\"\n                    )\n\n                result = redis_connector.permissions.update_db(\n                    lock=lock,\n                    new_permissions=[doc_external_access],\n                    source_string=source_type,\n                    connector_id=cc_pair.connector.id,\n                    credential_id=cc_pair.credential.id,\n                    task_logger=task_logger,\n                )\n                tasks_generated += result.num_updated\n                docs_with_errors += result.num_errors\n\n            task_logger.info(\n                f\"RedisConnector.permissions.generate_tasks finished. \"\n                f\"cc_pair={cc_pair_id} tasks_generated={tasks_generated} docs_with_errors={docs_with_errors}\"\n            )\n\n            complete_doc_permission_sync_attempt(\n                db_session=db_session,\n                attempt_id=attempt_id,\n                total_docs_synced=tasks_generated,\n                docs_with_permission_errors=docs_with_errors,\n            )\n            task_logger.info(\n                f\"Completed doc permission sync attempt {attempt_id}: {tasks_generated} docs, {docs_with_errors} errors\"\n            )\n\n            redis_connector.permissions.generator_complete = tasks_generated\n\n    except Exception as e:\n        error_msg = format_error_for_logging(e)\n\n        task_logger.warning(\n            f\"Permission sync exceptioned: cc_pair={cc_pair_id} payload_id={payload_id} {error_msg}\"\n        )\n        task_logger.exception(\n            f\"Permission sync exceptioned: cc_pair={cc_pair_id} payload_id={payload_id}\"\n        )\n\n        with get_session_with_current_tenant() as db_session:\n            mark_doc_permission_sync_attempt_failed(\n                attempt_id, db_session, error_message=error_msg\n            )\n\n        redis_connector.permissions.generator_clear()\n        redis_connector.permissions.taskset_clear()\n        redis_connector.permissions.set_fence(None)\n        raise e\n    finally:\n        if lock.owned():\n            lock.release()\n\n    task_logger.info(\n        f\"Permission sync finished: cc_pair={cc_pair_id} payload_id={payload.id}\"\n    )\n\n\n# NOTE(rkuo): this should probably move to the db layer\n@retry(\n    retry=retry_if_exception(is_retryable_sqlalchemy_error),\n    wait=wait_random_exponential(\n        multiplier=1, max=DOCUMENT_PERMISSIONS_UPDATE_MAX_WAIT\n    ),\n    stop=stop_after_delay(DOCUMENT_PERMISSIONS_UPDATE_STOP_AFTER),\n)\ndef element_update_permissions(\n    tenant_id: str,\n    permissions: ElementExternalAccess,\n    source_type_str: str,\n    connector_id: int,\n    credential_id: int,\n) -> bool:\n    \"\"\"Update permissions for a document or hierarchy node.\"\"\"\n    start = time.monotonic()\n    external_access = permissions.external_access\n\n    # Determine element type and identifier for logging\n    if isinstance(permissions, DocExternalAccess):\n        element_id = permissions.doc_id\n        element_type = \"doc\"\n    else:\n        element_id = permissions.raw_node_id\n        element_type = \"node\"\n\n    try:\n        with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n            # Add the users to the DB if they don't exist\n            batch_add_ext_perm_user_if_not_exists(\n                db_session=db_session,\n                emails=list(external_access.external_user_emails),\n                continue_on_error=True,\n            )\n\n            if isinstance(permissions, DocExternalAccess):\n                # Document permission update\n                created_new_doc = upsert_document_external_perms(\n                    db_session=db_session,\n                    doc_id=permissions.doc_id,\n                    external_access=external_access,\n                    source_type=DocumentSource(source_type_str),\n                )\n\n                if created_new_doc:\n                    # If a new document was created, we associate it with the cc_pair\n                    upsert_document_by_connector_credential_pair(\n                        db_session=db_session,\n                        connector_id=connector_id,\n                        credential_id=credential_id,\n                        document_ids=[permissions.doc_id],\n                    )\n            else:\n                # Hierarchy node permission update\n                db_update_hierarchy_node_permissions(\n                    db_session=db_session,\n                    raw_node_id=permissions.raw_node_id,\n                    source=DocumentSource(permissions.source),\n                    is_public=external_access.is_public,\n                    external_user_emails=(\n                        list(external_access.external_user_emails)\n                        if external_access.external_user_emails\n                        else None\n                    ),\n                    external_user_group_ids=(\n                        list(external_access.external_user_group_ids)\n                        if external_access.external_user_group_ids\n                        else None\n                    ),\n                )\n\n            elapsed = time.monotonic() - start\n            task_logger.info(\n                f\"{element_type}={element_id} action=update_permissions elapsed={elapsed:.2f}\"\n            )\n    except Exception as e:\n        task_logger.exception(\n            f\"element_update_permissions exceptioned: {element_type}={element_id}, {connector_id=} {credential_id=}\"\n        )\n        raise e\n    finally:\n        task_logger.info(\n            f\"element_update_permissions completed: {element_type}={element_id}, {connector_id=} {credential_id=}\"\n        )\n\n    return True\n\n\ndef validate_permission_sync_fences(\n    tenant_id: str,\n    r: Redis,\n    r_replica: Redis,\n    r_celery: Redis,\n    lock_beat: RedisLock,\n) -> None:\n    # building lookup table can be expensive, so we won't bother\n    # validating until the queue is small\n    PERMISSION_SYNC_VALIDATION_MAX_QUEUE_LEN = 1024\n\n    queue_len = celery_get_queue_length(\n        OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT, r_celery\n    )\n    if queue_len > PERMISSION_SYNC_VALIDATION_MAX_QUEUE_LEN:\n        return\n\n    queued_upsert_tasks = celery_get_queued_task_ids(\n        OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT, r_celery\n    )\n    reserved_generator_tasks = celery_get_unacked_task_ids(\n        OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC, r_celery\n    )\n\n    # validate all existing permission sync jobs\n    lock_beat.reacquire()\n    keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))\n    for key in keys:\n        key_bytes = cast(bytes, key)\n        key_str = key_bytes.decode(\"utf-8\")\n        if not key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):\n            continue\n\n        validate_permission_sync_fence(\n            tenant_id,\n            key_bytes,\n            queued_upsert_tasks,\n            reserved_generator_tasks,\n            r,\n            r_celery,\n        )\n\n        lock_beat.reacquire()\n\n    return\n\n\ndef validate_permission_sync_fence(\n    tenant_id: str,\n    key_bytes: bytes,\n    queued_tasks: set[str],\n    reserved_tasks: set[str],\n    r: Redis,\n    r_celery: Redis,\n) -> None:\n    \"\"\"Checks for the error condition where an indexing fence is set but the associated celery tasks don't exist.\n    This can happen if the indexing worker hard crashes or is terminated.\n    Being in this bad state means the fence will never clear without help, so this function\n    gives the help.\n\n    How this works:\n    1. This function renews the active signal with a 5 minute TTL under the following conditions\n    1.2. When the task is seen in the redis queue\n    1.3. When the task is seen in the reserved / prefetched list\n\n    2. Externally, the active signal is renewed when:\n    2.1. The fence is created\n    2.2. The indexing watchdog checks the spawned task.\n\n    3. The TTL allows us to get through the transitions on fence startup\n    and when the task starts executing.\n\n    More TTL clarification: it is seemingly impossible to exactly query Celery for\n    whether a task is in the queue or currently executing.\n    1. An unknown task id is always returned as state PENDING.\n    2. Redis can be inspected for the task id, but the task id is gone between the time a worker receives the task\n    and the time it actually starts on the worker.\n\n    queued_tasks: the celery queue of lightweight permission sync tasks\n    reserved_tasks: prefetched tasks for sync task generator\n    \"\"\"\n    # if the fence doesn't exist, there's nothing to do\n    fence_key = key_bytes.decode(\"utf-8\")\n    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)\n    if cc_pair_id_str is None:\n        task_logger.warning(\n            f\"validate_permission_sync_fence - could not parse id from {fence_key}\"\n        )\n        return\n\n    cc_pair_id = int(cc_pair_id_str)\n    # parse out metadata and initialize the helper class with it\n    redis_connector = RedisConnector(tenant_id, int(cc_pair_id))\n\n    # check to see if the fence/payload exists\n    if not redis_connector.permissions.fenced:\n        return\n\n    # in the cloud, the payload format may have changed ...\n    # it's a little sloppy, but just reset the fence for now if that happens\n    # TODO: add intentional cleanup/abort logic\n    try:\n        payload = redis_connector.permissions.payload\n    except ValidationError:\n        task_logger.exception(\n            \"validate_permission_sync_fence - \"\n            \"Resetting fence because fence schema is out of date: \"\n            f\"cc_pair={cc_pair_id} \"\n            f\"fence={fence_key}\"\n        )\n\n        redis_connector.permissions.reset()\n        return\n\n    if not payload:\n        return\n\n    if not payload.celery_task_id:\n        return\n\n    # OK, there's actually something for us to validate\n\n    # either the generator task must be in flight or its subtasks must be\n    found = celery_find_task(\n        payload.celery_task_id,\n        OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC,\n        r_celery,\n    )\n    if found:\n        # the celery task exists in the redis queue\n        redis_connector.permissions.set_active()\n        return\n\n    if payload.celery_task_id in reserved_tasks:\n        # the celery task was prefetched and is reserved within a worker\n        redis_connector.permissions.set_active()\n        return\n\n    # look up every task in the current taskset in the celery queue\n    # every entry in the taskset should have an associated entry in the celery task queue\n    # because we get the celery tasks first, the entries in our own permissions taskset\n    # should be roughly a subset of the tasks in celery\n\n    # this check isn't very exact, but should be sufficient over a period of time\n    # A single successful check over some number of attempts is sufficient.\n\n    # TODO: if the number of tasks in celery is much lower than than the taskset length\n    # we might be able to shortcut the lookup since by definition some of the tasks\n    # must not exist in celery.\n\n    tasks_scanned = 0\n    tasks_not_in_celery = 0  # a non-zero number after completing our check is bad\n\n    for member in r.sscan_iter(redis_connector.permissions.taskset_key):\n        tasks_scanned += 1\n\n        member_bytes = cast(bytes, member)\n        member_str = member_bytes.decode(\"utf-8\")\n        if member_str in queued_tasks:\n            continue\n\n        if member_str in reserved_tasks:\n            continue\n\n        tasks_not_in_celery += 1\n\n    task_logger.info(\n        f\"validate_permission_sync_fence task check: tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}\"\n    )\n\n    # we're active if there are still tasks to run and those tasks all exist in celery\n    if tasks_scanned > 0 and tasks_not_in_celery == 0:\n        redis_connector.permissions.set_active()\n        return\n\n    # we may want to enable this check if using the active task list somehow isn't good enough\n    # if redis_connector_index.generator_locked():\n    #     logger.info(f\"{payload.celery_task_id} is currently executing.\")\n\n    # if we get here, we didn't find any direct indication that the associated celery tasks exist,\n    # but they still might be there due to gaps in our ability to check states during transitions\n    # Checking the active signal safeguards us against these transition periods\n    # (which has a duration that allows us to bridge those gaps)\n    if redis_connector.permissions.active():\n        return\n\n    # celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.\n    task_logger.warning(\n        \"validate_permission_sync_fence - \"\n        \"Resetting fence because no associated celery tasks were found: \"\n        f\"cc_pair={cc_pair_id} \"\n        f\"fence={fence_key} \"\n        f\"payload_id={payload.id}\"\n    )\n\n    redis_connector.permissions.reset()\n    return\n\n\nclass PermissionSyncCallback(IndexingHeartbeatInterface):\n    PARENT_CHECK_INTERVAL = 60\n\n    def __init__(\n        self,\n        redis_connector: RedisConnector,\n        redis_lock: RedisLock,\n        redis_client: Redis,\n        timeout_seconds: int | None = None,\n    ):\n        super().__init__()\n        self.redis_connector: RedisConnector = redis_connector\n        self.redis_lock: RedisLock = redis_lock\n        self.redis_client = redis_client\n\n        self.started: datetime = datetime.now(timezone.utc)\n        self.redis_lock.reacquire()\n\n        self.last_tag: str = \"PermissionSyncCallback.__init__\"\n        self.last_lock_reacquire: datetime = datetime.now(timezone.utc)\n        self.last_lock_monotonic = time.monotonic()\n        self.start_monotonic = time.monotonic()\n        self.timeout_seconds = timeout_seconds\n\n    def should_stop(self) -> bool:\n        if self.redis_connector.stop.fenced:\n            return True\n\n        # Check if the task has exceeded its timeout\n        # NOTE: Celery's soft_time_limit does not work with thread pools,\n        # so we must enforce timeouts internally.\n        if self.timeout_seconds is not None:\n            elapsed = time.monotonic() - self.start_monotonic\n            if elapsed > self.timeout_seconds:\n                logger.warning(\n                    f\"PermissionSyncCallback - task timeout exceeded: \"\n                    f\"elapsed={elapsed:.0f}s timeout={self.timeout_seconds}s \"\n                    f\"cc_pair={self.redis_connector.cc_pair_id}\"\n                )\n                return True\n\n        return False\n\n    def progress(self, tag: str, amount: int) -> None:  # noqa: ARG002\n        try:\n            self.redis_connector.permissions.set_active()\n\n            current_time = time.monotonic()\n            if current_time - self.last_lock_monotonic >= (\n                CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4\n            ):\n                self.redis_lock.reacquire()\n                self.last_lock_reacquire = datetime.now(timezone.utc)\n                self.last_lock_monotonic = time.monotonic()\n\n            self.last_tag = tag\n        except LockError:\n            logger.exception(\n                f\"PermissionSyncCallback - lock.reacquire exceptioned: \"\n                f\"lock_timeout={self.redis_lock.timeout} \"\n                f\"start={self.started} \"\n                f\"last_tag={self.last_tag} \"\n                f\"last_reacquired={self.last_lock_reacquire} \"\n                f\"now={datetime.now(timezone.utc)}\"\n            )\n\n            redis_lock_dump(self.redis_lock, self.redis_client)\n            raise\n\n\n\"\"\"Monitoring CCPair permissions utils\"\"\"\n\n\ndef monitor_ccpair_permissions_taskset(\n    tenant_id: str,\n    key_bytes: bytes,\n    r: Redis,  # noqa: ARG001\n    db_session: Session,\n) -> None:\n    fence_key = key_bytes.decode(\"utf-8\")\n    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)\n    if cc_pair_id_str is None:\n        task_logger.warning(\n            f\"monitor_ccpair_permissions_taskset: could not parse cc_pair_id from {fence_key}\"\n        )\n        return\n\n    cc_pair_id = int(cc_pair_id_str)\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n    if not redis_connector.permissions.fenced:\n        return\n\n    initial = redis_connector.permissions.generator_complete\n    if initial is None:\n        return\n\n    try:\n        payload = redis_connector.permissions.payload\n    except ValidationError:\n        task_logger.exception(\n            \"Permissions sync payload failed to validate. Schema may have been updated.\"\n        )\n        return\n\n    if not payload:\n        return\n\n    remaining = redis_connector.permissions.get_remaining()\n    task_logger.info(\n        f\"Permissions sync progress: cc_pair={cc_pair_id} id={payload.id} remaining={remaining} initial={initial}\"\n    )\n\n    # Add telemetry for permission syncing progress\n    optional_telemetry(\n        record_type=RecordType.PERMISSION_SYNC_PROGRESS,\n        data={\n            \"cc_pair_id\": cc_pair_id,\n            \"total_docs_synced\": initial if initial is not None else 0,\n            \"remaining_docs_to_sync\": remaining,\n        },\n        tenant_id=tenant_id,\n    )\n\n    if remaining > 0:\n        return\n\n    mark_cc_pair_as_permissions_synced(db_session, int(cc_pair_id), payload.started)\n    task_logger.info(\n        f\"Permissions sync finished: cc_pair={cc_pair_id} id={payload.id} num_synced={initial}\"\n    )\n\n    # Add telemetry for permission syncing complete\n    optional_telemetry(\n        record_type=RecordType.PERMISSION_SYNC_COMPLETE,\n        data={\"cc_pair_id\": cc_pair_id},\n        tenant_id=tenant_id,\n    )\n\n    update_sync_record_status(\n        db_session=db_session,\n        entity_id=cc_pair_id,\n        sync_type=SyncType.EXTERNAL_PERMISSIONS,\n        sync_status=SyncStatus.SUCCESS,\n        num_docs_synced=initial,\n    )\n\n    redis_connector.permissions.reset()\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/external_group_syncing/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/external_group_syncing/group_sync_utils.py",
    "content": "from sqlalchemy.orm import Session\n\nfrom ee.onyx.external_permissions.sync_params import (\n    source_group_sync_is_cc_pair_agnostic,\n)\nfrom onyx.db.connector import mark_cc_pair_as_external_group_synced\nfrom onyx.db.connector_credential_pair import get_connector_credential_pairs_for_source\nfrom onyx.db.models import ConnectorCredentialPair\n\n\ndef _get_all_cc_pair_ids_to_mark_as_group_synced(\n    db_session: Session, cc_pair: ConnectorCredentialPair\n) -> list[int]:\n    if not source_group_sync_is_cc_pair_agnostic(cc_pair.connector.source):\n        return [cc_pair.id]\n\n    cc_pairs = get_connector_credential_pairs_for_source(\n        db_session, cc_pair.connector.source\n    )\n    return [cc_pair.id for cc_pair in cc_pairs]\n\n\ndef mark_all_relevant_cc_pairs_as_external_group_synced(\n    db_session: Session, cc_pair: ConnectorCredentialPair\n) -> None:\n    \"\"\"For some source types, one successful group sync run should count for all\n    cc pairs of that type. This function handles that case.\"\"\"\n    cc_pair_ids = _get_all_cc_pair_ids_to_mark_as_group_synced(db_session, cc_pair)\n    for cc_pair_id in cc_pair_ids:\n        mark_cc_pair_as_external_group_synced(db_session, cc_pair_id)\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/external_group_syncing/tasks.py",
    "content": "import time\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\nfrom uuid import uuid4\n\nfrom celery import Celery\nfrom celery import shared_task\nfrom celery import Task\nfrom celery.exceptions import SoftTimeLimitExceeded\nfrom pydantic import ValidationError\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\n\nfrom ee.onyx.background.celery.tasks.external_group_syncing.group_sync_utils import (\n    mark_all_relevant_cc_pairs_as_external_group_synced,\n)\nfrom ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs\nfrom ee.onyx.db.connector_credential_pair import get_cc_pairs_by_source\nfrom ee.onyx.db.external_perm import ExternalUserGroup\nfrom ee.onyx.db.external_perm import mark_old_external_groups_as_stale\nfrom ee.onyx.db.external_perm import remove_stale_external_groups\nfrom ee.onyx.db.external_perm import upsert_external_groups\nfrom ee.onyx.external_permissions.sync_params import (\n    get_all_cc_pair_agnostic_group_sync_sources,\n)\nfrom ee.onyx.external_permissions.sync_params import get_source_perm_sync_config\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.celery_redis import celery_find_task\nfrom onyx.background.celery.celery_redis import celery_get_broker_client\nfrom onyx.background.celery.celery_redis import celery_get_unacked_task_ids\nfrom onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT\nfrom onyx.background.error_logging import emit_background_error\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.configs.constants import CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.configs.constants import OnyxRedisSignals\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import SyncStatus\nfrom onyx.db.enums import SyncType\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.permission_sync_attempt import complete_external_group_sync_attempt\nfrom onyx.db.permission_sync_attempt import (\n    create_external_group_sync_attempt,\n)\nfrom onyx.db.permission_sync_attempt import (\n    mark_external_group_sync_attempt_failed,\n)\nfrom onyx.db.permission_sync_attempt import (\n    mark_external_group_sync_attempt_in_progress,\n)\nfrom onyx.db.sync_record import insert_sync_record\nfrom onyx.db.sync_record import update_sync_record_status\nfrom onyx.redis.redis_connector import RedisConnector\nfrom onyx.redis.redis_connector_ext_group_sync import RedisConnectorExternalGroupSync\nfrom onyx.redis.redis_connector_ext_group_sync import (\n    RedisConnectorExternalGroupSyncPayload,\n)\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.redis.redis_pool import get_redis_replica_client\nfrom onyx.server.runtime.onyx_runtime import OnyxRuntime\nfrom onyx.server.utils import make_short_id\nfrom onyx.utils.logger import format_error_for_logging\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n\n_EXTERNAL_GROUP_BATCH_SIZE = 100\n\n\ndef _fail_external_group_sync_attempt(attempt_id: int, error_msg: str) -> None:\n    \"\"\"Helper to mark an external group sync attempt as failed with an error message.\"\"\"\n    with get_session_with_current_tenant() as db_session:\n        mark_external_group_sync_attempt_failed(\n            attempt_id, db_session, error_message=error_msg\n        )\n\n\ndef _get_fence_validation_block_expiration() -> int:\n    \"\"\"\n    Compute the expiration time for the fence validation block signal.\n    Base expiration is 300 seconds, multiplied by the beat multiplier only in MULTI_TENANT mode.\n    \"\"\"\n    base_expiration = 300  # seconds\n\n    if not MULTI_TENANT:\n        return base_expiration\n\n    try:\n        beat_multiplier = OnyxRuntime.get_beat_multiplier()\n    except Exception:\n        beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT\n\n    return int(base_expiration * beat_multiplier)\n\n\ndef _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:\n    \"\"\"Returns boolean indicating if external group sync is due.\"\"\"\n\n    if cc_pair.access_type != AccessType.SYNC:\n        task_logger.error(\n            f\"Received non-sync CC Pair {cc_pair.id} for external group sync. Actual access type: {cc_pair.access_type}\"\n        )\n        return False\n\n    if cc_pair.status == ConnectorCredentialPairStatus.DELETING:\n        task_logger.debug(\n            f\"Skipping group sync for CC Pair {cc_pair.id} - CC Pair is being deleted\"\n        )\n        return False\n\n    sync_config = get_source_perm_sync_config(cc_pair.connector.source)\n    if sync_config is None:\n        task_logger.debug(\n            f\"Skipping group sync for CC Pair {cc_pair.id} - no sync config found for {cc_pair.connector.source}\"\n        )\n        return False\n\n    # If there is not group sync function for the connector, we don't run the sync\n    # This is fine because all sources dont necessarily have a concept of groups\n    if sync_config.group_sync_config is None:\n        task_logger.debug(\n            f\"Skipping group sync for CC Pair {cc_pair.id} - no group sync config found for {cc_pair.connector.source}\"\n        )\n        return False\n\n    # If the last sync is None, it has never been run so we run the sync\n    last_ext_group_sync = cc_pair.last_time_external_group_sync\n    if last_ext_group_sync is None:\n        return True\n\n    source_sync_period = sync_config.group_sync_config.group_sync_frequency\n\n    # If the last sync is greater than the full fetch period, we run the sync\n    next_sync = last_ext_group_sync + timedelta(seconds=source_sync_period)\n    if datetime.now(timezone.utc) >= next_sync:\n        return True\n\n    return False\n\n\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_EXTERNAL_GROUP_SYNC,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT,\n    bind=True,\n)\ndef check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:\n    # we need to use celery's redis client to access its redis data\n    # (which lives on a different db number)\n    r = get_redis_client()\n    r_replica = get_redis_replica_client()\n\n    lock_beat: RedisLock = r.lock(\n        OnyxRedisLocks.CHECK_CONNECTOR_EXTERNAL_GROUP_SYNC_BEAT_LOCK,\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n\n    # these tasks should never overlap\n    if not lock_beat.acquire(blocking=False):\n        task_logger.warning(\n            f\"Failed to acquire beat lock for external group sync: {tenant_id}\"\n        )\n        return None\n\n    try:\n        cc_pair_ids_to_sync: list[int] = []\n        with get_session_with_current_tenant() as db_session:\n            cc_pairs = get_all_auto_sync_cc_pairs(db_session)\n\n            # For some sources, we only want to sync one cc_pair per source type\n            for source in get_all_cc_pair_agnostic_group_sync_sources():\n                # These are ordered by cc_pair id so the first one is the one we want\n                cc_pairs_to_dedupe = get_cc_pairs_by_source(\n                    db_session,\n                    source,\n                    access_type=AccessType.SYNC,\n                    status=ConnectorCredentialPairStatus.ACTIVE,\n                )\n                # dedupe cc_pairs to only keep the first one\n                for cc_pair_to_remove in cc_pairs_to_dedupe[1:]:\n                    cc_pairs = [\n                        cc_pair\n                        for cc_pair in cc_pairs\n                        if cc_pair.id != cc_pair_to_remove.id\n                    ]\n\n            for cc_pair in cc_pairs:\n                if _is_external_group_sync_due(cc_pair):\n                    cc_pair_ids_to_sync.append(cc_pair.id)\n\n        lock_beat.reacquire()\n        for cc_pair_id in cc_pair_ids_to_sync:\n            payload_id = try_creating_external_group_sync_task(\n                self.app, cc_pair_id, r, tenant_id\n            )\n            if not payload_id:\n                continue\n\n            task_logger.info(\n                f\"External group sync queued: cc_pair={cc_pair_id} id={payload_id}\"\n            )\n\n        # we want to run this less frequently than the overall task\n        lock_beat.reacquire()\n        if not r.exists(OnyxRedisSignals.BLOCK_VALIDATE_EXTERNAL_GROUP_SYNC_FENCES):\n            # clear fences that don't have associated celery tasks in progress\n            # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),\n            # or be currently executing\n            try:\n                r_celery = celery_get_broker_client(self.app)\n                validate_external_group_sync_fences(\n                    tenant_id, self.app, r, r_replica, r_celery, lock_beat\n                )\n            except Exception:\n                task_logger.exception(\n                    \"Exception while validating external group sync fences\"\n                )\n\n            r.set(\n                OnyxRedisSignals.BLOCK_VALIDATE_EXTERNAL_GROUP_SYNC_FENCES,\n                1,\n                ex=_get_fence_validation_block_expiration(),\n            )\n    except SoftTimeLimitExceeded:\n        task_logger.info(\n            \"Soft time limit exceeded, task is being terminated gracefully.\"\n        )\n    except Exception as e:\n        error_msg = format_error_for_logging(e)\n        task_logger.warning(\n            f\"Unexpected check_for_external_group_sync exception: tenant={tenant_id} {error_msg}\"\n        )\n        task_logger.exception(f\"Unexpected exception: tenant={tenant_id}\")\n    finally:\n        if lock_beat.owned():\n            lock_beat.release()\n\n    task_logger.info(f\"check_for_external_group_sync finished: tenant={tenant_id}\")\n    return True\n\n\ndef try_creating_external_group_sync_task(\n    app: Celery,\n    cc_pair_id: int,\n    r: Redis,  # noqa: ARG001\n    tenant_id: str,\n) -> str | None:\n    \"\"\"Returns an int if syncing is needed. The int represents the number of sync tasks generated.\n    Returns None if no syncing is required.\"\"\"\n    payload_id: str | None = None\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n\n    try:\n        # Dont kick off a new sync if the previous one is still running\n        if redis_connector.external_group_sync.fenced:\n            logger.warning(\n                f\"Skipping external group sync for CC Pair {cc_pair_id} - already running.\"\n            )\n            return None\n\n        redis_connector.external_group_sync.generator_clear()\n        redis_connector.external_group_sync.taskset_clear()\n\n        # create before setting fence to avoid race condition where the monitoring\n        # task updates the sync record before it is created\n        try:\n            with get_session_with_current_tenant() as db_session:\n                insert_sync_record(\n                    db_session=db_session,\n                    entity_id=cc_pair_id,\n                    sync_type=SyncType.EXTERNAL_GROUP,\n                )\n        except Exception:\n            task_logger.exception(\"insert_sync_record exceptioned.\")\n\n        # Signal active before creating fence\n        redis_connector.external_group_sync.set_active()\n\n        payload = RedisConnectorExternalGroupSyncPayload(\n            id=make_short_id(),\n            submitted=datetime.now(timezone.utc),\n            started=None,\n            celery_task_id=None,\n        )\n        redis_connector.external_group_sync.set_fence(payload)\n\n        custom_task_id = f\"{redis_connector.external_group_sync.taskset_key}_{uuid4()}\"\n\n        result = app.send_task(\n            OnyxCeleryTask.CONNECTOR_EXTERNAL_GROUP_SYNC_GENERATOR_TASK,\n            kwargs=dict(\n                cc_pair_id=cc_pair_id,\n                tenant_id=tenant_id,\n            ),\n            queue=OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC,\n            task_id=custom_task_id,\n            priority=OnyxCeleryPriority.MEDIUM,\n        )\n\n        payload.celery_task_id = result.id\n        redis_connector.external_group_sync.set_fence(payload)\n\n        payload_id = payload.id\n    except Exception as e:\n        error_msg = format_error_for_logging(e)\n        task_logger.warning(\n            f\"Unexpected try_creating_external_group_sync_task exception: cc_pair={cc_pair_id} {error_msg}\"\n        )\n        task_logger.exception(\n            f\"Unexpected exception while trying to create external group sync task: cc_pair={cc_pair_id}\"\n        )\n        return None\n\n    task_logger.info(\n        f\"try_creating_external_group_sync_task finished: cc_pair={cc_pair_id} payload_id={payload_id}\"\n    )\n    return payload_id\n\n\n@shared_task(\n    name=OnyxCeleryTask.CONNECTOR_EXTERNAL_GROUP_SYNC_GENERATOR_TASK,\n    acks_late=False,\n    soft_time_limit=JOB_TIMEOUT,\n    track_started=True,\n    trail=False,\n    bind=True,\n)\ndef connector_external_group_sync_generator_task(\n    self: Task,  # noqa: ARG001\n    cc_pair_id: int,\n    tenant_id: str,\n) -> None:\n    \"\"\"\n    External group sync task for a given connector credential pair\n    This task assumes that the task has already been properly fenced\n    \"\"\"\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n\n    r = get_redis_client()\n\n    # this wait is needed to avoid a race condition where\n    # the primary worker sends the task and it is immediately executed\n    # before the primary worker can finalize the fence\n    start = time.monotonic()\n    while True:\n        if time.monotonic() - start > CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT:\n            msg = (\n                f\"connector_external_group_sync_generator_task - timed out waiting for fence to be ready: \"\n                f\"fence={redis_connector.external_group_sync.fence_key}\"\n            )\n            emit_background_error(msg, cc_pair_id=cc_pair_id)\n            raise ValueError(msg)\n\n        if not redis_connector.external_group_sync.fenced:  # The fence must exist\n            msg = (\n                f\"connector_external_group_sync_generator_task - fence not found: \"\n                f\"fence={redis_connector.external_group_sync.fence_key}\"\n            )\n            emit_background_error(msg, cc_pair_id=cc_pair_id)\n            raise ValueError(msg)\n\n        payload = redis_connector.external_group_sync.payload  # The payload must exist\n        if not payload:\n            msg = \"connector_external_group_sync_generator_task: payload invalid or not found\"\n            emit_background_error(msg, cc_pair_id=cc_pair_id)\n            raise ValueError(msg)\n\n        if payload.celery_task_id is None:\n            logger.info(\n                f\"connector_external_group_sync_generator_task - Waiting for fence: \"\n                f\"fence={redis_connector.external_group_sync.fence_key}\"\n            )\n            time.sleep(1)\n            continue\n\n        logger.info(\n            f\"connector_external_group_sync_generator_task - Fence found, continuing...: \"\n            f\"fence={redis_connector.external_group_sync.fence_key} \"\n            f\"payload_id={payload.id}\"\n        )\n        break\n\n    lock: RedisLock = r.lock(\n        OnyxRedisLocks.CONNECTOR_EXTERNAL_GROUP_SYNC_LOCK_PREFIX\n        + f\"_{redis_connector.cc_pair_id}\",\n        timeout=CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT,\n    )\n\n    acquired = lock.acquire(blocking=False)\n    if not acquired:\n        msg = f\"External group sync task already running, exiting...: cc_pair={cc_pair_id}\"\n        emit_background_error(msg, cc_pair_id=cc_pair_id)\n        task_logger.error(msg)\n        return None\n\n    try:\n        payload.started = datetime.now(timezone.utc)\n        redis_connector.external_group_sync.set_fence(payload)\n\n        _perform_external_group_sync(\n            cc_pair_id=cc_pair_id,\n            tenant_id=tenant_id,\n        )\n\n        with get_session_with_current_tenant() as db_session:\n            update_sync_record_status(\n                db_session=db_session,\n                entity_id=cc_pair_id,\n                sync_type=SyncType.EXTERNAL_GROUP,\n                sync_status=SyncStatus.SUCCESS,\n            )\n    except Exception as e:\n        error_msg = format_error_for_logging(e)\n        task_logger.warning(\n            f\"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id} {error_msg}\"\n        )\n        task_logger.exception(\n            f\"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}\"\n        )\n\n        msg = f\"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}\"\n        task_logger.exception(msg)\n        emit_background_error(msg + f\"\\n\\n{e}\", cc_pair_id=cc_pair_id)\n\n        with get_session_with_current_tenant() as db_session:\n            update_sync_record_status(\n                db_session=db_session,\n                entity_id=cc_pair_id,\n                sync_type=SyncType.EXTERNAL_GROUP,\n                sync_status=SyncStatus.FAILED,\n            )\n\n        redis_connector.external_group_sync.generator_clear()\n        redis_connector.external_group_sync.taskset_clear()\n        raise e\n    finally:\n        # we always want to clear the fence after the task is done or failed so it doesn't get stuck\n        redis_connector.external_group_sync.set_fence(None)\n        if lock.owned():\n            lock.release()\n\n    task_logger.info(\n        f\"External group sync finished: cc_pair={cc_pair_id} payload_id={payload.id}\"\n    )\n\n\ndef _perform_external_group_sync(\n    cc_pair_id: int,\n    tenant_id: str,\n    timeout_seconds: int = JOB_TIMEOUT,\n) -> None:\n    # Create attempt record at the start\n    with get_session_with_current_tenant() as db_session:\n        attempt_id = create_external_group_sync_attempt(\n            connector_credential_pair_id=cc_pair_id,\n            db_session=db_session,\n        )\n        logger.info(\n            f\"Created external group sync attempt: {attempt_id} for cc_pair={cc_pair_id}\"\n        )\n\n    with get_session_with_current_tenant() as db_session:\n        cc_pair = get_connector_credential_pair_from_id(\n            db_session=db_session,\n            cc_pair_id=cc_pair_id,\n            eager_load_credential=True,\n        )\n        if cc_pair is None:\n            raise ValueError(f\"No connector credential pair found for id: {cc_pair_id}\")\n\n        source_type = cc_pair.connector.source\n        sync_config = get_source_perm_sync_config(source_type)\n        if sync_config is None:\n            msg = f\"No sync config found for {source_type} for cc_pair: {cc_pair_id}\"\n            emit_background_error(msg, cc_pair_id=cc_pair_id)\n            _fail_external_group_sync_attempt(attempt_id, msg)\n            raise ValueError(msg)\n\n        if sync_config.group_sync_config is None:\n            msg = f\"No group sync config found for {source_type} for cc_pair: {cc_pair_id}\"\n            emit_background_error(msg, cc_pair_id=cc_pair_id)\n            _fail_external_group_sync_attempt(attempt_id, msg)\n            raise ValueError(msg)\n\n        ext_group_sync_func = sync_config.group_sync_config.group_sync_func\n\n        logger.info(\n            f\"Marking old external groups as stale for {source_type} for cc_pair: {cc_pair_id}\"\n        )\n        mark_old_external_groups_as_stale(db_session, cc_pair_id)\n\n        # Mark attempt as in progress\n        mark_external_group_sync_attempt_in_progress(attempt_id, db_session)\n        logger.info(f\"Marked external group sync attempt {attempt_id} as in progress\")\n\n        logger.info(\n            f\"Syncing external groups for {source_type} for cc_pair: {cc_pair_id}\"\n        )\n        external_user_group_batch: list[ExternalUserGroup] = []\n        seen_users: set[str] = set()  # Track unique users across all groups\n        total_groups_processed = 0\n        total_group_memberships_synced = 0\n        start_time = time.monotonic()\n        try:\n            external_user_group_generator = ext_group_sync_func(tenant_id, cc_pair)\n            for external_user_group in external_user_group_generator:\n                # Check if the task has exceeded its timeout\n                # NOTE: Celery's soft_time_limit does not work with thread pools,\n                # so we must enforce timeouts internally.\n                elapsed = time.monotonic() - start_time\n                if elapsed > timeout_seconds:\n                    raise RuntimeError(\n                        f\"External group sync task timed out: \"\n                        f\"cc_pair={cc_pair_id} \"\n                        f\"elapsed={elapsed:.0f}s \"\n                        f\"timeout={timeout_seconds}s \"\n                        f\"groups_processed={total_groups_processed}\"\n                    )\n\n                external_user_group_batch.append(external_user_group)\n\n                # Track progress\n                total_groups_processed += 1\n                total_group_memberships_synced += len(external_user_group.user_emails)\n                seen_users = seen_users.union(external_user_group.user_emails)\n\n                if len(external_user_group_batch) >= _EXTERNAL_GROUP_BATCH_SIZE:\n                    logger.debug(\n                        f\"New external user groups: {external_user_group_batch}\"\n                    )\n                    upsert_external_groups(\n                        db_session=db_session,\n                        cc_pair_id=cc_pair_id,\n                        external_groups=external_user_group_batch,\n                        source=cc_pair.connector.source,\n                    )\n                    external_user_group_batch = []\n\n            if external_user_group_batch:\n                logger.debug(f\"New external user groups: {external_user_group_batch}\")\n                upsert_external_groups(\n                    db_session=db_session,\n                    cc_pair_id=cc_pair_id,\n                    external_groups=external_user_group_batch,\n                    source=cc_pair.connector.source,\n                )\n        except Exception as e:\n            format_error_for_logging(e)\n\n            # Mark as failed (this also updates progress to show partial progress)\n            mark_external_group_sync_attempt_failed(\n                attempt_id, db_session, error_message=str(e)\n            )\n\n            # TODO: add some notification to the admins here\n            logger.exception(\n                f\"Error syncing external groups for {source_type} for cc_pair: {cc_pair_id} {e}\"\n            )\n            raise e\n\n        logger.info(\n            f\"Removing stale external groups for {source_type} for cc_pair: {cc_pair_id}\"\n        )\n        remove_stale_external_groups(db_session, cc_pair_id)\n\n        # Calculate total unique users processed\n        total_users_processed = len(seen_users)\n\n        # Complete the sync attempt with final progress\n        complete_external_group_sync_attempt(\n            db_session=db_session,\n            attempt_id=attempt_id,\n            total_users_processed=total_users_processed,\n            total_groups_processed=total_groups_processed,\n            total_group_memberships_synced=total_group_memberships_synced,\n            errors_encountered=0,\n        )\n        logger.info(\n            f\"Completed external group sync attempt {attempt_id}: \"\n            f\"{total_groups_processed} groups, {total_users_processed} users, \"\n            f\"{total_group_memberships_synced} memberships\"\n        )\n\n        mark_all_relevant_cc_pairs_as_external_group_synced(db_session, cc_pair)\n\n\ndef validate_external_group_sync_fences(\n    tenant_id: str,\n    celery_app: Celery,  # noqa: ARG001\n    r: Redis,  # noqa: ARG001\n    r_replica: Redis,\n    r_celery: Redis,\n    lock_beat: RedisLock,\n) -> None:\n    reserved_tasks = celery_get_unacked_task_ids(\n        OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC, r_celery\n    )\n\n    # validate all existing external group sync tasks\n    lock_beat.reacquire()\n    keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))\n    for key in keys:\n        key_bytes = cast(bytes, key)\n        key_str = key_bytes.decode(\"utf-8\")\n        if not key_str.startswith(RedisConnectorExternalGroupSync.FENCE_PREFIX):\n            continue\n\n        validate_external_group_sync_fence(\n            tenant_id,\n            key_bytes,\n            reserved_tasks,\n            r_celery,\n        )\n\n        lock_beat.reacquire()\n    return\n\n\ndef validate_external_group_sync_fence(\n    tenant_id: str,\n    key_bytes: bytes,\n    reserved_tasks: set[str],\n    r_celery: Redis,\n) -> None:\n    \"\"\"Checks for the error condition where an indexing fence is set but the associated celery tasks don't exist.\n    This can happen if the indexing worker hard crashes or is terminated.\n    Being in this bad state means the fence will never clear without help, so this function\n    gives the help.\n\n    How this works:\n    1. This function renews the active signal with a 5 minute TTL under the following conditions\n    1.2. When the task is seen in the redis queue\n    1.3. When the task is seen in the reserved / prefetched list\n\n    2. Externally, the active signal is renewed when:\n    2.1. The fence is created\n    2.2. The indexing watchdog checks the spawned task.\n\n    3. The TTL allows us to get through the transitions on fence startup\n    and when the task starts executing.\n\n    More TTL clarification: it is seemingly impossible to exactly query Celery for\n    whether a task is in the queue or currently executing.\n    1. An unknown task id is always returned as state PENDING.\n    2. Redis can be inspected for the task id, but the task id is gone between the time a worker receives the task\n    and the time it actually starts on the worker.\n    \"\"\"\n    # if the fence doesn't exist, there's nothing to do\n    fence_key = key_bytes.decode(\"utf-8\")\n    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)\n    if cc_pair_id_str is None:\n        msg = (\n            f\"validate_external_group_sync_fence - could not parse id from {fence_key}\"\n        )\n        emit_background_error(msg)\n        task_logger.error(msg)\n        return\n\n    cc_pair_id = int(cc_pair_id_str)\n\n    # parse out metadata and initialize the helper class with it\n    redis_connector = RedisConnector(tenant_id, int(cc_pair_id))\n\n    # check to see if the fence/payload exists\n    if not redis_connector.external_group_sync.fenced:\n        return\n\n    try:\n        payload = redis_connector.external_group_sync.payload\n    except ValidationError:\n        msg = (\n            \"validate_external_group_sync_fence - \"\n            \"Resetting fence because fence schema is out of date: \"\n            f\"cc_pair={cc_pair_id} \"\n            f\"fence={fence_key}\"\n        )\n        task_logger.exception(msg)\n        emit_background_error(msg, cc_pair_id=cc_pair_id)\n\n        redis_connector.external_group_sync.reset()\n        return\n\n    if not payload:\n        return\n\n    if not payload.celery_task_id:\n        return\n\n    # OK, there's actually something for us to validate\n    found = celery_find_task(\n        payload.celery_task_id, OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC, r_celery\n    )\n    if found:\n        # the celery task exists in the redis queue\n        # redis_connector_index.set_active()\n        return\n\n    if payload.celery_task_id in reserved_tasks:\n        # the celery task was prefetched and is reserved within the indexing worker\n        # redis_connector_index.set_active()\n        return\n\n    # we may want to enable this check if using the active task list somehow isn't good enough\n    # if redis_connector_index.generator_locked():\n    #     logger.info(f\"{payload.celery_task_id} is currently executing.\")\n\n    # if we get here, we didn't find any direct indication that the associated celery tasks exist,\n    # but they still might be there due to gaps in our ability to check states during transitions\n    # Checking the active signal safeguards us against these transition periods\n    # (which has a duration that allows us to bridge those gaps)\n    # if redis_connector_index.active():\n    # return\n\n    # celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.\n    emit_background_error(\n        message=(\n            \"validate_external_group_sync_fence - \"\n            \"Resetting fence because no associated celery tasks were found: \"\n            f\"cc_pair={cc_pair_id} \"\n            f\"fence={fence_key} \"\n            f\"payload_id={payload.id}\"\n        ),\n        cc_pair_id=cc_pair_id,\n    )\n\n    redis_connector.external_group_sync.reset()\n    return\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/hooks/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/hooks/tasks.py",
    "content": "from celery import shared_task\n\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.hook import cleanup_old_execution_logs__no_commit\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_HOOK_EXECUTION_LOG_RETENTION_DAYS: int = 30\n\n\n@shared_task(\n    name=OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT,\n    trail=False,\n)\ndef hook_execution_log_cleanup_task(*, tenant_id: str) -> None:  # noqa: ARG001\n    try:\n        with get_session_with_current_tenant() as db_session:\n            deleted: int = cleanup_old_execution_logs__no_commit(\n                db_session=db_session,\n                max_age_days=_HOOK_EXECUTION_LOG_RETENTION_DAYS,\n            )\n            db_session.commit()\n            if deleted:\n                logger.info(\n                    f\"Deleted {deleted} hook execution log(s) older than \"\n                    f\"{_HOOK_EXECUTION_LOG_RETENTION_DAYS} days.\"\n                )\n    except Exception:\n        logger.exception(\"Failed to clean up hook execution logs\")\n        raise\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/query_history/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/query_history/tasks.py",
    "content": "import csv\nimport io\nfrom datetime import datetime\n\nfrom celery import shared_task\nfrom celery import Task\n\nfrom ee.onyx.server.query_history.api import fetch_and_process_chat_session_history\nfrom ee.onyx.server.query_history.api import ONYX_ANONYMIZED_EMAIL\nfrom ee.onyx.server.query_history.models import QuestionAnswerPairSnapshot\nfrom onyx.background.task_utils import construct_query_history_report_name\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.configs.constants import FileType\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import QueryHistoryType\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.tasks import delete_task_with_id\nfrom onyx.db.tasks import mark_task_as_finished_with_id\nfrom onyx.db.tasks import mark_task_as_started_with_id\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\n@shared_task(\n    name=OnyxCeleryTask.EXPORT_QUERY_HISTORY_TASK,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT,\n    bind=True,\n    trail=False,\n)\ndef export_query_history_task(\n    self: Task,\n    *,\n    start: datetime,\n    end: datetime,\n    start_time: datetime,\n    # Need to include the tenant_id since the TenantAwareTask needs this\n    tenant_id: str,  # noqa: ARG001\n) -> None:\n    if not self.request.id:\n        raise RuntimeError(\"No task id defined for this task; cannot identify it\")\n\n    task_id = self.request.id\n    stream = io.StringIO()\n    writer = csv.DictWriter(\n        stream,\n        fieldnames=list(QuestionAnswerPairSnapshot.model_fields.keys()),\n    )\n    writer.writeheader()\n\n    with get_session_with_current_tenant() as db_session:\n        try:\n            mark_task_as_started_with_id(\n                db_session=db_session,\n                task_id=task_id,\n            )\n\n            snapshot_generator = fetch_and_process_chat_session_history(\n                db_session=db_session,\n                start=start,\n                end=end,\n            )\n\n            for snapshot in snapshot_generator:\n                if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:\n                    snapshot.user_email = ONYX_ANONYMIZED_EMAIL\n\n                writer.writerows(\n                    qa_pair.to_json()\n                    for qa_pair in QuestionAnswerPairSnapshot.from_chat_session_snapshot(\n                        snapshot\n                    )\n                )\n\n        except Exception:\n            logger.exception(f\"Failed to export query history with {task_id=}\")\n            mark_task_as_finished_with_id(\n                db_session=db_session,\n                task_id=task_id,\n                success=False,\n            )\n            raise\n\n    report_name = construct_query_history_report_name(task_id)\n    with get_session_with_current_tenant() as db_session:\n        try:\n            stream.seek(0)\n            get_default_file_store().save_file(\n                content=stream,\n                display_name=report_name,\n                file_origin=FileOrigin.QUERY_HISTORY_CSV,\n                file_type=FileType.CSV,\n                file_metadata={\n                    \"start\": start.isoformat(),\n                    \"end\": end.isoformat(),\n                    \"start_time\": start_time.isoformat(),\n                },\n                file_id=report_name,\n            )\n\n            delete_task_with_id(\n                db_session=db_session,\n                task_id=task_id,\n            )\n        except Exception:\n            logger.exception(\n                f\"Failed to save query history export file; {report_name=}\"\n            )\n            mark_task_as_finished_with_id(\n                db_session=db_session,\n                task_id=task_id,\n                success=False,\n            )\n            raise\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/tenant_provisioning/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/tenant_provisioning/tasks.py",
    "content": "\"\"\"\nPeriodic tasks for tenant pre-provisioning.\n\"\"\"\n\nimport asyncio\nimport datetime\nimport uuid\n\nfrom celery import shared_task\nfrom celery import Task\nfrom redis.lock import Lock as RedisLock\n\nfrom ee.onyx.server.tenants.provisioning import setup_tenant\nfrom ee.onyx.server.tenants.schema_management import create_schema_if_not_exists\nfrom ee.onyx.server.tenants.schema_management import get_current_alembic_version\nfrom ee.onyx.server.tenants.schema_management import run_alembic_migrations\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.configs.app_configs import TARGET_AVAILABLE_TENANTS\nfrom onyx.configs.constants import ONYX_CLOUD_TENANT_ID\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.db.engine.sql_engine import get_session_with_shared_schema\nfrom onyx.db.models import AvailableTenant\nfrom onyx.redis.redis_pool import get_redis_client\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import TENANT_ID_PREFIX\n\n# Maximum tenants to provision in a single task run.\n# Each tenant takes ~80s (alembic migrations), so 5 tenants ≈ 7 minutes.\n_MAX_TENANTS_PER_RUN = 5\n\n# Time limits sized for worst-case: provisioning up to _MAX_TENANTS_PER_RUN new tenants\n# (~90s each) plus migrating up to TARGET_AVAILABLE_TENANTS pool tenants (~90s each).\n_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 20  # 20 minutes\n_TENANT_PROVISIONING_TIME_LIMIT = 60 * 25  # 25 minutes\n\n\n@shared_task(\n    name=OnyxCeleryTask.CLOUD_CHECK_AVAILABLE_TENANTS,\n    queue=OnyxCeleryQueues.MONITORING,\n    ignore_result=True,\n    soft_time_limit=_TENANT_PROVISIONING_SOFT_TIME_LIMIT,\n    time_limit=_TENANT_PROVISIONING_TIME_LIMIT,\n    trail=False,\n    bind=True,\n)\ndef check_available_tenants(self: Task) -> None:  # noqa: ARG001\n    \"\"\"\n    Check if we have enough pre-provisioned tenants available.\n    If not, trigger the pre-provisioning of new tenants.\n    \"\"\"\n    task_logger.info(\"STARTING CHECK_AVAILABLE_TENANTS\")\n    if not MULTI_TENANT:\n        task_logger.info(\n            \"Multi-tenancy is not enabled, skipping tenant pre-provisioning\"\n        )\n        return\n\n    r = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)\n    lock_check: RedisLock = r.lock(\n        OnyxRedisLocks.CHECK_AVAILABLE_TENANTS_LOCK,\n        timeout=_TENANT_PROVISIONING_TIME_LIMIT,\n    )\n\n    # These tasks should never overlap\n    if not lock_check.acquire(blocking=False):\n        task_logger.info(\n            \"Skipping check_available_tenants task because it is already running\"\n        )\n        return\n\n    try:\n        # Get the current count of available tenants\n        with get_session_with_shared_schema() as db_session:\n            num_available_tenants = db_session.query(AvailableTenant).count()\n\n        # Get the target number of available tenants\n        num_minimum_available_tenants = TARGET_AVAILABLE_TENANTS\n\n        # Calculate how many new tenants we need to provision\n        if num_available_tenants < num_minimum_available_tenants:\n            tenants_to_provision = num_minimum_available_tenants - num_available_tenants\n        else:\n            tenants_to_provision = 0\n\n        task_logger.info(\n            f\"Available tenants: {num_available_tenants}, \"\n            f\"Target minimum available tenants: {num_minimum_available_tenants}, \"\n            f\"To provision: {tenants_to_provision}\"\n        )\n\n        batch_size = min(tenants_to_provision, _MAX_TENANTS_PER_RUN)\n        if batch_size < tenants_to_provision:\n            task_logger.info(\n                f\"Capping batch to {batch_size} (need {tenants_to_provision}, will catch up next cycle)\"\n            )\n\n        provisioned = 0\n        for i in range(batch_size):\n            task_logger.info(f\"Provisioning tenant {i + 1}/{batch_size}\")\n            try:\n                if pre_provision_tenant():\n                    provisioned += 1\n            except Exception:\n                task_logger.exception(\n                    f\"Failed to provision tenant {i + 1}/{batch_size}, continuing with remaining tenants\"\n                )\n\n        task_logger.info(f\"Provisioning complete: {provisioned}/{batch_size} succeeded\")\n\n        # Migrate any pool tenants that were provisioned before a new migration was deployed\n        _migrate_stale_pool_tenants()\n\n    except Exception:\n        task_logger.exception(\"Error in check_available_tenants task\")\n\n    finally:\n        try:\n            lock_check.release()\n        except Exception:\n            task_logger.warning(\n                \"Could not release check lock (likely expired), continuing\"\n            )\n\n\ndef _migrate_stale_pool_tenants() -> None:\n    \"\"\"\n    Run alembic upgrade head on all pool tenants. Since alembic upgrade head is\n    idempotent, tenants already at head are a fast no-op. This ensures pool\n    tenants are always current so that signup doesn't hit schema mismatches\n    (e.g. missing columns added after the tenant was pre-provisioned).\n    \"\"\"\n    with get_session_with_shared_schema() as db_session:\n        pool_tenants = db_session.query(AvailableTenant).all()\n        tenant_ids = [t.tenant_id for t in pool_tenants]\n\n    if not tenant_ids:\n        return\n\n    task_logger.info(\n        f\"Checking {len(tenant_ids)} pool tenant(s) for pending migrations\"\n    )\n\n    for tenant_id in tenant_ids:\n        try:\n            run_alembic_migrations(tenant_id)\n            new_version = get_current_alembic_version(tenant_id)\n            with get_session_with_shared_schema() as db_session:\n                tenant = (\n                    db_session.query(AvailableTenant)\n                    .filter_by(tenant_id=tenant_id)\n                    .first()\n                )\n                if tenant and tenant.alembic_version != new_version:\n                    task_logger.info(\n                        f\"Migrated pool tenant {tenant_id}: {tenant.alembic_version} -> {new_version}\"\n                    )\n                    tenant.alembic_version = new_version\n                    db_session.commit()\n        except Exception:\n            task_logger.exception(\n                f\"Failed to migrate pool tenant {tenant_id}, skipping\"\n            )\n\n\ndef pre_provision_tenant() -> bool:\n    \"\"\"\n    Pre-provision a new tenant and store it in the NewAvailableTenant table.\n    This function fully sets up the tenant with all necessary configurations,\n    so it's ready to be assigned to a user immediately.\n\n    Returns True if a tenant was successfully provisioned, False otherwise.\n    \"\"\"\n    # The MULTI_TENANT check is now done at the caller level (check_available_tenants)\n    # rather than inside this function\n\n    r = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)\n    lock_provision: RedisLock = r.lock(\n        OnyxRedisLocks.CLOUD_PRE_PROVISION_TENANT_LOCK,\n        timeout=_TENANT_PROVISIONING_TIME_LIMIT,\n    )\n\n    # Allow multiple pre-provisioning tasks to run, but ensure they don't overlap\n    if not lock_provision.acquire(blocking=False):\n        task_logger.warning(\n            \"Skipping pre_provision_tenant — could not acquire provision lock\"\n        )\n        return False\n\n    tenant_id: str | None = None\n    try:\n        # Generate a new tenant ID\n        tenant_id = TENANT_ID_PREFIX + str(uuid.uuid4())\n        task_logger.info(f\"Pre-provisioning tenant: {tenant_id}\")\n\n        # Create the schema for the new tenant\n        schema_created = create_schema_if_not_exists(tenant_id)\n        if schema_created:\n            task_logger.debug(f\"Created schema for tenant: {tenant_id}\")\n        else:\n            task_logger.debug(f\"Schema already exists for tenant: {tenant_id}\")\n\n        # Set up the tenant with all necessary configurations\n        task_logger.debug(f\"Setting up tenant configuration: {tenant_id}\")\n        asyncio.run(setup_tenant(tenant_id))\n        task_logger.debug(f\"Tenant configuration completed: {tenant_id}\")\n\n        # Get the current Alembic version\n        alembic_version = get_current_alembic_version(tenant_id)\n        task_logger.debug(\n            f\"Tenant {tenant_id} using Alembic version: {alembic_version}\"\n        )\n\n        # Store the pre-provisioned tenant in the database\n        task_logger.debug(f\"Storing pre-provisioned tenant in database: {tenant_id}\")\n        with get_session_with_shared_schema() as db_session:\n            # Use a transaction to ensure atomicity\n            db_session.begin()\n            try:\n                new_tenant = AvailableTenant(\n                    tenant_id=tenant_id,\n                    alembic_version=alembic_version,\n                    date_created=datetime.datetime.now(),\n                )\n                db_session.add(new_tenant)\n                db_session.commit()\n                task_logger.info(f\"Successfully pre-provisioned tenant: {tenant_id}\")\n                return True\n            except Exception:\n                db_session.rollback()\n                task_logger.error(\n                    f\"Failed to store pre-provisioned tenant: {tenant_id}\",\n                    exc_info=True,\n                )\n                raise\n\n    except Exception:\n        task_logger.error(\"Error in pre_provision_tenant task\", exc_info=True)\n        # If we have a tenant_id, attempt to rollback any partially completed provisioning\n        if tenant_id:\n            task_logger.info(\n                f\"Rolling back failed tenant provisioning for: {tenant_id}\"\n            )\n            try:\n                from ee.onyx.server.tenants.provisioning import (\n                    rollback_tenant_provisioning,\n                )\n\n                asyncio.run(rollback_tenant_provisioning(tenant_id))\n            except Exception:\n                task_logger.exception(f\"Error during rollback for tenant: {tenant_id}\")\n        return False\n    finally:\n        try:\n            lock_provision.release()\n        except Exception:\n            task_logger.warning(\n                \"Could not release provision lock (likely expired), continuing\"\n            )\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/ttl_management/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/ttl_management/tasks.py",
    "content": "from uuid import UUID\n\nfrom celery import shared_task\nfrom celery import Task\n\nfrom ee.onyx.background.celery_utils import should_perform_chat_ttl_check\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.chat import delete_chat_session\nfrom onyx.db.chat import get_chat_sessions_older_than\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.server.settings.store import load_settings\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n@shared_task(\n    name=OnyxCeleryTask.PERFORM_TTL_MANAGEMENT_TASK,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT,\n    bind=True,\n    trail=False,\n)\ndef perform_ttl_management_task(\n    self: Task, retention_limit_days: int, *, tenant_id: str  # noqa: ARG001\n) -> None:\n    task_id = self.request.id\n    if not task_id:\n        raise RuntimeError(\"No task id defined for this task; cannot identify it\")\n\n    user_id: UUID | None = None\n    session_id: UUID | None = None\n    try:\n        with get_session_with_current_tenant() as db_session:\n\n            old_chat_sessions = get_chat_sessions_older_than(\n                retention_limit_days, db_session\n            )\n\n        for user_id, session_id in old_chat_sessions:\n            # one session per delete so that we don't blow up if a deletion fails.\n            with get_session_with_current_tenant() as db_session:\n                delete_chat_session(\n                    user_id,\n                    session_id,\n                    db_session,\n                    include_deleted=True,\n                    hard_delete=True,\n                )\n\n    except Exception:\n        logger.exception(\n            f\"delete_chat_session exceptioned. user_id={user_id} session_id={session_id}\"\n        )\n        raise\n\n\n@shared_task(\n    name=OnyxCeleryTask.CHECK_TTL_MANAGEMENT_TASK,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT,\n)\ndef check_ttl_management_task(*, tenant_id: str) -> None:\n    \"\"\"Runs periodically to check if any ttl tasks should be run and adds them\n    to the queue\"\"\"\n\n    settings = load_settings()\n    retention_limit_days = settings.maximum_chat_retention_days\n    with get_session_with_current_tenant() as db_session:\n        if should_perform_chat_ttl_check(retention_limit_days, db_session):\n            perform_ttl_management_task.apply_async(\n                kwargs=dict(\n                    retention_limit_days=retention_limit_days, tenant_id=tenant_id\n                ),\n            )\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/usage_reporting/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/usage_reporting/tasks.py",
    "content": "from datetime import datetime\nfrom uuid import UUID\n\nfrom celery import shared_task\nfrom celery import Task\n\nfrom ee.onyx.server.reporting.usage_export_generation import create_new_usage_report\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n@shared_task(\n    name=OnyxCeleryTask.GENERATE_USAGE_REPORT_TASK,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT,\n    bind=True,\n    trail=False,\n)\ndef generate_usage_report_task(\n    self: Task,  # noqa: ARG001\n    *,\n    tenant_id: str,  # noqa: ARG001\n    user_id: str | None = None,\n    period_from: str | None = None,\n    period_to: str | None = None,\n) -> None:\n    \"\"\"User-initiated usage report generation task\"\"\"\n    # Parse period if provided\n    period = None\n    if period_from and period_to:\n        period = (\n            datetime.fromisoformat(period_from),\n            datetime.fromisoformat(period_to),\n        )\n\n    # Generate the report\n    with get_session_with_current_tenant() as db_session:\n        create_new_usage_report(\n            db_session=db_session,\n            user_id=UUID(user_id) if user_id else None,\n            period=period,\n        )\n"
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/vespa/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/background/celery/tasks/vespa/tasks.py",
    "content": "from typing import cast\n\nfrom redis import Redis\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.user_group import delete_user_group\nfrom ee.onyx.db.user_group import fetch_user_group\nfrom ee.onyx.db.user_group import mark_user_group_as_synced\nfrom ee.onyx.db.user_group import prepare_user_group_for_deletion\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.db.enums import SyncStatus\nfrom onyx.db.enums import SyncType\nfrom onyx.db.sync_record import update_sync_record_status\nfrom onyx.redis.redis_usergroup import RedisUserGroup\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef monitor_usergroup_taskset(\n    tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session\n) -> None:\n    \"\"\"This function is likely to move in the worker refactor happening next.\"\"\"\n    fence_key = key_bytes.decode(\"utf-8\")\n    usergroup_id_str = RedisUserGroup.get_id_from_fence_key(fence_key)\n    if not usergroup_id_str:\n        task_logger.warning(f\"Could not parse usergroup id from {fence_key}\")\n        return\n\n    try:\n        usergroup_id = int(usergroup_id_str)\n    except ValueError:\n        task_logger.exception(f\"usergroup_id ({usergroup_id_str}) is not an integer!\")\n        raise\n\n    rug = RedisUserGroup(tenant_id, usergroup_id)\n    if not rug.fenced:\n        return\n\n    initial_count = rug.payload\n    if initial_count is None:\n        return\n\n    count = cast(int, r.scard(rug.taskset_key))\n    task_logger.info(\n        f\"User group sync progress: usergroup_id={usergroup_id} remaining={count} initial={initial_count}\"\n    )\n    if count > 0:\n        update_sync_record_status(\n            db_session=db_session,\n            entity_id=usergroup_id,\n            sync_type=SyncType.USER_GROUP,\n            sync_status=SyncStatus.IN_PROGRESS,\n            num_docs_synced=count,\n        )\n        return\n\n    user_group = fetch_user_group(db_session=db_session, user_group_id=usergroup_id)\n    if user_group:\n        usergroup_name = user_group.name\n        try:\n            if user_group.is_up_for_deletion:\n                # this prepare should have been run when the deletion was scheduled,\n                # but run it again to be sure we're ready to go\n                mark_user_group_as_synced(db_session, user_group)\n                prepare_user_group_for_deletion(db_session, usergroup_id)\n                delete_user_group(db_session=db_session, user_group=user_group)\n\n                update_sync_record_status(\n                    db_session=db_session,\n                    entity_id=usergroup_id,\n                    sync_type=SyncType.USER_GROUP,\n                    sync_status=SyncStatus.SUCCESS,\n                    num_docs_synced=initial_count,\n                )\n\n                task_logger.info(\n                    f\"Deleted usergroup: name={usergroup_name} id={usergroup_id}\"\n                )\n            else:\n                mark_user_group_as_synced(db_session=db_session, user_group=user_group)\n\n                update_sync_record_status(\n                    db_session=db_session,\n                    entity_id=usergroup_id,\n                    sync_type=SyncType.USER_GROUP,\n                    sync_status=SyncStatus.SUCCESS,\n                    num_docs_synced=initial_count,\n                )\n\n                task_logger.info(\n                    f\"Synced usergroup. name={usergroup_name} id={usergroup_id}\"\n                )\n        except Exception as e:\n            update_sync_record_status(\n                db_session=db_session,\n                entity_id=usergroup_id,\n                sync_type=SyncType.USER_GROUP,\n                sync_status=SyncStatus.FAILED,\n                num_docs_synced=initial_count,\n            )\n            raise e\n\n    rug.reset()\n"
  },
  {
    "path": "backend/ee/onyx/background/celery_utils.py",
    "content": "from sqlalchemy.orm import Session\n\nfrom ee.onyx.background.task_name_builders import name_chat_ttl_task\nfrom onyx.db.tasks import check_task_is_live_and_not_timed_out\nfrom onyx.db.tasks import get_latest_task\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef should_perform_chat_ttl_check(\n    retention_limit_days: float | None, db_session: Session\n) -> bool:\n    # TODO: make this a check for None and add behavior for 0 day TTL\n    if not retention_limit_days:\n        return False\n\n    task_name = name_chat_ttl_task(retention_limit_days)\n    latest_task = get_latest_task(task_name, db_session)\n    if not latest_task:\n        return True\n\n    if check_task_is_live_and_not_timed_out(latest_task, db_session):\n        logger.debug(f\"{task_name} is already being performed. Skipping.\")\n        return False\n    return True\n"
  },
  {
    "path": "backend/ee/onyx/background/task_name_builders.py",
    "content": "from datetime import datetime\n\nfrom onyx.configs.constants import OnyxCeleryTask\n\n\nQUERY_HISTORY_TASK_NAME_PREFIX = OnyxCeleryTask.EXPORT_QUERY_HISTORY_TASK\n\n\ndef name_chat_ttl_task(\n    retention_limit_days: float,\n    tenant_id: str | None = None,  # noqa: ARG001\n) -> str:\n    return f\"chat_ttl_{retention_limit_days}_days\"\n\n\ndef query_history_task_name(start: datetime, end: datetime) -> str:\n    return f\"{QUERY_HISTORY_TASK_NAME_PREFIX}_{start}_{end}\"\n"
  },
  {
    "path": "backend/ee/onyx/configs/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/configs/app_configs.py",
    "content": "import json\nimport os\n\n\n#####\n# Auto Permission Sync\n#####\n# should generally only be used for sources that support polling of permissions\n# e.g. can pull in only permission changes rather than having to go through all\n# documents every time\nDEFAULT_PERMISSION_DOC_SYNC_FREQUENCY = int(\n    os.environ.get(\"DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY\") or 5 * 60\n)\n\n\n#####\n# Confluence\n#####\n\n# In seconds, default is 30 minutes\nCONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY = int(\n    os.environ.get(\"CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY\") or 30 * 60\n)\n# In seconds, default is 30 minutes\nCONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY = int(\n    os.environ.get(\"CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY\") or 30 * 60\n)\n# This is a boolean that determines if anonymous access is public\n# Default behavior is to not make the page public and instead add a group\n# that contains all the users that we found in Confluence\nCONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC = (\n    os.environ.get(\"CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC\", \"\").lower() == \"true\"\n)\n\n\n#####\n# JIRA\n#####\n\n# In seconds, default is 30 minutes\nJIRA_PERMISSION_DOC_SYNC_FREQUENCY = int(\n    os.environ.get(\"JIRA_PERMISSION_DOC_SYNC_FREQUENCY\") or 30 * 60\n)\n# In seconds, default is 30 minutes\nJIRA_PERMISSION_GROUP_SYNC_FREQUENCY = int(\n    os.environ.get(\"JIRA_PERMISSION_GROUP_SYNC_FREQUENCY\") or 30 * 60\n)\n\n\n#####\n# Google Drive\n#####\nGOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY = int(\n    os.environ.get(\"GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY\") or 5 * 60\n)\n\n\n#####\n# GitHub\n#####\n# In seconds, default is 5 minutes\nGITHUB_PERMISSION_DOC_SYNC_FREQUENCY = int(\n    os.environ.get(\"GITHUB_PERMISSION_DOC_SYNC_FREQUENCY\") or 5 * 60\n)\n# In seconds, default is 5 minutes\nGITHUB_PERMISSION_GROUP_SYNC_FREQUENCY = int(\n    os.environ.get(\"GITHUB_PERMISSION_GROUP_SYNC_FREQUENCY\") or 5 * 60\n)\n\n\n#####\n# Slack\n#####\nSLACK_PERMISSION_DOC_SYNC_FREQUENCY = int(\n    os.environ.get(\"SLACK_PERMISSION_DOC_SYNC_FREQUENCY\") or 5 * 60\n)\n\nNUM_PERMISSION_WORKERS = int(os.environ.get(\"NUM_PERMISSION_WORKERS\") or 2)\n\n\n#####\n# Teams\n#####\n# In seconds, default is 5 minutes\nTEAMS_PERMISSION_DOC_SYNC_FREQUENCY = int(\n    os.environ.get(\"TEAMS_PERMISSION_DOC_SYNC_FREQUENCY\") or 5 * 60\n)\n\n#####\n# SharePoint\n#####\n# In seconds, default is 30 minutes\nSHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY = int(\n    os.environ.get(\"SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY\") or 30 * 60\n)\n\n# In seconds, default is 5 minutes\nSHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY = int(\n    os.environ.get(\"SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY\") or 5 * 60\n)\n\n\n####\n# Celery Job Frequency\n####\nCHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS = float(\n    os.environ.get(\"CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS\") or 1\n)  # float for easier testing\n\n\nSTRIPE_SECRET_KEY = os.environ.get(\"STRIPE_SECRET_KEY\")\n\n# JWT Public Key URL\nJWT_PUBLIC_KEY_URL: str | None = os.getenv(\"JWT_PUBLIC_KEY_URL\", None)\n\n\n# Super Users\nSUPER_USERS = json.loads(os.environ.get(\"SUPER_USERS\", \"[]\"))\nSUPER_CLOUD_API_KEY = os.environ.get(\"SUPER_CLOUD_API_KEY\", \"api_key\")\n\nPOSTHOG_API_KEY = os.environ.get(\"POSTHOG_API_KEY\")\nPOSTHOG_HOST = os.environ.get(\"POSTHOG_HOST\") or \"https://us.i.posthog.com\"\nPOSTHOG_DEBUG_LOGS_ENABLED = (\n    os.environ.get(\"POSTHOG_DEBUG_LOGS_ENABLED\", \"\").lower() == \"true\"\n)\n\nMARKETING_POSTHOG_API_KEY = os.environ.get(\"MARKETING_POSTHOG_API_KEY\")\n\nHUBSPOT_TRACKING_URL = os.environ.get(\"HUBSPOT_TRACKING_URL\")\n\nGATED_TENANTS_KEY = \"gated_tenants\"\n\n# License enforcement - when True, blocks API access for gated/expired licenses\nLICENSE_ENFORCEMENT_ENABLED = (\n    os.environ.get(\"LICENSE_ENFORCEMENT_ENABLED\", \"true\").lower() == \"true\"\n)\n\n# Cloud data plane URL - self-hosted instances call this to reach cloud proxy endpoints\n# Used when MULTI_TENANT=false (self-hosted mode)\nCLOUD_DATA_PLANE_URL = os.environ.get(\n    \"CLOUD_DATA_PLANE_URL\", \"https://cloud.onyx.app/api\"\n)\n"
  },
  {
    "path": "backend/ee/onyx/configs/license_enforcement_config.py",
    "content": "\"\"\"Constants for license enforcement.\n\nThis file is the single source of truth for:\n1. Paths that bypass license enforcement (always accessible)\n2. Paths that require an EE license (EE-only features)\n\nImport these constants in both production code and tests to ensure consistency.\n\"\"\"\n\n# Paths that are ALWAYS accessible, even when license is expired/gated.\n# These enable users to:\n#   /auth - Log in/out (users can't fix billing if locked out of auth)\n#   /license - Fetch, upload, or check license status\n#   /health - Health checks for load balancers/orchestrators\n#   /me - Basic user info needed for UI rendering\n#   /settings, /enterprise-settings - View app status and branding\n#   /billing - Unified billing API\n#   /proxy - Self-hosted proxy endpoints (have own license-based auth)\n#   /tenants/billing-* - Legacy billing endpoints (backwards compatibility)\n#   /manage/users, /users - User management (needed for seat limit resolution)\n#   /notifications - Needed for UI to load properly\nLICENSE_ENFORCEMENT_ALLOWED_PREFIXES: frozenset[str] = frozenset(\n    {\n        \"/auth\",\n        \"/license\",\n        \"/health\",\n        \"/me\",\n        \"/settings\",\n        \"/enterprise-settings\",\n        # Billing endpoints (unified API for both MT and self-hosted)\n        \"/billing\",\n        \"/admin/billing\",\n        # Proxy endpoints for self-hosted billing (no tenant context)\n        \"/proxy\",\n        # Legacy tenant billing endpoints (kept for backwards compatibility)\n        \"/tenants/billing-information\",\n        \"/tenants/create-customer-portal-session\",\n        \"/tenants/create-subscription-session\",\n        # User management - needed to remove users when seat limit exceeded\n        \"/manage/users\",\n        \"/manage/admin/users\",\n        \"/manage/admin/valid-domains\",\n        \"/manage/admin/deactivate-user\",\n        \"/manage/admin/delete-user\",\n        \"/users\",\n        # Notifications - needed for UI to load properly\n        \"/notifications\",\n    }\n)\n\n# EE-only paths that require a valid license.\n# Users without a license (community edition) cannot access these.\n# These are blocked even when user has never subscribed (no license).\nEE_ONLY_PATH_PREFIXES: frozenset[str] = frozenset(\n    {\n        # User groups and access control\n        \"/manage/admin/user-group\",\n        # Analytics and reporting\n        \"/analytics\",\n        # Query history (admin chat session endpoints)\n        \"/admin/chat-sessions\",\n        \"/admin/chat-session-history\",\n        \"/admin/query-history\",\n        # Usage reporting/export\n        \"/admin/usage-report\",\n        # Standard answers (canned responses)\n        \"/manage/admin/standard-answer\",\n        # Token rate limits\n        \"/admin/token-rate-limits\",\n        # Evals\n        \"/evals\",\n        # Hook extensions\n        \"/admin/hooks\",\n    }\n)\n"
  },
  {
    "path": "backend/ee/onyx/connectors/perm_sync_valid.py",
    "content": "from onyx.connectors.confluence.connector import ConfluenceConnector\nfrom onyx.connectors.google_drive.connector import GoogleDriveConnector\nfrom onyx.connectors.interfaces import BaseConnector\n\n\ndef validate_confluence_perm_sync(connector: ConfluenceConnector) -> None:\n    \"\"\"\n    Validate that the connector is configured correctly for permissions syncing.\n    \"\"\"\n\n\ndef validate_drive_perm_sync(connector: GoogleDriveConnector) -> None:\n    \"\"\"\n    Validate that the connector is configured correctly for permissions syncing.\n    \"\"\"\n\n\ndef validate_perm_sync(connector: BaseConnector) -> None:\n    \"\"\"\n    Override this if your connector needs to validate permissions syncing.\n    Raise an exception if invalid, otherwise do nothing.\n\n    Default is a no-op (always successful).\n    \"\"\"\n    if isinstance(connector, ConfluenceConnector):\n        validate_confluence_perm_sync(connector)\n    elif isinstance(connector, GoogleDriveConnector):\n        validate_drive_perm_sync(connector)\n"
  },
  {
    "path": "backend/ee/onyx/db/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/db/analytics.py",
    "content": "import datetime\nfrom collections.abc import Sequence\nfrom uuid import UUID\n\nfrom sqlalchemy import and_\nfrom sqlalchemy import case\nfrom sqlalchemy import cast\nfrom sqlalchemy import Date\nfrom sqlalchemy import func\nfrom sqlalchemy import or_\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import MessageType\nfrom onyx.db.models import ChatMessage\nfrom onyx.db.models import ChatMessageFeedback\nfrom onyx.db.models import ChatSession\nfrom onyx.db.models import Persona\nfrom onyx.db.models import User\nfrom onyx.db.models import UserRole\n\n\ndef fetch_query_analytics(\n    start: datetime.datetime,\n    end: datetime.datetime,\n    db_session: Session,\n) -> Sequence[tuple[int, int, int, datetime.date]]:\n    stmt = (\n        select(\n            func.count(ChatMessage.id),\n            func.sum(case((ChatMessageFeedback.is_positive, 1), else_=0)),\n            func.sum(\n                case(\n                    (ChatMessageFeedback.is_positive == False, 1),  # noqa: E712\n                    else_=0,  # noqa: E712\n                )\n            ),\n            cast(ChatMessage.time_sent, Date),\n        )\n        .join(\n            ChatMessageFeedback,\n            ChatMessageFeedback.chat_message_id == ChatMessage.id,\n            isouter=True,\n        )\n        .where(\n            ChatMessage.time_sent >= start,\n        )\n        .where(\n            ChatMessage.time_sent <= end,\n        )\n        .where(ChatMessage.message_type == MessageType.ASSISTANT)\n        .group_by(cast(ChatMessage.time_sent, Date))\n        .order_by(cast(ChatMessage.time_sent, Date))\n    )\n\n    return db_session.execute(stmt).all()  # type: ignore\n\n\ndef fetch_per_user_query_analytics(\n    start: datetime.datetime,\n    end: datetime.datetime,\n    db_session: Session,\n) -> Sequence[tuple[int, int, int, datetime.date, UUID]]:\n    stmt = (\n        select(\n            func.count(ChatMessage.id),\n            func.sum(case((ChatMessageFeedback.is_positive, 1), else_=0)),\n            func.sum(\n                case(\n                    (ChatMessageFeedback.is_positive == False, 1),  # noqa: E712\n                    else_=0,  # noqa: E712\n                )\n            ),\n            cast(ChatMessage.time_sent, Date),\n            ChatSession.user_id,\n        )\n        .join(ChatSession, ChatSession.id == ChatMessage.chat_session_id)\n        # Include chats that have no explicit feedback instead of dropping them\n        .join(\n            ChatMessageFeedback,\n            ChatMessageFeedback.chat_message_id == ChatMessage.id,\n            isouter=True,\n        )\n        .where(\n            ChatMessage.time_sent >= start,\n        )\n        .where(\n            ChatMessage.time_sent <= end,\n        )\n        .where(ChatMessage.message_type == MessageType.ASSISTANT)\n        .group_by(cast(ChatMessage.time_sent, Date), ChatSession.user_id)\n        .order_by(cast(ChatMessage.time_sent, Date), ChatSession.user_id)\n    )\n\n    return db_session.execute(stmt).all()  # type: ignore\n\n\ndef fetch_onyxbot_analytics(\n    start: datetime.datetime,\n    end: datetime.datetime,\n    db_session: Session,\n) -> Sequence[tuple[int, int, datetime.date]]:\n    \"\"\"Gets the:\n    Date of each set of aggregated statistics\n    Number of OnyxBot Queries (Chat Sessions)\n    Number of instances of Negative feedback OR Needing additional help\n        (only counting the last feedback)\n    \"\"\"\n    # Get every chat session in the time range which is a Onyxbot flow\n    # along with the first Assistant message which is the response to the user question.\n    # Generally there should not be more than one AI message per chat session of this type\n    subquery_first_ai_response = (\n        db_session.query(\n            ChatMessage.chat_session_id.label(\"chat_session_id\"),\n            func.min(ChatMessage.id).label(\"chat_message_id\"),\n        )\n        .join(ChatSession, ChatSession.id == ChatMessage.chat_session_id)\n        .where(\n            ChatSession.time_created >= start,\n            ChatSession.time_created <= end,\n            ChatSession.onyxbot_flow.is_(True),\n        )\n        .where(\n            ChatMessage.message_type == MessageType.ASSISTANT,\n        )\n        .group_by(ChatMessage.chat_session_id)\n        .subquery()\n    )\n\n    # Get the chat message ids and most recent feedback for each of those chat messages,\n    # not including the messages that have no feedback\n    subquery_last_feedback = (\n        db_session.query(\n            ChatMessageFeedback.chat_message_id.label(\"chat_message_id\"),\n            func.max(ChatMessageFeedback.id).label(\"max_feedback_id\"),\n        )\n        .group_by(ChatMessageFeedback.chat_message_id)\n        .subquery()\n    )\n\n    results = (\n        db_session.query(\n            func.count(ChatSession.id).label(\"total_sessions\"),\n            # Need to explicitly specify this as False to handle the NULL case so the cases without\n            # feedback aren't counted against Onyxbot\n            func.sum(\n                case(\n                    (\n                        or_(\n                            ChatMessageFeedback.is_positive.is_(False),\n                            ChatMessageFeedback.required_followup.is_(True),\n                        ),\n                        1,\n                    ),\n                    else_=0,\n                )\n            ).label(\"negative_answer\"),\n            cast(ChatSession.time_created, Date).label(\"session_date\"),\n        )\n        .join(\n            subquery_first_ai_response,\n            ChatSession.id == subquery_first_ai_response.c.chat_session_id,\n        )\n        # Combine the chat sessions with latest feedback to get the latest feedback for the first AI\n        # message of the chat session where the chat session is Onyxbot type and within the time\n        # range specified. Left/outer join used here to ensure that if no feedback, a null is used\n        # for the feedback id\n        .outerjoin(\n            subquery_last_feedback,\n            subquery_first_ai_response.c.chat_message_id\n            == subquery_last_feedback.c.chat_message_id,\n        )\n        # Join the actual feedback table to get the feedback info for the sums\n        # Outer join because the \"last feedback\" may be null\n        .outerjoin(\n            ChatMessageFeedback,\n            ChatMessageFeedback.id == subquery_last_feedback.c.max_feedback_id,\n        )\n        .group_by(cast(ChatSession.time_created, Date))\n        .order_by(cast(ChatSession.time_created, Date))\n        .all()\n    )\n\n    return [tuple(row) for row in results]\n\n\ndef fetch_persona_message_analytics(\n    db_session: Session,\n    persona_id: int,\n    start: datetime.datetime,\n    end: datetime.datetime,\n) -> list[tuple[int, datetime.date]]:\n    \"\"\"Gets the daily message counts for a specific persona within the given time range.\"\"\"\n    query = (\n        select(\n            func.count(ChatMessage.id),\n            cast(ChatMessage.time_sent, Date),\n        )\n        .join(\n            ChatSession,\n            ChatMessage.chat_session_id == ChatSession.id,\n        )\n        .where(\n            ChatSession.persona_id == persona_id,\n            ChatMessage.time_sent >= start,\n            ChatMessage.time_sent <= end,\n            ChatMessage.message_type == MessageType.ASSISTANT,\n        )\n        .group_by(cast(ChatMessage.time_sent, Date))\n        .order_by(cast(ChatMessage.time_sent, Date))\n    )\n\n    return [tuple(row) for row in db_session.execute(query).all()]\n\n\ndef fetch_persona_unique_users(\n    db_session: Session,\n    persona_id: int,\n    start: datetime.datetime,\n    end: datetime.datetime,\n) -> list[tuple[int, datetime.date]]:\n    \"\"\"Gets the daily unique user counts for a specific persona within the given time range.\"\"\"\n    query = (\n        select(\n            func.count(func.distinct(ChatSession.user_id)),\n            cast(ChatMessage.time_sent, Date),\n        )\n        .join(\n            ChatSession,\n            ChatMessage.chat_session_id == ChatSession.id,\n        )\n        .where(\n            ChatSession.persona_id == persona_id,\n            ChatMessage.time_sent >= start,\n            ChatMessage.time_sent <= end,\n            ChatMessage.message_type == MessageType.ASSISTANT,\n        )\n        .group_by(cast(ChatMessage.time_sent, Date))\n        .order_by(cast(ChatMessage.time_sent, Date))\n    )\n\n    return [tuple(row) for row in db_session.execute(query).all()]\n\n\ndef fetch_assistant_message_analytics(\n    db_session: Session,\n    assistant_id: int,\n    start: datetime.datetime,\n    end: datetime.datetime,\n) -> list[tuple[int, datetime.date]]:\n    \"\"\"\n    Gets the daily message counts for a specific assistant in the given time range.\n    \"\"\"\n    query = (\n        select(\n            func.count(ChatMessage.id),\n            cast(ChatMessage.time_sent, Date),\n        )\n        .join(\n            ChatSession,\n            ChatMessage.chat_session_id == ChatSession.id,\n        )\n        .where(\n            ChatSession.persona_id == assistant_id,\n            ChatMessage.time_sent >= start,\n            ChatMessage.time_sent <= end,\n            ChatMessage.message_type == MessageType.ASSISTANT,\n        )\n        .group_by(cast(ChatMessage.time_sent, Date))\n        .order_by(cast(ChatMessage.time_sent, Date))\n    )\n\n    return [tuple(row) for row in db_session.execute(query).all()]\n\n\ndef fetch_assistant_unique_users(\n    db_session: Session,\n    assistant_id: int,\n    start: datetime.datetime,\n    end: datetime.datetime,\n) -> list[tuple[int, datetime.date]]:\n    \"\"\"\n    Gets the daily unique user counts for a specific assistant in the given time range.\n    \"\"\"\n    query = (\n        select(\n            func.count(func.distinct(ChatSession.user_id)),\n            cast(ChatMessage.time_sent, Date),\n        )\n        .join(\n            ChatSession,\n            ChatMessage.chat_session_id == ChatSession.id,\n        )\n        .where(\n            ChatSession.persona_id == assistant_id,\n            ChatMessage.time_sent >= start,\n            ChatMessage.time_sent <= end,\n            ChatMessage.message_type == MessageType.ASSISTANT,\n        )\n        .group_by(cast(ChatMessage.time_sent, Date))\n        .order_by(cast(ChatMessage.time_sent, Date))\n    )\n\n    return [tuple(row) for row in db_session.execute(query).all()]\n\n\ndef fetch_assistant_unique_users_total(\n    db_session: Session,\n    assistant_id: int,\n    start: datetime.datetime,\n    end: datetime.datetime,\n) -> int:\n    \"\"\"\n    Gets the total number of distinct users who have sent or received messages from\n    the specified assistant in the given time range.\n    \"\"\"\n    query = (\n        select(func.count(func.distinct(ChatSession.user_id)))\n        .select_from(ChatMessage)\n        .join(\n            ChatSession,\n            ChatMessage.chat_session_id == ChatSession.id,\n        )\n        .where(\n            ChatSession.persona_id == assistant_id,\n            ChatMessage.time_sent >= start,\n            ChatMessage.time_sent <= end,\n            ChatMessage.message_type == MessageType.ASSISTANT,\n        )\n    )\n\n    result = db_session.execute(query).scalar()\n    return result if result else 0\n\n\n# Users can view assistant stats if they created the persona,\n# or if they are an admin\ndef user_can_view_assistant_stats(\n    db_session: Session, user: User, assistant_id: int\n) -> bool:\n    if user.role == UserRole.ADMIN:\n        return True\n\n    # Check if the user created the persona\n    stmt = select(Persona).where(\n        and_(Persona.id == assistant_id, Persona.user_id == user.id)\n    )\n\n    persona = db_session.execute(stmt).scalar_one_or_none()\n    return persona is not None\n"
  },
  {
    "path": "backend/ee/onyx/db/connector.py",
    "content": "from sqlalchemy import distinct\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.models import Connector\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef fetch_sources_with_connectors(db_session: Session) -> list[DocumentSource]:\n    sources = db_session.query(distinct(Connector.source)).all()  # type: ignore\n\n    document_sources = [source[0] for source in sources]\n\n    return document_sources\n"
  },
  {
    "path": "backend/ee/onyx/db/connector_credential_pair.py",
    "content": "from sqlalchemy import delete\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import UserGroup__ConnectorCredentialPair\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _delete_connector_credential_pair_user_groups_relationship__no_commit(\n    db_session: Session, connector_id: int, credential_id: int\n) -> None:\n    cc_pair = get_connector_credential_pair(\n        db_session=db_session,\n        connector_id=connector_id,\n        credential_id=credential_id,\n    )\n    if cc_pair is None:\n        raise ValueError(\n            f\"ConnectorCredentialPair with connector_id: {connector_id} and credential_id: {credential_id} not found\"\n        )\n\n    stmt = delete(UserGroup__ConnectorCredentialPair).where(\n        UserGroup__ConnectorCredentialPair.cc_pair_id == cc_pair.id,\n    )\n    db_session.execute(stmt)\n\n\ndef get_cc_pairs_by_source(\n    db_session: Session,\n    source_type: DocumentSource,\n    access_type: AccessType | None = None,\n    status: ConnectorCredentialPairStatus | None = None,\n) -> list[ConnectorCredentialPair]:\n    \"\"\"\n    Get all cc_pairs for a given source type with optional filtering by access_type and status\n    result is sorted by cc_pair id\n    \"\"\"\n    query = (\n        db_session.query(ConnectorCredentialPair)\n        .join(ConnectorCredentialPair.connector)\n        .filter(Connector.source == source_type)\n        .order_by(ConnectorCredentialPair.id)\n    )\n\n    if access_type is not None:\n        query = query.filter(ConnectorCredentialPair.access_type == access_type)\n\n    if status is not None:\n        query = query.filter(ConnectorCredentialPair.status == status)\n\n    cc_pairs = query.all()\n    return cc_pairs\n\n\ndef get_all_auto_sync_cc_pairs(\n    db_session: Session,\n) -> list[ConnectorCredentialPair]:\n    return (\n        db_session.query(ConnectorCredentialPair)\n        .where(\n            ConnectorCredentialPair.access_type == AccessType.SYNC,\n        )\n        .all()\n    )\n"
  },
  {
    "path": "backend/ee/onyx/db/document.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.access.utils import build_ext_group_name_for_onyx\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.models import Document as DbDocument\n\n\ndef upsert_document_external_perms__no_commit(\n    db_session: Session,\n    doc_id: str,\n    external_access: ExternalAccess,\n    source_type: DocumentSource,\n) -> None:\n    \"\"\"\n    This sets the permissions for a document in postgres.\n    NOTE: this will replace any existing external access, it will not do a union\n    \"\"\"\n    document = db_session.scalars(\n        select(DbDocument).where(DbDocument.id == doc_id)\n    ).first()\n\n    prefixed_external_groups = [\n        build_ext_group_name_for_onyx(\n            ext_group_name=group_id,\n            source=source_type,\n        )\n        for group_id in external_access.external_user_group_ids\n    ]\n\n    if not document:\n        # If the document does not exist, still store the external access\n        # So that if the document is added later, the external access is already stored\n        document = DbDocument(\n            id=doc_id,\n            semantic_id=\"\",\n            external_user_emails=external_access.external_user_emails,\n            external_user_group_ids=prefixed_external_groups,\n            is_public=external_access.is_public,\n        )\n        db_session.add(document)\n        return\n\n    document.external_user_emails = list(external_access.external_user_emails)\n    document.external_user_group_ids = prefixed_external_groups\n    document.is_public = external_access.is_public\n\n\ndef upsert_document_external_perms(\n    db_session: Session,\n    doc_id: str,\n    external_access: ExternalAccess,\n    source_type: DocumentSource,\n) -> bool:\n    \"\"\"\n    This sets the permissions for a document in postgres. Returns True if the\n    a new document was created, False otherwise.\n    NOTE: this will replace any existing external access, it will not do a union\n    \"\"\"\n    document = db_session.scalars(\n        select(DbDocument).where(DbDocument.id == doc_id)\n    ).first()\n\n    prefixed_external_groups: set[str] = {\n        build_ext_group_name_for_onyx(\n            ext_group_name=group_id,\n            source=source_type,\n        )\n        for group_id in external_access.external_user_group_ids\n    }\n\n    if not document:\n        # If the document does not exist, still store the external access\n        # So that if the document is added later, the external access is already stored\n        # The upsert function in the indexing pipeline does not overwrite the permissions fields\n        document = DbDocument(\n            id=doc_id,\n            semantic_id=\"\",\n            external_user_emails=external_access.external_user_emails,\n            external_user_group_ids=prefixed_external_groups,\n            is_public=external_access.is_public,\n        )\n        db_session.add(document)\n        db_session.commit()\n        return True\n\n    # If the document exists, we need to check if the external access has changed\n    if (\n        external_access.external_user_emails != set(document.external_user_emails or [])\n        or prefixed_external_groups != set(document.external_user_group_ids or [])\n        or external_access.is_public != document.is_public\n    ):\n        document.external_user_emails = list(external_access.external_user_emails)\n        document.external_user_group_ids = list(prefixed_external_groups)\n        document.is_public = external_access.is_public\n        document.last_modified = datetime.now(timezone.utc)\n        db_session.commit()\n\n    return False\n"
  },
  {
    "path": "backend/ee/onyx/db/document_set.py",
    "content": "from uuid import UUID\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import DocumentSet\nfrom onyx.db.models import DocumentSet__ConnectorCredentialPair\nfrom onyx.db.models import DocumentSet__User\nfrom onyx.db.models import DocumentSet__UserGroup\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserGroup\n\n\ndef make_doc_set_private(\n    document_set_id: int,\n    user_ids: list[UUID] | None,\n    group_ids: list[int] | None,\n    db_session: Session,\n) -> None:\n    db_session.query(DocumentSet__User).filter(\n        DocumentSet__User.document_set_id == document_set_id\n    ).delete(synchronize_session=\"fetch\")\n    db_session.query(DocumentSet__UserGroup).filter(\n        DocumentSet__UserGroup.document_set_id == document_set_id\n    ).delete(synchronize_session=\"fetch\")\n\n    if user_ids:\n        for user_uuid in user_ids:\n            db_session.add(\n                DocumentSet__User(document_set_id=document_set_id, user_id=user_uuid)\n            )\n\n    if group_ids:\n        for group_id in group_ids:\n            db_session.add(\n                DocumentSet__UserGroup(\n                    document_set_id=document_set_id, user_group_id=group_id\n                )\n            )\n\n\ndef delete_document_set_privacy__no_commit(\n    document_set_id: int, db_session: Session\n) -> None:\n    db_session.query(DocumentSet__User).filter(\n        DocumentSet__User.document_set_id == document_set_id\n    ).delete(synchronize_session=\"fetch\")\n\n    db_session.query(DocumentSet__UserGroup).filter(\n        DocumentSet__UserGroup.document_set_id == document_set_id\n    ).delete(synchronize_session=\"fetch\")\n\n\ndef fetch_document_sets(\n    user_id: UUID | None,\n    db_session: Session,\n    include_outdated: bool = True,  # Parameter only for versioned implementation, unused  # noqa: ARG001\n) -> list[tuple[DocumentSet, list[ConnectorCredentialPair]]]:\n    assert user_id is not None\n\n    # Public document sets\n    public_document_sets = (\n        db_session.query(DocumentSet)\n        .filter(DocumentSet.is_public == True)  # noqa\n        .all()\n    )\n\n    # Document sets via shared user relationships\n    shared_document_sets = (\n        db_session.query(DocumentSet)\n        .join(DocumentSet__User, DocumentSet.id == DocumentSet__User.document_set_id)\n        .filter(DocumentSet__User.user_id == user_id)\n        .all()\n    )\n\n    # Document sets via groups\n    # First, find the user groups the user belongs to\n    user_groups = (\n        db_session.query(UserGroup)\n        .join(User__UserGroup, UserGroup.id == User__UserGroup.user_group_id)\n        .filter(User__UserGroup.user_id == user_id)\n        .all()\n    )\n\n    group_document_sets = []\n    for group in user_groups:\n        group_document_sets.extend(\n            db_session.query(DocumentSet)\n            .join(\n                DocumentSet__UserGroup,\n                DocumentSet.id == DocumentSet__UserGroup.document_set_id,\n            )\n            .filter(DocumentSet__UserGroup.user_group_id == group.id)\n            .all()\n        )\n\n    # Combine and deduplicate document sets from all sources\n    all_document_sets = list(\n        set(public_document_sets + shared_document_sets + group_document_sets)\n    )\n\n    document_set_with_cc_pairs: list[\n        tuple[DocumentSet, list[ConnectorCredentialPair]]\n    ] = []\n\n    for document_set in all_document_sets:\n        # Fetch the associated ConnectorCredentialPairs\n        cc_pairs = (\n            db_session.query(ConnectorCredentialPair)\n            .join(\n                DocumentSet__ConnectorCredentialPair,\n                ConnectorCredentialPair.id\n                == DocumentSet__ConnectorCredentialPair.connector_credential_pair_id,\n            )\n            .filter(\n                DocumentSet__ConnectorCredentialPair.document_set_id == document_set.id,\n            )\n            .all()\n        )\n\n        document_set_with_cc_pairs.append((document_set, cc_pairs))\n\n    return document_set_with_cc_pairs\n"
  },
  {
    "path": "backend/ee/onyx/db/external_perm.py",
    "content": "from collections.abc import Sequence\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\nfrom sqlalchemy import delete\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import Session\n\nfrom onyx.access.utils import build_ext_group_name_for_onyx\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.models import PublicExternalUserGroup\nfrom onyx.db.models import User\nfrom onyx.db.models import User__ExternalUserGroupId\nfrom onyx.db.users import batch_add_ext_perm_user_if_not_exists\nfrom onyx.db.users import get_user_by_email\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass ExternalUserGroup(BaseModel):\n    id: str\n    user_emails: list[str]\n    # `True` for cases like a Folder in Google Drive that give domain-wide\n    # or \"Anyone with link\" access to all files in the folder.\n    # if this is set, `user_emails` don't really matter.\n    # When this is `True`, this `ExternalUserGroup` object doesn't really represent\n    # an actual \"group\" in the source.\n    gives_anyone_access: bool = False\n\n\ndef delete_user__ext_group_for_user__no_commit(\n    db_session: Session,\n    user_id: UUID,\n) -> None:\n    db_session.execute(\n        delete(User__ExternalUserGroupId).where(\n            User__ExternalUserGroupId.user_id == user_id\n        )\n    )\n\n\ndef delete_user__ext_group_for_cc_pair__no_commit(\n    db_session: Session,\n    cc_pair_id: int,\n) -> None:\n    db_session.execute(\n        delete(User__ExternalUserGroupId).where(\n            User__ExternalUserGroupId.cc_pair_id == cc_pair_id\n        )\n    )\n\n\ndef delete_public_external_group_for_cc_pair__no_commit(\n    db_session: Session,\n    cc_pair_id: int,\n) -> None:\n    db_session.execute(\n        delete(PublicExternalUserGroup).where(\n            PublicExternalUserGroup.cc_pair_id == cc_pair_id\n        )\n    )\n\n\ndef mark_old_external_groups_as_stale(\n    db_session: Session,\n    cc_pair_id: int,\n) -> None:\n    db_session.execute(\n        update(User__ExternalUserGroupId)\n        .where(User__ExternalUserGroupId.cc_pair_id == cc_pair_id)\n        .values(stale=True)\n    )\n    db_session.execute(\n        update(PublicExternalUserGroup)\n        .where(PublicExternalUserGroup.cc_pair_id == cc_pair_id)\n        .values(stale=True)\n    )\n\n\ndef upsert_external_groups(\n    db_session: Session,\n    cc_pair_id: int,\n    external_groups: list[ExternalUserGroup],\n    source: DocumentSource,\n) -> None:\n    \"\"\"\n    Performs a true upsert operation for external user groups:\n    - For existing groups (same user_id, external_user_group_id, cc_pair_id), updates the stale flag to False\n    - For new groups, inserts them with stale=False\n    - For public groups, uses upsert logic as well\n    \"\"\"\n    # If there are no groups to add, return early\n    if not external_groups:\n        return\n\n    # collect all emails from all groups to batch add all users at once for efficiency\n    all_group_member_emails = set()\n    for external_group in external_groups:\n        for user_email in external_group.user_emails:\n            all_group_member_emails.add(user_email)\n\n    # batch add users if they don't exist and get their ids\n    all_group_members: list[User] = batch_add_ext_perm_user_if_not_exists(\n        db_session=db_session,\n        # NOTE: this function handles case sensitivity for emails\n        emails=list(all_group_member_emails),\n    )\n\n    # map emails to ids\n    email_id_map = {user.email.lower(): user.id for user in all_group_members}\n\n    # Process each external group\n    for external_group in external_groups:\n        external_group_id = build_ext_group_name_for_onyx(\n            ext_group_name=external_group.id,\n            source=source,\n        )\n\n        # Handle user-group mappings\n        for user_email in external_group.user_emails:\n            user_id = email_id_map.get(user_email.lower())\n            if user_id is None:\n                logger.warning(\n                    f\"User in group {external_group.id} with email {user_email} not found\"\n                )\n                continue\n\n            # Check if the user-group mapping already exists\n            existing_user_group = db_session.scalar(\n                select(User__ExternalUserGroupId).where(\n                    User__ExternalUserGroupId.user_id == user_id,\n                    User__ExternalUserGroupId.external_user_group_id\n                    == external_group_id,\n                    User__ExternalUserGroupId.cc_pair_id == cc_pair_id,\n                )\n            )\n\n            if existing_user_group:\n                # Update existing record\n                existing_user_group.stale = False\n            else:\n                # Insert new record\n                new_user_group = User__ExternalUserGroupId(\n                    user_id=user_id,\n                    external_user_group_id=external_group_id,\n                    cc_pair_id=cc_pair_id,\n                    stale=False,\n                )\n                db_session.add(new_user_group)\n\n        # Handle public group if needed\n        if external_group.gives_anyone_access:\n            # Check if the public group already exists\n            existing_public_group = db_session.scalar(\n                select(PublicExternalUserGroup).where(\n                    PublicExternalUserGroup.external_user_group_id == external_group_id,\n                    PublicExternalUserGroup.cc_pair_id == cc_pair_id,\n                )\n            )\n\n            if existing_public_group:\n                # Update existing record\n                existing_public_group.stale = False\n            else:\n                # Insert new record\n                new_public_group = PublicExternalUserGroup(\n                    external_user_group_id=external_group_id,\n                    cc_pair_id=cc_pair_id,\n                    stale=False,\n                )\n                db_session.add(new_public_group)\n\n    db_session.commit()\n\n\ndef remove_stale_external_groups(\n    db_session: Session,\n    cc_pair_id: int,\n) -> None:\n    db_session.execute(\n        delete(User__ExternalUserGroupId).where(\n            User__ExternalUserGroupId.cc_pair_id == cc_pair_id,\n            User__ExternalUserGroupId.stale.is_(True),\n        )\n    )\n    db_session.execute(\n        delete(PublicExternalUserGroup).where(\n            PublicExternalUserGroup.cc_pair_id == cc_pair_id,\n            PublicExternalUserGroup.stale.is_(True),\n        )\n    )\n    db_session.commit()\n\n\ndef fetch_external_groups_for_user(\n    db_session: Session,\n    user_id: UUID,\n) -> Sequence[User__ExternalUserGroupId]:\n    return db_session.scalars(\n        select(User__ExternalUserGroupId).where(\n            User__ExternalUserGroupId.user_id == user_id\n        )\n    ).all()\n\n\ndef fetch_external_groups_for_user_email_and_group_ids(\n    db_session: Session,\n    user_email: str,\n    group_ids: list[str],\n) -> list[User__ExternalUserGroupId]:\n    user = get_user_by_email(db_session=db_session, email=user_email)\n    if user is None:\n        return []\n    user_id = user.id\n    user_ext_groups = db_session.scalars(\n        select(User__ExternalUserGroupId).where(\n            User__ExternalUserGroupId.user_id == user_id,\n            User__ExternalUserGroupId.external_user_group_id.in_(group_ids),\n        )\n    ).all()\n    return list(user_ext_groups)\n\n\ndef fetch_public_external_group_ids(\n    db_session: Session,\n) -> list[str]:\n    return list(\n        db_session.scalars(select(PublicExternalUserGroup.external_user_group_id)).all()\n    )\n"
  },
  {
    "path": "backend/ee/onyx/db/hierarchy.py",
    "content": "\"\"\"EE version of hierarchy node access control.\n\nThis module provides permission-aware hierarchy node access for Enterprise Edition.\nIt filters hierarchy nodes based on user email and external group membership.\n\"\"\"\n\nfrom sqlalchemy import any_\nfrom sqlalchemy import cast\nfrom sqlalchemy import or_\nfrom sqlalchemy import select\nfrom sqlalchemy import String\nfrom sqlalchemy.dialects import postgresql\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.sql.elements import ColumnElement\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.models import HierarchyNode\n\n\ndef _build_hierarchy_access_filter(\n    user_email: str,\n    external_group_ids: list[str],\n) -> ColumnElement[bool]:\n    \"\"\"Build SQLAlchemy filter for hierarchy node access.\n\n    A user can access a hierarchy node if any of the following are true:\n    - The node is marked as public (is_public=True)\n    - The user's email is in the node's external_user_emails list\n    - Any of the user's external group IDs overlap with the node's external_user_group_ids\n    \"\"\"\n    access_filters: list[ColumnElement[bool]] = [HierarchyNode.is_public.is_(True)]\n    if user_email:\n        access_filters.append(any_(HierarchyNode.external_user_emails) == user_email)\n    if external_group_ids:\n        access_filters.append(\n            HierarchyNode.external_user_group_ids.overlap(\n                cast(postgresql.array(external_group_ids), postgresql.ARRAY(String))\n            )\n        )\n    return or_(*access_filters)\n\n\ndef _get_accessible_hierarchy_nodes_for_source(\n    db_session: Session,\n    source: DocumentSource,\n    user_email: str,\n    external_group_ids: list[str],\n) -> list[HierarchyNode]:\n    \"\"\"\n    EE version: Returns hierarchy nodes filtered by user permissions.\n\n    A user can access a hierarchy node if any of the following are true:\n    - The node is marked as public (is_public=True)\n    - The user's email is in the node's external_user_emails list\n    - Any of the user's external group IDs overlap with the node's external_user_group_ids\n\n    Args:\n        db_session: SQLAlchemy session\n        source: Document source type\n        user_email: User's email for permission checking\n        external_group_ids: User's external group IDs for permission checking\n\n    Returns:\n        List of HierarchyNode objects the user has access to\n    \"\"\"\n    stmt = select(HierarchyNode).where(HierarchyNode.source == source)\n    stmt = stmt.where(_build_hierarchy_access_filter(user_email, external_group_ids))\n    stmt = stmt.order_by(HierarchyNode.display_name)\n    return list(db_session.execute(stmt).scalars().all())\n"
  },
  {
    "path": "backend/ee/onyx/db/license.py",
    "content": "\"\"\"Database and cache operations for the license table.\"\"\"\n\nfrom datetime import datetime\nfrom typing import NamedTuple\n\nfrom sqlalchemy import func\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.server.license.models import LicenseMetadata\nfrom ee.onyx.server.license.models import LicensePayload\nfrom ee.onyx.server.license.models import LicenseSource\nfrom onyx.auth.schemas import UserRole\nfrom onyx.cache.factory import get_cache_backend\nfrom onyx.configs.constants import ANONYMOUS_USER_EMAIL\nfrom onyx.db.models import License\nfrom onyx.db.models import User\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nLICENSE_METADATA_KEY = \"license:metadata\"\nLICENSE_CACHE_TTL_SECONDS = 86400  # 24 hours\n\n\nclass SeatAvailabilityResult(NamedTuple):\n    \"\"\"Result of a seat availability check.\"\"\"\n\n    available: bool\n    error_message: str | None = None\n\n\n# -----------------------------------------------------------------------------\n# Database CRUD Operations\n# -----------------------------------------------------------------------------\n\n\ndef get_license(db_session: Session) -> License | None:\n    \"\"\"\n    Get the current license (singleton pattern - only one row).\n\n    Args:\n        db_session: Database session\n\n    Returns:\n        License object if exists, None otherwise\n    \"\"\"\n    return db_session.execute(select(License)).scalars().first()\n\n\ndef upsert_license(db_session: Session, license_data: str) -> License:\n    \"\"\"\n    Insert or update the license (singleton pattern).\n\n    Args:\n        db_session: Database session\n        license_data: Base64-encoded signed license blob\n\n    Returns:\n        The created or updated License object\n    \"\"\"\n    existing = get_license(db_session)\n\n    if existing:\n        existing.license_data = license_data\n        db_session.commit()\n        db_session.refresh(existing)\n        logger.info(\"License updated\")\n        return existing\n\n    new_license = License(license_data=license_data)\n    db_session.add(new_license)\n    db_session.commit()\n    db_session.refresh(new_license)\n    logger.info(\"License created\")\n    return new_license\n\n\ndef delete_license(db_session: Session) -> bool:\n    \"\"\"\n    Delete the current license.\n\n    Args:\n        db_session: Database session\n\n    Returns:\n        True if deleted, False if no license existed\n    \"\"\"\n    existing = get_license(db_session)\n    if existing:\n        db_session.delete(existing)\n        db_session.commit()\n        logger.info(\"License deleted\")\n        return True\n    return False\n\n\n# -----------------------------------------------------------------------------\n# Seat Counting\n# -----------------------------------------------------------------------------\n\n\ndef get_used_seats(tenant_id: str | None = None) -> int:\n    \"\"\"\n    Get current seat usage directly from database.\n\n    For multi-tenant: counts users in UserTenantMapping for this tenant.\n    For self-hosted: counts all active users (excludes EXT_PERM_USER role\n    and the anonymous system user).\n\n    TODO: Exclude API key dummy users from seat counting. API keys create\n    users with emails like `__DANSWER_API_KEY_*` that should not count toward\n    seat limits. See: https://linear.app/onyx-app/issue/ENG-3518\n    \"\"\"\n    if MULTI_TENANT:\n        from ee.onyx.server.tenants.user_mapping import get_tenant_count\n\n        return get_tenant_count(tenant_id or get_current_tenant_id())\n    else:\n        from onyx.db.engine.sql_engine import get_session_with_current_tenant\n\n        with get_session_with_current_tenant() as db_session:\n            result = db_session.execute(\n                select(func.count())\n                .select_from(User)\n                .where(\n                    User.is_active == True,  # type: ignore  # noqa: E712\n                    User.role != UserRole.EXT_PERM_USER,\n                    User.email != ANONYMOUS_USER_EMAIL,  # type: ignore\n                )\n            )\n            return result.scalar() or 0\n\n\n# -----------------------------------------------------------------------------\n# Redis Cache Operations\n# -----------------------------------------------------------------------------\n\n\ndef get_cached_license_metadata(tenant_id: str | None = None) -> LicenseMetadata | None:\n    \"\"\"\n    Get license metadata from cache.\n\n    Args:\n        tenant_id: Tenant ID (for multi-tenant deployments)\n\n    Returns:\n        LicenseMetadata if cached, None otherwise\n    \"\"\"\n    cache = get_cache_backend(tenant_id=tenant_id)\n    cached = cache.get(LICENSE_METADATA_KEY)\n    if not cached:\n        return None\n\n    try:\n        cached_str = (\n            cached.decode(\"utf-8\") if isinstance(cached, bytes) else str(cached)\n        )\n        return LicenseMetadata.model_validate_json(cached_str)\n    except Exception as e:\n        logger.warning(f\"Failed to parse cached license metadata: {e}\")\n        return None\n\n\ndef invalidate_license_cache(tenant_id: str | None = None) -> None:\n    \"\"\"\n    Invalidate the license metadata cache (not the license itself).\n\n    Deletes the cached LicenseMetadata. The actual license in the database\n    is not affected. Delete is idempotent — if the key doesn't exist, this\n    is a no-op.\n\n    Args:\n        tenant_id: Tenant ID (for multi-tenant deployments)\n    \"\"\"\n    cache = get_cache_backend(tenant_id=tenant_id)\n    cache.delete(LICENSE_METADATA_KEY)\n    logger.info(\"License cache invalidated\")\n\n\ndef update_license_cache(\n    payload: LicensePayload,\n    source: LicenseSource | None = None,\n    grace_period_end: datetime | None = None,\n    tenant_id: str | None = None,\n) -> LicenseMetadata:\n    \"\"\"\n    Update the cache with license metadata.\n\n    We cache all license statuses (ACTIVE, GRACE_PERIOD, GATED_ACCESS) because:\n    1. Frontend needs status to show appropriate UI/banners\n    2. Caching avoids repeated DB + crypto verification on every request\n    3. Status enforcement happens at the feature level, not here\n\n    Args:\n        payload: Verified license payload\n        source: How the license was obtained\n        grace_period_end: Optional grace period end time\n        tenant_id: Tenant ID (for multi-tenant deployments)\n\n    Returns:\n        The cached LicenseMetadata\n    \"\"\"\n    from ee.onyx.utils.license import get_license_status\n\n    tenant = tenant_id or get_current_tenant_id()\n    cache = get_cache_backend(tenant_id=tenant_id)\n\n    used_seats = get_used_seats(tenant)\n    status = get_license_status(payload, grace_period_end)\n\n    metadata = LicenseMetadata(\n        tenant_id=payload.tenant_id,\n        organization_name=payload.organization_name,\n        seats=payload.seats,\n        used_seats=used_seats,\n        plan_type=payload.plan_type,\n        issued_at=payload.issued_at,\n        expires_at=payload.expires_at,\n        grace_period_end=grace_period_end,\n        status=status,\n        source=source,\n        stripe_subscription_id=payload.stripe_subscription_id,\n    )\n\n    cache.set(\n        LICENSE_METADATA_KEY,\n        metadata.model_dump_json(),\n        ex=LICENSE_CACHE_TTL_SECONDS,\n    )\n\n    logger.info(f\"License cache updated: {metadata.seats} seats, status={status.value}\")\n    return metadata\n\n\ndef refresh_license_cache(\n    db_session: Session,\n    tenant_id: str | None = None,\n) -> LicenseMetadata | None:\n    \"\"\"\n    Refresh the license cache from the database.\n\n    Args:\n        db_session: Database session\n        tenant_id: Tenant ID (for multi-tenant deployments)\n\n    Returns:\n        LicenseMetadata if license exists, None otherwise\n    \"\"\"\n    from ee.onyx.utils.license import verify_license_signature\n\n    license_record = get_license(db_session)\n    if not license_record:\n        invalidate_license_cache(tenant_id)\n        return None\n\n    try:\n        payload = verify_license_signature(license_record.license_data)\n        # Derive source from payload: manual licenses lack stripe_customer_id\n        source: LicenseSource = (\n            LicenseSource.AUTO_FETCH\n            if payload.stripe_customer_id\n            else LicenseSource.MANUAL_UPLOAD\n        )\n        return update_license_cache(\n            payload,\n            source=source,\n            tenant_id=tenant_id,\n        )\n    except ValueError as e:\n        logger.error(f\"Failed to verify license during cache refresh: {e}\")\n        invalidate_license_cache(tenant_id)\n        return None\n\n\ndef get_license_metadata(\n    db_session: Session,\n    tenant_id: str | None = None,\n) -> LicenseMetadata | None:\n    \"\"\"\n    Get license metadata, using cache if available.\n\n    Args:\n        db_session: Database session\n        tenant_id: Tenant ID (for multi-tenant deployments)\n\n    Returns:\n        LicenseMetadata if license exists, None otherwise\n    \"\"\"\n    # Try cache first\n    cached = get_cached_license_metadata(tenant_id)\n    if cached:\n        return cached\n\n    # Refresh from database\n    return refresh_license_cache(db_session, tenant_id)\n\n\ndef check_seat_availability(\n    db_session: Session,\n    seats_needed: int = 1,\n    tenant_id: str | None = None,\n) -> SeatAvailabilityResult:\n    \"\"\"\n    Check if there are enough seats available to add users.\n\n    Args:\n        db_session: Database session\n        seats_needed: Number of seats needed (default 1)\n        tenant_id: Tenant ID (for multi-tenant deployments)\n\n    Returns:\n        SeatAvailabilityResult with available=True if seats are available,\n        or available=False with error_message if limit would be exceeded.\n        Returns available=True if no license exists (self-hosted = unlimited).\n    \"\"\"\n    metadata = get_license_metadata(db_session, tenant_id)\n\n    # No license = no enforcement (self-hosted without license)\n    if metadata is None:\n        return SeatAvailabilityResult(available=True)\n\n    # Calculate current usage directly from DB (not cache) for accuracy\n    current_used = get_used_seats(tenant_id)\n    total_seats = metadata.seats\n\n    # Use > (not >=) to allow filling to exactly 100% capacity\n    would_exceed_limit = current_used + seats_needed > total_seats\n    if would_exceed_limit:\n        return SeatAvailabilityResult(\n            available=False,\n            error_message=f\"Seat limit would be exceeded: {current_used} of {total_seats} seats used, \"\n            f\"cannot add {seats_needed} more user(s).\",\n        )\n\n    return SeatAvailabilityResult(available=True)\n"
  },
  {
    "path": "backend/ee/onyx/db/persona.py",
    "content": "from uuid import UUID\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import NotificationType\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Persona__User\nfrom onyx.db.models import Persona__UserGroup\nfrom onyx.db.notification import create_notification\nfrom onyx.db.persona import mark_persona_user_files_for_sync\nfrom onyx.server.features.persona.models import PersonaSharedNotificationData\n\n\ndef update_persona_access(\n    persona_id: int,\n    creator_user_id: UUID | None,\n    db_session: Session,\n    is_public: bool | None = None,\n    user_ids: list[UUID] | None = None,\n    group_ids: list[int] | None = None,\n) -> None:\n    \"\"\"Updates the access settings for a persona including public status, user shares,\n    and group shares.\n\n    NOTE: This function batches all updates. If we don't dedupe the inputs,\n    the commit will exception.\n\n    NOTE: Callers are responsible for committing.\"\"\"\n\n    needs_sync = False\n    if is_public is not None:\n        needs_sync = True\n        persona = db_session.query(Persona).filter(Persona.id == persona_id).first()\n        if persona:\n            persona.is_public = is_public\n\n    # NOTE: For user-ids and group-ids, `None` means \"leave unchanged\", `[]` means \"clear all shares\",\n    # and a non-empty list means \"replace with these shares\".\n\n    if user_ids is not None:\n        needs_sync = True\n        db_session.query(Persona__User).filter(\n            Persona__User.persona_id == persona_id\n        ).delete(synchronize_session=\"fetch\")\n\n        user_ids_set = set(user_ids)\n        for user_id in user_ids_set:\n            db_session.add(Persona__User(persona_id=persona_id, user_id=user_id))\n            if user_id != creator_user_id:\n                create_notification(\n                    user_id=user_id,\n                    notif_type=NotificationType.PERSONA_SHARED,\n                    title=\"A new agent was shared with you!\",\n                    db_session=db_session,\n                    additional_data=PersonaSharedNotificationData(\n                        persona_id=persona_id,\n                    ).model_dump(),\n                )\n\n    if group_ids is not None:\n        needs_sync = True\n        db_session.query(Persona__UserGroup).filter(\n            Persona__UserGroup.persona_id == persona_id\n        ).delete(synchronize_session=\"fetch\")\n\n        group_ids_set = set(group_ids)\n        for group_id in group_ids_set:\n            db_session.add(\n                Persona__UserGroup(persona_id=persona_id, user_group_id=group_id)\n            )\n\n    # When sharing changes, user file ACLs need to be updated in the vector DB\n    if needs_sync:\n        mark_persona_user_files_for_sync(persona_id, db_session)\n"
  },
  {
    "path": "backend/ee/onyx/db/query_history.py",
    "content": "from collections.abc import Sequence\nfrom datetime import datetime\n\nfrom sqlalchemy import asc\nfrom sqlalchemy import BinaryExpression\nfrom sqlalchemy import ColumnElement\nfrom sqlalchemy import desc\nfrom sqlalchemy import distinct\nfrom sqlalchemy.orm import contains_eager\nfrom sqlalchemy.orm import joinedload\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.sql import case\nfrom sqlalchemy.sql import func\nfrom sqlalchemy.sql import select\nfrom sqlalchemy.sql.expression import literal\nfrom sqlalchemy.sql.expression import UnaryExpression\n\nfrom ee.onyx.background.task_name_builders import QUERY_HISTORY_TASK_NAME_PREFIX\nfrom onyx.configs.constants import QAFeedbackType\nfrom onyx.db.models import ChatMessage\nfrom onyx.db.models import ChatMessageFeedback\nfrom onyx.db.models import ChatSession\nfrom onyx.db.models import TaskQueueState\nfrom onyx.db.tasks import get_all_tasks_with_prefix\n\n\ndef _build_filter_conditions(\n    start_time: datetime | None,\n    end_time: datetime | None,\n    feedback_filter: QAFeedbackType | None,\n) -> list[ColumnElement]:\n    \"\"\"\n    Helper function to build all filter conditions for chat sessions.\n    Filters by start and end time, feedback type, and any sessions without messages.\n    start_time: Date from which to filter\n    end_time: Date to which to filter\n    feedback_filter: Feedback type to filter by\n    Returns: List of filter conditions\n    \"\"\"\n    conditions = []\n\n    if start_time is not None:\n        conditions.append(ChatSession.time_created >= start_time)\n    if end_time is not None:\n        conditions.append(ChatSession.time_created <= end_time)\n\n    if feedback_filter is not None:\n        feedback_subq = (\n            select(ChatMessage.chat_session_id)\n            .join(ChatMessageFeedback)\n            .group_by(ChatMessage.chat_session_id)\n            .having(\n                case(\n                    (\n                        case(\n                            {literal(feedback_filter == QAFeedbackType.LIKE): True},\n                            else_=False,\n                        ),\n                        func.bool_and(ChatMessageFeedback.is_positive),\n                    ),\n                    (\n                        case(\n                            {literal(feedback_filter == QAFeedbackType.DISLIKE): True},\n                            else_=False,\n                        ),\n                        func.bool_and(func.not_(ChatMessageFeedback.is_positive)),\n                    ),\n                    else_=func.bool_or(ChatMessageFeedback.is_positive)\n                    & func.bool_or(func.not_(ChatMessageFeedback.is_positive)),\n                )\n            )\n        )\n        conditions.append(ChatSession.id.in_(feedback_subq))\n\n    return conditions\n\n\ndef get_total_filtered_chat_sessions_count(\n    db_session: Session,\n    start_time: datetime | None,\n    end_time: datetime | None,\n    feedback_filter: QAFeedbackType | None,\n) -> int:\n    conditions = _build_filter_conditions(start_time, end_time, feedback_filter)\n    stmt = (\n        select(func.count(distinct(ChatSession.id)))\n        .select_from(ChatSession)\n        .filter(*conditions)\n    )\n    return db_session.scalar(stmt) or 0\n\n\ndef get_page_of_chat_sessions(\n    start_time: datetime | None,\n    end_time: datetime | None,\n    db_session: Session,\n    page_num: int,\n    page_size: int,\n    feedback_filter: QAFeedbackType | None = None,\n) -> Sequence[ChatSession]:\n    conditions = _build_filter_conditions(start_time, end_time, feedback_filter)\n\n    subquery = (\n        select(ChatSession.id)\n        .filter(*conditions)\n        .order_by(desc(ChatSession.time_created), ChatSession.id)\n        .limit(page_size)\n        .offset(page_num * page_size)\n        .subquery()\n    )\n\n    stmt = (\n        select(ChatSession)\n        .join(subquery, ChatSession.id == subquery.c.id)\n        .outerjoin(ChatMessage, ChatSession.id == ChatMessage.chat_session_id)\n        .options(\n            joinedload(ChatSession.user),\n            joinedload(ChatSession.persona),\n            contains_eager(ChatSession.messages).joinedload(\n                ChatMessage.chat_message_feedbacks\n            ),\n        )\n        .order_by(\n            desc(ChatSession.time_created),\n            ChatSession.id,\n            asc(ChatMessage.id),  # Ensure chronological message order\n        )\n    )\n\n    return db_session.scalars(stmt).unique().all()\n\n\ndef fetch_chat_sessions_eagerly_by_time(\n    start: datetime,\n    end: datetime,\n    db_session: Session,\n    limit: int | None = 500,\n    initial_time: datetime | None = None,\n) -> list[ChatSession]:\n    \"\"\"Sorted by oldest to newest, then by message id\"\"\"\n\n    asc_time_order: UnaryExpression = asc(ChatSession.time_created)\n    message_order: UnaryExpression = asc(ChatMessage.id)\n\n    filters: list[ColumnElement | BinaryExpression] = [\n        ChatSession.time_created.between(start, end)\n    ]\n\n    if initial_time:\n        filters.append(ChatSession.time_created > initial_time)\n\n    subquery = (\n        db_session.query(ChatSession.id, ChatSession.time_created)\n        .filter(*filters)\n        .order_by(asc_time_order)\n        .limit(limit)\n        .subquery()\n    )\n\n    query = (\n        db_session.query(ChatSession)\n        .join(subquery, ChatSession.id == subquery.c.id)\n        .outerjoin(ChatMessage, ChatSession.id == ChatMessage.chat_session_id)\n        .options(\n            joinedload(ChatSession.user),\n            joinedload(ChatSession.persona),\n            contains_eager(ChatSession.messages).joinedload(\n                ChatMessage.chat_message_feedbacks\n            ),\n        )\n        .order_by(asc_time_order, message_order)\n    )\n\n    chat_sessions = query.all()\n\n    return chat_sessions\n\n\ndef get_all_query_history_export_tasks(\n    db_session: Session,\n) -> list[TaskQueueState]:\n    return get_all_tasks_with_prefix(db_session, QUERY_HISTORY_TASK_NAME_PREFIX)\n"
  },
  {
    "path": "backend/ee/onyx/db/saml.py",
    "content": "import datetime\nfrom typing import cast\nfrom uuid import UUID\n\nfrom sqlalchemy import and_\nfrom sqlalchemy import func\nfrom sqlalchemy import select\nfrom sqlalchemy.ext.asyncio import AsyncSession\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS\nfrom onyx.db.models import SamlAccount\n\n\ndef upsert_saml_account(\n    user_id: UUID,\n    cookie: str,\n    db_session: Session,\n    expiration_offset: int = SESSION_EXPIRE_TIME_SECONDS,\n) -> datetime.datetime:\n    expires_at = func.now() + datetime.timedelta(seconds=expiration_offset)\n\n    existing_saml_acc = (\n        db_session.query(SamlAccount)\n        .filter(SamlAccount.user_id == user_id)\n        .one_or_none()\n    )\n\n    if existing_saml_acc:\n        existing_saml_acc.encrypted_cookie = cookie\n        existing_saml_acc.expires_at = cast(datetime.datetime, expires_at)\n        existing_saml_acc.updated_at = func.now()\n        saml_acc = existing_saml_acc\n    else:\n        saml_acc = SamlAccount(\n            user_id=user_id,\n            encrypted_cookie=cookie,\n            expires_at=expires_at,\n        )\n        db_session.add(saml_acc)\n\n    db_session.commit()\n\n    return saml_acc.expires_at\n\n\nasync def get_saml_account(\n    cookie: str, async_db_session: AsyncSession\n) -> SamlAccount | None:\n    \"\"\"NOTE: this is async, since it's used during auth\n    (which is necessarily async due to FastAPI Users)\"\"\"\n    stmt = (\n        select(SamlAccount)\n        .options(selectinload(SamlAccount.user))  # Use selectinload for collections\n        .where(\n            and_(\n                SamlAccount.encrypted_cookie == cookie,\n                SamlAccount.expires_at > func.now(),\n            )\n        )\n    )\n\n    result = await async_db_session.execute(stmt)\n    return result.scalars().unique().one_or_none()\n\n\nasync def expire_saml_account(\n    saml_account: SamlAccount, async_db_session: AsyncSession\n) -> None:\n    saml_account.expires_at = func.now()\n    await async_db_session.commit()\n"
  },
  {
    "path": "backend/ee/onyx/db/scim.py",
    "content": "\"\"\"SCIM Data Access Layer.\n\nAll database operations for SCIM provisioning — token management, user\nmappings, and group mappings. Extends the base DAL (see ``onyx.db.dal``).\n\nUsage from FastAPI::\n\n    def get_scim_dal(db_session: Session = Depends(get_session)) -> ScimDAL:\n        return ScimDAL(db_session)\n\n    @router.post(\"/tokens\")\n    def create_token(dal: ScimDAL = Depends(get_scim_dal)) -> ...:\n        token = dal.create_token(name=..., hashed_token=..., ...)\n        dal.commit()\n        return token\n\nUsage from background tasks::\n\n    with ScimDAL.from_tenant(\"tenant_abc\") as dal:\n        mapping = dal.create_user_mapping(external_id=\"idp-123\", user_id=uid)\n        dal.commit()\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom uuid import UUID\n\nfrom sqlalchemy import delete as sa_delete\nfrom sqlalchemy import func\nfrom sqlalchemy import Select\nfrom sqlalchemy import select\nfrom sqlalchemy import SQLColumnExpression\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\n\nfrom ee.onyx.server.scim.filtering import ScimFilter\nfrom ee.onyx.server.scim.filtering import ScimFilterOperator\nfrom ee.onyx.server.scim.models import ScimMappingFields\nfrom onyx.db.dal import DAL\nfrom onyx.db.enums import AccountType\nfrom onyx.db.enums import GrantSource\nfrom onyx.db.enums import Permission\nfrom onyx.db.models import PermissionGrant\nfrom onyx.db.models import ScimGroupMapping\nfrom onyx.db.models import ScimToken\nfrom onyx.db.models import ScimUserMapping\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserGroup\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass ScimDAL(DAL):\n    \"\"\"Data Access Layer for SCIM provisioning operations.\n\n    Methods mutate but do NOT commit — call ``dal.commit()`` explicitly\n    when you want to persist changes. This follows the existing ``_no_commit``\n    convention and lets callers batch multiple operations into one transaction.\n    \"\"\"\n\n    # ------------------------------------------------------------------\n    # Token operations\n    # ------------------------------------------------------------------\n\n    def create_token(\n        self,\n        name: str,\n        hashed_token: str,\n        token_display: str,\n        created_by_id: UUID,\n    ) -> ScimToken:\n        \"\"\"Create a new SCIM bearer token.\n\n        Only one token is active at a time — this method automatically revokes\n        all existing active tokens before creating the new one.\n        \"\"\"\n        # Revoke any currently active tokens\n        active_tokens = list(\n            self._session.scalars(\n                select(ScimToken).where(ScimToken.is_active.is_(True))\n            ).all()\n        )\n        for t in active_tokens:\n            t.is_active = False\n\n        token = ScimToken(\n            name=name,\n            hashed_token=hashed_token,\n            token_display=token_display,\n            created_by_id=created_by_id,\n        )\n        self._session.add(token)\n        self._session.flush()\n        return token\n\n    def get_active_token(self) -> ScimToken | None:\n        \"\"\"Return the single currently active token, or None.\"\"\"\n        return self._session.scalar(\n            select(ScimToken).where(ScimToken.is_active.is_(True))\n        )\n\n    def get_token_by_hash(self, hashed_token: str) -> ScimToken | None:\n        \"\"\"Look up a token by its SHA-256 hash.\"\"\"\n        return self._session.scalar(\n            select(ScimToken).where(ScimToken.hashed_token == hashed_token)\n        )\n\n    def revoke_token(self, token_id: int) -> None:\n        \"\"\"Deactivate a token by ID.\n\n        Raises:\n            ValueError: If the token does not exist.\n        \"\"\"\n        token = self._session.get(ScimToken, token_id)\n        if not token:\n            raise ValueError(f\"SCIM token with id {token_id} not found\")\n        token.is_active = False\n\n    def update_token_last_used(self, token_id: int) -> None:\n        \"\"\"Update the last_used_at timestamp for a token.\"\"\"\n        token = self._session.get(ScimToken, token_id)\n        if token:\n            token.last_used_at = func.now()  # type: ignore[assignment]\n\n    # ------------------------------------------------------------------\n    # User mapping operations\n    # ------------------------------------------------------------------\n\n    def create_user_mapping(\n        self,\n        external_id: str | None,\n        user_id: UUID,\n        scim_username: str | None = None,\n        fields: ScimMappingFields | None = None,\n    ) -> ScimUserMapping:\n        \"\"\"Create a SCIM mapping for a user.\n\n        ``external_id`` may be ``None`` when the IdP omits it (RFC 7643\n        allows this). The mapping still marks the user as SCIM-managed.\n        \"\"\"\n        f = fields or ScimMappingFields()\n        mapping = ScimUserMapping(\n            external_id=external_id,\n            user_id=user_id,\n            scim_username=scim_username,\n            department=f.department,\n            manager=f.manager,\n            given_name=f.given_name,\n            family_name=f.family_name,\n            scim_emails_json=f.scim_emails_json,\n        )\n        self._session.add(mapping)\n        self._session.flush()\n        return mapping\n\n    def get_user_mapping_by_external_id(\n        self, external_id: str\n    ) -> ScimUserMapping | None:\n        \"\"\"Look up a user mapping by the IdP's external identifier.\"\"\"\n        return self._session.scalar(\n            select(ScimUserMapping).where(ScimUserMapping.external_id == external_id)\n        )\n\n    def get_user_mapping_by_user_id(self, user_id: UUID) -> ScimUserMapping | None:\n        \"\"\"Look up a user mapping by the Onyx user ID.\"\"\"\n        return self._session.scalar(\n            select(ScimUserMapping).where(ScimUserMapping.user_id == user_id)\n        )\n\n    def list_user_mappings(\n        self,\n        start_index: int = 1,\n        count: int = 100,\n    ) -> tuple[list[ScimUserMapping], int]:\n        \"\"\"List user mappings with SCIM-style pagination.\n\n        Args:\n            start_index: 1-based start index (SCIM convention).\n            count: Maximum number of results to return.\n\n        Returns:\n            A tuple of (mappings, total_count).\n        \"\"\"\n        total = (\n            self._session.scalar(select(func.count()).select_from(ScimUserMapping)) or 0\n        )\n\n        offset = max(start_index - 1, 0)\n        mappings = list(\n            self._session.scalars(\n                select(ScimUserMapping)\n                .order_by(ScimUserMapping.id)\n                .offset(offset)\n                .limit(count)\n            ).all()\n        )\n\n        return mappings, total\n\n    def update_user_mapping_external_id(\n        self,\n        mapping_id: int,\n        external_id: str,\n    ) -> ScimUserMapping:\n        \"\"\"Update the external ID on a user mapping.\n\n        Raises:\n            ValueError: If the mapping does not exist.\n        \"\"\"\n        mapping = self._session.get(ScimUserMapping, mapping_id)\n        if not mapping:\n            raise ValueError(f\"SCIM user mapping with id {mapping_id} not found\")\n        mapping.external_id = external_id\n        return mapping\n\n    def delete_user_mapping(self, mapping_id: int) -> None:\n        \"\"\"Delete a user mapping by ID. No-op if already deleted.\"\"\"\n        mapping = self._session.get(ScimUserMapping, mapping_id)\n        if not mapping:\n            logger.warning(\"SCIM user mapping %d not found during delete\", mapping_id)\n            return\n        self._session.delete(mapping)\n\n    # ------------------------------------------------------------------\n    # User query operations\n    # ------------------------------------------------------------------\n\n    def get_user(self, user_id: UUID) -> User | None:\n        \"\"\"Fetch a user by ID.\"\"\"\n        return self._session.scalar(\n            select(User).where(User.id == user_id)  # type: ignore[arg-type]\n        )\n\n    def get_user_by_email(self, email: str) -> User | None:\n        \"\"\"Fetch a user by email (case-insensitive).\"\"\"\n        return self._session.scalar(\n            select(User).where(func.lower(User.email) == func.lower(email))\n        )\n\n    def add_user(self, user: User) -> None:\n        \"\"\"Add a new user to the session and flush to assign an ID.\"\"\"\n        self._session.add(user)\n        self._session.flush()\n\n    def update_user(\n        self,\n        user: User,\n        *,\n        email: str | None = None,\n        is_active: bool | None = None,\n        personal_name: str | None = None,\n    ) -> None:\n        \"\"\"Update user attributes. Only sets fields that are provided.\"\"\"\n        if email is not None:\n            user.email = email\n        if is_active is not None:\n            user.is_active = is_active\n        if personal_name is not None:\n            user.personal_name = personal_name\n\n    def deactivate_user(self, user: User) -> None:\n        \"\"\"Mark a user as inactive.\"\"\"\n        user.is_active = False\n\n    def list_users(\n        self,\n        scim_filter: ScimFilter | None,\n        start_index: int = 1,\n        count: int = 100,\n    ) -> tuple[list[tuple[User, ScimUserMapping | None]], int]:\n        \"\"\"Query users with optional SCIM filter and pagination.\n\n        Returns:\n            A tuple of (list of (user, mapping) pairs, total_count).\n\n        Raises:\n            ValueError: If the filter uses an unsupported attribute.\n        \"\"\"\n        # Inner-join with ScimUserMapping so only SCIM-managed users appear.\n        # Pre-existing system accounts (anonymous, admin, etc.) are excluded\n        # unless they were explicitly linked via SCIM provisioning.\n        query = (\n            select(User)\n            .join(ScimUserMapping, ScimUserMapping.user_id == User.id)\n            .where(\n                User.account_type.notin_([AccountType.BOT, AccountType.EXT_PERM_USER])\n            )\n        )\n\n        if scim_filter:\n            attr = scim_filter.attribute.lower()\n            if attr == \"username\":\n                # arg-type: fastapi-users types User.email as str, not a column expression\n                # assignment: union return type widens but query is still Select[tuple[User]]\n                query = _apply_scim_string_op(query, User.email, scim_filter)  # type: ignore[arg-type, assignment]\n            elif attr == \"active\":\n                query = query.where(\n                    User.is_active.is_(scim_filter.value.lower() == \"true\")  # type: ignore[attr-defined]\n                )\n            elif attr == \"externalid\":\n                mapping = self.get_user_mapping_by_external_id(scim_filter.value)\n                if not mapping:\n                    return [], 0\n                query = query.where(User.id == mapping.user_id)  # type: ignore[arg-type]\n            else:\n                raise ValueError(\n                    f\"Unsupported filter attribute: {scim_filter.attribute}\"\n                )\n\n        # Count total matching rows first, then paginate. SCIM uses 1-based\n        # indexing (RFC 7644 §3.4.2), so we convert to a 0-based offset.\n        total = (\n            self._session.scalar(select(func.count()).select_from(query.subquery()))\n            or 0\n        )\n\n        offset = max(start_index - 1, 0)\n        users = list(\n            self._session.scalars(\n                query.order_by(User.id).offset(offset).limit(count)  # type: ignore[arg-type]\n            )\n            .unique()\n            .all()\n        )\n\n        # Batch-fetch SCIM mappings to avoid N+1 queries\n        mapping_map = self._get_user_mappings_batch([u.id for u in users])\n        return [(u, mapping_map.get(u.id)) for u in users], total\n\n    def sync_user_external_id(\n        self,\n        user_id: UUID,\n        new_external_id: str | None,\n        scim_username: str | None = None,\n        fields: ScimMappingFields | None = None,\n    ) -> None:\n        \"\"\"Sync the SCIM mapping for a user.\n\n        If a mapping already exists, its fields are updated (including\n        setting ``external_id`` to ``None`` when the IdP omits it).\n        If no mapping exists and ``new_external_id`` is provided, a new\n        mapping is created.  A mapping is never deleted here — SCIM-managed\n        users must retain their mapping to remain visible in ``GET /Users``.\n\n        When *fields* is provided, all mapping fields are written\n        unconditionally — including ``None`` values — so that a caller can\n        clear a previously-set field (e.g. removing a department).\n        \"\"\"\n        mapping = self.get_user_mapping_by_user_id(user_id)\n        if mapping:\n            if mapping.external_id != new_external_id:\n                mapping.external_id = new_external_id\n            if scim_username is not None:\n                mapping.scim_username = scim_username\n            if fields is not None:\n                mapping.department = fields.department\n                mapping.manager = fields.manager\n                mapping.given_name = fields.given_name\n                mapping.family_name = fields.family_name\n                mapping.scim_emails_json = fields.scim_emails_json\n        elif new_external_id:\n            self.create_user_mapping(\n                external_id=new_external_id,\n                user_id=user_id,\n                scim_username=scim_username,\n                fields=fields,\n            )\n\n    def _get_user_mappings_batch(\n        self, user_ids: list[UUID]\n    ) -> dict[UUID, ScimUserMapping]:\n        \"\"\"Batch-fetch SCIM user mappings keyed by user ID.\"\"\"\n        if not user_ids:\n            return {}\n        mappings = self._session.scalars(\n            select(ScimUserMapping).where(ScimUserMapping.user_id.in_(user_ids))\n        ).all()\n        return {m.user_id: m for m in mappings}\n\n    def get_user_groups(self, user_id: UUID) -> list[tuple[int, str]]:\n        \"\"\"Get groups a user belongs to as ``(group_id, group_name)`` pairs.\n\n        Excludes groups marked for deletion.\n        \"\"\"\n        rels = self._session.scalars(\n            select(User__UserGroup).where(User__UserGroup.user_id == user_id)\n        ).all()\n\n        group_ids = [r.user_group_id for r in rels]\n        if not group_ids:\n            return []\n\n        groups = self._session.scalars(\n            select(UserGroup).where(\n                UserGroup.id.in_(group_ids),\n                UserGroup.is_up_for_deletion.is_(False),\n            )\n        ).all()\n        return [(g.id, g.name) for g in groups]\n\n    def get_users_groups_batch(\n        self, user_ids: list[UUID]\n    ) -> dict[UUID, list[tuple[int, str]]]:\n        \"\"\"Batch-fetch group memberships for multiple users.\n\n        Returns a mapping of ``user_id → [(group_id, group_name), ...]``.\n        Avoids N+1 queries when building user list responses.\n        \"\"\"\n        if not user_ids:\n            return {}\n\n        rels = self._session.scalars(\n            select(User__UserGroup).where(User__UserGroup.user_id.in_(user_ids))\n        ).all()\n\n        group_ids = list({r.user_group_id for r in rels})\n        if not group_ids:\n            return {}\n\n        groups = self._session.scalars(\n            select(UserGroup).where(\n                UserGroup.id.in_(group_ids),\n                UserGroup.is_up_for_deletion.is_(False),\n            )\n        ).all()\n        groups_by_id = {g.id: g.name for g in groups}\n\n        result: dict[UUID, list[tuple[int, str]]] = {}\n        for r in rels:\n            if r.user_id and r.user_group_id in groups_by_id:\n                result.setdefault(r.user_id, []).append(\n                    (r.user_group_id, groups_by_id[r.user_group_id])\n                )\n        return result\n\n    # ------------------------------------------------------------------\n    # Group mapping operations\n    # ------------------------------------------------------------------\n\n    def create_group_mapping(\n        self,\n        external_id: str,\n        user_group_id: int,\n    ) -> ScimGroupMapping:\n        \"\"\"Create a mapping between a SCIM externalId and an Onyx user group.\"\"\"\n        mapping = ScimGroupMapping(external_id=external_id, user_group_id=user_group_id)\n        self._session.add(mapping)\n        self._session.flush()\n        return mapping\n\n    def get_group_mapping_by_external_id(\n        self, external_id: str\n    ) -> ScimGroupMapping | None:\n        \"\"\"Look up a group mapping by the IdP's external identifier.\"\"\"\n        return self._session.scalar(\n            select(ScimGroupMapping).where(ScimGroupMapping.external_id == external_id)\n        )\n\n    def get_group_mapping_by_group_id(\n        self, user_group_id: int\n    ) -> ScimGroupMapping | None:\n        \"\"\"Look up a group mapping by the Onyx user group ID.\"\"\"\n        return self._session.scalar(\n            select(ScimGroupMapping).where(\n                ScimGroupMapping.user_group_id == user_group_id\n            )\n        )\n\n    def list_group_mappings(\n        self,\n        start_index: int = 1,\n        count: int = 100,\n    ) -> tuple[list[ScimGroupMapping], int]:\n        \"\"\"List group mappings with SCIM-style pagination.\n\n        Args:\n            start_index: 1-based start index (SCIM convention).\n            count: Maximum number of results to return.\n\n        Returns:\n            A tuple of (mappings, total_count).\n        \"\"\"\n        total = (\n            self._session.scalar(select(func.count()).select_from(ScimGroupMapping))\n            or 0\n        )\n\n        offset = max(start_index - 1, 0)\n        mappings = list(\n            self._session.scalars(\n                select(ScimGroupMapping)\n                .order_by(ScimGroupMapping.id)\n                .offset(offset)\n                .limit(count)\n            ).all()\n        )\n\n        return mappings, total\n\n    def delete_group_mapping(self, mapping_id: int) -> None:\n        \"\"\"Delete a group mapping by ID. No-op if already deleted.\"\"\"\n        mapping = self._session.get(ScimGroupMapping, mapping_id)\n        if not mapping:\n            logger.warning(\"SCIM group mapping %d not found during delete\", mapping_id)\n            return\n        self._session.delete(mapping)\n\n    # ------------------------------------------------------------------\n    # Group query operations\n    # ------------------------------------------------------------------\n\n    def get_group(self, group_id: int) -> UserGroup | None:\n        \"\"\"Fetch a group by ID, returning None if deleted or missing.\"\"\"\n        group = self._session.get(UserGroup, group_id)\n        if group and group.is_up_for_deletion:\n            return None\n        return group\n\n    def get_group_by_name(self, name: str) -> UserGroup | None:\n        \"\"\"Fetch a group by exact name.\"\"\"\n        return self._session.scalar(select(UserGroup).where(UserGroup.name == name))\n\n    def add_group(self, group: UserGroup) -> None:\n        \"\"\"Add a new group to the session and flush to assign an ID.\"\"\"\n        self._session.add(group)\n        self._session.flush()\n\n    def add_permission_grant_to_group(\n        self,\n        group_id: int,\n        permission: Permission,\n        grant_source: GrantSource,\n    ) -> None:\n        \"\"\"Grant a permission to a group and flush.\"\"\"\n        self._session.add(\n            PermissionGrant(\n                group_id=group_id,\n                permission=permission,\n                grant_source=grant_source,\n            )\n        )\n        self._session.flush()\n\n    def update_group(\n        self,\n        group: UserGroup,\n        *,\n        name: str | None = None,\n    ) -> None:\n        \"\"\"Update group attributes and set the modification timestamp.\"\"\"\n        if name is not None:\n            group.name = name\n        group.time_last_modified_by_user = func.now()\n\n    def delete_group(self, group: UserGroup) -> None:\n        \"\"\"Delete a group from the session.\"\"\"\n        self._session.delete(group)\n\n    def list_groups(\n        self,\n        scim_filter: ScimFilter | None,\n        start_index: int = 1,\n        count: int = 100,\n    ) -> tuple[list[tuple[UserGroup, str | None]], int]:\n        \"\"\"Query groups with optional SCIM filter and pagination.\n\n        Returns:\n            A tuple of (list of (group, external_id) pairs, total_count).\n\n        Raises:\n            ValueError: If the filter uses an unsupported attribute.\n        \"\"\"\n        query = select(UserGroup).where(UserGroup.is_up_for_deletion.is_(False))\n\n        if scim_filter:\n            attr = scim_filter.attribute.lower()\n            if attr == \"displayname\":\n                # assignment: union return type widens but query is still Select[tuple[UserGroup]]\n                query = _apply_scim_string_op(query, UserGroup.name, scim_filter)  # type: ignore[assignment]\n            elif attr == \"externalid\":\n                mapping = self.get_group_mapping_by_external_id(scim_filter.value)\n                if not mapping:\n                    return [], 0\n                query = query.where(UserGroup.id == mapping.user_group_id)\n            else:\n                raise ValueError(\n                    f\"Unsupported filter attribute: {scim_filter.attribute}\"\n                )\n\n        total = (\n            self._session.scalar(select(func.count()).select_from(query.subquery()))\n            or 0\n        )\n\n        offset = max(start_index - 1, 0)\n        groups = list(\n            self._session.scalars(\n                query.order_by(UserGroup.id).offset(offset).limit(count)\n            ).all()\n        )\n\n        ext_id_map = self._get_group_external_ids([g.id for g in groups])\n        return [(g, ext_id_map.get(g.id)) for g in groups], total\n\n    def get_group_members(self, group_id: int) -> list[tuple[UUID, str | None]]:\n        \"\"\"Get group members as (user_id, email) pairs.\"\"\"\n        rels = self._session.scalars(\n            select(User__UserGroup).where(User__UserGroup.user_group_id == group_id)\n        ).all()\n\n        user_ids = [r.user_id for r in rels if r.user_id]\n        if not user_ids:\n            return []\n\n        users = (\n            self._session.scalars(\n                select(User).where(User.id.in_(user_ids))  # type: ignore[attr-defined]\n            )\n            .unique()\n            .all()\n        )\n        users_by_id = {u.id: u for u in users}\n\n        return [\n            (\n                r.user_id,\n                users_by_id[r.user_id].email if r.user_id in users_by_id else None,\n            )\n            for r in rels\n            if r.user_id\n        ]\n\n    def validate_member_ids(self, uuids: list[UUID]) -> list[UUID]:\n        \"\"\"Return the subset of UUIDs that don't exist as users.\n\n        Returns an empty list if all IDs are valid.\n        \"\"\"\n        if not uuids:\n            return []\n        existing_users = (\n            self._session.scalars(\n                select(User).where(User.id.in_(uuids))  # type: ignore[attr-defined]\n            )\n            .unique()\n            .all()\n        )\n        existing_ids = {u.id for u in existing_users}\n        return [uid for uid in uuids if uid not in existing_ids]\n\n    def upsert_group_members(self, group_id: int, user_ids: list[UUID]) -> None:\n        \"\"\"Add user-group relationships, ignoring duplicates.\"\"\"\n        if not user_ids:\n            return\n        self._session.execute(\n            pg_insert(User__UserGroup)\n            .values([{\"user_id\": uid, \"user_group_id\": group_id} for uid in user_ids])\n            .on_conflict_do_nothing(\n                index_elements=[\n                    User__UserGroup.user_group_id,\n                    User__UserGroup.user_id,\n                ]\n            )\n        )\n\n    def replace_group_members(self, group_id: int, user_ids: list[UUID]) -> None:\n        \"\"\"Replace all members of a group.\"\"\"\n        self._session.execute(\n            sa_delete(User__UserGroup).where(User__UserGroup.user_group_id == group_id)\n        )\n        self.upsert_group_members(group_id, user_ids)\n\n    def remove_group_members(self, group_id: int, user_ids: list[UUID]) -> None:\n        \"\"\"Remove specific members from a group.\"\"\"\n        if not user_ids:\n            return\n        self._session.execute(\n            sa_delete(User__UserGroup).where(\n                User__UserGroup.user_group_id == group_id,\n                User__UserGroup.user_id.in_(user_ids),\n            )\n        )\n\n    def delete_group_with_members(self, group: UserGroup) -> None:\n        \"\"\"Remove all member relationships and delete the group.\"\"\"\n        self._session.execute(\n            sa_delete(User__UserGroup).where(User__UserGroup.user_group_id == group.id)\n        )\n        self._session.delete(group)\n\n    def sync_group_external_id(\n        self, group_id: int, new_external_id: str | None\n    ) -> None:\n        \"\"\"Create, update, or delete the external ID mapping for a group.\"\"\"\n        mapping = self.get_group_mapping_by_group_id(group_id)\n        if new_external_id:\n            if mapping:\n                if mapping.external_id != new_external_id:\n                    mapping.external_id = new_external_id\n            else:\n                self.create_group_mapping(\n                    external_id=new_external_id, user_group_id=group_id\n                )\n        elif mapping:\n            self.delete_group_mapping(mapping.id)\n\n    def _get_group_external_ids(self, group_ids: list[int]) -> dict[int, str]:\n        \"\"\"Batch-fetch external IDs for a list of group IDs.\"\"\"\n        if not group_ids:\n            return {}\n        mappings = self._session.scalars(\n            select(ScimGroupMapping).where(\n                ScimGroupMapping.user_group_id.in_(group_ids)\n            )\n        ).all()\n        return {m.user_group_id: m.external_id for m in mappings}\n\n\n# ---------------------------------------------------------------------------\n# Module-level helpers (used by DAL methods above)\n# ---------------------------------------------------------------------------\n\n\ndef _apply_scim_string_op(\n    query: Select[tuple[User]] | Select[tuple[UserGroup]],\n    column: SQLColumnExpression[str],\n    scim_filter: ScimFilter,\n) -> Select[tuple[User]] | Select[tuple[UserGroup]]:\n    \"\"\"Apply a SCIM string filter operator using SQLAlchemy column operators.\n\n    Handles eq (case-insensitive exact), co (contains), and sw (starts with).\n    SQLAlchemy's operators handle LIKE-pattern escaping internally.\n    \"\"\"\n    val = scim_filter.value\n    if scim_filter.operator == ScimFilterOperator.EQUAL:\n        return query.where(func.lower(column) == val.lower())\n    elif scim_filter.operator == ScimFilterOperator.CONTAINS:\n        return query.where(column.icontains(val, autoescape=True))\n    elif scim_filter.operator == ScimFilterOperator.STARTS_WITH:\n        return query.where(column.istartswith(val, autoescape=True))\n    else:\n        raise ValueError(f\"Unsupported string filter operator: {scim_filter.operator}\")\n"
  },
  {
    "path": "backend/ee/onyx/db/search.py",
    "content": "import uuid\nfrom datetime import timedelta\nfrom uuid import UUID\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.engine.time_utils import get_db_current_time\nfrom onyx.db.models import SearchQuery\n\n\ndef create_search_query(\n    db_session: Session,\n    user_id: UUID,\n    query: str,\n    query_expansions: list[str] | None = None,\n) -> SearchQuery:\n    \"\"\"Create and persist a `SearchQuery` row.\n\n    Notes:\n    - `SearchQuery.id` is a UUID PK without a server-side default, so we generate it.\n    - `created_at` is filled by the DB (server_default=now()).\n    \"\"\"\n    search_query = SearchQuery(\n        id=uuid.uuid4(),\n        user_id=user_id,\n        query=query,\n        query_expansions=query_expansions,\n    )\n    db_session.add(search_query)\n    db_session.commit()\n    db_session.refresh(search_query)\n    return search_query\n\n\ndef fetch_search_queries_for_user(\n    db_session: Session,\n    user_id: UUID,\n    filter_days: int | None = None,\n    limit: int | None = None,\n) -> list[SearchQuery]:\n    \"\"\"Fetch `SearchQuery` rows for a user.\n\n    Args:\n        user_id: User UUID.\n        filter_days: Optional time filter. If provided, only rows created within\n            the last `filter_days` days are returned.\n        limit: Optional max number of rows to return.\n    \"\"\"\n    if filter_days is not None and filter_days <= 0:\n        raise ValueError(\"filter_days must be > 0\")\n\n    stmt = select(SearchQuery).where(SearchQuery.user_id == user_id)\n\n    if filter_days is not None and filter_days > 0:\n        cutoff = get_db_current_time(db_session) - timedelta(days=filter_days)\n        stmt = stmt.where(SearchQuery.created_at >= cutoff)\n\n    stmt = stmt.order_by(SearchQuery.created_at.desc())\n\n    if limit is not None:\n        stmt = stmt.limit(limit)\n\n    return list(db_session.scalars(stmt).all())\n"
  },
  {
    "path": "backend/ee/onyx/db/standard_answer.py",
    "content": "import re\nimport string\nfrom collections.abc import Sequence\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import StandardAnswer\nfrom onyx.db.models import StandardAnswerCategory\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef check_category_validity(category_name: str) -> bool:\n    \"\"\"If a category name is too long, it should not be used (it will cause an error in Postgres\n    as the unique constraint can only apply to entries that are less than 2704 bytes).\n\n    Additionally, extremely long categories are not really usable / useful.\"\"\"\n    if len(category_name) > 255:\n        logger.error(\n            f\"Category with name '{category_name}' is too long, cannot be used\"\n        )\n        return False\n\n    return True\n\n\ndef insert_standard_answer_category(\n    category_name: str, db_session: Session\n) -> StandardAnswerCategory:\n    if not check_category_validity(category_name):\n        raise ValueError(f\"Invalid category name: {category_name}\")\n    standard_answer_category = StandardAnswerCategory(name=category_name)\n    db_session.add(standard_answer_category)\n    db_session.commit()\n\n    return standard_answer_category\n\n\ndef insert_standard_answer(\n    keyword: str,\n    answer: str,\n    category_ids: list[int],\n    match_regex: bool,\n    match_any_keywords: bool,\n    db_session: Session,\n) -> StandardAnswer:\n    existing_categories = fetch_standard_answer_categories_by_ids(\n        standard_answer_category_ids=category_ids,\n        db_session=db_session,\n    )\n    if len(existing_categories) != len(category_ids):\n        raise ValueError(f\"Some or all categories with ids {category_ids} do not exist\")\n\n    standard_answer = StandardAnswer(\n        keyword=keyword,\n        answer=answer,\n        categories=existing_categories,\n        active=True,\n        match_regex=match_regex,\n        match_any_keywords=match_any_keywords,\n    )\n    db_session.add(standard_answer)\n    db_session.commit()\n    return standard_answer\n\n\ndef update_standard_answer(\n    standard_answer_id: int,\n    keyword: str,\n    answer: str,\n    category_ids: list[int],\n    match_regex: bool,\n    match_any_keywords: bool,\n    db_session: Session,\n) -> StandardAnswer:\n    standard_answer = db_session.scalar(\n        select(StandardAnswer).where(StandardAnswer.id == standard_answer_id)\n    )\n    if standard_answer is None:\n        raise ValueError(f\"No standard answer with id {standard_answer_id}\")\n\n    existing_categories = fetch_standard_answer_categories_by_ids(\n        standard_answer_category_ids=category_ids,\n        db_session=db_session,\n    )\n    if len(existing_categories) != len(category_ids):\n        raise ValueError(f\"Some or all categories with ids {category_ids} do not exist\")\n\n    standard_answer.keyword = keyword\n    standard_answer.answer = answer\n    standard_answer.categories = list(existing_categories)\n    standard_answer.match_regex = match_regex\n    standard_answer.match_any_keywords = match_any_keywords\n\n    db_session.commit()\n\n    return standard_answer\n\n\ndef remove_standard_answer(\n    standard_answer_id: int,\n    db_session: Session,\n) -> None:\n    standard_answer = db_session.scalar(\n        select(StandardAnswer).where(StandardAnswer.id == standard_answer_id)\n    )\n    if standard_answer is None:\n        raise ValueError(f\"No standard answer with id {standard_answer_id}\")\n\n    standard_answer.active = False\n    db_session.commit()\n\n\ndef update_standard_answer_category(\n    standard_answer_category_id: int,\n    category_name: str,\n    db_session: Session,\n) -> StandardAnswerCategory:\n    standard_answer_category = db_session.scalar(\n        select(StandardAnswerCategory).where(\n            StandardAnswerCategory.id == standard_answer_category_id\n        )\n    )\n    if standard_answer_category is None:\n        raise ValueError(\n            f\"No standard answer category with id {standard_answer_category_id}\"\n        )\n\n    if not check_category_validity(category_name):\n        raise ValueError(f\"Invalid category name: {category_name}\")\n\n    standard_answer_category.name = category_name\n\n    db_session.commit()\n\n    return standard_answer_category\n\n\ndef fetch_standard_answer_category(\n    standard_answer_category_id: int,\n    db_session: Session,\n) -> StandardAnswerCategory | None:\n    return db_session.scalar(\n        select(StandardAnswerCategory).where(\n            StandardAnswerCategory.id == standard_answer_category_id\n        )\n    )\n\n\ndef fetch_standard_answer_categories_by_ids(\n    standard_answer_category_ids: list[int],\n    db_session: Session,\n) -> Sequence[StandardAnswerCategory]:\n    return db_session.scalars(\n        select(StandardAnswerCategory).where(\n            StandardAnswerCategory.id.in_(standard_answer_category_ids)\n        )\n    ).all()\n\n\ndef fetch_standard_answer_categories(\n    db_session: Session,\n) -> Sequence[StandardAnswerCategory]:\n    return db_session.scalars(select(StandardAnswerCategory)).all()\n\n\ndef fetch_standard_answer(\n    standard_answer_id: int,\n    db_session: Session,\n) -> StandardAnswer | None:\n    return db_session.scalar(\n        select(StandardAnswer).where(StandardAnswer.id == standard_answer_id)\n    )\n\n\ndef fetch_standard_answers(db_session: Session) -> Sequence[StandardAnswer]:\n    return db_session.scalars(\n        select(StandardAnswer).where(StandardAnswer.active.is_(True))\n    ).all()\n\n\ndef create_initial_default_standard_answer_category(db_session: Session) -> None:\n    default_category_id = 0\n    default_category_name = \"General\"\n    default_category = fetch_standard_answer_category(\n        standard_answer_category_id=default_category_id,\n        db_session=db_session,\n    )\n    if default_category is not None:\n        if default_category.name != default_category_name:\n            raise ValueError(\n                \"DB is not in a valid initial state. Default standard answer category does not have expected name.\"\n            )\n        return\n\n    standard_answer_category = StandardAnswerCategory(\n        id=default_category_id,\n        name=default_category_name,\n    )\n    db_session.add(standard_answer_category)\n    db_session.commit()\n\n\ndef fetch_standard_answer_categories_by_names(\n    standard_answer_category_names: list[str],\n    db_session: Session,\n) -> Sequence[StandardAnswerCategory]:\n    return db_session.scalars(\n        select(StandardAnswerCategory).where(\n            StandardAnswerCategory.name.in_(standard_answer_category_names)\n        )\n    ).all()\n\n\ndef find_matching_standard_answers(\n    id_in: list[int],\n    query: str,\n    db_session: Session,\n) -> list[tuple[StandardAnswer, str]]:\n    \"\"\"\n    Returns a list of tuples, where each tuple is a StandardAnswer definition matching\n    the query and a string representing the match (either the regex match group or the\n    set of keywords).\n\n    If `answer_instance.match_regex` is true, the definition is considered \"matched\"\n    if the query matches the `answer_instance.keyword` using `re.search`.\n\n    Otherwise, the definition is considered \"matched\" if the space-delimited tokens\n    in `keyword` exists in `query`, depending on the state of `match_any_keywords`\n    \"\"\"\n    stmt = (\n        select(StandardAnswer)\n        .where(StandardAnswer.active.is_(True))\n        .where(StandardAnswer.id.in_(id_in))\n    )\n    possible_standard_answers: Sequence[StandardAnswer] = db_session.scalars(stmt).all()\n\n    matching_standard_answers: list[tuple[StandardAnswer, str]] = []\n    for standard_answer in possible_standard_answers:\n        if standard_answer.match_regex:\n            maybe_matches = re.search(standard_answer.keyword, query, re.IGNORECASE)\n            if maybe_matches is not None:\n                match_group = maybe_matches.group(0)\n                matching_standard_answers.append((standard_answer, match_group))\n\n        else:\n            # Remove punctuation and split the keyword into individual words\n            keyword_words = set(\n                \"\".join(\n                    char\n                    for char in standard_answer.keyword.lower()\n                    if char not in string.punctuation\n                ).split()\n            )\n\n            # Remove punctuation and split the query into individual words\n            query_words = \"\".join(\n                char for char in query.lower() if char not in string.punctuation\n            ).split()\n\n            # Check if all of the keyword words are in the query words\n            if standard_answer.match_any_keywords:\n                for word in query_words:\n                    if word in keyword_words:\n                        matching_standard_answers.append((standard_answer, word))\n                        break\n            else:\n                if all(word in query_words for word in keyword_words):\n                    matching_standard_answers.append(\n                        (\n                            standard_answer,\n                            re.sub(r\"\\s+?\", \", \", standard_answer.keyword),\n                        )\n                    )\n\n    return matching_standard_answers\n"
  },
  {
    "path": "backend/ee/onyx/db/token_limit.py",
    "content": "from collections.abc import Sequence\n\nfrom sqlalchemy import exists\nfrom sqlalchemy import Row\nfrom sqlalchemy import Select\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import aliased\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import TokenRateLimitScope\nfrom onyx.db.models import TokenRateLimit\nfrom onyx.db.models import TokenRateLimit__UserGroup\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserGroup\nfrom onyx.db.models import UserRole\nfrom onyx.server.token_rate_limits.models import TokenRateLimitArgs\n\n\ndef _add_user_filters(stmt: Select, user: User, get_editable: bool = True) -> Select:\n    if user.role == UserRole.ADMIN:\n        return stmt\n\n    # If anonymous user, only show global/public token_rate_limits\n    if user.is_anonymous:\n        where_clause = TokenRateLimit.scope == TokenRateLimitScope.GLOBAL\n        return stmt.where(where_clause)\n\n    stmt = stmt.distinct()\n    TRLimit_UG = aliased(TokenRateLimit__UserGroup)\n    User__UG = aliased(User__UserGroup)\n\n    \"\"\"\n    Here we select token_rate_limits by relation:\n    User -> User__UserGroup -> TokenRateLimit__UserGroup ->\n    TokenRateLimit\n    \"\"\"\n    stmt = stmt.outerjoin(TRLimit_UG).outerjoin(\n        User__UG,\n        User__UG.user_group_id == TRLimit_UG.user_group_id,\n    )\n\n    \"\"\"\n    Filter token_rate_limits by:\n    - if the user is in the user_group that owns the token_rate_limit\n    - if the user is not a global_curator, they must also have a curator relationship\n    to the user_group\n    - if editing is being done, we also filter out token_rate_limits that are owned by groups\n    that the user isn't a curator for\n    - if we are not editing, we show all token_rate_limits in the groups the user curates\n    \"\"\"\n\n    where_clause = User__UG.user_id == user.id\n    if user.role == UserRole.CURATOR and get_editable:\n        where_clause &= User__UG.is_curator == True  # noqa: E712\n    if get_editable:\n        user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id)\n        if user.role == UserRole.CURATOR:\n            user_groups = user_groups.where(\n                User__UserGroup.is_curator == True  # noqa: E712\n            )\n        where_clause &= (\n            ~exists()\n            .where(TRLimit_UG.rate_limit_id == TokenRateLimit.id)\n            .where(~TRLimit_UG.user_group_id.in_(user_groups))\n            .correlate(TokenRateLimit)\n        )\n\n    return stmt.where(where_clause)\n\n\ndef fetch_all_user_group_token_rate_limits_by_group(\n    db_session: Session,\n) -> Sequence[Row[tuple[TokenRateLimit, str]]]:\n    query = (\n        select(TokenRateLimit, UserGroup.name)\n        .join(\n            TokenRateLimit__UserGroup,\n            TokenRateLimit.id == TokenRateLimit__UserGroup.rate_limit_id,\n        )\n        .join(UserGroup, UserGroup.id == TokenRateLimit__UserGroup.user_group_id)\n    )\n\n    return db_session.execute(query).all()\n\n\ndef insert_user_group_token_rate_limit(\n    db_session: Session,\n    token_rate_limit_settings: TokenRateLimitArgs,\n    group_id: int,\n) -> TokenRateLimit:\n    token_limit = TokenRateLimit(\n        enabled=token_rate_limit_settings.enabled,\n        token_budget=token_rate_limit_settings.token_budget,\n        period_hours=token_rate_limit_settings.period_hours,\n        scope=TokenRateLimitScope.USER_GROUP,\n    )\n    db_session.add(token_limit)\n    db_session.flush()\n\n    rate_limit = TokenRateLimit__UserGroup(\n        rate_limit_id=token_limit.id, user_group_id=group_id\n    )\n    db_session.add(rate_limit)\n    db_session.commit()\n\n    return token_limit\n\n\ndef fetch_user_group_token_rate_limits_for_user(\n    db_session: Session,\n    group_id: int,\n    user: User,\n    enabled_only: bool = False,\n    ordered: bool = True,\n    get_editable: bool = True,\n) -> Sequence[TokenRateLimit]:\n    stmt = (\n        select(TokenRateLimit)\n        .join(\n            TokenRateLimit__UserGroup,\n            TokenRateLimit.id == TokenRateLimit__UserGroup.rate_limit_id,\n        )\n        .where(TokenRateLimit__UserGroup.user_group_id == group_id)\n    )\n    stmt = _add_user_filters(stmt, user, get_editable)\n\n    if enabled_only:\n        stmt = stmt.where(TokenRateLimit.enabled.is_(True))\n\n    if ordered:\n        stmt = stmt.order_by(TokenRateLimit.created_at.desc())\n\n    return db_session.scalars(stmt).all()\n"
  },
  {
    "path": "backend/ee/onyx/db/usage_export.py",
    "content": "import uuid\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom typing import IO\nfrom typing import Optional\n\nfrom fastapi_users_db_sqlalchemy import UUID_ID\nfrom sqlalchemy import cast\nfrom sqlalchemy.dialects.postgresql import UUID\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.query_history import fetch_chat_sessions_eagerly_by_time\nfrom ee.onyx.server.reporting.usage_export_models import ChatMessageSkeleton\nfrom ee.onyx.server.reporting.usage_export_models import FlowType\nfrom ee.onyx.server.reporting.usage_export_models import UsageReportMetadata\nfrom onyx.configs.constants import MessageType\nfrom onyx.db.models import UsageReport\nfrom onyx.db.models import User\nfrom onyx.file_store.file_store import get_default_file_store\n\n\n# Gets skeletons of all messages in the given range\ndef get_empty_chat_messages_entries__paginated(\n    db_session: Session,\n    period: tuple[datetime, datetime],\n    limit: int | None = 500,\n    initial_time: datetime | None = None,\n) -> tuple[Optional[datetime], list[ChatMessageSkeleton]]:\n    \"\"\"Returns a tuple where:\n    first element is the most recent timestamp out of the sessions iterated\n    - this timestamp can be used to paginate forward in time\n    second element is a list of messages belonging to all the sessions iterated\n\n    Only messages of type USER are returned\n    \"\"\"\n    chat_sessions = fetch_chat_sessions_eagerly_by_time(\n        start=period[0],\n        end=period[1],\n        db_session=db_session,\n        limit=limit,\n        initial_time=initial_time,\n    )\n\n    message_skeletons: list[ChatMessageSkeleton] = []\n    for chat_session in chat_sessions:\n        flow_type = FlowType.SLACK if chat_session.onyxbot_flow else FlowType.CHAT\n\n        for message in chat_session.messages:\n            # Only count user messages\n            if message.message_type != MessageType.USER:\n                continue\n\n            # Get user email\n            user_email = chat_session.user.email if chat_session.user else None\n\n            # Get assistant name (from session persona, or alternate if specified)\n            assistant_name = None\n            if chat_session.persona:\n                assistant_name = chat_session.persona.name\n\n            message_skeletons.append(\n                ChatMessageSkeleton(\n                    message_id=message.id,\n                    chat_session_id=chat_session.id,\n                    user_id=str(chat_session.user_id) if chat_session.user_id else None,\n                    flow_type=flow_type,\n                    time_sent=message.time_sent,\n                    assistant_name=assistant_name,\n                    user_email=user_email,\n                    number_of_tokens=message.token_count,\n                )\n            )\n    if len(chat_sessions) == 0:\n        return None, []\n\n    return chat_sessions[-1].time_created, message_skeletons\n\n\ndef get_all_empty_chat_message_entries(\n    db_session: Session,\n    period: tuple[datetime, datetime],\n) -> Generator[list[ChatMessageSkeleton], None, None]:\n    \"\"\"period is the range of time over which to fetch messages.\"\"\"\n    initial_time: Optional[datetime] = period[0]\n    while True:\n        # iterate from oldest to newest\n        time_created, message_skeletons = get_empty_chat_messages_entries__paginated(\n            db_session,\n            period,\n            initial_time=initial_time,\n        )\n\n        if not message_skeletons:\n            return\n\n        yield message_skeletons\n\n        # Update initial_time for the next iteration\n        initial_time = time_created\n\n\ndef get_all_usage_reports(db_session: Session) -> list[UsageReportMetadata]:\n    # Get the user emails\n    usage_reports = db_session.query(UsageReport).all()\n    user_ids = {r.requestor_user_id for r in usage_reports if r.requestor_user_id}\n    user_emails = {\n        user.id: user.email\n        for user in db_session.query(User)\n        .filter(cast(User.id, UUID).in_(user_ids))\n        .all()\n    }\n\n    return [\n        UsageReportMetadata(\n            report_name=r.report_name,\n            requestor=(\n                user_emails.get(r.requestor_user_id) if r.requestor_user_id else None\n            ),\n            time_created=r.time_created,\n            period_from=r.period_from,\n            period_to=r.period_to,\n        )\n        for r in usage_reports\n    ]\n\n\ndef get_usage_report_data(\n    report_display_name: str,\n) -> IO:\n    \"\"\"\n    Get the usage report data from the file store.\n\n    Args:\n        db_session: The database session.\n        report_display_name: The display name of the usage report. Also assumes\n                             that the file is stored with this as the ID in the file store.\n\n    Returns:\n        The usage report data.\n    \"\"\"\n    file_store = get_default_file_store()\n    # usage report may be very large, so don't load it all into memory\n    return file_store.read_file(\n        file_id=report_display_name, mode=\"b\", use_tempfile=True\n    )\n\n\ndef write_usage_report(\n    db_session: Session,\n    report_name: str,\n    user_id: uuid.UUID | UUID_ID | None,\n    period: tuple[datetime, datetime] | None,\n) -> UsageReport:\n    new_report = UsageReport(\n        report_name=report_name,\n        requestor_user_id=user_id,\n        period_from=period[0] if period else None,\n        period_to=period[1] if period else None,\n    )\n    db_session.add(new_report)\n    db_session.commit()\n    return new_report\n"
  },
  {
    "path": "backend/ee/onyx/db/user_group.py",
    "content": "from collections.abc import Sequence\nfrom operator import and_\nfrom uuid import UUID\n\nfrom fastapi import HTTPException\nfrom sqlalchemy import delete\nfrom sqlalchemy import func\nfrom sqlalchemy import Select\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.dialects.postgresql import insert\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.server.user_group.models import SetCuratorRequest\nfrom ee.onyx.server.user_group.models import UserGroupCreate\nfrom ee.onyx.server.user_group.models import UserGroupUpdate\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import GrantSource\nfrom onyx.db.enums import Permission\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom onyx.db.models import Credential__UserGroup\nfrom onyx.db.models import Document\nfrom onyx.db.models import DocumentByConnectorCredentialPair\nfrom onyx.db.models import DocumentSet\nfrom onyx.db.models import DocumentSet__UserGroup\nfrom onyx.db.models import FederatedConnector__DocumentSet\nfrom onyx.db.models import LLMProvider__UserGroup\nfrom onyx.db.models import PermissionGrant\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Persona__UserGroup\nfrom onyx.db.models import TokenRateLimit__UserGroup\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserGroup\nfrom onyx.db.models import UserGroup__ConnectorCredentialPair\nfrom onyx.db.models import UserRole\nfrom onyx.db.permissions import recompute_user_permissions__no_commit\nfrom onyx.db.users import fetch_user_by_id\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _cleanup_user__user_group_relationships__no_commit(\n    db_session: Session,\n    user_group_id: int,\n    user_ids: list[UUID] | None = None,\n) -> None:\n    \"\"\"NOTE: does not commit the transaction.\"\"\"\n    where_clause = User__UserGroup.user_group_id == user_group_id\n    if user_ids:\n        where_clause &= User__UserGroup.user_id.in_(user_ids)\n\n    user__user_group_relationships = db_session.scalars(\n        select(User__UserGroup).where(where_clause)\n    ).all()\n    for user__user_group_relationship in user__user_group_relationships:\n        db_session.delete(user__user_group_relationship)\n\n\ndef _cleanup_credential__user_group_relationships__no_commit(\n    db_session: Session,\n    user_group_id: int,\n) -> None:\n    \"\"\"NOTE: does not commit the transaction.\"\"\"\n    db_session.query(Credential__UserGroup).filter(\n        Credential__UserGroup.user_group_id == user_group_id\n    ).delete(synchronize_session=False)\n\n\ndef _cleanup_llm_provider__user_group_relationships__no_commit(\n    db_session: Session, user_group_id: int\n) -> None:\n    \"\"\"NOTE: does not commit the transaction.\"\"\"\n    db_session.query(LLMProvider__UserGroup).filter(\n        LLMProvider__UserGroup.user_group_id == user_group_id\n    ).delete(synchronize_session=False)\n\n\ndef _cleanup_persona__user_group_relationships__no_commit(\n    db_session: Session, user_group_id: int\n) -> None:\n    \"\"\"NOTE: does not commit the transaction.\"\"\"\n    db_session.query(Persona__UserGroup).filter(\n        Persona__UserGroup.user_group_id == user_group_id\n    ).delete(synchronize_session=False)\n\n\ndef _cleanup_token_rate_limit__user_group_relationships__no_commit(\n    db_session: Session, user_group_id: int\n) -> None:\n    \"\"\"NOTE: does not commit the transaction.\"\"\"\n    token_rate_limit__user_group_relationships = db_session.scalars(\n        select(TokenRateLimit__UserGroup).where(\n            TokenRateLimit__UserGroup.user_group_id == user_group_id\n        )\n    ).all()\n    for (\n        token_rate_limit__user_group_relationship\n    ) in token_rate_limit__user_group_relationships:\n        db_session.delete(token_rate_limit__user_group_relationship)\n\n\ndef _cleanup_user_group__cc_pair_relationships__no_commit(\n    db_session: Session, user_group_id: int, outdated_only: bool\n) -> None:\n    \"\"\"NOTE: does not commit the transaction.\"\"\"\n    stmt = select(UserGroup__ConnectorCredentialPair).where(\n        UserGroup__ConnectorCredentialPair.user_group_id == user_group_id\n    )\n    if outdated_only:\n        stmt = stmt.where(\n            UserGroup__ConnectorCredentialPair.is_current == False  # noqa: E712\n        )\n    user_group__cc_pair_relationships = db_session.scalars(stmt)\n    for user_group__cc_pair_relationship in user_group__cc_pair_relationships:\n        db_session.delete(user_group__cc_pair_relationship)\n\n\ndef _cleanup_document_set__user_group_relationships__no_commit(\n    db_session: Session, user_group_id: int\n) -> None:\n    \"\"\"NOTE: does not commit the transaction.\"\"\"\n    db_session.execute(\n        delete(DocumentSet__UserGroup).where(\n            DocumentSet__UserGroup.user_group_id == user_group_id\n        )\n    )\n\n\ndef validate_object_creation_for_user(\n    db_session: Session,\n    user: User,\n    target_group_ids: list[int] | None = None,\n    object_is_public: bool | None = None,\n    object_is_perm_sync: bool | None = None,\n    object_is_owned_by_user: bool = False,\n    object_is_new: bool = False,\n) -> None:\n    \"\"\"\n    All users can create/edit permission synced objects if they don't specify a group\n    All admin actions are allowed.\n    Curators and global curators can create public objects.\n    Prevents other non-admins from creating/editing:\n    - public objects\n    - objects with no groups\n    - objects that belong to a group they don't curate\n    \"\"\"\n    if object_is_perm_sync and not target_group_ids:\n        return\n\n    # Admins are allowed\n    if user.role == UserRole.ADMIN:\n        return\n\n    # Allow curators and global curators to create public objects\n    # w/o associated groups IF the object is new/owned by them\n    if (\n        object_is_public\n        and user.role in [UserRole.CURATOR, UserRole.GLOBAL_CURATOR]\n        and (object_is_new or object_is_owned_by_user)\n    ):\n        return\n\n    if object_is_public and user.role == UserRole.BASIC:\n        detail = \"User does not have permission to create public objects\"\n        logger.error(detail)\n        raise HTTPException(\n            status_code=400,\n            detail=detail,\n        )\n\n    if not target_group_ids:\n        detail = \"Curators must specify 1+ groups\"\n        logger.error(detail)\n        raise HTTPException(\n            status_code=400,\n            detail=detail,\n        )\n\n    user_curated_groups = fetch_user_groups_for_user(\n        db_session=db_session,\n        user_id=user.id,\n        # Global curators can curate all groups they are member of\n        only_curator_groups=user.role != UserRole.GLOBAL_CURATOR,\n    )\n    user_curated_group_ids = set([group.id for group in user_curated_groups])\n    target_group_ids_set = set(target_group_ids)\n    if not target_group_ids_set.issubset(user_curated_group_ids):\n        detail = \"Curators cannot control groups they don't curate\"\n        logger.error(detail)\n        raise HTTPException(\n            status_code=400,\n            detail=detail,\n        )\n\n\ndef fetch_user_group(db_session: Session, user_group_id: int) -> UserGroup | None:\n    stmt = select(UserGroup).where(UserGroup.id == user_group_id)\n    return db_session.scalar(stmt)\n\n\ndef _add_user_group_snapshot_eager_loads(\n    stmt: Select,\n) -> Select:\n    \"\"\"Add eager loading options needed by UserGroup.from_model snapshot creation.\"\"\"\n    return stmt.options(\n        selectinload(UserGroup.users),\n        selectinload(UserGroup.user_group_relationships),\n        selectinload(UserGroup.cc_pair_relationships)\n        .selectinload(UserGroup__ConnectorCredentialPair.cc_pair)\n        .options(\n            selectinload(ConnectorCredentialPair.connector),\n            selectinload(ConnectorCredentialPair.credential).selectinload(\n                Credential.user\n            ),\n        ),\n        selectinload(UserGroup.document_sets).options(\n            selectinload(DocumentSet.connector_credential_pairs).selectinload(\n                ConnectorCredentialPair.connector\n            ),\n            selectinload(DocumentSet.users),\n            selectinload(DocumentSet.groups),\n            selectinload(DocumentSet.federated_connectors).selectinload(\n                FederatedConnector__DocumentSet.federated_connector\n            ),\n        ),\n        selectinload(UserGroup.personas).options(\n            selectinload(Persona.tools),\n            selectinload(Persona.hierarchy_nodes),\n            selectinload(Persona.attached_documents).selectinload(\n                Document.parent_hierarchy_node\n            ),\n            selectinload(Persona.labels),\n            selectinload(Persona.document_sets).options(\n                selectinload(DocumentSet.connector_credential_pairs).selectinload(\n                    ConnectorCredentialPair.connector\n                ),\n                selectinload(DocumentSet.users),\n                selectinload(DocumentSet.groups),\n                selectinload(DocumentSet.federated_connectors).selectinload(\n                    FederatedConnector__DocumentSet.federated_connector\n                ),\n            ),\n            selectinload(Persona.user),\n            selectinload(Persona.user_files),\n            selectinload(Persona.users),\n            selectinload(Persona.groups),\n        ),\n    )\n\n\ndef fetch_user_groups(\n    db_session: Session,\n    only_up_to_date: bool = True,\n    eager_load_for_snapshot: bool = False,\n    include_default: bool = True,\n) -> Sequence[UserGroup]:\n    \"\"\"\n    Fetches user groups from the database.\n\n    This function retrieves a sequence of `UserGroup` objects from the database.\n    If `only_up_to_date` is set to `True`, it filters the user groups to return only those\n    that are marked as up-to-date (`is_up_to_date` is `True`).\n\n    Args:\n        db_session (Session): The SQLAlchemy session used to query the database.\n        only_up_to_date (bool, optional): Flag to determine whether to filter the results\n            to include only up to date user groups. Defaults to `True`.\n        eager_load_for_snapshot: If True, adds eager loading for all relationships\n            needed by UserGroup.from_model snapshot creation.\n        include_default: If False, excludes system default groups (is_default=True).\n\n    Returns:\n        Sequence[UserGroup]: A sequence of `UserGroup` objects matching the query criteria.\n    \"\"\"\n    stmt = select(UserGroup)\n    if only_up_to_date:\n        stmt = stmt.where(UserGroup.is_up_to_date == True)  # noqa: E712\n    if not include_default:\n        stmt = stmt.where(UserGroup.is_default == False)  # noqa: E712\n    if eager_load_for_snapshot:\n        stmt = _add_user_group_snapshot_eager_loads(stmt)\n    return db_session.scalars(stmt).unique().all()\n\n\ndef fetch_user_groups_for_user(\n    db_session: Session,\n    user_id: UUID,\n    only_curator_groups: bool = False,\n    eager_load_for_snapshot: bool = False,\n    include_default: bool = True,\n) -> Sequence[UserGroup]:\n    stmt = (\n        select(UserGroup)\n        .join(User__UserGroup, User__UserGroup.user_group_id == UserGroup.id)\n        .join(User, User.id == User__UserGroup.user_id)  # type: ignore\n        .where(User.id == user_id)  # type: ignore\n    )\n    if only_curator_groups:\n        stmt = stmt.where(User__UserGroup.is_curator == True)  # noqa: E712\n    if not include_default:\n        stmt = stmt.where(UserGroup.is_default == False)  # noqa: E712\n    if eager_load_for_snapshot:\n        stmt = _add_user_group_snapshot_eager_loads(stmt)\n    return db_session.scalars(stmt).unique().all()\n\n\ndef construct_document_id_select_by_usergroup(\n    user_group_id: int,\n) -> Select:\n    \"\"\"This returns a statement that should be executed using\n    .yield_per() to minimize overhead. The primary consumers of this function\n    are background processing task generators.\"\"\"\n    stmt = (\n        select(Document.id)\n        .join(\n            DocumentByConnectorCredentialPair,\n            Document.id == DocumentByConnectorCredentialPair.id,\n        )\n        .join(\n            ConnectorCredentialPair,\n            and_(\n                DocumentByConnectorCredentialPair.connector_id\n                == ConnectorCredentialPair.connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == ConnectorCredentialPair.credential_id,\n            ),\n        )\n        .join(\n            UserGroup__ConnectorCredentialPair,\n            UserGroup__ConnectorCredentialPair.cc_pair_id == ConnectorCredentialPair.id,\n        )\n        .join(\n            UserGroup,\n            UserGroup__ConnectorCredentialPair.user_group_id == UserGroup.id,\n        )\n        .where(UserGroup.id == user_group_id)\n        .order_by(Document.id)\n    )\n    stmt = stmt.distinct()\n    return stmt\n\n\ndef fetch_documents_for_user_group_paginated(\n    db_session: Session,\n    user_group_id: int,\n    last_document_id: str | None = None,\n    limit: int = 100,\n) -> tuple[Sequence[Document], str | None]:\n    stmt = (\n        select(Document)\n        .join(\n            DocumentByConnectorCredentialPair,\n            Document.id == DocumentByConnectorCredentialPair.id,\n        )\n        .join(\n            ConnectorCredentialPair,\n            and_(\n                DocumentByConnectorCredentialPair.connector_id\n                == ConnectorCredentialPair.connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == ConnectorCredentialPair.credential_id,\n            ),\n        )\n        .join(\n            UserGroup__ConnectorCredentialPair,\n            UserGroup__ConnectorCredentialPair.cc_pair_id == ConnectorCredentialPair.id,\n        )\n        .join(\n            UserGroup,\n            UserGroup__ConnectorCredentialPair.user_group_id == UserGroup.id,\n        )\n        .where(UserGroup.id == user_group_id)\n        .order_by(Document.id)\n        .limit(limit)\n    )\n    if last_document_id is not None:\n        stmt = stmt.where(Document.id > last_document_id)\n    stmt = stmt.distinct()\n\n    documents = db_session.scalars(stmt).all()\n    return documents, documents[-1].id if documents else None\n\n\ndef fetch_user_groups_for_documents(\n    db_session: Session,\n    document_ids: list[str],\n) -> Sequence[tuple[str, list[str]]]:\n    \"\"\"\n    Fetches all user groups that have access to the given documents.\n\n    NOTE: this doesn't include groups if the cc_pair is access type SYNC\n    \"\"\"\n    stmt = (\n        select(Document.id, func.array_agg(UserGroup.name))\n        .join(\n            UserGroup__ConnectorCredentialPair,\n            UserGroup.id == UserGroup__ConnectorCredentialPair.user_group_id,\n        )\n        .join(\n            ConnectorCredentialPair,\n            and_(\n                ConnectorCredentialPair.id\n                == UserGroup__ConnectorCredentialPair.cc_pair_id,\n                ConnectorCredentialPair.access_type != AccessType.SYNC,\n            ),\n        )\n        .join(\n            DocumentByConnectorCredentialPair,\n            and_(\n                DocumentByConnectorCredentialPair.connector_id\n                == ConnectorCredentialPair.connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == ConnectorCredentialPair.credential_id,\n            ),\n        )\n        .join(Document, Document.id == DocumentByConnectorCredentialPair.id)\n        .where(Document.id.in_(document_ids))\n        .where(UserGroup__ConnectorCredentialPair.is_current == True)  # noqa: E712\n        # don't include CC pairs that are being deleted\n        # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them\n        .where(ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING)\n        .group_by(Document.id)\n    )\n\n    return db_session.execute(stmt).all()  # type: ignore\n\n\ndef _check_user_group_is_modifiable(user_group: UserGroup) -> None:\n    if not user_group.is_up_to_date:\n        raise ValueError(\n            \"Specified user group is currently syncing. Wait until the current sync has finished before editing.\"\n        )\n\n\ndef _add_user__user_group_relationships__no_commit(\n    db_session: Session, user_group_id: int, user_ids: list[UUID]\n) -> None:\n    \"\"\"NOTE: does not commit the transaction.\n\n    This function is idempotent - it will skip users who are already in the group\n    to avoid duplicate key violations during concurrent operations or re-syncs.\n    Uses ON CONFLICT DO NOTHING to keep inserts atomic under concurrency.\n    \"\"\"\n    if not user_ids:\n        return\n\n    insert_stmt = (\n        insert(User__UserGroup)\n        .values(\n            [\n                {\"user_id\": user_id, \"user_group_id\": user_group_id}\n                for user_id in user_ids\n            ]\n        )\n        .on_conflict_do_nothing(\n            index_elements=[User__UserGroup.user_group_id, User__UserGroup.user_id]\n        )\n    )\n    db_session.execute(insert_stmt)\n\n\ndef _add_user_group__cc_pair_relationships__no_commit(\n    db_session: Session, user_group_id: int, cc_pair_ids: list[int]\n) -> list[UserGroup__ConnectorCredentialPair]:\n    \"\"\"NOTE: does not commit the transaction.\"\"\"\n    relationships = [\n        UserGroup__ConnectorCredentialPair(\n            user_group_id=user_group_id, cc_pair_id=cc_pair_id\n        )\n        for cc_pair_id in cc_pair_ids\n    ]\n    db_session.add_all(relationships)\n    return relationships\n\n\ndef insert_user_group(db_session: Session, user_group: UserGroupCreate) -> UserGroup:\n    db_user_group = UserGroup(\n        name=user_group.name,\n        time_last_modified_by_user=func.now(),\n        is_up_to_date=DISABLE_VECTOR_DB,\n    )\n    db_session.add(db_user_group)\n    db_session.flush()  # give the group an ID\n\n    # Every group gets the \"basic\" permission by default\n    db_session.add(\n        PermissionGrant(\n            group_id=db_user_group.id,\n            permission=Permission.BASIC_ACCESS,\n            grant_source=GrantSource.SYSTEM,\n        )\n    )\n    db_session.flush()\n\n    _add_user__user_group_relationships__no_commit(\n        db_session=db_session,\n        user_group_id=db_user_group.id,\n        user_ids=user_group.user_ids,\n    )\n    _add_user_group__cc_pair_relationships__no_commit(\n        db_session=db_session,\n        user_group_id=db_user_group.id,\n        cc_pair_ids=user_group.cc_pair_ids,\n    )\n\n    recompute_user_permissions__no_commit(user_group.user_ids, db_session)\n\n    db_session.commit()\n    return db_user_group\n\n\ndef _mark_user_group__cc_pair_relationships_outdated__no_commit(\n    db_session: Session, user_group_id: int\n) -> None:\n    \"\"\"NOTE: does not commit the transaction.\"\"\"\n    user_group__cc_pair_relationships = db_session.scalars(\n        select(UserGroup__ConnectorCredentialPair).where(\n            UserGroup__ConnectorCredentialPair.user_group_id == user_group_id\n        )\n    )\n    for user_group__cc_pair_relationship in user_group__cc_pair_relationships:\n        user_group__cc_pair_relationship.is_current = False\n\n\ndef _validate_curator_status__no_commit(\n    db_session: Session,\n    users: list[User],\n) -> None:\n    for user in users:\n        # Check if the user is a curator in any of their groups\n        curator_relationships = (\n            db_session.query(User__UserGroup)\n            .filter(\n                User__UserGroup.user_id == user.id,\n                User__UserGroup.is_curator == True,  # noqa: E712\n            )\n            .all()\n        )\n\n        # if the user is a curator in any of their groups, set their role to CURATOR\n        # otherwise, set their role to BASIC only if they were previously a CURATOR\n        if curator_relationships:\n            user.role = UserRole.CURATOR\n        elif user.role == UserRole.CURATOR:\n            user.role = UserRole.BASIC\n        db_session.add(user)\n\n\ndef remove_curator_status__no_commit(db_session: Session, user: User) -> None:\n    stmt = (\n        update(User__UserGroup)\n        .where(User__UserGroup.user_id == user.id)\n        .values(is_curator=False)\n    )\n    db_session.execute(stmt)\n    _validate_curator_status__no_commit(db_session, [user])\n\n\ndef _validate_curator_relationship_update_requester(\n    db_session: Session,\n    user_group_id: int,\n    user_making_change: User,\n) -> None:\n    \"\"\"\n    This function validates that the user making the change has the necessary permissions\n    to update the curator relationship for the target user in the given user group.\n    \"\"\"\n\n    # Admins can update curator relationships for any group\n    if user_making_change.role == UserRole.ADMIN:\n        return\n\n    # check if the user making the change is a curator in the group they are changing the curator relationship for\n    user_making_change_curator_groups = fetch_user_groups_for_user(\n        db_session=db_session,\n        user_id=user_making_change.id,\n        # only check if the user making the change is a curator if they are a curator\n        # otherwise, they are a global_curator and can update the curator relationship\n        # for any group they are a member of\n        only_curator_groups=user_making_change.role == UserRole.CURATOR,\n    )\n    requestor_curator_group_ids = [\n        group.id for group in user_making_change_curator_groups\n    ]\n    if user_group_id not in requestor_curator_group_ids:\n        raise ValueError(\n            f\"user making change {user_making_change.email} is not a curator,\"\n            f\" admin, or global_curator for group '{user_group_id}'\"\n        )\n\n\ndef _validate_curator_relationship_update_request(\n    db_session: Session,\n    user_group_id: int,\n    target_user: User,\n) -> None:\n    \"\"\"\n    This function validates that the curator_relationship_update request itself is valid.\n    \"\"\"\n    if target_user.role == UserRole.ADMIN:\n        raise ValueError(\n            f\"User '{target_user.email}' is an admin and therefore has all permissions \"\n            \"of a curator. If you'd like this user to only have curator permissions, \"\n            \"you must update their role to BASIC then assign them to be CURATOR in the \"\n            \"appropriate groups.\"\n        )\n    elif target_user.role == UserRole.GLOBAL_CURATOR:\n        raise ValueError(\n            f\"User '{target_user.email}' is a global_curator and therefore has all \"\n            \"permissions of a curator for all groups. If you'd like this user to only \"\n            \"have curator permissions for a specific group, you must update their role \"\n            \"to BASIC then assign them to be CURATOR in the appropriate groups.\"\n        )\n    elif target_user.role not in [UserRole.CURATOR, UserRole.BASIC]:\n        raise ValueError(\n            f\"This endpoint can only be used to update the curator relationship for \"\n            \"users with the CURATOR or BASIC role. \\n\"\n            f\"Target user: {target_user.email} \\n\"\n            f\"Target user role: {target_user.role} \\n\"\n        )\n\n    # check if the target user is in the group they are changing the curator relationship for\n    requested_user_groups = fetch_user_groups_for_user(\n        db_session=db_session,\n        user_id=target_user.id,\n        only_curator_groups=False,\n    )\n    group_ids = [group.id for group in requested_user_groups]\n    if user_group_id not in group_ids:\n        raise ValueError(\n            f\"target user {target_user.email} is not in group '{user_group_id}'\"\n        )\n\n\ndef update_user_curator_relationship(\n    db_session: Session,\n    user_group_id: int,\n    set_curator_request: SetCuratorRequest,\n    user_making_change: User,\n) -> None:\n    target_user = fetch_user_by_id(db_session, set_curator_request.user_id)\n    if not target_user:\n        raise ValueError(f\"User with id '{set_curator_request.user_id}' not found\")\n\n    _validate_curator_relationship_update_request(\n        db_session=db_session,\n        user_group_id=user_group_id,\n        target_user=target_user,\n    )\n\n    _validate_curator_relationship_update_requester(\n        db_session=db_session,\n        user_group_id=user_group_id,\n        user_making_change=user_making_change,\n    )\n\n    logger.info(\n        f\"user_making_change={user_making_change.email if user_making_change else 'None'} is \"\n        f\"updating the curator relationship for user={target_user.email} \"\n        f\"in group={user_group_id} to is_curator={set_curator_request.is_curator}\"\n    )\n\n    relationship_to_update = (\n        db_session.query(User__UserGroup)\n        .filter(\n            User__UserGroup.user_group_id == user_group_id,\n            User__UserGroup.user_id == set_curator_request.user_id,\n        )\n        .first()\n    )\n\n    if relationship_to_update:\n        relationship_to_update.is_curator = set_curator_request.is_curator\n    else:\n        relationship_to_update = User__UserGroup(\n            user_group_id=user_group_id,\n            user_id=set_curator_request.user_id,\n            is_curator=True,\n        )\n        db_session.add(relationship_to_update)\n\n    _validate_curator_status__no_commit(db_session, [target_user])\n    db_session.commit()\n\n\ndef add_users_to_user_group(\n    db_session: Session,\n    user: User,\n    user_group_id: int,\n    user_ids: list[UUID],\n) -> UserGroup:\n    db_user_group = fetch_user_group(db_session=db_session, user_group_id=user_group_id)\n    if db_user_group is None:\n        raise ValueError(f\"UserGroup with id '{user_group_id}' not found\")\n\n    missing_users = [\n        user_id for user_id in user_ids if fetch_user_by_id(db_session, user_id) is None\n    ]\n    if missing_users:\n        raise ValueError(\n            f\"User(s) not found: {', '.join(str(user_id) for user_id in missing_users)}\"\n        )\n\n    _check_user_group_is_modifiable(db_user_group)\n\n    current_user_ids = [user.id for user in db_user_group.users]\n    current_user_ids_set = set(current_user_ids)\n    new_user_ids = [\n        user_id for user_id in user_ids if user_id not in current_user_ids_set\n    ]\n\n    if not new_user_ids:\n        return db_user_group\n\n    user_group_update = UserGroupUpdate(\n        user_ids=current_user_ids + new_user_ids,\n        cc_pair_ids=[cc_pair.id for cc_pair in db_user_group.cc_pairs],\n    )\n\n    return update_user_group(\n        db_session=db_session,\n        user=user,\n        user_group_id=user_group_id,\n        user_group_update=user_group_update,\n    )\n\n\ndef update_user_group(\n    db_session: Session,\n    user: User,  # noqa: ARG001\n    user_group_id: int,\n    user_group_update: UserGroupUpdate,\n) -> UserGroup:\n    \"\"\"If successful, this can set db_user_group.is_up_to_date = False.\n    That will be processed by check_for_vespa_user_groups_sync_task and trigger\n    a long running background sync to Vespa.\n    \"\"\"\n    stmt = select(UserGroup).where(UserGroup.id == user_group_id)\n    db_user_group = db_session.scalar(stmt)\n    if db_user_group is None:\n        raise ValueError(f\"UserGroup with id '{user_group_id}' not found\")\n\n    _check_user_group_is_modifiable(db_user_group)\n\n    current_user_ids = set([user.id for user in db_user_group.users])\n    updated_user_ids = set(user_group_update.user_ids)\n    added_user_ids = list(updated_user_ids - current_user_ids)\n    removed_user_ids = list(current_user_ids - updated_user_ids)\n\n    if added_user_ids:\n        missing_users = [\n            user_id\n            for user_id in added_user_ids\n            if fetch_user_by_id(db_session, user_id) is None\n        ]\n        if missing_users:\n            raise ValueError(\n                f\"User(s) not found: {', '.join(str(user_id) for user_id in missing_users)}\"\n            )\n\n    # LEAVING THIS HERE FOR NOW FOR GIVING DIFFERENT ROLES\n    # ACCESS TO DIFFERENT PERMISSIONS\n    # if (removed_user_ids or added_user_ids) and (\n    #     not user or user.role != UserRole.ADMIN\n    # ):\n    #     raise ValueError(\"Only admins can add or remove users from user groups\")\n\n    if removed_user_ids:\n        _cleanup_user__user_group_relationships__no_commit(\n            db_session=db_session,\n            user_group_id=user_group_id,\n            user_ids=removed_user_ids,\n        )\n\n    if added_user_ids:\n        _add_user__user_group_relationships__no_commit(\n            db_session=db_session,\n            user_group_id=user_group_id,\n            user_ids=added_user_ids,\n        )\n\n    cc_pairs_updated = set([cc_pair.id for cc_pair in db_user_group.cc_pairs]) != set(\n        user_group_update.cc_pair_ids\n    )\n    if cc_pairs_updated:\n        _mark_user_group__cc_pair_relationships_outdated__no_commit(\n            db_session=db_session, user_group_id=user_group_id\n        )\n        _add_user_group__cc_pair_relationships__no_commit(\n            db_session=db_session,\n            user_group_id=db_user_group.id,\n            cc_pair_ids=user_group_update.cc_pair_ids,\n        )\n\n    if cc_pairs_updated and not DISABLE_VECTOR_DB:\n        db_user_group.is_up_to_date = False\n\n    removed_users = db_session.scalars(\n        select(User).where(User.id.in_(removed_user_ids))  # type: ignore\n    ).unique()\n\n    # Filter out admin and global curator users before validating curator status\n    users_to_validate = [\n        user\n        for user in removed_users\n        if user.role not in [UserRole.ADMIN, UserRole.GLOBAL_CURATOR]\n    ]\n\n    if users_to_validate:\n        _validate_curator_status__no_commit(db_session, users_to_validate)\n\n    # update \"time_updated\" to now\n    db_user_group.time_last_modified_by_user = func.now()\n\n    recompute_user_permissions__no_commit(\n        list(set(added_user_ids) | set(removed_user_ids)), db_session\n    )\n\n    db_session.commit()\n    return db_user_group\n\n\ndef rename_user_group(\n    db_session: Session,\n    user_group_id: int,\n    new_name: str,\n) -> UserGroup:\n    stmt = select(UserGroup).where(UserGroup.id == user_group_id)\n    db_user_group = db_session.scalar(stmt)\n    if db_user_group is None:\n        raise ValueError(f\"UserGroup with id '{user_group_id}' not found\")\n\n    _check_user_group_is_modifiable(db_user_group)\n\n    db_user_group.name = new_name\n    db_user_group.time_last_modified_by_user = func.now()\n\n    # CC pair documents in Vespa contain the group name, so we need to\n    # trigger a sync to update them with the new name.\n    _mark_user_group__cc_pair_relationships_outdated__no_commit(\n        db_session=db_session, user_group_id=user_group_id\n    )\n    if not DISABLE_VECTOR_DB:\n        db_user_group.is_up_to_date = False\n\n    db_session.commit()\n    return db_user_group\n\n\ndef prepare_user_group_for_deletion(db_session: Session, user_group_id: int) -> None:\n    stmt = select(UserGroup).where(UserGroup.id == user_group_id)\n    db_user_group = db_session.scalar(stmt)\n    if db_user_group is None:\n        raise ValueError(f\"UserGroup with id '{user_group_id}' not found\")\n\n    _check_user_group_is_modifiable(db_user_group)\n\n    # Collect affected user IDs before cleanup deletes the relationships\n    affected_user_ids: list[UUID] = [\n        uid\n        for uid in db_session.execute(\n            select(User__UserGroup.user_id).where(\n                User__UserGroup.user_group_id == user_group_id\n            )\n        )\n        .scalars()\n        .all()\n        if uid is not None\n    ]\n\n    _mark_user_group__cc_pair_relationships_outdated__no_commit(\n        db_session=db_session, user_group_id=user_group_id\n    )\n\n    _cleanup_credential__user_group_relationships__no_commit(\n        db_session=db_session, user_group_id=user_group_id\n    )\n    _cleanup_user__user_group_relationships__no_commit(\n        db_session=db_session, user_group_id=user_group_id\n    )\n    _cleanup_token_rate_limit__user_group_relationships__no_commit(\n        db_session=db_session, user_group_id=user_group_id\n    )\n    _cleanup_document_set__user_group_relationships__no_commit(\n        db_session=db_session, user_group_id=user_group_id\n    )\n    _cleanup_persona__user_group_relationships__no_commit(\n        db_session=db_session, user_group_id=user_group_id\n    )\n    _cleanup_user_group__cc_pair_relationships__no_commit(\n        db_session=db_session,\n        user_group_id=user_group_id,\n        outdated_only=False,\n    )\n    _cleanup_llm_provider__user_group_relationships__no_commit(\n        db_session=db_session, user_group_id=user_group_id\n    )\n\n    # Recompute permissions for affected users now that their\n    # membership in this group has been removed\n    recompute_user_permissions__no_commit(affected_user_ids, db_session)\n\n    db_user_group.is_up_to_date = False\n    db_user_group.is_up_for_deletion = True\n    db_session.commit()\n\n\ndef delete_user_group(db_session: Session, user_group: UserGroup) -> None:\n    \"\"\"\n    This assumes that all the fk cleanup has already been done.\n    \"\"\"\n    db_session.delete(user_group)\n    db_session.commit()\n\n\ndef mark_user_group_as_synced(db_session: Session, user_group: UserGroup) -> None:\n    # cleanup outdated relationships\n    _cleanup_user_group__cc_pair_relationships__no_commit(\n        db_session=db_session, user_group_id=user_group.id, outdated_only=True\n    )\n    user_group.is_up_to_date = True\n    db_session.commit()\n\n\ndef delete_user_group_cc_pair_relationship__no_commit(\n    cc_pair_id: int, db_session: Session\n) -> None:\n    \"\"\"Deletes all rows from UserGroup__ConnectorCredentialPair where the\n    connector_credential_pair_id matches the given cc_pair_id.\n\n    Should be used very carefully (only for connectors that are being deleted).\"\"\"\n    cc_pair = get_connector_credential_pair_from_id(\n        db_session=db_session,\n        cc_pair_id=cc_pair_id,\n    )\n    if not cc_pair:\n        raise ValueError(f\"Connector Credential Pair '{cc_pair_id}' does not exist\")\n\n    if cc_pair.status != ConnectorCredentialPairStatus.DELETING:\n        raise ValueError(\n            f\"Connector Credential Pair '{cc_pair_id}' is not in the DELETING state. status={cc_pair.status}\"\n        )\n\n    delete_stmt = delete(UserGroup__ConnectorCredentialPair).where(\n        UserGroup__ConnectorCredentialPair.cc_pair_id == cc_pair_id,\n    )\n    db_session.execute(delete_stmt)\n"
  },
  {
    "path": "backend/ee/onyx/document_index/vespa/app_config/cloud-services.xml.jinja",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<services version=\"1.0\">\n    <container id=\"default\" version=\"1.0\">\n        <document-api />\n        <search />\n        <http>\n            <server id=\"default\" port=\"4080\" />\n        </http>\n        <nodes count=\"[2, 4]\">\n            <resources vcpu=\"4.0\" memory=\"16Gb\" architecture=\"arm64\" storage-type=\"remote\"\n                disk=\"48Gb\" />\n        </nodes>\n\n\n    </container>\n    <content id=\"danswer_index\" version=\"1.0\">\n        <documents>\n            <!-- <document type=\"danswer_chunk\" mode=\"index\" /> -->\n{{ document_elements }}\n        </documents>\n        <nodes count=\"50\">\n            <resources vcpu=\"8.0\" memory=\"128.0Gb\" architecture=\"arm64\" storage-type=\"local\"\n                disk=\"475.0Gb\" />\n        </nodes>\n        <engine>\n            <proton>\n                <tuning>\n                    <searchnode>\n                        <requestthreads>\n                            <persearch>2</persearch>\n                        </requestthreads>\n                    </searchnode>\n                </tuning>\n            </proton>\n        </engine>\n\n        <config name=\"vespa.config.search.summary.juniperrc\">\n            <max_matches>3</max_matches>\n            <length>750</length>\n            <surround_max>350</surround_max>\n            <min_length>300</min_length>\n        </config>\n\n\n        <min-redundancy>2</min-redundancy>\n\n    </content>\n</services>\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/external_permissions/confluence/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/external_permissions/confluence/constants.py",
    "content": "# This is a group that we use to store all the users that we found in Confluence\n# Instead of setting a page to public, we just add this group so that the page\n# is only accessible to users who have confluence accounts.\nALL_CONF_EMAILS_GROUP_NAME = \"All_Confluence_Users_Found_By_Onyx\"\n\nVIEWSPACE_PERMISSION_TYPE = \"VIEWSPACE\"\nREQUEST_PAGINATION_LIMIT = 5000\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/confluence/doc_sync.py",
    "content": "\"\"\"\nRules defined here:\nhttps://confluence.atlassian.com/conf85/check-who-can-view-a-page-1283360557.html\n\"\"\"\n\nfrom collections.abc import Generator\n\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction\nfrom ee.onyx.external_permissions.utils import generic_doc_sync\nfrom onyx.access.models import ElementExternalAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.confluence.connector import ConfluenceConnector\nfrom onyx.connectors.credentials_provider import OnyxDBCredentialsProvider\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\nCONFLUENCE_DOC_SYNC_LABEL = \"confluence_doc_sync\"\n\n\ndef confluence_doc_sync(\n    cc_pair: ConnectorCredentialPair,\n    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001\n    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,\n    callback: IndexingHeartbeatInterface | None,\n) -> Generator[ElementExternalAccess, None, None]:\n    \"\"\"\n    Fetches document permissions from Confluence and yields DocExternalAccess objects.\n    Compares fetched documents against existing documents in the DB for the connector.\n    If a document exists in the DB but not in the Confluence fetch, it's marked as restricted.\n    \"\"\"\n    confluence_connector = ConfluenceConnector(\n        **cc_pair.connector.connector_specific_config\n    )\n\n    provider = OnyxDBCredentialsProvider(\n        get_current_tenant_id(), \"confluence\", cc_pair.credential_id\n    )\n    confluence_connector.set_credentials_provider(provider)\n\n    yield from generic_doc_sync(\n        cc_pair=cc_pair,\n        fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,\n        callback=callback,\n        doc_source=DocumentSource.CONFLUENCE,\n        slim_connector=confluence_connector,\n        label=CONFLUENCE_DOC_SYNC_LABEL,\n    )\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/confluence/group_sync.py",
    "content": "from collections.abc import Generator\n\nfrom ee.onyx.db.external_perm import ExternalUserGroup\nfrom ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME\nfrom onyx.background.error_logging import emit_background_error\nfrom onyx.configs.app_configs import CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC\nfrom onyx.connectors.confluence.onyx_confluence import (\n    get_user_email_from_username__server,\n)\nfrom onyx.connectors.confluence.onyx_confluence import OnyxConfluence\nfrom onyx.connectors.credentials_provider import OnyxDBCredentialsProvider\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.users import get_all_users\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _build_group_member_email_map(\n    confluence_client: OnyxConfluence, cc_pair_id: int\n) -> dict[str, set[str]]:\n    group_member_emails: dict[str, set[str]] = {}\n    for user in confluence_client.paginated_cql_user_retrieval():\n        logger.info(f\"Processing groups for user: {user}\")\n\n        email = user.email\n        if not email:\n            # This field is only present in Confluence Server\n            user_name = user.username\n            # If it is present, try to get the email using a Server-specific method\n            if user_name:\n                email = get_user_email_from_username__server(\n                    confluence_client=confluence_client,\n                    user_name=user_name,\n                )\n            else:\n                logger.error(f\"user result missing username field: {user}\")\n\n        if not email:\n            # If we still don't have an email, skip this user\n            msg = f\"user result missing email field: {user}\"\n            if user.type == \"app\":\n                logger.warning(msg)\n            else:\n                emit_background_error(msg, cc_pair_id=cc_pair_id)\n                logger.error(msg)\n            continue\n\n        all_users_groups: set[str] = set()\n        for group in confluence_client.paginated_groups_by_user_retrieval(user.user_id):\n            # group name uniqueness is enforced by Confluence, so we can use it as a group ID\n            group_id = group[\"name\"]\n            group_member_emails.setdefault(group_id, set()).add(email)\n            all_users_groups.add(group_id)\n\n        if not all_users_groups:\n            msg = f\"No groups found for user with email: {email}\"\n            emit_background_error(msg, cc_pair_id=cc_pair_id)\n            logger.error(msg)\n        else:\n            logger.debug(f\"Found groups {all_users_groups} for user with email {email}\")\n\n    if not group_member_emails:\n        msg = \"No groups found for any users.\"\n        emit_background_error(msg, cc_pair_id=cc_pair_id)\n        logger.error(msg)\n\n    return group_member_emails\n\n\ndef _build_group_member_email_map_from_onyx_users(\n    confluence_client: OnyxConfluence,\n) -> dict[str, set[str]]:\n    \"\"\"Hacky, but it's the only way to do this as long as the\n    Confluence APIs are broken.\n\n    This is fixed in Confluence Data Center 10.1.0, so first choice\n    is to tell users to upgrade to 10.1.0.\n    https://jira.atlassian.com/browse/CONFSERVER-95999\n    \"\"\"\n    with get_session_with_current_tenant() as db_session:\n        # don't include external since they are handled by the \"through confluence\"\n        # user fetching mechanism\n        user_emails = [\n            user.email for user in get_all_users(db_session, include_external=False)\n        ]\n\n    def _infer_username_from_email(email: str) -> str:\n        return email.split(\"@\")[0]\n\n    group_member_emails: dict[str, set[str]] = {}\n    for email in user_emails:\n        logger.info(f\"Processing groups for user with email: {email}\")\n        try:\n            user_name = _infer_username_from_email(email)\n            response = confluence_client.get_user_details_by_username(user_name)\n            user_key = response.get(\"userKey\")\n            if not user_key:\n                logger.error(f\"User key not found for user with email {email}\")\n                continue\n\n            all_users_groups: set[str] = set()\n            for group in confluence_client.paginated_groups_by_user_retrieval(user_key):\n                # group name uniqueness is enforced by Confluence, so we can use it as a group ID\n                group_id = group[\"name\"]\n                group_member_emails.setdefault(group_id, set()).add(email)\n                all_users_groups.add(group_id)\n\n            if not all_users_groups:\n                msg = f\"No groups found for user with email: {email}\"\n                logger.error(msg)\n            else:\n                logger.info(\n                    f\"Found groups {all_users_groups} for user with email {email}\"\n                )\n        except Exception:\n            logger.exception(f\"Error getting user details for user with email {email}\")\n\n    return group_member_emails\n\n\ndef _build_final_group_to_member_email_map(\n    confluence_client: OnyxConfluence,\n    cc_pair_id: int,\n    # if set, will infer confluence usernames from onyx users in addition to using the\n    # confluence users API. This is a hacky workaround for the fact that the Confluence\n    # users API is broken before Confluence Data Center 10.1.0.\n    use_onyx_users: bool = CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC,\n) -> dict[str, set[str]]:\n    group_to_member_email_map = _build_group_member_email_map(\n        confluence_client=confluence_client,\n        cc_pair_id=cc_pair_id,\n    )\n    group_to_member_email_map_from_onyx_users = (\n        (\n            _build_group_member_email_map_from_onyx_users(\n                confluence_client=confluence_client,\n            )\n        )\n        if use_onyx_users\n        else {}\n    )\n\n    all_group_ids = set(group_to_member_email_map.keys()) | set(\n        group_to_member_email_map_from_onyx_users.keys()\n    )\n    final_group_to_member_email_map = {}\n    for group_id in all_group_ids:\n        group_member_emails = group_to_member_email_map.get(\n            group_id, set()\n        ) | group_to_member_email_map_from_onyx_users.get(group_id, set())\n        final_group_to_member_email_map[group_id] = group_member_emails\n\n    return final_group_to_member_email_map\n\n\ndef confluence_group_sync(\n    tenant_id: str,\n    cc_pair: ConnectorCredentialPair,\n) -> Generator[ExternalUserGroup, None, None]:\n    provider = OnyxDBCredentialsProvider(tenant_id, \"confluence\", cc_pair.credential_id)\n    is_cloud = cc_pair.connector.connector_specific_config.get(\"is_cloud\", False)\n    wiki_base: str = cc_pair.connector.connector_specific_config[\"wiki_base\"]\n    url = wiki_base.rstrip(\"/\")\n\n    probe_kwargs = {\n        \"max_backoff_retries\": 6,\n        \"max_backoff_seconds\": 10,\n    }\n\n    final_kwargs = {\n        \"max_backoff_retries\": 10,\n        \"max_backoff_seconds\": 60,\n    }\n\n    confluence_client = OnyxConfluence(is_cloud, url, provider)\n    confluence_client._probe_connection(**probe_kwargs)\n    confluence_client._initialize_connection(**final_kwargs)\n\n    group_to_member_email_map = _build_final_group_to_member_email_map(\n        confluence_client, cc_pair.id\n    )\n\n    all_found_emails = set()\n    for group_id, group_member_emails in group_to_member_email_map.items():\n        yield (\n            ExternalUserGroup(\n                id=group_id,\n                user_emails=list(group_member_emails),\n            )\n        )\n        all_found_emails.update(group_member_emails)\n\n    # This is so that when we find a public confleunce server page, we can\n    # give access to all users only in if they have an email in Confluence\n    if cc_pair.connector.connector_specific_config.get(\"is_cloud\", False):\n        all_found_group = ExternalUserGroup(\n            id=ALL_CONF_EMAILS_GROUP_NAME,\n            user_emails=list(all_found_emails),\n        )\n        yield all_found_group\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/confluence/page_access.py",
    "content": "from typing import Any\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.access.utils import build_ext_group_name_for_onyx\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.confluence.onyx_confluence import (\n    get_user_email_from_username__server,\n)\nfrom onyx.connectors.confluence.onyx_confluence import OnyxConfluence\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _extract_read_access_restrictions(\n    confluence_client: OnyxConfluence, restrictions: dict[str, Any]\n) -> tuple[set[str], set[str], bool]:\n    \"\"\"\n    Converts a page's restrictions dict into an ExternalAccess object.\n    If there are no restrictions, then return None\n    \"\"\"\n    read_access = restrictions.get(\"read\", {})\n    read_access_restrictions = read_access.get(\"restrictions\", {})\n\n    # Extract the users with read access\n    read_access_user = read_access_restrictions.get(\"user\", {})\n    read_access_user_jsons = read_access_user.get(\"results\", [])\n    # any items found means that there is a restriction\n    found_any_restriction = bool(read_access_user_jsons)\n\n    read_access_user_emails = []\n    for user in read_access_user_jsons:\n        # If the user has an email, then add it to the list\n        if user.get(\"email\"):\n            read_access_user_emails.append(user[\"email\"])\n        # If the user has a username and not an email, then get the email from Confluence\n        elif user.get(\"username\"):\n            email = get_user_email_from_username__server(\n                confluence_client=confluence_client, user_name=user[\"username\"]\n            )\n            if email:\n                read_access_user_emails.append(email)\n            else:\n                logger.warning(\n                    f\"Email for user {user['username']} not found in Confluence\"\n                )\n        else:\n            if user.get(\"email\") is not None:\n                logger.warning(f\"Cant find email for user {user.get('displayName')}\")\n                logger.warning(\n                    \"This user needs to make their email accessible in Confluence Settings\"\n                )\n\n            logger.warning(f\"no user email or username for {user}\")\n\n    # Extract the groups with read access\n    read_access_group = read_access_restrictions.get(\"group\", {})\n    read_access_group_jsons = read_access_group.get(\"results\", [])\n    # any items found means that there is a restriction\n    found_any_restriction |= bool(read_access_group_jsons)\n    read_access_group_names = [\n        group[\"name\"] for group in read_access_group_jsons if group.get(\"name\")\n    ]\n\n    return (\n        set(read_access_user_emails),\n        set(read_access_group_names),\n        found_any_restriction,\n    )\n\n\ndef get_page_restrictions(\n    confluence_client: OnyxConfluence,\n    page_id: str,\n    page_restrictions: dict[str, Any],\n    ancestors: list[dict[str, Any]],\n    add_prefix: bool = False,\n) -> ExternalAccess | None:\n    \"\"\"\n    This function gets the restrictions for a page. In Confluence, a child can have\n    at MOST the same level accessibility as its immediate parent.\n\n    If no restrictions are found anywhere, then return None, indicating that the page\n    should inherit the space's restrictions.\n\n    add_prefix: When True, prefix group IDs with source type (for indexing path).\n               When False (default), leave unprefixed (for permission sync path).\n    \"\"\"\n    found_user_emails: set[str] = set()\n    found_group_names: set[str] = set()\n\n    # NOTE: need the found_any_restriction, since we can find restrictions\n    # but not be able to extract any user emails or group names\n    # in this case, we should just give no access\n    found_user_emails, found_group_names, found_any_page_level_restriction = (\n        _extract_read_access_restrictions(\n            confluence_client=confluence_client,\n            restrictions=page_restrictions,\n        )\n    )\n\n    def _maybe_prefix_groups(group_names: set[str]) -> set[str]:\n        if add_prefix:\n            return {\n                build_ext_group_name_for_onyx(g, DocumentSource.CONFLUENCE)\n                for g in group_names\n            }\n        return group_names\n\n    # if there are individual page-level restrictions, then this is the accurate\n    # restriction for the page. You cannot both have page-level restrictions AND\n    # inherit restrictions from the parent.\n    if found_any_page_level_restriction:\n        return ExternalAccess(\n            external_user_emails=found_user_emails,\n            external_user_group_ids=_maybe_prefix_groups(found_group_names),\n            is_public=False,\n        )\n\n    # ancestors seem to be in order from root to immediate parent\n    # https://community.atlassian.com/forums/Confluence-questions/Order-of-ancestors-in-REST-API-response-Confluence-Server-amp/qaq-p/2385981\n    # we want the restrictions from the immediate parent to take precedence, so we should\n    # reverse the list\n    for ancestor in reversed(ancestors):\n        (\n            ancestor_user_emails,\n            ancestor_group_names,\n            found_any_restrictions_in_ancestor,\n        ) = _extract_read_access_restrictions(\n            confluence_client=confluence_client,\n            restrictions=ancestor.get(\"restrictions\", {}),\n        )\n        if found_any_restrictions_in_ancestor:\n            # if inheriting restrictions from the parent, then the first one we run into\n            # should be applied (the reason why we'd traverse more than one ancestor is if\n            # the ancestor also is in \"inherit\" mode.)\n            logger.debug(\n                f\"Found user restrictions {ancestor_user_emails} and group restrictions {ancestor_group_names}\"\n                f\"for document {page_id} based on ancestor {ancestor}\"\n            )\n            return ExternalAccess(\n                external_user_emails=ancestor_user_emails,\n                external_user_group_ids=_maybe_prefix_groups(ancestor_group_names),\n                is_public=False,\n            )\n\n    # we didn't find any restrictions, so the page inherits the space's restrictions\n    return None\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/confluence/space_access.py",
    "content": "from ee.onyx.configs.app_configs import CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC\nfrom ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME\nfrom ee.onyx.external_permissions.confluence.constants import REQUEST_PAGINATION_LIMIT\nfrom ee.onyx.external_permissions.confluence.constants import VIEWSPACE_PERMISSION_TYPE\nfrom onyx.access.models import ExternalAccess\nfrom onyx.access.utils import build_ext_group_name_for_onyx\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.confluence.onyx_confluence import (\n    get_user_email_from_username__server,\n)\nfrom onyx.connectors.confluence.onyx_confluence import OnyxConfluence\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\ndef _get_server_space_permissions(\n    confluence_client: OnyxConfluence, space_key: str\n) -> ExternalAccess:\n    space_permissions = confluence_client.get_all_space_permissions_server(\n        space_key=space_key\n    )\n\n    viewspace_permissions = []\n    for permission_category in space_permissions:\n        if permission_category.get(\"type\") == VIEWSPACE_PERMISSION_TYPE:\n            viewspace_permissions.extend(\n                permission_category.get(\"spacePermissions\", [])\n            )\n\n    is_public = False\n    user_names = set()\n    group_names = set()\n    for permission in viewspace_permissions:\n        if user_name := permission.get(\"userName\"):\n            user_names.add(user_name)\n        if group_name := permission.get(\"groupName\"):\n            group_names.add(group_name)\n\n        # It seems that if anonymous access is turned on for the site and space,\n        # then the space is publicly accessible.\n        # For confluence server, we make a group that contains all users\n        # that exist in confluence and then just add that group to the space permissions\n        # if anonymous access is turned on for the site and space or we set is_public = True\n        # if they set the env variable CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC to True so\n        # that we can support confluence server deployments that want anonymous access\n        # to be public (we cant test this because its paywalled)\n        if user_name is None and group_name is None:\n            # Defaults to False\n            if CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC:\n                is_public = True\n            else:\n                group_names.add(ALL_CONF_EMAILS_GROUP_NAME)\n\n    user_emails = set()\n    for user_name in user_names:\n        user_email = get_user_email_from_username__server(confluence_client, user_name)\n        if user_email:\n            user_emails.add(user_email)\n        else:\n            logger.warning(f\"Email for user {user_name} not found in Confluence\")\n\n    if not user_emails and not group_names:\n        logger.warning(\n            \"No user emails or group names found in Confluence space permissions\"\n            f\"\\nSpace key: {space_key}\"\n            f\"\\nSpace permissions: {space_permissions}\"\n        )\n\n    return ExternalAccess(\n        external_user_emails=user_emails,\n        external_user_group_ids=group_names,\n        is_public=is_public,\n    )\n\n\ndef _get_cloud_space_permissions(\n    confluence_client: OnyxConfluence, space_key: str\n) -> ExternalAccess:\n    space_permissions_result = confluence_client.get_space(\n        space_key=space_key, expand=\"permissions\"\n    )\n    space_permissions = space_permissions_result.get(\"permissions\", [])\n\n    user_emails = set()\n    group_names = set()\n    is_externally_public = False\n    for permission in space_permissions:\n        subs = permission.get(\"subjects\")\n        if subs:\n            # If there are subjects, then there are explicit users or groups with access\n            if email := subs.get(\"user\", {}).get(\"results\", [{}])[0].get(\"email\"):\n                user_emails.add(email)\n            if group_name := subs.get(\"group\", {}).get(\"results\", [{}])[0].get(\"name\"):\n                group_names.add(group_name)\n        else:\n            # If there are no subjects, then the permission is for everyone\n            if permission.get(\"operation\", {}).get(\n                \"operation\"\n            ) == \"read\" and permission.get(\"anonymousAccess\", False):\n                # If the permission specifies read access for anonymous users, then\n                # the space is publicly accessible\n                is_externally_public = True\n\n    return ExternalAccess(\n        external_user_emails=user_emails,\n        external_user_group_ids=group_names,\n        is_public=is_externally_public,\n    )\n\n\ndef get_space_permission(\n    confluence_client: OnyxConfluence,\n    space_key: str,\n    is_cloud: bool,\n    add_prefix: bool = False,\n) -> ExternalAccess:\n    if is_cloud:\n        space_permissions = _get_cloud_space_permissions(confluence_client, space_key)\n    else:\n        space_permissions = _get_server_space_permissions(confluence_client, space_key)\n\n    if (\n        not space_permissions.is_public\n        and not space_permissions.external_user_emails\n        and not space_permissions.external_user_group_ids\n    ):\n        logger.warning(\n            f\"No permissions found for space '{space_key}'. This is very unlikely \"\n            \"to be correct and is more likely caused by an access token with \"\n            \"insufficient permissions. Make sure that the access token has Admin \"\n            f\"permissions for space '{space_key}'\"\n        )\n\n    # Prefix group IDs with source type if requested (for indexing path)\n    if add_prefix and space_permissions.external_user_group_ids:\n        prefixed_groups = {\n            build_ext_group_name_for_onyx(g, DocumentSource.CONFLUENCE)\n            for g in space_permissions.external_user_group_ids\n        }\n        return ExternalAccess(\n            external_user_emails=space_permissions.external_user_emails,\n            external_user_group_ids=prefixed_groups,\n            is_public=space_permissions.is_public,\n        )\n\n    return space_permissions\n\n\ndef get_all_space_permissions(\n    confluence_client: OnyxConfluence,\n    is_cloud: bool,\n    add_prefix: bool = False,\n) -> dict[str, ExternalAccess]:\n    \"\"\"\n    Get access permissions for all spaces in Confluence.\n\n    add_prefix: When True, prefix group IDs with source type (for indexing path).\n               When False (default), leave unprefixed (for permission sync path).\n    \"\"\"\n    logger.debug(\"Getting space permissions\")\n    # Gets all the spaces in the Confluence instance\n    all_space_keys = [\n        key\n        for space in confluence_client.retrieve_confluence_spaces(\n            limit=REQUEST_PAGINATION_LIMIT,\n        )\n        if (key := space.get(\"key\"))\n    ]\n\n    # Gets the permissions for each space\n    logger.debug(f\"Got {len(all_space_keys)} spaces from confluence\")\n    space_permissions_by_space_key: dict[str, ExternalAccess] = {}\n    for space_key in all_space_keys:\n        space_permissions = get_space_permission(\n            confluence_client, space_key, is_cloud, add_prefix\n        )\n\n        # Stores the permissions for each space\n        space_permissions_by_space_key[space_key] = space_permissions\n\n    return space_permissions_by_space_key\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/github/doc_sync.py",
    "content": "import json\nfrom collections.abc import Generator\n\nfrom github import Github\nfrom github.Repository import Repository\n\nfrom ee.onyx.external_permissions.github.utils import fetch_repository_team_slugs\nfrom ee.onyx.external_permissions.github.utils import form_collaborators_group_id\nfrom ee.onyx.external_permissions.github.utils import form_organization_group_id\nfrom ee.onyx.external_permissions.github.utils import (\n    form_outside_collaborators_group_id,\n)\nfrom ee.onyx.external_permissions.github.utils import get_external_access_permission\nfrom ee.onyx.external_permissions.github.utils import get_repository_visibility\nfrom ee.onyx.external_permissions.github.utils import GitHubVisibility\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction\nfrom onyx.access.models import DocExternalAccess\nfrom onyx.access.utils import build_ext_group_name_for_onyx\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.github.connector import DocMetadata\nfrom onyx.connectors.github.connector import GithubConnector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.utils import DocumentRow\nfrom onyx.db.utils import SortOrder\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nGITHUB_DOC_SYNC_LABEL = \"github_doc_sync\"\n\n\ndef github_doc_sync(\n    cc_pair: ConnectorCredentialPair,\n    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,\n    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,  # noqa: ARG001\n    callback: IndexingHeartbeatInterface | None = None,\n) -> Generator[DocExternalAccess, None, None]:\n    \"\"\"\n    Sync GitHub documents with external access permissions.\n\n    This function checks each repository for visibility/team changes and updates\n    document permissions accordingly without using checkpoints.\n    \"\"\"\n    logger.info(f\"Starting GitHub document sync for CC pair ID: {cc_pair.id}\")\n\n    # Initialize GitHub connector with credentials\n    github_connector: GithubConnector = GithubConnector(\n        **cc_pair.connector.connector_specific_config\n    )\n\n    credential_json = (\n        cc_pair.credential.credential_json.get_value(apply_mask=False)\n        if cc_pair.credential.credential_json\n        else {}\n    )\n    github_connector.load_credentials(credential_json)\n    logger.info(\"GitHub connector credentials loaded successfully\")\n\n    if not github_connector.github_client:\n        logger.error(\"GitHub client initialization failed\")\n        raise ValueError(\"github_client is required\")\n\n    # Get all repositories from GitHub API\n    logger.info(\"Fetching all repositories from GitHub API\")\n    try:\n        repos = github_connector.fetch_configured_repos()\n\n        logger.info(f\"Found {len(repos)} repositories to check\")\n    except Exception as e:\n        logger.error(f\"Failed to fetch repositories: {e}\")\n        raise\n\n    repo_to_doc_list_map: dict[str, list[DocumentRow]] = {}\n    # sort order is ascending because we want to get the oldest documents first\n    existing_docs: list[DocumentRow] = fetch_all_existing_docs_fn(\n        sort_order=SortOrder.ASC\n    )\n    logger.info(f\"Found {len(existing_docs)} documents to check\")\n    for doc in existing_docs:\n        try:\n            doc_metadata = DocMetadata.model_validate_json(json.dumps(doc.doc_metadata))\n            if doc_metadata.repo not in repo_to_doc_list_map:\n                repo_to_doc_list_map[doc_metadata.repo] = []\n            repo_to_doc_list_map[doc_metadata.repo].append(doc)\n        except Exception as e:\n            logger.error(f\"Failed to parse doc metadata: {e} for doc {doc.id}\")\n            continue\n    logger.info(f\"Found {len(repo_to_doc_list_map)} documents to check\")\n    # Process each repository individually\n    for repo in repos:\n        try:\n            logger.info(f\"Processing repository: {repo.id} (name: {repo.name})\")\n            repo_doc_list: list[DocumentRow] = repo_to_doc_list_map.get(\n                repo.full_name, []\n            )\n            if not repo_doc_list:\n                logger.warning(\n                    f\"No documents found for repository {repo.id} ({repo.name})\"\n                )\n                continue\n\n            current_external_group_ids = repo_doc_list[0].external_user_group_ids or []\n            # Check if repository has any permission changes\n            has_changes = _check_repository_for_changes(\n                repo=repo,\n                github_client=github_connector.github_client,\n                current_external_group_ids=current_external_group_ids,\n            )\n\n            if has_changes:\n                logger.info(\n                    f\"Repository {repo.id} ({repo.name}) has changes, updating documents\"\n                )\n\n                # Get new external access permissions for this repository\n                new_external_access = get_external_access_permission(\n                    repo, github_connector.github_client\n                )\n\n                logger.info(\n                    f\"Found {len(repo_doc_list)} documents for repository {repo.full_name}\"\n                )\n\n                # Yield updated external access for each document\n                for doc in repo_doc_list:\n                    if callback:\n                        callback.progress(GITHUB_DOC_SYNC_LABEL, 1)\n\n                    yield DocExternalAccess(\n                        doc_id=doc.id,\n                        external_access=new_external_access,\n                    )\n            else:\n                logger.info(\n                    f\"Repository {repo.id} ({repo.name}) has no changes, skipping\"\n                )\n        except Exception as e:\n            logger.error(f\"Error processing repository {repo.id} ({repo.name}): {e}\")\n\n    logger.info(f\"GitHub document sync completed for CC pair ID: {cc_pair.id}\")\n\n\ndef _check_repository_for_changes(\n    repo: Repository,\n    github_client: Github,\n    current_external_group_ids: list[str],\n) -> bool:\n    \"\"\"\n    Check if repository has any permission changes (visibility or team updates).\n    \"\"\"\n    logger.info(f\"Checking repository {repo.id} ({repo.name}) for changes\")\n\n    # Check for repository visibility changes using the sample document data\n    if _is_repo_visibility_changed_from_groups(\n        repo=repo,\n        current_external_group_ids=current_external_group_ids,\n    ):\n        logger.info(f\"Repository {repo.id} ({repo.name}) has visibility changes\")\n        return True\n\n    # Check for team membership changes if repository is private\n    if get_repository_visibility(\n        repo\n    ) == GitHubVisibility.PRIVATE and _teams_updated_from_groups(\n        repo=repo,\n        github_client=github_client,\n        current_external_group_ids=current_external_group_ids,\n    ):\n        logger.info(f\"Repository {repo.id} ({repo.name}) has team changes\")\n        return True\n\n    logger.info(f\"Repository {repo.id} ({repo.name}) has no changes\")\n    return False\n\n\ndef _is_repo_visibility_changed_from_groups(\n    repo: Repository,\n    current_external_group_ids: list[str],\n) -> bool:\n    \"\"\"\n    Check if repository visibility has changed by analyzing existing external group IDs.\n\n    Args:\n        repo: GitHub repository object\n        current_external_group_ids: List of external group IDs from existing document\n\n    Returns:\n        True if visibility has changed\n    \"\"\"\n    current_repo_visibility = get_repository_visibility(repo)\n    logger.info(f\"Current repository visibility: {current_repo_visibility.value}\")\n\n    # Build expected group IDs for current visibility\n    collaborators_group_id = build_ext_group_name_for_onyx(\n        source=DocumentSource.GITHUB,\n        ext_group_name=form_collaborators_group_id(repo.id),\n    )\n\n    org_group_id = None\n    if repo.organization:\n        org_group_id = build_ext_group_name_for_onyx(\n            source=DocumentSource.GITHUB,\n            ext_group_name=form_organization_group_id(repo.organization.id),\n        )\n\n    # Determine existing visibility from group IDs\n    has_collaborators_group = collaborators_group_id in current_external_group_ids\n    has_org_group = org_group_id and org_group_id in current_external_group_ids\n\n    if has_collaborators_group:\n        existing_repo_visibility = GitHubVisibility.PRIVATE\n    elif has_org_group:\n        existing_repo_visibility = GitHubVisibility.INTERNAL\n    else:\n        existing_repo_visibility = GitHubVisibility.PUBLIC\n\n    logger.info(f\"Inferred existing visibility: {existing_repo_visibility.value}\")\n\n    visibility_changed = existing_repo_visibility != current_repo_visibility\n    if visibility_changed:\n        logger.info(\n            f\"Visibility changed for repo {repo.id} ({repo.name}): \"\n            f\"{existing_repo_visibility.value} -> {current_repo_visibility.value}\"\n        )\n\n    return visibility_changed\n\n\ndef _teams_updated_from_groups(\n    repo: Repository,\n    github_client: Github,\n    current_external_group_ids: list[str],\n) -> bool:\n    \"\"\"\n    Check if repository team memberships have changed using existing group IDs.\n    \"\"\"\n    # Fetch current team slugs for the repository\n    current_teams = fetch_repository_team_slugs(repo=repo, github_client=github_client)\n    logger.info(\n        f\"Current teams for repository {repo.id} (name: {repo.name}): {current_teams}\"\n    )\n\n    # Build group IDs to exclude from team comparison (non-team groups)\n    collaborators_group_id = build_ext_group_name_for_onyx(\n        source=DocumentSource.GITHUB,\n        ext_group_name=form_collaborators_group_id(repo.id),\n    )\n    outside_collaborators_group_id = build_ext_group_name_for_onyx(\n        source=DocumentSource.GITHUB,\n        ext_group_name=form_outside_collaborators_group_id(repo.id),\n    )\n    non_team_group_ids = {collaborators_group_id, outside_collaborators_group_id}\n\n    # Extract existing team IDs from current external group IDs\n    existing_team_ids = set()\n    for group_id in current_external_group_ids:\n        # Skip all non-team groups, keep only team groups\n        if group_id not in non_team_group_ids:\n            existing_team_ids.add(group_id)\n\n    # Note: existing_team_ids from DB are already prefixed (e.g., \"github__team-slug\")\n    # but current_teams from API are raw team slugs, so we need to add the prefix\n    current_team_ids = set()\n    for team_slug in current_teams:\n        team_group_id = build_ext_group_name_for_onyx(\n            source=DocumentSource.GITHUB,\n            ext_group_name=team_slug,\n        )\n        current_team_ids.add(team_group_id)\n\n    logger.info(\n        f\"Existing team IDs: {existing_team_ids}, Current team IDs: {current_team_ids}\"\n    )\n\n    # Compare actual team IDs to detect changes\n    teams_changed = current_team_ids != existing_team_ids\n    if teams_changed:\n        logger.info(\n            f\"Team changes detected for repo {repo.id} (name: {repo.name}): \"\n            f\"existing={existing_team_ids}, current={current_team_ids}\"\n        )\n\n    return teams_changed\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/github/group_sync.py",
    "content": "from collections.abc import Generator\n\nfrom github import Repository\n\nfrom ee.onyx.db.external_perm import ExternalUserGroup\nfrom ee.onyx.external_permissions.github.utils import get_external_user_group\nfrom onyx.connectors.github.connector import GithubConnector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef github_group_sync(\n    tenant_id: str,  # noqa: ARG001\n    cc_pair: ConnectorCredentialPair,\n) -> Generator[ExternalUserGroup, None, None]:\n    github_connector: GithubConnector = GithubConnector(\n        **cc_pair.connector.connector_specific_config\n    )\n    credential_json = (\n        cc_pair.credential.credential_json.get_value(apply_mask=False)\n        if cc_pair.credential.credential_json\n        else {}\n    )\n    github_connector.load_credentials(credential_json)\n    if not github_connector.github_client:\n        raise ValueError(\"github_client is required\")\n\n    logger.info(\"Starting GitHub group sync...\")\n    repos: list[Repository.Repository] = []\n    if github_connector.repositories:\n        if \",\" in github_connector.repositories:\n            # Multiple repositories specified\n            repos = github_connector.get_github_repos(github_connector.github_client)\n        else:\n            # Single repository (backward compatibility)\n            repos = [github_connector.get_github_repo(github_connector.github_client)]\n    else:\n        # All repositories\n        repos = github_connector.get_all_repos(github_connector.github_client)\n\n    for repo in repos:\n        try:\n            for external_group in get_external_user_group(\n                repo, github_connector.github_client\n            ):\n                logger.info(f\"External group: {external_group}\")\n                yield external_group\n        except Exception as e:\n            logger.error(f\"Error processing repository {repo.id} ({repo.name}): {e}\")\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/github/utils.py",
    "content": "from collections.abc import Callable\nfrom enum import Enum\nfrom typing import List\nfrom typing import Optional\nfrom typing import Tuple\nfrom typing import TypeVar\n\nfrom github import Github\nfrom github import RateLimitExceededException\nfrom github.GithubException import GithubException\nfrom github.NamedUser import NamedUser\nfrom github.Organization import Organization\nfrom github.PaginatedList import PaginatedList\nfrom github.Repository import Repository\nfrom github.Team import Team\nfrom pydantic import BaseModel\n\nfrom ee.onyx.db.external_perm import ExternalUserGroup\nfrom onyx.access.models import ExternalAccess\nfrom onyx.access.utils import build_ext_group_name_for_onyx\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.github.rate_limit_utils import sleep_after_rate_limit_exception\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass GitHubVisibility(Enum):\n    \"\"\"GitHub repository visibility options.\"\"\"\n\n    PUBLIC = \"public\"\n    PRIVATE = \"private\"\n    INTERNAL = \"internal\"\n\n\nMAX_RETRY_COUNT = 3\n\nT = TypeVar(\"T\")\n\n# Higher-order function to wrap GitHub operations with retry and exception handling\n\n\ndef _run_with_retry(\n    operation: Callable[[], T],\n    description: str,\n    github_client: Github,\n    retry_count: int = 0,\n) -> Optional[T]:\n    \"\"\"Execute a GitHub operation with retry on rate limit and exception handling.\"\"\"\n    logger.debug(f\"Starting operation '{description}', attempt {retry_count + 1}\")\n    try:\n        result = operation()\n        logger.debug(f\"Operation '{description}' completed successfully\")\n        return result\n    except RateLimitExceededException:\n        if retry_count < MAX_RETRY_COUNT:\n            sleep_after_rate_limit_exception(github_client)\n            logger.warning(\n                f\"Rate limit exceeded while {description}. Retrying... (attempt {retry_count + 1}/{MAX_RETRY_COUNT})\"\n            )\n            return _run_with_retry(\n                operation, description, github_client, retry_count + 1\n            )\n        else:\n            error_msg = f\"Max retries exceeded for {description}\"\n            logger.exception(error_msg)\n            raise RuntimeError(error_msg)\n    except GithubException as e:\n        logger.warning(f\"GitHub API error during {description}: {e}\")\n        return None\n    except Exception as e:\n        logger.exception(f\"Unexpected error during {description}: {e}\")\n        return None\n\n\nclass UserInfo(BaseModel):\n    \"\"\"Represents a GitHub user with their basic information.\"\"\"\n\n    login: str\n    name: Optional[str] = None\n    email: Optional[str] = None\n\n\nclass TeamInfo(BaseModel):\n    \"\"\"Represents a GitHub team with its members.\"\"\"\n\n    name: str\n    slug: str\n    members: List[UserInfo]\n\n\ndef _fetch_organization_members(\n    github_client: Github,\n    org_name: str,\n    retry_count: int = 0,  # noqa: ARG001\n) -> List[UserInfo]:\n    \"\"\"Fetch all organization members including owners and regular members.\"\"\"\n    org_members: List[UserInfo] = []\n    logger.info(f\"Fetching organization members for {org_name}\")\n\n    org = _run_with_retry(\n        lambda: github_client.get_organization(org_name),\n        f\"get organization {org_name}\",\n        github_client,\n    )\n    if not org:\n        logger.error(f\"Failed to fetch organization {org_name}\")\n        raise RuntimeError(f\"Failed to fetch organization {org_name}\")\n\n    member_objs: PaginatedList[NamedUser] | list[NamedUser] = (\n        _run_with_retry(\n            lambda: org.get_members(filter_=\"all\"),\n            f\"get members for organization {org_name}\",\n            github_client,\n        )\n        or []\n    )\n\n    for member in member_objs:\n        user_info = UserInfo(login=member.login, name=member.name, email=member.email)\n        org_members.append(user_info)\n\n    logger.info(f\"Fetched {len(org_members)} members for organization {org_name}\")\n    return org_members\n\n\ndef _fetch_repository_teams_detailed(\n    repo: Repository,\n    github_client: Github,\n    retry_count: int = 0,  # noqa: ARG001\n) -> List[TeamInfo]:\n    \"\"\"Fetch teams with access to the repository and their members.\"\"\"\n    teams_data: List[TeamInfo] = []\n    logger.info(f\"Fetching teams for repository {repo.full_name}\")\n\n    team_objs: PaginatedList[Team] | list[Team] = (\n        _run_with_retry(\n            lambda: repo.get_teams(),\n            f\"get teams for repository {repo.full_name}\",\n            github_client,\n        )\n        or []\n    )\n\n    for team in team_objs:\n        logger.info(\n            f\"Processing team {team.name} (slug: {team.slug}) for repository {repo.full_name}\"\n        )\n\n        members: PaginatedList[NamedUser] | list[NamedUser] = (\n            _run_with_retry(\n                lambda: team.get_members(),\n                f\"get members for team {team.name}\",\n                github_client,\n            )\n            or []\n        )\n\n        team_members = []\n        for m in members:\n            user_info = UserInfo(login=m.login, name=m.name, email=m.email)\n            team_members.append(user_info)\n\n        team_info = TeamInfo(name=team.name, slug=team.slug, members=team_members)\n        teams_data.append(team_info)\n        logger.info(f\"Team {team.name} has {len(team_members)} members\")\n\n    logger.info(f\"Fetched {len(teams_data)} teams for repository {repo.full_name}\")\n    return teams_data\n\n\ndef fetch_repository_team_slugs(\n    repo: Repository,\n    github_client: Github,\n    retry_count: int = 0,  # noqa: ARG001\n) -> List[str]:\n    \"\"\"Fetch team slugs with access to the repository.\"\"\"\n    logger.info(f\"Fetching team slugs for repository {repo.full_name}\")\n    teams_data: List[str] = []\n\n    team_objs: PaginatedList[Team] | list[Team] = (\n        _run_with_retry(\n            lambda: repo.get_teams(),\n            f\"get teams for repository {repo.full_name}\",\n            github_client,\n        )\n        or []\n    )\n\n    for team in team_objs:\n        teams_data.append(team.slug)\n\n    logger.info(f\"Fetched {len(teams_data)} team slugs for repository {repo.full_name}\")\n    return teams_data\n\n\ndef _get_collaborators_and_outside_collaborators(\n    github_client: Github,\n    repo: Repository,\n) -> Tuple[List[UserInfo], List[UserInfo]]:\n    \"\"\"Fetch and categorize collaborators into regular and outside collaborators.\"\"\"\n    collaborators: List[UserInfo] = []\n    outside_collaborators: List[UserInfo] = []\n    logger.info(f\"Fetching collaborators for repository {repo.full_name}\")\n\n    repo_collaborators: PaginatedList[NamedUser] | list[NamedUser] = (\n        _run_with_retry(\n            lambda: repo.get_collaborators(),\n            f\"get collaborators for repository {repo.full_name}\",\n            github_client,\n        )\n        or []\n    )\n\n    for collaborator in repo_collaborators:\n        is_outside = False\n\n        # Check if collaborator is outside the organization\n        if repo.organization:\n            org: Organization | None = _run_with_retry(\n                lambda: github_client.get_organization(repo.organization.login),\n                f\"get organization {repo.organization.login}\",\n                github_client,\n            )\n\n            if org is not None:\n                org_obj = org\n                membership = _run_with_retry(\n                    lambda: org_obj.has_in_members(collaborator),\n                    f\"check membership for {collaborator.login} in org {org_obj.login}\",\n                    github_client,\n                )\n                is_outside = membership is not None and not membership\n\n        info = UserInfo(\n            login=collaborator.login, name=collaborator.name, email=collaborator.email\n        )\n        if repo.organization and is_outside:\n            outside_collaborators.append(info)\n        else:\n            collaborators.append(info)\n\n    logger.info(\n        f\"Categorized {len(collaborators)} regular and {len(outside_collaborators)} outside collaborators for {repo.full_name}\"\n    )\n    return collaborators, outside_collaborators\n\n\ndef form_collaborators_group_id(repository_id: int) -> str:\n    \"\"\"Generate group ID for repository collaborators.\"\"\"\n    if not repository_id:\n        logger.exception(\"Repository ID is required to generate collaborators group ID\")\n        raise ValueError(\"Repository ID must be set to generate group ID.\")\n    group_id = f\"{repository_id}_collaborators\"\n    return group_id\n\n\ndef form_organization_group_id(organization_id: int) -> str:\n    \"\"\"Generate group ID for organization using organization ID.\"\"\"\n    if not organization_id:\n        logger.exception(\n            \"Organization ID is required to generate organization group ID\"\n        )\n        raise ValueError(\"Organization ID must be set to generate group ID.\")\n    group_id = f\"{organization_id}_organization\"\n    return group_id\n\n\ndef form_outside_collaborators_group_id(repository_id: int) -> str:\n    \"\"\"Generate group ID for outside collaborators.\"\"\"\n    if not repository_id:\n        logger.exception(\n            \"Repository ID is required to generate outside collaborators group ID\"\n        )\n        raise ValueError(\"Repository ID must be set to generate group ID.\")\n    group_id = f\"{repository_id}_outside_collaborators\"\n    return group_id\n\n\ndef get_repository_visibility(repo: Repository) -> GitHubVisibility:\n    \"\"\"\n    Get the visibility of a repository.\n    Returns GitHubVisibility enum member.\n    \"\"\"\n    if hasattr(repo, \"visibility\"):\n        visibility = repo.visibility\n        logger.info(\n            f\"Repository {repo.full_name} visibility from attribute: {visibility}\"\n        )\n        try:\n            return GitHubVisibility(visibility)\n        except ValueError:\n            logger.warning(\n                f\"Unknown visibility '{visibility}' for repo {repo.full_name}, defaulting to private\"\n            )\n            return GitHubVisibility.PRIVATE\n\n    logger.info(f\"Repository {repo.full_name} is private\")\n    return GitHubVisibility.PRIVATE\n\n\ndef get_external_access_permission(\n    repo: Repository, github_client: Github, add_prefix: bool = False\n) -> ExternalAccess:\n    \"\"\"\n    Get the external access permission for a repository.\n    Uses group-based permissions for efficiency and scalability.\n\n    add_prefix: When this method is called during the initial permission sync via the connector,\n                the group ID isn't prefixed with the source while inserting the document record.\n                So in that case, set add_prefix to True, allowing the method itself to handle\n                prefixing. However, when the same method is invoked from doc_sync, our system\n                already adds the prefix to the group ID while processing the ExternalAccess object.\n    \"\"\"\n    # We maintain collaborators, and outside collaborators as two separate groups\n    # instead of adding individual user emails to ExternalAccess.external_user_emails for two reasons:\n    # 1. Changes in repo collaborators (additions/removals) would require updating all documents.\n    # 2. Repo permissions can change without updating the repo's updated_at timestamp,\n    #    forcing full permission syncs for all documents every time, which is inefficient.\n\n    repo_visibility = get_repository_visibility(repo)\n    logger.info(\n        f\"Generating ExternalAccess for {repo.full_name}: visibility={repo_visibility.value}\"\n    )\n\n    if repo_visibility == GitHubVisibility.PUBLIC:\n        logger.info(\n            f\"Repository {repo.full_name} is public - allowing access to all users\"\n        )\n        return ExternalAccess(\n            external_user_emails=set(),\n            external_user_group_ids=set(),\n            is_public=True,\n        )\n    elif repo_visibility == GitHubVisibility.PRIVATE:\n        logger.info(\n            f\"Repository {repo.full_name} is private - setting up restricted access\"\n        )\n\n        collaborators_group_id = form_collaborators_group_id(repo.id)\n        outside_collaborators_group_id = form_outside_collaborators_group_id(repo.id)\n        if add_prefix:\n            collaborators_group_id = build_ext_group_name_for_onyx(\n                source=DocumentSource.GITHUB,\n                ext_group_name=collaborators_group_id,\n            )\n            outside_collaborators_group_id = build_ext_group_name_for_onyx(\n                source=DocumentSource.GITHUB,\n                ext_group_name=outside_collaborators_group_id,\n            )\n        group_ids = {collaborators_group_id, outside_collaborators_group_id}\n\n        team_slugs = fetch_repository_team_slugs(repo, github_client)\n        if add_prefix:\n            team_slugs = [\n                build_ext_group_name_for_onyx(\n                    source=DocumentSource.GITHUB,\n                    ext_group_name=slug,\n                )\n                for slug in team_slugs\n            ]\n        group_ids.update(team_slugs)\n\n        logger.info(f\"ExternalAccess groups for {repo.full_name}: {group_ids}\")\n        return ExternalAccess(\n            external_user_emails=set(),\n            external_user_group_ids=group_ids,\n            is_public=False,\n        )\n    else:\n        # Internal repositories - accessible to organization members\n        logger.info(\n            f\"Repository {repo.full_name} is internal - accessible to org members\"\n        )\n        org_group_id = form_organization_group_id(repo.organization.id)\n        if add_prefix:\n            org_group_id = build_ext_group_name_for_onyx(\n                source=DocumentSource.GITHUB,\n                ext_group_name=org_group_id,\n            )\n        group_ids = {org_group_id}\n        logger.info(f\"ExternalAccess groups for {repo.full_name}: {group_ids}\")\n        return ExternalAccess(\n            external_user_emails=set(),\n            external_user_group_ids=group_ids,\n            is_public=False,\n        )\n\n\ndef get_external_user_group(\n    repo: Repository, github_client: Github\n) -> list[ExternalUserGroup]:\n    \"\"\"\n    Get the external user group for a repository.\n    Creates ExternalUserGroup objects with actual user emails for each permission group.\n    \"\"\"\n    repo_visibility = get_repository_visibility(repo)\n    logger.info(\n        f\"Generating ExternalUserGroups for {repo.full_name}: visibility={repo_visibility.value}\"\n    )\n\n    if repo_visibility == GitHubVisibility.PRIVATE:\n        logger.info(f\"Processing private repository {repo.full_name}\")\n\n        collaborators, outside_collaborators = (\n            _get_collaborators_and_outside_collaborators(github_client, repo)\n        )\n        teams = _fetch_repository_teams_detailed(repo, github_client)\n        external_user_groups = []\n\n        user_emails = set()\n        for collab in collaborators:\n            if collab.email:\n                user_emails.add(collab.email)\n            else:\n                logger.error(f\"Collaborator {collab.login} has no email\")\n\n        if user_emails:\n            collaborators_group = ExternalUserGroup(\n                id=form_collaborators_group_id(repo.id),\n                user_emails=list(user_emails),\n            )\n            external_user_groups.append(collaborators_group)\n            logger.info(f\"Created collaborators group with {len(user_emails)} emails\")\n\n        # Create group for outside collaborators\n        user_emails = set()\n        for collab in outside_collaborators:\n            if collab.email:\n                user_emails.add(collab.email)\n            else:\n                logger.error(f\"Outside collaborator {collab.login} has no email\")\n\n        if user_emails:\n            outside_collaborators_group = ExternalUserGroup(\n                id=form_outside_collaborators_group_id(repo.id),\n                user_emails=list(user_emails),\n            )\n            external_user_groups.append(outside_collaborators_group)\n            logger.info(\n                f\"Created outside collaborators group with {len(user_emails)} emails\"\n            )\n\n        # Create groups for teams\n        for team in teams:\n            user_emails = set()\n            for member in team.members:\n                if member.email:\n                    user_emails.add(member.email)\n                else:\n                    logger.error(f\"Team member {member.login} has no email\")\n\n            if user_emails:\n                team_group = ExternalUserGroup(\n                    id=team.slug,\n                    user_emails=list(user_emails),\n                )\n                external_user_groups.append(team_group)\n                logger.info(\n                    f\"Created team group {team.name} with {len(user_emails)} emails\"\n                )\n\n        logger.info(\n            f\"Created {len(external_user_groups)} ExternalUserGroups for private repository {repo.full_name}\"\n        )\n        return external_user_groups\n\n    if repo_visibility == GitHubVisibility.INTERNAL:\n        logger.info(f\"Processing internal repository {repo.full_name}\")\n\n        org_group_id = form_organization_group_id(repo.organization.id)\n        org_members = _fetch_organization_members(\n            github_client, repo.organization.login\n        )\n\n        user_emails = set()\n        for member in org_members:\n            if member.email:\n                user_emails.add(member.email)\n            else:\n                logger.error(f\"Org member {member.login} has no email\")\n\n        org_group = ExternalUserGroup(\n            id=org_group_id,\n            user_emails=list(user_emails),\n        )\n        logger.info(\n            f\"Created organization group with {len(user_emails)} emails for internal repository {repo.full_name}\"\n        )\n        return [org_group]\n\n    logger.info(f\"Repository {repo.full_name} is public - no user groups needed\")\n    return []\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/gmail/doc_sync.py",
    "content": "from collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\n\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction\nfrom onyx.access.models import DocExternalAccess\nfrom onyx.access.models import ElementExternalAccess\nfrom onyx.access.models import NodeExternalAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.gmail.connector import GmailConnector\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _get_slim_doc_generator(\n    cc_pair: ConnectorCredentialPair,\n    gmail_connector: GmailConnector,\n    callback: IndexingHeartbeatInterface | None = None,\n) -> GenerateSlimDocumentOutput:\n    current_time = datetime.now(timezone.utc)\n    start_time = (\n        cc_pair.last_time_perm_sync.replace(tzinfo=timezone.utc).timestamp()\n        if cc_pair.last_time_perm_sync\n        else 0.0\n    )\n\n    return gmail_connector.retrieve_all_slim_docs_perm_sync(\n        start=start_time,\n        end=current_time.timestamp(),\n        callback=callback,\n    )\n\n\ndef gmail_doc_sync(\n    cc_pair: ConnectorCredentialPair,\n    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001\n    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,  # noqa: ARG001\n    callback: IndexingHeartbeatInterface | None,\n) -> Generator[ElementExternalAccess, None, None]:\n    \"\"\"\n    Adds the external permissions to the documents and hierarchy nodes in postgres.\n    If the document doesn't already exist in postgres, we create\n    it in postgres so that when it gets created later, the permissions are\n    already populated.\n    \"\"\"\n    gmail_connector = GmailConnector(**cc_pair.connector.connector_specific_config)\n    credential_json = (\n        cc_pair.credential.credential_json.get_value(apply_mask=False)\n        if cc_pair.credential.credential_json\n        else {}\n    )\n    gmail_connector.load_credentials(credential_json)\n\n    slim_doc_generator = _get_slim_doc_generator(\n        cc_pair, gmail_connector, callback=callback\n    )\n\n    for slim_doc_batch in slim_doc_generator:\n        for slim_doc in slim_doc_batch:\n            if callback:\n                if callback.should_stop():\n                    raise RuntimeError(\"gmail_doc_sync: Stop signal detected\")\n\n                callback.progress(\"gmail_doc_sync\", 1)\n\n            if isinstance(slim_doc, HierarchyNode):\n                # Yield hierarchy node permissions to be processed in outer layer\n                if slim_doc.external_access:\n                    yield NodeExternalAccess(\n                        external_access=slim_doc.external_access,\n                        raw_node_id=slim_doc.raw_node_id,\n                        source=DocumentSource.GMAIL.value,\n                    )\n                continue\n            if slim_doc.external_access is None:\n                logger.warning(f\"No permissions found for document {slim_doc.id}\")\n                continue\n\n            yield DocExternalAccess(\n                doc_id=slim_doc.id,\n                external_access=slim_doc.external_access,\n            )\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/google_drive/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/external_permissions/google_drive/doc_sync.py",
    "content": "from collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\n\nfrom ee.onyx.external_permissions.google_drive.models import GoogleDrivePermission\nfrom ee.onyx.external_permissions.google_drive.models import PermissionType\nfrom ee.onyx.external_permissions.google_drive.permission_retrieval import (\n    get_permissions_by_ids,\n)\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction\nfrom onyx.access.models import DocExternalAccess\nfrom onyx.access.models import ElementExternalAccess\nfrom onyx.access.models import ExternalAccess\nfrom onyx.access.models import NodeExternalAccess\nfrom onyx.access.utils import build_ext_group_name_for_onyx\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.google_drive.connector import GoogleDriveConnector\nfrom onyx.connectors.google_drive.models import GoogleDriveFileType\nfrom onyx.connectors.google_utils.resources import GoogleDriveService\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _get_slim_doc_generator(\n    cc_pair: ConnectorCredentialPair,\n    google_drive_connector: GoogleDriveConnector,\n    callback: IndexingHeartbeatInterface | None = None,\n) -> GenerateSlimDocumentOutput:\n    current_time = datetime.now(timezone.utc)\n    start_time = (\n        cc_pair.last_time_perm_sync.replace(tzinfo=timezone.utc).timestamp()\n        if cc_pair.last_time_perm_sync\n        else 0.0\n    )\n\n    return google_drive_connector.retrieve_all_slim_docs_perm_sync(\n        start=start_time,\n        end=current_time.timestamp(),\n        callback=callback,\n    )\n\n\ndef _merge_permissions_lists(\n    permission_lists: list[list[GoogleDrivePermission]],\n) -> list[GoogleDrivePermission]:\n    \"\"\"\n    Merge a list of permission lists into a single list of permissions.\n    \"\"\"\n    seen_permission_ids: set[str] = set()\n    merged_permissions: list[GoogleDrivePermission] = []\n    for permission_list in permission_lists:\n        for permission in permission_list:\n            if permission.id not in seen_permission_ids:\n                merged_permissions.append(permission)\n                seen_permission_ids.add(permission.id)\n\n    return merged_permissions\n\n\ndef get_external_access_for_raw_gdrive_file(\n    file: GoogleDriveFileType,\n    company_domain: str,\n    retriever_drive_service: GoogleDriveService | None,\n    admin_drive_service: GoogleDriveService,\n    fallback_user_email: str,\n    add_prefix: bool = False,\n) -> ExternalAccess:\n    \"\"\"\n    Get the external access for a raw Google Drive file.\n\n    Assumes the file we retrieved has EITHER `permissions` or `permission_ids`\n\n    add_prefix: When this method is called during the initial indexing via the connector,\n                set add_prefix to True so group IDs are prefixed with the source type.\n                When invoked from doc_sync (permission sync), use the default (False)\n                since upsert_document_external_perms handles prefixing.\n    fallback_user_email: When we cannot retrieve any permission info for a file\n                (e.g. externally-owned files where the API returns no permissions\n                and permissions.list returns 403), fall back to granting access\n                to this user. This is typically the impersonated org user whose\n                drive contained the file.\n    \"\"\"\n    doc_id = file.get(\"id\")\n    if not doc_id:\n        raise ValueError(\"No doc_id found in file\")\n\n    permissions = file.get(\"permissions\")\n    permission_ids = file.get(\"permissionIds\")\n    drive_id = file.get(\"driveId\")\n\n    permissions_list: list[GoogleDrivePermission] = []\n    if permissions:\n        permissions_list = [\n            GoogleDrivePermission.from_drive_permission(p) for p in permissions\n        ]\n    elif permission_ids:\n\n        def _get_permissions(\n            drive_service: GoogleDriveService,\n        ) -> list[GoogleDrivePermission]:\n            return get_permissions_by_ids(\n                drive_service=drive_service,\n                doc_id=doc_id,\n                permission_ids=permission_ids,\n            )\n\n        permissions_list = _get_permissions(\n            retriever_drive_service or admin_drive_service\n        )\n        if len(permissions_list) != len(permission_ids) and retriever_drive_service:\n            logger.warning(\n                f\"Failed to get all permissions for file {doc_id} with retriever service, trying admin service\"\n            )\n            backup_permissions_list = _get_permissions(admin_drive_service)\n            permissions_list = _merge_permissions_lists(\n                [permissions_list, backup_permissions_list]\n            )\n\n    # For externally-owned files, the Drive API may return no permissions\n    # and permissions.list may return 403. In this case, fall back to\n    # granting access to the user who found the file in their drive.\n    # Note, even if other users also have access to this file,\n    # they will not be granted access in Onyx.\n    # We check permissions_list (the final result after all fetch attempts)\n    # rather than the raw fields, because permission_ids may be present\n    # but the actual fetch can still return empty due to a 403.\n    if not permissions_list:\n        logger.info(\n            f\"No permission info available for file {doc_id} \"\n            f\"(likely owned by a user outside of your organization). \"\n            f\"Falling back to granting access to retriever user: {fallback_user_email}\"\n        )\n        return ExternalAccess(\n            external_user_emails={fallback_user_email},\n            external_user_group_ids=set(),\n            is_public=False,\n        )\n\n    folder_ids_to_inherit_permissions_from: set[str] = set()\n    user_emails: set[str] = set()\n    group_emails: set[str] = set()\n    public = False\n\n    for permission in permissions_list:\n        # if the permission is inherited, do not add it directly to the file\n        # instead, add the folder ID as a group that has access to the file\n        # we will then handle mapping that folder to the list of Onyx users\n        # in the group sync job\n        # NOTE: this doesn't handle the case where a folder initially has no\n        # permissioning, but then later that folder is shared with a user or group.\n        # We could fetch all ancestors of the file to get the list of folders that\n        # might affect the permissions of the file, but this will get replaced with\n        # an audit-log based approach in the future so not doing it now.\n        if permission.inherited_from:\n            folder_ids_to_inherit_permissions_from.add(permission.inherited_from)\n\n        if permission.type == PermissionType.USER:\n            if permission.email_address:\n                user_emails.add(permission.email_address)\n            else:\n                logger.error(\n                    f\"Permission is type `user` but no email address is provided for document {doc_id}\\n {permission}\"\n                )\n        elif permission.type == PermissionType.GROUP:\n            # groups are represented as email addresses within Drive\n            if permission.email_address:\n                group_emails.add(permission.email_address)\n            else:\n                logger.error(\n                    f\"Permission is type `group` but no email address is provided for document {doc_id}\\n {permission}\"\n                )\n        elif permission.type == PermissionType.DOMAIN and company_domain:\n            if permission.domain == company_domain:\n                public = True\n            else:\n                logger.warning(\n                    f\"Permission is type domain but does not match company domain:\\n {permission}\"\n                )\n        elif permission.type == PermissionType.ANYONE:\n            public = True\n\n    group_ids = (\n        group_emails\n        | folder_ids_to_inherit_permissions_from\n        | ({drive_id} if drive_id is not None else set())\n    )\n\n    # Prefix group IDs with source type if requested (for indexing path)\n    if add_prefix:\n        group_ids = {\n            build_ext_group_name_for_onyx(group_id, DocumentSource.GOOGLE_DRIVE)\n            for group_id in group_ids\n        }\n\n    return ExternalAccess(\n        external_user_emails=user_emails,\n        external_user_group_ids=group_ids,\n        is_public=public,\n    )\n\n\ndef get_external_access_for_folder(\n    folder: GoogleDriveFileType,\n    google_domain: str,\n    drive_service: GoogleDriveService,\n    add_prefix: bool = False,\n) -> ExternalAccess:\n    \"\"\"\n    Extract ExternalAccess from a folder's permissions.\n\n    This fetches permissions using the Drive API (via permissionIds) and extracts\n    user emails, group emails, and public access status.\n\n    Args:\n        folder: The folder metadata from Google Drive API (must include permissionIds field)\n        google_domain: The company's Google Workspace domain (e.g., \"company.com\")\n        drive_service: Google Drive service for fetching permission details\n        add_prefix: When True, prefix group IDs with source type (for indexing path).\n                   When False (default), leave unprefixed (for permission sync path).\n\n    Returns:\n        ExternalAccess with extracted permission info\n    \"\"\"\n    folder_id = folder.get(\"id\")\n    if not folder_id:\n        logger.warning(\"Folder missing ID, returning empty permissions\")\n        return ExternalAccess(\n            external_user_emails=set(),\n            external_user_group_ids=set(),\n            is_public=False,\n        )\n\n    # Get permission IDs from folder metadata\n    permission_ids = folder.get(\"permissionIds\") or []\n    if not permission_ids:\n        logger.debug(f\"No permissionIds found for folder {folder_id}\")\n        return ExternalAccess(\n            external_user_emails=set(),\n            external_user_group_ids=set(),\n            is_public=False,\n        )\n\n    # Fetch full permission objects using the permission IDs\n    permissions_list = get_permissions_by_ids(\n        drive_service=drive_service,\n        doc_id=folder_id,\n        permission_ids=permission_ids,\n    )\n\n    user_emails: set[str] = set()\n    group_emails: set[str] = set()\n    is_public = False\n\n    for permission in permissions_list:\n        if permission.type == PermissionType.USER:\n            if permission.email_address:\n                user_emails.add(permission.email_address)\n            else:\n                logger.warning(f\"User permission without email for folder {folder_id}\")\n        elif permission.type == PermissionType.GROUP:\n            # Groups are represented as email addresses in Google Drive\n            if permission.email_address:\n                group_emails.add(permission.email_address)\n            else:\n                logger.warning(f\"Group permission without email for folder {folder_id}\")\n        elif permission.type == PermissionType.DOMAIN:\n            # Domain permission - check if it matches company domain\n            if permission.domain == google_domain:\n                # Only public if discoverable (allowFileDiscovery is not False)\n                # If allowFileDiscovery is False, it's \"link only\" access\n                is_public = permission.allow_file_discovery is not False\n            else:\n                logger.debug(\n                    f\"Domain permission for {permission.domain} does not match \"\n                    f\"company domain {google_domain} for folder {folder_id}\"\n                )\n        elif permission.type == PermissionType.ANYONE:\n            # Only public if discoverable (allowFileDiscovery is not False)\n            # If allowFileDiscovery is False, it's \"link only\" access\n            is_public = permission.allow_file_discovery is not False\n\n    # Prefix group IDs with source type if requested (for indexing path)\n    group_ids: set[str] = group_emails\n    if add_prefix:\n        group_ids = {\n            build_ext_group_name_for_onyx(group_id, DocumentSource.GOOGLE_DRIVE)\n            for group_id in group_emails\n        }\n\n    return ExternalAccess(\n        external_user_emails=user_emails,\n        external_user_group_ids=group_ids,\n        is_public=is_public,\n    )\n\n\ndef gdrive_doc_sync(\n    cc_pair: ConnectorCredentialPair,\n    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001\n    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,  # noqa: ARG001\n    callback: IndexingHeartbeatInterface | None,\n) -> Generator[ElementExternalAccess, None, None]:\n    \"\"\"\n    Adds the external permissions to the documents and hierarchy nodes in postgres.\n    If the document doesn't already exist in postgres, we create\n    it in postgres so that when it gets created later, the permissions are\n    already populated.\n    \"\"\"\n    google_drive_connector = GoogleDriveConnector(\n        **cc_pair.connector.connector_specific_config\n    )\n    credential_json = (\n        cc_pair.credential.credential_json.get_value(apply_mask=False)\n        if cc_pair.credential.credential_json\n        else {}\n    )\n    google_drive_connector.load_credentials(credential_json)\n\n    slim_doc_generator = _get_slim_doc_generator(cc_pair, google_drive_connector)\n\n    total_processed = 0\n    for slim_doc_batch in slim_doc_generator:\n        logger.info(f\"Drive perm sync: Processing {len(slim_doc_batch)} documents\")\n        for slim_doc in slim_doc_batch:\n            if callback:\n                if callback.should_stop():\n                    raise RuntimeError(\"gdrive_doc_sync: Stop signal detected\")\n\n                callback.progress(\"gdrive_doc_sync\", 1)\n            if isinstance(slim_doc, HierarchyNode):\n                # Yield hierarchy node permissions to be processed in outer layer\n                if slim_doc.external_access:\n                    yield NodeExternalAccess(\n                        external_access=slim_doc.external_access,\n                        raw_node_id=slim_doc.raw_node_id,\n                        source=DocumentSource.GOOGLE_DRIVE.value,\n                    )\n                continue\n            if slim_doc.external_access is None:\n                raise ValueError(\n                    f\"Drive perm sync: No external access for document {slim_doc.id}\"\n                )\n\n            yield DocExternalAccess(\n                external_access=slim_doc.external_access,\n                doc_id=slim_doc.id,\n            )\n        total_processed += len(slim_doc_batch)\n        logger.info(f\"Drive perm sync: Processed {total_processed} total documents\")\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/google_drive/folder_retrieval.py",
    "content": "from collections.abc import Iterator\n\nfrom googleapiclient.discovery import Resource  # type: ignore\n\nfrom ee.onyx.external_permissions.google_drive.models import GoogleDrivePermission\nfrom ee.onyx.external_permissions.google_drive.permission_retrieval import (\n    get_permissions_by_ids,\n)\nfrom onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE\nfrom onyx.connectors.google_drive.file_retrieval import generate_time_range_filter\nfrom onyx.connectors.google_drive.models import GoogleDriveFileType\nfrom onyx.connectors.google_utils.google_utils import execute_paginated_retrieval\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Only include fields we need - folder ID and permissions\n# IMPORTANT: must fetch permissionIds, since sometimes the drive API\n# seems to miss permissions when requesting them directly\nFOLDER_PERMISSION_FIELDS = \"nextPageToken, files(id, name, permissionIds, permissions(id, emailAddress, type, domain, permissionDetails))\"\n\n\ndef get_folder_permissions_by_ids(\n    service: Resource,\n    folder_id: str,\n    permission_ids: list[str],\n) -> list[GoogleDrivePermission]:\n    \"\"\"\n    Retrieves permissions for a specific folder filtered by permission IDs.\n\n    Args:\n        service: The Google Drive service instance\n        folder_id: The ID of the folder to fetch permissions for\n        permission_ids: A list of permission IDs to filter by\n\n    Returns:\n        A list of permissions matching the provided permission IDs\n    \"\"\"\n    return get_permissions_by_ids(\n        drive_service=service,\n        doc_id=folder_id,\n        permission_ids=permission_ids,\n    )\n\n\ndef get_modified_folders(\n    service: Resource,\n    start: SecondsSinceUnixEpoch | None = None,\n    end: SecondsSinceUnixEpoch | None = None,\n) -> Iterator[GoogleDriveFileType]:\n    \"\"\"\n    Retrieves all folders that were modified within the specified time range.\n    Only includes folder ID and permission information, not any contained files.\n\n    Args:\n        service: The Google Drive service instance\n        start: The start time as seconds since Unix epoch (inclusive)\n        end: The end time as seconds since Unix epoch (inclusive)\n\n    Returns:\n        An iterator yielding folder information including ID and permissions\n    \"\"\"\n    # Build query for folders\n    query = f\"mimeType = '{DRIVE_FOLDER_TYPE}'\"\n    query += \" and trashed = false\"\n    query += generate_time_range_filter(start, end)\n\n    # Retrieve and yield folders\n    for folder in execute_paginated_retrieval(\n        retrieval_function=service.files().list,\n        list_key=\"files\",\n        continue_on_404_or_403=True,\n        corpora=\"allDrives\",\n        supportsAllDrives=True,\n        includeItemsFromAllDrives=True,\n        includePermissionsForView=\"published\",\n        fields=FOLDER_PERMISSION_FIELDS,\n        q=query,\n    ):\n        yield folder\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/google_drive/group_sync.py",
    "content": "from collections.abc import Generator\n\nfrom googleapiclient.errors import HttpError  # type: ignore\nfrom pydantic import BaseModel\n\nfrom ee.onyx.db.external_perm import ExternalUserGroup\nfrom ee.onyx.external_permissions.google_drive.folder_retrieval import (\n    get_folder_permissions_by_ids,\n)\nfrom ee.onyx.external_permissions.google_drive.folder_retrieval import (\n    get_modified_folders,\n)\nfrom ee.onyx.external_permissions.google_drive.models import GoogleDrivePermission\nfrom ee.onyx.external_permissions.google_drive.models import PermissionType\nfrom onyx.connectors.google_drive.connector import GoogleDriveConnector\nfrom onyx.connectors.google_utils.google_utils import execute_paginated_retrieval\nfrom onyx.connectors.google_utils.resources import AdminService\nfrom onyx.connectors.google_utils.resources import get_admin_service\nfrom onyx.connectors.google_utils.resources import get_drive_service\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n\"\"\"\nFolder Permission Sync.\n\nEach folder is treated as a group. Each file has all ancestor folders\nas groups.\n\"\"\"\n\n\nclass FolderInfo(BaseModel):\n    id: str\n    permissions: list[GoogleDrivePermission]\n\n\ndef _get_all_folders(\n    google_drive_connector: GoogleDriveConnector, skip_folders_without_permissions: bool\n) -> list[FolderInfo]:\n    \"\"\"Have to get all folders since the group syncing system assumes all groups\n    are returned every time.\n\n    TODO: tweak things so we can fetch deltas.\n    \"\"\"\n    MAX_FAILED_PERCENTAGE = 0.5\n\n    all_folders: list[FolderInfo] = []\n    seen_folder_ids: set[str] = set()\n\n    def _get_all_folders_for_user(\n        google_drive_connector: GoogleDriveConnector,\n        skip_folders_without_permissions: bool,\n        user_email: str,\n    ) -> None:\n        \"\"\"Helper to get folders for a specific user + update shared seen_folder_ids\"\"\"\n        drive_service = get_drive_service(\n            google_drive_connector.creds,\n            user_email,\n        )\n\n        for folder in get_modified_folders(\n            service=drive_service,\n        ):\n            folder_id = folder[\"id\"]\n            if folder_id in seen_folder_ids:\n                logger.debug(f\"Folder {folder_id} has already been seen. Skipping.\")\n                continue\n\n            seen_folder_ids.add(folder_id)\n\n            # Check if the folder has permission IDs but no permissions\n            permission_ids = folder.get(\"permissionIds\", [])\n            raw_permissions = folder.get(\"permissions\", [])\n\n            if not raw_permissions and permission_ids:\n                # Fetch permissions using the IDs\n                permissions = get_folder_permissions_by_ids(\n                    drive_service, folder_id, permission_ids\n                )\n            else:\n                permissions = [\n                    GoogleDrivePermission.from_drive_permission(permission)\n                    for permission in raw_permissions\n                ]\n\n            # Don't include inherited permissions, those will be captured\n            # by the folder/shared drive itself\n            permissions = [\n                permission\n                for permission in permissions\n                if permission.inherited_from is None\n            ]\n\n            if not permissions and skip_folders_without_permissions:\n                logger.debug(f\"Folder {folder_id} has no permissions. Skipping.\")\n                continue\n\n            all_folders.append(\n                FolderInfo(\n                    id=folder_id,\n                    permissions=permissions,\n                )\n            )\n\n    failed_count = 0\n    user_emails = google_drive_connector._get_all_user_emails()\n    for user_email in user_emails:\n        try:\n            _get_all_folders_for_user(\n                google_drive_connector, skip_folders_without_permissions, user_email\n            )\n        except Exception:\n            logger.exception(f\"Error getting folders for user {user_email}\")\n            failed_count += 1\n\n            if failed_count > MAX_FAILED_PERCENTAGE * len(user_emails):\n                raise RuntimeError(\"Too many failed folder fetches during group sync\")\n\n    return all_folders\n\n\ndef _drive_folder_to_onyx_group(\n    folder: FolderInfo,\n    group_email_to_member_emails_map: dict[str, list[str]],\n) -> ExternalUserGroup:\n    \"\"\"\n    Converts a folder into an Onyx group.\n    \"\"\"\n    anyone_can_access = False\n    folder_member_emails: set[str] = set()\n\n    for permission in folder.permissions:\n        if permission.type == PermissionType.USER:\n            if permission.email_address is None:\n                logger.warning(\n                    f\"User email is None for folder {folder.id} permission {permission}\"\n                )\n                continue\n            folder_member_emails.add(permission.email_address)\n        elif permission.type == PermissionType.GROUP:\n            if permission.email_address not in group_email_to_member_emails_map:\n                logger.warning(\n                    f\"Group email {permission.email_address} for folder {folder.id} not found in group_email_to_member_emails_map\"\n                )\n                continue\n            folder_member_emails.update(\n                group_email_to_member_emails_map[permission.email_address]\n            )\n        elif permission.type == PermissionType.ANYONE:\n            anyone_can_access = True\n\n    return ExternalUserGroup(\n        id=folder.id,\n        user_emails=list(folder_member_emails),\n        gives_anyone_access=anyone_can_access,\n    )\n\n\n\"\"\"Individual Shared Drive / My Drive Permission Sync\"\"\"\n\n\ndef _get_drive_members(\n    google_drive_connector: GoogleDriveConnector,\n    admin_service: AdminService,\n) -> dict[str, tuple[set[str], set[str]]]:\n    \"\"\"\n    This builds a map of drive ids to their members (group and user emails).\n    E.g. {\n        \"drive_id_1\": ({\"group_email_1\"}, {\"user_email_1\", \"user_email_2\"}),\n        \"drive_id_2\": ({\"group_email_3\"}, {\"user_email_3\"}),\n    }\n    \"\"\"\n\n    # fetches shared drives only\n    drive_ids = google_drive_connector.get_all_drive_ids()\n\n    drive_id_to_members_map: dict[str, tuple[set[str], set[str]]] = {}\n    drive_service = get_drive_service(\n        google_drive_connector.creds,\n        google_drive_connector.primary_admin_email,\n    )\n\n    admin_user_info = (\n        admin_service.users()\n        .get(userKey=google_drive_connector.primary_admin_email)\n        .execute()\n    )\n    is_admin = admin_user_info.get(\"isAdmin\", False) or admin_user_info.get(\n        \"isDelegatedAdmin\", False\n    )\n\n    for drive_id in drive_ids:\n        group_emails: set[str] = set()\n        user_emails: set[str] = set()\n\n        try:\n            for permission in execute_paginated_retrieval(\n                drive_service.permissions().list,\n                list_key=\"permissions\",\n                fileId=drive_id,\n                fields=\"permissions(emailAddress, type),nextPageToken\",\n                supportsAllDrives=True,\n                # can only set `useDomainAdminAccess` to true if the user\n                # is an admin\n                useDomainAdminAccess=is_admin,\n            ):\n                # NOTE: don't need to check for PermissionType.ANYONE since\n                # you can't share a drive with the internet\n                if permission[\"type\"] == PermissionType.GROUP:\n                    group_emails.add(permission[\"emailAddress\"])\n                elif permission[\"type\"] == PermissionType.USER:\n                    user_emails.add(permission[\"emailAddress\"])\n        except HttpError as e:\n            if e.status_code == 404:\n                logger.warning(\n                    f\"Error getting permissions for drive id {drive_id}. \"\n                    f\"User '{google_drive_connector.primary_admin_email}' likely \"\n                    f\"does not have access to this drive. Exception: {e}\"\n                )\n            else:\n                raise e\n\n        drive_id_to_members_map[drive_id] = (group_emails, user_emails)\n    return drive_id_to_members_map\n\n\ndef _drive_member_map_to_onyx_groups(\n    drive_id_to_members_map: dict[str, tuple[set[str], set[str]]],\n    group_email_to_member_emails_map: dict[str, list[str]],\n) -> Generator[ExternalUserGroup, None, None]:\n    \"\"\"The `user_emails` for the Shared Drive should be all individuals in the\n    Shared Drive + the union of all flattened group emails.\"\"\"\n    for drive_id, (group_emails, user_emails) in drive_id_to_members_map.items():\n        drive_member_emails: set[str] = user_emails\n        for group_email in group_emails:\n            if group_email not in group_email_to_member_emails_map:\n                logger.warning(\n                    f\"Group email {group_email} for drive {drive_id} not found in group_email_to_member_emails_map\"\n                )\n                continue\n            drive_member_emails.update(group_email_to_member_emails_map[group_email])\n        yield ExternalUserGroup(\n            id=drive_id,\n            user_emails=list(drive_member_emails),\n        )\n\n\ndef _get_all_google_groups(\n    admin_service: AdminService,\n    google_domain: str,\n) -> set[str]:\n    \"\"\"\n    This gets all the group emails.\n    \"\"\"\n    group_emails: set[str] = set()\n    for group in execute_paginated_retrieval(\n        admin_service.groups().list,\n        list_key=\"groups\",\n        domain=google_domain,\n        fields=\"groups(email),nextPageToken\",\n    ):\n        group_emails.add(group[\"email\"])\n    return group_emails\n\n\ndef _google_group_to_onyx_group(\n    admin_service: AdminService,\n    group_email: str,\n) -> ExternalUserGroup:\n    \"\"\"\n    This maps google group emails to their member emails.\n    \"\"\"\n    group_member_emails: set[str] = set()\n    for member in execute_paginated_retrieval(\n        admin_service.members().list,\n        list_key=\"members\",\n        groupKey=group_email,\n        fields=\"members(email),nextPageToken\",\n    ):\n        group_member_emails.add(member[\"email\"])\n\n    return ExternalUserGroup(\n        id=group_email,\n        user_emails=list(group_member_emails),\n    )\n\n\ndef _map_group_email_to_member_emails(\n    admin_service: AdminService,\n    group_emails: set[str],\n) -> dict[str, set[str]]:\n    \"\"\"\n    This maps group emails to their member emails.\n    \"\"\"\n    group_to_member_map: dict[str, set[str]] = {}\n    for group_email in group_emails:\n        group_member_emails: set[str] = set()\n        for member in execute_paginated_retrieval(\n            admin_service.members().list,\n            list_key=\"members\",\n            groupKey=group_email,\n            fields=\"members(email),nextPageToken\",\n        ):\n            group_member_emails.add(member[\"email\"])\n\n        group_to_member_map[group_email] = group_member_emails\n    return group_to_member_map\n\n\ndef _build_onyx_groups(\n    drive_id_to_members_map: dict[str, tuple[set[str], set[str]]],\n    group_email_to_member_emails_map: dict[str, set[str]],\n    folder_info: list[FolderInfo],\n) -> list[ExternalUserGroup]:\n    onyx_groups: list[ExternalUserGroup] = []\n\n    # Convert all drive member definitions to onyx groups\n    # This is because having drive level access means you have\n    # irrevocable access to all the files in the drive.\n    for drive_id, (group_emails, user_emails) in drive_id_to_members_map.items():\n        drive_member_emails: set[str] = user_emails\n        for group_email in group_emails:\n            if group_email not in group_email_to_member_emails_map:\n                logger.warning(\n                    f\"Group email {group_email} for drive {drive_id} not found in group_email_to_member_emails_map\"\n                )\n                continue\n            drive_member_emails.update(group_email_to_member_emails_map[group_email])\n        onyx_groups.append(\n            ExternalUserGroup(\n                id=drive_id,\n                user_emails=list(drive_member_emails),\n            )\n        )\n\n    # Convert all folder permissions to onyx groups\n    for folder in folder_info:\n        anyone_can_access = False\n        folder_member_emails: set[str] = set()\n        for permission in folder.permissions:\n            if permission.type == PermissionType.USER:\n                if permission.email_address is None:\n                    logger.warning(\n                        f\"User email is None for folder {folder.id} permission {permission}\"\n                    )\n                    continue\n                folder_member_emails.add(permission.email_address)\n            elif permission.type == PermissionType.GROUP:\n                if permission.email_address not in group_email_to_member_emails_map:\n                    logger.warning(\n                        f\"Group email {permission.email_address} for folder {folder.id} \"\n                        \"not found in group_email_to_member_emails_map\"\n                    )\n                    continue\n                folder_member_emails.update(\n                    group_email_to_member_emails_map[permission.email_address]\n                )\n            elif permission.type == PermissionType.ANYONE:\n                anyone_can_access = True\n\n        onyx_groups.append(\n            ExternalUserGroup(\n                id=folder.id,\n                user_emails=list(folder_member_emails),\n                gives_anyone_access=anyone_can_access,\n            )\n        )\n\n    # Convert all group member definitions to onyx groups\n    for group_email, member_emails in group_email_to_member_emails_map.items():\n        onyx_groups.append(\n            ExternalUserGroup(\n                id=group_email,\n                user_emails=list(member_emails),\n            )\n        )\n\n    return onyx_groups\n\n\ndef gdrive_group_sync(\n    tenant_id: str,  # noqa: ARG001\n    cc_pair: ConnectorCredentialPair,\n) -> Generator[ExternalUserGroup, None, None]:\n    # Initialize connector and build credential/service objects\n    google_drive_connector = GoogleDriveConnector(\n        **cc_pair.connector.connector_specific_config\n    )\n    credential_json = (\n        cc_pair.credential.credential_json.get_value(apply_mask=False)\n        if cc_pair.credential.credential_json\n        else {}\n    )\n    google_drive_connector.load_credentials(credential_json)\n    admin_service = get_admin_service(\n        google_drive_connector.creds, google_drive_connector.primary_admin_email\n    )\n\n    # Get all drive members\n    drive_id_to_members_map = _get_drive_members(google_drive_connector, admin_service)\n\n    # Get all group emails\n    all_group_emails = _get_all_google_groups(\n        admin_service, google_drive_connector.google_domain\n    )\n\n    # Each google group is an Onyx group, yield those\n    group_email_to_member_emails_map: dict[str, list[str]] = {}\n    for group_email in all_group_emails:\n        onyx_group = _google_group_to_onyx_group(admin_service, group_email)\n        group_email_to_member_emails_map[group_email] = onyx_group.user_emails\n        yield onyx_group\n\n    # Each drive is a group, yield those\n    for onyx_group in _drive_member_map_to_onyx_groups(\n        drive_id_to_members_map, group_email_to_member_emails_map\n    ):\n        yield onyx_group\n\n    # Get all folder permissions\n    folder_info = _get_all_folders(\n        google_drive_connector=google_drive_connector,\n        skip_folders_without_permissions=True,\n    )\n    for folder in folder_info:\n        yield _drive_folder_to_onyx_group(folder, group_email_to_member_emails_map)\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/google_drive/models.py",
    "content": "from enum import Enum\nfrom typing import Any\n\nfrom pydantic import BaseModel\n\n\nclass PermissionType(str, Enum):\n    USER = \"user\"\n    GROUP = \"group\"\n    DOMAIN = \"domain\"\n    ANYONE = \"anyone\"\n\n\nclass GoogleDrivePermissionDetails(BaseModel):\n    # this is \"file\", \"member\", etc.\n    # different from the `type` field within `GoogleDrivePermission`\n    # Sometimes can be not, although not sure why...\n    permission_type: str | None\n    # this is \"reader\", \"writer\", \"owner\", etc.\n    role: str\n    # this is the id of the parent permission\n    inherited_from: str | None\n\n\nclass GoogleDrivePermission(BaseModel):\n    id: str\n    # groups are also represented as email addresses within Drive\n    # will be None for domain/global permissions\n    email_address: str | None\n    type: PermissionType\n    domain: str | None  # only applies to domain permissions\n    permission_details: GoogleDrivePermissionDetails | None\n    # Whether this permission makes the file discoverable in search\n    # False means \"anyone with the link\" (not searchable/discoverable)\n    # Only applicable for domain/anyone permission types\n    allow_file_discovery: bool | None\n\n    @classmethod\n    def from_drive_permission(\n        cls, drive_permission: dict[str, Any]\n    ) -> \"GoogleDrivePermission\":\n        # we seem to only get details for permissions that are inherited\n        # we can get multiple details if a permission is inherited from multiple\n        permission_details_list = drive_permission.get(\"permissionDetails\", [])\n        permission_details: dict[str, Any] | None = (\n            permission_details_list[0] if permission_details_list else None\n        )\n        return cls(\n            id=drive_permission[\"id\"],\n            email_address=drive_permission.get(\"emailAddress\"),\n            type=PermissionType(drive_permission[\"type\"]),\n            domain=drive_permission.get(\"domain\"),\n            allow_file_discovery=drive_permission.get(\"allowFileDiscovery\"),\n            permission_details=(\n                GoogleDrivePermissionDetails(\n                    permission_type=permission_details.get(\"type\"),\n                    role=permission_details.get(\"role\", \"\"),\n                    inherited_from=permission_details.get(\"inheritedFrom\"),\n                )\n                if permission_details\n                else None\n            ),\n        )\n\n    @property\n    def inherited_from(self) -> str | None:\n        if self.permission_details:\n            return self.permission_details.inherited_from\n        return None\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/google_drive/permission_retrieval.py",
    "content": "from retry import retry\n\nfrom ee.onyx.external_permissions.google_drive.models import GoogleDrivePermission\nfrom onyx.connectors.google_utils.google_utils import execute_paginated_retrieval\nfrom onyx.connectors.google_utils.resources import GoogleDriveService\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n@retry(tries=3, delay=2, backoff=2)\ndef get_permissions_by_ids(\n    drive_service: GoogleDriveService,\n    doc_id: str,\n    permission_ids: list[str],\n) -> list[GoogleDrivePermission]:\n    \"\"\"\n    Fetches permissions for a document based on a list of permission IDs.\n\n    Args:\n        drive_service: The Google Drive service instance\n        doc_id: The ID of the document to fetch permissions for\n        permission_ids: A list of permission IDs to filter by\n\n    Returns:\n        A list of GoogleDrivePermission objects matching the provided permission IDs\n    \"\"\"\n    if not permission_ids:\n        return []\n\n    # Create a set for faster lookup\n    permission_id_set = set(permission_ids)\n\n    # Fetch all permissions for the document\n    fetched_permissions = execute_paginated_retrieval(\n        retrieval_function=drive_service.permissions().list,\n        list_key=\"permissions\",\n        fileId=doc_id,\n        fields=\"permissions(id, emailAddress, type, domain, allowFileDiscovery, permissionDetails),nextPageToken\",\n        supportsAllDrives=True,\n        continue_on_404_or_403=True,\n    )\n\n    # Filter permissions by ID and convert to GoogleDrivePermission objects\n    filtered_permissions = []\n    for permission in fetched_permissions:\n        permission_id = permission.get(\"id\")\n        if permission_id in permission_id_set:\n            google_drive_permission = GoogleDrivePermission.from_drive_permission(\n                permission\n            )\n            filtered_permissions.append(google_drive_permission)\n\n    # Log if we couldn't find all requested permission IDs\n    if len(filtered_permissions) < len(permission_ids):\n        missing_ids = permission_id_set - {p.id for p in filtered_permissions if p.id}\n        logger.warning(\n            f\"Could not find all requested permission IDs for document {doc_id}. Missing IDs: {missing_ids}\"\n        )\n\n    return filtered_permissions\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/jira/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/external_permissions/jira/doc_sync.py",
    "content": "from collections.abc import Generator\n\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction\nfrom ee.onyx.external_permissions.utils import generic_doc_sync\nfrom onyx.access.models import ElementExternalAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.jira.connector import JiraConnector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nJIRA_DOC_SYNC_TAG = \"jira_doc_sync\"\n\n\ndef jira_doc_sync(\n    cc_pair: ConnectorCredentialPair,\n    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001\n    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,\n    callback: IndexingHeartbeatInterface | None = None,\n) -> Generator[ElementExternalAccess, None, None]:\n    jira_connector = JiraConnector(\n        **cc_pair.connector.connector_specific_config,\n    )\n    credential_json = (\n        cc_pair.credential.credential_json.get_value(apply_mask=False)\n        if cc_pair.credential.credential_json\n        else {}\n    )\n    jira_connector.load_credentials(credential_json)\n\n    yield from generic_doc_sync(\n        cc_pair=cc_pair,\n        fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,\n        callback=callback,\n        doc_source=DocumentSource.JIRA,\n        slim_connector=jira_connector,\n        label=JIRA_DOC_SYNC_TAG,\n    )\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/jira/group_sync.py",
    "content": "from collections.abc import Generator\nfrom typing import Any\n\nfrom jira import JIRA\nfrom jira.exceptions import JIRAError\n\nfrom ee.onyx.db.external_perm import ExternalUserGroup\nfrom onyx.connectors.jira.utils import build_jira_client\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_ATLASSIAN_ACCOUNT_TYPE = \"atlassian\"\n_GROUP_MEMBER_PAGE_SIZE = 50\n\n# The GET /group/member endpoint was introduced in Jira 6.0.\n# Jira versions older than 6.0 do not have group management REST APIs at all.\n_MIN_JIRA_VERSION_FOR_GROUP_MEMBER = \"6.0\"\n\n\ndef _fetch_group_member_page(\n    jira_client: JIRA,\n    group_name: str,\n    start_at: int,\n) -> dict[str, Any]:\n    \"\"\"Fetch a single page from the non-deprecated GET /group/member endpoint.\n\n    The old GET /group endpoint (used by jira_client.group_members()) is deprecated\n    and decommissioned in Jira Server 10.3+. This uses the replacement endpoint\n    directly via the library's internal _get_json helper, following the same pattern\n    as enhanced_search_ids / bulk_fetch_issues in connector.py.\n\n    There is an open PR to the library to switch to this endpoint since last year:\n    https://github.com/pycontribs/jira/pull/2356\n    so once it is merged and released, we can switch to using the library function.\n    \"\"\"\n    try:\n        return jira_client._get_json(\n            \"group/member\",\n            params={\n                \"groupname\": group_name,\n                \"includeInactiveUsers\": \"false\",\n                \"startAt\": start_at,\n                \"maxResults\": _GROUP_MEMBER_PAGE_SIZE,\n            },\n        )\n    except JIRAError as e:\n        if e.status_code == 404:\n            raise RuntimeError(\n                f\"GET /group/member returned 404 for group '{group_name}'. \"\n                f\"This endpoint requires Jira {_MIN_JIRA_VERSION_FOR_GROUP_MEMBER}+. \"\n                f\"If you are running a self-hosted Jira instance, please upgrade \"\n                f\"to at least Jira {_MIN_JIRA_VERSION_FOR_GROUP_MEMBER}.\"\n            ) from e\n        raise\n\n\ndef _get_group_member_emails(\n    jira_client: JIRA,\n    group_name: str,\n) -> set[str]:\n    \"\"\"Get all member emails for a single Jira group.\n\n    Uses the non-deprecated GET /group/member endpoint which returns full user\n    objects including accountType, so we can filter out app/customer accounts\n    without making separate user() calls.\n    \"\"\"\n    emails: set[str] = set()\n    start_at = 0\n\n    while True:\n        try:\n            page = _fetch_group_member_page(jira_client, group_name, start_at)\n        except Exception as e:\n            logger.error(f\"Error fetching members for group {group_name}: {e}\")\n            raise\n\n        members: list[dict[str, Any]] = page.get(\"values\", [])\n        for member in members:\n            account_type = member.get(\"accountType\")\n            # On Jira DC < 9.0, accountType is absent; include those users.\n            # On Cloud / DC 9.0+, filter to real user accounts only.\n            if account_type is not None and account_type != _ATLASSIAN_ACCOUNT_TYPE:\n                continue\n\n            email = member.get(\"emailAddress\")\n            if email:\n                emails.add(email)\n            else:\n                logger.warning(\n                    f\"Atlassian user {member.get('accountId', 'unknown')} in group {group_name} has no visible email address\"\n                )\n\n        if page.get(\"isLast\", True) or not members:\n            break\n        start_at += len(members)\n\n    return emails\n\n\ndef jira_group_sync(\n    tenant_id: str,  # noqa: ARG001\n    cc_pair: ConnectorCredentialPair,\n) -> Generator[ExternalUserGroup, None, None]:\n    \"\"\"Sync Jira groups and their members, yielding one group at a time.\n\n    Streams group-by-group rather than accumulating all groups in memory.\n    \"\"\"\n    jira_base_url = cc_pair.connector.connector_specific_config.get(\"jira_base_url\", \"\")\n    scoped_token = cc_pair.connector.connector_specific_config.get(\n        \"scoped_token\", False\n    )\n\n    if not jira_base_url:\n        raise ValueError(\"No jira_base_url found in connector config\")\n\n    credential_json = (\n        cc_pair.credential.credential_json.get_value(apply_mask=False)\n        if cc_pair.credential.credential_json\n        else {}\n    )\n    jira_client = build_jira_client(\n        credentials=credential_json,\n        jira_base=jira_base_url,\n        scoped_token=scoped_token,\n    )\n\n    group_names = jira_client.groups()\n    if not group_names:\n        raise ValueError(f\"No groups found for cc_pair_id={cc_pair.id}\")\n\n    logger.info(f\"Found {len(group_names)} groups in Jira\")\n\n    for group_name in group_names:\n        if not group_name:\n            continue\n\n        member_emails = _get_group_member_emails(\n            jira_client=jira_client,\n            group_name=group_name,\n        )\n        if not member_emails:\n            logger.debug(f\"No members found for group {group_name}\")\n            continue\n\n        logger.debug(f\"Found {len(member_emails)} members for group {group_name}\")\n        yield ExternalUserGroup(\n            id=group_name,\n            user_emails=list(member_emails),\n        )\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/jira/models.py",
    "content": "from typing import Any\n\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom pydantic.alias_generators import to_camel\n\n\nHolder = dict[str, Any]\n\n\nclass Permission(BaseModel):\n    id: int\n    permission: str\n    holder: Holder | None\n\n\nclass User(BaseModel):\n    account_id: str\n    email_address: str\n    display_name: str\n    active: bool\n\n    model_config = ConfigDict(\n        alias_generator=to_camel,\n    )\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/jira/page_access.py",
    "content": "from collections import defaultdict\n\nfrom jira import JIRA\nfrom jira.resources import PermissionScheme\nfrom pydantic import ValidationError\n\nfrom ee.onyx.external_permissions.jira.models import Holder\nfrom ee.onyx.external_permissions.jira.models import Permission\nfrom ee.onyx.external_permissions.jira.models import User\nfrom onyx.access.models import ExternalAccess\nfrom onyx.access.utils import build_ext_group_name_for_onyx\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.utils.logger import setup_logger\n\nHolderMap = dict[str, list[Holder]]\n\n\nlogger = setup_logger()\n\n\ndef _get_role_id(holder: Holder) -> str | None:\n    return holder.get(\"value\") or holder.get(\"parameter\")\n\n\ndef _build_holder_map(permissions: list[dict]) -> dict[str, list[Holder]]:\n    \"\"\"\n    A \"Holder\" in JIRA is a person / entity who \"holds\" the corresponding permission.\n    It can have different types. They can be one of (but not limited to):\n        - user (an explicitly whitelisted user)\n        - projectRole (for project level \"roles\")\n        - reporter (the reporter of an issue)\n\n    A \"Holder\" usually has following structure:\n        - `{ \"type\": \"user\", \"value\": \"$USER_ID\", \"user\": { .. }, .. }`\n        - `{ \"type\": \"projectRole\", \"value\": \"$PROJECT_ID\", ..  }`\n\n    When we fetch the PermissionSchema from JIRA, we retrieve a list of \"Holder\"s.\n    The list of \"Holder\"s can have multiple \"Holder\"s of the same type in the list (e.g., you can have two `\"type\": \"user\"`s in\n    there, each corresponding to a different user).\n    This function constructs a map of \"Holder\" types to a list of the \"Holder\"s which contained that type.\n\n    Returns:\n        A dict from the \"Holder\" type to the actual \"Holder\" instance.\n\n    Example:\n        ```\n        {\n            \"user\": [\n                { \"type\": \"user\", \"value\": \"10000\", \"user\": { .. }, .. },\n                { \"type\": \"user\", \"value\": \"10001\", \"user\": { .. }, .. },\n            ],\n            \"projectRole\": [\n                { \"type\": \"projectRole\", \"value\": \"10010\", ..  },\n                { \"type\": \"projectRole\", \"value\": \"10011\", ..  },\n            ],\n            \"applicationRole\": [\n                { \"type\": \"applicationRole\" },\n            ],\n            ..\n        }\n        ```\n    \"\"\"\n\n    holder_map: defaultdict[str, list[Holder]] = defaultdict(list)\n\n    for raw_perm in permissions:\n        if not hasattr(raw_perm, \"raw\"):\n            logger.warning(f\"Expected a 'raw' field, but none was found: {raw_perm=}\")\n            continue\n\n        permission = Permission(**raw_perm.raw)\n\n        # We only care about ability to browse through projects + issues (not other permissions such as read/write).\n        if permission.permission != \"BROWSE_PROJECTS\":\n            continue\n\n        # In order to associate this permission to some Atlassian entity, we need the \"Holder\".\n        # If this doesn't exist, then we cannot associate this permission to anyone; just skip.\n        if not permission.holder:\n            logger.warning(\n                f\"Expected to find a permission holder, but none was found: {permission=}\"\n            )\n            continue\n\n        type = permission.holder.get(\"type\")\n        if not type:\n            logger.warning(\n                f\"Expected to find the type of permission holder, but none was found: {permission=}\"\n            )\n            continue\n\n        holder_map[type].append(permission.holder)\n\n    return holder_map\n\n\ndef _get_user_emails(user_holders: list[Holder]) -> list[str]:\n    emails = []\n\n    for user_holder in user_holders:\n        if \"user\" not in user_holder:\n            continue\n        raw_user_dict = user_holder[\"user\"]\n\n        try:\n            user_model = User.model_validate(raw_user_dict)\n        except ValidationError:\n            logger.error(\n                \"Expected to be able to serialize the raw-user-dict into an instance of `User`, but validation failed;\"\n                f\"{raw_user_dict=}\"\n            )\n            continue\n\n        emails.append(user_model.email_address)\n\n    return emails\n\n\ndef _get_user_emails_and_groups_from_project_roles(\n    jira_client: JIRA,\n    jira_project: str,\n    project_role_holders: list[Holder],\n) -> tuple[list[str], list[str]]:\n    \"\"\"\n    Get user emails and group names from project roles.\n    Returns a tuple of (emails, group_names).\n    \"\"\"\n    # Get role IDs - Cloud uses \"value\", Data Center uses \"parameter\"\n    role_ids = []\n    for holder in project_role_holders:\n        role_id = _get_role_id(holder)\n        if role_id:\n            role_ids.append(role_id)\n        else:\n            logger.warning(f\"No value or parameter in projectRole holder: {holder}\")\n\n    roles = [\n        jira_client.project_role(project=jira_project, id=role_id)\n        for role_id in role_ids\n    ]\n\n    emails = []\n    groups = []\n\n    for role in roles:\n        if not hasattr(role, \"actors\"):\n            logger.warning(f\"Project role {role} has no actors attribute\")\n            continue\n\n        for actor in role.actors:\n            # Handle group actors\n            if hasattr(actor, \"actorGroup\"):\n                group_name = getattr(actor.actorGroup, \"name\", None) or getattr(\n                    actor.actorGroup, \"displayName\", None\n                )\n                if group_name:\n                    groups.append(group_name)\n                continue\n\n            # Handle user actors\n            if hasattr(actor, \"actorUser\"):\n                account_id = getattr(actor.actorUser, \"accountId\", None)\n                if not account_id:\n                    logger.error(f\"No accountId in actorUser: {actor.actorUser}\")\n                    continue\n\n                user = jira_client.user(id=account_id)\n                if not hasattr(user, \"accountType\") or user.accountType != \"atlassian\":\n                    logger.info(\n                        f\"Skipping user {account_id} because it is not an atlassian user\"\n                    )\n                    continue\n\n                if not hasattr(user, \"emailAddress\"):\n                    msg = f\"User's email address was not able to be retrieved;  {actor.actorUser.accountId=}\"\n                    if hasattr(user, \"displayName\"):\n                        msg += f\" {actor.displayName=}\"\n                    logger.warning(msg)\n                    continue\n\n                emails.append(user.emailAddress)\n                continue\n\n            logger.debug(f\"Skipping actor type: {actor}\")\n\n    return emails, groups\n\n\ndef _build_external_access_from_holder_map(\n    jira_client: JIRA, jira_project: str, holder_map: HolderMap\n) -> ExternalAccess:\n    \"\"\"\n    Build ExternalAccess from the holder map.\n\n    Holder types handled:\n        - \"anyone\": Public project, anyone can access\n        - \"applicationRole\": All users with a Jira license can access (treated as public)\n        - \"user\": Specific users with access\n        - \"projectRole\": Project roles containing users and/or groups\n        - \"group\": Groups directly assigned in the permission scheme\n    \"\"\"\n    # Public access - anyone can view\n    if \"anyone\" in holder_map:\n        return ExternalAccess(\n            external_user_emails=set(), external_user_group_ids=set(), is_public=True\n        )\n\n    # applicationRole means all users with a Jira license can access - treat as public\n    if \"applicationRole\" in holder_map:\n        return ExternalAccess(\n            external_user_emails=set(), external_user_group_ids=set(), is_public=True\n        )\n\n    # Get emails from explicit user holders\n    user_emails = (\n        _get_user_emails(user_holders=holder_map[\"user\"])\n        if \"user\" in holder_map\n        else []\n    )\n\n    # Get emails and groups from project roles\n    project_role_user_emails: list[str] = []\n    project_role_groups: list[str] = []\n    if \"projectRole\" in holder_map:\n        project_role_user_emails, project_role_groups = (\n            _get_user_emails_and_groups_from_project_roles(\n                jira_client=jira_client,\n                jira_project=jira_project,\n                project_role_holders=holder_map[\"projectRole\"],\n            )\n        )\n\n    # Get groups directly assigned in permission scheme (common in Data Center)\n    # Format: {'type': 'group', 'parameter': 'group-name', 'expand': 'group'}\n    direct_groups: list[str] = []\n    if \"group\" in holder_map:\n        for group_holder in holder_map[\"group\"]:\n            group_name = _get_role_id(group_holder)\n            if group_name:\n                direct_groups.append(group_name)\n            else:\n                logger.error(f\"No parameter/value in group holder: {group_holder}\")\n\n    external_user_emails = set(user_emails + project_role_user_emails)\n    external_user_group_ids = set(project_role_groups + direct_groups)\n\n    return ExternalAccess(\n        external_user_emails=external_user_emails,\n        external_user_group_ids=external_user_group_ids,\n        is_public=False,\n    )\n\n\ndef get_project_permissions(\n    jira_client: JIRA,\n    jira_project: str,\n    add_prefix: bool = False,\n) -> ExternalAccess | None:\n    \"\"\"\n    Get project permissions from Jira.\n\n    add_prefix: When True, prefix group IDs with source type (for indexing path).\n               When False (default), leave unprefixed (for permission sync path).\n    \"\"\"\n    project_permissions: PermissionScheme = jira_client.project_permissionscheme(\n        project=jira_project\n    )\n\n    if not hasattr(project_permissions, \"permissions\"):\n        logger.error(f\"Project {jira_project} has no permissions attribute\")\n        return None\n\n    if not isinstance(project_permissions.permissions, list):\n        logger.error(f\"Project {jira_project} permissions is not a list\")\n        return None\n\n    holder_map = _build_holder_map(permissions=project_permissions.permissions)\n\n    external_access = _build_external_access_from_holder_map(\n        jira_client=jira_client, jira_project=jira_project, holder_map=holder_map\n    )\n\n    # Prefix group IDs with source type if requested (for indexing path)\n    if add_prefix and external_access and external_access.external_user_group_ids:\n        prefixed_groups = {\n            build_ext_group_name_for_onyx(g, DocumentSource.JIRA)\n            for g in external_access.external_user_group_ids\n        }\n        return ExternalAccess(\n            external_user_emails=external_access.external_user_emails,\n            external_user_group_ids=prefixed_groups,\n            is_public=external_access.is_public,\n        )\n\n    return external_access\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/perm_sync_types.py",
    "content": "from collections.abc import Callable\nfrom collections.abc import Generator\nfrom typing import Optional\nfrom typing import Protocol\n\nfrom ee.onyx.db.external_perm import ExternalUserGroup  # noqa\nfrom onyx.access.models import DocExternalAccess  # noqa\nfrom onyx.access.models import ElementExternalAccess  # noqa\nfrom onyx.access.models import NodeExternalAccess  # noqa\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.db.models import ConnectorCredentialPair  # noqa\nfrom onyx.db.utils import DocumentRow\nfrom onyx.db.utils import SortOrder\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface  # noqa\n\n\nclass FetchAllDocumentsFunction(Protocol):\n    \"\"\"Protocol for a function that fetches documents for a connector credential pair.\n\n    This protocol defines the interface for functions that retrieve documents\n    from the database, typically used in permission synchronization workflows.\n    \"\"\"\n\n    def __call__(\n        self,\n        sort_order: SortOrder | None,\n    ) -> list[DocumentRow]:\n        \"\"\"\n        Fetches documents for a connector credential pair.\n        \"\"\"\n        ...\n\n\nclass FetchAllDocumentsIdsFunction(Protocol):\n    \"\"\"Protocol for a function that fetches document IDs for a connector credential pair.\n\n    This protocol defines the interface for functions that retrieve document IDs\n    from the database, typically used in permission synchronization workflows.\n    \"\"\"\n\n    def __call__(\n        self,\n    ) -> list[str]:\n        \"\"\"\n        Fetches document IDs for a connector credential pair.\n        \"\"\"\n        ...\n\n\n# Defining the input/output types for the sync functions\nDocSyncFuncType = Callable[\n    [\n        ConnectorCredentialPair,\n        FetchAllDocumentsFunction,\n        FetchAllDocumentsIdsFunction,\n        Optional[IndexingHeartbeatInterface],\n    ],\n    Generator[ElementExternalAccess, None, None],\n]\n\nGroupSyncFuncType = Callable[\n    [\n        str,  # tenant_id\n        ConnectorCredentialPair,  # cc_pair\n    ],\n    Generator[ExternalUserGroup, None, None],\n]\n\n# list of chunks to be censored and the user email. returns censored chunks\nCensoringFuncType = Callable[[list[InferenceChunk], str], list[InferenceChunk]]\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/post_query_censoring.py",
    "content": "from ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs\nfrom ee.onyx.external_permissions.sync_params import get_all_censoring_enabled_sources\nfrom ee.onyx.external_permissions.sync_params import get_source_perm_sync_config\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.context.search.pipeline import InferenceChunk\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import User\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _get_all_censoring_enabled_sources() -> set[DocumentSource]:\n    \"\"\"\n    Returns the set of sources that have censoring enabled.\n    This is based on if the access_type is set to sync and the connector\n    source has a censoring config.\n\n    NOTE: This means if there is a source has a single cc_pair that is sync,\n    all chunks for that source will be censored, even if the connector that\n    indexed that chunk is not sync. This was done to avoid getting the cc_pair\n    for every single chunk.\n    \"\"\"\n    all_censoring_enabled_sources = get_all_censoring_enabled_sources()\n    with get_session_with_current_tenant() as db_session:\n        enabled_sync_connectors = get_all_auto_sync_cc_pairs(db_session)\n        return {\n            cc_pair.connector.source\n            for cc_pair in enabled_sync_connectors\n            if cc_pair.connector.source in all_censoring_enabled_sources\n        }\n\n\n# NOTE: This is only called if ee is enabled.\ndef _post_query_chunk_censoring(\n    chunks: list[InferenceChunk],\n    user: User,\n) -> list[InferenceChunk]:\n    \"\"\"\n    This function checks all chunks to see if they need to be sent to a censoring\n    function. If they do, it sends them to the censoring function and returns the\n    censored chunks. If they don't, it returns the original chunks.\n    \"\"\"\n    sources_to_censor = _get_all_censoring_enabled_sources()\n\n    # Anonymous users can only access public (non-permission-synced) content\n    if user.is_anonymous:\n        return [chunk for chunk in chunks if chunk.source_type not in sources_to_censor]\n\n    final_chunk_dict: dict[str, InferenceChunk] = {}\n    chunks_to_process: dict[DocumentSource, list[InferenceChunk]] = {}\n    for chunk in chunks:\n        # Separate out chunks that require permission post-processing by source\n        if chunk.source_type in sources_to_censor:\n            chunks_to_process.setdefault(chunk.source_type, []).append(chunk)\n        else:\n            final_chunk_dict[chunk.unique_id] = chunk\n\n    # For each source, filter out the chunks using the permission\n    # check function for that source\n    # TODO: Use a threadpool/multiprocessing to process the sources in parallel\n    for source, chunks_for_source in chunks_to_process.items():\n        sync_config = get_source_perm_sync_config(source)\n        if sync_config is None or sync_config.censoring_config is None:\n            raise ValueError(f\"No sync config found for {source}\")\n\n        censor_chunks_for_source = sync_config.censoring_config.chunk_censoring_func\n        try:\n            censored_chunks = censor_chunks_for_source(chunks_for_source, user.email)\n        except Exception as e:\n            logger.exception(\n                f\"Failed to censor chunks for source {source} so throwing out all chunks for this source and continuing: {e}\"\n            )\n            continue\n\n        for censored_chunk in censored_chunks:\n            final_chunk_dict[censored_chunk.unique_id] = censored_chunk\n\n    # IMPORTANT: make sure to retain the same ordering as the original `chunks` passed in\n    final_chunk_list: list[InferenceChunk] = []\n    for chunk in chunks:\n        # only if the chunk is in the final censored chunks, add it to the final list\n        # if it is missing, that means it was intentionally left out\n        if chunk.unique_id in final_chunk_dict:\n            final_chunk_list.append(final_chunk_dict[chunk.unique_id])\n\n    return final_chunk_list\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/salesforce/postprocessing.py",
    "content": "import time\n\nfrom ee.onyx.db.external_perm import fetch_external_groups_for_user_email_and_group_ids\nfrom ee.onyx.external_permissions.salesforce.utils import (\n    get_any_salesforce_client_for_doc_id,\n)\nfrom ee.onyx.external_permissions.salesforce.utils import get_objects_access_for_user_id\nfrom ee.onyx.external_permissions.salesforce.utils import (\n    get_salesforce_user_id_from_email,\n)\nfrom onyx.configs.app_configs import BLURB_SIZE\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n# Types\nChunkKey = tuple[str, int]  # (doc_id, chunk_id)\nContentRange = tuple[int, int | None]  # (start_index, end_index) None means to the end\n\n\n# NOTE: Used for testing timing\ndef _get_dummy_object_access_map(\n    object_ids: set[str],\n    user_email: str,  # noqa: ARG001\n    chunks: list[InferenceChunk],  # noqa: ARG001\n) -> dict[str, bool]:\n    time.sleep(0.15)\n    # return {object_id: True for object_id in object_ids}\n    import random\n\n    return {object_id: random.choice([True, False]) for object_id in object_ids}\n\n\ndef _get_objects_access_for_user_email_from_salesforce(\n    object_ids: set[str],\n    user_email: str,\n    chunks: list[InferenceChunk],\n) -> dict[str, bool] | None:\n    \"\"\"\n    This function wraps the salesforce call as we may want to change how this\n    is done in the future. (E.g. replace it with the above function)\n    \"\"\"\n    # This is cached in the function so the first query takes an extra 0.1-0.3 seconds\n    # but subsequent queries for this source are essentially instant\n    first_doc_id = chunks[0].document_id\n    with get_session_with_current_tenant() as db_session:\n        salesforce_client = get_any_salesforce_client_for_doc_id(\n            db_session, first_doc_id\n        )\n\n    # This is cached in the function so the first query takes an extra 0.1-0.3 seconds\n    # but subsequent queries by the same user are essentially instant\n    start_time = time.monotonic()\n    user_id = get_salesforce_user_id_from_email(salesforce_client, user_email)\n    end_time = time.monotonic()\n    logger.info(\n        f\"Time taken to get Salesforce user ID: {end_time - start_time} seconds\"\n    )\n    if user_id is None:\n        logger.warning(f\"User '{user_email}' not found in Salesforce\")\n        return None\n\n    # This is the only query that is not cached in the function\n    # so it takes 0.1-0.2 seconds total\n    object_id_to_access = get_objects_access_for_user_id(\n        salesforce_client, user_id, list(object_ids)\n    )\n    logger.debug(f\"Object ID to access: {object_id_to_access}\")\n    return object_id_to_access\n\n\ndef _extract_salesforce_object_id_from_url(url: str) -> str:\n    return url.split(\"/\")[-1]\n\n\ndef _get_object_ranges_for_chunk(\n    chunk: InferenceChunk,\n) -> dict[str, list[ContentRange]]:\n    \"\"\"\n    Given a chunk, return a dictionary of salesforce object ids and the content ranges\n    for that object id in the current chunk\n    \"\"\"\n    if chunk.source_links is None:\n        return {}\n\n    object_ranges: dict[str, list[ContentRange]] = {}\n    end_index = None\n    descending_source_links = sorted(\n        chunk.source_links.items(), key=lambda x: x[0], reverse=True\n    )\n    for start_index, url in descending_source_links:\n        object_id = _extract_salesforce_object_id_from_url(url)\n        if object_id not in object_ranges:\n            object_ranges[object_id] = []\n        object_ranges[object_id].append((start_index, end_index))\n        end_index = start_index\n    return object_ranges\n\n\ndef _create_empty_censored_chunk(uncensored_chunk: InferenceChunk) -> InferenceChunk:\n    \"\"\"\n    Create a copy of the unfiltered chunk where potentially sensitive content is removed\n    to be added later if the user has access to each of the sub-objects\n    \"\"\"\n    empty_censored_chunk = InferenceChunk(\n        **uncensored_chunk.model_dump(),\n    )\n    empty_censored_chunk.content = \"\"\n    empty_censored_chunk.blurb = \"\"\n    empty_censored_chunk.source_links = {}\n    return empty_censored_chunk\n\n\ndef _update_censored_chunk(\n    censored_chunk: InferenceChunk,\n    uncensored_chunk: InferenceChunk,\n    content_range: ContentRange,\n) -> InferenceChunk:\n    \"\"\"\n    Update the filtered chunk with the content and source links from the unfiltered chunk using the content ranges\n    \"\"\"\n    start_index, end_index = content_range\n\n    # Update the content of the filtered chunk\n    permitted_content = uncensored_chunk.content[start_index:end_index]\n    permitted_section_start_index = len(censored_chunk.content)\n    censored_chunk.content = permitted_content + censored_chunk.content\n\n    # Update the source links of the filtered chunk\n    if uncensored_chunk.source_links is not None:\n        if censored_chunk.source_links is None:\n            censored_chunk.source_links = {}\n        link_content = uncensored_chunk.source_links[start_index]\n        censored_chunk.source_links[permitted_section_start_index] = link_content\n\n    # Update the blurb of the filtered chunk\n    censored_chunk.blurb = censored_chunk.content[:BLURB_SIZE]\n\n    return censored_chunk\n\n\n# TODO: Generalize this to other sources\ndef censor_salesforce_chunks(\n    chunks: list[InferenceChunk],\n    user_email: str,\n    # This is so we can provide a mock access map for testing\n    access_map: dict[str, bool] | None = None,\n) -> list[InferenceChunk]:\n    # object_id -> list[((doc_id, chunk_id), (start_index, end_index))]\n    object_to_content_map: dict[str, list[tuple[ChunkKey, ContentRange]]] = {}\n\n    # (doc_id, chunk_id) -> chunk\n    uncensored_chunks: dict[ChunkKey, InferenceChunk] = {}\n\n    # keep track of all object ids that we have seen to make it easier to get\n    # the access for these object ids\n    object_ids: set[str] = set()\n\n    for chunk in chunks:\n        chunk_key = (chunk.document_id, chunk.chunk_id)\n        # create a dictionary to quickly look up the unfiltered chunk\n        uncensored_chunks[chunk_key] = chunk\n\n        # for each chunk, get a dictionary of object ids and the content ranges\n        # for that object id in the current chunk\n        object_ranges_for_chunk = _get_object_ranges_for_chunk(chunk)\n        for object_id, ranges in object_ranges_for_chunk.items():\n            object_ids.add(object_id)\n            for start_index, end_index in ranges:\n                object_to_content_map.setdefault(object_id, []).append(\n                    (chunk_key, (start_index, end_index))\n                )\n\n    # This is so we can provide a mock access map for testing\n    if access_map is None:\n        access_map = _get_objects_access_for_user_email_from_salesforce(\n            object_ids=object_ids,\n            user_email=user_email,\n            chunks=chunks,\n        )\n        if access_map is None:\n            # If the user is not found in Salesforce, access_map will be None\n            # so we should just return an empty list because no chunks will be\n            # censored\n            return []\n\n    censored_chunks: dict[ChunkKey, InferenceChunk] = {}\n    for object_id, content_list in object_to_content_map.items():\n        # if the user does not have access to the object, or the object is not in the\n        # access_map, do not include its content in the filtered chunks\n        if not access_map.get(object_id, False):\n            continue\n\n        # if we got this far, the user has access to the object so we can create or update\n        # the filtered chunk(s) for this object\n        # NOTE: we only create a censored chunk if the user has access to some\n        # part of the chunk\n        for chunk_key, content_range in content_list:\n            if chunk_key not in censored_chunks:\n                censored_chunks[chunk_key] = _create_empty_censored_chunk(\n                    uncensored_chunks[chunk_key]\n                )\n\n            uncensored_chunk = uncensored_chunks[chunk_key]\n            censored_chunk = _update_censored_chunk(\n                censored_chunk=censored_chunks[chunk_key],\n                uncensored_chunk=uncensored_chunk,\n                content_range=content_range,\n            )\n            censored_chunks[chunk_key] = censored_chunk\n\n    return list(censored_chunks.values())\n\n\n# NOTE: This is not used anywhere.\ndef _get_objects_access_for_user_email(\n    object_ids: set[str], user_email: str\n) -> dict[str, bool]:\n    with get_session_with_current_tenant() as db_session:\n        external_groups = fetch_external_groups_for_user_email_and_group_ids(\n            db_session=db_session,\n            user_email=user_email,\n            # Maybe make a function that adds a salesforce prefix to the group ids\n            group_ids=list(object_ids),\n        )\n        external_group_ids = {group.external_user_group_id for group in external_groups}\n        return {group_id: group_id in external_group_ids for group_id in object_ids}\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/salesforce/utils.py",
    "content": "from simple_salesforce import Salesforce\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.document import get_cc_pairs_for_document\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_ANY_SALESFORCE_CLIENT: Salesforce | None = None\n\n\ndef get_any_salesforce_client_for_doc_id(\n    db_session: Session, doc_id: str\n) -> Salesforce:\n    \"\"\"\n    We create a salesforce client for the first cc_pair for the first doc_id where\n    salesforce censoring is enabled. After that we just cache and reuse the same\n    client for all queries.\n\n    We do this to reduce the number of postgres queries we make at query time.\n\n    This may be problematic if they are using multiple cc_pairs for salesforce.\n    E.g. there are 2 different credential sets for 2 different salesforce cc_pairs\n    but only one has the permissions to access the permissions needed for the query.\n    \"\"\"\n\n    # NOTE: this global seems very very bad\n    global _ANY_SALESFORCE_CLIENT\n    if _ANY_SALESFORCE_CLIENT is None:\n        cc_pairs = get_cc_pairs_for_document(db_session, doc_id)\n        first_cc_pair = cc_pairs[0]\n        credential_json = (\n            first_cc_pair.credential.credential_json.get_value(apply_mask=False)\n            if first_cc_pair.credential.credential_json\n            else {}\n        )\n        _ANY_SALESFORCE_CLIENT = Salesforce(\n            username=credential_json[\"sf_username\"],\n            password=credential_json[\"sf_password\"],\n            security_token=credential_json[\"sf_security_token\"],\n        )\n    return _ANY_SALESFORCE_CLIENT\n\n\ndef _query_salesforce_user_id(sf_client: Salesforce, user_email: str) -> str | None:\n    query = f\"SELECT Id FROM User WHERE Username = '{user_email}' AND IsActive = true\"\n    result = sf_client.query(query)\n    if len(result[\"records\"]) > 0:\n        return result[\"records\"][0][\"Id\"]\n\n    # try emails\n    query = f\"SELECT Id FROM User WHERE Email = '{user_email}' AND IsActive = true\"\n    result = sf_client.query(query)\n    if len(result[\"records\"]) > 0:\n        return result[\"records\"][0][\"Id\"]\n\n    return None\n\n\n# This contains only the user_ids that we have found in Salesforce.\n# If we don't know their user_id, we don't store anything in the cache.\n_CACHED_SF_EMAIL_TO_ID_MAP: dict[str, str] = {}\n\n\ndef get_salesforce_user_id_from_email(\n    sf_client: Salesforce,\n    user_email: str,\n) -> str | None:\n    \"\"\"\n    We cache this so we don't have to query Salesforce for every query and salesforce\n    user IDs never change.\n    Memory usage is fine because we just store 2 small strings per user.\n\n    If the email is not in the cache, we check the local salesforce database for the info.\n    If the user is not found in the local salesforce database, we query Salesforce.\n    Whatever we get back from Salesforce is added to the database.\n    If no user_id is found, we add a NULL_ID_STRING to the database for that email so\n    we don't query Salesforce again (which is slow) but we still check the local salesforce\n    database every query until a user id is found. This is acceptable because the query time\n    is quite fast.\n    If a user_id is created in Salesforce, it will be added to the local salesforce database\n    next time the connector is run. Then that value will be found in this function and cached.\n\n    NOTE: First time this runs, it may be slow if it hasn't already been updated in the local\n    salesforce database. (Around 0.1-0.3 seconds)\n    If it's cached or stored in the local salesforce database, it's fast (<0.001 seconds).\n    \"\"\"\n\n    # NOTE: this global seems bad\n    global _CACHED_SF_EMAIL_TO_ID_MAP\n    if user_email in _CACHED_SF_EMAIL_TO_ID_MAP:\n        if _CACHED_SF_EMAIL_TO_ID_MAP[user_email] is not None:\n            return _CACHED_SF_EMAIL_TO_ID_MAP[user_email]\n\n    # some caching via sqlite existed here before ... check history if interested\n\n    # ...query Salesforce and store the result in the database\n    user_id = _query_salesforce_user_id(sf_client, user_email)\n\n    if user_id is None:\n        return None\n\n    # If the found user_id is real, cache it\n    _CACHED_SF_EMAIL_TO_ID_MAP[user_email] = user_id\n    return user_id\n\n\n_MAX_RECORD_IDS_PER_QUERY = 200\n\n\ndef get_objects_access_for_user_id(\n    salesforce_client: Salesforce,\n    user_id: str,\n    record_ids: list[str],\n) -> dict[str, bool]:\n    \"\"\"\n    Salesforce has a limit of 200 record ids per query. So we just truncate\n    the list of record ids to 200. We only ever retrieve 50 chunks at a time\n    so this should be fine (unlikely that we retrieve all 50 chunks contain\n    4 unique objects).\n    If we decide this isn't acceptable we can use multiple queries but they\n    should be in parallel so query time doesn't get too long.\n    \"\"\"\n    truncated_record_ids = record_ids[:_MAX_RECORD_IDS_PER_QUERY]\n    record_ids_str = \"'\" + \"','\".join(truncated_record_ids) + \"'\"\n    access_query = f\"\"\"\n    SELECT RecordId, HasReadAccess\n    FROM UserRecordAccess\n    WHERE RecordId IN ({record_ids_str})\n    AND UserId = '{user_id}'\n    \"\"\"\n    result = salesforce_client.query_all(access_query)\n    return {record[\"RecordId\"]: record[\"HasReadAccess\"] for record in result[\"records\"]}\n\n\n_CC_PAIR_ID_SALESFORCE_CLIENT_MAP: dict[int, Salesforce] = {}\n_DOC_ID_TO_CC_PAIR_ID_MAP: dict[str, int] = {}\n\n\n# NOTE: This is not used anywhere.\ndef _get_salesforce_client_for_doc_id(db_session: Session, doc_id: str) -> Salesforce:\n    \"\"\"\n    Uses a document id to get the cc_pair that indexed that document and uses the credentials\n    for that cc_pair to create a Salesforce client.\n    Problems:\n    - There may be multiple cc_pairs for a document, and we don't know which one to use.\n        - right now we just use the first one\n    - Building a new Salesforce client for each document is slow.\n    - Memory usage could be an issue as we build these dictionaries.\n    \"\"\"\n    if doc_id not in _DOC_ID_TO_CC_PAIR_ID_MAP:\n        cc_pairs = get_cc_pairs_for_document(db_session, doc_id)\n        first_cc_pair = cc_pairs[0]\n        _DOC_ID_TO_CC_PAIR_ID_MAP[doc_id] = first_cc_pair.id\n\n    cc_pair_id = _DOC_ID_TO_CC_PAIR_ID_MAP[doc_id]\n    if cc_pair_id not in _CC_PAIR_ID_SALESFORCE_CLIENT_MAP:\n        cc_pair = get_connector_credential_pair_from_id(\n            db_session=db_session,\n            cc_pair_id=cc_pair_id,\n        )\n        if cc_pair is None:\n            raise ValueError(f\"CC pair {cc_pair_id} not found\")\n        credential_json = (\n            cc_pair.credential.credential_json.get_value(apply_mask=False)\n            if cc_pair.credential.credential_json\n            else {}\n        )\n        _CC_PAIR_ID_SALESFORCE_CLIENT_MAP[cc_pair_id] = Salesforce(\n            username=credential_json[\"sf_username\"],\n            password=credential_json[\"sf_password\"],\n            security_token=credential_json[\"sf_security_token\"],\n        )\n\n    return _CC_PAIR_ID_SALESFORCE_CLIENT_MAP[cc_pair_id]\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/sharepoint/doc_sync.py",
    "content": "from collections.abc import Generator\n\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction\nfrom ee.onyx.external_permissions.utils import generic_doc_sync\nfrom onyx.access.models import ElementExternalAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.sharepoint.connector import SharepointConnector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nSHAREPOINT_DOC_SYNC_TAG = \"sharepoint_doc_sync\"\n\n\ndef sharepoint_doc_sync(\n    cc_pair: ConnectorCredentialPair,\n    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001\n    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,\n    callback: IndexingHeartbeatInterface | None = None,\n) -> Generator[ElementExternalAccess, None, None]:\n    sharepoint_connector = SharepointConnector(\n        **cc_pair.connector.connector_specific_config,\n    )\n    credential_json = (\n        cc_pair.credential.credential_json.get_value(apply_mask=False)\n        if cc_pair.credential.credential_json\n        else {}\n    )\n    sharepoint_connector.load_credentials(credential_json)\n\n    yield from generic_doc_sync(\n        cc_pair=cc_pair,\n        fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,\n        callback=callback,\n        doc_source=DocumentSource.SHAREPOINT,\n        slim_connector=sharepoint_connector,\n        label=SHAREPOINT_DOC_SYNC_TAG,\n    )\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/sharepoint/group_sync.py",
    "content": "from collections.abc import Generator\n\nfrom office365.sharepoint.client_context import ClientContext  # type: ignore[import-untyped]\n\nfrom ee.onyx.db.external_perm import ExternalUserGroup\nfrom ee.onyx.external_permissions.sharepoint.permission_utils import (\n    get_sharepoint_external_groups,\n)\nfrom onyx.configs.app_configs import SHAREPOINT_EXHAUSTIVE_AD_ENUMERATION\nfrom onyx.connectors.sharepoint.connector import acquire_token_for_rest\nfrom onyx.connectors.sharepoint.connector import SharepointConnector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef sharepoint_group_sync(\n    tenant_id: str,  # noqa: ARG001\n    cc_pair: ConnectorCredentialPair,\n) -> Generator[ExternalUserGroup, None, None]:\n    \"\"\"Sync SharePoint groups and their members\"\"\"\n\n    # Get site URLs from connector config\n    connector_config = cc_pair.connector.connector_specific_config\n\n    # Create SharePoint connector instance and load credentials\n    connector = SharepointConnector(**connector_config)\n    credential_json = (\n        cc_pair.credential.credential_json.get_value(apply_mask=False)\n        if cc_pair.credential.credential_json\n        else {}\n    )\n    connector.load_credentials(credential_json)\n\n    if not connector.msal_app:\n        raise RuntimeError(\"MSAL app not initialized in connector\")\n\n    if not connector.sp_tenant_domain:\n        raise RuntimeError(\"Tenant domain not initialized in connector\")\n\n    # Get site descriptors from connector (either configured sites or all sites)\n    site_descriptors = connector.site_descriptors or connector.fetch_sites()\n\n    if not site_descriptors:\n        raise RuntimeError(\"No SharePoint sites found for group sync\")\n\n    logger.info(f\"Processing {len(site_descriptors)} sites for group sync\")\n\n    enumerate_all = connector_config.get(\n        \"exhaustive_ad_enumeration\", SHAREPOINT_EXHAUSTIVE_AD_ENUMERATION\n    )\n\n    msal_app = connector.msal_app\n    sp_tenant_domain = connector.sp_tenant_domain\n    sp_domain_suffix = connector.sharepoint_domain_suffix\n    for site_descriptor in site_descriptors:\n        logger.debug(f\"Processing site: {site_descriptor.url}\")\n\n        ctx = ClientContext(site_descriptor.url).with_access_token(\n            lambda: acquire_token_for_rest(msal_app, sp_tenant_domain, sp_domain_suffix)\n        )\n\n        external_groups = get_sharepoint_external_groups(\n            ctx,\n            connector.graph_client,\n            graph_api_base=connector.graph_api_base,\n            get_access_token=connector._get_graph_access_token,\n            enumerate_all_ad_groups=enumerate_all,\n        )\n\n        # Yield each group\n        for group in external_groups:\n            logger.debug(\n                f\"Found group: {group.id} with {len(group.user_emails)} members\"\n            )\n            yield group\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/sharepoint/permission_utils.py",
    "content": "import re\nimport time\nfrom collections import deque\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom typing import Any\nfrom urllib.parse import urlparse\n\nimport requests as _requests\nfrom office365.graph_client import GraphClient  # type: ignore[import-untyped]\nfrom office365.onedrive.driveitems.driveItem import DriveItem  # type: ignore[import-untyped]\nfrom office365.runtime.client_request import ClientRequestException  # type: ignore\nfrom office365.sharepoint.client_context import ClientContext  # type: ignore[import-untyped]\nfrom office365.sharepoint.permissions.securable_object import RoleAssignmentCollection  # type: ignore[import-untyped]\nfrom pydantic import BaseModel\n\nfrom ee.onyx.db.external_perm import ExternalUserGroup\nfrom onyx.access.models import ExternalAccess\nfrom onyx.access.utils import build_ext_group_name_for_onyx\nfrom onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.sharepoint.connector import GRAPH_API_MAX_RETRIES\nfrom onyx.connectors.sharepoint.connector import GRAPH_API_RETRYABLE_STATUSES\nfrom onyx.connectors.sharepoint.connector import SHARED_DOCUMENTS_MAP_REVERSE\nfrom onyx.connectors.sharepoint.connector import sleep_and_retry\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n# These values represent different types of SharePoint principals used in permission assignments\nUSER_PRINCIPAL_TYPE = 1  # Individual user accounts\nANONYMOUS_USER_PRINCIPAL_TYPE = 3  # Anonymous/unauthenticated users (public access)\nAZURE_AD_GROUP_PRINCIPAL_TYPE = 4  # Azure Active Directory security groups\nSHAREPOINT_GROUP_PRINCIPAL_TYPE = 8  # SharePoint site groups (local to the site)\nMICROSOFT_DOMAIN = \".onmicrosoft\"\n# Limited Access role type, limited access is a travel through permission not a actual permission\nLIMITED_ACCESS_ROLE_TYPES = [1, 9]\nLIMITED_ACCESS_ROLE_NAMES = [\"Limited Access\", \"Web-Only Limited Access\"]\n\n\nAD_GROUP_ENUMERATION_THRESHOLD = 100_000\n\n\ndef _graph_api_get(\n    url: str,\n    get_access_token: Callable[[], str],\n    params: dict[str, str] | None = None,\n) -> dict[str, Any]:\n    \"\"\"Authenticated Graph API GET with retry on transient errors.\"\"\"\n    for attempt in range(GRAPH_API_MAX_RETRIES + 1):\n        access_token = get_access_token()\n        headers = {\"Authorization\": f\"Bearer {access_token}\"}\n        try:\n            resp = _requests.get(\n                url, headers=headers, params=params, timeout=REQUEST_TIMEOUT_SECONDS\n            )\n            if (\n                resp.status_code in GRAPH_API_RETRYABLE_STATUSES\n                and attempt < GRAPH_API_MAX_RETRIES\n            ):\n                wait = min(int(resp.headers.get(\"Retry-After\", str(2**attempt))), 60)\n                logger.warning(\n                    f\"Graph API {resp.status_code} on attempt {attempt + 1}, retrying in {wait}s: {url}\"\n                )\n                time.sleep(wait)\n                continue\n            resp.raise_for_status()\n            return resp.json()\n        except (_requests.ConnectionError, _requests.Timeout, _requests.HTTPError):\n            if attempt < GRAPH_API_MAX_RETRIES:\n                wait = min(2**attempt, 60)\n                logger.warning(\n                    f\"Graph API connection error on attempt {attempt + 1}, retrying in {wait}s: {url}\"\n                )\n                time.sleep(wait)\n                continue\n            raise\n    raise RuntimeError(\n        f\"Graph API request failed after {GRAPH_API_MAX_RETRIES + 1} attempts: {url}\"\n    )\n\n\ndef _iter_graph_collection(\n    initial_url: str,\n    get_access_token: Callable[[], str],\n    params: dict[str, str] | None = None,\n) -> Generator[dict[str, Any], None, None]:\n    \"\"\"Paginate through a Graph API collection, yielding items one at a time.\"\"\"\n    url: str | None = initial_url\n    while url:\n        data = _graph_api_get(url, get_access_token, params)\n        params = None\n        yield from data.get(\"value\", [])\n        url = data.get(\"@odata.nextLink\")\n\n\ndef _normalize_email(email: str) -> str:\n    if MICROSOFT_DOMAIN in email:\n        return email.replace(MICROSOFT_DOMAIN, \"\")\n    return email\n\n\nclass SharepointGroup(BaseModel):\n    model_config = {\"frozen\": True}\n\n    name: str\n    login_name: str\n    principal_type: int\n\n\nclass GroupsResult(BaseModel):\n    groups_to_emails: dict[str, set[str]]\n    found_public_group: bool\n\n\ndef _get_azuread_group_guid_by_name(\n    graph_client: GraphClient, group_name: str\n) -> str | None:\n    try:\n        # Search for groups by display name\n        groups = sleep_and_retry(\n            graph_client.groups.filter(f\"displayName eq '{group_name}'\").get(),\n            \"get_azuread_group_guid_by_name\",\n        )\n\n        if groups and len(groups) > 0:\n            return groups[0].id\n\n        return None\n\n    except Exception as e:\n        logger.error(f\"Failed to get Azure AD group GUID for name {group_name}: {e}\")\n        return None\n\n\ndef _extract_guid_from_claims_token(claims_token: str) -> str | None:\n\n    try:\n        # Pattern to match GUID in claims token\n        # Claims tokens often have format: c:0o.c|provider|GUID_suffix\n        guid_pattern = r\"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\"\n\n        match = re.search(guid_pattern, claims_token, re.IGNORECASE)\n        if match:\n            return match.group(1)\n\n        return None\n\n    except Exception as e:\n        logger.error(f\"Failed to extract GUID from claims token {claims_token}: {e}\")\n        return None\n\n\ndef _get_group_guid_from_identifier(\n    graph_client: GraphClient, identifier: str\n) -> str | None:\n    try:\n        # Check if it's already a GUID\n        guid_pattern = r\"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$\"\n        if re.match(guid_pattern, identifier, re.IGNORECASE):\n            return identifier\n\n        # Check if it's a SharePoint claims token\n        if identifier.startswith(\"c:0\") and \"|\" in identifier:\n            guid = _extract_guid_from_claims_token(identifier)\n            if guid:\n                logger.info(f\"Extracted GUID {guid} from claims token {identifier}\")\n                return guid\n\n        # Try to search by display name as fallback\n        return _get_azuread_group_guid_by_name(graph_client, identifier)\n\n    except Exception as e:\n        logger.error(f\"Failed to get group GUID from identifier {identifier}: {e}\")\n        return None\n\n\ndef _get_security_group_owners(graph_client: GraphClient, group_id: str) -> list[str]:\n    try:\n        # Get group owners using Graph API\n        group = graph_client.groups[group_id]\n        owners = sleep_and_retry(\n            group.owners.get_all(page_loaded=lambda _: None),\n            \"get_security_group_owners\",\n        )\n\n        owner_emails: list[str] = []\n        logger.info(f\"Owners: {owners}\")\n\n        for owner in owners:\n            owner_data = owner.to_json()\n\n            # Extract email from the JSON data\n            mail: str | None = owner_data.get(\"mail\")\n            user_principal_name: str | None = owner_data.get(\"userPrincipalName\")\n\n            # Check if owner is a user and has an email\n            if mail:\n                if MICROSOFT_DOMAIN in mail:\n                    mail = mail.replace(MICROSOFT_DOMAIN, \"\")\n                owner_emails.append(mail)\n            elif user_principal_name:\n                if MICROSOFT_DOMAIN in user_principal_name:\n                    user_principal_name = user_principal_name.replace(\n                        MICROSOFT_DOMAIN, \"\"\n                    )\n                owner_emails.append(user_principal_name)\n\n        logger.info(\n            f\"Retrieved {len(owner_emails)} owners from security group {group_id}\"\n        )\n        return owner_emails\n\n    except Exception as e:\n        logger.error(f\"Failed to get security group owners for group {group_id}: {e}\")\n        return []\n\n\ndef _get_sharepoint_list_item_id(drive_item: DriveItem) -> str | None:\n\n    try:\n        # First try to get the list item directly from the drive item\n        if hasattr(drive_item, \"listItem\"):\n            list_item = drive_item.listItem\n            if list_item:\n                # Load the list item properties to get the ID\n                sleep_and_retry(list_item.get(), \"get_sharepoint_list_item_id\")\n                if hasattr(list_item, \"id\") and list_item.id:\n                    return str(list_item.id)\n\n        # The SharePoint list item ID is typically available in the sharepointIds property\n        sharepoint_ids = getattr(drive_item, \"sharepoint_ids\", None)\n        if sharepoint_ids and hasattr(sharepoint_ids, \"listItemId\"):\n            return sharepoint_ids.listItemId\n\n        # Alternative: try to get it from the properties\n        properties = getattr(drive_item, \"properties\", None)\n        if properties:\n            # Sometimes the SharePoint list item ID is in the properties\n            for prop_name, prop_value in properties.items():\n                if \"listitemid\" in prop_name.lower():\n                    return str(prop_value)\n\n        return None\n    except Exception as e:\n        logger.error(\n            f\"Error getting SharePoint list item ID for item {drive_item.id}: {e}\"\n        )\n        raise e\n\n\ndef _is_public_item(\n    drive_item: DriveItem,\n    treat_sharing_link_as_public: bool = False,\n) -> bool:\n    if not treat_sharing_link_as_public:\n        return False\n\n    try:\n        permissions = sleep_and_retry(\n            drive_item.permissions.get_all(page_loaded=lambda _: None), \"is_public_item\"\n        )\n        for permission in permissions:\n            if permission.link and permission.link.scope in (\n                \"anonymous\",\n                \"organization\",\n            ):\n                return True\n        return False\n    except Exception as e:\n        logger.error(f\"Failed to check if item {drive_item.id} is public: {e}\")\n        return False\n\n\ndef _is_public_login_name(login_name: str) -> bool:\n    # Patterns that indicate public access\n    # This list is derived from the below link\n    # https://learn.microsoft.com/en-us/answers/questions/2085339/guid-in-the-loginname-of-site-user-everyone-except\n    public_login_patterns: list[str] = [\n        \"c:0-.f|rolemanager|spo-grid-all-users/\",\n        \"c:0(.s|true\",\n    ]\n    for pattern in public_login_patterns:\n        if pattern in login_name:\n            logger.info(f\"Login name {login_name} is public\")\n            return True\n    return False\n\n\n# AD groups allows same display name for multiple groups, so we need to add the GUID to the name\ndef _get_group_name_with_suffix(\n    login_name: str, group_name: str, graph_client: GraphClient\n) -> str:\n    ad_group_suffix = _get_group_guid_from_identifier(graph_client, login_name)\n    return f\"{group_name}_{ad_group_suffix}\"\n\n\ndef _get_sharepoint_groups(\n    client_context: ClientContext, group_name: str, graph_client: GraphClient\n) -> tuple[set[SharepointGroup], set[str]]:\n\n    groups: set[SharepointGroup] = set()\n    user_emails: set[str] = set()\n\n    def process_users(users: list[Any]) -> None:\n        nonlocal groups, user_emails\n\n        for user in users:\n            logger.debug(f\"User: {user.to_json()}\")\n            if user.principal_type == USER_PRINCIPAL_TYPE and hasattr(\n                user, \"user_principal_name\"\n            ):\n                if user.user_principal_name:\n                    email = user.user_principal_name\n                    if MICROSOFT_DOMAIN in email:\n                        email = email.replace(MICROSOFT_DOMAIN, \"\")\n                    user_emails.add(email)\n                else:\n                    logger.warning(\n                        f\"User don't have a user principal name: {user.login_name}\"\n                    )\n            elif user.principal_type in [\n                AZURE_AD_GROUP_PRINCIPAL_TYPE,\n                SHAREPOINT_GROUP_PRINCIPAL_TYPE,\n            ]:\n                name = user.title\n                if user.principal_type == AZURE_AD_GROUP_PRINCIPAL_TYPE:\n                    name = _get_group_name_with_suffix(\n                        user.login_name, name, graph_client\n                    )\n                groups.add(\n                    SharepointGroup(\n                        login_name=user.login_name,\n                        principal_type=user.principal_type,\n                        name=name,\n                    )\n                )\n\n    group = client_context.web.site_groups.get_by_name(group_name)\n    sleep_and_retry(\n        group.users.get_all(page_loaded=process_users), \"get_sharepoint_groups\"\n    )\n\n    return groups, user_emails\n\n\ndef _get_azuread_groups(\n    graph_client: GraphClient, group_name: str\n) -> tuple[set[SharepointGroup], set[str]]:\n\n    group_id = _get_group_guid_from_identifier(graph_client, group_name)\n    if not group_id:\n        logger.error(f\"Failed to get Azure AD group GUID for name {group_name}\")\n        return set(), set()\n    group = graph_client.groups[group_id]\n    groups: set[SharepointGroup] = set()\n    user_emails: set[str] = set()\n\n    def process_members(members: list[Any]) -> None:\n        nonlocal groups, user_emails\n\n        for member in members:\n            member_data = member.to_json()\n            logger.debug(f\"Member: {member_data}\")\n            # Check for user-specific attributes\n            user_principal_name = member_data.get(\"userPrincipalName\")\n            mail = member_data.get(\"mail\")\n            display_name = member_data.get(\"displayName\") or member_data.get(\n                \"display_name\"\n            )\n\n            # Check object attributes directly (if available)\n            is_user = False\n            is_group = False\n\n            # Users typically have userPrincipalName or mail\n            if user_principal_name or (mail and \"@\" in str(mail)):\n                is_user = True\n            # Groups typically have displayName but no userPrincipalName\n            elif display_name and not user_principal_name:\n                # Additional check: try to access group-specific properties\n                if (\n                    hasattr(member, \"groupTypes\")\n                    or member_data.get(\"groupTypes\") is not None\n                ):\n                    is_group = True\n                # Or check if it has an 'id' field typical for groups\n                elif member_data.get(\"id\") and not user_principal_name:\n                    is_group = True\n\n            # Check the object type name (fallback)\n            if not is_user and not is_group:\n                obj_type = type(member).__name__.lower()\n                if \"user\" in obj_type:\n                    is_user = True\n                elif \"group\" in obj_type:\n                    is_group = True\n\n            # Process based on identification\n            if is_user:\n                if user_principal_name:\n                    email = user_principal_name\n                    if MICROSOFT_DOMAIN in email:\n                        email = email.replace(MICROSOFT_DOMAIN, \"\")\n                    user_emails.add(email)\n                elif mail:\n                    email = mail\n                    if MICROSOFT_DOMAIN in email:\n                        email = email.replace(MICROSOFT_DOMAIN, \"\")\n                    user_emails.add(email)\n                logger.info(f\"Added user: {user_principal_name or mail}\")\n            elif is_group:\n                if not display_name:\n                    logger.error(f\"No display name for group: {member_data.get('id')}\")\n                    continue\n                name = _get_group_name_with_suffix(\n                    member_data.get(\"id\", \"\"), display_name, graph_client\n                )\n                groups.add(\n                    SharepointGroup(\n                        login_name=member_data.get(\"id\", \"\"),  # Use ID for groups\n                        principal_type=AZURE_AD_GROUP_PRINCIPAL_TYPE,\n                        name=name,\n                    )\n                )\n                logger.info(f\"Added group: {name}\")\n            else:\n                # Log unidentified members for debugging\n                logger.warning(f\"Could not identify member type for: {member_data}\")\n\n    sleep_and_retry(\n        group.members.get_all(page_loaded=process_members), \"get_azuread_groups\"\n    )\n\n    owner_emails = _get_security_group_owners(graph_client, group_id)\n    user_emails.update(owner_emails)\n\n    return groups, user_emails\n\n\ndef _get_groups_and_members_recursively(\n    client_context: ClientContext,\n    graph_client: GraphClient,\n    groups: set[SharepointGroup],\n    is_group_sync: bool = False,\n) -> GroupsResult:\n    \"\"\"\n    Get all groups and their members recursively.\n    \"\"\"\n    group_queue: deque[SharepointGroup] = deque(groups)\n    visited_groups: set[str] = set()\n    visited_group_name_to_emails: dict[str, set[str]] = {}\n    found_public_group = False\n    while group_queue:\n        group = group_queue.popleft()\n        if group.login_name in visited_groups:\n            continue\n        visited_groups.add(group.login_name)\n        visited_group_name_to_emails[group.name] = set()\n        logger.info(\n            f\"Processing group: {group.name} principal type: {group.principal_type}\"\n        )\n        if group.principal_type == SHAREPOINT_GROUP_PRINCIPAL_TYPE:\n            group_info, user_emails = _get_sharepoint_groups(\n                client_context, group.login_name, graph_client\n            )\n            visited_group_name_to_emails[group.name].update(user_emails)\n            if group_info:\n                group_queue.extend(group_info)\n        if group.principal_type == AZURE_AD_GROUP_PRINCIPAL_TYPE:\n            try:\n                # if the site is public, we have default groups assigned to it, so we return early\n                if _is_public_login_name(group.login_name):\n                    found_public_group = True\n                    if not is_group_sync:\n                        return GroupsResult(\n                            groups_to_emails={}, found_public_group=True\n                        )\n                    else:\n                        # we don't want to sync public groups, so we skip them\n                        continue\n                group_info, user_emails = _get_azuread_groups(\n                    graph_client, group.login_name\n                )\n                visited_group_name_to_emails[group.name].update(user_emails)\n                if group_info:\n                    group_queue.extend(group_info)\n            except ClientRequestException as e:\n                # If the group is not found, we skip it. There is a chance that group is still referenced\n                # in sharepoint but it is removed from Azure AD. There is no actual documentation on this, but based on\n                # our testing we have seen this happen.\n                if e.response is not None and e.response.status_code == 404:\n                    logger.warning(f\"Group {group.login_name} not found\")\n                    continue\n                raise e\n\n    return GroupsResult(\n        groups_to_emails=visited_group_name_to_emails,\n        found_public_group=found_public_group,\n    )\n\n\ndef get_external_access_from_sharepoint(\n    client_context: ClientContext,\n    graph_client: GraphClient,\n    drive_name: str | None,\n    drive_item: DriveItem | None,\n    site_page: dict[str, Any] | None,\n    add_prefix: bool = False,\n    treat_sharing_link_as_public: bool = False,\n) -> ExternalAccess:\n    \"\"\"\n    Get external access information from SharePoint.\n    \"\"\"\n    groups: set[SharepointGroup] = set()\n    user_emails: set[str] = set()\n    group_ids: set[str] = set()\n\n    # Add all members to a processing set first\n    def add_user_and_group_to_sets(\n        role_assignments: RoleAssignmentCollection,\n    ) -> None:\n        nonlocal user_emails, groups\n        for assignment in role_assignments:\n            logger.debug(f\"Assignment: {assignment.to_json()}\")\n            if assignment.role_definition_bindings:\n                is_limited_access = True\n                for role_definition_binding in assignment.role_definition_bindings:\n                    if (\n                        role_definition_binding.role_type_kind\n                        not in LIMITED_ACCESS_ROLE_TYPES\n                        or role_definition_binding.name not in LIMITED_ACCESS_ROLE_NAMES\n                    ):\n                        is_limited_access = False\n                        break\n\n                # Skip if the role is only Limited Access, because this is not a actual permission its a travel through permission\n                if is_limited_access:\n                    logger.info(\n                        \"Skipping assignment because it has only Limited Access role\"\n                    )\n                    continue\n            if assignment.member:\n                member = assignment.member\n                if member.principal_type == USER_PRINCIPAL_TYPE and hasattr(\n                    member, \"user_principal_name\"\n                ):\n                    email = member.user_principal_name\n                    if MICROSOFT_DOMAIN in email:\n                        email = email.replace(MICROSOFT_DOMAIN, \"\")\n                    user_emails.add(email)\n                elif member.principal_type in [\n                    AZURE_AD_GROUP_PRINCIPAL_TYPE,\n                    SHAREPOINT_GROUP_PRINCIPAL_TYPE,\n                ]:\n                    name = member.title\n                    if member.principal_type == AZURE_AD_GROUP_PRINCIPAL_TYPE:\n                        name = _get_group_name_with_suffix(\n                            member.login_name, name, graph_client\n                        )\n                    groups.add(\n                        SharepointGroup(\n                            login_name=member.login_name,\n                            principal_type=member.principal_type,\n                            name=name,\n                        )\n                    )\n\n    if drive_item and drive_name:\n        is_public = _is_public_item(drive_item, treat_sharing_link_as_public)\n        if is_public:\n            logger.info(f\"Item {drive_item.id} is public\")\n            return ExternalAccess(\n                external_user_emails=set(),\n                external_user_group_ids=set(),\n                is_public=True,\n            )\n\n        item_id = _get_sharepoint_list_item_id(drive_item)\n\n        if not item_id:\n            raise RuntimeError(\n                f\"Failed to get SharePoint list item ID for item {drive_item.id}\"\n            )\n\n        if drive_name in SHARED_DOCUMENTS_MAP_REVERSE:\n            drive_name = SHARED_DOCUMENTS_MAP_REVERSE[drive_name]\n\n        item = client_context.web.lists.get_by_title(drive_name).items.get_by_id(\n            item_id\n        )\n\n        sleep_and_retry(\n            item.role_assignments.expand([\"Member\", \"RoleDefinitionBindings\"]).get_all(\n                page_loaded=add_user_and_group_to_sets,\n            ),\n            \"get_external_access_from_sharepoint\",\n        )\n    elif site_page:\n        site_url = site_page.get(\"webUrl\")\n        # Keep percent-encoding intact so the path matches the encoding\n        # used by the Office365 library's SPResPath.create_relative(),\n        # which compares against urlparse(context.base_url).path.\n        # Decoding (e.g. %27 → ') causes a mismatch that duplicates\n        # the site prefix in the constructed URL.\n        server_relative_url = urlparse(site_url).path\n        file_obj = client_context.web.get_file_by_server_relative_url(\n            server_relative_url\n        )\n        item = file_obj.listItemAllFields\n\n        sleep_and_retry(\n            item.role_assignments.expand([\"Member\", \"RoleDefinitionBindings\"]).get_all(\n                page_loaded=add_user_and_group_to_sets,\n            ),\n            \"get_external_access_from_sharepoint\",\n        )\n    else:\n        raise RuntimeError(\"No drive item or site page provided\")\n\n    groups_and_members: GroupsResult = _get_groups_and_members_recursively(\n        client_context, graph_client, groups\n    )\n\n    # If the site is public, w have default groups assigned to it, so we return early\n    if groups_and_members.found_public_group:\n        return ExternalAccess(\n            external_user_emails=set(),\n            external_user_group_ids=set(),\n            is_public=True,\n        )\n\n    for group_name, _ in groups_and_members.groups_to_emails.items():\n        if add_prefix:\n            group_name = build_ext_group_name_for_onyx(\n                group_name, DocumentSource.SHAREPOINT\n            )\n        group_ids.add(group_name.lower())\n\n    logger.info(f\"User emails: {len(user_emails)}\")\n    logger.info(f\"Group IDs: {len(group_ids)}\")\n\n    return ExternalAccess(\n        external_user_emails=user_emails,\n        external_user_group_ids=group_ids,\n        is_public=False,\n    )\n\n\ndef _enumerate_ad_groups_paginated(\n    get_access_token: Callable[[], str],\n    already_resolved: set[str],\n    graph_api_base: str,\n) -> Generator[ExternalUserGroup, None, None]:\n    \"\"\"Paginate through all Azure AD groups and yield ExternalUserGroup for each.\n\n    Skips groups whose suffixed name is already in *already_resolved*.\n    Stops early if the number of groups exceeds AD_GROUP_ENUMERATION_THRESHOLD.\n    \"\"\"\n    groups_url = f\"{graph_api_base}/groups\"\n    groups_params: dict[str, str] = {\"$select\": \"id,displayName\", \"$top\": \"999\"}\n    total_groups = 0\n\n    for group_json in _iter_graph_collection(\n        groups_url, get_access_token, groups_params\n    ):\n        group_id: str = group_json.get(\"id\", \"\")\n        display_name: str = group_json.get(\"displayName\", \"\")\n        if not group_id or not display_name:\n            continue\n\n        total_groups += 1\n        if total_groups > AD_GROUP_ENUMERATION_THRESHOLD:\n            logger.warning(\n                f\"Azure AD group enumeration exceeded {AD_GROUP_ENUMERATION_THRESHOLD} \"\n                \"groups — stopping to avoid excessive memory/API usage. \"\n                \"Remaining groups will be resolved from role assignments only.\"\n            )\n            return\n\n        name = f\"{display_name}_{group_id}\"\n        if name in already_resolved:\n            continue\n\n        member_emails: list[str] = []\n        members_url = f\"{graph_api_base}/groups/{group_id}/members\"\n        members_params: dict[str, str] = {\n            \"$select\": \"userPrincipalName,mail\",\n            \"$top\": \"999\",\n        }\n        for member_json in _iter_graph_collection(\n            members_url, get_access_token, members_params\n        ):\n            email = member_json.get(\"userPrincipalName\") or member_json.get(\"mail\")\n            if email:\n                member_emails.append(_normalize_email(email))\n\n        yield ExternalUserGroup(id=name, user_emails=member_emails)\n\n    logger.info(f\"Enumerated {total_groups} Azure AD groups via paginated Graph API\")\n\n\ndef get_sharepoint_external_groups(\n    client_context: ClientContext,\n    graph_client: GraphClient,\n    graph_api_base: str,\n    get_access_token: Callable[[], str] | None = None,\n    enumerate_all_ad_groups: bool = False,\n) -> list[ExternalUserGroup]:\n\n    groups: set[SharepointGroup] = set()\n\n    def add_group_to_sets(role_assignments: RoleAssignmentCollection) -> None:\n        nonlocal groups\n        for assignment in role_assignments:\n            if assignment.role_definition_bindings:\n                is_limited_access = True\n                for role_definition_binding in assignment.role_definition_bindings:\n                    if (\n                        role_definition_binding.role_type_kind\n                        not in LIMITED_ACCESS_ROLE_TYPES\n                        or role_definition_binding.name not in LIMITED_ACCESS_ROLE_NAMES\n                    ):\n                        is_limited_access = False\n                        break\n\n                # Skip if the role assignment is only Limited Access, because this is not a actual permission its\n                #  a travel through permission\n                if is_limited_access:\n                    logger.info(\n                        \"Skipping assignment because it has only Limited Access role\"\n                    )\n                    continue\n            if assignment.member:\n                member = assignment.member\n                if member.principal_type in [\n                    AZURE_AD_GROUP_PRINCIPAL_TYPE,\n                    SHAREPOINT_GROUP_PRINCIPAL_TYPE,\n                ]:\n                    name = member.title\n                    if member.principal_type == AZURE_AD_GROUP_PRINCIPAL_TYPE:\n                        name = _get_group_name_with_suffix(\n                            member.login_name, name, graph_client\n                        )\n\n                    groups.add(\n                        SharepointGroup(\n                            login_name=member.login_name,\n                            principal_type=member.principal_type,\n                            name=name,\n                        )\n                    )\n\n    sleep_and_retry(\n        client_context.web.role_assignments.expand(\n            [\"Member\", \"RoleDefinitionBindings\"]\n        ).get_all(page_loaded=add_group_to_sets),\n        \"get_sharepoint_external_groups\",\n    )\n    groups_and_members: GroupsResult = _get_groups_and_members_recursively(\n        client_context, graph_client, groups, is_group_sync=True\n    )\n\n    external_user_groups: list[ExternalUserGroup] = [\n        ExternalUserGroup(id=group_name, user_emails=list(emails))\n        for group_name, emails in groups_and_members.groups_to_emails.items()\n    ]\n\n    if not enumerate_all_ad_groups or get_access_token is None:\n        logger.info(\n            \"Skipping exhaustive Azure AD group enumeration. Only groups found in site role assignments are included.\"\n        )\n        return external_user_groups\n\n    already_resolved = set(groups_and_members.groups_to_emails.keys())\n    for group in _enumerate_ad_groups_paginated(\n        get_access_token, already_resolved, graph_api_base\n    ):\n        external_user_groups.append(group)\n\n    return external_user_groups\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/slack/channel_access.py",
    "content": "from slack_sdk import WebClient\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.slack.connector import ChannelType\nfrom onyx.connectors.slack.utils import expert_info_from_slack_id\nfrom onyx.connectors.slack.utils import make_paginated_slack_api_call\n\n\ndef get_channel_access(\n    client: WebClient,\n    channel: ChannelType,\n    user_cache: dict[str, BasicExpertInfo | None],\n) -> ExternalAccess:\n    \"\"\"\n    Get channel access permissions for a Slack channel.\n\n    Args:\n        client: Slack WebClient instance\n        channel: Slack channel object containing channel info\n        user_cache: Cache of user IDs to BasicExpertInfo objects. May be updated in place.\n\n    Returns:\n        ExternalAccess object for the channel.\n    \"\"\"\n    channel_is_public = not channel[\"is_private\"]\n    if channel_is_public:\n        return ExternalAccess(\n            external_user_emails=set(),\n            external_user_group_ids=set(),\n            is_public=True,\n        )\n\n    channel_id = channel[\"id\"]\n\n    # Get all member IDs for the channel\n    member_ids = []\n    for result in make_paginated_slack_api_call(\n        client.conversations_members,\n        channel=channel_id,\n    ):\n        member_ids.extend(result.get(\"members\", []))\n\n    member_emails = set()\n    for member_id in member_ids:\n        # Try to get user info from cache or fetch it\n        user_info = expert_info_from_slack_id(\n            user_id=member_id,\n            client=client,\n            user_cache=user_cache,\n        )\n\n        # If we have user info and an email, add it to the set\n        if user_info and user_info.email:\n            member_emails.add(user_info.email)\n\n    return ExternalAccess(\n        external_user_emails=member_emails,\n        # NOTE: groups are not used, since adding a group to a channel just adds all\n        # users that are in the group.\n        external_user_group_ids=set(),\n        is_public=False,\n    )\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/slack/doc_sync.py",
    "content": "from collections.abc import Generator\n\nfrom slack_sdk import WebClient\n\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction\nfrom ee.onyx.external_permissions.slack.utils import fetch_user_id_to_email_map\nfrom onyx.access.models import DocExternalAccess\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.credentials_provider import OnyxDBCredentialsProvider\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.slack.connector import get_channels\nfrom onyx.connectors.slack.connector import make_paginated_slack_api_call\nfrom onyx.connectors.slack.connector import SlackConnector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\n\nlogger = setup_logger()\n\n\ndef _fetch_workspace_permissions(\n    user_id_to_email_map: dict[str, str],\n) -> ExternalAccess:\n    user_emails = set()\n    for email in user_id_to_email_map.values():\n        user_emails.add(email)\n    return ExternalAccess(\n        external_user_emails=user_emails,\n        # No group<->document mapping for slack\n        external_user_group_ids=set(),\n        # No way to determine if slack is invite only without enterprise license\n        is_public=False,\n    )\n\n\ndef _fetch_channel_permissions(\n    slack_client: WebClient,\n    workspace_permissions: ExternalAccess,\n    user_id_to_email_map: dict[str, str],\n) -> dict[str, ExternalAccess]:\n    channel_permissions = {}\n    public_channels = get_channels(\n        client=slack_client,\n        get_public=True,\n        get_private=False,\n    )\n    public_channel_ids = [\n        channel[\"id\"] for channel in public_channels if \"id\" in channel\n    ]\n    for channel_id in public_channel_ids:\n        channel_permissions[channel_id] = workspace_permissions\n\n    private_channels = get_channels(\n        client=slack_client,\n        get_public=False,\n        get_private=True,\n    )\n    private_channel_ids = [\n        channel[\"id\"] for channel in private_channels if \"id\" in channel\n    ]\n\n    for channel_id in private_channel_ids:\n        # Collect all member ids for the channel pagination calls\n        member_ids = []\n        for result in make_paginated_slack_api_call(\n            slack_client.conversations_members,\n            channel=channel_id,\n        ):\n            member_ids.extend(result.get(\"members\", []))\n\n        # Collect all member emails for the channel\n        member_emails = set()\n        for member_id in member_ids:\n            member_email = user_id_to_email_map.get(member_id)\n\n            if not member_email:\n                # If the user is an external user, they wont get returned from the\n                # conversations_members call so we need to make a separate call to users_info\n                # and add them to the user_id_to_email_map\n                member_info = slack_client.users_info(user=member_id)\n                member_email = member_info[\"user\"][\"profile\"].get(\"email\")\n                if not member_email:\n                    # If no email is found, we skip the user\n                    continue\n                user_id_to_email_map[member_id] = member_email\n\n            member_emails.add(member_email)\n\n        channel_permissions[channel_id] = ExternalAccess(\n            external_user_emails=member_emails,\n            # No group<->document mapping for slack\n            external_user_group_ids=set(),\n            # No way to determine if slack is invite only without enterprise license\n            is_public=False,\n        )\n\n    return channel_permissions\n\n\ndef _get_slack_document_access(\n    slack_connector: SlackConnector,\n    channel_permissions: dict[str, ExternalAccess],  # noqa: ARG001\n    callback: IndexingHeartbeatInterface | None,\n    indexing_start: SecondsSinceUnixEpoch | None = None,\n) -> Generator[DocExternalAccess, None, None]:\n    slim_doc_generator = slack_connector.retrieve_all_slim_docs_perm_sync(\n        callback=callback,\n        start=indexing_start,\n    )\n\n    for doc_metadata_batch in slim_doc_generator:\n        for doc_metadata in doc_metadata_batch:\n            if isinstance(doc_metadata, HierarchyNode):\n                # TODO: handle hierarchynodes during sync\n                continue\n            if doc_metadata.external_access is None:\n                raise ValueError(\n                    f\"No external access for document {doc_metadata.id}. \"\n                    \"Please check to make sure that your Slack bot token has the \"\n                    \"`channels:read` scope\"\n                )\n\n            yield DocExternalAccess(\n                external_access=doc_metadata.external_access,\n                doc_id=doc_metadata.id,\n            )\n\n        if callback:\n            if callback.should_stop():\n                raise RuntimeError(\"_get_slack_document_access: Stop signal detected\")\n\n            callback.progress(\"_get_slack_document_access\", 1)\n\n\ndef slack_doc_sync(\n    cc_pair: ConnectorCredentialPair,\n    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001\n    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,  # noqa: ARG001\n    callback: IndexingHeartbeatInterface | None,\n) -> Generator[DocExternalAccess, None, None]:\n    \"\"\"\n    Adds the external permissions to the documents in postgres\n    if the document doesn't already exists in postgres, we create\n    it in postgres so that when it gets created later, the permissions are\n    already populated\n    \"\"\"\n    # Use credentials provider instead of directly loading credentials\n\n    tenant_id = get_current_tenant_id()\n    provider = OnyxDBCredentialsProvider(tenant_id, \"slack\", cc_pair.credential.id)\n    r = get_redis_client(tenant_id=tenant_id)\n    credential_json = (\n        cc_pair.credential.credential_json.get_value(apply_mask=False)\n        if cc_pair.credential.credential_json\n        else {}\n    )\n    slack_client = SlackConnector.make_slack_web_client(\n        provider.get_provider_key(),\n        credential_json[\"slack_bot_token\"],\n        SlackConnector.MAX_RETRIES,\n        r,\n    )\n\n    user_id_to_email_map = fetch_user_id_to_email_map(slack_client)\n    if not user_id_to_email_map:\n        raise ValueError(\n            \"No user id to email map found. Please check to make sure that your Slack bot token has the `users:read.email` scope\"\n        )\n\n    workspace_permissions = _fetch_workspace_permissions(\n        user_id_to_email_map=user_id_to_email_map,\n    )\n    channel_permissions = _fetch_channel_permissions(\n        slack_client=slack_client,\n        workspace_permissions=workspace_permissions,\n        user_id_to_email_map=user_id_to_email_map,\n    )\n\n    slack_connector = SlackConnector(**cc_pair.connector.connector_specific_config)\n    slack_connector.set_credentials_provider(provider)\n    indexing_start_ts: SecondsSinceUnixEpoch | None = (\n        cc_pair.connector.indexing_start.timestamp()\n        if cc_pair.connector.indexing_start is not None\n        else None\n    )\n\n    yield from _get_slack_document_access(\n        slack_connector=slack_connector,\n        channel_permissions=channel_permissions,\n        callback=callback,\n        indexing_start=indexing_start_ts,\n    )\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/slack/group_sync.py",
    "content": "\"\"\"\nTHIS IS NOT USEFUL OR USED FOR PERMISSION SYNCING\nWHEN USERGROUPS ARE ADDED TO A CHANNEL, IT JUST RESOLVES ALL THE USERS TO THAT CHANNEL\nSO WHEN CHECKING IF A USER CAN ACCESS A DOCUMENT, WE ONLY NEED TO CHECK THEIR EMAIL\nTHERE IS NO USERGROUP <-> DOCUMENT PERMISSION MAPPING\n\"\"\"\n\nfrom slack_sdk import WebClient\n\nfrom ee.onyx.db.external_perm import ExternalUserGroup\nfrom ee.onyx.external_permissions.slack.utils import fetch_user_id_to_email_map\nfrom onyx.connectors.credentials_provider import OnyxDBCredentialsProvider\nfrom onyx.connectors.slack.connector import SlackConnector\nfrom onyx.connectors.slack.utils import make_paginated_slack_api_call\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _get_slack_group_ids(\n    slack_client: WebClient,\n) -> list[str]:\n    group_ids = []\n    for result in make_paginated_slack_api_call(slack_client.usergroups_list):\n        for group in result.get(\"usergroups\", []):\n            group_ids.append(group.get(\"id\"))\n    return group_ids\n\n\ndef _get_slack_group_members_email(\n    slack_client: WebClient,\n    group_name: str,\n    user_id_to_email_map: dict[str, str],\n) -> list[str]:\n    group_member_emails = []\n    for result in make_paginated_slack_api_call(\n        slack_client.usergroups_users_list, usergroup=group_name\n    ):\n        for member_id in result.get(\"users\", []):\n            member_email = user_id_to_email_map.get(member_id)\n            if not member_email:\n                # If the user is an external user, they wont get returned from the\n                # conversations_members call so we need to make a separate call to users_info\n                member_info = slack_client.users_info(user=member_id)\n                member_email = member_info[\"user\"][\"profile\"].get(\"email\")\n                if not member_email:\n                    # If no email is found, we skip the user\n                    continue\n                user_id_to_email_map[member_id] = member_email\n            group_member_emails.append(member_email)\n\n    return group_member_emails\n\n\ndef slack_group_sync(\n    tenant_id: str,\n    cc_pair: ConnectorCredentialPair,\n) -> list[ExternalUserGroup]:\n    \"\"\"NOTE: not used atm. All channel access is done at the\n    individual user level. Leaving in for now in case we need it later.\"\"\"\n\n    provider = OnyxDBCredentialsProvider(tenant_id, \"slack\", cc_pair.credential.id)\n    r = get_redis_client(tenant_id=tenant_id)\n    credential_json = (\n        cc_pair.credential.credential_json.get_value(apply_mask=False)\n        if cc_pair.credential.credential_json\n        else {}\n    )\n    slack_client = SlackConnector.make_slack_web_client(\n        provider.get_provider_key(),\n        credential_json[\"slack_bot_token\"],\n        SlackConnector.MAX_RETRIES,\n        r,\n    )\n\n    user_id_to_email_map = fetch_user_id_to_email_map(slack_client)\n\n    onyx_groups: list[ExternalUserGroup] = []\n    for group_name in _get_slack_group_ids(slack_client):\n        group_member_emails = _get_slack_group_members_email(\n            slack_client=slack_client,\n            group_name=group_name,\n            user_id_to_email_map=user_id_to_email_map,\n        )\n        if not group_member_emails:\n            continue\n        onyx_groups.append(\n            ExternalUserGroup(id=group_name, user_emails=group_member_emails)\n        )\n    return onyx_groups\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/slack/utils.py",
    "content": "from slack_sdk import WebClient\n\nfrom onyx.connectors.slack.utils import make_paginated_slack_api_call\n\n\ndef fetch_user_id_to_email_map(\n    slack_client: WebClient,\n) -> dict[str, str]:\n    user_id_to_email_map = {}\n    for user_info in make_paginated_slack_api_call(\n        slack_client.users_list,\n    ):\n        for user in user_info.get(\"members\", []):\n            if user.get(\"profile\", {}).get(\"email\"):\n                user_id_to_email_map[user.get(\"id\")] = user.get(\"profile\", {}).get(\n                    \"email\"\n                )\n    return user_id_to_email_map\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/sync_params.py",
    "content": "from collections.abc import Generator\nfrom typing import Optional\nfrom typing import TYPE_CHECKING\n\nfrom pydantic import BaseModel\n\nfrom ee.onyx.configs.app_configs import CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY\nfrom ee.onyx.configs.app_configs import CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY\nfrom ee.onyx.configs.app_configs import DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY\nfrom ee.onyx.configs.app_configs import GITHUB_PERMISSION_DOC_SYNC_FREQUENCY\nfrom ee.onyx.configs.app_configs import GITHUB_PERMISSION_GROUP_SYNC_FREQUENCY\nfrom ee.onyx.configs.app_configs import GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY\nfrom ee.onyx.configs.app_configs import JIRA_PERMISSION_DOC_SYNC_FREQUENCY\nfrom ee.onyx.configs.app_configs import JIRA_PERMISSION_GROUP_SYNC_FREQUENCY\nfrom ee.onyx.configs.app_configs import SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY\nfrom ee.onyx.configs.app_configs import SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY\nfrom ee.onyx.configs.app_configs import SLACK_PERMISSION_DOC_SYNC_FREQUENCY\nfrom ee.onyx.configs.app_configs import TEAMS_PERMISSION_DOC_SYNC_FREQUENCY\nfrom ee.onyx.external_permissions.confluence.doc_sync import confluence_doc_sync\nfrom ee.onyx.external_permissions.confluence.group_sync import confluence_group_sync\nfrom ee.onyx.external_permissions.github.doc_sync import github_doc_sync\nfrom ee.onyx.external_permissions.github.group_sync import github_group_sync\nfrom ee.onyx.external_permissions.gmail.doc_sync import gmail_doc_sync\nfrom ee.onyx.external_permissions.google_drive.doc_sync import gdrive_doc_sync\nfrom ee.onyx.external_permissions.google_drive.group_sync import gdrive_group_sync\nfrom ee.onyx.external_permissions.jira.doc_sync import jira_doc_sync\nfrom ee.onyx.external_permissions.jira.group_sync import jira_group_sync\nfrom ee.onyx.external_permissions.perm_sync_types import CensoringFuncType\nfrom ee.onyx.external_permissions.perm_sync_types import DocSyncFuncType\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction\nfrom ee.onyx.external_permissions.perm_sync_types import GroupSyncFuncType\nfrom ee.onyx.external_permissions.salesforce.postprocessing import (\n    censor_salesforce_chunks,\n)\nfrom ee.onyx.external_permissions.sharepoint.doc_sync import sharepoint_doc_sync\nfrom ee.onyx.external_permissions.sharepoint.group_sync import sharepoint_group_sync\nfrom ee.onyx.external_permissions.slack.doc_sync import slack_doc_sync\nfrom ee.onyx.external_permissions.teams.doc_sync import teams_doc_sync\nfrom onyx.configs.constants import DocumentSource\n\nif TYPE_CHECKING:\n    from onyx.access.models import DocExternalAccess  # noqa\n    from onyx.db.models import ConnectorCredentialPair  # noqa\n    from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface  # noqa\n\n\nclass DocSyncConfig(BaseModel):\n    doc_sync_frequency: int\n    doc_sync_func: DocSyncFuncType\n    initial_index_should_sync: bool\n\n\nclass GroupSyncConfig(BaseModel):\n    group_sync_frequency: int\n    group_sync_func: GroupSyncFuncType\n    group_sync_is_cc_pair_agnostic: bool\n\n\nclass CensoringConfig(BaseModel):\n    chunk_censoring_func: CensoringFuncType\n\n\nclass SyncConfig(BaseModel):\n    # None means we don't perform a doc_sync\n    doc_sync_config: DocSyncConfig | None = None\n    # None means we don't perform a group_sync\n    group_sync_config: GroupSyncConfig | None = None\n    # None means we don't perform a chunk_censoring\n    censoring_config: CensoringConfig | None = None\n\n\n# Mock doc sync function for testing (no-op)\ndef mock_doc_sync(\n    cc_pair: \"ConnectorCredentialPair\",  # noqa: ARG001\n    fetch_all_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001\n    fetch_all_docs_ids_fn: FetchAllDocumentsIdsFunction,  # noqa: ARG001\n    callback: Optional[\"IndexingHeartbeatInterface\"],  # noqa: ARG001\n) -> Generator[\"DocExternalAccess\", None, None]:\n    \"\"\"Mock doc sync function for testing - returns empty list since permissions are fetched during indexing\"\"\"\n    yield from []\n\n\n_SOURCE_TO_SYNC_CONFIG: dict[DocumentSource, SyncConfig] = {\n    DocumentSource.GOOGLE_DRIVE: SyncConfig(\n        doc_sync_config=DocSyncConfig(\n            doc_sync_frequency=DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY,\n            doc_sync_func=gdrive_doc_sync,\n            initial_index_should_sync=True,\n        ),\n        group_sync_config=GroupSyncConfig(\n            group_sync_frequency=GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY,\n            group_sync_func=gdrive_group_sync,\n            group_sync_is_cc_pair_agnostic=False,\n        ),\n    ),\n    DocumentSource.CONFLUENCE: SyncConfig(\n        doc_sync_config=DocSyncConfig(\n            doc_sync_frequency=CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY,\n            doc_sync_func=confluence_doc_sync,\n            initial_index_should_sync=False,\n        ),\n        group_sync_config=GroupSyncConfig(\n            group_sync_frequency=CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY,\n            group_sync_func=confluence_group_sync,\n            group_sync_is_cc_pair_agnostic=True,\n        ),\n    ),\n    DocumentSource.JIRA: SyncConfig(\n        doc_sync_config=DocSyncConfig(\n            doc_sync_frequency=JIRA_PERMISSION_DOC_SYNC_FREQUENCY,\n            doc_sync_func=jira_doc_sync,\n            initial_index_should_sync=True,\n        ),\n        group_sync_config=GroupSyncConfig(\n            group_sync_frequency=JIRA_PERMISSION_GROUP_SYNC_FREQUENCY,\n            group_sync_func=jira_group_sync,\n            group_sync_is_cc_pair_agnostic=True,\n        ),\n    ),\n    # Groups are not needed for Slack.\n    # All channel access is done at the individual user level.\n    DocumentSource.SLACK: SyncConfig(\n        doc_sync_config=DocSyncConfig(\n            doc_sync_frequency=SLACK_PERMISSION_DOC_SYNC_FREQUENCY,\n            doc_sync_func=slack_doc_sync,\n            initial_index_should_sync=True,\n        ),\n    ),\n    DocumentSource.GMAIL: SyncConfig(\n        doc_sync_config=DocSyncConfig(\n            doc_sync_frequency=DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY,\n            doc_sync_func=gmail_doc_sync,\n            initial_index_should_sync=False,\n        ),\n    ),\n    DocumentSource.GITHUB: SyncConfig(\n        doc_sync_config=DocSyncConfig(\n            doc_sync_frequency=GITHUB_PERMISSION_DOC_SYNC_FREQUENCY,\n            doc_sync_func=github_doc_sync,\n            initial_index_should_sync=True,\n        ),\n        group_sync_config=GroupSyncConfig(\n            group_sync_frequency=GITHUB_PERMISSION_GROUP_SYNC_FREQUENCY,\n            group_sync_func=github_group_sync,\n            group_sync_is_cc_pair_agnostic=False,\n        ),\n    ),\n    DocumentSource.SALESFORCE: SyncConfig(\n        censoring_config=CensoringConfig(\n            chunk_censoring_func=censor_salesforce_chunks,\n        ),\n    ),\n    DocumentSource.MOCK_CONNECTOR: SyncConfig(\n        doc_sync_config=DocSyncConfig(\n            doc_sync_frequency=DEFAULT_PERMISSION_DOC_SYNC_FREQUENCY,\n            doc_sync_func=mock_doc_sync,\n            initial_index_should_sync=True,\n        ),\n    ),\n    # Groups are not needed for Teams.\n    # All channel access is done at the individual user level.\n    DocumentSource.TEAMS: SyncConfig(\n        doc_sync_config=DocSyncConfig(\n            doc_sync_frequency=TEAMS_PERMISSION_DOC_SYNC_FREQUENCY,\n            doc_sync_func=teams_doc_sync,\n            initial_index_should_sync=True,\n        ),\n    ),\n    DocumentSource.SHAREPOINT: SyncConfig(\n        doc_sync_config=DocSyncConfig(\n            doc_sync_frequency=SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY,\n            doc_sync_func=sharepoint_doc_sync,\n            initial_index_should_sync=True,\n        ),\n        group_sync_config=GroupSyncConfig(\n            group_sync_frequency=SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY,\n            group_sync_func=sharepoint_group_sync,\n            group_sync_is_cc_pair_agnostic=False,\n        ),\n    ),\n}\n\n\ndef source_requires_doc_sync(source: DocumentSource) -> bool:\n    \"\"\"Checks if the given DocumentSource requires doc syncing.\"\"\"\n    if source not in _SOURCE_TO_SYNC_CONFIG:\n        return False\n    return _SOURCE_TO_SYNC_CONFIG[source].doc_sync_config is not None\n\n\ndef source_requires_external_group_sync(source: DocumentSource) -> bool:\n    \"\"\"Checks if the given DocumentSource requires external group syncing.\"\"\"\n    if source not in _SOURCE_TO_SYNC_CONFIG:\n        return False\n    return _SOURCE_TO_SYNC_CONFIG[source].group_sync_config is not None\n\n\ndef get_source_perm_sync_config(source: DocumentSource) -> SyncConfig | None:\n    \"\"\"Returns the frequency of the external group sync for the given DocumentSource.\"\"\"\n    return _SOURCE_TO_SYNC_CONFIG.get(source)\n\n\ndef source_group_sync_is_cc_pair_agnostic(source: DocumentSource) -> bool:\n    \"\"\"Checks if the given DocumentSource requires external group syncing.\"\"\"\n    if source not in _SOURCE_TO_SYNC_CONFIG:\n        return False\n\n    group_sync_config = _SOURCE_TO_SYNC_CONFIG[source].group_sync_config\n    if group_sync_config is None:\n        return False\n\n    return group_sync_config.group_sync_is_cc_pair_agnostic\n\n\ndef get_all_cc_pair_agnostic_group_sync_sources() -> set[DocumentSource]:\n    \"\"\"Returns the set of sources that have external group syncing that is cc_pair agnostic.\"\"\"\n    return {\n        source\n        for source, sync_config in _SOURCE_TO_SYNC_CONFIG.items()\n        if sync_config.group_sync_config is not None\n        and sync_config.group_sync_config.group_sync_is_cc_pair_agnostic\n    }\n\n\ndef check_if_valid_sync_source(source_type: DocumentSource) -> bool:\n    return source_type in _SOURCE_TO_SYNC_CONFIG\n\n\ndef get_all_censoring_enabled_sources() -> set[DocumentSource]:\n    \"\"\"Returns the set of sources that have censoring enabled.\"\"\"\n    return {\n        source\n        for source, sync_config in _SOURCE_TO_SYNC_CONFIG.items()\n        if sync_config.censoring_config is not None\n    }\n\n\ndef source_should_fetch_permissions_during_indexing(source: DocumentSource) -> bool:\n    \"\"\"Returns True if the given DocumentSource requires permissions to be fetched during indexing.\"\"\"\n    if source not in _SOURCE_TO_SYNC_CONFIG:\n        return False\n\n    doc_sync_config = _SOURCE_TO_SYNC_CONFIG[source].doc_sync_config\n    if doc_sync_config is None:\n        return False\n\n    return doc_sync_config.initial_index_should_sync\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/teams/doc_sync.py",
    "content": "from collections.abc import Generator\n\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction\nfrom ee.onyx.external_permissions.utils import generic_doc_sync\nfrom onyx.access.models import ElementExternalAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.teams.connector import TeamsConnector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nTEAMS_DOC_SYNC_LABEL = \"teams_doc_sync\"\n\n\ndef teams_doc_sync(\n    cc_pair: ConnectorCredentialPair,\n    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,  # noqa: ARG001\n    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,\n    callback: IndexingHeartbeatInterface | None,\n) -> Generator[ElementExternalAccess, None, None]:\n    teams_connector = TeamsConnector(\n        **cc_pair.connector.connector_specific_config,\n    )\n    credential_json = (\n        cc_pair.credential.credential_json.get_value(apply_mask=False)\n        if cc_pair.credential.credential_json\n        else {}\n    )\n    teams_connector.load_credentials(credential_json)\n\n    yield from generic_doc_sync(\n        cc_pair=cc_pair,\n        fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,\n        callback=callback,\n        doc_source=DocumentSource.TEAMS,\n        slim_connector=teams_connector,\n        label=TEAMS_DOC_SYNC_LABEL,\n    )\n"
  },
  {
    "path": "backend/ee/onyx/external_permissions/utils.py",
    "content": "from collections.abc import Generator\n\nfrom ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction\nfrom onyx.access.models import DocExternalAccess\nfrom onyx.access.models import ElementExternalAccess\nfrom onyx.access.models import ExternalAccess\nfrom onyx.access.models import NodeExternalAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef generic_doc_sync(\n    cc_pair: ConnectorCredentialPair,\n    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,\n    callback: IndexingHeartbeatInterface | None,\n    doc_source: DocumentSource,\n    slim_connector: SlimConnectorWithPermSync,\n    label: str,\n) -> Generator[ElementExternalAccess, None, None]:\n    \"\"\"\n    A convenience function for performing a generic document synchronization.\n\n    Notes:\n    A generic doc sync includes:\n        - fetching existing docs\n        - fetching *all* new (slim) docs\n        - yielding external-access permissions for existing docs which do not exist in the newly fetched slim-docs set (with their\n        `external_access` set to \"private\")\n        - yielding external-access permissions for newly fetched docs and hierarchy nodes\n\n    Returns:\n        A `Generator` which yields existing and newly fetched external-access permissions.\n    \"\"\"\n\n    logger.info(f\"Starting {doc_source} doc sync for CC Pair ID: {cc_pair.id}\")\n\n    indexing_start: SecondsSinceUnixEpoch | None = (\n        cc_pair.connector.indexing_start.timestamp()\n        if cc_pair.connector.indexing_start is not None\n        else None\n    )\n\n    newly_fetched_doc_ids: set[str] = set()\n\n    logger.info(f\"Fetching all slim documents from {doc_source}\")\n    for doc_batch in slim_connector.retrieve_all_slim_docs_perm_sync(\n        start=indexing_start,\n        callback=callback,\n    ):\n        logger.info(f\"Got {len(doc_batch)} slim documents from {doc_source}\")\n\n        if callback:\n            if callback.should_stop():\n                raise RuntimeError(f\"{label}: Stop signal detected\")\n            callback.progress(label, 1)\n\n        for doc in doc_batch:\n            if isinstance(doc, HierarchyNode):\n                # Yield hierarchy node permissions to be processed in outer layer\n                if doc.external_access:\n                    yield NodeExternalAccess(\n                        external_access=doc.external_access,\n                        raw_node_id=doc.raw_node_id,\n                        source=doc_source.value,\n                    )\n                continue\n            if not doc.external_access:\n                raise RuntimeError(\n                    f\"No external access found for document ID; {cc_pair.id=} {doc_source=} {doc.id=}\"\n                )\n\n            newly_fetched_doc_ids.add(doc.id)\n\n            yield DocExternalAccess(\n                doc_id=doc.id,\n                external_access=doc.external_access,\n            )\n\n    logger.info(f\"Querying existing document IDs for CC Pair ID: {cc_pair.id=}\")\n    existing_doc_ids: list[str] = fetch_all_existing_docs_ids_fn()\n\n    missing_doc_ids = set(existing_doc_ids) - newly_fetched_doc_ids\n\n    if not missing_doc_ids:\n        return\n\n    logger.warning(\n        f\"Found {len(missing_doc_ids)=} documents that are in the DB but not present in fetch. Making them inaccessible.\"\n    )\n\n    for missing_id in missing_doc_ids:\n        logger.warning(f\"Removing access for {missing_id=}\")\n        yield DocExternalAccess(\n            doc_id=missing_id,\n            external_access=ExternalAccess.empty(),\n        )\n\n    logger.info(f\"Finished {doc_source} doc sync\")\n"
  },
  {
    "path": "backend/ee/onyx/feature_flags/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/feature_flags/factory.py",
    "content": "from ee.onyx.feature_flags.posthog_provider import PostHogFeatureFlagProvider\nfrom onyx.feature_flags.interface import FeatureFlagProvider\n\n\ndef get_posthog_feature_flag_provider() -> FeatureFlagProvider:\n    \"\"\"\n    Get the PostHog feature flag provider instance.\n\n    This is the EE implementation that gets loaded by the versioned\n    implementation loader.\n\n    Returns:\n        PostHogFeatureFlagProvider: The PostHog-based feature flag provider\n    \"\"\"\n    return PostHogFeatureFlagProvider()\n"
  },
  {
    "path": "backend/ee/onyx/feature_flags/posthog_provider.py",
    "content": "from typing import Any\nfrom uuid import UUID\n\nfrom ee.onyx.utils.posthog_client import posthog\nfrom onyx.feature_flags.interface import FeatureFlagProvider\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass PostHogFeatureFlagProvider(FeatureFlagProvider):\n    \"\"\"\n    PostHog-based feature flag provider.\n\n    Uses PostHog's feature flag API to determine if features are enabled\n    for specific users. Only active in multi-tenant mode.\n    \"\"\"\n\n    def feature_enabled(\n        self,\n        flag_key: str,\n        user_id: UUID,\n        user_properties: dict[str, Any] | None = None,\n    ) -> bool:\n        \"\"\"\n        Check if a feature flag is enabled for a user via PostHog.\n\n        Args:\n            flag_key: The identifier for the feature flag to check\n            user_id: The unique identifier for the user\n            user_properties: Optional dictionary of user properties/attributes\n                           that may influence flag evaluation\n\n        Returns:\n            True if the feature is enabled for the user, False otherwise.\n        \"\"\"\n        if not posthog:\n            return False\n\n        try:\n            posthog.set(\n                distinct_id=user_id,\n                properties=user_properties,\n            )\n            is_enabled = posthog.feature_enabled(\n                flag_key,\n                str(user_id),\n                person_properties=user_properties,\n            )\n\n            return bool(is_enabled) if is_enabled is not None else False\n\n        except Exception as e:\n            logger.error(\n                f\"Error checking feature flag {flag_key} for user {user_id}: {e}\"\n            )\n            return False\n"
  },
  {
    "path": "backend/ee/onyx/hooks/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/hooks/executor.py",
    "content": "\"\"\"Hook executor — calls a customer's external HTTP endpoint for a given hook point.\n\nUsage (Celery tasks and FastAPI handlers):\n    result = execute_hook(\n        db_session=db_session,\n        hook_point=HookPoint.QUERY_PROCESSING,\n        payload={\"query\": \"...\", \"user_email\": \"...\", \"chat_session_id\": \"...\"},\n        response_type=QueryProcessingResponse,\n    )\n\n    if isinstance(result, HookSkipped):\n        # no active hook configured — continue with original behavior\n        ...\n    elif isinstance(result, HookSoftFailed):\n        # hook failed but fail strategy is SOFT — continue with original behavior\n        ...\n    else:\n        # result is a validated Pydantic model instance (response_type)\n        ...\n\nis_reachable update policy\n--------------------------\n``is_reachable`` on the Hook row is updated selectively — only when the outcome\ncarries meaningful signal about physical reachability:\n\n  NetworkError (DNS, connection refused)  → False  (cannot reach the server)\n  HTTP 401 / 403                          → False  (api_key revoked or invalid)\n  TimeoutException                        → None   (server may be slow, skip write)\n  Other HTTP errors (4xx / 5xx)           → None   (server responded, skip write)\n  Unknown exception                       → None   (no signal, skip write)\n  Non-JSON / non-dict response            → None   (server responded, skip write)\n  Success (2xx, valid dict)               → True   (confirmed reachable)\n\nNone means \"leave the current value unchanged\" — no DB round-trip is made.\n\nDB session design\n-----------------\nThe executor uses three sessions:\n\n  1. Caller's session (db_session) — used only for the hook lookup read. All\n     needed fields are extracted from the Hook object before the HTTP call, so\n     the caller's session is not held open during the external HTTP request.\n\n  2. Log session — a separate short-lived session opened after the HTTP call\n     completes to write the HookExecutionLog row on failure. Success runs are\n     not recorded. Committed independently of everything else.\n\n  3. Reachable session — a second short-lived session to update is_reachable on\n     the Hook. Kept separate from the log session so a concurrent hook deletion\n     (which causes update_hook__no_commit to raise OnyxError(NOT_FOUND)) cannot\n     prevent the execution log from being written. This update is best-effort.\n\"\"\"\n\nimport json\nimport time\nfrom typing import Any\nfrom typing import TypeVar\n\nimport httpx\nfrom pydantic import BaseModel\nfrom pydantic import ValidationError\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import HookFailStrategy\nfrom onyx.db.enums import HookPoint\nfrom onyx.db.hook import create_hook_execution_log__no_commit\nfrom onyx.db.hook import get_non_deleted_hook_by_hook_point\nfrom onyx.db.hook import update_hook__no_commit\nfrom onyx.db.models import Hook\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.hooks.executor import HookSkipped\nfrom onyx.hooks.executor import HookSoftFailed\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n\nT = TypeVar(\"T\", bound=BaseModel)\n\n\n# ---------------------------------------------------------------------------\n# Private helpers\n# ---------------------------------------------------------------------------\n\n\nclass _HttpOutcome(BaseModel):\n    \"\"\"Structured result of an HTTP hook call, returned by _process_response.\"\"\"\n\n    is_success: bool\n    updated_is_reachable: (\n        bool | None\n    )  # True/False = write to DB, None = unchanged (skip write)\n    status_code: int | None\n    error_message: str | None\n    response_payload: dict[str, Any] | None\n\n\ndef _lookup_hook(\n    db_session: Session,\n    hook_point: HookPoint,\n) -> Hook | HookSkipped:\n    \"\"\"Return the active Hook or HookSkipped if hooks are unavailable/unconfigured.\n\n    No HTTP call is made and no DB writes are performed for any HookSkipped path.\n    There is nothing to log and no reachability information to update.\n    \"\"\"\n    if MULTI_TENANT:\n        return HookSkipped()\n    hook = get_non_deleted_hook_by_hook_point(\n        db_session=db_session, hook_point=hook_point\n    )\n    if hook is None or not hook.is_active:\n        return HookSkipped()\n    if not hook.endpoint_url:\n        return HookSkipped()\n    return hook\n\n\ndef _process_response(\n    *,\n    response: httpx.Response | None,\n    exc: Exception | None,\n    timeout: float,\n) -> _HttpOutcome:\n    \"\"\"Process the result of an HTTP call and return a structured outcome.\n\n    Called after the client.post() try/except. If post() raised, exc is set and\n    response is None. Otherwise response is set and exc is None. Handles\n    raise_for_status(), JSON decoding, and the dict shape check.\n    \"\"\"\n    if exc is not None:\n        if isinstance(exc, httpx.NetworkError):\n            msg = f\"Hook network error (endpoint unreachable): {exc}\"\n            logger.warning(msg, exc_info=exc)\n            return _HttpOutcome(\n                is_success=False,\n                updated_is_reachable=False,\n                status_code=None,\n                error_message=msg,\n                response_payload=None,\n            )\n        if isinstance(exc, httpx.TimeoutException):\n            msg = f\"Hook timed out after {timeout}s: {exc}\"\n            logger.warning(msg, exc_info=exc)\n            return _HttpOutcome(\n                is_success=False,\n                updated_is_reachable=None,  # timeout doesn't indicate unreachability\n                status_code=None,\n                error_message=msg,\n                response_payload=None,\n            )\n        msg = f\"Hook call failed: {exc}\"\n        logger.exception(msg, exc_info=exc)\n        return _HttpOutcome(\n            is_success=False,\n            updated_is_reachable=None,  # unknown error — don't make assumptions\n            status_code=None,\n            error_message=msg,\n            response_payload=None,\n        )\n\n    if response is None:\n        raise ValueError(\n            \"exactly one of response or exc must be non-None; both are None\"\n        )\n    status_code = response.status_code\n\n    try:\n        response.raise_for_status()\n    except httpx.HTTPStatusError as e:\n        msg = f\"Hook returned HTTP {e.response.status_code}: {e.response.text}\"\n        logger.warning(msg, exc_info=e)\n        # 401/403 means the api_key has been revoked or is invalid — mark unreachable\n        # so the operator knows to update it. All other HTTP errors keep is_reachable\n        # as-is (server is up, the request just failed for application reasons).\n        auth_failed = e.response.status_code in (401, 403)\n        return _HttpOutcome(\n            is_success=False,\n            updated_is_reachable=False if auth_failed else None,\n            status_code=status_code,\n            error_message=msg,\n            response_payload=None,\n        )\n\n    try:\n        response_payload = response.json()\n    except (json.JSONDecodeError, httpx.DecodingError) as e:\n        msg = f\"Hook returned non-JSON response: {e}\"\n        logger.warning(msg, exc_info=e)\n        return _HttpOutcome(\n            is_success=False,\n            updated_is_reachable=None,  # server responded — reachability unchanged\n            status_code=status_code,\n            error_message=msg,\n            response_payload=None,\n        )\n\n    if not isinstance(response_payload, dict):\n        msg = f\"Hook returned non-dict JSON (got {type(response_payload).__name__})\"\n        logger.warning(msg)\n        return _HttpOutcome(\n            is_success=False,\n            updated_is_reachable=None,  # server responded — reachability unchanged\n            status_code=status_code,\n            error_message=msg,\n            response_payload=None,\n        )\n\n    return _HttpOutcome(\n        is_success=True,\n        updated_is_reachable=True,\n        status_code=status_code,\n        error_message=None,\n        response_payload=response_payload,\n    )\n\n\ndef _persist_result(\n    *,\n    hook_id: int,\n    outcome: _HttpOutcome,\n    duration_ms: int,\n) -> None:\n    \"\"\"Write the execution log on failure and optionally update is_reachable, each\n    in its own session so a failure in one does not affect the other.\"\"\"\n    # Only write the execution log on failure — success runs are not recorded.\n    # Must not be skipped if the is_reachable update fails (e.g. hook concurrently\n    # deleted between the initial lookup and here).\n    if not outcome.is_success:\n        try:\n            with get_session_with_current_tenant() as log_session:\n                create_hook_execution_log__no_commit(\n                    db_session=log_session,\n                    hook_id=hook_id,\n                    is_success=False,\n                    error_message=outcome.error_message,\n                    status_code=outcome.status_code,\n                    duration_ms=duration_ms,\n                )\n                log_session.commit()\n        except Exception:\n            logger.exception(\n                f\"Failed to persist hook execution log for hook_id={hook_id}\"\n            )\n\n    # Update is_reachable separately — best-effort, non-critical.\n    # None means the value is unchanged (set by the caller to skip the no-op write).\n    # update_hook__no_commit can raise OnyxError(NOT_FOUND) if the hook was\n    # concurrently deleted, so keep this isolated from the log write above.\n    if outcome.updated_is_reachable is not None:\n        try:\n            with get_session_with_current_tenant() as reachable_session:\n                update_hook__no_commit(\n                    db_session=reachable_session,\n                    hook_id=hook_id,\n                    is_reachable=outcome.updated_is_reachable,\n                )\n                reachable_session.commit()\n        except Exception:\n            logger.warning(f\"Failed to update is_reachable for hook_id={hook_id}\")\n\n\n# ---------------------------------------------------------------------------\n# Public API\n# ---------------------------------------------------------------------------\n\n\ndef _execute_hook_inner(\n    hook: Hook,\n    payload: dict[str, Any],\n    response_type: type[T],\n) -> T | HookSoftFailed:\n    \"\"\"Make the HTTP call, validate the response, and return a typed model.\n\n    Raises OnyxError on HARD failure. Returns HookSoftFailed on SOFT failure.\n    \"\"\"\n    timeout = hook.timeout_seconds\n    hook_id = hook.id\n    fail_strategy = hook.fail_strategy\n    endpoint_url = hook.endpoint_url\n    current_is_reachable: bool | None = hook.is_reachable\n\n    if not endpoint_url:\n        raise ValueError(\n            f\"hook_id={hook_id} is active but has no endpoint_url — \"\n            \"active hooks without an endpoint_url must be rejected by _lookup_hook\"\n        )\n\n    start = time.monotonic()\n    response: httpx.Response | None = None\n    exc: Exception | None = None\n    try:\n        api_key: str | None = (\n            hook.api_key.get_value(apply_mask=False) if hook.api_key else None\n        )\n        headers: dict[str, str] = {\"Content-Type\": \"application/json\"}\n        if api_key:\n            headers[\"Authorization\"] = f\"Bearer {api_key}\"\n        with httpx.Client(\n            timeout=timeout, follow_redirects=False\n        ) as client:  # SSRF guard: never follow redirects\n            response = client.post(endpoint_url, json=payload, headers=headers)\n    except Exception as e:\n        exc = e\n    duration_ms = int((time.monotonic() - start) * 1000)\n\n    outcome = _process_response(response=response, exc=exc, timeout=timeout)\n\n    # Validate the response payload against response_type.\n    # A validation failure downgrades the outcome to a failure so it is logged,\n    # is_reachable is left unchanged (server responded — just a bad payload),\n    # and fail_strategy is respected below.\n    validated_model: T | None = None\n    if outcome.is_success and outcome.response_payload is not None:\n        try:\n            validated_model = response_type.model_validate(outcome.response_payload)\n        except ValidationError as e:\n            msg = (\n                f\"Hook response failed validation against {response_type.__name__}: {e}\"\n            )\n            outcome = _HttpOutcome(\n                is_success=False,\n                updated_is_reachable=None,  # server responded — reachability unchanged\n                status_code=outcome.status_code,\n                error_message=msg,\n                response_payload=None,\n            )\n\n    # Skip the is_reachable write when the value would not change — avoids a\n    # no-op DB round-trip on every call when the hook is already in the expected state.\n    if outcome.updated_is_reachable == current_is_reachable:\n        outcome = outcome.model_copy(update={\"updated_is_reachable\": None})\n    _persist_result(hook_id=hook_id, outcome=outcome, duration_ms=duration_ms)\n\n    if not outcome.is_success:\n        if fail_strategy == HookFailStrategy.HARD:\n            raise OnyxError(\n                OnyxErrorCode.HOOK_EXECUTION_FAILED,\n                outcome.error_message or \"Hook execution failed.\",\n            )\n        logger.warning(\n            f\"Hook execution failed (soft fail) for hook_id={hook_id}: {outcome.error_message}\"\n        )\n        return HookSoftFailed()\n\n    if validated_model is None:\n        raise OnyxError(\n            OnyxErrorCode.INTERNAL_ERROR,\n            f\"validated_model is None for successful hook call (hook_id={hook_id})\",\n        )\n    return validated_model\n\n\ndef _execute_hook_impl(\n    *,\n    db_session: Session,\n    hook_point: HookPoint,\n    payload: dict[str, Any],\n    response_type: type[T],\n) -> T | HookSkipped | HookSoftFailed:\n    \"\"\"EE implementation — loaded by CE's execute_hook via fetch_versioned_implementation.\n\n    Returns HookSkipped if no active hook is configured, HookSoftFailed if the\n    hook failed with SOFT fail strategy, or a validated response model on success.\n    Raises OnyxError on HARD failure or if the hook is misconfigured.\n    \"\"\"\n    hook = _lookup_hook(db_session, hook_point)\n    if isinstance(hook, HookSkipped):\n        return hook\n\n    fail_strategy = hook.fail_strategy\n    hook_id = hook.id\n\n    try:\n        return _execute_hook_inner(hook, payload, response_type)\n    except Exception:\n        if fail_strategy == HookFailStrategy.SOFT:\n            logger.exception(\n                f\"Unexpected error in hook execution (soft fail) for hook_id={hook_id}\"\n            )\n            return HookSoftFailed()\n        raise\n"
  },
  {
    "path": "backend/ee/onyx/main.py",
    "content": "from collections.abc import AsyncGenerator\nfrom contextlib import asynccontextmanager\n\nfrom fastapi import FastAPI\nfrom httpx_oauth.clients.google import GoogleOAuth2\n\nfrom ee.onyx.server.analytics.api import router as analytics_router\nfrom ee.onyx.server.auth_check import check_ee_router_auth\nfrom ee.onyx.server.billing.api import router as billing_router\nfrom ee.onyx.server.documents.cc_pair import router as ee_document_cc_pair_router\nfrom ee.onyx.server.enterprise_settings.api import (\n    admin_router as enterprise_settings_admin_router,\n)\nfrom ee.onyx.server.enterprise_settings.api import (\n    basic_router as enterprise_settings_router,\n)\nfrom ee.onyx.server.evals.api import router as evals_router\nfrom ee.onyx.server.features.hooks.api import router as hook_router\nfrom ee.onyx.server.license.api import router as license_router\nfrom ee.onyx.server.manage.standard_answer import router as standard_answer_router\nfrom ee.onyx.server.middleware.license_enforcement import (\n    add_license_enforcement_middleware,\n)\nfrom ee.onyx.server.middleware.tenant_tracking import (\n    add_api_server_tenant_id_middleware,\n)\nfrom ee.onyx.server.oauth.api import router as ee_oauth_router\nfrom ee.onyx.server.query_and_chat.query_backend import (\n    basic_router as ee_query_router,\n)\nfrom ee.onyx.server.query_and_chat.search_backend import router as search_router\nfrom ee.onyx.server.query_history.api import router as query_history_router\nfrom ee.onyx.server.reporting.usage_export_api import router as usage_export_router\nfrom ee.onyx.server.scim.api import register_scim_exception_handlers\nfrom ee.onyx.server.scim.api import scim_router\nfrom ee.onyx.server.seeding import seed_db\nfrom ee.onyx.server.tenants.api import router as tenants_router\nfrom ee.onyx.server.token_rate_limits.api import (\n    router as token_rate_limit_settings_router,\n)\nfrom ee.onyx.server.user_group.api import router as user_group_router\nfrom ee.onyx.utils.encryption import test_encryption\nfrom onyx.auth.users import auth_backend\nfrom onyx.auth.users import create_onyx_oauth_router\nfrom onyx.auth.users import fastapi_users\nfrom onyx.configs.app_configs import AUTH_TYPE\nfrom onyx.configs.app_configs import OAUTH_CLIENT_ID\nfrom onyx.configs.app_configs import OAUTH_CLIENT_SECRET\nfrom onyx.configs.app_configs import USER_AUTH_SECRET\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.constants import AuthType\nfrom onyx.main import get_application as get_application_base\nfrom onyx.main import include_auth_router_with_prefix\nfrom onyx.main import include_router_with_global_prefix_prepended\nfrom onyx.main import lifespan as lifespan_base\nfrom onyx.main import use_route_function_names_as_operation_ids\nfrom onyx.server.query_and_chat.query_backend import (\n    basic_router as query_router,\n)\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import global_version\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n\n@asynccontextmanager\nasync def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:\n    \"\"\"Small wrapper around the lifespan of the MIT application.\n    Basically just calls the base lifespan, and then adds EE-only\n    steps after.\"\"\"\n\n    async with lifespan_base(app):\n        # seed the Onyx environment with LLMs, Assistants, etc. based on an optional\n        # environment variable. Used to automate deployment for multiple environments.\n        seed_db()\n\n        yield\n\n\ndef get_application() -> FastAPI:\n    # Anything that happens at import time is not guaranteed to be running ee-version\n    # Anything after the server startup will be running ee version\n    global_version.set_ee()\n\n    test_encryption()\n\n    application = get_application_base(lifespan_override=lifespan)\n\n    if MULTI_TENANT:\n        add_api_server_tenant_id_middleware(application, logger)\n    else:\n        # License enforcement middleware for self-hosted deployments only\n        # Checks LICENSE_ENFORCEMENT_ENABLED at runtime (can be toggled without restart)\n        # MT deployments use control plane gating via is_tenant_gated() instead\n        add_license_enforcement_middleware(application, logger)\n\n    if AUTH_TYPE == AuthType.CLOUD:\n        # For Google OAuth, refresh tokens are requested by:\n        # 1. Adding the right scopes\n        # 2. Properly configuring OAuth in Google Cloud Console to allow offline access\n        oauth_client = GoogleOAuth2(\n            OAUTH_CLIENT_ID,\n            OAUTH_CLIENT_SECRET,\n            # Use standard scopes that include profile and email\n            scopes=[\"openid\", \"email\", \"profile\"],\n        )\n        include_auth_router_with_prefix(\n            application,\n            create_onyx_oauth_router(\n                oauth_client,\n                auth_backend,\n                USER_AUTH_SECRET,\n                associate_by_email=True,\n                is_verified_by_default=True,\n                # Points the user back to the login page\n                redirect_url=f\"{WEB_DOMAIN}/auth/oauth/callback\",\n            ),\n            prefix=\"/auth/oauth\",\n        )\n\n        # Need basic auth router for `logout` endpoint\n        include_auth_router_with_prefix(\n            application,\n            fastapi_users.get_logout_router(auth_backend),\n            prefix=\"/auth\",\n        )\n\n    # RBAC / group access control\n    include_router_with_global_prefix_prepended(application, user_group_router)\n    # Analytics endpoints\n    include_router_with_global_prefix_prepended(application, analytics_router)\n    include_router_with_global_prefix_prepended(application, query_history_router)\n    # EE only backend APIs\n    include_router_with_global_prefix_prepended(application, query_router)\n    include_router_with_global_prefix_prepended(application, ee_query_router)\n    include_router_with_global_prefix_prepended(application, search_router)\n    include_router_with_global_prefix_prepended(application, standard_answer_router)\n    include_router_with_global_prefix_prepended(application, ee_oauth_router)\n    include_router_with_global_prefix_prepended(application, ee_document_cc_pair_router)\n    include_router_with_global_prefix_prepended(application, evals_router)\n    include_router_with_global_prefix_prepended(application, hook_router)\n\n    # Enterprise-only global settings\n    include_router_with_global_prefix_prepended(\n        application, enterprise_settings_admin_router\n    )\n    # Token rate limit settings\n    include_router_with_global_prefix_prepended(\n        application, token_rate_limit_settings_router\n    )\n    include_router_with_global_prefix_prepended(application, enterprise_settings_router)\n    include_router_with_global_prefix_prepended(application, usage_export_router)\n    # License management\n    include_router_with_global_prefix_prepended(application, license_router)\n\n    # Unified billing API - always registered in EE.\n    # Each endpoint is protected by the `current_admin_user` dependency (admin auth).\n    include_router_with_global_prefix_prepended(application, billing_router)\n\n    if MULTI_TENANT:\n        # Tenant management\n        include_router_with_global_prefix_prepended(application, tenants_router)\n\n    # SCIM 2.0 — protocol endpoints (unauthenticated by Onyx session auth;\n    # they use their own SCIM bearer token auth).\n    # Not behind APP_API_PREFIX because IdPs expect /scim/v2/... directly.\n    application.include_router(scim_router)\n    register_scim_exception_handlers(application)\n\n    # Ensure all routes have auth enabled or are explicitly marked as public\n    check_ee_router_auth(application)\n\n    # for debugging discovered routes\n    # for route in application.router.routes:\n    #     print(f\"Path: {route.path}, Methods: {route.methods}\")\n\n    use_route_function_names_as_operation_ids(application)\n\n    return application\n"
  },
  {
    "path": "backend/ee/onyx/onyxbot/slack/handlers/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/onyxbot/slack/handlers/handle_standard_answers.py",
    "content": "from slack_sdk import WebClient\nfrom slack_sdk.models.blocks import ActionsBlock\nfrom slack_sdk.models.blocks import Block\nfrom slack_sdk.models.blocks import ButtonElement\nfrom slack_sdk.models.blocks import SectionBlock\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.standard_answer import fetch_standard_answer_categories_by_names\nfrom ee.onyx.db.standard_answer import find_matching_standard_answers\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_REACT_EMOJI\nfrom onyx.db.chat import create_chat_session\nfrom onyx.db.chat import create_new_chat_message\nfrom onyx.db.chat import get_chat_messages_by_sessions\nfrom onyx.db.chat import get_chat_sessions_by_slack_thread_id\nfrom onyx.db.chat import get_or_create_root_message\nfrom onyx.db.models import SlackChannelConfig\nfrom onyx.db.models import StandardAnswer as StandardAnswerModel\nfrom onyx.onyxbot.slack.blocks import get_restate_blocks\nfrom onyx.onyxbot.slack.constants import GENERATE_ANSWER_BUTTON_ACTION_ID\nfrom onyx.onyxbot.slack.handlers.utils import send_team_member_message\nfrom onyx.onyxbot.slack.models import SlackMessageInfo\nfrom onyx.onyxbot.slack.utils import respond_in_thread_or_channel\nfrom onyx.onyxbot.slack.utils import update_emote_react\nfrom onyx.server.manage.models import StandardAnswer as PydanticStandardAnswer\nfrom onyx.utils.logger import OnyxLoggingAdapter\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef build_standard_answer_blocks(\n    answer_message: str,\n) -> list[Block]:\n    generate_button_block = ButtonElement(\n        action_id=GENERATE_ANSWER_BUTTON_ACTION_ID,\n        text=\"Generate Full Answer\",\n    )\n    answer_block = SectionBlock(text=answer_message)\n    return [\n        answer_block,\n        ActionsBlock(\n            elements=[generate_button_block],\n        ),\n    ]\n\n\ndef oneoff_standard_answers(\n    message: str,\n    slack_bot_categories: list[str],\n    db_session: Session,\n) -> list[PydanticStandardAnswer]:\n    \"\"\"\n    Respond to the user message if it matches any configured standard answers.\n\n    Returns a list of matching StandardAnswers if found, otherwise None.\n    \"\"\"\n    configured_standard_answers = {\n        standard_answer\n        for category in fetch_standard_answer_categories_by_names(\n            slack_bot_categories, db_session=db_session\n        )\n        for standard_answer in category.standard_answers\n    }\n\n    matching_standard_answers = find_matching_standard_answers(\n        query=message,\n        id_in=[answer.id for answer in configured_standard_answers],\n        db_session=db_session,\n    )\n\n    server_standard_answers = [\n        PydanticStandardAnswer.from_model(standard_answer_model)\n        for (standard_answer_model, _) in matching_standard_answers\n    ]\n    return server_standard_answers\n\n\ndef _handle_standard_answers(\n    message_info: SlackMessageInfo,\n    receiver_ids: list[str] | None,\n    slack_channel_config: SlackChannelConfig,\n    logger: OnyxLoggingAdapter,\n    client: WebClient,\n    db_session: Session,\n) -> bool:\n    \"\"\"\n    Potentially respond to the user message depending on whether the user's message matches\n    any of the configured standard answers and also whether those answers have already been\n    provided in the current thread.\n\n    Returns True if standard answers are found to match the user's message and therefore,\n    we still need to respond to the users.\n    \"\"\"\n\n    slack_thread_id = message_info.thread_to_respond\n    configured_standard_answer_categories = (\n        slack_channel_config.standard_answer_categories\n    )\n    configured_standard_answers = set(\n        [\n            standard_answer\n            for standard_answer_category in configured_standard_answer_categories\n            for standard_answer in standard_answer_category.standard_answers\n        ]\n    )\n    query_msg = message_info.thread_messages[-1]\n\n    if slack_thread_id is None:\n        used_standard_answer_ids = set([])\n    else:\n        chat_sessions = get_chat_sessions_by_slack_thread_id(\n            slack_thread_id=slack_thread_id,\n            user_id=None,\n            db_session=db_session,\n        )\n        chat_messages = get_chat_messages_by_sessions(\n            chat_session_ids=[chat_session.id for chat_session in chat_sessions],\n            user_id=None,\n            db_session=db_session,\n            skip_permission_check=True,\n        )\n        used_standard_answer_ids = set(\n            [\n                standard_answer.id\n                for chat_message in chat_messages\n                for standard_answer in chat_message.standard_answers\n            ]\n        )\n\n    usable_standard_answers = configured_standard_answers.difference(\n        used_standard_answer_ids\n    )\n\n    matching_standard_answers: list[tuple[StandardAnswerModel, str]] = []\n    if usable_standard_answers:\n        matching_standard_answers = find_matching_standard_answers(\n            query=query_msg.message,\n            id_in=[standard_answer.id for standard_answer in usable_standard_answers],\n            db_session=db_session,\n        )\n\n    if matching_standard_answers:\n        chat_session = create_chat_session(\n            db_session=db_session,\n            description=\"\",\n            user_id=None,\n            persona_id=(\n                slack_channel_config.persona.id if slack_channel_config.persona else 0\n            ),\n            onyxbot_flow=True,\n            slack_thread_id=slack_thread_id,\n        )\n\n        root_message = get_or_create_root_message(\n            chat_session_id=chat_session.id, db_session=db_session\n        )\n\n        new_user_message = create_new_chat_message(\n            chat_session_id=chat_session.id,\n            parent_message=root_message,\n            message=query_msg.message,\n            token_count=0,\n            message_type=MessageType.USER,\n            db_session=db_session,\n            commit=True,\n        )\n\n        formatted_answers = []\n        for standard_answer, match_str in matching_standard_answers:\n            since_you_mentioned_pretext = (\n                f'Since your question contains \"_{match_str}_\"'\n            )\n            block_quotified_answer = \">\" + standard_answer.answer.replace(\"\\n\", \"\\n> \")\n            formatted_answer = f\"{since_you_mentioned_pretext}, I thought this might be useful: \\n\\n{block_quotified_answer}\"\n            formatted_answers.append(formatted_answer)\n        answer_message = \"\\n\\n\".join(formatted_answers)\n\n        chat_message = create_new_chat_message(\n            chat_session_id=chat_session.id,\n            parent_message=new_user_message,\n            message=answer_message,\n            token_count=0,\n            message_type=MessageType.ASSISTANT,\n            error=None,\n            db_session=db_session,\n            commit=False,\n        )\n        # attach the standard answers to the chat message\n        chat_message.standard_answers = [\n            standard_answer for standard_answer, _ in matching_standard_answers\n        ]\n        db_session.commit()\n\n        update_emote_react(\n            emoji=ONYX_BOT_REACT_EMOJI,\n            channel=message_info.channel_to_respond,\n            message_ts=message_info.msg_to_respond,\n            remove=True,\n            client=client,\n        )\n\n        restate_question_blocks = get_restate_blocks(\n            msg=query_msg.message,\n            is_slash_command=message_info.is_slash_command,\n        )\n\n        answer_blocks = build_standard_answer_blocks(\n            answer_message=answer_message,\n        )\n\n        all_blocks = restate_question_blocks + answer_blocks\n\n        try:\n            respond_in_thread_or_channel(\n                client=client,\n                channel=message_info.channel_to_respond,\n                receiver_ids=receiver_ids,\n                text=\"Hello! Onyx has some results for you!\",\n                blocks=all_blocks,\n                thread_ts=message_info.msg_to_respond,\n                unfurl=False,\n            )\n\n            if receiver_ids and slack_thread_id:\n                send_team_member_message(\n                    client=client,\n                    channel=message_info.channel_to_respond,\n                    thread_ts=slack_thread_id,\n                    receiver_ids=receiver_ids,\n                )\n\n            return True\n        except Exception as e:\n            logger.exception(f\"Unable to send standard answer message: {e}\")\n            return False\n    else:\n        return False\n"
  },
  {
    "path": "backend/ee/onyx/prompts/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/prompts/query_expansion.py",
    "content": "# Single message is likely most reliable and generally better for this task\n# No final reminders at the end since the user query is expected to be short\n# If it is not short, it should go into the chat flow so we do not need to account for this.\nKEYWORD_EXPANSION_PROMPT = \"\"\"\nGenerate a set of keyword-only queries to help find relevant documents for the provided query. \\\nThese queries will be passed to a bm25-based keyword search engine. \\\nProvide a single query per line (where each query consists of one or more keywords). \\\nThe queries must be purely keywords and not contain any filler natural language. \\\nThe each query should have as few keywords as necessary to represent the user's search intent. \\\nIf there are no useful expansions, simply return the original query with no additional keyword queries. \\\nCRITICAL: Do not include any additional formatting, comments, or anything aside from the keyword queries.\n\nThe user query is:\n{user_query}\n\"\"\".strip()\n\n\nQUERY_TYPE_PROMPT = \"\"\"\nDetermine if the provided query is better suited for a keyword search or a semantic search.\nRespond with \"keyword\" or \"semantic\" literally and nothing else.\nDo not provide any additional text or reasoning to your response.\n\nCRITICAL: It must only be 1 single word - EITHER \"keyword\" or \"semantic\".\n\nThe user query is:\n{user_query}\n\"\"\".strip()\n"
  },
  {
    "path": "backend/ee/onyx/prompts/search_flow_classification.py",
    "content": "# ruff: noqa: E501, W605 start\nSEARCH_CLASS = \"search\"\nCHAT_CLASS = \"chat\"\n\n# Will note that with many larger LLMs the latency on running this prompt via third party APIs is as high as 2 seconds which is too slow for many\n# use cases.\nSEARCH_CHAT_PROMPT = f\"\"\"\nDetermine if the following query is better suited for a search UI or a chat UI. Respond with \"{SEARCH_CLASS}\" or \"{CHAT_CLASS}\" literally and nothing else. \\\nDo not provide any additional text or reasoning to your response. CRITICAL, IT MUST ONLY BE 1 SINGLE WORD - EITHER \"{SEARCH_CLASS}\" or \"{CHAT_CLASS}\".\n\n# Classification Guidelines:\n## {SEARCH_CLASS}\n- If the query consists entirely of keywords or query doesn't require any answer from the AI\n- If the query is a short statement that seems like a search query rather than a question\n- If the query feels nonsensical or is a short phrase that possibly describes a document or information that could be found in a internal document\n\n### Examples of {SEARCH_CLASS} queries:\n- Find me the document that goes over the onboarding process for a new hire\n- Pull requests since last week\n- Sales Runbook AMEA Region\n- Procurement process\n- Retrieve the PRD for project X\n\n## {CHAT_CLASS}\n- If the query is asking a question that requires an answer rather than a document\n- If the query is asking for a solution, suggestion, or general help\n- If the query is seeking information that is on the web and likely not in a company internal document\n- If the query should be answered without any context from additional documents or searches\n\n### Examples of {CHAT_CLASS} queries:\n- What led us to win the deal with company X? (seeking answer)\n- Google Drive not sync-ing files to my computer (seeking solution)\n- Review my email: <whatever the email is> (general help)\n- Write me a script to... (general help)\n- Cheap flights Europe to Tokyo (information likely found on the web, not internal)\n\n# User Query:\n{{user_query}}\n\nREMEMBER TO ONLY RESPOND WITH \"{SEARCH_CLASS}\" OR \"{CHAT_CLASS}\" AND NOTHING ELSE.\n\"\"\".strip()\n# ruff: noqa: E501, W605 end\n"
  },
  {
    "path": "backend/ee/onyx/search/process_search_query.py",
    "content": "from collections.abc import Generator\n\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.search import create_search_query\nfrom ee.onyx.secondary_llm_flows.query_expansion import expand_keywords\nfrom ee.onyx.server.query_and_chat.models import SearchDocWithContent\nfrom ee.onyx.server.query_and_chat.models import SearchFullResponse\nfrom ee.onyx.server.query_and_chat.models import SendSearchQueryRequest\nfrom ee.onyx.server.query_and_chat.streaming_models import LLMSelectedDocsPacket\nfrom ee.onyx.server.query_and_chat.streaming_models import SearchDocsPacket\nfrom ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket\nfrom ee.onyx.server.query_and_chat.streaming_models import SearchQueriesPacket\nfrom onyx.context.search.models import BaseFilters\nfrom onyx.context.search.models import ChunkSearchRequest\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.pipeline import merge_individual_chunks\nfrom onyx.context.search.pipeline import search_pipeline\nfrom onyx.db.models import User\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.document_index.factory import get_default_document_index\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.llm.factory import get_default_llm\nfrom onyx.secondary_llm_flows.document_filter import select_sections_for_expansion\nfrom onyx.tools.tool_implementations.search.search_utils import (\n    weighted_reciprocal_rank_fusion,\n)\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\n\nlogger = setup_logger()\n\n\n# This is just a heuristic that also happens to work well for the UI/UX\n# Users would not find it useful to see a huge list of suggested docs\n# but more than 1 is also likely good as many questions may target more than 1 doc.\nTARGET_NUM_SECTIONS_FOR_LLM_SELECTION = 3\n\n\ndef _run_single_search(\n    query: str,\n    filters: BaseFilters | None,\n    document_index: DocumentIndex,\n    user: User,\n    db_session: Session,\n    num_hits: int | None = None,\n    hybrid_alpha: float | None = None,\n) -> list[InferenceChunk]:\n    \"\"\"Execute a single search query and return chunks.\"\"\"\n    chunk_search_request = ChunkSearchRequest(\n        query=query,\n        user_selected_filters=filters,\n        limit=num_hits,\n        hybrid_alpha=hybrid_alpha,\n    )\n\n    return search_pipeline(\n        chunk_search_request=chunk_search_request,\n        document_index=document_index,\n        user=user,\n        persona_search_info=None,\n        db_session=db_session,\n    )\n\n\ndef stream_search_query(\n    request: SendSearchQueryRequest,\n    user: User,\n    db_session: Session,\n) -> Generator[\n    SearchQueriesPacket | SearchDocsPacket | LLMSelectedDocsPacket | SearchErrorPacket,\n    None,\n    None,\n]:\n    \"\"\"\n    Core search function that yields streaming packets.\n    Used by both streaming and non-streaming endpoints.\n    \"\"\"\n    # Get document index.\n    search_settings = get_current_search_settings(db_session)\n    # This flow is for search so we do not get all indices.\n    document_index = get_default_document_index(search_settings, None, db_session)\n\n    # Determine queries to execute\n    original_query = request.search_query\n    keyword_expansions: list[str] = []\n\n    if request.run_query_expansion:\n        try:\n            llm = get_default_llm()\n            keyword_expansions = expand_keywords(\n                user_query=original_query,\n                llm=llm,\n            )\n            if keyword_expansions:\n                logger.debug(\n                    f\"Query expansion generated {len(keyword_expansions)} keyword queries\"\n                )\n        except Exception as e:\n            logger.warning(f\"Query expansion failed: {e}; using original query only.\")\n            keyword_expansions = []\n\n    # Build list of all executed queries for tracking\n    all_executed_queries = [original_query] + keyword_expansions\n\n    if not user.is_anonymous:\n        create_search_query(\n            db_session=db_session,\n            user_id=user.id,\n            query=request.search_query,\n            query_expansions=keyword_expansions if keyword_expansions else None,\n        )\n\n    # Execute search(es)\n    if not keyword_expansions:\n        # Single query (original only) - no threading needed\n        chunks = _run_single_search(\n            query=original_query,\n            filters=request.filters,\n            document_index=document_index,\n            user=user,\n            db_session=db_session,\n            num_hits=request.num_hits,\n            hybrid_alpha=request.hybrid_alpha,\n        )\n    else:\n        # Multiple queries - run in parallel and merge with RRF\n        # First query is the original (semantic), rest are keyword expansions\n        search_functions = [\n            (\n                _run_single_search,\n                (\n                    query,\n                    request.filters,\n                    document_index,\n                    user,\n                    db_session,\n                    request.num_hits,\n                    request.hybrid_alpha,\n                ),\n            )\n            for query in all_executed_queries\n        ]\n\n        # Run all searches in parallel\n        all_search_results: list[list[InferenceChunk]] = (\n            run_functions_tuples_in_parallel(\n                search_functions,\n                allow_failures=True,\n            )\n        )\n\n        # Separate original query results from keyword expansion results\n        # Note that in rare cases, the original query may have failed and so we may be\n        # just overweighting one set of keyword results, should be not a big deal though.\n        original_result = all_search_results[0] if all_search_results else []\n        keyword_results = all_search_results[1:] if len(all_search_results) > 1 else []\n\n        # Build valid results and weights\n        # Original query (semantic): weight 2.0\n        # Keyword expansions: weight 1.0 each\n        valid_results: list[list[InferenceChunk]] = []\n        weights: list[float] = []\n\n        if original_result:\n            valid_results.append(original_result)\n            weights.append(2.0)\n\n        for keyword_result in keyword_results:\n            if keyword_result:\n                valid_results.append(keyword_result)\n                weights.append(1.0)\n\n        if not valid_results:\n            logger.warning(\"All parallel searches returned empty results\")\n            chunks = []\n        else:\n            chunks = weighted_reciprocal_rank_fusion(\n                ranked_results=valid_results,\n                weights=weights,\n                id_extractor=lambda chunk: f\"{chunk.document_id}_{chunk.chunk_id}\",\n            )\n\n    # Merge chunks into sections\n    sections = merge_individual_chunks(chunks)\n\n    # Truncate to the requested number of hits\n    sections = sections[: request.num_hits]\n\n    # Apply LLM document selection if requested\n    # num_docs_fed_to_llm_selection specifies how many sections to feed to the LLM for selection\n    # The LLM will always try to select TARGET_NUM_SECTIONS_FOR_LLM_SELECTION sections from those fed to it\n    # llm_selected_doc_ids will be:\n    #   - None if LLM selection was not requested or failed\n    #   - Empty list if LLM selection ran but selected nothing\n    #   - List of doc IDs if LLM selection succeeded\n    run_llm_selection = (\n        request.num_docs_fed_to_llm_selection is not None\n        and request.num_docs_fed_to_llm_selection >= 1\n    )\n    llm_selected_doc_ids: list[str] | None = None\n    llm_selection_failed = False\n    if run_llm_selection and sections:\n        try:\n            llm = get_default_llm()\n            sections_to_evaluate = sections[: request.num_docs_fed_to_llm_selection]\n            selected_sections, _ = select_sections_for_expansion(\n                sections=sections_to_evaluate,\n                user_query=original_query,\n                llm=llm,\n                max_sections=TARGET_NUM_SECTIONS_FOR_LLM_SELECTION,\n                try_to_fill_to_max=True,\n            )\n            # Extract unique document IDs from selected sections (may be empty)\n            llm_selected_doc_ids = list(\n                dict.fromkeys(\n                    section.center_chunk.document_id for section in selected_sections\n                )\n            )\n            logger.debug(\n                f\"LLM document selection evaluated {len(sections_to_evaluate)} sections, \"\n                f\"selected {len(selected_sections)} sections with doc IDs: {llm_selected_doc_ids}\"\n            )\n        except Exception as e:\n            # Allowing a blanket exception here as this step is not critical and the rest of the results are still valid\n            logger.warning(f\"LLM document selection failed: {e}\")\n            llm_selection_failed = True\n    elif run_llm_selection and not sections:\n        # LLM selection requested but no sections to evaluate\n        llm_selected_doc_ids = []\n\n    # Convert to SearchDocWithContent list, optionally including content\n    search_docs = SearchDocWithContent.from_inference_sections(\n        sections,\n        include_content=request.include_content,\n        is_internet=False,\n    )\n\n    # Yield queries packet\n    yield SearchQueriesPacket(all_executed_queries=all_executed_queries)\n\n    # Yield docs packet\n    yield SearchDocsPacket(search_docs=search_docs)\n\n    # Yield LLM selected docs packet if LLM selection was requested\n    # - llm_selected_doc_ids is None if selection failed\n    # - llm_selected_doc_ids is empty list if no docs were selected\n    # - llm_selected_doc_ids is list of IDs if docs were selected\n    if run_llm_selection:\n        yield LLMSelectedDocsPacket(\n            llm_selected_doc_ids=None if llm_selection_failed else llm_selected_doc_ids\n        )\n\n\ndef gather_search_stream(\n    packets: Generator[\n        SearchQueriesPacket\n        | SearchDocsPacket\n        | LLMSelectedDocsPacket\n        | SearchErrorPacket,\n        None,\n        None,\n    ],\n) -> SearchFullResponse:\n    \"\"\"\n    Aggregate all streaming packets into SearchFullResponse.\n    \"\"\"\n    all_executed_queries: list[str] = []\n    search_docs: list[SearchDocWithContent] = []\n    llm_selected_doc_ids: list[str] | None = None\n    error: str | None = None\n\n    for packet in packets:\n        if isinstance(packet, SearchQueriesPacket):\n            all_executed_queries = packet.all_executed_queries\n        elif isinstance(packet, SearchDocsPacket):\n            search_docs = packet.search_docs\n        elif isinstance(packet, LLMSelectedDocsPacket):\n            llm_selected_doc_ids = packet.llm_selected_doc_ids\n        elif isinstance(packet, SearchErrorPacket):\n            error = packet.error\n\n    return SearchFullResponse(\n        all_executed_queries=all_executed_queries,\n        search_docs=search_docs,\n        doc_selection_reasoning=None,\n        llm_selected_doc_ids=llm_selected_doc_ids,\n        error=error,\n    )\n"
  },
  {
    "path": "backend/ee/onyx/secondary_llm_flows/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/secondary_llm_flows/query_expansion.py",
    "content": "import re\n\nfrom ee.onyx.prompts.query_expansion import KEYWORD_EXPANSION_PROMPT\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.models import LanguageModelInput\nfrom onyx.llm.models import ReasoningEffort\nfrom onyx.llm.models import UserMessage\nfrom onyx.llm.utils import llm_response_to_string\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Pattern to remove common LLM artifacts: brackets, quotes, list markers, etc.\nCLEANUP_PATTERN = re.compile(r'[\\[\\]\"\\'`]')\n\n\ndef _clean_keyword_line(line: str) -> str:\n    \"\"\"Clean a keyword line by removing common LLM artifacts.\n\n    Removes brackets, quotes, and other characters that LLMs may accidentally\n    include in their output.\n    \"\"\"\n    # Remove common artifacts\n    cleaned = CLEANUP_PATTERN.sub(\"\", line)\n    # Remove leading list markers like \"1.\", \"2.\", \"-\", \"*\"\n    cleaned = re.sub(r\"^\\s*(?:\\d+[\\.\\)]\\s*|[-*]\\s*)\", \"\", cleaned)\n    return cleaned.strip()\n\n\ndef expand_keywords(\n    user_query: str,\n    llm: LLM,\n) -> list[str]:\n    \"\"\"Expand a user query into multiple keyword-only queries for BM25 search.\n\n    Uses an LLM to generate keyword-based search queries that capture different\n    aspects of the user's search intent. Returns only the expanded queries,\n    not the original query.\n\n    Args:\n        user_query: The original search query from the user\n        llm: Language model to use for keyword expansion\n\n    Returns:\n        List of expanded keyword queries (excluding the original query).\n        Returns empty list if expansion fails or produces no useful expansions.\n    \"\"\"\n    messages: LanguageModelInput = [\n        UserMessage(content=KEYWORD_EXPANSION_PROMPT.format(user_query=user_query))\n    ]\n\n    try:\n        response = llm.invoke(\n            prompt=messages,\n            reasoning_effort=ReasoningEffort.OFF,\n            # Limit output - we only expect a few short keyword queries\n            max_tokens=150,\n        )\n\n        content = llm_response_to_string(response).strip()\n\n        if not content:\n            logger.warning(\"Keyword expansion returned empty response.\")\n            return []\n\n        # Parse response - each line is a separate keyword query\n        # Clean each line to remove LLM artifacts and drop empty lines\n        parsed_queries = []\n        for line in content.strip().split(\"\\n\"):\n            cleaned = _clean_keyword_line(line)\n            if cleaned:\n                parsed_queries.append(cleaned)\n\n        if not parsed_queries:\n            logger.warning(\"Keyword expansion parsing returned no queries.\")\n            return []\n\n        # Filter out duplicates and queries that match the original\n        expanded_queries: list[str] = []\n        seen_lower: set[str] = {user_query.lower()}\n        for query in parsed_queries:\n            query_lower = query.lower()\n            if query_lower not in seen_lower:\n                seen_lower.add(query_lower)\n                expanded_queries.append(query)\n\n        logger.debug(f\"Keyword expansion generated {len(expanded_queries)} queries\")\n        return expanded_queries\n\n    except Exception as e:\n        logger.warning(f\"Keyword expansion failed: {e}\")\n        return []\n"
  },
  {
    "path": "backend/ee/onyx/secondary_llm_flows/search_flow_classification.py",
    "content": "from ee.onyx.prompts.search_flow_classification import CHAT_CLASS\nfrom ee.onyx.prompts.search_flow_classification import SEARCH_CHAT_PROMPT\nfrom ee.onyx.prompts.search_flow_classification import SEARCH_CLASS\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.models import LanguageModelInput\nfrom onyx.llm.models import ReasoningEffort\nfrom onyx.llm.models import UserMessage\nfrom onyx.llm.utils import llm_response_to_string\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.timing import log_function_time\n\nlogger = setup_logger()\n\n\n@log_function_time(print_only=True)\ndef classify_is_search_flow(\n    query: str,\n    llm: LLM,\n) -> bool:\n    messages: LanguageModelInput = [\n        UserMessage(content=SEARCH_CHAT_PROMPT.format(user_query=query))\n    ]\n    response = llm.invoke(\n        prompt=messages,\n        reasoning_effort=ReasoningEffort.OFF,\n        # Nothing can happen in the UI until this call finishes so we need to be aggressive with the timeout\n        timeout_override=2,\n        # Well more than necessary but just to ensure completion and in case it succeeds with classifying but\n        # ends up rambling\n        max_tokens=20,\n    )\n\n    content = llm_response_to_string(response).strip().lower()\n    if not content:\n        logger.warning(\n            \"Search flow classification returned empty response; defaulting to chat flow.\"\n        )\n        return False\n\n    # Prefer chat if both appear.\n    if CHAT_CLASS in content:\n        return False\n    if SEARCH_CLASS in content:\n        return True\n\n    logger.warning(\n        \"Search flow classification returned unexpected response; defaulting to chat flow. Response=%r\",\n        content,\n    )\n    return False\n"
  },
  {
    "path": "backend/ee/onyx/server/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/server/analytics/api.py",
    "content": "import datetime\nfrom collections import defaultdict\nfrom typing import List\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.analytics import fetch_assistant_message_analytics\nfrom ee.onyx.db.analytics import fetch_assistant_unique_users\nfrom ee.onyx.db.analytics import fetch_assistant_unique_users_total\nfrom ee.onyx.db.analytics import fetch_onyxbot_analytics\nfrom ee.onyx.db.analytics import fetch_per_user_query_analytics\nfrom ee.onyx.db.analytics import fetch_persona_message_analytics\nfrom ee.onyx.db.analytics import fetch_persona_unique_users\nfrom ee.onyx.db.analytics import fetch_query_analytics\nfrom ee.onyx.db.analytics import user_can_view_assistant_stats\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\n\nrouter = APIRouter(prefix=\"/analytics\", tags=PUBLIC_API_TAGS)\n\n\n_DEFAULT_LOOKBACK_DAYS = 30\n\n\nclass QueryAnalyticsResponse(BaseModel):\n    total_queries: int\n    total_likes: int\n    total_dislikes: int\n    date: datetime.date\n\n\n@router.get(\"/admin/query\")\ndef get_query_analytics(\n    start: datetime.datetime | None = None,\n    end: datetime.datetime | None = None,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[QueryAnalyticsResponse]:\n    daily_query_usage_info = fetch_query_analytics(\n        start=start\n        or (\n            datetime.datetime.utcnow() - datetime.timedelta(days=_DEFAULT_LOOKBACK_DAYS)\n        ),  # default is 30d lookback\n        end=end or datetime.datetime.utcnow(),\n        db_session=db_session,\n    )\n    return [\n        QueryAnalyticsResponse(\n            total_queries=total_queries,\n            total_likes=total_likes,\n            total_dislikes=total_dislikes,\n            date=date,\n        )\n        for total_queries, total_likes, total_dislikes, date in daily_query_usage_info\n    ]\n\n\nclass UserAnalyticsResponse(BaseModel):\n    total_active_users: int\n    date: datetime.date\n\n\n@router.get(\"/admin/user\")\ndef get_user_analytics(\n    start: datetime.datetime | None = None,\n    end: datetime.datetime | None = None,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[UserAnalyticsResponse]:\n    daily_query_usage_info_per_user = fetch_per_user_query_analytics(\n        start=start\n        or (\n            datetime.datetime.utcnow() - datetime.timedelta(days=_DEFAULT_LOOKBACK_DAYS)\n        ),  # default is 30d lookback\n        end=end or datetime.datetime.utcnow(),\n        db_session=db_session,\n    )\n\n    user_analytics: dict[datetime.date, int] = defaultdict(int)\n    for __, ___, ____, date, _____ in daily_query_usage_info_per_user:\n        user_analytics[date] += 1\n    return [\n        UserAnalyticsResponse(\n            total_active_users=cnt,\n            date=date,\n        )\n        for date, cnt in user_analytics.items()\n    ]\n\n\nclass OnyxbotAnalyticsResponse(BaseModel):\n    total_queries: int\n    auto_resolved: int\n    date: datetime.date\n\n\n@router.get(\"/admin/onyxbot\")\ndef get_onyxbot_analytics(\n    start: datetime.datetime | None = None,\n    end: datetime.datetime | None = None,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[OnyxbotAnalyticsResponse]:\n    daily_onyxbot_info = fetch_onyxbot_analytics(\n        start=start\n        or (\n            datetime.datetime.utcnow() - datetime.timedelta(days=_DEFAULT_LOOKBACK_DAYS)\n        ),  # default is 30d lookback\n        end=end or datetime.datetime.utcnow(),\n        db_session=db_session,\n    )\n\n    resolution_results = [\n        OnyxbotAnalyticsResponse(\n            total_queries=total_queries,\n            # If it hits negatives, something has gone wrong...\n            auto_resolved=max(0, total_queries - total_negatives),\n            date=date,\n        )\n        for total_queries, total_negatives, date in daily_onyxbot_info\n    ]\n\n    return resolution_results\n\n\nclass PersonaMessageAnalyticsResponse(BaseModel):\n    total_messages: int\n    date: datetime.date\n    persona_id: int\n\n\n@router.get(\"/admin/persona/messages\")\ndef get_persona_messages(\n    persona_id: int,\n    start: datetime.datetime | None = None,\n    end: datetime.datetime | None = None,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[PersonaMessageAnalyticsResponse]:\n    \"\"\"Fetch daily message counts for a single persona within the given time range.\"\"\"\n    start = start or (\n        datetime.datetime.utcnow() - datetime.timedelta(days=_DEFAULT_LOOKBACK_DAYS)\n    )\n    end = end or datetime.datetime.utcnow()\n\n    persona_message_counts = []\n    for count, date in fetch_persona_message_analytics(\n        db_session=db_session,\n        persona_id=persona_id,\n        start=start,\n        end=end,\n    ):\n        persona_message_counts.append(\n            PersonaMessageAnalyticsResponse(\n                total_messages=count,\n                date=date,\n                persona_id=persona_id,\n            )\n        )\n\n    return persona_message_counts\n\n\nclass PersonaUniqueUsersResponse(BaseModel):\n    unique_users: int\n    date: datetime.date\n    persona_id: int\n\n\n@router.get(\"/admin/persona/unique-users\")\ndef get_persona_unique_users(\n    persona_id: int,\n    start: datetime.datetime,\n    end: datetime.datetime,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[PersonaUniqueUsersResponse]:\n    \"\"\"Get unique users per day for a single persona.\"\"\"\n    unique_user_counts = []\n    daily_counts = fetch_persona_unique_users(\n        db_session=db_session,\n        persona_id=persona_id,\n        start=start,\n        end=end,\n    )\n    for count, date in daily_counts:\n        unique_user_counts.append(\n            PersonaUniqueUsersResponse(\n                unique_users=count,\n                date=date,\n                persona_id=persona_id,\n            )\n        )\n    return unique_user_counts\n\n\nclass AssistantDailyUsageResponse(BaseModel):\n    date: datetime.date\n    total_messages: int\n    total_unique_users: int\n\n\nclass AssistantStatsResponse(BaseModel):\n    daily_stats: List[AssistantDailyUsageResponse]\n    total_messages: int\n    total_unique_users: int\n\n\n@router.get(\"/assistant/{assistant_id}/stats\")\ndef get_assistant_stats(\n    assistant_id: int,\n    start: datetime.datetime | None = None,\n    end: datetime.datetime | None = None,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> AssistantStatsResponse:\n    \"\"\"\n    Returns daily message and unique user counts for a user's assistant,\n    along with the overall total messages and total distinct users.\n    \"\"\"\n    start = start or (\n        datetime.datetime.utcnow() - datetime.timedelta(days=_DEFAULT_LOOKBACK_DAYS)\n    )\n    end = end or datetime.datetime.utcnow()\n\n    if not user_can_view_assistant_stats(db_session, user, assistant_id):\n        raise HTTPException(\n            status_code=403, detail=\"Not allowed to access this assistant's stats.\"\n        )\n\n    # Pull daily usage from the DB calls\n    messages_data = fetch_assistant_message_analytics(\n        db_session, assistant_id, start, end\n    )\n    unique_users_data = fetch_assistant_unique_users(\n        db_session, assistant_id, start, end\n    )\n\n    # Map each day => (messages, unique_users).\n    daily_messages_map = {date: count for count, date in messages_data}\n    daily_unique_users_map = {date: count for count, date in unique_users_data}\n    all_dates = set(daily_messages_map.keys()) | set(daily_unique_users_map.keys())\n\n    # Merge both sets of metrics by date\n    daily_results: list[AssistantDailyUsageResponse] = []\n    for date in sorted(all_dates):\n        daily_results.append(\n            AssistantDailyUsageResponse(\n                date=date,\n                total_messages=daily_messages_map.get(date, 0),\n                total_unique_users=daily_unique_users_map.get(date, 0),\n            )\n        )\n\n    # Now pull a single total distinct user count across the entire time range\n    total_msgs = sum(d.total_messages for d in daily_results)\n    total_users = fetch_assistant_unique_users_total(\n        db_session, assistant_id, start, end\n    )\n\n    return AssistantStatsResponse(\n        daily_stats=daily_results,\n        total_messages=total_msgs,\n        total_unique_users=total_users,\n    )\n"
  },
  {
    "path": "backend/ee/onyx/server/auth_check.py",
    "content": "from fastapi import FastAPI\n\nfrom onyx.server.auth_check import check_router_auth\nfrom onyx.server.auth_check import PUBLIC_ENDPOINT_SPECS\n\n\nEE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [\n    # SCIM 2.0 service discovery — unauthenticated so IdPs can probe\n    # before bearer token configuration is complete\n    (\"/scim/v2/ServiceProviderConfig\", {\"GET\"}),\n    (\"/scim/v2/ResourceTypes\", {\"GET\"}),\n    (\"/scim/v2/Schemas\", {\"GET\"}),\n    # needs to be accessible prior to user login\n    (\"/enterprise-settings\", {\"GET\"}),\n    (\"/enterprise-settings/logo\", {\"GET\"}),\n    (\"/enterprise-settings/logotype\", {\"GET\"}),\n    (\"/enterprise-settings/custom-analytics-script\", {\"GET\"}),\n    # Stripe publishable key is safe to expose publicly\n    (\"/tenants/stripe-publishable-key\", {\"GET\"}),\n    (\"/admin/billing/stripe-publishable-key\", {\"GET\"}),\n    # Proxy endpoints use license-based auth, not user auth\n    (\"/proxy/create-checkout-session\", {\"POST\"}),\n    (\"/proxy/claim-license\", {\"POST\"}),\n    (\"/proxy/create-customer-portal-session\", {\"POST\"}),\n    (\"/proxy/billing-information\", {\"GET\"}),\n    (\"/proxy/license/{tenant_id}\", {\"GET\"}),\n    (\"/proxy/seats/update\", {\"POST\"}),\n]\n\n\ndef check_ee_router_auth(\n    application: FastAPI,\n    public_endpoint_specs: list[tuple[str, set[str]]] = EE_PUBLIC_ENDPOINT_SPECS,\n) -> None:\n    # similar to the open source version of this function, but checking for the EE-only\n    # endpoints as well\n    check_router_auth(application, public_endpoint_specs)\n"
  },
  {
    "path": "backend/ee/onyx/server/billing/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/server/billing/api.py",
    "content": "\"\"\"Unified Billing API endpoints.\n\nThese endpoints provide Stripe billing functionality for both cloud and\nself-hosted deployments. The service layer routes requests appropriately:\n\n- Self-hosted: Routes through cloud data plane proxy\n  Flow: Backend /admin/billing/* → Cloud DP /proxy/* → Control plane\n\n- Cloud (MULTI_TENANT): Routes directly to control plane\n  Flow: Backend /admin/billing/* → Control plane\n\nLicense claiming is handled separately by /license/claim endpoint (self-hosted only).\n\nMigration Note (ENG-3533):\nThis /admin/billing/* API replaces the older /tenants/* billing endpoints:\n- /tenants/billing-information            -> /admin/billing/billing-information\n- /tenants/create-customer-portal-session -> /admin/billing/create-customer-portal-session\n- /tenants/create-subscription-session    -> /admin/billing/create-checkout-session\n- /tenants/stripe-publishable-key         -> /admin/billing/stripe-publishable-key\n\nSee: https://linear.app/onyx-app/issue/ENG-3533/migrate-tenantsbilling-adminbilling\n\"\"\"\n\nimport asyncio\n\nimport httpx\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.auth.users import current_admin_user\nfrom ee.onyx.db.license import get_license\nfrom ee.onyx.db.license import get_used_seats\nfrom ee.onyx.server.billing.models import BillingInformationResponse\nfrom ee.onyx.server.billing.models import CreateCheckoutSessionRequest\nfrom ee.onyx.server.billing.models import CreateCheckoutSessionResponse\nfrom ee.onyx.server.billing.models import CreateCustomerPortalSessionRequest\nfrom ee.onyx.server.billing.models import CreateCustomerPortalSessionResponse\nfrom ee.onyx.server.billing.models import SeatUpdateRequest\nfrom ee.onyx.server.billing.models import SeatUpdateResponse\nfrom ee.onyx.server.billing.models import StripePublishableKeyResponse\nfrom ee.onyx.server.billing.models import SubscriptionStatusResponse\nfrom ee.onyx.server.billing.service import (\n    create_checkout_session as create_checkout_service,\n)\nfrom ee.onyx.server.billing.service import (\n    create_customer_portal_session as create_portal_service,\n)\nfrom ee.onyx.server.billing.service import (\n    get_billing_information as get_billing_service,\n)\nfrom ee.onyx.server.billing.service import update_seat_count as update_seat_service\nfrom onyx.auth.users import User\nfrom onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE\nfrom onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.redis.redis_pool import get_shared_redis_client\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/admin/billing\")\n\n# Cache for Stripe publishable key to avoid hitting S3 on every request\n_stripe_publishable_key_cache: str | None = None\n_stripe_key_lock = asyncio.Lock()\n\n# Redis key for billing circuit breaker (self-hosted only)\n# When set, billing requests to Stripe are disabled until user manually retries\nBILLING_CIRCUIT_BREAKER_KEY = \"billing_circuit_open\"\n# Circuit breaker auto-expires after 1 hour (user can manually retry sooner)\nBILLING_CIRCUIT_BREAKER_TTL_SECONDS = 3600\n\n\ndef _is_billing_circuit_open() -> bool:\n    \"\"\"Check if the billing circuit breaker is open (self-hosted only).\"\"\"\n    if MULTI_TENANT:\n        return False\n    try:\n        redis_client = get_shared_redis_client()\n        is_open = bool(redis_client.exists(BILLING_CIRCUIT_BREAKER_KEY))\n        logger.debug(\n            f\"Circuit breaker check: key={BILLING_CIRCUIT_BREAKER_KEY}, is_open={is_open}\"\n        )\n        return is_open\n    except Exception as e:\n        logger.error(f\"Failed to check circuit breaker: {e}\")\n        return False\n\n\ndef _open_billing_circuit() -> None:\n    \"\"\"Open the billing circuit breaker after a failure (self-hosted only).\"\"\"\n    if MULTI_TENANT:\n        return\n    try:\n        redis_client = get_shared_redis_client()\n        redis_client.set(\n            BILLING_CIRCUIT_BREAKER_KEY,\n            \"1\",\n            ex=BILLING_CIRCUIT_BREAKER_TTL_SECONDS,\n        )\n        # Verify it was set\n        exists = redis_client.exists(BILLING_CIRCUIT_BREAKER_KEY)\n        logger.warning(\n            f\"Billing circuit breaker opened (TTL={BILLING_CIRCUIT_BREAKER_TTL_SECONDS}s, \"\n            f\"verified={exists}). Stripe billing requests are disabled until manually reset.\"\n        )\n    except Exception as e:\n        logger.error(f\"Failed to open circuit breaker: {e}\")\n\n\ndef _close_billing_circuit() -> None:\n    \"\"\"Close the billing circuit breaker (re-enable Stripe requests).\"\"\"\n    if MULTI_TENANT:\n        return\n    try:\n        redis_client = get_shared_redis_client()\n        redis_client.delete(BILLING_CIRCUIT_BREAKER_KEY)\n        logger.info(\n            \"Billing circuit breaker closed. Stripe billing requests re-enabled.\"\n        )\n    except Exception as e:\n        logger.error(f\"Failed to close circuit breaker: {e}\")\n\n\ndef _get_license_data(db_session: Session) -> str | None:\n    \"\"\"Get license data from database if exists (self-hosted only).\"\"\"\n    if MULTI_TENANT:\n        return None\n    license_record = get_license(db_session)\n    return license_record.license_data if license_record else None\n\n\ndef _get_tenant_id() -> str | None:\n    \"\"\"Get tenant ID for cloud deployments.\"\"\"\n    if MULTI_TENANT:\n        return get_current_tenant_id()\n    return None\n\n\n@router.post(\"/create-checkout-session\")\nasync def create_checkout_session(\n    request: CreateCheckoutSessionRequest | None = None,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> CreateCheckoutSessionResponse:\n    \"\"\"Create a Stripe checkout session for new subscription or renewal.\n\n    For new customers, no license/tenant is required.\n    For renewals, existing license (self-hosted) or tenant_id (cloud) is used.\n\n    After checkout completion:\n    - Self-hosted: Use /license/claim to retrieve the license\n    - Cloud: Subscription is automatically activated\n    \"\"\"\n    license_data = _get_license_data(db_session)\n    tenant_id = _get_tenant_id()\n    billing_period = request.billing_period if request else \"monthly\"\n    seats = request.seats if request else None\n    email = request.email if request else None\n\n    # Validate that requested seats is not less than current used seats\n    if seats is not None:\n        used_seats = get_used_seats(tenant_id)\n        if seats < used_seats:\n            raise OnyxError(\n                OnyxErrorCode.VALIDATION_ERROR,\n                f\"Cannot subscribe with fewer seats than current usage. \"\n                f\"You have {used_seats} active users/integrations but requested {seats} seats.\",\n            )\n\n    # Build redirect URL for after checkout completion\n    redirect_url = f\"{WEB_DOMAIN}/admin/billing?checkout=success\"\n\n    return await create_checkout_service(\n        billing_period=billing_period,\n        seats=seats,\n        email=email,\n        license_data=license_data,\n        redirect_url=redirect_url,\n        tenant_id=tenant_id,\n    )\n\n\n@router.post(\"/create-customer-portal-session\")\nasync def create_customer_portal_session(\n    request: CreateCustomerPortalSessionRequest | None = None,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> CreateCustomerPortalSessionResponse:\n    \"\"\"Create a Stripe customer portal session for managing subscription.\n\n    Requires existing license (self-hosted) or active tenant (cloud).\n    \"\"\"\n    license_data = _get_license_data(db_session)\n    tenant_id = _get_tenant_id()\n\n    # Self-hosted requires license\n    if not MULTI_TENANT and not license_data:\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, \"No license found\")\n\n    return_url = request.return_url if request else f\"{WEB_DOMAIN}/admin/billing\"\n\n    return await create_portal_service(\n        license_data=license_data,\n        return_url=return_url,\n        tenant_id=tenant_id,\n    )\n\n\n@router.get(\"/billing-information\")\nasync def get_billing_information(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> BillingInformationResponse | SubscriptionStatusResponse:\n    \"\"\"Get billing information for the current subscription.\n\n    Returns subscription status and details from Stripe.\n    For self-hosted: If the circuit breaker is open (previous failure),\n    returns a 503 error without making the request.\n    \"\"\"\n    license_data = _get_license_data(db_session)\n    tenant_id = _get_tenant_id()\n\n    # Self-hosted without license = no subscription\n    if not MULTI_TENANT and not license_data:\n        return SubscriptionStatusResponse(subscribed=False)\n\n    # Check circuit breaker (self-hosted only)\n    if _is_billing_circuit_open():\n        raise OnyxError(\n            OnyxErrorCode.SERVICE_UNAVAILABLE,\n            \"Stripe connection temporarily disabled. Click 'Connect to Stripe' to retry.\",\n        )\n\n    try:\n        return await get_billing_service(\n            license_data=license_data,\n            tenant_id=tenant_id,\n        )\n    except OnyxError as e:\n        # Open circuit breaker on connection failures (self-hosted only)\n        if e.status_code in (\n            OnyxErrorCode.BAD_GATEWAY.status_code,\n            OnyxErrorCode.SERVICE_UNAVAILABLE.status_code,\n            OnyxErrorCode.GATEWAY_TIMEOUT.status_code,\n        ):\n            _open_billing_circuit()\n        raise\n\n\n@router.post(\"/seats/update\")\nasync def update_seats(\n    request: SeatUpdateRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> SeatUpdateResponse:\n    \"\"\"Update the seat count for the current subscription.\n\n    Handles Stripe proration and license regeneration via control plane.\n    For self-hosted, the frontend should call /license/claim after a short delay\n    to fetch the regenerated license.\n    \"\"\"\n    license_data = _get_license_data(db_session)\n    tenant_id = _get_tenant_id()\n\n    # Self-hosted requires license\n    if not MULTI_TENANT and not license_data:\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, \"No license found\")\n\n    # Validate that new seat count is not less than current used seats\n    used_seats = get_used_seats(tenant_id)\n    if request.new_seat_count < used_seats:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            f\"Cannot reduce seats below current usage. \"\n            f\"You have {used_seats} active users/integrations but requested {request.new_seat_count} seats.\",\n        )\n\n    # Note: Don't store license here - the control plane may still be processing\n    # the subscription update. The frontend should call /license/claim after a\n    # short delay to get the freshly generated license.\n    return await update_seat_service(\n        new_seat_count=request.new_seat_count,\n        license_data=license_data,\n        tenant_id=tenant_id,\n    )\n\n\n@router.get(\"/stripe-publishable-key\")\nasync def get_stripe_publishable_key() -> StripePublishableKeyResponse:\n    \"\"\"Fetch the Stripe publishable key.\n\n    Priority: env var override (for testing) > S3 bucket (production).\n    This endpoint is public (no auth required) since publishable keys are safe to expose.\n    The key is cached in memory to avoid hitting S3 on every request.\n    \"\"\"\n    global _stripe_publishable_key_cache\n\n    # Fast path: return cached value without lock\n    if _stripe_publishable_key_cache:\n        return StripePublishableKeyResponse(\n            publishable_key=_stripe_publishable_key_cache\n        )\n\n    # Use lock to prevent concurrent S3 requests\n    async with _stripe_key_lock:\n        # Double-check after acquiring lock (another request may have populated cache)\n        if _stripe_publishable_key_cache:\n            return StripePublishableKeyResponse(\n                publishable_key=_stripe_publishable_key_cache\n            )\n\n        # Check for env var override first (for local testing with pk_test_* keys)\n        if STRIPE_PUBLISHABLE_KEY_OVERRIDE:\n            key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()\n            if not key.startswith(\"pk_\"):\n                raise OnyxError(\n                    OnyxErrorCode.INTERNAL_ERROR,\n                    \"Invalid Stripe publishable key format\",\n                )\n            _stripe_publishable_key_cache = key\n            return StripePublishableKeyResponse(publishable_key=key)\n\n        # Fall back to S3 bucket\n        if not STRIPE_PUBLISHABLE_KEY_URL:\n            raise OnyxError(\n                OnyxErrorCode.INTERNAL_ERROR,\n                \"Stripe publishable key is not configured\",\n            )\n\n        try:\n            async with httpx.AsyncClient() as client:\n                response = await client.get(STRIPE_PUBLISHABLE_KEY_URL)\n                response.raise_for_status()\n                key = response.text.strip()\n\n                # Validate key format\n                if not key.startswith(\"pk_\"):\n                    raise OnyxError(\n                        OnyxErrorCode.INTERNAL_ERROR,\n                        \"Invalid Stripe publishable key format\",\n                    )\n\n                _stripe_publishable_key_cache = key\n                return StripePublishableKeyResponse(publishable_key=key)\n        except httpx.HTTPError:\n            raise OnyxError(\n                OnyxErrorCode.INTERNAL_ERROR,\n                \"Failed to fetch Stripe publishable key\",\n            )\n\n\nclass ResetConnectionResponse(BaseModel):\n    success: bool\n    message: str\n\n\n@router.post(\"/reset-connection\")\nasync def reset_stripe_connection(\n    _: User = Depends(current_admin_user),\n) -> ResetConnectionResponse:\n    \"\"\"Reset the Stripe connection circuit breaker.\n\n    Called when user clicks \"Connect to Stripe\" to retry after a previous failure.\n    This clears the circuit breaker flag, allowing billing requests to proceed again.\n    Self-hosted only - cloud deployments don't use the circuit breaker.\n    \"\"\"\n    if MULTI_TENANT:\n        return ResetConnectionResponse(\n            success=True,\n            message=\"Circuit breaker not applicable for cloud deployments\",\n        )\n\n    _close_billing_circuit()\n    return ResetConnectionResponse(\n        success=True,\n        message=\"Stripe connection reset. Billing requests re-enabled.\",\n    )\n"
  },
  {
    "path": "backend/ee/onyx/server/billing/models.py",
    "content": "\"\"\"Pydantic models for the billing API.\"\"\"\n\nfrom datetime import datetime\nfrom typing import Literal\n\nfrom pydantic import BaseModel\n\n\nclass CreateCheckoutSessionRequest(BaseModel):\n    \"\"\"Request to create a Stripe checkout session.\"\"\"\n\n    billing_period: Literal[\"monthly\", \"annual\"] = \"monthly\"\n    seats: int | None = None\n    email: str | None = None\n\n\nclass CreateCheckoutSessionResponse(BaseModel):\n    \"\"\"Response containing the Stripe checkout session URL.\"\"\"\n\n    stripe_checkout_url: str\n\n\nclass CreateCustomerPortalSessionRequest(BaseModel):\n    \"\"\"Request to create a Stripe customer portal session.\"\"\"\n\n    return_url: str | None = None\n\n\nclass CreateCustomerPortalSessionResponse(BaseModel):\n    \"\"\"Response containing the Stripe customer portal URL.\"\"\"\n\n    stripe_customer_portal_url: str\n\n\nclass BillingInformationResponse(BaseModel):\n    \"\"\"Billing information for the current subscription.\"\"\"\n\n    tenant_id: str\n    status: str | None = None\n    plan_type: str | None = None\n    seats: int | None = None\n    billing_period: str | None = None\n    current_period_start: datetime | None = None\n    current_period_end: datetime | None = None\n    cancel_at_period_end: bool = False\n    canceled_at: datetime | None = None\n    trial_start: datetime | None = None\n    trial_end: datetime | None = None\n    payment_method_enabled: bool = False\n\n\nclass SubscriptionStatusResponse(BaseModel):\n    \"\"\"Response when no subscription exists.\"\"\"\n\n    subscribed: bool = False\n\n\nclass SeatUpdateRequest(BaseModel):\n    \"\"\"Request to update seat count.\"\"\"\n\n    new_seat_count: int\n\n\nclass SeatUpdateResponse(BaseModel):\n    \"\"\"Response from seat update operation.\"\"\"\n\n    success: bool\n    current_seats: int\n    used_seats: int\n    message: str | None = None\n    license: str | None = None  # Regenerated license (self-hosted stores this)\n\n\nclass StripePublishableKeyResponse(BaseModel):\n    \"\"\"Response containing the Stripe publishable key.\"\"\"\n\n    publishable_key: str\n"
  },
  {
    "path": "backend/ee/onyx/server/billing/service.py",
    "content": "\"\"\"Service layer for billing operations.\n\nThis module provides functions for billing operations that route differently\nbased on deployment type:\n\n- Self-hosted (not MULTI_TENANT): Routes through cloud data plane proxy\n  Flow: Self-hosted backend → Cloud DP /proxy/* → Control plane\n\n- Cloud (MULTI_TENANT): Routes directly to control plane\n  Flow: Cloud backend → Control plane\n\"\"\"\n\nfrom typing import Literal\n\nimport httpx\n\nfrom ee.onyx.configs.app_configs import CLOUD_DATA_PLANE_URL\nfrom ee.onyx.server.billing.models import BillingInformationResponse\nfrom ee.onyx.server.billing.models import CreateCheckoutSessionResponse\nfrom ee.onyx.server.billing.models import CreateCustomerPortalSessionResponse\nfrom ee.onyx.server.billing.models import SeatUpdateResponse\nfrom ee.onyx.server.billing.models import SubscriptionStatusResponse\nfrom ee.onyx.server.tenants.access import generate_data_plane_token\nfrom onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n# HTTP request timeout for billing service calls\n_REQUEST_TIMEOUT = 30.0\n\n\ndef _get_proxy_headers(license_data: str | None) -> dict[str, str]:\n    \"\"\"Build headers for proxy requests (self-hosted).\n\n    Self-hosted instances authenticate with their license.\n    \"\"\"\n    headers = {\"Content-Type\": \"application/json\"}\n    if license_data:\n        headers[\"Authorization\"] = f\"Bearer {license_data}\"\n    return headers\n\n\ndef _get_direct_headers() -> dict[str, str]:\n    \"\"\"Build headers for direct control plane requests (cloud).\n\n    Cloud instances authenticate with JWT.\n    \"\"\"\n    token = generate_data_plane_token()\n    return {\n        \"Content-Type\": \"application/json\",\n        \"Authorization\": f\"Bearer {token}\",\n    }\n\n\ndef _get_base_url() -> str:\n    \"\"\"Get the base URL based on deployment type.\"\"\"\n    if MULTI_TENANT:\n        return CONTROL_PLANE_API_BASE_URL\n    return f\"{CLOUD_DATA_PLANE_URL}/proxy\"\n\n\ndef _get_headers(license_data: str | None) -> dict[str, str]:\n    \"\"\"Get appropriate headers based on deployment type.\"\"\"\n    if MULTI_TENANT:\n        return _get_direct_headers()\n    return _get_proxy_headers(license_data)\n\n\nasync def _make_billing_request(\n    method: Literal[\"GET\", \"POST\"],\n    path: str,\n    license_data: str | None = None,\n    body: dict | None = None,\n    params: dict | None = None,\n    error_message: str = \"Billing service request failed\",\n) -> dict:\n    \"\"\"Make an HTTP request to the billing service.\n\n    Consolidates the common HTTP request pattern used by all billing operations.\n\n    Args:\n        method: HTTP method (GET or POST)\n        path: URL path (appended to base URL)\n        license_data: License for authentication (self-hosted)\n        body: Request body for POST requests\n        params: Query parameters for GET requests\n        error_message: Default error message if request fails\n\n    Returns:\n        Response JSON as dict\n\n    Raises:\n        OnyxError: If request fails\n    \"\"\"\n\n    base_url = _get_base_url()\n    url = f\"{base_url}{path}\"\n    headers = _get_headers(license_data)\n\n    try:\n        async with httpx.AsyncClient(\n            timeout=_REQUEST_TIMEOUT, follow_redirects=True\n        ) as client:\n            if method == \"GET\":\n                response = await client.get(url, headers=headers, params=params)\n            else:\n                response = await client.post(url, headers=headers, json=body)\n\n            response.raise_for_status()\n            return response.json()\n\n    except httpx.HTTPStatusError as e:\n        detail = error_message\n        try:\n            error_data = e.response.json()\n            detail = error_data.get(\"detail\", detail)\n        except Exception:\n            pass\n        logger.error(f\"{error_message}: {e.response.status_code} - {detail}\")\n        raise OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            detail,\n            status_code_override=e.response.status_code,\n        )\n\n    except httpx.RequestError:\n        logger.exception(\"Failed to connect to billing service\")\n        raise OnyxError(\n            OnyxErrorCode.BAD_GATEWAY, \"Failed to connect to billing service\"\n        )\n\n\nasync def create_checkout_session(\n    billing_period: str = \"monthly\",\n    seats: int | None = None,\n    email: str | None = None,\n    license_data: str | None = None,\n    redirect_url: str | None = None,\n    tenant_id: str | None = None,\n) -> CreateCheckoutSessionResponse:\n    \"\"\"Create a Stripe checkout session.\n\n    Args:\n        billing_period: \"monthly\" or \"annual\"\n        seats: Number of seats to purchase (optional, uses default if not provided)\n        email: Customer email for new subscriptions\n        license_data: Existing license for renewals (self-hosted)\n        redirect_url: URL to redirect after successful checkout\n        tenant_id: Tenant ID (cloud only, for renewals)\n\n    Returns:\n        CreateCheckoutSessionResponse with checkout URL\n    \"\"\"\n    body: dict = {\"billing_period\": billing_period}\n    if seats is not None:\n        body[\"seats\"] = seats\n    if email:\n        body[\"email\"] = email\n    if redirect_url:\n        body[\"redirect_url\"] = redirect_url\n    if tenant_id and MULTI_TENANT:\n        body[\"tenant_id\"] = tenant_id\n\n    data = await _make_billing_request(\n        method=\"POST\",\n        path=\"/create-checkout-session\",\n        license_data=license_data,\n        body=body,\n        error_message=\"Failed to create checkout session\",\n    )\n    return CreateCheckoutSessionResponse(stripe_checkout_url=data[\"url\"])\n\n\nasync def create_customer_portal_session(\n    license_data: str | None = None,\n    return_url: str | None = None,\n    tenant_id: str | None = None,\n) -> CreateCustomerPortalSessionResponse:\n    \"\"\"Create a Stripe customer portal session.\n\n    Args:\n        license_data: License blob for authentication (self-hosted)\n        return_url: URL to return to after portal session\n        tenant_id: Tenant ID (cloud only)\n\n    Returns:\n        CreateCustomerPortalSessionResponse with portal URL\n    \"\"\"\n    body: dict = {}\n    if return_url:\n        body[\"return_url\"] = return_url\n    if tenant_id and MULTI_TENANT:\n        body[\"tenant_id\"] = tenant_id\n\n    data = await _make_billing_request(\n        method=\"POST\",\n        path=\"/create-customer-portal-session\",\n        license_data=license_data,\n        body=body,\n        error_message=\"Failed to create customer portal session\",\n    )\n    return CreateCustomerPortalSessionResponse(stripe_customer_portal_url=data[\"url\"])\n\n\nasync def get_billing_information(\n    license_data: str | None = None,\n    tenant_id: str | None = None,\n) -> BillingInformationResponse | SubscriptionStatusResponse:\n    \"\"\"Fetch billing information.\n\n    Args:\n        license_data: License blob for authentication (self-hosted)\n        tenant_id: Tenant ID (cloud only)\n\n    Returns:\n        BillingInformationResponse or SubscriptionStatusResponse if no subscription\n    \"\"\"\n    params = {}\n    if tenant_id and MULTI_TENANT:\n        params[\"tenant_id\"] = tenant_id\n\n    data = await _make_billing_request(\n        method=\"GET\",\n        path=\"/billing-information\",\n        license_data=license_data,\n        params=params or None,\n        error_message=\"Failed to fetch billing information\",\n    )\n\n    # Check if no subscription\n    if isinstance(data, dict) and data.get(\"subscribed\") is False:\n        return SubscriptionStatusResponse(subscribed=False)\n\n    return BillingInformationResponse(**data)\n\n\nasync def update_seat_count(\n    new_seat_count: int,\n    license_data: str | None = None,\n    tenant_id: str | None = None,\n) -> SeatUpdateResponse:\n    \"\"\"Update the seat count for the current subscription.\n\n    Args:\n        new_seat_count: New number of seats\n        license_data: License blob for authentication (self-hosted)\n        tenant_id: Tenant ID (cloud only)\n\n    Returns:\n        SeatUpdateResponse with updated seat information\n    \"\"\"\n    body: dict = {\"new_seat_count\": new_seat_count}\n    if tenant_id and MULTI_TENANT:\n        body[\"tenant_id\"] = tenant_id\n\n    data = await _make_billing_request(\n        method=\"POST\",\n        path=\"/seats/update\",\n        license_data=license_data,\n        body=body,\n        error_message=\"Failed to update seat count\",\n    )\n\n    return SeatUpdateResponse(\n        success=data.get(\"success\", False),\n        current_seats=data.get(\"current_seats\", 0),\n        used_seats=data.get(\"used_seats\", 0),\n        message=data.get(\"message\"),\n        license=data.get(\"license\"),\n    )\n"
  },
  {
    "path": "backend/ee/onyx/server/documents/cc_pair.py",
    "content": "from datetime import datetime\nfrom http import HTTPStatus\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.background.celery.tasks.doc_permission_syncing.tasks import (\n    try_creating_permissions_sync_task,\n)\nfrom ee.onyx.background.celery.tasks.external_group_syncing.tasks import (\n    try_creating_external_group_sync_task,\n)\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.background.celery.versioned_apps.client import app as client_app\nfrom onyx.db.connector_credential_pair import (\n    get_connector_credential_pair_from_id_for_user,\n)\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.redis.redis_connector import RedisConnector\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.models import StatusResponse\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\nrouter = APIRouter(prefix=\"/manage\")\n\n\n@router.get(\"/admin/cc-pair/{cc_pair_id}/sync-permissions\")\ndef get_cc_pair_latest_sync(\n    cc_pair_id: int,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> datetime | None:\n    cc_pair = get_connector_credential_pair_from_id_for_user(\n        cc_pair_id=cc_pair_id,\n        db_session=db_session,\n        user=user,\n        get_editable=False,\n    )\n    if not cc_pair:\n        raise HTTPException(\n            status_code=400,\n            detail=\"cc_pair not found for current user's permissions\",\n        )\n\n    return cc_pair.last_time_perm_sync\n\n\n@router.post(\"/admin/cc-pair/{cc_pair_id}/sync-permissions\")\ndef sync_cc_pair(\n    cc_pair_id: int,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse[None]:\n    \"\"\"Triggers permissions sync on a particular cc_pair immediately\"\"\"\n    tenant_id = get_current_tenant_id()\n\n    cc_pair = get_connector_credential_pair_from_id_for_user(\n        cc_pair_id=cc_pair_id,\n        db_session=db_session,\n        user=user,\n        get_editable=False,\n    )\n    if not cc_pair:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Connection not found for current user's permissions\",\n        )\n\n    r = get_redis_client()\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n    if redis_connector.permissions.fenced:\n        raise HTTPException(\n            status_code=HTTPStatus.CONFLICT,\n            detail=\"Permissions sync task already in progress.\",\n        )\n\n    logger.info(\n        f\"Permissions sync cc_pair={cc_pair_id} \"\n        f\"connector_id={cc_pair.connector_id} \"\n        f\"credential_id={cc_pair.credential_id} \"\n        f\"{cc_pair.connector.name} connector.\"\n    )\n    payload_id = try_creating_permissions_sync_task(\n        client_app, cc_pair_id, r, tenant_id\n    )\n    if not payload_id:\n        raise HTTPException(\n            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,\n            detail=\"Permissions sync task creation failed.\",\n        )\n\n    logger.info(f\"Permissions sync queued: cc_pair={cc_pair_id} id={payload_id}\")\n\n    return StatusResponse(\n        success=True,\n        message=\"Successfully created the permissions sync task.\",\n    )\n\n\n@router.get(\"/admin/cc-pair/{cc_pair_id}/sync-groups\")\ndef get_cc_pair_latest_group_sync(\n    cc_pair_id: int,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> datetime | None:\n    cc_pair = get_connector_credential_pair_from_id_for_user(\n        cc_pair_id=cc_pair_id,\n        db_session=db_session,\n        user=user,\n        get_editable=False,\n    )\n    if not cc_pair:\n        raise HTTPException(\n            status_code=400,\n            detail=\"cc_pair not found for current user's permissions\",\n        )\n\n    return cc_pair.last_time_external_group_sync\n\n\n@router.post(\"/admin/cc-pair/{cc_pair_id}/sync-groups\")\ndef sync_cc_pair_groups(\n    cc_pair_id: int,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse[None]:\n    \"\"\"Triggers group sync on a particular cc_pair immediately\"\"\"\n    tenant_id = get_current_tenant_id()\n\n    cc_pair = get_connector_credential_pair_from_id_for_user(\n        cc_pair_id=cc_pair_id,\n        db_session=db_session,\n        user=user,\n        get_editable=False,\n    )\n    if not cc_pair:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Connection not found for current user's permissions\",\n        )\n\n    r = get_redis_client()\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n    if redis_connector.external_group_sync.fenced:\n        raise HTTPException(\n            status_code=HTTPStatus.CONFLICT,\n            detail=\"External group sync task already in progress.\",\n        )\n\n    logger.info(\n        f\"External group sync cc_pair={cc_pair_id} \"\n        f\"connector_id={cc_pair.connector_id} \"\n        f\"credential_id={cc_pair.credential_id} \"\n        f\"{cc_pair.connector.name} connector.\"\n    )\n    payload_id = try_creating_external_group_sync_task(\n        client_app, cc_pair_id, r, tenant_id\n    )\n    if not payload_id:\n        raise HTTPException(\n            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,\n            detail=\"External group sync task creation failed.\",\n        )\n\n    logger.info(f\"External group sync queued: cc_pair={cc_pair_id} id={payload_id}\")\n\n    return StatusResponse(\n        success=True,\n        message=\"Successfully created the external group sync task.\",\n    )\n"
  },
  {
    "path": "backend/ee/onyx/server/enterprise_settings/api.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nimport httpx\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Response\nfrom fastapi import status\nfrom fastapi import UploadFile\nfrom pydantic import BaseModel\nfrom pydantic import Field\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.scim import ScimDAL\nfrom ee.onyx.server.enterprise_settings.models import AnalyticsScriptUpload\nfrom ee.onyx.server.enterprise_settings.models import EnterpriseSettings\nfrom ee.onyx.server.enterprise_settings.store import get_logo_filename\nfrom ee.onyx.server.enterprise_settings.store import get_logotype_filename\nfrom ee.onyx.server.enterprise_settings.store import load_analytics_script\nfrom ee.onyx.server.enterprise_settings.store import load_settings\nfrom ee.onyx.server.enterprise_settings.store import store_analytics_script\nfrom ee.onyx.server.enterprise_settings.store import store_settings\nfrom ee.onyx.server.enterprise_settings.store import upload_logo\nfrom ee.onyx.server.scim.auth import generate_scim_token\nfrom ee.onyx.server.scim.models import ScimTokenCreate\nfrom ee.onyx.server.scim.models import ScimTokenCreatedResponse\nfrom ee.onyx.server.scim.models import ScimTokenResponse\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_user_with_expired_token\nfrom onyx.auth.users import get_user_manager\nfrom onyx.auth.users import UserManager\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.server.utils import BasicAuthenticationError\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.contextvars import get_current_tenant_id\n\nadmin_router = APIRouter(prefix=\"/admin/enterprise-settings\")\nbasic_router = APIRouter(prefix=\"/enterprise-settings\")\n\nlogger = setup_logger()\n\n\nclass RefreshTokenData(BaseModel):\n    access_token: str\n    refresh_token: str\n    session: dict = Field(..., description=\"Contains session information\")\n    userinfo: dict = Field(..., description=\"Contains user information\")\n\n    def __init__(self, **data: Any) -> None:\n        super().__init__(**data)\n        if \"exp\" not in self.session:\n            raise ValueError(\"'exp' must be set in the session dictionary\")\n        if \"userId\" not in self.userinfo or \"email\" not in self.userinfo:\n            raise ValueError(\n                \"'userId' and 'email' must be set in the userinfo dictionary\"\n            )\n\n\n@basic_router.post(\"/refresh-token\")\nasync def refresh_access_token(\n    refresh_token: RefreshTokenData,\n    user: User = Depends(current_user_with_expired_token),\n    user_manager: UserManager = Depends(get_user_manager),\n) -> None:\n    try:\n        logger.debug(f\"Received response from Meechum auth URL for user {user.id}\")\n\n        # Extract new tokens\n        new_access_token = refresh_token.access_token\n        new_refresh_token = refresh_token.refresh_token\n\n        new_expiry = datetime.fromtimestamp(\n            refresh_token.session[\"exp\"] / 1000, tz=timezone.utc\n        )\n        expires_at_timestamp = int(new_expiry.timestamp())\n\n        logger.debug(f\"Access token has been refreshed for user {user.id}\")\n\n        await user_manager.oauth_callback(\n            oauth_name=\"custom\",\n            access_token=new_access_token,\n            account_id=refresh_token.userinfo[\"userId\"],\n            account_email=refresh_token.userinfo[\"email\"],\n            expires_at=expires_at_timestamp,\n            refresh_token=new_refresh_token,\n            associate_by_email=True,\n        )\n\n        logger.info(f\"Successfully refreshed tokens for user {user.id}\")\n\n    except httpx.HTTPStatusError as e:\n        if e.response.status_code == 401:\n            logger.warning(f\"Full authentication required for user {user.id}\")\n            raise HTTPException(\n                status_code=status.HTTP_401_UNAUTHORIZED,\n                detail=\"Full authentication required\",\n            )\n        logger.error(\n            f\"HTTP error occurred while refreshing token for user {user.id}: {str(e)}\"\n        )\n        raise HTTPException(\n            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,\n            detail=\"Failed to refresh token\",\n        )\n    except Exception as e:\n        logger.error(\n            f\"Unexpected error occurred while refreshing token for user {user.id}: {str(e)}\"\n        )\n        raise HTTPException(\n            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,\n            detail=\"An unexpected error occurred\",\n        )\n\n\n@admin_router.put(\"\")\ndef admin_ee_put_settings(\n    settings: EnterpriseSettings, _: User = Depends(current_admin_user)\n) -> None:\n    store_settings(settings)\n\n\n@basic_router.get(\"\")\ndef ee_fetch_settings() -> EnterpriseSettings:\n    if MULTI_TENANT:\n        tenant_id = get_current_tenant_id()\n        if not tenant_id or tenant_id == POSTGRES_DEFAULT_SCHEMA:\n            raise BasicAuthenticationError(detail=\"User must authenticate\")\n\n    return load_settings()\n\n\n@admin_router.put(\"/logo\")\ndef put_logo(\n    file: UploadFile,\n    is_logotype: bool = False,\n    _: User = Depends(current_admin_user),\n) -> None:\n    upload_logo(file=file, is_logotype=is_logotype)\n\n\ndef fetch_logo_helper(db_session: Session) -> Response:  # noqa: ARG001\n    try:\n        file_store = get_default_file_store()\n        onyx_file = file_store.get_file_with_mime_type(get_logo_filename())\n        if not onyx_file:\n            raise ValueError(\"get_onyx_file returned None!\")\n    except Exception:\n        logger.exception(\"Faield to fetch logo file\")\n        raise HTTPException(\n            status_code=404,\n            detail=\"No logo file found\",\n        )\n    else:\n        return Response(\n            content=onyx_file.data,\n            media_type=onyx_file.mime_type,\n            headers={\"Cache-Control\": \"no-cache\"},\n        )\n\n\ndef fetch_logotype_helper(db_session: Session) -> Response:  # noqa: ARG001\n    try:\n        file_store = get_default_file_store()\n        onyx_file = file_store.get_file_with_mime_type(get_logotype_filename())\n        if not onyx_file:\n            raise ValueError(\"get_onyx_file returned None!\")\n    except Exception:\n        raise HTTPException(\n            status_code=404,\n            detail=\"No logotype file found\",\n        )\n    else:\n        return Response(content=onyx_file.data, media_type=onyx_file.mime_type)\n\n\n@basic_router.get(\"/logotype\")\ndef fetch_logotype(db_session: Session = Depends(get_session)) -> Response:\n    return fetch_logotype_helper(db_session)\n\n\n@basic_router.get(\"/logo\")\ndef fetch_logo(\n    is_logotype: bool = False, db_session: Session = Depends(get_session)\n) -> Response:\n    if is_logotype:\n        return fetch_logotype_helper(db_session)\n\n    return fetch_logo_helper(db_session)\n\n\n@admin_router.put(\"/custom-analytics-script\")\ndef upload_custom_analytics_script(\n    script_upload: AnalyticsScriptUpload, _: User = Depends(current_admin_user)\n) -> None:\n    try:\n        store_analytics_script(script_upload)\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n\n@basic_router.get(\"/custom-analytics-script\")\ndef fetch_custom_analytics_script() -> str | None:\n    return load_analytics_script()\n\n\n# ---------------------------------------------------------------------------\n# SCIM token management\n# ---------------------------------------------------------------------------\n\n\ndef _get_scim_dal(db_session: Session = Depends(get_session)) -> ScimDAL:\n    return ScimDAL(db_session)\n\n\n@admin_router.get(\"/scim/token\")\ndef get_active_scim_token(\n    _: User = Depends(current_admin_user),\n    dal: ScimDAL = Depends(_get_scim_dal),\n) -> ScimTokenResponse:\n    \"\"\"Return the currently active SCIM token's metadata, or 404 if none.\"\"\"\n    token = dal.get_active_token()\n    if not token:\n        raise HTTPException(status_code=404, detail=\"No active SCIM token\")\n\n    # Derive the IdP domain from the first synced user as a heuristic.\n    idp_domain: str | None = None\n    mappings, _total = dal.list_user_mappings(start_index=1, count=1)\n    if mappings:\n        user = dal.get_user(mappings[0].user_id)\n        if user and \"@\" in user.email:\n            idp_domain = user.email.rsplit(\"@\", 1)[1]\n\n    return ScimTokenResponse(\n        id=token.id,\n        name=token.name,\n        token_display=token.token_display,\n        is_active=token.is_active,\n        created_at=token.created_at,\n        last_used_at=token.last_used_at,\n        idp_domain=idp_domain,\n    )\n\n\n@admin_router.post(\"/scim/token\", status_code=201)\ndef create_scim_token(\n    body: ScimTokenCreate,\n    user: User = Depends(current_admin_user),\n    dal: ScimDAL = Depends(_get_scim_dal),\n) -> ScimTokenCreatedResponse:\n    \"\"\"Create a new SCIM bearer token.\n\n    Only one token is active at a time — creating a new token automatically\n    revokes all previous tokens. The raw token value is returned exactly once\n    in the response; it cannot be retrieved again.\n    \"\"\"\n    raw_token, hashed_token, token_display = generate_scim_token()\n    token = dal.create_token(\n        name=body.name,\n        hashed_token=hashed_token,\n        token_display=token_display,\n        created_by_id=user.id,\n    )\n    dal.commit()\n\n    return ScimTokenCreatedResponse(\n        id=token.id,\n        name=token.name,\n        token_display=token.token_display,\n        is_active=token.is_active,\n        created_at=token.created_at,\n        last_used_at=token.last_used_at,\n        raw_token=raw_token,\n    )\n"
  },
  {
    "path": "backend/ee/onyx/server/enterprise_settings/models.py",
    "content": "from enum import Enum\nfrom typing import Any\nfrom typing import List\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\n\nclass NavigationItem(BaseModel):\n    link: str\n    title: str\n    # Right now must be one of the FA icons\n    icon: str | None = None\n    # NOTE: SVG must not have a width / height specified\n    # This is the actual SVG as a string. Done this way to reduce\n    # complexity / having to store additional \"logos\" in Postgres\n    svg_logo: str | None = None\n\n    @classmethod\n    def model_validate(cls, *args: Any, **kwargs: Any) -> \"NavigationItem\":\n        instance = super().model_validate(*args, **kwargs)\n        if bool(instance.icon) == bool(instance.svg_logo):\n            raise ValueError(\"Exactly one of fa_icon or svg_logo must be specified\")\n        return instance\n\n\nclass LogoDisplayStyle(str, Enum):\n    LOGO_AND_NAME = \"logo_and_name\"\n    LOGO_ONLY = \"logo_only\"\n    NAME_ONLY = \"name_only\"\n\n\nclass EnterpriseSettings(BaseModel):\n    \"\"\"General settings that only apply to the Enterprise Edition of Onyx\n\n    NOTE: don't put anything sensitive in here, as this is accessible without auth.\"\"\"\n\n    application_name: str | None = None\n    use_custom_logo: bool = False\n    use_custom_logotype: bool = False\n    logo_display_style: LogoDisplayStyle | None = None\n\n    # custom navigation\n    custom_nav_items: List[NavigationItem] = Field(default_factory=list)\n\n    # custom Chat components\n    two_lines_for_chat_header: bool | None = None\n    custom_lower_disclaimer_content: str | None = None\n    custom_header_content: str | None = None\n    custom_popup_header: str | None = None\n    custom_popup_content: str | None = None\n    enable_consent_screen: bool | None = None\n    consent_screen_prompt: str | None = None\n    show_first_visit_notice: bool | None = None\n    custom_greeting_message: str | None = None\n\n    def check_validity(self) -> None:\n        return\n\n\nclass AnalyticsScriptUpload(BaseModel):\n    script: str\n    secret_key: str\n"
  },
  {
    "path": "backend/ee/onyx/server/enterprise_settings/store.py",
    "content": "import os\nfrom io import BytesIO\nfrom typing import Any\nfrom typing import cast\nfrom typing import IO\n\nfrom fastapi import HTTPException\nfrom fastapi import UploadFile\n\nfrom ee.onyx.server.enterprise_settings.models import AnalyticsScriptUpload\nfrom ee.onyx.server.enterprise_settings.models import EnterpriseSettings\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.configs.constants import KV_CUSTOM_ANALYTICS_SCRIPT_KEY\nfrom onyx.configs.constants import KV_ENTERPRISE_SETTINGS_KEY\nfrom onyx.configs.constants import ONYX_DEFAULT_APPLICATION_NAME\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n_LOGO_FILENAME = \"__logo__\"\n_LOGOTYPE_FILENAME = \"__logotype__\"\n\n\ndef load_settings() -> EnterpriseSettings:\n    \"\"\"Loads settings data directly from DB. This should be used primarily\n    for checking what is actually in the DB, aka for editing and saving back settings.\n\n    Runtime settings actually used by the application should be checked with\n    load_runtime_settings as defaults may be applied at runtime.\n    \"\"\"\n\n    dynamic_config_store = get_kv_store()\n    try:\n        settings = EnterpriseSettings(\n            **cast(dict, dynamic_config_store.load(KV_ENTERPRISE_SETTINGS_KEY))\n        )\n    except KvKeyNotFoundError:\n        settings = EnterpriseSettings()\n        dynamic_config_store.store(KV_ENTERPRISE_SETTINGS_KEY, settings.model_dump())\n\n    return settings\n\n\ndef store_settings(settings: EnterpriseSettings) -> None:\n    \"\"\"Stores settings directly to the kv store / db.\"\"\"\n\n    get_kv_store().store(KV_ENTERPRISE_SETTINGS_KEY, settings.model_dump())\n\n\ndef load_runtime_settings() -> EnterpriseSettings:\n    \"\"\"Loads settings from DB and applies any defaults or transformations for use\n    at runtime.\n\n    Should not be stored back to the DB.\n    \"\"\"\n    enterprise_settings = load_settings()\n    if not enterprise_settings.application_name:\n        enterprise_settings.application_name = ONYX_DEFAULT_APPLICATION_NAME\n\n    return enterprise_settings\n\n\n_CUSTOM_ANALYTICS_SECRET_KEY = os.environ.get(\"CUSTOM_ANALYTICS_SECRET_KEY\")\n\n\ndef load_analytics_script() -> str | None:\n    dynamic_config_store = get_kv_store()\n    try:\n        return cast(str, dynamic_config_store.load(KV_CUSTOM_ANALYTICS_SCRIPT_KEY))\n    except KvKeyNotFoundError:\n        return None\n\n\ndef store_analytics_script(analytics_script_upload: AnalyticsScriptUpload) -> None:\n    if (\n        not _CUSTOM_ANALYTICS_SECRET_KEY\n        or analytics_script_upload.secret_key != _CUSTOM_ANALYTICS_SECRET_KEY\n    ):\n        raise ValueError(\"Invalid secret key\")\n\n    get_kv_store().store(KV_CUSTOM_ANALYTICS_SCRIPT_KEY, analytics_script_upload.script)\n\n\ndef is_valid_file_type(filename: str) -> bool:\n    valid_extensions = (\".png\", \".jpg\", \".jpeg\")\n    return filename.endswith(valid_extensions)\n\n\ndef guess_file_type(filename: str) -> str:\n    if filename.lower().endswith(\".png\"):\n        return \"image/png\"\n    elif filename.lower().endswith(\".jpg\") or filename.lower().endswith(\".jpeg\"):\n        return \"image/jpeg\"\n    return \"application/octet-stream\"\n\n\ndef upload_logo(file: UploadFile | str, is_logotype: bool = False) -> bool:\n    content: IO[Any]\n\n    if isinstance(file, str):\n        logger.notice(f\"Uploading logo from local path {file}\")\n        if not os.path.isfile(file) or not is_valid_file_type(file):\n            logger.error(\n                \"Invalid file type- only .png, .jpg, and .jpeg files are allowed\"\n            )\n            return False\n\n        with open(file, \"rb\") as file_handle:\n            file_content = file_handle.read()\n        content = BytesIO(file_content)\n        display_name = file\n        file_type = guess_file_type(file)\n\n    else:\n        logger.notice(\"Uploading logo from uploaded file\")\n        if not file.filename or not is_valid_file_type(file.filename):\n            raise HTTPException(\n                status_code=400,\n                detail=\"Invalid file type- only .png, .jpg, and .jpeg files are allowed\",\n            )\n        content = file.file\n        display_name = file.filename\n        file_type = file.content_type or \"image/jpeg\"\n\n    file_store = get_default_file_store()\n    file_store.save_file(\n        content=content,\n        display_name=display_name,\n        file_origin=FileOrigin.OTHER,\n        file_type=file_type,\n        file_id=_LOGOTYPE_FILENAME if is_logotype else _LOGO_FILENAME,\n    )\n    return True\n\n\ndef get_logo_filename() -> str:\n    return _LOGO_FILENAME\n\n\ndef get_logotype_filename() -> str:\n    return _LOGOTYPE_FILENAME\n"
  },
  {
    "path": "backend/ee/onyx/server/evals/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/server/evals/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\n\nfrom ee.onyx.auth.users import current_cloud_superuser\nfrom onyx.background.celery.apps.client import celery_app as client_app\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.models import User\nfrom onyx.evals.models import EvalConfigurationOptions\nfrom onyx.server.evals.models import EvalRunAck\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/evals\")\n\n\n@router.post(\"/eval_run\", response_model=EvalRunAck)\ndef eval_run(\n    request: EvalConfigurationOptions,\n    user: User = Depends(current_cloud_superuser),  # noqa: ARG001\n) -> EvalRunAck:\n    \"\"\"\n    Run an evaluation with the given message and optional dataset.\n    This endpoint requires a valid API key for authentication.\n    \"\"\"\n    client_app.send_task(\n        OnyxCeleryTask.EVAL_RUN_TASK,\n        kwargs={\n            \"configuration_dict\": request.model_dump(),\n        },\n    )\n    return EvalRunAck(success=True)\n"
  },
  {
    "path": "backend/ee/onyx/server/features/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/server/features/hooks/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/server/features/hooks/api.py",
    "content": "import httpx\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import Query\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import User\nfrom onyx.db.constants import UNSET\nfrom onyx.db.constants import UnsetType\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.hook import create_hook__no_commit\nfrom onyx.db.hook import delete_hook__no_commit\nfrom onyx.db.hook import get_hook_by_id\nfrom onyx.db.hook import get_hook_execution_logs\nfrom onyx.db.hook import get_hooks\nfrom onyx.db.hook import update_hook__no_commit\nfrom onyx.db.models import Hook\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.hooks.api_dependencies import require_hook_enabled\nfrom onyx.hooks.models import HookCreateRequest\nfrom onyx.hooks.models import HookExecutionRecord\nfrom onyx.hooks.models import HookPointMetaResponse\nfrom onyx.hooks.models import HookResponse\nfrom onyx.hooks.models import HookUpdateRequest\nfrom onyx.hooks.models import HookValidateResponse\nfrom onyx.hooks.models import HookValidateStatus\nfrom onyx.hooks.registry import get_all_specs\nfrom onyx.hooks.registry import get_hook_point_spec\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.url import SSRFException\nfrom onyx.utils.url import validate_outbound_http_url\n\nlogger = setup_logger()\n\n# ---------------------------------------------------------------------------\n# SSRF protection\n# ---------------------------------------------------------------------------\n\n\ndef _check_ssrf_safety(endpoint_url: str) -> None:\n    \"\"\"Raise OnyxError if endpoint_url could be used for SSRF.\n\n    Delegates to validate_outbound_http_url with https_only=True.\n    Uses BAD_GATEWAY so the frontend maps the error to the Endpoint URL field.\n    \"\"\"\n    try:\n        validate_outbound_http_url(endpoint_url, https_only=True)\n    except (SSRFException, ValueError) as e:\n        raise OnyxError(OnyxErrorCode.BAD_GATEWAY, str(e))\n\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef _hook_to_response(hook: Hook, creator_email: str | None = None) -> HookResponse:\n    return HookResponse(\n        id=hook.id,\n        name=hook.name,\n        hook_point=hook.hook_point,\n        endpoint_url=hook.endpoint_url,\n        api_key_masked=(\n            hook.api_key.get_value(apply_mask=True) if hook.api_key else None\n        ),\n        fail_strategy=hook.fail_strategy,\n        timeout_seconds=hook.timeout_seconds,\n        is_active=hook.is_active,\n        is_reachable=hook.is_reachable,\n        creator_email=(\n            creator_email\n            if creator_email is not None\n            else (hook.creator.email if hook.creator else None)\n        ),\n        created_at=hook.created_at,\n        updated_at=hook.updated_at,\n    )\n\n\ndef _get_hook_or_404(\n    db_session: Session,\n    hook_id: int,\n    include_creator: bool = False,\n) -> Hook:\n    hook = get_hook_by_id(\n        db_session=db_session,\n        hook_id=hook_id,\n        include_creator=include_creator,\n    )\n    if hook is None:\n        raise OnyxError(OnyxErrorCode.NOT_FOUND, f\"Hook {hook_id} not found.\")\n    return hook\n\n\ndef _raise_for_validation_failure(validation: HookValidateResponse) -> None:\n    \"\"\"Raise an appropriate OnyxError for a non-passed validation result.\"\"\"\n    if validation.status == HookValidateStatus.auth_failed:\n        raise OnyxError(OnyxErrorCode.CREDENTIAL_INVALID, validation.error_message)\n    if validation.status == HookValidateStatus.timeout:\n        raise OnyxError(\n            OnyxErrorCode.GATEWAY_TIMEOUT,\n            f\"Endpoint validation failed: {validation.error_message}\",\n        )\n    raise OnyxError(\n        OnyxErrorCode.BAD_GATEWAY,\n        f\"Endpoint validation failed: {validation.error_message}\",\n    )\n\n\ndef _validate_endpoint(\n    endpoint_url: str,\n    api_key: str | None,\n    timeout_seconds: float,\n) -> HookValidateResponse:\n    \"\"\"Check whether endpoint_url is reachable by sending an empty POST request.\n\n    We use POST since hook endpoints expect POST requests. The server will typically\n    respond with 4xx (missing/invalid body) — that is fine. Any HTTP response means\n    the server is up and routable. A 401/403 response returns auth_failed\n    (not reachable — indicates the api_key is invalid).\n\n    Timeout handling:\n    - Any httpx.TimeoutException (ConnectTimeout, ReadTimeout, WriteTimeout, PoolTimeout) →\n      timeout (operator should consider increasing timeout_seconds).\n    - All other exceptions → cannot_connect.\n    \"\"\"\n    _check_ssrf_safety(endpoint_url)\n    headers: dict[str, str] = {}\n    if api_key:\n        headers[\"Authorization\"] = f\"Bearer {api_key}\"\n    try:\n        with httpx.Client(timeout=timeout_seconds, follow_redirects=False) as client:\n            response = client.post(endpoint_url, headers=headers)\n        if response.status_code in (401, 403):\n            return HookValidateResponse(\n                status=HookValidateStatus.auth_failed,\n                error_message=f\"Authentication failed (HTTP {response.status_code})\",\n            )\n        return HookValidateResponse(status=HookValidateStatus.passed)\n    except httpx.TimeoutException as exc:\n        # Any timeout (connect, read, or write) means the configured timeout_seconds\n        # is too low for this endpoint. Report as timeout so the UI directs the user\n        # to increase the timeout setting.\n        logger.warning(\n            \"Hook endpoint validation: timeout for %s\",\n            endpoint_url,\n            exc_info=exc,\n        )\n        return HookValidateResponse(\n            status=HookValidateStatus.timeout,\n            error_message=\"Endpoint timed out — consider increasing timeout_seconds.\",\n        )\n    except Exception as exc:\n        logger.warning(\n            \"Hook endpoint validation: connection error for %s\",\n            endpoint_url,\n            exc_info=exc,\n        )\n        return HookValidateResponse(\n            status=HookValidateStatus.cannot_connect, error_message=str(exc)\n        )\n\n\n# ---------------------------------------------------------------------------\n# Routers\n# ---------------------------------------------------------------------------\n\nrouter = APIRouter(prefix=\"/admin/hooks\")\n\n\n# ---------------------------------------------------------------------------\n# Hook endpoints\n# ---------------------------------------------------------------------------\n\n\n@router.get(\"/specs\")\ndef get_hook_point_specs(\n    _: User = Depends(current_admin_user),\n    _hook_enabled: None = Depends(require_hook_enabled),\n) -> list[HookPointMetaResponse]:\n    return [\n        HookPointMetaResponse(\n            hook_point=spec.hook_point,\n            display_name=spec.display_name,\n            description=spec.description,\n            docs_url=spec.docs_url,\n            input_schema=spec.input_schema,\n            output_schema=spec.output_schema,\n            default_timeout_seconds=spec.default_timeout_seconds,\n            default_fail_strategy=spec.default_fail_strategy,\n            fail_hard_description=spec.fail_hard_description,\n        )\n        for spec in get_all_specs()\n    ]\n\n\n@router.get(\"\")\ndef list_hooks(\n    _: User = Depends(current_admin_user),\n    _hook_enabled: None = Depends(require_hook_enabled),\n    db_session: Session = Depends(get_session),\n) -> list[HookResponse]:\n    hooks = get_hooks(db_session=db_session, include_creator=True)\n    return [_hook_to_response(h) for h in hooks]\n\n\n@router.post(\"\")\ndef create_hook(\n    req: HookCreateRequest,\n    user: User = Depends(current_admin_user),\n    _hook_enabled: None = Depends(require_hook_enabled),\n    db_session: Session = Depends(get_session),\n) -> HookResponse:\n    \"\"\"Create a new hook. The endpoint is validated before persisting — creation fails if\n    the endpoint cannot be reached or the api_key is invalid. Hooks are created active.\n    \"\"\"\n    spec = get_hook_point_spec(req.hook_point)\n    api_key = req.api_key.get_secret_value() if req.api_key else None\n    validation = _validate_endpoint(\n        endpoint_url=req.endpoint_url,\n        api_key=api_key,\n        timeout_seconds=req.timeout_seconds or spec.default_timeout_seconds,\n    )\n    if validation.status != HookValidateStatus.passed:\n        _raise_for_validation_failure(validation)\n\n    hook = create_hook__no_commit(\n        db_session=db_session,\n        name=req.name,\n        hook_point=req.hook_point,\n        endpoint_url=req.endpoint_url,\n        api_key=api_key,\n        fail_strategy=req.fail_strategy or spec.default_fail_strategy,\n        timeout_seconds=req.timeout_seconds or spec.default_timeout_seconds,\n        is_active=True,\n        is_reachable=True,\n        creator_id=user.id,\n    )\n    db_session.commit()\n    return _hook_to_response(hook, creator_email=user.email)\n\n\n@router.get(\"/{hook_id}\")\ndef get_hook(\n    hook_id: int,\n    _: User = Depends(current_admin_user),\n    _hook_enabled: None = Depends(require_hook_enabled),\n    db_session: Session = Depends(get_session),\n) -> HookResponse:\n    hook = _get_hook_or_404(db_session, hook_id, include_creator=True)\n    return _hook_to_response(hook)\n\n\n@router.patch(\"/{hook_id}\")\ndef update_hook(\n    hook_id: int,\n    req: HookUpdateRequest,\n    _: User = Depends(current_admin_user),\n    _hook_enabled: None = Depends(require_hook_enabled),\n    db_session: Session = Depends(get_session),\n) -> HookResponse:\n    \"\"\"Update hook fields. If endpoint_url, api_key, or timeout_seconds changes, the\n    endpoint is re-validated using the effective values. For active hooks the update is\n    rejected on validation failure, keeping live traffic unaffected. For inactive hooks\n    the update goes through regardless and is_reachable is updated to reflect the result.\n\n    Note: if an active hook's endpoint is currently down, even a timeout_seconds-only\n    increase will be rejected. The recovery flow is: deactivate → update → reactivate.\n    \"\"\"\n    # api_key: UNSET = no change, None = clear, value = update\n    api_key: str | None | UnsetType\n    if \"api_key\" not in req.model_fields_set:\n        api_key = UNSET\n    elif req.api_key is None:\n        api_key = None\n    else:\n        api_key = req.api_key.get_secret_value()\n\n    endpoint_url_changing = \"endpoint_url\" in req.model_fields_set\n    api_key_changing = not isinstance(api_key, UnsetType)\n    timeout_changing = \"timeout_seconds\" in req.model_fields_set\n\n    validated_is_reachable: bool | None = None\n    if endpoint_url_changing or api_key_changing or timeout_changing:\n        existing = _get_hook_or_404(db_session, hook_id)\n        effective_url: str = (\n            req.endpoint_url if endpoint_url_changing else existing.endpoint_url  # type: ignore[assignment]  # endpoint_url is required on create and cannot be cleared on update\n        )\n        effective_api_key: str | None = (\n            (api_key if not isinstance(api_key, UnsetType) else None)\n            if api_key_changing\n            else (\n                existing.api_key.get_value(apply_mask=False)\n                if existing.api_key\n                else None\n            )\n        )\n        effective_timeout: float = (\n            req.timeout_seconds if timeout_changing else existing.timeout_seconds  # type: ignore[assignment]  # req.timeout_seconds is non-None when timeout_changing (validated by HookUpdateRequest)\n        )\n        validation = _validate_endpoint(\n            endpoint_url=effective_url,\n            api_key=effective_api_key,\n            timeout_seconds=effective_timeout,\n        )\n        if existing.is_active and validation.status != HookValidateStatus.passed:\n            _raise_for_validation_failure(validation)\n        validated_is_reachable = validation.status == HookValidateStatus.passed\n\n    hook = update_hook__no_commit(\n        db_session=db_session,\n        hook_id=hook_id,\n        name=req.name,\n        endpoint_url=(req.endpoint_url if endpoint_url_changing else UNSET),\n        api_key=api_key,\n        fail_strategy=req.fail_strategy,\n        timeout_seconds=req.timeout_seconds,\n        is_reachable=validated_is_reachable,\n        include_creator=True,\n    )\n    db_session.commit()\n    return _hook_to_response(hook)\n\n\n@router.delete(\"/{hook_id}\")\ndef delete_hook(\n    hook_id: int,\n    _: User = Depends(current_admin_user),\n    _hook_enabled: None = Depends(require_hook_enabled),\n    db_session: Session = Depends(get_session),\n) -> None:\n    delete_hook__no_commit(db_session=db_session, hook_id=hook_id)\n    db_session.commit()\n\n\n@router.post(\"/{hook_id}/activate\")\ndef activate_hook(\n    hook_id: int,\n    _: User = Depends(current_admin_user),\n    _hook_enabled: None = Depends(require_hook_enabled),\n    db_session: Session = Depends(get_session),\n) -> HookResponse:\n    hook = _get_hook_or_404(db_session, hook_id)\n    if not hook.endpoint_url:\n        raise OnyxError(\n            OnyxErrorCode.INVALID_INPUT, \"Hook has no endpoint URL configured.\"\n        )\n\n    api_key = hook.api_key.get_value(apply_mask=False) if hook.api_key else None\n    validation = _validate_endpoint(\n        endpoint_url=hook.endpoint_url,\n        api_key=api_key,\n        timeout_seconds=hook.timeout_seconds,\n    )\n    if validation.status != HookValidateStatus.passed:\n        # Persist is_reachable=False in a separate session so the request\n        # session has no commits on the failure path and the transaction\n        # boundary stays clean.\n        if hook.is_reachable is not False:\n            with get_session_with_current_tenant() as side_session:\n                update_hook__no_commit(\n                    db_session=side_session, hook_id=hook_id, is_reachable=False\n                )\n                side_session.commit()\n        _raise_for_validation_failure(validation)\n\n    hook = update_hook__no_commit(\n        db_session=db_session,\n        hook_id=hook_id,\n        is_active=True,\n        is_reachable=True,\n        include_creator=True,\n    )\n    db_session.commit()\n    return _hook_to_response(hook)\n\n\n@router.post(\"/{hook_id}/validate\")\ndef validate_hook(\n    hook_id: int,\n    _: User = Depends(current_admin_user),\n    _hook_enabled: None = Depends(require_hook_enabled),\n    db_session: Session = Depends(get_session),\n) -> HookValidateResponse:\n    hook = _get_hook_or_404(db_session, hook_id)\n    if not hook.endpoint_url:\n        raise OnyxError(\n            OnyxErrorCode.INVALID_INPUT, \"Hook has no endpoint URL configured.\"\n        )\n\n    api_key = hook.api_key.get_value(apply_mask=False) if hook.api_key else None\n    validation = _validate_endpoint(\n        endpoint_url=hook.endpoint_url,\n        api_key=api_key,\n        timeout_seconds=hook.timeout_seconds,\n    )\n    validation_passed = validation.status == HookValidateStatus.passed\n    if hook.is_reachable != validation_passed:\n        update_hook__no_commit(\n            db_session=db_session, hook_id=hook_id, is_reachable=validation_passed\n        )\n        db_session.commit()\n    return validation\n\n\n@router.post(\"/{hook_id}/deactivate\")\ndef deactivate_hook(\n    hook_id: int,\n    _: User = Depends(current_admin_user),\n    _hook_enabled: None = Depends(require_hook_enabled),\n    db_session: Session = Depends(get_session),\n) -> HookResponse:\n    hook = update_hook__no_commit(\n        db_session=db_session,\n        hook_id=hook_id,\n        is_active=False,\n        include_creator=True,\n    )\n    db_session.commit()\n    return _hook_to_response(hook)\n\n\n# ---------------------------------------------------------------------------\n# Execution log endpoints\n# ---------------------------------------------------------------------------\n\n\n@router.get(\"/{hook_id}/execution-logs\")\ndef list_hook_execution_logs(\n    hook_id: int,\n    limit: int = Query(default=10, ge=1, le=100),\n    _: User = Depends(current_admin_user),\n    _hook_enabled: None = Depends(require_hook_enabled),\n    db_session: Session = Depends(get_session),\n) -> list[HookExecutionRecord]:\n    _get_hook_or_404(db_session, hook_id)\n    logs = get_hook_execution_logs(db_session=db_session, hook_id=hook_id, limit=limit)\n    return [\n        HookExecutionRecord(\n            error_message=log.error_message,\n            status_code=log.status_code,\n            duration_ms=log.duration_ms,\n            created_at=log.created_at,\n        )\n        for log in logs\n    ]\n"
  },
  {
    "path": "backend/ee/onyx/server/license/api.py",
    "content": "\"\"\"License API endpoints for self-hosted deployments.\n\nThese endpoints allow self-hosted Onyx instances to:\n1. Claim a license after Stripe checkout (via cloud data plane proxy)\n2. Upload a license file manually (for air-gapped deployments)\n3. View license status and seat usage\n4. Refresh/delete the local license\n\nNOTE: Cloud (MULTI_TENANT) deployments do NOT use these endpoints.\nCloud licensing is managed via the control plane and gated_tenants Redis key.\n\"\"\"\n\nimport requests\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import File\nfrom fastapi import UploadFile\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.auth.users import current_admin_user\nfrom ee.onyx.configs.app_configs import CLOUD_DATA_PLANE_URL\nfrom ee.onyx.db.license import delete_license as db_delete_license\nfrom ee.onyx.db.license import get_license\nfrom ee.onyx.db.license import get_license_metadata\nfrom ee.onyx.db.license import invalidate_license_cache\nfrom ee.onyx.db.license import refresh_license_cache\nfrom ee.onyx.db.license import update_license_cache\nfrom ee.onyx.db.license import upsert_license\nfrom ee.onyx.server.license.models import LicenseResponse\nfrom ee.onyx.server.license.models import LicenseSource\nfrom ee.onyx.server.license.models import LicenseStatusResponse\nfrom ee.onyx.server.license.models import LicenseUploadResponse\nfrom ee.onyx.server.license.models import SeatUsageResponse\nfrom ee.onyx.utils.license import verify_license_signature\nfrom onyx.auth.users import User\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/license\")\n\n# PEM-style delimiters used in license file format\n_PEM_BEGIN = \"-----BEGIN ONYX LICENSE-----\"\n_PEM_END = \"-----END ONYX LICENSE-----\"\n\n\ndef _strip_pem_delimiters(content: str) -> str:\n    \"\"\"Strip PEM-style delimiters from license content if present.\"\"\"\n    content = content.strip()\n    if content.startswith(_PEM_BEGIN) and content.endswith(_PEM_END):\n        # Remove first and last lines (the delimiters)\n        lines = content.split(\"\\n\")\n        return \"\\n\".join(lines[1:-1]).strip()\n    return content\n\n\n@router.get(\"\")\nasync def get_license_status(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> LicenseStatusResponse:\n    \"\"\"Get current license status and seat usage.\"\"\"\n    metadata = get_license_metadata(db_session)\n\n    if not metadata:\n        return LicenseStatusResponse(has_license=False)\n\n    return LicenseStatusResponse(\n        has_license=True,\n        seats=metadata.seats,\n        used_seats=metadata.used_seats,\n        plan_type=metadata.plan_type,\n        issued_at=metadata.issued_at,\n        expires_at=metadata.expires_at,\n        grace_period_end=metadata.grace_period_end,\n        status=metadata.status,\n        source=metadata.source,\n    )\n\n\n@router.get(\"/seats\")\nasync def get_seat_usage(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> SeatUsageResponse:\n    \"\"\"Get detailed seat usage information.\"\"\"\n    metadata = get_license_metadata(db_session)\n\n    if not metadata:\n        return SeatUsageResponse(\n            total_seats=0,\n            used_seats=0,\n            available_seats=0,\n        )\n\n    return SeatUsageResponse(\n        total_seats=metadata.seats,\n        used_seats=metadata.used_seats,\n        available_seats=max(0, metadata.seats - metadata.used_seats),\n    )\n\n\n@router.post(\"/claim\")\nasync def claim_license(\n    session_id: str | None = None,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> LicenseResponse:\n    \"\"\"\n    Claim a license from the control plane (self-hosted only).\n\n    Two modes:\n    1. With session_id: After Stripe checkout, exchange session_id for license\n    2. Without session_id: Re-claim using existing license for auth\n\n    Use without session_id after:\n    - Updating seats via the billing API\n    - Returning from the Stripe customer portal\n    - Any operation that regenerates the license on control plane\n    Claim a license from the control plane (self-hosted only).\n\n    Two modes:\n    1. With session_id: After Stripe checkout, exchange session_id for license\n    2. Without session_id: Re-claim using existing license for auth\n    \"\"\"\n    if MULTI_TENANT:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"License claiming is only available for self-hosted deployments\",\n        )\n\n    try:\n        if session_id:\n            # Claim license after checkout using session_id\n            url = f\"{CLOUD_DATA_PLANE_URL}/proxy/claim-license\"\n            response = requests.post(\n                url,\n                json={\"session_id\": session_id},\n                headers={\"Content-Type\": \"application/json\"},\n                timeout=30,\n            )\n        else:\n            # Re-claim using existing license for auth\n            metadata = get_license_metadata(db_session)\n            if not metadata or not metadata.tenant_id:\n                raise OnyxError(\n                    OnyxErrorCode.VALIDATION_ERROR,\n                    \"No license found. Provide session_id after checkout.\",\n                )\n\n            license_row = get_license(db_session)\n            if not license_row or not license_row.license_data:\n                raise OnyxError(\n                    OnyxErrorCode.VALIDATION_ERROR,\n                    \"No license found in database\",\n                )\n\n            url = f\"{CLOUD_DATA_PLANE_URL}/proxy/license/{metadata.tenant_id}\"\n            response = requests.get(\n                url,\n                headers={\n                    \"Authorization\": f\"Bearer {license_row.license_data}\",\n                    \"Content-Type\": \"application/json\",\n                },\n                timeout=30,\n            )\n\n        response.raise_for_status()\n\n        data = response.json()\n        license_data = data.get(\"license\")\n\n        if not license_data:\n            raise OnyxError(OnyxErrorCode.NOT_FOUND, \"No license in response\")\n\n        # Verify signature before persisting\n        payload = verify_license_signature(license_data)\n\n        # Store in DB\n        upsert_license(db_session, license_data)\n\n        try:\n            update_license_cache(payload, source=LicenseSource.AUTO_FETCH)\n        except Exception as cache_error:\n            logger.warning(f\"Failed to update license cache: {cache_error}\")\n\n        logger.info(\n            f\"License claimed: seats={payload.seats}, expires={payload.expires_at.date()}\"\n        )\n        return LicenseResponse(success=True, license=payload)\n\n    except requests.HTTPError as e:\n        status_code = e.response.status_code if e.response is not None else 502\n        detail = \"Failed to claim license\"\n        try:\n            error_data = e.response.json() if e.response is not None else {}\n            detail = error_data.get(\"detail\", detail)\n        except Exception:\n            pass\n        raise OnyxError(\n            OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=status_code\n        )\n    except ValueError as e:\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))\n    except requests.RequestException:\n        raise OnyxError(\n            OnyxErrorCode.BAD_GATEWAY, \"Failed to connect to license server\"\n        )\n\n\n@router.post(\"/upload\")\nasync def upload_license(\n    license_file: UploadFile = File(...),\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> LicenseUploadResponse:\n    \"\"\"\n    Upload a license file manually (self-hosted only).\n\n    Used for air-gapped deployments where the cloud data plane is not accessible.\n    The license file must be cryptographically signed by Onyx.\n    \"\"\"\n    if MULTI_TENANT:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"License upload is only available for self-hosted deployments\",\n        )\n\n    try:\n        content = await license_file.read()\n        license_data = content.decode(\"utf-8\").strip()\n        # Strip PEM-style delimiters if present (used in .lic file format)\n        license_data = _strip_pem_delimiters(license_data)\n        # Remove any stray whitespace/newlines from user input\n        license_data = license_data.strip()\n    except UnicodeDecodeError:\n        raise OnyxError(OnyxErrorCode.INVALID_INPUT, \"Invalid license file format\")\n\n    # Verify cryptographic signature - this is the only validation needed\n    # The license's tenant_id identifies the customer in control plane, not locally\n    try:\n        payload = verify_license_signature(license_data)\n    except ValueError as e:\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))\n\n    # Persist to DB and update cache\n    upsert_license(db_session, license_data)\n\n    try:\n        update_license_cache(payload, source=LicenseSource.MANUAL_UPLOAD)\n    except Exception as cache_error:\n        logger.warning(f\"Failed to update license cache: {cache_error}\")\n\n    return LicenseUploadResponse(\n        success=True,\n        message=f\"License uploaded successfully. {payload.seats} seats, expires {payload.expires_at.date()}\",\n    )\n\n\n@router.post(\"/refresh\")\nasync def refresh_license_cache_endpoint(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> LicenseStatusResponse:\n    \"\"\"\n    Force refresh the license cache from the local database.\n\n    Useful after manual database changes or to verify license validity.\n    Does NOT fetch from control plane - use /claim for that.\n    \"\"\"\n    metadata = refresh_license_cache(db_session)\n\n    if not metadata:\n        return LicenseStatusResponse(has_license=False)\n\n    return LicenseStatusResponse(\n        has_license=True,\n        seats=metadata.seats,\n        used_seats=metadata.used_seats,\n        plan_type=metadata.plan_type,\n        issued_at=metadata.issued_at,\n        expires_at=metadata.expires_at,\n        grace_period_end=metadata.grace_period_end,\n        status=metadata.status,\n        source=metadata.source,\n    )\n\n\n@router.delete(\"\")\nasync def delete_license(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> dict[str, bool]:\n    \"\"\"\n    Delete the current license.\n\n    Admin only - removes license from database and invalidates cache.\n    \"\"\"\n    if MULTI_TENANT:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"License deletion is only available for self-hosted deployments\",\n        )\n\n    try:\n        invalidate_license_cache()\n    except Exception as cache_error:\n        logger.warning(f\"Failed to invalidate license cache: {cache_error}\")\n\n    deleted = db_delete_license(db_session)\n\n    return {\"deleted\": deleted}\n"
  },
  {
    "path": "backend/ee/onyx/server/license/models.py",
    "content": "from datetime import datetime\nfrom enum import Enum\n\nfrom pydantic import BaseModel\n\nfrom onyx.server.settings.models import ApplicationStatus\n\n\nclass PlanType(str, Enum):\n    MONTHLY = \"monthly\"\n    ANNUAL = \"annual\"\n\n\nclass LicenseSource(str, Enum):\n    AUTO_FETCH = \"auto_fetch\"\n    MANUAL_UPLOAD = \"manual_upload\"\n\n\nclass LicensePayload(BaseModel):\n    \"\"\"The payload portion of a signed license.\"\"\"\n\n    version: str\n    tenant_id: str\n    organization_name: str | None = None\n    issued_at: datetime\n    expires_at: datetime\n    seats: int\n    plan_type: PlanType\n    billing_cycle: str | None = None\n    grace_period_days: int = 30\n    stripe_subscription_id: str | None = None\n    stripe_customer_id: str | None = None\n\n\nclass LicenseData(BaseModel):\n    \"\"\"Full signed license structure.\"\"\"\n\n    payload: LicensePayload\n    signature: str\n\n\nclass LicenseMetadata(BaseModel):\n    \"\"\"Cached license metadata stored in Redis.\"\"\"\n\n    tenant_id: str\n    organization_name: str | None = None\n    seats: int\n    used_seats: int\n    plan_type: PlanType\n    issued_at: datetime\n    expires_at: datetime\n    grace_period_end: datetime | None = None\n    status: ApplicationStatus\n    source: LicenseSource | None = None\n    stripe_subscription_id: str | None = None\n\n\nclass LicenseStatusResponse(BaseModel):\n    \"\"\"Response for license status API.\"\"\"\n\n    has_license: bool\n    seats: int = 0\n    used_seats: int = 0\n    plan_type: PlanType | None = None\n    issued_at: datetime | None = None\n    expires_at: datetime | None = None\n    grace_period_end: datetime | None = None\n    status: ApplicationStatus | None = None\n    source: LicenseSource | None = None\n\n\nclass LicenseResponse(BaseModel):\n    \"\"\"Response after license fetch/upload.\"\"\"\n\n    success: bool\n    message: str | None = None\n    license: LicensePayload | None = None\n\n\nclass LicenseUploadResponse(BaseModel):\n    \"\"\"Response after license upload.\"\"\"\n\n    success: bool\n    message: str | None = None\n\n\nclass SeatUsageResponse(BaseModel):\n    \"\"\"Response for seat usage API.\"\"\"\n\n    total_seats: int\n    used_seats: int\n    available_seats: int\n"
  },
  {
    "path": "backend/ee/onyx/server/manage/standard_answer.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.standard_answer import fetch_standard_answer\nfrom ee.onyx.db.standard_answer import fetch_standard_answer_categories\nfrom ee.onyx.db.standard_answer import fetch_standard_answer_category\nfrom ee.onyx.db.standard_answer import fetch_standard_answers\nfrom ee.onyx.db.standard_answer import insert_standard_answer\nfrom ee.onyx.db.standard_answer import insert_standard_answer_category\nfrom ee.onyx.db.standard_answer import remove_standard_answer\nfrom ee.onyx.db.standard_answer import update_standard_answer\nfrom ee.onyx.db.standard_answer import update_standard_answer_category\nfrom onyx.auth.users import current_admin_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.server.manage.models import StandardAnswer\nfrom onyx.server.manage.models import StandardAnswerCategory\nfrom onyx.server.manage.models import StandardAnswerCategoryCreationRequest\nfrom onyx.server.manage.models import StandardAnswerCreationRequest\n\nrouter = APIRouter(prefix=\"/manage\")\n\n\n@router.post(\"/admin/standard-answer\")\ndef create_standard_answer(\n    standard_answer_creation_request: StandardAnswerCreationRequest,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> StandardAnswer:\n    standard_answer_model = insert_standard_answer(\n        keyword=standard_answer_creation_request.keyword,\n        answer=standard_answer_creation_request.answer,\n        category_ids=standard_answer_creation_request.categories,\n        match_regex=standard_answer_creation_request.match_regex,\n        match_any_keywords=standard_answer_creation_request.match_any_keywords,\n        db_session=db_session,\n    )\n    return StandardAnswer.from_model(standard_answer_model)\n\n\n@router.get(\"/admin/standard-answer\")\ndef list_standard_answers(\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> list[StandardAnswer]:\n    standard_answer_models = fetch_standard_answers(db_session=db_session)\n    return [\n        StandardAnswer.from_model(standard_answer_model)\n        for standard_answer_model in standard_answer_models\n    ]\n\n\n@router.patch(\"/admin/standard-answer/{standard_answer_id}\")\ndef patch_standard_answer(\n    standard_answer_id: int,\n    standard_answer_creation_request: StandardAnswerCreationRequest,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> StandardAnswer:\n    existing_standard_answer = fetch_standard_answer(\n        standard_answer_id=standard_answer_id,\n        db_session=db_session,\n    )\n\n    if existing_standard_answer is None:\n        raise HTTPException(status_code=404, detail=\"Standard answer not found\")\n\n    standard_answer_model = update_standard_answer(\n        standard_answer_id=standard_answer_id,\n        keyword=standard_answer_creation_request.keyword,\n        answer=standard_answer_creation_request.answer,\n        category_ids=standard_answer_creation_request.categories,\n        match_regex=standard_answer_creation_request.match_regex,\n        match_any_keywords=standard_answer_creation_request.match_any_keywords,\n        db_session=db_session,\n    )\n    return StandardAnswer.from_model(standard_answer_model)\n\n\n@router.delete(\"/admin/standard-answer/{standard_answer_id}\")\ndef delete_standard_answer(\n    standard_answer_id: int,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> None:\n    return remove_standard_answer(\n        standard_answer_id=standard_answer_id,\n        db_session=db_session,\n    )\n\n\n@router.post(\"/admin/standard-answer/category\")\ndef create_standard_answer_category(\n    standard_answer_category_creation_request: StandardAnswerCategoryCreationRequest,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> StandardAnswerCategory:\n    standard_answer_category_model = insert_standard_answer_category(\n        category_name=standard_answer_category_creation_request.name,\n        db_session=db_session,\n    )\n    return StandardAnswerCategory.from_model(standard_answer_category_model)\n\n\n@router.get(\"/admin/standard-answer/category\")\ndef list_standard_answer_categories(\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> list[StandardAnswerCategory]:\n    standard_answer_category_models = fetch_standard_answer_categories(\n        db_session=db_session\n    )\n    return [\n        StandardAnswerCategory.from_model(standard_answer_category_model)\n        for standard_answer_category_model in standard_answer_category_models\n    ]\n\n\n@router.patch(\"/admin/standard-answer/category/{standard_answer_category_id}\")\ndef patch_standard_answer_category(\n    standard_answer_category_id: int,\n    standard_answer_category_creation_request: StandardAnswerCategoryCreationRequest,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> StandardAnswerCategory:\n    existing_standard_answer_category = fetch_standard_answer_category(\n        standard_answer_category_id=standard_answer_category_id,\n        db_session=db_session,\n    )\n\n    if existing_standard_answer_category is None:\n        raise HTTPException(\n            status_code=404, detail=\"Standard answer category not found\"\n        )\n\n    standard_answer_category_model = update_standard_answer_category(\n        standard_answer_category_id=standard_answer_category_id,\n        category_name=standard_answer_category_creation_request.name,\n        db_session=db_session,\n    )\n    return StandardAnswerCategory.from_model(standard_answer_category_model)\n"
  },
  {
    "path": "backend/ee/onyx/server/middleware/license_enforcement.py",
    "content": "\"\"\"Middleware to enforce license status for SELF-HOSTED deployments only.\n\nNOTE: This middleware is NOT used for multi-tenant (cloud) deployments.\nMulti-tenant gating is handled separately by the control plane via the\n/tenants/product-gating endpoint and is_tenant_gated() checks.\n\nIMPORTANT: Mutual Exclusivity with ENTERPRISE_EDITION_ENABLED\n============================================================\nThis middleware is controlled by LICENSE_ENFORCEMENT_ENABLED env var.\nIt works alongside the legacy ENTERPRISE_EDITION_ENABLED system:\n\n- LICENSE_ENFORCEMENT_ENABLED=false (default):\n  Middleware is disabled. EE features are controlled solely by\n  ENTERPRISE_EDITION_ENABLED. This preserves legacy behavior.\n\n- LICENSE_ENFORCEMENT_ENABLED=true:\n  Middleware actively enforces license status. EE features require\n  a valid license, regardless of ENTERPRISE_EDITION_ENABLED.\n\nEventually, ENTERPRISE_EDITION_ENABLED will be removed and license\nenforcement will be the only mechanism for gating EE features.\n\nLicense Enforcement States (when enabled)\n=========================================\nFor self-hosted deployments:\n\n1. No license (never subscribed):\n   - Allow community features (basic connectors, search, chat)\n   - Block EE-only features (analytics, user groups, etc.)\n\n2. GATED_ACCESS (fully expired):\n   - Block all routes except billing/auth/license\n   - User must renew subscription to continue\n\n3. Valid license (ACTIVE, GRACE_PERIOD, PAYMENT_REMINDER):\n   - Full access to all EE features\n   - Seat limits enforced\n   - GRACE_PERIOD/PAYMENT_REMINDER are for notifications only, not blocking\n\"\"\"\n\nimport logging\nfrom collections.abc import Awaitable\nfrom collections.abc import Callable\n\nfrom fastapi import FastAPI\nfrom fastapi import Request\nfrom fastapi import Response\nfrom fastapi.responses import JSONResponse\nfrom sqlalchemy.exc import SQLAlchemyError\n\nfrom ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED\nfrom ee.onyx.configs.license_enforcement_config import EE_ONLY_PATH_PREFIXES\nfrom ee.onyx.configs.license_enforcement_config import (\n    LICENSE_ENFORCEMENT_ALLOWED_PREFIXES,\n)\nfrom ee.onyx.db.license import get_cached_license_metadata\nfrom ee.onyx.db.license import refresh_license_cache\nfrom onyx.cache.interface import CACHE_TRANSIENT_ERRORS\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.server.settings.models import ApplicationStatus\nfrom shared_configs.contextvars import get_current_tenant_id\n\n\ndef _is_path_allowed(path: str) -> bool:\n    \"\"\"Check if path is in allowlist (prefix match).\"\"\"\n    return any(\n        path.startswith(prefix) for prefix in LICENSE_ENFORCEMENT_ALLOWED_PREFIXES\n    )\n\n\ndef _is_ee_only_path(path: str) -> bool:\n    \"\"\"Check if path requires EE license (prefix match).\"\"\"\n    return any(path.startswith(prefix) for prefix in EE_ONLY_PATH_PREFIXES)\n\n\ndef add_license_enforcement_middleware(\n    app: FastAPI, logger: logging.LoggerAdapter\n) -> None:\n    logger.info(\"License enforcement middleware registered\")\n\n    @app.middleware(\"http\")\n    async def enforce_license(\n        request: Request, call_next: Callable[[Request], Awaitable[Response]]\n    ) -> Response:\n        \"\"\"Block requests when license is expired/gated.\"\"\"\n        if not LICENSE_ENFORCEMENT_ENABLED:\n            return await call_next(request)\n\n        path = request.url.path\n        if path.startswith(\"/api\"):\n            path = path[4:]\n\n        if _is_path_allowed(path):\n            return await call_next(request)\n\n        is_gated = False\n        tenant_id = get_current_tenant_id()\n\n        try:\n            metadata = get_cached_license_metadata(tenant_id)\n\n            # If no cached metadata, check database (cache may have been cleared)\n            if not metadata:\n                logger.debug(\n                    \"[license_enforcement] No cached license, checking database...\"\n                )\n                try:\n                    with get_session_with_current_tenant() as db_session:\n                        metadata = refresh_license_cache(db_session, tenant_id)\n                        if metadata:\n                            logger.info(\n                                \"[license_enforcement] Loaded license from database\"\n                            )\n                except SQLAlchemyError as db_error:\n                    logger.warning(\n                        f\"[license_enforcement] Failed to check database for license: {db_error}\"\n                    )\n\n            if metadata:\n                # User HAS a license (current or expired)\n                if metadata.status == ApplicationStatus.GATED_ACCESS:\n                    # License fully expired - gate the user\n                    # Note: GRACE_PERIOD and PAYMENT_REMINDER are for notifications only,\n                    # they don't block access\n                    is_gated = True\n                else:\n                    # License is active - check seat limit\n                    # used_seats in cache is kept accurate via invalidation\n                    # when users are added/removed\n                    if metadata.used_seats > metadata.seats:\n                        logger.info(\n                            f\"[license_enforcement] Blocking request: \"\n                            f\"seat limit exceeded ({metadata.used_seats}/{metadata.seats})\"\n                        )\n                        return JSONResponse(\n                            status_code=402,\n                            content={\n                                \"detail\": {\n                                    \"error\": \"seat_limit_exceeded\",\n                                    \"message\": f\"Seat limit exceeded: {metadata.used_seats} of {metadata.seats} seats used.\",\n                                    \"used_seats\": metadata.used_seats,\n                                    \"seats\": metadata.seats,\n                                }\n                            },\n                        )\n            else:\n                # No license in cache OR database = never subscribed\n                # Allow community features, but block EE-only features\n                if _is_ee_only_path(path):\n                    logger.info(\n                        f\"[license_enforcement] Blocking EE-only path (no license): {path}\"\n                    )\n                    return JSONResponse(\n                        status_code=402,\n                        content={\n                            \"detail\": {\n                                \"error\": \"enterprise_license_required\",\n                                \"message\": \"This feature requires an Enterprise license. \"\n                                \"Please upgrade to access this functionality.\",\n                            }\n                        },\n                    )\n                logger.debug(\n                    \"[license_enforcement] No license, allowing community features\"\n                )\n                is_gated = False\n        except CACHE_TRANSIENT_ERRORS as e:\n            logger.warning(f\"Failed to check license metadata: {e}\")\n            # Fail open - don't block users due to cache connectivity issues\n            is_gated = False\n\n        if is_gated:\n            logger.info(\n                f\"[license_enforcement] Blocking request (license expired): {path}\"\n            )\n\n            return JSONResponse(\n                status_code=402,\n                content={\n                    \"detail\": {\n                        \"error\": \"license_expired\",\n                        \"message\": \"Your subscription has expired. Please update your billing.\",\n                    }\n                },\n            )\n\n        return await call_next(request)\n"
  },
  {
    "path": "backend/ee/onyx/server/middleware/tenant_tracking.py",
    "content": "import logging\nfrom collections.abc import Awaitable\nfrom collections.abc import Callable\n\nfrom fastapi import FastAPI\nfrom fastapi import HTTPException\nfrom fastapi import Request\nfrom fastapi import Response\n\nfrom ee.onyx.auth.users import decode_anonymous_user_jwt_token\nfrom onyx.auth.utils import extract_tenant_from_auth_header\nfrom onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME\nfrom onyx.configs.constants import TENANT_ID_COOKIE_NAME\nfrom onyx.db.engine.sql_engine import is_valid_schema_name\nfrom onyx.redis.redis_pool import retrieve_auth_token_data_from_redis\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n\ndef add_api_server_tenant_id_middleware(\n    app: FastAPI, logger: logging.LoggerAdapter\n) -> None:\n    @app.middleware(\"http\")\n    async def set_tenant_id(\n        request: Request, call_next: Callable[[Request], Awaitable[Response]]\n    ) -> Response:\n        \"\"\"Extracts the tenant id from multiple locations and sets the context var.\n\n        This is very specific to the api server and probably not something you'd want\n        to use elsewhere.\n        \"\"\"\n        try:\n            if MULTI_TENANT:\n                tenant_id = await _get_tenant_id_from_request(request, logger)\n            else:\n                tenant_id = POSTGRES_DEFAULT_SCHEMA\n\n            CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n            return await call_next(request)\n\n        except Exception as e:\n            logger.exception(f\"Error in tenant ID middleware: {str(e)}\")\n            raise\n\n\nasync def _get_tenant_id_from_request(\n    request: Request, logger: logging.LoggerAdapter\n) -> str:\n    \"\"\"\n    Attempt to extract tenant_id from:\n    1) The API key or PAT (Personal Access Token) header\n    2) The Redis-based token (stored in Cookie: fastapiusersauth)\n    3) The anonymous user cookie\n    Fallback: POSTGRES_DEFAULT_SCHEMA\n    \"\"\"\n    # Check for API key or PAT in Authorization header\n    tenant_id = extract_tenant_from_auth_header(request)\n    if tenant_id is not None:\n        return tenant_id\n\n    try:\n        # Look up token data in Redis\n\n        token_data = await retrieve_auth_token_data_from_redis(request)\n\n        if token_data:\n            tenant_id_from_payload = token_data.get(\n                \"tenant_id\", POSTGRES_DEFAULT_SCHEMA\n            )\n\n            tenant_id = (\n                str(tenant_id_from_payload)\n                if tenant_id_from_payload is not None\n                else None\n            )\n\n            if tenant_id and not is_valid_schema_name(tenant_id):\n                raise HTTPException(status_code=400, detail=\"Invalid tenant ID format\")\n\n        # Check for anonymous user cookie\n        anonymous_user_cookie = request.cookies.get(ANONYMOUS_USER_COOKIE_NAME)\n        if anonymous_user_cookie:\n            try:\n                anonymous_user_data = decode_anonymous_user_jwt_token(\n                    anonymous_user_cookie\n                )\n                tenant_id = anonymous_user_data.get(\n                    \"tenant_id\", POSTGRES_DEFAULT_SCHEMA\n                )\n\n                if not tenant_id or not is_valid_schema_name(tenant_id):\n                    raise HTTPException(\n                        status_code=400, detail=\"Invalid tenant ID format\"\n                    )\n\n                return tenant_id\n\n            except Exception as e:\n                logger.error(f\"Error decoding anonymous user cookie: {str(e)}\")\n                # Continue and attempt to authenticate\n\n        logger.debug(\n            \"Token data not found or expired in Redis, defaulting to POSTGRES_DEFAULT_SCHEMA\"\n        )\n\n        # Return POSTGRES_DEFAULT_SCHEMA, so non-authenticated requests are sent to the default schema\n        # The CURRENT_TENANT_ID_CONTEXTVAR is initialized with POSTGRES_DEFAULT_SCHEMA,\n        # so we maintain consistency by returning it here when no valid tenant is found.\n        return POSTGRES_DEFAULT_SCHEMA\n\n    except Exception as e:\n        logger.error(f\"Unexpected error in _get_tenant_id_from_request: {str(e)}\")\n        raise HTTPException(status_code=500, detail=\"Internal server error\")\n\n    finally:\n        if tenant_id:\n            return tenant_id\n\n        # As a final step, check for explicit tenant_id cookie\n        tenant_id_cookie = request.cookies.get(TENANT_ID_COOKIE_NAME)\n        if tenant_id_cookie and is_valid_schema_name(tenant_id_cookie):\n            return tenant_id_cookie\n\n        # If we've reached this point, return the default schema\n        return POSTGRES_DEFAULT_SCHEMA\n"
  },
  {
    "path": "backend/ee/onyx/server/oauth/api.py",
    "content": "import base64\nimport uuid\n\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi.responses import JSONResponse\n\nfrom ee.onyx.server.oauth.api_router import router\nfrom ee.onyx.server.oauth.confluence_cloud import ConfluenceCloudOAuth\nfrom ee.onyx.server.oauth.google_drive import GoogleDriveOAuth\nfrom ee.onyx.server.oauth.slack import SlackOAuth\nfrom onyx.auth.users import current_admin_user\nfrom onyx.configs.app_configs import DEV_MODE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.models import User\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\n@router.post(\"/prepare-authorization-request\")\ndef prepare_authorization_request(\n    connector: DocumentSource,\n    redirect_on_success: str | None,\n    user: User = Depends(current_admin_user),\n    tenant_id: str | None = Depends(get_current_tenant_id),\n) -> JSONResponse:\n    \"\"\"Used by the frontend to generate the url for the user's browser during auth request.\n\n    Example: https://www.oauth.com/oauth2-servers/authorization/the-authorization-request/\n    \"\"\"\n\n    # create random oauth state param for security and to retrieve user data later\n    oauth_uuid = uuid.uuid4()\n    oauth_uuid_str = str(oauth_uuid)\n\n    # urlsafe b64 encode the uuid for the oauth url\n    oauth_state = (\n        base64.urlsafe_b64encode(oauth_uuid.bytes).rstrip(b\"=\").decode(\"utf-8\")\n    )\n\n    session: str | None = None\n    if connector == DocumentSource.SLACK:\n        if not DEV_MODE:\n            oauth_url = SlackOAuth.generate_oauth_url(oauth_state)\n        else:\n            oauth_url = SlackOAuth.generate_dev_oauth_url(oauth_state)\n\n        session = SlackOAuth.session_dump_json(\n            email=user.email, redirect_on_success=redirect_on_success\n        )\n    elif connector == DocumentSource.CONFLUENCE:\n        if not DEV_MODE:\n            oauth_url = ConfluenceCloudOAuth.generate_oauth_url(oauth_state)\n        else:\n            oauth_url = ConfluenceCloudOAuth.generate_dev_oauth_url(oauth_state)\n        session = ConfluenceCloudOAuth.session_dump_json(\n            email=user.email, redirect_on_success=redirect_on_success\n        )\n    elif connector == DocumentSource.GOOGLE_DRIVE:\n        if not DEV_MODE:\n            oauth_url = GoogleDriveOAuth.generate_oauth_url(oauth_state)\n        else:\n            oauth_url = GoogleDriveOAuth.generate_dev_oauth_url(oauth_state)\n        session = GoogleDriveOAuth.session_dump_json(\n            email=user.email, redirect_on_success=redirect_on_success\n        )\n    else:\n        oauth_url = None\n\n    if not oauth_url:\n        raise HTTPException(\n            status_code=404,\n            detail=f\"The document source type {connector} does not have OAuth implemented\",\n        )\n\n    if not session:\n        raise HTTPException(\n            status_code=500,\n            detail=f\"The document source type {connector} failed to generate an OAuth session.\",\n        )\n\n    r = get_redis_client(tenant_id=tenant_id)\n\n    # store important session state to retrieve when the user is redirected back\n    # 10 min is the max we want an oauth flow to be valid\n    r.set(f\"da_oauth:{oauth_uuid_str}\", session, ex=600)\n\n    return JSONResponse(content={\"url\": oauth_url})\n"
  },
  {
    "path": "backend/ee/onyx/server/oauth/api_router.py",
    "content": "from fastapi import APIRouter\n\nrouter: APIRouter = APIRouter(prefix=\"/oauth\")\n"
  },
  {
    "path": "backend/ee/onyx/server/oauth/confluence_cloud.py",
    "content": "import base64\nimport uuid\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\n\nimport requests\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi.responses import JSONResponse\nfrom pydantic import BaseModel\nfrom pydantic import ValidationError\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.server.oauth.api_router import router\nfrom onyx.auth.users import current_admin_user\nfrom onyx.configs.app_configs import DEV_MODE\nfrom onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_ID\nfrom onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.confluence.utils import CONFLUENCE_OAUTH_TOKEN_URL\nfrom onyx.db.credentials import create_credential\nfrom onyx.db.credentials import fetch_credential_by_id_for_user\nfrom onyx.db.credentials import update_credential_json\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.documents.models import CredentialBase\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\nclass ConfluenceCloudOAuth:\n    # https://developer.atlassian.com/cloud/confluence/oauth-2-3lo-apps/\n\n    class OAuthSession(BaseModel):\n        \"\"\"Stored in redis to be looked up on callback\"\"\"\n\n        email: str\n        redirect_on_success: str | None  # Where to send the user if OAuth flow succeeds\n\n    class TokenResponse(BaseModel):\n        access_token: str\n        expires_in: int\n        token_type: str\n        refresh_token: str\n        scope: str\n\n    class AccessibleResources(BaseModel):\n        id: str\n        name: str\n        url: str\n        scopes: list[str]\n        avatarUrl: str\n\n    CLIENT_ID = OAUTH_CONFLUENCE_CLOUD_CLIENT_ID\n    CLIENT_SECRET = OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET\n    TOKEN_URL = CONFLUENCE_OAUTH_TOKEN_URL\n\n    ACCESSIBLE_RESOURCE_URL = (\n        \"https://api.atlassian.com/oauth/token/accessible-resources\"\n    )\n\n    # All read scopes per https://developer.atlassian.com/cloud/confluence/scopes-for-oauth-2-3LO-and-forge-apps/\n    CONFLUENCE_OAUTH_SCOPE = (\n        # classic scope\n        \"read:confluence-space.summary%20\"\n        \"read:confluence-props%20\"\n        \"read:confluence-content.all%20\"\n        \"read:confluence-content.summary%20\"\n        \"read:confluence-content.permission%20\"\n        \"read:confluence-user%20\"\n        \"read:confluence-groups%20\"\n        \"read:space:confluence%20\"\n        \"readonly:content.attachment:confluence%20\"\n        \"search:confluence%20\"\n        # granular scope\n        \"read:attachment:confluence%20\"  # possibly unneeded unless calling v2 attachments api\n        \"read:content-details:confluence%20\"  # for permission sync\n        \"offline_access\"\n    )\n\n    REDIRECT_URI = f\"{WEB_DOMAIN}/admin/connectors/confluence/oauth/callback\"\n    DEV_REDIRECT_URI = f\"https://redirectmeto.com/{REDIRECT_URI}\"\n\n    # eventually for Confluence Data Center\n    # oauth_url = (\n    #     f\"http://localhost:8090/rest/oauth/v2/authorize?client_id={CONFLUENCE_OAUTH_CLIENT_ID}\"\n    #     f\"&scope={CONFLUENCE_OAUTH_SCOPE_2}\"\n    #     f\"&redirect_uri={redirectme_uri}\"\n    # )\n\n    @classmethod\n    def generate_oauth_url(cls, state: str) -> str:\n        return cls._generate_oauth_url_helper(cls.REDIRECT_URI, state)\n\n    @classmethod\n    def generate_dev_oauth_url(cls, state: str) -> str:\n        \"\"\"dev mode workaround for localhost testing\n        - https://www.nango.dev/blog/oauth-redirects-on-localhost-with-https\n        \"\"\"\n        return cls._generate_oauth_url_helper(cls.DEV_REDIRECT_URI, state)\n\n    @classmethod\n    def _generate_oauth_url_helper(cls, redirect_uri: str, state: str) -> str:\n        # https://developer.atlassian.com/cloud/jira/platform/oauth-2-3lo-apps/#1--direct-the-user-to-the-authorization-url-to-get-an-authorization-code\n\n        url = (\n            \"https://auth.atlassian.com/authorize\"\n            f\"?audience=api.atlassian.com\"\n            f\"&client_id={cls.CLIENT_ID}\"\n            f\"&scope={cls.CONFLUENCE_OAUTH_SCOPE}\"\n            f\"&redirect_uri={redirect_uri}\"\n            f\"&state={state}\"\n            \"&response_type=code\"\n            \"&prompt=consent\"\n        )\n        return url\n\n    @classmethod\n    def session_dump_json(cls, email: str, redirect_on_success: str | None) -> str:\n        \"\"\"Temporary state to store in redis. to be looked up on auth response.\n        Returns a json string.\n        \"\"\"\n        session = ConfluenceCloudOAuth.OAuthSession(\n            email=email, redirect_on_success=redirect_on_success\n        )\n        return session.model_dump_json()\n\n    @classmethod\n    def parse_session(cls, session_json: str) -> OAuthSession:\n        session = ConfluenceCloudOAuth.OAuthSession.model_validate_json(session_json)\n        return session\n\n    @classmethod\n    def generate_finalize_url(cls, credential_id: int) -> str:\n        return f\"{WEB_DOMAIN}/admin/connectors/confluence/oauth/finalize?credential={credential_id}\"\n\n\n@router.post(\"/connector/confluence/callback\")\ndef confluence_oauth_callback(\n    code: str,\n    state: str,\n    user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n    tenant_id: str | None = Depends(get_current_tenant_id),\n) -> JSONResponse:\n    \"\"\"Handles the backend logic for the frontend page that the user is redirected to\n    after visiting the oauth authorization url.\"\"\"\n\n    if not ConfluenceCloudOAuth.CLIENT_ID or not ConfluenceCloudOAuth.CLIENT_SECRET:\n        raise HTTPException(\n            status_code=500,\n            detail=\"Confluence Cloud client ID or client secret is not configured.\",\n        )\n\n    r = get_redis_client(tenant_id=tenant_id)\n\n    # recover the state\n    padded_state = state + \"=\" * (\n        -len(state) % 4\n    )  # Add padding back (Base64 decoding requires padding)\n    uuid_bytes = base64.urlsafe_b64decode(\n        padded_state\n    )  # Decode the Base64 string back to bytes\n\n    # Convert bytes back to a UUID\n    oauth_uuid = uuid.UUID(bytes=uuid_bytes)\n    oauth_uuid_str = str(oauth_uuid)\n\n    r_key = f\"da_oauth:{oauth_uuid_str}\"\n\n    session_json_bytes = cast(bytes, r.get(r_key))\n    if not session_json_bytes:\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Confluence Cloud OAuth failed - OAuth state key not found: key={r_key}\",\n        )\n\n    session_json = session_json_bytes.decode(\"utf-8\")\n    try:\n        session = ConfluenceCloudOAuth.parse_session(session_json)\n\n        if not DEV_MODE:\n            redirect_uri = ConfluenceCloudOAuth.REDIRECT_URI\n        else:\n            redirect_uri = ConfluenceCloudOAuth.DEV_REDIRECT_URI\n\n        # Exchange the authorization code for an access token\n        response = requests.post(\n            ConfluenceCloudOAuth.TOKEN_URL,\n            headers={\"Content-Type\": \"application/x-www-form-urlencoded\"},\n            data={\n                \"client_id\": ConfluenceCloudOAuth.CLIENT_ID,\n                \"client_secret\": ConfluenceCloudOAuth.CLIENT_SECRET,\n                \"code\": code,\n                \"redirect_uri\": redirect_uri,\n                \"grant_type\": \"authorization_code\",\n            },\n        )\n\n        token_response: ConfluenceCloudOAuth.TokenResponse | None = None\n\n        try:\n            token_response = ConfluenceCloudOAuth.TokenResponse.model_validate_json(\n                response.text\n            )\n        except Exception:\n            raise RuntimeError(\n                \"Confluence Cloud OAuth failed during code/token exchange.\"\n            )\n\n        now = datetime.now(timezone.utc)\n        expires_at = now + timedelta(seconds=token_response.expires_in)\n\n        credential_info = CredentialBase(\n            credential_json={\n                \"confluence_access_token\": token_response.access_token,\n                \"confluence_refresh_token\": token_response.refresh_token,\n                \"created_at\": now.isoformat(),\n                \"expires_at\": expires_at.isoformat(),\n                \"expires_in\": token_response.expires_in,\n                \"scope\": token_response.scope,\n            },\n            admin_public=True,\n            source=DocumentSource.CONFLUENCE,\n            name=\"Confluence Cloud OAuth\",\n        )\n\n        credential = create_credential(credential_info, user, db_session)\n    except Exception as e:\n        return JSONResponse(\n            status_code=500,\n            content={\n                \"success\": False,\n                \"message\": f\"An error occurred during Confluence Cloud OAuth: {str(e)}\",\n            },\n        )\n    finally:\n        r.delete(r_key)\n\n    # return the result\n    return JSONResponse(\n        content={\n            \"success\": True,\n            \"message\": \"Confluence Cloud OAuth completed successfully.\",\n            \"finalize_url\": ConfluenceCloudOAuth.generate_finalize_url(credential.id),\n            \"redirect_on_success\": session.redirect_on_success,\n        }\n    )\n\n\n@router.get(\"/connector/confluence/accessible-resources\")\ndef confluence_oauth_accessible_resources(\n    credential_id: int,\n    user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n    tenant_id: str | None = Depends(get_current_tenant_id),  # noqa: ARG001\n) -> JSONResponse:\n    \"\"\"Atlassian's API is weird and does not supply us with enough info to be in a\n    usable state after authorizing.  All API's require a cloud id. We have to list\n    the accessible resources/sites and let the user choose which site to use.\"\"\"\n\n    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)\n    if not credential:\n        raise HTTPException(400, f\"Credential {credential_id} not found.\")\n\n    credential_dict = (\n        credential.credential_json.get_value(apply_mask=False)\n        if credential.credential_json\n        else {}\n    )\n    access_token = credential_dict[\"confluence_access_token\"]\n\n    try:\n        # Exchange the authorization code for an access token\n        response = requests.get(\n            ConfluenceCloudOAuth.ACCESSIBLE_RESOURCE_URL,\n            headers={\n                \"Authorization\": f\"Bearer {access_token}\",\n                \"Accept\": \"application/json\",\n            },\n        )\n\n        response.raise_for_status()\n        accessible_resources_data = response.json()\n\n        # Validate the list of AccessibleResources\n        try:\n            accessible_resources = [\n                ConfluenceCloudOAuth.AccessibleResources(**resource)\n                for resource in accessible_resources_data\n            ]\n        except ValidationError as e:\n            raise RuntimeError(f\"Failed to parse accessible resources: {e}\")\n    except Exception as e:\n        return JSONResponse(\n            status_code=500,\n            content={\n                \"success\": False,\n                \"message\": f\"An error occurred retrieving Confluence Cloud accessible resources: {str(e)}\",\n            },\n        )\n\n    # return the result\n    return JSONResponse(\n        content={\n            \"success\": True,\n            \"message\": \"Confluence Cloud get accessible resources completed successfully.\",\n            \"accessible_resources\": [\n                resource.model_dump() for resource in accessible_resources\n            ],\n        }\n    )\n\n\n@router.post(\"/connector/confluence/finalize\")\ndef confluence_oauth_finalize(\n    credential_id: int,\n    cloud_id: str,\n    cloud_name: str,\n    cloud_url: str,\n    user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n    tenant_id: str | None = Depends(get_current_tenant_id),  # noqa: ARG001\n) -> JSONResponse:\n    \"\"\"Saves the info for the selected cloud site to the credential.\n    This is the final step in the confluence oauth flow where after the traditional\n    OAuth process, the user has to select a site to associate with the credentials.\n    After this, the credential is usable.\"\"\"\n\n    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)\n    if not credential:\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Confluence Cloud OAuth failed - credential {credential_id} not found.\",\n        )\n\n    existing_credential_json = (\n        credential.credential_json.get_value(apply_mask=False)\n        if credential.credential_json\n        else {}\n    )\n    new_credential_json: dict[str, Any] = dict(existing_credential_json)\n    new_credential_json[\"cloud_id\"] = cloud_id\n    new_credential_json[\"cloud_name\"] = cloud_name\n    new_credential_json[\"wiki_base\"] = cloud_url\n\n    try:\n        update_credential_json(credential_id, new_credential_json, user, db_session)\n    except Exception as e:\n        return JSONResponse(\n            status_code=500,\n            content={\n                \"success\": False,\n                \"message\": f\"An error occurred during Confluence Cloud OAuth: {str(e)}\",\n            },\n        )\n\n    # return the result\n    return JSONResponse(\n        content={\n            \"success\": True,\n            \"message\": \"Confluence Cloud OAuth finalized successfully.\",\n            \"redirect_url\": f\"{WEB_DOMAIN}/admin/connectors/confluence\",\n        }\n    )\n"
  },
  {
    "path": "backend/ee/onyx/server/oauth/google_drive.py",
    "content": "import base64\nimport json\nimport uuid\nfrom typing import Any\nfrom typing import cast\n\nimport requests\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi.responses import JSONResponse\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.server.oauth.api_router import router\nfrom onyx.auth.users import current_admin_user\nfrom onyx.configs.app_configs import DEV_MODE\nfrom onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_ID\nfrom onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_SECRET\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.google_utils.google_auth import get_google_oauth_creds\nfrom onyx.connectors.google_utils.google_auth import sanitize_oauth_credentials\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_AUTHENTICATION_METHOD,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_DICT_TOKEN_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    GoogleOAuthAuthenticationMethod,\n)\nfrom onyx.db.credentials import create_credential\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.documents.models import CredentialBase\nfrom shared_configs.contextvars import get_current_tenant_id\n\n\nclass GoogleDriveOAuth:\n    # https://developers.google.com/identity/protocols/oauth2\n    # https://developers.google.com/identity/protocols/oauth2/web-server\n\n    class OAuthSession(BaseModel):\n        \"\"\"Stored in redis to be looked up on callback\"\"\"\n\n        email: str\n        redirect_on_success: str | None  # Where to send the user if OAuth flow succeeds\n\n    CLIENT_ID = OAUTH_GOOGLE_DRIVE_CLIENT_ID\n    CLIENT_SECRET = OAUTH_GOOGLE_DRIVE_CLIENT_SECRET\n\n    TOKEN_URL = \"https://oauth2.googleapis.com/token\"\n\n    # SCOPE is per https://docs.danswer.dev/connectors/google-drive\n    # TODO: Merge with or use google_utils.GOOGLE_SCOPES\n    SCOPE = (\n        \"https://www.googleapis.com/auth/drive.readonly%20\"\n        \"https://www.googleapis.com/auth/drive.metadata.readonly%20\"\n        \"https://www.googleapis.com/auth/admin.directory.user.readonly%20\"\n        \"https://www.googleapis.com/auth/admin.directory.group.readonly\"\n    )\n\n    REDIRECT_URI = f\"{WEB_DOMAIN}/admin/connectors/google-drive/oauth/callback\"\n    DEV_REDIRECT_URI = f\"https://redirectmeto.com/{REDIRECT_URI}\"\n\n    @classmethod\n    def generate_oauth_url(cls, state: str) -> str:\n        return cls._generate_oauth_url_helper(cls.REDIRECT_URI, state)\n\n    @classmethod\n    def generate_dev_oauth_url(cls, state: str) -> str:\n        \"\"\"dev mode workaround for localhost testing\n        - https://www.nango.dev/blog/oauth-redirects-on-localhost-with-https\n        \"\"\"\n\n        return cls._generate_oauth_url_helper(cls.DEV_REDIRECT_URI, state)\n\n    @classmethod\n    def _generate_oauth_url_helper(cls, redirect_uri: str, state: str) -> str:\n        # without prompt=consent, a refresh token is only issued the first time the user approves\n        url = (\n            f\"https://accounts.google.com/o/oauth2/v2/auth\"\n            f\"?client_id={cls.CLIENT_ID}\"\n            f\"&redirect_uri={redirect_uri}\"\n            \"&response_type=code\"\n            f\"&scope={cls.SCOPE}\"\n            \"&access_type=offline\"\n            f\"&state={state}\"\n            \"&prompt=consent\"\n        )\n        return url\n\n    @classmethod\n    def session_dump_json(cls, email: str, redirect_on_success: str | None) -> str:\n        \"\"\"Temporary state to store in redis. to be looked up on auth response.\n        Returns a json string.\n        \"\"\"\n        session = GoogleDriveOAuth.OAuthSession(\n            email=email, redirect_on_success=redirect_on_success\n        )\n        return session.model_dump_json()\n\n    @classmethod\n    def parse_session(cls, session_json: str) -> OAuthSession:\n        session = GoogleDriveOAuth.OAuthSession.model_validate_json(session_json)\n        return session\n\n\n@router.post(\"/connector/google-drive/callback\")\ndef handle_google_drive_oauth_callback(\n    code: str,\n    state: str,\n    user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n    tenant_id: str | None = Depends(get_current_tenant_id),\n) -> JSONResponse:\n    if not GoogleDriveOAuth.CLIENT_ID or not GoogleDriveOAuth.CLIENT_SECRET:\n        raise HTTPException(\n            status_code=500,\n            detail=\"Google Drive client ID or client secret is not configured.\",\n        )\n\n    r = get_redis_client(tenant_id=tenant_id)\n\n    # recover the state\n    padded_state = state + \"=\" * (\n        -len(state) % 4\n    )  # Add padding back (Base64 decoding requires padding)\n    uuid_bytes = base64.urlsafe_b64decode(\n        padded_state\n    )  # Decode the Base64 string back to bytes\n\n    # Convert bytes back to a UUID\n    oauth_uuid = uuid.UUID(bytes=uuid_bytes)\n    oauth_uuid_str = str(oauth_uuid)\n\n    r_key = f\"da_oauth:{oauth_uuid_str}\"\n\n    session_json_bytes = cast(bytes, r.get(r_key))\n    if not session_json_bytes:\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Google Drive OAuth failed - OAuth state key not found: key={r_key}\",\n        )\n\n    session_json = session_json_bytes.decode(\"utf-8\")\n    try:\n        session = GoogleDriveOAuth.parse_session(session_json)\n\n        if not DEV_MODE:\n            redirect_uri = GoogleDriveOAuth.REDIRECT_URI\n        else:\n            redirect_uri = GoogleDriveOAuth.DEV_REDIRECT_URI\n\n        # Exchange the authorization code for an access token\n        response = requests.post(\n            GoogleDriveOAuth.TOKEN_URL,\n            headers={\"Content-Type\": \"application/x-www-form-urlencoded\"},\n            data={\n                \"client_id\": GoogleDriveOAuth.CLIENT_ID,\n                \"client_secret\": GoogleDriveOAuth.CLIENT_SECRET,\n                \"code\": code,\n                \"redirect_uri\": redirect_uri,\n                \"grant_type\": \"authorization_code\",\n            },\n        )\n\n        response.raise_for_status()\n\n        authorization_response: dict[str, Any] = response.json()\n\n        # the connector wants us to store the json in its authorized_user_info format\n        # returned from OAuthCredentials.get_authorized_user_info().\n        # So refresh immediately via get_google_oauth_creds with the params filled in\n        # from fields in authorization_response to get the json we need\n        authorized_user_info = {}\n        authorized_user_info[\"client_id\"] = OAUTH_GOOGLE_DRIVE_CLIENT_ID\n        authorized_user_info[\"client_secret\"] = OAUTH_GOOGLE_DRIVE_CLIENT_SECRET\n        authorized_user_info[\"refresh_token\"] = authorization_response[\"refresh_token\"]\n\n        token_json_str = json.dumps(authorized_user_info)\n        oauth_creds = get_google_oauth_creds(\n            token_json_str=token_json_str, source=DocumentSource.GOOGLE_DRIVE\n        )\n        if not oauth_creds:\n            raise RuntimeError(\"get_google_oauth_creds returned None.\")\n\n        # save off the credentials\n        oauth_creds_sanitized_json_str = sanitize_oauth_credentials(oauth_creds)\n\n        credential_dict: dict[str, str] = {}\n        credential_dict[DB_CREDENTIALS_DICT_TOKEN_KEY] = oauth_creds_sanitized_json_str\n        credential_dict[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] = session.email\n        credential_dict[DB_CREDENTIALS_AUTHENTICATION_METHOD] = (\n            GoogleOAuthAuthenticationMethod.OAUTH_INTERACTIVE.value\n        )\n\n        credential_info = CredentialBase(\n            credential_json=credential_dict,\n            admin_public=True,\n            source=DocumentSource.GOOGLE_DRIVE,\n            name=\"OAuth (interactive)\",\n        )\n\n        create_credential(credential_info, user, db_session)\n    except Exception as e:\n        return JSONResponse(\n            status_code=500,\n            content={\n                \"success\": False,\n                \"message\": f\"An error occurred during Google Drive OAuth: {str(e)}\",\n            },\n        )\n    finally:\n        r.delete(r_key)\n\n    # return the result\n    return JSONResponse(\n        content={\n            \"success\": True,\n            \"message\": \"Google Drive OAuth completed successfully.\",\n            \"finalize_url\": None,\n            \"redirect_on_success\": session.redirect_on_success,\n        }\n    )\n"
  },
  {
    "path": "backend/ee/onyx/server/oauth/slack.py",
    "content": "import base64\nimport uuid\nfrom typing import cast\n\nimport requests\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi.responses import JSONResponse\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.server.oauth.api_router import router\nfrom onyx.auth.users import current_admin_user\nfrom onyx.configs.app_configs import DEV_MODE\nfrom onyx.configs.app_configs import OAUTH_SLACK_CLIENT_ID\nfrom onyx.configs.app_configs import OAUTH_SLACK_CLIENT_SECRET\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.credentials import create_credential\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.documents.models import CredentialBase\nfrom shared_configs.contextvars import get_current_tenant_id\n\n\nclass SlackOAuth:\n    # https://knock.app/blog/how-to-authenticate-users-in-slack-using-oauth\n    # Example: https://api.slack.com/authentication/oauth-v2#exchanging\n\n    class OAuthSession(BaseModel):\n        \"\"\"Stored in redis to be looked up on callback\"\"\"\n\n        email: str\n        redirect_on_success: str | None  # Where to send the user if OAuth flow succeeds\n\n    CLIENT_ID = OAUTH_SLACK_CLIENT_ID\n    CLIENT_SECRET = OAUTH_SLACK_CLIENT_SECRET\n\n    TOKEN_URL = \"https://slack.com/api/oauth.v2.access\"\n\n    # SCOPE is per https://docs.danswer.dev/connectors/slack\n    BOT_SCOPE = (\n        \"channels:history,\"\n        \"channels:read,\"\n        \"groups:history,\"\n        \"groups:read,\"\n        \"channels:join,\"\n        \"im:history,\"\n        \"users:read,\"\n        \"users:read.email,\"\n        \"usergroups:read\"\n    )\n\n    REDIRECT_URI = f\"{WEB_DOMAIN}/admin/connectors/slack/oauth/callback\"\n    DEV_REDIRECT_URI = f\"https://redirectmeto.com/{REDIRECT_URI}\"\n\n    @classmethod\n    def generate_oauth_url(cls, state: str) -> str:\n        return cls._generate_oauth_url_helper(cls.REDIRECT_URI, state)\n\n    @classmethod\n    def generate_dev_oauth_url(cls, state: str) -> str:\n        \"\"\"dev mode workaround for localhost testing\n        - https://www.nango.dev/blog/oauth-redirects-on-localhost-with-https\n        \"\"\"\n\n        return cls._generate_oauth_url_helper(cls.DEV_REDIRECT_URI, state)\n\n    @classmethod\n    def _generate_oauth_url_helper(cls, redirect_uri: str, state: str) -> str:\n        url = (\n            f\"https://slack.com/oauth/v2/authorize\"\n            f\"?client_id={cls.CLIENT_ID}\"\n            f\"&redirect_uri={redirect_uri}\"\n            f\"&scope={cls.BOT_SCOPE}\"\n            f\"&state={state}\"\n        )\n        return url\n\n    @classmethod\n    def session_dump_json(cls, email: str, redirect_on_success: str | None) -> str:\n        \"\"\"Temporary state to store in redis. to be looked up on auth response.\n        Returns a json string.\n        \"\"\"\n        session = SlackOAuth.OAuthSession(\n            email=email, redirect_on_success=redirect_on_success\n        )\n        return session.model_dump_json()\n\n    @classmethod\n    def parse_session(cls, session_json: str) -> OAuthSession:\n        session = SlackOAuth.OAuthSession.model_validate_json(session_json)\n        return session\n\n\n@router.post(\"/connector/slack/callback\")\ndef handle_slack_oauth_callback(\n    code: str,\n    state: str,\n    user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n    tenant_id: str | None = Depends(get_current_tenant_id),\n) -> JSONResponse:\n    if not SlackOAuth.CLIENT_ID or not SlackOAuth.CLIENT_SECRET:\n        raise HTTPException(\n            status_code=500,\n            detail=\"Slack client ID or client secret is not configured.\",\n        )\n\n    r = get_redis_client(tenant_id=tenant_id)\n\n    # recover the state\n    padded_state = state + \"=\" * (\n        -len(state) % 4\n    )  # Add padding back (Base64 decoding requires padding)\n    uuid_bytes = base64.urlsafe_b64decode(\n        padded_state\n    )  # Decode the Base64 string back to bytes\n\n    # Convert bytes back to a UUID\n    oauth_uuid = uuid.UUID(bytes=uuid_bytes)\n    oauth_uuid_str = str(oauth_uuid)\n\n    r_key = f\"da_oauth:{oauth_uuid_str}\"\n\n    session_json_bytes = cast(bytes, r.get(r_key))\n    if not session_json_bytes:\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Slack OAuth failed - OAuth state key not found: key={r_key}\",\n        )\n\n    session_json = session_json_bytes.decode(\"utf-8\")\n    try:\n        session = SlackOAuth.parse_session(session_json)\n\n        if not DEV_MODE:\n            redirect_uri = SlackOAuth.REDIRECT_URI\n        else:\n            redirect_uri = SlackOAuth.DEV_REDIRECT_URI\n\n        # Exchange the authorization code for an access token\n        response = requests.post(\n            SlackOAuth.TOKEN_URL,\n            headers={\"Content-Type\": \"application/x-www-form-urlencoded\"},\n            data={\n                \"client_id\": SlackOAuth.CLIENT_ID,\n                \"client_secret\": SlackOAuth.CLIENT_SECRET,\n                \"code\": code,\n                \"redirect_uri\": redirect_uri,\n            },\n        )\n\n        response_data = response.json()\n\n        if not response_data.get(\"ok\"):\n            raise HTTPException(\n                status_code=400,\n                detail=f\"Slack OAuth failed: {response_data.get('error')}\",\n            )\n\n        # Extract token and team information\n        access_token: str = response_data.get(\"access_token\")\n        team_id: str = response_data.get(\"team\", {}).get(\"id\")\n        authed_user_id: str = response_data.get(\"authed_user\", {}).get(\"id\")\n\n        credential_info = CredentialBase(\n            credential_json={\"slack_bot_token\": access_token},\n            admin_public=True,\n            source=DocumentSource.SLACK,\n            name=\"Slack OAuth\",\n        )\n\n        create_credential(credential_info, user, db_session)\n    except Exception as e:\n        return JSONResponse(\n            status_code=500,\n            content={\n                \"success\": False,\n                \"message\": f\"An error occurred during Slack OAuth: {str(e)}\",\n            },\n        )\n    finally:\n        r.delete(r_key)\n\n    # return the result\n    return JSONResponse(\n        content={\n            \"success\": True,\n            \"message\": \"Slack OAuth completed successfully.\",\n            \"finalize_url\": None,\n            \"redirect_on_success\": session.redirect_on_success,\n            \"team_id\": team_id,\n            \"authed_user_id\": authed_user_id,\n        }\n    )\n"
  },
  {
    "path": "backend/ee/onyx/server/query_and_chat/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/server/query_and_chat/models.py",
    "content": "from collections.abc import Sequence\nfrom datetime import datetime\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.context.search.models import BaseFilters\nfrom onyx.context.search.models import InferenceSection\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.server.manage.models import StandardAnswer\n\n\nclass StandardAnswerRequest(BaseModel):\n    message: str\n    slack_bot_categories: list[str]\n\n\nclass StandardAnswerResponse(BaseModel):\n    standard_answers: list[StandardAnswer] = Field(default_factory=list)\n\n\nclass SearchFlowClassificationRequest(BaseModel):\n    user_query: str\n\n\nclass SearchFlowClassificationResponse(BaseModel):\n    is_search_flow: bool\n\n\n# NOTE: This model is used for the core flow of the Onyx application, any\n# changes to it should be reviewed and approved by an experienced team member.\n# It is very important to 1. avoid bloat and 2. that this remains backwards\n# compatible across versions.\nclass SendSearchQueryRequest(BaseModel):\n    search_query: str\n    filters: BaseFilters | None = None\n    num_docs_fed_to_llm_selection: int | None = None\n    run_query_expansion: bool = False\n    num_hits: int = 30\n    hybrid_alpha: float | None = None\n    include_content: bool = False\n    stream: bool = False\n\n\nclass SearchDocWithContent(SearchDoc):\n    # Allows None because this is determined by a flag but the object used in code\n    # of the search path uses this type\n    content: str | None\n\n    @classmethod\n    def from_inference_sections(\n        cls,\n        sections: Sequence[InferenceSection],\n        include_content: bool = False,\n        is_internet: bool = False,\n    ) -> list[\"SearchDocWithContent\"]:\n        \"\"\"Convert InferenceSections to SearchDocWithContent objects.\n\n        Args:\n            sections: Sequence of InferenceSection objects\n            include_content: If True, populate content field with combined_content\n            is_internet: Whether these are internet search results\n\n        Returns:\n            List of SearchDocWithContent with optional content\n        \"\"\"\n        if not sections:\n            return []\n\n        return [\n            cls(\n                document_id=(chunk := section.center_chunk).document_id,\n                chunk_ind=chunk.chunk_id,\n                semantic_identifier=chunk.semantic_identifier or \"Unknown\",\n                link=chunk.source_links[0] if chunk.source_links else None,\n                blurb=chunk.blurb,\n                source_type=chunk.source_type,\n                boost=chunk.boost,\n                hidden=chunk.hidden,\n                metadata=chunk.metadata,\n                score=chunk.score,\n                match_highlights=chunk.match_highlights,\n                updated_at=chunk.updated_at,\n                primary_owners=chunk.primary_owners,\n                secondary_owners=chunk.secondary_owners,\n                is_internet=is_internet,\n                content=section.combined_content if include_content else None,\n            )\n            for section in sections\n        ]\n\n\nclass SearchFullResponse(BaseModel):\n    all_executed_queries: list[str]\n    search_docs: list[SearchDocWithContent]\n    # Reasoning tokens output by the LLM for the document selection\n    doc_selection_reasoning: str | None = None\n    # This a list of document ids that are in the search_docs list\n    llm_selected_doc_ids: list[str] | None = None\n    # Error message if the search failed partway through\n    error: str | None = None\n\n\nclass SearchQueryResponse(BaseModel):\n    query: str\n    query_expansions: list[str] | None\n    created_at: datetime\n\n\nclass SearchHistoryResponse(BaseModel):\n    search_queries: list[SearchQueryResponse]\n"
  },
  {
    "path": "backend/ee/onyx/server/query_and_chat/query_backend.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.onyxbot.slack.handlers.handle_standard_answers import (\n    oneoff_standard_answers,\n)\nfrom ee.onyx.server.query_and_chat.models import StandardAnswerRequest\nfrom ee.onyx.server.query_and_chat.models import StandardAnswerResponse\nfrom onyx.auth.users import current_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nbasic_router = APIRouter(prefix=\"/query\")\n\n\n@basic_router.get(\"/standard-answer\")\ndef get_standard_answer(\n    request: StandardAnswerRequest,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_user),\n) -> StandardAnswerResponse:\n    try:\n        standard_answers = oneoff_standard_answers(\n            message=request.message,\n            slack_bot_categories=request.slack_bot_categories,\n            db_session=db_session,\n        )\n        return StandardAnswerResponse(standard_answers=standard_answers)\n    except Exception as e:\n        logger.error(f\"Error in get_standard_answer: {str(e)}\", exc_info=True)\n        raise HTTPException(status_code=500, detail=\"An internal server error occurred\")\n"
  },
  {
    "path": "backend/ee/onyx/server/query_and_chat/search_backend.py",
    "content": "from collections.abc import Generator\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi.responses import StreamingResponse\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.search import fetch_search_queries_for_user\nfrom ee.onyx.search.process_search_query import gather_search_stream\nfrom ee.onyx.search.process_search_query import stream_search_query\nfrom ee.onyx.secondary_llm_flows.search_flow_classification import (\n    classify_is_search_flow,\n)\nfrom ee.onyx.server.query_and_chat.models import SearchFlowClassificationRequest\nfrom ee.onyx.server.query_and_chat.models import SearchFlowClassificationResponse\nfrom ee.onyx.server.query_and_chat.models import SearchFullResponse\nfrom ee.onyx.server.query_and_chat.models import SearchHistoryResponse\nfrom ee.onyx.server.query_and_chat.models import SearchQueryResponse\nfrom ee.onyx.server.query_and_chat.models import SendSearchQueryRequest\nfrom ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket\nfrom onyx.auth.users import current_user\nfrom onyx.configs.app_configs import ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import User\nfrom onyx.llm.factory import get_default_llm\nfrom onyx.server.usage_limits import check_llm_cost_limit_for_provider\nfrom onyx.server.utils import get_json_line\nfrom onyx.server.utils_vector_db import require_vector_db\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/search\")\n\n\n@router.post(\"/search-flow-classification\")\ndef search_flow_classification(\n    request: SearchFlowClassificationRequest,\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> SearchFlowClassificationResponse:\n    query = request.user_query\n    # This is a heuristic that if the user is typing a lot of text, it's unlikely they're looking for some specific document\n    # Most likely something needs to be done with the text included so we'll just classify it as a chat flow\n    if len(query) > 200:\n        return SearchFlowClassificationResponse(is_search_flow=False)\n\n    llm = get_default_llm()\n\n    check_llm_cost_limit_for_provider(\n        db_session=db_session,\n        tenant_id=get_current_tenant_id(),\n        llm_provider_api_key=llm.config.api_key,\n    )\n\n    try:\n        is_search_flow = classify_is_search_flow(query=query, llm=llm)\n    except Exception as e:\n        logger.exception(\n            \"Search flow classification failed; defaulting to chat flow\",\n            exc_info=e,\n        )\n        is_search_flow = False\n\n    return SearchFlowClassificationResponse(is_search_flow=is_search_flow)\n\n\n# NOTE: This endpoint is used for the core flow of the Onyx application, any\n# changes to it should be reviewed and approved by an experienced team member.\n# It is very important to 1. avoid bloat and 2. that this remains backwards\n# compatible across versions.\n@router.post(\n    \"/send-search-message\",\n    response_model=None,\n    dependencies=[Depends(require_vector_db)],\n)\ndef handle_send_search_message(\n    request: SendSearchQueryRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> StreamingResponse | SearchFullResponse:\n    \"\"\"\n    Executes a search query with optional streaming.\n\n    If hybrid_alpha is unset and ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH\n    is True, executes pure keyword search.\n\n    Returns:\n        StreamingResponse with SSE if stream=True, otherwise SearchFullResponse.\n    \"\"\"\n    logger.debug(f\"Received search query: {request.search_query}\")\n\n    if request.hybrid_alpha is None and ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH:\n        request.hybrid_alpha = 0.0\n\n    # Non-streaming path\n    if not request.stream:\n        try:\n            packets = stream_search_query(request, user, db_session)\n            return gather_search_stream(packets)\n        except NotImplementedError as e:\n            return SearchFullResponse(\n                all_executed_queries=[],\n                search_docs=[],\n                error=str(e),\n            )\n\n    # Streaming path\n    def stream_generator() -> Generator[str, None, None]:\n        try:\n            with get_session_with_current_tenant() as streaming_db_session:\n                for packet in stream_search_query(request, user, streaming_db_session):\n                    yield get_json_line(packet.model_dump())\n        except NotImplementedError as e:\n            yield get_json_line(SearchErrorPacket(error=str(e)).model_dump())\n        except HTTPException:\n            raise\n        except Exception as e:\n            logger.exception(\"Error in search streaming\")\n            yield get_json_line(SearchErrorPacket(error=str(e)).model_dump())\n\n    return StreamingResponse(stream_generator(), media_type=\"text/event-stream\")\n\n\n@router.get(\"/search-history\")\ndef get_search_history(\n    limit: int = 100,\n    filter_days: int | None = None,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> SearchHistoryResponse:\n    \"\"\"\n    Fetch past search queries for the authenticated user.\n\n    Args:\n        limit: Maximum number of queries to return (default 100)\n        filter_days: Only return queries from the last N days (optional)\n\n    Returns:\n        SearchHistoryResponse with list of search queries, ordered by most recent first.\n    \"\"\"\n    # Validate limit\n    if limit <= 0:\n        raise HTTPException(\n            status_code=400,\n            detail=\"limit must be greater than 0\",\n        )\n    if limit > 1000:\n        raise HTTPException(\n            status_code=400,\n            detail=\"limit must be at most 1000\",\n        )\n\n    # Validate filter_days\n    if filter_days is not None and filter_days <= 0:\n        raise HTTPException(\n            status_code=400,\n            detail=\"filter_days must be greater than 0\",\n        )\n\n    search_queries = fetch_search_queries_for_user(\n        db_session=db_session,\n        user_id=user.id,\n        filter_days=filter_days,\n        limit=limit,\n    )\n\n    return SearchHistoryResponse(\n        search_queries=[\n            SearchQueryResponse(\n                query=sq.query,\n                query_expansions=sq.query_expansions,\n                created_at=sq.created_at,\n            )\n            for sq in search_queries\n        ]\n    )\n"
  },
  {
    "path": "backend/ee/onyx/server/query_and_chat/streaming_models.py",
    "content": "from typing import Literal\n\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\n\nfrom ee.onyx.server.query_and_chat.models import SearchDocWithContent\n\n\nclass SearchQueriesPacket(BaseModel):\n    model_config = ConfigDict(frozen=True)\n\n    type: Literal[\"search_queries\"] = \"search_queries\"\n    all_executed_queries: list[str]\n\n\nclass SearchDocsPacket(BaseModel):\n    model_config = ConfigDict(frozen=True)\n\n    type: Literal[\"search_docs\"] = \"search_docs\"\n    search_docs: list[SearchDocWithContent]\n\n\nclass SearchErrorPacket(BaseModel):\n    model_config = ConfigDict(frozen=True)\n\n    type: Literal[\"search_error\"] = \"search_error\"\n    error: str\n\n\nclass LLMSelectedDocsPacket(BaseModel):\n    model_config = ConfigDict(frozen=True)\n\n    type: Literal[\"llm_selected_docs\"] = \"llm_selected_docs\"\n    # None if LLM selection failed, empty list if no docs selected, list of IDs otherwise\n    llm_selected_doc_ids: list[str] | None\n"
  },
  {
    "path": "backend/ee/onyx/server/query_and_chat/token_limit.py",
    "content": "from collections import defaultdict\nfrom collections.abc import Sequence\nfrom datetime import datetime\nfrom itertools import groupby\nfrom typing import Dict\nfrom typing import List\nfrom typing import Tuple\nfrom uuid import UUID\n\nfrom fastapi import HTTPException\nfrom sqlalchemy import func\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.api_key import is_api_key_email_address\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import ChatMessage\nfrom onyx.db.models import ChatSession\nfrom onyx.db.models import TokenRateLimit\nfrom onyx.db.models import TokenRateLimit__UserGroup\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserGroup\nfrom onyx.db.token_limit import fetch_all_user_token_rate_limits\nfrom onyx.server.query_and_chat.token_limit import _get_cutoff_time\nfrom onyx.server.query_and_chat.token_limit import _is_rate_limited\nfrom onyx.server.query_and_chat.token_limit import _user_is_rate_limited_by_global\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\n\n\ndef _check_token_rate_limits(user: User) -> None:\n    # Anonymous users are only rate limited by global settings\n    if user.is_anonymous:\n        _user_is_rate_limited_by_global()\n\n    elif is_api_key_email_address(user.email):\n        # API keys are only rate limited by global settings\n        _user_is_rate_limited_by_global()\n\n    else:\n        run_functions_tuples_in_parallel(\n            [\n                (_user_is_rate_limited, (user.id,)),\n                (_user_is_rate_limited_by_group, (user.id,)),\n                (_user_is_rate_limited_by_global, ()),\n            ]\n        )\n\n\n\"\"\"\nUser rate limits\n\"\"\"\n\n\ndef _user_is_rate_limited(user_id: UUID) -> None:\n    with get_session_with_current_tenant() as db_session:\n        user_rate_limits = fetch_all_user_token_rate_limits(\n            db_session=db_session, enabled_only=True, ordered=False\n        )\n\n        if user_rate_limits:\n            user_cutoff_time = _get_cutoff_time(user_rate_limits)\n            user_usage = _fetch_user_usage(user_id, user_cutoff_time, db_session)\n\n            if _is_rate_limited(user_rate_limits, user_usage):\n                raise HTTPException(\n                    status_code=429,\n                    detail=\"Token budget exceeded for user. Try again later.\",\n                )\n\n\ndef _fetch_user_usage(\n    user_id: UUID, cutoff_time: datetime, db_session: Session\n) -> Sequence[tuple[datetime, int]]:\n    \"\"\"\n    Fetch user usage within the cutoff time, grouped by minute\n    \"\"\"\n    result = db_session.execute(\n        select(\n            func.date_trunc(\"minute\", ChatMessage.time_sent),\n            func.sum(ChatMessage.token_count),\n        )\n        .join(ChatSession, ChatMessage.chat_session_id == ChatSession.id)\n        .where(ChatSession.user_id == user_id, ChatMessage.time_sent >= cutoff_time)\n        .group_by(func.date_trunc(\"minute\", ChatMessage.time_sent))\n    ).all()\n\n    return [(row[0], row[1]) for row in result]\n\n\n\"\"\"\nUser Group rate limits\n\"\"\"\n\n\ndef _user_is_rate_limited_by_group(user_id: UUID) -> None:\n    with get_session_with_current_tenant() as db_session:\n        group_rate_limits = _fetch_all_user_group_rate_limits(user_id, db_session)\n\n        if group_rate_limits:\n            # Group cutoff time is the same for all groups.\n            # This could be optimized to only fetch the maximum cutoff time for\n            # a specific group, but seems unnecessary for now.\n            group_cutoff_time = _get_cutoff_time(\n                [e for sublist in group_rate_limits.values() for e in sublist]\n            )\n\n            user_group_ids = list(group_rate_limits.keys())\n            group_usage = _fetch_user_group_usage(\n                user_group_ids, group_cutoff_time, db_session\n            )\n\n            has_at_least_one_untriggered_limit = False\n            for user_group_id, rate_limits in group_rate_limits.items():\n                usage = group_usage.get(user_group_id, [])\n\n                if not _is_rate_limited(rate_limits, usage):\n                    has_at_least_one_untriggered_limit = True\n                    break\n\n            if not has_at_least_one_untriggered_limit:\n                raise HTTPException(\n                    status_code=429,\n                    detail=\"Token budget exceeded for user's groups. Try again later.\",\n                )\n\n\ndef _fetch_all_user_group_rate_limits(\n    user_id: UUID, db_session: Session\n) -> Dict[int, List[TokenRateLimit]]:\n    group_limits = (\n        select(TokenRateLimit, User__UserGroup.user_group_id)\n        .join(\n            TokenRateLimit__UserGroup,\n            TokenRateLimit.id == TokenRateLimit__UserGroup.rate_limit_id,\n        )\n        .join(\n            UserGroup,\n            UserGroup.id == TokenRateLimit__UserGroup.user_group_id,\n        )\n        .join(\n            User__UserGroup,\n            User__UserGroup.user_group_id == UserGroup.id,\n        )\n        .where(\n            User__UserGroup.user_id == user_id,\n            TokenRateLimit.enabled.is_(True),\n        )\n    )\n\n    raw_rate_limits = db_session.execute(group_limits).all()\n\n    group_rate_limits = defaultdict(list)\n    for rate_limit, user_group_id in raw_rate_limits:\n        group_rate_limits[user_group_id].append(rate_limit)\n\n    return group_rate_limits\n\n\ndef _fetch_user_group_usage(\n    user_group_ids: list[int], cutoff_time: datetime, db_session: Session\n) -> dict[int, list[Tuple[datetime, int]]]:\n    \"\"\"\n    Fetch user group usage within the cutoff time, grouped by minute\n    \"\"\"\n    user_group_usage = db_session.execute(\n        select(\n            func.sum(ChatMessage.token_count),\n            func.date_trunc(\"minute\", ChatMessage.time_sent),\n            UserGroup.id,\n        )\n        .join(ChatSession, ChatMessage.chat_session_id == ChatSession.id)\n        .join(User__UserGroup, User__UserGroup.user_id == ChatSession.user_id)\n        .join(UserGroup, UserGroup.id == User__UserGroup.user_group_id)\n        .filter(UserGroup.id.in_(user_group_ids), ChatMessage.time_sent >= cutoff_time)\n        .group_by(func.date_trunc(\"minute\", ChatMessage.time_sent), UserGroup.id)\n    ).all()\n\n    return {\n        user_group_id: [(usage, time_sent) for time_sent, usage, _ in group_usage]\n        for user_group_id, group_usage in groupby(\n            user_group_usage, key=lambda row: row[2]\n        )\n    }\n"
  },
  {
    "path": "backend/ee/onyx/server/query_history/api.py",
    "content": "import uuid\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom http import HTTPStatus\nfrom uuid import UUID\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Query\nfrom fastapi.responses import StreamingResponse\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.background.task_name_builders import query_history_task_name\nfrom ee.onyx.db.query_history import get_all_query_history_export_tasks\nfrom ee.onyx.db.query_history import get_page_of_chat_sessions\nfrom ee.onyx.db.query_history import get_total_filtered_chat_sessions_count\nfrom ee.onyx.server.query_history.models import ChatSessionMinimal\nfrom ee.onyx.server.query_history.models import ChatSessionSnapshot\nfrom ee.onyx.server.query_history.models import MessageSnapshot\nfrom ee.onyx.server.query_history.models import QueryHistoryExport\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import get_display_email\nfrom onyx.background.celery.versioned_apps.client import app as client_app\nfrom onyx.background.task_utils import construct_query_history_report_name\nfrom onyx.chat.chat_utils import create_chat_history_chain\nfrom onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.configs.constants import FileType\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.configs.constants import QAFeedbackType\nfrom onyx.configs.constants import QueryHistoryType\nfrom onyx.configs.constants import SessionType\nfrom onyx.db.chat import get_chat_session_by_id\nfrom onyx.db.chat import get_chat_sessions_by_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.enums import TaskStatus\nfrom onyx.db.file_record import get_query_history_export_files\nfrom onyx.db.models import ChatSession\nfrom onyx.db.models import User\nfrom onyx.db.tasks import get_task_with_id\nfrom onyx.db.tasks import register_task\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.server.documents.models import PaginatedReturn\nfrom onyx.server.query_and_chat.models import ChatSessionDetails\nfrom onyx.server.query_and_chat.models import ChatSessionsResponse\nfrom onyx.utils.threadpool_concurrency import parallel_yield\nfrom shared_configs.contextvars import get_current_tenant_id\n\nrouter = APIRouter()\n\nONYX_ANONYMIZED_EMAIL = \"anonymous@anonymous.invalid\"\n\n\ndef ensure_query_history_is_enabled(\n    disallowed: list[QueryHistoryType],\n) -> None:\n    if ONYX_QUERY_HISTORY_TYPE in disallowed:\n        raise HTTPException(\n            status_code=HTTPStatus.FORBIDDEN,\n            detail=\"Query history has been disabled by the administrator.\",\n        )\n\n\ndef yield_snapshot_from_chat_session(\n    chat_session: ChatSession,\n    db_session: Session,\n) -> Generator[ChatSessionSnapshot | None]:\n    yield snapshot_from_chat_session(chat_session=chat_session, db_session=db_session)\n\n\ndef fetch_and_process_chat_session_history(\n    db_session: Session,\n    start: datetime,\n    end: datetime,\n    limit: int | None = 500,  # noqa: ARG001\n) -> Generator[ChatSessionSnapshot]:\n    PAGE_SIZE = 100\n\n    page = 0\n    while True:\n        paged_chat_sessions = get_page_of_chat_sessions(\n            start_time=start,\n            end_time=end,\n            db_session=db_session,\n            page_num=page,\n            page_size=PAGE_SIZE,\n        )\n\n        if not paged_chat_sessions:\n            break\n\n        paged_snapshots = parallel_yield(\n            [\n                yield_snapshot_from_chat_session(\n                    db_session=db_session,\n                    chat_session=chat_session,\n                )\n                for chat_session in paged_chat_sessions\n            ]\n        )\n\n        for snapshot in paged_snapshots:\n            if snapshot:\n                yield snapshot\n\n        # If we've fetched *less* than a `PAGE_SIZE` worth\n        # of data, we have reached the end of the\n        # pagination sequence; break.\n        if len(paged_chat_sessions) < PAGE_SIZE:\n            break\n\n        page += 1\n\n\ndef snapshot_from_chat_session(\n    chat_session: ChatSession,\n    db_session: Session,\n) -> ChatSessionSnapshot | None:\n    try:\n        # Older chats may not have the right structure\n        messages = create_chat_history_chain(\n            chat_session_id=chat_session.id, db_session=db_session\n        )\n    except RuntimeError:\n        return None\n\n    flow_type = SessionType.SLACK if chat_session.onyxbot_flow else SessionType.CHAT\n\n    return ChatSessionSnapshot(\n        id=chat_session.id,\n        user_email=get_display_email(\n            chat_session.user.email if chat_session.user else None\n        ),\n        name=chat_session.description,\n        messages=[\n            MessageSnapshot.build(message)\n            for message in messages\n            if message.message_type != MessageType.SYSTEM\n        ],\n        assistant_id=chat_session.persona_id,\n        assistant_name=chat_session.persona.name if chat_session.persona else None,\n        time_created=chat_session.time_created,\n        flow_type=flow_type,\n    )\n\n\n@router.get(\"/admin/chat-sessions\")\ndef admin_get_chat_sessions(\n    user_id: UUID,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ChatSessionsResponse:\n    # we specifically don't allow this endpoint if \"anonymized\" since\n    # this is a direct query on the user id\n    ensure_query_history_is_enabled(\n        [\n            QueryHistoryType.DISABLED,\n            QueryHistoryType.ANONYMIZED,\n        ]\n    )\n\n    try:\n        chat_sessions = get_chat_sessions_by_user(\n            user_id=user_id, deleted=False, db_session=db_session, limit=0\n        )\n\n    except ValueError:\n        raise ValueError(\"Chat session does not exist or has been deleted\")\n\n    return ChatSessionsResponse(\n        sessions=[\n            ChatSessionDetails(\n                id=chat.id,\n                name=chat.description,\n                persona_id=chat.persona_id,\n                time_created=chat.time_created.isoformat(),\n                time_updated=chat.time_updated.isoformat(),\n                shared_status=chat.shared_status,\n                current_alternate_model=chat.current_alternate_model,\n            )\n            for chat in chat_sessions\n        ]\n    )\n\n\n@router.get(\"/admin/chat-session-history\")\ndef get_chat_session_history(\n    page_num: int = Query(0, ge=0),\n    page_size: int = Query(10, ge=1),\n    feedback_type: QAFeedbackType | None = None,\n    start_time: datetime | None = None,\n    end_time: datetime | None = None,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> PaginatedReturn[ChatSessionMinimal]:\n    ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])\n\n    page_of_chat_sessions = get_page_of_chat_sessions(\n        page_num=page_num,\n        page_size=page_size,\n        db_session=db_session,\n        start_time=start_time,\n        end_time=end_time,\n        feedback_filter=feedback_type,\n    )\n\n    total_filtered_chat_sessions_count = get_total_filtered_chat_sessions_count(\n        db_session=db_session,\n        start_time=start_time,\n        end_time=end_time,\n        feedback_filter=feedback_type,\n    )\n\n    minimal_chat_sessions: list[ChatSessionMinimal] = []\n\n    for chat_session in page_of_chat_sessions:\n        minimal_chat_session = ChatSessionMinimal.from_chat_session(chat_session)\n        if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:\n            minimal_chat_session.user_email = ONYX_ANONYMIZED_EMAIL\n        minimal_chat_sessions.append(minimal_chat_session)\n\n    return PaginatedReturn(\n        items=minimal_chat_sessions,\n        total_items=total_filtered_chat_sessions_count,\n    )\n\n\n@router.get(\"/admin/chat-session-history/{chat_session_id}\")\ndef get_chat_session_admin(\n    chat_session_id: UUID,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ChatSessionSnapshot:\n    ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])\n\n    try:\n        chat_session = get_chat_session_by_id(\n            chat_session_id=chat_session_id,\n            user_id=None,  # view chat regardless of user\n            db_session=db_session,\n            include_deleted=True,\n        )\n    except ValueError:\n        raise HTTPException(\n            HTTPStatus.BAD_REQUEST,\n            f\"Chat session with id '{chat_session_id}' does not exist.\",\n        )\n    snapshot = snapshot_from_chat_session(\n        chat_session=chat_session, db_session=db_session\n    )\n\n    if snapshot is None:\n        raise HTTPException(\n            HTTPStatus.BAD_REQUEST,\n            f\"Could not create snapshot for chat session with id '{chat_session_id}'\",\n        )\n\n    if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:\n        snapshot.user_email = ONYX_ANONYMIZED_EMAIL\n\n    return snapshot\n\n\n@router.get(\"/admin/query-history/list\")\ndef list_all_query_history_exports(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[QueryHistoryExport]:\n    ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])\n    try:\n        pending_tasks = [\n            QueryHistoryExport.from_task(task)\n            for task in get_all_query_history_export_tasks(db_session=db_session)\n        ]\n        generated_files = [\n            QueryHistoryExport.from_file(file)\n            for file in get_query_history_export_files(db_session=db_session)\n        ]\n        merged = pending_tasks + generated_files\n\n        # We sort based off of the start-time of the task.\n        # We also return it in reverse order since viewing generated reports in most-recent to least-recent is most common.\n        merged.sort(key=lambda task: task.start_time, reverse=True)\n\n        return merged\n    except Exception as e:\n        raise HTTPException(\n            HTTPStatus.INTERNAL_SERVER_ERROR, f\"Failed to get all tasks: {e}\"\n        )\n\n\n@router.post(\"/admin/query-history/start-export\", tags=PUBLIC_API_TAGS)\ndef start_query_history_export(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n    start: datetime | None = None,\n    end: datetime | None = None,\n) -> dict[str, str]:\n    ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])\n\n    start = start or datetime.fromtimestamp(0, tz=timezone.utc)\n    end = end or datetime.now(tz=timezone.utc)\n\n    if start >= end:\n        raise HTTPException(\n            HTTPStatus.BAD_REQUEST,\n            f\"Start time must come before end time, but instead got the start time coming after; {start=} {end=}\",\n        )\n\n    task_id_uuid = uuid.uuid4()\n    task_id = str(task_id_uuid)\n    start_time = datetime.now(tz=timezone.utc)\n\n    register_task(\n        db_session=db_session,\n        task_name=query_history_task_name(start=start, end=end),\n        task_id=task_id,\n        status=TaskStatus.PENDING,\n        start_time=start_time,\n    )\n\n    client_app.send_task(\n        OnyxCeleryTask.EXPORT_QUERY_HISTORY_TASK,\n        task_id=task_id,\n        priority=OnyxCeleryPriority.MEDIUM,\n        queue=OnyxCeleryQueues.CSV_GENERATION,\n        kwargs={\n            \"start\": start,\n            \"end\": end,\n            \"start_time\": start_time,\n            \"tenant_id\": get_current_tenant_id(),\n        },\n    )\n\n    return {\"request_id\": task_id}\n\n\n@router.get(\"/admin/query-history/export-status\", tags=PUBLIC_API_TAGS)\ndef get_query_history_export_status(\n    request_id: str,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> dict[str, str]:\n    ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])\n\n    task = get_task_with_id(db_session=db_session, task_id=request_id)\n\n    if task:\n        return {\"status\": task.status}\n\n    # If task is None, then it's possible that the task has already finished processing.\n    # Therefore, we should then check if the export file has already been stored inside of the file-store.\n    # If that *also* doesn't exist, then we can return a 404.\n    file_store = get_default_file_store()\n\n    report_name = construct_query_history_report_name(request_id)\n    has_file = file_store.has_file(\n        file_id=report_name,\n        file_origin=FileOrigin.QUERY_HISTORY_CSV,\n        file_type=FileType.CSV,\n    )\n\n    if not has_file:\n        raise HTTPException(\n            HTTPStatus.NOT_FOUND,\n            f\"No task with {request_id=} was found\",\n        )\n\n    return {\"status\": TaskStatus.SUCCESS}\n\n\n@router.get(\"/admin/query-history/download\", tags=PUBLIC_API_TAGS)\ndef download_query_history_csv(\n    request_id: str,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StreamingResponse:\n    ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])\n\n    report_name = construct_query_history_report_name(request_id)\n    file_store = get_default_file_store()\n    has_file = file_store.has_file(\n        file_id=report_name,\n        file_origin=FileOrigin.QUERY_HISTORY_CSV,\n        file_type=FileType.CSV,\n    )\n\n    if has_file:\n        try:\n            csv_stream = file_store.read_file(report_name)\n        except Exception as e:\n            raise HTTPException(\n                HTTPStatus.INTERNAL_SERVER_ERROR,\n                f\"Failed to read query history file: {str(e)}\",\n            )\n        csv_stream.seek(0)\n        return StreamingResponse(\n            iter(csv_stream),\n            media_type=FileType.CSV,\n            headers={\"Content-Disposition\": f\"attachment;filename={report_name}\"},\n        )\n\n    # If the file doesn't exist yet, it may still be processing.\n    # Therefore, we check the task queue to determine its status, if there is any.\n    task = get_task_with_id(db_session=db_session, task_id=request_id)\n    if not task:\n        raise HTTPException(\n            HTTPStatus.NOT_FOUND,\n            f\"No task with {request_id=} was found\",\n        )\n\n    if task.status in [TaskStatus.STARTED, TaskStatus.PENDING]:\n        raise HTTPException(\n            HTTPStatus.ACCEPTED, f\"Task with {request_id=} is still being worked on\"\n        )\n\n    elif task.status == TaskStatus.FAILURE:\n        raise HTTPException(\n            HTTPStatus.INTERNAL_SERVER_ERROR,\n            f\"Task with {request_id=} failed to be processed\",\n        )\n    else:\n        # This is the final case in which `task.status == SUCCESS`\n        raise RuntimeError(\n            \"The task was marked as success, the file was not found in the file store; this is an internal error...\"\n        )\n"
  },
  {
    "path": "backend/ee/onyx/server/query_history/models.py",
    "content": "from datetime import datetime\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\n\nfrom ee.onyx.background.task_name_builders import QUERY_HISTORY_TASK_NAME_PREFIX\nfrom onyx.auth.users import get_display_email\nfrom onyx.background.task_utils import extract_task_id_from_query_history_report_name\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.constants import QAFeedbackType\nfrom onyx.configs.constants import SessionType\nfrom onyx.db.enums import TaskStatus\nfrom onyx.db.models import ChatMessage\nfrom onyx.db.models import ChatSession\nfrom onyx.db.models import FileRecord\nfrom onyx.db.models import TaskQueueState\n\n\nclass AbridgedSearchDoc(BaseModel):\n    \"\"\"A subset of the info present in `SearchDoc`\"\"\"\n\n    document_id: str\n    semantic_identifier: str\n    link: str | None\n\n\nclass MessageSnapshot(BaseModel):\n    id: int\n    message: str\n    message_type: MessageType\n    documents: list[AbridgedSearchDoc]\n    feedback_type: QAFeedbackType | None\n    feedback_text: str | None\n    time_created: datetime\n\n    @classmethod\n    def build(cls, message: ChatMessage) -> \"MessageSnapshot\":\n        latest_messages_feedback_obj = (\n            message.chat_message_feedbacks[-1]\n            if len(message.chat_message_feedbacks) > 0\n            else None\n        )\n        feedback_type = (\n            (\n                QAFeedbackType.LIKE\n                if latest_messages_feedback_obj.is_positive\n                else QAFeedbackType.DISLIKE\n            )\n            if latest_messages_feedback_obj\n            else None\n        )\n        feedback_text = (\n            latest_messages_feedback_obj.feedback_text\n            if latest_messages_feedback_obj\n            else None\n        )\n        return cls(\n            id=message.id,\n            message=message.message,\n            message_type=message.message_type,\n            documents=[\n                AbridgedSearchDoc(\n                    document_id=document.document_id,\n                    semantic_identifier=document.semantic_id,\n                    link=document.link,\n                )\n                for document in message.search_docs\n            ],\n            feedback_type=feedback_type,\n            feedback_text=feedback_text,\n            time_created=message.time_sent,\n        )\n\n\nclass ChatSessionMinimal(BaseModel):\n    id: UUID\n    user_email: str\n    name: str | None\n    first_user_message: str\n    first_ai_message: str\n    assistant_id: int | None\n    assistant_name: str | None\n    time_created: datetime\n    feedback_type: QAFeedbackType | None\n    flow_type: SessionType\n    conversation_length: int\n\n    @classmethod\n    def from_chat_session(cls, chat_session: ChatSession) -> \"ChatSessionMinimal\":\n        first_user_message = next(\n            (\n                message.message\n                for message in chat_session.messages\n                if message.message_type == MessageType.USER\n            ),\n            \"\",\n        )\n        first_ai_message = next(\n            (\n                message.message\n                for message in chat_session.messages\n                if message.message_type == MessageType.ASSISTANT\n            ),\n            \"\",\n        )\n\n        list_of_message_feedbacks = [\n            feedback.is_positive\n            for message in chat_session.messages\n            for feedback in message.chat_message_feedbacks\n        ]\n        session_feedback_type = None\n        if list_of_message_feedbacks:\n            if all(list_of_message_feedbacks):\n                session_feedback_type = QAFeedbackType.LIKE\n            elif not any(list_of_message_feedbacks):\n                session_feedback_type = QAFeedbackType.DISLIKE\n            else:\n                session_feedback_type = QAFeedbackType.MIXED\n\n        return cls(\n            id=chat_session.id,\n            user_email=get_display_email(\n                chat_session.user.email if chat_session.user else None\n            ),\n            name=chat_session.description,\n            first_user_message=first_user_message,\n            first_ai_message=first_ai_message,\n            assistant_id=chat_session.persona_id,\n            assistant_name=(\n                chat_session.persona.name if chat_session.persona else None\n            ),\n            time_created=chat_session.time_created,\n            feedback_type=session_feedback_type,\n            flow_type=(\n                SessionType.SLACK if chat_session.onyxbot_flow else SessionType.CHAT\n            ),\n            conversation_length=len(\n                [\n                    message\n                    for message in chat_session.messages\n                    if message.message_type != MessageType.SYSTEM\n                ]\n            ),\n        )\n\n\nclass ChatSessionSnapshot(BaseModel):\n    id: UUID\n    user_email: str\n    name: str | None\n    messages: list[MessageSnapshot]\n    assistant_id: int | None\n    assistant_name: str | None\n    time_created: datetime\n    flow_type: SessionType\n\n\nclass QuestionAnswerPairSnapshot(BaseModel):\n    chat_session_id: UUID\n    # 1-indexed message number in the chat_session\n    # e.g. the first message pair in the chat_session is 1, the second is 2, etc.\n    message_pair_num: int\n    user_message: str\n    ai_response: str\n    retrieved_documents: list[AbridgedSearchDoc]\n    feedback_type: QAFeedbackType | None\n    feedback_text: str | None\n    persona_name: str | None\n    user_email: str\n    time_created: datetime\n    flow_type: SessionType\n\n    @classmethod\n    def from_chat_session_snapshot(\n        cls,\n        chat_session_snapshot: ChatSessionSnapshot,\n    ) -> list[\"QuestionAnswerPairSnapshot\"]:\n        message_pairs: list[tuple[MessageSnapshot, MessageSnapshot]] = []\n        for ind in range(1, len(chat_session_snapshot.messages), 2):\n            message_pairs.append(\n                (\n                    chat_session_snapshot.messages[ind - 1],\n                    chat_session_snapshot.messages[ind],\n                )\n            )\n\n        return [\n            cls(\n                chat_session_id=chat_session_snapshot.id,\n                message_pair_num=ind + 1,\n                user_message=user_message.message,\n                ai_response=ai_message.message,\n                retrieved_documents=ai_message.documents,\n                feedback_type=ai_message.feedback_type,\n                feedback_text=ai_message.feedback_text,\n                persona_name=chat_session_snapshot.assistant_name,\n                user_email=get_display_email(chat_session_snapshot.user_email),\n                time_created=user_message.time_created,\n                flow_type=chat_session_snapshot.flow_type,\n            )\n            for ind, (user_message, ai_message) in enumerate(message_pairs)\n        ]\n\n    def to_json(self) -> dict[str, str | None]:\n        return {\n            \"chat_session_id\": str(self.chat_session_id),\n            \"message_pair_num\": str(self.message_pair_num),\n            \"user_message\": self.user_message,\n            \"ai_response\": self.ai_response,\n            \"retrieved_documents\": \"|\".join(\n                [\n                    doc.link or doc.semantic_identifier\n                    for doc in self.retrieved_documents\n                ]\n            ),\n            \"feedback_type\": self.feedback_type.value if self.feedback_type else \"\",\n            \"feedback_text\": self.feedback_text or \"\",\n            \"persona_name\": self.persona_name,\n            \"user_email\": self.user_email,\n            \"time_created\": str(self.time_created),\n            \"flow_type\": self.flow_type,\n        }\n\n\nclass QueryHistoryExport(BaseModel):\n    task_id: str\n    status: TaskStatus\n    start: datetime\n    end: datetime\n    start_time: datetime\n\n    @classmethod\n    def from_task(\n        cls,\n        task_queue_state: TaskQueueState,\n    ) -> \"QueryHistoryExport\":\n        start_end = task_queue_state.task_name.removeprefix(\n            f\"{QUERY_HISTORY_TASK_NAME_PREFIX}_\"\n        )\n        start, end = start_end.split(\"_\")\n\n        if not task_queue_state.start_time:\n            raise RuntimeError(\"The start time of the task must always be present\")\n\n        return cls(\n            task_id=task_queue_state.task_id,\n            status=task_queue_state.status,\n            start=datetime.fromisoformat(start),\n            end=datetime.fromisoformat(end),\n            start_time=task_queue_state.start_time,\n        )\n\n    @classmethod\n    def from_file(\n        cls,\n        file: FileRecord,\n    ) -> \"QueryHistoryExport\":\n        if not file.file_metadata or not isinstance(file.file_metadata, dict):\n            raise RuntimeError(\n                \"The file metadata must be non-null, and must be of type `dict[str, str]`\"\n            )\n\n        metadata = QueryHistoryFileMetadata.model_validate(dict(file.file_metadata))\n        task_id = extract_task_id_from_query_history_report_name(file.file_id)\n\n        return cls(\n            task_id=task_id,\n            status=TaskStatus.SUCCESS,\n            start=metadata.start,\n            end=metadata.end,\n            start_time=metadata.start_time,\n        )\n\n\nclass QueryHistoryFileMetadata(BaseModel):\n    start: datetime\n    end: datetime\n    start_time: datetime\n"
  },
  {
    "path": "backend/ee/onyx/server/reporting/usage_export_api.py",
    "content": "from collections.abc import Generator\nfrom datetime import datetime\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Response\nfrom fastapi.responses import StreamingResponse\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.usage_export import get_all_usage_reports\nfrom ee.onyx.db.usage_export import get_usage_report_data\nfrom ee.onyx.db.usage_export import UsageReportMetadata\nfrom onyx.auth.users import current_admin_user\nfrom onyx.background.celery.versioned_apps.client import app as client_app\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.file_store.constants import STANDARD_CHUNK_SIZE\nfrom shared_configs.contextvars import get_current_tenant_id\n\nrouter = APIRouter()\n\n\nclass GenerateUsageReportParams(BaseModel):\n    period_from: str | None = None\n    period_to: str | None = None\n\n\n@router.post(\"/admin/usage-report\", status_code=204)\ndef generate_report(\n    params: GenerateUsageReportParams,\n    user: User = Depends(current_admin_user),\n) -> None:\n    # Validate period parameters\n    if params.period_from and params.period_to:\n        try:\n            datetime.fromisoformat(params.period_from)\n            datetime.fromisoformat(params.period_to)\n        except ValueError as e:\n            raise HTTPException(status_code=400, detail=str(e))\n\n    tenant_id = get_current_tenant_id()\n    client_app.send_task(\n        OnyxCeleryTask.GENERATE_USAGE_REPORT_TASK,\n        kwargs={\n            \"tenant_id\": tenant_id,\n            \"user_id\": str(user.id) if user else None,\n            \"period_from\": params.period_from,\n            \"period_to\": params.period_to,\n        },\n    )\n\n    return None\n\n\n@router.get(\"/admin/usage-report/{report_name}\")\ndef read_usage_report(\n    report_name: str,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),  # noqa: ARG001\n) -> Response:\n    try:\n        file = get_usage_report_data(report_name)\n    except (ValueError, RuntimeError) as e:\n        raise HTTPException(status_code=404, detail=str(e))\n\n    def iterfile() -> Generator[bytes, None, None]:\n        while True:\n            chunk = file.read(STANDARD_CHUNK_SIZE)\n            if not chunk:\n                break\n            yield chunk\n\n    return StreamingResponse(\n        content=iterfile(),\n        media_type=\"application/zip\",\n        headers={\"Content-Disposition\": f\"attachment; filename={report_name}\"},\n    )\n\n\n@router.get(\"/admin/usage-report\")\ndef fetch_usage_reports(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[UsageReportMetadata]:\n    try:\n        return get_all_usage_reports(db_session)\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n"
  },
  {
    "path": "backend/ee/onyx/server/reporting/usage_export_generation.py",
    "content": "import csv\nimport tempfile\nimport uuid\nimport zipfile\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nfrom fastapi_users_db_sqlalchemy import UUID_ID\nfrom sqlalchemy import cast\nfrom sqlalchemy.dialects.postgresql import UUID\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.usage_export import get_all_empty_chat_message_entries\nfrom ee.onyx.db.usage_export import write_usage_report\nfrom ee.onyx.server.reporting.usage_export_models import UsageReportMetadata\nfrom ee.onyx.server.reporting.usage_export_models import UserSkeleton\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.db.models import User\nfrom onyx.db.users import get_all_users\nfrom onyx.file_store.constants import MAX_IN_MEMORY_SIZE\nfrom onyx.file_store.file_store import FileStore\nfrom onyx.file_store.file_store import get_default_file_store\n\n\ndef generate_chat_messages_report(\n    db_session: Session,\n    file_store: FileStore,\n    report_id: str,\n    period: tuple[datetime, datetime] | None,\n) -> str:\n    file_name = f\"{report_id}_chat_sessions\"\n\n    if period is None:\n        period = (\n            datetime.fromtimestamp(0, tz=timezone.utc),\n            datetime.now(tz=timezone.utc),\n        )\n    else:\n        # time-picker sends a time which is at the beginning of the day\n        # so we need to add one day to the end time to make it inclusive\n        period = (\n            period[0],\n            period[1] + timedelta(days=1),\n        )\n\n    with tempfile.SpooledTemporaryFile(\n        max_size=MAX_IN_MEMORY_SIZE, mode=\"w+\"\n    ) as temp_file:\n        csvwriter = csv.writer(temp_file, delimiter=\",\")\n        csvwriter.writerow(\n            [\n                \"session_id\",\n                \"user_id\",\n                \"flow_type\",\n                \"time_sent\",\n                \"assistant_name\",\n                \"user_email\",\n                \"number_of_tokens\",\n            ]\n        )\n        for chat_message_skeleton_batch in get_all_empty_chat_message_entries(\n            db_session, period\n        ):\n            for chat_message_skeleton in chat_message_skeleton_batch:\n                csvwriter.writerow(\n                    [\n                        chat_message_skeleton.chat_session_id,\n                        chat_message_skeleton.user_id,\n                        chat_message_skeleton.flow_type,\n                        chat_message_skeleton.time_sent.isoformat(),\n                        chat_message_skeleton.assistant_name,\n                        chat_message_skeleton.user_email,\n                        chat_message_skeleton.number_of_tokens,\n                    ]\n                )\n\n        # after writing seek to beginning of buffer\n        temp_file.seek(0)\n        file_id = file_store.save_file(\n            content=temp_file,\n            display_name=file_name,\n            file_origin=FileOrigin.GENERATED_REPORT,\n            file_type=\"text/csv\",\n        )\n\n    return file_id\n\n\ndef generate_user_report(\n    db_session: Session,\n    file_store: FileStore,\n    report_id: str,\n) -> str:\n    file_name = f\"{report_id}_users\"\n\n    with tempfile.SpooledTemporaryFile(\n        max_size=MAX_IN_MEMORY_SIZE, mode=\"w+\"\n    ) as temp_file:\n        csvwriter = csv.writer(temp_file, delimiter=\",\")\n        csvwriter.writerow([\"user_id\", \"is_active\"])\n\n        users = get_all_users(db_session)\n        for user in users:\n            user_skeleton = UserSkeleton(\n                user_id=str(user.id),\n                is_active=user.is_active,\n            )\n            csvwriter.writerow([user_skeleton.user_id, user_skeleton.is_active])\n\n        temp_file.seek(0)\n        file_id = file_store.save_file(\n            content=temp_file,\n            display_name=file_name,\n            file_origin=FileOrigin.GENERATED_REPORT,\n            file_type=\"text/csv\",\n        )\n\n    return file_id\n\n\ndef create_new_usage_report(\n    db_session: Session,\n    user_id: UUID_ID | None,  # None = auto-generated\n    period: tuple[datetime, datetime] | None,\n) -> UsageReportMetadata:\n    report_id = str(uuid.uuid4())\n    file_store = get_default_file_store()\n\n    messages_file_id = generate_chat_messages_report(\n        db_session, file_store, report_id, period\n    )\n    users_file_id = generate_user_report(db_session, file_store, report_id)\n\n    with tempfile.SpooledTemporaryFile(max_size=MAX_IN_MEMORY_SIZE) as zip_buffer:\n        with zipfile.ZipFile(zip_buffer, \"a\", zipfile.ZIP_DEFLATED) as zip_file:\n            # write messages\n            chat_messages_tmpfile = file_store.read_file(\n                messages_file_id, mode=\"b\", use_tempfile=True\n            )\n            zip_file.writestr(\n                \"chat_messages.csv\",\n                chat_messages_tmpfile.read(),\n            )\n\n            # write users\n            users_tmpfile = file_store.read_file(\n                users_file_id, mode=\"b\", use_tempfile=True\n            )\n            zip_file.writestr(\"users.csv\", users_tmpfile.read())\n\n        zip_buffer.seek(0)\n\n        # store zip blob to file_store\n        report_name = f\"{datetime.now(tz=timezone.utc).strftime('%Y-%m-%d')}_{report_id}_usage_report.zip\"\n        file_store.save_file(\n            content=zip_buffer,\n            display_name=report_name,\n            file_origin=FileOrigin.GENERATED_REPORT,\n            file_type=\"application/zip\",\n            file_id=report_name,\n        )\n\n    # add report after zip file is written\n    new_report = write_usage_report(db_session, report_name, user_id, period)\n\n    # get user email\n    requestor_user = (\n        db_session.query(User)\n        .filter(cast(User.id, UUID) == new_report.requestor_user_id)\n        .one_or_none()\n        if new_report.requestor_user_id\n        else None\n    )\n    requestor_email = requestor_user.email if requestor_user else None\n\n    return UsageReportMetadata(\n        report_name=new_report.report_name,\n        requestor=requestor_email,\n        time_created=new_report.time_created,\n        period_from=new_report.period_from,\n        period_to=new_report.period_to,\n    )\n"
  },
  {
    "path": "backend/ee/onyx/server/reporting/usage_export_models.py",
    "content": "from datetime import datetime\nfrom enum import Enum\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\n\n\nclass FlowType(str, Enum):\n    CHAT = \"chat\"\n    SLACK = \"slack\"\n\n\nclass ChatMessageSkeleton(BaseModel):\n    message_id: int\n    chat_session_id: UUID\n    user_id: str | None\n    flow_type: FlowType\n    time_sent: datetime\n    assistant_name: str | None\n    user_email: str | None\n    number_of_tokens: int\n\n\nclass UserSkeleton(BaseModel):\n    user_id: str\n    is_active: bool\n\n\nclass UsageReportMetadata(BaseModel):\n    report_name: str\n    requestor: str | None\n    time_created: datetime\n    period_from: datetime | None  # None = All time\n    period_to: datetime | None\n"
  },
  {
    "path": "backend/ee/onyx/server/scim/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/server/scim/api.py",
    "content": "\"\"\"SCIM 2.0 API endpoints (RFC 7644).\n\nThis module provides the FastAPI router for SCIM service discovery,\nUser CRUD, and Group CRUD. Identity providers (Okta, Azure AD) call\nthese endpoints to provision and manage users and groups.\n\nService discovery endpoints are unauthenticated — IdPs may probe them\nbefore bearer token configuration is complete. All other endpoints\nrequire a valid SCIM bearer token.\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom uuid import UUID\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import FastAPI\nfrom fastapi import Query\nfrom fastapi import Request\nfrom fastapi import Response\nfrom fastapi.responses import JSONResponse\nfrom fastapi_users.password import PasswordHelper\nfrom sqlalchemy import func\nfrom sqlalchemy.exc import IntegrityError\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.scim import ScimDAL\nfrom ee.onyx.server.scim.auth import ScimAuthError\nfrom ee.onyx.server.scim.auth import verify_scim_token\nfrom ee.onyx.server.scim.filtering import parse_scim_filter\nfrom ee.onyx.server.scim.models import SCIM_LIST_RESPONSE_SCHEMA\nfrom ee.onyx.server.scim.models import ScimError\nfrom ee.onyx.server.scim.models import ScimGroupMember\nfrom ee.onyx.server.scim.models import ScimGroupResource\nfrom ee.onyx.server.scim.models import ScimListResponse\nfrom ee.onyx.server.scim.models import ScimMappingFields\nfrom ee.onyx.server.scim.models import ScimName\nfrom ee.onyx.server.scim.models import ScimPatchRequest\nfrom ee.onyx.server.scim.models import ScimServiceProviderConfig\nfrom ee.onyx.server.scim.models import ScimUserResource\nfrom ee.onyx.server.scim.patch import apply_group_patch\nfrom ee.onyx.server.scim.patch import apply_user_patch\nfrom ee.onyx.server.scim.patch import ScimPatchError\nfrom ee.onyx.server.scim.providers.base import get_default_provider\nfrom ee.onyx.server.scim.providers.base import ScimProvider\nfrom ee.onyx.server.scim.providers.base import serialize_emails\nfrom ee.onyx.server.scim.schema_definitions import ENTERPRISE_USER_SCHEMA_DEF\nfrom ee.onyx.server.scim.schema_definitions import GROUP_RESOURCE_TYPE\nfrom ee.onyx.server.scim.schema_definitions import GROUP_SCHEMA_DEF\nfrom ee.onyx.server.scim.schema_definitions import SERVICE_PROVIDER_CONFIG\nfrom ee.onyx.server.scim.schema_definitions import USER_RESOURCE_TYPE\nfrom ee.onyx.server.scim.schema_definitions import USER_SCHEMA_DEF\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.enums import AccountType\nfrom onyx.db.enums import GrantSource\nfrom onyx.db.enums import Permission\nfrom onyx.db.models import ScimToken\nfrom onyx.db.models import ScimUserMapping\nfrom onyx.db.models import User\nfrom onyx.db.models import UserGroup\nfrom onyx.db.models import UserRole\nfrom onyx.db.permissions import recompute_permissions_for_group__no_commit\nfrom onyx.db.permissions import recompute_user_permissions__no_commit\nfrom onyx.db.users import assign_user_to_default_groups__no_commit\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\n\nlogger = setup_logger()\n\n# Group names reserved for system default groups (seeded by migration).\n_RESERVED_GROUP_NAMES = frozenset({\"Admin\", \"Basic\"})\n\n\nclass ScimJSONResponse(JSONResponse):\n    \"\"\"JSONResponse with Content-Type: application/scim+json (RFC 7644 §3.1).\"\"\"\n\n    media_type = \"application/scim+json\"\n\n\n# NOTE: All URL paths in this router (/ServiceProviderConfig, /ResourceTypes,\n# /Schemas, /Users, /Groups) are mandated by the SCIM spec (RFC 7643/7644).\n# IdPs like Okta and Azure AD hardcode these exact paths, so they cannot be\n# changed to kebab-case.\n\n\nscim_router = APIRouter(prefix=\"/scim/v2\", tags=[\"SCIM\"])\n\n_pw_helper = PasswordHelper()\n\n\ndef register_scim_exception_handlers(app: FastAPI) -> None:\n    \"\"\"Register SCIM-specific exception handlers on the FastAPI app.\n\n    Call this after ``app.include_router(scim_router)`` so that auth\n    failures from ``verify_scim_token`` return RFC 7644 §3.12 error\n    envelopes (with ``schemas`` and ``status`` fields) instead of\n    FastAPI's default ``{\"detail\": \"...\"}`` format.\n    \"\"\"\n\n    @app.exception_handler(ScimAuthError)\n    async def _handle_scim_auth_error(\n        _request: Request, exc: ScimAuthError\n    ) -> ScimJSONResponse:\n        return _scim_error_response(exc.status_code, exc.detail)\n\n\ndef _get_provider(\n    _token: ScimToken = Depends(verify_scim_token),\n) -> ScimProvider:\n    \"\"\"Resolve the SCIM provider for the current request.\n\n    Currently returns OktaProvider for all requests. When multi-provider\n    support is added (ENG-3652), this will resolve based on token metadata\n    or tenant configuration — no endpoint changes required.\n    \"\"\"\n    return get_default_provider()\n\n\n# ---------------------------------------------------------------------------\n# Service Discovery Endpoints (unauthenticated)\n# ---------------------------------------------------------------------------\n\n\n@scim_router.get(\"/ServiceProviderConfig\")\ndef get_service_provider_config() -> ScimServiceProviderConfig:\n    \"\"\"Advertise supported SCIM features (RFC 7643 §5).\"\"\"\n    return SERVICE_PROVIDER_CONFIG\n\n\n@scim_router.get(\"/ResourceTypes\")\ndef get_resource_types() -> ScimJSONResponse:\n    \"\"\"List available SCIM resource types (RFC 7643 §6).\n\n    Wrapped in a ListResponse envelope (RFC 7644 §3.4.2) because IdPs\n    like Entra ID expect a JSON object, not a bare array.\n    \"\"\"\n    resources = [USER_RESOURCE_TYPE, GROUP_RESOURCE_TYPE]\n    return ScimJSONResponse(\n        content={\n            \"schemas\": [SCIM_LIST_RESPONSE_SCHEMA],\n            \"totalResults\": len(resources),\n            \"Resources\": [\n                r.model_dump(exclude_none=True, by_alias=True) for r in resources\n            ],\n        }\n    )\n\n\n@scim_router.get(\"/Schemas\")\ndef get_schemas() -> ScimJSONResponse:\n    \"\"\"Return SCIM schema definitions (RFC 7643 §7).\n\n    Wrapped in a ListResponse envelope (RFC 7644 §3.4.2) because IdPs\n    like Entra ID expect a JSON object, not a bare array.\n    \"\"\"\n    schemas = [USER_SCHEMA_DEF, GROUP_SCHEMA_DEF, ENTERPRISE_USER_SCHEMA_DEF]\n    return ScimJSONResponse(\n        content={\n            \"schemas\": [SCIM_LIST_RESPONSE_SCHEMA],\n            \"totalResults\": len(schemas),\n            \"Resources\": [s.model_dump(exclude_none=True) for s in schemas],\n        }\n    )\n\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef _scim_error_response(status: int, detail: str) -> ScimJSONResponse:\n    \"\"\"Build a SCIM-compliant error response (RFC 7644 §3.12).\"\"\"\n    logger.warning(\"SCIM error response: status=%s detail=%s\", status, detail)\n    body = ScimError(status=str(status), detail=detail)\n    return ScimJSONResponse(\n        status_code=status,\n        content=body.model_dump(exclude_none=True),\n    )\n\n\ndef _parse_excluded_attributes(raw: str | None) -> set[str]:\n    \"\"\"Parse the ``excludedAttributes`` query parameter (RFC 7644 §3.4.2.5).\n\n    Returns a set of lowercased attribute names to omit from responses.\n    \"\"\"\n    if not raw:\n        return set()\n    return {attr.strip().lower() for attr in raw.split(\",\") if attr.strip()}\n\n\ndef _apply_exclusions(\n    resource: ScimUserResource | ScimGroupResource,\n    excluded: set[str],\n) -> dict:\n    \"\"\"Serialize a SCIM resource, omitting attributes the IdP excluded.\n\n    RFC 7644 §3.4.2.5 lets the IdP pass ``?excludedAttributes=groups,emails``\n    to reduce response payload size. We strip those fields after serialization\n    so the rest of the pipeline doesn't need to know about them.\n    \"\"\"\n    data = resource.model_dump(exclude_none=True, by_alias=True)\n    for attr in excluded:\n        # Match case-insensitively against the camelCase field names\n        keys_to_remove = [k for k in data if k.lower() == attr]\n        for k in keys_to_remove:\n            del data[k]\n    return data\n\n\ndef _check_seat_availability(dal: ScimDAL) -> str | None:\n    \"\"\"Return an error message if seat limit is reached, else None.\"\"\"\n    check_fn = fetch_ee_implementation_or_noop(\n        \"onyx.db.license\", \"check_seat_availability\", None\n    )\n    if check_fn is None:\n        return None\n    result = check_fn(dal.session, seats_needed=1)\n    if not result.available:\n        return result.error_message or \"Seat limit reached\"\n    return None\n\n\ndef _fetch_user_or_404(user_id: str, dal: ScimDAL) -> User | ScimJSONResponse:\n    \"\"\"Parse *user_id* as UUID, look up the user, or return a 404 error.\"\"\"\n    try:\n        uid = UUID(user_id)\n    except ValueError:\n        return _scim_error_response(404, f\"User {user_id} not found\")\n    user = dal.get_user(uid)\n    if not user:\n        return _scim_error_response(404, f\"User {user_id} not found\")\n    return user\n\n\ndef _scim_name_to_str(name: ScimName | None) -> str | None:\n    \"\"\"Extract a display name string from a SCIM name object.\n\n    Returns None if no name is provided, so the caller can decide\n    whether to update the user's personal_name.\n    \"\"\"\n    if not name:\n        return None\n    # If the client explicitly provides ``formatted``, prefer it — the client\n    # knows what display string it wants. Otherwise build from components.\n    if name.formatted:\n        return name.formatted\n    parts = \" \".join(part for part in [name.givenName, name.familyName] if part)\n    return parts or None\n\n\ndef _scim_resource_response(\n    resource: ScimUserResource | ScimGroupResource | ScimListResponse,\n    status_code: int = 200,\n) -> ScimJSONResponse:\n    \"\"\"Serialize a SCIM resource as ``application/scim+json``.\"\"\"\n    content = resource.model_dump(exclude_none=True, by_alias=True)\n    return ScimJSONResponse(\n        status_code=status_code,\n        content=content,\n    )\n\n\ndef _build_list_response(\n    resources: list[ScimUserResource | ScimGroupResource],\n    total: int,\n    start_index: int,\n    count: int,\n    excluded: set[str] | None = None,\n) -> ScimListResponse | ScimJSONResponse:\n    \"\"\"Build a SCIM list response, optionally applying attribute exclusions.\n\n    RFC 7644 §3.4.2.5 — IdPs may request certain attributes be omitted via\n    the ``excludedAttributes`` query parameter.\n    \"\"\"\n    if excluded:\n        envelope = ScimListResponse(\n            totalResults=total,\n            startIndex=start_index,\n            itemsPerPage=count,\n        )\n        data = envelope.model_dump(exclude_none=True)\n        data[\"Resources\"] = [_apply_exclusions(r, excluded) for r in resources]\n        return ScimJSONResponse(content=data)\n\n    return _scim_resource_response(\n        ScimListResponse(\n            totalResults=total,\n            startIndex=start_index,\n            itemsPerPage=count,\n            Resources=resources,\n        )\n    )\n\n\ndef _extract_enterprise_fields(\n    resource: ScimUserResource,\n) -> tuple[str | None, str | None]:\n    \"\"\"Extract department and manager from enterprise extension.\"\"\"\n    ext = resource.enterprise_extension\n    if not ext:\n        return None, None\n    department = ext.department\n    manager = ext.manager.value if ext.manager else None\n    return department, manager\n\n\ndef _mapping_to_fields(\n    mapping: ScimUserMapping | None,\n) -> ScimMappingFields | None:\n    \"\"\"Extract round-trip fields from a SCIM user mapping.\"\"\"\n    if not mapping:\n        return None\n    return ScimMappingFields(\n        department=mapping.department,\n        manager=mapping.manager,\n        given_name=mapping.given_name,\n        family_name=mapping.family_name,\n        scim_emails_json=mapping.scim_emails_json,\n    )\n\n\ndef _fields_from_resource(resource: ScimUserResource) -> ScimMappingFields:\n    \"\"\"Build mapping fields from an incoming SCIM user resource.\"\"\"\n    department, manager = _extract_enterprise_fields(resource)\n    return ScimMappingFields(\n        department=department,\n        manager=manager,\n        given_name=resource.name.givenName if resource.name else None,\n        family_name=resource.name.familyName if resource.name else None,\n        scim_emails_json=serialize_emails(resource.emails),\n    )\n\n\n# ---------------------------------------------------------------------------\n# User CRUD (RFC 7644 §3)\n# ---------------------------------------------------------------------------\n\n\n@scim_router.get(\"/Users\", response_model=None)\ndef list_users(\n    filter: str | None = Query(None),\n    excludedAttributes: str | None = None,\n    startIndex: int = Query(1, ge=1),\n    count: int = Query(100, ge=0, le=500),\n    _token: ScimToken = Depends(verify_scim_token),\n    provider: ScimProvider = Depends(_get_provider),\n    db_session: Session = Depends(get_session),\n) -> ScimListResponse | ScimJSONResponse:\n    \"\"\"List users with optional SCIM filter and pagination.\"\"\"\n    dal = ScimDAL(db_session)\n    dal.update_token_last_used(_token.id)\n    dal.commit()\n\n    try:\n        scim_filter = parse_scim_filter(filter)\n    except ValueError as e:\n        return _scim_error_response(400, str(e))\n\n    try:\n        users_with_mappings, total = dal.list_users(scim_filter, startIndex, count)\n    except ValueError as e:\n        return _scim_error_response(400, str(e))\n\n    user_groups_map = dal.get_users_groups_batch([u.id for u, _ in users_with_mappings])\n    resources: list[ScimUserResource | ScimGroupResource] = [\n        provider.build_user_resource(\n            user,\n            mapping.external_id if mapping else None,\n            groups=user_groups_map.get(user.id, []),\n            scim_username=mapping.scim_username if mapping else None,\n            fields=_mapping_to_fields(mapping),\n        )\n        for user, mapping in users_with_mappings\n    ]\n\n    return _build_list_response(\n        resources,\n        total,\n        startIndex,\n        count,\n        excluded=_parse_excluded_attributes(excludedAttributes),\n    )\n\n\n@scim_router.get(\"/Users/{user_id}\", response_model=None)\ndef get_user(\n    user_id: str,\n    excludedAttributes: str | None = None,\n    _token: ScimToken = Depends(verify_scim_token),\n    provider: ScimProvider = Depends(_get_provider),\n    db_session: Session = Depends(get_session),\n) -> ScimUserResource | ScimJSONResponse:\n    \"\"\"Get a single user by ID.\"\"\"\n    dal = ScimDAL(db_session)\n    dal.update_token_last_used(_token.id)\n    dal.commit()\n\n    result = _fetch_user_or_404(user_id, dal)\n    if isinstance(result, ScimJSONResponse):\n        return result\n    user = result\n\n    mapping = dal.get_user_mapping_by_user_id(user.id)\n\n    resource = provider.build_user_resource(\n        user,\n        mapping.external_id if mapping else None,\n        groups=dal.get_user_groups(user.id),\n        scim_username=mapping.scim_username if mapping else None,\n        fields=_mapping_to_fields(mapping),\n    )\n\n    # RFC 7644 §3.4.2.5 — IdP may request certain attributes be omitted\n    excluded = _parse_excluded_attributes(excludedAttributes)\n    if excluded:\n        return ScimJSONResponse(content=_apply_exclusions(resource, excluded))\n\n    return _scim_resource_response(resource)\n\n\n@scim_router.post(\"/Users\", status_code=201, response_model=None)\ndef create_user(\n    user_resource: ScimUserResource,\n    _token: ScimToken = Depends(verify_scim_token),\n    provider: ScimProvider = Depends(_get_provider),\n    db_session: Session = Depends(get_session),\n) -> ScimUserResource | ScimJSONResponse:\n    \"\"\"Create a new user from a SCIM provisioning request.\"\"\"\n    dal = ScimDAL(db_session)\n    dal.update_token_last_used(_token.id)\n\n    email = user_resource.userName.strip()\n\n    # Check for existing user — if they exist but aren't SCIM-managed yet,\n    # link them to the IdP rather than rejecting with 409.\n    external_id: str | None = user_resource.externalId\n    scim_username: str = user_resource.userName.strip()\n    fields: ScimMappingFields = _fields_from_resource(user_resource)\n\n    existing_user = dal.get_user_by_email(email)\n    if existing_user:\n        existing_mapping = dal.get_user_mapping_by_user_id(existing_user.id)\n        if existing_mapping:\n            return _scim_error_response(409, f\"User with email {email} already exists\")\n\n        # Adopt pre-existing user into SCIM management.\n        # Reactivating a deactivated user consumes a seat, so enforce the\n        # seat limit the same way replace_user does.\n        if user_resource.active and not existing_user.is_active:\n            seat_error = _check_seat_availability(dal)\n            if seat_error:\n                return _scim_error_response(403, seat_error)\n\n        personal_name = _scim_name_to_str(user_resource.name)\n        dal.update_user(\n            existing_user,\n            is_active=user_resource.active,\n            **({\"personal_name\": personal_name} if personal_name else {}),\n        )\n\n        try:\n            dal.create_user_mapping(\n                external_id=external_id,\n                user_id=existing_user.id,\n                scim_username=scim_username,\n                fields=fields,\n            )\n            dal.commit()\n        except IntegrityError:\n            dal.rollback()\n            return _scim_error_response(\n                409, f\"User with email {email} already has a SCIM mapping\"\n            )\n\n        return _scim_resource_response(\n            provider.build_user_resource(\n                existing_user,\n                external_id,\n                scim_username=scim_username,\n                fields=fields,\n            ),\n            status_code=201,\n        )\n\n    # Only enforce seat limit for net-new users — adopting a pre-existing\n    # user doesn't consume a new seat.\n    seat_error = _check_seat_availability(dal)\n    if seat_error:\n        return _scim_error_response(403, seat_error)\n\n    # Create user with a random password (SCIM users authenticate via IdP)\n    personal_name = _scim_name_to_str(user_resource.name)\n    user = User(\n        email=email,\n        hashed_password=_pw_helper.hash(_pw_helper.generate()),\n        role=UserRole.BASIC,\n        account_type=AccountType.STANDARD,\n        is_active=user_resource.active,\n        is_verified=True,\n        personal_name=personal_name,\n    )\n\n    try:\n        dal.add_user(user)\n    except IntegrityError:\n        dal.rollback()\n        return _scim_error_response(409, f\"User with email {email} already exists\")\n\n    # Always create a SCIM mapping so that the user is marked as\n    # SCIM-managed. externalId may be None (RFC 7643 says it's optional).\n    try:\n        dal.create_user_mapping(\n            external_id=external_id,\n            user_id=user.id,\n            scim_username=scim_username,\n            fields=fields,\n        )\n    except IntegrityError:\n        dal.rollback()\n        return _scim_error_response(\n            409, f\"User with email {email} already has a SCIM mapping\"\n        )\n\n    # Assign user to default group BEFORE commit so everything is atomic.\n    # If this fails, the entire user creation rolls back and IdP can retry.\n    try:\n        assign_user_to_default_groups__no_commit(db_session, user)\n    except Exception:\n        dal.rollback()\n        logger.exception(f\"Failed to assign SCIM user {email} to default groups\")\n        return _scim_error_response(\n            500, f\"Failed to assign user {email} to default group\"\n        )\n\n    dal.commit()\n\n    return _scim_resource_response(\n        provider.build_user_resource(\n            user,\n            external_id,\n            scim_username=scim_username,\n            fields=fields,\n        ),\n        status_code=201,\n    )\n\n\n@scim_router.put(\"/Users/{user_id}\", response_model=None)\ndef replace_user(\n    user_id: str,\n    user_resource: ScimUserResource,\n    _token: ScimToken = Depends(verify_scim_token),\n    provider: ScimProvider = Depends(_get_provider),\n    db_session: Session = Depends(get_session),\n) -> ScimUserResource | ScimJSONResponse:\n    \"\"\"Replace a user entirely (RFC 7644 §3.5.1).\"\"\"\n    dal = ScimDAL(db_session)\n    dal.update_token_last_used(_token.id)\n\n    result = _fetch_user_or_404(user_id, dal)\n    if isinstance(result, ScimJSONResponse):\n        return result\n    user = result\n\n    # Handle activation (need seat check) / deactivation\n    is_reactivation = user_resource.active and not user.is_active\n    if is_reactivation:\n        seat_error = _check_seat_availability(dal)\n        if seat_error:\n            return _scim_error_response(403, seat_error)\n\n    personal_name = _scim_name_to_str(user_resource.name)\n\n    dal.update_user(\n        user,\n        email=user_resource.userName.strip(),\n        is_active=user_resource.active,\n        personal_name=personal_name,\n    )\n\n    # Reconcile default-group membership on reactivation\n    if is_reactivation:\n        assign_user_to_default_groups__no_commit(\n            db_session, user, is_admin=(user.role == UserRole.ADMIN)\n        )\n\n    new_external_id = user_resource.externalId\n    scim_username = user_resource.userName.strip()\n    fields = _fields_from_resource(user_resource)\n    dal.sync_user_external_id(\n        user.id,\n        new_external_id,\n        scim_username=scim_username,\n        fields=fields,\n    )\n\n    dal.commit()\n\n    return _scim_resource_response(\n        provider.build_user_resource(\n            user,\n            new_external_id,\n            groups=dal.get_user_groups(user.id),\n            scim_username=scim_username,\n            fields=fields,\n        )\n    )\n\n\n@scim_router.patch(\"/Users/{user_id}\", response_model=None)\ndef patch_user(\n    user_id: str,\n    patch_request: ScimPatchRequest,\n    _token: ScimToken = Depends(verify_scim_token),\n    provider: ScimProvider = Depends(_get_provider),\n    db_session: Session = Depends(get_session),\n) -> ScimUserResource | ScimJSONResponse:\n    \"\"\"Partially update a user (RFC 7644 §3.5.2).\n\n    This is the primary endpoint for user deprovisioning — Okta sends\n    ``PATCH {\"active\": false}`` rather than DELETE.\n    \"\"\"\n    dal = ScimDAL(db_session)\n    dal.update_token_last_used(_token.id)\n\n    result = _fetch_user_or_404(user_id, dal)\n    if isinstance(result, ScimJSONResponse):\n        return result\n    user = result\n\n    mapping = dal.get_user_mapping_by_user_id(user.id)\n    external_id = mapping.external_id if mapping else None\n    current_scim_username = mapping.scim_username if mapping else None\n    current_fields = _mapping_to_fields(mapping)\n\n    current = provider.build_user_resource(\n        user,\n        external_id,\n        groups=dal.get_user_groups(user.id),\n        scim_username=current_scim_username,\n        fields=current_fields,\n    )\n\n    try:\n        patched, ent_data = apply_user_patch(\n            patch_request.Operations, current, provider.ignored_patch_paths\n        )\n    except ScimPatchError as e:\n        return _scim_error_response(e.status, e.detail)\n\n    # Apply changes back to the DB model\n    is_reactivation = patched.active and not user.is_active\n    if patched.active != user.is_active:\n        if patched.active:\n            seat_error = _check_seat_availability(dal)\n            if seat_error:\n                return _scim_error_response(403, seat_error)\n\n    # Track the scim_username — if userName was patched, update it\n    new_scim_username = patched.userName.strip() if patched.userName else None\n\n    # If displayName was explicitly patched (different from the original), use\n    # it as personal_name directly.  Otherwise, derive from name components.\n    personal_name: str | None\n    if patched.displayName and patched.displayName != current.displayName:\n        personal_name = patched.displayName\n    else:\n        personal_name = _scim_name_to_str(patched.name)\n\n    dal.update_user(\n        user,\n        email=(\n            patched.userName.strip()\n            if patched.userName.strip().lower() != user.email.lower()\n            else None\n        ),\n        is_active=patched.active if patched.active != user.is_active else None,\n        personal_name=personal_name,\n    )\n\n    # Reconcile default-group membership on reactivation\n    if is_reactivation:\n        assign_user_to_default_groups__no_commit(\n            db_session, user, is_admin=(user.role == UserRole.ADMIN)\n        )\n\n    # Build updated fields by merging PATCH enterprise data with current values\n    cf = current_fields or ScimMappingFields()\n    fields = ScimMappingFields(\n        department=ent_data.get(\"department\", cf.department),\n        manager=ent_data.get(\"manager\", cf.manager),\n        given_name=patched.name.givenName if patched.name else cf.given_name,\n        family_name=patched.name.familyName if patched.name else cf.family_name,\n        scim_emails_json=(\n            serialize_emails(patched.emails)\n            if patched.emails is not None\n            else cf.scim_emails_json\n        ),\n    )\n\n    dal.sync_user_external_id(\n        user.id,\n        patched.externalId,\n        scim_username=new_scim_username,\n        fields=fields,\n    )\n\n    dal.commit()\n\n    return _scim_resource_response(\n        provider.build_user_resource(\n            user,\n            patched.externalId,\n            groups=dal.get_user_groups(user.id),\n            scim_username=new_scim_username,\n            fields=fields,\n        )\n    )\n\n\n@scim_router.delete(\"/Users/{user_id}\", status_code=204, response_model=None)\ndef delete_user(\n    user_id: str,\n    _token: ScimToken = Depends(verify_scim_token),\n    db_session: Session = Depends(get_session),\n) -> Response | ScimJSONResponse:\n    \"\"\"Delete a user (RFC 7644 §3.6).\n\n    Deactivates the user and removes the SCIM mapping. Note that Okta\n    typically uses PATCH active=false instead of DELETE.\n    A second DELETE returns 404 per RFC 7644 §3.6.\n    \"\"\"\n    dal = ScimDAL(db_session)\n    dal.update_token_last_used(_token.id)\n\n    result = _fetch_user_or_404(user_id, dal)\n    if isinstance(result, ScimJSONResponse):\n        return result\n    user = result\n\n    # If no SCIM mapping exists, the user was already deleted from\n    # SCIM's perspective — return 404 per RFC 7644 §3.6.\n    mapping = dal.get_user_mapping_by_user_id(user.id)\n    if not mapping:\n        return _scim_error_response(404, f\"User {user_id} not found\")\n\n    dal.deactivate_user(user)\n    dal.delete_user_mapping(mapping.id)\n\n    dal.commit()\n\n    return Response(status_code=204)\n\n\n# ---------------------------------------------------------------------------\n# Group helpers\n# ---------------------------------------------------------------------------\n\n\ndef _fetch_group_or_404(group_id: str, dal: ScimDAL) -> UserGroup | ScimJSONResponse:\n    \"\"\"Parse *group_id* as int, look up the group, or return a 404 error.\"\"\"\n    try:\n        gid = int(group_id)\n    except ValueError:\n        return _scim_error_response(404, f\"Group {group_id} not found\")\n    group = dal.get_group(gid)\n    if not group:\n        return _scim_error_response(404, f\"Group {group_id} not found\")\n    return group\n\n\ndef _parse_member_uuids(\n    members: list[ScimGroupMember],\n) -> tuple[list[UUID], str | None]:\n    \"\"\"Parse member value strings to UUIDs.\n\n    Returns (uuid_list, error_message). error_message is None on success.\n    \"\"\"\n    uuids: list[UUID] = []\n    for m in members:\n        try:\n            uuids.append(UUID(m.value))\n        except ValueError:\n            return [], f\"Invalid member ID: {m.value}\"\n    return uuids, None\n\n\ndef _validate_and_parse_members(\n    members: list[ScimGroupMember], dal: ScimDAL\n) -> tuple[list[UUID], str | None]:\n    \"\"\"Parse and validate member UUIDs exist in the database.\n\n    Returns (uuid_list, error_message). error_message is None on success.\n    \"\"\"\n    uuids, err = _parse_member_uuids(members)\n    if err:\n        return [], err\n\n    if uuids:\n        missing = dal.validate_member_ids(uuids)\n        if missing:\n            return [], f\"Member(s) not found: {', '.join(str(u) for u in missing)}\"\n\n    return uuids, None\n\n\n# ---------------------------------------------------------------------------\n# Group CRUD (RFC 7644 §3)\n# ---------------------------------------------------------------------------\n\n\n@scim_router.get(\"/Groups\", response_model=None)\ndef list_groups(\n    filter: str | None = Query(None),\n    excludedAttributes: str | None = None,\n    startIndex: int = Query(1, ge=1),\n    count: int = Query(100, ge=0, le=500),\n    _token: ScimToken = Depends(verify_scim_token),\n    provider: ScimProvider = Depends(_get_provider),\n    db_session: Session = Depends(get_session),\n) -> ScimListResponse | ScimJSONResponse:\n    \"\"\"List groups with optional SCIM filter and pagination.\"\"\"\n    dal = ScimDAL(db_session)\n    dal.update_token_last_used(_token.id)\n    dal.commit()\n\n    try:\n        scim_filter = parse_scim_filter(filter)\n    except ValueError as e:\n        return _scim_error_response(400, str(e))\n\n    try:\n        groups_with_ext_ids, total = dal.list_groups(scim_filter, startIndex, count)\n    except ValueError as e:\n        return _scim_error_response(400, str(e))\n\n    resources: list[ScimUserResource | ScimGroupResource] = [\n        provider.build_group_resource(group, dal.get_group_members(group.id), ext_id)\n        for group, ext_id in groups_with_ext_ids\n    ]\n\n    return _build_list_response(\n        resources,\n        total,\n        startIndex,\n        count,\n        excluded=_parse_excluded_attributes(excludedAttributes),\n    )\n\n\n@scim_router.get(\"/Groups/{group_id}\", response_model=None)\ndef get_group(\n    group_id: str,\n    excludedAttributes: str | None = None,\n    _token: ScimToken = Depends(verify_scim_token),\n    provider: ScimProvider = Depends(_get_provider),\n    db_session: Session = Depends(get_session),\n) -> ScimGroupResource | ScimJSONResponse:\n    \"\"\"Get a single group by ID.\"\"\"\n    dal = ScimDAL(db_session)\n    dal.update_token_last_used(_token.id)\n    dal.commit()\n\n    result = _fetch_group_or_404(group_id, dal)\n    if isinstance(result, ScimJSONResponse):\n        return result\n    group = result\n\n    mapping = dal.get_group_mapping_by_group_id(group.id)\n    members = dal.get_group_members(group.id)\n\n    resource = provider.build_group_resource(\n        group, members, mapping.external_id if mapping else None\n    )\n\n    # RFC 7644 §3.4.2.5 — IdP may request certain attributes be omitted\n    excluded = _parse_excluded_attributes(excludedAttributes)\n    if excluded:\n        return ScimJSONResponse(content=_apply_exclusions(resource, excluded))\n\n    return _scim_resource_response(resource)\n\n\n@scim_router.post(\"/Groups\", status_code=201, response_model=None)\ndef create_group(\n    group_resource: ScimGroupResource,\n    _token: ScimToken = Depends(verify_scim_token),\n    provider: ScimProvider = Depends(_get_provider),\n    db_session: Session = Depends(get_session),\n) -> ScimGroupResource | ScimJSONResponse:\n    \"\"\"Create a new group from a SCIM provisioning request.\"\"\"\n    dal = ScimDAL(db_session)\n    dal.update_token_last_used(_token.id)\n\n    if group_resource.displayName in _RESERVED_GROUP_NAMES:\n        return _scim_error_response(\n            409, f\"'{group_resource.displayName}' is a reserved group name.\"\n        )\n\n    if dal.get_group_by_name(group_resource.displayName):\n        return _scim_error_response(\n            409, f\"Group with name '{group_resource.displayName}' already exists\"\n        )\n\n    member_uuids, err = _validate_and_parse_members(group_resource.members, dal)\n    if err:\n        return _scim_error_response(400, err)\n\n    db_group = UserGroup(\n        name=group_resource.displayName,\n        is_up_to_date=True,\n        time_last_modified_by_user=func.now(),\n    )\n    try:\n        dal.add_group(db_group)\n    except IntegrityError:\n        dal.rollback()\n        return _scim_error_response(\n            409, f\"Group with name '{group_resource.displayName}' already exists\"\n        )\n\n    # Every group gets the \"basic\" permission by default.\n    dal.add_permission_grant_to_group(\n        group_id=db_group.id,\n        permission=Permission.BASIC_ACCESS,\n        grant_source=GrantSource.SYSTEM,\n    )\n\n    dal.upsert_group_members(db_group.id, member_uuids)\n\n    # Recompute permissions for initial members.\n    recompute_user_permissions__no_commit(member_uuids, db_session)\n\n    external_id = group_resource.externalId\n    if external_id:\n        dal.create_group_mapping(external_id=external_id, user_group_id=db_group.id)\n\n    dal.commit()\n\n    members = dal.get_group_members(db_group.id)\n    return _scim_resource_response(\n        provider.build_group_resource(db_group, members, external_id),\n        status_code=201,\n    )\n\n\n@scim_router.put(\"/Groups/{group_id}\", response_model=None)\ndef replace_group(\n    group_id: str,\n    group_resource: ScimGroupResource,\n    _token: ScimToken = Depends(verify_scim_token),\n    provider: ScimProvider = Depends(_get_provider),\n    db_session: Session = Depends(get_session),\n) -> ScimGroupResource | ScimJSONResponse:\n    \"\"\"Replace a group entirely (RFC 7644 §3.5.1).\"\"\"\n    dal = ScimDAL(db_session)\n    dal.update_token_last_used(_token.id)\n\n    result = _fetch_group_or_404(group_id, dal)\n    if isinstance(result, ScimJSONResponse):\n        return result\n    group = result\n\n    if group.name in _RESERVED_GROUP_NAMES and group_resource.displayName != group.name:\n        return _scim_error_response(\n            409, f\"'{group.name}' is a reserved group name and cannot be renamed.\"\n        )\n\n    if (\n        group_resource.displayName in _RESERVED_GROUP_NAMES\n        and group_resource.displayName != group.name\n    ):\n        return _scim_error_response(\n            409, f\"'{group_resource.displayName}' is a reserved group name.\"\n        )\n\n    member_uuids, err = _validate_and_parse_members(group_resource.members, dal)\n    if err:\n        return _scim_error_response(400, err)\n\n    # Capture old member IDs before replacing so we can recompute their\n    # permissions after they are removed from the group.\n    old_member_ids = {uid for uid, _ in dal.get_group_members(group.id)}\n\n    dal.update_group(group, name=group_resource.displayName)\n    dal.replace_group_members(group.id, member_uuids)\n    dal.sync_group_external_id(group.id, group_resource.externalId)\n\n    # Recompute permissions for current members (batch) and removed members.\n    recompute_permissions_for_group__no_commit(group.id, db_session)\n    removed_ids = list(old_member_ids - set(member_uuids))\n    recompute_user_permissions__no_commit(removed_ids, db_session)\n\n    dal.commit()\n\n    members = dal.get_group_members(group.id)\n    return _scim_resource_response(\n        provider.build_group_resource(group, members, group_resource.externalId)\n    )\n\n\n@scim_router.patch(\"/Groups/{group_id}\", response_model=None)\ndef patch_group(\n    group_id: str,\n    patch_request: ScimPatchRequest,\n    _token: ScimToken = Depends(verify_scim_token),\n    provider: ScimProvider = Depends(_get_provider),\n    db_session: Session = Depends(get_session),\n) -> ScimGroupResource | ScimJSONResponse:\n    \"\"\"Partially update a group (RFC 7644 §3.5.2).\n\n    Handles member add/remove operations from Okta and Azure AD.\n    \"\"\"\n    dal = ScimDAL(db_session)\n    dal.update_token_last_used(_token.id)\n\n    result = _fetch_group_or_404(group_id, dal)\n    if isinstance(result, ScimJSONResponse):\n        return result\n    group = result\n\n    mapping = dal.get_group_mapping_by_group_id(group.id)\n    external_id = mapping.external_id if mapping else None\n\n    current_members = dal.get_group_members(group.id)\n    current = provider.build_group_resource(group, current_members, external_id)\n\n    try:\n        patched, added_ids, removed_ids = apply_group_patch(\n            patch_request.Operations, current, provider.ignored_patch_paths\n        )\n    except ScimPatchError as e:\n        return _scim_error_response(e.status, e.detail)\n\n    new_name = patched.displayName if patched.displayName != group.name else None\n\n    if group.name in _RESERVED_GROUP_NAMES and new_name:\n        return _scim_error_response(\n            409, f\"'{group.name}' is a reserved group name and cannot be renamed.\"\n        )\n\n    if new_name and new_name in _RESERVED_GROUP_NAMES:\n        return _scim_error_response(409, f\"'{new_name}' is a reserved group name.\")\n\n    dal.update_group(group, name=new_name)\n\n    affected_uuids: list[UUID] = []\n\n    if added_ids:\n        add_uuids = [UUID(mid) for mid in added_ids if _is_valid_uuid(mid)]\n        if add_uuids:\n            missing = dal.validate_member_ids(add_uuids)\n            if missing:\n                return _scim_error_response(\n                    400,\n                    f\"Member(s) not found: {', '.join(str(u) for u in missing)}\",\n                )\n            dal.upsert_group_members(group.id, add_uuids)\n            affected_uuids.extend(add_uuids)\n\n    if removed_ids:\n        remove_uuids = [UUID(mid) for mid in removed_ids if _is_valid_uuid(mid)]\n        dal.remove_group_members(group.id, remove_uuids)\n        affected_uuids.extend(remove_uuids)\n\n    # Recompute permissions for all users whose group membership changed.\n    recompute_user_permissions__no_commit(affected_uuids, db_session)\n\n    dal.sync_group_external_id(group.id, patched.externalId)\n    dal.commit()\n\n    members = dal.get_group_members(group.id)\n    return _scim_resource_response(\n        provider.build_group_resource(group, members, patched.externalId)\n    )\n\n\n@scim_router.delete(\"/Groups/{group_id}\", status_code=204, response_model=None)\ndef delete_group(\n    group_id: str,\n    _token: ScimToken = Depends(verify_scim_token),\n    db_session: Session = Depends(get_session),\n) -> Response | ScimJSONResponse:\n    \"\"\"Delete a group (RFC 7644 §3.6).\"\"\"\n    dal = ScimDAL(db_session)\n    dal.update_token_last_used(_token.id)\n\n    result = _fetch_group_or_404(group_id, dal)\n    if isinstance(result, ScimJSONResponse):\n        return result\n    group = result\n\n    if group.name in _RESERVED_GROUP_NAMES:\n        return _scim_error_response(409, f\"'{group.name}' is a reserved group name.\")\n\n    # Capture member IDs before deletion so we can recompute their permissions.\n    affected_user_ids = [uid for uid, _ in dal.get_group_members(group.id)]\n\n    mapping = dal.get_group_mapping_by_group_id(group.id)\n    if mapping:\n        dal.delete_group_mapping(mapping.id)\n\n    dal.delete_group_with_members(group)\n\n    # Recompute permissions for users who lost this group membership.\n    recompute_user_permissions__no_commit(affected_user_ids, db_session)\n\n    dal.commit()\n\n    return Response(status_code=204)\n\n\ndef _is_valid_uuid(value: str) -> bool:\n    \"\"\"Check if a string is a valid UUID.\"\"\"\n    try:\n        UUID(value)\n        return True\n    except ValueError:\n        return False\n"
  },
  {
    "path": "backend/ee/onyx/server/scim/auth.py",
    "content": "\"\"\"SCIM bearer token authentication.\n\nSCIM endpoints are authenticated via bearer tokens that admins create in the\nOnyx UI. This module provides:\n\n  - ``verify_scim_token``: FastAPI dependency that extracts, hashes, and\n    validates the token from the Authorization header.\n  - ``generate_scim_token``: Creates a new cryptographically random token\n    and returns the raw value, its SHA-256 hash, and a display suffix.\n\nToken format: ``onyx_scim_<random>`` where ``<random>`` is 48 bytes of\nURL-safe base64 from ``secrets.token_urlsafe``.\n\nThe hash is stored in the ``scim_token`` table; the raw value is shown to\nthe admin exactly once at creation time.\n\"\"\"\n\nimport hashlib\nimport secrets\n\nfrom fastapi import Depends\nfrom fastapi import Request\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.scim import ScimDAL\nfrom onyx.auth.utils import get_hashed_bearer_token_from_request\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import ScimToken\n\n\nclass ScimAuthError(Exception):\n    \"\"\"Raised when SCIM bearer token authentication fails.\n\n    Unlike HTTPException, this carries the status and detail so the SCIM\n    exception handler can wrap them in an RFC 7644 §3.12 error envelope\n    with ``schemas`` and ``status`` fields.\n    \"\"\"\n\n    def __init__(self, status_code: int, detail: str) -> None:\n        self.status_code = status_code\n        self.detail = detail\n        super().__init__(detail)\n\n\nSCIM_TOKEN_PREFIX = \"onyx_scim_\"\nSCIM_TOKEN_LENGTH = 48\n\n\ndef _hash_scim_token(token: str) -> str:\n    \"\"\"SHA-256 hash a SCIM token. No salt needed — tokens are random.\"\"\"\n    return hashlib.sha256(token.encode(\"utf-8\")).hexdigest()\n\n\ndef generate_scim_token() -> tuple[str, str, str]:\n    \"\"\"Generate a new SCIM bearer token.\n\n    Returns:\n        A tuple of ``(raw_token, hashed_token, token_display)`` where\n        ``token_display`` is a masked version showing only the last 4 chars.\n    \"\"\"\n    raw_token = SCIM_TOKEN_PREFIX + secrets.token_urlsafe(SCIM_TOKEN_LENGTH)\n    hashed_token = _hash_scim_token(raw_token)\n    token_display = SCIM_TOKEN_PREFIX + \"****\" + raw_token[-4:]\n    return raw_token, hashed_token, token_display\n\n\ndef _get_hashed_scim_token_from_request(request: Request) -> str | None:\n    \"\"\"Extract and hash a SCIM token from the request Authorization header.\"\"\"\n    return get_hashed_bearer_token_from_request(\n        request,\n        valid_prefixes=[SCIM_TOKEN_PREFIX],\n        hash_fn=_hash_scim_token,\n    )\n\n\ndef _get_scim_dal(db_session: Session = Depends(get_session)) -> ScimDAL:\n    return ScimDAL(db_session)\n\n\ndef verify_scim_token(\n    request: Request,\n    dal: ScimDAL = Depends(_get_scim_dal),\n) -> ScimToken:\n    \"\"\"FastAPI dependency that authenticates SCIM requests.\n\n    Extracts the bearer token from the Authorization header, hashes it,\n    looks it up in the database, and verifies it is active.\n\n    Note:\n        This dependency does NOT update ``last_used_at`` — the endpoint\n        should do that via ``ScimDAL.update_token_last_used()`` so the\n        timestamp write is part of the endpoint's transaction.\n\n    Raises:\n        HTTPException(401): If the token is missing, invalid, or inactive.\n    \"\"\"\n    hashed = _get_hashed_scim_token_from_request(request)\n    if not hashed:\n        raise ScimAuthError(401, \"Missing or invalid SCIM bearer token\")\n\n    token = dal.get_token_by_hash(hashed)\n\n    if not token:\n        raise ScimAuthError(401, \"Invalid SCIM bearer token\")\n\n    if not token.is_active:\n        raise ScimAuthError(401, \"SCIM token has been revoked\")\n\n    return token\n"
  },
  {
    "path": "backend/ee/onyx/server/scim/filtering.py",
    "content": "\"\"\"SCIM filter expression parser (RFC 7644 §3.4.2.2).\n\nIdentity providers (Okta, Azure AD, OneLogin, etc.) use filters to look up\nresources before deciding whether to create or update them. For example, when\nan admin assigns a user to the Onyx app, the IdP first checks whether that\nuser already exists::\n\n    GET /scim/v2/Users?filter=userName eq \"john@example.com\"\n\nIf zero results come back the IdP creates the user (``POST``); if a match is\nfound it links to the existing record and uses ``PUT``/``PATCH`` going forward.\nThe same pattern applies to groups (``displayName eq \"Engineering\"``).\n\nThis module parses the subset of the SCIM filter grammar that identity\nproviders actually send in practice:\n\n    attribute SP operator SP value\n\nSupported operators: ``eq``, ``co`` (contains), ``sw`` (starts with).\nCompound filters (``and`` / ``or``) are not supported; if an IdP sends one\nthe parser returns ``None`` and the caller falls back to an unfiltered list.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport re\nfrom dataclasses import dataclass\nfrom enum import Enum\n\n\nclass ScimFilterOperator(str, Enum):\n    \"\"\"Supported SCIM filter operators.\"\"\"\n\n    EQUAL = \"eq\"\n    CONTAINS = \"co\"\n    STARTS_WITH = \"sw\"\n\n\n@dataclass(frozen=True, slots=True)\nclass ScimFilter:\n    \"\"\"Parsed SCIM filter expression.\"\"\"\n\n    attribute: str\n    operator: ScimFilterOperator\n    value: str\n\n\n# Matches: attribute operator \"value\" (with or without quotes around value)\n# Groups: (attribute) (operator) (\"quoted value\" | unquoted_value)\n_FILTER_RE = re.compile(\n    r\"^(\\S+)\\s+(eq|co|sw)\\s+\"  # attribute + operator\n    r'(?:\"([^\"]*)\"'  # quoted value\n    r\"|'([^']*)')\"  # or single-quoted value\n    r\"$\",\n    re.IGNORECASE,\n)\n\n\ndef parse_scim_filter(filter_string: str | None) -> ScimFilter | None:\n    \"\"\"Parse a simple SCIM filter expression.\n\n    Args:\n        filter_string: Raw filter query parameter value, e.g.\n            ``'userName eq \"john@example.com\"'``\n\n    Returns:\n        A ``ScimFilter`` if the expression is valid and uses a supported\n        operator, or ``None`` if the input is empty / missing.\n\n    Raises:\n        ValueError: If the filter string is present but malformed or uses\n            an unsupported operator.\n    \"\"\"\n    if not filter_string or not filter_string.strip():\n        return None\n\n    match = _FILTER_RE.match(filter_string.strip())\n    if not match:\n        raise ValueError(f\"Unsupported or malformed SCIM filter: {filter_string}\")\n\n    return _build_filter(match, filter_string)\n\n\ndef _build_filter(match: re.Match[str], raw: str) -> ScimFilter:\n    \"\"\"Extract fields from a regex match and construct a ScimFilter.\"\"\"\n    attribute = match.group(1)\n    op_str = match.group(2).lower()\n    # Value is in group 3 (double-quoted) or group 4 (single-quoted)\n    value = match.group(3) if match.group(3) is not None else match.group(4)\n\n    if value is None:\n        raise ValueError(f\"Unsupported or malformed SCIM filter: {raw}\")\n\n    operator = ScimFilterOperator(op_str)\n\n    return ScimFilter(attribute=attribute, operator=operator, value=value)\n"
  },
  {
    "path": "backend/ee/onyx/server/scim/models.py",
    "content": "\"\"\"Pydantic schemas for SCIM 2.0 provisioning (RFC 7643 / RFC 7644).\n\nSCIM protocol schemas follow the wire format defined in:\n  - Core Schema: https://datatracker.ietf.org/doc/html/rfc7643\n  - Protocol:    https://datatracker.ietf.org/doc/html/rfc7644\n\nAdmin API schemas are internal to Onyx and used for SCIM token management.\n\"\"\"\n\nfrom dataclasses import dataclass\nfrom datetime import datetime\nfrom enum import Enum\n\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom pydantic import Field\nfrom pydantic import field_validator\n\n\n# ---------------------------------------------------------------------------\n# SCIM Schema URIs (RFC 7643 §8)\n# Every SCIM JSON payload includes a \"schemas\" array identifying its type.\n# IdPs like Okta/Azure AD use these URIs to determine how to parse responses.\n# ---------------------------------------------------------------------------\n\nSCIM_USER_SCHEMA = \"urn:ietf:params:scim:schemas:core:2.0:User\"\nSCIM_GROUP_SCHEMA = \"urn:ietf:params:scim:schemas:core:2.0:Group\"\nSCIM_LIST_RESPONSE_SCHEMA = \"urn:ietf:params:scim:api:messages:2.0:ListResponse\"\nSCIM_PATCH_OP_SCHEMA = \"urn:ietf:params:scim:api:messages:2.0:PatchOp\"\nSCIM_ERROR_SCHEMA = \"urn:ietf:params:scim:api:messages:2.0:Error\"\nSCIM_SERVICE_PROVIDER_CONFIG_SCHEMA = (\n    \"urn:ietf:params:scim:schemas:core:2.0:ServiceProviderConfig\"\n)\nSCIM_RESOURCE_TYPE_SCHEMA = \"urn:ietf:params:scim:schemas:core:2.0:ResourceType\"\nSCIM_SCHEMA_SCHEMA = \"urn:ietf:params:scim:schemas:core:2.0:Schema\"\nSCIM_ENTERPRISE_USER_SCHEMA = (\n    \"urn:ietf:params:scim:schemas:extension:enterprise:2.0:User\"\n)\n\n\n# ---------------------------------------------------------------------------\n# SCIM Protocol Schemas\n# ---------------------------------------------------------------------------\n\n\nclass ScimName(BaseModel):\n    \"\"\"User name components (RFC 7643 §4.1.1).\"\"\"\n\n    givenName: str | None = None\n    familyName: str | None = None\n    formatted: str | None = None\n\n\nclass ScimEmail(BaseModel):\n    \"\"\"Email sub-attribute (RFC 7643 §4.1.2).\"\"\"\n\n    value: str\n    type: str | None = None\n    primary: bool = False\n\n\nclass ScimMeta(BaseModel):\n    \"\"\"Resource metadata (RFC 7643 §3.1).\"\"\"\n\n    resourceType: str | None = None\n    created: datetime | None = None\n    lastModified: datetime | None = None\n    location: str | None = None\n\n\nclass ScimUserGroupRef(BaseModel):\n    \"\"\"Group reference within a User resource (RFC 7643 §4.1.2, read-only).\"\"\"\n\n    value: str\n    display: str | None = None\n\n\nclass ScimManagerRef(BaseModel):\n    \"\"\"Manager sub-attribute for the enterprise extension (RFC 7643 §4.3).\"\"\"\n\n    value: str | None = None\n\n\nclass ScimEnterpriseExtension(BaseModel):\n    \"\"\"Enterprise User extension attributes (RFC 7643 §4.3).\"\"\"\n\n    department: str | None = None\n    manager: ScimManagerRef | None = None\n\n\n@dataclass\nclass ScimMappingFields:\n    \"\"\"Stored SCIM mapping fields that need to round-trip through the IdP.\n\n    Entra ID sends structured name components, email metadata, and enterprise\n    extension attributes that must be returned verbatim in subsequent GET\n    responses. These fields are persisted on ScimUserMapping and threaded\n    through the DAL, provider, and endpoint layers.\n    \"\"\"\n\n    department: str | None = None\n    manager: str | None = None\n    given_name: str | None = None\n    family_name: str | None = None\n    scim_emails_json: str | None = None\n\n\nclass ScimUserResource(BaseModel):\n    \"\"\"SCIM User resource representation (RFC 7643 §4.1).\n\n    This is the JSON shape that IdPs send when creating/updating a user via\n    SCIM, and the shape we return in GET responses. Field names use camelCase\n    to match the SCIM wire format (not Python convention).\n    \"\"\"\n\n    model_config = ConfigDict(populate_by_name=True)\n\n    schemas: list[str] = Field(default_factory=lambda: [SCIM_USER_SCHEMA])\n    id: str | None = None  # Onyx's internal user ID, set on responses\n    externalId: str | None = None  # IdP's identifier for this user\n    userName: str  # Typically the user's email address\n    name: ScimName | None = None\n    displayName: str | None = None\n    emails: list[ScimEmail] = Field(default_factory=list)\n    active: bool = True\n    groups: list[ScimUserGroupRef] = Field(default_factory=list)\n    meta: ScimMeta | None = None\n    enterprise_extension: ScimEnterpriseExtension | None = Field(\n        default=None,\n        alias=\"urn:ietf:params:scim:schemas:extension:enterprise:2.0:User\",\n    )\n\n\nclass ScimGroupMember(BaseModel):\n    \"\"\"Group member reference (RFC 7643 §4.2).\n\n    Represents a user within a SCIM group. The IdP sends these when adding\n    or removing users from groups. ``value`` is the Onyx user ID.\n    \"\"\"\n\n    value: str  # User ID of the group member\n    display: str | None = None\n\n\nclass ScimGroupResource(BaseModel):\n    \"\"\"SCIM Group resource representation (RFC 7643 §4.2).\"\"\"\n\n    schemas: list[str] = Field(default_factory=lambda: [SCIM_GROUP_SCHEMA])\n    id: str | None = None\n    externalId: str | None = None\n    displayName: str\n    members: list[ScimGroupMember] = Field(default_factory=list)\n    meta: ScimMeta | None = None\n\n\nclass ScimListResponse(BaseModel):\n    \"\"\"Paginated list response (RFC 7644 §3.4.2).\"\"\"\n\n    schemas: list[str] = Field(default_factory=lambda: [SCIM_LIST_RESPONSE_SCHEMA])\n    totalResults: int\n    startIndex: int = 1\n    itemsPerPage: int = 100\n    Resources: list[ScimUserResource | ScimGroupResource] = Field(default_factory=list)\n\n\nclass ScimPatchOperationType(str, Enum):\n    \"\"\"Supported PATCH operations (RFC 7644 §3.5.2).\"\"\"\n\n    ADD = \"add\"\n    REPLACE = \"replace\"\n    REMOVE = \"remove\"\n\n\nclass ScimPatchResourceValue(BaseModel):\n    \"\"\"Partial resource dict for path-less PATCH replace operations.\n\n    When an IdP sends a PATCH without a ``path``, the ``value`` is a dict\n    of resource attributes to set.  IdPs may include read-only fields\n    (``id``, ``schemas``, ``meta``) alongside actual changes — these are\n    stripped by the provider's ``ignored_patch_paths`` before processing.\n\n    ``extra=\"allow\"`` lets unknown attributes pass through so the patch\n    handler can decide what to do with them (ignore or reject).\n    \"\"\"\n\n    model_config = ConfigDict(extra=\"allow\")\n\n    active: bool | None = None\n    userName: str | None = None\n    displayName: str | None = None\n    externalId: str | None = None\n    name: ScimName | None = None\n    members: list[ScimGroupMember] | None = None\n    id: str | None = None\n    schemas: list[str] | None = None\n    meta: ScimMeta | None = None\n\n\nScimPatchValue = str | bool | list[ScimGroupMember] | ScimPatchResourceValue | None\n\n\nclass ScimPatchOperation(BaseModel):\n    \"\"\"Single PATCH operation (RFC 7644 §3.5.2).\"\"\"\n\n    op: ScimPatchOperationType\n    path: str | None = None\n    value: ScimPatchValue = None\n\n    @field_validator(\"op\", mode=\"before\")\n    @classmethod\n    def normalize_operation(cls, v: object) -> object:\n        \"\"\"Normalize op to lowercase for case-insensitive matching.\n\n        Some IdPs (e.g. Entra ID) send capitalized ops like ``\"Replace\"``\n        instead of ``\"replace\"``. This is safe for all providers since the\n        enum values are lowercase. If a future provider requires other\n        pre-processing quirks, move patch deserialization into the provider\n        subclass instead of adding more special cases here.\n        \"\"\"\n        return v.lower() if isinstance(v, str) else v\n\n\nclass ScimPatchRequest(BaseModel):\n    \"\"\"PATCH request body (RFC 7644 §3.5.2).\n\n    IdPs use PATCH to make incremental changes — e.g. deactivating a user\n    (replace active=false) or adding/removing group members — instead of\n    replacing the entire resource with PUT.\n    \"\"\"\n\n    schemas: list[str] = Field(default_factory=lambda: [SCIM_PATCH_OP_SCHEMA])\n    Operations: list[ScimPatchOperation]\n\n\nclass ScimError(BaseModel):\n    \"\"\"SCIM error response (RFC 7644 §3.12).\"\"\"\n\n    schemas: list[str] = Field(default_factory=lambda: [SCIM_ERROR_SCHEMA])\n    status: str\n    detail: str | None = None\n    scimType: str | None = None\n\n\n# ---------------------------------------------------------------------------\n# Service Provider Configuration (RFC 7643 §5)\n# ---------------------------------------------------------------------------\n\n\nclass ScimSupported(BaseModel):\n    \"\"\"Generic supported/not-supported flag used in ServiceProviderConfig.\"\"\"\n\n    supported: bool\n\n\nclass ScimFilterConfig(BaseModel):\n    \"\"\"Filter configuration within ServiceProviderConfig (RFC 7643 §5).\"\"\"\n\n    supported: bool\n    maxResults: int = 100\n\n\nclass ScimServiceProviderConfig(BaseModel):\n    \"\"\"SCIM ServiceProviderConfig resource (RFC 7643 §5).\n\n    Served at GET /scim/v2/ServiceProviderConfig. IdPs fetch this during\n    initial setup to discover which SCIM features our server supports\n    (e.g. PATCH yes, bulk no, filtering yes).\n    \"\"\"\n\n    schemas: list[str] = Field(\n        default_factory=lambda: [SCIM_SERVICE_PROVIDER_CONFIG_SCHEMA]\n    )\n    patch: ScimSupported = ScimSupported(supported=True)\n    bulk: ScimSupported = ScimSupported(supported=False)\n    filter: ScimFilterConfig = ScimFilterConfig(supported=True)\n    changePassword: ScimSupported = ScimSupported(supported=False)\n    sort: ScimSupported = ScimSupported(supported=False)\n    etag: ScimSupported = ScimSupported(supported=False)\n    authenticationSchemes: list[dict[str, str]] = Field(\n        default_factory=lambda: [\n            {\n                \"type\": \"oauthbearertoken\",\n                \"name\": \"OAuth Bearer Token\",\n                \"description\": \"Authentication scheme using a SCIM bearer token\",\n            }\n        ]\n    )\n\n\nclass ScimSchemaAttribute(BaseModel):\n    \"\"\"Attribute definition within a SCIM Schema (RFC 7643 §7).\"\"\"\n\n    name: str\n    type: str\n    multiValued: bool = False\n    required: bool = False\n    description: str = \"\"\n    caseExact: bool = False\n    mutability: str = \"readWrite\"\n    returned: str = \"default\"\n    uniqueness: str = \"none\"\n    subAttributes: list[\"ScimSchemaAttribute\"] = Field(default_factory=list)\n\n\nclass ScimSchemaDefinition(BaseModel):\n    \"\"\"SCIM Schema definition (RFC 7643 §7).\n\n    Served at GET /scim/v2/Schemas. Describes the attributes available\n    on each resource type so IdPs know which fields they can provision.\n    \"\"\"\n\n    schemas: list[str] = Field(default_factory=lambda: [SCIM_SCHEMA_SCHEMA])\n    id: str\n    name: str\n    description: str\n    attributes: list[ScimSchemaAttribute] = Field(default_factory=list)\n\n\nclass ScimSchemaExtension(BaseModel):\n    \"\"\"Schema extension reference within ResourceType (RFC 7643 §6).\"\"\"\n\n    model_config = ConfigDict(populate_by_name=True, serialize_by_alias=True)\n\n    schema_: str = Field(alias=\"schema\")\n    required: bool\n\n\nclass ScimResourceType(BaseModel):\n    \"\"\"SCIM ResourceType resource (RFC 7643 §6).\n\n    Served at GET /scim/v2/ResourceTypes. Tells the IdP which resource\n    types are available (Users, Groups) and their respective endpoints.\n    \"\"\"\n\n    model_config = ConfigDict(populate_by_name=True, serialize_by_alias=True)\n\n    schemas: list[str] = Field(default_factory=lambda: [SCIM_RESOURCE_TYPE_SCHEMA])\n    id: str\n    name: str\n    endpoint: str\n    description: str | None = None\n    schema_: str = Field(alias=\"schema\")\n    schemaExtensions: list[ScimSchemaExtension] = Field(default_factory=list)\n\n\n# ---------------------------------------------------------------------------\n# Admin API Schemas (Onyx-internal, for SCIM token management)\n# These are NOT part of the SCIM protocol. They power the Onyx admin UI\n# where admins create/revoke the bearer tokens that IdPs use to authenticate.\n# ---------------------------------------------------------------------------\n\n\nclass ScimTokenCreate(BaseModel):\n    \"\"\"Request to create a new SCIM bearer token.\"\"\"\n\n    name: str\n\n\nclass ScimTokenResponse(BaseModel):\n    \"\"\"SCIM token metadata returned in list/get responses.\"\"\"\n\n    id: int\n    name: str\n    token_display: str\n    is_active: bool\n    created_at: datetime\n    last_used_at: datetime | None = None\n    idp_domain: str | None = None\n\n\nclass ScimTokenCreatedResponse(ScimTokenResponse):\n    \"\"\"Response returned when a new SCIM token is created.\n\n    Includes the raw token value which is only available at creation time.\n    \"\"\"\n\n    raw_token: str\n"
  },
  {
    "path": "backend/ee/onyx/server/scim/patch.py",
    "content": "\"\"\"SCIM PATCH operation handler (RFC 7644 §3.5.2).\n\nIdentity providers use PATCH to make incremental changes to SCIM resources\ninstead of replacing the entire resource with PUT. Common operations include:\n\n  - Deactivating a user: ``replace`` ``active`` with ``false``\n  - Adding group members: ``add`` to ``members``\n  - Removing group members: ``remove`` from ``members[value eq \"...\"]``\n\nThis module applies PATCH operations to Pydantic SCIM resource objects and\nreturns the modified result. It does NOT touch the database — the caller is\nresponsible for persisting changes.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport logging\nimport re\nfrom dataclasses import dataclass\nfrom dataclasses import field\nfrom typing import Any\n\nfrom ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA\nfrom ee.onyx.server.scim.models import ScimGroupMember\nfrom ee.onyx.server.scim.models import ScimGroupResource\nfrom ee.onyx.server.scim.models import ScimPatchOperation\nfrom ee.onyx.server.scim.models import ScimPatchOperationType\nfrom ee.onyx.server.scim.models import ScimPatchResourceValue\nfrom ee.onyx.server.scim.models import ScimPatchValue\nfrom ee.onyx.server.scim.models import ScimUserResource\n\nlogger = logging.getLogger(__name__)\n\n# Lowercased enterprise extension URN for case-insensitive matching\n_ENTERPRISE_URN_LOWER = SCIM_ENTERPRISE_USER_SCHEMA.lower()\n\n# Pattern for email filter paths, e.g.:\n#   emails[primary eq true].value  (Okta)\n#   emails[type eq \"work\"].value   (Azure AD / Entra ID)\n_EMAIL_FILTER_RE = re.compile(\n    r\"^emails\\[.+\\]\\.value$\",\n    re.IGNORECASE,\n)\n\n# Pattern for member removal path: members[value eq \"user-id\"]\n_MEMBER_FILTER_RE = re.compile(\n    r'^members\\[value\\s+eq\\s+\"([^\"]+)\"\\]$',\n    re.IGNORECASE,\n)\n\n# ---------------------------------------------------------------------------\n# Dispatch tables for user PATCH paths\n#\n# Maps lowercased SCIM path → (camelCase key, target dict name).\n# \"data\" writes to the top-level resource dict, \"name\" writes to the\n# name sub-object dict. This replaces the elif chains for simple fields.\n# ---------------------------------------------------------------------------\n\n_USER_REPLACE_PATHS: dict[str, tuple[str, str]] = {\n    \"active\": (\"active\", \"data\"),\n    \"username\": (\"userName\", \"data\"),\n    \"externalid\": (\"externalId\", \"data\"),\n    \"name.givenname\": (\"givenName\", \"name\"),\n    \"name.familyname\": (\"familyName\", \"name\"),\n    \"name.formatted\": (\"formatted\", \"name\"),\n}\n\n_USER_REMOVE_PATHS: dict[str, tuple[str, str]] = {\n    \"externalid\": (\"externalId\", \"data\"),\n    \"name.givenname\": (\"givenName\", \"name\"),\n    \"name.familyname\": (\"familyName\", \"name\"),\n    \"name.formatted\": (\"formatted\", \"name\"),\n    \"displayname\": (\"displayName\", \"data\"),\n}\n\n_GROUP_REPLACE_PATHS: dict[str, tuple[str, str]] = {\n    \"displayname\": (\"displayName\", \"data\"),\n    \"externalid\": (\"externalId\", \"data\"),\n}\n\n\nclass ScimPatchError(Exception):\n    \"\"\"Raised when a PATCH operation cannot be applied.\"\"\"\n\n    def __init__(self, detail: str, status: int = 400) -> None:\n        self.detail = detail\n        self.status = status\n        super().__init__(detail)\n\n\n@dataclass\nclass _UserPatchCtx:\n    \"\"\"Bundles the mutable state for user PATCH operations.\"\"\"\n\n    data: dict[str, Any]\n    name_data: dict[str, Any]\n    ent_data: dict[str, str | None] = field(default_factory=dict)\n\n\n# ---------------------------------------------------------------------------\n# User PATCH\n# ---------------------------------------------------------------------------\n\n\ndef apply_user_patch(\n    operations: list[ScimPatchOperation],\n    current: ScimUserResource,\n    ignored_paths: frozenset[str] = frozenset(),\n) -> tuple[ScimUserResource, dict[str, str | None]]:\n    \"\"\"Apply SCIM PATCH operations to a user resource.\n\n    Args:\n        operations: The PATCH operations to apply.\n        current: The current user resource state.\n        ignored_paths: SCIM attribute paths to silently skip (from provider).\n\n    Returns:\n        A tuple of (modified user resource, enterprise extension data dict).\n        The enterprise dict has keys ``\"department\"`` and ``\"manager\"``\n        with values set only when a PATCH operation touched them.\n\n    Raises:\n        ScimPatchError: If an operation targets an unsupported path.\n    \"\"\"\n    data = current.model_dump()\n    ctx = _UserPatchCtx(data=data, name_data=data.get(\"name\") or {})\n\n    for op in operations:\n        if op.op in (ScimPatchOperationType.REPLACE, ScimPatchOperationType.ADD):\n            _apply_user_replace(op, ctx, ignored_paths)\n        elif op.op == ScimPatchOperationType.REMOVE:\n            _apply_user_remove(op, ctx, ignored_paths)\n        else:\n            raise ScimPatchError(\n                f\"Unsupported operation '{op.op.value}' on User resource\"\n            )\n\n    ctx.data[\"name\"] = ctx.name_data\n    return ScimUserResource.model_validate(ctx.data), ctx.ent_data\n\n\ndef _apply_user_replace(\n    op: ScimPatchOperation,\n    ctx: _UserPatchCtx,\n    ignored_paths: frozenset[str],\n) -> None:\n    \"\"\"Apply a replace/add operation to user data.\"\"\"\n    path = (op.path or \"\").lower()\n\n    if not path:\n        # No path — value is a resource dict of top-level attributes to set.\n        if isinstance(op.value, ScimPatchResourceValue):\n            for key, val in op.value.model_dump(exclude_unset=True).items():\n                _set_user_field(key.lower(), val, ctx, ignored_paths, strict=False)\n        else:\n            raise ScimPatchError(\"Replace without path requires a dict value\")\n        return\n\n    _set_user_field(path, op.value, ctx, ignored_paths)\n\n\ndef _apply_user_remove(\n    op: ScimPatchOperation,\n    ctx: _UserPatchCtx,\n    ignored_paths: frozenset[str],\n) -> None:\n    \"\"\"Apply a remove operation to user data — clears the target field.\"\"\"\n    path = (op.path or \"\").lower()\n    if not path:\n        raise ScimPatchError(\"Remove operation requires a path\")\n\n    if path in ignored_paths:\n        return\n\n    entry = _USER_REMOVE_PATHS.get(path)\n    if entry:\n        key, target = entry\n        target_dict = ctx.data if target == \"data\" else ctx.name_data\n        target_dict[key] = None\n        return\n\n    raise ScimPatchError(f\"Unsupported remove path '{path}' for User PATCH\")\n\n\ndef _set_user_field(\n    path: str,\n    value: ScimPatchValue,\n    ctx: _UserPatchCtx,\n    ignored_paths: frozenset[str],\n    *,\n    strict: bool = True,\n) -> None:\n    \"\"\"Set a single field on user data by SCIM path.\n\n    Args:\n        strict: When ``False`` (path-less replace), unknown attributes are\n            silently skipped.  When ``True`` (explicit path), they raise.\n    \"\"\"\n    if path in ignored_paths:\n        return\n\n    # Simple field writes handled by the dispatch table\n    entry = _USER_REPLACE_PATHS.get(path)\n    if entry:\n        key, target = entry\n        target_dict = ctx.data if target == \"data\" else ctx.name_data\n        target_dict[key] = value\n        return\n\n    # displayName sets both the top-level field and the name.formatted sub-field\n    if path == \"displayname\":\n        ctx.data[\"displayName\"] = value\n        ctx.name_data[\"formatted\"] = value\n    elif path == \"name\":\n        if isinstance(value, dict):\n            for k, v in value.items():\n                ctx.name_data[k] = v\n    elif path == \"emails\":\n        if isinstance(value, list):\n            ctx.data[\"emails\"] = value\n    elif _EMAIL_FILTER_RE.match(path):\n        _update_primary_email(ctx.data, value)\n    elif path.startswith(_ENTERPRISE_URN_LOWER):\n        _set_enterprise_field(path, value, ctx.ent_data)\n    elif not strict:\n        return\n    else:\n        raise ScimPatchError(f\"Unsupported path '{path}' for User PATCH\")\n\n\ndef _update_primary_email(data: dict[str, Any], value: ScimPatchValue) -> None:\n    \"\"\"Update the primary email entry via an email filter path.\"\"\"\n    emails: list[dict] = data.get(\"emails\") or []\n    for email_entry in emails:\n        if email_entry.get(\"primary\"):\n            email_entry[\"value\"] = value\n            break\n    else:\n        emails.append({\"value\": value, \"type\": \"work\", \"primary\": True})\n    data[\"emails\"] = emails\n\n\ndef _to_dict(value: ScimPatchValue) -> dict | None:\n    \"\"\"Coerce a SCIM patch value to a plain dict if possible.\n\n    Pydantic may parse raw dicts as ``ScimPatchResourceValue`` (which uses\n    ``extra=\"allow\"``), so we also dump those back to a dict.\n    \"\"\"\n    if isinstance(value, dict):\n        return value\n    if isinstance(value, ScimPatchResourceValue):\n        return value.model_dump(exclude_unset=True)\n    return None\n\n\ndef _set_enterprise_field(\n    path: str,\n    value: ScimPatchValue,\n    ent_data: dict[str, str | None],\n) -> None:\n    \"\"\"Handle enterprise extension URN paths or value dicts.\"\"\"\n    # Full URN as key with dict value (path-less PATCH)\n    # e.g. key=\"urn:...:user\", value={\"department\": \"Eng\", \"manager\": {...}}\n    if path == _ENTERPRISE_URN_LOWER:\n        d = _to_dict(value)\n        if d is not None:\n            if \"department\" in d:\n                ent_data[\"department\"] = d[\"department\"]\n            if \"manager\" in d:\n                mgr = d[\"manager\"]\n                if isinstance(mgr, dict):\n                    ent_data[\"manager\"] = mgr.get(\"value\")\n        return\n\n    # Dotted URN path, e.g. \"urn:...:user:department\"\n    suffix = path[len(_ENTERPRISE_URN_LOWER) :].lstrip(\":\").lower()\n    if suffix == \"department\":\n        ent_data[\"department\"] = str(value) if value is not None else None\n    elif suffix == \"manager\":\n        d = _to_dict(value)\n        if d is not None:\n            ent_data[\"manager\"] = d.get(\"value\")\n        elif isinstance(value, str):\n            ent_data[\"manager\"] = value\n    else:\n        # Unknown enterprise attributes are silently ignored rather than\n        # rejected — IdPs may send attributes we don't model yet.\n        logger.warning(\"Ignoring unknown enterprise extension attribute '%s'\", suffix)\n\n\n# ---------------------------------------------------------------------------\n# Group PATCH\n# ---------------------------------------------------------------------------\n\n\ndef apply_group_patch(\n    operations: list[ScimPatchOperation],\n    current: ScimGroupResource,\n    ignored_paths: frozenset[str] = frozenset(),\n) -> tuple[ScimGroupResource, list[str], list[str]]:\n    \"\"\"Apply SCIM PATCH operations to a group resource.\n\n    Args:\n        operations: The PATCH operations to apply.\n        current: The current group resource state.\n        ignored_paths: SCIM attribute paths to silently skip (from provider).\n\n    Returns:\n        A tuple of (modified group, added member IDs, removed member IDs).\n        The caller uses the member ID lists to update the database.\n\n    Raises:\n        ScimPatchError: If an operation targets an unsupported path.\n    \"\"\"\n    data = current.model_dump()\n    current_members: list[dict] = list(data.get(\"members\") or [])\n    added_ids: list[str] = []\n    removed_ids: list[str] = []\n\n    for op in operations:\n        if op.op == ScimPatchOperationType.REPLACE:\n            _apply_group_replace(\n                op, data, current_members, added_ids, removed_ids, ignored_paths\n            )\n        elif op.op == ScimPatchOperationType.ADD:\n            _apply_group_add(op, current_members, added_ids)\n        elif op.op == ScimPatchOperationType.REMOVE:\n            _apply_group_remove(op, current_members, removed_ids)\n        else:\n            raise ScimPatchError(\n                f\"Unsupported operation '{op.op.value}' on Group resource\"\n            )\n\n    data[\"members\"] = current_members\n    group = ScimGroupResource.model_validate(data)\n    return group, added_ids, removed_ids\n\n\ndef _apply_group_replace(\n    op: ScimPatchOperation,\n    data: dict,\n    current_members: list[dict],\n    added_ids: list[str],\n    removed_ids: list[str],\n    ignored_paths: frozenset[str],\n) -> None:\n    \"\"\"Apply a replace operation to group data.\"\"\"\n    path = (op.path or \"\").lower()\n\n    if not path:\n        if isinstance(op.value, ScimPatchResourceValue):\n            dumped = op.value.model_dump(exclude_unset=True)\n            for key, val in dumped.items():\n                if key.lower() == \"members\":\n                    _replace_members(val, current_members, added_ids, removed_ids)\n                else:\n                    _set_group_field(key.lower(), val, data, ignored_paths)\n        else:\n            raise ScimPatchError(\"Replace without path requires a dict value\")\n        return\n\n    if path == \"members\":\n        _replace_members(\n            _members_to_dicts(op.value), current_members, added_ids, removed_ids\n        )\n        return\n\n    _set_group_field(path, op.value, data, ignored_paths)\n\n\ndef _members_to_dicts(\n    value: str | bool | list[ScimGroupMember] | ScimPatchResourceValue | None,\n) -> list[dict]:\n    \"\"\"Convert a member list value to a list of dicts for internal processing.\"\"\"\n    if not isinstance(value, list):\n        raise ScimPatchError(\"Replace members requires a list value\")\n    return [m.model_dump(exclude_none=True) for m in value]\n\n\ndef _replace_members(\n    value: list[dict],\n    current_members: list[dict],\n    added_ids: list[str],\n    removed_ids: list[str],\n) -> None:\n    \"\"\"Replace the entire group member list.\"\"\"\n    old_ids = {m[\"value\"] for m in current_members}\n    new_ids = {m.get(\"value\", \"\") for m in value}\n\n    removed_ids.extend(old_ids - new_ids)\n    added_ids.extend(new_ids - old_ids)\n\n    current_members[:] = value\n\n\ndef _set_group_field(\n    path: str,\n    value: ScimPatchValue,\n    data: dict,\n    ignored_paths: frozenset[str],\n) -> None:\n    \"\"\"Set a single field on group data by SCIM path.\"\"\"\n    if path in ignored_paths:\n        return\n\n    entry = _GROUP_REPLACE_PATHS.get(path)\n    if entry:\n        key, _ = entry\n        data[key] = value\n        return\n\n    raise ScimPatchError(f\"Unsupported path '{path}' for Group PATCH\")\n\n\ndef _apply_group_add(\n    op: ScimPatchOperation,\n    members: list[dict],\n    added_ids: list[str],\n) -> None:\n    \"\"\"Add members to a group.\"\"\"\n    path = (op.path or \"\").lower()\n\n    if path and path != \"members\":\n        raise ScimPatchError(f\"Unsupported add path '{op.path}' for Group\")\n\n    if not isinstance(op.value, list):\n        raise ScimPatchError(\"Add members requires a list value\")\n\n    member_dicts = [m.model_dump(exclude_none=True) for m in op.value]\n\n    existing_ids = {m[\"value\"] for m in members}\n    for member_data in member_dicts:\n        member_id = member_data.get(\"value\", \"\")\n        if member_id and member_id not in existing_ids:\n            members.append(member_data)\n            added_ids.append(member_id)\n            existing_ids.add(member_id)\n\n\ndef _apply_group_remove(\n    op: ScimPatchOperation,\n    members: list[dict],\n    removed_ids: list[str],\n) -> None:\n    \"\"\"Remove members from a group.\"\"\"\n    if not op.path:\n        raise ScimPatchError(\"Remove operation requires a path\")\n\n    match = _MEMBER_FILTER_RE.match(op.path)\n    if not match:\n        raise ScimPatchError(\n            f\"Unsupported remove path '{op.path}'. Expected: members[value eq \\\"user-id\\\"]\"\n        )\n\n    target_id = match.group(1)\n    original_len = len(members)\n    members[:] = [m for m in members if m.get(\"value\") != target_id]\n\n    if len(members) < original_len:\n        removed_ids.append(target_id)\n"
  },
  {
    "path": "backend/ee/onyx/server/scim/providers/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/server/scim/providers/base.py",
    "content": "\"\"\"Base SCIM provider abstraction.\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport logging\nfrom abc import ABC\nfrom abc import abstractmethod\nfrom uuid import UUID\n\nfrom pydantic import ValidationError\n\nfrom ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA\nfrom ee.onyx.server.scim.models import SCIM_USER_SCHEMA\nfrom ee.onyx.server.scim.models import ScimEmail\nfrom ee.onyx.server.scim.models import ScimEnterpriseExtension\nfrom ee.onyx.server.scim.models import ScimGroupMember\nfrom ee.onyx.server.scim.models import ScimGroupResource\nfrom ee.onyx.server.scim.models import ScimManagerRef\nfrom ee.onyx.server.scim.models import ScimMappingFields\nfrom ee.onyx.server.scim.models import ScimMeta\nfrom ee.onyx.server.scim.models import ScimName\nfrom ee.onyx.server.scim.models import ScimUserGroupRef\nfrom ee.onyx.server.scim.models import ScimUserResource\nfrom onyx.db.models import User\nfrom onyx.db.models import UserGroup\n\n\nlogger = logging.getLogger(__name__)\n\nCOMMON_IGNORED_PATCH_PATHS: frozenset[str] = frozenset(\n    {\n        \"id\",\n        \"schemas\",\n        \"meta\",\n    }\n)\n\n\nclass ScimProvider(ABC):\n    \"\"\"Base class for provider-specific SCIM behavior.\n\n    Subclass this to handle IdP-specific quirks. The base class provides\n    RFC 7643-compliant response builders that populate all standard fields.\n    \"\"\"\n\n    @property\n    @abstractmethod\n    def name(self) -> str:\n        \"\"\"Short identifier for this provider (e.g. ``\"okta\"``).\"\"\"\n        ...\n\n    @property\n    @abstractmethod\n    def ignored_patch_paths(self) -> frozenset[str]:\n        \"\"\"SCIM attribute paths to silently skip in PATCH value-object dicts.\n\n        IdPs may include read-only or meta fields alongside actual changes\n        (e.g. Okta sends ``{\"id\": \"...\", \"active\": false}``). Paths listed\n        here are silently dropped instead of raising an error.\n        \"\"\"\n        ...\n\n    @property\n    def user_schemas(self) -> list[str]:\n        \"\"\"Schema URIs to include in User resource responses.\n\n        Override in subclasses to advertise additional schemas (e.g. the\n        enterprise extension for Entra ID).\n        \"\"\"\n        return [SCIM_USER_SCHEMA]\n\n    def build_user_resource(\n        self,\n        user: User,\n        external_id: str | None = None,\n        groups: list[tuple[int, str]] | None = None,\n        scim_username: str | None = None,\n        fields: ScimMappingFields | None = None,\n    ) -> ScimUserResource:\n        \"\"\"Build a SCIM User response from an Onyx User.\n\n        Args:\n            user: The Onyx user model.\n            external_id: The IdP's external identifier for this user.\n            groups: List of ``(group_id, group_name)`` tuples for the\n                ``groups`` read-only attribute. Pass ``None`` or ``[]``\n                for newly-created users.\n            scim_username: The original-case userName from the IdP. Falls\n                back to ``user.email`` (lowercase) when not available.\n            fields: Stored mapping fields that the IdP expects round-tripped.\n        \"\"\"\n        f = fields or ScimMappingFields()\n        group_refs = [\n            ScimUserGroupRef(value=str(gid), display=gname)\n            for gid, gname in (groups or [])\n        ]\n\n        username = scim_username or user.email\n\n        # Build enterprise extension when at least one value is present.\n        # Dynamically add the enterprise URN to schemas per RFC 7643 §3.0.\n        enterprise_ext: ScimEnterpriseExtension | None = None\n        schemas = list(self.user_schemas)\n        if f.department is not None or f.manager is not None:\n            manager_ref = (\n                ScimManagerRef(value=f.manager) if f.manager is not None else None\n            )\n            enterprise_ext = ScimEnterpriseExtension(\n                department=f.department,\n                manager=manager_ref,\n            )\n            if SCIM_ENTERPRISE_USER_SCHEMA not in schemas:\n                schemas.append(SCIM_ENTERPRISE_USER_SCHEMA)\n\n        name = self.build_scim_name(user, f)\n        emails = _deserialize_emails(f.scim_emails_json, username)\n\n        resource = ScimUserResource(\n            schemas=schemas,\n            id=str(user.id),\n            externalId=external_id,\n            userName=username,\n            name=name,\n            displayName=user.personal_name,\n            emails=emails,\n            active=user.is_active,\n            groups=group_refs,\n            meta=ScimMeta(resourceType=\"User\"),\n        )\n        resource.enterprise_extension = enterprise_ext\n        return resource\n\n    def build_group_resource(\n        self,\n        group: UserGroup,\n        members: list[tuple[UUID, str | None]],\n        external_id: str | None = None,\n    ) -> ScimGroupResource:\n        \"\"\"Build a SCIM Group response from an Onyx UserGroup.\"\"\"\n        scim_members = [\n            ScimGroupMember(value=str(uid), display=email) for uid, email in members\n        ]\n        return ScimGroupResource(\n            id=str(group.id),\n            externalId=external_id,\n            displayName=group.name,\n            members=scim_members,\n            meta=ScimMeta(resourceType=\"Group\"),\n        )\n\n    def build_scim_name(\n        self,\n        user: User,\n        fields: ScimMappingFields,\n    ) -> ScimName:\n        \"\"\"Build SCIM name components for the response.\n\n        Round-trips stored ``given_name``/``family_name`` when available (so\n        the IdP gets back what it sent). Falls back to splitting\n        ``personal_name`` for users provisioned before we stored components.\n        Always returns a ScimName — Okta's spec tests expect ``name``\n        (with ``givenName``/``familyName``) on every user resource.\n        Providers may override for custom behavior.\n        \"\"\"\n        if fields.given_name is not None or fields.family_name is not None:\n            return ScimName(\n                givenName=fields.given_name or \"\",\n                familyName=fields.family_name or \"\",\n                formatted=user.personal_name or \"\",\n            )\n        if not user.personal_name:\n            # Derive a reasonable name from the email so that SCIM spec tests\n            # see non-empty givenName / familyName for every user resource.\n            local = user.email.split(\"@\")[0] if user.email else \"\"\n            return ScimName(givenName=local, familyName=\"\", formatted=local)\n        parts = user.personal_name.split(\" \", 1)\n        return ScimName(\n            givenName=parts[0],\n            familyName=parts[1] if len(parts) > 1 else \"\",\n            formatted=user.personal_name,\n        )\n\n\ndef _deserialize_emails(stored_json: str | None, username: str) -> list[ScimEmail]:\n    \"\"\"Deserialize stored email entries or build a default work email.\"\"\"\n    if stored_json:\n        try:\n            entries = json.loads(stored_json)\n            if isinstance(entries, list) and entries:\n                return [ScimEmail(**e) for e in entries]\n        except (json.JSONDecodeError, TypeError, ValidationError):\n            logger.warning(\n                \"Corrupt scim_emails_json, falling back to default: %s\", stored_json\n            )\n    return [ScimEmail(value=username, type=\"work\", primary=True)]\n\n\ndef serialize_emails(emails: list[ScimEmail]) -> str | None:\n    \"\"\"Serialize SCIM email entries to JSON for storage.\"\"\"\n    if not emails:\n        return None\n    return json.dumps([e.model_dump(exclude_none=True) for e in emails])\n\n\ndef get_default_provider() -> ScimProvider:\n    \"\"\"Return the default SCIM provider.\n\n    Currently returns ``OktaProvider`` since Okta is the primary supported\n    IdP. When provider detection is added (via token metadata or tenant\n    config), this can be replaced with dynamic resolution.\n    \"\"\"\n    from ee.onyx.server.scim.providers.okta import OktaProvider\n\n    return OktaProvider()\n"
  },
  {
    "path": "backend/ee/onyx/server/scim/providers/entra.py",
    "content": "\"\"\"Entra ID (Azure AD) SCIM provider.\"\"\"\n\nfrom __future__ import annotations\n\nfrom ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA\nfrom ee.onyx.server.scim.models import SCIM_USER_SCHEMA\nfrom ee.onyx.server.scim.providers.base import COMMON_IGNORED_PATCH_PATHS\nfrom ee.onyx.server.scim.providers.base import ScimProvider\n\n_ENTRA_IGNORED_PATCH_PATHS = COMMON_IGNORED_PATCH_PATHS\n\n\nclass EntraProvider(ScimProvider):\n    \"\"\"Entra ID (Azure AD) SCIM provider.\n\n    Entra behavioral notes:\n      - Sends capitalized PATCH ops (``\"Add\"``, ``\"Replace\"``, ``\"Remove\"``)\n        — handled by ``ScimPatchOperation.normalize_op`` validator.\n      - Sends the enterprise extension URN as a key in path-less PATCH value\n        dicts — handled by ``_set_enterprise_field`` in ``patch.py`` to\n        store department/manager values.\n      - Expects the enterprise extension schema in ``schemas`` arrays and\n        ``/Schemas`` + ``/ResourceTypes`` discovery endpoints.\n    \"\"\"\n\n    @property\n    def name(self) -> str:\n        return \"entra\"\n\n    @property\n    def ignored_patch_paths(self) -> frozenset[str]:\n        return _ENTRA_IGNORED_PATCH_PATHS\n\n    @property\n    def user_schemas(self) -> list[str]:\n        return [SCIM_USER_SCHEMA, SCIM_ENTERPRISE_USER_SCHEMA]\n"
  },
  {
    "path": "backend/ee/onyx/server/scim/providers/okta.py",
    "content": "\"\"\"Okta SCIM provider.\"\"\"\n\nfrom __future__ import annotations\n\nfrom ee.onyx.server.scim.providers.base import COMMON_IGNORED_PATCH_PATHS\nfrom ee.onyx.server.scim.providers.base import ScimProvider\n\n\nclass OktaProvider(ScimProvider):\n    \"\"\"Okta SCIM provider.\n\n    Okta behavioral notes:\n      - Uses ``PATCH {\"active\": false}`` for deprovisioning (not DELETE)\n      - Sends path-less PATCH with value dicts containing extra fields\n        (``id``, ``schemas``)\n      - Expects ``displayName`` and ``groups`` in user responses\n      - Only uses ``eq`` operator for ``userName`` filter\n    \"\"\"\n\n    @property\n    def name(self) -> str:\n        return \"okta\"\n\n    @property\n    def ignored_patch_paths(self) -> frozenset[str]:\n        return COMMON_IGNORED_PATCH_PATHS\n"
  },
  {
    "path": "backend/ee/onyx/server/scim/schema_definitions.py",
    "content": "\"\"\"Static SCIM service discovery responses (RFC 7643 §5, §6, §7).\n\nPre-built at import time — these never change at runtime. Separated from\napi.py to keep the endpoint module focused on request handling.\n\"\"\"\n\nfrom ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA\nfrom ee.onyx.server.scim.models import SCIM_GROUP_SCHEMA\nfrom ee.onyx.server.scim.models import SCIM_USER_SCHEMA\nfrom ee.onyx.server.scim.models import ScimResourceType\nfrom ee.onyx.server.scim.models import ScimSchemaAttribute\nfrom ee.onyx.server.scim.models import ScimSchemaDefinition\nfrom ee.onyx.server.scim.models import ScimServiceProviderConfig\n\nSERVICE_PROVIDER_CONFIG = ScimServiceProviderConfig()\n\nUSER_RESOURCE_TYPE = ScimResourceType.model_validate(\n    {\n        \"id\": \"User\",\n        \"name\": \"User\",\n        \"endpoint\": \"/scim/v2/Users\",\n        \"description\": \"SCIM User resource\",\n        \"schema\": SCIM_USER_SCHEMA,\n        \"schemaExtensions\": [\n            {\"schema\": SCIM_ENTERPRISE_USER_SCHEMA, \"required\": False}\n        ],\n    }\n)\n\nGROUP_RESOURCE_TYPE = ScimResourceType.model_validate(\n    {\n        \"id\": \"Group\",\n        \"name\": \"Group\",\n        \"endpoint\": \"/scim/v2/Groups\",\n        \"description\": \"SCIM Group resource\",\n        \"schema\": SCIM_GROUP_SCHEMA,\n    }\n)\n\nUSER_SCHEMA_DEF = ScimSchemaDefinition(\n    id=SCIM_USER_SCHEMA,\n    name=\"User\",\n    description=\"SCIM core User schema\",\n    attributes=[\n        ScimSchemaAttribute(\n            name=\"userName\",\n            type=\"string\",\n            required=True,\n            uniqueness=\"server\",\n            description=\"Unique identifier for the user, typically an email address.\",\n        ),\n        ScimSchemaAttribute(\n            name=\"name\",\n            type=\"complex\",\n            description=\"The components of the user's name.\",\n            subAttributes=[\n                ScimSchemaAttribute(\n                    name=\"givenName\",\n                    type=\"string\",\n                    description=\"The user's first name.\",\n                ),\n                ScimSchemaAttribute(\n                    name=\"familyName\",\n                    type=\"string\",\n                    description=\"The user's last name.\",\n                ),\n                ScimSchemaAttribute(\n                    name=\"formatted\",\n                    type=\"string\",\n                    description=\"The full name, including all middle names and titles.\",\n                ),\n            ],\n        ),\n        ScimSchemaAttribute(\n            name=\"emails\",\n            type=\"complex\",\n            multiValued=True,\n            description=\"Email addresses for the user.\",\n            subAttributes=[\n                ScimSchemaAttribute(\n                    name=\"value\",\n                    type=\"string\",\n                    description=\"Email address value.\",\n                ),\n                ScimSchemaAttribute(\n                    name=\"type\",\n                    type=\"string\",\n                    description=\"Label for this email (e.g. 'work').\",\n                ),\n                ScimSchemaAttribute(\n                    name=\"primary\",\n                    type=\"boolean\",\n                    description=\"Whether this is the primary email.\",\n                ),\n            ],\n        ),\n        ScimSchemaAttribute(\n            name=\"active\",\n            type=\"boolean\",\n            description=\"Whether the user account is active.\",\n        ),\n        ScimSchemaAttribute(\n            name=\"externalId\",\n            type=\"string\",\n            description=\"Identifier from the provisioning client (IdP).\",\n            caseExact=True,\n        ),\n    ],\n)\n\nENTERPRISE_USER_SCHEMA_DEF = ScimSchemaDefinition(\n    id=SCIM_ENTERPRISE_USER_SCHEMA,\n    name=\"EnterpriseUser\",\n    description=\"Enterprise User extension (RFC 7643 §4.3)\",\n    attributes=[\n        ScimSchemaAttribute(\n            name=\"department\",\n            type=\"string\",\n            description=\"Department.\",\n        ),\n        ScimSchemaAttribute(\n            name=\"manager\",\n            type=\"complex\",\n            description=\"The user's manager.\",\n            subAttributes=[\n                ScimSchemaAttribute(\n                    name=\"value\",\n                    type=\"string\",\n                    description=\"Manager user ID.\",\n                ),\n            ],\n        ),\n    ],\n)\n\nGROUP_SCHEMA_DEF = ScimSchemaDefinition(\n    id=SCIM_GROUP_SCHEMA,\n    name=\"Group\",\n    description=\"SCIM core Group schema\",\n    attributes=[\n        ScimSchemaAttribute(\n            name=\"displayName\",\n            type=\"string\",\n            required=True,\n            description=\"Human-readable name for the group.\",\n        ),\n        ScimSchemaAttribute(\n            name=\"members\",\n            type=\"complex\",\n            multiValued=True,\n            description=\"Members of the group.\",\n            subAttributes=[\n                ScimSchemaAttribute(\n                    name=\"value\",\n                    type=\"string\",\n                    description=\"User ID of the group member.\",\n                ),\n                ScimSchemaAttribute(\n                    name=\"display\",\n                    type=\"string\",\n                    mutability=\"readOnly\",\n                    description=\"Display name of the group member.\",\n                ),\n            ],\n        ),\n        ScimSchemaAttribute(\n            name=\"externalId\",\n            type=\"string\",\n            description=\"Identifier from the provisioning client (IdP).\",\n            caseExact=True,\n        ),\n    ],\n)\n"
  },
  {
    "path": "backend/ee/onyx/server/seeding.py",
    "content": "import json\nimport os\nfrom copy import deepcopy\nfrom typing import List\nfrom typing import Optional\n\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.standard_answer import (\n    create_initial_default_standard_answer_category,\n)\nfrom ee.onyx.server.enterprise_settings.models import AnalyticsScriptUpload\nfrom ee.onyx.server.enterprise_settings.models import EnterpriseSettings\nfrom ee.onyx.server.enterprise_settings.models import NavigationItem\nfrom ee.onyx.server.enterprise_settings.store import store_analytics_script\nfrom ee.onyx.server.enterprise_settings.store import (\n    store_settings as store_ee_settings,\n)\nfrom ee.onyx.server.enterprise_settings.store import upload_logo\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.llm import fetch_existing_llm_provider\nfrom onyx.db.llm import update_default_provider\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.db.models import Tool\nfrom onyx.db.persona import upsert_persona\nfrom onyx.server.features.persona.models import PersonaUpsertRequest\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import LLMProviderView\nfrom onyx.server.settings.models import Settings\nfrom onyx.server.settings.store import store_settings as store_base_settings\nfrom onyx.utils.logger import setup_logger\n\n\nclass CustomToolSeed(BaseModel):\n    name: str\n    description: str\n    definition_path: str\n    custom_headers: Optional[List[dict]] = None\n    display_name: Optional[str] = None\n    in_code_tool_id: Optional[str] = None\n    user_id: Optional[str] = None\n\n\nlogger = setup_logger()\n\n_SEED_CONFIG_ENV_VAR_NAME = \"ENV_SEED_CONFIGURATION\"\n\n\nclass NavigationItemSeed(BaseModel):\n    link: str\n    title: str\n    # NOTE: SVG at this path must not have a width / height specified\n    svg_path: str\n\n\nclass SeedConfiguration(BaseModel):\n    llms: list[LLMProviderUpsertRequest] | None = None\n    admin_user_emails: list[str] | None = None\n    seeded_logo_path: str | None = None\n    personas: list[PersonaUpsertRequest] | None = None\n    settings: Settings | None = None\n    enterprise_settings: EnterpriseSettings | None = None\n\n    # allows for specifying custom navigation items that have your own custom SVG logos\n    nav_item_overrides: list[NavigationItemSeed] | None = None\n\n    # Use existing `CUSTOM_ANALYTICS_SECRET_KEY` for reference\n    analytics_script_path: str | None = None\n    custom_tools: List[CustomToolSeed] | None = None\n\n\ndef _parse_env() -> SeedConfiguration | None:\n    seed_config_str = os.getenv(_SEED_CONFIG_ENV_VAR_NAME)\n    if not seed_config_str:\n        return None\n    seed_config = SeedConfiguration.model_validate_json(seed_config_str)\n    return seed_config\n\n\ndef _seed_custom_tools(db_session: Session, tools: List[CustomToolSeed]) -> None:\n    if tools:\n        logger.notice(\"Seeding Custom Tools\")\n        for tool in tools:\n            try:\n                logger.debug(f\"Attempting to seed tool: {tool.name}\")\n                logger.debug(f\"Reading definition from: {tool.definition_path}\")\n                with open(tool.definition_path, \"r\") as file:\n                    file_content = file.read()\n                    if not file_content.strip():\n                        raise ValueError(\"File is empty\")\n                    openapi_schema = json.loads(file_content)\n                db_tool = Tool(\n                    name=tool.name,\n                    description=tool.description,\n                    openapi_schema=openapi_schema,\n                    custom_headers=tool.custom_headers,\n                    display_name=tool.display_name,\n                    in_code_tool_id=tool.in_code_tool_id,\n                    user_id=tool.user_id,\n                )\n                db_session.add(db_tool)\n                logger.debug(f\"Successfully added tool: {tool.name}\")\n            except FileNotFoundError:\n                logger.error(\n                    f\"Definition file not found for tool {tool.name}: {tool.definition_path}\"\n                )\n            except json.JSONDecodeError as e:\n                logger.error(\n                    f\"Invalid JSON in definition file for tool {tool.name}: {str(e)}\"\n                )\n            except Exception as e:\n                logger.error(f\"Failed to seed tool {tool.name}: {str(e)}\")\n        db_session.commit()\n        logger.notice(f\"Successfully seeded {len(tools)} Custom Tools\")\n\n\ndef _seed_llms(\n    db_session: Session, llm_upsert_requests: list[LLMProviderUpsertRequest]\n) -> None:\n    if not llm_upsert_requests:\n        return\n\n    logger.notice(\"Seeding LLMs\")\n    for request in llm_upsert_requests:\n        existing = fetch_existing_llm_provider(name=request.name, db_session=db_session)\n        if existing:\n            request.id = existing.id\n    seeded_providers: list[LLMProviderView] = []\n    for llm_upsert_request in llm_upsert_requests:\n        try:\n            seeded_providers.append(upsert_llm_provider(llm_upsert_request, db_session))\n        except ValueError as e:\n            logger.warning(\n                \"Failed to upsert LLM provider '%s' during seeding: %s\",\n                llm_upsert_request.name,\n                e,\n            )\n\n    default_provider = next(\n        (p for p in seeded_providers if p.model_configurations), None\n    )\n    if not default_provider:\n        return\n\n    visible_configs = [\n        mc for mc in default_provider.model_configurations if mc.is_visible\n    ]\n    default_config = (\n        visible_configs[0]\n        if visible_configs\n        else default_provider.model_configurations[0]\n    )\n    update_default_provider(\n        provider_id=default_provider.id,\n        model_name=default_config.name,\n        db_session=db_session,\n    )\n\n\ndef _seed_personas(db_session: Session, personas: list[PersonaUpsertRequest]) -> None:\n    if personas:\n        logger.notice(\"Seeding Personas\")\n        try:\n            for persona in personas:\n                upsert_persona(\n                    user=None,  # Seeding is done as admin\n                    name=persona.name,\n                    description=persona.description,\n                    document_set_ids=persona.document_set_ids,\n                    llm_model_provider_override=persona.llm_model_provider_override,\n                    llm_model_version_override=persona.llm_model_version_override,\n                    starter_messages=persona.starter_messages,\n                    is_public=persona.is_public,\n                    db_session=db_session,\n                    tool_ids=persona.tool_ids,\n                    display_priority=persona.display_priority,\n                    system_prompt=persona.system_prompt,\n                    task_prompt=persona.task_prompt,\n                    datetime_aware=persona.datetime_aware,\n                    is_featured=persona.is_featured,\n                    commit=False,\n                )\n            db_session.commit()\n        except Exception:\n            logger.exception(\"Failed to seed personas.\")\n            raise\n\n\ndef _seed_settings(settings: Settings) -> None:\n    logger.notice(\"Seeding Settings\")\n    try:\n        store_base_settings(settings)\n        logger.notice(\"Successfully seeded Settings\")\n    except ValueError as e:\n        logger.error(f\"Failed to seed Settings: {str(e)}\")\n\n\ndef _seed_enterprise_settings(seed_config: SeedConfiguration) -> None:\n    if (\n        seed_config.enterprise_settings is not None\n        or seed_config.nav_item_overrides is not None\n    ):\n        final_enterprise_settings = (\n            deepcopy(seed_config.enterprise_settings)\n            if seed_config.enterprise_settings\n            else EnterpriseSettings()\n        )\n\n        final_nav_items = final_enterprise_settings.custom_nav_items\n        if seed_config.nav_item_overrides is not None:\n            final_nav_items = []\n            for item in seed_config.nav_item_overrides:\n                with open(item.svg_path, \"r\") as file:\n                    svg_content = file.read().strip()\n\n                final_nav_items.append(\n                    NavigationItem(\n                        link=item.link,\n                        title=item.title,\n                        svg_logo=svg_content,\n                    )\n                )\n\n        final_enterprise_settings.custom_nav_items = final_nav_items\n\n        logger.notice(\"Seeding enterprise settings\")\n        store_ee_settings(final_enterprise_settings)\n\n\ndef _seed_logo(logo_path: str | None) -> None:\n    if logo_path:\n        logger.notice(\"Uploading logo\")\n        upload_logo(file=logo_path)\n\n\ndef _seed_analytics_script(seed_config: SeedConfiguration) -> None:\n    custom_analytics_secret_key = os.environ.get(\"CUSTOM_ANALYTICS_SECRET_KEY\")\n    if seed_config.analytics_script_path and custom_analytics_secret_key:\n        logger.notice(\"Seeding analytics script\")\n        try:\n            with open(seed_config.analytics_script_path, \"r\") as file:\n                script_content = file.read()\n            analytics_script = AnalyticsScriptUpload(\n                script=script_content, secret_key=custom_analytics_secret_key\n            )\n            store_analytics_script(analytics_script)\n        except FileNotFoundError:\n            logger.error(\n                f\"Analytics script file not found: {seed_config.analytics_script_path}\"\n            )\n        except ValueError as e:\n            logger.error(f\"Failed to seed analytics script: {str(e)}\")\n\n\ndef get_seed_config() -> SeedConfiguration | None:\n    return _parse_env()\n\n\ndef seed_db() -> None:\n    seed_config = _parse_env()\n    if seed_config is None:\n        logger.debug(\"No seeding configuration file passed\")\n        return\n\n    with get_session_with_current_tenant() as db_session:\n        if seed_config.llms is not None:\n            _seed_llms(db_session, seed_config.llms)\n        if seed_config.personas is not None:\n            _seed_personas(db_session, seed_config.personas)\n        if seed_config.settings is not None:\n            _seed_settings(seed_config.settings)\n        if seed_config.custom_tools is not None:\n            _seed_custom_tools(db_session, seed_config.custom_tools)\n\n        _seed_logo(seed_config.seeded_logo_path)\n        _seed_enterprise_settings(seed_config)\n        _seed_analytics_script(seed_config)\n\n        logger.notice(\"Verifying default standard answer category exists.\")\n        create_initial_default_standard_answer_category(db_session)\n"
  },
  {
    "path": "backend/ee/onyx/server/settings/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/server/settings/api.py",
    "content": "\"\"\"EE Settings API - provides license-aware settings override.\"\"\"\n\nfrom redis.exceptions import RedisError\nfrom sqlalchemy.exc import SQLAlchemyError\n\nfrom ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED\nfrom ee.onyx.db.license import get_cached_license_metadata\nfrom ee.onyx.db.license import refresh_license_cache\nfrom onyx.cache.interface import CACHE_TRANSIENT_ERRORS\nfrom onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.server.settings.models import ApplicationStatus\nfrom onyx.server.settings.models import Settings\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n# Only GATED_ACCESS actually blocks access - other statuses are for notifications\n_BLOCKING_STATUS = ApplicationStatus.GATED_ACCESS\n\n\ndef check_ee_features_enabled() -> bool:\n    \"\"\"EE version: checks if EE features should be available.\n\n    Returns True if:\n    - LICENSE_ENFORCEMENT_ENABLED is False (legacy/rollout mode)\n    - Cloud mode (MULTI_TENANT) - cloud handles its own gating\n    - Self-hosted with a valid (non-expired) license\n\n    Returns False if:\n    - Self-hosted with no license (never subscribed)\n    - Self-hosted with expired license\n    \"\"\"\n    if not LICENSE_ENFORCEMENT_ENABLED:\n        # License enforcement disabled - allow EE features (legacy behavior)\n        return True\n\n    if MULTI_TENANT:\n        # Cloud mode - EE features always available (gating handled by is_tenant_gated)\n        return True\n\n    # Self-hosted with enforcement - check for valid license\n    tenant_id = get_current_tenant_id()\n    try:\n        metadata = get_cached_license_metadata(tenant_id)\n        if not metadata:\n            # Cache miss — warm from DB so cold-start doesn't block EE features\n            try:\n                with get_session_with_current_tenant() as db_session:\n                    metadata = refresh_license_cache(db_session, tenant_id)\n            except SQLAlchemyError as db_error:\n                logger.warning(f\"Failed to load license from DB: {db_error}\")\n\n        if metadata and metadata.status != _BLOCKING_STATUS:\n            # Has a valid license (GRACE_PERIOD/PAYMENT_REMINDER still allow EE features)\n            return True\n    except RedisError as e:\n        logger.warning(f\"Failed to check license for EE features: {e}\")\n        # Fail closed - if Redis is down, other things will break anyway\n        return False\n\n    # No license or GATED_ACCESS - no EE features\n    return False\n\n\ndef apply_license_status_to_settings(settings: Settings) -> Settings:\n    \"\"\"EE version: checks license status for self-hosted deployments.\n\n    For self-hosted, looks up license metadata and overrides application_status\n    if the license indicates GATED_ACCESS (fully expired).\n\n    Also sets ee_features_enabled based on license status to control\n    visibility of EE features in the UI.\n\n    For multi-tenant (cloud), the settings already have the correct status\n    from the control plane, so no override is needed.\n\n    If LICENSE_ENFORCEMENT_ENABLED is false, ee_features_enabled is set to True\n    (since EE code was loaded via ENABLE_PAID_ENTERPRISE_EDITION_FEATURES).\n    \"\"\"\n    if not LICENSE_ENFORCEMENT_ENABLED:\n        # License enforcement disabled - EE code is loaded via\n        # ENABLE_PAID_ENTERPRISE_EDITION_FEATURES, so EE features are on\n        settings.ee_features_enabled = True\n        return settings\n\n    if MULTI_TENANT:\n        # Cloud mode - EE features always available (gating handled by is_tenant_gated)\n        settings.ee_features_enabled = True\n        return settings\n\n    tenant_id = get_current_tenant_id()\n    try:\n        metadata = get_cached_license_metadata(tenant_id)\n        if not metadata:\n            # Cache miss (e.g. after TTL expiry). Fall back to DB so\n            # the /settings request doesn't falsely return GATED_ACCESS\n            # while the cache is cold.\n            try:\n                with get_session_with_current_tenant() as db_session:\n                    metadata = refresh_license_cache(db_session, tenant_id)\n            except SQLAlchemyError as db_error:\n                logger.warning(\n                    f\"Failed to load license from DB for settings: {db_error}\"\n                )\n\n        if metadata:\n            if metadata.status == _BLOCKING_STATUS:\n                settings.application_status = metadata.status\n                settings.ee_features_enabled = False\n            elif metadata.used_seats > metadata.seats:\n                # License is valid but seat limit exceeded\n                settings.application_status = ApplicationStatus.SEAT_LIMIT_EXCEEDED\n                settings.seat_count = metadata.seats\n                settings.used_seats = metadata.used_seats\n                settings.ee_features_enabled = True\n            else:\n                # Has a valid license (GRACE_PERIOD/PAYMENT_REMINDER still allow EE features)\n                settings.ee_features_enabled = True\n        else:\n            # No license found in cache or DB.\n            if ENTERPRISE_EDITION_ENABLED:\n                # Legacy EE flag is set → prior EE usage (e.g. permission\n                # syncing) means indexed data may need protection.\n                settings.application_status = _BLOCKING_STATUS\n            settings.ee_features_enabled = False\n    except CACHE_TRANSIENT_ERRORS as e:\n        logger.warning(f\"Failed to check license metadata for settings: {e}\")\n        # Fail closed - disable EE features if we can't verify license\n        settings.ee_features_enabled = False\n\n    return settings\n"
  },
  {
    "path": "backend/ee/onyx/server/tenant_usage_limits.py",
    "content": "\"\"\"Tenant-specific usage limit overrides from the control plane (EE version).\"\"\"\n\nimport time\n\nimport requests\n\nfrom ee.onyx.server.tenants.access import generate_data_plane_token\nfrom onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL\nfrom onyx.configs.app_configs import DEV_MODE\nfrom onyx.server.tenant_usage_limits import TenantUsageLimitOverrides\nfrom onyx.server.usage_limits import NO_LIMIT\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n# In-memory storage for tenant overrides (populated at startup)\n_tenant_usage_limit_overrides: dict[str, TenantUsageLimitOverrides] | None = None\n_last_fetch_time: float = 0.0\n_FETCH_INTERVAL = 60 * 60 * 24  # 24 hours\n_ERROR_FETCH_INTERVAL = 30 * 60  # 30 minutes (if the last fetch failed)\n\n\ndef fetch_usage_limit_overrides() -> dict[str, TenantUsageLimitOverrides] | None:\n    \"\"\"\n    Fetch tenant-specific usage limit overrides from the control plane.\n\n    Returns:\n        Dictionary mapping tenant_id to their specific limit overrides.\n        Returns empty dict on any error (falls back to defaults).\n    \"\"\"\n    try:\n        token = generate_data_plane_token()\n        headers = {\n            \"Authorization\": f\"Bearer {token}\",\n            \"Content-Type\": \"application/json\",\n        }\n        url = f\"{CONTROL_PLANE_API_BASE_URL}/usage-limit-overrides\"\n        response = requests.get(url, headers=headers, timeout=30)\n        response.raise_for_status()\n\n        tenant_overrides = response.json()\n\n        # Parse each tenant's overrides\n        result: dict[str, TenantUsageLimitOverrides] = {}\n        for override_data in tenant_overrides:\n            tenant_id = override_data[\"tenant_id\"]\n            try:\n                result[tenant_id] = TenantUsageLimitOverrides(**override_data)\n            except Exception as e:\n                logger.warning(\n                    f\"Failed to parse usage limit overrides for tenant {tenant_id}: {e}\"\n                )\n\n        return (\n            result or None\n        )  # if empty dictionary, something went wrong and we shouldn't enforce limits\n\n    except requests.exceptions.RequestException as e:\n        logger.warning(f\"Failed to fetch usage limit overrides from control plane: {e}\")\n        return None\n    except Exception as e:\n        logger.error(f\"Error parsing usage limit overrides: {e}\")\n        return None\n\n\ndef load_usage_limit_overrides() -> None:\n    \"\"\"\n    Load tenant usage limit overrides from the control plane.\n    \"\"\"\n    global _tenant_usage_limit_overrides\n    global _last_fetch_time\n\n    logger.info(\"Loading tenant usage limit overrides from control plane...\")\n    overrides = fetch_usage_limit_overrides()\n\n    _last_fetch_time = time.time()\n\n    # use the new result if it exists, otherwise use the old result\n    # (prevents us from updating to a failed fetch result)\n    _tenant_usage_limit_overrides = overrides or _tenant_usage_limit_overrides\n\n    if overrides:\n        logger.info(f\"Loaded usage limit overrides for {len(overrides)} tenants\")\n    else:\n        logger.info(\"No tenant-specific usage limit overrides found\")\n\n\ndef unlimited(tenant_id: str) -> TenantUsageLimitOverrides:\n    return TenantUsageLimitOverrides(\n        tenant_id=tenant_id,\n        llm_cost_cents_trial=NO_LIMIT,\n        llm_cost_cents_paid=NO_LIMIT,\n        chunks_indexed_trial=NO_LIMIT,\n        chunks_indexed_paid=NO_LIMIT,\n        api_calls_trial=NO_LIMIT,\n        api_calls_paid=NO_LIMIT,\n        non_streaming_calls_trial=NO_LIMIT,\n        non_streaming_calls_paid=NO_LIMIT,\n    )\n\n\ndef get_tenant_usage_limit_overrides(\n    tenant_id: str,\n) -> TenantUsageLimitOverrides | None:\n    \"\"\"\n    Get the usage limit overrides for a specific tenant.\n\n    Args:\n        tenant_id: The tenant ID to look up\n\n    Returns:\n        TenantUsageLimitOverrides if the tenant has overrides, None otherwise.\n    \"\"\"\n\n    if DEV_MODE:  # in dev mode, we return unlimited limits for all tenants\n        return unlimited(tenant_id)\n\n    global _tenant_usage_limit_overrides\n    time_since = time.time() - _last_fetch_time\n    if (\n        _tenant_usage_limit_overrides is None and time_since > _ERROR_FETCH_INTERVAL\n    ) or (time_since > _FETCH_INTERVAL):\n        logger.debug(\n            f\"Last fetch time: {_last_fetch_time}, time since last fetch: {time_since}\"\n        )\n\n        load_usage_limit_overrides()\n\n    # If we have failed to fetch from the control plane or we're in dev mode, don't usage limit anyone.\n    if _tenant_usage_limit_overrides is None or DEV_MODE:\n        return unlimited(tenant_id)\n    return _tenant_usage_limit_overrides.get(tenant_id)\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/server/tenants/access.py",
    "content": "from datetime import datetime\nfrom datetime import timedelta\n\nimport jwt\nfrom fastapi import HTTPException\nfrom fastapi import Request\n\nfrom onyx.configs.app_configs import DATA_PLANE_SECRET\nfrom onyx.configs.app_configs import EXPECTED_API_KEY\nfrom onyx.configs.app_configs import JWT_ALGORITHM\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef generate_data_plane_token() -> str:\n    if DATA_PLANE_SECRET is None:\n        raise ValueError(\"DATA_PLANE_SECRET is not set\")\n\n    payload = {\n        \"iss\": \"data_plane\",\n        \"exp\": datetime.utcnow() + timedelta(minutes=5),\n        \"iat\": datetime.utcnow(),\n        \"scope\": \"api_access\",\n    }\n\n    token = jwt.encode(payload, DATA_PLANE_SECRET, algorithm=JWT_ALGORITHM)\n    return token\n\n\nasync def control_plane_dep(request: Request) -> None:\n    api_key = request.headers.get(\"X-API-KEY\")\n    if api_key != EXPECTED_API_KEY:\n        logger.warning(\"Invalid API key\")\n        raise HTTPException(status_code=401, detail=\"Invalid API key\")\n\n    auth_header = request.headers.get(\"Authorization\")\n    if not auth_header or not auth_header.startswith(\"Bearer \"):\n        logger.warning(\"Invalid authorization header\")\n        raise HTTPException(status_code=401, detail=\"Invalid authorization header\")\n\n    token = auth_header.split(\" \")[1]\n    try:\n        payload = jwt.decode(token, DATA_PLANE_SECRET, algorithms=[JWT_ALGORITHM])\n        if payload.get(\"scope\") != \"tenant:create\":\n            logger.warning(\"Insufficient permissions\")\n            raise HTTPException(status_code=403, detail=\"Insufficient permissions\")\n    except jwt.ExpiredSignatureError:\n        logger.warning(\"Token has expired\")\n        raise HTTPException(status_code=401, detail=\"Token has expired\")\n    except jwt.InvalidTokenError:\n        logger.warning(\"Invalid token\")\n        raise HTTPException(status_code=401, detail=\"Invalid token\")\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/admin_api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Response\nfrom fastapi_users import exceptions\n\nfrom ee.onyx.auth.users import current_cloud_superuser\nfrom ee.onyx.server.tenants.models import ImpersonateRequest\nfrom ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email\nfrom onyx.auth.users import auth_backend\nfrom onyx.auth.users import get_redis_strategy\nfrom onyx.auth.users import User\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.users import get_user_by_email\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/tenants\")\n\n\n@router.post(\"/impersonate\")\nasync def impersonate_user(\n    impersonate_request: ImpersonateRequest,\n    _: User = Depends(current_cloud_superuser),\n) -> Response:\n    \"\"\"Allows a cloud superuser to impersonate another user by generating an impersonation JWT token\"\"\"\n    try:\n        tenant_id = get_tenant_id_for_email(impersonate_request.email)\n    except exceptions.UserNotExists:\n        detail = f\"User has no tenant mapping: {impersonate_request.email=}\"\n        logger.warning(detail)\n        raise HTTPException(status_code=422, detail=detail)\n\n    with get_session_with_tenant(tenant_id=tenant_id) as tenant_session:\n        user_to_impersonate = get_user_by_email(\n            impersonate_request.email, tenant_session\n        )\n        if user_to_impersonate is None:\n            detail = (\n                f\"User not found in tenant: {impersonate_request.email=} {tenant_id=}\"\n            )\n            logger.warning(detail)\n            raise HTTPException(status_code=422, detail=detail)\n\n        token = await get_redis_strategy().write_token(user_to_impersonate)\n\n    response = await auth_backend.transport.get_login_response(token)\n    response.set_cookie(\n        key=\"fastapiusersauth\",\n        value=token,\n        httponly=True,\n        secure=True,\n        samesite=\"lax\",\n    )\n    return response\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/anonymous_user_path.py",
    "content": "from sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import TenantAnonymousUserPath\n\n\ndef get_anonymous_user_path(tenant_id: str, db_session: Session) -> str | None:\n    result = db_session.execute(\n        select(TenantAnonymousUserPath).where(\n            TenantAnonymousUserPath.tenant_id == tenant_id\n        )\n    )\n    result_scalar = result.scalar_one_or_none()\n    if result_scalar:\n        return result_scalar.anonymous_user_path\n    else:\n        return None\n\n\ndef modify_anonymous_user_path(\n    tenant_id: str, anonymous_user_path: str, db_session: Session\n) -> None:\n    # Enforce lowercase path at DB operation level\n    anonymous_user_path = anonymous_user_path.lower()\n\n    existing_entry = (\n        db_session.query(TenantAnonymousUserPath).filter_by(tenant_id=tenant_id).first()\n    )\n\n    if existing_entry:\n        existing_entry.anonymous_user_path = anonymous_user_path\n\n    else:\n        new_entry = TenantAnonymousUserPath(\n            tenant_id=tenant_id, anonymous_user_path=anonymous_user_path\n        )\n        db_session.add(new_entry)\n\n    db_session.commit()\n\n\ndef get_tenant_id_for_anonymous_user_path(\n    anonymous_user_path: str, db_session: Session\n) -> str | None:\n    result = db_session.execute(\n        select(TenantAnonymousUserPath).where(\n            TenantAnonymousUserPath.anonymous_user_path == anonymous_user_path\n        )\n    )\n    result_scalar = result.scalar_one_or_none()\n    if result_scalar:\n        return result_scalar.tenant_id\n    else:\n        return None\n\n\ndef validate_anonymous_user_path(path: str) -> None:\n    if not path or \"/\" in path or not path.replace(\"-\", \"\").isalnum():\n        raise ValueError(\"Invalid path. Use only letters, numbers, and hyphens.\")\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/anonymous_users_api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Response\nfrom sqlalchemy.exc import IntegrityError\n\nfrom ee.onyx.auth.users import generate_anonymous_user_jwt_token\nfrom ee.onyx.server.tenants.anonymous_user_path import get_anonymous_user_path\nfrom ee.onyx.server.tenants.anonymous_user_path import (\n    get_tenant_id_for_anonymous_user_path,\n)\nfrom ee.onyx.server.tenants.anonymous_user_path import modify_anonymous_user_path\nfrom ee.onyx.server.tenants.anonymous_user_path import validate_anonymous_user_path\nfrom ee.onyx.server.tenants.models import AnonymousUserPath\nfrom onyx.auth.users import anonymous_user_enabled\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import User\nfrom onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME\nfrom onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME\nfrom onyx.db.engine.sql_engine import get_session_with_shared_schema\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/tenants\")\n\n\n@router.get(\"/anonymous-user-path\")\nasync def get_anonymous_user_path_api(\n    _: User = Depends(current_admin_user),\n) -> AnonymousUserPath:\n    tenant_id = get_current_tenant_id()\n\n    if tenant_id is None:\n        raise HTTPException(status_code=404, detail=\"Tenant not found\")\n\n    with get_session_with_shared_schema() as db_session:\n        current_path = get_anonymous_user_path(tenant_id, db_session)\n\n    return AnonymousUserPath(anonymous_user_path=current_path)\n\n\n@router.post(\"/anonymous-user-path\")\nasync def set_anonymous_user_path_api(\n    anonymous_user_path: str,\n    _: User = Depends(current_admin_user),\n) -> None:\n    tenant_id = get_current_tenant_id()\n    try:\n        validate_anonymous_user_path(anonymous_user_path)\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    with get_session_with_shared_schema() as db_session:\n        try:\n            modify_anonymous_user_path(tenant_id, anonymous_user_path, db_session)\n        except IntegrityError:\n            raise HTTPException(\n                status_code=409,\n                detail=\"The anonymous user path is already in use. Please choose a different path.\",\n            )\n        except Exception as e:\n            logger.exception(f\"Failed to modify anonymous user path: {str(e)}\")\n            raise HTTPException(\n                status_code=500,\n                detail=\"An unexpected error occurred while modifying the anonymous user path\",\n            )\n\n\n@router.post(\"/anonymous-user\")\nasync def login_as_anonymous_user(\n    anonymous_user_path: str,\n) -> Response:\n    with get_session_with_shared_schema() as db_session:\n        tenant_id = get_tenant_id_for_anonymous_user_path(\n            anonymous_user_path, db_session\n        )\n        if not tenant_id:\n            raise HTTPException(status_code=404, detail=\"Tenant not found\")\n\n    if not anonymous_user_enabled(tenant_id=tenant_id):\n        raise HTTPException(status_code=403, detail=\"Anonymous user is not enabled\")\n\n    token = generate_anonymous_user_jwt_token(tenant_id)\n\n    response = Response()\n    response.delete_cookie(FASTAPI_USERS_AUTH_COOKIE_NAME)\n    response.set_cookie(\n        key=ANONYMOUS_USER_COOKIE_NAME,\n        value=token,\n        httponly=True,\n        secure=True,\n        samesite=\"strict\",\n    )\n    return response\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/api.py",
    "content": "from fastapi import APIRouter\n\nfrom ee.onyx.server.tenants.admin_api import router as admin_router\nfrom ee.onyx.server.tenants.anonymous_users_api import router as anonymous_users_router\nfrom ee.onyx.server.tenants.billing_api import router as billing_router\nfrom ee.onyx.server.tenants.proxy import router as proxy_router\nfrom ee.onyx.server.tenants.team_membership_api import router as team_membership_router\nfrom ee.onyx.server.tenants.tenant_management_api import (\n    router as tenant_management_router,\n)\nfrom ee.onyx.server.tenants.user_invitations_api import (\n    router as user_invitations_router,\n)\n\n# Create a main router to include all sub-routers\n# Note: We don't add a prefix here as each router already has the /tenants prefix\nrouter = APIRouter()\n\n# Include all the individual routers\nrouter.include_router(admin_router)\nrouter.include_router(anonymous_users_router)\nrouter.include_router(billing_router)\nrouter.include_router(team_membership_router)\nrouter.include_router(tenant_management_router)\nrouter.include_router(user_invitations_router)\nrouter.include_router(proxy_router)\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/billing.py",
    "content": "from typing import cast\nfrom typing import Literal\n\nimport requests\nimport stripe\n\nfrom ee.onyx.configs.app_configs import STRIPE_SECRET_KEY\nfrom ee.onyx.server.tenants.access import generate_data_plane_token\nfrom ee.onyx.server.tenants.models import BillingInformation\nfrom ee.onyx.server.tenants.models import SubscriptionStatusResponse\nfrom onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL\nfrom onyx.utils.logger import setup_logger\n\nstripe.api_key = STRIPE_SECRET_KEY\n\nlogger = setup_logger()\n\n\ndef fetch_stripe_checkout_session(\n    tenant_id: str,\n    billing_period: Literal[\"monthly\", \"annual\"] = \"monthly\",\n    seats: int | None = None,\n) -> str:\n    token = generate_data_plane_token()\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Content-Type\": \"application/json\",\n    }\n    url = f\"{CONTROL_PLANE_API_BASE_URL}/create-checkout-session\"\n    payload = {\n        \"tenant_id\": tenant_id,\n        \"billing_period\": billing_period,\n        \"seats\": seats,\n    }\n    response = requests.post(url, headers=headers, json=payload)\n    if not response.ok:\n        try:\n            data = response.json()\n            error_msg = (\n                data.get(\"error\")\n                or f\"Request failed with status {response.status_code}\"\n            )\n        except (ValueError, requests.exceptions.JSONDecodeError):\n            error_msg = f\"Request failed with status {response.status_code}: {response.text[:200]}\"\n        raise Exception(error_msg)\n    data = response.json()\n    if data.get(\"error\"):\n        raise Exception(data[\"error\"])\n    return data[\"sessionId\"]\n\n\ndef fetch_tenant_stripe_information(tenant_id: str) -> dict:\n    token = generate_data_plane_token()\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Content-Type\": \"application/json\",\n    }\n    url = f\"{CONTROL_PLANE_API_BASE_URL}/tenant-stripe-information\"\n    params = {\"tenant_id\": tenant_id}\n    response = requests.get(url, headers=headers, params=params)\n    response.raise_for_status()\n    return response.json()\n\n\ndef fetch_billing_information(\n    tenant_id: str,\n) -> BillingInformation | SubscriptionStatusResponse:\n    token = generate_data_plane_token()\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Content-Type\": \"application/json\",\n    }\n    url = f\"{CONTROL_PLANE_API_BASE_URL}/billing-information\"\n    params = {\"tenant_id\": tenant_id}\n    response = requests.get(url, headers=headers, params=params)\n    response.raise_for_status()\n\n    response_data = response.json()\n\n    # Check if the response indicates no subscription\n    if (\n        isinstance(response_data, dict)\n        and \"subscribed\" in response_data\n        and not response_data[\"subscribed\"]\n    ):\n        return SubscriptionStatusResponse(**response_data)\n\n    # Otherwise, parse as BillingInformation\n    return BillingInformation(**response_data)\n\n\ndef fetch_customer_portal_session(tenant_id: str, return_url: str | None = None) -> str:\n    \"\"\"\n    Fetch a Stripe customer portal session URL from the control plane.\n    NOTE: This is currently only used for multi-tenant (cloud) deployments.\n    Self-hosted proxy endpoints will be added in a future phase.\n    \"\"\"\n    token = generate_data_plane_token()\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Content-Type\": \"application/json\",\n    }\n    url = f\"{CONTROL_PLANE_API_BASE_URL}/create-customer-portal-session\"\n    payload = {\"tenant_id\": tenant_id}\n    if return_url:\n        payload[\"return_url\"] = return_url\n    response = requests.post(url, headers=headers, json=payload)\n    response.raise_for_status()\n    return response.json()[\"url\"]\n\n\ndef register_tenant_users(tenant_id: str, number_of_users: int) -> stripe.Subscription:\n    \"\"\"\n    Update the number of seats for a tenant's subscription.\n    Preserves the existing price (monthly, annual, or grandfathered).\n    \"\"\"\n    response = fetch_tenant_stripe_information(tenant_id)\n    stripe_subscription_id = cast(str, response.get(\"stripe_subscription_id\"))\n\n    subscription = stripe.Subscription.retrieve(stripe_subscription_id)\n    subscription_item = subscription[\"items\"][\"data\"][0]\n\n    # Use existing price to preserve the customer's current plan\n    current_price_id = subscription_item.price.id\n\n    updated_subscription = stripe.Subscription.modify(\n        stripe_subscription_id,\n        items=[\n            {\n                \"id\": subscription_item.id,\n                \"price\": current_price_id,\n                \"quantity\": number_of_users,\n            }\n        ],\n        metadata={\"tenant_id\": str(tenant_id)},\n    )\n    return updated_subscription\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/billing_api.py",
    "content": "\"\"\"Billing API endpoints for cloud multi-tenant deployments.\n\nDEPRECATED: These /tenants/* billing endpoints are being replaced by /admin/billing/*\nwhich provides a unified API for both self-hosted and cloud deployments.\n\nTODO(ENG-3533): Migrate frontend to use /admin/billing/* endpoints and remove this file.\nhttps://linear.app/onyx-app/issue/ENG-3533/migrate-tenantsbilling-adminbilling\n\nCurrent endpoints to migrate:\n- GET  /tenants/billing-information     -> GET  /admin/billing/information\n- POST /tenants/create-customer-portal-session -> POST /admin/billing/portal-session\n- POST /tenants/create-subscription-session    -> POST /admin/billing/checkout-session\n- GET  /tenants/stripe-publishable-key  -> (keep as-is, shared endpoint)\n\nNote: /tenants/product-gating/* endpoints are control-plane-to-data-plane calls\nand are NOT part of this migration - they stay here.\n\"\"\"\n\nimport asyncio\n\nimport httpx\nfrom fastapi import APIRouter\nfrom fastapi import Depends\n\nfrom ee.onyx.auth.users import current_admin_user\nfrom ee.onyx.server.tenants.access import control_plane_dep\nfrom ee.onyx.server.tenants.billing import fetch_billing_information\nfrom ee.onyx.server.tenants.billing import fetch_customer_portal_session\nfrom ee.onyx.server.tenants.billing import fetch_stripe_checkout_session\nfrom ee.onyx.server.tenants.models import BillingInformation\nfrom ee.onyx.server.tenants.models import CreateCheckoutSessionRequest\nfrom ee.onyx.server.tenants.models import CreateSubscriptionSessionRequest\nfrom ee.onyx.server.tenants.models import ProductGatingFullSyncRequest\nfrom ee.onyx.server.tenants.models import ProductGatingRequest\nfrom ee.onyx.server.tenants.models import ProductGatingResponse\nfrom ee.onyx.server.tenants.models import StripePublishableKeyResponse\nfrom ee.onyx.server.tenants.models import SubscriptionSessionResponse\nfrom ee.onyx.server.tenants.models import SubscriptionStatusResponse\nfrom ee.onyx.server.tenants.product_gating import overwrite_full_gated_set\nfrom ee.onyx.server.tenants.product_gating import store_product_gating\nfrom onyx.auth.users import User\nfrom onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE\nfrom onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/tenants\")\n\n# Cache for Stripe publishable key to avoid hitting S3 on every request\n_stripe_publishable_key_cache: str | None = None\n_stripe_key_lock = asyncio.Lock()\n\n\n@router.post(\"/product-gating\")\ndef gate_product(\n    product_gating_request: ProductGatingRequest, _: None = Depends(control_plane_dep)\n) -> ProductGatingResponse:\n    \"\"\"\n    Gating the product means that the product is not available to the tenant.\n    They will be directed to the billing page.\n    We gate the product when their subscription has ended.\n    \"\"\"\n    try:\n        store_product_gating(\n            product_gating_request.tenant_id, product_gating_request.application_status\n        )\n        return ProductGatingResponse(updated=True, error=None)\n\n    except Exception as e:\n        logger.exception(\"Failed to gate product\")\n        return ProductGatingResponse(updated=False, error=str(e))\n\n\n@router.post(\"/product-gating/full-sync\")\ndef gate_product_full_sync(\n    product_gating_request: ProductGatingFullSyncRequest,\n    _: None = Depends(control_plane_dep),\n) -> ProductGatingResponse:\n    \"\"\"\n    Bulk operation to overwrite the entire gated tenant set.\n    This replaces all currently gated tenants with the provided list.\n    Gated tenants are not available to access the product and will be\n    directed to the billing page when their subscription has ended.\n    \"\"\"\n    try:\n        overwrite_full_gated_set(product_gating_request.gated_tenant_ids)\n        return ProductGatingResponse(updated=True, error=None)\n\n    except Exception as e:\n        logger.exception(\"Failed to gate products during full sync\")\n        return ProductGatingResponse(updated=False, error=str(e))\n\n\n@router.get(\"/billing-information\")\nasync def billing_information(\n    _: User = Depends(current_admin_user),\n) -> BillingInformation | SubscriptionStatusResponse:\n    logger.info(\"Fetching billing information\")\n    tenant_id = get_current_tenant_id()\n    return fetch_billing_information(tenant_id)\n\n\n@router.post(\"/create-customer-portal-session\")\nasync def create_customer_portal_session(\n    _: User = Depends(current_admin_user),\n) -> dict:\n    \"\"\"Create a Stripe customer portal session via the control plane.\"\"\"\n    tenant_id = get_current_tenant_id()\n    return_url = f\"{WEB_DOMAIN}/admin/billing\"\n\n    try:\n        portal_url = fetch_customer_portal_session(tenant_id, return_url)\n        return {\"stripe_customer_portal_url\": portal_url}\n    except OnyxError:\n        raise\n    except Exception:\n        logger.exception(\"Failed to create customer portal session\")\n        raise OnyxError(\n            OnyxErrorCode.INTERNAL_ERROR,\n            \"Failed to create customer portal session\",\n        )\n\n\n@router.post(\"/create-checkout-session\")\nasync def create_checkout_session(\n    request: CreateCheckoutSessionRequest | None = None,\n    _: User = Depends(current_admin_user),\n) -> dict:\n    \"\"\"Create a Stripe checkout session via the control plane.\"\"\"\n    tenant_id = get_current_tenant_id()\n    billing_period = request.billing_period if request else \"monthly\"\n    seats = request.seats if request else None\n\n    try:\n        checkout_url = fetch_stripe_checkout_session(tenant_id, billing_period, seats)\n        return {\"stripe_checkout_url\": checkout_url}\n    except OnyxError:\n        raise\n    except Exception:\n        logger.exception(\"Failed to create checkout session\")\n        raise OnyxError(\n            OnyxErrorCode.INTERNAL_ERROR,\n            \"Failed to create checkout session\",\n        )\n\n\n@router.post(\"/create-subscription-session\")\nasync def create_subscription_session(\n    request: CreateSubscriptionSessionRequest | None = None,\n    _: User = Depends(current_admin_user),\n) -> SubscriptionSessionResponse:\n    try:\n        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()\n        if not tenant_id:\n            raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, \"Tenant ID not found\")\n\n        billing_period = request.billing_period if request else \"monthly\"\n        session_id = fetch_stripe_checkout_session(tenant_id, billing_period)\n        return SubscriptionSessionResponse(sessionId=session_id)\n\n    except OnyxError:\n        raise\n    except Exception:\n        logger.exception(\"Failed to create subscription session\")\n        raise OnyxError(\n            OnyxErrorCode.INTERNAL_ERROR,\n            \"Failed to create subscription session\",\n        )\n\n\n@router.get(\"/stripe-publishable-key\")\nasync def get_stripe_publishable_key() -> StripePublishableKeyResponse:\n    \"\"\"\n    Fetch the Stripe publishable key.\n    Priority: env var override (for testing) > S3 bucket (production).\n    This endpoint is public (no auth required) since publishable keys are safe to expose.\n    The key is cached in memory to avoid hitting S3 on every request.\n    \"\"\"\n    global _stripe_publishable_key_cache\n\n    # Fast path: return cached value without lock\n    if _stripe_publishable_key_cache:\n        return StripePublishableKeyResponse(\n            publishable_key=_stripe_publishable_key_cache\n        )\n\n    # Use lock to prevent concurrent S3 requests\n    async with _stripe_key_lock:\n        # Double-check after acquiring lock (another request may have populated cache)\n        if _stripe_publishable_key_cache:\n            return StripePublishableKeyResponse(\n                publishable_key=_stripe_publishable_key_cache\n            )\n\n        # Check for env var override first (for local testing with pk_test_* keys)\n        if STRIPE_PUBLISHABLE_KEY_OVERRIDE:\n            key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()\n            if not key.startswith(\"pk_\"):\n                raise OnyxError(\n                    OnyxErrorCode.INTERNAL_ERROR,\n                    \"Invalid Stripe publishable key format\",\n                )\n            _stripe_publishable_key_cache = key\n            return StripePublishableKeyResponse(publishable_key=key)\n\n        # Fall back to S3 bucket\n        if not STRIPE_PUBLISHABLE_KEY_URL:\n            raise OnyxError(\n                OnyxErrorCode.INTERNAL_ERROR,\n                \"Stripe publishable key is not configured\",\n            )\n\n        try:\n            async with httpx.AsyncClient() as client:\n                response = await client.get(STRIPE_PUBLISHABLE_KEY_URL)\n                response.raise_for_status()\n                key = response.text.strip()\n\n                # Validate key format\n                if not key.startswith(\"pk_\"):\n                    raise OnyxError(\n                        OnyxErrorCode.INTERNAL_ERROR,\n                        \"Invalid Stripe publishable key format\",\n                    )\n\n                _stripe_publishable_key_cache = key\n                return StripePublishableKeyResponse(publishable_key=key)\n        except httpx.HTTPError:\n            raise OnyxError(\n                OnyxErrorCode.INTERNAL_ERROR,\n                \"Failed to fetch Stripe publishable key\",\n            )\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/models.py",
    "content": "from datetime import datetime\nfrom typing import Literal\n\nfrom pydantic import BaseModel\n\nfrom onyx.server.settings.models import ApplicationStatus\n\n\nclass CheckoutSessionCreationRequest(BaseModel):\n    quantity: int\n\n\nclass CreateTenantRequest(BaseModel):\n    tenant_id: str\n    initial_admin_email: str\n\n\nclass ProductGatingRequest(BaseModel):\n    tenant_id: str\n    application_status: ApplicationStatus\n\n\nclass ProductGatingFullSyncRequest(BaseModel):\n    gated_tenant_ids: list[str]\n\n\nclass SubscriptionStatusResponse(BaseModel):\n    subscribed: bool\n\n\nclass BillingInformation(BaseModel):\n    stripe_subscription_id: str\n    status: str\n    current_period_start: datetime\n    current_period_end: datetime\n    number_of_seats: int\n    cancel_at_period_end: bool\n    canceled_at: datetime | None\n    trial_start: datetime | None\n    trial_end: datetime | None\n    seats: int\n    payment_method_enabled: bool\n\n\nclass CreateCheckoutSessionRequest(BaseModel):\n    billing_period: Literal[\"monthly\", \"annual\"] = \"monthly\"\n    seats: int | None = None\n    email: str | None = None\n\n\nclass CheckoutSessionCreationResponse(BaseModel):\n    id: str\n\n\nclass ImpersonateRequest(BaseModel):\n    email: str\n\n\nclass TenantCreationPayload(BaseModel):\n    tenant_id: str\n    email: str\n    referral_source: str | None = None\n\n\nclass TenantDeletionPayload(BaseModel):\n    tenant_id: str\n    email: str\n\n\nclass AnonymousUserPath(BaseModel):\n    anonymous_user_path: str | None\n\n\nclass ProductGatingResponse(BaseModel):\n    updated: bool\n    error: str | None\n\n\nclass SubscriptionSessionResponse(BaseModel):\n    sessionId: str\n\n\nclass CreateSubscriptionSessionRequest(BaseModel):\n    \"\"\"Request to create a subscription checkout session.\"\"\"\n\n    billing_period: Literal[\"monthly\", \"annual\"] = \"monthly\"\n\n\nclass TenantByDomainResponse(BaseModel):\n    tenant_id: str\n    number_of_users: int\n    creator_email: str\n\n\nclass TenantByDomainRequest(BaseModel):\n    email: str\n\n\nclass RequestInviteRequest(BaseModel):\n    tenant_id: str\n\n\nclass RequestInviteResponse(BaseModel):\n    success: bool\n    message: str\n\n\nclass PendingUserSnapshot(BaseModel):\n    email: str\n\n\nclass ApproveUserRequest(BaseModel):\n    email: str\n\n\nclass StripePublishableKeyResponse(BaseModel):\n    publishable_key: str\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/product_gating.py",
    "content": "from typing import cast\n\nfrom ee.onyx.configs.app_configs import GATED_TENANTS_KEY\nfrom onyx.configs.constants import ONYX_CLOUD_TENANT_ID\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.redis.redis_pool import get_redis_replica_client\nfrom onyx.server.settings.models import ApplicationStatus\nfrom onyx.server.settings.store import load_settings\nfrom onyx.server.settings.store import store_settings\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\nlogger = setup_logger()\n\n\ndef update_tenant_gating(tenant_id: str, status: ApplicationStatus) -> None:\n    redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)\n\n    # Maintain the GATED_ACCESS set\n    if status == ApplicationStatus.GATED_ACCESS:\n        redis_client.sadd(GATED_TENANTS_KEY, tenant_id)\n    else:\n        redis_client.srem(GATED_TENANTS_KEY, tenant_id)\n\n\ndef store_product_gating(tenant_id: str, application_status: ApplicationStatus) -> None:\n    try:\n        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n\n        settings = load_settings()\n        settings.application_status = application_status\n        store_settings(settings)\n\n        # Store gated tenant information in Redis\n        update_tenant_gating(tenant_id, application_status)\n\n        if token is not None:\n            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n    except Exception:\n        logger.exception(\"Failed to gate product\")\n        raise\n\n\ndef overwrite_full_gated_set(tenant_ids: list[str]) -> None:\n    redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)\n\n    pipeline = redis_client.pipeline()\n\n    # using pipeline doesn't automatically add the tenant_id prefix\n    full_gated_set_key = f\"{ONYX_CLOUD_TENANT_ID}:{GATED_TENANTS_KEY}\"\n\n    # Clear the existing set\n    pipeline.delete(full_gated_set_key)\n\n    # Add all tenant IDs to the set and set their status\n    for tenant_id in tenant_ids:\n        pipeline.sadd(full_gated_set_key, tenant_id)\n\n    # Execute all commands at once\n    pipeline.execute()\n\n\ndef get_gated_tenants() -> set[str]:\n    redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)\n    gated_tenants_bytes = cast(set[bytes], redis_client.smembers(GATED_TENANTS_KEY))\n    return {tenant_id.decode(\"utf-8\") for tenant_id in gated_tenants_bytes}\n\n\ndef is_tenant_gated(tenant_id: str) -> bool:\n    \"\"\"Fast O(1) check if tenant is in gated set (multi-tenant only).\"\"\"\n    redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)\n    return bool(redis_client.sismember(GATED_TENANTS_KEY, tenant_id))\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/provisioning.py",
    "content": "import asyncio\nimport uuid\n\nimport aiohttp  # Async HTTP client\nimport httpx\nimport requests\nfrom fastapi import HTTPException\nfrom fastapi import Request\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.configs.app_configs import HUBSPOT_TRACKING_URL\nfrom ee.onyx.server.tenants.access import generate_data_plane_token\nfrom ee.onyx.server.tenants.models import TenantByDomainResponse\nfrom ee.onyx.server.tenants.models import TenantCreationPayload\nfrom ee.onyx.server.tenants.models import TenantDeletionPayload\nfrom ee.onyx.server.tenants.schema_management import create_schema_if_not_exists\nfrom ee.onyx.server.tenants.schema_management import drop_schema\nfrom ee.onyx.server.tenants.schema_management import run_alembic_migrations\nfrom ee.onyx.server.tenants.user_mapping import add_users_to_tenant\nfrom ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email\nfrom ee.onyx.server.tenants.user_mapping import user_owns_a_tenant\nfrom onyx.auth.users import exceptions\nfrom onyx.configs.app_configs import ANTHROPIC_DEFAULT_API_KEY\nfrom onyx.configs.app_configs import COHERE_DEFAULT_API_KEY\nfrom onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL\nfrom onyx.configs.app_configs import DEV_MODE\nfrom onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY\nfrom onyx.configs.app_configs import OPENROUTER_DEFAULT_API_KEY\nfrom onyx.configs.app_configs import VERTEXAI_DEFAULT_CREDENTIALS\nfrom onyx.configs.app_configs import VERTEXAI_DEFAULT_LOCATION\nfrom onyx.db.engine.sql_engine import get_session_with_shared_schema\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.image_generation import create_default_image_gen_config_from_api_key\nfrom onyx.db.llm import fetch_existing_llm_provider\nfrom onyx.db.llm import update_default_provider\nfrom onyx.db.llm import upsert_cloud_embedding_provider\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.db.models import AvailableTenant\nfrom onyx.db.models import IndexModelStatus\nfrom onyx.db.models import SearchSettings\nfrom onyx.db.models import UserTenantMapping\nfrom onyx.llm.well_known_providers.auto_update_models import LLMRecommendations\nfrom onyx.llm.well_known_providers.constants import ANTHROPIC_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import OPENAI_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import OPENROUTER_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import VERTEX_CREDENTIALS_FILE_KWARG\nfrom onyx.llm.well_known_providers.constants import VERTEX_LOCATION_KWARG\nfrom onyx.llm.well_known_providers.constants import VERTEXAI_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.llm_provider_options import (\n    get_recommendations,\n)\nfrom onyx.llm.well_known_providers.llm_provider_options import (\n    model_configurations_for_provider,\n)\nfrom onyx.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\nfrom onyx.setup import setup_onyx\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.configs import TENANT_ID_PREFIX\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom shared_configs.enums import EmbeddingProvider\n\n\nlogger = setup_logger()\n\n\nasync def get_or_provision_tenant(\n    email: str,\n    referral_source: str | None = None,\n    request: Request | None = None,\n) -> str:\n    \"\"\"\n    Get existing tenant ID for an email or create a new tenant if none exists.\n    This function should only be called after we have verified we want this user's tenant to exist.\n    It returns the tenant ID associated with the email, creating a new tenant if necessary.\n    \"\"\"\n    # Early return for non-multi-tenant mode\n    if not MULTI_TENANT:\n        return POSTGRES_DEFAULT_SCHEMA\n\n    if referral_source and request:\n        await submit_to_hubspot(email, referral_source, request)\n\n    # First, check if the user already has a tenant\n    tenant_id: str | None = None\n    try:\n        tenant_id = get_tenant_id_for_email(email)\n        return tenant_id\n    except exceptions.UserNotExists:\n        # User doesn't exist, so we need to create a new tenant or assign an existing one\n        pass\n\n    try:\n        # Try to get a pre-provisioned tenant\n        tenant_id = await get_available_tenant()\n\n        if tenant_id:\n            # Run migrations to ensure the pre-provisioned tenant schema is current.\n            # Pool tenants may have been created before a new migration was deployed.\n            # Capture as a non-optional local so mypy can type the lambda correctly.\n            _tenant_id: str = tenant_id\n            loop = asyncio.get_running_loop()\n            try:\n                await loop.run_in_executor(\n                    None, lambda: run_alembic_migrations(_tenant_id)\n                )\n            except Exception:\n                # The tenant was already dequeued from the pool — roll it back so\n                # it doesn't end up orphaned (schema exists, but not assigned to anyone).\n                logger.exception(\n                    f\"Migration failed for pre-provisioned tenant {_tenant_id}; rolling back\"\n                )\n                try:\n                    await rollback_tenant_provisioning(_tenant_id)\n                except Exception:\n                    logger.exception(f\"Failed to rollback orphaned tenant {_tenant_id}\")\n                raise\n            # If we have a pre-provisioned tenant, assign it to the user\n            await assign_tenant_to_user(tenant_id, email, referral_source)\n            logger.info(f\"Assigned pre-provisioned tenant {tenant_id} to user {email}\")\n        else:\n            # If no pre-provisioned tenant is available, create a new one on-demand\n            tenant_id = await create_tenant(email, referral_source)\n\n        # Notify control plane if we have created / assigned a new tenant\n        if not DEV_MODE:\n            await notify_control_plane(tenant_id, email, referral_source)\n\n        return tenant_id\n\n    except Exception as e:\n        # If we've encountered an error, log and raise an exception\n        error_msg = \"Failed to provision tenant\"\n        logger.error(error_msg, exc_info=e)\n        raise HTTPException(\n            status_code=500,\n            detail=\"Failed to provision tenant. Please try again later.\",\n        )\n\n\nasync def create_tenant(\n    email: str,\n    referral_source: str | None = None,  # noqa: ARG001\n) -> str:\n    \"\"\"\n    Create a new tenant on-demand when no pre-provisioned tenants are available.\n    This is the fallback method when we can't use a pre-provisioned tenant.\n\n    \"\"\"\n    tenant_id = TENANT_ID_PREFIX + str(uuid.uuid4())\n    logger.info(f\"Creating new tenant {tenant_id} for user {email}\")\n\n    try:\n        # Provision tenant on data plane\n        await provision_tenant(tenant_id, email)\n\n    except Exception as e:\n        logger.exception(f\"Tenant provisioning failed: {str(e)}\")\n        # Attempt to rollback the tenant provisioning\n        try:\n            await rollback_tenant_provisioning(tenant_id)\n        except Exception:\n            logger.exception(f\"Failed to rollback tenant provisioning for {tenant_id}\")\n        raise HTTPException(status_code=500, detail=\"Failed to provision tenant.\")\n\n    return tenant_id\n\n\nasync def provision_tenant(tenant_id: str, email: str) -> None:\n    if not MULTI_TENANT:\n        raise HTTPException(status_code=403, detail=\"Multi-tenancy is not enabled\")\n\n    if user_owns_a_tenant(email):\n        raise HTTPException(\n            status_code=409, detail=\"User already belongs to an organization\"\n        )\n\n    logger.debug(f\"Provisioning tenant {tenant_id} for user {email}\")\n\n    try:\n        # Create the schema for the tenant\n        if not create_schema_if_not_exists(tenant_id):\n            logger.debug(f\"Created schema for tenant {tenant_id}\")\n        else:\n            logger.debug(f\"Schema already exists for tenant {tenant_id}\")\n\n        # Set up the tenant with all necessary configurations\n        await setup_tenant(tenant_id)\n\n        # Assign the tenant to the user\n        await assign_tenant_to_user(tenant_id, email)\n\n    except Exception as e:\n        logger.exception(f\"Failed to create tenant {tenant_id}\")\n        raise HTTPException(\n            status_code=500, detail=f\"Failed to create tenant: {str(e)}\"\n        )\n\n\nasync def notify_control_plane(\n    tenant_id: str, email: str, referral_source: str | None = None\n) -> None:\n    logger.info(\"Fetching billing information\")\n    token = generate_data_plane_token()\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Content-Type\": \"application/json\",\n    }\n    payload = TenantCreationPayload(\n        tenant_id=tenant_id, email=email, referral_source=referral_source\n    )\n\n    async with aiohttp.ClientSession() as session:\n        async with session.post(\n            f\"{CONTROL_PLANE_API_BASE_URL}/tenants/create\",\n            headers=headers,\n            json=payload.model_dump(),\n        ) as response:\n            if response.status != 200:\n                error_text = await response.text()\n                logger.error(f\"Control plane tenant creation failed: {error_text}\")\n                raise Exception(\n                    f\"Failed to create tenant on control plane: {error_text}\"\n                )\n\n\nasync def rollback_tenant_provisioning(tenant_id: str) -> None:\n    \"\"\"\n    Logic to rollback tenant provisioning on data plane.\n    Handles each step independently to ensure maximum cleanup even if some steps fail.\n    \"\"\"\n    logger.info(f\"Rolling back tenant provisioning for tenant_id: {tenant_id}\")\n\n    # Track if any part of the rollback fails\n    rollback_errors = []\n\n    # 1. Try to drop the tenant's schema\n    try:\n        drop_schema(tenant_id)\n        logger.info(f\"Successfully dropped schema for tenant {tenant_id}\")\n    except Exception as e:\n        error_msg = f\"Failed to drop schema for tenant {tenant_id}: {str(e)}\"\n        logger.error(error_msg)\n        rollback_errors.append(error_msg)\n\n    # 2. Try to remove tenant mapping\n    try:\n        with get_session_with_shared_schema() as db_session:\n            db_session.begin()\n            try:\n                db_session.query(UserTenantMapping).filter(\n                    UserTenantMapping.tenant_id == tenant_id\n                ).delete()\n                db_session.commit()\n                logger.info(\n                    f\"Successfully removed user mappings for tenant {tenant_id}\"\n                )\n            except Exception as e:\n                db_session.rollback()\n                raise e\n    except Exception as e:\n        error_msg = f\"Failed to remove user mappings for tenant {tenant_id}: {str(e)}\"\n        logger.error(error_msg)\n        rollback_errors.append(error_msg)\n\n    # 3. If this tenant was in the available tenants table, remove it\n    try:\n        with get_session_with_shared_schema() as db_session:\n            db_session.begin()\n            try:\n                available_tenant = (\n                    db_session.query(AvailableTenant)\n                    .filter(AvailableTenant.tenant_id == tenant_id)\n                    .first()\n                )\n\n                if available_tenant:\n                    db_session.delete(available_tenant)\n                    db_session.commit()\n                    logger.info(\n                        f\"Removed tenant {tenant_id} from available tenants table\"\n                    )\n            except Exception as e:\n                db_session.rollback()\n                raise e\n    except Exception as e:\n        error_msg = f\"Failed to remove tenant {tenant_id} from available tenants table: {str(e)}\"\n        logger.error(error_msg)\n        rollback_errors.append(error_msg)\n\n    # Log summary of rollback operation\n    if rollback_errors:\n        logger.error(f\"Tenant rollback completed with {len(rollback_errors)} errors\")\n    else:\n        logger.info(f\"Tenant rollback completed successfully for tenant {tenant_id}\")\n\n\ndef _build_model_configuration_upsert_requests(\n    provider_name: str,\n    recommendations: LLMRecommendations,\n) -> list[ModelConfigurationUpsertRequest]:\n    model_configurations = model_configurations_for_provider(\n        provider_name, recommendations\n    )\n    return [\n        ModelConfigurationUpsertRequest(\n            name=model_configuration.name,\n            is_visible=model_configuration.is_visible,\n            max_input_tokens=model_configuration.max_input_tokens,\n            supports_image_input=model_configuration.supports_image_input,\n        )\n        for model_configuration in model_configurations\n    ]\n\n\ndef configure_default_api_keys(db_session: Session) -> None:\n    \"\"\"Configure default LLM providers using recommended-models.json for model selection.\"\"\"\n    # Load recommendations from JSON config\n    recommendations = get_recommendations()\n\n    has_set_default_provider = False\n\n    def _upsert(request: LLMProviderUpsertRequest, default_model: str) -> None:\n        nonlocal has_set_default_provider\n        try:\n            existing = fetch_existing_llm_provider(\n                name=request.name, db_session=db_session\n            )\n            if existing:\n                request.id = existing.id\n            provider = upsert_llm_provider(request, db_session)\n            if not has_set_default_provider:\n                update_default_provider(provider.id, default_model, db_session)\n                has_set_default_provider = True\n        except Exception as e:\n            logger.error(f\"Failed to configure {request.provider} provider: {e}\")\n\n    # Configure OpenAI provider\n    if OPENAI_DEFAULT_API_KEY:\n        default_model = recommendations.get_default_model(OPENAI_PROVIDER_NAME)\n        if default_model is None:\n            logger.error(\n                f\"No default model found for {OPENAI_PROVIDER_NAME} in recommendations\"\n            )\n        default_model_name = default_model.name if default_model else \"gpt-5.2\"\n\n        openai_provider = LLMProviderUpsertRequest(\n            name=\"OpenAI\",\n            provider=OPENAI_PROVIDER_NAME,\n            api_key=OPENAI_DEFAULT_API_KEY,\n            model_configurations=_build_model_configuration_upsert_requests(\n                OPENAI_PROVIDER_NAME, recommendations\n            ),\n            api_key_changed=True,\n            is_auto_mode=True,\n        )\n        _upsert(openai_provider, default_model_name)\n\n        # Create default image generation config using the OpenAI API key\n        try:\n            create_default_image_gen_config_from_api_key(\n                db_session, OPENAI_DEFAULT_API_KEY\n            )\n        except Exception as e:\n            logger.error(f\"Failed to create default image gen config: {e}\")\n    else:\n        logger.info(\n            \"OPENAI_DEFAULT_API_KEY not set, skipping OpenAI provider configuration\"\n        )\n\n    # Configure Anthropic provider\n    if ANTHROPIC_DEFAULT_API_KEY:\n        default_model = recommendations.get_default_model(ANTHROPIC_PROVIDER_NAME)\n        if default_model is None:\n            logger.error(\n                f\"No default model found for {ANTHROPIC_PROVIDER_NAME} in recommendations\"\n            )\n        default_model_name = (\n            default_model.name if default_model else \"claude-sonnet-4-5\"\n        )\n\n        anthropic_provider = LLMProviderUpsertRequest(\n            name=\"Anthropic\",\n            provider=ANTHROPIC_PROVIDER_NAME,\n            api_key=ANTHROPIC_DEFAULT_API_KEY,\n            model_configurations=_build_model_configuration_upsert_requests(\n                ANTHROPIC_PROVIDER_NAME, recommendations\n            ),\n            api_key_changed=True,\n            is_auto_mode=True,\n        )\n        _upsert(anthropic_provider, default_model_name)\n    else:\n        logger.info(\n            \"ANTHROPIC_DEFAULT_API_KEY not set, skipping Anthropic provider configuration\"\n        )\n\n    # Configure Vertex AI provider\n    if VERTEXAI_DEFAULT_CREDENTIALS:\n        default_model = recommendations.get_default_model(VERTEXAI_PROVIDER_NAME)\n        if default_model is None:\n            logger.error(\n                f\"No default model found for {VERTEXAI_PROVIDER_NAME} in recommendations\"\n            )\n        default_model_name = default_model.name if default_model else \"gemini-2.5-pro\"\n\n        # Vertex AI uses custom_config for credentials and location\n        custom_config = {\n            VERTEX_CREDENTIALS_FILE_KWARG: VERTEXAI_DEFAULT_CREDENTIALS,\n            VERTEX_LOCATION_KWARG: VERTEXAI_DEFAULT_LOCATION,\n        }\n\n        vertexai_provider = LLMProviderUpsertRequest(\n            name=\"Google Vertex AI\",\n            provider=VERTEXAI_PROVIDER_NAME,\n            custom_config=custom_config,\n            model_configurations=_build_model_configuration_upsert_requests(\n                VERTEXAI_PROVIDER_NAME, recommendations\n            ),\n            api_key_changed=True,\n            is_auto_mode=True,\n        )\n        _upsert(vertexai_provider, default_model_name)\n    else:\n        logger.info(\n            \"VERTEXAI_DEFAULT_CREDENTIALS not set, skipping Vertex AI provider configuration\"\n        )\n\n    # Configure OpenRouter provider\n    if OPENROUTER_DEFAULT_API_KEY:\n        default_model = recommendations.get_default_model(OPENROUTER_PROVIDER_NAME)\n        if default_model is None:\n            logger.error(\n                f\"No default model found for {OPENROUTER_PROVIDER_NAME} in recommendations\"\n            )\n        default_model_name = default_model.name if default_model else \"z-ai/glm-4.7\"\n\n        # For OpenRouter, we use the visible models from recommendations as model_configurations\n        # since OpenRouter models are dynamic (fetched from their API)\n        visible_models = recommendations.get_visible_models(OPENROUTER_PROVIDER_NAME)\n        model_configurations = [\n            ModelConfigurationUpsertRequest(\n                name=model.name,\n                is_visible=True,\n                max_input_tokens=None,\n                display_name=model.display_name,\n            )\n            for model in visible_models\n        ]\n\n        openrouter_provider = LLMProviderUpsertRequest(\n            name=\"OpenRouter\",\n            provider=OPENROUTER_PROVIDER_NAME,\n            api_key=OPENROUTER_DEFAULT_API_KEY,\n            model_configurations=model_configurations,\n            api_key_changed=True,\n            is_auto_mode=True,\n        )\n        _upsert(openrouter_provider, default_model_name)\n    else:\n        logger.info(\n            \"OPENROUTER_DEFAULT_API_KEY not set, skipping OpenRouter provider configuration\"\n        )\n\n    # Configure Cohere embedding provider\n    if COHERE_DEFAULT_API_KEY:\n        cloud_embedding_provider = CloudEmbeddingProviderCreationRequest(\n            provider_type=EmbeddingProvider.COHERE,\n            api_key=COHERE_DEFAULT_API_KEY,\n        )\n\n        try:\n            logger.info(\"Attempting to upsert Cohere cloud embedding provider\")\n            upsert_cloud_embedding_provider(db_session, cloud_embedding_provider)\n            logger.info(\"Successfully upserted Cohere cloud embedding provider\")\n\n            logger.info(\"Updating search settings with Cohere embedding model details\")\n            query = (\n                select(SearchSettings)\n                .where(SearchSettings.status == IndexModelStatus.FUTURE)\n                .order_by(SearchSettings.id.desc())\n            )\n            result = db_session.execute(query)\n            current_search_settings = result.scalars().first()\n\n            if current_search_settings:\n                current_search_settings.model_name = (\n                    \"embed-english-v3.0\"  # Cohere's latest model as of now\n                )\n                current_search_settings.model_dim = (\n                    1024  # Cohere's embed-english-v3.0 dimension\n                )\n                current_search_settings.provider_type = EmbeddingProvider.COHERE\n                current_search_settings.index_name = (\n                    \"danswer_chunk_cohere_embed_english_v3_0\"\n                )\n                current_search_settings.query_prefix = \"\"\n                current_search_settings.passage_prefix = \"\"\n                db_session.commit()\n            else:\n                raise RuntimeError(\n                    \"No search settings specified, DB is not in a valid state\"\n                )\n            logger.info(\"Fetching updated search settings to verify changes\")\n            updated_query = (\n                select(SearchSettings)\n                .where(SearchSettings.status == IndexModelStatus.PRESENT)\n                .order_by(SearchSettings.id.desc())\n            )\n            updated_result = db_session.execute(updated_query)\n            updated_result.scalars().first()\n\n        except Exception:\n            logger.exception(\"Failed to configure Cohere embedding provider\")\n    else:\n        logger.info(\n            \"COHERE_DEFAULT_API_KEY not set, skipping Cohere embedding provider configuration\"\n        )\n\n\nasync def submit_to_hubspot(\n    email: str, referral_source: str | None, request: Request\n) -> None:\n    if not HUBSPOT_TRACKING_URL:\n        logger.info(\"HUBSPOT_TRACKING_URL not set, skipping HubSpot submission\")\n        return\n\n    # HubSpot tracking cookie\n    hubspot_cookie = request.cookies.get(\"hubspotutk\")\n\n    # IP address\n    ip_address = request.client.host if request.client else None\n\n    data = {\n        \"fields\": [\n            {\"name\": \"email\", \"value\": email},\n            {\"name\": \"referral_source\", \"value\": referral_source or \"\"},\n        ],\n        \"context\": {\n            \"hutk\": hubspot_cookie,\n            \"ipAddress\": ip_address,\n            \"pageUri\": str(request.url),\n            \"pageName\": \"User Registration\",\n        },\n    }\n\n    async with httpx.AsyncClient() as client:\n        response = await client.post(HUBSPOT_TRACKING_URL, json=data)\n\n    if response.status_code != 200:\n        logger.error(f\"Failed to submit to HubSpot: {response.text}\")\n\n\nasync def delete_user_from_control_plane(tenant_id: str, email: str) -> None:\n    token = generate_data_plane_token()\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Content-Type\": \"application/json\",\n    }\n    payload = TenantDeletionPayload(tenant_id=tenant_id, email=email)\n\n    async with aiohttp.ClientSession() as session:\n        async with session.delete(\n            f\"{CONTROL_PLANE_API_BASE_URL}/tenants/delete\",\n            headers=headers,\n            json=payload.model_dump(),\n        ) as response:\n            if response.status != 200:\n                error_text = await response.text()\n                logger.error(f\"Control plane tenant creation failed: {error_text}\")\n                raise Exception(\n                    f\"Failed to delete tenant on control plane: {error_text}\"\n                )\n\n\ndef get_tenant_by_domain_from_control_plane(\n    domain: str,\n    tenant_id: str,\n) -> TenantByDomainResponse | None:\n    \"\"\"\n    Fetches tenant information from the control plane based on the email domain.\n\n    Args:\n        domain: The email domain to search for (e.g., \"example.com\")\n\n    Returns:\n        A dictionary containing tenant information if found, None otherwise\n    \"\"\"\n    token = generate_data_plane_token()\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Content-Type\": \"application/json\",\n    }\n\n    try:\n        response = requests.get(\n            f\"{CONTROL_PLANE_API_BASE_URL}/tenant-by-domain\",\n            headers=headers,\n            json={\"domain\": domain, \"tenant_id\": tenant_id},\n        )\n\n        if response.status_code != 200:\n            logger.error(f\"Control plane tenant lookup failed: {response.text}\")\n            return None\n\n        response_data = response.json()\n        if not response_data:\n            return None\n\n        return TenantByDomainResponse(\n            tenant_id=response_data.get(\"tenant_id\"),\n            number_of_users=response_data.get(\"number_of_users\"),\n            creator_email=response_data.get(\"creator_email\"),\n        )\n    except Exception as e:\n        logger.error(f\"Error fetching tenant by domain: {str(e)}\")\n        return None\n\n\nasync def get_available_tenant() -> str | None:\n    \"\"\"\n    Get an available pre-provisioned tenant from the NewAvailableTenant table.\n    Returns the tenant_id if one is available, None otherwise.\n    Uses row-level locking to prevent race conditions when multiple processes\n    try to get an available tenant simultaneously.\n    \"\"\"\n    if not MULTI_TENANT:\n        return None\n\n    with get_session_with_shared_schema() as db_session:\n        try:\n            db_session.begin()\n\n            # Get the oldest available tenant with FOR UPDATE lock to prevent race conditions\n            available_tenant = (\n                db_session.query(AvailableTenant)\n                .order_by(AvailableTenant.date_created)\n                .with_for_update(skip_locked=True)  # Skip locked rows to avoid blocking\n                .first()\n            )\n\n            if available_tenant:\n                tenant_id = available_tenant.tenant_id\n                # Remove the tenant from the available tenants table\n                db_session.delete(available_tenant)\n                db_session.commit()\n                logger.info(f\"Using pre-provisioned tenant {tenant_id}\")\n                return tenant_id\n            else:\n                db_session.rollback()\n                return None\n        except Exception:\n            logger.exception(\"Error getting available tenant\")\n            db_session.rollback()\n            return None\n\n\nasync def setup_tenant(tenant_id: str) -> None:\n    \"\"\"\n    Set up a tenant with all necessary configurations.\n    This is a centralized function that handles all tenant setup logic.\n    \"\"\"\n    token = None\n    try:\n        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n\n        # Run Alembic migrations in a way that isolates it from the current event loop\n        # Create a new event loop for this synchronous operation\n        loop = asyncio.get_event_loop()\n        # Use run_in_executor which properly isolates the thread execution\n        await loop.run_in_executor(None, lambda: run_alembic_migrations(tenant_id))\n\n        # Configure the tenant with default settings\n        with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n            # Configure default API keys\n            configure_default_api_keys(db_session)\n\n            # Set up Onyx with appropriate settings\n            current_search_settings = (\n                db_session.query(SearchSettings)\n                .filter_by(status=IndexModelStatus.FUTURE)\n                .first()\n            )\n            cohere_enabled = (\n                current_search_settings is not None\n                and current_search_settings.provider_type == EmbeddingProvider.COHERE\n            )\n            setup_onyx(db_session, tenant_id, cohere_enabled=cohere_enabled)\n\n    except Exception as e:\n        logger.exception(f\"Failed to set up tenant {tenant_id}\")\n        raise e\n    finally:\n        if token is not None:\n            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\nasync def assign_tenant_to_user(\n    tenant_id: str,\n    email: str,\n    referral_source: str | None = None,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Assign a tenant to a user and perform necessary operations.\n    Uses transaction handling to ensure atomicity and includes retry logic\n    for control plane notifications.\n    \"\"\"\n    # First, add the user to the tenant in a transaction\n\n    try:\n        add_users_to_tenant([email], tenant_id)\n    except Exception:\n        logger.exception(f\"Failed to assign tenant {tenant_id} to user {email}\")\n        raise Exception(\"Failed to assign tenant to user\")\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/proxy.py",
    "content": "\"\"\"Proxy endpoints for billing operations.\n\nThese endpoints run on the CLOUD DATA PLANE (cloud.onyx.app) and serve as a proxy\nfor self-hosted instances to reach the control plane.\n\nFlow:\n  Self-hosted backend → Cloud DP /proxy/* (license auth) → Control plane (JWT auth)\n\nSelf-hosted instances call these endpoints with their license in the Authorization\nheader. The cloud data plane validates the license signature and forwards the\nrequest to the control plane using JWT authentication.\n\nAuth levels by endpoint:\n- /create-checkout-session: No auth (new customer) or expired license OK (renewal)\n- /claim-license: Session ID based (one-time after Stripe payment)\n- /create-customer-portal-session: Expired license OK (need portal to fix payment)\n- /billing-information: Valid license required\n- /license/{tenant_id}: Valid license required\n- /seats/update: Valid license required\n\"\"\"\n\nfrom typing import Literal\n\nimport httpx\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import Header\nfrom fastapi import HTTPException\nfrom pydantic import BaseModel\n\nfrom ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED\nfrom ee.onyx.server.billing.models import SeatUpdateRequest\nfrom ee.onyx.server.billing.models import SeatUpdateResponse\nfrom ee.onyx.server.license.models import LicensePayload\nfrom ee.onyx.server.tenants.access import generate_data_plane_token\nfrom ee.onyx.utils.license import is_license_valid\nfrom ee.onyx.utils.license import verify_license_signature\nfrom onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/proxy\")\n\n\ndef _check_license_enforcement_enabled() -> None:\n    \"\"\"Ensure LICENSE_ENFORCEMENT_ENABLED is true (proxy endpoints only work on cloud DP).\"\"\"\n    if not LICENSE_ENFORCEMENT_ENABLED:\n        raise HTTPException(\n            status_code=501,\n            detail=\"Proxy endpoints are only available on cloud data plane\",\n        )\n\n\ndef _extract_license_from_header(\n    authorization: str | None,\n    required: bool = True,\n) -> str | None:\n    \"\"\"Extract license data from Authorization header.\n\n    Self-hosted instances authenticate to these proxy endpoints by sending their\n    license as a Bearer token: `Authorization: Bearer <base64-encoded-license>`.\n\n    We use the Bearer scheme (RFC 6750) because:\n    1. It's the standard HTTP auth scheme for token-based authentication\n    2. The license blob is cryptographically signed (RSA), so it's self-validating\n    3. No other auth schemes (Basic, Digest, etc.) are supported for license auth\n\n    The license data is the base64-encoded signed blob that contains tenant_id,\n    seats, expiration, etc. We verify the signature to authenticate the caller.\n\n    Args:\n        authorization: The Authorization header value (e.g., \"Bearer <license>\")\n        required: If True, raise 401 when header is missing/invalid\n\n    Returns:\n        License data string (base64-encoded), or None if not required and missing\n\n    Raises:\n        HTTPException: 401 if required and header is missing/invalid\n    \"\"\"\n    if not authorization or not authorization.startswith(\"Bearer \"):\n        if required:\n            raise HTTPException(\n                status_code=401, detail=\"Missing or invalid authorization header\"\n            )\n        return None\n\n    return authorization.split(\" \", 1)[1]\n\n\ndef verify_license_auth(\n    license_data: str,\n    allow_expired: bool = False,\n) -> LicensePayload:\n    \"\"\"Verify license signature and optionally check expiry.\n\n    Args:\n        license_data: Base64-encoded signed license blob\n        allow_expired: If True, accept expired licenses (for renewal flows)\n\n    Returns:\n        LicensePayload if valid\n\n    Raises:\n        HTTPException: If license is invalid or expired (when not allowed)\n    \"\"\"\n    _check_license_enforcement_enabled()\n\n    try:\n        payload = verify_license_signature(license_data)\n    except ValueError as e:\n        raise HTTPException(status_code=401, detail=f\"Invalid license: {e}\")\n\n    if not allow_expired and not is_license_valid(payload):\n        raise HTTPException(status_code=401, detail=\"License has expired\")\n\n    return payload\n\n\nasync def get_license_payload(\n    authorization: str | None = Header(None, alias=\"Authorization\"),\n) -> LicensePayload:\n    \"\"\"Dependency: Require valid (non-expired) license.\n\n    Used for endpoints that require an active subscription.\n    \"\"\"\n    license_data = _extract_license_from_header(authorization, required=True)\n    # license_data is guaranteed non-None when required=True\n    assert license_data is not None\n    return verify_license_auth(license_data, allow_expired=False)\n\n\nasync def get_license_payload_allow_expired(\n    authorization: str | None = Header(None, alias=\"Authorization\"),\n) -> LicensePayload:\n    \"\"\"Dependency: Require license with valid signature, expired OK.\n\n    Used for endpoints needed to fix payment issues (portal, renewal checkout).\n    \"\"\"\n    license_data = _extract_license_from_header(authorization, required=True)\n    # license_data is guaranteed non-None when required=True\n    assert license_data is not None\n    return verify_license_auth(license_data, allow_expired=True)\n\n\nasync def get_optional_license_payload(\n    authorization: str | None = Header(None, alias=\"Authorization\"),\n) -> LicensePayload | None:\n    \"\"\"Dependency: Optional license auth (for checkout - new customers have none).\n\n    Returns None if no license provided, otherwise validates and returns payload.\n    Expired licenses are allowed for renewal flows.\n    \"\"\"\n    _check_license_enforcement_enabled()\n\n    license_data = _extract_license_from_header(authorization, required=False)\n    if license_data is None:\n        return None\n\n    return verify_license_auth(license_data, allow_expired=True)\n\n\nasync def forward_to_control_plane(\n    method: str,\n    path: str,\n    body: dict | None = None,\n    params: dict | None = None,\n) -> dict:\n    \"\"\"Forward a request to the control plane with proper authentication.\"\"\"\n    token = generate_data_plane_token()\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Content-Type\": \"application/json\",\n    }\n\n    url = f\"{CONTROL_PLANE_API_BASE_URL}{path}\"\n\n    try:\n        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:\n            if method == \"GET\":\n                response = await client.get(url, headers=headers, params=params)\n            elif method == \"POST\":\n                response = await client.post(url, headers=headers, json=body)\n            else:\n                raise ValueError(f\"Unsupported HTTP method: {method}\")\n\n            response.raise_for_status()\n            return response.json()\n\n    except httpx.HTTPStatusError as e:\n        status_code = e.response.status_code\n        detail = \"Control plane request failed\"\n        try:\n            error_data = e.response.json()\n            detail = error_data.get(\"detail\", detail)\n        except Exception:\n            pass\n        logger.error(f\"Control plane returned {status_code}: {detail}\")\n        raise HTTPException(status_code=status_code, detail=detail)\n    except httpx.RequestError:\n        logger.exception(\"Failed to connect to control plane\")\n        raise HTTPException(\n            status_code=502, detail=\"Failed to connect to control plane\"\n        )\n\n\n# -----------------------------------------------------------------------------\n# Endpoints\n# -----------------------------------------------------------------------------\n\n\nclass CreateCheckoutSessionRequest(BaseModel):\n    billing_period: Literal[\"monthly\", \"annual\"] = \"monthly\"\n    seats: int | None = None\n    email: str | None = None\n    # Redirect URL after successful checkout - self-hosted passes their instance URL\n    redirect_url: str | None = None\n    # Cancel URL when user exits checkout - returns to upgrade page\n    cancel_url: str | None = None\n\n\nclass CreateCheckoutSessionResponse(BaseModel):\n    url: str\n\n\n@router.post(\"/create-checkout-session\")\nasync def proxy_create_checkout_session(\n    request_body: CreateCheckoutSessionRequest,\n    license_payload: LicensePayload | None = Depends(get_optional_license_payload),\n) -> CreateCheckoutSessionResponse:\n    \"\"\"Proxy checkout session creation to control plane.\n\n    Auth: Optional license (new customers don't have one yet).\n    If license provided, expired is OK (for renewals).\n    \"\"\"\n    # license_payload is None for new customers who don't have a license yet.\n    # In that case, tenant_id is omitted from the request body and the control\n    # plane will create a new tenant during checkout completion.\n    tenant_id = license_payload.tenant_id if license_payload else None\n\n    body: dict = {\n        \"billing_period\": request_body.billing_period,\n    }\n    if tenant_id:\n        body[\"tenant_id\"] = tenant_id\n    if request_body.seats is not None:\n        body[\"seats\"] = request_body.seats\n    if request_body.email:\n        body[\"email\"] = request_body.email\n    if request_body.redirect_url:\n        body[\"redirect_url\"] = request_body.redirect_url\n    if request_body.cancel_url:\n        body[\"cancel_url\"] = request_body.cancel_url\n\n    result = await forward_to_control_plane(\n        \"POST\", \"/create-checkout-session\", body=body\n    )\n    return CreateCheckoutSessionResponse(url=result[\"url\"])\n\n\nclass ClaimLicenseRequest(BaseModel):\n    session_id: str\n\n\nclass ClaimLicenseResponse(BaseModel):\n    tenant_id: str\n    license: str\n    message: str | None = None\n\n\n@router.post(\"/claim-license\")\nasync def proxy_claim_license(\n    request_body: ClaimLicenseRequest,\n) -> ClaimLicenseResponse:\n    \"\"\"Claim a license after successful Stripe checkout.\n\n    Auth: Session ID based (one-time use after payment).\n    The control plane verifies the session_id is valid and unclaimed.\n\n    Returns the license to the caller. For self-hosted instances, they will\n    store the license locally. The cloud DP doesn't need to store it.\n    \"\"\"\n    _check_license_enforcement_enabled()\n\n    result = await forward_to_control_plane(\n        \"POST\",\n        \"/claim-license\",\n        body={\"session_id\": request_body.session_id},\n    )\n\n    tenant_id = result.get(\"tenant_id\")\n    license_data = result.get(\"license\")\n\n    if not tenant_id or not license_data:\n        logger.error(f\"Control plane returned incomplete claim response: {result}\")\n        raise HTTPException(\n            status_code=502,\n            detail=\"Control plane returned incomplete license data\",\n        )\n\n    return ClaimLicenseResponse(\n        tenant_id=tenant_id,\n        license=license_data,\n        message=\"License claimed successfully\",\n    )\n\n\nclass CreateCustomerPortalSessionRequest(BaseModel):\n    return_url: str | None = None\n\n\nclass CreateCustomerPortalSessionResponse(BaseModel):\n    url: str\n\n\n@router.post(\"/create-customer-portal-session\")\nasync def proxy_create_customer_portal_session(\n    request_body: CreateCustomerPortalSessionRequest | None = None,\n    license_payload: LicensePayload = Depends(get_license_payload_allow_expired),\n) -> CreateCustomerPortalSessionResponse:\n    \"\"\"Proxy customer portal session creation to control plane.\n\n    Auth: License required, expired OK (need portal to fix payment issues).\n    \"\"\"\n    # tenant_id is a required field in LicensePayload (Pydantic validates this),\n    # but we check explicitly for defense in depth\n    if not license_payload.tenant_id:\n        raise HTTPException(status_code=401, detail=\"License missing tenant_id\")\n\n    tenant_id = license_payload.tenant_id\n\n    body: dict = {\"tenant_id\": tenant_id}\n    if request_body and request_body.return_url:\n        body[\"return_url\"] = request_body.return_url\n\n    result = await forward_to_control_plane(\n        \"POST\", \"/create-customer-portal-session\", body=body\n    )\n    return CreateCustomerPortalSessionResponse(url=result[\"url\"])\n\n\nclass BillingInformationResponse(BaseModel):\n    tenant_id: str\n    status: str | None = None\n    plan_type: str | None = None\n    seats: int | None = None\n    billing_period: str | None = None\n    current_period_start: str | None = None\n    current_period_end: str | None = None\n    cancel_at_period_end: bool = False\n    canceled_at: str | None = None\n    trial_start: str | None = None\n    trial_end: str | None = None\n    payment_method_enabled: bool = False\n    stripe_subscription_id: str | None = None\n\n\n@router.get(\"/billing-information\")\nasync def proxy_billing_information(\n    license_payload: LicensePayload = Depends(get_license_payload),\n) -> BillingInformationResponse:\n    \"\"\"Proxy billing information request to control plane.\n\n    Auth: Valid (non-expired) license required.\n    \"\"\"\n    # tenant_id is a required field in LicensePayload (Pydantic validates this),\n    # but we check explicitly for defense in depth\n    if not license_payload.tenant_id:\n        raise HTTPException(status_code=401, detail=\"License missing tenant_id\")\n\n    tenant_id = license_payload.tenant_id\n\n    result = await forward_to_control_plane(\n        \"GET\", \"/billing-information\", params={\"tenant_id\": tenant_id}\n    )\n    # Add tenant_id from license if not in response (control plane may not include it)\n    if \"tenant_id\" not in result:\n        result[\"tenant_id\"] = tenant_id\n    return BillingInformationResponse(**result)\n\n\nclass LicenseFetchResponse(BaseModel):\n    license: str\n    tenant_id: str\n\n\n@router.get(\"/license/{tenant_id}\")\nasync def proxy_license_fetch(\n    tenant_id: str,\n    license_payload: LicensePayload = Depends(get_license_payload),\n) -> LicenseFetchResponse:\n    \"\"\"Proxy license fetch to control plane.\n\n    Auth: Valid license required.\n    The tenant_id in path must match the authenticated tenant.\n    \"\"\"\n    # tenant_id is a required field in LicensePayload (Pydantic validates this),\n    # but we check explicitly for defense in depth\n    if not license_payload.tenant_id:\n        raise HTTPException(status_code=401, detail=\"License missing tenant_id\")\n\n    if tenant_id != license_payload.tenant_id:\n        raise HTTPException(\n            status_code=403,\n            detail=\"Cannot fetch license for a different tenant\",\n        )\n\n    result = await forward_to_control_plane(\"GET\", f\"/license/{tenant_id}\")\n\n    license_data = result.get(\"license\")\n    if not license_data:\n        logger.error(f\"Control plane returned incomplete license response: {result}\")\n        raise HTTPException(\n            status_code=502,\n            detail=\"Control plane returned incomplete license data\",\n        )\n\n    # Return license to caller - self-hosted instance stores it via /api/license/claim\n    return LicenseFetchResponse(license=license_data, tenant_id=tenant_id)\n\n\n@router.post(\"/seats/update\")\nasync def proxy_seat_update(\n    request_body: SeatUpdateRequest,\n    license_payload: LicensePayload = Depends(get_license_payload),\n) -> SeatUpdateResponse:\n    \"\"\"Proxy seat update to control plane.\n\n    Auth: Valid (non-expired) license required.\n    Handles Stripe proration and license regeneration.\n    Returns the regenerated license in the response for the caller to store.\n    \"\"\"\n    if not license_payload.tenant_id:\n        raise HTTPException(status_code=401, detail=\"License missing tenant_id\")\n\n    tenant_id = license_payload.tenant_id\n\n    result = await forward_to_control_plane(\n        \"POST\",\n        \"/seats/update\",\n        body={\n            \"tenant_id\": tenant_id,\n            \"new_seat_count\": request_body.new_seat_count,\n        },\n    )\n\n    # Return license in response - self-hosted instance stores it via /api/license/claim\n    return SeatUpdateResponse(\n        success=result.get(\"success\", False),\n        current_seats=result.get(\"current_seats\", 0),\n        used_seats=result.get(\"used_seats\", 0),\n        message=result.get(\"message\"),\n        license=result.get(\"license\"),\n    )\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/schema_management.py",
    "content": "import logging\nimport os\nimport re\nfrom types import SimpleNamespace\n\nfrom sqlalchemy import text\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.schema import CreateSchema\n\nfrom alembic import command\nfrom alembic.config import Config\nfrom onyx.db.engine.sql_engine import build_connection_string\nfrom onyx.db.engine.sql_engine import get_sqlalchemy_engine\nfrom shared_configs.configs import TENANT_ID_PREFIX\n\nlogger = logging.getLogger(__name__)\n\n# Regex pattern for valid tenant IDs:\n# - UUID format: tenant_xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\n# - AWS instance ID format: tenant_i-xxxxxxxxxxxxxxxxx\n# Also useful for not accidentally dropping `public` schema\nTENANT_ID_PATTERN = re.compile(\n    rf\"^{re.escape(TENANT_ID_PREFIX)}(\"\n    r\"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\"  # UUID\n    r\"|i-[a-f0-9]+\"  # AWS instance ID\n    r\")$\"\n)\n\n\ndef validate_tenant_id(tenant_id: str) -> bool:\n    \"\"\"Validate that tenant_id matches expected format.\n\n    This is important for SQL injection prevention since schema names\n    cannot be parameterized in SQL and must be formatted directly.\n    \"\"\"\n    return bool(TENANT_ID_PATTERN.match(tenant_id))\n\n\ndef run_alembic_migrations(schema_name: str) -> None:\n    logger.info(f\"Starting Alembic migrations for schema: {schema_name}\")\n\n    try:\n        current_dir = os.path.dirname(os.path.abspath(__file__))\n        root_dir = os.path.abspath(os.path.join(current_dir, \"..\", \"..\", \"..\", \"..\"))\n        alembic_ini_path = os.path.join(root_dir, \"alembic.ini\")\n\n        # Configure Alembic\n        alembic_cfg = Config(alembic_ini_path)\n        alembic_cfg.set_main_option(\"sqlalchemy.url\", build_connection_string())\n        alembic_cfg.set_main_option(\n            \"script_location\", os.path.join(root_dir, \"alembic\")\n        )\n\n        # Ensure that logging isn't broken\n        alembic_cfg.attributes[\"configure_logger\"] = False\n\n        # Mimic command-line options by adding 'cmd_opts' to the config\n        alembic_cfg.cmd_opts = SimpleNamespace()  # type: ignore\n        alembic_cfg.cmd_opts.x = [f\"schemas={schema_name}\"]  # type: ignore\n\n        # Run migrations programmatically\n        command.upgrade(alembic_cfg, \"head\")\n\n        # Run migrations programmatically\n        logger.info(\n            f\"Alembic migrations completed successfully for schema: {schema_name}\"\n        )\n\n    except Exception as e:\n        logger.exception(f\"Alembic migration failed for schema {schema_name}: {str(e)}\")\n        raise\n\n\ndef create_schema_if_not_exists(tenant_id: str) -> bool:\n    with Session(get_sqlalchemy_engine()) as db_session:\n        with db_session.begin():\n            result = db_session.execute(\n                text(\n                    \"SELECT schema_name FROM information_schema.schemata WHERE schema_name = :schema_name\"\n                ),\n                {\"schema_name\": tenant_id},\n            )\n            schema_exists = result.scalar() is not None\n            if not schema_exists:\n                stmt = CreateSchema(tenant_id)\n                db_session.execute(stmt)\n                return True\n            return False\n\n\ndef drop_schema(tenant_id: str) -> None:\n    \"\"\"Drop a tenant's schema.\n\n    Uses strict regex validation to reject unexpected formats early,\n    preventing SQL injection since schema names cannot be parameterized.\n    \"\"\"\n    if not validate_tenant_id(tenant_id):\n        raise ValueError(f\"Invalid tenant_id format: {tenant_id}\")\n\n    with get_sqlalchemy_engine().connect() as connection:\n        with connection.begin():\n            # Use string formatting with validated tenant_id (safe after validation)\n            connection.execute(text(f'DROP SCHEMA IF EXISTS \"{tenant_id}\" CASCADE'))\n\n\ndef get_current_alembic_version(tenant_id: str) -> str:\n    \"\"\"Get the current Alembic version for a tenant.\"\"\"\n    from alembic.runtime.migration import MigrationContext\n    from sqlalchemy import text\n\n    engine = get_sqlalchemy_engine()\n\n    # Set the search path to the tenant's schema\n    with engine.connect() as connection:\n        connection.execute(text(f'SET search_path TO \"{tenant_id}\"'))\n\n        # Get the current version from the alembic_version table\n        context = MigrationContext.configure(connection)\n        current_rev = context.get_current_revision()\n\n    return current_rev or \"head\"\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/team_membership_api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.server.tenants.provisioning import delete_user_from_control_plane\nfrom ee.onyx.server.tenants.user_mapping import remove_all_users_from_tenant\nfrom ee.onyx.server.tenants.user_mapping import remove_users_from_tenant\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import User\nfrom onyx.db.auth import get_user_count\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.users import delete_user_from_db\nfrom onyx.db.users import get_user_by_email\nfrom onyx.server.manage.models import UserByEmail\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/tenants\")\n\n\n@router.post(\"/leave-team\")\nasync def leave_organization(\n    user_email: UserByEmail,\n    current_user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    tenant_id = get_current_tenant_id()\n\n    if current_user.email != user_email.user_email:\n        raise HTTPException(\n            status_code=403, detail=\"You can only leave the organization as yourself\"\n        )\n\n    user_to_delete = get_user_by_email(user_email.user_email, db_session)\n    if user_to_delete is None:\n        raise HTTPException(status_code=404, detail=\"User not found\")\n\n    num_admin_users = await get_user_count(only_admin_users=True)\n\n    should_delete_tenant = num_admin_users == 1\n\n    if should_delete_tenant:\n        logger.info(\n            \"Last admin user is leaving the organization. Deleting tenant from control plane.\"\n        )\n        try:\n            await delete_user_from_control_plane(tenant_id, user_to_delete.email)\n            logger.debug(\"User deleted from control plane\")\n        except Exception as e:\n            logger.exception(\n                f\"Failed to delete user from control plane for tenant {tenant_id}: {e}\"\n            )\n            raise HTTPException(\n                status_code=500,\n                detail=f\"Failed to remove user from control plane: {str(e)}\",\n            )\n\n    db_session.expunge(user_to_delete)\n    delete_user_from_db(user_to_delete, db_session)\n\n    if should_delete_tenant:\n        remove_all_users_from_tenant(tenant_id)\n    else:\n        remove_users_from_tenant([user_to_delete.email], tenant_id)\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/tenant_management_api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\n\nfrom ee.onyx.server.tenants.models import TenantByDomainResponse\nfrom ee.onyx.server.tenants.provisioning import get_tenant_by_domain_from_control_plane\nfrom onyx.auth.users import current_user\nfrom onyx.auth.users import User\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/tenants\")\n\nFORBIDDEN_COMMON_EMAIL_SUBSTRINGS = [\n    \"gmail\",\n    \"outlook\",\n    \"yahoo\",\n    \"hotmail\",\n    \"icloud\",\n    \"msn\",\n    \"hotmail\",\n    \"hotmail.co.uk\",\n]\n\n\n@router.get(\"/existing-team-by-domain\")\ndef get_existing_tenant_by_domain(\n    user: User = Depends(current_user),\n) -> TenantByDomainResponse | None:\n    domain = user.email.split(\"@\")[1]\n    if any(substring in domain for substring in FORBIDDEN_COMMON_EMAIL_SUBSTRINGS):\n        return None\n\n    tenant_id = get_current_tenant_id()\n\n    return get_tenant_by_domain_from_control_plane(domain, tenant_id)\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/user_invitations_api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\n\nfrom ee.onyx.server.tenants.models import ApproveUserRequest\nfrom ee.onyx.server.tenants.models import PendingUserSnapshot\nfrom ee.onyx.server.tenants.models import RequestInviteRequest\nfrom ee.onyx.server.tenants.user_mapping import accept_user_invite\nfrom ee.onyx.server.tenants.user_mapping import approve_user_invite\nfrom ee.onyx.server.tenants.user_mapping import deny_user_invite\nfrom ee.onyx.server.tenants.user_mapping import invite_self_to_tenant\nfrom onyx.auth.invited_users import get_pending_users\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.auth.users import User\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/tenants\")\n\n\n@router.post(\"/users/invite/request\")\nasync def request_invite(\n    invite_request: RequestInviteRequest,\n    user: User = Depends(current_admin_user),\n) -> None:\n    try:\n        invite_self_to_tenant(user.email, invite_request.tenant_id)\n    except Exception as e:\n        logger.exception(\n            f\"Failed to invite self to tenant {invite_request.tenant_id}: {e}\"\n        )\n        raise HTTPException(status_code=500, detail=str(e))\n\n\n@router.get(\"/users/pending\")\ndef list_pending_users(\n    _: User = Depends(current_admin_user),\n) -> list[PendingUserSnapshot]:\n    pending_emails = get_pending_users()\n    return [PendingUserSnapshot(email=email) for email in pending_emails]\n\n\n@router.post(\"/users/invite/approve\")\nasync def approve_user(\n    approve_user_request: ApproveUserRequest,\n    _: User = Depends(current_admin_user),\n) -> None:\n    tenant_id = get_current_tenant_id()\n    approve_user_invite(approve_user_request.email, tenant_id)\n\n\n@router.post(\"/users/invite/accept\")\nasync def accept_invite(\n    invite_request: RequestInviteRequest,\n    user: User = Depends(current_user),\n) -> None:\n    \"\"\"\n    Accept an invitation to join a tenant.\n    \"\"\"\n    try:\n        accept_user_invite(user.email, invite_request.tenant_id)\n    except Exception as e:\n        logger.exception(f\"Failed to accept invite: {str(e)}\")\n        raise HTTPException(status_code=500, detail=\"Failed to accept invitation\")\n\n\n@router.post(\"/users/invite/deny\")\nasync def deny_invite(\n    invite_request: RequestInviteRequest,\n    user: User = Depends(current_user),\n) -> None:\n    \"\"\"\n    Deny an invitation to join a tenant.\n    \"\"\"\n    try:\n        deny_user_invite(user.email, invite_request.tenant_id)\n    except Exception as e:\n        logger.exception(f\"Failed to deny invite: {str(e)}\")\n        raise HTTPException(status_code=500, detail=\"Failed to deny invitation\")\n"
  },
  {
    "path": "backend/ee/onyx/server/tenants/user_mapping.py",
    "content": "from fastapi_users import exceptions\nfrom sqlalchemy import select\n\nfrom onyx.auth.invited_users import get_invited_users\nfrom onyx.auth.invited_users import get_pending_users\nfrom onyx.auth.invited_users import write_invited_users\nfrom onyx.auth.invited_users import write_pending_users\nfrom onyx.db.engine.sql_engine import get_session_with_shared_schema\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.models import UserTenantMapping\nfrom onyx.server.manage.models import TenantSnapshot\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\nlogger = setup_logger()\n\n\ndef get_tenant_id_for_email(email: str) -> str:\n    if not MULTI_TENANT:\n        return POSTGRES_DEFAULT_SCHEMA\n    # Implement logic to get tenant_id from the mapping table\n    try:\n        with get_session_with_shared_schema() as db_session:\n            # First try to get an active tenant\n            result = db_session.execute(\n                select(UserTenantMapping).where(\n                    UserTenantMapping.email == email,\n                    UserTenantMapping.active == True,  # noqa: E712\n                )\n            )\n            mapping = result.scalar_one_or_none()\n            tenant_id = mapping.tenant_id if mapping else None\n\n            # If no active tenant found, try to get the first inactive one\n            if tenant_id is None:\n                result = db_session.execute(\n                    select(UserTenantMapping).where(\n                        UserTenantMapping.email == email,\n                        UserTenantMapping.active == False,  # noqa: E712\n                    )\n                )\n                mapping = result.scalar_one_or_none()\n                if mapping:\n                    # Mark this mapping as active\n                    mapping.active = True\n                    db_session.commit()\n                    tenant_id = mapping.tenant_id\n    except Exception as e:\n        logger.exception(f\"Error getting tenant id for email {email}: {e}\")\n        raise exceptions.UserNotExists()\n\n    if tenant_id is None:\n        raise exceptions.UserNotExists()\n    return tenant_id\n\n\ndef user_owns_a_tenant(email: str) -> bool:\n    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:\n        result = (\n            db_session.query(UserTenantMapping)\n            .filter(UserTenantMapping.email == email)\n            .first()\n        )\n        return result is not None\n\n\ndef add_users_to_tenant(emails: list[str], tenant_id: str) -> None:\n    \"\"\"\n    Add users to a tenant with proper transaction handling.\n    Checks if users already have a tenant mapping to avoid duplicates.\n\n    If a user already has an active mapping to a different tenant, they receive\n    an inactive mapping (invitation) to this tenant. They can accept the\n    invitation later to switch tenants.\n\n    \"\"\"\n    unique_emails = set(emails)\n    if not unique_emails:\n        return\n\n    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:\n        try:\n            # Start a transaction\n            db_session.begin()\n\n            # Batch query 1: Get all existing mappings for these emails to this tenant\n            # Lock rows to prevent concurrent modifications\n            existing_mappings = (\n                db_session.query(UserTenantMapping)\n                .filter(\n                    UserTenantMapping.email.in_(unique_emails),\n                    UserTenantMapping.tenant_id == tenant_id,\n                )\n                .with_for_update()\n                .all()\n            )\n            emails_with_mapping = {m.email for m in existing_mappings}\n\n            # Batch query 2: Get all active mappings for these emails (any tenant)\n            active_mappings = (\n                db_session.query(UserTenantMapping)\n                .filter(\n                    UserTenantMapping.email.in_(unique_emails),\n                    UserTenantMapping.active == True,  # noqa: E712\n                )\n                .all()\n            )\n            emails_with_active_mapping = {m.email for m in active_mappings}\n\n            # Add mappings for emails that don't already have one to this tenant\n            for email in unique_emails:\n                if email in emails_with_mapping:\n                    continue\n\n                # Create mapping: inactive if user belongs to another tenant (invitation),\n                # active otherwise\n                db_session.add(\n                    UserTenantMapping(\n                        email=email,\n                        tenant_id=tenant_id,\n                        active=email not in emails_with_active_mapping,\n                    )\n                )\n\n            # Commit the transaction\n            db_session.commit()\n            logger.info(f\"Successfully added users {emails} to tenant {tenant_id}\")\n\n        except Exception:\n            logger.exception(f\"Failed to add users to tenant {tenant_id}\")\n            db_session.rollback()\n            raise\n\n\ndef remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:\n    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:\n        try:\n            mappings_to_delete = (\n                db_session.query(UserTenantMapping)\n                .filter(\n                    UserTenantMapping.email.in_(emails),\n                    UserTenantMapping.tenant_id == tenant_id,\n                )\n                .all()\n            )\n\n            for mapping in mappings_to_delete:\n                db_session.delete(mapping)\n\n            db_session.commit()\n        except Exception as e:\n            logger.exception(\n                f\"Failed to remove users from tenant {tenant_id}: {str(e)}\"\n            )\n            db_session.rollback()\n\n\ndef remove_all_users_from_tenant(tenant_id: str) -> None:\n    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:\n        db_session.query(UserTenantMapping).filter(\n            UserTenantMapping.tenant_id == tenant_id\n        ).delete()\n        db_session.commit()\n\n\ndef invite_self_to_tenant(email: str, tenant_id: str) -> None:\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n    try:\n        pending_users = get_pending_users()\n        if email in pending_users:\n            return\n        write_pending_users(pending_users + [email])\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\ndef approve_user_invite(email: str, tenant_id: str) -> None:\n    \"\"\"\n    Approve a user invite to a tenant.\n    This will delete all existing records for this email and create a new mapping entry for the user in this tenant.\n    \"\"\"\n    with get_session_with_shared_schema() as db_session:\n        # Delete all existing records for this email\n        db_session.query(UserTenantMapping).filter(\n            UserTenantMapping.email == email\n        ).delete()\n\n        # Create a new mapping entry for the user in this tenant\n        new_mapping = UserTenantMapping(email=email, tenant_id=tenant_id, active=True)\n        db_session.add(new_mapping)\n        db_session.commit()\n\n    # Also remove the user from pending users list\n    # Remove from pending users\n    pending_users = get_pending_users()\n    if email in pending_users:\n        pending_users.remove(email)\n        write_pending_users(pending_users)\n\n    # Add to invited users\n    invited_users = get_invited_users()\n    if email not in invited_users:\n        invited_users.append(email)\n        write_invited_users(invited_users)\n\n\ndef accept_user_invite(email: str, tenant_id: str) -> None:\n    \"\"\"\n    Accept an invitation to join a tenant.\n    This activates the user's mapping to the tenant.\n    \"\"\"\n    with get_session_with_shared_schema() as db_session:\n        try:\n            # Lock the user's mappings first to prevent race conditions.\n            # This ensures no concurrent request can modify this user's mappings.\n            active_mapping = (\n                db_session.query(UserTenantMapping)\n                .filter(\n                    UserTenantMapping.email == email,\n                    UserTenantMapping.active == True,  # noqa: E712\n                )\n                .with_for_update()\n                .first()\n            )\n\n            # If an active mapping exists, delete it\n            if active_mapping:\n                db_session.delete(active_mapping)\n                logger.info(\n                    f\"Deleted existing active mapping for user {email} in tenant {tenant_id}\"\n                )\n\n            # Find the inactive mapping for this user and tenant\n            mapping = (\n                db_session.query(UserTenantMapping)\n                .filter(\n                    UserTenantMapping.email == email,\n                    UserTenantMapping.tenant_id == tenant_id,\n                    UserTenantMapping.active == False,  # noqa: E712\n                )\n                .first()\n            )\n\n            if mapping:\n                # Set all other mappings for this user to inactive\n                db_session.query(UserTenantMapping).filter(\n                    UserTenantMapping.email == email,\n                    UserTenantMapping.active == True,  # noqa: E712\n                ).update({\"active\": False})\n\n                # Activate this mapping\n                mapping.active = True\n                db_session.commit()\n                logger.info(f\"User {email} accepted invitation to tenant {tenant_id}\")\n            else:\n                logger.warning(\n                    f\"No invitation found for user {email} in tenant {tenant_id}\"\n                )\n\n        except Exception as e:\n            db_session.rollback()\n            logger.exception(\n                f\"Failed to accept invitation for user {email} to tenant {tenant_id}: {str(e)}\"\n            )\n            raise\n\n    # Remove from invited users list since they've accepted\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n    try:\n        invited_users = get_invited_users()\n        if email in invited_users:\n            invited_users.remove(email)\n            write_invited_users(invited_users)\n            logger.info(f\"Removed {email} from invited users list after acceptance\")\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\ndef deny_user_invite(email: str, tenant_id: str) -> None:\n    \"\"\"\n    Deny an invitation to join a tenant.\n    This removes the user's mapping to the tenant.\n    \"\"\"\n    with get_session_with_shared_schema() as db_session:\n        # Delete the mapping for this user and tenant\n        result = (\n            db_session.query(UserTenantMapping)\n            .filter(\n                UserTenantMapping.email == email,\n                UserTenantMapping.tenant_id == tenant_id,\n                UserTenantMapping.active == False,  # noqa: E712\n            )\n            .delete()\n        )\n\n        db_session.commit()\n        if result:\n            logger.info(f\"User {email} denied invitation to tenant {tenant_id}\")\n        else:\n            logger.warning(\n                f\"No invitation found for user {email} in tenant {tenant_id}\"\n            )\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n    try:\n        pending_users = get_invited_users()\n        if email in pending_users:\n            pending_users.remove(email)\n            write_invited_users(pending_users)\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\ndef get_tenant_count(tenant_id: str) -> int:\n    \"\"\"\n    Get the number of active users for this tenant.\n\n    A user counts toward the seat count if:\n    1. They have an active mapping to this tenant (UserTenantMapping.active == True)\n    2. AND the User is active (User.is_active == True)\n    3. AND the User is not the anonymous system user\n\n    TODO: Exclude API key dummy users from seat counting. API keys create\n    users with emails like `__DANSWER_API_KEY_*` that should not count toward\n    seat limits. See: https://linear.app/onyx-app/issue/ENG-3518\n    \"\"\"\n    from onyx.configs.constants import ANONYMOUS_USER_EMAIL\n    from onyx.db.models import User\n\n    # First get all emails with active mappings to this tenant\n    with get_session_with_shared_schema() as db_session:\n        active_mapping_emails = (\n            db_session.query(UserTenantMapping.email)\n            .filter(\n                UserTenantMapping.tenant_id == tenant_id,\n                UserTenantMapping.active == True,  # noqa: E712\n                UserTenantMapping.email != ANONYMOUS_USER_EMAIL,\n            )\n            .all()\n        )\n        emails = [email for (email,) in active_mapping_emails]\n\n    if not emails:\n        return 0\n\n    # Now count how many of those users are actually active in the tenant's User table\n    with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n        user_count = (\n            db_session.query(User)\n            .filter(\n                User.email.in_(emails),  # type: ignore\n                User.is_active == True,  # type: ignore  # noqa: E712\n            )\n            .count()\n        )\n\n        return user_count\n\n\ndef get_tenant_invitation(email: str) -> TenantSnapshot | None:\n    \"\"\"\n    Get the first tenant invitation for this user\n    \"\"\"\n    with get_session_with_shared_schema() as db_session:\n        # Get the first tenant invitation for this user\n        invitation = (\n            db_session.query(UserTenantMapping)\n            .filter(\n                UserTenantMapping.email == email,\n                UserTenantMapping.active == False,  # noqa: E712\n            )\n            .first()\n        )\n\n        if invitation:\n            # Get the user count for this tenant\n            user_count = (\n                db_session.query(UserTenantMapping)\n                .filter(\n                    UserTenantMapping.tenant_id == invitation.tenant_id,\n                    UserTenantMapping.active == True,  # noqa: E712\n                )\n                .count()\n            )\n            return TenantSnapshot(\n                tenant_id=invitation.tenant_id, number_of_users=user_count\n            )\n\n        return None\n"
  },
  {
    "path": "backend/ee/onyx/server/token_rate_limits/api.py",
    "content": "from collections import defaultdict\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.token_limit import fetch_all_user_group_token_rate_limits_by_group\nfrom ee.onyx.db.token_limit import fetch_user_group_token_rate_limits_for_user\nfrom ee.onyx.db.token_limit import insert_user_group_token_rate_limit\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.token_limit import fetch_all_user_token_rate_limits\nfrom onyx.db.token_limit import insert_user_token_rate_limit\nfrom onyx.server.query_and_chat.token_limit import any_rate_limit_exists\nfrom onyx.server.token_rate_limits.models import TokenRateLimitArgs\nfrom onyx.server.token_rate_limits.models import TokenRateLimitDisplay\n\nrouter = APIRouter(prefix=\"/admin/token-rate-limits\", tags=PUBLIC_API_TAGS)\n\n\n\"\"\"\nGroup Token Limit Settings\n\"\"\"\n\n\n@router.get(\"/user-groups\")\ndef get_all_group_token_limit_settings(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> dict[str, list[TokenRateLimitDisplay]]:\n    user_groups_to_token_rate_limits = fetch_all_user_group_token_rate_limits_by_group(\n        db_session\n    )\n\n    token_rate_limits_by_group = defaultdict(list)\n    for token_rate_limit, group_name in user_groups_to_token_rate_limits:\n        token_rate_limits_by_group[group_name].append(\n            TokenRateLimitDisplay.from_db(token_rate_limit)\n        )\n\n    return dict(token_rate_limits_by_group)\n\n\n@router.get(\"/user-group/{group_id}\")\ndef get_group_token_limit_settings(\n    group_id: int,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[TokenRateLimitDisplay]:\n    return [\n        TokenRateLimitDisplay.from_db(token_rate_limit)\n        for token_rate_limit in fetch_user_group_token_rate_limits_for_user(\n            db_session=db_session,\n            group_id=group_id,\n            user=user,\n        )\n    ]\n\n\n@router.post(\"/user-group/{group_id}\")\ndef create_group_token_limit_settings(\n    group_id: int,\n    token_limit_settings: TokenRateLimitArgs,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> TokenRateLimitDisplay:\n    rate_limit_display = TokenRateLimitDisplay.from_db(\n        insert_user_group_token_rate_limit(\n            db_session=db_session,\n            token_rate_limit_settings=token_limit_settings,\n            group_id=group_id,\n        )\n    )\n    # clear cache in case this was the first rate limit created\n    any_rate_limit_exists.cache_clear()\n    return rate_limit_display\n\n\n\"\"\"\nUser Token Limit Settings\n\"\"\"\n\n\n@router.get(\"/users\")\ndef get_user_token_limit_settings(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[TokenRateLimitDisplay]:\n    return [\n        TokenRateLimitDisplay.from_db(token_rate_limit)\n        for token_rate_limit in fetch_all_user_token_rate_limits(db_session)\n    ]\n\n\n@router.post(\"/users\")\ndef create_user_token_limit_settings(\n    token_limit_settings: TokenRateLimitArgs,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> TokenRateLimitDisplay:\n    rate_limit_display = TokenRateLimitDisplay.from_db(\n        insert_user_token_rate_limit(db_session, token_limit_settings)\n    )\n    # clear cache in case this was the first rate limit created\n    any_rate_limit_exists.cache_clear()\n    return rate_limit_display\n"
  },
  {
    "path": "backend/ee/onyx/server/usage_limits.py",
    "content": "\"\"\"EE Usage limits - trial detection via billing information.\"\"\"\n\nfrom ee.onyx.server.tenants.billing import fetch_billing_information\nfrom ee.onyx.server.tenants.models import BillingInformation\nfrom ee.onyx.server.tenants.models import SubscriptionStatusResponse\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n\ndef is_tenant_on_trial(tenant_id: str) -> bool:\n    \"\"\"\n    Determine if a tenant is currently on a trial subscription.\n\n    In multi-tenant mode, we fetch billing information from the control plane\n    to determine if the tenant has an active trial.\n    \"\"\"\n    if not MULTI_TENANT:\n        return False\n\n    try:\n        billing_info = fetch_billing_information(tenant_id)\n\n        # If not subscribed at all, check if we have trial information\n        if isinstance(billing_info, SubscriptionStatusResponse):\n            # No subscription means they're likely on trial (new tenant)\n            return True\n\n        if isinstance(billing_info, BillingInformation):\n            return billing_info.status == \"trialing\"\n\n        return False\n\n    except Exception as e:\n        logger.warning(f\"Failed to fetch billing info for trial check: {e}\")\n        # Default to trial limits on error (more restrictive = safer)\n        return True\n"
  },
  {
    "path": "backend/ee/onyx/server/user_group/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.exc import IntegrityError\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.persona import update_persona_access\nfrom ee.onyx.db.user_group import add_users_to_user_group\nfrom ee.onyx.db.user_group import delete_user_group as db_delete_user_group\nfrom ee.onyx.db.user_group import fetch_user_group\nfrom ee.onyx.db.user_group import fetch_user_groups\nfrom ee.onyx.db.user_group import fetch_user_groups_for_user\nfrom ee.onyx.db.user_group import insert_user_group\nfrom ee.onyx.db.user_group import prepare_user_group_for_deletion\nfrom ee.onyx.db.user_group import rename_user_group\nfrom ee.onyx.db.user_group import update_user_curator_relationship\nfrom ee.onyx.db.user_group import update_user_group\nfrom ee.onyx.server.user_group.models import AddUsersToUserGroupRequest\nfrom ee.onyx.server.user_group.models import MinimalUserGroupSnapshot\nfrom ee.onyx.server.user_group.models import SetCuratorRequest\nfrom ee.onyx.server.user_group.models import UpdateGroupAgentsRequest\nfrom ee.onyx.server.user_group.models import UserGroup\nfrom ee.onyx.server.user_group.models import UserGroupCreate\nfrom ee.onyx.server.user_group.models import UserGroupRename\nfrom ee.onyx.server.user_group.models import UserGroupUpdate\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.models import UserRole\nfrom onyx.db.persona import get_persona_by_id\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/manage\", tags=PUBLIC_API_TAGS)\n\n\n@router.get(\"/admin/user-group\")\ndef list_user_groups(\n    include_default: bool = False,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[UserGroup]:\n    if user.role == UserRole.ADMIN:\n        user_groups = fetch_user_groups(\n            db_session,\n            only_up_to_date=False,\n            eager_load_for_snapshot=True,\n            include_default=include_default,\n        )\n    else:\n        user_groups = fetch_user_groups_for_user(\n            db_session=db_session,\n            user_id=user.id,\n            only_curator_groups=user.role == UserRole.CURATOR,\n            eager_load_for_snapshot=True,\n            include_default=include_default,\n        )\n    return [UserGroup.from_model(user_group) for user_group in user_groups]\n\n\n@router.get(\"/user-groups/minimal\")\ndef list_minimal_user_groups(\n    include_default: bool = False,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[MinimalUserGroupSnapshot]:\n    if user.role == UserRole.ADMIN:\n        user_groups = fetch_user_groups(\n            db_session,\n            only_up_to_date=False,\n            include_default=include_default,\n        )\n    else:\n        user_groups = fetch_user_groups_for_user(\n            db_session=db_session,\n            user_id=user.id,\n            include_default=include_default,\n        )\n    return [\n        MinimalUserGroupSnapshot.from_model(user_group) for user_group in user_groups\n    ]\n\n\n@router.get(\"/admin/user-group/{user_group_id}/permissions\")\ndef get_user_group_permissions(\n    user_group_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[str]:\n    group = fetch_user_group(db_session, user_group_id)\n    if group is None:\n        raise OnyxError(OnyxErrorCode.NOT_FOUND, \"User group not found\")\n    return [\n        grant.permission.value\n        for grant in group.permission_grants\n        if not grant.is_deleted\n    ]\n\n\n@router.post(\"/admin/user-group\")\ndef create_user_group(\n    user_group: UserGroupCreate,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> UserGroup:\n    try:\n        db_user_group = insert_user_group(db_session, user_group)\n    except IntegrityError:\n        raise HTTPException(\n            400,\n            f\"User group with name '{user_group.name}' already exists. Please \"\n            + \"choose a different name.\",\n        )\n    return UserGroup.from_model(db_user_group)\n\n\n@router.patch(\"/admin/user-group/rename\")\ndef rename_user_group_endpoint(\n    rename_request: UserGroupRename,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> UserGroup:\n    group = fetch_user_group(db_session, rename_request.id)\n    if group and group.is_default:\n        raise OnyxError(OnyxErrorCode.CONFLICT, \"Cannot rename a default system group.\")\n    try:\n        return UserGroup.from_model(\n            rename_user_group(\n                db_session=db_session,\n                user_group_id=rename_request.id,\n                new_name=rename_request.name,\n            )\n        )\n    except IntegrityError:\n        raise OnyxError(\n            OnyxErrorCode.DUPLICATE_RESOURCE,\n            f\"User group with name '{rename_request.name}' already exists.\",\n        )\n    except ValueError as e:\n        msg = str(e)\n        if \"not found\" in msg.lower():\n            raise OnyxError(OnyxErrorCode.NOT_FOUND, msg)\n        raise OnyxError(OnyxErrorCode.CONFLICT, msg)\n\n\n@router.patch(\"/admin/user-group/{user_group_id}\")\ndef patch_user_group(\n    user_group_id: int,\n    user_group_update: UserGroupUpdate,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> UserGroup:\n    try:\n        return UserGroup.from_model(\n            update_user_group(\n                db_session=db_session,\n                user=user,\n                user_group_id=user_group_id,\n                user_group_update=user_group_update,\n            )\n        )\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n\n\n@router.post(\"/admin/user-group/{user_group_id}/add-users\")\ndef add_users(\n    user_group_id: int,\n    add_users_request: AddUsersToUserGroupRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> UserGroup:\n    try:\n        return UserGroup.from_model(\n            add_users_to_user_group(\n                db_session=db_session,\n                user=user,\n                user_group_id=user_group_id,\n                user_ids=add_users_request.user_ids,\n            )\n        )\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n\n\n@router.post(\"/admin/user-group/{user_group_id}/set-curator\")\ndef set_user_curator(\n    user_group_id: int,\n    set_curator_request: SetCuratorRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    try:\n        update_user_curator_relationship(\n            db_session=db_session,\n            user_group_id=user_group_id,\n            set_curator_request=set_curator_request,\n            user_making_change=user,\n        )\n    except ValueError as e:\n        logger.error(f\"Error setting user curator: {e}\")\n        raise HTTPException(status_code=404, detail=str(e))\n\n\n@router.delete(\"/admin/user-group/{user_group_id}\")\ndef delete_user_group(\n    user_group_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    group = fetch_user_group(db_session, user_group_id)\n    if group and group.is_default:\n        raise OnyxError(OnyxErrorCode.CONFLICT, \"Cannot delete a default system group.\")\n    try:\n        prepare_user_group_for_deletion(db_session, user_group_id)\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n\n    if DISABLE_VECTOR_DB:\n        user_group = fetch_user_group(db_session, user_group_id)\n        if user_group:\n            db_delete_user_group(db_session, user_group)\n\n\n@router.patch(\"/admin/user-group/{user_group_id}/agents\")\ndef update_group_agents(\n    user_group_id: int,\n    request: UpdateGroupAgentsRequest,\n    user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    for agent_id in request.added_agent_ids:\n        persona = get_persona_by_id(\n            persona_id=agent_id, user=user, db_session=db_session\n        )\n        current_group_ids = [g.id for g in persona.groups]\n        if user_group_id not in current_group_ids:\n            update_persona_access(\n                persona_id=agent_id,\n                creator_user_id=user.id,\n                db_session=db_session,\n                group_ids=current_group_ids + [user_group_id],\n            )\n\n    for agent_id in request.removed_agent_ids:\n        persona = get_persona_by_id(\n            persona_id=agent_id, user=user, db_session=db_session\n        )\n        current_group_ids = [g.id for g in persona.groups]\n        update_persona_access(\n            persona_id=agent_id,\n            creator_user_id=user.id,\n            db_session=db_session,\n            group_ids=[gid for gid in current_group_ids if gid != user_group_id],\n        )\n\n    db_session.commit()\n"
  },
  {
    "path": "backend/ee/onyx/server/user_group/models.py",
    "content": "from uuid import UUID\n\nfrom pydantic import BaseModel\n\nfrom onyx.db.models import UserGroup as UserGroupModel\nfrom onyx.server.documents.models import ConnectorCredentialPairDescriptor\nfrom onyx.server.documents.models import ConnectorSnapshot\nfrom onyx.server.documents.models import CredentialSnapshot\nfrom onyx.server.features.document_set.models import DocumentSet\nfrom onyx.server.features.persona.models import PersonaSnapshot\nfrom onyx.server.manage.models import UserInfo\nfrom onyx.server.manage.models import UserPreferences\n\n\nclass UserGroup(BaseModel):\n    id: int\n    name: str\n    users: list[UserInfo]\n    curator_ids: list[UUID]\n    cc_pairs: list[ConnectorCredentialPairDescriptor]\n    document_sets: list[DocumentSet]\n    personas: list[PersonaSnapshot]\n    is_up_to_date: bool\n    is_up_for_deletion: bool\n    is_default: bool\n\n    @classmethod\n    def from_model(cls, user_group_model: UserGroupModel) -> \"UserGroup\":\n        return cls(\n            id=user_group_model.id,\n            name=user_group_model.name,\n            users=[\n                UserInfo(\n                    id=str(user.id),\n                    email=user.email,\n                    is_active=user.is_active,\n                    is_superuser=user.is_superuser,\n                    is_verified=user.is_verified,\n                    role=user.role,\n                    preferences=UserPreferences(\n                        default_model=user.default_model,\n                        chosen_assistants=user.chosen_assistants,\n                    ),\n                )\n                for user in user_group_model.users\n            ],\n            curator_ids=[\n                user.user_id\n                for user in user_group_model.user_group_relationships\n                if user.is_curator and user.user_id is not None\n            ],\n            cc_pairs=[\n                ConnectorCredentialPairDescriptor(\n                    id=cc_pair_relationship.cc_pair.id,\n                    name=cc_pair_relationship.cc_pair.name,\n                    connector=ConnectorSnapshot.from_connector_db_model(\n                        cc_pair_relationship.cc_pair.connector,\n                        credential_ids=[cc_pair_relationship.cc_pair.credential_id],\n                    ),\n                    credential=CredentialSnapshot.from_credential_db_model(\n                        cc_pair_relationship.cc_pair.credential\n                    ),\n                    access_type=cc_pair_relationship.cc_pair.access_type,\n                )\n                for cc_pair_relationship in user_group_model.cc_pair_relationships\n                if cc_pair_relationship.is_current\n            ],\n            document_sets=[\n                DocumentSet.from_model(ds) for ds in user_group_model.document_sets\n            ],\n            personas=[\n                PersonaSnapshot.from_model(persona)\n                for persona in user_group_model.personas\n                if not persona.deleted\n            ],\n            is_up_to_date=user_group_model.is_up_to_date,\n            is_up_for_deletion=user_group_model.is_up_for_deletion,\n            is_default=user_group_model.is_default,\n        )\n\n\nclass MinimalUserGroupSnapshot(BaseModel):\n    id: int\n    name: str\n    is_default: bool\n\n    @classmethod\n    def from_model(cls, user_group_model: UserGroupModel) -> \"MinimalUserGroupSnapshot\":\n        return cls(\n            id=user_group_model.id,\n            name=user_group_model.name,\n            is_default=user_group_model.is_default,\n        )\n\n\nclass UserGroupCreate(BaseModel):\n    name: str\n    user_ids: list[UUID]\n    cc_pair_ids: list[int]\n\n\nclass UserGroupUpdate(BaseModel):\n    user_ids: list[UUID]\n    cc_pair_ids: list[int]\n\n\nclass AddUsersToUserGroupRequest(BaseModel):\n    user_ids: list[UUID]\n\n\nclass UserGroupRename(BaseModel):\n    id: int\n    name: str\n\n\nclass SetCuratorRequest(BaseModel):\n    user_id: UUID\n    is_curator: bool\n\n\nclass UpdateGroupAgentsRequest(BaseModel):\n    added_agent_ids: list[int]\n    removed_agent_ids: list[int]\n"
  },
  {
    "path": "backend/ee/onyx/utils/__init__.py",
    "content": ""
  },
  {
    "path": "backend/ee/onyx/utils/encryption.py",
    "content": "from functools import lru_cache\nfrom os import urandom\n\nfrom cryptography.hazmat.backends import default_backend\nfrom cryptography.hazmat.primitives import padding\nfrom cryptography.hazmat.primitives.ciphers import algorithms\nfrom cryptography.hazmat.primitives.ciphers import Cipher\nfrom cryptography.hazmat.primitives.ciphers import modes\n\nfrom onyx.configs.app_configs import ENCRYPTION_KEY_SECRET\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\n\nlogger = setup_logger()\n\n\n@lru_cache(maxsize=2)\ndef _get_trimmed_key(key: str) -> bytes:\n    encoded_key = key.encode()\n    key_length = len(encoded_key)\n    if key_length < 16:\n        raise RuntimeError(\"Invalid ENCRYPTION_KEY_SECRET - too short\")\n\n    # Trim to the largest valid AES key size that fits\n    valid_lengths = [32, 24, 16]\n    for size in valid_lengths:\n        if key_length >= size:\n            return encoded_key[:size]\n\n    raise AssertionError(\"unreachable\")\n\n\ndef _encrypt_string(input_str: str, key: str | None = None) -> bytes:\n    effective_key = key if key is not None else ENCRYPTION_KEY_SECRET\n    if not effective_key:\n        return input_str.encode()\n\n    trimmed = _get_trimmed_key(effective_key)\n    iv = urandom(16)\n    padder = padding.PKCS7(algorithms.AES.block_size).padder()\n    padded_data = padder.update(input_str.encode()) + padder.finalize()\n\n    cipher = Cipher(algorithms.AES(trimmed), modes.CBC(iv), backend=default_backend())\n    encryptor = cipher.encryptor()\n    encrypted_data = encryptor.update(padded_data) + encryptor.finalize()\n\n    return iv + encrypted_data\n\n\ndef _decrypt_bytes(input_bytes: bytes, key: str | None = None) -> str:\n    effective_key = key if key is not None else ENCRYPTION_KEY_SECRET\n    if not effective_key:\n        return input_bytes.decode()\n\n    trimmed = _get_trimmed_key(effective_key)\n    try:\n        iv = input_bytes[:16]\n        encrypted_data = input_bytes[16:]\n\n        cipher = Cipher(\n            algorithms.AES(trimmed), modes.CBC(iv), backend=default_backend()\n        )\n        decryptor = cipher.decryptor()\n        decrypted_padded_data = decryptor.update(encrypted_data) + decryptor.finalize()\n\n        unpadder = padding.PKCS7(algorithms.AES.block_size).unpadder()\n        decrypted_data = unpadder.update(decrypted_padded_data) + unpadder.finalize()\n\n        return decrypted_data.decode()\n    except (ValueError, UnicodeDecodeError):\n        if key is not None:\n            # Explicit key was provided — don't fall back silently\n            raise\n        # Read path: attempt raw UTF-8 decode as a fallback for legacy data.\n        # Does NOT handle data encrypted with a different key — that\n        # ciphertext is not valid UTF-8 and will raise below.\n        logger.warning(\n            \"AES decryption failed — falling back to raw decode. Run the re-encrypt secrets script to rotate to the current key.\"\n        )\n        try:\n            return input_bytes.decode()\n        except UnicodeDecodeError:\n            raise ValueError(\n                \"Data is not valid UTF-8 — likely encrypted with a different key. \"\n                \"Run the re-encrypt secrets script to rotate to the current key.\"\n            ) from None\n\n\ndef encrypt_string_to_bytes(input_str: str, key: str | None = None) -> bytes:\n    versioned_encryption_fn = fetch_versioned_implementation(\n        \"onyx.utils.encryption\", \"_encrypt_string\"\n    )\n    return versioned_encryption_fn(input_str, key=key)\n\n\ndef decrypt_bytes_to_string(input_bytes: bytes, key: str | None = None) -> str:\n    versioned_decryption_fn = fetch_versioned_implementation(\n        \"onyx.utils.encryption\", \"_decrypt_bytes\"\n    )\n    return versioned_decryption_fn(input_bytes, key=key)\n\n\ndef test_encryption() -> None:\n    test_string = \"Onyx is the BEST!\"\n    encrypted_bytes = encrypt_string_to_bytes(test_string)\n    decrypted_string = decrypt_bytes_to_string(encrypted_bytes)\n    if test_string != decrypted_string:\n        raise RuntimeError(\"Encryption decryption test failed\")\n"
  },
  {
    "path": "backend/ee/onyx/utils/license.py",
    "content": "\"\"\"RSA-4096 license signature verification utilities.\"\"\"\n\nimport base64\nimport json\nimport os\nfrom datetime import datetime\nfrom datetime import timezone\nfrom pathlib import Path\n\nfrom cryptography.exceptions import InvalidSignature\nfrom cryptography.hazmat.primitives import hashes\nfrom cryptography.hazmat.primitives import serialization\nfrom cryptography.hazmat.primitives.asymmetric import padding\nfrom cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey\n\nfrom ee.onyx.server.license.models import LicenseData\nfrom ee.onyx.server.license.models import LicensePayload\nfrom onyx.server.settings.models import ApplicationStatus\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Path to the license public key file\n_LICENSE_PUBLIC_KEY_PATH = (\n    Path(__file__).parent.parent.parent.parent / \"keys\" / \"license_public_key.pem\"\n)\n\n\ndef _get_public_key() -> RSAPublicKey:\n    \"\"\"Load the public key from file, with env var override.\"\"\"\n    # Allow env var override for flexibility\n    key_pem = os.environ.get(\"LICENSE_PUBLIC_KEY_PEM\")\n\n    if not key_pem:\n        # Read from file\n        if not _LICENSE_PUBLIC_KEY_PATH.exists():\n            raise ValueError(\n                f\"License public key not found at {_LICENSE_PUBLIC_KEY_PATH}. \"\n                \"License verification requires the control plane public key.\"\n            )\n        key_pem = _LICENSE_PUBLIC_KEY_PATH.read_text()\n\n    key = serialization.load_pem_public_key(key_pem.encode())\n    if not isinstance(key, RSAPublicKey):\n        raise ValueError(\"Expected RSA public key\")\n    return key\n\n\ndef verify_license_signature(license_data: str) -> LicensePayload:\n    \"\"\"\n    Verify RSA-4096 signature and return payload if valid.\n\n    Args:\n        license_data: Base64-encoded JSON containing payload and signature\n\n    Returns:\n        LicensePayload if signature is valid\n\n    Raises:\n        ValueError: If license data is invalid or signature verification fails\n    \"\"\"\n    try:\n        decoded = json.loads(base64.b64decode(license_data))\n\n        # Parse into LicenseData to validate structure\n        license_obj = LicenseData(**decoded)\n\n        # IMPORTANT: Use the ORIGINAL payload JSON for signature verification,\n        # not re-serialized through Pydantic. Pydantic may format fields differently\n        # (e.g., datetime \"+00:00\" vs \"Z\") which would break signature verification.\n        original_payload = decoded.get(\"payload\", {})\n        payload_json = json.dumps(original_payload, sort_keys=True)\n        signature_bytes = base64.b64decode(license_obj.signature)\n\n        # Verify signature using PSS padding (modern standard)\n        public_key = _get_public_key()\n\n        public_key.verify(\n            signature_bytes,\n            payload_json.encode(),\n            padding.PSS(\n                mgf=padding.MGF1(hashes.SHA256()),\n                salt_length=padding.PSS.MAX_LENGTH,\n            ),\n            hashes.SHA256(),\n        )\n\n        return license_obj.payload\n\n    except InvalidSignature:\n        logger.error(\"[verify_license] FAILED: Signature verification failed\")\n        raise ValueError(\"Invalid license signature\")\n    except json.JSONDecodeError as e:\n        logger.error(f\"[verify_license] FAILED: JSON decode error: {e}\")\n        raise ValueError(\"Invalid license format: not valid JSON\")\n    except (ValueError, KeyError, TypeError) as e:\n        logger.error(\n            f\"[verify_license] FAILED: Validation error: {type(e).__name__}: {e}\"\n        )\n        raise ValueError(f\"Invalid license format: {type(e).__name__}: {e}\")\n    except Exception:\n        logger.exception(\"[verify_license] FAILED: Unexpected error\")\n        raise ValueError(\"License verification failed: unexpected error\")\n\n\ndef get_license_status(\n    payload: LicensePayload,\n    grace_period_end: datetime | None = None,\n) -> ApplicationStatus:\n    \"\"\"\n    Determine current license status based on expiry.\n\n    Args:\n        payload: The verified license payload\n        grace_period_end: Optional grace period end datetime\n\n    Returns:\n        ApplicationStatus indicating current license state\n    \"\"\"\n    now = datetime.now(timezone.utc)\n\n    # Check if grace period has expired\n    if grace_period_end and now > grace_period_end:\n        return ApplicationStatus.GATED_ACCESS\n\n    # Check if license has expired\n    if now > payload.expires_at:\n        if grace_period_end and now <= grace_period_end:\n            return ApplicationStatus.GRACE_PERIOD\n        return ApplicationStatus.GATED_ACCESS\n\n    # License is valid\n    return ApplicationStatus.ACTIVE\n\n\ndef is_license_valid(payload: LicensePayload) -> bool:\n    \"\"\"Check if a license is currently valid (not expired).\"\"\"\n    now = datetime.now(timezone.utc)\n    return now <= payload.expires_at\n"
  },
  {
    "path": "backend/ee/onyx/utils/posthog_client.py",
    "content": "import json\nfrom typing import Any\nfrom urllib.parse import unquote\n\nfrom posthog import Posthog\n\nfrom ee.onyx.configs.app_configs import MARKETING_POSTHOG_API_KEY\nfrom ee.onyx.configs.app_configs import POSTHOG_API_KEY\nfrom ee.onyx.configs.app_configs import POSTHOG_DEBUG_LOGS_ENABLED\nfrom ee.onyx.configs.app_configs import POSTHOG_HOST\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n\ndef posthog_on_error(error: Any, items: Any) -> None:\n    \"\"\"Log any PostHog delivery errors.\"\"\"\n    logger.error(f\"PostHog error: {error}, items: {items}\")\n\n\nposthog: Posthog | None = None\nif POSTHOG_API_KEY:\n    posthog = Posthog(\n        project_api_key=POSTHOG_API_KEY,\n        host=POSTHOG_HOST,\n        debug=POSTHOG_DEBUG_LOGS_ENABLED,\n        on_error=posthog_on_error,\n    )\nelif MULTI_TENANT:\n    logger.warning(\n        \"POSTHOG_API_KEY is not set but MULTI_TENANT is enabled — \"\n        \"PostHog telemetry and feature flags will be disabled\"\n    )\n\n# For cross referencing between cloud and www Onyx sites\n# NOTE: These clients are separate because they are separate posthog projects.\n# We should eventually unify them into a single posthog project,\n# which would no longer require this workaround\nmarketing_posthog = None\nif MARKETING_POSTHOG_API_KEY:\n    marketing_posthog = Posthog(\n        project_api_key=MARKETING_POSTHOG_API_KEY,\n        host=POSTHOG_HOST,\n        debug=POSTHOG_DEBUG_LOGS_ENABLED,\n        on_error=posthog_on_error,\n    )\n\n\ndef capture_and_sync_with_alternate_posthog(\n    alternate_distinct_id: str, event: str, properties: dict[str, Any]\n) -> None:\n    \"\"\"\n    Identify in both PostHog projects and capture the event in marketing.\n    - Marketing keeps the marketing distinct_id (for feature flags).\n    - Cloud identify uses the cloud distinct_id\n    \"\"\"\n    if not marketing_posthog:\n        return\n\n    props = properties.copy()\n\n    try:\n        marketing_posthog.identify(distinct_id=alternate_distinct_id, properties=props)\n        marketing_posthog.capture(alternate_distinct_id, event, props)\n        marketing_posthog.flush()\n    except Exception as e:\n        logger.error(f\"Error capturing marketing posthog event: {e}\")\n\n    try:\n        if posthog and (cloud_user_id := props.get(\"onyx_cloud_user_id\")):\n            cloud_props = props.copy()\n            cloud_props.pop(\"onyx_cloud_user_id\", None)\n\n            posthog.identify(\n                distinct_id=cloud_user_id,\n                properties=cloud_props,\n            )\n    except Exception as e:\n        logger.error(f\"Error identifying cloud posthog user: {e}\")\n\n\ndef alias_user(distinct_id: str, anonymous_id: str) -> None:\n    \"\"\"Link an anonymous distinct_id to an identified user, merging person profiles.\n\n    No-ops when the IDs match (e.g. returning users whose PostHog cookie\n    already contains their identified user ID).\n    \"\"\"\n    if not posthog or anonymous_id == distinct_id:\n        return\n\n    try:\n        posthog.alias(previous_id=anonymous_id, distinct_id=distinct_id)\n        posthog.flush()\n    except Exception as e:\n        logger.error(f\"Error aliasing PostHog user: {e}\")\n\n\ndef get_anon_id_from_request(request: Any) -> str | None:\n    \"\"\"Extract the anonymous distinct_id from the app PostHog cookie on a request.\"\"\"\n    if not POSTHOG_API_KEY:\n        return None\n\n    cookie_name = f\"ph_{POSTHOG_API_KEY}_posthog\"\n    if (cookie_value := request.cookies.get(cookie_name)) and (\n        parsed := parse_posthog_cookie(cookie_value)\n    ):\n        return parsed.get(\"distinct_id\")\n\n    return None\n\n\ndef get_marketing_posthog_cookie_name() -> str | None:\n    if not MARKETING_POSTHOG_API_KEY:\n        return None\n    return f\"onyx_custom_ph_{MARKETING_POSTHOG_API_KEY}_posthog\"\n\n\ndef parse_posthog_cookie(cookie_value: str) -> dict[str, Any] | None:\n    \"\"\"\n    Parse a URL-encoded JSON PostHog cookie\n\n    Expected format (URL-encoded):\n    {\"distinct_id\":\"...\", \"featureFlags\":{\"landing_page_variant\":\"...\"}, ...}\n\n    Returns:\n        Dict with 'distinct_id' explicitly required and all other cookie values\n        passed through as-is, or None if parsing fails or distinct_id is missing.\n    \"\"\"\n    try:\n        decoded_cookie = unquote(cookie_value)\n        cookie_data = json.loads(decoded_cookie)\n\n        distinct_id = cookie_data.get(\"distinct_id\")\n        if not distinct_id or not isinstance(distinct_id, str):\n            return None\n\n        return cookie_data\n    except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:\n        logger.warning(f\"Failed to parse cookie: {e}\")\n        return None\n"
  },
  {
    "path": "backend/ee/onyx/utils/telemetry.py",
    "content": "from typing import Any\n\nfrom ee.onyx.utils.posthog_client import posthog\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef event_telemetry(\n    distinct_id: str, event: str, properties: dict[str, Any] | None = None\n) -> None:\n    \"\"\"Capture and send an event to PostHog, flushing immediately.\"\"\"\n    if not posthog:\n        return\n\n    logger.info(f\"Capturing PostHog event: {distinct_id} {event} {properties}\")\n    try:\n        posthog.capture(distinct_id, event, properties)\n        posthog.flush()\n    except Exception as e:\n        logger.error(f\"Error capturing PostHog event: {e}\")\n\n\ndef identify_user(distinct_id: str, properties: dict[str, Any] | None = None) -> None:\n    \"\"\"Create/update a PostHog person profile, flushing immediately.\"\"\"\n    if not posthog:\n        return\n\n    try:\n        posthog.identify(distinct_id, properties)\n        posthog.flush()\n    except Exception as e:\n        logger.error(f\"Error identifying PostHog user: {e}\")\n"
  },
  {
    "path": "backend/generated/README.md",
    "content": "- Generated Files\n* Generated files live here. This directory should be git ignored."
  },
  {
    "path": "backend/keys/license_public_key.pem",
    "content": "-----BEGIN PUBLIC KEY-----\nMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA5DpchQujdxjCwpc4/RQP\nHej6rc3SS/5ENCXL0I8NAfMogel0fqG6PKRhonyEh/Bt3P4q18y8vYzAShwf4b6Q\naS0WwshbvnkjyWlsK0BY4HLBKPkTpes7kaz8MwmPZDeelvGJ7SNv3FvyJR4QsoSQ\nGSoB5iTH7hi63TjzdxtckkXoNG+GdVd/koxVDUv2uWcAoWIFTTcbKWyuq2SS/5Sf\nxdVaIArqfAhLpnNbnM9OS7lZ1xP+29ZXpHxDoeluz35tJLMNBYn9u0y+puo1kW1E\nTOGizlAq5kmEMsTJ55e9ZuyIV3gZAUaUKe8CxYJPkOGt0Gj6e1jHoHZCBJmaq97Y\nstKj//84HNBzajaryEZuEfRecJ94ANEjkD8u9cGmW+9VxRe5544zWguP5WMT/nv1\n0Q+jkOBW2hkY5SS0Rug4cblxiB7bDymWkaX6+sC0VWd5g6WXp36EuP2T0v3mYuHU\nGDEiWbD44ToREPVwE/M07ny8qhLo/HYk2l8DKFt83hXe7ePBnyQdcsrVbQWOO1na\nj43OkoU5gOFyOkrk2RmmtCjA8jSnw+tGCTpRaRcshqoWC1MjZyU+8/kDteXNkmv9\n/B5VxzYSyX+abl7yAu5wLiUPW8l+mOazzWu0nPkmiA160ArxnRyxbGnmp4dUIrt5\nazYku4tQYLSsSabfhcpeiCsCAwEAAQ==\n-----END PUBLIC KEY-----\n"
  },
  {
    "path": "backend/model_server/__init__.py",
    "content": ""
  },
  {
    "path": "backend/model_server/constants.py",
    "content": "MODEL_WARM_UP_STRING = \"hi \" * 512\n\n\nclass GPUStatus:\n    CUDA = \"cuda\"\n    MAC_MPS = \"mps\"\n    NONE = \"none\"\n"
  },
  {
    "path": "backend/model_server/encoders.py",
    "content": "import asyncio\nimport time\nfrom typing import Any\nfrom typing import TYPE_CHECKING\n\nfrom fastapi import APIRouter\nfrom fastapi import HTTPException\nfrom fastapi import Request\n\nfrom model_server.utils import simple_log_function_time\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.enums import EmbedTextType\nfrom shared_configs.model_server_models import Embedding\nfrom shared_configs.model_server_models import EmbedRequest\nfrom shared_configs.model_server_models import EmbedResponse\n\nif TYPE_CHECKING:\n    from sentence_transformers import SentenceTransformer\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/encoder\")\n\n\n_GLOBAL_MODELS_DICT: dict[str, \"SentenceTransformer\"] = {}\n\n\ndef get_embedding_model(\n    model_name: str,\n    max_context_length: int,\n) -> \"SentenceTransformer\":\n    \"\"\"\n    Loads or returns a cached SentenceTransformer, sets max_seq_length, pins device,\n    pre-warms rotary caches once, and wraps encode() with a lock to avoid cache races.\n    \"\"\"\n    from sentence_transformers import SentenceTransformer\n\n    def _prewarm_rope(st_model: \"SentenceTransformer\", target_len: int) -> None:\n        \"\"\"\n        Build RoPE cos/sin caches once on the final device/dtype so later forwards only read.\n        Works by calling the underlying HF model directly with dummy IDs/attention.\n        \"\"\"\n        try:\n            # ensure > max seq after tokenization\n            # Ideally we would use the saved tokenizer, but whatever it's ok\n            # we'll make an assumption about tokenization here\n            long_text = \"x \" * (target_len * 2)\n            _ = st_model.encode(\n                [long_text],\n                batch_size=1,\n                convert_to_tensor=True,\n                show_progress_bar=False,\n                normalize_embeddings=False,\n            )\n            logger.info(\"RoPE pre-warm successful\")\n        except Exception as e:\n            logger.warning(f\"RoPE pre-warm skipped/failed: {e}\")\n\n    global _GLOBAL_MODELS_DICT\n\n    if model_name not in _GLOBAL_MODELS_DICT:\n        logger.notice(f\"Loading {model_name}\")\n        model = SentenceTransformer(\n            model_name_or_path=model_name,\n            trust_remote_code=True,\n        )\n        model.max_seq_length = max_context_length\n        _prewarm_rope(model, max_context_length)\n        _GLOBAL_MODELS_DICT[model_name] = model\n    else:\n        model = _GLOBAL_MODELS_DICT[model_name]\n        if max_context_length != model.max_seq_length:\n            model.max_seq_length = max_context_length\n            prev = getattr(model, \"_rope_prewarmed_to\", 0)\n            if max_context_length > int(prev or 0):\n                _prewarm_rope(model, max_context_length)\n\n    return _GLOBAL_MODELS_DICT[model_name]\n\n\nENCODING_RETRIES = 3\nENCODING_RETRY_DELAY = 0.1\n\n\ndef _concurrent_embedding(\n    texts: list[str], model: \"SentenceTransformer\", normalize_embeddings: bool\n) -> Any:\n    \"\"\"Synchronous wrapper for concurrent_embedding to use with run_in_executor.\"\"\"\n    for _ in range(ENCODING_RETRIES):\n        try:\n            return model.encode(texts, normalize_embeddings=normalize_embeddings)\n        except RuntimeError as e:\n            # There is a concurrency bug in the SentenceTransformer library that causes\n            # the model to fail to encode texts. It's pretty rare and we want to allow\n            # concurrent embedding, hence we retry (the specific error is\n            # \"RuntimeError: Already borrowed\" and occurs in the transformers library)\n            logger.warning(f\"Error encoding texts, retrying: {e}\")\n            time.sleep(ENCODING_RETRY_DELAY)\n    return model.encode(texts, normalize_embeddings=normalize_embeddings)\n\n\n@simple_log_function_time()\nasync def embed_text(\n    texts: list[str],\n    model_name: str | None,\n    max_context_length: int,\n    normalize_embeddings: bool,\n    prefix: str | None,\n    gpu_type: str = \"UNKNOWN\",\n) -> list[Embedding]:\n    if not all(texts):\n        logger.error(\"Empty strings provided for embedding\")\n        raise ValueError(\"Empty strings are not allowed for embedding.\")\n\n    if not texts:\n        logger.error(\"No texts provided for embedding\")\n        raise ValueError(\"No texts provided for embedding.\")\n\n    start = time.monotonic()\n\n    total_chars = 0\n    for text in texts:\n        total_chars += len(text)\n\n    # Only local models should call this function now\n    # API providers should go directly to API server\n\n    if model_name is not None:\n        logger.info(\n            f\"Embedding {len(texts)} texts with {total_chars} total characters with local model: {model_name}\"\n        )\n\n        prefixed_texts = [f\"{prefix}{text}\" for text in texts] if prefix else texts\n\n        local_model = get_embedding_model(\n            model_name=model_name, max_context_length=max_context_length\n        )\n        # Run CPU-bound embedding in a thread pool\n        embeddings_vectors = await asyncio.get_event_loop().run_in_executor(\n            None,\n            lambda: _concurrent_embedding(\n                prefixed_texts, local_model, normalize_embeddings\n            ),\n        )\n        embeddings = [\n            embedding if isinstance(embedding, list) else embedding.tolist()\n            for embedding in embeddings_vectors\n        ]\n\n        elapsed = time.monotonic() - start\n        logger.info(\n            f\"Successfully embedded {len(texts)} texts with {total_chars} total characters \"\n            f\"with local model {model_name} in {elapsed:.2f}\"\n        )\n        logger.info(\n            f\"event=embedding_model \"\n            f\"texts={len(texts)} \"\n            f\"chars={total_chars} \"\n            f\"model={model_name} \"\n            f\"gpu={gpu_type} \"\n            f\"elapsed={elapsed:.2f}\"\n        )\n    else:\n        logger.error(\"Model name not specified for embedding\")\n        raise ValueError(\"Model name must be provided to run embeddings.\")\n\n    return embeddings\n\n\n@router.post(\"/bi-encoder-embed\")\nasync def route_bi_encoder_embed(\n    request: Request,\n    embed_request: EmbedRequest,\n) -> EmbedResponse:\n    return await process_embed_request(embed_request, request.app.state.gpu_type)\n\n\nasync def process_embed_request(\n    embed_request: EmbedRequest, gpu_type: str = \"UNKNOWN\"\n) -> EmbedResponse:\n    from litellm.exceptions import RateLimitError\n\n    # Only local models should use this endpoint - API providers should make direct API calls\n    if embed_request.provider_type is not None:\n        raise ValueError(\n            f\"Model server embedding endpoint should only be used for local models. \"\n            f\"API provider '{embed_request.provider_type}' should make direct API calls instead.\"\n        )\n\n    if not embed_request.texts:\n        raise HTTPException(status_code=400, detail=\"No texts to be embedded\")\n\n    if not all(embed_request.texts):\n        raise ValueError(\"Empty strings are not allowed for embedding.\")\n\n    try:\n        if embed_request.text_type == EmbedTextType.QUERY:\n            prefix = embed_request.manual_query_prefix\n        elif embed_request.text_type == EmbedTextType.PASSAGE:\n            prefix = embed_request.manual_passage_prefix\n        else:\n            prefix = None\n\n        embeddings = await embed_text(\n            texts=embed_request.texts,\n            model_name=embed_request.model_name,\n            max_context_length=embed_request.max_context_length,\n            normalize_embeddings=embed_request.normalize_embeddings,\n            prefix=prefix,\n            gpu_type=gpu_type,\n        )\n        return EmbedResponse(embeddings=embeddings)\n    except RateLimitError as e:\n        raise HTTPException(\n            status_code=429,\n            detail=str(e),\n        )\n    except Exception as e:\n        logger.exception(\n            f\"Error during embedding process: provider={embed_request.provider_type} model={embed_request.model_name}\"\n        )\n        raise HTTPException(\n            status_code=500, detail=f\"Error during embedding process: {e}\"\n        )\n"
  },
  {
    "path": "backend/model_server/legacy/README.md",
    "content": "This directory contains code that was useful and may become useful again in the future.\n\nWe stopped using rerankers because the state of the art rerankers are not significantly better than the biencoders and much worse than LLMs which are also capable of acting on a small set of documents for filtering, reranking, etc.\n\nWe stopped using the internal query classifier as that's now offloaded to the LLM which does query expansion so we know ahead of time if it's a keyword or semantic query.\n"
  },
  {
    "path": "backend/model_server/legacy/__init__.py",
    "content": ""
  },
  {
    "path": "backend/model_server/legacy/custom_models.py",
    "content": "# from typing import cast\n# from typing import Optional\n# from typing import TYPE_CHECKING\n\n# import numpy as np\n# import torch\n# import torch.nn.functional as F\n# from fastapi import APIRouter\n# from huggingface_hub import snapshot_download\n# from pydantic import BaseModel\n\n# from model_server.constants import MODEL_WARM_UP_STRING\n# from model_server.legacy.onyx_torch_model import ConnectorClassifier\n# from model_server.legacy.onyx_torch_model import HybridClassifier\n# from model_server.utils import simple_log_function_time\n# from onyx.utils.logger import setup_logger\n# from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_REPO\n# from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_TAG\n# from shared_configs.configs import INDEXING_ONLY\n# from shared_configs.configs import INTENT_MODEL_TAG\n# from shared_configs.configs import INTENT_MODEL_VERSION\n# from shared_configs.model_server_models import IntentRequest\n# from shared_configs.model_server_models import IntentResponse\n\n# if TYPE_CHECKING:\n#     from setfit import SetFitModel  # type: ignore[import-untyped]\n#     from transformers import PreTrainedTokenizer, BatchEncoding\n\n\n# INFORMATION_CONTENT_MODEL_WARM_UP_STRING = \"hi\" * 50\n\n# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX = 1.0\n# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN = 0.7\n# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE = 4.0\n# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH = 10\n# INFORMATION_CONTENT_MODEL_VERSION = \"onyx-dot-app/information-content-model\"\n# INFORMATION_CONTENT_MODEL_TAG: str | None = None\n\n\n# class ConnectorClassificationRequest(BaseModel):\n#     available_connectors: list[str]\n#     query: str\n\n\n# class ConnectorClassificationResponse(BaseModel):\n#     connectors: list[str]\n\n\n# class ContentClassificationPrediction(BaseModel):\n#     predicted_label: int\n#     content_boost_factor: float\n\n\n# logger = setup_logger()\n\n# router = APIRouter(prefix=\"/custom\")\n\n# _CONNECTOR_CLASSIFIER_TOKENIZER: Optional[\"PreTrainedTokenizer\"] = None\n# _CONNECTOR_CLASSIFIER_MODEL: ConnectorClassifier | None = None\n\n# _INTENT_TOKENIZER: Optional[\"PreTrainedTokenizer\"] = None\n# _INTENT_MODEL: HybridClassifier | None = None\n\n# _INFORMATION_CONTENT_MODEL: Optional[\"SetFitModel\"] = None\n\n# _INFORMATION_CONTENT_MODEL_PROMPT_PREFIX: str = \"\"  # spec to model version!\n\n\n# def get_connector_classifier_tokenizer() -> \"PreTrainedTokenizer\":\n#     global _CONNECTOR_CLASSIFIER_TOKENIZER\n#     from transformers import AutoTokenizer, PreTrainedTokenizer\n\n#     if _CONNECTOR_CLASSIFIER_TOKENIZER is None:\n#         # The tokenizer details are not uploaded to the HF hub since it's just the\n#         # unmodified distilbert tokenizer.\n#         _CONNECTOR_CLASSIFIER_TOKENIZER = cast(\n#             PreTrainedTokenizer,\n#             AutoTokenizer.from_pretrained(\"distilbert-base-uncased\"),\n#         )\n#     return _CONNECTOR_CLASSIFIER_TOKENIZER\n\n\n# def get_local_connector_classifier(\n#     model_name_or_path: str = CONNECTOR_CLASSIFIER_MODEL_REPO,\n#     tag: str = CONNECTOR_CLASSIFIER_MODEL_TAG,\n# ) -> ConnectorClassifier:\n#     global _CONNECTOR_CLASSIFIER_MODEL\n#     if _CONNECTOR_CLASSIFIER_MODEL is None:\n#         try:\n#             # Calculate where the cache should be, then load from local if available\n#             local_path = snapshot_download(\n#                 repo_id=model_name_or_path, revision=tag, local_files_only=True\n#             )\n#             _CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained(\n#                 local_path\n#             )\n#         except Exception as e:\n#             logger.warning(f\"Failed to load model directly: {e}\")\n#             try:\n#                 # Attempt to download the model snapshot\n#                 logger.info(f\"Downloading model snapshot for {model_name_or_path}\")\n#                 local_path = snapshot_download(repo_id=model_name_or_path, revision=tag)\n#                 _CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained(\n#                     local_path\n#                 )\n#             except Exception as e:\n#                 logger.error(\n#                     f\"Failed to load model even after attempted snapshot download: {e}\"\n#                 )\n#                 raise\n#     return _CONNECTOR_CLASSIFIER_MODEL\n\n\n# def get_intent_model_tokenizer() -> \"PreTrainedTokenizer\":\n#     from transformers import AutoTokenizer, PreTrainedTokenizer\n\n#     global _INTENT_TOKENIZER\n#     if _INTENT_TOKENIZER is None:\n#         # The tokenizer details are not uploaded to the HF hub since it's just the\n#         # unmodified distilbert tokenizer.\n#         _INTENT_TOKENIZER = cast(\n#             PreTrainedTokenizer,\n#             AutoTokenizer.from_pretrained(\"distilbert-base-uncased\"),\n#         )\n#     return _INTENT_TOKENIZER\n\n\n# def get_local_intent_model(\n#     model_name_or_path: str = INTENT_MODEL_VERSION,\n#     tag: str | None = INTENT_MODEL_TAG,\n# ) -> HybridClassifier:\n#     global _INTENT_MODEL\n#     if _INTENT_MODEL is None:\n#         try:\n#             # Calculate where the cache should be, then load from local if available\n#             logger.notice(f\"Loading model from local cache: {model_name_or_path}\")\n#             local_path = snapshot_download(\n#                 repo_id=model_name_or_path, revision=tag, local_files_only=True\n#             )\n#             _INTENT_MODEL = HybridClassifier.from_pretrained(local_path)\n#             logger.notice(f\"Loaded model from local cache: {local_path}\")\n#         except Exception as e:\n#             logger.warning(f\"Failed to load model directly: {e}\")\n#             try:\n#                 # Attempt to download the model snapshot\n#                 logger.notice(f\"Downloading model snapshot for {model_name_or_path}\")\n#                 local_path = snapshot_download(\n#                     repo_id=model_name_or_path, revision=tag, local_files_only=False\n#                 )\n#                 _INTENT_MODEL = HybridClassifier.from_pretrained(local_path)\n#             except Exception as e:\n#                 logger.error(\n#                     f\"Failed to load model even after attempted snapshot download: {e}\"\n#                 )\n#                 raise\n#     return _INTENT_MODEL\n\n\n# def get_local_information_content_model(\n#     model_name_or_path: str = INFORMATION_CONTENT_MODEL_VERSION,\n#     tag: str | None = INFORMATION_CONTENT_MODEL_TAG,\n# ) -> \"SetFitModel\":\n#     from setfit import SetFitModel\n\n#     global _INFORMATION_CONTENT_MODEL\n#     if _INFORMATION_CONTENT_MODEL is None:\n#         try:\n#             # Calculate where the cache should be, then load from local if available\n#             logger.notice(\n#                 f\"Loading content information model from local cache: {model_name_or_path}\"\n#             )\n#             local_path = snapshot_download(\n#                 repo_id=model_name_or_path, revision=tag, local_files_only=True\n#             )\n#             _INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)\n#             logger.notice(\n#                 f\"Loaded content information model from local cache: {local_path}\"\n#             )\n#         except Exception as e:\n#             logger.warning(f\"Failed to load content information model directly: {e}\")\n#             try:\n#                 # Attempt to download the model snapshot\n#                 logger.notice(\n#                     f\"Downloading content information model snapshot for {model_name_or_path}\"\n#                 )\n#                 local_path = snapshot_download(\n#                     repo_id=model_name_or_path, revision=tag, local_files_only=False\n#                 )\n#                 _INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)\n#             except Exception as e:\n#                 logger.error(\n#                     f\"Failed to load content information model even after attempted snapshot download: {e}\"\n#                 )\n#                 raise\n\n#     return _INFORMATION_CONTENT_MODEL\n\n\n# def tokenize_connector_classification_query(\n#     connectors: list[str],\n#     query: str,\n#     tokenizer: \"PreTrainedTokenizer\",\n#     connector_token_end_id: int,\n# ) -> tuple[torch.Tensor, torch.Tensor]:\n#     \"\"\"\n#     Tokenize the connectors & user query into one prompt for the forward pass of ConnectorClassifier models\n\n#     The attention mask is just all 1s. The prompt is CLS + each connector name suffixed with the connector end\n#     token and then the user query.\n#     \"\"\"\n\n#     input_ids = torch.tensor([tokenizer.cls_token_id], dtype=torch.long)\n\n#     for connector in connectors:\n#         connector_token_ids = tokenizer(\n#             connector,\n#             add_special_tokens=False,\n#             return_tensors=\"pt\",\n#         )\n\n#         input_ids = torch.cat(\n#             (\n#                 input_ids,\n#                 connector_token_ids[\"input_ids\"].squeeze(dim=0),\n#                 torch.tensor([connector_token_end_id], dtype=torch.long),\n#             ),\n#             dim=-1,\n#         )\n#     query_token_ids = tokenizer(\n#         query,\n#         add_special_tokens=False,\n#         return_tensors=\"pt\",\n#     )\n\n#     input_ids = torch.cat(\n#         (\n#             input_ids,\n#             query_token_ids[\"input_ids\"].squeeze(dim=0),\n#             torch.tensor([tokenizer.sep_token_id], dtype=torch.long),\n#         ),\n#         dim=-1,\n#     )\n#     attention_mask = torch.ones(input_ids.numel(), dtype=torch.long)\n\n#     return input_ids.unsqueeze(0), attention_mask.unsqueeze(0)\n\n\n# def warm_up_connector_classifier_model() -> None:\n#     logger.info(\n#         f\"Warming up connector_classifier model {CONNECTOR_CLASSIFIER_MODEL_TAG}\"\n#     )\n#     connector_classifier_tokenizer = get_connector_classifier_tokenizer()\n#     connector_classifier = get_local_connector_classifier()\n\n#     input_ids, attention_mask = tokenize_connector_classification_query(\n#         [\"GitHub\"],\n#         \"onyx classifier query google doc\",\n#         connector_classifier_tokenizer,\n#         connector_classifier.connector_end_token_id,\n#     )\n#     input_ids = input_ids.to(connector_classifier.device)\n#     attention_mask = attention_mask.to(connector_classifier.device)\n\n#     connector_classifier(input_ids, attention_mask)\n\n\n# def warm_up_intent_model() -> None:\n#     logger.notice(f\"Warming up Intent Model: {INTENT_MODEL_VERSION}\")\n#     intent_tokenizer = get_intent_model_tokenizer()\n#     tokens = intent_tokenizer(\n#         MODEL_WARM_UP_STRING, return_tensors=\"pt\", truncation=True, padding=True\n#     )\n\n#     intent_model = get_local_intent_model()\n#     device = intent_model.device\n#     intent_model(\n#         query_ids=tokens[\"input_ids\"].to(device),\n#         query_mask=tokens[\"attention_mask\"].to(device),\n#     )\n\n\n# def warm_up_information_content_model() -> None:\n#     logger.notice(\"Warming up Content Model\")  # TODO: add version if needed\n\n#     information_content_model = get_local_information_content_model()\n#     information_content_model(INFORMATION_CONTENT_MODEL_WARM_UP_STRING)\n\n\n# @simple_log_function_time()\n# def run_inference(tokens: \"BatchEncoding\") -> tuple[list[float], list[float]]:\n#     intent_model = get_local_intent_model()\n#     device = intent_model.device\n\n#     outputs = intent_model(\n#         query_ids=tokens[\"input_ids\"].to(device),\n#         query_mask=tokens[\"attention_mask\"].to(device),\n#     )\n\n#     token_logits = outputs[\"token_logits\"]\n#     intent_logits = outputs[\"intent_logits\"]\n\n#     # Move tensors to CPU before applying softmax and converting to numpy\n#     intent_probabilities = F.softmax(intent_logits.cpu(), dim=-1).numpy()[0]\n#     token_probabilities = F.softmax(token_logits.cpu(), dim=-1).numpy()[0]\n\n#     # Extract the probabilities for the positive class (index 1) for each token\n#     token_positive_probs = token_probabilities[:, 1].tolist()\n\n#     return intent_probabilities.tolist(), token_positive_probs\n\n\n# @simple_log_function_time()\n# def run_content_classification_inference(\n#     text_inputs: list[str],\n# ) -> list[ContentClassificationPrediction]:\n#     \"\"\"\n#     Assign a score to the segments in question. The model stored in get_local_information_content_model()\n#     creates the 'model score' based on its training, and the scores are then converted to a 0.0-1.0 scale.\n#     In the code outside of the model/inference model servers that score will be converted into the actual\n#     boost factor.\n#     \"\"\"\n\n#     def _prob_to_score(prob: float) -> float:\n#         \"\"\"\n#         Conversion of base score to 0.0 - 1.0 score. Note that the min/max values depend on the model!\n#         \"\"\"\n#         _MIN_BASE_SCORE = 0.25\n#         _MAX_BASE_SCORE = 0.75\n#         if prob < _MIN_BASE_SCORE:\n#             raw_score = 0.0\n#         elif prob < _MAX_BASE_SCORE:\n#             raw_score = (prob - _MIN_BASE_SCORE) / (_MAX_BASE_SCORE - _MIN_BASE_SCORE)\n#         else:\n#             raw_score = 1.0\n#         return (\n#             INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN\n#             + (\n#                 INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX\n#                 - INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN\n#             )\n#             * raw_score\n#         )\n\n#     _BATCH_SIZE = 32\n#     content_model = get_local_information_content_model()\n\n#     # Process inputs in batches\n#     all_output_classes: list[int] = []\n#     all_base_output_probabilities: list[float] = []\n\n#     for i in range(0, len(text_inputs), _BATCH_SIZE):\n#         batch = text_inputs[i : i + _BATCH_SIZE]\n#         batch_with_prefix = []\n#         batch_indices = []\n\n#         # Pre-allocate results for this batch\n#         batch_output_classes: list[np.ndarray] = [np.array(1)] * len(batch)\n#         batch_probabilities: list[np.ndarray] = [np.array(1.0)] * len(batch)\n\n#         # Pre-process batch to handle long input exceptions\n#         for j, text in enumerate(batch):\n#             if len(text) == 0:\n#                 # if no input, treat as non-informative from the model's perspective\n#                 batch_output_classes[j] = np.array(0)\n#                 batch_probabilities[j] = np.array(0.0)\n#                 logger.warning(\"Input for Content Information Model is empty\")\n\n#             elif (\n#                 len(text.split())\n#                 <= INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH\n#             ):\n#                 # if input is short, use the model\n#                 batch_with_prefix.append(\n#                     _INFORMATION_CONTENT_MODEL_PROMPT_PREFIX + text\n#                 )\n#                 batch_indices.append(j)\n#             else:\n#                 # if longer than cutoff, treat as informative (stay with default), but issue warning\n#                 logger.warning(\"Input for Content Information Model too long\")\n\n#         if batch_with_prefix:  # Only run model if we have valid inputs\n#             # Get predictions for the batch\n#             model_output_classes = content_model(batch_with_prefix)\n#             model_output_probabilities = content_model.predict_proba(batch_with_prefix)\n\n#             # Place results in the correct positions\n#             for idx, batch_idx in enumerate(batch_indices):\n#                 batch_output_classes[batch_idx] = model_output_classes[idx].numpy()\n#                 batch_probabilities[batch_idx] = model_output_probabilities[idx][\n#                     1\n#                 ].numpy()  # x[1] is prob of the positive class\n\n#         all_output_classes.extend([int(x) for x in batch_output_classes])\n#         all_base_output_probabilities.extend([float(x) for x in batch_probabilities])\n\n#     logits = [\n#         np.log(p / (1 - p)) if p != 0.0 and p != 1.0 else (100 if p == 1.0 else -100)\n#         for p in all_base_output_probabilities\n#     ]\n#     scaled_logits = [\n#         logit / INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE\n#         for logit in logits\n#     ]\n#     output_probabilities_with_temp = [\n#         np.exp(scaled_logit) / (1 + np.exp(scaled_logit))\n#         for scaled_logit in scaled_logits\n#     ]\n\n#     prediction_scores = [\n#         _prob_to_score(p_temp) for p_temp in output_probabilities_with_temp\n#     ]\n\n#     content_classification_predictions = [\n#         ContentClassificationPrediction(\n#             predicted_label=predicted_label, content_boost_factor=output_score\n#         )\n#         for predicted_label, output_score in zip(all_output_classes, prediction_scores)\n#     ]\n\n#     return content_classification_predictions\n\n\n# def map_keywords(\n#     input_ids: torch.Tensor, tokenizer: \"PreTrainedTokenizer\", is_keyword: list[bool]\n# ) -> list[str]:\n#     tokens = tokenizer.convert_ids_to_tokens(input_ids)  # type: ignore\n\n#     if not len(tokens) == len(is_keyword):\n#         raise ValueError(\"Length of tokens and keyword predictions must match\")\n\n#     if input_ids[0] == tokenizer.cls_token_id:\n#         tokens = tokens[1:]\n#         is_keyword = is_keyword[1:]\n\n#     if input_ids[-1] == tokenizer.sep_token_id:\n#         tokens = tokens[:-1]\n#         is_keyword = is_keyword[:-1]\n\n#     unk_token = tokenizer.unk_token\n#     if unk_token in tokens:\n#         raise ValueError(\"Unknown token detected in the input\")\n\n#     keywords = []\n#     current_keyword = \"\"\n\n#     for ind, token in enumerate(tokens):\n#         if is_keyword[ind]:\n#             if token.startswith(\"##\"):\n#                 current_keyword += token[2:]\n#             else:\n#                 if current_keyword:\n#                     keywords.append(current_keyword)\n#                 current_keyword = token\n#         else:\n#             # If mispredicted a later token of a keyword, add it to the current keyword\n#             # to complete it\n#             if current_keyword:\n#                 if len(current_keyword) > 2 and current_keyword.startswith(\"##\"):\n#                     current_keyword = current_keyword[2:]\n\n#                 else:\n#                     keywords.append(current_keyword)\n#                     current_keyword = \"\"\n\n#     if current_keyword:\n#         keywords.append(current_keyword)\n\n#     return keywords\n\n\n# def clean_keywords(keywords: list[str]) -> list[str]:\n#     cleaned_words = []\n#     for word in keywords:\n#         word = word[:-2] if word.endswith(\"'s\") else word\n#         word = word.replace(\"/\", \" \")\n#         word = word.replace(\"'\", \"\").replace('\"', \"\")\n#         cleaned_words.extend([w for w in word.strip().split() if w and not w.isspace()])\n#     return cleaned_words\n\n\n# def run_connector_classification(req: ConnectorClassificationRequest) -> list[str]:\n#     tokenizer = get_connector_classifier_tokenizer()\n#     model = get_local_connector_classifier()\n\n#     connector_names = req.available_connectors\n\n#     input_ids, attention_mask = tokenize_connector_classification_query(\n#         connector_names,\n#         req.query,\n#         tokenizer,\n#         model.connector_end_token_id,\n#     )\n#     input_ids = input_ids.to(model.device)\n#     attention_mask = attention_mask.to(model.device)\n\n#     global_confidence, classifier_confidence = model(input_ids, attention_mask)\n\n#     if global_confidence.item() < 0.5:\n#         return []\n\n#     passed_connectors = []\n\n#     for i, connector_name in enumerate(connector_names):\n#         if classifier_confidence.view(-1)[i].item() > 0.5:\n#             passed_connectors.append(connector_name)\n\n#     return passed_connectors\n\n\n# def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]:\n#     tokenizer = get_intent_model_tokenizer()\n#     model_input = tokenizer(\n#         intent_req.query, return_tensors=\"pt\", truncation=False, padding=False\n#     )\n\n#     if len(model_input.input_ids[0]) > 512:\n#         # If the user text is too long, assume it is semantic and keep all words\n#         return True, intent_req.query.split()\n\n#     intent_probs, token_probs = run_inference(model_input)\n\n#     is_keyword_sequence = intent_probs[0] >= intent_req.keyword_percent_threshold\n\n#     keyword_preds = [\n#         token_prob >= intent_req.keyword_percent_threshold for token_prob in token_probs\n#     ]\n\n#     try:\n#         keywords = map_keywords(model_input.input_ids[0], tokenizer, keyword_preds)\n#     except Exception as e:\n#         logger.warning(\n#             f\"Failed to extract keywords for query: {intent_req.query} due to {e}\"\n#         )\n#         # Fallback to keeping all words\n#         keywords = intent_req.query.split()\n\n#     cleaned_keywords = clean_keywords(keywords)\n\n#     return is_keyword_sequence, cleaned_keywords\n\n\n# @router.post(\"/connector-classification\")\n# async def process_connector_classification_request(\n#     classification_request: ConnectorClassificationRequest,\n# ) -> ConnectorClassificationResponse:\n#     if INDEXING_ONLY:\n#         raise RuntimeError(\n#             \"Indexing model server should not call connector classification endpoint\"\n#         )\n\n#     if len(classification_request.available_connectors) == 0:\n#         return ConnectorClassificationResponse(connectors=[])\n\n#     connectors = run_connector_classification(classification_request)\n#     return ConnectorClassificationResponse(connectors=connectors)\n\n\n# @router.post(\"/query-analysis\")\n# async def process_analysis_request(\n#     intent_request: IntentRequest,\n# ) -> IntentResponse:\n#     if INDEXING_ONLY:\n#         raise RuntimeError(\"Indexing model server should not call intent endpoint\")\n\n#     is_keyword, keywords = run_analysis(intent_request)\n#     return IntentResponse(is_keyword=is_keyword, keywords=keywords)\n\n\n# @router.post(\"/content-classification\")\n# async def process_content_classification_request(\n#     content_classification_requests: list[str],\n# ) -> list[ContentClassificationPrediction]:\n#     return run_content_classification_inference(content_classification_requests)\n"
  },
  {
    "path": "backend/model_server/legacy/onyx_torch_model.py",
    "content": "# import json\n# import os\n# from typing import cast\n# from typing import TYPE_CHECKING\n\n# import torch\n# import torch.nn as nn\n\n\n# if TYPE_CHECKING:\n#     from transformers import DistilBertConfig\n\n\n# class HybridClassifier(nn.Module):\n#     def __init__(self) -> None:\n#         from transformers import DistilBertConfig, DistilBertModel\n\n#         super().__init__()\n#         config = DistilBertConfig()\n#         self.distilbert = DistilBertModel(config)\n#         config = self.distilbert.config  # type: ignore\n\n#         # Keyword tokenwise binary classification layer\n#         self.keyword_classifier = nn.Linear(config.dim, 2)\n\n#         # Intent Classifier layers\n#         self.pre_classifier = nn.Linear(config.dim, config.dim)\n#         self.intent_classifier = nn.Linear(config.dim, 2)\n\n#         self.device = torch.device(\"cpu\")\n\n#     def forward(\n#         self,\n#         query_ids: torch.Tensor,\n#         query_mask: torch.Tensor,\n#     ) -> dict[str, torch.Tensor]:\n#         outputs = self.distilbert(input_ids=query_ids, attention_mask=query_mask)\n#         sequence_output = outputs.last_hidden_state\n\n#         # Intent classification on the CLS token\n#         cls_token_state = sequence_output[:, 0, :]\n#         pre_classifier_out = self.pre_classifier(cls_token_state)\n#         intent_logits = self.intent_classifier(pre_classifier_out)\n\n#         # Keyword classification on all tokens\n#         token_logits = self.keyword_classifier(sequence_output)\n\n#         return {\"intent_logits\": intent_logits, \"token_logits\": token_logits}\n\n#     @classmethod\n#     def from_pretrained(cls, load_directory: str) -> \"HybridClassifier\":\n#         model_path = os.path.join(load_directory, \"pytorch_model.bin\")\n#         config_path = os.path.join(load_directory, \"config.json\")\n\n#         with open(config_path, \"r\") as f:\n#             config = json.load(f)\n#         model = cls(**config)\n\n#         if torch.backends.mps.is_available():\n#             # Apple silicon GPU\n#             device = torch.device(\"mps\")\n#         elif torch.cuda.is_available():\n#             device = torch.device(\"cuda\")\n#         else:\n#             device = torch.device(\"cpu\")\n\n#         model.load_state_dict(torch.load(model_path, map_location=device))\n#         model = model.to(device)\n\n#         model.device = device\n\n#         model.eval()\n#         # Eval doesn't set requires_grad to False, do it manually to save memory and have faster inference\n#         for param in model.parameters():\n#             param.requires_grad = False\n\n#         return model\n\n\n# class ConnectorClassifier(nn.Module):\n#     def __init__(self, config: \"DistilBertConfig\") -> None:\n#         from transformers import DistilBertTokenizer, DistilBertModel\n\n#         super().__init__()\n\n#         self.config = config\n#         self.distilbert = DistilBertModel(config)\n#         config = self.distilbert.config  # type: ignore\n#         self.connector_global_classifier = nn.Linear(config.dim, 1)\n#         self.connector_match_classifier = nn.Linear(config.dim, 1)\n#         self.tokenizer = DistilBertTokenizer.from_pretrained(\"distilbert-base-uncased\")\n\n#         # Token indicating end of connector name, and on which classifier is used\n#         self.connector_end_token_id = self.tokenizer.get_vocab()[\n#             self.config.connector_end_token\n#         ]\n\n#         self.device = torch.device(\"cpu\")\n\n#     def forward(\n#         self,\n#         input_ids: torch.Tensor,\n#         attention_mask: torch.Tensor,\n#     ) -> tuple[torch.Tensor, torch.Tensor]:\n#         hidden_states = self.distilbert(\n#             input_ids=input_ids, attention_mask=attention_mask\n#         ).last_hidden_state\n\n#         cls_hidden_states = hidden_states[\n#             :, 0, :\n#         ]  # Take leap of faith that first token is always [CLS]\n#         global_logits = self.connector_global_classifier(cls_hidden_states).view(-1)\n#         global_confidence = torch.sigmoid(global_logits).view(-1)\n\n#         connector_end_position_ids = input_ids == self.connector_end_token_id\n#         connector_end_hidden_states = hidden_states[connector_end_position_ids]\n#         classifier_output = self.connector_match_classifier(connector_end_hidden_states)\n#         classifier_confidence = torch.nn.functional.sigmoid(classifier_output).view(-1)\n\n#         return global_confidence, classifier_confidence\n\n#     @classmethod\n#     def from_pretrained(cls, repo_dir: str) -> \"ConnectorClassifier\":\n#         from transformers import DistilBertConfig\n\n#         config = cast(\n#             DistilBertConfig,\n#             DistilBertConfig.from_pretrained(os.path.join(repo_dir, \"config.json\")),\n#         )\n#         device = (\n#             torch.device(\"cuda\")\n#             if torch.cuda.is_available()\n#             else (\n#                 torch.device(\"mps\")\n#                 if torch.backends.mps.is_available()\n#                 else torch.device(\"cpu\")\n#             )\n#         )\n#         state_dict = torch.load(\n#             os.path.join(repo_dir, \"pytorch_model.pt\"),\n#             map_location=device,\n#             weights_only=True,\n#         )\n\n#         model = cls(config)\n#         model.load_state_dict(state_dict)\n#         model.to(device)\n#         model.device = device\n#         model.eval()\n\n#         for param in model.parameters():\n#             param.requires_grad = False\n\n#         return model\n"
  },
  {
    "path": "backend/model_server/legacy/reranker.py",
    "content": "# import asyncio\n# from typing import Optional\n# from typing import TYPE_CHECKING\n\n# from fastapi import APIRouter\n# from fastapi import HTTPException\n\n# from model_server.utils import simple_log_function_time\n# from onyx.utils.logger import setup_logger\n# from shared_configs.configs import INDEXING_ONLY\n# from shared_configs.model_server_models import RerankRequest\n# from shared_configs.model_server_models import RerankResponse\n\n# if TYPE_CHECKING:\n#     from sentence_transformers import CrossEncoder\n\n# logger = setup_logger()\n\n# router = APIRouter(prefix=\"/encoder\")\n\n# _RERANK_MODEL: Optional[\"CrossEncoder\"] = None\n\n\n# def get_local_reranking_model(\n#     model_name: str,\n# ) -> \"CrossEncoder\":\n#     global _RERANK_MODEL\n#     from sentence_transformers import CrossEncoder\n\n#     if _RERANK_MODEL is None:\n#         logger.notice(f\"Loading {model_name}\")\n#         model = CrossEncoder(model_name)\n#         _RERANK_MODEL = model\n#     return _RERANK_MODEL\n\n\n# @simple_log_function_time()\n# async def local_rerank(query: str, docs: list[str], model_name: str) -> list[float]:\n#     cross_encoder = get_local_reranking_model(model_name)\n#     # Run CPU-bound reranking in a thread pool\n#     return await asyncio.get_event_loop().run_in_executor(\n#         None,\n#         lambda: cross_encoder.predict([(query, doc) for doc in docs]).tolist(),\n#     )\n\n\n# @router.post(\"/cross-encoder-scores\")\n# async def process_rerank_request(rerank_request: RerankRequest) -> RerankResponse:\n#     \"\"\"Cross encoders can be purely black box from the app perspective\"\"\"\n#     # Only local models should use this endpoint - API providers should make direct API calls\n#     if rerank_request.provider_type is not None:\n#         raise ValueError(\n#             f\"Model server reranking endpoint should only be used for local models. \"\n#             f\"API provider '{rerank_request.provider_type}' should make direct API calls instead.\"\n#         )\n\n#     if INDEXING_ONLY:\n#         raise RuntimeError(\"Indexing model server should not call reranking endpoint\")\n\n#     if not rerank_request.documents or not rerank_request.query:\n#         raise HTTPException(\n#             status_code=400, detail=\"Missing documents or query for reranking\"\n#         )\n#     if not all(rerank_request.documents):\n#         raise ValueError(\"Empty documents cannot be reranked.\")\n\n#     try:\n#         # At this point, provider_type is None, so handle local reranking\n#         sim_scores = await local_rerank(\n#             query=rerank_request.query,\n#             docs=rerank_request.documents,\n#             model_name=rerank_request.model_name,\n#         )\n#         return RerankResponse(scores=sim_scores)\n\n#     except Exception as e:\n#         logger.exception(f\"Error during reranking process:\\n{str(e)}\")\n#         raise HTTPException(\n#             status_code=500, detail=\"Failed to run Cross-Encoder reranking\"\n#         )\n"
  },
  {
    "path": "backend/model_server/main.py",
    "content": "import logging\nimport os\nimport shutil\nfrom collections.abc import AsyncGenerator\nfrom contextlib import asynccontextmanager\nfrom pathlib import Path\n\nimport sentry_sdk\nimport torch\nimport uvicorn\nfrom fastapi import FastAPI\nfrom prometheus_fastapi_instrumentator import Instrumentator\nfrom sentry_sdk.integrations.fastapi import FastApiIntegration\nfrom sentry_sdk.integrations.starlette import StarletteIntegration\nfrom transformers import logging as transformer_logging\n\nfrom model_server.encoders import router as encoders_router\nfrom model_server.management_endpoints import router as management_router\nfrom model_server.utils import get_gpu_type\nfrom onyx import __version__\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.logger import setup_uvicorn_logger\nfrom onyx.utils.middleware import add_onyx_request_id_middleware\nfrom onyx.utils.middleware import add_onyx_tenant_id_middleware\nfrom shared_configs.configs import INDEXING_ONLY\nfrom shared_configs.configs import MIN_THREADS_ML_MODELS\nfrom shared_configs.configs import MODEL_SERVER_ALLOWED_HOST\nfrom shared_configs.configs import MODEL_SERVER_PORT\nfrom shared_configs.configs import SENTRY_DSN\n\nos.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\nos.environ[\"HF_HUB_DISABLE_TELEMETRY\"] = \"1\"\n\nHF_CACHE_PATH = Path(\".cache/huggingface\")\nTEMP_HF_CACHE_PATH = Path(\".cache/temp_huggingface\")\n\ntransformer_logging.set_verbosity_error()\n\nlogger = setup_logger()\n\nfile_handlers = [\n    h for h in logger.logger.handlers if isinstance(h, logging.FileHandler)\n]\n\nsetup_uvicorn_logger(shared_file_handlers=file_handlers)\n\n\ndef _move_files_recursively(source: Path, dest: Path, overwrite: bool = False) -> None:\n    \"\"\"\n    This moves the files from the temp huggingface cache to the huggingface cache\n\n    We have to move each file individually because the directories might\n    have the same name but not the same contents and we dont want to remove\n    the files in the existing huggingface cache that don't exist in the temp\n    huggingface cache.\n    \"\"\"\n\n    for item in source.iterdir():\n        target_path = dest / item.relative_to(source)\n        if item.is_dir():\n            _move_files_recursively(item, target_path, overwrite)\n        else:\n            target_path.parent.mkdir(parents=True, exist_ok=True)\n            if target_path.exists() and not overwrite:\n                continue\n            shutil.move(str(item), str(target_path))\n\n\n@asynccontextmanager\nasync def lifespan(app: FastAPI) -> AsyncGenerator:\n    gpu_type = get_gpu_type()\n    logger.notice(f\"Torch GPU Detection: gpu_type={gpu_type}\")\n\n    app.state.gpu_type = gpu_type\n\n    try:\n        if TEMP_HF_CACHE_PATH.is_dir():\n            logger.notice(\"Moving contents of temp_huggingface to huggingface cache.\")\n            _move_files_recursively(TEMP_HF_CACHE_PATH, HF_CACHE_PATH)\n            shutil.rmtree(TEMP_HF_CACHE_PATH, ignore_errors=True)\n            logger.notice(\"Moved contents of temp_huggingface to huggingface cache.\")\n    except Exception as e:\n        logger.warning(\n            f\"Error moving contents of temp_huggingface to huggingface cache: {e}. \"\n            \"This is not a critical error and the model server will continue to run.\"\n        )\n\n    torch.set_num_threads(max(MIN_THREADS_ML_MODELS, torch.get_num_threads()))\n    logger.notice(f\"Torch Threads: {torch.get_num_threads()}\")\n\n    yield\n\n\ndef get_model_app() -> FastAPI:\n    application = FastAPI(\n        title=\"Onyx Model Server\", version=__version__, lifespan=lifespan\n    )\n    if SENTRY_DSN:\n        sentry_sdk.init(\n            dsn=SENTRY_DSN,\n            integrations=[StarletteIntegration(), FastApiIntegration()],\n            traces_sample_rate=0.1,\n            release=__version__,\n        )\n        logger.info(\"Sentry initialized\")\n    else:\n        logger.debug(\"Sentry DSN not provided, skipping Sentry initialization\")\n\n    application.include_router(management_router)\n    application.include_router(encoders_router)\n\n    request_id_prefix = \"INF\"\n    if INDEXING_ONLY:\n        request_id_prefix = \"IDX\"\n\n    add_onyx_tenant_id_middleware(application, logger)\n    add_onyx_request_id_middleware(application, request_id_prefix, logger)\n\n    # Initialize and instrument the app\n    Instrumentator().instrument(application).expose(application)\n\n    return application\n\n\napp = get_model_app()\n\n\nif __name__ == \"__main__\":\n    logger.notice(\n        f\"Starting Onyx Model Server on http://{MODEL_SERVER_ALLOWED_HOST}:{str(MODEL_SERVER_PORT)}/\"\n    )\n    logger.notice(f\"Model Server Version: {__version__}\")\n    uvicorn.run(app, host=MODEL_SERVER_ALLOWED_HOST, port=MODEL_SERVER_PORT)\n"
  },
  {
    "path": "backend/model_server/management_endpoints.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Response\n\nfrom model_server.constants import GPUStatus\nfrom model_server.utils import get_gpu_type\n\nrouter = APIRouter(prefix=\"/api\")\n\n\n@router.get(\"/health\")\nasync def healthcheck() -> Response:\n    return Response(status_code=200)\n\n\n@router.get(\"/gpu-status\")\nasync def route_gpu_status() -> dict[str, bool | str]:\n    gpu_type = get_gpu_type()\n    gpu_available = gpu_type != GPUStatus.NONE\n    return {\"gpu_available\": gpu_available, \"type\": gpu_type}\n"
  },
  {
    "path": "backend/model_server/utils.py",
    "content": "import asyncio\nimport time\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom collections.abc import Iterator\nfrom functools import wraps\nfrom typing import Any\nfrom typing import cast\nfrom typing import TypeVar\n\nimport torch\n\nfrom model_server.constants import GPUStatus\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nF = TypeVar(\"F\", bound=Callable)\nFG = TypeVar(\"FG\", bound=Callable[..., Generator | Iterator])\n\n\ndef simple_log_function_time(\n    func_name: str | None = None,\n    debug_only: bool = False,\n    include_args: bool = False,\n) -> Callable[[F], F]:\n    def decorator(func: F) -> F:\n        if asyncio.iscoroutinefunction(func):\n\n            @wraps(func)\n            async def wrapped_async_func(*args: Any, **kwargs: Any) -> Any:\n                start_time = time.time()\n                result = await func(*args, **kwargs)\n                elapsed_time_str = str(time.time() - start_time)\n                log_name = func_name or func.__name__\n                args_str = f\" args={args} kwargs={kwargs}\" if include_args else \"\"\n                final_log = f\"{log_name}{args_str} took {elapsed_time_str} seconds\"\n                if debug_only:\n                    logger.debug(final_log)\n                else:\n                    logger.notice(final_log)\n                return result\n\n            return cast(F, wrapped_async_func)\n        else:\n\n            @wraps(func)\n            def wrapped_sync_func(*args: Any, **kwargs: Any) -> Any:\n                start_time = time.time()\n                result = func(*args, **kwargs)\n                elapsed_time_str = str(time.time() - start_time)\n                log_name = func_name or func.__name__\n                args_str = f\" args={args} kwargs={kwargs}\" if include_args else \"\"\n                final_log = f\"{log_name}{args_str} took {elapsed_time_str} seconds\"\n                if debug_only:\n                    logger.debug(final_log)\n                else:\n                    logger.notice(final_log)\n                return result\n\n            return cast(F, wrapped_sync_func)\n\n    return decorator\n\n\ndef get_gpu_type() -> str:\n    if torch.cuda.is_available():\n        return GPUStatus.CUDA\n    if torch.backends.mps.is_available():\n        return GPUStatus.MAC_MPS\n\n    return GPUStatus.NONE\n"
  },
  {
    "path": "backend/onyx/__init__.py",
    "content": "import os\n\n__version__ = os.environ.get(\"ONYX_VERSION\", \"\") or \"Development\"\n"
  },
  {
    "path": "backend/onyx/access/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/access/access.py",
    "content": "from collections.abc import Callable\nfrom typing import cast\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.access.models import DocumentAccess\nfrom onyx.access.utils import prefix_user_email\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import PUBLIC_DOC_PAT\nfrom onyx.db.document import get_access_info_for_document\nfrom onyx.db.document import get_access_info_for_documents\nfrom onyx.db.models import User\nfrom onyx.db.models import UserFile\nfrom onyx.db.user_file import fetch_user_files_with_access_relationships\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\n\n\ndef _get_access_for_document(\n    document_id: str,\n    db_session: Session,\n) -> DocumentAccess:\n    info = get_access_info_for_document(\n        db_session=db_session,\n        document_id=document_id,\n    )\n\n    doc_access = DocumentAccess.build(\n        user_emails=info[1] if info and info[1] else [],\n        user_groups=[],\n        external_user_emails=[],\n        external_user_group_ids=[],\n        is_public=info[2] if info else False,\n    )\n\n    return doc_access\n\n\ndef get_access_for_document(\n    document_id: str,\n    db_session: Session,\n) -> DocumentAccess:\n    versioned_get_access_for_document_fn = fetch_versioned_implementation(\n        \"onyx.access.access\", \"_get_access_for_document\"\n    )\n    return versioned_get_access_for_document_fn(document_id, db_session)\n\n\ndef get_null_document_access() -> DocumentAccess:\n    return DocumentAccess.build(\n        user_emails=[],\n        user_groups=[],\n        is_public=False,\n        external_user_emails=[],\n        external_user_group_ids=[],\n    )\n\n\ndef _get_access_for_documents(\n    document_ids: list[str],\n    db_session: Session,\n) -> dict[str, DocumentAccess]:\n    document_access_info = get_access_info_for_documents(\n        db_session=db_session,\n        document_ids=document_ids,\n    )\n    doc_access = {}\n    for document_id, user_emails, is_public in document_access_info:\n        doc_access[document_id] = DocumentAccess.build(\n            user_emails=[email for email in user_emails if email],\n            # MIT version will wipe all groups and external groups on update\n            user_groups=[],\n            is_public=is_public,\n            external_user_emails=[],\n            external_user_group_ids=[],\n        )\n\n    # Sometimes the document has not been indexed by the indexing job yet, in those cases\n    # the document does not exist and so we use least permissive. Specifically the EE version\n    # checks the MIT version permissions and creates a superset. This ensures that this flow\n    # does not fail even if the Document has not yet been indexed.\n    for doc_id in document_ids:\n        if doc_id not in doc_access:\n            doc_access[doc_id] = get_null_document_access()\n    return doc_access\n\n\ndef get_access_for_documents(\n    document_ids: list[str],\n    db_session: Session,\n) -> dict[str, DocumentAccess]:\n    \"\"\"Fetches all access information for the given documents.\"\"\"\n    versioned_get_access_for_documents_fn = fetch_versioned_implementation(\n        \"onyx.access.access\", \"_get_access_for_documents\"\n    )\n    return versioned_get_access_for_documents_fn(document_ids, db_session)\n\n\ndef _get_acl_for_user(\n    user: User, db_session: Session  # noqa: ARG001\n) -> set[str]:  # noqa: ARG001\n    \"\"\"Returns a list of ACL entries that the user has access to. This is meant to be\n    used downstream to filter out documents that the user does not have access to. The\n    user should have access to a document if at least one entry in the document's ACL\n    matches one entry in the returned set.\n\n    Anonymous users only have access to public documents.\n    \"\"\"\n    if user.is_anonymous:\n        return {PUBLIC_DOC_PAT}\n    return {prefix_user_email(user.email), PUBLIC_DOC_PAT}\n\n\ndef get_acl_for_user(user: User, db_session: Session | None = None) -> set[str]:\n    versioned_acl_for_user_fn = fetch_versioned_implementation(\n        \"onyx.access.access\", \"_get_acl_for_user\"\n    )\n    return versioned_acl_for_user_fn(user, db_session)\n\n\ndef source_should_fetch_permissions_during_indexing(source: DocumentSource) -> bool:\n    _source_should_fetch_permissions_during_indexing_func = cast(\n        Callable[[DocumentSource], bool],\n        fetch_ee_implementation_or_noop(\n            \"onyx.external_permissions.sync_params\",\n            \"source_should_fetch_permissions_during_indexing\",\n            False,\n        ),\n    )\n    return _source_should_fetch_permissions_during_indexing_func(source)\n\n\ndef get_access_for_user_files(\n    user_file_ids: list[str],\n    db_session: Session,\n) -> dict[str, DocumentAccess]:\n    versioned_fn = fetch_versioned_implementation(\n        \"onyx.access.access\", \"get_access_for_user_files_impl\"\n    )\n    return versioned_fn(user_file_ids, db_session)\n\n\ndef get_access_for_user_files_impl(\n    user_file_ids: list[str],\n    db_session: Session,\n) -> dict[str, DocumentAccess]:\n    user_files = fetch_user_files_with_access_relationships(user_file_ids, db_session)\n    return build_access_for_user_files_impl(user_files)\n\n\ndef build_access_for_user_files(\n    user_files: list[UserFile],\n) -> dict[str, DocumentAccess]:\n    \"\"\"Compute access from pre-loaded UserFile objects (with relationships).\n    Callers must ensure UserFile.user, Persona.users, and Persona.user are\n    eagerly loaded (and Persona.groups for the EE path).\"\"\"\n    versioned_fn = fetch_versioned_implementation(\n        \"onyx.access.access\", \"build_access_for_user_files_impl\"\n    )\n    return versioned_fn(user_files)\n\n\ndef build_access_for_user_files_impl(\n    user_files: list[UserFile],\n) -> dict[str, DocumentAccess]:\n    result: dict[str, DocumentAccess] = {}\n    for user_file in user_files:\n        emails, is_public = collect_user_file_access(user_file)\n        result[str(user_file.id)] = DocumentAccess.build(\n            user_emails=list(emails),\n            user_groups=[],\n            is_public=is_public,\n            external_user_emails=[],\n            external_user_group_ids=[],\n        )\n    return result\n\n\ndef collect_user_file_access(user_file: UserFile) -> tuple[set[str], bool]:\n    \"\"\"Collect all user emails that should have access to this user file.\n    Includes the owner plus any users who have access via shared personas.\n    Returns (emails, is_public).\"\"\"\n    emails: set[str] = {user_file.user.email}\n    is_public = False\n    for persona in user_file.assistants:\n        if persona.deleted:\n            continue\n        if persona.is_public:\n            is_public = True\n        if persona.user_id is not None and persona.user:\n            emails.add(persona.user.email)\n        for shared_user in persona.users:\n            emails.add(shared_user.email)\n    return emails, is_public\n"
  },
  {
    "path": "backend/onyx/access/hierarchy_access.py",
    "content": "from sqlalchemy.orm import Session\n\nfrom onyx.db.models import User\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\n\n\ndef _get_user_external_group_ids(\n    db_session: Session,  # noqa: ARG001\n    user: User,  # noqa: ARG001\n) -> list[str]:\n    return []\n\n\ndef get_user_external_group_ids(db_session: Session, user: User) -> list[str]:\n    versioned_get_user_external_group_ids = fetch_versioned_implementation(\n        \"onyx.access.hierarchy_access\", \"_get_user_external_group_ids\"\n    )\n    return versioned_get_user_external_group_ids(db_session, user)\n"
  },
  {
    "path": "backend/onyx/access/models.py",
    "content": "from dataclasses import dataclass\n\nfrom onyx.access.utils import prefix_external_group\nfrom onyx.access.utils import prefix_user_email\nfrom onyx.access.utils import prefix_user_group\nfrom onyx.configs.constants import PUBLIC_DOC_PAT\n\n\n@dataclass(frozen=True)\nclass ExternalAccess:\n    # arbitrary limit to prevent excessively large permissions sets\n    # not internally enforced ... the caller can check this before using the instance\n    MAX_NUM_ENTRIES = 5000\n\n    # Emails of external users with access to the doc externally\n    external_user_emails: set[str]\n    # Names or external IDs of groups with access to the doc\n    external_user_group_ids: set[str]\n    # Whether the document is public in the external system or Onyx\n    is_public: bool\n\n    def __str__(self) -> str:\n        \"\"\"Prevent extremely long logs\"\"\"\n\n        def truncate_set(s: set[str], max_len: int = 100) -> str:\n            s_str = str(s)\n            if len(s_str) > max_len:\n                return f\"{s_str[:max_len]}... ({len(s)} items)\"\n            return s_str\n\n        return (\n            f\"ExternalAccess(\"\n            f\"external_user_emails={truncate_set(self.external_user_emails)}, \"\n            f\"external_user_group_ids={truncate_set(self.external_user_group_ids)}, \"\n            f\"is_public={self.is_public})\"\n        )\n\n    @property\n    def num_entries(self) -> int:\n        return len(self.external_user_emails) + len(self.external_user_group_ids)\n\n    @classmethod\n    def public(cls) -> \"ExternalAccess\":\n        return cls(\n            external_user_emails=set(),\n            external_user_group_ids=set(),\n            is_public=True,\n        )\n\n    @classmethod\n    def empty(cls) -> \"ExternalAccess\":\n        \"\"\"\n        A helper function that returns an *empty* set of external user-emails and group-ids, and sets `is_public` to `False`.\n        This effectively makes the document in question \"private\" or inaccessible to anyone else.\n\n        This is especially helpful to use when you are performing permission-syncing, and some document's permissions aren't able\n        to be determined (for whatever reason). Setting its `ExternalAccess` to \"private\" is a feasible fallback.\n        \"\"\"\n\n        return cls(\n            external_user_emails=set(),\n            external_user_group_ids=set(),\n            is_public=False,\n        )\n\n\n@dataclass(frozen=True)\nclass DocExternalAccess:\n    \"\"\"\n    This is just a class to wrap the external access and the document ID\n    together. It's used for syncing document permissions to Vespa.\n    \"\"\"\n\n    external_access: ExternalAccess\n    # The document ID\n    doc_id: str\n\n    def to_dict(self) -> dict:\n        return {\n            \"external_access\": {\n                \"external_user_emails\": list(self.external_access.external_user_emails),\n                \"external_user_group_ids\": list(\n                    self.external_access.external_user_group_ids\n                ),\n                \"is_public\": self.external_access.is_public,\n            },\n            \"doc_id\": self.doc_id,\n        }\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"DocExternalAccess\":\n        external_access = ExternalAccess(\n            external_user_emails=set(\n                data[\"external_access\"].get(\"external_user_emails\", [])\n            ),\n            external_user_group_ids=set(\n                data[\"external_access\"].get(\"external_user_group_ids\", [])\n            ),\n            is_public=data[\"external_access\"][\"is_public\"],\n        )\n        return cls(\n            external_access=external_access,\n            doc_id=data[\"doc_id\"],\n        )\n\n\n@dataclass(frozen=True)\nclass NodeExternalAccess:\n    \"\"\"\n    Wraps external access with a hierarchy node's raw ID.\n    Used for syncing hierarchy node permissions (e.g., folder permissions).\n    \"\"\"\n\n    external_access: ExternalAccess\n    # The raw node ID from the source system (e.g., Google Drive folder ID)\n    raw_node_id: str\n    # The source type (e.g., \"google_drive\")\n    source: str\n\n    def to_dict(self) -> dict:\n        return {\n            \"external_access\": {\n                \"external_user_emails\": list(self.external_access.external_user_emails),\n                \"external_user_group_ids\": list(\n                    self.external_access.external_user_group_ids\n                ),\n                \"is_public\": self.external_access.is_public,\n            },\n            \"raw_node_id\": self.raw_node_id,\n            \"source\": self.source,\n        }\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"NodeExternalAccess\":\n        external_access = ExternalAccess(\n            external_user_emails=set(\n                data[\"external_access\"].get(\"external_user_emails\", [])\n            ),\n            external_user_group_ids=set(\n                data[\"external_access\"].get(\"external_user_group_ids\", [])\n            ),\n            is_public=data[\"external_access\"][\"is_public\"],\n        )\n        return cls(\n            external_access=external_access,\n            raw_node_id=data[\"raw_node_id\"],\n            source=data[\"source\"],\n        )\n\n\n# Union type for elements that can have permissions synced\nElementExternalAccess = DocExternalAccess | NodeExternalAccess\n\n\n# TODO(andrei): First refactor this into a pydantic model, then get rid of\n# duplicate fields.\n@dataclass(frozen=True, init=False)\nclass DocumentAccess(ExternalAccess):\n    # User emails for Onyx users, None indicates admin\n    user_emails: set[str | None]\n\n    # Names of user groups associated with this document\n    user_groups: set[str]\n\n    external_user_emails: set[str]\n    external_user_group_ids: set[str]\n    is_public: bool\n\n    def __init__(self) -> None:\n        raise TypeError(\n            \"Use `DocumentAccess.build(...)` instead of creating an instance directly.\"\n        )\n\n    def to_acl(self) -> set[str]:\n        \"\"\"Converts the access state to a set of formatted ACL strings.\n\n        NOTE: When querying for documents, the supplied ACL filter strings must\n        be formatted in the same way as this function.\n        \"\"\"\n        acl_set: set[str] = set()\n        for user_email in self.user_emails:\n            if user_email:\n                acl_set.add(prefix_user_email(user_email))\n\n        for group_name in self.user_groups:\n            acl_set.add(prefix_user_group(group_name))\n\n        for external_user_email in self.external_user_emails:\n            acl_set.add(prefix_user_email(external_user_email))\n\n        for external_group_id in self.external_user_group_ids:\n            acl_set.add(prefix_external_group(external_group_id))\n\n        if self.is_public:\n            acl_set.add(PUBLIC_DOC_PAT)\n\n        return acl_set\n\n    @classmethod\n    def build(\n        cls,\n        user_emails: list[str | None],\n        user_groups: list[str],\n        external_user_emails: list[str],\n        external_user_group_ids: list[str],\n        is_public: bool,\n    ) -> \"DocumentAccess\":\n        \"\"\"Don't prefix incoming data wth acl type, prefix on read from to_acl!\"\"\"\n\n        obj = object.__new__(cls)\n        object.__setattr__(\n            obj, \"user_emails\", {user_email for user_email in user_emails if user_email}\n        )\n        object.__setattr__(obj, \"user_groups\", set(user_groups))\n        object.__setattr__(\n            obj,\n            \"external_user_emails\",\n            {external_email for external_email in external_user_emails},\n        )\n        object.__setattr__(\n            obj,\n            \"external_user_group_ids\",\n            {external_group_id for external_group_id in external_user_group_ids},\n        )\n        object.__setattr__(obj, \"is_public\", is_public)\n\n        return obj\n\n\ndefault_public_access = DocumentAccess.build(\n    external_user_emails=[],\n    external_user_group_ids=[],\n    user_emails=[],\n    user_groups=[],\n    is_public=True,\n)\n"
  },
  {
    "path": "backend/onyx/access/utils.py",
    "content": "from onyx.configs.constants import DocumentSource\n\n\ndef prefix_user_email(user_email: str) -> str:\n    \"\"\"Prefixes a user email to eliminate collision with group names.\n    This applies to both a Onyx user and an External user, this is to make the query time\n    more efficient\"\"\"\n    return f\"user_email:{user_email}\"\n\n\ndef prefix_user_group(user_group_name: str) -> str:\n    \"\"\"Prefixes a user group name to eliminate collision with user emails.\n    This assumes that user ids are prefixed with a different prefix.\"\"\"\n    return f\"group:{user_group_name}\"\n\n\ndef prefix_external_group(ext_group_name: str) -> str:\n    \"\"\"Prefixes an external group name to eliminate collision with user emails / Onyx groups.\"\"\"\n    return f\"external_group:{ext_group_name}\"\n\n\ndef build_ext_group_name_for_onyx(ext_group_name: str, source: DocumentSource) -> str:\n    \"\"\"\n    External groups may collide across sources, every source needs its own prefix.\n    NOTE: the name is lowercased to handle case sensitivity for group names\n    \"\"\"\n    return f\"{source.value}_{ext_group_name}\".lower()\n"
  },
  {
    "path": "backend/onyx/auth/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/auth/anonymous_user.py",
    "content": "from collections.abc import Mapping\nfrom typing import Any\nfrom typing import cast\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.constants import ANONYMOUS_USER_EMAIL\nfrom onyx.configs.constants import ANONYMOUS_USER_INFO_ID\nfrom onyx.configs.constants import KV_ANONYMOUS_USER_PERSONALIZATION_KEY\nfrom onyx.configs.constants import KV_ANONYMOUS_USER_PREFERENCES_KEY\nfrom onyx.key_value_store.store import KeyValueStore\nfrom onyx.key_value_store.store import KvKeyNotFoundError\nfrom onyx.server.manage.models import UserInfo\nfrom onyx.server.manage.models import UserPersonalization\nfrom onyx.server.manage.models import UserPreferences\n\n\ndef set_anonymous_user_preferences(\n    store: KeyValueStore, preferences: UserPreferences\n) -> None:\n    store.store(KV_ANONYMOUS_USER_PREFERENCES_KEY, preferences.model_dump())\n\n\ndef set_anonymous_user_personalization(\n    store: KeyValueStore, personalization: UserPersonalization\n) -> None:\n    store.store(KV_ANONYMOUS_USER_PERSONALIZATION_KEY, personalization.model_dump())\n\n\ndef load_anonymous_user_preferences(store: KeyValueStore) -> UserPreferences:\n    try:\n        preferences_data = cast(\n            Mapping[str, Any], store.load(KV_ANONYMOUS_USER_PREFERENCES_KEY)\n        )\n        return UserPreferences(**preferences_data)\n    except KvKeyNotFoundError:\n        return UserPreferences(\n            chosen_assistants=None, default_model=None, auto_scroll=True\n        )\n\n\ndef fetch_anonymous_user_info(store: KeyValueStore) -> UserInfo:\n    \"\"\"Fetch a UserInfo object for anonymous users (used for API responses).\"\"\"\n    personalization = UserPersonalization()\n    try:\n        personalization_data = cast(\n            Mapping[str, Any], store.load(KV_ANONYMOUS_USER_PERSONALIZATION_KEY)\n        )\n        personalization = UserPersonalization(**personalization_data)\n    except KvKeyNotFoundError:\n        pass\n\n    return UserInfo(\n        id=ANONYMOUS_USER_INFO_ID,\n        email=ANONYMOUS_USER_EMAIL,\n        is_active=True,\n        is_superuser=False,\n        is_verified=True,\n        role=UserRole.LIMITED,\n        preferences=load_anonymous_user_preferences(store),\n        personalization=personalization,\n        is_anonymous_user=True,\n        password_configured=False,\n    )\n"
  },
  {
    "path": "backend/onyx/auth/api_key.py",
    "content": "import hashlib\nimport secrets\nimport uuid\nfrom urllib.parse import quote\n\nfrom fastapi import Request\nfrom passlib.hash import sha256_crypt\nfrom pydantic import BaseModel\n\nfrom onyx.auth.constants import API_KEY_LENGTH\nfrom onyx.auth.constants import API_KEY_PREFIX\nfrom onyx.auth.constants import DEPRECATED_API_KEY_PREFIX\nfrom onyx.auth.schemas import UserRole\nfrom onyx.auth.utils import get_hashed_bearer_token_from_request\nfrom onyx.configs.app_configs import API_KEY_HASH_ROUNDS\nfrom shared_configs.configs import MULTI_TENANT\n\n\nclass ApiKeyDescriptor(BaseModel):\n    api_key_id: int\n    api_key_display: str\n    api_key: str | None = None  # only present on initial creation\n    api_key_name: str | None = None\n    api_key_role: UserRole\n\n    user_id: uuid.UUID\n\n\ndef generate_api_key(tenant_id: str | None = None) -> str:\n    if not MULTI_TENANT or not tenant_id:\n        return API_KEY_PREFIX + secrets.token_urlsafe(API_KEY_LENGTH)\n\n    encoded_tenant = quote(tenant_id)  # URL encode the tenant ID\n    return f\"{API_KEY_PREFIX}{encoded_tenant}.{secrets.token_urlsafe(API_KEY_LENGTH)}\"\n\n\ndef _deprecated_hash_api_key(api_key: str) -> str:\n    return sha256_crypt.hash(api_key, salt=\"\", rounds=API_KEY_HASH_ROUNDS)\n\n\ndef hash_api_key(api_key: str) -> str:\n    # NOTE: no salt is needed, as the API key is randomly generated\n    # and overlaps are impossible\n    if api_key.startswith(API_KEY_PREFIX):\n        return hashlib.sha256(api_key.encode(\"utf-8\")).hexdigest()\n\n    if api_key.startswith(DEPRECATED_API_KEY_PREFIX):\n        return _deprecated_hash_api_key(api_key)\n\n    raise ValueError(f\"Invalid API key prefix: {api_key[:3]}\")\n\n\ndef build_displayable_api_key(api_key: str) -> str:\n    if api_key.startswith(API_KEY_PREFIX):\n        api_key = api_key[len(API_KEY_PREFIX) :]\n\n    return API_KEY_PREFIX + api_key[:4] + \"********\" + api_key[-4:]\n\n\ndef get_hashed_api_key_from_request(request: Request) -> str | None:\n    \"\"\"Extract and hash API key from Authorization header.\n\n    Accepts both \"Bearer <key>\" and raw key formats.\n    \"\"\"\n    return get_hashed_bearer_token_from_request(\n        request,\n        valid_prefixes=[API_KEY_PREFIX, DEPRECATED_API_KEY_PREFIX],\n        hash_fn=hash_api_key,\n        allow_non_bearer=True,  # API keys historically support both formats\n    )\n"
  },
  {
    "path": "backend/onyx/auth/captcha.py",
    "content": "\"\"\"Captcha verification for user registration.\"\"\"\n\nimport httpx\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.configs.app_configs import CAPTCHA_ENABLED\nfrom onyx.configs.app_configs import RECAPTCHA_SCORE_THRESHOLD\nfrom onyx.configs.app_configs import RECAPTCHA_SECRET_KEY\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nRECAPTCHA_VERIFY_URL = \"https://www.google.com/recaptcha/api/siteverify\"\n\n\nclass CaptchaVerificationError(Exception):\n    \"\"\"Raised when captcha verification fails.\"\"\"\n\n\nclass RecaptchaResponse(BaseModel):\n    \"\"\"Response from Google reCAPTCHA verification API.\"\"\"\n\n    success: bool\n    score: float | None = None  # Only present for reCAPTCHA v3\n    action: str | None = None\n    challenge_ts: str | None = None\n    hostname: str | None = None\n    error_codes: list[str] | None = Field(default=None, alias=\"error-codes\")\n\n\ndef is_captcha_enabled() -> bool:\n    \"\"\"Check if captcha verification is enabled.\"\"\"\n    return CAPTCHA_ENABLED and bool(RECAPTCHA_SECRET_KEY)\n\n\nasync def verify_captcha_token(\n    token: str,\n    expected_action: str = \"signup\",\n) -> None:\n    \"\"\"\n    Verify a reCAPTCHA token with Google's API.\n\n    Args:\n        token: The reCAPTCHA response token from the client\n        expected_action: Expected action name for v3 verification\n\n    Raises:\n        CaptchaVerificationError: If verification fails\n    \"\"\"\n    if not is_captcha_enabled():\n        return\n\n    if not token:\n        raise CaptchaVerificationError(\"Captcha token is required\")\n\n    try:\n        async with httpx.AsyncClient() as client:\n            response = await client.post(\n                RECAPTCHA_VERIFY_URL,\n                data={\n                    \"secret\": RECAPTCHA_SECRET_KEY,\n                    \"response\": token,\n                },\n                timeout=10.0,\n            )\n            response.raise_for_status()\n\n            data = response.json()\n            result = RecaptchaResponse(**data)\n\n            if not result.success:\n                error_codes = result.error_codes or [\"unknown-error\"]\n                logger.warning(f\"Captcha verification failed: {error_codes}\")\n                raise CaptchaVerificationError(\n                    f\"Captcha verification failed: {', '.join(error_codes)}\"\n                )\n\n            # For reCAPTCHA v3, also check the score\n            if result.score is not None:\n                if result.score < RECAPTCHA_SCORE_THRESHOLD:\n                    logger.warning(\n                        f\"Captcha score too low: {result.score} < {RECAPTCHA_SCORE_THRESHOLD}\"\n                    )\n                    raise CaptchaVerificationError(\n                        \"Captcha verification failed: suspicious activity detected\"\n                    )\n\n                # Optionally verify the action matches\n                if result.action and result.action != expected_action:\n                    logger.warning(\n                        f\"Captcha action mismatch: {result.action} != {expected_action}\"\n                    )\n                    raise CaptchaVerificationError(\n                        \"Captcha verification failed: action mismatch\"\n                    )\n\n            logger.debug(\n                f\"Captcha verification passed: score={result.score}, action={result.action}\"\n            )\n\n    except httpx.HTTPError as e:\n        logger.error(f\"Captcha API request failed: {e}\")\n        # In case of API errors, we might want to allow registration\n        # to prevent blocking legitimate users. This is a policy decision.\n        raise CaptchaVerificationError(\"Captcha verification service unavailable\")\n"
  },
  {
    "path": "backend/onyx/auth/constants.py",
    "content": "\"\"\"Authentication constants shared across auth modules.\"\"\"\n\n# API Key constants\nAPI_KEY_PREFIX = \"on_\"\nDEPRECATED_API_KEY_PREFIX = \"dn_\"\nAPI_KEY_LENGTH = 192\n\n# PAT constants\nPAT_PREFIX = \"onyx_pat_\"\nPAT_LENGTH = 192\n\n# Shared header constants\nAPI_KEY_HEADER_NAME = \"Authorization\"\nAPI_KEY_HEADER_ALTERNATIVE_NAME = \"X-Onyx-Authorization\"\nBEARER_PREFIX = \"Bearer \"\n"
  },
  {
    "path": "backend/onyx/auth/disposable_email_validator.py",
    "content": "\"\"\"\nUtility to validate and block disposable/temporary email addresses.\n\nThis module fetches a list of known disposable email domains from a remote source\nand caches them for performance. It's used during user registration to prevent\nabuse from temporary email services.\n\"\"\"\n\nimport threading\nimport time\nfrom typing import Set\n\nimport httpx\n\nfrom onyx.configs.app_configs import DISPOSABLE_EMAIL_DOMAINS_URL\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass DisposableEmailValidator:\n    \"\"\"\n    Thread-safe singleton validator for disposable email domains.\n\n    Fetches and caches the list of disposable domains, with periodic refresh.\n    \"\"\"\n\n    _instance: \"DisposableEmailValidator | None\" = None\n    _lock = threading.Lock()\n\n    def __new__(cls) -> \"DisposableEmailValidator\":\n        if cls._instance is None:\n            with cls._lock:\n                if cls._instance is None:\n                    cls._instance = super().__new__(cls)\n        return cls._instance\n\n    def __init__(self) -> None:\n        # Check if already initialized using a try/except to avoid type issues\n        try:\n            if self._initialized:\n                return\n        except AttributeError:\n            pass\n\n        self._domains: Set[str] = set()\n        self._last_fetch_time: float = 0\n        self._fetch_lock = threading.Lock()\n        # Cache for 1 hour\n        self._cache_duration = 3600\n        # Hardcoded fallback list of common disposable domains\n        # This ensures we block at least these even if the remote fetch fails\n        self._fallback_domains = {\n            \"trashlify.com\",\n            \"10minutemail.com\",\n            \"guerrillamail.com\",\n            \"mailinator.com\",\n            \"tempmail.com\",\n            \"chat-tempmail.com\",\n            \"throwaway.email\",\n            \"yopmail.com\",\n            \"temp-mail.org\",\n            \"getnada.com\",\n            \"maildrop.cc\",\n        }\n        # Set initialized flag last to prevent race conditions\n        self._initialized: bool = True\n\n    def _should_refresh(self) -> bool:\n        \"\"\"Check if the cached domains should be refreshed.\"\"\"\n        return (time.time() - self._last_fetch_time) > self._cache_duration\n\n    def _fetch_domains(self) -> Set[str]:\n        \"\"\"\n        Fetch disposable email domains from the configured URL.\n\n        Returns:\n            Set of domain strings (lowercased)\n        \"\"\"\n        if not DISPOSABLE_EMAIL_DOMAINS_URL:\n            logger.debug(\"DISPOSABLE_EMAIL_DOMAINS_URL not configured\")\n            return self._fallback_domains.copy()\n\n        try:\n            logger.info(\n                f\"Fetching disposable email domains from {DISPOSABLE_EMAIL_DOMAINS_URL}\"\n            )\n            with httpx.Client(timeout=10.0) as client:\n                response = client.get(DISPOSABLE_EMAIL_DOMAINS_URL)\n                response.raise_for_status()\n\n                domains_list = response.json()\n\n                if not isinstance(domains_list, list):\n                    logger.error(\n                        f\"Expected list from disposable domains URL, got {type(domains_list)}\"\n                    )\n                    return self._fallback_domains.copy()\n\n                # Convert all to lowercase and create set\n                domains = {domain.lower().strip() for domain in domains_list if domain}\n\n                # Always include fallback domains\n                domains.update(self._fallback_domains)\n\n                logger.info(\n                    f\"Successfully fetched {len(domains)} disposable email domains\"\n                )\n                return domains\n\n        except httpx.HTTPError as e:\n            logger.warning(f\"Failed to fetch disposable domains (HTTP error): {e}\")\n        except Exception as e:\n            logger.warning(f\"Failed to fetch disposable domains: {e}\")\n\n        # On error, return fallback domains\n        return self._fallback_domains.copy()\n\n    def get_domains(self) -> Set[str]:\n        \"\"\"\n        Get the cached set of disposable email domains.\n        Refreshes the cache if needed.\n\n        Returns:\n            Set of disposable domain strings (lowercased)\n        \"\"\"\n        # Fast path: return cached domains if still fresh\n        if self._domains and not self._should_refresh():\n            return self._domains.copy()\n\n        # Slow path: need to refresh\n        with self._fetch_lock:\n            # Double-check after acquiring lock\n            if self._domains and not self._should_refresh():\n                return self._domains.copy()\n\n            self._domains = self._fetch_domains()\n            self._last_fetch_time = time.time()\n            return self._domains.copy()\n\n    def is_disposable(self, email: str) -> bool:\n        \"\"\"\n        Check if an email address uses a disposable domain.\n\n        Args:\n            email: The email address to check\n\n        Returns:\n            True if the email domain is disposable, False otherwise\n        \"\"\"\n        if not email or \"@\" not in email:\n            return False\n\n        parts = email.split(\"@\")\n        if len(parts) != 2 or not parts[0]:  # Must have user@domain with non-empty user\n            return False\n\n        domain = parts[1].lower().strip()\n        if not domain:  # Domain part must not be empty\n            return False\n\n        disposable_domains = self.get_domains()\n        return domain in disposable_domains\n\n\n# Global singleton instance\n_validator = DisposableEmailValidator()\n\n\ndef is_disposable_email(email: str) -> bool:\n    \"\"\"\n    Check if an email address uses a disposable/temporary domain.\n\n    This is a convenience function that uses the global validator instance.\n\n    Args:\n        email: The email address to check\n\n    Returns:\n        True if the email uses a disposable domain, False otherwise\n    \"\"\"\n    return _validator.is_disposable(email)\n\n\ndef refresh_disposable_domains() -> None:\n    \"\"\"\n    Force a refresh of the disposable domains list.\n\n    This can be called manually if you want to update the list\n    without waiting for the cache to expire.\n    \"\"\"\n    _validator._last_fetch_time = 0\n    _validator.get_domains()\n"
  },
  {
    "path": "backend/onyx/auth/email_utils.py",
    "content": "import base64\nimport smtplib\nfrom datetime import datetime\nfrom email.mime.image import MIMEImage\nfrom email.mime.multipart import MIMEMultipart\nfrom email.mime.text import MIMEText\nfrom email.utils import formatdate\nfrom email.utils import make_msgid\n\nimport sendgrid  # type: ignore\nfrom sendgrid.helpers.mail import Attachment  # type: ignore\nfrom sendgrid.helpers.mail import Content\nfrom sendgrid.helpers.mail import ContentId\nfrom sendgrid.helpers.mail import Disposition\nfrom sendgrid.helpers.mail import Email\nfrom sendgrid.helpers.mail import FileContent\nfrom sendgrid.helpers.mail import FileName\nfrom sendgrid.helpers.mail import FileType\nfrom sendgrid.helpers.mail import Mail\nfrom sendgrid.helpers.mail import To\n\nfrom onyx.configs.app_configs import EMAIL_CONFIGURED\nfrom onyx.configs.app_configs import EMAIL_FROM\nfrom onyx.configs.app_configs import SENDGRID_API_KEY\nfrom onyx.configs.app_configs import SMTP_PASS\nfrom onyx.configs.app_configs import SMTP_PORT\nfrom onyx.configs.app_configs import SMTP_SERVER\nfrom onyx.configs.app_configs import SMTP_USER\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.constants import AuthType\nfrom onyx.configs.constants import ONYX_DEFAULT_APPLICATION_NAME\nfrom onyx.configs.constants import ONYX_DISCORD_URL\nfrom onyx.db.models import User\nfrom onyx.server.runtime.onyx_runtime import OnyxRuntime\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.url import add_url_params\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\nHTML_EMAIL_TEMPLATE = \"\"\"\\\n<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n  <meta charset=\"UTF-8\">\n  <meta name=\"viewport\" content=\"width=device-width\" />\n  <title>{title}</title>\n  <style>\n    body, table, td, a {{\n      font-family: -apple-system, BlinkMacSystemFont, \"Segoe UI\", Roboto, Helvetica, Arial, sans-serif;\n      text-size-adjust: 100%;\n      margin: 0;\n      padding: 0;\n      -webkit-font-smoothing: antialiased;\n      -webkit-text-size-adjust: none;\n    }}\n    body {{\n      background-color: #f7f7f7;\n      color: #333;\n    }}\n    .body-content {{\n      color: #333;\n    }}\n    .email-container {{\n      width: 100%;\n      max-width: 600px;\n      margin: 0 auto;\n      background-color: #ffffff;\n      border-radius: 6px;\n      overflow: hidden;\n      border: 1px solid #eaeaea;\n    }}\n    .header {{\n      background-color: #000000;\n      padding: 20px;\n      text-align: center;\n    }}\n    .header img {{\n      max-width: 140px;\n      width: 140px;\n      height: auto;\n      filter: brightness(1.1) contrast(1.2);\n      border-radius: 8px;\n      padding: 5px;\n    }}\n    .body-content {{\n      padding: 20px 30px;\n    }}\n    .title {{\n      font-size: 20px;\n      font-weight: bold;\n      margin: 0 0 10px;\n    }}\n    .message {{\n      font-size: 16px;\n      line-height: 1.5;\n      margin: 0 0 20px;\n    }}\n    .cta-button {{\n      display: inline-block;\n      padding: 14px 24px;\n      background-color: #0055FF;\n      color: #ffffff !important;\n      text-decoration: none;\n      border-radius: 4px;\n      font-weight: 600;\n      font-size: 16px;\n      margin-top: 10px;\n      box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);\n      text-align: center;\n    }}\n    .footer {{\n      font-size: 13px;\n      color: #6A7280;\n      text-align: center;\n      padding: 20px;\n    }}\n    .footer a {{\n      color: #6b7280;\n      text-decoration: underline;\n    }}\n  </style>\n</head>\n<body>\n  <table role=\"presentation\" class=\"email-container\" cellpadding=\"0\" cellspacing=\"0\">\n    <tr>\n      <td class=\"header\">\n        <img\n          style=\"background-color: #ffffff; border-radius: 8px;\"\n          src=\"cid:logo.png\"\n          alt=\"{application_name} Logo\"\n        >\n      </td>\n    </tr>\n    <tr>\n      <td class=\"body-content\">\n        <h1 class=\"title\">{heading}</h1>\n        <div class=\"message\">\n          {message}\n        </div>\n        {cta_block}\n      </td>\n    </tr>\n    <tr>\n      <td class=\"footer\">\n        © {year} {application_name}. All rights reserved.\n        {community_link_fragment}\n      </td>\n    </tr>\n  </table>\n</body>\n</html>\n\"\"\"\n\n\ndef build_html_email(\n    application_name: str | None,\n    heading: str,\n    message: str,\n    cta_text: str | None = None,\n    cta_link: str | None = None,\n) -> str:\n    community_link_fragment = \"\"\n    if application_name == ONYX_DEFAULT_APPLICATION_NAME:\n        community_link_fragment = f'<br>Have questions? Join our Discord community <a href=\"{ONYX_DISCORD_URL}\">here</a>.'\n\n    if cta_text and cta_link:\n        cta_block = f'<a class=\"cta-button\" href=\"{cta_link}\">{cta_text}</a>'\n    else:\n        cta_block = \"\"\n    return HTML_EMAIL_TEMPLATE.format(\n        application_name=application_name,\n        title=heading,\n        heading=heading,\n        message=message,\n        cta_block=cta_block,\n        community_link_fragment=community_link_fragment,\n        year=datetime.now().year,\n    )\n\n\ndef send_email(\n    user_email: str,\n    subject: str,\n    html_body: str,\n    text_body: str,\n    mail_from: str = EMAIL_FROM,\n    inline_png: tuple[str, bytes] | None = None,\n) -> None:\n    if not EMAIL_CONFIGURED:\n        raise ValueError(\"Email is not configured.\")\n\n    if SENDGRID_API_KEY:\n        send_email_with_sendgrid(\n            user_email, subject, html_body, text_body, mail_from, inline_png\n        )\n        return\n\n    send_email_with_smtplib(\n        user_email, subject, html_body, text_body, mail_from, inline_png\n    )\n\n\ndef send_email_with_sendgrid(\n    user_email: str,\n    subject: str,\n    html_body: str,\n    text_body: str,\n    mail_from: str = EMAIL_FROM,\n    inline_png: tuple[str, bytes] | None = None,\n) -> None:\n    from_email = Email(mail_from) if mail_from else Email(\"noreply@onyx.app\")\n    to_email = To(user_email)\n\n    mail = Mail(\n        from_email=from_email,\n        to_emails=to_email,\n        subject=subject,\n        plain_text_content=Content(\"text/plain\", text_body),\n    )\n\n    # Add HTML content\n    mail.add_content(Content(\"text/html\", html_body))\n\n    if inline_png:\n        image_name, image_data = inline_png\n\n        # Create attachment\n        encoded_image = base64.b64encode(image_data).decode()\n        attachment = Attachment()\n        attachment.file_content = FileContent(encoded_image)\n        attachment.file_name = FileName(image_name)\n        attachment.file_type = FileType(\"image/png\")\n        attachment.disposition = Disposition(\"inline\")\n        attachment.content_id = ContentId(image_name)\n\n        mail.add_attachment(attachment)\n\n    # Get a JSON-ready representation of the Mail object\n    mail_json = mail.get()\n\n    sg = sendgrid.SendGridAPIClient(api_key=SENDGRID_API_KEY)\n    response = sg.client.mail.send.post(request_body=mail_json)  # can raise\n    if response.status_code != 202:\n        logger.warning(f\"Unexpected status code {response.status_code}\")\n\n\ndef send_email_with_smtplib(\n    user_email: str,\n    subject: str,\n    html_body: str,\n    text_body: str,\n    mail_from: str = EMAIL_FROM,\n    inline_png: tuple[str, bytes] | None = None,\n) -> None:\n\n    # Create a multipart/alternative message - this indicates these are alternative versions of the same content\n    msg = MIMEMultipart(\"alternative\")\n    msg[\"Subject\"] = subject\n    msg[\"To\"] = user_email\n    if mail_from:\n        msg[\"From\"] = mail_from\n    msg[\"Date\"] = formatdate(localtime=True)\n    msg[\"Message-ID\"] = make_msgid(domain=\"onyx.app\")\n\n    # Add text part first (lowest priority)\n    text_part = MIMEText(text_body, \"plain\")\n    msg.attach(text_part)\n\n    if inline_png:\n        # For HTML with images, create a multipart/related container\n        related = MIMEMultipart(\"related\")\n\n        # Add the HTML part to the related container\n        html_part = MIMEText(html_body, \"html\")\n        related.attach(html_part)\n\n        # Add image with proper Content-ID to the related container\n        img = MIMEImage(inline_png[1], _subtype=\"png\")\n        img.add_header(\"Content-ID\", f\"<{inline_png[0]}>\")\n        img.add_header(\"Content-Disposition\", \"inline\", filename=inline_png[0])\n        related.attach(img)\n\n        # Add the related part to the message (higher priority than text)\n        msg.attach(related)\n    else:\n        # No images, just add HTML directly (higher priority than text)\n        html_part = MIMEText(html_body, \"html\")\n        msg.attach(html_part)\n\n    with smtplib.SMTP(SMTP_SERVER, SMTP_PORT) as s:\n        s.starttls()\n        s.login(SMTP_USER, SMTP_PASS)\n        s.send_message(msg)\n\n\ndef send_subscription_cancellation_email(user_email: str) -> None:\n    \"\"\"This is templated but isn't meaningful for whitelabeling.\"\"\"\n\n    # Example usage of the reusable HTML\n    try:\n        load_runtime_settings_fn = fetch_versioned_implementation(\n            \"onyx.server.enterprise_settings.store\", \"load_runtime_settings\"\n        )\n        settings = load_runtime_settings_fn()\n        application_name = settings.application_name\n    except ModuleNotFoundError:\n        application_name = ONYX_DEFAULT_APPLICATION_NAME\n\n    onyx_file = OnyxRuntime.get_emailable_logo()\n\n    subject = f\"Your {application_name} Subscription Has Been Canceled\"\n    heading = \"Subscription Canceled\"\n    message = (\n        \"<p>We're sorry to see you go.</p>\"\n        \"<p>Your subscription has been canceled and will end on your next billing date.</p>\"\n        \"<p>If you change your mind, you can always come back!</p>\"\n    )\n    cta_text = \"Renew Subscription\"\n    cta_link = \"https://www.onyx.app/pricing\"\n    html_content = build_html_email(\n        application_name,\n        heading,\n        message,\n        cta_text,\n        cta_link,\n    )\n    text_content = (\n        \"We're sorry to see you go.\\n\"\n        \"Your subscription has been canceled and will end on your next billing date.\\n\"\n        \"If you change your mind, visit https://www.onyx.app/pricing\"\n    )\n    send_email(\n        user_email,\n        subject,\n        html_content,\n        text_content,\n        inline_png=(\"logo.png\", onyx_file.data),\n    )\n\n\ndef build_user_email_invite(\n    from_email: str, to_email: str, application_name: str, auth_type: AuthType\n) -> tuple[str, str]:\n    heading = \"You've Been Invited!\"\n\n    # the exact action taken by the user, and thus the message, depends on the auth type\n    message = f\"<p>You have been invited by {from_email} to join an organization on {application_name}.</p>\"\n    if auth_type == AuthType.CLOUD:\n        message += (\n            \"<p>To join the organization, please click the button below to set a password \"\n            \"or login with Google and complete your registration.</p>\"\n        )\n    elif auth_type == AuthType.BASIC:\n        message += \"<p>To join the organization, please click the button below to set a password and complete your registration.</p>\"\n    elif auth_type == AuthType.GOOGLE_OAUTH:\n        message += \"<p>To join the organization, please click the button below to login with Google and complete your registration.</p>\"\n    elif auth_type == AuthType.OIDC or auth_type == AuthType.SAML:\n        message += \"<p>To join the organization, please click the button below to complete your registration.</p>\"\n    else:\n        raise ValueError(f\"Invalid auth type: {auth_type}\")\n\n    cta_text = \"Join Organization\"\n    cta_link = f\"{WEB_DOMAIN}/auth/signup?email={to_email}\"\n\n    html_content = build_html_email(\n        application_name,\n        heading,\n        message,\n        cta_text,\n        cta_link,\n    )\n\n    # text content is the fallback for clients that don't support HTML\n    # not as critical, so not having special cases for each auth type\n    text_content = (\n        f\"You have been invited by {from_email} to join an organization on {application_name}.\\n\"\n        \"To join the organization, please visit the following link:\\n\"\n        f\"{WEB_DOMAIN}/auth/signup?email={to_email}\\n\"\n    )\n    if auth_type == AuthType.CLOUD:\n        text_content += \"You'll be asked to set a password or login with Google to complete your registration.\"\n\n    return text_content, html_content\n\n\ndef send_user_email_invite(\n    user_email: str, current_user: User, auth_type: AuthType\n) -> None:\n    try:\n        load_runtime_settings_fn = fetch_versioned_implementation(\n            \"onyx.server.enterprise_settings.store\", \"load_runtime_settings\"\n        )\n        settings = load_runtime_settings_fn()\n        application_name = settings.application_name\n    except ModuleNotFoundError:\n        application_name = ONYX_DEFAULT_APPLICATION_NAME\n\n    onyx_file = OnyxRuntime.get_emailable_logo()\n\n    subject = f\"Invitation to Join {application_name} Organization\"\n\n    text_content, html_content = build_user_email_invite(\n        current_user.email, user_email, application_name, auth_type\n    )\n\n    send_email(\n        user_email,\n        subject,\n        html_content,\n        text_content,\n        inline_png=(\"logo.png\", onyx_file.data),\n    )\n\n\ndef send_forgot_password_email(\n    user_email: str,\n    token: str,\n    tenant_id: str,\n    mail_from: str = EMAIL_FROM,\n) -> None:\n    # Builds a forgot password email with or without fancy HTML\n    try:\n        load_runtime_settings_fn = fetch_versioned_implementation(\n            \"onyx.server.enterprise_settings.store\", \"load_runtime_settings\"\n        )\n        settings = load_runtime_settings_fn()\n        application_name = settings.application_name\n    except ModuleNotFoundError:\n        application_name = ONYX_DEFAULT_APPLICATION_NAME\n\n    onyx_file = OnyxRuntime.get_emailable_logo()\n\n    subject = f\"Reset Your {application_name} Password\"\n    heading = \"Reset Your Password\"\n    tenant_param = f\"&tenant={tenant_id}\" if tenant_id and MULTI_TENANT else \"\"\n    message = \"<p>Please click the button below to reset your password. This link will expire in 24 hours.</p>\"\n    cta_text = \"Reset Password\"\n    cta_link = f\"{WEB_DOMAIN}/auth/reset-password?token={token}{tenant_param}\"\n    html_content = build_html_email(\n        application_name,\n        heading,\n        message,\n        cta_text,\n        cta_link,\n    )\n    text_content = (\n        f\"Please click the following link to reset your password. This link will expire in 24 hours.\\n\"\n        f\"{WEB_DOMAIN}/auth/reset-password?token={token}{tenant_param}\"\n    )\n    send_email(\n        user_email,\n        subject,\n        html_content,\n        text_content,\n        mail_from,\n        inline_png=(\"logo.png\", onyx_file.data),\n    )\n\n\ndef send_user_verification_email(\n    user_email: str,\n    token: str,\n    new_organization: bool = False,\n    mail_from: str = EMAIL_FROM,\n) -> None:\n    # Builds a verification email\n    try:\n        load_runtime_settings_fn = fetch_versioned_implementation(\n            \"onyx.server.enterprise_settings.store\", \"load_runtime_settings\"\n        )\n        settings = load_runtime_settings_fn()\n        application_name = settings.application_name\n    except ModuleNotFoundError:\n        application_name = ONYX_DEFAULT_APPLICATION_NAME\n\n    onyx_file = OnyxRuntime.get_emailable_logo()\n\n    subject = f\"{application_name} Email Verification\"\n    link = f\"{WEB_DOMAIN}/auth/verify-email?token={token}\"\n    if new_organization:\n        link = add_url_params(link, {\"first_user\": \"true\"})\n    message = (\n        f\"<p>Click the following link to verify your email address:</p><p>{link}</p>\"\n    )\n    html_content = build_html_email(\n        application_name,\n        \"Verify Your Email\",\n        message,\n    )\n    text_content = f\"Click the following link to verify your email address: {link}\"\n    send_email(\n        user_email,\n        subject,\n        html_content,\n        text_content,\n        mail_from,\n        inline_png=(\"logo.png\", onyx_file.data),\n    )\n"
  },
  {
    "path": "backend/onyx/auth/invited_users.py",
    "content": "from typing import cast\n\nfrom onyx.configs.constants import KV_PENDING_USERS_KEY\nfrom onyx.configs.constants import KV_USER_STORE_KEY\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.utils.special_types import JSON_ro\n\n\ndef remove_user_from_invited_users(email: str) -> int:\n    try:\n        store = get_kv_store()\n        user_emails = cast(list, store.load(KV_USER_STORE_KEY))\n        remaining_users = [user for user in user_emails if user != email]\n        store.store(KV_USER_STORE_KEY, cast(JSON_ro, remaining_users))\n        return len(remaining_users)\n    except KvKeyNotFoundError:\n        return 0\n\n\ndef get_invited_users() -> list[str]:\n    try:\n        store = get_kv_store()\n        return cast(list, store.load(KV_USER_STORE_KEY))\n    except KvKeyNotFoundError:\n        return list()\n\n\ndef write_invited_users(emails: list[str]) -> int:\n    store = get_kv_store()\n    store.store(KV_USER_STORE_KEY, cast(JSON_ro, emails))\n    return len(emails)\n\n\ndef get_pending_users() -> list[str]:\n    try:\n        store = get_kv_store()\n        return cast(list, store.load(KV_PENDING_USERS_KEY))\n    except KvKeyNotFoundError:\n        return list()\n\n\ndef write_pending_users(emails: list[str]) -> int:\n    store = get_kv_store()\n    store.store(KV_PENDING_USERS_KEY, cast(JSON_ro, emails))\n    return len(emails)\n"
  },
  {
    "path": "backend/onyx/auth/jwt.py",
    "content": "import json\nfrom enum import Enum\nfrom functools import lru_cache\nfrom typing import Any\nfrom typing import cast\n\nimport jwt\nimport requests\nfrom cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey\nfrom jwt import decode as jwt_decode\nfrom jwt import InvalidTokenError\nfrom jwt import PyJWTError\nfrom jwt.algorithms import RSAAlgorithm\n\nfrom onyx.configs.app_configs import JWT_PUBLIC_KEY_URL\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\n_PUBLIC_KEY_FETCH_ATTEMPTS = 2\n\n\nclass PublicKeyFormat(Enum):\n    JWKS = \"jwks\"\n    PEM = \"pem\"\n\n\n@lru_cache()\ndef _fetch_public_key_payload() -> tuple[str | dict[str, Any], PublicKeyFormat] | None:\n    \"\"\"Fetch and cache the raw JWT verification material.\"\"\"\n    if JWT_PUBLIC_KEY_URL is None:\n        logger.error(\"JWT_PUBLIC_KEY_URL is not set\")\n        return None\n\n    try:\n        response = requests.get(JWT_PUBLIC_KEY_URL)\n        response.raise_for_status()\n    except requests.RequestException as exc:\n        logger.error(f\"Failed to fetch JWT public key: {str(exc)}\")\n        return None\n    content_type = response.headers.get(\"Content-Type\", \"\").lower()\n    raw_body = response.text\n    body_lstripped = raw_body.lstrip()\n\n    if \"application/json\" in content_type or body_lstripped.startswith(\"{\"):\n        try:\n            data = response.json()\n        except ValueError:\n            logger.error(\"JWT public key URL returned invalid JSON\")\n            return None\n\n        if isinstance(data, dict) and \"keys\" in data:\n            return data, PublicKeyFormat.JWKS\n\n        logger.error(\n            \"JWT public key URL returned JSON but no JWKS 'keys' field was found\"\n        )\n        return None\n\n    body = raw_body.strip()\n    if not body:\n        logger.error(\"JWT public key URL returned an empty response\")\n        return None\n\n    return body, PublicKeyFormat.PEM\n\n\ndef get_public_key(token: str) -> RSAPublicKey | str | None:\n    \"\"\"Return the concrete public key used to verify the provided JWT token.\"\"\"\n    payload = _fetch_public_key_payload()\n    if payload is None:\n        logger.error(\"Failed to retrieve public key payload\")\n        return None\n\n    key_material, key_format = payload\n\n    if key_format is PublicKeyFormat.JWKS:\n        jwks_data = cast(dict[str, Any], key_material)\n        return _resolve_public_key_from_jwks(token, jwks_data)\n\n    return cast(str, key_material)\n\n\ndef _resolve_public_key_from_jwks(\n    token: str, jwks_payload: dict[str, Any]\n) -> RSAPublicKey | None:\n    try:\n        header = jwt.get_unverified_header(token)\n    except PyJWTError as e:\n        logger.error(f\"Unable to parse JWT header: {str(e)}\")\n        return None\n\n    keys = jwks_payload.get(\"keys\", []) if isinstance(jwks_payload, dict) else []\n    if not keys:\n        logger.error(\"JWKS payload did not contain any keys\")\n        return None\n\n    kid = header.get(\"kid\")\n    thumbprint = header.get(\"x5t\")\n\n    candidates = []\n    if kid:\n        candidates = [k for k in keys if k.get(\"kid\") == kid]\n    if not candidates and thumbprint:\n        candidates = [k for k in keys if k.get(\"x5t\") == thumbprint]\n    if not candidates and len(keys) == 1:\n        candidates = keys\n\n    if not candidates:\n        logger.warning(\n            \"No matching JWK found for token header (kid=%s, x5t=%s)\", kid, thumbprint\n        )\n        return None\n\n    if len(candidates) > 1:\n        logger.warning(\n            \"Multiple JWKs matched token header kid=%s; selecting the first occurrence\",\n            kid,\n        )\n\n    jwk = candidates[0]\n    try:\n        return cast(RSAPublicKey, RSAAlgorithm.from_jwk(json.dumps(jwk)))\n    except ValueError as e:\n        logger.error(f\"Failed to construct RSA key from JWK: {str(e)}\")\n        return None\n\n\nasync def verify_jwt_token(token: str) -> dict[str, Any] | None:\n    for attempt in range(_PUBLIC_KEY_FETCH_ATTEMPTS):\n        public_key = get_public_key(token)\n        if public_key is None:\n            logger.error(\"Unable to resolve a public key for JWT verification\")\n            if attempt < _PUBLIC_KEY_FETCH_ATTEMPTS - 1:\n                _fetch_public_key_payload.cache_clear()\n                continue\n            return None\n\n        try:\n            payload = jwt_decode(\n                token,\n                public_key,\n                algorithms=[\"RS256\"],\n                options={\"verify_aud\": False},\n            )\n        except InvalidTokenError as e:\n            logger.error(f\"Invalid JWT token: {str(e)}\")\n            if attempt < _PUBLIC_KEY_FETCH_ATTEMPTS - 1:\n                _fetch_public_key_payload.cache_clear()\n                continue\n            return None\n        except PyJWTError as e:\n            logger.error(f\"JWT decoding error: {str(e)}\")\n            if attempt < _PUBLIC_KEY_FETCH_ATTEMPTS - 1:\n                _fetch_public_key_payload.cache_clear()\n                continue\n            return None\n\n        return payload\n\n    return None\n"
  },
  {
    "path": "backend/onyx/auth/oauth_refresher.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\nfrom typing import Dict\nfrom typing import List\nfrom typing import Optional\n\nimport httpx\nfrom fastapi_users.manager import BaseUserManager\nfrom sqlalchemy.ext.asyncio import AsyncSession\n\nfrom onyx.configs.app_configs import OAUTH_CLIENT_ID\nfrom onyx.configs.app_configs import OAUTH_CLIENT_SECRET\nfrom onyx.configs.app_configs import TRACK_EXTERNAL_IDP_EXPIRY\nfrom onyx.db.models import OAuthAccount\nfrom onyx.db.models import User\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Standard OAuth refresh token endpoints\nREFRESH_ENDPOINTS = {\n    \"google\": \"https://oauth2.googleapis.com/token\",\n}\n\n\n# NOTE: Keeping this as a utility function for potential future debugging,\n# but not using it in production code\nasync def _test_expire_oauth_token(\n    user: User,\n    oauth_account: OAuthAccount,\n    db_session: AsyncSession,  # noqa: ARG001\n    user_manager: BaseUserManager[User, Any],\n    expire_in_seconds: int = 10,\n) -> bool:\n    \"\"\"\n    Utility function for testing - Sets an OAuth token to expire in a short time\n    to facilitate testing of the refresh flow.\n    Not used in production code.\n    \"\"\"\n    try:\n        new_expires_at = int(\n            (datetime.now(timezone.utc).timestamp() + expire_in_seconds)\n        )\n\n        updated_data: Dict[str, Any] = {\"expires_at\": new_expires_at}\n\n        await user_manager.user_db.update_oauth_account(\n            user, cast(Any, oauth_account), updated_data\n        )\n\n        return True\n    except Exception as e:\n        logger.exception(f\"Error setting artificial expiration: {str(e)}\")\n        return False\n\n\nasync def refresh_oauth_token(\n    user: User,\n    oauth_account: OAuthAccount,\n    db_session: AsyncSession,  # noqa: ARG001\n    user_manager: BaseUserManager[User, Any],\n) -> bool:\n    \"\"\"\n    Attempt to refresh an OAuth token that's about to expire or has expired.\n    Returns True if successful, False otherwise.\n    \"\"\"\n    if not oauth_account.refresh_token:\n        logger.warning(\n            f\"No refresh token available for {user.email}'s {oauth_account.oauth_name} account\"\n        )\n        return False\n\n    provider = oauth_account.oauth_name\n    if provider not in REFRESH_ENDPOINTS:\n        logger.warning(f\"Refresh endpoint not configured for provider: {provider}\")\n        return False\n\n    try:\n        logger.info(f\"Refreshing OAuth token for {user.email}'s {provider} account\")\n\n        async with httpx.AsyncClient() as client:\n            response = await client.post(\n                REFRESH_ENDPOINTS[provider],\n                data={\n                    \"client_id\": OAUTH_CLIENT_ID,\n                    \"client_secret\": OAUTH_CLIENT_SECRET,\n                    \"refresh_token\": oauth_account.refresh_token,\n                    \"grant_type\": \"refresh_token\",\n                },\n                headers={\"Content-Type\": \"application/x-www-form-urlencoded\"},\n            )\n\n            if response.status_code != 200:\n                logger.error(\n                    f\"Failed to refresh OAuth token: Status {response.status_code}\"\n                )\n                return False\n\n            token_data = response.json()\n\n            new_access_token = token_data.get(\"access_token\")\n            new_refresh_token = token_data.get(\n                \"refresh_token\", oauth_account.refresh_token\n            )\n            expires_in = token_data.get(\"expires_in\")\n\n            # Calculate new expiry time if provided\n            new_expires_at: Optional[int] = None\n            if expires_in:\n                new_expires_at = int(\n                    (datetime.now(timezone.utc).timestamp() + expires_in)\n                )\n\n            # Update the OAuth account\n            updated_data: Dict[str, Any] = {\n                \"access_token\": new_access_token,\n                \"refresh_token\": new_refresh_token,\n            }\n\n            if new_expires_at:\n                updated_data[\"expires_at\"] = new_expires_at\n\n                # Update oidc_expiry in user model if we're tracking it\n                if TRACK_EXTERNAL_IDP_EXPIRY:\n                    oidc_expiry = datetime.fromtimestamp(\n                        new_expires_at, tz=timezone.utc\n                    )\n                    await user_manager.user_db.update(\n                        user, {\"oidc_expiry\": oidc_expiry}\n                    )\n\n            # Update the OAuth account\n            await user_manager.user_db.update_oauth_account(\n                user, cast(Any, oauth_account), updated_data\n            )\n\n            logger.info(f\"Successfully refreshed OAuth token for {user.email}\")\n            return True\n\n    except Exception as e:\n        logger.exception(f\"Error refreshing OAuth token: {str(e)}\")\n        return False\n\n\nasync def check_and_refresh_oauth_tokens(\n    user: User,\n    db_session: AsyncSession,\n    user_manager: BaseUserManager[User, Any],\n) -> None:\n    \"\"\"\n    Check if any OAuth tokens are expired or about to expire and refresh them.\n    \"\"\"\n    if not hasattr(user, \"oauth_accounts\") or not user.oauth_accounts:\n        return\n\n    now_timestamp = datetime.now(timezone.utc).timestamp()\n\n    # Buffer time to refresh tokens before they expire (in seconds)\n    buffer_seconds = 300  # 5 minutes\n\n    for oauth_account in user.oauth_accounts:\n        # Skip accounts without refresh tokens\n        if not oauth_account.refresh_token:\n            continue\n\n        # If token is about to expire, refresh it\n        if (\n            oauth_account.expires_at\n            and oauth_account.expires_at - now_timestamp < buffer_seconds\n        ):\n            logger.info(f\"OAuth token for {user.email} is about to expire - refreshing\")\n            success = await refresh_oauth_token(\n                user, oauth_account, db_session, user_manager\n            )\n\n            if not success:\n                logger.warning(\n                    \"Failed to refresh OAuth token. User may need to re-authenticate.\"\n                )\n\n\nasync def check_oauth_account_has_refresh_token(\n    user: User,  # noqa: ARG001\n    oauth_account: OAuthAccount,\n) -> bool:\n    \"\"\"\n    Check if an OAuth account has a refresh token.\n    Returns True if a refresh token exists, False otherwise.\n    \"\"\"\n    return bool(oauth_account.refresh_token)\n\n\nasync def get_oauth_accounts_requiring_refresh_token(user: User) -> List[OAuthAccount]:\n    \"\"\"\n    Returns a list of OAuth accounts for a user that are missing refresh tokens.\n    These accounts will need re-authentication to get refresh tokens.\n    \"\"\"\n    if not hasattr(user, \"oauth_accounts\") or not user.oauth_accounts:\n        return []\n\n    accounts_needing_refresh = []\n    for oauth_account in user.oauth_accounts:\n        has_refresh_token = await check_oauth_account_has_refresh_token(\n            user, oauth_account\n        )\n        if not has_refresh_token:\n            accounts_needing_refresh.append(oauth_account)\n\n    return accounts_needing_refresh\n"
  },
  {
    "path": "backend/onyx/auth/oauth_token_manager.py",
    "content": "import time\nfrom typing import Any\nfrom urllib.parse import urlencode\nfrom uuid import UUID\n\nimport requests\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import OAuthConfig\nfrom onyx.db.models import OAuthUserToken\nfrom onyx.db.oauth_config import get_user_oauth_token\nfrom onyx.db.oauth_config import upsert_user_oauth_token\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.sensitive import SensitiveValue\n\n\nlogger = setup_logger()\n\n\nclass OAuthTokenManager:\n    \"\"\"Manages OAuth token retrieval, refresh, and validation\"\"\"\n\n    def __init__(self, oauth_config: OAuthConfig, user_id: UUID, db_session: Session):\n        self.oauth_config = oauth_config\n        self.user_id = user_id\n        self.db_session = db_session\n\n    def get_valid_access_token(self) -> str | None:\n        \"\"\"Get valid access token, refreshing if necessary\"\"\"\n        user_token = get_user_oauth_token(\n            self.oauth_config.id, self.user_id, self.db_session\n        )\n\n        if not user_token:\n            return None\n\n        if not user_token.token_data:\n            return None\n\n        token_data = self._unwrap_token_data(user_token.token_data)\n\n        # Check if token is expired\n        if OAuthTokenManager.is_token_expired(token_data):\n            # Try to refresh if we have a refresh token\n            if \"refresh_token\" in token_data:\n                try:\n                    return self.refresh_token(user_token)\n                except Exception as e:\n                    logger.warning(f\"Failed to refresh token: {e}\")\n                    return None\n            else:\n                return None\n\n        return token_data.get(\"access_token\")\n\n    def refresh_token(self, user_token: OAuthUserToken) -> str:\n        \"\"\"Refresh access token using refresh token\"\"\"\n        if not user_token.token_data:\n            raise ValueError(\"No token data available for refresh\")\n\n        if (\n            self.oauth_config.client_id is None\n            or self.oauth_config.client_secret is None\n        ):\n            raise ValueError(\n                \"OAuth client_id and client_secret are required for token refresh\"\n            )\n\n        token_data = self._unwrap_token_data(user_token.token_data)\n\n        data: dict[str, str] = {\n            \"grant_type\": \"refresh_token\",\n            \"refresh_token\": token_data[\"refresh_token\"],\n            \"client_id\": self._unwrap_sensitive_str(self.oauth_config.client_id),\n            \"client_secret\": self._unwrap_sensitive_str(\n                self.oauth_config.client_secret\n            ),\n        }\n        response = requests.post(\n            self.oauth_config.token_url,\n            data=data,\n            headers={\"Accept\": \"application/json\"},\n        )\n        response.raise_for_status()\n\n        new_token_data = response.json()\n\n        # Calculate expires_at if expires_in is present\n        if \"expires_in\" in new_token_data:\n            new_token_data[\"expires_at\"] = (\n                int(time.time()) + new_token_data[\"expires_in\"]\n            )\n\n        # Preserve refresh_token if not returned (some providers don't return it)\n        if \"refresh_token\" not in new_token_data and \"refresh_token\" in token_data:\n            new_token_data[\"refresh_token\"] = token_data[\"refresh_token\"]\n\n        # Update token in DB\n        upsert_user_oauth_token(\n            self.oauth_config.id,\n            self.user_id,\n            new_token_data,\n            self.db_session,\n        )\n\n        return new_token_data[\"access_token\"]\n\n    @classmethod\n    def token_expiration_time(cls, token_data: dict[str, Any]) -> int | None:\n        \"\"\"Get the token expiration time\"\"\"\n        expires_at = token_data.get(\"expires_at\")\n        if not expires_at:\n            return None\n\n        return expires_at\n\n    @classmethod\n    def is_token_expired(cls, token_data: dict[str, Any]) -> bool:\n        \"\"\"Check if token is expired (with 60 second buffer)\"\"\"\n        expires_at = cls.token_expiration_time(token_data)\n        if not expires_at:\n            return False  # No expiration data, assume valid\n\n        # Add 60 second buffer to avoid race conditions\n        return int(time.time()) + 60 >= expires_at\n\n    def exchange_code_for_token(self, code: str, redirect_uri: str) -> dict[str, Any]:\n        \"\"\"Exchange authorization code for access token\"\"\"\n        if (\n            self.oauth_config.client_id is None\n            or self.oauth_config.client_secret is None\n        ):\n            raise ValueError(\n                \"OAuth client_id and client_secret are required for code exchange\"\n            )\n\n        data: dict[str, str] = {\n            \"grant_type\": \"authorization_code\",\n            \"code\": code,\n            \"client_id\": self._unwrap_sensitive_str(self.oauth_config.client_id),\n            \"client_secret\": self._unwrap_sensitive_str(\n                self.oauth_config.client_secret\n            ),\n            \"redirect_uri\": redirect_uri,\n        }\n        response = requests.post(\n            self.oauth_config.token_url,\n            data=data,\n            headers={\"Accept\": \"application/json\"},\n        )\n        response.raise_for_status()\n\n        token_data = response.json()\n\n        # Calculate expires_at if expires_in is present\n        if \"expires_in\" in token_data:\n            token_data[\"expires_at\"] = int(time.time()) + token_data[\"expires_in\"]\n\n        return token_data\n\n    @staticmethod\n    def build_authorization_url(\n        oauth_config: OAuthConfig, redirect_uri: str, state: str\n    ) -> str:\n        \"\"\"Build OAuth authorization URL\"\"\"\n        if oauth_config.client_id is None:\n            raise ValueError(\"OAuth client_id is required to build authorization URL\")\n\n        params: dict[str, Any] = {\n            \"client_id\": OAuthTokenManager._unwrap_sensitive_str(\n                oauth_config.client_id\n            ),\n            \"redirect_uri\": redirect_uri,\n            \"response_type\": \"code\",\n            \"state\": state,\n        }\n\n        # Add scopes if configured\n        if oauth_config.scopes:\n            params[\"scope\"] = \" \".join(oauth_config.scopes)\n\n        # Add any additional provider-specific parameters\n        if oauth_config.additional_params:\n            params.update(oauth_config.additional_params)\n\n        # Check if URL already has query parameters\n        separator = \"&\" if \"?\" in oauth_config.authorization_url else \"?\"\n\n        return f\"{oauth_config.authorization_url}{separator}{urlencode(params)}\"\n\n    @staticmethod\n    def _unwrap_sensitive_str(value: SensitiveValue[str] | str) -> str:\n        if isinstance(value, SensitiveValue):\n            return value.get_value(apply_mask=False)\n        return value\n\n    @staticmethod\n    def _unwrap_token_data(\n        token_data: SensitiveValue[dict[str, Any]] | dict[str, Any],\n    ) -> dict[str, Any]:\n        if isinstance(token_data, SensitiveValue):\n            return token_data.get_value(apply_mask=False)\n        return token_data\n"
  },
  {
    "path": "backend/onyx/auth/pat.py",
    "content": "\"\"\"Personal Access Token generation and validation.\"\"\"\n\nimport hashlib\nimport secrets\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom urllib.parse import quote\n\nfrom fastapi import Request\n\nfrom onyx.auth.constants import PAT_LENGTH\nfrom onyx.auth.constants import PAT_PREFIX\nfrom onyx.auth.utils import get_hashed_bearer_token_from_request\nfrom shared_configs.configs import MULTI_TENANT\n\n\ndef generate_pat(tenant_id: str | None = None) -> str:\n    \"\"\"Generate cryptographically secure PAT.\"\"\"\n    if MULTI_TENANT and tenant_id:\n        encoded_tenant = quote(tenant_id)\n        return f\"{PAT_PREFIX}{encoded_tenant}.{secrets.token_urlsafe(PAT_LENGTH)}\"\n    return PAT_PREFIX + secrets.token_urlsafe(PAT_LENGTH)\n\n\ndef hash_pat(token: str) -> str:\n    \"\"\"Hash PAT using SHA256 (no salt needed due to cryptographic randomness).\"\"\"\n    return hashlib.sha256(token.encode(\"utf-8\")).hexdigest()\n\n\ndef build_displayable_pat(token: str) -> str:\n    \"\"\"Create masked display version: show prefix + first 4 random chars, mask middle, show last 4.\n\n    Example: onyx_pat_abc1****xyz9\n    \"\"\"\n    # Show first 12 chars (onyx_pat_ + 4 random chars) and last 4 chars\n    return f\"{token[:12]}****{token[-4:]}\"\n\n\ndef get_hashed_pat_from_request(request: Request) -> str | None:\n    \"\"\"Extract and hash PAT from Authorization header.\n\n    Only accepts \"Bearer <token>\" format (unlike API keys which support raw format).\n    \"\"\"\n    return get_hashed_bearer_token_from_request(\n        request,\n        valid_prefixes=[PAT_PREFIX],\n        hash_fn=hash_pat,\n        allow_non_bearer=False,  # PATs require Bearer prefix\n    )\n\n\ndef calculate_expiration(days: int | None) -> datetime | None:\n    \"\"\"Calculate expiration at 23:59:59.999999 UTC on the target date. None = no expiration.\"\"\"\n    if days is None:\n        return None\n    expiry_date = datetime.now(timezone.utc).date() + timedelta(days=days)\n    return datetime.combine(expiry_date, datetime.max.time()).replace(\n        tzinfo=timezone.utc\n    )\n"
  },
  {
    "path": "backend/onyx/auth/permissions.py",
    "content": "\"\"\"\nPermission resolution for group-based authorization.\n\nGranted permissions are stored as a JSONB column on the User table and\nloaded for free with every auth query. Implied permissions are expanded\nat read time — only directly granted permissions are persisted.\n\"\"\"\n\nfrom collections.abc import Callable\nfrom collections.abc import Coroutine\nfrom typing import Any\n\nfrom fastapi import Depends\n\nfrom onyx.auth.users import current_user\nfrom onyx.db.enums import Permission\nfrom onyx.db.models import User\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nALL_PERMISSIONS: frozenset[str] = frozenset(p.value for p in Permission)\n\n# Implication map: granted permission -> set of permissions it implies.\nIMPLIED_PERMISSIONS: dict[str, set[str]] = {\n    Permission.ADD_AGENTS.value: {Permission.READ_AGENTS.value},\n    Permission.MANAGE_AGENTS.value: {\n        Permission.ADD_AGENTS.value,\n        Permission.READ_AGENTS.value,\n    },\n    Permission.MANAGE_DOCUMENT_SETS.value: {\n        Permission.READ_DOCUMENT_SETS.value,\n        Permission.READ_CONNECTORS.value,\n    },\n    Permission.ADD_CONNECTORS.value: {Permission.READ_CONNECTORS.value},\n    Permission.MANAGE_CONNECTORS.value: {\n        Permission.ADD_CONNECTORS.value,\n        Permission.READ_CONNECTORS.value,\n    },\n    Permission.MANAGE_USER_GROUPS.value: {\n        Permission.READ_CONNECTORS.value,\n        Permission.READ_DOCUMENT_SETS.value,\n        Permission.READ_AGENTS.value,\n        Permission.READ_USERS.value,\n    },\n}\n\n\ndef resolve_effective_permissions(granted: set[str]) -> set[str]:\n    \"\"\"Expand granted permissions with their implied permissions.\n\n    If \"admin\" is present, returns all 19 permissions.\n    \"\"\"\n    if Permission.FULL_ADMIN_PANEL_ACCESS.value in granted:\n        return set(ALL_PERMISSIONS)\n\n    effective = set(granted)\n    changed = True\n    while changed:\n        changed = False\n        for perm in list(effective):\n            implied = IMPLIED_PERMISSIONS.get(perm)\n            if implied and not implied.issubset(effective):\n                effective |= implied\n                changed = True\n    return effective\n\n\ndef get_effective_permissions(user: User) -> set[Permission]:\n    \"\"\"Read granted permissions from the column and expand implied permissions.\"\"\"\n    granted: set[Permission] = set()\n    for p in user.effective_permissions:\n        try:\n            granted.add(Permission(p))\n        except ValueError:\n            logger.warning(f\"Skipping unknown permission '{p}' for user {user.id}\")\n    if Permission.FULL_ADMIN_PANEL_ACCESS in granted:\n        return set(Permission)\n    expanded = resolve_effective_permissions({p.value for p in granted})\n    return {Permission(p) for p in expanded}\n\n\ndef require_permission(\n    required: Permission,\n) -> Callable[..., Coroutine[Any, Any, User]]:\n    \"\"\"FastAPI dependency factory for permission-based access control.\n\n    Usage:\n        @router.get(\"/endpoint\")\n        def endpoint(user: User = Depends(require_permission(Permission.MANAGE_CONNECTORS))):\n            ...\n    \"\"\"\n\n    async def dependency(user: User = Depends(current_user)) -> User:\n        effective = get_effective_permissions(user)\n\n        if Permission.FULL_ADMIN_PANEL_ACCESS in effective:\n            return user\n\n        if required not in effective:\n            raise OnyxError(\n                OnyxErrorCode.INSUFFICIENT_PERMISSIONS,\n                \"You do not have the required permissions for this action.\",\n            )\n\n        return user\n\n    return dependency\n"
  },
  {
    "path": "backend/onyx/auth/schemas.py",
    "content": "import uuid\nfrom enum import Enum\nfrom typing import Any\n\nfrom fastapi_users import schemas\nfrom typing_extensions import override\n\nfrom onyx.db.enums import AccountType\n\n\nclass UserRole(str, Enum):\n    \"\"\"\n    User roles\n    - Basic can't perform any admin actions\n    - Admin can perform all admin actions\n    - Curator can perform admin actions for\n        groups they are curators of\n    - Global Curator can perform admin actions\n        for all groups they are a member of\n    - Limited can access a limited set of basic api endpoints\n    - Slack are users that have used onyx via slack but dont have a web login\n    - External permissioned users that have been picked up during the external permissions sync process but don't have a web login\n    \"\"\"\n\n    LIMITED = \"limited\"\n    BASIC = \"basic\"\n    ADMIN = \"admin\"\n    CURATOR = \"curator\"\n    GLOBAL_CURATOR = \"global_curator\"\n    SLACK_USER = \"slack_user\"\n    EXT_PERM_USER = \"ext_perm_user\"\n\n    def is_web_login(self) -> bool:\n        return self not in [\n            UserRole.SLACK_USER,\n            UserRole.EXT_PERM_USER,\n        ]\n\n\nclass UserRead(schemas.BaseUser[uuid.UUID]):\n    role: UserRole\n\n\nclass UserCreate(schemas.BaseUserCreate):\n    role: UserRole = UserRole.BASIC\n    account_type: AccountType = AccountType.STANDARD\n    tenant_id: str | None = None\n    # Captcha token for cloud signup protection (optional, only used when captcha is enabled)\n    # Excluded from create_update_dict so it never reaches the DB layer\n    captcha_token: str | None = None\n\n    @override\n    def create_update_dict(self) -> dict[str, Any]:\n        d = super().create_update_dict()\n        d.pop(\"captcha_token\", None)\n        # Force STANDARD for self-registration; only trusted paths\n        # (SCIM, API key creation) supply a different account_type directly.\n        d[\"account_type\"] = AccountType.STANDARD\n        return d\n\n    @override\n    def create_update_dict_superuser(self) -> dict[str, Any]:\n        d = super().create_update_dict_superuser()\n        d.pop(\"captcha_token\", None)\n        d.setdefault(\"account_type\", self.account_type)\n        return d\n\n\nclass UserUpdate(schemas.BaseUserUpdate):\n    \"\"\"\n    Role updates are not allowed through the user update endpoint for security reasons\n    Role changes should be handled through a separate, admin-only process\n    \"\"\"\n\n\nclass AuthBackend(str, Enum):\n    REDIS = \"redis\"\n    POSTGRES = \"postgres\"\n    JWT = \"jwt\"\n"
  },
  {
    "path": "backend/onyx/auth/users.py",
    "content": "import base64\nimport hashlib\nimport json\nimport os\nimport random\nimport secrets\nimport string\nimport uuid\nfrom collections.abc import AsyncGenerator\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\nfrom typing import Dict\nfrom typing import List\nfrom typing import Literal\nfrom typing import Optional\nfrom typing import Protocol\nfrom typing import Tuple\nfrom typing import TypeVar\nfrom urllib.parse import urlparse\n\nimport jwt\nfrom email_validator import EmailNotValidError\nfrom email_validator import EmailUndeliverableError\nfrom email_validator import validate_email\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Query\nfrom fastapi import Request\nfrom fastapi import Response\nfrom fastapi import status\nfrom fastapi import WebSocket\nfrom fastapi.responses import JSONResponse\nfrom fastapi.responses import RedirectResponse\nfrom fastapi.security import OAuth2PasswordRequestForm\nfrom fastapi_users import BaseUserManager\nfrom fastapi_users import exceptions\nfrom fastapi_users import FastAPIUsers\nfrom fastapi_users import models\nfrom fastapi_users import schemas\nfrom fastapi_users import UUIDIDMixin\nfrom fastapi_users.authentication import AuthenticationBackend\nfrom fastapi_users.authentication import CookieTransport\nfrom fastapi_users.authentication import JWTStrategy\nfrom fastapi_users.authentication import RedisStrategy\nfrom fastapi_users.authentication import Strategy\nfrom fastapi_users.authentication.strategy.db import AccessTokenDatabase\nfrom fastapi_users.authentication.strategy.db import DatabaseStrategy\nfrom fastapi_users.exceptions import UserAlreadyExists\nfrom fastapi_users.jwt import decode_jwt\nfrom fastapi_users.jwt import generate_jwt\nfrom fastapi_users.jwt import SecretType\nfrom fastapi_users.manager import UserManagerDependency\nfrom fastapi_users.openapi import OpenAPIResponseType\nfrom fastapi_users.router.common import ErrorCode\nfrom fastapi_users.router.common import ErrorModel\nfrom fastapi_users_db_sqlalchemy import SQLAlchemyUserDatabase\nfrom httpx_oauth.integrations.fastapi import OAuth2AuthorizeCallback\nfrom httpx_oauth.oauth2 import BaseOAuth2\nfrom httpx_oauth.oauth2 import GetAccessTokenError\nfrom httpx_oauth.oauth2 import OAuth2Token\nfrom pydantic import BaseModel\nfrom sqlalchemy import nulls_last\nfrom sqlalchemy import select\nfrom sqlalchemy.exc import IntegrityError\nfrom sqlalchemy.ext.asyncio import AsyncSession\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.api_key import get_hashed_api_key_from_request\nfrom onyx.auth.disposable_email_validator import is_disposable_email\nfrom onyx.auth.email_utils import send_forgot_password_email\nfrom onyx.auth.email_utils import send_user_verification_email\nfrom onyx.auth.invited_users import get_invited_users\nfrom onyx.auth.invited_users import remove_user_from_invited_users\nfrom onyx.auth.jwt import verify_jwt_token\nfrom onyx.auth.pat import get_hashed_pat_from_request\nfrom onyx.auth.schemas import AuthBackend\nfrom onyx.auth.schemas import UserCreate\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.app_configs import AUTH_BACKEND\nfrom onyx.configs.app_configs import AUTH_COOKIE_EXPIRE_TIME_SECONDS\nfrom onyx.configs.app_configs import AUTH_TYPE\nfrom onyx.configs.app_configs import EMAIL_CONFIGURED\nfrom onyx.configs.app_configs import JWT_PUBLIC_KEY_URL\nfrom onyx.configs.app_configs import PASSWORD_MAX_LENGTH\nfrom onyx.configs.app_configs import PASSWORD_MIN_LENGTH\nfrom onyx.configs.app_configs import PASSWORD_REQUIRE_DIGIT\nfrom onyx.configs.app_configs import PASSWORD_REQUIRE_LOWERCASE\nfrom onyx.configs.app_configs import PASSWORD_REQUIRE_SPECIAL_CHAR\nfrom onyx.configs.app_configs import PASSWORD_REQUIRE_UPPERCASE\nfrom onyx.configs.app_configs import REDIS_AUTH_KEY_PREFIX\nfrom onyx.configs.app_configs import REQUIRE_EMAIL_VERIFICATION\nfrom onyx.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS\nfrom onyx.configs.app_configs import TRACK_EXTERNAL_IDP_EXPIRY\nfrom onyx.configs.app_configs import USER_AUTH_SECRET\nfrom onyx.configs.app_configs import VALID_EMAIL_DOMAINS\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME\nfrom onyx.configs.constants import ANONYMOUS_USER_EMAIL\nfrom onyx.configs.constants import ANONYMOUS_USER_UUID\nfrom onyx.configs.constants import AuthType\nfrom onyx.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN\nfrom onyx.configs.constants import DANSWER_API_KEY_PREFIX\nfrom onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME\nfrom onyx.configs.constants import MilestoneRecordType\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.configs.constants import PASSWORD_SPECIAL_CHARS\nfrom onyx.configs.constants import UNNAMED_KEY_PLACEHOLDER\nfrom onyx.db.api_key import fetch_user_for_api_key\nfrom onyx.db.auth import get_access_token_db\nfrom onyx.db.auth import get_default_admin_user_emails\nfrom onyx.db.auth import get_user_count\nfrom onyx.db.auth import get_user_db\nfrom onyx.db.auth import SQLAlchemyUserAdminDB\nfrom onyx.db.engine.async_sql_engine import get_async_session\nfrom onyx.db.engine.async_sql_engine import get_async_session_context_manager\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.enums import AccountType\nfrom onyx.db.models import AccessToken\nfrom onyx.db.models import OAuthAccount\nfrom onyx.db.models import Persona\nfrom onyx.db.models import User\nfrom onyx.db.pat import fetch_user_for_pat\nfrom onyx.db.users import assign_user_to_default_groups__no_commit\nfrom onyx.db.users import get_user_by_email\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import log_onyx_error\nfrom onyx.error_handling.exceptions import onyx_error_to_json_response\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.redis.redis_pool import get_async_redis_connection\nfrom onyx.redis.redis_pool import retrieve_ws_token_data\nfrom onyx.server.settings.store import load_settings\nfrom onyx.server.utils import BasicAuthenticationError\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.telemetry import mt_cloud_alias\nfrom onyx.utils.telemetry import mt_cloud_get_anon_id\nfrom onyx.utils.telemetry import mt_cloud_identify\nfrom onyx.utils.telemetry import mt_cloud_telemetry\nfrom onyx.utils.telemetry import optional_telemetry\nfrom onyx.utils.telemetry import RecordType\nfrom onyx.utils.timing import log_function_time\nfrom onyx.utils.url import add_url_params\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\nfrom shared_configs.configs import async_return_default_schema\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nREGISTER_INVITE_ONLY_CODE = \"REGISTER_INVITE_ONLY\"\n\n\ndef is_user_admin(user: User) -> bool:\n    return user.role == UserRole.ADMIN\n\n\ndef verify_auth_setting() -> None:\n    \"\"\"Log warnings for AUTH_TYPE issues.\n\n    This only runs on app startup not during migrations/scripts.\n    \"\"\"\n    raw_auth_type = (os.environ.get(\"AUTH_TYPE\") or \"\").lower()\n\n    if raw_auth_type == \"cloud\":\n        raise ValueError(\n            \"'cloud' is not a valid auth type for self-hosted deployments.\"\n        )\n    if raw_auth_type == \"disabled\":\n        logger.warning(\n            \"AUTH_TYPE='disabled' is no longer supported. Using 'basic' instead. Please update your configuration.\"\n        )\n\n    logger.notice(f\"Using Auth Type: {AUTH_TYPE.value}\")\n\n\ndef get_display_email(email: str | None, space_less: bool = False) -> str:\n    if email and email.endswith(DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN):\n        name = email.split(\"@\")[0]\n        if name == DANSWER_API_KEY_PREFIX + UNNAMED_KEY_PLACEHOLDER:\n            return \"Unnamed API Key\"\n\n        if space_less:\n            return name\n\n        return name.replace(\"API_KEY__\", \"API Key: \")\n\n    return email or \"\"\n\n\ndef generate_password() -> str:\n    lowercase_letters = string.ascii_lowercase\n    uppercase_letters = string.ascii_uppercase\n    digits = string.digits\n    special_characters = string.punctuation\n\n    # Ensure at least one of each required character type\n    password = [\n        secrets.choice(uppercase_letters),\n        secrets.choice(digits),\n        secrets.choice(special_characters),\n    ]\n\n    # Fill the rest with a mix of characters\n    remaining_length = 12 - len(password)\n    all_characters = lowercase_letters + uppercase_letters + digits + special_characters\n    password.extend(secrets.choice(all_characters) for _ in range(remaining_length))\n\n    # Shuffle the password to randomize the position of the required characters\n    random.shuffle(password)\n\n    return \"\".join(password)\n\n\ndef user_needs_to_be_verified() -> bool:\n    if AUTH_TYPE == AuthType.BASIC or AUTH_TYPE == AuthType.CLOUD:\n        return REQUIRE_EMAIL_VERIFICATION\n\n    # For other auth types, if the user is authenticated it's assumed that\n    # the user is already verified via the external IDP\n    return False\n\n\ndef anonymous_user_enabled(*, tenant_id: str | None = None) -> bool:\n    from onyx.cache.factory import get_cache_backend\n\n    cache = get_cache_backend(tenant_id=tenant_id)\n    value = cache.get(OnyxRedisLocks.ANONYMOUS_USER_ENABLED)\n\n    if value is None:\n        return False\n\n    return int(value.decode(\"utf-8\")) == 1\n\n\ndef workspace_invite_only_enabled() -> bool:\n    settings = load_settings()\n    return settings.invite_only_enabled\n\n\ndef verify_email_is_invited(email: str) -> None:\n    if AUTH_TYPE in {AuthType.SAML, AuthType.OIDC}:\n        # SSO providers manage membership; allow JIT provisioning regardless of invites\n        return\n\n    if not workspace_invite_only_enabled():\n        return\n\n    whitelist = get_invited_users()\n\n    if not email:\n        raise OnyxError(OnyxErrorCode.INVALID_INPUT, \"Email must be specified\")\n\n    try:\n        email_info = validate_email(email, check_deliverability=False)\n    except EmailUndeliverableError:\n        raise OnyxError(OnyxErrorCode.INVALID_INPUT, \"Email is not valid\")\n\n    for email_whitelist in whitelist:\n        try:\n            # normalized emails are now being inserted into the db\n            # we can remove this normalization on read after some time has passed\n            email_info_whitelist = validate_email(\n                email_whitelist, check_deliverability=False\n            )\n        except EmailNotValidError:\n            continue\n\n        # oddly, normalization does not include lowercasing the user part of the\n        # email address ... which we want to allow\n        if email_info.normalized.lower() == email_info_whitelist.normalized.lower():\n            return\n\n    raise OnyxError(\n        OnyxErrorCode.UNAUTHORIZED,\n        \"This workspace is invite-only. Please ask your admin to invite you.\",\n    )\n\n\ndef verify_email_in_whitelist(email: str, tenant_id: str) -> None:\n    with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n        if not get_user_by_email(email, db_session):\n            verify_email_is_invited(email)\n\n\ndef verify_email_domain(email: str, *, is_registration: bool = False) -> None:\n    if email.count(\"@\") != 1:\n        raise OnyxError(OnyxErrorCode.INVALID_INPUT, \"Email is not valid\")\n\n    local_part, domain = email.split(\"@\")\n    domain = domain.lower()\n    local_part = local_part.lower()\n\n    if AUTH_TYPE == AuthType.CLOUD:\n        # Normalize googlemail.com to gmail.com (they deliver to the same inbox)\n        if domain == \"googlemail.com\":\n            raise OnyxError(\n                OnyxErrorCode.INVALID_INPUT,\n                \"Please use @gmail.com instead of @googlemail.com.\",\n            )\n\n        # Only block dotted Gmail on new signups — existing users must still be\n        # able to sign in with the address they originally registered with.\n        if is_registration and domain == \"gmail.com\" and \".\" in local_part:\n            raise OnyxError(\n                OnyxErrorCode.INVALID_INPUT,\n                \"Gmail addresses with '.' are not allowed. Please use your base email address.\",\n            )\n\n        if \"+\" in local_part and domain != \"onyx.app\":\n            raise OnyxError(\n                OnyxErrorCode.INVALID_INPUT,\n                \"Email addresses with '+' are not allowed. Please use your base email address.\",\n            )\n\n    # Check if email uses a disposable/temporary domain\n    if is_disposable_email(email):\n        raise OnyxError(\n            OnyxErrorCode.INVALID_INPUT,\n            \"Disposable email addresses are not allowed. Please use a permanent email address.\",\n        )\n\n    # Check domain whitelist if configured\n    if VALID_EMAIL_DOMAINS:\n        if domain not in VALID_EMAIL_DOMAINS:\n            raise OnyxError(OnyxErrorCode.INVALID_INPUT, \"Email domain is not valid\")\n\n\ndef enforce_seat_limit(db_session: Session, seats_needed: int = 1) -> None:\n    \"\"\"Raise HTTPException(402) if adding users would exceed the seat limit.\n\n    No-op for multi-tenant or CE deployments.\n    \"\"\"\n    if MULTI_TENANT:\n        return\n\n    result = fetch_ee_implementation_or_noop(\n        \"onyx.db.license\", \"check_seat_availability\", None\n    )(db_session, seats_needed=seats_needed)\n\n    if result is not None and not result.available:\n        raise OnyxError(OnyxErrorCode.SEAT_LIMIT_EXCEEDED, result.error_message)\n\n\nclass UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):\n    reset_password_token_secret = USER_AUTH_SECRET\n    verification_token_secret = USER_AUTH_SECRET\n    verification_token_lifetime_seconds = AUTH_COOKIE_EXPIRE_TIME_SECONDS\n    user_db: SQLAlchemyUserDatabase[User, uuid.UUID]\n\n    async def get_by_email(self, user_email: str) -> User:\n        tenant_id = fetch_ee_implementation_or_noop(\n            \"onyx.server.tenants.user_mapping\", \"get_tenant_id_for_email\", None\n        )(user_email)\n        async with get_async_session_context_manager(tenant_id) as db_session:\n            if MULTI_TENANT:\n                tenant_user_db = SQLAlchemyUserAdminDB[User, uuid.UUID](\n                    db_session, User, OAuthAccount\n                )\n                user = await tenant_user_db.get_by_email(user_email)\n            else:\n                user = await self.user_db.get_by_email(user_email)\n\n        if not user:\n            raise exceptions.UserNotExists()\n\n        return user\n\n    async def create(\n        self,\n        user_create: schemas.UC | UserCreate,\n        safe: bool = False,\n        request: Optional[Request] = None,\n    ) -> User:\n        # Verify captcha if enabled (for cloud signup protection)\n        from onyx.auth.captcha import CaptchaVerificationError\n        from onyx.auth.captcha import is_captcha_enabled\n        from onyx.auth.captcha import verify_captcha_token\n\n        if is_captcha_enabled() and request is not None:\n            # Get captcha token from request body or headers\n            captcha_token = None\n            if hasattr(user_create, \"captcha_token\"):\n                captcha_token = getattr(user_create, \"captcha_token\", None)\n\n            # Also check headers as a fallback\n            if not captcha_token:\n                captcha_token = request.headers.get(\"X-Captcha-Token\")\n\n            try:\n                await verify_captcha_token(\n                    captcha_token or \"\", expected_action=\"signup\"\n                )\n            except CaptchaVerificationError as e:\n                raise OnyxError(OnyxErrorCode.INVALID_INPUT, str(e))\n\n        # We verify the password here to make sure it's valid before we proceed\n        await self.validate_password(\n            user_create.password, cast(schemas.UC, user_create)\n        )\n\n        # Check for disposable emails BEFORE provisioning tenant\n        # This prevents creating tenants for throwaway email addresses\n        try:\n            verify_email_domain(user_create.email, is_registration=True)\n        except OnyxError as e:\n            # Log blocked disposable email attempts\n            if \"Disposable email\" in e.detail:\n                domain = (\n                    user_create.email.split(\"@\")[-1]\n                    if \"@\" in user_create.email\n                    else \"unknown\"\n                )\n                logger.warning(\n                    f\"Blocked disposable email registration attempt: {domain}\",\n                    extra={\"email_domain\": domain},\n                )\n            raise\n\n        user_count: int | None = None\n        referral_source = (\n            request.cookies.get(\"referral_source\", None)\n            if request is not None\n            else None\n        )\n\n        tenant_id = await fetch_ee_implementation_or_noop(\n            \"onyx.server.tenants.provisioning\",\n            \"get_or_provision_tenant\",\n            async_return_default_schema,\n        )(\n            email=user_create.email,\n            referral_source=referral_source,\n            request=request,\n        )\n        user: User\n\n        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n        try:\n            async with get_async_session_context_manager(tenant_id) as db_session:\n                # Check invite list based on deployment mode\n                if MULTI_TENANT:\n                    # Multi-tenant: Only require invite for existing tenants\n                    # New tenant creation (first user) doesn't require an invite\n                    user_count = await get_user_count()\n                    if user_count > 0:\n                        # Tenant already has users - require invite for new users\n                        verify_email_is_invited(user_create.email)\n                else:\n                    # Single-tenant: Check invite list (skips if SAML/OIDC or no list configured)\n                    verify_email_is_invited(user_create.email)\n                if MULTI_TENANT:\n                    tenant_user_db = SQLAlchemyUserAdminDB[User, uuid.UUID](\n                        db_session, User, OAuthAccount\n                    )\n                    self.user_db = tenant_user_db\n\n                if hasattr(user_create, \"role\"):\n                    user_create.role = UserRole.BASIC\n\n                    user_count = await get_user_count()\n                    if (\n                        user_count == 0\n                        or user_create.email in get_default_admin_user_emails()\n                    ):\n                        user_create.role = UserRole.ADMIN\n\n                # Check seat availability for new users (single-tenant only)\n                with get_session_with_current_tenant() as sync_db:\n                    existing = get_user_by_email(user_create.email, sync_db)\n                    if existing is None:\n                        enforce_seat_limit(sync_db)\n\n                user_created = False\n                try:\n                    user = await super().create(user_create, safe=safe, request=request)\n                    user_created = True\n                except IntegrityError as error:\n                    # Race condition: another request created the same user after the\n                    # pre-insert existence check but before our commit.\n                    await self.user_db.session.rollback()\n                    logger.warning(\n                        \"IntegrityError while creating user %s, assuming duplicate: %s\",\n                        user_create.email,\n                        str(error),\n                    )\n                    try:\n                        user = await self.get_by_email(user_create.email)\n                    except exceptions.UserNotExists:\n                        # Unexpected integrity error, surface it for handling upstream.\n                        raise error\n\n                    if MULTI_TENANT:\n                        user_by_session = await db_session.get(User, user.id)\n                        if user_by_session:\n                            user = user_by_session\n\n                    if (\n                        user.account_type.is_web_login()\n                        or not isinstance(user_create, UserCreate)\n                        or not user_create.account_type.is_web_login()\n                    ):\n                        raise exceptions.UserAlreadyExists()\n\n                    # Cache id before expire — accessing attrs on an expired\n                    # object triggers a sync lazy-load which raises MissingGreenlet\n                    # in this async context.\n                    user_id = user.id\n                    self._upgrade_user_to_standard__sync(user_id, user_create)\n                    # Expire so the async session re-fetches the row updated by\n                    # the sync session above.\n                    self.user_db.session.expire(user)\n                    user = await self.user_db.get(user_id)  # type: ignore[assignment]\n                except exceptions.UserAlreadyExists:\n                    user = await self.get_by_email(user_create.email)\n\n                    # we must use the existing user in the session if it matches\n                    # the user we just got by email. Note that this only applies\n                    # to multi-tenant, due to the overwriting of the user_db\n                    if MULTI_TENANT:\n                        user_by_session = await db_session.get(User, user.id)\n                        if user_by_session:\n                            user = user_by_session\n\n                    # Handle case where user has used product outside of web and is now creating an account through web\n                    if (\n                        user.account_type.is_web_login()\n                        or not isinstance(user_create, UserCreate)\n                        or not user_create.account_type.is_web_login()\n                    ):\n                        raise exceptions.UserAlreadyExists()\n\n                    # Cache id before expire — accessing attrs on an expired\n                    # object triggers a sync lazy-load which raises MissingGreenlet\n                    # in this async context.\n                    user_id = user.id\n                    self._upgrade_user_to_standard__sync(user_id, user_create)\n                    # Expire so the async session re-fetches the row updated by\n                    # the sync session above.\n                    self.user_db.session.expire(user)\n                    user = await self.user_db.get(user_id)  # type: ignore[assignment]\n                if user_created:\n                    await self._assign_default_pinned_assistants(user, db_session)\n                remove_user_from_invited_users(user_create.email)\n        finally:\n            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n        return user\n\n    async def _assign_default_pinned_assistants(\n        self, user: User, db_session: AsyncSession\n    ) -> None:\n        if user.pinned_assistants is not None:\n            return\n\n        result = await db_session.execute(\n            select(Persona.id)\n            .where(\n                Persona.is_featured.is_(True),\n                Persona.is_public.is_(True),\n                Persona.is_listed.is_(True),\n                Persona.deleted.is_(False),\n            )\n            .order_by(\n                nulls_last(Persona.display_priority.asc()),\n                Persona.id.asc(),\n            )\n        )\n        default_persona_ids = list(result.scalars().all())\n        if not default_persona_ids:\n            return\n\n        await self.user_db.update(\n            user,\n            {\"pinned_assistants\": default_persona_ids},\n        )\n        user.pinned_assistants = default_persona_ids\n\n    def _upgrade_user_to_standard__sync(\n        self,\n        user_id: uuid.UUID,\n        user_create: UserCreate,\n    ) -> None:\n        \"\"\"Upgrade a non-web user to STANDARD and assign default groups atomically.\n\n        All writes happen in a single sync transaction so neither the field\n        update nor the group assignment is visible without the other.\n        \"\"\"\n        with get_session_with_current_tenant() as sync_db:\n            sync_user = sync_db.query(User).filter(User.id == user_id).first()  # type: ignore[arg-type]\n            if sync_user:\n                sync_user.hashed_password = self.password_helper.hash(\n                    user_create.password\n                )\n                sync_user.is_verified = user_create.is_verified or False\n                sync_user.role = user_create.role\n                sync_user.account_type = AccountType.STANDARD\n                assign_user_to_default_groups__no_commit(\n                    sync_db,\n                    sync_user,\n                    is_admin=(user_create.role == UserRole.ADMIN),\n                )\n                sync_db.commit()\n            else:\n                logger.warning(\n                    \"User %s not found in sync session during upgrade to standard; \"\n                    \"skipping upgrade\",\n                    user_id,\n                )\n\n    async def validate_password(self, password: str, _: schemas.UC | models.UP) -> None:\n        # Validate password according to configurable security policy (defined via environment variables)\n        if len(password) < PASSWORD_MIN_LENGTH:\n            raise exceptions.InvalidPasswordException(\n                reason=f\"Password must be at least {PASSWORD_MIN_LENGTH} characters long.\"\n            )\n        if len(password) > PASSWORD_MAX_LENGTH:\n            raise exceptions.InvalidPasswordException(\n                reason=f\"Password must not exceed {PASSWORD_MAX_LENGTH} characters.\"\n            )\n        if PASSWORD_REQUIRE_UPPERCASE and not any(char.isupper() for char in password):\n            raise exceptions.InvalidPasswordException(\n                reason=\"Password must contain at least one uppercase letter.\"\n            )\n        if PASSWORD_REQUIRE_LOWERCASE and not any(char.islower() for char in password):\n            raise exceptions.InvalidPasswordException(\n                reason=\"Password must contain at least one lowercase letter.\"\n            )\n        if PASSWORD_REQUIRE_DIGIT and not any(char.isdigit() for char in password):\n            raise exceptions.InvalidPasswordException(\n                reason=\"Password must contain at least one number.\"\n            )\n        if PASSWORD_REQUIRE_SPECIAL_CHAR and not any(\n            char in PASSWORD_SPECIAL_CHARS for char in password\n        ):\n            raise exceptions.InvalidPasswordException(\n                reason=f\"Password must contain at least one special character from the following set: {PASSWORD_SPECIAL_CHARS}.\"\n            )\n        return\n\n    @log_function_time(print_only=True)\n    async def oauth_callback(\n        self,\n        oauth_name: str,\n        access_token: str,\n        account_id: str,\n        account_email: str,\n        expires_at: Optional[int] = None,\n        refresh_token: Optional[str] = None,\n        request: Optional[Request] = None,\n        *,\n        associate_by_email: bool = False,\n        is_verified_by_default: bool = False,\n    ) -> User:\n        referral_source = (\n            getattr(request.state, \"referral_source\", None) if request else None\n        )\n\n        tenant_id = await fetch_ee_implementation_or_noop(\n            \"onyx.server.tenants.provisioning\",\n            \"get_or_provision_tenant\",\n            async_return_default_schema,\n        )(\n            email=account_email,\n            referral_source=referral_source,\n            request=request,\n        )\n\n        if not tenant_id:\n            raise HTTPException(status_code=401, detail=\"User not found\")\n\n        # Proceed with the tenant context\n        token = None\n        async with get_async_session_context_manager(tenant_id) as db_session:\n            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n\n            verify_email_in_whitelist(account_email, tenant_id)\n            verify_email_domain(account_email)\n\n            # NOTE(rkuo): If this UserManager is instantiated per connection\n            # should we even be doing this here?\n            if MULTI_TENANT:\n                tenant_user_db = SQLAlchemyUserAdminDB[User, uuid.UUID](\n                    db_session, User, OAuthAccount\n                )\n                self.user_db = tenant_user_db\n\n            oauth_account_dict = {\n                \"oauth_name\": oauth_name,\n                \"access_token\": access_token,\n                \"account_id\": account_id,\n                \"account_email\": account_email,\n                \"expires_at\": expires_at,\n                \"refresh_token\": refresh_token,\n            }\n\n            user: User | None = None\n\n            try:\n                # Attempt to get user by OAuth account\n                user = await self.get_by_oauth_account(oauth_name, account_id)\n\n            except exceptions.UserNotExists:\n                try:\n                    # Attempt to get user by email\n                    user = await self.user_db.get_by_email(account_email)\n                    if not associate_by_email:\n                        raise exceptions.UserAlreadyExists()\n\n                    # Make sure user is not None before adding OAuth account\n                    if user is not None:\n                        user = await self.user_db.add_oauth_account(\n                            user, oauth_account_dict\n                        )\n                    else:\n                        # This shouldn't happen since get_by_email would raise UserNotExists\n                        # but adding as a safeguard\n                        raise exceptions.UserNotExists()\n\n                except exceptions.UserNotExists:\n                    verify_email_domain(account_email, is_registration=True)\n\n                    # Check seat availability before creating (single-tenant only)\n                    with get_session_with_current_tenant() as sync_db:\n                        enforce_seat_limit(sync_db)\n\n                    password = self.password_helper.generate()\n                    user_dict = {\n                        \"email\": account_email,\n                        \"hashed_password\": self.password_helper.hash(password),\n                        \"is_verified\": is_verified_by_default,\n                        \"account_type\": AccountType.STANDARD,\n                    }\n\n                    user = await self.user_db.create(user_dict)\n                    await self.user_db.add_oauth_account(user, oauth_account_dict)\n                    await self._assign_default_pinned_assistants(user, db_session)\n                    await self.on_after_register(user, request)\n\n            else:\n                # User exists, update OAuth account if needed\n                if user is not None:  # Add explicit check\n                    for existing_oauth_account in user.oauth_accounts:\n                        if (\n                            existing_oauth_account.account_id == account_id\n                            and existing_oauth_account.oauth_name == oauth_name\n                        ):\n                            user = await self.user_db.update_oauth_account(\n                                user,\n                                # NOTE: OAuthAccount DOES implement the OAuthAccountProtocol\n                                # but the type checker doesn't know that :(\n                                existing_oauth_account,  # type: ignore\n                                oauth_account_dict,\n                            )\n\n            # NOTE: Most IdPs have very short expiry times, and we don't want to force the user to\n            # re-authenticate that frequently, so by default this is disabled\n            if expires_at and TRACK_EXTERNAL_IDP_EXPIRY:\n                oidc_expiry = datetime.fromtimestamp(expires_at, tz=timezone.utc)\n                await self.user_db.update(\n                    user, update_dict={\"oidc_expiry\": oidc_expiry}\n                )\n\n            # Handle case where user has used product outside of web and is now creating an account through web\n            if not user.account_type.is_web_login():\n                # We must use the existing user in the session if it matches\n                # the user we just got by email/oauth. Note that this only applies\n                # to multi-tenant, due to the overwriting of the user_db\n                if MULTI_TENANT:\n                    if user.id:\n                        user_by_session = await db_session.get(User, user.id)\n                        if user_by_session:\n                            user = user_by_session\n\n                # If the user is inactive, check seat availability before\n                # upgrading role — otherwise they'd become an inactive BASIC\n                # user who still can't log in.\n                if not user.is_active:\n                    with get_session_with_current_tenant() as sync_db:\n                        enforce_seat_limit(sync_db)\n\n                # Upgrade the user and assign default groups in a single\n                # transaction so neither change is visible without the other.\n                was_inactive = not user.is_active\n                with get_session_with_current_tenant() as sync_db:\n                    sync_user = sync_db.query(User).filter(User.id == user.id).first()  # type: ignore[arg-type]\n                    if sync_user:\n                        sync_user.is_verified = is_verified_by_default\n                        sync_user.role = UserRole.BASIC\n                        sync_user.account_type = AccountType.STANDARD\n                        if was_inactive:\n                            sync_user.is_active = True\n                        assign_user_to_default_groups__no_commit(sync_db, sync_user)\n                        sync_db.commit()\n\n                # Refresh the async user object so downstream code\n                # (e.g. oidc_expiry check) sees the updated fields.\n                self.user_db.session.expire(user)\n                user = await self.user_db.get(user.id)\n                assert user is not None\n\n            # this is needed if an organization goes from `TRACK_EXTERNAL_IDP_EXPIRY=true` to `false`\n            # otherwise, the oidc expiry will always be old, and the user will never be able to login\n            if user.oidc_expiry is not None and not TRACK_EXTERNAL_IDP_EXPIRY:\n                await self.user_db.update(user, {\"oidc_expiry\": None})\n                user.oidc_expiry = None  # type: ignore\n            remove_user_from_invited_users(user.email)\n            if token:\n                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n            return user\n\n    async def on_after_login(\n        self,\n        user: User,\n        request: Optional[Request] = None,\n        response: Optional[Response] = None,\n    ) -> None:\n        try:\n            if response and request and ANONYMOUS_USER_COOKIE_NAME in request.cookies:\n                response.delete_cookie(\n                    ANONYMOUS_USER_COOKIE_NAME,\n                    # Ensure cookie deletion doesn't override other cookies by setting the same path/domain\n                    path=\"/\",\n                    domain=None,\n                    secure=WEB_DOMAIN.startswith(\"https\"),\n                )\n                logger.debug(f\"Deleted anonymous user cookie for user {user.email}\")\n        except Exception:\n            logger.exception(\"Error deleting anonymous user cookie\")\n\n        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()\n\n        # Link the anonymous PostHog session to the identified user so that\n        # pre-login session recordings and events merge into one person profile.\n        if anon_id := mt_cloud_get_anon_id(request):\n            mt_cloud_alias(distinct_id=str(user.id), anonymous_id=anon_id)\n\n        mt_cloud_identify(\n            distinct_id=str(user.id),\n            properties={\"email\": user.email, \"tenant_id\": tenant_id},\n        )\n\n    async def on_after_register(\n        self, user: User, request: Optional[Request] = None\n    ) -> None:\n        tenant_id = await fetch_ee_implementation_or_noop(\n            \"onyx.server.tenants.provisioning\",\n            \"get_or_provision_tenant\",\n            async_return_default_schema,\n        )(\n            email=user.email,\n            request=request,\n        )\n\n        user_count = None\n        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n        try:\n            user_count = await get_user_count()\n            logger.debug(f\"Current tenant user count: {user_count}\")\n\n            # Link the anonymous PostHog session to the identified user so\n            # that pre-signup session recordings merge into one person profile.\n            if anon_id := mt_cloud_get_anon_id(request):\n                mt_cloud_alias(distinct_id=str(user.id), anonymous_id=anon_id)\n\n            # Ensure a PostHog person profile exists for this user.\n            mt_cloud_identify(\n                distinct_id=str(user.id),\n                properties={\"email\": user.email, \"tenant_id\": tenant_id},\n            )\n\n            mt_cloud_telemetry(\n                tenant_id=tenant_id,\n                distinct_id=str(user.id),\n                event=MilestoneRecordType.USER_SIGNED_UP,\n            )\n\n            if user_count == 1:\n                mt_cloud_telemetry(\n                    tenant_id=tenant_id,\n                    distinct_id=str(user.id),\n                    event=MilestoneRecordType.TENANT_CREATED,\n                )\n\n            # Assign user to the appropriate default group (Admin or Basic).\n            # Must happen inside the try block while tenant context is active,\n            # otherwise get_session_with_current_tenant() targets the wrong schema.\n            is_admin = user_count == 1 or user.email in get_default_admin_user_emails()\n            with get_session_with_current_tenant() as db_session:\n                assign_user_to_default_groups__no_commit(\n                    db_session, user, is_admin=is_admin\n                )\n                db_session.commit()\n\n        finally:\n            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n        # Fetch EE PostHog functions if available\n        get_marketing_posthog_cookie_name = fetch_ee_implementation_or_noop(\n            module=\"onyx.utils.posthog_client\",\n            attribute=\"get_marketing_posthog_cookie_name\",\n            noop_return_value=None,\n        )\n        parse_posthog_cookie = fetch_ee_implementation_or_noop(\n            module=\"onyx.utils.posthog_client\",\n            attribute=\"parse_posthog_cookie\",\n            noop_return_value=None,\n        )\n        capture_and_sync_with_alternate_posthog = fetch_ee_implementation_or_noop(\n            module=\"onyx.utils.posthog_client\",\n            attribute=\"capture_and_sync_with_alternate_posthog\",\n            noop_return_value=None,\n        )\n\n        if (\n            request\n            and user_count is not None\n            and (marketing_cookie_name := get_marketing_posthog_cookie_name())\n            and (marketing_cookie_value := request.cookies.get(marketing_cookie_name))\n            and (parsed_cookie := parse_posthog_cookie(marketing_cookie_value))\n        ):\n            marketing_anonymous_id = parsed_cookie[\"distinct_id\"]\n\n            # Technically, USER_SIGNED_UP is only fired from the cloud site when\n            # it is the first user in a tenant. However, it is semantically correct\n            # for the marketing site and should probably be refactored for the cloud site\n            # to also be semantically correct.\n            properties = {\n                \"email\": user.email,\n                \"onyx_cloud_user_id\": str(user.id),\n                \"tenant_id\": str(tenant_id) if tenant_id else None,\n                \"role\": user.role.value,\n                \"is_first_user\": user_count == 1,\n                \"source\": \"marketing_site_signup\",\n                \"conversion_timestamp\": datetime.now(timezone.utc).isoformat(),\n            }\n\n            # Add all other values from the marketing cookie (featureFlags, etc.)\n            for key, value in parsed_cookie.items():\n                if key != \"distinct_id\":\n                    properties.setdefault(key, value)\n\n            capture_and_sync_with_alternate_posthog(\n                alternate_distinct_id=marketing_anonymous_id,\n                event=MilestoneRecordType.USER_SIGNED_UP,\n                properties=properties,\n            )\n\n        logger.debug(f\"User {user.id} has registered.\")\n        optional_telemetry(\n            record_type=RecordType.SIGN_UP,\n            data={\"action\": \"create\"},\n            user_id=str(user.id),\n        )\n\n    async def on_after_forgot_password(\n        self,\n        user: User,\n        token: str,\n        request: Optional[Request] = None,  # noqa: ARG002\n    ) -> None:\n        if not EMAIL_CONFIGURED:\n            logger.error(\n                \"Email is not configured. Please configure email in the admin panel\"\n            )\n            raise HTTPException(\n                status.HTTP_500_INTERNAL_SERVER_ERROR,\n                \"Your admin has not enabled this feature.\",\n            )\n        tenant_id = await fetch_ee_implementation_or_noop(\n            \"onyx.server.tenants.provisioning\",\n            \"get_or_provision_tenant\",\n            async_return_default_schema,\n        )(email=user.email)\n\n        send_forgot_password_email(user.email, tenant_id=tenant_id, token=token)\n\n    async def on_after_request_verify(\n        self,\n        user: User,\n        token: str,\n        request: Optional[Request] = None,  # noqa: ARG002\n    ) -> None:\n        verify_email_domain(user.email)\n\n        logger.notice(\n            f\"Verification requested for user {user.id}. Verification token: {token}\"\n        )\n        user_count = await get_user_count()\n        send_user_verification_email(\n            user.email, token, new_organization=user_count == 1\n        )\n\n    @log_function_time(print_only=True)\n    async def authenticate(\n        self, credentials: OAuth2PasswordRequestForm\n    ) -> Optional[User]:\n        email = credentials.username\n\n        tenant_id: str | None = None\n        try:\n            tenant_id = fetch_ee_implementation_or_noop(\n                \"onyx.server.tenants.provisioning\",\n                \"get_tenant_id_for_email\",\n                POSTGRES_DEFAULT_SCHEMA,\n            )(\n                email=email,\n            )\n        except Exception as e:\n            logger.warning(\n                f\"User attempted to login with invalid credentials: {str(e)}\"\n            )\n\n        if not tenant_id:\n            # User not found in mapping\n            self.password_helper.hash(credentials.password)\n            return None\n\n        # Create a tenant-specific session\n        async with get_async_session_context_manager(tenant_id) as tenant_session:\n            tenant_user_db: SQLAlchemyUserDatabase = SQLAlchemyUserDatabase(\n                tenant_session, User\n            )\n            self.user_db = tenant_user_db\n\n            # Proceed with authentication\n            try:\n                user = await self.get_by_email(email)\n\n            except exceptions.UserNotExists:\n                self.password_helper.hash(credentials.password)\n                return None\n\n            if not user.account_type.is_web_login():\n                raise BasicAuthenticationError(\n                    detail=\"NO_WEB_LOGIN_AND_HAS_NO_PASSWORD\",\n                )\n\n            verified, updated_password_hash = self.password_helper.verify_and_update(\n                credentials.password, user.hashed_password\n            )\n            if not verified:\n                return None\n\n            if updated_password_hash is not None:\n                await self.user_db.update(\n                    user, {\"hashed_password\": updated_password_hash}\n                )\n\n            return user\n\n    async def reset_password_as_admin(self, user_id: uuid.UUID) -> str:\n        \"\"\"Admin-only. Generate a random password for a user and return it.\"\"\"\n        user = await self.get(user_id)\n        new_password = generate_password()\n        await self._update(user, {\"password\": new_password})\n        return new_password\n\n    async def change_password_if_old_matches(\n        self, user: User, old_password: str, new_password: str\n    ) -> None:\n        \"\"\"\n        For normal users to change password if they know the old one.\n        Raises 400 if old password doesn't match.\n        \"\"\"\n        verified, updated_password_hash = self.password_helper.verify_and_update(\n            old_password, user.hashed_password\n        )\n        if not verified:\n            # Raise some HTTPException (or your custom exception) if old password is invalid:\n            from fastapi import HTTPException, status\n\n            raise HTTPException(\n                status_code=status.HTTP_400_BAD_REQUEST,\n                detail=\"Invalid current password\",\n            )\n\n        # If the hash was upgraded behind the scenes, we can keep it before setting the new password:\n        if updated_password_hash:\n            user.hashed_password = updated_password_hash\n\n        # Now apply and validate the new password\n        await self._update(user, {\"password\": new_password})\n\n\nasync def get_user_manager(\n    user_db: SQLAlchemyUserDatabase = Depends(get_user_db),\n) -> AsyncGenerator[UserManager, None]:\n    yield UserManager(user_db)\n\n\ncookie_transport = CookieTransport(\n    cookie_max_age=SESSION_EXPIRE_TIME_SECONDS,\n    cookie_secure=WEB_DOMAIN.startswith(\"https\"),\n    cookie_name=FASTAPI_USERS_AUTH_COOKIE_NAME,\n)\n\n\nT = TypeVar(\"T\", covariant=True)\nID = TypeVar(\"ID\", contravariant=True)\n\n\n# Protocol for strategies that support token refreshing without inheritance.\nclass RefreshableStrategy(Protocol):\n    \"\"\"Protocol for authentication strategies that support token refreshing.\"\"\"\n\n    async def refresh_token(self, token: Optional[str], user: Any) -> str:\n        \"\"\"\n        Refresh an existing token by extending its lifetime.\n        Returns either the same token with extended expiration or a new token.\n        \"\"\"\n        ...\n\n\nclass TenantAwareRedisStrategy(RedisStrategy[User, uuid.UUID]):\n    \"\"\"\n    A custom strategy that fetches the actual async Redis connection inside each method.\n    We do NOT pass a synchronous or \"coroutine\" redis object to the constructor.\n    \"\"\"\n\n    def __init__(\n        self,\n        lifetime_seconds: Optional[int] = SESSION_EXPIRE_TIME_SECONDS,\n        key_prefix: str = REDIS_AUTH_KEY_PREFIX,\n    ):\n        self.lifetime_seconds = lifetime_seconds\n        self.key_prefix = key_prefix\n\n    async def write_token(self, user: User) -> str:\n        redis = await get_async_redis_connection()\n\n        tenant_id = await fetch_ee_implementation_or_noop(\n            \"onyx.server.tenants.provisioning\",\n            \"get_or_provision_tenant\",\n            async_return_default_schema,\n        )(email=user.email)\n\n        token_data = {\n            \"sub\": str(user.id),\n            \"tenant_id\": tenant_id,\n        }\n        token = secrets.token_urlsafe()\n        await redis.set(\n            f\"{self.key_prefix}{token}\",\n            json.dumps(token_data),\n            ex=self.lifetime_seconds,\n        )\n        return token\n\n    async def read_token(\n        self, token: Optional[str], user_manager: BaseUserManager[User, uuid.UUID]\n    ) -> Optional[User]:\n        redis = await get_async_redis_connection()\n        token_data_str = await redis.get(f\"{self.key_prefix}{token}\")\n        if not token_data_str:\n            return None\n\n        try:\n            token_data = json.loads(token_data_str)\n            user_id = token_data[\"sub\"]\n            parsed_id = user_manager.parse_id(user_id)\n            return await user_manager.get(parsed_id)\n        except (exceptions.UserNotExists, exceptions.InvalidID, KeyError):\n            return None\n\n    async def destroy_token(self, token: str, user: User) -> None:  # noqa: ARG002\n        \"\"\"Properly delete the token from async redis.\"\"\"\n        redis = await get_async_redis_connection()\n        await redis.delete(f\"{self.key_prefix}{token}\")\n\n    async def refresh_token(self, token: Optional[str], user: User) -> str:\n        \"\"\"Refresh a token by extending its expiration time in Redis.\"\"\"\n        if token is None:\n            # If no token provided, create a new one\n            return await self.write_token(user)\n\n        redis = await get_async_redis_connection()\n        token_key = f\"{self.key_prefix}{token}\"\n\n        # Check if token exists\n        token_data_str = await redis.get(token_key)\n        if not token_data_str:\n            # Token not found, create new one\n            return await self.write_token(user)\n\n        # Token exists, extend its lifetime\n        token_data = json.loads(token_data_str)\n        await redis.set(\n            token_key,\n            json.dumps(token_data),\n            ex=self.lifetime_seconds,\n        )\n\n        return token\n\n\nclass RefreshableDatabaseStrategy(DatabaseStrategy[User, uuid.UUID, AccessToken]):\n    \"\"\"Database strategy with token refreshing capabilities.\"\"\"\n\n    def __init__(\n        self,\n        access_token_db: AccessTokenDatabase[AccessToken],\n        lifetime_seconds: Optional[int] = None,\n    ):\n        super().__init__(access_token_db, lifetime_seconds)\n        self._access_token_db = access_token_db\n\n    async def refresh_token(self, token: Optional[str], user: User) -> str:\n        \"\"\"Refresh a token by updating its expiration time in the database.\"\"\"\n        if token is None:\n            return await self.write_token(user)\n\n        # Find the token in database\n        access_token = await self._access_token_db.get_by_token(token)\n\n        if access_token is None:\n            # Token not found, create new one\n            return await self.write_token(user)\n\n        # Update expiration time\n        new_expires = datetime.now(timezone.utc) + timedelta(\n            seconds=float(self.lifetime_seconds or SESSION_EXPIRE_TIME_SECONDS)\n        )\n        await self._access_token_db.update(access_token, {\"expires\": new_expires})\n\n        return token\n\n\nclass SingleTenantJWTStrategy(JWTStrategy[User, uuid.UUID]):\n    \"\"\"Stateless JWT strategy for single-tenant deployments.\n\n    Tokens are self-contained and verified via signature — no Redis or DB\n    lookup required per request. An ``iat`` claim is embedded so that\n    downstream code can determine when the token was created without\n    querying an external store.\n\n    Refresh is implemented by issuing a brand-new JWT (the old one remains\n    valid until its natural expiry).  ``destroy_token`` is a no-op because\n    JWTs cannot be server-side invalidated.\n    \"\"\"\n\n    def __init__(\n        self,\n        secret: SecretType,\n        lifetime_seconds: int | None = SESSION_EXPIRE_TIME_SECONDS,\n        token_audience: list[str] | None = None,\n        algorithm: str = \"HS256\",\n        public_key: SecretType | None = None,\n    ):\n        super().__init__(\n            secret=secret,\n            lifetime_seconds=lifetime_seconds,\n            token_audience=token_audience or [\"fastapi-users:auth\"],\n            algorithm=algorithm,\n            public_key=public_key,\n        )\n\n    async def write_token(self, user: User) -> str:\n        data = {\n            \"sub\": str(user.id),\n            \"aud\": self.token_audience,\n            \"iat\": int(datetime.now(timezone.utc).timestamp()),\n        }\n        return generate_jwt(\n            data, self.encode_key, self.lifetime_seconds, algorithm=self.algorithm\n        )\n\n    async def destroy_token(self, token: str, user: User) -> None:  # noqa: ARG002\n        # JWTs are stateless — nothing to invalidate server-side.\n        # NOTE: a compromise that makes JWT auth stateful but revocable\n        # is to include a token_version claim in the JWT payload. The token_version\n        # is incremented whenever the user logs out (or gets login revoked). Whenever\n        # the JWT is used, it is only valid if the token_version claim is the same as the one\n        # in the db. If not, the JWT is invalid and the user needs to login again.\n        return\n\n    async def refresh_token(\n        self,\n        token: Optional[str],  # noqa: ARG002\n        user: User,  # noqa: ARG002\n    ) -> str:\n        \"\"\"Issue a fresh JWT with a new expiry.\"\"\"\n        return await self.write_token(user)\n\n\ndef get_redis_strategy() -> TenantAwareRedisStrategy:\n    return TenantAwareRedisStrategy()\n\n\ndef get_database_strategy(\n    access_token_db: AccessTokenDatabase[AccessToken] = Depends(get_access_token_db),\n) -> RefreshableDatabaseStrategy:\n    return RefreshableDatabaseStrategy(\n        access_token_db, lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS\n    )\n\n\ndef get_jwt_strategy() -> SingleTenantJWTStrategy:\n    return SingleTenantJWTStrategy(\n        secret=USER_AUTH_SECRET,\n        lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS,\n    )\n\n\nif AUTH_BACKEND == AuthBackend.JWT:\n    if MULTI_TENANT or AUTH_TYPE == AuthType.CLOUD:\n        raise ValueError(\n            \"JWT auth backend is only supported for single-tenant, self-hosted deployments. Use 'redis' or 'postgres' instead.\"\n        )\n    if not USER_AUTH_SECRET:\n        raise ValueError(\"USER_AUTH_SECRET is required for JWT auth backend.\")\n\nif AUTH_BACKEND == AuthBackend.REDIS:\n    auth_backend = AuthenticationBackend(\n        name=\"redis\", transport=cookie_transport, get_strategy=get_redis_strategy\n    )\nelif AUTH_BACKEND == AuthBackend.POSTGRES:\n    auth_backend = AuthenticationBackend(\n        name=\"postgres\", transport=cookie_transport, get_strategy=get_database_strategy\n    )\nelif AUTH_BACKEND == AuthBackend.JWT:\n    auth_backend = AuthenticationBackend(\n        name=\"jwt\", transport=cookie_transport, get_strategy=get_jwt_strategy\n    )\nelse:\n    raise ValueError(f\"Invalid auth backend: {AUTH_BACKEND}\")\n\n\nclass FastAPIUserWithLogoutRouter(FastAPIUsers[models.UP, models.ID]):\n    def get_logout_router(\n        self,\n        backend: AuthenticationBackend,\n        requires_verification: bool = REQUIRE_EMAIL_VERIFICATION,\n    ) -> APIRouter:\n        \"\"\"\n        Provide a router for logout only for OAuth/OIDC Flows.\n        This way the login router does not need to be included\n        \"\"\"\n        router = APIRouter()\n\n        get_current_user_token = self.authenticator.current_user_token(\n            active=True, verified=requires_verification\n        )\n\n        logout_responses: OpenAPIResponseType = {\n            **{\n                status.HTTP_401_UNAUTHORIZED: {\n                    \"description\": \"Missing token or inactive user.\"\n                }\n            },\n            **backend.transport.get_openapi_logout_responses_success(),\n        }\n\n        @router.post(\n            \"/logout\", name=f\"auth:{backend.name}.logout\", responses=logout_responses\n        )\n        async def logout(\n            user_token: Tuple[models.UP, str] = Depends(get_current_user_token),\n            strategy: Strategy[models.UP, models.ID] = Depends(backend.get_strategy),\n        ) -> Response:\n            user, token = user_token\n            return await backend.logout(strategy, user, token)\n\n        return router\n\n    def get_refresh_router(\n        self,\n        backend: AuthenticationBackend,\n        requires_verification: bool = REQUIRE_EMAIL_VERIFICATION,\n    ) -> APIRouter:\n        \"\"\"\n        Provide a router for session token refreshing.\n        \"\"\"\n        # Import the oauth_refresher here to avoid circular imports\n        from onyx.auth.oauth_refresher import check_and_refresh_oauth_tokens\n\n        router = APIRouter()\n\n        get_current_user_token = self.authenticator.current_user_token(\n            active=True, verified=requires_verification\n        )\n\n        refresh_responses: OpenAPIResponseType = {\n            **{\n                status.HTTP_401_UNAUTHORIZED: {\n                    \"description\": \"Missing token or inactive user.\"\n                }\n            },\n            **backend.transport.get_openapi_login_responses_success(),\n        }\n\n        @router.post(\n            \"/refresh\", name=f\"auth:{backend.name}.refresh\", responses=refresh_responses\n        )\n        async def refresh(\n            user_token: Tuple[models.UP, str] = Depends(get_current_user_token),\n            strategy: Strategy[models.UP, models.ID] = Depends(backend.get_strategy),\n            user_manager: BaseUserManager[models.UP, models.ID] = Depends(\n                get_user_manager\n            ),\n            db_session: AsyncSession = Depends(get_async_session),\n        ) -> Response:\n            try:\n                user, token = user_token\n                logger.info(f\"Processing token refresh request for user {user.email}\")\n\n                # Check if user has OAuth accounts that need refreshing\n                await check_and_refresh_oauth_tokens(\n                    user=cast(User, user),\n                    db_session=db_session,\n                    user_manager=cast(Any, user_manager),\n                )\n\n                # Check if strategy supports refreshing\n                supports_refresh = hasattr(strategy, \"refresh_token\") and callable(\n                    getattr(strategy, \"refresh_token\")\n                )\n\n                if supports_refresh:\n                    try:\n                        refresh_method = getattr(strategy, \"refresh_token\")\n                        new_token = await refresh_method(token, user)\n                        logger.info(\n                            f\"Successfully refreshed session token for user {user.email}\"\n                        )\n                        return await backend.transport.get_login_response(new_token)\n                    except Exception as e:\n                        logger.error(f\"Error refreshing session token: {str(e)}\")\n                        # Fallback to logout and login if refresh fails\n                        await backend.logout(strategy, user, token)\n                        return await backend.login(strategy, user)\n\n                # Fallback: logout and login again\n                logger.info(\n                    \"Strategy doesn't support refresh - using logout/login flow\"\n                )\n                await backend.logout(strategy, user, token)\n                return await backend.login(strategy, user)\n            except Exception as e:\n                logger.error(f\"Unexpected error in refresh endpoint: {str(e)}\")\n                raise HTTPException(\n                    status_code=status.HTTP_400_BAD_REQUEST,\n                    detail=f\"Token refresh failed: {str(e)}\",\n                )\n\n        return router\n\n\nfastapi_users = FastAPIUserWithLogoutRouter[User, uuid.UUID](\n    get_user_manager, [auth_backend]\n)\n\n\n# NOTE: verified=REQUIRE_EMAIL_VERIFICATION is not used here since we\n# take care of that in `double_check_user` ourself. This is needed, since\n# we want the /me endpoint to still return a user even if they are not\n# yet verified, so that the frontend knows they exist\noptional_fastapi_current_user = fastapi_users.current_user(active=True, optional=True)\n\n\n_JWT_EMAIL_CLAIM_KEYS = (\"email\", \"preferred_username\", \"upn\")\n\n\ndef _extract_email_from_jwt(payload: dict[str, Any]) -> str | None:\n    \"\"\"Return the best-effort email/username from a decoded JWT payload.\"\"\"\n    for key in _JWT_EMAIL_CLAIM_KEYS:\n        value = payload.get(key)\n        if isinstance(value, str) and value:\n            try:\n                email_info = validate_email(value, check_deliverability=False)\n            except EmailNotValidError:\n                continue\n            normalized_email = email_info.normalized or email_info.email\n            return normalized_email.lower()\n    return None\n\n\nasync def _sync_jwt_oidc_expiry(\n    user_manager: UserManager, user: User, payload: dict[str, Any]\n) -> None:\n    if TRACK_EXTERNAL_IDP_EXPIRY:\n        expires_at = payload.get(\"exp\")\n        if expires_at is None:\n            return\n        try:\n            expiry_timestamp = int(expires_at)\n        except (TypeError, ValueError):\n            logger.warning(\"Invalid exp claim on JWT for user %s\", user.email)\n            return\n\n        oidc_expiry = datetime.fromtimestamp(expiry_timestamp, tz=timezone.utc)\n        if user.oidc_expiry == oidc_expiry:\n            return\n\n        await user_manager.user_db.update(user, {\"oidc_expiry\": oidc_expiry})\n        user.oidc_expiry = oidc_expiry\n        return\n\n    if user.oidc_expiry is not None:\n        await user_manager.user_db.update(user, {\"oidc_expiry\": None})\n        user.oidc_expiry = None  # type: ignore\n\n\nasync def _get_or_create_user_from_jwt(\n    payload: dict[str, Any],\n    request: Request,\n    async_db_session: AsyncSession,\n) -> User | None:\n    email = _extract_email_from_jwt(payload)\n    if email is None:\n        logger.warning(\n            \"JWT token decoded successfully but no email claim found; skipping auth\"\n        )\n        return None\n\n    # Enforce the same allowlist/domain policies as other auth flows\n    verify_email_is_invited(email)\n    verify_email_domain(email)\n\n    user_db: SQLAlchemyUserAdminDB[User, uuid.UUID] = SQLAlchemyUserAdminDB(\n        async_db_session, User, OAuthAccount\n    )\n    user_manager = UserManager(user_db)\n\n    try:\n        user = await user_manager.get_by_email(email)\n        if not user.is_active:\n            logger.warning(\"Inactive user %s attempted JWT login; skipping\", email)\n            return None\n        if not user.account_type.is_web_login():\n            raise exceptions.UserNotExists()\n    except exceptions.UserNotExists:\n        logger.info(\"Provisioning user %s from JWT login\", email)\n        try:\n            user = await user_manager.create(\n                UserCreate(\n                    email=email,\n                    password=generate_password(),\n                    is_verified=True,\n                ),\n                request=request,\n            )\n        except exceptions.UserAlreadyExists:\n            user = await user_manager.get_by_email(email)\n            if not user.is_active:\n                logger.warning(\n                    \"Inactive user %s attempted JWT login during provisioning race; skipping\",\n                    email,\n                )\n                return None\n            if not user.account_type.is_web_login():\n                logger.warning(\n                    \"Non-web-login user %s attempted JWT login during provisioning race; skipping\",\n                    email,\n                )\n                return None\n\n    await _sync_jwt_oidc_expiry(user_manager, user, payload)\n    return user\n\n\nasync def _check_for_saml_and_jwt(\n    request: Request,\n    user: User | None,\n    async_db_session: AsyncSession,\n) -> User | None:\n    # If user is None, check for JWT in Authorization header\n    if user is None and JWT_PUBLIC_KEY_URL is not None:\n        auth_header = request.headers.get(\"Authorization\")\n        if auth_header and auth_header.startswith(\"Bearer \"):\n            token = auth_header[len(\"Bearer \") :].strip()\n            payload = await verify_jwt_token(token)\n            if payload is not None:\n                user = await _get_or_create_user_from_jwt(\n                    payload, request, async_db_session\n                )\n\n    return user\n\n\nasync def optional_user(\n    request: Request,\n    async_db_session: AsyncSession = Depends(get_async_session),\n    user: User | None = Depends(optional_fastapi_current_user),\n) -> User | None:\n\n    if user := await _check_for_saml_and_jwt(request, user, async_db_session):\n        # If user is already set, _check_for_saml_and_jwt returns the same user object\n        return user\n\n    try:\n        if hashed_pat := get_hashed_pat_from_request(request):\n            user = await fetch_user_for_pat(hashed_pat, async_db_session)\n        elif hashed_api_key := get_hashed_api_key_from_request(request):\n            user = await fetch_user_for_api_key(hashed_api_key, async_db_session)\n    except ValueError:\n        logger.warning(\"Issue with validating authentication token\")\n        return None\n\n    return user\n\n\ndef get_anonymous_user() -> User:\n    \"\"\"Create anonymous user object.\"\"\"\n    user = User(\n        id=uuid.UUID(ANONYMOUS_USER_UUID),\n        email=ANONYMOUS_USER_EMAIL,\n        hashed_password=\"\",\n        is_active=True,\n        is_verified=True,\n        is_superuser=False,\n        role=UserRole.LIMITED,\n        account_type=AccountType.ANONYMOUS,\n        use_memories=False,\n        enable_memory_tool=False,\n    )\n    return user\n\n\nasync def double_check_user(\n    user: User | None,\n    include_expired: bool = False,\n    allow_anonymous_access: bool = False,\n) -> User:\n    if user is not None:\n        # If user attempted to authenticate, verify them, do not default\n        # to anonymous access if it fails.\n        if user_needs_to_be_verified() and not user.is_verified:\n            raise BasicAuthenticationError(\n                detail=\"Access denied. User is not verified.\",\n            )\n\n        if (\n            user.oidc_expiry\n            and user.oidc_expiry < datetime.now(timezone.utc)\n            and not include_expired\n        ):\n            raise BasicAuthenticationError(\n                detail=\"Access denied. User's OIDC token has expired.\",\n            )\n\n        return user\n\n    if allow_anonymous_access:\n        return get_anonymous_user()\n\n    raise BasicAuthenticationError(\n        detail=\"Access denied. User is not authenticated.\",\n    )\n\n\nasync def current_user_with_expired_token(\n    user: User | None = Depends(optional_user),\n) -> User:\n    return await double_check_user(user, include_expired=True)\n\n\nasync def current_limited_user(\n    user: User | None = Depends(optional_user),\n) -> User:\n    return await double_check_user(user)\n\n\nasync def current_chat_accessible_user(\n    user: User | None = Depends(optional_user),\n) -> User:\n    tenant_id = get_current_tenant_id()\n\n    return await double_check_user(\n        user, allow_anonymous_access=anonymous_user_enabled(tenant_id=tenant_id)\n    )\n\n\nasync def current_user(\n    user: User | None = Depends(optional_user),\n) -> User:\n    user = await double_check_user(user)\n\n    if user.role == UserRole.LIMITED:\n        raise BasicAuthenticationError(\n            detail=\"Access denied. User role is LIMITED. BASIC or higher permissions are required.\",\n        )\n    return user\n\n\nasync def current_curator_or_admin_user(\n    user: User = Depends(current_user),\n) -> User:\n    allowed_roles = {UserRole.GLOBAL_CURATOR, UserRole.CURATOR, UserRole.ADMIN}\n    if user.role not in allowed_roles:\n        raise BasicAuthenticationError(\n            detail=\"Access denied. User is not a curator or admin.\",\n        )\n\n    return user\n\n\nasync def current_admin_user(user: User = Depends(current_user)) -> User:\n    if user.role != UserRole.ADMIN:\n        raise BasicAuthenticationError(\n            detail=\"Access denied. User must be an admin to perform this action.\",\n        )\n\n    return user\n\n\nasync def _get_user_from_token_data(token_data: dict) -> User | None:\n    \"\"\"Shared logic: token data dict → User object.\n\n    Args:\n        token_data: Decoded token data containing 'sub' (user ID).\n\n    Returns:\n        User object if found and active, None otherwise.\n    \"\"\"\n    user_id = token_data.get(\"sub\")\n    if not user_id:\n        return None\n\n    try:\n        user_uuid = uuid.UUID(user_id)\n    except ValueError:\n        return None\n\n    async with get_async_session_context_manager() as async_db_session:\n        user = await async_db_session.get(User, user_uuid)\n        if user is None or not user.is_active:\n            return None\n        return user\n\n\n_LOOPBACK_HOSTNAMES = frozenset({\"localhost\", \"127.0.0.1\", \"::1\"})\n\n\ndef _is_same_origin(actual: str, expected: str) -> bool:\n    \"\"\"Compare two origins for the WebSocket CSWSH check.\n\n    Scheme and hostname must match exactly.  Port must also match, except\n    when the hostname is a loopback address (localhost / 127.0.0.1 / ::1),\n    where port is ignored.  On loopback, all ports belong to the same\n    operator, so port differences carry no security significance — the\n    CSWSH threat is remote origins, not local ones.\n    \"\"\"\n    a = urlparse(actual.rstrip(\"/\"))\n    e = urlparse(expected.rstrip(\"/\"))\n\n    if a.scheme != e.scheme or a.hostname != e.hostname:\n        return False\n\n    if a.hostname in _LOOPBACK_HOSTNAMES:\n        return True\n\n    actual_port = a.port or (443 if a.scheme == \"https\" else 80)\n    expected_port = e.port or (443 if e.scheme == \"https\" else 80)\n\n    return actual_port == expected_port\n\n\nasync def current_user_from_websocket(\n    websocket: WebSocket,\n    token: str = Query(..., description=\"WebSocket authentication token\"),\n) -> User:\n    \"\"\"\n    WebSocket authentication dependency using query parameter.\n\n    Validates the WS token from query param and returns the User.\n    Raises BasicAuthenticationError if authentication fails.\n\n    The token must be obtained from POST /voice/ws-token before connecting.\n    Tokens are single-use and expire after 60 seconds.\n\n    Usage:\n        1. POST /voice/ws-token -> {\"token\": \"xxx\"}\n        2. Connect to ws://host/path?token=xxx\n\n    This applies the same auth checks as current_user() for HTTP endpoints.\n    \"\"\"\n    # Check Origin header to prevent Cross-Site WebSocket Hijacking (CSWSH).\n    # Browsers always send Origin on WebSocket connections.\n    origin = websocket.headers.get(\"origin\")\n    if not origin:\n        logger.warning(\"WS auth: missing Origin header\")\n        raise BasicAuthenticationError(detail=\"Access denied. Missing origin.\")\n\n    if not _is_same_origin(origin, WEB_DOMAIN):\n        logger.warning(f\"WS auth: origin mismatch. Expected {WEB_DOMAIN}, got {origin}\")\n        raise BasicAuthenticationError(detail=\"Access denied. Invalid origin.\")\n\n    # Validate WS token in Redis (single-use, deleted after retrieval)\n    try:\n        token_data = await retrieve_ws_token_data(token)\n        if token_data is None:\n            raise BasicAuthenticationError(\n                detail=\"Access denied. Invalid or expired authentication token.\"\n            )\n    except BasicAuthenticationError:\n        raise\n    except Exception as e:\n        logger.error(f\"WS auth: error during token validation: {e}\")\n        raise BasicAuthenticationError(\n            detail=\"Authentication verification failed.\"\n        ) from e\n\n    # Get user from token data\n    user = await _get_user_from_token_data(token_data)\n    if user is None:\n        logger.warning(f\"WS auth: user not found for id={token_data.get('sub')}\")\n        raise BasicAuthenticationError(\n            detail=\"Access denied. User not found or inactive.\"\n        )\n\n    # Apply same checks as HTTP auth (verification, OIDC expiry, role)\n    user = await double_check_user(user)\n\n    # Block LIMITED users (same as current_user)\n    if user.role == UserRole.LIMITED:\n        logger.warning(f\"WS auth: user {user.email} has LIMITED role\")\n        raise BasicAuthenticationError(\n            detail=\"Access denied. User role is LIMITED. BASIC or higher permissions are required.\",\n        )\n\n    logger.debug(f\"WS auth: authenticated {user.email}\")\n    return user\n\n\ndef get_default_admin_user_emails_() -> list[str]:\n    # No default seeding available for Onyx MIT\n    return []\n\n\nSTATE_TOKEN_AUDIENCE = \"fastapi-users:oauth-state\"\nSTATE_TOKEN_LIFETIME_SECONDS = 3600\nCSRF_TOKEN_KEY = \"csrftoken\"\nCSRF_TOKEN_COOKIE_NAME = \"fastapiusersoauthcsrf\"\nPKCE_COOKIE_NAME_PREFIX = \"fastapiusersoauthpkce\"\n\n\nclass OAuth2AuthorizeResponse(BaseModel):\n    authorization_url: str\n\n\ndef generate_state_token(\n    data: Dict[str, str],\n    secret: SecretType,\n    lifetime_seconds: int = STATE_TOKEN_LIFETIME_SECONDS,\n) -> str:\n    data[\"aud\"] = STATE_TOKEN_AUDIENCE\n\n    return generate_jwt(data, secret, lifetime_seconds)\n\n\ndef generate_csrf_token() -> str:\n    return secrets.token_urlsafe(32)\n\n\ndef _base64url_encode(data: bytes) -> str:\n    return base64.urlsafe_b64encode(data).rstrip(b\"=\").decode(\"ascii\")\n\n\ndef generate_pkce_pair() -> tuple[str, str]:\n    verifier = secrets.token_urlsafe(64)\n    challenge = _base64url_encode(hashlib.sha256(verifier.encode(\"ascii\")).digest())\n    return verifier, challenge\n\n\ndef get_pkce_cookie_name(state: str) -> str:\n    state_hash = hashlib.sha256(state.encode(\"utf-8\")).hexdigest()\n    return f\"{PKCE_COOKIE_NAME_PREFIX}_{state_hash}\"\n\n\n# refer to https://github.com/fastapi-users/fastapi-users/blob/42ddc241b965475390e2bce887b084152ae1a2cd/fastapi_users/fastapi_users.py#L91\ndef create_onyx_oauth_router(\n    oauth_client: BaseOAuth2,\n    backend: AuthenticationBackend,\n    state_secret: SecretType,\n    redirect_url: Optional[str] = None,\n    associate_by_email: bool = False,\n    is_verified_by_default: bool = False,\n    enable_pkce: bool = False,\n) -> APIRouter:\n    return get_oauth_router(\n        oauth_client,\n        backend,\n        get_user_manager,\n        state_secret,\n        redirect_url,\n        associate_by_email,\n        is_verified_by_default,\n        enable_pkce=enable_pkce,\n    )\n\n\ndef get_oauth_router(\n    oauth_client: BaseOAuth2,\n    backend: AuthenticationBackend,\n    get_user_manager: UserManagerDependency[models.UP, models.ID],\n    state_secret: SecretType,\n    redirect_url: Optional[str] = None,\n    associate_by_email: bool = False,\n    is_verified_by_default: bool = False,\n    *,\n    csrf_token_cookie_name: str = CSRF_TOKEN_COOKIE_NAME,\n    csrf_token_cookie_path: str = \"/\",\n    csrf_token_cookie_domain: Optional[str] = None,\n    csrf_token_cookie_secure: Optional[bool] = None,\n    csrf_token_cookie_httponly: bool = True,\n    csrf_token_cookie_samesite: Optional[Literal[\"lax\", \"strict\", \"none\"]] = \"lax\",\n    enable_pkce: bool = False,\n) -> APIRouter:\n    \"\"\"Generate a router with the OAuth routes.\"\"\"\n    router = APIRouter()\n    callback_route_name = f\"oauth:{oauth_client.name}.{backend.name}.callback\"\n\n    if redirect_url is not None:\n        oauth2_authorize_callback = OAuth2AuthorizeCallback(\n            oauth_client,\n            redirect_url=redirect_url,\n        )\n    else:\n        oauth2_authorize_callback = OAuth2AuthorizeCallback(\n            oauth_client,\n            route_name=callback_route_name,\n        )\n\n    async def null_access_token_state() -> tuple[OAuth2Token, Optional[str]] | None:\n        return None\n\n    access_token_state_dependency = (\n        oauth2_authorize_callback if not enable_pkce else null_access_token_state\n    )\n\n    if csrf_token_cookie_secure is None:\n        csrf_token_cookie_secure = WEB_DOMAIN.startswith(\"https\")\n\n    @router.get(\n        \"/authorize\",\n        name=f\"oauth:{oauth_client.name}.{backend.name}.authorize\",\n        response_model=OAuth2AuthorizeResponse,\n    )\n    async def authorize(\n        request: Request,\n        response: Response,\n        redirect: bool = Query(False),\n        scopes: List[str] = Query(None),\n    ) -> Response | OAuth2AuthorizeResponse:\n        referral_source = request.cookies.get(\"referral_source\", None)\n\n        if redirect_url is not None:\n            authorize_redirect_url = redirect_url\n        else:\n            # Use WEB_DOMAIN instead of request.url_for() to prevent host\n            # header poisoning — request.url_for() trusts the Host header.\n            callback_path = request.app.url_path_for(callback_route_name)\n            authorize_redirect_url = f\"{WEB_DOMAIN}{callback_path}\"\n\n        next_url = request.query_params.get(\"next\", \"/\")\n\n        csrf_token = generate_csrf_token()\n        state_data: Dict[str, str] = {\n            \"next_url\": next_url,\n            \"referral_source\": referral_source or \"default_referral\",\n            CSRF_TOKEN_KEY: csrf_token,\n        }\n        state = generate_state_token(state_data, state_secret)\n        pkce_cookie: tuple[str, str] | None = None\n\n        if enable_pkce:\n            code_verifier, code_challenge = generate_pkce_pair()\n            pkce_cookie_name = get_pkce_cookie_name(state)\n            pkce_cookie = (pkce_cookie_name, code_verifier)\n            authorization_url = await oauth_client.get_authorization_url(\n                authorize_redirect_url,\n                state,\n                scopes,\n                code_challenge=code_challenge,\n                code_challenge_method=\"S256\",\n            )\n        else:\n            # Get the basic authorization URL\n            authorization_url = await oauth_client.get_authorization_url(\n                authorize_redirect_url,\n                state,\n                scopes,\n            )\n\n        # For Google OAuth, add parameters to request refresh tokens\n        if oauth_client.name == \"google\":\n            authorization_url = add_url_params(\n                authorization_url, {\"access_type\": \"offline\", \"prompt\": \"consent\"}\n            )\n\n        def set_oauth_cookie(\n            target_response: Response,\n            *,\n            key: str,\n            value: str,\n        ) -> None:\n            target_response.set_cookie(\n                key=key,\n                value=value,\n                max_age=STATE_TOKEN_LIFETIME_SECONDS,\n                path=csrf_token_cookie_path,\n                domain=csrf_token_cookie_domain,\n                secure=csrf_token_cookie_secure,\n                httponly=csrf_token_cookie_httponly,\n                samesite=csrf_token_cookie_samesite,\n            )\n\n        response_with_cookies: Response\n        if redirect:\n            response_with_cookies = RedirectResponse(authorization_url, status_code=302)\n        else:\n            response_with_cookies = response\n\n        set_oauth_cookie(\n            response_with_cookies,\n            key=csrf_token_cookie_name,\n            value=csrf_token,\n        )\n        if pkce_cookie is not None:\n            pkce_cookie_name, code_verifier = pkce_cookie\n            set_oauth_cookie(\n                response_with_cookies,\n                key=pkce_cookie_name,\n                value=code_verifier,\n            )\n\n        if redirect:\n            return response_with_cookies\n\n        return OAuth2AuthorizeResponse(authorization_url=authorization_url)\n\n    @log_function_time(print_only=True)\n    @router.get(\n        \"/callback\",\n        name=callback_route_name,\n        description=\"The response varies based on the authentication backend used.\",\n        responses={\n            status.HTTP_400_BAD_REQUEST: {\n                \"model\": ErrorModel,\n                \"content\": {\n                    \"application/json\": {\n                        \"examples\": {\n                            \"INVALID_STATE_TOKEN\": {\n                                \"summary\": \"Invalid state token.\",\n                                \"value\": None,\n                            },\n                            ErrorCode.LOGIN_BAD_CREDENTIALS: {\n                                \"summary\": \"User is inactive.\",\n                                \"value\": {\"detail\": ErrorCode.LOGIN_BAD_CREDENTIALS},\n                            },\n                        }\n                    }\n                },\n            },\n        },\n    )\n    async def callback(\n        request: Request,\n        access_token_state: Tuple[OAuth2Token, Optional[str]] | None = Depends(\n            access_token_state_dependency\n        ),\n        code: Optional[str] = None,\n        state: Optional[str] = None,\n        error: Optional[str] = None,\n        user_manager: BaseUserManager[models.UP, models.ID] = Depends(get_user_manager),\n        strategy: Strategy[models.UP, models.ID] = Depends(backend.get_strategy),\n    ) -> Response:\n        pkce_cookie_name: str | None = None\n\n        def delete_pkce_cookie(response: Response) -> None:\n            if enable_pkce and pkce_cookie_name:\n                response.delete_cookie(\n                    key=pkce_cookie_name,\n                    path=csrf_token_cookie_path,\n                    domain=csrf_token_cookie_domain,\n                    secure=csrf_token_cookie_secure,\n                    httponly=csrf_token_cookie_httponly,\n                    samesite=csrf_token_cookie_samesite,\n                )\n\n        def build_error_response(exc: OnyxError) -> JSONResponse:\n            log_onyx_error(exc)\n            error_response = onyx_error_to_json_response(exc)\n            delete_pkce_cookie(error_response)\n            return error_response\n\n        def decode_and_validate_state(state_value: str) -> Dict[str, str]:\n            try:\n                state_data = decode_jwt(\n                    state_value, state_secret, [STATE_TOKEN_AUDIENCE]\n                )\n            except jwt.DecodeError:\n                raise OnyxError(\n                    OnyxErrorCode.VALIDATION_ERROR,\n                    getattr(\n                        ErrorCode,\n                        \"ACCESS_TOKEN_DECODE_ERROR\",\n                        \"ACCESS_TOKEN_DECODE_ERROR\",\n                    ),\n                )\n            except jwt.ExpiredSignatureError:\n                raise OnyxError(\n                    OnyxErrorCode.VALIDATION_ERROR,\n                    getattr(\n                        ErrorCode,\n                        \"ACCESS_TOKEN_ALREADY_EXPIRED\",\n                        \"ACCESS_TOKEN_ALREADY_EXPIRED\",\n                    ),\n                )\n            except jwt.PyJWTError:\n                raise OnyxError(\n                    OnyxErrorCode.VALIDATION_ERROR,\n                    getattr(\n                        ErrorCode,\n                        \"ACCESS_TOKEN_DECODE_ERROR\",\n                        \"ACCESS_TOKEN_DECODE_ERROR\",\n                    ),\n                )\n\n            cookie_csrf_token = request.cookies.get(csrf_token_cookie_name)\n            state_csrf_token = state_data.get(CSRF_TOKEN_KEY)\n            if (\n                not cookie_csrf_token\n                or not state_csrf_token\n                or not secrets.compare_digest(cookie_csrf_token, state_csrf_token)\n            ):\n                raise OnyxError(\n                    OnyxErrorCode.VALIDATION_ERROR,\n                    getattr(ErrorCode, \"OAUTH_INVALID_STATE\", \"OAUTH_INVALID_STATE\"),\n                )\n\n            return state_data\n\n        token: OAuth2Token\n        state_data: Dict[str, str]\n\n        # `code`, `state`, and `error` are read directly only in the PKCE path.\n        # In the non-PKCE path, `oauth2_authorize_callback` consumes them.\n        if enable_pkce:\n            if state is not None:\n                pkce_cookie_name = get_pkce_cookie_name(state)\n\n            if error is not None:\n                return build_error_response(\n                    OnyxError(\n                        OnyxErrorCode.VALIDATION_ERROR,\n                        \"Authorization request failed or was denied\",\n                    )\n                )\n            if code is None:\n                return build_error_response(\n                    OnyxError(\n                        OnyxErrorCode.VALIDATION_ERROR,\n                        \"Missing authorization code in OAuth callback\",\n                    )\n                )\n            if state is None:\n                return build_error_response(\n                    OnyxError(\n                        OnyxErrorCode.VALIDATION_ERROR,\n                        \"Missing state parameter in OAuth callback\",\n                    )\n                )\n\n            state_value = state\n\n            if redirect_url is not None:\n                callback_redirect_url = redirect_url\n            else:\n                callback_path = request.app.url_path_for(callback_route_name)\n                callback_redirect_url = f\"{WEB_DOMAIN}{callback_path}\"\n\n            code_verifier = request.cookies.get(cast(str, pkce_cookie_name))\n            if not code_verifier:\n                return build_error_response(\n                    OnyxError(\n                        OnyxErrorCode.VALIDATION_ERROR,\n                        \"Missing PKCE verifier cookie in OAuth callback\",\n                    )\n                )\n\n            try:\n                state_data = decode_and_validate_state(state_value)\n            except OnyxError as e:\n                return build_error_response(e)\n\n            try:\n                token = await oauth_client.get_access_token(\n                    code, callback_redirect_url, code_verifier\n                )\n            except GetAccessTokenError:\n                return build_error_response(\n                    OnyxError(\n                        OnyxErrorCode.VALIDATION_ERROR,\n                        \"Authorization code exchange failed\",\n                    )\n                )\n        else:\n            if access_token_state is None:\n                raise OnyxError(\n                    OnyxErrorCode.INTERNAL_ERROR, \"Missing OAuth callback state\"\n                )\n            token, callback_state = access_token_state\n            if callback_state is None:\n                raise OnyxError(\n                    OnyxErrorCode.VALIDATION_ERROR,\n                    \"Missing state parameter in OAuth callback\",\n                )\n            state_data = decode_and_validate_state(callback_state)\n\n        async def complete_login_flow(\n            token: OAuth2Token, state_data: Dict[str, str]\n        ) -> RedirectResponse:\n            account_id, account_email = await oauth_client.get_id_email(\n                token[\"access_token\"]\n            )\n\n            if account_email is None:\n                raise OnyxError(\n                    OnyxErrorCode.VALIDATION_ERROR,\n                    ErrorCode.OAUTH_NOT_AVAILABLE_EMAIL,\n                )\n\n            next_url = state_data.get(\"next_url\", \"/\")\n            referral_source = state_data.get(\"referral_source\", None)\n            try:\n                tenant_id = fetch_ee_implementation_or_noop(\n                    \"onyx.server.tenants.user_mapping\", \"get_tenant_id_for_email\", None\n                )(account_email)\n            except exceptions.UserNotExists:\n                tenant_id = None\n\n            request.state.referral_source = referral_source\n\n            # Proceed to authenticate or create the user\n            try:\n                user = await user_manager.oauth_callback(\n                    oauth_client.name,\n                    token[\"access_token\"],\n                    account_id,\n                    account_email,\n                    token.get(\"expires_at\"),\n                    token.get(\"refresh_token\"),\n                    request,\n                    associate_by_email=associate_by_email,\n                    is_verified_by_default=is_verified_by_default,\n                )\n            except UserAlreadyExists:\n                raise OnyxError(\n                    OnyxErrorCode.VALIDATION_ERROR,\n                    ErrorCode.OAUTH_USER_ALREADY_EXISTS,\n                )\n\n            if not user.is_active:\n                raise OnyxError(\n                    OnyxErrorCode.VALIDATION_ERROR,\n                    ErrorCode.LOGIN_BAD_CREDENTIALS,\n                )\n\n            # Login user\n            response = await backend.login(strategy, user)\n            await user_manager.on_after_login(user, request, response)\n\n            # Prepare redirect response\n            if tenant_id is None:\n                # Use URL utility to add parameters\n                redirect_destination = add_url_params(next_url, {\"new_team\": \"true\"})\n                redirect_response = RedirectResponse(\n                    redirect_destination, status_code=302\n                )\n            else:\n                # No parameters to add\n                redirect_response = RedirectResponse(next_url, status_code=302)\n\n            # Copy headers from auth response to redirect response, with special handling for Set-Cookie\n            for header_name, header_value in response.headers.items():\n                header_name_lower = header_name.lower()\n                if header_name_lower == \"set-cookie\":\n                    redirect_response.headers.append(header_name, header_value)\n                    continue\n                if header_name_lower in {\"location\", \"content-length\"}:\n                    continue\n                redirect_response.headers[header_name] = header_value\n\n            return redirect_response\n\n        if enable_pkce:\n            try:\n                redirect_response = await complete_login_flow(token, state_data)\n            except OnyxError as e:\n                return build_error_response(e)\n            delete_pkce_cookie(redirect_response)\n            return redirect_response\n\n        return await complete_login_flow(token, state_data)\n\n    return router\n"
  },
  {
    "path": "backend/onyx/auth/utils.py",
    "content": "\"\"\"Shared authentication utilities for bearer token extraction and validation.\"\"\"\n\nfrom collections.abc import Callable\nfrom urllib.parse import unquote\n\nfrom fastapi import Request\n\nfrom onyx.auth.constants import API_KEY_HEADER_ALTERNATIVE_NAME\nfrom onyx.auth.constants import API_KEY_HEADER_NAME\nfrom onyx.auth.constants import API_KEY_PREFIX\nfrom onyx.auth.constants import BEARER_PREFIX\nfrom onyx.auth.constants import DEPRECATED_API_KEY_PREFIX\nfrom onyx.auth.constants import PAT_PREFIX\n\n\ndef get_hashed_bearer_token_from_request(\n    request: Request,\n    valid_prefixes: list[str],\n    hash_fn: Callable[[str], str],\n    allow_non_bearer: bool = False,\n) -> str | None:\n    \"\"\"Generic extraction and hashing of bearer tokens from request headers.\n\n    Args:\n        request: The FastAPI request\n        valid_prefixes: List of valid token prefixes (e.g., [\"on_\", \"onyx_pat_\"])\n        hash_fn: Function to hash the token (e.g., hash_api_key or hash_pat)\n        allow_non_bearer: If True, accept raw tokens without \"Bearer \" prefix\n\n    Returns:\n        Hashed token if valid format, else None\n    \"\"\"\n    auth_header = request.headers.get(\n        API_KEY_HEADER_ALTERNATIVE_NAME\n    ) or request.headers.get(API_KEY_HEADER_NAME)\n\n    if not auth_header:\n        return None\n\n    # Handle bearer format\n    if auth_header.startswith(BEARER_PREFIX):\n        token = auth_header[len(BEARER_PREFIX) :].strip()\n    elif allow_non_bearer:\n        token = auth_header\n    else:\n        return None\n\n    # Check if token starts with any valid prefix\n    if valid_prefixes:\n        valid = any(token.startswith(prefix) for prefix in valid_prefixes)\n        if not valid:\n            return None\n\n    return hash_fn(token)\n\n\ndef _extract_tenant_from_bearer_token(\n    request: Request, valid_prefixes: list[str]\n) -> str | None:\n    \"\"\"Generic tenant extraction from bearer token. Returns None if invalid format.\n\n    Args:\n        request: The FastAPI request\n        valid_prefixes: List of valid token prefixes (e.g., [\"on_\", \"dn_\"])\n\n    Returns:\n        Tenant ID if found in format <prefix><tenant>.<random>, else None\n    \"\"\"\n    auth_header = request.headers.get(\n        API_KEY_HEADER_ALTERNATIVE_NAME\n    ) or request.headers.get(API_KEY_HEADER_NAME)\n\n    if not auth_header or not auth_header.startswith(BEARER_PREFIX):\n        return None\n\n    token = auth_header[len(BEARER_PREFIX) :].strip()\n\n    # Check if token starts with any valid prefix\n    matched_prefix = None\n    for prefix in valid_prefixes:\n        if token.startswith(prefix):\n            matched_prefix = prefix\n            break\n\n    if not matched_prefix:\n        return None\n\n    # Parse tenant from token format: <prefix><tenant>.<random>\n    parts = token[len(matched_prefix) :].split(\".\", 1)\n    if len(parts) != 2:\n        return None\n\n    tenant_id = parts[0]\n    return unquote(tenant_id) if tenant_id else None\n\n\ndef extract_tenant_from_auth_header(request: Request) -> str | None:\n    \"\"\"Extract tenant ID from API key or PAT header.\n\n    Unified function for extracting tenant from any bearer token (API key or PAT).\n    Checks all known token prefixes in order.\n\n    Returns:\n        Tenant ID if found, else None\n    \"\"\"\n    return _extract_tenant_from_bearer_token(\n        request, [API_KEY_PREFIX, DEPRECATED_API_KEY_PREFIX, PAT_PREFIX]\n    )\n"
  },
  {
    "path": "backend/onyx/background/README.md",
    "content": "# Overview of Onyx Background Jobs\n\nThe background jobs take care of:\n1. Pulling/Indexing documents (from connectors)\n2. Updating document metadata (from connectors)\n3. Cleaning up checkpoints and logic around indexing work (indexing indexing checkpoints and index attempt metadata)\n4. Handling user uploaded files and deletions (from the Projects feature and uploads via the Chat)\n5. Reporting metrics on things like queue length for monitoring purposes\n\n## Worker → Queue Mapping\n\n| Worker | File | Queues |\n|--------|------|--------|\n| Primary | `apps/primary.py` | `celery` |\n| Light | `apps/light.py` | `vespa_metadata_sync`, `connector_deletion`, `doc_permissions_upsert`, `checkpoint_cleanup`, `index_attempt_cleanup` |\n| Heavy | `apps/heavy.py` | `connector_pruning`, `connector_doc_permissions_sync`, `connector_external_group_sync`, `csv_generation`, `sandbox` |\n| Docprocessing | `apps/docprocessing.py` | `docprocessing` |\n| Docfetching | `apps/docfetching.py` | `connector_doc_fetching` |\n| User File Processing | `apps/user_file_processing.py` | `user_file_processing`, `user_file_project_sync`, `user_file_delete` |\n| Monitoring | `apps/monitoring.py` | `monitoring` |\n| Background (consolidated) | `apps/background.py` | All queues above except `celery` |\n\n## Non-Worker Apps\n| App | File | Purpose |\n|-----|------|---------|\n| **Beat** | `beat.py` | Celery beat scheduler with `DynamicTenantScheduler` that generates per-tenant periodic task schedules |\n| **Client** | `client.py` | Minimal app for task submission from non-worker processes (e.g., API server) |\n\n### Shared Module\n`app_base.py` provides:\n- `TenantAwareTask` - Base task class that sets tenant context\n- Signal handlers for logging, cleanup, and lifecycle events\n- Readiness probes and health checks\n\n\n## Worker Details\n\n### Primary (Coordinator and task dispatcher)\nIt is the single worker which handles tasks from the default celery queue. It is a singleton worker ensured by the `PRIMARY_WORKER` Redis lock\nwhich it touches every `CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 8` seconds (using Celery Bootsteps)\n\nOn startup:\n- waits for redis, postgres, document index to all be healthy\n- acquires the singleton lock\n- cleans all the redis states associated with background jobs\n- mark orphaned index attempts failed\n\nThen it cycles through its tasks as scheduled by Celery Beat:\n\n| Task | Frequency | Description |\n|------|-----------|-------------|\n| `check_for_indexing` | 15s | Scans for connectors needing indexing → dispatches to `DOCFETCHING` queue |\n| `check_for_vespa_sync_task` | 20s | Finds stale documents/document sets → dispatches sync tasks to `VESPA_METADATA_SYNC` queue |\n| `check_for_pruning` | 20s | Finds connectors due for pruning → dispatches to `CONNECTOR_PRUNING` queue |\n| `check_for_connector_deletion` | 20s | Processes deletion requests → dispatches to `CONNECTOR_DELETION` queue |\n| `check_for_user_file_processing` | 20s | Checks for user uploads → dispatches to `USER_FILE_PROCESSING` queue |\n| `check_for_checkpoint_cleanup` | 1h | Cleans up old indexing checkpoints |\n| `check_for_index_attempt_cleanup` | 30m | Cleans up old index attempts |\n| `kombu_message_cleanup_task` | periodic | Cleans orphaned Kombu messages from DB (Kombu being the messaging framework used by Celery) |\n| `celery_beat_heartbeat` | 1m | Heartbeat for Beat watchdog |\n\nWatchdog is a separate Python process managed by supervisord which runs alongside celery workers. It checks the ONYX_CELERY_BEAT_HEARTBEAT_KEY in\nRedis to ensure Celery Beat is not dead. Beat schedules the celery_beat_heartbeat for Primary to touch the key and share that it's still alive.\nSee supervisord.conf for watchdog config.\n\n\n### Light\nFast and short living tasks that are not resource intensive. High concurrency:\nCan have 24 concurrent workers, each with a prefetch of 8 for a total of 192 tasks in flight at once.\n\nTasks it handles:\n- Syncs access/permissions, document sets, boosts, hidden state\n- Deletes documents that are marked for deletion in Postgres\n- Cleanup of checkpoints and index attempts\n\n\n### Heavy\nLong running, resource intensive tasks, handles pruning and sandbox operations. Low concurrency - max concurrency of 4 with 1 prefetch.\n\nDoes not interact with the Document Index, it handles the syncs with external systems. Large volume API calls to handle pruning and fetching permissions, etc.\n\nGenerates CSV exports which may take a long time with significant data in Postgres.\n\nSandbox (new feature) for running Next.js, Python virtual env, OpenCode AI Agent, and access to knowledge files\n\n\n### Docprocessing, Docfetching, User File Processing\nDocprocessing and Docfetching are for indexing documents:\n- Docfetching runs connectors to pull documents from external APIs (Google Drive, Confluence, etc.), stores batches to file storage, and dispatches docprocessing tasks\n- Docprocessing retrieves batches, runs the indexing pipeline (chunking, embedding), and indexes into the Document Index \nUser Files come from uploads directly via the input bar\n\n\n### Monitoring\nObservability and metrics collections:\n- Queue lengths, connector success/failure, lconnector latencies\n- Memory of supervisor managed processes (workers, beat, slack)\n- Cloud and multitenant specific monitorings\n"
  },
  {
    "path": "backend/onyx/background/celery/apps/app_base.py",
    "content": "import logging\nimport multiprocessing\nimport os\nimport time\nfrom typing import Any\nfrom typing import cast\n\nimport sentry_sdk\nfrom celery import bootsteps  # type: ignore\nfrom celery import Task\nfrom celery.app import trace\nfrom celery.exceptions import WorkerShutdown\nfrom celery.signals import task_postrun\nfrom celery.signals import task_prerun\nfrom celery.states import READY_STATES\nfrom celery.utils.log import get_task_logger\nfrom celery.worker import strategy  # type: ignore\nfrom redis.lock import Lock as RedisLock\nfrom sentry_sdk.integrations.celery import CeleryIntegration\nfrom sqlalchemy import text\nfrom sqlalchemy.orm import Session\n\nfrom onyx import __version__\nfrom onyx.background.celery.apps.task_formatters import CeleryTaskColoredFormatter\nfrom onyx.background.celery.apps.task_formatters import CeleryTaskPlainFormatter\nfrom onyx.background.celery.celery_utils import celery_is_worker_primary\nfrom onyx.background.celery.celery_utils import make_probe_path\nfrom onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_PREFIX\nfrom onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_TASKSET_KEY\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\nfrom onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.db.engine.sql_engine import get_sqlalchemy_engine\nfrom onyx.document_index.opensearch.client import (\n    wait_for_opensearch_with_timeout,\n)\nfrom onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout\nfrom onyx.httpx.httpx_pool import HttpxPool\nfrom onyx.redis.redis_connector import RedisConnector\nfrom onyx.redis.redis_connector_delete import RedisConnectorDelete\nfrom onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync\nfrom onyx.redis.redis_connector_ext_group_sync import RedisConnectorExternalGroupSync\nfrom onyx.redis.redis_connector_prune import RedisConnectorPrune\nfrom onyx.redis.redis_document_set import RedisDocumentSet\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.redis.redis_usergroup import RedisUserGroup\nfrom onyx.tracing.setup import setup_tracing\nfrom onyx.utils.logger import ColoredFormatter\nfrom onyx.utils.logger import LoggerContextVars\nfrom onyx.utils.logger import PlainFormatter\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import DEV_LOGGING_ENABLED\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.configs import SENTRY_DSN\nfrom shared_configs.configs import TENANT_ID_PREFIX\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\nlogger = setup_logger()\n\ntask_logger = get_task_logger(__name__)\n\nif SENTRY_DSN:\n    sentry_sdk.init(\n        dsn=SENTRY_DSN,\n        integrations=[CeleryIntegration()],\n        traces_sample_rate=0.1,\n        release=__version__,\n    )\n    logger.info(\"Sentry initialized\")\nelse:\n    logger.debug(\"Sentry DSN not provided, skipping Sentry initialization\")\n\n\nclass TenantAwareTask(Task):\n    \"\"\"A custom base Task that sets tenant_id in a contextvar before running.\"\"\"\n\n    abstract = True  # So Celery knows not to register this as a real task.\n\n    def __call__(self, *args: Any, **kwargs: Any) -> Any:\n        # Grab tenant_id from the kwargs, or fallback to default if missing.\n        tenant_id = kwargs.get(\"tenant_id\", None) or POSTGRES_DEFAULT_SCHEMA\n\n        # Set the context var\n        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n\n        # Actually run the task now\n        try:\n            return super().__call__(*args, **kwargs)\n        finally:\n            # Clear or reset after the task runs\n            # so it does not leak into any subsequent tasks on the same worker process\n            CURRENT_TENANT_ID_CONTEXTVAR.set(None)\n\n\n@task_prerun.connect\ndef on_task_prerun(\n    sender: Any | None = None,  # noqa: ARG001\n    task_id: str | None = None,  # noqa: ARG001\n    task: Task | None = None,  # noqa: ARG001\n    args: tuple[Any, ...] | None = None,  # noqa: ARG001\n    kwargs: dict[str, Any] | None = None,  # noqa: ARG001\n    **other_kwargs: Any,  # noqa: ARG001\n) -> None:\n    # Reset any per-task logging context so that prefixes (e.g. pruning_ctx)\n    # from a previous task executed in the same worker process do not leak\n    # into the next task's log messages. This fixes incorrect [CC Pair:/Index Attempt]\n    # prefixes observed when a pruning task finishes and an indexing task\n    # runs in the same process.\n\n    LoggerContextVars.reset()\n\n\ndef on_task_postrun(\n    sender: Any | None = None,  # noqa: ARG001\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,  # noqa: ARG001\n    kwargs: dict[str, Any] | None = None,\n    retval: Any | None = None,  # noqa: ARG001\n    state: str | None = None,\n    **kwds: Any,  # noqa: ARG001\n) -> None:\n    \"\"\"We handle this signal in order to remove completed tasks\n    from their respective tasksets. This allows us to track the progress of document set\n    and user group syncs.\n\n    This function runs after any task completes (both success and failure)\n    Note that this signal does not fire on a task that failed to complete and is going\n    to be retried.\n\n    This also does not fire if a worker with acks_late=False crashes (which all of our\n    long running workers are)\n    \"\"\"\n    if not task:\n        return\n\n    task_logger.debug(f\"Task {task.name} (ID: {task_id}) completed with state: {state}\")\n\n    if state not in READY_STATES:\n        return\n\n    if not task_id:\n        return\n\n    if task.name.startswith(ONYX_CLOUD_CELERY_TASK_PREFIX):\n        # this is a cloud / all tenant task ... no postrun is needed\n        return\n\n    # Get tenant_id directly from kwargs- each celery task has a tenant_id kwarg\n    if not kwargs:\n        logger.error(f\"Task {task.name} (ID: {task_id}) is missing kwargs\")\n        tenant_id = POSTGRES_DEFAULT_SCHEMA\n    else:\n        tenant_id = cast(str, kwargs.get(\"tenant_id\", POSTGRES_DEFAULT_SCHEMA))\n\n    task_logger.debug(\n        f\"Task {task.name} (ID: {task_id}) completed with state: {state} {f'for tenant_id={tenant_id}' if tenant_id else ''}\"\n    )\n\n    r = get_redis_client(tenant_id=tenant_id)\n\n    # NOTE: we want to remove the `Redis*` classes, prefer to just have functions to\n    # do these things going forward. In short, things should generally be like the doc\n    # sync task rather than the others below\n    if task_id.startswith(DOCUMENT_SYNC_PREFIX):\n        r.srem(DOCUMENT_SYNC_TASKSET_KEY, task_id)\n        return\n\n    if task_id.startswith(RedisDocumentSet.PREFIX):\n        document_set_id = RedisDocumentSet.get_id_from_task_id(task_id)\n        if document_set_id is not None:\n            rds = RedisDocumentSet(tenant_id, int(document_set_id))\n            r.srem(rds.taskset_key, task_id)\n        return\n\n    if task_id.startswith(RedisUserGroup.PREFIX):\n        usergroup_id = RedisUserGroup.get_id_from_task_id(task_id)\n        if usergroup_id is not None:\n            rug = RedisUserGroup(tenant_id, int(usergroup_id))\n            r.srem(rug.taskset_key, task_id)\n        return\n\n    if task_id.startswith(RedisConnectorDelete.PREFIX):\n        cc_pair_id = RedisConnector.get_id_from_task_id(task_id)\n        if cc_pair_id is not None:\n            RedisConnectorDelete.remove_from_taskset(int(cc_pair_id), task_id, r)\n        return\n\n    if task_id.startswith(RedisConnectorPrune.SUBTASK_PREFIX):\n        cc_pair_id = RedisConnector.get_id_from_task_id(task_id)\n        if cc_pair_id is not None:\n            RedisConnectorPrune.remove_from_taskset(int(cc_pair_id), task_id, r)\n        return\n\n    if task_id.startswith(RedisConnectorPermissionSync.SUBTASK_PREFIX):\n        cc_pair_id = RedisConnector.get_id_from_task_id(task_id)\n        if cc_pair_id is not None:\n            RedisConnectorPermissionSync.remove_from_taskset(\n                int(cc_pair_id), task_id, r\n            )\n        return\n\n    if task_id.startswith(RedisConnectorExternalGroupSync.SUBTASK_PREFIX):\n        cc_pair_id = RedisConnector.get_id_from_task_id(task_id)\n        if cc_pair_id is not None:\n            RedisConnectorExternalGroupSync.remove_from_taskset(\n                int(cc_pair_id), task_id, r\n            )\n        return\n\n\ndef on_celeryd_init(\n    sender: str,  # noqa: ARG001\n    conf: Any = None,  # noqa: ARG001\n    **kwargs: Any,  # noqa: ARG001\n) -> None:\n    \"\"\"The first signal sent on celery worker startup\"\"\"\n\n    # NOTE(rkuo): start method \"fork\" is unsafe and we really need it to be \"spawn\"\n    # But something is blocking set_start_method from working in the cloud unless\n    # force=True. so we use force=True as a fallback.\n\n    all_start_methods: list[str] = multiprocessing.get_all_start_methods()\n    logger.info(f\"Multiprocessing all start methods: {all_start_methods}\")\n\n    try:\n        multiprocessing.set_start_method(\"spawn\")  # fork is unsafe, set to spawn\n    except Exception:\n        logger.info(\n            \"Multiprocessing set_start_method exceptioned. Trying force=True...\"\n        )\n        try:\n            multiprocessing.set_start_method(\n                \"spawn\", force=True\n            )  # fork is unsafe, set to spawn\n        except Exception:\n            logger.info(\n                \"Multiprocessing set_start_method force=True exceptioned even with force=True.\"\n            )\n\n    logger.info(\n        f\"Multiprocessing selected start method: {multiprocessing.get_start_method()}\"\n    )\n\n    # Initialize tracing in workers if credentials are available.\n    setup_tracing()\n\n\ndef wait_for_redis(sender: Any, **kwargs: Any) -> None:  # noqa: ARG001\n    \"\"\"Waits for redis to become ready subject to a hardcoded timeout.\n    Will raise WorkerShutdown to kill the celery worker if the timeout\n    is reached.\"\"\"\n\n    r = get_redis_client(tenant_id=POSTGRES_DEFAULT_SCHEMA)\n\n    WAIT_INTERVAL = 5\n    WAIT_LIMIT = 60\n\n    ready = False\n    time_start = time.monotonic()\n    logger.info(\"Redis: Readiness probe starting.\")\n    while True:\n        try:\n            if r.ping():\n                ready = True\n                break\n        except Exception:\n            pass\n\n        time_elapsed = time.monotonic() - time_start\n        if time_elapsed > WAIT_LIMIT:\n            break\n\n        logger.info(\n            f\"Redis: Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}\"\n        )\n\n        time.sleep(WAIT_INTERVAL)\n\n    if not ready:\n        msg = f\"Redis: Readiness probe did not succeed within the timeout ({WAIT_LIMIT} seconds). Exiting...\"\n        logger.error(msg)\n        raise WorkerShutdown(msg)\n\n    logger.info(\"Redis: Readiness probe succeeded. Continuing...\")\n    return\n\n\ndef wait_for_db(sender: Any, **kwargs: Any) -> None:  # noqa: ARG001\n    \"\"\"Waits for the db to become ready subject to a hardcoded timeout.\n    Will raise WorkerShutdown to kill the celery worker if the timeout is reached.\"\"\"\n\n    WAIT_INTERVAL = 5\n    WAIT_LIMIT = 60\n\n    ready = False\n    time_start = time.monotonic()\n    logger.info(\"Database: Readiness probe starting.\")\n    while True:\n        try:\n            with Session(get_sqlalchemy_engine()) as db_session:\n                result = db_session.execute(text(\"SELECT NOW()\")).scalar()\n                if result:\n                    ready = True\n                    break\n        except Exception:\n            pass\n\n        time_elapsed = time.monotonic() - time_start\n        if time_elapsed > WAIT_LIMIT:\n            break\n\n        logger.info(\n            f\"Database: Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}\"\n        )\n\n        time.sleep(WAIT_INTERVAL)\n\n    if not ready:\n        msg = f\"Database: Readiness probe did not succeed within the timeout ({WAIT_LIMIT} seconds). Exiting...\"\n        logger.error(msg)\n        raise WorkerShutdown(msg)\n\n    logger.info(\"Database: Readiness probe succeeded. Continuing...\")\n    return\n\n\ndef on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:  # noqa: ARG001\n    logger.info(f\"Running as a secondary celery worker: pid={os.getpid()}\")\n\n    # Set up variables for waiting on primary worker\n    WAIT_INTERVAL = 5\n    WAIT_LIMIT = 60\n    r = get_redis_client(tenant_id=POSTGRES_DEFAULT_SCHEMA)\n    time_start = time.monotonic()\n\n    logger.info(\"Waiting for primary worker to be ready...\")\n    while True:\n        if r.exists(OnyxRedisLocks.PRIMARY_WORKER):\n            break\n\n        time_elapsed = time.monotonic() - time_start\n        logger.info(\n            f\"Primary worker is not ready yet. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}\"\n        )\n        if time_elapsed > WAIT_LIMIT:\n            msg = f\"Primary worker was not ready within the timeout. ({WAIT_LIMIT} seconds). Exiting...\"\n            logger.error(msg)\n            raise WorkerShutdown(msg)\n\n        time.sleep(WAIT_INTERVAL)\n\n    logger.info(\"Wait for primary worker completed successfully. Continuing...\")\n    return\n\n\ndef on_worker_ready(sender: Any, **kwargs: Any) -> None:  # noqa: ARG001\n    task_logger.info(\"worker_ready signal received.\")\n\n    # file based way to do readiness/liveness probes\n    # https://medium.com/ambient-innovation/health-checks-for-celery-in-kubernetes-cf3274a3e106\n    # https://github.com/celery/celery/issues/4079#issuecomment-1270085680\n\n    hostname: str = cast(str, sender.hostname)\n    path = make_probe_path(\"readiness\", hostname)\n    path.touch()\n    logger.info(f\"Readiness signal touched at {path}.\")\n\n\ndef on_worker_shutdown(sender: Any, **kwargs: Any) -> None:  # noqa: ARG001\n    HttpxPool.close_all()\n\n    hostname: str = cast(str, sender.hostname)\n    path = make_probe_path(\"readiness\", hostname)\n    path.unlink(missing_ok=True)\n\n    if not celery_is_worker_primary(sender):\n        return\n\n    if not hasattr(sender, \"primary_worker_lock\"):\n        # primary_worker_lock will not exist when MULTI_TENANT is True\n        return\n\n    if not sender.primary_worker_lock:\n        return\n\n    logger.info(\"Releasing primary worker lock.\")\n    lock: RedisLock = sender.primary_worker_lock\n    try:\n        if lock.owned():\n            try:\n                lock.release()\n                sender.primary_worker_lock = None\n            except Exception:\n                logger.exception(\"Failed to release primary worker lock\")\n    except Exception:\n        logger.exception(\"Failed to check if primary worker lock is owned\")\n\n\ndef on_setup_logging(\n    loglevel: int,\n    logfile: str | None,\n    format: str,  # noqa: ARG001\n    colorize: bool,  # noqa: ARG001\n    **kwargs: Any,  # noqa: ARG001\n) -> None:\n    # TODO: could unhardcode format and colorize and accept these as options from\n    # celery's config\n\n    root_logger = logging.getLogger()\n    root_logger.handlers = []\n\n    # Define the log format\n    log_format = (\n        \"%(levelname)-8s %(asctime)s %(filename)15s:%(lineno)-4d: %(name)s %(message)s\"\n    )\n\n    # Set up the root handler\n    root_handler = logging.StreamHandler()\n    root_formatter = ColoredFormatter(\n        log_format,\n        datefmt=\"%m/%d/%Y %I:%M:%S %p\",\n    )\n    root_handler.setFormatter(root_formatter)\n    root_logger.addHandler(root_handler)\n\n    if logfile:\n        # Truncate log file if DEV_LOGGING_ENABLED (for clean dev experience)\n        if DEV_LOGGING_ENABLED and os.path.exists(logfile):\n            try:\n                open(logfile, \"w\").close()  # Truncate the file\n            except Exception:\n                pass  # Ignore errors, just proceed with normal logging\n\n        root_file_handler = logging.FileHandler(logfile)\n        root_file_formatter = PlainFormatter(\n            log_format,\n            datefmt=\"%m/%d/%Y %I:%M:%S %p\",\n        )\n        root_file_handler.setFormatter(root_file_formatter)\n        root_logger.addHandler(root_file_handler)\n\n    root_logger.setLevel(loglevel)\n\n    # Configure the task logger\n    task_logger.handlers = []\n\n    task_handler = logging.StreamHandler()\n    task_handler.addFilter(TenantContextFilter())\n    task_formatter = CeleryTaskColoredFormatter(\n        log_format,\n        datefmt=\"%m/%d/%Y %I:%M:%S %p\",\n    )\n    task_handler.setFormatter(task_formatter)\n    task_logger.addHandler(task_handler)\n\n    if logfile:\n        # No need to truncate again, already done above for root logger\n        task_file_handler = logging.FileHandler(logfile)\n        task_file_handler.addFilter(TenantContextFilter())\n        task_file_formatter = CeleryTaskPlainFormatter(\n            log_format,\n            datefmt=\"%m/%d/%Y %I:%M:%S %p\",\n        )\n        task_file_handler.setFormatter(task_file_formatter)\n        task_logger.addHandler(task_file_handler)\n\n    task_logger.setLevel(loglevel)\n    task_logger.propagate = False\n\n    # hide celery task received spam\n    # e.g. \"Task check_for_pruning[a1e96171-0ba8-4e00-887b-9fbf7442eab3] received\"\n    strategy.logger.setLevel(logging.WARNING)\n\n    # uncomment this to hide celery task succeeded/failed spam\n    # e.g. \"Task check_for_pruning[a1e96171-0ba8-4e00-887b-9fbf7442eab3] succeeded in 0.03137450001668185s: None\"\n    trace.logger.setLevel(logging.WARNING)\n\n\ndef set_task_finished_log_level(logLevel: int) -> None:\n    \"\"\"call this to override the setLevel in on_setup_logging. We are interested\n    in the task timings in the cloud but it can be spammy for self hosted.\"\"\"\n    trace.logger.setLevel(logLevel)\n\n\nclass TenantContextFilter(logging.Filter):\n    \"\"\"Logging filter to inject tenant ID into the logger's name.\"\"\"\n\n    def filter(self, record: logging.LogRecord) -> bool:\n        if not MULTI_TENANT:\n            record.name = \"\"\n            return True\n\n        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()\n        if tenant_id:\n            # Match the 8 character tenant abbreviation used in OnyxLoggingAdapter\n            tenant_id = tenant_id.split(TENANT_ID_PREFIX)[-1][:8]\n            record.name = f\"[t:{tenant_id}]\"\n        else:\n            record.name = \"\"\n        return True\n\n\n@task_postrun.connect\ndef reset_tenant_id(\n    sender: Any | None = None,  # noqa: ARG001\n    task_id: str | None = None,  # noqa: ARG001\n    task: Task | None = None,  # noqa: ARG001\n    args: tuple[Any, ...] | None = None,  # noqa: ARG001\n    kwargs: dict[str, Any] | None = None,  # noqa: ARG001\n    **other_kwargs: Any,  # noqa: ARG001\n) -> None:\n    \"\"\"Signal handler to reset tenant ID in context var after task ends.\"\"\"\n    CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)\n\n\ndef wait_for_vespa_or_shutdown(\n    sender: Any,  # noqa: ARG001\n    **kwargs: Any,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"Waits for Vespa to become ready subject to a timeout.\n    Raises WorkerShutdown if the timeout is reached.\"\"\"\n\n    if DISABLE_VECTOR_DB:\n        logger.info(\n            \"DISABLE_VECTOR_DB is set — skipping Vespa/OpenSearch readiness check.\"\n        )\n        return\n\n    if not wait_for_vespa_with_timeout():\n        msg = \"[Vespa] Readiness probe did not succeed within the timeout. Exiting...\"\n        logger.error(msg)\n        raise WorkerShutdown(msg)\n\n    if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:\n        if not wait_for_opensearch_with_timeout():\n            msg = \"[OpenSearch] Readiness probe did not succeed within the timeout. Exiting...\"\n            logger.error(msg)\n            raise WorkerShutdown(msg)\n\n\n# File for validating worker liveness\nclass LivenessProbe(bootsteps.StartStopStep):\n    requires = {\"celery.worker.components:Timer\"}\n\n    def __init__(self, worker: Any, **kwargs: Any) -> None:\n        super().__init__(worker, **kwargs)\n        self.requests: list[Any] = []\n        self.task_tref = None\n        self.path = make_probe_path(\"liveness\", worker.hostname)\n\n    def start(self, worker: Any) -> None:\n        self.task_tref = worker.timer.call_repeatedly(\n            15.0,\n            self.update_liveness_file,\n            (worker,),\n            priority=10,\n        )\n\n    def stop(self, worker: Any) -> None:  # noqa: ARG002\n        self.path.unlink(missing_ok=True)\n        if self.task_tref:\n            self.task_tref.cancel()\n\n    def update_liveness_file(self, worker: Any) -> None:  # noqa: ARG002\n        self.path.touch()\n\n\ndef get_bootsteps() -> list[type]:\n    return [LivenessProbe]\n\n\n# Task modules that require a vector DB (Vespa/OpenSearch).\n# When DISABLE_VECTOR_DB is True these are excluded from autodiscover lists.\n_VECTOR_DB_TASK_MODULES: set[str] = {\n    \"onyx.background.celery.tasks.connector_deletion\",\n    \"onyx.background.celery.tasks.docprocessing\",\n    \"onyx.background.celery.tasks.docfetching\",\n    \"onyx.background.celery.tasks.pruning\",\n    \"onyx.background.celery.tasks.vespa\",\n    \"onyx.background.celery.tasks.opensearch_migration\",\n    \"onyx.background.celery.tasks.doc_permission_syncing\",\n    \"onyx.background.celery.tasks.hierarchyfetching\",\n    # EE modules that are vector-DB-dependent\n    \"ee.onyx.background.celery.tasks.doc_permission_syncing\",\n    \"ee.onyx.background.celery.tasks.external_group_syncing\",\n}\n# NOTE: \"onyx.background.celery.tasks.shared\" is intentionally NOT in the set\n# above. It contains celery_beat_heartbeat (which only writes to Redis) alongside\n# document cleanup tasks. The cleanup tasks won't be invoked in minimal mode\n# because the periodic tasks that trigger them are in other filtered modules.\n\n\ndef filter_task_modules(modules: list[str]) -> list[str]:\n    \"\"\"Remove vector-DB-dependent task modules when DISABLE_VECTOR_DB is True.\"\"\"\n    if not DISABLE_VECTOR_DB:\n        return modules\n    return [m for m in modules if m not in _VECTOR_DB_TASK_MODULES]\n"
  },
  {
    "path": "backend/onyx/background/celery/apps/beat.py",
    "content": "from datetime import timedelta\nfrom typing import Any\n\nfrom celery import Celery\nfrom celery import signals\nfrom celery.beat import PersistentScheduler  # type: ignore\nfrom celery.signals import beat_init\nfrom celery.utils.log import get_task_logger\n\nimport onyx.background.celery.apps.app_base as app_base\nfrom onyx.background.celery.celery_utils import make_probe_path\nfrom onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT\nfrom onyx.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.engine.tenant_utils import get_all_tenant_ids\nfrom onyx.server.runtime.onyx_runtime import OnyxRuntime\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom shared_configs.configs import IGNORED_SYNCING_TENANT_LIST\nfrom shared_configs.configs import MULTI_TENANT\n\ntask_logger = get_task_logger(__name__)\n\ncelery_app = Celery(__name__)\ncelery_app.config_from_object(\"onyx.background.celery.configs.beat\")\n\n\nclass DynamicTenantScheduler(PersistentScheduler):\n    \"\"\"This scheduler is useful because we can dynamically adjust task generation rates\n    through it.\"\"\"\n\n    RELOAD_INTERVAL = 60\n\n    def __init__(self, *args: Any, **kwargs: Any) -> None:\n        super().__init__(*args, **kwargs)\n\n        self.last_beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT\n\n        self._reload_interval = timedelta(\n            seconds=DynamicTenantScheduler.RELOAD_INTERVAL\n        )\n        self._last_reload = self.app.now() - self._reload_interval\n\n        # Let the parent class handle store initialization\n        self.setup_schedule()\n        task_logger.info(\n            f\"DynamicTenantScheduler initialized: reload_interval={self._reload_interval}\"\n        )\n\n        self._liveness_probe_path = make_probe_path(\"liveness\", \"beat@hostname\")\n\n        # do not set the initial schedule here because we don't have db access yet.\n        # do it in beat_init after the db engine is initialized\n\n        # An initial schedule is required ... otherwise, the scheduler will delay\n        # for 5 minutes before calling tick()\n\n    def setup_schedule(self) -> None:\n        super().setup_schedule()\n\n    def tick(self) -> float:\n        retval = super().tick()\n        now = self.app.now()\n        if (\n            self._last_reload is None\n            or (now - self._last_reload) > self._reload_interval\n        ):\n            task_logger.debug(\"Reload interval reached, initiating task update\")\n            self._liveness_probe_path.touch()\n\n            try:\n                self._try_updating_schedule()\n            except (AttributeError, KeyError):\n                task_logger.exception(\"Failed to process task configuration\")\n            except Exception:\n                task_logger.exception(\"Unexpected error updating tasks\")\n\n            self._last_reload = now\n\n        return retval\n\n    def _generate_schedule(\n        self, tenant_ids: list[str] | list[None], beat_multiplier: float\n    ) -> dict[str, dict[str, Any]]:\n        \"\"\"Given a list of tenant id's, generates a new beat schedule for celery.\"\"\"\n        new_schedule: dict[str, dict[str, Any]] = {}\n\n        if MULTI_TENANT:\n            # cloud tasks are system wide and thus only need to be on the beat schedule\n            # once for all tenants\n            get_cloud_tasks_to_schedule = fetch_versioned_implementation(\n                \"onyx.background.celery.tasks.beat_schedule\",\n                \"get_cloud_tasks_to_schedule\",\n            )\n\n            cloud_tasks_to_schedule: list[dict[str, Any]] = get_cloud_tasks_to_schedule(\n                beat_multiplier\n            )\n            for task in cloud_tasks_to_schedule:\n                task_name = task[\"name\"]\n                cloud_task = {\n                    \"task\": task[\"task\"],\n                    \"schedule\": task[\"schedule\"],\n                    \"kwargs\": task.get(\"kwargs\", {}),\n                }\n                if options := task.get(\"options\"):\n                    task_logger.debug(f\"Adding options to task {task_name}: {options}\")\n                    cloud_task[\"options\"] = options\n                new_schedule[task_name] = cloud_task\n\n        # regular task beats are multiplied across all tenants\n        # note that currently this just schedules for a single tenant in self hosted\n        # and doesn't do anything in the cloud because it's much more scalable\n        # to schedule a single cloud beat task to dispatch per tenant tasks.\n        get_tasks_to_schedule = fetch_versioned_implementation(\n            \"onyx.background.celery.tasks.beat_schedule\", \"get_tasks_to_schedule\"\n        )\n\n        tasks_to_schedule: list[dict[str, Any]] = get_tasks_to_schedule()\n\n        for tenant_id in tenant_ids:\n            if IGNORED_SYNCING_TENANT_LIST and tenant_id in IGNORED_SYNCING_TENANT_LIST:\n                task_logger.debug(\n                    f\"Skipping tenant {tenant_id} as it is in the ignored syncing list\"\n                )\n                continue\n\n            for task in tasks_to_schedule:\n                task_name = task[\"name\"]\n                tenant_task_name = f\"{task['name']}-{tenant_id}\"\n\n                task_logger.debug(f\"Creating task configuration for {tenant_task_name}\")\n                tenant_task = {\n                    \"task\": task[\"task\"],\n                    \"schedule\": task[\"schedule\"],\n                    \"kwargs\": {\"tenant_id\": tenant_id},\n                }\n                if options := task.get(\"options\"):\n                    task_logger.debug(\n                        f\"Adding options to task {tenant_task_name}: {options}\"\n                    )\n                    tenant_task[\"options\"] = options\n\n                new_schedule[tenant_task_name] = tenant_task\n\n        return new_schedule\n\n    def _try_updating_schedule(self) -> None:\n        \"\"\"Only updates the actual beat schedule on the celery app when it changes\"\"\"\n        do_update = False\n\n        task_logger.debug(\"_try_updating_schedule starting\")\n\n        tenant_ids = get_all_tenant_ids()\n        task_logger.debug(f\"Found {len(tenant_ids)} IDs\")\n\n        # get current schedule and extract current tenants\n        current_schedule = self.schedule.items()\n\n        # get potential new state\n        try:\n            beat_multiplier = OnyxRuntime.get_beat_multiplier()\n        except Exception:\n            beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT\n\n        new_schedule = self._generate_schedule(tenant_ids, beat_multiplier)\n\n        # if the schedule or beat multiplier has changed, update\n        while True:\n            if beat_multiplier != self.last_beat_multiplier:\n                do_update = True\n                break\n\n            if not DynamicTenantScheduler._compare_schedules(\n                current_schedule, new_schedule\n            ):\n                do_update = True\n                break\n\n            break\n\n        if not do_update:\n            # exit early if nothing changed\n            task_logger.info(\n                f\"_try_updating_schedule - Schedule unchanged: tasks={len(new_schedule)} beat_multiplier={beat_multiplier}\"\n            )\n            return\n\n        # schedule needs updating\n        task_logger.debug(\n            \"Schedule update required\",\n            extra={\n                \"new_tasks\": len(new_schedule),\n                \"current_tasks\": len(current_schedule),\n            },\n        )\n\n        # Create schedule entries\n        entries = {}\n        for name, entry in new_schedule.items():\n            entries[name] = self.Entry(\n                name=name,\n                app=self.app,\n                task=entry[\"task\"],\n                schedule=entry[\"schedule\"],\n                options=entry.get(\"options\", {}),\n                kwargs=entry.get(\"kwargs\", {}),\n            )\n\n        # Update the schedule using the scheduler's methods\n        self.schedule.clear()\n        self.schedule.update(entries)\n\n        # Ensure changes are persisted\n        self.sync()\n\n        task_logger.info(\n            f\"_try_updating_schedule - Schedule updated: \"\n            f\"prev_num_tasks={len(current_schedule)} \"\n            f\"prev_beat_multiplier={self.last_beat_multiplier} \"\n            f\"tasks={len(new_schedule)} \"\n            f\"beat_multiplier={beat_multiplier}\"\n        )\n\n        self.last_beat_multiplier = beat_multiplier\n\n    @staticmethod\n    def _compare_schedules(schedule1: dict, schedule2: dict) -> bool:\n        \"\"\"Compare schedules by task name only to determine if an update is needed.\n        True if equivalent, False if not.\"\"\"\n        current_tasks = set(name for name, _ in schedule1)\n        new_tasks = set(schedule2.keys())\n        return current_tasks == new_tasks\n\n\n@beat_init.connect\ndef on_beat_init(sender: Any, **kwargs: Any) -> None:\n    task_logger.info(\"beat_init signal received.\")\n\n    # Celery beat shouldn't touch the db at all. But just setting a low minimum here.\n    SqlEngine.set_app_name(POSTGRES_CELERY_BEAT_APP_NAME)\n    SqlEngine.init_engine(pool_size=2, max_overflow=0)\n\n    app_base.wait_for_redis(sender, **kwargs)\n    path = make_probe_path(\"readiness\", \"beat@hostname\")\n    path.touch()\n    task_logger.info(f\"Readiness signal touched at {path}.\")\n\n    # first time init of the scheduler after db has been init'ed\n    scheduler: DynamicTenantScheduler = sender.scheduler\n    scheduler._try_updating_schedule()\n\n\n@signals.setup_logging.connect\ndef on_setup_logging(\n    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any\n) -> None:\n    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)\n\n\ncelery_app.conf.beat_scheduler = DynamicTenantScheduler\ncelery_app.conf.task_default_base = app_base.TenantAwareTask\n"
  },
  {
    "path": "backend/onyx/background/celery/apps/client.py",
    "content": "from celery import Celery\n\nimport onyx.background.celery.apps.app_base as app_base\n\ncelery_app = Celery(__name__)\ncelery_app.config_from_object(\"onyx.background.celery.configs.client\")\ncelery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]\n"
  },
  {
    "path": "backend/onyx/background/celery/apps/docfetching.py",
    "content": "from typing import Any\nfrom typing import cast\n\nfrom celery import Celery\nfrom celery import signals\nfrom celery import Task\nfrom celery.apps.worker import Worker\nfrom celery.signals import celeryd_init\nfrom celery.signals import worker_init\nfrom celery.signals import worker_ready\nfrom celery.signals import worker_shutdown\n\nimport onyx.background.celery.apps.app_base as app_base\nfrom onyx.configs.constants import POSTGRES_CELERY_WORKER_DOCFETCHING_APP_NAME\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.server.metrics.celery_task_metrics import on_celery_task_postrun\nfrom onyx.server.metrics.celery_task_metrics import on_celery_task_prerun\nfrom onyx.server.metrics.celery_task_metrics import on_celery_task_rejected\nfrom onyx.server.metrics.celery_task_metrics import on_celery_task_retry\nfrom onyx.server.metrics.celery_task_metrics import on_celery_task_revoked\nfrom onyx.server.metrics.indexing_task_metrics import on_indexing_task_postrun\nfrom onyx.server.metrics.indexing_task_metrics import on_indexing_task_prerun\nfrom onyx.server.metrics.metrics_server import start_metrics_server\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\n\nlogger = setup_logger()\n\ncelery_app = Celery(__name__)\ncelery_app.config_from_object(\"onyx.background.celery.configs.docfetching\")\ncelery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]\n\n\n@signals.task_prerun.connect\ndef on_task_prerun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)\n    on_celery_task_prerun(task_id, task)\n    on_indexing_task_prerun(task_id, task, kwargs)\n\n\n@signals.task_postrun.connect\ndef on_task_postrun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    retval: Any | None = None,\n    state: str | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)\n    on_celery_task_postrun(task_id, task, state)\n    on_indexing_task_postrun(task_id, task, kwargs, state)\n\n\n@signals.task_retry.connect\ndef on_task_retry(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001\n    # task_retry signal doesn't pass task_id in kwargs; get it from\n    # the sender (the task instance) via sender.request.id.\n    task_id = getattr(getattr(sender, \"request\", None), \"id\", None)\n    on_celery_task_retry(task_id, sender)\n\n\n@signals.task_revoked.connect\ndef on_task_revoked(sender: Any | None = None, **kwargs: Any) -> None:\n    task_name = getattr(sender, \"name\", None) or str(sender)\n    on_celery_task_revoked(kwargs.get(\"task_id\"), task_name)\n\n\n@signals.task_rejected.connect\ndef on_task_rejected(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001\n    # task_rejected sends the Consumer as sender, not the task instance.\n    # The task name must be extracted from the Celery message headers.\n    message = kwargs.get(\"message\")\n    task_name: str | None = None\n    if message is not None:\n        headers = getattr(message, \"headers\", None) or {}\n        task_name = headers.get(\"task\")\n    if task_name is None:\n        task_name = \"unknown\"\n    on_celery_task_rejected(None, task_name)\n\n\n@celeryd_init.connect\ndef on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:\n    app_base.on_celeryd_init(sender, conf, **kwargs)\n\n\n@worker_init.connect\ndef on_worker_init(sender: Worker, **kwargs: Any) -> None:\n    logger.info(\"worker_init signal received.\")\n\n    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_DOCFETCHING_APP_NAME)\n    pool_size = cast(int, sender.concurrency)  # type: ignore\n    SqlEngine.init_engine(pool_size=pool_size, max_overflow=8)\n\n    app_base.wait_for_redis(sender, **kwargs)\n    app_base.wait_for_db(sender, **kwargs)\n    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)\n\n    # Less startup checks in multi-tenant case\n    if MULTI_TENANT:\n        return\n\n    app_base.on_secondary_worker_init(sender, **kwargs)\n\n\n@worker_ready.connect\ndef on_worker_ready(sender: Any, **kwargs: Any) -> None:\n    start_metrics_server(\"docfetching\")\n    app_base.on_worker_ready(sender, **kwargs)\n\n\n@worker_shutdown.connect\ndef on_worker_shutdown(sender: Any, **kwargs: Any) -> None:\n    app_base.on_worker_shutdown(sender, **kwargs)\n\n\n@signals.setup_logging.connect\ndef on_setup_logging(\n    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any\n) -> None:\n    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)\n\n\nbase_bootsteps = app_base.get_bootsteps()\nfor bootstep in base_bootsteps:\n    celery_app.steps[\"worker\"].add(bootstep)\n\ncelery_app.autodiscover_tasks(\n    app_base.filter_task_modules(\n        [\n            \"onyx.background.celery.tasks.docfetching\",\n        ]\n    )\n)\n"
  },
  {
    "path": "backend/onyx/background/celery/apps/docprocessing.py",
    "content": "from typing import Any\nfrom typing import cast\n\nfrom celery import Celery\nfrom celery import signals\nfrom celery import Task\nfrom celery.apps.worker import Worker\nfrom celery.signals import celeryd_init\nfrom celery.signals import worker_init\nfrom celery.signals import worker_process_init\nfrom celery.signals import worker_ready\nfrom celery.signals import worker_shutdown\n\nimport onyx.background.celery.apps.app_base as app_base\nfrom onyx.configs.constants import POSTGRES_CELERY_WORKER_DOCPROCESSING_APP_NAME\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.server.metrics.celery_task_metrics import on_celery_task_postrun\nfrom onyx.server.metrics.celery_task_metrics import on_celery_task_prerun\nfrom onyx.server.metrics.celery_task_metrics import on_celery_task_rejected\nfrom onyx.server.metrics.celery_task_metrics import on_celery_task_retry\nfrom onyx.server.metrics.celery_task_metrics import on_celery_task_revoked\nfrom onyx.server.metrics.indexing_task_metrics import on_indexing_task_postrun\nfrom onyx.server.metrics.indexing_task_metrics import on_indexing_task_prerun\nfrom onyx.server.metrics.metrics_server import start_metrics_server\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\n\nlogger = setup_logger()\n\ncelery_app = Celery(__name__)\ncelery_app.config_from_object(\"onyx.background.celery.configs.docprocessing\")\ncelery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]\n\n\n@signals.task_prerun.connect\ndef on_task_prerun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)\n    on_celery_task_prerun(task_id, task)\n    on_indexing_task_prerun(task_id, task, kwargs)\n\n\n@signals.task_postrun.connect\ndef on_task_postrun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    retval: Any | None = None,\n    state: str | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)\n    on_celery_task_postrun(task_id, task, state)\n    on_indexing_task_postrun(task_id, task, kwargs, state)\n\n\n@signals.task_retry.connect\ndef on_task_retry(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001\n    # task_retry signal doesn't pass task_id in kwargs; get it from\n    # the sender (the task instance) via sender.request.id.\n    task_id = getattr(getattr(sender, \"request\", None), \"id\", None)\n    on_celery_task_retry(task_id, sender)\n\n\n@signals.task_revoked.connect\ndef on_task_revoked(sender: Any | None = None, **kwargs: Any) -> None:\n    task_name = getattr(sender, \"name\", None) or str(sender)\n    on_celery_task_revoked(kwargs.get(\"task_id\"), task_name)\n\n\n@signals.task_rejected.connect\ndef on_task_rejected(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001\n    # task_rejected sends the Consumer as sender, not the task instance.\n    # The task name must be extracted from the Celery message headers.\n    message = kwargs.get(\"message\")\n    task_name: str | None = None\n    if message is not None:\n        headers = getattr(message, \"headers\", None) or {}\n        task_name = headers.get(\"task\")\n    if task_name is None:\n        task_name = \"unknown\"\n    on_celery_task_rejected(None, task_name)\n\n\n@celeryd_init.connect\ndef on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:\n    app_base.on_celeryd_init(sender, conf, **kwargs)\n\n\n@worker_init.connect\ndef on_worker_init(sender: Worker, **kwargs: Any) -> None:\n    logger.info(\"worker_init signal received.\")\n\n    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_DOCPROCESSING_APP_NAME)\n\n    # rkuo: Transient errors keep happening in the indexing watchdog threads.\n    # \"SSL connection has been closed unexpectedly\"\n    # actually setting the spawn method in the cloud fixes 95% of these.\n    # setting pre ping might help even more, but not worrying about that yet\n    pool_size = cast(int, sender.concurrency)  # type: ignore\n    SqlEngine.init_engine(pool_size=pool_size, max_overflow=8)\n\n    app_base.wait_for_redis(sender, **kwargs)\n    app_base.wait_for_db(sender, **kwargs)\n    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)\n\n    # Less startup checks in multi-tenant case\n    if MULTI_TENANT:\n        return\n\n    app_base.on_secondary_worker_init(sender, **kwargs)\n\n\n@worker_ready.connect\ndef on_worker_ready(sender: Any, **kwargs: Any) -> None:\n    start_metrics_server(\"docprocessing\")\n    app_base.on_worker_ready(sender, **kwargs)\n\n\n@worker_shutdown.connect\ndef on_worker_shutdown(sender: Any, **kwargs: Any) -> None:\n    app_base.on_worker_shutdown(sender, **kwargs)\n\n\n# Note: worker_process_init only fires in prefork pool mode. Docprocessing uses\n# worker_pool=\"threads\" (see configs/docprocessing.py), so this handler is\n# effectively a no-op in normal operation. It remains as a safety net in case\n# the pool type is ever changed to prefork. Prometheus metrics are safe in\n# thread-pool mode since all threads share the same process memory and can\n# update the same Counter/Gauge/Histogram objects directly.\n@worker_process_init.connect\ndef init_worker(**kwargs: Any) -> None:  # noqa: ARG001\n    SqlEngine.reset_engine()\n\n\n@signals.setup_logging.connect\ndef on_setup_logging(\n    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any\n) -> None:\n    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)\n\n\nbase_bootsteps = app_base.get_bootsteps()\nfor bootstep in base_bootsteps:\n    celery_app.steps[\"worker\"].add(bootstep)\n\ncelery_app.autodiscover_tasks(\n    app_base.filter_task_modules(\n        [\n            \"onyx.background.celery.tasks.docprocessing\",\n        ]\n    )\n)\n"
  },
  {
    "path": "backend/onyx/background/celery/apps/heavy.py",
    "content": "from typing import Any\nfrom typing import cast\n\nfrom celery import Celery\nfrom celery import signals\nfrom celery import Task\nfrom celery.apps.worker import Worker\nfrom celery.signals import celeryd_init\nfrom celery.signals import worker_init\nfrom celery.signals import worker_ready\nfrom celery.signals import worker_shutdown\n\nimport onyx.background.celery.apps.app_base as app_base\nfrom onyx.configs.constants import POSTGRES_CELERY_WORKER_HEAVY_APP_NAME\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\n\nlogger = setup_logger()\n\ncelery_app = Celery(__name__)\ncelery_app.config_from_object(\"onyx.background.celery.configs.heavy\")\ncelery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]\n\n\n@signals.task_prerun.connect\ndef on_task_prerun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)\n\n\n@signals.task_postrun.connect\ndef on_task_postrun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    retval: Any | None = None,\n    state: str | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)\n\n\n@celeryd_init.connect\ndef on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:\n    app_base.on_celeryd_init(sender, conf, **kwargs)\n\n\n@worker_init.connect\ndef on_worker_init(sender: Worker, **kwargs: Any) -> None:\n    logger.info(\"worker_init signal received.\")\n\n    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_HEAVY_APP_NAME)\n    pool_size = cast(int, sender.concurrency)  # type: ignore\n    SqlEngine.init_engine(pool_size=pool_size, max_overflow=8)\n\n    app_base.wait_for_redis(sender, **kwargs)\n    app_base.wait_for_db(sender, **kwargs)\n    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)\n\n    # Less startup checks in multi-tenant case\n    if MULTI_TENANT:\n        return\n\n    app_base.on_secondary_worker_init(sender, **kwargs)\n\n\n@worker_ready.connect\ndef on_worker_ready(sender: Any, **kwargs: Any) -> None:\n    app_base.on_worker_ready(sender, **kwargs)\n\n\n@worker_shutdown.connect\ndef on_worker_shutdown(sender: Any, **kwargs: Any) -> None:\n    app_base.on_worker_shutdown(sender, **kwargs)\n\n\n@signals.setup_logging.connect\ndef on_setup_logging(\n    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any\n) -> None:\n    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)\n\n\nbase_bootsteps = app_base.get_bootsteps()\nfor bootstep in base_bootsteps:\n    celery_app.steps[\"worker\"].add(bootstep)\n\ncelery_app.autodiscover_tasks(\n    app_base.filter_task_modules(\n        [\n            \"onyx.background.celery.tasks.pruning\",\n            # Sandbox tasks (file sync, cleanup)\n            \"onyx.server.features.build.sandbox.tasks\",\n            \"onyx.background.celery.tasks.hierarchyfetching\",\n        ]\n    )\n)\n"
  },
  {
    "path": "backend/onyx/background/celery/apps/light.py",
    "content": "from typing import Any\n\nfrom celery import Celery\nfrom celery import signals\nfrom celery import Task\nfrom celery.apps.worker import Worker\nfrom celery.signals import celeryd_init\nfrom celery.signals import worker_init\nfrom celery.signals import worker_ready\nfrom celery.signals import worker_shutdown\n\nimport onyx.background.celery.apps.app_base as app_base\nfrom onyx.background.celery.celery_utils import httpx_init_vespa_pool\nfrom onyx.configs.app_configs import MANAGED_VESPA\nfrom onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH\nfrom onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH\nfrom onyx.configs.constants import POSTGRES_CELERY_WORKER_LIGHT_APP_NAME\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\ncelery_app = Celery(__name__)\ncelery_app.config_from_object(\"onyx.background.celery.configs.light\")\ncelery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]\n\n\n@signals.task_prerun.connect\ndef on_task_prerun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)\n\n\n@signals.task_postrun.connect\ndef on_task_postrun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    retval: Any | None = None,\n    state: str | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)\n\n\n@celeryd_init.connect\ndef on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:\n    app_base.on_celeryd_init(sender, conf, **kwargs)\n\n\n@worker_init.connect\ndef on_worker_init(sender: Worker, **kwargs: Any) -> None:\n    EXTRA_CONCURRENCY = 8  # small extra fudge factor for connection limits\n\n    logger.info(\"worker_init signal received.\")\n\n    logger.info(f\"Concurrency: {sender.concurrency}\")  # type: ignore\n\n    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_LIGHT_APP_NAME)\n    SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=EXTRA_CONCURRENCY)  # type: ignore\n\n    if MANAGED_VESPA:\n        httpx_init_vespa_pool(\n            sender.concurrency + EXTRA_CONCURRENCY,  # type: ignore\n            ssl_cert=VESPA_CLOUD_CERT_PATH,\n            ssl_key=VESPA_CLOUD_KEY_PATH,\n        )\n    else:\n        httpx_init_vespa_pool(sender.concurrency + EXTRA_CONCURRENCY)  # type: ignore\n\n    app_base.wait_for_redis(sender, **kwargs)\n    app_base.wait_for_db(sender, **kwargs)\n    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)\n\n    # Less startup checks in multi-tenant case\n    if MULTI_TENANT:\n        return\n\n    app_base.on_secondary_worker_init(sender, **kwargs)\n\n\n@worker_ready.connect\ndef on_worker_ready(sender: Any, **kwargs: Any) -> None:\n    app_base.on_worker_ready(sender, **kwargs)\n\n\n@worker_shutdown.connect\ndef on_worker_shutdown(sender: Any, **kwargs: Any) -> None:\n    app_base.on_worker_shutdown(sender, **kwargs)\n\n\n@signals.setup_logging.connect\ndef on_setup_logging(\n    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any\n) -> None:\n    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)\n\n\nbase_bootsteps = app_base.get_bootsteps()\nfor bootstep in base_bootsteps:\n    celery_app.steps[\"worker\"].add(bootstep)\n\ncelery_app.autodiscover_tasks(\n    app_base.filter_task_modules(\n        [\n            \"onyx.background.celery.tasks.shared\",\n            \"onyx.background.celery.tasks.vespa\",\n            \"onyx.background.celery.tasks.connector_deletion\",\n            \"onyx.background.celery.tasks.doc_permission_syncing\",\n            \"onyx.background.celery.tasks.docprocessing\",\n            \"onyx.background.celery.tasks.opensearch_migration\",\n            # Sandbox cleanup tasks (isolated in build feature)\n            \"onyx.server.features.build.sandbox.tasks\",\n        ]\n    )\n)\n"
  },
  {
    "path": "backend/onyx/background/celery/apps/monitoring.py",
    "content": "import multiprocessing\nfrom typing import Any\n\nfrom celery import Celery\nfrom celery import signals\nfrom celery import Task\nfrom celery.signals import celeryd_init\nfrom celery.signals import worker_init\nfrom celery.signals import worker_ready\nfrom celery.signals import worker_shutdown\n\nimport onyx.background.celery.apps.app_base as app_base\nfrom onyx.configs.constants import POSTGRES_CELERY_WORKER_MONITORING_APP_NAME\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\n\nlogger = setup_logger()\n\ncelery_app = Celery(__name__)\ncelery_app.config_from_object(\"onyx.background.celery.configs.monitoring\")\ncelery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]\n\n\n@signals.task_prerun.connect\ndef on_task_prerun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)\n\n\n@signals.task_postrun.connect\ndef on_task_postrun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    retval: Any | None = None,\n    state: str | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)\n\n\n@celeryd_init.connect\ndef on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None:\n    app_base.on_celeryd_init(sender, conf, **kwargs)\n\n\n# Set by on_worker_init so on_worker_ready knows whether to start the server.\n_prometheus_collectors_ok: bool = False\n\n\n@worker_init.connect\ndef on_worker_init(sender: Any, **kwargs: Any) -> None:\n    global _prometheus_collectors_ok\n\n    logger.info(\"worker_init signal received.\")\n    logger.info(f\"Multiprocessing start method: {multiprocessing.get_start_method()}\")\n\n    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_MONITORING_APP_NAME)\n    SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=3)\n\n    app_base.wait_for_redis(sender, **kwargs)\n    app_base.wait_for_db(sender, **kwargs)\n\n    _prometheus_collectors_ok = _setup_prometheus_collectors(sender)\n\n    # Less startup checks in multi-tenant case\n    if MULTI_TENANT:\n        return\n\n    app_base.on_secondary_worker_init(sender, **kwargs)\n\n\ndef _setup_prometheus_collectors(sender: Any) -> bool:\n    \"\"\"Register Prometheus collectors that need Redis/DB access.\n\n    Passes the Celery app so the queue depth collector can obtain a fresh\n    broker Redis client on each scrape (rather than holding a stale reference).\n\n    Returns True if registration succeeded, False otherwise.\n    \"\"\"\n    try:\n        from onyx.server.metrics.indexing_pipeline_setup import (\n            setup_indexing_pipeline_metrics,\n        )\n\n        setup_indexing_pipeline_metrics(sender.app)\n        logger.info(\"Prometheus indexing pipeline collectors registered\")\n        return True\n    except Exception:\n        logger.exception(\"Failed to register Prometheus indexing pipeline collectors\")\n        return False\n\n\n@worker_ready.connect\ndef on_worker_ready(sender: Any, **kwargs: Any) -> None:\n    if _prometheus_collectors_ok:\n        from onyx.server.metrics.metrics_server import start_metrics_server\n\n        start_metrics_server(\"monitoring\")\n    else:\n        logger.warning(\n            \"Skipping Prometheus metrics server — collector registration failed\"\n        )\n    app_base.on_worker_ready(sender, **kwargs)\n\n\n@worker_shutdown.connect\ndef on_worker_shutdown(sender: Any, **kwargs: Any) -> None:\n    app_base.on_worker_shutdown(sender, **kwargs)\n\n\n@signals.setup_logging.connect\ndef on_setup_logging(\n    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any\n) -> None:\n    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)\n\n\nbase_bootsteps = app_base.get_bootsteps()\nfor bootstep in base_bootsteps:\n    celery_app.steps[\"worker\"].add(bootstep)\n\ncelery_app.autodiscover_tasks(\n    app_base.filter_task_modules(\n        [\n            \"onyx.background.celery.tasks.monitoring\",\n        ]\n    )\n)\n"
  },
  {
    "path": "backend/onyx/background/celery/apps/primary.py",
    "content": "import logging\nimport os\nfrom typing import Any\nfrom typing import cast\n\nfrom celery import bootsteps  # type: ignore\nfrom celery import Celery\nfrom celery import signals\nfrom celery import Task\nfrom celery.apps.worker import Worker\nfrom celery.exceptions import WorkerShutdown\nfrom celery.result import AsyncResult\nfrom celery.signals import celeryd_init\nfrom celery.signals import worker_init\nfrom celery.signals import worker_ready\nfrom celery.signals import worker_shutdown\nfrom redis.lock import Lock as RedisLock\n\nimport onyx.background.celery.apps.app_base as app_base\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.celery_utils import celery_is_worker_primary\nfrom onyx.background.celery.tasks.vespa.document_sync import reset_document_sync\nfrom onyx.configs.app_configs import CELERY_WORKER_PRIMARY_POOL_OVERFLOW\nfrom onyx.configs.constants import CELERY_PRIMARY_WORKER_LOCK_TIMEOUT\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.configs.constants import POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.index_attempt import get_index_attempt\nfrom onyx.db.index_attempt import mark_attempt_canceled\nfrom onyx.db.indexing_coordination import IndexingCoordination\nfrom onyx.redis.redis_connector_delete import RedisConnectorDelete\nfrom onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync\nfrom onyx.redis.redis_connector_ext_group_sync import RedisConnectorExternalGroupSync\nfrom onyx.redis.redis_connector_prune import RedisConnectorPrune\nfrom onyx.redis.redis_connector_stop import RedisConnectorStop\nfrom onyx.redis.redis_document_set import RedisDocumentSet\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.redis.redis_usergroup import RedisUserGroup\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\nlogger = setup_logger()\n\ncelery_app = Celery(__name__)\ncelery_app.config_from_object(\"onyx.background.celery.configs.primary\")\ncelery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]\n\n\n@signals.task_prerun.connect\ndef on_task_prerun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)\n\n\n@signals.task_postrun.connect\ndef on_task_postrun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    retval: Any | None = None,\n    state: str | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)\n\n\n@celeryd_init.connect\ndef on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:\n    app_base.on_celeryd_init(sender, conf, **kwargs)\n\n\n@worker_init.connect\ndef on_worker_init(sender: Worker, **kwargs: Any) -> None:\n    logger.info(\"worker_init signal received.\")\n\n    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME)\n    pool_size = cast(int, sender.concurrency)  # type: ignore\n    SqlEngine.init_engine(\n        pool_size=pool_size, max_overflow=CELERY_WORKER_PRIMARY_POOL_OVERFLOW\n    )\n\n    app_base.wait_for_redis(sender, **kwargs)\n    app_base.wait_for_db(sender, **kwargs)\n    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)\n\n    logger.info(f\"Running as the primary celery worker: pid={os.getpid()}\")\n\n    # Less startup checks in multi-tenant case\n    if MULTI_TENANT:\n        return\n\n    # This is singleton work that should be done on startup exactly once\n    # by the primary worker. This is unnecessary in the multi tenant scenario\n    r = get_redis_client(tenant_id=POSTGRES_DEFAULT_SCHEMA)\n\n    # Log the role and slave count - being connected to a slave or slave count > 0 could be problematic\n    replication_info: dict[str, Any] = cast(dict, r.info(\"replication\"))\n    role: str = cast(str, replication_info.get(\"role\", \"\"))\n    connected_slaves: int = replication_info.get(\"connected_slaves\", 0)\n\n    logger.info(\n        f\"Redis INFO REPLICATION: role={role} connected_slaves={connected_slaves}\"\n    )\n\n    memory_info: dict[str, Any] = cast(dict, r.info(\"memory\"))\n    maxmemory_policy: str = cast(str, memory_info.get(\"maxmemory_policy\", \"\"))\n\n    logger.info(f\"Redis INFO MEMORY: maxmemory_policy={maxmemory_policy}\")\n\n    # For the moment, we're assuming that we are the only primary worker\n    # that should be running.\n    # TODO: maybe check for or clean up another zombie primary worker if we detect it\n    r.delete(OnyxRedisLocks.PRIMARY_WORKER)\n\n    # this process wide lock is taken to help other workers start up in order.\n    # it is planned to use this lock to enforce singleton behavior on the primary\n    # worker, since the primary worker does redis cleanup on startup, but this isn't\n    # implemented yet.\n\n    # set thread_local=False since we don't control what thread the periodic task might\n    # reacquire the lock with\n    lock: RedisLock = r.lock(\n        OnyxRedisLocks.PRIMARY_WORKER,\n        timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT,\n        thread_local=False,\n    )\n\n    logger.info(\"Primary worker lock: Acquire starting.\")\n    acquired = lock.acquire(blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2)\n    if acquired:\n        logger.info(\"Primary worker lock: Acquire succeeded.\")\n    else:\n        logger.error(\"Primary worker lock: Acquire failed!\")\n        raise WorkerShutdown(\"Primary worker lock could not be acquired!\")\n\n    # tacking on our own user data to the sender\n    sender.primary_worker_lock = lock  # type: ignore\n\n    # As currently designed, when this worker starts as \"primary\", we reinitialize redis\n    # to a clean state (for our purposes, anyway)\n    r.delete(OnyxRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK)\n\n    r.delete(OnyxRedisConstants.ACTIVE_FENCES)\n\n    # NOTE: we want to remove the `Redis*` classes, prefer to just have functions\n    # This is the preferred way to do this going forward\n    reset_document_sync(r)\n\n    RedisDocumentSet.reset_all(r)\n    RedisUserGroup.reset_all(r)\n    RedisConnectorDelete.reset_all(r)\n    RedisConnectorPrune.reset_all(r)\n    RedisConnectorStop.reset_all(r)\n    RedisConnectorPermissionSync.reset_all(r)\n    RedisConnectorExternalGroupSync.reset_all(r)\n\n    # mark orphaned index attempts as failed\n    # This uses database coordination instead of Redis fencing\n    with get_session_with_current_tenant() as db_session:\n        # Get potentially orphaned attempts (those with active status and task IDs)\n        potentially_orphaned_ids = IndexingCoordination.get_orphaned_index_attempt_ids(\n            db_session\n        )\n\n        for attempt_id in potentially_orphaned_ids:\n            attempt = get_index_attempt(db_session, attempt_id)\n\n            # handle case where not started or docfetching is done but indexing is not\n            if (\n                not attempt\n                or not attempt.celery_task_id\n                or attempt.total_batches is not None\n            ):\n                continue\n\n            # Check if the Celery task actually exists\n            try:\n                result: AsyncResult = AsyncResult(attempt.celery_task_id)\n\n                # If the task is not in PENDING state, it exists in Celery\n                if result.state != \"PENDING\":\n                    continue\n\n                # Task is orphaned - mark as failed\n                failure_reason = (\n                    f\"Orphaned index attempt found on startup - Celery task not found: \"\n                    f\"index_attempt={attempt.id} \"\n                    f\"cc_pair={attempt.connector_credential_pair_id} \"\n                    f\"search_settings={attempt.search_settings_id} \"\n                    f\"celery_task_id={attempt.celery_task_id}\"\n                )\n                logger.warning(failure_reason)\n                mark_attempt_canceled(attempt.id, db_session, failure_reason)\n\n            except Exception:\n                # If we can't check the task status, be conservative and continue\n                logger.warning(\n                    f\"Could not verify Celery task status on startup for attempt {attempt.id}, task_id={attempt.celery_task_id}\"\n                )\n\n\n@worker_ready.connect\ndef on_worker_ready(sender: Any, **kwargs: Any) -> None:\n    app_base.on_worker_ready(sender, **kwargs)\n\n\n@worker_shutdown.connect\ndef on_worker_shutdown(sender: Any, **kwargs: Any) -> None:\n    app_base.on_worker_shutdown(sender, **kwargs)\n\n\n@signals.setup_logging.connect\ndef on_setup_logging(\n    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any\n) -> None:\n    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)\n\n    # this can be spammy, so just enable it in the cloud for now\n    if MULTI_TENANT:\n        app_base.set_task_finished_log_level(logging.INFO)\n\n\nclass HubPeriodicTask(bootsteps.StartStopStep):\n    \"\"\"Regularly reacquires the primary worker lock outside of the task queue.\n    Use the task_logger in this class to avoid double logging.\n\n    This cannot be done inside a regular beat task because it must run on schedule and\n    a queue of existing work would starve the task from running.\n    \"\"\"\n\n    # it's unclear to me whether using the hub's timer or the bootstep timer is better\n    requires = {\"celery.worker.components:Hub\"}\n\n    def __init__(self, worker: Any, **kwargs: Any) -> None:  # noqa: ARG002\n        self.interval = CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 8  # Interval in seconds\n        self.task_tref = None\n\n    def start(self, worker: Any) -> None:\n        if not celery_is_worker_primary(worker):\n            return\n\n        # Access the worker's event loop (hub)\n        hub = worker.consumer.controller.hub\n\n        # Schedule the periodic task\n        self.task_tref = hub.call_repeatedly(\n            self.interval, self.run_periodic_task, worker\n        )\n        task_logger.info(\"Scheduled periodic task with hub.\")\n\n    def run_periodic_task(self, worker: Any) -> None:\n        try:\n            if not celery_is_worker_primary(worker):\n                return\n\n            if not hasattr(worker, \"primary_worker_lock\"):\n                return\n\n            lock: RedisLock = worker.primary_worker_lock\n\n            r = get_redis_client(tenant_id=POSTGRES_DEFAULT_SCHEMA)\n\n            if lock.owned():\n                task_logger.debug(\"Reacquiring primary worker lock.\")\n                lock.reacquire()\n            else:\n                task_logger.warning(\n                    \"Full acquisition of primary worker lock. Reasons could be worker restart or lock expiration.\"\n                )\n                lock = r.lock(\n                    OnyxRedisLocks.PRIMARY_WORKER,\n                    timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT,\n                )\n\n                task_logger.info(\"Primary worker lock: Acquire starting.\")\n                acquired = lock.acquire(\n                    blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2\n                )\n                if acquired:\n                    task_logger.info(\"Primary worker lock: Acquire succeeded.\")\n                    worker.primary_worker_lock = lock\n                else:\n                    task_logger.error(\"Primary worker lock: Acquire failed!\")\n                    raise TimeoutError(\"Primary worker lock could not be acquired!\")\n\n        except Exception:\n            task_logger.exception(\"Periodic task failed.\")\n\n    def stop(self, worker: Any) -> None:  # noqa: ARG002\n        # Cancel the scheduled task when the worker stops\n        if self.task_tref:\n            self.task_tref.cancel()\n            task_logger.info(\"Canceled periodic task with hub.\")\n\n\ncelery_app.steps[\"worker\"].add(HubPeriodicTask)\n\nbase_bootsteps = app_base.get_bootsteps()\nfor bootstep in base_bootsteps:\n    celery_app.steps[\"worker\"].add(bootstep)\n\ncelery_app.autodiscover_tasks(\n    app_base.filter_task_modules(\n        [\n            \"onyx.background.celery.tasks.connector_deletion\",\n            \"onyx.background.celery.tasks.docprocessing\",\n            \"onyx.background.celery.tasks.evals\",\n            \"onyx.background.celery.tasks.hierarchyfetching\",\n            \"onyx.background.celery.tasks.periodic\",\n            \"onyx.background.celery.tasks.pruning\",\n            \"onyx.background.celery.tasks.shared\",\n            \"onyx.background.celery.tasks.vespa\",\n            \"onyx.background.celery.tasks.llm_model_update\",\n            \"onyx.background.celery.tasks.user_file_processing\",\n        ]\n    )\n)\n"
  },
  {
    "path": "backend/onyx/background/celery/apps/task_formatters.py",
    "content": "import logging\n\nfrom celery import current_task\n\nfrom onyx.utils.logger import ColoredFormatter\nfrom onyx.utils.logger import PlainFormatter\n\n\nclass CeleryTaskPlainFormatter(PlainFormatter):\n    def format(self, record: logging.LogRecord) -> str:\n        task = current_task\n        if task and task.request:\n            record.__dict__.update(task_id=task.request.id, task_name=task.name)\n            record.msg = f\"[{task.name}({task.request.id})] {record.msg}\"\n\n        return super().format(record)\n\n\nclass CeleryTaskColoredFormatter(ColoredFormatter):\n    def format(self, record: logging.LogRecord) -> str:\n        task = current_task\n        if task and task.request:\n            record.__dict__.update(task_id=task.request.id, task_name=task.name)\n            record.msg = f\"[{task.name}({task.request.id})] {record.msg}\"\n\n        return super().format(record)\n"
  },
  {
    "path": "backend/onyx/background/celery/apps/user_file_processing.py",
    "content": "from typing import Any\nfrom typing import cast\n\nfrom celery import Celery\nfrom celery import signals\nfrom celery import Task\nfrom celery.apps.worker import Worker\nfrom celery.signals import celeryd_init\nfrom celery.signals import worker_init\nfrom celery.signals import worker_process_init\nfrom celery.signals import worker_ready\nfrom celery.signals import worker_shutdown\n\nimport onyx.background.celery.apps.app_base as app_base\nfrom onyx.configs.constants import POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\n\nlogger = setup_logger()\n\ncelery_app = Celery(__name__)\ncelery_app.config_from_object(\"onyx.background.celery.configs.user_file_processing\")\ncelery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]\n\n\n@signals.task_prerun.connect\ndef on_task_prerun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)\n\n\n@signals.task_postrun.connect\ndef on_task_postrun(\n    sender: Any | None = None,\n    task_id: str | None = None,\n    task: Task | None = None,\n    args: tuple | None = None,\n    kwargs: dict | None = None,\n    retval: Any | None = None,\n    state: str | None = None,\n    **kwds: Any,\n) -> None:\n    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)\n\n\n@celeryd_init.connect\ndef on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:\n    app_base.on_celeryd_init(sender, conf, **kwargs)\n\n\n@worker_init.connect\ndef on_worker_init(sender: Worker, **kwargs: Any) -> None:\n    logger.info(\"worker_init signal received.\")\n\n    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME)\n\n    # rkuo: Transient errors keep happening in the indexing watchdog threads.\n    # \"SSL connection has been closed unexpectedly\"\n    # actually setting the spawn method in the cloud fixes 95% of these.\n    # setting pre ping might help even more, but not worrying about that yet\n    pool_size = cast(int, sender.concurrency)  # type: ignore\n    SqlEngine.init_engine(pool_size=pool_size, max_overflow=8)\n\n    app_base.wait_for_redis(sender, **kwargs)\n    app_base.wait_for_db(sender, **kwargs)\n    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)\n\n    # Less startup checks in multi-tenant case\n    if MULTI_TENANT:\n        return\n\n    app_base.on_secondary_worker_init(sender, **kwargs)\n\n\n@worker_ready.connect\ndef on_worker_ready(sender: Any, **kwargs: Any) -> None:\n    app_base.on_worker_ready(sender, **kwargs)\n\n\n@worker_shutdown.connect\ndef on_worker_shutdown(sender: Any, **kwargs: Any) -> None:\n    app_base.on_worker_shutdown(sender, **kwargs)\n\n\n@worker_process_init.connect\ndef init_worker(**kwargs: Any) -> None:  # noqa: ARG001\n    SqlEngine.reset_engine()\n\n\n@signals.setup_logging.connect\ndef on_setup_logging(\n    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any\n) -> None:\n    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)\n\n\nbase_bootsteps = app_base.get_bootsteps()\nfor bootstep in base_bootsteps:\n    celery_app.steps[\"worker\"].add(bootstep)\n\ncelery_app.autodiscover_tasks(\n    app_base.filter_task_modules(\n        [\n            \"onyx.background.celery.tasks.user_file_processing\",\n        ]\n    )\n)\n"
  },
  {
    "path": "backend/onyx/background/celery/celery_k8s_probe.py",
    "content": "# script to use as a kubernetes readiness / liveness probe\n\nimport argparse\nimport sys\nimport time\nfrom pathlib import Path\n\n\ndef main_readiness(filename: str) -> int:\n    \"\"\"Checks if the file exists.\"\"\"\n    path = Path(filename)\n    if not path.is_file():\n        return 1\n\n    return 0\n\n\ndef main_liveness(filename: str) -> int:\n    \"\"\"Checks if the file exists AND was recently modified.\"\"\"\n    path = Path(filename)\n    if not path.is_file():\n        return 1\n\n    stats = path.stat()\n    liveness_timestamp = stats.st_mtime\n    current_timestamp = time.time()\n    time_diff = current_timestamp - liveness_timestamp\n    if time_diff > 60:\n        return 1\n\n    return 0\n\n\nif __name__ == \"__main__\":\n    exit_code: int\n\n    parser = argparse.ArgumentParser(description=\"k8s readiness/liveness probe\")\n    parser.add_argument(\n        \"--probe\",\n        type=str,\n        choices=[\"readiness\", \"liveness\"],\n        help=\"The type of probe\",\n        required=True,\n    )\n    parser.add_argument(\"--filename\", help=\"The filename to watch\", required=True)\n    args = parser.parse_args()\n\n    if args.probe == \"readiness\":\n        exit_code = main_readiness(args.filename)\n    elif args.probe == \"liveness\":\n        exit_code = main_liveness(args.filename)\n    else:\n        raise ValueError(f\"Unknown probe type: {args.probe}\")\n\n    sys.exit(exit_code)\n"
  },
  {
    "path": "backend/onyx/background/celery/celery_redis.py",
    "content": "# These are helper objects for tracking the keys we need to write in redis\nimport json\nimport threading\nfrom typing import Any\nfrom typing import cast\n\nfrom celery import Celery\nfrom redis import Redis\n\nfrom onyx.background.celery.configs.base import CELERY_SEPARATOR\nfrom onyx.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS\n\n\n_broker_client: Redis | None = None\n_broker_url: str | None = None\n_broker_client_lock = threading.Lock()\n\n\ndef celery_get_broker_client(app: Celery) -> Redis:\n    \"\"\"Return a shared Redis client connected to the Celery broker DB.\n\n    Uses a module-level singleton so all tasks on a worker share one\n    connection instead of creating a new one per call. The client\n    connects directly to the broker Redis DB (parsed from the broker URL).\n\n    Thread-safe via lock — safe for use in Celery thread-pool workers.\n\n    Usage:\n        r_celery = celery_get_broker_client(self.app)\n        length = celery_get_queue_length(queue, r_celery)\n    \"\"\"\n    global _broker_client, _broker_url\n    with _broker_client_lock:\n        url = app.conf.broker_url\n        if _broker_client is not None and _broker_url == url:\n            try:\n                _broker_client.ping()\n                return _broker_client\n            except Exception:\n                try:\n                    _broker_client.close()\n                except Exception:\n                    pass\n                _broker_client = None\n        elif _broker_client is not None:\n            try:\n                _broker_client.close()\n            except Exception:\n                pass\n            _broker_client = None\n\n        _broker_url = url\n        _broker_client = Redis.from_url(\n            url,\n            decode_responses=False,\n            health_check_interval=REDIS_HEALTH_CHECK_INTERVAL,\n            socket_keepalive=True,\n            socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,\n            retry_on_timeout=True,\n        )\n        return _broker_client\n\n\ndef celery_get_unacked_length(r: Redis) -> int:\n    \"\"\"Checking the unacked queue is useful because a non-zero length tells us there\n    may be prefetched tasks.\n\n    There can be other tasks in here besides indexing tasks, so this is mostly useful\n    just to see if the task count is non zero.\n\n    ref: https://blog.hikaru.run/2022/08/29/get-waiting-tasks-count-in-celery.html\n    \"\"\"\n    length = cast(int, r.hlen(\"unacked\"))\n    return length\n\n\ndef celery_get_unacked_task_ids(queue: str, r: Redis) -> set[str]:\n    \"\"\"Gets the set of task id's matching the given queue in the unacked hash.\n\n    Unacked entries belonging to the indexing queues are \"prefetched\", so this gives\n    us crucial visibility as to what tasks are in that state.\n    \"\"\"\n    tasks: set[str] = set()\n\n    for _, v in r.hscan_iter(\"unacked\"):\n        v_bytes = cast(bytes, v)\n        v_str = v_bytes.decode(\"utf-8\")\n        task = json.loads(v_str)\n\n        task_description = task[0]\n        task_queue = task[2]\n\n        if task_queue != queue:\n            continue\n\n        task_id = task_description.get(\"headers\", {}).get(\"id\")\n        if not task_id:\n            continue\n\n        # if the queue matches and we see the task_id, add it\n        tasks.add(task_id)\n    return tasks\n\n\ndef celery_get_queue_length(queue: str, r: Redis) -> int:\n    \"\"\"This is a redis specific way to get the length of a celery queue.\n    It is priority aware and knows how to count across the multiple redis lists\n    used to implement task prioritization.\n    This operation is not atomic.\"\"\"\n    total_length = 0\n    for i in range(len(OnyxCeleryPriority)):\n        queue_name = queue\n        if i > 0:\n            queue_name += CELERY_SEPARATOR\n            queue_name += str(i)\n\n        length = r.llen(queue_name)\n        total_length += cast(int, length)\n\n    return total_length\n\n\ndef celery_find_task(task_id: str, queue: str, r: Redis) -> int:\n    \"\"\"This is a redis specific way to find a task for a particular queue in redis.\n    It is priority aware and knows how to look through the multiple redis lists\n    used to implement task prioritization.\n    This operation is not atomic.\n\n    This is a linear search O(n) ... so be careful using it when the task queues can be larger.\n\n    Returns true if the id is in the queue, False if not.\n    \"\"\"\n    for priority in range(len(OnyxCeleryPriority)):\n        queue_name = f\"{queue}{CELERY_SEPARATOR}{priority}\" if priority > 0 else queue\n\n        tasks = cast(list[bytes], r.lrange(queue_name, 0, -1))\n        for task in tasks:\n            task_dict: dict[str, Any] = json.loads(task.decode(\"utf-8\"))\n            if task_dict.get(\"headers\", {}).get(\"id\") == task_id:\n                return True\n\n    return False\n\n\ndef celery_get_queued_task_ids(queue: str, r: Redis) -> set[str]:\n    \"\"\"This is a redis specific way to build a list of tasks in a queue and return them\n    as a set.\n\n    This helps us read the queue once and then efficiently look for missing tasks\n    in the queue.\n    \"\"\"\n\n    task_set: set[str] = set()\n\n    for priority in range(len(OnyxCeleryPriority)):\n        queue_name = f\"{queue}{CELERY_SEPARATOR}{priority}\" if priority > 0 else queue\n\n        tasks = cast(list[bytes], r.lrange(queue_name, 0, -1))\n        for task in tasks:\n            task_dict: dict[str, Any] = json.loads(task.decode(\"utf-8\"))\n            task_id = task_dict.get(\"headers\", {}).get(\"id\")\n            if task_id:\n                task_set.add(task_id)\n\n    return task_set\n\n\ndef celery_inspect_get_workers(name_filter: str | None, app: Celery) -> list[str]:\n    \"\"\"Returns a list of current workers containing name_filter, or all workers if\n    name_filter is None.\n\n    We've empirically discovered that the celery inspect API is potentially unstable\n    and may hang or return empty results when celery is under load. Suggest using this\n    more to debug and troubleshoot than in production code.\n    \"\"\"\n    worker_names: list[str] = []\n\n    # filter for and create an indexing specific inspect object\n    inspect = app.control.inspect()\n    workers: dict[str, Any] = inspect.ping()  # type: ignore\n    if workers:\n        for worker_name in list(workers.keys()):\n            # if the name filter not set, return all worker names\n            if not name_filter:\n                worker_names.append(worker_name)\n                continue\n\n            # if the name filter is set, return only worker names that contain the name filter\n            if name_filter not in worker_name:\n                continue\n\n            worker_names.append(worker_name)\n\n    return worker_names\n\n\ndef celery_inspect_get_reserved(worker_names: list[str], app: Celery) -> set[str]:\n    \"\"\"Returns a list of reserved tasks on the specified workers.\n\n    We've empirically discovered that the celery inspect API is potentially unstable\n    and may hang or return empty results when celery is under load. Suggest using this\n    more to debug and troubleshoot than in production code.\n    \"\"\"\n    reserved_task_ids: set[str] = set()\n\n    inspect = app.control.inspect(destination=worker_names)\n\n    # get the list of reserved tasks\n    reserved_tasks: dict[str, list] | None = inspect.reserved()  # type: ignore\n    if reserved_tasks:\n        for _, task_list in reserved_tasks.items():\n            for task in task_list:\n                reserved_task_ids.add(task[\"id\"])\n\n    return reserved_task_ids\n\n\ndef celery_inspect_get_active(worker_names: list[str], app: Celery) -> set[str]:\n    \"\"\"Returns a list of active tasks on the specified workers.\n\n    We've empirically discovered that the celery inspect API is potentially unstable\n    and may hang or return empty results when celery is under load. Suggest using this\n    more to debug and troubleshoot than in production code.\n    \"\"\"\n    active_task_ids: set[str] = set()\n\n    inspect = app.control.inspect(destination=worker_names)\n\n    # get the list of reserved tasks\n    active_tasks: dict[str, list] | None = inspect.active()  # type: ignore\n    if active_tasks:\n        for _, task_list in active_tasks.items():\n            for task in task_list:\n                active_task_ids.add(task[\"id\"])\n\n    return active_task_ids\n"
  },
  {
    "path": "backend/onyx/background/celery/celery_utils.py",
    "content": "from collections.abc import Generator\nfrom collections.abc import Iterator\nfrom collections.abc import Sequence\nfrom datetime import datetime\nfrom datetime import timezone\nfrom pathlib import Path\nfrom typing import Any\nfrom typing import cast\nfrom typing import TypeVar\n\nimport httpx\nfrom pydantic import BaseModel\n\nfrom onyx.configs.app_configs import MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE\nfrom onyx.configs.app_configs import VESPA_REQUEST_TIMEOUT\nfrom onyx.connectors.connector_runner import CheckpointOutputWrapper\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rate_limit_builder,\n)\nfrom onyx.connectors.interfaces import BaseConnector\nfrom onyx.connectors.interfaces import CheckpointedConnector\nfrom onyx.connectors.interfaces import ConnectorCheckpoint\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SlimConnector\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.httpx.httpx_pool import HttpxPool\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\nCT = TypeVar(\"CT\", bound=ConnectorCheckpoint)\n\n\nclass SlimConnectorExtractionResult(BaseModel):\n    \"\"\"Result of extracting document IDs and hierarchy nodes from a connector.\n\n    raw_id_to_parent maps document ID → parent_hierarchy_raw_node_id (or None).\n    Use raw_id_to_parent.keys() wherever the old set of IDs was needed.\n    \"\"\"\n\n    raw_id_to_parent: dict[str, str | None]\n    hierarchy_nodes: list[HierarchyNode]\n\n\ndef _checkpointed_batched_items(\n    connector: CheckpointedConnector[CT],\n    start: float,\n    end: float,\n) -> Generator[list[Document | HierarchyNode | ConnectorFailure], None, None]:\n    \"\"\"Loop through all checkpoint steps and yield batched items.\n\n    Some checkpointed connectors (e.g. IMAP) are multi-step: the first\n    checkpoint call may only initialize internal state without yielding\n    any documents. This function loops until checkpoint.has_more is False\n    to ensure all items are collected across every step.\n    \"\"\"\n    checkpoint = connector.build_dummy_checkpoint()\n    while True:\n        checkpoint_output = connector.load_from_checkpoint(\n            start=start, end=end, checkpoint=checkpoint\n        )\n        wrapper: CheckpointOutputWrapper[CT] = CheckpointOutputWrapper()\n        batch: list[Document | HierarchyNode | ConnectorFailure] = []\n        for document, hierarchy_node, failure, next_checkpoint in wrapper(\n            checkpoint_output\n        ):\n            if document is not None:\n                batch.append(document)\n            elif hierarchy_node is not None:\n                batch.append(hierarchy_node)\n            elif failure is not None:\n                batch.append(failure)\n\n            if next_checkpoint is not None:\n                checkpoint = next_checkpoint\n\n        if batch:\n            yield batch\n\n        if not checkpoint.has_more:\n            break\n\n\ndef _get_failure_id(failure: ConnectorFailure) -> str | None:\n    \"\"\"Extract the document/entity ID from a ConnectorFailure.\"\"\"\n    if failure.failed_document:\n        return failure.failed_document.document_id\n    if failure.failed_entity:\n        return failure.failed_entity.entity_id\n    return None\n\n\nclass BatchResult(BaseModel):\n    raw_id_to_parent: dict[str, str | None]\n    hierarchy_nodes: list[HierarchyNode]\n\n\ndef _extract_from_batch(\n    doc_list: Sequence[Document | SlimDocument | HierarchyNode | ConnectorFailure],\n) -> BatchResult:\n    \"\"\"Separate a batch into document IDs (with parent mapping) and hierarchy nodes.\n\n    ConnectorFailure items have their failed document/entity IDs added to the\n    ID dict so that failed-to-retrieve documents are not accidentally pruned.\n    \"\"\"\n    ids: dict[str, str | None] = {}\n    hierarchy_nodes: list[HierarchyNode] = []\n    for item in doc_list:\n        if isinstance(item, HierarchyNode):\n            hierarchy_nodes.append(item)\n        elif isinstance(item, ConnectorFailure):\n            failed_id = _get_failure_id(item)\n            if failed_id:\n                ids[failed_id] = None\n            logger.warning(\n                f\"Failed to retrieve document {failed_id}: {item.failure_message}\"\n            )\n        else:\n            ids[item.id] = item.parent_hierarchy_raw_node_id\n    return BatchResult(raw_id_to_parent=ids, hierarchy_nodes=hierarchy_nodes)\n\n\ndef extract_ids_from_runnable_connector(\n    runnable_connector: BaseConnector,\n    callback: IndexingHeartbeatInterface | None = None,\n) -> SlimConnectorExtractionResult:\n    \"\"\"\n    Extract document IDs and hierarchy nodes from a runnable connector.\n\n    Hierarchy nodes yielded alongside documents/slim docs are collected and\n    returned in the result. ConnectorFailure items have their IDs preserved\n    so that failed-to-retrieve documents are not accidentally pruned.\n\n    Optionally, a callback can be passed to handle the length of each document batch.\n    \"\"\"\n    all_raw_id_to_parent: dict[str, str | None] = {}\n    all_hierarchy_nodes: list[HierarchyNode] = []\n\n    # Sequence (covariant) lets all the specific list[...] iterator types unify here\n    raw_batch_generator: (\n        Iterator[Sequence[Document | SlimDocument | HierarchyNode | ConnectorFailure]]\n        | None\n    ) = None\n\n    if isinstance(runnable_connector, SlimConnector):\n        raw_batch_generator = runnable_connector.retrieve_all_slim_docs()\n    elif isinstance(runnable_connector, SlimConnectorWithPermSync):\n        raw_batch_generator = runnable_connector.retrieve_all_slim_docs_perm_sync()\n    # If the connector isn't slim, fall back to running it normally to get ids\n    elif isinstance(runnable_connector, LoadConnector):\n        raw_batch_generator = runnable_connector.load_from_state()\n    elif isinstance(runnable_connector, PollConnector):\n        start = datetime(1970, 1, 1, tzinfo=timezone.utc).timestamp()\n        end = datetime.now(timezone.utc).timestamp()\n        raw_batch_generator = runnable_connector.poll_source(start=start, end=end)\n    elif isinstance(runnable_connector, CheckpointedConnector):\n        start = datetime(1970, 1, 1, tzinfo=timezone.utc).timestamp()\n        end = datetime.now(timezone.utc).timestamp()\n        raw_batch_generator = _checkpointed_batched_items(\n            runnable_connector, start, end\n        )\n    else:\n        raise RuntimeError(\"Pruning job could not find a valid runnable_connector.\")\n\n    # this function is called per batch for rate limiting\n    doc_batch_processing_func = (\n        rate_limit_builder(\n            max_calls=MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE, period=60\n        )(lambda x: x)\n        if MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE\n        else lambda x: x\n    )\n\n    # process raw batches to extract both IDs and hierarchy nodes\n    for doc_list in raw_batch_generator:\n        if callback and callback.should_stop():\n            raise RuntimeError(\n                \"extract_ids_from_runnable_connector: Stop signal detected\"\n            )\n\n        batch_result = _extract_from_batch(doc_list)\n        batch_ids = batch_result.raw_id_to_parent\n        batch_nodes = batch_result.hierarchy_nodes\n        doc_batch_processing_func(batch_ids)\n        all_raw_id_to_parent.update(batch_ids)\n        all_hierarchy_nodes.extend(batch_nodes)\n\n        if callback:\n            callback.progress(\"extract_ids_from_runnable_connector\", len(batch_ids))\n\n    return SlimConnectorExtractionResult(\n        raw_id_to_parent=all_raw_id_to_parent,\n        hierarchy_nodes=all_hierarchy_nodes,\n    )\n\n\ndef celery_is_listening_to_queue(worker: Any, name: str) -> bool:\n    \"\"\"Checks to see if we're listening to the named queue\"\"\"\n\n    # how to get a list of queues this worker is listening to\n    # https://stackoverflow.com/questions/29790523/how-to-determine-which-queues-a-celery-worker-is-consuming-at-runtime\n    queue_names = list(worker.app.amqp.queues.consume_from.keys())\n    for queue_name in queue_names:\n        if queue_name == name:\n            return True\n\n    return False\n\n\ndef celery_is_worker_primary(worker: Any) -> bool:\n    \"\"\"There are multiple approaches that could be taken to determine if a celery worker\n    is 'primary', as defined by us. But the way we do it is to check the hostname set\n    for the celery worker, which can be done on the\n    command line with '--hostname'.\"\"\"\n    hostname = worker.hostname\n    if hostname.startswith(\"primary\"):\n        return True\n\n    return False\n\n\ndef httpx_init_vespa_pool(\n    max_keepalive_connections: int,\n    timeout: int = VESPA_REQUEST_TIMEOUT,\n    ssl_cert: str | None = None,\n    ssl_key: str | None = None,\n) -> None:\n    httpx_cert = None\n    httpx_verify = False\n    if ssl_cert and ssl_key:\n        httpx_cert = cast(tuple[str, str], (ssl_cert, ssl_key))\n        httpx_verify = True\n\n    HttpxPool.init_client(\n        name=\"vespa\",\n        cert=httpx_cert,\n        verify=httpx_verify,\n        timeout=timeout,\n        http2=False,\n        limits=httpx.Limits(max_keepalive_connections=max_keepalive_connections),\n    )\n\n\ndef make_probe_path(probe: str, hostname: str) -> Path:\n    \"\"\"templates the path for a k8s probe file.\n\n    e.g. /tmp/onyx_k8s_indexing_readiness.txt\n    \"\"\"\n    hostname_parts = hostname.split(\"@\")\n    if len(hostname_parts) != 2:\n        raise ValueError(f\"hostname could not be split! {hostname=}\")\n\n    name = hostname_parts[0]\n    if not name:\n        raise ValueError(f\"name cannot be empty! {name=}\")\n\n    safe_name = \"\".join(c for c in name if c.isalnum()).rstrip()\n    return Path(f\"/tmp/onyx_k8s_{safe_name}_{probe}.txt\")\n"
  },
  {
    "path": "backend/onyx/background/celery/configs/base.py",
    "content": "# docs: https://docs.celeryq.dev/en/stable/userguide/configuration.html\nimport urllib.parse\n\nfrom onyx.configs.app_configs import CELERY_BROKER_POOL_LIMIT\nfrom onyx.configs.app_configs import CELERY_RESULT_EXPIRES\nfrom onyx.configs.app_configs import REDIS_DB_NUMBER_CELERY\nfrom onyx.configs.app_configs import REDIS_DB_NUMBER_CELERY_RESULT_BACKEND\nfrom onyx.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL\nfrom onyx.configs.app_configs import REDIS_HOST\nfrom onyx.configs.app_configs import REDIS_PASSWORD\nfrom onyx.configs.app_configs import REDIS_PORT\nfrom onyx.configs.app_configs import REDIS_SSL\nfrom onyx.configs.app_configs import REDIS_SSL_CA_CERTS\nfrom onyx.configs.app_configs import REDIS_SSL_CERT_REQS\nfrom onyx.configs.app_configs import USE_REDIS_IAM_AUTH\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS\n\nCELERY_SEPARATOR = \":\"\n\nCELERY_PASSWORD_PART = \"\"\nif REDIS_PASSWORD:\n    CELERY_PASSWORD_PART = \":\" + urllib.parse.quote(REDIS_PASSWORD, safe=\"\") + \"@\"\n\nREDIS_SCHEME = \"redis\"\n\n# SSL-specific query parameters for Redis URL\nSSL_QUERY_PARAMS = \"\"\nif REDIS_SSL and not USE_REDIS_IAM_AUTH:\n    REDIS_SCHEME = \"rediss\"\n    SSL_QUERY_PARAMS = f\"?ssl_cert_reqs={REDIS_SSL_CERT_REQS}\"\n    if REDIS_SSL_CA_CERTS:\n        SSL_QUERY_PARAMS += f\"&ssl_ca_certs={REDIS_SSL_CA_CERTS}\"\n\n# region Broker settings\n# example celery_broker_url: \"redis://:password@localhost:6379/15\"\nbroker_url = f\"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY}{SSL_QUERY_PARAMS}\"\n\nbroker_connection_retry_on_startup = True\nbroker_pool_limit = CELERY_BROKER_POOL_LIMIT\n\n# redis broker settings\n# https://docs.celeryq.dev/projects/kombu/en/stable/reference/kombu.transport.redis.html\nbroker_transport_options = {\n    \"priority_steps\": list(range(len(OnyxCeleryPriority))),\n    \"sep\": CELERY_SEPARATOR,\n    \"queue_order_strategy\": \"priority\",\n    \"retry_on_timeout\": True,\n    \"health_check_interval\": REDIS_HEALTH_CHECK_INTERVAL,\n    \"socket_keepalive\": True,\n    \"socket_keepalive_options\": REDIS_SOCKET_KEEPALIVE_OPTIONS,\n}\n# endregion\n\n# redis backend settings\n# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings\n\n# there doesn't appear to be a way to set socket_keepalive_options on the redis result backend\nredis_socket_keepalive = True\nredis_retry_on_timeout = True\nredis_backend_health_check_interval = REDIS_HEALTH_CHECK_INTERVAL\n\n\ntask_default_priority = OnyxCeleryPriority.MEDIUM\ntask_acks_late = True\n\n# region Task result backend settings\n# It's possible we don't even need celery's result backend, in which case all of the optimization below\n# might be irrelevant\nresult_backend = f\"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY_RESULT_BACKEND}{SSL_QUERY_PARAMS}\"\nresult_expires = CELERY_RESULT_EXPIRES  # 86400 seconds is the default\n# endregion\n\n# Leaving this to the default of True may cause double logging since both our own app\n# and celery think they are controlling the logger.\n# TODO: Configure celery's logger entirely manually and set this to False\n# worker_hijack_root_logger = False\n\n# region Notes on serialization performance\n# Option 0: Defaults (json serializer, no compression)\n# about 1.5 KB per queued task. 1KB in queue, 400B for result, 100 as a child entry in generator result\n\n# Option 1: Reduces generator task result sizes by roughly 20%\n# task_compression = \"bzip2\"\n# task_serializer = \"pickle\"\n# result_compression = \"bzip2\"\n# result_serializer = \"pickle\"\n# accept_content=[\"pickle\"]\n\n# Option 2: this significantly reduces the size of the result for generator tasks since the list of children\n# can be large. small tasks change very little\n# def pickle_bz2_encoder(data):\n#     return bz2.compress(pickle.dumps(data))\n\n# def pickle_bz2_decoder(data):\n#     return pickle.loads(bz2.decompress(data))\n\n# from kombu import serialization  # To register custom serialization with Celery/Kombu\n\n# serialization.register('pickle-bzip2', pickle_bz2_encoder, pickle_bz2_decoder, 'application/x-pickle-bz2', 'binary')\n\n# task_serializer = \"pickle-bzip2\"\n# result_serializer = \"pickle-bzip2\"\n# accept_content=[\"pickle\", \"pickle-bzip2\"]\n# endregion\n"
  },
  {
    "path": "backend/onyx/background/celery/configs/beat.py",
    "content": "# docs: https://docs.celeryq.dev/en/stable/userguide/configuration.html\nimport onyx.background.celery.configs.base as shared_config\n\nbroker_url = shared_config.broker_url\nbroker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup\nbroker_pool_limit = shared_config.broker_pool_limit\nbroker_transport_options = shared_config.broker_transport_options\n\nredis_socket_keepalive = shared_config.redis_socket_keepalive\nredis_retry_on_timeout = shared_config.redis_retry_on_timeout\nredis_backend_health_check_interval = shared_config.redis_backend_health_check_interval\n\nresult_backend = shared_config.result_backend\nresult_expires = shared_config.result_expires  # 86400 seconds is the default\n"
  },
  {
    "path": "backend/onyx/background/celery/configs/client.py",
    "content": "import onyx.background.celery.configs.base as shared_config\n\nbroker_url = shared_config.broker_url\nbroker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup\nbroker_pool_limit = shared_config.broker_pool_limit\nbroker_transport_options = shared_config.broker_transport_options\n\nredis_socket_keepalive = shared_config.redis_socket_keepalive\nredis_retry_on_timeout = shared_config.redis_retry_on_timeout\nredis_backend_health_check_interval = shared_config.redis_backend_health_check_interval\n\nresult_backend = shared_config.result_backend\nresult_expires = shared_config.result_expires  # 86400 seconds is the default\n\ntask_default_priority = shared_config.task_default_priority\ntask_acks_late = shared_config.task_acks_late\n"
  },
  {
    "path": "backend/onyx/background/celery/configs/docfetching.py",
    "content": "import onyx.background.celery.configs.base as shared_config\nfrom onyx.configs.app_configs import CELERY_WORKER_DOCFETCHING_CONCURRENCY\n\nbroker_url = shared_config.broker_url\nbroker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup\nbroker_pool_limit = shared_config.broker_pool_limit\nbroker_transport_options = shared_config.broker_transport_options\n\nredis_socket_keepalive = shared_config.redis_socket_keepalive\nredis_retry_on_timeout = shared_config.redis_retry_on_timeout\nredis_backend_health_check_interval = shared_config.redis_backend_health_check_interval\n\nresult_backend = shared_config.result_backend\nresult_expires = shared_config.result_expires  # 86400 seconds is the default\n\ntask_default_priority = shared_config.task_default_priority\ntask_acks_late = shared_config.task_acks_late\n\n# Docfetching worker configuration\nworker_concurrency = CELERY_WORKER_DOCFETCHING_CONCURRENCY\nworker_pool = \"threads\"\nworker_prefetch_multiplier = 1\n"
  },
  {
    "path": "backend/onyx/background/celery/configs/docprocessing.py",
    "content": "import onyx.background.celery.configs.base as shared_config\nfrom onyx.configs.app_configs import CELERY_WORKER_DOCPROCESSING_CONCURRENCY\n\nbroker_url = shared_config.broker_url\nbroker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup\nbroker_pool_limit = shared_config.broker_pool_limit\nbroker_transport_options = shared_config.broker_transport_options\n\nredis_socket_keepalive = shared_config.redis_socket_keepalive\nredis_retry_on_timeout = shared_config.redis_retry_on_timeout\nredis_backend_health_check_interval = shared_config.redis_backend_health_check_interval\n\nresult_backend = shared_config.result_backend\nresult_expires = shared_config.result_expires  # 86400 seconds is the default\n\ntask_default_priority = shared_config.task_default_priority\ntask_acks_late = shared_config.task_acks_late\n\n# Indexing worker specific ... this lets us track the transition to STARTED in redis\n# We don't currently rely on this but it has the potential to be useful and\n# indexing tasks are not high volume\n\n# we don't turn this on yet because celery occasionally runs tasks more than once\n# which means a duplicate run might change the task state unexpectedly\n# task_track_started = True\n\nworker_concurrency = CELERY_WORKER_DOCPROCESSING_CONCURRENCY\nworker_pool = \"threads\"\nworker_prefetch_multiplier = 1\n"
  },
  {
    "path": "backend/onyx/background/celery/configs/heavy.py",
    "content": "import onyx.background.celery.configs.base as shared_config\nfrom onyx.configs.app_configs import CELERY_WORKER_HEAVY_CONCURRENCY\n\nbroker_url = shared_config.broker_url\nbroker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup\nbroker_pool_limit = shared_config.broker_pool_limit\nbroker_transport_options = shared_config.broker_transport_options\n\nredis_socket_keepalive = shared_config.redis_socket_keepalive\nredis_retry_on_timeout = shared_config.redis_retry_on_timeout\nredis_backend_health_check_interval = shared_config.redis_backend_health_check_interval\n\nresult_backend = shared_config.result_backend\nresult_expires = shared_config.result_expires  # 86400 seconds is the default\n\ntask_default_priority = shared_config.task_default_priority\ntask_acks_late = shared_config.task_acks_late\n\nworker_concurrency = CELERY_WORKER_HEAVY_CONCURRENCY\nworker_pool = \"threads\"\nworker_prefetch_multiplier = 1\n"
  },
  {
    "path": "backend/onyx/background/celery/configs/light.py",
    "content": "import onyx.background.celery.configs.base as shared_config\nfrom onyx.configs.app_configs import CELERY_WORKER_LIGHT_CONCURRENCY\nfrom onyx.configs.app_configs import CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER\n\nbroker_url = shared_config.broker_url\nbroker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup\nbroker_pool_limit = shared_config.broker_pool_limit\nbroker_transport_options = shared_config.broker_transport_options\n\nredis_socket_keepalive = shared_config.redis_socket_keepalive\nredis_retry_on_timeout = shared_config.redis_retry_on_timeout\nredis_backend_health_check_interval = shared_config.redis_backend_health_check_interval\n\nresult_backend = shared_config.result_backend\nresult_expires = shared_config.result_expires  # 86400 seconds is the default\n\ntask_default_priority = shared_config.task_default_priority\ntask_acks_late = shared_config.task_acks_late\n\nworker_concurrency = CELERY_WORKER_LIGHT_CONCURRENCY\nworker_pool = \"threads\"\nworker_prefetch_multiplier = CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER\n"
  },
  {
    "path": "backend/onyx/background/celery/configs/monitoring.py",
    "content": "import onyx.background.celery.configs.base as shared_config\nfrom onyx.configs.app_configs import CELERY_WORKER_MONITORING_CONCURRENCY\n\nbroker_url = shared_config.broker_url\nbroker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup\nbroker_pool_limit = shared_config.broker_pool_limit\nbroker_transport_options = shared_config.broker_transport_options\n\nredis_socket_keepalive = shared_config.redis_socket_keepalive\nredis_retry_on_timeout = shared_config.redis_retry_on_timeout\nredis_backend_health_check_interval = shared_config.redis_backend_health_check_interval\n\nresult_backend = shared_config.result_backend\nresult_expires = shared_config.result_expires  # 86400 seconds is the default\n\ntask_default_priority = shared_config.task_default_priority\ntask_acks_late = shared_config.task_acks_late\n\n# Monitoring worker specific settings\nworker_concurrency = CELERY_WORKER_MONITORING_CONCURRENCY\nworker_pool = \"threads\"\nworker_prefetch_multiplier = 1\n"
  },
  {
    "path": "backend/onyx/background/celery/configs/primary.py",
    "content": "import onyx.background.celery.configs.base as shared_config\nfrom onyx.configs.app_configs import CELERY_WORKER_PRIMARY_CONCURRENCY\n\nbroker_url = shared_config.broker_url\nbroker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup\nbroker_pool_limit = shared_config.broker_pool_limit\nbroker_transport_options = shared_config.broker_transport_options\n\nredis_socket_keepalive = shared_config.redis_socket_keepalive\nredis_retry_on_timeout = shared_config.redis_retry_on_timeout\nredis_backend_health_check_interval = shared_config.redis_backend_health_check_interval\n\nresult_backend = shared_config.result_backend\nresult_expires = shared_config.result_expires  # 86400 seconds is the default\n\ntask_default_priority = shared_config.task_default_priority\ntask_acks_late = shared_config.task_acks_late\n\nworker_concurrency = CELERY_WORKER_PRIMARY_CONCURRENCY\nworker_pool = \"threads\"\nworker_prefetch_multiplier = 1\n"
  },
  {
    "path": "backend/onyx/background/celery/configs/user_file_processing.py",
    "content": "import onyx.background.celery.configs.base as shared_config\nfrom onyx.configs.app_configs import CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY\n\nbroker_url = shared_config.broker_url\nbroker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup\nbroker_pool_limit = shared_config.broker_pool_limit\nbroker_transport_options = shared_config.broker_transport_options\n\nredis_socket_keepalive = shared_config.redis_socket_keepalive\nredis_retry_on_timeout = shared_config.redis_retry_on_timeout\nredis_backend_health_check_interval = shared_config.redis_backend_health_check_interval\n\nresult_backend = shared_config.result_backend\nresult_expires = shared_config.result_expires  # 86400 seconds is the default\n\ntask_default_priority = shared_config.task_default_priority\ntask_acks_late = shared_config.task_acks_late\n\n# User file processing worker configuration\nworker_concurrency = CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY\nworker_pool = \"threads\"\nworker_prefetch_multiplier = 1\n"
  },
  {
    "path": "backend/onyx/background/celery/memory_monitoring.py",
    "content": "# backend/onyx/background/celery/memory_monitoring.py\nimport logging\nimport os\nfrom logging.handlers import RotatingFileHandler\n\nimport psutil\n\nfrom onyx.utils.logger import is_running_in_container\nfrom onyx.utils.logger import setup_logger\n\n# Regular application logger\nlogger = setup_logger()\n\n# Only set up memory monitoring in container environment\nif is_running_in_container():\n    # Set up a dedicated memory monitoring logger\n    MEMORY_LOG_DIR = \"/var/log/onyx/memory\"\n    MEMORY_LOG_FILE = os.path.join(MEMORY_LOG_DIR, \"memory_usage.log\")\n    MEMORY_LOG_MAX_BYTES = 10 * 1024 * 1024  # 10MB\n    MEMORY_LOG_BACKUP_COUNT = 5  # Keep 5 backup files\n\n    # Ensure log directory exists\n    os.makedirs(MEMORY_LOG_DIR, exist_ok=True)\n\n    # Create a dedicated logger for memory monitoring\n    memory_logger = logging.getLogger(\"memory_monitoring\")\n    memory_logger.setLevel(logging.INFO)\n\n    # Create a rotating file handler\n    memory_handler = RotatingFileHandler(\n        MEMORY_LOG_FILE,\n        maxBytes=MEMORY_LOG_MAX_BYTES,\n        backupCount=MEMORY_LOG_BACKUP_COUNT,\n    )\n\n    # Create a formatter that includes all relevant information\n    memory_formatter = logging.Formatter(\n        \"%(asctime)s [%(levelname)s] %(message)s\", datefmt=\"%Y-%m-%d %H:%M:%S\"\n    )\n    memory_handler.setFormatter(memory_formatter)\n    memory_logger.addHandler(memory_handler)\nelse:\n    # Create a null logger when not in container\n    memory_logger = logging.getLogger(\"memory_monitoring\")\n    memory_logger.addHandler(logging.NullHandler())\n\n\ndef emit_process_memory(\n    pid: int, process_name: str, additional_metadata: dict[str, str | int]\n) -> None:\n    # Skip memory monitoring if not in container\n    if not is_running_in_container():\n        return\n\n    try:\n        process = psutil.Process(pid)\n        memory_info = process.memory_info()\n        cpu_percent = process.cpu_percent(interval=0.1)\n\n        # Build metadata string from additional_metadata dictionary\n        metadata_str = \" \".join(\n            [f\"{key}={value}\" for key, value in additional_metadata.items()]\n        )\n        metadata_str = f\" {metadata_str}\" if metadata_str else \"\"\n\n        memory_logger.info(\n            f\"PROCESS_MEMORY process_name={process_name} pid={pid} \"\n            f\"rss_mb={memory_info.rss / (1024 * 1024):.2f} \"\n            f\"vms_mb={memory_info.vms / (1024 * 1024):.2f} \"\n            f\"cpu={cpu_percent:.2f}{metadata_str}\"\n        )\n    except Exception:\n        logger.exception(\"Error monitoring process memory.\")\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/beat_schedule.py",
    "content": "import copy\nfrom datetime import timedelta\nfrom typing import Any\n\nfrom celery.schedules import crontab\n\nfrom onyx.configs.app_configs import AUTO_LLM_CONFIG_URL\nfrom onyx.configs.app_configs import AUTO_LLM_UPDATE_INTERVAL_SECONDS\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\nfrom onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED\nfrom onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES\nfrom onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom shared_configs.configs import MULTI_TENANT\n\n# choosing 15 minutes because it roughly gives us enough time to process many tasks\n# we might be able to reduce this greatly if we can run a unified\n# loop across all tenants rather than tasks per tenant\n\n# we set expires because it isn't necessary to queue up these tasks\n# it's only important that they run relatively regularly\nBEAT_EXPIRES_DEFAULT = 15 * 60  # 15 minutes (in seconds)\n\n# hack to slow down task dispatch in the cloud until\n# we have a better implementation (backpressure, etc)\n# Note that DynamicTenantScheduler can adjust the runtime value for this via Redis\nCLOUD_BEAT_MULTIPLIER_DEFAULT = 8.0\nCLOUD_DOC_PERMISSION_SYNC_MULTIPLIER_DEFAULT = 1.0\n\n# tasks that run in either self-hosted on cloud\nbeat_task_templates: list[dict] = [\n    {\n        \"name\": \"check-for-user-file-processing\",\n        \"task\": OnyxCeleryTask.CHECK_FOR_USER_FILE_PROCESSING,\n        \"schedule\": timedelta(seconds=20),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.MEDIUM,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": \"check-for-user-file-project-sync\",\n        \"task\": OnyxCeleryTask.CHECK_FOR_USER_FILE_PROJECT_SYNC,\n        \"schedule\": timedelta(seconds=20),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.MEDIUM,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": \"check-for-user-file-delete\",\n        \"task\": OnyxCeleryTask.CHECK_FOR_USER_FILE_DELETE,\n        \"schedule\": timedelta(seconds=20),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.MEDIUM,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": \"check-for-indexing\",\n        \"task\": OnyxCeleryTask.CHECK_FOR_INDEXING,\n        \"schedule\": timedelta(seconds=15),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.MEDIUM,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": \"check-for-checkpoint-cleanup\",\n        \"task\": OnyxCeleryTask.CHECK_FOR_CHECKPOINT_CLEANUP,\n        \"schedule\": timedelta(hours=1),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.LOW,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": \"check-for-index-attempt-cleanup\",\n        \"task\": OnyxCeleryTask.CHECK_FOR_INDEX_ATTEMPT_CLEANUP,\n        \"schedule\": timedelta(minutes=30),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.MEDIUM,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": \"check-for-connector-deletion\",\n        \"task\": OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,\n        \"schedule\": timedelta(seconds=20),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.MEDIUM,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": \"check-for-vespa-sync\",\n        \"task\": OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,\n        \"schedule\": timedelta(seconds=20),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.MEDIUM,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": \"check-for-pruning\",\n        \"task\": OnyxCeleryTask.CHECK_FOR_PRUNING,\n        \"schedule\": timedelta(seconds=20),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.MEDIUM,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": \"check-for-hierarchy-fetching\",\n        \"task\": OnyxCeleryTask.CHECK_FOR_HIERARCHY_FETCHING,\n        \"schedule\": timedelta(hours=1),  # Check hourly, but only fetch once per day\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.LOW,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": \"monitor-background-processes\",\n        \"task\": OnyxCeleryTask.MONITOR_BACKGROUND_PROCESSES,\n        \"schedule\": timedelta(minutes=5),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.LOW,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n            \"queue\": OnyxCeleryQueues.MONITORING,\n        },\n    },\n    # Sandbox cleanup tasks\n    {\n        \"name\": \"cleanup-idle-sandboxes\",\n        \"task\": OnyxCeleryTask.CLEANUP_IDLE_SANDBOXES,\n        \"schedule\": timedelta(minutes=1),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.LOW,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n            \"queue\": OnyxCeleryQueues.SANDBOX,\n        },\n    },\n    {\n        \"name\": \"cleanup-old-snapshots\",\n        \"task\": OnyxCeleryTask.CLEANUP_OLD_SNAPSHOTS,\n        \"schedule\": timedelta(hours=24),\n        \"options\": {\n            \"priority\": OnyxCeleryPriority.LOW,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n            \"queue\": OnyxCeleryQueues.SANDBOX,\n        },\n    },\n]\n\nif ENTERPRISE_EDITION_ENABLED:\n    beat_task_templates.extend(\n        [\n            {\n                \"name\": \"check-for-doc-permissions-sync\",\n                \"task\": OnyxCeleryTask.CHECK_FOR_DOC_PERMISSIONS_SYNC,\n                \"schedule\": timedelta(seconds=30),\n                \"options\": {\n                    \"priority\": OnyxCeleryPriority.MEDIUM,\n                    \"expires\": BEAT_EXPIRES_DEFAULT,\n                },\n            },\n            {\n                \"name\": \"check-for-external-group-sync\",\n                \"task\": OnyxCeleryTask.CHECK_FOR_EXTERNAL_GROUP_SYNC,\n                \"schedule\": timedelta(seconds=20),\n                \"options\": {\n                    \"priority\": OnyxCeleryPriority.MEDIUM,\n                    \"expires\": BEAT_EXPIRES_DEFAULT,\n                },\n            },\n        ]\n    )\n\n# Add the Auto LLM update task if the config URL is set (has a default)\nif AUTO_LLM_CONFIG_URL:\n    beat_task_templates.append(\n        {\n            \"name\": \"check-for-auto-llm-update\",\n            \"task\": OnyxCeleryTask.CHECK_FOR_AUTO_LLM_UPDATE,\n            \"schedule\": timedelta(seconds=AUTO_LLM_UPDATE_INTERVAL_SECONDS),\n            \"options\": {\n                \"priority\": OnyxCeleryPriority.LOW,\n                \"expires\": BEAT_EXPIRES_DEFAULT,\n            },\n        }\n    )\n\n# Add scheduled eval task if datasets are configured\nif SCHEDULED_EVAL_DATASET_NAMES:\n    beat_task_templates.append(\n        {\n            \"name\": \"scheduled-eval-pipeline\",\n            \"task\": OnyxCeleryTask.SCHEDULED_EVAL_TASK,\n            # run every Sunday at midnight UTC\n            \"schedule\": crontab(\n                hour=0,\n                minute=0,\n                day_of_week=0,\n            ),\n            \"options\": {\n                \"priority\": OnyxCeleryPriority.LOW,\n                \"expires\": BEAT_EXPIRES_DEFAULT,\n            },\n        }\n    )\n\n# Add OpenSearch migration task if enabled.\nif ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:\n    beat_task_templates.append(\n        {\n            \"name\": \"migrate-chunks-from-vespa-to-opensearch\",\n            \"task\": OnyxCeleryTask.MIGRATE_CHUNKS_FROM_VESPA_TO_OPENSEARCH_TASK,\n            # Try to enqueue an invocation of this task with this frequency.\n            \"schedule\": timedelta(seconds=120),  # 2 minutes\n            \"options\": {\n                \"priority\": OnyxCeleryPriority.LOW,\n                # If the task was not dequeued in this time, revoke it.\n                \"expires\": BEAT_EXPIRES_DEFAULT,\n                \"queue\": OnyxCeleryQueues.OPENSEARCH_MIGRATION,\n            },\n        }\n    )\n\n\n# Beat task names that require a vector DB. Filtered out when DISABLE_VECTOR_DB.\n_VECTOR_DB_BEAT_TASK_NAMES: set[str] = {\n    \"check-for-indexing\",\n    \"check-for-connector-deletion\",\n    \"check-for-vespa-sync\",\n    \"check-for-pruning\",\n    \"check-for-hierarchy-fetching\",\n    \"check-for-checkpoint-cleanup\",\n    \"check-for-index-attempt-cleanup\",\n    \"check-for-doc-permissions-sync\",\n    \"check-for-external-group-sync\",\n    \"migrate-chunks-from-vespa-to-opensearch\",\n}\n\nif DISABLE_VECTOR_DB:\n    beat_task_templates = [\n        t for t in beat_task_templates if t[\"name\"] not in _VECTOR_DB_BEAT_TASK_NAMES\n    ]\n\n\ndef make_cloud_generator_task(task: dict[str, Any]) -> dict[str, Any]:\n    cloud_task: dict[str, Any] = {}\n\n    # constant options for cloud beat task generators\n    task_schedule: timedelta = task[\"schedule\"]\n    cloud_task[\"schedule\"] = task_schedule\n    cloud_task[\"options\"] = {}\n    cloud_task[\"options\"][\"priority\"] = OnyxCeleryPriority.HIGHEST\n    cloud_task[\"options\"][\"expires\"] = BEAT_EXPIRES_DEFAULT\n\n    # settings dependent on the original task\n    cloud_task[\"name\"] = f\"{ONYX_CLOUD_CELERY_TASK_PREFIX}_{task['name']}\"\n    cloud_task[\"task\"] = OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR\n    cloud_task[\"kwargs\"] = {}\n    cloud_task[\"kwargs\"][\"task_name\"] = task[\"task\"]\n\n    optional_fields = [\"queue\", \"priority\", \"expires\"]\n    for field in optional_fields:\n        if field in task[\"options\"]:\n            cloud_task[\"kwargs\"][field] = task[\"options\"][field]\n\n    return cloud_task\n\n\n# tasks that only run in the cloud and are system wide\n# the name attribute must start with ONYX_CLOUD_CELERY_TASK_PREFIX = \"cloud\" to be seen\n# by the DynamicTenantScheduler as system wide task and not a per tenant task\nbeat_cloud_tasks: list[dict] = [\n    # cloud specific tasks\n    {\n        \"name\": f\"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-alembic\",\n        \"task\": OnyxCeleryTask.CLOUD_MONITOR_ALEMBIC,\n        \"schedule\": timedelta(hours=1),\n        \"options\": {\n            \"queue\": OnyxCeleryQueues.MONITORING,\n            \"priority\": OnyxCeleryPriority.HIGH,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": f\"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-celery-queues\",\n        \"task\": OnyxCeleryTask.CLOUD_MONITOR_CELERY_QUEUES,\n        \"schedule\": timedelta(seconds=30),\n        \"options\": {\n            \"queue\": OnyxCeleryQueues.MONITORING,\n            \"priority\": OnyxCeleryPriority.HIGH,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": f\"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-available-tenants\",\n        \"task\": OnyxCeleryTask.CLOUD_CHECK_AVAILABLE_TENANTS,\n        \"schedule\": timedelta(minutes=10),\n        \"options\": {\n            \"queue\": OnyxCeleryQueues.MONITORING,\n            \"priority\": OnyxCeleryPriority.HIGH,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n    {\n        \"name\": f\"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-celery-pidbox\",\n        \"task\": OnyxCeleryTask.CLOUD_MONITOR_CELERY_PIDBOX,\n        \"schedule\": timedelta(hours=4),\n        \"options\": {\n            \"queue\": OnyxCeleryQueues.MONITORING,\n            \"priority\": OnyxCeleryPriority.HIGH,\n            \"expires\": BEAT_EXPIRES_DEFAULT,\n        },\n    },\n]\n\n# tasks that only run self hosted\ntasks_to_schedule: list[dict] = []\nif not MULTI_TENANT:\n    tasks_to_schedule.extend(\n        [\n            {\n                \"name\": \"monitor-celery-queues\",\n                \"task\": OnyxCeleryTask.MONITOR_CELERY_QUEUES,\n                \"schedule\": timedelta(seconds=10),\n                \"options\": {\n                    \"priority\": OnyxCeleryPriority.MEDIUM,\n                    \"expires\": BEAT_EXPIRES_DEFAULT,\n                    \"queue\": OnyxCeleryQueues.MONITORING,\n                },\n            },\n            {\n                \"name\": \"monitor-process-memory\",\n                \"task\": OnyxCeleryTask.MONITOR_PROCESS_MEMORY,\n                \"schedule\": timedelta(minutes=5),\n                \"options\": {\n                    \"priority\": OnyxCeleryPriority.LOW,\n                    \"expires\": BEAT_EXPIRES_DEFAULT,\n                    \"queue\": OnyxCeleryQueues.MONITORING,\n                },\n            },\n            {\n                \"name\": \"celery-beat-heartbeat\",\n                \"task\": OnyxCeleryTask.CELERY_BEAT_HEARTBEAT,\n                \"schedule\": timedelta(minutes=1),\n                \"options\": {\n                    \"priority\": OnyxCeleryPriority.HIGHEST,\n                    \"expires\": BEAT_EXPIRES_DEFAULT,\n                    \"queue\": OnyxCeleryQueues.PRIMARY,\n                },\n            },\n        ]\n    )\n\n    tasks_to_schedule.extend(beat_task_templates)\n\n\ndef generate_cloud_tasks(\n    beat_tasks: list[dict], beat_templates: list[dict], beat_multiplier: float\n) -> list[dict[str, Any]]:\n    \"\"\"\n    beat_tasks: system wide tasks that can be sent as is\n    beat_templates: task templates that will be transformed into per tenant tasks via\n    the cloud_beat_task_generator\n    beat_multiplier: a multiplier that can be applied on top of the task schedule\n    to speed up or slow down the task generation rate. useful in production.\n\n    Returns a list of cloud tasks, which consists of incoming tasks + tasks generated\n    from incoming templates.\n    \"\"\"\n\n    if beat_multiplier <= 0:\n        raise ValueError(\"beat_multiplier must be positive!\")\n\n    cloud_tasks: list[dict] = []\n\n    # generate our tenant aware cloud tasks from the templates\n    for beat_template in beat_templates:\n        cloud_task = make_cloud_generator_task(beat_template)\n        cloud_tasks.append(cloud_task)\n\n    # factor in the cloud multiplier for the above\n    for cloud_task in cloud_tasks:\n        cloud_task[\"schedule\"] = cloud_task[\"schedule\"] * beat_multiplier\n\n    # add the fixed cloud/system beat tasks. No multiplier for these.\n    cloud_tasks.extend(copy.deepcopy(beat_tasks))\n    return cloud_tasks\n\n\ndef get_cloud_tasks_to_schedule(beat_multiplier: float) -> list[dict[str, Any]]:\n    return generate_cloud_tasks(beat_cloud_tasks, beat_task_templates, beat_multiplier)\n\n\ndef get_tasks_to_schedule() -> list[dict[str, Any]]:\n    return tasks_to_schedule\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/connector_deletion/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/connector_deletion/tasks.py",
    "content": "import traceback\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\n\nfrom celery import Celery\nfrom celery import shared_task\nfrom celery import Task\nfrom celery.exceptions import SoftTimeLimitExceeded\nfrom pydantic import ValidationError\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.celery_redis import celery_get_broker_client\nfrom onyx.background.celery.celery_redis import celery_get_queue_length\nfrom onyx.background.celery.celery_redis import celery_get_queued_task_ids\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.configs.constants import OnyxRedisSignals\nfrom onyx.db.connector import fetch_connector_by_id\nfrom onyx.db.connector_credential_pair import add_deletion_failure_message\nfrom onyx.db.connector_credential_pair import (\n    delete_connector_credential_pair__no_commit,\n)\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.connector_credential_pair import get_connector_credential_pairs\nfrom onyx.db.document import (\n    delete_all_documents_by_connector_credential_pair__no_commit,\n)\nfrom onyx.db.document import get_document_ids_for_connector_credential_pair\nfrom onyx.db.document_set import delete_document_set_cc_pair_relationship__no_commit\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.enums import SyncStatus\nfrom onyx.db.enums import SyncType\nfrom onyx.db.index_attempt import delete_index_attempts\nfrom onyx.db.index_attempt import get_recent_attempts_for_cc_pair\nfrom onyx.db.permission_sync_attempt import (\n    delete_doc_permission_sync_attempts__no_commit,\n)\nfrom onyx.db.permission_sync_attempt import (\n    delete_external_group_permission_sync_attempts__no_commit,\n)\nfrom onyx.db.search_settings import get_all_search_settings\nfrom onyx.db.sync_record import cleanup_sync_records\nfrom onyx.db.sync_record import insert_sync_record\nfrom onyx.db.sync_record import update_sync_record_status\nfrom onyx.db.tag import delete_orphan_tags__no_commit\nfrom onyx.redis.redis_connector import RedisConnector\nfrom onyx.redis.redis_connector_delete import RedisConnectorDelete\nfrom onyx.redis.redis_connector_delete import RedisConnectorDeletePayload\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.redis.redis_pool import get_redis_replica_client\nfrom onyx.utils.variable_functionality import (\n    fetch_versioned_implementation_with_fallback,\n)\nfrom onyx.utils.variable_functionality import noop_fallback\n\n\nclass TaskDependencyError(RuntimeError):\n    \"\"\"Raised to the caller to indicate dependent tasks are running that would interfere\n    with connector deletion.\"\"\"\n\n\ndef revoke_tasks_blocking_deletion(\n    redis_connector: RedisConnector, db_session: Session, app: Celery\n) -> None:\n    search_settings_list = get_all_search_settings(db_session)\n    for search_settings in search_settings_list:\n        try:\n            recent_index_attempts = get_recent_attempts_for_cc_pair(\n                cc_pair_id=redis_connector.cc_pair_id,\n                search_settings_id=search_settings.id,\n                limit=1,\n                db_session=db_session,\n            )\n            if (\n                recent_index_attempts\n                and recent_index_attempts[0].status == IndexingStatus.IN_PROGRESS\n                and recent_index_attempts[0].celery_task_id\n            ):\n                app.control.revoke(recent_index_attempts[0].celery_task_id)\n                task_logger.info(\n                    f\"Revoked indexing task {recent_index_attempts[0].celery_task_id}.\"\n                )\n        except Exception:\n            task_logger.exception(\"Exception while revoking indexing task\")\n\n    try:\n        permissions_sync_payload = redis_connector.permissions.payload\n        if permissions_sync_payload and permissions_sync_payload.celery_task_id:\n            app.control.revoke(permissions_sync_payload.celery_task_id)\n            task_logger.info(\n                f\"Revoked permissions sync task {permissions_sync_payload.celery_task_id}.\"\n            )\n    except Exception:\n        task_logger.exception(\"Exception while revoking pruning task\")\n\n    try:\n        prune_payload = redis_connector.prune.payload\n        if prune_payload and prune_payload.celery_task_id:\n            app.control.revoke(prune_payload.celery_task_id)\n            task_logger.info(f\"Revoked pruning task {prune_payload.celery_task_id}.\")\n    except Exception:\n        task_logger.exception(\"Exception while revoking permissions sync task\")\n\n    try:\n        external_group_sync_payload = redis_connector.external_group_sync.payload\n        if external_group_sync_payload and external_group_sync_payload.celery_task_id:\n            app.control.revoke(external_group_sync_payload.celery_task_id)\n            task_logger.info(\n                f\"Revoked external group sync task {external_group_sync_payload.celery_task_id}.\"\n            )\n    except Exception:\n        task_logger.exception(\"Exception while revoking external group sync task\")\n\n\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT,\n    trail=False,\n    bind=True,\n)\ndef check_for_connector_deletion_task(self: Task, *, tenant_id: str) -> bool | None:\n    r = get_redis_client()\n    r_replica = get_redis_replica_client()\n\n    lock_beat: RedisLock = r.lock(\n        OnyxRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK,\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n\n    # Prevent this task from overlapping with itself\n    if not lock_beat.acquire(blocking=False):\n        return None\n\n    try:\n        # we want to run this less frequently than the overall task\n        lock_beat.reacquire()\n        if not r.exists(OnyxRedisSignals.BLOCK_VALIDATE_CONNECTOR_DELETION_FENCES):\n            # clear fences that don't have associated celery tasks in progress\n            try:\n                r_celery = celery_get_broker_client(self.app)\n                validate_connector_deletion_fences(\n                    tenant_id, r, r_replica, r_celery, lock_beat\n                )\n            except Exception:\n                task_logger.exception(\n                    \"Exception while validating connector deletion fences\"\n                )\n\n            r.set(OnyxRedisSignals.BLOCK_VALIDATE_CONNECTOR_DELETION_FENCES, 1, ex=300)\n\n        # collect cc_pair_ids\n        cc_pair_ids: list[int] = []\n        with get_session_with_current_tenant() as db_session:\n            cc_pairs = get_connector_credential_pairs(db_session)\n            for cc_pair in cc_pairs:\n                cc_pair_ids.append(cc_pair.id)\n\n        # try running cleanup on the cc_pair_ids\n        for cc_pair_id in cc_pair_ids:\n            with get_session_with_current_tenant() as db_session:\n                redis_connector = RedisConnector(tenant_id, cc_pair_id)\n                try:\n                    try_generate_document_cc_pair_cleanup_tasks(\n                        self.app, cc_pair_id, db_session, lock_beat, tenant_id\n                    )\n                except TaskDependencyError as e:\n                    # this means we wanted to start deleting but dependent tasks were running\n                    # on the first error, we set a stop signal and revoke the dependent tasks\n                    # on subsequent errors, we hard reset blocking fences after our specified timeout\n                    # is exceeded\n                    task_logger.info(str(e))\n\n                    if not redis_connector.stop.fenced:\n                        # one time revoke of celery tasks\n                        task_logger.info(\"Revoking any tasks blocking deletion.\")\n                        revoke_tasks_blocking_deletion(\n                            redis_connector, db_session, self.app\n                        )\n                        redis_connector.stop.set_fence(True)\n                        redis_connector.stop.set_timeout()\n                    else:\n                        # stop signal already set\n                        if redis_connector.stop.timed_out:\n                            # waiting too long, just reset blocking fences\n                            task_logger.info(\n                                \"Timed out waiting for tasks blocking deletion. Resetting blocking fences.\"\n                            )\n\n                            redis_connector.prune.reset()\n                            redis_connector.permissions.reset()\n                            redis_connector.external_group_sync.reset()\n                        else:\n                            # just wait\n                            pass\n                else:\n                    # clear the stop signal if it exists ... no longer needed\n                    redis_connector.stop.set_fence(False)\n\n        lock_beat.reacquire()\n        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))\n        for key in keys:\n            key_bytes = cast(bytes, key)\n\n            if not r.exists(key_bytes):\n                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)\n                continue\n\n            key_str = key_bytes.decode(\"utf-8\")\n            if key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):\n                monitor_connector_deletion_taskset(tenant_id, key_bytes, r)\n    except SoftTimeLimitExceeded:\n        task_logger.info(\n            \"Soft time limit exceeded, task is being terminated gracefully.\"\n        )\n    except Exception:\n        task_logger.exception(\"Unexpected exception during connector deletion check\")\n    finally:\n        if lock_beat.owned():\n            lock_beat.release()\n\n    return True\n\n\ndef try_generate_document_cc_pair_cleanup_tasks(\n    app: Celery,\n    cc_pair_id: int,\n    db_session: Session,\n    lock_beat: RedisLock,\n    tenant_id: str,\n) -> int | None:\n    \"\"\"Returns an int if syncing is needed. The int represents the number of sync tasks generated.\n    Note that syncing can still be required even if the number of sync tasks generated is zero.\n    Returns None if no syncing is required.\n\n    Will raise TaskDependencyError if dependent tasks such as indexing and pruning are\n    still running. In our case, the caller reacts by setting a stop signal in Redis to\n    exit those tasks as quickly as possible.\n    \"\"\"\n\n    lock_beat.reacquire()\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n\n    # don't generate sync tasks if tasks are still pending\n    if redis_connector.delete.fenced:\n        return None\n\n    # we need to load the state of the object inside the fence\n    # to avoid a race condition with db.commit/fence deletion\n    # at the end of this taskset\n    cc_pair = get_connector_credential_pair_from_id(\n        db_session=db_session,\n        cc_pair_id=cc_pair_id,\n    )\n    if not cc_pair:\n        return None\n\n    if cc_pair.status != ConnectorCredentialPairStatus.DELETING:\n        # there should be no in-progress sync records if this is up to date\n        # clean it up just in case things got into a bad state\n        cleanup_sync_records(\n            db_session=db_session,\n            entity_id=cc_pair_id,\n            sync_type=SyncType.CONNECTOR_DELETION,\n        )\n        return None\n\n    # set a basic fence to start\n    redis_connector.delete.set_active()\n    fence_payload = RedisConnectorDeletePayload(\n        num_tasks=None,\n        submitted=datetime.now(timezone.utc),\n    )\n\n    redis_connector.delete.set_fence(fence_payload)\n\n    try:\n        # do not proceed if connector indexing or connector pruning are running\n        search_settings_list = get_all_search_settings(db_session)\n        for search_settings in search_settings_list:\n            recent_index_attempts = get_recent_attempts_for_cc_pair(\n                cc_pair_id=cc_pair_id,\n                search_settings_id=search_settings.id,\n                limit=1,\n                db_session=db_session,\n            )\n            if (\n                recent_index_attempts\n                and recent_index_attempts[0].status == IndexingStatus.IN_PROGRESS\n            ):\n                raise TaskDependencyError(\n                    \"Connector deletion - Delayed (indexing in progress): \"\n                    f\"cc_pair={cc_pair_id} \"\n                    f\"search_settings={search_settings.id}\"\n                )\n\n        if redis_connector.prune.fenced:\n            raise TaskDependencyError(\n                f\"Connector deletion - Delayed (pruning in progress): cc_pair={cc_pair_id}\"\n            )\n\n        if redis_connector.permissions.fenced:\n            raise TaskDependencyError(\n                f\"Connector deletion - Delayed (permissions in progress): cc_pair={cc_pair_id}\"\n            )\n\n        # add tasks to celery and build up the task set to monitor in redis\n        redis_connector.delete.taskset_clear()\n\n        # Add all documents that need to be updated into the queue\n        task_logger.info(\n            f\"RedisConnectorDeletion.generate_tasks starting. cc_pair={cc_pair_id}\"\n        )\n        tasks_generated = redis_connector.delete.generate_tasks(\n            app, db_session, lock_beat\n        )\n        if tasks_generated is None:\n            raise ValueError(\"RedisConnectorDeletion.generate_tasks returned None\")\n\n        try:\n            insert_sync_record(\n                db_session=db_session,\n                entity_id=cc_pair_id,\n                sync_type=SyncType.CONNECTOR_DELETION,\n            )\n        except Exception:\n            task_logger.exception(\"insert_sync_record exceptioned.\")\n\n    except TaskDependencyError:\n        redis_connector.delete.set_fence(None)\n        raise\n    except Exception:\n        task_logger.exception(\"Unexpected exception\")\n        redis_connector.delete.set_fence(None)\n        return None\n    else:\n        # Currently we are allowing the sync to proceed with 0 tasks.\n        # It's possible for sets/groups to be generated initially with no entries\n        # and they still need to be marked as up to date.\n        # if tasks_generated == 0:\n        #     return 0\n\n        task_logger.info(\n            f\"RedisConnectorDeletion.generate_tasks finished. cc_pair={cc_pair_id} tasks_generated={tasks_generated}\"\n        )\n\n        # set this only after all tasks have been added\n        fence_payload.num_tasks = tasks_generated\n        redis_connector.delete.set_fence(fence_payload)\n\n    return tasks_generated\n\n\ndef monitor_connector_deletion_taskset(\n    tenant_id: str,\n    key_bytes: bytes,\n    r: Redis,  # noqa: ARG001\n) -> None:\n    fence_key = key_bytes.decode(\"utf-8\")\n    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)\n    if cc_pair_id_str is None:\n        task_logger.warning(f\"could not parse cc_pair_id from {fence_key}\")\n        return\n\n    cc_pair_id = int(cc_pair_id_str)\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n\n    fence_data = redis_connector.delete.payload\n    if not fence_data:\n        task_logger.warning(\n            f\"Connector deletion - fence payload invalid: cc_pair={cc_pair_id}\"\n        )\n        return\n\n    if fence_data.num_tasks is None:\n        # the fence is setting up but isn't ready yet\n        return\n\n    remaining = redis_connector.delete.get_remaining()\n    task_logger.info(\n        f\"Connector deletion progress: cc_pair={cc_pair_id} remaining={remaining} initial={fence_data.num_tasks}\"\n    )\n    if remaining > 0:\n        with get_session_with_current_tenant() as db_session:\n            update_sync_record_status(\n                db_session=db_session,\n                entity_id=cc_pair_id,\n                sync_type=SyncType.CONNECTOR_DELETION,\n                sync_status=SyncStatus.IN_PROGRESS,\n                num_docs_synced=remaining,\n            )\n        return\n\n    with get_session_with_current_tenant() as db_session:\n        cc_pair = get_connector_credential_pair_from_id(\n            db_session=db_session,\n            cc_pair_id=cc_pair_id,\n        )\n        credential_id_to_delete: int | None = None\n        connector_id_to_delete: int | None = None\n        if not cc_pair:\n            task_logger.warning(\n                f\"Connector deletion - cc_pair not found: cc_pair={cc_pair_id}\"\n            )\n            return\n\n        try:\n            doc_ids = get_document_ids_for_connector_credential_pair(\n                db_session, cc_pair.connector_id, cc_pair.credential_id\n            )\n            if len(doc_ids) > 0:\n                # NOTE(rkuo): if this happens, documents somehow got added while\n                # deletion was in progress. Likely a bug gating off pruning and indexing\n                # work before deletion starts.\n                task_logger.warning(\n                    \"Connector deletion - documents still found after taskset completion. \"\n                    \"Clearing the current deletion attempt and allowing deletion to restart: \"\n                    f\"cc_pair={cc_pair_id} \"\n                    f\"docs_deleted={fence_data.num_tasks} \"\n                    f\"docs_remaining={len(doc_ids)}\"\n                )\n\n                # We don't want to waive off why we get into this state, but resetting\n                # our attempt and letting the deletion restart is a good way to recover\n                redis_connector.delete.reset()\n                raise RuntimeError(\n                    \"Connector deletion - documents still found after taskset completion\"\n                )\n\n            # clean up the rest of the related Postgres entities\n            # index attempts\n            delete_index_attempts(\n                db_session=db_session,\n                cc_pair_id=cc_pair_id,\n            )\n\n            # permission sync attempts\n            delete_doc_permission_sync_attempts__no_commit(\n                db_session=db_session,\n                cc_pair_id=cc_pair_id,\n            )\n            delete_external_group_permission_sync_attempts__no_commit(\n                db_session=db_session,\n                cc_pair_id=cc_pair_id,\n            )\n\n            # document sets\n            delete_document_set_cc_pair_relationship__no_commit(\n                db_session=db_session,\n                connector_id=cc_pair.connector_id,\n                credential_id=cc_pair.credential_id,\n            )\n\n            # user groups\n            cleanup_user_groups = fetch_versioned_implementation_with_fallback(\n                \"onyx.db.user_group\",\n                \"delete_user_group_cc_pair_relationship__no_commit\",\n                noop_fallback,\n            )\n            cleanup_user_groups(\n                cc_pair_id=cc_pair_id,\n                db_session=db_session,\n            )\n\n            # delete orphan tags\n            delete_orphan_tags__no_commit(db_session)\n\n            # Store IDs before potentially expiring cc_pair\n            connector_id_to_delete = cc_pair.connector_id\n            credential_id_to_delete = cc_pair.credential_id\n\n            # Explicitly delete document by connector credential pair records before deleting the connector\n            # This is needed because connector_id is a primary key in that table and cascading deletes won't work\n            delete_all_documents_by_connector_credential_pair__no_commit(\n                db_session=db_session,\n                connector_id=connector_id_to_delete,\n                credential_id=credential_id_to_delete,\n            )\n\n            # Flush to ensure document deletion happens before connector deletion\n            db_session.flush()\n\n            # Expire the cc_pair to ensure SQLAlchemy doesn't try to manage its state\n            # related to the deleted DocumentByConnectorCredentialPair during commit\n            db_session.expire(cc_pair)\n\n            # finally, delete the cc-pair\n            delete_connector_credential_pair__no_commit(\n                db_session=db_session,\n                connector_id=connector_id_to_delete,\n                credential_id=credential_id_to_delete,\n            )\n            # if there are no credentials left, delete the connector\n            connector = fetch_connector_by_id(\n                db_session=db_session,\n                connector_id=connector_id_to_delete,\n            )\n            if not connector or not len(connector.credentials):\n                task_logger.info(\n                    \"Connector deletion - Found no credentials left for connector, deleting connector\"\n                )\n                db_session.delete(connector)\n            db_session.commit()\n\n            update_sync_record_status(\n                db_session=db_session,\n                entity_id=cc_pair_id,\n                sync_type=SyncType.CONNECTOR_DELETION,\n                sync_status=SyncStatus.SUCCESS,\n                num_docs_synced=fence_data.num_tasks,\n            )\n\n        except Exception as e:\n            db_session.rollback()\n            stack_trace = traceback.format_exc()\n            error_message = f\"Error: {str(e)}\\n\\nStack Trace:\\n{stack_trace}\"\n            add_deletion_failure_message(db_session, cc_pair_id, error_message)\n\n            update_sync_record_status(\n                db_session=db_session,\n                entity_id=cc_pair_id,\n                sync_type=SyncType.CONNECTOR_DELETION,\n                sync_status=SyncStatus.FAILED,\n                num_docs_synced=fence_data.num_tasks,\n            )\n\n            task_logger.exception(\n                f\"Connector deletion exceptioned: \"\n                f\"cc_pair={cc_pair_id} connector={connector_id_to_delete} credential={credential_id_to_delete}\"\n            )\n            raise e\n\n    task_logger.info(\n        f\"Connector deletion succeeded: \"\n        f\"cc_pair={cc_pair_id} \"\n        f\"connector={connector_id_to_delete} \"\n        f\"credential={credential_id_to_delete} \"\n        f\"docs_deleted={fence_data.num_tasks}\"\n    )\n\n    redis_connector.delete.reset()\n\n\ndef validate_connector_deletion_fences(\n    tenant_id: str,\n    r: Redis,\n    r_replica: Redis,\n    r_celery: Redis,\n    lock_beat: RedisLock,\n) -> None:\n    # building lookup table can be expensive, so we won't bother\n    # validating until the queue is small\n    CONNECTION_DELETION_VALIDATION_MAX_QUEUE_LEN = 1024\n\n    queue_len = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_DELETION, r_celery)\n    if queue_len > CONNECTION_DELETION_VALIDATION_MAX_QUEUE_LEN:\n        return\n\n    queued_upsert_tasks = celery_get_queued_task_ids(\n        OnyxCeleryQueues.CONNECTOR_DELETION, r_celery\n    )\n\n    # validate all existing connector deletion jobs\n    lock_beat.reacquire()\n    keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))\n    for key in keys:\n        key_bytes = cast(bytes, key)\n        key_str = key_bytes.decode(\"utf-8\")\n        if not key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):\n            continue\n\n        validate_connector_deletion_fence(\n            tenant_id,\n            key_bytes,\n            queued_upsert_tasks,\n            r,\n        )\n\n        lock_beat.reacquire()\n\n    return\n\n\ndef validate_connector_deletion_fence(\n    tenant_id: str,\n    key_bytes: bytes,\n    queued_upsert_tasks: set[str],\n    r: Redis,\n) -> None:\n    \"\"\"Checks for the error condition where an indexing fence is set but the associated celery tasks don't exist.\n    This can happen if the indexing worker hard crashes or is terminated.\n    Being in this bad state means the fence will never clear without help, so this function\n    gives the help.\n\n    How this works:\n    1. This function renews the active signal with a 5 minute TTL under the following conditions\n    1.2. When the task is seen in the redis queue\n    1.3. When the task is seen in the reserved / prefetched list\n\n    2. Externally, the active signal is renewed when:\n    2.1. The fence is created\n    2.2. The indexing watchdog checks the spawned task.\n\n    3. The TTL allows us to get through the transitions on fence startup\n    and when the task starts executing.\n\n    More TTL clarification: it is seemingly impossible to exactly query Celery for\n    whether a task is in the queue or currently executing.\n    1. An unknown task id is always returned as state PENDING.\n    2. Redis can be inspected for the task id, but the task id is gone between the time a worker receives the task\n    and the time it actually starts on the worker.\n\n    queued_tasks: the celery queue of lightweight permission sync tasks\n    reserved_tasks: prefetched tasks for sync task generator\n    \"\"\"\n    # if the fence doesn't exist, there's nothing to do\n    fence_key = key_bytes.decode(\"utf-8\")\n    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)\n    if cc_pair_id_str is None:\n        task_logger.warning(\n            f\"validate_connector_deletion_fence - could not parse id from {fence_key}\"\n        )\n        return\n\n    cc_pair_id = int(cc_pair_id_str)\n    # parse out metadata and initialize the helper class with it\n    redis_connector = RedisConnector(tenant_id, int(cc_pair_id))\n\n    # check to see if the fence/payload exists\n    if not redis_connector.delete.fenced:\n        return\n\n    # in the cloud, the payload format may have changed ...\n    # it's a little sloppy, but just reset the fence for now if that happens\n    # TODO: add intentional cleanup/abort logic\n    try:\n        payload = redis_connector.delete.payload\n    except ValidationError:\n        task_logger.exception(\n            \"validate_connector_deletion_fence - \"\n            \"Resetting fence because fence schema is out of date: \"\n            f\"cc_pair={cc_pair_id} \"\n            f\"fence={fence_key}\"\n        )\n\n        redis_connector.delete.reset()\n        return\n\n    if not payload:\n        return\n\n    # OK, there's actually something for us to validate\n\n    # look up every task in the current taskset in the celery queue\n    # every entry in the taskset should have an associated entry in the celery task queue\n    # because we get the celery tasks first, the entries in our own permissions taskset\n    # should be roughly a subset of the tasks in celery\n\n    # this check isn't very exact, but should be sufficient over a period of time\n    # A single successful check over some number of attempts is sufficient.\n\n    # TODO: if the number of tasks in celery is much lower than than the taskset length\n    # we might be able to shortcut the lookup since by definition some of the tasks\n    # must not exist in celery.\n\n    tasks_scanned = 0\n    tasks_not_in_celery = 0  # a non-zero number after completing our check is bad\n\n    for member in r.sscan_iter(redis_connector.delete.taskset_key):\n        tasks_scanned += 1\n\n        member_bytes = cast(bytes, member)\n        member_str = member_bytes.decode(\"utf-8\")\n        if member_str in queued_upsert_tasks:\n            continue\n\n        tasks_not_in_celery += 1\n\n    task_logger.info(\n        f\"validate_connector_deletion_fence task check: tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}\"\n    )\n\n    # we're active if there are still tasks to run and those tasks all exist in celery\n    if tasks_scanned > 0 and tasks_not_in_celery == 0:\n        redis_connector.delete.set_active()\n        return\n\n    # we may want to enable this check if using the active task list somehow isn't good enough\n    # if redis_connector_index.generator_locked():\n    #     logger.info(f\"{payload.celery_task_id} is currently executing.\")\n\n    # if we get here, we didn't find any direct indication that the associated celery tasks exist,\n    # but they still might be there due to gaps in our ability to check states during transitions\n    # Checking the active signal safeguards us against these transition periods\n    # (which has a duration that allows us to bridge those gaps)\n    if redis_connector.delete.active():\n        return\n\n    # celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.\n    task_logger.warning(\n        \"validate_connector_deletion_fence - \"\n        \"Resetting fence because no associated celery tasks were found: \"\n        f\"cc_pair={cc_pair_id} \"\n        f\"fence={fence_key}\"\n    )\n\n    redis_connector.delete.reset()\n    return\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/docfetching/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/docfetching/task_creation_utils.py",
    "content": "from uuid import uuid4\n\nfrom celery import Celery\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.index_attempt import mark_attempt_failed\nfrom onyx.db.indexing_coordination import IndexingCoordination\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import SearchSettings\n\n\ndef try_creating_docfetching_task(\n    celery_app: Celery,\n    cc_pair: ConnectorCredentialPair,\n    search_settings: SearchSettings,\n    reindex: bool,\n    db_session: Session,\n    r: Redis,\n    tenant_id: str,\n) -> int | None:\n    \"\"\"Checks for any conditions that should block the indexing task from being\n    created, then creates the task.\n\n    Does not check for scheduling related conditions as this function\n    is used to trigger indexing immediately.\n\n    Now uses database-based coordination instead of Redis fencing.\n    \"\"\"\n\n    LOCK_TIMEOUT = 30\n\n    # we need to serialize any attempt to trigger indexing since it can be triggered\n    # either via celery beat or manually (API call)\n    lock: RedisLock = r.lock(\n        DANSWER_REDIS_FUNCTION_LOCK_PREFIX + \"try_creating_indexing_task\",\n        timeout=LOCK_TIMEOUT,\n    )\n\n    acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2)\n    if not acquired:\n        return None\n\n    index_attempt_id = None\n    try:\n        # Basic status checks\n        db_session.refresh(cc_pair)\n        if cc_pair.status == ConnectorCredentialPairStatus.DELETING:\n            return None\n\n        # Generate custom task ID for tracking\n        custom_task_id = f\"docfetching_{cc_pair.id}_{search_settings.id}_{uuid4()}\"\n\n        # Try to create a new index attempt using database coordination\n        # This replaces the Redis fencing mechanism\n        index_attempt_id = IndexingCoordination.try_create_index_attempt(\n            db_session=db_session,\n            cc_pair_id=cc_pair.id,\n            search_settings_id=search_settings.id,\n            celery_task_id=custom_task_id,\n            from_beginning=reindex,\n        )\n\n        if index_attempt_id is None:\n            # Another indexing attempt is already running\n            return None\n\n        # Use higher priority for first-time indexing to ensure new connectors\n        # get processed before re-indexing of existing connectors\n        has_successful_attempt = cc_pair.last_successful_index_time is not None\n        priority = (\n            OnyxCeleryPriority.MEDIUM\n            if has_successful_attempt\n            else OnyxCeleryPriority.HIGH\n        )\n\n        # Send the task to Celery\n        result = celery_app.send_task(\n            OnyxCeleryTask.CONNECTOR_DOC_FETCHING_TASK,\n            kwargs=dict(\n                index_attempt_id=index_attempt_id,\n                cc_pair_id=cc_pair.id,\n                search_settings_id=search_settings.id,\n                tenant_id=tenant_id,\n            ),\n            queue=OnyxCeleryQueues.CONNECTOR_DOC_FETCHING,\n            task_id=custom_task_id,\n            priority=priority,\n        )\n        if not result:\n            raise RuntimeError(\"send_task for connector_doc_fetching_task failed.\")\n\n        task_logger.info(\n            f\"Created docfetching task: \"\n            f\"cc_pair={cc_pair.id} \"\n            f\"search_settings={search_settings.id} \"\n            f\"attempt_id={index_attempt_id} \"\n            f\"celery_task_id={custom_task_id}\"\n        )\n\n        return index_attempt_id\n\n    except Exception:\n        task_logger.exception(\n            f\"try_creating_indexing_task - Unexpected exception: cc_pair={cc_pair.id} search_settings={search_settings.id}\"\n        )\n\n        # Clean up on failure\n        if index_attempt_id is not None:\n            mark_attempt_failed(index_attempt_id, db_session)\n\n        return None\n    finally:\n        if lock.owned():\n            lock.release()\n\n    return index_attempt_id\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/docfetching/tasks.py",
    "content": "import multiprocessing\nimport os\nimport time\nimport traceback\nfrom time import sleep\n\nimport sentry_sdk\nfrom celery import Celery\nfrom celery import shared_task\nfrom celery import Task\n\nfrom onyx import __version__\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.memory_monitoring import emit_process_memory\nfrom onyx.background.celery.tasks.docprocessing.heartbeat import start_heartbeat\nfrom onyx.background.celery.tasks.docprocessing.heartbeat import stop_heartbeat\nfrom onyx.background.celery.tasks.docprocessing.tasks import ConnectorIndexingLogBuilder\nfrom onyx.background.celery.tasks.docprocessing.utils import IndexingCallback\nfrom onyx.background.celery.tasks.models import DocProcessingContext\nfrom onyx.background.celery.tasks.models import IndexingWatchdogTerminalStatus\nfrom onyx.background.celery.tasks.models import SimpleJobResult\nfrom onyx.background.indexing.job_client import SimpleJob\nfrom onyx.background.indexing.job_client import SimpleJobClient\nfrom onyx.background.indexing.job_client import SimpleJobException\nfrom onyx.background.indexing.run_docfetching import run_docfetching_entrypoint\nfrom onyx.configs.constants import CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.index_attempt import get_index_attempt\nfrom onyx.db.index_attempt import mark_attempt_canceled\nfrom onyx.db.index_attempt import mark_attempt_failed\nfrom onyx.db.indexing_coordination import IndexingCoordination\nfrom onyx.redis.redis_connector import RedisConnector\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import global_version\nfrom shared_configs.configs import SENTRY_DSN\n\nlogger = setup_logger()\n\n\ndef _verify_indexing_attempt(\n    index_attempt_id: int,\n    cc_pair_id: int,\n    search_settings_id: int,\n) -> None:\n    \"\"\"\n    Verify that the indexing attempt exists and is in the correct state.\n    \"\"\"\n\n    with get_session_with_current_tenant() as db_session:\n        attempt = get_index_attempt(db_session, index_attempt_id)\n\n        if not attempt:\n            raise SimpleJobException(\n                f\"docfetching_task - IndexAttempt not found: attempt_id={index_attempt_id}\",\n                code=IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND.code,\n            )\n\n        if attempt.connector_credential_pair_id != cc_pair_id:\n            raise SimpleJobException(\n                f\"docfetching_task - CC pair mismatch: expected={cc_pair_id} actual={attempt.connector_credential_pair_id}\",\n                code=IndexingWatchdogTerminalStatus.FENCE_MISMATCH.code,\n            )\n\n        if attempt.search_settings_id != search_settings_id:\n            raise SimpleJobException(\n                f\"docfetching_task - Search settings mismatch: expected={search_settings_id} actual={attempt.search_settings_id}\",\n                code=IndexingWatchdogTerminalStatus.FENCE_MISMATCH.code,\n            )\n\n        if attempt.status not in [\n            IndexingStatus.NOT_STARTED,\n            IndexingStatus.IN_PROGRESS,\n        ]:\n            raise SimpleJobException(\n                f\"docfetching_task - Invalid attempt status: attempt_id={index_attempt_id} status={attempt.status}\",\n                code=IndexingWatchdogTerminalStatus.FENCE_MISMATCH.code,\n            )\n\n        # Check for cancellation\n        if IndexingCoordination.check_cancellation_requested(\n            db_session, index_attempt_id\n        ):\n            raise SimpleJobException(\n                f\"docfetching_task - Cancellation requested: attempt_id={index_attempt_id}\",\n                code=IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL.code,\n            )\n\n    logger.info(\n        f\"docfetching_task - IndexAttempt verified: \"\n        f\"attempt_id={index_attempt_id} \"\n        f\"cc_pair={cc_pair_id} \"\n        f\"search_settings={search_settings_id}\"\n    )\n\n\ndef docfetching_task(\n    app: Celery,\n    index_attempt_id: int,\n    cc_pair_id: int,\n    search_settings_id: int,\n    is_ee: bool,\n    tenant_id: str,\n) -> None:\n    \"\"\"\n    This function is run in a SimpleJob as a new process. It is responsible for validating\n    some stuff, but basically it just calls run_indexing_entrypoint.\n\n    NOTE: if an exception is raised out of this task, the primary worker will detect\n    that the task transitioned to a \"READY\" state but the generator_complete_key doesn't exist.\n    This will cause the primary worker to abort the indexing attempt and clean up.\n    \"\"\"\n\n    # Start heartbeat for this indexing attempt\n    heartbeat_thread, stop_event = start_heartbeat(index_attempt_id)\n    try:\n        _docfetching_task(\n            app, index_attempt_id, cc_pair_id, search_settings_id, is_ee, tenant_id\n        )\n    finally:\n        stop_heartbeat(heartbeat_thread, stop_event)  # Stop heartbeat before exiting\n\n\ndef _docfetching_task(\n    app: Celery,\n    index_attempt_id: int,\n    cc_pair_id: int,\n    search_settings_id: int,\n    is_ee: bool,\n    tenant_id: str,\n) -> None:\n    # Since connector_indexing_proxy_task spawns a new process using this function as\n    # the entrypoint, we init Sentry here.\n    if SENTRY_DSN:\n        sentry_sdk.init(\n            dsn=SENTRY_DSN,\n            traces_sample_rate=0.1,\n            release=__version__,\n        )\n        logger.info(\"Sentry initialized\")\n    else:\n        logger.debug(\"Sentry DSN not provided, skipping Sentry initialization\")\n\n    logger.info(\n        f\"Indexing spawned task starting: \"\n        f\"attempt={index_attempt_id} \"\n        f\"tenant={tenant_id} \"\n        f\"cc_pair={cc_pair_id} \"\n        f\"search_settings={search_settings_id}\"\n    )\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n\n    # TODO: remove all fences, cause all signals to be set in postgres\n    if redis_connector.delete.fenced:\n        raise SimpleJobException(\n            f\"Indexing will not start because connector deletion is in progress: \"\n            f\"attempt={index_attempt_id} \"\n            f\"cc_pair={cc_pair_id} \"\n            f\"fence={redis_connector.delete.fence_key}\",\n            code=IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION.code,\n        )\n\n    if redis_connector.stop.fenced:\n        raise SimpleJobException(\n            f\"Indexing will not start because a connector stop signal was detected: \"\n            f\"attempt={index_attempt_id} \"\n            f\"cc_pair={cc_pair_id} \"\n            f\"fence={redis_connector.stop.fence_key}\",\n            code=IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL.code,\n        )\n\n    # Verify the indexing attempt exists and is valid\n    # This replaces the Redis fence payload waiting\n    _verify_indexing_attempt(index_attempt_id, cc_pair_id, search_settings_id)\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            attempt = get_index_attempt(db_session, index_attempt_id)\n            if not attempt:\n                raise SimpleJobException(\n                    f\"Index attempt not found: index_attempt={index_attempt_id}\",\n                    code=IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH.code,\n                )\n\n            cc_pair = get_connector_credential_pair_from_id(\n                db_session=db_session,\n                cc_pair_id=cc_pair_id,\n            )\n\n            if not cc_pair:\n                raise SimpleJobException(\n                    f\"cc_pair not found: cc_pair={cc_pair_id}\",\n                    code=IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH.code,\n                )\n\n        # define a callback class\n        callback = IndexingCallback(\n            redis_connector,\n        )\n\n        logger.info(\n            f\"Indexing spawned task running entrypoint: attempt={index_attempt_id} \"\n            f\"tenant={tenant_id} \"\n            f\"cc_pair={cc_pair_id} \"\n            f\"search_settings={search_settings_id}\"\n        )\n\n        # This is where the heavy/real work happens\n        run_docfetching_entrypoint(\n            app,\n            index_attempt_id,\n            tenant_id,\n            cc_pair_id,\n            is_ee,\n            callback=callback,\n        )\n\n    except ConnectorValidationError:\n        raise SimpleJobException(\n            f\"Indexing task failed: attempt={index_attempt_id} \"\n            f\"tenant={tenant_id} \"\n            f\"cc_pair={cc_pair_id} \"\n            f\"search_settings={search_settings_id}\",\n            code=IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR.code,\n        )\n\n    except Exception as e:\n        logger.exception(\n            f\"Indexing spawned task failed: attempt={index_attempt_id} \"\n            f\"tenant={tenant_id} \"\n            f\"cc_pair={cc_pair_id} \"\n            f\"search_settings={search_settings_id}\"\n        )\n\n        # special bulletproofing ... truncate long exception messages\n        # for exception types that require more args, this will fail\n        # thus the try/except\n        try:\n            sanitized_e = type(e)(str(e)[:1024])\n            sanitized_e.__traceback__ = e.__traceback__\n            raise sanitized_e\n        except Exception:\n            raise e\n\n    logger.info(\n        f\"Indexing spawned task finished: attempt={index_attempt_id} cc_pair={cc_pair_id} search_settings={search_settings_id}\"\n    )\n    os._exit(0)  # ensure process exits cleanly\n\n\ndef process_job_result(\n    job: SimpleJob,\n    connector_source: str | None,\n    index_attempt_id: int,\n    log_builder: ConnectorIndexingLogBuilder,\n) -> SimpleJobResult:\n    result = SimpleJobResult()\n    result.connector_source = connector_source\n\n    if job.process:\n        result.exit_code = job.process.exitcode\n\n    if job.status != \"error\":\n        result.status = IndexingWatchdogTerminalStatus.SUCCEEDED\n        return result\n\n    ignore_exitcode = False\n\n    # In EKS, there is an edge case where successful tasks return exit\n    # code 1 in the cloud due to the set_spawn_method not sticking.\n    # Workaround: check that the total number of batches is set, since this only\n    # happens when docfetching completed successfully\n    with get_session_with_current_tenant() as db_session:\n        index_attempt = get_index_attempt(db_session, index_attempt_id)\n        if index_attempt and index_attempt.total_batches is not None:\n            ignore_exitcode = True\n\n    if ignore_exitcode:\n        result.status = IndexingWatchdogTerminalStatus.SUCCEEDED\n        task_logger.warning(\n            log_builder.build(\n                \"Indexing watchdog - spawned task has non-zero exit code but completion signal is OK. Continuing...\",\n                exit_code=str(result.exit_code),\n            )\n        )\n    else:\n        if result.exit_code is not None:\n            result.status = IndexingWatchdogTerminalStatus.from_code(result.exit_code)\n\n        job_level_exception = job.exception()\n        result.exception_str = f\"Docfetching returned exit code {result.exit_code} with exception: {job_level_exception}\"\n\n    return result\n\n\n@shared_task(\n    name=OnyxCeleryTask.CONNECTOR_DOC_FETCHING_TASK,\n    bind=True,\n    acks_late=False,\n    track_started=True,\n)\ndef docfetching_proxy_task(\n    self: Task,\n    index_attempt_id: int,\n    cc_pair_id: int,\n    search_settings_id: int,\n    tenant_id: str,\n) -> None:\n    \"\"\"\n    This task is the entrypoint for the full indexing pipeline, which is composed of two tasks:\n    docfetching and docprocessing.\n    This task is spawned by \"try_creating_indexing_task\" which is called in the \"check_for_indexing\" task.\n\n    This task spawns a new process for a new scheduled index attempt. That\n    new process (which runs the docfetching_task function) does the following:\n\n    1)  determines parameters of the indexing attempt (which connector indexing function to run,\n        start and end time, from prev checkpoint or not), then run that connector. Specifically,\n        connectors are responsible for reading data from an outside source and converting it to Onyx documents.\n        At the moment these two steps (reading external data and converting to an Onyx document)\n        are not parallelized in most connectors; that's a subject for future work.\n\n    Each document batch produced by step 1 is stored in the file store, and a docprocessing task is spawned\n    to process it. docprocessing involves the steps listed below.\n\n    2) upserts documents to postgres (index_doc_batch_prepare)\n    3) chunks each document (optionally adds context for contextual rag)\n    4) embeds chunks (embed_chunks_with_failure_handling) via a call to the model server\n    5) write chunks to vespa (write_chunks_to_vector_db_with_backoff)\n    6) update document and indexing metadata in postgres\n    7) pulls all document IDs from the source and compares those IDs to locally stored documents and deletes\n    all locally stored IDs missing from the most recently pulled document ID list\n\n    Some important notes:\n    Invariants:\n    - docfetching proxy tasks are spawned by check_for_indexing. The proxy then runs the docfetching_task wrapped in a watchdog.\n      The watchdog is responsible for monitoring the docfetching_task and marking the index attempt as failed\n      if it is not making progress.\n    - All docprocessing tasks are spawned by a docfetching task.\n    - all docfetching tasks, docprocessing tasks, and document batches in the file store are\n      associated with a specific index attempt.\n    - the index attempt status is the source of truth for what is currently happening with the index attempt.\n      It is coupled with the creation/running of docfetching and docprocessing tasks as much as possible.\n\n    How we deal with failures/ partial indexing:\n    - non-checkpointed connectors/ new runs in general => delete the old document batches from the file store and do the new run\n    - checkpointed connectors + resuming from checkpoint => reissue the old document batches and do a new run\n\n    Misc:\n    - most inter-process communication is handled in postgres, some is still in redis and we're trying to remove it\n    - Heartbeat spawned in docfetching and docprocessing is how check_for_indexing monitors liveliness\n    - progress based liveliness check: if nothing is done in 3-6 hours, mark the attempt as failed\n    - TODO: task level timeouts (i.e. a connector stuck in an infinite loop)\n\n\n    Comments below are from the old version and some may no longer be valid.\n    TODO(rkuo): refactor this so that there is a single return path where we canonically\n    log the result of running this function.\n\n    Some more Richard notes:\n    celery out of process task execution strategy is pool=prefork, but it uses fork,\n    and forking is inherently unstable.\n\n    To work around this, we use pool=threads and proxy our work to a spawned task.\n\n    acks_late must be set to False. Otherwise, celery's visibility timeout will\n    cause any task that runs longer than the timeout to be redispatched by the broker.\n    There appears to be no good workaround for this, so we need to handle redispatching\n    manually.\n    NOTE: we try/except all db access in this function because as a watchdog, this function\n    needs to be extremely stable.\n    \"\"\"\n    # TODO: remove dependence on Redis\n    start = time.monotonic()\n\n    result = SimpleJobResult()\n\n    ctx = DocProcessingContext(\n        tenant_id=tenant_id,\n        cc_pair_id=cc_pair_id,\n        search_settings_id=search_settings_id,\n        index_attempt_id=index_attempt_id,\n    )\n\n    log_builder = ConnectorIndexingLogBuilder(ctx)\n\n    task_logger.info(\n        log_builder.build(\n            \"Indexing watchdog - starting\",\n            mp_start_method=str(multiprocessing.get_start_method()),\n        )\n    )\n\n    if not self.request.id:\n        task_logger.error(\"self.request.id is None!\")\n\n    client = SimpleJobClient()\n    task_logger.info(f\"submitting docfetching_task with tenant_id={tenant_id}\")\n\n    job = client.submit(\n        docfetching_task,\n        self.app,\n        index_attempt_id,\n        cc_pair_id,\n        search_settings_id,\n        global_version.is_ee_version(),\n        tenant_id,\n    )\n\n    if not job or not job.process:\n        result.status = IndexingWatchdogTerminalStatus.SPAWN_FAILED\n        task_logger.info(\n            log_builder.build(\n                \"Indexing watchdog - finished\",\n                status=str(result.status.value),\n                exit_code=str(result.exit_code),\n            )\n        )\n        return\n\n    # Ensure the process has moved out of the starting state\n    num_waits = 0\n    while True:\n        if num_waits > 15:\n            result.status = IndexingWatchdogTerminalStatus.SPAWN_NOT_ALIVE\n            task_logger.info(\n                log_builder.build(\n                    \"Indexing watchdog - finished\",\n                    status=str(result.status.value),\n                    exit_code=str(result.exit_code),\n                )\n            )\n            job.release()\n            return\n\n        if job.process.is_alive() or job.process.exitcode is not None:\n            break\n\n        sleep(1)\n        num_waits += 1\n\n    task_logger.info(\n        log_builder.build(\n            \"Indexing watchdog - spawn succeeded\",\n            pid=str(job.process.pid),\n        )\n    )\n\n    # Track the last time memory info was emitted\n    last_memory_emit_time = 0.0\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            index_attempt = get_index_attempt(\n                db_session=db_session,\n                index_attempt_id=index_attempt_id,\n                eager_load_cc_pair=True,\n            )\n            if not index_attempt:\n                raise RuntimeError(\"Index attempt not found\")\n\n            result.connector_source = (\n                index_attempt.connector_credential_pair.connector.source.value\n            )\n\n        while True:\n            sleep(5)\n\n            time.monotonic()\n\n            # if the job is done, clean up and break\n            if job.done():\n                try:\n                    result = process_job_result(\n                        job, result.connector_source, index_attempt_id, log_builder\n                    )\n                except Exception:\n                    task_logger.exception(\n                        log_builder.build(\n                            \"Indexing watchdog - spawned task exceptioned\"\n                        )\n                    )\n                finally:\n                    job.release()\n                    break\n\n            # log the memory usage for tracking down memory leaks / connector-specific memory issues\n            pid = job.process.pid\n            if pid is not None:\n                # Only emit memory info once per minute (60 seconds)\n                current_time = time.monotonic()\n                if current_time - last_memory_emit_time >= 60.0:\n                    emit_process_memory(\n                        pid,\n                        \"indexing_worker\",\n                        {\n                            \"cc_pair_id\": cc_pair_id,\n                            \"search_settings_id\": search_settings_id,\n                            \"index_attempt_id\": index_attempt_id,\n                        },\n                    )\n                    last_memory_emit_time = current_time\n\n            # if the spawned task is still running, restart the check once again\n            # if the index attempt is not in a finished status\n            try:\n                with get_session_with_current_tenant() as db_session:\n                    index_attempt = get_index_attempt(\n                        db_session=db_session, index_attempt_id=index_attempt_id\n                    )\n\n                    if not index_attempt:\n                        continue\n\n                    if not index_attempt.is_finished():\n                        continue\n\n            except Exception:\n                task_logger.exception(\n                    log_builder.build(\n                        \"Indexing watchdog - transient exception looking up index attempt\"\n                    )\n                )\n                continue\n\n    except Exception as e:\n        result.status = IndexingWatchdogTerminalStatus.WATCHDOG_EXCEPTIONED\n        if isinstance(e, ConnectorValidationError):\n            # No need to expose full stack trace for validation errors\n            result.exception_str = str(e)\n        else:\n            result.exception_str = traceback.format_exc()\n\n    # handle exit and reporting\n    elapsed = time.monotonic() - start\n    if result.exception_str is not None:\n        # print with exception\n        try:\n            with get_session_with_current_tenant() as db_session:\n                attempt = get_index_attempt(db_session, ctx.index_attempt_id)\n\n                # only mark failures if not already terminal,\n                # otherwise we're overwriting potential real stack traces\n                if attempt and not attempt.status.is_terminal():\n                    failure_reason = (\n                        f\"Spawned task exceptioned: exit_code={result.exit_code}\"\n                    )\n                    mark_attempt_failed(\n                        ctx.index_attempt_id,\n                        db_session,\n                        failure_reason=failure_reason,\n                        full_exception_trace=result.exception_str,\n                    )\n        except Exception:\n            task_logger.exception(\n                log_builder.build(\n                    \"Indexing watchdog - transient exception marking index attempt as failed\"\n                )\n            )\n\n        normalized_exception_str = \"None\"\n        if result.exception_str:\n            normalized_exception_str = result.exception_str.replace(\n                \"\\n\", \"\\\\n\"\n            ).replace('\"', '\\\\\"')\n\n        task_logger.warning(\n            log_builder.build(\n                \"Indexing watchdog - finished\",\n                source=result.connector_source,\n                status=result.status.value,\n                exit_code=str(result.exit_code),\n                exception=f'\"{normalized_exception_str}\"',\n                elapsed=f\"{elapsed:.2f}s\",\n            )\n        )\n        raise RuntimeError(f\"Exception encountered: traceback={result.exception_str}\")\n\n    # print without exception\n    if result.status == IndexingWatchdogTerminalStatus.TERMINATED_BY_SIGNAL:\n        try:\n            with get_session_with_current_tenant() as db_session:\n                logger.exception(\n                    f\"Marking attempt {index_attempt_id} as canceled due to termination signal\"\n                )\n                mark_attempt_canceled(\n                    index_attempt_id,\n                    db_session,\n                    \"Connector termination signal detected\",\n                )\n        except Exception:\n            task_logger.exception(\n                log_builder.build(\n                    \"Indexing watchdog - transient exception marking index attempt as canceled\"\n                )\n            )\n\n        job.cancel()\n    elif result.status == IndexingWatchdogTerminalStatus.TERMINATED_BY_ACTIVITY_TIMEOUT:\n        try:\n            with get_session_with_current_tenant() as db_session:\n                mark_attempt_failed(\n                    index_attempt_id,\n                    db_session,\n                    \"Indexing watchdog - activity timeout exceeded: \"\n                    f\"attempt={index_attempt_id} \"\n                    f\"timeout={CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT}s\",\n                )\n        except Exception:\n            logger.exception(\n                log_builder.build(\n                    \"Indexing watchdog - transient exception marking index attempt as failed\"\n                )\n            )\n        job.cancel()\n    else:\n        pass\n\n    task_logger.info(\n        log_builder.build(\n            \"Indexing watchdog - finished\",\n            source=result.connector_source,\n            status=str(result.status.value),\n            exit_code=str(result.exit_code),\n            elapsed=f\"{elapsed:.2f}s\",\n        )\n    )\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/docprocessing/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/docprocessing/heartbeat.py",
    "content": "import contextvars\nimport threading\n\nfrom sqlalchemy import update\n\nfrom onyx.configs.constants import INDEXING_WORKER_HEARTBEAT_INTERVAL\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import IndexAttempt\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef start_heartbeat(index_attempt_id: int) -> tuple[threading.Thread, threading.Event]:\n    \"\"\"Start a heartbeat thread for the given index attempt\"\"\"\n    stop_event = threading.Event()\n\n    def heartbeat_loop() -> None:\n        while not stop_event.wait(INDEXING_WORKER_HEARTBEAT_INTERVAL):\n            try:\n                with get_session_with_current_tenant() as db_session:\n                    db_session.execute(\n                        update(IndexAttempt)\n                        .where(IndexAttempt.id == index_attempt_id)\n                        .values(heartbeat_counter=IndexAttempt.heartbeat_counter + 1)\n                    )\n                    db_session.commit()\n            except Exception:\n                logger.exception(\n                    \"Failed to update heartbeat counter for index attempt %s\",\n                    index_attempt_id,\n                )\n\n    # Ensure contextvars from the outer context are available in the thread\n    context = contextvars.copy_context()\n    thread = threading.Thread(target=context.run, args=(heartbeat_loop,), daemon=True)\n    thread.start()\n    return thread, stop_event\n\n\ndef stop_heartbeat(thread: threading.Thread, stop_event: threading.Event) -> None:\n    \"\"\"Stop the heartbeat thread\"\"\"\n    stop_event.set()\n    thread.join(timeout=5)  # Wait up to 5 seconds for clean shutdown\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/docprocessing/tasks.py",
    "content": "import gc\nimport os\nimport time\nimport traceback\nfrom collections import defaultdict\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\n\nfrom celery import Celery\nfrom celery import shared_task\nfrom celery import Task\nfrom celery.exceptions import SoftTimeLimitExceeded\nfrom fastapi import HTTPException\nfrom pydantic import BaseModel\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy import exists\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.celery_redis import celery_find_task\nfrom onyx.background.celery.celery_redis import celery_get_broker_client\nfrom onyx.background.celery.celery_redis import celery_get_unacked_task_ids\nfrom onyx.background.celery.celery_utils import httpx_init_vespa_pool\nfrom onyx.background.celery.memory_monitoring import emit_process_memory\nfrom onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT\nfrom onyx.background.celery.tasks.docfetching.task_creation_utils import (\n    try_creating_docfetching_task,\n)\nfrom onyx.background.celery.tasks.docprocessing.heartbeat import start_heartbeat\nfrom onyx.background.celery.tasks.docprocessing.heartbeat import stop_heartbeat\nfrom onyx.background.celery.tasks.docprocessing.utils import IndexingCallback\nfrom onyx.background.celery.tasks.docprocessing.utils import is_in_repeated_error_state\nfrom onyx.background.celery.tasks.docprocessing.utils import should_index\nfrom onyx.background.celery.tasks.models import DocProcessingContext\nfrom onyx.background.indexing.checkpointing_utils import cleanup_checkpoint\nfrom onyx.background.indexing.checkpointing_utils import (\n    get_index_attempts_with_old_checkpoints,\n)\nfrom onyx.background.indexing.index_attempt_utils import cleanup_index_attempts\nfrom onyx.background.indexing.index_attempt_utils import get_old_index_attempts\nfrom onyx.configs.app_configs import AUTH_TYPE\nfrom onyx.configs.app_configs import MANAGED_VESPA\nfrom onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH\nfrom onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH\nfrom onyx.configs.constants import AuthType\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import CELERY_INDEXING_LOCK_TIMEOUT\nfrom onyx.configs.constants import MilestoneRecordType\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.configs.constants import OnyxRedisSignals\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import IndexAttemptMetadata\nfrom onyx.db.connector import mark_ccpair_with_indexing_trigger\nfrom onyx.db.connector_credential_pair import (\n    fetch_indexable_standard_connector_credential_pair_ids,\n)\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.connector_credential_pair import set_cc_pair_repeated_error_state\nfrom onyx.db.connector_credential_pair import update_connector_credential_pair_from_id\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.time_utils import get_db_current_time\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import IndexingMode\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.enums import SwitchoverType\nfrom onyx.db.index_attempt import create_index_attempt_error\nfrom onyx.db.index_attempt import get_index_attempt\nfrom onyx.db.index_attempt import get_index_attempt_errors_for_cc_pair\nfrom onyx.db.index_attempt import IndexAttemptError\nfrom onyx.db.index_attempt import mark_attempt_canceled\nfrom onyx.db.index_attempt import mark_attempt_failed\nfrom onyx.db.index_attempt import mark_attempt_partially_succeeded\nfrom onyx.db.index_attempt import mark_attempt_succeeded\nfrom onyx.db.indexing_coordination import CoordinationStatus\nfrom onyx.db.indexing_coordination import INDEXING_PROGRESS_TIMEOUT_HOURS\nfrom onyx.db.indexing_coordination import IndexingCoordination\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.models import SearchSettings\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.db.search_settings import get_secondary_search_settings\nfrom onyx.db.swap_index import check_and_perform_index_swap\nfrom onyx.document_index.factory import get_all_document_indices\nfrom onyx.file_store.document_batch_storage import DocumentBatchStorage\nfrom onyx.file_store.document_batch_storage import get_document_batch_storage\nfrom onyx.httpx.httpx_pool import HttpxPool\nfrom onyx.indexing.adapters.document_indexing_adapter import (\n    DocumentIndexingBatchAdapter,\n)\nfrom onyx.indexing.embedder import DefaultIndexingEmbedder\nfrom onyx.indexing.indexing_pipeline import run_indexing_pipeline\nfrom onyx.natural_language_processing.search_nlp_models import EmbeddingModel\nfrom onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder\nfrom onyx.redis.redis_connector import RedisConnector\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.redis.redis_pool import get_redis_replica_client\nfrom onyx.redis.redis_pool import redis_lock_dump\nfrom onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT\nfrom onyx.redis.redis_utils import is_fence\nfrom onyx.server.runtime.onyx_runtime import OnyxRuntime\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.middleware import make_randomized_onyx_request_id\nfrom onyx.utils.telemetry import mt_cloud_telemetry\nfrom onyx.utils.telemetry import optional_telemetry\nfrom onyx.utils.telemetry import RecordType\nfrom shared_configs.configs import INDEXING_MODEL_SERVER_HOST\nfrom shared_configs.configs import INDEXING_MODEL_SERVER_PORT\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import USAGE_LIMITS_ENABLED\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom shared_configs.contextvars import INDEX_ATTEMPT_INFO_CONTEXTVAR\n\nlogger = setup_logger()\n\nDOCPROCESSING_STALL_TIMEOUT_MULTIPLIER = 4\nDOCPROCESSING_HEARTBEAT_TIMEOUT_MULTIPLIER = 24\n# Heartbeat timeout: if no heartbeat received for 30 minutes, consider it dead\n# This should be much longer than INDEXING_WORKER_HEARTBEAT_INTERVAL (30s)\nHEARTBEAT_TIMEOUT_SECONDS = 30 * 60  # 30 minutes\nINDEX_ATTEMPT_BATCH_SIZE = 500\n\n\ndef _get_fence_validation_block_expiration() -> int:\n    \"\"\"\n    Compute the expiration time for the fence validation block signal.\n    Base expiration is 60 seconds, multiplied by the beat multiplier only in MULTI_TENANT mode.\n    \"\"\"\n    base_expiration = 60  # seconds\n\n    if not MULTI_TENANT:\n        return base_expiration\n\n    try:\n        beat_multiplier = OnyxRuntime.get_beat_multiplier()\n    except Exception:\n        beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT\n\n    return int(base_expiration * beat_multiplier)\n\n\ndef validate_active_indexing_attempts(\n    lock_beat: RedisLock,\n) -> None:\n    \"\"\"\n    Validates that active indexing attempts are still alive by checking heartbeat.\n    If no heartbeat has been received for a certain amount of time, mark the attempt as failed.\n\n    This uses the heartbeat_counter field which is incremented by active worker threads\n    every INDEXING_WORKER_HEARTBEAT_INTERVAL seconds.\n    \"\"\"\n    logger.info(\"Validating active indexing attempts\")\n\n    with get_session_with_current_tenant() as db_session:\n        # Find all active indexing attempts\n        active_attempts = (\n            db_session.execute(\n                select(IndexAttempt).where(\n                    IndexAttempt.status.in_([IndexingStatus.IN_PROGRESS]),\n                    IndexAttempt.celery_task_id.isnot(None),\n                )\n            )\n            .scalars()\n            .all()\n        )\n\n        for attempt in active_attempts:\n            lock_beat.reacquire()\n\n            # Initialize timeout for each attempt to prevent state pollution\n            heartbeat_timeout_seconds = HEARTBEAT_TIMEOUT_SECONDS\n\n            # Double-check the attempt still exists and has the same status\n            fresh_attempt = get_index_attempt(db_session, attempt.id)\n            if not fresh_attempt or fresh_attempt.status.is_terminal():\n                continue\n\n            # Check if this attempt has been updated with heartbeat tracking\n            if fresh_attempt.last_heartbeat_time is None:\n                # First time seeing this attempt - initialize heartbeat tracking\n                fresh_attempt.last_heartbeat_value = fresh_attempt.heartbeat_counter\n                fresh_attempt.last_heartbeat_time = datetime.now(timezone.utc)\n                db_session.commit()\n\n                task_logger.info(\n                    f\"Initialized heartbeat tracking for attempt {fresh_attempt.id}: counter={fresh_attempt.heartbeat_counter}\"\n                )\n                continue\n\n            # Check if the heartbeat counter has advanced since last check\n            current_counter = fresh_attempt.heartbeat_counter\n            last_known_counter = fresh_attempt.last_heartbeat_value\n            last_check_time = fresh_attempt.last_heartbeat_time\n\n            task_logger.debug(\n                f\"Checking heartbeat for attempt {fresh_attempt.id}: \"\n                f\"current_counter={current_counter} \"\n                f\"last_known_counter={last_known_counter} \"\n                f\"last_check_time={last_check_time}\"\n            )\n\n            if current_counter > last_known_counter:\n                # Heartbeat has advanced - worker is alive\n                fresh_attempt.last_heartbeat_value = current_counter\n                fresh_attempt.last_heartbeat_time = datetime.now(timezone.utc)\n                db_session.commit()\n\n                task_logger.debug(\n                    f\"Heartbeat advanced for attempt {fresh_attempt.id}: new_counter={current_counter}\"\n                )\n                continue\n\n            if fresh_attempt.total_batches and fresh_attempt.completed_batches == 0:\n                heartbeat_timeout_seconds = (\n                    HEARTBEAT_TIMEOUT_SECONDS\n                    * DOCPROCESSING_HEARTBEAT_TIMEOUT_MULTIPLIER\n                )\n            cutoff_time = datetime.now(timezone.utc) - timedelta(\n                seconds=heartbeat_timeout_seconds\n            )\n\n            # Heartbeat hasn't advanced - check if it's been too long\n            if last_check_time >= cutoff_time:\n                task_logger.debug(\n                    f\"Heartbeat hasn't advanced for attempt {fresh_attempt.id} but still within timeout window\"\n                )\n                continue\n\n            # No heartbeat for too long - mark as failed\n            failure_reason = (\n                f\"No heartbeat received for {heartbeat_timeout_seconds} seconds\"\n            )\n\n            task_logger.warning(\n                f\"Heartbeat timeout for attempt {fresh_attempt.id}: \"\n                f\"last_heartbeat_time={last_check_time} \"\n                f\"cutoff_time={cutoff_time} \"\n                f\"counter={current_counter}\"\n            )\n\n            try:\n                mark_attempt_failed(\n                    fresh_attempt.id,\n                    db_session,\n                    failure_reason=failure_reason,\n                )\n\n                task_logger.error(\n                    f\"Marked attempt {fresh_attempt.id} as failed due to heartbeat timeout\"\n                )\n\n            except Exception:\n                task_logger.exception(\n                    f\"Failed to mark attempt {fresh_attempt.id} as failed due to heartbeat timeout\"\n                )\n\n\nclass ConnectorIndexingLogBuilder:\n    def __init__(self, ctx: DocProcessingContext):\n        self.ctx = ctx\n\n    def build(self, msg: str, **kwargs: Any) -> str:\n        msg_final = (\n            f\"{msg}: \"\n            f\"tenant_id={self.ctx.tenant_id} \"\n            f\"attempt={self.ctx.index_attempt_id} \"\n            f\"cc_pair={self.ctx.cc_pair_id} \"\n            f\"search_settings={self.ctx.search_settings_id}\"\n        )\n\n        # Append extra keyword arguments in logfmt style\n        if kwargs:\n            extra_logfmt = \" \".join(f\"{key}={value}\" for key, value in kwargs.items())\n            msg_final = f\"{msg_final} {extra_logfmt}\"\n\n        return msg_final\n\n\ndef monitor_indexing_attempt_progress(\n    attempt: IndexAttempt, tenant_id: str, db_session: Session, task: Task\n) -> None:\n    \"\"\"\n    TODO: rewrite this docstring\n    Monitor the progress of an indexing attempt using database coordination.\n    This replaces the Redis fence-based monitoring.\n\n    Race condition handling:\n    - Uses database coordination status to track progress\n    - Only updates CC pair status based on confirmed database state\n    - Handles concurrent completion gracefully\n    \"\"\"\n    if not attempt.celery_task_id:\n        # Attempt hasn't been assigned a task yet\n        return\n\n    cc_pair = get_connector_credential_pair_from_id(\n        db_session, attempt.connector_credential_pair_id\n    )\n    if not cc_pair:\n        task_logger.warning(f\"CC pair not found for attempt {attempt.id}\")\n        return\n\n    # Check if the CC Pair should be moved to INITIAL_INDEXING\n    if cc_pair.status == ConnectorCredentialPairStatus.SCHEDULED:\n        cc_pair.status = ConnectorCredentialPairStatus.INITIAL_INDEXING\n        db_session.commit()\n\n    # Get coordination status to track progress\n\n    coordination_status = IndexingCoordination.get_coordination_status(\n        db_session, attempt.id\n    )\n\n    current_db_time = get_db_current_time(db_session)\n    total_batches: int | str = (\n        coordination_status.total_batches\n        if coordination_status.total_batches is not None\n        else \"?\"\n    )\n    if coordination_status.found:\n        task_logger.info(\n            f\"Indexing attempt progress: \"\n            f\"attempt={attempt.id} \"\n            f\"cc_pair={attempt.connector_credential_pair_id} \"\n            f\"search_settings={attempt.search_settings_id} \"\n            f\"completed_batches={coordination_status.completed_batches} \"\n            f\"total_batches={total_batches} \"\n            f\"total_docs={coordination_status.total_docs} \"\n            f\"total_failures={coordination_status.total_failures}\"\n            f\"elapsed={(current_db_time - attempt.time_created).seconds}\"\n        )\n\n    if coordination_status.cancellation_requested:\n        task_logger.info(f\"Indexing attempt {attempt.id} has been cancelled\")\n        mark_attempt_canceled(attempt.id, db_session)\n        return\n\n    storage = get_document_batch_storage(\n        attempt.connector_credential_pair_id, attempt.id\n    )\n\n    # Check task completion using Celery\n    try:\n        check_indexing_completion(\n            attempt.id, coordination_status, storage, tenant_id, task\n        )\n    except Exception as e:\n        logger.exception(\n            f\"Failed to monitor document processing completion: attempt={attempt.id} error={str(e)}\"\n        )\n\n        # Mark the attempt as failed if monitoring fails\n        try:\n            with get_session_with_current_tenant() as db_session:\n                mark_attempt_failed(\n                    attempt.id,\n                    db_session,\n                    failure_reason=f\"Processing monitoring failed: {str(e)}\",\n                    full_exception_trace=traceback.format_exc(),\n                )\n\n        except Exception:\n            logger.exception(\"Failed to mark attempt as failed\")\n\n        # Try to clean up storage\n        try:\n            logger.info(f\"Cleaning up storage after monitoring failure: {storage}\")\n            storage.cleanup_all_batches()\n        except Exception:\n            logger.exception(\"Failed to cleanup storage after monitoring failure\")\n\n\ndef _resolve_indexing_entity_errors(\n    cc_pair_id: int,\n    db_session: Session,\n) -> None:\n    unresolved_errors = get_index_attempt_errors_for_cc_pair(\n        cc_pair_id=cc_pair_id,\n        unresolved_only=True,\n        db_session=db_session,\n    )\n    for error in unresolved_errors:\n        if error.entity_id:\n            error.is_resolved = True\n            db_session.add(error)\n    db_session.commit()\n\n\ndef check_indexing_completion(\n    index_attempt_id: int,\n    coordination_status: CoordinationStatus,\n    storage: DocumentBatchStorage,\n    tenant_id: str,\n    task: Task,\n) -> None:\n\n    logger.info(\n        f\"Checking for indexing completion: attempt={index_attempt_id} tenant={tenant_id}\"\n    )\n\n    # Check if indexing is complete and all batches are processed\n    batches_total = coordination_status.total_batches\n    batches_processed = coordination_status.completed_batches\n    indexing_completed = (\n        batches_total is not None and batches_processed >= batches_total\n    )\n\n    logger.info(\n        f\"Indexing status: \"\n        f\"indexing_completed={indexing_completed} \"\n        f\"batches_processed={batches_processed}/{batches_total if batches_total is not None else '?'} \"\n        f\"total_docs={coordination_status.total_docs} \"\n        f\"total_chunks={coordination_status.total_chunks} \"\n        f\"total_failures={coordination_status.total_failures}\"\n    )\n\n    # Update progress tracking and check for stalls\n    with get_session_with_current_tenant() as db_session:\n        stalled_timeout_hours = INDEXING_PROGRESS_TIMEOUT_HOURS\n        # Index attempts that are waiting between docfetching and\n        # docprocessing get a generous stalling timeout\n        if batches_total is not None and batches_processed == 0:\n            stalled_timeout_hours = (\n                stalled_timeout_hours * DOCPROCESSING_STALL_TIMEOUT_MULTIPLIER\n            )\n\n        timed_out = not IndexingCoordination.update_progress_tracking(\n            db_session,\n            index_attempt_id,\n            batches_processed,\n            timeout_hours=stalled_timeout_hours,\n        )\n\n        # Check for stalls (3-6 hour timeout). Only applies to in-progress attempts.\n        attempt = get_index_attempt(db_session, index_attempt_id)\n        if attempt and timed_out:\n            if attempt.status == IndexingStatus.IN_PROGRESS:\n                logger.error(\n                    f\"Indexing attempt {index_attempt_id} has been indexing for \"\n                    f\"{stalled_timeout_hours // 2}-{stalled_timeout_hours} hours without progress. \"\n                    f\"Marking it as failed.\"\n                )\n                mark_attempt_failed(\n                    index_attempt_id, db_session, failure_reason=\"Stalled indexing\"\n                )\n            elif (\n                attempt.status == IndexingStatus.NOT_STARTED and attempt.celery_task_id\n            ):\n                # Check if the task exists in the celery queue\n                # This handles the case where Redis dies after task creation but before task execution\n                redis_celery = celery_get_broker_client(task.app)\n                task_exists = celery_find_task(\n                    attempt.celery_task_id,\n                    OnyxCeleryQueues.CONNECTOR_DOC_FETCHING,\n                    redis_celery,\n                )\n                unacked_task_ids = celery_get_unacked_task_ids(\n                    OnyxCeleryQueues.CONNECTOR_DOC_FETCHING, redis_celery\n                )\n\n                if not task_exists and attempt.celery_task_id not in unacked_task_ids:\n                    # there is a race condition where the docfetching task has been taken off\n                    # the queues (i.e. started) but the indexing attempt still has a status of\n                    # Not Started because the switch to in progress takes like 0.1 seconds.\n                    # sleep a bit and confirm that the attempt is still not in progress.\n                    time.sleep(1)\n                    attempt = get_index_attempt(db_session, index_attempt_id)\n                    if attempt and attempt.status == IndexingStatus.NOT_STARTED:\n                        logger.error(\n                            f\"Task {attempt.celery_task_id} attached to indexing attempt \"\n                            f\"{index_attempt_id} does not exist in the queue. \"\n                            f\"Marking indexing attempt as failed.\"\n                        )\n                        mark_attempt_failed(\n                            index_attempt_id,\n                            db_session,\n                            failure_reason=\"Task not in queue\",\n                        )\n            else:\n                logger.info(\n                    f\"Indexing attempt {index_attempt_id} is {attempt.status}. 3-6 hours without heartbeat \"\n                    \"but task is in the queue. Likely underprovisioned docfetching worker.\"\n                )\n                # Update last progress time so we won't time out again for another 3 hours\n                IndexingCoordination.update_progress_tracking(\n                    db_session,\n                    index_attempt_id,\n                    batches_processed,\n                    force_update_progress=True,\n                )\n\n    # check again on the next check_for_indexing task\n    # TODO: on the cloud this is currently 25 minutes at most, which\n    # is honestly too slow. We should either increase the frequency of\n    # this task or change where we check for completion.\n    if not indexing_completed:\n        return\n\n    # If processing is complete, handle completion\n    logger.info(f\"Connector indexing finished for index attempt {index_attempt_id}.\")\n\n    # All processing is complete\n    total_failures = coordination_status.total_failures\n\n    with get_session_with_current_tenant() as db_session:\n        if total_failures == 0:\n            attempt = mark_attempt_succeeded(index_attempt_id, db_session)\n            logger.info(f\"Index attempt {index_attempt_id} completed successfully\")\n        else:\n            attempt = mark_attempt_partially_succeeded(index_attempt_id, db_session)\n            logger.info(\n                f\"Index attempt {index_attempt_id} completed with {total_failures} failures\"\n            )\n\n        # Update CC pair status if successful\n        cc_pair = get_connector_credential_pair_from_id(\n            db_session, attempt.connector_credential_pair_id\n        )\n        if cc_pair is None:\n            raise RuntimeError(\n                f\"CC pair {attempt.connector_credential_pair_id} not found in database\"\n            )\n\n        if attempt.status.is_successful():\n            # NOTE: we define the last successful index time as the time the last successful\n            # attempt finished. This is distinct from the poll_range_end of the last successful\n            # attempt, which is the time up to which documents have been fetched.\n            cc_pair.last_successful_index_time = attempt.time_updated\n            if cc_pair.status in [\n                ConnectorCredentialPairStatus.SCHEDULED,\n                ConnectorCredentialPairStatus.INITIAL_INDEXING,\n            ]:\n                # User file connectors must be paused on success\n                # NOTE: _run_indexing doesn't update connectors if the index attempt is the future embedding model\n                cc_pair.status = ConnectorCredentialPairStatus.ACTIVE\n                db_session.commit()\n\n            mt_cloud_telemetry(\n                tenant_id=tenant_id,\n                distinct_id=tenant_id,\n                event=MilestoneRecordType.CONNECTOR_SUCCEEDED,\n            )\n\n            # Clear repeated error state on success\n            if cc_pair.in_repeated_error_state:\n                cc_pair.in_repeated_error_state = False\n                db_session.commit()\n\n            if attempt.status == IndexingStatus.SUCCESS:\n                logger.info(\n                    f\"Resolving indexing entity errors for attempt {index_attempt_id}\"\n                )\n                _resolve_indexing_entity_errors(\n                    cc_pair_id=attempt.connector_credential_pair_id,\n                    db_session=db_session,\n                )\n\n    # Clean up FileStore storage (still needed for document batches during transition)\n    try:\n        logger.info(f\"Cleaning up storage after indexing completion: {storage}\")\n        storage.cleanup_all_batches()\n    except Exception:\n        logger.exception(\"Failed to clean up document batches - continuing\")\n\n    logger.info(f\"Database coordination completed for attempt {index_attempt_id}\")\n\n\ndef active_indexing_attempt(\n    cc_pair_id: int,\n    search_settings_id: int,\n    db_session: Session,\n) -> bool:\n    \"\"\"\n    Check if there's already an active indexing attempt for this CC pair + search settings.\n    This prevents race conditions where multiple indexing attempts could be created.\n    We check for any non-terminal status (NOT_STARTED, IN_PROGRESS).\n\n    Returns True if there's an active indexing attempt, False otherwise.\n    \"\"\"\n    active_indexing_attempt = db_session.execute(\n        select(\n            exists().where(\n                IndexAttempt.connector_credential_pair_id == cc_pair_id,\n                IndexAttempt.search_settings_id == search_settings_id,\n                IndexAttempt.status.in_(\n                    [\n                        IndexingStatus.NOT_STARTED,\n                        IndexingStatus.IN_PROGRESS,\n                    ]\n                ),\n            )\n        )\n    ).scalar()\n\n    if active_indexing_attempt:\n        task_logger.debug(\n            f\"active_indexing_attempt - Skipping due to active indexing attempt: \"\n            f\"cc_pair={cc_pair_id} search_settings={search_settings_id}\"\n        )\n\n    return bool(active_indexing_attempt)\n\n\ndef _kickoff_indexing_tasks(\n    celery_app: Celery,\n    db_session: Session,\n    search_settings: SearchSettings,\n    cc_pair_ids: list[int],\n    secondary_index_building: bool,\n    redis_client: Redis,\n    lock_beat: RedisLock,\n    tenant_id: str,\n) -> int:\n    \"\"\"Kick off indexing tasks for the given cc_pair_ids and search_settings.\n\n    Returns the number of tasks successfully created.\n    \"\"\"\n    tasks_created = 0\n\n    for cc_pair_id in cc_pair_ids:\n        lock_beat.reacquire()\n\n        # Lightweight check prior to fetching cc pair\n        if active_indexing_attempt(\n            cc_pair_id=cc_pair_id,\n            search_settings_id=search_settings.id,\n            db_session=db_session,\n        ):\n            continue\n\n        cc_pair = get_connector_credential_pair_from_id(\n            db_session=db_session,\n            cc_pair_id=cc_pair_id,\n        )\n        if not cc_pair:\n            task_logger.warning(\n                f\"_kickoff_indexing_tasks - CC pair not found: cc_pair={cc_pair_id}\"\n            )\n            continue\n\n        # Heavyweight check after fetching cc pair\n        if not should_index(\n            cc_pair=cc_pair,\n            search_settings_instance=search_settings,\n            secondary_index_building=secondary_index_building,\n            db_session=db_session,\n        ):\n            task_logger.debug(\n                f\"_kickoff_indexing_tasks - Not indexing cc_pair_id: {cc_pair_id} \"\n                f\"search_settings={search_settings.id}, \"\n                f\"secondary_index_building={secondary_index_building}\"\n            )\n            continue\n\n        task_logger.debug(\n            f\"_kickoff_indexing_tasks - Will index cc_pair_id: {cc_pair_id} \"\n            f\"search_settings={search_settings.id}, \"\n            f\"secondary_index_building={secondary_index_building}\"\n        )\n\n        reindex = False\n        # the indexing trigger is only checked and cleared with the current search settings\n        if search_settings.status.is_current() and cc_pair.indexing_trigger is not None:\n            if cc_pair.indexing_trigger == IndexingMode.REINDEX:\n                reindex = True\n\n            task_logger.info(\n                f\"_kickoff_indexing_tasks - Connector indexing manual trigger detected: \"\n                f\"cc_pair={cc_pair.id} \"\n                f\"search_settings={search_settings.id} \"\n                f\"indexing_mode={cc_pair.indexing_trigger}\"\n            )\n\n            mark_ccpair_with_indexing_trigger(cc_pair.id, None, db_session)\n\n        # using a task queue and only allowing one task per cc_pair/search_setting\n        # prevents us from starving out certain attempts\n        attempt_id = try_creating_docfetching_task(\n            celery_app,\n            cc_pair,\n            search_settings,\n            reindex,\n            db_session,\n            redis_client,\n            tenant_id,\n        )\n\n        if attempt_id is not None:\n            task_logger.info(\n                f\"Connector indexing queued: index_attempt={attempt_id} cc_pair={cc_pair.id} search_settings={search_settings.id}\"\n            )\n            tasks_created += 1\n        else:\n            task_logger.error(\n                f\"Failed to create indexing task: cc_pair={cc_pair.id} search_settings={search_settings.id}\"\n            )\n\n    return tasks_created\n\n\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_INDEXING,\n    soft_time_limit=300,\n    bind=True,\n)\ndef check_for_indexing(self: Task, *, tenant_id: str) -> int | None:\n    \"\"\"a lightweight task used to kick off the pipeline of indexing tasks.\n    Occcasionally does some validation of existing state to clear up error conditions.\n\n    This task is the entrypoint for the full \"indexing pipeline\", which is composed\n    of two tasks: \"docfetching\" and \"docprocessing\". More details in\n    the docfetching task (OnyxCeleryTask.CONNECTOR_DOC_FETCHING_TASK).\n\n    For cc pairs that should be indexed (see should_index()), this task\n    calls try_creating_docfetching_task, which creates a docfetching task.\n    All the logic for determining what state the indexing pipeline is in\n    w.r.t previous failed attempt, checkpointing, etc is handled in the docfetching task.\n    \"\"\"\n\n    time_start = time.monotonic()\n    task_logger.warning(\"check_for_indexing - Starting\")\n\n    tasks_created = 0\n    locked = False\n    redis_client = get_redis_client()\n    redis_client_replica = get_redis_replica_client()\n\n    # we need to use celery's redis client to access its redis data\n    # (which lives on a different db number)\n    # redis_client_celery: Redis = self.app.broker_connection().channel().client  # type: ignore\n\n    lock_beat: RedisLock = redis_client.lock(\n        OnyxRedisLocks.CHECK_INDEXING_BEAT_LOCK,\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n\n    # these tasks should never overlap\n    if not lock_beat.acquire(blocking=False):\n        return None\n\n    try:\n        locked = True\n\n        # SPECIAL 0/3: sync lookup table for active fences\n        # we want to run this less frequently than the overall task\n        if not redis_client.exists(OnyxRedisSignals.BLOCK_BUILD_FENCE_LOOKUP_TABLE):\n            # build a lookup table of existing fences\n            # this is just a migration concern and should be unnecessary once\n            # lookup tables are rolled out\n            for key_bytes in redis_client_replica.scan_iter(\n                count=SCAN_ITER_COUNT_DEFAULT\n            ):\n                if is_fence(key_bytes) and not redis_client.sismember(\n                    OnyxRedisConstants.ACTIVE_FENCES, key_bytes\n                ):\n                    logger.warning(f\"Adding {key_bytes} to the lookup table.\")\n                    redis_client.sadd(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)\n\n            redis_client.set(\n                OnyxRedisSignals.BLOCK_BUILD_FENCE_LOOKUP_TABLE,\n                1,\n                ex=OnyxRuntime.get_build_fence_lookup_table_interval(),\n            )\n\n        # 1/3: KICKOFF\n\n        # check for search settings swap\n        with get_session_with_current_tenant() as db_session:\n            old_search_settings = check_and_perform_index_swap(db_session=db_session)\n            current_search_settings = get_current_search_settings(db_session)\n            # So that the first time users aren't surprised by really slow speed of first\n            # batch of documents indexed\n            if current_search_settings.provider_type is None and not MULTI_TENANT:\n                if old_search_settings:\n                    embedding_model = EmbeddingModel.from_db_model(\n                        search_settings=current_search_settings,\n                        server_host=INDEXING_MODEL_SERVER_HOST,\n                        server_port=INDEXING_MODEL_SERVER_PORT,\n                    )\n\n                    # only warm up if search settings were changed\n                    warm_up_bi_encoder(\n                        embedding_model=embedding_model,\n                    )\n\n        # gather search settings and indexable cc_pair_ids\n        # indexable CC pairs include everything for future model and only active cc pairs for current model\n        lock_beat.reacquire()\n        with get_session_with_current_tenant() as db_session:\n            # Get CC pairs for primary search settings\n            standard_cc_pair_ids = (\n                fetch_indexable_standard_connector_credential_pair_ids(\n                    db_session, active_cc_pairs_only=True\n                )\n            )\n\n            primary_cc_pair_ids = standard_cc_pair_ids\n\n            # Get CC pairs for secondary search settings\n            secondary_cc_pair_ids: list[int] = []\n            secondary_search_settings = get_secondary_search_settings(db_session)\n            if secondary_search_settings:\n                # For ACTIVE_ONLY, we skip paused connectors\n                include_paused = (\n                    secondary_search_settings.switchover_type\n                    != SwitchoverType.ACTIVE_ONLY\n                )\n                standard_cc_pair_ids = (\n                    fetch_indexable_standard_connector_credential_pair_ids(\n                        db_session, active_cc_pairs_only=not include_paused\n                    )\n                )\n\n                secondary_cc_pair_ids = standard_cc_pair_ids\n\n        # Flag CC pairs in repeated error state for primary/current search settings\n        with get_session_with_current_tenant() as db_session:\n            for cc_pair_id in primary_cc_pair_ids:\n                lock_beat.reacquire()\n\n                cc_pair = get_connector_credential_pair_from_id(\n                    db_session=db_session,\n                    cc_pair_id=cc_pair_id,\n                )\n\n                # if already in repeated error state, don't do anything\n                # this is important so that we don't keep pausing the connector\n                # immediately upon a user un-pausing it to manually re-trigger and\n                # recover.\n                if (\n                    cc_pair\n                    and not cc_pair.in_repeated_error_state\n                    and is_in_repeated_error_state(\n                        cc_pair=cc_pair,\n                        search_settings_id=current_search_settings.id,\n                        db_session=db_session,\n                    )\n                ):\n                    set_cc_pair_repeated_error_state(\n                        db_session=db_session,\n                        cc_pair_id=cc_pair_id,\n                        in_repeated_error_state=True,\n                    )\n                    # When entering repeated error state, also pause the connector\n                    # to prevent continued indexing retry attempts burning through embedding credits.\n                    # NOTE: only for Cloud, since most self-hosted users use self-hosted embedding\n                    # models. Also, they are more prone to repeated failures -> eventual success.\n                    if AUTH_TYPE == AuthType.CLOUD:\n                        update_connector_credential_pair_from_id(\n                            db_session=db_session,\n                            cc_pair_id=cc_pair.id,\n                            status=ConnectorCredentialPairStatus.PAUSED,\n                        )\n\n        # NOTE: At this point, we haven't done heavy checks on whether or not the CC pairs should actually be indexed\n        # Heavy check, should_index(), is called in _kickoff_indexing_tasks\n        with get_session_with_current_tenant() as db_session:\n            # Primary first\n            tasks_created += _kickoff_indexing_tasks(\n                celery_app=self.app,\n                db_session=db_session,\n                search_settings=current_search_settings,\n                cc_pair_ids=primary_cc_pair_ids,\n                secondary_index_building=secondary_search_settings is not None,\n                redis_client=redis_client,\n                lock_beat=lock_beat,\n                tenant_id=tenant_id,\n            )\n\n            # Secondary indexing (only if secondary search settings exist and switchover_type is not INSTANT)\n            if (\n                secondary_search_settings\n                and secondary_search_settings.switchover_type != SwitchoverType.INSTANT\n                and secondary_cc_pair_ids\n            ):\n                tasks_created += _kickoff_indexing_tasks(\n                    celery_app=self.app,\n                    db_session=db_session,\n                    search_settings=secondary_search_settings,\n                    cc_pair_ids=secondary_cc_pair_ids,\n                    secondary_index_building=True,\n                    redis_client=redis_client,\n                    lock_beat=lock_beat,\n                    tenant_id=tenant_id,\n                )\n            elif (\n                secondary_search_settings\n                and secondary_search_settings.switchover_type == SwitchoverType.INSTANT\n            ):\n                task_logger.info(\n                    f\"Skipping secondary indexing: switchover_type=INSTANT for search_settings={secondary_search_settings.id}\"\n                )\n\n        # 2/3: VALIDATE\n        # Check for inconsistent index attempts - active attempts without task IDs\n        # This can happen if attempt creation fails partway through\n        lock_beat.reacquire()\n        with get_session_with_current_tenant() as db_session:\n            inconsistent_attempts = (\n                db_session.execute(\n                    select(IndexAttempt).where(\n                        IndexAttempt.status.in_(\n                            [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]\n                        ),\n                        IndexAttempt.celery_task_id.is_(None),\n                    )\n                )\n                .scalars()\n                .all()\n            )\n\n            for attempt in inconsistent_attempts:\n                lock_beat.reacquire()\n\n                # Double-check the attempt still has the inconsistent state\n                fresh_attempt = get_index_attempt(db_session, attempt.id)\n                if (\n                    not fresh_attempt\n                    or fresh_attempt.celery_task_id\n                    or fresh_attempt.status.is_terminal()\n                ):\n                    continue\n\n                failure_reason = (\n                    f\"Inconsistent index attempt found - active status without Celery task: \"\n                    f\"index_attempt={attempt.id} \"\n                    f\"cc_pair={attempt.connector_credential_pair_id} \"\n                    f\"search_settings={attempt.search_settings_id}\"\n                )\n                task_logger.error(failure_reason)\n                mark_attempt_failed(\n                    attempt.id, db_session, failure_reason=failure_reason\n                )\n\n        lock_beat.reacquire()\n        # we want to run this less frequently than the overall task\n        if not redis_client.exists(OnyxRedisSignals.BLOCK_VALIDATE_INDEXING_FENCES):\n            # Check for orphaned index attempts that have Celery task IDs but no actual running tasks\n            # This can happen if workers crash or tasks are terminated unexpectedly\n            # We reuse the same Redis signal name for backwards compatibility\n            try:\n                validate_active_indexing_attempts(lock_beat)\n            except Exception:\n                task_logger.exception(\n                    \"Exception while validating active indexing attempts\"\n                )\n\n            redis_client.set(\n                OnyxRedisSignals.BLOCK_VALIDATE_INDEXING_FENCES,\n                1,\n                ex=_get_fence_validation_block_expiration(),\n            )\n\n        # 3/3: FINALIZE - Monitor active indexing attempts using database\n        lock_beat.reacquire()\n        with get_session_with_current_tenant() as db_session:\n            # Monitor all active indexing attempts directly from the database\n            # This replaces the Redis fence-based monitoring\n            active_attempts = (\n                db_session.execute(\n                    select(IndexAttempt).where(\n                        IndexAttempt.status.in_(\n                            [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]\n                        )\n                    )\n                )\n                .scalars()\n                .all()\n            )\n\n            for attempt in active_attempts:\n                try:\n                    monitor_indexing_attempt_progress(\n                        attempt, tenant_id, db_session, self\n                    )\n                except Exception:\n                    task_logger.exception(f\"Error monitoring attempt {attempt.id}\")\n\n                lock_beat.reacquire()\n\n    except SoftTimeLimitExceeded:\n        task_logger.info(\n            \"Soft time limit exceeded, task is being terminated gracefully.\"\n        )\n    except Exception:\n        task_logger.exception(\"Unexpected exception during indexing check\")\n    finally:\n        if locked:\n            if lock_beat.owned():\n                lock_beat.release()\n            else:\n                task_logger.error(\n                    f\"check_for_indexing - Lock not owned on completion: tenant={tenant_id}\"\n                )\n                redis_lock_dump(lock_beat, redis_client)\n\n    time_elapsed = time.monotonic() - time_start\n    task_logger.info(f\"check_for_indexing finished: elapsed={time_elapsed:.2f}\")\n    return tasks_created\n\n\n# primary\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_CHECKPOINT_CLEANUP,\n    soft_time_limit=300,\n    bind=True,\n)\ndef check_for_checkpoint_cleanup(self: Task, *, tenant_id: str) -> None:\n    \"\"\"Clean up old checkpoints that are older than 7 days.\"\"\"\n    locked = False\n    redis_client = get_redis_client(tenant_id=tenant_id)\n    lock: RedisLock = redis_client.lock(\n        OnyxRedisLocks.CHECK_CHECKPOINT_CLEANUP_BEAT_LOCK,\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n\n    # these tasks should never overlap\n    if not lock.acquire(blocking=False):\n        return None\n\n    try:\n        locked = True\n        with get_session_with_current_tenant() as db_session:\n            old_attempts = get_index_attempts_with_old_checkpoints(db_session)\n            for attempt in old_attempts:\n                task_logger.info(\n                    f\"Cleaning up checkpoint for index attempt {attempt.id}\"\n                )\n                self.app.send_task(\n                    OnyxCeleryTask.CLEANUP_CHECKPOINT,\n                    kwargs={\n                        \"index_attempt_id\": attempt.id,\n                        \"tenant_id\": tenant_id,\n                    },\n                    queue=OnyxCeleryQueues.CHECKPOINT_CLEANUP,\n                    priority=OnyxCeleryPriority.MEDIUM,\n                )\n    except Exception:\n        task_logger.exception(\"Unexpected exception during checkpoint cleanup\")\n        return None\n    finally:\n        if locked:\n            if lock.owned():\n                lock.release()\n            else:\n                task_logger.error(\n                    f\"check_for_checkpoint_cleanup - Lock not owned on completion: tenant={tenant_id}\"\n                )\n\n\n# light worker\n@shared_task(\n    name=OnyxCeleryTask.CLEANUP_CHECKPOINT,\n    bind=True,\n)\ndef cleanup_checkpoint_task(\n    self: Task,  # noqa: ARG001\n    *,\n    index_attempt_id: int,\n    tenant_id: str | None,\n) -> None:\n    \"\"\"Clean up a checkpoint for a given index attempt\"\"\"\n\n    start = time.monotonic()\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            cleanup_checkpoint(db_session, index_attempt_id)\n    finally:\n        elapsed = time.monotonic() - start\n\n        task_logger.info(\n            f\"cleanup_checkpoint_task completed: tenant_id={tenant_id} index_attempt_id={index_attempt_id} elapsed={elapsed:.2f}\"\n        )\n\n\n# primary\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_INDEX_ATTEMPT_CLEANUP,\n    soft_time_limit=300,\n    bind=True,\n)\ndef check_for_index_attempt_cleanup(self: Task, *, tenant_id: str) -> None:\n    \"\"\"Clean up old index attempts that are older than 7 days.\"\"\"\n    locked = False\n    redis_client = get_redis_client(tenant_id=tenant_id)\n    lock: RedisLock = redis_client.lock(\n        OnyxRedisLocks.CHECK_INDEX_ATTEMPT_CLEANUP_BEAT_LOCK,\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n\n    # these tasks should never overlap\n    if not lock.acquire(blocking=False):\n        task_logger.info(\n            f\"check_for_index_attempt_cleanup - Lock not acquired: tenant={tenant_id}\"\n        )\n        return None\n\n    try:\n        locked = True\n        batch_size = INDEX_ATTEMPT_BATCH_SIZE\n        with get_session_with_current_tenant() as db_session:\n            old_attempts = get_old_index_attempts(db_session)\n            # We need to batch this because during the initial run, the system might have a large number\n            # of index attempts since they were never deleted. After that, the number will be\n            # significantly lower.\n            if len(old_attempts) == 0:\n                task_logger.info(\n                    \"check_for_index_attempt_cleanup - No index attempts to cleanup\"\n                )\n                return\n\n            for i in range(0, len(old_attempts), batch_size):\n                batch = old_attempts[i : i + batch_size]\n                task_logger.info(\n                    f\"check_for_index_attempt_cleanup - Cleaning up index attempts {len(batch)}\"\n                )\n                self.app.send_task(\n                    OnyxCeleryTask.CLEANUP_INDEX_ATTEMPT,\n                    kwargs={\n                        \"index_attempt_ids\": [attempt.id for attempt in batch],\n                        \"tenant_id\": tenant_id,\n                    },\n                    queue=OnyxCeleryQueues.INDEX_ATTEMPT_CLEANUP,\n                    priority=OnyxCeleryPriority.MEDIUM,\n                )\n    except Exception:\n        task_logger.exception(\"Unexpected exception during index attempt cleanup check\")\n        return None\n    finally:\n        if locked:\n            if lock.owned():\n                lock.release()\n            else:\n                task_logger.error(\n                    f\"check_for_index_attempt_cleanup - Lock not owned on completion: tenant={tenant_id}\"\n                )\n\n\n# light worker\n@shared_task(\n    name=OnyxCeleryTask.CLEANUP_INDEX_ATTEMPT,\n    bind=True,\n)\ndef cleanup_index_attempt_task(\n    self: Task,  # noqa: ARG001\n    *,\n    index_attempt_ids: list[int],\n    tenant_id: str,\n) -> None:\n    \"\"\"Clean up an index attempt\"\"\"\n    start = time.monotonic()\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            cleanup_index_attempts(db_session, index_attempt_ids)\n\n    finally:\n        elapsed = time.monotonic() - start\n\n        task_logger.info(\n            f\"cleanup_index_attempt_task completed: tenant_id={tenant_id} \"\n            f\"index_attempt_ids={index_attempt_ids} \"\n            f\"elapsed={elapsed:.2f}\"\n        )\n\n\nclass DocumentProcessingBatch(BaseModel):\n    \"\"\"Data structure for a document processing batch.\"\"\"\n\n    batch_id: str\n    index_attempt_id: int\n    cc_pair_id: int\n    tenant_id: str\n    batch_num: int\n\n\ndef _check_failure_threshold(\n    total_failures: int,\n    document_count: int,\n    batch_num: int,\n    last_failure: ConnectorFailure | None,\n) -> None:\n    \"\"\"Check if we've hit the failure threshold and raise an appropriate exception if so.\n\n    We consider the threshold hit if:\n    1. We have more than 3 failures AND\n    2. Failures account for more than 10% of processed documents\n    \"\"\"\n    failure_ratio = total_failures / (document_count or 1)\n\n    FAILURE_THRESHOLD = 3\n    FAILURE_RATIO_THRESHOLD = 0.1\n    if total_failures > FAILURE_THRESHOLD and failure_ratio > FAILURE_RATIO_THRESHOLD:\n        logger.error(\n            f\"Connector run failed with '{total_failures}' errors after '{batch_num}' batches.\"\n        )\n        if last_failure and last_failure.exception:\n            raise last_failure.exception from last_failure.exception\n\n        raise RuntimeError(\n            f\"Connector run encountered too many errors, aborting. Last error: {last_failure}\"\n        )\n\n\ndef _resolve_indexing_document_errors(\n    cc_pair_id: int,\n    failures: list[ConnectorFailure],\n    document_batch: list[Document],\n) -> None:\n    with get_session_with_current_tenant() as db_session_temp:\n        # get previously unresolved errors\n        unresolved_errors = get_index_attempt_errors_for_cc_pair(\n            cc_pair_id=cc_pair_id,\n            unresolved_only=True,\n            db_session=db_session_temp,\n        )\n        doc_id_to_unresolved_errors: dict[str, list[IndexAttemptError]] = defaultdict(\n            list\n        )\n        for error in unresolved_errors:\n            if error.document_id:\n                doc_id_to_unresolved_errors[error.document_id].append(error)\n\n        # resolve errors for documents that were successfully indexed\n        failed_document_ids = [\n            failure.failed_document.document_id\n            for failure in failures\n            if failure.failed_document\n        ]\n        successful_document_ids = [\n            document.id\n            for document in document_batch\n            if document.id not in failed_document_ids\n        ]\n        for document_id in successful_document_ids:\n            if document_id not in doc_id_to_unresolved_errors:\n                continue\n\n            logger.info(f\"Resolving IndexAttemptError for document '{document_id}'\")\n            for error in doc_id_to_unresolved_errors[document_id]:\n                error.is_resolved = True\n                db_session_temp.add(error)\n\n        db_session_temp.commit()\n\n\n@shared_task(\n    name=OnyxCeleryTask.DOCPROCESSING_TASK,\n    bind=True,\n)\ndef docprocessing_task(\n    self: Task,  # noqa: ARG001\n    index_attempt_id: int,\n    cc_pair_id: int,\n    tenant_id: str,\n    batch_num: int,\n) -> None:\n    \"\"\"Process a batch of documents through the indexing pipeline.\n\n    This task retrieves documents from storage and processes them through\n    the indexing pipeline (embedding + vector store indexing).\n    \"\"\"\n    # Start heartbeat for this indexing attempt\n    heartbeat_thread, stop_event = start_heartbeat(index_attempt_id)\n    try:\n        # Cannot use the TaskSingleton approach here because the worker is multithreaded\n        token = INDEX_ATTEMPT_INFO_CONTEXTVAR.set((cc_pair_id, index_attempt_id))\n        _docprocessing_task(index_attempt_id, cc_pair_id, tenant_id, batch_num)\n    finally:\n        stop_heartbeat(heartbeat_thread, stop_event)  # Stop heartbeat before exiting\n        INDEX_ATTEMPT_INFO_CONTEXTVAR.reset(token)\n\n\ndef _check_chunk_usage_limit(tenant_id: str) -> None:\n    \"\"\"Check if chunk indexing usage limit has been exceeded.\n\n    Raises UsageLimitExceededError if the limit is exceeded.\n    \"\"\"\n    if not USAGE_LIMITS_ENABLED:\n        return\n\n    from onyx.db.usage import UsageType\n    from onyx.server.usage_limits import check_usage_and_raise\n\n    with get_session_with_current_tenant() as db_session:\n        check_usage_and_raise(\n            db_session=db_session,\n            usage_type=UsageType.CHUNKS_INDEXED,\n            tenant_id=tenant_id,\n            pending_amount=0,  # Just check current usage\n        )\n\n\ndef _docprocessing_task(\n    index_attempt_id: int,\n    cc_pair_id: int,\n    tenant_id: str,\n    batch_num: int,\n) -> None:\n    start_time = time.monotonic()\n\n    if tenant_id:\n        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n\n    # Check if chunk indexing usage limit has been exceeded before processing\n    if USAGE_LIMITS_ENABLED:\n        try:\n            _check_chunk_usage_limit(tenant_id)\n        except HTTPException as e:\n            # Log the error and fail the indexing attempt\n            task_logger.error(\n                f\"Chunk indexing usage limit exceeded for tenant {tenant_id}: {e}\"\n            )\n            with get_session_with_current_tenant() as db_session:\n                from onyx.db.index_attempt import mark_attempt_failed\n\n                mark_attempt_failed(\n                    index_attempt_id=index_attempt_id,\n                    db_session=db_session,\n                    failure_reason=str(e),\n                )\n            raise\n\n    task_logger.info(\n        f\"Processing document batch: attempt={index_attempt_id} batch_num={batch_num} \"\n    )\n\n    # Get the document batch storage\n    storage = get_document_batch_storage(cc_pair_id, index_attempt_id)\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n    r = get_redis_client(tenant_id=tenant_id)\n\n    # 20 is the documented default for httpx max_keepalive_connections\n    if MANAGED_VESPA:\n        httpx_init_vespa_pool(\n            20, ssl_cert=VESPA_CLOUD_CERT_PATH, ssl_key=VESPA_CLOUD_KEY_PATH\n        )\n    else:\n        httpx_init_vespa_pool(20)\n\n    # dummy lock to satisfy linter\n    per_batch_lock: RedisLock | None = None\n    try:\n        # FIX: Monitor memory before loading documents to track problematic batches\n        emit_process_memory(\n            os.getpid(),\n            \"docprocessing\",\n            {\n                \"phase\": \"before_load\",\n                \"tenant_id\": tenant_id,\n                \"cc_pair_id\": cc_pair_id,\n                \"index_attempt_id\": index_attempt_id,\n                \"batch_num\": batch_num,\n            },\n        )\n\n        # Retrieve documents from storage\n        documents = storage.get_batch(batch_num)\n        if not documents:\n            task_logger.error(f\"No documents found for batch {batch_num}\")\n            return\n\n        # FIX: Monitor memory after loading documents\n        emit_process_memory(\n            os.getpid(),\n            \"docprocessing\",\n            {\n                \"phase\": \"after_load\",\n                \"tenant_id\": tenant_id,\n                \"cc_pair_id\": cc_pair_id,\n                \"index_attempt_id\": index_attempt_id,\n                \"batch_num\": batch_num,\n                \"doc_count\": len(documents),\n            },\n        )\n\n        with get_session_with_current_tenant() as db_session:\n            # matches parts of _run_indexing\n            index_attempt = get_index_attempt(\n                db_session,\n                index_attempt_id,\n                eager_load_cc_pair=True,\n                eager_load_search_settings=True,\n            )\n            if not index_attempt:\n                raise RuntimeError(f\"Index attempt {index_attempt_id} not found\")\n\n            if index_attempt.search_settings is None:\n                raise ValueError(\"Search settings must be set for indexing\")\n\n            if (\n                index_attempt.celery_task_id is None\n                or index_attempt.status.is_terminal()\n            ):\n                raise RuntimeError(\n                    f\"Index attempt {index_attempt_id} is not running, status {index_attempt.status}\"\n                )\n\n            cross_batch_db_lock: RedisLock = r.lock(\n                redis_connector.db_lock_key(index_attempt.search_settings.id),\n                timeout=CELERY_INDEXING_LOCK_TIMEOUT,\n                thread_local=False,\n            )\n\n            callback = IndexingCallback(\n                redis_connector,\n            )\n            # TODO: right now this is the only thing the callback is used for,\n            # probably there is a simpler way to handle pausing\n            if callback.should_stop():\n                raise RuntimeError(\"Docprocessing cancelled by connector pausing\")\n\n            # Set up indexing pipeline components\n            embedding_model = DefaultIndexingEmbedder.from_db_search_settings(\n                search_settings=index_attempt.search_settings,\n                callback=callback,\n            )\n\n            document_indices = get_all_document_indices(\n                index_attempt.search_settings,\n                None,\n                httpx_client=HttpxPool.get(\"vespa\"),\n            )\n\n            # Set up metadata for this batch\n            index_attempt_metadata = IndexAttemptMetadata(\n                attempt_id=index_attempt_id,\n                connector_id=index_attempt.connector_credential_pair.connector.id,\n                credential_id=index_attempt.connector_credential_pair.credential.id,\n                request_id=make_randomized_onyx_request_id(\"DIP\"),\n                structured_id=f\"{tenant_id}:{cc_pair_id}:{index_attempt_id}:{batch_num}\",\n                batch_num=batch_num,\n            )\n\n            # Process documents through indexing pipeline\n            connector_source = (\n                index_attempt.connector_credential_pair.connector.source.value\n            )\n            task_logger.info(\n                f\"Processing {len(documents)} documents through indexing pipeline: \"\n                f\"cc_pair_id={cc_pair_id}, source={connector_source}, \"\n                f\"batch_num={batch_num}\"\n            )\n\n            adapter = DocumentIndexingBatchAdapter(\n                db_session=db_session,\n                connector_id=index_attempt.connector_credential_pair.connector.id,\n                credential_id=index_attempt.connector_credential_pair.credential.id,\n                tenant_id=tenant_id,\n                index_attempt_metadata=index_attempt_metadata,\n            )\n\n            # real work happens here!\n            index_pipeline_result = run_indexing_pipeline(\n                embedder=embedding_model,\n                document_indices=document_indices,\n                ignore_time_skip=True,  # Documents are already filtered during extraction\n                db_session=db_session,\n                tenant_id=tenant_id,\n                document_batch=documents,\n                request_id=index_attempt_metadata.request_id,\n                adapter=adapter,\n            )\n\n        # Track chunk indexing usage for cloud usage limits\n        if USAGE_LIMITS_ENABLED and index_pipeline_result.total_chunks > 0:\n            try:\n                from onyx.db.usage import increment_usage\n                from onyx.db.usage import UsageType\n\n                with get_session_with_current_tenant() as usage_db_session:\n                    increment_usage(\n                        db_session=usage_db_session,\n                        usage_type=UsageType.CHUNKS_INDEXED,\n                        amount=index_pipeline_result.total_chunks,\n                    )\n                    usage_db_session.commit()\n            except Exception as e:\n                # Log but don't fail indexing if usage tracking fails\n                task_logger.warning(f\"Failed to track chunk indexing usage: {e}\")\n\n        # Update batch completion and document counts atomically using database coordination\n\n        with get_session_with_current_tenant() as db_session, cross_batch_db_lock:\n            IndexingCoordination.update_batch_completion_and_docs(\n                db_session=db_session,\n                index_attempt_id=index_attempt_id,\n                total_docs_indexed=index_pipeline_result.total_docs,\n                new_docs_indexed=index_pipeline_result.new_docs,\n                total_chunks=index_pipeline_result.total_chunks,\n            )\n\n            _resolve_indexing_document_errors(\n                cc_pair_id,\n                index_pipeline_result.failures,\n                documents,\n            )\n\n        coordination_status = None\n        # Record failures in the database\n        if index_pipeline_result.failures:\n            with get_session_with_current_tenant() as db_session:\n                for failure in index_pipeline_result.failures:\n                    create_index_attempt_error(\n                        index_attempt_id,\n                        cc_pair_id,\n                        failure,\n                        db_session,\n                    )\n            # Use database state instead of FileStore for failure checking\n            with get_session_with_current_tenant() as db_session:\n                coordination_status = IndexingCoordination.get_coordination_status(\n                    db_session, index_attempt_id\n                )\n                _check_failure_threshold(\n                    coordination_status.total_failures,\n                    coordination_status.total_docs,\n                    batch_num,\n                    index_pipeline_result.failures[-1],\n                )\n\n        # Add telemetry for indexing progress using database coordination status\n        # only re-fetch coordination status if necessary\n        if coordination_status is None:\n            with get_session_with_current_tenant() as db_session:\n                coordination_status = IndexingCoordination.get_coordination_status(\n                    db_session, index_attempt_id\n                )\n\n        optional_telemetry(\n            record_type=RecordType.INDEXING_PROGRESS,\n            data={\n                \"index_attempt_id\": index_attempt_id,\n                \"cc_pair_id\": cc_pair_id,\n                \"current_docs_indexed\": coordination_status.total_docs,\n                \"current_chunks_indexed\": coordination_status.total_chunks,\n                \"source\": index_attempt.connector_credential_pair.connector.source.value,\n                \"completed_batches\": coordination_status.completed_batches,\n                \"total_batches\": coordination_status.total_batches,\n            },\n            tenant_id=tenant_id,\n        )\n        # Clean up this batch after successful processing\n        storage.delete_batch_by_num(batch_num)\n\n        # FIX: Explicitly clear document batch from memory and force garbage collection\n        # This helps prevent memory accumulation across multiple batches\n        # NOTE: Thread-local event loops in embedding threads are cleaned up automatically\n        # via the _cleanup_thread_local decorator in search_nlp_models.py\n        del documents\n        gc.collect()\n\n        # FIX: Log final memory usage to track problematic tenants/CC pairs\n        emit_process_memory(\n            os.getpid(),\n            \"docprocessing\",\n            {\n                \"phase\": \"after_processing\",\n                \"tenant_id\": tenant_id,\n                \"cc_pair_id\": cc_pair_id,\n                \"index_attempt_id\": index_attempt_id,\n                \"batch_num\": batch_num,\n                \"chunks_processed\": index_pipeline_result.total_chunks,\n            },\n        )\n\n        elapsed_time = time.monotonic() - start_time\n        task_logger.info(\n            f\"Completed document batch processing: \"\n            f\"index_attempt={index_attempt_id} \"\n            f\"cc_pair={cc_pair_id} \"\n            f\"search_settings={index_attempt.search_settings.id} \"\n            f\"batch_num={batch_num} \"\n            f\"docs={len(index_pipeline_result.failures) + index_pipeline_result.total_docs} \"\n            f\"chunks={index_pipeline_result.total_chunks} \"\n            f\"failures={len(index_pipeline_result.failures)} \"\n            f\"elapsed={elapsed_time:.2f}s\"\n        )\n\n    except Exception:\n        task_logger.exception(\n            f\"Document batch processing failed: batch_num={batch_num} attempt={index_attempt_id} \"\n        )\n\n        raise\n    finally:\n        if per_batch_lock and per_batch_lock.owned():\n            per_batch_lock.release()\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/docprocessing/utils.py",
    "content": "import time\nfrom datetime import datetime\nfrom datetime import timezone\n\nfrom redis import Redis\nfrom redis.exceptions import LockError\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.engine.time_utils import get_db_current_time\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.enums import IndexModelStatus\nfrom onyx.db.index_attempt import get_last_attempt_for_cc_pair\nfrom onyx.db.index_attempt import get_recent_attempts_for_cc_pair\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import SearchSettings\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.redis.redis_connector import RedisConnector\nfrom onyx.redis.redis_pool import redis_lock_dump\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nNUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE = 5\n\n\nclass IndexingCallbackBase(IndexingHeartbeatInterface):\n    PARENT_CHECK_INTERVAL = 60\n\n    def __init__(\n        self,\n        parent_pid: int,\n        redis_connector: RedisConnector,\n        redis_lock: RedisLock,\n        redis_client: Redis,\n        timeout_seconds: int | None = None,\n    ):\n        super().__init__()\n        self.parent_pid = parent_pid\n        self.redis_connector: RedisConnector = redis_connector\n        self.redis_lock: RedisLock = redis_lock\n        self.redis_client = redis_client\n        self.started: datetime = datetime.now(timezone.utc)\n        self.redis_lock.reacquire()\n\n        self.last_tag: str = f\"{self.__class__.__name__}.__init__\"\n        self.last_lock_reacquire: datetime = datetime.now(timezone.utc)\n        self.last_lock_monotonic = time.monotonic()\n\n        self.last_parent_check = time.monotonic()\n        self.start_monotonic = time.monotonic()\n        self.timeout_seconds = timeout_seconds\n\n    def should_stop(self) -> bool:\n        # Check if the associated indexing attempt has been cancelled\n        # TODO: Pass index_attempt_id to the callback and check cancellation using the db\n        if bool(self.redis_connector.stop.fenced):\n            return True\n\n        # Check if the task has exceeded its timeout\n        # NOTE: Celery's soft_time_limit does not work with thread pools,\n        # so we must enforce timeouts internally.\n        if self.timeout_seconds is not None:\n            elapsed = time.monotonic() - self.start_monotonic\n            if elapsed > self.timeout_seconds:\n                logger.warning(\n                    f\"IndexingCallback Docprocessing - task timeout exceeded: \"\n                    f\"elapsed={elapsed:.0f}s timeout={self.timeout_seconds}s \"\n                    f\"cc_pair={self.redis_connector.cc_pair_id}\"\n                )\n                return True\n\n        return False\n\n    def progress(self, tag: str, amount: int) -> None:  # noqa: ARG002\n        \"\"\"Amount isn't used yet.\"\"\"\n\n        # rkuo: this shouldn't be necessary yet because we spawn the process this runs inside\n        # with daemon=True. It seems likely some indexing tasks will need to spawn other processes\n        # eventually, which daemon=True prevents, so leave this code in until we're ready to test it.\n\n        # if self.parent_pid:\n        #     # check if the parent pid is alive so we aren't running as a zombie\n        #     now = time.monotonic()\n        #     if now - self.last_parent_check > IndexingCallback.PARENT_CHECK_INTERVAL:\n        #         try:\n        #             # this is unintuitive, but it checks if the parent pid is still running\n        #             os.kill(self.parent_pid, 0)\n        #         except Exception:\n        #             logger.exception(\"IndexingCallback - parent pid check exceptioned\")\n        #             raise\n        #         self.last_parent_check = now\n\n        try:\n            current_time = time.monotonic()\n            if current_time - self.last_lock_monotonic >= (\n                CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4\n            ):\n                self.redis_lock.reacquire()\n                self.last_lock_reacquire = datetime.now(timezone.utc)\n                self.last_lock_monotonic = time.monotonic()\n\n            self.last_tag = tag\n        except LockError:\n            logger.exception(\n                f\"{self.__class__.__name__} - lock.reacquire exceptioned: \"\n                f\"lock_timeout={self.redis_lock.timeout} \"\n                f\"start={self.started} \"\n                f\"last_tag={self.last_tag} \"\n                f\"last_reacquired={self.last_lock_reacquire} \"\n                f\"now={datetime.now(timezone.utc)}\"\n            )\n\n            redis_lock_dump(self.redis_lock, self.redis_client)\n            raise\n\n\n# NOTE: we're in the process of removing all fences from indexing; this will\n# eventually no longer be used. For now, it is used only for connector pausing.\nclass IndexingCallback(IndexingHeartbeatInterface):\n    def __init__(\n        self,\n        redis_connector: RedisConnector,\n    ):\n        self.redis_connector = redis_connector\n\n    def should_stop(self) -> bool:\n        # Check if the associated indexing attempt has been cancelled\n        # TODO: Pass index_attempt_id to the callback and check cancellation using the db\n        return bool(self.redis_connector.stop.fenced)\n\n    # included to satisfy old interface\n    def progress(self, tag: str, amount: int) -> None:\n        pass\n\n\n# NOTE: The validate_indexing_fence and validate_indexing_fences functions have been removed\n# as they are no longer needed with database-based coordination. The new validation is\n# handled by validate_active_indexing_attempts in the main indexing tasks module.\n\n\ndef is_in_repeated_error_state(\n    cc_pair: ConnectorCredentialPair, search_settings_id: int, db_session: Session\n) -> bool:\n    \"\"\"Checks if the cc pair / search setting combination is in a repeated error state.\"\"\"\n    # if the connector doesn't have a refresh_freq, a single failed attempt is enough\n    number_of_failed_attempts_in_a_row_needed = (\n        NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE\n        if cc_pair.connector.refresh_freq is not None\n        else 1\n    )\n\n    most_recent_index_attempts = get_recent_attempts_for_cc_pair(\n        cc_pair_id=cc_pair.id,\n        search_settings_id=search_settings_id,\n        limit=number_of_failed_attempts_in_a_row_needed,\n        db_session=db_session,\n    )\n    return len(\n        most_recent_index_attempts\n    ) >= number_of_failed_attempts_in_a_row_needed and all(\n        attempt.status == IndexingStatus.FAILED\n        for attempt in most_recent_index_attempts\n    )\n\n\ndef should_index(\n    cc_pair: ConnectorCredentialPair,\n    search_settings_instance: SearchSettings,\n    secondary_index_building: bool,\n    db_session: Session,\n) -> bool:\n    \"\"\"Checks various global settings and past indexing attempts to determine if\n    we should try to start indexing the cc pair / search setting combination.\n\n    Note that tactical checks such as preventing overlap with a currently running task\n    are not handled here.\n\n    Return True if we should try to index, False if not.\n    \"\"\"\n    connector = cc_pair.connector\n    last_index_attempt = get_last_attempt_for_cc_pair(\n        cc_pair_id=cc_pair.id,\n        search_settings_id=search_settings_instance.id,\n        db_session=db_session,\n    )\n    all_recent_errored = is_in_repeated_error_state(\n        cc_pair=cc_pair,\n        search_settings_id=search_settings_instance.id,\n        db_session=db_session,\n    )\n\n    # uncomment for debugging\n    # task_logger.debug(\n    #     f\"_should_index: \"\n    #     f\"cc_pair={cc_pair.id} \"\n    #     f\"connector={cc_pair.connector_id} \"\n    #     f\"refresh_freq={connector.refresh_freq}\"\n    # )\n\n    # don't kick off indexing for `NOT_APPLICABLE` sources\n    if connector.source == DocumentSource.NOT_APPLICABLE:\n        # print(f\"Not indexing cc_pair={cc_pair.id}: NOT_APPLICABLE source\")\n        return False\n\n    # User can still manually create single indexing attempts via the UI for the\n    # currently in use index\n    if DISABLE_INDEX_UPDATE_ON_SWAP:\n        if (\n            search_settings_instance.status == IndexModelStatus.PRESENT\n            and secondary_index_building\n        ):\n            # print(\n            #     f\"Not indexing cc_pair={cc_pair.id}: DISABLE_INDEX_UPDATE_ON_SWAP is True and secondary index building\"\n            # )\n            return False\n\n    # When switching over models, always index at least once\n    if search_settings_instance.status == IndexModelStatus.FUTURE:\n        if last_index_attempt:\n            # No new index if the last index attempt succeeded\n            # Once is enough. The model will never be able to swap otherwise.\n            if last_index_attempt.status == IndexingStatus.SUCCESS:\n                # print(\n                #     f\"Not indexing cc_pair={cc_pair.id}: FUTURE model with successful last index attempt={last_index.id}\"\n                # )\n                return False\n\n            # No new index if the last index attempt is waiting to start\n            if last_index_attempt.status == IndexingStatus.NOT_STARTED:\n                # print(\n                #     f\"Not indexing cc_pair={cc_pair.id}: FUTURE model with NOT_STARTED last index attempt={last_index.id}\"\n                # )\n                return False\n\n            # No new index if the last index attempt is running\n            if last_index_attempt.status == IndexingStatus.IN_PROGRESS:\n                # print(\n                #     f\"Not indexing cc_pair={cc_pair.id}: FUTURE model with IN_PROGRESS last index attempt={last_index.id}\"\n                # )\n                return False\n        else:\n            if (\n                connector.id == 0 or connector.source == DocumentSource.INGESTION_API\n            ):  # Ingestion API\n                # print(\n                #     f\"Not indexing cc_pair={cc_pair.id}: FUTURE model with Ingestion API source\"\n                # )\n                return False\n        return True\n\n    # If the connector is paused or is the ingestion API, don't index\n    # NOTE: during an embedding model switch over, the following logic\n    # is bypassed by the above check for a future model\n    if (\n        not cc_pair.status.is_active()\n        or connector.id == 0\n        or connector.source == DocumentSource.INGESTION_API\n    ):\n        # print(\n        #     f\"Not indexing cc_pair={cc_pair.id}: Connector is paused or is Ingestion API\"\n        # )\n        return False\n\n    if search_settings_instance.status.is_current():\n        if cc_pair.indexing_trigger is not None:\n            # if a manual indexing trigger is on the cc pair, honor it for live search settings\n            return True\n\n    # if no attempt has ever occurred, we should index regardless of refresh_freq\n    if not last_index_attempt:\n        return True\n\n    if connector.refresh_freq is None:\n        # print(f\"Not indexing cc_pair={cc_pair.id}: refresh_freq is None\")\n        return False\n\n    # if in the \"initial\" phase, we should always try and kick-off indexing\n    # as soon as possible if there is no ongoing attempt. In other words,\n    # no delay UNLESS we're repeatedly failing to index.\n    if (\n        cc_pair.status == ConnectorCredentialPairStatus.INITIAL_INDEXING\n        and not all_recent_errored\n    ):\n        return True\n\n    current_db_time = get_db_current_time(db_session)\n    time_since_index = current_db_time - last_index_attempt.time_updated\n    if time_since_index.total_seconds() < connector.refresh_freq:\n        # print(\n        #     f\"Not indexing cc_pair={cc_pair.id}: Last index attempt={last_index_attempt.id} \"\n        #     f\"too recent ({time_since_index.total_seconds()}s < {connector.refresh_freq}s)\"\n        # )\n        return False\n\n    return True\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/evals/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/evals/tasks.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nfrom celery import shared_task\nfrom celery import Task\n\nfrom onyx.configs.app_configs import BRAINTRUST_API_KEY\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES\nfrom onyx.configs.app_configs import SCHEDULED_EVAL_PERMISSIONS_EMAIL\nfrom onyx.configs.app_configs import SCHEDULED_EVAL_PROJECT\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.evals.eval import run_eval\nfrom onyx.evals.models import EvalConfigurationOptions\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n@shared_task(\n    name=OnyxCeleryTask.EVAL_RUN_TASK,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT,\n    bind=True,\n    trail=False,\n)\ndef eval_run_task(\n    self: Task,  # noqa: ARG001\n    *,\n    configuration_dict: dict[str, Any],\n) -> None:\n    \"\"\"Background task to run an evaluation with the given configuration\"\"\"\n    try:\n        configuration = EvalConfigurationOptions.model_validate(configuration_dict)\n        run_eval(configuration, remote_dataset_name=configuration.dataset_name)\n        logger.info(\"Successfully completed eval run task\")\n\n    except Exception:\n        logger.error(\"Failed to run eval task\")\n        raise\n\n\n@shared_task(\n    name=OnyxCeleryTask.SCHEDULED_EVAL_TASK,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT * 5,  # Allow more time for multiple datasets\n    bind=True,\n    trail=False,\n)\ndef scheduled_eval_task(self: Task, **kwargs: Any) -> None:  # noqa: ARG001\n    \"\"\"\n    Scheduled task to run evaluations on configured datasets.\n    Runs weekly on Sunday at midnight UTC.\n\n    Configure via environment variables (with defaults):\n    - SCHEDULED_EVAL_DATASET_NAMES: Comma-separated list of Braintrust dataset names\n    - SCHEDULED_EVAL_PERMISSIONS_EMAIL: Email for search permissions (default: roshan@onyx.app)\n    - SCHEDULED_EVAL_PROJECT: Braintrust project name\n    \"\"\"\n    if not BRAINTRUST_API_KEY:\n        logger.error(\"BRAINTRUST_API_KEY is not configured, cannot run scheduled evals\")\n        return\n\n    if not SCHEDULED_EVAL_PROJECT:\n        logger.error(\n            \"SCHEDULED_EVAL_PROJECT is not configured, cannot run scheduled evals\"\n        )\n        return\n\n    if not SCHEDULED_EVAL_DATASET_NAMES:\n        logger.info(\"No scheduled eval datasets configured, skipping\")\n        return\n\n    if not SCHEDULED_EVAL_PERMISSIONS_EMAIL:\n        logger.error(\"SCHEDULED_EVAL_PERMISSIONS_EMAIL not configured\")\n        return\n\n    project_name = SCHEDULED_EVAL_PROJECT\n    dataset_names = SCHEDULED_EVAL_DATASET_NAMES\n    permissions_email = SCHEDULED_EVAL_PERMISSIONS_EMAIL\n\n    # Create a timestamp for the scheduled run\n    run_timestamp = datetime.now(timezone.utc).strftime(\"%Y-%m-%d\")\n\n    logger.info(\n        f\"Starting scheduled eval pipeline for project '{project_name}' with {len(dataset_names)} dataset(s): {dataset_names}\"\n    )\n\n    pipeline_start = datetime.now(timezone.utc)\n    results: list[dict[str, Any]] = []\n\n    for dataset_name in dataset_names:\n        start_time = datetime.now(timezone.utc)\n        error_message: str | None = None\n        success = False\n\n        # Create informative experiment name for scheduled runs\n        experiment_name = f\"{dataset_name} - {run_timestamp}\"\n\n        try:\n            logger.info(\n                f\"Running scheduled eval for dataset: {dataset_name} (project: {project_name})\"\n            )\n\n            configuration = EvalConfigurationOptions(\n                search_permissions_email=permissions_email,\n                dataset_name=dataset_name,\n                no_send_logs=False,\n                braintrust_project=project_name,\n                experiment_name=experiment_name,\n            )\n\n            result = run_eval(\n                configuration=configuration,\n                remote_dataset_name=dataset_name,\n            )\n            success = result.success\n            logger.info(f\"Completed eval for {dataset_name}: success={success}\")\n\n        except Exception as e:\n            logger.exception(f\"Failed to run scheduled eval for {dataset_name}\")\n            error_message = str(e)\n            success = False\n\n        end_time = datetime.now(timezone.utc)\n\n        results.append(\n            {\n                \"dataset_name\": dataset_name,\n                \"success\": success,\n                \"start_time\": start_time,\n                \"end_time\": end_time,\n                \"error_message\": error_message,\n            }\n        )\n\n    pipeline_end = datetime.now(timezone.utc)\n    total_duration = (pipeline_end - pipeline_start).total_seconds()\n\n    passed_count = sum(1 for r in results if r[\"success\"])\n    logger.info(\n        f\"Scheduled eval pipeline completed: {passed_count}/{len(results)} passed in {total_duration:.1f}s\"\n    )\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/hierarchyfetching/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/hierarchyfetching/tasks.py",
    "content": "\"\"\"Celery tasks for hierarchy fetching.\n\nThis module provides tasks for fetching hierarchy node information from connectors.\nHierarchy nodes represent structural elements like folders, spaces, and pages that\ncan be used to filter search results.\n\nThe hierarchy fetching pipeline runs once per day per connector and fetches\nstructural information from the connector source.\n\"\"\"\n\nimport time\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom uuid import uuid4\n\nfrom celery import Celery\nfrom celery import shared_task\nfrom celery import Task\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.connectors.factory import ConnectorMissingException\nfrom onyx.connectors.factory import identify_connector_class\nfrom onyx.connectors.factory import instantiate_connector\nfrom onyx.connectors.interfaces import HierarchyConnector\nfrom onyx.connectors.models import HierarchyNode as PydanticHierarchyNode\nfrom onyx.db.connector import mark_cc_pair_as_hierarchy_fetched\nfrom onyx.db.connector_credential_pair import (\n    fetch_indexable_standard_connector_credential_pair_ids,\n)\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.hierarchy import upsert_hierarchy_node_cc_pair_entries\nfrom onyx.db.hierarchy import upsert_hierarchy_nodes_batch\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch\nfrom onyx.redis.redis_hierarchy import ensure_source_node_exists\nfrom onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Hierarchy fetching runs once per day (24 hours in seconds)\nHIERARCHY_FETCH_INTERVAL_SECONDS = 24 * 60 * 60\n\n\ndef _connector_supports_hierarchy_fetching(\n    cc_pair: ConnectorCredentialPair,\n) -> bool:\n    \"\"\"Return True only for connectors whose class implements HierarchyConnector.\"\"\"\n    try:\n        connector_class = identify_connector_class(\n            cc_pair.connector.source,\n        )\n    except ConnectorMissingException as e:\n        task_logger.warning(\n            \"Skipping hierarchy fetching enqueue for source=%s input_type=%s: %s\",\n            cc_pair.connector.source,\n            cc_pair.connector.input_type,\n            str(e),\n        )\n        return False\n\n    return issubclass(connector_class, HierarchyConnector)\n\n\ndef _is_hierarchy_fetching_due(cc_pair: ConnectorCredentialPair) -> bool:\n    \"\"\"Returns boolean indicating if hierarchy fetching is due for this connector.\n\n    Hierarchy fetching should run once per day for active connectors.\n    \"\"\"\n    # Skip if not active\n    if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE:\n        return False\n\n    # Skip if connector has never successfully indexed\n    if not cc_pair.last_successful_index_time:\n        return False\n\n    # Check if we've fetched hierarchy recently\n    last_fetch = cc_pair.last_time_hierarchy_fetch\n    if last_fetch is None:\n        # Never fetched before - fetch now\n        return True\n\n    # Check if enough time has passed since last fetch\n    next_fetch_time = last_fetch + timedelta(seconds=HIERARCHY_FETCH_INTERVAL_SECONDS)\n    return datetime.now(timezone.utc) >= next_fetch_time\n\n\ndef _try_creating_hierarchy_fetching_task(\n    celery_app: Celery,\n    cc_pair: ConnectorCredentialPair,\n    db_session: Session,\n    r: Redis,\n    tenant_id: str,\n) -> str | None:\n    \"\"\"Try to create a hierarchy fetching task for a connector.\n\n    Returns the task ID if created, None otherwise.\n    \"\"\"\n    LOCK_TIMEOUT = 30\n\n    # Serialize task creation attempts\n    lock: RedisLock = r.lock(\n        DANSWER_REDIS_FUNCTION_LOCK_PREFIX + f\"hierarchy_fetching_{cc_pair.id}\",\n        timeout=LOCK_TIMEOUT,\n    )\n\n    acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2)\n    if not acquired:\n        return None\n\n    try:\n        # Refresh to get latest state\n        db_session.refresh(cc_pair)\n        if cc_pair.status == ConnectorCredentialPairStatus.DELETING:\n            return None\n\n        # Generate task ID\n        custom_task_id = f\"hierarchy_fetching_{cc_pair.id}_{uuid4()}\"\n\n        # Send the task\n        result = celery_app.send_task(\n            OnyxCeleryTask.CONNECTOR_HIERARCHY_FETCHING_TASK,\n            kwargs=dict(\n                cc_pair_id=cc_pair.id,\n                tenant_id=tenant_id,\n            ),\n            queue=OnyxCeleryQueues.CONNECTOR_HIERARCHY_FETCHING,\n            task_id=custom_task_id,\n            priority=OnyxCeleryPriority.LOW,\n        )\n\n        if not result:\n            raise RuntimeError(\"send_task for hierarchy_fetching_task failed.\")\n\n        task_logger.info(\n            f\"Created hierarchy fetching task: cc_pair={cc_pair.id} celery_task_id={custom_task_id}\"\n        )\n\n        return custom_task_id\n\n    except Exception:\n        task_logger.exception(\n            f\"Failed to create hierarchy fetching task: cc_pair={cc_pair.id}\"\n        )\n        return None\n    finally:\n        if lock.owned():\n            lock.release()\n\n\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_HIERARCHY_FETCHING,\n    soft_time_limit=300,\n    bind=True,\n)\ndef check_for_hierarchy_fetching(self: Task, *, tenant_id: str) -> int | None:\n    \"\"\"Check for connectors that need hierarchy fetching and spawn tasks.\n\n    This task runs periodically (once per day) and checks all active connectors\n    to see if they need hierarchy information fetched.\n    \"\"\"\n    time_start = time.monotonic()\n    task_logger.info(\"check_for_hierarchy_fetching - Starting\")\n\n    tasks_created = 0\n    locked = False\n    redis_client = get_redis_client()\n\n    lock_beat: RedisLock = redis_client.lock(\n        OnyxRedisLocks.CHECK_HIERARCHY_FETCHING_BEAT_LOCK,\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n\n    # These tasks should never overlap\n    if not lock_beat.acquire(blocking=False):\n        return None\n\n    try:\n        locked = True\n\n        with get_session_with_current_tenant() as db_session:\n            # Get all active connector credential pairs\n            cc_pair_ids = fetch_indexable_standard_connector_credential_pair_ids(\n                db_session=db_session,\n                active_cc_pairs_only=True,\n            )\n\n            for cc_pair_id in cc_pair_ids:\n                lock_beat.reacquire()\n                cc_pair = get_connector_credential_pair_from_id(\n                    db_session=db_session,\n                    cc_pair_id=cc_pair_id,\n                )\n\n                if not cc_pair or not _connector_supports_hierarchy_fetching(cc_pair):\n                    continue\n\n                if not _is_hierarchy_fetching_due(cc_pair):\n                    continue\n\n                task_id = _try_creating_hierarchy_fetching_task(\n                    celery_app=self.app,\n                    cc_pair=cc_pair,\n                    db_session=db_session,\n                    r=redis_client,\n                    tenant_id=tenant_id,\n                )\n\n                if task_id:\n                    tasks_created += 1\n\n    except Exception:\n        task_logger.exception(\"check_for_hierarchy_fetching - Unexpected error\")\n    finally:\n        if locked:\n            if lock_beat.owned():\n                lock_beat.release()\n            else:\n                task_logger.error(\n                    \"check_for_hierarchy_fetching - Lock not owned on completion\"\n                )\n\n    time_elapsed = time.monotonic() - time_start\n    task_logger.info(\n        f\"check_for_hierarchy_fetching finished: tasks_created={tasks_created} elapsed={time_elapsed:.2f}s\"\n    )\n    return tasks_created\n\n\n# Batch size for hierarchy node processing\nHIERARCHY_NODE_BATCH_SIZE = 100\n\n\ndef _run_hierarchy_extraction(\n    db_session: Session,\n    cc_pair: ConnectorCredentialPair,\n    source: DocumentSource,\n    tenant_id: str,\n) -> int:\n    \"\"\"\n    Run the hierarchy extraction for a connector.\n\n    Instantiates the connector and calls load_hierarchy() if the connector\n    implements HierarchyConnector.\n\n    Returns the total number of hierarchy nodes extracted.\n    \"\"\"\n    connector = cc_pair.connector\n    credential = cc_pair.credential\n\n    # Instantiate the connector using its configured input type\n    runnable_connector = instantiate_connector(\n        db_session=db_session,\n        source=source,\n        input_type=connector.input_type,\n        connector_specific_config=connector.connector_specific_config,\n        credential=credential,\n    )\n\n    # Check if the connector supports hierarchy fetching\n    if not isinstance(runnable_connector, HierarchyConnector):\n        task_logger.debug(\n            f\"Connector {source} does not implement HierarchyConnector, skipping\"\n        )\n        return 0\n\n    redis_client = get_redis_client(tenant_id=tenant_id)\n\n    # Ensure the SOURCE-type root node exists before processing hierarchy nodes.\n    # This is the root of the hierarchy tree - all other nodes for this source\n    # should ultimately have this as an ancestor.\n    ensure_source_node_exists(redis_client, db_session, source)\n\n    # Determine time range: start from last hierarchy fetch, end at now\n    last_fetch = cc_pair.last_time_hierarchy_fetch\n    start_time = last_fetch.timestamp() if last_fetch else 0\n    end_time = datetime.now(timezone.utc).timestamp()\n\n    # Check if connector is public - all hierarchy nodes from public connectors\n    # should be accessible to all users\n    is_connector_public = cc_pair.access_type == AccessType.PUBLIC\n\n    total_nodes = 0\n    node_batch: list[PydanticHierarchyNode] = []\n\n    def _process_batch() -> int:\n        \"\"\"Process accumulated hierarchy nodes batch.\"\"\"\n        if not node_batch:\n            return 0\n\n        upserted_nodes = upsert_hierarchy_nodes_batch(\n            db_session=db_session,\n            nodes=node_batch,\n            source=source,\n            commit=True,\n            is_connector_public=is_connector_public,\n        )\n\n        upsert_hierarchy_node_cc_pair_entries(\n            db_session=db_session,\n            hierarchy_node_ids=[n.id for n in upserted_nodes],\n            connector_id=cc_pair.connector_id,\n            credential_id=cc_pair.credential_id,\n            commit=True,\n        )\n\n        # Cache in Redis for fast ancestor resolution\n        cache_entries = [\n            HierarchyNodeCacheEntry.from_db_model(node) for node in upserted_nodes\n        ]\n        cache_hierarchy_nodes_batch(\n            redis_client=redis_client,\n            source=source,\n            entries=cache_entries,\n        )\n\n        count = len(node_batch)\n        node_batch.clear()\n        return count\n\n    # Fetch hierarchy nodes from the connector\n    for node in runnable_connector.load_hierarchy(start=start_time, end=end_time):\n        node_batch.append(node)\n        if len(node_batch) >= HIERARCHY_NODE_BATCH_SIZE:\n            total_nodes += _process_batch()\n\n    # Process any remaining nodes\n    total_nodes += _process_batch()\n\n    return total_nodes\n\n\n@shared_task(\n    name=OnyxCeleryTask.CONNECTOR_HIERARCHY_FETCHING_TASK,\n    soft_time_limit=3600,  # 1 hour soft limit\n    time_limit=3900,  # 1 hour 5 min hard limit\n    bind=True,\n)\ndef connector_hierarchy_fetching_task(\n    self: Task,  # noqa: ARG001\n    *,\n    cc_pair_id: int,\n    tenant_id: str,\n) -> None:\n    \"\"\"Fetch hierarchy information from a connector.\n\n    This task fetches structural information (folders, spaces, pages, etc.)\n    from the connector source and stores it in the database.\n    \"\"\"\n    task_logger.info(\n        f\"connector_hierarchy_fetching_task starting: cc_pair={cc_pair_id} tenant={tenant_id}\"\n    )\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            cc_pair = get_connector_credential_pair_from_id(\n                db_session=db_session,\n                cc_pair_id=cc_pair_id,\n            )\n\n            if not cc_pair:\n                task_logger.warning(\n                    f\"CC pair not found for hierarchy fetching: cc_pair={cc_pair_id}\"\n                )\n                return\n\n            if cc_pair.status == ConnectorCredentialPairStatus.DELETING:\n                task_logger.info(\n                    f\"Skipping hierarchy fetching for deleting connector: cc_pair={cc_pair_id}\"\n                )\n                return\n\n            source = cc_pair.connector.source\n            total_nodes = _run_hierarchy_extraction(\n                db_session=db_session,\n                cc_pair=cc_pair,\n                source=source,\n                tenant_id=tenant_id,\n            )\n\n            task_logger.info(\n                f\"connector_hierarchy_fetching_task: Extracted {total_nodes} hierarchy nodes for cc_pair={cc_pair_id}\"\n            )\n\n            # Update the last fetch time to prevent re-running until next interval\n            mark_cc_pair_as_hierarchy_fetched(db_session, cc_pair_id)\n\n    except Exception:\n        task_logger.exception(\n            f\"connector_hierarchy_fetching_task failed: cc_pair={cc_pair_id}\"\n        )\n        raise\n\n    task_logger.info(\n        f\"connector_hierarchy_fetching_task completed: cc_pair={cc_pair_id}\"\n    )\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/llm_model_update/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/llm_model_update/tasks.py",
    "content": "from celery import shared_task\nfrom celery import Task\n\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.configs.app_configs import AUTO_LLM_CONFIG_URL\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.llm.well_known_providers.auto_update_service import (\n    sync_llm_models_from_github,\n)\n\n\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_AUTO_LLM_UPDATE,\n    ignore_result=True,\n    soft_time_limit=300,  # 5 minute timeout\n    trail=False,\n    bind=True,\n)\ndef check_for_auto_llm_updates(\n    self: Task,  # noqa: ARG001\n    *,\n    tenant_id: str,  # noqa: ARG001\n) -> bool | None:\n    \"\"\"Periodic task to fetch LLM model updates from GitHub\n    and sync them to providers in Auto mode.\n\n    This task checks the GitHub-hosted config file and updates all\n    providers that have is_auto_mode=True.\n    \"\"\"\n    if not AUTO_LLM_CONFIG_URL:\n        task_logger.debug(\"AUTO_LLM_CONFIG_URL not configured, skipping\")\n        return None\n\n    try:\n        # Sync to database\n        with get_session_with_current_tenant() as db_session:\n            results = sync_llm_models_from_github(db_session)\n\n            if results:\n                task_logger.info(f\"Auto mode sync results: {results}\")\n            else:\n                task_logger.debug(\"No model updates applied\")\n\n    except Exception:\n        task_logger.exception(\"Error in auto LLM update task\")\n        raise\n\n    return True\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/models.py",
    "content": "from enum import Enum\n\nfrom pydantic import BaseModel\n\n\nclass DocProcessingContext(BaseModel):\n    tenant_id: str\n    cc_pair_id: int\n    search_settings_id: int\n    index_attempt_id: int\n\n\nclass IndexingWatchdogTerminalStatus(str, Enum):\n    \"\"\"The different statuses the watchdog can finish with.\n\n    TODO: create broader success/failure/abort categories\n    \"\"\"\n\n    UNDEFINED = \"undefined\"\n\n    SUCCEEDED = \"succeeded\"\n\n    SPAWN_FAILED = \"spawn_failed\"  # connector spawn failed\n    SPAWN_NOT_ALIVE = (\n        \"spawn_not_alive\"  # spawn succeeded but process did not come alive\n    )\n\n    BLOCKED_BY_DELETION = \"blocked_by_deletion\"\n    BLOCKED_BY_STOP_SIGNAL = \"blocked_by_stop_signal\"\n    FENCE_NOT_FOUND = \"fence_not_found\"  # fence does not exist\n    FENCE_READINESS_TIMEOUT = (\n        \"fence_readiness_timeout\"  # fence exists but wasn't ready within the timeout\n    )\n    FENCE_MISMATCH = \"fence_mismatch\"  # task and fence metadata mismatch\n    TASK_ALREADY_RUNNING = \"task_already_running\"  # task appears to be running already\n    INDEX_ATTEMPT_MISMATCH = (\n        \"index_attempt_mismatch\"  # expected index attempt metadata not found in db\n    )\n\n    CONNECTOR_VALIDATION_ERROR = (\n        \"connector_validation_error\"  # the connector validation failed\n    )\n    CONNECTOR_EXCEPTIONED = \"connector_exceptioned\"  # the connector itself exceptioned\n    WATCHDOG_EXCEPTIONED = \"watchdog_exceptioned\"  # the watchdog exceptioned\n\n    # the watchdog received a termination signal\n    TERMINATED_BY_SIGNAL = \"terminated_by_signal\"\n\n    # the watchdog terminated the task due to no activity\n    TERMINATED_BY_ACTIVITY_TIMEOUT = \"terminated_by_activity_timeout\"\n\n    # NOTE: this may actually be the same as SIGKILL, but parsed differently by python\n    # consolidate once we know more\n    OUT_OF_MEMORY = \"out_of_memory\"\n\n    PROCESS_SIGNAL_SIGKILL = \"process_signal_sigkill\"\n\n    @property\n    def code(self) -> int:\n        _ENUM_TO_CODE: dict[IndexingWatchdogTerminalStatus, int] = {\n            IndexingWatchdogTerminalStatus.PROCESS_SIGNAL_SIGKILL: -9,\n            IndexingWatchdogTerminalStatus.OUT_OF_MEMORY: 137,\n            IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR: 247,\n            IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION: 248,\n            IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL: 249,\n            IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND: 250,\n            IndexingWatchdogTerminalStatus.FENCE_READINESS_TIMEOUT: 251,\n            IndexingWatchdogTerminalStatus.FENCE_MISMATCH: 252,\n            IndexingWatchdogTerminalStatus.TASK_ALREADY_RUNNING: 253,\n            IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH: 254,\n            IndexingWatchdogTerminalStatus.CONNECTOR_EXCEPTIONED: 255,\n        }\n\n        return _ENUM_TO_CODE[self]\n\n    @classmethod\n    def from_code(cls, code: int) -> \"IndexingWatchdogTerminalStatus\":\n        _CODE_TO_ENUM: dict[int, IndexingWatchdogTerminalStatus] = {\n            -9: IndexingWatchdogTerminalStatus.PROCESS_SIGNAL_SIGKILL,\n            137: IndexingWatchdogTerminalStatus.OUT_OF_MEMORY,\n            247: IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR,\n            248: IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION,\n            249: IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL,\n            250: IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND,\n            251: IndexingWatchdogTerminalStatus.FENCE_READINESS_TIMEOUT,\n            252: IndexingWatchdogTerminalStatus.FENCE_MISMATCH,\n            253: IndexingWatchdogTerminalStatus.TASK_ALREADY_RUNNING,\n            254: IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH,\n            255: IndexingWatchdogTerminalStatus.CONNECTOR_EXCEPTIONED,\n        }\n\n        if code in _CODE_TO_ENUM:\n            return _CODE_TO_ENUM[code]\n\n        return IndexingWatchdogTerminalStatus.UNDEFINED\n\n\nclass SimpleJobResult:\n    \"\"\"The data we want to have when the watchdog finishes\"\"\"\n\n    def __init__(self) -> None:\n        self.status = IndexingWatchdogTerminalStatus.UNDEFINED\n        self.connector_source = None\n        self.exit_code = None\n        self.exception_str = None\n\n    status: IndexingWatchdogTerminalStatus\n    connector_source: str | None\n    exit_code: int | None\n    exception_str: str | None\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/monitoring/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/monitoring/tasks.py",
    "content": "import json\nimport time\nfrom datetime import timedelta\nfrom itertools import islice\nfrom typing import Any\nfrom typing import cast\nfrom typing import Literal\n\nimport psutil\nfrom celery import shared_task\nfrom celery import Task\nfrom celery.exceptions import SoftTimeLimitExceeded\nfrom pydantic import BaseModel\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy import select\nfrom sqlalchemy import text\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.celery_redis import celery_get_broker_client\nfrom onyx.background.celery.celery_redis import celery_get_queue_length\nfrom onyx.background.celery.celery_redis import celery_get_unacked_task_ids\nfrom onyx.background.celery.memory_monitoring import emit_process_memory\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import ONYX_CLOUD_TENANT_ID\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import get_session_with_shared_schema\nfrom onyx.db.engine.tenant_utils import get_all_tenant_ids\nfrom onyx.db.engine.time_utils import get_db_current_time\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.enums import SyncStatus\nfrom onyx.db.enums import SyncType\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import DocumentSet\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.models import SyncRecord\nfrom onyx.db.models import UserGroup\nfrom onyx.db.search_settings import get_active_search_settings_list\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.redis.redis_pool import redis_lock_dump\nfrom onyx.utils.logger import is_running_in_container\nfrom onyx.utils.telemetry import optional_telemetry\nfrom onyx.utils.telemetry import RecordType\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n_MONITORING_SOFT_TIME_LIMIT = 60 * 5  # 5 minutes\n_MONITORING_TIME_LIMIT = _MONITORING_SOFT_TIME_LIMIT + 60  # 6 minutes\n\n_CONNECTOR_INDEX_ATTEMPT_START_LATENCY_KEY_FMT = (\n    \"monitoring_connector_index_attempt_start_latency:{cc_pair_id}:{index_attempt_id}\"\n)\n\n_CONNECTOR_INDEX_ATTEMPT_RUN_SUCCESS_KEY_FMT = (\n    \"monitoring_connector_index_attempt_run_success:{cc_pair_id}:{index_attempt_id}\"\n)\n\n_FINAL_METRIC_KEY_FMT = \"sync_final_metrics:{sync_type}:{entity_id}:{sync_record_id}\"\n\n_SYNC_START_LATENCY_KEY_FMT = (\n    \"sync_start_latency:{sync_type}:{entity_id}:{sync_record_id}\"\n)\n\n_CONNECTOR_START_TIME_KEY_FMT = \"connector_start_time:{cc_pair_id}:{index_attempt_id}\"\n_CONNECTOR_END_TIME_KEY_FMT = \"connector_end_time:{cc_pair_id}:{index_attempt_id}\"\n_SYNC_START_TIME_KEY_FMT = \"sync_start_time:{sync_type}:{entity_id}:{sync_record_id}\"\n_SYNC_END_TIME_KEY_FMT = \"sync_end_time:{sync_type}:{entity_id}:{sync_record_id}\"\n\n\ndef _mark_metric_as_emitted(redis_std: Redis, key: str) -> None:\n    \"\"\"Mark a metric as having been emitted by setting a Redis key with expiration\"\"\"\n    redis_std.set(key, \"1\", ex=24 * 60 * 60)  # Expire after 1 day\n\n\ndef _has_metric_been_emitted(redis_std: Redis, key: str) -> bool:\n    \"\"\"Check if a metric has been emitted by checking for existence of Redis key\"\"\"\n    return bool(redis_std.exists(key))\n\n\nclass Metric(BaseModel):\n    key: (\n        str | None\n    )  # only required if we need to store that we have emitted this metric\n    name: str\n    value: Any\n    tags: dict[str, str]\n\n    def log(self) -> None:\n        \"\"\"Log the metric in a standardized format\"\"\"\n        data = {\n            \"metric\": self.name,\n            \"value\": self.value,\n            \"tags\": self.tags,\n        }\n        task_logger.info(json.dumps(data))\n\n    def emit(self, tenant_id: str) -> None:\n        # Convert value to appropriate type based on the input value\n        bool_value = None\n        float_value = None\n        int_value = None\n        string_value = None\n        # NOTE: have to do bool first, since `isinstance(True, int)` is true\n        # e.g. bool is a subclass of int\n        if isinstance(self.value, bool):\n            bool_value = self.value\n        elif isinstance(self.value, int):\n            int_value = self.value\n        elif isinstance(self.value, float):\n            float_value = self.value\n        elif isinstance(self.value, str):\n            string_value = self.value\n        else:\n            task_logger.error(\n                f\"Invalid metric value type: {type(self.value)} ({self.value}) for metric {self.name}.\"\n            )\n            return\n\n        # don't send None values over the wire\n        data = {\n            k: v\n            for k, v in {\n                \"metric_name\": self.name,\n                \"float_value\": float_value,\n                \"int_value\": int_value,\n                \"string_value\": string_value,\n                \"bool_value\": bool_value,\n                \"tags\": self.tags,\n            }.items()\n            if v is not None\n        }\n        task_logger.info(f\"Emitting metric: {data}\")\n        optional_telemetry(\n            record_type=RecordType.METRIC,\n            data=data,\n            tenant_id=tenant_id,\n        )\n\n\ndef _collect_queue_metrics(redis_celery: Redis) -> list[Metric]:\n    \"\"\"Collect metrics about queue lengths for different Celery queues\"\"\"\n    metrics = []\n    queue_mappings = {\n        \"celery_queue_length\": OnyxCeleryQueues.PRIMARY,\n        \"docprocessing_queue_length\": OnyxCeleryQueues.DOCPROCESSING,\n        \"docfetching_queue_length\": OnyxCeleryQueues.CONNECTOR_DOC_FETCHING,\n        \"sync_queue_length\": OnyxCeleryQueues.VESPA_METADATA_SYNC,\n        \"deletion_queue_length\": OnyxCeleryQueues.CONNECTOR_DELETION,\n        \"pruning_queue_length\": OnyxCeleryQueues.CONNECTOR_PRUNING,\n        \"permissions_sync_queue_length\": OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC,\n        \"external_group_sync_queue_length\": OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC,\n        \"permissions_upsert_queue_length\": OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT,\n        \"hierarchy_fetching_queue_length\": OnyxCeleryQueues.CONNECTOR_HIERARCHY_FETCHING,\n        \"llm_model_update_queue_length\": OnyxCeleryQueues.LLM_MODEL_UPDATE,\n        \"checkpoint_cleanup_queue_length\": OnyxCeleryQueues.CHECKPOINT_CLEANUP,\n        \"index_attempt_cleanup_queue_length\": OnyxCeleryQueues.INDEX_ATTEMPT_CLEANUP,\n        \"csv_generation_queue_length\": OnyxCeleryQueues.CSV_GENERATION,\n        \"user_file_processing_queue_length\": OnyxCeleryQueues.USER_FILE_PROCESSING,\n        \"user_file_project_sync_queue_length\": OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,\n        \"user_file_delete_queue_length\": OnyxCeleryQueues.USER_FILE_DELETE,\n        \"monitoring_queue_length\": OnyxCeleryQueues.MONITORING,\n        \"sandbox_queue_length\": OnyxCeleryQueues.SANDBOX,\n        \"opensearch_migration_queue_length\": OnyxCeleryQueues.OPENSEARCH_MIGRATION,\n    }\n\n    for name, queue in queue_mappings.items():\n        metrics.append(\n            Metric(\n                key=None,\n                name=name,\n                value=celery_get_queue_length(queue, redis_celery),\n                tags={\"queue\": name},\n            )\n        )\n\n    return metrics\n\n\ndef _build_connector_start_latency_metric(\n    cc_pair: ConnectorCredentialPair,\n    recent_attempt: IndexAttempt,\n    second_most_recent_attempt: IndexAttempt | None,\n    redis_std: Redis,\n) -> Metric | None:\n    if not recent_attempt.time_started:\n        return None\n\n    # check if we already emitted a metric for this index attempt\n    metric_key = _CONNECTOR_INDEX_ATTEMPT_START_LATENCY_KEY_FMT.format(\n        cc_pair_id=cc_pair.id,\n        index_attempt_id=recent_attempt.id,\n    )\n    if _has_metric_been_emitted(redis_std, metric_key):\n        task_logger.info(\n            f\"Skipping metric for connector {cc_pair.connector.id} \"\n            f\"index attempt {recent_attempt.id} because it has already been \"\n            \"emitted\"\n        )\n        return None\n\n    # Connector start latency\n    # first run case - we should start as soon as it's created\n    if not second_most_recent_attempt:\n        desired_start_time = cc_pair.connector.time_created\n    else:\n        if not cc_pair.connector.refresh_freq:\n            task_logger.debug(\n                \"Connector has no refresh_freq and this is a non-initial index attempt. \"\n                \"Assuming user manually triggered indexing, so we'll skip start latency metric.\"\n            )\n            return None\n\n        desired_start_time = second_most_recent_attempt.time_updated + timedelta(\n            seconds=cc_pair.connector.refresh_freq\n        )\n\n    start_latency = (recent_attempt.time_started - desired_start_time).total_seconds()\n\n    task_logger.info(\n        f\"Start latency for index attempt {recent_attempt.id}: {start_latency:.2f}s \"\n        f\"(desired: {desired_start_time}, actual: {recent_attempt.time_started})\"\n    )\n\n    job_id = build_job_id(\"connector\", str(cc_pair.id), str(recent_attempt.id))\n\n    return Metric(\n        key=metric_key,\n        name=\"connector_start_latency\",\n        value=start_latency,\n        tags={\n            \"job_id\": job_id,\n            \"connector_id\": str(cc_pair.connector.id),\n            \"source\": str(cc_pair.connector.source),\n        },\n    )\n\n\ndef _build_connector_final_metrics(\n    cc_pair: ConnectorCredentialPair,\n    recent_attempts: list[IndexAttempt],\n    redis_std: Redis,\n) -> list[Metric]:\n    \"\"\"\n    Final metrics for connector index attempts:\n      - Boolean success/fail metric\n      - If success, emit:\n          * duration (seconds)\n          * doc_count\n    \"\"\"\n    metrics = []\n    for attempt in recent_attempts:\n        metric_key = _CONNECTOR_INDEX_ATTEMPT_RUN_SUCCESS_KEY_FMT.format(\n            cc_pair_id=cc_pair.id,\n            index_attempt_id=attempt.id,\n        )\n        if _has_metric_been_emitted(redis_std, metric_key):\n            task_logger.info(\n                f\"Skipping final metrics for connector {cc_pair.connector.id} index attempt {attempt.id}, already emitted.\"\n            )\n            continue\n\n        # We only emit final metrics if the attempt is in a terminal state\n        if attempt.status not in [\n            IndexingStatus.SUCCESS,\n            IndexingStatus.FAILED,\n            IndexingStatus.CANCELED,\n        ]:\n            # Not finished; skip\n            continue\n\n        job_id = build_job_id(\"connector\", str(cc_pair.id), str(attempt.id))\n        success = attempt.status == IndexingStatus.SUCCESS\n        metrics.append(\n            Metric(\n                key=metric_key,  # We'll mark the same key for any final metrics\n                name=\"connector_run_succeeded\",\n                value=success,\n                tags={\n                    \"job_id\": job_id,\n                    \"connector_id\": str(cc_pair.connector.id),\n                    \"source\": str(cc_pair.connector.source),\n                    \"status\": attempt.status.value,\n                },\n            )\n        )\n\n        if success:\n            # Make sure we have valid time_started\n            if attempt.time_started and attempt.time_updated:\n                duration_seconds = (\n                    attempt.time_updated - attempt.time_started\n                ).total_seconds()\n                metrics.append(\n                    Metric(\n                        key=None,  # No need for a new key, or you can reuse the same if you prefer\n                        name=\"connector_index_duration_seconds\",\n                        value=duration_seconds,\n                        tags={\n                            \"job_id\": job_id,\n                            \"connector_id\": str(cc_pair.connector.id),\n                            \"source\": str(cc_pair.connector.source),\n                        },\n                    )\n                )\n            else:\n                task_logger.error(\n                    f\"Index attempt {attempt.id} succeeded but has missing time \"\n                    f\"(time_started={attempt.time_started}, time_updated={attempt.time_updated}).\"\n                )\n\n            # For doc counts, choose whichever field is more relevant\n            doc_count = attempt.total_docs_indexed or 0\n            metrics.append(\n                Metric(\n                    key=None,\n                    name=\"connector_index_doc_count\",\n                    value=doc_count,\n                    tags={\n                        \"job_id\": job_id,\n                        \"connector_id\": str(cc_pair.connector.id),\n                        \"source\": str(cc_pair.connector.source),\n                    },\n                )\n            )\n\n    return metrics\n\n\ndef _collect_connector_metrics(db_session: Session, redis_std: Redis) -> list[Metric]:\n    \"\"\"Collect metrics about connector runs from the past hour\"\"\"\n    one_hour_ago = get_db_current_time(db_session) - timedelta(hours=1)\n\n    # Get all connector credential pairs\n    cc_pairs = db_session.scalars(select(ConnectorCredentialPair)).all()\n    # Might be more than one search setting, or just one\n    active_search_settings_list = get_active_search_settings_list(db_session)\n\n    metrics = []\n\n    # If you want to process each cc_pair against each search setting:\n    for cc_pair in cc_pairs:\n        for search_settings in active_search_settings_list:\n            recent_attempts = (\n                db_session.query(IndexAttempt)\n                .filter(\n                    IndexAttempt.connector_credential_pair_id == cc_pair.id,\n                    IndexAttempt.search_settings_id == search_settings.id,\n                )\n                .order_by(IndexAttempt.time_created.desc())\n                .limit(2)\n                .all()\n            )\n\n            if not recent_attempts:\n                continue\n\n            most_recent_attempt = recent_attempts[0]\n            second_most_recent_attempt = (\n                recent_attempts[1] if len(recent_attempts) > 1 else None\n            )\n\n            if one_hour_ago > most_recent_attempt.time_created:\n                continue\n\n            # Build a job_id for correlation\n            job_id = build_job_id(\n                \"connector\", str(cc_pair.id), str(most_recent_attempt.id)\n            )\n\n            # Add raw start time metric if available\n            if most_recent_attempt.time_started:\n                start_time_key = _CONNECTOR_START_TIME_KEY_FMT.format(\n                    cc_pair_id=cc_pair.id,\n                    index_attempt_id=most_recent_attempt.id,\n                )\n                metrics.append(\n                    Metric(\n                        key=start_time_key,\n                        name=\"connector_start_time\",\n                        value=most_recent_attempt.time_started.timestamp(),\n                        tags={\n                            \"job_id\": job_id,\n                            \"connector_id\": str(cc_pair.connector.id),\n                            \"source\": str(cc_pair.connector.source),\n                        },\n                    )\n                )\n\n            # Add raw end time metric if available and in terminal state\n            if (\n                most_recent_attempt.status.is_terminal()\n                and most_recent_attempt.time_updated\n            ):\n                end_time_key = _CONNECTOR_END_TIME_KEY_FMT.format(\n                    cc_pair_id=cc_pair.id,\n                    index_attempt_id=most_recent_attempt.id,\n                )\n                metrics.append(\n                    Metric(\n                        key=end_time_key,\n                        name=\"connector_end_time\",\n                        value=most_recent_attempt.time_updated.timestamp(),\n                        tags={\n                            \"job_id\": job_id,\n                            \"connector_id\": str(cc_pair.connector.id),\n                            \"source\": str(cc_pair.connector.source),\n                        },\n                    )\n                )\n\n            # Connector start latency\n            start_latency_metric = _build_connector_start_latency_metric(\n                cc_pair, most_recent_attempt, second_most_recent_attempt, redis_std\n            )\n\n            if start_latency_metric:\n                metrics.append(start_latency_metric)\n\n            # Connector run success/failure\n            final_metrics = _build_connector_final_metrics(\n                cc_pair, recent_attempts, redis_std\n            )\n            metrics.extend(final_metrics)\n\n    return metrics\n\n\ndef _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]:\n    \"\"\"\n    Collect metrics for document set and group syncing:\n      - Success/failure status\n      - Start latency (for doc sets / user groups)\n      - Duration & doc count (only if success)\n      - Throughput (docs/min) (only if success)\n      - Raw start/end times for each sync\n    \"\"\"\n\n    one_hour_ago = get_db_current_time(db_session) - timedelta(hours=1)\n\n    # Get all sync records that ended in the last hour\n    recent_sync_records = db_session.scalars(\n        select(SyncRecord)\n        .where(SyncRecord.sync_end_time.isnot(None))\n        .where(SyncRecord.sync_end_time >= one_hour_ago)\n        .order_by(SyncRecord.sync_end_time.desc())\n    ).all()\n\n    task_logger.info(\n        f\"Collecting sync metrics for {len(recent_sync_records)} sync records\"\n    )\n\n    metrics = []\n\n    for sync_record in recent_sync_records:\n        # Build a job_id for correlation\n        job_id = build_job_id(\"sync_record\", str(sync_record.id))\n\n        # Add raw start time metric\n        start_time_key = _SYNC_START_TIME_KEY_FMT.format(\n            sync_type=sync_record.sync_type,\n            entity_id=sync_record.entity_id,\n            sync_record_id=sync_record.id,\n        )\n        metrics.append(\n            Metric(\n                key=start_time_key,\n                name=\"sync_start_time\",\n                value=sync_record.sync_start_time.timestamp(),\n                tags={\n                    \"job_id\": job_id,\n                    \"sync_type\": str(sync_record.sync_type),\n                },\n            )\n        )\n\n        # Add raw end time metric if available\n        if sync_record.sync_end_time:\n            end_time_key = _SYNC_END_TIME_KEY_FMT.format(\n                sync_type=sync_record.sync_type,\n                entity_id=sync_record.entity_id,\n                sync_record_id=sync_record.id,\n            )\n            metrics.append(\n                Metric(\n                    key=end_time_key,\n                    name=\"sync_end_time\",\n                    value=sync_record.sync_end_time.timestamp(),\n                    tags={\n                        \"job_id\": job_id,\n                        \"sync_type\": str(sync_record.sync_type),\n                    },\n                )\n            )\n\n        # Emit a SUCCESS/FAIL boolean metric\n        #    Use a single Redis key to avoid re-emitting final metrics\n        final_metric_key = _FINAL_METRIC_KEY_FMT.format(\n            sync_type=sync_record.sync_type,\n            entity_id=sync_record.entity_id,\n            sync_record_id=sync_record.id,\n        )\n        if not _has_metric_been_emitted(redis_std, final_metric_key):\n            # Evaluate success\n            sync_succeeded = sync_record.sync_status == SyncStatus.SUCCESS\n\n            metrics.append(\n                Metric(\n                    key=final_metric_key,\n                    name=\"sync_run_succeeded\",\n                    value=sync_succeeded,\n                    tags={\n                        \"job_id\": job_id,\n                        \"sync_type\": str(sync_record.sync_type),\n                        \"status\": str(sync_record.sync_status),\n                    },\n                )\n            )\n\n            # If successful, emit additional metrics\n            if sync_succeeded:\n                if sync_record.sync_end_time and sync_record.sync_start_time:\n                    duration_seconds = (\n                        sync_record.sync_end_time - sync_record.sync_start_time\n                    ).total_seconds()\n                else:\n                    task_logger.error(\n                        f\"Invalid times for sync record {sync_record.id}: \"\n                        f\"start={sync_record.sync_start_time}, end={sync_record.sync_end_time}\"\n                    )\n                    duration_seconds = None\n\n                doc_count = sync_record.num_docs_synced or 0\n\n                sync_speed = None\n                if duration_seconds and duration_seconds > 0:\n                    duration_mins = duration_seconds / 60.0\n                    sync_speed = (\n                        doc_count / duration_mins if duration_mins > 0 else None\n                    )\n\n                # Emit duration, doc count, speed\n                if duration_seconds is not None:\n                    metrics.append(\n                        Metric(\n                            key=final_metric_key,\n                            name=\"sync_duration_seconds\",\n                            value=duration_seconds,\n                            tags={\n                                \"job_id\": job_id,\n                                \"sync_type\": str(sync_record.sync_type),\n                            },\n                        )\n                    )\n                else:\n                    task_logger.error(\n                        f\"Invalid sync record {sync_record.id} with no duration\"\n                    )\n\n                metrics.append(\n                    Metric(\n                        key=final_metric_key,\n                        name=\"sync_doc_count\",\n                        value=doc_count,\n                        tags={\n                            \"job_id\": job_id,\n                            \"sync_type\": str(sync_record.sync_type),\n                        },\n                    )\n                )\n\n                if sync_speed is not None:\n                    metrics.append(\n                        Metric(\n                            key=final_metric_key,\n                            name=\"sync_speed_docs_per_min\",\n                            value=sync_speed,\n                            tags={\n                                \"job_id\": job_id,\n                                \"sync_type\": str(sync_record.sync_type),\n                            },\n                        )\n                    )\n                else:\n                    task_logger.error(\n                        f\"Invalid sync record {sync_record.id} with no duration\"\n                    )\n\n        # Emit start latency\n        start_latency_key = _SYNC_START_LATENCY_KEY_FMT.format(\n            sync_type=sync_record.sync_type,\n            entity_id=sync_record.entity_id,\n            sync_record_id=sync_record.id,\n        )\n        if not _has_metric_been_emitted(redis_std, start_latency_key):\n            # Get the entity's last update time based on sync type\n            entity: DocumentSet | UserGroup | None = None\n            if sync_record.sync_type == SyncType.DOCUMENT_SET:\n                entity = db_session.scalar(\n                    select(DocumentSet).where(DocumentSet.id == sync_record.entity_id)\n                )\n            elif sync_record.sync_type == SyncType.USER_GROUP:\n                entity = db_session.scalar(\n                    select(UserGroup).where(UserGroup.id == sync_record.entity_id)\n                )\n            else:\n                # Only user groups and document set sync records have\n                #  an associated entity we can use for latency metrics\n                continue\n\n            if entity is None:\n                task_logger.error(\n                    f\"Sync record of type {sync_record.sync_type} doesn't have an entity \"\n                    f\"associated with it (id={sync_record.entity_id}). Skipping start latency metric.\"\n                )\n\n            # Calculate start latency in seconds:\n            #    (actual sync start) - (last modified time)\n            if (\n                entity is not None\n                and entity.time_last_modified_by_user\n                and sync_record.sync_start_time\n            ):\n                start_latency = (\n                    sync_record.sync_start_time - entity.time_last_modified_by_user\n                ).total_seconds()\n\n                if start_latency < 0:\n                    task_logger.error(\n                        f\"Negative start latency for sync record {sync_record.id} \"\n                        f\"(start={sync_record.sync_start_time}, entity_modified={entity.time_last_modified_by_user})\"\n                    )\n                    continue\n\n                metrics.append(\n                    Metric(\n                        key=start_latency_key,\n                        name=\"sync_start_latency_seconds\",\n                        value=start_latency,\n                        tags={\n                            \"job_id\": job_id,\n                            \"sync_type\": str(sync_record.sync_type),\n                        },\n                    )\n                )\n\n    return metrics\n\n\ndef build_job_id(\n    job_type: Literal[\"connector\", \"sync_record\"],\n    primary_id: str,\n    secondary_id: str | None = None,\n) -> str:\n    if job_type == \"connector\":\n        if secondary_id is None:\n            raise ValueError(\n                \"secondary_id (attempt_id) is required for connector job_type\"\n            )\n        return f\"connector:{primary_id}:attempt:{secondary_id}\"\n    elif job_type == \"sync_record\":\n        return f\"sync_record:{primary_id}\"\n\n\n@shared_task(\n    name=OnyxCeleryTask.MONITOR_BACKGROUND_PROCESSES,\n    ignore_result=True,\n    soft_time_limit=_MONITORING_SOFT_TIME_LIMIT,\n    time_limit=_MONITORING_TIME_LIMIT,\n    queue=OnyxCeleryQueues.MONITORING,\n    bind=True,\n)\ndef monitor_background_processes(self: Task, *, tenant_id: str) -> None:\n    \"\"\"Collect and emit metrics about background processes.\n    This task runs periodically to gather metrics about:\n    - Queue lengths for different Celery queues\n    - Connector run metrics (start latency, success rate)\n    - Syncing speed metrics\n    - Worker status and task counts\n    \"\"\"\n    if tenant_id is not None:\n        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n\n    task_logger.info(\"Starting background monitoring\")\n    r = get_redis_client()\n\n    lock_monitoring: RedisLock = r.lock(\n        OnyxRedisLocks.MONITOR_BACKGROUND_PROCESSES_LOCK,\n        timeout=_MONITORING_SOFT_TIME_LIMIT,\n    )\n\n    # these tasks should never overlap\n    if not lock_monitoring.acquire(blocking=False):\n        task_logger.info(\"Skipping monitoring task because it is already running\")\n        return None\n\n    try:\n        redis_std = get_redis_client()\n\n        # Collect queue metrics with broker connection\n        r_celery = celery_get_broker_client(self.app)\n        queue_metrics = _collect_queue_metrics(r_celery)\n\n        # Collect remaining metrics (no broker connection needed)\n        with get_session_with_current_tenant() as db_session:\n            all_metrics: list[Metric] = queue_metrics\n            all_metrics.extend(_collect_connector_metrics(db_session, redis_std))\n            all_metrics.extend(_collect_sync_metrics(db_session, redis_std))\n\n            for metric in all_metrics:\n                if metric.key is None or not _has_metric_been_emitted(\n                    redis_std, metric.key\n                ):\n                    metric.log()\n                    metric.emit(tenant_id)\n\n                if metric.key is not None:\n                    _mark_metric_as_emitted(redis_std, metric.key)\n\n        task_logger.info(\"Successfully collected background metrics\")\n    except SoftTimeLimitExceeded:\n        task_logger.info(\n            \"Soft time limit exceeded, task is being terminated gracefully.\"\n        )\n    except Exception as e:\n        task_logger.exception(\"Error collecting background process metrics\")\n        raise e\n    finally:\n        if lock_monitoring.owned():\n            lock_monitoring.release()\n\n        task_logger.info(\"Background monitoring task finished\")\n\n\n@shared_task(\n    name=OnyxCeleryTask.CLOUD_MONITOR_ALEMBIC,\n)\ndef cloud_check_alembic() -> bool | None:\n    \"\"\"A task to verify that all tenants are on the same alembic revision.\n\n    This check is expected to fail if a cloud alembic migration is currently running\n    across all tenants.\n\n    TODO: have the cloud migration script set an activity signal that this check\n    uses to know it doesn't make sense to run a check at the present time.\n    \"\"\"\n\n    # Used as a placeholder if the alembic revision cannot be retrieved\n    ALEMBIC_NULL_REVISION = \"000000000000\"\n\n    time_start = time.monotonic()\n\n    redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)\n\n    lock_beat: RedisLock = redis_client.lock(\n        OnyxRedisLocks.CLOUD_CHECK_ALEMBIC_BEAT_LOCK,\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n\n    # these tasks should never overlap\n    if not lock_beat.acquire(blocking=False):\n        return None\n\n    last_lock_time = time.monotonic()\n\n    tenant_to_revision: dict[str, str] = {}\n    revision_counts: dict[str, int] = {}\n    out_of_date_tenants: dict[str, str] = {}\n    top_revision: str = \"\"\n    tenant_ids: list[str] | list[None] = []\n\n    try:\n        # map tenant_id to revision (or ALEMBIC_NULL_REVISION if the query fails)\n        tenant_ids = get_all_tenant_ids()\n        for tenant_id in tenant_ids:\n            current_time = time.monotonic()\n            if current_time - last_lock_time >= (CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4):\n                lock_beat.reacquire()\n                last_lock_time = current_time\n\n            if tenant_id is None:\n                continue\n\n            with get_session_with_shared_schema() as session:\n                try:\n                    result = session.execute(\n                        text(f'SELECT * FROM \"{tenant_id}\".alembic_version LIMIT 1')\n                    )\n                    result_scalar: str | None = result.scalar_one_or_none()\n                    if result_scalar is None:\n                        raise ValueError(\"Alembic version should not be None.\")\n\n                    tenant_to_revision[tenant_id] = result_scalar\n                except Exception:\n                    task_logger.error(f\"Tenant {tenant_id} has no revision!\")\n                    tenant_to_revision[tenant_id] = ALEMBIC_NULL_REVISION\n\n        # get the total count of each revision\n        for k, v in tenant_to_revision.items():\n            revision_counts[v] = revision_counts.get(v, 0) + 1\n\n        # error if any null revision tenants are found\n        if ALEMBIC_NULL_REVISION in revision_counts:\n            num_null_revisions = revision_counts[ALEMBIC_NULL_REVISION]\n            raise ValueError(f\"No revision was found for {num_null_revisions} tenants!\")\n\n        # get the revision with the most counts\n        sorted_revision_counts = sorted(\n            revision_counts.items(), key=lambda item: item[1], reverse=True\n        )\n\n        if len(sorted_revision_counts) == 0:\n            raise ValueError(\n                f\"cloud_check_alembic - No revisions found for {len(tenant_ids)} tenant ids!\"\n            )\n\n        top_revision, _ = sorted_revision_counts[0]\n\n        # build a list of out of date tenants\n        for k, v in tenant_to_revision.items():\n            if v == top_revision:\n                continue\n\n            out_of_date_tenants[k] = v\n\n    except SoftTimeLimitExceeded:\n        task_logger.info(\n            \"Soft time limit exceeded, task is being terminated gracefully.\"\n        )\n        raise\n    except Exception:\n        task_logger.exception(\"Unexpected exception during cloud alembic check\")\n        raise\n    finally:\n        if lock_beat.owned():\n            lock_beat.release()\n        else:\n            task_logger.error(\"cloud_check_alembic - Lock not owned on completion\")\n            redis_lock_dump(lock_beat, redis_client)\n\n    if len(out_of_date_tenants) > 0:\n        task_logger.error(\n            f\"Found out of date tenants: \"\n            f\"num_out_of_date_tenants={len(out_of_date_tenants)} \"\n            f\"num_tenants={len(tenant_ids)} \"\n            f\"revision={top_revision}\"\n        )\n\n        num_to_log = min(5, len(out_of_date_tenants))\n        task_logger.info(\n            f\"Logging {num_to_log}/{len(out_of_date_tenants)} out of date tenants.\"\n        )\n        for k, v in islice(out_of_date_tenants.items(), 5):\n            task_logger.info(f\"Out of date tenant: tenant={k} revision={v}\")\n    else:\n        task_logger.info(\n            f\"All tenants are up to date: num_tenants={len(tenant_ids)} revision={top_revision}\"\n        )\n\n    time_elapsed = time.monotonic() - time_start\n    task_logger.info(\n        f\"cloud_check_alembic finished: num_tenants={len(tenant_ids)} elapsed={time_elapsed:.2f}\"\n    )\n    return True\n\n\n@shared_task(\n    name=OnyxCeleryTask.CLOUD_MONITOR_CELERY_QUEUES, ignore_result=True, bind=True\n)\ndef cloud_monitor_celery_queues(\n    self: Task,\n) -> None:\n    return monitor_celery_queues_helper(self)\n\n\n@shared_task(name=OnyxCeleryTask.MONITOR_CELERY_QUEUES, ignore_result=True, bind=True)\ndef monitor_celery_queues(self: Task, *, tenant_id: str) -> None:  # noqa: ARG001\n    return monitor_celery_queues_helper(self)\n\n\ndef monitor_celery_queues_helper(\n    task: Task,\n) -> None:\n    \"\"\"A task to monitor all celery queue lengths.\"\"\"\n\n    r_celery = celery_get_broker_client(task.app)\n    n_celery = celery_get_queue_length(OnyxCeleryQueues.PRIMARY, r_celery)\n    n_docfetching = celery_get_queue_length(\n        OnyxCeleryQueues.CONNECTOR_DOC_FETCHING, r_celery\n    )\n    n_docprocessing = celery_get_queue_length(OnyxCeleryQueues.DOCPROCESSING, r_celery)\n\n    n_user_file_processing = celery_get_queue_length(\n        OnyxCeleryQueues.USER_FILE_PROCESSING, r_celery\n    )\n    n_user_file_project_sync = celery_get_queue_length(\n        OnyxCeleryQueues.USER_FILE_PROJECT_SYNC, r_celery\n    )\n    n_user_file_delete = celery_get_queue_length(\n        OnyxCeleryQueues.USER_FILE_DELETE, r_celery\n    )\n    n_sync = celery_get_queue_length(OnyxCeleryQueues.VESPA_METADATA_SYNC, r_celery)\n    n_deletion = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_DELETION, r_celery)\n    n_pruning = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_PRUNING, r_celery)\n    n_permissions_sync = celery_get_queue_length(\n        OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC, r_celery\n    )\n    n_external_group_sync = celery_get_queue_length(\n        OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC, r_celery\n    )\n    n_permissions_upsert = celery_get_queue_length(\n        OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT, r_celery\n    )\n    n_hierarchy_fetching = celery_get_queue_length(\n        OnyxCeleryQueues.CONNECTOR_HIERARCHY_FETCHING, r_celery\n    )\n    n_llm_model_update = celery_get_queue_length(\n        OnyxCeleryQueues.LLM_MODEL_UPDATE, r_celery\n    )\n    n_checkpoint_cleanup = celery_get_queue_length(\n        OnyxCeleryQueues.CHECKPOINT_CLEANUP, r_celery\n    )\n    n_index_attempt_cleanup = celery_get_queue_length(\n        OnyxCeleryQueues.INDEX_ATTEMPT_CLEANUP, r_celery\n    )\n    n_csv_generation = celery_get_queue_length(\n        OnyxCeleryQueues.CSV_GENERATION, r_celery\n    )\n    n_monitoring = celery_get_queue_length(OnyxCeleryQueues.MONITORING, r_celery)\n    n_sandbox = celery_get_queue_length(OnyxCeleryQueues.SANDBOX, r_celery)\n    n_opensearch_migration = celery_get_queue_length(\n        OnyxCeleryQueues.OPENSEARCH_MIGRATION, r_celery\n    )\n\n    n_docfetching_prefetched = celery_get_unacked_task_ids(\n        OnyxCeleryQueues.CONNECTOR_DOC_FETCHING, r_celery\n    )\n    n_docprocessing_prefetched = celery_get_unacked_task_ids(\n        OnyxCeleryQueues.DOCPROCESSING, r_celery\n    )\n\n    task_logger.info(\n        f\"Queue lengths: celery={n_celery} \"\n        f\"docfetching={n_docfetching} \"\n        f\"docfetching_prefetched={len(n_docfetching_prefetched)} \"\n        f\"docprocessing={n_docprocessing} \"\n        f\"docprocessing_prefetched={len(n_docprocessing_prefetched)} \"\n        f\"user_file_processing={n_user_file_processing} \"\n        f\"user_file_project_sync={n_user_file_project_sync} \"\n        f\"user_file_delete={n_user_file_delete} \"\n        f\"sync={n_sync} \"\n        f\"deletion={n_deletion} \"\n        f\"pruning={n_pruning} \"\n        f\"permissions_sync={n_permissions_sync} \"\n        f\"external_group_sync={n_external_group_sync} \"\n        f\"permissions_upsert={n_permissions_upsert} \"\n        f\"hierarchy_fetching={n_hierarchy_fetching} \"\n        f\"llm_model_update={n_llm_model_update} \"\n        f\"checkpoint_cleanup={n_checkpoint_cleanup} \"\n        f\"index_attempt_cleanup={n_index_attempt_cleanup} \"\n        f\"csv_generation={n_csv_generation} \"\n        f\"monitoring={n_monitoring} \"\n        f\"sandbox={n_sandbox} \"\n        f\"opensearch_migration={n_opensearch_migration} \"\n    )\n\n\n\"\"\"Memory monitoring\"\"\"\n\n\ndef _get_cmdline_for_process(process: psutil.Process) -> str | None:\n    try:\n        return \" \".join(process.cmdline())\n    except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):\n        return None\n\n\n@shared_task(\n    name=OnyxCeleryTask.MONITOR_PROCESS_MEMORY,\n    ignore_result=True,\n    soft_time_limit=_MONITORING_SOFT_TIME_LIMIT,\n    time_limit=_MONITORING_TIME_LIMIT,\n    queue=OnyxCeleryQueues.MONITORING,\n    bind=True,\n)\ndef monitor_process_memory(self: Task, *, tenant_id: str) -> None:  # noqa: ARG001\n    \"\"\"\n    Task to monitor memory usage of supervisor-managed processes.\n    This periodically checks the memory usage of processes and logs information\n    in a standardized format.\n\n    The task looks for processes managed by supervisor and logs their\n    memory usage statistics. This is useful for monitoring memory consumption\n    over time and identifying potential memory leaks.\n    \"\"\"\n    # don't run this task in multi-tenant mode, have other, better means of monitoring\n    if MULTI_TENANT:\n        return\n\n    # Skip memory monitoring if not in container\n    if not is_running_in_container():\n        return\n\n    try:\n        # Get all supervisor-managed processes\n        supervisor_processes: dict[int, str] = {}\n\n        # Map cmd line elements to more readable process names\n        process_type_mapping = {\n            \"--hostname=primary\": \"primary\",\n            \"--hostname=light\": \"light\",\n            \"--hostname=heavy\": \"heavy\",\n            \"--hostname=indexing\": \"indexing\",\n            \"--hostname=monitoring\": \"monitoring\",\n            \"beat\": \"beat\",\n            \"slack/listener.py\": \"slack\",\n        }\n\n        # Find all python processes that are likely celery workers\n        for proc in psutil.process_iter():\n            cmdline = _get_cmdline_for_process(proc)\n            if not cmdline:\n                continue\n\n            # Match supervisor-managed processes\n            for process_name, process_type in process_type_mapping.items():\n                if process_name in cmdline:\n                    if process_type in supervisor_processes.values():\n                        task_logger.error(\n                            f\"Duplicate process type for type {process_type} with cmd {cmdline} with pid={proc.pid}.\"\n                        )\n                        continue\n\n                    supervisor_processes[proc.pid] = process_type\n                    break\n\n        if len(supervisor_processes) != len(process_type_mapping):\n            task_logger.error(\n                f\"Missing processes: {set(process_type_mapping.keys()).symmetric_difference(supervisor_processes.values())}\"\n            )\n\n        # Log memory usage for each process\n        for pid, process_type in supervisor_processes.items():\n            try:\n                emit_process_memory(pid, process_type, {})\n            except psutil.NoSuchProcess:\n                # Process may have terminated since we obtained the list\n                continue\n            except Exception as e:\n                task_logger.exception(f\"Error monitoring process {pid}: {str(e)}\")\n\n    except Exception:\n        task_logger.exception(\"Error in monitor_process_memory task\")\n\n\n@shared_task(\n    name=OnyxCeleryTask.CLOUD_MONITOR_CELERY_PIDBOX, ignore_result=True, bind=True\n)\ndef cloud_monitor_celery_pidbox(\n    self: Task,\n) -> None:\n    \"\"\"\n    Celery can leave behind orphaned pidboxes from old workers that are idle and never cleaned up.\n    This task removes them based on idle time to avoid Redis clutter and overflowing the instance.\n    This is a real issue we've observed in production.\n\n    Note:\n    - Setting CELERY_ENABLE_REMOTE_CONTROL = False would prevent pidbox keys entirely,\n    but might also disable features like inspect, broadcast, and worker remote control.\n    Use with caution.\n    \"\"\"\n\n    num_deleted = 0\n\n    MAX_PIDBOX_IDLE = 24 * 3600  # 1 day in seconds\n    r_celery = celery_get_broker_client(self.app)\n    for key in r_celery.scan_iter(\"*.reply.celery.pidbox\"):\n        key_bytes = cast(bytes, key)\n        key_str = key_bytes.decode(\"utf-8\")\n        if key_str.startswith(\"_kombu\"):\n            continue\n\n        idletime_raw = r_celery.object(\"idletime\", key)\n        if idletime_raw is None:\n            continue\n\n        idletime = cast(int, idletime_raw)\n        if idletime < MAX_PIDBOX_IDLE:\n            continue\n\n        r_celery.delete(key)\n        task_logger.info(\n            f\"Deleted idle pidbox: pidbox={key_str} idletime={idletime} max_idletime={MAX_PIDBOX_IDLE}\"\n        )\n        num_deleted += 1\n\n    # Enable later in case we want some aggregate metrics\n    # task_logger.info(f\"Deleted idle pidbox: pidbox={key_str}\")\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/opensearch_migration/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/opensearch_migration/constants.py",
    "content": "# Tasks are expected to cease execution and do cleanup after the soft time\n# limit. In principle they are also forceably terminated after the hard time\n# limit, in practice this does not happen since we use threadpools for Celery\n# task execution, and we simple hope that the total task time plus cleanup does\n# not exceed this. Therefore tasks should regularly check their timeout and lock\n# status. The lock timeout is the maximum time the lock manager (Redis in this\n# case) will enforce the lock, independent of what is happening in the task. To\n# reduce the chances that a task is still doing work while a lock has expired,\n# make the lock timeout well above the task timeouts. In practice we should\n# never see locks be held for this long anyway because a task should release the\n# lock after its cleanup which happens at most after its soft timeout.\n\n# Constants corresponding to migrate_documents_from_vespa_to_opensearch_task.\nfrom onyx.configs.app_configs import OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE\n\n\nMIGRATION_TASK_SOFT_TIME_LIMIT_S = 60 * 5  # 5 minutes.\nMIGRATION_TASK_TIME_LIMIT_S = 60 * 6  # 6 minutes.\n# The maximum time the lock can be held for. Will automatically be released\n# after this time.\nMIGRATION_TASK_LOCK_TIMEOUT_S = 60 * 7  # 7 minutes.\nassert (\n    MIGRATION_TASK_SOFT_TIME_LIMIT_S < MIGRATION_TASK_TIME_LIMIT_S\n), \"The soft time limit must be less than the time limit.\"\nassert (\n    MIGRATION_TASK_TIME_LIMIT_S < MIGRATION_TASK_LOCK_TIMEOUT_S\n), \"The time limit must be less than the lock timeout.\"\n# Time to wait to acquire the lock.\nMIGRATION_TASK_LOCK_BLOCKING_TIMEOUT_S = 60 * 2  # 2 minutes.\n\n# Constants corresponding to check_for_documents_for_opensearch_migration_task.\nCHECK_FOR_DOCUMENTS_TASK_SOFT_TIME_LIMIT_S = 60  # 60 seconds / 1 minute.\nCHECK_FOR_DOCUMENTS_TASK_TIME_LIMIT_S = 90  # 90 seconds.\n# The maximum time the lock can be held for. Will automatically be released\n# after this time.\nCHECK_FOR_DOCUMENTS_TASK_LOCK_TIMEOUT_S = 120  # 120 seconds / 2 minutes.\nassert (\n    CHECK_FOR_DOCUMENTS_TASK_SOFT_TIME_LIMIT_S < CHECK_FOR_DOCUMENTS_TASK_TIME_LIMIT_S\n), \"The soft time limit must be less than the time limit.\"\nassert (\n    CHECK_FOR_DOCUMENTS_TASK_TIME_LIMIT_S < CHECK_FOR_DOCUMENTS_TASK_LOCK_TIMEOUT_S\n), \"The time limit must be less than the lock timeout.\"\n# Time to wait to acquire the lock.\nCHECK_FOR_DOCUMENTS_TASK_LOCK_BLOCKING_TIMEOUT_S = 30  # 30 seconds.\n\nTOTAL_ALLOWABLE_DOC_MIGRATION_ATTEMPTS_BEFORE_PERMANENT_FAILURE = 15\n\n# WARNING: Do not change these values without knowing what changes also need to\n# be made to OpenSearchTenantMigrationRecord.\nGET_VESPA_CHUNKS_PAGE_SIZE = OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE\nGET_VESPA_CHUNKS_SLICE_COUNT = 4\n\n# String used to indicate in the vespa_visit_continuation_token mapping that the\n# slice has finished and there is nothing left to visit.\nFINISHED_VISITING_SLICE_CONTINUATION_TOKEN = (\n    \"FINISHED_VISITING_SLICE_CONTINUATION_TOKEN\"\n)\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/opensearch_migration/tasks.py",
    "content": "\"\"\"Celery tasks for migrating documents from Vespa to OpenSearch.\"\"\"\n\nimport time\nimport traceback\n\nfrom celery import shared_task\nfrom celery import Task\nfrom redis.lock import Lock as RedisLock\n\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.tasks.opensearch_migration.constants import (\n    FINISHED_VISITING_SLICE_CONTINUATION_TOKEN,\n)\nfrom onyx.background.celery.tasks.opensearch_migration.constants import (\n    GET_VESPA_CHUNKS_PAGE_SIZE,\n)\nfrom onyx.background.celery.tasks.opensearch_migration.constants import (\n    MIGRATION_TASK_LOCK_BLOCKING_TIMEOUT_S,\n)\nfrom onyx.background.celery.tasks.opensearch_migration.constants import (\n    MIGRATION_TASK_LOCK_TIMEOUT_S,\n)\nfrom onyx.background.celery.tasks.opensearch_migration.constants import (\n    MIGRATION_TASK_SOFT_TIME_LIMIT_S,\n)\nfrom onyx.background.celery.tasks.opensearch_migration.constants import (\n    MIGRATION_TASK_TIME_LIMIT_S,\n)\nfrom onyx.background.celery.tasks.opensearch_migration.transformer import (\n    transform_vespa_chunks_to_opensearch_chunks,\n)\nfrom onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\nfrom onyx.configs.app_configs import VESPA_MIGRATION_REQUEST_TIMEOUT_S\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.opensearch_migration import build_sanitized_to_original_doc_id_mapping\nfrom onyx.db.opensearch_migration import get_vespa_visit_state\nfrom onyx.db.opensearch_migration import is_migration_completed\nfrom onyx.db.opensearch_migration import (\n    mark_migration_completed_time_if_not_set_with_commit,\n)\nfrom onyx.db.opensearch_migration import (\n    try_insert_opensearch_tenant_migration_record_with_commit,\n)\nfrom onyx.db.opensearch_migration import update_vespa_visit_progress_with_commit\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.opensearch_document_index import (\n    OpenSearchDocumentIndex,\n)\nfrom onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client\nfrom onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex\nfrom onyx.indexing.models import IndexingSetting\nfrom onyx.redis.redis_pool import get_redis_client\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\n\n\ndef is_continuation_token_done_for_all_slices(\n    continuation_token_map: dict[int, str | None],\n) -> bool:\n    return all(\n        continuation_token == FINISHED_VISITING_SLICE_CONTINUATION_TOKEN\n        for continuation_token in continuation_token_map.values()\n    )\n\n\n# shared_task allows this task to be shared across celery app instances.\n@shared_task(\n    name=OnyxCeleryTask.MIGRATE_CHUNKS_FROM_VESPA_TO_OPENSEARCH_TASK,\n    # Does not store the task's return value in the result backend.\n    ignore_result=True,\n    # WARNING: This is here just for rigor but since we use threads for Celery\n    # this config is not respected and timeout logic must be implemented in the\n    # task.\n    soft_time_limit=MIGRATION_TASK_SOFT_TIME_LIMIT_S,\n    # WARNING: This is here just for rigor but since we use threads for Celery\n    # this config is not respected and timeout logic must be implemented in the\n    # task.\n    time_limit=MIGRATION_TASK_TIME_LIMIT_S,\n    # Passed in self to the task to get task metadata.\n    bind=True,\n)\ndef migrate_chunks_from_vespa_to_opensearch_task(\n    self: Task,  # noqa: ARG001\n    *,\n    tenant_id: str,\n) -> bool | None:\n    \"\"\"\n    Periodic task to migrate chunks from Vespa to OpenSearch via the Visit API.\n\n    Uses Vespa's Visit API to iterate through ALL chunks in bulk (not\n    per-document), transform them, and index them into OpenSearch. Progress is\n    tracked via a continuation token map stored in the\n    OpenSearchTenantMigrationRecord.\n\n    The first time we see no continuation token map and non-zero chunks\n    migrated, we consider the migration complete and all subsequent invocations\n    are no-ops.\n\n    We divide the index into GET_VESPA_CHUNKS_SLICE_COUNT independent slices\n    where progress is tracked for each slice.\n\n    Returns:\n        None if OpenSearch migration is not enabled, or if the lock could not be\n            acquired; effectively a no-op. True if the task completed\n            successfully. False if the task errored.\n    \"\"\"\n    # 1. Check if we should run the task.\n    # 1.a. If OpenSearch indexing is disabled, we don't run the task.\n    if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:\n        task_logger.warning(\n            \"OpenSearch migration is not enabled, skipping chunk migration task.\"\n        )\n        return None\n    task_logger.info(\"Starting chunk-level migration from Vespa to OpenSearch.\")\n    task_start_time = time.monotonic()\n\n    # 1.b. Only one instance per tenant of this task may run concurrently at\n    # once. If we fail to acquire a lock, we assume it is because another task\n    # has one and we exit.\n    r = get_redis_client()\n    lock: RedisLock = r.lock(\n        name=OnyxRedisLocks.OPENSEARCH_MIGRATION_BEAT_LOCK,\n        # The maximum time the lock can be held for. Will automatically be\n        # released after this time.\n        timeout=MIGRATION_TASK_LOCK_TIMEOUT_S,\n        # .acquire will block until the lock is acquired.\n        blocking=True,\n        # Time to wait to acquire the lock.\n        blocking_timeout=MIGRATION_TASK_LOCK_BLOCKING_TIMEOUT_S,\n    )\n    if not lock.acquire():\n        task_logger.warning(\n            \"The OpenSearch migration task timed out waiting for the lock.\"\n        )\n        return None\n    else:\n        task_logger.info(\n            f\"Acquired the OpenSearch migration lock. Took {time.monotonic() - task_start_time:.3f} seconds. \"\n            f\"Token: {lock.local.token}\"\n        )\n\n    # 2. Prepare to migrate.\n    total_chunks_migrated_this_task = 0\n    total_chunks_errored_this_task = 0\n    try:\n        # 2.a. Double-check that tenant info is correct.\n        if tenant_id != get_current_tenant_id():\n            err_str = (\n                f\"Tenant ID mismatch in the OpenSearch migration task: \"\n                f\"{tenant_id} != {get_current_tenant_id()}. This should never happen.\"\n            )\n            task_logger.error(err_str)\n            return False\n\n        # Do as much as we can with a DB session in one spot to not hold a\n        # session during a migration batch.\n        with get_session_with_current_tenant() as db_session:\n            # 2.b. Immediately check to see if this tenant is done, to save\n            # having to do any other work. This function does not require a\n            # migration record to necessarily exist.\n            if is_migration_completed(db_session):\n                return True\n\n            # 2.c. Try to insert the OpenSearchTenantMigrationRecord table if it\n            # does not exist.\n            try_insert_opensearch_tenant_migration_record_with_commit(db_session)\n\n            # 2.d. Get search settings.\n            search_settings = get_current_search_settings(db_session)\n            indexing_setting = IndexingSetting.from_db_model(search_settings)\n\n            # 2.e. Build sanitized to original doc ID mapping to check for\n            # conflicts in the event we sanitize a doc ID to an\n            # already-existing doc ID.\n            # We reconstruct this mapping for every task invocation because\n            # a document may have been added in the time between two tasks.\n            sanitized_doc_start_time = time.monotonic()\n            sanitized_to_original_doc_id_mapping = (\n                build_sanitized_to_original_doc_id_mapping(db_session)\n            )\n            task_logger.debug(\n                f\"Built sanitized_to_original_doc_id_mapping with {len(sanitized_to_original_doc_id_mapping)} entries \"\n                f\"in {time.monotonic() - sanitized_doc_start_time:.3f} seconds.\"\n            )\n\n            # 2.f. Get the current migration state.\n            continuation_token_map, total_chunks_migrated = get_vespa_visit_state(\n                db_session\n            )\n            # 2.f.1. Double-check that the migration state does not imply\n            # completion. Really we should never have to enter this block as we\n            # would expect is_migration_completed to return True, but in the\n            # strange event that the migration is complete but the migration\n            # completed time was never stamped, we do so here.\n            if is_continuation_token_done_for_all_slices(continuation_token_map):\n                task_logger.info(\n                    f\"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}.\"\n                )\n                mark_migration_completed_time_if_not_set_with_commit(db_session)\n                return True\n        task_logger.debug(\n            f\"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. \"\n            f\"Continuation token map: {continuation_token_map}\"\n        )\n\n        with get_vespa_http_client(\n            timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S\n        ) as vespa_client:\n            # 2.g. Create the OpenSearch and Vespa document indexes.\n            tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)\n            opensearch_document_index = OpenSearchDocumentIndex(\n                tenant_state=tenant_state,\n                index_name=search_settings.index_name,\n                embedding_dim=indexing_setting.final_embedding_dim,\n                embedding_precision=indexing_setting.embedding_precision,\n            )\n            vespa_document_index = VespaDocumentIndex(\n                index_name=search_settings.index_name,\n                tenant_state=tenant_state,\n                large_chunks_enabled=False,\n                httpx_client=vespa_client,\n            )\n\n            # 2.h. Get the approximate chunk count in Vespa as of this time to\n            # update the migration record.\n            approx_chunk_count_in_vespa: int | None = None\n            get_chunk_count_start_time = time.monotonic()\n            try:\n                approx_chunk_count_in_vespa = vespa_document_index.get_chunk_count()\n            except Exception:\n                # This failure should not be blocking.\n                task_logger.exception(\n                    \"Error getting approximate chunk count in Vespa. Moving on...\"\n                )\n            task_logger.debug(\n                f\"Took {time.monotonic() - get_chunk_count_start_time:.3f} seconds to attempt to get \"\n                f\"approximate chunk count in Vespa. Got {approx_chunk_count_in_vespa}.\"\n            )\n\n            # 3. Do the actual migration in batches until we run out of time.\n            while (\n                time.monotonic() - task_start_time < MIGRATION_TASK_SOFT_TIME_LIMIT_S\n                and lock.owned()\n            ):\n                # 3.a. Get the next batch of raw chunks from Vespa.\n                get_vespa_chunks_start_time = time.monotonic()\n                raw_vespa_chunks, next_continuation_token_map = (\n                    vespa_document_index.get_all_raw_document_chunks_paginated(\n                        continuation_token_map=continuation_token_map,\n                        page_size=GET_VESPA_CHUNKS_PAGE_SIZE,\n                    )\n                )\n                task_logger.debug(\n                    f\"Read {len(raw_vespa_chunks)} chunks from Vespa in {time.monotonic() - get_vespa_chunks_start_time:.3f} \"\n                    f\"seconds. Next continuation token map: {next_continuation_token_map}\"\n                )\n\n                # 3.b. Transform the raw chunks to OpenSearch chunks in memory.\n                opensearch_document_chunks, errored_chunks = (\n                    transform_vespa_chunks_to_opensearch_chunks(\n                        raw_vespa_chunks,\n                        tenant_state,\n                        sanitized_to_original_doc_id_mapping,\n                    )\n                )\n                if len(opensearch_document_chunks) != len(raw_vespa_chunks):\n                    task_logger.error(\n                        f\"Migration task error: Number of candidate chunks to migrate ({len(opensearch_document_chunks)}) does \"\n                        f\"not match number of chunks in Vespa ({len(raw_vespa_chunks)}). {len(errored_chunks)} chunks \"\n                        \"errored.\"\n                    )\n\n                # 3.c. Index the OpenSearch chunks into OpenSearch.\n                index_opensearch_chunks_start_time = time.monotonic()\n                opensearch_document_index.index_raw_chunks(\n                    chunks=opensearch_document_chunks\n                )\n                task_logger.debug(\n                    f\"Indexed {len(opensearch_document_chunks)} chunks into OpenSearch in \"\n                    f\"{time.monotonic() - index_opensearch_chunks_start_time:.3f} seconds.\"\n                )\n\n                total_chunks_migrated_this_task += len(opensearch_document_chunks)\n                total_chunks_errored_this_task += len(errored_chunks)\n\n                # Do as much as we can with a DB session in one spot to not hold a\n                # session during a migration batch.\n                with get_session_with_current_tenant() as db_session:\n                    # 3.d. Update the migration state.\n                    update_vespa_visit_progress_with_commit(\n                        db_session,\n                        continuation_token_map=next_continuation_token_map,\n                        chunks_processed=len(opensearch_document_chunks),\n                        chunks_errored=len(errored_chunks),\n                        approx_chunk_count_in_vespa=approx_chunk_count_in_vespa,\n                    )\n\n                    # 3.e. Get the current migration state. Even thought we\n                    # technically have it in-memory since we just wrote it, we\n                    # want to reference the DB as the source of truth at all\n                    # times.\n                    continuation_token_map, total_chunks_migrated = (\n                        get_vespa_visit_state(db_session)\n                    )\n                    # 3.e.1. Check if the migration is done.\n                    if is_continuation_token_done_for_all_slices(\n                        continuation_token_map\n                    ):\n                        task_logger.info(\n                            f\"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}.\"\n                        )\n                        mark_migration_completed_time_if_not_set_with_commit(db_session)\n                        return True\n                task_logger.debug(\n                    f\"Read the tenant migration record. Total chunks migrated: {total_chunks_migrated}. \"\n                    f\"Continuation token map: {continuation_token_map}\"\n                )\n    except Exception:\n        traceback.print_exc()\n        task_logger.exception(\"Error in the OpenSearch migration task.\")\n        return False\n    finally:\n        if lock.owned():\n            lock.release()\n        else:\n            task_logger.warning(\n                \"The OpenSearch migration lock was not owned on completion of the migration task.\"\n            )\n\n    task_logger.info(\n        f\"OpenSearch chunk migration task pausing (time limit reached). \"\n        f\"Total chunks migrated this task: {total_chunks_migrated_this_task}. \"\n        f\"Total chunks errored this task: {total_chunks_errored_this_task}. \"\n        f\"Elapsed: {time.monotonic() - task_start_time:.3f}s. \"\n        \"Will resume from continuation token on next invocation.\"\n    )\n\n    return True\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/opensearch_migration/transformer.py",
    "content": "import traceback\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nfrom onyx.configs.constants import PUBLIC_DOC_PAT\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.schema import DocumentChunk\nfrom onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST\nfrom onyx.document_index.vespa_constants import BLURB\nfrom onyx.document_index.vespa_constants import BOOST\nfrom onyx.document_index.vespa_constants import CHUNK_CONTEXT\nfrom onyx.document_index.vespa_constants import CHUNK_ID\nfrom onyx.document_index.vespa_constants import CONTENT\nfrom onyx.document_index.vespa_constants import DOC_SUMMARY\nfrom onyx.document_index.vespa_constants import DOC_UPDATED_AT\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID\nfrom onyx.document_index.vespa_constants import DOCUMENT_SETS\nfrom onyx.document_index.vespa_constants import EMBEDDINGS\nfrom onyx.document_index.vespa_constants import FULL_CHUNK_EMBEDDING_KEY\nfrom onyx.document_index.vespa_constants import HIDDEN\nfrom onyx.document_index.vespa_constants import IMAGE_FILE_NAME\nfrom onyx.document_index.vespa_constants import METADATA_LIST\nfrom onyx.document_index.vespa_constants import METADATA_SUFFIX\nfrom onyx.document_index.vespa_constants import PERSONAS\nfrom onyx.document_index.vespa_constants import PRIMARY_OWNERS\nfrom onyx.document_index.vespa_constants import SECONDARY_OWNERS\nfrom onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER\nfrom onyx.document_index.vespa_constants import SOURCE_LINKS\nfrom onyx.document_index.vespa_constants import SOURCE_TYPE\nfrom onyx.document_index.vespa_constants import TENANT_ID\nfrom onyx.document_index.vespa_constants import TITLE\nfrom onyx.document_index.vespa_constants import TITLE_EMBEDDING\nfrom onyx.document_index.vespa_constants import USER_PROJECT\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger(__name__)\n\n\nFIELDS_NEEDED_FOR_TRANSFORMATION: list[str] = [\n    DOCUMENT_ID,\n    CHUNK_ID,\n    TITLE,\n    TITLE_EMBEDDING,\n    CONTENT,\n    EMBEDDINGS,\n    SOURCE_TYPE,\n    METADATA_LIST,\n    DOC_UPDATED_AT,\n    HIDDEN,\n    BOOST,\n    SEMANTIC_IDENTIFIER,\n    IMAGE_FILE_NAME,\n    SOURCE_LINKS,\n    BLURB,\n    DOC_SUMMARY,\n    CHUNK_CONTEXT,\n    METADATA_SUFFIX,\n    DOCUMENT_SETS,\n    USER_PROJECT,\n    PERSONAS,\n    PRIMARY_OWNERS,\n    SECONDARY_OWNERS,\n    ACCESS_CONTROL_LIST,\n]\nif MULTI_TENANT:\n    FIELDS_NEEDED_FOR_TRANSFORMATION.append(TENANT_ID)\n\n\ndef _extract_content_vector(embeddings: Any) -> list[float]:\n    \"\"\"Extracts the full chunk embedding vector from Vespa's embeddings tensor.\n\n    Vespa stores embeddings as a tensor<float>(t{},x[dim]) where 't' maps\n    embedding names (like \"full_chunk\") to vectors. The API can return this in\n    different formats:\n    1. Direct list: {\"full_chunk\": [...]}\n    2. Blocks format: {\"blocks\": {\"full_chunk\": [0.1, 0.2, ...]}}\n    3. Possibly other formats.\n\n    We only support formats 1 and 2. Any other supplied format will raise an\n    error.\n\n    Raises:\n        ValueError: If the embeddings format is not supported.\n\n    Returns:\n        The full chunk content embedding vector as a list of floats.\n    \"\"\"\n    if isinstance(embeddings, dict):\n        # Handle format 1.\n        full_chunk_embedding = embeddings.get(FULL_CHUNK_EMBEDDING_KEY)\n        if isinstance(full_chunk_embedding, list):\n            # Double check that within the list we have floats and not another\n            # list or dict.\n            if not full_chunk_embedding:\n                raise ValueError(\"Full chunk embedding is empty.\")\n            if isinstance(full_chunk_embedding[0], float):\n                return full_chunk_embedding\n\n        # Handle format 2.\n        blocks = embeddings.get(\"blocks\")\n        if isinstance(blocks, dict):\n            full_chunk_embedding = blocks.get(FULL_CHUNK_EMBEDDING_KEY)\n            if isinstance(full_chunk_embedding, list):\n                # Double check that within the list we have floats and not another\n                # list or dict.\n                if not full_chunk_embedding:\n                    raise ValueError(\"Full chunk embedding is empty.\")\n                if isinstance(full_chunk_embedding[0], float):\n                    return full_chunk_embedding\n\n    raise ValueError(f\"Unknown embedding format: {type(embeddings)}\")\n\n\ndef _extract_title_vector(title_embedding: Any | None) -> list[float] | None:\n    \"\"\"Extract the title embedding vector.\n\n    Returns None if no title embedding exists.\n\n    Vespa returns title_embedding as tensor<float>(x[dim]) which can be in\n    formats:\n    1. Direct list: [0.1, 0.2, ...]\n    2. Values format: {\"values\": [0.1, 0.2, ...]}\n    3. Possibly other formats.\n\n    Only formats 1 and 2 are supported. Any other supplied format will raise an\n    error.\n\n    Raises:\n        ValueError: If the title embedding format is not supported.\n\n    Returns:\n        The title embedding vector as a list of floats.\n    \"\"\"\n    if title_embedding is None:\n        return None\n\n    # Handle format 1.\n    if isinstance(title_embedding, list):\n        # Double check that within the list we have floats and not another\n        # list or dict.\n        if not title_embedding:\n            return None\n        if isinstance(title_embedding[0], float):\n            return title_embedding\n\n    # Handle format 2.\n    if isinstance(title_embedding, dict):\n        # Try values format.\n        values = title_embedding.get(\"values\")\n        if values is not None and isinstance(values, list):\n            # Double check that within the list we have floats and not another\n            # list or dict.\n            if not values:\n                return None\n            if isinstance(values[0], float):\n                return values\n\n    raise ValueError(f\"Unknown title embedding format: {type(title_embedding)}\")\n\n\ndef _transform_vespa_document_sets_to_opensearch_document_sets(\n    vespa_document_sets: dict[str, int] | None,\n) -> list[str] | None:\n    if not vespa_document_sets:\n        return None\n    return list(vespa_document_sets.keys())\n\n\ndef _transform_vespa_acl_to_opensearch_acl(\n    vespa_acl: dict[str, int] | None,\n) -> tuple[bool, list[str]]:\n    if not vespa_acl:\n        return False, []\n    acl_list = list(vespa_acl.keys())\n    is_public = PUBLIC_DOC_PAT in acl_list\n    if is_public:\n        acl_list.remove(PUBLIC_DOC_PAT)\n    return is_public, acl_list\n\n\ndef transform_vespa_chunks_to_opensearch_chunks(\n    vespa_chunks: list[dict[str, Any]],\n    tenant_state: TenantState,\n    sanitized_to_original_doc_id_mapping: dict[str, str],\n) -> tuple[list[DocumentChunk], list[dict[str, Any]]]:\n    result: list[DocumentChunk] = []\n    errored_chunks: list[dict[str, Any]] = []\n    for vespa_chunk in vespa_chunks:\n        try:\n            # This should exist; fail loudly if it does not.\n            vespa_document_id: str = vespa_chunk[DOCUMENT_ID]\n            if not vespa_document_id:\n                raise ValueError(\"Missing document_id in Vespa chunk.\")\n            # Vespa doc IDs were sanitized using\n            # replace_invalid_doc_id_characters. This was a poor design choice\n            # and we don't want this in OpenSearch; whatever restrictions there\n            # may be on indexed chunk ID should have no bearing on the chunk's\n            # document ID field, even if document ID is an argument to the chunk\n            # ID. Deliberately choose to use the real doc ID supplied to this\n            # function.\n            if vespa_document_id in sanitized_to_original_doc_id_mapping:\n                logger.warning(\n                    f\"Migration warning: Vespa document ID {vespa_document_id} does not match the document ID supplied \"\n                    f\"{sanitized_to_original_doc_id_mapping[vespa_document_id]}. \"\n                    \"The Vespa ID will be discarded.\"\n                )\n            document_id = sanitized_to_original_doc_id_mapping.get(\n                vespa_document_id, vespa_document_id\n            )\n\n            # This should exist; fail loudly if it does not.\n            chunk_index: int = vespa_chunk[CHUNK_ID]\n\n            title: str | None = vespa_chunk.get(TITLE)\n            # WARNING: Should supply format.tensors=short-value to the Vespa\n            # client in order to get a supported format for the tensors.\n            title_vector: list[float] | None = _extract_title_vector(\n                vespa_chunk.get(TITLE_EMBEDDING)\n            )\n\n            # This should exist; fail loudly if it does not.\n            content: str = vespa_chunk[CONTENT]\n            if not content:\n                raise ValueError(\n                    f\"Missing content in Vespa chunk with document ID {vespa_document_id} and chunk index {chunk_index}.\"\n                )\n            # This should exist; fail loudly if it does not.\n            # WARNING: Should supply format.tensors=short-value to the Vespa\n            # client in order to get a supported format for the tensors.\n            content_vector: list[float] = _extract_content_vector(\n                vespa_chunk[EMBEDDINGS]\n            )\n            if not content_vector:\n                raise ValueError(\n                    f\"Missing content_vector in Vespa chunk with document ID {vespa_document_id} and chunk index {chunk_index}.\"\n                )\n\n            # This should exist; fail loudly if it does not.\n            source_type: str = vespa_chunk[SOURCE_TYPE]\n            if not source_type:\n                raise ValueError(\n                    f\"Missing source_type in Vespa chunk with document ID {vespa_document_id} and chunk index {chunk_index}.\"\n                )\n\n            metadata_list: list[str] | None = vespa_chunk.get(METADATA_LIST)\n\n            _raw_doc_updated_at: int | None = vespa_chunk.get(DOC_UPDATED_AT)\n            last_updated: datetime | None = (\n                datetime.fromtimestamp(_raw_doc_updated_at, tz=timezone.utc)\n                if _raw_doc_updated_at is not None\n                else None\n            )\n\n            hidden: bool = vespa_chunk.get(HIDDEN, False)\n\n            # This should exist; fail loudly if it does not.\n            global_boost: int = vespa_chunk[BOOST]\n\n            # This should exist; fail loudly if it does not.\n            semantic_identifier: str = vespa_chunk[SEMANTIC_IDENTIFIER]\n            if not semantic_identifier:\n                raise ValueError(\n                    f\"Missing semantic_identifier in Vespa chunk with document ID {vespa_document_id} and chunk \"\n                    f\"index {chunk_index}.\"\n                )\n\n            image_file_id: str | None = vespa_chunk.get(IMAGE_FILE_NAME)\n            source_links: str | None = vespa_chunk.get(SOURCE_LINKS)\n            blurb: str = vespa_chunk.get(BLURB, \"\")\n            doc_summary: str = vespa_chunk.get(DOC_SUMMARY, \"\")\n            chunk_context: str = vespa_chunk.get(CHUNK_CONTEXT, \"\")\n            metadata_suffix: str | None = vespa_chunk.get(METADATA_SUFFIX)\n            document_sets: list[str] | None = (\n                _transform_vespa_document_sets_to_opensearch_document_sets(\n                    vespa_chunk.get(DOCUMENT_SETS)\n                )\n            )\n            user_projects: list[int] | None = vespa_chunk.get(USER_PROJECT)\n            personas: list[int] | None = vespa_chunk.get(PERSONAS)\n            primary_owners: list[str] | None = vespa_chunk.get(PRIMARY_OWNERS)\n            secondary_owners: list[str] | None = vespa_chunk.get(SECONDARY_OWNERS)\n\n            is_public, acl_list = _transform_vespa_acl_to_opensearch_acl(\n                vespa_chunk.get(ACCESS_CONTROL_LIST)\n            )\n            if not is_public and not acl_list:\n                logger.warning(\n                    f\"Migration warning: Vespa chunk with document ID {vespa_document_id} and chunk index {chunk_index} has no \"\n                    \"public ACL and no access control list. This does not make sense as it implies the document is never \"\n                    \"searchable. Continuing with the migration...\"\n                )\n\n            chunk_tenant_id: str | None = vespa_chunk.get(TENANT_ID)\n            if MULTI_TENANT:\n                if not chunk_tenant_id:\n                    raise ValueError(\n                        \"Missing tenant_id in Vespa chunk in a multi-tenant environment.\"\n                    )\n                if chunk_tenant_id != tenant_state.tenant_id:\n                    raise ValueError(\n                        f\"Chunk tenant_id {chunk_tenant_id} does not match expected tenant_id {tenant_state.tenant_id}\"\n                    )\n\n            opensearch_chunk = DocumentChunk(\n                # We deliberately choose to use the doc ID supplied to this function\n                # over the Vespa doc ID.\n                document_id=document_id,\n                chunk_index=chunk_index,\n                title=title,\n                title_vector=title_vector,\n                content=content,\n                content_vector=content_vector,\n                source_type=source_type,\n                metadata_list=metadata_list,\n                last_updated=last_updated,\n                public=is_public,\n                access_control_list=acl_list,\n                hidden=hidden,\n                global_boost=global_boost,\n                semantic_identifier=semantic_identifier,\n                image_file_id=image_file_id,\n                source_links=source_links,\n                blurb=blurb,\n                doc_summary=doc_summary,\n                chunk_context=chunk_context,\n                metadata_suffix=metadata_suffix,\n                document_sets=document_sets,\n                user_projects=user_projects,\n                personas=personas,\n                primary_owners=primary_owners,\n                secondary_owners=secondary_owners,\n                tenant_id=tenant_state,\n            )\n\n            result.append(opensearch_chunk)\n        except Exception:\n            traceback.print_exc()\n            logger.exception(\n                f\"Migration error: Error transforming Vespa chunk with document ID {vespa_chunk.get(DOCUMENT_ID)} \"\n                f\"and chunk index {vespa_chunk.get(CHUNK_ID)} into an OpenSearch chunk. Continuing with \"\n                \"the migration...\"\n            )\n            errored_chunks.append(vespa_chunk)\n\n    return result, errored_chunks\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/periodic/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/periodic/tasks.py",
    "content": "#####\n# Periodic Tasks\n#####\nimport json\nfrom typing import Any\n\nfrom celery import shared_task\nfrom celery.contrib.abortable import AbortableTask  # type: ignore\nfrom celery.exceptions import TaskRevokedError\nfrom sqlalchemy import inspect\nfrom sqlalchemy import text\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import PostgresAdvisoryLocks\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\n\n\n@shared_task(\n    name=OnyxCeleryTask.KOMBU_MESSAGE_CLEANUP_TASK,\n    soft_time_limit=JOB_TIMEOUT,\n    bind=True,\n    base=AbortableTask,\n)\ndef kombu_message_cleanup_task(self: Any, tenant_id: str) -> int:  # noqa: ARG001\n    \"\"\"Runs periodically to clean up the kombu_message table\"\"\"\n\n    # we will select messages older than this amount to clean up\n    KOMBU_MESSAGE_CLEANUP_AGE = 7  # days\n    KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT = 1000\n\n    ctx = {}\n    ctx[\"last_processed_id\"] = 0\n    ctx[\"deleted\"] = 0\n    ctx[\"cleanup_age\"] = KOMBU_MESSAGE_CLEANUP_AGE\n    ctx[\"page_limit\"] = KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT\n    with get_session_with_current_tenant() as db_session:\n        # Exit the task if we can't take the advisory lock\n        result = db_session.execute(\n            text(\"SELECT pg_try_advisory_lock(:id)\"),\n            {\"id\": PostgresAdvisoryLocks.KOMBU_MESSAGE_CLEANUP_LOCK_ID.value},\n        ).scalar()\n        if not result:\n            return 0\n\n        while True:\n            if self.is_aborted():\n                raise TaskRevokedError(\"kombu_message_cleanup_task was aborted.\")\n\n            b = kombu_message_cleanup_task_helper(ctx, db_session)\n            if not b:\n                break\n\n            db_session.commit()\n\n    if ctx[\"deleted\"] > 0:\n        task_logger.info(\n            f\"Deleted {ctx['deleted']} orphaned messages from kombu_message.\"\n        )\n\n    return ctx[\"deleted\"]\n\n\ndef kombu_message_cleanup_task_helper(ctx: dict, db_session: Session) -> bool:\n    \"\"\"\n    Helper function to clean up old messages from the `kombu_message` table that are no longer relevant.\n\n    This function retrieves messages from the `kombu_message` table that are no longer visible and\n    older than a specified interval. It checks if the corresponding task_id exists in the\n    `celery_taskmeta` table. If the task_id does not exist, the message is deleted.\n\n    Args:\n        ctx (dict): A context dictionary containing configuration parameters such as:\n            - 'cleanup_age' (int): The age in days after which messages are considered old.\n            - 'page_limit' (int): The maximum number of messages to process in one batch.\n            - 'last_processed_id' (int): The ID of the last processed message to handle pagination.\n            - 'deleted' (int): A counter to track the number of deleted messages.\n        db_session (Session): The SQLAlchemy database session for executing queries.\n\n    Returns:\n        bool: Returns True if there are more rows to process, False if not.\n    \"\"\"\n\n    inspector = inspect(db_session.bind)\n    if not inspector:\n        return False\n\n    # With the move to redis as celery's broker and backend, kombu tables may not even exist.\n    # We can fail silently.\n    if not inspector.has_table(\"kombu_message\"):\n        return False\n\n    query = text(\n        \"\"\"\n    SELECT id, timestamp, payload\n    FROM kombu_message WHERE visible = 'false'\n    AND timestamp < CURRENT_TIMESTAMP - INTERVAL :interval_days\n    AND id > :last_processed_id\n    ORDER BY id\n    LIMIT :page_limit\n\"\"\"\n    )\n    kombu_messages = db_session.execute(\n        query,\n        {\n            \"interval_days\": f\"{ctx['cleanup_age']} days\",\n            \"page_limit\": ctx[\"page_limit\"],\n            \"last_processed_id\": ctx[\"last_processed_id\"],\n        },\n    ).fetchall()\n\n    if len(kombu_messages) == 0:\n        return False\n\n    for msg in kombu_messages:\n        payload = json.loads(msg[2])\n        task_id = payload[\"headers\"][\"id\"]\n\n        # Check if task_id exists in celery_taskmeta\n        task_exists = db_session.execute(\n            text(\"SELECT 1 FROM celery_taskmeta WHERE task_id = :task_id\"),\n            {\"task_id\": task_id},\n        ).fetchone()\n\n        # If task_id does not exist, delete the message\n        if not task_exists:\n            result = db_session.execute(\n                text(\"DELETE FROM kombu_message WHERE id = :message_id\"),\n                {\"message_id\": msg[0]},\n            )\n            if result.rowcount > 0:  # type: ignore\n                ctx[\"deleted\"] += 1\n\n        ctx[\"last_processed_id\"] = msg[0]\n\n    return True\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/pruning/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/pruning/tasks.py",
    "content": "import time\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\nfrom uuid import uuid4\n\nfrom celery import Celery\nfrom celery import shared_task\nfrom celery import Task\nfrom celery.exceptions import SoftTimeLimitExceeded\nfrom pydantic import ValidationError\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.celery_redis import celery_find_task\nfrom onyx.background.celery.celery_redis import celery_get_broker_client\nfrom onyx.background.celery.celery_redis import celery_get_queue_length\nfrom onyx.background.celery.celery_redis import celery_get_queued_task_ids\nfrom onyx.background.celery.celery_redis import celery_get_unacked_task_ids\nfrom onyx.background.celery.celery_utils import extract_ids_from_runnable_connector\nfrom onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT\nfrom onyx.background.celery.tasks.docprocessing.utils import IndexingCallbackBase\nfrom onyx.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import CELERY_PRUNING_LOCK_TIMEOUT\nfrom onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT\nfrom onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.configs.constants import OnyxRedisSignals\nfrom onyx.connectors.factory import instantiate_connector\nfrom onyx.connectors.models import InputType\nfrom onyx.db.connector import mark_ccpair_as_pruned\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.connector_credential_pair import get_connector_credential_pairs\nfrom onyx.db.document import get_documents_for_connector_credential_pair\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import SyncStatus\nfrom onyx.db.enums import SyncType\nfrom onyx.db.hierarchy import delete_orphaned_hierarchy_nodes\nfrom onyx.db.hierarchy import link_hierarchy_nodes_to_documents\nfrom onyx.db.hierarchy import remove_stale_hierarchy_node_cc_pair_entries\nfrom onyx.db.hierarchy import reparent_orphaned_hierarchy_nodes\nfrom onyx.db.hierarchy import update_document_parent_hierarchy_nodes\nfrom onyx.db.hierarchy import upsert_hierarchy_node_cc_pair_entries\nfrom onyx.db.hierarchy import upsert_hierarchy_nodes_batch\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import HierarchyNode as DBHierarchyNode\nfrom onyx.db.sync_record import insert_sync_record\nfrom onyx.db.sync_record import update_sync_record_status\nfrom onyx.db.tag import delete_orphan_tags__no_commit\nfrom onyx.redis.redis_connector import RedisConnector\nfrom onyx.redis.redis_connector_prune import RedisConnectorPrune\nfrom onyx.redis.redis_connector_prune import RedisConnectorPrunePayload\nfrom onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch\nfrom onyx.redis.redis_hierarchy import ensure_source_node_exists\nfrom onyx.redis.redis_hierarchy import evict_hierarchy_nodes_from_cache\nfrom onyx.redis.redis_hierarchy import get_node_id_from_raw_id\nfrom onyx.redis.redis_hierarchy import get_source_node_id_from_cache\nfrom onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.redis.redis_pool import get_redis_replica_client\nfrom onyx.server.runtime.onyx_runtime import OnyxRuntime\nfrom onyx.server.utils import make_short_id\nfrom onyx.utils.logger import format_error_for_logging\nfrom onyx.utils.logger import LoggerContextVars\nfrom onyx.utils.logger import pruning_ctx\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n\ndef _get_pruning_block_expiration() -> int:\n    \"\"\"\n    Compute the expiration time for the pruning block signal.\n    Base expiration is 60 seconds (1 minute), multiplied by the beat multiplier only in MULTI_TENANT mode.\n    \"\"\"\n    base_expiration = 60  # seconds\n\n    if not MULTI_TENANT:\n        return base_expiration\n\n    try:\n        beat_multiplier = OnyxRuntime.get_beat_multiplier()\n    except Exception:\n        beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT\n\n    return int(base_expiration * beat_multiplier)\n\n\ndef _get_fence_validation_block_expiration() -> int:\n    \"\"\"\n    Compute the expiration time for the fence validation block signal.\n    Base expiration is 300 seconds, multiplied by the beat multiplier only in MULTI_TENANT mode.\n    \"\"\"\n    base_expiration = 300  # seconds\n\n    if not MULTI_TENANT:\n        return base_expiration\n\n    try:\n        beat_multiplier = OnyxRuntime.get_beat_multiplier()\n    except Exception:\n        beat_multiplier = CLOUD_BEAT_MULTIPLIER_DEFAULT\n\n    return int(base_expiration * beat_multiplier)\n\n\nclass PruneCallback(IndexingCallbackBase):\n    def progress(self, tag: str, amount: int) -> None:\n        self.redis_connector.prune.set_active()\n        super().progress(tag, amount)\n\n\ndef _resolve_and_update_document_parents(\n    db_session: Session,\n    redis_client: Redis,\n    source: DocumentSource,\n    raw_id_to_parent: dict[str, str | None],\n) -> None:\n    \"\"\"Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id for\n    each document and bulk-update the DB. Mirrors the resolution logic in\n    run_docfetching.py.\"\"\"\n    source_node_id = get_source_node_id_from_cache(redis_client, db_session, source)\n\n    resolved: dict[str, int | None] = {}\n    for doc_id, raw_parent_id in raw_id_to_parent.items():\n        if raw_parent_id is None:\n            continue\n        node_id, found = get_node_id_from_raw_id(redis_client, source, raw_parent_id)\n        resolved[doc_id] = node_id if found else source_node_id\n\n    if not resolved:\n        return\n\n    update_document_parent_hierarchy_nodes(\n        db_session=db_session,\n        doc_parent_map=resolved,\n        commit=True,\n    )\n    task_logger.info(\n        f\"Pruning: resolved and updated parent hierarchy for {len(resolved)} documents (source={source.value})\"\n    )\n\n\n\"\"\"Jobs / utils for kicking off pruning tasks.\"\"\"\n\n\ndef _is_pruning_due(cc_pair: ConnectorCredentialPair) -> bool:\n    \"\"\"Returns boolean indicating if pruning is due.\n\n    Next pruning time is calculated as a delta from the last successful prune, or the\n    last successful indexing if pruning has never succeeded.\n\n    TODO(rkuo): consider whether we should allow pruning to be immediately rescheduled\n    if pruning fails (which is what it does now). A backoff could be reasonable.\n    \"\"\"\n\n    # skip pruning if no prune frequency is set\n    # pruning can still be forced via the API which will run a pruning task directly\n    if not cc_pair.connector.prune_freq:\n        return False\n\n    # skip pruning if not active\n    if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE:\n        return False\n\n    # skip pruning if the next scheduled prune time hasn't been reached yet\n    last_pruned = cc_pair.last_pruned\n    if not last_pruned:\n        if not cc_pair.last_successful_index_time:\n            # if we've never indexed, we can't prune\n            return False\n\n        # if never pruned, use the connector creation time. We could also\n        # compute the completion time of the first successful index attempt, but\n        # that is a reasonably heavy operation. This is a reasonable approximation —\n        # in the worst case, we'll prune a little bit earlier than we should.\n        last_pruned = cc_pair.connector.time_created\n\n    next_prune = last_pruned + timedelta(seconds=cc_pair.connector.prune_freq)\n    return datetime.now(timezone.utc) >= next_prune\n\n\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_PRUNING,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT,\n    bind=True,\n)\ndef check_for_pruning(self: Task, *, tenant_id: str) -> bool | None:\n    r = get_redis_client()\n    r_replica = get_redis_replica_client()\n\n    lock_beat: RedisLock = r.lock(\n        OnyxRedisLocks.CHECK_PRUNE_BEAT_LOCK,\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n\n    # these tasks should never overlap\n    if not lock_beat.acquire(blocking=False):\n        return None\n\n    try:\n        # the entire task needs to run frequently in order to finalize pruning\n\n        # but pruning only kicks off once per hour\n        if not r.exists(OnyxRedisSignals.BLOCK_PRUNING):\n            task_logger.info(\"Checking for pruning due\")\n\n            cc_pair_ids: list[int] = []\n            with get_session_with_current_tenant() as db_session:\n                cc_pairs = get_connector_credential_pairs(db_session)\n                for cc_pair_entry in cc_pairs:\n                    cc_pair_ids.append(cc_pair_entry.id)\n\n            for cc_pair_id in cc_pair_ids:\n                lock_beat.reacquire()\n                with get_session_with_current_tenant() as db_session:\n                    cc_pair = get_connector_credential_pair_from_id(\n                        db_session=db_session,\n                        cc_pair_id=cc_pair_id,\n                    )\n                    if not cc_pair:\n                        logger.error(f\"CC pair not found: {cc_pair_id}\")\n                        continue\n\n                    if not _is_pruning_due(cc_pair):\n                        logger.info(f\"CC pair not due for pruning: {cc_pair_id}\")\n                        continue\n\n                    payload_id = try_creating_prune_generator_task(\n                        self.app, cc_pair, db_session, r, tenant_id\n                    )\n                    if not payload_id:\n                        logger.info(f\"Pruning not created: {cc_pair_id}\")\n                        continue\n\n                    task_logger.info(\n                        f\"Pruning queued: cc_pair={cc_pair.id} id={payload_id}\"\n                    )\n            r.set(OnyxRedisSignals.BLOCK_PRUNING, 1, ex=_get_pruning_block_expiration())\n\n        # we want to run this less frequently than the overall task\n        lock_beat.reacquire()\n        if not r.exists(OnyxRedisSignals.BLOCK_VALIDATE_PRUNING_FENCES):\n            # clear any permission fences that don't have associated celery tasks in progress\n            # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),\n            # or be currently executing\n            try:\n                r_celery = celery_get_broker_client(self.app)\n                validate_pruning_fences(tenant_id, r, r_replica, r_celery, lock_beat)\n            except Exception:\n                task_logger.exception(\"Exception while validating pruning fences\")\n\n            r.set(\n                OnyxRedisSignals.BLOCK_VALIDATE_PRUNING_FENCES,\n                1,\n                ex=_get_fence_validation_block_expiration(),\n            )\n\n        # use a lookup table to find active fences. We still have to verify the fence\n        # exists since it is an optimization and not the source of truth.\n        lock_beat.reacquire()\n        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))\n        for key in keys:\n            key_bytes = cast(bytes, key)\n\n            if not r.exists(key_bytes):\n                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)\n                continue\n\n            key_str = key_bytes.decode(\"utf-8\")\n            if key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):\n                with get_session_with_current_tenant() as db_session:\n                    monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session)\n    except SoftTimeLimitExceeded:\n        task_logger.info(\n            \"Soft time limit exceeded, task is being terminated gracefully.\"\n        )\n    except Exception as e:\n        error_msg = format_error_for_logging(e)\n        task_logger.warning(f\"Unexpected pruning check exception: {error_msg}\")\n        task_logger.exception(\"Unexpected exception during pruning check\")\n    finally:\n        if lock_beat.owned():\n            lock_beat.release()\n    task_logger.info(f\"check_for_pruning finished: tenant={tenant_id}\")\n    return True\n\n\ndef try_creating_prune_generator_task(\n    celery_app: Celery,\n    cc_pair: ConnectorCredentialPair,\n    db_session: Session,\n    r: Redis,\n    tenant_id: str,\n) -> str | None:\n    \"\"\"Checks for any conditions that should block the pruning generator task from being\n    created, then creates the task.\n\n    Does not check for scheduling related conditions as this function\n    is used to trigger prunes immediately, e.g. via the web ui.\n    \"\"\"\n\n    logger.info(f\"try_creating_prune_generator_task: cc_pair={cc_pair.id}\")\n\n    redis_connector = RedisConnector(tenant_id, cc_pair.id)\n\n    if not ALLOW_SIMULTANEOUS_PRUNING:\n        count = redis_connector.prune.get_active_task_count()\n        if count > 0:\n            logger.info(\n                f\"try_creating_prune_generator_task: cc_pair={cc_pair.id} no simultaneous pruning allowed\"\n            )\n            return None\n\n    LOCK_TIMEOUT = 30\n\n    # we need to serialize starting pruning since it can be triggered either via\n    # celery beat or manually (API call)\n    lock: RedisLock = r.lock(\n        DANSWER_REDIS_FUNCTION_LOCK_PREFIX + \"try_creating_prune_generator_task\",\n        timeout=LOCK_TIMEOUT,\n    )\n\n    acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2)\n    if not acquired:\n        logger.info(\n            f\"try_creating_prune_generator_task: cc_pair={cc_pair.id} lock not acquired\"\n        )\n        return None\n\n    try:\n        # skip pruning if already pruning\n        if redis_connector.prune.fenced:\n            logger.info(\n                f\"try_creating_prune_generator_task: cc_pair={cc_pair.id} already pruning\"\n            )\n            return None\n\n        # skip pruning if the cc_pair is deleting\n        if redis_connector.delete.fenced:\n            logger.info(\n                f\"try_creating_prune_generator_task: cc_pair={cc_pair.id} deleting\"\n            )\n            return None\n\n        # skip pruning if doc permissions sync is running\n        if redis_connector.permissions.fenced:\n            logger.info(\n                f\"try_creating_prune_generator_task: cc_pair={cc_pair.id} permissions sync running\"\n            )\n            return None\n\n        db_session.refresh(cc_pair)\n        if cc_pair.status == ConnectorCredentialPairStatus.DELETING:\n            logger.info(\n                f\"try_creating_prune_generator_task: cc_pair={cc_pair.id} deleting\"\n            )\n            return None\n\n        # add a long running generator task to the queue\n        redis_connector.prune.generator_clear()\n        redis_connector.prune.taskset_clear()\n\n        custom_task_id = f\"{redis_connector.prune.generator_task_key}_{uuid4()}\"\n\n        # create before setting fence to avoid race condition where the monitoring\n        # task updates the sync record before it is created\n        try:\n            insert_sync_record(\n                db_session=db_session,\n                entity_id=cc_pair.id,\n                sync_type=SyncType.PRUNING,\n            )\n        except Exception:\n            task_logger.exception(\"insert_sync_record exceptioned.\")\n\n        # signal active before the fence is set\n        redis_connector.prune.set_active()\n\n        # set a basic fence to start\n        payload = RedisConnectorPrunePayload(\n            id=make_short_id(),\n            submitted=datetime.now(timezone.utc),\n            started=None,\n            celery_task_id=None,\n        )\n        redis_connector.prune.set_fence(payload)\n\n        result = celery_app.send_task(\n            OnyxCeleryTask.CONNECTOR_PRUNING_GENERATOR_TASK,\n            kwargs=dict(\n                cc_pair_id=cc_pair.id,\n                connector_id=cc_pair.connector_id,\n                credential_id=cc_pair.credential_id,\n                tenant_id=tenant_id,\n            ),\n            queue=OnyxCeleryQueues.CONNECTOR_PRUNING,\n            task_id=custom_task_id,\n            priority=OnyxCeleryPriority.LOW,\n        )\n\n        # fill in the celery task id\n        payload.celery_task_id = result.id\n        redis_connector.prune.set_fence(payload)\n\n        payload_id = payload.id\n    except Exception as e:\n        error_msg = format_error_for_logging(e)\n        task_logger.warning(\n            f\"Unexpected try_creating_prune_generator_task exception: cc_pair={cc_pair.id} {error_msg}\"\n        )\n        task_logger.exception(f\"Unexpected exception: cc_pair={cc_pair.id}\")\n        return None\n    finally:\n        if lock.owned():\n            lock.release()\n    task_logger.info(\n        f\"try_creating_prune_generator_task finished: cc_pair={cc_pair.id} payload_id={payload_id}\"\n    )\n    return payload_id\n\n\n@shared_task(\n    name=OnyxCeleryTask.CONNECTOR_PRUNING_GENERATOR_TASK,\n    acks_late=False,\n    soft_time_limit=JOB_TIMEOUT,\n    track_started=True,\n    trail=False,\n    bind=True,\n)\ndef connector_pruning_generator_task(\n    self: Task,\n    cc_pair_id: int,\n    connector_id: int,\n    credential_id: int,\n    tenant_id: str,\n) -> None:\n    \"\"\"connector pruning task. For a cc pair, this task pulls all document IDs from the source\n    and compares those IDs to locally stored documents and deletes all locally stored IDs missing\n    from the most recently pulled document ID list\"\"\"\n\n    payload_id: str | None = None\n\n    LoggerContextVars.reset()\n\n    pruning_ctx_dict = pruning_ctx.get()\n    pruning_ctx_dict[\"cc_pair_id\"] = cc_pair_id\n    pruning_ctx_dict[\"request_id\"] = self.request.id\n    pruning_ctx.set(pruning_ctx_dict)\n\n    task_logger.info(f\"Pruning generator starting: cc_pair={cc_pair_id}\")\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n\n    r = get_redis_client()\n\n    # this wait is needed to avoid a race condition where\n    # the primary worker sends the task and it is immediately executed\n    # before the primary worker can finalize the fence\n    start = time.monotonic()\n    while True:\n        if time.monotonic() - start > CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT:\n            raise ValueError(\n                f\"connector_prune_generator_task - timed out waiting for fence to be ready: \"\n                f\"fence={redis_connector.prune.fence_key}\"\n            )\n\n        if not redis_connector.prune.fenced:  # The fence must exist\n            raise ValueError(\n                f\"connector_prune_generator_task - fence not found: fence={redis_connector.prune.fence_key}\"\n            )\n\n        payload = redis_connector.prune.payload  # The payload must exist\n        if not payload:\n            raise ValueError(\n                \"connector_prune_generator_task: payload invalid or not found\"\n            )\n\n        if payload.celery_task_id is None:\n            logger.info(\n                f\"connector_prune_generator_task - Waiting for fence: fence={redis_connector.prune.fence_key}\"\n            )\n            time.sleep(1)\n            continue\n\n        payload_id = payload.id\n\n        logger.info(\n            f\"connector_prune_generator_task - Fence found, continuing...: \"\n            f\"fence={redis_connector.prune.fence_key} \"\n            f\"payload_id={payload.id}\"\n        )\n        break\n\n    # set thread_local=False since we don't control what thread the indexing/pruning\n    # might run our callback with\n    lock: RedisLock = r.lock(\n        OnyxRedisLocks.PRUNING_LOCK_PREFIX + f\"_{redis_connector.cc_pair_id}\",\n        timeout=CELERY_PRUNING_LOCK_TIMEOUT,\n        thread_local=False,\n    )\n\n    acquired = lock.acquire(blocking=False)\n    if not acquired:\n        task_logger.warning(\n            f\"Pruning task already running, exiting...: cc_pair={cc_pair_id}\"\n        )\n        return None\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            cc_pair = get_connector_credential_pair(\n                db_session=db_session,\n                connector_id=connector_id,\n                credential_id=credential_id,\n            )\n\n            if not cc_pair:\n                task_logger.warning(\n                    f\"cc_pair not found for {connector_id} {credential_id}\"\n                )\n                return\n\n            payload = redis_connector.prune.payload\n            if not payload:\n                raise ValueError(f\"No fence payload found: cc_pair={cc_pair_id}\")\n\n            new_payload = RedisConnectorPrunePayload(\n                id=payload.id,\n                submitted=payload.submitted,\n                started=datetime.now(timezone.utc),\n                celery_task_id=payload.celery_task_id,\n            )\n            redis_connector.prune.set_fence(new_payload)\n\n            task_logger.info(\n                f\"Pruning generator running connector: cc_pair={cc_pair_id} connector_source={cc_pair.connector.source}\"\n            )\n\n            runnable_connector = instantiate_connector(\n                db_session,\n                cc_pair.connector.source,\n                InputType.SLIM_RETRIEVAL,\n                cc_pair.connector.connector_specific_config,\n                cc_pair.credential,\n            )\n\n            callback = PruneCallback(\n                0,\n                redis_connector,\n                lock,\n                r,\n                timeout_seconds=JOB_TIMEOUT,\n            )\n\n            # Extract docs and hierarchy nodes from the source\n            extraction_result = extract_ids_from_runnable_connector(\n                runnable_connector, callback\n            )\n            all_connector_doc_ids = extraction_result.raw_id_to_parent\n\n            # Process hierarchy nodes (same as docfetching):\n            # upsert to Postgres and cache in Redis\n            source = cc_pair.connector.source\n            redis_client = get_redis_client(tenant_id=tenant_id)\n\n            ensure_source_node_exists(redis_client, db_session, source)\n\n            upserted_nodes: list[DBHierarchyNode] = []\n            if extraction_result.hierarchy_nodes:\n                is_connector_public = cc_pair.access_type == AccessType.PUBLIC\n\n                upserted_nodes = upsert_hierarchy_nodes_batch(\n                    db_session=db_session,\n                    nodes=extraction_result.hierarchy_nodes,\n                    source=source,\n                    commit=True,\n                    is_connector_public=is_connector_public,\n                )\n\n                upsert_hierarchy_node_cc_pair_entries(\n                    db_session=db_session,\n                    hierarchy_node_ids=[n.id for n in upserted_nodes],\n                    connector_id=connector_id,\n                    credential_id=credential_id,\n                    commit=True,\n                )\n\n                cache_entries = [\n                    HierarchyNodeCacheEntry.from_db_model(node)\n                    for node in upserted_nodes\n                ]\n                cache_hierarchy_nodes_batch(\n                    redis_client=redis_client,\n                    source=source,\n                    entries=cache_entries,\n                )\n\n                task_logger.info(\n                    f\"Pruning: persisted and cached {len(extraction_result.hierarchy_nodes)} \"\n                    f\"hierarchy nodes for cc_pair={cc_pair_id}\"\n                )\n\n            # Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id\n            # and bulk-update documents, mirroring the docfetching resolution\n            _resolve_and_update_document_parents(\n                db_session=db_session,\n                redis_client=redis_client,\n                source=source,\n                raw_id_to_parent=all_connector_doc_ids,\n            )\n\n            # Link hierarchy nodes to documents for sources where pages can be\n            # both hierarchy nodes AND documents (e.g. Notion, Confluence)\n            all_doc_id_list = list(all_connector_doc_ids.keys())\n            link_hierarchy_nodes_to_documents(\n                db_session=db_session,\n                document_ids=all_doc_id_list,\n                source=source,\n                commit=True,\n            )\n\n            # a list of docs in our local index\n            all_indexed_document_ids = {\n                doc.id\n                for doc in get_documents_for_connector_credential_pair(\n                    db_session=db_session,\n                    connector_id=connector_id,\n                    credential_id=credential_id,\n                )\n            }\n\n            # generate list of docs to remove (no longer in the source)\n            doc_ids_to_remove = list(\n                all_indexed_document_ids - all_connector_doc_ids.keys()\n            )\n\n            task_logger.info(\n                \"Pruning set collected: \"\n                f\"cc_pair={cc_pair_id} \"\n                f\"connector_source={cc_pair.connector.source} \"\n                f\"docs_to_remove={len(doc_ids_to_remove)}\"\n            )\n\n            task_logger.info(\n                f\"RedisConnector.prune.generate_tasks starting. cc_pair={cc_pair_id}\"\n            )\n            tasks_generated = redis_connector.prune.generate_tasks(\n                set(doc_ids_to_remove), self.app, db_session, None\n            )\n            if tasks_generated is None:\n                return None\n\n            task_logger.info(\n                f\"RedisConnector.prune.generate_tasks finished. cc_pair={cc_pair_id} tasks_generated={tasks_generated}\"\n            )\n\n            redis_connector.prune.generator_complete = tasks_generated\n\n            # --- Hierarchy node pruning ---\n            live_node_ids = {n.id for n in upserted_nodes}\n            stale_removed = remove_stale_hierarchy_node_cc_pair_entries(\n                db_session=db_session,\n                connector_id=connector_id,\n                credential_id=credential_id,\n                live_hierarchy_node_ids=live_node_ids,\n                commit=True,\n            )\n            deleted_raw_ids = delete_orphaned_hierarchy_nodes(\n                db_session=db_session,\n                source=source,\n                commit=True,\n            )\n            reparented_nodes = reparent_orphaned_hierarchy_nodes(\n                db_session=db_session,\n                source=source,\n                commit=True,\n            )\n            if deleted_raw_ids:\n                evict_hierarchy_nodes_from_cache(redis_client, source, deleted_raw_ids)\n            if reparented_nodes:\n                reparented_cache_entries = [\n                    HierarchyNodeCacheEntry.from_db_model(node)\n                    for node in reparented_nodes\n                ]\n                cache_hierarchy_nodes_batch(\n                    redis_client, source, reparented_cache_entries\n                )\n            if stale_removed or deleted_raw_ids or reparented_nodes:\n                task_logger.info(\n                    f\"Hierarchy node pruning: cc_pair={cc_pair_id} \"\n                    f\"stale_entries_removed={stale_removed} \"\n                    f\"nodes_deleted={len(deleted_raw_ids)} \"\n                    f\"nodes_reparented={len(reparented_nodes)}\"\n                )\n    except Exception as e:\n        task_logger.exception(\n            f\"Pruning exceptioned: cc_pair={cc_pair_id} connector={connector_id} payload_id={payload_id}\"\n        )\n\n        redis_connector.prune.reset()\n        raise e\n    finally:\n        if lock.owned():\n            lock.release()\n\n    task_logger.info(\n        f\"Pruning generator finished: cc_pair={cc_pair_id} payload_id={payload_id}\"\n    )\n\n\n\"\"\"Monitoring pruning utils\"\"\"\n\n\ndef monitor_ccpair_pruning_taskset(\n    tenant_id: str,\n    key_bytes: bytes,\n    r: Redis,  # noqa: ARG001\n    db_session: Session,\n) -> None:\n    fence_key = key_bytes.decode(\"utf-8\")\n    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)\n    if cc_pair_id_str is None:\n        task_logger.warning(\n            f\"monitor_ccpair_pruning_taskset: could not parse cc_pair_id from {fence_key}\"\n        )\n        return\n\n    cc_pair_id = int(cc_pair_id_str)\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n    if not redis_connector.prune.fenced:\n        return\n\n    initial = redis_connector.prune.generator_complete\n    if initial is None:\n        return\n\n    remaining = redis_connector.prune.get_remaining()\n    task_logger.info(\n        f\"Connector pruning progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}\"\n    )\n    if remaining > 0:\n        return\n\n    mark_ccpair_as_pruned(int(cc_pair_id), db_session)\n    task_logger.info(\n        f\"Connector pruning finished: cc_pair={cc_pair_id} num_pruned={initial}\"\n    )\n\n    update_sync_record_status(\n        db_session=db_session,\n        entity_id=cc_pair_id,\n        sync_type=SyncType.PRUNING,\n        sync_status=SyncStatus.SUCCESS,\n        num_docs_synced=initial,\n    )\n\n    delete_orphan_tags__no_commit(db_session)\n\n    redis_connector.prune.taskset_clear()\n    redis_connector.prune.generator_clear()\n    redis_connector.prune.set_fence(None)\n\n\ndef validate_pruning_fences(\n    tenant_id: str,\n    r: Redis,\n    r_replica: Redis,\n    r_celery: Redis,\n    lock_beat: RedisLock,\n) -> None:\n    # building lookup table can be expensive, so we won't bother\n    # validating until the queue is small\n    PERMISSION_SYNC_VALIDATION_MAX_QUEUE_LEN = 1024\n\n    queue_len = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_DELETION, r_celery)\n    if queue_len > PERMISSION_SYNC_VALIDATION_MAX_QUEUE_LEN:\n        return\n\n    # the queue for a single pruning generator task\n    reserved_generator_tasks = celery_get_unacked_task_ids(\n        OnyxCeleryQueues.CONNECTOR_PRUNING, r_celery\n    )\n\n    # the queue for a reasonably large set of lightweight deletion tasks\n    queued_upsert_tasks = celery_get_queued_task_ids(\n        OnyxCeleryQueues.CONNECTOR_DELETION, r_celery\n    )\n\n    # Use replica for this because the worst thing that happens\n    # is that we don't run the validation on this pass\n    keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))\n    for key in keys:\n        key_bytes = cast(bytes, key)\n        key_str = key_bytes.decode(\"utf-8\")\n        if not key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):\n            continue\n\n        validate_pruning_fence(\n            tenant_id,\n            key_bytes,\n            reserved_generator_tasks,\n            queued_upsert_tasks,\n            r,\n            r_celery,\n        )\n\n        lock_beat.reacquire()\n\n    return\n\n\ndef validate_pruning_fence(\n    tenant_id: str,\n    key_bytes: bytes,\n    reserved_tasks: set[str],\n    queued_tasks: set[str],\n    r: Redis,\n    r_celery: Redis,\n) -> None:\n    \"\"\"See validate_indexing_fence for an overall idea of validation flows.\n\n    queued_tasks: the celery queue of lightweight permission sync tasks\n    reserved_tasks: prefetched tasks for sync task generator\n    \"\"\"\n    # if the fence doesn't exist, there's nothing to do\n    fence_key = key_bytes.decode(\"utf-8\")\n    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)\n    if cc_pair_id_str is None:\n        task_logger.warning(\n            f\"validate_pruning_fence - could not parse id from {fence_key}\"\n        )\n        return\n\n    cc_pair_id = int(cc_pair_id_str)\n    # parse out metadata and initialize the helper class with it\n    redis_connector = RedisConnector(tenant_id, int(cc_pair_id))\n\n    # check to see if the fence/payload exists\n    if not redis_connector.prune.fenced:\n        return\n\n    # in the cloud, the payload format may have changed ...\n    # it's a little sloppy, but just reset the fence for now if that happens\n    # TODO: add intentional cleanup/abort logic\n    try:\n        payload = redis_connector.prune.payload\n    except ValidationError:\n        task_logger.exception(\n            \"validate_pruning_fence - \"\n            \"Resetting fence because fence schema is out of date: \"\n            f\"cc_pair={cc_pair_id} \"\n            f\"fence={fence_key}\"\n        )\n\n        redis_connector.prune.reset()\n        return\n\n    if not payload:\n        return\n\n    if not payload.celery_task_id:\n        return\n\n    # OK, there's actually something for us to validate\n\n    # either the generator task must be in flight or its subtasks must be\n    found = celery_find_task(\n        payload.celery_task_id,\n        OnyxCeleryQueues.CONNECTOR_PRUNING,\n        r_celery,\n    )\n    if found:\n        # the celery task exists in the redis queue\n        redis_connector.prune.set_active()\n        return\n\n    if payload.celery_task_id in reserved_tasks:\n        # the celery task was prefetched and is reserved within a worker\n        redis_connector.prune.set_active()\n        return\n\n    # look up every task in the current taskset in the celery queue\n    # every entry in the taskset should have an associated entry in the celery task queue\n    # because we get the celery tasks first, the entries in our own pruning taskset\n    # should be roughly a subset of the tasks in celery\n\n    # this check isn't very exact, but should be sufficient over a period of time\n    # A single successful check over some number of attempts is sufficient.\n\n    # TODO: if the number of tasks in celery is much lower than than the taskset length\n    # we might be able to shortcut the lookup since by definition some of the tasks\n    # must not exist in celery.\n\n    tasks_scanned = 0\n    tasks_not_in_celery = 0  # a non-zero number after completing our check is bad\n\n    for member in r.sscan_iter(redis_connector.prune.taskset_key):\n        tasks_scanned += 1\n\n        member_bytes = cast(bytes, member)\n        member_str = member_bytes.decode(\"utf-8\")\n        if member_str in queued_tasks:\n            continue\n\n        if member_str in reserved_tasks:\n            continue\n\n        tasks_not_in_celery += 1\n\n    task_logger.info(\n        f\"validate_pruning_fence task check: tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}\"\n    )\n\n    # we're active if there are still tasks to run and those tasks all exist in celery\n    if tasks_scanned > 0 and tasks_not_in_celery == 0:\n        redis_connector.prune.set_active()\n        return\n\n    # we may want to enable this check if using the active task list somehow isn't good enough\n    # if redis_connector_index.generator_locked():\n    #     logger.info(f\"{payload.celery_task_id} is currently executing.\")\n\n    # if we get here, we didn't find any direct indication that the associated celery tasks exist,\n    # but they still might be there due to gaps in our ability to check states during transitions\n    # Checking the active signal safeguards us against these transition periods\n    # (which has a duration that allows us to bridge those gaps)\n    if redis_connector.prune.active():\n        return\n\n    # celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.\n    task_logger.warning(\n        \"validate_pruning_fence - \"\n        \"Resetting fence because no associated celery tasks were found: \"\n        f\"cc_pair={cc_pair_id} \"\n        f\"fence={fence_key} \"\n        f\"payload_id={payload.id}\"\n    )\n\n    redis_connector.prune.reset()\n    return\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/shared/RetryDocumentIndex.py",
    "content": "import httpx\nfrom tenacity import retry\nfrom tenacity import retry_if_exception_type\nfrom tenacity import stop_after_delay\nfrom tenacity import wait_random_exponential\n\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.document_index.interfaces import VespaDocumentFields\nfrom onyx.document_index.interfaces import VespaDocumentUserFields\n\n\nclass RetryDocumentIndex:\n    \"\"\"A wrapper class to help with specific retries against Vespa involving\n    read timeouts.\n\n    wait_random_exponential implements full jitter as per this article:\n    https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/\"\"\"\n\n    MAX_WAIT = 30\n\n    # STOP_AFTER + MAX_WAIT should be slightly less (5?) than the celery soft_time_limit\n    STOP_AFTER = 70\n\n    def __init__(self, index: DocumentIndex):\n        self.index: DocumentIndex = index\n\n    @retry(\n        retry=retry_if_exception_type(httpx.ReadTimeout),\n        wait=wait_random_exponential(multiplier=1, max=MAX_WAIT),\n        stop=stop_after_delay(STOP_AFTER),\n    )\n    def delete_single(\n        self,\n        doc_id: str,\n        *,\n        tenant_id: str,\n        chunk_count: int | None,\n    ) -> int:\n        return self.index.delete_single(\n            doc_id,\n            tenant_id=tenant_id,\n            chunk_count=chunk_count,\n        )\n\n    @retry(\n        retry=retry_if_exception_type(httpx.ReadTimeout),\n        wait=wait_random_exponential(multiplier=1, max=MAX_WAIT),\n        stop=stop_after_delay(STOP_AFTER),\n    )\n    def update_single(\n        self,\n        doc_id: str,\n        *,\n        tenant_id: str,\n        chunk_count: int | None,\n        fields: VespaDocumentFields | None,\n        user_fields: VespaDocumentUserFields | None,\n    ) -> None:\n        self.index.update_single(\n            doc_id,\n            tenant_id=tenant_id,\n            chunk_count=chunk_count,\n            fields=fields,\n            user_fields=user_fields,\n        )\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/shared/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/shared/tasks.py",
    "content": "import time\nfrom enum import Enum\nfrom http import HTTPStatus\n\nimport httpx\nfrom celery import shared_task\nfrom celery import Task\nfrom celery.exceptions import SoftTimeLimitExceeded\nfrom redis import Redis\nfrom tenacity import RetryError\n\nfrom onyx.access.access import get_access_for_document\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex\nfrom onyx.configs.constants import ONYX_CELERY_BEAT_HEARTBEAT_KEY\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.document import delete_document_by_connector_credential_pair__no_commit\nfrom onyx.db.document import delete_documents_complete__no_commit\nfrom onyx.db.document import fetch_chunk_count_for_document\nfrom onyx.db.document import get_document\nfrom onyx.db.document import get_document_connector_count\nfrom onyx.db.document import mark_document_as_modified\nfrom onyx.db.document import mark_document_as_synced\nfrom onyx.db.document_set import fetch_document_sets_for_document\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.relationships import delete_document_references_from_kg\nfrom onyx.db.search_settings import get_active_search_settings\nfrom onyx.document_index.factory import get_all_document_indices\nfrom onyx.document_index.interfaces import VespaDocumentFields\nfrom onyx.httpx.httpx_pool import HttpxPool\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.documents.models import ConnectorCredentialPairIdentifier\n\nDOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES = 3\n\n\n# 5 seconds more than RetryDocumentIndex STOP_AFTER+MAX_WAIT\nLIGHT_SOFT_TIME_LIMIT = 105\nLIGHT_TIME_LIMIT = LIGHT_SOFT_TIME_LIMIT + 15\n\n\nclass OnyxCeleryTaskCompletionStatus(str, Enum):\n    \"\"\"The different statuses the watchdog can finish with.\n\n    TODO: create broader success/failure/abort categories\n    \"\"\"\n\n    UNDEFINED = \"undefined\"\n\n    SUCCEEDED = \"succeeded\"\n\n    SKIPPED = \"skipped\"\n\n    SOFT_TIME_LIMIT = \"soft_time_limit\"\n\n    NON_RETRYABLE_EXCEPTION = \"non_retryable_exception\"\n    RETRYABLE_EXCEPTION = \"retryable_exception\"\n\n\n@shared_task(\n    name=OnyxCeleryTask.DOCUMENT_BY_CC_PAIR_CLEANUP_TASK,\n    soft_time_limit=LIGHT_SOFT_TIME_LIMIT,\n    time_limit=LIGHT_TIME_LIMIT,\n    max_retries=DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES,\n    bind=True,\n)\ndef document_by_cc_pair_cleanup_task(\n    self: Task,\n    document_id: str,\n    connector_id: int,\n    credential_id: int,\n    tenant_id: str,\n) -> bool:\n    \"\"\"A lightweight subtask used to clean up document to cc pair relationships.\n    Created by connection deletion and connector pruning parent tasks.\"\"\"\n\n    \"\"\"\n    To delete a connector / credential pair:\n    (1) find all documents associated with connector / credential pair where there\n    this the is only connector / credential pair that has indexed it\n    (2) delete all documents from document stores\n    (3) delete all entries from postgres\n    (4) find all documents associated with connector / credential pair where there\n    are multiple connector / credential pairs that have indexed it\n    (5) update document store entries to remove access associated with the\n    connector / credential pair from the access list\n    (6) delete all relevant entries from postgres\n    \"\"\"\n    task_logger.debug(f\"Task start: doc={document_id}\")\n\n    start = time.monotonic()\n\n    completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            action = \"skip\"\n\n            active_search_settings = get_active_search_settings(db_session)\n            # This flow is for updates and deletion so we get all indices.\n            document_indices = get_all_document_indices(\n                active_search_settings.primary,\n                active_search_settings.secondary,\n                httpx_client=HttpxPool.get(\"vespa\"),\n            )\n\n            retry_document_indices: list[RetryDocumentIndex] = [\n                RetryDocumentIndex(document_index)\n                for document_index in document_indices\n            ]\n\n            count = get_document_connector_count(db_session, document_id)\n            if count == 1:\n                # count == 1 means this is the only remaining cc_pair reference to the doc\n                # delete it from vespa and the db\n                action = \"delete\"\n\n                chunk_count = fetch_chunk_count_for_document(document_id, db_session)\n\n                for retry_document_index in retry_document_indices:\n                    _ = retry_document_index.delete_single(\n                        document_id,\n                        tenant_id=tenant_id,\n                        chunk_count=chunk_count,\n                    )\n\n                delete_document_references_from_kg(\n                    db_session=db_session,\n                    document_id=document_id,\n                )\n\n                delete_documents_complete__no_commit(\n                    db_session=db_session,\n                    document_ids=[document_id],\n                )\n                db_session.commit()\n\n                completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED\n            elif count > 1:\n                action = \"update\"\n\n                # count > 1 means the document still has cc_pair references\n                doc = get_document(document_id, db_session)\n                if not doc:\n                    return False\n\n                # the below functions do not include cc_pairs being deleted.\n                # i.e. they will correctly omit access for the current cc_pair\n                doc_access = get_access_for_document(\n                    document_id=document_id, db_session=db_session\n                )\n\n                doc_sets = fetch_document_sets_for_document(document_id, db_session)\n                update_doc_sets: set[str] = set(doc_sets)\n\n                fields = VespaDocumentFields(\n                    document_sets=update_doc_sets,\n                    access=doc_access,\n                    boost=doc.boost,\n                    hidden=doc.hidden,\n                )\n\n                for retry_document_index in retry_document_indices:\n                    # TODO(andrei): Previously there was a comment here saying\n                    # it was ok if a doc did not exist in the document index. I\n                    # don't agree with that claim, so keep an eye on this task\n                    # to see if this raises.\n                    retry_document_index.update_single(\n                        document_id,\n                        tenant_id=tenant_id,\n                        chunk_count=doc.chunk_count,\n                        fields=fields,\n                        user_fields=None,\n                    )\n\n                # there are still other cc_pair references to the doc, so just resync to Vespa\n                delete_document_by_connector_credential_pair__no_commit(\n                    db_session=db_session,\n                    document_id=document_id,\n                    connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(\n                        connector_id=connector_id,\n                        credential_id=credential_id,\n                    ),\n                )\n\n                mark_document_as_synced(document_id, db_session)\n                db_session.commit()\n\n                completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED\n            else:\n                completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED\n\n            elapsed = time.monotonic() - start\n            task_logger.info(\n                f\"doc={document_id} action={action} refcount={count} elapsed={elapsed:.2f}\"\n            )\n    except SoftTimeLimitExceeded:\n        task_logger.info(f\"SoftTimeLimitExceeded exception. doc={document_id}\")\n        completion_status = OnyxCeleryTaskCompletionStatus.SOFT_TIME_LIMIT\n    except Exception as ex:\n        e: Exception | None = None\n        while True:\n            if isinstance(ex, RetryError):\n                task_logger.warning(\n                    f\"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}\"\n                )\n\n                # only set the inner exception if it is of type Exception\n                e_temp = ex.last_attempt.exception()\n                if isinstance(e_temp, Exception):\n                    e = e_temp\n            else:\n                e = ex\n\n            if isinstance(e, httpx.HTTPStatusError):\n                if e.response.status_code == HTTPStatus.BAD_REQUEST:\n                    task_logger.exception(\n                        f\"Non-retryable HTTPStatusError: doc={document_id} status={e.response.status_code}\"\n                    )\n                completion_status = (\n                    OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION\n                )\n                break\n\n            task_logger.exception(\n                f\"document_by_cc_pair_cleanup_task exceptioned: doc={document_id}\"\n            )\n\n            completion_status = OnyxCeleryTaskCompletionStatus.RETRYABLE_EXCEPTION\n            if (\n                self.max_retries is not None\n                and self.request.retries >= self.max_retries\n            ):\n                # This is the last attempt! mark the document as dirty in the db so that it\n                # eventually gets fixed out of band via stale document reconciliation\n                task_logger.warning(\n                    f\"Max celery task retries reached. Marking doc as dirty for reconciliation: doc={document_id}\"\n                )\n                with get_session_with_current_tenant() as db_session:\n                    # delete the cc pair relationship now and let reconciliation clean it up\n                    # in vespa\n                    delete_document_by_connector_credential_pair__no_commit(\n                        db_session=db_session,\n                        document_id=document_id,\n                        connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(\n                            connector_id=connector_id,\n                            credential_id=credential_id,\n                        ),\n                    )\n                    mark_document_as_modified(document_id, db_session)\n                completion_status = (\n                    OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION\n                )\n                break\n\n            # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64\n            countdown = 2 ** (self.request.retries + 4)\n            self.retry(exc=e, countdown=countdown)  # this will raise a celery exception\n            break  # we won't hit this, but it looks weird not to have it\n    finally:\n        task_logger.info(\n            f\"document_by_cc_pair_cleanup_task completed: status={completion_status.value} doc={document_id}\"\n        )\n\n    if completion_status != OnyxCeleryTaskCompletionStatus.SUCCEEDED:\n        return False\n\n    task_logger.info(f\"document_by_cc_pair_cleanup_task finished: doc={document_id}\")\n    return True\n\n\n@shared_task(name=OnyxCeleryTask.CELERY_BEAT_HEARTBEAT, ignore_result=True, bind=True)\ndef celery_beat_heartbeat(self: Task, *, tenant_id: str) -> None:  # noqa: ARG001\n    \"\"\"When this task runs, it writes a key to Redis with a TTL.\n\n    An external observer can check this key to figure out if the celery beat is still running.\n    \"\"\"\n    time_start = time.monotonic()\n    r: Redis = get_redis_client()\n    r.set(ONYX_CELERY_BEAT_HEARTBEAT_KEY, 1, ex=600)\n    time_elapsed = time.monotonic() - time_start\n    task_logger.info(f\"celery_beat_heartbeat finished: elapsed={time_elapsed:.2f}\")\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/user_file_processing/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/user_file_processing/tasks.py",
    "content": "import datetime\nimport time\nfrom typing import Any\nfrom uuid import UUID\n\nimport httpx\nimport sqlalchemy as sa\nfrom celery import Celery\nfrom celery import shared_task\nfrom celery import Task\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom retry import retry\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.access.access import build_access_for_user_files\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.celery_redis import celery_get_broker_client\nfrom onyx.background.celery.celery_redis import celery_get_queue_length\nfrom onyx.background.celery.celery_utils import httpx_init_vespa_pool\nfrom onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.app_configs import MANAGED_VESPA\nfrom onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH\nfrom onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import CELERY_USER_FILE_DELETE_TASK_EXPIRES\nfrom onyx.configs.constants import CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT\nfrom onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES\nfrom onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT\nfrom onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.configs.constants import USER_FILE_DELETE_MAX_QUEUE_DEPTH\nfrom onyx.configs.constants import USER_FILE_PROCESSING_MAX_QUEUE_DEPTH\nfrom onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH\nfrom onyx.connectors.file.connector import LocalFileConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import UserFileStatus\nfrom onyx.db.models import UserFile\nfrom onyx.db.search_settings import get_active_search_settings\nfrom onyx.db.search_settings import get_active_search_settings_list\nfrom onyx.db.user_file import fetch_user_files_with_access_relationships\nfrom onyx.document_index.factory import get_all_document_indices\nfrom onyx.document_index.interfaces import VespaDocumentFields\nfrom onyx.document_index.interfaces import VespaDocumentUserFields\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.file_store.utils import store_user_file_plaintext\nfrom onyx.file_store.utils import user_file_id_to_plaintext_file_name\nfrom onyx.httpx.httpx_pool import HttpxPool\nfrom onyx.indexing.adapters.user_file_indexing_adapter import UserFileIndexingAdapter\nfrom onyx.indexing.embedder import DefaultIndexingEmbedder\nfrom onyx.indexing.indexing_pipeline import run_indexing_pipeline\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.utils.variable_functionality import global_version\n\n\ndef _as_uuid(value: str | UUID) -> UUID:\n    \"\"\"Return a UUID, accepting either a UUID or a string-like value.\"\"\"\n    return value if isinstance(value, UUID) else UUID(str(value))\n\n\ndef _user_file_lock_key(user_file_id: str | UUID) -> str:\n    return f\"{OnyxRedisLocks.USER_FILE_PROCESSING_LOCK_PREFIX}:{user_file_id}\"\n\n\ndef _user_file_queued_key(user_file_id: str | UUID) -> str:\n    \"\"\"Key that exists while a process_single_user_file task is sitting in the queue.\n\n    The beat generator sets this with a TTL equal to CELERY_USER_FILE_PROCESSING_TASK_EXPIRES\n    before enqueuing and the worker deletes it as its first action.  This prevents\n    the beat from adding duplicate tasks for files that already have a live task\n    in flight.\n    \"\"\"\n    return f\"{OnyxRedisLocks.USER_FILE_QUEUED_PREFIX}:{user_file_id}\"\n\n\ndef user_file_project_sync_lock_key(user_file_id: str | UUID) -> str:\n    return f\"{OnyxRedisLocks.USER_FILE_PROJECT_SYNC_LOCK_PREFIX}:{user_file_id}\"\n\n\ndef _user_file_project_sync_queued_key(user_file_id: str | UUID) -> str:\n    return f\"{OnyxRedisLocks.USER_FILE_PROJECT_SYNC_QUEUED_PREFIX}:{user_file_id}\"\n\n\ndef _user_file_delete_lock_key(user_file_id: str | UUID) -> str:\n    return f\"{OnyxRedisLocks.USER_FILE_DELETE_LOCK_PREFIX}:{user_file_id}\"\n\n\ndef _user_file_delete_queued_key(user_file_id: str | UUID) -> str:\n    \"\"\"Key that exists while a delete_single_user_file task is sitting in the queue.\n\n    The beat generator sets this with a TTL equal to CELERY_USER_FILE_DELETE_TASK_EXPIRES\n    before enqueuing and the worker deletes it as its first action.  This prevents\n    the beat from adding duplicate tasks for files that already have a live task\n    in flight.\n    \"\"\"\n    return f\"{OnyxRedisLocks.USER_FILE_DELETE_QUEUED_PREFIX}:{user_file_id}\"\n\n\ndef get_user_file_project_sync_queue_depth(celery_app: Celery) -> int:\n    redis_celery = celery_get_broker_client(celery_app)\n    return celery_get_queue_length(\n        OnyxCeleryQueues.USER_FILE_PROJECT_SYNC, redis_celery\n    )\n\n\ndef enqueue_user_file_project_sync_task(\n    *,\n    celery_app: Celery,\n    redis_client: Redis,\n    user_file_id: str | UUID,\n    tenant_id: str,\n    priority: OnyxCeleryPriority = OnyxCeleryPriority.HIGH,\n) -> bool:\n    \"\"\"Enqueue a project-sync task if no matching queued task already exists.\"\"\"\n    queued_key = _user_file_project_sync_queued_key(user_file_id)\n\n    # NX+EX gives us atomic dedupe and a self-healing TTL.\n    queued_guard_set = redis_client.set(\n        queued_key,\n        1,\n        nx=True,\n        ex=CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES,\n    )\n    if not queued_guard_set:\n        return False\n\n    try:\n        celery_app.send_task(\n            OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,\n            kwargs={\"user_file_id\": str(user_file_id), \"tenant_id\": tenant_id},\n            queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,\n            priority=priority,\n            expires=CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES,\n        )\n    except Exception:\n        # Roll back the queued guard if task publish fails.\n        redis_client.delete(queued_key)\n        raise\n\n    return True\n\n\n@retry(tries=3, delay=1, backoff=2, jitter=(0.0, 1.0))\ndef _visit_chunks(\n    *,\n    http_client: httpx.Client,\n    index_name: str,\n    selection: str,\n    continuation: str | None = None,\n) -> tuple[list[dict[str, Any]], str | None]:\n    task_logger.info(\n        f\"Visiting chunks for index={index_name} with selection={selection}\"\n    )\n    base_url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)\n    params: dict[str, str] = {\n        \"selection\": selection,\n        \"wantedDocumentCount\": \"100\",  # Use smaller batch size to avoid timeouts\n    }\n    if continuation:\n        params[\"continuation\"] = continuation\n    resp = http_client.get(base_url, params=params, timeout=None)\n    resp.raise_for_status()\n    payload = resp.json()\n    return payload.get(\"documents\", []), payload.get(\"continuation\")\n\n\ndef _get_document_chunk_count(\n    *,\n    index_name: str,\n    selection: str,\n) -> int:\n    chunk_count = 0\n    continuation = None\n    while True:\n        docs, continuation = _visit_chunks(\n            http_client=HttpxPool.get(\"vespa\"),\n            index_name=index_name,\n            selection=selection,\n            continuation=continuation,\n        )\n        if not docs:\n            break\n        chunk_count += len(docs)\n        if not continuation:\n            break\n    return chunk_count\n\n\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_PROCESSING,\n    soft_time_limit=300,\n    bind=True,\n    ignore_result=True,\n)\ndef check_user_file_processing(self: Task, *, tenant_id: str) -> None:\n    \"\"\"Scan for user files with PROCESSING status and enqueue per-file tasks.\n\n    Three mechanisms prevent queue runaway:\n\n    1. **Queue depth backpressure** – if the broker queue already has more than\n       USER_FILE_PROCESSING_MAX_QUEUE_DEPTH items we skip this beat cycle\n       entirely.  Workers are clearly behind; adding more tasks would only make\n       the backlog worse.\n\n    2. **Per-file queued guard** – before enqueuing a task we set a short-lived\n       Redis key (TTL = CELERY_USER_FILE_PROCESSING_TASK_EXPIRES).  If that key\n       already exists the file already has a live task in the queue, so we skip\n       it.  The worker deletes the key the moment it picks up the task so the\n       next beat cycle can re-enqueue if the file is still PROCESSING.\n\n    3. **Task expiry** – every enqueued task carries an `expires` value equal to\n       CELERY_USER_FILE_PROCESSING_TASK_EXPIRES.  If a task is still sitting in\n       the queue after that deadline, Celery discards it without touching the DB.\n       This is a belt-and-suspenders defence: even if the guard key is lost (e.g.\n       Redis restart), stale tasks evict themselves rather than piling up forever.\n    \"\"\"\n    task_logger.info(\"check_user_file_processing - Starting\")\n\n    redis_client = get_redis_client(tenant_id=tenant_id)\n    lock: RedisLock = redis_client.lock(\n        OnyxRedisLocks.USER_FILE_PROCESSING_BEAT_LOCK,\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n\n    # Do not overlap generator runs\n    if not lock.acquire(blocking=False):\n        return None\n\n    enqueued = 0\n    skipped_guard = 0\n    try:\n        # --- Protection 1: queue depth backpressure ---\n        r_celery = celery_get_broker_client(self.app)\n        queue_len = celery_get_queue_length(\n            OnyxCeleryQueues.USER_FILE_PROCESSING, r_celery\n        )\n        if queue_len > USER_FILE_PROCESSING_MAX_QUEUE_DEPTH:\n            task_logger.warning(\n                f\"check_user_file_processing - Queue depth {queue_len} exceeds \"\n                f\"{USER_FILE_PROCESSING_MAX_QUEUE_DEPTH}, skipping enqueue for \"\n                f\"tenant={tenant_id}\"\n            )\n            return None\n\n        with get_session_with_current_tenant() as db_session:\n            user_file_ids = (\n                db_session.execute(\n                    select(UserFile.id).where(\n                        UserFile.status == UserFileStatus.PROCESSING\n                    )\n                )\n                .scalars()\n                .all()\n            )\n\n            for user_file_id in user_file_ids:\n                # --- Protection 2: per-file queued guard ---\n                queued_key = _user_file_queued_key(user_file_id)\n                guard_set = redis_client.set(\n                    queued_key,\n                    1,\n                    ex=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,\n                    nx=True,\n                )\n                if not guard_set:\n                    skipped_guard += 1\n                    continue\n\n                # --- Protection 3: task expiry ---\n                # If task submission fails, clear the guard immediately so the\n                # next beat cycle can retry enqueuing this file.\n                try:\n                    self.app.send_task(\n                        OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,\n                        kwargs={\n                            \"user_file_id\": str(user_file_id),\n                            \"tenant_id\": tenant_id,\n                        },\n                        queue=OnyxCeleryQueues.USER_FILE_PROCESSING,\n                        priority=OnyxCeleryPriority.HIGH,\n                        expires=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,\n                    )\n                except Exception:\n                    redis_client.delete(queued_key)\n                    raise\n                enqueued += 1\n\n    finally:\n        if lock.owned():\n            lock.release()\n\n    task_logger.info(\n        f\"check_user_file_processing - Enqueued {enqueued} skipped_guard={skipped_guard} tasks for tenant={tenant_id}\"\n    )\n    return None\n\n\ndef _process_user_file_without_vector_db(\n    uf: UserFile,\n    documents: list[Document],\n    db_session: Session,\n) -> None:\n    \"\"\"Process a user file when the vector DB is disabled.\n\n    Extracts raw text and computes a token count, stores the plaintext in\n    the file store, and marks the file as COMPLETED.  Skips embedding and\n    the indexing pipeline entirely.\n    \"\"\"\n    from onyx.llm.factory import get_default_llm\n    from onyx.llm.factory import get_llm_tokenizer_encode_func\n\n    # Combine section text from all document sections\n    combined_text = \" \".join(\n        section.text for doc in documents for section in doc.sections if section.text\n    )\n\n    # Compute token count using the user's default LLM tokenizer\n    try:\n        llm = get_default_llm()\n        encode = get_llm_tokenizer_encode_func(llm)\n        token_count: int | None = len(encode(combined_text))\n    except Exception:\n        task_logger.warning(\n            f\"_process_user_file_without_vector_db - Failed to compute token count for {uf.id}, falling back to None\"\n        )\n        token_count = None\n\n    # Persist plaintext for fast FileReaderTool loads\n    store_user_file_plaintext(\n        user_file_id=uf.id,\n        plaintext_content=combined_text,\n    )\n\n    # Update the DB record\n    if uf.status != UserFileStatus.DELETING:\n        uf.status = UserFileStatus.COMPLETED\n    uf.token_count = token_count\n    uf.chunk_count = 0  # no chunks without vector DB\n    uf.last_project_sync_at = datetime.datetime.now(datetime.timezone.utc)\n    db_session.add(uf)\n    db_session.commit()\n\n    task_logger.info(\n        f\"_process_user_file_without_vector_db - Completed id={uf.id} tokens={token_count}\"\n    )\n\n\ndef _process_user_file_with_indexing(\n    uf: UserFile,\n    user_file_id: str,\n    documents: list[Document],\n    tenant_id: str,\n    db_session: Session,\n) -> None:\n    \"\"\"Process a user file through the full indexing pipeline (vector DB path).\"\"\"\n    # 20 is the documented default for httpx max_keepalive_connections\n    if MANAGED_VESPA:\n        httpx_init_vespa_pool(\n            20, ssl_cert=VESPA_CLOUD_CERT_PATH, ssl_key=VESPA_CLOUD_KEY_PATH\n        )\n    else:\n        httpx_init_vespa_pool(20)\n\n    search_settings_list = get_active_search_settings_list(db_session)\n    current_search_settings = next(\n        (ss for ss in search_settings_list if ss.status.is_current()),\n        None,\n    )\n    if current_search_settings is None:\n        raise RuntimeError(\n            f\"_process_user_file_with_indexing - No current search settings found for tenant={tenant_id}\"\n        )\n\n    adapter = UserFileIndexingAdapter(\n        tenant_id=tenant_id,\n        db_session=db_session,\n    )\n\n    embedding_model = DefaultIndexingEmbedder.from_db_search_settings(\n        search_settings=current_search_settings,\n    )\n\n    document_indices = get_all_document_indices(\n        current_search_settings,\n        None,\n        httpx_client=HttpxPool.get(\"vespa\"),\n    )\n\n    index_pipeline_result = run_indexing_pipeline(\n        embedder=embedding_model,\n        document_indices=document_indices,\n        ignore_time_skip=True,\n        db_session=db_session,\n        tenant_id=tenant_id,\n        document_batch=documents,\n        request_id=None,\n        adapter=adapter,\n    )\n\n    task_logger.info(\n        f\"_process_user_file_with_indexing - Indexing pipeline completed ={index_pipeline_result}\"\n    )\n\n    if (\n        index_pipeline_result.failures\n        or index_pipeline_result.total_docs != len(documents)\n        or index_pipeline_result.total_chunks == 0\n    ):\n        task_logger.error(\n            f\"_process_user_file_with_indexing - Indexing pipeline failed id={user_file_id}\"\n        )\n        if uf.status != UserFileStatus.DELETING:\n            uf.status = UserFileStatus.FAILED\n            db_session.add(uf)\n            db_session.commit()\n        raise RuntimeError(f\"Indexing pipeline failed for user file {user_file_id}\")\n\n\ndef process_user_file_impl(\n    *, user_file_id: str, tenant_id: str, redis_locking: bool\n) -> None:\n    \"\"\"Core implementation for processing a single user file.\n\n    When redis_locking=True, acquires a per-file Redis lock and clears the\n    queued-key guard (Celery path).  When redis_locking=False, skips all Redis\n    operations (BackgroundTask path).\n    \"\"\"\n    task_logger.info(f\"process_user_file_impl - Starting id={user_file_id}\")\n    start = time.monotonic()\n\n    file_lock: RedisLock | None = None\n    if redis_locking:\n        redis_client = get_redis_client(tenant_id=tenant_id)\n        redis_client.delete(_user_file_queued_key(user_file_id))\n        file_lock = redis_client.lock(\n            _user_file_lock_key(user_file_id),\n            timeout=CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT,\n        )\n        if file_lock is not None and not file_lock.acquire(blocking=False):\n            task_logger.info(\n                f\"process_user_file_impl - Lock held, skipping user_file_id={user_file_id}\"\n            )\n            return\n\n    documents: list[Document] = []\n    try:\n        with get_session_with_current_tenant() as db_session:\n            uf = db_session.get(UserFile, _as_uuid(user_file_id))\n            if not uf:\n                task_logger.warning(\n                    f\"process_user_file_impl - UserFile not found id={user_file_id}\"\n                )\n                return\n\n            if uf.status not in (\n                UserFileStatus.PROCESSING,\n                UserFileStatus.INDEXING,\n            ):\n                task_logger.info(\n                    f\"process_user_file_impl - Skipping id={user_file_id} status={uf.status}\"\n                )\n                return\n\n            connector = LocalFileConnector(\n                file_locations=[uf.file_id],\n                file_names=[uf.name] if uf.name else None,\n            )\n            connector.load_credentials({})\n\n            try:\n                for batch in connector.load_from_state():\n                    documents.extend(\n                        [doc for doc in batch if not isinstance(doc, HierarchyNode)]\n                    )\n\n                for document in documents:\n                    document.id = str(user_file_id)\n                    document.source = DocumentSource.USER_FILE\n\n                if DISABLE_VECTOR_DB:\n                    _process_user_file_without_vector_db(\n                        uf=uf,\n                        documents=documents,\n                        db_session=db_session,\n                    )\n                else:\n                    _process_user_file_with_indexing(\n                        uf=uf,\n                        user_file_id=user_file_id,\n                        documents=documents,\n                        tenant_id=tenant_id,\n                        db_session=db_session,\n                    )\n\n            except Exception as e:\n                task_logger.exception(\n                    f\"process_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}\"\n                )\n                current_user_file = db_session.get(UserFile, _as_uuid(user_file_id))\n                if (\n                    current_user_file\n                    and current_user_file.status != UserFileStatus.DELETING\n                ):\n                    uf.status = UserFileStatus.FAILED\n                    db_session.add(uf)\n                    db_session.commit()\n                return\n\n        elapsed = time.monotonic() - start\n        task_logger.info(\n            f\"process_user_file_impl - Finished id={user_file_id} docs={len(documents)} elapsed={elapsed:.2f}s\"\n        )\n    except Exception as e:\n        with get_session_with_current_tenant() as db_session:\n            uf = db_session.get(UserFile, _as_uuid(user_file_id))\n            if uf:\n                if uf.status != UserFileStatus.DELETING:\n                    uf.status = UserFileStatus.FAILED\n                db_session.add(uf)\n                db_session.commit()\n\n        task_logger.exception(\n            f\"process_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}\"\n        )\n        raise\n    finally:\n        if file_lock is not None and file_lock.owned():\n            file_lock.release()\n\n\n@shared_task(\n    name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,\n    bind=True,\n    ignore_result=True,\n)\ndef process_single_user_file(\n    self: Task,  # noqa: ARG001\n    *,\n    user_file_id: str,\n    tenant_id: str,\n) -> None:\n    process_user_file_impl(\n        user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True\n    )\n\n\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_DELETE,\n    soft_time_limit=300,\n    bind=True,\n    ignore_result=True,\n)\ndef check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:\n    \"\"\"Scan for user files with DELETING status and enqueue per-file tasks.\n\n    Three mechanisms prevent queue runaway (mirrors check_user_file_processing):\n\n    1. **Queue depth backpressure** – if the broker queue already has more than\n       USER_FILE_DELETE_MAX_QUEUE_DEPTH items we skip this beat cycle entirely.\n\n    2. **Per-file queued guard** – before enqueuing a task we set a short-lived\n       Redis key (TTL = CELERY_USER_FILE_DELETE_TASK_EXPIRES).  If that key\n       already exists the file already has a live task in the queue, so we skip\n       it.  The worker deletes the key the moment it picks up the task so the\n       next beat cycle can re-enqueue if the file is still DELETING.\n\n    3. **Task expiry** – every enqueued task carries an `expires` value equal to\n       CELERY_USER_FILE_DELETE_TASK_EXPIRES.  If a task is still sitting in\n       the queue after that deadline, Celery discards it without touching the DB.\n    \"\"\"\n    task_logger.info(\"check_for_user_file_delete - Starting\")\n    redis_client = get_redis_client(tenant_id=tenant_id)\n    lock: RedisLock = redis_client.lock(\n        OnyxRedisLocks.USER_FILE_DELETE_BEAT_LOCK,\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n    if not lock.acquire(blocking=False):\n        return None\n\n    enqueued = 0\n    skipped_guard = 0\n    try:\n        # --- Protection 1: queue depth backpressure ---\n        # NOTE: must use the broker's Redis client (not redis_client) because\n        # Celery queues live on a separate Redis DB with CELERY_SEPARATOR keys.\n        r_celery = celery_get_broker_client(self.app)\n        queue_len = celery_get_queue_length(OnyxCeleryQueues.USER_FILE_DELETE, r_celery)\n        if queue_len > USER_FILE_DELETE_MAX_QUEUE_DEPTH:\n            task_logger.warning(\n                f\"check_for_user_file_delete - Queue depth {queue_len} exceeds \"\n                f\"{USER_FILE_DELETE_MAX_QUEUE_DEPTH}, skipping enqueue for \"\n                f\"tenant={tenant_id}\"\n            )\n            return None\n\n        with get_session_with_current_tenant() as db_session:\n            user_file_ids = (\n                db_session.execute(\n                    select(UserFile.id).where(\n                        UserFile.status == UserFileStatus.DELETING\n                    )\n                )\n                .scalars()\n                .all()\n            )\n            for user_file_id in user_file_ids:\n                # --- Protection 2: per-file queued guard ---\n                queued_key = _user_file_delete_queued_key(user_file_id)\n                guard_set = redis_client.set(\n                    queued_key,\n                    1,\n                    ex=CELERY_USER_FILE_DELETE_TASK_EXPIRES,\n                    nx=True,\n                )\n                if not guard_set:\n                    skipped_guard += 1\n                    continue\n\n                # --- Protection 3: task expiry ---\n                try:\n                    self.app.send_task(\n                        OnyxCeleryTask.DELETE_SINGLE_USER_FILE,\n                        kwargs={\n                            \"user_file_id\": str(user_file_id),\n                            \"tenant_id\": tenant_id,\n                        },\n                        queue=OnyxCeleryQueues.USER_FILE_DELETE,\n                        priority=OnyxCeleryPriority.HIGH,\n                        expires=CELERY_USER_FILE_DELETE_TASK_EXPIRES,\n                    )\n                except Exception:\n                    redis_client.delete(queued_key)\n                    raise\n                enqueued += 1\n    finally:\n        if lock.owned():\n            lock.release()\n\n    task_logger.info(\n        f\"check_for_user_file_delete - Enqueued {enqueued} tasks, skipped_guard={skipped_guard} for tenant={tenant_id}\"\n    )\n    return None\n\n\ndef delete_user_file_impl(\n    *, user_file_id: str, tenant_id: str, redis_locking: bool\n) -> None:\n    \"\"\"Core implementation for deleting a single user file.\n\n    When redis_locking=True, acquires a per-file Redis lock (Celery path).\n    When redis_locking=False, skips Redis operations (BackgroundTask path).\n    \"\"\"\n    task_logger.info(f\"delete_user_file_impl - Starting id={user_file_id}\")\n\n    file_lock: RedisLock | None = None\n    if redis_locking:\n        redis_client = get_redis_client(tenant_id=tenant_id)\n        # Clear the queued guard so the beat can re-enqueue if deletion fails\n        # and the file remains in DELETING status.\n        redis_client.delete(_user_file_delete_queued_key(user_file_id))\n        file_lock = redis_client.lock(\n            _user_file_delete_lock_key(user_file_id),\n            timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n        )\n        if file_lock is not None and not file_lock.acquire(blocking=False):\n            task_logger.info(\n                f\"delete_user_file_impl - Lock held, skipping user_file_id={user_file_id}\"\n            )\n            return\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            user_file = db_session.get(UserFile, _as_uuid(user_file_id))\n            if not user_file:\n                task_logger.info(\n                    f\"delete_user_file_impl - User file not found id={user_file_id}\"\n                )\n                return\n\n            if not DISABLE_VECTOR_DB:\n                if MANAGED_VESPA:\n                    httpx_init_vespa_pool(\n                        20, ssl_cert=VESPA_CLOUD_CERT_PATH, ssl_key=VESPA_CLOUD_KEY_PATH\n                    )\n                else:\n                    httpx_init_vespa_pool(20)\n\n                active_search_settings = get_active_search_settings(db_session)\n                document_indices = get_all_document_indices(\n                    search_settings=active_search_settings.primary,\n                    secondary_search_settings=active_search_settings.secondary,\n                    httpx_client=HttpxPool.get(\"vespa\"),\n                )\n                retry_document_indices: list[RetryDocumentIndex] = [\n                    RetryDocumentIndex(document_index)\n                    for document_index in document_indices\n                ]\n                index_name = active_search_settings.primary.index_name\n                selection = f\"{index_name}.document_id=='{user_file_id}'\"\n\n                chunk_count = 0\n                if user_file.chunk_count is None or user_file.chunk_count == 0:\n                    chunk_count = _get_document_chunk_count(\n                        index_name=index_name,\n                        selection=selection,\n                    )\n                else:\n                    chunk_count = user_file.chunk_count\n\n                for retry_document_index in retry_document_indices:\n                    retry_document_index.delete_single(\n                        doc_id=user_file_id,\n                        tenant_id=tenant_id,\n                        chunk_count=chunk_count,\n                    )\n\n            file_store = get_default_file_store()\n            try:\n                file_store.delete_file(user_file.file_id)\n                file_store.delete_file(\n                    user_file_id_to_plaintext_file_name(user_file.id)\n                )\n            except Exception as e:\n                task_logger.exception(\n                    f\"delete_user_file_impl - Error deleting file id={user_file.id} - {e.__class__.__name__}\"\n                )\n\n            db_session.delete(user_file)\n            db_session.commit()\n            task_logger.info(f\"delete_user_file_impl - Completed id={user_file_id}\")\n    except Exception as e:\n        task_logger.exception(\n            f\"delete_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}\"\n        )\n        raise\n    finally:\n        if file_lock is not None and file_lock.owned():\n            file_lock.release()\n\n\n@shared_task(\n    name=OnyxCeleryTask.DELETE_SINGLE_USER_FILE,\n    bind=True,\n    ignore_result=True,\n)\ndef process_single_user_file_delete(\n    self: Task,  # noqa: ARG001\n    *,\n    user_file_id: str,\n    tenant_id: str,\n) -> None:\n    delete_user_file_impl(\n        user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True\n    )\n\n\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_PROJECT_SYNC,\n    soft_time_limit=300,\n    bind=True,\n    ignore_result=True,\n)\ndef check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:\n    \"\"\"Scan for user files needing project sync and enqueue per-file tasks.\"\"\"\n    task_logger.info(\"Starting\")\n\n    redis_client = get_redis_client(tenant_id=tenant_id)\n    lock: RedisLock = redis_client.lock(\n        OnyxRedisLocks.USER_FILE_PROJECT_SYNC_BEAT_LOCK,\n        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n    )\n\n    if not lock.acquire(blocking=False):\n        return None\n\n    enqueued = 0\n    skipped_guard = 0\n    try:\n        queue_depth = get_user_file_project_sync_queue_depth(self.app)\n        if queue_depth > USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH:\n            task_logger.warning(\n                f\"Queue depth {queue_depth} exceeds \"\n                f\"{USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH}, skipping enqueue for tenant={tenant_id}\"\n            )\n            return None\n\n        with get_session_with_current_tenant() as db_session:\n            user_file_ids = (\n                db_session.execute(\n                    select(UserFile.id).where(\n                        sa.and_(\n                            sa.or_(\n                                UserFile.needs_project_sync.is_(True),\n                                UserFile.needs_persona_sync.is_(True),\n                            ),\n                            UserFile.status == UserFileStatus.COMPLETED,\n                        )\n                    )\n                )\n                .scalars()\n                .all()\n            )\n\n            for user_file_id in user_file_ids:\n                if not enqueue_user_file_project_sync_task(\n                    celery_app=self.app,\n                    redis_client=redis_client,\n                    user_file_id=user_file_id,\n                    tenant_id=tenant_id,\n                    priority=OnyxCeleryPriority.HIGH,\n                ):\n                    skipped_guard += 1\n                    continue\n                enqueued += 1\n    finally:\n        if lock.owned():\n            lock.release()\n\n    task_logger.info(\n        f\"Enqueued {enqueued} Skipped guard {skipped_guard} tasks for tenant={tenant_id}\"\n    )\n    return None\n\n\ndef project_sync_user_file_impl(\n    *, user_file_id: str, tenant_id: str, redis_locking: bool\n) -> None:\n    \"\"\"Core implementation for syncing a user file's project/persona metadata.\n\n    When redis_locking=True, acquires a per-file Redis lock and clears the\n    queued-key guard (Celery path).  When redis_locking=False, skips Redis\n    operations (BackgroundTask path).\n    \"\"\"\n    task_logger.info(f\"project_sync_user_file_impl - Starting id={user_file_id}\")\n\n    file_lock: RedisLock | None = None\n    if redis_locking:\n        redis_client = get_redis_client(tenant_id=tenant_id)\n        redis_client.delete(_user_file_project_sync_queued_key(user_file_id))\n        file_lock = redis_client.lock(\n            user_file_project_sync_lock_key(user_file_id),\n            timeout=CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT,\n        )\n        if file_lock is not None and not file_lock.acquire(blocking=False):\n            task_logger.info(\n                f\"project_sync_user_file_impl - Lock held, skipping user_file_id={user_file_id}\"\n            )\n            return\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            user_files = fetch_user_files_with_access_relationships(\n                [user_file_id],\n                db_session,\n                eager_load_groups=global_version.is_ee_version(),\n            )\n            user_file = user_files[0] if user_files else None\n            if not user_file:\n                task_logger.info(\n                    f\"project_sync_user_file_impl - User file not found id={user_file_id}\"\n                )\n                return\n\n            if not DISABLE_VECTOR_DB:\n                if MANAGED_VESPA:\n                    httpx_init_vespa_pool(\n                        20, ssl_cert=VESPA_CLOUD_CERT_PATH, ssl_key=VESPA_CLOUD_KEY_PATH\n                    )\n                else:\n                    httpx_init_vespa_pool(20)\n\n                active_search_settings = get_active_search_settings(db_session)\n                document_indices = get_all_document_indices(\n                    search_settings=active_search_settings.primary,\n                    secondary_search_settings=active_search_settings.secondary,\n                    httpx_client=HttpxPool.get(\"vespa\"),\n                )\n                retry_document_indices: list[RetryDocumentIndex] = [\n                    RetryDocumentIndex(document_index)\n                    for document_index in document_indices\n                ]\n\n                project_ids = [project.id for project in user_file.projects]\n                persona_ids = [p.id for p in user_file.assistants if not p.deleted]\n\n                file_id_str = str(user_file.id)\n                access_map = build_access_for_user_files([user_file])\n                access = access_map.get(file_id_str)\n\n                for retry_document_index in retry_document_indices:\n                    retry_document_index.update_single(\n                        doc_id=file_id_str,\n                        tenant_id=tenant_id,\n                        chunk_count=user_file.chunk_count,\n                        fields=(\n                            VespaDocumentFields(access=access)\n                            if access is not None\n                            else None\n                        ),\n                        user_fields=VespaDocumentUserFields(\n                            user_projects=project_ids,\n                            personas=persona_ids,\n                        ),\n                    )\n\n            task_logger.info(\n                f\"project_sync_user_file_impl - User file id={user_file_id}\"\n            )\n\n            user_file.needs_project_sync = False\n            user_file.needs_persona_sync = False\n            user_file.last_project_sync_at = datetime.datetime.now(\n                datetime.timezone.utc\n            )\n            db_session.add(user_file)\n            db_session.commit()\n\n    except Exception as e:\n        task_logger.exception(\n            f\"project_sync_user_file_impl - Error syncing project for file id={user_file_id} - {e.__class__.__name__}\"\n        )\n        raise\n    finally:\n        if file_lock is not None and file_lock.owned():\n            file_lock.release()\n\n\n@shared_task(\n    name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,\n    bind=True,\n    ignore_result=True,\n)\ndef process_single_user_file_project_sync(\n    self: Task,  # noqa: ARG001\n    *,\n    user_file_id: str,\n    tenant_id: str,\n) -> None:\n    project_sync_user_file_impl(\n        user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True\n    )\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/vespa/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/background/celery/tasks/vespa/document_sync.py",
    "content": "import time\nfrom typing import cast\nfrom uuid import uuid4\n\nfrom celery import Celery\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DB_YIELD_PER_DEFAULT\nfrom onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.db.document import construct_document_id_select_by_needs_sync\nfrom onyx.db.document import count_documents_by_needs_sync\nfrom onyx.utils.logger import setup_logger\n\n# Redis keys for document sync tracking\nDOCUMENT_SYNC_PREFIX = \"documentsync\"\nDOCUMENT_SYNC_FENCE_KEY = f\"{DOCUMENT_SYNC_PREFIX}_fence\"\nDOCUMENT_SYNC_TASKSET_KEY = f\"{DOCUMENT_SYNC_PREFIX}_taskset\"\nFENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks\nTASKSET_TTL = FENCE_TTL\n\nlogger = setup_logger()\n\n\ndef is_document_sync_fenced(r: Redis) -> bool:\n    \"\"\"Check if document sync tasks are currently in progress.\"\"\"\n    return bool(r.exists(DOCUMENT_SYNC_FENCE_KEY))\n\n\ndef get_document_sync_payload(r: Redis) -> int | None:\n    \"\"\"Get the initial number of tasks that were created.\"\"\"\n    bytes_result = r.get(DOCUMENT_SYNC_FENCE_KEY)\n    if bytes_result is None:\n        return None\n    return int(cast(int, bytes_result))\n\n\ndef get_document_sync_remaining(r: Redis) -> int:\n    \"\"\"Get the number of tasks still pending completion.\"\"\"\n    return cast(int, r.scard(DOCUMENT_SYNC_TASKSET_KEY))\n\n\ndef set_document_sync_fence(r: Redis, payload: int | None) -> None:\n    \"\"\"Set up the fence and register with active fences.\"\"\"\n    if payload is None:\n        r.srem(OnyxRedisConstants.ACTIVE_FENCES, DOCUMENT_SYNC_FENCE_KEY)\n        r.delete(DOCUMENT_SYNC_FENCE_KEY)\n        return\n\n    r.set(DOCUMENT_SYNC_FENCE_KEY, payload, ex=FENCE_TTL)\n    r.sadd(OnyxRedisConstants.ACTIVE_FENCES, DOCUMENT_SYNC_FENCE_KEY)\n\n\ndef delete_document_sync_taskset(r: Redis) -> None:\n    \"\"\"Clear the document sync taskset.\"\"\"\n    r.delete(DOCUMENT_SYNC_TASKSET_KEY)\n\n\ndef reset_document_sync(r: Redis) -> None:\n    \"\"\"Reset all document sync tracking data.\"\"\"\n    r.srem(OnyxRedisConstants.ACTIVE_FENCES, DOCUMENT_SYNC_FENCE_KEY)\n    r.delete(DOCUMENT_SYNC_TASKSET_KEY)\n    r.delete(DOCUMENT_SYNC_FENCE_KEY)\n\n\ndef generate_document_sync_tasks(\n    r: Redis,\n    max_tasks: int,\n    celery_app: Celery,\n    db_session: Session,\n    lock: RedisLock,\n    tenant_id: str,\n) -> tuple[int, int]:\n    \"\"\"Generate sync tasks for all documents that need syncing.\n\n    Args:\n        r: Redis client\n        max_tasks: Maximum number of tasks to generate\n        celery_app: Celery application instance\n        db_session: Database session\n        lock: Redis lock for coordination\n        tenant_id: Tenant identifier\n\n    Returns:\n        tuple[int, int]: (tasks_generated, total_docs_found)\n    \"\"\"\n    last_lock_time = time.monotonic()\n    num_tasks_sent = 0\n    num_docs = 0\n\n    # Get all documents that need syncing\n    stmt = construct_document_id_select_by_needs_sync()\n\n    for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):\n        doc_id = cast(str, doc_id)\n        current_time = time.monotonic()\n\n        # Reacquire lock periodically to prevent timeout\n        if current_time - last_lock_time >= (CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4):\n            lock.reacquire()\n            last_lock_time = current_time\n\n        num_docs += 1\n\n        # Create a unique task ID\n        custom_task_id = f\"{DOCUMENT_SYNC_PREFIX}_{uuid4()}\"\n\n        # Add to the tracking taskset in Redis BEFORE creating the celery task\n        r.sadd(DOCUMENT_SYNC_TASKSET_KEY, custom_task_id)\n        r.expire(DOCUMENT_SYNC_TASKSET_KEY, TASKSET_TTL)\n\n        # Create the Celery task\n        celery_app.send_task(\n            OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,\n            kwargs=dict(document_id=doc_id, tenant_id=tenant_id),\n            queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,\n            task_id=custom_task_id,\n            priority=OnyxCeleryPriority.MEDIUM,\n            ignore_result=True,\n        )\n\n        num_tasks_sent += 1\n\n        if num_tasks_sent >= max_tasks:\n            break\n\n    return num_tasks_sent, num_docs\n\n\ndef try_generate_stale_document_sync_tasks(\n    celery_app: Celery,\n    max_tasks: int,\n    db_session: Session,\n    r: Redis,\n    lock_beat: RedisLock,\n    tenant_id: str,\n) -> int | None:\n    # the fence is up, do nothing\n    if is_document_sync_fenced(r):\n        return None\n\n    # add tasks to celery and build up the task set to monitor in redis\n    stale_doc_count = count_documents_by_needs_sync(db_session)\n    if stale_doc_count == 0:\n        logger.info(\"No stale documents found. Skipping sync tasks generation.\")\n        return None\n\n    logger.info(\n        f\"Stale documents found (at least {stale_doc_count}). Generating sync tasks in one batch.\"\n    )\n\n    logger.info(\"generate_document_sync_tasks starting for all documents.\")\n\n    # Generate all tasks in one pass\n    result = generate_document_sync_tasks(\n        r, max_tasks, celery_app, db_session, lock_beat, tenant_id\n    )\n\n    if result is None:\n        return None\n\n    tasks_generated, total_docs = result\n\n    if tasks_generated >= max_tasks:\n        logger.info(\n            f\"generate_document_sync_tasks reached the task generation limit: \"\n            f\"tasks_generated={tasks_generated} max_tasks={max_tasks}\"\n        )\n    else:\n        logger.info(\n            f\"generate_document_sync_tasks finished for all documents. \"\n            f\"tasks_generated={tasks_generated} total_docs_found={total_docs}\"\n        )\n\n    set_document_sync_fence(r, tasks_generated)\n    return tasks_generated\n"
  },
  {
    "path": "backend/onyx/background/celery/tasks/vespa/tasks.py",
    "content": "import time\nfrom collections.abc import Callable\nfrom http import HTTPStatus\nfrom typing import Any\nfrom typing import cast\n\nimport httpx\nfrom celery import Celery\nfrom celery import shared_task\nfrom celery import Task\nfrom celery.exceptions import SoftTimeLimitExceeded\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\nfrom tenacity import RetryError\n\nfrom onyx.access.access import get_access_for_document\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex\nfrom onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT\nfrom onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT\nfrom onyx.background.celery.tasks.shared.tasks import OnyxCeleryTaskCompletionStatus\nfrom onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_FENCE_KEY\nfrom onyx.background.celery.tasks.vespa.document_sync import get_document_sync_payload\nfrom onyx.background.celery.tasks.vespa.document_sync import get_document_sync_remaining\nfrom onyx.background.celery.tasks.vespa.document_sync import reset_document_sync\nfrom onyx.background.celery.tasks.vespa.document_sync import (\n    try_generate_stale_document_sync_tasks,\n)\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.configs.app_configs import VESPA_SYNC_MAX_TASKS\nfrom onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.db.document import get_document\nfrom onyx.db.document import mark_document_as_synced\nfrom onyx.db.document_set import delete_document_set\nfrom onyx.db.document_set import fetch_document_sets\nfrom onyx.db.document_set import fetch_document_sets_for_document\nfrom onyx.db.document_set import get_document_set_by_id\nfrom onyx.db.document_set import mark_document_set_as_synced\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import SyncStatus\nfrom onyx.db.enums import SyncType\nfrom onyx.db.models import DocumentSet\nfrom onyx.db.models import UserGroup\nfrom onyx.db.search_settings import get_active_search_settings\nfrom onyx.db.sync_record import cleanup_sync_records\nfrom onyx.db.sync_record import insert_sync_record\nfrom onyx.db.sync_record import update_sync_record_status\nfrom onyx.document_index.factory import get_all_document_indices\nfrom onyx.document_index.interfaces import VespaDocumentFields\nfrom onyx.httpx.httpx_pool import HttpxPool\nfrom onyx.redis.redis_document_set import RedisDocumentSet\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.redis.redis_pool import get_redis_replica_client\nfrom onyx.redis.redis_pool import redis_lock_dump\nfrom onyx.redis.redis_usergroup import RedisUserGroup\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import (\n    fetch_versioned_implementation_with_fallback,\n)\nfrom onyx.utils.variable_functionality import global_version\nfrom onyx.utils.variable_functionality import noop_fallback\n\nlogger = setup_logger()\n\n\n# celery auto associates tasks created inside another task,\n# which bloats the result metadata considerably. trail=False prevents this.\n# TODO(andrei): Rename all these kinds of functions from *vespa* to a more\n# generic *document_index*.\n@shared_task(\n    name=OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,\n    ignore_result=True,\n    soft_time_limit=JOB_TIMEOUT,\n    trail=False,\n    bind=True,\n)\ndef check_for_vespa_sync_task(self: Task, *, tenant_id: str) -> bool | None:\n    \"\"\"Runs periodically to check if any document needs syncing.\n    Generates sets of tasks for Celery if syncing is needed.\"\"\"\n\n    # Useful for debugging timing issues with reacquisitions.\n    # TODO: remove once more generalized logging is in place\n    task_logger.info(\"check_for_vespa_sync_task started\")\n\n    time_start = time.monotonic()\n\n    r = get_redis_client()\n    r_replica = get_redis_replica_client()\n\n    lock_beat: RedisLock = r.lock(\n        OnyxRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK,\n        timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,\n    )\n\n    # these tasks should never overlap\n    if not lock_beat.acquire(blocking=False):\n        return None\n\n    try:\n        # 1/3: KICKOFF\n        with get_session_with_current_tenant() as db_session:\n            try_generate_stale_document_sync_tasks(\n                self.app, VESPA_SYNC_MAX_TASKS, db_session, r, lock_beat, tenant_id\n            )\n\n        # region document set scan\n        lock_beat.reacquire()\n        document_set_ids: list[int] = []\n        with get_session_with_current_tenant() as db_session:\n            # check if any document sets are not synced\n            document_set_info = fetch_document_sets(\n                user_id=None, db_session=db_session, include_outdated=True\n            )\n\n            for document_set, _ in document_set_info:\n                document_set_ids.append(document_set.id)\n\n        for document_set_id in document_set_ids:\n            lock_beat.reacquire()\n            with get_session_with_current_tenant() as db_session:\n                try_generate_document_set_sync_tasks(\n                    self.app, document_set_id, db_session, r, lock_beat, tenant_id\n                )\n        # endregion\n\n        # check if any user groups are not synced\n        lock_beat.reacquire()\n        if global_version.is_ee_version():\n            try:\n                fetch_user_groups = fetch_versioned_implementation(\n                    \"onyx.db.user_group\", \"fetch_user_groups\"\n                )\n            except ModuleNotFoundError:\n                # Always exceptions on the MIT version, which is expected\n                # We shouldn't actually get here if the ee version check works\n                pass\n            else:\n                usergroup_ids: list[int] = []\n                with get_session_with_current_tenant() as db_session:\n                    user_groups = fetch_user_groups(\n                        db_session=db_session, only_up_to_date=False\n                    )\n\n                    for usergroup in user_groups:\n                        usergroup_ids.append(usergroup.id)\n\n                for usergroup_id in usergroup_ids:\n                    lock_beat.reacquire()\n                    with get_session_with_current_tenant() as db_session:\n                        try_generate_user_group_sync_tasks(\n                            self.app, usergroup_id, db_session, r, lock_beat, tenant_id\n                        )\n\n        # 2/3: VALIDATE: TODO\n\n        # 3/3: FINALIZE\n        lock_beat.reacquire()\n        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))\n        for key in keys:\n            key_bytes = cast(bytes, key)\n\n            if not r.exists(key_bytes):\n                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)\n                continue\n\n            key_str = key_bytes.decode(\"utf-8\")\n            # NOTE: removing the \"Redis*\" classes, prefer to just have functions to\n            # do these things going forward. In short, things should generally be like the doc\n            # sync task rather than the others\n            if key_str == DOCUMENT_SYNC_FENCE_KEY:\n                monitor_document_sync_taskset(r)\n            elif key_str.startswith(RedisDocumentSet.FENCE_PREFIX):\n                with get_session_with_current_tenant() as db_session:\n                    monitor_document_set_taskset(tenant_id, key_bytes, r, db_session)\n            elif key_str.startswith(RedisUserGroup.FENCE_PREFIX):\n                monitor_usergroup_taskset = (\n                    fetch_versioned_implementation_with_fallback(\n                        \"onyx.background.celery.tasks.vespa.tasks\",\n                        \"monitor_usergroup_taskset\",\n                        noop_fallback,\n                    )\n                )\n                with get_session_with_current_tenant() as db_session:\n                    monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session)\n\n    except SoftTimeLimitExceeded:\n        task_logger.info(\n            \"Soft time limit exceeded, task is being terminated gracefully.\"\n        )\n    except Exception:\n        task_logger.exception(\"Unexpected exception during vespa metadata sync\")\n    finally:\n        if lock_beat.owned():\n            lock_beat.release()\n        else:\n            task_logger.error(\n                f\"check_for_vespa_sync_task - Lock not owned on completion: tenant={tenant_id}\"\n            )\n            redis_lock_dump(lock_beat, r)\n\n    time_elapsed = time.monotonic() - time_start\n    task_logger.debug(f\"check_for_vespa_sync_task finished: elapsed={time_elapsed:.2f}\")\n    return True\n\n\ndef try_generate_document_set_sync_tasks(\n    celery_app: Celery,\n    document_set_id: int,\n    db_session: Session,\n    r: Redis,\n    lock_beat: RedisLock,\n    tenant_id: str,\n) -> int | None:\n    lock_beat.reacquire()\n\n    rds = RedisDocumentSet(tenant_id, document_set_id)\n\n    # don't generate document set sync tasks if tasks are still pending\n    if rds.fenced:\n        return None\n\n    # don't generate sync tasks if we're up to date\n    # race condition with the monitor/cleanup function if we use a cached result!\n    document_set = get_document_set_by_id(\n        db_session=db_session,\n        document_set_id=document_set_id,\n    )\n    if not document_set:\n        return None\n\n    if document_set.is_up_to_date:\n        # there should be no in-progress sync records if this is up to date\n        # clean it up just in case things got into a bad state\n        cleanup_sync_records(\n            db_session=db_session,\n            entity_id=document_set_id,\n            sync_type=SyncType.DOCUMENT_SET,\n        )\n        return None\n\n    # add tasks to celery and build up the task set to monitor in redis\n    r.delete(rds.taskset_key)\n\n    task_logger.info(\n        f\"RedisDocumentSet.generate_tasks starting. document_set_id={document_set.id}\"\n    )\n\n    # Add all documents that need to be updated into the queue\n    result = rds.generate_tasks(\n        VESPA_SYNC_MAX_TASKS, celery_app, db_session, r, lock_beat, tenant_id\n    )\n    if result is None:\n        return None\n\n    tasks_generated = result[0]\n    # Currently we are allowing the sync to proceed with 0 tasks.\n    # It's possible for sets/groups to be generated initially with no entries\n    # and they still need to be marked as up to date.\n    # if tasks_generated == 0:\n    #     return 0\n\n    task_logger.info(\n        f\"RedisDocumentSet.generate_tasks finished. document_set={document_set.id} tasks_generated={tasks_generated}\"\n    )\n\n    # create before setting fence to avoid race condition where the monitoring\n    # task updates the sync record before it is created\n    try:\n        insert_sync_record(\n            db_session=db_session,\n            entity_id=document_set_id,\n            sync_type=SyncType.DOCUMENT_SET,\n        )\n    except Exception:\n        task_logger.exception(\"insert_sync_record exceptioned.\")\n\n    # set this only after all tasks have been added\n    rds.set_fence(tasks_generated)\n    return tasks_generated\n\n\ndef try_generate_user_group_sync_tasks(\n    celery_app: Celery,\n    usergroup_id: int,\n    db_session: Session,\n    r: Redis,\n    lock_beat: RedisLock,\n    tenant_id: str,\n) -> int | None:\n    lock_beat.reacquire()\n\n    rug = RedisUserGroup(tenant_id, usergroup_id)\n    if rug.fenced:\n        # don't generate sync tasks if tasks are still pending\n        return None\n\n    # race condition with the monitor/cleanup function if we use a cached result!\n    fetch_user_group = cast(\n        Callable[[Session, int], UserGroup | None],\n        fetch_versioned_implementation(\"onyx.db.user_group\", \"fetch_user_group\"),\n    )\n\n    usergroup = fetch_user_group(db_session, usergroup_id)\n    if not usergroup:\n        return None\n\n    if usergroup.is_up_to_date:\n        # there should be no in-progress sync records if this is up to date\n        # clean it up just in case things got into a bad state\n        cleanup_sync_records(\n            db_session=db_session,\n            entity_id=usergroup_id,\n            sync_type=SyncType.USER_GROUP,\n        )\n        return None\n\n    # add tasks to celery and build up the task set to monitor in redis\n    r.delete(rug.taskset_key)\n\n    # Add all documents that need to be updated into the queue\n    task_logger.info(\n        f\"RedisUserGroup.generate_tasks starting. usergroup_id={usergroup.id}\"\n    )\n    result = rug.generate_tasks(\n        VESPA_SYNC_MAX_TASKS, celery_app, db_session, r, lock_beat, tenant_id\n    )\n    if result is None:\n        return None\n\n    tasks_generated = result[0]\n    # Currently we are allowing the sync to proceed with 0 tasks.\n    # It's possible for sets/groups to be generated initially with no entries\n    # and they still need to be marked as up to date.\n    # if tasks_generated == 0:\n    #     return 0\n\n    task_logger.info(\n        f\"RedisUserGroup.generate_tasks finished. usergroup={usergroup.id} tasks_generated={tasks_generated}\"\n    )\n\n    # create before setting fence to avoid race condition where the monitoring\n    # task updates the sync record before it is created\n    try:\n        insert_sync_record(\n            db_session=db_session,\n            entity_id=usergroup_id,\n            sync_type=SyncType.USER_GROUP,\n        )\n    except Exception:\n        task_logger.exception(\"insert_sync_record exceptioned.\")\n\n    # set this only after all tasks have been added\n    rug.set_fence(tasks_generated)\n\n    return tasks_generated\n\n\ndef monitor_document_sync_taskset(r: Redis) -> None:\n    initial_count = get_document_sync_payload(r)\n    if initial_count is None:\n        return\n\n    remaining = get_document_sync_remaining(r)\n    task_logger.info(\n        f\"Document sync progress: remaining={remaining} initial={initial_count}\"\n    )\n    if remaining == 0:\n        reset_document_sync(r)\n        task_logger.info(f\"Successfully synced all documents. count={initial_count}\")\n\n\ndef monitor_document_set_taskset(\n    tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session\n) -> None:\n    fence_key = key_bytes.decode(\"utf-8\")\n    document_set_id_str = RedisDocumentSet.get_id_from_fence_key(fence_key)\n    if document_set_id_str is None:\n        task_logger.warning(f\"could not parse document set id from {fence_key}\")\n        return\n\n    document_set_id = int(document_set_id_str)\n\n    rds = RedisDocumentSet(tenant_id, document_set_id)\n    if not rds.fenced:\n        return\n\n    initial_count = rds.payload\n    if initial_count is None:\n        return\n\n    count = cast(int, r.scard(rds.taskset_key))\n    task_logger.info(\n        f\"Document set sync progress: document_set={document_set_id} remaining={count} initial={initial_count}\"\n    )\n    if count > 0:\n        update_sync_record_status(\n            db_session=db_session,\n            entity_id=document_set_id,\n            sync_type=SyncType.DOCUMENT_SET,\n            sync_status=SyncStatus.IN_PROGRESS,\n            num_docs_synced=count,\n        )\n        return\n\n    document_set = cast(\n        DocumentSet,\n        get_document_set_by_id(db_session=db_session, document_set_id=document_set_id),\n    )  # casting since we \"know\" a document set with this ID exists\n    if document_set:\n        has_connector_pairs = bool(document_set.connector_credential_pairs)\n        # Federated connectors should keep a document set alive even without cc pairs.\n        has_federated_connectors = bool(\n            getattr(document_set, \"federated_connectors\", [])\n        )\n\n        if not has_connector_pairs and not has_federated_connectors:\n            # If there are no connectors of any kind, delete the document set.\n            delete_document_set(document_set_row=document_set, db_session=db_session)\n            task_logger.info(\n                f\"Successfully deleted document set: document_set={document_set_id}\"\n            )\n        else:\n            mark_document_set_as_synced(document_set_id, db_session)\n            task_logger.info(\n                f\"Successfully synced document set: document_set={document_set_id}\"\n            )\n\n        try:\n            update_sync_record_status(\n                db_session=db_session,\n                entity_id=document_set_id,\n                sync_type=SyncType.DOCUMENT_SET,\n                sync_status=SyncStatus.SUCCESS,\n                num_docs_synced=initial_count,\n            )\n        except Exception:\n            task_logger.exception(\n                f\"update_sync_record_status exceptioned. document_set_id={document_set_id} Resetting document set regardless.\"\n            )\n\n    rds.reset()\n\n\n@shared_task(\n    name=OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,\n    bind=True,\n    soft_time_limit=LIGHT_SOFT_TIME_LIMIT,\n    time_limit=LIGHT_TIME_LIMIT,\n    max_retries=3,\n)\ndef vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) -> bool:\n    start = time.monotonic()\n\n    completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            active_search_settings = get_active_search_settings(db_session)\n            # This flow is for updates so we get all indices.\n            document_indices = get_all_document_indices(\n                search_settings=active_search_settings.primary,\n                secondary_search_settings=active_search_settings.secondary,\n                httpx_client=HttpxPool.get(\"vespa\"),\n            )\n\n            retry_document_indices: list[RetryDocumentIndex] = [\n                RetryDocumentIndex(document_index)\n                for document_index in document_indices\n            ]\n\n            doc = get_document(document_id, db_session)\n            if not doc:\n                elapsed = time.monotonic() - start\n                task_logger.info(\n                    f\"doc={document_id} action=no_operation elapsed={elapsed:.2f}\"\n                )\n                completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED\n            else:\n                # document set sync\n                doc_sets = fetch_document_sets_for_document(document_id, db_session)\n                update_doc_sets: set[str] = set(doc_sets)\n\n                # User group sync\n                doc_access = get_access_for_document(\n                    document_id=document_id, db_session=db_session\n                )\n\n                fields = VespaDocumentFields(\n                    document_sets=update_doc_sets,\n                    access=doc_access,\n                    boost=doc.boost,\n                    hidden=doc.hidden,\n                    # aggregated_boost_factor=doc.aggregated_boost_factor,\n                )\n\n                for retry_document_index in retry_document_indices:\n                    # TODO(andrei): Previously there was a comment here saying\n                    # it was ok if a doc did not exist in the document index. I\n                    # don't agree with that claim, so keep an eye on this task\n                    # to see if this raises.\n                    retry_document_index.update_single(\n                        document_id,\n                        tenant_id=tenant_id,\n                        chunk_count=doc.chunk_count,\n                        fields=fields,\n                        user_fields=None,\n                    )\n\n                # update db last. Worst case = we crash right before this and\n                # the sync might repeat again later\n                mark_document_as_synced(document_id, db_session)\n\n                elapsed = time.monotonic() - start\n                task_logger.info(f\"doc={document_id} action=sync elapsed={elapsed:.2f}\")\n                completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED\n    except SoftTimeLimitExceeded:\n        task_logger.info(f\"SoftTimeLimitExceeded exception. doc={document_id}\")\n        completion_status = OnyxCeleryTaskCompletionStatus.SOFT_TIME_LIMIT\n    except Exception as ex:\n        e: Exception | None = None\n        while True:\n            if isinstance(ex, RetryError):\n                task_logger.warning(\n                    f\"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}\"\n                )\n\n                # only set the inner exception if it is of type Exception\n                e_temp = ex.last_attempt.exception()\n                if isinstance(e_temp, Exception):\n                    e = e_temp\n            else:\n                e = ex\n\n            if isinstance(e, httpx.HTTPStatusError):\n                if e.response.status_code == HTTPStatus.BAD_REQUEST:\n                    task_logger.exception(\n                        f\"Non-retryable HTTPStatusError: doc={document_id} status={e.response.status_code}\"\n                    )\n                completion_status = (\n                    OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION\n                )\n                break\n\n            task_logger.exception(\n                f\"vespa_metadata_sync_task exceptioned: doc={document_id}\"\n            )\n\n            completion_status = OnyxCeleryTaskCompletionStatus.RETRYABLE_EXCEPTION\n            if (\n                self.max_retries is not None\n                and self.request.retries >= self.max_retries\n            ):\n                completion_status = (\n                    OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION\n                )\n\n            # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64\n            countdown = 2 ** (self.request.retries + 4)\n            self.retry(exc=e, countdown=countdown)  # this will raise a celery exception\n            break  # we won't hit this, but it looks weird not to have it\n    finally:\n        task_logger.info(\n            f\"vespa_metadata_sync_task completed: status={completion_status.value} doc={document_id}\"\n        )\n\n    return completion_status == OnyxCeleryTaskCompletionStatus.SUCCEEDED\n"
  },
  {
    "path": "backend/onyx/background/celery/versioned_apps/beat.py",
    "content": "\"\"\"Factory stub for running celery worker / celery beat.\"\"\"\n\nfrom celery import Celery\n\nfrom onyx.background.celery.apps.beat import celery_app\nfrom onyx.utils.variable_functionality import set_is_ee_based_on_env_variable\n\nset_is_ee_based_on_env_variable()\napp: Celery = celery_app\n"
  },
  {
    "path": "backend/onyx/background/celery/versioned_apps/client.py",
    "content": "\"\"\"Factory stub for running celery worker / celery beat.\nThis code is different from the primary/beat stubs because there is no EE version to\nfetch. Port over the code in those files if we add an EE version of this worker.\n\nThis is an app stub purely for sending tasks as a client.\n\"\"\"\n\nfrom celery import Celery\n\nfrom onyx.utils.variable_functionality import set_is_ee_based_on_env_variable\n\nset_is_ee_based_on_env_variable()\n\n\ndef get_app() -> Celery:\n    from onyx.background.celery.apps.client import celery_app\n\n    return celery_app\n\n\napp = get_app()\n"
  },
  {
    "path": "backend/onyx/background/celery/versioned_apps/docfetching.py",
    "content": "\"\"\"Factory stub for running celery worker / celery beat.\nThis code is different from the primary/beat stubs because there is no EE version to\nfetch. Port over the code in those files if we add an EE version of this worker.\"\"\"\n\nfrom celery import Celery\n\nfrom onyx.utils.variable_functionality import set_is_ee_based_on_env_variable\n\nset_is_ee_based_on_env_variable()\n\n\ndef get_app() -> Celery:\n    from onyx.background.celery.apps.docfetching import celery_app\n\n    return celery_app\n\n\napp = get_app()\n"
  },
  {
    "path": "backend/onyx/background/celery/versioned_apps/docprocessing.py",
    "content": "\"\"\"Factory stub for running celery worker / celery beat.\nThis code is different from the primary/beat stubs because there is no EE version to\nfetch. Port over the code in those files if we add an EE version of this worker.\"\"\"\n\nfrom celery import Celery\n\nfrom onyx.utils.variable_functionality import set_is_ee_based_on_env_variable\n\nset_is_ee_based_on_env_variable()\n\n\ndef get_app() -> Celery:\n    from onyx.background.celery.apps.docprocessing import celery_app\n\n    return celery_app\n\n\napp = get_app()\n"
  },
  {
    "path": "backend/onyx/background/celery/versioned_apps/heavy.py",
    "content": "\"\"\"Factory stub for running celery worker / celery beat.\nThis code is different from the primary/beat stubs because there is no EE version to\nfetch. Port over the code in those files if we add an EE version of this worker.\"\"\"\n\nfrom celery import Celery\n\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import set_is_ee_based_on_env_variable\n\nset_is_ee_based_on_env_variable()\napp: Celery = fetch_versioned_implementation(\n    \"onyx.background.celery.apps.heavy\",\n    \"celery_app\",\n)\n"
  },
  {
    "path": "backend/onyx/background/celery/versioned_apps/light.py",
    "content": "\"\"\"Factory stub for running celery worker / celery beat.\nThis code is different from the primary/beat stubs because there is no EE version to\nfetch. Port over the code in those files if we add an EE version of this worker.\"\"\"\n\nfrom celery import Celery\n\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import set_is_ee_based_on_env_variable\n\nset_is_ee_based_on_env_variable()\napp: Celery = fetch_versioned_implementation(\n    \"onyx.background.celery.apps.light\",\n    \"celery_app\",\n)\n"
  },
  {
    "path": "backend/onyx/background/celery/versioned_apps/monitoring.py",
    "content": "\"\"\"Factory stub for running celery worker / celery beat.\"\"\"\n\nfrom celery import Celery\n\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import set_is_ee_based_on_env_variable\n\nset_is_ee_based_on_env_variable()\napp: Celery = fetch_versioned_implementation(\n    \"onyx.background.celery.apps.monitoring\",\n    \"celery_app\",\n)\n"
  },
  {
    "path": "backend/onyx/background/celery/versioned_apps/primary.py",
    "content": "\"\"\"Factory stub for running celery worker / celery beat.\"\"\"\n\nfrom celery import Celery\n\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import set_is_ee_based_on_env_variable\n\nset_is_ee_based_on_env_variable()\napp: Celery = fetch_versioned_implementation(\n    \"onyx.background.celery.apps.primary\",\n    \"celery_app\",\n)\n"
  },
  {
    "path": "backend/onyx/background/celery/versioned_apps/user_file_processing.py",
    "content": "\"\"\"Factory stub for running the user file processing Celery worker.\"\"\"\n\nfrom celery import Celery\n\nfrom onyx.utils.variable_functionality import set_is_ee_based_on_env_variable\n\nset_is_ee_based_on_env_variable()\n\n\ndef get_app() -> Celery:\n    from onyx.background.celery.apps.user_file_processing import celery_app\n\n    return celery_app\n\n\napp = get_app()\n"
  },
  {
    "path": "backend/onyx/background/error_logging.py",
    "content": "from sqlalchemy.exc import IntegrityError\n\nfrom onyx.db.background_error import create_background_error\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\n\n\ndef emit_background_error(\n    message: str,\n    cc_pair_id: int | None = None,\n) -> None:\n    \"\"\"Currently just saves a row in the background_errors table.\n\n    In the future, could create notifications based on the severity.\"\"\"\n    error_message = \"\"\n\n    # try to write to the db, but handle IntegrityError specifically\n    try:\n        with get_session_with_current_tenant() as db_session:\n            create_background_error(db_session, message, cc_pair_id)\n    except IntegrityError as e:\n        # Log an error if the cc_pair_id was deleted or any other exception occurs\n        error_message = (\n            f\"Failed to create background error: {str(e)}. Original message: {message}\"\n        )\n    except Exception:\n        pass\n\n    if not error_message:\n        return\n\n    # if we get here from an IntegrityError, try to write the error message to the db\n    # we need a new session because the first session is now invalid\n    try:\n        with get_session_with_current_tenant() as db_session:\n            create_background_error(db_session, error_message, None)\n    except Exception:\n        pass\n"
  },
  {
    "path": "backend/onyx/background/indexing/checkpointing_utils.py",
    "content": "from datetime import datetime\nfrom datetime import timedelta\nfrom io import BytesIO\n\nfrom sqlalchemy import and_\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.configs.constants import NUM_DAYS_TO_KEEP_CHECKPOINTS\nfrom onyx.connectors.interfaces import BaseConnector\nfrom onyx.connectors.interfaces import CheckpointedConnector\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.db.engine.time_utils import get_db_current_time\nfrom onyx.db.index_attempt import get_index_attempt\nfrom onyx.db.index_attempt import get_recent_completed_attempts_for_cc_pair\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.models import IndexingStatus\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.object_size_check import deep_getsizeof\n\nlogger = setup_logger()\n\n_NUM_RECENT_ATTEMPTS_TO_CONSIDER = 50\n\n\ndef _build_checkpoint_pointer(index_attempt_id: int) -> str:\n    return f\"checkpoint_{index_attempt_id}.json\"\n\n\ndef save_checkpoint(\n    db_session: Session, index_attempt_id: int, checkpoint: ConnectorCheckpoint\n) -> str:\n    \"\"\"Save a checkpoint for a given index attempt to the file store\"\"\"\n    checkpoint_pointer = _build_checkpoint_pointer(index_attempt_id)\n\n    file_store = get_default_file_store()\n    file_store.save_file(\n        content=BytesIO(checkpoint.model_dump_json().encode()),\n        display_name=checkpoint_pointer,\n        file_origin=FileOrigin.INDEXING_CHECKPOINT,\n        file_type=\"application/json\",\n        file_id=checkpoint_pointer,\n    )\n\n    index_attempt = get_index_attempt(db_session, index_attempt_id)\n    if not index_attempt:\n        raise RuntimeError(f\"Index attempt {index_attempt_id} not found in DB.\")\n    index_attempt.checkpoint_pointer = checkpoint_pointer\n    db_session.add(index_attempt)\n    db_session.commit()\n    return checkpoint_pointer\n\n\ndef load_checkpoint(\n    index_attempt_id: int, connector: BaseConnector\n) -> ConnectorCheckpoint:\n    \"\"\"Load a checkpoint for a given index attempt from the file store\"\"\"\n    checkpoint_pointer = _build_checkpoint_pointer(index_attempt_id)\n    file_store = get_default_file_store()\n    checkpoint_io = file_store.read_file(checkpoint_pointer, mode=\"rb\")\n    checkpoint_data = checkpoint_io.read().decode(\"utf-8\")\n    if isinstance(connector, CheckpointedConnector):\n        return connector.validate_checkpoint_json(checkpoint_data)\n    return ConnectorCheckpoint.model_validate_json(checkpoint_data)\n\n\ndef get_latest_valid_checkpoint(\n    db_session: Session,\n    cc_pair_id: int,\n    search_settings_id: int,\n    window_start: datetime,\n    window_end: datetime,\n    connector: BaseConnector,\n) -> tuple[ConnectorCheckpoint, bool]:\n    \"\"\"Get the latest valid checkpoint for a given connector credential pair\"\"\"\n    checkpoint_candidates = get_recent_completed_attempts_for_cc_pair(\n        cc_pair_id=cc_pair_id,\n        search_settings_id=search_settings_id,\n        db_session=db_session,\n        limit=_NUM_RECENT_ATTEMPTS_TO_CONSIDER,\n    )\n\n    # don't keep using checkpoints if we've had a bunch of failed attempts in a row\n    # where we make no progress. Only do this if we have had at least\n    # _NUM_RECENT_ATTEMPTS_TO_CONSIDER completed attempts.\n    if len(checkpoint_candidates) >= _NUM_RECENT_ATTEMPTS_TO_CONSIDER:\n        had_any_progress = False\n        for candidate in checkpoint_candidates:\n            if (\n                candidate.total_docs_indexed is not None\n                and candidate.total_docs_indexed > 0\n            ) or candidate.status.is_successful():\n                had_any_progress = True\n                break\n\n        if not had_any_progress:\n            logger.warning(\n                f\"{_NUM_RECENT_ATTEMPTS_TO_CONSIDER} consecutive failed attempts without progress \"\n                f\"found for cc_pair={cc_pair_id}. Ignoring checkpoint to let the run start \"\n                \"from scratch.\"\n            )\n            return connector.build_dummy_checkpoint(), False\n\n    # filter out any candidates that don't meet the criteria\n    checkpoint_candidates = [\n        candidate\n        for candidate in checkpoint_candidates\n        if (\n            candidate.poll_range_start == window_start\n            and candidate.poll_range_end == window_end\n            and (\n                candidate.status == IndexingStatus.FAILED\n                # if the background job was killed (and thus the attempt was canceled)\n                # we still want to use the checkpoint so that we can pick up where we left off\n                or candidate.status == IndexingStatus.CANCELED\n            )\n            and candidate.checkpoint_pointer is not None\n            # NOTE: There are a couple connectors that may make progress but not have\n            # any \"total_docs_indexed\". E.g. they are going through\n            # Slack channels, and tons of them don't have any updates.\n            # Leaving the below in as historical context / in-case we want to use it again.\n            # we want to make sure that the checkpoint is actually useful\n            # if it's only gone through a few docs, it's probably not worth\n            # using. This also avoids weird cases where a connector is basically\n            # non-functional but still \"makes progress\" by slowly moving the\n            # checkpoint forward run after run\n            # and candidate.total_docs_indexed\n            # and candidate.total_docs_indexed > 100\n        )\n    ]\n\n    # assumes latest checkpoint is the furthest along. This only isn't true\n    # if something else has gone wrong.\n    latest_valid_checkpoint_candidate = (\n        checkpoint_candidates[0] if checkpoint_candidates else None\n    )\n\n    checkpoint = connector.build_dummy_checkpoint()\n    if latest_valid_checkpoint_candidate is None:\n        logger.info(\n            f\"No valid checkpoint found for cc_pair={cc_pair_id}. Starting from scratch.\"\n        )\n        return checkpoint, False\n\n    try:\n        previous_checkpoint = load_checkpoint(\n            index_attempt_id=latest_valid_checkpoint_candidate.id,\n            connector=connector,\n        )\n    except Exception:\n        logger.exception(\n            f\"Failed to load checkpoint from previous failed attempt with ID \"\n            f\"{latest_valid_checkpoint_candidate.id}. Falling back to default checkpoint.\"\n        )\n        return checkpoint, False\n\n    logger.info(\n        f\"Using checkpoint from previous failed attempt with ID \"\n        f\"{latest_valid_checkpoint_candidate.id}. Previous checkpoint: \"\n        f\"{previous_checkpoint}\"\n    )\n    return previous_checkpoint, True\n\n\ndef get_index_attempts_with_old_checkpoints(\n    db_session: Session, days_to_keep: int = NUM_DAYS_TO_KEEP_CHECKPOINTS\n) -> list[IndexAttempt]:\n    \"\"\"Get all index attempts with checkpoints older than the specified number of days.\n\n    Args:\n        db_session: The database session\n        days_to_keep: Number of days to keep checkpoints for (default: NUM_DAYS_TO_KEEP_CHECKPOINTS)\n\n    Returns:\n        List of IndexAttempt objects with old checkpoints\n    \"\"\"\n    cutoff_date = get_db_current_time(db_session) - timedelta(days=days_to_keep)\n\n    # Find all index attempts with checkpoints older than cutoff_date\n    old_attempts = (\n        db_session.query(IndexAttempt)\n        .filter(\n            and_(\n                IndexAttempt.checkpoint_pointer.isnot(None),\n                IndexAttempt.time_created < cutoff_date,\n            )\n        )\n        .all()\n    )\n\n    return old_attempts\n\n\ndef cleanup_checkpoint(db_session: Session, index_attempt_id: int) -> None:\n    \"\"\"Clean up a checkpoint for a given index attempt\"\"\"\n    index_attempt = get_index_attempt(db_session, index_attempt_id)\n    if not index_attempt:\n        raise RuntimeError(f\"Index attempt {index_attempt_id} not found in DB.\")\n\n    if not index_attempt.checkpoint_pointer:\n        return None\n\n    file_store = get_default_file_store()\n    file_store.delete_file(index_attempt.checkpoint_pointer)\n\n    index_attempt.checkpoint_pointer = None\n    db_session.add(index_attempt)\n    db_session.commit()\n\n    return None\n\n\ndef check_checkpoint_size(checkpoint: ConnectorCheckpoint) -> None:\n    \"\"\"Check if the checkpoint content size exceeds the limit (200MB)\"\"\"\n    content_size = deep_getsizeof(checkpoint.model_dump())\n    if content_size > 200_000_000:  # 200MB in bytes\n        raise ValueError(\n            f\"Checkpoint content size ({content_size} bytes) exceeds 200MB limit\"\n        )\n"
  },
  {
    "path": "backend/onyx/background/indexing/dask_utils.py",
    "content": "import asyncio\n\nimport psutil\nfrom dask.distributed import WorkerPlugin\nfrom distributed import Worker\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass ResourceLogger(WorkerPlugin):\n    def __init__(self, log_interval: int = 60 * 5):\n        self.log_interval = log_interval\n\n    def setup(self, worker: Worker) -> None:\n        \"\"\"This method will be called when the plugin is attached to a worker.\"\"\"\n        self.worker = worker\n        worker.loop.add_callback(self.log_resources)\n\n    async def log_resources(self) -> None:\n        \"\"\"Periodically log CPU and memory usage.\n\n        NOTE: must be async or else will clog up the worker indefinitely due to the fact that\n        Dask uses Tornado under the hood (which is async)\"\"\"\n        while True:\n            cpu_percent = psutil.cpu_percent(interval=None)\n            memory_available_gb = psutil.virtual_memory().available / (1024.0**3)\n            # You can now log these values or send them to a monitoring service\n            logger.debug(\n                f\"Worker {self.worker.address}: CPU usage {cpu_percent}%, Memory available {memory_available_gb}GB\"\n            )\n            await asyncio.sleep(self.log_interval)\n"
  },
  {
    "path": "backend/onyx/background/indexing/index_attempt_utils.py",
    "content": "from datetime import timedelta\n\nfrom sqlalchemy import func\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS\nfrom onyx.db.engine.time_utils import get_db_current_time\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.models import IndexAttemptError\n\n\n# Always retain at least this many attempts per connector/search settings pair\nNUM_RECENT_INDEX_ATTEMPTS_TO_KEEP = 10\n\n\ndef get_old_index_attempts(\n    db_session: Session, days_to_keep: int = NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS\n) -> list[IndexAttempt]:\n    \"\"\"\n    Get index attempts older than the specified number of days while retaining\n    the latest NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP per connector/search settings pair.\n    \"\"\"\n    cutoff_date = get_db_current_time(db_session) - timedelta(days=days_to_keep)\n    ranked_attempts = (\n        db_session.query(\n            IndexAttempt.id.label(\"attempt_id\"),\n            IndexAttempt.time_created.label(\"time_created\"),\n            func.row_number()\n            .over(\n                partition_by=(\n                    IndexAttempt.connector_credential_pair_id,\n                    IndexAttempt.search_settings_id,\n                ),\n                order_by=IndexAttempt.time_created.desc(),\n            )\n            .label(\"attempt_rank\"),\n        )\n    ).subquery()\n\n    return (\n        db_session.query(IndexAttempt)\n        .join(\n            ranked_attempts,\n            IndexAttempt.id == ranked_attempts.c.attempt_id,\n        )\n        .filter(\n            ranked_attempts.c.time_created < cutoff_date,\n            ranked_attempts.c.attempt_rank > NUM_RECENT_INDEX_ATTEMPTS_TO_KEEP,\n        )\n        .all()\n    )\n\n\ndef cleanup_index_attempts(db_session: Session, index_attempt_ids: list[int]) -> None:\n    \"\"\"Clean up multiple index attempts\"\"\"\n    db_session.query(IndexAttemptError).filter(\n        IndexAttemptError.index_attempt_id.in_(index_attempt_ids)\n    ).delete(synchronize_session=False)\n\n    db_session.query(IndexAttempt).filter(\n        IndexAttempt.id.in_(index_attempt_ids)\n    ).delete(synchronize_session=False)\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/background/indexing/job_client.py",
    "content": "\"\"\"Custom client that works similarly to Dask, but simpler and more lightweight.\nDask jobs behaved very strangely - they would die all the time, retries would\nnot follow the expected behavior, etc.\n\nNOTE: cannot use Celery directly due to\nhttps://github.com/celery/celery/issues/7007#issuecomment-1740139367\"\"\"\n\nimport multiprocessing as mp\nimport sys\nimport traceback\nfrom collections.abc import Callable\nfrom dataclasses import dataclass\nfrom multiprocessing.context import SpawnProcess\nfrom typing import Any\nfrom typing import Literal\nfrom typing import Optional\n\nfrom onyx.configs.constants import POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.configs import TENANT_ID_PREFIX\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\nlogger = setup_logger()\n\n\nclass SimpleJobException(Exception):\n    \"\"\"lets us raise an exception that will return a specific error code\"\"\"\n\n    def __init__(self, *args: Any, **kwargs: Any) -> None:\n        code: int | None = kwargs.pop(\"code\", None)\n        self.code = code\n        super().__init__(*args, **kwargs)\n\n\nJobStatusType = (\n    Literal[\"error\"]\n    | Literal[\"finished\"]\n    | Literal[\"pending\"]\n    | Literal[\"running\"]\n    | Literal[\"cancelled\"]\n)\n\n\ndef _initializer(\n    func: Callable,\n    queue: mp.Queue,\n    args: list | tuple,\n    kwargs: dict[str, Any] | None = None,\n) -> Any:\n    \"\"\"Initialize the child process with a fresh SQLAlchemy Engine.\n\n    Based on SQLAlchemy's recommendations to handle multiprocessing:\n    https://docs.sqlalchemy.org/en/20/core/pooling.html#using-connection-pools-with-multiprocessing-or-os-fork\n    \"\"\"\n    if kwargs is None:\n        kwargs = {}\n\n    logger.info(\"Initializing spawned worker child process.\")\n    # 1. Get tenant_id from args or fallback to default\n    tenant_id = POSTGRES_DEFAULT_SCHEMA\n    for arg in reversed(args):\n        if isinstance(arg, str) and arg.startswith(TENANT_ID_PREFIX):\n            tenant_id = arg\n            break\n\n    # 2. Set the tenant context before running anything\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n\n    # Reset the engine in the child process\n    SqlEngine.reset_engine()\n\n    # Optionally set a custom app name for database logging purposes\n    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME)\n\n    # Initialize a new engine with desired parameters\n    SqlEngine.init_engine(\n        pool_size=4, max_overflow=12, pool_recycle=60, pool_pre_ping=True\n    )\n\n    # Proceed with executing the target function\n    try:\n        return func(*args, **kwargs)\n    except SimpleJobException as e:\n        logger.exception(\"SimpleJob raised a SimpleJobException\")\n        error_msg = traceback.format_exc()\n        queue.put(error_msg)  # Send the exception to the parent process\n\n        sys.exit(e.code)  # use the given exit code\n    except Exception:\n        logger.exception(\"SimpleJob raised an exception\")\n        error_msg = traceback.format_exc()\n        queue.put(error_msg)  # Send the exception to the parent process\n\n        sys.exit(255)  # use 255 to indicate a generic exception\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\ndef _run_in_process(\n    func: Callable,\n    queue: mp.Queue,\n    args: list | tuple,\n    kwargs: dict[str, Any] | None = None,\n) -> None:\n    _initializer(func, queue, args, kwargs)\n\n\n@dataclass\nclass SimpleJob:\n    \"\"\"Drop in replacement for `dask.distributed.Future`\"\"\"\n\n    id: int\n    process: Optional[\"SpawnProcess\"] = None\n    queue: Optional[mp.Queue] = None\n    _exception: Optional[str] = None\n\n    def cancel(self) -> bool:\n        return self.release()\n\n    def release(self) -> bool:\n        if self.process is not None and self.process.is_alive():\n            self.process.terminate()\n            return True\n        return False\n\n    @property\n    def status(self) -> JobStatusType:\n        if not self.process:\n            return \"pending\"\n        elif self.process.is_alive():\n            return \"running\"\n        elif self.process.exitcode is None:\n            return \"cancelled\"\n        elif self.process.exitcode != 0:\n            return \"error\"\n        else:\n            return \"finished\"\n\n    def done(self) -> bool:\n        return (\n            self.status == \"finished\"\n            or self.status == \"cancelled\"\n            or self.status == \"error\"\n        )\n\n    def exception(self) -> str:\n        \"\"\"Needed to match the Dask API, but not implemented since we don't currently\n        have a way to get back the exception information from the child process.\"\"\"\n\n        \"\"\"Retrieve exception from the multiprocessing queue if available.\"\"\"\n        if self._exception is None and self.queue and not self.queue.empty():\n            self._exception = self.queue.get()  # Get exception from queue\n\n        return (\n            self._exception or f\"Job with ID '{self.id}' did not report an exception.\"\n        )\n\n\nclass SimpleJobClient:\n    \"\"\"Drop in replacement for `dask.distributed.Client`\"\"\"\n\n    def __init__(self, n_workers: int = 1) -> None:\n        self.n_workers = n_workers\n        self.job_id_counter = 0\n        self.jobs: dict[int, SimpleJob] = {}\n\n    def _cleanup_completed_jobs(self) -> None:\n        current_job_ids = list(self.jobs.keys())\n        for job_id in current_job_ids:\n            job = self.jobs.get(job_id)\n            if job and job.done():\n                logger.debug(f\"Cleaning up job with id: '{job.id}'\")\n                del self.jobs[job.id]\n\n    def submit(\n        self,\n        func: Callable,\n        *args: Any,\n        pure: bool = True,  # noqa: ARG002\n    ) -> SimpleJob | None:\n        \"\"\"NOTE: `pure` arg is needed so this can be a drop in replacement for Dask\"\"\"\n        self._cleanup_completed_jobs()\n        if len(self.jobs) >= self.n_workers:\n            logger.debug(\n                f\"No available workers to run job. Currently running '{len(self.jobs)}' jobs, with a limit of '{self.n_workers}'.\"\n            )\n            return None\n\n        job_id = self.job_id_counter\n        self.job_id_counter += 1\n\n        # this approach allows us to always \"spawn\" a new process regardless of\n        # get_start_method's current setting\n        ctx = mp.get_context(\"spawn\")\n        queue = ctx.Queue()\n        process = ctx.Process(\n            target=_run_in_process, args=(func, queue, args), daemon=True\n        )\n        job = SimpleJob(id=job_id, process=process, queue=queue)\n        process.start()\n\n        self.jobs[job_id] = job\n\n        return job\n"
  },
  {
    "path": "backend/onyx/background/indexing/memory_tracer.py",
    "content": "import tracemalloc\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nDANSWER_TRACEMALLOC_FRAMES = 10\n\n\nclass MemoryTracer:\n    def __init__(self, interval: int = 0, num_print_entries: int = 5):\n        self.interval = interval\n        self.num_print_entries = num_print_entries\n        self.snapshot_first: tracemalloc.Snapshot | None = None\n        self.snapshot_prev: tracemalloc.Snapshot | None = None\n        self.snapshot: tracemalloc.Snapshot | None = None\n        self.counter = 0\n\n    def start(self) -> None:\n        \"\"\"Start the memory tracer if interval is greater than 0.\"\"\"\n        if self.interval > 0:\n            logger.debug(f\"Memory tracer starting: interval={self.interval}\")\n            tracemalloc.start(DANSWER_TRACEMALLOC_FRAMES)\n            self._take_snapshot()\n\n    def stop(self) -> None:\n        \"\"\"Stop the memory tracer if it's running.\"\"\"\n        if self.interval > 0:\n            self.log_final_diff()\n            tracemalloc.stop()\n            logger.debug(\"Memory tracer stopped.\")\n\n    def _take_snapshot(self) -> None:\n        \"\"\"Take a snapshot and update internal snapshot states.\"\"\"\n        snapshot = tracemalloc.take_snapshot()\n        # Filter out irrelevant frames\n        snapshot = snapshot.filter_traces(\n            (\n                tracemalloc.Filter(False, tracemalloc.__file__),\n                tracemalloc.Filter(False, \"<frozen importlib._bootstrap>\"),\n                tracemalloc.Filter(False, \"<frozen importlib._bootstrap_external>\"),\n            )\n        )\n\n        if not self.snapshot_first:\n            self.snapshot_first = snapshot\n\n        if self.snapshot:\n            self.snapshot_prev = self.snapshot\n\n        self.snapshot = snapshot\n\n    def _log_diff(\n        self, current: tracemalloc.Snapshot, previous: tracemalloc.Snapshot\n    ) -> None:\n        \"\"\"Log the memory difference between two snapshots.\"\"\"\n        stats = current.compare_to(previous, \"traceback\")\n        for s in stats[: self.num_print_entries]:\n            logger.debug(f\"Tracer diff: {s}\")\n            for line in s.traceback.format():\n                logger.debug(f\"* {line}\")\n\n    def increment_and_maybe_trace(self) -> None:\n        \"\"\"Increment counter and perform trace if interval is hit.\"\"\"\n        if self.interval <= 0:\n            return\n\n        self.counter += 1\n        if self.counter % self.interval == 0:\n            logger.debug(\n                f\"Running trace comparison for batch {self.counter}. interval={self.interval}\"\n            )\n            self._take_snapshot()\n            if self.snapshot and self.snapshot_prev:\n                self._log_diff(self.snapshot, self.snapshot_prev)\n\n    def log_final_diff(self) -> None:\n        \"\"\"Log the final memory diff between start and end of indexing.\"\"\"\n        if self.interval <= 0:\n            return\n\n        logger.debug(\n            f\"Running trace comparison between start and end of indexing. {self.counter} batches processed.\"\n        )\n        self._take_snapshot()\n        if self.snapshot and self.snapshot_first:\n            self._log_diff(self.snapshot, self.snapshot_first)\n"
  },
  {
    "path": "backend/onyx/background/indexing/models.py",
    "content": "from datetime import datetime\n\nfrom pydantic import BaseModel\n\nfrom onyx.db.models import IndexAttemptError\n\n\nclass IndexAttemptErrorPydantic(BaseModel):\n    id: int\n    connector_credential_pair_id: int\n\n    document_id: str | None\n    document_link: str | None\n\n    entity_id: str | None\n    failed_time_range_start: datetime | None\n    failed_time_range_end: datetime | None\n\n    failure_message: str\n    is_resolved: bool = False\n\n    time_created: datetime\n\n    index_attempt_id: int\n\n    @classmethod\n    def from_model(cls, model: IndexAttemptError) -> \"IndexAttemptErrorPydantic\":\n        return cls(\n            id=model.id,\n            connector_credential_pair_id=model.connector_credential_pair_id,\n            document_id=model.document_id,\n            document_link=model.document_link,\n            entity_id=model.entity_id,\n            failed_time_range_start=model.failed_time_range_start,\n            failed_time_range_end=model.failed_time_range_end,\n            failure_message=model.failure_message,\n            is_resolved=model.is_resolved,\n            time_created=model.time_created,\n            index_attempt_id=model.index_attempt_id,\n        )\n"
  },
  {
    "path": "backend/onyx/background/indexing/run_docfetching.py",
    "content": "import sys\nimport time\nimport traceback\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nfrom celery import Celery\nfrom sqlalchemy.orm import Session\n\nfrom onyx.access.access import source_should_fetch_permissions_during_indexing\nfrom onyx.background.indexing.checkpointing_utils import check_checkpoint_size\nfrom onyx.background.indexing.checkpointing_utils import get_latest_valid_checkpoint\nfrom onyx.background.indexing.checkpointing_utils import save_checkpoint\nfrom onyx.background.indexing.memory_tracer import MemoryTracer\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.app_configs import INDEXING_SIZE_WARNING_THRESHOLD\nfrom onyx.configs.app_configs import INDEXING_TRACER_INTERVAL\nfrom onyx.configs.app_configs import INTEGRATION_TESTS_MODE\nfrom onyx.configs.app_configs import LEAVE_CONNECTOR_ACTIVE_ON_INITIALIZATION_FAILURE\nfrom onyx.configs.app_configs import MAX_FILE_SIZE_BYTES\nfrom onyx.configs.app_configs import POLL_CONNECTOR_OFFSET\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.connectors.connector_runner import ConnectorRunner\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.factory import instantiate_connector\nfrom onyx.connectors.interfaces import CheckpointedConnector\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import ConnectorStopSignal\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import IndexAttemptMetadata\nfrom onyx.connectors.models import TextSection\nfrom onyx.db.connector import mark_ccpair_with_indexing_trigger\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.connector_credential_pair import get_last_successful_attempt_poll_range_end\nfrom onyx.db.connector_credential_pair import update_connector_credential_pair\nfrom onyx.db.constants import CONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX\nfrom onyx.db.document import mark_document_as_indexed_for_cc_pair__no_commit\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.enums import IndexModelStatus\nfrom onyx.db.enums import ProcessingMode\nfrom onyx.db.hierarchy import upsert_hierarchy_node_cc_pair_entries\nfrom onyx.db.hierarchy import upsert_hierarchy_nodes_batch\nfrom onyx.db.index_attempt import create_index_attempt_error\nfrom onyx.db.index_attempt import get_index_attempt\nfrom onyx.db.index_attempt import get_recent_completed_attempts_for_cc_pair\nfrom onyx.db.index_attempt import mark_attempt_canceled\nfrom onyx.db.index_attempt import mark_attempt_failed\nfrom onyx.db.index_attempt import transition_attempt_to_in_progress\nfrom onyx.db.indexing_coordination import IndexingCoordination\nfrom onyx.db.models import IndexAttempt\nfrom onyx.file_store.document_batch_storage import DocumentBatchStorage\nfrom onyx.file_store.document_batch_storage import get_document_batch_storage\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.indexing.indexing_pipeline import index_doc_batch_prepare\nfrom onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch\nfrom onyx.redis.redis_hierarchy import ensure_source_node_exists\nfrom onyx.redis.redis_hierarchy import get_node_id_from_raw_id\nfrom onyx.redis.redis_hierarchy import get_source_node_id_from_cache\nfrom onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.features.build.indexing.persistent_document_writer import (\n    get_persistent_document_writer,\n)\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.middleware import make_randomized_onyx_request_id\nfrom onyx.utils.postgres_sanitization import sanitize_document_for_postgres\nfrom onyx.utils.postgres_sanitization import sanitize_hierarchy_nodes_for_postgres\nfrom onyx.utils.variable_functionality import global_version\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import INDEX_ATTEMPT_INFO_CONTEXTVAR\n\nlogger = setup_logger(propagate=False)\n\nINDEXING_TRACER_NUM_PRINT_ENTRIES = 5\n\n\ndef _get_connector_runner(\n    db_session: Session,\n    attempt: IndexAttempt,\n    batch_size: int,\n    start_time: datetime,\n    end_time: datetime,\n    include_permissions: bool,\n    leave_connector_active: bool = LEAVE_CONNECTOR_ACTIVE_ON_INITIALIZATION_FAILURE,\n) -> ConnectorRunner:\n    \"\"\"\n    NOTE: `start_time` and `end_time` are only used for poll connectors\n\n    Returns an iterator of document batches and whether the returned documents\n    are the complete list of existing documents of the connector. If the task\n    of type LOAD_STATE, the list will be considered complete and otherwise incomplete.\n    \"\"\"\n\n    task = attempt.connector_credential_pair.connector.input_type\n\n    try:\n        runnable_connector = instantiate_connector(\n            db_session=db_session,\n            source=attempt.connector_credential_pair.connector.source,\n            input_type=task,\n            connector_specific_config=attempt.connector_credential_pair.connector.connector_specific_config,\n            credential=attempt.connector_credential_pair.credential,\n        )\n\n        # validate the connector settings\n        if not INTEGRATION_TESTS_MODE:\n            runnable_connector.validate_connector_settings()\n            if attempt.connector_credential_pair.access_type == AccessType.SYNC:\n                runnable_connector.validate_perm_sync()\n\n    except UnexpectedValidationError as e:\n        logger.exception(\n            \"Unable to instantiate connector due to an unexpected temporary issue.\"\n        )\n        raise e\n    except Exception as e:\n        logger.exception(\"Unable to instantiate connector. Pausing until fixed.\")\n        # since we failed to even instantiate the connector, we pause the CCPair since\n        # it will never succeed\n\n        # Sometimes there are cases where the connector will\n        # intermittently fail to initialize in which case we should pass in\n        # leave_connector_active=True to allow it to continue.\n        # For example, if there is nightly maintenance on a Confluence Server instance,\n        # the connector will fail to initialize every night.\n        if not leave_connector_active:\n            cc_pair = get_connector_credential_pair_from_id(\n                db_session=db_session,\n                cc_pair_id=attempt.connector_credential_pair.id,\n            )\n            if cc_pair and cc_pair.status == ConnectorCredentialPairStatus.ACTIVE:\n                update_connector_credential_pair(\n                    db_session=db_session,\n                    connector_id=attempt.connector_credential_pair.connector.id,\n                    credential_id=attempt.connector_credential_pair.credential.id,\n                    status=ConnectorCredentialPairStatus.PAUSED,\n                )\n        raise e\n\n    return ConnectorRunner(\n        connector=runnable_connector,\n        batch_size=batch_size,\n        include_permissions=include_permissions,\n        time_range=(start_time, end_time),\n    )\n\n\ndef strip_null_characters(doc_batch: list[Document]) -> list[Document]:\n    cleaned_batch = []\n    for doc in doc_batch:\n        if sys.getsizeof(doc) > MAX_FILE_SIZE_BYTES:\n            logger.warning(\n                f\"doc {doc.id} too large, Document size: {sys.getsizeof(doc)}\"\n            )\n        cleaned_batch.append(sanitize_document_for_postgres(doc))\n\n    return cleaned_batch\n\n\ndef _check_connector_and_attempt_status(\n    db_session_temp: Session,\n    cc_pair_id: int,\n    search_settings_status: IndexModelStatus,\n    index_attempt_id: int,\n) -> None:\n    \"\"\"\n    Checks the status of the connector credential pair and index attempt.\n    Raises a RuntimeError if any conditions are not met.\n    \"\"\"\n    cc_pair_loop = get_connector_credential_pair_from_id(\n        db_session_temp,\n        cc_pair_id,\n    )\n    if not cc_pair_loop:\n        raise RuntimeError(f\"CC pair {cc_pair_id} not found in DB.\")\n\n    if (\n        cc_pair_loop.status == ConnectorCredentialPairStatus.PAUSED\n        and search_settings_status != IndexModelStatus.FUTURE\n    ) or cc_pair_loop.status == ConnectorCredentialPairStatus.DELETING:\n        raise ConnectorStopSignal(f\"Connector {cc_pair_loop.status.value.lower()}\")\n\n    index_attempt_loop = get_index_attempt(db_session_temp, index_attempt_id)\n    if not index_attempt_loop:\n        raise RuntimeError(f\"Index attempt {index_attempt_id} not found in DB.\")\n\n    if index_attempt_loop.status == IndexingStatus.CANCELED:\n        raise ConnectorStopSignal(f\"Index attempt {index_attempt_id} was canceled\")\n\n    if index_attempt_loop.status != IndexingStatus.IN_PROGRESS:\n        error_str = \"\"\n        if index_attempt_loop.error_msg:\n            error_str = f\" Original error: {index_attempt_loop.error_msg}\"\n\n        raise RuntimeError(\n            f\"Index Attempt is not running, status is {index_attempt_loop.status}.{error_str}\"\n        )\n\n    if index_attempt_loop.celery_task_id is None:\n        raise RuntimeError(f\"Index attempt {index_attempt_id} has no celery task id\")\n\n\n# TODO: delete from here if ends up unused\ndef _check_failure_threshold(\n    total_failures: int,\n    document_count: int,\n    batch_num: int,\n    last_failure: ConnectorFailure | None,\n) -> None:\n    \"\"\"Check if we've hit the failure threshold and raise an appropriate exception if so.\n\n    We consider the threshold hit if:\n    1. We have more than 3 failures AND\n    2. Failures account for more than 10% of processed documents\n    \"\"\"\n    failure_ratio = total_failures / (document_count or 1)\n\n    FAILURE_THRESHOLD = 3\n    FAILURE_RATIO_THRESHOLD = 0.1\n    if total_failures > FAILURE_THRESHOLD and failure_ratio > FAILURE_RATIO_THRESHOLD:\n        logger.error(\n            f\"Connector run failed with '{total_failures}' errors after '{batch_num}' batches.\"\n        )\n        if last_failure and last_failure.exception:\n            raise last_failure.exception from last_failure.exception\n\n        raise RuntimeError(\n            f\"Connector run encountered too many errors, aborting. Last error: {last_failure}\"\n        )\n\n\ndef run_docfetching_entrypoint(\n    app: Celery,\n    index_attempt_id: int,\n    tenant_id: str,\n    connector_credential_pair_id: int,\n    is_ee: bool = False,\n    callback: IndexingHeartbeatInterface | None = None,\n) -> None:\n    \"\"\"Don't swallow exceptions here ... propagate them up.\"\"\"\n\n    if is_ee:\n        global_version.set_ee()\n\n    # set the indexing attempt ID so that all log messages from this process\n    # will have it added as a prefix\n    token = INDEX_ATTEMPT_INFO_CONTEXTVAR.set(\n        (connector_credential_pair_id, index_attempt_id)\n    )\n    with get_session_with_current_tenant() as db_session:\n        attempt = transition_attempt_to_in_progress(index_attempt_id, db_session)\n\n        tenant_str = \"\"\n        if MULTI_TENANT:\n            tenant_str = f\" for tenant {tenant_id}\"\n\n        connector_name = attempt.connector_credential_pair.connector.name\n        connector_config = (\n            attempt.connector_credential_pair.connector.connector_specific_config\n        )\n        credential_id = attempt.connector_credential_pair.credential_id\n\n    logger.info(\n        f\"Docfetching starting{tenant_str}: \"\n        f\"connector='{connector_name}' \"\n        f\"config='{connector_config}' \"\n        f\"credentials='{credential_id}'\"\n    )\n\n    connector_document_extraction(\n        app,\n        index_attempt_id,\n        attempt.connector_credential_pair_id,\n        attempt.search_settings_id,\n        tenant_id,\n        callback,\n    )\n\n    logger.info(\n        f\"Docfetching finished{tenant_str}: \"\n        f\"connector='{connector_name}' \"\n        f\"config='{connector_config}' \"\n        f\"credentials='{credential_id}'\"\n    )\n\n    INDEX_ATTEMPT_INFO_CONTEXTVAR.reset(token)\n\n\ndef connector_document_extraction(\n    app: Celery,\n    index_attempt_id: int,\n    cc_pair_id: int,\n    search_settings_id: int,\n    tenant_id: str,\n    callback: IndexingHeartbeatInterface | None = None,\n) -> None:\n    \"\"\"Extract documents from connector and queue them for indexing pipeline processing.\n\n    This is the first part of the split indexing process that runs the connector\n    and extracts documents, storing them in the filestore for later processing.\n    \"\"\"\n\n    start_time = time.monotonic()\n\n    logger.info(\n        f\"Document extraction starting: \"\n        f\"attempt={index_attempt_id} \"\n        f\"cc_pair={cc_pair_id} \"\n        f\"search_settings={search_settings_id} \"\n        f\"tenant={tenant_id}\"\n    )\n\n    # Get batch storage (transition to IN_PROGRESS is handled by run_indexing_entrypoint)\n    batch_storage = get_document_batch_storage(cc_pair_id, index_attempt_id)\n\n    # Initialize memory tracer. NOTE: won't actually do anything if\n    # `INDEXING_TRACER_INTERVAL` is 0.\n    memory_tracer = MemoryTracer(interval=INDEXING_TRACER_INTERVAL)\n    memory_tracer.start()\n\n    index_attempt = None\n    last_batch_num = 0  # used to continue from checkpointing\n    # comes from _run_indexing\n    with get_session_with_current_tenant() as db_session:\n        index_attempt = get_index_attempt(\n            db_session,\n            index_attempt_id,\n            eager_load_cc_pair=True,\n            eager_load_search_settings=True,\n        )\n        if not index_attempt:\n            raise RuntimeError(f\"Index attempt {index_attempt_id} not found\")\n\n        if index_attempt.search_settings is None:\n            raise ValueError(\"Search settings must be set for indexing\")\n\n        # Clear the indexing trigger if it was set, to prevent duplicate indexing attempts\n        if index_attempt.connector_credential_pair.indexing_trigger is not None:\n            logger.info(\n                \"Clearing indexing trigger: \"\n                f\"cc_pair={index_attempt.connector_credential_pair.id} \"\n                f\"trigger={index_attempt.connector_credential_pair.indexing_trigger}\"\n            )\n            mark_ccpair_with_indexing_trigger(\n                index_attempt.connector_credential_pair.id, None, db_session\n            )\n\n        db_connector = index_attempt.connector_credential_pair.connector\n        db_credential = index_attempt.connector_credential_pair.credential\n        processing_mode = index_attempt.connector_credential_pair.processing_mode\n        is_primary = index_attempt.search_settings.status == IndexModelStatus.PRESENT\n        is_connector_public = (\n            index_attempt.connector_credential_pair.access_type == AccessType.PUBLIC\n        )\n\n        from_beginning = index_attempt.from_beginning\n        has_successful_attempt = (\n            index_attempt.connector_credential_pair.last_successful_index_time\n            is not None\n        )\n        # Use higher priority for first-time indexing to ensure new connectors\n        # get processed before re-indexing of existing connectors\n        docprocessing_priority = (\n            OnyxCeleryPriority.MEDIUM\n            if has_successful_attempt\n            else OnyxCeleryPriority.HIGH\n        )\n\n        earliest_index_time = (\n            db_connector.indexing_start.timestamp()\n            if db_connector.indexing_start\n            else 0\n        )\n        should_fetch_permissions_during_indexing = (\n            index_attempt.connector_credential_pair.access_type == AccessType.SYNC\n            and source_should_fetch_permissions_during_indexing(db_connector.source)\n            and is_primary\n            # if we've already successfully indexed, let the doc_sync job\n            # take care of doc-level permissions\n            and (from_beginning or not has_successful_attempt)\n        )\n\n        # Set up time windows for polling\n        last_successful_index_poll_range_end = (\n            earliest_index_time\n            if from_beginning\n            else get_last_successful_attempt_poll_range_end(\n                cc_pair_id=cc_pair_id,\n                earliest_index=earliest_index_time,\n                search_settings=index_attempt.search_settings,\n                db_session=db_session,\n            )\n        )\n\n        if last_successful_index_poll_range_end > POLL_CONNECTOR_OFFSET:\n            window_start = datetime.fromtimestamp(\n                last_successful_index_poll_range_end, tz=timezone.utc\n            ) - timedelta(minutes=POLL_CONNECTOR_OFFSET)\n        else:\n            # don't go into \"negative\" time if we've never indexed before\n            window_start = datetime.fromtimestamp(0, tz=timezone.utc)\n\n        most_recent_attempt = next(\n            iter(\n                get_recent_completed_attempts_for_cc_pair(\n                    cc_pair_id=cc_pair_id,\n                    search_settings_id=index_attempt.search_settings_id,\n                    db_session=db_session,\n                    limit=1,\n                )\n            ),\n            None,\n        )\n\n        # if the last attempt failed, try and use the same window. This is necessary\n        # to ensure correctness with checkpointing. If we don't do this, things like\n        # new slack channels could be missed (since existing slack channels are\n        # cached as part of the checkpoint).\n        if (\n            most_recent_attempt\n            and most_recent_attempt.poll_range_end\n            and (\n                most_recent_attempt.status == IndexingStatus.FAILED\n                or most_recent_attempt.status == IndexingStatus.CANCELED\n            )\n        ):\n            window_end = most_recent_attempt.poll_range_end\n        else:\n            window_end = datetime.now(tz=timezone.utc)\n\n        # set time range in db\n        index_attempt.poll_range_start = window_start\n        index_attempt.poll_range_end = window_end\n        db_session.commit()\n\n        # TODO: maybe memory tracer here\n\n        # Set up connector runner\n        connector_runner = _get_connector_runner(\n            db_session=db_session,\n            attempt=index_attempt,\n            batch_size=INDEX_BATCH_SIZE,\n            start_time=window_start,\n            end_time=window_end,\n            include_permissions=should_fetch_permissions_during_indexing,\n        )\n\n        # don't use a checkpoint if we're explicitly indexing from\n        # the beginning in order to avoid weird interactions between\n        # checkpointing / failure handling\n        # OR\n        # if the last attempt was successful\n        if index_attempt.from_beginning or (\n            most_recent_attempt and most_recent_attempt.status.is_successful()\n        ):\n            logger.info(\n                f\"Cleaning up all old batches for index attempt {index_attempt_id} before starting new run\"\n            )\n            batch_storage.cleanup_all_batches()\n            checkpoint = connector_runner.connector.build_dummy_checkpoint()\n        else:\n            logger.info(\n                f\"Getting latest valid checkpoint for index attempt {index_attempt_id}\"\n            )\n            checkpoint, resuming_from_checkpoint = get_latest_valid_checkpoint(\n                db_session=db_session,\n                cc_pair_id=cc_pair_id,\n                search_settings_id=index_attempt.search_settings_id,\n                window_start=window_start,\n                window_end=window_end,\n                connector=connector_runner.connector,\n            )\n\n            # checkpoint resumption OR the connector already finished.\n            if (\n                isinstance(connector_runner.connector, CheckpointedConnector)\n                and resuming_from_checkpoint\n            ) or (\n                most_recent_attempt\n                and most_recent_attempt.total_batches is not None\n                and not checkpoint.has_more\n            ):\n                reissued_batch_count, completed_batches = reissue_old_batches(\n                    batch_storage,\n                    index_attempt_id,\n                    cc_pair_id,\n                    tenant_id,\n                    app,\n                    most_recent_attempt,\n                    docprocessing_priority,\n                )\n                last_batch_num = reissued_batch_count + completed_batches\n                index_attempt.completed_batches = completed_batches\n                db_session.commit()\n            else:\n                logger.info(\n                    f\"Cleaning up all batches for index attempt {index_attempt_id} before starting new run\"\n                )\n                # for non-checkpointed connectors, throw out batches from previous unsuccessful attempts\n                # because we'll be getting those documents again anyways.\n                batch_storage.cleanup_all_batches()\n\n        # Save initial checkpoint\n        save_checkpoint(\n            db_session=db_session,\n            index_attempt_id=index_attempt_id,\n            checkpoint=checkpoint,\n        )\n\n    try:\n        batch_num = last_batch_num  # starts at 0 if no last batch\n        total_doc_batches_queued = 0\n        total_failures = 0\n        document_count = 0\n\n        # Ensure the SOURCE-type root hierarchy node exists before processing.\n        # This is the root of the hierarchy tree for this source - all other\n        # hierarchy nodes should ultimately have this as an ancestor.\n        redis_client = get_redis_client(tenant_id=tenant_id)\n        with get_session_with_current_tenant() as db_session:\n            ensure_source_node_exists(redis_client, db_session, db_connector.source)\n\n        # Main extraction loop\n        while checkpoint.has_more:\n            logger.info(\n                f\"Running '{db_connector.source.value}' connector with checkpoint: {checkpoint}\"\n            )\n            for (\n                document_batch,\n                hierarchy_node_batch,\n                failure,\n                next_checkpoint,\n            ) in connector_runner.run(checkpoint):\n                # Check if connector is disabled mid run and stop if so unless it's the secondary\n                # index being built. We want to populate it even for paused connectors\n                # Often paused connectors are sources that aren't updated frequently but the\n                # contents still need to be initially pulled.\n                if callback and callback.should_stop():\n                    raise ConnectorStopSignal(\"Connector stop signal detected\")\n\n                # will exception if the connector/index attempt is marked as paused/failed\n                with get_session_with_current_tenant() as db_session_tmp:\n                    _check_connector_and_attempt_status(\n                        db_session_tmp,\n                        cc_pair_id,\n                        index_attempt.search_settings.status,\n                        index_attempt_id,\n                    )\n\n                # save record of any failures at the connector level\n                if failure is not None:\n                    total_failures += 1\n                    with get_session_with_current_tenant() as db_session:\n                        create_index_attempt_error(\n                            index_attempt_id,\n                            cc_pair_id,\n                            failure,\n                            db_session,\n                        )\n                    _check_failure_threshold(\n                        total_failures, document_count, batch_num, failure\n                    )\n\n                # Save checkpoint if provided\n                if next_checkpoint:\n                    checkpoint = next_checkpoint\n\n                # Process hierarchy nodes batch - upsert to Postgres and cache in Redis\n                if hierarchy_node_batch:\n                    hierarchy_node_batch_cleaned = (\n                        sanitize_hierarchy_nodes_for_postgres(hierarchy_node_batch)\n                    )\n                    with get_session_with_current_tenant() as db_session:\n                        upserted_nodes = upsert_hierarchy_nodes_batch(\n                            db_session=db_session,\n                            nodes=hierarchy_node_batch_cleaned,\n                            source=db_connector.source,\n                            commit=True,\n                            is_connector_public=is_connector_public,\n                        )\n\n                        upsert_hierarchy_node_cc_pair_entries(\n                            db_session=db_session,\n                            hierarchy_node_ids=[n.id for n in upserted_nodes],\n                            connector_id=db_connector.id,\n                            credential_id=db_credential.id,\n                            commit=True,\n                        )\n\n                        # Cache in Redis for fast ancestor resolution during doc processing\n                        redis_client = get_redis_client(tenant_id=tenant_id)\n                        cache_entries = [\n                            HierarchyNodeCacheEntry.from_db_model(node)\n                            for node in upserted_nodes\n                        ]\n                        cache_hierarchy_nodes_batch(\n                            redis_client=redis_client,\n                            source=db_connector.source,\n                            entries=cache_entries,\n                        )\n\n                    logger.debug(\n                        f\"Persisted and cached {len(hierarchy_node_batch_cleaned)} hierarchy nodes for attempt={index_attempt_id}\"\n                    )\n\n                # below is all document processing task, so if no batch we can just continue\n                if not document_batch:\n                    continue\n\n                # Clean documents and create batch\n                doc_batch_cleaned = strip_null_characters(document_batch)\n\n                # Resolve parent_hierarchy_raw_node_id to parent_hierarchy_node_id\n                # using the Redis cache (just populated from hierarchy nodes batch)\n                with get_session_with_current_tenant() as db_session_tmp:\n                    source_node_id = get_source_node_id_from_cache(\n                        redis_client, db_session_tmp, db_connector.source\n                    )\n                for doc in doc_batch_cleaned:\n                    if doc.parent_hierarchy_raw_node_id is not None:\n                        node_id, found = get_node_id_from_raw_id(\n                            redis_client,\n                            db_connector.source,\n                            doc.parent_hierarchy_raw_node_id,\n                        )\n                        doc.parent_hierarchy_node_id = (\n                            node_id if found else source_node_id\n                        )\n                    else:\n                        doc.parent_hierarchy_node_id = source_node_id\n\n                batch_description = []\n\n                for doc in doc_batch_cleaned:\n                    batch_description.append(doc.to_short_descriptor())\n\n                    doc_size = 0\n                    for section in doc.sections:\n                        if (\n                            isinstance(section, TextSection)\n                            and section.text is not None\n                        ):\n                            doc_size += len(section.text)\n\n                    if doc_size > INDEXING_SIZE_WARNING_THRESHOLD:\n                        logger.warning(\n                            f\"Document size: doc='{doc.to_short_descriptor()}' \"\n                            f\"size={doc_size} \"\n                            f\"threshold={INDEXING_SIZE_WARNING_THRESHOLD}\"\n                        )\n\n                logger.debug(f\"Indexing batch of documents: {batch_description}\")\n                memory_tracer.increment_and_maybe_trace()\n\n                if processing_mode == ProcessingMode.FILE_SYSTEM:\n                    # File system only - write directly to persistent storage,\n                    # skip chunking/embedding/Vespa but still track documents in DB\n\n                    # IMPORTANT: Write to S3 FIRST, before marking as indexed in DB.\n\n                    # Write documents to persistent file system\n                    # Use creator_id for user-segregated storage paths (sandbox isolation)\n                    creator_id = index_attempt.connector_credential_pair.creator_id\n                    if creator_id is None:\n                        raise ValueError(\n                            f\"ConnectorCredentialPair {index_attempt.connector_credential_pair.id} \"\n                            \"must have a creator_id for persistent document storage\"\n                        )\n                    user_id_str: str = str(creator_id)\n                    writer = get_persistent_document_writer(\n                        user_id=user_id_str,\n                        tenant_id=tenant_id,\n                    )\n                    written_paths = writer.write_documents(doc_batch_cleaned)\n\n                    # Only after successful S3 write, mark documents as indexed in DB\n                    with get_session_with_current_tenant() as db_session:\n                        # Create metadata for the batch\n                        index_attempt_metadata = IndexAttemptMetadata(\n                            attempt_id=index_attempt_id,\n                            connector_id=db_connector.id,\n                            credential_id=db_credential.id,\n                            request_id=make_randomized_onyx_request_id(\"FSI\"),\n                            structured_id=f\"{tenant_id}:{cc_pair_id}:{index_attempt_id}:{batch_num}\",\n                            batch_num=batch_num,\n                        )\n\n                        # Upsert documents to PostgreSQL (document table + cc_pair relationship)\n                        # This is a subset of what docprocessing does - just DB tracking, no chunking/embedding\n                        index_doc_batch_prepare(\n                            documents=doc_batch_cleaned,\n                            index_attempt_metadata=index_attempt_metadata,\n                            db_session=db_session,\n                            ignore_time_skip=True,  # Documents already filtered during extraction\n                        )\n\n                        # Mark documents as indexed for the CC pair\n                        mark_document_as_indexed_for_cc_pair__no_commit(\n                            connector_id=db_connector.id,\n                            credential_id=db_credential.id,\n                            document_ids=[doc.id for doc in doc_batch_cleaned],\n                            db_session=db_session,\n                        )\n                        db_session.commit()\n\n                    # Update coordination directly (no docprocessing task)\n                    with get_session_with_current_tenant() as db_session:\n                        IndexingCoordination.update_batch_completion_and_docs(\n                            db_session=db_session,\n                            index_attempt_id=index_attempt_id,\n                            total_docs_indexed=len(doc_batch_cleaned),\n                            new_docs_indexed=len(doc_batch_cleaned),\n                            total_chunks=0,  # No chunks for file system mode\n                        )\n\n                    batch_num += 1\n                    total_doc_batches_queued += 1\n\n                    logger.info(\n                        f\"Wrote documents to file system: \"\n                        f\"batch_num={batch_num} \"\n                        f\"docs={len(written_paths)} \"\n                        f\"attempt={index_attempt_id}\"\n                    )\n                else:\n                    # REGULAR mode (default): Full pipeline - store and queue docprocessing\n                    batch_storage.store_batch(batch_num, doc_batch_cleaned)\n\n                    # Create processing task data\n                    processing_batch_data = {\n                        \"index_attempt_id\": index_attempt_id,\n                        \"cc_pair_id\": cc_pair_id,\n                        \"tenant_id\": tenant_id,\n                        \"batch_num\": batch_num,  # 0-indexed\n                    }\n\n                    # Queue document processing task\n                    app.send_task(\n                        OnyxCeleryTask.DOCPROCESSING_TASK,\n                        kwargs=processing_batch_data,\n                        queue=OnyxCeleryQueues.DOCPROCESSING,\n                        priority=docprocessing_priority,\n                    )\n\n                    batch_num += 1\n                    total_doc_batches_queued += 1\n\n                    logger.info(\n                        f\"Queued document processing batch: \"\n                        f\"batch_num={batch_num} \"\n                        f\"docs={len(doc_batch_cleaned)} \"\n                        f\"attempt={index_attempt_id}\"\n                    )\n\n            # Check checkpoint size periodically\n            CHECKPOINT_SIZE_CHECK_INTERVAL = 100\n            if batch_num % CHECKPOINT_SIZE_CHECK_INTERVAL == 0:\n                check_checkpoint_size(checkpoint)\n\n            # Save latest checkpoint\n            # NOTE: checkpointing is used to track which batches have\n            # been sent to the filestore, NOT which batches have been fully indexed\n            # as it used to be.\n            with get_session_with_current_tenant() as db_session:\n                save_checkpoint(\n                    db_session=db_session,\n                    index_attempt_id=index_attempt_id,\n                    checkpoint=checkpoint,\n                )\n\n        elapsed_time = time.monotonic() - start_time\n\n        logger.info(\n            f\"Document extraction completed: \"\n            f\"attempt={index_attempt_id} \"\n            f\"batches_queued={total_doc_batches_queued} \"\n            f\"elapsed={elapsed_time:.2f}s\"\n        )\n\n        # Set total batches in database to signal extraction completion.\n        # Used by check_for_indexing to determine if the index attempt is complete.\n        with get_session_with_current_tenant() as db_session:\n            IndexingCoordination.set_total_batches(\n                db_session=db_session,\n                index_attempt_id=index_attempt_id,\n                total_batches=batch_num,\n            )\n\n        # Trigger file sync to user's sandbox (if running) - only for FILE_SYSTEM mode\n        # This syncs the newly written documents from S3 to any running sandbox pod\n        if processing_mode == ProcessingMode.FILE_SYSTEM:\n            creator_id = index_attempt.connector_credential_pair.creator_id\n            if creator_id:\n                source_value = db_connector.source.value\n                app.send_task(\n                    OnyxCeleryTask.SANDBOX_FILE_SYNC,\n                    kwargs={\n                        \"user_id\": str(creator_id),\n                        \"tenant_id\": tenant_id,\n                        \"source\": source_value,\n                    },\n                    queue=OnyxCeleryQueues.SANDBOX,\n                )\n                logger.info(\n                    f\"Triggered sandbox file sync for user {creator_id} source={source_value} after indexing complete\"\n                )\n\n    except Exception as e:\n        logger.exception(\n            f\"Document extraction failed: attempt={index_attempt_id} error={str(e)}\"\n        )\n\n        # Do NOT clean up batches on failure; future runs will use those batches\n        # while docfetching will continue from the saved checkpoint if one exists\n\n        if isinstance(e, ConnectorValidationError):\n            # On validation errors during indexing, we want to cancel the indexing attempt\n            # and mark the CCPair as invalid. This prevents the connector from being\n            # used in the future until the credentials are updated.\n            with get_session_with_current_tenant() as db_session_temp:\n                logger.exception(\n                    f\"Marking attempt {index_attempt_id} as canceled due to validation error.\"\n                )\n                mark_attempt_canceled(\n                    index_attempt_id,\n                    db_session_temp,\n                    reason=f\"{CONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX}{str(e)}\",\n                )\n\n                if is_primary:\n                    if not index_attempt:\n                        # should always be set by now\n                        raise RuntimeError(\"Should never happen.\")\n\n                    VALIDATION_ERROR_THRESHOLD = 5\n\n                    recent_index_attempts = get_recent_completed_attempts_for_cc_pair(\n                        cc_pair_id=cc_pair_id,\n                        search_settings_id=index_attempt.search_settings_id,\n                        limit=VALIDATION_ERROR_THRESHOLD,\n                        db_session=db_session_temp,\n                    )\n                    num_validation_errors = len(\n                        [\n                            index_attempt\n                            for index_attempt in recent_index_attempts\n                            if index_attempt.error_msg\n                            and index_attempt.error_msg.startswith(\n                                CONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX\n                            )\n                        ]\n                    )\n\n                    if num_validation_errors >= VALIDATION_ERROR_THRESHOLD:\n                        logger.warning(\n                            f\"Connector {db_connector.id} has {num_validation_errors} consecutive validation\"\n                            f\" errors. Marking the CC Pair as invalid.\"\n                        )\n                        update_connector_credential_pair(\n                            db_session=db_session_temp,\n                            connector_id=db_connector.id,\n                            credential_id=db_credential.id,\n                            status=ConnectorCredentialPairStatus.INVALID,\n                        )\n            raise e\n        elif isinstance(e, ConnectorStopSignal):\n            with get_session_with_current_tenant() as db_session_temp:\n                logger.exception(\n                    f\"Marking attempt {index_attempt_id} as canceled due to stop signal.\"\n                )\n                mark_attempt_canceled(\n                    index_attempt_id,\n                    db_session_temp,\n                    reason=str(e),\n                )\n\n        else:\n            with get_session_with_current_tenant() as db_session_temp:\n                # don't overwrite attempts that are already failed/canceled for another reason\n                index_attempt = get_index_attempt(db_session_temp, index_attempt_id)\n                if index_attempt and index_attempt.status in [\n                    IndexingStatus.CANCELED,\n                    IndexingStatus.FAILED,\n                ]:\n                    logger.info(\n                        f\"Attempt {index_attempt_id} is already failed/canceled, skipping marking as failed.\"\n                    )\n                    raise e\n\n                mark_attempt_failed(\n                    index_attempt_id,\n                    db_session_temp,\n                    failure_reason=str(e),\n                    full_exception_trace=traceback.format_exc(),\n                )\n\n            raise e\n\n    finally:\n        memory_tracer.stop()\n\n\ndef reissue_old_batches(\n    batch_storage: DocumentBatchStorage,\n    index_attempt_id: int,\n    cc_pair_id: int,\n    tenant_id: str,\n    app: Celery,\n    most_recent_attempt: IndexAttempt | None,\n    priority: OnyxCeleryPriority,\n) -> tuple[int, int]:\n    # When loading from a checkpoint, we need to start new docprocessing tasks\n    # tied to the new index attempt for any batches left over in the file store\n    old_batches = batch_storage.get_all_batches_for_cc_pair()\n    batch_storage.update_old_batches_to_new_index_attempt(old_batches)\n    for batch_id in old_batches:\n        logger.info(\n            f\"Re-issuing docprocessing task for batch {batch_id} for index attempt {index_attempt_id}\"\n        )\n        path_info = batch_storage.extract_path_info(batch_id)\n        if path_info is None:\n            logger.warning(\n                f\"Could not extract path info from batch {batch_id}, skipping\"\n            )\n            continue\n        if path_info.cc_pair_id != cc_pair_id:\n            raise RuntimeError(f\"Batch {batch_id} is not for cc pair {cc_pair_id}\")\n\n        app.send_task(\n            OnyxCeleryTask.DOCPROCESSING_TASK,\n            kwargs={\n                \"index_attempt_id\": index_attempt_id,\n                \"cc_pair_id\": cc_pair_id,\n                \"tenant_id\": tenant_id,\n                \"batch_num\": path_info.batch_num,  # use same batch num as previously\n            },\n            queue=OnyxCeleryQueues.DOCPROCESSING,\n            priority=priority,\n        )\n    recent_batches = most_recent_attempt.completed_batches if most_recent_attempt else 0\n    # resume from the batch num of the last attempt. This should be one more\n    # than the last batch created by docfetching regardless of whether the batch\n    # is still in the filestore waiting for processing or not.\n    last_batch_num = len(old_batches) + recent_batches\n    logger.info(\n        f\"Starting from batch {last_batch_num} due to re-issued batches: {old_batches}, completed batches: {recent_batches}\"\n    )\n    return len(old_batches), recent_batches\n"
  },
  {
    "path": "backend/onyx/background/periodic_poller.py",
    "content": "\"\"\"Periodic poller for NO_VECTOR_DB deployments.\n\nReplaces Celery Beat and background workers with a lightweight daemon thread\nthat runs from the API server process.  Two responsibilities:\n\n1. Recovery polling (every 30 s): re-processes user files stuck in\n   PROCESSING / DELETING / needs_sync states via the drain loops defined\n   in ``task_utils.py``.\n\n2. Periodic task execution (configurable intervals): runs LLM model updates\n   and scheduled evals at their configured cadences, with Postgres advisory\n   lock deduplication across multiple API server instances.\n\"\"\"\n\nimport threading\nimport time\nfrom collections.abc import Callable\nfrom dataclasses import dataclass\nfrom dataclasses import field\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nRECOVERY_INTERVAL_SECONDS = 30\nPERIODIC_TASK_LOCK_BASE = 20_000\nPERIODIC_TASK_KV_PREFIX = \"periodic_poller:last_claimed:\"\n\n\n# ------------------------------------------------------------------\n# Periodic task definitions\n# ------------------------------------------------------------------\n\n\n_NEVER_RAN: float = -1e18\n\n\n@dataclass\nclass _PeriodicTaskDef:\n    name: str\n    interval_seconds: float\n    lock_id: int\n    run_fn: Callable[[], None]\n    last_run_at: float = field(default=_NEVER_RAN)\n\n\ndef _run_auto_llm_update() -> None:\n    from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL\n\n    if not AUTO_LLM_CONFIG_URL:\n        return\n\n    from onyx.db.engine.sql_engine import get_session_with_current_tenant\n    from onyx.llm.well_known_providers.auto_update_service import (\n        sync_llm_models_from_github,\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        sync_llm_models_from_github(db_session)\n\n\ndef _run_cache_cleanup() -> None:\n    from onyx.cache.postgres_backend import cleanup_expired_cache_entries\n\n    cleanup_expired_cache_entries()\n\n\ndef _run_scheduled_eval() -> None:\n    from onyx.configs.app_configs import BRAINTRUST_API_KEY\n    from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES\n    from onyx.configs.app_configs import SCHEDULED_EVAL_PERMISSIONS_EMAIL\n    from onyx.configs.app_configs import SCHEDULED_EVAL_PROJECT\n\n    if not all(\n        [\n            BRAINTRUST_API_KEY,\n            SCHEDULED_EVAL_PROJECT,\n            SCHEDULED_EVAL_DATASET_NAMES,\n            SCHEDULED_EVAL_PERMISSIONS_EMAIL,\n        ]\n    ):\n        return\n\n    from datetime import datetime\n    from datetime import timezone\n\n    from onyx.evals.eval import run_eval\n    from onyx.evals.models import EvalConfigurationOptions\n\n    run_timestamp = datetime.now(timezone.utc).strftime(\"%Y-%m-%d\")\n    for dataset_name in SCHEDULED_EVAL_DATASET_NAMES:\n        try:\n            run_eval(\n                configuration=EvalConfigurationOptions(\n                    search_permissions_email=SCHEDULED_EVAL_PERMISSIONS_EMAIL,\n                    dataset_name=dataset_name,\n                    no_send_logs=False,\n                    braintrust_project=SCHEDULED_EVAL_PROJECT,\n                    experiment_name=f\"{dataset_name} - {run_timestamp}\",\n                ),\n                remote_dataset_name=dataset_name,\n            )\n        except Exception:\n            logger.exception(\n                f\"Periodic poller - Failed scheduled eval for dataset {dataset_name}\"\n            )\n\n\n_CACHE_CLEANUP_INTERVAL_SECONDS = 300\n\n\ndef _build_periodic_tasks() -> list[_PeriodicTaskDef]:\n    from onyx.cache.interface import CacheBackendType\n    from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL\n    from onyx.configs.app_configs import AUTO_LLM_UPDATE_INTERVAL_SECONDS\n    from onyx.configs.app_configs import CACHE_BACKEND\n    from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES\n\n    tasks: list[_PeriodicTaskDef] = []\n    if CACHE_BACKEND == CacheBackendType.POSTGRES:\n        tasks.append(\n            _PeriodicTaskDef(\n                name=\"cache-cleanup\",\n                interval_seconds=_CACHE_CLEANUP_INTERVAL_SECONDS,\n                lock_id=PERIODIC_TASK_LOCK_BASE + 2,\n                run_fn=_run_cache_cleanup,\n            )\n        )\n    if AUTO_LLM_CONFIG_URL:\n        tasks.append(\n            _PeriodicTaskDef(\n                name=\"auto-llm-update\",\n                interval_seconds=AUTO_LLM_UPDATE_INTERVAL_SECONDS,\n                lock_id=PERIODIC_TASK_LOCK_BASE,\n                run_fn=_run_auto_llm_update,\n            )\n        )\n    if SCHEDULED_EVAL_DATASET_NAMES:\n        tasks.append(\n            _PeriodicTaskDef(\n                name=\"scheduled-eval\",\n                interval_seconds=7 * 24 * 3600,\n                lock_id=PERIODIC_TASK_LOCK_BASE + 1,\n                run_fn=_run_scheduled_eval,\n            )\n        )\n    return tasks\n\n\n# ------------------------------------------------------------------\n# Periodic task runner with advisory-lock-guarded claim\n# ------------------------------------------------------------------\n\n\ndef _try_claim_task(task_def: _PeriodicTaskDef) -> bool:\n    \"\"\"Atomically check whether *task_def* should run and record a claim.\n\n    Uses a transaction-scoped advisory lock for atomicity combined with a\n    ``KVStore`` timestamp for cross-instance dedup.  The DB session is held\n    only for this brief claim transaction, not during task execution.\n    \"\"\"\n    from datetime import datetime\n    from datetime import timezone\n\n    from sqlalchemy import text\n\n    from onyx.db.engine.sql_engine import get_session_with_current_tenant\n    from onyx.db.models import KVStore\n\n    kv_key = PERIODIC_TASK_KV_PREFIX + task_def.name\n\n    with get_session_with_current_tenant() as db_session:\n        acquired = db_session.execute(\n            text(\"SELECT pg_try_advisory_xact_lock(:id)\"),\n            {\"id\": task_def.lock_id},\n        ).scalar()\n        if not acquired:\n            return False\n\n        row = db_session.query(KVStore).filter_by(key=kv_key).first()\n        if row and row.value is not None:\n            last_claimed = datetime.fromisoformat(str(row.value))\n            elapsed = (datetime.now(timezone.utc) - last_claimed).total_seconds()\n            if elapsed < task_def.interval_seconds:\n                return False\n\n        now_ts = datetime.now(timezone.utc).isoformat()\n        if row:\n            row.value = now_ts\n        else:\n            db_session.add(KVStore(key=kv_key, value=now_ts))\n        db_session.commit()\n\n    return True\n\n\ndef _try_run_periodic_task(task_def: _PeriodicTaskDef) -> None:\n    \"\"\"Run *task_def* if its interval has elapsed and no peer holds the lock.\"\"\"\n    now = time.monotonic()\n    if now - task_def.last_run_at < task_def.interval_seconds:\n        return\n\n    if not _try_claim_task(task_def):\n        return\n\n    try:\n        task_def.run_fn()\n        task_def.last_run_at = now\n    except Exception:\n        logger.exception(\n            f\"Periodic poller - Error running periodic task {task_def.name}\"\n        )\n\n\n# ------------------------------------------------------------------\n# Recovery / drain loop runner\n# ------------------------------------------------------------------\n\n\ndef _run_drain_loops(tenant_id: str) -> None:\n    from onyx.background.task_utils import drain_delete_loop\n    from onyx.background.task_utils import drain_processing_loop\n    from onyx.background.task_utils import drain_project_sync_loop\n\n    drain_processing_loop(tenant_id)\n    drain_delete_loop(tenant_id)\n    drain_project_sync_loop(tenant_id)\n\n\n# ------------------------------------------------------------------\n# Startup recovery (10g)\n# ------------------------------------------------------------------\n\n\ndef recover_stuck_user_files(tenant_id: str) -> None:\n    \"\"\"Run all drain loops once to re-process files left in intermediate states.\n\n    Called from ``lifespan()`` on startup when ``DISABLE_VECTOR_DB`` is set.\n    \"\"\"\n    logger.info(\"recover_stuck_user_files - Checking for stuck user files\")\n    try:\n        _run_drain_loops(tenant_id)\n    except Exception:\n        logger.exception(\"recover_stuck_user_files - Error during recovery\")\n\n\n# ------------------------------------------------------------------\n# Daemon thread (10f)\n# ------------------------------------------------------------------\n\n_shutdown_event = threading.Event()\n_poller_thread: threading.Thread | None = None\n\n\ndef _poller_loop(tenant_id: str) -> None:\n    from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n    CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n\n    periodic_tasks = _build_periodic_tasks()\n    logger.info(\n        f\"Periodic poller started with {len(periodic_tasks)} periodic task(s): {[t.name for t in periodic_tasks]}\"\n    )\n\n    while not _shutdown_event.is_set():\n        try:\n            _run_drain_loops(tenant_id)\n        except Exception:\n            logger.exception(\"Periodic poller - Error in recovery polling\")\n\n        for task_def in periodic_tasks:\n            try:\n                _try_run_periodic_task(task_def)\n            except Exception:\n                logger.exception(\n                    f\"Periodic poller - Unhandled error checking task {task_def.name}\"\n                )\n\n        _shutdown_event.wait(RECOVERY_INTERVAL_SECONDS)\n\n\ndef start_periodic_poller(tenant_id: str) -> None:\n    \"\"\"Start the periodic poller daemon thread.\"\"\"\n    global _poller_thread  # noqa: PLW0603\n    _shutdown_event.clear()\n    _poller_thread = threading.Thread(\n        target=_poller_loop,\n        args=(tenant_id,),\n        daemon=True,\n        name=\"no-vectordb-periodic-poller\",\n    )\n    _poller_thread.start()\n    logger.info(\"Periodic poller thread started\")\n\n\ndef stop_periodic_poller() -> None:\n    \"\"\"Signal the periodic poller to stop and wait for it to exit.\"\"\"\n    global _poller_thread  # noqa: PLW0603\n    if _poller_thread is None:\n        return\n    _shutdown_event.set()\n    _poller_thread.join(timeout=10)\n    if _poller_thread.is_alive():\n        logger.warning(\"Periodic poller thread did not stop within timeout\")\n    _poller_thread = None\n    logger.info(\"Periodic poller thread stopped\")\n"
  },
  {
    "path": "backend/onyx/background/task_utils.py",
    "content": "\"\"\"Background task utilities.\n\nContains query-history report helpers (used by all deployment modes) and\nin-process background task execution helpers for NO_VECTOR_DB mode:\n\n- Atomic claim-and-mark helpers that prevent duplicate processing\n- Drain loops that process all pending user file work\n\nEach claim function runs a short-lived transaction: SELECT ... FOR UPDATE\nSKIP LOCKED, UPDATE the row to remove it from future queries, COMMIT.\nAfter the commit the row lock is released, but the row is no longer\neligible for re-claiming.  No long-lived sessions or advisory locks.\n\"\"\"\n\nfrom uuid import UUID\n\nimport sqlalchemy as sa\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.enums import UserFileStatus\nfrom onyx.db.models import UserFile\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# ------------------------------------------------------------------\n# Query-history report helpers (pre-existing, used by all modes)\n# ------------------------------------------------------------------\n\nQUERY_REPORT_NAME_PREFIX = \"query-history\"\n\n\ndef construct_query_history_report_name(\n    task_id: str,\n) -> str:\n    return f\"{QUERY_REPORT_NAME_PREFIX}-{task_id}.csv\"\n\n\ndef extract_task_id_from_query_history_report_name(name: str) -> str:\n    return name.removeprefix(f\"{QUERY_REPORT_NAME_PREFIX}-\").removesuffix(\".csv\")\n\n\n# ------------------------------------------------------------------\n# Atomic claim-and-mark helpers\n# ------------------------------------------------------------------\n# Each function runs inside a single short-lived session/transaction:\n#   1. SELECT ... FOR UPDATE SKIP LOCKED  (locks one eligible row)\n#   2. UPDATE the row so it is no longer eligible\n#   3. COMMIT  (releases the row lock)\n# After the commit, no other drain loop can claim the same row.\n\n\ndef _claim_next_processing_file(db_session: Session) -> UUID | None:\n    \"\"\"Claim the next PROCESSING file by transitioning it to INDEXING.\n\n    Returns the file id, or None when no eligible files remain.\n    \"\"\"\n    file_id = db_session.execute(\n        select(UserFile.id)\n        .where(UserFile.status == UserFileStatus.PROCESSING)\n        .order_by(UserFile.created_at)\n        .limit(1)\n        .with_for_update(skip_locked=True)\n    ).scalar_one_or_none()\n    if file_id is None:\n        return None\n\n    db_session.execute(\n        sa.update(UserFile)\n        .where(UserFile.id == file_id)\n        .values(status=UserFileStatus.INDEXING)\n    )\n    db_session.commit()\n    return file_id\n\n\ndef _claim_next_deleting_file(\n    db_session: Session,\n    exclude_ids: set[UUID] | None = None,\n) -> UUID | None:\n    \"\"\"Claim the next DELETING file.\n\n    No status transition needed — the impl deletes the row on success.\n    The short-lived FOR UPDATE lock prevents concurrent claims.\n    *exclude_ids* prevents re-processing the same file if the impl fails.\n    \"\"\"\n    stmt = (\n        select(UserFile.id)\n        .where(UserFile.status == UserFileStatus.DELETING)\n        .order_by(UserFile.created_at)\n        .limit(1)\n        .with_for_update(skip_locked=True)\n    )\n    if exclude_ids:\n        stmt = stmt.where(UserFile.id.notin_(exclude_ids))\n    file_id = db_session.execute(stmt).scalar_one_or_none()\n    db_session.commit()\n    return file_id\n\n\ndef _claim_next_sync_file(\n    db_session: Session,\n    exclude_ids: set[UUID] | None = None,\n) -> UUID | None:\n    \"\"\"Claim the next file needing project/persona sync.\n\n    No status transition needed — the impl clears the sync flags on\n    success.  The short-lived FOR UPDATE lock prevents concurrent claims.\n    *exclude_ids* prevents re-processing the same file if the impl fails.\n    \"\"\"\n    stmt = (\n        select(UserFile.id)\n        .where(\n            sa.and_(\n                sa.or_(\n                    UserFile.needs_project_sync.is_(True),\n                    UserFile.needs_persona_sync.is_(True),\n                ),\n                UserFile.status == UserFileStatus.COMPLETED,\n            )\n        )\n        .order_by(UserFile.created_at)\n        .limit(1)\n        .with_for_update(skip_locked=True)\n    )\n    if exclude_ids:\n        stmt = stmt.where(UserFile.id.notin_(exclude_ids))\n    file_id = db_session.execute(stmt).scalar_one_or_none()\n    db_session.commit()\n    return file_id\n\n\n# ------------------------------------------------------------------\n# Drain loops — process *all* pending work of each type\n# ------------------------------------------------------------------\n\n\ndef drain_processing_loop(tenant_id: str) -> None:\n    \"\"\"Process all pending PROCESSING user files.\"\"\"\n    from onyx.background.celery.tasks.user_file_processing.tasks import (\n        process_user_file_impl,\n    )\n    from onyx.db.engine.sql_engine import get_session_with_current_tenant\n\n    while True:\n        with get_session_with_current_tenant() as session:\n            file_id = _claim_next_processing_file(session)\n        if file_id is None:\n            break\n        try:\n            process_user_file_impl(\n                user_file_id=str(file_id),\n                tenant_id=tenant_id,\n                redis_locking=False,\n            )\n        except Exception:\n            logger.exception(f\"Failed to process user file {file_id}\")\n\n\ndef drain_delete_loop(tenant_id: str) -> None:\n    \"\"\"Delete all pending DELETING user files.\"\"\"\n    from onyx.background.celery.tasks.user_file_processing.tasks import (\n        delete_user_file_impl,\n    )\n    from onyx.db.engine.sql_engine import get_session_with_current_tenant\n\n    failed: set[UUID] = set()\n    while True:\n        with get_session_with_current_tenant() as session:\n            file_id = _claim_next_deleting_file(session, exclude_ids=failed)\n        if file_id is None:\n            break\n        try:\n            delete_user_file_impl(\n                user_file_id=str(file_id),\n                tenant_id=tenant_id,\n                redis_locking=False,\n            )\n        except Exception:\n            logger.exception(f\"Failed to delete user file {file_id}\")\n            failed.add(file_id)\n\n\ndef drain_project_sync_loop(tenant_id: str) -> None:\n    \"\"\"Sync all pending project/persona metadata for user files.\"\"\"\n    from onyx.background.celery.tasks.user_file_processing.tasks import (\n        project_sync_user_file_impl,\n    )\n    from onyx.db.engine.sql_engine import get_session_with_current_tenant\n\n    failed: set[UUID] = set()\n    while True:\n        with get_session_with_current_tenant() as session:\n            file_id = _claim_next_sync_file(session, exclude_ids=failed)\n        if file_id is None:\n            break\n        try:\n            project_sync_user_file_impl(\n                user_file_id=str(file_id),\n                tenant_id=tenant_id,\n                redis_locking=False,\n            )\n        except Exception:\n            logger.exception(f\"Failed to sync user file {file_id}\")\n            failed.add(file_id)\n"
  },
  {
    "path": "backend/onyx/cache/factory.py",
    "content": "from collections.abc import Callable\n\nfrom onyx.cache.interface import CacheBackend\nfrom onyx.cache.interface import CacheBackendType\nfrom onyx.configs.app_configs import CACHE_BACKEND\n\n\ndef _build_redis_backend(tenant_id: str) -> CacheBackend:\n    from onyx.cache.redis_backend import RedisCacheBackend\n    from onyx.redis.redis_pool import redis_pool\n\n    return RedisCacheBackend(redis_pool.get_client(tenant_id))\n\n\ndef _build_postgres_backend(tenant_id: str) -> CacheBackend:\n    from onyx.cache.postgres_backend import PostgresCacheBackend\n\n    return PostgresCacheBackend(tenant_id)\n\n\n_BACKEND_BUILDERS: dict[CacheBackendType, Callable[[str], CacheBackend]] = {\n    CacheBackendType.REDIS: _build_redis_backend,\n    CacheBackendType.POSTGRES: _build_postgres_backend,\n}\n\n\ndef get_cache_backend(*, tenant_id: str | None = None) -> CacheBackend:\n    \"\"\"Return a tenant-aware ``CacheBackend``.\n\n    If *tenant_id* is ``None``, the current tenant is read from the\n    thread-local context variable (same behaviour as ``get_redis_client``).\n    \"\"\"\n    if tenant_id is None:\n        from shared_configs.contextvars import get_current_tenant_id\n\n        tenant_id = get_current_tenant_id()\n\n    builder = _BACKEND_BUILDERS.get(CACHE_BACKEND)\n    if builder is None:\n        raise ValueError(\n            f\"Unsupported CACHE_BACKEND={CACHE_BACKEND!r}. Supported values: {[t.value for t in CacheBackendType]}\"\n        )\n    return builder(tenant_id)\n\n\ndef get_shared_cache_backend() -> CacheBackend:\n    \"\"\"Return a ``CacheBackend`` in the shared (cross-tenant) namespace.\"\"\"\n    from shared_configs.configs import DEFAULT_REDIS_PREFIX\n\n    return get_cache_backend(tenant_id=DEFAULT_REDIS_PREFIX)\n"
  },
  {
    "path": "backend/onyx/cache/interface.py",
    "content": "import abc\nfrom enum import Enum\n\nfrom redis.exceptions import RedisError\nfrom sqlalchemy.exc import SQLAlchemyError\n\nTTL_KEY_NOT_FOUND = -2\nTTL_NO_EXPIRY = -1\n\nCACHE_TRANSIENT_ERRORS: tuple[type[Exception], ...] = (RedisError, SQLAlchemyError)\n\"\"\"Exception types that represent transient cache connectivity / operational\nfailures.  Callers that want to fail-open (or fail-closed) on cache errors\nshould catch this tuple instead of bare ``Exception``.\n\nWhen adding a new ``CacheBackend`` implementation, add its transient error\nbase class(es) here so all call-sites pick it up automatically.\"\"\"\n\n\nclass CacheBackendType(str, Enum):\n    REDIS = \"redis\"\n    POSTGRES = \"postgres\"\n\n\nclass CacheLock(abc.ABC):\n    \"\"\"Abstract distributed lock returned by CacheBackend.lock().\"\"\"\n\n    @abc.abstractmethod\n    def acquire(\n        self,\n        blocking: bool = True,\n        blocking_timeout: float | None = None,\n    ) -> bool:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def release(self) -> None:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def owned(self) -> bool:\n        raise NotImplementedError\n\n    def __enter__(self) -> \"CacheLock\":\n        if not self.acquire():\n            raise RuntimeError(\"Failed to acquire lock\")\n        return self\n\n    def __exit__(self, *args: object) -> None:\n        self.release()\n\n\nclass CacheBackend(abc.ABC):\n    \"\"\"Thin abstraction over a key-value cache with TTL, locks, and blocking lists.\n\n    Covers the subset of Redis operations used outside of Celery. When\n    CACHE_BACKEND=postgres, a PostgreSQL-backed implementation is used instead.\n    \"\"\"\n\n    # -- basic key/value ---------------------------------------------------\n\n    @abc.abstractmethod\n    def get(self, key: str) -> bytes | None:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def set(\n        self,\n        key: str,\n        value: str | bytes | int | float,\n        ex: int | None = None,\n    ) -> None:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def delete(self, key: str) -> None:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def exists(self, key: str) -> bool:\n        raise NotImplementedError\n\n    # -- TTL ---------------------------------------------------------------\n\n    @abc.abstractmethod\n    def expire(self, key: str, seconds: int) -> None:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def ttl(self, key: str) -> int:\n        \"\"\"Return remaining TTL in seconds.\n\n        Returns ``TTL_NO_EXPIRY`` (-1) if key exists without expiry,\n        ``TTL_KEY_NOT_FOUND`` (-2) if key is missing or expired.\n        \"\"\"\n        raise NotImplementedError\n\n    # -- distributed lock --------------------------------------------------\n\n    @abc.abstractmethod\n    def lock(self, name: str, timeout: float | None = None) -> CacheLock:\n        raise NotImplementedError\n\n    # -- blocking list (used by MCP OAuth BLPOP pattern) -------------------\n\n    @abc.abstractmethod\n    def rpush(self, key: str, value: str | bytes) -> None:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:\n        \"\"\"Block until a value is available on one of *keys*, or *timeout* expires.\n\n        Returns ``(key, value)`` or ``None`` on timeout.\n        \"\"\"\n        raise NotImplementedError\n"
  },
  {
    "path": "backend/onyx/cache/postgres_backend.py",
    "content": "\"\"\"PostgreSQL-backed ``CacheBackend`` for NO_VECTOR_DB deployments.\n\nUses the ``cache_store`` table for key-value storage, PostgreSQL advisory locks\nfor distributed locking, and a polling loop for the BLPOP pattern.\n\"\"\"\n\nimport hashlib\nimport struct\nimport time\nimport uuid\nfrom contextlib import AbstractContextManager\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nfrom sqlalchemy import delete\nfrom sqlalchemy import func\nfrom sqlalchemy import or_\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\nfrom sqlalchemy.orm import Session\n\nfrom onyx.cache.interface import CacheBackend\nfrom onyx.cache.interface import CacheLock\nfrom onyx.cache.interface import TTL_KEY_NOT_FOUND\nfrom onyx.cache.interface import TTL_NO_EXPIRY\nfrom onyx.db.models import CacheStore\n\n_LIST_KEY_PREFIX = \"_q:\"\n# ASCII: ':' (0x3A) < ';' (0x3B). Upper bound for range queries so [prefix+, prefix;)\n# captures all list-item keys (e.g. _q:mylist:123:uuid) without including other\n# lists whose names share a prefix (e.g. _q:mylist2:...).\n_LIST_KEY_RANGE_TERMINATOR = \";\"\n_LIST_ITEM_TTL_SECONDS = 3600\n_LOCK_POLL_INTERVAL = 0.1\n_BLPOP_POLL_INTERVAL = 0.25\n\n\ndef _list_item_key(key: str) -> str:\n    \"\"\"Unique key for a list item. Timestamp for FIFO ordering; UUID prevents\n    collision when concurrent rpush calls occur within the same nanosecond.\n    \"\"\"\n    return f\"{_LIST_KEY_PREFIX}{key}:{time.time_ns()}:{uuid.uuid4().hex}\"\n\n\ndef _to_bytes(value: str | bytes | int | float) -> bytes:\n    if isinstance(value, bytes):\n        return value\n    return str(value).encode()\n\n\n# ------------------------------------------------------------------\n# Lock\n# ------------------------------------------------------------------\n\n\nclass PostgresCacheLock(CacheLock):\n    \"\"\"Advisory-lock-based distributed lock.\n\n    Uses ``get_session_with_tenant`` for connection lifecycle.  The lock is tied\n    to the session's connection; releasing or closing the session frees it.\n\n    NOTE: Unlike Redis locks, advisory locks do not auto-expire after\n    ``timeout`` seconds.  They are released when ``release()`` is\n    called or when the session is closed.\n    \"\"\"\n\n    def __init__(self, lock_id: int, timeout: float | None, tenant_id: str) -> None:\n        self._lock_id = lock_id\n        self._timeout = timeout\n        self._tenant_id = tenant_id\n        self._session_cm: AbstractContextManager[Session] | None = None\n        self._session: Session | None = None\n        self._acquired = False\n\n    def acquire(\n        self,\n        blocking: bool = True,\n        blocking_timeout: float | None = None,\n    ) -> bool:\n        from onyx.db.engine.sql_engine import get_session_with_tenant\n\n        self._session_cm = get_session_with_tenant(tenant_id=self._tenant_id)\n        self._session = self._session_cm.__enter__()\n        try:\n            if not blocking:\n                return self._try_lock()\n\n            effective_timeout = blocking_timeout or self._timeout\n            deadline = (\n                (time.monotonic() + effective_timeout) if effective_timeout else None\n            )\n            while True:\n                if self._try_lock():\n                    return True\n                if deadline is not None and time.monotonic() >= deadline:\n                    return False\n                time.sleep(_LOCK_POLL_INTERVAL)\n        finally:\n            if not self._acquired:\n                self._close_session()\n\n    def release(self) -> None:\n        if not self._acquired or self._session is None:\n            return\n        try:\n            self._session.execute(select(func.pg_advisory_unlock(self._lock_id)))\n        finally:\n            self._acquired = False\n            self._close_session()\n\n    def owned(self) -> bool:\n        return self._acquired\n\n    def _close_session(self) -> None:\n        if self._session_cm is not None:\n            try:\n                self._session_cm.__exit__(None, None, None)\n            finally:\n                self._session_cm = None\n                self._session = None\n\n    def _try_lock(self) -> bool:\n        assert self._session is not None\n        result = self._session.execute(\n            select(func.pg_try_advisory_lock(self._lock_id))\n        ).scalar()\n        if result:\n            self._acquired = True\n            return True\n        return False\n\n\n# ------------------------------------------------------------------\n# Backend\n# ------------------------------------------------------------------\n\n\nclass PostgresCacheBackend(CacheBackend):\n    \"\"\"``CacheBackend`` backed by the ``cache_store`` table in PostgreSQL.\n\n    Each operation opens and closes its own database session so the backend\n    is safe to share across threads.  Tenant isolation is handled by\n    SQLAlchemy's ``schema_translate_map`` (set by ``get_session_with_tenant``).\n    \"\"\"\n\n    def __init__(self, tenant_id: str) -> None:\n        self._tenant_id = tenant_id\n\n    # -- basic key/value ---------------------------------------------------\n\n    def get(self, key: str) -> bytes | None:\n        from onyx.db.engine.sql_engine import get_session_with_tenant\n\n        stmt = select(CacheStore.value).where(\n            CacheStore.key == key,\n            or_(CacheStore.expires_at.is_(None), CacheStore.expires_at > func.now()),\n        )\n        with get_session_with_tenant(tenant_id=self._tenant_id) as session:\n            value = session.execute(stmt).scalar_one_or_none()\n        if value is None:\n            return None\n        return bytes(value)\n\n    def set(\n        self,\n        key: str,\n        value: str | bytes | int | float,\n        ex: int | None = None,\n    ) -> None:\n        from onyx.db.engine.sql_engine import get_session_with_tenant\n\n        value_bytes = _to_bytes(value)\n        expires_at = (\n            datetime.now(timezone.utc) + timedelta(seconds=ex)\n            if ex is not None\n            else None\n        )\n        stmt = (\n            pg_insert(CacheStore)\n            .values(key=key, value=value_bytes, expires_at=expires_at)\n            .on_conflict_do_update(\n                index_elements=[CacheStore.key],\n                set_={\"value\": value_bytes, \"expires_at\": expires_at},\n            )\n        )\n        with get_session_with_tenant(tenant_id=self._tenant_id) as session:\n            session.execute(stmt)\n            session.commit()\n\n    def delete(self, key: str) -> None:\n        from onyx.db.engine.sql_engine import get_session_with_tenant\n\n        with get_session_with_tenant(tenant_id=self._tenant_id) as session:\n            session.execute(delete(CacheStore).where(CacheStore.key == key))\n            session.commit()\n\n    def exists(self, key: str) -> bool:\n        from onyx.db.engine.sql_engine import get_session_with_tenant\n\n        stmt = (\n            select(CacheStore.key)\n            .where(\n                CacheStore.key == key,\n                or_(\n                    CacheStore.expires_at.is_(None),\n                    CacheStore.expires_at > func.now(),\n                ),\n            )\n            .limit(1)\n        )\n        with get_session_with_tenant(tenant_id=self._tenant_id) as session:\n            return session.execute(stmt).first() is not None\n\n    # -- TTL ---------------------------------------------------------------\n\n    def expire(self, key: str, seconds: int) -> None:\n        from onyx.db.engine.sql_engine import get_session_with_tenant\n\n        new_exp = datetime.now(timezone.utc) + timedelta(seconds=seconds)\n        stmt = (\n            update(CacheStore).where(CacheStore.key == key).values(expires_at=new_exp)\n        )\n        with get_session_with_tenant(tenant_id=self._tenant_id) as session:\n            session.execute(stmt)\n            session.commit()\n\n    def ttl(self, key: str) -> int:\n        from onyx.db.engine.sql_engine import get_session_with_tenant\n\n        stmt = select(CacheStore.expires_at).where(CacheStore.key == key)\n        with get_session_with_tenant(tenant_id=self._tenant_id) as session:\n            result = session.execute(stmt).first()\n        if result is None:\n            return TTL_KEY_NOT_FOUND\n        expires_at: datetime | None = result[0]\n        if expires_at is None:\n            return TTL_NO_EXPIRY\n        remaining = (expires_at - datetime.now(timezone.utc)).total_seconds()\n        if remaining <= 0:\n            return TTL_KEY_NOT_FOUND\n        return int(remaining)\n\n    # -- distributed lock --------------------------------------------------\n\n    def lock(self, name: str, timeout: float | None = None) -> CacheLock:\n        return PostgresCacheLock(\n            self._lock_id_for(name), timeout, tenant_id=self._tenant_id\n        )\n\n    # -- blocking list (MCP OAuth BLPOP pattern) ---------------------------\n\n    def rpush(self, key: str, value: str | bytes) -> None:\n        self.set(_list_item_key(key), value, ex=_LIST_ITEM_TTL_SECONDS)\n\n    def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:\n        if timeout <= 0:\n            raise ValueError(\n                \"PostgresCacheBackend.blpop requires timeout > 0. \"\n                \"timeout=0 would block the calling thread indefinitely \"\n                \"with no way to interrupt short of process termination.\"\n            )\n        from onyx.db.engine.sql_engine import get_session_with_tenant\n\n        deadline = time.monotonic() + timeout\n        while True:\n            for key in keys:\n                lower = f\"{_LIST_KEY_PREFIX}{key}:\"\n                upper = f\"{_LIST_KEY_PREFIX}{key}{_LIST_KEY_RANGE_TERMINATOR}\"\n                stmt = (\n                    select(CacheStore)\n                    .where(\n                        CacheStore.key >= lower,\n                        CacheStore.key < upper,\n                        or_(\n                            CacheStore.expires_at.is_(None),\n                            CacheStore.expires_at > func.now(),\n                        ),\n                    )\n                    .order_by(CacheStore.key)\n                    .limit(1)\n                    .with_for_update(skip_locked=True)\n                )\n                with get_session_with_tenant(tenant_id=self._tenant_id) as session:\n                    row = session.execute(stmt).scalars().first()\n                    if row is not None:\n                        value = bytes(row.value) if row.value else b\"\"\n                        session.delete(row)\n                        session.commit()\n                        return (key.encode(), value)\n            if time.monotonic() >= deadline:\n                return None\n            time.sleep(_BLPOP_POLL_INTERVAL)\n\n    # -- helpers -----------------------------------------------------------\n\n    def _lock_id_for(self, name: str) -> int:\n        \"\"\"Map *name* to a 64-bit signed int for ``pg_advisory_lock``.\"\"\"\n        h = hashlib.md5(\n            f\"{self._tenant_id}:{name}\".encode(), usedforsecurity=False\n        ).digest()\n        return struct.unpack(\"q\", h[:8])[0]\n\n\n# ------------------------------------------------------------------\n# Periodic cleanup\n# ------------------------------------------------------------------\n\n\ndef cleanup_expired_cache_entries() -> None:\n    \"\"\"Delete rows whose ``expires_at`` is in the past.\n\n    Called by the periodic poller every 5 minutes.\n    \"\"\"\n    from onyx.db.engine.sql_engine import get_session_with_current_tenant\n\n    with get_session_with_current_tenant() as session:\n        session.execute(\n            delete(CacheStore).where(\n                CacheStore.expires_at.is_not(None),\n                CacheStore.expires_at < func.now(),\n            )\n        )\n        session.commit()\n"
  },
  {
    "path": "backend/onyx/cache/redis_backend.py",
    "content": "from typing import cast\n\nfrom redis.client import Redis\nfrom redis.lock import Lock as RedisLock\n\nfrom onyx.cache.interface import CacheBackend\nfrom onyx.cache.interface import CacheLock\n\n\nclass RedisCacheLock(CacheLock):\n    \"\"\"Wraps ``redis.lock.Lock`` behind the ``CacheLock`` interface.\"\"\"\n\n    def __init__(self, lock: RedisLock) -> None:\n        self._lock = lock\n\n    def acquire(\n        self,\n        blocking: bool = True,\n        blocking_timeout: float | None = None,\n    ) -> bool:\n        return bool(\n            self._lock.acquire(\n                blocking=blocking,\n                blocking_timeout=blocking_timeout,\n            )\n        )\n\n    def release(self) -> None:\n        self._lock.release()\n\n    def owned(self) -> bool:\n        return bool(self._lock.owned())\n\n\nclass RedisCacheBackend(CacheBackend):\n    \"\"\"``CacheBackend`` implementation that delegates to a ``redis.Redis`` client.\n\n    This is a thin pass-through — every method maps 1-to-1 to the underlying\n    Redis command.  ``TenantRedis`` key-prefixing is handled by the client\n    itself (provided by ``get_redis_client``).\n    \"\"\"\n\n    def __init__(self, redis_client: Redis) -> None:\n        self._r = redis_client\n\n    # -- basic key/value ---------------------------------------------------\n\n    def get(self, key: str) -> bytes | None:\n        val = self._r.get(key)\n        if val is None:\n            return None\n        if isinstance(val, bytes):\n            return val\n        return str(val).encode()\n\n    def set(\n        self,\n        key: str,\n        value: str | bytes | int | float,\n        ex: int | None = None,\n    ) -> None:\n        self._r.set(key, value, ex=ex)\n\n    def delete(self, key: str) -> None:\n        self._r.delete(key)\n\n    def exists(self, key: str) -> bool:\n        return bool(self._r.exists(key))\n\n    # -- TTL ---------------------------------------------------------------\n\n    def expire(self, key: str, seconds: int) -> None:\n        self._r.expire(key, seconds)\n\n    def ttl(self, key: str) -> int:\n        return cast(int, self._r.ttl(key))\n\n    # -- distributed lock --------------------------------------------------\n\n    def lock(self, name: str, timeout: float | None = None) -> CacheLock:\n        return RedisCacheLock(self._r.lock(name, timeout=timeout))\n\n    # -- blocking list (MCP OAuth BLPOP pattern) ---------------------------\n\n    def rpush(self, key: str, value: str | bytes) -> None:\n        self._r.rpush(key, value)\n\n    def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:\n        result = cast(list[bytes] | None, self._r.blpop(keys, timeout=timeout))\n        if result is None:\n            return None\n        return (result[0], result[1])\n"
  },
  {
    "path": "backend/onyx/chat/COMPRESSION.md",
    "content": "# Chat History Compression\n\nCompresses long chat histories by summarizing older messages while keeping recent ones verbatim.\n\n## Architecture Decisions\n\n### Branch-Aware via Tree Structure\nSummaries are stored as `ChatMessage` records with two key fields:\n- `parent_message_id` → last message when compression triggered (places summary in the tree)\n- `last_summarized_message_id` → pointer to an older message up the chain (the cutoff). Messages after this are kept verbatim.\n\n**Why store summary as a separate message?** If we embedded the summary in the `last_summarized_message_id` message itself, that message would contain context from messages that came after it—context that doesn't exist in other branches. By creating the summary as a new message attached to the branch tip, it only applies to the specific branch where compression occurred. It's only back-pointed to by the\nbranch which it applies to. All of this is necessary because we keep the last few messages verbatim and also to support branching logic.\n\n### Progressive Summarization\nSubsequent compressions incorporate the existing summary text + new messages, preventing information loss in very long conversations.\n\n### Cutoff Marker Prompt Strategy\nThe LLM receives older messages, a cutoff marker, then recent messages. It summarizes only content before the marker while using recent context to inform what's important.\n\n## Token Budget\n\nContext window breakdown:\n- `max_context_tokens` — LLM's total context window\n- `reserved_tokens` — space for system prompt, tools, files, etc.\n- Available for chat history = `max_context_tokens - reserved_tokens`\nNote: If there is a lot of reserved tokens, chat compression may happen fairly frequently which is costly, slow, and leads to a bad user experience. Possible area of future improvement.\n\nConfigurable ratios:\n- `COMPRESSION_TRIGGER_RATIO` (default 0.75) — compress when chat history exceeds this ratio of available space\n- `RECENT_MESSAGES_RATIO` (default 0.2) — portion of chat history to keep verbatim when compressing\n\n## Flow\n\n1. Trigger when `history_tokens > available * 0.75`\n2. Find existing summary for branch (if any)\n3. Split messages: older (summarize) / recent (keep 25%)\n4. Generate summary via LLM\n5. Save as `ChatMessage` with `parent_message_id` + `last_summarized_message_id`\n\n## Key Functions\n\n| Function | Purpose |\n|----------|---------|\n| `get_compression_params` | Check if compression needed based on token counts |\n| `find_summary_for_branch` | Find applicable summary by checking `parent_message_id` membership |\n| `get_messages_to_summarize` | Split messages at token budget boundary |\n| `compress_chat_history` | Orchestrate flow, save summary message |\n"
  },
  {
    "path": "backend/onyx/chat/README.md",
    "content": "# Overview of Context Management\n\nThis document reviews some design decisions around the main agent-loop powering Onyx's chat flow.\nIt is highly recommended for all engineers contributing to this flow to be familiar with the concepts here.\n\n> Note: it is assumed the reader is familiar with the Onyx product and features such as Projects, User files, Citations, etc. \n\n## System Prompt\n\nThe system prompt is a default prompt that comes packaged with the system. Users can edit the default prompt and it will be persisted in the database.\n\nSome parts of the system prompt are dynamically updated / inserted:\n\n- Datetime of the message sent\n- Tools description of when to use certain tools depending on if the tool is available in that cycle\n- If the user has just called a search related tool, then a section about citations is included\n\n## Custom Agent Prompt\n\nThe custom agent is inserted as a user message above the most recent user message, it is dynamically moved in the history as the user sends more messages.\nIf the user has opted to completely replace the System Prompt, then this Custom Agent prompt replaces the system prompt and does not move along the history.\n\n## How Files are handled\n\nOn upload, Files are processed for tokens, if too many tokens to fit in the context, it’s considered a failed inclusion. This is done using the LLM tokenizer.\n\n- In many cases, there is not a known tokenizer for each LLM so there is a default tokenizer used as a catchall.\n- File upload happens in 2 parts - the actual upload + token counting.\n- Files are added into chat context as a “point in time” inclusion and move up the context window as the conversation progresses.\n  Every file knows how many tokens it is (model agnostic), image files have some assumed number of tokens.\n\nImage files are attached to User Messages also as point in time inclusions.\n\n**Future Extension**:\nFiles selected from the search results are also counted as “point in time” inclusions. Files that are too large cannot be selected.\nFor these files, the \"entire file\" does not exist for most connectors, it's pieced back together from the search engine.\n\n## Projects\n\nIf a Project contains few enough files that it all fits in the model context, we keep it close enough in the history to ensure it is easy for the LLM to\naccess. Note that the project documents are assumed to be quite useful and that they should 1. never be dropped from context, 2. is not just a needle in\na haystack type search with a strong keyword to make the LLM attend to it.\n\nProject files are vectorized and stored in the Search Engine so that if the user chooses a model with less context than the number of tokens in the project,\nthe system can RAG over the project files.\n\n## How documents are represented\n\nDocuments from search or uploaded Project files are represented as a json so that the LLM can easily understand it. It is represented with a prefix string to\nmake the context clearer to the LLM. Note that for search results (whether web or internal, it will just be the json) and it will be a Tool Call type of\nmessage rather than a user message.\n\n```\nHere are some documents provided for context, they may not all be relevant:\n{\n    \"documents\": [\n        {\"document\": 1, \"title\": \"Hello\", \"metadata\": \"status closed\", \"contents\": \"Foo\"},\n        {\"document\": 2, \"title\": \"World\", \"contents\": \"Bar\"}\n    ]\n}\n```\n\nDocuments are represented with the `document` key so that the LLM can easily cite them with a single number. The tool returns have to be richer to be able to\ntranslate this into links and other UI elements. What the LLM sees is far simpler to reduce noise/hallucinations.\n\nNote that documents included in a single turn should be collapsed into a single user message.\n\nSearch tools also give URLs to the LLM so that open_url (a separate tool) can be called on them.\n\n## Reminders\n\nTo ensure the LLM follows certain specific instructions, instructions are added at the very end of the chat context as a user message. If a search related\ntool is used, a citation reminder is always added. Otherwise, by default there is no reminder. If the user configures reminders, those are added to the\nfinal message. If a search related tool just ran and the user has reminders, both appear in a single message.\n\nIf a search related tool is called at any point during the turn, the reminder will remain at the end until the turn is over and the agent has responded.\n\n## Tool Calls\n\nAs tool call responses can get very long (like an internal search can be many thousands of tokens), tool responses are current replaced with a hardcoded\nstring saying it is no longer available. Tool Call details like the search query and other arguments are kept in the history as this is information\nrich and generally very few tokens.\n\n> Note: in the Internal Search flow with query expansion, the Tool Call which was actually run differs from what the LLM provided as arguments.\n> What the LLM sees in the history (to be most informative for future calls) is the full set of expanded queries.\n\n**Possible Future Extension**:\nInstead of dropping the Tool Call response, we might summarize it using an LLM so that it is just 1-2 sentences and captures the main points. That said,\nthis is questionable value add because anything relevant and useful should be already captured in the Agent response.\n\n## Examples\n\n```\nS -> System Message\nCA -> Custom Agent as a User Message\nA -> Agent Message response to user\nU -> User Message\nTC -> Agent Message for a tool call\nTR -> Tool response\nR -> Reminder\nF -> Point in time File\nP -> Project Files (not overflowed case)\n1,2,3 etc. to represent turn number. A turn consists of a user input and a final response from the Agent\n\nFlow with Custom Agent\nS, U1, TC, TR, A1, CA, U2, A2  -- user sends another message, triggers tool call -> S, U1, TC, TR, A1, U2, A2, CA, U3, TC, TR, R, A3\n- Custom agent response moves\n- Reminder inserted after TR\n\nFlow with Project and File Upload\nS, CA, P, F, U1, A1 -- user sends another message -> S, F, U1, A1, CA, P, U2, A2\n- File stays in place, above the user message\n- Project files move along the chain as new messages are sent\n- Custom Agent prompt comes before project files which come before user uploaded files in each turn\n\nReminders during a single Turn\nS, U1, TC, TR, R -- agent calls another tool -> S, U1, TC, TR, TC, TR, R, A1\n- Reminder moved to the end\n```\n\n## Product considerations\n\nProject files are important to the entire duration of the chat session. If the user has uploaded project files, they are likely very intent on working with\nthose files. The LLM is much better at referencing documents close to the end of the context window so keeping it there for ease of access.\n\nUser uploaded files are considered relevant for that point in time, it is ok if the Agent forgets about it as the chat gets long. If every uploaded file is\nconstantly moved towards the end of the chat, it would degrade quality as these stack up. Even with a single file, there is some cost of making the previous\nUser Message further away. This tradeoff is accepted for Projects because of the intent of the feature.\n\nReminder are absolutely necessary to ensure 1-2 specific instructions get followed with a very high probability. It is less detailed than the system prompt\nand should be very targetted for it to work reliably and also not interfere with the last user message.\n\n## Reasons / Experiments\n\nCustom Agent instructions being placed in the system prompt is poorly followed. It also degrades performance of the system especially when the instructions\nare orthogonal (or even possibly contradictory) to the system prompt. For weaker models, it causes strange artifacts in tool calls and final responses\nthat completely ruins the user experience. Empirically, this way works better across a range of models especially when the history gets longer.\nHaving the Custom Agent instructions not move means it fades more as the chat gets long which is also not ok from a UX perspective.\n\nDifferent LLMs vary in this but some now have a section that cannot be set via the API layer called the \"System Prompt\" (OpenAI terminology) which contains\ninformation like the model cutoff date, identity, and some other basic non-changing information. The System prompt described above is in that convention called\nthe \"Developer Prompt\". It seems the distribution of the System Prompt, by which I mean the style of wording and terms used can also affect the behavior. This\nis different between different models and not necessarily scientific so the system prompt is built from an exploration across different models. It currently\nstarts with: \"You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent...\"\n\nLLMs are able to handle changes in topic best at message boundaries. There are special tokens under the hood for this. We also use this property to slice up\nthe history in the way presented above.\n\nReminder messages are placed at the end of the prompt because all model fine tuning approaches cause the LLMs to attend very strongly to the tokens at the very\nback of the context closest to generation. This is the only way to get the LLMs to not miss critical information and for the product to be reliable. Specifically\nthe built-in reminders are around citations and what tools it should call in certain situations.\n\nThe document json includes a field for the LLM to cite (it's a single number) to make citations reliable and avoid weird artifacts. It's called \"document\" so\nthat the LLM does not create weird artifacts in reasoning like \"I should reference citation_id: 5 for...\". It is also strategically placed so that it is easy to\nreference. It is followed by a couple short sections like the metadata and title before the long content section. It seems LLMs are still better at local\nattention despite having global access.\n\nIn a similar concept, LLM instructions in the system prompt are structured specifically so that there are coherent sections for the LLM to attend to. This is\nfairly surprising actually but if there is a line of instructions effectively saying \"If you try to use some tools and find that you need more information or\nneed to call additional tools, you are encouraged to do this\", having this in the Tool section of the System prompt makes all the LLMs follow it well but if it's\neven just a paragraph away like near the beginning of the prompt, it is often ignored. The difference is as drastic as a 30% follow rate to a 90% follow\nrate by even just moving the same statement a few sentences.\n\n## Other related pointers\n\n- How messages, files, images are stored can be found in backend/onyx/db/models.py, there is also a README.md under that directory that may be helpful.\n\n---\n\n# Overview of LLM flow architecture\n\n**Concepts:**\nTurn: User sends a message and AI does some set of things and responds\nStep/Cycle: 1 single LLM inference given some context and some tools\n\n## 1. Top Level (process_message function):\n\nThis function can be thought of as the set-up and validation layer. It ensures that the database is in a valid state, reads the\nmessages in the session and sets up all the necessary items to run the chat loop and state containers. The major things it does\nare:\n\n- Validates the request\n- Builds the chat history for the session\n- Fetches any additional context such as files and images\n- Prepares all of the tools for the LLM\n- Creates the state container objects for use in the loop\n\n### Execution (`_run_models` function):\n\nEach model runs in its own worker thread inside a `ThreadPoolExecutor`. Workers write packets to a shared\n`merged_queue` via an `Emitter`; the main thread drains the queue and yields packets in arrival order. This\nmeans the top level is isolated from the LLM flow and can yield packets as soon as they are produced. If a\nworker fails, the main thread yields a `StreamingError` for that model and keeps the other models running.\nAll saving and database operations are handled by the main thread after the workers complete (or by the\nworkers themselves via self-completion if the drain loop exits early).\n\n### Emitter\n\nThe emitter is an object that lower levels use to send packets without needing to yield them all the way back\nup the call stack. Each `Emitter` tags every packet with a `model_index` and places it on the shared\n`merged_queue` as a `(model_idx, packet)` tuple. The drain loop in `_run_models` consumes these tuples and\nyields the packets to the caller. Both the emitter and the state container are mutating state objects used\nonly to accumulate state. There should be no logic dependent on the states of these objects, especially in\nthe lower levels. The emitter should only take packets and should not be used for other things.\n\n### State Container\n\nThe state container is used to accumulate state during the LLM flow. Similar to the emitter, it should not be used for logic,\nonly for accumulating state. It is used to gather all of the necessary information for saving the chat turn into the database.\nSo it will accumulate answer tokens, reasoning tokens, tool calls, citation info, etc. This is used at the end of the flow once\nthe lower level is completed whether on its own or stopped by the user. At that point, all of the state is read and stored into\nthe database. The state container can be added to by any of the underlying layers, this is fine.\n\n### Stopping Generation\n\nThe drain loop in `_run_models` checks `check_is_connected()` every 50 ms (on queue timeout). The signal itself\nis stored in Redis and is set by the user calling the stop endpoint. On disconnect, the drain loop saves\npartial state for every model, yields an `OverallStop(stop_reason=\"user_cancelled\")` packet, and returns.\nA `drain_done` event signals emitters to stop blocking so worker threads can exit quickly. Workers that\nalready completed successfully will self-complete (persist their response) if the drain loop exited before\nreaching the normal completion path.\n\n## 2. LLM Loop (run_llm_loop function)\n\nThis function handles the logic of the Turn. It's essentially a while loop where context is added and modified (according what\nis outlined in the first half of this doc). Its main functionality is:\n\n- Translate and truncate the context for the LLM inference\n- Add context modifiers like reminders, updates to the system prompts, etc.\n- Run tool calls and gather results\n- Build some of the objects stored in the state container.\n\n## 3. LLM Step (run_llm_step function)\n\nThis function is a single inference of the LLM. It's a wrapper around the LLM stream function which handles packet translations\nso that the Emitter can emit individual tokens as soon as they arrive. It also keeps track of the different sections since they\ndo not all come at once (reasoning, answers, tool calls are all built up token by token). This layer also tracks the different\ntool calls and returns that to the LLM Loop to execute.\n\n## Things to know\n\n- Packets are labeled with a \"turn_index\" field as part of the Placement of the packet. This is not the same as the backend\n  concept of a turn. The turn_index for the frontend is which block does this packet belong to. So while a reasoning + tool call\n  comes from the same LLM inference (same backend LLM step), they are 2 turns to the frontend because that's how it's rendered.\n\n- There are 3 representations of a message, each scoped to a different layer:\n  1. **ChatMessage** — The database model. Should be converted into ChatMessageSimple early and never passed deep into the flow.\n  2. **ChatMessageSimple** — The canonical data model used throughout the codebase. This is the rich, full-featured representation\n     of a message. Any modifications or additions to message structure should be made here.\n  3. **LanguageModelInput** — The LLM-facing representation. Intentionally minimal so the LLM interface layer stays clean and\n     easy to maintain/extend.\n"
  },
  {
    "path": "backend/onyx/chat/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/chat/chat_processing_checker.py",
    "content": "from uuid import UUID\n\nfrom onyx.cache.interface import CacheBackend\n\nPREFIX = \"chatprocessing\"\nFENCE_PREFIX = f\"{PREFIX}_fence\"\nFENCE_TTL = 30 * 60  # 30 minutes\n\n\ndef _get_fence_key(chat_session_id: UUID) -> str:\n    \"\"\"Generate the cache key for a chat session processing fence.\n\n    Args:\n        chat_session_id: The UUID of the chat session\n\n    Returns:\n        The fence key string. Tenant isolation is handled automatically\n        by the cache backend (Redis key-prefixing or Postgres schema routing).\n    \"\"\"\n    return f\"{FENCE_PREFIX}_{chat_session_id}\"\n\n\ndef set_processing_status(\n    chat_session_id: UUID, cache: CacheBackend, value: bool\n) -> None:\n    \"\"\"Set or clear the fence for a chat session processing a message.\n\n    If the key exists, a message is being processed.\n\n    Args:\n        chat_session_id: The UUID of the chat session\n        cache: Tenant-aware cache backend\n        value: True to set the fence, False to clear it\n    \"\"\"\n    fence_key = _get_fence_key(chat_session_id)\n    if value:\n        cache.set(fence_key, 0, ex=FENCE_TTL)\n    else:\n        cache.delete(fence_key)\n\n\ndef is_chat_session_processing(chat_session_id: UUID, cache: CacheBackend) -> bool:\n    \"\"\"Check if the chat session is processing a message.\n\n    Args:\n        chat_session_id: The UUID of the chat session\n        cache: Tenant-aware cache backend\n\n    Returns:\n        True if the chat session is processing a message, False otherwise\n    \"\"\"\n    return cache.exists(_get_fence_key(chat_session_id))\n"
  },
  {
    "path": "backend/onyx/chat/chat_state.py",
    "content": "import threading\nfrom collections.abc import Callable\nfrom dataclasses import dataclass\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\n\nfrom onyx.cache.interface import CacheBackend\nfrom onyx.chat.citation_processor import CitationMapping\nfrom onyx.chat.models import ChatLoadedFile\nfrom onyx.chat.models import ChatMessageSimple\nfrom onyx.chat.models import ExtractedContextFiles\nfrom onyx.chat.models import FileToolMetadata\nfrom onyx.chat.models import SearchParams\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.db.memory import UserMemoryContext\nfrom onyx.db.models import ChatMessage\nfrom onyx.db.models import ChatSession\nfrom onyx.db.models import Persona\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.interfaces import LLMUserIdentity\nfrom onyx.onyxbot.slack.models import SlackContext\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.tools.models import ChatFile\nfrom onyx.tools.models import ToolCallInfo\n\n# Type alias for search doc deduplication key\n# Simple key: just document_id (str)\n# Full key: (document_id, chunk_ind, match_highlights)\nSearchDocKey = str | tuple[str, int, tuple[str, ...]]\n\n\nclass ChatStateContainer:\n    \"\"\"Container for accumulating state during LLM loop execution.\n\n    This container holds the partial state that can be saved to the database\n    if the generation is stopped by the user or completes normally.\n\n    Thread-safe: All write operations are protected by a lock to ensure safe\n    concurrent access from multiple threads. For thread-safe reads, use the\n    getter methods. Direct attribute access is not thread-safe.\n    \"\"\"\n\n    def __init__(self) -> None:\n        self._lock = threading.Lock()\n        # These are collected at the end after the entire tool call is completed\n        self.tool_calls: list[ToolCallInfo] = []\n        # This is accumulated during the streaming\n        self.reasoning_tokens: str | None = None\n        # This is accumulated during the streaming of the answer\n        self.answer_tokens: str | None = None\n        # Store citation mapping for building citation_docs_info during partial saves\n        self.citation_to_doc: CitationMapping = {}\n        # True if this turn is a clarification question (deep research flow)\n        self.is_clarification: bool = False\n        # Pre-answer processing time (time before answer starts) in seconds\n        self.pre_answer_processing_time: float | None = None\n        # Note: LLM cost tracking is now handled in multi_llm.py\n        # Search doc collection - maps dedup key to SearchDoc for all docs from tool calls\n        self._all_search_docs: dict[SearchDocKey, SearchDoc] = {}\n        # Track which citation numbers were actually emitted during streaming\n        self._emitted_citations: set[int] = set()\n\n    def add_tool_call(self, tool_call: ToolCallInfo) -> None:\n        \"\"\"Add a tool call to the accumulated state.\"\"\"\n        with self._lock:\n            self.tool_calls.append(tool_call)\n\n    def set_reasoning_tokens(self, reasoning: str | None) -> None:\n        \"\"\"Set the reasoning tokens from the final answer generation.\"\"\"\n        with self._lock:\n            self.reasoning_tokens = reasoning\n\n    def set_answer_tokens(self, answer: str | None) -> None:\n        \"\"\"Set the answer tokens from the final answer generation.\"\"\"\n        with self._lock:\n            self.answer_tokens = answer\n\n    def set_citation_mapping(self, citation_to_doc: CitationMapping) -> None:\n        \"\"\"Set the citation mapping from citation processor.\"\"\"\n        with self._lock:\n            self.citation_to_doc = citation_to_doc\n\n    def set_is_clarification(self, is_clarification: bool) -> None:\n        \"\"\"Set whether this turn is a clarification question.\"\"\"\n        with self._lock:\n            self.is_clarification = is_clarification\n\n    def get_answer_tokens(self) -> str | None:\n        \"\"\"Thread-safe getter for answer_tokens.\"\"\"\n        with self._lock:\n            return self.answer_tokens\n\n    def get_reasoning_tokens(self) -> str | None:\n        \"\"\"Thread-safe getter for reasoning_tokens.\"\"\"\n        with self._lock:\n            return self.reasoning_tokens\n\n    def get_tool_calls(self) -> list[ToolCallInfo]:\n        \"\"\"Thread-safe getter for tool_calls (returns a copy).\"\"\"\n        with self._lock:\n            return self.tool_calls.copy()\n\n    def get_citation_to_doc(self) -> CitationMapping:\n        \"\"\"Thread-safe getter for citation_to_doc (returns a copy).\"\"\"\n        with self._lock:\n            return self.citation_to_doc.copy()\n\n    def get_is_clarification(self) -> bool:\n        \"\"\"Thread-safe getter for is_clarification.\"\"\"\n        with self._lock:\n            return self.is_clarification\n\n    def set_pre_answer_processing_time(self, duration: float | None) -> None:\n        \"\"\"Set the pre-answer processing time (time before answer starts).\"\"\"\n        with self._lock:\n            self.pre_answer_processing_time = duration\n\n    def get_pre_answer_processing_time(self) -> float | None:\n        \"\"\"Thread-safe getter for pre_answer_processing_time.\"\"\"\n        with self._lock:\n            return self.pre_answer_processing_time\n\n    @staticmethod\n    def create_search_doc_key(\n        search_doc: SearchDoc, use_simple_key: bool = True\n    ) -> SearchDocKey:\n        \"\"\"Create a unique key for a SearchDoc for deduplication.\n\n        Args:\n            search_doc: The SearchDoc to create a key for\n            use_simple_key: If True (default), use only document_id for deduplication.\n                If False, include chunk_ind and match_highlights so that the same\n                document/chunk with different highlights are stored separately.\n        \"\"\"\n        if use_simple_key:\n            return search_doc.document_id\n        match_highlights_tuple = tuple(sorted(search_doc.match_highlights or []))\n        return (search_doc.document_id, search_doc.chunk_ind, match_highlights_tuple)\n\n    def add_search_docs(\n        self, search_docs: list[SearchDoc], use_simple_key: bool = True\n    ) -> None:\n        \"\"\"Add search docs to the accumulated collection with deduplication.\n\n        Args:\n            search_docs: List of SearchDoc objects to add\n            use_simple_key: If True (default), deduplicate by document_id only.\n                If False, deduplicate by document_id + chunk_ind + match_highlights.\n        \"\"\"\n        with self._lock:\n            for doc in search_docs:\n                key = self.create_search_doc_key(doc, use_simple_key)\n                if key not in self._all_search_docs:\n                    self._all_search_docs[key] = doc\n\n    def get_all_search_docs(self) -> dict[SearchDocKey, SearchDoc]:\n        \"\"\"Thread-safe getter for all accumulated search docs (returns a copy).\"\"\"\n        with self._lock:\n            return self._all_search_docs.copy()\n\n    def add_emitted_citation(self, citation_num: int) -> None:\n        \"\"\"Add a citation number that was actually emitted during streaming.\"\"\"\n        with self._lock:\n            self._emitted_citations.add(citation_num)\n\n    def get_emitted_citations(self) -> set[int]:\n        \"\"\"Thread-safe getter for emitted citations (returns a copy).\"\"\"\n        with self._lock:\n            return self._emitted_citations.copy()\n\n\nclass AvailableFiles(BaseModel):\n    \"\"\"Separated file IDs for the FileReaderTool so it knows which loader to use.\"\"\"\n\n    # IDs from the ``user_file`` table (project / persona-attached files).\n    user_file_ids: list[UUID] = []\n    # IDs from the ``file_record`` table (chat-attached files).\n    chat_file_ids: list[UUID] = []\n\n\n@dataclass(frozen=True)\nclass ChatTurnSetup:\n    \"\"\"Immutable context produced by ``build_chat_turn`` and consumed by ``_run_models``.\"\"\"\n\n    new_msg_req: SendMessageRequest\n    chat_session: ChatSession\n    persona: Persona\n    user_message: ChatMessage\n    user_identity: LLMUserIdentity\n    llms: list[LLM]  # length 1 for single-model, N for multi-model\n    model_display_names: list[str]  # parallel to llms\n    simple_chat_history: list[ChatMessageSimple]\n    extracted_context_files: ExtractedContextFiles\n    reserved_messages: list[ChatMessage]  # length 1 for single, N for multi\n    reserved_token_count: int\n    search_params: SearchParams\n    all_injected_file_metadata: dict[str, FileToolMetadata]\n    available_files: AvailableFiles\n    tool_id_to_name_map: dict[int, str]\n    forced_tool_id: int | None\n    files: list[ChatLoadedFile]\n    chat_files_for_tools: list[ChatFile]\n    custom_agent_prompt: str | None\n    user_memory_context: UserMemoryContext\n    # For deep research: was the last assistant message a clarification request?\n    skip_clarification: bool\n    check_is_connected: Callable[[], bool]\n    cache: CacheBackend\n    # Execution params forwarded to per-model tool construction\n    bypass_acl: bool\n    slack_context: SlackContext | None\n    custom_tool_additional_headers: dict[str, str] | None\n    mcp_headers: dict[str, str] | None\n"
  },
  {
    "path": "backend/onyx/chat/chat_utils.py",
    "content": "import json\nimport re\nfrom collections.abc import Callable\nfrom typing import cast\nfrom uuid import UUID\n\nfrom fastapi.datastructures import Headers\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.models import ChatHistoryResult\nfrom onyx.chat.models import ChatLoadedFile\nfrom onyx.chat.models import ChatMessageSimple\nfrom onyx.chat.models import FileToolMetadata\nfrom onyx.chat.models import ToolCallSimple\nfrom onyx.configs.constants import DEFAULT_PERSONA_ID\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.constants import TMP_DRALPHA_PERSONA_NAME\nfrom onyx.db.chat import create_chat_session\nfrom onyx.db.chat import get_chat_messages_by_session\nfrom onyx.db.chat import get_or_create_root_message\nfrom onyx.db.kg_config import get_kg_config_settings\nfrom onyx.db.kg_config import is_kg_config_settings_enabled_valid\nfrom onyx.db.models import ChatMessage\nfrom onyx.db.models import ChatSession\nfrom onyx.db.models import Persona\nfrom onyx.db.models import SearchDoc as DbSearchDoc\nfrom onyx.db.models import UserFile\nfrom onyx.db.projects import check_project_ownership\nfrom onyx.file_processing.extract_file_text import extract_file_text\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.file_store.models import FileDescriptor\nfrom onyx.file_store.utils import plaintext_file_name_for_id\nfrom onyx.file_store.utils import store_plaintext\nfrom onyx.kg.models import KGException\nfrom onyx.kg.setup.kg_default_entity_definitions import (\n    populate_missing_default_entity_types__commit,\n)\nfrom onyx.prompts.chat_prompts import ADDITIONAL_CONTEXT_PROMPT\nfrom onyx.prompts.chat_prompts import TOOL_CALL_RESPONSE_CROSS_MESSAGE\nfrom onyx.prompts.tool_prompts import TOOL_CALL_FAILURE_PROMPT\nfrom onyx.server.query_and_chat.models import ChatSessionCreationRequest\nfrom onyx.server.query_and_chat.streaming_models import CitationInfo\nfrom onyx.tools.models import ToolCallKickoff\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom onyx.utils.timing import log_function_time\n\n\nlogger = setup_logger()\nIMAGE_GENERATION_TOOL_NAME = \"generate_image\"\n\n\nclass FileContextResult(BaseModel):\n    \"\"\"Result of building a file's LLM context representation.\"\"\"\n\n    message: ChatMessageSimple\n    tool_metadata: FileToolMetadata\n\n\ndef build_file_context(\n    tool_file_id: str,\n    filename: str,\n    file_type: ChatFileType,\n    content_text: str | None = None,\n    token_count: int = 0,\n    approx_char_count: int | None = None,\n) -> FileContextResult:\n    \"\"\"Build the LLM context representation for a single file.\n\n    Centralises how files should appear in the LLM prompt\n    — the ID that FileReaderTool accepts (``UserFile.id`` for user files).\n    \"\"\"\n    if file_type.use_metadata_only():\n        message_text = (\n            f\"File: {filename} (id={tool_file_id})\\n\"\n            \"Use the file_reader or python tools to access \"\n            \"this file's contents.\"\n        )\n        message = ChatMessageSimple(\n            message=message_text,\n            token_count=max(1, len(message_text) // 4),\n            message_type=MessageType.USER,\n            file_id=tool_file_id,\n        )\n    else:\n        message_text = f\"File: {filename}\\n{content_text or ''}\\nEnd of File\"\n        message = ChatMessageSimple(\n            message=message_text,\n            token_count=token_count,\n            message_type=MessageType.USER,\n            file_id=tool_file_id,\n        )\n\n    metadata = FileToolMetadata(\n        file_id=tool_file_id,\n        filename=filename,\n        approx_char_count=(\n            approx_char_count\n            if approx_char_count is not None\n            else len(content_text or \"\")\n        ),\n    )\n\n    return FileContextResult(message=message, tool_metadata=metadata)\n\n\ndef create_chat_session_from_request(\n    chat_session_request: ChatSessionCreationRequest,\n    user_id: UUID | None,\n    db_session: Session,\n) -> ChatSession:\n    \"\"\"Create a chat session from a ChatSessionCreationRequest.\n\n    Includes project ownership validation when project_id is provided.\n\n    Args:\n        chat_session_request: The request containing persona_id, description, and project_id\n        user_id: The ID of the user creating the session (can be None for anonymous)\n        db_session: The database session\n\n    Returns:\n        The newly created ChatSession\n\n    Raises:\n        ValueError: If user lacks access to the specified project\n        Exception: If the persona is invalid\n    \"\"\"\n    project_id = chat_session_request.project_id\n    if project_id:\n        if not check_project_ownership(project_id, user_id, db_session):\n            raise ValueError(\"User does not have access to project\")\n\n    return create_chat_session(\n        db_session=db_session,\n        description=chat_session_request.description or \"\",\n        user_id=user_id,\n        persona_id=chat_session_request.persona_id,\n        project_id=chat_session_request.project_id,\n    )\n\n\ndef create_chat_history_chain(\n    chat_session_id: UUID,\n    db_session: Session,\n    prefetch_top_two_level_tool_calls: bool = True,\n    # Optional id at which we finish processing\n    stop_at_message_id: int | None = None,\n) -> list[ChatMessage]:\n    \"\"\"Build the linear chain of messages without including the root message\"\"\"\n    mainline_messages: list[ChatMessage] = []\n\n    all_chat_messages = get_chat_messages_by_session(\n        chat_session_id=chat_session_id,\n        user_id=None,\n        db_session=db_session,\n        skip_permission_check=True,\n        prefetch_top_two_level_tool_calls=prefetch_top_two_level_tool_calls,\n    )\n\n    if not all_chat_messages:\n        root_message = get_or_create_root_message(\n            chat_session_id=chat_session_id, db_session=db_session\n        )\n    else:\n        root_message = all_chat_messages[0]\n        if root_message.parent_message is not None:\n            raise RuntimeError(\n                \"Invalid root message, unable to fetch valid chat message sequence\"\n            )\n\n    current_message: ChatMessage | None = root_message\n    previous_message: ChatMessage | None = None\n    while current_message is not None:\n        child_msg = current_message.latest_child_message\n\n        # Break if at the end of the chain\n        # or have reached the `final_id` of the submitted message\n        if not child_msg or (\n            stop_at_message_id and current_message.id == stop_at_message_id\n        ):\n            break\n        current_message = child_msg\n\n        if (\n            current_message.message_type == MessageType.ASSISTANT\n            and previous_message is not None\n            and previous_message.message_type == MessageType.ASSISTANT\n            and mainline_messages\n        ):\n            # Note that 2 user messages in a row is fine since this is often used for\n            # adding custom prompts and reminders\n            raise RuntimeError(\n                \"Invalid message chain, cannot have two assistant messages in a row\"\n            )\n        else:\n            mainline_messages.append(current_message)\n\n        previous_message = current_message\n\n    return mainline_messages\n\n\ndef reorganize_citations(\n    answer: str, citations: list[CitationInfo]\n) -> tuple[str, list[CitationInfo]]:\n    \"\"\"For a complete, citation-aware response, we want to reorganize the citations so that\n    they are in the order of the documents that were used in the response. This just looks nicer / avoids\n    confusion (\"Why is there [7] when only 2 documents are cited?\").\"\"\"\n\n    # Regular expression to find all instances of [[x]](LINK)\n    pattern = r\"\\[\\[(.*?)\\]\\]\\((.*?)\\)\"\n\n    all_citation_matches = re.findall(pattern, answer)\n\n    new_citation_info: dict[int, CitationInfo] = {}\n    for citation_match in all_citation_matches:\n        try:\n            citation_num = int(citation_match[0])\n            if citation_num in new_citation_info:\n                continue\n\n            matching_citation = next(\n                iter([c for c in citations if c.citation_number == int(citation_num)]),\n                None,\n            )\n            if matching_citation is None:\n                continue\n\n            new_citation_info[citation_num] = CitationInfo(\n                citation_number=len(new_citation_info) + 1,\n                document_id=matching_citation.document_id,\n            )\n        except Exception:\n            pass\n\n    # Function to replace citations with their new number\n    def slack_link_format(match: re.Match) -> str:\n        link_text = match.group(1)\n        try:\n            citation_num = int(link_text)\n            if citation_num in new_citation_info:\n                link_text = new_citation_info[citation_num].citation_number\n        except Exception:\n            pass\n\n        link_url = match.group(2)\n        return f\"[[{link_text}]]({link_url})\"\n\n    # Substitute all matches in the input text\n    new_answer = re.sub(pattern, slack_link_format, answer)\n\n    # if any citations weren't parsable, just add them back to be safe\n    for citation in citations:\n        if citation.citation_number not in new_citation_info:\n            new_citation_info[citation.citation_number] = citation\n\n    return new_answer, list(new_citation_info.values())\n\n\ndef build_citation_map_from_infos(\n    citations_list: list[CitationInfo], db_docs: list[DbSearchDoc]\n) -> dict[int, int]:\n    \"\"\"Translate a list of streaming CitationInfo objects into a mapping of\n    citation number -> saved search doc DB id.\n\n    Always cites the first instance of a document_id and assumes db_docs are\n    ordered as shown to the user (display order).\n    \"\"\"\n    doc_id_to_saved_doc_id_map: dict[str, int] = {}\n    for db_doc in db_docs:\n        if db_doc.document_id not in doc_id_to_saved_doc_id_map:\n            doc_id_to_saved_doc_id_map[db_doc.document_id] = db_doc.id\n\n    citation_to_saved_doc_id_map: dict[int, int] = {}\n    for citation in citations_list:\n        if citation.citation_number not in citation_to_saved_doc_id_map:\n            saved_id = doc_id_to_saved_doc_id_map.get(citation.document_id)\n            if saved_id is not None:\n                citation_to_saved_doc_id_map[citation.citation_number] = saved_id\n\n    return citation_to_saved_doc_id_map\n\n\ndef build_citation_map_from_numbers(\n    cited_numbers: list[int] | set[int], db_docs: list[DbSearchDoc]\n) -> dict[int, int]:\n    \"\"\"Translate parsed citation numbers (e.g., from [[n]]) into a mapping of\n    citation number -> saved search doc DB id by positional index.\n    \"\"\"\n    citation_to_saved_doc_id_map: dict[int, int] = {}\n    for num in sorted(set(cited_numbers)):\n        idx = num - 1\n        if 0 <= idx < len(db_docs):\n            citation_to_saved_doc_id_map[num] = db_docs[idx].id\n\n    return citation_to_saved_doc_id_map\n\n\ndef extract_headers(\n    headers: dict[str, str] | Headers, pass_through_headers: list[str] | None\n) -> dict[str, str]:\n    \"\"\"\n    Extract headers specified in pass_through_headers from input headers.\n    Handles both dict and FastAPI Headers objects, accounting for lowercase keys.\n\n    Args:\n        headers: Input headers as dict or Headers object.\n\n    Returns:\n        dict: Filtered headers based on pass_through_headers.\n    \"\"\"\n    if not pass_through_headers:\n        return {}\n\n    extracted_headers: dict[str, str] = {}\n    for key in pass_through_headers:\n        if key in headers:\n            extracted_headers[key] = headers[key]\n        else:\n            # fastapi makes all header keys lowercase, handling that here\n            lowercase_key = key.lower()\n            if lowercase_key in headers:\n                extracted_headers[lowercase_key] = headers[lowercase_key]\n    return extracted_headers\n\n\ndef process_kg_commands(\n    message: str,\n    persona_name: str,\n    tenant_id: str,  # noqa: ARG001\n    db_session: Session,\n) -> None:\n    # Temporarily, until we have a draft UI for the KG Operations/Management\n    # TODO: move to api endpoint once we get frontend\n    if not persona_name.startswith(TMP_DRALPHA_PERSONA_NAME):\n        return\n\n    kg_config_settings = get_kg_config_settings()\n    if not is_kg_config_settings_enabled_valid(kg_config_settings):\n        return\n\n    if message == \"kg_setup\":\n        populate_missing_default_entity_types__commit(db_session=db_session)\n        raise KGException(\"KG setup done\")\n\n\ndef _get_or_extract_plaintext(\n    file_id: str,\n    extract_fn: Callable[[], str],\n) -> str:\n    \"\"\"Load cached plaintext for a file, or extract and store it.\n\n    Tries to read pre-stored plaintext from the file store.  On a miss,\n    calls extract_fn to produce the text, then stores the result so\n    future calls skip the expensive extraction.\n    \"\"\"\n    file_store = get_default_file_store()\n    plaintext_key = plaintext_file_name_for_id(file_id)\n\n    # Try cached plaintext first.\n    try:\n        plaintext_io = file_store.read_file(plaintext_key, mode=\"b\")\n        return plaintext_io.read().decode(\"utf-8\")\n    except Exception:\n        logger.exception(f\"Error when reading file, id={file_id}\")\n\n    # Cache miss — extract and store.\n    content_text = extract_fn()\n    if content_text:\n        store_plaintext(file_id, content_text)\n    return content_text\n\n\n@log_function_time(print_only=True)\ndef load_chat_file(\n    file_descriptor: FileDescriptor, db_session: Session\n) -> ChatLoadedFile:\n    file_io = get_default_file_store().read_file(file_descriptor[\"id\"], mode=\"b\")\n    content = file_io.read()\n\n    # Extract text content if it's a text file type (not an image)\n    content_text = None\n    # `FileDescriptor` is often JSON-roundtripped (e.g. JSONB / API), so `type`\n    # may arrive as a raw string value instead of a `ChatFileType`.\n    file_type = ChatFileType(file_descriptor[\"type\"])\n\n    if file_type.is_text_file():\n        file_id = file_descriptor[\"id\"]\n\n        def _extract() -> str:\n            return extract_file_text(\n                file=file_io,\n                file_name=file_descriptor.get(\"name\") or \"\",\n                break_on_unprocessable=False,\n            )\n\n        # Use the user_file_id as cache key when available (matches what\n        # the celery indexing worker stores), otherwise fall back to the\n        # file store id (covers code-interpreter-generated files, etc.).\n        user_file_id_str = file_descriptor.get(\"user_file_id\")\n        cache_key = user_file_id_str or file_id\n\n        try:\n            content_text = _get_or_extract_plaintext(cache_key, _extract)\n        except Exception as e:\n            logger.warning(\n                f\"Failed to retrieve content for file {file_descriptor['id']}: {str(e)}\"\n            )\n\n    # Get token count from UserFile if available\n    token_count = 0\n    user_file_id_str = file_descriptor.get(\"user_file_id\")\n    if user_file_id_str:\n        try:\n            user_file_id = UUID(user_file_id_str)\n            user_file = (\n                db_session.query(UserFile).filter(UserFile.id == user_file_id).first()\n            )\n            if user_file and user_file.token_count:\n                token_count = user_file.token_count\n        except (ValueError, TypeError) as e:\n            logger.warning(\n                f\"Failed to get token count for file {file_descriptor['id']}: {e}\"\n            )\n\n    return ChatLoadedFile(\n        file_id=file_descriptor[\"id\"],\n        content=content,\n        file_type=file_type,\n        filename=file_descriptor.get(\"name\"),\n        content_text=content_text,\n        token_count=token_count,\n    )\n\n\ndef load_all_chat_files(\n    chat_messages: list[ChatMessage],\n    db_session: Session,\n) -> list[ChatLoadedFile]:\n    # TODO There is likely a more efficient/standard way to load the files here.\n    file_descriptors_for_history: list[FileDescriptor] = []\n    for chat_message in chat_messages:\n        if chat_message.files:\n            file_descriptors_for_history.extend(chat_message.files)\n\n    files = cast(\n        list[ChatLoadedFile],\n        run_functions_tuples_in_parallel(\n            [\n                (load_chat_file, (file, db_session))\n                for file in file_descriptors_for_history\n            ]\n        ),\n    )\n    return files\n\n\ndef convert_chat_history_basic(\n    chat_history: list[ChatMessage],\n    token_counter: Callable[[str], int],\n    max_individual_message_tokens: int | None = None,\n    max_total_tokens: int | None = None,\n) -> list[ChatMessageSimple]:\n    \"\"\"Convert ChatMessage history to ChatMessageSimple format with no tool calls or files included.\n\n    Args:\n        chat_history: List of ChatMessage objects to convert\n        token_counter: Function to count tokens in a message string\n        max_individual_message_tokens: If set, messages exceeding this number of tokens are dropped.\n            If None, no messages are dropped based on individual token count.\n        max_total_tokens: If set, maximum number of tokens allowed for the entire history.\n            If None, the history is not trimmed based on total token count.\n\n    Returns:\n        List of ChatMessageSimple objects\n    \"\"\"\n    # Defensive: treat a non-positive total budget as \"no history\".\n    if max_total_tokens is not None and max_total_tokens <= 0:\n        return []\n\n    # Convert only the core USER/ASSISTANT messages; omit files and tool calls.\n    converted: list[ChatMessageSimple] = []\n    for chat_message in chat_history:\n        if chat_message.message_type not in (MessageType.USER, MessageType.ASSISTANT):\n            continue\n\n        message = chat_message.message or \"\"\n        token_count = getattr(chat_message, \"token_count\", None)\n        if token_count is None:\n            token_count = token_counter(message)\n\n        # Drop any single message that would dominate the context window.\n        if (\n            max_individual_message_tokens is not None\n            and token_count > max_individual_message_tokens\n        ):\n            continue\n\n        converted.append(\n            ChatMessageSimple(\n                message=message,\n                token_count=token_count,\n                message_type=chat_message.message_type,\n                image_files=None,\n            )\n        )\n\n    if max_total_tokens is None:\n        return converted\n\n    # Enforce a max total budget by keeping a contiguous suffix of the conversation.\n    trimmed_reversed: list[ChatMessageSimple] = []\n    total_tokens = 0\n    for msg in reversed(converted):\n        if total_tokens + msg.token_count > max_total_tokens:\n            break\n        trimmed_reversed.append(msg)\n        total_tokens += msg.token_count\n\n    return list(reversed(trimmed_reversed))\n\n\ndef _build_tool_call_response_history_message(\n    tool_name: str,\n    generated_images: list[dict] | None,\n    tool_call_response: str | None,\n) -> str:\n    if tool_name != IMAGE_GENERATION_TOOL_NAME:\n        return TOOL_CALL_RESPONSE_CROSS_MESSAGE\n\n    if generated_images:\n        llm_image_context: list[dict[str, str]] = []\n        for image in generated_images:\n            file_id = image.get(\"file_id\")\n            revised_prompt = image.get(\"revised_prompt\")\n            if not isinstance(file_id, str):\n                continue\n\n            llm_image_context.append(\n                {\n                    \"file_id\": file_id,\n                    \"revised_prompt\": (\n                        revised_prompt if isinstance(revised_prompt, str) else \"\"\n                    ),\n                }\n            )\n\n        if llm_image_context:\n            return json.dumps(llm_image_context)\n\n    if tool_call_response:\n        return tool_call_response\n\n    return TOOL_CALL_RESPONSE_CROSS_MESSAGE\n\n\ndef convert_chat_history(\n    chat_history: list[ChatMessage],\n    files: list[ChatLoadedFile],\n    context_image_files: list[ChatLoadedFile],\n    additional_context: str | None,\n    token_counter: Callable[[str], int],\n    tool_id_to_name_map: dict[int, str],\n) -> ChatHistoryResult:\n    \"\"\"Convert ChatMessage history to ChatMessageSimple format.\n\n    For user messages: includes attached files (images attached to message, text files as separate messages)\n    For assistant messages with tool calls: creates ONE ASSISTANT message with tool_calls array,\n        followed by N TOOL_CALL_RESPONSE messages (OpenAI parallel tool calling format)\n    For assistant messages without tool calls: creates a simple ASSISTANT message\n\n    Every injected text-file message is tagged with ``file_id`` and its\n    metadata is collected in ``ChatHistoryResult.all_injected_file_metadata``.\n    After context-window truncation, callers compare surviving ``file_id`` tags\n    against this map to discover \"forgotten\" files and provide their metadata\n    to the FileReaderTool.\n    \"\"\"\n    simple_messages: list[ChatMessageSimple] = []\n    all_injected_file_metadata: dict[str, FileToolMetadata] = {}\n\n    # Create a mapping of file IDs to loaded files for quick lookup\n    file_map = {str(f.file_id): f for f in files}\n\n    # Find the index of the last USER message\n    last_user_message_idx = None\n    for i in range(len(chat_history) - 1, -1, -1):\n        if chat_history[i].message_type == MessageType.USER:\n            last_user_message_idx = i\n            break\n\n    for idx, chat_message in enumerate(chat_history):\n        if chat_message.message_type == MessageType.USER:\n            # Process files attached to this message\n            text_files: list[tuple[ChatLoadedFile, FileDescriptor]] = []\n            image_files: list[ChatLoadedFile] = []\n\n            if chat_message.files:\n                for file_descriptor in chat_message.files:\n                    file_id = file_descriptor[\"id\"]\n                    loaded_file = file_map.get(file_id)\n                    if loaded_file:\n                        if loaded_file.file_type == ChatFileType.IMAGE:\n                            image_files.append(loaded_file)\n                        else:\n                            # Text files (DOC, PLAIN_TEXT, TABULAR) are added as separate messages\n                            text_files.append((loaded_file, file_descriptor))\n\n            # Add text files as separate messages before the user message.\n            # Each message is tagged with ``file_id`` so that forgotten files\n            # can be detected after context-window truncation.\n            for text_file, fd in text_files:\n                # Use user_file_id as the FileReaderTool accepts that.\n                # Fall back to the file-store path id.\n                tool_id = fd.get(\"user_file_id\") or text_file.file_id\n                filename = text_file.filename or \"unknown\"\n                ctx = build_file_context(\n                    tool_file_id=tool_id,\n                    filename=filename,\n                    file_type=text_file.file_type,\n                    content_text=text_file.content_text,\n                    token_count=text_file.token_count,\n                )\n                simple_messages.append(ctx.message)\n                all_injected_file_metadata[tool_id] = ctx.tool_metadata\n\n            # Sum token counts from image files (excluding project image files)\n            image_token_count = (\n                sum(img.token_count for img in image_files) if image_files else 0\n            )\n\n            # Add the user message with image files attached\n            # If this is the last USER message, also include context_image_files\n            # Note: context image file tokens are NOT counted in the token count\n            if idx == last_user_message_idx:\n                if context_image_files:\n                    image_files.extend(context_image_files)\n\n                if additional_context:\n                    simple_messages.append(\n                        ChatMessageSimple(\n                            message=ADDITIONAL_CONTEXT_PROMPT.format(\n                                additional_context=additional_context\n                            ),\n                            token_count=token_counter(additional_context),\n                            message_type=MessageType.USER,\n                            image_files=None,\n                        )\n                    )\n\n            simple_messages.append(\n                ChatMessageSimple(\n                    message=chat_message.message,\n                    token_count=chat_message.token_count + image_token_count,\n                    message_type=MessageType.USER,\n                    image_files=image_files if image_files else None,\n                )\n            )\n\n        elif chat_message.message_type == MessageType.ASSISTANT:\n            # Handle tool calls if present using OpenAI parallel tool calling format:\n            # 1. Group tool calls by turn_number\n            # 2. For each turn: ONE ASSISTANT message with tool_calls array\n            # 3. Followed by N TOOL_CALL_RESPONSE messages (one per tool call)\n            if chat_message.tool_calls:\n                # Group tool calls by turn number\n                tool_calls_by_turn: dict[int, list] = {}\n                for tool_call in chat_message.tool_calls:\n                    if tool_call.turn_number not in tool_calls_by_turn:\n                        tool_calls_by_turn[tool_call.turn_number] = []\n                    tool_calls_by_turn[tool_call.turn_number].append(tool_call)\n\n                # Sort turns and process each turn\n                for turn_number in sorted(tool_calls_by_turn.keys()):\n                    turn_tool_calls = tool_calls_by_turn[turn_number]\n                    # Sort by tool_id within the turn for consistent ordering\n                    turn_tool_calls.sort(key=lambda tc: tc.tool_id)\n\n                    # Build ToolCallSimple list for this turn\n                    tool_calls_simple: list[ToolCallSimple] = []\n                    for tool_call in turn_tool_calls:\n                        tool_name = tool_id_to_name_map.get(\n                            tool_call.tool_id, \"unknown\"\n                        )\n                        tool_calls_simple.append(\n                            ToolCallSimple(\n                                tool_call_id=tool_call.tool_call_id,\n                                tool_name=tool_name,\n                                tool_arguments=tool_call.tool_call_arguments or {},\n                                token_count=tool_call.tool_call_tokens,\n                            )\n                        )\n\n                    # Create ONE ASSISTANT message with all tool calls for this turn\n                    total_tool_call_tokens = sum(\n                        tc.token_count for tc in tool_calls_simple\n                    )\n                    simple_messages.append(\n                        ChatMessageSimple(\n                            message=\"\",  # No text content when making tool calls\n                            token_count=total_tool_call_tokens,\n                            message_type=MessageType.ASSISTANT,\n                            tool_calls=tool_calls_simple,\n                            image_files=None,\n                        )\n                    )\n\n                    # Add TOOL_CALL_RESPONSE messages for each tool call in this turn\n                    for tool_call in turn_tool_calls:\n                        tool_name = tool_id_to_name_map.get(\n                            tool_call.tool_id, \"unknown\"\n                        )\n                        tool_response_message = (\n                            _build_tool_call_response_history_message(\n                                tool_name=tool_name,\n                                generated_images=tool_call.generated_images,\n                                tool_call_response=tool_call.tool_call_response,\n                            )\n                        )\n                        simple_messages.append(\n                            ChatMessageSimple(\n                                message=tool_response_message,\n                                token_count=(\n                                    token_counter(tool_response_message)\n                                    if tool_name == IMAGE_GENERATION_TOOL_NAME\n                                    else 20\n                                ),\n                                message_type=MessageType.TOOL_CALL_RESPONSE,\n                                tool_call_id=tool_call.tool_call_id,\n                                image_files=None,\n                            )\n                        )\n\n            # Add the assistant message itself (the final answer)\n            simple_messages.append(\n                ChatMessageSimple(\n                    message=chat_message.message,\n                    token_count=chat_message.token_count,\n                    message_type=MessageType.ASSISTANT,\n                    image_files=None,\n                )\n            )\n        else:\n            raise ValueError(\n                f\"Invalid message type when constructing simple history: {chat_message.message_type}\"\n            )\n\n    return ChatHistoryResult(\n        simple_messages=simple_messages,\n        all_injected_file_metadata=all_injected_file_metadata,\n    )\n\n\ndef get_custom_agent_prompt(persona: Persona, chat_session: ChatSession) -> str | None:\n    \"\"\"Get the custom agent prompt from persona or project instructions. If it's replacing the base system prompt,\n    it does not count as a custom agent prompt (logic exists later also to drop it in this case).\n\n    Chat Sessions in Projects that are using a custom agent will retain the custom agent prompt.\n    Priority: persona.system_prompt (if not default Agent) > chat_session.project.instructions\n\n    # NOTE: Logic elsewhere allows saving empty strings for potentially other purposes but for constructing the prompts\n    # we never want to return an empty string for a prompt so it's translated into an explicit None.\n\n    Args:\n        persona: The Persona object\n        chat_session: The ChatSession object\n\n    Returns:\n        The prompt to use for the custom Agent part of the prompt.\n    \"\"\"\n    # If using a custom Agent, always respect its prompt, even if in a Project, and even if it's an empty custom prompt.\n    if persona.id != DEFAULT_PERSONA_ID:\n        # Logic exists later also to drop it in this case but this is strictly correct anyhow.\n        if persona.replace_base_system_prompt:\n            return None\n        return persona.system_prompt or None\n\n    # If in a project and using the default Agent, respect the project instructions.\n    if chat_session.project and chat_session.project.instructions:\n        return chat_session.project.instructions\n\n    return None\n\n\ndef is_last_assistant_message_clarification(chat_history: list[ChatMessage]) -> bool:\n    \"\"\"Check if the last assistant message in chat history was a clarification question.\n\n    This is used in the deep research flow to determine whether to skip the\n    clarification step when the user has already responded to a clarification.\n\n    Args:\n        chat_history: List of ChatMessage objects in chronological order\n\n    Returns:\n        True if the last assistant message has is_clarification=True, False otherwise\n    \"\"\"\n    for message in reversed(chat_history):\n        if message.message_type == MessageType.ASSISTANT:\n            return message.is_clarification\n    return False\n\n\ndef create_tool_call_failure_messages(\n    tool_calls: list[ToolCallKickoff], token_counter: Callable[[str], int]\n) -> list[ChatMessageSimple]:\n    \"\"\"Create ChatMessageSimple objects for failed tool calls.\n\n    Creates messages using OpenAI parallel tool calling format:\n    1. An ASSISTANT message with tool_calls field containing all failed tool calls\n    2. A TOOL_CALL_RESPONSE failure message for each tool call\n\n    Args:\n        tool_calls: List of ToolCallKickoff objects representing the failed tool calls\n        token_counter: Function to count tokens in a message string\n\n    Returns:\n        List containing ChatMessageSimple objects: one assistant message with all tool calls\n        followed by a failure response for each tool call\n    \"\"\"\n    if not tool_calls:\n        return []\n\n    # Create ToolCallSimple for each failed tool call\n    tool_calls_simple: list[ToolCallSimple] = []\n    for tool_call in tool_calls:\n        tool_call_token_count = token_counter(tool_call.to_msg_str())\n        tool_calls_simple.append(\n            ToolCallSimple(\n                tool_call_id=tool_call.tool_call_id,\n                tool_name=tool_call.tool_name,\n                tool_arguments=tool_call.tool_args,\n                token_count=tool_call_token_count,\n            )\n        )\n\n    total_token_count = sum(tc.token_count for tc in tool_calls_simple)\n\n    # Create ONE ASSISTANT message with all tool_calls (OpenAI format)\n    assistant_msg = ChatMessageSimple(\n        message=\"\",  # No text content when making tool calls\n        token_count=total_token_count,\n        message_type=MessageType.ASSISTANT,\n        tool_calls=tool_calls_simple,\n        image_files=None,\n    )\n\n    messages: list[ChatMessageSimple] = [assistant_msg]\n\n    # Create a TOOL_CALL_RESPONSE failure message for each tool call\n    for tool_call in tool_calls:\n        failure_response_msg = ChatMessageSimple(\n            message=TOOL_CALL_FAILURE_PROMPT,\n            token_count=50,  # Tiny overestimate\n            message_type=MessageType.TOOL_CALL_RESPONSE,\n            tool_call_id=tool_call.tool_call_id,\n            image_files=None,\n        )\n        messages.append(failure_response_msg)\n\n    return messages\n"
  },
  {
    "path": "backend/onyx/chat/citation_processor.py",
    "content": "\"\"\"\nDynamic Citation Processor for LLM Responses\n\nThis module provides a citation processor that can:\n- Accept citation number to SearchDoc mappings dynamically\n- Process token streams from LLMs to extract citations\n- Handle citations in three modes: REMOVE, KEEP_MARKERS, or HYPERLINK\n- Emit CitationInfo objects for detected citations (in HYPERLINK mode)\n- Track all seen citations regardless of mode\n- Maintain a list of cited documents in order of first citation\n\"\"\"\n\nimport re\nfrom collections.abc import Generator\nfrom enum import Enum\nfrom typing import TypeAlias\n\nfrom onyx.configs.chat_configs import STOP_STREAM_PAT\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.prompts.constants import TRIPLE_BACKTICK\nfrom onyx.server.query_and_chat.streaming_models import CitationInfo\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass CitationMode(Enum):\n    \"\"\"Defines how citations should be handled in the output.\n\n    REMOVE: Citations are completely removed from output text.\n            No CitationInfo objects are emitted.\n            Use case: When you need to remove citations from the output if they are not shared with the user\n            (e.g. in discord bot, public slack bot).\n\n    KEEP_MARKERS: Original citation markers like [1], [2] are preserved unchanged.\n                  No CitationInfo objects are emitted.\n                  Use case: When you need to track citations in research agent and later process\n                  them with collapse_citations() to renumber.\n\n    HYPERLINK: Citations are replaced with markdown links like [[1]](url).\n               CitationInfo objects are emitted for UI tracking.\n               Use case: Final reports shown to users with clickable links.\n    \"\"\"\n\n    REMOVE = \"remove\"\n    KEEP_MARKERS = \"keep_markers\"\n    HYPERLINK = \"hyperlink\"\n\n\nCitationMapping: TypeAlias = dict[int, SearchDoc]\n\n\n# ============================================================================\n# Utility functions\n# ============================================================================\n\n\ndef in_code_block(llm_text: str) -> bool:\n    \"\"\"Check if we're currently inside a code block by counting triple backticks.\"\"\"\n    count = llm_text.count(TRIPLE_BACKTICK)\n    return count % 2 != 0\n\n\n# ============================================================================\n# Main Citation Processor with Dynamic Mapping\n# ============================================================================\n\n\nclass DynamicCitationProcessor:\n    \"\"\"\n    A citation processor that accepts dynamic citation mappings.\n\n    This processor is designed for multi-turn conversations where the citation\n    number to document mapping is provided externally. It processes streaming\n    tokens from an LLM, detects citations (e.g., [1], [2,3], [[4]]), and handles\n    them according to the configured CitationMode:\n\n    CitationMode.HYPERLINK (default):\n        1. Replaces citation markers with formatted markdown links (e.g., [[1]](url))\n        2. Emits CitationInfo objects for tracking\n        3. Maintains the order in which documents were first cited\n        Use case: Final reports shown to users with clickable links.\n\n    CitationMode.KEEP_MARKERS:\n        1. Preserves original citation markers like [1], [2] unchanged\n        2. Does NOT emit CitationInfo objects\n        3. Still tracks all seen citations via get_seen_citations()\n        Use case: When citations need later processing (e.g., renumbering).\n\n    CitationMode.REMOVE:\n        1. Removes citation markers entirely from the output text\n        2. Does NOT emit CitationInfo objects\n        3. Still tracks all seen citations via get_seen_citations()\n        Use case: Research agent intermediate reports.\n\n    Features:\n        - Accepts citation number → SearchDoc mapping via update_citation_mapping()\n        - Configurable citation mode at initialization\n        - Always tracks seen citations regardless of mode\n        - Holds back tokens that might be partial citations\n        - Maintains list of cited SearchDocs in order of first citation\n        - Handles unicode bracket variants (【】, ［］)\n        - Skips citation processing inside code blocks\n\n    Example (HYPERLINK mode - default):\n        processor = DynamicCitationProcessor()\n\n        # Set up citation mapping\n        processor.update_citation_mapping({1: search_doc1, 2: search_doc2})\n\n        # Process tokens from LLM\n        for token in llm_stream:\n            for result in processor.process_token(token):\n                if isinstance(result, str):\n                    print(result)  # Display text with [[1]](url) format\n                elif isinstance(result, CitationInfo):\n                    handle_citation(result)  # Track citation\n\n        # Get cited documents at the end\n        cited_docs = processor.get_cited_documents()\n\n    Example (KEEP_MARKERS mode):\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor.update_citation_mapping({1: search_doc1, 2: search_doc2})\n\n        # Process tokens from LLM\n        for token in llm_stream:\n            for result in processor.process_token(token):\n                # Only strings are yielded, no CitationInfo objects\n                print(result)  # Display text with original [1] format preserved\n\n        # Get all seen citations after processing\n        seen_citations = processor.get_seen_citations()  # {1: search_doc1, ...}\n\n    Example (REMOVE mode):\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: search_doc1, 2: search_doc2})\n\n        # Process tokens - citations are removed but tracked\n        for token in llm_stream:\n            for result in processor.process_token(token):\n                print(result)  # Text without any citation markers\n\n        # Citations are still tracked\n        seen_citations = processor.get_seen_citations()\n    \"\"\"\n\n    def __init__(\n        self,\n        citation_mode: CitationMode = CitationMode.HYPERLINK,\n        stop_stream: str | None = STOP_STREAM_PAT,\n    ):\n        \"\"\"\n        Initialize the citation processor.\n\n        Args:\n            citation_mode: How to handle citations in the output. One of:\n                - CitationMode.HYPERLINK (default): Replace [1] with [[1]](url)\n                  and emit CitationInfo objects.\n                - CitationMode.KEEP_MARKERS: Keep original [1] markers unchanged,\n                  no CitationInfo objects emitted.\n                - CitationMode.REMOVE: Remove citations entirely from output,\n                  no CitationInfo objects emitted.\n                All modes track seen citations via get_seen_citations().\n            stop_stream: Optional stop token pattern to halt processing early.\n                When this pattern is detected in the token stream, processing stops.\n                Defaults to STOP_STREAM_PAT from chat configs.\n        \"\"\"\n\n        # Citation mapping from citation number to SearchDoc\n        self.citation_to_doc: CitationMapping = {}\n        self.seen_citations: CitationMapping = {}  # citation num -> SearchDoc\n\n        # Token processing state\n        self.llm_out = \"\"  # entire output so far\n        self.curr_segment = \"\"  # tokens held for citation processing\n        self.hold = \"\"  # tokens held for stop token processing\n        self.stop_stream = stop_stream\n        self.citation_mode = citation_mode\n\n        # Citation tracking\n        self.cited_documents_in_order: list[SearchDoc] = (\n            []\n        )  # SearchDocs in citation order\n        self.cited_document_ids: set[str] = set()  # all cited document_ids\n        self.recent_cited_documents: set[str] = (\n            set()\n        )  # recently cited (for deduplication)\n        self.non_citation_count = 0\n\n        # Citation patterns\n        # Matches potential incomplete citations: '[', '[[', '[1', '[[1', '[1,', '[1, ', etc.\n        # Also matches unicode bracket variants: 【, ［\n        self.possible_citation_pattern = re.compile(r\"([\\[【［]+(?:\\d+,? ?)*$)\")\n\n        # Matches complete citations:\n        # group 1: '[[1]]', [[2]], etc. (also matches 【【1】】, ［［1］］, 【1】, ［1］)\n        # group 2: '[1]', '[1, 2]', '[1,2,16]', etc. (also matches unicode variants)\n        self.citation_pattern = re.compile(\n            r\"([\\[【［]{2}\\d+[\\]】］]{2})|([\\[【［]\\d+(?:, ?\\d+)*[\\]】］])\"\n        )\n\n    def update_citation_mapping(\n        self,\n        citation_mapping: CitationMapping,\n        update_duplicate_keys: bool = False,\n    ) -> None:\n        \"\"\"\n        Update the citation number to SearchDoc mapping.\n\n        This can be called multiple times to add or update mappings. New mappings\n        will be merged with existing ones.\n\n        Args:\n            citation_mapping: Dictionary mapping citation numbers (1, 2, 3, ...) to SearchDoc objects\n            update_duplicate_keys: If True, update existing mappings with new values when keys overlap.\n                If False (default), filter out duplicate keys and only add non-duplicates.\n                The default behavior is useful when OpenURL may have the same citation number as a\n                Web Search result - in those cases, we keep the web search citation and snippet etc.\n        \"\"\"\n        if update_duplicate_keys:\n            # Update all mappings, including duplicates\n            self.citation_to_doc.update(citation_mapping)\n        else:\n            # Filter out duplicate keys and only add non-duplicates\n            # Reason for this is that OpenURL may have the same citation number as a Web Search result\n            # For those, we should just keep the web search citation and snippet etc.\n            duplicate_keys = set(citation_mapping.keys()) & set(\n                self.citation_to_doc.keys()\n            )\n            non_duplicate_mapping = {\n                k: v for k, v in citation_mapping.items() if k not in duplicate_keys\n            }\n            self.citation_to_doc.update(non_duplicate_mapping)\n\n    def process_token(\n        self, token: str | None\n    ) -> Generator[str | CitationInfo, None, None]:\n        \"\"\"\n        Process a token from the LLM stream.\n\n        This method:\n        1. Accumulates tokens until a complete citation or non-citation is found\n        2. Holds back potential partial citations (e.g., \"[\", \"[1\")\n        3. Yields text chunks when they're safe to display\n        4. Handles code blocks (avoids processing citations inside code)\n        5. Handles stop tokens\n        6. Always tracks seen citations in self.seen_citations\n\n        Behavior depends on the `citation_mode` setting from __init__:\n        - HYPERLINK: Citations are replaced with [[n]](url) format and CitationInfo\n          objects are yielded before each formatted citation\n        - KEEP_MARKERS: Original citation markers like [1] are preserved unchanged,\n          no CitationInfo objects are yielded\n        - REMOVE: Citations are removed entirely from output,\n          no CitationInfo objects are yielded\n\n        Args:\n            token: The next token from the LLM stream, or None to signal end of stream.\n                Pass None to flush any remaining buffered text at end of stream.\n\n        Yields:\n            str: Text chunks to display. Citation format depends on citation_mode.\n            CitationInfo: Citation metadata (only when citation_mode=HYPERLINK)\n        \"\"\"\n        # None -> end of stream, flush remaining segment\n        if token is None:\n            if self.curr_segment:\n                yield self.curr_segment\n            return\n\n        # Handle stop stream token\n        if self.stop_stream:\n            next_hold = self.hold + token\n            if self.stop_stream in next_hold:\n                # Extract text before the stop pattern\n                stop_pos = next_hold.find(self.stop_stream)\n                text_before_stop = next_hold[:stop_pos]\n                # Process the text before stop pattern if any exists\n                if text_before_stop:\n                    # Process text_before_stop through normal flow\n                    self.hold = \"\"\n                    token = text_before_stop\n                    # Continue to normal processing below\n                else:\n                    # Stop pattern at the beginning, nothing to yield\n                    return\n            elif next_hold == self.stop_stream[: len(next_hold)]:\n                self.hold = next_hold\n                return\n            else:\n                token = next_hold\n                self.hold = \"\"\n\n        self.curr_segment += token\n        self.llm_out += token\n\n        # Handle code blocks without language tags\n        # If we see ``` followed by \\n, add \"plaintext\" language specifier\n        if \"`\" in self.curr_segment:\n            if self.curr_segment.endswith(\"`\"):\n                pass\n            elif \"```\" in self.curr_segment:\n                parts = self.curr_segment.split(\"```\")\n                if len(parts) > 1 and len(parts[1]) > 0:\n                    piece_that_comes_after = parts[1][0]\n                    if piece_that_comes_after == \"\\n\" and in_code_block(self.llm_out):\n                        self.curr_segment = self.curr_segment.replace(\n                            \"```\", \"```plaintext\"\n                        )\n\n        # Look for citations in current segment\n        citation_matches = list(self.citation_pattern.finditer(self.curr_segment))\n        possible_citation_found = bool(\n            re.search(self.possible_citation_pattern, self.curr_segment)\n        )\n\n        result = \"\"\n        if citation_matches and not in_code_block(self.llm_out):\n            match_idx = 0\n            for match in citation_matches:\n                match_span = match.span()\n\n                # Get text before/between citations\n                intermatch_str = self.curr_segment[match_idx : match_span[0]]\n                self.non_citation_count += len(intermatch_str)\n                match_idx = match_span[1]\n\n                # Check if there is already a space before this citation\n                if intermatch_str:\n                    has_leading_space = intermatch_str[-1].isspace()\n                else:\n                    # No text between citations (consecutive citations)\n                    # If match_idx > 0, we've already processed a citation, so don't add space\n                    if match_idx > 0:\n                        # Consecutive citations - don't add space between them\n                        has_leading_space = True\n                    else:\n                        # Citation at start of segment - check if previous output has space\n                        segment_start_idx = len(self.llm_out) - len(self.curr_segment)\n                        if segment_start_idx > 0:\n                            has_leading_space = self.llm_out[\n                                segment_start_idx - 1\n                            ].isspace()\n                        else:\n                            has_leading_space = False\n\n                # Reset recent citations if no citations found for a while\n                if self.non_citation_count > 5:\n                    self.recent_cited_documents.clear()\n\n                # Process the citation (returns formatted citation text and CitationInfo objects)\n                # Always tracks seen citations regardless of citation_mode\n                citation_text, citation_info_list = self._process_citation(\n                    match, has_leading_space\n                )\n\n                if self.citation_mode == CitationMode.HYPERLINK:\n                    # HYPERLINK mode: Replace citations with markdown links [[n]](url)\n                    # Yield text before citation FIRST (preserve order)\n                    if intermatch_str:\n                        yield intermatch_str\n                    # Yield CitationInfo objects BEFORE the citation text\n                    # This allows the frontend to receive citation metadata before the token\n                    # that contains [[n]](link), enabling immediate rendering\n                    for citation in citation_info_list:\n                        yield citation\n                    # Then yield the formatted citation text\n                    if citation_text:\n                        yield citation_text\n\n                elif self.citation_mode == CitationMode.KEEP_MARKERS:\n                    # KEEP_MARKERS mode: Preserve original citation markers unchanged\n                    # Yield text before citation\n                    if intermatch_str:\n                        yield intermatch_str\n                    # Yield the original citation marker as-is\n                    yield match.group()\n\n                else:  # CitationMode.REMOVE\n                    # REMOVE mode: Remove citations entirely from output\n                    # This strips citation markers like [1], [2], 【1】 from the output text\n                    # When removing citations, we need to handle spacing to avoid issues like:\n                    # - \"text [1] more\" -> \"text  more\" (double space)\n                    # - \"text [1].\" -> \"text .\" (space before punctuation)\n                    if intermatch_str:\n                        remaining_text = self.curr_segment[match_span[1] :]\n                        # Strip trailing space from intermatch if:\n                        # 1. Remaining text starts with space (avoids double space)\n                        # 2. Remaining text starts with punctuation (avoids space before punctuation)\n                        if intermatch_str[-1].isspace() and remaining_text:\n                            first_char = remaining_text[0]\n                            # Check if next char is space or common punctuation\n                            if first_char.isspace() or first_char in \".,;:!?)]}\":\n                                intermatch_str = intermatch_str.rstrip()\n                        if intermatch_str:\n                            yield intermatch_str\n\n                self.non_citation_count = 0\n\n            # Leftover text could be part of next citation\n            self.curr_segment = self.curr_segment[match_idx:]\n            self.non_citation_count = len(self.curr_segment)\n\n        # Hold onto the current segment if potential citations found, otherwise stream it\n        if not possible_citation_found:\n            result += self.curr_segment\n            self.non_citation_count += len(self.curr_segment)\n            self.curr_segment = \"\"\n\n        if result:\n            yield result\n\n    def _process_citation(\n        self, match: re.Match, has_leading_space: bool\n    ) -> tuple[str, list[CitationInfo]]:\n        \"\"\"\n        Process a single citation match and return formatted citation text and citation info objects.\n\n        This is an internal method called by process_token(). The match string can be\n        in various formats: '[1]', '[1, 13, 6]', '[[4]]', '【1】', '［1］', etc.\n\n        This method always:\n        1. Extracts citation numbers from the match\n        2. Looks up the corresponding SearchDoc from the mapping\n        3. Tracks seen citations in self.seen_citations (regardless of citation_mode)\n\n        When citation_mode is HYPERLINK:\n        4. Creates formatted citation text as [[n]](url)\n        5. Creates CitationInfo objects for new citations\n        6. Handles deduplication of recently cited documents\n\n        When citation_mode is REMOVE or KEEP_MARKERS:\n        4. Returns empty string and empty list (caller handles output based on mode)\n\n        Args:\n            match: Regex match object containing the citation pattern\n            has_leading_space: Whether the text immediately before this citation\n                ends with whitespace. Used to determine if a leading space should\n                be added to the formatted output.\n\n        Returns:\n            Tuple of (formatted_citation_text, citation_info_list):\n            - formatted_citation_text: Markdown-formatted citation text like\n              \"[[1]](https://example.com)\" or empty string if not in HYPERLINK mode\n            - citation_info_list: List of CitationInfo objects for newly cited\n              documents, or empty list if not in HYPERLINK mode\n        \"\"\"\n        citation_str: str = match.group()  # e.g., '[1]', '[1, 2, 3]', '[[1]]', '【1】'\n        formatted = (\n            match.lastindex == 1\n        )  # True means already in form '[[1]]' or '【【1】】'\n\n        citation_info_list: list[CitationInfo] = []\n        formatted_citation_parts: list[str] = []\n\n        # Extract citation numbers - regex ensures matched brackets, so we can simply slice\n        citation_content = citation_str[2:-2] if formatted else citation_str[1:-1]\n\n        for num_str in citation_content.split(\",\"):\n            num_str = num_str.strip()\n            if not num_str:\n                continue\n\n            try:\n                num = int(num_str)\n            except ValueError:\n                # Invalid citation, skip it\n                logger.warning(f\"Invalid citation number format: {num_str}\")\n                continue\n\n            # Check if we have a mapping for this citation number\n            if num not in self.citation_to_doc:\n                logger.warning(\n                    f\"Citation number {num} not found in mapping. Available: {list(self.citation_to_doc.keys())}\"\n                )\n                continue\n\n            # Get the SearchDoc\n            search_doc = self.citation_to_doc[num]\n            doc_id = search_doc.document_id\n            link = search_doc.link or \"\"\n\n            # Always track seen citations regardless of citation_mode setting\n            self.seen_citations[num] = search_doc\n\n            # Only generate formatted citations and CitationInfo in HYPERLINK mode\n            if self.citation_mode != CitationMode.HYPERLINK:\n                continue\n\n            # Format the citation text as [[n]](link)\n            formatted_citation_parts.append(f\"[[{num}]]({link})\")\n\n            # Skip creating CitationInfo for citations of the same work if cited recently (deduplication)\n            if doc_id in self.recent_cited_documents:\n                continue\n            self.recent_cited_documents.add(doc_id)\n\n            # Track cited documents and create CitationInfo only for new citations\n            if doc_id not in self.cited_document_ids:\n                self.cited_document_ids.add(doc_id)\n                self.cited_documents_in_order.append(search_doc)\n                citation_info_list.append(\n                    CitationInfo(\n                        citation_number=num,\n                        document_id=doc_id,\n                    )\n                )\n\n        # Join all citation parts with spaces\n        formatted_citation_text = \" \".join(formatted_citation_parts)\n\n        # Apply leading space only if the text didn't already have one\n        if formatted_citation_text and not has_leading_space:\n            formatted_citation_text = \" \" + formatted_citation_text\n\n        return formatted_citation_text, citation_info_list\n\n    def get_cited_documents(self) -> list[SearchDoc]:\n        \"\"\"\n        Get the list of cited SearchDoc objects in the order they were first cited.\n\n        Note: This list is only populated when `citation_mode=HYPERLINK`.\n        When using REMOVE or KEEP_MARKERS mode, this will return an empty list.\n        Use get_seen_citations() instead if you need to track citations without\n        emitting CitationInfo objects.\n\n        Returns:\n            List of SearchDoc objects in the order they were first cited.\n            Empty list if citation_mode is not HYPERLINK.\n        \"\"\"\n        return self.cited_documents_in_order\n\n    def get_cited_document_ids(self) -> list[str]:\n        \"\"\"\n        Get the list of cited document IDs in the order they were first cited.\n\n        Note: This list is only populated when `citation_mode=HYPERLINK`.\n        When using REMOVE or KEEP_MARKERS mode, this will return an empty list.\n        Use get_seen_citations() instead if you need to track citations without\n        emitting CitationInfo objects.\n\n        Returns:\n            List of document IDs (strings) in the order they were first cited.\n            Empty list if citation_mode is not HYPERLINK.\n        \"\"\"\n        return [doc.document_id for doc in self.cited_documents_in_order]\n\n    def get_seen_citations(self) -> CitationMapping:\n        \"\"\"\n        Get all seen citations as a mapping from citation number to SearchDoc.\n\n        This returns all citations that have been encountered during processing,\n        regardless of the `citation_mode` setting. Citations are tracked\n        whenever they are parsed, making this useful for cases where you need to\n        know which citations appeared in the text without emitting CitationInfo objects.\n\n        This is particularly useful when using REMOVE or KEEP_MARKERS mode, as\n        get_cited_documents() will be empty in those cases, but get_seen_citations()\n        will still contain all the citations that were found.\n\n        Returns:\n            Dictionary mapping citation numbers (int) to SearchDoc objects.\n            The dictionary is keyed by the citation number as it appeared in\n            the text (e.g., {1: SearchDoc(...), 3: SearchDoc(...)}).\n        \"\"\"\n        return self.seen_citations\n\n    @property\n    def num_cited_documents(self) -> int:\n        \"\"\"\n        Get the number of unique documents that have been cited.\n\n        Note: This count is only updated when `citation_mode=HYPERLINK`.\n        When using REMOVE or KEEP_MARKERS mode, this will always return 0.\n        Use len(get_seen_citations()) instead if you need to count citations\n        without emitting CitationInfo objects.\n\n        Returns:\n            Number of unique documents cited. 0 if citation_mode is not HYPERLINK.\n        \"\"\"\n        return len(self.cited_document_ids)\n\n    def reset_recent_citations(self) -> None:\n        \"\"\"\n        Reset the recent citations tracker.\n\n        The processor tracks \"recently cited\" documents to avoid emitting duplicate\n        CitationInfo objects for the same document when it's cited multiple times\n        in close succession. This method clears that tracker.\n\n        This is primarily useful when `citation_mode=HYPERLINK` to allow\n        previously cited documents to emit CitationInfo objects again. Has no\n        effect when using REMOVE or KEEP_MARKERS mode.\n\n        The recent citation tracker is also automatically cleared when more than\n        5 non-citation characters are processed between citations.\n        \"\"\"\n        self.recent_cited_documents.clear()\n\n    def get_next_citation_number(self) -> int:\n        \"\"\"\n        Get the next available citation number for adding new documents to the mapping.\n\n        This method returns the next citation number that should be used when adding\n        new documents via update_citation_mapping(). Useful when dynamically adding\n        citations during processing (e.g., from tool results like web search).\n\n        If no citations exist yet in the mapping, returns 1.\n        Otherwise, returns max(existing_citation_numbers) + 1.\n\n        Returns:\n            The next available citation number (1-indexed integer).\n\n        Example:\n            # After adding citations 1, 2, 3\n            processor.get_next_citation_number()  # Returns 4\n\n            # With non-sequential citations 1, 5, 10\n            processor.get_next_citation_number()  # Returns 11\n        \"\"\"\n        if not self.citation_to_doc:\n            return 1\n        return max(self.citation_to_doc.keys()) + 1\n"
  },
  {
    "path": "backend/onyx/chat/citation_utils.py",
    "content": "import re\n\nfrom onyx.chat.citation_processor import CitationMapping\nfrom onyx.chat.citation_processor import DynamicCitationProcessor\nfrom onyx.context.search.models import SearchDocsResponse\nfrom onyx.tools.built_in_tools import CITEABLE_TOOLS_NAMES\nfrom onyx.tools.models import ToolResponse\n\n\ndef update_citation_processor_from_tool_response(\n    tool_response: ToolResponse,\n    citation_processor: DynamicCitationProcessor,\n) -> None:\n    \"\"\"Update citation processor if this was a citeable tool with a SearchDocsResponse.\n\n    Checks if the tool call is citeable and if the response contains a SearchDocsResponse,\n    then creates a mapping from citation numbers to SearchDoc objects and updates the\n    citation processor.\n\n    Args:\n        tool_response: The response from the tool execution (must have tool_call set)\n        citation_processor: The DynamicCitationProcessor to update\n    \"\"\"\n    # Early return if tool_call is not set\n    if tool_response.tool_call is None:\n        return\n\n    # Update citation processor if this was a search tool\n    if tool_response.tool_call.tool_name in CITEABLE_TOOLS_NAMES:\n        # Check if the rich_response is a SearchDocsResponse\n        if isinstance(tool_response.rich_response, SearchDocsResponse):\n            search_response = tool_response.rich_response\n\n            # Create mapping from citation number to SearchDoc\n            citation_to_doc: CitationMapping = {}\n            for (\n                citation_num,\n                doc_id,\n            ) in search_response.citation_mapping.items():\n                # Find the SearchDoc with this doc_id\n                matching_doc = next(\n                    (\n                        doc\n                        for doc in search_response.search_docs\n                        if doc.document_id == doc_id\n                    ),\n                    None,\n                )\n                if matching_doc:\n                    citation_to_doc[citation_num] = matching_doc\n\n            # Update the citation processor\n            citation_processor.update_citation_mapping(citation_to_doc)\n\n\ndef extract_citation_order_from_text(text: str) -> list[int]:\n    \"\"\"Extract citation numbers from text in order of first appearance.\n\n    Parses citation patterns like [1], [1, 2], [[1]], 【1】 etc. and returns\n    the citation numbers in the order they first appear in the text.\n\n    Args:\n        text: The text containing citations\n\n    Returns:\n        List of citation numbers in order of first appearance (no duplicates)\n    \"\"\"\n    # Same pattern used in collapse_citations and DynamicCitationProcessor\n    # Group 2 captures the number in double bracket format: [[1]], 【【1】】\n    # Group 4 captures the numbers in single bracket format: [1], [1, 2]\n    citation_pattern = re.compile(\n        r\"([\\[【［]{2}(\\d+)[\\]】］]{2})|([\\[【［]([\\d]+(?: *, *\\d+)*)[\\]】］])\"\n    )\n    seen: set[int] = set()\n    order: list[int] = []\n\n    for match in citation_pattern.finditer(text):\n        # Group 2 is for double bracket single number, group 4 is for single bracket\n        if match.group(2):\n            nums_str = match.group(2)\n        elif match.group(4):\n            nums_str = match.group(4)\n        else:\n            continue\n\n        for num_str in nums_str.split(\",\"):\n            num_str = num_str.strip()\n            if num_str:\n                try:\n                    num = int(num_str)\n                    if num not in seen:\n                        seen.add(num)\n                        order.append(num)\n                except ValueError:\n                    continue\n\n    return order\n\n\ndef collapse_citations(\n    answer_text: str,\n    existing_citation_mapping: CitationMapping,\n    new_citation_mapping: CitationMapping,\n) -> tuple[str, CitationMapping]:\n    \"\"\"Collapse the citations in the text to use the smallest possible numbers.\n\n    This function takes citations in the text (like [25], [30], etc.) and replaces them\n    with the smallest possible numbers. It starts numbering from the next available\n    integer after the existing citation mapping. If a citation refers to a document\n    that already exists in the existing citation mapping (matched by document_id),\n    it uses the existing citation number instead of assigning a new one.\n\n    Args:\n        answer_text: The text containing citations to collapse (e.g., \"See [25] and [30]\")\n        existing_citation_mapping: Citations already processed/displayed. These mappings\n            are preserved unchanged in the output.\n        new_citation_mapping: Citations from the current text that need to be collapsed.\n            The keys are the citation numbers as they appear in answer_text.\n\n    Returns:\n        A tuple of (updated_text, combined_mapping) where:\n        - updated_text: The text with citations replaced with collapsed numbers\n        - combined_mapping: All values from existing_citation_mapping plus the new\n          mappings with their (possibly renumbered) keys\n    \"\"\"\n    # Build a reverse lookup: document_id -> existing citation number\n    doc_id_to_existing_citation: dict[str, int] = {\n        doc.document_id: citation_num\n        for citation_num, doc in existing_citation_mapping.items()\n    }\n\n    # Determine the next available citation number\n    if existing_citation_mapping:\n        next_citation_num = max(existing_citation_mapping.keys()) + 1\n    else:\n        next_citation_num = 1\n\n    # Build the mapping from old citation numbers (in new_citation_mapping) to new numbers\n    old_to_new: dict[int, int] = {}\n    additional_mappings: CitationMapping = {}\n\n    for old_num, search_doc in new_citation_mapping.items():\n        doc_id = search_doc.document_id\n\n        # Check if this document already exists in existing citations\n        if doc_id in doc_id_to_existing_citation:\n            # Use the existing citation number\n            old_to_new[old_num] = doc_id_to_existing_citation[doc_id]\n        else:\n            # Check if we've already assigned a new number to this document\n            # (handles case where same doc appears with different old numbers)\n            existing_new_num = None\n            for mapped_old, mapped_new in old_to_new.items():\n                if (\n                    mapped_old in new_citation_mapping\n                    and new_citation_mapping[mapped_old].document_id == doc_id\n                ):\n                    existing_new_num = mapped_new\n                    break\n\n            if existing_new_num is not None:\n                old_to_new[old_num] = existing_new_num\n            else:\n                # Assign the next available number\n                old_to_new[old_num] = next_citation_num\n                additional_mappings[next_citation_num] = search_doc\n                next_citation_num += 1\n\n    # Pattern to match citations like [25], [1, 2, 3], [[25]], etc.\n    # Also matches unicode bracket variants: 【】, ［］\n    citation_pattern = re.compile(\n        r\"([\\[【［]{2}\\d+[\\]】］]{2})|([\\[【［]\\d+(?:, ?\\d+)*[\\]】］])\"\n    )\n\n    def replace_citation(match: re.Match) -> str:\n        \"\"\"Replace citation numbers in a match with their new collapsed values.\"\"\"\n        citation_str = match.group()\n\n        # Determine bracket style\n        if (\n            citation_str.startswith(\"[[\")\n            or citation_str.startswith(\"【【\")\n            or citation_str.startswith(\"［［\")\n        ):\n            open_bracket = citation_str[:2]\n            close_bracket = citation_str[-2:]\n            content = citation_str[2:-2]\n        else:\n            open_bracket = citation_str[0]\n            close_bracket = citation_str[-1]\n            content = citation_str[1:-1]\n\n        # Parse and replace citation numbers\n        new_nums = []\n        for num_str in content.split(\",\"):\n            num_str = num_str.strip()\n            if not num_str:\n                continue\n            try:\n                num = int(num_str)\n                # Only replace if we have a mapping for this number\n                if num in old_to_new:\n                    new_nums.append(str(old_to_new[num]))\n                else:\n                    # Keep original if not in our mapping\n                    new_nums.append(num_str)\n            except ValueError:\n                new_nums.append(num_str)\n\n        # Reconstruct the citation with original bracket style\n        new_content = \", \".join(new_nums)\n        return f\"{open_bracket}{new_content}{close_bracket}\"\n\n    # Replace all citations in the text\n    updated_text = citation_pattern.sub(replace_citation, answer_text)\n\n    # Build the combined mapping\n    combined_mapping: CitationMapping = dict(existing_citation_mapping)\n    combined_mapping.update(additional_mappings)\n\n    return updated_text, combined_mapping\n"
  },
  {
    "path": "backend/onyx/chat/compression.py",
    "content": "\"\"\"\nChat history compression via summarization.\n\nThis module handles compressing long chat histories by summarizing older messages\nwhile keeping recent messages verbatim.\n\nSummaries are branch-aware: each summary's parent_message_id points to the last\nmessage when compression triggered, making it part of the tree structure.\n\"\"\"\n\nfrom typing import NamedTuple\n\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.chat_configs import COMPRESSION_TRIGGER_RATIO\nfrom onyx.configs.constants import MessageType\nfrom onyx.db.models import ChatMessage\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.models import AssistantMessage\nfrom onyx.llm.models import ChatCompletionMessage\nfrom onyx.llm.models import SystemMessage\nfrom onyx.llm.models import UserMessage\nfrom onyx.natural_language_processing.utils import get_tokenizer\nfrom onyx.prompts.compression_prompts import PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK\nfrom onyx.prompts.compression_prompts import PROGRESSIVE_USER_REMINDER\nfrom onyx.prompts.compression_prompts import SUMMARIZATION_CUTOFF_MARKER\nfrom onyx.prompts.compression_prompts import SUMMARIZATION_PROMPT\nfrom onyx.prompts.compression_prompts import USER_REMINDER\nfrom onyx.tracing.framework.create import ensure_trace\nfrom onyx.tracing.llm_utils import llm_generation_span\nfrom onyx.tracing.llm_utils import record_llm_response\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n# Ratio of available context to allocate for recent messages after compression\nRECENT_MESSAGES_RATIO = 0.2\n\n\nclass CompressionResult(BaseModel):\n    \"\"\"Result of a compression operation.\"\"\"\n\n    summary_created: bool\n    messages_summarized: int\n    error: str | None = None\n\n\nclass CompressionParams(BaseModel):\n    \"\"\"Parameters for compression operation.\"\"\"\n\n    should_compress: bool\n    tokens_for_recent: int = 0\n\n\nclass SummaryContent(NamedTuple):\n    \"\"\"Messages split for summarization.\"\"\"\n\n    older_messages: list[ChatMessage]\n    recent_messages: list[ChatMessage]\n\n\ndef calculate_total_history_tokens(chat_history: list[ChatMessage]) -> int:\n    \"\"\"\n    Calculate the total token count for the given chat history.\n\n    Args:\n        chat_history: Branch-aware list of messages\n\n    Returns:\n        Total token count for the history\n    \"\"\"\n    return sum(m.token_count or 0 for m in chat_history)\n\n\ndef get_compression_params(\n    max_input_tokens: int,\n    current_history_tokens: int,\n    reserved_tokens: int,\n) -> CompressionParams:\n    \"\"\"\n    Calculate compression parameters based on model's context window.\n\n    Args:\n        max_input_tokens: The maximum input tokens for the LLM\n        current_history_tokens: Current total tokens in chat history\n        reserved_tokens: Tokens reserved for system prompt, tools, files, etc.\n\n    Returns:\n        CompressionParams indicating whether to compress and token budgets\n    \"\"\"\n    available = max_input_tokens - reserved_tokens\n\n    # Check trigger threshold\n    trigger_threshold = int(available * COMPRESSION_TRIGGER_RATIO)\n\n    if current_history_tokens <= trigger_threshold:\n        return CompressionParams(should_compress=False)\n\n    # Calculate token budget for recent messages as a percentage of current history\n    # This ensures we always have messages to summarize when compression triggers\n    tokens_for_recent = int(current_history_tokens * RECENT_MESSAGES_RATIO)\n\n    return CompressionParams(\n        should_compress=True,\n        tokens_for_recent=tokens_for_recent,\n    )\n\n\ndef find_summary_for_branch(\n    db_session: Session,\n    chat_history: list[ChatMessage],\n) -> ChatMessage | None:\n    \"\"\"\n    Find the most recent summary that applies to the current branch.\n\n    A summary applies if its parent_message_id is in the current chat history,\n    meaning it was created on this branch.\n\n    Args:\n        db_session: Database session\n        chat_history: Branch-aware list of messages\n\n    Returns:\n        The applicable summary message, or None if no summary exists for this branch\n    \"\"\"\n    if not chat_history:\n        return None\n\n    history_ids = {m.id for m in chat_history}\n    chat_session_id = chat_history[0].chat_session_id\n\n    # Query all summaries for this session (typically few), then filter in Python.\n    # Order by time_sent descending to get the most recent summary first.\n    summaries = (\n        db_session.query(ChatMessage)\n        .filter(\n            ChatMessage.chat_session_id == chat_session_id,\n            ChatMessage.last_summarized_message_id.isnot(None),\n        )\n        .order_by(ChatMessage.time_sent.desc())\n        .all()\n    )\n    # Optimization to avoid using IN clause for large histories\n    for summary in summaries:\n        if summary.parent_message_id in history_ids:\n            return summary\n\n    return None\n\n\ndef get_messages_to_summarize(\n    chat_history: list[ChatMessage],\n    existing_summary: ChatMessage | None,\n    tokens_for_recent: int,\n) -> SummaryContent:\n    \"\"\"\n    Split messages into those to summarize and those to keep verbatim.\n\n    Args:\n        chat_history: Branch-aware list of messages\n        existing_summary: Existing summary for this branch (if any)\n        tokens_for_recent: Token budget for recent messages to keep\n\n    Returns:\n        SummaryContent with older_messages to summarize and recent_messages to keep\n    \"\"\"\n    # Filter to messages after the existing summary's cutoff using timestamp\n    if existing_summary and existing_summary.last_summarized_message_id:\n        cutoff_id = existing_summary.last_summarized_message_id\n        last_summarized_msg = next(m for m in chat_history if m.id == cutoff_id)\n        messages = [\n            m for m in chat_history if m.time_sent > last_summarized_msg.time_sent\n        ]\n    else:\n        messages = list(chat_history)\n\n    # Filter out empty messages\n    messages = [m for m in messages if m.message]\n\n    if not messages:\n        return SummaryContent(older_messages=[], recent_messages=[])\n\n    # Work backwards from most recent, keeping messages until we exceed budget\n    recent_messages: list[ChatMessage] = []\n    tokens_used = 0\n\n    for msg in reversed(messages):\n        msg_tokens = msg.token_count or 0\n        if tokens_used + msg_tokens > tokens_for_recent and recent_messages:\n            break\n        recent_messages.insert(0, msg)\n        tokens_used += msg_tokens\n\n    # Ensure cutoff is right before a user message by moving any leading\n    # non-user messages from recent_messages to older_messages\n    while recent_messages and recent_messages[0].message_type != MessageType.USER:\n        recent_messages.pop(0)\n\n    # Everything else gets summarized\n    recent_ids = {m.id for m in recent_messages}\n    older_messages = [m for m in messages if m.id not in recent_ids]\n\n    return SummaryContent(\n        older_messages=older_messages, recent_messages=recent_messages\n    )\n\n\ndef _build_llm_messages_for_summarization(\n    messages: list[ChatMessage],\n    tool_id_to_name: dict[int, str],\n) -> list[UserMessage | AssistantMessage]:\n    \"\"\"Convert ChatMessage objects to LLM message format for summarization.\n\n    This is intentionally different from translate_history_to_llm_format in llm_step.py:\n    - Compacts tool calls to \"[Used tools: tool1, tool2]\" to save tokens in summaries\n    - Skips TOOL_CALL_RESPONSE messages entirely (tool usage captured in assistant message)\n    - No image/multimodal handling (summaries are text-only)\n    - No caching or LLMConfig-specific behavior needed\n    \"\"\"\n    result: list[UserMessage | AssistantMessage] = []\n\n    for msg in messages:\n        # Skip empty messages\n        if not msg.message:\n            continue\n\n        # Handle assistant messages with tool calls compactly\n        if msg.message_type == MessageType.ASSISTANT:\n            if msg.tool_calls:\n                tool_names = [\n                    tool_id_to_name.get(tc.tool_id, \"unknown\") for tc in msg.tool_calls\n                ]\n                result.append(\n                    AssistantMessage(content=f\"[Used tools: {', '.join(tool_names)}]\")\n                )\n            else:\n                result.append(AssistantMessage(content=msg.message))\n            continue\n\n        # Skip tool call response messages - tool calls are captured above via assistant messages\n        if msg.message_type == MessageType.TOOL_CALL_RESPONSE:\n            continue\n\n        # Handle user messages\n        if msg.message_type == MessageType.USER:\n            result.append(UserMessage(content=msg.message))\n\n    return result\n\n\ndef generate_summary(\n    older_messages: list[ChatMessage],\n    recent_messages: list[ChatMessage],\n    llm: LLM,\n    tool_id_to_name: dict[int, str],\n    existing_summary: str | None = None,\n) -> str:\n    \"\"\"\n    Generate a summary using cutoff marker approach.\n\n    The cutoff marker tells the LLM to summarize only older messages,\n    while using recent messages as context to inform what's important.\n\n    Messages are sent as separate UserMessage/AssistantMessage objects rather\n    than being concatenated into a single message.\n\n    Args:\n        older_messages: Messages to compress into summary (before cutoff)\n        recent_messages: Messages kept verbatim (after cutoff, for context only)\n        llm: LLM to use for summarization\n        tool_id_to_name: Mapping of tool IDs to display names\n        existing_summary: Previous summary text to incorporate (progressive)\n\n    Returns:\n        Summary text\n    \"\"\"\n    # Build system prompt\n    system_content = SUMMARIZATION_PROMPT\n    if existing_summary:\n        # Progressive summarization: append existing summary to system prompt\n        system_content += PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK.format(\n            previous_summary=existing_summary\n        )\n        final_reminder = PROGRESSIVE_USER_REMINDER\n    else:\n        final_reminder = USER_REMINDER\n\n    # Convert messages to LLM format (using compression-specific conversion)\n    older_llm_messages = _build_llm_messages_for_summarization(\n        older_messages, tool_id_to_name\n    )\n    recent_llm_messages = _build_llm_messages_for_summarization(\n        recent_messages, tool_id_to_name\n    )\n\n    # Build message list with separate messages\n    input_messages: list[ChatCompletionMessage] = [\n        SystemMessage(content=system_content),\n    ]\n\n    # Add older messages (to be summarized)\n    input_messages.extend(older_llm_messages)\n\n    # Add cutoff marker as a user message\n    input_messages.append(UserMessage(content=SUMMARIZATION_CUTOFF_MARKER))\n\n    # Add recent messages (for context only)\n    input_messages.extend(recent_llm_messages)\n\n    # Add final reminder\n    input_messages.append(UserMessage(content=final_reminder))\n\n    with llm_generation_span(\n        llm=llm,\n        flow=\"chat_history_summarization\",\n        input_messages=input_messages,\n    ) as span_generation:\n        response = llm.invoke(input_messages)\n        record_llm_response(span_generation, response)\n\n    content = response.choice.message.content\n    if not (content and content.strip()):\n        raise ValueError(\"LLM returned empty summary\")\n    return content.strip()\n\n\ndef compress_chat_history(\n    db_session: Session,\n    chat_history: list[ChatMessage],\n    llm: LLM,\n    compression_params: CompressionParams,\n    tool_id_to_name: dict[int, str],\n) -> CompressionResult:\n    \"\"\"\n    Main compression function. Creates a summary ChatMessage.\n\n    The summary message's parent_message_id points to the last message in\n    chat_history, making it branch-aware via the tree structure.\n\n    Note: This takes the entire chat history as input, splits it into older\n    messages (to summarize) and recent messages (kept verbatim within the\n    token budget), generates a summary of the older part, and persists the\n    new summary message with its parent set to the last message in history.\n\n    Past summary is taken into context (progressive summarization): we find\n    at most one existing summary for this branch. If present, only messages\n    after that summary's last_summarized_message_id are considered; the\n    existing summary text is passed into the LLM so the new summary\n    incorporates it instead of summarizing from scratch.\n\n    For more details, see the COMPRESSION.md file.\n\n    Args:\n        db_session: Database session\n        chat_history: Branch-aware list of messages\n        llm: LLM to use for summarization\n        compression_params: Parameters from get_compression_params\n        tool_id_to_name: Mapping of tool IDs to display names\n\n    Returns:\n        CompressionResult indicating success/failure\n    \"\"\"\n    if not chat_history:\n        return CompressionResult(summary_created=False, messages_summarized=0)\n\n    chat_session_id = chat_history[0].chat_session_id\n\n    logger.info(\n        f\"Starting compression for session {chat_session_id}, \"\n        f\"history_len={len(chat_history)}, tokens_for_recent={compression_params.tokens_for_recent}\"\n    )\n\n    with ensure_trace(\n        \"chat_history_compression\",\n        group_id=str(chat_session_id),\n        metadata={\n            \"tenant_id\": get_current_tenant_id(),\n            \"chat_session_id\": str(chat_session_id),\n        },\n    ):\n        try:\n            # Find existing summary for this branch\n            existing_summary = find_summary_for_branch(db_session, chat_history)\n\n            # Get messages to summarize\n            summary_content = get_messages_to_summarize(\n                chat_history,\n                existing_summary,\n                tokens_for_recent=compression_params.tokens_for_recent,\n            )\n\n            if not summary_content.older_messages:\n                logger.debug(\"No messages to summarize, skipping compression\")\n                return CompressionResult(summary_created=False, messages_summarized=0)\n\n            # Generate summary (incorporate existing summary if present)\n            existing_summary_text = (\n                existing_summary.message if existing_summary else None\n            )\n            summary_text = generate_summary(\n                older_messages=summary_content.older_messages,\n                recent_messages=summary_content.recent_messages,\n                llm=llm,\n                tool_id_to_name=tool_id_to_name,\n                existing_summary=existing_summary_text,\n            )\n\n            # Calculate token count for the summary\n            tokenizer = get_tokenizer(None, None)\n            summary_token_count = len(tokenizer.encode(summary_text))\n            logger.debug(\n                f\"Generated summary ({summary_token_count} tokens): {summary_text[:200]}...\"\n            )\n\n            # Create new summary as a ChatMessage\n            # Parent is the last message in history - this makes the summary branch-aware\n            summary_message = ChatMessage(\n                chat_session_id=chat_session_id,\n                message_type=MessageType.ASSISTANT,\n                message=summary_text,\n                token_count=summary_token_count,\n                parent_message_id=chat_history[-1].id,\n                last_summarized_message_id=summary_content.older_messages[-1].id,\n            )\n            db_session.add(summary_message)\n            db_session.commit()\n\n            logger.info(\n                f\"Compressed {len(summary_content.older_messages)} messages into summary \"\n                f\"(session_id={chat_session_id}, \"\n                f\"summary_tokens={summary_token_count})\"\n            )\n\n            return CompressionResult(\n                summary_created=True,\n                messages_summarized=len(summary_content.older_messages),\n            )\n\n        except Exception as e:\n            logger.exception(f\"Compression failed for session {chat_session_id}: {e}\")\n            db_session.rollback()\n            return CompressionResult(\n                summary_created=False,\n                messages_summarized=0,\n                error=str(e),\n            )\n"
  },
  {
    "path": "backend/onyx/chat/emitter.py",
    "content": "import threading\nfrom queue import Queue\n\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import Packet\n\n\nclass Emitter:\n    \"\"\"Routes packets from LLM/tool execution to the ``_run_models`` drain loop.\n\n    Tags every packet with ``model_index`` and places it on ``merged_queue``\n    as a ``(model_idx, packet)`` tuple for ordered consumption downstream.\n\n    Args:\n        merged_queue: Shared queue owned by ``_run_models``.\n        model_idx: Index embedded in packet placements (``0`` for N=1 runs).\n        drain_done: Optional event set by ``_run_models`` when the drain loop\n            exits early (e.g. HTTP disconnect). When set, ``emit`` returns\n            immediately so worker threads can exit fast.\n    \"\"\"\n\n    def __init__(\n        self,\n        merged_queue: Queue[tuple[int, Packet | Exception | object]],\n        model_idx: int = 0,\n        drain_done: threading.Event | None = None,\n    ) -> None:\n        self._model_idx = model_idx\n        self._merged_queue = merged_queue\n        self._drain_done = drain_done\n\n    def emit(self, packet: Packet) -> None:\n        if self._drain_done is not None and self._drain_done.is_set():\n            return\n        base = packet.placement or Placement(turn_index=0)\n        tagged = Packet(\n            placement=base.model_copy(update={\"model_index\": self._model_idx}),\n            obj=packet.obj,\n        )\n        self._merged_queue.put((self._model_idx, tagged))\n"
  },
  {
    "path": "backend/onyx/chat/llm_loop.py",
    "content": "import json\nimport time\nfrom collections.abc import Callable\nfrom typing import Any\nfrom typing import Literal\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.chat_state import ChatStateContainer\nfrom onyx.chat.chat_utils import create_tool_call_failure_messages\nfrom onyx.chat.citation_processor import CitationMapping\nfrom onyx.chat.citation_processor import CitationMode\nfrom onyx.chat.citation_processor import DynamicCitationProcessor\nfrom onyx.chat.citation_utils import update_citation_processor_from_tool_response\nfrom onyx.chat.emitter import Emitter\nfrom onyx.chat.llm_step import extract_tool_calls_from_response_text\nfrom onyx.chat.llm_step import run_llm_step\nfrom onyx.chat.models import ChatMessageSimple\nfrom onyx.chat.models import ContextFileMetadata\nfrom onyx.chat.models import ExtractedContextFiles\nfrom onyx.chat.models import FileToolMetadata\nfrom onyx.chat.models import LlmStepResult\nfrom onyx.chat.models import ToolCallSimple\nfrom onyx.chat.prompt_utils import build_reminder_message\nfrom onyx.chat.prompt_utils import build_system_prompt\nfrom onyx.chat.prompt_utils import (\n    get_default_base_system_prompt,\n)\nfrom onyx.configs.app_configs import INTEGRATION_TESTS_MODE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import MessageType\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.context.search.models import SearchDocsResponse\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.memory import add_memory\nfrom onyx.db.memory import update_memory_at_index\nfrom onyx.db.memory import UserMemoryContext\nfrom onyx.db.models import Persona\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.interfaces import LLMUserIdentity\nfrom onyx.llm.interfaces import ToolChoiceOptions\nfrom onyx.llm.utils import is_true_openai_model\nfrom onyx.prompts.chat_prompts import IMAGE_GEN_REMINDER\nfrom onyx.prompts.chat_prompts import OPEN_URL_REMINDER\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import OverallStop\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import ToolCallDebug\nfrom onyx.server.query_and_chat.streaming_models import TopLevelBranching\nfrom onyx.tools.built_in_tools import CITEABLE_TOOLS_NAMES\nfrom onyx.tools.built_in_tools import STOPPING_TOOLS_NAMES\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import ChatFile\nfrom onyx.tools.models import CustomToolCallSummary\nfrom onyx.tools.models import MemoryToolResponseSnapshot\nfrom onyx.tools.models import PythonToolRichResponse\nfrom onyx.tools.models import ToolCallInfo\nfrom onyx.tools.models import ToolCallKickoff\nfrom onyx.tools.models import ToolResponse\nfrom onyx.tools.tool_implementations.images.models import (\n    FinalImageGenerationResponse,\n)\nfrom onyx.tools.tool_implementations.memory.models import MemoryToolResponse\nfrom onyx.tools.tool_implementations.python.python_tool import PythonTool\nfrom onyx.tools.tool_implementations.search.search_tool import SearchTool\nfrom onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map\nfrom onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool\nfrom onyx.tools.tool_runner import run_tool_calls\nfrom onyx.tracing.framework.create import trace\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\nclass EmptyLLMResponseError(RuntimeError):\n    \"\"\"Raised when the streamed LLM response completes without a usable answer.\"\"\"\n\n    def __init__(\n        self,\n        *,\n        provider: str,\n        model: str,\n        tool_choice: ToolChoiceOptions,\n        client_error_msg: str,\n        error_code: str = \"EMPTY_LLM_RESPONSE\",\n        is_retryable: bool = True,\n    ) -> None:\n        super().__init__(client_error_msg)\n        self.provider = provider\n        self.model = model\n        self.tool_choice = tool_choice\n        self.client_error_msg = client_error_msg\n        self.error_code = error_code\n        self.is_retryable = is_retryable\n\n\ndef _build_empty_llm_response_error(\n    llm: LLM,\n    llm_step_result: LlmStepResult,\n    tool_choice: ToolChoiceOptions,\n) -> EmptyLLMResponseError:\n    provider = llm.config.model_provider\n    model = llm.config.model_name\n\n    # OpenAI quota exhaustion has reached us as a streamed \"stop\" with zero content.\n    # When the stream is completely empty and there is no reasoning/tool output, surface\n    # the likely account-level cause instead of a generic tool-calling error.\n    if (\n        not llm_step_result.reasoning\n        and provider == LlmProviderNames.OPENAI\n        and is_true_openai_model(provider, model)\n    ):\n        return EmptyLLMResponseError(\n            provider=provider,\n            model=model,\n            tool_choice=tool_choice,\n            client_error_msg=(\n                \"The selected OpenAI model returned an empty streamed response \"\n                \"before producing any tokens. This commonly happens when the API \"\n                \"key or project has no remaining quota or billing is not enabled. \"\n                \"Verify quota and billing for this key and try again.\"\n            ),\n            error_code=\"BUDGET_EXCEEDED\",\n            is_retryable=False,\n        )\n\n    return EmptyLLMResponseError(\n        provider=provider,\n        model=model,\n        tool_choice=tool_choice,\n        client_error_msg=(\n            \"The selected model returned no final answer before the stream \"\n            \"completed. No text or tool calls were received from the upstream \"\n            \"provider.\"\n        ),\n    )\n\n\ndef _looks_like_xml_tool_call_payload(text: str | None) -> bool:\n    \"\"\"Detect XML-style marshaled tool calls emitted as plain text.\"\"\"\n    if not text:\n        return False\n    lowered = text.lower()\n    return (\n        \"<function_calls\" in lowered\n        and \"<invoke\" in lowered\n        and \"<parameter\" in lowered\n    )\n\n\ndef _try_fallback_tool_extraction(\n    llm_step_result: LlmStepResult,\n    tool_choice: ToolChoiceOptions,\n    fallback_extraction_attempted: bool,\n    tool_defs: list[dict],\n    turn_index: int,\n) -> tuple[LlmStepResult, bool]:\n    \"\"\"Attempt to extract tool calls from response text as a fallback.\n\n    This is a last resort fallback for low quality LLMs or those that don't have\n    tool calling from the serving layer. Also triggers if there's reasoning but\n    no answer and no tool calls.\n\n    Args:\n        llm_step_result: The result from the LLM step\n        tool_choice: The tool choice option used for this step\n        fallback_extraction_attempted: Whether fallback extraction was already attempted\n        tool_defs: List of tool definitions\n        turn_index: The current turn index for placement\n\n    Returns:\n        Tuple of (possibly updated LlmStepResult, whether fallback was attempted this call)\n    \"\"\"\n    if fallback_extraction_attempted:\n        return llm_step_result, False\n\n    no_tool_calls = (\n        not llm_step_result.tool_calls or len(llm_step_result.tool_calls) == 0\n    )\n    reasoning_but_no_answer_or_tools = (\n        llm_step_result.reasoning and not llm_step_result.answer and no_tool_calls\n    )\n    xml_tool_call_text_detected = no_tool_calls and (\n        _looks_like_xml_tool_call_payload(llm_step_result.answer)\n        or _looks_like_xml_tool_call_payload(llm_step_result.raw_answer)\n        or _looks_like_xml_tool_call_payload(llm_step_result.reasoning)\n    )\n    should_try_fallback = (\n        (tool_choice == ToolChoiceOptions.REQUIRED and no_tool_calls)\n        or reasoning_but_no_answer_or_tools\n        or xml_tool_call_text_detected\n    )\n\n    if not should_try_fallback:\n        return llm_step_result, False\n\n    # Try to extract from answer first, then fall back to reasoning\n    extracted_tool_calls: list[ToolCallKickoff] = []\n\n    if llm_step_result.answer:\n        extracted_tool_calls = extract_tool_calls_from_response_text(\n            response_text=llm_step_result.answer,\n            tool_definitions=tool_defs,\n            placement=Placement(turn_index=turn_index),\n        )\n    if (\n        not extracted_tool_calls\n        and llm_step_result.raw_answer\n        and llm_step_result.raw_answer != llm_step_result.answer\n    ):\n        extracted_tool_calls = extract_tool_calls_from_response_text(\n            response_text=llm_step_result.raw_answer,\n            tool_definitions=tool_defs,\n            placement=Placement(turn_index=turn_index),\n        )\n    if not extracted_tool_calls and llm_step_result.reasoning:\n        extracted_tool_calls = extract_tool_calls_from_response_text(\n            response_text=llm_step_result.reasoning,\n            tool_definitions=tool_defs,\n            placement=Placement(turn_index=turn_index),\n        )\n    if extracted_tool_calls:\n        logger.info(\n            f\"Extracted {len(extracted_tool_calls)} tool call(s) from response text as fallback\"\n        )\n        return (\n            LlmStepResult(\n                reasoning=llm_step_result.reasoning,\n                answer=llm_step_result.answer,\n                tool_calls=extracted_tool_calls,\n                raw_answer=llm_step_result.raw_answer,\n            ),\n            True,\n        )\n\n    return llm_step_result, True\n\n\n# Hardcoded oppinionated value, might breaks down to something like:\n# Cycle 1: Calls web_search for something\n# Cycle 2: Calls open_url for some results\n# Cycle 3: Calls web_search for some other aspect of the question\n# Cycle 4: Calls open_url for some results\n# Cycle 5: Maybe call open_url for some additional results or because last set failed\n# Cycle 6: No more tools available, forced to answer\nMAX_LLM_CYCLES = 6\n\n\ndef _build_context_file_citation_mapping(\n    file_metadata: list[ContextFileMetadata],\n    starting_citation_num: int = 1,\n) -> CitationMapping:\n    \"\"\"Build citation mapping for context files.\n\n    Converts context file metadata into SearchDoc objects that can be cited.\n    Citation numbers start from the provided starting number.\n\n    Args:\n        file_metadata: List of context file metadata\n        starting_citation_num: Starting citation number (default: 1)\n\n    Returns:\n        Dictionary mapping citation numbers to SearchDoc objects\n    \"\"\"\n    citation_mapping: CitationMapping = {}\n\n    for idx, file_meta in enumerate(file_metadata, start=starting_citation_num):\n        search_doc = SearchDoc(\n            document_id=file_meta.file_id,\n            chunk_ind=0,\n            semantic_identifier=file_meta.filename,\n            link=None,\n            blurb=file_meta.file_content,\n            source_type=DocumentSource.FILE,\n            boost=1,\n            hidden=False,\n            metadata={},\n            score=0.0,\n            match_highlights=[file_meta.file_content],\n        )\n        citation_mapping[idx] = search_doc\n\n    return citation_mapping\n\n\ndef _build_project_message(\n    context_files: ExtractedContextFiles | None,\n    token_counter: Callable[[str], int] | None,\n) -> list[ChatMessageSimple]:\n    \"\"\"Build messages for context-injected / tool-backed files.\n\n    Returns up to two messages:\n    1. The full-text files message (if file_texts is populated).\n    2. A lightweight metadata message for files the LLM should access via the\n       FileReaderTool (e.g. oversized files that don't fit in context).\n    \"\"\"\n    if not context_files:\n        return []\n\n    messages: list[ChatMessageSimple] = []\n    if context_files.file_texts:\n        messages.append(\n            _create_context_files_message(context_files, token_counter=None)\n        )\n    if context_files.file_metadata_for_tool and token_counter:\n        messages.append(\n            _create_file_tool_metadata_message(\n                context_files.file_metadata_for_tool, token_counter\n            )\n        )\n    return messages\n\n\ndef construct_message_history(\n    system_prompt: ChatMessageSimple | None,\n    custom_agent_prompt: ChatMessageSimple | None,\n    simple_chat_history: list[ChatMessageSimple],\n    reminder_message: ChatMessageSimple | None,\n    context_files: ExtractedContextFiles | None,\n    available_tokens: int,\n    last_n_user_messages: int | None = None,\n    token_counter: Callable[[str], int] | None = None,\n    all_injected_file_metadata: dict[str, FileToolMetadata] | None = None,\n) -> list[ChatMessageSimple]:\n    if last_n_user_messages is not None:\n        if last_n_user_messages <= 0:\n            raise ValueError(\n                \"filtering chat history by last N user messages must be a value greater than 0\"\n            )\n\n    # Build the project / file-metadata messages up front so we can use their\n    # actual token counts for the budget.\n    project_messages = _build_project_message(context_files, token_counter)\n    project_messages_tokens = sum(m.token_count for m in project_messages)\n\n    history_token_budget = available_tokens\n    history_token_budget -= system_prompt.token_count if system_prompt else 0\n    history_token_budget -= (\n        custom_agent_prompt.token_count if custom_agent_prompt else 0\n    )\n    history_token_budget -= project_messages_tokens\n    history_token_budget -= reminder_message.token_count if reminder_message else 0\n\n    if history_token_budget < 0:\n        raise ValueError(\"Not enough tokens available to construct message history\")\n\n    if system_prompt:\n        system_prompt.should_cache = True\n\n    # If no history, build minimal context\n    if not simple_chat_history:\n        result = [system_prompt] if system_prompt else []\n        if custom_agent_prompt:\n            result.append(custom_agent_prompt)\n        result.extend(project_messages)\n        if reminder_message:\n            result.append(reminder_message)\n        return result\n\n    # If last_n_user_messages is set, filter history to only include the last n user messages\n    if last_n_user_messages is not None:\n        # Find all user message indices\n        user_msg_indices = [\n            i\n            for i, msg in enumerate(simple_chat_history)\n            if msg.message_type == MessageType.USER\n        ]\n\n        if not user_msg_indices:\n            raise ValueError(\"No user message found in simple_chat_history\")\n\n        # If we have more than n user messages, keep only the last n\n        if len(user_msg_indices) > last_n_user_messages:\n            # Find the index of the n-th user message from the end\n            # For example, if last_n_user_messages=2, we want the 2nd-to-last user message\n            nth_user_msg_index = user_msg_indices[-(last_n_user_messages)]\n            # Keep everything from that user message onwards\n            simple_chat_history = simple_chat_history[nth_user_msg_index:]\n\n    # Find the last USER message in the history\n    # The history may contain tool calls and responses after the last user message\n    last_user_msg_index = None\n    for i in range(len(simple_chat_history) - 1, -1, -1):\n        if simple_chat_history[i].message_type == MessageType.USER:\n            last_user_msg_index = i\n            break\n\n    if last_user_msg_index is None:\n        raise ValueError(\"No user message found in simple_chat_history\")\n\n    # Split history into three parts:\n    # 1. History before the last user message\n    # 2. The last user message\n    # 3. Messages after the last user message (tool calls, responses, etc.)\n    history_before_last_user = simple_chat_history[:last_user_msg_index]\n    last_user_message = simple_chat_history[last_user_msg_index]\n    messages_after_last_user = simple_chat_history[last_user_msg_index + 1 :]\n\n    # Calculate tokens needed for the last user message and everything after it\n    last_user_tokens = last_user_message.token_count\n    after_user_tokens = sum(msg.token_count for msg in messages_after_last_user)\n\n    # Check if we can fit at least the last user message and messages after it\n    required_tokens = last_user_tokens + after_user_tokens\n    if required_tokens > history_token_budget:\n        raise ValueError(\n            f\"Not enough tokens to include the last user message and subsequent messages. \"\n            f\"Required: {required_tokens}, Available: {history_token_budget}\"\n        )\n\n    # Calculate remaining budget for history before the last user message\n    remaining_budget = history_token_budget - required_tokens\n\n    # Truncate history_before_last_user from the top to fit in remaining budget.\n    # Track dropped file messages so we can provide their metadata to the\n    # FileReaderTool instead.\n    truncated_history_before: list[ChatMessageSimple] = []\n    dropped_file_ids: list[str] = []\n    current_token_count = 0\n\n    for msg in reversed(history_before_last_user):\n        if current_token_count + msg.token_count <= remaining_budget:\n            msg.should_cache = True\n            truncated_history_before.insert(0, msg)\n            current_token_count += msg.token_count\n        else:\n            # Can't fit this message, stop truncating.\n            # This message and everything older is dropped.\n            break\n\n    # Collect file_ids from ALL dropped messages (those not in\n    # truncated_history_before). The truncation loop above keeps the most\n    # recent messages, so the dropped ones are at the start of the original\n    # list up to (len(history) - len(kept)).\n    num_kept = len(truncated_history_before)\n    for msg in history_before_last_user[: len(history_before_last_user) - num_kept]:\n        if msg.file_id is not None:\n            dropped_file_ids.append(msg.file_id)\n\n    # Also treat \"orphaned\" metadata entries as dropped -- these are files\n    # from messages removed by summary truncation (before convert_chat_history\n    # ran), so no ChatMessageSimple was ever tagged with their file_id.\n    if all_injected_file_metadata:\n        surviving_file_ids = {\n            msg.file_id for msg in simple_chat_history if msg.file_id is not None\n        }\n        for fid in all_injected_file_metadata:\n            if fid not in surviving_file_ids and fid not in dropped_file_ids:\n                dropped_file_ids.append(fid)\n\n    # Build a forgotten-files metadata message if any file messages were\n    # dropped AND we have metadata for them (meaning the FileReaderTool is\n    # available). Reserve tokens for this message in the budget.\n    forgotten_files_message: ChatMessageSimple | None = None\n    if dropped_file_ids and all_injected_file_metadata and token_counter:\n        forgotten_meta = [\n            all_injected_file_metadata[fid]\n            for fid in dropped_file_ids\n            if fid in all_injected_file_metadata\n        ]\n        if forgotten_meta:\n            logger.debug(\n                f\"FileReader: building forgotten-files message for {[(m.file_id, m.filename) for m in forgotten_meta]}\"\n            )\n            forgotten_files_message = _create_file_tool_metadata_message(\n                forgotten_meta, token_counter\n            )\n            # Shrink the remaining budget. If the metadata message doesn't\n            # fit we may need to drop more history messages.\n            remaining_budget -= forgotten_files_message.token_count\n            while truncated_history_before and current_token_count > remaining_budget:\n                evicted = truncated_history_before.pop(0)\n                current_token_count -= evicted.token_count\n                # If the evicted message is itself a file, add it to the\n                # forgotten metadata (it's now dropped too).\n                if (\n                    evicted.file_id is not None\n                    and evicted.file_id in all_injected_file_metadata\n                    and evicted.file_id not in {m.file_id for m in forgotten_meta}\n                ):\n                    forgotten_meta.append(all_injected_file_metadata[evicted.file_id])\n                    # Rebuild the message with the new entry\n                    forgotten_files_message = _create_file_tool_metadata_message(\n                        forgotten_meta, token_counter\n                    )\n\n    # Attach project images to the last user message\n    if context_files and context_files.image_files:\n        existing_images = last_user_message.image_files or []\n        last_user_message = ChatMessageSimple(\n            message=last_user_message.message,\n            token_count=last_user_message.token_count,\n            message_type=last_user_message.message_type,\n            image_files=existing_images + context_files.image_files,\n        )\n\n    # Build the final message list according to README ordering:\n    # [system], [history_before_last_user], [custom_agent], [context_files],\n    # [forgotten_files], [last_user_message], [messages_after_last_user], [reminder]\n    result = [system_prompt] if system_prompt else []\n\n    # 1. Add truncated history before last user message\n    result.extend(truncated_history_before)\n\n    # 2. Add custom agent prompt (inserted before last user message)\n    if custom_agent_prompt:\n        result.append(custom_agent_prompt)\n\n    # 3. Add context files / file-metadata messages (inserted before last user message)\n    result.extend(project_messages)\n\n    # 4. Add forgotten-files metadata (right before the user's question)\n    if forgotten_files_message:\n        result.append(forgotten_files_message)\n\n    # 5. Add last user message (with context images attached)\n    result.append(last_user_message)\n\n    # 6. Add messages after last user message (tool calls, responses, etc.)\n    result.extend(messages_after_last_user)\n\n    # 7. Add reminder message at the very end\n    if reminder_message:\n        result.append(reminder_message)\n\n    return _drop_orphaned_tool_call_responses(result)\n\n\ndef _drop_orphaned_tool_call_responses(\n    messages: list[ChatMessageSimple],\n) -> list[ChatMessageSimple]:\n    \"\"\"Drop tool response messages whose tool_call_id is not in prior assistant tool calls.\n\n    This can happen when history truncation drops an ASSISTANT tool-call message but\n    leaves a later TOOL_CALL_RESPONSE message in context. Some providers (e.g. Ollama)\n    reject such history with an \"unexpected tool call id\" error.\n    \"\"\"\n    known_tool_call_ids: set[str] = set()\n    sanitized: list[ChatMessageSimple] = []\n\n    for msg in messages:\n        if msg.message_type == MessageType.ASSISTANT and msg.tool_calls:\n            for tool_call in msg.tool_calls:\n                known_tool_call_ids.add(tool_call.tool_call_id)\n            sanitized.append(msg)\n            continue\n\n        if msg.message_type == MessageType.TOOL_CALL_RESPONSE:\n            if msg.tool_call_id and msg.tool_call_id in known_tool_call_ids:\n                sanitized.append(msg)\n            else:\n                logger.debug(\n                    \"Dropping orphaned tool response with tool_call_id=%s while constructing message history\",\n                    msg.tool_call_id,\n                )\n            continue\n\n        sanitized.append(msg)\n\n    return sanitized\n\n\ndef _create_file_tool_metadata_message(\n    file_metadata: list[FileToolMetadata],\n    token_counter: Callable[[str], int],\n) -> ChatMessageSimple:\n    \"\"\"Build a lightweight metadata-only message listing files available via FileReaderTool.\n\n    Used when files are too large to fit in context and the vector DB is\n    disabled, so the LLM must use ``read_file`` to inspect them.\n    \"\"\"\n    lines = [\n        \"You have access to the following files. Use the read_file tool to \"\n        \"read sections of any file. You MUST pass the file_id UUID (not the \"\n        \"filename) to read_file:\"\n    ]\n    for meta in file_metadata:\n        lines.append(\n            f'- file_id=\"{meta.file_id}\" filename=\"{meta.filename}\" (~{meta.approx_char_count:,} chars)'\n        )\n\n    message_content = \"\\n\".join(lines)\n    return ChatMessageSimple(\n        message=message_content,\n        token_count=token_counter(message_content),\n        message_type=MessageType.USER,\n    )\n\n\ndef _create_context_files_message(\n    context_files: ExtractedContextFiles,\n    token_counter: Callable[[str], int] | None,  # noqa: ARG001\n) -> ChatMessageSimple:\n    \"\"\"Convert context files to a ChatMessageSimple message.\n\n    Format follows the README specification for document representation.\n    \"\"\"\n    import json\n\n    # Format as documents JSON as described in README\n    documents_list = []\n    for idx, file_text in enumerate(context_files.file_texts, start=1):\n        title = (\n            context_files.file_metadata[idx - 1].filename\n            if idx - 1 < len(context_files.file_metadata)\n            else None\n        )\n        entry: dict[str, Any] = {\"document\": idx}\n        if title:\n            entry[\"title\"] = title\n        entry[\"contents\"] = file_text\n        documents_list.append(entry)\n\n    documents_json = json.dumps({\"documents\": documents_list}, indent=2)\n    message_content = f\"Here are some documents provided for context, they may not all be relevant:\\n{documents_json}\"\n\n    # Use pre-calculated token count from context_files\n    return ChatMessageSimple(\n        message=message_content,\n        token_count=context_files.total_token_count,\n        message_type=MessageType.USER,\n    )\n\n\ndef run_llm_loop(\n    emitter: Emitter,\n    state_container: ChatStateContainer,\n    simple_chat_history: list[ChatMessageSimple],\n    tools: list[Tool],\n    custom_agent_prompt: str | None,\n    context_files: ExtractedContextFiles,\n    persona: Persona | None,\n    user_memory_context: UserMemoryContext | None,\n    llm: LLM,\n    token_counter: Callable[[str], int],\n    db_session: Session,\n    forced_tool_id: int | None = None,\n    user_identity: LLMUserIdentity | None = None,\n    chat_session_id: str | None = None,\n    chat_files: list[ChatFile] | None = None,\n    include_citations: bool = True,\n    all_injected_file_metadata: dict[str, FileToolMetadata] | None = None,\n    inject_memories_in_prompt: bool = True,\n) -> None:\n    with trace(\n        \"run_llm_loop\",\n        group_id=chat_session_id,\n        metadata={\n            \"tenant_id\": get_current_tenant_id(),\n            \"chat_session_id\": chat_session_id,\n        },\n    ):\n        # Fix some LiteLLM issues,\n        from onyx.llm.litellm_singleton.config import (\n            initialize_litellm,\n        )  # Here for lazy load LiteLLM\n\n        initialize_litellm()\n\n        # Track when the loop starts for calculating time-to-answer\n        loop_start_time = time.monotonic()\n\n        # Initialize citation processor for handling citations dynamically\n        # When include_citations is True, use HYPERLINK mode to format citations as [[1]](url)\n        # When include_citations is False, use REMOVE mode to strip citations from output\n        citation_processor = DynamicCitationProcessor(\n            citation_mode=(\n                CitationMode.HYPERLINK if include_citations else CitationMode.REMOVE\n            )\n        )\n\n        # Add project file citation mappings if project files are present\n        project_citation_mapping: CitationMapping = {}\n        if context_files.file_metadata:\n            project_citation_mapping = _build_context_file_citation_mapping(\n                context_files.file_metadata\n            )\n            citation_processor.update_citation_mapping(project_citation_mapping)\n\n        llm_step_result = LlmStepResult(\n            reasoning=None,\n            answer=None,\n            tool_calls=None,\n            raw_answer=None,\n        )\n\n        # Pass the total budget to construct_message_history, which will handle token allocation\n        available_tokens = llm.config.max_input_tokens\n        tool_choice: ToolChoiceOptions = ToolChoiceOptions.AUTO\n        # Initialize gathered_documents with project files if present\n        gathered_documents: list[SearchDoc] | None = (\n            list(project_citation_mapping.values())\n            if project_citation_mapping\n            else None\n        )\n        # TODO allow citing of images in Projects. Since attached to the last user message, it has no text associated with it.\n        # One future workaround is to include the images as separate user messages with citation information and process those.\n        always_cite_documents: bool = bool(\n            context_files.use_as_search_filter or context_files.file_texts\n        )\n        should_cite_documents: bool = False\n        ran_image_gen: bool = False\n        just_ran_web_search: bool = False\n        has_called_search_tool: bool = False\n        code_interpreter_file_generated: bool = False\n        fallback_extraction_attempted: bool = False\n        citation_mapping: dict[int, str] = {}  # Maps citation_num -> document_id/URL\n\n        # Fetch this in a short-lived session so the long-running stream loop does\n        # not pin a connection just to keep read state alive.\n        with get_session_with_current_tenant() as prompt_db_session:\n            default_base_system_prompt: str = get_default_base_system_prompt(\n                prompt_db_session\n            )\n        system_prompt = None\n        custom_agent_prompt_msg = None\n\n        reasoning_cycles = 0\n        for llm_cycle_count in range(MAX_LLM_CYCLES):\n            # Handling tool calls based on cycle count and past cycle conditions\n            out_of_cycles = llm_cycle_count == MAX_LLM_CYCLES - 1\n            if forced_tool_id:\n                # Needs to be just the single one because the \"required\" currently doesn't have a specified tool, just a binary\n                final_tools = [tool for tool in tools if tool.id == forced_tool_id]\n                if not final_tools:\n                    raise ValueError(f\"Tool {forced_tool_id} not found in tools\")\n                tool_choice = ToolChoiceOptions.REQUIRED\n                forced_tool_id = None\n            elif out_of_cycles or ran_image_gen:\n                # Last cycle, no tools allowed, just answer!\n                tool_choice = ToolChoiceOptions.NONE\n                final_tools = []\n            else:\n                tool_choice = ToolChoiceOptions.AUTO\n                final_tools = tools\n\n            # Handling the system prompt and custom agent prompt\n            # The section below calculates the available tokens for history a bit more accurately\n            # now that project files are loaded in.\n            if persona and persona.replace_base_system_prompt:\n                # Handles the case where user has checked off the \"Replace base system prompt\" checkbox\n                system_prompt = (\n                    ChatMessageSimple(\n                        message=persona.system_prompt,\n                        token_count=token_counter(persona.system_prompt),\n                        message_type=MessageType.SYSTEM,\n                    )\n                    if persona.system_prompt\n                    else None\n                )\n                custom_agent_prompt_msg = None\n            else:\n                # If it's an empty string, we assume the user does not want to include it as an empty System message\n                if default_base_system_prompt:\n                    prompt_memory_context = (\n                        user_memory_context\n                        if inject_memories_in_prompt\n                        else (\n                            user_memory_context.without_memories()\n                            if user_memory_context\n                            else None\n                        )\n                    )\n                    system_prompt_str = build_system_prompt(\n                        base_system_prompt=default_base_system_prompt,\n                        datetime_aware=persona.datetime_aware if persona else True,\n                        user_memory_context=prompt_memory_context,\n                        tools=tools,\n                        should_cite_documents=should_cite_documents\n                        or always_cite_documents,\n                    )\n                    system_prompt = ChatMessageSimple(\n                        message=system_prompt_str,\n                        token_count=token_counter(system_prompt_str),\n                        message_type=MessageType.SYSTEM,\n                    )\n                    custom_agent_prompt_msg = (\n                        ChatMessageSimple(\n                            message=custom_agent_prompt,\n                            token_count=token_counter(custom_agent_prompt),\n                            message_type=MessageType.USER,\n                        )\n                        if custom_agent_prompt\n                        else None\n                    )\n                else:\n                    # If there is a custom agent prompt, it replaces the system prompt when the default system prompt is empty\n                    system_prompt = (\n                        ChatMessageSimple(\n                            message=custom_agent_prompt,\n                            token_count=token_counter(custom_agent_prompt),\n                            message_type=MessageType.SYSTEM,\n                        )\n                        if custom_agent_prompt\n                        else None\n                    )\n                    custom_agent_prompt_msg = None\n\n            reminder_message_text: str | None\n            if ran_image_gen:\n                # Some models are trained to give back images to the user for some similar tool\n                # This is to prevent it generating things like:\n                # [Cute Cat](attachment://a_cute_cat_sitting_playfully.png)\n                reminder_message_text = IMAGE_GEN_REMINDER\n            elif just_ran_web_search and not out_of_cycles:\n                reminder_message_text = OPEN_URL_REMINDER\n            else:\n                # This is the default case, the LLM at this point may answer so it is important\n                # to include the reminder. Potentially this should also mention citation\n                reminder_message_text = build_reminder_message(\n                    reminder_text=(\n                        persona.task_prompt if persona and persona.task_prompt else None\n                    ),\n                    include_citation_reminder=should_cite_documents\n                    or always_cite_documents,\n                    include_file_reminder=code_interpreter_file_generated,\n                    is_last_cycle=out_of_cycles,\n                )\n\n            reminder_msg = (\n                ChatMessageSimple(\n                    message=reminder_message_text,\n                    token_count=token_counter(reminder_message_text),\n                    message_type=MessageType.USER_REMINDER,\n                )\n                if reminder_message_text\n                else None\n            )\n\n            truncated_message_history = construct_message_history(\n                system_prompt=system_prompt,\n                custom_agent_prompt=custom_agent_prompt_msg,\n                simple_chat_history=simple_chat_history,\n                reminder_message=reminder_msg,\n                context_files=context_files,\n                available_tokens=available_tokens,\n                token_counter=token_counter,\n                all_injected_file_metadata=all_injected_file_metadata,\n            )\n\n            # This calls the LLM, yields packets (reasoning, answers, etc.) and returns the result\n            # It also pre-processes the tool calls in preparation for running them\n            tool_defs = [tool.tool_definition() for tool in final_tools]\n\n            # Calculate total processing time from loop start until now\n            # This measures how long the user waits before the answer starts streaming\n            pre_answer_processing_time = time.monotonic() - loop_start_time\n\n            llm_step_result, has_reasoned = run_llm_step(\n                emitter=emitter,\n                history=truncated_message_history,\n                tool_definitions=tool_defs,\n                tool_choice=tool_choice,\n                llm=llm,\n                placement=Placement(turn_index=llm_cycle_count + reasoning_cycles),\n                citation_processor=citation_processor,\n                state_container=state_container,\n                # The rich docs representation is passed in so that when yielding the answer, it can also\n                # immediately yield the full set of found documents. This gives us the option to show the\n                # final set of documents immediately if desired.\n                final_documents=gathered_documents,\n                user_identity=user_identity,\n                pre_answer_processing_time=pre_answer_processing_time,\n            )\n            if has_reasoned:\n                reasoning_cycles += 1\n\n            # Fallback extraction for LLMs that don't support tool calling natively or are lower quality\n            # and might incorrectly output tool calls in other channels\n            llm_step_result, attempted = _try_fallback_tool_extraction(\n                llm_step_result=llm_step_result,\n                tool_choice=tool_choice,\n                fallback_extraction_attempted=fallback_extraction_attempted,\n                tool_defs=tool_defs,\n                turn_index=llm_cycle_count + reasoning_cycles,\n            )\n            if attempted:\n                # To prevent the case of excessive looping with bad models, we only allow one fallback attempt\n                fallback_extraction_attempted = True\n\n            # Save citation mapping after each LLM step for incremental state updates\n            state_container.set_citation_mapping(citation_processor.citation_to_doc)\n\n            # Run the LLM selected tools, there is some more logic here than a simple execution\n            # each tool might have custom logic here\n            tool_responses: list[ToolResponse] = []\n            tool_calls = llm_step_result.tool_calls or []\n\n            if INTEGRATION_TESTS_MODE and tool_calls:\n                for tool_call in tool_calls:\n                    emitter.emit(\n                        Packet(\n                            placement=tool_call.placement,\n                            obj=ToolCallDebug(\n                                tool_call_id=tool_call.tool_call_id,\n                                tool_name=tool_call.tool_name,\n                                tool_args=tool_call.tool_args,\n                            ),\n                        )\n                    )\n\n            if len(tool_calls) > 1:\n                emitter.emit(\n                    Packet(\n                        placement=Placement(\n                            turn_index=tool_calls[0].placement.turn_index\n                        ),\n                        obj=TopLevelBranching(num_parallel_branches=len(tool_calls)),\n                    )\n                )\n\n            # Quick note for why citation_mapping and citation_processors are both needed:\n            # 1. Tools return lightweight string mappings, not SearchDoc objects\n            # 2. The SearchDoc resolution is deliberately deferred to llm_loop.py\n            # 3. The citation_processor operates on SearchDoc objects and can't provide a complete reverse URL lookup for\n            # in-flight citations\n            # It can be cleaned up but not super trivial or worthwhile right now\n            just_ran_web_search = False\n            parallel_tool_call_results = run_tool_calls(\n                tool_calls=tool_calls,\n                tools=final_tools,\n                message_history=truncated_message_history,\n                user_memory_context=user_memory_context,\n                user_info=None,  # TODO, this is part of memories right now, might want to separate it out\n                citation_mapping=citation_mapping,\n                next_citation_num=citation_processor.get_next_citation_number(),\n                max_concurrent_tools=None,\n                skip_search_query_expansion=has_called_search_tool,\n                chat_files=chat_files,\n                url_snippet_map=extract_url_snippet_map(gathered_documents or []),\n                inject_memories_in_prompt=inject_memories_in_prompt,\n            )\n            tool_responses = parallel_tool_call_results.tool_responses\n            citation_mapping = parallel_tool_call_results.updated_citation_mapping\n\n            # Failure case, give something reasonable to the LLM to try again\n            if tool_calls and not tool_responses:\n                failure_messages = create_tool_call_failure_messages(\n                    tool_calls, token_counter\n                )\n                simple_chat_history.extend(failure_messages)\n                continue\n\n            for tool_response in tool_responses:\n                # Extract tool_call from the response (set by run_tool_calls)\n                if tool_response.tool_call is None:\n                    raise ValueError(\"Tool response missing tool_call reference\")\n\n                tool_call = tool_response.tool_call\n                tab_index = tool_call.placement.tab_index\n\n                # Track if search tool was called (for skipping query expansion on subsequent calls)\n                if tool_call.tool_name == SearchTool.NAME:\n                    has_called_search_tool = True\n\n                # Track if code interpreter generated files with download links\n                if (\n                    tool_call.tool_name == PythonTool.NAME\n                    and not code_interpreter_file_generated\n                ):\n                    try:\n                        parsed = json.loads(tool_response.llm_facing_response)\n                        if parsed.get(\"generated_files\"):\n                            code_interpreter_file_generated = True\n                    except (json.JSONDecodeError, AttributeError):\n                        pass\n\n                # Build a mapping of tool names to tool objects for getting tool_id\n                tools_by_name = {tool.name: tool for tool in final_tools}\n\n                # Add the results to the chat history. Even though tools may run in parallel,\n                # LLM APIs require linear history, so results are added sequentially.\n                # Get the tool object to retrieve tool_id\n                tool = tools_by_name.get(tool_call.tool_name)\n                if not tool:\n                    raise ValueError(\n                        f\"Tool '{tool_call.tool_name}' not found in tools list\"\n                    )\n\n                # Extract search_docs if this is a search tool response\n                search_docs = None\n                displayed_docs = None\n                if isinstance(tool_response.rich_response, SearchDocsResponse):\n                    search_docs = tool_response.rich_response.search_docs\n                    displayed_docs = tool_response.rich_response.displayed_docs\n\n                    # Add ALL search docs to state container for DB persistence\n                    if search_docs:\n                        state_container.add_search_docs(search_docs)\n\n                    if gathered_documents:\n                        gathered_documents.extend(search_docs)\n                    else:\n                        gathered_documents = search_docs\n\n                    # This is used for the Open URL reminder in the next cycle\n                    # only do this if the web search tool yielded results\n                    if search_docs and tool_call.tool_name == WebSearchTool.NAME:\n                        just_ran_web_search = True\n\n                # Extract generated_images if this is an image generation tool response\n                generated_images = None\n                if isinstance(\n                    tool_response.rich_response, FinalImageGenerationResponse\n                ):\n                    generated_images = tool_response.rich_response.generated_images\n\n                # Extract generated_files if this is a code interpreter response\n                generated_files = None\n                if isinstance(tool_response.rich_response, PythonToolRichResponse):\n                    generated_files = (\n                        tool_response.rich_response.generated_files or None\n                    )\n\n                # Persist memory if this is a memory tool response\n                memory_snapshot: MemoryToolResponseSnapshot | None = None\n                if isinstance(tool_response.rich_response, MemoryToolResponse):\n                    persisted_memory_id: int | None = None\n                    if user_memory_context and user_memory_context.user_id:\n                        if tool_response.rich_response.index_to_replace is not None:\n                            memory = update_memory_at_index(\n                                user_id=user_memory_context.user_id,\n                                index=tool_response.rich_response.index_to_replace,\n                                new_text=tool_response.rich_response.memory_text,\n                                db_session=db_session,\n                            )\n                            persisted_memory_id = memory.id if memory else None\n                        else:\n                            memory = add_memory(\n                                user_id=user_memory_context.user_id,\n                                memory_text=tool_response.rich_response.memory_text,\n                                db_session=db_session,\n                            )\n                            persisted_memory_id = memory.id\n                    operation: Literal[\"add\", \"update\"] = (\n                        \"update\"\n                        if tool_response.rich_response.index_to_replace is not None\n                        else \"add\"\n                    )\n                    memory_snapshot = MemoryToolResponseSnapshot(\n                        memory_text=tool_response.rich_response.memory_text,\n                        operation=operation,\n                        memory_id=persisted_memory_id,\n                        index=tool_response.rich_response.index_to_replace,\n                    )\n\n                if memory_snapshot:\n                    saved_response = json.dumps(memory_snapshot.model_dump())\n                elif isinstance(tool_response.rich_response, CustomToolCallSummary):\n                    saved_response = json.dumps(\n                        tool_response.rich_response.model_dump()\n                    )\n                elif isinstance(tool_response.rich_response, str):\n                    saved_response = tool_response.rich_response\n                else:\n                    saved_response = tool_response.llm_facing_response\n\n                tool_call_info = ToolCallInfo(\n                    parent_tool_call_id=None,  # Top-level tool calls are attached to the chat message\n                    turn_index=llm_cycle_count + reasoning_cycles,\n                    tab_index=tab_index,\n                    tool_name=tool_call.tool_name,\n                    tool_call_id=tool_call.tool_call_id,\n                    tool_id=tool.id,\n                    reasoning_tokens=llm_step_result.reasoning,  # All tool calls from this loop share the same reasoning\n                    tool_call_arguments=tool_call.tool_args,\n                    tool_call_response=saved_response,\n                    search_docs=displayed_docs or search_docs,\n                    generated_images=generated_images,\n                    generated_files=generated_files,\n                )\n                # Add to state container for partial save support\n                state_container.add_tool_call(tool_call_info)\n\n                # Update citation processor if this was a search tool\n                update_citation_processor_from_tool_response(\n                    tool_response, citation_processor\n                )\n\n            # After processing all tool responses for this turn, add messages to history\n            # using OpenAI parallel tool calling format:\n            # 1. ONE ASSISTANT message with tool_calls array\n            # 2. N TOOL_CALL_RESPONSE messages (one per tool call)\n            if tool_responses:\n                # Filter to only responses with valid tool_call references\n                valid_tool_responses = [\n                    tr for tr in tool_responses if tr.tool_call is not None\n                ]\n\n                # Build ToolCallSimple list for all tool calls in this turn\n                tool_calls_simple: list[ToolCallSimple] = []\n                for tool_response in valid_tool_responses:\n                    tc = tool_response.tool_call\n                    assert (\n                        tc is not None\n                    )  # Already filtered above, this is just for typing purposes\n\n                    tool_call_message = tc.to_msg_str()\n                    tool_call_token_count = token_counter(tool_call_message)\n\n                    tool_calls_simple.append(\n                        ToolCallSimple(\n                            tool_call_id=tc.tool_call_id,\n                            tool_name=tc.tool_name,\n                            tool_arguments=tc.tool_args,\n                            token_count=tool_call_token_count,\n                        )\n                    )\n\n                # Create ONE ASSISTANT message with all tool calls for this turn\n                total_tool_call_tokens = sum(tc.token_count for tc in tool_calls_simple)\n                assistant_with_tools = ChatMessageSimple(\n                    message=\"\",  # No text content when making tool calls\n                    token_count=total_tool_call_tokens,\n                    message_type=MessageType.ASSISTANT,\n                    tool_calls=tool_calls_simple,\n                    image_files=None,\n                )\n                simple_chat_history.append(assistant_with_tools)\n\n                # Add TOOL_CALL_RESPONSE messages for each tool call\n                for tool_response in valid_tool_responses:\n                    tc = tool_response.tool_call\n                    assert tc is not None  # Already filtered above\n\n                    tool_response_message = tool_response.llm_facing_response\n                    tool_response_token_count = token_counter(tool_response_message)\n\n                    tool_response_msg = ChatMessageSimple(\n                        message=tool_response_message,\n                        token_count=tool_response_token_count,\n                        message_type=MessageType.TOOL_CALL_RESPONSE,\n                        tool_call_id=tc.tool_call_id,\n                        image_files=None,\n                    )\n                    simple_chat_history.append(tool_response_msg)\n\n            # If no tool calls, then it must have answered, wrap up\n            if not llm_step_result.tool_calls or len(llm_step_result.tool_calls) == 0:\n                break\n\n            # Certain tools do not allow further actions, force the LLM wrap up on the next cycle\n            if any(\n                tool.tool_name in STOPPING_TOOLS_NAMES\n                for tool in llm_step_result.tool_calls\n            ):\n                ran_image_gen = True\n\n            if llm_step_result.tool_calls and any(\n                tool.tool_name in CITEABLE_TOOLS_NAMES\n                for tool in llm_step_result.tool_calls\n            ):\n                # As long as 1 tool with citeable documents is called at any point, we ask the LLM to try to cite\n                should_cite_documents = True\n\n        if not llm_step_result.answer and not llm_step_result.tool_calls:\n            raise _build_empty_llm_response_error(\n                llm=llm,\n                llm_step_result=llm_step_result,\n                tool_choice=tool_choice,\n            )\n\n        if not llm_step_result.answer:\n            raise RuntimeError(\n                \"The LLM did not return a final answer after tool execution. \"\n                \"Typically this indicates invalid tool-call output, a model/provider mismatch, \"\n                \"or serving API misconfiguration.\"\n            )\n\n        emitter.emit(\n            Packet(\n                placement=Placement(turn_index=llm_cycle_count + reasoning_cycles),\n                obj=OverallStop(type=\"stop\"),\n            )\n        )\n"
  },
  {
    "path": "backend/onyx/chat/llm_step.py",
    "content": "import json\nimport re\nimport time\nimport uuid\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom collections.abc import Mapping\nfrom collections.abc import Sequence\nfrom html import unescape\nfrom typing import Any\nfrom typing import cast\n\nfrom onyx.chat.chat_state import ChatStateContainer\nfrom onyx.chat.citation_processor import DynamicCitationProcessor\nfrom onyx.chat.emitter import Emitter\nfrom onyx.chat.models import ChatMessageSimple\nfrom onyx.chat.models import LlmStepResult\nfrom onyx.chat.tool_call_args_streaming import maybe_emit_argument_delta\nfrom onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS\nfrom onyx.configs.app_configs import PROMPT_CACHE_CHAT_HISTORY\nfrom onyx.configs.constants import MessageType\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.interfaces import LanguageModelInput\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.interfaces import LLMConfig\nfrom onyx.llm.interfaces import LLMUserIdentity\nfrom onyx.llm.interfaces import ToolChoiceOptions\nfrom onyx.llm.model_response import Delta\nfrom onyx.llm.models import AssistantMessage\nfrom onyx.llm.models import ChatCompletionMessage\nfrom onyx.llm.models import FunctionCall\nfrom onyx.llm.models import ImageContentPart\nfrom onyx.llm.models import ImageUrlDetail\nfrom onyx.llm.models import ReasoningEffort\nfrom onyx.llm.models import SystemMessage\nfrom onyx.llm.models import TextContentPart\nfrom onyx.llm.models import ToolCall\nfrom onyx.llm.models import ToolMessage\nfrom onyx.llm.models import UserMessage\nfrom onyx.llm.prompt_cache.processor import process_with_prompt_cache\nfrom onyx.llm.utils import model_needs_formatting_reenabled\nfrom onyx.prompts.chat_prompts import CODE_BLOCK_MARKDOWN\nfrom onyx.prompts.constants import SYSTEM_REMINDER_TAG_CLOSE\nfrom onyx.prompts.constants import SYSTEM_REMINDER_TAG_OPEN\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseDelta\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseStart\nfrom onyx.server.query_and_chat.streaming_models import CitationInfo\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import ReasoningDelta\nfrom onyx.server.query_and_chat.streaming_models import ReasoningDone\nfrom onyx.server.query_and_chat.streaming_models import ReasoningStart\nfrom onyx.tools.models import ToolCallKickoff\nfrom onyx.tracing.framework.create import generation_span\nfrom onyx.utils.b64 import get_image_type_from_bytes\nfrom onyx.utils.jsonriver import Parser\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.postgres_sanitization import sanitize_string\nfrom onyx.utils.text_processing import find_all_json_objects\n\nlogger = setup_logger()\n\n_XML_INVOKE_BLOCK_RE = re.compile(\n    r\"<invoke\\b(?P<attrs>[^>]*)>(?P<body>.*?)</invoke>\",\n    re.IGNORECASE | re.DOTALL,\n)\n_XML_PARAMETER_RE = re.compile(\n    r\"<parameter\\b(?P<attrs>[^>]*)>(?P<value>.*?)</parameter>\",\n    re.IGNORECASE | re.DOTALL,\n)\n_FUNCTION_CALLS_OPEN_MARKER = \"<function_calls\"\n_FUNCTION_CALLS_CLOSE_MARKER = \"</function_calls>\"\n\n\nclass _XmlToolCallContentFilter:\n    \"\"\"Streaming filter that strips XML-style tool call payload blocks from text.\"\"\"\n\n    def __init__(self) -> None:\n        self._pending = \"\"\n        self._inside_function_calls_block = False\n\n    def process(self, content: str) -> str:\n        if not content:\n            return \"\"\n\n        self._pending += content\n        output_parts: list[str] = []\n\n        while self._pending:\n            pending_lower = self._pending.lower()\n\n            if self._inside_function_calls_block:\n                end_idx = pending_lower.find(_FUNCTION_CALLS_CLOSE_MARKER)\n                if end_idx == -1:\n                    # Keep buffering until we see the close marker.\n                    return \"\".join(output_parts)\n\n                # Drop the whole function_calls block.\n                self._pending = self._pending[\n                    end_idx + len(_FUNCTION_CALLS_CLOSE_MARKER) :\n                ]\n                self._inside_function_calls_block = False\n                continue\n\n            start_idx = _find_function_calls_open_marker(pending_lower)\n            if start_idx == -1:\n                # Keep only a possible prefix of \"<function_calls\" in the buffer so\n                # marker splits across chunks are handled correctly.\n                tail_len = _matching_open_marker_prefix_len(self._pending)\n                emit_upto = len(self._pending) - tail_len\n                if emit_upto > 0:\n                    output_parts.append(self._pending[:emit_upto])\n                    self._pending = self._pending[emit_upto:]\n                return \"\".join(output_parts)\n\n            if start_idx > 0:\n                output_parts.append(self._pending[:start_idx])\n\n            # Enter block-stripping mode and keep scanning for close marker.\n            self._pending = self._pending[start_idx:]\n            self._inside_function_calls_block = True\n\n        return \"\".join(output_parts)\n\n    def flush(self) -> str:\n        if self._inside_function_calls_block:\n            # Drop any incomplete block at stream end.\n            self._pending = \"\"\n            self._inside_function_calls_block = False\n            return \"\"\n\n        remaining = self._pending\n        self._pending = \"\"\n        return remaining\n\n\ndef _matching_open_marker_prefix_len(text: str) -> int:\n    \"\"\"Return longest suffix of text that matches prefix of \"<function_calls\".\"\"\"\n    max_len = min(len(text), len(_FUNCTION_CALLS_OPEN_MARKER) - 1)\n    text_lower = text.lower()\n    marker_lower = _FUNCTION_CALLS_OPEN_MARKER\n\n    for candidate_len in range(max_len, 0, -1):\n        if text_lower.endswith(marker_lower[:candidate_len]):\n            return candidate_len\n\n    return 0\n\n\ndef _is_valid_function_calls_open_follower(char: str | None) -> bool:\n    return char is None or char in {\">\", \" \", \"\\t\", \"\\n\", \"\\r\"}\n\n\ndef _find_function_calls_open_marker(text_lower: str) -> int:\n    \"\"\"Find '<function_calls' with a valid tag boundary follower.\"\"\"\n    search_from = 0\n    while True:\n        idx = text_lower.find(_FUNCTION_CALLS_OPEN_MARKER, search_from)\n        if idx == -1:\n            return -1\n\n        follower_pos = idx + len(_FUNCTION_CALLS_OPEN_MARKER)\n        follower = text_lower[follower_pos] if follower_pos < len(text_lower) else None\n        if _is_valid_function_calls_open_follower(follower):\n            return idx\n\n        search_from = idx + 1\n\n\ndef _try_parse_json_string(value: Any) -> Any:\n    \"\"\"Attempt to parse a JSON string value into its Python equivalent.\n\n    If value is a string that looks like a JSON array or object, parse it.\n    Otherwise return the value unchanged.\n\n    This handles the case where the LLM returns arguments like:\n    - queries: '[\"query1\", \"query2\"]' instead of [\"query1\", \"query2\"]\n    \"\"\"\n    if not isinstance(value, str):\n        return value\n\n    stripped = value.strip()\n    # Only attempt to parse if it looks like a JSON array or object\n    if not (\n        (stripped.startswith(\"[\") and stripped.endswith(\"]\"))\n        or (stripped.startswith(\"{\") and stripped.endswith(\"}\"))\n    ):\n        return value\n\n    try:\n        return json.loads(stripped)\n    except json.JSONDecodeError:\n        return value\n\n\ndef _parse_tool_args_to_dict(raw_args: Any) -> dict[str, Any]:\n    \"\"\"Parse tool arguments into a dict.\n\n    Normal case:\n    - raw_args == '{\"queries\":[...]}' -> dict via json.loads\n\n    Defensive case (JSON string literal of an object):\n    - raw_args == '\"{\\\\\"queries\\\\\":[...]}\"' -> json.loads -> str -> json.loads -> dict\n\n    Also handles the case where argument values are JSON strings that need parsing:\n    - {\"queries\": '[\"q1\", \"q2\"]'} -> {\"queries\": [\"q1\", \"q2\"]}\n\n    Anything else returns {}.\n    \"\"\"\n\n    if raw_args is None:\n        return {}\n\n    if isinstance(raw_args, dict):\n        # Parse any string values that look like JSON arrays/objects\n        return {\n            k: _try_parse_json_string(sanitize_string(v) if isinstance(v, str) else v)\n            for k, v in raw_args.items()\n        }\n\n    if not isinstance(raw_args, str):\n        return {}\n\n    # Sanitize before parsing to remove NULL bytes and surrogates\n    raw_args = sanitize_string(raw_args)\n\n    try:\n        parsed1: Any = json.loads(raw_args)\n    except json.JSONDecodeError:\n        return {}\n\n    if isinstance(parsed1, dict):\n        # Parse any string values that look like JSON arrays/objects\n        return {k: _try_parse_json_string(v) for k, v in parsed1.items()}\n\n    if isinstance(parsed1, str):\n        try:\n            parsed2: Any = json.loads(parsed1)\n        except json.JSONDecodeError:\n            return {}\n        if isinstance(parsed2, dict):\n            # Parse any string values that look like JSON arrays/objects\n            return {k: _try_parse_json_string(v) for k, v in parsed2.items()}\n        return {}\n\n    return {}\n\n\ndef _format_message_history_for_logging(\n    message_history: LanguageModelInput,\n) -> str:\n    \"\"\"Format message history for logging, with special handling for tool calls.\n\n    Tool calls are formatted as JSON with 4-space indentation for readability.\n    \"\"\"\n    formatted_lines = []\n\n    separator = \"================================================\"\n\n    # Handle single ChatCompletionMessage - wrap in list for uniform processing\n    if isinstance(\n        message_history, (SystemMessage, UserMessage, AssistantMessage, ToolMessage)\n    ):\n        message_history = [message_history]\n\n    # Handle sequence of messages\n    for i, msg in enumerate(message_history):\n        if isinstance(msg, SystemMessage):\n            formatted_lines.append(f\"Message {i + 1} [system]:\")\n            formatted_lines.append(separator)\n            formatted_lines.append(f\"{msg.content}\")\n\n        elif isinstance(msg, UserMessage):\n            formatted_lines.append(f\"Message {i + 1} [user]:\")\n            formatted_lines.append(separator)\n            if isinstance(msg.content, str):\n                formatted_lines.append(f\"{msg.content}\")\n            elif isinstance(msg.content, list):\n                # Handle multimodal content (text + images)\n                for part in msg.content:\n                    if isinstance(part, TextContentPart):\n                        formatted_lines.append(f\"{part.text}\")\n                    elif isinstance(part, ImageContentPart):\n                        url = part.image_url.url\n                        formatted_lines.append(f\"[Image: {url[:50]}...]\")\n\n        elif isinstance(msg, AssistantMessage):\n            formatted_lines.append(f\"Message {i + 1} [assistant]:\")\n            formatted_lines.append(separator)\n            if msg.content:\n                formatted_lines.append(f\"{msg.content}\")\n\n            if msg.tool_calls:\n                formatted_lines.append(\"Tool calls:\")\n                for tool_call in msg.tool_calls:\n                    tool_call_dict: dict[str, Any] = {\n                        \"id\": tool_call.id,\n                        \"type\": tool_call.type,\n                        \"function\": {\n                            \"name\": tool_call.function.name,\n                            \"arguments\": tool_call.function.arguments,\n                        },\n                    }\n                    tool_call_json = json.dumps(tool_call_dict, indent=4)\n                    formatted_lines.append(tool_call_json)\n\n        elif isinstance(msg, ToolMessage):\n            formatted_lines.append(f\"Message {i + 1} [tool]:\")\n            formatted_lines.append(separator)\n            formatted_lines.append(f\"Tool call ID: {msg.tool_call_id}\")\n            formatted_lines.append(f\"Response: {msg.content}\")\n\n        else:\n            # Fallback for unknown message types\n            formatted_lines.append(f\"Message {i + 1} [unknown]:\")\n            formatted_lines.append(separator)\n            formatted_lines.append(f\"{msg}\")\n\n        # Add separator before next message (or at end)\n        if i < len(message_history) - 1:\n            formatted_lines.append(separator)\n\n    return \"\\n\".join(formatted_lines)\n\n\ndef _update_tool_call_with_delta(\n    tool_calls_in_progress: dict[int, dict[str, Any]],\n    tool_call_delta: Any,\n) -> None:\n    index = tool_call_delta.index\n\n    if index not in tool_calls_in_progress:\n        tool_calls_in_progress[index] = {\n            # Fallback ID in case the provider never sends one via deltas.\n            \"id\": f\"fallback_{uuid.uuid4().hex}\",\n            \"name\": None,\n            \"arguments\": \"\",\n        }\n\n    if tool_call_delta.id:\n        tool_calls_in_progress[index][\"id\"] = tool_call_delta.id\n\n    if tool_call_delta.function:\n        if tool_call_delta.function.name:\n            tool_calls_in_progress[index][\"name\"] = tool_call_delta.function.name\n\n        if tool_call_delta.function.arguments:\n            tool_calls_in_progress[index][\n                \"arguments\"\n            ] += tool_call_delta.function.arguments\n\n\ndef _extract_tool_call_kickoffs(\n    id_to_tool_call_map: dict[int, dict[str, Any]],\n    turn_index: int,\n    tab_index: int | None = None,\n    sub_turn_index: int | None = None,\n) -> list[ToolCallKickoff]:\n    \"\"\"Extract ToolCallKickoff objects from the tool call map.\n\n    Returns a list of ToolCallKickoff objects for valid tool calls (those with both id and name).\n    Each tool call is assigned the given turn_index and a tab_index based on its order.\n\n    Args:\n        id_to_tool_call_map: Map of tool call index to tool call data\n        turn_index: The turn index for this set of tool calls\n        tab_index: If provided, use this tab_index for all tool calls (otherwise auto-increment)\n        sub_turn_index: The sub-turn index for nested tool calls\n    \"\"\"\n    tool_calls: list[ToolCallKickoff] = []\n    tab_index_calculated = 0\n    for tool_call_data in id_to_tool_call_map.values():\n        if tool_call_data.get(\"id\") and tool_call_data.get(\"name\"):\n            tool_args = _parse_tool_args_to_dict(tool_call_data.get(\"arguments\"))\n\n            tool_calls.append(\n                ToolCallKickoff(\n                    tool_call_id=tool_call_data[\"id\"],\n                    tool_name=tool_call_data[\"name\"],\n                    tool_args=tool_args,\n                    placement=Placement(\n                        turn_index=turn_index,\n                        tab_index=(\n                            tab_index_calculated if tab_index is None else tab_index\n                        ),\n                        sub_turn_index=sub_turn_index,\n                    ),\n                )\n            )\n            tab_index_calculated += 1\n    return tool_calls\n\n\ndef extract_tool_calls_from_response_text(\n    response_text: str | None,\n    tool_definitions: list[dict],\n    placement: Placement,\n) -> list[ToolCallKickoff]:\n    \"\"\"Extract tool calls from LLM response text by matching JSON against tool definitions.\n\n    This is a fallback mechanism for when the LLM was expected to return tool calls\n    but didn't use the proper tool call format. It searches for tool calls embedded\n    in response text (JSON first, then XML-like invoke blocks) that match available\n    tool definitions.\n\n    Args:\n        response_text: The LLM's text response to search for tool calls\n        tool_definitions: List of tool definitions to match against\n        placement: Placement information for the tool calls\n\n    Returns:\n        List of ToolCallKickoff objects for any matched tool calls\n    \"\"\"\n    if not response_text or not tool_definitions:\n        return []\n\n    # Build a map of tool names to their definitions\n    tool_name_to_def: dict[str, dict] = {}\n    for tool_def in tool_definitions:\n        if tool_def.get(\"type\") == \"function\" and \"function\" in tool_def:\n            func_def = tool_def[\"function\"]\n            tool_name = func_def.get(\"name\")\n            if tool_name:\n                tool_name_to_def[tool_name] = func_def\n\n    if not tool_name_to_def:\n        return []\n\n    matched_tool_calls: list[tuple[str, dict[str, Any]]] = []\n    # Find all JSON objects in the response text\n    json_objects = find_all_json_objects(response_text)\n    prev_json_obj: dict[str, Any] | None = None\n    prev_tool_call: tuple[str, dict[str, Any]] | None = None\n\n    for json_obj in json_objects:\n        matched_tool_call = _try_match_json_to_tool(json_obj, tool_name_to_def)\n        if not matched_tool_call:\n            continue\n\n        # `find_all_json_objects` can return both an outer tool-call object and\n        # its nested arguments object. If both resolve to the same tool call,\n        # drop only this nested duplicate artifact.\n        if (\n            prev_json_obj is not None\n            and prev_tool_call is not None\n            and matched_tool_call == prev_tool_call\n            and _is_nested_arguments_duplicate(\n                previous_json_obj=prev_json_obj,\n                current_json_obj=json_obj,\n                tool_name_to_def=tool_name_to_def,\n            )\n        ):\n            continue\n\n        matched_tool_calls.append(matched_tool_call)\n        prev_json_obj = json_obj\n        prev_tool_call = matched_tool_call\n\n    # Some providers/models emit XML-style function calls instead of JSON objects.\n    # Keep this as a fallback behind JSON extraction to preserve current behavior.\n    if not matched_tool_calls:\n        matched_tool_calls = _extract_xml_tool_calls_from_response_text(\n            response_text=response_text,\n            tool_name_to_def=tool_name_to_def,\n        )\n\n    tool_calls: list[ToolCallKickoff] = []\n    for tab_index, (tool_name, tool_args) in enumerate(matched_tool_calls):\n        tool_calls.append(\n            ToolCallKickoff(\n                tool_call_id=f\"extracted_{uuid.uuid4().hex[:8]}\",\n                tool_name=tool_name,\n                tool_args=tool_args,\n                placement=Placement(\n                    turn_index=placement.turn_index,\n                    tab_index=tab_index,\n                    sub_turn_index=placement.sub_turn_index,\n                ),\n            )\n        )\n\n    logger.info(\n        f\"Extracted {len(tool_calls)} tool call(s) from response text as fallback\"\n    )\n\n    return tool_calls\n\n\ndef _extract_xml_tool_calls_from_response_text(\n    response_text: str,\n    tool_name_to_def: dict[str, dict],\n) -> list[tuple[str, dict[str, Any]]]:\n    \"\"\"Extract XML-style tool calls from response text.\n\n    Supports formats such as:\n    <function_calls>\n      <invoke name=\"internal_search\">\n        <parameter name=\"queries\" string=\"false\">[\"foo\"]</parameter>\n      </invoke>\n    </function_calls>\n    \"\"\"\n    matched_tool_calls: list[tuple[str, dict[str, Any]]] = []\n\n    for invoke_match in _XML_INVOKE_BLOCK_RE.finditer(response_text):\n        invoke_attrs = invoke_match.group(\"attrs\")\n        tool_name = _extract_xml_attribute(invoke_attrs, \"name\")\n        if not tool_name or tool_name not in tool_name_to_def:\n            continue\n\n        tool_args: dict[str, Any] = {}\n        invoke_body = invoke_match.group(\"body\")\n        for parameter_match in _XML_PARAMETER_RE.finditer(invoke_body):\n            parameter_attrs = parameter_match.group(\"attrs\")\n            parameter_name = _extract_xml_attribute(parameter_attrs, \"name\")\n            if not parameter_name:\n                continue\n\n            string_attr = _extract_xml_attribute(parameter_attrs, \"string\")\n            tool_args[parameter_name] = _parse_xml_parameter_value(\n                raw_value=parameter_match.group(\"value\"),\n                string_attr=string_attr,\n            )\n\n        matched_tool_calls.append((tool_name, tool_args))\n\n    return matched_tool_calls\n\n\ndef _extract_xml_attribute(attrs: str, attr_name: str) -> str | None:\n    \"\"\"Extract a single XML-style attribute value from a tag attribute string.\"\"\"\n    attr_match = re.search(\n        rf\"\"\"\\b{re.escape(attr_name)}\\s*=\\s*(['\"])(.*?)\\1\"\"\",\n        attrs,\n        flags=re.IGNORECASE | re.DOTALL,\n    )\n    if not attr_match:\n        return None\n    return sanitize_string(unescape(attr_match.group(2).strip()))\n\n\ndef _parse_xml_parameter_value(raw_value: str, string_attr: str | None) -> Any:\n    \"\"\"Parse a parameter value from XML-style tool call payloads.\"\"\"\n    value = sanitize_string(unescape(raw_value).strip())\n\n    if string_attr and string_attr.lower() == \"true\":\n        return value\n\n    try:\n        return json.loads(value)\n    except json.JSONDecodeError:\n        return value\n\n\ndef _resolve_tool_arguments(obj: dict[str, Any]) -> dict[str, Any] | None:\n    \"\"\"Extract and parse an arguments/parameters value from a tool-call-like object.\n\n    Looks for \"arguments\" or \"parameters\" keys, handles JSON-string values,\n    and returns a dict if successful, or None otherwise.\n    \"\"\"\n    arguments = obj.get(\"arguments\", obj.get(\"parameters\", {}))\n    if isinstance(arguments, str):\n        arguments = sanitize_string(arguments)\n        try:\n            arguments = json.loads(arguments)\n        except json.JSONDecodeError:\n            arguments = {}\n    if isinstance(arguments, dict):\n        return arguments\n    return None\n\n\ndef _try_match_json_to_tool(\n    json_obj: dict[str, Any],\n    tool_name_to_def: dict[str, dict],\n) -> tuple[str, dict[str, Any]] | None:\n    \"\"\"Try to match a JSON object to a tool definition.\n\n    Supports several formats:\n    1. Direct tool call format: {\"name\": \"tool_name\", \"arguments\": {...}}\n    2. Function call format: {\"function\": {\"name\": \"tool_name\", \"arguments\": {...}}}\n    3. Tool name as key: {\"tool_name\": {...arguments...}}\n    4. Arguments matching a tool's parameter schema\n\n    Args:\n        json_obj: The JSON object to match\n        tool_name_to_def: Map of tool names to their function definitions\n\n    Returns:\n        Tuple of (tool_name, tool_args) if matched, None otherwise\n    \"\"\"\n    # Format 1: Direct tool call format {\"name\": \"...\", \"arguments\": {...}}\n    if \"name\" in json_obj and json_obj[\"name\"] in tool_name_to_def:\n        tool_name = json_obj[\"name\"]\n        arguments = _resolve_tool_arguments(json_obj)\n        if arguments is not None:\n            return (tool_name, arguments)\n\n    # Format 2: Function call format {\"function\": {\"name\": \"...\", \"arguments\": {...}}}\n    if \"function\" in json_obj and isinstance(json_obj[\"function\"], dict):\n        func_obj = json_obj[\"function\"]\n        if \"name\" in func_obj and func_obj[\"name\"] in tool_name_to_def:\n            tool_name = func_obj[\"name\"]\n            arguments = _resolve_tool_arguments(func_obj)\n            if arguments is not None:\n                return (tool_name, arguments)\n\n    # Format 3: Tool name as key {\"tool_name\": {...arguments...}}\n    for tool_name in tool_name_to_def:\n        if tool_name in json_obj:\n            arguments = json_obj[tool_name]\n            if isinstance(arguments, dict):\n                return (tool_name, arguments)\n\n    # Format 4: Check if the JSON object matches a tool's parameter schema\n    for tool_name, func_def in tool_name_to_def.items():\n        params = func_def.get(\"parameters\", {})\n        properties = params.get(\"properties\", {})\n        required = params.get(\"required\", [])\n\n        if not properties:\n            continue\n\n        # Check if all required parameters are present (empty required = all optional)\n        if all(req in json_obj for req in required):\n            # Check if any of the tool's properties are in the JSON object\n            matching_props = [prop for prop in properties if prop in json_obj]\n            if matching_props:\n                # Filter to only include known properties\n                filtered_args = {k: v for k, v in json_obj.items() if k in properties}\n                return (tool_name, filtered_args)\n\n    return None\n\n\ndef _is_nested_arguments_duplicate(\n    previous_json_obj: dict[str, Any],\n    current_json_obj: dict[str, Any],\n    tool_name_to_def: dict[str, dict],\n) -> bool:\n    \"\"\"Detect when current object is the nested args object from previous tool call.\"\"\"\n    extracted_args = _extract_nested_arguments_obj(previous_json_obj, tool_name_to_def)\n    return extracted_args is not None and current_json_obj == extracted_args\n\n\ndef _extract_nested_arguments_obj(\n    json_obj: dict[str, Any],\n    tool_name_to_def: dict[str, dict],\n) -> dict[str, Any] | None:\n    # Format 1: {\"name\": \"...\", \"arguments\": {...}} or {\"name\": \"...\", \"parameters\": {...}}\n    if \"name\" in json_obj and json_obj[\"name\"] in tool_name_to_def:\n        args_obj = json_obj.get(\"arguments\", json_obj.get(\"parameters\"))\n        if isinstance(args_obj, dict):\n            return args_obj\n\n    # Format 2: {\"function\": {\"name\": \"...\", \"arguments\": {...}}}\n    if \"function\" in json_obj and isinstance(json_obj[\"function\"], dict):\n        function_obj = json_obj[\"function\"]\n        if \"name\" in function_obj and function_obj[\"name\"] in tool_name_to_def:\n            args_obj = function_obj.get(\"arguments\", function_obj.get(\"parameters\"))\n            if isinstance(args_obj, dict):\n                return args_obj\n\n    # Format 3: {\"tool_name\": {...arguments...}}\n    for tool_name in tool_name_to_def:\n        if tool_name in json_obj and isinstance(json_obj[tool_name], dict):\n            return json_obj[tool_name]\n\n    return None\n\n\ndef _build_structured_assistant_message(msg: ChatMessageSimple) -> AssistantMessage:\n    tool_calls_list: list[ToolCall] | None = None\n    if msg.tool_calls:\n        tool_calls_list = [\n            ToolCall(\n                id=tc.tool_call_id,\n                type=\"function\",\n                function=FunctionCall(\n                    name=tc.tool_name,\n                    arguments=json.dumps(tc.tool_arguments),\n                ),\n            )\n            for tc in msg.tool_calls\n        ]\n\n    return AssistantMessage(\n        role=\"assistant\",\n        content=msg.message or None,\n        tool_calls=tool_calls_list,\n    )\n\n\ndef _build_structured_tool_response_message(msg: ChatMessageSimple) -> ToolMessage:\n    if not msg.tool_call_id:\n        raise ValueError(\n            f\"Tool call response message encountered but tool_call_id is not available. Message: {msg}\"\n        )\n\n    return ToolMessage(\n        role=\"tool\",\n        content=msg.message,\n        tool_call_id=msg.tool_call_id,\n    )\n\n\nclass _HistoryMessageFormatter:\n    def format_assistant_message(self, msg: ChatMessageSimple) -> AssistantMessage:\n        raise NotImplementedError\n\n    def format_tool_response_message(\n        self, msg: ChatMessageSimple\n    ) -> ToolMessage | UserMessage:\n        raise NotImplementedError\n\n\nclass _DefaultHistoryMessageFormatter(_HistoryMessageFormatter):\n    def format_assistant_message(self, msg: ChatMessageSimple) -> AssistantMessage:\n        return _build_structured_assistant_message(msg)\n\n    def format_tool_response_message(self, msg: ChatMessageSimple) -> ToolMessage:\n        return _build_structured_tool_response_message(msg)\n\n\nclass _OllamaHistoryMessageFormatter(_HistoryMessageFormatter):\n    def format_assistant_message(self, msg: ChatMessageSimple) -> AssistantMessage:\n        if not msg.tool_calls:\n            return _build_structured_assistant_message(msg)\n\n        tool_call_lines = [\n            (\n                f\"[Tool Call] name={tc.tool_name} id={tc.tool_call_id} args={json.dumps(tc.tool_arguments)}\"\n            )\n            for tc in msg.tool_calls\n        ]\n        assistant_content = (\n            \"\\n\".join([msg.message, *tool_call_lines])\n            if msg.message\n            else \"\\n\".join(tool_call_lines)\n        )\n        return AssistantMessage(\n            role=\"assistant\",\n            content=assistant_content,\n            tool_calls=None,\n        )\n\n    def format_tool_response_message(self, msg: ChatMessageSimple) -> UserMessage:\n        if not msg.tool_call_id:\n            raise ValueError(\n                f\"Tool call response message encountered but tool_call_id is not available. Message: {msg}\"\n            )\n\n        return UserMessage(\n            role=\"user\",\n            content=f\"[Tool Result] id={msg.tool_call_id}\\n{msg.message}\",\n        )\n\n\n_DEFAULT_HISTORY_MESSAGE_FORMATTER = _DefaultHistoryMessageFormatter()\n_OLLAMA_HISTORY_MESSAGE_FORMATTER = _OllamaHistoryMessageFormatter()\n\n\ndef _get_history_message_formatter(llm_config: LLMConfig) -> _HistoryMessageFormatter:\n    if llm_config.model_provider == LlmProviderNames.OLLAMA_CHAT:\n        return _OLLAMA_HISTORY_MESSAGE_FORMATTER\n\n    return _DEFAULT_HISTORY_MESSAGE_FORMATTER\n\n\ndef translate_history_to_llm_format(\n    history: list[ChatMessageSimple],\n    llm_config: LLMConfig,\n) -> LanguageModelInput:\n    \"\"\"Convert a list of ChatMessageSimple to LanguageModelInput format.\n\n    Converts ChatMessageSimple messages to ChatCompletionMessage format,\n    handling different message types and image files for multimodal support.\n    \"\"\"\n    messages: list[ChatCompletionMessage] = []\n    history_message_formatter = _get_history_message_formatter(llm_config)\n    # Note: cacheability is computed from pre-translation ChatMessageSimple types.\n    # Some providers flatten tool history into plain assistant/user text, so this split\n    # may be less semantically meaningful, but it remains safe and order-preserving.\n    last_cacheable_msg_idx = -1\n    all_previous_msgs_cacheable = True\n\n    for idx, msg in enumerate(history):\n        # if the message is being added to the history\n        if PROMPT_CACHE_CHAT_HISTORY and msg.message_type in [\n            MessageType.SYSTEM,\n            MessageType.USER,\n            MessageType.USER_REMINDER,\n            MessageType.ASSISTANT,\n            MessageType.TOOL_CALL_RESPONSE,\n        ]:\n            all_previous_msgs_cacheable = (\n                all_previous_msgs_cacheable and msg.should_cache\n            )\n            if all_previous_msgs_cacheable:\n                last_cacheable_msg_idx = idx\n\n        if msg.message_type == MessageType.SYSTEM:\n            system_msg = SystemMessage(\n                role=\"system\",\n                content=msg.message,\n            )\n            messages.append(system_msg)\n\n        elif msg.message_type == MessageType.USER:\n            # Handle user messages with potential images\n            if msg.image_files:\n                # Build content parts: text + images\n                content_parts: list[TextContentPart | ImageContentPart] = [\n                    TextContentPart(\n                        type=\"text\",\n                        text=msg.message,\n                    )\n                ]\n\n                # Add image parts\n                for img_file in msg.image_files:\n                    if img_file.file_type == ChatFileType.IMAGE:\n                        try:\n                            image_type = get_image_type_from_bytes(img_file.content)\n                            base64_data = img_file.to_base64()\n                            image_url = f\"data:{image_type};base64,{base64_data}\"\n\n                            image_part = ImageContentPart(\n                                type=\"image_url\",\n                                image_url=ImageUrlDetail(\n                                    url=image_url,\n                                    detail=None,\n                                ),\n                            )\n                            content_parts.append(image_part)\n                        except Exception as e:\n                            logger.warning(\n                                f\"Failed to process image file {img_file.file_id}: {e}. Skipping image.\"\n                            )\n                user_msg = UserMessage(\n                    role=\"user\",\n                    content=content_parts,\n                )\n                messages.append(user_msg)\n            else:\n                # Simple text-only user message\n                user_msg_text = UserMessage(\n                    role=\"user\",\n                    content=msg.message,\n                )\n                messages.append(user_msg_text)\n\n        elif msg.message_type == MessageType.USER_REMINDER:\n            # User reminder messages are wrapped with system-reminder tags\n            # and converted to UserMessage (LLM APIs don't have a native reminder type)\n            wrapped_content = f\"{SYSTEM_REMINDER_TAG_OPEN}\\n{msg.message}\\n{SYSTEM_REMINDER_TAG_CLOSE}\"\n            reminder_msg = UserMessage(\n                role=\"user\",\n                content=wrapped_content,\n            )\n            messages.append(reminder_msg)\n\n        elif msg.message_type == MessageType.ASSISTANT:\n            messages.append(history_message_formatter.format_assistant_message(msg))\n\n        elif msg.message_type == MessageType.TOOL_CALL_RESPONSE:\n            messages.append(history_message_formatter.format_tool_response_message(msg))\n\n        else:\n            logger.warning(\n                f\"Unknown message type {msg.message_type} in history. Skipping message.\"\n            )\n\n    # Apply model-specific formatting when translating to LLM format (e.g. OpenAI\n    # reasoning models need CODE_BLOCK_MARKDOWN prefix for correct markdown generation)\n    if model_needs_formatting_reenabled(llm_config.model_name):\n        for i, m in enumerate(messages):\n            if isinstance(m, SystemMessage):\n                messages[i] = SystemMessage(\n                    role=\"system\",\n                    content=CODE_BLOCK_MARKDOWN + m.content,\n                )\n                break\n\n    # prompt caching: rely on should_cache in ChatMessageSimple to\n    # pick the split point for the cacheable prefix and suffix\n    if last_cacheable_msg_idx != -1:\n        processed_messages, _ = process_with_prompt_cache(\n            llm_config=llm_config,\n            cacheable_prefix=messages[: last_cacheable_msg_idx + 1],\n            suffix=messages[last_cacheable_msg_idx + 1 :],\n            continuation=False,\n        )\n        assert isinstance(processed_messages, list)  # for mypy\n        messages = processed_messages\n\n    return messages\n\n\ndef _increment_turns(\n    turn_index: int, sub_turn_index: int | None\n) -> tuple[int, int | None]:\n    if sub_turn_index is None:\n        return turn_index + 1, None\n    else:\n        return turn_index, sub_turn_index + 1\n\n\ndef _delta_has_action(delta: Delta) -> bool:\n    return bool(delta.content or delta.reasoning_content or delta.tool_calls)\n\n\ndef run_llm_step_pkt_generator(\n    history: list[ChatMessageSimple],\n    tool_definitions: list[dict],\n    tool_choice: ToolChoiceOptions,\n    llm: LLM,\n    placement: Placement,\n    state_container: ChatStateContainer | None,\n    citation_processor: DynamicCitationProcessor | None,\n    reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,\n    final_documents: list[SearchDoc] | None = None,\n    user_identity: LLMUserIdentity | None = None,\n    custom_token_processor: (\n        Callable[[Delta | None, Any], tuple[Delta | None, Any]] | None\n    ) = None,\n    max_tokens: int | None = None,\n    # TODO: Temporary handling of nested tool calls with agents, figure out a better way to handle this\n    use_existing_tab_index: bool = False,\n    is_deep_research: bool = False,\n    pre_answer_processing_time: float | None = None,\n    timeout_override: int | None = None,\n) -> Generator[Packet, None, tuple[LlmStepResult, bool]]:\n    \"\"\"Run an LLM step and stream the response as packets.\n    NOTE: DO NOT TOUCH THIS FUNCTION BEFORE ASKING YUHONG, this is very finicky and\n    delicate logic that is core to the app's main functionality.\n\n    This generator function streams LLM responses, processing reasoning content,\n    answer content, tool calls, and citations. It yields Packet objects for\n    real-time streaming to clients and accumulates the final result.\n\n    Args:\n        history: List of chat messages in the conversation history.\n        tool_definitions: List of tool definitions available to the LLM.\n        tool_choice: Tool choice configuration (e.g., \"auto\", \"required\", \"none\").\n        llm: Language model interface to use for generation.\n        placement: Placement info (turn_index, tab_index, sub_turn_index) for\n            positioning packets in the conversation UI.\n        state_container: Container for storing chat state (reasoning, answers).\n        citation_processor: Optional processor for extracting and formatting citations\n            from the response. If provided, processes tokens to identify citations.\n        reasoning_effort: Optional reasoning effort configuration for models that\n            support reasoning (e.g., o1 models).\n        final_documents: Optional list of search documents to include in the response\n            start packet.\n        user_identity: Optional user identity information for the LLM.\n        custom_token_processor: Optional callable that processes each token delta\n            before yielding. Receives (delta, processor_state) and returns\n            (modified_delta, new_processor_state). Can return None for delta to skip.\n        max_tokens: Optional maximum number of tokens for the LLM response.\n        use_existing_tab_index: If True, use the tab_index from placement for all\n            tool calls instead of auto-incrementing.\n        is_deep_research: If True, treat content before tool calls as reasoning\n            when tool_choice is REQUIRED.\n        pre_answer_processing_time: Optional time spent processing before the\n            answer started, recorded in state_container for analytics.\n        timeout_override: Optional timeout override for the LLM call.\n\n    Yields:\n        Packet: Streaming packets containing:\n            - ReasoningStart/ReasoningDelta/ReasoningDone for reasoning content\n            - AgentResponseStart/AgentResponseDelta for answer content\n            - CitationInfo for extracted citations\n            - ToolCallKickoff for tool calls (extracted at the end)\n\n    Returns:\n        tuple[LlmStepResult, bool]: A tuple containing:\n            - LlmStepResult: The final result with accumulated reasoning, answer,\n              and tool calls (if any).\n            - bool: Whether reasoning occurred during this step. This should be used to\n              increment the turn index or sub_turn index for the rest of the LLM loop.\n\n    Note:\n        The function handles incremental state updates, saving reasoning and answer\n        tokens to the state container as they are generated. Tool calls are extracted\n        and yielded only after the stream completes.\n    \"\"\"\n\n    turn_index = placement.turn_index\n    tab_index = placement.tab_index\n    sub_turn_index = placement.sub_turn_index\n\n    def _current_placement() -> Placement:\n        return Placement(\n            turn_index=turn_index,\n            tab_index=tab_index,\n            sub_turn_index=sub_turn_index,\n        )\n\n    llm_msg_history = translate_history_to_llm_format(history, llm.config)\n    has_reasoned = False\n\n    if LOG_ONYX_MODEL_INTERACTIONS:\n        logger.debug(\n            f\"Message history:\\n{_format_message_history_for_logging(llm_msg_history)}\"\n        )\n\n    id_to_tool_call_map: dict[int, dict[str, Any]] = {}\n    arg_parsers: dict[int, Parser] = {}\n    reasoning_start = False\n    answer_start = False\n    accumulated_reasoning = \"\"\n    accumulated_answer = \"\"\n    accumulated_raw_answer = \"\"\n    stream_chunk_count = 0\n    actionable_chunk_count = 0\n    empty_chunk_count = 0\n    finish_reasons: set[str] = set()\n    xml_tool_call_content_filter = _XmlToolCallContentFilter()\n\n    processor_state: Any = None\n\n    with generation_span(\n        model=llm.config.model_name,\n        model_config={\n            \"base_url\": str(llm.config.api_base or \"\"),\n            \"model_impl\": \"litellm\",\n        },\n    ) as span_generation:\n        span_generation.span_data.input = cast(\n            Sequence[Mapping[str, Any]], llm_msg_history\n        )\n        stream_start_time = time.monotonic()\n        first_action_recorded = False\n\n        def _emit_citation_results(\n            results: Generator[str | CitationInfo, None, None],\n        ) -> Generator[Packet, None, None]:\n            \"\"\"Yield packets for citation processor results (str or CitationInfo).\"\"\"\n            nonlocal accumulated_answer\n\n            for result in results:\n                if isinstance(result, str):\n                    accumulated_answer += result\n                    if state_container:\n                        state_container.set_answer_tokens(accumulated_answer)\n                    yield Packet(\n                        placement=_current_placement(),\n                        obj=AgentResponseDelta(content=result),\n                    )\n                elif isinstance(result, CitationInfo):\n                    yield Packet(\n                        placement=_current_placement(),\n                        obj=result,\n                    )\n                    if state_container:\n                        state_container.add_emitted_citation(result.citation_number)\n\n        def _close_reasoning_if_active() -> Generator[Packet, None, None]:\n            \"\"\"Emit ReasoningDone and increment turns if reasoning is in progress.\"\"\"\n            nonlocal reasoning_start\n            nonlocal has_reasoned\n            nonlocal turn_index\n            nonlocal sub_turn_index\n\n            if reasoning_start:\n                yield Packet(\n                    placement=Placement(\n                        turn_index=turn_index,\n                        tab_index=tab_index,\n                        sub_turn_index=sub_turn_index,\n                    ),\n                    obj=ReasoningDone(),\n                )\n                has_reasoned = True\n                turn_index, sub_turn_index = _increment_turns(\n                    turn_index, sub_turn_index\n                )\n                reasoning_start = False\n\n        def _emit_content_chunk(content_chunk: str) -> Generator[Packet, None, None]:\n            nonlocal accumulated_answer\n            nonlocal accumulated_reasoning\n            nonlocal answer_start\n            nonlocal reasoning_start\n            nonlocal turn_index\n            nonlocal sub_turn_index\n\n            # When tool_choice is REQUIRED, content before tool calls is reasoning/thinking\n            # about which tool to call, not an actual answer to the user.\n            # Treat this content as reasoning instead of answer.\n            if is_deep_research and tool_choice == ToolChoiceOptions.REQUIRED:\n                accumulated_reasoning += content_chunk\n                if state_container:\n                    state_container.set_reasoning_tokens(accumulated_reasoning)\n                if not reasoning_start:\n                    yield Packet(\n                        placement=_current_placement(),\n                        obj=ReasoningStart(),\n                    )\n                yield Packet(\n                    placement=_current_placement(),\n                    obj=ReasoningDelta(reasoning=content_chunk),\n                )\n                reasoning_start = True\n                return\n\n            # Normal flow for AUTO or NONE tool choice\n            yield from _close_reasoning_if_active()\n\n            if not answer_start:\n                # Store pre-answer processing time in state container for save_chat\n                if state_container and pre_answer_processing_time is not None:\n                    state_container.set_pre_answer_processing_time(\n                        pre_answer_processing_time\n                    )\n\n                yield Packet(\n                    placement=_current_placement(),\n                    obj=AgentResponseStart(\n                        final_documents=final_documents,\n                        pre_answer_processing_seconds=pre_answer_processing_time,\n                    ),\n                )\n                answer_start = True\n\n            if citation_processor:\n                yield from _emit_citation_results(\n                    citation_processor.process_token(content_chunk)\n                )\n            else:\n                accumulated_answer += content_chunk\n                # Save answer incrementally to state container\n                if state_container:\n                    state_container.set_answer_tokens(accumulated_answer)\n                yield Packet(\n                    placement=_current_placement(),\n                    obj=AgentResponseDelta(content=content_chunk),\n                )\n\n        for packet in llm.stream(\n            prompt=llm_msg_history,\n            tools=tool_definitions,\n            tool_choice=tool_choice,\n            structured_response_format=None,  # TODO\n            max_tokens=max_tokens,\n            reasoning_effort=reasoning_effort,\n            user_identity=user_identity,\n            timeout_override=timeout_override,\n        ):\n            stream_chunk_count += 1\n            if packet.usage:\n                usage = packet.usage\n                span_generation.span_data.usage = {\n                    \"input_tokens\": usage.prompt_tokens,\n                    \"output_tokens\": usage.completion_tokens,\n                    \"cache_read_input_tokens\": usage.cache_read_input_tokens,\n                    \"cache_creation_input_tokens\": usage.cache_creation_input_tokens,\n                }\n                # Note: LLM cost tracking is now handled in multi_llm.py\n            finish_reason = packet.choice.finish_reason\n            if finish_reason:\n                finish_reasons.add(str(finish_reason))\n            delta = packet.choice.delta\n\n            # Weird behavior from some model providers, just log and ignore for now\n            if (\n                not delta.content\n                and delta.reasoning_content is None\n                and not delta.tool_calls\n            ):\n                empty_chunk_count += 1\n                logger.warning(\n                    \"LLM packet is empty (no content, reasoning, or tool calls). \"\n                    f\"finish_reason={finish_reason}. Skipping: {packet}\"\n                )\n                continue\n\n            if not first_action_recorded and _delta_has_action(delta):\n                span_generation.span_data.time_to_first_action_seconds = (\n                    time.monotonic() - stream_start_time\n                )\n                first_action_recorded = True\n            if _delta_has_action(delta):\n                actionable_chunk_count += 1\n\n            if custom_token_processor:\n                # The custom token processor can modify the deltas for specific custom logic\n                # It can also return a state so that it can handle aggregated delta logic etc.\n                # Loosely typed so the function can be flexible\n                modified_delta, processor_state = custom_token_processor(\n                    delta, processor_state\n                )\n                if modified_delta is None:\n                    continue\n                delta = modified_delta\n\n            # Should only happen once, frontend does not expect multiple\n            # ReasoningStart or ReasoningDone packets.\n            if delta.reasoning_content:\n                accumulated_reasoning += delta.reasoning_content\n                # Save reasoning incrementally to state container\n                if state_container:\n                    state_container.set_reasoning_tokens(accumulated_reasoning)\n                if not reasoning_start:\n                    yield Packet(\n                        placement=_current_placement(),\n                        obj=ReasoningStart(),\n                    )\n                yield Packet(\n                    placement=_current_placement(),\n                    obj=ReasoningDelta(reasoning=delta.reasoning_content),\n                )\n                reasoning_start = True\n\n            if delta.content:\n                # Keep raw content for fallback extraction. Display content can be\n                # filtered and, in deep-research REQUIRED mode, routed as reasoning.\n                accumulated_raw_answer += delta.content\n                filtered_content = xml_tool_call_content_filter.process(delta.content)\n                if filtered_content:\n                    yield from _emit_content_chunk(filtered_content)\n\n            if delta.tool_calls:\n                yield from _close_reasoning_if_active()\n\n                for tool_call_delta in delta.tool_calls:\n                    # maybe_emit depends and update being called first and attaching the delta\n                    _update_tool_call_with_delta(id_to_tool_call_map, tool_call_delta)\n                    yield from maybe_emit_argument_delta(\n                        tool_calls_in_progress=id_to_tool_call_map,\n                        tool_call_delta=tool_call_delta,\n                        placement=_current_placement(),\n                        parsers=arg_parsers,\n                    )\n\n        # Flush any tail text buffered while checking for split \"<function_calls\" markers.\n        filtered_content_tail = xml_tool_call_content_filter.flush()\n        if filtered_content_tail:\n            yield from _emit_content_chunk(filtered_content_tail)\n\n        # Flush custom token processor to get any final tool calls\n        if custom_token_processor:\n            flush_delta, processor_state = custom_token_processor(None, processor_state)\n            if (\n                not first_action_recorded\n                and flush_delta is not None\n                and _delta_has_action(flush_delta)\n            ):\n                span_generation.span_data.time_to_first_action_seconds = (\n                    time.monotonic() - stream_start_time\n                )\n                first_action_recorded = True\n            if flush_delta and flush_delta.tool_calls:\n                for tool_call_delta in flush_delta.tool_calls:\n                    _update_tool_call_with_delta(id_to_tool_call_map, tool_call_delta)\n\n        tool_calls = _extract_tool_call_kickoffs(\n            id_to_tool_call_map=id_to_tool_call_map,\n            turn_index=turn_index,\n            tab_index=tab_index if use_existing_tab_index else None,\n            sub_turn_index=sub_turn_index,\n        )\n        if tool_calls:\n            tool_calls_list: list[ToolCall] = [\n                ToolCall(\n                    id=kickoff.tool_call_id,\n                    type=\"function\",\n                    function=FunctionCall(\n                        name=kickoff.tool_name,\n                        arguments=json.dumps(kickoff.tool_args),\n                    ),\n                )\n                for kickoff in tool_calls\n            ]\n\n            assistant_msg: AssistantMessage = AssistantMessage(\n                role=\"assistant\",\n                content=accumulated_answer if accumulated_answer else None,\n                tool_calls=tool_calls_list,\n            )\n            span_generation.span_data.output = [assistant_msg.model_dump()]\n        elif accumulated_answer:\n            assistant_msg_no_tools = AssistantMessage(\n                role=\"assistant\",\n                content=accumulated_answer,\n                tool_calls=None,\n            )\n            span_generation.span_data.output = [assistant_msg_no_tools.model_dump()]\n\n        # Record reasoning content for tracing (extended thinking from reasoning models)\n        if accumulated_reasoning:\n            span_generation.span_data.reasoning = accumulated_reasoning\n\n    # This may happen if the custom token processor is used to modify other packets into reasoning\n    # Then there won't necessarily be anything else to come after the reasoning tokens\n    yield from _close_reasoning_if_active()\n\n    # Flush any remaining content from citation processor\n    # Reasoning is always first so this should use the post-incremented value of turn_index\n    # Note that this doesn't need to handle any sub-turns as those docs will not have citations\n    # as clickable items and will be stripped out instead.\n    if citation_processor:\n        yield from _emit_citation_results(citation_processor.process_token(None))\n\n    # Note: Content (AgentResponseDelta) doesn't need an explicit end packet - OverallStop handles it\n    # Tool calls are handled by tool execution code and emit their own packets (e.g., SectionEnd)\n    if LOG_ONYX_MODEL_INTERACTIONS:\n        logger.debug(f\"Accumulated reasoning: {accumulated_reasoning}\")\n        logger.debug(f\"Accumulated answer: {accumulated_answer}\")\n\n        if tool_calls:\n            tool_calls_str = \"\\n\".join(\n                f\"  - {tc.tool_name}: {json.dumps(tc.tool_args, indent=4)}\"\n                for tc in tool_calls\n            )\n            logger.debug(f\"Tool calls:\\n{tool_calls_str}\")\n        else:\n            logger.debug(\"Tool calls: []\")\n\n    if actionable_chunk_count == 0:\n        logger.warning(\n            \"LLM stream completed with no actionable deltas. \"\n            f\"chunks={stream_chunk_count}, empty_chunks={empty_chunk_count}, \"\n            f\"finish_reasons={sorted(finish_reasons)}, \"\n            f\"provider={llm.config.model_provider}, model={llm.config.model_name}, \"\n            f\"tool_choice={tool_choice}, tools_sent={len(tool_definitions)}\"\n        )\n\n    return (\n        LlmStepResult(\n            reasoning=accumulated_reasoning if accumulated_reasoning else None,\n            answer=accumulated_answer if accumulated_answer else None,\n            tool_calls=tool_calls if tool_calls else None,\n            raw_answer=accumulated_raw_answer if accumulated_raw_answer else None,\n        ),\n        has_reasoned,\n    )\n\n\ndef run_llm_step(\n    emitter: Emitter,\n    history: list[ChatMessageSimple],\n    tool_definitions: list[dict],\n    tool_choice: ToolChoiceOptions,\n    llm: LLM,\n    placement: Placement,\n    state_container: ChatStateContainer | None,\n    citation_processor: DynamicCitationProcessor | None,\n    reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,\n    final_documents: list[SearchDoc] | None = None,\n    user_identity: LLMUserIdentity | None = None,\n    custom_token_processor: (\n        Callable[[Delta | None, Any], tuple[Delta | None, Any]] | None\n    ) = None,\n    max_tokens: int | None = None,\n    use_existing_tab_index: bool = False,\n    is_deep_research: bool = False,\n    pre_answer_processing_time: float | None = None,\n    timeout_override: int | None = None,\n) -> tuple[LlmStepResult, bool]:\n    \"\"\"Wrapper around run_llm_step_pkt_generator that consumes packets and emits them.\n\n    Returns:\n        tuple[LlmStepResult, bool]: The LLM step result and whether reasoning occurred.\n    \"\"\"\n    step_generator = run_llm_step_pkt_generator(\n        history=history,\n        tool_definitions=tool_definitions,\n        tool_choice=tool_choice,\n        llm=llm,\n        placement=placement,\n        state_container=state_container,\n        citation_processor=citation_processor,\n        reasoning_effort=reasoning_effort,\n        final_documents=final_documents,\n        user_identity=user_identity,\n        custom_token_processor=custom_token_processor,\n        max_tokens=max_tokens,\n        use_existing_tab_index=use_existing_tab_index,\n        is_deep_research=is_deep_research,\n        pre_answer_processing_time=pre_answer_processing_time,\n        timeout_override=timeout_override,\n    )\n\n    while True:\n        try:\n            packet = next(step_generator)\n            emitter.emit(packet)\n        except StopIteration as e:\n            llm_step_result, has_reasoned = e.value\n            return llm_step_result, has_reasoned\n"
  },
  {
    "path": "backend/onyx/chat/models.py",
    "content": "from collections.abc import Iterator\nfrom typing import Any\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\n\nfrom onyx.configs.constants import MessageType\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.file_store.models import InMemoryChatFile\nfrom onyx.server.query_and_chat.models import MessageResponseIDInfo\nfrom onyx.server.query_and_chat.models import MultiModelMessageResponseIDInfo\nfrom onyx.server.query_and_chat.streaming_models import CitationInfo\nfrom onyx.server.query_and_chat.streaming_models import GeneratedImage\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.tools.models import SearchToolUsage\nfrom onyx.tools.models import ToolCallKickoff\nfrom onyx.tools.tool_implementations.custom.base_tool_types import ToolResultType\n\n\nclass StreamingError(BaseModel):\n    error: str\n    stack_trace: str | None = None\n    error_code: str | None = (\n        None  # e.g., \"RATE_LIMIT\", \"AUTH_ERROR\", \"TOOL_CALL_FAILED\"\n    )\n    is_retryable: bool = True  # Hint to frontend if retry might help\n    details: dict | None = None  # Additional context (tool name, model name, etc.)\n\n\nclass CustomToolResponse(BaseModel):\n    response: ToolResultType\n    tool_name: str\n\n\nclass CreateChatSessionID(BaseModel):\n    chat_session_id: UUID\n\n\nAnswerStreamPart = (\n    Packet\n    | MessageResponseIDInfo\n    | MultiModelMessageResponseIDInfo\n    | StreamingError\n    | CreateChatSessionID\n)\n\nAnswerStream = Iterator[AnswerStreamPart]\n\n\nclass ToolCallResponse(BaseModel):\n    \"\"\"Tool call with full details for non-streaming response.\"\"\"\n\n    tool_name: str\n    tool_arguments: dict[str, Any]\n    tool_result: str\n    search_docs: list[SearchDoc] | None = None\n    generated_images: list[GeneratedImage] | None = None\n    # Reasoning that led to the tool call\n    pre_reasoning: str | None = None\n\n\nclass ChatBasicResponse(BaseModel):\n    # This is built piece by piece, any of these can be None as the flow could break\n    answer: str\n    answer_citationless: str\n\n    top_documents: list[SearchDoc]\n\n    error_msg: str | None\n    message_id: int\n    citation_info: list[CitationInfo]\n\n\nclass ChatFullResponse(BaseModel):\n    \"\"\"Complete non-streaming response with all available data.\n    NOTE: This model is used for the core flow of the Onyx application, any changes to it should be reviewed and approved by an\n    experienced team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.\n    \"\"\"\n\n    # Core response fields\n    answer: str\n    answer_citationless: str\n    pre_answer_reasoning: str | None = None\n    tool_calls: list[ToolCallResponse] = []\n\n    # Documents & citations\n    top_documents: list[SearchDoc]\n    citation_info: list[CitationInfo]\n\n    # Metadata\n    message_id: int\n    chat_session_id: UUID | None = None\n    error_msg: str | None = None\n\n\nclass ChatLoadedFile(InMemoryChatFile):\n    content_text: str | None\n    token_count: int\n\n\nclass ToolCallSimple(BaseModel):\n    \"\"\"Tool call for ChatMessageSimple representation (mirrors OpenAI format).\n\n    Used when an ASSISTANT message contains one or more tool calls.\n    Each tool call has an ID, name, arguments, and token count for tracking.\n    \"\"\"\n\n    tool_call_id: str\n    tool_name: str\n    tool_arguments: dict[str, Any]\n    token_count: int = 0\n\n\nclass ChatMessageSimple(BaseModel):\n    message: str\n    token_count: int\n    message_type: MessageType\n    # Only for USER type messages\n    image_files: list[ChatLoadedFile] | None = None\n    # Only for TOOL_CALL_RESPONSE type messages\n    tool_call_id: str | None = None\n    # For ASSISTANT messages with tool calls (OpenAI parallel tool calling format)\n    tool_calls: list[ToolCallSimple] | None = None\n    # The last message for which this is true\n    # AND is true for all previous messages\n    # (counting from the start of the history)\n    # represents the end of the cacheable prefix\n    # used for prompt caching\n    should_cache: bool = False\n    # When this message represents an injected text file, this is the file's ID.\n    # Used to detect which file messages survive context-window truncation.\n    file_id: str | None = None\n\n\nclass ContextFileMetadata(BaseModel):\n    \"\"\"Metadata for a context-injected file to enable citation support.\"\"\"\n\n    file_id: str\n    filename: str\n    file_content: str\n\n\nclass FileToolMetadata(BaseModel):\n    \"\"\"Lightweight metadata for exposing files to the FileReaderTool.\n\n    Used when files cannot be loaded directly into context (project too large\n    or persona-attached user_files without direct-load path). The LLM receives\n    a listing of these so it knows which files it can read via ``read_file``.\n    \"\"\"\n\n    file_id: str\n    filename: str\n    approx_char_count: int\n\n\nclass ChatHistoryResult(BaseModel):\n    \"\"\"Result of converting chat history to simple format.\n\n    Bundles the simple messages with metadata for every text file that was\n    injected into the history. After context-window truncation drops older\n    messages, callers compare surviving ``file_id`` tags against this map\n    to discover \"forgotten\" files whose metadata should be provided to the\n    FileReaderTool.\n    \"\"\"\n\n    simple_messages: list[ChatMessageSimple]\n    all_injected_file_metadata: dict[str, FileToolMetadata]\n\n\nclass ExtractedContextFiles(BaseModel):\n    \"\"\"Result of attempting to load user files (from a project or persona) into context.\"\"\"\n\n    file_texts: list[str]\n    image_files: list[ChatLoadedFile]\n    use_as_search_filter: bool\n    total_token_count: int\n    # Lightweight metadata for files exposed via FileReaderTool\n    # (populated when files don't fit in context and vector DB is disabled).\n    file_metadata: list[ContextFileMetadata]\n    uncapped_token_count: int | None\n    file_metadata_for_tool: list[FileToolMetadata] = []\n\n\nclass SearchParams(BaseModel):\n    \"\"\"Resolved search filter IDs and search-tool usage for a chat turn.\"\"\"\n\n    project_id_filter: int | None\n    persona_id_filter: int | None\n    search_usage: SearchToolUsage\n\n\nclass LlmStepResult(BaseModel):\n    reasoning: str | None\n    answer: str | None\n    tool_calls: list[ToolCallKickoff] | None\n    # Raw LLM text before any display-oriented filtering/sanitization.\n    # Used for fallback tool-call extraction when providers emit calls as text.\n    raw_answer: str | None = None\n"
  },
  {
    "path": "backend/onyx/chat/process_message.py",
    "content": "\"\"\"\nIMPORTANT: familiarize yourself with the design concepts prior to contributing to this file.\nAn overview can be found in the README.md file in this directory.\n\"\"\"\n\nimport contextvars\nimport io\nimport queue\nimport re\nimport threading\nimport traceback\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom concurrent.futures import ThreadPoolExecutor\nfrom contextvars import Token\nfrom typing import Final\nfrom uuid import UUID\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.cache.factory import get_cache_backend\nfrom onyx.chat.chat_processing_checker import set_processing_status\nfrom onyx.chat.chat_state import AvailableFiles\nfrom onyx.chat.chat_state import ChatStateContainer\nfrom onyx.chat.chat_state import ChatTurnSetup\nfrom onyx.chat.chat_utils import build_file_context\nfrom onyx.chat.chat_utils import convert_chat_history\nfrom onyx.chat.chat_utils import create_chat_history_chain\nfrom onyx.chat.chat_utils import create_chat_session_from_request\nfrom onyx.chat.chat_utils import get_custom_agent_prompt\nfrom onyx.chat.chat_utils import is_last_assistant_message_clarification\nfrom onyx.chat.chat_utils import load_all_chat_files\nfrom onyx.chat.compression import calculate_total_history_tokens\nfrom onyx.chat.compression import compress_chat_history\nfrom onyx.chat.compression import find_summary_for_branch\nfrom onyx.chat.compression import get_compression_params\nfrom onyx.chat.emitter import Emitter\nfrom onyx.chat.llm_loop import EmptyLLMResponseError\nfrom onyx.chat.llm_loop import run_llm_loop\nfrom onyx.chat.models import AnswerStream\nfrom onyx.chat.models import AnswerStreamPart\nfrom onyx.chat.models import ChatBasicResponse\nfrom onyx.chat.models import ChatFullResponse\nfrom onyx.chat.models import ChatLoadedFile\nfrom onyx.chat.models import ChatMessageSimple\nfrom onyx.chat.models import ContextFileMetadata\nfrom onyx.chat.models import CreateChatSessionID\nfrom onyx.chat.models import ExtractedContextFiles\nfrom onyx.chat.models import FileToolMetadata\nfrom onyx.chat.models import SearchParams\nfrom onyx.chat.models import StreamingError\nfrom onyx.chat.models import ToolCallResponse\nfrom onyx.chat.prompt_utils import calculate_reserved_tokens\nfrom onyx.chat.save_chat import save_chat_turn\nfrom onyx.chat.stop_signal_checker import is_connected as check_stop_signal\nfrom onyx.chat.stop_signal_checker import reset_cancel_status\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.app_configs import INTEGRATION_TESTS_MODE\nfrom onyx.configs.constants import DEFAULT_PERSONA_ID\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.constants import MilestoneRecordType\nfrom onyx.context.search.models import BaseFilters\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.db.chat import create_new_chat_message\nfrom onyx.db.chat import get_chat_session_by_id\nfrom onyx.db.chat import get_or_create_root_message\nfrom onyx.db.chat import reserve_message_id\nfrom onyx.db.chat import reserve_multi_model_message_ids\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import HookPoint\nfrom onyx.db.memory import get_memories\nfrom onyx.db.models import ChatMessage\nfrom onyx.db.models import Persona\nfrom onyx.db.models import User\nfrom onyx.db.models import UserFile\nfrom onyx.db.projects import get_user_files_from_project\nfrom onyx.db.tools import get_tools\nfrom onyx.deep_research.dr_loop import run_deep_research_llm_loop\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import log_onyx_error\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.file_processing.extract_file_text import extract_file_text\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.file_store.models import InMemoryChatFile\nfrom onyx.file_store.utils import load_in_memory_chat_files\nfrom onyx.file_store.utils import verify_user_files\nfrom onyx.hooks.executor import execute_hook\nfrom onyx.hooks.executor import HookSkipped\nfrom onyx.hooks.executor import HookSoftFailed\nfrom onyx.hooks.points.query_processing import QueryProcessingPayload\nfrom onyx.hooks.points.query_processing import QueryProcessingResponse\nfrom onyx.llm.factory import get_llm_for_persona\nfrom onyx.llm.factory import get_llm_token_counter\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.interfaces import LLMUserIdentity\nfrom onyx.llm.override_models import LLMOverride\nfrom onyx.llm.request_context import reset_llm_mock_response\nfrom onyx.llm.request_context import set_llm_mock_response\nfrom onyx.llm.utils import litellm_exception_to_error_msg\nfrom onyx.onyxbot.slack.models import SlackContext\nfrom onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type\nfrom onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE\nfrom onyx.server.query_and_chat.models import MessageResponseIDInfo\nfrom onyx.server.query_and_chat.models import ModelResponseSlot\nfrom onyx.server.query_and_chat.models import MultiModelMessageResponseIDInfo\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseDelta\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseStart\nfrom onyx.server.query_and_chat.streaming_models import CitationInfo\nfrom onyx.server.query_and_chat.streaming_models import OverallStop\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.usage_limits import check_llm_cost_limit_for_provider\nfrom onyx.tools.constants import FILE_READER_TOOL_ID\nfrom onyx.tools.constants import SEARCH_TOOL_ID\nfrom onyx.tools.models import ChatFile\nfrom onyx.tools.models import SearchToolUsage\nfrom onyx.tools.tool_constructor import construct_tools\nfrom onyx.tools.tool_constructor import CustomToolConfig\nfrom onyx.tools.tool_constructor import FileReaderToolConfig\nfrom onyx.tools.tool_constructor import SearchToolConfig\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.telemetry import mt_cloud_telemetry\nfrom onyx.utils.timing import log_function_time\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\nERROR_TYPE_CANCELLED = \"cancelled\"\nAPPROX_CHARS_PER_TOKEN = 4\n\n\ndef _collect_available_file_ids(\n    chat_history: list[ChatMessage],\n    project_id: int | None,\n    user_id: UUID | None,\n    db_session: Session,\n) -> AvailableFiles:\n    \"\"\"Collect all file IDs the FileReaderTool should be allowed to access.\n\n    Returns *separate* lists for chat-attached files (``file_record`` IDs) and\n    project/user files (``user_file`` IDs) so the tool can pick the right\n    loader without a try/except fallback.\"\"\"\n    chat_file_ids: set[UUID] = set()\n    user_file_ids: set[UUID] = set()\n\n    for msg in chat_history:\n        if not msg.files:\n            continue\n        for fd in msg.files:\n            try:\n                chat_file_ids.add(UUID(fd[\"id\"]))\n            except (ValueError, KeyError):\n                pass\n\n    if project_id:\n        user_files = get_user_files_from_project(\n            project_id=project_id,\n            user_id=user_id,\n            db_session=db_session,\n        )\n        for uf in user_files:\n            user_file_ids.add(uf.id)\n\n    return AvailableFiles(\n        user_file_ids=list(user_file_ids),\n        chat_file_ids=list(chat_file_ids),\n    )\n\n\ndef _should_enable_slack_search(\n    persona: Persona,\n    filters: BaseFilters | None,\n) -> bool:\n    \"\"\"Determine if Slack search should be enabled.\n\n    Returns True if:\n    - Source type filter exists and includes Slack, OR\n    - Default persona with no source type filter\n    \"\"\"\n    source_types = filters.source_type if filters else None\n    return (source_types is not None and DocumentSource.SLACK in source_types) or (\n        persona.id == DEFAULT_PERSONA_ID and source_types is None\n    )\n\n\ndef _convert_loaded_files_to_chat_files(\n    loaded_files: list[ChatLoadedFile],\n) -> list[ChatFile]:\n    \"\"\"Convert ChatLoadedFile objects to ChatFile for tool usage (e.g., PythonTool).\n\n    Args:\n        loaded_files: List of ChatLoadedFile objects from the chat history\n\n    Returns:\n        List of ChatFile objects that can be passed to tools\n    \"\"\"\n    chat_files = []\n    for loaded_file in loaded_files:\n        if len(loaded_file.content) > 0:\n            chat_files.append(\n                ChatFile(\n                    filename=loaded_file.filename or f\"file_{loaded_file.file_id}\",\n                    content=loaded_file.content,\n                )\n            )\n    return chat_files\n\n\ndef resolve_context_user_files(\n    persona: Persona,\n    project_id: int | None,\n    user_id: UUID | None,\n    db_session: Session,\n) -> list[UserFile]:\n    \"\"\"Apply the precedence rule to decide which user files to load.\n\n    A custom persona fully supersedes the project.  When a chat uses a\n    custom persona, the project is purely organisational — its files are\n    never loaded and never made searchable.\n\n    Custom persona → persona's own user_files (may be empty).\n    Default persona inside a project → project files.\n    Otherwise → empty list.\n    \"\"\"\n    if persona.id != DEFAULT_PERSONA_ID:\n        return list(persona.user_files) if persona.user_files else []\n    if project_id:\n        return get_user_files_from_project(\n            project_id=project_id,\n            user_id=user_id,\n            db_session=db_session,\n        )\n    return []\n\n\ndef _empty_extracted_context_files() -> ExtractedContextFiles:\n    return ExtractedContextFiles(\n        file_texts=[],\n        image_files=[],\n        use_as_search_filter=False,\n        total_token_count=0,\n        file_metadata=[],\n        uncapped_token_count=None,\n    )\n\n\ndef _extract_text_from_in_memory_file(f: InMemoryChatFile) -> str | None:\n    \"\"\"Extract text content from an InMemoryChatFile.\n\n    PLAIN_TEXT: the content is pre-extracted UTF-8 plaintext stored during\n    ingestion — decode directly.\n    DOC / CSV / other text types: the content is the original file bytes —\n    use extract_file_text which handles encoding detection and format parsing.\n    \"\"\"\n    try:\n        if f.file_type == ChatFileType.PLAIN_TEXT:\n            return f.content.decode(\"utf-8\", errors=\"ignore\").replace(\"\\x00\", \"\")\n        return extract_file_text(\n            file=io.BytesIO(f.content),\n            file_name=f.filename or \"\",\n            break_on_unprocessable=False,\n        )\n    except Exception:\n        logger.warning(f\"Failed to extract text from file {f.file_id}\", exc_info=True)\n        return None\n\n\ndef extract_context_files(\n    user_files: list[UserFile],\n    llm_max_context_window: int,\n    reserved_token_count: int,\n    db_session: Session,\n    # Because the tokenizer is a generic tokenizer, the token count may be incorrect.\n    # to account for this, the maximum context that is allowed for this function is\n    # 60% of the LLM's max context window. The other benefit is that for projects with\n    # more files, this makes it so that we don't throw away the history too quickly every time.\n    max_llm_context_percentage: float = 0.6,\n) -> ExtractedContextFiles:\n    \"\"\"Load user files into context if they fit; otherwise flag for search.\n\n    The caller is responsible for deciding *which* user files to pass in\n    (project files, persona files, etc.).  This function only cares about\n    the all-or-nothing fit check and the actual content loading.\n\n    Args:\n        project_id: The project ID to load files from\n        user_id: The user ID for authorization\n        llm_max_context_window: Maximum tokens allowed in the LLM context window\n        reserved_token_count: Number of tokens to reserve for other content\n        db_session: Database session\n        max_llm_context_percentage: Maximum percentage of the LLM context window to use.\n    Returns:\n        ExtractedContextFiles containing:\n        - List of text content strings from context files (text files only)\n        - List of image files from context (ChatLoadedFile objects)\n        - Total token count of all extracted files\n        - File metadata for context files\n        - Uncapped token count of all extracted files\n        - File metadata for files that don't fit in context and vector DB is disabled\n    \"\"\"\n    # TODO(yuhong): I believe this is not handling all file types correctly.\n\n    if not user_files:\n        return _empty_extracted_context_files()\n\n    # Aggregate tokens for the file content that will be added\n    # Skip tokens for those with metadata only\n    aggregate_tokens = sum(\n        uf.token_count or 0\n        for uf in user_files\n        if not mime_type_to_chat_file_type(uf.file_type).use_metadata_only()\n    )\n    max_actual_tokens = (\n        llm_max_context_window - reserved_token_count\n    ) * max_llm_context_percentage\n\n    if aggregate_tokens >= max_actual_tokens:\n        use_as_search_filter = not DISABLE_VECTOR_DB\n        if DISABLE_VECTOR_DB:\n            overflow_tool_metadata = [_build_tool_metadata(uf) for uf in user_files]\n        else:\n            overflow_tool_metadata = [\n                _build_tool_metadata(uf)\n                for uf in user_files\n                if mime_type_to_chat_file_type(uf.file_type).use_metadata_only()\n            ]\n        return ExtractedContextFiles(\n            file_texts=[],\n            image_files=[],\n            use_as_search_filter=use_as_search_filter,\n            total_token_count=0,\n            file_metadata=[],\n            uncapped_token_count=aggregate_tokens,\n            file_metadata_for_tool=overflow_tool_metadata,\n        )\n\n    # Files fit — load them into context\n    user_file_map = {uf.file_id: uf for uf in user_files}\n    in_memory_files = load_in_memory_chat_files(\n        user_file_ids=[uf.id for uf in user_files],\n        db_session=db_session,\n    )\n\n    file_texts: list[str] = []\n    image_files: list[ChatLoadedFile] = []\n    file_metadata: list[ContextFileMetadata] = []\n    tool_metadata: list[FileToolMetadata] = []\n    total_token_count = 0\n\n    for f in in_memory_files:\n        uf = user_file_map.get(str(f.file_id))\n        filename = f.filename or f\"file_{f.file_id}\"\n\n        if f.file_type.use_metadata_only():\n            # Metadata-only files are not injected as full text.\n            # Only the metadata is provided, with LLM using tools\n            if not uf:\n                logger.error(\n                    f\"File with id={f.file_id} in metadata-only path with no associated user file\"\n                )\n                continue\n            tool_metadata.append(_build_tool_metadata(uf))\n        elif f.file_type.is_text_file():\n            text_content = _extract_text_from_in_memory_file(f)\n            if not text_content:\n                continue\n            if not uf:\n                logger.warning(f\"No user file for file_id={f.file_id}\")\n                continue\n            file_texts.append(text_content)\n            file_metadata.append(\n                ContextFileMetadata(\n                    file_id=str(uf.id),\n                    filename=filename,\n                    file_content=text_content,\n                )\n            )\n            if uf.token_count:\n                total_token_count += uf.token_count\n        elif f.file_type == ChatFileType.IMAGE:\n            token_count = uf.token_count if uf and uf.token_count else 0\n            total_token_count += token_count\n            image_files.append(\n                ChatLoadedFile(\n                    file_id=f.file_id,\n                    content=f.content,\n                    file_type=f.file_type,\n                    filename=f.filename,\n                    content_text=None,\n                    token_count=token_count,\n                )\n            )\n\n    return ExtractedContextFiles(\n        file_texts=file_texts,\n        image_files=image_files,\n        use_as_search_filter=False,\n        total_token_count=total_token_count,\n        file_metadata=file_metadata,\n        uncapped_token_count=aggregate_tokens,\n        file_metadata_for_tool=tool_metadata,\n    )\n\n\ndef _build_tool_metadata(user_file: UserFile) -> FileToolMetadata:\n    \"\"\"Build lightweight FileToolMetadata from a UserFile record.\n\n    Delegates to ``build_file_context`` so that the file ID exposed to the\n    LLM is always consistent with what FileReaderTool expects.\n    \"\"\"\n    return build_file_context(\n        tool_file_id=str(user_file.id),\n        filename=user_file.name,\n        file_type=mime_type_to_chat_file_type(user_file.file_type),\n        approx_char_count=(user_file.token_count or 0) * APPROX_CHARS_PER_TOKEN,\n    ).tool_metadata\n\n\ndef determine_search_params(\n    persona_id: int,\n    project_id: int | None,\n    extracted_context_files: ExtractedContextFiles,\n) -> SearchParams:\n    \"\"\"Decide which search filter IDs and search-tool usage apply for a chat turn.\n\n    A custom persona fully supersedes the project — project files are never\n    searchable and the search tool config is entirely controlled by the\n    persona.  The project_id filter is only set for the default persona.\n\n    For the default persona inside a project:\n      - Files overflow  → ENABLED  (vector DB scopes to these files)\n      - Files fit       → DISABLED (content already in prompt)\n      - No files at all → DISABLED (nothing to search)\n    \"\"\"\n    is_custom_persona = persona_id != DEFAULT_PERSONA_ID\n\n    project_id_filter: int | None = None\n    persona_id_filter: int | None = None\n    if extracted_context_files.use_as_search_filter:\n        if is_custom_persona:\n            persona_id_filter = persona_id\n        else:\n            project_id_filter = project_id\n\n    search_usage = SearchToolUsage.AUTO\n    if not is_custom_persona and project_id:\n        has_context_files = bool(extracted_context_files.uncapped_token_count)\n        files_loaded_in_context = bool(extracted_context_files.file_texts)\n\n        if extracted_context_files.use_as_search_filter:\n            search_usage = SearchToolUsage.ENABLED\n        elif files_loaded_in_context or not has_context_files:\n            search_usage = SearchToolUsage.DISABLED\n\n    return SearchParams(\n        project_id_filter=project_id_filter,\n        persona_id_filter=persona_id_filter,\n        search_usage=search_usage,\n    )\n\n\ndef _resolve_query_processing_hook_result(\n    hook_result: QueryProcessingResponse | HookSkipped | HookSoftFailed,\n    message_text: str,\n) -> str:\n    \"\"\"Apply the Query Processing hook result to the message text.\n\n    Returns the (possibly rewritten) message text, or raises OnyxError with\n    QUERY_REJECTED if the hook signals rejection (query is null or empty).\n    HookSkipped and HookSoftFailed are pass-throughs — the original text is\n    returned unchanged.\n    \"\"\"\n    if isinstance(hook_result, (HookSkipped, HookSoftFailed)):\n        return message_text\n    if not (hook_result.query and hook_result.query.strip()):\n        raise OnyxError(\n            OnyxErrorCode.QUERY_REJECTED,\n            hook_result.rejection_message\n            or \"The hook extension for query processing did not return a valid query. No rejection reason was provided.\",\n        )\n    return hook_result.query.strip()\n\n\ndef build_chat_turn(\n    new_msg_req: SendMessageRequest,\n    user: User,\n    db_session: Session,\n    # None → single-model (persona default LLM); non-empty list → multi-model (one LLM per override)\n    llm_overrides: list[LLMOverride] | None,\n    *,\n    litellm_additional_headers: dict[str, str] | None = None,\n    custom_tool_additional_headers: dict[str, str] | None = None,\n    mcp_headers: dict[str, str] | None = None,\n    bypass_acl: bool = False,\n    # Slack context for federated Slack search\n    slack_context: SlackContext | None = None,\n    # Additional context to include in the chat history, e.g. Slack threads where the\n    # conversation cannot be represented by a chain of User/Assistant messages.\n    # NOTE: not stored in the database, only passed in to the LLM as context\n    additional_context: str | None = None,\n) -> Generator[AnswerStreamPart, None, ChatTurnSetup]:\n    \"\"\"Shared setup generator for both single-model and multi-model chat turns.\n\n    Yields the packet(s) the frontend needs for request tracking, then returns an\n    immutable ``ChatTurnSetup`` containing everything the execution strategy needs.\n\n    Callers use::\n\n        setup = yield from build_chat_turn(new_msg_req, ..., llm_overrides=...)\n\n    to forward yielded packets upstream while receiving the return value locally.\n\n    Args:\n        llm_overrides: ``None`` → single-model (persona default LLM).\n                       Non-empty list → multi-model (one LLM per override).\n    \"\"\"\n    tenant_id = get_current_tenant_id()\n    is_multi = bool(llm_overrides)\n\n    user_id = user.id\n    llm_user_identifier = (\n        \"anonymous_user\" if user.is_anonymous else (user.email or str(user_id))\n    )\n\n    # ── Session resolution ───────────────────────────────────────────────────\n    if not new_msg_req.chat_session_id:\n        if not new_msg_req.chat_session_info:\n            raise RuntimeError(\"Must specify a chat session id or chat session info\")\n        chat_session = create_chat_session_from_request(\n            chat_session_request=new_msg_req.chat_session_info,\n            user_id=user_id,\n            db_session=db_session,\n        )\n        yield CreateChatSessionID(chat_session_id=chat_session.id)\n        chat_session = get_chat_session_by_id(\n            chat_session_id=chat_session.id,\n            user_id=user_id,\n            db_session=db_session,\n            eager_load_persona=True,\n        )\n    else:\n        chat_session = get_chat_session_by_id(\n            chat_session_id=new_msg_req.chat_session_id,\n            user_id=user_id,\n            db_session=db_session,\n            eager_load_persona=True,\n        )\n\n    persona = chat_session.persona\n    message_text = new_msg_req.message\n\n    user_identity = LLMUserIdentity(\n        user_id=llm_user_identifier, session_id=str(chat_session.id)\n    )\n\n    # Milestone tracking, most devs using the API don't need to understand this\n    mt_cloud_telemetry(\n        tenant_id=tenant_id,\n        distinct_id=str(user.id) if not user.is_anonymous else tenant_id,\n        event=MilestoneRecordType.MULTIPLE_ASSISTANTS,\n    )\n    mt_cloud_telemetry(\n        tenant_id=tenant_id,\n        distinct_id=str(user.id) if not user.is_anonymous else tenant_id,\n        event=MilestoneRecordType.USER_MESSAGE_SENT,\n        properties={\n            \"origin\": new_msg_req.origin.value,\n            \"has_files\": len(new_msg_req.file_descriptors) > 0,\n            \"has_project\": chat_session.project_id is not None,\n            \"has_persona\": persona is not None and persona.id != DEFAULT_PERSONA_ID,\n            \"deep_research\": new_msg_req.deep_research,\n        },\n    )\n\n    # Check LLM cost limits before using the LLM (only for Onyx-managed keys),\n    # then build the LLM instance(s).\n    llms: list[LLM] = []\n    model_display_names: list[str] = []\n    selected_overrides: list[LLMOverride | None] = (\n        list(llm_overrides or [])\n        if is_multi\n        else [new_msg_req.llm_override or chat_session.llm_override]\n    )\n    for override in selected_overrides:\n        llm = get_llm_for_persona(\n            persona=persona,\n            user=user,\n            llm_override=override,\n            additional_headers=litellm_additional_headers,\n        )\n        check_llm_cost_limit_for_provider(\n            db_session=db_session,\n            tenant_id=tenant_id,\n            llm_provider_api_key=llm.config.api_key,\n        )\n        llms.append(llm)\n        model_display_names.append(_build_model_display_name(override))\n    token_counter = get_llm_token_counter(llms[0])\n\n    # not sure why we do this, but to maintain parity with previous code:\n    if not is_multi:\n        model_display_names = [\"\"]\n\n    # Verify that the user-specified files actually belong to the user\n    verify_user_files(\n        user_files=new_msg_req.file_descriptors,\n        user_id=user_id,\n        db_session=db_session,\n        project_id=chat_session.project_id,\n    )\n\n    # Re-create linear history of messages\n    chat_history = create_chat_history_chain(\n        chat_session_id=chat_session.id, db_session=db_session\n    )\n\n    # Determine the parent message based on the request:\n    # - AUTO_PLACE_AFTER_LATEST_MESSAGE (-1): auto-place after latest message in chain\n    # - None or root ID: regeneration from root (first message)\n    # - positive int: place after that specific parent message\n    root_message = get_or_create_root_message(\n        chat_session_id=chat_session.id, db_session=db_session\n    )\n\n    if new_msg_req.parent_message_id == AUTO_PLACE_AFTER_LATEST_MESSAGE:\n        parent_message = chat_history[-1] if chat_history else root_message\n    elif (\n        new_msg_req.parent_message_id is None\n        or new_msg_req.parent_message_id == root_message.id\n    ):\n        # Regeneration from root — clear history so we start fresh\n        parent_message = root_message\n        chat_history = []\n    else:\n        parent_message = None\n        for i in range(len(chat_history) - 1, -1, -1):\n            if chat_history[i].id == new_msg_req.parent_message_id:\n                parent_message = chat_history[i]\n                # Truncate to only messages up to and including the parent\n                chat_history = chat_history[: i + 1]\n                break\n\n    if parent_message is None:\n        raise ValueError(\n            \"The new message sent is not on the latest mainline of messages\"\n        )\n\n    # ── Query Processing hook + user message ─────────────────────────────────\n    # Skipped on regeneration (parent is USER type): message already exists/was accepted.\n    if parent_message.message_type == MessageType.USER:\n        user_message = parent_message\n    else:\n        # New message — run the Query Processing hook before saving to DB.\n        # Skipped on regeneration: the message already exists and was accepted previously.\n        # Skip for empty/whitespace-only messages — no meaningful query to process,\n        # and SendMessageRequest.message has no min_length guard.\n        if message_text.strip():\n            hook_result = execute_hook(\n                db_session=db_session,\n                hook_point=HookPoint.QUERY_PROCESSING,\n                payload=QueryProcessingPayload(\n                    query=message_text,\n                    # Pass None for anonymous users or authenticated users without an email\n                    # (e.g. some SSO flows). QueryProcessingPayload.user_email is str | None,\n                    # so None is accepted and serialised as null in both cases.\n                    user_email=None if user.is_anonymous else user.email,\n                    chat_session_id=str(chat_session.id),\n                ).model_dump(),\n                response_type=QueryProcessingResponse,\n            )\n            message_text = _resolve_query_processing_hook_result(\n                hook_result, message_text\n            )\n\n        user_message = create_new_chat_message(\n            chat_session_id=chat_session.id,\n            parent_message=parent_message,\n            message=message_text,\n            token_count=token_counter(message_text),\n            message_type=MessageType.USER,\n            files=new_msg_req.file_descriptors,\n            db_session=db_session,\n            commit=True,\n        )\n        chat_history.append(user_message)\n\n    # Collect file IDs for the file reader tool *before* summary truncation so\n    # that files attached to older (summarized-away) messages are still accessible\n    # via the FileReaderTool.\n    available_files = _collect_available_file_ids(\n        chat_history=chat_history,\n        project_id=chat_session.project_id,\n        user_id=user_id,\n        db_session=db_session,\n    )\n\n    # Find applicable summary for the current branch\n    summary_message = find_summary_for_branch(db_session, chat_history)\n    # Collect file metadata from messages that will be dropped by summary truncation.\n    # These become \"pre-summarized\" file metadata so the forgotten-file mechanism can\n    # still tell the LLM about them.\n    summarized_file_metadata: dict[str, FileToolMetadata] = {}\n    if summary_message and summary_message.last_summarized_message_id:\n        cutoff_id = summary_message.last_summarized_message_id\n        for msg in chat_history:\n            if msg.id > cutoff_id or not msg.files:\n                continue\n            for fd in msg.files:\n                file_id = fd.get(\"id\")\n                if not file_id:\n                    continue\n                summarized_file_metadata[file_id] = FileToolMetadata(\n                    file_id=file_id,\n                    filename=fd.get(\"name\") or \"unknown\",\n                    # We don't know the exact size without loading the file,\n                    # but 0 signals \"unknown\" to the LLM.\n                    approx_char_count=0,\n                )\n        # Filter chat_history to only messages after the cutoff\n        chat_history = [m for m in chat_history if m.id > cutoff_id]\n\n    # Compute skip-clarification flag for deep research path (cheap, always available)\n    skip_clarification = is_last_assistant_message_clarification(chat_history)\n\n    user_memory_context = get_memories(user, db_session)\n\n    # This prompt may come from the Agent or Project. Fetched here (before run_llm_loop)\n    # because the inner loop shouldn't need to access the DB-form chat history, but we\n    # need it early for token reservation.\n    custom_agent_prompt = get_custom_agent_prompt(persona, chat_session)\n\n    # When use_memories is disabled, strip memories from the prompt context but keep\n    # user info/preferences. The full context is still passed to the LLM loop for\n    # memory tool persistence.\n    prompt_memory_context = (\n        user_memory_context\n        if user.use_memories\n        else user_memory_context.without_memories()\n    )\n\n    # ── Token reservation ────────────────────────────────────────────────────\n    max_reserved_system_prompt_tokens_str = (persona.system_prompt or \"\") + (\n        custom_agent_prompt or \"\"\n    )\n    reserved_token_count = calculate_reserved_tokens(\n        db_session=db_session,\n        persona_system_prompt=max_reserved_system_prompt_tokens_str,\n        token_counter=token_counter,\n        files=new_msg_req.file_descriptors,\n        user_memory_context=prompt_memory_context,\n    )\n\n    # Determine which user files to use. A custom persona fully supersedes the project —\n    # project files are never loaded or searchable when a custom persona is in play.\n    # Only the default persona inside a project uses the project's files.\n    context_user_files = resolve_context_user_files(\n        persona=persona,\n        project_id=chat_session.project_id,\n        user_id=user_id,\n        db_session=db_session,\n    )\n\n    # Use the smallest context window across models for safety (harmless for N=1).\n    llm_max_context_window = min(llm.config.max_input_tokens for llm in llms)\n\n    extracted_context_files = extract_context_files(\n        user_files=context_user_files,\n        llm_max_context_window=llm_max_context_window,\n        reserved_token_count=reserved_token_count,\n        db_session=db_session,\n    )\n\n    search_params = determine_search_params(\n        persona_id=persona.id,\n        project_id=chat_session.project_id,\n        extracted_context_files=extracted_context_files,\n    )\n\n    # Also grant access to persona-attached user files for FileReaderTool\n    if persona.user_files:\n        existing = set(available_files.user_file_ids)\n        for uf in persona.user_files:\n            if uf.id not in existing:\n                available_files.user_file_ids.append(uf.id)\n\n    all_tools = get_tools(db_session)\n    tool_id_to_name_map = {tool.id: tool.name for tool in all_tools}\n\n    search_tool_id = next(\n        (tool.id for tool in all_tools if tool.in_code_tool_id == SEARCH_TOOL_ID), None\n    )\n\n    forced_tool_id = new_msg_req.forced_tool_id\n    if (\n        search_params.search_usage == SearchToolUsage.DISABLED\n        and forced_tool_id is not None\n        and search_tool_id is not None\n        and forced_tool_id == search_tool_id\n    ):\n        forced_tool_id = None\n\n    # TODO(nmgarza5): Once summarization is done, we don't need to load all files from the beginning.\n    # Load all files needed for this chat chain into memory.\n    files = load_all_chat_files(chat_history, db_session)\n    # Convert loaded files to ChatFile format for tools like PythonTool\n    chat_files_for_tools = _convert_loaded_files_to_chat_files(files)\n\n    # ── Reserve assistant message ID(s) → yield to frontend ──────────────────\n    if is_multi:\n        assert llm_overrides is not None\n        reserved_messages = reserve_multi_model_message_ids(\n            db_session=db_session,\n            chat_session_id=chat_session.id,\n            parent_message_id=user_message.id,\n            model_display_names=model_display_names,\n        )\n        yield MultiModelMessageResponseIDInfo(\n            user_message_id=user_message.id,\n            responses=[\n                ModelResponseSlot(message_id=m.id, model_name=name)\n                for m, name in zip(reserved_messages, model_display_names)\n            ],\n        )\n    else:\n        assistant_response = reserve_message_id(\n            db_session=db_session,\n            chat_session_id=chat_session.id,\n            parent_message=user_message.id,\n            message_type=MessageType.ASSISTANT,\n        )\n        reserved_messages = [assistant_response]\n        yield MessageResponseIDInfo(\n            user_message_id=user_message.id,\n            reserved_assistant_message_id=assistant_response.id,\n        )\n\n    # Convert the chat history into a simple format that is free of any DB objects\n    # and is easy to parse for the agent loop.\n    has_file_reader_tool = any(\n        tool.in_code_tool_id == FILE_READER_TOOL_ID for tool in persona.tools\n    )\n\n    chat_history_result = convert_chat_history(\n        chat_history=chat_history,\n        files=files,\n        context_image_files=extracted_context_files.image_files,\n        additional_context=additional_context or new_msg_req.additional_context,\n        token_counter=token_counter,\n        tool_id_to_name_map=tool_id_to_name_map,\n    )\n    simple_chat_history = chat_history_result.simple_messages\n\n    # Metadata for every text file injected into the history. After context-window\n    # truncation drops older messages, the LLM loop compares surviving file_id tags\n    # against this map to discover \"forgotten\" files and provide their metadata to\n    # FileReaderTool.\n    all_injected_file_metadata: dict[str, FileToolMetadata] = (\n        chat_history_result.all_injected_file_metadata if has_file_reader_tool else {}\n    )\n\n    # Merge in file metadata from messages dropped by summary truncation. These files\n    # are no longer in simple_chat_history so they'd be invisible to the forgotten-file\n    # mechanism — they'll always appear as \"forgotten\" since no surviving message carries\n    # their file_id tag.\n    if summarized_file_metadata:\n        for fid, meta in summarized_file_metadata.items():\n            all_injected_file_metadata.setdefault(fid, meta)\n\n    if all_injected_file_metadata:\n        logger.debug(\n            f\"FileReader: file metadata for LLM: {[(fid, m.filename) for fid, m in all_injected_file_metadata.items()]}\"\n        )\n\n    if summary_message is not None:\n        summary_simple = ChatMessageSimple(\n            message=summary_message.message,\n            token_count=summary_message.token_count,\n            message_type=MessageType.ASSISTANT,\n        )\n        simple_chat_history.insert(0, summary_simple)\n\n    # ── Stop signal and processing status ────────────────────────────────────\n    cache = get_cache_backend()\n    reset_cancel_status(chat_session.id, cache)\n\n    def check_is_connected() -> bool:\n        return check_stop_signal(chat_session.id, cache)\n\n    set_processing_status(\n        chat_session_id=chat_session.id,\n        cache=cache,\n        value=True,\n    )\n\n    # Release any read transaction before the long-running LLM stream.\n    # If commit fails here, reset the processing status before propagating —\n    # otherwise the chat session appears stuck at \"processing\" permanently.\n    try:\n        db_session.commit()\n    except Exception:\n        set_processing_status(chat_session_id=chat_session.id, cache=cache, value=False)\n        raise\n\n    return ChatTurnSetup(\n        new_msg_req=new_msg_req,\n        chat_session=chat_session,\n        persona=persona,\n        user_message=user_message,\n        user_identity=user_identity,\n        llms=llms,\n        model_display_names=model_display_names,\n        simple_chat_history=simple_chat_history,\n        extracted_context_files=extracted_context_files,\n        reserved_messages=reserved_messages,\n        reserved_token_count=reserved_token_count,\n        search_params=search_params,\n        all_injected_file_metadata=all_injected_file_metadata,\n        available_files=available_files,\n        tool_id_to_name_map=tool_id_to_name_map,\n        forced_tool_id=forced_tool_id,\n        files=files,\n        chat_files_for_tools=chat_files_for_tools,\n        custom_agent_prompt=custom_agent_prompt,\n        user_memory_context=user_memory_context,\n        skip_clarification=skip_clarification,\n        check_is_connected=check_is_connected,\n        cache=cache,\n        bypass_acl=bypass_acl,\n        slack_context=slack_context,\n        custom_tool_additional_headers=custom_tool_additional_headers,\n        mcp_headers=mcp_headers,\n    )\n\n\n# Sentinel placed on the merged queue when a model thread finishes.\n_MODEL_DONE = object()\n\n# How often the drain loop polls for user-initiated cancellation (stop button).\n_CANCEL_POLL_INTERVAL_S: Final[float] = 0.05\n\n\ndef _run_models(\n    setup: ChatTurnSetup,\n    user: User,\n    db_session: Session,\n    external_state_container: ChatStateContainer | None = None,\n) -> AnswerStream:\n    \"\"\"Stream packets from one or more LLM loops running in parallel worker threads.\n\n    Each model gets its own worker thread, DB session, and ``Emitter``. Threads write\n    packets to a shared unbounded queue as they are produced; the drain loop yields them\n    in arrival order so the caller receives a single interleaved stream regardless of\n    how many models are running.\n\n    Single-model (N=1) and multi-model (N>1) use the same execution path. Every\n    packet is tagged with ``model_index`` by the model's Emitter — ``0`` for N=1,\n    ``0``/``1``/``2`` for multi-model.\n\n    Args:\n        setup: Fully constructed turn context — LLMs, persona, history, tool config.\n        user: Authenticated user making the request.\n        db_session: Caller's DB session (used for setup reads; each worker opens its own\n            session because SQLAlchemy sessions are not thread-safe).\n        external_state_container: Pre-constructed state container for the first model.\n            Used by evals and the non-streaming API path so the caller can inspect\n            accumulated state (tool calls, answer tokens, citations) after the stream\n            is consumed. When ``None`` a fresh container is created automatically.\n\n    Returns:\n        Generator yielding ``Packet`` objects as they arrive from worker threads —\n        answer tokens, tool output, citations — followed by a terminal ``Packet``\n        containing ``OverallStop`` once all models complete (or one containing\n        ``OverallStop(stop_reason=\"user_cancelled\")`` if the connection drops).\n    \"\"\"\n    n_models = len(setup.llms)\n\n    merged_queue: queue.Queue[tuple[int, Packet | Exception | object]] = queue.Queue()\n\n    state_containers: list[ChatStateContainer] = [\n        (\n            external_state_container\n            if (external_state_container is not None and i == 0)\n            else ChatStateContainer()\n        )\n        for i in range(n_models)\n    ]\n    model_succeeded: list[bool] = [False] * n_models\n    # Set to True when a model raises an exception (distinct from \"still running\").\n    # Used in the stop-button path to avoid calling completion for errored models.\n    model_errored: list[bool] = [False] * n_models\n\n    # Set when the drain loop exits early (HTTP disconnect / GeneratorExit).\n    # Signals emitters to skip future puts so workers exit promptly.\n    drain_done = threading.Event()\n\n    def _run_model(model_idx: int) -> None:\n        \"\"\"Run one LLM loop inside a worker thread, writing packets to ``merged_queue``.\"\"\"\n        model_emitter = Emitter(\n            model_idx=model_idx,\n            merged_queue=merged_queue,\n            drain_done=drain_done,\n        )\n        sc = state_containers[model_idx]\n        model_llm = setup.llms[model_idx]\n\n        try:\n            # Each worker opens its own session — SQLAlchemy sessions are not thread-safe.\n            # Do NOT write to the outer db_session (or any shared DB state) from here;\n            # all DB writes in this thread must go through thread_db_session.\n            with get_session_with_current_tenant() as thread_db_session:\n                thread_tool_dict = construct_tools(\n                    persona=setup.persona,\n                    db_session=thread_db_session,\n                    emitter=model_emitter,\n                    user=user,\n                    llm=model_llm,\n                    search_tool_config=SearchToolConfig(\n                        user_selected_filters=setup.new_msg_req.internal_search_filters,\n                        project_id_filter=setup.search_params.project_id_filter,\n                        persona_id_filter=setup.search_params.persona_id_filter,\n                        bypass_acl=setup.bypass_acl,\n                        slack_context=setup.slack_context,\n                        enable_slack_search=_should_enable_slack_search(\n                            setup.persona, setup.new_msg_req.internal_search_filters\n                        ),\n                    ),\n                    custom_tool_config=CustomToolConfig(\n                        chat_session_id=setup.chat_session.id,\n                        message_id=setup.user_message.id,\n                        additional_headers=setup.custom_tool_additional_headers,\n                        mcp_headers=setup.mcp_headers,\n                    ),\n                    file_reader_tool_config=FileReaderToolConfig(\n                        user_file_ids=setup.available_files.user_file_ids,\n                        chat_file_ids=setup.available_files.chat_file_ids,\n                    ),\n                    allowed_tool_ids=setup.new_msg_req.allowed_tool_ids,\n                    search_usage_forcing_setting=setup.search_params.search_usage,\n                )\n                model_tools = [\n                    tool\n                    for tool_list in thread_tool_dict.values()\n                    for tool in tool_list\n                ]\n\n                if setup.forced_tool_id and setup.forced_tool_id not in {\n                    tool.id for tool in model_tools\n                }:\n                    raise ValueError(\n                        f\"Forced tool {setup.forced_tool_id} not found in tools\"\n                    )\n\n                # Per-thread copy: run_llm_loop mutates simple_chat_history in-place.\n                if n_models == 1 and setup.new_msg_req.deep_research:\n                    if setup.chat_session.project_id:\n                        raise RuntimeError(\n                            \"Deep research is not supported for projects\"\n                        )\n                    run_deep_research_llm_loop(\n                        emitter=model_emitter,\n                        state_container=sc,\n                        simple_chat_history=list(setup.simple_chat_history),\n                        tools=model_tools,\n                        custom_agent_prompt=setup.custom_agent_prompt,\n                        llm=model_llm,\n                        token_counter=get_llm_token_counter(model_llm),\n                        db_session=thread_db_session,\n                        skip_clarification=setup.skip_clarification,\n                        user_identity=setup.user_identity,\n                        chat_session_id=str(setup.chat_session.id),\n                        all_injected_file_metadata=setup.all_injected_file_metadata,\n                    )\n                else:\n                    run_llm_loop(\n                        emitter=model_emitter,\n                        state_container=sc,\n                        simple_chat_history=list(setup.simple_chat_history),\n                        tools=model_tools,\n                        custom_agent_prompt=setup.custom_agent_prompt,\n                        context_files=setup.extracted_context_files,\n                        persona=setup.persona,\n                        user_memory_context=setup.user_memory_context,\n                        llm=model_llm,\n                        token_counter=get_llm_token_counter(model_llm),\n                        db_session=thread_db_session,\n                        forced_tool_id=setup.forced_tool_id,\n                        user_identity=setup.user_identity,\n                        chat_session_id=str(setup.chat_session.id),\n                        chat_files=setup.chat_files_for_tools,\n                        include_citations=setup.new_msg_req.include_citations,\n                        all_injected_file_metadata=setup.all_injected_file_metadata,\n                        inject_memories_in_prompt=user.use_memories,\n                    )\n\n            model_succeeded[model_idx] = True\n\n        except Exception as e:\n            model_errored[model_idx] = True\n            merged_queue.put((model_idx, e))\n\n        finally:\n            merged_queue.put((model_idx, _MODEL_DONE))\n\n    def _delete_orphaned_message(model_idx: int, context: str) -> None:\n        \"\"\"Delete a reserved ChatMessage that was never populated due to a model error.\"\"\"\n        try:\n            orphaned = db_session.get(\n                ChatMessage, setup.reserved_messages[model_idx].id\n            )\n            if orphaned is not None:\n                db_session.delete(orphaned)\n                db_session.commit()\n        except Exception:\n            logger.exception(\n                \"%s orphan cleanup failed for model %d (%s)\",\n                context,\n                model_idx,\n                setup.model_display_names[model_idx],\n            )\n\n    # Copy contextvars before submitting futures — ThreadPoolExecutor does NOT\n    # auto-propagate contextvars in Python 3.11; threads would inherit a blank context.\n    worker_context = contextvars.copy_context()\n    executor = ThreadPoolExecutor(\n        max_workers=n_models, thread_name_prefix=\"multi-model\"\n    )\n    completion_persisted: bool = False\n    try:\n        for i in range(n_models):\n            executor.submit(worker_context.run, _run_model, i)\n\n        # ── Main thread: merge and yield packets ────────────────────────────\n        models_remaining = n_models\n        while models_remaining > 0:\n            try:\n                model_idx, item = merged_queue.get(timeout=_CANCEL_POLL_INTERVAL_S)\n            except queue.Empty:\n                # Check for user-initiated cancellation every 50 ms.\n                if not setup.check_is_connected():\n                    # Save state for every model before exiting.\n                    # - Succeeded models: full answer (is_connected=True).\n                    # - Still-in-flight models: partial answer + \"stopped by user\".\n                    # - Errored models: delete the orphaned reserved message; do NOT\n                    #   save \"stopped by user\" for a model that actually threw an exception.\n                    for i in range(n_models):\n                        if model_errored[i]:\n                            _delete_orphaned_message(i, \"stop-button\")\n                            continue\n                        try:\n                            succeeded = model_succeeded[i]\n                            llm_loop_completion_handle(\n                                state_container=state_containers[i],\n                                is_connected=lambda: succeeded,\n                                db_session=db_session,\n                                assistant_message=setup.reserved_messages[i],\n                                llm=setup.llms[i],\n                                reserved_tokens=setup.reserved_token_count,\n                            )\n                        except Exception:\n                            logger.exception(\n                                \"stop-button completion failed for model %d (%s)\",\n                                i,\n                                setup.model_display_names[i],\n                            )\n                    yield Packet(\n                        placement=Placement(turn_index=0),\n                        obj=OverallStop(type=\"stop\", stop_reason=\"user_cancelled\"),\n                    )\n                    completion_persisted = True\n                    return\n                continue\n            else:\n                if item is _MODEL_DONE:\n                    models_remaining -= 1\n                elif isinstance(item, Exception):\n                    # Yield a tagged error for this model but keep the other models running.\n                    # Do NOT decrement models_remaining — _run_model's finally always posts\n                    # _MODEL_DONE, which is the sole completion signal.\n                    error_msg = str(item)\n                    stack_trace = \"\".join(\n                        traceback.format_exception(type(item), item, item.__traceback__)\n                    )\n                    model_llm = setup.llms[model_idx]\n                    if model_llm.config.api_key and len(model_llm.config.api_key) > 2:\n                        error_msg = error_msg.replace(\n                            model_llm.config.api_key, \"[REDACTED_API_KEY]\"\n                        )\n                        stack_trace = stack_trace.replace(\n                            model_llm.config.api_key, \"[REDACTED_API_KEY]\"\n                        )\n                    yield StreamingError(\n                        error=error_msg,\n                        stack_trace=stack_trace,\n                        error_code=\"MODEL_ERROR\",\n                        is_retryable=True,\n                        details={\n                            \"model\": model_llm.config.model_name,\n                            \"provider\": model_llm.config.model_provider,\n                            \"model_index\": model_idx,\n                        },\n                    )\n                elif isinstance(item, Packet):\n                    # model_index already embedded by the model's Emitter in _run_model\n                    yield item\n\n        # ── Completion: save each successful model's response ───────────────\n        # All model loops have completed (run_llm_loop returned) — no more writes\n        # to state_containers. Worker threads may still be closing their own DB\n        # sessions, but the main-thread db_session is unshared and safe to use.\n        for i in range(n_models):\n            if not model_succeeded[i]:\n                # Model errored — delete its orphaned reserved message.\n                _delete_orphaned_message(i, \"normal\")\n                continue\n            try:\n                llm_loop_completion_handle(\n                    state_container=state_containers[i],\n                    is_connected=setup.check_is_connected,\n                    db_session=db_session,\n                    assistant_message=setup.reserved_messages[i],\n                    llm=setup.llms[i],\n                    reserved_tokens=setup.reserved_token_count,\n                )\n            except Exception:\n                logger.exception(\n                    \"normal completion failed for model %d (%s)\",\n                    i,\n                    setup.model_display_names[i],\n                )\n        completion_persisted = True\n\n    finally:\n        if completion_persisted:\n            # Normal exit or stop-button exit: completion already persisted.\n            # Threads are done (normal path) or can finish in the background (stop-button).\n            executor.shutdown(wait=False)\n        else:\n            # Early exit (GeneratorExit from raw HTTP disconnect, or unhandled\n            # exception in the drain loop).\n            # 1. Signal emitters to stop — future emit() calls return immediately,\n            #    so workers exit their LLM loops promptly.\n            drain_done.set()\n            # 2. Wait for all workers to finish. Once drain_done is set the Emitter\n            #    short-circuits, so workers should exit quickly.\n            executor.shutdown(wait=True)\n            # 3. All workers are done — complete from the main thread only.\n            for i in range(n_models):\n                if model_succeeded[i]:\n                    try:\n                        llm_loop_completion_handle(\n                            state_container=state_containers[i],\n                            # Model already finished — persist full response.\n                            is_connected=lambda: True,\n                            db_session=db_session,\n                            assistant_message=setup.reserved_messages[i],\n                            llm=setup.llms[i],\n                            reserved_tokens=setup.reserved_token_count,\n                        )\n                    except Exception:\n                        logger.exception(\n                            \"disconnect completion failed for model %d (%s)\",\n                            i,\n                            setup.model_display_names[i],\n                        )\n                elif model_errored[i]:\n                    _delete_orphaned_message(i, \"disconnect\")\n            # 4. Drain buffered packets from memory — no consumer is running.\n            while not merged_queue.empty():\n                try:\n                    merged_queue.get_nowait()\n                except queue.Empty:\n                    break\n\n\ndef _stream_chat_turn(\n    new_msg_req: SendMessageRequest,\n    user: User,\n    db_session: Session,\n    llm_overrides: list[LLMOverride] | None = None,\n    litellm_additional_headers: dict[str, str] | None = None,\n    custom_tool_additional_headers: dict[str, str] | None = None,\n    mcp_headers: dict[str, str] | None = None,\n    bypass_acl: bool = False,\n    additional_context: str | None = None,\n    slack_context: SlackContext | None = None,\n    external_state_container: ChatStateContainer | None = None,\n) -> AnswerStream:\n    \"\"\"Private implementation for single-model and multi-model chat turn streaming.\n\n    Builds the turn context via ``build_chat_turn``, then streams packets from\n    ``_run_models`` back to the caller. Handles setup errors, LLM errors, and\n    cancellation uniformly, saving whatever partial state has been accumulated\n    before re-raising or yielding a terminal error packet.\n\n    Not called directly — use the public wrappers:\n    - ``handle_stream_message_objects`` for single-model (N=1) requests.\n    - ``handle_multi_model_stream`` for side-by-side multi-model comparison (N>1).\n\n    Args:\n        new_msg_req: The incoming chat request from the user.\n        user: Authenticated user; may be anonymous for public personas.\n        db_session: Database session for this request.\n        llm_overrides: ``None`` → single-model (persona default LLM).\n            Non-empty list → multi-model (one LLM per override, 2–3 items).\n        litellm_additional_headers: Extra headers forwarded to the LLM provider.\n        custom_tool_additional_headers: Extra headers for custom tool HTTP calls.\n        mcp_headers: Extra headers for MCP tool calls.\n        bypass_acl: If ``True``, document ACL checks are skipped (used by Slack bot).\n        additional_context: Extra context prepended to the LLM's chat history, not\n            stored in the DB (used for Slack thread hydration).\n        slack_context: Federated Slack search context passed through to the search tool.\n        external_state_container: Optional pre-constructed state container. When\n            provided, accumulated state (tool calls, citations, answer tokens) is\n            written into it so the caller can inspect the result after streaming.\n\n    Returns:\n        Generator yielding ``Packet`` objects — answer tokens, tool output, citations —\n        followed by a terminal ``Packet`` containing ``OverallStop``.\n    \"\"\"\n    if new_msg_req.mock_llm_response is not None and not INTEGRATION_TESTS_MODE:\n        raise ValueError(\n            \"mock_llm_response can only be used when INTEGRATION_TESTS_MODE=true\"\n        )\n\n    mock_response_token: Token[str | None] | None = None\n    setup: ChatTurnSetup | None = None\n\n    try:\n        setup = yield from build_chat_turn(\n            new_msg_req=new_msg_req,\n            user=user,\n            db_session=db_session,\n            llm_overrides=llm_overrides,\n            litellm_additional_headers=litellm_additional_headers,\n            custom_tool_additional_headers=custom_tool_additional_headers,\n            mcp_headers=mcp_headers,\n            bypass_acl=bypass_acl,\n            slack_context=slack_context,\n            additional_context=additional_context,\n        )\n\n        # Set mock response token right before the LLM stream begins so that\n        # run_in_background threads inherit the correct context.\n        if new_msg_req.mock_llm_response is not None:\n            mock_response_token = set_llm_mock_response(new_msg_req.mock_llm_response)\n\n        yield from _run_models(\n            setup=setup,\n            user=user,\n            db_session=db_session,\n            external_state_container=external_state_container,\n        )\n\n    except OnyxError as e:\n        if e.error_code is not OnyxErrorCode.QUERY_REJECTED:\n            log_onyx_error(e)\n        yield StreamingError(\n            error=e.detail,\n            error_code=e.error_code.code,\n            is_retryable=e.status_code >= 500,\n        )\n        db_session.rollback()\n        return\n\n    except ValueError as e:\n        logger.exception(\"Failed to process chat message.\")\n        yield StreamingError(\n            error=str(e),\n            error_code=\"VALIDATION_ERROR\",\n            is_retryable=True,\n        )\n        db_session.rollback()\n        return\n\n    except EmptyLLMResponseError as e:\n        stack_trace = traceback.format_exc()\n        logger.warning(\n            f\"LLM returned an empty response (provider={e.provider}, model={e.model}, tool_choice={e.tool_choice})\"\n        )\n        yield StreamingError(\n            error=e.client_error_msg,\n            stack_trace=stack_trace,\n            error_code=e.error_code,\n            is_retryable=e.is_retryable,\n            details={\n                \"model\": e.model,\n                \"provider\": e.provider,\n                \"tool_choice\": e.tool_choice.value,\n            },\n        )\n        db_session.rollback()\n\n    except Exception as e:\n        logger.exception(f\"Failed to process chat message due to {e}\")\n        stack_trace = traceback.format_exc()\n\n        llm = setup.llms[0] if setup else None\n        if llm:\n            client_error_msg, error_code, is_retryable = litellm_exception_to_error_msg(\n                e, llm\n            )\n            if llm.config.api_key and len(llm.config.api_key) > 2:\n                client_error_msg = client_error_msg.replace(\n                    llm.config.api_key, \"[REDACTED_API_KEY]\"\n                )\n                stack_trace = stack_trace.replace(\n                    llm.config.api_key, \"[REDACTED_API_KEY]\"\n                )\n            yield StreamingError(\n                error=client_error_msg,\n                stack_trace=stack_trace,\n                error_code=error_code,\n                is_retryable=is_retryable,\n                details={\n                    \"model\": llm.config.model_name,\n                    \"provider\": llm.config.model_provider,\n                },\n            )\n        else:\n            yield StreamingError(\n                error=\"Failed to initialize the chat. Please check your configuration and try again.\",\n                stack_trace=stack_trace,\n                error_code=\"INIT_FAILED\",\n                is_retryable=True,\n            )\n        db_session.rollback()\n\n    finally:\n        if mock_response_token is not None:\n            reset_llm_mock_response(mock_response_token)\n        try:\n            if setup is not None:\n                set_processing_status(\n                    chat_session_id=setup.chat_session.id,\n                    cache=setup.cache,\n                    value=False,\n                )\n        except Exception:\n            logger.exception(\"Error in setting processing status\")\n\n\ndef handle_stream_message_objects(\n    new_msg_req: SendMessageRequest,\n    user: User,\n    db_session: Session,\n    litellm_additional_headers: dict[str, str] | None = None,\n    custom_tool_additional_headers: dict[str, str] | None = None,\n    mcp_headers: dict[str, str] | None = None,\n    bypass_acl: bool = False,\n    additional_context: str | None = None,\n    slack_context: SlackContext | None = None,\n    external_state_container: ChatStateContainer | None = None,\n) -> AnswerStream:\n    \"\"\"Single-model streaming entrypoint. For multi-model comparison, use ``handle_multi_model_stream``.\"\"\"\n    yield from _stream_chat_turn(\n        new_msg_req=new_msg_req,\n        user=user,\n        db_session=db_session,\n        llm_overrides=None,\n        litellm_additional_headers=litellm_additional_headers,\n        custom_tool_additional_headers=custom_tool_additional_headers,\n        mcp_headers=mcp_headers,\n        bypass_acl=bypass_acl,\n        additional_context=additional_context,\n        slack_context=slack_context,\n        external_state_container=external_state_container,\n    )\n\n\ndef _build_model_display_name(override: LLMOverride | None) -> str:\n    \"\"\"Build a human-readable display name from an LLM override.\"\"\"\n    if override is None:\n        return \"unknown\"\n    return override.display_name or override.model_version or \"unknown\"\n\n\ndef handle_multi_model_stream(\n    new_msg_req: SendMessageRequest,\n    user: User,\n    db_session: Session,\n    llm_overrides: list[LLMOverride],\n    litellm_additional_headers: dict[str, str] | None = None,\n    custom_tool_additional_headers: dict[str, str] | None = None,\n    mcp_headers: dict[str, str] | None = None,\n) -> AnswerStream:\n    \"\"\"Thin wrapper for side-by-side multi-model comparison (2–3 models).\n\n    Validates the override list and delegates to ``_stream_chat_turn``,\n    which handles both single-model and multi-model execution via the same path.\n\n    Args:\n        new_msg_req: The incoming chat request. ``deep_research`` must be ``False``.\n        user: Authenticated user making the request.\n        db_session: Database session for this request.\n        llm_overrides: Exactly 2 or 3 ``LLMOverride`` objects — one per model to run.\n        litellm_additional_headers: Extra headers forwarded to each LLM provider.\n        custom_tool_additional_headers: Extra headers for custom tool HTTP calls.\n        mcp_headers: Extra headers for MCP tool calls.\n\n    Returns:\n        Generator yielding interleaved ``Packet`` objects from all models, each tagged\n        with ``model_index`` in its placement.\n    \"\"\"\n    n_models = len(llm_overrides)\n    if n_models < 2 or n_models > 3:\n        yield StreamingError(\n            error=f\"Multi-model requires 2-3 overrides, got {n_models}\",\n            error_code=\"VALIDATION_ERROR\",\n            is_retryable=False,\n        )\n        return\n    if new_msg_req.deep_research:\n        yield StreamingError(\n            error=\"Multi-model is not supported with deep research\",\n            error_code=\"VALIDATION_ERROR\",\n            is_retryable=False,\n        )\n        return\n    yield from _stream_chat_turn(\n        new_msg_req=new_msg_req,\n        user=user,\n        db_session=db_session,\n        llm_overrides=llm_overrides,\n        litellm_additional_headers=litellm_additional_headers,\n        custom_tool_additional_headers=custom_tool_additional_headers,\n        mcp_headers=mcp_headers,\n    )\n\n\ndef llm_loop_completion_handle(\n    state_container: ChatStateContainer,\n    is_connected: Callable[[], bool],\n    db_session: Session,\n    assistant_message: ChatMessage,\n    llm: LLM,\n    reserved_tokens: int,\n) -> None:\n    chat_session_id = assistant_message.chat_session_id\n\n    # Snapshot all state under the container's lock before any DB write.\n    # Worker threads may still be running (e.g. user-cancellation path), so\n    # direct attribute access is not thread-safe — use the provided getters.\n    answer_tokens = state_container.get_answer_tokens()\n    reasoning_tokens = state_container.get_reasoning_tokens()\n    citation_to_doc = state_container.get_citation_to_doc()\n    tool_calls = state_container.get_tool_calls()\n    is_clarification = state_container.get_is_clarification()\n    all_search_docs = state_container.get_all_search_docs()\n    emitted_citations = state_container.get_emitted_citations()\n    pre_answer_processing_time = state_container.get_pre_answer_processing_time()\n\n    completed_normally = is_connected()\n    if completed_normally:\n        if answer_tokens is None:\n            raise RuntimeError(\n                \"LLM run completed normally but did not return an answer.\"\n            )\n        final_answer = answer_tokens\n    else:\n        # Stopped by user - append stop message\n        logger.debug(f\"Chat session {chat_session_id} stopped by user\")\n        if answer_tokens:\n            final_answer = (\n                answer_tokens + \" ... \\n\\nGeneration was stopped by the user.\"\n            )\n        else:\n            final_answer = \"The generation was stopped by the user.\"\n\n    save_chat_turn(\n        message_text=final_answer,\n        reasoning_tokens=reasoning_tokens,\n        citation_to_doc=citation_to_doc,\n        tool_calls=tool_calls,\n        all_search_docs=all_search_docs,\n        db_session=db_session,\n        assistant_message=assistant_message,\n        is_clarification=is_clarification,\n        emitted_citations=emitted_citations,\n        pre_answer_processing_time=pre_answer_processing_time,\n    )\n\n    # Check if compression is needed after saving the message\n    updated_chat_history = create_chat_history_chain(\n        chat_session_id=chat_session_id,\n        db_session=db_session,\n    )\n    total_tokens = calculate_total_history_tokens(updated_chat_history)\n\n    compression_params = get_compression_params(\n        max_input_tokens=llm.config.max_input_tokens,\n        current_history_tokens=total_tokens,\n        reserved_tokens=reserved_tokens,\n    )\n    if compression_params.should_compress:\n        # Build tool mapping for formatting messages\n        all_tools = get_tools(db_session)\n        tool_id_to_name = {tool.id: tool.name for tool in all_tools}\n\n        compress_chat_history(\n            db_session=db_session,\n            chat_history=updated_chat_history,\n            llm=llm,\n            compression_params=compression_params,\n            tool_id_to_name=tool_id_to_name,\n        )\n\n\n_CITATION_LINK_START_PATTERN = re.compile(r\"\\s*\\[\\[\\d+\\]\\]\\(\")\n\n\ndef _find_markdown_link_end(text: str, destination_start: int) -> int | None:\n    depth = 0\n    i = destination_start\n\n    while i < len(text):\n        curr = text[i]\n        if curr == \"\\\\\":\n            i += 2\n            continue\n\n        if curr == \"(\":\n            depth += 1\n        elif curr == \")\":\n            if depth == 0:\n                return i\n            depth -= 1\n\n        i += 1\n\n    return None\n\n\ndef remove_answer_citations(answer: str) -> str:\n    stripped_parts: list[str] = []\n    cursor = 0\n\n    while match := _CITATION_LINK_START_PATTERN.search(answer, cursor):\n        stripped_parts.append(answer[cursor : match.start()])\n        link_end = _find_markdown_link_end(answer, match.end())\n        if link_end is None:\n            stripped_parts.append(answer[match.start() :])\n            return \"\".join(stripped_parts)\n\n        cursor = link_end + 1\n\n    stripped_parts.append(answer[cursor:])\n    return \"\".join(stripped_parts)\n\n\n@log_function_time()\ndef gather_stream(\n    packets: AnswerStream,\n) -> ChatBasicResponse:\n    answer: str | None = None\n    citations: list[CitationInfo] = []\n    error_msg: str | None = None\n    message_id: int | None = None\n    top_documents: list[SearchDoc] = []\n\n    for packet in packets:\n        if isinstance(packet, Packet):\n            # Handle the different packet object types\n            if isinstance(packet.obj, AgentResponseStart):\n                # AgentResponseStart contains the final documents\n                if packet.obj.final_documents:\n                    top_documents = packet.obj.final_documents\n            elif isinstance(packet.obj, AgentResponseDelta):\n                # AgentResponseDelta contains incremental content updates\n                if answer is None:\n                    answer = \"\"\n                if packet.obj.content:\n                    answer += packet.obj.content\n            elif isinstance(packet.obj, CitationInfo):\n                # CitationInfo contains citation information\n                citations.append(packet.obj)\n        elif isinstance(packet, StreamingError):\n            error_msg = packet.error\n        elif isinstance(packet, MessageResponseIDInfo):\n            message_id = packet.reserved_assistant_message_id\n\n    if message_id is None:\n        raise ValueError(\"Message ID is required\")\n\n    if answer is None:\n        if error_msg is not None:\n            answer = \"\"\n        else:\n            # This should never be the case as these non-streamed flows do not have a stop-generation signal\n            raise RuntimeError(\"Answer was not generated\")\n\n    return ChatBasicResponse(\n        answer=answer,\n        answer_citationless=remove_answer_citations(answer),\n        citation_info=citations,\n        message_id=message_id,\n        error_msg=error_msg,\n        top_documents=top_documents,\n    )\n\n\n@log_function_time()\ndef gather_stream_full(\n    packets: AnswerStream,\n    state_container: ChatStateContainer,\n) -> ChatFullResponse:\n    \"\"\"\n    Aggregate streaming packets and state container into a complete ChatFullResponse.\n\n    This function consumes all packets from the stream and combines them with\n    the accumulated state from the ChatStateContainer to build a complete response\n    including answer, reasoning, citations, and tool calls.\n\n    Args:\n        packets: The stream of packets from handle_stream_message_objects\n        state_container: The state container that accumulates tool calls, reasoning, etc.\n\n    Returns:\n        ChatFullResponse with all available data\n    \"\"\"\n    answer: str | None = None\n    citations: list[CitationInfo] = []\n    error_msg: str | None = None\n    message_id: int | None = None\n    top_documents: list[SearchDoc] = []\n    chat_session_id: UUID | None = None\n\n    for packet in packets:\n        if isinstance(packet, Packet):\n            if isinstance(packet.obj, AgentResponseStart):\n                if packet.obj.final_documents:\n                    top_documents = packet.obj.final_documents\n            elif isinstance(packet.obj, AgentResponseDelta):\n                if answer is None:\n                    answer = \"\"\n                if packet.obj.content:\n                    answer += packet.obj.content\n            elif isinstance(packet.obj, CitationInfo):\n                citations.append(packet.obj)\n        elif isinstance(packet, StreamingError):\n            error_msg = packet.error\n        elif isinstance(packet, MessageResponseIDInfo):\n            message_id = packet.reserved_assistant_message_id\n        elif isinstance(packet, CreateChatSessionID):\n            chat_session_id = packet.chat_session_id\n\n    if message_id is None:\n        raise ValueError(\"Message ID is required\")\n\n    # Use state_container for complete answer (handles edge cases gracefully)\n    final_answer = state_container.get_answer_tokens() or answer or \"\"\n\n    # Get reasoning from state container (None when model doesn't produce reasoning)\n    reasoning = state_container.get_reasoning_tokens()\n\n    # Convert ToolCallInfo list to ToolCallResponse list\n    tool_call_responses = [\n        ToolCallResponse(\n            tool_name=tc.tool_name,\n            tool_arguments=tc.tool_call_arguments,\n            tool_result=tc.tool_call_response,\n            search_docs=tc.search_docs,\n            generated_images=tc.generated_images,\n            pre_reasoning=tc.reasoning_tokens,\n        )\n        for tc in state_container.get_tool_calls()\n    ]\n\n    return ChatFullResponse(\n        answer=final_answer,\n        answer_citationless=remove_answer_citations(final_answer),\n        pre_answer_reasoning=reasoning,\n        tool_calls=tool_call_responses,\n        top_documents=top_documents,\n        citation_info=citations,\n        message_id=message_id,\n        chat_session_id=chat_session_id,\n        error_msg=error_msg,\n    )\n"
  },
  {
    "path": "backend/onyx/chat/prompt_utils.py",
    "content": "from collections.abc import Callable\nfrom collections.abc import Sequence\nfrom uuid import UUID\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.memory import UserMemoryContext\nfrom onyx.db.persona import get_default_behavior_persona\nfrom onyx.db.user_file import calculate_user_files_token_count\nfrom onyx.file_store.models import FileDescriptor\nfrom onyx.prompts.chat_prompts import CITATION_REMINDER\nfrom onyx.prompts.chat_prompts import DEFAULT_SYSTEM_PROMPT\nfrom onyx.prompts.chat_prompts import FILE_REMINDER\nfrom onyx.prompts.chat_prompts import LAST_CYCLE_CITATION_REMINDER\nfrom onyx.prompts.chat_prompts import REQUIRE_CITATION_GUIDANCE\nfrom onyx.prompts.prompt_utils import get_company_context\nfrom onyx.prompts.prompt_utils import handle_onyx_date_awareness\nfrom onyx.prompts.prompt_utils import replace_citation_guidance_tag\nfrom onyx.prompts.prompt_utils import replace_reminder_tag\nfrom onyx.prompts.tool_prompts import GENERATE_IMAGE_GUIDANCE\nfrom onyx.prompts.tool_prompts import INTERNAL_SEARCH_GUIDANCE\nfrom onyx.prompts.tool_prompts import MEMORY_GUIDANCE\nfrom onyx.prompts.tool_prompts import OPEN_URLS_GUIDANCE\nfrom onyx.prompts.tool_prompts import PYTHON_TOOL_GUIDANCE\nfrom onyx.prompts.tool_prompts import TOOL_DESCRIPTION_SEARCH_GUIDANCE\nfrom onyx.prompts.tool_prompts import TOOL_SECTION_HEADER\nfrom onyx.prompts.tool_prompts import WEB_SEARCH_GUIDANCE\nfrom onyx.prompts.tool_prompts import WEB_SEARCH_SITE_DISABLED_GUIDANCE\nfrom onyx.prompts.user_info import BASIC_INFORMATION_PROMPT\nfrom onyx.prompts.user_info import TEAM_INFORMATION_PROMPT\nfrom onyx.prompts.user_info import USER_INFORMATION_HEADER\nfrom onyx.prompts.user_info import USER_MEMORIES_PROMPT\nfrom onyx.prompts.user_info import USER_PREFERENCES_PROMPT\nfrom onyx.prompts.user_info import USER_ROLE_PROMPT\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.tool_implementations.images.image_generation_tool import (\n    ImageGenerationTool,\n)\nfrom onyx.tools.tool_implementations.memory.memory_tool import MemoryTool\nfrom onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool\nfrom onyx.tools.tool_implementations.python.python_tool import PythonTool\nfrom onyx.tools.tool_implementations.search.search_tool import SearchTool\nfrom onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool\nfrom onyx.utils.timing import log_function_time\n\n\ndef get_default_base_system_prompt(db_session: Session) -> str:\n    default_persona = get_default_behavior_persona(db_session)\n    return (\n        default_persona.system_prompt\n        if default_persona and default_persona.system_prompt is not None\n        else DEFAULT_SYSTEM_PROMPT\n    )\n\n\n@log_function_time(print_only=True)\ndef calculate_reserved_tokens(\n    db_session: Session,\n    persona_system_prompt: str,\n    token_counter: Callable[[str], int],\n    files: list[FileDescriptor] | None = None,\n    user_memory_context: UserMemoryContext | None = None,\n) -> int:\n    \"\"\"\n    Calculate reserved token count for system prompt and user files.\n\n    This is used for token estimation purposes to reserve space for:\n    - The system prompt (base + custom agent prompt + all guidance)\n    - User files attached to the message\n\n    Args:\n        db_session: Database session\n        persona_system_prompt: Custom agent system prompt (can be empty string)\n        token_counter: Function that counts tokens in text\n        files: List of file descriptors from the chat message (optional)\n        user_memory_context: User memory context (optional)\n\n    Returns:\n        Total reserved token count\n    \"\"\"\n    base_system_prompt = get_default_base_system_prompt(db_session)\n\n    # This is for token estimation purposes\n    fake_system_prompt = build_system_prompt(\n        base_system_prompt=base_system_prompt,\n        datetime_aware=True,\n        user_memory_context=user_memory_context,\n        tools=None,\n        should_cite_documents=True,\n        include_all_guidance=True,\n    )\n\n    custom_agent_prompt = persona_system_prompt if persona_system_prompt else \"\"\n\n    reserved_token_count = token_counter(\n        # Annoying that the dict has no attributes now\n        custom_agent_prompt\n        + \" \"\n        + fake_system_prompt\n    )\n\n    # Calculate total token count for files in the last message\n    file_token_count = 0\n    if files:\n        # Extract user_file_id from each file descriptor\n        user_file_ids: list[UUID] = []\n        for file in files:\n            uid = file.get(\"user_file_id\")\n            if not uid:\n                continue\n            try:\n                user_file_ids.append(UUID(uid))\n            except (TypeError, ValueError, AttributeError):\n                # Skip invalid user_file_id values\n                continue\n        if user_file_ids:\n            file_token_count = calculate_user_files_token_count(\n                user_file_ids, db_session\n            )\n\n    reserved_token_count += file_token_count\n\n    return reserved_token_count\n\n\ndef build_reminder_message(\n    reminder_text: str | None,\n    include_citation_reminder: bool,\n    include_file_reminder: bool,\n    is_last_cycle: bool,\n) -> str | None:\n    reminder = reminder_text.strip() if reminder_text else \"\"\n    if is_last_cycle:\n        reminder += \"\\n\\n\" + LAST_CYCLE_CITATION_REMINDER\n    if include_citation_reminder:\n        reminder += \"\\n\\n\" + CITATION_REMINDER\n    if include_file_reminder:\n        reminder += \"\\n\\n\" + FILE_REMINDER\n    reminder = reminder.strip()\n    return reminder if reminder else None\n\n\ndef _build_user_information_section(\n    user_memory_context: UserMemoryContext | None,\n    company_context: str | None,\n) -> str:\n    \"\"\"Build the complete '# User Information' section with all sub-sections\n    in the correct order: Basic Info → Team Info → Preferences → Memories.\"\"\"\n    sections: list[str] = []\n\n    if user_memory_context:\n        ctx = user_memory_context\n        has_basic_info = ctx.user_info.name or ctx.user_info.email or ctx.user_info.role\n\n        if has_basic_info:\n            role_line = (\n                USER_ROLE_PROMPT.format(user_role=ctx.user_info.role).strip()\n                if ctx.user_info.role\n                else \"\"\n            )\n            if role_line:\n                role_line = \"\\n\" + role_line\n            sections.append(\n                BASIC_INFORMATION_PROMPT.format(\n                    user_name=ctx.user_info.name or \"\",\n                    user_email=ctx.user_info.email or \"\",\n                    user_role=role_line,\n                )\n            )\n\n    if company_context:\n        sections.append(\n            TEAM_INFORMATION_PROMPT.format(team_information=company_context.strip())\n        )\n\n    if user_memory_context:\n        ctx = user_memory_context\n\n        if ctx.user_preferences:\n            sections.append(\n                USER_PREFERENCES_PROMPT.format(user_preferences=ctx.user_preferences)\n            )\n\n        if ctx.memories:\n            formatted_memories = \"\\n\".join(f\"- {memory}\" for memory in ctx.memories)\n            sections.append(\n                USER_MEMORIES_PROMPT.format(user_memories=formatted_memories)\n            )\n\n    if not sections:\n        return \"\"\n\n    return USER_INFORMATION_HEADER + \"\\n\".join(sections)\n\n\ndef build_system_prompt(\n    base_system_prompt: str,\n    datetime_aware: bool = False,\n    user_memory_context: UserMemoryContext | None = None,\n    tools: Sequence[Tool] | None = None,\n    should_cite_documents: bool = False,\n    include_all_guidance: bool = False,\n) -> str:\n    \"\"\"Should only be called with the default behavior system prompt.\n    If the user has replaced the default behavior prompt with their custom agent prompt, do not call this function.\n    \"\"\"\n    system_prompt = handle_onyx_date_awareness(base_system_prompt, datetime_aware)\n\n    # Replace citation guidance placeholder if present\n    system_prompt, should_append_citation_guidance = replace_citation_guidance_tag(\n        system_prompt,\n        should_cite_documents=should_cite_documents,\n        include_all_guidance=include_all_guidance,\n    )\n\n    # Replace reminder tag placeholder if present\n    system_prompt = replace_reminder_tag(system_prompt)\n\n    company_context = get_company_context()\n    user_info_section = _build_user_information_section(\n        user_memory_context, company_context\n    )\n    system_prompt += user_info_section\n\n    # Append citation guidance after company context if placeholder was not present\n    # This maintains backward compatibility and ensures citations are always enforced when needed\n    if should_append_citation_guidance:\n        system_prompt += REQUIRE_CITATION_GUIDANCE\n\n    if include_all_guidance:\n        tool_sections = [\n            TOOL_DESCRIPTION_SEARCH_GUIDANCE,\n            INTERNAL_SEARCH_GUIDANCE,\n            WEB_SEARCH_GUIDANCE.format(\n                site_colon_disabled=WEB_SEARCH_SITE_DISABLED_GUIDANCE\n            ),\n            OPEN_URLS_GUIDANCE,\n            PYTHON_TOOL_GUIDANCE,\n            GENERATE_IMAGE_GUIDANCE,\n            MEMORY_GUIDANCE,\n        ]\n        system_prompt += TOOL_SECTION_HEADER + \"\\n\".join(tool_sections)\n        return system_prompt\n\n    if tools:\n        has_web_search = any(isinstance(tool, WebSearchTool) for tool in tools)\n        has_internal_search = any(isinstance(tool, SearchTool) for tool in tools)\n        has_open_urls = any(isinstance(tool, OpenURLTool) for tool in tools)\n        has_python = any(isinstance(tool, PythonTool) for tool in tools)\n        has_generate_image = any(\n            isinstance(tool, ImageGenerationTool) for tool in tools\n        )\n        has_memory = any(isinstance(tool, MemoryTool) for tool in tools)\n\n        tool_guidance_sections: list[str] = []\n\n        if has_web_search or has_internal_search or include_all_guidance:\n            tool_guidance_sections.append(TOOL_DESCRIPTION_SEARCH_GUIDANCE)\n\n        # These are not included at the Tool level because the ordering may matter.\n        if has_internal_search or include_all_guidance:\n            tool_guidance_sections.append(INTERNAL_SEARCH_GUIDANCE)\n\n        if has_web_search or include_all_guidance:\n            site_disabled_guidance = \"\"\n            if has_web_search:\n                web_search_tool = next(\n                    (t for t in tools if isinstance(t, WebSearchTool)), None\n                )\n                if web_search_tool and not web_search_tool.supports_site_filter:\n                    site_disabled_guidance = WEB_SEARCH_SITE_DISABLED_GUIDANCE\n            tool_guidance_sections.append(\n                WEB_SEARCH_GUIDANCE.format(site_colon_disabled=site_disabled_guidance)\n            )\n\n        if has_open_urls or include_all_guidance:\n            tool_guidance_sections.append(OPEN_URLS_GUIDANCE)\n\n        if has_python or include_all_guidance:\n            tool_guidance_sections.append(PYTHON_TOOL_GUIDANCE)\n\n        if has_generate_image or include_all_guidance:\n            tool_guidance_sections.append(GENERATE_IMAGE_GUIDANCE)\n\n        if has_memory or include_all_guidance:\n            tool_guidance_sections.append(MEMORY_GUIDANCE)\n\n        if tool_guidance_sections:\n            system_prompt += TOOL_SECTION_HEADER + \"\\n\".join(tool_guidance_sections)\n\n    return system_prompt\n"
  },
  {
    "path": "backend/onyx/chat/save_chat.py",
    "content": "import json\nimport mimetypes\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.chat_state import ChatStateContainer\nfrom onyx.chat.chat_state import SearchDocKey\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.db.chat import add_search_docs_to_chat_message\nfrom onyx.db.chat import add_search_docs_to_tool_call\nfrom onyx.db.chat import create_db_search_doc\nfrom onyx.db.models import ChatMessage\nfrom onyx.db.models import ToolCall\nfrom onyx.db.tools import create_tool_call_no_commit\nfrom onyx.file_store.models import FileDescriptor\nfrom onyx.natural_language_processing.utils import BaseTokenizer\nfrom onyx.natural_language_processing.utils import get_tokenizer\nfrom onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type\nfrom onyx.tools.models import ToolCallInfo\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.postgres_sanitization import sanitize_string\n\nlogger = setup_logger()\n\n\ndef _extract_referenced_file_descriptors(\n    tool_calls: list[ToolCallInfo],\n    message_text: str,\n) -> list[FileDescriptor]:\n    \"\"\"Extract FileDescriptors for code interpreter files referenced in the message text.\"\"\"\n    descriptors: list[FileDescriptor] = []\n    for tool_call_info in tool_calls:\n        if not tool_call_info.generated_files:\n            continue\n        for gen_file in tool_call_info.generated_files:\n            file_id = (\n                gen_file.file_link.rsplit(\"/\", 1)[-1] if gen_file.file_link else \"\"\n            )\n            if file_id and file_id in message_text:\n                mime_type, _ = mimetypes.guess_type(gen_file.filename)\n                descriptors.append(\n                    FileDescriptor(\n                        id=file_id,\n                        type=mime_type_to_chat_file_type(mime_type),\n                        name=gen_file.filename,\n                    )\n                )\n    return descriptors\n\n\ndef _create_and_link_tool_calls(\n    tool_calls: list[ToolCallInfo],\n    assistant_message: ChatMessage,\n    db_session: Session,\n    default_tokenizer: BaseTokenizer,\n    tool_call_to_search_doc_ids: dict[str, list[int]],\n) -> None:\n    \"\"\"\n    Create ToolCall entries and link parent references and SearchDocs.\n\n    This function handles the logic of:\n    1. Creating all ToolCall objects (with temporary parent references)\n    2. Flushing to get DB IDs\n    3. Building mappings and updating parent references\n    4. Linking SearchDocs to ToolCalls\n\n\n    Args:\n        tool_calls: List of tool call information to create\n        assistant_message: The ChatMessage these tool calls belong to\n        db_session: Database session\n        default_tokenizer: Tokenizer for calculating token counts\n        tool_call_to_search_doc_ids: Mapping from tool_call_id to list of search_doc IDs\n    \"\"\"\n    # Create all ToolCall objects first (without parent_tool_call_id set)\n    # We'll update parent references after flushing to get IDs\n    tool_call_objects: list[ToolCall] = []\n    tool_call_info_map: dict[str, ToolCallInfo] = {}\n\n    for tool_call_info in tool_calls:\n        tool_call_info_map[tool_call_info.tool_call_id] = tool_call_info\n\n        # Calculate tool_call_tokens from arguments\n        try:\n            arguments_json_str = json.dumps(tool_call_info.tool_call_arguments)\n            tool_call_tokens = len(default_tokenizer.encode(arguments_json_str))\n        except Exception as e:\n            logger.warning(\n                f\"Failed to tokenize tool call arguments for {tool_call_info.tool_call_id}: {e}. Using length as (over) estimate.\"\n            )\n            arguments_json_str = json.dumps(tool_call_info.tool_call_arguments)\n            tool_call_tokens = len(arguments_json_str)\n\n        parent_message_id = (\n            assistant_message.id if tool_call_info.parent_tool_call_id is None else None\n        )\n\n        # Create ToolCall DB entry (parent_tool_call_id will be set after flush)\n        # This is needed to get the IDs for the parent pointers\n        tool_call = create_tool_call_no_commit(\n            chat_session_id=assistant_message.chat_session_id,\n            parent_chat_message_id=parent_message_id,\n            turn_number=tool_call_info.turn_index,\n            tool_id=tool_call_info.tool_id,\n            tool_call_id=tool_call_info.tool_call_id,\n            tool_call_arguments=tool_call_info.tool_call_arguments,\n            tool_call_response=tool_call_info.tool_call_response,\n            tool_call_tokens=tool_call_tokens,\n            db_session=db_session,\n            parent_tool_call_id=None,  # Will be updated after flush\n            reasoning_tokens=tool_call_info.reasoning_tokens,\n            generated_images=(\n                [img.model_dump() for img in tool_call_info.generated_images]\n                if tool_call_info.generated_images\n                else None\n            ),\n            tab_index=tool_call_info.tab_index,\n            add_only=True,\n        )\n\n        # Flush to get all of the IDs\n        db_session.flush()\n\n        tool_call_objects.append(tool_call)\n\n    # Build mapping of tool calls (tool_call_id string -> DB id int)\n    tool_call_map: dict[str, int] = {}\n    for tool_call_obj in tool_call_objects:\n        tool_call_map[tool_call_obj.tool_call_id] = tool_call_obj.id\n\n    # Update parent_tool_call_id for all tool calls\n    # Filter out orphaned children (whose parents don't exist) - this can happen\n    # when generation is stopped mid-execution and parent tool calls were cancelled\n    valid_tool_calls: list[ToolCall] = []\n    for tool_call_obj in tool_call_objects:\n        tool_call_info = tool_call_info_map[tool_call_obj.tool_call_id]\n        if tool_call_info.parent_tool_call_id is not None:\n            parent_id = tool_call_map.get(tool_call_info.parent_tool_call_id)\n            if parent_id is not None:\n                tool_call_obj.parent_tool_call_id = parent_id\n                valid_tool_calls.append(tool_call_obj)\n            else:\n                # Parent doesn't exist (likely cancelled) - skip this orphaned child\n                logger.warning(\n                    f\"Skipping tool call '{tool_call_obj.tool_call_id}' with missing parent \"\n                    f\"'{tool_call_info.parent_tool_call_id}' (likely cancelled during execution)\"\n                )\n                # Remove from DB session to prevent saving\n                db_session.delete(tool_call_obj)\n        else:\n            # Top-level tool call (no parent)\n            valid_tool_calls.append(tool_call_obj)\n\n    # Link SearchDocs only to valid ToolCalls\n    for tool_call_obj in valid_tool_calls:\n        search_doc_ids = tool_call_to_search_doc_ids.get(tool_call_obj.tool_call_id, [])\n        if search_doc_ids:\n            add_search_docs_to_tool_call(\n                tool_call_id=tool_call_obj.id,\n                search_doc_ids=search_doc_ids,\n                db_session=db_session,\n            )\n\n\ndef save_chat_turn(\n    message_text: str,\n    reasoning_tokens: str | None,\n    tool_calls: list[ToolCallInfo],\n    citation_to_doc: dict[int, SearchDoc],\n    all_search_docs: dict[SearchDocKey, SearchDoc],\n    db_session: Session,\n    assistant_message: ChatMessage,\n    is_clarification: bool = False,\n    emitted_citations: set[int] | None = None,\n    pre_answer_processing_time: float | None = None,\n) -> None:\n    \"\"\"\n    Save a chat turn by populating the assistant_message and creating related entities.\n\n    This function:\n    1. Updates the ChatMessage with text, reasoning tokens, and token count\n    2. Creates DB SearchDoc entries from pre-deduplicated all_search_docs\n    3. Builds tool_call -> search_doc mapping for displayed docs\n    4. Builds citation mapping from citation_to_doc\n    5. Links all unique SearchDocs to the ChatMessage\n    6. Creates ToolCall entries and links SearchDocs to them\n    7. Builds the citations mapping for the ChatMessage\n\n    Args:\n        message_text: The message content to save\n        reasoning_tokens: Optional reasoning tokens for the message\n        tool_calls: List of tool call information to create ToolCall entries (may include search_docs)\n        citation_to_doc: Mapping from citation number to SearchDoc for building citations\n        all_search_docs: Pre-deduplicated search docs from ChatStateContainer\n        db_session: Database session for persistence\n        assistant_message: The ChatMessage object to populate (should already exist in DB)\n        is_clarification: Whether this assistant message is a clarification question (deep research flow)\n        emitted_citations: Set of citation numbers that were actually emitted during streaming.\n            If provided, only citations in this set will be saved; others are filtered out.\n        pre_answer_processing_time: Duration of processing before answer starts (in seconds)\n    \"\"\"\n    # 1. Update ChatMessage with message content, reasoning tokens, and token count\n    sanitized_message_text = (\n        sanitize_string(message_text) if message_text else message_text\n    )\n    assistant_message.message = sanitized_message_text\n    assistant_message.reasoning_tokens = (\n        sanitize_string(reasoning_tokens) if reasoning_tokens else reasoning_tokens\n    )\n    assistant_message.is_clarification = is_clarification\n\n    # Use pre-answer processing time (captured when MESSAGE_START was emitted)\n    if pre_answer_processing_time is not None:\n        assistant_message.processing_duration_seconds = pre_answer_processing_time\n\n    # Calculate token count using default tokenizer, when storing, this should not use the LLM\n    # specific one so we use a system default tokenizer here.\n    default_tokenizer = get_tokenizer(None, None)\n    if sanitized_message_text:\n        assistant_message.token_count = len(\n            default_tokenizer.encode(sanitized_message_text)\n        )\n    else:\n        assistant_message.token_count = 0\n\n    # 2. Create DB SearchDoc entries from pre-deduplicated all_search_docs\n    search_doc_key_to_id: dict[SearchDocKey, int] = {}\n    for key, search_doc_py in all_search_docs.items():\n        db_search_doc = create_db_search_doc(\n            server_search_doc=search_doc_py,\n            db_session=db_session,\n            commit=False,\n        )\n        search_doc_key_to_id[key] = db_search_doc.id\n\n    # 3. Build tool_call -> search_doc mapping (for displayed docs in each tool call)\n    tool_call_to_search_doc_ids: dict[str, list[int]] = {}\n    for tool_call_info in tool_calls:\n        if tool_call_info.search_docs:\n            search_doc_ids_for_tool: list[int] = []\n            for search_doc_py in tool_call_info.search_docs:\n                key = ChatStateContainer.create_search_doc_key(search_doc_py)\n                if key in search_doc_key_to_id:\n                    search_doc_ids_for_tool.append(search_doc_key_to_id[key])\n                else:\n                    # Displayed doc not in all_search_docs - create it\n                    # This can happen if displayed_docs contains docs not in search_docs\n                    db_search_doc = create_db_search_doc(\n                        server_search_doc=search_doc_py,\n                        db_session=db_session,\n                        commit=False,\n                    )\n                    search_doc_key_to_id[key] = db_search_doc.id\n                    search_doc_ids_for_tool.append(db_search_doc.id)\n            tool_call_to_search_doc_ids[tool_call_info.tool_call_id] = list(\n                set(search_doc_ids_for_tool)\n            )\n\n    # Collect all search doc IDs for ChatMessage linking\n    all_search_doc_ids_set: set[int] = set(search_doc_key_to_id.values())\n\n    # 4. Build a citation mapping from the citation number to the saved DB SearchDoc ID\n    # Only include citations that were actually emitted during streaming\n    citation_number_to_search_doc_id: dict[int, int] = {}\n\n    for citation_num, search_doc_py in citation_to_doc.items():\n        # Skip citations that weren't actually emitted (if emitted_citations is provided)\n        if emitted_citations is not None and citation_num not in emitted_citations:\n            continue\n\n        # Create the unique key for this SearchDoc version\n        search_doc_key = ChatStateContainer.create_search_doc_key(search_doc_py)\n\n        # Get the search doc ID (should already exist from processing tool_calls)\n        if search_doc_key in search_doc_key_to_id:\n            db_search_doc_id = search_doc_key_to_id[search_doc_key]\n        else:\n            # Citation doc not found in tool call search_docs\n            # Expected case: Project files (source_type=FILE) are cited but don't come from tool calls\n            # Unexpected case: Other citation-only docs (indicates a potential issue upstream)\n            is_project_file = search_doc_py.source_type == DocumentSource.FILE\n\n            if is_project_file:\n                logger.info(\n                    f\"Project file citation {search_doc_py.document_id} not in tool calls, creating it\"\n                )\n            else:\n                logger.warning(\n                    f\"Citation doc {search_doc_py.document_id} not found in tool call search_docs, creating it\"\n                )\n\n            # Create the SearchDoc in the database\n            # NOTE: It's important that this maps to the saved DB Document ID, because\n            # the match-highlights are specific to this saved version, not any document that has\n            # the same document_id.\n            db_search_doc = create_db_search_doc(\n                server_search_doc=search_doc_py,\n                db_session=db_session,\n                commit=False,\n            )\n            db_search_doc_id = db_search_doc.id\n            search_doc_key_to_id[search_doc_key] = db_search_doc_id\n\n            # Link project files to ChatMessage to enable frontend preview\n            if is_project_file:\n                all_search_doc_ids_set.add(db_search_doc_id)\n\n        # Build mapping from citation number to search doc ID\n        citation_number_to_search_doc_id[citation_num] = db_search_doc_id\n\n    # 5. Link all unique SearchDocs (from both tool calls and citations) to ChatMessage\n    final_search_doc_ids: list[int] = list(all_search_doc_ids_set)\n    if final_search_doc_ids:\n        add_search_docs_to_chat_message(\n            chat_message_id=assistant_message.id,\n            search_doc_ids=final_search_doc_ids,\n            db_session=db_session,\n        )\n\n    # 6. Create ToolCall entries and link SearchDocs to them\n    _create_and_link_tool_calls(\n        tool_calls=tool_calls,\n        assistant_message=assistant_message,\n        db_session=db_session,\n        default_tokenizer=default_tokenizer,\n        tool_call_to_search_doc_ids=tool_call_to_search_doc_ids,\n    )\n\n    # 7. Build citations mapping - use the mapping we already built in step 4\n    assistant_message.citations = (\n        citation_number_to_search_doc_id if citation_number_to_search_doc_id else None\n    )\n\n    # 8. Attach code interpreter generated files that the assistant actually\n    # referenced in its response, so they are available via load_all_chat_files\n    # on subsequent turns. Files not mentioned are intermediate artifacts.\n    if sanitized_message_text:\n        referenced = _extract_referenced_file_descriptors(\n            tool_calls, sanitized_message_text\n        )\n        if referenced:\n            existing_files = assistant_message.files or []\n            assistant_message.files = existing_files + referenced\n\n    # Finally save the messages, tool calls, and docs\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/chat/stop_signal_checker.py",
    "content": "from uuid import UUID\n\nfrom onyx.cache.interface import CacheBackend\n\nPREFIX = \"chatsessionstop\"\nFENCE_PREFIX = f\"{PREFIX}_fence\"\nFENCE_TTL = 10 * 60  # 10 minutes\n\n\ndef _get_fence_key(chat_session_id: UUID) -> str:\n    \"\"\"Generate the cache key for a chat session stop signal fence.\n\n    Args:\n        chat_session_id: The UUID of the chat session\n\n    Returns:\n        The fence key string. Tenant isolation is handled automatically\n        by the cache backend (Redis key-prefixing or Postgres schema routing).\n    \"\"\"\n    return f\"{FENCE_PREFIX}_{chat_session_id}\"\n\n\ndef set_fence(chat_session_id: UUID, cache: CacheBackend, value: bool) -> None:\n    \"\"\"Set or clear the stop signal fence for a chat session.\n\n    Args:\n        chat_session_id: The UUID of the chat session\n        cache: Tenant-aware cache backend\n        value: True to set the fence (stop signal), False to clear it\n    \"\"\"\n    fence_key = _get_fence_key(chat_session_id)\n    if not value:\n        cache.delete(fence_key)\n        return\n    cache.set(fence_key, 0, ex=FENCE_TTL)\n\n\ndef is_connected(chat_session_id: UUID, cache: CacheBackend) -> bool:\n    \"\"\"Check if the chat session should continue (not stopped).\n\n    Args:\n        chat_session_id: The UUID of the chat session to check\n        cache: Tenant-aware cache backend\n\n    Returns:\n        True if the session should continue, False if it should stop\n    \"\"\"\n    return not cache.exists(_get_fence_key(chat_session_id))\n\n\ndef reset_cancel_status(chat_session_id: UUID, cache: CacheBackend) -> None:\n    \"\"\"Clear the stop signal for a chat session.\n\n    Args:\n        chat_session_id: The UUID of the chat session\n        cache: Tenant-aware cache backend\n    \"\"\"\n    cache.delete(_get_fence_key(chat_session_id))\n"
  },
  {
    "path": "backend/onyx/chat/tool_call_args_streaming.py",
    "content": "from collections.abc import Generator\nfrom collections.abc import Mapping\nfrom typing import Any\nfrom typing import Type\n\nfrom onyx.llm.model_response import ChatCompletionDeltaToolCall\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import ToolCallArgumentDelta\nfrom onyx.tools.built_in_tools import TOOL_NAME_TO_CLASS\nfrom onyx.tools.interface import Tool\nfrom onyx.utils.jsonriver import Parser\n\n\ndef _get_tool_class(\n    tool_calls_in_progress: Mapping[int, Mapping[str, Any]],\n    tool_call_delta: ChatCompletionDeltaToolCall,\n) -> Type[Tool] | None:\n    \"\"\"Look up the Tool subclass for a streaming tool call delta.\"\"\"\n    tool_name = tool_calls_in_progress.get(tool_call_delta.index, {}).get(\"name\")\n    if not tool_name:\n        return None\n    return TOOL_NAME_TO_CLASS.get(tool_name)\n\n\ndef maybe_emit_argument_delta(\n    tool_calls_in_progress: Mapping[int, Mapping[str, Any]],\n    tool_call_delta: ChatCompletionDeltaToolCall,\n    placement: Placement,\n    parsers: dict[int, Parser],\n) -> Generator[Packet, None, None]:\n    \"\"\"Emit decoded tool-call argument deltas to the frontend.\n\n    Uses a ``jsonriver.Parser`` per tool-call index to incrementally parse\n    the JSON argument string and extract only the newly-appended content\n    for each string-valued argument.\n\n    NOTE: Non-string arguments (numbers, booleans, null, arrays, objects)\n    are skipped — they are available in the final tool-call kickoff packet.\n\n    ``parsers`` is a mutable dict keyed by tool-call index. A new\n    ``Parser`` is created automatically for each new index.\n    \"\"\"\n    tool_cls = _get_tool_class(tool_calls_in_progress, tool_call_delta)\n    if not tool_cls or not tool_cls.should_emit_argument_deltas():\n        return\n\n    fn = tool_call_delta.function\n    delta_fragment = fn.arguments if fn else None\n    if not delta_fragment:\n        return\n\n    idx = tool_call_delta.index\n    if idx not in parsers:\n        parsers[idx] = Parser()\n    parser = parsers[idx]\n\n    deltas = parser.feed(delta_fragment)\n\n    argument_deltas: dict[str, str] = {}\n    for delta in deltas:\n        if isinstance(delta, dict):\n            for key, value in delta.items():\n                if isinstance(value, str):\n                    argument_deltas[key] = argument_deltas.get(key, \"\") + value\n\n    if not argument_deltas:\n        return\n\n    tc_data = tool_calls_in_progress[tool_call_delta.index]\n    yield Packet(\n        placement=placement,\n        obj=ToolCallArgumentDelta(\n            tool_type=tc_data.get(\"name\", \"\"),\n            argument_deltas=argument_deltas,\n        ),\n    )\n"
  },
  {
    "path": "backend/onyx/configs/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/configs/agent_configs.py",
    "content": "import os\n\n\nAGENT_DEFAULT_RETRIEVAL_HITS = 15\nAGENT_DEFAULT_RERANKING_HITS = 10\nAGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8\nAGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION = 3\nAGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5\n\nAGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER = 25\nAGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER = 35\n\n\nAGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 5\nAGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3\nAGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10\nAGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000\n\nINITIAL_SEARCH_DECOMPOSITION_ENABLED = True\n\nAGENT_DEFAULT_RETRIEVAL_HITS = 15\nAGENT_DEFAULT_RERANKING_HITS = 10\nAGENT_DEFAULT_MAX_VERIFIVATION_HITS = 30\nAGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8\nAGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION = 3\nAGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5\nAGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 5\nAGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3\nAGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10\nAGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000\n\n\nAGENT_ALLOW_REFINEMENT = os.environ.get(\"AGENT_ALLOW_REFINEMENT\", \"\").lower() == \"true\"\n\nAGENT_ANSWER_GENERATION_BY_FAST_LLM = (\n    os.environ.get(\"AGENT_ANSWER_GENERATION_BY_FAST_LLM\", \"\").lower() == \"true\"\n)\n\nAGENT_RETRIEVAL_STATS = (\n    not os.environ.get(\"AGENT_RETRIEVAL_STATS\") == \"False\"\n) or True  # default True\n\n\nAGENT_MAX_VERIFICATION_HITS = int(\n    os.environ.get(\"AGENT_MAX_VERIFICATION_HITS\") or AGENT_DEFAULT_MAX_VERIFIVATION_HITS\n)  # 30\n\nAGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(\n    os.environ.get(\"AGENT_MAX_QUERY_RETRIEVAL_RESULTS\") or AGENT_DEFAULT_RETRIEVAL_HITS\n)  # 15\n\nAGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(\n    os.environ.get(\"AGENT_MAX_QUERY_RETRIEVAL_RESULTS\") or AGENT_DEFAULT_RETRIEVAL_HITS\n)  # 15\n\n# Reranking agent configs\n# Reranking stats - no influence on flow outside of stats collection\nAGENT_RERANKING_STATS = (\n    not os.environ.get(\"AGENT_RERANKING_STATS\") == \"True\"\n) or False  # default False\n\nAGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(\n    os.environ.get(\"AGENT_MAX_QUERY_RETRIEVAL_RESULTS\") or AGENT_DEFAULT_RETRIEVAL_HITS\n)  # 15\n\nAGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS = int(\n    os.environ.get(\"AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS\")\n    or AGENT_DEFAULT_RERANKING_HITS\n)  # 10\n\nAGENT_NUM_DOCS_FOR_DECOMPOSITION = int(\n    os.environ.get(\"AGENT_NUM_DOCS_FOR_DECOMPOSITION\")\n    or AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION\n)  # 3\n\nAGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = int(\n    os.environ.get(\"AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION\")\n    or AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION\n)  # 5\n\nAGENT_EXPLORATORY_SEARCH_RESULTS = int(\n    os.environ.get(\"AGENT_EXPLORATORY_SEARCH_RESULTS\")\n    or AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS\n)  # 5\n\nAGENT_MIN_ORIG_QUESTION_DOCS = int(\n    os.environ.get(\"AGENT_MIN_ORIG_QUESTION_DOCS\")\n    or AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS\n)  # 3\n\nAGENT_MAX_ANSWER_CONTEXT_DOCS = int(\n    os.environ.get(\"AGENT_MAX_ANSWER_CONTEXT_DOCS\")\n    or AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS\n)  # 8\n\n\nAGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(\n    os.environ.get(\"AGENT_MAX_STATIC_HISTORY_WORD_LENGTH\")\n    or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH\n)  # 2000\n\nAGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER = int(\n    os.environ.get(\"AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER\")\n    or AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER\n)  # 25\n\nAGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER = int(\n    os.environ.get(\"AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER\")\n    or AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER\n)  # 35\n\n\nAGENT_RETRIEVAL_STATS = (\n    not os.environ.get(\"AGENT_RETRIEVAL_STATS\") == \"False\"\n) or True  # default True\n\n\nAGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(\n    os.environ.get(\"AGENT_MAX_QUERY_RETRIEVAL_RESULTS\") or AGENT_DEFAULT_RETRIEVAL_HITS\n)  # 15\n\nAGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(\n    os.environ.get(\"AGENT_MAX_QUERY_RETRIEVAL_RESULTS\") or AGENT_DEFAULT_RETRIEVAL_HITS\n)  # 15\n\n# Reranking agent configs\n# Reranking stats - no influence on flow outside of stats collection\nAGENT_RERANKING_STATS = (\n    not os.environ.get(\"AGENT_RERANKING_STATS\") == \"True\"\n) or False  # default False\n\nAGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(\n    os.environ.get(\"AGENT_MAX_QUERY_RETRIEVAL_RESULTS\") or AGENT_DEFAULT_RETRIEVAL_HITS\n)  # 15\n\nAGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS = int(\n    os.environ.get(\"AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS\")\n    or AGENT_DEFAULT_RERANKING_HITS\n)  # 10\n\nAGENT_NUM_DOCS_FOR_DECOMPOSITION = int(\n    os.environ.get(\"AGENT_NUM_DOCS_FOR_DECOMPOSITION\")\n    or AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION\n)  # 3\n\nAGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = int(\n    os.environ.get(\"AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION\")\n    or AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION\n)  # 5\n\nAGENT_EXPLORATORY_SEARCH_RESULTS = int(\n    os.environ.get(\"AGENT_EXPLORATORY_SEARCH_RESULTS\")\n    or AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS\n)  # 5\n\nAGENT_MIN_ORIG_QUESTION_DOCS = int(\n    os.environ.get(\"AGENT_MIN_ORIG_QUESTION_DOCS\")\n    or AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS\n)  # 3\n\nAGENT_MAX_ANSWER_CONTEXT_DOCS = int(\n    os.environ.get(\"AGENT_MAX_ANSWER_CONTEXT_DOCS\")\n    or AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS\n)  # 8\n\n\nAGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(\n    os.environ.get(\"AGENT_MAX_STATIC_HISTORY_WORD_LENGTH\")\n    or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH\n)  # 2000\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION = 15  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION = 45  # in seconds\nAGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION\n)\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION = 5  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_DOCUMENT_VERIFICATION = 8  # in seconds\nAGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_DOCUMENT_VERIFICATION\n)\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION = 8  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_GENERAL_GENERATION = 45  # in seconds\nAGENT_TIMEOUT_LLM_GENERAL_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_GENERAL_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_GENERAL_GENERATION\n)\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION = 8  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_SUBQUESTION_GENERATION = 10  # in seconds\nAGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_SUBQUESTION_GENERATION\n)\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = 9  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION = 45  # in seconds\nAGENT_TIMEOUT_LLM_SUBANSWER_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION\n)\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = 15  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = 40  # in seconds\nAGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION\n)\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = 20  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = 60  # in seconds\nAGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION\n)\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK = 6  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_CHECK = 12  # in seconds\nAGENT_TIMEOUT_LLM_SUBANSWER_CHECK = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_SUBANSWER_CHECK\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_CHECK\n)\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION = 6  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION = 12  # in seconds\nAGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION\n)\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION = 4  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION = 6  # in seconds\nAGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION\n)\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION = 6  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION = 8  # in seconds\nAGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION\n)\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS = 6  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_COMPARE_ANSWERS = 12  # in seconds\nAGENT_TIMEOUT_LLM_COMPARE_ANSWERS = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_COMPARE_ANSWERS\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_COMPARE_ANSWERS\n)\n\n\nAGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION = 6  # in seconds\nAGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION\")\n    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION\n)\n\nAGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = 12  # in seconds\nAGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = int(\n    os.environ.get(\"AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION\")\n    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION\n)\n\nAGENT_DEFAULT_MAX_TOKENS_VALIDATION = 4\nAGENT_MAX_TOKENS_VALIDATION = int(\n    os.environ.get(\"AGENT_MAX_TOKENS_VALIDATION\") or AGENT_DEFAULT_MAX_TOKENS_VALIDATION\n)\n\nAGENT_DEFAULT_MAX_TOKENS_SUBANSWER_GENERATION = 256\nAGENT_MAX_TOKENS_SUBANSWER_GENERATION = int(\n    os.environ.get(\"AGENT_MAX_TOKENS_SUBANSWER_GENERATION\")\n    or AGENT_DEFAULT_MAX_TOKENS_SUBANSWER_GENERATION\n)\n\nAGENT_DEFAULT_MAX_TOKENS_ANSWER_GENERATION = 1024\nAGENT_MAX_TOKENS_ANSWER_GENERATION = int(\n    os.environ.get(\"AGENT_MAX_TOKENS_ANSWER_GENERATION\")\n    or AGENT_DEFAULT_MAX_TOKENS_ANSWER_GENERATION\n)\n\nAGENT_DEFAULT_MAX_TOKENS_SUBQUESTION_GENERATION = 256\nAGENT_MAX_TOKENS_SUBQUESTION_GENERATION = int(\n    os.environ.get(\"AGENT_MAX_TOKENS_SUBQUESTION_GENERATION\")\n    or AGENT_DEFAULT_MAX_TOKENS_SUBQUESTION_GENERATION\n)\n\nAGENT_DEFAULT_MAX_TOKENS_ENTITY_TERM_EXTRACTION = 1024\nAGENT_MAX_TOKENS_ENTITY_TERM_EXTRACTION = int(\n    os.environ.get(\"AGENT_MAX_TOKENS_ENTITY_TERM_EXTRACTION\")\n    or AGENT_DEFAULT_MAX_TOKENS_ENTITY_TERM_EXTRACTION\n)\n\nAGENT_DEFAULT_MAX_TOKENS_SUBQUERY_GENERATION = 64\nAGENT_MAX_TOKENS_SUBQUERY_GENERATION = int(\n    os.environ.get(\"AGENT_MAX_TOKENS_SUBQUERY_GENERATION\")\n    or AGENT_DEFAULT_MAX_TOKENS_SUBQUERY_GENERATION\n)\n\nAGENT_DEFAULT_MAX_TOKENS_HISTORY_SUMMARY = 128\nAGENT_MAX_TOKENS_HISTORY_SUMMARY = int(\n    os.environ.get(\"AGENT_MAX_TOKENS_HISTORY_SUMMARY\")\n    or AGENT_DEFAULT_MAX_TOKENS_HISTORY_SUMMARY\n)\n\n# Parameters for the Thoughtful/Deep Research flows\nTF_DR_TIMEOUT_LONG = int(os.environ.get(\"TF_DR_TIMEOUT_LONG\") or 120)\nTF_DR_TIMEOUT_SHORT = int(os.environ.get(\"TF_DR_TIMEOUT_SHORT\") or 60)\n\n\nTF_DR_DEFAULT_FAST = (os.environ.get(\"TF_DR_DEFAULT_FAST\") or \"False\").lower() == \"true\"\n\nGRAPH_VERSION_NAME: str = \"a\"\n"
  },
  {
    "path": "backend/onyx/configs/app_configs.py",
    "content": "import json\nimport os\nimport urllib.parse\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import cast\n\nfrom onyx.auth.schemas import AuthBackend\nfrom onyx.cache.interface import CacheBackendType\nfrom onyx.configs.constants import AuthType\nfrom onyx.configs.constants import QueryHistoryType\nfrom onyx.file_processing.enums import HtmlBasedConnectorTransformLinksStrategy\nfrom onyx.prompts.image_analysis import DEFAULT_IMAGE_SUMMARIZATION_SYSTEM_PROMPT\nfrom onyx.prompts.image_analysis import DEFAULT_IMAGE_SUMMARIZATION_USER_PROMPT\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n#####\n# App Configs\n#####\nAPP_HOST = \"0.0.0.0\"\nAPP_PORT = 8080\n# API_PREFIX is used to prepend a base path for all API routes\n# generally used if using a reverse proxy which doesn't support stripping the `/api`\n# prefix from requests directed towards the API server. In these cases, set this to `/api`\nAPP_API_PREFIX = os.environ.get(\"API_PREFIX\", \"\")\n\n# Certain services need to make HTTP requests to the API server, such as the MCP server and Discord bot\nAPI_SERVER_PROTOCOL = os.environ.get(\"API_SERVER_PROTOCOL\", \"http\")\nAPI_SERVER_HOST = os.environ.get(\"API_SERVER_HOST\", \"127.0.0.1\")\n# This override allows self-hosting the MCP server with Onyx Cloud backend.\nAPI_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS = os.environ.get(\n    \"API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS\"\n)\n\n# Whether to send user metadata (user_id/email and session_id) to the LLM provider.\n# Disabled by default.\nSEND_USER_METADATA_TO_LLM_PROVIDER = (\n    os.environ.get(\"SEND_USER_METADATA_TO_LLM_PROVIDER\", \"\")\n).lower() == \"true\"\n\n#####\n# User Facing Features Configs\n#####\nBLURB_SIZE = 128  # Number Encoder Tokens included in the chunk blurb\n\n# Hard ceiling for the admin-configurable file upload size (in MB).\n# Self-hosted customers can raise or lower this via the environment variable.\n_raw_max_upload_size_mb = int(os.environ.get(\"MAX_ALLOWED_UPLOAD_SIZE_MB\", \"250\"))\nif _raw_max_upload_size_mb < 0:\n    logger.warning(\n        \"MAX_ALLOWED_UPLOAD_SIZE_MB=%d is negative; falling back to 250\",\n        _raw_max_upload_size_mb,\n    )\n    _raw_max_upload_size_mb = 250\nMAX_ALLOWED_UPLOAD_SIZE_MB = _raw_max_upload_size_mb\n\n# Default fallback for the per-user file upload size limit (in MB) when no\n# admin-configured value exists.  Clamped to MAX_ALLOWED_UPLOAD_SIZE_MB at\n# runtime so this never silently exceeds the hard ceiling.\n_raw_default_upload_size_mb = int(\n    os.environ.get(\"DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB\", \"100\")\n)\nif _raw_default_upload_size_mb < 0:\n    logger.warning(\n        \"DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB=%d is negative; falling back to 100\",\n        _raw_default_upload_size_mb,\n    )\n    _raw_default_upload_size_mb = 100\nDEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB = _raw_default_upload_size_mb\nGENERATIVE_MODEL_ACCESS_CHECK_FREQ = int(\n    os.environ.get(\"GENERATIVE_MODEL_ACCESS_CHECK_FREQ\") or 86400\n)  # 1 day\n\n# Controls whether users can use User Knowledge (personal documents) in assistants\nDISABLE_USER_KNOWLEDGE = os.environ.get(\"DISABLE_USER_KNOWLEDGE\", \"\").lower() == \"true\"\n\n# Disables vector DB (Vespa/OpenSearch) entirely. When True, connectors and RAG search\n# are disabled but core chat, tools, user file uploads, and Projects still work.\nDISABLE_VECTOR_DB = os.environ.get(\"DISABLE_VECTOR_DB\", \"\").lower() == \"true\"\n\n# Which backend to use for caching, locks, and ephemeral state.\n# \"redis\" (default) or \"postgres\" (only valid when DISABLE_VECTOR_DB=true).\nCACHE_BACKEND = CacheBackendType(\n    os.environ.get(\"CACHE_BACKEND\", CacheBackendType.REDIS)\n)\n\n# If set to true, will show extra/uncommon connectors in the \"Other\" category\nSHOW_EXTRA_CONNECTORS = os.environ.get(\"SHOW_EXTRA_CONNECTORS\", \"\").lower() == \"true\"\n\n# Controls whether to allow admin query history reports with:\n# 1. associated user emails\n# 2. anonymized user emails\n# 3. no queries\nONYX_QUERY_HISTORY_TYPE = QueryHistoryType(\n    (os.environ.get(\"ONYX_QUERY_HISTORY_TYPE\") or QueryHistoryType.NORMAL.value).lower()\n)\n\n#####\n# Web Configs\n#####\n# WEB_DOMAIN is used to set the redirect_uri after login flows\n# NOTE: if you are having problems accessing the Onyx web UI locally (especially\n# on Windows, try  setting this to `http://127.0.0.1:3000` instead and see if that\n# fixes it)\nWEB_DOMAIN = os.environ.get(\"WEB_DOMAIN\") or \"http://localhost:3000\"\n\n\n#####\n# Auth Configs\n#####\n# Silently default to basic - warnings/errors logged in verify_auth_setting()\n# which only runs on app startup, not during migrations/scripts\n_auth_type_str = (os.environ.get(\"AUTH_TYPE\") or \"\").lower()\nif _auth_type_str in [auth_type.value for auth_type in AuthType]:\n    AUTH_TYPE = AuthType(_auth_type_str)\nelse:\n    AUTH_TYPE = AuthType.BASIC\n\nPASSWORD_MIN_LENGTH = int(os.getenv(\"PASSWORD_MIN_LENGTH\", 8))\nPASSWORD_MAX_LENGTH = int(os.getenv(\"PASSWORD_MAX_LENGTH\", 64))\nPASSWORD_REQUIRE_UPPERCASE = (\n    os.environ.get(\"PASSWORD_REQUIRE_UPPERCASE\", \"false\").lower() == \"true\"\n)\nPASSWORD_REQUIRE_LOWERCASE = (\n    os.environ.get(\"PASSWORD_REQUIRE_LOWERCASE\", \"false\").lower() == \"true\"\n)\nPASSWORD_REQUIRE_DIGIT = (\n    os.environ.get(\"PASSWORD_REQUIRE_DIGIT\", \"false\").lower() == \"true\"\n)\nPASSWORD_REQUIRE_SPECIAL_CHAR = (\n    os.environ.get(\"PASSWORD_REQUIRE_SPECIAL_CHAR\", \"false\").lower() == \"true\"\n)\n\n# Encryption key secret is used to encrypt connector credentials, api keys, and other sensitive\n# information. This provides an extra layer of security on top of Postgres access controls\n# and is available in Onyx EE\nENCRYPTION_KEY_SECRET = os.environ.get(\"ENCRYPTION_KEY_SECRET\") or \"\"\n\n# Turn off mask if admin users should see full credentials for data connectors.\nMASK_CREDENTIAL_PREFIX = (\n    os.environ.get(\"MASK_CREDENTIAL_PREFIX\", \"True\").lower() != \"false\"\n)\n\nAUTH_BACKEND = AuthBackend(os.environ.get(\"AUTH_BACKEND\") or AuthBackend.REDIS.value)\n\nSESSION_EXPIRE_TIME_SECONDS = int(\n    os.environ.get(\"SESSION_EXPIRE_TIME_SECONDS\")\n    or os.environ.get(\"REDIS_AUTH_EXPIRE_TIME_SECONDS\")\n    or 86400 * 7\n)  # 7 days\n\n# Default request timeout, mostly used by connectors\nREQUEST_TIMEOUT_SECONDS = int(os.environ.get(\"REQUEST_TIMEOUT_SECONDS\") or 60)\n\n# set `VALID_EMAIL_DOMAINS` to a comma seperated list of domains in order to\n# restrict access to Onyx to only users with emails from those domains.\n# E.g. `VALID_EMAIL_DOMAINS=example.com,example.org` will restrict Onyx\n# signups to users with either an @example.com or an @example.org email.\n# NOTE: maintaining `VALID_EMAIL_DOMAIN` to keep backwards compatibility\n_VALID_EMAIL_DOMAIN = os.environ.get(\"VALID_EMAIL_DOMAIN\", \"\")\n_VALID_EMAIL_DOMAINS_STR = (\n    os.environ.get(\"VALID_EMAIL_DOMAINS\", \"\") or _VALID_EMAIL_DOMAIN\n)\nVALID_EMAIL_DOMAINS = (\n    [\n        domain.strip().lower()\n        for domain in _VALID_EMAIL_DOMAINS_STR.split(\",\")\n        if domain.strip()\n    ]\n    if _VALID_EMAIL_DOMAINS_STR\n    else []\n)\n\n# Disposable email blocking - blocks temporary/throwaway email addresses\n# Set to empty string to disable disposable email blocking\nDISPOSABLE_EMAIL_DOMAINS_URL = os.environ.get(\n    \"DISPOSABLE_EMAIL_DOMAINS_URL\",\n    \"https://disposable.github.io/disposable-email-domains/domains.json\",\n)\n\n# OAuth Login Flow\n# Used for both Google OAuth2 and OIDC flows\nOAUTH_CLIENT_ID = (\n    os.environ.get(\"OAUTH_CLIENT_ID\", os.environ.get(\"GOOGLE_OAUTH_CLIENT_ID\")) or \"\"\n)\nOAUTH_CLIENT_SECRET = (\n    os.environ.get(\"OAUTH_CLIENT_SECRET\", os.environ.get(\"GOOGLE_OAUTH_CLIENT_SECRET\"))\n    or \"\"\n)\n\n# Whether Google OAuth is enabled (requires both client ID and secret)\nOAUTH_ENABLED = bool(OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET)\n\n# OpenID Connect configuration URL for OIDC integrations\nOPENID_CONFIG_URL = os.environ.get(\"OPENID_CONFIG_URL\") or \"\"\n\n# Applicable for OIDC Auth, allows you to override the scopes that\n# are requested from the OIDC provider. Currently used when passing\n# over access tokens to tool calls and the tool needs more scopes\nOIDC_SCOPE_OVERRIDE: list[str] | None = None\n_OIDC_SCOPE_OVERRIDE = os.environ.get(\"OIDC_SCOPE_OVERRIDE\")\n\nif _OIDC_SCOPE_OVERRIDE:\n    try:\n        OIDC_SCOPE_OVERRIDE = [\n            scope.strip() for scope in _OIDC_SCOPE_OVERRIDE.split(\",\")\n        ]\n    except Exception:\n        pass\n\n# Enables PKCE for OIDC login flow. Disabled by default to preserve\n# backwards compatibility for existing OIDC deployments.\nOIDC_PKCE_ENABLED = os.environ.get(\"OIDC_PKCE_ENABLED\", \"\").lower() == \"true\"\n\n# Applicable for SAML Auth\nSAML_CONF_DIR = os.environ.get(\"SAML_CONF_DIR\") or \"/app/onyx/configs/saml_config\"\n\n# JWT Public Key URL for JWT token verification\nJWT_PUBLIC_KEY_URL: str | None = os.getenv(\"JWT_PUBLIC_KEY_URL\", None)\n\nUSER_AUTH_SECRET = os.environ.get(\"USER_AUTH_SECRET\", \"\")\n\nif AUTH_TYPE == AuthType.BASIC and not USER_AUTH_SECRET:\n    logger.warning(\n        \"USER_AUTH_SECRET is not set. This is required for secure password reset \"\n        \"and email verification tokens. Please set USER_AUTH_SECRET in production.\"\n    )\n\n# Duration (in seconds) for which the FastAPI Users JWT token remains valid in the user's browser.\n# By default, this is set to match the Redis expiry time for consistency.\nAUTH_COOKIE_EXPIRE_TIME_SECONDS = int(\n    os.environ.get(\"AUTH_COOKIE_EXPIRE_TIME_SECONDS\") or 86400 * 7\n)  # 7 days\n\n# for basic auth\nREQUIRE_EMAIL_VERIFICATION = (\n    os.environ.get(\"REQUIRE_EMAIL_VERIFICATION\", \"\").lower() == \"true\"\n)\nSMTP_SERVER = os.environ.get(\"SMTP_SERVER\") or \"\"\nSMTP_PORT = int(os.environ.get(\"SMTP_PORT\") or \"587\")\nSMTP_USER = os.environ.get(\"SMTP_USER\") or \"\"\nSMTP_PASS = os.environ.get(\"SMTP_PASS\") or \"\"\nEMAIL_FROM = os.environ.get(\"EMAIL_FROM\") or SMTP_USER\n\nSENDGRID_API_KEY = os.environ.get(\"SENDGRID_API_KEY\") or \"\"\nEMAIL_CONFIGURED = all([SMTP_SERVER, SMTP_USER, SMTP_PASS]) or SENDGRID_API_KEY\n\n# If set, Onyx will listen to the `expires_at` returned by the identity\n# provider (e.g. Okta, Google, etc.) and force the user to re-authenticate\n# after this time has elapsed. Disabled since by default many auth providers\n# have very short expiry times (e.g. 1 hour) which provide a poor user experience\nTRACK_EXTERNAL_IDP_EXPIRY = (\n    os.environ.get(\"TRACK_EXTERNAL_IDP_EXPIRY\", \"\").lower() == \"true\"\n)\n\n\n#####\n# DB Configs\n#####\nDOCUMENT_INDEX_NAME = \"danswer_index\"\n\n# OpenSearch Configs\nOPENSEARCH_HOST = os.environ.get(\"OPENSEARCH_HOST\") or \"localhost\"\nOPENSEARCH_REST_API_PORT = int(os.environ.get(\"OPENSEARCH_REST_API_PORT\") or 9200)\n# TODO(andrei): 60 seconds is too much, we're just setting a high default\n# timeout for now to examine why queries are slow.\n# NOTE: This timeout applies to all requests the client makes, including bulk\n# indexing.\nDEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S = int(\n    os.environ.get(\"DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S\") or 60\n)\n# TODO(andrei): 50 seconds is too much, we're just setting a high default\n# timeout for now to examine why queries are slow.\n# NOTE: To get useful partial results, this value should be less than the client\n# timeout above.\nDEFAULT_OPENSEARCH_QUERY_TIMEOUT_S = int(\n    os.environ.get(\"DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S\") or 50\n)\nOPENSEARCH_ADMIN_USERNAME = os.environ.get(\"OPENSEARCH_ADMIN_USERNAME\", \"admin\")\nOPENSEARCH_ADMIN_PASSWORD = os.environ.get(\n    \"OPENSEARCH_ADMIN_PASSWORD\", \"StrongPassword123!\"\n)\nUSING_AWS_MANAGED_OPENSEARCH = (\n    os.environ.get(\"USING_AWS_MANAGED_OPENSEARCH\", \"\").lower() == \"true\"\n)\n# Profiling adds some overhead to OpenSearch operations. This overhead is\n# unknown right now. Defaults to True.\nOPENSEARCH_PROFILING_DISABLED = (\n    os.environ.get(\"OPENSEARCH_PROFILING_DISABLED\", \"true\").lower() == \"true\"\n)\n# Whether to disable match highlights for OpenSearch. Defaults to True for now\n# as we investigate query performance.\nOPENSEARCH_MATCH_HIGHLIGHTS_DISABLED = (\n    os.environ.get(\"OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED\", \"true\").lower() == \"true\"\n)\n# When enabled, OpenSearch returns detailed score breakdowns for each hit.\n# Useful for debugging and tuning search relevance. Has ~10-30% performance overhead according to documentation.\n# Seems for Hybrid Search in practice, the impact is actually more like 1000x slower.\nOPENSEARCH_EXPLAIN_ENABLED = (\n    os.environ.get(\"OPENSEARCH_EXPLAIN_ENABLED\", \"\").lower() == \"true\"\n)\n# Analyzer used for full-text fields (title, content). Use OpenSearch built-in analyzer\n# names (e.g. \"english\", \"standard\", \"german\"). Affects stemming and tokenization;\n# existing indices need reindexing after a change.\nOPENSEARCH_TEXT_ANALYZER = os.environ.get(\"OPENSEARCH_TEXT_ANALYZER\") or \"english\"\n\n# This is the \"base\" config for now, the idea is that at least for our dev\n# environments we always want to be dual indexing into both OpenSearch and Vespa\n# to stress test the new codepaths. Only enable this if there is some instance\n# of OpenSearch running for the relevant Onyx instance.\n# NOTE: Now enabled on by default, unless the env indicates otherwise.\nENABLE_OPENSEARCH_INDEXING_FOR_ONYX = (\n    os.environ.get(\"ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\", \"true\").lower() == \"true\"\n)\n# NOTE: This effectively does nothing anymore, admins can now toggle whether\n# retrieval is through OpenSearch. This value is only used as a final fallback\n# in case that doesn't work for whatever reason.\n# Given that the \"base\" config above is true, this enables whether we want to\n# retrieve from OpenSearch or Vespa. We want to be able to quickly toggle this\n# in the event we see issues with OpenSearch retrieval in our dev environments.\nENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX = (\n    ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\n    and os.environ.get(\"ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX\", \"\").lower() == \"true\"\n)\n# Whether we should check for and create an index if necessary every time we\n# instantiate an OpenSearchDocumentIndex on multitenant cloud. Defaults to True.\nVERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT = (\n    os.environ.get(\"VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT\", \"true\").lower()\n    == \"true\"\n)\nOPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE = int(\n    os.environ.get(\"OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE\") or 500\n)\n# If set, will override the default number of shards and replicas for the index.\nOPENSEARCH_INDEX_NUM_SHARDS: int | None = (\n    int(os.environ[\"OPENSEARCH_INDEX_NUM_SHARDS\"])\n    if os.environ.get(\"OPENSEARCH_INDEX_NUM_SHARDS\", None) is not None\n    else None\n)\nOPENSEARCH_INDEX_NUM_REPLICAS: int | None = (\n    int(os.environ[\"OPENSEARCH_INDEX_NUM_REPLICAS\"])\n    if os.environ.get(\"OPENSEARCH_INDEX_NUM_REPLICAS\", None) is not None\n    else None\n)\nONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH = (\n    os.environ.get(\"ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH\", \"\").lower()\n    == \"true\"\n)\n\nVESPA_HOST = os.environ.get(\"VESPA_HOST\") or \"localhost\"\n# NOTE: this is used if and only if the vespa config server is accessible via a\n# different host than the main vespa application\nVESPA_CONFIG_SERVER_HOST = os.environ.get(\"VESPA_CONFIG_SERVER_HOST\") or VESPA_HOST\nVESPA_PORT = os.environ.get(\"VESPA_PORT\") or \"8081\"\nVESPA_TENANT_PORT = os.environ.get(\"VESPA_TENANT_PORT\") or \"19071\"\n# the number of times to try and connect to vespa on startup before giving up\nVESPA_NUM_ATTEMPTS_ON_STARTUP = int(os.environ.get(\"NUM_RETRIES_ON_STARTUP\") or 10)\n\nVESPA_CLOUD_URL = os.environ.get(\"VESPA_CLOUD_URL\", \"\")\n\nVESPA_CLOUD_CERT_PATH = os.environ.get(\"VESPA_CLOUD_CERT_PATH\")\nVESPA_CLOUD_KEY_PATH = os.environ.get(\"VESPA_CLOUD_KEY_PATH\")\n\n# Number of documents in a batch during indexing (further batching done by chunks before passing to bi-encoder)\nINDEX_BATCH_SIZE = int(os.environ.get(\"INDEX_BATCH_SIZE\") or 16)\n\nMAX_DRIVE_WORKERS = int(os.environ.get(\"MAX_DRIVE_WORKERS\", 4))\n\n# Below are intended to match the env variables names used by the official postgres docker image\n# https://hub.docker.com/_/postgres\nPOSTGRES_USER = os.environ.get(\"POSTGRES_USER\") or \"postgres\"\n# URL-encode the password for asyncpg to avoid issues with special characters on some machines.\nPOSTGRES_PASSWORD = urllib.parse.quote_plus(\n    os.environ.get(\"POSTGRES_PASSWORD\") or \"password\"\n)\nPOSTGRES_HOST = os.environ.get(\"POSTGRES_HOST\") or \"127.0.0.1\"\nPOSTGRES_PORT = os.environ.get(\"POSTGRES_PORT\") or \"5432\"\nPOSTGRES_DB = os.environ.get(\"POSTGRES_DB\") or \"postgres\"\nAWS_REGION_NAME = os.environ.get(\"AWS_REGION_NAME\") or \"us-east-2\"\n\nPOSTGRES_API_SERVER_POOL_SIZE = int(\n    os.environ.get(\"POSTGRES_API_SERVER_POOL_SIZE\") or 40\n)\nPOSTGRES_API_SERVER_POOL_OVERFLOW = int(\n    os.environ.get(\"POSTGRES_API_SERVER_POOL_OVERFLOW\") or 10\n)\n\nPOSTGRES_API_SERVER_READ_ONLY_POOL_SIZE = int(\n    os.environ.get(\"POSTGRES_API_SERVER_READ_ONLY_POOL_SIZE\") or 10\n)\nPOSTGRES_API_SERVER_READ_ONLY_POOL_OVERFLOW = int(\n    os.environ.get(\"POSTGRES_API_SERVER_READ_ONLY_POOL_OVERFLOW\") or 5\n)\n\n# defaults to False\n# generally should only be used for\nPOSTGRES_USE_NULL_POOL = os.environ.get(\"POSTGRES_USE_NULL_POOL\", \"\").lower() == \"true\"\n\n# defaults to False\nPOSTGRES_POOL_PRE_PING = os.environ.get(\"POSTGRES_POOL_PRE_PING\", \"\").lower() == \"true\"\n\n# recycle timeout in seconds\nPOSTGRES_POOL_RECYCLE_DEFAULT = 60 * 20  # 20 minutes\ntry:\n    POSTGRES_POOL_RECYCLE = int(\n        os.environ.get(\"POSTGRES_POOL_RECYCLE\", POSTGRES_POOL_RECYCLE_DEFAULT)\n    )\nexcept ValueError:\n    POSTGRES_POOL_RECYCLE = POSTGRES_POOL_RECYCLE_DEFAULT\n\n# RDS IAM authentication - enables IAM-based authentication for PostgreSQL\nUSE_IAM_AUTH = os.getenv(\"USE_IAM_AUTH\", \"False\").lower() == \"true\"\n\n# Redis IAM authentication - enables IAM-based authentication for Redis ElastiCache\n# Note: This is separate from RDS IAM auth as they use different authentication mechanisms\nUSE_REDIS_IAM_AUTH = os.getenv(\"USE_REDIS_IAM_AUTH\", \"False\").lower() == \"true\"\nREDIS_SSL = os.getenv(\"REDIS_SSL\", \"\").lower() == \"true\"\nREDIS_HOST = os.environ.get(\"REDIS_HOST\") or \"localhost\"\nREDIS_PORT = int(os.environ.get(\"REDIS_PORT\", 6379))\nREDIS_PASSWORD = os.environ.get(\"REDIS_PASSWORD\") or \"\"\n\n# this assumes that other redis settings remain the same as the primary\nREDIS_REPLICA_HOST = os.environ.get(\"REDIS_REPLICA_HOST\") or REDIS_HOST\n\nREDIS_AUTH_KEY_PREFIX = \"fastapi_users_token:\"\n\n# Rate limiting for auth endpoints\nRATE_LIMIT_WINDOW_SECONDS: int | None = None\n_rate_limit_window_seconds_str = os.environ.get(\"RATE_LIMIT_WINDOW_SECONDS\")\nif _rate_limit_window_seconds_str is not None:\n    try:\n        RATE_LIMIT_WINDOW_SECONDS = int(_rate_limit_window_seconds_str)\n    except ValueError:\n        pass\n\nRATE_LIMIT_MAX_REQUESTS: int | None = None\n_rate_limit_max_requests_str = os.environ.get(\"RATE_LIMIT_MAX_REQUESTS\")\nif _rate_limit_max_requests_str is not None:\n    try:\n        RATE_LIMIT_MAX_REQUESTS = int(_rate_limit_max_requests_str)\n    except ValueError:\n        pass\n\nAUTH_RATE_LIMITING_ENABLED = RATE_LIMIT_MAX_REQUESTS and RATE_LIMIT_WINDOW_SECONDS\n# Used for general redis things\nREDIS_DB_NUMBER = int(os.environ.get(\"REDIS_DB_NUMBER\", 0))\n\n# Used by celery as broker and backend\nREDIS_DB_NUMBER_CELERY_RESULT_BACKEND = int(\n    os.environ.get(\"REDIS_DB_NUMBER_CELERY_RESULT_BACKEND\", 14)\n)\nREDIS_DB_NUMBER_CELERY = int(os.environ.get(\"REDIS_DB_NUMBER_CELERY\", 15))  # broker\n\n# will propagate to both our redis client as well as celery's redis client\nREDIS_HEALTH_CHECK_INTERVAL = int(os.environ.get(\"REDIS_HEALTH_CHECK_INTERVAL\", 60))\n\n# our redis client only, not celery's\nREDIS_POOL_MAX_CONNECTIONS = int(os.environ.get(\"REDIS_POOL_MAX_CONNECTIONS\", 128))\n\n# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings\n# should be one of \"required\", \"optional\", or \"none\"\nREDIS_SSL_CERT_REQS = os.getenv(\"REDIS_SSL_CERT_REQS\", \"none\")\nREDIS_SSL_CA_CERTS = os.getenv(\"REDIS_SSL_CA_CERTS\", None)\n\nCELERY_RESULT_EXPIRES = int(os.environ.get(\"CELERY_RESULT_EXPIRES\", 86400))  # seconds\n\n# https://docs.celeryq.dev/en/stable/userguide/configuration.html#broker-pool-limit\n# Setting to None may help when there is a proxy in the way closing idle connections\n_CELERY_BROKER_POOL_LIMIT_DEFAULT = 10\ntry:\n    CELERY_BROKER_POOL_LIMIT = int(\n        os.environ.get(\"CELERY_BROKER_POOL_LIMIT\", _CELERY_BROKER_POOL_LIMIT_DEFAULT)\n    )\nexcept ValueError:\n    CELERY_BROKER_POOL_LIMIT = _CELERY_BROKER_POOL_LIMIT_DEFAULT\n\n_CELERY_WORKER_LIGHT_CONCURRENCY_DEFAULT = 24\ntry:\n    CELERY_WORKER_LIGHT_CONCURRENCY = int(\n        os.environ.get(\n            \"CELERY_WORKER_LIGHT_CONCURRENCY\",\n            _CELERY_WORKER_LIGHT_CONCURRENCY_DEFAULT,\n        )\n    )\nexcept ValueError:\n    CELERY_WORKER_LIGHT_CONCURRENCY = _CELERY_WORKER_LIGHT_CONCURRENCY_DEFAULT\n\n_CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER_DEFAULT = 8\ntry:\n    CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER = int(\n        os.environ.get(\n            \"CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER\",\n            _CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER_DEFAULT,\n        )\n    )\nexcept ValueError:\n    CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER = (\n        _CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER_DEFAULT\n    )\n\n_CELERY_WORKER_DOCPROCESSING_CONCURRENCY_DEFAULT = 6\ntry:\n    env_value = os.environ.get(\"CELERY_WORKER_DOCPROCESSING_CONCURRENCY\")\n    if not env_value:\n        env_value = os.environ.get(\"NUM_INDEXING_WORKERS\")\n\n    if not env_value:\n        env_value = str(_CELERY_WORKER_DOCPROCESSING_CONCURRENCY_DEFAULT)\n    CELERY_WORKER_DOCPROCESSING_CONCURRENCY = int(env_value)\nexcept ValueError:\n    CELERY_WORKER_DOCPROCESSING_CONCURRENCY = (\n        _CELERY_WORKER_DOCPROCESSING_CONCURRENCY_DEFAULT\n    )\n\n_CELERY_WORKER_DOCFETCHING_CONCURRENCY_DEFAULT = 1\ntry:\n    env_value = os.environ.get(\"CELERY_WORKER_DOCFETCHING_CONCURRENCY\")\n    if not env_value:\n        env_value = os.environ.get(\"NUM_DOCFETCHING_WORKERS\")\n\n    if not env_value:\n        env_value = str(_CELERY_WORKER_DOCFETCHING_CONCURRENCY_DEFAULT)\n    CELERY_WORKER_DOCFETCHING_CONCURRENCY = int(env_value)\nexcept ValueError:\n    CELERY_WORKER_DOCFETCHING_CONCURRENCY = (\n        _CELERY_WORKER_DOCFETCHING_CONCURRENCY_DEFAULT\n    )\n\nCELERY_WORKER_PRIMARY_CONCURRENCY = int(\n    os.environ.get(\"CELERY_WORKER_PRIMARY_CONCURRENCY\") or 4\n)\n\nCELERY_WORKER_PRIMARY_POOL_OVERFLOW = int(\n    os.environ.get(\"CELERY_WORKER_PRIMARY_POOL_OVERFLOW\") or 4\n)\n\n# Individual worker concurrency settings\nCELERY_WORKER_HEAVY_CONCURRENCY = int(\n    os.environ.get(\"CELERY_WORKER_HEAVY_CONCURRENCY\") or 4\n)\n\nCELERY_WORKER_MONITORING_CONCURRENCY = int(\n    os.environ.get(\"CELERY_WORKER_MONITORING_CONCURRENCY\") or 1\n)\n\nCELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY = int(\n    os.environ.get(\"CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY\") or 2\n)\n\n# The maximum number of tasks that can be queued up to sync to Vespa in a single pass\nVESPA_SYNC_MAX_TASKS = 8192\n\nDB_YIELD_PER_DEFAULT = 64\n\n#####\n# Connector Configs\n#####\nPOLL_CONNECTOR_OFFSET = 30  # Minutes overlap between poll windows\n\n# View the list here:\n# https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/factory.py\n# If this is empty, all connectors are enabled, this is an option for security heavy orgs where\n# only very select connectors are enabled and admins cannot add other connector types\nENABLED_CONNECTOR_TYPES = os.environ.get(\"ENABLED_CONNECTOR_TYPES\") or \"\"\n\n# If set to true, curators can only access and edit assistants that they created\nCURATORS_CANNOT_VIEW_OR_EDIT_NON_OWNED_ASSISTANTS = (\n    os.environ.get(\"CURATORS_CANNOT_VIEW_OR_EDIT_NON_OWNED_ASSISTANTS\", \"\").lower()\n    == \"true\"\n)\n\n# Some calls to get information on expert users are quite costly especially with rate limiting\n# Since experts are not used in the actual user experience, currently it is turned off\n# for some connectors\nENABLE_EXPENSIVE_EXPERT_CALLS = False\n\n\n# TODO these should be available for frontend configuration, via advanced options expandable\nWEB_CONNECTOR_IGNORED_CLASSES = os.environ.get(\n    \"WEB_CONNECTOR_IGNORED_CLASSES\", \"sidebar,footer\"\n).split(\",\")\nWEB_CONNECTOR_IGNORED_ELEMENTS = os.environ.get(\n    \"WEB_CONNECTOR_IGNORED_ELEMENTS\", \"nav,footer,meta,script,style,symbol,aside\"\n).split(\",\")\nWEB_CONNECTOR_OAUTH_CLIENT_ID = os.environ.get(\"WEB_CONNECTOR_OAUTH_CLIENT_ID\")\nWEB_CONNECTOR_OAUTH_CLIENT_SECRET = os.environ.get(\"WEB_CONNECTOR_OAUTH_CLIENT_SECRET\")\nWEB_CONNECTOR_OAUTH_TOKEN_URL = os.environ.get(\"WEB_CONNECTOR_OAUTH_TOKEN_URL\")\nWEB_CONNECTOR_VALIDATE_URLS = os.environ.get(\"WEB_CONNECTOR_VALIDATE_URLS\")\n\nHTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY = os.environ.get(\n    \"HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY\",\n    HtmlBasedConnectorTransformLinksStrategy.STRIP,\n)\n\nNOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP = (\n    os.environ.get(\"NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP\", \"\").lower()\n    == \"true\"\n)\n\n\n#####\n# Confluence Connector Configs\n#####\n\nCONFLUENCE_CONNECTOR_LABELS_TO_SKIP = [\n    ignored_tag\n    for ignored_tag in os.environ.get(\"CONFLUENCE_CONNECTOR_LABELS_TO_SKIP\", \"\").split(\n        \",\"\n    )\n    if ignored_tag\n]\n\n# Attachments exceeding this size will not be retrieved (in bytes)\nCONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD = int(\n    os.environ.get(\"CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD\", 10 * 1024 * 1024)\n)\n# Attachments with more chars than this will not be indexed. This is to prevent extremely\n# large files from freezing indexing. 200,000 is ~100 google doc pages.\nCONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD = int(\n    os.environ.get(\"CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD\", 200_000)\n)\n\n# A JSON-formatted array. Each item in the array should have the following structure:\n# {\n#     \"user_id\": \"1234567890\",\n#     \"username\": \"bob\",\n#     \"display_name\": \"Bob Fitzgerald\",\n#     \"email\": \"bob@example.com\",\n#     \"type\": \"known\"\n# }\n_RAW_CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE = os.environ.get(\n    \"CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE\", \"\"\n)\nCONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE = cast(\n    list[dict[str, str]] | None,\n    (\n        json.loads(_RAW_CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE)\n        if _RAW_CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE\n        else None\n    ),\n)\n\n# Due to breakages in the confluence API, the timezone offset must be specified client side\n# to match the user's specified timezone.\n\n# The current state of affairs:\n# CQL queries are parsed in the user's timezone and cannot be specified in UTC\n# no API retrieves the user's timezone\n# All data is returned in UTC, so we can't derive the user's timezone from that\n\n# https://community.developer.atlassian.com/t/confluence-cloud-time-zone-get-via-rest-api/35954/16\n# https://jira.atlassian.com/browse/CONFCLOUD-69670\n\n\ndef get_current_tz_offset() -> int:\n    # datetime now() gets local time, datetime.now(timezone.utc) gets UTC time.\n    # remove tzinfo to compare non-timezone-aware objects.\n    time_diff = datetime.now() - datetime.now(timezone.utc).replace(tzinfo=None)\n    return round(time_diff.total_seconds() / 3600)\n\n\n# enter as a floating point offset from UTC in hours (-24 < val < 24)\n# this will be applied globally, so it probably makes sense to transition this to per\n# connector as some point.\n# For the default value, we assume that the user's local timezone is more likely to be\n# correct (i.e. the configured user's timezone or the default server one) than UTC.\n# https://developer.atlassian.com/cloud/confluence/cql-fields/#created\nCONFLUENCE_TIMEZONE_OFFSET = float(\n    os.environ.get(\"CONFLUENCE_TIMEZONE_OFFSET\", get_current_tz_offset())\n)\n\nCONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC = (\n    os.environ.get(\"CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC\", \"\").lower() == \"true\"\n)\n\nGOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD = int(\n    os.environ.get(\"GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD\", 10 * 1024 * 1024)\n)\n\n# Default size threshold for Drupal Wiki attachments (10MB)\nDRUPAL_WIKI_ATTACHMENT_SIZE_THRESHOLD = int(\n    os.environ.get(\"DRUPAL_WIKI_ATTACHMENT_SIZE_THRESHOLD\", 10 * 1024 * 1024)\n)\n\n# Default size threshold for SharePoint files (20MB)\nSHAREPOINT_CONNECTOR_SIZE_THRESHOLD = int(\n    os.environ.get(\"SHAREPOINT_CONNECTOR_SIZE_THRESHOLD\", 20 * 1024 * 1024)\n)\n\n# When True, group sync enumerates every Azure AD group in the tenant (expensive).\n# When False (default), only groups found in site role assignments are synced.\n# Can be overridden per-connector via the \"exhaustive_ad_enumeration\" key in\n# connector_specific_config.\nSHAREPOINT_EXHAUSTIVE_AD_ENUMERATION = (\n    os.environ.get(\"SHAREPOINT_EXHAUSTIVE_AD_ENUMERATION\", \"\").lower() == \"true\"\n)\n\nBLOB_STORAGE_SIZE_THRESHOLD = int(\n    os.environ.get(\"BLOB_STORAGE_SIZE_THRESHOLD\", 20 * 1024 * 1024)\n)\n\nJIRA_CONNECTOR_LABELS_TO_SKIP = [\n    ignored_tag\n    for ignored_tag in os.environ.get(\"JIRA_CONNECTOR_LABELS_TO_SKIP\", \"\").split(\",\")\n    if ignored_tag\n]\n# Maximum size for Jira tickets in bytes (default: 100KB)\nJIRA_CONNECTOR_MAX_TICKET_SIZE = int(\n    os.environ.get(\"JIRA_CONNECTOR_MAX_TICKET_SIZE\", 100 * 1024)\n)\nJIRA_SLIM_PAGE_SIZE = int(os.environ.get(\"JIRA_SLIM_PAGE_SIZE\", 500))\n\nGONG_CONNECTOR_START_TIME = os.environ.get(\"GONG_CONNECTOR_START_TIME\")\n\nGITHUB_CONNECTOR_BASE_URL = os.environ.get(\"GITHUB_CONNECTOR_BASE_URL\") or None\n\nGITLAB_CONNECTOR_INCLUDE_CODE_FILES = (\n    os.environ.get(\"GITLAB_CONNECTOR_INCLUDE_CODE_FILES\", \"\").lower() == \"true\"\n)\n\n# Typically set to http://localhost:3000 for OAuth connector development\nCONNECTOR_LOCALHOST_OVERRIDE = os.getenv(\"CONNECTOR_LOCALHOST_OVERRIDE\")\n\n# Egnyte specific configs\nEGNYTE_CLIENT_ID = os.getenv(\"EGNYTE_CLIENT_ID\")\nEGNYTE_CLIENT_SECRET = os.getenv(\"EGNYTE_CLIENT_SECRET\")\n\n# Linear specific configs\nLINEAR_CLIENT_ID = os.getenv(\"LINEAR_CLIENT_ID\")\nLINEAR_CLIENT_SECRET = os.getenv(\"LINEAR_CLIENT_SECRET\")\n\n# Slack specific configs\nSLACK_NUM_THREADS = int(os.getenv(\"SLACK_NUM_THREADS\") or 8)\nMAX_SLACK_QUERY_EXPANSIONS = int(os.environ.get(\"MAX_SLACK_QUERY_EXPANSIONS\", \"5\"))\n\n# Slack federated search thread context settings\n# Batch size for fetching thread context (controls concurrent API calls per batch)\nSLACK_THREAD_CONTEXT_BATCH_SIZE = int(\n    os.environ.get(\"SLACK_THREAD_CONTEXT_BATCH_SIZE\", \"5\")\n)\n# Maximum messages to fetch thread context for (top N by relevance get full context)\nMAX_SLACK_THREAD_CONTEXT_MESSAGES = int(\n    os.environ.get(\"MAX_SLACK_THREAD_CONTEXT_MESSAGES\", \"5\")\n)\n\n# TestRail specific configs\nTESTRAIL_BASE_URL = os.environ.get(\"TESTRAIL_BASE_URL\", \"\")\nTESTRAIL_USERNAME = os.environ.get(\"TESTRAIL_USERNAME\", \"\")\nTESTRAIL_API_KEY = os.environ.get(\"TESTRAIL_API_KEY\", \"\")\n\nLEAVE_CONNECTOR_ACTIVE_ON_INITIALIZATION_FAILURE = (\n    os.environ.get(\"LEAVE_CONNECTOR_ACTIVE_ON_INITIALIZATION_FAILURE\", \"\").lower()\n    == \"true\"\n)\n\nDEFAULT_PRUNING_FREQ = 60 * 60 * 24  # Once a day\n\nALLOW_SIMULTANEOUS_PRUNING = (\n    os.environ.get(\"ALLOW_SIMULTANEOUS_PRUNING\", \"\").lower() == \"true\"\n)\n\n# This is the maximum rate at which documents are queried for a pruning job. 0 disables the limitation.\nMAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE = int(\n    os.environ.get(\"MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE\", 0)\n)\n\n# comma delimited list of zendesk article labels to skip indexing for\nZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS = os.environ.get(\n    \"ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS\", \"\"\n).split(\",\")\n\n\n#####\n# Indexing Configs\n#####\n# NOTE: Currently only supported in the Confluence and Google Drive connectors +\n# only handles some failures (Confluence = handles API call failures, Google\n# Drive = handles failures pulling files / parsing them)\nCONTINUE_ON_CONNECTOR_FAILURE = os.environ.get(\n    \"CONTINUE_ON_CONNECTOR_FAILURE\", \"\"\n).lower() not in [\"false\", \"\"]\n# When swapping to a new embedding model, a secondary index is created in the background, to conserve\n# resources, we pause updates on the primary index by default while the secondary index is created\nDISABLE_INDEX_UPDATE_ON_SWAP = (\n    os.environ.get(\"DISABLE_INDEX_UPDATE_ON_SWAP\", \"\").lower() == \"true\"\n)\n# More accurate results at the expense of indexing speed and index size (stores additional 4 MINI_CHUNK vectors)\nENABLE_MULTIPASS_INDEXING = (\n    os.environ.get(\"ENABLE_MULTIPASS_INDEXING\", \"\").lower() == \"true\"\n)\n# Enable contextual retrieval\nENABLE_CONTEXTUAL_RAG = os.environ.get(\"ENABLE_CONTEXTUAL_RAG\", \"\").lower() == \"true\"\n\nDEFAULT_CONTEXTUAL_RAG_LLM_NAME = \"gpt-4o-mini\"\nDEFAULT_CONTEXTUAL_RAG_LLM_PROVIDER = \"DevEnvPresetOpenAI\"\n# Finer grained chunking for more detail retention\n# Slightly larger since the sentence aware split is a max cutoff so most minichunks will be under MINI_CHUNK_SIZE\n# tokens. But we need it to be at least as big as 1/4th chunk size to avoid having a tiny mini-chunk at the end\nMINI_CHUNK_SIZE = 150\n\n# This is the number of regular chunks per large chunk\nLARGE_CHUNK_RATIO = 4\n\n# The maximum number of chunks that can be held for 1 document processing batch\n# The purpose of this is to set an upper bound on memory usage\nMAX_CHUNKS_PER_DOC_BATCH = int(os.environ.get(\"MAX_CHUNKS_PER_DOC_BATCH\") or 1000)\n\n# Include the document level metadata in each chunk. If the metadata is too long, then it is thrown out\n# We don't want the metadata to overwhelm the actual contents of the chunk\nSKIP_METADATA_IN_CHUNK = os.environ.get(\"SKIP_METADATA_IN_CHUNK\", \"\").lower() == \"true\"\n\n# The indexer will warn in the logs whenver a document exceeds this threshold (in bytes)\nINDEXING_SIZE_WARNING_THRESHOLD = int(\n    os.environ.get(\"INDEXING_SIZE_WARNING_THRESHOLD\") or 100 * 1024 * 1024\n)\n\n# during indexing, will log verbose memory diff stats every x batches and at the end.\n# 0 disables this behavior and is the default.\nINDEXING_TRACER_INTERVAL = int(os.environ.get(\"INDEXING_TRACER_INTERVAL\") or 0)\n\n# Enable multi-threaded embedding model calls for parallel processing\n# Note: only applies for API-based embedding models\nINDEXING_EMBEDDING_MODEL_NUM_THREADS = int(\n    os.environ.get(\"INDEXING_EMBEDDING_MODEL_NUM_THREADS\") or 8\n)\n\n# Maximum file size in a document to be indexed\nMAX_DOCUMENT_CHARS = int(os.environ.get(\"MAX_DOCUMENT_CHARS\") or 5_000_000)\nMAX_FILE_SIZE_BYTES = int(\n    os.environ.get(\"MAX_FILE_SIZE_BYTES\") or 2 * 1024 * 1024 * 1024\n)  # 2GB in bytes\n\n# Use document summary for contextual rag\nUSE_DOCUMENT_SUMMARY = os.environ.get(\"USE_DOCUMENT_SUMMARY\", \"true\").lower() == \"true\"\n# Use chunk summary for contextual rag\nUSE_CHUNK_SUMMARY = os.environ.get(\"USE_CHUNK_SUMMARY\", \"true\").lower() == \"true\"\n# Average summary embeddings for contextual rag (not yet implemented)\nAVERAGE_SUMMARY_EMBEDDINGS = (\n    os.environ.get(\"AVERAGE_SUMMARY_EMBEDDINGS\", \"false\").lower() == \"true\"\n)\n\nMAX_TOKENS_FOR_FULL_INCLUSION = 4096\n\n# The intent was to have this be configurable per query, but I don't think any\n# codepath was actually configuring this, so for the migrated Vespa interface\n# we'll just use the default value, but also have it be configurable by env var.\nRECENCY_BIAS_MULTIPLIER = float(os.environ.get(\"RECENCY_BIAS_MULTIPLIER\") or 1.0)\n\n# Should match the rerank-count value set in\n# backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd.jinja.\nRERANK_COUNT = int(os.environ.get(\"RERANK_COUNT\") or 1000)\n\n\n#####\n# Tool Configs\n#####\n# Code Interpreter Service Configuration\nCODE_INTERPRETER_BASE_URL = os.environ.get(\n    \"CODE_INTERPRETER_BASE_URL\", \"http://localhost:8000\"\n)\n\nCODE_INTERPRETER_DEFAULT_TIMEOUT_MS = int(\n    os.environ.get(\"CODE_INTERPRETER_DEFAULT_TIMEOUT_MS\") or 60_000\n)\n\nCODE_INTERPRETER_MAX_OUTPUT_LENGTH = int(\n    os.environ.get(\"CODE_INTERPRETER_MAX_OUTPUT_LENGTH\") or 50_000\n)\n\n\n#####\n# Miscellaneous\n#####\nJOB_TIMEOUT = 60 * 60 * 6  # 6 hours default\n# Logs Onyx only model interactions like prompts, responses, messages etc.\nLOG_ONYX_MODEL_INTERACTIONS = (\n    os.environ.get(\"LOG_ONYX_MODEL_INTERACTIONS\", \"\").lower() == \"true\"\n)\n\nPROMPT_CACHE_CHAT_HISTORY = (\n    os.environ.get(\"PROMPT_CACHE_CHAT_HISTORY\", \"\").lower() == \"true\"\n)\n# If set to `true` will enable additional logs about Vespa query performance\n# (time spent on finding the right docs + time spent fetching summaries from disk)\nLOG_VESPA_TIMING_INFORMATION = (\n    os.environ.get(\"LOG_VESPA_TIMING_INFORMATION\", \"\").lower() == \"true\"\n)\nLOG_ENDPOINT_LATENCY = os.environ.get(\"LOG_ENDPOINT_LATENCY\", \"\").lower() == \"true\"\nLOG_POSTGRES_LATENCY = os.environ.get(\"LOG_POSTGRES_LATENCY\", \"\").lower() == \"true\"\nLOG_POSTGRES_CONN_COUNTS = (\n    os.environ.get(\"LOG_POSTGRES_CONN_COUNTS\", \"\").lower() == \"true\"\n)\n# Anonymous usage telemetry\nDISABLE_TELEMETRY = os.environ.get(\"DISABLE_TELEMETRY\", \"\").lower() == \"true\"\n\n#####\n# Braintrust Configuration\n#####\n# Braintrust project name\nBRAINTRUST_PROJECT = os.environ.get(\"BRAINTRUST_PROJECT\", \"Onyx\")\n# Braintrust API key - if provided, Braintrust tracing will be enabled\nBRAINTRUST_API_KEY = os.environ.get(\"BRAINTRUST_API_KEY\") or \"\"\n# Maximum concurrency for Braintrust evaluations\n# None means unlimited concurrency, otherwise specify a number\n_braintrust_concurrency = os.environ.get(\"BRAINTRUST_MAX_CONCURRENCY\")\nBRAINTRUST_MAX_CONCURRENCY = (\n    int(_braintrust_concurrency) if _braintrust_concurrency else None\n)\n\n#####\n# Scheduled Evals Configuration\n#####\n# Comma-separated list of Braintrust dataset names to run on schedule\nSCHEDULED_EVAL_DATASET_NAMES = [\n    name.strip()\n    for name in os.environ.get(\"SCHEDULED_EVAL_DATASET_NAMES\", \"\").split(\",\")\n    if name.strip()\n]\n# Email address to use for search permissions during scheduled evals\nSCHEDULED_EVAL_PERMISSIONS_EMAIL = os.environ.get(\n    \"SCHEDULED_EVAL_PERMISSIONS_EMAIL\", \"roshan@onyx.app\"\n)\n# Braintrust project name to use for scheduled evals\nSCHEDULED_EVAL_PROJECT = os.environ.get(\"SCHEDULED_EVAL_PROJECT\", \"st-dev\")\n\n#####\n# Langfuse Configuration\n#####\n# Langfuse API credentials - if provided, Langfuse tracing will be enabled\nLANGFUSE_SECRET_KEY = os.environ.get(\"LANGFUSE_SECRET_KEY\") or \"\"\nLANGFUSE_PUBLIC_KEY = os.environ.get(\"LANGFUSE_PUBLIC_KEY\") or \"\"\nLANGFUSE_HOST = os.environ.get(\"LANGFUSE_HOST\") or \"\"  # For self-hosted Langfuse\n\n# Defined custom query/answer conditions to validate the query and the LLM answer.\n# Format: list of strings\nCUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(\n    os.environ.get(\"CUSTOM_ANSWER_VALIDITY_CONDITIONS\", \"[]\")\n)\n\nVESPA_REQUEST_TIMEOUT = int(os.environ.get(\"VESPA_REQUEST_TIMEOUT\") or \"15\")\n# This is the timeout for the client side of the Vespa migration task. When\n# exceeded, an exception is raised in our code. This value should be higher than\n# VESPA_MIGRATION_SERVER_SIDE_REQUEST_TIMEOUT.\nVESPA_MIGRATION_REQUEST_TIMEOUT_S = int(\n    os.environ.get(\"VESPA_MIGRATION_REQUEST_TIMEOUT_S\") or \"120\"\n)\n# This is the timeout Vespa uses on the server side to know when to wrap up its\n# traversal and try to report partial results. This differs from the client\n# timeout above which raises an exception in our code when exceeded. This\n# timeout allows Vespa to return gracefully. This value should be lower than\n# VESPA_MIGRATION_REQUEST_TIMEOUT_S. Formatted as <number of seconds>s.\nVESPA_MIGRATION_SERVER_SIDE_REQUEST_TIMEOUT = os.environ.get(\n    \"VESPA_MIGRATION_SERVER_SIDE_REQUEST_TIMEOUT\", \"110s\"\n)\n\nSYSTEM_RECURSION_LIMIT = int(os.environ.get(\"SYSTEM_RECURSION_LIMIT\") or \"1000\")\n\nPARSE_WITH_TRAFILATURA = os.environ.get(\"PARSE_WITH_TRAFILATURA\", \"\").lower() == \"true\"\n\n# allow for custom error messages for different errors returned by litellm\n# for example, can specify: {\"Violated content safety policy\": \"EVIL REQUEST!!!\"}\n# to make it so that if an LLM call returns an error containing \"Violated content safety policy\"\n# the end user will see \"EVIL REQUEST!!!\" instead of the default error message.\n_LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS = os.environ.get(\n    \"LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS\", \"\"\n)\nLITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS: dict[str, str] | None = None\ntry:\n    LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS = cast(\n        dict[str, str], json.loads(_LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS)\n    )\nexcept json.JSONDecodeError:\n    pass\n\n# Auto LLM Configuration - fetches model configs from GitHub for providers in Auto mode\nAUTO_LLM_CONFIG_URL = os.environ.get(\n    \"AUTO_LLM_CONFIG_URL\",\n    \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/onyx/llm/well_known_providers/recommended-models.json\",\n)\n\n# How often to check for auto LLM model updates (in seconds)\nAUTO_LLM_UPDATE_INTERVAL_SECONDS = int(\n    os.environ.get(\"AUTO_LLM_UPDATE_INTERVAL_SECONDS\", 1800)  # 30 minutes\n)\n\n#####\n# Enterprise Edition Configs\n#####\n# NOTE: this should only be enabled if you have purchased an enterprise license.\n# if you're interested in an enterprise license, please reach out to us at\n# founders@onyx.app OR message Chris Weaver or Yuhong Sun in the Onyx\n# Discord community https://discord.gg/4NA5SbzrWb\nENTERPRISE_EDITION_ENABLED = (\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() == \"true\"\n)\n\n#####\n# Image Generation Configuration (DEPRECATED)\n# These environment variables will be deprecated soon.\n# To configure image generation, please visit the Image Generation page in the Admin Panel.\n#####\n# Azure Image Configurations\nAZURE_IMAGE_API_VERSION = os.environ.get(\"AZURE_IMAGE_API_VERSION\") or os.environ.get(\n    \"AZURE_DALLE_API_VERSION\"\n)\nAZURE_IMAGE_API_KEY = os.environ.get(\"AZURE_IMAGE_API_KEY\") or os.environ.get(\n    \"AZURE_DALLE_API_KEY\"\n)\nAZURE_IMAGE_API_BASE = os.environ.get(\"AZURE_IMAGE_API_BASE\") or os.environ.get(\n    \"AZURE_DALLE_API_BASE\"\n)\nAZURE_IMAGE_DEPLOYMENT_NAME = os.environ.get(\n    \"AZURE_IMAGE_DEPLOYMENT_NAME\"\n) or os.environ.get(\"AZURE_DALLE_DEPLOYMENT_NAME\")\n\n# configurable image model\nIMAGE_MODEL_NAME = os.environ.get(\"IMAGE_MODEL_NAME\", \"gpt-image-1\")\nIMAGE_MODEL_PROVIDER = os.environ.get(\"IMAGE_MODEL_PROVIDER\", \"openai\")\n\n# Use managed Vespa (Vespa Cloud). If set, must also set VESPA_CLOUD_URL, VESPA_CLOUD_CERT_PATH and VESPA_CLOUD_KEY_PATH\nMANAGED_VESPA = os.environ.get(\"MANAGED_VESPA\", \"\").lower() == \"true\"\n\nENABLE_EMAIL_INVITES = os.environ.get(\"ENABLE_EMAIL_INVITES\", \"\").lower() == \"true\"\n\n# Limit on number of users a free trial tenant can invite (cloud only)\nNUM_FREE_TRIAL_USER_INVITES = int(os.environ.get(\"NUM_FREE_TRIAL_USER_INVITES\", \"10\"))\n\n# Security and authentication\nDATA_PLANE_SECRET = os.environ.get(\n    \"DATA_PLANE_SECRET\", \"\"\n)  # Used for secure communication between the control and data plane\nEXPECTED_API_KEY = os.environ.get(\n    \"EXPECTED_API_KEY\", \"\"\n)  # Additional security check for the control plane API\n\n# API configuration\nCONTROL_PLANE_API_BASE_URL = os.environ.get(\n    \"CONTROL_PLANE_API_BASE_URL\", \"http://localhost:8082\"\n)\n\nOAUTH_SLACK_CLIENT_ID = os.environ.get(\"OAUTH_SLACK_CLIENT_ID\", \"\")\nOAUTH_SLACK_CLIENT_SECRET = os.environ.get(\"OAUTH_SLACK_CLIENT_SECRET\", \"\")\nOAUTH_CONFLUENCE_CLOUD_CLIENT_ID = os.environ.get(\n    \"OAUTH_CONFLUENCE_CLOUD_CLIENT_ID\", \"\"\n)\nOAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET = os.environ.get(\n    \"OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET\", \"\"\n)\nOAUTH_GOOGLE_DRIVE_CLIENT_ID = os.environ.get(\"OAUTH_GOOGLE_DRIVE_CLIENT_ID\", \"\")\nOAUTH_GOOGLE_DRIVE_CLIENT_SECRET = os.environ.get(\n    \"OAUTH_GOOGLE_DRIVE_CLIENT_SECRET\", \"\"\n)\n\n# JWT configuration\nJWT_ALGORITHM = \"HS256\"\n\n#####\n# API Key Configs\n#####\n# refers to the rounds described here: https://passlib.readthedocs.io/en/stable/lib/passlib.hash.sha256_crypt.html\n_API_KEY_HASH_ROUNDS_RAW = os.environ.get(\"API_KEY_HASH_ROUNDS\")\nAPI_KEY_HASH_ROUNDS = (\n    int(_API_KEY_HASH_ROUNDS_RAW) if _API_KEY_HASH_ROUNDS_RAW else None\n)\n\n#####\n# MCP Server Configs\n#####\nMCP_SERVER_ENABLED = os.environ.get(\"MCP_SERVER_ENABLED\", \"\").lower() == \"true\"\nMCP_SERVER_HOST = os.environ.get(\"MCP_SERVER_HOST\", \"0.0.0.0\")\nMCP_SERVER_PORT = int(os.environ.get(\"MCP_SERVER_PORT\") or 8090)\n\n# CORS origins for MCP clients (comma-separated)\n# Local dev: \"http://localhost:*\"\n# Production: \"https://trusted-client.com,https://another-client.com\"\nMCP_SERVER_CORS_ORIGINS = [\n    origin.strip()\n    for origin in os.environ.get(\"MCP_SERVER_CORS_ORIGINS\", \"\").split(\",\")\n    if origin.strip()\n]\n\n\nPOD_NAME = os.environ.get(\"POD_NAME\")\nPOD_NAMESPACE = os.environ.get(\"POD_NAMESPACE\")\n\n\nDEV_MODE = os.environ.get(\"DEV_MODE\", \"\").lower() == \"true\"\n\n\nINTEGRATION_TESTS_MODE = os.environ.get(\"INTEGRATION_TESTS_MODE\", \"\").lower() == \"true\"\n\n#####\n# Captcha Configuration (for cloud signup protection)\n#####\n# Enable captcha verification for new user registration\nCAPTCHA_ENABLED = os.environ.get(\"CAPTCHA_ENABLED\", \"\").lower() == \"true\"\n\n# Google reCAPTCHA secret key (server-side validation)\nRECAPTCHA_SECRET_KEY = os.environ.get(\"RECAPTCHA_SECRET_KEY\", \"\")\n\n# Minimum score threshold for reCAPTCHA v3 (0.0-1.0, higher = more likely human)\n# 0.5 is the recommended default\nRECAPTCHA_SCORE_THRESHOLD = float(os.environ.get(\"RECAPTCHA_SCORE_THRESHOLD\", \"0.5\"))\n\nMOCK_CONNECTOR_FILE_PATH = os.environ.get(\"MOCK_CONNECTOR_FILE_PATH\")\n\n# Set to true to mock LLM responses for testing purposes\nMOCK_LLM_RESPONSE = (\n    os.environ.get(\"MOCK_LLM_RESPONSE\") if os.environ.get(\"MOCK_LLM_RESPONSE\") else None\n)\n\n\nDEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB = 20\n\n# Number of pre-provisioned tenants to maintain\nTARGET_AVAILABLE_TENANTS = int(os.environ.get(\"TARGET_AVAILABLE_TENANTS\", \"5\"))\n\n\n# Image summarization configuration\nIMAGE_SUMMARIZATION_SYSTEM_PROMPT = os.environ.get(\n    \"IMAGE_SUMMARIZATION_SYSTEM_PROMPT\",\n    DEFAULT_IMAGE_SUMMARIZATION_SYSTEM_PROMPT,\n)\n\n# The user prompt for image summarization - the image filename will be automatically prepended\nIMAGE_SUMMARIZATION_USER_PROMPT = os.environ.get(\n    \"IMAGE_SUMMARIZATION_USER_PROMPT\",\n    DEFAULT_IMAGE_SUMMARIZATION_USER_PROMPT,\n)\n\n# Knowledge Graph Read Only User Configuration\nDB_READONLY_USER: str = os.environ.get(\"DB_READONLY_USER\", \"db_readonly_user\")\nDB_READONLY_PASSWORD: str = urllib.parse.quote_plus(\n    os.environ.get(\"DB_READONLY_PASSWORD\") or \"password\"\n)\n\n# File Store Configuration\n# Which backend to use for file storage: \"s3\" (S3/MinIO) or \"postgres\" (PostgreSQL Large Objects)\nFILE_STORE_BACKEND = os.environ.get(\"FILE_STORE_BACKEND\", \"s3\")\n\nS3_FILE_STORE_BUCKET_NAME = (\n    os.environ.get(\"S3_FILE_STORE_BUCKET_NAME\") or \"onyx-file-store-bucket\"\n)\nS3_FILE_STORE_PREFIX = os.environ.get(\"S3_FILE_STORE_PREFIX\") or \"onyx-files\"\n# S3_ENDPOINT_URL is for MinIO and other S3-compatible storage. Leave blank for AWS S3.\nS3_ENDPOINT_URL = os.environ.get(\"S3_ENDPOINT_URL\")\nS3_VERIFY_SSL = os.environ.get(\"S3_VERIFY_SSL\", \"\").lower() == \"true\"\n\n# S3/MinIO Access Keys\nS3_AWS_ACCESS_KEY_ID = os.environ.get(\"S3_AWS_ACCESS_KEY_ID\")\nS3_AWS_SECRET_ACCESS_KEY = os.environ.get(\"S3_AWS_SECRET_ACCESS_KEY\")\n\n# Should we force S3 local checksumming\nS3_GENERATE_LOCAL_CHECKSUM = (\n    os.environ.get(\"S3_GENERATE_LOCAL_CHECKSUM\", \"\").lower() == \"true\"\n)\n\n# Forcing Vespa Language\n# English: en, German:de, etc. See: https://docs.vespa.ai/en/linguistics.html\nVESPA_LANGUAGE_OVERRIDE = os.environ.get(\"VESPA_LANGUAGE_OVERRIDE\")\n\n\n#####\n# Default LLM API Keys (for cloud deployments)\n# These are Onyx-managed API keys provided to tenants by default\n#####\nOPENAI_DEFAULT_API_KEY = os.environ.get(\"OPENAI_DEFAULT_API_KEY\")\nANTHROPIC_DEFAULT_API_KEY = os.environ.get(\"ANTHROPIC_DEFAULT_API_KEY\")\nCOHERE_DEFAULT_API_KEY = os.environ.get(\"COHERE_DEFAULT_API_KEY\")\nVERTEXAI_DEFAULT_CREDENTIALS = os.environ.get(\"VERTEXAI_DEFAULT_CREDENTIALS\")\nVERTEXAI_DEFAULT_LOCATION = os.environ.get(\"VERTEXAI_DEFAULT_LOCATION\", \"global\")\nOPENROUTER_DEFAULT_API_KEY = os.environ.get(\"OPENROUTER_DEFAULT_API_KEY\")\n\nINSTANCE_TYPE = (\n    \"managed\"\n    if os.environ.get(\"IS_MANAGED_INSTANCE\", \"\").lower() == \"true\"\n    else \"cloud\" if AUTH_TYPE == AuthType.CLOUD else \"self_hosted\"\n)\n\n\n## Discord Bot Configuration\nDISCORD_BOT_TOKEN = os.environ.get(\"DISCORD_BOT_TOKEN\")\nDISCORD_BOT_INVOKE_CHAR = os.environ.get(\"DISCORD_BOT_INVOKE_CHAR\", \"!\")\n\n\n## Stripe Configuration\n# URL to fetch the Stripe publishable key from a public S3 bucket.\n# Publishable keys are safe to expose publicly - they can only initialize\n# Stripe.js and tokenize payment info, not make charges or access data.\nSTRIPE_PUBLISHABLE_KEY_URL = (\n    \"https://onyx-stripe-public.s3.amazonaws.com/publishable-key.txt\"\n)\n# Override for local testing with Stripe test keys (pk_test_*)\nSTRIPE_PUBLISHABLE_KEY_OVERRIDE = os.environ.get(\"STRIPE_PUBLISHABLE_KEY\")\n"
  },
  {
    "path": "backend/onyx/configs/chat_configs.py",
    "content": "import os\n\nPROMPTS_YAML = \"./onyx/seeding/prompts.yaml\"\nPERSONAS_YAML = \"./onyx/seeding/personas.yaml\"\nNUM_RETURNED_HITS = 50\n\n# May be less depending on model\nMAX_CHUNKS_FED_TO_CHAT = int(os.environ.get(\"MAX_CHUNKS_FED_TO_CHAT\") or 25)\n\n# 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), set to 0 to have no decay\n# Capped in Vespa at 0.5\nDOC_TIME_DECAY = float(\n    os.environ.get(\"DOC_TIME_DECAY\") or 0.5  # Hits limit at 2 years by default\n)\nBASE_RECENCY_DECAY = 0.5\nFAVOR_RECENT_DECAY_MULTIPLIER = 2.0\n# For the highest matching base size chunk, how many chunks above and below do we pull in by default\n# Note this is not in any of the deployment configs yet\n# Currently only applies to search flow not chat\nCONTEXT_CHUNKS_ABOVE = int(os.environ.get(\"CONTEXT_CHUNKS_ABOVE\") or 1)\nCONTEXT_CHUNKS_BELOW = int(os.environ.get(\"CONTEXT_CHUNKS_BELOW\") or 1)\n# Fairly long but this is to account for edge cases where the LLM pauses for much longer than usual\n# The alternative is to fail the request completely so this is intended to be fairly lenient.\nLLM_SOCKET_READ_TIMEOUT = int(\n    os.environ.get(\"LLM_SOCKET_READ_TIMEOUT\") or \"60\"\n)  # 60 seconds\n# Weighting factor between vector and keyword Search; 1 for completely vector\n# search, 0 for keyword. Enforces a valid range of [0, 1]. A supplied value from\n# the env outside of this range will be clipped to the respective end of the\n# range. Defaults to 0.5.\nHYBRID_ALPHA = max(0, min(1, float(os.environ.get(\"HYBRID_ALPHA\") or 0.5)))\n# Weighting factor between Title and Content of documents during search, 1 for completely\n# Title based. Default heavily favors Content because Title is also included at the top of\n# Content. This is to avoid cases where the Content is very relevant but it may not be clear\n# if the title is separated out. Title is most of a \"boost\" than a separate field.\nTITLE_CONTENT_RATIO = max(\n    0, min(1, float(os.environ.get(\"TITLE_CONTENT_RATIO\") or 0.10))\n)\n\n# Stops streaming answers back to the UI if this pattern is seen:\nSTOP_STREAM_PAT = os.environ.get(\"STOP_STREAM_PAT\") or None\n\n# Set this to \"true\" to hard delete chats\n# This will make chats unviewable by admins after a user deletes them\n# As opposed to soft deleting them, which just hides them from non-admin users\nHARD_DELETE_CHATS = os.environ.get(\"HARD_DELETE_CHATS\", \"\").lower() == \"true\"\n\n# Internet Search\nNUM_INTERNET_SEARCH_RESULTS = int(os.environ.get(\"NUM_INTERNET_SEARCH_RESULTS\") or 10)\nNUM_INTERNET_SEARCH_CHUNKS = int(os.environ.get(\"NUM_INTERNET_SEARCH_CHUNKS\") or 50)\n\nVESPA_SEARCHER_THREADS = int(os.environ.get(\"VESPA_SEARCHER_THREADS\") or 2)\n\n# Whether or not to use the semantic & keyword search expansions for Basic Search\nUSE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH = (\n    os.environ.get(\"USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH\", \"false\").lower()\n    == \"true\"\n)\n\n# Chat History Compression\n# Trigger compression when history exceeds this ratio of available context window\nCOMPRESSION_TRIGGER_RATIO = float(os.environ.get(\"COMPRESSION_TRIGGER_RATIO\", \"0.75\"))\n\nSKIP_DEEP_RESEARCH_CLARIFICATION = (\n    os.environ.get(\"SKIP_DEEP_RESEARCH_CLARIFICATION\", \"false\").lower() == \"true\"\n)\n"
  },
  {
    "path": "backend/onyx/configs/constants.py",
    "content": "import platform\nimport re\nimport socket\nfrom enum import auto\nfrom enum import Enum\n\n\nONYX_DEFAULT_APPLICATION_NAME = \"Onyx\"\nONYX_DISCORD_URL = \"https://discord.gg/4NA5SbzrWb\"\nONYX_UTM_SOURCE = \"onyx_app\"\nSLACK_USER_TOKEN_PREFIX = \"xoxp-\"\nSLACK_BOT_TOKEN_PREFIX = \"xoxb-\"\nONYX_EMAILABLE_LOGO_MAX_DIM = 512\n\nSOURCE_TYPE = \"source_type\"\n# stored in the `metadata` of a chunk. Used to signify that this chunk should\n# not be used for QA. For example, Google Drive file types which can't be parsed\n# are still useful as a search result but not for QA.\nIGNORE_FOR_QA = \"ignore_for_qa\"\n# NOTE: deprecated, only used for porting key from old system\nGEN_AI_API_KEY_STORAGE_KEY = \"genai_api_key\"\nPUBLIC_DOC_PAT = \"PUBLIC\"\nID_SEPARATOR = \":;:\"\nDEFAULT_BOOST = 0\n\n# Tag for endpoints that should be included in the public API documentation\nPUBLIC_API_TAGS: list[str | Enum] = [\"public\"]\n\n# Cookies\nFASTAPI_USERS_AUTH_COOKIE_NAME = (\n    \"fastapiusersauth\"  # Currently a constant, but logic allows for configuration\n)\nTENANT_ID_COOKIE_NAME = \"onyx_tid\"  # tenant id - for workaround cases\nANONYMOUS_USER_COOKIE_NAME = \"onyx_anonymous_user\"\n\n# ID used in UserInfo API responses for anonymous users (not a UUID, just a string identifier)\nANONYMOUS_USER_INFO_ID = \"__anonymous_user__\"\n# Placeholder user for migrating no-auth data to first registered user\nNO_AUTH_PLACEHOLDER_USER_UUID = \"00000000-0000-0000-0000-000000000001\"\nNO_AUTH_PLACEHOLDER_USER_EMAIL = \"no-auth-placeholder@onyx.app\"\n# Real anonymous user in DB for anonymous access feature\nANONYMOUS_USER_UUID = \"00000000-0000-0000-0000-000000000002\"\nANONYMOUS_USER_EMAIL = \"anonymous@onyx.app\"\n\n# For chunking/processing chunks\nRETURN_SEPARATOR = \"\\n\\r\\n\"\nSECTION_SEPARATOR = \"\\n\\n\"\n# For combining attributes, doesn't have to be unique/perfect to work\nINDEX_SEPARATOR = \"===\"\n\n# For File Connector Metadata override file\nONYX_METADATA_FILENAME = \".onyx_metadata.json\"\n\n# Messages\nDISABLED_GEN_AI_MSG = (\n    \"Your System Admin has disabled the Generative AI functionalities of Onyx.\\n\"\n    \"Please contact them if you wish to have this enabled.\\n\"\n    \"You can still use Onyx as a search engine.\"\n)\n\n#####\n# Version Pattern Configs\n#####\n# Version patterns for Docker image tags\nSTABLE_VERSION_PATTERN = re.compile(r\"^v(\\d+)\\.(\\d+)\\.(\\d+)$\")\nDEV_VERSION_PATTERN = re.compile(r\"^v(\\d+)\\.(\\d+)\\.(\\d+)-beta\\.(\\d+)$\")\n\nDEFAULT_PERSONA_ID = 0\n\nDEFAULT_CC_PAIR_ID = 1\n\n\nCANCEL_CHECK_INTERVAL = 20\nDISPATCH_SEP_CHAR = \"\\n\"\nFORMAT_DOCS_SEPARATOR = \"\\n\\n\"\nNUM_EXPLORATORY_DOCS = 15\n# Postgres connection constants for application_name\nPOSTGRES_WEB_APP_NAME = \"web\"\nPOSTGRES_INDEXER_APP_NAME = \"indexer\"\nPOSTGRES_CELERY_APP_NAME = \"celery\"\nPOSTGRES_CELERY_BEAT_APP_NAME = \"celery_beat\"\nPOSTGRES_CELERY_WORKER_PRIMARY_APP_NAME = \"celery_worker_primary\"\nPOSTGRES_CELERY_WORKER_LIGHT_APP_NAME = \"celery_worker_light\"\nPOSTGRES_CELERY_WORKER_DOCPROCESSING_APP_NAME = \"celery_worker_docprocessing\"\nPOSTGRES_CELERY_WORKER_DOCFETCHING_APP_NAME = \"celery_worker_docfetching\"\nPOSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME = \"celery_worker_indexing_child\"\nPOSTGRES_CELERY_WORKER_HEAVY_APP_NAME = \"celery_worker_heavy\"\nPOSTGRES_CELERY_WORKER_MONITORING_APP_NAME = \"celery_worker_monitoring\"\nPOSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME = (\n    \"celery_worker_user_file_processing\"\n)\nPOSTGRES_PERMISSIONS_APP_NAME = \"permissions\"\nPOSTGRES_UNKNOWN_APP_NAME = \"unknown\"\n\nSSL_CERT_FILE = \"bundle.pem\"\n# API Keys\nDANSWER_API_KEY_PREFIX = \"API_KEY__\"\nDANSWER_API_KEY_DUMMY_EMAIL_DOMAIN = \"onyxapikey.ai\"\nUNNAMED_KEY_PLACEHOLDER = \"Unnamed\"\nDISCORD_SERVICE_API_KEY_NAME = \"discord-bot-service\"\n\n# Key-Value store keys\nKV_REINDEX_KEY = \"needs_reindexing\"\nKV_UNSTRUCTURED_API_KEY = \"unstructured_api_key\"\nKV_USER_STORE_KEY = \"INVITED_USERS\"\nKV_PENDING_USERS_KEY = \"PENDING_USERS\"\nKV_ANONYMOUS_USER_PREFERENCES_KEY = \"anonymous_user_preferences\"\nKV_ANONYMOUS_USER_PERSONALIZATION_KEY = \"anonymous_user_personalization\"\nKV_CRED_KEY = \"credential_id_{}\"\nKV_GMAIL_CRED_KEY = \"gmail_app_credential\"\nKV_GMAIL_SERVICE_ACCOUNT_KEY = \"gmail_service_account_key\"\nKV_GOOGLE_DRIVE_CRED_KEY = \"google_drive_app_credential\"\nKV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY = \"google_drive_service_account_key\"\nKV_GEN_AI_KEY_CHECK_TIME = \"genai_api_key_last_check_time\"\nKV_SETTINGS_KEY = \"onyx_settings\"\nKV_CUSTOMER_UUID_KEY = \"customer_uuid\"\nKV_INSTANCE_DOMAIN_KEY = \"instance_domain\"\nKV_ENTERPRISE_SETTINGS_KEY = \"onyx_enterprise_settings\"\nKV_CUSTOM_ANALYTICS_SCRIPT_KEY = \"__custom_analytics_script__\"\nKV_KG_CONFIG_KEY = \"kg_config\"\n\n# NOTE: we use this timeout / 4 in various places to refresh a lock\n# might be worth separating this timeout into separate timeouts for each situation\nCELERY_GENERIC_BEAT_LOCK_TIMEOUT = 120\n\nCELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT = 120\n\n\nCELERY_PRIMARY_WORKER_LOCK_TIMEOUT = 120\n\n\n# hard timeout applied by the watchdog to the indexing connector run\n# to handle hung connectors\nCELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT = 3 * 60 * 60  # 3 hours (in seconds)\n\n# soft timeout for the lock taken by the indexing connector run\n# allows the lock to eventually expire if the managing code around it dies\n# if we can get callbacks as object bytes download, we could lower this a lot.\n# CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT + 15 minutes\n# hard termination should always fire first if the connector is hung\nCELERY_INDEXING_LOCK_TIMEOUT = CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT + 900\n\n# Heartbeat interval for indexing worker liveness detection\nINDEXING_WORKER_HEARTBEAT_INTERVAL = 30  # seconds\n\n# how long a task should wait for associated fence to be ready\nCELERY_TASK_WAIT_FOR_FENCE_TIMEOUT = 5 * 60  # 5 min\n\n# needs to be long enough to cover the maximum time it takes to download an object\n# if we can get callbacks as object bytes download, we could lower this a lot.\nCELERY_PRUNING_LOCK_TIMEOUT = 3600  # 1 hour (in seconds)\n\nCELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT = 3600  # 1 hour (in seconds)\n\nCELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT = 300  # 5 min\n\nCELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT = 30 * 60  # 30 minutes (in seconds)\n\n# How long a queued user-file task is valid before workers discard it.\n# Should be longer than the beat interval (20 s) but short enough to prevent\n# indefinite queue growth.  Workers drop tasks older than this without touching\n# the DB, so a shorter value = faster drain of stale duplicates.\nCELERY_USER_FILE_PROCESSING_TASK_EXPIRES = 60  # 1 minute (in seconds)\n\n# Maximum number of tasks allowed in the user-file-processing queue before the\n# beat generator stops adding more.  Prevents unbounded queue growth when workers\n# fall behind.\nUSER_FILE_PROCESSING_MAX_QUEUE_DEPTH = 500\n# How long a queued user-file-project-sync task remains valid.\n# Should be short enough to discard stale queue entries under load while still\n# allowing workers enough time to pick up new tasks.\nCELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES = 60  # 1 minute (in seconds)\n\n# Max queue depth before user-file-project-sync producers stop enqueuing.\n# This applies backpressure when workers are falling behind.\nUSER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH = 500\n\nCELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT = 5 * 60  # 5 minutes (in seconds)\n\n# How long a queued user-file-delete task is valid before workers discard it.\n# Mirrors the processing task expiry to prevent indefinite queue growth when\n# files are stuck in DELETING status and the beat keeps re-enqueuing them.\nCELERY_USER_FILE_DELETE_TASK_EXPIRES = 60  # 1 minute (in seconds)\n\n# Max queue depth before the delete beat stops enqueuing more delete tasks.\nUSER_FILE_DELETE_MAX_QUEUE_DEPTH = 500\n\nCELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT = 5 * 60  # 5 minutes (in seconds)\n\nDANSWER_REDIS_FUNCTION_LOCK_PREFIX = \"da_function_lock:\"\n\nTMP_DRALPHA_PERSONA_NAME = \"KG Beta\"\n\n\nclass DocumentSource(str, Enum):\n    # Special case, document passed in via Onyx APIs without specifying a source type\n    INGESTION_API = \"ingestion_api\"\n    SLACK = \"slack\"\n    WEB = \"web\"\n    GOOGLE_DRIVE = \"google_drive\"\n    GMAIL = \"gmail\"\n    REQUESTTRACKER = \"requesttracker\"\n    GITHUB = \"github\"\n    GITBOOK = \"gitbook\"\n    GITLAB = \"gitlab\"\n    GURU = \"guru\"\n    BOOKSTACK = \"bookstack\"\n    OUTLINE = \"outline\"\n    CONFLUENCE = \"confluence\"\n    JIRA = \"jira\"\n    SLAB = \"slab\"\n    PRODUCTBOARD = \"productboard\"\n    FILE = \"file\"\n    CODA = \"coda\"\n    CANVAS = \"canvas\"\n    NOTION = \"notion\"\n    ZULIP = \"zulip\"\n    LINEAR = \"linear\"\n    HUBSPOT = \"hubspot\"\n    DOCUMENT360 = \"document360\"\n    GONG = \"gong\"\n    GOOGLE_SITES = \"google_sites\"\n    ZENDESK = \"zendesk\"\n    LOOPIO = \"loopio\"\n    DROPBOX = \"dropbox\"\n    SHAREPOINT = \"sharepoint\"\n    TEAMS = \"teams\"\n    SALESFORCE = \"salesforce\"\n    DISCOURSE = \"discourse\"\n    AXERO = \"axero\"\n    CLICKUP = \"clickup\"\n    MEDIAWIKI = \"mediawiki\"\n    WIKIPEDIA = \"wikipedia\"\n    ASANA = \"asana\"\n    S3 = \"s3\"\n    R2 = \"r2\"\n    GOOGLE_CLOUD_STORAGE = \"google_cloud_storage\"\n    OCI_STORAGE = \"oci_storage\"\n    XENFORO = \"xenforo\"\n    NOT_APPLICABLE = \"not_applicable\"\n    DISCORD = \"discord\"\n    FRESHDESK = \"freshdesk\"\n    FIREFLIES = \"fireflies\"\n    EGNYTE = \"egnyte\"\n    AIRTABLE = \"airtable\"\n    HIGHSPOT = \"highspot\"\n    DRUPAL_WIKI = \"drupal_wiki\"\n\n    IMAP = \"imap\"\n    BITBUCKET = \"bitbucket\"\n    TESTRAIL = \"testrail\"\n\n    # Special case just for integration tests\n    MOCK_CONNECTOR = \"mock_connector\"\n    # Special case for user files\n    USER_FILE = \"user_file\"\n    # Raw files for Craft sandbox access (xlsx, pptx, docx, etc.)\n    # Uses RAW_BINARY processing mode - no text extraction\n    CRAFT_FILE = \"craft_file\"\n\n\nclass FederatedConnectorSource(str, Enum):\n    FEDERATED_SLACK = \"federated_slack\"\n\n    def to_non_federated_source(self) -> DocumentSource | None:\n        if self == FederatedConnectorSource.FEDERATED_SLACK:\n            return DocumentSource.SLACK\n        return None\n\n\nDocumentSourceRequiringTenantContext: list[DocumentSource] = [DocumentSource.FILE]\n\n\nclass NotificationType(str, Enum):\n    REINDEX = \"reindex\"\n    PERSONA_SHARED = \"persona_shared\"\n    TRIAL_ENDS_TWO_DAYS = \"two_day_trial_ending\"  # 2 days left in trial\n    RELEASE_NOTES = \"release_notes\"\n    ASSISTANT_FILES_READY = \"assistant_files_ready\"\n    FEATURE_ANNOUNCEMENT = \"feature_announcement\"\n\n\nclass BlobType(str, Enum):\n    R2 = \"r2\"\n    S3 = \"s3\"\n    GOOGLE_CLOUD_STORAGE = \"google_cloud_storage\"\n    OCI_STORAGE = \"oci_storage\"\n\n\nclass DocumentIndexType(str, Enum):\n    COMBINED = \"combined\"  # Vespa\n    SPLIT = \"split\"  # Typesense + Qdrant\n\n\nclass AuthType(str, Enum):\n    BASIC = \"basic\"\n    GOOGLE_OAUTH = \"google_oauth\"\n    OIDC = \"oidc\"\n    SAML = \"saml\"\n\n    # google auth and basic\n    CLOUD = \"cloud\"\n\n\nclass QueryHistoryType(str, Enum):\n    DISABLED = \"disabled\"\n    ANONYMIZED = \"anonymized\"\n    NORMAL = \"normal\"\n\n\n# Special characters for password validation\nPASSWORD_SPECIAL_CHARS = \"!@#$%^&*()_+-=[]{}|;:,.<>?\"\n\n\nclass SessionType(str, Enum):\n    CHAT = \"Chat\"\n    SEARCH = \"Search\"\n    SLACK = \"Slack\"\n\n\nclass QAFeedbackType(str, Enum):\n    LIKE = \"like\"  # User likes the answer, used for metrics\n    DISLIKE = \"dislike\"  # User dislikes the answer, used for metrics\n    MIXED = \"mixed\"  # User likes some answers and dislikes other, used for chat session metrics\n\n\nclass SearchFeedbackType(str, Enum):\n    ENDORSE = \"endorse\"  # boost this document for all future queries\n    REJECT = \"reject\"  # down-boost this document for all future queries\n    HIDE = \"hide\"  # mark this document as untrusted, hide from LLM\n    UNHIDE = \"unhide\"\n\n\nclass MessageType(str, Enum):\n    # Using OpenAI standards, Langchain equivalent shown in comment\n    # System message is always constructed on the fly, not saved\n    SYSTEM = \"system\"  # SystemMessage\n    USER = \"user\"  # HumanMessage\n    ASSISTANT = \"assistant\"  # AIMessage - Can include tool_calls field for parallel tool calling\n    TOOL_CALL_RESPONSE = \"tool_call_response\"\n    USER_REMINDER = \"user_reminder\"  # Custom Onyx message type which is translated into a USER message when passed to the LLM\n\n\nclass ChatMessageSimpleType(str, Enum):\n    USER = \"user\"\n    ASSISTANT = \"assistant\"\n    TOOL_CALL = \"tool_call\"\n    FILE_TEXT = \"file_text\"\n\n\nclass TokenRateLimitScope(str, Enum):\n    USER = \"user\"\n    USER_GROUP = \"user_group\"\n    GLOBAL = \"global\"\n\n\nclass FileStoreType(str, Enum):\n    S3 = \"s3\"\n    POSTGRES = \"postgres\"\n\n\nclass FileOrigin(str, Enum):\n    CHAT_UPLOAD = \"chat_upload\"\n    CHAT_IMAGE_GEN = \"chat_image_gen\"\n    CONNECTOR = \"connector\"\n    CONNECTOR_METADATA = \"connector_metadata\"\n    GENERATED_REPORT = \"generated_report\"\n    INDEXING_CHECKPOINT = \"indexing_checkpoint\"\n    PLAINTEXT_CACHE = \"plaintext_cache\"\n    OTHER = \"other\"\n    QUERY_HISTORY_CSV = \"query_history_csv\"\n    SANDBOX_SNAPSHOT = \"sandbox_snapshot\"\n    USER_FILE = \"user_file\"\n\n\nclass FileType(str, Enum):\n    CSV = \"text/csv\"\n\n\nclass MilestoneRecordType(str, Enum):\n    TENANT_CREATED = \"tenant_created\"\n    USER_SIGNED_UP = \"user_signed_up\"\n    VISITED_ADMIN_PAGE = \"visited_admin_page\"\n    CREATED_CONNECTOR = \"created_connector\"\n    CONNECTOR_SUCCEEDED = \"connector_succeeded\"\n    RAN_QUERY = \"ran_query\"\n    USER_MESSAGE_SENT = \"user_message_sent\"\n    MULTIPLE_ASSISTANTS = \"multiple_assistants\"\n    CREATED_ASSISTANT = \"created_assistant\"\n    CREATED_ONYX_BOT = \"created_onyx_bot\"\n    REQUESTED_CONNECTOR = \"requested_connector\"\n\n\nclass PostgresAdvisoryLocks(Enum):\n    KOMBU_MESSAGE_CLEANUP_LOCK_ID = auto()\n\n\nclass OnyxCeleryQueues:\n    # \"celery\" is the default queue defined by celery and also the queue\n    # we are running in the primary worker to run system tasks\n    # Tasks running in this queue should be designed specifically to run quickly\n    PRIMARY = \"celery\"\n\n    # Light queue\n    VESPA_METADATA_SYNC = \"vespa_metadata_sync\"\n    DOC_PERMISSIONS_UPSERT = \"doc_permissions_upsert\"\n    CONNECTOR_DELETION = \"connector_deletion\"\n    LLM_MODEL_UPDATE = \"llm_model_update\"\n    CHECKPOINT_CLEANUP = \"checkpoint_cleanup\"\n    INDEX_ATTEMPT_CLEANUP = \"index_attempt_cleanup\"\n    # Heavy queue\n    CONNECTOR_PRUNING = \"connector_pruning\"\n    CONNECTOR_DOC_PERMISSIONS_SYNC = \"connector_doc_permissions_sync\"\n    CONNECTOR_EXTERNAL_GROUP_SYNC = \"connector_external_group_sync\"\n    CONNECTOR_HIERARCHY_FETCHING = \"connector_hierarchy_fetching\"\n    CSV_GENERATION = \"csv_generation\"\n\n    # User file processing queue\n    USER_FILE_PROCESSING = \"user_file_processing\"\n    USER_FILE_PROJECT_SYNC = \"user_file_project_sync\"\n    USER_FILE_DELETE = \"user_file_delete\"\n    # Document processing pipeline queue\n    DOCPROCESSING = \"docprocessing\"\n    CONNECTOR_DOC_FETCHING = \"connector_doc_fetching\"\n\n    # Monitoring queue\n    MONITORING = \"monitoring\"\n\n    # Sandbox processing queue\n    SANDBOX = \"sandbox\"\n\n    OPENSEARCH_MIGRATION = \"opensearch_migration\"\n\n\nclass OnyxRedisLocks:\n    PRIMARY_WORKER = \"da_lock:primary_worker\"\n    CHECK_VESPA_SYNC_BEAT_LOCK = \"da_lock:check_vespa_sync_beat\"\n    CHECK_CONNECTOR_DELETION_BEAT_LOCK = \"da_lock:check_connector_deletion_beat\"\n    CHECK_PRUNE_BEAT_LOCK = \"da_lock:check_prune_beat\"\n    CHECK_HIERARCHY_FETCHING_BEAT_LOCK = \"da_lock:check_hierarchy_fetching_beat\"\n    CHECK_INDEXING_BEAT_LOCK = \"da_lock:check_indexing_beat\"\n    CHECK_CHECKPOINT_CLEANUP_BEAT_LOCK = \"da_lock:check_checkpoint_cleanup_beat\"\n    CHECK_INDEX_ATTEMPT_CLEANUP_BEAT_LOCK = \"da_lock:check_index_attempt_cleanup_beat\"\n    CHECK_CONNECTOR_DOC_PERMISSIONS_SYNC_BEAT_LOCK = (\n        \"da_lock:check_connector_doc_permissions_sync_beat\"\n    )\n    CHECK_CONNECTOR_EXTERNAL_GROUP_SYNC_BEAT_LOCK = (\n        \"da_lock:check_connector_external_group_sync_beat\"\n    )\n    OPENSEARCH_MIGRATION_BEAT_LOCK = \"da_lock:opensearch_migration_beat\"\n\n    MONITOR_BACKGROUND_PROCESSES_LOCK = \"da_lock:monitor_background_processes\"\n    CHECK_AVAILABLE_TENANTS_LOCK = \"da_lock:check_available_tenants\"\n    CLOUD_PRE_PROVISION_TENANT_LOCK = \"da_lock:pre_provision_tenant\"\n\n    CONNECTOR_DOC_PERMISSIONS_SYNC_LOCK_PREFIX = (\n        \"da_lock:connector_doc_permissions_sync\"\n    )\n    CONNECTOR_EXTERNAL_GROUP_SYNC_LOCK_PREFIX = \"da_lock:connector_external_group_sync\"\n    PRUNING_LOCK_PREFIX = \"da_lock:pruning\"\n    INDEXING_METADATA_PREFIX = \"da_metadata:indexing\"\n\n    SLACK_BOT_LOCK = \"da_lock:slack_bot\"\n    SLACK_BOT_HEARTBEAT_PREFIX = \"da_heartbeat:slack_bot\"\n    ANONYMOUS_USER_ENABLED = \"anonymous_user_enabled\"\n\n    CLOUD_BEAT_TASK_GENERATOR_LOCK = \"da_lock:cloud_beat_task_generator\"\n    CLOUD_CHECK_ALEMBIC_BEAT_LOCK = \"da_lock:cloud_check_alembic\"\n\n    # User file processing\n    USER_FILE_PROCESSING_BEAT_LOCK = \"da_lock:check_user_file_processing_beat\"\n    USER_FILE_PROCESSING_LOCK_PREFIX = \"da_lock:user_file_processing\"\n    # Short-lived key set when a task is enqueued; cleared when the worker picks it up.\n    # Prevents the beat from re-enqueuing the same file while a task is already queued.\n    USER_FILE_QUEUED_PREFIX = \"da_lock:user_file_queued\"\n    USER_FILE_PROJECT_SYNC_BEAT_LOCK = \"da_lock:check_user_file_project_sync_beat\"\n    USER_FILE_PROJECT_SYNC_LOCK_PREFIX = \"da_lock:user_file_project_sync\"\n    USER_FILE_PROJECT_SYNC_QUEUED_PREFIX = \"da_lock:user_file_project_sync_queued\"\n    USER_FILE_DELETE_BEAT_LOCK = \"da_lock:check_user_file_delete_beat\"\n    USER_FILE_DELETE_LOCK_PREFIX = \"da_lock:user_file_delete\"\n    # Short-lived key set when a delete task is enqueued; cleared when the worker picks it up.\n    # Prevents the beat from re-enqueuing the same file while a delete task is already queued.\n    USER_FILE_DELETE_QUEUED_PREFIX = \"da_lock:user_file_delete_queued\"\n\n    # Release notes\n    RELEASE_NOTES_FETCH_LOCK = \"da_lock:release_notes_fetch\"\n\n    # Sandbox cleanup\n    CLEANUP_IDLE_SANDBOXES_BEAT_LOCK = \"da_lock:cleanup_idle_sandboxes_beat\"\n    CLEANUP_OLD_SNAPSHOTS_BEAT_LOCK = \"da_lock:cleanup_old_snapshots_beat\"\n\n    # Sandbox file sync\n    SANDBOX_FILE_SYNC_LOCK_PREFIX = \"da_lock:sandbox_file_sync\"\n\n\nclass OnyxRedisSignals:\n    BLOCK_VALIDATE_INDEXING_FENCES = \"signal:block_validate_indexing_fences\"\n    BLOCK_VALIDATE_EXTERNAL_GROUP_SYNC_FENCES = (\n        \"signal:block_validate_external_group_sync_fences\"\n    )\n    BLOCK_VALIDATE_PERMISSION_SYNC_FENCES = (\n        \"signal:block_validate_permission_sync_fences\"\n    )\n    BLOCK_PRUNING = \"signal:block_pruning\"\n    BLOCK_VALIDATE_PRUNING_FENCES = \"signal:block_validate_pruning_fences\"\n    BLOCK_BUILD_FENCE_LOOKUP_TABLE = \"signal:block_build_fence_lookup_table\"\n    BLOCK_VALIDATE_CONNECTOR_DELETION_FENCES = (\n        \"signal:block_validate_connector_deletion_fences\"\n    )\n\n\nclass OnyxRedisConstants:\n    ACTIVE_FENCES = \"active_fences\"\n\n\nclass OnyxCeleryPriority(int, Enum):\n    HIGHEST = 0\n    HIGH = auto()\n    MEDIUM = auto()\n    LOW = auto()\n    LOWEST = auto()\n\n\n# a prefix used to distinguish system wide tasks in the cloud\nONYX_CLOUD_CELERY_TASK_PREFIX = \"cloud\"\n\n# the tenant id we use for system level redis operations\nONYX_CLOUD_TENANT_ID = \"cloud\"\n\n# the redis namespace for runtime variables\nONYX_CLOUD_REDIS_RUNTIME = \"runtime\"\nCLOUD_BUILD_FENCE_LOOKUP_TABLE_INTERVAL_DEFAULT = 600\n\n\nclass OnyxCeleryTask:\n    DEFAULT = \"celery\"\n\n    CLOUD_BEAT_TASK_GENERATOR = f\"{ONYX_CLOUD_CELERY_TASK_PREFIX}_generate_beat_tasks\"\n    CLOUD_MONITOR_ALEMBIC = f\"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor_alembic\"\n    CLOUD_MONITOR_CELERY_QUEUES = (\n        f\"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor_celery_queues\"\n    )\n    CLOUD_CHECK_AVAILABLE_TENANTS = (\n        f\"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check_available_tenants\"\n    )\n    CLOUD_MONITOR_CELERY_PIDBOX = (\n        f\"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor_celery_pidbox\"\n    )\n\n    CHECK_FOR_CONNECTOR_DELETION = \"check_for_connector_deletion_task\"\n    CHECK_FOR_VESPA_SYNC_TASK = \"check_for_vespa_sync_task\"\n    CHECK_FOR_INDEXING = \"check_for_indexing\"\n    CHECK_FOR_PRUNING = \"check_for_pruning\"\n    CHECK_FOR_HIERARCHY_FETCHING = \"check_for_hierarchy_fetching\"\n    CHECK_FOR_DOC_PERMISSIONS_SYNC = \"check_for_doc_permissions_sync\"\n    CHECK_FOR_EXTERNAL_GROUP_SYNC = \"check_for_external_group_sync\"\n    CHECK_FOR_AUTO_LLM_UPDATE = \"check_for_auto_llm_update\"\n\n    # User file processing\n    CHECK_FOR_USER_FILE_PROCESSING = \"check_for_user_file_processing\"\n    PROCESS_SINGLE_USER_FILE = \"process_single_user_file\"\n    CHECK_FOR_USER_FILE_PROJECT_SYNC = \"check_for_user_file_project_sync\"\n    PROCESS_SINGLE_USER_FILE_PROJECT_SYNC = \"process_single_user_file_project_sync\"\n    CHECK_FOR_USER_FILE_DELETE = \"check_for_user_file_delete\"\n    DELETE_SINGLE_USER_FILE = \"delete_single_user_file\"\n\n    # Connector checkpoint cleanup\n    CHECK_FOR_CHECKPOINT_CLEANUP = \"check_for_checkpoint_cleanup\"\n    CLEANUP_CHECKPOINT = \"cleanup_checkpoint\"\n\n    # Connector index attempt cleanup\n    CHECK_FOR_INDEX_ATTEMPT_CLEANUP = \"check_for_index_attempt_cleanup\"\n    CLEANUP_INDEX_ATTEMPT = \"cleanup_index_attempt\"\n\n    MONITOR_BACKGROUND_PROCESSES = \"monitor_background_processes\"\n    MONITOR_CELERY_QUEUES = \"monitor_celery_queues\"\n    MONITOR_PROCESS_MEMORY = \"monitor_process_memory\"\n    CELERY_BEAT_HEARTBEAT = \"celery_beat_heartbeat\"\n\n    KOMBU_MESSAGE_CLEANUP_TASK = \"kombu_message_cleanup_task\"\n    CONNECTOR_PERMISSION_SYNC_GENERATOR_TASK = (\n        \"connector_permission_sync_generator_task\"\n    )\n    UPDATE_EXTERNAL_DOCUMENT_PERMISSIONS_TASK = (\n        \"update_external_document_permissions_task\"\n    )\n    CONNECTOR_EXTERNAL_GROUP_SYNC_GENERATOR_TASK = (\n        \"connector_external_group_sync_generator_task\"\n    )\n\n    # New split indexing tasks\n    CONNECTOR_DOC_FETCHING_TASK = \"connector_doc_fetching_task\"\n    DOCPROCESSING_TASK = \"docprocessing_task\"\n\n    CONNECTOR_PRUNING_GENERATOR_TASK = \"connector_pruning_generator_task\"\n    CONNECTOR_HIERARCHY_FETCHING_TASK = \"connector_hierarchy_fetching_task\"\n    DOCUMENT_BY_CC_PAIR_CLEANUP_TASK = \"document_by_cc_pair_cleanup_task\"\n    VESPA_METADATA_SYNC_TASK = \"vespa_metadata_sync_task\"\n\n    # chat retention\n    CHECK_TTL_MANAGEMENT_TASK = \"check_ttl_management_task\"\n    PERFORM_TTL_MANAGEMENT_TASK = \"perform_ttl_management_task\"\n\n    GENERATE_USAGE_REPORT_TASK = \"generate_usage_report_task\"\n\n    EVAL_RUN_TASK = \"eval_run_task\"\n    SCHEDULED_EVAL_TASK = \"scheduled_eval_task\"\n\n    EXPORT_QUERY_HISTORY_TASK = \"export_query_history_task\"\n    EXPORT_QUERY_HISTORY_CLEANUP_TASK = \"export_query_history_cleanup_task\"\n\n    # Hook execution log retention\n    HOOK_EXECUTION_LOG_CLEANUP_TASK = \"hook_execution_log_cleanup_task\"\n\n    # Sandbox cleanup\n    CLEANUP_IDLE_SANDBOXES = \"cleanup_idle_sandboxes\"\n    CLEANUP_OLD_SNAPSHOTS = \"cleanup_old_snapshots\"\n\n    # Sandbox file sync\n    SANDBOX_FILE_SYNC = \"sandbox_file_sync\"\n\n    CHECK_FOR_DOCUMENTS_FOR_OPENSEARCH_MIGRATION_TASK = (\n        \"check_for_documents_for_opensearch_migration_task\"\n    )\n    MIGRATE_DOCUMENTS_FROM_VESPA_TO_OPENSEARCH_TASK = (\n        \"migrate_documents_from_vespa_to_opensearch_task\"\n    )\n    MIGRATE_CHUNKS_FROM_VESPA_TO_OPENSEARCH_TASK = (\n        \"migrate_chunks_from_vespa_to_opensearch_task\"\n    )\n\n\n# this needs to correspond to the matching entry in supervisord\nONYX_CELERY_BEAT_HEARTBEAT_KEY = \"onyx:celery:beat:heartbeat\"\n\nREDIS_SOCKET_KEEPALIVE_OPTIONS = {}\nREDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPINTVL] = 15\nREDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPCNT] = 3\n\nif platform.system() == \"Darwin\":\n    REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPALIVE] = 60  # type: ignore[attr-defined,unused-ignore]\nelse:\n    REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPIDLE] = 60  # type: ignore[attr-defined,unused-ignore]\n\n\nclass OnyxCallTypes(str, Enum):\n    FIREFLIES = \"FIREFLIES\"\n    GONG = \"GONG\"\n\n\nNUM_DAYS_TO_KEEP_CHECKPOINTS = 7\n# checkpoints are queried based on index attempts, so we need to keep index attempts for one more day\nNUM_DAYS_TO_KEEP_INDEX_ATTEMPTS = NUM_DAYS_TO_KEEP_CHECKPOINTS + 1\n\n# TODO: this should be stored likely in database\nDocumentSourceDescription: dict[DocumentSource, str] = {\n    # Special case, document passed in via Onyx APIs without specifying a source type\n    DocumentSource.INGESTION_API: \"ingestion_api\",\n    DocumentSource.SLACK: \"slack channels for discussions and collaboration\",\n    DocumentSource.WEB: \"indexed web pages\",\n    DocumentSource.GOOGLE_DRIVE: \"google drive documents (docs, sheets, etc.)\",\n    DocumentSource.GMAIL: \"email messages\",\n    DocumentSource.REQUESTTRACKER: \"requesttracker\",\n    DocumentSource.GITHUB: \"github data (issues, PRs)\",\n    DocumentSource.GITBOOK: \"gitbook data\",\n    DocumentSource.GITLAB: \"gitlab data\",\n    DocumentSource.BITBUCKET: \"bitbucket data\",\n    DocumentSource.GURU: \"guru data\",\n    DocumentSource.BOOKSTACK: \"bookstack data\",\n    DocumentSource.OUTLINE: \"outline data\",\n    DocumentSource.CONFLUENCE: \"confluence data (pages, spaces, etc.)\",\n    DocumentSource.JIRA: \"jira data (issues, tickets, projects, etc.)\",\n    DocumentSource.SLAB: \"slab data\",\n    DocumentSource.PRODUCTBOARD: \"productboard data (boards, etc.)\",\n    DocumentSource.FILE: \"files\",\n    DocumentSource.CANVAS: \"canvas lms - courses, pages, assignments, and announcements\",\n    DocumentSource.CODA: \"coda - team workspace with docs, tables, and pages\",\n    DocumentSource.NOTION: \"notion data - a workspace that combines note-taking, \\\nproject management, and collaboration tools into a single, customizable platform\",\n    DocumentSource.ZULIP: \"zulip data\",\n    DocumentSource.LINEAR: \"linear data - project management tool, including tickets etc.\",\n    DocumentSource.HUBSPOT: \"hubspot data - CRM and marketing automation data\",\n    DocumentSource.DOCUMENT360: \"document360 data\",\n    DocumentSource.GONG: \"gong - call transcripts\",\n    DocumentSource.GOOGLE_SITES: \"google_sites - websites\",\n    DocumentSource.ZENDESK: \"zendesk - customer support data\",\n    DocumentSource.LOOPIO: \"loopio - rfp data\",\n    DocumentSource.DROPBOX: \"dropbox - files\",\n    DocumentSource.SHAREPOINT: \"sharepoint - files\",\n    DocumentSource.TEAMS: \"teams - chat and collaboration\",\n    DocumentSource.SALESFORCE: \"salesforce - CRM data\",\n    DocumentSource.DISCOURSE: \"discourse - discussion forums\",\n    DocumentSource.AXERO: \"axero - employee engagement data\",\n    DocumentSource.CLICKUP: \"clickup - project management tool\",\n    DocumentSource.MEDIAWIKI: \"mediawiki - wiki data\",\n    DocumentSource.WIKIPEDIA: \"wikipedia - encyclopedia data\",\n    DocumentSource.ASANA: \"asana\",\n    DocumentSource.S3: \"s3\",\n    DocumentSource.R2: \"r2\",\n    DocumentSource.GOOGLE_CLOUD_STORAGE: \"google_cloud_storage - cloud storage\",\n    DocumentSource.OCI_STORAGE: \"oci_storage - cloud storage\",\n    DocumentSource.XENFORO: \"xenforo - forum data\",\n    DocumentSource.DISCORD: \"discord - chat and collaboration\",\n    DocumentSource.FRESHDESK: \"freshdesk - customer support data\",\n    DocumentSource.FIREFLIES: \"fireflies - call transcripts\",\n    DocumentSource.EGNYTE: \"egnyte - files\",\n    DocumentSource.AIRTABLE: \"airtable - database\",\n    DocumentSource.HIGHSPOT: \"highspot - CRM data\",\n    DocumentSource.DRUPAL_WIKI: \"drupal wiki - knowledge base content (pages, spaces, attachments)\",\n    DocumentSource.IMAP: \"imap - email data\",\n    DocumentSource.TESTRAIL: \"testrail - test case management tool for QA processes\",\n}\n"
  },
  {
    "path": "backend/onyx/configs/embedding_configs.py",
    "content": "from pydantic import BaseModel\n\nfrom onyx.db.enums import EmbeddingPrecision\n\n\nclass _BaseEmbeddingModel(BaseModel):\n    \"\"\"Private model for defining base embedding model configurations.\"\"\"\n\n    name: str\n    dim: int\n    index_name: str\n\n\nclass SupportedEmbeddingModel(BaseModel):\n    name: str\n    dim: int\n    index_name: str\n    embedding_precision: EmbeddingPrecision\n\n\n# Base embedding model configurations (without precision)\n_BASE_EMBEDDING_MODELS = [\n    # Cloud-based models\n    _BaseEmbeddingModel(\n        name=\"cohere/embed-english-v3.0\",\n        dim=1024,\n        index_name=\"danswer_chunk_cohere_embed_english_v3_0\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"cohere/embed-english-v3.0\",\n        dim=1024,\n        index_name=\"danswer_chunk_embed_english_v3_0\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"cohere/embed-english-light-v3.0\",\n        dim=384,\n        index_name=\"danswer_chunk_cohere_embed_english_light_v3_0\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"cohere/embed-english-light-v3.0\",\n        dim=384,\n        index_name=\"danswer_chunk_embed_english_light_v3_0\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"openai/text-embedding-3-large\",\n        dim=3072,\n        index_name=\"danswer_chunk_openai_text_embedding_3_large\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"openai/text-embedding-3-large\",\n        dim=3072,\n        index_name=\"danswer_chunk_text_embedding_3_large\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"openai/text-embedding-3-small\",\n        dim=1536,\n        index_name=\"danswer_chunk_openai_text_embedding_3_small\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"openai/text-embedding-3-small\",\n        dim=1536,\n        index_name=\"danswer_chunk_text_embedding_3_small\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"google/gemini-embedding-001\",\n        dim=3072,\n        index_name=\"danswer_chunk_gemini_embedding_001\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"google/text-embedding-005\",\n        dim=768,\n        index_name=\"danswer_chunk_text_embedding_005\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"voyage/voyage-large-2-instruct\",\n        dim=1024,\n        index_name=\"danswer_chunk_voyage_large_2_instruct\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"voyage/voyage-large-2-instruct\",\n        dim=1024,\n        index_name=\"danswer_chunk_large_2_instruct\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"voyage/voyage-light-2-instruct\",\n        dim=384,\n        index_name=\"danswer_chunk_voyage_light_2_instruct\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"voyage/voyage-light-2-instruct\",\n        dim=384,\n        index_name=\"danswer_chunk_light_2_instruct\",\n    ),\n    # Self-hosted models\n    _BaseEmbeddingModel(\n        name=\"nomic-ai/nomic-embed-text-v1\",\n        dim=768,\n        index_name=\"danswer_chunk_nomic_ai_nomic_embed_text_v1\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"nomic-ai/nomic-embed-text-v1\",\n        dim=768,\n        index_name=\"danswer_chunk_nomic_embed_text_v1\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"intfloat/e5-base-v2\",\n        dim=768,\n        index_name=\"danswer_chunk_intfloat_e5_base_v2\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"intfloat/e5-small-v2\",\n        dim=384,\n        index_name=\"danswer_chunk_intfloat_e5_small_v2\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"intfloat/multilingual-e5-base\",\n        dim=768,\n        index_name=\"danswer_chunk_intfloat_multilingual_e5_base\",\n    ),\n    _BaseEmbeddingModel(\n        name=\"intfloat/multilingual-e5-small\",\n        dim=384,\n        index_name=\"danswer_chunk_intfloat_multilingual_e5_small\",\n    ),\n]\n\n# Automatically generate both FLOAT and BFLOAT16 versions of all models\nSUPPORTED_EMBEDDING_MODELS = [\n    # BFLOAT16 precision versions\n    *[\n        SupportedEmbeddingModel(\n            name=model.name,\n            dim=model.dim,\n            index_name=f\"{model.index_name}_bfloat16\",\n            embedding_precision=EmbeddingPrecision.BFLOAT16,\n        )\n        for model in _BASE_EMBEDDING_MODELS\n    ],\n    # FLOAT precision versions\n    # NOTE: need to keep this one for backwards compatibility. We now default to\n    # BFLOAT16.\n    *[\n        SupportedEmbeddingModel(\n            name=model.name,\n            dim=model.dim,\n            index_name=model.index_name,\n            embedding_precision=EmbeddingPrecision.FLOAT,\n        )\n        for model in _BASE_EMBEDDING_MODELS\n    ],\n]\n"
  },
  {
    "path": "backend/onyx/configs/kg_configs.py",
    "content": "import os\n\nKG_RESEARCH_NUM_RETRIEVED_DOCS: int = int(\n    os.environ.get(\"KG_RESEARCH_NUM_RETRIEVED_DOCS\", \"25\")\n)\n\n\nKG_SIMPLE_ANSWER_MAX_DISPLAYED_SOURCES: int = int(\n    os.environ.get(\"KG_SIMPLE_ANSWER_MAX_DISPLAYED_SOURCES\", \"10\")\n)\n\n\nKG_ENTITY_EXTRACTION_TIMEOUT: int = int(\n    os.environ.get(\"KG_ENTITY_EXTRACTION_TIMEOUT\", \"15\")\n)\n\nKG_RELATIONSHIP_EXTRACTION_TIMEOUT: int = int(\n    os.environ.get(\"KG_RELATIONSHIP_EXTRACTION_TIMEOUT\", \"15\")\n)\n\nKG_STRATEGY_GENERATION_TIMEOUT: int = int(\n    os.environ.get(\"KG_STRATEGY_GENERATION_TIMEOUT\", \"20\")\n)\n\nKG_SQL_GENERATION_TIMEOUT: int = int(os.environ.get(\"KG_SQL_GENERATION_TIMEOUT\", \"40\"))\n\nKG_SQL_GENERATION_TIMEOUT_OVERRIDE: int = int(\n    os.environ.get(\"KG_SQL_GENERATION_TIMEOUT_OVERRIDE\", \"40\")\n)\n\nKG_SQL_GENERATION_MAX_TOKENS: int = int(\n    os.environ.get(\"KG_SQL_GENERATION_MAX_TOKENS\", \"1500\")\n)\n\nKG_TEMP_ALLOWED_DOCS_VIEW_NAME_PREFIX: str = os.environ.get(\n    \"KG_TEMP_ALLOWED_DOCS_VIEW_NAME_PREFIX\", \"allowed_docs\"\n)\n\nKG_TEMP_KG_RELATIONSHIPS_VIEW_NAME_PREFIX: str = os.environ.get(\n    \"KG_TEMP_KG_RELATIONSHIPS_VIEW_NAME_PREFIX\", \"kg_relationships_with_access\"\n)\n\nKG_TEMP_KG_ENTITIES_VIEW_NAME_PREFIX: str = os.environ.get(\n    \"KG_TEMP_KG_ENTITIES_VIEW_NAME_PREFIX\", \"kg_entities_with_access\"\n)\n\n\nKG_FILTER_CONSTRUCTION_TIMEOUT: int = int(\n    os.environ.get(\"KG_FILTER_CONSTRUCTION_TIMEOUT\", \"15\")\n)\n\n\nKG_NORMALIZATION_RETRIEVE_ENTITIES_LIMIT: int = int(\n    os.environ.get(\"KG_NORMALIZATION_RETRIEVE_ENTITIES_LIMIT\", \"100\")\n)\n\nKG_FILTERED_SEARCH_TIMEOUT: int = int(\n    os.environ.get(\"KG_FILTERED_SEARCH_TIMEOUT\", \"30\")\n)\n\n\nKG_OBJECT_SOURCE_RESEARCH_TIMEOUT: int = int(\n    os.environ.get(\"KG_OBJECT_SOURCE_RESEARCH_TIMEOUT\", \"30\")\n)\n\nKG_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION: int = int(\n    os.environ.get(\"KG_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION\", \"45\")\n)\n\nKG_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION: int = int(\n    os.environ.get(\"KG_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION\", \"15\")\n)\n\nKG_MAX_TOKENS_ANSWER_GENERATION: int = int(\n    os.environ.get(\"KG_MAX_TOKENS_ANSWER_GENERATION\", \"1024\")\n)\n\nKG_MAX_DEEP_SEARCH_RESULTS: int = int(\n    os.environ.get(\"KG_MAX_DEEP_SEARCH_RESULTS\", \"30\")\n)\n\n\nKG_METADATA_TRACKING_THRESHOLD: int = int(\n    os.environ.get(\"KG_METADATA_TRACKING_THRESHOLD\", \"10\")\n)\n\n\nKG_DEFAULT_MAX_PARENT_RECURSION_DEPTH: int = int(\n    os.environ.get(\"KG_DEFAULT_MAX_PARENT_RECURSION_DEPTH\", \"2\")\n)\n\n\n_KG_NORMALIZATION_RERANK_UNIGRAM_WEIGHT: float = max(\n    1e-3,\n    min(1, float(os.environ.get(\"KG_NORMALIZATION_RERANK_UNIGRAM_WEIGHT\", \"0.25\"))),\n)\n_KG_NORMALIZATION_RERANK_BIGRAM_WEIGHT: float = max(\n    1e-3,\n    min(1, float(os.environ.get(\"KG_NORMALIZATION_RERANK_BIGRAM_WEIGHT\", \"0.25\"))),\n)\n_KG_NORMALIZATION_RERANK_TRIGRAM_WEIGHT: float = max(\n    1e-3,\n    min(1, float(os.environ.get(\"KG_NORMALIZATION_RERANK_TRIGRAM_WEIGHT\", \"0.5\"))),\n)\n_KG_NORMALIZATION_RERANK_NGRAM_SUMS: float = (\n    _KG_NORMALIZATION_RERANK_UNIGRAM_WEIGHT\n    + _KG_NORMALIZATION_RERANK_BIGRAM_WEIGHT\n    + _KG_NORMALIZATION_RERANK_TRIGRAM_WEIGHT\n)\n\nKG_NORMALIZATION_RERANK_NGRAM_WEIGHTS: tuple[float, float, float] = (\n    _KG_NORMALIZATION_RERANK_UNIGRAM_WEIGHT / _KG_NORMALIZATION_RERANK_NGRAM_SUMS,\n    _KG_NORMALIZATION_RERANK_BIGRAM_WEIGHT / _KG_NORMALIZATION_RERANK_NGRAM_SUMS,\n    _KG_NORMALIZATION_RERANK_TRIGRAM_WEIGHT / _KG_NORMALIZATION_RERANK_NGRAM_SUMS,\n)\n\n\nKG_NORMALIZATION_RERANK_LEVENSHTEIN_WEIGHT: float = max(\n    0,\n    min(1, float(os.environ.get(\"KG_NORMALIZATION_RERANK_LEVENSHTEIN_WEIGHT\", \"0.25\"))),\n)\n\n\nKG_NORMALIZATION_RERANK_THRESHOLD: float = float(\n    os.environ.get(\"KG_NORMALIZATION_RERANK_THRESHOLD\", \"0.3\")\n)\n\n\nKG_CLUSTERING_RETRIEVE_THRESHOLD: float = float(\n    os.environ.get(\"KG_CLUSTERING_RETRIEVE_THRESHOLD\", \"0.6\")\n)\n\n\nKG_CLUSTERING_THRESHOLD: float = float(\n    os.environ.get(\"KG_CLUSTERING_THRESHOLD\", \"0.96\")\n)\n\nKG_MAX_SEARCH_DOCUMENTS: int = int(os.environ.get(\"KG_MAX_SEARCH_DOCUMENTS\", \"15\"))\n\nKG_MAX_DECOMPOSITION_SEGMENTS: int = int(\n    os.environ.get(\"KG_MAX_DECOMPOSITION_SEGMENTS\", \"10\")\n)\nKG_BETA_ASSISTANT_DESCRIPTION = \"The KG Beta assistant uses the Onyx Knowledge Graph (beta) structure \\\nto answer questions\"\n"
  },
  {
    "path": "backend/onyx/configs/llm_configs.py",
    "content": "from onyx.configs.app_configs import DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB\nfrom onyx.server.settings.store import load_settings\n\n\ndef get_image_extraction_and_analysis_enabled() -> bool:\n    \"\"\"Get image extraction and analysis enabled setting from workspace settings or fallback to False\"\"\"\n    try:\n        settings = load_settings()\n        if settings.image_extraction_and_analysis_enabled is not None:\n            return settings.image_extraction_and_analysis_enabled\n    except Exception:\n        pass\n\n    return False\n\n\ndef get_search_time_image_analysis_enabled() -> bool:\n    \"\"\"Get search time image analysis enabled setting from workspace settings or fallback to False\"\"\"\n    try:\n        settings = load_settings()\n        if settings.search_time_image_analysis_enabled is not None:\n            return settings.search_time_image_analysis_enabled\n    except Exception:\n        pass\n\n    return False\n\n\ndef get_image_analysis_max_size_mb() -> int:\n    \"\"\"Get image analysis max size MB setting from workspace settings or fallback to environment variable\"\"\"\n    try:\n        settings = load_settings()\n        if settings.image_analysis_max_size_mb is not None:\n            return settings.image_analysis_max_size_mb\n    except Exception:\n        pass\n\n    return DEFAULT_IMAGE_ANALYSIS_MAX_SIZE_MB\n"
  },
  {
    "path": "backend/onyx/configs/model_configs.py",
    "content": "import json\nimport os\n\n#####\n# Embedding/Reranking Model Configs\n#####\n# Important considerations when choosing models\n# Max tokens count needs to be high considering use case (at least 512)\n# Models used must be MIT or Apache license\n# Inference/Indexing speed\n# https://huggingface.co/DOCUMENT_ENCODER_MODEL\n# The useable models configured as below must be SentenceTransformer compatible\n# NOTE: DO NOT CHANGE SET THESE UNLESS YOU KNOW WHAT YOU ARE DOING\n# IDEALLY, YOU SHOULD CHANGE EMBEDDING MODELS VIA THE UI\nDEFAULT_DOCUMENT_ENCODER_MODEL = \"nomic-ai/nomic-embed-text-v1\"\nDOCUMENT_ENCODER_MODEL = (\n    os.environ.get(\"DOCUMENT_ENCODER_MODEL\") or DEFAULT_DOCUMENT_ENCODER_MODEL\n)\n# If the below is changed, Vespa deployment must also be changed\nDOC_EMBEDDING_DIM = int(os.environ.get(\"DOC_EMBEDDING_DIM\") or 768)\nNORMALIZE_EMBEDDINGS = (\n    os.environ.get(\"NORMALIZE_EMBEDDINGS\") or \"true\"\n).lower() == \"true\"\n\n# Old default model settings, which are needed for an automatic easy upgrade\nOLD_DEFAULT_DOCUMENT_ENCODER_MODEL = \"thenlper/gte-small\"\nOLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM = 384\nOLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS = False\n\n# These are only used if reranking is turned off, to normalize the direct retrieval scores for display\n# Currently unused\nSIM_SCORE_RANGE_LOW = float(os.environ.get(\"SIM_SCORE_RANGE_LOW\") or 0.0)\nSIM_SCORE_RANGE_HIGH = float(os.environ.get(\"SIM_SCORE_RANGE_HIGH\") or 1.0)\n# Certain models like e5, BGE, etc use a prefix for asymmetric retrievals (query generally shorter than docs)\nASYM_QUERY_PREFIX = os.environ.get(\"ASYM_QUERY_PREFIX\", \"search_query: \")\nASYM_PASSAGE_PREFIX = os.environ.get(\"ASYM_PASSAGE_PREFIX\", \"search_document: \")\n# Purely an optimization, memory limitation consideration\n\n# User's set embedding batch size overrides the default encoding batch sizes\nEMBEDDING_BATCH_SIZE = int(os.environ.get(\"EMBEDDING_BATCH_SIZE\") or 0) or None\n\nBATCH_SIZE_ENCODE_CHUNKS = EMBEDDING_BATCH_SIZE or 8\n# don't send over too many chunks at once, as sending too many could cause timeouts\nBATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = EMBEDDING_BATCH_SIZE or 512\n# For score display purposes, only way is to know the expected ranges\nCROSS_ENCODER_RANGE_MAX = 1\nCROSS_ENCODER_RANGE_MIN = 0\n\n\n#####\n# Generative AI Model Configs\n#####\n\n# NOTE: the 2 below should only be used for dev.\nGEN_AI_API_KEY = os.environ.get(\"GEN_AI_API_KEY\")\nGEN_AI_MODEL_VERSION = os.environ.get(\"GEN_AI_MODEL_VERSION\")\n\n# Override the auto-detection of LLM max context length\nGEN_AI_MAX_TOKENS = int(os.environ.get(\"GEN_AI_MAX_TOKENS\") or 0) or None\n\n# Set this to be enough for an answer + quotes. Also used for Chat\n# This is the minimum token context we will leave for the LLM to generate an answer\nGEN_AI_NUM_RESERVED_OUTPUT_TOKENS = int(\n    os.environ.get(\"GEN_AI_NUM_RESERVED_OUTPUT_TOKENS\") or 1024\n)\n\n# Fallback token limit for models where the max context is unknown\n# Set conservatively at 32K to handle most modern models\nGEN_AI_MODEL_FALLBACK_MAX_TOKENS = int(\n    os.environ.get(\"GEN_AI_MODEL_FALLBACK_MAX_TOKENS\") or 32000\n)\n\n# This is used when computing how much context space is available for documents\n# ahead of time in order to let the user know if they can \"select\" more documents\n# It represents a maximum \"expected\" number of input tokens from the latest user\n# message. At query time, we don't actually enforce this - we will only throw an\n# error if the total # of tokens exceeds the max input tokens.\nGEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS = 512\nGEN_AI_TEMPERATURE = float(os.environ.get(\"GEN_AI_TEMPERATURE\") or 0)\n\n# should be used if you are using a custom LLM inference provider that doesn't support\n# streaming format AND you are still using the langchain/litellm LLM class\nDISABLE_LITELLM_STREAMING = (\n    os.environ.get(\"DISABLE_LITELLM_STREAMING\") or \"false\"\n).lower() == \"true\"\n\n# extra headers to pass to LiteLLM\nLITELLM_EXTRA_HEADERS: dict[str, str] | None = None\n_LITELLM_EXTRA_HEADERS_RAW = os.environ.get(\"LITELLM_EXTRA_HEADERS\")\nif _LITELLM_EXTRA_HEADERS_RAW:\n    try:\n        LITELLM_EXTRA_HEADERS = json.loads(_LITELLM_EXTRA_HEADERS_RAW)\n    except Exception:\n        # need to import here to avoid circular imports\n        from onyx.utils.logger import setup_logger\n\n        logger = setup_logger()\n        logger.error(\n            \"Failed to parse LITELLM_EXTRA_HEADERS, must be a valid JSON object\"\n        )\n\n# if specified, will pass through request headers to the call to the LLM\nLITELLM_PASS_THROUGH_HEADERS: list[str] | None = None\n_LITELLM_PASS_THROUGH_HEADERS_RAW = os.environ.get(\"LITELLM_PASS_THROUGH_HEADERS\")\nif _LITELLM_PASS_THROUGH_HEADERS_RAW:\n    try:\n        LITELLM_PASS_THROUGH_HEADERS = json.loads(_LITELLM_PASS_THROUGH_HEADERS_RAW)\n    except Exception:\n        # need to import here to avoid circular imports\n        from onyx.utils.logger import setup_logger\n\n        logger = setup_logger()\n        logger.error(\n            \"Failed to parse LITELLM_PASS_THROUGH_HEADERS, must be a valid JSON object\"\n        )\n\n\n# if specified, will merge the specified JSON with the existing body of the\n# request before sending it to the LLM\nLITELLM_EXTRA_BODY: dict | None = None\n_LITELLM_EXTRA_BODY_RAW = os.environ.get(\"LITELLM_EXTRA_BODY\")\nif _LITELLM_EXTRA_BODY_RAW:\n    try:\n        LITELLM_EXTRA_BODY = json.loads(_LITELLM_EXTRA_BODY_RAW)\n    except Exception:\n        pass\n\n#####\n# Prompt Caching Configs\n#####\n# Enable prompt caching framework\nENABLE_PROMPT_CACHING = (\n    os.environ.get(\"ENABLE_PROMPT_CACHING\", \"true\").lower() != \"false\"\n)\n\n# Cache TTL multiplier - store caches slightly longer than provider TTL\n# This allows for some clock skew and ensures we don't lose cache metadata prematurely\nPROMPT_CACHE_REDIS_TTL_MULTIPLIER = float(\n    os.environ.get(\"PROMPT_CACHE_REDIS_TTL_MULTIPLIER\") or 1.2\n)\n"
  },
  {
    "path": "backend/onyx/configs/onyxbot_configs.py",
    "content": "import os\n\n#####\n# Onyx Slack Bot Configs\n#####\nONYX_BOT_NUM_RETRIES = int(os.environ.get(\"ONYX_BOT_NUM_RETRIES\", \"5\"))\n# Number of docs to display in \"Reference Documents\"\nONYX_BOT_NUM_DOCS_TO_DISPLAY = int(os.environ.get(\"ONYX_BOT_NUM_DOCS_TO_DISPLAY\", \"5\"))\n# If the LLM fails to answer, Onyx can still show the \"Reference Documents\"\nONYX_BOT_DISABLE_DOCS_ONLY_ANSWER = os.environ.get(\n    \"ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER\", \"\"\n).lower() not in [\"false\", \"\"]\n# When Onyx is considering a message, what emoji does it react with\nONYX_BOT_REACT_EMOJI = os.environ.get(\"ONYX_BOT_REACT_EMOJI\") or \"eyes\"\n# When User needs more help, what should the emoji be\nONYX_BOT_FOLLOWUP_EMOJI = os.environ.get(\"ONYX_BOT_FOLLOWUP_EMOJI\") or \"sos\"\n# What kind of message should be shown when someone gives an AI answer feedback to OnyxBot\n# Defaults to Private if not provided or invalid\n# Private: Only visible to user clicking the feedback\n# Anonymous: Public but anonymous\n# Public: Visible with the user name who submitted the feedback\nONYX_BOT_FEEDBACK_VISIBILITY = (\n    os.environ.get(\"ONYX_BOT_FEEDBACK_VISIBILITY\") or \"private\"\n)\n# Should OnyxBot send an apology message if it's not able to find an answer\n# That way the user isn't confused as to why OnyxBot reacted but then said nothing\n# Off by default to be less intrusive (don't want to give a notif that just says we couldnt help)\nNOTIFY_SLACKBOT_NO_ANSWER = (\n    os.environ.get(\"NOTIFY_SLACKBOT_NO_ANSWER\", \"\").lower() == \"true\"\n)\n# Mostly for debugging purposes but it's for explaining what went wrong\n# if OnyxBot couldn't find an answer\nONYX_BOT_DISPLAY_ERROR_MSGS = os.environ.get(\n    \"ONYX_BOT_DISPLAY_ERROR_MSGS\", \"\"\n).lower() not in [\n    \"false\",\n    \"\",\n]\n\n# Maximum Questions Per Minute, Default Uncapped\nONYX_BOT_MAX_QPM = int(os.environ.get(\"ONYX_BOT_MAX_QPM\") or 0) or None\n# Maximum time to wait when a question is queued\nONYX_BOT_MAX_WAIT_TIME = int(os.environ.get(\"ONYX_BOT_MAX_WAIT_TIME\") or 180)\n\n# Time (in minutes) after which a Slack message is sent to the user to remind him to give feedback.\n# Set to 0 to disable it (default)\nONYX_BOT_FEEDBACK_REMINDER = int(os.environ.get(\"ONYX_BOT_FEEDBACK_REMINDER\") or 0)\n\n# ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD is the number of\n# responses OnyxBot can send in a given time period.\n# Set to 0 to disable the limit.\nONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD = int(\n    os.environ.get(\"ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD\", \"5000\")\n)\n# ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS is the number\n# of seconds until the response limit is reset.\nONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS = int(\n    os.environ.get(\"ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS\", \"86400\")\n)\n"
  },
  {
    "path": "backend/onyx/configs/research_configs.py",
    "content": ""
  },
  {
    "path": "backend/onyx/configs/saml_config/template.settings.json",
    "content": "{\n  \"strict\": true,\n  \"debug\": false,\n  \"idp\": {\n    \"entityId\": \"<Provide This from IDP>\",\n    \"singleSignOnService\": {\n      \"url\": \"<Replace this with your IDP URL> https://trial-1234567.okta.com/home/trial-1234567_onyx/somevalues/somevalues\",\n      \"binding\": \"urn:oasis:names:tc:SAML:2.0:bindings:HTTP-Redirect\"\n    },\n    \"x509cert\": \"<Provide this>\"\n  },\n  \"sp\": {\n    \"entityId\": \"<Provide This from IDP>\",\n    \"assertionConsumerService\": {\n      \"url\": \"http://127.0.0.1:3000/auth/saml/callback\",\n      \"binding\": \"urn:oasis:names:tc:SAML:2.0:bindings:HTTP-POST\"\n    },\n    \"x509cert\": \"<Provide this>\"\n  }\n}\n"
  },
  {
    "path": "backend/onyx/configs/tool_configs.py",
    "content": "import json\nimport os\n\n\nIMAGE_GENERATION_OUTPUT_FORMAT = os.environ.get(\n    \"IMAGE_GENERATION_OUTPUT_FORMAT\", \"b64_json\"\n)\n\n# if specified, will pass through request headers to the call to API calls made by custom tools\nCUSTOM_TOOL_PASS_THROUGH_HEADERS: list[str] | None = None\n_CUSTOM_TOOL_PASS_THROUGH_HEADERS_RAW = os.environ.get(\n    \"CUSTOM_TOOL_PASS_THROUGH_HEADERS\"\n)\nif _CUSTOM_TOOL_PASS_THROUGH_HEADERS_RAW:\n    try:\n        CUSTOM_TOOL_PASS_THROUGH_HEADERS = json.loads(\n            _CUSTOM_TOOL_PASS_THROUGH_HEADERS_RAW\n        )\n    except Exception:\n        # need to import here to avoid circular imports\n        from onyx.utils.logger import setup_logger\n\n        logger = setup_logger()\n        logger.error(\n            \"Failed to parse CUSTOM_TOOL_PASS_THROUGH_HEADERS, must be a valid JSON object\"\n        )\n"
  },
  {
    "path": "backend/onyx/connectors/README.md",
    "content": "<!-- ONYX_METADATA={\"link\": \"https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/README.md\"} -->\n\n# Writing a new Onyx Connector\n\nThis README covers how to contribute a new Connector for Onyx. It includes an overview of the design, interfaces,\nand required changes.\n\nThank you for your contribution!\n\n### Connector Overview\n\nConnectors come in 3 different flows:\n\n- Load Connector:\n  - Bulk indexes documents to reflect a point in time. This type of connector generally works by either pulling all\n    documents via a connector's API or loads the documents from some sort of a dump file.\n- Poll Connector:\n  - Incrementally updates documents based on a provided time range. It is used by the background job to pull the latest\n    changes and additions since the last round of polling. This connector helps keep the document index up to date\n    without needing to fetch/embed/index every document which would be too slow to do frequently on large sets of\n    documents.\n- Slim Connector:\n  - This connector should be a lighter weight method of checking all documents in the source to see if they still exist.\n  - This connector should be identical to the Poll or Load Connector except that it only fetches the IDs of the documents, not the documents themselves.\n  - This is used by our pruning job which removes old documents from the index.\n  - The optional start and end datetimes can be ignored.\n- Event Based connectors:\n  - Connectors that listen to events and update documents accordingly.\n  - Currently not used by the background job, this exists for future design purposes.\n\n### Connector Implementation\n\nRefer to [interfaces.py](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/interfaces.py)\nand this first contributor created Pull Request for a new connector (Shoutout to Dan Brown):\n[Reference Pull Request](https://github.com/onyx-dot-app/onyx/pull/139)\n\nFor implementing a Slim Connector, refer to the comments in this PR:\n[Slim Connector PR](https://github.com/onyx-dot-app/onyx/pull/3303/files)\n\nAll new connectors should have tests added to the `backend/tests/daily/connectors` directory. Refer to the above PR for an example of adding tests for a new connector.\n\n#### Implementing the new Connector\n\nThe connector must subclass one or more of LoadConnector, PollConnector, CheckpointedConnector, or CheckpointedConnectorWithPermSync\n\nThe `__init__` should take arguments for configuring what documents the connector will and where it finds those\ndocuments. For example, if you have a wiki site, it may include the configuration for the team, topic, folder, etc. of\nthe documents to fetch. It may also include the base domain of the wiki. Alternatively, if all the access information\nof the connector is stored in the credential/token, then there may be no required arguments.\n\n`load_credentials` should take a dictionary which provides all the access information that the connector might need.\nFor example this could be the user's username and access token.\n\nRefer to the existing connectors for `load_from_state` and `poll_source` examples. There is not yet a process to listen\nfor EventConnector events, this will come down the line.\n\n#### Development Tip\n\nIt may be handy to test your new connector separate from the rest of the stack while developing.\nFollow the below template:\n\n```commandline\nif __name__ == \"__main__\":\n    import time\n    test_connector = NewConnector(space=\"engineering\")\n    test_connector.load_credentials({\n        \"user_id\": \"foobar\",\n        \"access_token\": \"fake_token\"\n    })\n    all_docs = test_connector.load_from_state()\n\n    current = time.time()\n    one_day_ago = current - 24 * 60 * 60  # 1 day\n    latest_docs = test_connector.poll_source(one_day_ago, current)\n```\n\n> Note: Be sure to set PYTHONPATH to onyx/backend before running the above main.\n\n### Additional Required Changes:\n\n#### Backend Changes\n\n- Add a new type to\n  [DocumentSource](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/configs/constants.py)\n- Add a mapping from DocumentSource (and optionally connector type) to the right connector class\n  [here](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/factory.py#L33)\n\n#### Frontend Changes\n\n- Add the new Connector definition to the `SOURCE_METADATA_MAP` [here](https://github.com/onyx-dot-app/onyx/blob/main/web/src/lib/sources.ts#L59).\n- Add the definition for the new Form to the `connectorConfigs` object [here](https://github.com/onyx-dot-app/onyx/blob/main/web/src/lib/connectors/connectors.ts#L79).\n\n#### Docs Changes\n\nCreate the new connector page (with guiding images!) with how to get the connector credentials and how to set up the\nconnector in Onyx. Then create a Pull Request in [https://github.com/onyx-dot-app/documentation](https://github.com/onyx-dot-app/documentation).\n\n### Before opening PR\n\n1. Be sure to fully test changes end to end with setting up the connector and updating the index with new docs from the\n   new connector. To make it easier to review, please attach a video showing the successful creation of the connector via the UI (starting from the `Add Connector` page).\n2. Add a folder + tests under `backend/tests/daily/connectors` director. For an example, checkout the [test for Confluence](https://github.com/onyx-dot-app/onyx/blob/main/backend/tests/daily/connectors/confluence/test_confluence_basic.py). In the PR description, include a guide on how to setup the new source to pass the test. Before merging, we will re-create the environment and make sure the test(s) pass.\n3. Be sure to run the linting/formatting, refer to the formatting and linting section in\n   [CONTRIBUTING.md](https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md#formatting-and-linting)\n"
  },
  {
    "path": "backend/onyx/connectors/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/airtable/airtable_connector.py",
    "content": "import contextvars\nimport re\nfrom concurrent.futures import as_completed\nfrom concurrent.futures import Future\nfrom concurrent.futures import ThreadPoolExecutor\nfrom io import BytesIO\nfrom typing import Any\nfrom typing import cast\n\nimport requests\nfrom pyairtable import Api as AirtableApi\nfrom pyairtable.api.types import RecordDict\nfrom pyairtable.models.schema import TableSchema\nfrom retry import retry\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.extract_file_text import extract_file_text\nfrom onyx.file_processing.extract_file_text import get_file_ext\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# NOTE: all are made lowercase to avoid case sensitivity issues\n# These field types are considered metadata by default when\n# treat_all_non_attachment_fields_as_metadata is False\nDEFAULT_METADATA_FIELD_TYPES = {\n    \"singlecollaborator\",\n    \"collaborator\",\n    \"createdby\",\n    \"singleselect\",\n    \"multipleselects\",\n    \"checkbox\",\n    \"date\",\n    \"datetime\",\n    \"email\",\n    \"phone\",\n    \"url\",\n    \"number\",\n    \"currency\",\n    \"duration\",\n    \"percent\",\n    \"rating\",\n    \"createdtime\",\n    \"lastmodifiedtime\",\n    \"autonumber\",\n    \"rollup\",\n    \"lookup\",\n    \"count\",\n    \"formula\",\n    \"date\",\n}\n\n\nclass AirtableClientNotSetUpError(PermissionError):\n    def __init__(self) -> None:\n        super().__init__(\"Airtable Client is not set up, was load_credentials called?\")\n\n\n# Matches URLs like https://airtable.com/appXXX/tblYYY/viwZZZ?blocks=hide\n# Captures: base_id (appXXX), table_id (tblYYY), and optionally view_id (viwZZZ)\n_AIRTABLE_URL_PATTERN = re.compile(\n    r\"https?://airtable\\.com/(app[A-Za-z0-9]+)/(tbl[A-Za-z0-9]+)(?:/(viw[A-Za-z0-9]+))?\",\n)\n\n\ndef parse_airtable_url(\n    url: str,\n) -> tuple[str, str, str | None]:\n    \"\"\"Parse an Airtable URL into (base_id, table_id, view_id).\n\n    Accepts URLs like:\n      https://airtable.com/appXXX/tblYYY\n      https://airtable.com/appXXX/tblYYY/viwZZZ\n      https://airtable.com/appXXX/tblYYY/viwZZZ?blocks=hide\n\n    Returns:\n        (base_id, table_id, view_id or None)\n\n    Raises:\n        ValueError if the URL doesn't match the expected format.\n    \"\"\"\n    match = _AIRTABLE_URL_PATTERN.search(url.strip())\n    if not match:\n        raise ValueError(\n            f\"Could not parse Airtable URL: '{url}'. Expected format: https://airtable.com/appXXX/tblYYY[/viwZZZ]\"\n        )\n    return match.group(1), match.group(2), match.group(3)\n\n\nclass AirtableConnector(LoadConnector):\n    def __init__(\n        self,\n        base_id: str = \"\",\n        table_name_or_id: str = \"\",\n        airtable_url: str = \"\",\n        treat_all_non_attachment_fields_as_metadata: bool = False,\n        view_id: str | None = None,\n        share_id: str | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        \"\"\"Initialize an AirtableConnector.\n\n        Args:\n            base_id: The ID of the Airtable base (not required when airtable_url is set)\n            table_name_or_id: The name or ID of the table (not required when airtable_url is set)\n            airtable_url: An Airtable URL to parse base_id, table_id, and view_id from.\n                Overrides base_id, table_name_or_id, and view_id if provided.\n            treat_all_non_attachment_fields_as_metadata: If True, all fields except attachments will be treated as metadata.\n                If False, only fields with types in DEFAULT_METADATA_FIELD_TYPES will be treated as metadata.\n            view_id: Optional ID of a specific view to use\n            share_id: Optional ID of a \"share\" to use for generating record URLs\n            batch_size: Number of records to process in each batch\n\n        Mode is auto-detected: if a specific table is identified (via URL or\n        base_id + table_name_or_id), the connector indexes that single table.\n        Otherwise, it discovers and indexes all accessible bases and tables.\n        \"\"\"\n        # If a URL is provided, parse it to extract base_id, table_id, and view_id\n        if airtable_url:\n            parsed_base_id, parsed_table_id, parsed_view_id = parse_airtable_url(\n                airtable_url\n            )\n            base_id = parsed_base_id\n            table_name_or_id = parsed_table_id\n            if parsed_view_id:\n                view_id = parsed_view_id\n\n        self.base_id = base_id\n        self.table_name_or_id = table_name_or_id\n        self.index_all = not (base_id and table_name_or_id)\n        self.view_id = view_id\n        self.share_id = share_id\n        self.batch_size = batch_size\n        self._airtable_client: AirtableApi | None = None\n        self.treat_all_non_attachment_fields_as_metadata = (\n            treat_all_non_attachment_fields_as_metadata\n        )\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self._airtable_client = AirtableApi(credentials[\"airtable_access_token\"])\n        return None\n\n    @property\n    def airtable_client(self) -> AirtableApi:\n        if not self._airtable_client:\n            raise AirtableClientNotSetUpError()\n        return self._airtable_client\n\n    def validate_connector_settings(self) -> None:\n        if self.index_all:\n            try:\n                bases = self.airtable_client.bases()\n                if not bases:\n                    raise ConnectorValidationError(\n                        \"No bases found. Ensure your API token has access to at least one base.\"\n                    )\n            except ConnectorValidationError:\n                raise\n            except Exception as e:\n                raise ConnectorValidationError(f\"Failed to list Airtable bases: {e}\")\n        else:\n            if not self.base_id or not self.table_name_or_id:\n                raise ConnectorValidationError(\n                    \"A valid Airtable URL or base_id and table_name_or_id are required when not using index_all mode.\"\n                )\n            try:\n                table = self.airtable_client.table(self.base_id, self.table_name_or_id)\n                table.schema()\n            except Exception as e:\n                raise ConnectorValidationError(\n                    f\"Failed to access table '{self.table_name_or_id}' in base '{self.base_id}': {e}\"\n                )\n\n    @classmethod\n    def _get_record_url(\n        cls,\n        base_id: str,\n        table_id: str,\n        record_id: str,\n        share_id: str | None,\n        view_id: str | None,\n        field_id: str | None = None,\n        attachment_id: str | None = None,\n    ) -> str:\n        \"\"\"Constructs the URL for a record, optionally including field and attachment IDs\n\n        Full possible structure is:\n\n        https://airtable.com/BASE_ID/SHARE_ID/TABLE_ID/VIEW_ID/RECORD_ID/FIELD_ID/ATTACHMENT_ID\n        \"\"\"\n        # If we have a shared link, use that view for better UX\n        if share_id:\n            base_url = f\"https://airtable.com/{base_id}/{share_id}/{table_id}\"\n        else:\n            base_url = f\"https://airtable.com/{base_id}/{table_id}\"\n\n        if view_id:\n            base_url = f\"{base_url}/{view_id}\"\n\n        base_url = f\"{base_url}/{record_id}\"\n\n        if field_id and attachment_id:\n            return f\"{base_url}/{field_id}/{attachment_id}?blocks=hide\"\n\n        return base_url\n\n    def _extract_field_values(\n        self,\n        field_id: str,\n        field_name: str,\n        field_info: Any,\n        field_type: str,\n        base_id: str,\n        table_id: str,\n        view_id: str | None,\n        record_id: str,\n    ) -> list[tuple[str, str]]:\n        \"\"\"\n        Extract value(s) + links from a field regardless of its type.\n        Attachments are represented as multiple sections, and therefore\n        returned as a list of tuples (value, link).\n        \"\"\"\n        if field_info is None:\n            return []\n\n        # skip references to other records for now (would need to do another\n        # request to get the actual record name/type)\n        # TODO: support this\n        if field_type == \"multipleRecordLinks\":\n            return []\n\n        # Get the base URL for this record\n        default_link = self._get_record_url(\n            base_id, table_id, record_id, self.share_id, self.view_id or view_id\n        )\n\n        if field_type == \"multipleAttachments\":\n            attachment_texts: list[tuple[str, str]] = []\n            for attachment in field_info:\n                url = attachment.get(\"url\")\n                filename = attachment.get(\"filename\", \"\")\n                if not url:\n                    continue\n\n                @retry(\n                    tries=5,\n                    delay=1,\n                    backoff=2,\n                    max_delay=10,\n                )\n                def get_attachment_with_retry(url: str, record_id: str) -> bytes | None:\n                    try:\n                        attachment_response = requests.get(url)\n                        attachment_response.raise_for_status()\n                        return attachment_response.content\n                    except requests.exceptions.HTTPError as e:\n                        if e.response.status_code == 410:\n                            logger.info(f\"Refreshing attachment for {filename}\")\n                            # Re-fetch the record to get a fresh URL\n                            refreshed_record = self.airtable_client.table(\n                                base_id, table_id\n                            ).get(record_id)\n                            for refreshed_attachment in refreshed_record[\"fields\"][\n                                field_name\n                            ]:\n                                if refreshed_attachment.get(\"filename\") == filename:\n                                    new_url = refreshed_attachment.get(\"url\")\n                                    if new_url:\n                                        attachment_response = requests.get(new_url)\n                                        attachment_response.raise_for_status()\n                                        return attachment_response.content\n\n                            logger.error(f\"Failed to refresh attachment for {filename}\")\n                        raise\n\n                attachment_content = get_attachment_with_retry(url, record_id)\n                if attachment_content:\n                    try:\n                        file_ext = get_file_ext(filename)\n                        attachment_id = attachment[\"id\"]\n                        attachment_text = extract_file_text(\n                            BytesIO(attachment_content),\n                            filename,\n                            break_on_unprocessable=False,\n                            extension=file_ext,\n                        )\n                        if attachment_text:\n                            # Use the helper method to construct attachment URLs\n                            attachment_link = self._get_record_url(\n                                base_id,\n                                table_id,\n                                record_id,\n                                self.share_id,\n                                self.view_id or view_id,\n                                field_id,\n                                attachment_id,\n                            )\n                            attachment_texts.append(\n                                (f\"{filename}:\\n{attachment_text}\", attachment_link)\n                            )\n                    except Exception as e:\n                        logger.warning(\n                            f\"Failed to process attachment {filename}: {str(e)}\"\n                        )\n            return attachment_texts\n\n        if field_type in [\"singleCollaborator\", \"collaborator\", \"createdBy\"]:\n            combined = []\n            collab_name = field_info.get(\"name\")\n            collab_email = field_info.get(\"email\")\n            if collab_name:\n                combined.append(collab_name)\n            if collab_email:\n                combined.append(f\"({collab_email})\")\n            return [(\" \".join(combined) if combined else str(field_info), default_link)]\n\n        if isinstance(field_info, list):\n            return [(str(item), default_link) for item in field_info]\n\n        return [(str(field_info), default_link)]\n\n    def _should_be_metadata(self, field_type: str) -> bool:\n        \"\"\"Determine if a field type should be treated as metadata.\n\n        When treat_all_non_attachment_fields_as_metadata is True, all fields except\n        attachments are treated as metadata. Otherwise, only fields with types listed\n        in DEFAULT_METADATA_FIELD_TYPES are treated as metadata.\"\"\"\n        if self.treat_all_non_attachment_fields_as_metadata:\n            return field_type.lower() != \"multipleattachments\"\n        return field_type.lower() in DEFAULT_METADATA_FIELD_TYPES\n\n    def _process_field(\n        self,\n        field_id: str,\n        field_name: str,\n        field_info: Any,\n        field_type: str,\n        base_id: str,\n        table_id: str,\n        view_id: str | None,\n        record_id: str,\n    ) -> tuple[list[TextSection], dict[str, str | list[str]]]:\n        \"\"\"\n        Process a single Airtable field and return sections or metadata.\n\n        Args:\n            field_name: Name of the field\n            field_info: Raw field information from Airtable\n            field_type: Airtable field type\n\n        Returns:\n            (list of Sections, dict of metadata)\n        \"\"\"\n        if field_info is None:\n            return [], {}\n\n        # Get the value(s) for the field\n        field_value_and_links = self._extract_field_values(\n            field_id=field_id,\n            field_name=field_name,\n            field_info=field_info,\n            field_type=field_type,\n            base_id=base_id,\n            table_id=table_id,\n            view_id=view_id,\n            record_id=record_id,\n        )\n        if len(field_value_and_links) == 0:\n            return [], {}\n\n        # Determine if it should be metadata or a section\n        if self._should_be_metadata(field_type):\n            field_values = [value for value, _ in field_value_and_links]\n            if len(field_values) > 1:\n                return [], {field_name: field_values}\n            return [], {field_name: field_values[0]}\n\n        # Otherwise, create relevant sections\n        sections = [\n            TextSection(\n                link=link,\n                text=(\n                    f\"{field_name}:\\n------------------------\\n{text}\\n------------------------\"\n                ),\n            )\n            for text, link in field_value_and_links\n        ]\n        return sections, {}\n\n    def _process_record(\n        self,\n        record: RecordDict,\n        table_schema: TableSchema,\n        primary_field_name: str | None,\n        base_id: str,\n        base_name: str | None = None,\n    ) -> Document | None:\n        \"\"\"Process a single Airtable record into a Document.\n\n        Args:\n            record: The Airtable record to process\n            table_schema: Schema information for the table\n            primary_field_name: Name of the primary field, if any\n            base_id: The ID of the base this record belongs to\n            base_name: The name of the base (used in semantic ID for index_all mode)\n\n        Returns:\n            Document object representing the record\n        \"\"\"\n        table_id = table_schema.id\n        table_name = table_schema.name\n        record_id = record[\"id\"]\n        fields = record[\"fields\"]\n        sections: list[TextSection] = []\n        metadata: dict[str, str | list[str]] = {}\n\n        # Get primary field value if it exists\n        primary_field_value = (\n            fields.get(primary_field_name) if primary_field_name else None\n        )\n        view_id = table_schema.views[0].id if table_schema.views else None\n\n        for field_schema in table_schema.fields:\n            field_name = field_schema.name\n            field_val = fields.get(field_name)\n            field_type = field_schema.type\n\n            logger.debug(\n                f\"Processing field '{field_name}' of type '{field_type}' for record '{record_id}'.\"\n            )\n\n            field_sections, field_metadata = self._process_field(\n                field_id=field_schema.id,\n                field_name=field_name,\n                field_info=field_val,\n                field_type=field_type,\n                base_id=base_id,\n                table_id=table_id,\n                view_id=view_id,\n                record_id=record_id,\n            )\n\n            sections.extend(field_sections)\n            metadata.update(field_metadata)\n\n        if not sections:\n            logger.warning(f\"No sections found for record {record_id}\")\n            return None\n\n        # Include base name in semantic ID only in index_all mode\n        if self.index_all and base_name:\n            semantic_id = (\n                f\"{base_name} > {table_name}: {primary_field_value}\"\n                if primary_field_value\n                else f\"{base_name} > {table_name}\"\n            )\n        else:\n            semantic_id = (\n                f\"{table_name}: {primary_field_value}\"\n                if primary_field_value\n                else table_name\n            )\n\n        # Build hierarchy source_path for Craft file system subdirectory structure.\n        # This creates: airtable/{base_name}/{table_name}/record.json\n        source_path: list[str] = []\n        if base_name:\n            source_path.append(base_name)\n        source_path.append(table_name)\n\n        return Document(\n            id=f\"airtable__{record_id}\",\n            sections=(cast(list[TextSection | ImageSection], sections)),\n            source=DocumentSource.AIRTABLE,\n            semantic_identifier=semantic_id,\n            metadata=metadata,\n            doc_metadata={\n                \"hierarchy\": {\n                    \"source_path\": source_path,\n                    \"base_id\": base_id,\n                    \"table_id\": table_id,\n                    \"table_name\": table_name,\n                    **({\"base_name\": base_name} if base_name else {}),\n                }\n            },\n        )\n\n    def _resolve_base_name(self, base_id: str) -> str | None:\n        \"\"\"Try to resolve a human-readable base name from the API.\"\"\"\n        try:\n            for base_info in self.airtable_client.bases():\n                if base_info.id == base_id:\n                    return base_info.name\n        except Exception:\n            logger.debug(f\"Could not resolve base name for {base_id}\")\n        return None\n\n    def _index_table(\n        self,\n        base_id: str,\n        table_name_or_id: str,\n        base_name: str | None = None,\n    ) -> GenerateDocumentsOutput:\n        \"\"\"Index all records from a single table. Yields batches of Documents.\"\"\"\n        # Resolve base name for hierarchy if not provided\n        if base_name is None:\n            base_name = self._resolve_base_name(base_id)\n\n        table = self.airtable_client.table(base_id, table_name_or_id)\n        records = table.all()\n\n        table_schema = table.schema()\n        primary_field_name = None\n\n        # Find a primary field from the schema\n        for field in table_schema.fields:\n            if field.id == table_schema.primary_field_id:\n                primary_field_name = field.name\n                break\n\n        logger.info(\n            f\"Processing {len(records)} records from table '{table_schema.name}' in base '{base_name or base_id}'.\"\n        )\n\n        if not records:\n            return\n\n        # Process records in parallel batches using ThreadPoolExecutor\n        PARALLEL_BATCH_SIZE = 8\n        max_workers = min(PARALLEL_BATCH_SIZE, len(records))\n\n        for i in range(0, len(records), PARALLEL_BATCH_SIZE):\n            batch_records = records[i : i + PARALLEL_BATCH_SIZE]\n            record_documents: list[Document | HierarchyNode] = []\n\n            with ThreadPoolExecutor(max_workers=max_workers) as executor:\n                # Submit batch tasks\n                future_to_record: dict[Future[Document | None], RecordDict] = {}\n                for record in batch_records:\n                    # Capture the current context so that the thread gets the current tenant ID\n                    current_context = contextvars.copy_context()\n                    future_to_record[\n                        executor.submit(\n                            current_context.run,\n                            self._process_record,\n                            record=record,\n                            table_schema=table_schema,\n                            primary_field_name=primary_field_name,\n                            base_id=base_id,\n                            base_name=base_name,\n                        )\n                    ] = record\n\n                # Wait for all tasks in this batch to complete\n                for future in as_completed(future_to_record):\n                    record = future_to_record[future]\n                    try:\n                        document = future.result()\n                        if document:\n                            record_documents.append(document)\n                    except Exception as e:\n                        logger.exception(f\"Failed to process record {record['id']}\")\n                        raise e\n\n            if record_documents:\n                yield record_documents\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        \"\"\"\n        Fetch all records from one or all tables.\n\n        NOTE: Airtable does not support filtering by time updated, so\n        we have to fetch all records every time.\n        \"\"\"\n        if not self.airtable_client:\n            raise AirtableClientNotSetUpError()\n\n        if self.index_all:\n            yield from self._load_all()\n        else:\n            yield from self._index_table(\n                base_id=self.base_id,\n                table_name_or_id=self.table_name_or_id,\n            )\n\n    def _load_all(self) -> GenerateDocumentsOutput:\n        \"\"\"Discover all bases and tables, then index everything.\"\"\"\n        bases = self.airtable_client.bases()\n        logger.info(f\"Discovered {len(bases)} Airtable base(s).\")\n\n        for base_info in bases:\n            base_id = base_info.id\n            base_name = base_info.name\n            logger.info(f\"Listing tables for base '{base_name}' ({base_id}).\")\n\n            try:\n                base = self.airtable_client.base(base_id)\n                tables = base.tables()\n            except Exception:\n                logger.exception(\n                    f\"Failed to list tables for base '{base_name}' ({base_id}), skipping.\"\n                )\n                continue\n\n            logger.info(f\"Found {len(tables)} table(s) in base '{base_name}'.\")\n\n            for table in tables:\n                try:\n                    yield from self._index_table(\n                        base_id=base_id,\n                        table_name_or_id=table.id,\n                        base_name=base_name,\n                    )\n                except Exception:\n                    logger.exception(\n                        f\"Failed to index table '{table.name}' ({table.id}) in base '{base_name}' ({base_id}), skipping.\"\n                    )\n                    continue\n"
  },
  {
    "path": "backend/onyx/connectors/asana/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/asana/asana_api.py",
    "content": "import time\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom typing import Dict\n\nimport asana  # type: ignore\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n# https://github.com/Asana/python-asana/tree/master?tab=readme-ov-file#documentation-for-api-endpoints\nclass AsanaTask:\n    def __init__(\n        self,\n        id: str,\n        title: str,\n        text: str,\n        link: str,\n        last_modified: datetime,\n        project_gid: str,\n        project_name: str,\n    ) -> None:\n        self.id = id\n        self.title = title\n        self.text = text\n        self.link = link\n        self.last_modified = last_modified\n        self.project_gid = project_gid\n        self.project_name = project_name\n\n    def __str__(self) -> str:\n        return f\"ID: {self.id}\\nTitle: {self.title}\\nLast modified: {self.last_modified}\\nText: {self.text}\"\n\n\nclass AsanaAPI:\n    def __init__(\n        self, api_token: str, workspace_gid: str, team_gid: str | None\n    ) -> None:\n        self._user = None\n        self.workspace_gid = workspace_gid\n        self.team_gid = team_gid\n\n        self.configuration = asana.Configuration()\n        self.api_client = asana.ApiClient(self.configuration)\n        self.tasks_api = asana.TasksApi(self.api_client)\n        self.stories_api = asana.StoriesApi(self.api_client)\n        self.users_api = asana.UsersApi(self.api_client)\n        self.project_api = asana.ProjectsApi(self.api_client)\n        self.workspaces_api = asana.WorkspacesApi(self.api_client)\n\n        self.api_error_count = 0\n        self.configuration.access_token = api_token\n        self.task_count = 0\n\n    def get_tasks(\n        self, project_gids: list[str] | None, start_date: str\n    ) -> Iterator[AsanaTask]:\n        \"\"\"Get all tasks from the projects with the given gids that were modified since the given date.\n        If project_gids is None, get all tasks from all projects in the workspace.\"\"\"\n        logger.info(\"Starting to fetch Asana projects\")\n        projects = self.project_api.get_projects(\n            opts={\n                \"workspace\": self.workspace_gid,\n                \"opt_fields\": \"gid,name,archived,modified_at\",\n            }\n        )\n        start_seconds = int(time.mktime(datetime.now().timetuple()))\n        projects_list = []\n        project_count = 0\n        for project_info in projects:\n            project_gid = project_info[\"gid\"]\n            if project_gids is None or project_gid in project_gids:\n                projects_list.append(project_gid)\n            else:\n                logger.debug(\n                    f\"Skipping project: {project_gid} - not in accepted project_gids\"\n                )\n            project_count += 1\n            if project_count % 100 == 0:\n                logger.info(f\"Processed {project_count} projects\")\n\n        logger.info(f\"Found {len(projects_list)} projects to process\")\n        for project_gid in projects_list:\n            for task in self._get_tasks_for_project(\n                project_gid, start_date, start_seconds\n            ):\n                yield task\n        logger.info(f\"Completed fetching {self.task_count} tasks from Asana\")\n        if self.api_error_count > 0:\n            logger.warning(\n                f\"Encountered {self.api_error_count} API errors during task fetching\"\n            )\n\n    def _get_tasks_for_project(\n        self, project_gid: str, start_date: str, start_seconds: int\n    ) -> Iterator[AsanaTask]:\n        project = self.project_api.get_project(project_gid, opts={})\n        project_name = project.get(\"name\", project_gid)\n        team = project.get(\"team\") or {}\n        team_gid = team.get(\"gid\")\n\n        if project.get(\"archived\"):\n            logger.info(f\"Skipping archived project: {project_name} ({project_gid})\")\n            return\n        if not team_gid:\n            logger.info(\n                f\"Skipping project without a team: {project_name} ({project_gid})\"\n            )\n            return\n        if project.get(\"privacy_setting\") == \"private\":\n            if self.team_gid and team_gid != self.team_gid:\n                logger.info(\n                    f\"Skipping private project not in configured team: {project_name} ({project_gid})\"\n                )\n                return\n            logger.info(\n                f\"Processing private project in configured team: {project_name} ({project_gid})\"\n            )\n\n        simple_start_date = start_date.split(\".\")[0].split(\"+\")[0]\n        logger.info(\n            f\"Fetching tasks modified since {simple_start_date} for project: {project_name} ({project_gid})\"\n        )\n\n        opts = {\n            \"opt_fields\": \"name,memberships,memberships.project,completed_at,completed_by,created_at,\"\n            \"created_by,custom_fields,dependencies,due_at,due_on,external,html_notes,liked,likes,\"\n            \"modified_at,notes,num_hearts,parent,projects,resource_subtype,resource_type,start_on,\"\n            \"workspace,permalink_url\",\n            \"modified_since\": start_date,\n        }\n        tasks_from_api = self.tasks_api.get_tasks_for_project(project_gid, opts)\n        for data in tasks_from_api:\n            self.task_count += 1\n            if self.task_count % 10 == 0:\n                end_seconds = time.mktime(datetime.now().timetuple())\n                runtime_seconds = end_seconds - start_seconds\n                if runtime_seconds > 0:\n                    logger.info(\n                        f\"Processed {self.task_count} tasks in {runtime_seconds:.0f} seconds \"\n                        f\"({self.task_count / runtime_seconds:.2f} tasks/second)\"\n                    )\n\n            logger.debug(f\"Processing Asana task: {data['name']}\")\n\n            text = self._construct_task_text(data)\n\n            try:\n                text += self._fetch_and_add_comments(data[\"gid\"])\n\n                last_modified_date = self.format_date(data[\"modified_at\"])\n                text += f\"Last modified: {last_modified_date}\\n\"\n\n                task = AsanaTask(\n                    id=data[\"gid\"],\n                    title=data[\"name\"],\n                    text=text,\n                    link=data[\"permalink_url\"],\n                    last_modified=datetime.fromisoformat(data[\"modified_at\"]),\n                    project_gid=project_gid,\n                    project_name=project_name,\n                )\n                yield task\n            except Exception:\n                logger.error(\n                    f\"Error processing task {data['gid']} in project {project_gid}\",\n                    exc_info=True,\n                )\n                self.api_error_count += 1\n\n    def _construct_task_text(self, data: Dict) -> str:\n        text = f\"{data['name']}\\n\\n\"\n\n        if data[\"notes\"]:\n            text += f\"{data['notes']}\\n\\n\"\n\n        if data[\"created_by\"] and data[\"created_by\"][\"gid\"]:\n            creator = self.get_user(data[\"created_by\"][\"gid\"])[\"name\"]\n            created_date = self.format_date(data[\"created_at\"])\n            text += f\"Created by: {creator} on {created_date}\\n\"\n\n        if data[\"due_on\"]:\n            due_date = self.format_date(data[\"due_on\"])\n            text += f\"Due date: {due_date}\\n\"\n\n        if data[\"completed_at\"]:\n            completed_date = self.format_date(data[\"completed_at\"])\n            text += f\"Completed on: {completed_date}\\n\"\n\n        text += \"\\n\"\n        return text\n\n    def _fetch_and_add_comments(self, task_gid: str) -> str:\n        text = \"\"\n        stories_opts: Dict[str, str] = {}\n        story_start = time.time()\n        stories = self.stories_api.get_stories_for_task(task_gid, stories_opts)\n\n        story_count = 0\n        comment_count = 0\n\n        for story in stories:\n            story_count += 1\n            if story[\"resource_subtype\"] == \"comment_added\":\n                comment = self.stories_api.get_story(\n                    story[\"gid\"], opts={\"opt_fields\": \"text,created_by,created_at\"}\n                )\n                commenter = self.get_user(comment[\"created_by\"][\"gid\"])[\"name\"]\n                text += f\"Comment by {commenter}: {comment['text']}\\n\\n\"\n                comment_count += 1\n\n        story_duration = time.time() - story_start\n        logger.debug(\n            f\"Processed {story_count} stories (including {comment_count} comments) in {story_duration:.2f} seconds\"\n        )\n\n        return text\n\n    def get_user(self, user_gid: str) -> Dict:\n        if self._user is not None:\n            return self._user\n        self._user = self.users_api.get_user(user_gid, {\"opt_fields\": \"name,email\"})\n\n        if not self._user:\n            logger.warning(f\"Unable to fetch user information for user_gid: {user_gid}\")\n            return {\"name\": \"Unknown\"}\n        return self._user\n\n    def format_date(self, date_str: str) -> str:\n        date = datetime.fromisoformat(date_str)\n        return time.strftime(\"%Y-%m-%d\", date.timetuple())\n\n    def get_time(self) -> str:\n        return time.strftime(\"%Y-%m-%d %H:%M:%S\", time.localtime())\n"
  },
  {
    "path": "backend/onyx/connectors/asana/connector.py",
    "content": "import datetime\nfrom typing import Any\n\nfrom onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.asana import asana_api\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass AsanaConnector(LoadConnector, PollConnector):\n    def __init__(\n        self,\n        asana_workspace_id: str,\n        asana_project_ids: str | None = None,\n        asana_team_id: str | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n        continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE,\n    ) -> None:\n        self.workspace_id = asana_workspace_id.strip()\n        if asana_project_ids:\n            project_ids = [\n                project_id.strip()\n                for project_id in asana_project_ids.split(\",\")\n                if project_id.strip()\n            ]\n            self.project_ids_to_index = project_ids or None\n        else:\n            self.project_ids_to_index = None\n        self.asana_team_id = (asana_team_id.strip() or None) if asana_team_id else None\n        self.batch_size = batch_size\n        self.continue_on_failure = continue_on_failure\n        logger.info(\n            f\"AsanaConnector initialized with workspace_id: {asana_workspace_id}\"\n        )\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self.api_token = credentials[\"asana_api_token_secret\"]\n        self.asana_client = asana_api.AsanaAPI(\n            api_token=self.api_token,\n            workspace_gid=self.workspace_id,\n            team_gid=self.asana_team_id,\n        )\n        logger.info(\"Asana credentials loaded and API client initialized\")\n        return None\n\n    def poll_source(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch | None,  # noqa: ARG002\n    ) -> GenerateDocumentsOutput:\n        start_time = datetime.datetime.fromtimestamp(start).isoformat()\n        logger.info(f\"Starting Asana poll from {start_time}\")\n        asana = asana_api.AsanaAPI(\n            api_token=self.api_token,\n            workspace_gid=self.workspace_id,\n            team_gid=self.asana_team_id,\n        )\n        docs_batch: list[Document | HierarchyNode] = []\n        tasks = asana.get_tasks(self.project_ids_to_index, start_time)\n\n        for task in tasks:\n            doc = self._message_to_doc(task)\n            docs_batch.append(doc)\n\n            if len(docs_batch) >= self.batch_size:\n                logger.info(f\"Yielding batch of {len(docs_batch)} documents\")\n                yield docs_batch\n                docs_batch = []\n\n        if docs_batch:\n            logger.info(f\"Yielding final batch of {len(docs_batch)} documents\")\n            yield docs_batch\n\n        logger.info(\"Asana poll completed\")\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        logger.notice(\"Starting full index of all Asana tasks\")\n        return self.poll_source(start=0, end=None)\n\n    def _message_to_doc(self, task: asana_api.AsanaTask) -> Document:\n        logger.debug(f\"Converting Asana task {task.id} to Document\")\n        return Document(\n            id=task.id,\n            sections=[TextSection(link=task.link, text=task.text)],\n            doc_updated_at=task.last_modified,\n            source=DocumentSource.ASANA,\n            semantic_identifier=task.title,\n            metadata={\n                \"group\": task.project_gid,\n                \"project\": task.project_name,\n            },\n        )\n\n\nif __name__ == \"__main__\":\n    import time\n    import os\n\n    logger.notice(\"Starting Asana connector test\")\n    connector = AsanaConnector(\n        os.environ[\"WORKSPACE_ID\"],\n        os.environ[\"PROJECT_IDS\"],\n        os.environ[\"TEAM_ID\"],\n    )\n    connector.load_credentials(\n        {\n            \"asana_api_token_secret\": os.environ[\"API_TOKEN\"],\n        }\n    )\n    logger.info(\"Loading all documents from Asana\")\n    all_docs = connector.load_from_state()\n    current = time.time()\n    one_day_ago = current - 24 * 60 * 60  # 1 day\n    logger.info(\"Polling for documents updated in the last 24 hours\")\n    latest_docs = connector.poll_source(one_day_ago, current)\n    for docs in latest_docs:\n        for doc in docs:\n            if isinstance(doc, HierarchyNode):\n                print(\"hierarchynode:\", doc.display_name)\n            else:\n                print(doc.id)\n    logger.notice(\"Asana connector test completed\")\n"
  },
  {
    "path": "backend/onyx/connectors/axero/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/axero/connector.py",
    "content": "import time\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nimport requests\nfrom pydantic import BaseModel\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    process_in_batches,\n)\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rate_limit_builder,\n)\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.html_utils import parse_html_page_basic\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\n\nlogger = setup_logger()\n\n\nENTITY_NAME_MAP = {1: \"Forum\", 3: \"Article\", 4: \"Blog\", 9: \"Wiki\"}\n\n\ndef _get_auth_header(api_key: str) -> dict[str, str]:\n    return {\"Rest-Api-Key\": api_key}\n\n\n@retry_builder()\n@rate_limit_builder(max_calls=5, period=1)\ndef _rate_limited_request(\n    endpoint: str, headers: dict, params: dict | None = None\n) -> Any:\n    # https://my.axerosolutions.com/spaces/5/communifire-documentation/wiki/view/370/rest-api\n    return requests.get(endpoint, headers=headers, params=params)\n\n\n# https://my.axerosolutions.com/spaces/5/communifire-documentation/wiki/view/595/rest-api-get-content-list\ndef _get_entities(\n    entity_type: int,\n    api_key: str,\n    axero_base_url: str,\n    start: datetime,\n    end: datetime,\n    space_id: str | None = None,\n) -> list[dict]:\n    endpoint = axero_base_url + \"api/content/list\"\n    page_num = 1\n    pages_fetched = 0\n    pages_to_return = []\n    break_out = False\n    while True:\n        params = {\n            \"EntityType\": str(entity_type),\n            \"SortColumn\": \"DateUpdated\",\n            \"SortOrder\": \"1\",  # descending\n            \"StartPage\": str(page_num),\n        }\n\n        if space_id is not None:\n            params[\"SpaceID\"] = space_id\n\n        res = _rate_limited_request(\n            endpoint, headers=_get_auth_header(api_key), params=params\n        )\n        res.raise_for_status()\n\n        # Axero limitations:\n        # No next page token, can paginate but things may have changed\n        # for example, a doc that hasn't been read in by Onyx is updated and is now front of the list\n        # due to this limitation and the fact that Axero has no rate limiting but API calls can cause\n        # increased latency for the team, we have to just fetch all the pages quickly to reduce the\n        # chance of missing a document due to an update (it will still get updated next pass)\n        # Assumes the volume of data isn't too big to store in memory (probably fine)\n        data = res.json()\n        total_records = data[\"TotalRecords\"]\n        contents = data[\"ResponseData\"]\n        pages_fetched += len(contents)\n        logger.debug(f\"Fetched {pages_fetched} {ENTITY_NAME_MAP[entity_type]}\")\n\n        for page in contents:\n            update_time = time_str_to_utc(page[\"DateUpdated\"])\n\n            if update_time > end:\n                continue\n\n            if update_time < start:\n                break_out = True\n                break\n\n            pages_to_return.append(page)\n\n        if pages_fetched >= total_records:\n            break\n\n        page_num += 1\n\n        if break_out:\n            break\n\n    return pages_to_return\n\n\ndef _get_obj_by_id(obj_id: int, api_key: str, axero_base_url: str) -> dict:\n    endpoint = axero_base_url + f\"api/content/{obj_id}\"\n    res = _rate_limited_request(endpoint, headers=_get_auth_header(api_key))\n    res.raise_for_status()\n\n    return res.json()\n\n\nclass AxeroForum(BaseModel):\n    doc_id: str\n    title: str\n    link: str\n    initial_content: str\n    responses: list[str]\n    last_update: datetime\n\n\ndef _map_post_to_parent(\n    posts: dict,\n    api_key: str,\n    axero_base_url: str,\n) -> list[AxeroForum]:\n    \"\"\"Cannot handle in batches since the posts aren't ordered or structured in any way\n    may need to map any number of them to the initial post\"\"\"\n    epoch_str = \"1970-01-01T00:00:00.000\"\n    post_map: dict[int, AxeroForum] = {}\n\n    for ind, post in enumerate(posts):\n        if (ind + 1) % 25 == 0:\n            logger.debug(f\"Processed {ind + 1} posts or responses\")\n\n        post_time = time_str_to_utc(\n            post.get(\"DateUpdated\") or post.get(\"DateCreated\") or epoch_str\n        )\n        p_id = post.get(\"ParentContentID\")\n        if p_id in post_map:\n            axero_forum = post_map[p_id]\n            axero_forum.responses.insert(0, post.get(\"ContentSummary\"))\n            axero_forum.last_update = max(axero_forum.last_update, post_time)\n        else:\n            initial_post_d = _get_obj_by_id(p_id, api_key, axero_base_url)[\n                \"ResponseData\"\n            ]\n            initial_post_time = time_str_to_utc(\n                initial_post_d.get(\"DateUpdated\")\n                or initial_post_d.get(\"DateCreated\")\n                or epoch_str\n            )\n            post_map[p_id] = AxeroForum(\n                doc_id=\"AXERO_\" + str(initial_post_d.get(\"ContentID\")),\n                title=initial_post_d.get(\"ContentTitle\"),\n                link=initial_post_d.get(\"ContentURL\"),\n                initial_content=initial_post_d.get(\"ContentSummary\"),\n                responses=[post.get(\"ContentSummary\")],\n                last_update=max(post_time, initial_post_time),\n            )\n\n    return list(post_map.values())\n\n\ndef _get_forums(\n    api_key: str,\n    axero_base_url: str,\n    space_id: str | None = None,\n) -> list[dict]:\n    endpoint = axero_base_url + \"api/content/list\"\n    page_num = 1\n    pages_fetched = 0\n    pages_to_return = []\n    break_out = False\n\n    while True:\n        params = {\n            \"EntityType\": \"54\",\n            \"SortColumn\": \"DateUpdated\",\n            \"SortOrder\": \"1\",  # descending\n            \"StartPage\": str(page_num),\n        }\n\n        if space_id is not None:\n            params[\"SpaceID\"] = space_id\n\n        res = _rate_limited_request(\n            endpoint, headers=_get_auth_header(api_key), params=params\n        )\n        res.raise_for_status()\n\n        data = res.json()\n        total_records = data[\"TotalRecords\"]\n        contents = data[\"ResponseData\"]\n        pages_fetched += len(contents)\n        logger.debug(f\"Fetched {pages_fetched} forums\")\n\n        for page in contents:\n            pages_to_return.append(page)\n\n        if pages_fetched >= total_records:\n            break\n\n        page_num += 1\n\n        if break_out:\n            break\n\n    return pages_to_return\n\n\ndef _translate_forum_to_doc(af: AxeroForum) -> Document:\n    doc = Document(\n        id=af.doc_id,\n        sections=[TextSection(link=af.link, text=reply) for reply in af.responses],\n        source=DocumentSource.AXERO,\n        semantic_identifier=af.title,\n        doc_updated_at=af.last_update,\n        metadata={},\n    )\n\n    return doc\n\n\ndef _translate_content_to_doc(content: dict) -> Document:\n    page_text = \"\"\n    summary = content.get(\"ContentSummary\")\n    body = content.get(\"ContentBody\")\n    if summary:\n        page_text += f\"{summary}\\n\"\n\n    if body:\n        content_parsed = parse_html_page_basic(body)\n        page_text += content_parsed\n\n    doc = Document(\n        id=\"AXERO_\" + str(content[\"ContentID\"]),\n        sections=[TextSection(link=content[\"ContentURL\"], text=page_text)],\n        source=DocumentSource.AXERO,\n        semantic_identifier=content[\"ContentTitle\"],\n        doc_updated_at=time_str_to_utc(content[\"DateUpdated\"]),\n        metadata={\"space\": content[\"SpaceName\"]},\n    )\n\n    return doc\n\n\nclass AxeroConnector(PollConnector):\n    def __init__(\n        self,\n        # Strings of the integer ids of the spaces\n        spaces: list[str] | None = None,\n        include_article: bool = True,\n        include_blog: bool = True,\n        include_wiki: bool = True,\n        include_forum: bool = True,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        self.include_article = include_article\n        self.include_blog = include_blog\n        self.include_wiki = include_wiki\n        self.include_forum = include_forum\n        self.batch_size = batch_size\n        self.space_ids = spaces\n        self.axero_key = None\n        self.base_url = None\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self.axero_key = credentials[\"axero_api_token\"]\n        # As the API key specifically applies to a particular deployment, this is\n        # included as part of the credential\n        base_url = credentials[\"base_url\"]\n        if not base_url.endswith(\"/\"):\n            base_url += \"/\"\n        self.base_url = base_url\n        return None\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        if not self.axero_key or not self.base_url:\n            raise ConnectorMissingCredentialError(\"Axero\")\n\n        start_datetime = datetime.utcfromtimestamp(start).replace(tzinfo=timezone.utc)\n        end_datetime = datetime.utcfromtimestamp(end).replace(tzinfo=timezone.utc)\n\n        entity_types = []\n        if self.include_article:\n            entity_types.append(3)\n        if self.include_blog:\n            entity_types.append(4)\n        if self.include_wiki:\n            entity_types.append(9)\n\n        iterable_space_ids = self.space_ids if self.space_ids else [None]\n\n        for space_id in iterable_space_ids:\n            for entity in entity_types:\n                axero_obj = _get_entities(\n                    entity_type=entity,\n                    api_key=self.axero_key,\n                    axero_base_url=self.base_url,\n                    start=start_datetime,\n                    end=end_datetime,\n                    space_id=space_id,\n                )\n                yield from process_in_batches(\n                    objects=axero_obj,\n                    process_function=_translate_content_to_doc,\n                    batch_size=self.batch_size,\n                )\n\n            if self.include_forum:\n                forums_posts = _get_forums(\n                    api_key=self.axero_key,\n                    axero_base_url=self.base_url,\n                    space_id=space_id,\n                )\n\n                all_axero_forums = _map_post_to_parent(\n                    posts=forums_posts,\n                    api_key=self.axero_key,\n                    axero_base_url=self.base_url,\n                )\n\n                filtered_forums = [\n                    f\n                    for f in all_axero_forums\n                    if f.last_update >= start_datetime and f.last_update <= end_datetime\n                ]\n\n                yield from process_in_batches(\n                    objects=filtered_forums,\n                    process_function=_translate_forum_to_doc,\n                    batch_size=self.batch_size,\n                )\n\n\nif __name__ == \"__main__\":\n    import os\n\n    connector = AxeroConnector()\n    connector.load_credentials(\n        {\n            \"axero_api_token\": os.environ[\"AXERO_API_TOKEN\"],\n            \"base_url\": os.environ[\"AXERO_BASE_URL\"],\n        }\n    )\n    current = time.time()\n\n    one_year_ago = current - 24 * 60 * 60 * 360\n    latest_docs = connector.poll_source(one_year_ago, current)\n\n    print(next(latest_docs))\n"
  },
  {
    "path": "backend/onyx/connectors/bitbucket/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/bitbucket/connector.py",
    "content": "from __future__ import annotations\n\nimport copy\nfrom collections.abc import Callable\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import TYPE_CHECKING\n\nfrom typing_extensions import override\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.bitbucket.utils import build_auth_client\nfrom onyx.connectors.bitbucket.utils import list_repositories\nfrom onyx.connectors.bitbucket.utils import map_pr_to_document\nfrom onyx.connectors.bitbucket.utils import paginate\nfrom onyx.connectors.bitbucket.utils import PR_LIST_RESPONSE_FIELDS\nfrom onyx.connectors.bitbucket.utils import SLIM_PR_LIST_RESPONSE_FIELDS\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.interfaces import CheckpointedConnector\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\nif TYPE_CHECKING:\n    import httpx\n\nlogger = setup_logger()\n\n\nclass BitbucketConnectorCheckpoint(ConnectorCheckpoint):\n    \"\"\"Checkpoint state for resumable Bitbucket PR indexing.\n\n    Fields:\n        repos_queue: Materialized list of repository slugs to process.\n        current_repo_index: Index of the repository currently being processed.\n        next_url: Bitbucket \"next\" URL for continuing pagination within the current repo.\n    \"\"\"\n\n    repos_queue: list[str] = []\n    current_repo_index: int = 0\n    next_url: str | None = None\n\n\nclass BitbucketConnector(\n    CheckpointedConnector[BitbucketConnectorCheckpoint],\n    SlimConnectorWithPermSync,\n):\n    \"\"\"Connector for indexing Bitbucket Cloud pull requests.\n\n    Args:\n        workspace: Bitbucket workspace ID.\n        repositories: Comma-separated list of repository slugs to index.\n        projects: Comma-separated list of project keys to index all repositories within.\n        batch_size: Max number of documents to yield per batch.\n    \"\"\"\n\n    def __init__(\n        self,\n        workspace: str,\n        repositories: str | None = None,\n        projects: str | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        self.workspace = workspace\n        self._repositories = (\n            [s.strip() for s in repositories.split(\",\") if s.strip()]\n            if repositories\n            else None\n        )\n        self._projects: list[str] | None = (\n            [s.strip() for s in projects.split(\",\") if s.strip()] if projects else None\n        )\n        self.batch_size = batch_size\n        self.email: str | None = None\n        self.api_token: str | None = None\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        \"\"\"Load API token-based credentials.\n\n        Expects a dict with keys: `bitbucket_email`, `bitbucket_api_token`.\n        \"\"\"\n        self.email = credentials.get(\"bitbucket_email\")\n        self.api_token = credentials.get(\"bitbucket_api_token\")\n        if not self.email or not self.api_token:\n            raise ConnectorMissingCredentialError(\"Bitbucket\")\n        return None\n\n    def _client(self) -> httpx.Client:\n        \"\"\"Build an authenticated HTTP client or raise if credentials missing.\"\"\"\n        if not self.email or not self.api_token:\n            raise ConnectorMissingCredentialError(\"Bitbucket\")\n        return build_auth_client(self.email, self.api_token)\n\n    def _iter_pull_requests_for_repo(\n        self,\n        client: httpx.Client,\n        repo_slug: str,\n        params: dict[str, Any] | None = None,\n        start_url: str | None = None,\n        on_page: Callable[[str | None], None] | None = None,\n    ) -> Iterator[dict[str, Any]]:\n        base = f\"https://api.bitbucket.org/2.0/repositories/{self.workspace}/{repo_slug}/pullrequests\"\n        yield from paginate(\n            client,\n            base,\n            params,\n            start_url=start_url,\n            on_page=on_page,\n        )\n\n    def _build_params(\n        self,\n        fields: str = PR_LIST_RESPONSE_FIELDS,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> dict[str, Any]:\n        \"\"\"Build Bitbucket fetch params.\n\n        Always include OPEN, MERGED, and DECLINED PRs. If both ``start`` and\n        ``end`` are provided, apply a single updated_on time window.\n        \"\"\"\n\n        def _iso(ts: SecondsSinceUnixEpoch) -> str:\n            return datetime.fromtimestamp(ts, tz=timezone.utc).isoformat()\n\n        def _tc_epoch(\n            lower_epoch: SecondsSinceUnixEpoch | None,\n            upper_epoch: SecondsSinceUnixEpoch | None,\n        ) -> str | None:\n            if lower_epoch is not None and upper_epoch is not None:\n                lower_iso = _iso(lower_epoch)\n                upper_iso = _iso(upper_epoch)\n                return f'(updated_on >= \"{lower_iso}\" AND updated_on <= \"{upper_iso}\")'\n            return None\n\n        params: dict[str, Any] = {\"fields\": fields, \"pagelen\": 50}\n        time_clause = _tc_epoch(start, end)\n        q = '(state = \"OPEN\" OR state = \"MERGED\" OR state = \"DECLINED\")'\n        if time_clause:\n            q = f\"{q} AND {time_clause}\"\n        params[\"q\"] = q\n        return params\n\n    def _iter_target_repositories(self, client: httpx.Client) -> Iterator[str]:\n        \"\"\"Yield repository slugs based on configuration.\n\n        Priority:\n        - repositories list\n        - projects list (list repos by project key)\n        - workspace (all repos)\n        \"\"\"\n        if self._repositories:\n            for slug in self._repositories:\n                yield slug\n            return\n        if self._projects:\n            for project_key in self._projects:\n                for repo in list_repositories(client, self.workspace, project_key):\n                    slug_val = repo.get(\"slug\")\n                    if isinstance(slug_val, str) and slug_val:\n                        yield slug_val\n            return\n        for repo in list_repositories(client, self.workspace, None):\n            slug_val = repo.get(\"slug\")\n            if isinstance(slug_val, str) and slug_val:\n                yield slug_val\n\n    @override\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: BitbucketConnectorCheckpoint,\n    ) -> CheckpointOutput[BitbucketConnectorCheckpoint]:\n        \"\"\"Resumable PR ingestion across repos and pages within a time window.\n\n        Yields Documents (or ConnectorFailure for per-PR mapping failures) and returns\n        an updated checkpoint that records repo position and next page URL.\n        \"\"\"\n        new_checkpoint = copy.deepcopy(checkpoint)\n\n        with self._client() as client:\n            # Materialize target repositories once\n            if not new_checkpoint.repos_queue:\n                # Preserve explicit order; otherwise ensure deterministic ordering\n                repos_list = list(self._iter_target_repositories(client))\n                new_checkpoint.repos_queue = sorted(set(repos_list))\n                new_checkpoint.current_repo_index = 0\n                new_checkpoint.next_url = None\n\n            repos = new_checkpoint.repos_queue\n            if not repos or new_checkpoint.current_repo_index >= len(repos):\n                new_checkpoint.has_more = False\n                return new_checkpoint\n\n            repo_slug = repos[new_checkpoint.current_repo_index]\n\n            first_page_params = self._build_params(\n                fields=PR_LIST_RESPONSE_FIELDS,\n                start=start,\n                end=end,\n            )\n\n            def _on_page(next_url: str | None) -> None:\n                new_checkpoint.next_url = next_url\n\n            for pr in self._iter_pull_requests_for_repo(\n                client,\n                repo_slug,\n                params=first_page_params,\n                start_url=new_checkpoint.next_url,\n                on_page=_on_page,\n            ):\n                try:\n                    document = map_pr_to_document(pr, self.workspace, repo_slug)\n                    yield document\n                except Exception as e:\n                    pr_id = pr.get(\"id\")\n                    pr_link = (\n                        f\"https://bitbucket.org/{self.workspace}/{repo_slug}/pull-requests/{pr_id}\"\n                        if pr_id is not None\n                        else None\n                    )\n                    yield ConnectorFailure(\n                        failed_document=DocumentFailure(\n                            document_id=(\n                                f\"{DocumentSource.BITBUCKET.value}:{self.workspace}:{repo_slug}:pr:{pr_id}\"\n                                if pr_id is not None\n                                else f\"{DocumentSource.BITBUCKET.value}:{self.workspace}:{repo_slug}:pr:unknown\"\n                            ),\n                            document_link=pr_link,\n                        ),\n                        failure_message=f\"Failed to process Bitbucket PR: {e}\",\n                        exception=e,\n                    )\n\n            # Advance to next repository (if any) and set has_more accordingly\n            new_checkpoint.current_repo_index += 1\n            new_checkpoint.next_url = None\n            new_checkpoint.has_more = new_checkpoint.current_repo_index < len(repos)\n\n        return new_checkpoint\n\n    @override\n    def build_dummy_checkpoint(self) -> BitbucketConnectorCheckpoint:\n        \"\"\"Create an initial checkpoint with work remaining.\"\"\"\n        return BitbucketConnectorCheckpoint(has_more=True)\n\n    @override\n    def validate_checkpoint_json(\n        self, checkpoint_json: str\n    ) -> BitbucketConnectorCheckpoint:\n        \"\"\"Validate and deserialize a checkpoint instance from JSON.\"\"\"\n        return BitbucketConnectorCheckpoint.model_validate_json(checkpoint_json)\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> Iterator[list[SlimDocument | HierarchyNode]]:\n        \"\"\"Return only document IDs for all existing pull requests.\"\"\"\n        batch: list[SlimDocument | HierarchyNode] = []\n        params = self._build_params(\n            fields=SLIM_PR_LIST_RESPONSE_FIELDS,\n            start=start,\n            end=end,\n        )\n        with self._client() as client:\n            for slug in self._iter_target_repositories(client):\n                for pr in self._iter_pull_requests_for_repo(\n                    client, slug, params=params\n                ):\n                    pr_id = pr[\"id\"]\n                    doc_id = f\"{DocumentSource.BITBUCKET.value}:{self.workspace}:{slug}:pr:{pr_id}\"\n                    batch.append(SlimDocument(id=doc_id))\n                    if len(batch) >= self.batch_size:\n                        yield batch\n                        batch = []\n                        if callback:\n                            if callback.should_stop():\n                                # Note: this is not actually used for permission sync yet, just pruning\n                                raise RuntimeError(\n                                    \"bitbucket_pr_sync: Stop signal detected\"\n                                )\n                            callback.progress(\"bitbucket_pr_sync\", len(batch))\n        if batch:\n            yield batch\n\n    def validate_connector_settings(self) -> None:\n        \"\"\"Validate Bitbucket credentials and workspace access by probing a lightweight endpoint.\n\n        Raises:\n            CredentialExpiredError: on HTTP 401\n            InsufficientPermissionsError: on HTTP 403\n            UnexpectedValidationError: on any other failure\n        \"\"\"\n        try:\n            with self._client() as client:\n                url = f\"https://api.bitbucket.org/2.0/repositories/{self.workspace}\"\n                resp = client.get(\n                    url,\n                    params={\"pagelen\": 1, \"fields\": \"pagelen\"},\n                    timeout=REQUEST_TIMEOUT_SECONDS,\n                )\n                if resp.status_code == 401:\n                    raise CredentialExpiredError(\n                        \"Invalid or expired Bitbucket credentials (HTTP 401).\"\n                    )\n                if resp.status_code == 403:\n                    raise InsufficientPermissionsError(\n                        \"Insufficient permissions to access Bitbucket workspace (HTTP 403).\"\n                    )\n                if resp.status_code < 200 or resp.status_code >= 300:\n                    raise UnexpectedValidationError(\n                        f\"Unexpected Bitbucket error (status={resp.status_code}).\"\n                    )\n        except Exception as e:\n            # Network or other unexpected errors\n            if isinstance(\n                e,\n                (\n                    CredentialExpiredError,\n                    InsufficientPermissionsError,\n                    UnexpectedValidationError,\n                    ConnectorMissingCredentialError,\n                ),\n            ):\n                raise\n            raise UnexpectedValidationError(\n                f\"Unexpected error while validating Bitbucket settings: {e}\"\n            )\n"
  },
  {
    "path": "backend/onyx/connectors/bitbucket/utils.py",
    "content": "from __future__ import annotations\n\nimport time\nfrom collections.abc import Callable\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nimport httpx\n\nfrom onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rate_limit_builder,\n)\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\nlogger = setup_logger()\n\n# Fields requested from Bitbucket PR list endpoint to ensure rich PR data\nPR_LIST_RESPONSE_FIELDS: str = \",\".join(\n    [\n        \"next\",\n        \"page\",\n        \"pagelen\",\n        \"values.author\",\n        \"values.close_source_branch\",\n        \"values.closed_by\",\n        \"values.comment_count\",\n        \"values.created_on\",\n        \"values.description\",\n        \"values.destination\",\n        \"values.draft\",\n        \"values.id\",\n        \"values.links\",\n        \"values.merge_commit\",\n        \"values.participants\",\n        \"values.reason\",\n        \"values.rendered\",\n        \"values.reviewers\",\n        \"values.source\",\n        \"values.state\",\n        \"values.summary\",\n        \"values.task_count\",\n        \"values.title\",\n        \"values.type\",\n        \"values.updated_on\",\n    ]\n)\n\n# Minimal fields for slim retrieval (IDs only)\nSLIM_PR_LIST_RESPONSE_FIELDS: str = \",\".join(\n    [\n        \"next\",\n        \"page\",\n        \"pagelen\",\n        \"values.id\",\n    ]\n)\n\n\n# Minimal fields for repository list calls\nREPO_LIST_RESPONSE_FIELDS: str = \",\".join(\n    [\n        \"next\",\n        \"page\",\n        \"pagelen\",\n        \"values.slug\",\n        \"values.full_name\",\n        \"values.project.key\",\n    ]\n)\n\n\nclass BitbucketRetriableError(Exception):\n    \"\"\"Raised for retriable Bitbucket conditions (429, 5xx).\"\"\"\n\n\nclass BitbucketNonRetriableError(Exception):\n    \"\"\"Raised for non-retriable Bitbucket client errors (4xx except 429).\"\"\"\n\n\n@retry_builder(\n    tries=6,\n    delay=1,\n    backoff=2,\n    max_delay=30,\n    exceptions=(BitbucketRetriableError, httpx.RequestError),\n)\n@rate_limit_builder(max_calls=60, period=60)\ndef bitbucket_get(\n    client: httpx.Client, url: str, params: dict[str, Any] | None = None\n) -> httpx.Response:\n    \"\"\"Perform a GET against Bitbucket with retry and rate limiting.\n\n    Retries on 429 and 5xx responses, and on transport errors. Honors\n    `Retry-After` header for 429 when present by sleeping before retrying.\n    \"\"\"\n    try:\n        response = client.get(url, params=params, timeout=REQUEST_TIMEOUT_SECONDS)\n    except httpx.RequestError:\n        # Allow retry_builder to handle retries of transport errors\n        raise\n\n    try:\n        response.raise_for_status()\n    except httpx.HTTPStatusError as e:\n        status = e.response.status_code if e.response is not None else None\n        if status == 429:\n            retry_after = e.response.headers.get(\"Retry-After\") if e.response else None\n            if retry_after is not None:\n                try:\n                    time.sleep(int(retry_after))\n                except (TypeError, ValueError):\n                    pass\n            raise BitbucketRetriableError(\"Bitbucket rate limit exceeded (429)\") from e\n        if status is not None and 500 <= status < 600:\n            raise BitbucketRetriableError(f\"Bitbucket server error: {status}\") from e\n        if status is not None and 400 <= status < 500:\n            raise BitbucketNonRetriableError(f\"Bitbucket client error: {status}\") from e\n        # Unknown status, propagate\n        raise\n\n    return response\n\n\ndef build_auth_client(email: str, api_token: str) -> httpx.Client:\n    \"\"\"Create an authenticated httpx client for Bitbucket Cloud API.\"\"\"\n    return httpx.Client(auth=(email, api_token), http2=True)\n\n\ndef paginate(\n    client: httpx.Client,\n    url: str,\n    params: dict[str, Any] | None = None,\n    start_url: str | None = None,\n    on_page: Callable[[str | None], None] | None = None,\n) -> Iterator[dict[str, Any]]:\n    \"\"\"Iterate over paginated Bitbucket API responses yielding individual values.\n\n    Args:\n        client: Authenticated HTTP client.\n        url: Base collection URL (first page when start_url is None).\n        params: Query params for the first page.\n        start_url: If provided, start from this absolute URL (ignores params).\n        on_page: Optional callback invoked after each page with the next page URL.\n    \"\"\"\n    next_url = start_url or url\n    # If resuming from a next URL, do not pass params again\n    query = params.copy() if params else None\n    query = None if start_url else query\n    while next_url:\n        resp = bitbucket_get(client, next_url, params=query)\n        data = resp.json()\n        values = data.get(\"values\", [])\n        for item in values:\n            yield item\n        next_url = data.get(\"next\")\n        if on_page is not None:\n            on_page(next_url)\n        # only include params on first call, next_url will contain all necessary params\n        query = None\n\n\ndef list_repositories(\n    client: httpx.Client, workspace: str, project_key: str | None = None\n) -> Iterator[dict[str, Any]]:\n    \"\"\"List repositories in a workspace, optionally filtered by project key.\"\"\"\n    base_url = f\"https://api.bitbucket.org/2.0/repositories/{workspace}\"\n    params: dict[str, Any] = {\n        \"fields\": REPO_LIST_RESPONSE_FIELDS,\n        \"pagelen\": 100,\n        # Ensure deterministic ordering\n        \"sort\": \"full_name\",\n    }\n    if project_key:\n        params[\"q\"] = f'project.key=\"{project_key}\"'\n    yield from paginate(client, base_url, params)\n\n\ndef map_pr_to_document(pr: dict[str, Any], workspace: str, repo_slug: str) -> Document:\n    \"\"\"Map a Bitbucket pull request JSON to Onyx Document.\"\"\"\n    pr_id = pr[\"id\"]\n    title = pr.get(\"title\") or f\"PR {pr_id}\"\n    description = pr.get(\"description\") or \"\"\n    state = pr.get(\"state\")\n    draft = pr.get(\"draft\", False)\n    author = pr.get(\"author\", {})\n    reviewers = pr.get(\"reviewers\", [])\n    participants = pr.get(\"participants\", [])\n\n    link = pr.get(\"links\", {}).get(\"html\", {}).get(\"href\") or (\n        f\"https://bitbucket.org/{workspace}/{repo_slug}/pull-requests/{pr_id}\"\n    )\n\n    created_on = pr.get(\"created_on\")\n    updated_on = pr.get(\"updated_on\")\n    updated_dt = (\n        datetime.fromisoformat(updated_on.replace(\"Z\", \"+00:00\")).astimezone(\n            timezone.utc\n        )\n        if isinstance(updated_on, str)\n        else None\n    )\n\n    source_branch = pr.get(\"source\", {}).get(\"branch\", {}).get(\"name\", \"\")\n    destination_branch = pr.get(\"destination\", {}).get(\"branch\", {}).get(\"name\", \"\")\n\n    approved_by = [\n        _get_user_name(p.get(\"user\", {})) for p in participants if p.get(\"approved\")\n    ]\n\n    primary_owner = None\n    if author:\n        primary_owner = BasicExpertInfo(\n            display_name=_get_user_name(author),\n        )\n\n    secondary_owners = [\n        BasicExpertInfo(display_name=_get_user_name(r)) for r in reviewers\n    ] or None\n\n    reviewer_names = [_get_user_name(r) for r in reviewers]\n\n    # Create a concise summary of key PR info\n    created_date = created_on.split(\"T\")[0] if created_on else \"N/A\"\n    updated_date = updated_on.split(\"T\")[0] if updated_on else \"N/A\"\n    content_text = (\n        \"Pull Request Information:\\n\"\n        f\"- Pull Request ID: {pr_id}\\n\"\n        f\"- Title: {title}\\n\"\n        f\"- State: {state or 'N/A'} {'(Draft)' if draft else ''}\\n\"\n    )\n    if state == \"DECLINED\":\n        content_text += f\"- Reason: {pr.get('reason', 'N/A')}\\n\"\n    content_text += (\n        f\"- Author: {_get_user_name(author) if author else 'N/A'}\\n\"\n        f\"- Reviewers: {', '.join(reviewer_names) if reviewer_names else 'N/A'}\\n\"\n        f\"- Branch: {source_branch} -> {destination_branch}\\n\"\n        f\"- Created: {created_date}\\n\"\n        f\"- Updated: {updated_date}\"\n    )\n    if description:\n        content_text += f\"\\n\\nDescription:\\n{description}\"\n    sections: list[TextSection | ImageSection] = [\n        TextSection(link=link, text=content_text)\n    ]\n\n    metadata: dict[str, str | list[str]] = {\n        \"object_type\": \"PullRequest\",\n        \"workspace\": workspace,\n        \"repository\": repo_slug,\n        \"pr_key\": f\"{workspace}/{repo_slug}#{pr_id}\",\n        \"id\": str(pr_id),\n        \"title\": title,\n        \"state\": state or \"\",\n        \"draft\": str(bool(draft)),\n        \"link\": link,\n        \"author\": _get_user_name(author) if author else \"\",\n        \"reviewers\": reviewer_names,\n        \"approved_by\": approved_by,\n        \"comment_count\": str(pr.get(\"comment_count\", \"\")),\n        \"task_count\": str(pr.get(\"task_count\", \"\")),\n        \"created_on\": created_on or \"\",\n        \"updated_on\": updated_on or \"\",\n        \"source_branch\": source_branch,\n        \"destination_branch\": destination_branch,\n        \"closed_by\": (\n            _get_user_name(pr.get(\"closed_by\", {})) if pr.get(\"closed_by\") else \"\"\n        ),\n        \"close_source_branch\": str(bool(pr.get(\"close_source_branch\", False))),\n    }\n\n    return Document(\n        id=f\"{DocumentSource.BITBUCKET.value}:{workspace}:{repo_slug}:pr:{pr_id}\",\n        sections=sections,\n        source=DocumentSource.BITBUCKET,\n        semantic_identifier=f\"#{pr_id}: {title}\",\n        title=title,\n        doc_updated_at=updated_dt,\n        primary_owners=[primary_owner] if primary_owner else None,\n        secondary_owners=secondary_owners,\n        metadata=metadata,\n    )\n\n\ndef _get_user_name(user: dict[str, Any]) -> str:\n    return user.get(\"display_name\") or user.get(\"nickname\") or \"unknown\"\n"
  },
  {
    "path": "backend/onyx/connectors/blob/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/blob/connector.py",
    "content": "import os\nimport time\nfrom collections.abc import Mapping\nfrom datetime import datetime\nfrom datetime import timezone\nfrom io import BytesIO\nfrom numbers import Integral\nfrom typing import Any\nfrom typing import Optional\nfrom urllib.parse import quote\n\nimport boto3\nfrom botocore.client import Config\nfrom botocore.credentials import RefreshableCredentials\nfrom botocore.exceptions import ClientError\nfrom botocore.exceptions import NoCredentialsError\nfrom botocore.exceptions import PartialCredentialsError\nfrom botocore.session import get_session\nfrom mypy_boto3_s3 import S3Client\n\nfrom onyx.configs.app_configs import BLOB_STORAGE_SIZE_THRESHOLD\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import BlobType\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    process_onyx_metadata,\n)\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.extract_file_text import extract_text_and_images\nfrom onyx.file_processing.extract_file_text import get_file_ext\nfrom onyx.file_processing.file_types import OnyxFileExtensions\nfrom onyx.file_processing.image_utils import store_image_and_create_section\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nDOWNLOAD_CHUNK_SIZE = 1024 * 1024\nSIZE_THRESHOLD_BUFFER = 64\n\n\nclass BlobStorageConnector(LoadConnector, PollConnector):\n    def __init__(\n        self,\n        bucket_type: str,\n        bucket_name: str,\n        prefix: str = \"\",\n        batch_size: int = INDEX_BATCH_SIZE,\n        european_residency: bool = False,\n    ) -> None:\n        self.bucket_type: BlobType = BlobType(bucket_type)\n        self.bucket_name = bucket_name.strip()\n        self.prefix = prefix if not prefix or prefix.endswith(\"/\") else prefix + \"/\"\n        self.batch_size = batch_size\n        self.s3_client: Optional[S3Client] = None\n        self._allow_images: bool | None = None\n        self.size_threshold: int | None = BLOB_STORAGE_SIZE_THRESHOLD\n        self.bucket_region: Optional[str] = None\n        self.european_residency: bool = european_residency\n\n    def set_allow_images(self, allow_images: bool) -> None:\n        \"\"\"Set whether to process images in this connector.\"\"\"\n        logger.info(f\"Setting allow_images to {allow_images}.\")\n        self._allow_images = allow_images\n\n    def _detect_bucket_region(self) -> None:\n        \"\"\"Detect and cache the actual region of the S3 bucket using head_bucket.\"\"\"\n        if self.s3_client is None:\n            logger.warning(\n                \"S3 client not initialized. Skipping bucket region detection.\"\n            )\n            return\n\n        try:\n            response = self.s3_client.head_bucket(Bucket=self.bucket_name)\n            # The region is in the response headers as 'x-amz-bucket-region'\n            self.bucket_region = response.get(\"BucketRegion\") or response.get(\n                \"ResponseMetadata\", {}\n            ).get(\"HTTPHeaders\", {}).get(\"x-amz-bucket-region\")\n\n            if self.bucket_region:\n                logger.debug(f\"Detected bucket region: {self.bucket_region}\")\n            else:\n                logger.warning(\"Bucket region not found in head_bucket response\")\n        except Exception as e:\n            logger.warning(f\"Failed to detect bucket region via head_bucket: {e}\")\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        \"\"\"Checks for boto3 credentials based on the bucket type.\n        (1) R2: Access Key ID, Secret Access Key, Account ID\n        (2) S3: AWS Access Key ID, AWS Secret Access Key or IAM role or Assume Role\n        (3) GOOGLE_CLOUD_STORAGE: Access Key ID, Secret Access Key, Project ID\n        (4) OCI_STORAGE: Namespace, Region, Access Key ID, Secret Access Key\n\n        For each bucket type, the method initializes the appropriate S3 client:\n        - R2: Uses Cloudflare R2 endpoint with S3v4 signature\n        - S3: Creates a standard boto3 S3 client\n        - GOOGLE_CLOUD_STORAGE: Uses Google Cloud Storage endpoint\n        - OCI_STORAGE: Uses Oracle Cloud Infrastructure Object Storage endpoint\n\n        Raises ConnectorMissingCredentialError if required credentials are missing.\n        Raises ValueError for unsupported bucket types.\n        \"\"\"\n\n        logger.debug(\n            f\"Loading credentials for {self.bucket_name} or type {self.bucket_type}\"\n        )\n\n        if self.bucket_type == BlobType.R2:\n            if not all(\n                credentials.get(key)\n                for key in [\"r2_access_key_id\", \"r2_secret_access_key\", \"account_id\"]\n            ):\n                raise ConnectorMissingCredentialError(\"Cloudflare R2\")\n\n            # Use EU endpoint if european_residency is enabled\n            subdomain = \"eu.\" if self.european_residency else \"\"\n            endpoint_url = f\"https://{credentials['account_id']}.{subdomain}r2.cloudflarestorage.com\"\n\n            self.s3_client = boto3.client(\n                \"s3\",\n                endpoint_url=endpoint_url,\n                aws_access_key_id=credentials[\"r2_access_key_id\"],\n                aws_secret_access_key=credentials[\"r2_secret_access_key\"],\n                region_name=\"auto\",\n                config=Config(signature_version=\"s3v4\"),\n            )\n\n        elif self.bucket_type == BlobType.S3:\n            # For S3, we can use either access keys or IAM roles.\n            authentication_method = credentials.get(\n                \"authentication_method\", \"access_key\"\n            )\n            logger.debug(\n                f\"Using authentication method: {authentication_method} for S3 bucket.\"\n            )\n            if authentication_method == \"access_key\":\n                logger.debug(\"Using access key authentication for S3 bucket.\")\n                if not all(\n                    credentials.get(key)\n                    for key in [\"aws_access_key_id\", \"aws_secret_access_key\"]\n                ):\n                    raise ConnectorMissingCredentialError(\"Amazon S3\")\n\n                session = boto3.Session(\n                    aws_access_key_id=credentials[\"aws_access_key_id\"],\n                    aws_secret_access_key=credentials[\"aws_secret_access_key\"],\n                )\n                self.s3_client = session.client(\"s3\")\n            elif authentication_method == \"iam_role\":\n                # If using IAM roles, we assume the role and let boto3 handle the credentials.\n                role_arn = credentials.get(\"aws_role_arn\")\n                # create session name using timestamp\n                if not role_arn:\n                    raise ConnectorMissingCredentialError(\n                        \"Amazon S3 IAM role ARN is required for assuming role.\"\n                    )\n\n                def _refresh_credentials() -> dict[str, str]:\n                    \"\"\"Refreshes the credentials for the assumed role.\"\"\"\n                    sts_client = boto3.client(\"sts\")\n                    assumed_role_object = sts_client.assume_role(\n                        RoleArn=role_arn,\n                        RoleSessionName=f\"onyx_blob_storage_{int(time.time())}\",\n                    )\n                    creds = assumed_role_object[\"Credentials\"]\n                    return {\n                        \"access_key\": creds[\"AccessKeyId\"],\n                        \"secret_key\": creds[\"SecretAccessKey\"],\n                        \"token\": creds[\"SessionToken\"],\n                        \"expiry_time\": creds[\"Expiration\"].isoformat(),\n                    }\n\n                refreshable = RefreshableCredentials.create_from_metadata(\n                    metadata=_refresh_credentials(),\n                    refresh_using=_refresh_credentials,\n                    method=\"sts-assume-role\",\n                )\n                botocore_session = get_session()\n                botocore_session._credentials = refreshable  # type: ignore[attr-defined]\n                session = boto3.Session(botocore_session=botocore_session)\n                self.s3_client = session.client(\"s3\")\n            elif authentication_method == \"assume_role\":\n                # We will assume the instance role to access S3.\n                logger.debug(\"Using instance role authentication for S3 bucket.\")\n                self.s3_client = boto3.client(\"s3\")\n            else:\n                raise ConnectorValidationError(\"Invalid authentication method for S3. \")\n\n            # This is important for correct citation links\n            # NOTE: the client region actually doesn't matter for accessing the bucket\n            self._detect_bucket_region()\n\n        elif self.bucket_type == BlobType.GOOGLE_CLOUD_STORAGE:\n            if not all(\n                credentials.get(key) for key in [\"access_key_id\", \"secret_access_key\"]\n            ):\n                raise ConnectorMissingCredentialError(\"Google Cloud Storage\")\n\n            self.s3_client = boto3.client(\n                \"s3\",\n                endpoint_url=\"https://storage.googleapis.com\",\n                aws_access_key_id=credentials[\"access_key_id\"],\n                aws_secret_access_key=credentials[\"secret_access_key\"],\n                region_name=\"auto\",\n            )\n\n        elif self.bucket_type == BlobType.OCI_STORAGE:\n            if not all(\n                credentials.get(key)\n                for key in [\"namespace\", \"region\", \"access_key_id\", \"secret_access_key\"]\n            ):\n                raise ConnectorMissingCredentialError(\"Oracle Cloud Infrastructure\")\n\n            self.s3_client = boto3.client(\n                \"s3\",\n                endpoint_url=f\"https://{credentials['namespace']}.compat.objectstorage.{credentials['region']}.oraclecloud.com\",\n                aws_access_key_id=credentials[\"access_key_id\"],\n                aws_secret_access_key=credentials[\"secret_access_key\"],\n                region_name=credentials[\"region\"],\n            )\n\n        else:\n            raise ValueError(f\"Unsupported bucket type: {self.bucket_type}\")\n\n        return None\n\n    def _download_object(self, key: str) -> bytes | None:\n        if self.s3_client is None:\n            raise ConnectorMissingCredentialError(\"Blob storage\")\n        response = self.s3_client.get_object(Bucket=self.bucket_name, Key=key)\n        body = response[\"Body\"]\n\n        try:\n            if self.size_threshold is None:\n                return body.read()\n\n            return self._read_stream_with_limit(body, key)\n        finally:\n            body.close()\n\n    def _read_stream_with_limit(self, body: Any, key: str) -> bytes | None:\n        if self.size_threshold is None:\n            return body.read()\n\n        bytes_read = 0\n        chunks: list[bytes] = []\n        chunk_size = min(\n            DOWNLOAD_CHUNK_SIZE, self.size_threshold + SIZE_THRESHOLD_BUFFER\n        )\n\n        for chunk in body.iter_chunks(chunk_size=chunk_size):\n            if not chunk:\n                continue\n            chunks.append(chunk)\n            bytes_read += len(chunk)\n\n            if bytes_read > self.size_threshold + SIZE_THRESHOLD_BUFFER:\n                logger.warning(\n                    f\"{key} exceeds size threshold of {self.size_threshold}. Skipping.\"\n                )\n                return None\n\n        return b\"\".join(chunks)\n\n    # NOTE: Left in as may be useful for one-off access to documents and sharing across orgs.\n    # def _get_presigned_url(self, key: str) -> str:\n    #     if self.s3_client is None:\n    #         raise ConnectorMissingCredentialError(\"Blog storage\")\n\n    #     url = self.s3_client.generate_presigned_url(\n    #         \"get_object\",\n    #         Params={\"Bucket\": self.bucket_name, \"Key\": key},\n    #         ExpiresIn=self.presign_length,\n    #     )\n    #     return url\n\n    def _get_blob_link(self, key: str) -> str:\n        # NOTE: We store the object dashboard URL instead of the actual object URL\n        # This is because the actual object URL requires S3 client authentication\n        # Accessing through the browser will always return an unauthorized error\n\n        if self.s3_client is None:\n            raise ConnectorMissingCredentialError(\"Blob storage\")\n\n        # URL encode the key to handle special characters, spaces, etc.\n        # safe='/' keeps forward slashes unencoded for proper path structure\n        encoded_key = quote(key, safe=\"/\")\n\n        if self.bucket_type == BlobType.R2:\n            account_id = self.s3_client.meta.endpoint_url.split(\"//\")[1].split(\".\")[0]\n            subdomain = \"eu/\" if self.european_residency else \"default/\"\n\n            return f\"https://dash.cloudflare.com/{account_id}/r2/{subdomain}buckets/{self.bucket_name}/objects/{encoded_key}/details\"\n\n        elif self.bucket_type == BlobType.S3:\n            region = self.bucket_region or self.s3_client.meta.region_name\n            return f\"https://s3.console.aws.amazon.com/s3/object/{self.bucket_name}?region={region}&prefix={encoded_key}\"\n\n        elif self.bucket_type == BlobType.GOOGLE_CLOUD_STORAGE:\n            return f\"https://console.cloud.google.com/storage/browser/_details/{self.bucket_name}/{encoded_key}\"\n\n        elif self.bucket_type == BlobType.OCI_STORAGE:\n            namespace = self.s3_client.meta.endpoint_url.split(\"//\")[1].split(\".\")[0]\n            region = self.s3_client.meta.region_name\n            return f\"https://objectstorage.{region}.oraclecloud.com/n/{namespace}/b/{self.bucket_name}/o/{encoded_key}\"\n\n        else:\n            # This should never happen!\n            raise ValueError(f\"Unsupported bucket type: {self.bucket_type}\")\n\n    @staticmethod\n    def _extract_size_bytes(obj: Mapping[str, Any]) -> int | None:\n        \"\"\"Return the first numeric size field found on the object metadata.\"\"\"\n\n        candidate_keys = (\n            \"Size\",\n            \"size\",\n            \"ContentLength\",\n            \"content_length\",\n            \"Content-Length\",\n            \"contentLength\",\n            \"bytes\",\n            \"Bytes\",\n        )\n\n        def _normalize(value: Any) -> int | None:\n            if value is None or isinstance(value, bool):\n                return None\n            if isinstance(value, Integral):\n                return int(value)\n            try:\n                numeric = float(value)\n            except (TypeError, ValueError):\n                return None\n            if numeric >= 0 and numeric.is_integer():\n                return int(numeric)\n            return None\n\n        for key in candidate_keys:\n            if key in obj:\n                normalized = _normalize(obj.get(key))\n                if normalized is not None:\n                    return normalized\n\n        for key, value in obj.items():\n            if not isinstance(key, str):\n                continue\n            lowered_key = key.lower()\n            if \"size\" in lowered_key or \"length\" in lowered_key:\n                normalized = _normalize(value)\n                if normalized is not None:\n                    return normalized\n\n        return None\n\n    def _yield_blob_objects(\n        self,\n        start: datetime,\n        end: datetime,\n    ) -> GenerateDocumentsOutput:\n        if self.s3_client is None:\n            raise ConnectorMissingCredentialError(\"Blob storage\")\n\n        paginator = self.s3_client.get_paginator(\"list_objects_v2\")\n        pages = paginator.paginate(Bucket=self.bucket_name, Prefix=self.prefix)\n\n        batch: list[Document | HierarchyNode] = []\n        for page in pages:\n            if \"Contents\" not in page:\n                continue\n\n            for obj in page[\"Contents\"]:\n                if obj[\"Key\"].endswith(\"/\"):\n                    continue\n\n                last_modified = obj[\"LastModified\"].replace(tzinfo=timezone.utc)\n\n                if not start <= last_modified <= end:\n                    continue\n\n                file_name = os.path.basename(obj[\"Key\"])\n                file_ext = get_file_ext(file_name)\n                key = obj[\"Key\"]\n                link = self._get_blob_link(key)\n\n                size_bytes = self._extract_size_bytes(obj)\n                if (\n                    self.size_threshold is not None\n                    and isinstance(size_bytes, int)\n                    and self.size_threshold is not None\n                    and size_bytes > self.size_threshold\n                ):\n                    logger.warning(\n                        f\"{file_name} exceeds size threshold of {self.size_threshold}. Skipping.\"\n                    )\n                    continue\n\n                # Handle image files\n                if file_ext in OnyxFileExtensions.IMAGE_EXTENSIONS:\n                    if not self._allow_images:\n                        logger.debug(\n                            f\"Skipping image file: {key} (image processing not enabled)\"\n                        )\n                        continue\n\n                    # Process the image file\n                    try:\n                        downloaded_file = self._download_object(key)\n                        if downloaded_file is None:\n                            continue\n\n                        # TODO: Refactor to avoid direct DB access in connector\n                        # This will require broader refactoring across the codebase\n                        image_section, _ = store_image_and_create_section(\n                            image_data=downloaded_file,\n                            file_id=f\"{self.bucket_type}_{self.bucket_name}_{key.replace('/', '_')}\",\n                            display_name=file_name,\n                            link=link,\n                            file_origin=FileOrigin.CONNECTOR,\n                        )\n\n                        batch.append(\n                            Document(\n                                id=f\"{self.bucket_type}:{self.bucket_name}:{key}\",\n                                sections=[image_section],\n                                source=DocumentSource(self.bucket_type.value),\n                                semantic_identifier=file_name,\n                                doc_updated_at=last_modified,\n                                metadata={},\n                            )\n                        )\n\n                        if len(batch) == self.batch_size:\n                            yield batch\n                            batch = []\n                    except Exception:\n                        logger.exception(f\"Error processing image {key}\")\n                    continue\n\n                # Handle text and document files\n                try:\n                    downloaded_file = self._download_object(key)\n                    if downloaded_file is None:\n                        continue\n                    extraction_result = extract_text_and_images(\n                        BytesIO(downloaded_file), file_name=file_name\n                    )\n\n                    onyx_metadata, custom_tags = process_onyx_metadata(\n                        extraction_result.metadata\n                    )\n                    file_display_name = onyx_metadata.file_display_name or file_name\n                    time_updated = onyx_metadata.doc_updated_at or last_modified\n                    link = onyx_metadata.link or link\n                    primary_owners = onyx_metadata.primary_owners\n                    secondary_owners = onyx_metadata.secondary_owners\n                    source_type = onyx_metadata.source_type or DocumentSource(\n                        self.bucket_type.value\n                    )\n\n                    sections: list[TextSection | ImageSection] = []\n                    if extraction_result.text_content.strip():\n                        logger.debug(\n                            f\"Creating TextSection for {file_name} with link: {link}\"\n                        )\n                        sections.append(\n                            TextSection(\n                                link=link,\n                                text=extraction_result.text_content.strip(),\n                            )\n                        )\n\n                    batch.append(\n                        Document(\n                            id=f\"{self.bucket_type}:{self.bucket_name}:{key}\",\n                            sections=(\n                                sections\n                                if sections\n                                else [TextSection(link=link, text=\"\")]\n                            ),\n                            source=source_type,\n                            semantic_identifier=file_display_name,\n                            doc_updated_at=time_updated,\n                            metadata=custom_tags,\n                            primary_owners=primary_owners,\n                            secondary_owners=secondary_owners,\n                        )\n                    )\n                    if len(batch) == self.batch_size:\n                        yield batch\n                        batch = []\n\n                except Exception:\n                    logger.exception(f\"Error decoding object {key} as UTF-8\")\n        if batch:\n            yield batch\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        logger.debug(\"Loading blob objects\")\n        return self._yield_blob_objects(\n            start=datetime(1970, 1, 1, tzinfo=timezone.utc),\n            end=datetime.now(timezone.utc),\n        )\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        if self.s3_client is None:\n            raise ConnectorMissingCredentialError(\"Blob storage\")\n\n        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)\n        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)\n\n        for batch in self._yield_blob_objects(start_datetime, end_datetime):\n            yield batch\n\n        return None\n\n    def validate_connector_settings(self) -> None:\n        if self.s3_client is None:\n            raise ConnectorMissingCredentialError(\n                \"Blob storage credentials not loaded.\"\n            )\n\n        if not self.bucket_name:\n            raise ConnectorValidationError(\n                \"No bucket name was provided in connector settings.\"\n            )\n\n        try:\n            # We only fetch one object/page as a light-weight validation step.\n            # This ensures we trigger typical S3 permission checks (ListObjectsV2, etc.).\n            self.s3_client.list_objects_v2(\n                Bucket=self.bucket_name, Prefix=self.prefix, MaxKeys=1\n            )\n\n        except NoCredentialsError:\n            raise ConnectorMissingCredentialError(\n                \"No valid blob storage credentials found or provided to boto3.\"\n            )\n        except PartialCredentialsError:\n            raise ConnectorMissingCredentialError(\n                \"Partial or incomplete blob storage credentials provided to boto3.\"\n            )\n        except ClientError as e:\n            error_code = e.response[\"Error\"].get(\"Code\", \"\")\n            status_code = e.response[\"ResponseMetadata\"].get(\"HTTPStatusCode\")\n\n            # Most common S3 error cases\n            if error_code in [\n                \"AccessDenied\",\n                \"InvalidAccessKeyId\",\n                \"SignatureDoesNotMatch\",\n            ]:\n                if status_code == 403 or error_code == \"AccessDenied\":\n                    raise InsufficientPermissionsError(\n                        f\"Insufficient permissions to list objects in bucket '{self.bucket_name}'. \"\n                        \"Please check your bucket policy and/or IAM policy.\"\n                    )\n                if status_code == 401 or error_code == \"SignatureDoesNotMatch\":\n                    raise CredentialExpiredError(\n                        \"Provided blob storage credentials appear invalid or expired.\"\n                    )\n\n                raise CredentialExpiredError(\n                    f\"Credential issue encountered ({error_code}).\"\n                )\n\n            if error_code == \"NoSuchBucket\" or status_code == 404:\n                raise ConnectorValidationError(\n                    f\"Bucket '{self.bucket_name}' does not exist or cannot be found.\"\n                )\n\n            raise ConnectorValidationError(\n                f\"Unexpected S3 client error (code={error_code}, status={status_code}): {e}\"\n            )\n\n        except Exception as e:\n            # Catch-all for anything not captured by the above\n            # Since we are unsure of the error and it may not disable the connector,\n            #  raise an unexpected error (does not disable connector)\n            raise UnexpectedValidationError(\n                f\"Unexpected error during blob storage settings validation: {e}\"\n            )\n\n\nif __name__ == \"__main__\":\n    credentials_dict = {\n        \"aws_access_key_id\": os.environ.get(\"AWS_ACCESS_KEY_ID\"),\n        \"aws_secret_access_key\": os.environ.get(\"AWS_SECRET_ACCESS_KEY\"),\n    }\n\n    # Initialize the connector\n    connector = BlobStorageConnector(\n        bucket_type=os.environ.get(\"BUCKET_TYPE\") or \"s3\",\n        bucket_name=os.environ.get(\"BUCKET_NAME\") or \"test\",\n        prefix=\"\",\n    )\n\n    try:\n        connector.load_credentials(credentials_dict)\n        document_batch_generator = connector.load_from_state()\n        for document_batch in document_batch_generator:\n            print(\"First batch of documents:\")\n            for doc in document_batch:\n                if isinstance(doc, HierarchyNode):\n                    print(\"hierarchynode:\", doc.display_name)\n                    continue\n\n                print(f\"Document ID: {doc.id}\")\n                print(f\"Semantic Identifier: {doc.semantic_identifier}\")\n                print(f\"Source: {doc.source}\")\n                print(f\"Updated At: {doc.doc_updated_at}\")\n                print(\"Sections:\")\n                for section in doc.sections:\n                    print(f\"  - Link: {section.link}\")\n                    if isinstance(section, TextSection) and section.text is not None:\n                        print(f\"  - Text: {section.text[:100]}...\")\n                    elif hasattr(section, \"image_file_id\") and section.image_file_id:\n                        print(f\"  - Image: {section.image_file_id}\")\n                    else:\n                        print(\"Error: Unknown section type\")\n                print(\"---\")\n            break\n\n    except ConnectorMissingCredentialError as e:\n        print(f\"Error: {e}\")\n    except Exception as e:\n        print(f\"An unexpected error occurred: {e}\")\n"
  },
  {
    "path": "backend/onyx/connectors/bookstack/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/bookstack/client.py",
    "content": "from typing import Any\n\nimport requests\n\n\nclass BookStackClientRequestFailedError(ConnectionError):\n    def __init__(self, status: int, error: str) -> None:\n        self.status_code = status\n        self.error = error\n        super().__init__(\n            \"BookStack Client request failed with status {status}: {error}\".format(\n                status=status, error=error\n            )\n        )\n\n\nclass BookStackApiClient:\n    def __init__(\n        self,\n        base_url: str,\n        token_id: str,\n        token_secret: str,\n    ) -> None:\n        self.base_url = base_url\n        self.token_id = token_id\n        self.token_secret = token_secret\n\n    def get(self, endpoint: str, params: dict[str, str]) -> dict[str, Any]:\n        url: str = self._build_url(endpoint)\n        headers = self._build_headers()\n        response = requests.get(url, headers=headers, params=params)\n\n        try:\n            json = response.json()\n        except Exception:\n            json = {}\n\n        if response.status_code >= 300:\n            error = response.reason\n            response_error = json.get(\"error\", {}).get(\"message\", \"\")\n            if response_error:\n                error = response_error\n            raise BookStackClientRequestFailedError(response.status_code, error)\n\n        return json\n\n    def _build_headers(self) -> dict[str, str]:\n        auth = \"Token \" + self.token_id + \":\" + self.token_secret\n        return {\n            \"Authorization\": auth,\n            \"Accept\": \"application/json\",\n        }\n\n    def _build_url(self, endpoint: str) -> str:\n        return self.base_url.rstrip(\"/\") + \"/api/\" + endpoint.lstrip(\"/\")\n\n    def build_app_url(self, endpoint: str) -> str:\n        return self.base_url.rstrip(\"/\") + \"/\" + endpoint.lstrip(\"/\")\n"
  },
  {
    "path": "backend/onyx/connectors/bookstack/connector.py",
    "content": "import html\nimport time\nfrom collections.abc import Callable\nfrom datetime import datetime\nfrom typing import Any\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.bookstack.client import BookStackApiClient\nfrom onyx.connectors.bookstack.client import BookStackClientRequestFailedError\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.html_utils import parse_html_page_basic\n\n\nclass BookstackConnector(LoadConnector, PollConnector):\n    def __init__(\n        self,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        self.batch_size = batch_size\n        self.bookstack_client: BookStackApiClient | None = None\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self.bookstack_client = BookStackApiClient(\n            base_url=credentials[\"bookstack_base_url\"],\n            token_id=credentials[\"bookstack_api_token_id\"],\n            token_secret=credentials[\"bookstack_api_token_secret\"],\n        )\n        return None\n\n    @staticmethod\n    def _get_doc_batch(\n        batch_size: int,\n        bookstack_client: BookStackApiClient,\n        endpoint: str,\n        transformer: Callable[[BookStackApiClient, dict], Document],\n        start_ind: int,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> tuple[list[Document | HierarchyNode], int]:\n        params = {\n            \"count\": str(batch_size),\n            \"offset\": str(start_ind),\n            \"sort\": \"+id\",\n        }\n\n        if start:\n            params[\"filter[updated_at:gte]\"] = datetime.utcfromtimestamp(\n                start\n            ).strftime(\"%Y-%m-%d\")\n\n        if end:\n            params[\"filter[updated_at:lte]\"] = datetime.utcfromtimestamp(end).strftime(\n                \"%Y-%m-%d\"\n            )\n\n        batch = bookstack_client.get(endpoint, params=params).get(\"data\", [])\n        doc_batch: list[Document | HierarchyNode] = [\n            transformer(bookstack_client, item) for item in batch\n        ]\n\n        return doc_batch, len(batch)\n\n    @staticmethod\n    def _book_to_document(\n        bookstack_client: BookStackApiClient, book: dict[str, Any]\n    ) -> Document:\n        url = bookstack_client.build_app_url(\"/books/\" + str(book.get(\"slug\")))\n        title = str(book.get(\"name\", \"\"))\n        text = book.get(\"name\", \"\") + \"\\n\" + book.get(\"description\", \"\")\n        updated_at_str = (\n            str(book.get(\"updated_at\")) if book.get(\"updated_at\") is not None else None\n        )\n        return Document(\n            id=\"book__\" + str(book.get(\"id\")),\n            sections=[TextSection(link=url, text=text)],\n            source=DocumentSource.BOOKSTACK,\n            semantic_identifier=\"Book: \" + title,\n            title=title,\n            doc_updated_at=(\n                time_str_to_utc(updated_at_str) if updated_at_str is not None else None\n            ),\n            metadata={\"type\": \"book\"},\n        )\n\n    @staticmethod\n    def _chapter_to_document(\n        bookstack_client: BookStackApiClient, chapter: dict[str, Any]\n    ) -> Document:\n        url = bookstack_client.build_app_url(\n            \"/books/\"\n            + str(chapter.get(\"book_slug\"))\n            + \"/chapter/\"\n            + str(chapter.get(\"slug\"))\n        )\n        title = str(chapter.get(\"name\", \"\"))\n        text = chapter.get(\"name\", \"\") + \"\\n\" + chapter.get(\"description\", \"\")\n        updated_at_str = (\n            str(chapter.get(\"updated_at\"))\n            if chapter.get(\"updated_at\") is not None\n            else None\n        )\n        return Document(\n            id=\"chapter__\" + str(chapter.get(\"id\")),\n            sections=[TextSection(link=url, text=text)],\n            source=DocumentSource.BOOKSTACK,\n            semantic_identifier=\"Chapter: \" + title,\n            title=title,\n            doc_updated_at=(\n                time_str_to_utc(updated_at_str) if updated_at_str is not None else None\n            ),\n            metadata={\"type\": \"chapter\"},\n        )\n\n    @staticmethod\n    def _shelf_to_document(\n        bookstack_client: BookStackApiClient, shelf: dict[str, Any]\n    ) -> Document:\n        url = bookstack_client.build_app_url(\"/shelves/\" + str(shelf.get(\"slug\")))\n        title = str(shelf.get(\"name\", \"\"))\n        text = shelf.get(\"name\", \"\") + \"\\n\" + shelf.get(\"description\", \"\")\n        updated_at_str = (\n            str(shelf.get(\"updated_at\"))\n            if shelf.get(\"updated_at\") is not None\n            else None\n        )\n        return Document(\n            id=\"shelf:\" + str(shelf.get(\"id\")),\n            sections=[TextSection(link=url, text=text)],\n            source=DocumentSource.BOOKSTACK,\n            semantic_identifier=\"Shelf: \" + title,\n            title=title,\n            doc_updated_at=(\n                time_str_to_utc(updated_at_str) if updated_at_str is not None else None\n            ),\n            metadata={\"type\": \"shelf\"},\n        )\n\n    @staticmethod\n    def _page_to_document(\n        bookstack_client: BookStackApiClient, page: dict[str, Any]\n    ) -> Document:\n        page_id = str(page.get(\"id\"))\n        title = str(page.get(\"name\", \"\"))\n        page_data = bookstack_client.get(\"/pages/\" + page_id, {})\n        url = bookstack_client.build_app_url(\n            \"/books/\"\n            + str(page.get(\"book_slug\"))\n            + \"/page/\"\n            + str(page_data.get(\"slug\"))\n        )\n        page_html = \"<h1>\" + html.escape(title) + \"</h1>\" + str(page_data.get(\"html\"))\n        text = parse_html_page_basic(page_html)\n        updated_at_str = (\n            str(page_data.get(\"updated_at\"))\n            if page_data.get(\"updated_at\") is not None\n            else None\n        )\n        time.sleep(0.1)\n        return Document(\n            id=\"page:\" + page_id,\n            sections=[TextSection(link=url, text=text)],\n            source=DocumentSource.BOOKSTACK,\n            semantic_identifier=\"Page: \" + str(title),\n            title=str(title),\n            doc_updated_at=(\n                time_str_to_utc(updated_at_str) if updated_at_str is not None else None\n            ),\n            metadata={\"type\": \"page\"},\n        )\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        if self.bookstack_client is None:\n            raise ConnectorMissingCredentialError(\"Bookstack\")\n\n        return self.poll_source(None, None)\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None\n    ) -> GenerateDocumentsOutput:\n        if self.bookstack_client is None:\n            raise ConnectorMissingCredentialError(\"Bookstack\")\n\n        transform_by_endpoint: dict[\n            str, Callable[[BookStackApiClient, dict], Document]\n        ] = {\n            \"/books\": self._book_to_document,\n            \"/chapters\": self._chapter_to_document,\n            \"/shelves\": self._shelf_to_document,\n            \"/pages\": self._page_to_document,\n        }\n\n        for endpoint, transform in transform_by_endpoint.items():\n            start_ind = 0\n            while True:\n                doc_batch, num_results = self._get_doc_batch(\n                    batch_size=self.batch_size,\n                    bookstack_client=self.bookstack_client,\n                    endpoint=endpoint,\n                    transformer=transform,\n                    start_ind=start_ind,\n                    start=start,\n                    end=end,\n                )\n                start_ind += num_results\n                if doc_batch:\n                    yield doc_batch\n\n                if num_results < self.batch_size:\n                    break\n                else:\n                    time.sleep(0.2)\n\n    def validate_connector_settings(self) -> None:\n        \"\"\"\n        Validate that the BookStack credentials and connector settings are correct.\n        Specifically checks that we can make an authenticated request to BookStack.\n        \"\"\"\n        if not self.bookstack_client:\n            raise ConnectorMissingCredentialError(\n                \"BookStack credentials have not been loaded.\"\n            )\n\n        try:\n            # Attempt to fetch a small batch of books (arbitrary endpoint) to verify credentials\n            _ = self.bookstack_client.get(\n                \"/books\", params={\"count\": \"1\", \"offset\": \"0\"}\n            )\n\n        except BookStackClientRequestFailedError as e:\n            # Check for HTTP status codes\n            if e.status_code == 401:\n                raise CredentialExpiredError(\n                    \"Your BookStack credentials appear to be invalid or expired (HTTP 401).\"\n                ) from e\n            elif e.status_code == 403:\n                raise InsufficientPermissionsError(\n                    \"The configured BookStack token does not have sufficient permissions (HTTP 403).\"\n                ) from e\n            else:\n                raise ConnectorValidationError(\n                    f\"Unexpected BookStack error (status={e.status_code}): {e}\"\n                ) from e\n\n        except Exception as exc:\n            raise ConnectorValidationError(\n                f\"Unexpected error while validating BookStack connector settings: {exc}\"\n            ) from exc\n"
  },
  {
    "path": "backend/onyx/connectors/canvas/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/canvas/access.py",
    "content": "\"\"\"\nPermissioning / AccessControl logic for Canvas courses.\n\nCE stub — returns None (no permissions). The EE implementation is loaded\nat runtime via ``fetch_versioned_implementation``.\n\"\"\"\n\nfrom collections.abc import Callable\nfrom typing import cast\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.canvas.client import CanvasApiClient\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import global_version\n\n\ndef get_course_permissions(\n    canvas_client: CanvasApiClient,\n    course_id: int,\n) -> ExternalAccess | None:\n    if not global_version.is_ee_version():\n        return None\n\n    ee_get_course_permissions = cast(\n        Callable[[CanvasApiClient, int], ExternalAccess | None],\n        fetch_versioned_implementation(\n            \"onyx.external_permissions.canvas.access\",\n            \"get_course_permissions\",\n        ),\n    )\n\n    return ee_get_course_permissions(canvas_client, course_id)\n"
  },
  {
    "path": "backend/onyx/connectors/canvas/client.py",
    "content": "from __future__ import annotations\n\nimport logging\nimport re\nfrom collections.abc import Iterator\nfrom typing import Any\nfrom urllib.parse import urlparse\n\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rl_requests,\n)\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\n\nlogger = logging.getLogger(__name__)\n\n# Requests timeout in seconds.\n_CANVAS_CALL_TIMEOUT: int = 30\n_CANVAS_API_VERSION: str = \"/api/v1\"\n# Matches the \"next\" URL in a Canvas Link header, e.g.:\n#   <https://canvas.example.com/api/v1/courses?page=2>; rel=\"next\"\n# Captures the URL inside the angle brackets.\n_NEXT_LINK_PATTERN: re.Pattern[str] = re.compile(r'<([^>]+)>;\\s*rel=\"next\"')\n\n\n_STATUS_TO_ERROR_CODE: dict[int, OnyxErrorCode] = {\n    401: OnyxErrorCode.CREDENTIAL_EXPIRED,\n    403: OnyxErrorCode.INSUFFICIENT_PERMISSIONS,\n    404: OnyxErrorCode.BAD_GATEWAY,\n    429: OnyxErrorCode.RATE_LIMITED,\n}\n\n\ndef _error_code_for_status(status_code: int) -> OnyxErrorCode:\n    \"\"\"Map an HTTP status code to the appropriate OnyxErrorCode.\n\n    Expects a >= 400 status code. Known codes (401, 403, 404, 429) are\n    mapped to specific error codes; all other codes (unrecognised 4xx\n    and 5xx) map to BAD_GATEWAY as unexpected upstream errors.\n    \"\"\"\n    if status_code in _STATUS_TO_ERROR_CODE:\n        return _STATUS_TO_ERROR_CODE[status_code]\n    return OnyxErrorCode.BAD_GATEWAY\n\n\nclass CanvasApiClient:\n    def __init__(\n        self,\n        bearer_token: str,\n        canvas_base_url: str,\n    ) -> None:\n        parsed_base = urlparse(canvas_base_url)\n        if not parsed_base.hostname:\n            raise ValueError(\"canvas_base_url must include a valid host\")\n        if parsed_base.scheme != \"https\":\n            raise ValueError(\"canvas_base_url must use https\")\n\n        self._bearer_token = bearer_token\n        self.base_url = (\n            canvas_base_url.rstrip(\"/\").removesuffix(_CANVAS_API_VERSION)\n            + _CANVAS_API_VERSION\n        )\n        # Hostname is already validated above; reuse parsed_base instead\n        # of re-parsing.  Used by _parse_next_link to validate pagination URLs.\n        self._expected_host: str = parsed_base.hostname\n\n    def get(\n        self,\n        endpoint: str = \"\",\n        params: dict[str, Any] | None = None,\n        full_url: str | None = None,\n    ) -> tuple[Any, str | None]:\n        \"\"\"Make a GET request to the Canvas API.\n\n        Returns a tuple of (json_body, next_url).\n        next_url is parsed from the Link header and is None if there are no more pages.\n        If full_url is provided, it is used directly (for following pagination links).\n\n        Security note: full_url must only be set to values returned by\n        ``_parse_next_link``, which validates the host against the configured\n        Canvas base URL.  Passing an arbitrary URL would leak the bearer token.\n        \"\"\"\n        # full_url is used when following pagination (Canvas returns the\n        # next-page URL in the Link header).  For the first request we build\n        # the URL from the endpoint name instead.\n        url = full_url if full_url else self._build_url(endpoint)\n        headers = self._build_headers()\n\n        response = rl_requests.get(\n            url,\n            headers=headers,\n            params=params if not full_url else None,\n            timeout=_CANVAS_CALL_TIMEOUT,\n        )\n\n        try:\n            response_json = response.json()\n        except ValueError as e:\n            if response.status_code < 300:\n                raise OnyxError(\n                    OnyxErrorCode.BAD_GATEWAY,\n                    detail=f\"Invalid JSON in Canvas response: {e}\",\n                )\n            logger.warning(\n                \"Failed to parse JSON from Canvas error response (status=%d): %s\",\n                response.status_code,\n                e,\n            )\n            response_json = {}\n\n        if response.status_code >= 400:\n            # Try to extract the most specific error message from the\n            # Canvas response body.  Canvas uses three different shapes\n            # depending on the endpoint and error type:\n            default_error: str = response.reason or f\"HTTP {response.status_code}\"\n            error = default_error\n            if isinstance(response_json, dict):\n                # Shape 1: {\"error\": {\"message\": \"Not authorized\"}}\n                error_field = response_json.get(\"error\")\n                if isinstance(error_field, dict):\n                    response_error = error_field.get(\"message\", \"\")\n                    if response_error:\n                        error = response_error\n                # Shape 2: {\"error\": \"Invalid access token\"}\n                elif isinstance(error_field, str):\n                    error = error_field\n                # Shape 3: {\"errors\": [{\"message\": \"...\"}]}\n                # Used for validation errors.  Only use as fallback if\n                # we didn't already find a more specific message above.\n                if error == default_error:\n                    errors_list = response_json.get(\"errors\")\n                    if isinstance(errors_list, list) and errors_list:\n                        first_error = errors_list[0]\n                        if isinstance(first_error, dict):\n                            msg = first_error.get(\"message\", \"\")\n                            if msg:\n                                error = msg\n            raise OnyxError(\n                _error_code_for_status(response.status_code),\n                detail=error,\n                status_code_override=response.status_code,\n            )\n\n        next_url = self._parse_next_link(response.headers.get(\"Link\", \"\"))\n        return response_json, next_url\n\n    def _parse_next_link(self, link_header: str) -> str | None:\n        \"\"\"Extract the 'next' URL from a Canvas Link header.\n\n        Only returns URLs whose host matches the configured Canvas base URL\n        to prevent leaking the bearer token to arbitrary hosts.\n        \"\"\"\n        expected_host = self._expected_host\n        for match in _NEXT_LINK_PATTERN.finditer(link_header):\n            url = match.group(1)\n            parsed_url = urlparse(url)\n            if parsed_url.hostname != expected_host:\n                raise OnyxError(\n                    OnyxErrorCode.BAD_GATEWAY,\n                    detail=(\n                        \"Canvas pagination returned an unexpected host \"\n                        f\"({parsed_url.hostname}); expected {expected_host}\"\n                    ),\n                )\n            if parsed_url.scheme != \"https\":\n                raise OnyxError(\n                    OnyxErrorCode.BAD_GATEWAY,\n                    detail=(\n                        \"Canvas pagination link must use https, \"\n                        f\"got {parsed_url.scheme!r}\"\n                    ),\n                )\n            return url\n        return None\n\n    def _build_headers(self) -> dict[str, str]:\n        \"\"\"Return the Authorization header with the bearer token.\"\"\"\n        return {\"Authorization\": f\"Bearer {self._bearer_token}\"}\n\n    def _build_url(self, endpoint: str) -> str:\n        \"\"\"Build a full Canvas API URL from an endpoint path.\n\n        Assumes endpoint is non-empty (e.g. ``\"courses\"``, ``\"announcements\"``).\n        Only called on a first request, endpoint must be set for first request.\n        Verify endpoint exists in case of future changes where endpoint might be optional.\n        Leading slashes are stripped to avoid double-slash in the result.\n        self.base_url is already normalized with no trailing slash.\n        \"\"\"\n        final_url = self.base_url\n        clean_endpoint = endpoint.lstrip(\"/\")\n        if clean_endpoint:\n            final_url += \"/\" + clean_endpoint\n        return final_url\n\n    def paginate(\n        self,\n        endpoint: str,\n        params: dict[str, Any] | None = None,\n    ) -> Iterator[list[Any]]:\n        \"\"\"Yield each page of results, following Link-header pagination.\n\n        Makes the first request with endpoint + params, then follows\n        next_url from Link headers for subsequent pages.\n        \"\"\"\n        response, next_url = self.get(endpoint, params=params)\n        while True:\n            if not response:\n                break\n            yield response\n            if not next_url:\n                break\n            response, next_url = self.get(full_url=next_url)\n"
  },
  {
    "path": "backend/onyx/connectors/canvas/connector.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\nfrom typing import Literal\nfrom typing import NoReturn\nfrom typing import TypeAlias\n\nfrom pydantic import BaseModel\nfrom retry import retry\nfrom typing_extensions import override\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.canvas.access import get_course_permissions\nfrom onyx.connectors.canvas.client import CanvasApiClient\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.interfaces import CheckpointedConnectorWithPermSync\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.file_processing.html_utils import parse_html_page_basic\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _handle_canvas_api_error(e: OnyxError) -> NoReturn:\n    \"\"\"Map Canvas API errors to connector framework exceptions.\"\"\"\n    if e.status_code == 401:\n        raise CredentialExpiredError(\n            \"Canvas API token is invalid or expired (HTTP 401).\"\n        )\n    elif e.status_code == 403:\n        raise InsufficientPermissionsError(\n            \"Canvas API token does not have sufficient permissions (HTTP 403).\"\n        )\n    elif e.status_code == 429:\n        raise ConnectorValidationError(\n            \"Canvas rate-limit exceeded (HTTP 429). Please try again later.\"\n        )\n    elif e.status_code >= 500:\n        raise UnexpectedValidationError(\n            f\"Unexpected Canvas HTTP error (status={e.status_code}): {e}\"\n        )\n    else:\n        raise ConnectorValidationError(\n            f\"Canvas API error (status={e.status_code}): {e}\"\n        )\n\n\nclass CanvasCourse(BaseModel):\n    id: int\n    name: str | None = None\n    course_code: str | None = None\n    created_at: str | None = None\n    workflow_state: str | None = None\n\n    @classmethod\n    def from_api(cls, payload: dict[str, Any]) -> \"CanvasCourse\":\n        return cls(\n            id=payload[\"id\"],\n            name=payload.get(\"name\"),\n            course_code=payload.get(\"course_code\"),\n            created_at=payload.get(\"created_at\"),\n            workflow_state=payload.get(\"workflow_state\"),\n        )\n\n\nclass CanvasPage(BaseModel):\n    page_id: int\n    url: str\n    title: str\n    body: str | None = None\n    created_at: str | None = None\n    updated_at: str | None = None\n    course_id: int\n\n    @classmethod\n    def from_api(cls, payload: dict[str, Any], course_id: int) -> \"CanvasPage\":\n        return cls(\n            page_id=payload[\"page_id\"],\n            url=payload[\"url\"],\n            title=payload[\"title\"],\n            body=payload.get(\"body\"),\n            created_at=payload.get(\"created_at\"),\n            updated_at=payload.get(\"updated_at\"),\n            course_id=course_id,\n        )\n\n\nclass CanvasAssignment(BaseModel):\n    id: int\n    name: str\n    description: str | None = None\n    html_url: str\n    course_id: int\n    created_at: str | None = None\n    updated_at: str | None = None\n    due_at: str | None = None\n\n    @classmethod\n    def from_api(cls, payload: dict[str, Any], course_id: int) -> \"CanvasAssignment\":\n        return cls(\n            id=payload[\"id\"],\n            name=payload[\"name\"],\n            description=payload.get(\"description\"),\n            html_url=payload[\"html_url\"],\n            course_id=course_id,\n            created_at=payload.get(\"created_at\"),\n            updated_at=payload.get(\"updated_at\"),\n            due_at=payload.get(\"due_at\"),\n        )\n\n\nclass CanvasAnnouncement(BaseModel):\n    id: int\n    title: str\n    message: str | None = None\n    html_url: str\n    posted_at: str | None = None\n    course_id: int\n\n    @classmethod\n    def from_api(cls, payload: dict[str, Any], course_id: int) -> \"CanvasAnnouncement\":\n        return cls(\n            id=payload[\"id\"],\n            title=payload[\"title\"],\n            message=payload.get(\"message\"),\n            html_url=payload[\"html_url\"],\n            posted_at=payload.get(\"posted_at\"),\n            course_id=course_id,\n        )\n\n\nCanvasStage: TypeAlias = Literal[\"pages\", \"assignments\", \"announcements\"]\n\n\nclass CanvasConnectorCheckpoint(ConnectorCheckpoint):\n    \"\"\"Checkpoint state for resumable Canvas indexing.\n\n    Fields:\n        course_ids: Materialized list of course IDs to process.\n        current_course_index: Index into course_ids for current course.\n        stage: Which item type we're processing for the current course.\n        next_url: Pagination cursor within the current stage. None means\n            start from the first page; a URL means resume from that page.\n\n    Invariant:\n        If current_course_index is incremented, stage must be reset to\n        \"pages\" and next_url must be reset to None.\n    \"\"\"\n\n    course_ids: list[int] = []\n    current_course_index: int = 0\n    stage: CanvasStage = \"pages\"\n    next_url: str | None = None\n\n    def advance_course(self) -> None:\n        \"\"\"Move to the next course and reset within-course state.\"\"\"\n        self.current_course_index += 1\n        self.stage = \"pages\"\n        self.next_url = None\n\n\nclass CanvasConnector(\n    CheckpointedConnectorWithPermSync[CanvasConnectorCheckpoint],\n    SlimConnectorWithPermSync,\n):\n    def __init__(\n        self,\n        canvas_base_url: str,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        self.canvas_base_url = canvas_base_url.rstrip(\"/\").removesuffix(\"/api/v1\")\n        self.batch_size = batch_size\n        self._canvas_client: CanvasApiClient | None = None\n        self._course_permissions_cache: dict[int, ExternalAccess | None] = {}\n\n    @property\n    def canvas_client(self) -> CanvasApiClient:\n        if self._canvas_client is None:\n            raise ConnectorMissingCredentialError(\"Canvas\")\n        return self._canvas_client\n\n    def _get_course_permissions(self, course_id: int) -> ExternalAccess | None:\n        \"\"\"Get course permissions with caching.\"\"\"\n        if course_id not in self._course_permissions_cache:\n            self._course_permissions_cache[course_id] = get_course_permissions(\n                canvas_client=self.canvas_client,\n                course_id=course_id,\n            )\n        return self._course_permissions_cache[course_id]\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _list_courses(self) -> list[CanvasCourse]:\n        \"\"\"Fetch all courses accessible to the authenticated user.\"\"\"\n        logger.debug(\"Fetching Canvas courses\")\n\n        courses: list[CanvasCourse] = []\n        for page in self.canvas_client.paginate(\n            \"courses\", params={\"per_page\": \"100\", \"state[]\": \"available\"}\n        ):\n            courses.extend(CanvasCourse.from_api(c) for c in page)\n        return courses\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _list_pages(self, course_id: int) -> list[CanvasPage]:\n        \"\"\"Fetch all pages for a given course.\"\"\"\n        logger.debug(f\"Fetching pages for course {course_id}\")\n\n        pages: list[CanvasPage] = []\n        for page in self.canvas_client.paginate(\n            f\"courses/{course_id}/pages\",\n            params={\"per_page\": \"100\", \"include[]\": \"body\", \"published\": \"true\"},\n        ):\n            pages.extend(CanvasPage.from_api(p, course_id=course_id) for p in page)\n        return pages\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _list_assignments(self, course_id: int) -> list[CanvasAssignment]:\n        \"\"\"Fetch all assignments for a given course.\"\"\"\n        logger.debug(f\"Fetching assignments for course {course_id}\")\n\n        assignments: list[CanvasAssignment] = []\n        for page in self.canvas_client.paginate(\n            f\"courses/{course_id}/assignments\",\n            params={\"per_page\": \"100\", \"published\": \"true\"},\n        ):\n            assignments.extend(\n                CanvasAssignment.from_api(a, course_id=course_id) for a in page\n            )\n        return assignments\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _list_announcements(self, course_id: int) -> list[CanvasAnnouncement]:\n        \"\"\"Fetch all announcements for a given course.\"\"\"\n        logger.debug(f\"Fetching announcements for course {course_id}\")\n\n        announcements: list[CanvasAnnouncement] = []\n        for page in self.canvas_client.paginate(\n            \"announcements\",\n            params={\n                \"per_page\": \"100\",\n                \"context_codes[]\": f\"course_{course_id}\",\n                \"active_only\": \"true\",\n            },\n        ):\n            announcements.extend(\n                CanvasAnnouncement.from_api(a, course_id=course_id) for a in page\n            )\n        return announcements\n\n    def _build_document(\n        self,\n        doc_id: str,\n        link: str,\n        text: str,\n        semantic_identifier: str,\n        doc_updated_at: datetime | None,\n        course_id: int,\n        doc_type: str,\n    ) -> Document:\n        \"\"\"Build a Document with standard Canvas fields.\"\"\"\n        return Document(\n            id=doc_id,\n            sections=cast(\n                list[TextSection | ImageSection],\n                [TextSection(link=link, text=text)],\n            ),\n            source=DocumentSource.CANVAS,\n            semantic_identifier=semantic_identifier,\n            doc_updated_at=doc_updated_at,\n            metadata={\"course_id\": str(course_id), \"type\": doc_type},\n        )\n\n    def _convert_page_to_document(self, page: CanvasPage) -> Document:\n        \"\"\"Convert a Canvas page to a Document.\"\"\"\n        link = f\"{self.canvas_base_url}/courses/{page.course_id}/pages/{page.url}\"\n\n        text_parts = [page.title]\n        body_text = parse_html_page_basic(page.body) if page.body else \"\"\n        if body_text:\n            text_parts.append(body_text)\n\n        doc_updated_at = (\n            datetime.fromisoformat(page.updated_at.replace(\"Z\", \"+00:00\")).astimezone(\n                timezone.utc\n            )\n            if page.updated_at\n            else None\n        )\n\n        document = self._build_document(\n            doc_id=f\"canvas-page-{page.course_id}-{page.page_id}\",\n            link=link,\n            text=\"\\n\\n\".join(text_parts),\n            semantic_identifier=page.title or f\"Page {page.page_id}\",\n            doc_updated_at=doc_updated_at,\n            course_id=page.course_id,\n            doc_type=\"page\",\n        )\n        return document\n\n    def _convert_assignment_to_document(self, assignment: CanvasAssignment) -> Document:\n        \"\"\"Convert a Canvas assignment to a Document.\"\"\"\n        text_parts = [assignment.name]\n        desc_text = (\n            parse_html_page_basic(assignment.description)\n            if assignment.description\n            else \"\"\n        )\n        if desc_text:\n            text_parts.append(desc_text)\n        if assignment.due_at:\n            due_dt = datetime.fromisoformat(\n                assignment.due_at.replace(\"Z\", \"+00:00\")\n            ).astimezone(timezone.utc)\n            text_parts.append(f\"Due: {due_dt.strftime('%B %d, %Y %H:%M UTC')}\")\n\n        doc_updated_at = (\n            datetime.fromisoformat(\n                assignment.updated_at.replace(\"Z\", \"+00:00\")\n            ).astimezone(timezone.utc)\n            if assignment.updated_at\n            else None\n        )\n\n        document = self._build_document(\n            doc_id=f\"canvas-assignment-{assignment.course_id}-{assignment.id}\",\n            link=assignment.html_url,\n            text=\"\\n\\n\".join(text_parts),\n            semantic_identifier=assignment.name or f\"Assignment {assignment.id}\",\n            doc_updated_at=doc_updated_at,\n            course_id=assignment.course_id,\n            doc_type=\"assignment\",\n        )\n        return document\n\n    def _convert_announcement_to_document(\n        self, announcement: CanvasAnnouncement\n    ) -> Document:\n        \"\"\"Convert a Canvas announcement to a Document.\"\"\"\n        text_parts = [announcement.title]\n        msg_text = (\n            parse_html_page_basic(announcement.message) if announcement.message else \"\"\n        )\n        if msg_text:\n            text_parts.append(msg_text)\n\n        doc_updated_at = (\n            datetime.fromisoformat(\n                announcement.posted_at.replace(\"Z\", \"+00:00\")\n            ).astimezone(timezone.utc)\n            if announcement.posted_at\n            else None\n        )\n\n        document = self._build_document(\n            doc_id=f\"canvas-announcement-{announcement.course_id}-{announcement.id}\",\n            link=announcement.html_url,\n            text=\"\\n\\n\".join(text_parts),\n            semantic_identifier=announcement.title or f\"Announcement {announcement.id}\",\n            doc_updated_at=doc_updated_at,\n            course_id=announcement.course_id,\n            doc_type=\"announcement\",\n        )\n        return document\n\n    @override\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        \"\"\"Load and validate Canvas credentials.\"\"\"\n        access_token = credentials.get(\"canvas_access_token\")\n        if not access_token:\n            raise ConnectorMissingCredentialError(\"Canvas\")\n\n        try:\n            client = CanvasApiClient(\n                bearer_token=access_token,\n                canvas_base_url=self.canvas_base_url,\n            )\n            client.get(\"courses\", params={\"per_page\": \"1\"})\n        except ValueError as e:\n            raise ConnectorValidationError(f\"Invalid Canvas base URL: {e}\")\n        except OnyxError as e:\n            _handle_canvas_api_error(e)\n\n        self._canvas_client = client\n        return None\n\n    @override\n    def validate_connector_settings(self) -> None:\n        \"\"\"Validate Canvas connector settings by testing API access.\"\"\"\n        try:\n            self.canvas_client.get(\"courses\", params={\"per_page\": \"1\"})\n            logger.info(\"Canvas connector settings validated successfully\")\n        except OnyxError as e:\n            _handle_canvas_api_error(e)\n        except ConnectorMissingCredentialError:\n            raise\n        except Exception as exc:\n            raise UnexpectedValidationError(\n                f\"Unexpected error during Canvas settings validation: {exc}\"\n            )\n\n    @override\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: CanvasConnectorCheckpoint,\n    ) -> CheckpointOutput[CanvasConnectorCheckpoint]:\n        # TODO(benwu408): implemented in PR3 (checkpoint)\n        raise NotImplementedError\n\n    @override\n    def load_from_checkpoint_with_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: CanvasConnectorCheckpoint,\n    ) -> CheckpointOutput[CanvasConnectorCheckpoint]:\n        # TODO(benwu408): implemented in PR3 (checkpoint)\n        raise NotImplementedError\n\n    @override\n    def build_dummy_checkpoint(self) -> CanvasConnectorCheckpoint:\n        # TODO(benwu408): implemented in PR3 (checkpoint)\n        raise NotImplementedError\n\n    @override\n    def validate_checkpoint_json(\n        self, checkpoint_json: str\n    ) -> CanvasConnectorCheckpoint:\n        # TODO(benwu408): implemented in PR3 (checkpoint)\n        raise NotImplementedError\n\n    @override\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> GenerateSlimDocumentOutput:\n        # TODO(benwu408): implemented in PR4 (perm sync)\n        raise NotImplementedError\n"
  },
  {
    "path": "backend/onyx/connectors/clickup/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/clickup/connector.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import Optional\n\nimport requests\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rate_limit_builder,\n)\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.retry_wrapper import retry_builder\n\n\nCLICKUP_API_BASE_URL = \"https://api.clickup.com/api/v2\"\n\n\nclass ClickupConnector(LoadConnector, PollConnector):\n    def __init__(\n        self,\n        batch_size: int = INDEX_BATCH_SIZE,\n        api_token: str | None = None,\n        team_id: str | None = None,\n        connector_type: str | None = None,\n        connector_ids: list[str] | None = None,\n        retrieve_task_comments: bool = True,\n    ) -> None:\n        self.batch_size = batch_size\n        self.api_token = api_token\n        self.team_id = team_id\n        self.connector_type = connector_type if connector_type else \"workspace\"\n        self.connector_ids = connector_ids\n        self.retrieve_task_comments = retrieve_task_comments\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self.api_token = credentials[\"clickup_api_token\"]\n        self.team_id = credentials[\"clickup_team_id\"]\n        return None\n\n    @retry_builder()\n    @rate_limit_builder(max_calls=100, period=60)\n    def _make_request(self, endpoint: str, params: Optional[dict] = None) -> Any:\n        if not self.api_token:\n            raise ConnectorMissingCredentialError(\"Clickup\")\n\n        headers = {\"Authorization\": self.api_token}\n\n        response = requests.get(\n            f\"{CLICKUP_API_BASE_URL}/{endpoint}\", headers=headers, params=params\n        )\n\n        response.raise_for_status()\n\n        return response.json()\n\n    def _get_task_comments(self, task_id: str) -> list[TextSection]:\n        url_endpoint = f\"/task/{task_id}/comment\"\n        response = self._make_request(url_endpoint)\n        comments = [\n            TextSection(\n                link=f\"https://app.clickup.com/t/{task_id}?comment={comment_dict['id']}\",\n                text=comment_dict[\"comment_text\"],\n            )\n            for comment_dict in response[\"comments\"]\n        ]\n\n        return comments\n\n    def _get_all_tasks_filtered(\n        self,\n        start: int | None = None,\n        end: int | None = None,\n    ) -> GenerateDocumentsOutput:\n        doc_batch: list[Document | HierarchyNode] = []\n        page: int = 0\n        params = {\n            \"include_markdown_description\": \"true\",\n            \"include_closed\": \"true\",\n            \"page\": page,\n        }\n\n        if start is not None:\n            params[\"date_updated_gt\"] = start\n        if end is not None:\n            params[\"date_updated_lt\"] = end\n\n        if self.connector_type == \"list\":\n            params[\"list_ids[]\"] = self.connector_ids\n        elif self.connector_type == \"folder\":\n            params[\"project_ids[]\"] = self.connector_ids\n        elif self.connector_type == \"space\":\n            params[\"space_ids[]\"] = self.connector_ids\n\n        url_endpoint = f\"/team/{self.team_id}/task\"\n\n        while True:\n            response = self._make_request(url_endpoint, params)\n\n            page += 1\n            params[\"page\"] = page\n\n            for task in response[\"tasks\"]:\n                document = Document(\n                    id=task[\"id\"],\n                    source=DocumentSource.CLICKUP,\n                    semantic_identifier=task[\"name\"],\n                    doc_updated_at=(\n                        datetime.fromtimestamp(\n                            round(float(task[\"date_updated\"]) / 1000, 3)\n                        ).replace(tzinfo=timezone.utc)\n                    ),\n                    primary_owners=[\n                        BasicExpertInfo(\n                            display_name=task[\"creator\"][\"username\"],\n                            email=task[\"creator\"][\"email\"],\n                        )\n                    ],\n                    secondary_owners=[\n                        BasicExpertInfo(\n                            display_name=assignee[\"username\"],\n                            email=assignee[\"email\"],\n                        )\n                        for assignee in task[\"assignees\"]\n                    ],\n                    title=task[\"name\"],\n                    sections=[\n                        TextSection(\n                            link=task[\"url\"],\n                            text=(\n                                task[\"markdown_description\"]\n                                if \"markdown_description\" in task\n                                else task[\"description\"]\n                            ),\n                        )\n                    ],\n                    metadata={\n                        \"id\": task[\"id\"],\n                        \"status\": task[\"status\"][\"status\"],\n                        \"list\": task[\"list\"][\"name\"],\n                        \"project\": task[\"project\"][\"name\"],\n                        \"folder\": task[\"folder\"][\"name\"],\n                        \"space_id\": task[\"space\"][\"id\"],\n                        \"tags\": [tag[\"name\"] for tag in task[\"tags\"]],\n                        \"priority\": (\n                            task[\"priority\"][\"priority\"]\n                            if \"priority\" in task and task[\"priority\"] is not None\n                            else \"\"\n                        ),\n                    },\n                )\n\n                extra_fields = [\n                    \"date_created\",\n                    \"date_updated\",\n                    \"date_closed\",\n                    \"date_done\",\n                    \"due_date\",\n                ]\n                for extra_field in extra_fields:\n                    if extra_field in task and task[extra_field] is not None:\n                        document.metadata[extra_field] = task[extra_field]\n\n                if self.retrieve_task_comments:\n                    document.sections.extend(self._get_task_comments(task[\"id\"]))\n\n                doc_batch.append(document)\n\n                if len(doc_batch) >= self.batch_size:\n                    yield doc_batch\n                    doc_batch = []\n\n            if response.get(\"last_page\") is True or len(response[\"tasks\"]) < 100:\n                break\n\n        if doc_batch:\n            yield doc_batch\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        if self.api_token is None:\n            raise ConnectorMissingCredentialError(\"Clickup\")\n\n        return self._get_all_tasks_filtered(None, None)\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        if self.api_token is None:\n            raise ConnectorMissingCredentialError(\"Clickup\")\n\n        return self._get_all_tasks_filtered(int(start * 1000), int(end * 1000))\n\n\nif __name__ == \"__main__\":\n    import os\n\n    clickup_connector = ClickupConnector()\n\n    clickup_connector.load_credentials(\n        {\n            \"clickup_api_token\": os.environ[\"clickup_api_token\"],\n            \"clickup_team_id\": os.environ[\"clickup_team_id\"],\n        }\n    )\n\n    latest_docs = clickup_connector.load_from_state()\n\n    for doc in latest_docs:\n        print(doc)\n"
  },
  {
    "path": "backend/onyx/connectors/coda/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/coda/connector.py",
    "content": "import os\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\nfrom typing import Dict\nfrom typing import List\nfrom typing import Optional\n\nfrom pydantic import BaseModel\nfrom retry import retry\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rl_requests,\n)\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.batching import batch_generator\nfrom onyx.utils.logger import setup_logger\n\n_CODA_CALL_TIMEOUT = 30\n_CODA_BASE_URL = \"https://coda.io/apis/v1\"\n\nlogger = setup_logger()\n\n\nclass CodaClientRequestFailedError(ConnectionError):\n    def __init__(self, message: str, status_code: int):\n        super().__init__(\n            f\"Coda API request failed with status {status_code}: {message}\"\n        )\n        self.status_code = status_code\n\n\nclass CodaDoc(BaseModel):\n    id: str\n    browser_link: str\n    name: str\n    created_at: str\n    updated_at: str\n    workspace_id: str\n    workspace_name: str\n    folder_id: str | None\n    folder_name: str | None\n\n\nclass CodaPage(BaseModel):\n    id: str\n    browser_link: str\n    name: str\n    content_type: str\n    created_at: str\n    updated_at: str\n    doc_id: str\n\n\nclass CodaTable(BaseModel):\n    id: str\n    name: str\n    browser_link: str\n    created_at: str\n    updated_at: str\n    doc_id: str\n\n\nclass CodaRow(BaseModel):\n    id: str\n    name: Optional[str] = None\n    index: Optional[int] = None\n    browser_link: str\n    created_at: str\n    updated_at: str\n    values: Dict[str, Any]\n    table_id: str\n    doc_id: str\n\n\nclass CodaApiClient:\n    def __init__(\n        self,\n        bearer_token: str,\n    ) -> None:\n        self.bearer_token = bearer_token\n        self.base_url = os.environ.get(\"CODA_BASE_URL\", _CODA_BASE_URL)\n\n    def get(\n        self, endpoint: str, params: Optional[dict[str, str]] = None\n    ) -> dict[str, Any]:\n        url = self._build_url(endpoint)\n        headers = self._build_headers()\n\n        response = rl_requests.get(\n            url, headers=headers, params=params, timeout=_CODA_CALL_TIMEOUT\n        )\n\n        try:\n            json = response.json()\n        except Exception:\n            json = {}\n\n        if response.status_code >= 300:\n            error = response.reason\n            response_error = json.get(\"error\", {}).get(\"message\", \"\")\n            if response_error:\n                error = response_error\n            raise CodaClientRequestFailedError(error, response.status_code)\n\n        return json\n\n    def _build_headers(self) -> Dict[str, str]:\n        return {\"Authorization\": f\"Bearer {self.bearer_token}\"}\n\n    def _build_url(self, endpoint: str) -> str:\n        return self.base_url.rstrip(\"/\") + \"/\" + endpoint.lstrip(\"/\")\n\n\nclass CodaConnector(LoadConnector, PollConnector):\n    def __init__(\n        self,\n        batch_size: int = INDEX_BATCH_SIZE,\n        index_page_content: bool = True,\n        workspace_id: str | None = None,\n    ) -> None:\n        self.batch_size = batch_size\n        self.index_page_content = index_page_content\n        self.workspace_id = workspace_id\n        self._coda_client: CodaApiClient | None = None\n\n    @property\n    def coda_client(self) -> CodaApiClient:\n        if self._coda_client is None:\n            raise ConnectorMissingCredentialError(\"Coda\")\n        return self._coda_client\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _get_doc(self, doc_id: str) -> CodaDoc:\n        \"\"\"Fetch a specific Coda document by its ID.\"\"\"\n        logger.debug(f\"Fetching Coda doc with ID: {doc_id}\")\n        try:\n            response = self.coda_client.get(f\"docs/{doc_id}\")\n        except CodaClientRequestFailedError as e:\n            if e.status_code == 404:\n                raise ConnectorValidationError(f\"Failed to fetch doc: {doc_id}\") from e\n            else:\n                raise\n\n        return CodaDoc(\n            id=response[\"id\"],\n            browser_link=response[\"browserLink\"],\n            name=response[\"name\"],\n            created_at=response[\"createdAt\"],\n            updated_at=response[\"updatedAt\"],\n            workspace_id=response[\"workspace\"][\"id\"],\n            workspace_name=response[\"workspace\"][\"name\"],\n            folder_id=response[\"folder\"][\"id\"] if response.get(\"folder\") else None,\n            folder_name=response[\"folder\"][\"name\"] if response.get(\"folder\") else None,\n        )\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _get_page(self, doc_id: str, page_id: str) -> CodaPage:\n        \"\"\"Fetch a specific page from a Coda document.\"\"\"\n        logger.debug(f\"Fetching Coda page with ID: {page_id}\")\n        try:\n            response = self.coda_client.get(f\"docs/{doc_id}/pages/{page_id}\")\n        except CodaClientRequestFailedError as e:\n            if e.status_code == 404:\n                raise ConnectorValidationError(\n                    f\"Failed to fetch page: {page_id} from doc: {doc_id}\"\n                ) from e\n            else:\n                raise\n\n        return CodaPage(\n            id=response[\"id\"],\n            doc_id=doc_id,\n            browser_link=response[\"browserLink\"],\n            name=response[\"name\"],\n            content_type=response[\"contentType\"],\n            created_at=response[\"createdAt\"],\n            updated_at=response[\"updatedAt\"],\n        )\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _get_table(self, doc_id: str, table_id: str) -> CodaTable:\n        \"\"\"Fetch a specific table from a Coda document.\"\"\"\n        logger.debug(f\"Fetching Coda table with ID: {table_id}\")\n        try:\n            response = self.coda_client.get(f\"docs/{doc_id}/tables/{table_id}\")\n        except CodaClientRequestFailedError as e:\n            if e.status_code == 404:\n                raise ConnectorValidationError(\n                    f\"Failed to fetch table: {table_id} from doc: {doc_id}\"\n                ) from e\n            else:\n                raise\n\n        return CodaTable(\n            id=response[\"id\"],\n            name=response[\"name\"],\n            browser_link=response[\"browserLink\"],\n            created_at=response[\"createdAt\"],\n            updated_at=response[\"updatedAt\"],\n            doc_id=doc_id,\n        )\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _get_row(self, doc_id: str, table_id: str, row_id: str) -> CodaRow:\n        \"\"\"Fetch a specific row from a Coda table.\"\"\"\n        logger.debug(f\"Fetching Coda row with ID: {row_id}\")\n        try:\n            response = self.coda_client.get(\n                f\"docs/{doc_id}/tables/{table_id}/rows/{row_id}\"\n            )\n        except CodaClientRequestFailedError as e:\n            if e.status_code == 404:\n                raise ConnectorValidationError(\n                    f\"Failed to fetch row: {row_id} from table: {table_id} in doc: {doc_id}\"\n                ) from e\n            else:\n                raise\n\n        values = {}\n        for col_name, col_value in response.get(\"values\", {}).items():\n            values[col_name] = col_value\n\n        return CodaRow(\n            id=response[\"id\"],\n            name=response.get(\"name\"),\n            index=response.get(\"index\"),\n            browser_link=response[\"browserLink\"],\n            created_at=response[\"createdAt\"],\n            updated_at=response[\"updatedAt\"],\n            values=values,\n            table_id=table_id,\n            doc_id=doc_id,\n        )\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _list_all_docs(\n        self, endpoint: str = \"docs\", params: Optional[Dict[str, str]] = None\n    ) -> List[CodaDoc]:\n        \"\"\"List all Coda documents in the workspace.\"\"\"\n        logger.debug(\"Listing documents in Coda\")\n\n        all_docs: List[CodaDoc] = []\n        next_page_token: str | None = None\n        params = params or {}\n\n        if self.workspace_id:\n            params[\"workspaceId\"] = self.workspace_id\n\n        while True:\n            if next_page_token:\n                params[\"pageToken\"] = next_page_token\n\n            try:\n                response = self.coda_client.get(endpoint, params=params)\n            except CodaClientRequestFailedError as e:\n                if e.status_code == 404:\n                    raise ConnectorValidationError(\"Failed to list docs\") from e\n                else:\n                    raise\n\n            items = response.get(\"items\", [])\n\n            for item in items:\n                doc = CodaDoc(\n                    id=item[\"id\"],\n                    browser_link=item[\"browserLink\"],\n                    name=item[\"name\"],\n                    created_at=item[\"createdAt\"],\n                    updated_at=item[\"updatedAt\"],\n                    workspace_id=item[\"workspace\"][\"id\"],\n                    workspace_name=item[\"workspace\"][\"name\"],\n                    folder_id=item[\"folder\"][\"id\"] if item.get(\"folder\") else None,\n                    folder_name=item[\"folder\"][\"name\"] if item.get(\"folder\") else None,\n                )\n                all_docs.append(doc)\n\n            next_page_token = response.get(\"nextPageToken\")\n            if not next_page_token:\n                break\n\n        logger.debug(f\"Found {len(all_docs)} docs\")\n        return all_docs\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _list_pages_in_doc(self, doc_id: str) -> List[CodaPage]:\n        \"\"\"List all pages in a Coda document.\"\"\"\n        logger.debug(f\"Listing pages in Coda doc with ID: {doc_id}\")\n\n        pages: List[CodaPage] = []\n        endpoint = f\"docs/{doc_id}/pages\"\n        params: Dict[str, str] = {}\n        next_page_token: str | None = None\n\n        while True:\n            if next_page_token:\n                params[\"pageToken\"] = next_page_token\n\n            try:\n                response = self.coda_client.get(endpoint, params=params)\n            except CodaClientRequestFailedError as e:\n                if e.status_code == 404:\n                    raise ConnectorValidationError(\n                        f\"Failed to list pages for doc: {doc_id}\"\n                    ) from e\n                else:\n                    raise\n\n            items = response.get(\"items\", [])\n            for item in items:\n                # can be removed if we don't care to skip hidden pages\n                if item.get(\"isHidden\", False):\n                    continue\n\n                pages.append(\n                    CodaPage(\n                        id=item[\"id\"],\n                        browser_link=item[\"browserLink\"],\n                        name=item[\"name\"],\n                        content_type=item[\"contentType\"],\n                        created_at=item[\"createdAt\"],\n                        updated_at=item[\"updatedAt\"],\n                        doc_id=doc_id,\n                    )\n                )\n\n            next_page_token = response.get(\"nextPageToken\")\n            if not next_page_token:\n                break\n\n        logger.debug(f\"Found {len(pages)} pages in doc {doc_id}\")\n        return pages\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _fetch_page_content(self, doc_id: str, page_id: str) -> str:\n        \"\"\"Fetch the content of a Coda page.\"\"\"\n        logger.debug(f\"Fetching content for page {page_id} in doc {doc_id}\")\n\n        content_parts = []\n        next_page_token: str | None = None\n        params: Dict[str, str] = {}\n\n        while True:\n            if next_page_token:\n                params[\"pageToken\"] = next_page_token\n\n            try:\n                response = self.coda_client.get(\n                    f\"docs/{doc_id}/pages/{page_id}/content\", params=params\n                )\n            except CodaClientRequestFailedError as e:\n                if e.status_code == 404:\n                    logger.debug(f\"No content available for page {page_id}\")\n                    return \"\"\n                raise\n\n            items = response.get(\"items\", [])\n\n            for item in items:\n                item_content = item.get(\"itemContent\", {})\n\n                content_text = item_content.get(\"content\", \"\")\n                if content_text:\n                    content_parts.append(content_text)\n\n            next_page_token = response.get(\"nextPageToken\")\n            if not next_page_token:\n                break\n\n        return \"\\n\\n\".join(content_parts)\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _list_tables(self, doc_id: str) -> List[CodaTable]:\n        \"\"\"List all tables in a Coda document.\"\"\"\n        logger.debug(f\"Listing tables in Coda doc with ID: {doc_id}\")\n\n        tables: List[CodaTable] = []\n        endpoint = f\"docs/{doc_id}/tables\"\n        params: Dict[str, str] = {}\n        next_page_token: str | None = None\n\n        while True:\n            if next_page_token:\n                params[\"pageToken\"] = next_page_token\n\n            try:\n                response = self.coda_client.get(endpoint, params=params)\n            except CodaClientRequestFailedError as e:\n                if e.status_code == 404:\n                    raise ConnectorValidationError(\n                        f\"Failed to list tables for doc: {doc_id}\"\n                    ) from e\n                else:\n                    raise\n\n            items = response.get(\"items\", [])\n            for item in items:\n                tables.append(\n                    CodaTable(\n                        id=item[\"id\"],\n                        browser_link=item[\"browserLink\"],\n                        name=item[\"name\"],\n                        created_at=item[\"createdAt\"],\n                        updated_at=item[\"updatedAt\"],\n                        doc_id=doc_id,\n                    )\n                )\n\n            next_page_token = response.get(\"nextPageToken\")\n            if not next_page_token:\n                break\n\n        logger.debug(f\"Found {len(tables)} tables in doc {doc_id}\")\n        return tables\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _list_rows_and_values(self, doc_id: str, table_id: str) -> List[CodaRow]:\n        \"\"\"List all rows and their values in a table.\"\"\"\n        logger.debug(f\"Listing rows in Coda table: {table_id} in Coda doc: {doc_id}\")\n\n        rows: List[CodaRow] = []\n        endpoint = f\"docs/{doc_id}/tables/{table_id}/rows\"\n        params: Dict[str, str] = {\"valueFormat\": \"rich\"}\n        next_page_token: str | None = None\n\n        while True:\n            if next_page_token:\n                params[\"pageToken\"] = next_page_token\n\n            try:\n                response = self.coda_client.get(endpoint, params=params)\n            except CodaClientRequestFailedError as e:\n                if e.status_code == 404:\n                    raise ConnectorValidationError(\n                        f\"Failed to list rows for table: {table_id} in doc: {doc_id}\"\n                    ) from e\n                else:\n                    raise\n\n            items = response.get(\"items\", [])\n            for item in items:\n                values = {}\n                for col_name, col_value in item.get(\"values\", {}).items():\n                    values[col_name] = col_value\n\n                rows.append(\n                    CodaRow(\n                        id=item[\"id\"],\n                        name=item[\"name\"],\n                        index=item[\"index\"],\n                        browser_link=item[\"browserLink\"],\n                        created_at=item[\"createdAt\"],\n                        updated_at=item[\"updatedAt\"],\n                        values=values,\n                        table_id=table_id,\n                        doc_id=doc_id,\n                    )\n                )\n\n            next_page_token = response.get(\"nextPageToken\")\n            if not next_page_token:\n                break\n\n        logger.debug(f\"Found {len(rows)} rows in table {table_id}\")\n        return rows\n\n    def _convert_page_to_document(self, page: CodaPage, content: str = \"\") -> Document:\n        \"\"\"Convert a page into a Document.\"\"\"\n        page_updated = datetime.fromisoformat(page.updated_at).astimezone(timezone.utc)\n\n        text_parts = [page.name, page.browser_link]\n        if content:\n            text_parts.append(content)\n\n        sections = [TextSection(link=page.browser_link, text=\"\\n\\n\".join(text_parts))]\n\n        return Document(\n            id=f\"coda-page-{page.doc_id}-{page.id}\",\n            sections=cast(list[TextSection | ImageSection], sections),\n            source=DocumentSource.CODA,\n            semantic_identifier=page.name or f\"Page {page.id}\",\n            doc_updated_at=page_updated,\n            metadata={\n                \"browser_link\": page.browser_link,\n                \"doc_id\": page.doc_id,\n                \"content_type\": page.content_type,\n            },\n        )\n\n    def _convert_table_with_rows_to_document(\n        self, table: CodaTable, rows: List[CodaRow]\n    ) -> Document:\n        \"\"\"Convert a table and its rows into a single Document with multiple sections (one per row).\"\"\"\n        table_updated = datetime.fromisoformat(table.updated_at).astimezone(\n            timezone.utc\n        )\n\n        sections: List[TextSection] = []\n        for row in rows:\n            content_text = \" \".join(\n                str(v) if not isinstance(v, list) else \" \".join(map(str, v))\n                for v in row.values.values()\n            )\n\n            row_name = row.name or f\"Row {row.index or row.id}\"\n            text = f\"{row_name}: {content_text}\" if content_text else row_name\n\n            sections.append(TextSection(link=row.browser_link, text=text))\n\n        # If no rows, create a single section for the table itself\n        if not sections:\n            sections = [\n                TextSection(link=table.browser_link, text=f\"Table: {table.name}\")\n            ]\n\n        return Document(\n            id=f\"coda-table-{table.doc_id}-{table.id}\",\n            sections=cast(list[TextSection | ImageSection], sections),\n            source=DocumentSource.CODA,\n            semantic_identifier=table.name or f\"Table {table.id}\",\n            doc_updated_at=table_updated,\n            metadata={\n                \"browser_link\": table.browser_link,\n                \"doc_id\": table.doc_id,\n                \"row_count\": str(len(rows)),\n            },\n        )\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        \"\"\"Load and validate Coda credentials.\"\"\"\n        self._coda_client = CodaApiClient(bearer_token=credentials[\"coda_bearer_token\"])\n\n        try:\n            self._coda_client.get(\"docs\", params={\"limit\": \"1\"})\n        except CodaClientRequestFailedError as e:\n            if e.status_code == 401:\n                raise ConnectorMissingCredentialError(\"Invalid Coda API token\")\n            raise\n\n        return None\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        \"\"\"Load all documents from Coda workspace.\"\"\"\n\n        def _iter_documents() -> Generator[Document, None, None]:\n            docs = self._list_all_docs()\n            logger.info(f\"Found {len(docs)} Coda docs to process\")\n\n            for doc in docs:\n                logger.debug(f\"Processing doc: {doc.name} ({doc.id})\")\n\n                try:\n                    pages = self._list_pages_in_doc(doc.id)\n                    for page in pages:\n                        content = \"\"\n                        if self.index_page_content:\n                            try:\n                                content = self._fetch_page_content(doc.id, page.id)\n                            except Exception as e:\n                                logger.warning(\n                                    f\"Failed to fetch content for page {page.id}: {e}\"\n                                )\n                        yield self._convert_page_to_document(page, content)\n                except ConnectorValidationError as e:\n                    logger.warning(f\"Failed to list pages for doc {doc.id}: {e}\")\n\n                try:\n                    tables = self._list_tables(doc.id)\n                    for table in tables:\n                        try:\n                            rows = self._list_rows_and_values(doc.id, table.id)\n                            yield self._convert_table_with_rows_to_document(table, rows)\n                        except ConnectorValidationError as e:\n                            logger.warning(\n                                f\"Failed to list rows for table {table.id}: {e}\"\n                            )\n                            yield self._convert_table_with_rows_to_document(table, [])\n                except ConnectorValidationError as e:\n                    logger.warning(f\"Failed to list tables for doc {doc.id}: {e}\")\n\n        return batch_generator(_iter_documents(), self.batch_size)\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        \"\"\"\n        Polls the Coda API for documents updated between start and end timestamps.\n        We refer to page and table update times to determine if they need to be re-indexed.\n        \"\"\"\n\n        def _iter_documents() -> Generator[Document, None, None]:\n            docs = self._list_all_docs()\n            logger.info(\n                f\"Polling {len(docs)} Coda docs for updates between {start} and {end}\"\n            )\n\n            for doc in docs:\n                try:\n                    pages = self._list_pages_in_doc(doc.id)\n                    for page in pages:\n                        page_timestamp = (\n                            datetime.fromisoformat(page.updated_at)\n                            .astimezone(timezone.utc)\n                            .timestamp()\n                        )\n                        if start < page_timestamp <= end:\n                            content = \"\"\n                            if self.index_page_content:\n                                try:\n                                    content = self._fetch_page_content(doc.id, page.id)\n                                except Exception as e:\n                                    logger.warning(\n                                        f\"Failed to fetch content for page {page.id}: {e}\"\n                                    )\n                            yield self._convert_page_to_document(page, content)\n                except ConnectorValidationError as e:\n                    logger.warning(f\"Failed to list pages for doc {doc.id}: {e}\")\n\n                try:\n                    tables = self._list_tables(doc.id)\n                    for table in tables:\n                        table_timestamp = (\n                            datetime.fromisoformat(table.updated_at)\n                            .astimezone(timezone.utc)\n                            .timestamp()\n                        )\n\n                        try:\n                            rows = self._list_rows_and_values(doc.id, table.id)\n\n                            table_or_rows_updated = start < table_timestamp <= end\n                            if not table_or_rows_updated:\n                                for row in rows:\n                                    row_timestamp = (\n                                        datetime.fromisoformat(row.updated_at)\n                                        .astimezone(timezone.utc)\n                                        .timestamp()\n                                    )\n                                    if start < row_timestamp <= end:\n                                        table_or_rows_updated = True\n                                        break\n\n                            if table_or_rows_updated:\n                                yield self._convert_table_with_rows_to_document(\n                                    table, rows\n                                )\n\n                        except ConnectorValidationError as e:\n                            logger.warning(\n                                f\"Failed to list rows for table {table.id}: {e}\"\n                            )\n                            if table_timestamp > start and table_timestamp <= end:\n                                yield self._convert_table_with_rows_to_document(\n                                    table, []\n                                )\n\n                except ConnectorValidationError as e:\n                    logger.warning(f\"Failed to list tables for doc {doc.id}: {e}\")\n\n        return batch_generator(_iter_documents(), self.batch_size)\n\n    def validate_connector_settings(self) -> None:\n        \"\"\"Validates the Coda connector settings calling the 'whoami' endpoint.\"\"\"\n        try:\n            response = self.coda_client.get(\"whoami\")\n            logger.info(\n                f\"Coda connector validated for user: {response.get('name', 'Unknown')}\"\n            )\n\n            if self.workspace_id:\n                params = {\"workspaceId\": self.workspace_id, \"limit\": \"1\"}\n                self.coda_client.get(\"docs\", params=params)\n                logger.info(f\"Validated access to workspace: {self.workspace_id}\")\n\n        except CodaClientRequestFailedError as e:\n            if e.status_code == 401:\n                raise CredentialExpiredError(\n                    \"Coda credential appears to be invalid or expired (HTTP 401).\"\n                )\n            elif e.status_code == 404:\n                raise ConnectorValidationError(\n                    \"Coda workspace not found or not accessible (HTTP 404). \"\n                    \"Please verify the workspace_id is correct and shared with the integration.\"\n                )\n            elif e.status_code == 429:\n                raise ConnectorValidationError(\n                    \"Validation failed due to Coda rate-limits being exceeded (HTTP 429). Please try again later.\"\n                )\n            else:\n                raise UnexpectedValidationError(\n                    f\"Unexpected Coda HTTP error (status={e.status_code}): {e}\"\n                )\n        except Exception as exc:\n            raise UnexpectedValidationError(\n                f\"Unexpected error during Coda settings validation: {exc}\"\n            )\n"
  },
  {
    "path": "backend/onyx/connectors/confluence/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/confluence/access.py",
    "content": "from collections.abc import Callable\nfrom typing import Any\nfrom typing import cast\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.confluence.onyx_confluence import OnyxConfluence\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import global_version\n\n\ndef get_page_restrictions(\n    confluence_client: OnyxConfluence,\n    page_id: str,\n    page_restrictions: dict[str, Any],\n    ancestors: list[dict[str, Any]],\n) -> ExternalAccess | None:\n    \"\"\"\n    Get page access restrictions for a Confluence page.\n    This functionality requires Enterprise Edition.\n\n    Note: This wrapper is only called from permission sync path. Group IDs are\n    left unprefixed here because upsert_document_external_perms handles prefixing.\n\n    Args:\n        confluence_client: OnyxConfluence client instance\n        page_id: The ID of the page\n        page_restrictions: Dictionary containing page restriction data\n        ancestors: List of ancestor pages with their restriction data\n\n    Returns:\n        ExternalAccess object for the page. None if EE is not enabled or no restrictions found.\n    \"\"\"\n    # Check if EE is enabled\n    if not global_version.is_ee_version():\n        return None\n\n    # Fetch the EE implementation\n    ee_get_all_page_restrictions = cast(\n        Callable[\n            [OnyxConfluence, str, dict[str, Any], list[dict[str, Any]], bool],\n            ExternalAccess | None,\n        ],\n        fetch_versioned_implementation(\n            \"onyx.external_permissions.confluence.page_access\", \"get_page_restrictions\"\n        ),\n    )\n\n    # add_prefix=False: permission sync path - upsert_document_external_perms handles prefixing\n    return ee_get_all_page_restrictions(\n        confluence_client, page_id, page_restrictions, ancestors, False\n    )\n\n\ndef get_all_space_permissions(\n    confluence_client: OnyxConfluence,\n    is_cloud: bool,\n) -> dict[str, ExternalAccess]:\n    \"\"\"\n    Get access permissions for all spaces in Confluence.\n    This functionality requires Enterprise Edition.\n\n    Note: This wrapper is only called from permission sync path. Group IDs are\n    left unprefixed here because upsert_document_external_perms handles prefixing.\n\n    Args:\n        confluence_client: OnyxConfluence client instance\n        is_cloud: Whether this is a Confluence Cloud instance\n\n    Returns:\n        Dictionary mapping space keys to ExternalAccess objects. Empty dict if EE is not enabled.\n    \"\"\"\n    # Check if EE is enabled\n    if not global_version.is_ee_version():\n        return {}\n\n    # Fetch the EE implementation\n    ee_get_all_space_permissions = cast(\n        Callable[\n            [OnyxConfluence, bool, bool],\n            dict[str, ExternalAccess],\n        ],\n        fetch_versioned_implementation(\n            \"onyx.external_permissions.confluence.space_access\",\n            \"get_all_space_permissions\",\n        ),\n    )\n\n    # add_prefix=False: permission sync path - upsert_document_external_perms handles prefixing\n    return ee_get_all_space_permissions(confluence_client, is_cloud, False)\n"
  },
  {
    "path": "backend/onyx/connectors/confluence/connector.py",
    "content": "import copy\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\nfrom urllib.parse import quote\n\nfrom atlassian.errors import ApiError  # type: ignore\nfrom requests.exceptions import HTTPError\nfrom typing_extensions import override\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP\nfrom onyx.configs.app_configs import CONFLUENCE_TIMEZONE_OFFSET\nfrom onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.confluence.access import get_all_space_permissions\nfrom onyx.connectors.confluence.access import get_page_restrictions\nfrom onyx.connectors.confluence.onyx_confluence import extract_text_from_confluence_html\nfrom onyx.connectors.confluence.onyx_confluence import OnyxConfluence\nfrom onyx.connectors.confluence.utils import build_confluence_document_id\nfrom onyx.connectors.confluence.utils import convert_attachment_to_content\nfrom onyx.connectors.confluence.utils import datetime_from_string\nfrom onyx.connectors.confluence.utils import update_param_in_path\nfrom onyx.connectors.confluence.utils import validate_attachment_filetype\nfrom onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    is_atlassian_date_error,\n)\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.interfaces import CheckpointedConnector\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import ConnectorCheckpoint\nfrom onyx.connectors.interfaces import ConnectorFailure\nfrom onyx.connectors.interfaces import CredentialsConnector\nfrom onyx.connectors.interfaces import CredentialsProviderInterface\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnector\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n# Potential Improvements\n# 1. Segment into Sections for more accurate linking, can split by headers but make sure no text/ordering is lost\n_COMMENT_EXPANSION_FIELDS = [\"body.storage.value\"]\n_PAGE_EXPANSION_FIELDS = [\n    \"body.storage.value\",\n    \"version\",\n    \"space\",\n    \"metadata.labels\",\n    \"history.lastUpdated\",\n    \"ancestors\",  # For hierarchy node tracking\n]\n_ATTACHMENT_EXPANSION_FIELDS = [\n    \"version\",\n    \"space\",\n    \"metadata.labels\",\n]\n_RESTRICTIONS_EXPANSION_FIELDS = [\n    \"space\",\n    \"restrictions.read.restrictions.user\",\n    \"restrictions.read.restrictions.group\",\n    \"ancestors.restrictions.read.restrictions.user\",\n    \"ancestors.restrictions.read.restrictions.group\",\n]\n\n_SLIM_DOC_BATCH_SIZE = 5000\n\nONE_HOUR = 3600\nONE_DAY = ONE_HOUR * 24\n\nMAX_CACHED_IDS = 100\n\n\ndef _get_page_id(page: dict[str, Any], allow_missing: bool = False) -> str:\n    if allow_missing and \"id\" not in page:\n        return \"unknown\"\n    return str(page[\"id\"])\n\n\nclass ConfluenceCheckpoint(ConnectorCheckpoint):\n    next_page_url: str | None\n\n\nclass ConfluenceConnector(\n    CheckpointedConnector[ConfluenceCheckpoint],\n    SlimConnector,\n    SlimConnectorWithPermSync,\n    CredentialsConnector,\n):\n    def __init__(\n        self,\n        wiki_base: str,\n        is_cloud: bool,\n        space: str = \"\",\n        page_id: str = \"\",\n        index_recursively: bool = False,\n        cql_query: str | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n        continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE,\n        # if a page has one of the labels specified in this list, we will just\n        # skip it. This is generally used to avoid indexing extra sensitive\n        # pages.\n        labels_to_skip: list[str] = CONFLUENCE_CONNECTOR_LABELS_TO_SKIP,\n        timezone_offset: float = CONFLUENCE_TIMEZONE_OFFSET,\n        scoped_token: bool = False,\n    ) -> None:\n        self.wiki_base = wiki_base\n        self.is_cloud = is_cloud\n        self.space = space\n        self.page_id = page_id\n        self.index_recursively = index_recursively\n        self.cql_query = cql_query\n        self.batch_size = batch_size\n        self.labels_to_skip = labels_to_skip\n        self.timezone_offset = timezone_offset\n        self.scoped_token = scoped_token\n        self._confluence_client: OnyxConfluence | None = None\n        self._low_timeout_confluence_client: OnyxConfluence | None = None\n        self._fetched_titles: set[str] = set()\n        self.allow_images = False\n\n        # Track hierarchy nodes we've already yielded to avoid duplicates\n        self.seen_hierarchy_node_raw_ids: set[str] = set()\n\n        # Remove trailing slash from wiki_base if present\n        self.wiki_base = wiki_base.rstrip(\"/\")\n        \"\"\"\n        If nothing is provided, we default to fetching all pages\n        Only one or none of the following options should be specified so\n            the order shouldn't matter\n        However, we use elif to ensure that only of the following is enforced\n        \"\"\"\n        base_cql_page_query = \"type=page\"\n        if cql_query:\n            base_cql_page_query = cql_query\n        elif page_id:\n            if index_recursively:\n                base_cql_page_query += f\" and (ancestor='{page_id}' or id='{page_id}')\"\n            else:\n                base_cql_page_query += f\" and id='{page_id}'\"\n        elif space:\n            uri_safe_space = quote(space)\n            base_cql_page_query += f\" and space='{uri_safe_space}'\"\n\n        self.base_cql_page_query = base_cql_page_query\n\n        self.cql_label_filter = \"\"\n        if labels_to_skip:\n            labels_to_skip = list(set(labels_to_skip))\n            comma_separated_labels = \",\".join(\n                f\"'{quote(label)}'\" for label in labels_to_skip\n            )\n            self.cql_label_filter = f\" and label not in ({comma_separated_labels})\"\n\n        self.timezone: timezone = timezone(offset=timedelta(hours=timezone_offset))\n        self.credentials_provider: CredentialsProviderInterface | None = None\n\n        self.probe_kwargs = {\n            \"max_backoff_retries\": 6,\n            \"max_backoff_seconds\": 10,\n        }\n\n        self.final_kwargs = {\n            \"max_backoff_retries\": 10,\n            \"max_backoff_seconds\": 60,\n        }\n\n        # deprecated\n        self.continue_on_failure = continue_on_failure\n\n    def set_allow_images(self, value: bool) -> None:\n        logger.info(f\"Setting allow_images to {value}.\")\n        self.allow_images = value\n\n    def _yield_space_hierarchy_nodes(\n        self,\n    ) -> Generator[HierarchyNode, None, None]:\n        \"\"\"Yield hierarchy nodes for all spaces we're indexing.\"\"\"\n        space_keys = [self.space] if self.space else None\n\n        for space in self.confluence_client.retrieve_confluence_spaces(\n            space_keys=space_keys,\n            limit=50,\n        ):\n            space_key = space.get(\"key\")\n            if not space_key or space_key in self.seen_hierarchy_node_raw_ids:\n                continue\n\n            self.seen_hierarchy_node_raw_ids.add(space_key)\n\n            # Build space link\n            space_link = f\"{self.wiki_base}/spaces/{space_key}\"\n\n            yield HierarchyNode(\n                raw_node_id=space_key,\n                raw_parent_id=None,  # Parent is SOURCE\n                display_name=space.get(\"name\", space_key),\n                link=space_link,\n                node_type=HierarchyNodeType.SPACE,\n            )\n\n    def _yield_ancestor_hierarchy_nodes(\n        self,\n        page: dict[str, Any],\n    ) -> Generator[HierarchyNode, None, None]:\n        \"\"\"Yield hierarchy nodes for all unseen ancestors of this page.\n\n        Any page that appears as an ancestor of another page IS a hierarchy node\n        (it has at least one child - the page we're currently processing).\n\n        This ensures parent nodes are always yielded before child documents.\n\n        Note: raw_node_id for page hierarchy nodes uses the page URL (same as document.id)\n        to enable document<->hierarchy node linking in the indexing pipeline.\n        Space hierarchy nodes use the space key since they don't have documents.\n        \"\"\"\n        ancestors = page.get(\"ancestors\", [])\n        space_key = page.get(\"space\", {}).get(\"key\")\n\n        # Ensure space is yielded first (if not already)\n        if space_key and space_key not in self.seen_hierarchy_node_raw_ids:\n            self.seen_hierarchy_node_raw_ids.add(space_key)\n            space = page.get(\"space\", {})\n            yield HierarchyNode(\n                raw_node_id=space_key,\n                raw_parent_id=None,  # Parent is SOURCE\n                display_name=space.get(\"name\", space_key),\n                link=f\"{self.wiki_base}/spaces/{space_key}\",\n                node_type=HierarchyNodeType.SPACE,\n            )\n\n        # Walk through ancestors (root to immediate parent)\n        # Build a list of (ancestor_url, ancestor_data) pairs first\n        ancestor_urls: list[str | None] = []\n        for ancestor in ancestors:\n            if \"_links\" in ancestor and \"webui\" in ancestor[\"_links\"]:\n                ancestor_urls.append(\n                    build_confluence_document_id(\n                        self.wiki_base, ancestor[\"_links\"][\"webui\"], self.is_cloud\n                    )\n                )\n            else:\n                ancestor_urls.append(None)\n\n        for i, ancestor in enumerate(ancestors):\n            ancestor_url = ancestor_urls[i]\n            if not ancestor_url:\n                # Can't build URL for this ancestor, skip it\n                continue\n\n            if ancestor_url in self.seen_hierarchy_node_raw_ids:\n                continue\n\n            self.seen_hierarchy_node_raw_ids.add(ancestor_url)\n\n            # Determine parent of this ancestor\n            if i == 0:\n                # First ancestor - parent is the space\n                parent_raw_id = space_key\n            else:\n                # Parent is the previous ancestor (use URL)\n                parent_raw_id = ancestor_urls[i - 1]\n\n            yield HierarchyNode(\n                raw_node_id=ancestor_url,  # Use URL to match document.id\n                raw_parent_id=parent_raw_id,\n                display_name=ancestor.get(\"title\", f\"Page {ancestor.get('id')}\"),\n                link=ancestor_url,\n                node_type=HierarchyNodeType.PAGE,\n            )\n\n    def _get_parent_hierarchy_raw_id(self, page: dict[str, Any]) -> str | None:\n        \"\"\"Get the raw hierarchy node ID of this page's parent.\n\n        Returns:\n            - Parent page URL if page has a parent page (last item in ancestors)\n            - Space key if page is at top level of space\n            - None if we can't determine\n\n        Note: For pages, we return URLs (to match document.id and hierarchy node raw_node_id).\n        For spaces, we return the space key (spaces don't have documents).\n        \"\"\"\n        ancestors = page.get(\"ancestors\", [])\n        if ancestors:\n            # Last ancestor is the immediate parent page - use URL\n            parent = ancestors[-1]\n            if \"_links\" in parent and \"webui\" in parent[\"_links\"]:\n                return build_confluence_document_id(\n                    self.wiki_base, parent[\"_links\"][\"webui\"], self.is_cloud\n                )\n            # Fallback to page ID if URL not available (shouldn't happen normally)\n            return str(parent.get(\"id\"))\n\n        # Top-level page - parent is the space (use space key)\n        return page.get(\"space\", {}).get(\"key\")\n\n    def _maybe_yield_page_hierarchy_node(\n        self, page: dict[str, Any]\n    ) -> HierarchyNode | None:\n        \"\"\"Yield a hierarchy node for this page if not already yielded.\n\n        Used when a page has attachments - attachments are children of the page\n        in the hierarchy, so the page must be a hierarchy node.\n\n        Note: raw_node_id uses the page URL (same as document.id) to enable\n        document<->hierarchy node linking in the indexing pipeline.\n        \"\"\"\n        # Build page URL - we use this as raw_node_id to match document.id\n        if \"_links\" not in page or \"webui\" not in page[\"_links\"]:\n            return None  # Can't build URL, skip\n\n        page_url = build_confluence_document_id(\n            self.wiki_base, page[\"_links\"][\"webui\"], self.is_cloud\n        )\n\n        if page_url in self.seen_hierarchy_node_raw_ids:\n            return None\n\n        self.seen_hierarchy_node_raw_ids.add(page_url)\n\n        # Get parent hierarchy ID\n        parent_raw_id = self._get_parent_hierarchy_raw_id(page)\n\n        return HierarchyNode(\n            raw_node_id=page_url,  # Use URL to match document.id\n            raw_parent_id=parent_raw_id,\n            display_name=page.get(\"title\", f\"Page {_get_page_id(page)}\"),\n            link=page_url,\n            node_type=HierarchyNodeType.PAGE,\n        )\n\n    @property\n    def confluence_client(self) -> OnyxConfluence:\n        if self._confluence_client is None:\n            raise ConnectorMissingCredentialError(\"Confluence\")\n        return self._confluence_client\n\n    @property\n    def low_timeout_confluence_client(self) -> OnyxConfluence:\n        if self._low_timeout_confluence_client is None:\n            raise ConnectorMissingCredentialError(\"Confluence\")\n        return self._low_timeout_confluence_client\n\n    def set_credentials_provider(\n        self, credentials_provider: CredentialsProviderInterface\n    ) -> None:\n        self.credentials_provider = credentials_provider\n\n        # raises exception if there's a problem\n        confluence_client = OnyxConfluence(\n            is_cloud=self.is_cloud,\n            url=self.wiki_base,\n            credentials_provider=credentials_provider,\n            scoped_token=self.scoped_token,\n        )\n        confluence_client._probe_connection(**self.probe_kwargs)\n        confluence_client._initialize_connection(**self.final_kwargs)\n\n        self._confluence_client = confluence_client\n\n        # create a low timeout confluence client for sync flows\n        low_timeout_confluence_client = OnyxConfluence(\n            is_cloud=self.is_cloud,\n            url=self.wiki_base,\n            credentials_provider=credentials_provider,\n            timeout=3,\n            scoped_token=self.scoped_token,\n        )\n        low_timeout_confluence_client._probe_connection(**self.probe_kwargs)\n        low_timeout_confluence_client._initialize_connection(**self.final_kwargs)\n\n        self._low_timeout_confluence_client = low_timeout_confluence_client\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        raise NotImplementedError(\"Use set_credentials_provider with this connector.\")\n\n    def _construct_page_cql_query(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> str:\n        \"\"\"\n        Constructs a CQL query for use in the confluence API. See\n        https://developer.atlassian.com/server/confluence/advanced-searching-using-cql/\n        for more information. This is JUST the CQL, not the full URL used to hit the API.\n        Use _build_page_retrieval_url to get the full URL.\n        \"\"\"\n        page_query = self.base_cql_page_query + self.cql_label_filter\n        # Add time filters\n        if start:\n            formatted_start_time = datetime.fromtimestamp(\n                start, tz=self.timezone\n            ).strftime(\"%Y-%m-%d %H:%M\")\n            page_query += f\" and lastmodified >= '{formatted_start_time}'\"\n        if end:\n            formatted_end_time = datetime.fromtimestamp(end, tz=self.timezone).strftime(\n                \"%Y-%m-%d %H:%M\"\n            )\n            page_query += f\" and lastmodified <= '{formatted_end_time}'\"\n\n        page_query += \" order by lastmodified asc\"\n        return page_query\n\n    def _construct_attachment_query(\n        self,\n        confluence_page_id: str,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> str:\n        attachment_query = f\"type=attachment and container='{confluence_page_id}'\"\n        attachment_query += self.cql_label_filter\n        # Add time filters to avoid reprocessing unchanged attachments during refresh\n        if start:\n            formatted_start_time = datetime.fromtimestamp(\n                start, tz=self.timezone\n            ).strftime(\"%Y-%m-%d %H:%M\")\n            attachment_query += f\" and lastmodified >= '{formatted_start_time}'\"\n        if end:\n            formatted_end_time = datetime.fromtimestamp(end, tz=self.timezone).strftime(\n                \"%Y-%m-%d %H:%M\"\n            )\n            attachment_query += f\" and lastmodified <= '{formatted_end_time}'\"\n        attachment_query += \" order by lastmodified asc\"\n        return attachment_query\n\n    def _get_comment_string_for_page_id(self, page_id: str) -> str:\n        comment_string = \"\"\n        comment_cql = f\"type=comment and container='{page_id}'\"\n        comment_cql += self.cql_label_filter\n        expand = \",\".join(_COMMENT_EXPANSION_FIELDS)\n\n        for comment in self.confluence_client.paginated_cql_retrieval(\n            cql=comment_cql,\n            expand=expand,\n        ):\n            comment_string += \"\\nComment:\\n\"\n            comment_string += extract_text_from_confluence_html(\n                confluence_client=self.confluence_client,\n                confluence_object=comment,\n                fetched_titles=set(),\n            )\n        return comment_string\n\n    def _convert_page_to_document(\n        self, page: dict[str, Any]\n    ) -> Document | ConnectorFailure:\n        \"\"\"\n        Converts a Confluence page to a Document object.\n        Includes the page content, comments, and attachments.\n        \"\"\"\n        page_id = page_url = \"\"\n        try:\n            # Extract basic page information\n            page_id = _get_page_id(page)\n            page_title = page[\"title\"]\n            logger.info(f\"Converting page {page_title} to document\")\n            page_url = build_confluence_document_id(\n                self.wiki_base, page[\"_links\"][\"webui\"], self.is_cloud\n            )\n\n            # Get the page content\n            page_content = extract_text_from_confluence_html(\n                self.confluence_client, page, self._fetched_titles\n            )\n\n            # Create the main section for the page content\n            sections: list[TextSection | ImageSection] = [\n                TextSection(text=page_content, link=page_url)\n            ]\n\n            # Process comments if available\n            comment_text = self._get_comment_string_for_page_id(page_id)\n            if comment_text:\n                sections.append(\n                    TextSection(text=comment_text, link=f\"{page_url}#comments\")\n                )\n            # Note: attachments are no longer merged into the page document.\n            # They are indexed as separate documents downstream.\n\n            # Extract metadata\n            metadata = {}\n            if \"space\" in page:\n                metadata[\"space\"] = page[\"space\"].get(\"name\", \"\")\n\n            # Extract labels\n            labels = []\n            if \"metadata\" in page and \"labels\" in page[\"metadata\"]:\n                for label in page[\"metadata\"][\"labels\"].get(\"results\", []):\n                    labels.append(label.get(\"name\", \"\"))\n            if labels:\n                metadata[\"labels\"] = labels\n\n            # Extract owners\n            primary_owners = []\n            if \"version\" in page and \"by\" in page[\"version\"]:\n                author = page[\"version\"][\"by\"]\n                display_name = author.get(\"displayName\", \"Unknown\")\n                email = author.get(\"email\", \"unknown@domain.invalid\")\n                primary_owners.append(\n                    BasicExpertInfo(display_name=display_name, email=email)\n                )\n\n            # Determine parent hierarchy node\n            parent_hierarchy_raw_node_id = self._get_parent_hierarchy_raw_id(page)\n\n            # Create the document\n            return Document(\n                id=page_url,\n                sections=sections,\n                source=DocumentSource.CONFLUENCE,\n                semantic_identifier=page_title,\n                metadata=metadata,\n                doc_updated_at=datetime_from_string(page[\"version\"][\"when\"]),\n                primary_owners=primary_owners if primary_owners else None,\n                parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,\n            )\n        except Exception as e:\n            logger.error(f\"Error converting page {page.get('id', 'unknown')}: {e}\")\n            if is_atlassian_date_error(e):  # propagate error to be caught and retried\n                raise\n            return ConnectorFailure(\n                failed_document=DocumentFailure(\n                    document_id=page_id,\n                    document_link=page_url,\n                ),\n                failure_message=f\"Error converting page {page.get('id', 'unknown')}: {e}\",\n                exception=e,\n            )\n\n    def _fetch_page_attachments(\n        self,\n        page: dict[str, Any],\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> tuple[list[Document | HierarchyNode], list[ConnectorFailure]]:\n        \"\"\"\n        Inline attachments are added directly to the document as text or image sections by\n        this function. The returned documents/connectorfailures are for non-inline attachments\n        and those at the end of the page.\n\n        If there are valid attachments, the page itself is yielded as a hierarchy node\n        (since attachments are children of the page in the hierarchy).\n        \"\"\"\n        attachment_query = self._construct_attachment_query(\n            _get_page_id(page), start, end\n        )\n        attachment_failures: list[ConnectorFailure] = []\n        attachment_docs: list[Document | HierarchyNode] = []\n        page_url = \"\"\n        page_hierarchy_node_yielded = False\n\n        try:\n            for attachment in self.confluence_client.paginated_cql_retrieval(\n                cql=attachment_query,\n                expand=\",\".join(_ATTACHMENT_EXPANSION_FIELDS),\n            ):\n                media_type: str = attachment.get(\"metadata\", {}).get(\"mediaType\", \"\")\n\n                # TODO(rkuo): this check is partially redundant with validate_attachment_filetype\n                # and checks in convert_attachment_to_content/process_attachment\n                # but doing the check here avoids an unnecessary download. Due for refactoring.\n                if not self.allow_images:\n                    if media_type.startswith(\"image/\"):\n                        logger.info(\n                            f\"Skipping attachment because allow images is False: {attachment['title']}\"\n                        )\n                        continue\n\n                if not validate_attachment_filetype(\n                    attachment,\n                ):\n                    logger.info(\n                        f\"Skipping attachment because it is not an accepted file type: {attachment['title']}\"\n                    )\n                    continue\n\n                logger.info(\n                    f\"Processing attachment: {attachment['title']} attached to page {page['title']}\"\n                )\n                # Attachment document id: use the download URL for stable identity\n                try:\n                    object_url = build_confluence_document_id(\n                        self.wiki_base, attachment[\"_links\"][\"download\"], self.is_cloud\n                    )\n                except Exception as e:\n                    logger.warning(\n                        f\"Invalid attachment url for id {attachment['id']}, skipping\"\n                    )\n                    logger.debug(f\"Error building attachment url: {e}\")\n                    continue\n                try:\n                    response = convert_attachment_to_content(\n                        confluence_client=self.confluence_client,\n                        attachment=attachment,\n                        page_id=_get_page_id(page),\n                        allow_images=self.allow_images,\n                    )\n                    if response is None:\n                        continue\n\n                    content_text, file_storage_name = response\n\n                    sections: list[TextSection | ImageSection] = []\n                    if content_text:\n                        sections.append(TextSection(text=content_text, link=object_url))\n                    elif file_storage_name:\n                        sections.append(\n                            ImageSection(\n                                link=object_url, image_file_id=file_storage_name\n                            )\n                        )\n\n                    # Build attachment-specific metadata\n                    attachment_metadata: dict[str, str | list[str]] = {}\n                    if \"space\" in attachment:\n                        attachment_metadata[\"space\"] = attachment[\"space\"].get(\n                            \"name\", \"\"\n                        )\n                    labels: list[str] = []\n                    if \"metadata\" in attachment and \"labels\" in attachment[\"metadata\"]:\n                        for label in attachment[\"metadata\"][\"labels\"].get(\n                            \"results\", []\n                        ):\n                            labels.append(label.get(\"name\", \"\"))\n                    if labels:\n                        attachment_metadata[\"labels\"] = labels\n                    page_url = page_url or build_confluence_document_id(\n                        self.wiki_base, page[\"_links\"][\"webui\"], self.is_cloud\n                    )\n                    attachment_metadata[\"parent_page_id\"] = page_url\n                    attachment_id = build_confluence_document_id(\n                        self.wiki_base, attachment[\"_links\"][\"webui\"], self.is_cloud\n                    )\n\n                    primary_owners: list[BasicExpertInfo] | None = None\n                    if \"version\" in attachment and \"by\" in attachment[\"version\"]:\n                        author = attachment[\"version\"][\"by\"]\n                        display_name = author.get(\"displayName\", \"Unknown\")\n                        email = author.get(\"email\", \"unknown@domain.invalid\")\n                        primary_owners = [\n                            BasicExpertInfo(display_name=display_name, email=email)\n                        ]\n\n                    # Attachments have their parent page as the hierarchy parent\n                    # Use page URL to match the hierarchy node's raw_node_id\n                    attachment_parent_hierarchy_raw_id = page_url\n\n                    attachment_doc = Document(\n                        id=attachment_id,\n                        sections=sections,\n                        source=DocumentSource.CONFLUENCE,\n                        semantic_identifier=attachment.get(\"title\", object_url),\n                        metadata=attachment_metadata,\n                        doc_updated_at=(\n                            datetime_from_string(attachment[\"version\"][\"when\"])\n                            if attachment.get(\"version\")\n                            and attachment[\"version\"].get(\"when\")\n                            else None\n                        ),\n                        primary_owners=primary_owners,\n                        parent_hierarchy_raw_node_id=attachment_parent_hierarchy_raw_id,\n                    )\n\n                    # If this is the first valid attachment, yield the page as a\n                    # hierarchy node (attachments are children of the page)\n                    if not page_hierarchy_node_yielded:\n                        page_hierarchy_node = self._maybe_yield_page_hierarchy_node(\n                            page\n                        )\n                        if page_hierarchy_node:\n                            attachment_docs.append(page_hierarchy_node)\n                        page_hierarchy_node_yielded = True\n\n                    attachment_docs.append(attachment_doc)\n                except Exception as e:\n                    logger.error(\n                        f\"Failed to extract/summarize attachment {attachment['title']}\",\n                        exc_info=e,\n                    )\n                    if is_atlassian_date_error(e):\n                        # propagate error to be caught and retried\n                        raise\n                    attachment_failures.append(\n                        ConnectorFailure(\n                            failed_document=DocumentFailure(\n                                document_id=object_url,\n                                document_link=object_url,\n                            ),\n                            failure_message=f\"Failed to extract/summarize attachment {attachment['title']} for doc {object_url}\",\n                            exception=e,\n                        )\n                    )\n        except HTTPError as e:\n            # If we get a 403 after all retries, the user likely doesn't have permission\n            # to access attachments on this page. Log and skip rather than failing the whole job.\n            page_id = _get_page_id(page, allow_missing=True)\n            page_title = page.get(\"title\", \"unknown\")\n            if e.response and e.response.status_code in [401, 403]:\n                failure_message_prefix = (\n                    \"Invalid credentials (401)\"\n                    if e.response.status_code == 401\n                    else \"Permission denied (403)\"\n                )\n                failure_message = (\n                    f\"{failure_message_prefix} when fetching attachments for page '{page_title}' \"\n                    f\"(ID: {page_id}). The user may not have permission to query attachments on this page. \"\n                    \"Skipping attachments for this page.\"\n                )\n                logger.warning(failure_message)\n\n                # Build the page URL for the failure record\n                try:\n                    page_url = build_confluence_document_id(\n                        self.wiki_base, page[\"_links\"][\"webui\"], self.is_cloud\n                    )\n                except Exception:\n                    page_url = f\"page_id:{page_id}\"\n\n                return [], [\n                    ConnectorFailure(\n                        failed_document=DocumentFailure(\n                            document_id=page_id,\n                            document_link=page_url,\n                        ),\n                        failure_message=failure_message,\n                        exception=e,\n                    )\n                ]\n            else:\n                raise\n\n        return attachment_docs, attachment_failures\n\n    def _fetch_document_batches(\n        self,\n        checkpoint: ConfluenceCheckpoint,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> CheckpointOutput[ConfluenceCheckpoint]:\n        \"\"\"\n        Yields batches of Documents and HierarchyNodes. For each page:\n         - Yield hierarchy nodes for spaces and ancestor pages (parent-before-child ordering)\n         - Create a Document with 1 Section for the page text/comments\n         - Then fetch attachments. For each attachment:\n             - Attempt to convert it with convert_attachment_to_content(...)\n             - If successful, create a new Section with the extracted text or summary.\n        \"\"\"\n        checkpoint = copy.deepcopy(checkpoint)\n\n        # Yield space hierarchy nodes FIRST (only once per connector run)\n        if not checkpoint.next_page_url:\n            yield from self._yield_space_hierarchy_nodes()\n\n        # use \"start\" when last_updated is 0 or for confluence server\n        start_ts = start\n        page_query_url = checkpoint.next_page_url or self._build_page_retrieval_url(\n            start_ts, end, self.batch_size\n        )\n        logger.debug(f\"page_query_url: {page_query_url}\")\n\n        # store the next page start for confluence server, cursor for confluence cloud\n        def store_next_page_url(next_page_url: str) -> None:\n            checkpoint.next_page_url = next_page_url\n\n        for page in self.confluence_client.paginated_page_retrieval(\n            cql_url=page_query_url,\n            limit=self.batch_size,\n            next_page_callback=store_next_page_url,\n        ):\n            # Yield hierarchy nodes for all ancestors (parent-before-child ordering)\n            yield from self._yield_ancestor_hierarchy_nodes(page)\n\n            # Build doc from page\n            doc_or_failure = self._convert_page_to_document(page)\n\n            if isinstance(doc_or_failure, ConnectorFailure):\n                yield doc_or_failure\n                continue\n\n            # yield completed document (or failure)\n            yield doc_or_failure\n\n            # Now get attachments for that page:\n            attachment_docs, attachment_failures = self._fetch_page_attachments(\n                page, start, end\n            )\n            # yield attached docs and failures\n            yield from attachment_docs\n            yield from attachment_failures\n\n            # Create checkpoint once a full page of results is returned\n            if checkpoint.next_page_url and checkpoint.next_page_url != page_query_url:\n                return checkpoint\n\n        checkpoint.has_more = False\n        return checkpoint\n\n    def _build_page_retrieval_url(\n        self,\n        start: SecondsSinceUnixEpoch | None,\n        end: SecondsSinceUnixEpoch | None,\n        limit: int,\n    ) -> str:\n        \"\"\"\n        Builds the full URL used to retrieve pages from the confluence API.\n        This can be used as input to the confluence client's _paginate_url\n        or paginated_page_retrieval methods.\n        \"\"\"\n        page_query = self._construct_page_cql_query(start, end)\n        cql_url = self.confluence_client.build_cql_url(\n            page_query, expand=\",\".join(_PAGE_EXPANSION_FIELDS)\n        )\n        return update_param_in_path(cql_url, \"limit\", str(limit))\n\n    @override\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: ConfluenceCheckpoint,\n    ) -> CheckpointOutput[ConfluenceCheckpoint]:\n        end += ONE_DAY  # handle time zone weirdness\n        try:\n            return self._fetch_document_batches(checkpoint, start, end)\n        except Exception as e:\n            if is_atlassian_date_error(e) and start is not None:\n                logger.warning(\n                    \"Confluence says we provided an invalid 'updated' field. This may indicate\"\n                    \"a real issue, but can also appear during edge cases like daylight\"\n                    f\"savings time changes. Retrying with a 1 hour offset. Error: {e}\"\n                )\n                return self._fetch_document_batches(checkpoint, start - ONE_HOUR, end)\n            raise\n\n    @override\n    def build_dummy_checkpoint(self) -> ConfluenceCheckpoint:\n        return ConfluenceCheckpoint(has_more=True, next_page_url=None)\n\n    @override\n    def validate_checkpoint_json(self, checkpoint_json: str) -> ConfluenceCheckpoint:\n        return ConfluenceCheckpoint.model_validate_json(checkpoint_json)\n\n    @override\n    def retrieve_all_slim_docs(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> GenerateSlimDocumentOutput:\n        return self._retrieve_all_slim_docs(\n            start=start,\n            end=end,\n            callback=callback,\n            include_permissions=False,\n        )\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> GenerateSlimDocumentOutput:\n        \"\"\"\n        Return 'slim' docs (IDs + minimal permission data).\n        Does not fetch actual text. Used primarily for incremental permission sync.\n        \"\"\"\n        return self._retrieve_all_slim_docs(\n            start=start,\n            end=end,\n            callback=callback,\n            include_permissions=True,\n        )\n\n    def _retrieve_all_slim_docs(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n        include_permissions: bool = True,\n    ) -> GenerateSlimDocumentOutput:\n        doc_metadata_list: list[SlimDocument | HierarchyNode] = []\n        restrictions_expand = \",\".join(_RESTRICTIONS_EXPANSION_FIELDS)\n\n        space_level_access_info: dict[str, ExternalAccess] = {}\n        if include_permissions:\n            space_level_access_info = get_all_space_permissions(\n                self.confluence_client, self.is_cloud\n            )\n\n        # Yield space hierarchy nodes first\n        for node in self._yield_space_hierarchy_nodes():\n            doc_metadata_list.append(node)\n\n        def get_external_access(\n            doc_id: str, restrictions: dict[str, Any], ancestors: list[dict[str, Any]]\n        ) -> ExternalAccess | None:\n            return get_page_restrictions(\n                self.confluence_client, doc_id, restrictions, ancestors\n            ) or space_level_access_info.get(page_space_key)\n\n        # Query pages (with optional time filtering for indexing_start)\n        page_query = self._construct_page_cql_query(start, end)\n        for page in self.confluence_client.cql_paginate_all_expansions(\n            cql=page_query,\n            expand=restrictions_expand,\n            limit=_SLIM_DOC_BATCH_SIZE,\n        ):\n            # Yield ancestor hierarchy nodes for this page\n            for node in self._yield_ancestor_hierarchy_nodes(page):\n                doc_metadata_list.append(node)\n\n            page_id = _get_page_id(page)\n            page_restrictions = page.get(\"restrictions\") or {}\n            page_space_key = page.get(\"space\", {}).get(\"key\")\n            page_ancestors = page.get(\"ancestors\", [])\n\n            page_id = build_confluence_document_id(\n                self.wiki_base, page[\"_links\"][\"webui\"], self.is_cloud\n            )\n            doc_metadata_list.append(\n                SlimDocument(\n                    id=page_id,\n                    external_access=(\n                        get_external_access(page_id, page_restrictions, page_ancestors)\n                        if include_permissions\n                        else None\n                    ),\n                    parent_hierarchy_raw_node_id=self._get_parent_hierarchy_raw_id(\n                        page\n                    ),\n                )\n            )\n\n            # Query attachments for each page\n            page_hierarchy_node_yielded = False\n            attachment_query = self._construct_attachment_query(\n                _get_page_id(page), start, end\n            )\n            for attachment in self.confluence_client.cql_paginate_all_expansions(\n                cql=attachment_query,\n                expand=restrictions_expand,\n                limit=_SLIM_DOC_BATCH_SIZE,\n            ):\n                # If you skip images, you'll skip them in the permission sync\n                attachment[\"metadata\"].get(\"mediaType\", \"\")\n                if not validate_attachment_filetype(\n                    attachment,\n                ):\n                    continue\n\n                # If this page has valid attachments and we haven't yielded it as a\n                # hierarchy node yet, do so now (attachments are children of the page)\n                if not page_hierarchy_node_yielded:\n                    page_node = self._maybe_yield_page_hierarchy_node(page)\n                    if page_node:\n                        doc_metadata_list.append(page_node)\n                    page_hierarchy_node_yielded = True\n\n                attachment_restrictions = attachment.get(\"restrictions\", {})\n                if not attachment_restrictions:\n                    attachment_restrictions = page_restrictions or {}\n\n                attachment_space_key = attachment.get(\"space\", {}).get(\"key\")\n                if not attachment_space_key:\n                    attachment_space_key = page_space_key\n\n                attachment_id = build_confluence_document_id(\n                    self.wiki_base,\n                    attachment[\"_links\"][\"webui\"],\n                    self.is_cloud,\n                )\n                doc_metadata_list.append(\n                    SlimDocument(\n                        id=attachment_id,\n                        external_access=(\n                            get_external_access(\n                                attachment_id, attachment_restrictions, []\n                            )\n                            if include_permissions\n                            else None\n                        ),\n                        parent_hierarchy_raw_node_id=page_id,\n                    )\n                )\n\n            if len(doc_metadata_list) > _SLIM_DOC_BATCH_SIZE:\n                yield doc_metadata_list[:_SLIM_DOC_BATCH_SIZE]\n                doc_metadata_list = doc_metadata_list[_SLIM_DOC_BATCH_SIZE:]\n\n                if callback and callback.should_stop():\n                    raise RuntimeError(\n                        \"retrieve_all_slim_docs_perm_sync: Stop signal detected\"\n                    )\n                if callback:\n                    callback.progress(\"retrieve_all_slim_docs_perm_sync\", 1)\n\n        yield doc_metadata_list\n\n    def validate_connector_settings(self) -> None:\n        try:\n            spaces_iter = self.low_timeout_confluence_client.retrieve_confluence_spaces(\n                limit=1,\n            )\n            first_space = next(spaces_iter, None)\n        except HTTPError as e:\n            status_code = e.response.status_code if e.response else None\n            if status_code == 401:\n                raise CredentialExpiredError(\n                    \"Invalid or expired Confluence credentials (HTTP 401).\"\n                )\n            elif status_code == 403:\n                raise InsufficientPermissionsError(\n                    \"Insufficient permissions to access Confluence resources (HTTP 403).\"\n                )\n            raise UnexpectedValidationError(\n                f\"Unexpected Confluence error (status={status_code}): {e}\"\n            )\n        except Exception as e:\n            raise UnexpectedValidationError(\n                f\"Unexpected error while validating Confluence settings: {e}\"\n            )\n\n        if not first_space:\n            raise ConnectorValidationError(\n                \"No Confluence spaces found. Either your credentials lack permissions, or \"\n                \"there truly are no spaces in this Confluence instance.\"\n            )\n\n        if self.space:\n            try:\n                self.low_timeout_confluence_client.get_space(self.space)\n            except ApiError as e:\n                raise ConnectorValidationError(\n                    \"Invalid Confluence space key provided\"\n                ) from e\n\n\nif __name__ == \"__main__\":\n    import os\n    from onyx.utils.variable_functionality import global_version\n    from tests.daily.connectors.utils import load_all_from_connector\n\n    # For connector permission testing, set EE to true.\n    global_version.set_ee()\n\n    # base url\n    wiki_base = os.environ[\"CONFLUENCE_URL\"]\n\n    # auth stuff\n    username = os.environ[\"CONFLUENCE_USERNAME\"]\n    access_token = os.environ[\"CONFLUENCE_ACCESS_TOKEN\"]\n    is_cloud = os.environ[\"CONFLUENCE_IS_CLOUD\"].lower() == \"true\"\n\n    # space + page\n    space = os.environ[\"CONFLUENCE_SPACE_KEY\"]\n    # page_id = os.environ[\"CONFLUENCE_PAGE_ID\"]\n\n    confluence_connector = ConfluenceConnector(\n        wiki_base=wiki_base,\n        space=space,\n        is_cloud=is_cloud,\n        # page_id=page_id,\n    )\n\n    credentials_provider = OnyxStaticCredentialsProvider(\n        None,\n        DocumentSource.CONFLUENCE,\n        {\n            \"confluence_username\": username,\n            \"confluence_access_token\": access_token,\n        },\n    )\n    confluence_connector.set_credentials_provider(credentials_provider)\n\n    start = 0.0\n    end = datetime.now().timestamp()\n\n    # Fetch all `SlimDocuments`.\n    for slim_doc in confluence_connector.retrieve_all_slim_docs_perm_sync():\n        print(slim_doc)\n\n    # Fetch all `Documents`.\n    for doc in load_all_from_connector(\n        connector=confluence_connector,\n        start=start,\n        end=end,\n    ).documents:\n        print(doc)\n"
  },
  {
    "path": "backend/onyx/connectors/confluence/models.py",
    "content": "from pydantic import BaseModel\n\n\nclass ConfluenceUser(BaseModel):\n    user_id: str  # accountId in Cloud, userKey in Server\n    username: str | None  # Confluence Cloud doesn't give usernames\n    display_name: str\n    # Confluence Data Center doesn't give email back by default,\n    # have to fetch it with a different endpoint\n    email: str | None\n    type: str\n"
  },
  {
    "path": "backend/onyx/connectors/confluence/onyx_confluence.py",
    "content": "\"\"\"\n# README (notes on Confluence pagination):\n\nWe've noticed that the `search/users` and `users/memberof` endpoints for Confluence Cloud use offset-based pagination as\nopposed to cursor-based. We also know that page-retrieval uses cursor-based pagination.\n\nOur default pagination strategy right now for cloud is to assume cursor-based.\nHowever, if you notice that a cloud API is not being properly paginated (i.e., if the `_links.next` is not appearing in the\nreturned payload), then you can force offset-based pagination.\n\n# TODO (@raunakab)\nWe haven't explored all of the cloud APIs' pagination strategies. @raunakab take time to go through this and figure them out.\n\"\"\"\n\nimport json\nimport time\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\nfrom typing import TypeVar\nfrom urllib.parse import quote\n\nimport bs4\nfrom atlassian import Confluence  # type:ignore\nfrom redis import Redis\nfrom requests import HTTPError\n\nfrom onyx.configs.app_configs import CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE\nfrom onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_ID\nfrom onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET\nfrom onyx.connectors.confluence.models import ConfluenceUser\nfrom onyx.connectors.confluence.user_profile_override import (\n    process_confluence_user_profiles_override,\n)\nfrom onyx.connectors.confluence.utils import _handle_http_error\nfrom onyx.connectors.confluence.utils import confluence_refresh_tokens\nfrom onyx.connectors.confluence.utils import get_start_param_from_url\nfrom onyx.connectors.confluence.utils import update_param_in_path\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import scoped_url\nfrom onyx.connectors.interfaces import CredentialsProviderInterface\nfrom onyx.file_processing.html_utils import format_document_soup\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nF = TypeVar(\"F\", bound=Callable[..., Any])\n\n\n# https://jira.atlassian.com/browse/CONFCLOUD-76433\n_PROBLEMATIC_EXPANSIONS = \"body.storage.value\"\n_REPLACEMENT_EXPANSIONS = \"body.view.value\"\n\n_USER_NOT_FOUND = \"Unknown Confluence User\"\n_USER_ID_TO_DISPLAY_NAME_CACHE: dict[str, str | None] = {}\n_USER_EMAIL_CACHE: dict[str, str | None] = {}\n_DEFAULT_PAGINATION_LIMIT = 1000\n\n_CONFLUENCE_SPACES_API_V1 = \"rest/api/space\"\n_CONFLUENCE_SPACES_API_V2 = \"wiki/api/v2/spaces\"\n\n\nclass ConfluenceRateLimitError(Exception):\n    pass\n\n\nclass OnyxConfluence:\n    \"\"\"\n    This is a custom Confluence class that:\n\n    A. overrides the default Confluence class to add a custom CQL method.\n    B.\n    This is necessary because the default Confluence class does not properly support cql expansions.\n    All methods are automatically wrapped with handle_confluence_rate_limit.\n    \"\"\"\n\n    CREDENTIAL_PREFIX = \"connector:confluence:credential\"\n    CREDENTIAL_TTL = 300  # 5 min\n    PROBE_TIMEOUT = 5  # 5 seconds\n\n    def __init__(\n        self,\n        is_cloud: bool,\n        url: str,\n        credentials_provider: CredentialsProviderInterface,\n        timeout: int | None = None,\n        scoped_token: bool = False,\n        # should generally not be passed in, but making it overridable for\n        # easier testing\n        confluence_user_profiles_override: list[dict[str, str]] | None = (\n            CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE\n        ),\n    ) -> None:\n        self.base_url = url  #'/'.join(url.rstrip(\"/\").split(\"/\")[:-1])\n        url = scoped_url(url, \"confluence\") if scoped_token else url\n\n        self._is_cloud = is_cloud\n        self._url = url.rstrip(\"/\")\n        self._credentials_provider = credentials_provider\n        self.scoped_token = scoped_token\n        self.redis_client: Redis | None = None\n        self.static_credentials: dict[str, Any] | None = None\n        if self._credentials_provider.is_dynamic():\n            self.redis_client = get_redis_client(\n                tenant_id=credentials_provider.get_tenant_id()\n            )\n        else:\n            self.static_credentials = self._credentials_provider.get_credentials()\n\n        self._confluence = Confluence(url)\n        self.credential_key: str = (\n            self.CREDENTIAL_PREFIX\n            + f\":credential_{self._credentials_provider.get_provider_key()}\"\n        )\n\n        self._kwargs: Any = None\n\n        self.shared_base_kwargs: dict[str, str | int | bool] = {\n            \"api_version\": \"cloud\" if is_cloud else \"latest\",\n            \"backoff_and_retry\": False,\n            \"cloud\": is_cloud,\n        }\n        if timeout:\n            self.shared_base_kwargs[\"timeout\"] = timeout\n\n        self._confluence_user_profiles_override = (\n            process_confluence_user_profiles_override(confluence_user_profiles_override)\n            if confluence_user_profiles_override\n            else None\n        )\n\n    def _renew_credentials(self) -> tuple[dict[str, Any], bool]:\n        \"\"\"credential_json - the current json credentials\n        Returns a tuple\n        1. The up to date credentials\n        2. True if the credentials were updated\n\n        This method is intended to be used within a distributed lock.\n        Lock, call this, update credentials if the tokens were refreshed, then release\n        \"\"\"\n        # static credentials are preloaded, so no locking/redis required\n        if self.static_credentials:\n            return self.static_credentials, False\n\n        if not self.redis_client:\n            raise RuntimeError(\"self.redis_client is None\")\n\n        # dynamic credentials need locking\n        # check redis first, then fallback to the DB\n        credential_raw = self.redis_client.get(self.credential_key)\n        if credential_raw is not None:\n            credential_bytes = cast(bytes, credential_raw)\n            credential_str = credential_bytes.decode(\"utf-8\")\n            credential_json: dict[str, Any] = json.loads(credential_str)\n        else:\n            credential_json = self._credentials_provider.get_credentials()\n\n        if \"confluence_refresh_token\" not in credential_json:\n            # static credentials ... cache them permanently and return\n            self.static_credentials = credential_json\n            return credential_json, False\n\n        if not OAUTH_CONFLUENCE_CLOUD_CLIENT_ID:\n            raise RuntimeError(\"OAUTH_CONFLUENCE_CLOUD_CLIENT_ID must be set!\")\n\n        if not OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET:\n            raise RuntimeError(\"OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET must be set!\")\n\n        # check if we should refresh tokens. we're deciding to refresh halfway\n        # to expiration\n        now = datetime.now(timezone.utc)\n        created_at = datetime.fromisoformat(credential_json[\"created_at\"])\n        expires_in: int = credential_json[\"expires_in\"]\n        renew_at = created_at + timedelta(seconds=expires_in // 2)\n        if now <= renew_at:\n            # cached/current credentials are reasonably up to date\n            return credential_json, False\n\n        # we need to refresh\n        logger.info(\"Renewing Confluence Cloud credentials...\")\n        new_credentials = confluence_refresh_tokens(\n            OAUTH_CONFLUENCE_CLOUD_CLIENT_ID,\n            OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET,\n            credential_json[\"cloud_id\"],\n            credential_json[\"confluence_refresh_token\"],\n        )\n\n        # store the new credentials to redis and to the db thru the provider\n        # redis: we use a 5 min TTL because we are given a 10 minute grace period\n        # when keys are rotated. it's easier to expire the cached credentials\n        # reasonably frequently rather than trying to handle strong synchronization\n        # between the db and redis everywhere the credentials might be updated\n        new_credential_str = json.dumps(new_credentials)\n        self.redis_client.set(\n            self.credential_key, new_credential_str, nx=True, ex=self.CREDENTIAL_TTL\n        )\n        self._credentials_provider.set_credentials(new_credentials)\n\n        return new_credentials, True\n\n    @staticmethod\n    def _make_oauth2_dict(credentials: dict[str, Any]) -> dict[str, Any]:\n        oauth2_dict: dict[str, Any] = {}\n        if \"confluence_refresh_token\" in credentials:\n            oauth2_dict[\"client_id\"] = OAUTH_CONFLUENCE_CLOUD_CLIENT_ID\n            oauth2_dict[\"token\"] = {}\n            oauth2_dict[\"token\"][\"access_token\"] = credentials[\n                \"confluence_access_token\"\n            ]\n        return oauth2_dict\n\n    def _build_spaces_url(\n        self,\n        is_v2: bool,\n        base_url: str,\n        limit: int,\n        space_keys: list[str] | None,\n        start: int | None = None,\n    ) -> str:\n        \"\"\"Build URL for Confluence spaces API with query parameters.\"\"\"\n        key_param = \"keys\" if is_v2 else \"spaceKey\"\n\n        params = [f\"limit={limit}\"]\n        if space_keys:\n            params.append(f\"{key_param}={','.join(space_keys)}\")\n        if start is not None and not is_v2:\n            params.append(f\"start={start}\")\n\n        return f\"{base_url}?{'&'.join(params)}\"\n\n    def _paginate_spaces_for_endpoint(\n        self,\n        is_v2: bool,\n        base_url: str,\n        limit: int,\n        space_keys: list[str] | None,\n    ) -> Iterator[dict[str, Any]]:\n        \"\"\"Internal helper to paginate through spaces for a specific API endpoint.\"\"\"\n        start = 0\n        url = self._build_spaces_url(\n            is_v2, base_url, limit, space_keys, start if not is_v2 else None\n        )\n\n        while url:\n            response = self.get(url, advanced_mode=True)\n            response.raise_for_status()\n            data = response.json()\n\n            results = data.get(\"results\", [])\n            if not results:\n                return\n\n            yield from results\n\n            if is_v2:\n                url = data.get(\"_links\", {}).get(\"next\", \"\")\n            else:\n                if len(results) < limit:\n                    return\n                start += len(results)\n                url = self._build_spaces_url(is_v2, base_url, limit, space_keys, start)\n\n    def retrieve_confluence_spaces(\n        self,\n        space_keys: list[str] | None = None,\n        limit: int = 50,\n    ) -> Iterator[dict[str, str]]:\n        \"\"\"\n        Retrieve spaces from Confluence using v2 API (Cloud) or v1 API (Server/fallback).\n\n        Args:\n            space_keys: Optional list of space keys to filter by\n            limit: Results per page (default 50)\n\n        Yields:\n            Space dictionaries with keys: id, key, name, type, status, etc.\n\n        Note:\n            For Cloud instances, attempts v2 API first. If v2 returns 404,\n            automatically falls back to v1 API for compatibility with older instances.\n        \"\"\"\n        # Determine API version once\n        use_v2 = self._is_cloud and not self.scoped_token\n        base_url = _CONFLUENCE_SPACES_API_V2 if use_v2 else _CONFLUENCE_SPACES_API_V1\n\n        try:\n            yield from self._paginate_spaces_for_endpoint(\n                use_v2, base_url, limit, space_keys\n            )\n        except HTTPError as e:\n            if e.response.status_code == 404 and use_v2:\n                logger.warning(\n                    \"v2 spaces API returned 404, falling back to v1 API. This may indicate an older Confluence Cloud instance.\"\n                )\n                # Fallback to v1\n                yield from self._paginate_spaces_for_endpoint(\n                    False, _CONFLUENCE_SPACES_API_V1, limit, space_keys\n                )\n            else:\n                raise\n\n    def _probe_connection(\n        self,\n        **kwargs: Any,\n    ) -> None:\n        merged_kwargs = {**self.shared_base_kwargs, **kwargs}\n        # add special timeout to make sure that we don't hang indefinitely\n        merged_kwargs[\"timeout\"] = self.PROBE_TIMEOUT\n\n        with self._credentials_provider:\n            credentials, _ = self._renew_credentials()\n            if self.scoped_token:\n                # v2 endpoint doesn't always work with scoped tokens, use v1\n                token = credentials[\"confluence_access_token\"]\n                probe_url = f\"{self.base_url}/{_CONFLUENCE_SPACES_API_V1}?limit=1\"\n                import requests\n\n                try:\n                    r = requests.get(\n                        probe_url,\n                        headers={\"Authorization\": f\"Bearer {token}\"},\n                        timeout=10,\n                    )\n                    r.raise_for_status()\n                except HTTPError as e:\n                    if e.response.status_code == 403:\n                        logger.warning(\n                            \"scoped token authenticated but not valid for probe endpoint (spaces)\"\n                        )\n                    else:\n                        if \"WWW-Authenticate\" in e.response.headers:\n                            logger.warning(\n                                f\"WWW-Authenticate: {e.response.headers['WWW-Authenticate']}\"\n                            )\n                            logger.warning(f\"Full error: {e.response.text}\")\n                        raise e\n                return\n\n        # Initialize connection with probe timeout settings\n        self._confluence = self._initialize_connection_helper(\n            credentials, **merged_kwargs\n        )\n\n        # Retrieve first space to validate connection\n        spaces_iter = self.retrieve_confluence_spaces(limit=1)\n        first_space = next(spaces_iter, None)\n\n        if not first_space:\n            raise RuntimeError(\n                f\"No spaces found at {self._url}! Check your credentials and wiki_base and make sure is_cloud is set correctly.\"\n            )\n\n        logger.info(\"Confluence probe succeeded.\")\n\n    def _initialize_connection(\n        self,\n        **kwargs: Any,\n    ) -> None:\n        \"\"\"Called externally to init the connection in a thread safe manner.\"\"\"\n        merged_kwargs = {**self.shared_base_kwargs, **kwargs}\n        with self._credentials_provider:\n            credentials, _ = self._renew_credentials()\n            self._confluence = self._initialize_connection_helper(\n                credentials, **merged_kwargs\n            )\n            self._kwargs = merged_kwargs\n\n    def _initialize_connection_helper(\n        self,\n        credentials: dict[str, Any],\n        **kwargs: Any,\n    ) -> Confluence:\n        \"\"\"Called internally to init the connection. Distributed locking\n        to prevent multiple threads from modifying the credentials\n        must be handled around this function.\"\"\"\n\n        confluence = None\n\n        # probe connection with direct client, no retries\n        if \"confluence_refresh_token\" in credentials:\n            logger.info(\"Connecting to Confluence Cloud with OAuth Access Token.\")\n\n            oauth2_dict: dict[str, Any] = OnyxConfluence._make_oauth2_dict(credentials)\n            url = f\"https://api.atlassian.com/ex/confluence/{credentials['cloud_id']}\"\n            confluence = Confluence(url=url, oauth2=oauth2_dict, **kwargs)\n        else:\n            logger.info(\n                f\"Connecting to Confluence with Personal Access Token as user: {credentials['confluence_username']}\"\n            )\n            if self._is_cloud:\n                confluence = Confluence(\n                    url=self._url,\n                    username=credentials[\"confluence_username\"],\n                    password=credentials[\"confluence_access_token\"],\n                    **kwargs,\n                )\n            else:\n                confluence = Confluence(\n                    url=self._url,\n                    token=credentials[\"confluence_access_token\"],\n                    **kwargs,\n                )\n\n        return confluence\n\n    # https://developer.atlassian.com/cloud/confluence/rate-limiting/\n    # This uses the native rate limiting option provided by the\n    # confluence client and otherwise applies a simpler set of error handling.\n    def _make_rate_limited_confluence_method(\n        self, name: str, credential_provider: CredentialsProviderInterface | None\n    ) -> Callable[..., Any]:\n        def wrapped_call(*args: list[Any], **kwargs: Any) -> Any:\n            MAX_RETRIES = 5\n\n            TIMEOUT = 600\n            timeout_at = time.monotonic() + TIMEOUT\n\n            for attempt in range(MAX_RETRIES):\n                if time.monotonic() > timeout_at:\n                    raise TimeoutError(\n                        f\"Confluence call attempts took longer than {TIMEOUT} seconds.\"\n                    )\n\n                # we're relying more on the client to rate limit itself\n                # and applying our own retries in a more specific set of circumstances\n                try:\n                    if credential_provider:\n                        with credential_provider:\n                            credentials, renewed = self._renew_credentials()\n                            if renewed:\n                                self._confluence = self._initialize_connection_helper(\n                                    credentials, **self._kwargs\n                                )\n                            attr = getattr(self._confluence, name, None)\n                            if attr is None:\n                                # The underlying Confluence client doesn't have this attribute\n                                raise AttributeError(\n                                    f\"'{type(self).__name__}' object has no attribute '{name}'\"\n                                )\n\n                            return attr(*args, **kwargs)\n                    else:\n                        attr = getattr(self._confluence, name, None)\n                        if attr is None:\n                            # The underlying Confluence client doesn't have this attribute\n                            raise AttributeError(\n                                f\"'{type(self).__name__}' object has no attribute '{name}'\"\n                            )\n\n                        return attr(*args, **kwargs)\n\n                except HTTPError as e:\n                    delay_until = _handle_http_error(e, attempt, MAX_RETRIES)\n                    logger.warning(\n                        f\"HTTPError in confluence call. Retrying in {delay_until} seconds...\"\n                    )\n                    while time.monotonic() < delay_until:\n                        # in the future, check a signal here to exit\n                        time.sleep(1)\n                except AttributeError as e:\n                    # Some error within the Confluence library, unclear why it fails.\n                    # Users reported it to be intermittent, so just retry\n                    if attempt == MAX_RETRIES - 1:\n                        raise e\n\n                    logger.exception(\n                        \"Confluence Client raised an AttributeError. Retrying...\"\n                    )\n                    time.sleep(5)\n\n        return wrapped_call\n\n    def __getattr__(self, name: str) -> Any:\n        \"\"\"Dynamically intercept attribute/method access.\"\"\"\n        attr = getattr(self._confluence, name, None)\n        if attr is None:\n            # The underlying Confluence client doesn't have this attribute\n            raise AttributeError(\n                f\"'{type(self).__name__}' object has no attribute '{name}'\"\n            )\n\n        # If it's not a method, just return it after ensuring token validity\n        if not callable(attr):\n            return attr\n\n        # skip methods that start with \"_\"\n        if name.startswith(\"_\"):\n            return attr\n\n        # wrap the method with our retry handler\n        rate_limited_method: Callable[..., Any] = (\n            self._make_rate_limited_confluence_method(name, self._credentials_provider)\n        )\n\n        return rate_limited_method\n\n    def _try_one_by_one_for_paginated_url(\n        self,\n        url_suffix: str,\n        initial_start: int,\n        limit: int,\n    ) -> Generator[dict[str, Any], None, str | None]:\n        \"\"\"\n        Go through `limit` items, starting at `initial_start` one by one (e.g. using\n        `limit=1` for each call).\n\n        If we encounter an error, we skip the item and try the next one. We will return\n        the items we were able to retrieve successfully.\n\n        Returns the expected next url_suffix. Returns None if it thinks we've hit the end.\n\n        TODO (chris): make this yield failures as well as successes.\n        TODO (chris): make this work for confluence cloud somehow.\n        \"\"\"\n        if self._is_cloud:\n            raise RuntimeError(\"This method is not implemented for Confluence Cloud.\")\n\n        found_empty_page = False\n        temp_url_suffix = url_suffix\n\n        for ind in range(limit):\n            try:\n                temp_url_suffix = update_param_in_path(\n                    url_suffix, \"start\", str(initial_start + ind)\n                )\n                temp_url_suffix = update_param_in_path(temp_url_suffix, \"limit\", \"1\")\n                logger.info(f\"Making recovery confluence call to {temp_url_suffix}\")\n                raw_response = self.get(path=temp_url_suffix, advanced_mode=True)\n                raw_response.raise_for_status()\n\n                latest_results = raw_response.json().get(\"results\", [])\n                yield from latest_results\n\n                if not latest_results:\n                    # no more results, break out of the loop\n                    logger.info(\n                        f\"No results found for call '{temp_url_suffix}'Stopping pagination.\"\n                    )\n                    found_empty_page = True\n                    break\n            except Exception:\n                logger.exception(\n                    f\"Error in confluence call to {temp_url_suffix}. Continuing.\"\n                )\n\n        if found_empty_page:\n            return None\n\n        # if we got here, we successfully tried `limit` items\n        return update_param_in_path(url_suffix, \"start\", str(initial_start + limit))\n\n    def _paginate_url(\n        self,\n        url_suffix: str,\n        limit: int | None = None,\n        # Called with the next url to use to get the next page\n        next_page_callback: Callable[[str], None] | None = None,\n        force_offset_pagination: bool = False,\n    ) -> Iterator[dict[str, Any]]:\n        \"\"\"\n        This will paginate through the top level query.\n        \"\"\"\n        if not limit:\n            limit = _DEFAULT_PAGINATION_LIMIT\n\n        url_suffix = update_param_in_path(url_suffix, \"limit\", str(limit))\n\n        while url_suffix:\n            logger.debug(f\"Making confluence call to {url_suffix}\")\n            try:\n                # Only pass params if they're not already in the URL to avoid duplicate\n                # params accumulating. Confluence's _links.next already includes these.\n                params = {}\n                if \"body-format=\" not in url_suffix:\n                    params[\"body-format\"] = \"atlas_doc_format\"\n                if \"expand=\" not in url_suffix:\n                    params[\"expand\"] = \"body.atlas_doc_format\"\n\n                raw_response = self.get(\n                    path=url_suffix,\n                    advanced_mode=True,\n                    params=params,\n                )\n            except Exception as e:\n                logger.exception(f\"Error in confluence call to {url_suffix}\")\n                raise e\n\n            try:\n                raw_response.raise_for_status()\n            except Exception as e:\n                logger.warning(f\"Error in confluence call to {url_suffix}\")\n\n                # If the problematic expansion is in the url, replace it\n                # with the replacement expansion and try again\n                # If that fails, raise the error\n                if _PROBLEMATIC_EXPANSIONS in url_suffix:\n                    logger.warning(\n                        f\"Replacing {_PROBLEMATIC_EXPANSIONS} with {_REPLACEMENT_EXPANSIONS} and trying again.\"\n                    )\n                    url_suffix = url_suffix.replace(\n                        _PROBLEMATIC_EXPANSIONS,\n                        _REPLACEMENT_EXPANSIONS,\n                    )\n                    continue\n\n                # If we fail due to a 500, try one by one.\n                # NOTE: this iterative approach only works for server, since cloud uses cursor-based\n                # pagination\n                if raw_response.status_code == 500 and not self._is_cloud:\n                    initial_start = get_start_param_from_url(url_suffix)\n                    if initial_start is None:\n                        # can't handle this if we don't have offset-based pagination\n                        raise\n\n                    # this will just yield the successful items from the batch\n                    new_url_suffix = yield from self._try_one_by_one_for_paginated_url(\n                        url_suffix,\n                        initial_start=initial_start,\n                        limit=limit,\n                    )\n\n                    # this means we ran into an empty page\n                    if new_url_suffix is None:\n                        if next_page_callback:\n                            next_page_callback(\"\")\n                        break\n\n                    url_suffix = new_url_suffix\n                    continue\n\n                else:\n                    logger.exception(\n                        f\"Error in confluence call to {url_suffix} \\n\"\n                        f\"Raw Response Text: {raw_response.text} \\n\"\n                        f\"Full Response: {raw_response.__dict__} \\n\"\n                        f\"Error: {e} \\n\"\n                    )\n                    raise\n\n            try:\n                next_response = raw_response.json()\n            except Exception as e:\n                logger.exception(\n                    f\"Failed to parse response as JSON. Response: {raw_response.__dict__}\"\n                )\n                raise e\n\n            # Yield the results individually.\n            results = cast(list[dict[str, Any]], next_response.get(\"results\", []))\n\n            # Note 1:\n            # Make sure we don't update the start by more than the amount\n            # of results we were able to retrieve. The Confluence API has a\n            # weird behavior where if you pass in a limit that is too large for\n            # the configured server, it will artificially limit the amount of\n            # results returned BUT will not apply this to the start parameter.\n            # This will cause us to miss results.\n            #\n            # Note 2:\n            # We specifically perform manual yielding (i.e., `for x in xs: yield x`) as opposed to using a `yield from xs`\n            # because we *have to call the `next_page_callback`* prior to yielding the last element!\n            #\n            # If we did:\n            #\n            # ```py\n            # yield from results\n            # if next_page_callback:\n            #   next_page_callback(url_suffix)\n            # ```\n            #\n            # then the logic would fail since the iterator would finish (and the calling scope would exit out of its driving\n            # loop) prior to the callback being called.\n\n            old_url_suffix = url_suffix\n            updated_start = get_start_param_from_url(old_url_suffix)\n            url_suffix = cast(str, next_response.get(\"_links\", {}).get(\"next\", \"\"))\n            for i, result in enumerate(results):\n                updated_start += 1\n                if url_suffix and next_page_callback and i == len(results) - 1:\n                    # update the url if we're on the last result in the page\n                    if not self._is_cloud:\n                        # If confluence claims there are more results, we update the start param\n                        # based on how many results were returned and try again.\n                        url_suffix = update_param_in_path(\n                            url_suffix, \"start\", str(updated_start)\n                        )\n                    # notify the caller of the new url\n                    next_page_callback(url_suffix)\n\n                elif force_offset_pagination and i == len(results) - 1:\n                    url_suffix = update_param_in_path(\n                        old_url_suffix, \"start\", str(updated_start)\n                    )\n\n                yield result\n\n            # we've observed that Confluence sometimes returns a next link despite giving\n            # 0 results. This is a bug with Confluence, so we need to check for it and\n            # stop paginating.\n            if url_suffix and not results:\n                logger.info(\n                    f\"No results found for call '{old_url_suffix}' despite next link being present. Stopping pagination.\"\n                )\n                break\n\n    def build_cql_url(self, cql: str, expand: str | None = None) -> str:\n        expand_string = f\"&expand={expand}\" if expand else \"\"\n        return f\"rest/api/content/search?cql={cql}{expand_string}\"\n\n    def paginated_cql_retrieval(\n        self,\n        cql: str,\n        expand: str | None = None,\n        limit: int | None = None,\n    ) -> Iterator[dict[str, Any]]:\n        \"\"\"\n        The content/search endpoint can be used to fetch pages, attachments, and comments.\n        \"\"\"\n        cql_url = self.build_cql_url(cql, expand)\n        yield from self._paginate_url(cql_url, limit)\n\n    def paginated_page_retrieval(\n        self,\n        cql_url: str,\n        limit: int,\n        # Called with the next url to use to get the next page\n        next_page_callback: Callable[[str], None] | None = None,\n    ) -> Iterator[dict[str, Any]]:\n        \"\"\"\n        Error handling (and testing) wrapper for _paginate_url,\n        because the current approach to page retrieval involves handling the\n        next page links manually.\n        \"\"\"\n        try:\n            yield from self._paginate_url(\n                cql_url, limit=limit, next_page_callback=next_page_callback\n            )\n        except Exception as e:\n            logger.exception(f\"Error in paginated_page_retrieval: {e}\")\n            raise e\n\n    def cql_paginate_all_expansions(\n        self,\n        cql: str,\n        expand: str | None = None,\n        limit: int | None = None,\n    ) -> Iterator[dict[str, Any]]:\n        \"\"\"\n        This function will paginate through the top level query first, then\n        paginate through all of the expansions.\n        \"\"\"\n\n        def _traverse_and_update(data: dict | list) -> None:\n            if isinstance(data, dict):\n                next_url = data.get(\"_links\", {}).get(\"next\")\n                if next_url and \"results\" in data:\n                    data[\"results\"].extend(self._paginate_url(next_url, limit=limit))\n\n                for value in data.values():\n                    _traverse_and_update(value)\n            elif isinstance(data, list):\n                for item in data:\n                    _traverse_and_update(item)\n\n        for confluence_object in self.paginated_cql_retrieval(cql, expand, limit):\n            _traverse_and_update(confluence_object)\n            yield confluence_object\n\n    def paginated_cql_user_retrieval(\n        self,\n        expand: str | None = None,\n        limit: int | None = None,\n    ) -> Iterator[ConfluenceUser]:\n        \"\"\"\n        The search/user endpoint can be used to fetch users.\n        It's a separate endpoint from the content/search endpoint used only for users.\n        Otherwise it's very similar to the content/search endpoint.\n        \"\"\"\n\n        # this is needed since there is a live bug with Confluence Server/Data Center\n        # where not all users are returned by the APIs. This is a workaround needed until\n        # that is patched.\n        if self._confluence_user_profiles_override:\n            yield from self._confluence_user_profiles_override\n\n        elif self._is_cloud:\n            cql = \"type=user\"\n            url = \"rest/api/search/user\"\n            expand_string = f\"&expand={expand}\" if expand else \"\"\n            url += f\"?cql={cql}{expand_string}\"\n            for user_result in self._paginate_url(\n                url, limit, force_offset_pagination=True\n            ):\n                # Example response:\n                # {\n                #     'user': {\n                #         'type': 'known',\n                #         'accountId': '712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',\n                #         'accountType': 'atlassian',\n                #         'email': 'chris@danswer.ai',\n                #         'publicName': 'Chris Weaver',\n                #         'profilePicture': {\n                #             'path': '/wiki/aa-avatar/712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',\n                #             'width': 48,\n                #             'height': 48,\n                #             'isDefault': False\n                #         },\n                #         'displayName': 'Chris Weaver',\n                #         'isExternalCollaborator': False,\n                #         '_expandable': {\n                #             'operations': '',\n                #             'personalSpace': ''\n                #         },\n                #         '_links': {\n                #             'self': 'https://danswerai.atlassian.net/wiki/rest/api/user?accountId=712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d'\n                #         }\n                #     },\n                #     'title': 'Chris Weaver',\n                #     'excerpt': '',\n                #     'url': '/people/712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',\n                #     'breadcrumbs': [],\n                #     'entityType': 'user',\n                #     'iconCssClass': 'aui-icon content-type-profile',\n                #     'lastModified': '2025-02-18T04:08:03.579Z',\n                #     'score': 0.0\n                # }\n                user = user_result[\"user\"]\n                yield ConfluenceUser(\n                    user_id=user[\"accountId\"],\n                    username=None,\n                    display_name=user[\"displayName\"],\n                    email=user.get(\"email\"),\n                    type=user[\"accountType\"],\n                )\n        else:\n            for user in self._paginate_url(\"rest/api/user/list\", limit):\n                yield ConfluenceUser(\n                    user_id=user[\"userKey\"],\n                    username=user[\"username\"],\n                    display_name=user[\"displayName\"],\n                    email=None,\n                    type=user.get(\"type\", \"user\"),\n                )\n\n    def paginated_groups_by_user_retrieval(\n        self,\n        user_id: str,  # accountId in Cloud, userKey in Server\n        limit: int | None = None,\n    ) -> Iterator[dict[str, Any]]:\n        \"\"\"\n        This is not an SQL like query.\n        It's a confluence specific endpoint that can be used to fetch groups.\n        \"\"\"\n        user_field = \"accountId\" if self._is_cloud else \"key\"\n        user_value = user_id\n        # Server uses userKey (but calls it key during the API call), Cloud uses accountId\n        user_query = f\"{user_field}={quote(user_value)}\"\n\n        url = f\"rest/api/user/memberof?{user_query}\"\n        yield from self._paginate_url(url, limit, force_offset_pagination=True)\n\n    def paginated_groups_retrieval(\n        self,\n        limit: int | None = None,\n    ) -> Iterator[dict[str, Any]]:\n        \"\"\"\n        This is not an SQL like query.\n        It's a confluence specific endpoint that can be used to fetch groups.\n        \"\"\"\n        yield from self._paginate_url(\"rest/api/group\", limit)\n\n    def paginated_group_members_retrieval(\n        self,\n        group_name: str,\n        limit: int | None = None,\n    ) -> Iterator[dict[str, Any]]:\n        \"\"\"\n        This is not an SQL like query.\n        It's a confluence specific endpoint that can be used to fetch the members of a group.\n        THIS DOESN'T WORK FOR SERVER because it breaks when there is a slash in the group name.\n        E.g. neither \"test/group\" nor \"test%2Fgroup\" works for confluence.\n        \"\"\"\n        group_name = quote(group_name)\n        yield from self._paginate_url(f\"rest/api/group/{group_name}/member\", limit)\n\n    def get_all_space_permissions_server(\n        self,\n        space_key: str,\n    ) -> list[dict[str, Any]]:\n        \"\"\"\n        This is a confluence server/data center specific method that can be used to\n        fetch the permissions of a space.\n\n        NOTE: This uses the JSON-RPC API which is the ONLY way to get space permissions\n        on Confluence Server/Data Center. The REST API equivalent (expand=permissions)\n        is Cloud-only and not available on Data Center as of version 8.9.x.\n\n        If this fails with 401 Unauthorized, the customer needs to enable JSON-RPC:\n        Confluence Admin -> General Configuration -> Further Configuration\n        -> Enable \"Remote API (XML-RPC & SOAP)\"\n        \"\"\"\n        url = \"rpc/json-rpc/confluenceservice-v2\"\n        data = {\n            \"jsonrpc\": \"2.0\",\n            \"method\": \"getSpacePermissionSets\",\n            \"id\": 7,\n            \"params\": [space_key],\n        }\n        try:\n            response = self.post(url, data=data)\n        except HTTPError as e:\n            if e.response is not None and e.response.status_code == 401:\n                raise HTTPError(\n                    \"Unauthorized (401) when calling JSON-RPC API for space permissions. \"\n                    \"This is likely because the Remote API is disabled. \"\n                    \"To fix: Confluence Admin -> General Configuration -> Further Configuration \"\n                    \"-> Enable 'Remote API (XML-RPC & SOAP)'\",\n                    response=e.response,\n                ) from e\n            raise\n        logger.debug(f\"jsonrpc response: {response}\")\n        if not response.get(\"result\"):\n            logger.warning(\n                f\"No jsonrpc response for space permissions for space {space_key}\\nResponse: {response}\"\n            )\n\n        return response.get(\"result\", [])\n\n    def get_current_user(self, expand: str | None = None) -> Any:\n        \"\"\"\n        Implements a method that isn't in the third party client.\n\n        Get information about the current user\n        :param expand: OPTIONAL expand for get status of user.\n                Possible param is \"status\". Results are \"Active, Deactivated\"\n        :return: Returns the user details\n        \"\"\"\n\n        from atlassian.errors import ApiPermissionError  # type:ignore\n\n        url = \"rest/api/user/current\"\n        params = {}\n        if expand:\n            params[\"expand\"] = expand\n        try:\n            response = self.get(url, params=params)\n        except HTTPError as e:\n            if e.response.status_code == 403:\n                raise ApiPermissionError(\n                    \"The calling user does not have permission\", reason=e\n                )\n            raise\n        return response\n\n\ndef get_user_email_from_username__server(\n    confluence_client: OnyxConfluence, user_name: str\n) -> str | None:\n    global _USER_EMAIL_CACHE\n    if _USER_EMAIL_CACHE.get(user_name) is None:\n        try:\n            response = confluence_client.get_mobile_parameters(user_name)\n            email = response.get(\"email\")\n        except HTTPError as e:\n            status_code = e.response.status_code if e.response is not None else \"N/A\"\n            logger.warning(\n                f\"Failed to get confluence email for {user_name}: HTTP {status_code} - {e}\"\n            )\n            # For now, we'll just return None and log a warning. This means\n            # we will keep retrying to get the email every group sync.\n            email = None\n        except Exception as e:\n            logger.warning(\n                f\"Failed to get confluence email for {user_name}: {type(e).__name__} - {e}\"\n            )\n            email = None\n        _USER_EMAIL_CACHE[user_name] = email\n    return _USER_EMAIL_CACHE[user_name]\n\n\ndef _get_user(confluence_client: OnyxConfluence, user_id: str) -> str:\n    \"\"\"Get Confluence Display Name based on the account-id or userkey value\n\n    Args:\n        user_id (str): The user id (i.e: the account-id or userkey)\n        confluence_client (Confluence): The Confluence Client\n\n    Returns:\n        str: The User Display Name. 'Unknown User' if the user is deactivated or not found\n    \"\"\"\n    global _USER_ID_TO_DISPLAY_NAME_CACHE\n    if _USER_ID_TO_DISPLAY_NAME_CACHE.get(user_id) is None:\n        try:\n            result = confluence_client.get_user_details_by_userkey(user_id)\n            found_display_name = result.get(\"displayName\")\n        except Exception:\n            found_display_name = None\n\n        if not found_display_name:\n            try:\n                result = confluence_client.get_user_details_by_accountid(user_id)\n                found_display_name = result.get(\"displayName\")\n            except Exception:\n                found_display_name = None\n\n        _USER_ID_TO_DISPLAY_NAME_CACHE[user_id] = found_display_name\n\n    return _USER_ID_TO_DISPLAY_NAME_CACHE.get(user_id) or _USER_NOT_FOUND\n\n\ndef sanitize_attachment_title(title: str) -> str:\n    \"\"\"\n    Sanitize the attachment title to be a valid HTML attribute.\n    \"\"\"\n    return title.replace(\"<\", \"_\").replace(\">\", \"_\").replace(\" \", \"_\").replace(\":\", \"_\")\n\n\ndef extract_text_from_confluence_html(\n    confluence_client: OnyxConfluence,\n    confluence_object: dict[str, Any],\n    fetched_titles: set[str],\n) -> str:\n    \"\"\"Parse a Confluence html page and replace the 'user Id' by the real\n        User Display Name\n\n    Args:\n        confluence_object (dict): The confluence object as a dict\n        confluence_client (Confluence): Confluence client\n        fetched_titles (set[str]): The titles of the pages that have already been fetched\n    Returns:\n        str: loaded and formated Confluence page\n    \"\"\"\n    body = confluence_object[\"body\"]\n    object_html = body.get(\"storage\", body.get(\"view\", {})).get(\"value\")\n\n    soup = bs4.BeautifulSoup(object_html, \"html.parser\")\n\n    _remove_macro_stylings(soup=soup)\n\n    for user in soup.findAll(\"ri:user\"):\n        user_id = (\n            user.attrs[\"ri:account-id\"]\n            if \"ri:account-id\" in user.attrs\n            else user.get(\"ri:userkey\")\n        )\n        if not user_id:\n            logger.warning(\n                f\"ri:userkey not found in ri:user element. Found attrs: {user.attrs}\"\n            )\n            continue\n        # Include @ sign for tagging, more clear for LLM\n        user.replaceWith(\"@\" + _get_user(confluence_client, user_id))\n\n    for html_page_reference in soup.findAll(\"ac:structured-macro\"):\n        # Here, we only want to process page within page macros\n        if html_page_reference.attrs.get(\"ac:name\") != \"include\":\n            continue\n\n        page_data = html_page_reference.find(\"ri:page\")\n        if not page_data:\n            logger.warning(\n                f\"Skipping retrieval of {html_page_reference} because because page data is missing\"\n            )\n            continue\n\n        page_title = page_data.attrs.get(\"ri:content-title\")\n        if not page_title:\n            # only fetch pages that have a title\n            logger.warning(\n                f\"Skipping retrieval of {html_page_reference} because it has no title\"\n            )\n            continue\n\n        if page_title in fetched_titles:\n            # prevent recursive fetching of pages\n            logger.debug(f\"Skipping {page_title} because it has already been fetched\")\n            continue\n\n        fetched_titles.add(page_title)\n\n        # Wrap this in a try-except because there are some pages that might not exist\n        try:\n            page_query = f\"type=page and title='{quote(page_title)}'\"\n\n            page_contents: dict[str, Any] | None = None\n            # Confluence enforces title uniqueness, so we should only get one result here\n            for page in confluence_client.paginated_cql_retrieval(\n                cql=page_query,\n                expand=\"body.storage.value\",\n                limit=1,\n            ):\n                page_contents = page\n                break\n        except Exception as e:\n            logger.warning(\n                f\"Error getting page contents for object {confluence_object}: {e}\"\n            )\n            continue\n\n        if not page_contents:\n            continue\n\n        text_from_page = extract_text_from_confluence_html(\n            confluence_client=confluence_client,\n            confluence_object=page_contents,\n            fetched_titles=fetched_titles,\n        )\n\n        html_page_reference.replaceWith(text_from_page)\n\n    for html_link_body in soup.findAll(\"ac:link-body\"):\n        # This extracts the text from inline links in the page so they can be\n        # represented in the document text as plain text\n        try:\n            text_from_link = html_link_body.text\n            html_link_body.replaceWith(f\"(LINK TEXT: {text_from_link})\")\n        except Exception as e:\n            logger.warning(f\"Error processing ac:link-body: {e}\")\n\n    for html_attachment in soup.findAll(\"ri:attachment\"):\n        # This extracts the text from inline attachments in the page so they can be\n        # represented in the document text as plain text\n        try:\n            html_attachment.replaceWith(\n                f\"<attachment>{sanitize_attachment_title(html_attachment.attrs['ri:filename'])}</attachment>\"\n            )  # to be replaced later\n        except Exception as e:\n            logger.warning(f\"Error processing ac:attachment: {e}\")\n\n    return format_document_soup(soup)\n\n\ndef _remove_macro_stylings(soup: bs4.BeautifulSoup) -> None:\n    for macro_root in soup.findAll(\"ac:structured-macro\"):\n        if not isinstance(macro_root, bs4.Tag):\n            continue\n\n        macro_styling = macro_root.find(name=\"ac:parameter\", attrs={\"ac:name\": \"page\"})\n        if not macro_styling or not isinstance(macro_styling, bs4.Tag):\n            continue\n\n        macro_styling.extract()\n"
  },
  {
    "path": "backend/onyx/connectors/confluence/user_profile_override.py",
    "content": "from onyx.connectors.confluence.models import ConfluenceUser\n\n\ndef process_confluence_user_profiles_override(\n    confluence_user_email_override: list[dict[str, str]],\n) -> list[ConfluenceUser]:\n    return [\n        ConfluenceUser(\n            user_id=override[\"user_id\"],\n            # username is not returned by the Confluence Server API anyways\n            username=override[\"username\"],\n            display_name=override[\"display_name\"],\n            email=override[\"email\"],\n            type=override[\"type\"],\n        )\n        for override in confluence_user_email_override\n        if override is not None\n    ]\n"
  },
  {
    "path": "backend/onyx/connectors/confluence/utils.py",
    "content": "import math\nimport time\nfrom collections.abc import Callable\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom io import BytesIO\nfrom pathlib import Path\nfrom typing import Any\nfrom typing import cast\nfrom typing import TYPE_CHECKING\nfrom typing import TypeVar\nfrom urllib.parse import parse_qs\nfrom urllib.parse import quote\nfrom urllib.parse import urljoin\nfrom urllib.parse import urlparse\n\nimport requests\nfrom pydantic import BaseModel\n\nfrom onyx.configs.app_configs import (\n    CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD,\n)\nfrom onyx.configs.app_configs import CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.file_processing.extract_file_text import extract_file_text\nfrom onyx.file_processing.extract_file_text import get_file_ext\nfrom onyx.file_processing.file_types import OnyxFileExtensions\nfrom onyx.file_processing.file_types import OnyxMimeTypes\nfrom onyx.file_processing.image_utils import store_image_and_create_section\nfrom onyx.utils.logger import setup_logger\n\nif TYPE_CHECKING:\n    from onyx.connectors.confluence.onyx_confluence import OnyxConfluence\n\n\nlogger = setup_logger()\n\nCONFLUENCE_OAUTH_TOKEN_URL = \"https://auth.atlassian.com/oauth/token\"\nRATE_LIMIT_MESSAGE_LOWERCASE = \"Rate limit exceeded\".lower()\n\n\nclass TokenResponse(BaseModel):\n    access_token: str\n    expires_in: int\n    token_type: str\n    refresh_token: str\n    scope: str\n\n\ndef validate_attachment_filetype(\n    attachment: dict[str, Any],\n) -> bool:\n    \"\"\"\n    Validates if the attachment is a supported file type.\n    \"\"\"\n    media_type = attachment.get(\"metadata\", {}).get(\"mediaType\", \"\")\n    if media_type.startswith(\"image/\"):\n        return media_type in OnyxMimeTypes.IMAGE_MIME_TYPES\n\n    # For non-image files, check if we support the extension\n    title = attachment.get(\"title\", \"\")\n    extension = get_file_ext(title)\n\n    return extension in OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS\n\n\nclass AttachmentProcessingResult(BaseModel):\n    \"\"\"\n    A container for results after processing a Confluence attachment.\n    'text' is the textual content of the attachment.\n    'file_name' is the final file name used in FileStore to store the content.\n    'error' holds an exception or string if something failed.\n    \"\"\"\n\n    text: str | None\n    file_name: str | None\n    error: str | None = None\n\n\ndef _make_attachment_link(\n    confluence_client: \"OnyxConfluence\",\n    attachment: dict[str, Any],\n    parent_content_id: str | None = None,\n) -> str | None:\n    download_link = \"\"\n\n    if \"api.atlassian.com\" in confluence_client.url:\n        # https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get\n        if not parent_content_id:\n            logger.warning(\n                \"parent_content_id is required to download attachments from Confluence Cloud!\"\n            )\n            return None\n\n        download_link = (\n            confluence_client.url\n            + f\"/rest/api/content/{parent_content_id}/child/attachment/{attachment['id']}/download\"\n        )\n    else:\n        download_link = confluence_client.url + attachment[\"_links\"][\"download\"]\n\n    return download_link\n\n\ndef process_attachment(\n    confluence_client: \"OnyxConfluence\",\n    attachment: dict[str, Any],\n    parent_content_id: str | None,\n    allow_images: bool,\n) -> AttachmentProcessingResult:\n    \"\"\"\n    Processes a Confluence attachment. If it's a document, extracts text,\n    or if it's an image, stores it for later analysis. Returns a structured result.\n    \"\"\"\n    try:\n        # Get the media type from the attachment metadata\n        media_type: str = attachment.get(\"metadata\", {}).get(\"mediaType\", \"\")\n        # Validate the attachment type\n        if not validate_attachment_filetype(attachment):\n            return AttachmentProcessingResult(\n                text=None,\n                file_name=None,\n                error=f\"Unsupported file type: {media_type}\",\n            )\n\n        attachment_link = _make_attachment_link(\n            confluence_client, attachment, parent_content_id\n        )\n        if not attachment_link:\n            return AttachmentProcessingResult(\n                text=None, file_name=None, error=\"Failed to make attachment link\"\n            )\n\n        attachment_size = attachment[\"extensions\"][\"fileSize\"]\n\n        if media_type.startswith(\"image/\"):\n            if not allow_images:\n                return AttachmentProcessingResult(\n                    text=None,\n                    file_name=None,\n                    error=\"Image downloading is not enabled\",\n                )\n        else:\n            if attachment_size > CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD:\n                logger.warning(\n                    f\"Skipping {attachment_link} due to size. \"\n                    f\"size={attachment_size} \"\n                    f\"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD}\"\n                )\n                return AttachmentProcessingResult(\n                    text=None,\n                    file_name=None,\n                    error=f\"Attachment text too long: {attachment_size} chars\",\n                )\n\n        logger.info(\n            f\"Downloading attachment: title={attachment['title']} length={attachment_size} link={attachment_link}\"\n        )\n\n        # Download the attachment\n        resp: requests.Response = confluence_client._session.get(attachment_link)\n        if resp.status_code != 200:\n            logger.warning(\n                f\"Failed to fetch {attachment_link} with status code {resp.status_code}\"\n            )\n            return AttachmentProcessingResult(\n                text=None,\n                file_name=None,\n                error=f\"Attachment download status code is {resp.status_code}\",\n            )\n\n        raw_bytes = resp.content\n        if not raw_bytes:\n            return AttachmentProcessingResult(\n                text=None, file_name=None, error=\"attachment.content is None\"\n            )\n\n        # Process image attachments\n        if media_type.startswith(\"image/\"):\n            return _process_image_attachment(\n                confluence_client, attachment, raw_bytes, media_type\n            )\n\n        # Process document attachments\n        try:\n            text = extract_file_text(\n                file=BytesIO(raw_bytes),\n                file_name=attachment[\"title\"],\n            )\n\n            # Skip if the text is too long\n            if len(text) > CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD:\n                return AttachmentProcessingResult(\n                    text=None,\n                    file_name=None,\n                    error=f\"Attachment text too long: {len(text)} chars\",\n                )\n\n            return AttachmentProcessingResult(text=text, file_name=None, error=None)\n        except Exception as e:\n            return AttachmentProcessingResult(\n                text=None, file_name=None, error=f\"Failed to extract text: {e}\"\n            )\n\n    except Exception as e:\n        return AttachmentProcessingResult(\n            text=None, file_name=None, error=f\"Failed to process attachment: {e}\"\n        )\n\n\ndef _process_image_attachment(\n    confluence_client: \"OnyxConfluence\",  # noqa: ARG001\n    attachment: dict[str, Any],\n    raw_bytes: bytes,\n    media_type: str,\n) -> AttachmentProcessingResult:\n    \"\"\"Process an image attachment by saving it without generating a summary.\"\"\"\n    try:\n        # Use the standardized image storage and section creation\n        section, file_name = store_image_and_create_section(\n            image_data=raw_bytes,\n            file_id=Path(attachment[\"id\"]).name,\n            display_name=attachment[\"title\"],\n            media_type=media_type,\n            file_origin=FileOrigin.CONNECTOR,\n        )\n        logger.info(f\"Stored image attachment with file name: {file_name}\")\n\n        # Return empty text but include the file_name for later processing\n        return AttachmentProcessingResult(text=\"\", file_name=file_name, error=None)\n    except Exception as e:\n        msg = f\"Image storage failed for {attachment['title']}: {e}\"\n        logger.error(msg, exc_info=e)\n        return AttachmentProcessingResult(text=None, file_name=None, error=msg)\n\n\ndef convert_attachment_to_content(\n    confluence_client: \"OnyxConfluence\",\n    attachment: dict[str, Any],\n    page_id: str,\n    allow_images: bool,\n) -> tuple[str | None, str | None] | None:\n    \"\"\"\n    Facade function which:\n      1. Validates attachment type\n      2. Extracts content or stores image for later processing\n      3. Returns (content_text, stored_file_name) or None if we should skip it\n    \"\"\"\n    media_type = attachment.get(\"metadata\", {}).get(\"mediaType\", \"\")\n    # Quick check for unsupported types:\n    if media_type.startswith(\"video/\") or media_type == \"application/gliffy+json\":\n        logger.warning(\n            f\"Skipping unsupported attachment type: '{media_type}' for {attachment['title']}\"\n        )\n        return None\n\n    result = process_attachment(confluence_client, attachment, page_id, allow_images)\n    if result.error is not None:\n        logger.warning(\n            f\"Attachment {attachment['title']} encountered error: {result.error}\"\n        )\n        return None\n\n    # Return the text and the file name\n    return result.text, result.file_name\n\n\ndef build_confluence_document_id(\n    base_url: str, content_url: str, is_cloud: bool\n) -> str:\n    \"\"\"For confluence, the document id is the page url for a page based document\n        or the attachment download url for an attachment based document\n\n    Args:\n        base_url (str): The base url of the Confluence instance\n        content_url (str): The url of the page or attachment download url\n\n    Returns:\n        str: The document id\n    \"\"\"\n\n    # NOTE: urljoin is tricky and will drop the last segment of the base if it doesn't\n    # end with \"/\" because it believes that makes it a file.\n    final_url = base_url.rstrip(\"/\") + \"/\"\n    if is_cloud and not final_url.endswith(\"/wiki/\"):\n        final_url = urljoin(final_url, \"wiki\") + \"/\"\n    final_url = urljoin(final_url, content_url.lstrip(\"/\"))\n    return final_url\n\n\ndef datetime_from_string(datetime_string: str) -> datetime:\n    datetime_object = datetime.fromisoformat(datetime_string)\n\n    if datetime_object.tzinfo is None:\n        # If no timezone info, assume it is UTC\n        datetime_object = datetime_object.replace(tzinfo=timezone.utc)\n    else:\n        # If not in UTC, translate it\n        datetime_object = datetime_object.astimezone(timezone.utc)\n\n    return datetime_object\n\n\ndef confluence_refresh_tokens(\n    client_id: str, client_secret: str, cloud_id: str, refresh_token: str\n) -> dict[str, Any]:\n    # rotate the refresh and access token\n    # Note that access tokens are only good for an hour in confluence cloud,\n    # so we're going to have problems if the connector runs for longer\n    # https://developer.atlassian.com/cloud/confluence/oauth-2-3lo-apps/#use-a-refresh-token-to-get-another-access-token-and-refresh-token-pair\n    response = requests.post(\n        CONFLUENCE_OAUTH_TOKEN_URL,\n        headers={\"Content-Type\": \"application/x-www-form-urlencoded\"},\n        data={\n            \"grant_type\": \"refresh_token\",\n            \"client_id\": client_id,\n            \"client_secret\": client_secret,\n            \"refresh_token\": refresh_token,\n        },\n    )\n\n    try:\n        token_response = TokenResponse.model_validate_json(response.text)\n    except Exception:\n        raise RuntimeError(\"Confluence Cloud token refresh failed.\")\n\n    now = datetime.now(timezone.utc)\n    expires_at = now + timedelta(seconds=token_response.expires_in)\n\n    new_credentials: dict[str, Any] = {}\n    new_credentials[\"confluence_access_token\"] = token_response.access_token\n    new_credentials[\"confluence_refresh_token\"] = token_response.refresh_token\n    new_credentials[\"created_at\"] = now.isoformat()\n    new_credentials[\"expires_at\"] = expires_at.isoformat()\n    new_credentials[\"expires_in\"] = token_response.expires_in\n    new_credentials[\"scope\"] = token_response.scope\n    new_credentials[\"cloud_id\"] = cloud_id\n    return new_credentials\n\n\nF = TypeVar(\"F\", bound=Callable[..., Any])\n\n\n# https://developer.atlassian.com/cloud/confluence/rate-limiting/\n# this uses the native rate limiting option provided by the\n# confluence client and otherwise applies a simpler set of error handling\ndef handle_confluence_rate_limit(confluence_call: F) -> F:\n    def wrapped_call(*args: list[Any], **kwargs: Any) -> Any:\n        MAX_RETRIES = 5\n\n        TIMEOUT = 600\n        timeout_at = time.monotonic() + TIMEOUT\n\n        for attempt in range(MAX_RETRIES):\n            if time.monotonic() > timeout_at:\n                raise TimeoutError(\n                    f\"Confluence call attempts took longer than {TIMEOUT} seconds.\"\n                )\n\n            try:\n                # we're relying more on the client to rate limit itself\n                # and applying our own retries in a more specific set of circumstances\n                return confluence_call(*args, **kwargs)\n            except requests.HTTPError as e:\n                delay_until = _handle_http_error(e, attempt, MAX_RETRIES)\n                logger.warning(\n                    f\"HTTPError in confluence call. Retrying in {delay_until} seconds...\"\n                )\n                while time.monotonic() < delay_until:\n                    # in the future, check a signal here to exit\n                    time.sleep(1)\n            except AttributeError as e:\n                # Some error within the Confluence library, unclear why it fails.\n                # Users reported it to be intermittent, so just retry\n                if attempt == MAX_RETRIES - 1:\n                    raise e\n\n                logger.exception(\n                    \"Confluence Client raised an AttributeError. Retrying...\"\n                )\n                time.sleep(5)\n\n    return cast(F, wrapped_call)\n\n\ndef _handle_http_error(e: requests.HTTPError, attempt: int, max_retries: int) -> int:\n    MIN_DELAY = 2\n    MAX_DELAY = 60\n    STARTING_DELAY = 5\n    BACKOFF = 2\n\n    # Check if the response or headers are None to avoid potential AttributeError\n    if e.response is None or e.response.headers is None:\n        logger.warning(\"HTTPError with `None` as response or as headers\")\n        raise e\n\n    # Confluence Server returns 403 when rate limited\n    if e.response.status_code == 403:\n        FORBIDDEN_MAX_RETRY_ATTEMPTS = 7\n        FORBIDDEN_RETRY_DELAY = 10\n        if attempt < FORBIDDEN_MAX_RETRY_ATTEMPTS:\n            logger.warning(\n                \"403 error. This sometimes happens when we hit \"\n                f\"Confluence rate limits. Retrying in {FORBIDDEN_RETRY_DELAY} seconds...\"\n            )\n            return FORBIDDEN_RETRY_DELAY\n\n        raise e\n\n    if e.response.status_code >= 500:\n        if attempt >= max_retries - 1:\n            raise e\n\n        delay = min(STARTING_DELAY * (BACKOFF**attempt), MAX_DELAY)\n        logger.warning(\n            f\"Server error {e.response.status_code}. \"\n            f\"Retrying in {delay} seconds (attempt {attempt + 1})...\"\n        )\n        return math.ceil(time.monotonic() + delay)\n\n    if (\n        e.response.status_code != 429\n        and RATE_LIMIT_MESSAGE_LOWERCASE not in e.response.text.lower()\n    ):\n        raise e\n\n    retry_after = None\n\n    retry_after_header = e.response.headers.get(\"Retry-After\")\n    if retry_after_header is not None:\n        try:\n            retry_after = int(retry_after_header)\n            if retry_after > MAX_DELAY:\n                logger.warning(\n                    f\"Clamping retry_after from {retry_after} to {MAX_DELAY} seconds...\"\n                )\n                retry_after = MAX_DELAY\n            if retry_after < MIN_DELAY:\n                retry_after = MIN_DELAY\n        except ValueError:\n            pass\n\n    if retry_after is not None:\n        logger.warning(\n            f\"Rate limiting with retry header. Retrying after {retry_after} seconds...\"\n        )\n        delay = retry_after\n    else:\n        logger.warning(\n            \"Rate limiting without retry header. Retrying with exponential backoff...\"\n        )\n        delay = min(STARTING_DELAY * (BACKOFF**attempt), MAX_DELAY)\n\n    delay_until = math.ceil(time.monotonic() + delay)\n    return delay_until\n\n\ndef get_single_param_from_url(url: str, param: str) -> str | None:\n    \"\"\"Get a parameter from a url\"\"\"\n    parsed_url = urlparse(url)\n    return parse_qs(parsed_url.query).get(param, [None])[0]\n\n\ndef get_start_param_from_url(url: str) -> int:\n    \"\"\"Get the start parameter from a url\"\"\"\n    start_str = get_single_param_from_url(url, \"start\")\n    return int(start_str) if start_str else 0\n\n\ndef update_param_in_path(path: str, param: str, value: str) -> str:\n    \"\"\"Update a parameter in a path. Path should look something like:\n\n    /api/rest/users?start=0&limit=10\n    \"\"\"\n    parsed_url = urlparse(path)\n    query_params = parse_qs(parsed_url.query)\n    query_params[param] = [value]\n    return (\n        path.split(\"?\")[0]\n        + \"?\"\n        + \"&\".join(f\"{k}={quote(v[0])}\" for k, v in query_params.items())\n    )\n"
  },
  {
    "path": "backend/onyx/connectors/connector_runner.py",
    "content": "import sys\nimport time\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom typing import Generic\nfrom typing import TypeVar\n\nfrom onyx.connectors.interfaces import BaseConnector\nfrom onyx.connectors.interfaces import CheckpointedConnector\nfrom onyx.connectors.interfaces import CheckpointedConnectorWithPermSync\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\nTimeRange = tuple[datetime, datetime]\n\nCT = TypeVar(\"CT\", bound=ConnectorCheckpoint)\n\n\ndef batched_doc_ids(\n    checkpoint_connector_generator: CheckpointOutput[CT],\n    batch_size: int,\n) -> Generator[set[str], None, None]:\n    batch: set[str] = set()\n    for document, hierarchy_node, failure, next_checkpoint in CheckpointOutputWrapper[\n        CT\n    ]()(checkpoint_connector_generator):\n        if document is not None:\n            batch.add(document.id)\n        elif (\n            failure and failure.failed_document and failure.failed_document.document_id\n        ):\n            batch.add(failure.failed_document.document_id)\n        # HierarchyNodes don't have IDs that need to be batched for doc processing\n\n        if len(batch) >= batch_size:\n            yield batch\n            batch = set()\n    if len(batch) > 0:\n        yield batch\n\n\nclass CheckpointOutputWrapper(Generic[CT]):\n    \"\"\"\n    Wraps a CheckpointOutput generator to give things back in a more digestible format,\n    specifically for Document outputs.\n    The connector format is easier for the connector implementor (e.g. it enforces exactly\n    one new checkpoint is returned AND that the checkpoint is at the end), thus the different\n    formats.\n    \"\"\"\n\n    def __init__(self) -> None:\n        self.next_checkpoint: CT | None = None\n\n    def __call__(\n        self,\n        checkpoint_connector_generator: CheckpointOutput[CT],\n    ) -> Generator[\n        tuple[\n            Document | None, HierarchyNode | None, ConnectorFailure | None, CT | None\n        ],\n        None,\n        None,\n    ]:\n        # grabs the final return value and stores it in the `next_checkpoint` variable\n        def _inner_wrapper(\n            checkpoint_connector_generator: CheckpointOutput[CT],\n        ) -> CheckpointOutput[CT]:\n            self.next_checkpoint = yield from checkpoint_connector_generator\n            return self.next_checkpoint  # not used\n\n        for item in _inner_wrapper(checkpoint_connector_generator):\n            if isinstance(item, Document):\n                yield item, None, None, None\n            elif isinstance(item, HierarchyNode):\n                yield None, item, None, None\n            elif isinstance(item, ConnectorFailure):\n                yield None, None, item, None\n            else:\n                raise ValueError(f\"Invalid connector output type: {type(item)}\")\n\n        if self.next_checkpoint is None:\n            raise RuntimeError(\n                \"Checkpoint is None. This should never happen - the connector should always return a checkpoint.\"\n            )\n\n        yield None, None, None, self.next_checkpoint\n\n\nclass ConnectorRunner(Generic[CT]):\n    \"\"\"\n    Handles:\n        - Batching\n        - Additional exception logging\n        - Combining different connector types to a single interface\n    \"\"\"\n\n    def __init__(\n        self,\n        connector: BaseConnector,\n        batch_size: int,\n        # cannot be True for non-checkpointed connectors\n        include_permissions: bool,\n        time_range: TimeRange | None = None,\n    ):\n        if not isinstance(connector, CheckpointedConnector) and include_permissions:\n            raise ValueError(\n                \"include_permissions cannot be True for non-checkpointed connectors\"\n            )\n\n        self.connector = connector\n        self.time_range = time_range\n        self.batch_size = batch_size\n        self.include_permissions = include_permissions\n\n        self.doc_batch: list[Document] = []\n        self.hierarchy_node_batch: list[HierarchyNode] = []\n\n    def run(self, checkpoint: CT) -> Generator[\n        tuple[\n            list[Document] | None,\n            list[HierarchyNode] | None,\n            ConnectorFailure | None,\n            CT | None,\n        ],\n        None,\n        None,\n    ]:\n        \"\"\"\n        Yields batches of Documents, HierarchyNodes, failures, and checkpoints.\n\n        Returns tuples of:\n        - (doc_batch, None, None, None) - batch of documents\n        - (None, hierarchy_batch, None, None) - batch of hierarchy nodes\n        - (None, None, failure, None) - a connector failure\n        - (None, None, None, checkpoint) - new checkpoint\n        \"\"\"\n        try:\n            if isinstance(self.connector, CheckpointedConnector):\n                if self.time_range is None:\n                    raise ValueError(\"time_range is required for CheckpointedConnector\")\n\n                start = time.monotonic()\n                if self.include_permissions:\n                    if not isinstance(\n                        self.connector, CheckpointedConnectorWithPermSync\n                    ):\n                        raise ValueError(\n                            \"Connector does not support permission syncing\"\n                        )\n                    load_from_checkpoint = (\n                        self.connector.load_from_checkpoint_with_perm_sync\n                    )\n                else:\n                    load_from_checkpoint = self.connector.load_from_checkpoint\n                checkpoint_connector_generator = load_from_checkpoint(\n                    start=self.time_range[0].timestamp(),\n                    end=self.time_range[1].timestamp(),\n                    checkpoint=checkpoint,\n                )\n                next_checkpoint: CT | None = None\n                # this is guaranteed to always run at least once with next_checkpoint being non-None\n                for (\n                    document,\n                    hierarchy_node,\n                    failure,\n                    next_checkpoint,\n                ) in CheckpointOutputWrapper[CT]()(checkpoint_connector_generator):\n                    if document is not None:\n                        self.doc_batch.append(document)\n\n                    if hierarchy_node is not None:\n                        self.hierarchy_node_batch.append(hierarchy_node)\n\n                    if failure is not None:\n                        yield None, None, failure, None\n\n                    # Yield hierarchy nodes batch if it reaches batch_size\n                    # (yield nodes before docs to maintain parent-before-child invariant)\n                    if len(self.hierarchy_node_batch) >= self.batch_size:\n                        yield None, self.hierarchy_node_batch, None, None\n                        self.hierarchy_node_batch = []\n\n                    # Yield document batch if it reaches batch_size\n                    # First flush any pending hierarchy nodes to ensure parents exist\n                    if len(self.doc_batch) >= self.batch_size:\n                        if len(self.hierarchy_node_batch) > 0:\n                            yield None, self.hierarchy_node_batch, None, None\n                            self.hierarchy_node_batch = []\n                        yield self.doc_batch, None, None, None\n                        self.doc_batch = []\n\n                # yield remaining hierarchy nodes first (parents before children)\n                if len(self.hierarchy_node_batch) > 0:\n                    yield None, self.hierarchy_node_batch, None, None\n                    self.hierarchy_node_batch = []\n\n                # yield remaining documents\n                if len(self.doc_batch) > 0:\n                    yield self.doc_batch, None, None, None\n                    self.doc_batch = []\n\n                yield None, None, None, next_checkpoint\n\n                logger.debug(\n                    f\"Connector took {time.monotonic() - start} seconds to get to the next checkpoint.\"\n                )\n\n            else:\n                finished_checkpoint = self.connector.build_dummy_checkpoint()\n                finished_checkpoint.has_more = False\n\n                if isinstance(self.connector, PollConnector):\n                    if self.time_range is None:\n                        raise ValueError(\"time_range is required for PollConnector\")\n\n                    for batch in self.connector.poll_source(\n                        start=self.time_range[0].timestamp(),\n                        end=self.time_range[1].timestamp(),\n                    ):\n                        docs, nodes = self._separate_batch(batch)\n                        if nodes:\n                            yield None, nodes, None, None\n                        if docs:\n                            yield docs, None, None, None\n\n                    yield None, None, None, finished_checkpoint\n                elif isinstance(self.connector, LoadConnector):\n                    for batch in self.connector.load_from_state():\n                        docs, nodes = self._separate_batch(batch)\n                        if nodes:\n                            yield None, nodes, None, None\n                        if docs:\n                            yield docs, None, None, None\n\n                    yield None, None, None, finished_checkpoint\n                else:\n                    raise ValueError(f\"Invalid connector. type: {type(self.connector)}\")\n        except Exception:\n            exc_type, _, exc_traceback = sys.exc_info()\n\n            # Traverse the traceback to find the last frame where the exception was raised\n            tb = exc_traceback\n            if tb is None:\n                logger.error(\"No traceback found for exception\")\n                raise\n\n            while tb.tb_next:\n                tb = tb.tb_next  # Move to the next frame in the traceback\n\n            # Get the local variables from the frame where the exception occurred\n            local_vars = tb.tb_frame.f_locals\n            local_vars_str = \"\\n\".join(\n                f\"{key}: {value}\" for key, value in local_vars.items()\n            )\n            logger.error(\n                f\"Error in connector. type: {exc_type};\\nlocal_vars below -> \\n{local_vars_str[:1024]}\"\n            )\n            raise\n\n    def _separate_batch(\n        self, batch: list[Document | HierarchyNode]\n    ) -> tuple[list[Document], list[HierarchyNode]]:\n        \"\"\"Separate a mixed batch into Documents and HierarchyNodes.\"\"\"\n        docs: list[Document] = []\n        nodes: list[HierarchyNode] = []\n        for item in batch:\n            if isinstance(item, Document):\n                docs.append(item)\n            elif isinstance(item, HierarchyNode):\n                nodes.append(item)\n        return docs, nodes\n"
  },
  {
    "path": "backend/onyx/connectors/credentials_provider.py",
    "content": "import uuid\nfrom types import TracebackType\nfrom typing import Any\n\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy import select\n\nfrom onyx.connectors.interfaces import CredentialsProviderInterface\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.models import Credential\nfrom onyx.redis.redis_pool import get_redis_client\n\n\nclass OnyxDBCredentialsProvider(\n    CredentialsProviderInterface[\"OnyxDBCredentialsProvider\"]\n):\n    \"\"\"Implementation to allow the connector to callback and update credentials in the db.\n    Required in cases where credentials can rotate while the connector is running.\n    \"\"\"\n\n    LOCK_TTL = 900  # TTL of the lock\n\n    def __init__(self, tenant_id: str, connector_name: str, credential_id: int):\n        self._tenant_id = tenant_id\n        self._connector_name = connector_name\n        self._credential_id = credential_id\n\n        self.redis_client = get_redis_client(tenant_id=tenant_id)\n\n        # lock used to prevent overlapping renewal of credentials\n        self.lock_key = f\"da_lock:connector:{connector_name}:credential_{credential_id}\"\n        self._lock: RedisLock = self.redis_client.lock(self.lock_key, self.LOCK_TTL)\n\n    def __enter__(self) -> \"OnyxDBCredentialsProvider\":\n        acquired = self._lock.acquire(blocking_timeout=self.LOCK_TTL)\n        if not acquired:\n            raise RuntimeError(f\"Could not acquire lock for key: {self.lock_key}\")\n\n        return self\n\n    def __exit__(\n        self,\n        exc_type: type[BaseException] | None,\n        exc_value: BaseException | None,\n        traceback: TracebackType | None,\n    ) -> None:\n        \"\"\"Release the lock when exiting the context.\"\"\"\n        if self._lock and self._lock.owned():\n            self._lock.release()\n\n    def get_tenant_id(self) -> str | None:\n        return self._tenant_id\n\n    def get_provider_key(self) -> str:\n        return str(self._credential_id)\n\n    def get_credentials(self) -> dict[str, Any]:\n        with get_session_with_tenant(tenant_id=self._tenant_id) as db_session:\n            credential = db_session.execute(\n                select(Credential).where(Credential.id == self._credential_id)\n            ).scalar_one()\n\n            if credential is None:\n                raise ValueError(\n                    f\"No credential found: credential={self._credential_id}\"\n                )\n\n            if credential.credential_json is None:\n                return {}\n            return credential.credential_json.get_value(apply_mask=False)\n\n    def set_credentials(self, credential_json: dict[str, Any]) -> None:\n        with get_session_with_tenant(tenant_id=self._tenant_id) as db_session:\n            try:\n                credential = db_session.execute(\n                    select(Credential)\n                    .where(Credential.id == self._credential_id)\n                    .with_for_update()\n                ).scalar_one()\n\n                if credential is None:\n                    raise ValueError(\n                        f\"No credential found: credential={self._credential_id}\"\n                    )\n\n                credential.credential_json = credential_json  # type: ignore[assignment]\n                db_session.commit()\n            except Exception:\n                db_session.rollback()\n                raise\n\n    def is_dynamic(self) -> bool:\n        return True\n\n\nclass OnyxStaticCredentialsProvider(\n    CredentialsProviderInterface[\"OnyxStaticCredentialsProvider\"]\n):\n    \"\"\"Implementation (a very simple one!) to handle static credentials.\"\"\"\n\n    def __init__(\n        self,\n        tenant_id: str | None,\n        connector_name: str,\n        credential_json: dict[str, Any],\n    ):\n        self._tenant_id = tenant_id\n        self._connector_name = connector_name\n        self._credential_json = credential_json\n\n        self._provider_key = str(uuid.uuid4())\n\n    def __enter__(self) -> \"OnyxStaticCredentialsProvider\":\n        return self\n\n    def __exit__(\n        self,\n        exc_type: type[BaseException] | None,\n        exc_value: BaseException | None,\n        traceback: TracebackType | None,\n    ) -> None:\n        pass\n\n    def get_tenant_id(self) -> str | None:\n        return self._tenant_id\n\n    def get_provider_key(self) -> str:\n        return self._provider_key\n\n    def get_credentials(self) -> dict[str, Any]:\n        return self._credential_json\n\n    def set_credentials(self, credential_json: dict[str, Any]) -> None:\n        self._credential_json = credential_json\n\n    def is_dynamic(self) -> bool:\n        return False\n"
  },
  {
    "path": "backend/onyx/connectors/cross_connector_utils/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/cross_connector_utils/miscellaneous_utils.py",
    "content": "import re\nfrom collections.abc import Callable\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import TypeVar\nfrom urllib.parse import urljoin\nfrom urllib.parse import urlparse\n\nimport requests\nfrom dateutil.parser import parse\nfrom dateutil.parser import ParserError\n\nfrom onyx.configs.app_configs import CONNECTOR_LOCALHOST_OVERRIDE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import IGNORE_FOR_QA\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import OnyxMetadata\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.text_processing import is_valid_email\n\n\nT = TypeVar(\"T\")\nU = TypeVar(\"U\")\nlogger = setup_logger()\n\n\ndef datetime_to_utc(dt: datetime) -> datetime:\n    if dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None:\n        dt = dt.replace(tzinfo=timezone.utc)\n\n    return dt.astimezone(timezone.utc)\n\n\ndef time_str_to_utc(datetime_str: str) -> datetime:\n    # Remove all timezone abbreviations in parentheses\n    normalized = re.sub(r\"\\([A-Z]+\\)\", \"\", datetime_str).strip()\n\n    # Remove any remaining parentheses and their contents\n    normalized = re.sub(r\"\\(.*?\\)\", \"\", normalized).strip()\n\n    candidates: list[str] = [normalized]\n\n    # Some sources (e.g. Gmail) may prefix the value with labels like \"Date:\"\n    label_stripped = re.sub(\n        r\"^\\s*[A-Za-z][A-Za-z\\s_-]*:\\s*\", \"\", normalized, count=1\n    ).strip()\n    if label_stripped and label_stripped != normalized:\n        candidates.append(label_stripped)\n\n    # Fix common format issues (e.g. \"0000\" => \"+0000\")\n    for candidate in list(candidates):\n        if \" 0000\" in candidate:\n            fixed = candidate.replace(\" 0000\", \" +0000\")\n            if fixed not in candidates:\n                candidates.append(fixed)\n\n    last_exception: Exception | None = None\n    for candidate in candidates:\n        try:\n            dt = parse(candidate)\n            return datetime_to_utc(dt)\n        except (ValueError, ParserError) as exc:\n            last_exception = exc\n\n    if last_exception is not None:\n        raise last_exception\n\n    # Fallback in case parsing failed without raising (should not happen)\n    raise ValueError(f\"Unable to parse datetime string: {datetime_str}\")\n\n\n# TODO: use this function in other connectors\ndef datetime_from_utc_timestamp(timestamp: int) -> datetime:\n    \"\"\"Convert a Unix timestamp to a datetime object in UTC\"\"\"\n\n    return datetime.fromtimestamp(timestamp, tz=timezone.utc)\n\n\ndef basic_expert_info_representation(info: BasicExpertInfo) -> str | None:\n    if info.first_name and info.last_name:\n        return f\"{info.first_name} {info.middle_initial} {info.last_name}\"\n\n    if info.display_name:\n        return info.display_name\n\n    if info.email and is_valid_email(info.email):\n        return info.email\n\n    if info.first_name:\n        return info.first_name\n\n    return None\n\n\ndef get_experts_stores_representations(\n    experts: list[BasicExpertInfo] | None,\n) -> list[str] | None:\n    \"\"\"Gets string representations of experts supplied.\n\n    If an expert cannot be represented as a string, it is omitted from the\n    result.\n    \"\"\"\n    if not experts:\n        return None\n\n    reps: list[str | None] = [\n        basic_expert_info_representation(owner) for owner in experts\n    ]\n    return [owner for owner in reps if owner is not None]\n\n\ndef process_in_batches(\n    objects: list[T], process_function: Callable[[T], U], batch_size: int\n) -> Iterator[list[U]]:\n    for i in range(0, len(objects), batch_size):\n        yield [process_function(obj) for obj in objects[i : i + batch_size]]\n\n\ndef get_metadata_keys_to_ignore() -> list[str]:\n    return [IGNORE_FOR_QA]\n\n\ndef _parse_document_source(connector_type: Any) -> DocumentSource | None:\n    if connector_type is None:\n        return None\n\n    if isinstance(connector_type, DocumentSource):\n        return connector_type\n\n    if not isinstance(connector_type, str):\n        logger.warning(f\"Invalid connector_type type: {type(connector_type).__name__}\")\n        return None\n\n    normalized = re.sub(r\"[\\s\\-]+\", \"_\", connector_type.strip().lower())\n    try:\n        return DocumentSource(normalized)\n    except ValueError:\n        logger.warning(\n            f\"Invalid connector_type value: '{connector_type}' (normalized: '{normalized}')\"\n        )\n        return None\n\n\ndef process_onyx_metadata(\n    metadata: dict[str, Any],\n) -> tuple[OnyxMetadata, dict[str, Any]]:\n    \"\"\"\n    Users may set Onyx metadata and custom tags in text files. https://docs.onyx.app/admins/connectors/official/file\n    Any unrecognized fields are treated as custom tags.\n    \"\"\"\n    p_owner_names = metadata.get(\"primary_owners\")\n    p_owners = (\n        [BasicExpertInfo(display_name=name) for name in p_owner_names]\n        if p_owner_names\n        else None\n    )\n\n    s_owner_names = metadata.get(\"secondary_owners\")\n    s_owners = (\n        [BasicExpertInfo(display_name=name) for name in s_owner_names]\n        if s_owner_names\n        else None\n    )\n    source_type = _parse_document_source(metadata.get(\"connector_type\"))\n\n    dt_str = metadata.get(\"doc_updated_at\")\n    doc_updated_at = time_str_to_utc(dt_str) if dt_str else None\n\n    return (\n        OnyxMetadata(\n            document_id=metadata.get(\"id\"),\n            source_type=source_type,\n            link=metadata.get(\"link\"),\n            file_display_name=metadata.get(\"file_display_name\"),\n            title=metadata.get(\"title\"),\n            primary_owners=p_owners,\n            secondary_owners=s_owners,\n            doc_updated_at=doc_updated_at,\n        ),\n        {\n            k: v\n            for k, v in metadata.items()\n            if k\n            not in [\n                \"document_id\",\n                \"time_updated\",\n                \"doc_updated_at\",\n                \"link\",\n                \"primary_owners\",\n                \"secondary_owners\",\n                \"filename\",\n                \"file_display_name\",\n                \"title\",\n                \"connector_type\",\n                \"pdf_password\",\n                \"mime_type\",\n            ]\n        },\n    )\n\n\ndef get_oauth_callback_uri(base_domain: str, connector_id: str) -> str:\n    if CONNECTOR_LOCALHOST_OVERRIDE:\n        # Used for development\n        base_domain = CONNECTOR_LOCALHOST_OVERRIDE\n    return f\"{base_domain.strip('/')}/connector/oauth/callback/{connector_id}\"\n\n\ndef is_atlassian_date_error(e: Exception) -> bool:\n    return \"field 'updated' is invalid\" in str(e)\n\n\ndef get_cloudId(base_url: str) -> str:\n    tenant_info_url = urljoin(base_url, \"/_edge/tenant_info\")\n    response = requests.get(tenant_info_url, timeout=10)\n    response.raise_for_status()\n    return response.json()[\"cloudId\"]\n\n\ndef scoped_url(url: str, product: str) -> str:\n    parsed = urlparse(url)\n    base_url = parsed.scheme + \"://\" + parsed.netloc\n    cloud_id = get_cloudId(base_url)\n    return f\"https://api.atlassian.com/ex/{product}/{cloud_id}{parsed.path}\"\n"
  },
  {
    "path": "backend/onyx/connectors/cross_connector_utils/rate_limit_wrapper.py",
    "content": "import time\nfrom collections.abc import Callable\nfrom functools import wraps\nfrom typing import Any\nfrom typing import cast\nfrom typing import TypeVar\n\nimport requests\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nF = TypeVar(\"F\", bound=Callable[..., Any])\n\n\nclass RateLimitTriedTooManyTimesError(Exception):\n    pass\n\n\nclass _RateLimitDecorator:\n    \"\"\"Builds a generic wrapper/decorator for calls to external APIs that\n    prevents making more than `max_calls` requests per `period`\n\n    Implementation inspired by the `ratelimit` library:\n    https://github.com/tomasbasham/ratelimit.\n\n    NOTE: is not thread safe.\n    \"\"\"\n\n    def __init__(\n        self,\n        max_calls: int,\n        period: float,  # in seconds\n        sleep_time: float = 2,  # in seconds\n        sleep_backoff: float = 2,  # applies exponential backoff\n        max_num_sleep: int = 0,\n    ):\n        self.max_calls = max_calls\n        self.period = period\n        self.sleep_time = sleep_time\n        self.sleep_backoff = sleep_backoff\n        self.max_num_sleep = max_num_sleep\n\n        self.call_history: list[float] = []\n        self.curr_calls = 0\n\n    def __call__(self, func: F) -> F:\n        @wraps(func)\n        def wrapped_func(*args: list, **kwargs: dict[str, Any]) -> Any:\n            # cleanup calls which are no longer relevant\n            self._cleanup()\n\n            # check if we've exceeded the rate limit\n            sleep_cnt = 0\n            while len(self.call_history) == self.max_calls:\n                sleep_time = self.sleep_time * (self.sleep_backoff**sleep_cnt)\n                logger.notice(\n                    f\"Rate limit exceeded for function {func.__name__}. Waiting {sleep_time} seconds before retrying.\"\n                )\n                time.sleep(sleep_time)\n                sleep_cnt += 1\n                if self.max_num_sleep != 0 and sleep_cnt >= self.max_num_sleep:\n                    raise RateLimitTriedTooManyTimesError(\n                        f\"Exceeded '{self.max_num_sleep}' retries for function '{func.__name__}'\"\n                    )\n\n                self._cleanup()\n\n            # add the current call to the call history\n            self.call_history.append(time.monotonic())\n            return func(*args, **kwargs)\n\n        return cast(F, wrapped_func)\n\n    def _cleanup(self) -> None:\n        curr_time = time.monotonic()\n        time_to_expire_before = curr_time - self.period\n        self.call_history = [\n            call_time\n            for call_time in self.call_history\n            if call_time > time_to_expire_before\n        ]\n\n\nrate_limit_builder = _RateLimitDecorator\n\n\n\"\"\"If you want to allow the external service to tell you when you've hit the rate limit,\nuse the following instead\"\"\"\n\nR = TypeVar(\"R\", bound=Callable[..., requests.Response])\n\n\ndef wrap_request_to_handle_ratelimiting(\n    request_fn: R, default_wait_time_sec: int = 30, max_waits: int = 30\n) -> R:\n    def wrapped_request(*args: list, **kwargs: dict[str, Any]) -> requests.Response:\n        for _ in range(max_waits):\n            response = request_fn(*args, **kwargs)\n            if response.status_code == 429:\n                try:\n                    wait_time = int(\n                        response.headers.get(\"Retry-After\", default_wait_time_sec)\n                    )\n                except ValueError:\n                    wait_time = default_wait_time_sec\n\n                time.sleep(wait_time)\n                continue\n\n            return response\n\n        raise RateLimitTriedTooManyTimesError(f\"Exceeded '{max_waits}' retries\")\n\n    return cast(R, wrapped_request)\n\n\n_rate_limited_get = wrap_request_to_handle_ratelimiting(requests.get)\n_rate_limited_post = wrap_request_to_handle_ratelimiting(requests.post)\n\n\nclass _RateLimitedRequest:\n    get = _rate_limited_get\n    post = _rate_limited_post\n\n\nrl_requests = _RateLimitedRequest\n"
  },
  {
    "path": "backend/onyx/connectors/discord/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/discord/connector.py",
    "content": "import asyncio\nfrom collections.abc import AsyncGenerator\nfrom collections.abc import AsyncIterable\nfrom collections.abc import Iterable\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\n\nfrom discord import Client\nfrom discord.channel import TextChannel\nfrom discord.channel import Thread\nfrom discord.enums import MessageType\nfrom discord.errors import LoginFailure\nfrom discord.flags import Intents\nfrom discord.message import Message as DiscordMessage\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.exceptions import CredentialInvalidError\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n_DISCORD_DOC_ID_PREFIX = \"DISCORD_\"\n_SNIPPET_LENGTH = 30\n\n\ndef _convert_message_to_document(\n    message: DiscordMessage,\n    sections: list[TextSection],\n) -> Document:\n    \"\"\"\n    Convert a discord message to a document\n    Sections are collected before calling this function because it relies on async\n        calls to fetch the thread history if there is one\n    \"\"\"\n\n    metadata: dict[str, str | list[str]] = {}\n    semantic_substring = \"\"\n\n    # Only messages from TextChannels will make it here but we have to check for it anyways\n    if isinstance(message.channel, TextChannel) and (\n        channel_name := message.channel.name\n    ):\n        metadata[\"Channel\"] = channel_name\n        semantic_substring += f\" in Channel: #{channel_name}\"\n\n    # Single messages dont have a title\n    title = \"\"\n\n    # If there is a thread, add more detail to the metadata, title, and semantic identifier\n    if isinstance(message.channel, Thread):\n        # Threads do have a title\n        title = message.channel.name\n\n        # If its a thread, update the metadata, title, and semantic_substring\n        metadata[\"Thread\"] = title\n\n        # Add more detail to the semantic identifier if available\n        semantic_substring += f\" in Thread: {title}\"\n\n    snippet: str = (\n        message.content[:_SNIPPET_LENGTH].rstrip() + \"...\"\n        if len(message.content) > _SNIPPET_LENGTH\n        else message.content\n    )\n\n    semantic_identifier = f\"{message.author.name} said{semantic_substring}: {snippet}\"\n\n    return Document(\n        id=f\"{_DISCORD_DOC_ID_PREFIX}{message.id}\",\n        source=DocumentSource.DISCORD,\n        semantic_identifier=semantic_identifier,\n        doc_updated_at=message.edited_at,\n        title=title,\n        sections=(cast(list[TextSection | ImageSection], sections)),\n        metadata=metadata,\n    )\n\n\nasync def _fetch_filtered_channels(\n    discord_client: Client,\n    server_ids: list[int] | None,\n    channel_names: list[str] | None,\n) -> list[TextChannel]:\n    filtered_channels: list[TextChannel] = []\n\n    for channel in discord_client.get_all_channels():\n        if not channel.permissions_for(channel.guild.me).read_message_history:\n            continue\n        if not isinstance(channel, TextChannel):\n            continue\n        if server_ids and len(server_ids) > 0 and channel.guild.id not in server_ids:\n            continue\n        if channel_names and channel.name not in channel_names:\n            continue\n        filtered_channels.append(channel)\n\n    logger.info(f\"Found {len(filtered_channels)} channels for the authenticated user\")\n    return filtered_channels\n\n\nasync def _fetch_documents_from_channel(\n    channel: TextChannel,\n    start_time: datetime | None,\n    end_time: datetime | None,\n) -> AsyncIterable[Document]:\n    # Discord's epoch starts at 2015-01-01\n    discord_epoch = datetime(2015, 1, 1, tzinfo=timezone.utc)\n    if start_time and start_time < discord_epoch:\n        start_time = discord_epoch\n\n    # NOTE: limit=None is the correct way to fetch all messages and threads with pagination\n    # The discord package erroneously uses limit for both pagination AND number of results\n    # This causes the history and archived_threads methods to return 100 results even if there are more results within the filters\n    # Pagination is handled automatically (100 results at a time) when limit=None\n\n    async for channel_message in channel.history(\n        limit=None,\n        after=start_time,\n        before=end_time,\n    ):\n        # Skip messages that are not the default type\n        if channel_message.type != MessageType.default:\n            continue\n\n        sections: list[TextSection] = [\n            TextSection(\n                text=channel_message.content,\n                link=channel_message.jump_url,\n            )\n        ]\n\n        yield _convert_message_to_document(channel_message, sections)\n\n    for active_thread in channel.threads:\n        async for thread_message in active_thread.history(\n            limit=None,\n            after=start_time,\n            before=end_time,\n        ):\n            # Skip messages that are not the default type\n            if thread_message.type != MessageType.default:\n                continue\n\n            sections = [\n                TextSection(\n                    text=thread_message.content,\n                    link=thread_message.jump_url,\n                )\n            ]\n\n            yield _convert_message_to_document(thread_message, sections)\n\n    async for archived_thread in channel.archived_threads(\n        limit=None,\n    ):\n        async for thread_message in archived_thread.history(\n            limit=None,\n            after=start_time,\n            before=end_time,\n        ):\n            # Skip messages that are not the default type\n            if thread_message.type != MessageType.default:\n                continue\n\n            sections = [\n                TextSection(\n                    text=thread_message.content,\n                    link=thread_message.jump_url,\n                )\n            ]\n\n            yield _convert_message_to_document(thread_message, sections)\n\n\ndef _manage_async_retrieval(\n    token: str,\n    requested_start_date_string: str,\n    channel_names: list[str],\n    server_ids: list[int],\n    start: datetime | None = None,\n    end: datetime | None = None,\n) -> Iterable[Document]:\n    # parse requested_start_date_string to datetime\n    pull_date: datetime | None = (\n        datetime.strptime(requested_start_date_string, \"%Y-%m-%d\").replace(\n            tzinfo=timezone.utc\n        )\n        if requested_start_date_string\n        else None\n    )\n\n    # Set start_time to the later of start and pull_date, or whichever is provided\n    start_time = max(filter(None, [start, pull_date])) if start or pull_date else None\n\n    end_time: datetime | None = end\n\n    async def _async_fetch() -> AsyncGenerator[Document, None]:\n        intents = Intents.default()\n        intents.message_content = True\n        async with Client(intents=intents) as discord_client:\n            start_task = asyncio.create_task(discord_client.start(token))\n            ready_task = asyncio.create_task(discord_client.wait_until_ready())\n\n            done, _ = await asyncio.wait(\n                {start_task, ready_task},\n                return_when=asyncio.FIRST_COMPLETED,\n            )\n\n            # start() runs indefinitely once connected, so it only lands\n            # in `done` when login/connection failed — propagate the error.\n            if start_task in done:\n                ready_task.cancel()\n                start_task.result()\n\n            filtered_channels: list[TextChannel] = await _fetch_filtered_channels(\n                discord_client=discord_client,\n                server_ids=server_ids,\n                channel_names=channel_names,\n            )\n\n            for channel in filtered_channels:\n                async for doc in _fetch_documents_from_channel(\n                    channel=channel,\n                    start_time=start_time,\n                    end_time=end_time,\n                ):\n                    yield doc\n\n    def run_and_yield() -> Iterable[Document]:\n        loop = asyncio.new_event_loop()\n        async_gen = _async_fetch()\n        try:\n            while True:\n                try:\n                    doc = loop.run_until_complete(anext(async_gen))\n                    yield doc\n                except StopAsyncIteration:\n                    break\n        finally:\n            # Must close the async generator before the loop so the Discord\n            # client's `async with` block can await its shutdown coroutine.\n            # The nested try/finally ensures the loop always closes even if\n            # aclose() raises (same pattern as cursor.close() before conn.close()).\n            try:\n                loop.run_until_complete(async_gen.aclose())\n            finally:\n                loop.close()\n\n    return run_and_yield()\n\n\nclass DiscordConnector(PollConnector, LoadConnector):\n    def __init__(\n        self,\n        server_ids: list[str] = [],\n        channel_names: list[str] = [],\n        # YYYY-MM-DD\n        start_date: str | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ):\n        self.batch_size = batch_size\n        self.channel_names: list[str] = channel_names if channel_names else []\n        self.server_ids: list[int] = (\n            [int(server_id) for server_id in server_ids] if server_ids else []\n        )\n        self._discord_bot_token: str | None = None\n        self.requested_start_date_string: str = start_date or \"\"\n\n    @property\n    def discord_bot_token(self) -> str:\n        if self._discord_bot_token is None:\n            raise ConnectorMissingCredentialError(\"Discord\")\n        return self._discord_bot_token\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self._discord_bot_token = credentials[\"discord_bot_token\"]\n        return None\n\n    def validate_connector_settings(self) -> None:\n        loop = asyncio.new_event_loop()\n        try:\n            client = Client(intents=Intents.default())\n            try:\n                loop.run_until_complete(client.login(self.discord_bot_token))\n            except LoginFailure as e:\n                raise CredentialInvalidError(f\"Invalid Discord bot token: {e}\")\n            finally:\n                loop.run_until_complete(client.close())\n        finally:\n            loop.close()\n\n    def _manage_doc_batching(\n        self,\n        start: datetime | None = None,\n        end: datetime | None = None,\n    ) -> GenerateDocumentsOutput:\n        doc_batch: list[Document | HierarchyNode] = []\n        for doc in _manage_async_retrieval(\n            token=self.discord_bot_token,\n            requested_start_date_string=self.requested_start_date_string,\n            channel_names=self.channel_names,\n            server_ids=self.server_ids,\n            start=start,\n            end=end,\n        ):\n            doc_batch.append(doc)\n            if len(doc_batch) >= self.batch_size:\n                yield doc_batch\n                doc_batch = []\n\n        if doc_batch:\n            yield doc_batch\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        return self._manage_doc_batching(\n            datetime.fromtimestamp(start, tz=timezone.utc),\n            datetime.fromtimestamp(end, tz=timezone.utc),\n        )\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        return self._manage_doc_batching(None, None)\n\n\nif __name__ == \"__main__\":\n    import os\n    import time\n\n    end = time.time()\n    # 1 day\n    start = end - 24 * 60 * 60 * 1\n    # \"1,2,3\"\n    server_ids: str | None = os.environ.get(\"server_ids\", None)\n    # \"channel1,channel2\"\n    channel_names: str | None = os.environ.get(\"channel_names\", None)\n\n    connector = DiscordConnector(\n        server_ids=server_ids.split(\",\") if server_ids else [],\n        channel_names=channel_names.split(\",\") if channel_names else [],\n        start_date=os.environ.get(\"start_date\", None),\n    )\n    connector.load_credentials(\n        {\"discord_bot_token\": os.environ.get(\"discord_bot_token\")}\n    )\n\n    for doc_batch in connector.poll_source(start, end):\n        for doc in doc_batch:\n            print(doc)\n"
  },
  {
    "path": "backend/onyx/connectors/discourse/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/discourse/connector.py",
    "content": "import time\nimport urllib.parse\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\n\nimport requests\nfrom pydantic import BaseModel\nfrom requests import Response\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rate_limit_builder,\n)\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.html_utils import parse_html_page_basic\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\nlogger = setup_logger()\n\n\nclass DiscoursePerms(BaseModel):\n    api_key: str\n    api_username: str\n\n\n@retry_builder()\ndef discourse_request(\n    endpoint: str, perms: DiscoursePerms, params: dict | None = None\n) -> Response:\n    headers = {\"Api-Key\": perms.api_key, \"Api-Username\": perms.api_username}\n\n    response = requests.get(endpoint, headers=headers, params=params)\n    response.raise_for_status()\n\n    return response\n\n\nclass DiscourseConnector(PollConnector):\n    def __init__(\n        self,\n        base_url: str,\n        categories: list[str] | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        parsed_url = urllib.parse.urlparse(base_url)\n        if not parsed_url.scheme:\n            base_url = \"https://\" + base_url\n        self.base_url = base_url\n\n        self.categories = [c.lower() for c in categories] if categories else []\n        self.category_id_map: dict[int, dict] = {}\n\n        self.batch_size = batch_size\n        self.permissions: DiscoursePerms | None = None\n        self.active_categories: set | None = None\n\n    @rate_limit_builder(max_calls=50, period=60)\n    def _make_request(self, endpoint: str, params: dict | None = None) -> Response:\n        if not self.permissions:\n            raise ConnectorMissingCredentialError(\"Discourse\")\n        return discourse_request(endpoint, self.permissions, params)\n\n    def _get_categories_map(\n        self,\n    ) -> None:\n        assert self.permissions is not None\n        categories_endpoint = urllib.parse.urljoin(self.base_url, \"categories.json\")\n        response = self._make_request(\n            endpoint=categories_endpoint,\n            params={\"include_subcategories\": True},\n        )\n        categories = response.json()[\"category_list\"][\"categories\"]\n        self.category_id_map = {\n            cat[\"id\"]: {\"name\": cat[\"name\"], \"slug\": cat[\"slug\"]}\n            for cat in categories\n            if not self.categories or cat[\"name\"].lower() in self.categories\n        }\n        self.active_categories = set(self.category_id_map)\n\n    def _get_doc_from_topic(self, topic_id: int) -> Document:\n        assert self.permissions is not None\n        topic_endpoint = urllib.parse.urljoin(self.base_url, f\"t/{topic_id}.json\")\n        response = self._make_request(endpoint=topic_endpoint)\n        topic = response.json()\n\n        topic_url = urllib.parse.urljoin(self.base_url, f\"t/{topic['slug']}\")\n\n        sections = []\n        poster = None\n        responders = []\n        seen_names = set()\n        for ind, post in enumerate(topic[\"post_stream\"][\"posts\"]):\n            if ind == 0:\n                poster_name = post.get(\"name\")\n                if poster_name:\n                    seen_names.add(poster_name)\n                    poster = BasicExpertInfo(display_name=poster_name)\n            else:\n                responder_name = post.get(\"name\")\n                if responder_name and responder_name not in seen_names:\n                    seen_names.add(responder_name)\n                    responders.append(BasicExpertInfo(display_name=responder_name))\n\n            sections.append(\n                TextSection(link=topic_url, text=parse_html_page_basic(post[\"cooked\"]))\n            )\n        category_name = self.category_id_map.get(topic[\"category_id\"], {}).get(\"name\")\n\n        metadata: dict[str, str | list[str]] = (\n            {\n                \"category\": category_name,\n            }\n            if category_name\n            else {}\n        )\n\n        if topic.get(\"tags\"):\n            metadata[\"tags\"] = topic[\"tags\"]\n\n        doc = Document(\n            id=\"_\".join([DocumentSource.DISCOURSE.value, str(topic[\"id\"])]),\n            sections=cast(list[TextSection | ImageSection], sections),\n            source=DocumentSource.DISCOURSE,\n            semantic_identifier=topic[\"title\"],\n            doc_updated_at=time_str_to_utc(topic[\"last_posted_at\"]),\n            primary_owners=[poster] if poster else None,\n            secondary_owners=responders or None,\n            metadata=metadata,\n        )\n        return doc\n\n    def _get_latest_topics(\n        self, start: datetime | None, end: datetime | None, page: int\n    ) -> list[int]:\n        assert self.permissions is not None\n        topic_ids = []\n\n        if not self.categories:\n            latest_endpoint = urllib.parse.urljoin(\n                self.base_url, f\"latest.json?page={page}\"\n            )\n            response = self._make_request(endpoint=latest_endpoint)\n            topics = response.json()[\"topic_list\"][\"topics\"]\n\n        else:\n            topics = []\n            empty_categories = []\n\n            for category_id, category_dict in self.category_id_map.items():\n                category_endpoint = urllib.parse.urljoin(\n                    self.base_url,\n                    f\"c/{category_dict['slug']}/{category_id}.json?page={page}&sys=latest\",\n                )\n                response = self._make_request(endpoint=category_endpoint)\n                new_topics = response.json()[\"topic_list\"][\"topics\"]\n\n                if len(new_topics) == 0:\n                    empty_categories.append(category_id)\n                topics.extend(new_topics)\n\n            for empty_category in empty_categories:\n                self.category_id_map.pop(empty_category)\n\n        for topic in topics:\n            last_time = topic.get(\"last_posted_at\")\n            if not last_time:\n                continue\n\n            last_time_dt = time_str_to_utc(last_time)\n            if (start and start > last_time_dt) or (end and end < last_time_dt):\n                continue\n\n            topic_ids.append(topic[\"id\"])\n\n        return topic_ids\n\n    def _yield_discourse_documents(\n        self,\n        start: datetime,\n        end: datetime,\n    ) -> GenerateDocumentsOutput:\n        page = 0\n        while topic_ids := self._get_latest_topics(start, end, page):\n            doc_batch: list[Document | HierarchyNode] = []\n            for topic_id in topic_ids:\n                doc_batch.append(self._get_doc_from_topic(topic_id))\n                if len(doc_batch) >= self.batch_size:\n                    yield doc_batch\n                    doc_batch = []\n\n            if doc_batch:\n                yield doc_batch\n            page += 1\n\n    def load_credentials(\n        self,\n        credentials: dict[str, Any],\n    ) -> dict[str, Any] | None:\n        self.permissions = DiscoursePerms(\n            api_key=credentials[\"discourse_api_key\"],\n            api_username=credentials[\"discourse_api_username\"],\n        )\n        return None\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        if self.permissions is None:\n            raise ConnectorMissingCredentialError(\"Discourse\")\n\n        start_datetime = datetime.utcfromtimestamp(start).replace(tzinfo=timezone.utc)\n        end_datetime = datetime.utcfromtimestamp(end).replace(tzinfo=timezone.utc)\n\n        self._get_categories_map()\n\n        yield from self._yield_discourse_documents(start_datetime, end_datetime)\n\n\nif __name__ == \"__main__\":\n    import os\n\n    connector = DiscourseConnector(base_url=os.environ[\"DISCOURSE_BASE_URL\"])\n    connector.load_credentials(\n        {\n            \"discourse_api_key\": os.environ[\"DISCOURSE_API_KEY\"],\n            \"discourse_api_username\": os.environ[\"DISCOURSE_API_USERNAME\"],\n        }\n    )\n\n    current = time.time()\n    one_year_ago = current - 24 * 60 * 60 * 360\n    latest_docs = connector.poll_source(one_year_ago, current)\n    print(next(latest_docs))\n"
  },
  {
    "path": "backend/onyx/connectors/document360/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/document360/connector.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import List\nfrom typing import Optional\n\nimport requests\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rate_limit_builder,\n)\nfrom onyx.connectors.document360.utils import flatten_child_categories\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.html_utils import parse_html_page_basic\nfrom onyx.utils.retry_wrapper import retry_builder\n\n# Limitations and Potential Improvements\n# 1. The \"Categories themselves contain potentially relevant information\" but they're not pulled in\n# 2. Only the HTML Articles are supported, Document360 also has a Markdown and \"Block\" format\n# 3. The contents are not as cleaned up as other HTML connectors\n\nDOCUMENT360_BASE_URL = \"https://portal.document360.io\"\nDOCUMENT360_API_BASE_URL = \"https://apihub.document360.io/v2\"\n\n\nclass Document360Connector(LoadConnector, PollConnector):\n    def __init__(\n        self,\n        workspace: str,\n        categories: List[str] | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n        portal_id: Optional[str] = None,\n        api_token: Optional[str] = None,\n    ) -> None:\n        self.portal_id = portal_id\n        self.workspace = workspace\n        self.categories = categories\n        self.batch_size = batch_size\n        self.api_token = api_token\n\n    def load_credentials(self, credentials: dict[str, Any]) -> Optional[dict[str, Any]]:\n        self.api_token = credentials.get(\"document360_api_token\")\n        self.portal_id = credentials.get(\"portal_id\")\n        return None\n\n    # rate limiting set based on the enterprise plan: https://apidocs.document360.com/apidocs/rate-limiting\n    # NOTE: retry will handle cases where user is not on enterprise plan - we will just hit the rate limit\n    # and then retry after a period\n    @retry_builder()\n    @rate_limit_builder(max_calls=100, period=60)\n    def _make_request(self, endpoint: str, params: Optional[dict] = None) -> Any:\n        if not self.api_token:\n            raise ConnectorMissingCredentialError(\"Document360\")\n\n        headers = {\"accept\": \"application/json\", \"api_token\": self.api_token}\n\n        response = requests.get(\n            f\"{DOCUMENT360_API_BASE_URL}/{endpoint}\", headers=headers, params=params\n        )\n        response.raise_for_status()\n\n        return response.json()[\"data\"]\n\n    def _get_workspace_id_by_name(self) -> str:\n        projects = self._make_request(\"ProjectVersions\")\n        workspace_id = next(\n            (\n                project[\"id\"]\n                for project in projects\n                if project[\"version_code_name\"] == self.workspace\n            ),\n            None,\n        )\n        if workspace_id is None:\n            raise ValueError(\"Not able to find Workspace ID by the user provided name\")\n\n        return workspace_id\n\n    def _get_articles_with_category(self, workspace_id: str) -> Any:\n        all_categories = self._make_request(\n            f\"ProjectVersions/{workspace_id}/categories\"\n        )\n        articles_with_category = []\n\n        for category in all_categories:\n            if not self.categories or category[\"name\"] in self.categories:\n                for article in category[\"articles\"]:\n                    articles_with_category.append(\n                        {\"id\": article[\"id\"], \"category_name\": category[\"name\"]}\n                    )\n                for child_category in category[\"child_categories\"]:\n                    all_nested_categories = flatten_child_categories(child_category)\n                    for nested_category in all_nested_categories:\n                        for article in nested_category[\"articles\"]:\n                            articles_with_category.append(\n                                {\n                                    \"id\": article[\"id\"],\n                                    \"category_name\": nested_category[\"name\"],\n                                }\n                            )\n\n        return articles_with_category\n\n    def _process_articles(\n        self, start: datetime | None = None, end: datetime | None = None\n    ) -> GenerateDocumentsOutput:\n        if self.api_token is None:\n            raise ConnectorMissingCredentialError(\"Document360\")\n\n        workspace_id = self._get_workspace_id_by_name()\n        articles = self._get_articles_with_category(workspace_id)\n\n        doc_batch: List[Document | HierarchyNode] = []\n\n        for article in articles:\n            article_details = self._make_request(\n                f\"Articles/{article['id']}\", {\"langCode\": \"en\"}\n            )\n\n            updated_at = datetime.strptime(\n                article_details[\"modified_at\"], \"%Y-%m-%dT%H:%M:%S.%fZ\"\n            ).replace(tzinfo=timezone.utc)\n            if start is not None and updated_at < start:\n                continue\n            if end is not None and updated_at > end:\n                continue\n\n            authors = [\n                BasicExpertInfo(\n                    display_name=author.get(\"name\"), email=author[\"email_id\"]\n                )\n                for author in article_details.get(\"authors\", [])\n                if author[\"email_id\"]\n            ]\n\n            doc_link = (\n                article_details[\"url\"]\n                if article_details.get(\"url\")\n                else f\"{DOCUMENT360_BASE_URL}/{self.portal_id}/document/v1/view/{article['id']}\"\n            )\n\n            html_content = article_details[\"html_content\"]\n            article_content = (\n                parse_html_page_basic(html_content) if html_content is not None else \"\"\n            )\n            doc_text = (\n                f\"{article_details.get('description', '')}\\n{article_content}\".strip()\n            )\n\n            document = Document(\n                id=article_details[\"id\"],\n                sections=[TextSection(link=doc_link, text=doc_text)],\n                source=DocumentSource.DOCUMENT360,\n                semantic_identifier=article_details[\"title\"],\n                doc_updated_at=updated_at,\n                primary_owners=authors,\n                metadata={\n                    \"workspace\": self.workspace,\n                    \"category\": article[\"category_name\"],\n                },\n            )\n\n            doc_batch.append(document)\n\n            if len(doc_batch) >= self.batch_size:\n                yield doc_batch\n                doc_batch = []\n\n        if doc_batch:\n            yield doc_batch\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        return self._process_articles()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)\n        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)\n        return self._process_articles(start_datetime, end_datetime)\n\n\nif __name__ == \"__main__\":\n    import time\n    import os\n\n    document360_connector = Document360Connector(os.environ[\"DOCUMENT360_WORKSPACE\"])\n    document360_connector.load_credentials(\n        {\n            \"portal_id\": os.environ[\"DOCUMENT360_PORTAL_ID\"],\n            \"document360_api_token\": os.environ[\"DOCUMENT360_API_TOKEN\"],\n        }\n    )\n\n    current = time.time()\n    one_year_ago = current - 24 * 60 * 60 * 360\n    latest_docs = document360_connector.poll_source(one_year_ago, current)\n\n    for doc in latest_docs:\n        print(doc)\n"
  },
  {
    "path": "backend/onyx/connectors/document360/utils.py",
    "content": "def flatten_child_categories(category: dict) -> list[dict]:\n    if not category[\"child_categories\"]:\n        return [category]\n    else:\n        flattened_categories = [category]\n        for child_category in category[\"child_categories\"]:\n            flattened_categories.extend(flatten_child_categories(child_category))\n        return flattened_categories\n"
  },
  {
    "path": "backend/onyx/connectors/dropbox/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/dropbox/connector.py",
    "content": "from datetime import timezone\nfrom io import BytesIO\nfrom typing import Any\n\nfrom dropbox import Dropbox  # type: ignore[import-untyped]\nfrom dropbox.exceptions import ApiError  # type: ignore[import-untyped]\nfrom dropbox.exceptions import AuthError\nfrom dropbox.files import FileMetadata  # type: ignore[import-untyped]\nfrom dropbox.files import FolderMetadata\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialInvalidError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.extract_file_text import extract_file_text\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\nclass DropboxConnector(LoadConnector, PollConnector):\n    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:\n        self.batch_size = batch_size\n        self.dropbox_client: Dropbox | None = None\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self.dropbox_client = Dropbox(credentials[\"dropbox_access_token\"])\n        return None\n\n    def _download_file(self, path: str) -> bytes:\n        \"\"\"Download a single file from Dropbox.\"\"\"\n        if self.dropbox_client is None:\n            raise ConnectorMissingCredentialError(\"Dropbox\")\n        _, resp = self.dropbox_client.files_download(path)\n        return resp.content\n\n    def _get_shared_link(self, path: str) -> str:\n        \"\"\"Create a shared link for a file in Dropbox.\"\"\"\n        if self.dropbox_client is None:\n            raise ConnectorMissingCredentialError(\"Dropbox\")\n\n        try:\n            # Check if a shared link already exists\n            shared_links = self.dropbox_client.sharing_list_shared_links(path=path)\n            if shared_links.links:\n                return shared_links.links[0].url\n\n            link_metadata = (\n                self.dropbox_client.sharing_create_shared_link_with_settings(path)\n            )\n            return link_metadata.url\n        except ApiError as err:\n            logger.exception(f\"Failed to create a shared link for {path}: {err}\")\n            return \"\"\n\n    def _yield_files_recursive(\n        self,\n        path: str,\n        start: SecondsSinceUnixEpoch | None,\n        end: SecondsSinceUnixEpoch | None,\n    ) -> GenerateDocumentsOutput:\n        \"\"\"Yield files in batches from a specified Dropbox folder, including subfolders.\"\"\"\n        if self.dropbox_client is None:\n            raise ConnectorMissingCredentialError(\"Dropbox\")\n\n        result = self.dropbox_client.files_list_folder(\n            path,\n            limit=self.batch_size,\n            recursive=False,\n            include_non_downloadable_files=False,\n        )\n\n        while True:\n            batch: list[Document | HierarchyNode] = []\n            for entry in result.entries:\n                if isinstance(entry, FileMetadata):\n                    modified_time = entry.client_modified\n                    if modified_time.tzinfo is None:\n                        # If no timezone info, assume it is UTC\n                        modified_time = modified_time.replace(tzinfo=timezone.utc)\n                    else:\n                        # If not in UTC, translate it\n                        modified_time = modified_time.astimezone(timezone.utc)\n\n                    time_as_seconds = int(modified_time.timestamp())\n                    if start and time_as_seconds < start:\n                        continue\n                    if end and time_as_seconds > end:\n                        continue\n\n                    downloaded_file = self._download_file(entry.path_display)\n                    link = self._get_shared_link(entry.path_display)\n                    try:\n                        text = extract_file_text(\n                            BytesIO(downloaded_file),\n                            file_name=entry.name,\n                            break_on_unprocessable=False,\n                        )\n                        batch.append(\n                            Document(\n                                id=f\"doc:{entry.id}\",\n                                sections=[TextSection(link=link, text=text)],\n                                source=DocumentSource.DROPBOX,\n                                semantic_identifier=entry.name,\n                                doc_updated_at=modified_time,\n                                metadata={\"type\": \"article\"},\n                            )\n                        )\n                    except Exception as e:\n                        logger.exception(\n                            f\"Error decoding file {entry.path_display} as utf-8 error occurred: {e}\"\n                        )\n\n                elif isinstance(entry, FolderMetadata):\n                    yield from self._yield_files_recursive(entry.path_lower, start, end)\n\n            if batch:\n                yield batch\n\n            if not result.has_more:\n                break\n\n            result = self.dropbox_client.files_list_folder_continue(result.cursor)\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        return self.poll_source(None, None)\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None\n    ) -> GenerateDocumentsOutput:\n        if self.dropbox_client is None:\n            raise ConnectorMissingCredentialError(\"Dropbox\")\n\n        for batch in self._yield_files_recursive(\"\", start, end):\n            yield batch\n\n        return None\n\n    def validate_connector_settings(self) -> None:\n        if self.dropbox_client is None:\n            raise ConnectorMissingCredentialError(\"Dropbox credentials not loaded.\")\n\n        try:\n            self.dropbox_client.files_list_folder(path=\"\", limit=1)\n        except AuthError as e:\n            logger.exception(\"Failed to validate Dropbox credentials\")\n            raise CredentialInvalidError(f\"Dropbox credential is invalid: {e.error}\")\n        except ApiError as e:\n            if (\n                e.error is not None\n                and \"insufficient_permissions\" in str(e.error).lower()\n            ):\n                raise InsufficientPermissionsError(\n                    \"Your Dropbox token does not have sufficient permissions.\"\n                )\n            raise ConnectorValidationError(\n                f\"Unexpected Dropbox error during validation: {e.user_message_text or e}\"\n            )\n        except Exception as e:\n            raise Exception(f\"Unexpected error during Dropbox settings validation: {e}\")\n\n\nif __name__ == \"__main__\":\n    import os\n\n    connector = DropboxConnector()\n    connector.load_credentials(\n        {\n            \"dropbox_access_token\": os.environ[\"DROPBOX_ACCESS_TOKEN\"],\n        }\n    )\n    document_batches = connector.load_from_state()\n    print(next(document_batches))\n"
  },
  {
    "path": "backend/onyx/connectors/drupal_wiki/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/drupal_wiki/connector.py",
    "content": "import mimetypes\nfrom io import BytesIO\nfrom typing import Any\n\nimport requests\nfrom typing_extensions import override\n\nfrom onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE\nfrom onyx.configs.app_configs import DRUPAL_WIKI_ATTACHMENT_SIZE_THRESHOLD\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    datetime_from_utc_timestamp,\n)\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import rate_limit_builder\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import rl_requests\nfrom onyx.connectors.drupal_wiki.models import DrupalWikiCheckpoint\nfrom onyx.connectors.drupal_wiki.models import DrupalWikiPage\nfrom onyx.connectors.drupal_wiki.models import DrupalWikiPageResponse\nfrom onyx.connectors.drupal_wiki.models import DrupalWikiSpaceResponse\nfrom onyx.connectors.drupal_wiki.utils import build_drupal_wiki_document_id\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.interfaces import CheckpointedConnector\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import ConnectorFailure\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnector\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.extract_file_text import extract_text_and_images\nfrom onyx.file_processing.extract_file_text import get_file_ext\nfrom onyx.file_processing.file_types import OnyxFileExtensions\nfrom onyx.file_processing.html_utils import parse_html_page_basic\nfrom onyx.file_processing.image_utils import store_image_and_create_section\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.b64 import get_image_type_from_bytes\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\nlogger = setup_logger()\n\nMAX_API_PAGE_SIZE = 2000  # max allowed by API\nDRUPAL_WIKI_SPACE_KEY = \"space\"\n\n\nrate_limited_get = retry_builder()(\n    rate_limit_builder(max_calls=10, period=1)(rl_requests.get)\n)\n\n\nclass DrupalWikiConnector(\n    CheckpointedConnector[DrupalWikiCheckpoint],\n    SlimConnector,\n):\n    # Deprecated parameters that may exist in old connector configurations\n    _DEPRECATED_PARAMS = {\"drupal_wiki_scope\", \"include_all_spaces\"}\n\n    def __init__(\n        self,\n        base_url: str,\n        spaces: list[str] | None = None,\n        pages: list[str] | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n        continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE,\n        include_attachments: bool = False,\n        allow_images: bool = False,\n        **kwargs: Any,\n    ) -> None:\n        \"\"\"\n        Initialize the Drupal Wiki connector.\n\n        Args:\n            base_url: The base URL of the Drupal Wiki instance (e.g., https://help.drupal-wiki.com)\n            spaces: List of space IDs to index. If None and pages is also None, all spaces will be indexed.\n            pages: List of page IDs to index. If provided, these specific pages will be indexed.\n            batch_size: Number of documents to process in a batch.\n            continue_on_failure: If True, continue indexing even if some documents fail.\n            include_attachments: If True, enable processing of page attachments including images and documents.\n            allow_images: If True, enable processing of image attachments.\n        \"\"\"\n\n        #########################################################\n        # TODO: Remove this after 02/01/2026 and remove **kwargs from the function signature\n        # Check for deprecated parameters from old connector configurations\n        # If attempting to update without deleting the connector:\n        # Remove the deprecated parameters from the custom_connector_config in the relevant connector table rows\n        deprecated_found = set(kwargs.keys()) & self._DEPRECATED_PARAMS\n        if deprecated_found:\n            raise ConnectorValidationError(\n                f\"Outdated Drupal Wiki connector configuration detected \"\n                f\"(found deprecated parameters: {', '.join(deprecated_found)}). \"\n                f\"Please delete and recreate this connector, or contact Onyx support \"\n                f\"for assistance with updating the configuration without deleting the connector.\"\n            )\n        # Reject any other unexpected parameters\n        if kwargs:\n            raise ConnectorValidationError(\n                f\"Unexpected parameters for Drupal Wiki connector: {', '.join(kwargs.keys())}\"\n            )\n        #########################################################\n\n        self.base_url = base_url.rstrip(\"/\")\n        self.spaces = spaces or []\n        self.pages = pages or []\n\n        # If no specific spaces or pages are provided, index all spaces\n        self.include_all_spaces = not self.spaces and not self.pages\n\n        self.batch_size = batch_size\n        self.continue_on_failure = continue_on_failure\n\n        # Attachment processing configuration\n        self.include_attachments = include_attachments\n        self.allow_images = allow_images\n\n        self.headers: dict[str, str] = {\"Accept\": \"application/json\"}\n        self._api_token: str | None = None  # set by load_credentials\n\n    def set_allow_images(self, value: bool) -> None:\n        logger.info(f\"Setting allow_images to {value}.\")\n        self.allow_images = value\n\n    def _get_page_attachments(self, page_id: int) -> list[dict[str, Any]]:\n        \"\"\"\n        Get all attachments for a specific page.\n\n        Args:\n            page_id: ID of the page.\n\n        Returns:\n            List of attachment dictionaries.\n        \"\"\"\n        url = f\"{self.base_url}/api/rest/scope/api/attachment\"\n        params = {\"pageId\": str(page_id)}\n        logger.debug(f\"Fetching attachments for page {page_id} from {url}\")\n\n        try:\n            response = rate_limited_get(url, headers=self.headers, params=params)\n            response.raise_for_status()\n            attachments = response.json()\n            logger.info(f\"Found {len(attachments)} attachments for page {page_id}\")\n            return attachments\n        except Exception as e:\n            logger.warning(f\"Failed to fetch attachments for page {page_id}: {e}\")\n            return []\n\n    def _download_attachment(self, attachment_id: int) -> bytes:\n        \"\"\"\n        Download attachment content.\n\n        Args:\n            attachment_id: ID of the attachment to download.\n\n        Returns:\n            Raw bytes of the attachment.\n        \"\"\"\n        url = f\"{self.base_url}/api/rest/scope/api/attachment/{attachment_id}/download\"\n        logger.info(f\"Downloading attachment {attachment_id} from {url}\")\n\n        # Use headers without Accept for binary downloads\n        download_headers = {\"Authorization\": f\"Bearer {self._api_token}\"}\n\n        response = rate_limited_get(url, headers=download_headers)\n        response.raise_for_status()\n\n        return response.content\n\n    def _validate_attachment_filetype(self, attachment: dict[str, Any]) -> bool:\n        \"\"\"\n        Validate if the attachment file type is supported.\n\n        Args:\n            attachment: Attachment dictionary from Drupal Wiki API.\n\n        Returns:\n            True if the file type is supported, False otherwise.\n        \"\"\"\n        file_name = attachment.get(\"fileName\", \"\")\n        if not file_name:\n            return False\n\n        # Get file extension\n        file_extension = get_file_ext(file_name)\n\n        if file_extension in OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS:\n            return True\n\n        logger.warning(f\"Unsupported file type: {file_extension} for {file_name}\")\n        return False\n\n    def _get_media_type_from_filename(self, filename: str) -> str:\n        \"\"\"\n        Get media type from filename using the standard mimetypes library.\n\n        Args:\n            filename: The filename.\n\n        Returns:\n            Media type string.\n        \"\"\"\n        mime_type, _encoding = mimetypes.guess_type(filename)\n        return mime_type or \"application/octet-stream\"\n\n    def _process_attachment(\n        self,\n        attachment: dict[str, Any],\n        page_id: int,\n        download_url: str,\n    ) -> tuple[list[TextSection | ImageSection], str | None]:\n        \"\"\"\n        Process a single attachment and return generated sections.\n\n        Args:\n            attachment: Attachment dictionary from Drupal Wiki API.\n            page_id: ID of the parent page.\n            download_url: Direct download URL for the attachment.\n\n        Returns:\n            Tuple of (sections, error_message). If error_message is not None, the\n            sections list should be treated as invalid.\n        \"\"\"\n        sections: list[TextSection | ImageSection] = []\n\n        try:\n            if not self._validate_attachment_filetype(attachment):\n                return (\n                    [],\n                    f\"Unsupported file type: {attachment.get('fileName', 'unknown')}\",\n                )\n\n            attachment_id = attachment[\"id\"]\n            file_name = attachment.get(\"fileName\", f\"attachment_{attachment_id}\")\n            file_size = attachment.get(\"fileSize\", 0)\n            media_type = self._get_media_type_from_filename(file_name)\n\n            if file_size > DRUPAL_WIKI_ATTACHMENT_SIZE_THRESHOLD:\n                return [], f\"Attachment too large: {file_size} bytes\"\n\n            try:\n                raw_bytes = self._download_attachment(attachment_id)\n            except Exception as e:\n                return [], f\"Failed to download attachment: {e}\"\n\n            if media_type.startswith(\"image/\"):\n                if not self.allow_images:\n                    logger.info(\n                        f\"Skipping image attachment {file_name} because allow_images is False\",\n                    )\n                    return [], None\n\n                try:\n                    image_section, _ = store_image_and_create_section(\n                        image_data=raw_bytes,\n                        file_id=str(attachment_id),\n                        display_name=attachment.get(\n                            \"name\", attachment.get(\"fileName\", \"Unknown\")\n                        ),\n                        link=download_url,\n                        media_type=media_type,\n                        file_origin=FileOrigin.CONNECTOR,\n                    )\n                    sections.append(image_section)\n                    logger.debug(f\"Stored image attachment with file name: {file_name}\")\n                except Exception as e:\n                    return [], f\"Image storage failed: {e}\"\n\n                return sections, None\n\n            image_counter = 0\n\n            def _store_embedded_image(image_data: bytes, image_name: str) -> None:\n                nonlocal image_counter\n\n                if not self.allow_images:\n                    return\n\n                media_for_image = self._get_media_type_from_filename(image_name)\n                if media_for_image == \"application/octet-stream\":\n                    try:\n                        media_for_image = get_image_type_from_bytes(image_data)\n                    except ValueError:\n                        logger.warning(\n                            f\"Unable to determine media type for embedded image {image_name} on attachment {file_name}\"\n                        )\n\n                image_counter += 1\n                display_name = (\n                    image_name\n                    or f\"{attachment.get('name', file_name)} - embedded image {image_counter}\"\n                )\n\n                try:\n                    image_section, _ = store_image_and_create_section(\n                        image_data=image_data,\n                        file_id=f\"{attachment_id}_embedded_{image_counter}\",\n                        display_name=display_name,\n                        link=download_url,\n                        media_type=media_for_image,\n                        file_origin=FileOrigin.CONNECTOR,\n                    )\n                    sections.append(image_section)\n                except Exception as err:\n                    logger.warning(\n                        f\"Failed to store embedded image {image_name or image_counter} for attachment {file_name}: {err}\"\n                    )\n\n            extraction_result = extract_text_and_images(\n                file=BytesIO(raw_bytes),\n                file_name=file_name,\n                content_type=media_type,\n                image_callback=_store_embedded_image if self.allow_images else None,\n            )\n\n            text_content = extraction_result.text_content.strip()\n            if text_content:\n                sections.insert(0, TextSection(text=text_content, link=download_url))\n                logger.info(\n                    f\"Extracted {len(text_content)} characters from {file_name}\"\n                )\n            elif not sections:\n                return [], f\"No text extracted for {file_name}\"\n\n            return sections, None\n\n        except Exception as e:\n            logger.error(\n                f\"Failed to process attachment {attachment.get('name', 'unknown')} on page {page_id}: {e}\"\n            )\n            return [], f\"Failed to process attachment: {e}\"\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        \"\"\"\n        Load credentials for the Drupal Wiki connector.\n\n        Args:\n            credentials: Dictionary containing the API token.\n\n        Returns:\n            None\n        \"\"\"\n\n        api_token = credentials.get(\"drupal_wiki_api_token\", \"\").strip()\n\n        if not api_token:\n            raise ConnectorValidationError(\n                \"API token is required for Drupal Wiki connector\"\n            )\n\n        self._api_token = api_token\n        self.headers.update(\n            {\n                \"Authorization\": f\"Bearer {api_token}\",\n            }\n        )\n\n        return None\n\n    def _get_space_ids(self) -> list[int]:\n        \"\"\"\n        Get all space IDs from the Drupal Wiki instance.\n\n        Returns:\n            List of space IDs (deduplicated). The list is sorted to be deterministic.\n        \"\"\"\n        url = f\"{self.base_url}/api/rest/scope/api/space\"\n        size = MAX_API_PAGE_SIZE\n        page = 0\n        all_space_ids: set[int] = set()\n        has_more = True\n        last_num_ids = -1\n\n        while has_more and len(all_space_ids) > last_num_ids:\n            last_num_ids = len(all_space_ids)\n            params = {\"size\": size, \"page\": page}\n            logger.debug(f\"Fetching spaces from {url} (page={page}, size={size})\")\n            response = rate_limited_get(url, headers=self.headers, params=params)\n            response.raise_for_status()\n            resp_json = response.json()\n            space_response = DrupalWikiSpaceResponse.model_validate(resp_json)\n\n            logger.info(f\"Fetched {len(space_response.content)} spaces from {page}\")\n            # Collect ids into the set to deduplicate\n            for space in space_response.content:\n                all_space_ids.add(space.id)\n\n            # Continue if we got a full page, indicating there might be more\n            has_more = len(space_response.content) >= size\n\n            page += 1\n\n        # Return a deterministic, sorted list of ids\n        space_id_list = list(sorted(all_space_ids))\n        logger.debug(f\"Total spaces fetched: {len(space_id_list)}\")\n        return space_id_list\n\n    def _get_pages_for_space(\n        self, space_id: int, modified_after: SecondsSinceUnixEpoch | None = None\n    ) -> list[DrupalWikiPage]:\n        \"\"\"\n        Get all pages for a specific space, optionally filtered by modification time.\n\n        Args:\n            space_id: ID of the space.\n            modified_after: Only return pages modified after this timestamp (seconds since Unix epoch).\n\n        Returns:\n            List of DrupalWikiPage objects.\n        \"\"\"\n        url = f\"{self.base_url}/api/rest/scope/api/page\"\n        size = MAX_API_PAGE_SIZE\n        page = 0\n        all_pages = []\n        has_more = True\n\n        while has_more:\n            params: dict[str, str | int] = {\n                DRUPAL_WIKI_SPACE_KEY: str(space_id),\n                \"size\": size,\n                \"page\": page,\n            }\n\n            # Add modifiedAfter parameter if provided\n            if modified_after is not None:\n                params[\"modifiedAfter\"] = int(modified_after)\n\n            logger.debug(\n                f\"Fetching pages for space {space_id} from {url} ({page=}, {size=}, {modified_after=})\"\n            )\n            response = rate_limited_get(url, headers=self.headers, params=params)\n            response.raise_for_status()\n            resp_json = response.json()\n\n            try:\n                page_response = DrupalWikiPageResponse.model_validate(resp_json)\n            except Exception as e:\n                logger.error(f\"Failed to validate Drupal Wiki page response: {e}\")\n                raise ConnectorValidationError(f\"Invalid API response format: {e}\")\n\n            logger.info(\n                f\"Fetched {len(page_response.content)} pages in space {space_id} (page={page})\"\n            )\n\n            # Pydantic should automatically parse content items as DrupalWikiPage objects\n            # If validation fails, it will raise an exception which we should catch\n            all_pages.extend(page_response.content)\n\n            # Continue if we got a full page, indicating there might be more\n            has_more = len(page_response.content) >= size\n\n            page += 1\n\n        logger.debug(f\"Total pages fetched for space {space_id}: {len(all_pages)}\")\n        return all_pages\n\n    def _get_page_content(self, page_id: int) -> DrupalWikiPage:\n        \"\"\"\n        Get the content of a specific page.\n\n        Args:\n            page_id: ID of the page.\n\n        Returns:\n            DrupalWikiPage object.\n        \"\"\"\n        url = f\"{self.base_url}/api/rest/scope/api/page/{page_id}\"\n        response = rate_limited_get(url, headers=self.headers)\n        response.raise_for_status()\n\n        return DrupalWikiPage.model_validate(response.json())\n\n    def _process_page(self, page: DrupalWikiPage) -> Document | ConnectorFailure:\n        \"\"\"\n        Process a page and convert it to a Document.\n\n        Args:\n            page: DrupalWikiPage object.\n\n        Returns:\n            Document object or ConnectorFailure.\n        \"\"\"\n        try:\n            # Extract text from HTML, handle None body\n            text_content = parse_html_page_basic(page.body or \"\")\n\n            # Ensure text_content is a string, not None\n            if text_content is None:\n                text_content = \"\"\n\n            # Create document URL\n            page_url = build_drupal_wiki_document_id(self.base_url, page.id)\n\n            # Create sections with just the page content\n            sections: list[TextSection | ImageSection] = [\n                TextSection(text=text_content, link=page_url)\n            ]\n\n            # Only process attachments if self.include_attachments is True\n            if self.include_attachments:\n                attachments = self._get_page_attachments(page.id)\n                for attachment in attachments:\n                    logger.info(\n                        f\"Processing attachment: {attachment.get('name', 'Unknown')} (ID: {attachment['id']})\"\n                    )\n                    # Use downloadUrl from API; fallback to page URL\n                    raw_download = attachment.get(\"downloadUrl\")\n                    if raw_download:\n                        download_url = (\n                            raw_download\n                            if raw_download.startswith(\"http\")\n                            else f\"{self.base_url.rstrip('/')}\" + raw_download\n                        )\n                    else:\n                        download_url = page_url\n                    # Process the attachment\n                    attachment_sections, error = self._process_attachment(\n                        attachment, page.id, download_url\n                    )\n                    if error:\n                        logger.warning(\n                            f\"Error processing attachment {attachment.get('name', 'Unknown')}: {error}\"\n                        )\n                        continue\n\n                    if attachment_sections:\n                        sections.extend(attachment_sections)\n                        logger.debug(\n                            f\"Added {len(attachment_sections)} section(s) for attachment {attachment.get('name', 'Unknown')}\"\n                        )\n\n            # Create metadata\n            metadata: dict[str, str | list[str]] = {\n                \"space_id\": str(page.homeSpace),\n                \"page_id\": str(page.id),\n                \"type\": page.type,\n            }\n\n            # Create document\n            return Document(\n                id=page_url,\n                sections=sections,\n                source=DocumentSource.DRUPAL_WIKI,\n                semantic_identifier=page.title,\n                metadata=metadata,\n                doc_updated_at=datetime_from_utc_timestamp(page.lastModified),\n            )\n        except Exception as e:\n            logger.error(f\"Error processing page {page.id}: {e}\")\n            return ConnectorFailure(\n                failed_document=DocumentFailure(\n                    document_id=str(page.id),\n                    document_link=build_drupal_wiki_document_id(self.base_url, page.id),\n                ),\n                failure_message=f\"Error processing page {page.id}: {e}\",\n                exception=e,\n            )\n\n    @override\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: DrupalWikiCheckpoint,\n    ) -> CheckpointOutput[DrupalWikiCheckpoint]:\n        \"\"\"\n        Load documents from a checkpoint.\n\n        Args:\n            start: Start time as seconds since Unix epoch.\n            end: End time as seconds since Unix epoch.\n            checkpoint: Checkpoint to resume from.\n\n        Returns:\n            Generator yielding documents and the updated checkpoint.\n        \"\"\"\n        # Ensure page_ids is not None\n        if checkpoint.page_ids is None:\n            checkpoint.page_ids = []\n\n        # Initialize page_ids from self.pages if not already set\n        if not checkpoint.page_ids and self.pages:\n            logger.info(f\"Initializing page_ids from self.pages: {self.pages}\")\n            checkpoint.page_ids = [int(page_id.strip()) for page_id in self.pages]\n\n        # Ensure spaces is not None\n        if checkpoint.spaces is None:\n            checkpoint.spaces = []\n\n        while checkpoint.current_page_id_index < len(checkpoint.page_ids):\n            page_id = checkpoint.page_ids[checkpoint.current_page_id_index]\n            logger.debug(f\"Processing page ID: {page_id}\")\n\n            try:\n                # Get the page content directly\n                page = self._get_page_content(page_id)\n\n                # Skip pages outside the time range\n                if not self._is_page_in_time_range(page.lastModified, start, end):\n                    logger.info(f\"Skipping page {page_id} - outside time range\")\n                    checkpoint.current_page_id_index += 1\n                    continue\n\n                # Process the page\n                doc_or_failure = self._process_page(page)\n                yield doc_or_failure\n\n            except Exception as e:\n                logger.error(f\"Error processing page ID {page_id}: {e}\")\n                yield ConnectorFailure(\n                    failed_document=DocumentFailure(\n                        document_id=str(page_id),\n                        document_link=build_drupal_wiki_document_id(\n                            self.base_url, page_id\n                        ),\n                    ),\n                    failure_message=f\"Error processing page ID {page_id}: {e}\",\n                    exception=e,\n                )\n\n            # Move to the next page ID\n            checkpoint.current_page_id_index += 1\n\n        # TODO: The main benefit of CheckpointedConnectors is that they can \"save their work\"\n        # by storing a checkpoint so transient errors are easy to recover from: simply resume\n        # from the last checkpoint. The way to get checkpoints saved is to return them somewhere\n        # in the middle of this function. The guarantee our checkpointing system gives to you,\n        # the connector implementer, is that when you return a checkpoint, this connector will\n        # at a later time (generally within a few seconds) call the load_from_checkpoint function\n        # again with the checkpoint you last returned as long as has_more=True.\n\n        # Process spaces if include_all_spaces is True or spaces are provided\n        if self.include_all_spaces or self.spaces:\n            # If include_all_spaces is True, always fetch all spaces\n            if self.include_all_spaces:\n                logger.info(\"Fetching all spaces\")\n                # Fetch all spaces\n                all_space_ids = self._get_space_ids()\n                # checkpoint.spaces expects a list of ints; assign returned list\n                checkpoint.spaces = all_space_ids\n                logger.info(f\"Found {len(checkpoint.spaces)} spaces to process\")\n            # Otherwise, use provided spaces if checkpoint is empty\n            elif not checkpoint.spaces:\n                logger.info(f\"Using provided spaces: {self.spaces}\")\n                # Use provided spaces\n                checkpoint.spaces = [int(space_id.strip()) for space_id in self.spaces]\n\n            # Process spaces from the checkpoint\n            while checkpoint.current_space_index < len(checkpoint.spaces):\n                space_id = checkpoint.spaces[checkpoint.current_space_index]\n                logger.debug(f\"Processing space ID: {space_id}\")\n\n                # Get pages for the current space, filtered by start time if provided\n                pages = self._get_pages_for_space(space_id, modified_after=start)\n\n                # Process pages from the checkpoint\n                while checkpoint.current_page_index < len(pages):\n                    page = pages[checkpoint.current_page_index]\n                    logger.debug(f\"Processing page: {page.title} (ID: {page.id})\")\n\n                    # For space-based pages, we already filtered by modifiedAfter in the API call\n                    # Only need to check the end time boundary\n                    if end and page.lastModified >= end:\n                        logger.info(\n                            f\"Skipping page {page.id} - outside time range (after end)\"\n                        )\n                        checkpoint.current_page_index += 1\n                        continue\n\n                    # Process the page\n                    doc_or_failure = self._process_page(page)\n                    yield doc_or_failure\n\n                    # Move to the next page\n                    checkpoint.current_page_index += 1\n\n                # Move to the next space\n                checkpoint.current_space_index += 1\n                checkpoint.current_page_index = 0\n\n        # All spaces and pages processed\n        logger.info(\"Finished processing all spaces and pages\")\n        checkpoint.has_more = False\n        return checkpoint\n\n    @override\n    def build_dummy_checkpoint(self) -> DrupalWikiCheckpoint:\n        \"\"\"\n        Build a dummy checkpoint.\n\n        Returns:\n            DrupalWikiCheckpoint with default values.\n        \"\"\"\n        return DrupalWikiCheckpoint(\n            has_more=True,\n            current_space_index=0,\n            current_page_index=0,\n            current_page_id_index=0,\n            spaces=[],\n            page_ids=[],\n            is_processing_specific_pages=False,\n        )\n\n    @override\n    def validate_checkpoint_json(self, checkpoint_json: str) -> DrupalWikiCheckpoint:\n        \"\"\"\n        Validate a checkpoint JSON string.\n\n        Args:\n            checkpoint_json: JSON string representing a checkpoint.\n\n        Returns:\n            Validated DrupalWikiCheckpoint.\n        \"\"\"\n        return DrupalWikiCheckpoint.model_validate_json(checkpoint_json)\n\n    # TODO: unify approach with load_from_checkpoint.\n    # Ideally slim retrieval shares a lot of the same code with non-slim\n    # and we pass in a param is_slim to the main helper function\n    # that does the retrieval.\n    @override\n    def retrieve_all_slim_docs(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> GenerateSlimDocumentOutput:\n        \"\"\"\n        Retrieve all slim documents.\n\n        Args:\n            start: Start time as seconds since Unix epoch.\n            end: End time as seconds since Unix epoch.\n            callback: Callback for indexing heartbeat.\n\n        Returns:\n            Generator yielding batches of SlimDocument objects.\n        \"\"\"\n        slim_docs: list[SlimDocument | HierarchyNode] = []\n        logger.info(\n            f\"Starting retrieve_all_slim_docs with include_all_spaces={self.include_all_spaces}, spaces={self.spaces}\"\n        )\n\n        # Process specific page IDs if provided\n        if self.pages:\n            logger.info(f\"Processing specific pages: {self.pages}\")\n            for page_id in self.pages:\n                try:\n                    # Get the page content directly\n                    page_content = self._get_page_content(int(page_id.strip()))\n\n                    # Skip pages outside the time range\n                    if not self._is_page_in_time_range(\n                        page_content.lastModified, start, end\n                    ):\n                        logger.info(f\"Skipping page {page_id} - outside time range\")\n                        continue\n\n                    # Create slim document for the page\n                    page_url = build_drupal_wiki_document_id(\n                        self.base_url, page_content.id\n                    )\n                    slim_docs.append(\n                        SlimDocument(\n                            id=page_url,\n                        )\n                    )\n                    logger.debug(f\"Added slim document for page {page_content.id}\")\n\n                    # Process attachments for this page\n                    attachments = self._get_page_attachments(page_content.id)\n                    for attachment in attachments:\n                        if self._validate_attachment_filetype(attachment):\n                            attachment_url = f\"{page_url}#attachment-{attachment['id']}\"\n                            slim_docs.append(\n                                SlimDocument(\n                                    id=attachment_url,\n                                )\n                            )\n                            logger.debug(\n                                f\"Added slim document for attachment {attachment['id']}\"\n                            )\n\n                    # Yield batch if it reaches the batch size\n                    if len(slim_docs) >= self.batch_size:\n                        logger.debug(\n                            f\"Yielding batch of {len(slim_docs)} slim documents\"\n                        )\n                        yield slim_docs\n                        slim_docs = []\n\n                        if callback and callback.should_stop():\n                            return\n                        if callback:\n                            callback.progress(\"retrieve_all_slim_docs\", 1)\n\n                except Exception as e:\n                    logger.error(\n                        f\"Error processing page ID {page_id} for slim documents: {e}\"\n                    )\n\n        # Process spaces if include_all_spaces is True or spaces are provided\n        if self.include_all_spaces or self.spaces:\n            logger.info(\"Processing spaces for slim documents\")\n            # Get spaces to process\n            spaces_to_process = []\n            if self.include_all_spaces:\n                logger.info(\"Fetching all spaces for slim documents\")\n                # Fetch all spaces\n                all_space_ids = self._get_space_ids()\n                spaces_to_process = all_space_ids\n                logger.info(f\"Found {len(spaces_to_process)} spaces to process\")\n            else:\n                logger.info(f\"Using provided spaces: {self.spaces}\")\n                # Use provided spaces\n                spaces_to_process = [int(space_id.strip()) for space_id in self.spaces]\n\n            # Process each space\n            for space_id in spaces_to_process:\n                logger.info(f\"Processing space ID: {space_id}\")\n                # Get pages for the current space, filtered by start time if provided\n                pages = self._get_pages_for_space(space_id, modified_after=start)\n\n                # Process each page\n                for page in pages:\n                    logger.debug(f\"Processing page: {page.title} (ID: {page.id})\")\n                    # Skip pages outside the time range\n                    if end and page.lastModified >= end:\n                        logger.info(\n                            f\"Skipping page {page.id} - outside time range (after end)\"\n                        )\n                        continue\n\n                    # Create slim document for the page\n                    page_url = build_drupal_wiki_document_id(self.base_url, page.id)\n                    slim_docs.append(\n                        SlimDocument(\n                            id=page_url,\n                        )\n                    )\n                    logger.info(f\"Added slim document for page {page.id}\")\n\n                    # Process attachments for this page\n                    attachments = self._get_page_attachments(page.id)\n                    for attachment in attachments:\n                        if self._validate_attachment_filetype(attachment):\n                            attachment_url = f\"{page_url}#attachment-{attachment['id']}\"\n                            slim_docs.append(\n                                SlimDocument(\n                                    id=attachment_url,\n                                )\n                            )\n                            logger.info(\n                                f\"Added slim document for attachment {attachment['id']}\"\n                            )\n\n                    # Yield batch if it reaches the batch size\n                    if len(slim_docs) >= self.batch_size:\n                        logger.info(\n                            f\"Yielding batch of {len(slim_docs)} slim documents\"\n                        )\n                        yield slim_docs\n                        slim_docs = []\n\n                        if callback and callback.should_stop():\n                            return\n                        if callback:\n                            callback.progress(\"retrieve_all_slim_docs\", 1)\n\n        # Yield remaining documents\n        if slim_docs:\n            logger.debug(f\"Yielding final batch of {len(slim_docs)} slim documents\")\n            yield slim_docs\n\n    def validate_connector_settings(self) -> None:\n        \"\"\"\n        Validate the connector settings.\n\n        Raises:\n            ConnectorValidationError: If the settings are invalid.\n        \"\"\"\n        if not self.headers:\n            raise ConnectorMissingCredentialError(\"Drupal Wiki\")\n\n        try:\n            # Try to fetch spaces to validate the connection\n            # Call the new helper which returns the list of space ids\n            self._get_space_ids()\n        except requests.exceptions.RequestException as e:\n            raise ConnectorValidationError(f\"Failed to connect to Drupal Wiki: {e}\")\n\n    def _is_page_in_time_range(\n        self,\n        last_modified: int,\n        start: SecondsSinceUnixEpoch | None,\n        end: SecondsSinceUnixEpoch | None,\n    ) -> bool:\n        \"\"\"\n        Check if a page's last modified timestamp falls within the specified time range.\n\n        Args:\n            last_modified: The page's last modified timestamp.\n            start: Start time as seconds since Unix epoch (inclusive).\n            end: End time as seconds since Unix epoch (exclusive).\n\n        Returns:\n            True if the page is within the time range, False otherwise.\n        \"\"\"\n        return (not start or last_modified >= start) and (\n            not end or last_modified < end\n        )\n"
  },
  {
    "path": "backend/onyx/connectors/drupal_wiki/models.py",
    "content": "from enum import Enum\nfrom typing import Generic\nfrom typing import List\nfrom typing import Optional\nfrom typing import TypeVar\n\nfrom pydantic import BaseModel\n\nfrom onyx.connectors.interfaces import ConnectorCheckpoint\n\n\nclass SpaceAccessStatus(str, Enum):\n    \"\"\"Enum for Drupal Wiki space access status\"\"\"\n\n    PRIVATE = \"PRIVATE\"\n    ANONYMOUS = \"ANONYMOUS\"\n    AUTHENTICATED = \"AUTHENTICATED\"\n\n\nclass DrupalWikiSpace(BaseModel):\n    \"\"\"Model for a Drupal Wiki space\"\"\"\n\n    id: int\n    name: str\n    type: str\n    description: Optional[str] = None\n    accessStatus: Optional[SpaceAccessStatus] = None\n    color: Optional[str] = None\n\n\nclass DrupalWikiPage(BaseModel):\n    \"\"\"Model for a Drupal Wiki page\"\"\"\n\n    id: int\n    title: str\n    homeSpace: int\n    lastModified: int\n    type: str\n    body: Optional[str] = None\n\n\nT = TypeVar(\"T\")\n\n\nclass DrupalWikiBaseResponse(BaseModel, Generic[T]):\n    \"\"\"Base model for Drupal Wiki API responses\"\"\"\n\n    totalPages: int\n    totalElements: int\n    size: int\n    content: List[T]\n    number: int\n    first: bool\n    last: bool\n    numberOfElements: int\n    empty: bool\n\n\nclass DrupalWikiSpaceResponse(DrupalWikiBaseResponse[DrupalWikiSpace]):\n    \"\"\"Model for the response from the Drupal Wiki spaces API\"\"\"\n\n\nclass DrupalWikiPageResponse(DrupalWikiBaseResponse[DrupalWikiPage]):\n    \"\"\"Model for the response from the Drupal Wiki pages API\"\"\"\n\n\nclass DrupalWikiCheckpoint(ConnectorCheckpoint):\n    \"\"\"Checkpoint for the Drupal Wiki connector\"\"\"\n\n    current_space_index: int = 0\n    current_page_index: int = 0\n    current_page_id_index: int = 0\n    spaces: List[int] = []\n    page_ids: List[int] = []\n    is_processing_specific_pages: bool = False\n"
  },
  {
    "path": "backend/onyx/connectors/drupal_wiki/utils.py",
    "content": "from onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef build_drupal_wiki_document_id(base_url: str, page_id: int) -> str:\n    \"\"\"Build a document ID for a Drupal Wiki page using the real URL format\"\"\"\n    # Ensure base_url ends with a slash\n    base_url = base_url.rstrip(\"/\") + \"/\"\n    return f\"{base_url}node/{page_id}\"\n"
  },
  {
    "path": "backend/onyx/connectors/egnyte/connector.py",
    "content": "import io\nimport os\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import IO\nfrom urllib.parse import quote\n\nfrom pydantic import Field\n\nfrom onyx.configs.app_configs import EGNYTE_CLIENT_ID\nfrom onyx.configs.app_configs import EGNYTE_CLIENT_SECRET\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    get_oauth_callback_uri,\n)\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import OAuthConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.extract_file_text import detect_encoding\nfrom onyx.file_processing.extract_file_text import extract_file_text\nfrom onyx.file_processing.extract_file_text import get_file_ext\nfrom onyx.file_processing.extract_file_text import read_text_file\nfrom onyx.file_processing.file_types import OnyxFileExtensions\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import request_with_retries\n\n\nlogger = setup_logger()\n\n_EGNYTE_API_BASE = \"https://{domain}.egnyte.com/pubapi/v1\"\n_EGNYTE_APP_BASE = \"https://{domain}.egnyte.com\"\n\n\ndef _parse_last_modified(last_modified: str) -> datetime:\n    return datetime.strptime(last_modified, \"%a, %d %b %Y %H:%M:%S %Z\").replace(\n        tzinfo=timezone.utc\n    )\n\n\ndef _process_egnyte_file(\n    file_metadata: dict[str, Any],\n    file_content: IO,\n    base_url: str,\n    folder_path: str | None = None,\n) -> Document | None:\n    \"\"\"Process an Egnyte file into a Document object\n\n    Args:\n        file_data: The file data from Egnyte API\n        file_content: The raw content of the file in bytes\n        base_url: The base URL for the Egnyte instance\n        folder_path: Optional folder path to filter results\n    \"\"\"\n    # Skip if file path doesn't match folder path filter\n    if folder_path and not file_metadata[\"path\"].startswith(folder_path):\n        raise ValueError(\n            f\"File path {file_metadata['path']} does not match folder path {folder_path}\"\n        )\n\n    file_name = file_metadata[\"name\"]\n    extension = get_file_ext(file_name)\n\n    # Explicitly excluding image extensions here. TODO: consider allowing images\n    if extension not in OnyxFileExtensions.TEXT_AND_DOCUMENT_EXTENSIONS:\n        logger.warning(f\"Skipping file '{file_name}' with extension '{extension}'\")\n        return None\n\n    # Extract text content based on file type\n    # TODO @wenxi-onyx: convert to extract_text_and_images\n    if extension in OnyxFileExtensions.PLAIN_TEXT_EXTENSIONS:\n        encoding = detect_encoding(file_content)\n        file_content_raw, file_metadata = read_text_file(\n            file_content, encoding=encoding, ignore_onyx_metadata=False\n        )\n    else:\n        file_content_raw = extract_file_text(\n            file=file_content,\n            file_name=file_name,\n            break_on_unprocessable=True,\n        )\n\n    # Build the web URL for the file\n    web_url = f\"{base_url}/navigate/file/{file_metadata['group_id']}\"\n\n    # Create document metadata\n    metadata: dict[str, str | list[str]] = {\n        \"file_path\": file_metadata[\"path\"],\n        \"last_modified\": file_metadata.get(\"last_modified\", \"\"),\n    }\n\n    # Add lock info if present\n    if lock_info := file_metadata.get(\"lock_info\"):\n        metadata[\"lock_owner\"] = (\n            f\"{lock_info.get('first_name', '')} {lock_info.get('last_name', '')}\"\n        )\n\n    # Create the document owners\n    primary_owner = None\n    if uploaded_by := file_metadata.get(\"uploaded_by\"):\n        primary_owner = BasicExpertInfo(\n            email=uploaded_by,  # Using username as email since that's what we have\n        )\n\n    # Create the document\n    return Document(\n        id=f\"egnyte-{file_metadata['entry_id']}\",\n        sections=[TextSection(text=file_content_raw.strip(), link=web_url)],\n        source=DocumentSource.EGNYTE,\n        semantic_identifier=file_name,\n        metadata=metadata,\n        doc_updated_at=(\n            _parse_last_modified(file_metadata[\"last_modified\"])\n            if \"last_modified\" in file_metadata\n            else None\n        ),\n        primary_owners=[primary_owner] if primary_owner else None,\n    )\n\n\nclass EgnyteConnector(LoadConnector, PollConnector, OAuthConnector):\n    class AdditionalOauthKwargs(OAuthConnector.AdditionalOauthKwargs):\n        egnyte_domain: str = Field(\n            title=\"Egnyte Domain\",\n            description=(\n                \"The domain for the Egnyte instance (e.g. 'company' for company.egnyte.com)\"\n            ),\n        )\n\n    def __init__(\n        self,\n        folder_path: str | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        self.domain = \"\"  # will always be set in `load_credentials`\n        self.folder_path = folder_path or \"\"  # Root folder if not specified\n        self.batch_size = batch_size\n        self.access_token: str | None = None\n\n    @classmethod\n    def oauth_id(cls) -> DocumentSource:\n        return DocumentSource.EGNYTE\n\n    @classmethod\n    def oauth_authorization_url(\n        cls,\n        base_domain: str,\n        state: str,\n        additional_kwargs: dict[str, str],\n    ) -> str:\n        if not EGNYTE_CLIENT_ID:\n            raise ValueError(\"EGNYTE_CLIENT_ID environment variable must be set\")\n\n        oauth_kwargs = cls.AdditionalOauthKwargs(**additional_kwargs)\n\n        callback_uri = get_oauth_callback_uri(base_domain, \"egnyte\")\n        return (\n            f\"https://{oauth_kwargs.egnyte_domain}.egnyte.com/puboauth/token\"\n            f\"?client_id={EGNYTE_CLIENT_ID}\"\n            f\"&redirect_uri={callback_uri}\"\n            f\"&scope=Egnyte.filesystem\"\n            f\"&state={state}\"\n            f\"&response_type=code\"\n        )\n\n    @classmethod\n    def oauth_code_to_token(\n        cls,\n        base_domain: str,\n        code: str,\n        additional_kwargs: dict[str, str],\n    ) -> dict[str, Any]:\n        if not EGNYTE_CLIENT_ID:\n            raise ValueError(\"EGNYTE_CLIENT_ID environment variable must be set\")\n        if not EGNYTE_CLIENT_SECRET:\n            raise ValueError(\"EGNYTE_CLIENT_SECRET environment variable must be set\")\n\n        oauth_kwargs = cls.AdditionalOauthKwargs(**additional_kwargs)\n\n        # Exchange code for token\n        url = f\"https://{oauth_kwargs.egnyte_domain}.egnyte.com/puboauth/token\"\n        redirect_uri = get_oauth_callback_uri(base_domain, \"egnyte\")\n\n        data = {\n            \"client_id\": EGNYTE_CLIENT_ID,\n            \"client_secret\": EGNYTE_CLIENT_SECRET,\n            \"code\": code,\n            \"grant_type\": \"authorization_code\",\n            \"redirect_uri\": redirect_uri,\n            \"scope\": \"Egnyte.filesystem\",\n        }\n        headers = {\"Content-Type\": \"application/x-www-form-urlencoded\"}\n\n        response = request_with_retries(\n            method=\"POST\",\n            url=url,\n            data=data,\n            headers=headers,\n            # try a lot faster since this is a realtime flow\n            backoff=0,\n            delay=0.1,\n        )\n        if not response.ok:\n            raise RuntimeError(f\"Failed to exchange code for token: {response.text}\")\n\n        token_data = response.json()\n        return {\n            \"domain\": oauth_kwargs.egnyte_domain,\n            \"access_token\": token_data[\"access_token\"],\n        }\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self.domain = credentials[\"domain\"]\n        self.access_token = credentials[\"access_token\"]\n        return None\n\n    def _get_files_list(\n        self,\n        path: str,\n    ) -> Generator[dict[str, Any], None, None]:\n        if not self.access_token or not self.domain:\n            raise ConnectorMissingCredentialError(\"Egnyte\")\n\n        headers = {\n            \"Authorization\": f\"Bearer {self.access_token}\",\n        }\n\n        params: dict[str, Any] = {\n            \"list_content\": True,\n        }\n\n        url_encoded_path = quote(path or \"\")\n        url = f\"{_EGNYTE_API_BASE.format(domain=self.domain)}/fs/{url_encoded_path}\"\n        response = request_with_retries(\n            method=\"GET\", url=url, headers=headers, params=params\n        )\n        if not response.ok:\n            raise RuntimeError(f\"Failed to fetch files from Egnyte: {response.text}\")\n\n        data = response.json()\n\n        # Yield files from current directory\n        for file in data.get(\"files\", []):\n            yield file\n\n        # Recursively traverse folders\n        for folder in data.get(\"folders\", []):\n            yield from self._get_files_list(folder[\"path\"])\n\n    def _should_index_file(\n        self,\n        file: dict[str, Any],\n        start_time: datetime | None = None,\n        end_time: datetime | None = None,\n    ) -> bool:\n        \"\"\"Return True if file should be included based on filters.\"\"\"\n        if file[\"is_folder\"]:\n            return False\n\n        file_modified = _parse_last_modified(file[\"last_modified\"])\n        if start_time and file_modified < start_time:\n            return False\n        if end_time and file_modified > end_time:\n            return False\n\n        return True\n\n    def _process_files(\n        self,\n        start_time: datetime | None = None,\n        end_time: datetime | None = None,\n    ) -> Generator[list[Document | HierarchyNode], None, None]:\n        current_batch: list[Document | HierarchyNode] = []\n\n        # Iterate through yielded files and filter them\n        for file in self._get_files_list(self.folder_path):\n            if not self._should_index_file(file, start_time, end_time):\n                logger.debug(f\"Skipping file '{file['path']}'.\")\n                continue\n\n            try:\n                # Set up request with streaming enabled\n                headers = {\n                    \"Authorization\": f\"Bearer {self.access_token}\",\n                }\n                url_encoded_path = quote(file[\"path\"])\n                url = f\"{_EGNYTE_API_BASE.format(domain=self.domain)}/fs-content/{url_encoded_path}\"\n                response = request_with_retries(\n                    method=\"GET\",\n                    url=url,\n                    headers=headers,\n                    stream=True,\n                )\n\n                if not response.ok:\n                    logger.error(\n                        f\"Failed to fetch file content: {file['path']} (status code: {response.status_code})\"\n                    )\n                    continue\n\n                # Stream the response content into a BytesIO buffer\n                buffer = io.BytesIO()\n                for chunk in response.iter_content(chunk_size=8192):\n                    if chunk:\n                        buffer.write(chunk)\n\n                # Reset buffer's position to the start\n                buffer.seek(0)\n\n                # Process the streamed file content\n                doc = _process_egnyte_file(\n                    file_metadata=file,\n                    file_content=buffer,\n                    base_url=_EGNYTE_APP_BASE.format(domain=self.domain),\n                    folder_path=self.folder_path,\n                )\n\n                if doc is not None:\n                    current_batch.append(doc)\n\n                    if len(current_batch) >= self.batch_size:\n                        yield current_batch\n                        current_batch = []\n\n            except Exception:\n                logger.exception(f\"Failed to process file {file['path']}\")\n                continue\n\n        if current_batch:\n            yield current_batch\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        yield from self._process_files()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        start_time = datetime.fromtimestamp(start, tz=timezone.utc)\n        end_time = datetime.fromtimestamp(end, tz=timezone.utc)\n\n        yield from self._process_files(start_time=start_time, end_time=end_time)\n\n\nif __name__ == \"__main__\":\n    connector = EgnyteConnector()\n    connector.load_credentials(\n        {\n            \"domain\": os.environ[\"EGNYTE_DOMAIN\"],\n            \"access_token\": os.environ[\"EGNYTE_ACCESS_TOKEN\"],\n        }\n    )\n    document_batches = connector.load_from_state()\n    print(next(document_batches))\n"
  },
  {
    "path": "backend/onyx/connectors/exceptions.py",
    "content": "class ValidationError(Exception):\n    \"\"\"General exception for validation errors.\"\"\"\n\n    def __init__(self, message: str):\n        self.message = message\n        super().__init__(self.message)\n\n\nclass ConnectorValidationError(ValidationError):\n    \"\"\"General exception for connector validation errors.\"\"\"\n\n    def __init__(self, message: str):\n        self.message = message\n        super().__init__(self.message)\n\n\nclass UnexpectedValidationError(ValidationError):\n    \"\"\"Raised when an unexpected error occurs during connector validation.\n\n    Unexpected errors don't necessarily mean the credential is invalid,\n    but rather that there was an error during the validation process\n    or we encountered a currently unhandled error case.\n\n    Currently, unexpected validation errors are defined as transient and should not be\n    used to disable the connector.\n    \"\"\"\n\n    def __init__(self, message: str = \"Unexpected error during connector validation\"):\n        super().__init__(message)\n\n\nclass CredentialInvalidError(ConnectorValidationError):\n    \"\"\"Raised when a connector's credential is invalid.\"\"\"\n\n    def __init__(self, message: str = \"Credential is invalid\"):\n        super().__init__(message)\n\n\nclass CredentialExpiredError(ConnectorValidationError):\n    \"\"\"Raised when a connector's credential is expired.\"\"\"\n\n    def __init__(self, message: str = \"Credential has expired\"):\n        super().__init__(message)\n\n\nclass InsufficientPermissionsError(ConnectorValidationError):\n    \"\"\"Raised when the credential does not have sufficient API permissions.\"\"\"\n\n    def __init__(\n        self, message: str = \"Insufficient permissions for the requested operation\"\n    ):\n        super().__init__(message)\n"
  },
  {
    "path": "backend/onyx/connectors/factory.py",
    "content": "import importlib\nfrom typing import Any\nfrom typing import Type\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import INTEGRATION_TESTS_MODE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled\nfrom onyx.connectors.credentials_provider import OnyxDBCredentialsProvider\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.interfaces import BaseConnector\nfrom onyx.connectors.interfaces import CheckpointedConnector\nfrom onyx.connectors.interfaces import CredentialsConnector\nfrom onyx.connectors.interfaces import EventConnector\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.models import InputType\nfrom onyx.connectors.registry import CONNECTOR_CLASS_MAP\nfrom onyx.db.connector import fetch_connector_by_id\nfrom onyx.db.credentials import backend_update_credential_json\nfrom onyx.db.credentials import fetch_credential_by_id\nfrom onyx.db.enums import AccessType\nfrom onyx.db.models import Credential\nfrom shared_configs.contextvars import get_current_tenant_id\n\n\nclass ConnectorMissingException(Exception):\n    pass\n\n\n# Cache for already imported connector classes\n_connector_cache: dict[DocumentSource, Type[BaseConnector]] = {}\n\n\ndef _load_connector_class(source: DocumentSource) -> Type[BaseConnector]:\n    \"\"\"Dynamically load and cache a connector class.\"\"\"\n    if source in _connector_cache:\n        return _connector_cache[source]\n\n    if source not in CONNECTOR_CLASS_MAP:\n        raise ConnectorMissingException(f\"Connector not found for source={source}\")\n\n    mapping = CONNECTOR_CLASS_MAP[source]\n\n    try:\n        module = importlib.import_module(mapping.module_path)\n        connector_class = getattr(module, mapping.class_name)\n        _connector_cache[source] = connector_class\n        return connector_class\n    except (ImportError, AttributeError) as e:\n        raise ConnectorMissingException(\n            f\"Failed to import {mapping.class_name} from {mapping.module_path}: {e}\"\n        )\n\n\ndef _validate_connector_supports_input_type(\n    connector: Type[BaseConnector],\n    input_type: InputType | None,\n    source: DocumentSource,\n) -> None:\n    \"\"\"Validate that a connector supports the requested input type.\"\"\"\n    if input_type is None:\n        return\n\n    # Check each input type requirement separately for clarity\n    load_state_unsupported = input_type == InputType.LOAD_STATE and not issubclass(\n        connector, LoadConnector\n    )\n\n    poll_unsupported = (\n        input_type == InputType.POLL\n        # Either poll or checkpoint works for this, in the future\n        # all connectors should be checkpoint connectors\n        and (\n            not issubclass(connector, PollConnector)\n            and not issubclass(connector, CheckpointedConnector)\n        )\n    )\n\n    event_unsupported = input_type == InputType.EVENT and not issubclass(\n        connector, EventConnector\n    )\n\n    if any([load_state_unsupported, poll_unsupported, event_unsupported]):\n        raise ConnectorMissingException(\n            f\"Connector for source={source} does not accept input_type={input_type}\"\n        )\n\n\ndef identify_connector_class(\n    source: DocumentSource,\n    input_type: InputType | None = None,\n) -> Type[BaseConnector]:\n    # Load the connector class using lazy loading\n    connector = _load_connector_class(source)\n\n    # Validate connector supports the requested input_type\n    _validate_connector_supports_input_type(connector, input_type, source)\n\n    return connector\n\n\ndef instantiate_connector(\n    db_session: Session,\n    source: DocumentSource,\n    input_type: InputType,\n    connector_specific_config: dict[str, Any],\n    credential: Credential,\n) -> BaseConnector:\n    connector_class = identify_connector_class(source, input_type)\n\n    connector = connector_class(**connector_specific_config)\n\n    if isinstance(connector, CredentialsConnector):\n        provider = OnyxDBCredentialsProvider(\n            get_current_tenant_id(), str(source), credential.id\n        )\n        connector.set_credentials_provider(provider)\n    else:\n        credential_json = (\n            credential.credential_json.get_value(apply_mask=False)\n            if credential.credential_json\n            else {}\n        )\n        new_credentials = connector.load_credentials(credential_json)\n\n        if new_credentials is not None:\n            backend_update_credential_json(credential, new_credentials, db_session)\n\n    connector.set_allow_images(get_image_extraction_and_analysis_enabled())\n\n    return connector\n\n\ndef validate_ccpair_for_user(\n    connector_id: int,\n    credential_id: int,\n    access_type: AccessType,\n    db_session: Session,\n    enforce_creation: bool = True,\n) -> bool:\n    if INTEGRATION_TESTS_MODE:\n        return True\n\n    # Validate the connector settings\n    connector = fetch_connector_by_id(connector_id, db_session)\n    credential = fetch_credential_by_id(\n        credential_id,\n        db_session,\n    )\n\n    if not connector:\n        raise ValueError(\"Connector not found\")\n\n    if (\n        connector.source == DocumentSource.INGESTION_API\n        or connector.source == DocumentSource.MOCK_CONNECTOR\n    ):\n        return True\n\n    if not credential:\n        raise ValueError(\"Credential not found\")\n\n    try:\n        runnable_connector = instantiate_connector(\n            db_session=db_session,\n            source=connector.source,\n            input_type=connector.input_type,\n            connector_specific_config=connector.connector_specific_config,\n            credential=credential,\n        )\n    except ConnectorValidationError as e:\n        raise e\n    except Exception as e:\n        if enforce_creation:\n            raise ConnectorValidationError(str(e))\n        else:\n            return False\n\n    runnable_connector.validate_connector_settings()\n    if access_type == AccessType.SYNC:\n        runnable_connector.validate_perm_sync()\n    return True\n"
  },
  {
    "path": "backend/onyx/connectors/file/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/file/connector.py",
    "content": "import json\nimport os\nfrom datetime import datetime\nfrom datetime import timezone\nfrom pathlib import Path\nfrom typing import Any\nfrom typing import IO\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    process_onyx_metadata,\n)\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.extract_file_text import extract_text_and_images\nfrom onyx.file_processing.extract_file_text import get_file_ext\nfrom onyx.file_processing.file_types import OnyxFileExtensions\nfrom onyx.file_processing.image_utils import store_image_and_create_section\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\ndef _create_image_section(\n    image_data: bytes,\n    parent_file_name: str,\n    display_name: str,\n    media_type: str | None = None,\n    link: str | None = None,\n    idx: int = 0,\n) -> tuple[ImageSection, str | None]:\n    \"\"\"\n    Creates an ImageSection for an image file or embedded image.\n    Stores the image in FileStore but does not generate a summary.\n\n    Args:\n        image_data: Raw image bytes\n        db_session: Database session\n        parent_file_name: Name of the parent file (for embedded images)\n        display_name: Display name for the image\n        idx: Index for embedded images\n\n    Returns:\n        Tuple of (ImageSection, stored_file_name or None)\n    \"\"\"\n    # Create a unique identifier for the image\n    file_id = f\"{parent_file_name}_embedded_{idx}\" if idx > 0 else parent_file_name\n\n    # Store the image and create a section\n    try:\n        section, stored_file_name = store_image_and_create_section(\n            image_data=image_data,\n            file_id=file_id,\n            display_name=display_name,\n            media_type=(\n                media_type if media_type is not None else \"application/octet-stream\"\n            ),\n            link=link,\n            file_origin=FileOrigin.CONNECTOR,\n        )\n        return section, stored_file_name\n    except Exception as e:\n        logger.error(f\"Failed to store image {display_name}: {e}\")\n        raise e\n\n\ndef _process_file(\n    file_id: str,\n    file_name: str,\n    file: IO[Any],\n    metadata: dict[str, Any] | None,\n    pdf_pass: str | None,\n    file_type: str | None,\n) -> list[Document]:\n    \"\"\"\n    Process a file and return a list of Documents.\n    For images, creates ImageSection objects without summarization.\n    For documents with embedded images, extracts and stores the images.\n    \"\"\"\n    if metadata is None:\n        metadata = {}\n\n    # Get file extension and determine file type\n    extension = get_file_ext(file_name)\n\n    if extension not in OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS:\n        logger.warning(\n            f\"Skipping file '{file_name}' with unrecognized extension '{extension}'\"\n        )\n        return []\n\n    # If a zip is uploaded with a metadata file, we can process it here\n    onyx_metadata, custom_tags = process_onyx_metadata(metadata)\n    file_display_name = onyx_metadata.file_display_name or os.path.basename(file_name)\n    time_updated = onyx_metadata.doc_updated_at or datetime.now(timezone.utc)\n    primary_owners = onyx_metadata.primary_owners\n    secondary_owners = onyx_metadata.secondary_owners\n    link = onyx_metadata.link\n\n    # These metadata items are not settable by the user\n    source_type = onyx_metadata.source_type or DocumentSource.FILE\n\n    doc_id = onyx_metadata.document_id or f\"FILE_CONNECTOR__{file_id}\"\n    title = metadata.get(\"title\") or file_display_name\n\n    # 1) If the file itself is an image, handle that scenario quickly\n    if extension in OnyxFileExtensions.IMAGE_EXTENSIONS:\n        # Read the image data\n        image_data = file.read()\n        if not image_data:\n            logger.warning(f\"Empty image file: {file_name}\")\n            return []\n\n        # Create an ImageSection for the image\n        try:\n            section, _ = _create_image_section(\n                image_data=image_data,\n                parent_file_name=file_id,\n                display_name=title,\n                media_type=file_type,\n            )\n\n            return [\n                Document(\n                    id=doc_id,\n                    sections=[section],\n                    source=source_type,\n                    semantic_identifier=file_display_name,\n                    title=title,\n                    doc_updated_at=time_updated,\n                    primary_owners=primary_owners,\n                    secondary_owners=secondary_owners,\n                    metadata=custom_tags,\n                )\n            ]\n        except Exception as e:\n            logger.error(f\"Failed to process image file {file_name}: {e}\")\n            return []\n\n    # 2) Otherwise: text-based approach. Possibly with embedded images.\n    file.seek(0)\n\n    # Extract text and images from the file\n    extraction_result = extract_text_and_images(\n        file=file,\n        file_name=file_name,\n        pdf_pass=pdf_pass,\n        content_type=file_type,\n    )\n\n    # Each file may have file-specific ONYX_METADATA https://docs.onyx.app/admins/connectors/official/file\n    # If so, we should add it to any metadata processed so far\n    if extraction_result.metadata:\n        logger.debug(\n            f\"Found file-specific metadata for {file_name}: {extraction_result.metadata}\"\n        )\n        onyx_metadata, more_custom_tags = process_onyx_metadata(\n            extraction_result.metadata\n        )\n\n        # Add file-specific tags\n        custom_tags.update(more_custom_tags)\n\n        # File-specific metadata overrides metadata processed so far\n        source_type = onyx_metadata.source_type or source_type\n        primary_owners = onyx_metadata.primary_owners or primary_owners\n        secondary_owners = onyx_metadata.secondary_owners or secondary_owners\n        time_updated = onyx_metadata.doc_updated_at or time_updated\n        file_display_name = onyx_metadata.file_display_name or file_display_name\n        title = onyx_metadata.title or onyx_metadata.file_display_name or title\n        link = onyx_metadata.link or link\n\n    # Build sections: first the text as a single Section\n    sections: list[TextSection | ImageSection] = []\n    if extraction_result.text_content.strip():\n        logger.debug(f\"Creating TextSection for {file_name} with link: {link}\")\n        sections.append(\n            TextSection(link=link, text=extraction_result.text_content.strip())\n        )\n\n    # Then any extracted images from docx, PDFs, etc.\n    for idx, (img_data, img_name) in enumerate(\n        extraction_result.embedded_images, start=1\n    ):\n        # Store each embedded image as a separate file in FileStore\n        # and create a section with the image reference\n        try:\n            image_section, stored_file_name = _create_image_section(\n                image_data=img_data,\n                parent_file_name=file_id,\n                display_name=f\"{title} - image {idx}\",\n                media_type=\"application/octet-stream\",  # Default media type for embedded images\n                idx=idx,\n            )\n            sections.append(image_section)\n            logger.debug(\n                f\"Created ImageSection for embedded image {idx} in {file_name}, stored as: {stored_file_name}\"\n            )\n        except Exception as e:\n            logger.warning(\n                f\"Failed to process embedded image {idx} in {file_name}: {e}\"\n            )\n\n    return [\n        Document(\n            id=doc_id,\n            sections=sections,\n            source=source_type,\n            semantic_identifier=file_display_name,\n            title=title,\n            doc_updated_at=time_updated,\n            primary_owners=primary_owners,\n            secondary_owners=secondary_owners,\n            metadata=custom_tags,\n        )\n    ]\n\n\nclass LocalFileConnector(LoadConnector):\n    \"\"\"\n    Connector that reads files from Postgres and yields Documents, including\n    embedded image extraction without summarization.\n\n    file_locations are S3/Filestore UUIDs\n    file_names are the names of the files\n    \"\"\"\n\n    # Note: file_names is a required parameter, but should not break backwards compatibility.\n    # If add_file_names migration is not run, old file connector configs will not have file_names.\n    # file_names is only used for display purposes in the UI and file_locations is used as a fallback.\n    def __init__(\n        self,\n        file_locations: list[Path | str],\n        file_names: list[str] | None = None,  # noqa: ARG002\n        zip_metadata_file_id: str | None = None,\n        zip_metadata: dict[str, Any] | None = None,  # Deprecated, for backwards compat\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        self.file_locations = [str(loc) for loc in file_locations]\n        self.batch_size = batch_size\n        self.pdf_pass: str | None = None\n        self._zip_metadata_file_id = zip_metadata_file_id\n        self._zip_metadata_deprecated = zip_metadata\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self.pdf_pass = credentials.get(\"pdf_password\")\n\n        return None\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        \"\"\"\n        Iterates over each file path, fetches from Postgres, tries to parse text\n        or images, and yields Document batches.\n        \"\"\"\n        # Load metadata dict at start (from file store or deprecated inline format)\n        zip_metadata: dict[str, Any] = {}\n        if self._zip_metadata_file_id:\n            try:\n                file_store = get_default_file_store()\n                metadata_io = file_store.read_file(\n                    file_id=self._zip_metadata_file_id, mode=\"b\"\n                )\n                metadata_bytes = metadata_io.read()\n                loaded_metadata = json.loads(metadata_bytes)\n                if isinstance(loaded_metadata, list):\n                    zip_metadata = {d[\"filename\"]: d for d in loaded_metadata}\n                else:\n                    zip_metadata = loaded_metadata\n            except Exception as e:\n                logger.warning(f\"Failed to load metadata from file store: {e}\")\n        elif self._zip_metadata_deprecated:\n            logger.warning(\n                \"Using deprecated inline zip_metadata dict. Re-upload files to use the new file store format.\"\n            )\n            zip_metadata = self._zip_metadata_deprecated\n\n        documents: list[Document | HierarchyNode] = []\n\n        for file_id in self.file_locations:\n            file_store = get_default_file_store()\n            file_record = file_store.read_file_record(file_id=file_id)\n            if not file_record:\n                # typically an unsupported extension\n                logger.warning(f\"No file record found for '{file_id}' in PG; skipping.\")\n                continue\n\n            metadata = zip_metadata.get(\n                file_record.display_name, {}\n            ) or zip_metadata.get(os.path.basename(file_record.display_name), {})\n            file_io = file_store.read_file(file_id=file_id, mode=\"b\")\n            new_docs = _process_file(\n                file_id=file_id,\n                file_name=file_record.display_name,\n                file=file_io,\n                metadata=metadata,\n                pdf_pass=self.pdf_pass,\n                file_type=file_record.file_type,\n            )\n            documents.extend(new_docs)\n\n            if len(documents) >= self.batch_size:\n                yield documents\n\n                documents = []\n\n        if documents:\n            yield documents\n\n\nif __name__ == \"__main__\":\n    connector = LocalFileConnector(\n        file_locations=[os.environ[\"TEST_FILE\"]],\n        file_names=[os.environ[\"TEST_FILE\"]],\n    )\n    connector.load_credentials({\"pdf_password\": os.environ.get(\"PDF_PASSWORD\")})\n    doc_batches = connector.load_from_state()\n    for batch in doc_batches:\n        print(\"BATCH:\", batch)\n"
  },
  {
    "path": "backend/onyx/connectors/fireflies/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/fireflies/connector.py",
    "content": "from collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import cast\nfrom typing import List\n\nimport requests\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_FIREFLIES_ID_PREFIX = \"FIREFLIES_\"\n\n_FIREFLIES_API_URL = \"https://api.fireflies.ai/graphql\"\n\n_FIREFLIES_TRANSCRIPT_QUERY_SIZE = 50  # Max page size is 50\n\n_FIREFLIES_API_QUERY = \"\"\"\n    query Transcripts($fromDate: DateTime, $toDate: DateTime, $limit: Int!, $skip: Int!) {\n        transcripts(fromDate: $fromDate, toDate: $toDate, limit: $limit, skip: $skip) {\n            id\n            title\n            organizer_email\n            participants\n            date\n            duration\n            transcript_url\n            sentences {\n                text\n                speaker_name\n                start_time\n            }\n        }\n    }\n\"\"\"\n\nONE_MINUTE = 60\n\n\ndef _create_doc_from_transcript(transcript: dict) -> Document | None:\n    sections: List[TextSection] = []\n    current_speaker_name = None\n    current_link = \"\"\n    current_text = \"\"\n\n    if transcript[\"sentences\"] is None:\n        return None\n\n    for sentence in transcript[\"sentences\"]:\n        if sentence[\"speaker_name\"] != current_speaker_name:\n            if current_speaker_name is not None:\n                sections.append(\n                    TextSection(\n                        link=current_link,\n                        text=current_text.strip(),\n                    )\n                )\n            current_speaker_name = sentence.get(\"speaker_name\") or \"Unknown Speaker\"\n            current_link = f\"{transcript['transcript_url']}?t={sentence['start_time']}\"\n            current_text = f\"{current_speaker_name}: \"\n\n        cleaned_text = sentence[\"text\"].replace(\"\\xa0\", \" \")\n        current_text += f\"{cleaned_text} \"\n\n    # Sometimes these links (links with a timestamp) do not work, it is a bug with Fireflies.\n    sections.append(\n        TextSection(\n            link=current_link,\n            text=current_text.strip(),\n        )\n    )\n\n    fireflies_id = _FIREFLIES_ID_PREFIX + transcript[\"id\"]\n\n    meeting_title = transcript[\"title\"] or \"No Title\"\n\n    meeting_date_unix = transcript[\"date\"]\n    meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)\n\n    # Build hierarchy based on meeting date (year-month)\n    year_month = meeting_date.strftime(\"%Y-%m\")\n\n    meeting_organizer_email = transcript[\"organizer_email\"]\n    organizer_email_user_info = [BasicExpertInfo(email=meeting_organizer_email)]\n\n    meeting_participants_email_list = []\n    for participant in transcript.get(\"participants\", []):\n        if participant != meeting_organizer_email and participant:\n            meeting_participants_email_list.append(BasicExpertInfo(email=participant))\n\n    return Document(\n        id=fireflies_id,\n        sections=cast(list[TextSection | ImageSection], sections),\n        source=DocumentSource.FIREFLIES,\n        semantic_identifier=meeting_title,\n        doc_metadata={\n            \"hierarchy\": {\n                \"source_path\": [year_month],\n                \"year_month\": year_month,\n                \"meeting_title\": meeting_title,\n                \"organizer_email\": meeting_organizer_email,\n            }\n        },\n        metadata={\n            k: str(v)\n            for k, v in {\n                \"meeting_date\": meeting_date,\n                \"duration_min\": transcript.get(\"duration\"),\n            }.items()\n            if v is not None\n        },\n        doc_updated_at=meeting_date,\n        primary_owners=organizer_email_user_info,\n        secondary_owners=meeting_participants_email_list,\n    )\n\n\n# If not all transcripts are being indexed, try using a more-recently-generated\n# API key.\nclass FirefliesConnector(PollConnector, LoadConnector):\n    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:\n        self.batch_size = batch_size\n\n    def load_credentials(self, credentials: dict[str, str]) -> None:\n        api_key = credentials.get(\"fireflies_api_key\")\n\n        if not isinstance(api_key, str):\n            raise ConnectorMissingCredentialError(\n                \"The Fireflies API key must be a string\"\n            )\n\n        self.api_key = api_key\n\n        return None\n\n    def _fetch_transcripts(\n        self, start_datetime: str | None = None, end_datetime: str | None = None\n    ) -> Iterator[List[dict]]:\n        if self.api_key is None:\n            raise ConnectorMissingCredentialError(\"Missing API key\")\n\n        headers = {\n            \"Content-Type\": \"application/json\",\n            \"Authorization\": \"Bearer \" + self.api_key,\n        }\n\n        skip = 0\n        variables: dict[str, int | str] = {\n            \"limit\": _FIREFLIES_TRANSCRIPT_QUERY_SIZE,\n        }\n\n        if start_datetime:\n            variables[\"fromDate\"] = start_datetime\n        if end_datetime:\n            variables[\"toDate\"] = end_datetime\n\n        while True:\n            variables[\"skip\"] = skip\n            response = requests.post(\n                _FIREFLIES_API_URL,\n                headers=headers,\n                json={\"query\": _FIREFLIES_API_QUERY, \"variables\": variables},\n            )\n\n            response.raise_for_status()\n\n            if response.status_code == 204:\n                break\n\n            received_transcripts = response.json()\n            parsed_transcripts = received_transcripts.get(\"data\", {}).get(\n                \"transcripts\", []\n            )\n\n            yield parsed_transcripts\n\n            if len(parsed_transcripts) < _FIREFLIES_TRANSCRIPT_QUERY_SIZE:\n                break\n\n            skip += _FIREFLIES_TRANSCRIPT_QUERY_SIZE\n\n    def _process_transcripts(\n        self, start: str | None = None, end: str | None = None\n    ) -> GenerateDocumentsOutput:\n        doc_batch: List[Document | HierarchyNode] = []\n\n        for transcript_batch in self._fetch_transcripts(start, end):\n            for transcript in transcript_batch:\n                if doc := _create_doc_from_transcript(transcript):\n                    doc_batch.append(doc)\n\n                if len(doc_batch) >= self.batch_size:\n                    yield doc_batch\n                    doc_batch = []\n\n        if doc_batch:\n            yield doc_batch\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        return self._process_transcripts()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        # add some leeway to account for any timezone funkiness and/or bad handling\n        # of start time on the Fireflies side\n        start = max(0, start - ONE_MINUTE)\n        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc).strftime(\n            \"%Y-%m-%dT%H:%M:%S.000Z\"\n        )\n        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc).strftime(\n            \"%Y-%m-%dT%H:%M:%S.000Z\"\n        )\n\n        yield from self._process_transcripts(start_datetime, end_datetime)\n"
  },
  {
    "path": "backend/onyx/connectors/freshdesk/__init__,py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/freshdesk/connector.py",
    "content": "import json\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import List\n\nimport requests\nfrom retry import retry\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rl_requests,\n)\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.html_utils import parse_html_page_basic\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_FRESHDESK_ID_PREFIX = \"FRESHDESK_\"\n\n\n_TICKET_FIELDS_TO_INCLUDE = {\n    \"fr_escalated\",\n    \"spam\",\n    \"priority\",\n    \"source\",\n    \"status\",\n    \"type\",\n    \"is_escalated\",\n    \"tags\",\n    \"nr_due_by\",\n    \"nr_escalated\",\n    \"cc_emails\",\n    \"fwd_emails\",\n    \"reply_cc_emails\",\n    \"ticket_cc_emails\",\n    \"support_email\",\n    \"to_emails\",\n}\n\n_SOURCE_NUMBER_TYPE_MAP: dict[int, str] = {\n    1: \"Email\",\n    2: \"Portal\",\n    3: \"Phone\",\n    7: \"Chat\",\n    9: \"Feedback Widget\",\n    10: \"Outbound Email\",\n}\n\n_PRIORITY_NUMBER_TYPE_MAP: dict[int, str] = {\n    1: \"low\",\n    2: \"medium\",\n    3: \"high\",\n    4: \"urgent\",\n}\n\n_STATUS_NUMBER_TYPE_MAP: dict[int, str] = {\n    2: \"open\",\n    3: \"pending\",\n    4: \"resolved\",\n    5: \"closed\",\n}\n\n\n# TODO: unify this with other generic rate limited requests with retries (e.g. Axero, Notion?)\n@retry(tries=3, delay=1, backoff=2)\ndef _rate_limited_freshdesk_get(\n    url: str, auth: tuple, params: dict\n) -> requests.Response:\n    return rl_requests.get(url, auth=auth, params=params)\n\n\ndef _create_metadata_from_ticket(ticket: dict) -> dict:\n    metadata: dict[str, str | list[str]] = {}\n    # Combine all emails into a list so there are no repeated emails\n    email_data: set[str] = set()\n\n    for key, value in ticket.items():\n        # Skip fields that aren't useful for embedding\n        if key not in _TICKET_FIELDS_TO_INCLUDE:\n            continue\n\n        # Skip empty fields\n        if not value or value == \"[]\":\n            continue\n\n        # Convert strings or lists to strings\n        stringified_value: str | list[str]\n        if isinstance(value, list):\n            stringified_value = [str(item) for item in value]\n        else:\n            stringified_value = str(value)\n\n        if \"email\" in key:\n            if isinstance(stringified_value, list):\n                email_data.update(stringified_value)\n            else:\n                email_data.add(stringified_value)\n        else:\n            metadata[key] = stringified_value\n\n    if email_data:\n        metadata[\"emails\"] = list(email_data)\n\n    # Convert source numbers to human-parsable string\n    if source_number := ticket.get(\"source\"):\n        metadata[\"source\"] = _SOURCE_NUMBER_TYPE_MAP.get(\n            source_number, \"Unknown Source Type\"\n        )\n\n    # Convert priority numbers to human-parsable string\n    if priority_number := ticket.get(\"priority\"):\n        metadata[\"priority\"] = _PRIORITY_NUMBER_TYPE_MAP.get(\n            priority_number, \"Unknown Priority\"\n        )\n\n    # Convert status to human-parsable string\n    if status_number := ticket.get(\"status\"):\n        metadata[\"status\"] = _STATUS_NUMBER_TYPE_MAP.get(\n            status_number, \"Unknown Status\"\n        )\n\n    due_by = datetime.fromisoformat(ticket[\"due_by\"].replace(\"Z\", \"+00:00\"))\n    metadata[\"overdue\"] = str(datetime.now(timezone.utc) > due_by)\n\n    return metadata\n\n\ndef _create_doc_from_ticket(ticket: dict, domain: str) -> Document:\n    # Use the ticket description as the text\n    text = f\"Ticket description: {parse_html_page_basic(ticket.get('description_text', ''))}\"\n    metadata = _create_metadata_from_ticket(ticket)\n\n    # This is also used in the ID because it is more unique than the just the ticket ID\n    link = f\"https://{domain}.freshdesk.com/helpdesk/tickets/{ticket['id']}\"\n\n    return Document(\n        id=_FRESHDESK_ID_PREFIX + link,\n        sections=[\n            TextSection(\n                link=link,\n                text=text,\n            )\n        ],\n        source=DocumentSource.FRESHDESK,\n        semantic_identifier=ticket[\"subject\"],\n        metadata=metadata,\n        doc_updated_at=datetime.fromisoformat(\n            ticket[\"updated_at\"].replace(\"Z\", \"+00:00\")\n        ),\n    )\n\n\nclass FreshdeskConnector(PollConnector, LoadConnector):\n    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:\n        self.batch_size = batch_size\n\n    def load_credentials(self, credentials: dict[str, str | int]) -> None:\n        api_key = credentials.get(\"freshdesk_api_key\")\n        domain = credentials.get(\"freshdesk_domain\")\n        if not all(isinstance(cred, str) for cred in [domain, api_key]):\n            raise ConnectorMissingCredentialError(\n                \"All Freshdesk credentials must be strings\"\n            )\n\n        # TODO: Move the domain to the connector-specific configuration instead of part of the credential\n        # Then apply normalization and validation against the config\n        # Clean and normalize the domain URL\n        domain = str(domain).strip().lower()\n\n        # Remove any trailing slashes\n        domain = domain.rstrip(\"/\")\n\n        # Remove protocol if present\n        if domain.startswith((\"http://\", \"https://\")):\n            domain = domain.replace(\"http://\", \"\").replace(\"https://\", \"\")\n\n        # Remove .freshdesk.com suffix and any API paths if present\n        if \".freshdesk.com\" in domain:\n            domain = domain.split(\".freshdesk.com\")[0]\n\n        if not domain:\n            raise ConnectorMissingCredentialError(\"Freshdesk domain cannot be empty\")\n\n        self.api_key = str(api_key)\n        self.domain = domain\n\n    def _fetch_tickets(\n        self,\n        start: datetime | None = None,\n        end: datetime | None = None,  # noqa: ARG002\n    ) -> Iterator[List[dict]]:\n        \"\"\"\n        'end' is not currently used, so we may double fetch tickets created after the indexing\n        starts but before the actual call is made.\n\n        To use 'end' would require us to use the search endpoint but it has limitations,\n        namely having to fetch all IDs and then individually fetch each ticket because there is no\n        'include' field available for this endpoint:\n        https://developers.freshdesk.com/api/#filter_tickets\n        \"\"\"\n        if self.api_key is None or self.domain is None:\n            raise ConnectorMissingCredentialError(\"freshdesk\")\n\n        base_url = f\"https://{self.domain}.freshdesk.com/api/v2/tickets\"\n        params: dict[str, int | str] = {\n            \"include\": \"description\",\n            \"per_page\": 50,\n            \"page\": 1,\n        }\n\n        if start:\n            params[\"updated_since\"] = start.isoformat()\n\n        while True:\n            # Freshdesk API uses API key as the username and any value as the password.\n            response = _rate_limited_freshdesk_get(\n                base_url,\n                auth=(self.api_key, \"CanYouBelieveFreshdeskDoesThis\"),\n                params=params,\n            )\n            response.raise_for_status()\n\n            if response.status_code == 204:\n                break\n\n            tickets = json.loads(response.content)\n            logger.info(\n                f\"Fetched {len(tickets)} tickets from Freshdesk API (Page {params['page']})\"\n            )\n\n            yield tickets\n\n            if len(tickets) < int(params[\"per_page\"]):\n                break\n\n            params[\"page\"] = int(params[\"page\"]) + 1\n\n    def _process_tickets(\n        self, start: datetime | None = None, end: datetime | None = None\n    ) -> GenerateDocumentsOutput:\n        doc_batch: List[Document | HierarchyNode] = []\n\n        for ticket_batch in self._fetch_tickets(start, end):\n            for ticket in ticket_batch:\n                doc_batch.append(_create_doc_from_ticket(ticket, self.domain))\n\n                if len(doc_batch) >= self.batch_size:\n                    yield doc_batch\n                    doc_batch = []\n\n        if doc_batch:\n            yield doc_batch\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        return self._process_tickets()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)\n        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)\n\n        yield from self._process_tickets(start_datetime, end_datetime)\n"
  },
  {
    "path": "backend/onyx/connectors/gitbook/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/gitbook/connector.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom urllib.parse import urljoin\n\nimport requests\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\nGITBOOK_API_BASE = \"https://api.gitbook.com/v1/\"\n\n\nclass GitbookApiClient:\n    def __init__(self, access_token: str) -> None:\n        self.access_token = access_token\n\n    def get(self, endpoint: str, params: dict[str, Any] | None = None) -> Any:\n        headers = {\n            \"Authorization\": f\"Bearer {self.access_token}\",\n            \"Content-Type\": \"application/json\",\n        }\n\n        url = urljoin(GITBOOK_API_BASE, endpoint.lstrip(\"/\"))\n        response = requests.get(url, headers=headers, params=params)\n        response.raise_for_status()\n        return response.json()\n\n    def get_page_content(self, space_id: str, page_id: str) -> dict[str, Any]:\n        return self.get(f\"/spaces/{space_id}/content/page/{page_id}\")\n\n\ndef _extract_text_from_document(document: dict[str, Any]) -> str:\n    \"\"\"Extract text content from GitBook document structure by parsing the document nodes\n    into markdown format.\"\"\"\n\n    def parse_leaf(leaf: dict[str, Any]) -> str:\n        text = leaf.get(\"text\", \"\")\n        leaf.get(\"marks\", [])\n        return text\n\n    def parse_text_node(node: dict[str, Any]) -> str:\n        text = \"\"\n        for leaf in node.get(\"leaves\", []):\n            text += parse_leaf(leaf)\n        return text\n\n    def parse_block_node(node: dict[str, Any]) -> str:\n        block_type = node.get(\"type\", \"\")\n        result = \"\"\n\n        if block_type == \"heading-1\":\n            text = \"\".join(parse_text_node(n) for n in node.get(\"nodes\", []))\n            result = f\"# {text}\\n\\n\"\n\n        elif block_type == \"heading-2\":\n            text = \"\".join(parse_text_node(n) for n in node.get(\"nodes\", []))\n            result = f\"## {text}\\n\\n\"\n\n        elif block_type == \"heading-3\":\n            text = \"\".join(parse_text_node(n) for n in node.get(\"nodes\", []))\n            result = f\"### {text}\\n\\n\"\n\n        elif block_type == \"heading-4\":\n            text = \"\".join(parse_text_node(n) for n in node.get(\"nodes\", []))\n            result = f\"#### {text}\\n\\n\"\n\n        elif block_type == \"heading-5\":\n            text = \"\".join(parse_text_node(n) for n in node.get(\"nodes\", []))\n            result = f\"##### {text}\\n\\n\"\n\n        elif block_type == \"heading-6\":\n            text = \"\".join(parse_text_node(n) for n in node.get(\"nodes\", []))\n            result = f\"###### {text}\\n\\n\"\n\n        elif block_type == \"list-unordered\":\n            for list_item in node.get(\"nodes\", []):\n                paragraph = list_item.get(\"nodes\", [])[0]\n                text = \"\".join(parse_text_node(n) for n in paragraph.get(\"nodes\", []))\n                result += f\"* {text}\\n\"\n            result += \"\\n\"\n\n        elif block_type == \"paragraph\":\n            text = \"\".join(parse_text_node(n) for n in node.get(\"nodes\", []))\n            result = f\"{text}\\n\\n\"\n\n        elif block_type == \"list-tasks\":\n            for task_item in node.get(\"nodes\", []):\n                checked = task_item.get(\"data\", {}).get(\"checked\", False)\n                paragraph = task_item.get(\"nodes\", [])[0]\n                text = \"\".join(parse_text_node(n) for n in paragraph.get(\"nodes\", []))\n                checkbox = \"[x]\" if checked else \"[ ]\"\n                result += f\"- {checkbox} {text}\\n\"\n            result += \"\\n\"\n\n        elif block_type == \"code\":\n            for code_line in node.get(\"nodes\", []):\n                if code_line.get(\"type\") == \"code-line\":\n                    text = \"\".join(\n                        parse_text_node(n) for n in code_line.get(\"nodes\", [])\n                    )\n                    result += f\"{text}\\n\"\n            result += \"\\n\"\n\n        elif block_type == \"blockquote\":\n            for quote_node in node.get(\"nodes\", []):\n                if quote_node.get(\"type\") == \"paragraph\":\n                    text = \"\".join(\n                        parse_text_node(n) for n in quote_node.get(\"nodes\", [])\n                    )\n                    result += f\"> {text}\\n\"\n            result += \"\\n\"\n\n        elif block_type == \"table\":\n            records = node.get(\"data\", {}).get(\"records\", {})\n            definition = node.get(\"data\", {}).get(\"definition\", {})\n            view = node.get(\"data\", {}).get(\"view\", {})\n\n            columns = view.get(\"columns\", [])\n\n            header_cells = []\n            for col_id in columns:\n                col_def = definition.get(col_id, {})\n                header_cells.append(col_def.get(\"title\", \"\"))\n\n            result = \"| \" + \" | \".join(header_cells) + \" |\\n\"\n            result += \"|\" + \"---|\" * len(header_cells) + \"\\n\"\n\n            sorted_records = sorted(\n                records.items(), key=lambda x: x[1].get(\"orderIndex\", \"\")\n            )\n\n            for record_id, record_data in sorted_records:\n                values = record_data.get(\"values\", {})\n                row_cells = []\n                for col_id in columns:\n                    fragment_id = values.get(col_id, \"\")\n                    fragment_text = \"\"\n                    for fragment in node.get(\"fragments\", []):\n                        if fragment.get(\"fragment\") == fragment_id:\n                            for frag_node in fragment.get(\"nodes\", []):\n                                if frag_node.get(\"type\") == \"paragraph\":\n                                    fragment_text = \"\".join(\n                                        parse_text_node(n)\n                                        for n in frag_node.get(\"nodes\", [])\n                                    )\n                                    break\n                    row_cells.append(fragment_text)\n                result += \"| \" + \" | \".join(row_cells) + \" |\\n\"\n\n            result += \"\\n\"\n        return result\n\n    if not document or \"document\" not in document:\n        return \"\"\n\n    markdown = \"\"\n    nodes = document[\"document\"].get(\"nodes\", [])\n\n    for node in nodes:\n        markdown += parse_block_node(node)\n\n    return markdown\n\n\ndef _convert_page_to_document(\n    client: GitbookApiClient, space_id: str, page: dict[str, Any]\n) -> Document:\n    page_id = page[\"id\"]\n    page_content = client.get_page_content(space_id, page_id)\n\n    return Document(\n        id=f\"gitbook-{space_id}-{page_id}\",\n        sections=[\n            TextSection(\n                link=page.get(\"urls\", {}).get(\"app\", \"\"),\n                text=_extract_text_from_document(page_content),\n            )\n        ],\n        source=DocumentSource.GITBOOK,\n        semantic_identifier=page.get(\"title\", \"\"),\n        doc_updated_at=datetime.fromisoformat(page[\"updatedAt\"]).replace(\n            tzinfo=timezone.utc\n        ),\n        metadata={\n            \"path\": page.get(\"path\", \"\"),\n            \"type\": page.get(\"type\", \"\"),\n            \"kind\": page.get(\"kind\", \"\"),\n        },\n    )\n\n\nclass GitbookConnector(LoadConnector, PollConnector):\n    def __init__(\n        self,\n        space_id: str,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        self.space_id = space_id\n        self.batch_size = batch_size\n        self.access_token: str | None = None\n        self.client: GitbookApiClient | None = None\n\n    def load_credentials(self, credentials: dict[str, Any]) -> None:\n        access_token = credentials.get(\"gitbook_api_key\")\n        if not access_token:\n            raise ConnectorMissingCredentialError(\"GitBook access token\")\n        self.access_token = access_token\n        self.client = GitbookApiClient(access_token)\n\n    def _fetch_all_pages(\n        self,\n        start: datetime | None = None,\n        end: datetime | None = None,\n    ) -> GenerateDocumentsOutput:\n        if not self.client:\n            raise ConnectorMissingCredentialError(\"GitBook\")\n\n        try:\n            content = self.client.get(f\"/spaces/{self.space_id}/content/pages\")\n            pages: list[dict[str, Any]] = content.get(\"pages\", [])\n            current_batch: list[Document | HierarchyNode] = []\n\n            logger.info(f\"Found {len(pages)} root pages.\")\n            logger.info(\n                f\"First 20 Page Ids: {[page.get('id', 'Unknown') for page in pages[:20]]}\"\n            )\n\n            while pages:\n                page = pages.pop(0)\n\n                updated_at_raw = page.get(\"updatedAt\")\n                if updated_at_raw is None:\n                    # if updatedAt is not present, that means the page has never been edited\n                    continue\n\n                updated_at = datetime.fromisoformat(updated_at_raw)\n                if start and updated_at < start:\n                    continue\n                if end and updated_at > end:\n                    continue\n\n                current_batch.append(\n                    _convert_page_to_document(self.client, self.space_id, page)\n                )\n\n                if len(current_batch) >= self.batch_size:\n                    yield current_batch\n                    current_batch = []\n\n                pages.extend(page.get(\"pages\", []))\n\n            if current_batch:\n                yield current_batch\n\n        except requests.RequestException as e:\n            logger.error(f\"Error fetching GitBook content: {str(e)}\")\n            raise\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        return self._fetch_all_pages()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)\n        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)\n        return self._fetch_all_pages(start_datetime, end_datetime)\n\n\nif __name__ == \"__main__\":\n    import os\n\n    connector = GitbookConnector(\n        space_id=os.environ[\"GITBOOK_SPACE_ID\"],\n    )\n    connector.load_credentials({\"gitbook_api_key\": os.environ[\"GITBOOK_API_KEY\"]})\n    document_batches = connector.load_from_state()\n    print(next(document_batches))\n"
  },
  {
    "path": "backend/onyx/connectors/github/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/github/connector.py",
    "content": "import copy\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom enum import Enum\nfrom typing import Any\nfrom typing import cast\n\nfrom github import Github\nfrom github import RateLimitExceededException\nfrom github import Repository\nfrom github.GithubException import GithubException\nfrom github.Issue import Issue\nfrom github.NamedUser import NamedUser\nfrom github.PaginatedList import PaginatedList\nfrom github.PullRequest import PullRequest\nfrom pydantic import BaseModel\nfrom typing_extensions import override\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.configs.app_configs import GITHUB_CONNECTOR_BASE_URL\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.connector_runner import ConnectorRunner\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.github.models import SerializedRepository\nfrom onyx.connectors.github.rate_limit_utils import sleep_after_rate_limit_exception\nfrom onyx.connectors.github.utils import deserialize_repository\nfrom onyx.connectors.github.utils import get_external_access_permission\nfrom onyx.connectors.interfaces import CheckpointedConnectorWithPermSync\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import ConnectorCheckpoint\nfrom onyx.connectors.interfaces import ConnectorFailure\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nITEMS_PER_PAGE = 100\nCURSOR_LOG_FREQUENCY = 50\n\n_MAX_NUM_RATE_LIMIT_RETRIES = 5\n\nONE_DAY = timedelta(days=1)\nSLIM_BATCH_SIZE = 100\n# Cases\n# X (from start) standard run, no fallback to cursor-based pagination\n# X (from start) standard run errors, fallback to cursor-based pagination\n#  X error in the middle of a page\n#  X no errors: run to completion\n# X (from checkpoint) standard run, no fallback to cursor-based pagination\n# X (from checkpoint) continue from cursor-based pagination\n#  - retrying\n#  - no retrying\n\n# things to check:\n# checkpoint state on return\n# checkpoint progress (no infinite loop)\n\n\nclass DocMetadata(BaseModel):\n    repo: str\n\n\ndef get_nextUrl_key(pag_list: PaginatedList[PullRequest | Issue]) -> str:\n    if \"_PaginatedList__nextUrl\" in pag_list.__dict__:\n        return \"_PaginatedList__nextUrl\"\n    for key in pag_list.__dict__:\n        if \"__nextUrl\" in key:\n            return key\n    for key in pag_list.__dict__:\n        if \"nextUrl\" in key:\n            return key\n    return \"\"\n\n\ndef get_nextUrl(\n    pag_list: PaginatedList[PullRequest | Issue], nextUrl_key: str\n) -> str | None:\n    return getattr(pag_list, nextUrl_key) if nextUrl_key else None\n\n\ndef set_nextUrl(\n    pag_list: PaginatedList[PullRequest | Issue], nextUrl_key: str, nextUrl: str\n) -> None:\n    if nextUrl_key:\n        setattr(pag_list, nextUrl_key, nextUrl)\n    elif nextUrl:\n        raise ValueError(\"Next URL key not found: \" + str(pag_list.__dict__))\n\n\ndef _paginate_until_error(\n    git_objs: Callable[[], PaginatedList[PullRequest | Issue]],\n    cursor_url: str | None,\n    prev_num_objs: int,\n    cursor_url_callback: Callable[[str | None, int], None],\n    retrying: bool = False,\n) -> Generator[PullRequest | Issue, None, None]:\n    num_objs = prev_num_objs\n    pag_list = git_objs()\n    nextUrl_key = get_nextUrl_key(pag_list)\n    if cursor_url:\n        set_nextUrl(pag_list, nextUrl_key, cursor_url)\n    elif retrying:\n        # if we are retrying, we want to skip the objects retrieved\n        # over previous calls. Unfortunately, this WILL retrieve all\n        # pages before the one we are resuming from, so we really\n        # don't want this case to be hit often\n        logger.warning(\n            \"Retrying from a previous cursor-based pagination call. \"\n            \"This will retrieve all pages before the one we are resuming from, \"\n            \"which may take a while and consume many API calls.\"\n        )\n        pag_list = cast(PaginatedList[PullRequest | Issue], pag_list[prev_num_objs:])\n        num_objs = 0\n\n    try:\n        # this for loop handles cursor-based pagination\n        for issue_or_pr in pag_list:\n            num_objs += 1\n            yield issue_or_pr\n            # used to store the current cursor url in the checkpoint. This value\n            # is updated during iteration over pag_list.\n            cursor_url_callback(get_nextUrl(pag_list, nextUrl_key), num_objs)\n\n            if num_objs % CURSOR_LOG_FREQUENCY == 0:\n                logger.info(\n                    f\"Retrieved {num_objs} objects with current cursor url: {get_nextUrl(pag_list, nextUrl_key)}\"\n                )\n\n    except Exception as e:\n        logger.exception(f\"Error during cursor-based pagination: {e}\")\n        if num_objs - prev_num_objs > 0:\n            raise\n\n        if get_nextUrl(pag_list, nextUrl_key) is not None and not retrying:\n            logger.info(\n                \"Assuming that this error is due to cursor \"\n                \"expiration because no objects were retrieved. \"\n                \"Retrying from the first page.\"\n            )\n            yield from _paginate_until_error(\n                git_objs, None, prev_num_objs, cursor_url_callback, retrying=True\n            )\n            return\n\n        # for no cursor url or if we reach this point after a retry, raise the error\n        raise\n\n\ndef _get_batch_rate_limited(\n    # We pass in a callable because we want git_objs to produce a fresh\n    # PaginatedList each time it's called to avoid using the same object for cursor-based pagination\n    # from a partial offset-based pagination call.\n    git_objs: Callable[[], PaginatedList],\n    page_num: int,\n    cursor_url: str | None,\n    prev_num_objs: int,\n    cursor_url_callback: Callable[[str | None, int], None],\n    github_client: Github,\n    attempt_num: int = 0,\n) -> Generator[PullRequest | Issue, None, None]:\n    if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:\n        raise RuntimeError(\n            \"Re-tried fetching batch too many times. Something is going wrong with fetching objects from Github\"\n        )\n    try:\n        if cursor_url:\n            # when this is set, we are resuming from an earlier\n            # cursor-based pagination call.\n            yield from _paginate_until_error(\n                git_objs, cursor_url, prev_num_objs, cursor_url_callback\n            )\n            return\n        objs = list(git_objs().get_page(page_num))\n        # fetch all data here to disable lazy loading later\n        # this is needed to capture the rate limit exception here (if one occurs)\n        for obj in objs:\n            if hasattr(obj, \"raw_data\"):\n                getattr(obj, \"raw_data\")\n        yield from objs\n    except RateLimitExceededException:\n        sleep_after_rate_limit_exception(github_client)\n        yield from _get_batch_rate_limited(\n            git_objs,\n            page_num,\n            cursor_url,\n            prev_num_objs,\n            cursor_url_callback,\n            github_client,\n            attempt_num + 1,\n        )\n    except GithubException as e:\n        if not (\n            e.status == 422\n            and (\n                \"cursor\" in (e.message or \"\")\n                or \"cursor\" in (e.data or {}).get(\"message\", \"\")\n            )\n        ):\n            raise\n        # Fallback to a cursor-based pagination strategy\n        # This can happen for \"large datasets,\" but there's no documentation\n        # On the error on the web as far as we can tell.\n        # Error message:\n        # \"Pagination with the page parameter is not supported for large datasets,\n        # please use cursor based pagination (after/before)\"\n        yield from _paginate_until_error(\n            git_objs, cursor_url, prev_num_objs, cursor_url_callback\n        )\n\n\ndef _get_userinfo(user: NamedUser) -> dict[str, str]:\n    def _safe_get(attr_name: str) -> str | None:\n        try:\n            return cast(str | None, getattr(user, attr_name))\n        except GithubException:\n            logger.debug(f\"Error getting {attr_name} for user\")\n            return None\n\n    return {\n        k: v\n        for k, v in {\n            \"login\": _safe_get(\"login\"),\n            \"name\": _safe_get(\"name\"),\n            \"email\": _safe_get(\"email\"),\n        }.items()\n        if v is not None\n    }\n\n\ndef _convert_pr_to_document(\n    pull_request: PullRequest, repo_external_access: ExternalAccess | None\n) -> Document:\n    repo_full_name = pull_request.base.repo.full_name if pull_request.base else \"\"\n    # Split full_name (e.g., \"owner/repo\") into owner and repo\n    parts = repo_full_name.split(\"/\", 1)\n    owner_name = parts[0] if parts else \"\"\n    repo_name = parts[1] if len(parts) > 1 else repo_full_name\n\n    doc_metadata = {\n        \"repo\": repo_full_name,\n        \"hierarchy\": {\n            \"source_path\": [owner_name, repo_name, \"pull_requests\"],\n            \"owner\": owner_name,\n            \"repo\": repo_name,\n            \"object_type\": \"pull_request\",\n        },\n    }\n    return Document(\n        id=pull_request.html_url,\n        sections=[\n            TextSection(link=pull_request.html_url, text=pull_request.body or \"\")\n        ],\n        external_access=repo_external_access,\n        source=DocumentSource.GITHUB,\n        semantic_identifier=f\"{pull_request.number}: {pull_request.title}\",\n        # updated_at is UTC time but is timezone unaware, explicitly add UTC\n        # as there is logic in indexing to prevent wrong timestamped docs\n        # due to local time discrepancies with UTC\n        doc_updated_at=(\n            pull_request.updated_at.replace(tzinfo=timezone.utc)\n            if pull_request.updated_at\n            else None\n        ),\n        # this metadata is used in perm sync\n        doc_metadata=doc_metadata,\n        metadata={\n            k: [str(vi) for vi in v] if isinstance(v, list) else str(v)\n            for k, v in {\n                \"object_type\": \"PullRequest\",\n                \"id\": pull_request.number,\n                \"merged\": pull_request.merged,\n                \"state\": pull_request.state,\n                \"user\": _get_userinfo(pull_request.user) if pull_request.user else None,\n                \"assignees\": [\n                    _get_userinfo(assignee) for assignee in pull_request.assignees\n                ],\n                \"repo\": (\n                    pull_request.base.repo.full_name if pull_request.base else None\n                ),\n                \"num_commits\": str(pull_request.commits),\n                \"num_files_changed\": str(pull_request.changed_files),\n                \"labels\": [label.name for label in pull_request.labels],\n                \"created_at\": (\n                    pull_request.created_at.replace(tzinfo=timezone.utc)\n                    if pull_request.created_at\n                    else None\n                ),\n                \"updated_at\": (\n                    pull_request.updated_at.replace(tzinfo=timezone.utc)\n                    if pull_request.updated_at\n                    else None\n                ),\n                \"closed_at\": (\n                    pull_request.closed_at.replace(tzinfo=timezone.utc)\n                    if pull_request.closed_at\n                    else None\n                ),\n                \"merged_at\": (\n                    pull_request.merged_at.replace(tzinfo=timezone.utc)\n                    if pull_request.merged_at\n                    else None\n                ),\n                \"merged_by\": (\n                    _get_userinfo(pull_request.merged_by)\n                    if pull_request.merged_by\n                    else None\n                ),\n            }.items()\n            if v is not None\n        },\n    )\n\n\ndef _fetch_issue_comments(issue: Issue) -> str:\n    comments = issue.get_comments()\n    return \"\\nComment: \".join(comment.body for comment in comments)\n\n\ndef _convert_issue_to_document(\n    issue: Issue, repo_external_access: ExternalAccess | None\n) -> Document:\n    repo_full_name = issue.repository.full_name if issue.repository else \"\"\n    # Split full_name (e.g., \"owner/repo\") into owner and repo\n    parts = repo_full_name.split(\"/\", 1)\n    owner_name = parts[0] if parts else \"\"\n    repo_name = parts[1] if len(parts) > 1 else repo_full_name\n\n    doc_metadata = {\n        \"repo\": repo_full_name,\n        \"hierarchy\": {\n            \"source_path\": [owner_name, repo_name, \"issues\"],\n            \"owner\": owner_name,\n            \"repo\": repo_name,\n            \"object_type\": \"issue\",\n        },\n    }\n    return Document(\n        id=issue.html_url,\n        sections=[TextSection(link=issue.html_url, text=issue.body or \"\")],\n        source=DocumentSource.GITHUB,\n        external_access=repo_external_access,\n        semantic_identifier=f\"{issue.number}: {issue.title}\",\n        # updated_at is UTC time but is timezone unaware\n        doc_updated_at=issue.updated_at.replace(tzinfo=timezone.utc),\n        # this metadata is used in perm sync\n        doc_metadata=doc_metadata,\n        metadata={\n            k: [str(vi) for vi in v] if isinstance(v, list) else str(v)\n            for k, v in {\n                \"object_type\": \"Issue\",\n                \"id\": issue.number,\n                \"state\": issue.state,\n                \"user\": _get_userinfo(issue.user) if issue.user else None,\n                \"assignees\": [_get_userinfo(assignee) for assignee in issue.assignees],\n                \"repo\": issue.repository.full_name if issue.repository else None,\n                \"labels\": [label.name for label in issue.labels],\n                \"created_at\": (\n                    issue.created_at.replace(tzinfo=timezone.utc)\n                    if issue.created_at\n                    else None\n                ),\n                \"updated_at\": (\n                    issue.updated_at.replace(tzinfo=timezone.utc)\n                    if issue.updated_at\n                    else None\n                ),\n                \"closed_at\": (\n                    issue.closed_at.replace(tzinfo=timezone.utc)\n                    if issue.closed_at\n                    else None\n                ),\n                \"closed_by\": (\n                    _get_userinfo(issue.closed_by) if issue.closed_by else None\n                ),\n            }.items()\n            if v is not None\n        },\n    )\n\n\nclass GithubConnectorStage(Enum):\n    START = \"start\"\n    PRS = \"prs\"\n    ISSUES = \"issues\"\n\n\nclass GithubConnectorCheckpoint(ConnectorCheckpoint):\n    stage: GithubConnectorStage\n    curr_page: int\n\n    cached_repo_ids: list[int] | None = None\n    cached_repo: SerializedRepository | None = None\n\n    # Used for the fallback cursor-based pagination strategy\n    num_retrieved: int\n    cursor_url: str | None = None\n\n    def reset(self) -> None:\n        \"\"\"\n        Resets curr_page, num_retrieved, and cursor_url to their initial values (0, 0, None)\n        \"\"\"\n        self.curr_page = 0\n        self.num_retrieved = 0\n        self.cursor_url = None\n\n\ndef make_cursor_url_callback(\n    checkpoint: GithubConnectorCheckpoint,\n) -> Callable[[str | None, int], None]:\n    def cursor_url_callback(cursor_url: str | None, num_objs: int) -> None:\n        # we want to maintain the old cursor url so code after retrieval\n        # can determine that we are using the fallback cursor-based pagination strategy\n        if cursor_url:\n            checkpoint.cursor_url = cursor_url\n        checkpoint.num_retrieved = num_objs\n\n    return cursor_url_callback\n\n\nclass GithubConnector(CheckpointedConnectorWithPermSync[GithubConnectorCheckpoint]):\n    def __init__(\n        self,\n        repo_owner: str,\n        repositories: str | None = None,\n        state_filter: str = \"all\",\n        include_prs: bool = True,\n        include_issues: bool = False,\n    ) -> None:\n        self.repo_owner = repo_owner\n        self.repositories = repositories\n        self.state_filter = state_filter\n        self.include_prs = include_prs\n        self.include_issues = include_issues\n        self.github_client: Github | None = None\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        # defaults to 30 items per page, can be set to as high as 100\n        self.github_client = (\n            Github(\n                credentials[\"github_access_token\"],\n                base_url=GITHUB_CONNECTOR_BASE_URL,\n                per_page=ITEMS_PER_PAGE,\n            )\n            if GITHUB_CONNECTOR_BASE_URL\n            else Github(credentials[\"github_access_token\"], per_page=ITEMS_PER_PAGE)\n        )\n        return None\n\n    def get_github_repo(\n        self, github_client: Github, attempt_num: int = 0\n    ) -> Repository.Repository:\n        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:\n            raise RuntimeError(\n                \"Re-tried fetching repo too many times. Something is going wrong with fetching objects from Github\"\n            )\n\n        try:\n            return github_client.get_repo(f\"{self.repo_owner}/{self.repositories}\")\n        except RateLimitExceededException:\n            sleep_after_rate_limit_exception(github_client)\n            return self.get_github_repo(github_client, attempt_num + 1)\n\n    def get_github_repos(\n        self, github_client: Github, attempt_num: int = 0\n    ) -> list[Repository.Repository]:\n        \"\"\"Get specific repositories based on comma-separated repo_name string.\"\"\"\n        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:\n            raise RuntimeError(\n                \"Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github\"\n            )\n\n        try:\n            repos = []\n            # Split repo_name by comma and strip whitespace\n            repo_names = [\n                name.strip() for name in (cast(str, self.repositories)).split(\",\")\n            ]\n\n            for repo_name in repo_names:\n                if repo_name:  # Skip empty strings\n                    try:\n                        repo = github_client.get_repo(f\"{self.repo_owner}/{repo_name}\")\n                        repos.append(repo)\n                    except GithubException as e:\n                        logger.warning(\n                            f\"Could not fetch repo {self.repo_owner}/{repo_name}: {e}\"\n                        )\n\n            return repos\n        except RateLimitExceededException:\n            sleep_after_rate_limit_exception(github_client)\n            return self.get_github_repos(github_client, attempt_num + 1)\n\n    def get_all_repos(\n        self, github_client: Github, attempt_num: int = 0\n    ) -> list[Repository.Repository]:\n        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:\n            raise RuntimeError(\n                \"Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github\"\n            )\n\n        try:\n            # Try to get organization first\n            try:\n                org = github_client.get_organization(self.repo_owner)\n                return list(org.get_repos())\n\n            except GithubException:\n                # If not an org, try as a user\n                user = github_client.get_user(self.repo_owner)\n                return list(user.get_repos())\n        except RateLimitExceededException:\n            sleep_after_rate_limit_exception(github_client)\n            return self.get_all_repos(github_client, attempt_num + 1)\n\n    def fetch_configured_repos(self) -> list[Repository.Repository]:\n        \"\"\"\n        Fetch the configured repositories based on the connector settings.\n\n        Returns:\n            list[Repository.Repository]: The configured repositories.\n        \"\"\"\n        assert self.github_client is not None  # mypy\n        if self.repositories:\n            if \",\" in self.repositories:\n                return self.get_github_repos(self.github_client)\n            else:\n                return [self.get_github_repo(self.github_client)]\n        else:\n            return self.get_all_repos(self.github_client)\n\n    def _pull_requests_func(\n        self, repo: Repository.Repository\n    ) -> Callable[[], PaginatedList[PullRequest]]:\n        return lambda: repo.get_pulls(\n            state=self.state_filter, sort=\"updated\", direction=\"desc\"\n        )\n\n    def _issues_func(\n        self, repo: Repository.Repository\n    ) -> Callable[[], PaginatedList[Issue]]:\n        return lambda: repo.get_issues(\n            state=self.state_filter, sort=\"updated\", direction=\"desc\"\n        )\n\n    def _fetch_from_github(\n        self,\n        checkpoint: GithubConnectorCheckpoint,\n        start: datetime | None = None,\n        end: datetime | None = None,\n        include_permissions: bool = False,\n    ) -> Generator[Document | ConnectorFailure, None, GithubConnectorCheckpoint]:\n        if self.github_client is None:\n            raise ConnectorMissingCredentialError(\"GitHub\")\n\n        checkpoint = copy.deepcopy(checkpoint)\n\n        # First run of the connector, fetch all repos and store in checkpoint\n        if checkpoint.cached_repo_ids is None:\n            repos = self.fetch_configured_repos()\n            if not repos:\n                checkpoint.has_more = False\n                return checkpoint\n\n            curr_repo = repos.pop()\n            checkpoint.cached_repo_ids = [repo.id for repo in repos]\n            checkpoint.cached_repo = SerializedRepository(\n                id=curr_repo.id,\n                headers=curr_repo.raw_headers,\n                raw_data=curr_repo.raw_data,\n            )\n            checkpoint.stage = GithubConnectorStage.PRS\n            checkpoint.curr_page = 0\n            # save checkpoint with repo ids retrieved\n            return checkpoint\n\n        if checkpoint.cached_repo is None:\n            raise ValueError(\"No repo saved in checkpoint\")\n\n        # Deserialize the repository from the checkpoint\n        repo = deserialize_repository(checkpoint.cached_repo, self.github_client)\n\n        cursor_url_callback = make_cursor_url_callback(checkpoint)\n        repo_external_access: ExternalAccess | None = None\n        if include_permissions:\n            repo_external_access = get_external_access_permission(\n                repo, self.github_client\n            )\n        if self.include_prs and checkpoint.stage == GithubConnectorStage.PRS:\n            logger.info(f\"Fetching PRs for repo: {repo.name}\")\n\n            pr_batch = _get_batch_rate_limited(\n                self._pull_requests_func(repo),\n                checkpoint.curr_page,\n                checkpoint.cursor_url,\n                checkpoint.num_retrieved,\n                cursor_url_callback,\n                self.github_client,\n            )\n            checkpoint.curr_page += 1  # NOTE: not used for cursor-based fallback\n            done_with_prs = False\n            num_prs = 0\n            pr = None\n            for pr in pr_batch:\n                num_prs += 1\n\n                # we iterate backwards in time, so at this point we stop processing prs\n                if (\n                    start is not None\n                    and pr.updated_at\n                    and pr.updated_at.replace(tzinfo=timezone.utc) < start\n                ):\n                    done_with_prs = True\n                    break\n                # Skip PRs updated after the end date\n                if (\n                    end is not None\n                    and pr.updated_at\n                    and pr.updated_at.replace(tzinfo=timezone.utc) > end\n                ):\n                    continue\n                try:\n                    yield _convert_pr_to_document(\n                        cast(PullRequest, pr), repo_external_access\n                    )\n                except Exception as e:\n                    error_msg = f\"Error converting PR to document: {e}\"\n                    logger.exception(error_msg)\n                    yield ConnectorFailure(\n                        failed_document=DocumentFailure(\n                            document_id=str(pr.id), document_link=pr.html_url\n                        ),\n                        failure_message=error_msg,\n                        exception=e,\n                    )\n                    continue\n\n            # If we reach this point with a cursor url in the checkpoint, we were using\n            # the fallback cursor-based pagination strategy. That strategy tries to get all\n            # PRs, so having curosr_url set means we are done with prs. However, we need to\n            # return AFTER the checkpoint reset to avoid infinite loops.\n\n            # if we found any PRs on the page and there are more PRs to get, return the checkpoint.\n            # In offset mode, while indexing without time constraints, the pr batch\n            # will be empty when we're done.\n            used_cursor = checkpoint.cursor_url is not None\n            logger.info(f\"Fetched {num_prs} PRs for repo: {repo.name}\")\n            if num_prs > 0 and not done_with_prs and not used_cursor:\n                return checkpoint\n\n            # if we went past the start date during the loop or there are no more\n            # prs to get, we move on to issues\n            checkpoint.stage = GithubConnectorStage.ISSUES\n            checkpoint.reset()\n\n            if used_cursor:\n                # save the checkpoint after changing stage; next run will continue from issues\n                return checkpoint\n\n        checkpoint.stage = GithubConnectorStage.ISSUES\n\n        if self.include_issues and checkpoint.stage == GithubConnectorStage.ISSUES:\n            logger.info(f\"Fetching issues for repo: {repo.name}\")\n\n            issue_batch = list(\n                _get_batch_rate_limited(\n                    self._issues_func(repo),\n                    checkpoint.curr_page,\n                    checkpoint.cursor_url,\n                    checkpoint.num_retrieved,\n                    cursor_url_callback,\n                    self.github_client,\n                )\n            )\n            logger.info(f\"Fetched {len(issue_batch)} issues for repo: {repo.name}\")\n            checkpoint.curr_page += 1\n            done_with_issues = False\n            num_issues = 0\n            for issue in issue_batch:\n                num_issues += 1\n                issue = cast(Issue, issue)\n                # we iterate backwards in time, so at this point we stop processing prs\n                if (\n                    start is not None\n                    and issue.updated_at.replace(tzinfo=timezone.utc) < start\n                ):\n                    done_with_issues = True\n                    break\n                # Skip PRs updated after the end date\n                if (\n                    end is not None\n                    and issue.updated_at.replace(tzinfo=timezone.utc) > end\n                ):\n                    continue\n\n                if issue.pull_request is not None:\n                    # PRs are handled separately\n                    continue\n\n                try:\n                    yield _convert_issue_to_document(issue, repo_external_access)\n                except Exception as e:\n                    error_msg = f\"Error converting issue to document: {e}\"\n                    logger.exception(error_msg)\n                    yield ConnectorFailure(\n                        failed_document=DocumentFailure(\n                            document_id=str(issue.id),\n                            document_link=issue.html_url,\n                        ),\n                        failure_message=error_msg,\n                        exception=e,\n                    )\n                    continue\n\n            logger.info(f\"Fetched {num_issues} issues for repo: {repo.name}\")\n            # if we found any issues on the page, and we're not done, return the checkpoint.\n            # don't return if we're using cursor-based pagination to avoid infinite loops\n            if num_issues > 0 and not done_with_issues and not checkpoint.cursor_url:\n                return checkpoint\n\n            # if we went past the start date during the loop or there are no more\n            # issues to get, we move on to the next repo\n            checkpoint.stage = GithubConnectorStage.PRS\n            checkpoint.reset()\n\n        checkpoint.has_more = len(checkpoint.cached_repo_ids) > 0\n        if checkpoint.cached_repo_ids:\n            next_id = checkpoint.cached_repo_ids.pop()\n            next_repo = self.github_client.get_repo(next_id)\n            checkpoint.cached_repo = SerializedRepository(\n                id=next_id,\n                headers=next_repo.raw_headers,\n                raw_data=next_repo.raw_data,\n            )\n            checkpoint.stage = GithubConnectorStage.PRS\n            checkpoint.reset()\n\n        if checkpoint.cached_repo_ids:\n            logger.info(\n                f\"{len(checkpoint.cached_repo_ids)} repos remaining (IDs: {checkpoint.cached_repo_ids})\"\n            )\n        else:\n            logger.info(\"No more repos remaining\")\n\n        return checkpoint\n\n    def _load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: GithubConnectorCheckpoint,\n        include_permissions: bool = False,\n    ) -> CheckpointOutput[GithubConnectorCheckpoint]:\n        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)\n        # add a day for timezone safety\n        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc) + ONE_DAY\n\n        # Move start time back by 3 hours, since some Issues/PRs are getting dropped\n        # Could be due to delayed processing on GitHub side\n        # The non-updated issues since last poll will be shortcut-ed and not embedded\n        adjusted_start_datetime = start_datetime - timedelta(hours=3)\n\n        epoch = datetime.fromtimestamp(0, tz=timezone.utc)\n        if adjusted_start_datetime < epoch:\n            adjusted_start_datetime = epoch\n\n        return self._fetch_from_github(\n            checkpoint,\n            start=adjusted_start_datetime,\n            end=end_datetime,\n            include_permissions=include_permissions,\n        )\n\n    @override\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: GithubConnectorCheckpoint,\n    ) -> CheckpointOutput[GithubConnectorCheckpoint]:\n        return self._load_from_checkpoint(\n            start, end, checkpoint, include_permissions=False\n        )\n\n    @override\n    def load_from_checkpoint_with_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: GithubConnectorCheckpoint,\n    ) -> CheckpointOutput[GithubConnectorCheckpoint]:\n        return self._load_from_checkpoint(\n            start, end, checkpoint, include_permissions=True\n        )\n\n    def validate_connector_settings(self) -> None:\n        if self.github_client is None:\n            raise ConnectorMissingCredentialError(\"GitHub credentials not loaded.\")\n\n        if not self.repo_owner:\n            raise ConnectorValidationError(\n                \"Invalid connector settings: 'repo_owner' must be provided.\"\n            )\n\n        try:\n            if self.repositories:\n                if \",\" in self.repositories:\n                    # Multiple repositories specified\n                    repo_names = [name.strip() for name in self.repositories.split(\",\")]\n                    if not repo_names:\n                        raise ConnectorValidationError(\n                            \"Invalid connector settings: No valid repository names provided.\"\n                        )\n\n                    # Validate at least one repository exists and is accessible\n                    valid_repos = False\n                    validation_errors = []\n\n                    for repo_name in repo_names:\n                        if not repo_name:\n                            continue\n\n                        try:\n                            test_repo = self.github_client.get_repo(\n                                f\"{self.repo_owner}/{repo_name}\"\n                            )\n                            logger.info(\n                                f\"Successfully accessed repository: {self.repo_owner}/{repo_name}\"\n                            )\n                            test_repo.get_contents(\"\")\n                            valid_repos = True\n                            # If at least one repo is valid, we can proceed\n                            break\n                        except GithubException as e:\n                            validation_errors.append(\n                                f\"Repository '{repo_name}': {e.data.get('message', str(e))}\"\n                            )\n\n                    if not valid_repos:\n                        error_msg = (\n                            \"None of the specified repositories could be accessed: \"\n                        )\n                        error_msg += \", \".join(validation_errors)\n                        raise ConnectorValidationError(error_msg)\n                else:\n                    # Single repository (backward compatibility)\n                    test_repo = self.github_client.get_repo(\n                        f\"{self.repo_owner}/{self.repositories}\"\n                    )\n                    test_repo.get_contents(\"\")\n            else:\n                # Try to get organization first\n                try:\n                    org = self.github_client.get_organization(self.repo_owner)\n                    total_count = org.get_repos().totalCount\n                    if total_count == 0:\n                        raise ConnectorValidationError(\n                            f\"Found no repos for organization: {self.repo_owner}. Does the credential have the right scopes?\"\n                        )\n                except GithubException as e:\n                    # Check for missing SSO\n                    MISSING_SSO_ERROR_MESSAGE = \"You must grant your Personal Access token access to this organization\".lower()\n                    if MISSING_SSO_ERROR_MESSAGE in str(e).lower():\n                        SSO_GUIDE_LINK = (\n                            \"https://docs.github.com/en/enterprise-cloud@latest/authentication/\"\n                            \"authenticating-with-saml-single-sign-on/\"\n                            \"authorizing-a-personal-access-token-for-use-with-saml-single-sign-on\"\n                        )\n                        raise ConnectorValidationError(\n                            f\"Your GitHub token is missing authorization to access the \"\n                            f\"`{self.repo_owner}` organization. Please follow the guide to \"\n                            f\"authorize your token: {SSO_GUIDE_LINK}\"\n                        )\n                    # If not an org, try as a user\n                    user = self.github_client.get_user(self.repo_owner)\n\n                    # Check if we can access any repos\n                    total_count = user.get_repos().totalCount\n                    if total_count == 0:\n                        raise ConnectorValidationError(\n                            f\"Found no repos for user: {self.repo_owner}. Does the credential have the right scopes?\"\n                        )\n\n        except RateLimitExceededException:\n            raise UnexpectedValidationError(\n                \"Validation failed due to GitHub rate-limits being exceeded. Please try again later.\"\n            )\n\n        except GithubException as e:\n            if e.status == 401:\n                raise CredentialExpiredError(\n                    \"GitHub credential appears to be invalid or expired (HTTP 401).\"\n                )\n            elif e.status == 403:\n                raise InsufficientPermissionsError(\n                    \"Your GitHub token does not have sufficient permissions for this repository (HTTP 403).\"\n                )\n            elif e.status == 404:\n                if self.repositories:\n                    if \",\" in self.repositories:\n                        raise ConnectorValidationError(\n                            f\"None of the specified GitHub repositories could be found for owner: {self.repo_owner}\"\n                        )\n                    else:\n                        raise ConnectorValidationError(\n                            f\"GitHub repository not found with name: {self.repo_owner}/{self.repositories}\"\n                        )\n                else:\n                    raise ConnectorValidationError(\n                        f\"GitHub user or organization not found: {self.repo_owner}\"\n                    )\n            else:\n                raise ConnectorValidationError(\n                    f\"Unexpected GitHub error (status={e.status}): {e.data}\"\n                )\n\n        except Exception as exc:\n            raise Exception(\n                f\"Unexpected error during GitHub settings validation: {exc}\"\n            )\n\n    def validate_checkpoint_json(\n        self, checkpoint_json: str\n    ) -> GithubConnectorCheckpoint:\n        return GithubConnectorCheckpoint.model_validate_json(checkpoint_json)\n\n    def build_dummy_checkpoint(self) -> GithubConnectorCheckpoint:\n        return GithubConnectorCheckpoint(\n            stage=GithubConnectorStage.PRS, curr_page=0, has_more=True, num_retrieved=0\n        )\n\n\nif __name__ == \"__main__\":\n    import os\n\n    # Initialize the connector\n    connector = GithubConnector(\n        repo_owner=os.environ[\"REPO_OWNER\"],\n        repositories=os.environ.get(\"REPOSITORIES\"),\n    )\n    connector.load_credentials(\n        {\"github_access_token\": os.environ[\"ACCESS_TOKEN_GITHUB\"]}\n    )\n\n    if connector.github_client:\n        get_external_access_permission(\n            connector.get_github_repos(connector.github_client).pop(),\n            connector.github_client,\n        )\n\n    # Create a time range from epoch to now\n    end_time = datetime.now(timezone.utc)\n    start_time = datetime.fromtimestamp(0, tz=timezone.utc)\n    time_range = (start_time, end_time)\n\n    # Initialize the runner with a batch size of 10\n    runner: ConnectorRunner[GithubConnectorCheckpoint] = ConnectorRunner(\n        connector, batch_size=10, include_permissions=False, time_range=time_range\n    )\n\n    # Get initial checkpoint\n    checkpoint = connector.build_dummy_checkpoint()\n\n    # Run the connector\n    while checkpoint.has_more:\n        for doc_batch, hierarchy_node_batch, failure, next_checkpoint in runner.run(\n            checkpoint\n        ):\n            if doc_batch:\n                print(f\"Retrieved batch of {len(doc_batch)} documents\")\n                for doc in doc_batch:\n                    print(f\"Document: {doc.semantic_identifier}\")\n            if failure:\n                print(f\"Failure: {failure.failure_message}\")\n            if next_checkpoint:\n                checkpoint = next_checkpoint\n"
  },
  {
    "path": "backend/onyx/connectors/github/models.py",
    "content": "from typing import Any\n\nfrom github import Repository\nfrom github.Requester import Requester\nfrom pydantic import BaseModel\n\n\nclass SerializedRepository(BaseModel):\n    # id is part of the raw_data as well, just pulled out for convenience\n    id: int\n    headers: dict[str, str | int]\n    raw_data: dict[str, Any]\n\n    def to_Repository(self, requester: Requester) -> Repository.Repository:\n        return Repository.Repository(\n            requester, self.headers, self.raw_data, completed=True\n        )\n"
  },
  {
    "path": "backend/onyx/connectors/github/rate_limit_utils.py",
    "content": "import time\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nfrom github import Github\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef sleep_after_rate_limit_exception(github_client: Github) -> None:\n    \"\"\"\n    Sleep until the GitHub rate limit resets.\n\n    Args:\n        github_client: The GitHub client that hit the rate limit\n    \"\"\"\n    sleep_time = github_client.get_rate_limit().core.reset.replace(\n        tzinfo=timezone.utc\n    ) - datetime.now(tz=timezone.utc)\n    sleep_time += timedelta(minutes=1)  # add an extra minute just to be safe\n    logger.notice(f\"Ran into Github rate-limit. Sleeping {sleep_time.seconds} seconds.\")\n    time.sleep(sleep_time.total_seconds())\n"
  },
  {
    "path": "backend/onyx/connectors/github/utils.py",
    "content": "from collections.abc import Callable\nfrom typing import cast\n\nfrom github import Github\nfrom github.Repository import Repository\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.github.models import SerializedRepository\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import global_version\n\nlogger = setup_logger()\n\n\ndef get_external_access_permission(\n    repo: Repository, github_client: Github\n) -> ExternalAccess:\n    \"\"\"\n    Get the external access permission for a repository.\n    This functionality requires Enterprise Edition.\n    \"\"\"\n    # Check if EE is enabled\n    if not global_version.is_ee_version():\n        # For the MIT version, return an empty ExternalAccess (private document)\n        return ExternalAccess.empty()\n\n    # Fetch the EE implementation\n    ee_get_external_access_permission = cast(\n        Callable[[Repository, Github, bool], ExternalAccess],\n        fetch_versioned_implementation(\n            \"onyx.external_permissions.github.utils\",\n            \"get_external_access_permission\",\n        ),\n    )\n\n    return ee_get_external_access_permission(repo, github_client, True)\n\n\ndef deserialize_repository(\n    cached_repo: SerializedRepository, github_client: Github\n) -> Repository:\n    \"\"\"\n    Deserialize a SerializedRepository back into a Repository object.\n    \"\"\"\n    # Try to access the requester - different PyGithub versions may use different attribute names\n    try:\n        # Try to get the requester using getattr to avoid linter errors\n        requester = getattr(github_client, \"_requester\", None)\n        if requester is None:\n            requester = getattr(github_client, \"_Github__requester\", None)\n        if requester is None:\n            # If we can't find the requester attribute, we need to fall back to recreating the repo\n            raise AttributeError(\"Could not find requester attribute\")\n\n        return cached_repo.to_Repository(requester)\n    except Exception as e:\n        # If all else fails, re-fetch the repo directly\n        logger.warning(\n            f\"Failed to deserialize repository: {e}. Attempting to re-fetch.\"\n        )\n        repo_id = cached_repo.id\n        return github_client.get_repo(repo_id)\n"
  },
  {
    "path": "backend/onyx/connectors/gitlab/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/gitlab/connector.py",
    "content": "import fnmatch\nimport itertools\nfrom collections import deque\nfrom collections.abc import Iterable\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import TypeVar\n\nimport gitlab\nimport pytz\nfrom gitlab.v4.objects import Project\n\nfrom onyx.configs.app_configs import GITLAB_CONNECTOR_INCLUDE_CODE_FILES\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\n\nT = TypeVar(\"T\")\n\n\nlogger = setup_logger()\n\n# List of directories/Files to exclude\nexclude_patterns = [\n    \"logs\",\n    \".github/\",\n    \".gitlab/\",\n    \".pre-commit-config.yaml\",\n]\n\n\ndef _batch_gitlab_objects(git_objs: Iterable[T], batch_size: int) -> Iterator[list[T]]:\n    it = iter(git_objs)\n    while True:\n        batch = list(itertools.islice(it, batch_size))\n        if not batch:\n            break\n        yield batch\n\n\ndef get_author(author: Any) -> BasicExpertInfo:\n    return BasicExpertInfo(\n        display_name=author.get(\"name\"),\n    )\n\n\ndef _convert_merge_request_to_document(mr: Any) -> Document:\n    doc = Document(\n        id=mr.web_url,\n        sections=[TextSection(link=mr.web_url, text=mr.description or \"\")],\n        source=DocumentSource.GITLAB,\n        semantic_identifier=mr.title,\n        # updated_at is UTC time but is timezone unaware, explicitly add UTC\n        # as there is logic in indexing to prevent wrong timestamped docs\n        # due to local time discrepancies with UTC\n        doc_updated_at=mr.updated_at.replace(tzinfo=timezone.utc),\n        primary_owners=[get_author(mr.author)],\n        metadata={\"state\": mr.state, \"type\": \"MergeRequest\"},\n    )\n    return doc\n\n\ndef _convert_issue_to_document(issue: Any) -> Document:\n    doc = Document(\n        id=issue.web_url,\n        sections=[TextSection(link=issue.web_url, text=issue.description or \"\")],\n        source=DocumentSource.GITLAB,\n        semantic_identifier=issue.title,\n        # updated_at is UTC time but is timezone unaware, explicitly add UTC\n        # as there is logic in indexing to prevent wrong timestamped docs\n        # due to local time discrepancies with UTC\n        doc_updated_at=issue.updated_at.replace(tzinfo=timezone.utc),\n        primary_owners=[get_author(issue.author)],\n        metadata={\"state\": issue.state, \"type\": issue.type if issue.type else \"Issue\"},\n    )\n    return doc\n\n\ndef _convert_code_to_document(\n    project: Project, file: Any, url: str, projectName: str, projectOwner: str\n) -> Document:\n    # Dynamically get the default branch from the project object\n    default_branch = project.default_branch\n\n    # Fetch the file content using the correct branch\n    file_content_obj = project.files.get(\n        file_path=file[\"path\"],\n        ref=default_branch,  # Use the default branch\n    )\n    try:\n        file_content = file_content_obj.decode().decode(\"utf-8\")\n    except UnicodeDecodeError:\n        file_content = file_content_obj.decode().decode(\"latin-1\")\n\n    # Construct the file URL dynamically using the default branch\n    file_url = (\n        f\"{url}/{projectOwner}/{projectName}/-/blob/{default_branch}/{file['path']}\"\n    )\n\n    # Create and return a Document object\n    doc = Document(\n        id=file[\"id\"],\n        sections=[TextSection(link=file_url, text=file_content)],\n        source=DocumentSource.GITLAB,\n        semantic_identifier=file[\"name\"],\n        doc_updated_at=datetime.now().replace(tzinfo=timezone.utc),\n        primary_owners=[],  # Add owners if needed\n        metadata={\"type\": \"CodeFile\"},\n    )\n    return doc\n\n\ndef _should_exclude(path: str) -> bool:\n    \"\"\"Check if a path matches any of the exclude patterns.\"\"\"\n    return any(fnmatch.fnmatch(path, pattern) for pattern in exclude_patterns)\n\n\nclass GitlabConnector(LoadConnector, PollConnector):\n    def __init__(\n        self,\n        project_owner: str,\n        project_name: str,\n        batch_size: int = INDEX_BATCH_SIZE,\n        state_filter: str = \"all\",\n        include_mrs: bool = True,\n        include_issues: bool = True,\n        include_code_files: bool = GITLAB_CONNECTOR_INCLUDE_CODE_FILES,\n    ) -> None:\n        self.project_owner = project_owner\n        self.project_name = project_name\n        self.batch_size = batch_size\n        self.state_filter = state_filter\n        self.include_mrs = include_mrs\n        self.include_issues = include_issues\n        self.include_code_files = include_code_files\n        self.gitlab_client: gitlab.Gitlab | None = None\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self.gitlab_client = gitlab.Gitlab(\n            credentials[\"gitlab_url\"], private_token=credentials[\"gitlab_access_token\"]\n        )\n        return None\n\n    def _fetch_from_gitlab(\n        self, start: datetime | None = None, end: datetime | None = None\n    ) -> GenerateDocumentsOutput:\n        if self.gitlab_client is None:\n            raise ConnectorMissingCredentialError(\"Gitlab\")\n        project: Project = self.gitlab_client.projects.get(\n            f\"{self.project_owner}/{self.project_name}\"\n        )\n\n        # Fetch code files\n        if self.include_code_files:\n            # Fetching using BFS as project.report_tree with recursion causing slow load\n            queue = deque([\"\"])  # Start with the root directory\n            while queue:\n                current_path = queue.popleft()\n                files = project.repository_tree(path=current_path, all=True)\n                for file_batch in _batch_gitlab_objects(files, self.batch_size):\n                    code_doc_batch: list[Document | HierarchyNode] = []\n                    for file in file_batch:\n                        if _should_exclude(file[\"path\"]):\n                            continue\n\n                        if file[\"type\"] == \"blob\":\n                            code_doc_batch.append(\n                                _convert_code_to_document(\n                                    project,\n                                    file,\n                                    self.gitlab_client.url,\n                                    self.project_name,\n                                    self.project_owner,\n                                )\n                            )\n                        elif file[\"type\"] == \"tree\":\n                            queue.append(file[\"path\"])\n\n                    if code_doc_batch:\n                        yield code_doc_batch\n\n        if self.include_mrs:\n            merge_requests = project.mergerequests.list(\n                state=self.state_filter,\n                order_by=\"updated_at\",\n                sort=\"desc\",\n                iterator=True,\n            )\n\n            for mr_batch in _batch_gitlab_objects(merge_requests, self.batch_size):\n                mr_doc_batch: list[Document | HierarchyNode] = []\n                for mr in mr_batch:\n                    mr.updated_at = datetime.strptime(\n                        mr.updated_at, \"%Y-%m-%dT%H:%M:%S.%f%z\"\n                    )\n                    if start is not None and mr.updated_at < start.replace(\n                        tzinfo=pytz.UTC\n                    ):\n                        yield mr_doc_batch\n                        return\n                    if end is not None and mr.updated_at > end.replace(tzinfo=pytz.UTC):\n                        continue\n                    mr_doc_batch.append(_convert_merge_request_to_document(mr))\n                yield mr_doc_batch\n\n        if self.include_issues:\n            issues = project.issues.list(state=self.state_filter, iterator=True)\n\n            for issue_batch in _batch_gitlab_objects(issues, self.batch_size):\n                issue_doc_batch: list[Document | HierarchyNode] = []\n                for issue in issue_batch:\n                    issue.updated_at = datetime.strptime(\n                        issue.updated_at, \"%Y-%m-%dT%H:%M:%S.%f%z\"\n                    )\n                    if start is not None:\n                        start = start.replace(tzinfo=pytz.UTC)\n                        if issue.updated_at < start:\n                            yield issue_doc_batch\n                            return\n                    if end is not None:\n                        end = end.replace(tzinfo=pytz.UTC)\n                        if issue.updated_at > end:\n                            continue\n                    issue_doc_batch.append(_convert_issue_to_document(issue))\n                yield issue_doc_batch\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        return self._fetch_from_gitlab()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)\n        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)\n        return self._fetch_from_gitlab(start_datetime, end_datetime)\n\n\nif __name__ == \"__main__\":\n    import os\n\n    connector = GitlabConnector(\n        # gitlab_url=\"https://gitlab.com/api/v4\",\n        project_owner=os.environ[\"PROJECT_OWNER\"],\n        project_name=os.environ[\"PROJECT_NAME\"],\n        batch_size=10,\n        state_filter=\"all\",\n        include_mrs=True,\n        include_issues=True,\n        include_code_files=GITLAB_CONNECTOR_INCLUDE_CODE_FILES,\n    )\n\n    connector.load_credentials(\n        {\n            \"gitlab_access_token\": os.environ[\"GITLAB_ACCESS_TOKEN\"],\n            \"gitlab_url\": os.environ[\"GITLAB_URL\"],\n        }\n    )\n    document_batches = connector.load_from_state()\n    print(next(document_batches))\n"
  },
  {
    "path": "backend/onyx/connectors/gmail/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/gmail/connector.py",
    "content": "from base64 import urlsafe_b64decode\nfrom collections.abc import Callable\nfrom collections.abc import Iterator\nfrom typing import Any\nfrom typing import cast\nfrom typing import Dict\n\nfrom google.oauth2.credentials import Credentials as OAuthCredentials\nfrom google.oauth2.service_account import Credentials as ServiceAccountCredentials\nfrom googleapiclient.errors import HttpError  # type: ignore\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.google_utils.google_auth import get_google_creds\nfrom onyx.connectors.google_utils.google_utils import execute_paginated_retrieval\nfrom onyx.connectors.google_utils.google_utils import (\n    execute_paginated_retrieval_with_max_pages,\n)\nfrom onyx.connectors.google_utils.google_utils import execute_single_retrieval\nfrom onyx.connectors.google_utils.google_utils import PAGE_TOKEN_KEY\nfrom onyx.connectors.google_utils.resources import get_admin_service\nfrom onyx.connectors.google_utils.resources import get_gmail_service\nfrom onyx.connectors.google_utils.resources import GmailService\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import MISSING_SCOPES_ERROR_STR\nfrom onyx.connectors.google_utils.shared_constants import ONYX_SCOPE_INSTRUCTIONS\nfrom onyx.connectors.google_utils.shared_constants import SLIM_BATCH_SIZE\nfrom onyx.connectors.google_utils.shared_constants import USER_FIELDS\nfrom onyx.connectors.interfaces import CheckpointedConnectorWithPermSync\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import ConnectorFailure\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\n\nlogger = setup_logger()\n\n# This is for the initial list call to get the thread ids\nTHREAD_LIST_FIELDS = \"nextPageToken, threads(id)\"\n\n# These are the fields to retrieve using the ID from the initial list call\nPARTS_FIELDS = \"parts(body(data), mimeType)\"\nPAYLOAD_FIELDS = f\"payload(headers, {PARTS_FIELDS})\"\nMESSAGES_FIELDS = f\"messages(id, {PAYLOAD_FIELDS})\"\nTHREADS_FIELDS = f\"threads(id, {MESSAGES_FIELDS})\"\nTHREAD_FIELDS = f\"id, {MESSAGES_FIELDS}\"\n\nEMAIL_FIELDS = [\n    \"cc\",\n    \"bcc\",\n    \"from\",\n    \"to\",\n]\n\nMAX_MESSAGE_BODY_BYTES = 10 * 1024 * 1024  # 10MB cap to keep large threads safe\n\nPAGES_PER_CHECKPOINT = 1\n\nadd_retries = retry_builder(tries=50, max_delay=30)\n\n\ndef _is_mail_service_disabled_error(error: HttpError) -> bool:\n    \"\"\"Detect if the Gmail API is telling us the mailbox is not provisioned.\"\"\"\n\n    if error.resp.status != 400:\n        return False\n\n    error_message = str(error)\n    return (\n        \"Mail service not enabled\" in error_message\n        or \"failedPrecondition\" in error_message\n    )\n\n\ndef _build_time_range_query(\n    time_range_start: SecondsSinceUnixEpoch | None = None,\n    time_range_end: SecondsSinceUnixEpoch | None = None,\n) -> str | None:\n    query = \"\"\n    if time_range_start is not None and time_range_start != 0:\n        query += f\"after:{int(time_range_start)}\"\n    if time_range_end is not None and time_range_end != 0:\n        query += f\" before:{int(time_range_end)}\"\n    query = query.strip()\n\n    if len(query) == 0:\n        return None\n\n    return query\n\n\ndef _clean_email_and_extract_name(email: str) -> tuple[str, str | None]:\n    email = email.strip()\n    if \"<\" in email and \">\" in email:\n        # Handle format: \"Display Name <email@domain.com>\"\n        display_name = email[: email.find(\"<\")].strip()\n        email_address = email[email.find(\"<\") + 1 : email.find(\">\")].strip()\n        return email_address, display_name if display_name else None\n    else:\n        # Handle plain email address\n        return email.strip(), None\n\n\ndef _get_owners_from_emails(emails: dict[str, str | None]) -> list[BasicExpertInfo]:\n    owners = []\n    for email, names in emails.items():\n        if names:\n            name_parts = names.split(\" \")\n            first_name = \" \".join(name_parts[:-1])\n            last_name = name_parts[-1]\n        else:\n            first_name = None\n            last_name = None\n        owners.append(\n            BasicExpertInfo(email=email, first_name=first_name, last_name=last_name)\n        )\n    return owners\n\n\ndef _get_message_body(payload: dict[str, Any]) -> str:\n    \"\"\"\n    Gmail threads can contain large inline parts (including attachments\n    transmitted as base64). Only decode text/plain parts and skip anything\n    that breaches the safety threshold to protect against OOMs.\n    \"\"\"\n\n    message_body_chunks: list[str] = []\n    stack = [payload]\n\n    while stack:\n        part = stack.pop()\n        if not part:\n            continue\n\n        children = part.get(\"parts\", [])\n        stack.extend(reversed(children))\n\n        mime_type = part.get(\"mimeType\")\n        if mime_type != \"text/plain\":\n            continue\n\n        body = part.get(\"body\", {})\n        data = body.get(\"data\", \"\")\n\n        if not data:\n            continue\n\n        # base64 inflates storage by ~4/3; work with decoded size estimate\n        approx_decoded_size = (len(data) * 3) // 4\n        if approx_decoded_size > MAX_MESSAGE_BODY_BYTES:\n            logger.warning(\n                \"Skipping oversized Gmail message part (%s bytes > %s limit)\",\n                approx_decoded_size,\n                MAX_MESSAGE_BODY_BYTES,\n            )\n            continue\n\n        try:\n            text = urlsafe_b64decode(data).decode()\n        except (ValueError, UnicodeDecodeError) as error:\n            logger.warning(\"Failed to decode Gmail message part: %s\", error)\n            continue\n\n        message_body_chunks.append(text)\n\n    return \"\".join(message_body_chunks)\n\n\ndef _build_document_link(thread_id: str) -> str:\n    return f\"https://mail.google.com/mail/u/0/#inbox/{thread_id}\"\n\n\ndef message_to_section(message: Dict[str, Any]) -> tuple[TextSection, dict[str, str]]:\n    link = _build_document_link(message[\"id\"])\n\n    payload = message.get(\"payload\", {})\n    headers = payload.get(\"headers\", [])\n    metadata: dict[str, Any] = {}\n    for header in headers:\n        name = header.get(\"name\").lower()\n        value = header.get(\"value\")\n        if name in EMAIL_FIELDS:\n            metadata[name] = value\n        if name == \"subject\":\n            metadata[\"subject\"] = value\n        if name == \"date\":\n            metadata[\"updated_at\"] = value\n\n    if labels := message.get(\"labelIds\"):\n        metadata[\"labels\"] = labels\n\n    message_data = \"\"\n    for name, value in metadata.items():\n        # updated at isnt super useful for the llm\n        if name != \"updated_at\":\n            message_data += f\"{name}: {value}\\n\"\n\n    message_body_text: str = _get_message_body(payload)\n\n    return TextSection(link=link, text=message_body_text + message_data), metadata\n\n\ndef thread_to_document(\n    full_thread: Dict[str, Any], email_used_to_fetch_thread: str\n) -> Document | None:\n    all_messages = full_thread.get(\"messages\", [])\n    if not all_messages:\n        return None\n\n    sections = []\n    semantic_identifier = \"\"\n    updated_at = None\n    from_emails: dict[str, str | None] = {}\n    other_emails: dict[str, str | None] = {}\n    for message in all_messages:\n        section, message_metadata = message_to_section(message)\n        sections.append(section)\n\n        for name, value in message_metadata.items():\n            if name in EMAIL_FIELDS:\n                email, display_name = _clean_email_and_extract_name(value)\n                if name == \"from\":\n                    from_emails[email] = (\n                        display_name if not from_emails.get(email) else None\n                    )\n                else:\n                    other_emails[email] = (\n                        display_name if not other_emails.get(email) else None\n                    )\n\n        # If we haven't set the semantic identifier yet, set it to the subject of the first message\n        if not semantic_identifier:\n            semantic_identifier = message_metadata.get(\"subject\", \"\")\n\n        if message_metadata.get(\"updated_at\"):\n            updated_at = message_metadata.get(\"updated_at\")\n\n    updated_at_datetime = None\n    if updated_at:\n        updated_at_datetime = time_str_to_utc(updated_at)\n\n    id = full_thread.get(\"id\")\n    if not id:\n        raise ValueError(\"Thread ID is required\")\n\n    primary_owners = _get_owners_from_emails(from_emails)\n    secondary_owners = _get_owners_from_emails(other_emails)\n\n    # If emails have no subject, match Gmail's default \"no subject\"\n    # Search will break without a semantic identifier\n    if not semantic_identifier:\n        semantic_identifier = \"(no subject)\"\n\n    # NOTE: we're choosing to unconditionally include perm sync info\n    # (external_access) as it doesn't cost much space\n    return Document(\n        id=id,\n        semantic_identifier=semantic_identifier,\n        sections=cast(list[TextSection | ImageSection], sections),\n        source=DocumentSource.GMAIL,\n        # This is used to perform permission sync\n        primary_owners=primary_owners,\n        secondary_owners=secondary_owners,\n        doc_updated_at=updated_at_datetime,\n        # Not adding emails to metadata because it's already in the sections\n        metadata={},\n        external_access=ExternalAccess(\n            external_user_emails={email_used_to_fetch_thread},\n            external_user_group_ids=set(),\n            is_public=False,\n        ),\n    )\n\n\ndef _full_thread_from_id(\n    thread_id: str,\n    user_email: str,\n    gmail_service: GmailService,\n) -> Document | ConnectorFailure | None:\n    try:\n        thread = next(\n            execute_single_retrieval(\n                retrieval_function=gmail_service.users().threads().get,\n                list_key=None,\n                userId=user_email,\n                fields=THREAD_FIELDS,\n                id=thread_id,\n                continue_on_404_or_403=True,\n            ),\n            None,\n        )\n        if thread is None:\n            raise ValueError(f\"Thread {thread_id} not found\")\n        return thread_to_document(thread, user_email)\n    except Exception as e:\n        return ConnectorFailure(\n            failed_document=DocumentFailure(\n                document_id=thread_id, document_link=_build_document_link(thread_id)\n            ),\n            failure_message=f\"Failed to retrieve thread {thread_id}\",\n            exception=e,\n        )\n\n\ndef _slim_thread_from_id(\n    thread_id: str,\n    user_email: str,\n    gmail_service: GmailService,  # noqa: ARG001\n) -> SlimDocument:\n    return SlimDocument(\n        id=thread_id,\n        external_access=ExternalAccess(\n            external_user_emails={user_email},\n            external_user_group_ids=set(),\n            is_public=False,\n        ),\n    )\n\n\nclass GmailCheckpoint(ConnectorCheckpoint):\n    user_emails: list[str] = []  # stack of user emails to process\n    page_token: str | None = None\n\n\nclass GmailConnector(\n    SlimConnectorWithPermSync, CheckpointedConnectorWithPermSync[GmailCheckpoint]\n):\n    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:\n        self.batch_size = batch_size\n\n        self._creds: OAuthCredentials | ServiceAccountCredentials | None = None\n        self._primary_admin_email: str | None = None\n\n    @property\n    def primary_admin_email(self) -> str:\n        if self._primary_admin_email is None:\n            raise RuntimeError(\n                \"Primary admin email missing, should not call this property before calling load_credentials\"\n            )\n        return self._primary_admin_email\n\n    @property\n    def google_domain(self) -> str:\n        if self._primary_admin_email is None:\n            raise RuntimeError(\n                \"Primary admin email missing, should not call this property before calling load_credentials\"\n            )\n        return self._primary_admin_email.split(\"@\")[-1]\n\n    @property\n    def creds(self) -> OAuthCredentials | ServiceAccountCredentials:\n        if self._creds is None:\n            raise RuntimeError(\n                \"Creds missing, should not call this property before calling load_credentials\"\n            )\n        return self._creds\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None:\n        primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY]\n        self._primary_admin_email = primary_admin_email\n\n        self._creds, new_creds_dict = get_google_creds(\n            credentials=credentials,\n            source=DocumentSource.GMAIL,\n        )\n        return new_creds_dict\n\n    def _get_all_user_emails(self) -> list[str]:\n        \"\"\"\n        List all user emails if we are on a Google Workspace domain.\n        If the domain is gmail.com, or if we attempt to call the Admin SDK and\n        get a 404 or 403, fall back to using the single user.\n        A 404 indicates a personal Gmail account with no Workspace domain.\n        A 403 indicates insufficient permissions (e.g., OAuth user without admin privileges).\n        \"\"\"\n\n        try:\n            admin_service = get_admin_service(self.creds, self.primary_admin_email)\n            emails = []\n            for user in execute_paginated_retrieval(\n                retrieval_function=admin_service.users().list,\n                list_key=\"users\",\n                fields=USER_FIELDS,\n                domain=self.google_domain,\n            ):\n                if email := user.get(\"primaryEmail\"):\n                    emails.append(email)\n            return emails\n\n        except HttpError as e:\n            if e.resp.status == 404:\n                logger.warning(\n                    \"Received 404 from Admin SDK; this may indicate a personal Gmail account \"\n                    \"with no Workspace domain. Falling back to single user.\"\n                )\n                return [self.primary_admin_email]\n            elif e.resp.status == 403:\n                logger.warning(\n                    \"Received 403 from Admin SDK; this may indicate insufficient permissions \"\n                    \"(e.g., OAuth user without admin privileges or service account without \"\n                    \"domain-wide delegation). Falling back to single user.\"\n                )\n                return [self.primary_admin_email]\n            raise\n\n    def _fetch_threads_impl(\n        self,\n        user_email: str,\n        time_range_start: SecondsSinceUnixEpoch | None = None,\n        time_range_end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n        page_token: str | None = None,\n        set_page_token: Callable[[str | None], None] = lambda x: None,  # noqa: ARG005\n        is_slim: bool = False,\n    ) -> Iterator[Document | ConnectorFailure] | GenerateSlimDocumentOutput:\n        query = _build_time_range_query(time_range_start, time_range_end)\n        slim_doc_batch: list[SlimDocument | HierarchyNode] = []\n        logger.info(\n            f\"Fetching {'slim' if is_slim else 'full'} threads for user: {user_email}\"\n        )\n        gmail_service = get_gmail_service(self.creds, user_email)\n        try:\n            for thread in execute_paginated_retrieval_with_max_pages(\n                max_num_pages=PAGES_PER_CHECKPOINT,\n                retrieval_function=gmail_service.users().threads().list,\n                list_key=\"threads\",\n                userId=user_email,\n                fields=THREAD_LIST_FIELDS,\n                q=query,\n                continue_on_404_or_403=True,\n                **({PAGE_TOKEN_KEY: page_token} if page_token else {}),\n            ):\n                # if a page token is returned, set it and leave the function\n                if isinstance(thread, str):\n                    set_page_token(thread)\n                    return\n                if is_slim:\n                    slim_doc_batch.append(\n                        SlimDocument(\n                            id=thread[\"id\"],\n                            external_access=ExternalAccess(\n                                external_user_emails={user_email},\n                                external_user_group_ids=set(),\n                                is_public=False,\n                            ),\n                        )\n                    )\n                    if len(slim_doc_batch) >= SLIM_BATCH_SIZE:\n                        yield slim_doc_batch\n                        slim_doc_batch = []\n                else:\n                    result = _full_thread_from_id(\n                        thread[\"id\"], user_email, gmail_service\n                    )\n                    if result is not None:\n                        yield result\n                if callback:\n                    tag = (\n                        \"retrieve_all_slim_docs_perm_sync\"\n                        if is_slim\n                        else \"gmail_retrieve_all_docs\"\n                    )\n                    if callback.should_stop():\n                        raise RuntimeError(f\"{tag}: Stop signal detected\")\n\n                    callback.progress(tag, 1)\n            if slim_doc_batch:\n                yield slim_doc_batch\n\n            # done with user\n            set_page_token(None)\n        except HttpError as e:\n            if _is_mail_service_disabled_error(e):\n                logger.warning(\n                    \"Skipping Gmail sync for %s because the mailbox is disabled.\",\n                    user_email,\n                )\n                return\n            raise\n\n    def _fetch_threads(\n        self,\n        user_email: str,\n        page_token: str | None = None,\n        set_page_token: Callable[[str | None], None] = lambda x: None,  # noqa: ARG005\n        time_range_start: SecondsSinceUnixEpoch | None = None,\n        time_range_end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> Iterator[Document | ConnectorFailure]:\n        yield from cast(\n            Iterator[Document | ConnectorFailure],\n            self._fetch_threads_impl(\n                user_email,\n                time_range_start,\n                time_range_end,\n                callback,\n                page_token,\n                set_page_token,\n                False,\n            ),\n        )\n\n    def _fetch_slim_threads(\n        self,\n        user_email: str,\n        page_token: str | None = None,\n        set_page_token: Callable[[str | None], None] = lambda x: None,  # noqa: ARG005\n        time_range_start: SecondsSinceUnixEpoch | None = None,\n        time_range_end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> GenerateSlimDocumentOutput:\n        yield from cast(\n            GenerateSlimDocumentOutput,\n            self._fetch_threads_impl(\n                user_email,\n                time_range_start,\n                time_range_end,\n                callback,\n                page_token,\n                set_page_token,\n                True,\n            ),\n        )\n\n    def _load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: GmailCheckpoint,\n    ) -> CheckpointOutput[GmailCheckpoint]:\n        if not checkpoint.user_emails:\n            checkpoint.user_emails = self._get_all_user_emails()\n        try:\n\n            def set_page_token(page_token: str | None) -> None:\n                checkpoint.page_token = page_token\n\n            yield from self._fetch_threads(\n                checkpoint.user_emails[-1],\n                checkpoint.page_token,\n                set_page_token,\n                start,\n                end,\n                callback=None,\n            )\n            if checkpoint.page_token is None:\n                # we're done with this user\n                checkpoint.user_emails.pop()\n\n            if len(checkpoint.user_emails) == 0:\n                checkpoint.has_more = False\n            return checkpoint\n        except Exception as e:\n            if MISSING_SCOPES_ERROR_STR in str(e):\n                raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e\n            raise e\n\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: GmailCheckpoint,\n    ) -> CheckpointOutput[GmailCheckpoint]:\n        return self._load_from_checkpoint(\n            start=start,\n            end=end,\n            checkpoint=checkpoint,\n        )\n\n    def load_from_checkpoint_with_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: GmailCheckpoint,\n    ) -> CheckpointOutput[GmailCheckpoint]:\n        # NOTE: we're choosing to unconditionally include perm sync info\n        # (external_access) as it doesn't cost much space\n        return self._load_from_checkpoint(\n            start=start,\n            end=end,\n            checkpoint=checkpoint,\n        )\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> GenerateSlimDocumentOutput:\n        try:\n            pt_dict: dict[str, str | None] = {PAGE_TOKEN_KEY: None}\n\n            def set_page_token(page_token: str | None) -> None:\n                pt_dict[PAGE_TOKEN_KEY] = page_token\n\n            for user_email in self._get_all_user_emails():\n                yield from self._fetch_slim_threads(\n                    user_email,\n                    pt_dict[PAGE_TOKEN_KEY],\n                    set_page_token,\n                    start,\n                    end,\n                    callback=callback,\n                )\n        except Exception as e:\n            if MISSING_SCOPES_ERROR_STR in str(e):\n                raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e\n            raise e\n\n    def build_dummy_checkpoint(self) -> GmailCheckpoint:\n        return GmailCheckpoint(has_more=True)\n\n    def validate_checkpoint_json(self, checkpoint_json: str) -> GmailCheckpoint:\n        return GmailCheckpoint.model_validate_json(checkpoint_json)\n\n\nif __name__ == \"__main__\":\n    pass\n"
  },
  {
    "path": "backend/onyx/connectors/gong/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/gong/connector.py",
    "content": "import base64\nimport time\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\n\nimport requests\nfrom requests.adapters import HTTPAdapter\nfrom urllib3.util import Retry\n\nfrom onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE\nfrom onyx.configs.app_configs import GONG_CONNECTOR_START_TIME\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass GongConnector(LoadConnector, PollConnector):\n    BASE_URL = \"https://api.gong.io\"\n    MAX_CALL_DETAILS_ATTEMPTS = 6\n    CALL_DETAILS_DELAY = 30  # in seconds\n    # Gong API limit is 3 calls/sec — stay safely under it\n    MIN_REQUEST_INTERVAL = 0.5  # seconds between requests\n\n    def __init__(\n        self,\n        workspaces: list[str] | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n        continue_on_fail: bool = CONTINUE_ON_CONNECTOR_FAILURE,\n        hide_user_info: bool = False,\n    ) -> None:\n        self.workspaces = workspaces\n        self.batch_size: int = batch_size\n        self.continue_on_fail = continue_on_fail\n        self.auth_token_basic: str | None = None\n        self.hide_user_info = hide_user_info\n        self._last_request_time: float = 0.0\n\n        # urllib3 Retry already respects the Retry-After header by default\n        # (respect_retry_after_header=True), so on 429 it will sleep for the\n        # duration Gong specifies before retrying.\n        retry_strategy = Retry(\n            total=10,\n            backoff_factor=2,\n            status_forcelist=[429, 500, 502, 503, 504],\n        )\n\n        session = requests.Session()\n        session.mount(GongConnector.BASE_URL, HTTPAdapter(max_retries=retry_strategy))\n        self._session = session\n\n    @staticmethod\n    def make_url(endpoint: str) -> str:\n        url = f\"{GongConnector.BASE_URL}{endpoint}\"\n        return url\n\n    def _throttled_request(\n        self, method: str, url: str, **kwargs: Any\n    ) -> requests.Response:\n        \"\"\"Rate-limited request wrapper. Enforces MIN_REQUEST_INTERVAL between\n        calls to stay under Gong's 3 calls/sec limit and avoid triggering 429s.\"\"\"\n        now = time.monotonic()\n        elapsed = now - self._last_request_time\n        if elapsed < self.MIN_REQUEST_INTERVAL:\n            time.sleep(self.MIN_REQUEST_INTERVAL - elapsed)\n\n        response = self._session.request(method, url, **kwargs)\n        self._last_request_time = time.monotonic()\n        return response\n\n    def _get_workspace_id_map(self) -> dict[str, str]:\n        response = self._throttled_request(\n            \"GET\", GongConnector.make_url(\"/v2/workspaces\")\n        )\n        response.raise_for_status()\n\n        workspaces_details = response.json().get(\"workspaces\")\n        name_id_map = {\n            workspace[\"name\"]: workspace[\"id\"] for workspace in workspaces_details\n        }\n        id_id_map = {\n            workspace[\"id\"]: workspace[\"id\"] for workspace in workspaces_details\n        }\n        # In very rare case, if a workspace is given a name which is the id of another workspace,\n        # Then the user input is treated as the name\n        return {**id_id_map, **name_id_map}\n\n    def _get_transcript_batches(\n        self, start_datetime: str | None = None, end_datetime: str | None = None\n    ) -> Generator[list[dict[str, Any]], None, None]:\n        body: dict[str, dict] = {\"filter\": {}}\n        if start_datetime:\n            body[\"filter\"][\"fromDateTime\"] = start_datetime\n        if end_datetime:\n            body[\"filter\"][\"toDateTime\"] = end_datetime\n\n        # The batch_ids in the previous method appears to be batches of call_ids to process\n        # In this method, we will retrieve transcripts for them in batches.\n        transcripts: list[dict[str, Any]] = []\n        workspace_list = self.workspaces or [None]  # type: ignore\n        workspace_map = self._get_workspace_id_map() if self.workspaces else {}\n\n        for workspace in workspace_list:\n            if workspace:\n                logger.info(f\"Updating Gong workspace: {workspace}\")\n                workspace_id = workspace_map.get(workspace)\n                if not workspace_id:\n                    logger.error(f\"Invalid Gong workspace: {workspace}\")\n                    if not self.continue_on_fail:\n                        raise ValueError(f\"Invalid workspace: {workspace}\")\n                    continue\n                body[\"filter\"][\"workspaceId\"] = workspace_id\n            else:\n                if \"workspaceId\" in body[\"filter\"]:\n                    del body[\"filter\"][\"workspaceId\"]\n\n            while True:\n                response = self._throttled_request(\n                    \"POST\", GongConnector.make_url(\"/v2/calls/transcript\"), json=body\n                )\n                # If no calls in the range, just break out\n                if response.status_code == 404:\n                    break\n\n                try:\n                    response.raise_for_status()\n                except Exception:\n                    logger.error(f\"Error fetching transcripts: {response.text}\")\n                    raise\n\n                data = response.json()\n                call_transcripts = data.get(\"callTranscripts\", [])\n                transcripts.extend(call_transcripts)\n\n                while len(transcripts) >= self.batch_size:\n                    yield transcripts[: self.batch_size]\n                    transcripts = transcripts[self.batch_size :]\n\n                cursor = data.get(\"records\", {}).get(\"cursor\")\n                if cursor:\n                    body[\"cursor\"] = cursor\n                else:\n                    break\n\n        if transcripts:\n            yield transcripts\n\n    def _get_call_details_by_ids(self, call_ids: list[str]) -> dict:\n        body = {\n            \"filter\": {\"callIds\": call_ids},\n            \"contentSelector\": {\"exposedFields\": {\"parties\": True}},\n        }\n\n        response = self._throttled_request(\n            \"POST\", GongConnector.make_url(\"/v2/calls/extensive\"), json=body\n        )\n        response.raise_for_status()\n\n        calls = response.json().get(\"calls\")\n        call_to_metadata = {}\n        for call in calls:\n            call_to_metadata[call[\"metaData\"][\"id\"]] = call\n\n        return call_to_metadata\n\n    @staticmethod\n    def _parse_parties(parties: list[dict]) -> dict[str, str]:\n        id_mapping = {}\n        for party in parties:\n            name = party.get(\"name\")\n            email = party.get(\"emailAddress\")\n\n            if name and email:\n                full_identifier = f\"{name} ({email})\"\n            elif name:\n                full_identifier = name\n            elif email:\n                full_identifier = email\n            else:\n                full_identifier = \"Unknown\"\n\n            id_mapping[party[\"speakerId\"]] = full_identifier\n\n        return id_mapping\n\n    def _fetch_calls(\n        self, start_datetime: str | None = None, end_datetime: str | None = None\n    ) -> GenerateDocumentsOutput:\n        num_calls = 0\n\n        for transcript_batch in self._get_transcript_batches(\n            start_datetime, end_datetime\n        ):\n            doc_batch: list[Document | HierarchyNode] = []\n\n            transcript_call_ids = cast(\n                list[str],\n                [t.get(\"callId\") for t in transcript_batch if t.get(\"callId\")],\n            )\n\n            call_details_map: dict[str, Any] = {}\n\n            # There's a likely race condition in the API where a transcript will have a\n            # call id but the call to v2/calls/extensive will not return all of the id's\n            # retry with exponential backoff has been observed to mitigate this\n            # in ~2 minutes. After max attempts, proceed with whatever we have —\n            # the per-call loop below will skip missing IDs gracefully.\n            current_attempt = 0\n            while True:\n                current_attempt += 1\n                call_details_map = self._get_call_details_by_ids(transcript_call_ids)\n                if set(transcript_call_ids) == set(call_details_map.keys()):\n                    # we got all the id's we were expecting ... break and continue\n                    break\n\n                # we are missing some id's. Log and retry with exponential backoff\n                missing_call_ids = set(transcript_call_ids) - set(\n                    call_details_map.keys()\n                )\n                logger.warning(\n                    f\"_get_call_details_by_ids is missing call id's: \"\n                    f\"current_attempt={current_attempt} \"\n                    f\"missing_call_ids={missing_call_ids}\"\n                )\n                if current_attempt >= self.MAX_CALL_DETAILS_ATTEMPTS:\n                    logger.error(\n                        f\"Giving up on missing call id's after \"\n                        f\"{self.MAX_CALL_DETAILS_ATTEMPTS} attempts: \"\n                        f\"missing_call_ids={missing_call_ids} — \"\n                        f\"proceeding with {len(call_details_map)} of \"\n                        f\"{len(transcript_call_ids)} calls\"\n                    )\n                    break\n\n                wait_seconds = self.CALL_DETAILS_DELAY * pow(2, current_attempt - 1)\n                logger.warning(\n                    f\"_get_call_details_by_ids waiting to retry: \"\n                    f\"wait={wait_seconds}s \"\n                    f\"current_attempt={current_attempt} \"\n                    f\"next_attempt={current_attempt + 1} \"\n                    f\"max_attempts={self.MAX_CALL_DETAILS_ATTEMPTS}\"\n                )\n                time.sleep(wait_seconds)\n\n            # now we can iterate per call/transcript\n            for transcript in transcript_batch:\n                call_id = transcript.get(\"callId\")\n\n                if not call_id or call_id not in call_details_map:\n                    # NOTE(rkuo): seeing odd behavior where call_ids from the transcript\n                    # don't have call details. adding error debugging logs to trace.\n                    logger.error(\n                        f\"Couldn't get call information for Call ID: {call_id}\"\n                    )\n                    if call_id:\n                        logger.error(\n                            f\"Call debug info: call_id={call_id} \"\n                            f\"call_ids={transcript_call_ids} \"\n                            f\"call_details_map={call_details_map.keys()}\"\n                        )\n                    if not self.continue_on_fail:\n                        raise RuntimeError(\n                            f\"Couldn't get call information for Call ID: {call_id}\"\n                        )\n                    continue\n\n                call_details = call_details_map[call_id]\n                call_metadata = call_details[\"metaData\"]\n\n                call_time_str = call_metadata[\"started\"]\n                call_title = call_metadata[\"title\"]\n                logger.info(\n                    f\"{num_calls + 1}: Indexing Gong call id {call_id} from {call_time_str.split('T', 1)[0]}: {call_title}\"\n                )\n\n                call_parties = cast(list[dict] | None, call_details.get(\"parties\"))\n                if call_parties is None:\n                    logger.error(f\"Couldn't get parties for Call ID: {call_id}\")\n                    call_parties = []\n\n                id_to_name_map = self._parse_parties(call_parties)\n\n                # Keeping a separate dict here in case the parties info is incomplete\n                speaker_to_name: dict[str, str] = {}\n\n                transcript_text = \"\"\n                call_purpose = call_metadata[\"purpose\"]\n                if call_purpose:\n                    transcript_text += f\"Call Description: {call_purpose}\\n\\n\"\n\n                contents = transcript[\"transcript\"]\n                for segment in contents:\n                    speaker_id = segment.get(\"speakerId\", \"\")\n                    if speaker_id not in speaker_to_name:\n                        if self.hide_user_info:\n                            speaker_to_name[speaker_id] = (\n                                f\"User {len(speaker_to_name) + 1}\"\n                            )\n                        else:\n                            speaker_to_name[speaker_id] = id_to_name_map.get(\n                                speaker_id, \"Unknown\"\n                            )\n\n                    speaker_name = speaker_to_name[speaker_id]\n\n                    sentences = segment.get(\"sentences\", {})\n                    monolog = \" \".join(\n                        [sentence.get(\"text\", \"\") for sentence in sentences]\n                    )\n                    transcript_text += f\"{speaker_name}: {monolog}\\n\\n\"\n\n                metadata = {}\n                if call_metadata.get(\"system\"):\n                    metadata[\"client\"] = call_metadata.get(\"system\")\n                # TODO calls have a clientUniqueId field, can pull that in later\n\n                doc_batch.append(\n                    Document(\n                        id=call_id,\n                        sections=[\n                            TextSection(link=call_metadata[\"url\"], text=transcript_text)\n                        ],\n                        source=DocumentSource.GONG,\n                        # Should not ever be Untitled as a call cannot be made without a Title\n                        semantic_identifier=call_title or \"Untitled\",\n                        doc_updated_at=datetime.fromisoformat(call_time_str).astimezone(\n                            timezone.utc\n                        ),\n                        metadata={\"client\": call_metadata.get(\"system\")},\n                    )\n                )\n\n                num_calls += 1\n\n            yield doc_batch\n\n        logger.info(f\"_fetch_calls finished: num_calls={num_calls}\")\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        combined = (\n            f\"{credentials['gong_access_key']}:{credentials['gong_access_key_secret']}\"\n        )\n        self.auth_token_basic = base64.b64encode(combined.encode(\"utf-8\")).decode(\n            \"utf-8\"\n        )\n\n        if self.auth_token_basic is None:\n            raise ConnectorMissingCredentialError(\"Gong\")\n\n        self._session.headers.update(\n            {\"Authorization\": f\"Basic {self.auth_token_basic}\"}\n        )\n        return None\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        return self._fetch_calls()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)\n\n        # if this env variable is set, don't start from a timestamp before the specified\n        # start time\n        # TODO: remove this once this is globally available\n        if GONG_CONNECTOR_START_TIME:\n            special_start_datetime = datetime.fromisoformat(GONG_CONNECTOR_START_TIME)\n            special_start_datetime = special_start_datetime.replace(tzinfo=timezone.utc)\n        else:\n            special_start_datetime = datetime.fromtimestamp(0, tz=timezone.utc)\n\n        # don't let the special start dt be past the end time, this causes issues when\n        # the Gong API (`filter.fromDateTime: must be before toDateTime`)\n        special_start_datetime = min(special_start_datetime, end_datetime)\n\n        start_datetime = max(\n            datetime.fromtimestamp(start, tz=timezone.utc), special_start_datetime\n        )\n\n        # Because these are meeting start times, the meeting needs to end and be processed\n        # so adding a 1 day buffer and fetching by default till current time\n        start_one_day_offset = start_datetime - timedelta(days=1)\n        start_time = start_one_day_offset.isoformat()\n\n        end_time = datetime.fromtimestamp(end, tz=timezone.utc).isoformat()\n\n        logger.info(f\"Fetching Gong calls between {start_time} and {end_time}\")\n        return self._fetch_calls(start_time, end_time)\n\n\nif __name__ == \"__main__\":\n    import os\n\n    connector = GongConnector()\n    connector.load_credentials(\n        {\n            \"gong_access_key\": os.environ[\"GONG_ACCESS_KEY\"],\n            \"gong_access_key_secret\": os.environ[\"GONG_ACCESS_KEY_SECRET\"],\n        }\n    )\n\n    latest_docs = connector.load_from_state()\n    print(next(latest_docs))\n"
  },
  {
    "path": "backend/onyx/connectors/google_drive/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/google_drive/connector.py",
    "content": "import copy\nimport json\nimport os\nimport sys\nimport threading\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom enum import Enum\nfrom typing import Any\nfrom typing import cast\nfrom typing import Protocol\nfrom urllib.parse import parse_qs\nfrom urllib.parse import urlparse\nfrom urllib.parse import urlunparse\n\nfrom google.auth.exceptions import RefreshError\nfrom google.oauth2.credentials import Credentials as OAuthCredentials\nfrom google.oauth2.service_account import Credentials as ServiceAccountCredentials\nfrom googleapiclient.errors import HttpError  # type: ignore\nfrom typing_extensions import override\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.configs.app_configs import GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.app_configs import MAX_DRIVE_WORKERS\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.google_drive.doc_conversion import build_slim_document\nfrom onyx.connectors.google_drive.doc_conversion import (\n    convert_drive_item_to_document,\n)\nfrom onyx.connectors.google_drive.doc_conversion import onyx_document_id_from_drive_file\nfrom onyx.connectors.google_drive.doc_conversion import PermissionSyncContext\nfrom onyx.connectors.google_drive.file_retrieval import crawl_folders_for_files\nfrom onyx.connectors.google_drive.file_retrieval import DriveFileFieldType\nfrom onyx.connectors.google_drive.file_retrieval import get_all_files_for_oauth\nfrom onyx.connectors.google_drive.file_retrieval import (\n    get_all_files_in_my_drive_and_shared,\n)\nfrom onyx.connectors.google_drive.file_retrieval import get_external_access_for_folder\nfrom onyx.connectors.google_drive.file_retrieval import get_files_in_shared_drive\nfrom onyx.connectors.google_drive.file_retrieval import get_folder_metadata\nfrom onyx.connectors.google_drive.file_retrieval import get_root_folder_id\nfrom onyx.connectors.google_drive.file_retrieval import get_shared_drive_name\nfrom onyx.connectors.google_drive.file_retrieval import has_link_only_permission\nfrom onyx.connectors.google_drive.models import DriveRetrievalStage\nfrom onyx.connectors.google_drive.models import GoogleDriveCheckpoint\nfrom onyx.connectors.google_drive.models import GoogleDriveFileType\nfrom onyx.connectors.google_drive.models import RetrievedDriveFile\nfrom onyx.connectors.google_drive.models import StageCompletion\nfrom onyx.connectors.google_utils.google_auth import get_google_creds\nfrom onyx.connectors.google_utils.google_utils import execute_paginated_retrieval\nfrom onyx.connectors.google_utils.google_utils import get_file_owners\nfrom onyx.connectors.google_utils.google_utils import GoogleFields\nfrom onyx.connectors.google_utils.resources import get_admin_service\nfrom onyx.connectors.google_utils.resources import get_drive_service\nfrom onyx.connectors.google_utils.resources import GoogleDriveService\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import MISSING_SCOPES_ERROR_STR\nfrom onyx.connectors.google_utils.shared_constants import ONYX_SCOPE_INSTRUCTIONS\nfrom onyx.connectors.google_utils.shared_constants import SLIM_BATCH_SIZE\nfrom onyx.connectors.google_utils.shared_constants import USER_FIELDS\nfrom onyx.connectors.interfaces import CheckpointedConnectorWithPermSync\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import NormalizationResult\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import EntityFailure\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\nfrom onyx.utils.threadpool_concurrency import parallel_yield\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom onyx.utils.threadpool_concurrency import ThreadSafeDict\nfrom onyx.utils.threadpool_concurrency import ThreadSafeSet\n\nlogger = setup_logger()\n# TODO: Improve this by using the batch utility: https://googleapis.github.io/google-api-python-client/docs/batch.html\n# All file retrievals could be batched and made at once\n\nBATCHES_PER_CHECKPOINT = 1\n\nDRIVE_BATCH_SIZE = 80\n\nSHARED_DRIVE_PAGES_PER_CHECKPOINT = 2\nMY_DRIVE_PAGES_PER_CHECKPOINT = 2\nOAUTH_PAGES_PER_CHECKPOINT = 2\nFOLDERS_PER_CHECKPOINT = 1\n\n\ndef _extract_str_list_from_comma_str(string: str | None) -> list[str]:\n    if not string:\n        return []\n    return [s.strip() for s in string.split(\",\") if s.strip()]\n\n\ndef _extract_ids_from_urls(urls: list[str]) -> list[str]:\n    return [urlparse(url).path.strip(\"/\").split(\"/\")[-1] for url in urls]\n\n\ndef _clean_requested_drive_ids(\n    requested_drive_ids: set[str],\n    requested_folder_ids: set[str],\n    all_drive_ids_available: set[str],\n) -> tuple[list[str], list[str]]:\n    invalid_requested_drive_ids = requested_drive_ids - all_drive_ids_available\n    filtered_folder_ids = requested_folder_ids - all_drive_ids_available\n    if invalid_requested_drive_ids:\n        logger.warning(\n            f\"Some shared drive IDs were not found. IDs: {invalid_requested_drive_ids}\"\n        )\n        logger.warning(\"Checking for folder access instead...\")\n        filtered_folder_ids.update(invalid_requested_drive_ids)\n\n    valid_requested_drive_ids = requested_drive_ids - invalid_requested_drive_ids\n    return sorted(valid_requested_drive_ids), sorted(filtered_folder_ids)\n\n\ndef _get_parent_id_from_file(drive_file: GoogleDriveFileType) -> str | None:\n    \"\"\"Extract the first parent ID from a drive file.\"\"\"\n    parents = drive_file.get(\"parents\")\n    if parents and len(parents) > 0:\n        return parents[0]  # files have a unique parent\n    return None\n\n\ndef _is_shared_drive_root(folder: GoogleDriveFileType) -> bool:\n    \"\"\"\n    Check if a folder is a verified shared drive root.\n\n    For shared drives, we can verify using driveId:\n    - If driveId is set and folder_id == driveId AND no parents, it's the shared drive root\n    - If driveId is set but folder_id != driveId with empty parents, it's a permission issue\n\n    Returns True only for verified shared drive roots.\n    \"\"\"\n    folder_id = folder.get(\"id\")\n    drive_id = folder.get(\"driveId\")\n    parents = folder.get(\"parents\", [])\n\n    # Must have no parents to be a root\n    if parents:\n        return False\n\n    # For shared drive content, the root has id == driveId\n    return bool(drive_id and folder_id == drive_id)\n\n\ndef _public_access() -> ExternalAccess:\n    return ExternalAccess(\n        external_user_emails=set(),\n        external_user_group_ids=set(),\n        is_public=True,\n    )\n\n\nclass CredentialedRetrievalMethod(Protocol):\n    def __call__(\n        self,\n        field_type: DriveFileFieldType,\n        checkpoint: GoogleDriveCheckpoint,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> Iterator[RetrievedDriveFile]: ...\n\n\ndef add_retrieval_info(\n    drive_files: Iterator[GoogleDriveFileType | str],\n    user_email: str,\n    completion_stage: DriveRetrievalStage,\n    parent_id: str | None = None,\n) -> Iterator[RetrievedDriveFile | str]:\n    for file in drive_files:\n        if isinstance(file, str):\n            yield file\n            continue\n        yield RetrievedDriveFile(\n            drive_file=file,\n            user_email=user_email,\n            parent_id=parent_id,\n            completion_stage=completion_stage,\n        )\n\n\nclass DriveIdStatus(Enum):\n    AVAILABLE = \"available\"\n    IN_PROGRESS = \"in_progress\"\n    FINISHED = \"finished\"\n\n\nclass GoogleDriveConnector(\n    SlimConnectorWithPermSync, CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint]\n):\n    def __init__(\n        self,\n        include_shared_drives: bool = False,\n        include_my_drives: bool = False,\n        include_files_shared_with_me: bool = False,\n        shared_drive_urls: str | None = None,\n        my_drive_emails: str | None = None,\n        shared_folder_urls: str | None = None,\n        specific_user_emails: str | None = None,\n        exclude_domain_link_only: bool = False,\n        batch_size: int = INDEX_BATCH_SIZE,  # noqa: ARG002\n        # OLD PARAMETERS\n        folder_paths: list[str] | None = None,\n        include_shared: bool | None = None,\n        follow_shortcuts: bool | None = None,\n        only_org_public: bool | None = None,\n        continue_on_failure: bool | None = None,\n    ) -> None:\n        # Check for old input parameters\n        if folder_paths is not None:\n            logger.warning(\n                \"The 'folder_paths' parameter is deprecated. Use 'shared_folder_urls' instead.\"\n            )\n        if include_shared is not None:\n            logger.warning(\n                \"The 'include_shared' parameter is deprecated. Use 'include_files_shared_with_me' instead.\"\n            )\n        if follow_shortcuts is not None:\n            logger.warning(\"The 'follow_shortcuts' parameter is deprecated.\")\n        if only_org_public is not None:\n            logger.warning(\"The 'only_org_public' parameter is deprecated.\")\n        if continue_on_failure is not None:\n            logger.warning(\"The 'continue_on_failure' parameter is deprecated.\")\n\n        if not any(\n            (\n                include_shared_drives,\n                include_my_drives,\n                include_files_shared_with_me,\n                shared_folder_urls,\n                my_drive_emails,\n                shared_drive_urls,\n            )\n        ):\n            raise ConnectorValidationError(\n                \"Nothing to index. Please specify at least one of the following: \"\n                \"include_shared_drives, include_my_drives, include_files_shared_with_me, \"\n                \"shared_folder_urls, or my_drive_emails\"\n            )\n\n        specific_requests_made = False\n        if bool(shared_drive_urls) or bool(my_drive_emails) or bool(shared_folder_urls):\n            specific_requests_made = True\n        self.specific_requests_made = specific_requests_made\n\n        # NOTE: potentially modified in load_credentials if using service account\n        self.include_files_shared_with_me = (\n            False if specific_requests_made else include_files_shared_with_me\n        )\n        self.include_my_drives = False if specific_requests_made else include_my_drives\n        self.include_shared_drives = (\n            False if specific_requests_made else include_shared_drives\n        )\n\n        shared_drive_url_list = _extract_str_list_from_comma_str(shared_drive_urls)\n        self._requested_shared_drive_ids = set(\n            _extract_ids_from_urls(shared_drive_url_list)\n        )\n\n        self._requested_my_drive_emails = set(\n            _extract_str_list_from_comma_str(my_drive_emails)\n        )\n\n        shared_folder_url_list = _extract_str_list_from_comma_str(shared_folder_urls)\n        self._requested_folder_ids = set(_extract_ids_from_urls(shared_folder_url_list))\n        self._specific_user_emails = _extract_str_list_from_comma_str(\n            specific_user_emails\n        )\n        self.exclude_domain_link_only = exclude_domain_link_only\n\n        self._primary_admin_email: str | None = None\n\n        self._creds: OAuthCredentials | ServiceAccountCredentials | None = None\n        self._creds_dict: dict[str, Any] | None = None\n\n        # ids of folders and shared drives that have been traversed\n        self._retrieved_folder_and_drive_ids: set[str] = set()\n\n        # Cache of known My Drive root IDs (user_email -> root_id)\n        # Used to verify if a folder with no parents is actually a My Drive root\n        # Thread-safe because multiple impersonation threads access this concurrently\n        self._my_drive_root_id_cache: ThreadSafeDict[str, str] = ThreadSafeDict()\n\n        self.allow_images = False\n\n        self.size_threshold = GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD\n\n    def set_allow_images(self, value: bool) -> None:\n        self.allow_images = value\n\n    @property\n    def primary_admin_email(self) -> str:\n        if self._primary_admin_email is None:\n            raise RuntimeError(\n                \"Primary admin email missing, should not call this property before calling load_credentials\"\n            )\n        return self._primary_admin_email\n\n    @property\n    def google_domain(self) -> str:\n        if self._primary_admin_email is None:\n            raise RuntimeError(\n                \"Primary admin email missing, should not call this property before calling load_credentials\"\n            )\n        return self._primary_admin_email.split(\"@\")[-1]\n\n    @property\n    def creds(self) -> OAuthCredentials | ServiceAccountCredentials:\n        if self._creds is None:\n            raise RuntimeError(\n                \"Creds missing, should not call this property before calling load_credentials\"\n            )\n        return self._creds\n\n    @classmethod\n    @override\n    def normalize_url(cls, url: str) -> NormalizationResult:\n        \"\"\"Normalize a Google Drive URL to match the canonical Document.id format.\n\n        Reuses the connector's existing document ID creation logic from\n        onyx_document_id_from_drive_file.\n        \"\"\"\n        parsed = urlparse(url)\n        netloc = parsed.netloc.lower()\n\n        if not (\n            netloc.startswith(\"docs.google.com\")\n            or netloc.startswith(\"drive.google.com\")\n        ):\n            return NormalizationResult(normalized_url=None, use_default=False)\n\n        # Handle ?id= query parameter case\n        query_params = parse_qs(parsed.query)\n        doc_id = query_params.get(\"id\", [None])[0]\n        if doc_id:\n            scheme = parsed.scheme or \"https\"\n            netloc = \"drive.google.com\"\n            path = f\"/file/d/{doc_id}\"\n            params = \"\"\n            query = \"\"\n            fragment = \"\"\n            normalized = urlunparse(\n                (scheme, netloc, path, params, query, fragment)\n            ).rstrip(\"/\")\n            return NormalizationResult(normalized_url=normalized, use_default=False)\n\n        # Extract file ID and use connector's function\n        path_parts = parsed.path.split(\"/\")\n        file_id = None\n        for i, part in enumerate(path_parts):\n            if part == \"d\" and i + 1 < len(path_parts):\n                file_id = path_parts[i + 1]\n                break\n\n        if not file_id:\n            return NormalizationResult(normalized_url=None, use_default=False)\n\n        # Create minimal file object for connector function\n        file_obj = {\"webViewLink\": url, \"id\": file_id}\n        normalized = onyx_document_id_from_drive_file(file_obj).rstrip(\"/\")\n        return NormalizationResult(normalized_url=normalized, use_default=False)\n\n    # TODO: ensure returned new_creds_dict is actually persisted when this is called?\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None:\n        try:\n            self._primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY]\n        except KeyError:\n            raise ValueError(\"Credentials json missing primary admin key\")\n\n        self._creds, new_creds_dict = get_google_creds(\n            credentials=credentials,\n            source=DocumentSource.GOOGLE_DRIVE,\n        )\n\n        # Service account connectors don't have a specific setting determining whether\n        # to include \"shared with me\" for each user, so we default to true unless the connector\n        # is in specific folders/drives mode. Note that shared files are only picked up during\n        # the My Drive stage, so this does nothing if the connector is set to only index shared drives.\n        if (\n            isinstance(self._creds, ServiceAccountCredentials)\n            and not self.specific_requests_made\n        ):\n            self.include_files_shared_with_me = True\n\n        self._creds_dict = new_creds_dict\n\n        return new_creds_dict\n\n    def _update_traversed_parent_ids(self, folder_id: str) -> None:\n        self._retrieved_folder_and_drive_ids.add(folder_id)\n\n    def _get_all_user_emails(self) -> list[str]:\n        if self._specific_user_emails:\n            return self._specific_user_emails\n\n        # Start with primary admin email\n        user_emails = [self.primary_admin_email]\n\n        # Only fetch additional users if using service account\n        if isinstance(self.creds, OAuthCredentials):\n            return user_emails\n\n        admin_service = get_admin_service(\n            creds=self.creds,\n            user_email=self.primary_admin_email,\n        )\n\n        # Get admins first since they're more likely to have access to most files\n        for is_admin in [True, False]:\n            query = \"isAdmin=true\" if is_admin else \"isAdmin=false\"\n            for user in execute_paginated_retrieval(\n                retrieval_function=admin_service.users().list,\n                list_key=\"users\",\n                fields=USER_FIELDS,\n                domain=self.google_domain,\n                query=query,\n            ):\n                if email := user.get(\"primaryEmail\"):\n                    if email not in user_emails:\n                        user_emails.append(email)\n        return user_emails\n\n    def _get_my_drive_root_id(self, user_email: str) -> str | None:\n        \"\"\"\n        Get the My Drive root folder ID for a user.\n\n        Uses a cache to avoid repeated API calls. Returns None if the user\n        doesn't have access to Drive APIs or the call fails.\n        \"\"\"\n        if user_email in self._my_drive_root_id_cache:\n            return self._my_drive_root_id_cache[user_email]\n\n        try:\n            drive_service = get_drive_service(self.creds, user_email)\n            root_id = get_root_folder_id(drive_service)\n            self._my_drive_root_id_cache[user_email] = root_id\n            return root_id\n        except Exception:\n            # User might not have access to Drive APIs\n            return None\n\n    def _is_my_drive_root(\n        self, folder: GoogleDriveFileType, retriever_email: str\n    ) -> bool:\n        \"\"\"\n        Check if a folder is a My Drive root.\n\n        For My Drive folders (no driveId), we verify by comparing the folder ID\n        to the actual My Drive root ID obtained via files().get(fileId='root').\n        \"\"\"\n        folder_id = folder.get(\"id\")\n        drive_id = folder.get(\"driveId\")\n        parents = folder.get(\"parents\", [])\n\n        # If there are parents, this is not a root\n        if parents:\n            return False\n\n        # If driveId is set, this is shared drive content, not My Drive\n        if drive_id:\n            return False\n\n        # Get the My Drive root ID for this user and compare\n        root_id = self._get_my_drive_root_id(retriever_email)\n        if root_id and folder_id == root_id:\n            return True\n\n        # Also check with admin in case the retriever doesn't have access\n        admin_root_id = self._get_my_drive_root_id(self.primary_admin_email)\n        if admin_root_id and folder_id == admin_root_id:\n            return True\n\n        return False\n\n    def _get_new_ancestors_for_files(\n        self,\n        files: list[RetrievedDriveFile],\n        seen_hierarchy_node_raw_ids: ThreadSafeSet[str],\n        fully_walked_hierarchy_node_raw_ids: ThreadSafeSet[str],\n        permission_sync_context: PermissionSyncContext | None = None,\n        add_prefix: bool = False,\n    ) -> list[HierarchyNode]:\n        \"\"\"\n        Get all NEW ancestor hierarchy nodes for a batch of files.\n\n        For each file, walks up the parent chain until reaching a root/drive\n        (terminal node with no parent). Returns HierarchyNode objects for all\n        new ancestors.\n\n        The function tracks two separate sets:\n        - seen_hierarchy_node_raw_ids: Nodes we've already yielded (to avoid duplicates)\n        - fully_walked_hierarchy_node_raw_ids: Nodes where we've successfully walked\n          to a terminal root. Only skip walking from a node if it's in this set.\n\n        This separation ensures that if User A can access folder C but not its parent B,\n        a later User B who has access to both can still complete the walk to the root.\n\n        Args:\n            files: List of retrieved drive files to get ancestors for\n            seen_hierarchy_node_raw_ids: Set of already-yielded node IDs (modified in place)\n            fully_walked_hierarchy_node_raw_ids: Set of node IDs where the walk to root\n                succeeded (modified in place)\n            permission_sync_context: If provided, permissions will be fetched for hierarchy nodes.\n                Contains google_domain and primary_admin_email needed for permission syncing.\n            add_prefix: When True, prefix group IDs with source type (for indexing path).\n                       When False (default), leave unprefixed (for permission sync path).\n\n        Returns:\n            List of HierarchyNode objects for new ancestors (ordered parent-first)\n        \"\"\"\n        service = get_drive_service(self.creds, self.primary_admin_email)\n        field_type = (\n            DriveFileFieldType.WITH_PERMISSIONS\n            if permission_sync_context\n            else DriveFileFieldType.STANDARD\n        )\n        new_nodes: list[HierarchyNode] = []\n\n        for file in files:\n            parent_id = _get_parent_id_from_file(file.drive_file)\n            if not parent_id:\n                continue\n\n            # Only skip if we've already successfully walked from this node to a root.\n            # Don't skip just because it's \"seen\" - a previous user may have failed\n            # to walk to the root, and this user might have better access.\n            if parent_id in fully_walked_hierarchy_node_raw_ids:\n                continue\n\n            # Walk up the parent chain\n            ancestors_to_add: list[HierarchyNode] = []\n            node_ids_in_walk: list[str] = []\n            current_id: str | None = parent_id\n            reached_terminal = False\n\n            while current_id:\n                node_ids_in_walk.append(current_id)\n\n                # If we hit a node that's already been fully walked, we know\n                # the path from here to root is complete\n                if current_id in fully_walked_hierarchy_node_raw_ids:\n                    reached_terminal = True\n                    break\n\n                # Fetch folder metadata\n                folder = self._get_folder_metadata(\n                    current_id, file.user_email, field_type\n                )\n                if not folder:\n                    # Can't access this folder - stop climbing\n                    # Don't mark as fully walked since we didn't reach root\n                    break\n\n                folder_parent_id = _get_parent_id_from_file(folder)\n\n                # Create the node BEFORE marking as seen to avoid a race condition where:\n                # 1. Thread A marks node as \"seen\"\n                # 2. Thread A fails to create node (e.g., API error in get_external_access)\n                # 3. Thread B sees node as \"already seen\" and skips it\n                # 4. Result: node is never yielded\n                #\n                # By creating first and then atomically checking/marking, we ensure that\n                # if creation fails, another thread can still try. If both succeed,\n                # only one will add to ancestors_to_add (the one that wins check_and_add).\n                if permission_sync_context:\n                    external_access = get_external_access_for_folder(\n                        folder,\n                        permission_sync_context.google_domain,\n                        service,\n                        add_prefix,\n                    )\n                else:\n                    external_access = _public_access()\n\n                node = HierarchyNode(\n                    raw_node_id=current_id,\n                    raw_parent_id=folder_parent_id,\n                    display_name=folder.get(\"name\", \"Unknown Folder\"),\n                    link=folder.get(\"webViewLink\"),\n                    node_type=HierarchyNodeType.FOLDER,\n                    external_access=external_access,\n                )\n\n                # Now atomically check and add - only append if we're the first thread\n                # to successfully create this node\n                already_seen = seen_hierarchy_node_raw_ids.check_and_add(current_id)\n                if not already_seen:\n                    ancestors_to_add.append(node)\n\n                # Check if this is a verified terminal node (actual root, not just\n                # empty parents due to permission limitations)\n                # Check shared drive root first (simple ID comparison)\n                if _is_shared_drive_root(folder):\n                    # files().get() returns 'Drive' for shared drive roots;\n                    # fetch the real name via drives().get().\n                    # Try both the retriever and admin since the admin may\n                    # not have access to private shared drives.\n                    drive_name = self._get_shared_drive_name(\n                        current_id, file.user_email\n                    )\n                    if drive_name:\n                        node.display_name = drive_name\n                    node.node_type = HierarchyNodeType.SHARED_DRIVE\n                    reached_terminal = True\n                    break\n\n                # Check if this is a My Drive root (requires API call, but cached)\n                if self._is_my_drive_root(folder, file.user_email):\n                    reached_terminal = True\n                    break\n\n                # If parents is empty but we couldn't verify it's a true root,\n                # stop walking but don't mark as fully walked (another user\n                # with better access might be able to continue)\n                if folder_parent_id is None:\n                    break\n\n                # Move to parent\n                current_id = folder_parent_id\n\n            # If we successfully reached a terminal node (or a fully-walked node),\n            # mark all nodes in this walk as fully walked\n            if reached_terminal:\n                fully_walked_hierarchy_node_raw_ids.update(set(node_ids_in_walk))\n\n            new_nodes += ancestors_to_add\n\n        return new_nodes\n\n    def _get_folder_metadata(\n        self, folder_id: str, retriever_email: str, field_type: DriveFileFieldType\n    ) -> GoogleDriveFileType | None:\n        \"\"\"\n        Fetch metadata for a folder by ID.\n\n        Important: When a user has access to a shared folder but NOT its parent,\n        the Google Drive API returns the folder metadata WITHOUT the parent info.\n        To handle this, if the retriever gets a folder without parents, we also\n        try with admin who may have better access and can see the parent chain.\n        \"\"\"\n        best_folder: GoogleDriveFileType | None = None\n\n        # Use a set to deduplicate if retriever_email == primary_admin_email\n        for email in {retriever_email, self.primary_admin_email}:\n            service = get_drive_service(self.creds, email)\n            folder = get_folder_metadata(service, folder_id, field_type)\n\n            if not folder:\n                logger.debug(f\"Failed to fetch folder {folder_id} using {email}\")\n                continue\n\n            logger.debug(f\"Successfully fetched folder {folder_id} using {email}\")\n\n            # If this folder has parents, use it\n            if folder.get(\"parents\"):\n                return folder\n\n            # Folder has no parents - could be a root OR user lacks access to parent\n            # Keep this as a fallback but try admin to see if they can see parents\n            if best_folder is None:\n                best_folder = folder\n                logger.debug(\n                    f\"Folder {folder_id} has no parents when fetched by {email}, will try admin to check for parent access\"\n                )\n\n        if best_folder:\n            logger.debug(\n                f\"Successfully fetched folder {folder_id} but no parents found\"\n            )\n            return best_folder\n\n        logger.debug(\n            f\"All attempts failed to fetch folder {folder_id} (tried {retriever_email} and {self.primary_admin_email})\"\n        )\n        return None\n\n    def _get_shared_drive_name(self, drive_id: str, retriever_email: str) -> str | None:\n        \"\"\"Fetch the name of a shared drive, trying both the retriever and admin.\"\"\"\n        for email in {retriever_email, self.primary_admin_email}:\n            svc = get_drive_service(self.creds, email)\n            name = get_shared_drive_name(svc, drive_id)\n            if name:\n                return name\n        return None\n\n    def get_all_drive_ids(self) -> set[str]:\n        return self._get_all_drives_for_user(self.primary_admin_email)\n\n    def _get_all_drives_for_user(self, user_email: str) -> set[str]:\n        drive_service = get_drive_service(self.creds, user_email)\n        is_service_account = isinstance(self.creds, ServiceAccountCredentials)\n        logger.info(\n            f\"Getting all drives for user {user_email} with service account: {is_service_account}\"\n        )\n        all_drive_ids: set[str] = set()\n        for drive in execute_paginated_retrieval(\n            retrieval_function=drive_service.drives().list,\n            list_key=\"drives\",\n            useDomainAdminAccess=is_service_account,\n            fields=\"drives(id),nextPageToken\",\n        ):\n            all_drive_ids.add(drive[\"id\"])\n\n        if not all_drive_ids:\n            logger.warning(\n                \"No drives found even though indexing shared drives was requested.\"\n            )\n\n        return all_drive_ids\n\n    def make_drive_id_getter(\n        self, drive_ids: list[str], checkpoint: GoogleDriveCheckpoint\n    ) -> Callable[[str], str | None]:\n        status_lock = threading.Lock()\n\n        in_progress_drive_ids = {\n            completion.current_folder_or_drive_id: user_email\n            for user_email, completion in checkpoint.completion_map.items()\n            if completion.stage == DriveRetrievalStage.SHARED_DRIVE_FILES\n            and completion.current_folder_or_drive_id is not None\n        }\n        drive_id_status: dict[str, DriveIdStatus] = {}\n        for drive_id in drive_ids:\n            if drive_id in self._retrieved_folder_and_drive_ids:\n                drive_id_status[drive_id] = DriveIdStatus.FINISHED\n            elif drive_id in in_progress_drive_ids:\n                drive_id_status[drive_id] = DriveIdStatus.IN_PROGRESS\n            else:\n                drive_id_status[drive_id] = DriveIdStatus.AVAILABLE\n\n        def get_available_drive_id(thread_id: str) -> str | None:\n            completion = checkpoint.completion_map[thread_id]\n            with status_lock:\n                future_work = None\n                for drive_id, status in drive_id_status.items():\n                    if drive_id in self._retrieved_folder_and_drive_ids:\n                        drive_id_status[drive_id] = DriveIdStatus.FINISHED\n                        continue\n                    if drive_id in completion.processed_drive_ids:\n                        continue\n\n                    if status == DriveIdStatus.AVAILABLE:\n                        # add to processed drive ids so if this user fails to retrieve once\n                        # they won't try again on the next checkpoint run\n                        completion.processed_drive_ids.add(drive_id)\n                        return drive_id\n                    elif status == DriveIdStatus.IN_PROGRESS:\n                        logger.debug(f\"Drive id in progress: {drive_id}\")\n                        future_work = drive_id\n\n                if future_work:\n                    # in this case, all drive ids are either finished or in progress.\n                    # This thread will pick up one of the in progress ones in case it fails.\n                    # This is a much simpler approach than waiting for a failure picking it up,\n                    # at the cost of some repeated work until all shared drives are retrieved.\n                    # we avoid apocalyptic cases like all threads focusing on one huge drive\n                    # because the drive id is added to _retrieved_folder_and_drive_ids after any thread\n                    # manages to retrieve any file from it (unfortunately, this is also the reason we currently\n                    # sometimes fail to retrieve restricted access folders/files)\n                    completion.processed_drive_ids.add(future_work)\n                    return future_work\n            return None  # no work available, return None\n\n        return get_available_drive_id\n\n    def _impersonate_user_for_retrieval(\n        self,\n        user_email: str,\n        field_type: DriveFileFieldType,\n        checkpoint: GoogleDriveCheckpoint,\n        get_new_drive_id: Callable[[str], str | None],\n        sorted_filtered_folder_ids: list[str],\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> Iterator[RetrievedDriveFile]:\n        logger.info(f\"Impersonating user {user_email}\")\n        curr_stage = checkpoint.completion_map[user_email]\n        resuming = True\n        if curr_stage.stage == DriveRetrievalStage.START:\n            logger.info(f\"Setting stage to {DriveRetrievalStage.MY_DRIVE_FILES.value}\")\n            curr_stage.stage = DriveRetrievalStage.MY_DRIVE_FILES\n            resuming = False\n        drive_service = get_drive_service(self.creds, user_email)\n\n        # validate that the user has access to the drive APIs by performing a simple\n        # request and checking for a 401\n        try:\n            logger.debug(f\"Getting root folder id for user {user_email}\")\n            # default is ~17mins of retries, don't do that here for cases so we don't\n            # waste 17mins everytime we run into a user without access to drive APIs\n            retry_builder(tries=3, delay=1)(get_root_folder_id)(drive_service)\n        except HttpError as e:\n            if e.status_code == 401:\n                # fail gracefully, let the other impersonations continue\n                # one user without access shouldn't block the entire connector\n                logger.warning(\n                    f\"User '{user_email}' does not have access to the drive APIs.\"\n                )\n                # mark this user as done so we don't try to retrieve anything for them\n                # again\n                curr_stage.stage = DriveRetrievalStage.DONE\n                return\n            raise\n        except RefreshError as e:\n            logger.warning(\n                f\"User '{user_email}' could not refresh their token. Error: {e}\"\n            )\n            # mark this user as done so we don't try to retrieve anything for them\n            # again\n            yield RetrievedDriveFile(\n                completion_stage=DriveRetrievalStage.DONE,\n                drive_file={},\n                user_email=user_email,\n                error=e,\n            )\n            curr_stage.stage = DriveRetrievalStage.DONE\n            return\n        # if we are including my drives, try to get the current user's my\n        # drive if any of the following are true:\n        # - include_my_drives is true\n        # - the current user's email is in the requested emails\n        if curr_stage.stage == DriveRetrievalStage.MY_DRIVE_FILES:\n            if self.include_my_drives or user_email in self._requested_my_drive_emails:\n                logger.info(\n                    f\"Getting all files in my drive as '{user_email}. Resuming: {resuming}. \"\n                    f\"Stage completed until: {curr_stage.completed_until}. \"\n                    f\"Next page token: {curr_stage.next_page_token}\"\n                )\n\n                for file_or_token in add_retrieval_info(\n                    get_all_files_in_my_drive_and_shared(\n                        service=drive_service,\n                        update_traversed_ids_func=self._update_traversed_parent_ids,\n                        field_type=field_type,\n                        include_shared_with_me=self.include_files_shared_with_me,\n                        max_num_pages=MY_DRIVE_PAGES_PER_CHECKPOINT,\n                        start=curr_stage.completed_until if resuming else start,\n                        end=end,\n                        cache_folders=not bool(curr_stage.completed_until),\n                        page_token=curr_stage.next_page_token,\n                    ),\n                    user_email,\n                    DriveRetrievalStage.MY_DRIVE_FILES,\n                ):\n                    if isinstance(file_or_token, str):\n                        logger.debug(f\"Done with max num pages for user {user_email}\")\n                        checkpoint.completion_map[user_email].next_page_token = (\n                            file_or_token\n                        )\n                        return  # done with the max num pages, return checkpoint\n                    yield file_or_token\n\n            checkpoint.completion_map[user_email].next_page_token = None\n            curr_stage.stage = DriveRetrievalStage.SHARED_DRIVE_FILES\n            curr_stage.current_folder_or_drive_id = None\n            return  # resume from next stage on the next run\n\n        if curr_stage.stage == DriveRetrievalStage.SHARED_DRIVE_FILES:\n\n            def _yield_from_drive(\n                drive_id: str, drive_start: SecondsSinceUnixEpoch | None\n            ) -> Iterator[RetrievedDriveFile | str]:\n                yield from add_retrieval_info(\n                    get_files_in_shared_drive(\n                        service=drive_service,\n                        drive_id=drive_id,\n                        field_type=field_type,\n                        max_num_pages=SHARED_DRIVE_PAGES_PER_CHECKPOINT,\n                        update_traversed_ids_func=self._update_traversed_parent_ids,\n                        cache_folders=not bool(\n                            drive_start\n                        ),  # only cache folders for 0 or None\n                        start=drive_start,\n                        end=end,\n                        page_token=curr_stage.next_page_token,\n                    ),\n                    user_email,\n                    DriveRetrievalStage.SHARED_DRIVE_FILES,\n                    parent_id=drive_id,\n                )\n\n            # resume from a checkpoint\n            if resuming and (drive_id := curr_stage.current_folder_or_drive_id):\n                resume_start = curr_stage.completed_until\n                for file_or_token in _yield_from_drive(drive_id, resume_start):\n                    if isinstance(file_or_token, str):\n                        checkpoint.completion_map[user_email].next_page_token = (\n                            file_or_token\n                        )\n                        return  # done with the max num pages, return checkpoint\n                    yield file_or_token\n\n            drive_id = get_new_drive_id(user_email)\n            if drive_id:\n                logger.info(\n                    f\"Getting files in shared drive '{drive_id}' as '{user_email}. Resuming: {resuming}\"\n                )\n                curr_stage.completed_until = 0\n                curr_stage.current_folder_or_drive_id = drive_id\n                for file_or_token in _yield_from_drive(drive_id, start):\n                    if isinstance(file_or_token, str):\n                        checkpoint.completion_map[user_email].next_page_token = (\n                            file_or_token\n                        )\n                        return  # done with the max num pages, return checkpoint\n                    yield file_or_token\n                curr_stage.current_folder_or_drive_id = None\n                return  # get a new drive id on the next run\n\n            checkpoint.completion_map[user_email].next_page_token = None\n            curr_stage.stage = DriveRetrievalStage.FOLDER_FILES\n            curr_stage.current_folder_or_drive_id = None\n            return  # resume from next stage on the next run\n\n        # In the folder files section of service account retrieval we take extra care\n        # to not retrieve duplicate docs. In particular, we only add a folder to\n        # retrieved_folder_and_drive_ids when all users are finished retrieving files\n        # from that folder, and maintain a set of all file ids that have been retrieved\n        # for each folder. This might get rather large; in practice we assume that the\n        # specific folders users choose to index don't have too many files.\n        if curr_stage.stage == DriveRetrievalStage.FOLDER_FILES:\n\n            def _yield_from_folder_crawl(\n                folder_id: str, folder_start: SecondsSinceUnixEpoch | None\n            ) -> Iterator[RetrievedDriveFile]:\n                for retrieved_file in crawl_folders_for_files(\n                    service=drive_service,\n                    parent_id=folder_id,\n                    field_type=field_type,\n                    user_email=user_email,\n                    traversed_parent_ids=self._retrieved_folder_and_drive_ids,\n                    update_traversed_ids_func=self._update_traversed_parent_ids,\n                    start=folder_start,\n                    end=end,\n                ):\n                    yield retrieved_file\n\n            # resume from a checkpoint\n            last_processed_folder = None\n            if resuming:\n                folder_id = curr_stage.current_folder_or_drive_id\n                if folder_id is None:\n                    logger.warning(\n                        f\"folder id not set in checkpoint for user {user_email}. \"\n                        \"This happens occasionally when the connector is interrupted \"\n                        \"and resumed.\"\n                    )\n                else:\n                    resume_start = curr_stage.completed_until\n                    yield from _yield_from_folder_crawl(folder_id, resume_start)\n                last_processed_folder = folder_id\n\n            skipping_seen_folders = last_processed_folder is not None\n            # NOTE: this assumes a small number of folders to crawl. If someone\n            # really wants to specify a large number of folders, we should use\n            # binary search to find the first unseen folder.\n            num_completed_folders = 0\n            for folder_id in sorted_filtered_folder_ids:\n                if skipping_seen_folders:\n                    skipping_seen_folders = folder_id != last_processed_folder\n                    continue\n\n                if folder_id in self._retrieved_folder_and_drive_ids:\n                    continue\n\n                curr_stage.completed_until = 0\n                curr_stage.current_folder_or_drive_id = folder_id\n\n                if num_completed_folders >= FOLDERS_PER_CHECKPOINT:\n                    return  # resume from this folder on the next run\n\n                logger.info(f\"Getting files in folder '{folder_id}' as '{user_email}'\")\n                yield from _yield_from_folder_crawl(folder_id, start)\n                num_completed_folders += 1\n\n        curr_stage.stage = DriveRetrievalStage.DONE\n\n    def _manage_service_account_retrieval(\n        self,\n        field_type: DriveFileFieldType,\n        checkpoint: GoogleDriveCheckpoint,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> Iterator[RetrievedDriveFile]:\n        \"\"\"\n        The current implementation of the service account retrieval does some\n        initial setup work using the primary admin email, then runs MAX_DRIVE_WORKERS\n        concurrent threads, each of which impersonates a different user and retrieves\n        files for that user. Technically, the actual work each thread does is \"yield the\n        next file retrieved by the user\", at which point it returns to the thread pool;\n        see parallel_yield for more details.\n        \"\"\"\n        if checkpoint.completion_stage == DriveRetrievalStage.START:\n            checkpoint.completion_stage = DriveRetrievalStage.USER_EMAILS\n\n        if checkpoint.completion_stage == DriveRetrievalStage.USER_EMAILS:\n            all_org_emails: list[str] = self._get_all_user_emails()\n            checkpoint.user_emails = all_org_emails\n            checkpoint.completion_stage = DriveRetrievalStage.DRIVE_IDS\n        else:\n            if checkpoint.user_emails is None:\n                raise ValueError(\"user emails not set\")\n            all_org_emails = checkpoint.user_emails\n\n        sorted_drive_ids, sorted_folder_ids = self._determine_retrieval_ids(\n            checkpoint, DriveRetrievalStage.MY_DRIVE_FILES\n        )\n\n        # Setup initial completion map on first connector run\n        for email in all_org_emails:\n            # don't overwrite existing completion map on resuming runs\n            if email in checkpoint.completion_map:\n                continue\n            checkpoint.completion_map[email] = StageCompletion(\n                stage=DriveRetrievalStage.START,\n                completed_until=0,\n                processed_drive_ids=set(),\n            )\n\n        # we've found all users and drives, now time to actually start\n        # fetching stuff\n        logger.info(f\"Found {len(all_org_emails)} users to impersonate\")\n        logger.debug(f\"Users: {all_org_emails}\")\n        logger.info(f\"Found {len(sorted_drive_ids)} drives to retrieve\")\n        logger.debug(f\"Drives: {sorted_drive_ids}\")\n        logger.info(f\"Found {len(sorted_folder_ids)} folders to retrieve\")\n        logger.debug(f\"Folders: {sorted_folder_ids}\")\n\n        drive_id_getter = self.make_drive_id_getter(sorted_drive_ids, checkpoint)\n\n        # only process emails that we haven't already completed retrieval for\n        non_completed_org_emails = [\n            user_email\n            for user_email, stage_completion in checkpoint.completion_map.items()\n            if stage_completion.stage != DriveRetrievalStage.DONE\n        ]\n\n        logger.debug(f\"Non-completed users remaining: {len(non_completed_org_emails)}\")\n\n        # don't process too many emails before returning a checkpoint. This is\n        # to resolve the case where there are a ton of emails that don't have access\n        # to the drive APIs. Without this, we could loop through these emails for\n        # more than 3 hours, causing a timeout and stalling progress.\n        email_batch_takes_us_to_completion = True\n        MAX_EMAILS_TO_PROCESS_BEFORE_CHECKPOINTING = MAX_DRIVE_WORKERS\n        if len(non_completed_org_emails) > MAX_EMAILS_TO_PROCESS_BEFORE_CHECKPOINTING:\n            non_completed_org_emails = non_completed_org_emails[\n                :MAX_EMAILS_TO_PROCESS_BEFORE_CHECKPOINTING\n            ]\n            email_batch_takes_us_to_completion = False\n\n        user_retrieval_gens = [\n            self._impersonate_user_for_retrieval(\n                email,\n                field_type,\n                checkpoint,\n                drive_id_getter,\n                sorted_folder_ids,\n                start,\n                end,\n            )\n            for email in non_completed_org_emails\n        ]\n        yield from parallel_yield(user_retrieval_gens, max_workers=MAX_DRIVE_WORKERS)\n\n        # if there are more emails to process, don't mark as complete\n        if not email_batch_takes_us_to_completion:\n            return\n\n        remaining_folders = (\n            set(sorted_drive_ids) | set(sorted_folder_ids)\n        ) - self._retrieved_folder_and_drive_ids\n        if remaining_folders:\n            logger.warning(\n                f\"Some folders/drives were not retrieved. IDs: {remaining_folders}\"\n            )\n        if any(\n            checkpoint.completion_map[user_email].stage != DriveRetrievalStage.DONE\n            for user_email in all_org_emails\n        ):\n            logger.info(\n                \"some users did not complete retrieval, returning checkpoint for another run\"\n            )\n            return\n        checkpoint.completion_stage = DriveRetrievalStage.DONE\n\n    def _determine_retrieval_ids(\n        self,\n        checkpoint: GoogleDriveCheckpoint,\n        next_stage: DriveRetrievalStage,\n    ) -> tuple[list[str], list[str]]:\n        all_drive_ids = self.get_all_drive_ids()\n        sorted_drive_ids: list[str] = []\n        sorted_folder_ids: list[str] = []\n        if checkpoint.completion_stage == DriveRetrievalStage.DRIVE_IDS:\n            if self._requested_shared_drive_ids or self._requested_folder_ids:\n                (\n                    sorted_drive_ids,\n                    sorted_folder_ids,\n                ) = _clean_requested_drive_ids(\n                    requested_drive_ids=self._requested_shared_drive_ids,\n                    requested_folder_ids=self._requested_folder_ids,\n                    all_drive_ids_available=all_drive_ids,\n                )\n            elif self.include_shared_drives:\n                sorted_drive_ids = sorted(all_drive_ids)\n\n            checkpoint.drive_ids_to_retrieve = sorted_drive_ids\n            checkpoint.folder_ids_to_retrieve = sorted_folder_ids\n            checkpoint.completion_stage = next_stage\n        else:\n            if checkpoint.drive_ids_to_retrieve is None:\n                raise ValueError(\"drive ids to retrieve not set in checkpoint\")\n            if checkpoint.folder_ids_to_retrieve is None:\n                raise ValueError(\"folder ids to retrieve not set in checkpoint\")\n            # When loading from a checkpoint, load the previously cached drive and folder ids\n            sorted_drive_ids = checkpoint.drive_ids_to_retrieve\n            sorted_folder_ids = checkpoint.folder_ids_to_retrieve\n\n        return sorted_drive_ids, sorted_folder_ids\n\n    def _oauth_retrieval_all_files(\n        self,\n        field_type: DriveFileFieldType,\n        drive_service: GoogleDriveService,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        page_token: str | None = None,\n    ) -> Iterator[RetrievedDriveFile | str]:\n        if not self.include_files_shared_with_me and not self.include_my_drives:\n            return\n\n        logger.info(\n            f\"Getting shared files/my drive files for OAuth \"\n            f\"with include_files_shared_with_me={self.include_files_shared_with_me}, \"\n            f\"include_my_drives={self.include_my_drives}, \"\n            f\"include_shared_drives={self.include_shared_drives}.\"\n            f\"Using '{self.primary_admin_email}' as the account.\"\n        )\n        yield from add_retrieval_info(\n            get_all_files_for_oauth(\n                service=drive_service,\n                include_files_shared_with_me=self.include_files_shared_with_me,\n                include_my_drives=self.include_my_drives,\n                include_shared_drives=self.include_shared_drives,\n                field_type=field_type,\n                max_num_pages=OAUTH_PAGES_PER_CHECKPOINT,\n                start=start,\n                end=end,\n                page_token=page_token,\n            ),\n            self.primary_admin_email,\n            DriveRetrievalStage.OAUTH_FILES,\n        )\n\n    def _oauth_retrieval_drives(\n        self,\n        field_type: DriveFileFieldType,\n        drive_service: GoogleDriveService,\n        drive_ids_to_retrieve: list[str],\n        checkpoint: GoogleDriveCheckpoint,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> Iterator[RetrievedDriveFile | str]:\n        def _yield_from_drive(\n            drive_id: str, drive_start: SecondsSinceUnixEpoch | None\n        ) -> Iterator[RetrievedDriveFile | str]:\n            yield from add_retrieval_info(\n                get_files_in_shared_drive(\n                    service=drive_service,\n                    drive_id=drive_id,\n                    field_type=field_type,\n                    max_num_pages=SHARED_DRIVE_PAGES_PER_CHECKPOINT,\n                    cache_folders=not bool(\n                        drive_start\n                    ),  # only cache folders for 0 or None\n                    update_traversed_ids_func=self._update_traversed_parent_ids,\n                    start=drive_start,\n                    end=end,\n                    page_token=checkpoint.completion_map[\n                        self.primary_admin_email\n                    ].next_page_token,\n                ),\n                self.primary_admin_email,\n                DriveRetrievalStage.SHARED_DRIVE_FILES,\n                parent_id=drive_id,\n            )\n\n        # If we are resuming from a checkpoint, we need to finish retrieving the files from the last drive we retrieved\n        if (\n            checkpoint.completion_map[self.primary_admin_email].stage\n            == DriveRetrievalStage.SHARED_DRIVE_FILES\n        ):\n            drive_id = checkpoint.completion_map[\n                self.primary_admin_email\n            ].current_folder_or_drive_id\n            if drive_id is None:\n                raise ValueError(\"drive id not set in checkpoint\")\n            resume_start = checkpoint.completion_map[\n                self.primary_admin_email\n            ].completed_until\n            for file_or_token in _yield_from_drive(drive_id, resume_start):\n                if isinstance(file_or_token, str):\n                    checkpoint.completion_map[\n                        self.primary_admin_email\n                    ].next_page_token = file_or_token\n                    return  # done with the max num pages, return checkpoint\n                yield file_or_token\n            checkpoint.completion_map[self.primary_admin_email].next_page_token = None\n\n        for drive_id in drive_ids_to_retrieve:\n            if drive_id in self._retrieved_folder_and_drive_ids:\n                logger.info(\n                    f\"Skipping drive '{drive_id}' as it has already been retrieved\"\n                )\n                continue\n            logger.info(\n                f\"Getting files in shared drive '{drive_id}' as '{self.primary_admin_email}'\"\n            )\n            for file_or_token in _yield_from_drive(drive_id, start):\n                if isinstance(file_or_token, str):\n                    checkpoint.completion_map[\n                        self.primary_admin_email\n                    ].next_page_token = file_or_token\n                    return  # done with the max num pages, return checkpoint\n                yield file_or_token\n            checkpoint.completion_map[self.primary_admin_email].next_page_token = None\n\n    def _oauth_retrieval_folders(\n        self,\n        field_type: DriveFileFieldType,\n        drive_service: GoogleDriveService,\n        drive_ids_to_retrieve: set[str],\n        folder_ids_to_retrieve: set[str],\n        checkpoint: GoogleDriveCheckpoint,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> Iterator[RetrievedDriveFile]:\n        \"\"\"\n        If there are any remaining folder ids to retrieve found earlier in the\n        retrieval process, we recursively descend the file tree and retrieve all\n        files in the folder(s).\n        \"\"\"\n        # Even if no folders were requested, we still check if any drives were requested\n        # that could be folders.\n        remaining_folders = (\n            folder_ids_to_retrieve - self._retrieved_folder_and_drive_ids\n        )\n\n        def _yield_from_folder_crawl(\n            folder_id: str, folder_start: SecondsSinceUnixEpoch | None\n        ) -> Iterator[RetrievedDriveFile]:\n            yield from crawl_folders_for_files(\n                service=drive_service,\n                parent_id=folder_id,\n                field_type=field_type,\n                user_email=self.primary_admin_email,\n                traversed_parent_ids=self._retrieved_folder_and_drive_ids,\n                update_traversed_ids_func=self._update_traversed_parent_ids,\n                start=folder_start,\n                end=end,\n            )\n\n        # resume from a checkpoint\n        # TODO: actually checkpoint folder retrieval. Since we moved towards returning from\n        # generator functions to indicate when a checkpoint should be returned, this code\n        # shouldn't be used currently. Unfortunately folder crawling is quite difficult to checkpoint\n        # effectively (likely need separate folder crawling and file retrieval stages),\n        # so we'll revisit this later.\n        if checkpoint.completion_map[\n            self.primary_admin_email\n        ].stage == DriveRetrievalStage.FOLDER_FILES and (\n            folder_id := checkpoint.completion_map[\n                self.primary_admin_email\n            ].current_folder_or_drive_id\n        ):\n            resume_start = checkpoint.completion_map[\n                self.primary_admin_email\n            ].completed_until\n            yield from _yield_from_folder_crawl(folder_id, resume_start)\n\n        # the times stored in the completion_map aren't used due to the crawling behavior\n        # instead, the traversed_parent_ids are used to determine what we have left to retrieve\n        for folder_id in remaining_folders:\n            logger.info(\n                f\"Getting files in folder '{folder_id}' as '{self.primary_admin_email}'\"\n            )\n            yield from _yield_from_folder_crawl(folder_id, start)\n\n        remaining_folders = (\n            drive_ids_to_retrieve | folder_ids_to_retrieve\n        ) - self._retrieved_folder_and_drive_ids\n        if remaining_folders:\n            logger.warning(\n                f\"Some folders/drives were not retrieved. IDs: {remaining_folders}\"\n            )\n\n    def _checkpointed_retrieval(\n        self,\n        retrieval_method: CredentialedRetrievalMethod,\n        field_type: DriveFileFieldType,\n        checkpoint: GoogleDriveCheckpoint,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> Iterator[RetrievedDriveFile]:\n        drive_files = retrieval_method(\n            field_type=field_type,\n            checkpoint=checkpoint,\n            start=start,\n            end=end,\n        )\n\n        for file in drive_files:\n            drive_file = file.drive_file or {}\n            completion = checkpoint.completion_map[file.user_email]\n\n            completed_until = completion.completed_until\n            modified_time = drive_file.get(GoogleFields.MODIFIED_TIME.value)\n            if isinstance(modified_time, str):\n                try:\n                    completed_until = datetime.fromisoformat(modified_time).timestamp()\n                except ValueError:\n                    logger.warning(\n                        \"Invalid modifiedTime for file '%s' (stage=%s, user=%s).\",\n                        drive_file.get(\"id\"),\n                        file.completion_stage,\n                        file.user_email,\n                    )\n\n            completion.update(\n                stage=file.completion_stage,\n                completed_until=completed_until,\n                current_folder_or_drive_id=file.parent_id,\n            )\n\n            if file.error is not None or not drive_file:\n                yield file\n                continue\n\n            try:\n                document_id = onyx_document_id_from_drive_file(drive_file)\n            except KeyError as exc:\n                logger.warning(\n                    \"Drive file missing id/webViewLink (stage=%s user=%s). Skipping.\",\n                    file.completion_stage,\n                    file.user_email,\n                )\n                if file.error is None:\n                    file.error = exc\n                yield file\n                continue\n\n            logger.debug(\n                f\"Updating checkpoint for file: {drive_file.get('name')}. \"\n                f\"Seen: {document_id in checkpoint.all_retrieved_file_ids}\"\n            )\n            if document_id in checkpoint.all_retrieved_file_ids:\n                continue\n\n            checkpoint.all_retrieved_file_ids.add(document_id)\n            yield file\n\n    def _manage_oauth_retrieval(\n        self,\n        field_type: DriveFileFieldType,\n        checkpoint: GoogleDriveCheckpoint,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> Iterator[RetrievedDriveFile]:\n        if checkpoint.completion_stage == DriveRetrievalStage.START:\n            checkpoint.completion_stage = DriveRetrievalStage.OAUTH_FILES\n            checkpoint.completion_map[self.primary_admin_email] = StageCompletion(\n                stage=DriveRetrievalStage.START,\n                completed_until=0,\n                current_folder_or_drive_id=None,\n            )\n\n        drive_service = get_drive_service(self.creds, self.primary_admin_email)\n\n        if checkpoint.completion_stage == DriveRetrievalStage.OAUTH_FILES:\n            completion = checkpoint.completion_map[self.primary_admin_email]\n            all_files_start = start\n            # if resuming from a checkpoint\n            if completion.stage == DriveRetrievalStage.OAUTH_FILES:\n                all_files_start = completion.completed_until\n\n            for file_or_token in self._oauth_retrieval_all_files(\n                field_type=field_type,\n                drive_service=drive_service,\n                start=all_files_start,\n                end=end,\n                page_token=checkpoint.completion_map[\n                    self.primary_admin_email\n                ].next_page_token,\n            ):\n                if isinstance(file_or_token, str):\n                    checkpoint.completion_map[\n                        self.primary_admin_email\n                    ].next_page_token = file_or_token\n                    return  # done with the max num pages, return checkpoint\n                yield file_or_token\n            checkpoint.completion_stage = DriveRetrievalStage.DRIVE_IDS\n            checkpoint.completion_map[self.primary_admin_email].next_page_token = None\n            return  # create a new checkpoint\n\n        all_requested = (\n            self.include_files_shared_with_me\n            and self.include_my_drives\n            and self.include_shared_drives\n        )\n        if all_requested:\n            # If all 3 are true, we already yielded from get_all_files_for_oauth\n            checkpoint.completion_stage = DriveRetrievalStage.DONE\n            return\n\n        sorted_drive_ids, sorted_folder_ids = self._determine_retrieval_ids(\n            checkpoint, DriveRetrievalStage.SHARED_DRIVE_FILES\n        )\n\n        if checkpoint.completion_stage == DriveRetrievalStage.SHARED_DRIVE_FILES:\n            for file_or_token in self._oauth_retrieval_drives(\n                field_type=field_type,\n                drive_service=drive_service,\n                drive_ids_to_retrieve=sorted_drive_ids,\n                checkpoint=checkpoint,\n                start=start,\n                end=end,\n            ):\n                if isinstance(file_or_token, str):\n                    checkpoint.completion_map[\n                        self.primary_admin_email\n                    ].next_page_token = file_or_token\n                    return  # done with the max num pages, return checkpoint\n                yield file_or_token\n            checkpoint.completion_stage = DriveRetrievalStage.FOLDER_FILES\n            checkpoint.completion_map[self.primary_admin_email].next_page_token = None\n            return  # create a new checkpoint\n\n        if checkpoint.completion_stage == DriveRetrievalStage.FOLDER_FILES:\n            yield from self._oauth_retrieval_folders(\n                field_type=field_type,\n                drive_service=drive_service,\n                drive_ids_to_retrieve=set(sorted_drive_ids),\n                folder_ids_to_retrieve=set(sorted_folder_ids),\n                checkpoint=checkpoint,\n                start=start,\n                end=end,\n            )\n\n        checkpoint.completion_stage = DriveRetrievalStage.DONE\n\n    def _fetch_drive_items(\n        self,\n        field_type: DriveFileFieldType,\n        checkpoint: GoogleDriveCheckpoint,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> Iterator[RetrievedDriveFile]:\n        retrieval_method = (\n            self._manage_service_account_retrieval\n            if isinstance(self.creds, ServiceAccountCredentials)\n            else self._manage_oauth_retrieval\n        )\n\n        return self._checkpointed_retrieval(\n            retrieval_method=retrieval_method,\n            field_type=field_type,\n            checkpoint=checkpoint,\n            start=start,\n            end=end,\n        )\n\n    def _convert_retrieved_files_to_documents(\n        self,\n        drive_files_iter: Iterator[RetrievedDriveFile],\n        checkpoint: GoogleDriveCheckpoint,\n        include_permissions: bool,\n    ) -> Iterator[Document | ConnectorFailure | HierarchyNode]:\n        \"\"\"\n        Converts retrieved files to documents, yielding HierarchyNode\n        objects for ancestor folders before the converted documents.\n        \"\"\"\n        permission_sync_context = (\n            PermissionSyncContext(\n                primary_admin_email=self.primary_admin_email,\n                google_domain=self.google_domain,\n            )\n            if include_permissions\n            else None\n        )\n\n        files_batch: list[RetrievedDriveFile] = []\n        for retrieved_file in drive_files_iter:\n            if self.exclude_domain_link_only and has_link_only_permission(\n                retrieved_file.drive_file\n            ):\n                continue\n            if retrieved_file.error is None:\n                files_batch.append(retrieved_file)\n                continue\n\n            failure_stage = retrieved_file.completion_stage.value\n            failure_message = f\"retrieval failure during stage: {failure_stage},\"\n            failure_message += f\"user: {retrieved_file.user_email},\"\n            failure_message += f\"parent drive/folder: {retrieved_file.parent_id},\"\n            failure_message += f\"error: {retrieved_file.error}\"\n            logger.error(failure_message)\n            yield ConnectorFailure(\n                failed_entity=EntityFailure(\n                    entity_id=retrieved_file.drive_file.get(\"id\", failure_stage),\n                ),\n                failure_message=failure_message,\n                exception=retrieved_file.error,\n            )\n\n        new_ancestors = self._get_new_ancestors_for_files(\n            files=files_batch,\n            seen_hierarchy_node_raw_ids=checkpoint.seen_hierarchy_node_raw_ids,\n            fully_walked_hierarchy_node_raw_ids=checkpoint.fully_walked_hierarchy_node_raw_ids,\n            permission_sync_context=permission_sync_context,\n            add_prefix=True,\n        )\n        if new_ancestors:\n            logger.debug(f\"Yielding {len(new_ancestors)} new hierarchy nodes\")\n            yield from new_ancestors\n\n        func_with_args = [\n            (\n                self._convert_retrieved_file_to_document,\n                (retrieved_file, permission_sync_context),\n            )\n            for retrieved_file in files_batch\n        ]\n        raw_results = cast(\n            list[Document | ConnectorFailure | None],\n            run_functions_tuples_in_parallel(func_with_args, max_workers=8),\n        )\n\n        results: list[Document | ConnectorFailure] = [\n            r for r in raw_results if r is not None\n        ]\n        logger.debug(f\"batch has {len(results)} docs or failures\")\n        yield from results\n\n        checkpoint.retrieved_folder_and_drive_ids = self._retrieved_folder_and_drive_ids\n\n    def _convert_retrieved_file_to_document(\n        self,\n        retrieved_file: RetrievedDriveFile,\n        permission_sync_context: PermissionSyncContext | None,\n    ) -> Document | ConnectorFailure | None:\n        \"\"\"\n        Converts a single retrieved file to a document.\n        \"\"\"\n        try:\n            return convert_drive_item_to_document(\n                self.creds,\n                self.allow_images,\n                self.size_threshold,\n                permission_sync_context,\n                [retrieved_file.user_email, self.primary_admin_email]\n                + get_file_owners(retrieved_file.drive_file, self.primary_admin_email),\n                retrieved_file.drive_file,\n            )\n        except Exception as e:\n            logger.exception(\n                f\"Error extracting document: \"\n                f\"{retrieved_file.drive_file.get('name')} from Google Drive\"\n            )\n            return ConnectorFailure(\n                failed_entity=EntityFailure(\n                    entity_id=retrieved_file.drive_file.get(\"id\", \"unknown\"),\n                ),\n                failure_message=(\n                    f\"Error extracting document: \"\n                    f\"{retrieved_file.drive_file.get('name')}\"\n                ),\n                exception=e,\n            )\n\n    def _load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: GoogleDriveCheckpoint,\n        include_permissions: bool,\n    ) -> CheckpointOutput[GoogleDriveCheckpoint]:\n        \"\"\"\n        Entrypoint for the connector; first run is with an empty checkpoint.\n        \"\"\"\n        if self._creds is None or self._primary_admin_email is None:\n            raise RuntimeError(\n                \"Credentials missing, should not call this method before calling load_credentials\"\n            )\n\n        logger.info(\n            f\"Loading from checkpoint with completion stage: {checkpoint.completion_stage},\"\n            f\"num retrieved ids: {len(checkpoint.all_retrieved_file_ids)}\"\n        )\n        checkpoint = copy.deepcopy(checkpoint)\n        self._retrieved_folder_and_drive_ids = checkpoint.retrieved_folder_and_drive_ids\n        try:\n            field_type = (\n                DriveFileFieldType.WITH_PERMISSIONS\n                if include_permissions or self.exclude_domain_link_only\n                else DriveFileFieldType.STANDARD\n            )\n            drive_files_iter = self._fetch_drive_items(\n                field_type=field_type,\n                checkpoint=checkpoint,\n                start=start,\n                end=end,\n            )\n            yield from self._convert_retrieved_files_to_documents(\n                drive_files_iter, checkpoint, include_permissions\n            )\n        except Exception as e:\n            if MISSING_SCOPES_ERROR_STR in str(e):\n                raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e\n            raise e\n        checkpoint.retrieved_folder_and_drive_ids = self._retrieved_folder_and_drive_ids\n\n        logger.info(\n            f\"num drive files retrieved: {len(checkpoint.all_retrieved_file_ids)}\"\n        )\n        if checkpoint.completion_stage == DriveRetrievalStage.DONE:\n            checkpoint.has_more = False\n        return checkpoint\n\n    @override\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: GoogleDriveCheckpoint,\n    ) -> CheckpointOutput[GoogleDriveCheckpoint]:\n        return self._load_from_checkpoint(\n            start, end, checkpoint, include_permissions=False\n        )\n\n    @override\n    def load_from_checkpoint_with_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: GoogleDriveCheckpoint,\n    ) -> CheckpointOutput[GoogleDriveCheckpoint]:\n        return self._load_from_checkpoint(\n            start, end, checkpoint, include_permissions=True\n        )\n\n    def _extract_slim_docs_from_google_drive(\n        self,\n        checkpoint: GoogleDriveCheckpoint,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> GenerateSlimDocumentOutput:\n        files_batch: list[RetrievedDriveFile] = []\n        slim_batch: list[SlimDocument | HierarchyNode] = []\n\n        def _yield_slim_batch() -> list[SlimDocument | HierarchyNode]:\n            \"\"\"Process files batch and return items to yield (hierarchy nodes + slim docs).\"\"\"\n            nonlocal files_batch, slim_batch\n\n            # Get new ancestor hierarchy nodes first\n            permission_sync_context = PermissionSyncContext(\n                primary_admin_email=self.primary_admin_email,\n                google_domain=self.google_domain,\n            )\n            new_ancestors = self._get_new_ancestors_for_files(\n                files=files_batch,\n                seen_hierarchy_node_raw_ids=checkpoint.seen_hierarchy_node_raw_ids,\n                fully_walked_hierarchy_node_raw_ids=checkpoint.fully_walked_hierarchy_node_raw_ids,\n                permission_sync_context=permission_sync_context,\n            )\n\n            # Build slim documents\n            for file in files_batch:\n                if doc := build_slim_document(\n                    self.creds,\n                    file.drive_file,\n                    PermissionSyncContext(\n                        primary_admin_email=self.primary_admin_email,\n                        google_domain=self.google_domain,\n                    ),\n                    retriever_email=file.user_email,\n                ):\n                    slim_batch.append(doc)\n\n            # Combine: hierarchy nodes first, then slim docs\n            result: list[SlimDocument | HierarchyNode] = []\n            result.extend(new_ancestors)\n            result.extend(slim_batch)\n            files_batch = []\n            slim_batch = []\n            return result\n\n        for file in self._fetch_drive_items(\n            field_type=DriveFileFieldType.SLIM,\n            checkpoint=checkpoint,\n            start=start,\n            end=end,\n        ):\n            if file.error is not None:\n                raise file.error\n            if self.exclude_domain_link_only and has_link_only_permission(\n                file.drive_file\n            ):\n                continue\n            files_batch.append(file)\n\n            if len(files_batch) >= SLIM_BATCH_SIZE:\n                yield _yield_slim_batch()\n                if callback:\n                    if callback.should_stop():\n                        raise RuntimeError(\n                            \"_extract_slim_docs_from_google_drive: Stop signal detected\"\n                        )\n                    callback.progress(\"_extract_slim_docs_from_google_drive\", 1)\n\n        # Yield remaining files\n        if files_batch:\n            yield _yield_slim_batch()\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> GenerateSlimDocumentOutput:\n        try:\n            checkpoint = self.build_dummy_checkpoint()\n            while checkpoint.completion_stage != DriveRetrievalStage.DONE:\n                yield from self._extract_slim_docs_from_google_drive(\n                    checkpoint=checkpoint,\n                    start=start,\n                    end=end,\n                    callback=callback,\n                )\n            logger.info(\"Drive perm sync: Slim doc retrieval complete\")\n\n        except Exception as e:\n            if MISSING_SCOPES_ERROR_STR in str(e):\n                raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e\n            raise e\n\n    def validate_connector_settings(self) -> None:\n        if self._creds is None:\n            raise ConnectorMissingCredentialError(\n                \"Google Drive credentials not loaded.\"\n            )\n\n        if self._primary_admin_email is None:\n            raise ConnectorValidationError(\n                \"Primary admin email not found in credentials. Ensure DB_CREDENTIALS_PRIMARY_ADMIN_KEY is set.\"\n            )\n\n        try:\n            drive_service = get_drive_service(self._creds, self._primary_admin_email)\n            drive_service.files().list(pageSize=1, fields=\"files(id)\").execute()\n\n            if isinstance(self._creds, ServiceAccountCredentials):\n                # default is ~17mins of retries, don't do that here since this is called from\n                # the UI\n                retry_builder(tries=3, delay=0.1)(get_root_folder_id)(drive_service)\n\n        except HttpError as e:\n            status_code = e.resp.status if e.resp else None\n            if status_code == 401:\n                raise CredentialExpiredError(\n                    \"Invalid or expired Google Drive credentials (401).\"\n                )\n            elif status_code == 403:\n                raise InsufficientPermissionsError(\n                    \"Google Drive app lacks required permissions (403). \"\n                    \"Please ensure the necessary scopes are granted and Drive \"\n                    \"apps are enabled.\"\n                )\n            else:\n                raise ConnectorValidationError(\n                    f\"Unexpected Google Drive error (status={status_code}): {e}\"\n                )\n\n        except Exception as e:\n            # Check for scope-related hints from the error message\n            if MISSING_SCOPES_ERROR_STR in str(e):\n                raise InsufficientPermissionsError(\n                    f\"Google Drive credentials are missing required scopes. {ONYX_SCOPE_INSTRUCTIONS}\"\n                )\n            raise ConnectorValidationError(\n                f\"Unexpected error during Google Drive validation: {e}\"\n            )\n\n    @override\n    def build_dummy_checkpoint(self) -> GoogleDriveCheckpoint:\n        return GoogleDriveCheckpoint(\n            retrieved_folder_and_drive_ids=set(),\n            completion_stage=DriveRetrievalStage.START,\n            completion_map=ThreadSafeDict(),\n            all_retrieved_file_ids=set(),\n            has_more=True,\n        )\n\n    @override\n    def validate_checkpoint_json(self, checkpoint_json: str) -> GoogleDriveCheckpoint:\n        return GoogleDriveCheckpoint.model_validate_json(checkpoint_json)\n\n\ndef get_credentials_from_env(email: str, oauth: bool) -> dict:\n    if oauth:\n        raw_credential_string = os.environ[\"GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR\"]\n    else:\n        raw_credential_string = os.environ[\"GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR\"]\n\n    refried_credential_string = json.dumps(json.loads(raw_credential_string))\n\n    # This is the Oauth token\n    DB_CREDENTIALS_DICT_TOKEN_KEY = \"google_tokens\"\n    # This is the service account key\n    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = \"google_service_account_key\"\n    # The email saved for both auth types\n    DB_CREDENTIALS_PRIMARY_ADMIN_KEY = \"google_primary_admin\"\n    DB_CREDENTIALS_AUTHENTICATION_METHOD = \"authentication_method\"\n    cred_key = (\n        DB_CREDENTIALS_DICT_TOKEN_KEY\n        if oauth\n        else DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY\n    )\n    return {\n        cred_key: refried_credential_string,\n        DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,\n        DB_CREDENTIALS_AUTHENTICATION_METHOD: \"uploaded\",\n    }\n\n\nclass CheckpointOutputWrapper:\n    \"\"\"\n    Wraps a CheckpointOutput generator to give things back in a more digestible format.\n    The connector format is easier for the connector implementor (e.g. it enforces exactly\n    one new checkpoint is returned AND that the checkpoint is at the end), thus the different\n    formats.\n    \"\"\"\n\n    def __init__(self) -> None:\n        self.next_checkpoint: GoogleDriveCheckpoint | None = None\n\n    def __call__(\n        self,\n        checkpoint_connector_generator: CheckpointOutput[GoogleDriveCheckpoint],\n    ) -> Generator[\n        tuple[Document | None, ConnectorFailure | None, GoogleDriveCheckpoint | None],\n        None,\n        None,\n    ]:\n        # grabs the final return value and stores it in the `next_checkpoint` variable\n        def _inner_wrapper(\n            checkpoint_connector_generator: CheckpointOutput[GoogleDriveCheckpoint],\n        ) -> CheckpointOutput[GoogleDriveCheckpoint]:\n            self.next_checkpoint = yield from checkpoint_connector_generator\n            return self.next_checkpoint  # not used\n\n        for document_or_failure in _inner_wrapper(checkpoint_connector_generator):\n            if isinstance(document_or_failure, Document):\n                yield document_or_failure, None, None\n            elif isinstance(document_or_failure, ConnectorFailure):\n                yield None, document_or_failure, None\n            else:\n                raise ValueError(\n                    f\"Invalid document_or_failure type: {type(document_or_failure)}\"\n                )\n\n        if self.next_checkpoint is None:\n            raise RuntimeError(\n                \"Checkpoint is None. This should never happen - the connector should always return a checkpoint.\"\n            )\n\n        yield None, None, self.next_checkpoint\n\n\ndef yield_all_docs_from_checkpoint_connector(\n    connector: GoogleDriveConnector,\n    start: SecondsSinceUnixEpoch,\n    end: SecondsSinceUnixEpoch,\n) -> Iterator[Document | ConnectorFailure]:\n    num_iterations = 0\n\n    checkpoint = connector.build_dummy_checkpoint()\n    while checkpoint.has_more:\n        doc_batch_generator = CheckpointOutputWrapper()(\n            connector.load_from_checkpoint(start, end, checkpoint)\n        )\n        for document, failure, next_checkpoint in doc_batch_generator:\n            if failure is not None:\n                yield failure\n            if document is not None:\n                yield document\n            if next_checkpoint is not None:\n                checkpoint = next_checkpoint\n\n        num_iterations += 1\n        if num_iterations > 100_000:\n            raise RuntimeError(\"Too many iterations. Infinite loop?\")\n\n\nif __name__ == \"__main__\":\n    import time\n\n    creds = get_credentials_from_env(\n        os.environ[\"GOOGLE_DRIVE_PRIMARY_ADMIN_EMAIL\"], False\n    )\n    connector = GoogleDriveConnector(\n        include_shared_drives=True,\n        shared_drive_urls=None,\n        include_my_drives=True,\n        my_drive_emails=None,\n        shared_folder_urls=None,\n        include_files_shared_with_me=True,\n        specific_user_emails=None,\n    )\n    connector.load_credentials(creds)\n    max_fsize = 0\n    biggest_fsize = 0\n    num_errors = 0\n    start_time = time.time()\n    with open(\"stats.txt\", \"w\") as f:\n        for num, doc_or_failure in enumerate(\n            yield_all_docs_from_checkpoint_connector(connector, 0, time.time())\n        ):\n            if num % 200 == 0:\n                f.write(f\"Processed {num} files\\n\")\n                f.write(f\"Max file size: {max_fsize / 1000_000:.2f} MB\\n\")\n                f.write(f\"Time so far: {time.time() - start_time:.2f} seconds\\n\")\n                f.write(\n                    f\"Docs per minute: {num / (time.time() - start_time) * 60:.2f}\\n\"\n                )\n                biggest_fsize = max(biggest_fsize, max_fsize)\n                max_fsize = 0\n            if isinstance(doc_or_failure, Document):\n                max_fsize = max(max_fsize, sys.getsizeof(doc_or_failure))\n            elif isinstance(doc_or_failure, ConnectorFailure):\n                num_errors += 1\n        print(f\"Num errors: {num_errors}\")\n        print(f\"Biggest file size: {biggest_fsize / 1000_000:.2f} MB\")\n        print(f\"Time taken: {time.time() - start_time:.2f} seconds\")\n"
  },
  {
    "path": "backend/onyx/connectors/google_drive/constants.py",
    "content": "UNSUPPORTED_FILE_TYPE_CONTENT = \"\"  # keep empty for now\nDRIVE_FOLDER_TYPE = \"application/vnd.google-apps.folder\"\nDRIVE_SHORTCUT_TYPE = \"application/vnd.google-apps.shortcut\"\nDRIVE_FILE_TYPE = \"application/vnd.google-apps.file\"\n"
  },
  {
    "path": "backend/onyx/connectors/google_drive/doc_conversion.py",
    "content": "import io\nfrom collections.abc import Callable\nfrom datetime import datetime\nfrom typing import Any\nfrom typing import cast\nfrom urllib.parse import urlparse\nfrom urllib.parse import urlunparse\n\nfrom googleapiclient.errors import HttpError  # type: ignore\nfrom googleapiclient.http import MediaIoBaseDownload  # type: ignore\nfrom pydantic import BaseModel\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE\nfrom onyx.connectors.google_drive.constants import DRIVE_SHORTCUT_TYPE\nfrom onyx.connectors.google_drive.models import GDriveMimeType\nfrom onyx.connectors.google_drive.models import GoogleDriveFileType\nfrom onyx.connectors.google_drive.section_extraction import get_document_sections\nfrom onyx.connectors.google_drive.section_extraction import HEADING_DELIMITER\nfrom onyx.connectors.google_utils.resources import get_drive_service\nfrom onyx.connectors.google_utils.resources import get_google_docs_service\nfrom onyx.connectors.google_utils.resources import GoogleDocsService\nfrom onyx.connectors.google_utils.resources import GoogleDriveService\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.extract_file_text import extract_file_text\nfrom onyx.file_processing.extract_file_text import get_file_ext\nfrom onyx.file_processing.extract_file_text import pptx_to_text\nfrom onyx.file_processing.extract_file_text import read_docx_file\nfrom onyx.file_processing.extract_file_text import read_pdf_file\nfrom onyx.file_processing.extract_file_text import xlsx_to_text\nfrom onyx.file_processing.file_types import OnyxFileExtensions\nfrom onyx.file_processing.file_types import OnyxMimeTypes\nfrom onyx.file_processing.image_utils import store_image_and_create_section\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import (\n    fetch_versioned_implementation_with_fallback,\n)\nfrom onyx.utils.variable_functionality import noop_fallback\n\nlogger = setup_logger()\n\n# Cache for folder path lookups to avoid redundant API calls\n# Maps folder_id -> (folder_name, parent_id)\n_folder_cache: dict[str, tuple[str, str | None]] = {}\n\n\ndef _get_folder_info(\n    service: GoogleDriveService, folder_id: str\n) -> tuple[str, str | None]:\n    \"\"\"Fetch folder name and parent ID, with caching.\"\"\"\n    if folder_id in _folder_cache:\n        return _folder_cache[folder_id]\n\n    try:\n        folder = (\n            service.files()\n            .get(\n                fileId=folder_id,\n                fields=\"name, parents\",\n                supportsAllDrives=True,\n            )\n            .execute()\n        )\n        folder_name = folder.get(\"name\", \"Unknown\")\n        parents = folder.get(\"parents\", [])\n        parent_id = parents[0] if parents else None\n        _folder_cache[folder_id] = (folder_name, parent_id)\n        return folder_name, parent_id\n    except HttpError as e:\n        logger.warning(f\"Failed to get folder info for {folder_id}: {e}\")\n        _folder_cache[folder_id] = (\"Unknown\", None)\n        return \"Unknown\", None\n\n\ndef _get_drive_name(service: GoogleDriveService, drive_id: str) -> str:\n    \"\"\"Fetch shared drive name.\"\"\"\n    cache_key = f\"drive_{drive_id}\"\n    if cache_key in _folder_cache:\n        return _folder_cache[cache_key][0]\n\n    try:\n        drive = service.drives().get(driveId=drive_id).execute()\n        drive_name = drive.get(\"name\", f\"Shared Drive {drive_id}\")\n        _folder_cache[cache_key] = (drive_name, None)\n        return drive_name\n    except HttpError as e:\n        logger.warning(f\"Failed to get drive name for {drive_id}: {e}\")\n        _folder_cache[cache_key] = (f\"Shared Drive {drive_id}\", None)\n        return f\"Shared Drive {drive_id}\"\n\n\ndef build_folder_path(\n    file: GoogleDriveFileType,\n    service: GoogleDriveService,\n    drive_id: str | None = None,\n    user_email: str | None = None,\n) -> list[str]:\n    \"\"\"\n    Build the full folder path for a file by walking up the parent chain.\n    Returns a list of folder names from root to immediate parent.\n\n    Args:\n        file: The Google Drive file object\n        service: Google Drive service instance\n        drive_id: Optional drive ID (will be extracted from file if not provided)\n        user_email: Optional user email to check ownership for \"My Drive\" vs \"Shared with me\"\n    \"\"\"\n    path_parts: list[str] = []\n\n    # Get drive_id from file if not provided\n    if drive_id is None:\n        drive_id = file.get(\"driveId\")\n\n    # Check if file is owned by the user (for distinguishing \"My Drive\" vs \"Shared with me\")\n    is_owned_by_user = False\n    if user_email:\n        owners = file.get(\"owners\", [])\n        is_owned_by_user = any(\n            owner.get(\"emailAddress\", \"\").lower() == user_email.lower()\n            for owner in owners\n        )\n\n    # Get the file's parent folder ID\n    parents = file.get(\"parents\", [])\n    if not parents:\n        # File is at root level\n        if drive_id:\n            return [_get_drive_name(service, drive_id)]\n        # If not in a shared drive, check if it's owned by the user\n        if is_owned_by_user:\n            return [\"My Drive\"]\n        else:\n            return [\"Shared with me\"]\n\n    parent_id: str | None = parents[0]\n\n    # Walk up the folder hierarchy (limit to 50 levels to prevent infinite loops)\n    visited: set[str] = set()\n    for _ in range(50):\n        if not parent_id or parent_id in visited:\n            break\n        visited.add(parent_id)\n\n        folder_name, next_parent = _get_folder_info(service, parent_id)\n\n        # Check if we've reached the root (parent is the drive itself or no parent)\n        if next_parent is None:\n            # This folder's name is either the drive root, My Drive, or Shared with me\n            if drive_id:\n                path_parts.insert(0, _get_drive_name(service, drive_id))\n            else:\n                # Not in a shared drive - determine if it's \"My Drive\" or \"Shared with me\"\n                if is_owned_by_user:\n                    path_parts.insert(0, \"My Drive\")\n                else:\n                    path_parts.insert(0, \"Shared with me\")\n            break\n        else:\n            path_parts.insert(0, folder_name)\n            parent_id = next_parent\n\n    # If we didn't find a root, determine the root based on ownership and drive\n    if not path_parts:\n        if drive_id:\n            return [_get_drive_name(service, drive_id)]\n        elif is_owned_by_user:\n            return [\"My Drive\"]\n        else:\n            return [\"Shared with me\"]\n\n    return path_parts\n\n\n# This is not a standard valid unicode char, it is used by the docs advanced API to\n# represent smart chips (elements like dates and doc links).\nSMART_CHIP_CHAR = \"\\ue907\"\nWEB_VIEW_LINK_KEY = \"webViewLink\"\n# Fallback templates for generating web links when Drive omits webViewLink.\n_FALLBACK_WEB_VIEW_LINK_TEMPLATES = {\n    GDriveMimeType.DOC.value: \"https://docs.google.com/document/d/{}/view\",\n    GDriveMimeType.SPREADSHEET.value: \"https://docs.google.com/spreadsheets/d/{}/view\",\n    GDriveMimeType.PPT.value: \"https://docs.google.com/presentation/d/{}/view\",\n}\n\nMAX_RETRIEVER_EMAILS = 20\nCHUNK_SIZE_BUFFER = 64  # extra bytes past the limit to read\n\n# Mapping of Google Drive mime types to export formats\nGOOGLE_MIME_TYPES_TO_EXPORT = {\n    GDriveMimeType.DOC.value: \"text/plain\",\n    GDriveMimeType.SPREADSHEET.value: \"text/csv\",\n    GDriveMimeType.PPT.value: \"text/plain\",\n}\n\n# Define Google MIME types mapping\nGOOGLE_MIME_TYPES = {\n    GDriveMimeType.DOC.value: \"text/plain\",\n    GDriveMimeType.SPREADSHEET.value: \"text/csv\",\n    GDriveMimeType.PPT.value: \"text/plain\",\n}\n\n\nclass PermissionSyncContext(BaseModel):\n    \"\"\"\n    This is the information that is needed to sync permissions for a document.\n    \"\"\"\n\n    primary_admin_email: str\n    google_domain: str\n\n\ndef onyx_document_id_from_drive_file(file: GoogleDriveFileType) -> str:\n    link = file.get(WEB_VIEW_LINK_KEY)\n    if not link:\n        file_id = file.get(\"id\")\n        if not file_id:\n            raise KeyError(\n                f\"Google Drive file missing both '{WEB_VIEW_LINK_KEY}' and 'id' fields.\"\n            )\n        mime_type = file.get(\"mimeType\", \"\")\n        template = _FALLBACK_WEB_VIEW_LINK_TEMPLATES.get(mime_type)\n        if template is None:\n            link = f\"https://drive.google.com/file/d/{file_id}/view\"\n        else:\n            link = template.format(file_id)\n        logger.debug(\n            \"Missing webViewLink for Google Drive file with id %s. Falling back to constructed link %s\",\n            file_id,\n            link,\n        )\n    parsed_url = urlparse(link)\n    parsed_url = parsed_url._replace(query=\"\")  # remove query parameters\n    spl_path = parsed_url.path.split(\"/\")\n    if spl_path and (spl_path[-1] in [\"edit\", \"view\", \"preview\"]):\n        spl_path.pop()\n        parsed_url = parsed_url._replace(path=\"/\".join(spl_path))\n    # Remove query parameters and reconstruct URL\n    return urlunparse(parsed_url)\n\n\ndef download_request(\n    service: GoogleDriveService, file_id: str, size_threshold: int\n) -> bytes:\n    \"\"\"\n    Download the file from Google Drive.\n    \"\"\"\n    # For other file types, download the file\n    # Use the correct API call for downloading files\n    request = service.files().get_media(fileId=file_id)\n    return _download_request(request, file_id, size_threshold)\n\n\n_DOWNLOAD_NUM_RETRIES = 3\n\n\ndef _download_request(request: Any, file_id: str, size_threshold: int) -> bytes:\n    response_bytes = io.BytesIO()\n    downloader = MediaIoBaseDownload(\n        response_bytes, request, chunksize=size_threshold + CHUNK_SIZE_BUFFER\n    )\n    done = False\n    while not done:\n        # num_retries enables automatic retry with exponential backoff for transient errors\n        download_progress, done = downloader.next_chunk(\n            num_retries=_DOWNLOAD_NUM_RETRIES\n        )\n        if download_progress.resumable_progress > size_threshold:\n            logger.warning(\n                f\"File {file_id} exceeds size threshold of {size_threshold}. Skipping2.\"\n            )\n            return bytes()\n\n    response = response_bytes.getvalue()\n    if not response:\n        logger.warning(f\"Failed to download {file_id}\")\n        return bytes()\n    return response\n\n\ndef _download_and_extract_sections_basic(\n    file: dict[str, str],\n    service: GoogleDriveService,\n    allow_images: bool,\n    size_threshold: int,\n) -> list[TextSection | ImageSection]:\n    \"\"\"Extract text and images from a Google Drive file.\"\"\"\n    file_id = file[\"id\"]\n    file_name = file[\"name\"]\n    mime_type = file[\"mimeType\"]\n    link = file.get(WEB_VIEW_LINK_KEY, \"\")\n\n    # For non-Google files, download the file\n    # Use the correct API call for downloading files\n    # lazy evaluation to only download the file if necessary\n    def response_call() -> bytes:\n        return download_request(service, file_id, size_threshold)\n\n    if mime_type in OnyxMimeTypes.IMAGE_MIME_TYPES:\n        # Skip images if not explicitly enabled\n        if not allow_images:\n            return []\n\n        # Store images for later processing\n        sections: list[TextSection | ImageSection] = []\n        try:\n            section, embedded_id = store_image_and_create_section(\n                image_data=response_call(),\n                file_id=file_id,\n                display_name=file_name,\n                media_type=mime_type,\n                file_origin=FileOrigin.CONNECTOR,\n                link=link,\n            )\n            sections.append(section)\n        except Exception as e:\n            logger.error(f\"Failed to process image {file_name}: {e}\")\n        return sections\n\n    # For Google Docs, Sheets, and Slides, export as plain text\n    if mime_type in GOOGLE_MIME_TYPES_TO_EXPORT:\n        export_mime_type = GOOGLE_MIME_TYPES_TO_EXPORT[mime_type]\n        # Use the correct API call for exporting files\n        request = service.files().export_media(\n            fileId=file_id, mimeType=export_mime_type\n        )\n        response = _download_request(request, file_id, size_threshold)\n        if not response:\n            logger.warning(f\"Failed to export {file_name} as {export_mime_type}\")\n            return []\n\n        text = response.decode(\"utf-8\")\n        return [TextSection(link=link, text=text)]\n\n    # Process based on mime type\n    if mime_type == \"text/plain\":\n        try:\n            text = response_call().decode(\"utf-8\")\n            return [TextSection(link=link, text=text)]\n        except UnicodeDecodeError as e:\n            logger.warning(f\"Failed to extract text from {file_name}: {e}\")\n            return []\n\n    elif (\n        mime_type\n        == \"application/vnd.openxmlformats-officedocument.wordprocessingml.document\"\n    ):\n        text, _ = read_docx_file(io.BytesIO(response_call()))\n        return [TextSection(link=link, text=text)]\n\n    elif (\n        mime_type == \"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\"\n    ):\n        text = xlsx_to_text(io.BytesIO(response_call()), file_name=file_name)\n        return [TextSection(link=link, text=text)] if text else []\n\n    elif (\n        mime_type\n        == \"application/vnd.openxmlformats-officedocument.presentationml.presentation\"\n    ):\n        text = pptx_to_text(io.BytesIO(response_call()), file_name=file_name)\n        return [TextSection(link=link, text=text)] if text else []\n\n    elif mime_type == \"application/pdf\":\n        text, _pdf_meta, images = read_pdf_file(io.BytesIO(response_call()))\n        pdf_sections: list[TextSection | ImageSection] = [\n            TextSection(link=link, text=text)\n        ]\n\n        # Process embedded images in the PDF\n        try:\n            for idx, (img_data, img_name) in enumerate(images):\n                section, embedded_id = store_image_and_create_section(\n                    image_data=img_data,\n                    file_id=f\"{file_id}_img_{idx}\",\n                    display_name=img_name or f\"{file_name} - image {idx}\",\n                    file_origin=FileOrigin.CONNECTOR,\n                )\n                pdf_sections.append(section)\n        except Exception as e:\n            logger.error(f\"Failed to process PDF images in {file_name}: {e}\")\n        return pdf_sections\n\n    # Final attempt at extracting text\n    file_ext = get_file_ext(file.get(\"name\", \"\"))\n    if file_ext not in OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS:\n        logger.warning(f\"Skipping file {file.get('name')} due to extension.\")\n        return []\n\n    try:\n        text = extract_file_text(io.BytesIO(response_call()), file_name)\n        return [TextSection(link=link, text=text)]\n    except Exception as e:\n        logger.warning(f\"Failed to extract text from {file_name}: {e}\")\n        return []\n\n\ndef _find_nth(haystack: str, needle: str, n: int, start: int = 0) -> int:\n    start = haystack.find(needle, start)\n    while start >= 0 and n > 1:\n        start = haystack.find(needle, start + len(needle))\n        n -= 1\n    return start\n\n\ndef align_basic_advanced(\n    basic_sections: list[TextSection | ImageSection], adv_sections: list[TextSection]\n) -> list[TextSection | ImageSection]:\n    \"\"\"Align the basic sections with the advanced sections.\n    In particular, the basic sections contain all content of the file,\n    including smart chips like dates and doc links. The advanced sections\n    are separated by section headers and contain header-based links that\n    improve user experience when they click on the source in the UI.\n\n    There are edge cases in text matching (i.e. the heading is a smart chip or\n    there is a smart chip in the doc with text containing the actual heading text)\n    that make the matching imperfect; this is hence done on a best-effort basis.\n    \"\"\"\n    if len(adv_sections) <= 1:\n        return basic_sections  # no benefit from aligning\n\n    basic_full_text = \"\".join(\n        [section.text for section in basic_sections if isinstance(section, TextSection)]\n    )\n    new_sections: list[TextSection | ImageSection] = []\n    heading_start = 0\n    for adv_ind in range(1, len(adv_sections)):\n        heading = adv_sections[adv_ind].text.split(HEADING_DELIMITER)[0]\n        # retrieve the longest part of the heading that is not a smart chip\n        heading_key = max(heading.split(SMART_CHIP_CHAR), key=len).strip()\n        if heading_key == \"\":\n            logger.warning(\n                f\"Cannot match heading: {heading}, its link will come from the following section\"\n            )\n            continue\n        heading_offset = heading.find(heading_key)\n\n        # count occurrences of heading str in previous section\n        heading_count = adv_sections[adv_ind - 1].text.count(heading_key)\n\n        prev_start = heading_start\n        heading_start = (\n            _find_nth(basic_full_text, heading_key, heading_count, start=prev_start)\n            - heading_offset\n        )\n        if heading_start < 0:\n            logger.warning(\n                f\"Heading key {heading_key} from heading {heading} not found in basic text\"\n            )\n            heading_start = prev_start\n            continue\n\n        new_sections.append(\n            TextSection(\n                link=adv_sections[adv_ind - 1].link,\n                text=basic_full_text[prev_start:heading_start],\n            )\n        )\n\n    # handle last section\n    new_sections.append(\n        TextSection(link=adv_sections[-1].link, text=basic_full_text[heading_start:])\n    )\n    return new_sections\n\n\ndef _get_external_access_for_raw_gdrive_file(\n    file: GoogleDriveFileType,\n    company_domain: str,\n    retriever_drive_service: GoogleDriveService | None,\n    admin_drive_service: GoogleDriveService,\n    fallback_user_email: str,\n    add_prefix: bool = False,\n) -> ExternalAccess:\n    \"\"\"\n    Get the external access for a raw Google Drive file.\n\n    add_prefix: When True, prefix group IDs with source type (for indexing path).\n               When False (default), leave unprefixed (for permission sync path\n               where upsert_document_external_perms handles prefixing).\n    fallback_user_email: When permission info can't be retrieved (e.g. externally-owned\n               files), fall back to granting access to this user.\n    \"\"\"\n    external_access_fn = cast(\n        Callable[\n            [\n                GoogleDriveFileType,\n                str,\n                GoogleDriveService | None,\n                GoogleDriveService,\n                str,\n                bool,\n            ],\n            ExternalAccess,\n        ],\n        fetch_versioned_implementation_with_fallback(\n            \"onyx.external_permissions.google_drive.doc_sync\",\n            \"get_external_access_for_raw_gdrive_file\",\n            fallback=noop_fallback,\n        ),\n    )\n    return external_access_fn(\n        file,\n        company_domain,\n        retriever_drive_service,\n        admin_drive_service,\n        fallback_user_email,\n        add_prefix,\n    )\n\n\ndef convert_drive_item_to_document(\n    creds: Any,\n    allow_images: bool,\n    size_threshold: int,\n    # if not specified, we will not sync permissions\n    # will also be a no-op if EE is not enabled\n    permission_sync_context: PermissionSyncContext | None,\n    retriever_emails: list[str],\n    file: GoogleDriveFileType,\n) -> Document | ConnectorFailure | None:\n    \"\"\"\n    Attempt to convert a drive item to a document with each retriever email\n    in order. returns upon a successful retrieval or a non-403 error.\n\n    We used to always get the user email from the file owners when available,\n    but this was causing issues with shared folders where the owner was not included in the service account\n    now we use the email of the account that successfully listed the file. There are cases where a\n    user that can list a file cannot download it, so we retry with file owners and admin email.\n    \"\"\"\n    first_error = None\n    doc_or_failure = None\n    retriever_emails = retriever_emails[:MAX_RETRIEVER_EMAILS]\n    # use seen instead of list(set()) to avoid re-ordering the retriever emails\n    seen = set()\n    for retriever_email in retriever_emails:\n        if retriever_email in seen:\n            continue\n        seen.add(retriever_email)\n        doc_or_failure = _convert_drive_item_to_document(\n            creds,\n            allow_images,\n            size_threshold,\n            retriever_email,\n            file,\n            permission_sync_context,\n        )\n\n        # There are a variety of permissions-based errors that occasionally occur\n        # when retrieving files. Often when these occur, there is another user\n        # that can successfully retrieve the file, so we try the next user.\n        if (\n            doc_or_failure is None\n            or isinstance(doc_or_failure, Document)\n            or not (\n                isinstance(doc_or_failure.exception, HttpError)\n                and doc_or_failure.exception.status_code in [401, 403, 404]\n            )\n        ):\n            return doc_or_failure\n\n        if first_error is None:\n            first_error = doc_or_failure\n        else:\n            first_error.failure_message += f\"\\n\\n{doc_or_failure.failure_message}\"\n\n    if (\n        first_error\n        and isinstance(first_error.exception, HttpError)\n        and first_error.exception.status_code == 403\n    ):\n        # This SHOULD happen very rarely, and we don't want to break the indexing process when\n        # a high volume of 403s occurs early. We leave a verbose log to help investigate.\n        logger.error(\n            f\"Skipping file id: {file.get('id')} name: {file.get('name')} due to 403 error.\"\n            f\"Attempted to retrieve with {retriever_emails},\"\n            f\"got the following errors: {first_error.failure_message}\"\n        )\n        return None\n    return first_error\n\n\ndef _convert_drive_item_to_document(\n    creds: Any,\n    allow_images: bool,\n    size_threshold: int,\n    retriever_email: str,\n    file: GoogleDriveFileType,\n    # if not specified, we will not sync permissions\n    # will also be a no-op if EE is not enabled\n    permission_sync_context: PermissionSyncContext | None,\n) -> Document | ConnectorFailure | None:\n    \"\"\"\n    Main entry point for converting a Google Drive file => Document object.\n    \"\"\"\n    sections: list[TextSection | ImageSection] = []\n\n    # Only construct these services when needed\n    def _get_drive_service() -> GoogleDriveService:\n        return get_drive_service(creds, user_email=retriever_email)\n\n    def _get_docs_service() -> GoogleDocsService:\n        return get_google_docs_service(creds, user_email=retriever_email)\n\n    doc_id = \"unknown\"\n\n    try:\n        # skip shortcuts or folders\n        if file.get(\"mimeType\") in [DRIVE_SHORTCUT_TYPE, DRIVE_FOLDER_TYPE]:\n            logger.info(\"Skipping shortcut/folder.\")\n            return None\n\n        size_str = file.get(\"size\")\n        if size_str:\n            try:\n                size_int = int(size_str)\n            except ValueError:\n                logger.warning(f\"Parsing string to int failed: size_str={size_str}\")\n            else:\n                if size_int > size_threshold:\n                    logger.warning(\n                        f\"{file.get('name')} exceeds size threshold of {size_threshold}. Skipping.\"\n                    )\n                    return None\n\n        # If it's a Google Doc, we might do advanced parsing\n        if file.get(\"mimeType\") == GDriveMimeType.DOC.value:\n            try:\n                logger.debug(f\"starting advanced parsing for {file.get('name')}\")\n                # get_document_sections is the advanced approach for Google Docs\n                doc_sections = get_document_sections(\n                    docs_service=_get_docs_service(),\n                    doc_id=file.get(\"id\", \"\"),\n                )\n                if doc_sections:\n                    sections = cast(list[TextSection | ImageSection], doc_sections)\n                    if any(SMART_CHIP_CHAR in section.text for section in doc_sections):\n                        logger.debug(\n                            f\"found smart chips in {file.get('name')}, aligning with basic sections\"\n                        )\n                        basic_sections = _download_and_extract_sections_basic(\n                            file, _get_drive_service(), allow_images, size_threshold\n                        )\n                        sections = align_basic_advanced(basic_sections, doc_sections)\n\n            except Exception as e:\n                logger.warning(\n                    f\"Error in advanced parsing: {e}. Falling back to basic extraction.\"\n                )\n        # Not Google Doc, attempt basic extraction\n        else:\n            sections = _download_and_extract_sections_basic(\n                file, _get_drive_service(), allow_images, size_threshold\n            )\n\n        # If we still don't have any sections, skip this file\n        if not sections:\n            logger.warning(f\"No content extracted from {file.get('name')}. Skipping.\")\n            return None\n\n        doc_id = onyx_document_id_from_drive_file(file)\n        external_access = (\n            _get_external_access_for_raw_gdrive_file(\n                file=file,\n                company_domain=permission_sync_context.google_domain,\n                # try both retriever_email and primary_admin_email if necessary\n                retriever_drive_service=_get_drive_service(),\n                admin_drive_service=get_drive_service(\n                    creds, user_email=permission_sync_context.primary_admin_email\n                ),\n                add_prefix=True,  # Indexing path - prefix here\n                fallback_user_email=retriever_email,\n            )\n            if permission_sync_context\n            else None\n        )\n\n        # Build doc_metadata with hierarchy information\n        file_name = file.get(\"name\", \"\")\n        mime_type = file.get(\"mimeType\", \"\")\n        drive_id = file.get(\"driveId\")\n\n        # Build full folder path by walking up the parent chain\n        # Pass retriever_email to determine if file is in \"My Drive\" vs \"Shared with me\"\n        source_path = build_folder_path(\n            file, _get_drive_service(), drive_id, retriever_email\n        )\n\n        doc_metadata = {\n            \"hierarchy\": {\n                \"source_path\": source_path,\n                \"drive_id\": drive_id,\n                \"file_name\": file_name,\n                \"mime_type\": mime_type,\n            }\n        }\n\n        # Create the document\n        return Document(\n            id=doc_id,\n            sections=sections,\n            source=DocumentSource.GOOGLE_DRIVE,\n            semantic_identifier=file_name,\n            doc_metadata=doc_metadata,\n            metadata={\n                \"owner_names\": \", \".join(\n                    owner.get(\"displayName\", \"\") for owner in file.get(\"owners\", [])\n                ),\n            },\n            doc_updated_at=datetime.fromisoformat(\n                file.get(\"modifiedTime\", \"\").replace(\"Z\", \"+00:00\")\n            ),\n            external_access=external_access,\n            parent_hierarchy_raw_node_id=(file.get(\"parents\") or [None])[0],\n        )\n    except Exception as e:\n        doc_id = \"unknown\"\n        try:\n            doc_id = onyx_document_id_from_drive_file(file)\n        except Exception as e2:\n            logger.warning(f\"Error getting document id from file: {e2}\")\n\n        file_name = file.get(\"name\")\n        error_str = (\n            f\"Error converting file '{file_name}' to Document as {retriever_email}: {e}\"\n        )\n        if isinstance(e, HttpError) and e.status_code == 403:\n            logger.warning(\n                f\"Uncommon permissions error while downloading file. User \"\n                f\"{retriever_email} was able to see file {file_name} \"\n                \"but cannot download it.\"\n            )\n            logger.warning(error_str)\n\n        return ConnectorFailure(\n            failed_document=DocumentFailure(\n                document_id=doc_id,\n                document_link=(\n                    sections[0].link if sections else None\n                ),  # TODO: see if this is the best way to get a link\n            ),\n            failed_entity=None,\n            failure_message=error_str,\n            exception=e,\n        )\n\n\ndef build_slim_document(\n    creds: Any,\n    file: GoogleDriveFileType,\n    # if not specified, we will not sync permissions\n    # will also be a no-op if EE is not enabled\n    permission_sync_context: PermissionSyncContext | None,\n    retriever_email: str,\n) -> SlimDocument | None:\n    if file.get(\"mimeType\") in [DRIVE_FOLDER_TYPE, DRIVE_SHORTCUT_TYPE]:\n        return None\n\n    owner_email = cast(str | None, file.get(\"owners\", [{}])[0].get(\"emailAddress\"))\n    external_access = (\n        _get_external_access_for_raw_gdrive_file(\n            file=file,\n            company_domain=permission_sync_context.google_domain,\n            retriever_drive_service=(\n                get_drive_service(\n                    creds,\n                    user_email=owner_email,\n                )\n                if owner_email\n                else None\n            ),\n            admin_drive_service=get_drive_service(\n                creds,\n                user_email=permission_sync_context.primary_admin_email,\n            ),\n            fallback_user_email=retriever_email,\n        )\n        if permission_sync_context\n        else None\n    )\n    return SlimDocument(\n        id=onyx_document_id_from_drive_file(file),\n        external_access=external_access,\n        parent_hierarchy_raw_node_id=(file.get(\"parents\") or [None])[0],\n    )\n"
  },
  {
    "path": "backend/onyx/connectors/google_drive/file_retrieval.py",
    "content": "from collections.abc import Callable\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom enum import Enum\nfrom typing import cast\nfrom urllib.parse import parse_qs\nfrom urllib.parse import urlparse\n\nfrom googleapiclient.discovery import Resource  # type: ignore\nfrom googleapiclient.errors import HttpError  # type: ignore\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE\nfrom onyx.connectors.google_drive.constants import DRIVE_SHORTCUT_TYPE\nfrom onyx.connectors.google_drive.models import DriveRetrievalStage\nfrom onyx.connectors.google_drive.models import GoogleDriveFileType\nfrom onyx.connectors.google_drive.models import RetrievedDriveFile\nfrom onyx.connectors.google_utils.google_utils import execute_paginated_retrieval\nfrom onyx.connectors.google_utils.google_utils import (\n    execute_paginated_retrieval_with_max_pages,\n)\nfrom onyx.connectors.google_utils.google_utils import GoogleFields\nfrom onyx.connectors.google_utils.google_utils import ORDER_BY_KEY\nfrom onyx.connectors.google_utils.google_utils import PAGE_TOKEN_KEY\nfrom onyx.connectors.google_utils.resources import GoogleDriveService\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import (\n    fetch_versioned_implementation_with_fallback,\n)\nfrom onyx.utils.variable_functionality import noop_fallback\n\n\nlogger = setup_logger()\n\n\nclass DriveFileFieldType(Enum):\n    \"\"\"Enum to specify which fields to retrieve from Google Drive files\"\"\"\n\n    SLIM = \"slim\"  # Minimal fields for basic file info\n    STANDARD = \"standard\"  # Standard fields including content metadata\n    WITH_PERMISSIONS = \"with_permissions\"  # Full fields including permissions\n\n\nPERMISSION_FULL_DESCRIPTION = (\n    \"permissions(id, emailAddress, type, domain, allowFileDiscovery, permissionDetails)\"\n)\nFILE_FIELDS = (\n    \"nextPageToken, files(mimeType, id, name, driveId, parents, \"\n    \"modifiedTime, webViewLink, shortcutDetails, owners(emailAddress), size)\"\n)\nFILE_FIELDS_WITH_PERMISSIONS = (\n    f\"nextPageToken, files(mimeType, id, name, driveId, parents, {PERMISSION_FULL_DESCRIPTION}, permissionIds, \"\n    \"modifiedTime, webViewLink, shortcutDetails, owners(emailAddress), size)\"\n)\nSLIM_FILE_FIELDS = (\n    f\"nextPageToken, files(mimeType, driveId, id, name, parents, {PERMISSION_FULL_DESCRIPTION}, \"\n    \"permissionIds, webViewLink, owners(emailAddress), modifiedTime)\"\n)\nFOLDER_FIELDS = \"nextPageToken, files(id, name, permissions, modifiedTime, webViewLink, shortcutDetails)\"\n\nHIERARCHY_FIELDS = \"id, name, parents, webViewLink, mimeType, driveId\"\n\nHIERARCHY_FIELDS_WITH_PERMISSIONS = (\n    \"id, name, parents, webViewLink, mimeType, permissionIds, driveId\"\n)\n\n\ndef generate_time_range_filter(\n    start: SecondsSinceUnixEpoch | None = None,\n    end: SecondsSinceUnixEpoch | None = None,\n) -> str:\n    time_range_filter = \"\"\n    if start is not None:\n        time_start = datetime.fromtimestamp(start, tz=timezone.utc).isoformat()\n        time_range_filter += (\n            f\" and {GoogleFields.MODIFIED_TIME.value} >= '{time_start}'\"\n        )\n    if end is not None:\n        time_stop = datetime.fromtimestamp(end, tz=timezone.utc).isoformat()\n        time_range_filter += f\" and {GoogleFields.MODIFIED_TIME.value} <= '{time_stop}'\"\n    return time_range_filter\n\n\nLINK_ONLY_PERMISSION_TYPES = {\"domain\", \"anyone\"}\n\n\ndef has_link_only_permission(file: GoogleDriveFileType) -> bool:\n    \"\"\"\n    Return True if any permission requires a direct link to access\n    (allowFileDiscovery is explicitly false for supported types).\n    \"\"\"\n    permissions = file.get(\"permissions\") or []\n    for permission in permissions:\n        if permission.get(\"type\") not in LINK_ONLY_PERMISSION_TYPES:\n            continue\n        if permission.get(\"allowFileDiscovery\") is False:\n            return True\n    return False\n\n\ndef _get_folders_in_parent(\n    service: Resource,\n    parent_id: str | None = None,\n) -> Iterator[GoogleDriveFileType]:\n    # Follow shortcuts to folders\n    query = f\"(mimeType = '{DRIVE_FOLDER_TYPE}' or mimeType = '{DRIVE_SHORTCUT_TYPE}')\"\n    query += \" and trashed = false\"\n\n    if parent_id:\n        query += f\" and '{parent_id}' in parents\"\n\n    for file in execute_paginated_retrieval(\n        retrieval_function=service.files().list,\n        list_key=\"files\",\n        continue_on_404_or_403=True,\n        corpora=\"allDrives\",\n        supportsAllDrives=True,\n        includeItemsFromAllDrives=True,\n        fields=FOLDER_FIELDS,\n        q=query,\n    ):\n        yield file\n\n\ndef get_folder_metadata(\n    service: Resource,\n    folder_id: str,\n    field_type: DriveFileFieldType,\n) -> GoogleDriveFileType | None:\n    \"\"\"Fetch metadata for a folder by ID.\"\"\"\n    fields = _get_hierarchy_fields_for_file_type(field_type)\n    try:\n        return (\n            service.files()\n            .get(\n                fileId=folder_id,\n                fields=fields,\n                supportsAllDrives=True,\n            )\n            .execute()\n        )\n    except HttpError as e:\n        if e.resp.status in (403, 404):\n            logger.debug(f\"Cannot access folder {folder_id}: {e}\")\n        else:\n            raise e\n    return None\n\n\ndef _get_hierarchy_fields_for_file_type(field_type: DriveFileFieldType) -> str:\n    if field_type == DriveFileFieldType.WITH_PERMISSIONS:\n        return HIERARCHY_FIELDS_WITH_PERMISSIONS\n    else:\n        return HIERARCHY_FIELDS\n\n\ndef get_shared_drive_name(\n    service: Resource,\n    drive_id: str,\n) -> str | None:\n    \"\"\"Fetch the actual name of a shared drive via the drives().get() API.\n\n    The files().get() API returns 'Drive' as the name for shared drive root\n    folders. Only drives().get() returns the real user-assigned name.\n    \"\"\"\n    try:\n        drive = service.drives().get(driveId=drive_id, fields=\"name\").execute()\n        return drive.get(\"name\")\n    except HttpError as e:\n        if e.resp.status in (403, 404):\n            logger.debug(f\"Cannot access drive {drive_id}: {e}\")\n        else:\n            raise\n    return None\n\n\ndef get_external_access_for_folder(\n    folder: GoogleDriveFileType,\n    google_domain: str,\n    drive_service: GoogleDriveService,\n    add_prefix: bool = False,\n) -> ExternalAccess:\n    \"\"\"\n    Extract ExternalAccess from a folder's permissions.\n\n    This fetches permissions using the Drive API (via permissionIds) and extracts\n    user emails, group emails, and public access status.\n\n    Uses the EE implementation if available, otherwise returns public access\n    (fallback for non-EE deployments).\n\n    Args:\n        folder: The folder metadata from Google Drive API (must include permissionIds field)\n        google_domain: The company's Google Workspace domain (e.g., \"company.com\")\n        drive_service: Google Drive service for fetching permission details\n        add_prefix: When True, prefix group IDs with source type (for indexing path).\n                   When False (default), leave unprefixed (for permission sync path\n                   where upsert_document_external_perms handles prefixing).\n\n    Returns:\n        ExternalAccess with extracted permission info\n    \"\"\"\n    # Try to get the EE implementation\n    get_folder_access_fn = cast(\n        Callable[[GoogleDriveFileType, str, GoogleDriveService, bool], ExternalAccess],\n        fetch_versioned_implementation_with_fallback(\n            \"onyx.external_permissions.google_drive.doc_sync\",\n            \"get_external_access_for_folder\",\n            noop_fallback,\n        ),\n    )\n\n    return get_folder_access_fn(folder, google_domain, drive_service, add_prefix)\n\n\ndef _get_fields_for_file_type(field_type: DriveFileFieldType) -> str:\n    \"\"\"Get the appropriate fields string based on the field type enum\"\"\"\n    if field_type == DriveFileFieldType.SLIM:\n        return SLIM_FILE_FIELDS\n    elif field_type == DriveFileFieldType.WITH_PERMISSIONS:\n        return FILE_FIELDS_WITH_PERMISSIONS\n    else:  # DriveFileFieldType.STANDARD\n        return FILE_FIELDS\n\n\ndef _get_files_in_parent(\n    service: Resource,\n    parent_id: str,\n    field_type: DriveFileFieldType,\n    start: SecondsSinceUnixEpoch | None = None,\n    end: SecondsSinceUnixEpoch | None = None,\n) -> Iterator[GoogleDriveFileType]:\n    query = f\"mimeType != '{DRIVE_FOLDER_TYPE}' and '{parent_id}' in parents\"\n    query += \" and trashed = false\"\n    query += generate_time_range_filter(start, end)\n\n    kwargs = {ORDER_BY_KEY: GoogleFields.MODIFIED_TIME.value}\n\n    for file in execute_paginated_retrieval(\n        retrieval_function=service.files().list,\n        list_key=\"files\",\n        continue_on_404_or_403=True,\n        corpora=\"allDrives\",\n        supportsAllDrives=True,\n        includeItemsFromAllDrives=True,\n        fields=_get_fields_for_file_type(field_type),\n        q=query,\n        **kwargs,\n    ):\n        yield file\n\n\ndef crawl_folders_for_files(\n    service: Resource,\n    parent_id: str,\n    field_type: DriveFileFieldType,\n    user_email: str,\n    traversed_parent_ids: set[str],\n    update_traversed_ids_func: Callable[[str], None],\n    start: SecondsSinceUnixEpoch | None = None,\n    end: SecondsSinceUnixEpoch | None = None,\n) -> Iterator[RetrievedDriveFile]:\n    \"\"\"\n    This function starts crawling from any folder. It is slower though.\n    \"\"\"\n    logger.info(\"Entered crawl_folders_for_files with parent_id: \" + parent_id)\n    if parent_id not in traversed_parent_ids:\n        logger.info(\"Parent id not in traversed parent ids, getting files\")\n        found_files = False\n        file = {}\n        try:\n            for file in _get_files_in_parent(\n                service=service,\n                parent_id=parent_id,\n                field_type=field_type,\n                start=start,\n                end=end,\n            ):\n                logger.info(f\"Found file: {file['name']}, user email: {user_email}\")\n                found_files = True\n                yield RetrievedDriveFile(\n                    drive_file=file,\n                    user_email=user_email,\n                    parent_id=parent_id,\n                    completion_stage=DriveRetrievalStage.FOLDER_FILES,\n                )\n            # Only mark a folder as done if it was fully traversed without errors\n            # This usually indicates that the owner of the folder was impersonated.\n            # In cases where this never happens, most likely the folder owner is\n            # not part of the google workspace in question (or for oauth, the authenticated\n            # user doesn't own the folder)\n            if found_files:\n                update_traversed_ids_func(parent_id)\n        except Exception as e:\n            if isinstance(e, HttpError) and e.status_code == 403:\n                # don't yield an error here because this is expected behavior\n                # when a user doesn't have access to a folder\n                logger.debug(f\"Error getting files in parent {parent_id}: {e}\")\n            else:\n                logger.error(f\"Error getting files in parent {parent_id}: {e}\")\n                yield RetrievedDriveFile(\n                    drive_file=file,\n                    user_email=user_email,\n                    parent_id=parent_id,\n                    completion_stage=DriveRetrievalStage.FOLDER_FILES,\n                    error=e,\n                )\n    else:\n        logger.info(f\"Skipping subfolder files since already traversed: {parent_id}\")\n\n    for subfolder in _get_folders_in_parent(\n        service=service,\n        parent_id=parent_id,\n    ):\n        logger.info(\"Fetching all files in subfolder: \" + subfolder[\"name\"])\n        yield from crawl_folders_for_files(\n            service=service,\n            parent_id=subfolder[\"id\"],\n            field_type=field_type,\n            user_email=user_email,\n            traversed_parent_ids=traversed_parent_ids,\n            update_traversed_ids_func=update_traversed_ids_func,\n            start=start,\n            end=end,\n        )\n\n\ndef get_files_in_shared_drive(\n    service: Resource,\n    drive_id: str,\n    field_type: DriveFileFieldType,\n    max_num_pages: int,\n    update_traversed_ids_func: Callable[[str], None] = lambda _: None,\n    cache_folders: bool = True,\n    start: SecondsSinceUnixEpoch | None = None,\n    end: SecondsSinceUnixEpoch | None = None,\n    page_token: str | None = None,\n) -> Iterator[GoogleDriveFileType | str]:\n    kwargs = {ORDER_BY_KEY: GoogleFields.MODIFIED_TIME.value}\n    if page_token:\n        logger.info(f\"Using page token: {page_token}\")\n        kwargs[PAGE_TOKEN_KEY] = page_token\n\n    if cache_folders:\n        # If we know we are going to folder crawl later, we can cache the folders here\n        # Get all folders being queried and add them to the traversed set\n        folder_query = f\"mimeType = '{DRIVE_FOLDER_TYPE}'\"\n        folder_query += \" and trashed = false\"\n        for folder in execute_paginated_retrieval(\n            retrieval_function=service.files().list,\n            list_key=\"files\",\n            continue_on_404_or_403=True,\n            corpora=\"drive\",\n            driveId=drive_id,\n            supportsAllDrives=True,\n            includeItemsFromAllDrives=True,\n            fields=\"nextPageToken, files(id)\",\n            q=folder_query,\n        ):\n            update_traversed_ids_func(folder[\"id\"])\n\n    # Get all files in the shared drive\n    file_query = f\"mimeType != '{DRIVE_FOLDER_TYPE}'\"\n    file_query += \" and trashed = false\"\n    file_query += generate_time_range_filter(start, end)\n\n    for file in execute_paginated_retrieval_with_max_pages(\n        retrieval_function=service.files().list,\n        max_num_pages=max_num_pages,\n        list_key=\"files\",\n        continue_on_404_or_403=True,\n        corpora=\"drive\",\n        driveId=drive_id,\n        supportsAllDrives=True,\n        includeItemsFromAllDrives=True,\n        fields=_get_fields_for_file_type(field_type),\n        q=file_query,\n        **kwargs,\n    ):\n        # If we found any files, mark this drive as traversed. When a user has access to a drive,\n        # they have access to all the files in the drive. Also not a huge deal if we re-traverse\n        # empty drives.\n        # NOTE: ^^ the above is not actually true due to folder restrictions:\n        # https://support.google.com/a/users/answer/12380484?hl=en\n        # So we may have to change this logic for people who use folder restrictions.\n        update_traversed_ids_func(drive_id)\n        yield file\n\n\ndef get_all_files_in_my_drive_and_shared(\n    service: GoogleDriveService,\n    update_traversed_ids_func: Callable,\n    field_type: DriveFileFieldType,\n    include_shared_with_me: bool,\n    max_num_pages: int,\n    start: SecondsSinceUnixEpoch | None = None,\n    end: SecondsSinceUnixEpoch | None = None,\n    cache_folders: bool = True,\n    page_token: str | None = None,\n) -> Iterator[GoogleDriveFileType | str]:\n    kwargs = {ORDER_BY_KEY: GoogleFields.MODIFIED_TIME.value}\n    if page_token:\n        logger.info(f\"Using page token: {page_token}\")\n        kwargs[PAGE_TOKEN_KEY] = page_token\n\n    if cache_folders:\n        # If we know we are going to folder crawl later, we can cache the folders here\n        # Get all folders being queried and add them to the traversed set\n        folder_query = f\"mimeType = '{DRIVE_FOLDER_TYPE}'\"\n        folder_query += \" and trashed = false\"\n        if not include_shared_with_me:\n            folder_query += \" and 'me' in owners\"\n        found_folders = False\n        for folder in execute_paginated_retrieval(\n            retrieval_function=service.files().list,\n            list_key=\"files\",\n            corpora=\"user\",\n            fields=_get_fields_for_file_type(field_type),\n            q=folder_query,\n        ):\n            update_traversed_ids_func(folder[GoogleFields.ID])\n            found_folders = True\n        if found_folders:\n            update_traversed_ids_func(get_root_folder_id(service))\n\n    # Then get the files\n    file_query = f\"mimeType != '{DRIVE_FOLDER_TYPE}'\"\n    file_query += \" and trashed = false\"\n    if not include_shared_with_me:\n        file_query += \" and 'me' in owners\"\n    file_query += generate_time_range_filter(start, end)\n    yield from execute_paginated_retrieval_with_max_pages(\n        retrieval_function=service.files().list,\n        max_num_pages=max_num_pages,\n        list_key=\"files\",\n        continue_on_404_or_403=False,\n        corpora=\"user\",\n        fields=_get_fields_for_file_type(field_type),\n        q=file_query,\n        **kwargs,\n    )\n\n\ndef get_all_files_for_oauth(\n    service: GoogleDriveService,\n    include_files_shared_with_me: bool,\n    include_my_drives: bool,\n    # One of the above 2 should be true\n    include_shared_drives: bool,\n    field_type: DriveFileFieldType,\n    max_num_pages: int,\n    start: SecondsSinceUnixEpoch | None = None,\n    end: SecondsSinceUnixEpoch | None = None,\n    page_token: str | None = None,\n) -> Iterator[GoogleDriveFileType | str]:\n    kwargs = {ORDER_BY_KEY: GoogleFields.MODIFIED_TIME.value}\n    if page_token:\n        logger.info(f\"Using page token: {page_token}\")\n        kwargs[PAGE_TOKEN_KEY] = page_token\n\n    should_get_all = (\n        include_shared_drives and include_my_drives and include_files_shared_with_me\n    )\n    corpora = \"allDrives\" if should_get_all else \"user\"\n\n    file_query = f\"mimeType != '{DRIVE_FOLDER_TYPE}'\"\n    file_query += \" and trashed = false\"\n    file_query += generate_time_range_filter(start, end)\n\n    if not should_get_all:\n        if include_files_shared_with_me and not include_my_drives:\n            file_query += \" and not 'me' in owners\"\n        if not include_files_shared_with_me and include_my_drives:\n            file_query += \" and 'me' in owners\"\n\n    yield from execute_paginated_retrieval_with_max_pages(\n        max_num_pages=max_num_pages,\n        retrieval_function=service.files().list,\n        list_key=\"files\",\n        continue_on_404_or_403=False,\n        corpora=corpora,\n        includeItemsFromAllDrives=should_get_all,\n        supportsAllDrives=should_get_all,\n        fields=_get_fields_for_file_type(field_type),\n        q=file_query,\n        **kwargs,\n    )\n\n\n# Just in case we need to get the root folder id\ndef get_root_folder_id(service: Resource) -> str:\n    # we dont paginate here because there is only one root folder per user\n    # https://developers.google.com/drive/api/guides/v2-to-v3-reference\n    return (\n        service.files()\n        .get(fileId=\"root\", fields=GoogleFields.ID.value)\n        .execute()[GoogleFields.ID.value]\n    )\n\n\ndef _extract_file_id_from_web_view_link(web_view_link: str) -> str:\n    parsed = urlparse(web_view_link)\n    path_parts = [part for part in parsed.path.split(\"/\") if part]\n\n    if \"d\" in path_parts:\n        idx = path_parts.index(\"d\")\n        if idx + 1 < len(path_parts):\n            return path_parts[idx + 1]\n\n    query_params = parse_qs(parsed.query)\n    for key in (\"id\", \"fileId\"):\n        value = query_params.get(key)\n        if value and value[0]:\n            return value[0]\n\n    raise ValueError(\n        f\"Unable to extract Drive file id from webViewLink: {web_view_link}\"\n    )\n\n\ndef get_file_by_web_view_link(\n    service: GoogleDriveService,\n    web_view_link: str,\n    fields: str,\n) -> GoogleDriveFileType:\n    \"\"\"Retrieve a Google Drive file using its webViewLink.\"\"\"\n    file_id = _extract_file_id_from_web_view_link(web_view_link)\n    return (\n        service.files()\n        .get(\n            fileId=file_id,\n            supportsAllDrives=True,\n            fields=fields,\n        )\n        .execute()\n    )\n"
  },
  {
    "path": "backend/onyx/connectors/google_drive/models.py",
    "content": "from enum import Enum\nfrom typing import Any\n\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom pydantic import Field\nfrom pydantic import field_serializer\nfrom pydantic import field_validator\n\nfrom onyx.connectors.interfaces import ConnectorCheckpoint\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.utils.threadpool_concurrency import ThreadSafeDict\nfrom onyx.utils.threadpool_concurrency import ThreadSafeSet\n\n\nclass GDriveMimeType(str, Enum):\n    DOC = \"application/vnd.google-apps.document\"\n    SPREADSHEET = \"application/vnd.google-apps.spreadsheet\"\n    SPREADSHEET_OPEN_FORMAT = (\n        \"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\"\n    )\n    SPREADSHEET_MS_EXCEL = \"application/vnd.ms-excel\"\n    PDF = \"application/pdf\"\n    WORD_DOC = \"application/vnd.openxmlformats-officedocument.wordprocessingml.document\"\n    PPT = \"application/vnd.google-apps.presentation\"\n    POWERPOINT = (\n        \"application/vnd.openxmlformats-officedocument.presentationml.presentation\"\n    )\n    PLAIN_TEXT = \"text/plain\"\n    MARKDOWN = \"text/markdown\"\n\n\nGoogleDriveFileType = dict[str, Any]\n\n\nTOKEN_EXPIRATION_TIME = 3600  # 1 hour\n\n\n# These correspond to The major stages of retrieval for google drive.\n# The stages for the oauth flow are:\n# get_all_files_for_oauth(),\n# get_all_drive_ids(),\n# get_files_in_shared_drive(),\n# crawl_folders_for_files()\n#\n# The stages for the service account flow are roughly:\n# get_all_user_emails(),\n# get_all_drive_ids(),\n# get_files_in_shared_drive(),\n# Then for each user:\n#   get_files_in_my_drive()\n#   get_files_in_shared_drive()\n#   crawl_folders_for_files()\nclass DriveRetrievalStage(str, Enum):\n    START = \"start\"\n    DONE = \"done\"\n    # OAuth specific stages\n    OAUTH_FILES = \"oauth_files\"\n\n    # Service account specific stages\n    USER_EMAILS = \"user_emails\"\n    MY_DRIVE_FILES = \"my_drive_files\"\n\n    # Used for both oauth and service account flows\n    DRIVE_IDS = \"drive_ids\"\n    SHARED_DRIVE_FILES = \"shared_drive_files\"\n    FOLDER_FILES = \"folder_files\"\n\n\nclass StageCompletion(BaseModel):\n    \"\"\"\n    Describes the point in the retrieval+indexing process that the\n    connector is at. completed_until is the timestamp of the latest\n    file that has been retrieved or error that has been yielded.\n    Optional fields are used for retrieval stages that need more information\n    for resuming than just the timestamp of the latest file.\n    \"\"\"\n\n    stage: DriveRetrievalStage\n    completed_until: SecondsSinceUnixEpoch\n    current_folder_or_drive_id: str | None = None\n    next_page_token: str | None = None\n\n    # only used for shared drives\n    processed_drive_ids: set[str] = set()\n\n    def update(\n        self,\n        stage: DriveRetrievalStage,\n        completed_until: SecondsSinceUnixEpoch,\n        current_folder_or_drive_id: str | None = None,\n    ) -> None:\n        self.stage = stage\n        self.completed_until = completed_until\n        self.current_folder_or_drive_id = current_folder_or_drive_id\n\n\nclass RetrievedDriveFile(BaseModel):\n    \"\"\"\n    Describes a file that has been retrieved from google drive.\n    user_email is the email of the user that the file was retrieved\n    by impersonating. If an error worthy of being reported is encountered,\n    error should be set and later propagated as a ConnectorFailure.\n    \"\"\"\n\n    # The stage at which this file was retrieved\n    completion_stage: DriveRetrievalStage\n\n    # The file that was retrieved\n    drive_file: GoogleDriveFileType\n\n    # The email of the user that the file was retrieved by impersonating\n    user_email: str\n\n    # The id of the parent folder or drive of the file\n    parent_id: str | None = None\n\n    # Any unexpected error that occurred while retrieving the file.\n    # In particular, this is not used for 403/404 errors, which are expected\n    # in the context of impersonating all the users to try to retrieve all\n    # files from all their Drives and Folders.\n    error: Exception | None = None\n\n    model_config = ConfigDict(arbitrary_types_allowed=True)\n\n\nclass GoogleDriveCheckpoint(ConnectorCheckpoint):\n    # Checkpoint version of _retrieved_ids\n    retrieved_folder_and_drive_ids: set[str]\n\n    # Describes the point in the retrieval+indexing process that the\n    # checkpoint is at. when this is set to a given stage, the connector\n    # has finished yielding all values from the previous stage.\n    completion_stage: DriveRetrievalStage\n\n    # The latest timestamp of a file that has been retrieved per user email.\n    # StageCompletion is used to track the completion of each stage, but the\n    # timestamp part is not used for folder crawling.\n    completion_map: ThreadSafeDict[str, StageCompletion]\n\n    # all file ids that have been retrieved\n    all_retrieved_file_ids: set[str] = set()\n\n    # cached version of the drive and folder ids to retrieve\n    drive_ids_to_retrieve: list[str] | None = None\n    folder_ids_to_retrieve: list[str] | None = None\n\n    # cached user emails\n    user_emails: list[str] | None = None\n\n    # Hierarchy node raw IDs that have already been yielded.\n    # Used to avoid yielding duplicate hierarchy nodes across checkpoints.\n    # Thread-safe because multiple impersonation threads access this concurrently.\n    # Uses default_factory to ensure each checkpoint instance gets a fresh set.\n    seen_hierarchy_node_raw_ids: ThreadSafeSet[str] = Field(\n        default_factory=ThreadSafeSet\n    )\n\n    # Hierarchy node raw IDs where we have successfully walked up to a terminal\n    # node (a drive root with no parent). This is separate from seen_hierarchy_node_raw_ids\n    # because a node might be yielded before we've walked its full ancestry chain.\n    # We only skip walking from a node if it's in this set, ensuring that if one user\n    # fails to walk to the root, another user with better access can still complete the walk.\n    # Thread-safe because multiple impersonation threads access this concurrently.\n    # Uses default_factory to ensure each checkpoint instance gets a fresh set.\n    fully_walked_hierarchy_node_raw_ids: ThreadSafeSet[str] = Field(\n        default_factory=ThreadSafeSet\n    )\n\n    @field_serializer(\"completion_map\")\n    def serialize_completion_map(\n        self, completion_map: ThreadSafeDict[str, StageCompletion], _info: Any\n    ) -> dict[str, StageCompletion]:\n        return completion_map._dict\n\n    @field_serializer(\"seen_hierarchy_node_raw_ids\")\n    def serialize_seen_hierarchy(\n        self, seen_hierarchy_node_raw_ids: ThreadSafeSet[str], _info: Any\n    ) -> set[str]:\n        return seen_hierarchy_node_raw_ids.copy()\n\n    @field_serializer(\"fully_walked_hierarchy_node_raw_ids\")\n    def serialize_fully_walked_hierarchy(\n        self, fully_walked_hierarchy_node_raw_ids: ThreadSafeSet[str], _info: Any\n    ) -> set[str]:\n        return fully_walked_hierarchy_node_raw_ids.copy()\n\n    @field_validator(\"completion_map\", mode=\"before\")\n    def validate_completion_map(cls, v: Any) -> ThreadSafeDict[str, StageCompletion]:\n        assert isinstance(v, dict) or isinstance(v, ThreadSafeDict)\n        return ThreadSafeDict(\n            {k: StageCompletion.model_validate(val) for k, val in v.items()}\n        )\n\n    @field_validator(\"seen_hierarchy_node_raw_ids\", mode=\"before\")\n    def validate_seen_hierarchy(cls, v: Any) -> ThreadSafeSet[str]:\n        if isinstance(v, ThreadSafeSet):\n            return v\n        if isinstance(v, set):\n            return ThreadSafeSet(v)\n        if isinstance(v, list):\n            return ThreadSafeSet(set(v))\n        return ThreadSafeSet()\n\n    @field_validator(\"fully_walked_hierarchy_node_raw_ids\", mode=\"before\")\n    def validate_fully_walked_hierarchy(cls, v: Any) -> ThreadSafeSet[str]:\n        if isinstance(v, ThreadSafeSet):\n            return v\n        if isinstance(v, set):\n            return ThreadSafeSet(v)\n        if isinstance(v, list):\n            return ThreadSafeSet(set(v))\n        return ThreadSafeSet()\n"
  },
  {
    "path": "backend/onyx/connectors/google_drive/section_extraction.py",
    "content": "from typing import Any\n\nfrom pydantic import BaseModel\n\nfrom onyx.connectors.google_utils.resources import GoogleDocsService\nfrom onyx.connectors.models import TextSection\n\nHEADING_DELIMITER = \"\\n\"\n\n\nclass CurrentHeading(BaseModel):\n    id: str | None\n    text: str\n\n\ndef _build_gdoc_section_link(doc_id: str, tab_id: str, heading_id: str | None) -> str:\n    \"\"\"Builds a Google Doc link that jumps to a specific heading\"\"\"\n    # NOTE: doesn't support docs with multiple tabs atm, if we need that ask\n    # @Chris\n    heading_str = f\"#heading={heading_id}\" if heading_id else \"\"\n    return f\"https://docs.google.com/document/d/{doc_id}/edit?tab={tab_id}{heading_str}\"\n\n\ndef _extract_id_from_heading(paragraph: dict[str, Any]) -> str:\n    \"\"\"Extracts the id from a heading paragraph element\"\"\"\n    return paragraph[\"paragraphStyle\"][\"headingId\"]\n\n\ndef _extract_text_from_paragraph(paragraph: dict[str, Any]) -> str:\n    \"\"\"Extracts the text content from a paragraph element\"\"\"\n    text_elements = []\n    for element in paragraph.get(\"elements\", []):\n        if \"textRun\" in element:\n            text_elements.append(element[\"textRun\"].get(\"content\", \"\"))\n\n        # Handle links\n        if \"textStyle\" in element and \"link\" in element[\"textStyle\"]:\n            text_elements.append(f\"({element['textStyle']['link'].get('url', '')})\")\n\n        if \"person\" in element:\n            name = element[\"person\"].get(\"personProperties\", {}).get(\"name\", \"\")\n            email = element[\"person\"].get(\"personProperties\", {}).get(\"email\", \"\")\n            person_str = \"<Person|\"\n            if name:\n                person_str += f\"name: {name}, \"\n            if email:\n                person_str += f\"email: {email}\"\n            person_str += \">\"\n            text_elements.append(person_str)\n\n        if \"richLink\" in element:\n            props = element[\"richLink\"].get(\"richLinkProperties\", {})\n            title = props.get(\"title\", \"\")\n            uri = props.get(\"uri\", \"\")\n            link_str = f\"[{title}]({uri})\"\n            text_elements.append(link_str)\n\n    return \"\".join(text_elements)\n\n\ndef _extract_text_from_table(table: dict[str, Any]) -> str:\n    \"\"\"\n    Extracts the text content from a table element.\n    \"\"\"\n    row_strs = []\n\n    for row in table.get(\"tableRows\", []):\n        cells = row.get(\"tableCells\", [])\n        cell_strs = []\n        for cell in cells:\n            child_elements = cell.get(\"content\", {})\n            cell_str = []\n            for child_elem in child_elements:\n                if \"paragraph\" not in child_elem:\n                    continue\n                cell_str.append(_extract_text_from_paragraph(child_elem[\"paragraph\"]))\n            cell_strs.append(\"\".join(cell_str))\n        row_strs.append(\", \".join(cell_strs))\n    return \"\\n\".join(row_strs)\n\n\ndef get_document_sections(\n    docs_service: GoogleDocsService,\n    doc_id: str,\n) -> list[TextSection]:\n    \"\"\"Extracts sections from a Google Doc, including their headings and content\"\"\"\n    # Fetch the document structure\n    http_request = docs_service.documents().get(documentId=doc_id)\n\n    # Google has poor support for tabs in the docs api, see\n    # https://cloud.google.com/python/docs/reference/cloudtasks/\n    # latest/google.cloud.tasks_v2.types.HttpRequest\n    # https://developers.google.com/workspace/docs/api/how-tos/tabs\n    # https://developers.google.com/workspace/docs/api/reference/rest/v1/documents/get\n    # this is a hack to use the param mentioned in the rest api docs\n    # TODO: check if it can be specified i.e. in documents()\n    http_request.uri += \"&includeTabsContent=true\"\n    doc = http_request.execute()\n\n    # Get the content\n    tabs = doc.get(\"tabs\", {})\n    sections: list[TextSection] = []\n    for tab in tabs:\n        sections.extend(get_tab_sections(tab, doc_id))\n    return sections\n\n\ndef _is_heading(paragraph: dict[str, Any]) -> bool:\n    \"\"\"Checks if a paragraph (a block of text in a drive document) is a heading\"\"\"\n    if not (\n        \"paragraphStyle\" in paragraph\n        and \"namedStyleType\" in paragraph[\"paragraphStyle\"]\n    ):\n        return False\n\n    style = paragraph[\"paragraphStyle\"][\"namedStyleType\"]\n    is_heading = style.startswith(\"HEADING_\")\n    is_title = style.startswith(\"TITLE\")\n    return is_heading or is_title\n\n\ndef _add_finished_section(\n    sections: list[TextSection],\n    doc_id: str,\n    tab_id: str,\n    current_heading: CurrentHeading,\n    current_section: list[str],\n) -> None:\n    \"\"\"Adds a finished section to the list of sections if the section has content.\n    Returns the list of sections to use going forward, which may be the old list\n    if a new section was not added.\n    \"\"\"\n    if not (current_section or current_heading.text):\n        return\n    # If we were building a previous section, add it to sections list\n\n    # this is unlikely to ever matter, but helps if the doc contains weird headings\n    header_text = current_heading.text.replace(HEADING_DELIMITER, \"\")\n    section_text = f\"{header_text}{HEADING_DELIMITER}\" + \"\\n\".join(current_section)\n    sections.append(\n        TextSection(\n            text=section_text.strip(),\n            link=_build_gdoc_section_link(doc_id, tab_id, current_heading.id),\n        )\n    )\n\n\ndef get_tab_sections(tab: dict[str, Any], doc_id: str) -> list[TextSection]:\n    tab_id = tab[\"tabProperties\"][\"tabId\"]\n    content = tab.get(\"documentTab\", {}).get(\"body\", {}).get(\"content\", [])\n\n    sections: list[TextSection] = []\n    current_section: list[str] = []\n    current_heading = CurrentHeading(id=None, text=\"\")\n\n    for element in content:\n        if \"paragraph\" in element:\n            paragraph = element[\"paragraph\"]\n\n            # If this is not a heading, add content to current section\n            if not _is_heading(paragraph):\n                text = _extract_text_from_paragraph(paragraph)\n                if text.strip():\n                    current_section.append(text)\n                continue\n\n            _add_finished_section(\n                sections, doc_id, tab_id, current_heading, current_section\n            )\n\n            current_section = []\n\n            # Start new heading\n            heading_id = _extract_id_from_heading(paragraph)\n            heading_text = _extract_text_from_paragraph(paragraph)\n            current_heading = CurrentHeading(\n                id=heading_id,\n                text=heading_text,\n            )\n        elif \"table\" in element:\n            text = _extract_text_from_table(element[\"table\"])\n            if text.strip():\n                current_section.append(text)\n\n    # Don't forget to add the last section\n    _add_finished_section(sections, doc_id, tab_id, current_heading, current_section)\n\n    return sections\n"
  },
  {
    "path": "backend/onyx/connectors/google_site/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/google_site/connector.py",
    "content": "import os\nimport re\nfrom typing import Any\nfrom typing import cast\n\nfrom bs4 import BeautifulSoup\nfrom bs4 import Tag\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.extract_file_text import load_files_from_zip\nfrom onyx.file_processing.extract_file_text import read_text_file\nfrom onyx.file_processing.html_utils import web_html_cleanup\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef a_tag_text_to_path(atag: Tag) -> str:\n    page_path = atag.text.strip().lower()\n    page_path = re.sub(r\"[^a-zA-Z0-9\\s]\", \"\", page_path)\n    page_path = \"-\".join(page_path.split())\n\n    return page_path\n\n\ndef find_google_sites_page_path_from_navbar(\n    element: BeautifulSoup | Tag, path: str, depth: int\n) -> str | None:\n    lis = cast(\n        list[Tag],\n        element.find_all(\"li\", attrs={\"data-nav-level\": f\"{depth}\"}),\n    )\n    for li in lis:\n        a = cast(Tag, li.find(\"a\"))\n        if a.get(\"aria-selected\") == \"true\":\n            return f\"{path}/{a_tag_text_to_path(a)}\"\n        elif a.get(\"aria-expanded\") == \"true\":\n            sub_path = find_google_sites_page_path_from_navbar(\n                element, f\"{path}/{a_tag_text_to_path(a)}\", depth + 1\n            )\n            if sub_path:\n                return sub_path\n\n    return None\n\n\nclass GoogleSitesConnector(LoadConnector):\n    def __init__(\n        self,\n        zip_path: str,\n        base_url: str,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ):\n        self.zip_path = zip_path\n        self.base_url = base_url\n        self.batch_size = batch_size\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        pass\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        documents: list[Document | HierarchyNode] = []\n\n        file_content_io = get_default_file_store().read_file(self.zip_path, mode=\"b\")\n\n        # load the HTML files\n        files = load_files_from_zip(file_content_io)\n        count = 0\n        for file_info, file_io in files:\n            # skip non-published files\n            if \"/PUBLISHED/\" not in file_info.filename:\n                continue\n\n            file_path, extension = os.path.splitext(file_info.filename)\n            if extension != \".html\":\n                continue\n\n            file_content, _ = read_text_file(file_io)\n            soup = BeautifulSoup(file_content, \"html.parser\")\n\n            # get the link out of the navbar\n            header = cast(Tag, soup.find(\"header\"))\n            nav = cast(Tag, header.find(\"nav\"))\n            path = find_google_sites_page_path_from_navbar(nav, \"\", 1)\n            if not path:\n                count += 1\n                logger.error(\n                    f\"Could not find path for '{file_info.filename}'. \"\n                    + \"This page will not have a working link.\\n\\n\"\n                    + f\"# of broken links so far - {count}\"\n                )\n            logger.info(f\"Path to page: {path}\")\n            # cleanup the hidden `Skip to main content` and `Skip to navigation` that\n            # appears at the top of every page\n            for div in soup.find_all(\"div\", attrs={\"data-is-touch-wrapper\": \"true\"}):\n                div.extract()\n\n            # get the body of the page\n            parsed_html = web_html_cleanup(\n                soup, additional_element_types_to_discard=[\"header\", \"nav\"]\n            )\n\n            title = parsed_html.title or file_path.split(\"/\")[-1]\n            documents.append(\n                Document(\n                    id=f\"{DocumentSource.GOOGLE_SITES.value}:{path}\",\n                    source=DocumentSource.GOOGLE_SITES,\n                    semantic_identifier=title,\n                    sections=[\n                        TextSection(\n                            link=(\n                                (self.base_url.rstrip(\"/\") + \"/\" + path.lstrip(\"/\"))\n                                if path\n                                else \"\"\n                            ),\n                            text=parsed_html.cleaned_text,\n                        )\n                    ],\n                    metadata={},\n                )\n            )\n\n            if len(documents) >= self.batch_size:\n                yield documents\n                documents = []\n\n        if documents:\n            yield documents\n\n\nif __name__ == \"__main__\":\n    connector = GoogleSitesConnector(\n        os.environ[\"GOOGLE_SITES_ZIP_PATH\"],\n        os.environ.get(\"GOOGLE_SITES_BASE_URL\", \"\"),\n    )\n    for doc_batch in connector.load_from_state():\n        for doc in doc_batch:\n            print(doc)\n"
  },
  {
    "path": "backend/onyx/connectors/google_utils/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/google_utils/google_auth.py",
    "content": "import json\nfrom typing import Any\n\nfrom google.auth.transport.requests import Request\nfrom google.oauth2.credentials import Credentials as OAuthCredentials\nfrom google.oauth2.service_account import Credentials as ServiceAccountCredentials\n\nfrom onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_ID\nfrom onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_SECRET\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_AUTHENTICATION_METHOD,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_DICT_TOKEN_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    GOOGLE_SCOPES,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    GoogleOAuthAuthenticationMethod,\n)\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef sanitize_oauth_credentials(oauth_creds: OAuthCredentials) -> str:\n    \"\"\"we really don't want to be persisting the client id and secret anywhere but the\n    environment.\n\n    Returns a string of serialized json.\n    \"\"\"\n\n    # strip the client id and secret\n    oauth_creds_json_str = oauth_creds.to_json()\n    oauth_creds_sanitized_json: dict[str, Any] = json.loads(oauth_creds_json_str)\n    oauth_creds_sanitized_json.pop(\"client_id\", None)\n    oauth_creds_sanitized_json.pop(\"client_secret\", None)\n    oauth_creds_sanitized_json_str = json.dumps(oauth_creds_sanitized_json)\n    return oauth_creds_sanitized_json_str\n\n\ndef get_google_oauth_creds(\n    token_json_str: str, source: DocumentSource\n) -> OAuthCredentials | None:\n    \"\"\"creds_json only needs to contain client_id, client_secret and refresh_token to\n    refresh the creds.\n\n    expiry and token are optional ... however, if passing in expiry, token\n    should also be passed in or else we may not return any creds.\n    (probably a sign we should refactor the function)\n    \"\"\"\n    creds_json = json.loads(token_json_str)\n    creds = OAuthCredentials.from_authorized_user_info(\n        info=creds_json,\n        scopes=GOOGLE_SCOPES[source],\n    )\n    if creds.valid:\n        return creds\n\n    if creds.expired and creds.refresh_token:\n        try:\n            creds.refresh(Request())\n            if creds.valid:\n                logger.notice(\"Refreshed Google Drive tokens.\")\n                return creds\n        except Exception:\n            logger.exception(\"Failed to refresh google drive access token\")\n            return None\n\n    return None\n\n\ndef get_google_creds(\n    credentials: dict[str, str],\n    source: DocumentSource,\n) -> tuple[ServiceAccountCredentials | OAuthCredentials, dict[str, str] | None]:\n    \"\"\"Checks for two different types of credentials.\n    (1) A credential which holds a token acquired via a user going through\n    the Google OAuth flow.\n    (2) A credential which holds a service account key JSON file, which\n    can then be used to impersonate any user in the workspace.\n\n    Return a tuple where:\n        The first element is the requested credentials\n        The second element is a new credentials dict that the caller should write back\n        to the db. This happens if token rotation occurs while loading credentials.\n    \"\"\"\n    oauth_creds = None\n    service_creds = None\n    new_creds_dict = None\n    if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials:\n        # OAUTH\n        authentication_method: str = credentials.get(\n            DB_CREDENTIALS_AUTHENTICATION_METHOD,\n            GoogleOAuthAuthenticationMethod.UPLOADED.value,\n        )\n\n        credentials_dict_str = credentials[DB_CREDENTIALS_DICT_TOKEN_KEY]\n        credentials_dict = json.loads(credentials_dict_str)\n\n        # only send what get_google_oauth_creds needs\n        authorized_user_info = {}\n\n        # oauth_interactive is sanitized and needs credentials from the environment\n        if (\n            authentication_method\n            == GoogleOAuthAuthenticationMethod.OAUTH_INTERACTIVE.value\n        ):\n            authorized_user_info[\"client_id\"] = OAUTH_GOOGLE_DRIVE_CLIENT_ID\n            authorized_user_info[\"client_secret\"] = OAUTH_GOOGLE_DRIVE_CLIENT_SECRET\n        else:\n            authorized_user_info[\"client_id\"] = credentials_dict[\"client_id\"]\n            authorized_user_info[\"client_secret\"] = credentials_dict[\"client_secret\"]\n\n        authorized_user_info[\"refresh_token\"] = credentials_dict[\"refresh_token\"]\n\n        authorized_user_info[\"token\"] = credentials_dict[\"token\"]\n        authorized_user_info[\"expiry\"] = credentials_dict[\"expiry\"]\n\n        token_json_str = json.dumps(authorized_user_info)\n        oauth_creds = get_google_oauth_creds(\n            token_json_str=token_json_str, source=source\n        )\n\n        # tell caller to update token stored in DB if the refresh token changed\n        if oauth_creds:\n            if oauth_creds.refresh_token != authorized_user_info[\"refresh_token\"]:\n                # if oauth_interactive, sanitize the credentials so they don't get stored in the db\n                if (\n                    authentication_method\n                    == GoogleOAuthAuthenticationMethod.OAUTH_INTERACTIVE.value\n                ):\n                    oauth_creds_json_str = sanitize_oauth_credentials(oauth_creds)\n                else:\n                    oauth_creds_json_str = oauth_creds.to_json()\n\n                new_creds_dict = {\n                    DB_CREDENTIALS_DICT_TOKEN_KEY: oauth_creds_json_str,\n                    DB_CREDENTIALS_PRIMARY_ADMIN_KEY: credentials[\n                        DB_CREDENTIALS_PRIMARY_ADMIN_KEY\n                    ],\n                    DB_CREDENTIALS_AUTHENTICATION_METHOD: authentication_method,\n                }\n    elif DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials:\n        # SERVICE ACCOUNT\n        service_account_key_json_str = credentials[\n            DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY\n        ]\n        service_account_key = json.loads(service_account_key_json_str)\n\n        service_creds = ServiceAccountCredentials.from_service_account_info(\n            service_account_key, scopes=GOOGLE_SCOPES[source]\n        )\n\n        if not service_creds.valid or not service_creds.expired:\n            service_creds.refresh(Request())\n\n        if not service_creds.valid:\n            raise PermissionError(\n                f\"Unable to access {source} - service account credentials are invalid.\"\n            )\n\n    creds: ServiceAccountCredentials | OAuthCredentials | None = (\n        oauth_creds or service_creds\n    )\n    if creds is None:\n        raise PermissionError(\n            f\"Unable to access {source} - unknown credential structure.\"\n        )\n\n    return creds, new_creds_dict\n"
  },
  {
    "path": "backend/onyx/connectors/google_utils/google_kv.py",
    "content": "import json\nfrom typing import cast\nfrom urllib.parse import parse_qs\nfrom urllib.parse import ParseResult\nfrom urllib.parse import urlparse\n\nfrom google.oauth2.credentials import Credentials as OAuthCredentials\nfrom google_auth_oauthlib.flow import InstalledAppFlow  # type: ignore\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import KV_CRED_KEY\nfrom onyx.configs.constants import KV_GMAIL_CRED_KEY\nfrom onyx.configs.constants import KV_GMAIL_SERVICE_ACCOUNT_KEY\nfrom onyx.configs.constants import KV_GOOGLE_DRIVE_CRED_KEY\nfrom onyx.configs.constants import KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY\nfrom onyx.connectors.google_utils.resources import get_drive_service\nfrom onyx.connectors.google_utils.resources import get_gmail_service\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_AUTHENTICATION_METHOD,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_DICT_TOKEN_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    GOOGLE_SCOPES,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    GoogleOAuthAuthenticationMethod,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    MISSING_SCOPES_ERROR_STR,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    ONYX_SCOPE_INSTRUCTIONS,\n)\nfrom onyx.db.credentials import update_credential_json\nfrom onyx.db.models import User\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.key_value_store.interface import unwrap_str\nfrom onyx.server.documents.models import CredentialBase\nfrom onyx.server.documents.models import GoogleAppCredentials\nfrom onyx.server.documents.models import GoogleServiceAccountKey\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _build_frontend_google_drive_redirect(source: DocumentSource) -> str:\n    if source == DocumentSource.GOOGLE_DRIVE:\n        return f\"{WEB_DOMAIN}/admin/connectors/google-drive/auth/callback\"\n    elif source == DocumentSource.GMAIL:\n        return f\"{WEB_DOMAIN}/admin/connectors/gmail/auth/callback\"\n    else:\n        raise ValueError(f\"Unsupported source: {source}\")\n\n\ndef _get_current_oauth_user(creds: OAuthCredentials, source: DocumentSource) -> str:\n    if source == DocumentSource.GOOGLE_DRIVE:\n        drive_service = get_drive_service(creds)\n        user_info = (\n            drive_service.about()\n            .get(\n                fields=\"user(emailAddress)\",\n            )\n            .execute()\n        )\n        email = user_info.get(\"user\", {}).get(\"emailAddress\")\n    elif source == DocumentSource.GMAIL:\n        gmail_service = get_gmail_service(creds)\n        user_info = (\n            gmail_service.users()\n            .getProfile(\n                userId=\"me\",\n                fields=\"emailAddress\",\n            )\n            .execute()\n        )\n        email = user_info.get(\"emailAddress\")\n    else:\n        raise ValueError(f\"Unsupported source: {source}\")\n    return email\n\n\ndef verify_csrf(credential_id: int, state: str) -> None:\n    csrf = unwrap_str(get_kv_store().load(KV_CRED_KEY.format(str(credential_id))))\n    if csrf != state:\n        raise PermissionError(\n            \"State from Google Drive Connector callback does not match expected\"\n        )\n\n\ndef update_credential_access_tokens(\n    auth_code: str,\n    credential_id: int,\n    user: User,\n    db_session: Session,\n    source: DocumentSource,\n    auth_method: GoogleOAuthAuthenticationMethod,\n) -> OAuthCredentials | None:\n    app_credentials = get_google_app_cred(source)\n    flow = InstalledAppFlow.from_client_config(\n        app_credentials.model_dump(),\n        scopes=GOOGLE_SCOPES[source],\n        redirect_uri=_build_frontend_google_drive_redirect(source),\n    )\n    flow.fetch_token(code=auth_code)\n    creds = flow.credentials\n    token_json_str = creds.to_json()\n\n    # Get user email from Google API so we know who\n    # the primary admin is for this connector\n    try:\n        email = _get_current_oauth_user(creds, source)\n    except Exception as e:\n        if MISSING_SCOPES_ERROR_STR in str(e):\n            raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e\n        raise e\n\n    new_creds_dict = {\n        DB_CREDENTIALS_DICT_TOKEN_KEY: token_json_str,\n        DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,\n        DB_CREDENTIALS_AUTHENTICATION_METHOD: auth_method.value,\n    }\n\n    if not update_credential_json(credential_id, new_creds_dict, user, db_session):\n        return None\n    return creds\n\n\ndef build_service_account_creds(\n    source: DocumentSource,\n    primary_admin_email: str | None = None,\n    name: str | None = None,\n) -> CredentialBase:\n    service_account_key = get_service_account_key(source=source)\n\n    credential_dict = {\n        DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: service_account_key.json(),\n    }\n    if primary_admin_email:\n        credential_dict[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] = primary_admin_email\n\n    credential_dict[DB_CREDENTIALS_AUTHENTICATION_METHOD] = (\n        GoogleOAuthAuthenticationMethod.UPLOADED.value\n    )\n\n    return CredentialBase(\n        credential_json=credential_dict,\n        admin_public=True,\n        source=source,\n        name=name,\n    )\n\n\ndef get_auth_url(credential_id: int, source: DocumentSource) -> str:\n    if source == DocumentSource.GOOGLE_DRIVE:\n        creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY))\n    elif source == DocumentSource.GMAIL:\n        creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))\n    else:\n        raise ValueError(f\"Unsupported source: {source}\")\n    credential_json = json.loads(creds_str)\n    flow = InstalledAppFlow.from_client_config(\n        credential_json,\n        scopes=GOOGLE_SCOPES[source],\n        redirect_uri=_build_frontend_google_drive_redirect(source),\n    )\n    auth_url, _ = flow.authorization_url(prompt=\"consent\")\n\n    parsed_url = cast(ParseResult, urlparse(auth_url))\n    params = parse_qs(parsed_url.query)\n\n    get_kv_store().store(\n        KV_CRED_KEY.format(credential_id),\n        {\"value\": params.get(\"state\", [None])[0]},\n        encrypt=True,\n    )\n    return str(auth_url)\n\n\ndef get_google_app_cred(source: DocumentSource) -> GoogleAppCredentials:\n    if source == DocumentSource.GOOGLE_DRIVE:\n        creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY))\n    elif source == DocumentSource.GMAIL:\n        creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))\n    else:\n        raise ValueError(f\"Unsupported source: {source}\")\n    return GoogleAppCredentials(**json.loads(creds_str))\n\n\ndef upsert_google_app_cred(\n    app_credentials: GoogleAppCredentials, source: DocumentSource\n) -> None:\n    if source == DocumentSource.GOOGLE_DRIVE:\n        get_kv_store().store(\n            KV_GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True\n        )\n    elif source == DocumentSource.GMAIL:\n        get_kv_store().store(KV_GMAIL_CRED_KEY, app_credentials.json(), encrypt=True)\n    else:\n        raise ValueError(f\"Unsupported source: {source}\")\n\n\ndef delete_google_app_cred(source: DocumentSource) -> None:\n    if source == DocumentSource.GOOGLE_DRIVE:\n        get_kv_store().delete(KV_GOOGLE_DRIVE_CRED_KEY)\n    elif source == DocumentSource.GMAIL:\n        get_kv_store().delete(KV_GMAIL_CRED_KEY)\n    else:\n        raise ValueError(f\"Unsupported source: {source}\")\n\n\ndef get_service_account_key(source: DocumentSource) -> GoogleServiceAccountKey:\n    if source == DocumentSource.GOOGLE_DRIVE:\n        creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY))\n    elif source == DocumentSource.GMAIL:\n        creds_str = str(get_kv_store().load(KV_GMAIL_SERVICE_ACCOUNT_KEY))\n    else:\n        raise ValueError(f\"Unsupported source: {source}\")\n    return GoogleServiceAccountKey(**json.loads(creds_str))\n\n\ndef upsert_service_account_key(\n    service_account_key: GoogleServiceAccountKey, source: DocumentSource\n) -> None:\n    if source == DocumentSource.GOOGLE_DRIVE:\n        get_kv_store().store(\n            KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY,\n            service_account_key.json(),\n            encrypt=True,\n        )\n    elif source == DocumentSource.GMAIL:\n        get_kv_store().store(\n            KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True\n        )\n    else:\n        raise ValueError(f\"Unsupported source: {source}\")\n\n\ndef delete_service_account_key(source: DocumentSource) -> None:\n    if source == DocumentSource.GOOGLE_DRIVE:\n        get_kv_store().delete(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)\n    elif source == DocumentSource.GMAIL:\n        get_kv_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)\n    else:\n        raise ValueError(f\"Unsupported source: {source}\")\n"
  },
  {
    "path": "backend/onyx/connectors/google_utils/google_utils.py",
    "content": "import re\nimport socket\nimport time\nfrom collections.abc import Callable\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom enum import Enum\nfrom typing import Any\n\nfrom googleapiclient.errors import HttpError  # type: ignore\n\nfrom onyx.connectors.google_drive.models import GoogleDriveFileType\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\nlogger = setup_logger()\n\n_RATE_LIMIT_REASONS = {\"userRateLimitExceeded\", \"rateLimitExceeded\"}\n\n\ndef _is_rate_limit_error(error: HttpError) -> bool:\n    \"\"\"Google sometimes returns rate-limit errors as 403 with reason\n    'userRateLimitExceeded' instead of 429. This helper detects both.\"\"\"\n    if error.resp.status == 429:\n        return True\n    if error.resp.status != 403:\n        return False\n    error_details = getattr(error, \"error_details\", None) or []\n    for detail in error_details:\n        if isinstance(detail, dict) and detail.get(\"reason\") in _RATE_LIMIT_REASONS:\n            return True\n    return \"userRateLimitExceeded\" in str(error) or \"rateLimitExceeded\" in str(error)\n\n\n# Google Drive APIs are quite flakey and may 500 for an\n# extended period of time. This is now addressed by checkpointing.\n#\n# NOTE: We previously tried to combat this here by adding a very\n# long retry period (~20 minutes of trying, one request a minute.)\n# This is no longer necessary due to checkpointing.\nadd_retries = retry_builder(tries=5, max_delay=10)\n\nNEXT_PAGE_TOKEN_KEY = \"nextPageToken\"\nPAGE_TOKEN_KEY = \"pageToken\"\nORDER_BY_KEY = \"orderBy\"\n\n\n# See https://developers.google.com/drive/api/reference/rest/v3/files/list for more\nclass GoogleFields(str, Enum):\n    ID = \"id\"\n    CREATED_TIME = \"createdTime\"\n    MODIFIED_TIME = \"modifiedTime\"\n    NAME = \"name\"\n    SIZE = \"size\"\n    PARENTS = \"parents\"\n\n\ndef _execute_with_retry(request: Any) -> Any:\n    max_attempts = 6\n    attempt = 1\n\n    while attempt < max_attempts:\n        # Note for reasons unknown, the Google API will sometimes return a 429\n        # and even after waiting the retry period, it will return another 429.\n        # It could be due to a few possibilities:\n        # 1. Other things are also requesting from the Drive/Gmail API with the same key\n        # 2. It's a rolling rate limit so the moment we get some amount of requests cleared, we hit it again very quickly\n        # 3. The retry-after has a maximum and we've already hit the limit for the day\n        # or it's something else...\n        try:\n            return request.execute()\n        except HttpError as error:\n            attempt += 1\n\n            if _is_rate_limit_error(error):\n                # Attempt to get 'Retry-After' from headers\n                retry_after = error.resp.get(\"Retry-After\")\n                if retry_after:\n                    sleep_time = int(retry_after)\n                else:\n                    # Extract 'Retry after' timestamp from error message\n                    match = re.search(\n                        r\"Retry after (\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+Z)\",\n                        str(error),\n                    )\n                    if match:\n                        retry_after_timestamp = match.group(1)\n                        retry_after_dt = datetime.strptime(\n                            retry_after_timestamp, \"%Y-%m-%dT%H:%M:%S.%fZ\"\n                        ).replace(tzinfo=timezone.utc)\n                        current_time = datetime.now(timezone.utc)\n                        sleep_time = max(\n                            int((retry_after_dt - current_time).total_seconds()),\n                            0,\n                        )\n                    else:\n                        logger.error(\n                            f\"No Retry-After header or timestamp found in error message: {error}\"\n                        )\n                        sleep_time = 60\n\n                sleep_time += 3  # Add a buffer to be safe\n\n                logger.info(\n                    f\"Rate limit exceeded. Attempt {attempt}/{max_attempts}. Sleeping for {sleep_time} seconds.\"\n                )\n                time.sleep(sleep_time)\n\n            else:\n                raise\n\n    # If we've exhausted all attempts\n    raise Exception(f\"Failed to execute request after {max_attempts} attempts\")\n\n\ndef get_file_owners(file: GoogleDriveFileType, primary_admin_email: str) -> list[str]:\n    \"\"\"\n    Get the owners of a file if the attribute is present.\n    \"\"\"\n    return [\n        email\n        for owner in file.get(\"owners\", [])\n        if (email := owner.get(\"emailAddress\"))\n        and email.split(\"@\")[-1] == primary_admin_email.split(\"@\")[-1]\n    ]\n\n\ndef _execute_single_retrieval(\n    retrieval_function: Callable,\n    continue_on_404_or_403: bool = False,\n    **request_kwargs: Any,\n) -> GoogleDriveFileType:\n    \"\"\"Execute a single retrieval from Google Drive API\"\"\"\n    try:\n        results = retrieval_function(**request_kwargs).execute()\n    except HttpError as e:\n        if e.resp.status >= 500:\n            results = add_retries(\n                lambda: retrieval_function(**request_kwargs).execute()\n            )()\n        elif e.resp.status == 400:\n            if (\n                \"pageToken\" in request_kwargs\n                and \"Invalid Value\" in str(e)\n                and \"pageToken\" in str(e)\n            ):\n                logger.warning(\n                    f\"Invalid page token: {request_kwargs['pageToken']}, retrying from start of request\"\n                )\n                request_kwargs.pop(\"pageToken\")\n                return _execute_single_retrieval(\n                    retrieval_function,\n                    continue_on_404_or_403,\n                    **request_kwargs,\n                )\n            logger.error(f\"Error executing request: {e}\")\n            raise e\n        elif _is_rate_limit_error(e):\n            results = _execute_with_retry(retrieval_function(**request_kwargs))\n        elif e.resp.status == 404 or e.resp.status == 403:\n            if continue_on_404_or_403:\n                logger.debug(f\"Error executing request: {e}\")\n                results = {}\n            else:\n                raise e\n        else:\n            logger.exception(\"Error executing request:\")\n            raise e\n    except (TimeoutError, socket.timeout) as error:\n        logger.warning(\n            \"Timed out executing Google API request; retrying with backoff. Details: %s\",\n            error,\n        )\n        results = add_retries(lambda: retrieval_function(**request_kwargs).execute())()\n\n    return results\n\n\ndef execute_single_retrieval(\n    retrieval_function: Callable,\n    list_key: str | None = None,\n    continue_on_404_or_403: bool = False,\n    **request_kwargs: Any,\n) -> Iterator[GoogleDriveFileType]:\n    results = _execute_single_retrieval(\n        retrieval_function,\n        continue_on_404_or_403,\n        **request_kwargs,\n    )\n    if list_key:\n        for item in results.get(list_key, []):\n            yield item\n    else:\n        yield results\n\n\n# included for type purposes; caller should not need to address\n# Nones unless max_num_pages is specified. Use\n# execute_paginated_retrieval_with_max_pages instead if you want\n# the early stop + yield None after max_num_pages behavior.\ndef execute_paginated_retrieval(\n    retrieval_function: Callable,\n    list_key: str | None = None,\n    continue_on_404_or_403: bool = False,\n    **kwargs: Any,\n) -> Iterator[GoogleDriveFileType]:\n    for item in _execute_paginated_retrieval(\n        retrieval_function,\n        list_key,\n        continue_on_404_or_403,\n        **kwargs,\n    ):\n        if not isinstance(item, str):\n            yield item\n\n\ndef execute_paginated_retrieval_with_max_pages(\n    retrieval_function: Callable,\n    max_num_pages: int,\n    list_key: str | None = None,\n    continue_on_404_or_403: bool = False,\n    **kwargs: Any,\n) -> Iterator[GoogleDriveFileType | str]:\n    yield from _execute_paginated_retrieval(\n        retrieval_function,\n        list_key,\n        continue_on_404_or_403,\n        max_num_pages=max_num_pages,\n        **kwargs,\n    )\n\n\ndef _execute_paginated_retrieval(\n    retrieval_function: Callable,\n    list_key: str | None = None,\n    continue_on_404_or_403: bool = False,\n    max_num_pages: int | None = None,\n    **kwargs: Any,\n) -> Iterator[GoogleDriveFileType | str]:\n    \"\"\"Execute a paginated retrieval from Google Drive API\n    Args:\n        retrieval_function: The specific list function to call (e.g., service.files().list)\n        list_key: If specified, each object returned by the retrieval function\n                  will be accessed at the specified key and yielded from.\n        continue_on_404_or_403: If True, the retrieval will continue even if the request returns a 404 or 403 error.\n        max_num_pages: If specified, the retrieval will stop after the specified number of pages and yield None.\n        **kwargs: Arguments to pass to the list function\n    \"\"\"\n    if \"fields\" not in kwargs or \"nextPageToken\" not in kwargs[\"fields\"]:\n        raise ValueError(\n            \"fields must contain nextPageToken for execute_paginated_retrieval\"\n        )\n    next_page_token = kwargs.get(PAGE_TOKEN_KEY, \"\")\n    num_pages = 0\n    while next_page_token is not None:\n        if max_num_pages is not None and num_pages >= max_num_pages:\n            yield next_page_token\n            return\n        num_pages += 1\n        request_kwargs = kwargs.copy()\n        if next_page_token:\n            request_kwargs[PAGE_TOKEN_KEY] = next_page_token\n        results = _execute_single_retrieval(\n            retrieval_function,\n            continue_on_404_or_403,\n            **request_kwargs,\n        )\n\n        next_page_token = results.get(NEXT_PAGE_TOKEN_KEY)\n        if list_key:\n            for item in results.get(list_key, []):\n                yield item\n        else:\n            yield results\n"
  },
  {
    "path": "backend/onyx/connectors/google_utils/resources.py",
    "content": "from collections.abc import Callable\nfrom typing import Any\n\nfrom google.auth.exceptions import RefreshError\nfrom google.oauth2.credentials import Credentials as OAuthCredentials\nfrom google.oauth2.service_account import Credentials as ServiceAccountCredentials\nfrom googleapiclient.discovery import build  # type: ignore[import-untyped]\nfrom googleapiclient.discovery import Resource\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass GoogleDriveService(Resource):\n    pass\n\n\nclass GoogleDocsService(Resource):\n    pass\n\n\nclass AdminService(Resource):\n    pass\n\n\nclass GmailService(Resource):\n    pass\n\n\nclass RefreshableDriveObject:\n    \"\"\"\n    Running Google drive service retrieval functions\n    involves accessing methods of the service object (ie. files().list())\n    which can raise a RefreshError if the access token is expired.\n    This class is a wrapper that propagates the ability to refresh the access token\n    and retry the final retrieval function until execute() is called.\n    \"\"\"\n\n    def __init__(\n        self,\n        call_stack: Callable[[ServiceAccountCredentials | OAuthCredentials], Any],\n        creds: ServiceAccountCredentials | OAuthCredentials,\n        creds_getter: Callable[..., ServiceAccountCredentials | OAuthCredentials],\n    ):\n        self.call_stack = call_stack\n        self.creds = creds\n        self.creds_getter = creds_getter\n\n    def __getattr__(self, name: str) -> Any:\n        if name == \"execute\":\n            return self.make_refreshable_execute()\n        return RefreshableDriveObject(\n            lambda creds: getattr(self.call_stack(creds), name),\n            self.creds,\n            self.creds_getter,\n        )\n\n    def __call__(self, *args: Any, **kwargs: Any) -> Any:\n        return RefreshableDriveObject(\n            lambda creds: self.call_stack(creds)(*args, **kwargs),\n            self.creds,\n            self.creds_getter,\n        )\n\n    def make_refreshable_execute(self) -> Callable:\n        def execute(*args: Any, **kwargs: Any) -> Any:\n            try:\n                return self.call_stack(self.creds).execute(*args, **kwargs)\n            except RefreshError as e:\n                logger.warning(\n                    f\"RefreshError, going to attempt a creds refresh and retry: {e}\"\n                )\n                # Refresh the access token\n                self.creds = self.creds_getter()\n                return self.call_stack(self.creds).execute(*args, **kwargs)\n\n        return execute\n\n\ndef _get_google_service(\n    service_name: str,\n    service_version: str,\n    creds: ServiceAccountCredentials | OAuthCredentials,\n    user_email: str | None = None,\n) -> GoogleDriveService | GoogleDocsService | AdminService | GmailService:\n    service: Resource\n    if isinstance(creds, ServiceAccountCredentials):\n        # NOTE: https://developers.google.com/identity/protocols/oauth2/service-account#error-codes\n        creds = creds.with_subject(user_email)\n        service = build(service_name, service_version, credentials=creds)\n    elif isinstance(creds, OAuthCredentials):\n        service = build(service_name, service_version, credentials=creds)\n\n    return service\n\n\ndef get_google_docs_service(\n    creds: ServiceAccountCredentials | OAuthCredentials,\n    user_email: str | None = None,\n) -> GoogleDocsService:\n    return _get_google_service(\"docs\", \"v1\", creds, user_email)\n\n\ndef get_drive_service(\n    creds: ServiceAccountCredentials | OAuthCredentials,\n    user_email: str | None = None,\n) -> GoogleDriveService:\n    return _get_google_service(\"drive\", \"v3\", creds, user_email)\n\n\ndef get_admin_service(\n    creds: ServiceAccountCredentials | OAuthCredentials,\n    user_email: str | None = None,\n) -> AdminService:\n    return _get_google_service(\"admin\", \"directory_v1\", creds, user_email)\n\n\ndef get_gmail_service(\n    creds: ServiceAccountCredentials | OAuthCredentials,\n    user_email: str | None = None,\n) -> GmailService:\n    return _get_google_service(\"gmail\", \"v1\", creds, user_email)\n"
  },
  {
    "path": "backend/onyx/connectors/google_utils/shared_constants.py",
    "content": "from enum import Enum as PyEnum\n\nfrom onyx.configs.constants import DocumentSource\n\n# NOTE: do not need https://www.googleapis.com/auth/documents.readonly\n# this is counted under `/auth/drive.readonly`\nGOOGLE_SCOPES = {\n    DocumentSource.GOOGLE_DRIVE: [\n        \"https://www.googleapis.com/auth/drive.readonly\",\n        \"https://www.googleapis.com/auth/drive.metadata.readonly\",\n        \"https://www.googleapis.com/auth/admin.directory.group.readonly\",\n        \"https://www.googleapis.com/auth/admin.directory.user.readonly\",\n    ],\n    DocumentSource.GMAIL: [\n        \"https://www.googleapis.com/auth/gmail.readonly\",\n        \"https://www.googleapis.com/auth/admin.directory.user.readonly\",\n        \"https://www.googleapis.com/auth/admin.directory.group.readonly\",\n    ],\n}\n\n# This is the Oauth token\nDB_CREDENTIALS_DICT_TOKEN_KEY = \"google_tokens\"\n# This is the service account key\nDB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = \"google_service_account_key\"\n# The email saved for both auth types\nDB_CREDENTIALS_PRIMARY_ADMIN_KEY = \"google_primary_admin\"\n\n# https://developers.google.com/workspace/guides/create-credentials\n# Internally defined authentication method type.\n# The value must be one of \"oauth_interactive\" or \"uploaded\"\n# Used to disambiguate whether credentials have already been created via\n# certain methods and what actions we allow users to take\nDB_CREDENTIALS_AUTHENTICATION_METHOD = \"authentication_method\"\n\n\nclass GoogleOAuthAuthenticationMethod(str, PyEnum):\n    OAUTH_INTERACTIVE = \"oauth_interactive\"\n    UPLOADED = \"uploaded\"\n\n\nUSER_FIELDS = \"nextPageToken, users(primaryEmail)\"\n\n# Error message substrings\nMISSING_SCOPES_ERROR_STR = \"client not authorized for any of the scopes requested\"\n\n# Documentation and error messages\nSCOPE_DOC_URL = \"https://docs.onyx.app/admins/connectors/official/google_drive/overview\"\nONYX_SCOPE_INSTRUCTIONS = (\n    \"You have upgraded Onyx without updating the Google Auth scopes. \"\n    f\"Please refer to the documentation to learn how to update the scopes: {SCOPE_DOC_URL}\"\n)\n\n\n# This is the maximum number of threads that can be retrieved at once\nSLIM_BATCH_SIZE = 500\n"
  },
  {
    "path": "backend/onyx/connectors/guru/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/guru/connector.py",
    "content": "import json\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nimport requests\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.html_utils import parse_html_page_basic\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n# Potential Improvements\n# 1. Support fetching per collection via collection token (configured at connector creation)\nGURU_API_BASE = \"https://api.getguru.com/api/v1/\"\nGURU_QUERY_ENDPOINT = GURU_API_BASE + \"search/query\"\nGURU_CARDS_URL = \"https://app.getguru.com/card/\"\n\n\ndef unixtime_to_guru_time_str(unix_time: SecondsSinceUnixEpoch) -> str:\n    date_obj = datetime.fromtimestamp(unix_time, tz=timezone.utc)\n    date_str = date_obj.strftime(\"%Y-%m-%dT%H:%M:%S.%f\")[:-3]\n    tz_str = date_obj.strftime(\"%z\")\n    return date_str + tz_str\n\n\nclass GuruConnector(LoadConnector, PollConnector):\n    def __init__(\n        self,\n        batch_size: int = INDEX_BATCH_SIZE,\n        guru_user: str | None = None,\n        guru_user_token: str | None = None,\n    ) -> None:\n        self.batch_size = batch_size\n        self.guru_user = guru_user\n        self.guru_user_token = guru_user_token\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self.guru_user = credentials[\"guru_user\"]\n        self.guru_user_token = credentials[\"guru_user_token\"]\n        return None\n\n    def _process_cards(\n        self, start_str: str | None = None, end_str: str | None = None\n    ) -> GenerateDocumentsOutput:\n        if self.guru_user is None or self.guru_user_token is None:\n            raise ConnectorMissingCredentialError(\"Guru\")\n\n        doc_batch: list[Document | HierarchyNode] = []\n\n        session = requests.Session()\n        session.auth = (self.guru_user, self.guru_user_token)\n\n        params: dict[str, str | int] = {\"maxResults\": self.batch_size}\n\n        if start_str is not None and end_str is not None:\n            params[\"q\"] = f\"lastModified >= {start_str} AND lastModified < {end_str}\"\n\n        current_url = GURU_QUERY_ENDPOINT  # This is how they handle pagination, a different url will be provided\n        while True:\n            response = session.get(current_url, params=params)\n            response.raise_for_status()\n\n            if response.status_code == 204:\n                break\n\n            cards = json.loads(response.text)\n            for card in cards:\n                title = card[\"preferredPhrase\"]\n                link = GURU_CARDS_URL + card[\"slug\"]\n                content_text = parse_html_page_basic(card[\"content\"])\n                last_updated = time_str_to_utc(card[\"lastModified\"])\n                last_verified = (\n                    time_str_to_utc(card.get(\"lastVerified\"))\n                    if card.get(\"lastVerified\")\n                    else None\n                )\n\n                # For Onyx, we decay document score overtime, either last_updated or\n                # last_verified is a good enough signal for the document's recency\n                latest_time = (\n                    max(last_verified, last_updated) if last_verified else last_updated\n                )\n\n                metadata_dict: dict[str, str | list[str]] = {}\n                tags = [tag.get(\"value\") for tag in card.get(\"tags\", [])]\n                if tags:\n                    metadata_dict[\"tags\"] = tags\n\n                boards = [board.get(\"title\") for board in card.get(\"boards\", [])]\n                if boards:\n                    # In UI it's called Folders\n                    metadata_dict[\"folders\"] = boards\n\n                collection = card.get(\"collection\", {})\n                if collection:\n                    metadata_dict[\"collection_name\"] = collection.get(\"name\", \"\")\n\n                owner = card.get(\"owner\", {})\n                author = None\n                if owner:\n                    author = BasicExpertInfo(\n                        email=owner.get(\"email\"),\n                        first_name=owner.get(\"firstName\"),\n                        last_name=owner.get(\"lastName\"),\n                    )\n\n                doc_batch.append(\n                    Document(\n                        id=card[\"id\"],\n                        sections=[TextSection(link=link, text=content_text)],\n                        source=DocumentSource.GURU,\n                        semantic_identifier=title,\n                        doc_updated_at=latest_time,\n                        primary_owners=[author] if author is not None else None,\n                        # Can add verifies and commenters later\n                        metadata=metadata_dict,\n                    )\n                )\n\n                if len(doc_batch) >= self.batch_size:\n                    yield doc_batch\n                    doc_batch = []\n\n            if not hasattr(response, \"links\") or not response.links:\n                break\n            current_url = response.links[\"next-page\"][\"url\"]\n\n        if doc_batch:\n            yield doc_batch\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        return self._process_cards()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        start_time = unixtime_to_guru_time_str(start)\n        end_time = unixtime_to_guru_time_str(end)\n\n        return self._process_cards(start_time, end_time)\n\n\nif __name__ == \"__main__\":\n    import os\n\n    connector = GuruConnector()\n    connector.load_credentials(\n        {\n            \"guru_user\": os.environ[\"GURU_USER\"],\n            \"guru_user_token\": os.environ[\"GURU_USER_TOKEN\"],\n        }\n    )\n\n    latest_docs = connector.load_from_state()\n    print(next(latest_docs))\n"
  },
  {
    "path": "backend/onyx/connectors/highspot/__init__.py",
    "content": "\"\"\"\nHighspot connector package for Onyx.\nEnables integration with Highspot's knowledge base.\n\"\"\"\n"
  },
  {
    "path": "backend/onyx/connectors/highspot/client.py",
    "content": "import base64\nfrom typing import Any\nfrom typing import Dict\nfrom typing import List\nfrom typing import Optional\nfrom urllib.parse import urljoin\n\nimport requests\nfrom requests.adapters import HTTPAdapter\nfrom requests.exceptions import HTTPError\nfrom requests.exceptions import RequestException\nfrom requests.exceptions import Timeout\nfrom urllib3.util.retry import Retry\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\nPAGE_SIZE = 100\n\n\nclass HighspotClientError(Exception):\n    \"\"\"Base exception for Highspot API client errors.\"\"\"\n\n    def __init__(self, message: str, status_code: Optional[int] = None):\n        self.message = message\n        self.status_code = status_code\n        super().__init__(self.message)\n\n\nclass HighspotAuthenticationError(HighspotClientError):\n    \"\"\"Exception raised for authentication errors.\"\"\"\n\n\nclass HighspotRateLimitError(HighspotClientError):\n    \"\"\"Exception raised when rate limit is exceeded.\"\"\"\n\n    def __init__(self, message: str, retry_after: Optional[str] = None):\n        self.retry_after = retry_after\n        super().__init__(message)\n\n\nclass HighspotClient:\n    \"\"\"\n    Client for interacting with the Highspot API.\n\n    Uses basic authentication with provided key (username) and secret (password).\n    Implements retry logic, error handling, and connection pooling.\n    \"\"\"\n\n    BASE_URL = \"https://api-su2.highspot.com/v1.0/\"\n\n    def __init__(\n        self,\n        key: str,\n        secret: str,\n        base_url: str = BASE_URL,\n        timeout: int = 30,\n        max_retries: int = 3,\n        backoff_factor: float = 0.5,\n        status_forcelist: Optional[List[int]] = None,\n    ):\n        \"\"\"\n        Initialize the Highspot API client.\n\n        Args:\n            key: API key (used as username)\n            secret: API secret (used as password)\n            base_url: Base URL for the Highspot API\n            timeout: Request timeout in seconds\n            max_retries: Maximum number of retries for failed requests\n            backoff_factor: Backoff factor for retries\n            status_forcelist: HTTP status codes to retry on\n        \"\"\"\n        if not key or not secret:\n            raise ValueError(\"API key and secret are required\")\n\n        self.key = key\n        self.secret = secret\n        self.base_url = base_url.rstrip(\"/\") + \"/\"\n        self.timeout = timeout\n\n        # Set up session with retry logic\n        self.session = requests.Session()\n        retry_strategy = Retry(\n            total=max_retries,\n            backoff_factor=backoff_factor,\n            status_forcelist=status_forcelist or [429, 500, 502, 503, 504],\n            allowed_methods=[\"GET\", \"POST\", \"PUT\", \"DELETE\"],\n        )\n        adapter = HTTPAdapter(max_retries=retry_strategy)\n        self.session.mount(\"http://\", adapter)\n        self.session.mount(\"https://\", adapter)\n\n        # Set up authentication\n        self._setup_auth()\n\n    def _setup_auth(self) -> None:\n        \"\"\"Set up basic authentication for the session.\"\"\"\n        auth = f\"{self.key}:{self.secret}\"\n        encoded_auth = base64.b64encode(auth.encode()).decode()\n        self.session.headers.update(\n            {\n                \"Authorization\": f\"Basic {encoded_auth}\",\n                \"Content-Type\": \"application/json\",\n                \"Accept\": \"application/json\",\n            }\n        )\n\n    def _make_request(\n        self,\n        method: str,\n        endpoint: str,\n        params: Optional[Dict[str, Any]] = None,\n        data: Optional[Dict[str, Any]] = None,\n        json_data: Optional[Dict[str, Any]] = None,\n        headers: Optional[Dict[str, str]] = None,\n    ) -> Dict[str, Any]:\n        \"\"\"\n        Make a request to the Highspot API.\n\n        Args:\n            method: HTTP method (GET, POST, etc.)\n            endpoint: API endpoint\n            params: URL parameters\n            data: Form data\n            json_data: JSON data\n            headers: Additional headers\n\n        Returns:\n            API response as a dictionary\n\n        Raises:\n            HighspotClientError: On API errors\n            HighspotAuthenticationError: On authentication errors\n            HighspotRateLimitError: On rate limiting\n            requests.exceptions.RequestException: On request failures\n        \"\"\"\n        url = urljoin(self.base_url, endpoint)\n        request_headers = {}\n        if headers:\n            request_headers.update(headers)\n\n        try:\n            logger.debug(f\"Making {method} request to {url}\")\n            response = self.session.request(\n                method=method,\n                url=url,\n                params=params,\n                data=data,\n                json=json_data,\n                headers=request_headers,\n                timeout=self.timeout,\n            )\n            response.raise_for_status()\n\n            if response.content and response.content.strip():\n                return response.json()\n            return {}\n\n        except HTTPError as e:\n            status_code = e.response.status_code\n            error_msg = str(e)\n\n            try:\n                error_data = e.response.json()\n                if isinstance(error_data, dict):\n                    error_msg = error_data.get(\"message\", str(e))\n            except (ValueError, KeyError):\n                pass\n\n            if status_code == 401:\n                raise HighspotAuthenticationError(f\"Authentication failed: {error_msg}\")\n            elif status_code == 429:\n                retry_after = e.response.headers.get(\"Retry-After\")\n                raise HighspotRateLimitError(\n                    f\"Rate limit exceeded: {error_msg}\", retry_after=retry_after\n                )\n            else:\n                raise HighspotClientError(\n                    f\"API error {status_code}: {error_msg}\", status_code=status_code\n                )\n\n        except Timeout:\n            raise HighspotClientError(\"Request timed out\")\n        except RequestException as e:\n            raise HighspotClientError(f\"Request failed: {str(e)}\")\n\n    def get_spots(self) -> List[Dict[str, Any]]:\n        \"\"\"\n        Get all available spots, paginated.\n\n        Returns:\n            List of spots with their names and IDs\n        \"\"\"\n        all_spots = []\n        has_more = True\n        current_offset = 0\n\n        while has_more:\n            params = {\"right\": \"view\", \"start\": current_offset, \"limit\": PAGE_SIZE}\n            response = self._make_request(\"GET\", \"spots\", params=params)\n            found_spots = response.get(\"collection\", [])\n            logger.info(f\"Received {len(found_spots)} spots at offset {current_offset}\")\n            all_spots.extend(found_spots)\n            if len(found_spots) < PAGE_SIZE:\n                has_more = False\n            else:\n                current_offset += PAGE_SIZE\n        logger.info(f\"Total spots retrieved: {len(all_spots)}\")\n        return all_spots\n\n    def get_spot(self, spot_id: str) -> Dict[str, Any]:\n        \"\"\"\n        Get details for a specific spot.\n\n        Args:\n            spot_id: ID of the spot\n\n        Returns:\n            Spot details\n        \"\"\"\n        if not spot_id:\n            raise ValueError(\"spot_id is required\")\n        return self._make_request(\"GET\", f\"spots/{spot_id}\")\n\n    def get_spot_items(\n        self, spot_id: str, offset: int = 0, page_size: int = PAGE_SIZE\n    ) -> Dict[str, Any]:\n        \"\"\"\n        Get items in a specific spot.\n\n        Args:\n            spot_id: ID of the spot\n            offset: offset number\n            page_size: Number of items per page\n\n        Returns:\n            Items in the spot\n        \"\"\"\n        if not spot_id:\n            raise ValueError(\"spot_id is required\")\n\n        params = {\"spot\": spot_id, \"start\": offset, \"limit\": page_size}\n        return self._make_request(\"GET\", \"items\", params=params)\n\n    def get_item(self, item_id: str) -> Dict[str, Any]:\n        \"\"\"\n        Get details for a specific item.\n\n        Args:\n            item_id: ID of the item\n\n        Returns:\n            Item details\n        \"\"\"\n        if not item_id:\n            raise ValueError(\"item_id is required\")\n        return self._make_request(\"GET\", f\"items/{item_id}\")\n\n    def get_item_content(self, item_id: str) -> bytes:\n        \"\"\"\n        Get the raw content of an item.\n\n        Args:\n            item_id: ID of the item\n\n        Returns:\n            Raw content bytes\n        \"\"\"\n        if not item_id:\n            raise ValueError(\"item_id is required\")\n\n        url = urljoin(self.base_url, f\"items/{item_id}/content\")\n        response = self.session.get(url, timeout=self.timeout)\n        response.raise_for_status()\n        return response.content\n\n    def health_check(self) -> bool:\n        \"\"\"\n        Check if the API is accessible and credentials are valid.\n\n        Returns:\n            True if API is accessible, False otherwise\n        \"\"\"\n        try:\n            self._make_request(\"GET\", \"spots\", params={\"limit\": 1})\n            return True\n        except (HighspotClientError, HighspotAuthenticationError):\n            return False\n"
  },
  {
    "path": "backend/onyx/connectors/highspot/connector.py",
    "content": "import os\nfrom datetime import datetime\nfrom io import BytesIO\nfrom typing import Any\nfrom typing import Dict\nfrom typing import List\nfrom typing import Optional\n\nfrom pydantic import BaseModel\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.highspot.client import HighspotClient\nfrom onyx.connectors.highspot.client import HighspotClientError\nfrom onyx.connectors.highspot.utils import scrape_url_content\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.extract_file_text import extract_file_text\nfrom onyx.file_processing.file_types import OnyxFileExtensions\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n_SLIM_BATCH_SIZE = 1000\n\n\nclass HighspotSpot(BaseModel):\n    id: str\n    name: str\n\n\nclass HighspotConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):\n    \"\"\"\n    Connector for loading data from Highspot.\n\n    Retrieves content from specified spots using the Highspot API.\n    If no spots are specified, retrieves content from all available spots.\n    \"\"\"\n\n    def __init__(\n        self,\n        spot_names: list[str] | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ):\n        \"\"\"\n        Initialize the Highspot connector.\n\n        Args:\n            spot_names: List of spot names to retrieve content from (if empty, gets all spots)\n            batch_size: Number of items to retrieve in each batch\n        \"\"\"\n        self.spot_names = spot_names or []\n        self.batch_size = batch_size\n\n        self._client: Optional[HighspotClient] = None\n        self.highspot_url: Optional[str] = None\n        self.key: Optional[str] = None\n        self.secret: Optional[str] = None\n\n    @property\n    def client(self) -> HighspotClient:\n        if self._client is None:\n            if not self.key or not self.secret:\n                raise ConnectorMissingCredentialError(\"Highspot\")\n            # Ensure highspot_url is a string, use default if None\n            base_url = (\n                self.highspot_url\n                if self.highspot_url is not None\n                else HighspotClient.BASE_URL\n            )\n            self._client = HighspotClient(self.key, self.secret, base_url=base_url)\n        return self._client\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        logger.info(\"Loading Highspot credentials\")\n        self.highspot_url = credentials.get(\"highspot_url\")\n        self.key = credentials.get(\"highspot_key\")\n        self.secret = credentials.get(\"highspot_secret\")\n        return None\n\n    def _fetch_spots(self) -> list[HighspotSpot]:\n        \"\"\"\n        Populate the spot ID map with all available spots.\n        Keys are stored as lowercase for case-insensitive lookups.\n        \"\"\"\n        return [\n            HighspotSpot(id=spot[\"id\"], name=spot[\"title\"])\n            for spot in self.client.get_spots()\n        ]\n\n    def _fetch_spots_to_process(self) -> list[HighspotSpot]:\n        \"\"\"\n        Fetch spots to process based on the configured spot names.\n        \"\"\"\n        spots = self._fetch_spots()\n        if not spots:\n            raise ValueError(\"No spots found in Highspot.\")\n\n        if self.spot_names:\n            lower_spot_names = [name.lower() for name in self.spot_names]\n            spots_to_process = [\n                spot for spot in spots if spot.name.lower() in lower_spot_names\n            ]\n            if not spots_to_process:\n                raise ValueError(\n                    f\"No valid spots found in Highspot. Found {spots} but {self.spot_names} were requested.\"\n                )\n            return spots_to_process\n\n        return spots\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        \"\"\"\n        Load content from configured spots in Highspot.\n        If no spots are configured, loads from all spots.\n\n        Yields:\n            Batches of Document objects\n        \"\"\"\n        return self.poll_source(None, None)\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None\n    ) -> GenerateDocumentsOutput:\n        \"\"\"\n        Poll Highspot for content updated since the start time.\n\n        Args:\n            start: Start time as seconds since Unix epoch\n            end: End time as seconds since Unix epoch\n\n        Yields:\n            Batches of Document objects\n        \"\"\"\n        spots_to_process = self._fetch_spots_to_process()\n\n        doc_batch: list[Document | HierarchyNode] = []\n        try:\n            for spot in spots_to_process:\n                try:\n                    offset = 0\n                    has_more = True\n\n                    while has_more:\n                        logger.info(\n                            f\"Retrieving items from spot {spot.name}, offset {offset}\"\n                        )\n                        response = self.client.get_spot_items(\n                            spot_id=spot.id, offset=offset, page_size=self.batch_size\n                        )\n                        items = response.get(\"collection\", [])\n                        logger.info(\n                            f\"Received {len(items)} items from spot {spot.name}\"\n                        )\n                        if not items:\n                            has_more = False\n                            continue\n\n                        for item in items:\n                            try:\n                                item_id = item.get(\"id\")\n                                if not item_id:\n                                    logger.warning(\"Item without ID found, skipping\")\n                                    continue\n\n                                item_details = self.client.get_item(item_id)\n                                if not item_details:\n                                    logger.warning(\n                                        f\"Item {item_id} details not found, skipping\"\n                                    )\n                                    continue\n                                # Apply time filter if specified\n                                if start or end:\n                                    updated_at = item_details.get(\"date_updated\")\n                                    if updated_at:\n                                        # Convert to datetime for comparison\n                                        try:\n                                            updated_time = datetime.fromisoformat(\n                                                updated_at.replace(\"Z\", \"+00:00\")\n                                            )\n                                            if (\n                                                start\n                                                and updated_time.timestamp() < start\n                                            ) or (\n                                                end and updated_time.timestamp() > end\n                                            ):\n                                                continue\n                                        except (ValueError, TypeError):\n                                            # Skip if date cannot be parsed\n                                            logger.warning(\n                                                f\"Invalid date format for item {item_id}: {updated_at}\"\n                                            )\n                                            continue\n\n                                content = self._get_item_content(item_details)\n\n                                title = item_details.get(\"title\", \"\")\n\n                                doc_batch.append(\n                                    Document(\n                                        id=f\"HIGHSPOT_{item_id}\",\n                                        sections=[\n                                            TextSection(\n                                                link=item_details.get(\n                                                    \"url\",\n                                                    f\"https://www.highspot.com/items/{item_id}\",\n                                                ),\n                                                text=content,\n                                            )\n                                        ],\n                                        source=DocumentSource.HIGHSPOT,\n                                        semantic_identifier=title,\n                                        metadata={\n                                            \"spot_name\": spot.name,\n                                            \"type\": item_details.get(\n                                                \"content_type\", \"\"\n                                            ),\n                                            \"created_at\": item_details.get(\n                                                \"date_added\", \"\"\n                                            ),\n                                            \"author\": item_details.get(\"author\", \"\"),\n                                            \"language\": item_details.get(\n                                                \"language\", \"\"\n                                            ),\n                                            \"can_download\": str(\n                                                item_details.get(\"can_download\", False)\n                                            ),\n                                        },\n                                        doc_updated_at=item_details.get(\"date_updated\"),\n                                    )\n                                )\n\n                                if len(doc_batch) >= self.batch_size:\n                                    yield doc_batch\n                                    doc_batch = []\n\n                            except HighspotClientError as e:\n                                item_id = \"ID\" if not item_id else item_id\n                                logger.error(\n                                    f\"Error retrieving item {item_id}: {str(e)}\"\n                                )\n                            except Exception as e:\n                                item_id = \"ID\" if not item_id else item_id\n                                logger.error(\n                                    f\"Unexpected error for item {item_id}: {str(e)}\"\n                                )\n\n                        has_more = len(items) >= self.batch_size\n                        offset += self.batch_size\n\n                except (HighspotClientError, ValueError) as e:\n                    logger.error(f\"Error processing spot {spot.name}: {str(e)}\")\n                    raise\n                except Exception as e:\n                    logger.error(\n                        f\"Unexpected error processing spot {spot.name}: {str(e)}\"\n                    )\n                    raise\n\n        except Exception as e:\n            logger.error(f\"Error in Highspot connector: {str(e)}\")\n            raise\n\n        if doc_batch:\n            yield doc_batch\n\n    def _get_item_content(self, item_details: Dict[str, Any]) -> str:\n        \"\"\"\n        Get the text content of an item.\n\n        Args:\n            item_details: Item details from the API\n\n        Returns:\n            Text content of the item\n        \"\"\"\n        item_id = item_details.get(\"id\", \"\")\n        content_name = item_details.get(\"content_name\", \"\")\n        is_valid_format = content_name and \".\" in content_name\n        file_extension = content_name.split(\".\")[-1].lower() if is_valid_format else \"\"\n        file_extension = \".\" + file_extension if file_extension else \"\"\n        can_download = item_details.get(\"can_download\", False)\n        content_type = item_details.get(\"content_type\", \"\")\n\n        # Extract title and description once at the beginning\n        title, description = self._extract_title_and_description(item_details)\n        default_content = f\"{title}\\n{description}\"\n        logger.info(\n            f\"Processing item {item_id} with extension {file_extension} and file name {content_name}\"\n        )\n\n        try:\n            if content_type == \"WebLink\":\n                url = item_details.get(\"url\")\n                if not url:\n                    return default_content\n                content = scrape_url_content(url, True)\n                return content if content else default_content\n\n            elif (\n                is_valid_format\n                and file_extension in OnyxFileExtensions.TEXT_AND_DOCUMENT_EXTENSIONS\n                and can_download\n            ):\n                content_response = self.client.get_item_content(item_id)\n                # Process and extract text from binary content based on type\n                if content_response:\n                    text_content = extract_file_text(\n                        BytesIO(content_response), content_name, False\n                    )\n                    return text_content if text_content else default_content\n                return default_content\n\n            else:\n                logger.warning(\n                    f\"Item {item_id} has unsupported format: {file_extension}\"\n                )\n                return default_content\n\n        except HighspotClientError as e:\n            error_context = f\"item {item_id}\" if item_id else \"(item id not found)\"\n            logger.warning(f\"Could not retrieve content for {error_context}: {str(e)}\")\n            return default_content\n        except ValueError as e:\n            error_context = f\"item {item_id}\" if item_id else \"(item id not found)\"\n            logger.error(f\"Value error for {error_context}: {str(e)}\")\n            return default_content\n\n        except Exception as e:\n            error_context = f\"item {item_id}\" if item_id else \"(item id not found)\"\n            logger.error(\n                f\"Unexpected error retrieving content for {error_context}: {str(e)}\"\n            )\n            return default_content\n\n    def _extract_title_and_description(\n        self, item_details: Dict[str, Any]\n    ) -> tuple[str, str]:\n        \"\"\"\n        Extract the title and description from item details.\n\n        Args:\n            item_details: Item details from the API\n\n        Returns:\n            Tuple of title and description\n        \"\"\"\n        title = item_details.get(\"title\", \"\")\n        description = item_details.get(\"description\", \"\")\n        return title, description\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002\n        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002\n        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002\n    ) -> GenerateSlimDocumentOutput:\n        \"\"\"\n        Retrieve all document IDs from the configured spots.\n        If no spots are configured, retrieves from all spots.\n\n        Args:\n            start: Optional start time filter\n            end: Optional end time filter\n            callback: Optional indexing heartbeat callback\n\n        Yields:\n            Batches of SlimDocument objects\n        \"\"\"\n        spots_to_process = self._fetch_spots_to_process()\n\n        slim_doc_batch: list[SlimDocument | HierarchyNode] = []\n        try:\n            for spot in spots_to_process:\n                try:\n                    offset = 0\n                    has_more = True\n\n                    while has_more:\n                        logger.info(\n                            f\"Retrieving slim documents from spot {spot.name}, offset {offset}\"\n                        )\n                        response = self.client.get_spot_items(\n                            spot_id=spot.id, offset=offset, page_size=self.batch_size\n                        )\n\n                        items = response.get(\"collection\", [])\n                        if not items:\n                            has_more = False\n                            continue\n\n                        for item in items:\n                            item_id = item.get(\"id\")\n                            if not item_id:\n                                logger.warning(\"Item without ID found, skipping\")\n                                continue\n\n                            slim_doc_batch.append(\n                                SlimDocument(id=f\"HIGHSPOT_{item_id}\")\n                            )\n\n                            if len(slim_doc_batch) >= _SLIM_BATCH_SIZE:\n                                yield slim_doc_batch\n                                slim_doc_batch = []\n\n                        has_more = len(items) >= self.batch_size\n                        offset += self.batch_size\n\n                except (HighspotClientError, ValueError):\n                    logger.exception(\n                        f\"Error retrieving slim documents from spot {spot.name}\"\n                    )\n                    raise\n\n            if slim_doc_batch:\n                yield slim_doc_batch\n        except Exception:\n            logger.exception(\"Error in Highspot Slim Connector\")\n            raise\n\n    def validate_credentials(self) -> bool:\n        \"\"\"\n        Validate that the provided credentials can access the Highspot API.\n\n        Returns:\n            True if credentials are valid, False otherwise\n        \"\"\"\n        try:\n            return self.client.health_check()\n        except Exception as e:\n            logger.error(f\"Failed to validate credentials: {str(e)}\")\n            return False\n\n\nif __name__ == \"__main__\":\n    spot_names: List[str] = []\n    connector = HighspotConnector(spot_names)\n    credentials = {\n        \"highspot_key\": os.environ.get(\"HIGHSPOT_KEY\"),\n        \"highspot_secret\": os.environ.get(\"HIGHSPOT_SECRET\"),\n    }\n    connector.load_credentials(credentials=credentials)\n    for doc in connector.load_from_state():\n        print(doc)\n"
  },
  {
    "path": "backend/onyx/connectors/highspot/utils.py",
    "content": "from typing import Optional\nfrom urllib.parse import urlparse\n\nfrom bs4 import BeautifulSoup\nfrom playwright.sync_api import sync_playwright\n\nfrom onyx.file_processing.html_utils import web_html_cleanup\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Constants\nWEB_CONNECTOR_MAX_SCROLL_ATTEMPTS = 20\nJAVASCRIPT_DISABLED_MESSAGE = \"You have JavaScript disabled in your browser\"\nDEFAULT_TIMEOUT = 60000  # 60 seconds\n\n\ndef scrape_url_content(\n    url: str, scroll_before_scraping: bool = False, timeout_ms: int = DEFAULT_TIMEOUT\n) -> Optional[str]:\n    \"\"\"\n    Scrapes content from a given URL and returns the cleaned text.\n\n    Args:\n        url: The URL to scrape\n        scroll_before_scraping: Whether to scroll through the page to load lazy content\n        timeout_ms: Timeout in milliseconds for page navigation and loading\n\n    Returns:\n        The cleaned text content of the page or None if scraping fails\n    \"\"\"\n    playwright = None\n    browser = None\n    try:\n        validate_url(url)\n        playwright = sync_playwright().start()\n        browser = playwright.chromium.launch(headless=True)\n        context = browser.new_context()\n        page = context.new_page()\n\n        logger.info(f\"Navigating to URL: {url}\")\n        try:\n            page.goto(url, timeout=timeout_ms)\n        except Exception as e:\n            logger.error(f\"Failed to navigate to {url}: {str(e)}\")\n            return None\n\n        if scroll_before_scraping:\n            logger.debug(\"Scrolling page to load lazy content\")\n            scroll_attempts = 0\n            previous_height = page.evaluate(\"document.body.scrollHeight\")\n            while scroll_attempts < WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS:\n                page.evaluate(\"window.scrollTo(0, document.body.scrollHeight)\")\n                try:\n                    page.wait_for_load_state(\"networkidle\", timeout=timeout_ms)\n                except Exception as e:\n                    logger.warning(f\"Network idle wait timed out: {str(e)}\")\n                    break\n\n                new_height = page.evaluate(\"document.body.scrollHeight\")\n                if new_height == previous_height:\n                    break\n                previous_height = new_height\n                scroll_attempts += 1\n\n        content = page.content()\n        soup = BeautifulSoup(content, \"html.parser\")\n\n        parsed_html = web_html_cleanup(soup)\n\n        if JAVASCRIPT_DISABLED_MESSAGE in parsed_html.cleaned_text:\n            logger.debug(\"JavaScript disabled message detected, checking iframes\")\n            try:\n                iframe_count = page.frame_locator(\"iframe\").locator(\"html\").count()\n                if iframe_count > 0:\n                    iframe_texts = (\n                        page.frame_locator(\"iframe\").locator(\"html\").all_inner_texts()\n                    )\n                    iframe_content = \"\\n\".join(iframe_texts)\n\n                    if len(parsed_html.cleaned_text) < 700:\n                        parsed_html.cleaned_text = iframe_content\n                    else:\n                        parsed_html.cleaned_text += \"\\n\" + iframe_content\n            except Exception as e:\n                logger.warning(f\"Error processing iframes: {str(e)}\")\n\n        return parsed_html.cleaned_text\n\n    except Exception as e:\n        logger.error(f\"Error scraping URL {url}: {str(e)}\")\n        return None\n\n    finally:\n        if browser:\n            try:\n                browser.close()\n            except Exception as e:\n                logger.debug(f\"Error closing browser: {str(e)}\")\n        if playwright:\n            try:\n                playwright.stop()\n            except Exception as e:\n                logger.debug(f\"Error stopping playwright: {str(e)}\")\n\n\ndef validate_url(url: str) -> None:\n    \"\"\"\n    Validates that a URL is properly formatted.\n\n    Args:\n        url: The URL to validate\n\n    Raises:\n        ValueError: If URL is not valid\n    \"\"\"\n    parse = urlparse(url)\n    if parse.scheme != \"http\" and parse.scheme != \"https\":\n        raise ValueError(\"URL must be of scheme https?://\")\n\n    if not parse.hostname:\n        raise ValueError(\"URL must include a hostname\")\n"
  },
  {
    "path": "backend/onyx/connectors/hubspot/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/hubspot/connector.py",
    "content": "import re\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\nfrom typing import TypeVar\n\nimport requests\nfrom hubspot import HubSpot  # type: ignore\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.hubspot.rate_limit import HubSpotRateLimiter\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\n\nHUBSPOT_BASE_URL = \"https://app.hubspot.com\"\nHUBSPOT_API_URL = \"https://api.hubapi.com/integrations/v1/me\"\n\nAVAILABLE_OBJECT_TYPES = {\"tickets\", \"companies\", \"deals\", \"contacts\"}\n\nHUBSPOT_PAGE_SIZE = 100\n\nT = TypeVar(\"T\")\n\nlogger = setup_logger()\n\n\nclass HubSpotConnector(LoadConnector, PollConnector):\n    def __init__(\n        self,\n        batch_size: int = INDEX_BATCH_SIZE,\n        access_token: str | None = None,\n        object_types: list[str] | None = None,\n    ) -> None:\n        self.batch_size = batch_size\n        self._access_token = access_token\n        self._portal_id: str | None = None\n        self._rate_limiter = HubSpotRateLimiter()\n\n        # Set object types to fetch, default to all available types\n        if object_types is None:\n            self.object_types = AVAILABLE_OBJECT_TYPES.copy()\n        else:\n            object_types_set = set(object_types)\n\n            # Validate provided object types\n            invalid_types = object_types_set - AVAILABLE_OBJECT_TYPES\n            if invalid_types:\n                raise ValueError(\n                    f\"Invalid object types: {invalid_types}. Available types: {AVAILABLE_OBJECT_TYPES}\"\n                )\n            self.object_types = object_types_set.copy()\n\n    @property\n    def access_token(self) -> str:\n        \"\"\"Get the access token, raising an exception if not set.\"\"\"\n        if self._access_token is None:\n            raise ConnectorMissingCredentialError(\"HubSpot access token not set\")\n        return self._access_token\n\n    @access_token.setter\n    def access_token(self, value: str | None) -> None:\n        \"\"\"Set the access token.\"\"\"\n        self._access_token = value\n\n    @property\n    def portal_id(self) -> str:\n        \"\"\"Get the portal ID, raising an exception if not set.\"\"\"\n        if self._portal_id is None:\n            raise ConnectorMissingCredentialError(\"HubSpot portal ID not set\")\n        return self._portal_id\n\n    @portal_id.setter\n    def portal_id(self, value: str | None) -> None:\n        \"\"\"Set the portal ID.\"\"\"\n        self._portal_id = value\n\n    def _call_hubspot(self, func: Callable[..., T], *args: Any, **kwargs: Any) -> T:\n        return self._rate_limiter.call(func, *args, **kwargs)\n\n    def _paginated_results(\n        self,\n        fetch_page: Callable[..., Any],\n        **kwargs: Any,\n    ) -> Generator[Any, None, None]:\n        base_kwargs = dict(kwargs)\n        base_kwargs.setdefault(\"limit\", HUBSPOT_PAGE_SIZE)\n\n        after: str | None = None\n        while True:\n            page_kwargs = base_kwargs.copy()\n            if after is not None:\n                page_kwargs[\"after\"] = after\n\n            page = self._call_hubspot(fetch_page, **page_kwargs)\n            results = getattr(page, \"results\", [])\n            for result in results:\n                yield result\n\n            paging = getattr(page, \"paging\", None)\n            next_page = getattr(paging, \"next\", None) if paging else None\n            if next_page is None:\n                break\n\n            after = getattr(next_page, \"after\", None)\n            if after is None:\n                break\n\n    def _clean_html_content(self, html_content: str) -> str:\n        \"\"\"Clean HTML content and extract raw text\"\"\"\n        if not html_content:\n            return \"\"\n\n        # Remove HTML tags using regex\n        clean_text = re.sub(r\"<[^>]+>\", \"\", html_content)\n\n        # Decode common HTML entities\n        clean_text = clean_text.replace(\"&nbsp;\", \" \")\n        clean_text = clean_text.replace(\"&amp;\", \"&\")\n        clean_text = clean_text.replace(\"&lt;\", \"<\")\n        clean_text = clean_text.replace(\"&gt;\", \">\")\n        clean_text = clean_text.replace(\"&quot;\", '\"')\n        clean_text = clean_text.replace(\"&#39;\", \"'\")\n\n        # Clean up whitespace\n        clean_text = \" \".join(clean_text.split())\n\n        return clean_text.strip()\n\n    def get_portal_id(self) -> str:\n        headers = {\n            \"Authorization\": f\"Bearer {self.access_token}\",\n            \"Content-Type\": \"application/json\",\n        }\n\n        response = requests.get(HUBSPOT_API_URL, headers=headers)\n        if response.status_code != 200:\n            raise Exception(\"Error fetching portal ID\")\n\n        data = response.json()\n        return str(data[\"portalId\"])\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self.access_token = cast(str, credentials[\"hubspot_access_token\"])\n        self.portal_id = self.get_portal_id()\n        return None\n\n    def _get_object_url(self, object_type: str, object_id: str) -> str:\n        \"\"\"Generate HubSpot URL for different object types\"\"\"\n        if object_type == \"tickets\":\n            return (\n                f\"{HUBSPOT_BASE_URL}/contacts/{self.portal_id}/record/0-5/{object_id}\"\n            )\n        elif object_type == \"companies\":\n            return (\n                f\"{HUBSPOT_BASE_URL}/contacts/{self.portal_id}/record/0-2/{object_id}\"\n            )\n        elif object_type == \"deals\":\n            return (\n                f\"{HUBSPOT_BASE_URL}/contacts/{self.portal_id}/record/0-3/{object_id}\"\n            )\n        elif object_type == \"contacts\":\n            return (\n                f\"{HUBSPOT_BASE_URL}/contacts/{self.portal_id}/record/0-1/{object_id}\"\n            )\n        elif object_type == \"notes\":\n            return (\n                f\"{HUBSPOT_BASE_URL}/contacts/{self.portal_id}/objects/0-4/{object_id}\"\n            )\n        else:\n            return f\"{HUBSPOT_BASE_URL}/contacts/{self.portal_id}/{object_type}/{object_id}\"\n\n    def _get_associated_objects(\n        self,\n        api_client: HubSpot,\n        object_id: str,\n        from_object_type: str,\n        to_object_type: str,\n    ) -> list[dict[str, Any]]:\n        \"\"\"Get associated objects for a given object\"\"\"\n        try:\n            associations_iter = self._paginated_results(\n                api_client.crm.associations.v4.basic_api.get_page,\n                object_type=from_object_type,\n                object_id=object_id,\n                to_object_type=to_object_type,\n            )\n\n            object_ids = [assoc.to_object_id for assoc in associations_iter]\n\n            associated_objects: list[dict[str, Any]] = []\n\n            if to_object_type == \"contacts\":\n                for obj_id in object_ids:\n                    try:\n                        obj = self._call_hubspot(\n                            api_client.crm.contacts.basic_api.get_by_id,\n                            contact_id=obj_id,\n                            properties=[\n                                \"firstname\",\n                                \"lastname\",\n                                \"email\",\n                                \"company\",\n                                \"jobtitle\",\n                            ],\n                        )\n                        associated_objects.append(obj.to_dict())\n                    except Exception as e:\n                        logger.warning(f\"Failed to fetch contact {obj_id}: {e}\")\n\n            elif to_object_type == \"companies\":\n                for obj_id in object_ids:\n                    try:\n                        obj = self._call_hubspot(\n                            api_client.crm.companies.basic_api.get_by_id,\n                            company_id=obj_id,\n                            properties=[\n                                \"name\",\n                                \"domain\",\n                                \"industry\",\n                                \"city\",\n                                \"state\",\n                            ],\n                        )\n                        associated_objects.append(obj.to_dict())\n                    except Exception as e:\n                        logger.warning(f\"Failed to fetch company {obj_id}: {e}\")\n\n            elif to_object_type == \"deals\":\n                for obj_id in object_ids:\n                    try:\n                        obj = self._call_hubspot(\n                            api_client.crm.deals.basic_api.get_by_id,\n                            deal_id=obj_id,\n                            properties=[\n                                \"dealname\",\n                                \"amount\",\n                                \"dealstage\",\n                                \"closedate\",\n                                \"pipeline\",\n                            ],\n                        )\n                        associated_objects.append(obj.to_dict())\n                    except Exception as e:\n                        logger.warning(f\"Failed to fetch deal {obj_id}: {e}\")\n\n            elif to_object_type == \"tickets\":\n                for obj_id in object_ids:\n                    try:\n                        obj = self._call_hubspot(\n                            api_client.crm.tickets.basic_api.get_by_id,\n                            ticket_id=obj_id,\n                            properties=[\"subject\", \"content\", \"hs_ticket_priority\"],\n                        )\n                        associated_objects.append(obj.to_dict())\n                    except Exception as e:\n                        logger.warning(f\"Failed to fetch ticket {obj_id}: {e}\")\n\n            return associated_objects\n\n        except Exception as e:\n            logger.warning(\n                f\"Failed to get associations from {from_object_type} to {to_object_type}: {e}\"\n            )\n            return []\n\n    def _get_associated_notes(\n        self,\n        api_client: HubSpot,\n        object_id: str,\n        object_type: str,\n    ) -> list[dict[str, Any]]:\n        \"\"\"Get notes associated with a given object\"\"\"\n        try:\n            associations_iter = self._paginated_results(\n                api_client.crm.associations.v4.basic_api.get_page,\n                object_type=object_type,\n                object_id=object_id,\n                to_object_type=\"notes\",\n            )\n\n            note_ids = [assoc.to_object_id for assoc in associations_iter]\n\n            associated_notes = []\n\n            for note_id in note_ids:\n                try:\n                    # Notes are engagements in HubSpot, use the engagements API\n                    note = self._call_hubspot(\n                        api_client.crm.objects.notes.basic_api.get_by_id,\n                        note_id=note_id,\n                        properties=[\n                            \"hs_note_body\",\n                            \"hs_timestamp\",\n                            \"hs_created_by\",\n                            \"hubspot_owner_id\",\n                        ],\n                    )\n                    associated_notes.append(note.to_dict())\n                except Exception as e:\n                    logger.warning(f\"Failed to fetch note {note_id}: {e}\")\n\n            return associated_notes\n\n        except Exception as e:\n            logger.warning(f\"Failed to get notes for {object_type} {object_id}: {e}\")\n            return []\n\n    def _create_object_section(\n        self, obj: dict[str, Any], object_type: str\n    ) -> TextSection:\n        \"\"\"Create a TextSection for an associated object\"\"\"\n        obj_id = obj.get(\"id\", \"\")\n        properties = obj.get(\"properties\", {})\n\n        if object_type == \"contacts\":\n            name_parts = []\n            if properties.get(\"firstname\"):\n                name_parts.append(properties[\"firstname\"])\n            if properties.get(\"lastname\"):\n                name_parts.append(properties[\"lastname\"])\n\n            if name_parts:\n                name = \" \".join(name_parts)\n            elif properties.get(\"email\"):\n                # Use email as fallback if no first/last name\n                name = properties[\"email\"]\n            else:\n                name = \"Unknown Contact\"\n\n            content_parts = [f\"Contact: {name}\"]\n            if properties.get(\"email\"):\n                content_parts.append(f\"Email: {properties['email']}\")\n            if properties.get(\"company\"):\n                content_parts.append(f\"Company: {properties['company']}\")\n            if properties.get(\"jobtitle\"):\n                content_parts.append(f\"Job Title: {properties['jobtitle']}\")\n\n        elif object_type == \"companies\":\n            name = properties.get(\"name\", \"Unknown Company\")\n            content_parts = [f\"Company: {name}\"]\n            if properties.get(\"domain\"):\n                content_parts.append(f\"Domain: {properties['domain']}\")\n            if properties.get(\"industry\"):\n                content_parts.append(f\"Industry: {properties['industry']}\")\n            if properties.get(\"city\") and properties.get(\"state\"):\n                content_parts.append(\n                    f\"Location: {properties['city']}, {properties['state']}\"\n                )\n\n        elif object_type == \"deals\":\n            name = properties.get(\"dealname\", \"Unknown Deal\")\n            content_parts = [f\"Deal: {name}\"]\n            if properties.get(\"amount\"):\n                content_parts.append(f\"Amount: ${properties['amount']}\")\n            if properties.get(\"dealstage\"):\n                content_parts.append(f\"Stage: {properties['dealstage']}\")\n            if properties.get(\"closedate\"):\n                content_parts.append(f\"Close Date: {properties['closedate']}\")\n            if properties.get(\"pipeline\"):\n                content_parts.append(f\"Pipeline: {properties['pipeline']}\")\n\n        elif object_type == \"tickets\":\n            name = properties.get(\"subject\", \"Unknown Ticket\")\n            content_parts = [f\"Ticket: {name}\"]\n            if properties.get(\"content\"):\n                content_parts.append(f\"Content: {properties['content']}\")\n            if properties.get(\"hs_ticket_priority\"):\n                content_parts.append(f\"Priority: {properties['hs_ticket_priority']}\")\n        elif object_type == \"notes\":\n            # Notes have a body property that contains the note content\n            body = properties.get(\"hs_note_body\", \"\")\n            timestamp = properties.get(\"hs_timestamp\", \"\")\n\n            # Clean HTML content to get raw text\n            clean_body = self._clean_html_content(body)\n\n            # Use full content, not truncated\n            content_parts = [f\"Note: {clean_body}\"]\n            if timestamp:\n                content_parts.append(f\"Created: {timestamp}\")\n        else:\n            content_parts = [f\"{object_type.capitalize()}: {obj_id}\"]\n\n        content = \"\\n\".join(content_parts)\n        link = self._get_object_url(object_type, obj_id)\n\n        return TextSection(link=link, text=content)\n\n    def _process_tickets(\n        self, start: datetime | None = None, end: datetime | None = None\n    ) -> GenerateDocumentsOutput:\n        api_client = HubSpot(access_token=self.access_token)\n\n        tickets_iter = self._paginated_results(\n            api_client.crm.tickets.basic_api.get_page,\n            properties=[\n                \"subject\",\n                \"content\",\n                \"hs_ticket_priority\",\n                \"createdate\",\n                \"hs_lastmodifieddate\",\n            ],\n            associations=[\"contacts\", \"companies\", \"deals\"],\n        )\n\n        doc_batch: list[Document | HierarchyNode] = []\n\n        for ticket in tickets_iter:\n            updated_at = ticket.updated_at.replace(tzinfo=None)\n            if start is not None and updated_at < start.replace(tzinfo=None):\n                continue\n            if end is not None and updated_at > end.replace(tzinfo=None):\n                continue\n\n            title = ticket.properties.get(\"subject\") or f\"Ticket {ticket.id}\"\n            link = self._get_object_url(\"tickets\", ticket.id)\n            content_text = ticket.properties.get(\"content\") or \"\"\n\n            # Main ticket section\n            sections = [TextSection(link=link, text=content_text)]\n\n            # Metadata with parent object IDs\n            metadata: dict[str, str | list[str]] = {\n                \"object_type\": \"ticket\",\n            }\n\n            if ticket.properties.get(\"hs_ticket_priority\"):\n                metadata[\"priority\"] = ticket.properties[\"hs_ticket_priority\"]\n\n            # Add associated objects as sections\n            associated_contact_ids = []\n            associated_company_ids = []\n            associated_deal_ids = []\n\n            # Get associated contacts\n            associated_contacts = self._get_associated_objects(\n                api_client, ticket.id, \"tickets\", \"contacts\"\n            )\n            for contact in associated_contacts:\n                sections.append(self._create_object_section(contact, \"contacts\"))\n                associated_contact_ids.append(contact[\"id\"])\n\n            # Get associated companies\n            associated_companies = self._get_associated_objects(\n                api_client, ticket.id, \"tickets\", \"companies\"\n            )\n            for company in associated_companies:\n                sections.append(self._create_object_section(company, \"companies\"))\n                associated_company_ids.append(company[\"id\"])\n\n            # Get associated deals\n            associated_deals = self._get_associated_objects(\n                api_client, ticket.id, \"tickets\", \"deals\"\n            )\n            for deal in associated_deals:\n                sections.append(self._create_object_section(deal, \"deals\"))\n                associated_deal_ids.append(deal[\"id\"])\n\n            # Get associated notes\n            associated_notes = self._get_associated_notes(\n                api_client, ticket.id, \"tickets\"\n            )\n            for note in associated_notes:\n                sections.append(self._create_object_section(note, \"notes\"))\n\n            # Add association IDs to metadata\n            if associated_contact_ids:\n                metadata[\"associated_contact_ids\"] = associated_contact_ids\n            if associated_company_ids:\n                metadata[\"associated_company_ids\"] = associated_company_ids\n            if associated_deal_ids:\n                metadata[\"associated_deal_ids\"] = associated_deal_ids\n\n            doc_batch.append(\n                Document(\n                    id=f\"hubspot_ticket_{ticket.id}\",\n                    sections=cast(list[TextSection | ImageSection], sections),\n                    source=DocumentSource.HUBSPOT,\n                    semantic_identifier=title,\n                    doc_updated_at=ticket.updated_at.replace(tzinfo=timezone.utc),\n                    metadata=metadata,\n                    doc_metadata={\n                        \"hierarchy\": {\n                            \"source_path\": [\"Tickets\"],\n                            \"object_type\": \"ticket\",\n                            \"object_id\": ticket.id,\n                        }\n                    },\n                )\n            )\n\n            if len(doc_batch) >= self.batch_size:\n                yield doc_batch\n                doc_batch = []\n\n        if doc_batch:\n            yield doc_batch\n\n    def _process_companies(\n        self, start: datetime | None = None, end: datetime | None = None\n    ) -> GenerateDocumentsOutput:\n        api_client = HubSpot(access_token=self.access_token)\n\n        companies_iter = self._paginated_results(\n            api_client.crm.companies.basic_api.get_page,\n            properties=[\n                \"name\",\n                \"domain\",\n                \"industry\",\n                \"city\",\n                \"state\",\n                \"description\",\n                \"createdate\",\n                \"hs_lastmodifieddate\",\n            ],\n            associations=[\"contacts\", \"deals\", \"tickets\"],\n        )\n\n        doc_batch: list[Document | HierarchyNode] = []\n\n        for company in companies_iter:\n            updated_at = company.updated_at.replace(tzinfo=None)\n            if start is not None and updated_at < start.replace(tzinfo=None):\n                continue\n            if end is not None and updated_at > end.replace(tzinfo=None):\n                continue\n\n            title = company.properties.get(\"name\") or f\"Company {company.id}\"\n            link = self._get_object_url(\"companies\", company.id)\n\n            # Build main content\n            content_parts = [f\"Company: {title}\"]\n            if company.properties.get(\"domain\"):\n                content_parts.append(f\"Domain: {company.properties['domain']}\")\n            if company.properties.get(\"industry\"):\n                content_parts.append(f\"Industry: {company.properties['industry']}\")\n            if company.properties.get(\"city\") and company.properties.get(\"state\"):\n                content_parts.append(\n                    f\"Location: {company.properties['city']}, {company.properties['state']}\"\n                )\n            if company.properties.get(\"description\"):\n                content_parts.append(\n                    f\"Description: {company.properties['description']}\"\n                )\n\n            content_text = \"\\n\".join(content_parts)\n\n            # Main company section\n            sections = [TextSection(link=link, text=content_text)]\n\n            # Metadata with parent object IDs\n            metadata: dict[str, str | list[str]] = {\n                \"company_id\": company.id,\n                \"object_type\": \"company\",\n            }\n\n            if company.properties.get(\"industry\"):\n                metadata[\"industry\"] = company.properties[\"industry\"]\n            if company.properties.get(\"domain\"):\n                metadata[\"domain\"] = company.properties[\"domain\"]\n\n            # Add associated objects as sections\n            associated_contact_ids = []\n            associated_deal_ids = []\n            associated_ticket_ids = []\n\n            # Get associated contacts\n            associated_contacts = self._get_associated_objects(\n                api_client, company.id, \"companies\", \"contacts\"\n            )\n            for contact in associated_contacts:\n                sections.append(self._create_object_section(contact, \"contacts\"))\n                associated_contact_ids.append(contact[\"id\"])\n\n            # Get associated deals\n            associated_deals = self._get_associated_objects(\n                api_client, company.id, \"companies\", \"deals\"\n            )\n            for deal in associated_deals:\n                sections.append(self._create_object_section(deal, \"deals\"))\n                associated_deal_ids.append(deal[\"id\"])\n\n            # Get associated tickets\n            associated_tickets = self._get_associated_objects(\n                api_client, company.id, \"companies\", \"tickets\"\n            )\n            for ticket in associated_tickets:\n                sections.append(self._create_object_section(ticket, \"tickets\"))\n                associated_ticket_ids.append(ticket[\"id\"])\n\n            # Get associated notes\n            associated_notes = self._get_associated_notes(\n                api_client, company.id, \"companies\"\n            )\n            for note in associated_notes:\n                sections.append(self._create_object_section(note, \"notes\"))\n\n            # Add association IDs to metadata\n            if associated_contact_ids:\n                metadata[\"associated_contact_ids\"] = associated_contact_ids\n            if associated_deal_ids:\n                metadata[\"associated_deal_ids\"] = associated_deal_ids\n            if associated_ticket_ids:\n                metadata[\"associated_ticket_ids\"] = associated_ticket_ids\n\n            doc_batch.append(\n                Document(\n                    id=f\"hubspot_company_{company.id}\",\n                    sections=cast(list[TextSection | ImageSection], sections),\n                    source=DocumentSource.HUBSPOT,\n                    semantic_identifier=title,\n                    doc_updated_at=company.updated_at.replace(tzinfo=timezone.utc),\n                    metadata=metadata,\n                    doc_metadata={\n                        \"hierarchy\": {\n                            \"source_path\": [\"Companies\"],\n                            \"object_type\": \"company\",\n                            \"object_id\": company.id,\n                        }\n                    },\n                )\n            )\n\n            if len(doc_batch) >= self.batch_size:\n                yield doc_batch\n                doc_batch = []\n\n        if doc_batch:\n            yield doc_batch\n\n    def _process_deals(\n        self, start: datetime | None = None, end: datetime | None = None\n    ) -> GenerateDocumentsOutput:\n        api_client = HubSpot(access_token=self.access_token)\n\n        deals_iter = self._paginated_results(\n            api_client.crm.deals.basic_api.get_page,\n            properties=[\n                \"dealname\",\n                \"amount\",\n                \"dealstage\",\n                \"closedate\",\n                \"pipeline\",\n                \"description\",\n                \"createdate\",\n                \"hs_lastmodifieddate\",\n            ],\n            associations=[\"contacts\", \"companies\", \"tickets\"],\n        )\n\n        doc_batch: list[Document | HierarchyNode] = []\n\n        for deal in deals_iter:\n            updated_at = deal.updated_at.replace(tzinfo=None)\n            if start is not None and updated_at < start.replace(tzinfo=None):\n                continue\n            if end is not None and updated_at > end.replace(tzinfo=None):\n                continue\n\n            title = deal.properties.get(\"dealname\") or f\"Deal {deal.id}\"\n            link = self._get_object_url(\"deals\", deal.id)\n\n            # Build main content\n            content_parts = [f\"Deal: {title}\"]\n            if deal.properties.get(\"amount\"):\n                content_parts.append(f\"Amount: ${deal.properties['amount']}\")\n            if deal.properties.get(\"dealstage\"):\n                content_parts.append(f\"Stage: {deal.properties['dealstage']}\")\n            if deal.properties.get(\"closedate\"):\n                content_parts.append(f\"Close Date: {deal.properties['closedate']}\")\n            if deal.properties.get(\"pipeline\"):\n                content_parts.append(f\"Pipeline: {deal.properties['pipeline']}\")\n            if deal.properties.get(\"description\"):\n                content_parts.append(f\"Description: {deal.properties['description']}\")\n\n            content_text = \"\\n\".join(content_parts)\n\n            # Main deal section\n            sections = [TextSection(link=link, text=content_text)]\n\n            # Metadata with parent object IDs\n            metadata: dict[str, str | list[str]] = {\n                \"deal_id\": deal.id,\n                \"object_type\": \"deal\",\n            }\n\n            if deal.properties.get(\"dealstage\"):\n                metadata[\"deal_stage\"] = deal.properties[\"dealstage\"]\n            if deal.properties.get(\"pipeline\"):\n                metadata[\"pipeline\"] = deal.properties[\"pipeline\"]\n            if deal.properties.get(\"amount\"):\n                metadata[\"amount\"] = deal.properties[\"amount\"]\n\n            # Add associated objects as sections\n            associated_contact_ids = []\n            associated_company_ids = []\n            associated_ticket_ids = []\n\n            # Get associated contacts\n            associated_contacts = self._get_associated_objects(\n                api_client, deal.id, \"deals\", \"contacts\"\n            )\n            for contact in associated_contacts:\n                sections.append(self._create_object_section(contact, \"contacts\"))\n                associated_contact_ids.append(contact[\"id\"])\n\n            # Get associated companies\n            associated_companies = self._get_associated_objects(\n                api_client, deal.id, \"deals\", \"companies\"\n            )\n            for company in associated_companies:\n                sections.append(self._create_object_section(company, \"companies\"))\n                associated_company_ids.append(company[\"id\"])\n\n            # Get associated tickets\n            associated_tickets = self._get_associated_objects(\n                api_client, deal.id, \"deals\", \"tickets\"\n            )\n            for ticket in associated_tickets:\n                sections.append(self._create_object_section(ticket, \"tickets\"))\n                associated_ticket_ids.append(ticket[\"id\"])\n\n            # Get associated notes\n            associated_notes = self._get_associated_notes(api_client, deal.id, \"deals\")\n            for note in associated_notes:\n                sections.append(self._create_object_section(note, \"notes\"))\n\n            # Add association IDs to metadata\n            if associated_contact_ids:\n                metadata[\"associated_contact_ids\"] = associated_contact_ids\n            if associated_company_ids:\n                metadata[\"associated_company_ids\"] = associated_company_ids\n            if associated_ticket_ids:\n                metadata[\"associated_ticket_ids\"] = associated_ticket_ids\n\n            doc_batch.append(\n                Document(\n                    id=f\"hubspot_deal_{deal.id}\",\n                    sections=cast(list[TextSection | ImageSection], sections),\n                    source=DocumentSource.HUBSPOT,\n                    semantic_identifier=title,\n                    doc_updated_at=deal.updated_at.replace(tzinfo=timezone.utc),\n                    metadata=metadata,\n                    doc_metadata={\n                        \"hierarchy\": {\n                            \"source_path\": [\"Deals\"],\n                            \"object_type\": \"deal\",\n                            \"object_id\": deal.id,\n                        }\n                    },\n                )\n            )\n\n            if len(doc_batch) >= self.batch_size:\n                yield doc_batch\n                doc_batch = []\n\n        if doc_batch:\n            yield doc_batch\n\n    def _process_contacts(\n        self, start: datetime | None = None, end: datetime | None = None\n    ) -> GenerateDocumentsOutput:\n        api_client = HubSpot(access_token=self.access_token)\n\n        contacts_iter = self._paginated_results(\n            api_client.crm.contacts.basic_api.get_page,\n            properties=[\n                \"firstname\",\n                \"lastname\",\n                \"email\",\n                \"company\",\n                \"jobtitle\",\n                \"phone\",\n                \"city\",\n                \"state\",\n                \"createdate\",\n                \"lastmodifieddate\",\n            ],\n            associations=[\"companies\", \"deals\", \"tickets\"],\n        )\n\n        doc_batch: list[Document | HierarchyNode] = []\n\n        for contact in contacts_iter:\n            updated_at = contact.updated_at.replace(tzinfo=None)\n            if start is not None and updated_at < start.replace(tzinfo=None):\n                continue\n            if end is not None and updated_at > end.replace(tzinfo=None):\n                continue\n\n            # Build contact name\n            name_parts = []\n            if contact.properties.get(\"firstname\"):\n                name_parts.append(contact.properties[\"firstname\"])\n            if contact.properties.get(\"lastname\"):\n                name_parts.append(contact.properties[\"lastname\"])\n\n            if name_parts:\n                title = \" \".join(name_parts)\n            elif contact.properties.get(\"email\"):\n                # Use email as fallback if no first/last name\n                title = contact.properties[\"email\"]\n            else:\n                title = f\"Contact {contact.id}\"\n\n            link = self._get_object_url(\"contacts\", contact.id)\n\n            # Build main content\n            content_parts = [f\"Contact: {title}\"]\n            if contact.properties.get(\"email\"):\n                content_parts.append(f\"Email: {contact.properties['email']}\")\n            if contact.properties.get(\"company\"):\n                content_parts.append(f\"Company: {contact.properties['company']}\")\n            if contact.properties.get(\"jobtitle\"):\n                content_parts.append(f\"Job Title: {contact.properties['jobtitle']}\")\n            if contact.properties.get(\"phone\"):\n                content_parts.append(f\"Phone: {contact.properties['phone']}\")\n            if contact.properties.get(\"city\") and contact.properties.get(\"state\"):\n                content_parts.append(\n                    f\"Location: {contact.properties['city']}, {contact.properties['state']}\"\n                )\n\n            content_text = \"\\n\".join(content_parts)\n\n            # Main contact section\n            sections = [TextSection(link=link, text=content_text)]\n\n            # Metadata with parent object IDs\n            metadata: dict[str, str | list[str]] = {\n                \"contact_id\": contact.id,\n                \"object_type\": \"contact\",\n            }\n\n            if contact.properties.get(\"email\"):\n                metadata[\"email\"] = contact.properties[\"email\"]\n            if contact.properties.get(\"company\"):\n                metadata[\"company\"] = contact.properties[\"company\"]\n            if contact.properties.get(\"jobtitle\"):\n                metadata[\"job_title\"] = contact.properties[\"jobtitle\"]\n\n            # Add associated objects as sections\n            associated_company_ids = []\n            associated_deal_ids = []\n            associated_ticket_ids = []\n\n            # Get associated companies\n            associated_companies = self._get_associated_objects(\n                api_client, contact.id, \"contacts\", \"companies\"\n            )\n            for company in associated_companies:\n                sections.append(self._create_object_section(company, \"companies\"))\n                associated_company_ids.append(company[\"id\"])\n\n            # Get associated deals\n            associated_deals = self._get_associated_objects(\n                api_client, contact.id, \"contacts\", \"deals\"\n            )\n            for deal in associated_deals:\n                sections.append(self._create_object_section(deal, \"deals\"))\n                associated_deal_ids.append(deal[\"id\"])\n\n            # Get associated tickets\n            associated_tickets = self._get_associated_objects(\n                api_client, contact.id, \"contacts\", \"tickets\"\n            )\n            for ticket in associated_tickets:\n                sections.append(self._create_object_section(ticket, \"tickets\"))\n                associated_ticket_ids.append(ticket[\"id\"])\n\n            # Get associated notes\n            associated_notes = self._get_associated_notes(\n                api_client, contact.id, \"contacts\"\n            )\n            for note in associated_notes:\n                sections.append(self._create_object_section(note, \"notes\"))\n\n            # Add association IDs to metadata\n            if associated_company_ids:\n                metadata[\"associated_company_ids\"] = associated_company_ids\n            if associated_deal_ids:\n                metadata[\"associated_deal_ids\"] = associated_deal_ids\n            if associated_ticket_ids:\n                metadata[\"associated_ticket_ids\"] = associated_ticket_ids\n\n            doc_batch.append(\n                Document(\n                    id=f\"hubspot_contact_{contact.id}\",\n                    sections=cast(list[TextSection | ImageSection], sections),\n                    source=DocumentSource.HUBSPOT,\n                    semantic_identifier=title,\n                    doc_updated_at=contact.updated_at.replace(tzinfo=timezone.utc),\n                    metadata=metadata,\n                    doc_metadata={\n                        \"hierarchy\": {\n                            \"source_path\": [\"Contacts\"],\n                            \"object_type\": \"contact\",\n                            \"object_id\": contact.id,\n                        }\n                    },\n                )\n            )\n\n            if len(doc_batch) >= self.batch_size:\n                yield doc_batch\n                doc_batch = []\n\n        if doc_batch:\n            yield doc_batch\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        \"\"\"Load all HubSpot objects (tickets, companies, deals, contacts)\"\"\"\n        # Process each object type based on configuration\n        if \"tickets\" in self.object_types:\n            yield from self._process_tickets()\n        if \"companies\" in self.object_types:\n            yield from self._process_companies()\n        if \"deals\" in self.object_types:\n            yield from self._process_deals()\n        if \"contacts\" in self.object_types:\n            yield from self._process_contacts()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)\n        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)\n\n        # Process each object type with time filtering based on configuration\n        if \"tickets\" in self.object_types:\n            yield from self._process_tickets(start_datetime, end_datetime)\n        if \"companies\" in self.object_types:\n            yield from self._process_companies(start_datetime, end_datetime)\n        if \"deals\" in self.object_types:\n            yield from self._process_deals(start_datetime, end_datetime)\n        if \"contacts\" in self.object_types:\n            yield from self._process_contacts(start_datetime, end_datetime)\n\n\nif __name__ == \"__main__\":\n    import os\n\n    connector = HubSpotConnector()\n    connector.load_credentials(\n        {\"hubspot_access_token\": os.environ[\"HUBSPOT_ACCESS_TOKEN\"]}\n    )\n    # Run the first example\n    document_batches = connector.load_from_state()\n    first_batch = next(document_batches)\n    for doc in first_batch:\n        print(doc.model_dump_json(indent=2))\n"
  },
  {
    "path": "backend/onyx/connectors/hubspot/rate_limit.py",
    "content": "from __future__ import annotations\n\nimport time\nfrom collections.abc import Callable\nfrom typing import Any\nfrom typing import TypeVar\n\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rate_limit_builder,\n)\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    RateLimitTriedTooManyTimesError,\n)\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nT = TypeVar(\"T\")\n\n# HubSpot exposes a ten second rolling window (x-hubspot-ratelimit-interval-milliseconds)\n# with a maximum of 190 requests, and a per-second limit of 19 requests.\n_HUBSPOT_TEN_SECOND_LIMIT = 190\n_HUBSPOT_TEN_SECOND_PERIOD = 10  # seconds\n_HUBSPOT_SECONDLY_LIMIT = 19\n_HUBSPOT_SECONDLY_PERIOD = 1  # second\n_DEFAULT_SLEEP_SECONDS = 10\n_SLEEP_PADDING_SECONDS = 1.0\n_MAX_RATE_LIMIT_RETRIES = 5\n\n\ndef _extract_header(headers: Any, key: str) -> str | None:\n    if headers is None:\n        return None\n\n    getter = getattr(headers, \"get\", None)\n    if callable(getter):\n        value = getter(key)\n        if value is not None:\n            return value\n\n    if isinstance(headers, dict):\n        value = headers.get(key)\n        if value is not None:\n            return value\n\n    return None\n\n\ndef is_rate_limit_error(exception: Exception) -> bool:\n    status = getattr(exception, \"status\", None)\n    if status == 429:\n        return True\n\n    headers = getattr(exception, \"headers\", None)\n    if headers is not None:\n        remaining = _extract_header(headers, \"x-hubspot-ratelimit-remaining\")\n        if remaining == \"0\":\n            return True\n        secondly_remaining = _extract_header(\n            headers, \"x-hubspot-ratelimit-secondly-remaining\"\n        )\n        if secondly_remaining == \"0\":\n            return True\n\n    message = str(exception)\n    return \"RATE_LIMIT\" in message or \"Too Many Requests\" in message\n\n\ndef get_rate_limit_retry_delay_seconds(exception: Exception) -> float:\n    headers = getattr(exception, \"headers\", None)\n\n    retry_after = _extract_header(headers, \"Retry-After\")\n    if retry_after:\n        try:\n            return float(retry_after) + _SLEEP_PADDING_SECONDS\n        except ValueError:\n            logger.debug(\n                \"Failed to parse Retry-After header '%s' as float\", retry_after\n            )\n\n    interval_ms = _extract_header(headers, \"x-hubspot-ratelimit-interval-milliseconds\")\n    if interval_ms:\n        try:\n            return float(interval_ms) / 1000.0 + _SLEEP_PADDING_SECONDS\n        except ValueError:\n            logger.debug(\n                \"Failed to parse x-hubspot-ratelimit-interval-milliseconds '%s' as float\",\n                interval_ms,\n            )\n\n    secondly_limit = _extract_header(headers, \"x-hubspot-ratelimit-secondly\")\n    if secondly_limit:\n        try:\n            per_second = max(float(secondly_limit), 1.0)\n            return (1.0 / per_second) + _SLEEP_PADDING_SECONDS\n        except ValueError:\n            logger.debug(\n                \"Failed to parse x-hubspot-ratelimit-secondly '%s' as float\",\n                secondly_limit,\n            )\n\n    return _DEFAULT_SLEEP_SECONDS + _SLEEP_PADDING_SECONDS\n\n\nclass HubSpotRateLimiter:\n    def __init__(\n        self,\n        *,\n        ten_second_limit: int = _HUBSPOT_TEN_SECOND_LIMIT,\n        ten_second_period: int = _HUBSPOT_TEN_SECOND_PERIOD,\n        secondly_limit: int = _HUBSPOT_SECONDLY_LIMIT,\n        secondly_period: int = _HUBSPOT_SECONDLY_PERIOD,\n        max_retries: int = _MAX_RATE_LIMIT_RETRIES,\n    ) -> None:\n        self._max_retries = max_retries\n\n        @rate_limit_builder(max_calls=secondly_limit, period=secondly_period)\n        @rate_limit_builder(max_calls=ten_second_limit, period=ten_second_period)\n        def _execute(callable_: Callable[[], T]) -> T:\n            return callable_()\n\n        self._execute = _execute\n\n    def call(self, func: Callable[..., T], *args: Any, **kwargs: Any) -> T:\n        attempts = 0\n\n        while True:\n            try:\n                return self._execute(lambda: func(*args, **kwargs))\n            except Exception as exc:  # pylint: disable=broad-except\n                if not is_rate_limit_error(exc):\n                    raise\n\n                attempts += 1\n                if attempts > self._max_retries:\n                    raise RateLimitTriedTooManyTimesError(\n                        \"Exceeded configured HubSpot rate limit retries\"\n                    ) from exc\n\n                wait_time = get_rate_limit_retry_delay_seconds(exc)\n                logger.notice(\n                    \"HubSpot rate limit reached. Sleeping %.2f seconds before retrying.\",\n                    wait_time,\n                )\n                time.sleep(wait_time)\n"
  },
  {
    "path": "backend/onyx/connectors/imap/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/imap/connector.py",
    "content": "import copy\nimport email\nimport imaplib\nimport os\nimport re\nfrom datetime import datetime\nfrom datetime import timezone\nfrom email.message import Message\nfrom email.utils import parseaddr\nfrom enum import Enum\nfrom typing import Any\nfrom typing import cast\n\nimport bs4\nfrom pydantic import BaseModel\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.imap.models import EmailHeaders\nfrom onyx.connectors.interfaces import CheckpointedConnectorWithPermSync\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import CredentialsConnector\nfrom onyx.connectors.interfaces import CredentialsProviderInterface\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n_DEFAULT_IMAP_PORT_NUMBER = int(os.environ.get(\"IMAP_PORT\", 993))\n_IMAP_OKAY_STATUS = \"OK\"\n_PAGE_SIZE = 100\n_USERNAME_KEY = \"imap_username\"\n_PASSWORD_KEY = \"imap_password\"\n\n\nclass CurrentMailbox(BaseModel):\n    mailbox: str\n    todo_email_ids: list[str]\n\n\n# An email has a list of mailboxes.\n# Each mailbox has a list of email-ids inside of it.\n#\n# Usage:\n# To use this checkpointer, first fetch all the mailboxes.\n# Then, pop a mailbox and fetch all of its email-ids.\n# Then, pop each email-id and fetch its content (and parse it, etc..).\n# When you have popped all email-ids for this mailbox, pop the next mailbox and repeat the above process until you're done.\n#\n# For initial checkpointing, set both fields to `None`.\nclass ImapCheckpoint(ConnectorCheckpoint):\n    todo_mailboxes: list[str] | None = None\n    current_mailbox: CurrentMailbox | None = None\n\n\nclass LoginState(str, Enum):\n    LoggedIn = \"logged_in\"\n    LoggedOut = \"logged_out\"\n\n\nclass ImapConnector(\n    CredentialsConnector,\n    CheckpointedConnectorWithPermSync[ImapCheckpoint],\n):\n    def __init__(\n        self,\n        host: str,\n        port: int = _DEFAULT_IMAP_PORT_NUMBER,\n        mailboxes: list[str] | None = None,\n    ) -> None:\n        self._host = host\n        self._port = port\n        self._mailboxes = mailboxes\n        self._credentials: dict[str, Any] | None = None\n\n    @property\n    def credentials(self) -> dict[str, Any]:\n        if not self._credentials:\n            raise RuntimeError(\n                \"Credentials have not been initialized; call `set_credentials_provider` first\"\n            )\n        return self._credentials\n\n    def _get_mail_client(self) -> imaplib.IMAP4_SSL:\n        \"\"\"\n        Returns a new `imaplib.IMAP4_SSL` instance.\n\n        The `imaplib.IMAP4_SSL` object is supposed to be an \"ephemeral\" object; it's not something that you can login,\n        logout, then log back into again. I.e., the following will fail:\n\n        ```py\n        mail_client.login(..)\n        mail_client.logout();\n        mail_client.login(..)\n        ```\n\n        Therefore, you need a fresh, new instance in order to operate with IMAP. This function gives one to you.\n\n        # Notes\n        This function will throw an error if the credentials have not yet been set.\n        \"\"\"\n\n        def get_or_raise(name: str) -> str:\n            value = self.credentials.get(name)\n            if not value:\n                raise RuntimeError(f\"Credential item {name=} was not found\")\n            if not isinstance(value, str):\n                raise RuntimeError(\n                    f\"Credential item {name=} must be of type str, instead received {type(name)=}\"\n                )\n            return value\n\n        username = get_or_raise(_USERNAME_KEY)\n        password = get_or_raise(_PASSWORD_KEY)\n\n        mail_client = imaplib.IMAP4_SSL(host=self._host, port=self._port)\n        status, _data = mail_client.login(user=username, password=password)\n\n        if status != _IMAP_OKAY_STATUS:\n            raise RuntimeError(f\"Failed to log into imap server; {status=}\")\n\n        return mail_client\n\n    def _load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: ImapCheckpoint,\n        include_perm_sync: bool,\n    ) -> CheckpointOutput[ImapCheckpoint]:\n        checkpoint = cast(ImapCheckpoint, copy.deepcopy(checkpoint))\n        checkpoint.has_more = True\n\n        mail_client = self._get_mail_client()\n\n        if checkpoint.todo_mailboxes is None:\n            # This is the dummy checkpoint.\n            # Fill it with mailboxes first.\n            if self._mailboxes:\n                checkpoint.todo_mailboxes = _sanitize_mailbox_names(self._mailboxes)\n            else:\n                fetched_mailboxes = _fetch_all_mailboxes_for_email_account(\n                    mail_client=mail_client\n                )\n                if not fetched_mailboxes:\n                    raise RuntimeError(\n                        \"Failed to find any mailboxes for this email account\"\n                    )\n                checkpoint.todo_mailboxes = _sanitize_mailbox_names(fetched_mailboxes)\n\n            return checkpoint\n\n        if (\n            not checkpoint.current_mailbox\n            or not checkpoint.current_mailbox.todo_email_ids\n        ):\n            if not checkpoint.todo_mailboxes:\n                checkpoint.has_more = False\n                return checkpoint\n\n            mailbox = checkpoint.todo_mailboxes.pop()\n            email_ids = _fetch_email_ids_in_mailbox(\n                mail_client=mail_client,\n                mailbox=mailbox,\n                start=start,\n                end=end,\n            )\n            checkpoint.current_mailbox = CurrentMailbox(\n                mailbox=mailbox,\n                todo_email_ids=email_ids,\n            )\n\n        _select_mailbox(\n            mail_client=mail_client, mailbox=checkpoint.current_mailbox.mailbox\n        )\n        current_todos = cast(\n            list, copy.deepcopy(checkpoint.current_mailbox.todo_email_ids[:_PAGE_SIZE])\n        )\n        checkpoint.current_mailbox.todo_email_ids = (\n            checkpoint.current_mailbox.todo_email_ids[_PAGE_SIZE:]\n        )\n\n        for email_id in current_todos:\n            email_msg = _fetch_email(mail_client=mail_client, email_id=email_id)\n            if not email_msg:\n                logger.warn(f\"Failed to fetch message {email_id=}; skipping\")\n                continue\n\n            email_headers = EmailHeaders.from_email_msg(email_msg=email_msg)\n\n            yield _convert_email_headers_and_body_into_document(\n                email_msg=email_msg,\n                email_headers=email_headers,\n                include_perm_sync=include_perm_sync,\n            )\n\n        return checkpoint\n\n    # impls for BaseConnector\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        raise NotImplementedError(\"Use `set_credentials_provider` instead\")\n\n    def validate_connector_settings(self) -> None:\n        self._get_mail_client()\n\n    # impls for CredentialsConnector\n\n    def set_credentials_provider(\n        self, credentials_provider: CredentialsProviderInterface\n    ) -> None:\n        self._credentials = credentials_provider.get_credentials()\n\n    # impls for CheckpointedConnector\n\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: ImapCheckpoint,\n    ) -> CheckpointOutput[ImapCheckpoint]:\n        return self._load_from_checkpoint(\n            start=start, end=end, checkpoint=checkpoint, include_perm_sync=False\n        )\n\n    def build_dummy_checkpoint(self) -> ImapCheckpoint:\n        return ImapCheckpoint(has_more=True)\n\n    def validate_checkpoint_json(self, checkpoint_json: str) -> ImapCheckpoint:\n        return ImapCheckpoint.model_validate_json(json_data=checkpoint_json)\n\n    # impls for CheckpointedConnectorWithPermSync\n\n    def load_from_checkpoint_with_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: ImapCheckpoint,\n    ) -> CheckpointOutput[ImapCheckpoint]:\n        return self._load_from_checkpoint(\n            start=start, end=end, checkpoint=checkpoint, include_perm_sync=True\n        )\n\n\ndef _fetch_all_mailboxes_for_email_account(mail_client: imaplib.IMAP4_SSL) -> list[str]:\n    status, mailboxes_data = mail_client.list(directory=\"*\", pattern=\"*\")\n    if status != _IMAP_OKAY_STATUS:\n        raise RuntimeError(f\"Failed to fetch mailboxes; {status=}\")\n\n    mailboxes = []\n\n    for mailboxes_raw in mailboxes_data:\n        if isinstance(mailboxes_raw, bytes):\n            mailboxes_str = mailboxes_raw.decode()\n        elif isinstance(mailboxes_raw, str):\n            mailboxes_str = mailboxes_raw\n        else:\n            logger.warn(\n                f\"Expected the mailbox data to be of type str, instead got {type(mailboxes_raw)=} {mailboxes_raw}; skipping\"\n            )\n            continue\n\n        # The mailbox LIST response output can be found here:\n        # https://www.rfc-editor.org/rfc/rfc3501.html#section-7.2.2\n        #\n        # The general format is:\n        # `(<name-attributes>) <hierarchy-delimiter> <mailbox-name>`\n        #\n        # The below regex matches on that pattern; from there, we select the 3rd match (index 2), which is the mailbox-name.\n        match = re.match(r'\\([^)]*\\)\\s+\"([^\"]+)\"\\s+\"?(.+?)\"?$', mailboxes_str)\n        if not match:\n            logger.warn(\n                f\"Invalid mailbox-data formatting structure: {mailboxes_str=}; skipping\"\n            )\n            continue\n\n        mailbox = match.group(2)\n        mailboxes.append(mailbox)\n\n    return mailboxes\n\n\ndef _select_mailbox(mail_client: imaplib.IMAP4_SSL, mailbox: str) -> None:\n    status, _ids = mail_client.select(mailbox=mailbox, readonly=True)\n    if status != _IMAP_OKAY_STATUS:\n        raise RuntimeError(f\"Failed to select {mailbox=}\")\n\n\ndef _fetch_email_ids_in_mailbox(\n    mail_client: imaplib.IMAP4_SSL,\n    mailbox: str,\n    start: SecondsSinceUnixEpoch,\n    end: SecondsSinceUnixEpoch,\n) -> list[str]:\n    _select_mailbox(mail_client=mail_client, mailbox=mailbox)\n\n    start_str = datetime.fromtimestamp(start, tz=timezone.utc).strftime(\"%d-%b-%Y\")\n    end_str = datetime.fromtimestamp(end, tz=timezone.utc).strftime(\"%d-%b-%Y\")\n    search_criteria = f'(SINCE \"{start_str}\" BEFORE \"{end_str}\")'\n\n    status, email_ids_byte_array = mail_client.search(None, search_criteria)\n\n    if status != _IMAP_OKAY_STATUS or not email_ids_byte_array:\n        raise RuntimeError(f\"Failed to fetch email ids; {status=}\")\n\n    email_ids: bytes = email_ids_byte_array[0]\n\n    return [email_id.decode() for email_id in email_ids.split()]\n\n\ndef _fetch_email(mail_client: imaplib.IMAP4_SSL, email_id: str) -> Message | None:\n    status, msg_data = mail_client.fetch(message_set=email_id, message_parts=\"(RFC822)\")\n    if status != _IMAP_OKAY_STATUS or not msg_data:\n        return None\n\n    data = msg_data[0]\n    if not isinstance(data, tuple):\n        raise RuntimeError(\n            f\"Message data should be a tuple; instead got a {type(data)=} {data=}\"\n        )\n\n    _metadata, raw_email = data\n    return email.message_from_bytes(raw_email)\n\n\ndef _convert_email_headers_and_body_into_document(\n    email_msg: Message,\n    email_headers: EmailHeaders,\n    include_perm_sync: bool,\n) -> Document:\n    sender_name, sender_addr = _parse_singular_addr(raw_header=email_headers.sender)\n    parsed_recipients = (\n        _parse_addrs(raw_header=email_headers.recipients)\n        if email_headers.recipients\n        else []\n    )\n\n    expert_info_map = {\n        recipient_addr: BasicExpertInfo(\n            display_name=recipient_name, email=recipient_addr\n        )\n        for recipient_name, recipient_addr in parsed_recipients\n    }\n    if sender_addr not in expert_info_map:\n        expert_info_map[sender_addr] = BasicExpertInfo(\n            display_name=sender_name, email=sender_addr\n        )\n\n    email_body = _parse_email_body(email_msg=email_msg, email_headers=email_headers)\n    primary_owners = list(expert_info_map.values())\n    external_access = (\n        ExternalAccess(\n            external_user_emails=set(expert_info_map.keys()),\n            external_user_group_ids=set(),\n            is_public=False,\n        )\n        if include_perm_sync\n        else None\n    )\n\n    return Document(\n        id=email_headers.id,\n        title=email_headers.subject,\n        semantic_identifier=email_headers.subject,\n        metadata={},\n        source=DocumentSource.IMAP,\n        sections=[TextSection(text=email_body)],\n        primary_owners=primary_owners,\n        external_access=external_access,\n    )\n\n\ndef _parse_email_body(\n    email_msg: Message,\n    email_headers: EmailHeaders,\n) -> str:\n    body = None\n    for part in email_msg.walk():\n        if part.is_multipart():\n            # Multipart parts are *containers* for other parts, not the actual content itself.\n            # Therefore, we skip until we find the individual parts instead.\n            continue\n\n        charset = part.get_content_charset() or \"utf-8\"\n\n        try:\n            raw_payload = part.get_payload(decode=True)\n            if not isinstance(raw_payload, bytes):\n                logger.warn(\n                    \"Payload section from email was expected to be an array of bytes, instead got \"\n                    f\"{type(raw_payload)=}, {raw_payload=}\"\n                )\n                continue\n            body = raw_payload.decode(charset)\n            break\n        except (UnicodeDecodeError, LookupError) as e:\n            print(f\"Warning: Could not decode part with charset {charset}. Error: {e}\")\n            continue\n\n    if not body:\n        logger.warn(\n            f\"Email with {email_headers.id=} has an empty body; returning an empty string\"\n        )\n        return \"\"\n\n    soup = bs4.BeautifulSoup(markup=body, features=\"html.parser\")\n\n    return \" \".join(str_section for str_section in soup.stripped_strings)\n\n\ndef _sanitize_mailbox_names(mailboxes: list[str]) -> list[str]:\n    \"\"\"\n    Mailboxes with special characters in them must be enclosed by double-quotes, as per the IMAP protocol.\n    Just to be safe, we wrap *all* mailboxes with double-quotes.\n    \"\"\"\n    return [f'\"{mailbox}\"' for mailbox in mailboxes if mailbox]\n\n\ndef _parse_addrs(raw_header: str) -> list[tuple[str, str]]:\n    addrs = raw_header.split(\",\")\n    name_addr_pairs = [parseaddr(addr=addr) for addr in addrs if addr]\n    return [(name, addr) for name, addr in name_addr_pairs if addr]\n\n\ndef _parse_singular_addr(raw_header: str) -> tuple[str, str]:\n    addrs = _parse_addrs(raw_header=raw_header)\n    if not addrs:\n        raise RuntimeError(\n            f\"Parsing email header resulted in no addresses being found; {raw_header=}\"\n        )\n    elif len(addrs) >= 2:\n        raise RuntimeError(\n            f\"Expected a singular address, but instead got multiple; {raw_header=} {addrs=}\"\n        )\n\n    return addrs[0]\n\n\nif __name__ == \"__main__\":\n    import time\n    from tests.daily.connectors.utils import load_all_from_connector\n    from onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider\n\n    host = os.environ.get(\"IMAP_HOST\")\n    mailboxes_str = os.environ.get(\"IMAP_MAILBOXES\")\n    username = os.environ.get(\"IMAP_USERNAME\")\n    password = os.environ.get(\"IMAP_PASSWORD\")\n\n    mailboxes = (\n        [mailbox.strip() for mailbox in mailboxes_str.split(\",\")]\n        if mailboxes_str\n        else []\n    )\n\n    if not host:\n        raise RuntimeError(\"`IMAP_HOST` must be set\")\n\n    imap_connector = ImapConnector(\n        host=host,\n        mailboxes=mailboxes,\n    )\n\n    imap_connector.set_credentials_provider(\n        OnyxStaticCredentialsProvider(\n            tenant_id=None,\n            connector_name=DocumentSource.IMAP,\n            credential_json={\n                _USERNAME_KEY: username,\n                _PASSWORD_KEY: password,\n            },\n        )\n    )\n\n    for doc in load_all_from_connector(\n        connector=imap_connector,\n        start=0,\n        end=time.time(),\n    ).documents:\n        print(doc)\n"
  },
  {
    "path": "backend/onyx/connectors/imap/models.py",
    "content": "import email\nfrom datetime import datetime\nfrom email.message import Message\nfrom enum import Enum\n\nfrom pydantic import BaseModel\n\n\nclass Header(str, Enum):\n    SUBJECT_HEADER = \"subject\"\n    FROM_HEADER = \"from\"\n    TO_HEADER = \"to\"\n    DELIVERED_TO_HEADER = (\n        \"Delivered-To\"  # Used in mailing lists instead of the \"to\" header.\n    )\n    DATE_HEADER = \"date\"\n    MESSAGE_ID_HEADER = \"Message-ID\"\n\n\nclass EmailHeaders(BaseModel):\n    \"\"\"\n    Model for email headers extracted from IMAP messages.\n    \"\"\"\n\n    id: str\n    subject: str\n    sender: str\n    recipients: str | None\n    date: datetime\n\n    @classmethod\n    def from_email_msg(cls, email_msg: Message) -> \"EmailHeaders\":\n        def _decode(header: str, default: str | None = None) -> str | None:\n            value = email_msg.get(header, default)\n            if not value:\n                return None\n\n            decoded_value, encoding = email.header.decode_header(value)[0]\n            if isinstance(decoded_value, bytes):\n                encoding = encoding or \"utf-8\"\n                return decoded_value.decode(encoding, errors=\"replace\")\n            elif isinstance(decoded_value, str):\n                return decoded_value\n            else:\n                return None\n\n        def _parse_date(date_str: str | None) -> datetime | None:\n            if not date_str:\n                return None\n            try:\n                return email.utils.parsedate_to_datetime(date_str)\n            except (TypeError, ValueError):\n                return None\n\n        message_id = _decode(header=Header.MESSAGE_ID_HEADER)\n        # It's possible for the subject line to not exist or be an empty string.\n        subject = _decode(header=Header.SUBJECT_HEADER) or \"Unknown Subject\"\n        from_ = _decode(header=Header.FROM_HEADER)\n        to = _decode(header=Header.TO_HEADER)\n        if not to:\n            to = _decode(header=Header.DELIVERED_TO_HEADER)\n        date_str = _decode(header=Header.DATE_HEADER)\n        date = _parse_date(date_str=date_str)\n\n        # If any of the above are `None`, model validation will fail.\n        # Therefore, no guards (i.e.: `if <header> is None: raise RuntimeError(..)`) were written.\n        return cls.model_validate(\n            {\n                \"id\": message_id,\n                \"subject\": subject,\n                \"sender\": from_,\n                \"recipients\": to,\n                \"date\": date,\n            }\n        )\n"
  },
  {
    "path": "backend/onyx/connectors/interfaces.py",
    "content": "import abc\nfrom collections.abc import Generator\nfrom collections.abc import Iterator\nfrom types import TracebackType\nfrom typing import Any\nfrom typing import Generic\nfrom typing import TypeAlias\nfrom typing import TypeVar\n\nfrom pydantic import BaseModel\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\n\nSecondsSinceUnixEpoch = float\n\n# Output types that can include HierarchyNode alongside Documents/SlimDocuments\nGenerateDocumentsOutput = Iterator[list[Document | HierarchyNode]]\nGenerateSlimDocumentOutput = Iterator[list[SlimDocument | HierarchyNode]]\n\nCT = TypeVar(\"CT\", bound=ConnectorCheckpoint)\n\n\nclass NormalizationResult(BaseModel):\n    \"\"\"Result of URL normalization attempt.\n\n    Attributes:\n        normalized_url: The normalized URL string, or None if normalization failed\n        use_default: If True, fall back to default normalizer. If False, return None.\n    \"\"\"\n\n    normalized_url: str | None\n    use_default: bool = False\n\n\nclass BaseConnector(abc.ABC, Generic[CT]):\n    REDIS_KEY_PREFIX = \"da_connector_data:\"\n\n    @abc.abstractmethod\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        raise NotImplementedError\n\n    @staticmethod\n    def parse_metadata(metadata: dict[str, Any]) -> list[str]:\n        \"\"\"Parse the metadata for a document/chunk into a string to pass to Generative AI as additional context\"\"\"\n        custom_parser_req_msg = (\n            \"Specific metadata parsing required, connector has not implemented it.\"\n        )\n        metadata_lines = []\n        for metadata_key, metadata_value in metadata.items():\n            if isinstance(metadata_value, str):\n                metadata_lines.append(f\"{metadata_key}: {metadata_value}\")\n            elif isinstance(metadata_value, list):\n                if not all([isinstance(val, str) for val in metadata_value]):\n                    raise RuntimeError(custom_parser_req_msg)\n                metadata_lines.append(f\"{metadata_key}: {', '.join(metadata_value)}\")\n            else:\n                raise RuntimeError(custom_parser_req_msg)\n        return metadata_lines\n\n    def validate_connector_settings(self) -> None:\n        \"\"\"\n        Override this if your connector needs to validate credentials or settings.\n        Raise an exception if invalid, otherwise do nothing.\n\n        Default is a no-op (always successful).\n        \"\"\"\n\n    def validate_perm_sync(self) -> None:\n        \"\"\"\n        Don't override this; add a function to perm_sync_valid.py in the ee package\n        to do permission sync validation\n        \"\"\"\n        validate_connector_settings_fn = fetch_ee_implementation_or_noop(\n            \"onyx.connectors.perm_sync_valid\",\n            \"validate_perm_sync\",\n            noop_return_value=None,\n        )\n        validate_connector_settings_fn(self)\n\n    def set_allow_images(self, value: bool) -> None:\n        \"\"\"Implement if the underlying connector wants to skip/allow image downloading\n        based on the application level image analysis setting.\"\"\"\n\n    @classmethod\n    def normalize_url(cls, url: str) -> \"NormalizationResult\":  # noqa: ARG003\n        \"\"\"Normalize a URL to match the canonical Document.id format used during ingestion.\n\n        Connectors that use URLs as document IDs should override this method.\n        Returns NormalizationResult with use_default=True if not implemented.\n        \"\"\"\n        return NormalizationResult(normalized_url=None, use_default=True)\n\n    def build_dummy_checkpoint(self) -> CT:\n        # TODO: find a way to make this work without type: ignore\n        return ConnectorCheckpoint(has_more=True)  # type: ignore\n\n\n# Large set update or reindex, generally pulling a complete state or from a savestate file\nclass LoadConnector(BaseConnector):\n    @abc.abstractmethod\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        raise NotImplementedError\n\n\n# Small set updates by time\nclass PollConnector(BaseConnector):\n    @abc.abstractmethod\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        raise NotImplementedError\n\n\n# Slim connectors retrieve just the ids of documents\nclass SlimConnector(BaseConnector):\n    @abc.abstractmethod\n    def retrieve_all_slim_docs(\n        self,\n    ) -> GenerateSlimDocumentOutput:\n        raise NotImplementedError\n\n\n# Slim connectors retrieve both the ids AND\n# permission syncing information for connected documents\nclass SlimConnectorWithPermSync(BaseConnector):\n    @abc.abstractmethod\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> GenerateSlimDocumentOutput:\n        raise NotImplementedError\n\n\nclass OAuthConnector(BaseConnector):\n    class AdditionalOauthKwargs(BaseModel):\n        # if overridden, all fields should be str type\n        pass\n\n    @classmethod\n    @abc.abstractmethod\n    def oauth_id(cls) -> DocumentSource:\n        raise NotImplementedError\n\n    @classmethod\n    @abc.abstractmethod\n    def oauth_authorization_url(\n        cls,\n        base_domain: str,\n        state: str,\n        additional_kwargs: dict[str, str],\n    ) -> str:\n        raise NotImplementedError\n\n    @classmethod\n    @abc.abstractmethod\n    def oauth_code_to_token(\n        cls,\n        base_domain: str,\n        code: str,\n        additional_kwargs: dict[str, str],\n    ) -> dict[str, Any]:\n        raise NotImplementedError\n\n\nT = TypeVar(\"T\", bound=\"CredentialsProviderInterface\")\n\n\nclass CredentialsProviderInterface(abc.ABC, Generic[T]):\n    @abc.abstractmethod\n    def __enter__(self) -> T:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def __exit__(\n        self,\n        exc_type: type[BaseException] | None,\n        exc_value: BaseException | None,\n        traceback: TracebackType | None,\n    ) -> None:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def get_tenant_id(self) -> str | None:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def get_provider_key(self) -> str:\n        \"\"\"a unique key that the connector can use to lock around a credential\n        that might be used simultaneously.\n\n        Will typically be the credential id, but can also just be something random\n        in cases when there is nothing to lock (aka static credentials)\n        \"\"\"\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def get_credentials(self) -> dict[str, Any]:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def set_credentials(self, credential_json: dict[str, Any]) -> None:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def is_dynamic(self) -> bool:\n        \"\"\"If dynamic, the credentials may change during usage ... meaning the client\n        needs to use the locking features of the credentials provider to operate\n        correctly.\n\n        If static, the client can simply reference the credentials once and use them\n        through the entire indexing run.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass CredentialsConnector(BaseConnector):\n    \"\"\"Implement this if the connector needs to be able to read and write credentials\n    on the fly. Typically used with shared credentials/tokens that might be renewed\n    at any time.\"\"\"\n\n    @abc.abstractmethod\n    def set_credentials_provider(\n        self, credentials_provider: CredentialsProviderInterface\n    ) -> None:\n        raise NotImplementedError\n\n\n# Event driven\nclass EventConnector(BaseConnector):\n    @abc.abstractmethod\n    def handle_event(self, event: Any) -> GenerateDocumentsOutput:\n        raise NotImplementedError\n\n\nCheckpointOutput: TypeAlias = Generator[\n    Document | HierarchyNode | ConnectorFailure, None, CT\n]\n\nHierarchyOutput: TypeAlias = Generator[HierarchyNode, None, None]\n\n\nclass CheckpointedConnector(BaseConnector[CT]):\n    @abc.abstractmethod\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: CT,\n    ) -> CheckpointOutput[CT]:\n        \"\"\"Yields back documents or failures. Final return is the new checkpoint.\n\n        Final return can be access via either:\n\n        ```\n        try:\n            for document_or_failure in connector.load_from_checkpoint(start, end, checkpoint):\n                print(document_or_failure)\n        except StopIteration as e:\n            checkpoint = e.value  # Extracting the return value\n            print(checkpoint)\n        ```\n\n        OR\n\n        ```\n        checkpoint = yield from connector.load_from_checkpoint(start, end, checkpoint)\n        ```\n        \"\"\"\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def build_dummy_checkpoint(self) -> CT:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def validate_checkpoint_json(self, checkpoint_json: str) -> CT:\n        \"\"\"Validate the checkpoint json and return the checkpoint object\"\"\"\n        raise NotImplementedError\n\n\nclass CheckpointedConnectorWithPermSync(CheckpointedConnector[CT]):\n    @abc.abstractmethod\n    def load_from_checkpoint_with_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: CT,\n    ) -> CheckpointOutput[CT]:\n        raise NotImplementedError\n\n\nclass HierarchyConnector(BaseConnector):\n    @abc.abstractmethod\n    def load_hierarchy(\n        self,\n        start: SecondsSinceUnixEpoch,  # may be unused if the connector must load the full hierarchy each time\n        end: SecondsSinceUnixEpoch,\n    ) -> HierarchyOutput:\n        raise NotImplementedError\n"
  },
  {
    "path": "backend/onyx/connectors/jira/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/jira/access.py",
    "content": "\"\"\"\nPermissioning / AccessControl logic for JIRA Projects + Issues.\n\"\"\"\n\nfrom collections.abc import Callable\nfrom typing import cast\n\nfrom jira import JIRA\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import global_version\n\n\ndef get_project_permissions(\n    jira_client: JIRA,\n    jira_project: str,\n    add_prefix: bool = False,\n) -> ExternalAccess | None:\n    \"\"\"\n    Fetch the project + issue level permissions / access-control.\n    This functionality requires Enterprise Edition.\n\n    Args:\n        jira_client: The JIRA client instance.\n        jira_project: The JIRA project string.\n        add_prefix: When True, prefix group IDs with source type (for indexing path).\n                   When False (default), leave unprefixed (for permission sync path\n                   where upsert_document_external_perms handles prefixing).\n\n    Returns:\n        ExternalAccess object for the page. None if EE is not enabled or no restrictions found.\n    \"\"\"\n\n    # Check if EE is enabled\n    if not global_version.is_ee_version():\n        return None\n\n    ee_get_project_permissions = cast(\n        Callable[\n            [JIRA, str, bool],\n            ExternalAccess | None,\n        ],\n        fetch_versioned_implementation(\n            \"onyx.external_permissions.jira.page_access\", \"get_project_permissions\"\n        ),\n    )\n\n    return ee_get_project_permissions(\n        jira_client,\n        jira_project,\n        add_prefix,\n    )\n"
  },
  {
    "path": "backend/onyx/connectors/jira/connector.py",
    "content": "import copy\nimport json\nimport os\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom collections.abc import Iterable\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\n\nimport requests\nfrom jira import JIRA\nfrom jira.exceptions import JIRAError\nfrom jira.resources import Issue\nfrom more_itertools import chunked\nfrom typing_extensions import override\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP\nfrom onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE\nfrom onyx.configs.app_configs import JIRA_SLIM_PAGE_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    is_atlassian_date_error,\n)\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.interfaces import CheckpointedConnectorWithPermSync\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.jira.access import get_project_permissions\nfrom onyx.connectors.jira.utils import best_effort_basic_expert_info\nfrom onyx.connectors.jira.utils import best_effort_get_field_from_issue\nfrom onyx.connectors.jira.utils import build_jira_client\nfrom onyx.connectors.jira.utils import build_jira_url\nfrom onyx.connectors.jira.utils import extract_text_from_adf\nfrom onyx.connectors.jira.utils import get_comment_strs\nfrom onyx.connectors.jira.utils import JIRA_CLOUD_API_VERSION\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\nONE_HOUR = 3600\n\n_MAX_RESULTS_FETCH_IDS = 5000  # 5000\n_JIRA_FULL_PAGE_SIZE = 50\n\n# Constants for Jira field names\n_FIELD_REPORTER = \"reporter\"\n_FIELD_ASSIGNEE = \"assignee\"\n_FIELD_PRIORITY = \"priority\"\n_FIELD_STATUS = \"status\"\n_FIELD_RESOLUTION = \"resolution\"\n_FIELD_LABELS = \"labels\"\n_FIELD_KEY = \"key\"\n_FIELD_CREATED = \"created\"\n_FIELD_DUEDATE = \"duedate\"\n_FIELD_ISSUETYPE = \"issuetype\"\n_FIELD_PARENT = \"parent\"\n_FIELD_ASSIGNEE_EMAIL = \"assignee_email\"\n_FIELD_REPORTER_EMAIL = \"reporter_email\"\n_FIELD_PROJECT = \"project\"\n_FIELD_PROJECT_NAME = \"project_name\"\n_FIELD_UPDATED = \"updated\"\n_FIELD_RESOLUTION_DATE = \"resolutiondate\"\n_FIELD_RESOLUTION_DATE_KEY = \"resolution_date\"\n\n\ndef _is_cloud_client(jira_client: JIRA) -> bool:\n    return jira_client._options[\"rest_api_version\"] == JIRA_CLOUD_API_VERSION\n\n\ndef _perform_jql_search(\n    jira_client: JIRA,\n    jql: str,\n    start: int,\n    max_results: int,\n    fields: str | None = None,\n    all_issue_ids: list[list[str]] | None = None,\n    checkpoint_callback: (\n        Callable[[Iterator[list[str]], str | None], None] | None\n    ) = None,\n    nextPageToken: str | None = None,\n    ids_done: bool = False,\n) -> Iterable[Issue]:\n    \"\"\"\n    The caller should expect\n    a) this function returns an iterable of issues of length 0 < len(issues) <= max_results.\n       - caveat; if all_issue_ids is provided, the iterable will be the size of some sub-list.\n       - this will only not match the above bound if a recent deployment changed max_results.\n\n    IF the v3 API is used (i.e. the jira instance is a cloud instance), then the caller should expect:\n\n    b) this function will call checkpoint_callback ONCE after at least one of the following has happened:\n       - a new batch of ids has been fetched via enhanced search\n       - a batch of issues has been bulk-fetched\n    c) checkpoint_callback is called with the new all_issue_ids and the pageToken of the enhanced\n       search request. We pass in a pageToken of None once we've fetched all the issue ids.\n\n    Note: nextPageToken is valid for 7 days according to a post from a year ago, so for now\n    we won't add any handling for restarting (just re-index, since there's no easy\n    way to recover from this).\n    \"\"\"\n    # it would be preferable to use one approach for both versions, but\n    # v2 doesnt have the bulk fetch api and v3 has fully deprecated the search\n    # api that v2 uses\n    if _is_cloud_client(jira_client):\n        if all_issue_ids is None:\n            raise ValueError(\"all_issue_ids is required for v3\")\n        return _perform_jql_search_v3(\n            jira_client,\n            jql,\n            max_results,\n            all_issue_ids,\n            fields=fields,\n            checkpoint_callback=checkpoint_callback,\n            nextPageToken=nextPageToken,\n            ids_done=ids_done,\n        )\n    else:\n        return _perform_jql_search_v2(jira_client, jql, start, max_results, fields)\n\n\ndef _handle_jira_search_error(e: Exception, jql: str) -> None:\n    \"\"\"Handle common Jira search errors and raise appropriate exceptions.\n\n    Args:\n        e: The exception raised by the Jira API\n        jql: The JQL query that caused the error\n\n    Raises:\n        ConnectorValidationError: For HTTP 400 errors (invalid JQL or project)\n        CredentialExpiredError: For HTTP 401 errors\n        InsufficientPermissionsError: For HTTP 403 errors\n        Exception: Re-raises the original exception for other error types\n    \"\"\"\n    # Extract error information from the exception\n    error_text = \"\"\n    status_code = None\n\n    def _format_error_text(error_payload: Any) -> str:\n        error_messages = (\n            error_payload.get(\"errorMessages\", [])\n            if isinstance(error_payload, dict)\n            else []\n        )\n        if error_messages:\n            return (\n                \"; \".join(error_messages)\n                if isinstance(error_messages, list)\n                else str(error_messages)\n            )\n        return str(error_payload)\n\n    # Try to get status code and error text from JIRAError or requests response\n    if hasattr(e, \"status_code\"):\n        status_code = e.status_code\n        raw_text = getattr(e, \"text\", \"\")\n        if isinstance(raw_text, str):\n            try:\n                error_text = _format_error_text(json.loads(raw_text))\n            except Exception:\n                error_text = raw_text\n        else:\n            error_text = str(raw_text)\n    elif hasattr(e, \"response\") and e.response is not None:\n        status_code = e.response.status_code\n        # Try JSON first, fall back to text\n        try:\n            error_json = e.response.json()\n            error_text = _format_error_text(error_json)\n        except Exception:\n            error_text = e.response.text\n\n    # Handle specific status codes\n    if status_code == 400:\n        if \"does not exist for the field 'project'\" in error_text:\n            raise ConnectorValidationError(\n                f\"The specified Jira project does not exist or you don't have access to it. JQL query: {jql}. Error: {error_text}\"\n            )\n        raise ConnectorValidationError(\n            f\"Invalid JQL query. JQL: {jql}. Error: {error_text}\"\n        )\n    elif status_code == 401:\n        raise CredentialExpiredError(\n            \"Jira credentials are expired or invalid (HTTP 401).\"\n        )\n    elif status_code == 403:\n        raise InsufficientPermissionsError(\n            f\"Insufficient permissions to execute JQL query. JQL: {jql}\"\n        )\n\n    # Re-raise for other error types\n    raise e\n\n\ndef enhanced_search_ids(\n    jira_client: JIRA, jql: str, nextPageToken: str | None = None\n) -> tuple[list[str], str | None]:\n    # https://community.atlassian.com/forums/Jira-articles/\n    # Avoiding-Pitfalls-A-Guide-to-Smooth-Migration-to-Enhanced-JQL/ba-p/2985433\n    # For cloud, it's recommended that we fetch all ids first then use the bulk fetch API.\n    # The enhanced search isn't currently supported by our python library, so we have to\n    # do this janky thing where we use the session directly.\n    enhanced_search_path = jira_client._get_url(\"search/jql\")\n    params: dict[str, str | int | None] = {\n        \"jql\": jql,\n        \"maxResults\": _MAX_RESULTS_FETCH_IDS,\n        \"nextPageToken\": nextPageToken,\n        \"fields\": \"id\",\n    }\n    try:\n        response = jira_client._session.get(enhanced_search_path, params=params)\n        response.raise_for_status()\n        response_json = response.json()\n    except Exception as e:\n        _handle_jira_search_error(e, jql)\n        raise  # Explicitly re-raise for type checker, should never reach here\n\n    return [str(issue[\"id\"]) for issue in response_json[\"issues\"]], response_json.get(\n        \"nextPageToken\"\n    )\n\n\ndef _bulk_fetch_request(\n    jira_client: JIRA, issue_ids: list[str], fields: str | None\n) -> list[dict[str, Any]]:\n    \"\"\"Raw POST to the bulkfetch endpoint. Returns the list of raw issue dicts.\"\"\"\n    bulk_fetch_path = jira_client._get_url(\"issue/bulkfetch\")\n    # Prepare the payload according to Jira API v3 specification\n    payload: dict[str, Any] = {\"issueIdsOrKeys\": issue_ids}\n    # Only restrict fields if specified, might want to explicitly do this in the future\n    # to avoid reading unnecessary data\n    payload[\"fields\"] = fields.split(\",\") if fields else [\"*all\"]\n\n    resp = jira_client._session.post(bulk_fetch_path, json=payload)\n    return resp.json()[\"issues\"]\n\n\ndef bulk_fetch_issues(\n    jira_client: JIRA, issue_ids: list[str], fields: str | None = None\n) -> list[Issue]:\n    # TODO(evan): move away from this jira library if they continue to not support\n    # the endpoints we need. Using private fields is not ideal, but\n    # is likely fine for now since we pin the library version\n\n    try:\n        raw_issues = _bulk_fetch_request(jira_client, issue_ids, fields)\n    except requests.exceptions.JSONDecodeError:\n        if len(issue_ids) <= 1:\n            logger.exception(\n                f\"Jira bulk-fetch response for issue(s) {issue_ids} could not \"\n                f\"be decoded as JSON (response too large or truncated).\"\n            )\n            raise\n\n        mid = len(issue_ids) // 2\n        logger.warning(\n            f\"Jira bulk-fetch JSON decode failed for batch of {len(issue_ids)} issues. \"\n            f\"Splitting into sub-batches of {mid} and {len(issue_ids) - mid}.\"\n        )\n        left = bulk_fetch_issues(jira_client, issue_ids[:mid], fields)\n        right = bulk_fetch_issues(jira_client, issue_ids[mid:], fields)\n        return left + right\n    except Exception as e:\n        logger.error(f\"Error fetching issues: {e}\")\n        raise\n\n    return [\n        Issue(jira_client._options, jira_client._session, raw=issue)\n        for issue in raw_issues\n    ]\n\n\ndef _perform_jql_search_v3(\n    jira_client: JIRA,\n    jql: str,\n    max_results: int,\n    all_issue_ids: list[list[str]],\n    fields: str | None = None,\n    checkpoint_callback: (\n        Callable[[Iterator[list[str]], str | None], None] | None\n    ) = None,\n    nextPageToken: str | None = None,\n    ids_done: bool = False,\n) -> Iterable[Issue]:\n    \"\"\"\n    The way this works is we get all the issue ids and bulk fetch them in batches.\n    However, for really large deployments we can't do these operations sequentially,\n    as it might take several hours to fetch all the issue ids.\n\n    So, each run of this function does at least one of:\n     - fetch a batch of issue ids\n     - bulk fetch a batch of issues\n\n    If all_issue_ids is not None, we use it to bulk fetch issues.\n    \"\"\"\n\n    # with some careful synchronization these steps can be done in parallel,\n    # leaving that out for now to avoid rate limit issues\n    if not ids_done:\n        new_ids, pageToken = enhanced_search_ids(jira_client, jql, nextPageToken)\n        if checkpoint_callback is not None:\n            checkpoint_callback(chunked(new_ids, max_results), pageToken)\n\n    # bulk fetch issues from ids. Note that the above callback MAY mutate all_issue_ids,\n    # but this fetch always just takes the last id batch.\n    if all_issue_ids:\n        yield from bulk_fetch_issues(jira_client, all_issue_ids.pop(), fields)\n\n\ndef _perform_jql_search_v2(\n    jira_client: JIRA,\n    jql: str,\n    start: int,\n    max_results: int,\n    fields: str | None = None,\n) -> Iterable[Issue]:\n    \"\"\"\n    Unfortunately, jira server/data center will forever use the v2 APIs that are now deprecated.\n    \"\"\"\n    logger.debug(\n        f\"Fetching Jira issues with JQL: {jql}, starting at {start}, max results: {max_results}\"\n    )\n    try:\n        issues = jira_client.search_issues(\n            jql_str=jql,\n            startAt=start,\n            maxResults=max_results,\n            fields=fields,\n        )\n    except JIRAError as e:\n        _handle_jira_search_error(e, jql)\n        raise  # Explicitly re-raise for type checker, should never reach here\n\n    for issue in issues:\n        if isinstance(issue, Issue):\n            yield issue\n        else:\n            raise RuntimeError(f\"Found Jira object not of type Issue: {issue}\")\n\n\ndef process_jira_issue(\n    jira_base_url: str,\n    issue: Issue,\n    comment_email_blacklist: tuple[str, ...] = (),\n    labels_to_skip: set[str] | None = None,\n    parent_hierarchy_raw_node_id: str | None = None,\n) -> Document | None:\n    if labels_to_skip:\n        if any(label in issue.fields.labels for label in labels_to_skip):\n            logger.info(\n                f\"Skipping {issue.key} because it has a label to skip. Found \"\n                f\"labels: {issue.fields.labels}. Labels to skip: {labels_to_skip}.\"\n            )\n            return None\n\n    if isinstance(issue.fields.description, str):\n        description = issue.fields.description\n    else:\n        description = extract_text_from_adf(issue.raw[\"fields\"][\"description\"])\n\n    comments = get_comment_strs(\n        issue=issue,\n        comment_email_blacklist=comment_email_blacklist,\n    )\n    ticket_content = f\"{description}\\n\" + \"\\n\".join(\n        [f\"Comment: {comment}\" for comment in comments if comment]\n    )\n\n    # Check ticket size\n    if len(ticket_content.encode(\"utf-8\")) > JIRA_CONNECTOR_MAX_TICKET_SIZE:\n        logger.info(\n            f\"Skipping {issue.key} because it exceeds the maximum size of {JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes.\"\n        )\n        return None\n\n    page_url = build_jira_url(jira_base_url, issue.key)\n\n    metadata_dict: dict[str, str | list[str]] = {}\n    people = set()\n\n    creator = best_effort_get_field_from_issue(issue, _FIELD_REPORTER)\n    if creator is not None and (\n        basic_expert_info := best_effort_basic_expert_info(creator)\n    ):\n        people.add(basic_expert_info)\n        metadata_dict[_FIELD_REPORTER] = basic_expert_info.get_semantic_name()\n        if email := basic_expert_info.get_email():\n            metadata_dict[_FIELD_REPORTER_EMAIL] = email\n\n    assignee = best_effort_get_field_from_issue(issue, _FIELD_ASSIGNEE)\n    if assignee is not None and (\n        basic_expert_info := best_effort_basic_expert_info(assignee)\n    ):\n        people.add(basic_expert_info)\n        metadata_dict[_FIELD_ASSIGNEE] = basic_expert_info.get_semantic_name()\n        if email := basic_expert_info.get_email():\n            metadata_dict[_FIELD_ASSIGNEE_EMAIL] = email\n\n    metadata_dict[_FIELD_KEY] = issue.key\n    if priority := best_effort_get_field_from_issue(issue, _FIELD_PRIORITY):\n        metadata_dict[_FIELD_PRIORITY] = priority.name\n    if status := best_effort_get_field_from_issue(issue, _FIELD_STATUS):\n        metadata_dict[_FIELD_STATUS] = status.name\n    if resolution := best_effort_get_field_from_issue(issue, _FIELD_RESOLUTION):\n        metadata_dict[_FIELD_RESOLUTION] = resolution.name\n    if labels := best_effort_get_field_from_issue(issue, _FIELD_LABELS):\n        metadata_dict[_FIELD_LABELS] = labels\n    if created := best_effort_get_field_from_issue(issue, _FIELD_CREATED):\n        metadata_dict[_FIELD_CREATED] = created\n    if updated := best_effort_get_field_from_issue(issue, _FIELD_UPDATED):\n        metadata_dict[_FIELD_UPDATED] = updated\n    if duedate := best_effort_get_field_from_issue(issue, _FIELD_DUEDATE):\n        metadata_dict[_FIELD_DUEDATE] = duedate\n    if issuetype := best_effort_get_field_from_issue(issue, _FIELD_ISSUETYPE):\n        metadata_dict[_FIELD_ISSUETYPE] = issuetype.name\n    if resolutiondate := best_effort_get_field_from_issue(\n        issue, _FIELD_RESOLUTION_DATE\n    ):\n        metadata_dict[_FIELD_RESOLUTION_DATE_KEY] = resolutiondate\n\n    parent = best_effort_get_field_from_issue(issue, _FIELD_PARENT)\n    if parent is not None:\n        metadata_dict[_FIELD_PARENT] = parent.key\n\n    project = best_effort_get_field_from_issue(issue, _FIELD_PROJECT)\n    if project is not None:\n        metadata_dict[_FIELD_PROJECT_NAME] = project.name\n        metadata_dict[_FIELD_PROJECT] = project.key\n    else:\n        logger.error(f\"Project should exist but does not for {issue.key}\")\n\n    return Document(\n        id=page_url,\n        sections=[TextSection(link=page_url, text=ticket_content)],\n        source=DocumentSource.JIRA,\n        semantic_identifier=f\"{issue.key}: {issue.fields.summary}\",\n        title=f\"{issue.key} {issue.fields.summary}\",\n        doc_updated_at=time_str_to_utc(issue.fields.updated),\n        primary_owners=list(people) or None,\n        metadata=metadata_dict,\n        parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,\n    )\n\n\nclass JiraConnectorCheckpoint(ConnectorCheckpoint):\n    # used for v3 (cloud) endpoint\n    all_issue_ids: list[list[str]] = []\n    ids_done: bool = False\n    cursor: str | None = None\n    # deprecated\n    # Used for v2 endpoint (server/data center)\n    offset: int | None = None\n    # Track hierarchy nodes we've already yielded to avoid duplicates across restarts\n    seen_hierarchy_node_ids: list[str] = []\n\n\nclass JiraConnector(\n    CheckpointedConnectorWithPermSync[JiraConnectorCheckpoint],\n    SlimConnectorWithPermSync,\n):\n    def __init__(\n        self,\n        jira_base_url: str,\n        project_key: str | None = None,\n        comment_email_blacklist: list[str] | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n        # if a ticket has one of the labels specified in this list, we will just\n        # skip it. This is generally used to avoid indexing extra sensitive\n        # tickets.\n        labels_to_skip: list[str] = JIRA_CONNECTOR_LABELS_TO_SKIP,\n        # Custom JQL query to filter Jira issues\n        jql_query: str | None = None,\n        scoped_token: bool = False,\n    ) -> None:\n        self.batch_size = batch_size\n\n        # dealing with scoped tokens is a bit tricky becasue we need to hit api.atlassian.net\n        # when making jira requests but still want correct links to issues in the UI.\n        # So, the user's base url is stored here, but converted to a scoped url when passed\n        # to the jira client.\n        self.jira_base = jira_base_url.rstrip(\"/\")  # Remove trailing slash if present\n        self.jira_project = project_key\n        self._comment_email_blacklist = comment_email_blacklist or []\n        self.labels_to_skip = set(labels_to_skip)\n        self.jql_query = jql_query\n        self.scoped_token = scoped_token\n        self._jira_client: JIRA | None = None\n        # Cache project permissions to avoid fetching them repeatedly across runs\n        self._project_permissions_cache: dict[str, Any] = {}\n\n    @property\n    def comment_email_blacklist(self) -> tuple:\n        return tuple(email.strip() for email in self._comment_email_blacklist)\n\n    @property\n    def jira_client(self) -> JIRA:\n        if self._jira_client is None:\n            raise ConnectorMissingCredentialError(\"Jira\")\n        return self._jira_client\n\n    @property\n    def quoted_jira_project(self) -> str:\n        # Quote the project name to handle reserved words\n        if not self.jira_project:\n            return \"\"\n        return f'\"{self.jira_project}\"'\n\n    def _get_project_permissions(\n        self, project_key: str, add_prefix: bool = False\n    ) -> Any:\n        \"\"\"Get project permissions with caching.\n\n        Args:\n            project_key: The Jira project key\n            add_prefix: When True, prefix group IDs with source type (for indexing path).\n                       When False (default), leave unprefixed (for permission sync path).\n\n        Returns:\n            The external access permissions for the project\n        \"\"\"\n        # Use different cache keys for prefixed vs unprefixed to avoid mixing\n        cache_key = f\"{project_key}:{'prefixed' if add_prefix else 'unprefixed'}\"\n        if cache_key not in self._project_permissions_cache:\n            self._project_permissions_cache[cache_key] = get_project_permissions(\n                jira_client=self.jira_client,\n                jira_project=project_key,\n                add_prefix=add_prefix,\n            )\n        return self._project_permissions_cache[cache_key]\n\n    def _is_epic(self, issue: Issue) -> bool:\n        \"\"\"Check if issue is an Epic.\"\"\"\n        issuetype = best_effort_get_field_from_issue(issue, _FIELD_ISSUETYPE)\n        if issuetype is None:\n            return False\n        return issuetype.name.lower() == \"epic\"\n\n    def _is_parent_epic(self, parent: Any) -> bool:\n        \"\"\"Check if a parent reference is an Epic.\n\n        The parent object from issue.fields.parent has a different structure\n        than a full Issue, so we handle it separately.\n        \"\"\"\n        parent_issuetype = (\n            getattr(parent.fields, \"issuetype\", None)\n            if hasattr(parent, \"fields\")\n            else None\n        )\n        if parent_issuetype is None:\n            return False\n        return parent_issuetype.name.lower() == \"epic\"\n\n    def _yield_project_hierarchy_node(\n        self,\n        project_key: str,\n        project_name: str | None,\n        seen_hierarchy_node_ids: set[str],\n    ) -> Generator[HierarchyNode, None, None]:\n        \"\"\"Yield a hierarchy node for a project if not already yielded.\"\"\"\n        if project_key in seen_hierarchy_node_ids:\n            return\n\n        seen_hierarchy_node_ids.add(project_key)\n\n        yield HierarchyNode(\n            raw_node_id=project_key,\n            raw_parent_id=None,  # Parent is SOURCE\n            display_name=project_name or project_key,\n            link=f\"{self.jira_base}/projects/{project_key}\",\n            node_type=HierarchyNodeType.PROJECT,\n        )\n\n    def _yield_epic_hierarchy_node(\n        self,\n        issue: Issue,\n        project_key: str,\n        seen_hierarchy_node_ids: set[str],\n    ) -> Generator[HierarchyNode, None, None]:\n        \"\"\"Yield a hierarchy node for an Epic issue.\"\"\"\n        issue_key = issue.key\n        if issue_key in seen_hierarchy_node_ids:\n            return\n\n        seen_hierarchy_node_ids.add(issue_key)\n\n        yield HierarchyNode(\n            raw_node_id=issue_key,\n            raw_parent_id=project_key,\n            display_name=f\"{issue_key}: {issue.fields.summary}\",\n            link=build_jira_url(self.jira_base, issue_key),\n            node_type=HierarchyNodeType.FOLDER,  # don't have a separate epic node type\n        )\n\n    def _yield_parent_hierarchy_node_if_epic(\n        self,\n        parent: Any,\n        project_key: str,\n        seen_hierarchy_node_ids: set[str],\n    ) -> Generator[HierarchyNode, None, None]:\n        \"\"\"Yield hierarchy node for parent issue if it's an Epic we haven't seen.\"\"\"\n        parent_key = parent.key\n        if parent_key in seen_hierarchy_node_ids:\n            return\n\n        if not self._is_parent_epic(parent):\n            # Not an epic, don't create hierarchy node for it\n            return\n\n        seen_hierarchy_node_ids.add(parent_key)\n\n        # Get summary if available\n        parent_summary = (\n            getattr(parent.fields, \"summary\", None)\n            if hasattr(parent, \"fields\")\n            else None\n        )\n        display_name = (\n            f\"{parent_key}: {parent_summary}\" if parent_summary else parent_key\n        )\n\n        yield HierarchyNode(\n            raw_node_id=parent_key,\n            raw_parent_id=project_key,\n            display_name=display_name,\n            link=build_jira_url(self.jira_base, parent_key),\n            node_type=HierarchyNodeType.FOLDER,  # don't have a separate epic node type\n        )\n\n    def _get_parent_hierarchy_raw_node_id(self, issue: Issue, project_key: str) -> str:\n        \"\"\"Determine the parent hierarchy node ID for an issue.\n\n        Returns:\n            - Epic key if issue's parent is an Epic\n            - Project key otherwise (for top-level issues or non-epic parents)\n        \"\"\"\n        parent = best_effort_get_field_from_issue(issue, _FIELD_PARENT)\n        if parent is None:\n            # No parent, directly under project\n            return project_key\n\n        if self._is_parent_epic(parent):\n            return parent.key\n\n        # For non-epic parents (e.g., story with subtasks),\n        # the document belongs directly under the project in the hierarchy\n        return project_key\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self._jira_client = build_jira_client(\n            credentials=credentials,\n            jira_base=self.jira_base,\n            scoped_token=self.scoped_token,\n        )\n        return None\n\n    def _get_jql_query(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> str:\n        \"\"\"Get the JQL query based on configuration and time range\n\n        If a custom JQL query is provided, it will be used and combined with time constraints.\n        Otherwise, the query will be constructed based on project key (if provided).\n        \"\"\"\n        start_date_str = datetime.fromtimestamp(start, tz=timezone.utc).strftime(\n            \"%Y-%m-%d %H:%M\"\n        )\n        end_date_str = datetime.fromtimestamp(end, tz=timezone.utc).strftime(\n            \"%Y-%m-%d %H:%M\"\n        )\n\n        time_jql = f\"updated >= '{start_date_str}' AND updated <= '{end_date_str}'\"\n\n        # If custom JQL query is provided, use it and combine with time constraints\n        if self.jql_query:\n            return f\"({self.jql_query}) AND {time_jql}\"\n\n        # Otherwise, use project key if provided\n        if self.jira_project:\n            base_jql = f\"project = {self.quoted_jira_project}\"\n            return f\"{base_jql} AND {time_jql}\"\n\n        return time_jql\n\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: JiraConnectorCheckpoint,\n    ) -> CheckpointOutput[JiraConnectorCheckpoint]:\n        jql = self._get_jql_query(start, end)\n        try:\n            return self._load_from_checkpoint(\n                jql, checkpoint, include_permissions=False\n            )\n        except Exception as e:\n            if is_atlassian_date_error(e):\n                jql = self._get_jql_query(start - ONE_HOUR, end)\n                return self._load_from_checkpoint(\n                    jql, checkpoint, include_permissions=False\n                )\n            raise e\n\n    def load_from_checkpoint_with_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: JiraConnectorCheckpoint,\n    ) -> CheckpointOutput[JiraConnectorCheckpoint]:\n        \"\"\"Load documents from checkpoint with permission information included.\"\"\"\n        jql = self._get_jql_query(start, end)\n        try:\n            return self._load_from_checkpoint(jql, checkpoint, include_permissions=True)\n        except Exception as e:\n            if is_atlassian_date_error(e):\n                jql = self._get_jql_query(start - ONE_HOUR, end)\n                return self._load_from_checkpoint(\n                    jql, checkpoint, include_permissions=True\n                )\n            raise e\n\n    def _load_from_checkpoint(\n        self, jql: str, checkpoint: JiraConnectorCheckpoint, include_permissions: bool\n    ) -> CheckpointOutput[JiraConnectorCheckpoint]:\n        # Get the current offset from checkpoint or start at 0\n        starting_offset = checkpoint.offset or 0\n        current_offset = starting_offset\n        new_checkpoint = copy.deepcopy(checkpoint)\n\n        # Convert checkpoint list to set for efficient lookups\n        seen_hierarchy_node_ids = set(new_checkpoint.seen_hierarchy_node_ids)\n\n        checkpoint_callback = make_checkpoint_callback(new_checkpoint)\n\n        for issue in _perform_jql_search(\n            jira_client=self.jira_client,\n            jql=jql,\n            start=current_offset,\n            max_results=_JIRA_FULL_PAGE_SIZE,\n            all_issue_ids=new_checkpoint.all_issue_ids,\n            checkpoint_callback=checkpoint_callback,\n            nextPageToken=new_checkpoint.cursor,\n            ids_done=new_checkpoint.ids_done,\n        ):\n            issue_key = issue.key\n            try:\n                # Get project info for hierarchy\n                project = best_effort_get_field_from_issue(issue, _FIELD_PROJECT)\n                project_key = project.key if project else None\n                project_name = project.name if project else None\n\n                # Yield hierarchy nodes BEFORE the document (parent-before-child)\n                if project_key:\n                    # 1. Yield project hierarchy node (if not already yielded)\n                    yield from self._yield_project_hierarchy_node(\n                        project_key, project_name, seen_hierarchy_node_ids\n                    )\n\n                    # 2. If parent is an Epic, yield hierarchy node for it\n                    parent = best_effort_get_field_from_issue(issue, _FIELD_PARENT)\n                    if parent:\n                        yield from self._yield_parent_hierarchy_node_if_epic(\n                            parent, project_key, seen_hierarchy_node_ids\n                        )\n\n                    # 3. If this issue IS an Epic, yield it as hierarchy node\n                    if self._is_epic(issue):\n                        yield from self._yield_epic_hierarchy_node(\n                            issue, project_key, seen_hierarchy_node_ids\n                        )\n\n                # Determine parent hierarchy node ID for the document\n                parent_hierarchy_raw_node_id = (\n                    self._get_parent_hierarchy_raw_node_id(issue, project_key)\n                    if project_key\n                    else None\n                )\n\n                if document := process_jira_issue(\n                    jira_base_url=self.jira_base,\n                    issue=issue,\n                    comment_email_blacklist=self.comment_email_blacklist,\n                    labels_to_skip=self.labels_to_skip,\n                    parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,\n                ):\n                    # Add permission information to the document if requested\n                    if include_permissions:\n                        document.external_access = self._get_project_permissions(\n                            project_key,\n                            add_prefix=True,  # Indexing path - prefix here\n                        )\n                    yield document\n\n            except Exception as e:\n                yield ConnectorFailure(\n                    failed_document=DocumentFailure(\n                        document_id=issue_key,\n                        document_link=build_jira_url(self.jira_base, issue_key),\n                    ),\n                    failure_message=f\"Failed to process Jira issue: {str(e)}\",\n                    exception=e,\n                )\n\n            current_offset += 1\n\n        # Update checkpoint with seen hierarchy nodes\n        new_checkpoint.seen_hierarchy_node_ids = list(seen_hierarchy_node_ids)\n\n        # Update checkpoint\n        self.update_checkpoint_for_next_run(\n            new_checkpoint, current_offset, starting_offset, _JIRA_FULL_PAGE_SIZE\n        )\n\n        return new_checkpoint\n\n    def update_checkpoint_for_next_run(\n        self,\n        checkpoint: JiraConnectorCheckpoint,\n        current_offset: int,\n        starting_offset: int,\n        page_size: int,\n    ) -> None:\n        if _is_cloud_client(self.jira_client):\n            # other updates done in the checkpoint callback\n            checkpoint.has_more = (\n                len(checkpoint.all_issue_ids) > 0 or not checkpoint.ids_done\n            )\n        else:\n            checkpoint.offset = current_offset\n            # if we didn't retrieve a full batch, we're done\n            checkpoint.has_more = current_offset - starting_offset == page_size\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002\n    ) -> GenerateSlimDocumentOutput:\n        one_day = timedelta(hours=24).total_seconds()\n\n        start = start or 0\n        end = (\n            end or datetime.now().timestamp() + one_day\n        )  # we add one day to account for any potential timezone issues\n\n        jql = self._get_jql_query(start, end)\n        checkpoint = self.build_dummy_checkpoint()\n        checkpoint_callback = make_checkpoint_callback(checkpoint)\n        prev_offset = 0\n        current_offset = 0\n        slim_doc_batch: list[SlimDocument | HierarchyNode] = []\n\n        # Track seen hierarchy nodes within this sync run\n        seen_hierarchy_node_ids: set[str] = set()\n\n        while checkpoint.has_more:\n            for issue in _perform_jql_search(\n                jira_client=self.jira_client,\n                jql=jql,\n                start=current_offset,\n                max_results=JIRA_SLIM_PAGE_SIZE,\n                all_issue_ids=checkpoint.all_issue_ids,\n                checkpoint_callback=checkpoint_callback,\n                nextPageToken=checkpoint.cursor,\n                ids_done=checkpoint.ids_done,\n            ):\n                # Get project info\n                project = best_effort_get_field_from_issue(issue, _FIELD_PROJECT)\n                project_key = project.key if project else None\n                project_name = project.name if project else None\n\n                if not project_key:\n                    continue\n\n                # Yield hierarchy nodes BEFORE the slim document (parent-before-child)\n                # 1. Yield project hierarchy node (if not already yielded)\n                for node in self._yield_project_hierarchy_node(\n                    project_key, project_name, seen_hierarchy_node_ids\n                ):\n                    slim_doc_batch.append(node)\n\n                # 2. If parent is an Epic, yield hierarchy node for it\n                parent = best_effort_get_field_from_issue(issue, _FIELD_PARENT)\n                if parent:\n                    for node in self._yield_parent_hierarchy_node_if_epic(\n                        parent, project_key, seen_hierarchy_node_ids\n                    ):\n                        slim_doc_batch.append(node)\n\n                # 3. If this issue IS an Epic, yield it as hierarchy node\n                if self._is_epic(issue):\n                    for node in self._yield_epic_hierarchy_node(\n                        issue, project_key, seen_hierarchy_node_ids\n                    ):\n                        slim_doc_batch.append(node)\n\n                # Now add the slim document\n                issue_key = best_effort_get_field_from_issue(issue, _FIELD_KEY)\n                doc_id = build_jira_url(self.jira_base, issue_key)\n\n                slim_doc_batch.append(\n                    SlimDocument(\n                        id=doc_id,\n                        # Permission sync path - don't prefix, upsert_document_external_perms handles it\n                        external_access=self._get_project_permissions(\n                            project_key, add_prefix=False\n                        ),\n                        parent_hierarchy_raw_node_id=(\n                            self._get_parent_hierarchy_raw_node_id(issue, project_key)\n                            if project_key\n                            else None\n                        ),\n                    )\n                )\n                current_offset += 1\n                if len(slim_doc_batch) >= JIRA_SLIM_PAGE_SIZE:\n                    yield slim_doc_batch\n                    slim_doc_batch = []\n            self.update_checkpoint_for_next_run(\n                checkpoint, current_offset, prev_offset, JIRA_SLIM_PAGE_SIZE\n            )\n            prev_offset = current_offset\n\n        if slim_doc_batch:\n            yield slim_doc_batch\n\n    def validate_connector_settings(self) -> None:\n        if self._jira_client is None:\n            raise ConnectorMissingCredentialError(\"Jira\")\n\n        # If a custom JQL query is set, validate it's valid\n        if self.jql_query:\n            try:\n                # Try to execute the JQL query with a small limit to validate its syntax\n                # Use next(iter(...), None) to get just the first result without\n                # forcing evaluation of all results\n                next(\n                    iter(\n                        _perform_jql_search(\n                            jira_client=self.jira_client,\n                            jql=self.jql_query,\n                            start=0,\n                            max_results=1,\n                            all_issue_ids=[],\n                        )\n                    ),\n                    None,\n                )\n            except Exception as e:\n                self._handle_jira_connector_settings_error(e)\n\n        # If a specific project is set, validate it exists\n        elif self.jira_project:\n            try:\n                self.jira_client.project(self.jira_project)\n            except Exception as e:\n                self._handle_jira_connector_settings_error(e)\n        else:\n            # If neither JQL nor project specified, validate we can access the Jira API\n            try:\n                # Try to list projects to validate access\n                self.jira_client.projects()\n            except Exception as e:\n                self._handle_jira_connector_settings_error(e)\n\n    def _handle_jira_connector_settings_error(self, e: Exception) -> None:\n        \"\"\"Helper method to handle Jira API errors consistently.\n\n        Extracts error messages from the Jira API response for all status codes when possible,\n        providing more user-friendly error messages.\n\n        Args:\n            e: The exception raised by the Jira API\n\n        Raises:\n            CredentialExpiredError: If the status code is 401\n            InsufficientPermissionsError: If the status code is 403\n            ConnectorValidationError: For other HTTP errors with extracted error messages\n        \"\"\"\n        status_code = getattr(e, \"status_code\", None)\n        logger.error(f\"Jira API error during validation: {e}\")\n\n        # Handle specific status codes with appropriate exceptions\n        if status_code == 401:\n            raise CredentialExpiredError(\n                \"Jira credential appears to be expired or invalid (HTTP 401).\"\n            )\n        elif status_code == 403:\n            raise InsufficientPermissionsError(\n                \"Your Jira token does not have sufficient permissions for this configuration (HTTP 403).\"\n            )\n        elif status_code == 429:\n            raise ConnectorValidationError(\n                \"Validation failed due to Jira rate-limits being exceeded. Please try again later.\"\n            )\n\n        # Try to extract original error message from the response\n        error_message = getattr(e, \"text\", None)\n        if error_message is None:\n            raise UnexpectedValidationError(\n                f\"Unexpected Jira error during validation: {e}\"\n            )\n\n        raise ConnectorValidationError(\n            f\"Validation failed due to Jira error: {error_message}\"\n        )\n\n    @override\n    def validate_checkpoint_json(self, checkpoint_json: str) -> JiraConnectorCheckpoint:\n        return JiraConnectorCheckpoint.model_validate_json(checkpoint_json)\n\n    @override\n    def build_dummy_checkpoint(self) -> JiraConnectorCheckpoint:\n        return JiraConnectorCheckpoint(\n            has_more=True,\n        )\n\n\ndef make_checkpoint_callback(\n    checkpoint: JiraConnectorCheckpoint,\n) -> Callable[[Iterator[list[str]], str | None], None]:\n    def checkpoint_callback(\n        issue_ids: Iterator[list[str]], pageToken: str | None\n    ) -> None:\n        for id_batch in issue_ids:\n            checkpoint.all_issue_ids.append(id_batch)\n        checkpoint.cursor = pageToken\n        # pageToken starts out as None and is only None once we've fetched all the issue ids\n        checkpoint.ids_done = pageToken is None\n\n    return checkpoint_callback\n\n\nif __name__ == \"__main__\":\n    import os\n    from onyx.utils.variable_functionality import global_version\n    from tests.daily.connectors.utils import load_all_from_connector\n\n    # For connector permission testing, set EE to true.\n    global_version.set_ee()\n\n    connector = JiraConnector(\n        jira_base_url=os.environ[\"JIRA_BASE_URL\"],\n        project_key=os.environ.get(\"JIRA_PROJECT_KEY\"),\n        comment_email_blacklist=[],\n    )\n\n    connector.load_credentials(\n        {\n            \"jira_user_email\": os.environ[\"JIRA_USER_EMAIL\"],\n            \"jira_api_token\": os.environ[\"JIRA_API_TOKEN\"],\n        }\n    )\n\n    start = 0\n    end = datetime.now().timestamp()\n\n    for slim_doc in connector.retrieve_all_slim_docs_perm_sync(\n        start=start,\n        end=end,\n    ):\n        print(slim_doc)\n\n    for doc in load_all_from_connector(\n        connector=connector,\n        start=start,\n        end=end,\n    ).documents:\n        print(doc)\n"
  },
  {
    "path": "backend/onyx/connectors/jira/utils.py",
    "content": "\"\"\"Module with custom fields processing functions\"\"\"\n\nimport os\nfrom typing import Any\nfrom typing import List\nfrom urllib.parse import urlparse\n\nfrom jira import JIRA\nfrom jira.resources import CustomFieldOption\nfrom jira.resources import Issue\nfrom jira.resources import User\n\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import scoped_url\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nPROJECT_URL_PAT = \"projects\"\nJIRA_SERVER_API_VERSION = os.environ.get(\"JIRA_SERVER_API_VERSION\") or \"2\"\nJIRA_CLOUD_API_VERSION = os.environ.get(\"JIRA_CLOUD_API_VERSION\") or \"3\"\n\n\ndef best_effort_basic_expert_info(obj: Any) -> BasicExpertInfo | None:\n    display_name = None\n    email = None\n\n    try:\n        if hasattr(obj, \"displayName\"):\n            display_name = obj.displayName\n        else:\n            display_name = obj.get(\"displayName\")\n\n        if hasattr(obj, \"emailAddress\"):\n            email = obj.emailAddress\n        else:\n            email = obj.get(\"emailAddress\")\n\n    except Exception:\n        return None\n\n    if not email and not display_name:\n        return None\n\n    return BasicExpertInfo(display_name=display_name, email=email)\n\n\ndef best_effort_get_field_from_issue(jira_issue: Issue, field: str) -> Any:\n    if hasattr(jira_issue, field):\n        return getattr(jira_issue, field)\n\n    if hasattr(jira_issue, \"fields\") and hasattr(jira_issue.fields, field):\n        return getattr(jira_issue.fields, field)\n\n    try:\n        return jira_issue.raw[\"fields\"][field]\n    except Exception:\n        return None\n\n\ndef extract_text_from_adf(adf: dict | None) -> str:\n    \"\"\"Extracts plain text from Atlassian Document Format:\n    https://developer.atlassian.com/cloud/jira/platform/apis/document/structure/\n\n    WARNING: This function is incomplete and will e.g. skip lists!\n    \"\"\"\n    # TODO: complete this function\n    texts = []\n    if adf is not None and \"content\" in adf:\n        for block in adf[\"content\"]:\n            if \"content\" in block:\n                for item in block[\"content\"]:\n                    if item[\"type\"] == \"text\":\n                        texts.append(item[\"text\"])\n    return \" \".join(texts)\n\n\ndef build_jira_url(jira_base_url: str, issue_key: str) -> str:\n    \"\"\"\n    Get the url used to access an issue in the UI.\n    \"\"\"\n    return f\"{jira_base_url}/browse/{issue_key}\"\n\n\ndef build_jira_client(\n    credentials: dict[str, Any], jira_base: str, scoped_token: bool = False\n) -> JIRA:\n\n    jira_base = scoped_url(jira_base, \"jira\") if scoped_token else jira_base\n    api_token = credentials[\"jira_api_token\"]\n    # if user provide an email we assume it's cloud\n    if \"jira_user_email\" in credentials:\n        email = credentials[\"jira_user_email\"]\n        return JIRA(\n            basic_auth=(email, api_token),\n            server=jira_base,\n            options={\"rest_api_version\": JIRA_CLOUD_API_VERSION},\n        )\n    else:\n        return JIRA(\n            token_auth=api_token,\n            server=jira_base,\n            options={\"rest_api_version\": JIRA_SERVER_API_VERSION},\n        )\n\n\ndef extract_jira_project(url: str) -> tuple[str, str]:\n    parsed_url = urlparse(url)\n    jira_base = parsed_url.scheme + \"://\" + parsed_url.netloc\n\n    # Split the path by '/' and find the position of 'projects' to get the project name\n    split_path = parsed_url.path.split(\"/\")\n    if PROJECT_URL_PAT in split_path:\n        project_pos = split_path.index(PROJECT_URL_PAT)\n        if len(split_path) > project_pos + 1:\n            jira_project = split_path[project_pos + 1]\n        else:\n            raise ValueError(\"No project name found in the URL\")\n    else:\n        raise ValueError(\"'projects' not found in the URL\")\n\n    return jira_base, jira_project\n\n\ndef get_comment_strs(\n    issue: Issue, comment_email_blacklist: tuple[str, ...] = ()\n) -> list[str]:\n    comment_strs = []\n    for comment in issue.fields.comment.comments:\n        try:\n            if isinstance(comment.body, str):\n                body_text = comment.body\n            else:\n                body_text = extract_text_from_adf(comment.raw[\"body\"])\n\n            if (\n                hasattr(comment, \"author\")\n                and hasattr(comment.author, \"emailAddress\")\n                and comment.author.emailAddress in comment_email_blacklist\n            ):\n                continue  # Skip adding comment if author's email is in blacklist\n\n            comment_strs.append(body_text)\n        except Exception as e:\n            logger.error(f\"Failed to process comment due to an error: {e}\")\n            continue\n\n    return comment_strs\n\n\ndef get_jira_project_key_from_issue(issue: Issue) -> str | None:\n    if not hasattr(issue, \"fields\"):\n        return None\n    if not hasattr(issue.fields, \"project\"):\n        return None\n    if not hasattr(issue.fields.project, \"key\"):\n        return None\n\n    return issue.fields.project.key\n\n\nclass CustomFieldExtractor:\n    @staticmethod\n    def _process_custom_field_value(value: Any) -> str:\n        \"\"\"\n        Process a custom field value to a string\n        \"\"\"\n        try:\n            if isinstance(value, str):\n                return value\n            elif isinstance(value, CustomFieldOption):\n                return value.value\n            elif isinstance(value, User):\n                return value.displayName\n            elif isinstance(value, List):\n                return \" \".join(\n                    [CustomFieldExtractor._process_custom_field_value(v) for v in value]\n                )\n            else:\n                return str(value)\n        except Exception as e:\n            logger.error(f\"Error processing custom field value {value}: {e}\")\n            return \"\"\n\n    @staticmethod\n    def get_issue_custom_fields(\n        jira: Issue, custom_fields: dict, max_value_length: int = 250\n    ) -> dict:\n        \"\"\"\n        Process all custom fields of an issue to a dictionary of strings\n        :param jira: jira_issue, bug or similar\n        :param custom_fields: custom fields dictionary\n        :param max_value_length: maximum length of the value to be processed, if exceeded, it will be truncated\n        \"\"\"\n\n        issue_custom_fields = {\n            custom_fields[key]: value\n            for key, value in jira.fields.__dict__.items()\n            if value and key in custom_fields.keys()\n        }\n\n        processed_fields = {}\n\n        if issue_custom_fields:\n            for key, value in issue_custom_fields.items():\n                processed = CustomFieldExtractor._process_custom_field_value(value)\n                # We need max length  parameter, because there are some plugins that often has very long description\n                # and there is just a technical information so we just avoid long values\n                if len(processed) < max_value_length:\n                    processed_fields[key] = processed\n\n        return processed_fields\n\n    @staticmethod\n    def get_all_custom_fields(jira_client: JIRA) -> dict:\n        \"\"\"Get all custom fields from Jira\"\"\"\n        fields = jira_client.fields()\n        fields_dct = {\n            field[\"id\"]: field[\"name\"] for field in fields if field[\"custom\"] is True\n        }\n        return fields_dct\n\n\nclass CommonFieldExtractor:\n    @staticmethod\n    def get_issue_common_fields(jira: Issue) -> dict:\n        return {\n            \"Priority\": jira.fields.priority.name if jira.fields.priority else None,\n            \"Reporter\": (\n                jira.fields.reporter.displayName if jira.fields.reporter else None\n            ),\n            \"Assignee\": (\n                jira.fields.assignee.displayName if jira.fields.assignee else None\n            ),\n            \"Status\": jira.fields.status.name if jira.fields.status else None,\n            \"Resolution\": (\n                jira.fields.resolution.name if jira.fields.resolution else None\n            ),\n        }\n"
  },
  {
    "path": "backend/onyx/connectors/linear/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/linear/connector.py",
    "content": "import os\nimport re\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\nfrom urllib.parse import urlparse\n\nimport requests\nfrom typing_extensions import override\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.app_configs import LINEAR_CLIENT_ID\nfrom onyx.configs.app_configs import LINEAR_CLIENT_SECRET\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    get_oauth_callback_uri,\n)\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import NormalizationResult\nfrom onyx.connectors.interfaces import OAuthConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import request_with_retries\n\n\nlogger = setup_logger()\n\n_NUM_RETRIES = 5\n_TIMEOUT = 60\n_LINEAR_GRAPHQL_URL = \"https://api.linear.app/graphql\"\n\n\ndef _make_query(request_body: dict[str, Any], api_key: str) -> requests.Response:\n    headers = {\n        \"Authorization\": api_key,\n        \"Content-Type\": \"application/json\",\n    }\n\n    for i in range(_NUM_RETRIES):\n        try:\n            response = requests.post(\n                _LINEAR_GRAPHQL_URL,\n                headers=headers,\n                json=request_body,\n                timeout=_TIMEOUT,\n            )\n            if not response.ok:\n                raise RuntimeError(\n                    f\"Error fetching issues from Linear: {response.text}\"\n                )\n\n            return response\n        except Exception as e:\n            if i == _NUM_RETRIES - 1:\n                raise e\n\n            logger.warning(f\"A Linear GraphQL error occurred: {e}. Retrying...\")\n\n    raise RuntimeError(\n        \"Unexpected execution when querying Linear. This should never happen.\"\n    )\n\n\nclass LinearConnector(LoadConnector, PollConnector, OAuthConnector):\n    def __init__(\n        self,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        self.batch_size = batch_size\n        self.linear_api_key: str | None = None\n\n    @classmethod\n    def oauth_id(cls) -> DocumentSource:\n        return DocumentSource.LINEAR\n\n    @classmethod\n    def oauth_authorization_url(\n        cls,\n        base_domain: str,\n        state: str,\n        additional_kwargs: dict[str, str],  # noqa: ARG003\n    ) -> str:\n        if not LINEAR_CLIENT_ID:\n            raise ValueError(\"LINEAR_CLIENT_ID environment variable must be set\")\n\n        callback_uri = get_oauth_callback_uri(base_domain, DocumentSource.LINEAR.value)\n        return (\n            f\"https://linear.app/oauth/authorize\"\n            f\"?client_id={LINEAR_CLIENT_ID}\"\n            f\"&redirect_uri={callback_uri}\"\n            f\"&response_type=code\"\n            f\"&scope=read\"\n            f\"&state={state}\"\n            f\"&prompt=consent\"  # prompts user for access; allows choosing workspace\n        )\n\n    @classmethod\n    def oauth_code_to_token(\n        cls,\n        base_domain: str,\n        code: str,\n        additional_kwargs: dict[str, str],  # noqa: ARG003\n    ) -> dict[str, Any]:\n        data = {\n            \"code\": code,\n            \"redirect_uri\": get_oauth_callback_uri(\n                base_domain, DocumentSource.LINEAR.value\n            ),\n            \"client_id\": LINEAR_CLIENT_ID,\n            \"client_secret\": LINEAR_CLIENT_SECRET,\n            \"grant_type\": \"authorization_code\",\n        }\n        headers = {\"Content-Type\": \"application/x-www-form-urlencoded\"}\n\n        response = request_with_retries(\n            method=\"POST\",\n            url=\"https://api.linear.app/oauth/token\",\n            data=data,\n            headers=headers,\n            backoff=0,\n            delay=0.1,\n        )\n        if not response.ok:\n            raise RuntimeError(f\"Failed to exchange code for token: {response.text}\")\n\n        token_data = response.json()\n\n        return {\n            \"access_token\": token_data[\"access_token\"],\n        }\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        if \"linear_api_key\" in credentials:\n            self.linear_api_key = cast(str, credentials[\"linear_api_key\"])\n        elif \"access_token\" in credentials:\n            self.linear_api_key = \"Bearer \" + cast(str, credentials[\"access_token\"])\n        else:\n            # May need to handle case in the future if the OAuth flow expires\n            raise ConnectorMissingCredentialError(\"Linear\")\n\n        return None\n\n    def _process_issues(\n        self, start_str: datetime | None = None, end_str: datetime | None = None\n    ) -> GenerateDocumentsOutput:\n        if self.linear_api_key is None:\n            raise ConnectorMissingCredentialError(\"Linear\")\n\n        lte_filter = f'lte: \"{end_str}\"' if end_str else \"\"\n        gte_filter = f'gte: \"{start_str}\"' if start_str else \"\"\n        updatedAtFilter = f\"\"\"\n            {lte_filter}\n            {gte_filter}\n        \"\"\"\n\n        query = (\n            \"\"\"\n            query IterateIssueBatches($first: Int, $after: String) {\n                issues(\n                    orderBy: updatedAt,\n                    first: $first,\n                    after: $after,\n                    filter: {\n                        updatedAt: {\n        \"\"\"\n            + updatedAtFilter\n            + \"\"\"\n                        },\n\n                    }\n                ) {\n                    edges {\n                        node {\n                            id\n                            createdAt\n                            updatedAt\n                            archivedAt\n                            number\n                            title\n                            priority\n                            estimate\n                            sortOrder\n                            startedAt\n                            completedAt\n                            startedTriageAt\n                            triagedAt\n                            canceledAt\n                            autoClosedAt\n                            autoArchivedAt\n                            dueDate\n                            slaStartedAt\n                            slaBreachesAt\n                            trashed\n                            snoozedUntilAt\n                            team {\n                                name\n                            }\n                            creator {\n                                name\n                                email\n                            }\n                            assignee {\n                                name\n                                email\n                            }\n                            previousIdentifiers\n                            subIssueSortOrder\n                            priorityLabel\n                            identifier\n                            url\n                            branchName\n                            state {\n                                id\n                                name\n                            }\n                            customerTicketCount\n                            description\n                            comments {\n                                nodes {\n                                    url\n                                    body\n                                }\n                            }\n                        }\n                    }\n                    pageInfo {\n                        hasNextPage\n                        endCursor\n                    }\n                }\n            }\n        \"\"\"\n        )\n\n        has_more = True\n        endCursor = None\n        while has_more:\n            graphql_query = {\n                \"query\": query,\n                \"variables\": {\n                    \"first\": self.batch_size,\n                    \"after\": endCursor,\n                },\n            }\n            logger.debug(f\"Requesting issues from Linear with query: {graphql_query}\")\n\n            response = _make_query(graphql_query, self.linear_api_key)\n            response_json = response.json()\n            logger.debug(f\"Raw response from Linear: {response_json}\")\n            edges = response_json[\"data\"][\"issues\"][\"edges\"]\n\n            documents: list[Document | HierarchyNode] = []\n            for edge in edges:\n                node = edge[\"node\"]\n                # Create sections for description and comments\n                sections = [\n                    TextSection(\n                        link=node[\"url\"],\n                        text=node[\"description\"] or \"\",\n                    )\n                ]\n\n                # Add comment sections\n                for comment in node[\"comments\"][\"nodes\"]:\n                    sections.append(\n                        TextSection(\n                            link=node[\"url\"],\n                            text=comment[\"body\"] or \"\",\n                        )\n                    )\n\n                # Cast the sections list to the expected type\n                typed_sections = cast(list[TextSection | ImageSection], sections)\n\n                # Extract team name for hierarchy\n                team_name = (node.get(\"team\") or {}).get(\"name\") or \"Unknown Team\"\n                identifier = node.get(\"identifier\", node[\"id\"])\n\n                documents.append(\n                    Document(\n                        id=node[\"id\"],\n                        sections=typed_sections,\n                        source=DocumentSource.LINEAR,\n                        semantic_identifier=f\"[{node['identifier']}] {node['title']}\",\n                        title=node[\"title\"],\n                        doc_updated_at=time_str_to_utc(node[\"updatedAt\"]),\n                        doc_metadata={\n                            \"hierarchy\": {\n                                \"source_path\": [team_name],\n                                \"team_name\": team_name,\n                                \"identifier\": identifier,\n                            }\n                        },\n                        metadata={\n                            k: str(v)\n                            for k, v in {\n                                \"team\": (node.get(\"team\") or {}).get(\"name\"),\n                                \"creator\": node.get(\"creator\"),\n                                \"assignee\": node.get(\"assignee\"),\n                                \"state\": (node.get(\"state\") or {}).get(\"name\"),\n                                \"priority\": node.get(\"priority\"),\n                                \"estimate\": node.get(\"estimate\"),\n                                \"started_at\": node.get(\"startedAt\"),\n                                \"completed_at\": node.get(\"completedAt\"),\n                                \"created_at\": node.get(\"createdAt\"),\n                                \"due_date\": node.get(\"dueDate\"),\n                            }.items()\n                            if v is not None\n                        },\n                    )\n                )\n            yield documents\n\n            endCursor = response_json[\"data\"][\"issues\"][\"pageInfo\"][\"endCursor\"]\n            has_more = response_json[\"data\"][\"issues\"][\"pageInfo\"][\"hasNextPage\"]\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        yield from self._process_issues()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        start_time = datetime.fromtimestamp(start, tz=timezone.utc)\n        end_time = datetime.fromtimestamp(end, tz=timezone.utc)\n\n        yield from self._process_issues(start_str=start_time, end_str=end_time)\n\n    @classmethod\n    @override\n    def normalize_url(cls, url: str) -> NormalizationResult:\n        \"\"\"Extract Linear issue identifier from URL.\n\n        Linear URLs are like: https://linear.app/team/issue/IDENTIFIER/...\n        Returns the identifier (e.g., \"DAN-2327\") which can be used to match Document.link.\n        \"\"\"\n        parsed = urlparse(url)\n        netloc = parsed.netloc.lower()\n\n        if \"linear.app\" not in netloc:\n            return NormalizationResult(normalized_url=None, use_default=False)\n\n        # Extract identifier from path: /team/issue/IDENTIFIER/...\n        # Pattern: /{team}/issue/{identifier}/...\n        path_parts = [p for p in parsed.path.split(\"/\") if p]\n        if len(path_parts) >= 3 and path_parts[1] == \"issue\":\n            identifier = path_parts[2]\n            # Validate identifier format (e.g., \"DAN-2327\")\n            if re.match(r\"^[A-Z]+-\\d+$\", identifier):\n                return NormalizationResult(normalized_url=identifier, use_default=False)\n\n        return NormalizationResult(normalized_url=None, use_default=False)\n\n\nif __name__ == \"__main__\":\n    connector = LinearConnector()\n    connector.load_credentials({\"linear_api_key\": os.environ[\"LINEAR_API_KEY\"]})\n\n    document_batches = connector.load_from_state()\n    print(next(document_batches))\n"
  },
  {
    "path": "backend/onyx/connectors/loopio/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/loopio/connector.py",
    "content": "import json\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nfrom oauthlib.oauth2 import BackendApplicationClient\nfrom requests_oauthlib import OAuth2Session  # type: ignore\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.html_utils import parse_html_page_basic\nfrom onyx.file_processing.html_utils import strip_excessive_newlines_and_spaces\nfrom onyx.utils.logger import setup_logger\n\nLOOPIO_API_BASE = \"https://api.loopio.com/\"\nLOOPIO_AUTH_URL = LOOPIO_API_BASE + \"oauth2/access_token\"\nLOOPIO_DATA_URL = LOOPIO_API_BASE + \"data/\"\n\nlogger = setup_logger()\n\n\nclass LoopioConnector(LoadConnector, PollConnector):\n    def __init__(\n        self,\n        loopio_stack_name: str | None = None,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        self.batch_size = batch_size\n        self.loopio_client_id: str | None = None\n        self.loopio_client_token: str | None = None\n        self.loopio_stack_name = loopio_stack_name\n\n    def _fetch_data(\n        self, resource: str, params: dict[str, str | int]\n    ) -> Generator[dict[str, Any], None, None]:\n        client = BackendApplicationClient(\n            client_id=self.loopio_client_id, scope=[\"library:read\"]\n        )\n        session = OAuth2Session(client=client)\n        session.fetch_token(\n            token_url=LOOPIO_AUTH_URL,\n            client_id=self.loopio_client_id,\n            client_secret=self.loopio_client_token,\n        )\n        page = 0\n        stop_at_page = 1\n        while (page := page + 1) <= stop_at_page:\n            params[\"page\"] = page\n            response = session.request(\n                \"GET\",\n                LOOPIO_DATA_URL + resource,\n                headers={\"Accept\": \"application/json\"},\n                params=params,\n            )\n            if response.status_code == 400:\n                logger.error(\n                    f\"Loopio API returned 400 for {resource} with params {params}\",\n                )\n                logger.error(response.text)\n            response.raise_for_status()\n            response_data = json.loads(response.text)\n            stop_at_page = response_data.get(\"totalPages\", 1)\n            yield response_data\n\n    def _build_search_filter(\n        self, stack_name: str | None, start: str | None, end: str | None\n    ) -> dict[str, Any]:\n        filter: dict[str, Any] = {}\n        if start is not None and end is not None:\n            filter[\"lastUpdatedDate\"] = {\"gte\": start, \"lt\": end}\n\n        if stack_name is not None:\n            # Right now this is fetching the stacks every time, which is not ideal.\n            # We should update this later to store the ID when we create the Connector\n            for stack in self._fetch_data(resource=\"v2/stacks\", params={}):\n                for item in stack[\"items\"]:\n                    if item[\"name\"] == stack_name:\n                        filter[\"locations\"] = [{\"stackID\": item[\"id\"]}]\n                        break\n            if \"locations\" not in filter:\n                raise ValueError(f\"Stack {stack_name} not found in Loopio\")\n        return filter\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self.loopio_subdomain = credentials[\"loopio_subdomain\"]\n        self.loopio_client_id = credentials[\"loopio_client_id\"]\n        self.loopio_client_token = credentials[\"loopio_client_token\"]\n        return None\n\n    def _process_entries(\n        self, start: str | None = None, end: str | None = None\n    ) -> GenerateDocumentsOutput:\n        if self.loopio_client_id is None or self.loopio_client_token is None:\n            raise ConnectorMissingCredentialError(\"Loopio\")\n\n        filter = self._build_search_filter(\n            stack_name=self.loopio_stack_name, start=start, end=end\n        )\n        params: dict[str, str | int] = {\"pageSize\": self.batch_size}\n        params[\"filter\"] = json.dumps(filter)\n\n        doc_batch: list[Document | HierarchyNode] = []\n        for library_entries in self._fetch_data(\n            resource=\"v2/libraryEntries\", params=params\n        ):\n            for entry in library_entries.get(\"items\", []):\n                link = f\"https://{self.loopio_subdomain}.loopio.com/library?entry={entry['id']}\"\n                topic = \"/\".join(\n                    part[\"name\"] for part in entry[\"location\"].values() if part\n                )\n\n                answer_text = entry.get(\"answer\", {}).get(\"text\", \"\")\n                if not answer_text:\n                    logger.warning(\n                        f\"The Library entry {entry['id']} has no answer text. Skipping.\"\n                    )\n                    continue\n\n                try:\n                    answer = parse_html_page_basic(answer_text)\n                except Exception as e:\n                    logger.error(f\"Error parsing HTML for entry {entry['id']}: {e}\")\n                    continue\n\n                questions = [\n                    question.get(\"text\").replace(\"\\xa0\", \" \")\n                    for question in entry[\"questions\"]\n                    if question.get(\"text\")\n                ]\n                questions_string = strip_excessive_newlines_and_spaces(\n                    \"\\n\".join(questions)\n                )\n                content_text = f\"{answer}\\n\\nRelated Questions: {questions_string}\"\n                content_text = strip_excessive_newlines_and_spaces(\n                    content_text.replace(\"\\xa0\", \" \")\n                )\n\n                last_updated = time_str_to_utc(entry[\"lastUpdatedDate\"])\n                last_reviewed = (\n                    time_str_to_utc(entry[\"lastReviewedDate\"])\n                    if entry.get(\"lastReviewedDate\")\n                    else None\n                )\n\n                # For Onyx, we decay document score overtime, either last_updated or\n                # last_reviewed is a good enough signal for the document's recency\n                latest_time = (\n                    max(last_reviewed, last_updated) if last_reviewed else last_updated\n                )\n                creator = entry.get(\"creator\")\n                last_updated_by = entry.get(\"lastUpdatedBy\")\n                last_reviewed_by = entry.get(\"lastReviewedBy\")\n\n                primary_owners: list[BasicExpertInfo] = [\n                    BasicExpertInfo(display_name=owner.get(\"name\"))\n                    for owner in [creator, last_updated_by]\n                    if owner is not None\n                ]\n                secondary_owners: list[BasicExpertInfo] = [\n                    BasicExpertInfo(display_name=owner.get(\"name\"))\n                    for owner in [last_reviewed_by]\n                    if owner is not None\n                ]\n                doc_batch.append(\n                    Document(\n                        id=str(entry[\"id\"]),\n                        sections=[TextSection(link=link, text=content_text)],\n                        source=DocumentSource.LOOPIO,\n                        semantic_identifier=questions[0],\n                        doc_updated_at=latest_time,\n                        primary_owners=primary_owners,\n                        secondary_owners=secondary_owners,\n                        metadata={\n                            \"topic\": topic,\n                            \"questions\": \"\\n\".join(questions),\n                            \"creator\": creator.get(\"name\") if creator else \"\",\n                        },\n                    )\n                )\n\n            if len(doc_batch) >= self.batch_size:\n                yield doc_batch\n                doc_batch = []\n        if len(doc_batch) > 0:\n            yield doc_batch\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        return self._process_entries()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        start_time = datetime.fromtimestamp(start, tz=timezone.utc).isoformat(\n            timespec=\"seconds\"\n        )\n        end_time = datetime.fromtimestamp(end, tz=timezone.utc).isoformat(\n            timespec=\"seconds\"\n        )\n\n        return self._process_entries(start_time, end_time)\n\n\nif __name__ == \"__main__\":\n    import os\n\n    connector = LoopioConnector(\n        loopio_stack_name=os.environ.get(\"LOOPIO_STACK_NAME\", None)\n    )\n    connector.load_credentials(\n        {\n            \"loopio_client_id\": os.environ[\"LOOPIO_CLIENT_ID\"],\n            \"loopio_client_token\": os.environ[\"LOOPIO_CLIENT_TOKEN\"],\n            \"loopio_subdomain\": os.environ[\"LOOPIO_SUBDOMAIN\"],\n        }\n    )\n\n    latest_docs = connector.load_from_state()\n    print(next(latest_docs))\n"
  },
  {
    "path": "backend/onyx/connectors/mediawiki/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/mediawiki/family.py",
    "content": "from __future__ import annotations\n\nimport builtins\nimport functools\nimport itertools\nimport tempfile\nfrom typing import Any\nfrom unittest import mock\nfrom urllib.parse import urlparse\nfrom urllib.parse import urlunparse\n\nfrom pywikibot import family  # type: ignore[import-untyped]\nfrom pywikibot import pagegenerators\nfrom pywikibot.scripts import generate_family_file  # type: ignore[import-untyped]\nfrom pywikibot.scripts.generate_user_files import pywikibot  # type: ignore[import-untyped]\n\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\npywikibot.config.base_dir = tempfile.TemporaryDirectory().name\n\n\n@mock.patch.object(\n    builtins, \"print\", lambda *args: logger.info(\"\\t\".join(map(str, args)))\n)\nclass FamilyFileGeneratorInMemory(generate_family_file.FamilyFileGenerator):\n    \"\"\"A subclass of FamilyFileGenerator that writes the family file to memory instead of to disk.\"\"\"\n\n    def __init__(\n        self,\n        url: str,\n        name: str,\n        dointerwiki: str | bool = True,\n        verify: str | bool = True,\n    ):\n        \"\"\"Initialize the FamilyFileGeneratorInMemory.\"\"\"\n\n        url_parse = urlparse(url, \"https\")\n        if not url_parse.netloc and url_parse.path:\n            url = urlunparse(\n                (url_parse.scheme, url_parse.path, url_parse.netloc, *url_parse[3:])\n            )\n        else:\n            url = urlunparse(url_parse)\n        assert isinstance(url, str)\n\n        if any(x not in generate_family_file.NAME_CHARACTERS for x in name):\n            raise ValueError(\n                f'ERROR: Name of family \"{name}\" must be ASCII letters and digits [a-zA-Z0-9]',\n            )\n\n        if isinstance(dointerwiki, bool):\n            dointerwiki = \"Y\" if dointerwiki else \"N\"\n        assert isinstance(dointerwiki, str)\n\n        if isinstance(verify, bool):\n            verify = \"Y\" if verify else \"N\"\n        assert isinstance(verify, str)\n\n        super().__init__(url, name, dointerwiki, verify)\n        self.family_definition: type[family.Family] | None = None\n\n    def get_params(self) -> bool:\n        \"\"\"Get the parameters for the family class definition.\n\n        This override prevents the method from prompting the user for input (which would be impossible in this context).\n        We do all the input validation in the constructor.\n        \"\"\"\n        return True\n\n    def writefile(self, verify: Any) -> None:  # noqa: ARG002\n        \"\"\"Write the family file.\n\n        This overrides the method in the parent class to write the family definition to memory instead of to disk.\n\n        Args:\n            verify: unused argument necessary to match the signature of the method in the parent class.\n        \"\"\"\n        code_hostname_pairs = {\n            f\"{k}\": f\"{urlparse(w.server).netloc}\" for k, w in self.wikis.items()\n        }\n\n        code_path_pairs = {f\"{k}\": f\"{w.scriptpath}\" for k, w in self.wikis.items()}\n\n        code_protocol_pairs = {\n            f\"{k}\": f\"{urlparse(w.server).scheme}\" for k, w in self.wikis.items()\n        }\n\n        class Family(family.Family):  # noqa: D101\n            \"\"\"The family definition for the wiki.\"\"\"\n\n            name = \"%(name)s\"\n            langs = code_hostname_pairs\n\n            def scriptpath(self, code: str) -> str:\n                return code_path_pairs[code]\n\n            def protocol(self, code: str) -> str:\n                return code_protocol_pairs[code]\n\n        self.family_definition = Family\n\n\n@functools.lru_cache(maxsize=None)\ndef generate_family_class(url: str, name: str) -> type[family.Family]:\n    \"\"\"Generate a family file for a given URL and name.\n\n    Args:\n        url: The URL of the wiki.\n        name: The short name of the wiki (customizable by the user).\n\n    Returns:\n        The family definition.\n\n    Raises:\n        ValueError: If the family definition was not generated.\n    \"\"\"\n\n    generator = FamilyFileGeneratorInMemory(url, name, \"Y\", \"Y\")\n    generator.run()\n    if generator.family_definition is None:\n        raise ValueError(\"Family definition was not generated.\")\n    return generator.family_definition\n\n\ndef family_class_dispatch(url: str, name: str) -> type[family.Family]:\n    \"\"\"Find or generate a family class for a given URL and name.\n\n    Args:\n        url: The URL of the wiki.\n        name: The short name of the wiki (customizable by the user).\n\n    \"\"\"\n    if \"wikipedia\" in url:\n        import pywikibot.families.wikipedia_family  # type: ignore[import-untyped]\n\n        return pywikibot.families.wikipedia_family.Family\n    # TODO: Support additional families pre-defined in `pywikibot.families.*_family.py` files\n    return generate_family_class(url, name)\n\n\nif __name__ == \"__main__\":\n    url = \"fallout.fandom.com/wiki/Fallout_Wiki\"\n    name = \"falloutfandom\"\n\n    categories: list[str] = []\n    pages = [\"Fallout: New Vegas\"]\n    recursion_depth = 1\n    family_type = generate_family_class(url, name)\n\n    site = pywikibot.Site(fam=family_type(), code=\"en\")\n    categories = [\n        pywikibot.Category(site, f\"Category:{category.replace(' ', '_')}\")\n        for category in categories\n    ]\n    pages = [pywikibot.Page(site, page) for page in pages]\n    all_pages = itertools.chain(\n        pages,\n        *[\n            pagegenerators.CategorizedPageGenerator(category, recurse=recursion_depth)\n            for category in categories\n        ],\n    )\n    for page in all_pages:\n        print(page.title())\n        print(page.text[:1000])\n"
  },
  {
    "path": "backend/onyx/connectors/mediawiki/wiki.py",
    "content": "from __future__ import annotations\n\nimport datetime\nimport itertools\nimport tempfile\nfrom collections.abc import Iterator\nfrom typing import Any\nfrom typing import cast\nfrom typing import ClassVar\n\nimport pywikibot.time  # type: ignore[import-untyped]\nfrom pywikibot import pagegenerators\nfrom pywikibot import textlib\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.mediawiki.family import family_class_dispatch\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\npywikibot.config.base_dir = tempfile.TemporaryDirectory().name\n\n\ndef pywikibot_timestamp_to_utc_datetime(\n    timestamp: pywikibot.time.Timestamp,\n) -> datetime.datetime:\n    \"\"\"Convert a pywikibot timestamp to a datetime object in UTC.\n\n    Args:\n        timestamp: The pywikibot timestamp to convert.\n\n    Returns:\n        A datetime object in UTC.\n    \"\"\"\n    return datetime.datetime.astimezone(timestamp, tz=datetime.timezone.utc)\n\n\ndef get_doc_from_page(\n    page: pywikibot.Page, site: pywikibot.Site | None, source_type: DocumentSource\n) -> Document:\n    \"\"\"Generate Onyx Document from a MediaWiki page object.\n\n    Args:\n        page: Page from a MediaWiki site.\n        site: MediaWiki site (used to parse the sections of the page using the site template, if available).\n        source_type: Source of the document.\n\n    Returns:\n        Generated document.\n    \"\"\"\n    page_text = page.text\n    sections_extracted: textlib.Content = textlib.extract_sections(page_text, site)\n\n    sections = [\n        TextSection(\n            link=f\"{page.full_url()}#\" + section.heading.replace(\" \", \"_\"),\n            text=section.title + section.content,\n        )\n        for section in sections_extracted.sections\n    ]\n    sections.append(\n        TextSection(\n            link=page.full_url(),\n            text=sections_extracted.header,\n        )\n    )\n\n    return Document(\n        source=source_type,\n        title=page.title(),\n        doc_updated_at=pywikibot_timestamp_to_utc_datetime(\n            page.latest_revision.timestamp\n        ),\n        sections=cast(list[TextSection | ImageSection], sections),\n        semantic_identifier=page.title(),\n        metadata={\"categories\": [category.title() for category in page.categories()]},\n        id=f\"MEDIAWIKI_{page.pageid}_{page.full_url()}\",\n    )\n\n\nclass MediaWikiConnector(LoadConnector, PollConnector):\n    \"\"\"A connector for MediaWiki wikis.\n\n    Args:\n        hostname: The hostname of the wiki.\n        categories: The categories to include in the index.\n        pages: The pages to include in the index.\n        recurse_depth: The depth to recurse into categories. -1 means unbounded recursion.\n        language_code: The language code of the wiki.\n        batch_size: The batch size for loading documents.\n\n    Raises:\n        ValueError: If `recurse_depth` is not an integer greater than or equal to -1.\n    \"\"\"\n\n    document_source_type: ClassVar[DocumentSource] = DocumentSource.MEDIAWIKI\n    \"\"\"DocumentSource type for all documents generated by instances of this class. Can be overridden for connectors\n    tailored for specific sites.\"\"\"\n\n    def __init__(\n        self,\n        hostname: str,\n        categories: list[str],\n        pages: list[str],\n        recurse_depth: int,\n        language_code: str = \"en\",\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        if recurse_depth < -1:\n            raise ValueError(\n                f\"recurse_depth must be an integer greater than or equal to -1. Got {recurse_depth} instead.\"\n            )\n        # -1 means infinite recursion, which `pywikibot` will only do with `True`\n        self.recurse_depth: bool | int = True if recurse_depth == -1 else recurse_depth\n\n        self.batch_size = batch_size\n\n        # short names can only have ascii letters and digits\n        self.family = family_class_dispatch(hostname, \"WikipediaConnector\")()\n        self.site = pywikibot.Site(fam=self.family, code=language_code)\n        self.categories = [\n            pywikibot.Category(\n                self.site,\n                (\n                    f\"{category.replace(' ', '_')}\"\n                    if category.startswith(\"Category:\")\n                    else f\"Category:{category.replace(' ', '_')}\"\n                ),\n            )\n            for category in categories\n        ]\n\n        self.pages = []\n        for page in pages:\n            if not page:\n                continue\n            self.pages.append(pywikibot.Page(self.site, page))\n\n    def load_credentials(\n        self,\n        credentials: dict[str, Any],  # noqa: ARG002\n    ) -> dict[str, Any] | None:\n        \"\"\"Load credentials for a MediaWiki site.\n\n        Note:\n            For most read-only operations, MediaWiki API credentials are not necessary.\n            This method can be overridden in the event that a particular MediaWiki site\n            requires credentials.\n        \"\"\"\n        return None\n\n    def _get_doc_batch(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> GenerateDocumentsOutput:\n        \"\"\"Request batches of pages from a MediaWiki site.\n\n        Args:\n            start: The beginning of the time period of pages to request.\n            end: The end of the time period of pages to request.\n\n        Yields:\n            Lists of Documents containing each parsed page in a batch.\n        \"\"\"\n        doc_batch: list[Document | HierarchyNode] = []\n\n        # Pywikibot can handle batching for us, including only loading page contents when we finally request them.\n        category_pages = [\n            pagegenerators.PreloadingGenerator(\n                pagegenerators.EdittimeFilterPageGenerator(\n                    pagegenerators.CategorizedPageGenerator(\n                        category, recurse=self.recurse_depth\n                    ),\n                    last_edit_start=(\n                        datetime.datetime.fromtimestamp(start) if start else None\n                    ),\n                    last_edit_end=datetime.datetime.fromtimestamp(end) if end else None,\n                ),\n                groupsize=self.batch_size,\n            )\n            for category in self.categories\n        ]\n\n        # Since we can specify both individual pages and categories, we need to iterate over all of them.\n        all_pages: Iterator[pywikibot.Page] = itertools.chain(\n            self.pages, *category_pages\n        )\n        for page in all_pages:\n            logger.info(\n                f\"MediaWikiConnector: title='{page.title()}' url={page.full_url()}\"\n            )\n            doc_batch.append(\n                get_doc_from_page(page, self.site, self.document_source_type)\n            )\n            if len(doc_batch) >= self.batch_size:\n                yield doc_batch\n                doc_batch = []\n        if doc_batch:\n            yield doc_batch\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        \"\"\"Load all documents from the source.\n\n        Returns:\n            A generator of documents.\n        \"\"\"\n        return self.poll_source(None, None)\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None\n    ) -> GenerateDocumentsOutput:\n        \"\"\"Poll the source for new documents.\n\n        Args:\n            start: The start of the time range to poll.\n            end: The end of the time range to poll.\n\n        Returns:\n            A generator of documents.\n        \"\"\"\n        return self._get_doc_batch(start, end)\n\n\nif __name__ == \"__main__\":\n    HOSTNAME = \"fallout.fandom.com\"\n    test_connector = MediaWikiConnector(\n        hostname=HOSTNAME,\n        categories=[\"Fallout:_New_Vegas_factions\"],\n        pages=[\"Fallout: New Vegas\"],\n        recurse_depth=1,\n    )\n\n    all_docs = list(test_connector.load_from_state())\n    print(\"All docs\", all_docs)\n    current = datetime.datetime.now().timestamp()\n    one_day_ago = current - 30 * 24 * 60 * 60  # 30 days\n\n    latest_docs = list(test_connector.poll_source(one_day_ago, current))\n\n    print(\"Latest docs\", latest_docs)\n"
  },
  {
    "path": "backend/onyx/connectors/microsoft_graph_env.py",
    "content": "\"\"\"Inverse mapping from user-facing Microsoft host URLs to the SDK's AzureEnvironment.\n\nThe office365 library's GraphClient requires an ``AzureEnvironment`` string\n(e.g. ``\"Global\"``, ``\"GCC High\"``) to route requests to the correct national\ncloud.  Our connectors instead expose free-text ``authority_host`` and\n``graph_api_host`` fields so the frontend doesn't need to know about SDK\ninternals.\n\nThis module bridges the gap: given the two host URLs the user configured, it\nresolves the matching ``AzureEnvironment`` value (and the implied SharePoint\ndomain suffix) so callers can pass ``environment=…`` to ``GraphClient``.\n\"\"\"\n\nfrom office365.graph_client import AzureEnvironment  # type: ignore[import-untyped]\nfrom pydantic import BaseModel\n\nfrom onyx.connectors.exceptions import ConnectorValidationError\n\n\nclass MicrosoftGraphEnvironment(BaseModel):\n    \"\"\"One row of the inverse mapping.\"\"\"\n\n    environment: str\n    graph_host: str\n    authority_host: str\n    sharepoint_domain_suffix: str\n\n\n_ENVIRONMENTS: list[MicrosoftGraphEnvironment] = [\n    MicrosoftGraphEnvironment(\n        environment=AzureEnvironment.Global,\n        graph_host=\"https://graph.microsoft.com\",\n        authority_host=\"https://login.microsoftonline.com\",\n        sharepoint_domain_suffix=\"sharepoint.com\",\n    ),\n    MicrosoftGraphEnvironment(\n        environment=AzureEnvironment.USGovernmentHigh,\n        graph_host=\"https://graph.microsoft.us\",\n        authority_host=\"https://login.microsoftonline.us\",\n        sharepoint_domain_suffix=\"sharepoint.us\",\n    ),\n    MicrosoftGraphEnvironment(\n        environment=AzureEnvironment.USGovernmentDoD,\n        graph_host=\"https://dod-graph.microsoft.us\",\n        authority_host=\"https://login.microsoftonline.us\",\n        sharepoint_domain_suffix=\"sharepoint.us\",\n    ),\n    MicrosoftGraphEnvironment(\n        environment=AzureEnvironment.China,\n        graph_host=\"https://microsoftgraph.chinacloudapi.cn\",\n        authority_host=\"https://login.chinacloudapi.cn\",\n        sharepoint_domain_suffix=\"sharepoint.cn\",\n    ),\n    MicrosoftGraphEnvironment(\n        environment=AzureEnvironment.Germany,\n        graph_host=\"https://graph.microsoft.de\",\n        authority_host=\"https://login.microsoftonline.de\",\n        sharepoint_domain_suffix=\"sharepoint.de\",\n    ),\n]\n\n_GRAPH_HOST_INDEX: dict[str, MicrosoftGraphEnvironment] = {\n    env.graph_host: env for env in _ENVIRONMENTS\n}\n\n\ndef resolve_microsoft_environment(\n    graph_api_host: str,\n    authority_host: str,\n) -> MicrosoftGraphEnvironment:\n    \"\"\"Return the ``MicrosoftGraphEnvironment`` that matches the supplied hosts.\n\n    Raises ``ConnectorValidationError`` when the combination is unknown or\n    internally inconsistent (e.g. a GCC-High graph host paired with a\n    commercial authority host).\n    \"\"\"\n    graph_api_host = graph_api_host.rstrip(\"/\")\n    authority_host = authority_host.rstrip(\"/\")\n\n    env = _GRAPH_HOST_INDEX.get(graph_api_host)\n    if env is None:\n        known = \", \".join(sorted(_GRAPH_HOST_INDEX))\n        raise ConnectorValidationError(\n            f\"Unsupported Microsoft Graph API host '{graph_api_host}'. Recognised hosts: {known}\"\n        )\n\n    if env.authority_host != authority_host:\n        raise ConnectorValidationError(\n            f\"Authority host '{authority_host}' is inconsistent with \"\n            f\"graph API host '{graph_api_host}'. \"\n            f\"Expected authority host '{env.authority_host}' \"\n            f\"for the {env.environment} environment.\"\n        )\n\n    return env\n"
  },
  {
    "path": "backend/onyx/connectors/mock_connector/connector.py",
    "content": "from typing import Any\n\nimport httpx\nfrom pydantic import BaseModel\nfrom typing_extensions import override\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.interfaces import CheckpointedConnectorWithPermSync\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\nEXTERNAL_USER_EMAILS = {\"test@example.com\", \"admin@example.com\"}\nEXTERNAL_USER_GROUP_IDS = {\"mock-group-1\", \"mock-group-2\"}\n\n\nclass MockConnectorCheckpoint(ConnectorCheckpoint):\n    last_document_id: str | None = None\n\n\nclass SingleConnectorYield(BaseModel):\n    documents: list[Document]\n    checkpoint: MockConnectorCheckpoint\n    failures: list[ConnectorFailure]\n    unhandled_exception: str | None = None\n\n\nclass MockConnector(CheckpointedConnectorWithPermSync[MockConnectorCheckpoint]):\n    def __init__(\n        self,\n        mock_server_host: str,\n        mock_server_port: int,\n    ) -> None:\n        self.mock_server_host = mock_server_host\n        self.mock_server_port = mock_server_port\n        self.client = httpx.Client(timeout=30.0)\n\n        self.connector_yields: list[SingleConnectorYield] | None = None\n        self.current_yield_index: int = 0\n\n    def load_credentials(\n        self,\n        credentials: dict[str, Any],  # noqa: ARG002\n    ) -> dict[str, Any] | None:\n        response = self.client.get(self._get_mock_server_url(\"get-documents\"))\n        response.raise_for_status()\n        data = response.json()\n\n        self.connector_yields = [\n            SingleConnectorYield(**yield_data) for yield_data in data\n        ]\n        return None\n\n    def _get_mock_server_url(self, endpoint: str) -> str:\n        return f\"http://{self.mock_server_host}:{self.mock_server_port}/{endpoint}\"\n\n    def _save_checkpoint(self, checkpoint: MockConnectorCheckpoint) -> None:\n        response = self.client.post(\n            self._get_mock_server_url(\"add-checkpoint\"),\n            json=checkpoint.model_dump(mode=\"json\"),\n        )\n        response.raise_for_status()\n\n    def _load_from_checkpoint_common(\n        self,\n        start: SecondsSinceUnixEpoch,  # noqa: ARG002\n        end: SecondsSinceUnixEpoch,  # noqa: ARG002\n        checkpoint: MockConnectorCheckpoint,\n        include_permissions: bool = False,\n    ) -> CheckpointOutput[MockConnectorCheckpoint]:\n        if self.connector_yields is None:\n            raise ValueError(\"No connector yields configured\")\n\n        # Save the checkpoint to the mock server\n        self._save_checkpoint(checkpoint)\n\n        yield_index = self.current_yield_index\n        self.current_yield_index += 1\n        current_yield = self.connector_yields[yield_index]\n\n        # If the current yield has an unhandled exception, raise it\n        # This is used to simulate an unhandled failure in the connector.\n        if current_yield.unhandled_exception:\n            raise RuntimeError(current_yield.unhandled_exception)\n\n        # yield all documents\n        for document in current_yield.documents:\n            # If permissions are requested and not already set, add mock permissions\n            if include_permissions and document.external_access is None:\n                # Add mock permissions - make documents accessible to specific users/groups\n                document.external_access = ExternalAccess(\n                    external_user_emails=EXTERNAL_USER_EMAILS,\n                    external_user_group_ids=EXTERNAL_USER_GROUP_IDS,\n                    is_public=False,\n                )\n            yield document\n\n        for failure in current_yield.failures:\n            yield failure\n\n        return current_yield.checkpoint\n\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: MockConnectorCheckpoint,\n    ) -> CheckpointOutput[MockConnectorCheckpoint]:\n        return self._load_from_checkpoint_common(\n            start, end, checkpoint, include_permissions=False\n        )\n\n    @override\n    def load_from_checkpoint_with_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: MockConnectorCheckpoint,\n    ) -> CheckpointOutput[MockConnectorCheckpoint]:\n        return self._load_from_checkpoint_common(\n            start, end, checkpoint, include_permissions=True\n        )\n\n    @override\n    def build_dummy_checkpoint(self) -> MockConnectorCheckpoint:\n        return MockConnectorCheckpoint(\n            has_more=True,\n            last_document_id=None,\n        )\n\n    def validate_checkpoint_json(self, checkpoint_json: str) -> MockConnectorCheckpoint:\n        return MockConnectorCheckpoint.model_validate_json(checkpoint_json)\n"
  },
  {
    "path": "backend/onyx/connectors/models.py",
    "content": "import sys\nfrom datetime import datetime\nfrom enum import Enum\nfrom typing import Any\nfrom typing import cast\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\nfrom pydantic import field_validator\nfrom pydantic import model_validator\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import INDEX_SEPARATOR\nfrom onyx.configs.constants import RETURN_SEPARATOR\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.db.enums import IndexModelStatus\nfrom onyx.utils.text_processing import make_url_compatible\n\n\nclass InputType(str, Enum):\n    LOAD_STATE = \"load_state\"  # e.g. loading a current full state or a save state, such as from a file\n    POLL = \"poll\"  # e.g. calling an API to get all documents in the last hour\n    EVENT = \"event\"  # e.g. registered an endpoint as a listener, and processing connector events\n    SLIM_RETRIEVAL = \"slim_retrieval\"\n\n\nclass ConnectorMissingCredentialError(PermissionError):\n    def __init__(self, connector_name: str) -> None:\n        connector_name = connector_name or \"Unknown\"\n        super().__init__(\n            f\"{connector_name} connector missing credentials, was load_credentials called?\"\n        )\n\n\nclass Section(BaseModel):\n    \"\"\"Base section class with common attributes\"\"\"\n\n    link: str | None = None\n    text: str | None = None\n    image_file_id: str | None = None\n\n\nclass TextSection(Section):\n    \"\"\"Section containing text content\"\"\"\n\n    text: str\n\n    def __sizeof__(self) -> int:\n        return sys.getsizeof(self.text) + sys.getsizeof(self.link)\n\n\nclass ImageSection(Section):\n    \"\"\"Section containing an image reference\"\"\"\n\n    image_file_id: str\n\n    def __sizeof__(self) -> int:\n        return sys.getsizeof(self.image_file_id) + sys.getsizeof(self.link)\n\n\nclass BasicExpertInfo(BaseModel):\n    \"\"\"Basic Information for the owner of a document, any of the fields can be left as None\n    Display fallback goes as follows:\n    - first_name + (optional middle_initial) + last_name\n    - display_name\n    - email\n    - first_name\n    \"\"\"\n\n    display_name: str | None = None\n    first_name: str | None = None\n    middle_initial: str | None = None\n    last_name: str | None = None\n    email: str | None = None\n\n    def get_semantic_name(self) -> str:\n        if self.first_name and self.last_name:\n            name_parts = [self.first_name]\n            if self.middle_initial:\n                name_parts.append(self.middle_initial + \".\")\n            name_parts.append(self.last_name)\n            return \" \".join([name_part.capitalize() for name_part in name_parts])\n\n        if self.display_name:\n            return self.display_name\n\n        if self.email:\n            return self.email\n\n        if self.first_name:\n            return self.first_name.capitalize()\n\n        return \"Unknown\"\n\n    def get_email(self) -> str | None:\n        return self.email or None\n\n    def __eq__(self, other: Any) -> bool:\n        if not isinstance(other, BasicExpertInfo):\n            return False\n        return (\n            self.display_name,\n            self.first_name,\n            self.middle_initial,\n            self.last_name,\n            self.email,\n        ) == (\n            other.display_name,\n            other.first_name,\n            other.middle_initial,\n            other.last_name,\n            other.email,\n        )\n\n    def __hash__(self) -> int:\n        return hash(\n            (\n                self.display_name,\n                self.first_name,\n                self.middle_initial,\n                self.last_name,\n                self.email,\n            )\n        )\n\n    def __sizeof__(self) -> int:\n        size = sys.getsizeof(self.display_name)\n        size += sys.getsizeof(self.first_name)\n        size += sys.getsizeof(self.middle_initial)\n        size += sys.getsizeof(self.last_name)\n        size += sys.getsizeof(self.email)\n        return size\n\n    @classmethod\n    def from_dict(cls, model_dict: dict[str, Any]) -> \"BasicExpertInfo\":\n\n        first_name = cast(str, model_dict.get(\"FirstName\"))\n        last_name = cast(str, model_dict.get(\"LastName\"))\n        email = cast(str, model_dict.get(\"Email\"))\n        display_name = cast(str, model_dict.get(\"Name\"))\n\n        # Check if all fields are None\n        if (\n            first_name is None\n            and last_name is None\n            and email is None\n            and display_name is None\n        ):\n            raise ValueError(\"No identifying information found for user\")\n\n        return cls(\n            first_name=first_name,\n            last_name=last_name,\n            email=email,\n            display_name=display_name,\n        )\n\n\nclass DocumentBase(BaseModel):\n    \"\"\"Used for Onyx ingestion api, the ID is inferred before use if not provided\"\"\"\n\n    id: str | None = None\n    sections: list[TextSection | ImageSection]\n    source: DocumentSource | None = None\n    semantic_identifier: str  # displayed in the UI as the main identifier for the doc\n    # TODO(andrei): Ideally we could improve this to where each value is just a\n    # list of strings.\n    metadata: dict[str, str | list[str]]\n\n    @field_validator(\"metadata\", mode=\"before\")\n    @classmethod\n    def _coerce_metadata_values(cls, v: dict[str, Any]) -> dict[str, str | list[str]]:\n        return {\n            key: [str(item) for item in val] if isinstance(val, list) else str(val)\n            for key, val in v.items()\n        }\n\n    # UTC time\n    doc_updated_at: datetime | None = None\n    chunk_count: int | None = None\n\n    # Owner, creator, etc.\n    primary_owners: list[BasicExpertInfo] | None = None\n    # Assignee, space owner, etc.\n    secondary_owners: list[BasicExpertInfo] | None = None\n    # title is used for search whereas semantic_identifier is used for displaying in the UI\n    # different because Slack message may display as #general but general should not be part\n    # of the search, at least not in the same way as a document title should be for like Confluence\n    # The default title is semantic_identifier though unless otherwise specified\n    title: str | None = None\n    from_ingestion_api: bool = False\n    # Anything else that may be useful that is specific to this particular connector type that other\n    # parts of the code may need. If you're unsure, this can be left as None\n    additional_info: Any = None\n\n    # only filled in EE for connectors w/ permission sync enabled\n    external_access: ExternalAccess | None = None\n    doc_metadata: dict[str, Any] | None = None\n\n    # Parent hierarchy node raw ID - the folder/space/page containing this document\n    # If None, document's hierarchy position is unknown or connector doesn't support hierarchy\n    parent_hierarchy_raw_node_id: str | None = None\n\n    # Resolved database ID of the parent hierarchy node\n    # Set during docfetching after hierarchy nodes are cached\n    parent_hierarchy_node_id: int | None = None\n\n    def get_title_for_document_index(\n        self,\n    ) -> str | None:\n        # If title is explicitly empty, return a None here for embedding purposes\n        if self.title == \"\":\n            return None\n        replace_chars = set(RETURN_SEPARATOR)\n        title = self.semantic_identifier if self.title is None else self.title\n        for char in replace_chars:\n            title = title.replace(char, \" \")\n        title = title.strip()\n        return title\n\n    def get_metadata_str_attributes(self) -> list[str] | None:\n        if not self.metadata:\n            return None\n        # Combined string for the key/value for easy filtering\n        return convert_metadata_dict_to_list_of_strings(self.metadata)\n\n    def __sizeof__(self) -> int:\n        size = sys.getsizeof(self.id)\n        for section in self.sections:\n            size += sys.getsizeof(section)\n        size += sys.getsizeof(self.source)\n        size += sys.getsizeof(self.semantic_identifier)\n        size += sys.getsizeof(self.doc_updated_at)\n        size += sys.getsizeof(self.chunk_count)\n\n        if self.primary_owners is not None:\n            for primary_owner in self.primary_owners:\n                size += sys.getsizeof(primary_owner)\n        else:\n            size += sys.getsizeof(self.primary_owners)\n\n        if self.secondary_owners is not None:\n            for secondary_owner in self.secondary_owners:\n                size += sys.getsizeof(secondary_owner)\n        else:\n            size += sys.getsizeof(self.secondary_owners)\n\n        size += sys.getsizeof(self.title)\n        size += sys.getsizeof(self.from_ingestion_api)\n        size += sys.getsizeof(self.additional_info)\n        return size\n\n    def get_text_content(self) -> str:\n        return \" \".join([section.text for section in self.sections if section.text])\n\n\ndef convert_metadata_dict_to_list_of_strings(\n    metadata: dict[str, str | list[str]],\n) -> list[str]:\n    \"\"\"Converts a metadata dict to a list of strings.\n\n    Each string is a key-value pair separated by the INDEX_SEPARATOR. If a key\n    points to a list of values, each value generates a unique pair.\n\n    NOTE: Whatever formatting strategy is used here to generate a key-value\n    string must be replicated when constructing query filters.\n\n    Args:\n        metadata: The metadata dict to convert where values can be either a\n            string or a list of strings.\n\n    Returns:\n        A list of strings where each string is a key-value pair separated by the\n            INDEX_SEPARATOR.\n    \"\"\"\n    attributes: list[str] = []\n    for k, v in metadata.items():\n        if isinstance(v, list):\n            attributes.extend([k + INDEX_SEPARATOR + vi for vi in v])\n        else:\n            attributes.append(k + INDEX_SEPARATOR + v)\n    return attributes\n\n\ndef convert_metadata_list_of_strings_to_dict(\n    metadata_list: list[str],\n) -> dict[str, str | list[str]]:\n    \"\"\"\n    Converts a list of strings to a metadata dict. The inverse of\n    convert_metadata_dict_to_list_of_strings.\n\n    Assumes the input strings are formatted as in the output of\n    convert_metadata_dict_to_list_of_strings.\n\n    The schema of the output metadata dict is suboptimal yet bound to legacy\n    code. Ideally each key would just point to a list of strings, where each\n    list might contain just one element.\n\n    Args:\n        metadata_list: The list of strings to convert to a metadata dict.\n\n    Returns:\n        A metadata dict where values can be either a string or a list of\n            strings.\n    \"\"\"\n    metadata: dict[str, str | list[str]] = {}\n    for item in metadata_list:\n        key, value = item.split(INDEX_SEPARATOR, 1)\n        if key in metadata:\n            # We have already seen this key therefore it must point to a list.\n            if isinstance(metadata[key], list):\n                cast(list[str], metadata[key]).append(value)\n            else:\n                metadata[key] = [cast(str, metadata[key]), value]\n        else:\n            metadata[key] = value\n    return metadata\n\n\nclass Document(DocumentBase):\n    \"\"\"Used for Onyx ingestion api, the ID is required\"\"\"\n\n    id: str\n    source: DocumentSource\n\n    def to_short_descriptor(self) -> str:\n        \"\"\"Used when logging the identity of a document\"\"\"\n        return f\"ID: '{self.id}'; Semantic ID: '{self.semantic_identifier}'\"\n\n    @classmethod\n    def from_base(cls, base: DocumentBase) -> \"Document\":\n        return cls(\n            id=(\n                make_url_compatible(base.id)\n                if base.id\n                else \"ingestion_api_\" + make_url_compatible(base.semantic_identifier)\n            ),\n            sections=base.sections,\n            source=base.source or DocumentSource.INGESTION_API,\n            semantic_identifier=base.semantic_identifier,\n            metadata=base.metadata,\n            doc_updated_at=base.doc_updated_at,\n            primary_owners=base.primary_owners,\n            secondary_owners=base.secondary_owners,\n            title=base.title,\n            from_ingestion_api=base.from_ingestion_api,\n        )\n\n    def __sizeof__(self) -> int:\n        size = super().__sizeof__()\n        size += sys.getsizeof(self.id)\n        size += sys.getsizeof(self.source)\n        return size\n\n\nclass IndexingDocument(Document):\n    \"\"\"Document with processed sections for indexing\"\"\"\n\n    processed_sections: list[Section] = []\n\n    def get_total_char_length(self) -> int:\n        \"\"\"Get the total character length of the document including processed sections\"\"\"\n        title_len = len(self.title or self.semantic_identifier)\n\n        # Use processed_sections if available, otherwise fall back to original sections\n        if self.processed_sections:\n            section_len = sum(\n                len(section.text) if section.text is not None else 0\n                for section in self.processed_sections\n            )\n        else:\n            section_len = sum(\n                (\n                    len(section.text)\n                    if isinstance(section, TextSection) and section.text is not None\n                    else 0\n                )\n                for section in self.sections\n            )\n\n        return title_len + section_len\n\n\nclass SlimDocument(BaseModel):\n    id: str\n    external_access: ExternalAccess | None = None\n    parent_hierarchy_raw_node_id: str | None = None\n\n\nclass HierarchyNode(BaseModel):\n    \"\"\"\n    Hierarchy node yielded by connectors.\n\n    This is the Pydantic model used by connectors, distinct from the\n    SQLAlchemy HierarchyNode model in db/models.py. The connector runner\n    layer converts this to the DB model when persisting to Postgres.\n    \"\"\"\n\n    # Raw identifier from the source system\n    # e.g., \"1h7uWUR2BYZjtMfEXFt43tauj-Gp36DTPtwnsNuA665I\" for Google Drive\n    raw_node_id: str\n\n    # Raw ID of parent node, or None for SOURCE-level children (direct children of the source root)\n    raw_parent_id: str | None = None\n\n    # Human-readable name for display\n    display_name: str\n\n    # Link to view this node in the source system\n    link: str | None = None\n\n    # What kind of structural node this is (folder, space, page, etc.)\n    node_type: HierarchyNodeType\n\n    # If this hierarchy node represents a document (e.g., Confluence page),\n    # The db model stores that doc's document_id. This gets set during docprocessing\n    # after the document row is created. Matching is done by raw_node_id matching document.id.\n    # so, we don't allow connectors to specify this as it would be unused\n    # document_id: str | None = None\n\n    # External access information for the node\n    external_access: ExternalAccess | None = None\n\n\nclass IndexAttemptMetadata(BaseModel):\n    connector_id: int\n    credential_id: int\n    batch_num: int | None = None\n    attempt_id: int | None = None\n    request_id: str | None = None\n\n    # Work in progress: will likely contain metadata about cc pair / index attempt\n    structured_id: str | None = None\n\n\nclass ConnectorCheckpoint(BaseModel):\n    # TODO: maybe move this to something disk-based to handle extremely large checkpoints?\n    has_more: bool\n\n    def __str__(self) -> str:\n        \"\"\"String representation of the checkpoint, with truncation for large checkpoint content.\"\"\"\n        MAX_CHECKPOINT_CONTENT_CHARS = 1000\n\n        content_str = self.model_dump_json()\n        if len(content_str) > MAX_CHECKPOINT_CONTENT_CHARS:\n            content_str = content_str[: MAX_CHECKPOINT_CONTENT_CHARS - 3] + \"...\"\n        return content_str\n\n\nclass DocumentFailure(BaseModel):\n    document_id: str\n    document_link: str | None = None\n\n\nclass EntityFailure(BaseModel):\n    entity_id: str\n    missed_time_range: tuple[datetime, datetime] | None = None\n\n\nclass ConnectorFailure(BaseModel):\n    failed_document: DocumentFailure | None = None\n    failed_entity: EntityFailure | None = None\n    failure_message: str\n    exception: Exception | None = Field(default=None, exclude=True)\n\n    model_config = {\"arbitrary_types_allowed\": True}\n\n    @model_validator(mode=\"before\")\n    def check_failed_fields(cls, values: dict) -> dict:\n        failed_document = values.get(\"failed_document\")\n        failed_entity = values.get(\"failed_entity\")\n        if (failed_document is None and failed_entity is None) or (\n            failed_document is not None and failed_entity is not None\n        ):\n            raise ValueError(\n                \"Exactly one of 'failed_document' or 'failed_entity' must be specified.\"\n            )\n        return values\n\n\nclass ConnectorStopSignal(Exception):\n    \"\"\"A custom exception used to signal a stop in processing.\"\"\"\n\n\nclass OnyxMetadata(BaseModel):\n    # Careful overriding the document_id, may cause visual issues in the UI.\n    # Kept here for API based use cases mostly\n    document_id: str | None = None\n    source_type: DocumentSource | None = None\n    link: str | None = None\n    file_display_name: str | None = None\n    primary_owners: list[BasicExpertInfo] | None = None\n    secondary_owners: list[BasicExpertInfo] | None = None\n    doc_updated_at: datetime | None = None\n    title: str | None = None\n\n\nclass DocExtractionContext(BaseModel):\n    index_name: str\n    cc_pair_id: int\n    connector_id: int\n    credential_id: int\n    source: DocumentSource\n    earliest_index_time: float\n    from_beginning: bool\n    is_primary: bool\n    should_fetch_permissions_during_indexing: bool\n    search_settings_status: IndexModelStatus\n    doc_extraction_complete_batch_num: int | None\n\n\nclass DocIndexingContext(BaseModel):\n    batches_done: int\n    total_failures: int\n    net_doc_change: int\n    total_chunks: int\n"
  },
  {
    "path": "backend/onyx/connectors/notion/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/notion/connector.py",
    "content": "import re\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\nfrom typing import Optional\nfrom urllib.parse import parse_qs\nfrom urllib.parse import urlparse\n\nimport requests\nfrom pydantic import BaseModel\nfrom retry import retry\nfrom typing_extensions import override\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.app_configs import NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rl_requests,\n)\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import NormalizationResult\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.utils.batching import batch_generator\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_NOTION_PAGE_SIZE = 100\n_NOTION_CALL_TIMEOUT = 30  # 30 seconds\n_MAX_PAGES = 1000\n\n\n# TODO: Tables need to be ingested, Pages need to have their metadata ingested\n\n\nclass NotionPage(BaseModel):\n    \"\"\"Represents a Notion Page object\"\"\"\n\n    id: str\n    created_time: str\n    last_edited_time: str\n    in_trash: bool\n    properties: dict[str, Any]\n    url: str\n\n    database_name: str | None = None  # Only applicable to the database type page (wiki)\n    parent: dict[str, Any] | None = (\n        None  # Raw parent object from API for hierarchy tracking\n    )\n\n\nclass NotionDataSource(BaseModel):\n    \"\"\"Represents a Notion Data Source within a database.\"\"\"\n\n    id: str\n    name: str = \"\"\n\n\nclass NotionBlock(BaseModel):\n    \"\"\"Represents a Notion Block object\"\"\"\n\n    id: str  # Used for the URL\n    text: str\n    # In a plaintext representation of the page, how this block should be joined\n    # with the existing text up to this point, separated out from text for clarity\n    prefix: str\n\n\nclass NotionSearchResponse(BaseModel):\n    \"\"\"Represents the response from the Notion Search API\"\"\"\n\n    results: list[dict[str, Any]]\n    next_cursor: Optional[str]\n    has_more: bool = False\n\n\nclass BlockReadOutput(BaseModel):\n    \"\"\"Output from reading blocks of a page.\"\"\"\n\n    blocks: list[NotionBlock]\n    child_page_ids: list[str]\n    hierarchy_nodes: list[HierarchyNode]\n\n\nclass NotionConnector(LoadConnector, PollConnector):\n    \"\"\"Notion Page connector that reads all Notion pages\n    this integration has been granted access to.\n\n    Arguments:\n        batch_size (int): Number of objects to index in a batch\n    \"\"\"\n\n    def __init__(\n        self,\n        batch_size: int = INDEX_BATCH_SIZE,\n        recursive_index_enabled: bool = not NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP,\n        root_page_id: str | None = None,\n    ) -> None:\n        \"\"\"Initialize with parameters.\"\"\"\n        self.batch_size = batch_size\n        self.headers = {\n            \"Content-Type\": \"application/json\",\n            \"Notion-Version\": \"2026-03-11\",\n        }\n        self.indexed_pages: set[str] = set()\n        self.root_page_id = root_page_id\n        # if enabled, will recursively index child pages as they are found rather\n        # relying entirely on the `search` API. We have received reports that the\n        # `search` API misses many pages - in those cases, this might need to be\n        # turned on. It's not currently known why/when this is required.\n        # NOTE: this also removes all benefits polling, since we need to traverse\n        # all pages regardless of if they are updated. If the notion workspace is\n        # very large, this may not be practical.\n        self.recursive_index_enabled = recursive_index_enabled or self.root_page_id\n\n        # Hierarchy tracking state\n        self.seen_hierarchy_node_raw_ids: set[str] = set()\n        self.workspace_id: str | None = None\n        self.workspace_name: str | None = None\n        # Maps child page IDs to their containing page ID (discovered in _read_blocks).\n        # Used to resolve block_id parent types to the actual containing page.\n        self._child_page_parent_map: dict[str, str] = {}\n        # Maps data_source_id -> database_id (populated in _read_pages_from_database).\n        # Used to resolve data_source_id parent types back to the database.\n        self._data_source_to_database_map: dict[str, str] = {}\n\n    @classmethod\n    @override\n    def normalize_url(cls, url: str) -> NormalizationResult:\n        \"\"\"Normalize a Notion URL to extract the page ID (UUID format).\"\"\"\n        parsed = urlparse(url)\n        netloc = parsed.netloc.lower()\n\n        if not (\"notion.so\" in netloc or \"notion.site\" in netloc):\n            return NormalizationResult(normalized_url=None, use_default=False)\n\n        # Extract page ID from path (format: \"Title-PageID\")\n        path_last = parsed.path.split(\"/\")[-1]\n        candidate = path_last.split(\"-\")[-1] if \"-\" in path_last else path_last\n\n        # Clean and format as UUID\n        candidate = re.sub(r\"[^0-9a-fA-F-]\", \"\", candidate)\n        cleaned = candidate.replace(\"-\", \"\")\n\n        if len(cleaned) == 32 and re.fullmatch(r\"[0-9a-fA-F]{32}\", cleaned):\n            normalized_uuid = (\n                f\"{cleaned[0:8]}-{cleaned[8:12]}-{cleaned[12:16]}-{cleaned[16:20]}-{cleaned[20:]}\"\n            ).lower()\n            return NormalizationResult(\n                normalized_url=normalized_uuid, use_default=False\n            )\n\n        # Try query params\n        params = parse_qs(parsed.query)\n        for key in (\"p\", \"page_id\"):\n            if key in params and params[key]:\n                candidate = params[key][0].replace(\"-\", \"\")\n                if len(candidate) == 32 and re.fullmatch(r\"[0-9a-fA-F]{32}\", candidate):\n                    normalized_uuid = (\n                        f\"{candidate[0:8]}-{candidate[8:12]}-{candidate[12:16]}-{candidate[16:20]}-{candidate[20:]}\"\n                    ).lower()\n                    return NormalizationResult(\n                        normalized_url=normalized_uuid, use_default=False\n                    )\n\n        return NormalizationResult(normalized_url=None, use_default=False)\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _fetch_child_blocks(\n        self, block_id: str, cursor: str | None = None\n    ) -> dict[str, Any] | None:\n        \"\"\"Fetch all child blocks via the Notion API.\"\"\"\n        logger.debug(f\"Fetching children of block with ID '{block_id}'\")\n        block_url = f\"https://api.notion.com/v1/blocks/{block_id}/children\"\n        query_params = None if not cursor else {\"start_cursor\": cursor}\n        res = rl_requests.get(\n            block_url,\n            headers=self.headers,\n            params=query_params,\n            timeout=_NOTION_CALL_TIMEOUT,\n        )\n        try:\n            res.raise_for_status()\n        except Exception as e:\n            if res.status_code == 404:\n                # this happens when a page is not shared with the integration\n                # in this case, we should just ignore the page\n                logger.error(\n                    f\"Unable to access block with ID '{block_id}'. \"\n                    f\"This is likely due to the block not being shared \"\n                    f\"with the Onyx integration. Exact exception:\\n\\n{e}\"\n                )\n            else:\n                logger.exception(\n                    f\"Error fetching blocks with status code {res.status_code}: {res.json()}\"\n                )\n\n            # This can occasionally happen, the reason is unknown and cannot be reproduced on our internal Notion\n            # Assuming this will not be a critical loss of data\n            return None\n        return res.json()\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _fetch_page(self, page_id: str) -> NotionPage:\n        \"\"\"Fetch a page from its ID via the Notion API, retry with database if page fetch fails.\"\"\"\n        logger.debug(f\"Fetching page for ID '{page_id}'\")\n        page_url = f\"https://api.notion.com/v1/pages/{page_id}\"\n        res = rl_requests.get(\n            page_url,\n            headers=self.headers,\n            timeout=_NOTION_CALL_TIMEOUT,\n        )\n        try:\n            res.raise_for_status()\n        except Exception as e:\n            logger.warning(\n                f\"Failed to fetch page, trying database for ID '{page_id}'. Exception: {e}\"\n            )\n            # Try fetching as a database if page fetch fails, this happens if the page is set to a wiki\n            # it becomes a database from the notion perspective\n            return self._fetch_database_as_page(page_id)\n        return NotionPage(**res.json())\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _fetch_database_as_page(self, database_id: str) -> NotionPage:\n        \"\"\"Attempt to fetch a database as a page.\n\n        Note: As of API 2025-09-03, database objects no longer include\n        `properties` (schema moved to individual data sources).\n        \"\"\"\n        logger.debug(f\"Fetching database for ID '{database_id}' as a page\")\n        database_url = f\"https://api.notion.com/v1/databases/{database_id}\"\n        res = rl_requests.get(\n            database_url,\n            headers=self.headers,\n            timeout=_NOTION_CALL_TIMEOUT,\n        )\n        try:\n            res.raise_for_status()\n        except Exception as e:\n            logger.exception(f\"Error fetching database as page - {res.json()}\")\n            raise e\n        db_data = res.json()\n        database_name = db_data.get(\"title\")\n        database_name = (\n            database_name[0].get(\"text\", {}).get(\"content\") if database_name else None\n        )\n\n        db_data.setdefault(\"properties\", {})\n\n        return NotionPage(**db_data, database_name=database_name)\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _fetch_data_sources_for_database(\n        self, database_id: str\n    ) -> list[NotionDataSource]:\n        \"\"\"Fetch the list of data sources for a database.\"\"\"\n        logger.debug(f\"Fetching data sources for database '{database_id}'\")\n        res = rl_requests.get(\n            f\"https://api.notion.com/v1/databases/{database_id}\",\n            headers=self.headers,\n            timeout=_NOTION_CALL_TIMEOUT,\n        )\n        try:\n            res.raise_for_status()\n        except Exception as e:\n            if res.status_code in (403, 404):\n                logger.error(\n                    f\"Unable to access database with ID '{database_id}'. \"\n                    f\"This is likely due to the database not being shared \"\n                    f\"with the Onyx integration. Exact exception:\\n{e}\"\n                )\n                return []\n            logger.exception(f\"Error fetching database - {res.json()}\")\n            raise e\n\n        db_data = res.json()\n        data_sources = db_data.get(\"data_sources\", [])\n        return [\n            NotionDataSource(id=ds[\"id\"], name=ds.get(\"name\", \"\"))\n            for ds in data_sources\n            if ds.get(\"id\")\n        ]\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _fetch_data_source(\n        self, data_source_id: str, cursor: str | None = None\n    ) -> dict[str, Any]:\n        \"\"\"Query a data source via POST /v1/data_sources/{id}/query.\"\"\"\n        logger.debug(f\"Querying data source '{data_source_id}'\")\n        url = f\"https://api.notion.com/v1/data_sources/{data_source_id}/query\"\n        body = None if not cursor else {\"start_cursor\": cursor}\n        res = rl_requests.post(\n            url,\n            headers=self.headers,\n            json=body,\n            timeout=_NOTION_CALL_TIMEOUT,\n        )\n        try:\n            res.raise_for_status()\n        except Exception as e:\n            if res.status_code in (403, 404):\n                logger.error(\n                    f\"Unable to access data source with ID '{data_source_id}'. \"\n                    f\"This is likely due to it not being shared \"\n                    f\"with the Onyx integration. Exact exception:\\n{e}\"\n                )\n                return {\"results\": [], \"next_cursor\": None}\n            logger.exception(f\"Error querying data source - {res.json()}\")\n            raise e\n        return res.json()\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _fetch_workspace_info(self) -> tuple[str, str]:\n        \"\"\"Fetch workspace ID and name from the bot user endpoint.\"\"\"\n        res = rl_requests.get(\n            \"https://api.notion.com/v1/users/me\",\n            headers=self.headers,\n            timeout=_NOTION_CALL_TIMEOUT,\n        )\n        res.raise_for_status()\n        data = res.json()\n        bot = data.get(\"bot\", {})\n        # workspace_id may be in bot object, fallback to user id\n        workspace_id = bot.get(\"workspace_id\", data.get(\"id\"))\n        workspace_name = bot.get(\"workspace_name\", \"Notion Workspace\")\n        return workspace_id, workspace_name\n\n    def _get_workspace_hierarchy_node(self) -> HierarchyNode | None:\n        \"\"\"Get the workspace hierarchy node, fetching workspace info if needed.\n\n        Returns None if the workspace node has already been yielded.\n        \"\"\"\n        if self.workspace_id is None:\n            self.workspace_id, self.workspace_name = self._fetch_workspace_info()\n\n        if self.workspace_id in self.seen_hierarchy_node_raw_ids:\n            return None\n\n        self.seen_hierarchy_node_raw_ids.add(self.workspace_id)\n        return HierarchyNode(\n            raw_node_id=self.workspace_id,\n            raw_parent_id=None,  # Parent is SOURCE (auto-created by system)\n            display_name=self.workspace_name or \"Notion Workspace\",\n            link=f\"https://notion.so/{self.workspace_id.replace('-', '')}\",\n            node_type=HierarchyNodeType.WORKSPACE,\n        )\n\n    def _get_parent_raw_id(\n        self, parent: dict[str, Any] | None, page_id: str | None = None\n    ) -> str | None:\n        \"\"\"Get the parent raw ID for hierarchy tracking.\n\n        Returns workspace_id for top-level pages, or the direct parent ID for nested pages.\n\n        Args:\n            parent: The parent object from the Notion API\n            page_id: The page's own ID, used to look up block_id parents in our cache\n        \"\"\"\n        if not parent:\n            return self.workspace_id  # Default to workspace if no parent info\n\n        parent_type = parent.get(\"type\")\n\n        if parent_type == \"workspace\":\n            return self.workspace_id\n        elif parent_type == \"block_id\":\n            # Inline page in a block - resolve to the containing page if we discovered it\n            if page_id and page_id in self._child_page_parent_map:\n                return self._child_page_parent_map[page_id]\n            # Fallback to workspace if we don't know the parent\n            return self.workspace_id\n        elif parent_type == \"data_source_id\":\n            ds_id = parent.get(\"data_source_id\")\n            if ds_id:\n                return self._data_source_to_database_map.get(ds_id, self.workspace_id)\n        elif parent_type in [\"page_id\", \"database_id\"]:\n            return parent.get(parent_type)\n\n        return self.workspace_id\n\n    def _maybe_yield_hierarchy_node(\n        self,\n        raw_node_id: str,\n        raw_parent_id: str | None,\n        display_name: str,\n        link: str | None,\n        node_type: HierarchyNodeType,\n    ) -> HierarchyNode | None:\n        \"\"\"Create and return a hierarchy node if not already yielded.\n\n        Args:\n            raw_node_id: The raw ID of the node\n            raw_parent_id: The raw ID of the parent node\n            display_name: Human-readable name\n            link: URL to the node in Notion\n            node_type: Type of hierarchy node\n\n        Returns:\n            HierarchyNode if new, None if already yielded\n        \"\"\"\n        if raw_node_id in self.seen_hierarchy_node_raw_ids:\n            return None\n        self.seen_hierarchy_node_raw_ids.add(raw_node_id)\n        return HierarchyNode(\n            raw_node_id=raw_node_id,\n            raw_parent_id=raw_parent_id,\n            display_name=display_name,\n            link=link,\n            node_type=node_type,\n        )\n\n    @staticmethod\n    def _properties_to_str(properties: dict[str, Any]) -> str:\n        \"\"\"Converts Notion properties to a string\"\"\"\n\n        def _recurse_list_properties(inner_list: list[Any]) -> str | None:\n            list_properties: list[str | None] = []\n            for item in inner_list:\n                if item and isinstance(item, dict):\n                    list_properties.append(_recurse_properties(item))\n                elif item and isinstance(item, list):\n                    list_properties.append(_recurse_list_properties(item))\n                else:\n                    list_properties.append(str(item))\n            return (\n                \", \".join(\n                    [\n                        list_property\n                        for list_property in list_properties\n                        if list_property\n                    ]\n                )\n                or None\n            )\n\n        def _recurse_properties(inner_dict: dict[str, Any]) -> str | None:\n            sub_inner_dict: dict[str, Any] | list[Any] | str = inner_dict\n            while isinstance(sub_inner_dict, dict) and \"type\" in sub_inner_dict:\n                type_name = sub_inner_dict[\"type\"]\n                sub_inner_dict = sub_inner_dict[type_name]\n\n                # If the innermost layer is None, the value is not set\n                if not sub_inner_dict:\n                    return None\n\n            # TODO there may be more types to handle here\n            if isinstance(sub_inner_dict, list):\n                return _recurse_list_properties(sub_inner_dict)\n            elif isinstance(sub_inner_dict, str):\n                # For some objects the innermost value could just be a string, not sure what causes this\n                return sub_inner_dict\n            elif isinstance(sub_inner_dict, dict):\n                if \"name\" in sub_inner_dict:\n                    return sub_inner_dict[\"name\"]\n                if \"content\" in sub_inner_dict:\n                    return sub_inner_dict[\"content\"]\n                start = sub_inner_dict.get(\"start\")\n                end = sub_inner_dict.get(\"end\")\n                if start is not None:\n                    if end is not None:\n                        return f\"{start} - {end}\"\n                    return start\n                elif end is not None:\n                    return f\"Until {end}\"\n\n                if \"id\" in sub_inner_dict:\n                    # This is not useful to index, it's a reference to another Notion object\n                    # and this ID value in plaintext is useless outside of the Notion context\n                    logger.debug(\"Skipping Notion object id field property\")\n                    return None\n\n            logger.debug(f\"Unreadable property from innermost prop: {sub_inner_dict}\")\n            return None\n\n        result = \"\"\n        for prop_name, prop in properties.items():\n            if not prop or not isinstance(prop, dict):\n                continue\n\n            try:\n                inner_value = _recurse_properties(prop)\n            except Exception as e:\n                # This is not a critical failure, these properties are not the actual contents of the page\n                # more similar to metadata\n                logger.warning(f\"Error recursing properties for {prop_name}: {e}\")\n                continue\n            # Not a perfect way to format Notion database tables but there's no perfect representation\n            # since this must be represented as plaintext\n            if inner_value:\n                result += f\"{prop_name}: {inner_value}\\t\"\n\n        return result\n\n    def _read_pages_from_database(\n        self,\n        database_id: str,\n        database_parent_raw_id: str | None = None,\n        database_name: str | None = None,\n    ) -> BlockReadOutput:\n        \"\"\"Returns blocks, page IDs, and hierarchy nodes from a database.\n\n        Args:\n            database_id: The ID of the database\n            database_parent_raw_id: The raw ID of the database's parent (containing page or workspace)\n            database_name: The name of the database (from child_database block title)\n        \"\"\"\n        result_blocks: list[NotionBlock] = []\n        result_pages: list[str] = []\n        hierarchy_nodes: list[HierarchyNode] = []\n\n        # Create hierarchy node for this database if not already yielded.\n        # Notion URLs omit dashes from UUIDs: https://notion.so/17ab3186873d418fb899c3f6a43f68de\n        db_node = self._maybe_yield_hierarchy_node(\n            raw_node_id=database_id,\n            raw_parent_id=database_parent_raw_id or self.workspace_id,\n            display_name=database_name or f\"Database {database_id}\",\n            link=f\"https://notion.so/{database_id.replace('-', '')}\",\n            node_type=HierarchyNodeType.DATABASE,\n        )\n        if db_node:\n            hierarchy_nodes.append(db_node)\n\n        # Discover all data sources under this database, then query each one.\n        # Even legacy single-source databases have one entry in the array.\n        data_sources = self._fetch_data_sources_for_database(database_id)\n        if not data_sources:\n            logger.warning(\n                f\"Database '{database_id}' returned zero data sources — \"\n                f\"no pages will be indexed from this database.\"\n            )\n        for ds in data_sources:\n            self._data_source_to_database_map[ds.id] = database_id\n            cursor = None\n            while True:\n                data = self._fetch_data_source(ds.id, cursor)\n\n                for result in data[\"results\"]:\n                    obj_id = result[\"id\"]\n                    obj_type = result[\"object\"]\n                    text = self._properties_to_str(result.get(\"properties\", {}))\n                    if text:\n                        result_blocks.append(\n                            NotionBlock(id=obj_id, text=text, prefix=\"\\n\")\n                        )\n\n                    if not self.recursive_index_enabled:\n                        continue\n\n                    if obj_type == \"page\":\n                        logger.debug(\n                            f\"Found page with ID '{obj_id}' in database '{database_id}'\"\n                        )\n                        result_pages.append(result[\"id\"])\n                    elif obj_type == \"database\":\n                        logger.debug(\n                            f\"Found database with ID '{obj_id}' in database '{database_id}'\"\n                        )\n                        nested_db_title = result.get(\"title\", [])\n                        nested_db_name = None\n                        if nested_db_title and len(nested_db_title) > 0:\n                            nested_db_name = (\n                                nested_db_title[0].get(\"text\", {}).get(\"content\")\n                            )\n                        nested_output = self._read_pages_from_database(\n                            obj_id,\n                            database_parent_raw_id=database_id,\n                            database_name=nested_db_name,\n                        )\n                        result_pages.extend(nested_output.child_page_ids)\n                        hierarchy_nodes.extend(nested_output.hierarchy_nodes)\n\n                if data[\"next_cursor\"] is None:\n                    break\n\n                cursor = data[\"next_cursor\"]\n\n        return BlockReadOutput(\n            blocks=result_blocks,\n            child_page_ids=result_pages,\n            hierarchy_nodes=hierarchy_nodes,\n        )\n\n    def _read_blocks(\n        self, base_block_id: str, containing_page_id: str | None = None\n    ) -> BlockReadOutput:\n        \"\"\"Reads all child blocks for the specified block.\n\n        Args:\n            base_block_id: The block ID to read children from\n            containing_page_id: The ID of the page that contains this block tree.\n                Used to correctly map child pages/databases to their parent page\n                rather than intermediate block IDs.\n        \"\"\"\n        # If no containing_page_id provided, assume base_block_id is the page itself\n        page_id = containing_page_id or base_block_id\n        result_blocks: list[NotionBlock] = []\n        child_pages: list[str] = []\n        hierarchy_nodes: list[HierarchyNode] = []\n        cursor = None\n        while True:\n            data = self._fetch_child_blocks(base_block_id, cursor)\n\n            # this happens when a block is not shared with the integration\n            if data is None:\n                return BlockReadOutput(\n                    blocks=result_blocks,\n                    child_page_ids=child_pages,\n                    hierarchy_nodes=hierarchy_nodes,\n                )\n\n            for result in data[\"results\"]:\n                logger.debug(\n                    f\"Found child block for block with ID '{base_block_id}': {result}\"\n                )\n                result_block_id = result[\"id\"]\n                result_type = result[\"type\"]\n                result_obj = result[result_type]\n\n                if result_type == \"ai_block\":\n                    logger.warning(\n                        f\"Skipping 'ai_block' ('{result_block_id}') for base block '{base_block_id}': \"\n                        f\"Notion API does not currently support reading AI blocks (as of 24/02/09) \"\n                        f\"(discussion: https://github.com/onyx-dot-app/onyx/issues/1053)\"\n                    )\n                    continue\n\n                if result_type == \"unsupported\":\n                    logger.warning(\n                        f\"Skipping unsupported block type '{result_type}' \"\n                        f\"('{result_block_id}') for base block '{base_block_id}': \"\n                        f\"(discussion: https://github.com/onyx-dot-app/onyx/issues/1230)\"\n                    )\n                    continue\n\n                if result_type == \"external_object_instance_page\":\n                    logger.warning(\n                        f\"Skipping 'external_object_instance_page' ('{result_block_id}') for base block '{base_block_id}': \"\n                        f\"Notion API does not currently support reading external blocks (as of 24/07/03) \"\n                        f\"(discussion: https://github.com/onyx-dot-app/onyx/issues/1761)\"\n                    )\n                    continue\n\n                cur_result_text_arr = []\n                if \"rich_text\" in result_obj:\n                    for rich_text in result_obj[\"rich_text\"]:\n                        # skip if doesn't have text object\n                        if \"text\" in rich_text:\n                            text = rich_text[\"text\"][\"content\"]\n                            cur_result_text_arr.append(text)\n\n                if result[\"has_children\"]:\n                    if result_type == \"child_page\":\n                        # Child pages will not be included at this top level, it will be a separate document.\n                        # Track parent page so we can resolve block_id parents later.\n                        # Use page_id (not base_block_id) to ensure we map to the containing page,\n                        # not an intermediate block like a toggle or callout.\n                        child_pages.append(result_block_id)\n                        self._child_page_parent_map[result_block_id] = page_id\n                    else:\n                        logger.debug(f\"Entering sub-block: {result_block_id}\")\n                        sub_output = self._read_blocks(result_block_id, page_id)\n                        logger.debug(f\"Finished sub-block: {result_block_id}\")\n                        result_blocks.extend(sub_output.blocks)\n                        child_pages.extend(sub_output.child_page_ids)\n                        hierarchy_nodes.extend(sub_output.hierarchy_nodes)\n\n                if result_type == \"child_database\":\n                    # Extract database name from the child_database block\n                    db_title = result_obj.get(\"title\", \"\")\n                    db_output = self._read_pages_from_database(\n                        result_block_id,\n                        database_parent_raw_id=page_id,  # Parent is the containing page\n                        database_name=db_title or None,\n                    )\n                    # A database on a page often looks like a table, we need to include it for the contents\n                    # of the page but the children (cells) should be processed as other Documents\n                    result_blocks.extend(db_output.blocks)\n                    hierarchy_nodes.extend(db_output.hierarchy_nodes)\n\n                    if self.recursive_index_enabled:\n                        child_pages.extend(db_output.child_page_ids)\n\n                if cur_result_text_arr:\n                    new_block = NotionBlock(\n                        id=result_block_id,\n                        text=\"\\n\".join(cur_result_text_arr),\n                        prefix=\"\\n\",\n                    )\n                    result_blocks.append(new_block)\n\n            if data[\"next_cursor\"] is None:\n                break\n\n            cursor = data[\"next_cursor\"]\n\n        return BlockReadOutput(\n            blocks=result_blocks,\n            child_page_ids=child_pages,\n            hierarchy_nodes=hierarchy_nodes,\n        )\n\n    def _read_page_title(self, page: NotionPage) -> str | None:\n        \"\"\"Extracts the title from a Notion page\"\"\"\n        page_title = None\n        if hasattr(page, \"database_name\") and page.database_name:\n            return page.database_name\n        for _, prop in page.properties.items():\n            if prop[\"type\"] == \"title\" and len(prop[\"title\"]) > 0:\n                page_title = \" \".join([t[\"plain_text\"] for t in prop[\"title\"]]).strip()\n                break\n\n        return page_title\n\n    def _read_pages(\n        self,\n        pages: list[NotionPage],\n    ) -> Generator[Document | HierarchyNode, None, None]:\n        \"\"\"Reads pages for rich text content and generates Documents and HierarchyNodes\n\n        Note that a page which is turned into a \"wiki\" becomes a database but both top level pages and top level databases\n        do not seem to have any properties associated with them.\n\n        Pages that are part of a database can have properties which are like the values of the row in the \"database\" table\n        in which they exist\n\n        This is not clearly outlined in the Notion API docs but it is observable empirically.\n        https://developers.notion.com/docs/working-with-page-content\n        \"\"\"\n        all_child_page_ids: list[str] = []\n        for page in pages:\n            if page.id in self.indexed_pages:\n                logger.debug(f\"Already indexed page with ID '{page.id}'. Skipping.\")\n                continue\n\n            logger.info(f\"Reading page with ID '{page.id}', with url {page.url}\")\n            block_output = self._read_blocks(page.id)\n            all_child_page_ids.extend(block_output.child_page_ids)\n\n            # okay to mark here since there's no way for this to not succeed\n            # without a critical failure\n            self.indexed_pages.add(page.id)\n\n            raw_page_title = self._read_page_title(page)\n            page_title = raw_page_title or f\"Untitled Page with ID {page.id}\"\n            parent_raw_id = self._get_parent_raw_id(page.parent, page_id=page.id)\n\n            # If this page has children (pages or databases), yield it as a hierarchy node FIRST\n            # This ensures parent nodes are created before child documents reference them\n            if block_output.child_page_ids or block_output.hierarchy_nodes:\n                hierarchy_node = self._maybe_yield_hierarchy_node(\n                    raw_node_id=page.id,\n                    raw_parent_id=parent_raw_id,\n                    display_name=page_title,\n                    link=page.url,\n                    node_type=HierarchyNodeType.PAGE,\n                )\n                if hierarchy_node:\n                    yield hierarchy_node\n\n            # Yield database hierarchy nodes discovered in this page's blocks\n            for db_node in block_output.hierarchy_nodes:\n                yield db_node\n\n            if not block_output.blocks:\n                if not raw_page_title:\n                    logger.warning(\n                        f\"No blocks OR title found for page with ID '{page.id}'. Skipping.\"\n                    )\n                    continue\n\n                logger.debug(f\"No blocks found for page with ID '{page.id}'\")\n                \"\"\"\n                Something like:\n\n                TITLE\n\n                PROP1: PROP1_VALUE\n                PROP2: PROP2_VALUE\n                \"\"\"\n                text = page_title\n                if page.properties:\n                    text += \"\\n\\n\" + \"\\n\".join(\n                        [f\"{key}: {value}\" for key, value in page.properties.items()]\n                    )\n                sections = [\n                    TextSection(\n                        link=f\"{page.url}\",\n                        text=text,\n                    )\n                ]\n            else:\n                sections = [\n                    TextSection(\n                        link=f\"{page.url}#{block.id.replace('-', '')}\",\n                        text=block.prefix + block.text,\n                    )\n                    for block in block_output.blocks\n                ]\n\n            yield (\n                Document(\n                    id=page.id,\n                    sections=cast(list[TextSection | ImageSection], sections),\n                    source=DocumentSource.NOTION,\n                    semantic_identifier=page_title,\n                    doc_updated_at=datetime.fromisoformat(\n                        page.last_edited_time\n                    ).astimezone(timezone.utc),\n                    metadata={},\n                    parent_hierarchy_raw_node_id=parent_raw_id,\n                )\n            )\n            self.indexed_pages.add(page.id)\n\n        if self.recursive_index_enabled and all_child_page_ids:\n            # NOTE: checking if page_id is in self.indexed_pages to prevent extra\n            # calls to `_fetch_page` for pages we've already indexed\n            for child_page_batch_ids in batch_generator(\n                all_child_page_ids, batch_size=INDEX_BATCH_SIZE\n            ):\n                child_page_batch = [\n                    self._fetch_page(page_id)\n                    for page_id in child_page_batch_ids\n                    if page_id not in self.indexed_pages\n                ]\n                yield from self._read_pages(child_page_batch)\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _search_notion(self, query_dict: dict[str, Any]) -> NotionSearchResponse:\n        \"\"\"Search for pages from a Notion database. Includes some small number of\n        retries to handle misc, flakey failures.\"\"\"\n        logger.debug(f\"Searching for pages in Notion with query_dict: {query_dict}\")\n        res = rl_requests.post(\n            \"https://api.notion.com/v1/search\",\n            headers=self.headers,\n            json=query_dict,\n            timeout=_NOTION_CALL_TIMEOUT,\n        )\n        res.raise_for_status()\n        return NotionSearchResponse(**res.json())\n\n    # The | Document is needed for mypy type checking\n    def _yield_database_hierarchy_nodes(\n        self,\n    ) -> Generator[HierarchyNode | Document, None, None]:\n        \"\"\"Search for all data sources and yield hierarchy nodes for their parent databases.\n\n        This must be called BEFORE page indexing so that database hierarchy nodes\n        exist when pages inside databases reference them as parents.\n\n        With the new API, search returns data source objects instead of databases.\n        Multiple data sources can share the same parent database, so we use\n        database_id as the hierarchy node key and deduplicate via\n        _maybe_yield_hierarchy_node.\n        \"\"\"\n        query_dict: dict[str, Any] = {\n            \"filter\": {\"property\": \"object\", \"value\": \"data_source\"},\n            \"page_size\": _NOTION_PAGE_SIZE,\n        }\n        pages_seen = 0\n        while pages_seen < _MAX_PAGES:\n            db_res = self._search_notion(query_dict)\n            for ds in db_res.results:\n                # Extract the parent database_id from the data source's parent\n                ds_parent = ds.get(\"parent\", {})\n                db_id = ds_parent.get(\"database_id\")\n                if not db_id:\n                    continue\n\n                # Populate the mapping so _get_parent_raw_id can resolve later\n                ds_id = ds.get(\"id\")\n                if not ds_id:\n                    continue\n                self._data_source_to_database_map[ds_id] = db_id\n\n                # Fetch the database to get its actual name and parent\n                try:\n                    db_page = self._fetch_database_as_page(db_id)\n                    db_name = db_page.database_name or f\"Database {db_id}\"\n                    parent_raw_id = self._get_parent_raw_id(db_page.parent)\n                    db_url = (\n                        db_page.url or f\"https://notion.so/{db_id.replace('-', '')}\"\n                    )\n                except requests.exceptions.RequestException as e:\n                    logger.warning(\n                        f\"Could not fetch database '{db_id}', \"\n                        f\"defaulting to workspace root. Error: {e}\"\n                    )\n                    db_name = f\"Database {db_id}\"\n                    parent_raw_id = self.workspace_id\n                    db_url = f\"https://notion.so/{db_id.replace('-', '')}\"\n\n                # _maybe_yield_hierarchy_node deduplicates by raw_node_id,\n                # so multiple data sources under one database produce one node.\n                node = self._maybe_yield_hierarchy_node(\n                    raw_node_id=db_id,\n                    raw_parent_id=parent_raw_id or self.workspace_id,\n                    display_name=db_name,\n                    link=db_url,\n                    node_type=HierarchyNodeType.DATABASE,\n                )\n                if node:\n                    yield node\n\n            if not db_res.has_more:\n                break\n            query_dict[\"start_cursor\"] = db_res.next_cursor\n            pages_seen += 1\n\n    def _filter_pages_by_time(\n        self,\n        pages: list[dict[str, Any]],\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        filter_field: str = \"last_edited_time\",\n    ) -> list[NotionPage]:\n        \"\"\"A helper function to filter out pages outside of a time\n        range. This functionality doesn't yet exist in the Notion Search API,\n        but when it does, this approach can be deprecated.\n\n        Arguments:\n            pages (list[dict]) - Pages to filter\n            start (float) - start epoch time to filter from\n            end (float) - end epoch time to filter to\n            filter_field (str) - the attribute on the page to apply the filter\n        \"\"\"\n        filtered_pages: list[NotionPage] = []\n        for page in pages:\n            # Parse ISO 8601 timestamp and convert to UTC epoch time\n            timestamp = page[filter_field].replace(\".000Z\", \"+00:00\")\n            compare_time = datetime.fromisoformat(timestamp).timestamp()\n            if compare_time > start and compare_time <= end:\n                filtered_pages += [NotionPage(**page)]\n        return filtered_pages\n\n    def _recursive_load(self) -> GenerateDocumentsOutput:\n        if self.root_page_id is None or not self.recursive_index_enabled:\n            raise RuntimeError(\n                \"Recursive page lookup is not enabled, but we are trying to recursively load pages. This should never happen.\"\n            )\n\n        # Yield workspace hierarchy node FIRST before any pages\n        workspace_node = self._get_workspace_hierarchy_node()\n        if workspace_node:\n            yield [workspace_node]\n\n        logger.info(\n            f\"Recursively loading pages from Notion based on root page with ID: {self.root_page_id}\"\n        )\n        pages = [self._fetch_page(page_id=self.root_page_id)]\n        yield from batch_generator(self._read_pages(pages), self.batch_size)\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        \"\"\"Applies integration token to headers\"\"\"\n        self.headers[\"Authorization\"] = (\n            f\"Bearer {credentials['notion_integration_token']}\"\n        )\n        return None\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        \"\"\"Loads all page data from a Notion workspace.\n\n        Returns:\n            list[Document]: list of documents.\n        \"\"\"\n        # TODO: remove once Notion search issue is discovered\n        if self.recursive_index_enabled and self.root_page_id:\n            yield from self._recursive_load()\n            return\n\n        # Yield workspace hierarchy node FIRST before any pages\n        workspace_node = self._get_workspace_hierarchy_node()\n        if workspace_node:\n            yield [workspace_node]\n\n        # Yield database hierarchy nodes BEFORE pages so parent references resolve\n        yield from batch_generator(\n            self._yield_database_hierarchy_nodes(), self.batch_size\n        )\n\n        query_dict: dict[str, Any] = {\n            \"filter\": {\"property\": \"object\", \"value\": \"page\"},\n            \"page_size\": _NOTION_PAGE_SIZE,\n        }\n        while True:\n            db_res = self._search_notion(query_dict)\n            pages = [NotionPage(**page) for page in db_res.results]\n            yield from batch_generator(self._read_pages(pages), self.batch_size)\n            if db_res.has_more:\n                query_dict[\"start_cursor\"] = db_res.next_cursor\n            else:\n                break\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        \"\"\"Uses the Notion search API to fetch updated pages\n        within a time period.\n        Unfortunately the search API doesn't yet support filtering by times,\n        so until they add that, we're just going to page through results until,\n        we reach ones that are older than our search criteria.\n        \"\"\"\n        # TODO: remove once Notion search issue is discovered\n        if self.recursive_index_enabled and self.root_page_id:\n            yield from self._recursive_load()\n            return\n\n        # Yield workspace hierarchy node FIRST before any pages\n        workspace_node = self._get_workspace_hierarchy_node()\n        if workspace_node:\n            yield [workspace_node]\n\n        # Yield database hierarchy nodes BEFORE pages so parent references resolve.\n        # We yield all databases without time filtering because a page's parent\n        # database might not have been edited even if the page was.\n        yield from batch_generator(\n            self._yield_database_hierarchy_nodes(), self.batch_size\n        )\n\n        query_dict: dict[str, Any] = {\n            \"page_size\": _NOTION_PAGE_SIZE,\n            \"sort\": {\"timestamp\": \"last_edited_time\", \"direction\": \"descending\"},\n            \"filter\": {\"property\": \"object\", \"value\": \"page\"},\n        }\n        while True:\n            db_res = self._search_notion(query_dict)\n            pages = self._filter_pages_by_time(\n                db_res.results, start, end, filter_field=\"last_edited_time\"\n            )\n            if len(pages) > 0:\n                yield from batch_generator(self._read_pages(pages), self.batch_size)\n                if db_res.has_more:\n                    query_dict[\"start_cursor\"] = db_res.next_cursor\n                else:\n                    break\n            else:\n                break\n\n    def validate_connector_settings(self) -> None:\n        if not self.headers.get(\"Authorization\"):\n            raise ConnectorMissingCredentialError(\"Notion credentials not loaded.\")\n\n        try:\n            # We'll do a minimal search call (page_size=1) to confirm accessibility\n            if self.root_page_id:\n                # If root_page_id is set, fetch the specific page\n                res = rl_requests.get(\n                    f\"https://api.notion.com/v1/pages/{self.root_page_id}\",\n                    headers=self.headers,\n                    timeout=_NOTION_CALL_TIMEOUT,\n                )\n            else:\n                # If root_page_id is not set, perform a minimal search\n                test_query = {\n                    \"filter\": {\"property\": \"object\", \"value\": \"page\"},\n                    \"page_size\": 1,\n                }\n                res = rl_requests.post(\n                    \"https://api.notion.com/v1/search\",\n                    headers=self.headers,\n                    json=test_query,\n                    timeout=_NOTION_CALL_TIMEOUT,\n                )\n            res.raise_for_status()\n\n        except requests.exceptions.HTTPError as http_err:\n            status_code = http_err.response.status_code if http_err.response else None\n\n            if status_code == 401:\n                raise CredentialExpiredError(\n                    \"Notion credential appears to be invalid or expired (HTTP 401).\"\n                )\n            elif status_code == 403:\n                raise InsufficientPermissionsError(\n                    \"Your Notion token does not have sufficient permissions (HTTP 403).\"\n                )\n            elif status_code == 404:\n                # Typically means resource not found or not shared. Could be root_page_id is invalid.\n                raise ConnectorValidationError(\n                    \"Notion resource not found or not shared with the integration (HTTP 404).\"\n                )\n            elif status_code == 429:\n                raise ConnectorValidationError(\n                    \"Validation failed due to Notion rate-limits being exceeded (HTTP 429). Please try again later.\"\n                )\n            else:\n                raise UnexpectedValidationError(\n                    f\"Unexpected Notion HTTP error (status={status_code}): {http_err}\"\n                ) from http_err\n\n        except Exception as exc:\n            raise UnexpectedValidationError(\n                f\"Unexpected error during Notion settings validation: {exc}\"\n            )\n\n\nif __name__ == \"__main__\":\n    import os\n\n    root_page_id = os.environ.get(\"NOTION_ROOT_PAGE_ID\")\n    connector = NotionConnector(root_page_id=root_page_id)\n    connector.load_credentials(\n        {\"notion_integration_token\": os.environ.get(\"NOTION_INTEGRATION_TOKEN\")}\n    )\n    document_batches = connector.load_from_state()\n    for doc_batch in document_batches:\n        for doc in doc_batch:\n            print(doc)\n"
  },
  {
    "path": "backend/onyx/connectors/outline/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/outline/client.py",
    "content": "from typing import Any\n\nimport requests\nfrom requests.exceptions import ConnectionError as RequestsConnectionError\nfrom requests.exceptions import RequestException\nfrom requests.exceptions import Timeout\n\nfrom onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS\n\n\nclass OutlineClientRequestFailedError(ConnectionError):\n    \"\"\"Custom error class for handling failed requests to the Outline API with status code and error message\"\"\"\n\n    def __init__(self, status: int, error: str) -> None:\n        self.status_code = status\n        self.error = error\n        super().__init__(f\"Outline Client request failed with status {status}: {error}\")\n\n\nclass OutlineApiClient:\n    \"\"\"Client for interacting with the Outline API. Handles authentication and making HTTP requests.\"\"\"\n\n    def __init__(\n        self,\n        api_token: str,\n        base_url: str,\n    ) -> None:\n        self.base_url = base_url.rstrip(\"/\")\n        self.api_token = api_token\n\n    def post(self, endpoint: str, data: dict[str, Any] | None = None) -> dict[str, Any]:\n        if data is None:\n            data = {}\n        url: str = self._build_url(endpoint)\n        headers = self._build_headers()\n\n        try:\n            response = requests.post(\n                url, headers=headers, json=data, timeout=REQUEST_TIMEOUT_SECONDS\n            )\n        except Timeout:\n            raise OutlineClientRequestFailedError(\n                408,\n                f\"Request timed out - server did not respond within {REQUEST_TIMEOUT_SECONDS} seconds\",\n            )\n        except RequestsConnectionError as e:\n            raise OutlineClientRequestFailedError(\n                -1, f\"Connection error - unable to reach Outline server: {e}\"\n            )\n        except RequestException as e:\n            raise OutlineClientRequestFailedError(-1, f\"Network error occurred: {e}\")\n\n        if response.status_code >= 300:\n            error = response.reason\n            try:\n                response_json = response.json()\n                if isinstance(response_json, dict):\n                    response_error = response_json.get(\"error\", {}).get(\"message\", \"\")\n                    if response_error:\n                        error = response_error\n            except Exception:\n                # If JSON parsing fails, fall back to response.text for better debugging\n                if response.text.strip():\n                    error = f\"{response.reason}: {response.text.strip()}\"\n            raise OutlineClientRequestFailedError(response.status_code, error)\n\n        try:\n            return response.json()\n        except Exception:\n            raise OutlineClientRequestFailedError(\n                response.status_code,\n                f\"Response was successful but contained invalid JSON: {response.text}\",\n            )\n\n    def _build_headers(self) -> dict[str, str]:\n        return {\n            \"Authorization\": f\"Bearer {self.api_token}\",\n            \"Accept\": \"application/json\",\n            \"Content-Type\": \"application/json\",\n        }\n\n    def _build_url(self, endpoint: str) -> str:\n        return self.base_url.rstrip(\"/\") + \"/api/\" + endpoint.lstrip(\"/\")\n\n    def build_app_url(self, endpoint: str) -> str:\n        return self.base_url.rstrip(\"/\") + \"/\" + endpoint.lstrip(\"/\")\n"
  },
  {
    "path": "backend/onyx/connectors/outline/connector.py",
    "content": "import html\nimport time\nfrom collections.abc import Callable\nfrom typing import Any\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.connectors.outline.client import OutlineApiClient\nfrom onyx.connectors.outline.client import OutlineClientRequestFailedError\n\n\nclass OutlineConnector(LoadConnector, PollConnector):\n    \"\"\"Connector for Outline knowledge base. Handles authentication, document loading and polling.\n    Implements both LoadConnector for initial state loading and PollConnector for incremental updates.\n    \"\"\"\n\n    def __init__(\n        self,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        self.batch_size = batch_size\n        self.outline_client: OutlineApiClient | None = None\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        required_keys = [\"outline_api_token\", \"outline_base_url\"]\n        for key in required_keys:\n            if key not in credentials:\n                raise ConnectorMissingCredentialError(\"Outline\")\n\n        self.outline_client = OutlineApiClient(\n            api_token=credentials[\"outline_api_token\"],\n            base_url=credentials[\"outline_base_url\"],\n        )\n        return None\n\n    @staticmethod\n    def _get_doc_batch(\n        batch_size: int,\n        outline_client: OutlineApiClient,\n        endpoint: str,\n        transformer: Callable[[OutlineApiClient, dict], Document],\n        start_ind: int,\n    ) -> tuple[list[Document], int]:\n        data = {\n            \"limit\": batch_size,\n            \"offset\": start_ind,\n        }\n\n        batch = outline_client.post(endpoint, data=data).get(\"data\", [])\n        doc_batch = [transformer(outline_client, item) for item in batch]\n\n        return doc_batch, len(batch)\n\n    @staticmethod\n    def _collection_to_document(\n        outline_client: OutlineApiClient, collection: dict[str, Any]\n    ) -> Document:\n        url = outline_client.build_app_url(f\"/collection/{collection.get('id')}\")\n        title = str(collection.get(\"name\", \"\"))\n        name = collection.get(\"name\") or \"\"\n        description = collection.get(\"description\") or \"\"\n        text = name + \"\\n\" + description\n        updated_at_str = (\n            str(collection.get(\"updatedAt\"))\n            if collection.get(\"updatedAt\") is not None\n            else None\n        )\n        return Document(\n            id=\"outline_collection__\" + str(collection.get(\"id\")),\n            sections=[TextSection(link=url, text=html.unescape(text))],\n            source=DocumentSource.OUTLINE,\n            semantic_identifier=\"Collection: \" + title,\n            title=title,\n            doc_updated_at=(\n                time_str_to_utc(updated_at_str) if updated_at_str is not None else None\n            ),\n            metadata={\"type\": \"collection\"},\n        )\n\n    @staticmethod\n    def _document_to_document(\n        outline_client: OutlineApiClient, document: dict[str, Any]\n    ) -> Document:\n        url = outline_client.build_app_url(f\"/doc/{document.get('id')}\")\n        title = str(document.get(\"title\", \"\"))\n        doc_title = document.get(\"title\") or \"\"\n        doc_text = document.get(\"text\") or \"\"\n        text = doc_title + \"\\n\" + doc_text\n        updated_at_str = (\n            str(document.get(\"updatedAt\"))\n            if document.get(\"updatedAt\") is not None\n            else None\n        )\n        return Document(\n            id=\"outline_document__\" + str(document.get(\"id\")),\n            sections=[TextSection(link=url, text=html.unescape(text))],\n            source=DocumentSource.OUTLINE,\n            semantic_identifier=\"Document: \" + title,\n            title=title,\n            doc_updated_at=(\n                time_str_to_utc(updated_at_str) if updated_at_str is not None else None\n            ),\n            metadata={\"type\": \"document\"},\n        )\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        if self.outline_client is None:\n            raise ConnectorMissingCredentialError(\"Outline\")\n\n        return self._fetch_documents()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        if self.outline_client is None:\n            raise ConnectorMissingCredentialError(\"Outline\")\n\n        # Outline API does not support date-based filtering natively,\n        # so we implement client-side filtering after fetching documents\n        def time_filter(doc: Document) -> bool:\n            if doc.doc_updated_at is None:\n                return False\n            doc_timestamp = doc.doc_updated_at.timestamp()\n            if doc_timestamp < start:\n                return False\n            if doc_timestamp > end:\n                return False\n            return True\n\n        return self._fetch_documents(time_filter)\n\n    def _fetch_documents(\n        self, time_filter: Callable[[Document], bool] | None = None\n    ) -> GenerateDocumentsOutput:\n        if self.outline_client is None:\n            raise ConnectorMissingCredentialError(\"Outline\")\n\n        transform_by_endpoint: dict[\n            str, Callable[[OutlineApiClient, dict], Document]\n        ] = {\n            \"documents.list\": self._document_to_document,\n            \"collections.list\": self._collection_to_document,\n        }\n\n        for endpoint, transform in transform_by_endpoint.items():\n            start_ind = 0\n            while True:\n                doc_batch, num_results = self._get_doc_batch(\n                    batch_size=self.batch_size,\n                    outline_client=self.outline_client,\n                    endpoint=endpoint,\n                    transformer=transform,\n                    start_ind=start_ind,\n                )\n\n                # Apply time filtering if specified\n                filtered_batch: list[Document | HierarchyNode] = []\n                for doc in doc_batch:\n                    if time_filter is None or time_filter(doc):\n                        filtered_batch.append(doc)\n\n                start_ind += num_results\n                if filtered_batch:\n                    yield filtered_batch\n\n                if num_results < self.batch_size:\n                    break\n                else:\n                    time.sleep(0.2)\n\n    def validate_connector_settings(self) -> None:\n        \"\"\"\n        Validate that the Outline credentials and connector settings are correct.\n        Specifically checks that we can make an authenticated request to Outline.\n        \"\"\"\n        if not self.outline_client:\n            raise ConnectorMissingCredentialError(\"Outline\")\n\n        try:\n            # Use auth.info endpoint for validation\n            _ = self.outline_client.post(\"auth.info\", data={})\n\n        except OutlineClientRequestFailedError as e:\n            # Check for HTTP status codes\n            if e.status_code == 401:\n                raise CredentialExpiredError(\n                    \"Your Outline credentials appear to be invalid or expired (HTTP 401).\"\n                ) from e\n            elif e.status_code == 403:\n                raise InsufficientPermissionsError(\n                    \"The configured Outline token does not have sufficient permissions (HTTP 403).\"\n                ) from e\n            else:\n                raise ConnectorValidationError(\n                    f\"Unexpected Outline error (status={e.status_code}): {e}\"\n                ) from e\n\n        except Exception as exc:\n            raise ConnectorValidationError(\n                f\"Unexpected error while validating Outline connector settings: {exc}\"\n            ) from exc\n"
  },
  {
    "path": "backend/onyx/connectors/productboard/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/productboard/connector.py",
    "content": "from collections.abc import Generator\nfrom itertools import chain\nfrom typing import Any\nfrom typing import cast\n\nimport requests\nfrom bs4 import BeautifulSoup\nfrom dateutil import parser\nfrom retry import retry\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\n_PRODUCT_BOARD_BASE_URL = \"https://api.productboard.com\"\n\n\nclass ProductboardApiError(Exception):\n    pass\n\n\nclass ProductboardConnector(PollConnector):\n    def __init__(\n        self,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        self.batch_size = batch_size\n        self.access_token: str | None = None\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self.access_token = credentials[\"productboard_access_token\"]\n        return None\n\n    def _build_headers(self) -> dict[str, str]:\n        return {\n            \"Authorization\": f\"Bearer {self.access_token}\",\n            \"X-Version\": \"1\",\n        }\n\n    @staticmethod\n    def _parse_description_html(description_html: str) -> str:\n        soup = BeautifulSoup(description_html, \"html.parser\")\n        return soup.get_text()\n\n    @staticmethod\n    def _get_owner_email(productboard_obj: dict[str, Any]) -> str | None:\n        owner_dict = cast(dict[str, str] | None, productboard_obj.get(\"owner\"))\n        if not owner_dict:\n            return None\n        return owner_dict.get(\"email\")\n\n    def _fetch_documents(\n        self,\n        initial_link: str,\n    ) -> Generator[dict[str, Any], None, None]:\n        headers = self._build_headers()\n\n        @retry(tries=3, delay=1, backoff=2)\n        def fetch(link: str) -> dict[str, Any]:\n            response = requests.get(link, headers=headers)\n            if not response.ok:\n                # rate-limiting is at 50 requests per second.\n                # The delay in this retry should handle this while this is\n                # not parallelized.\n                raise ProductboardApiError(\n                    f\"Failed to fetch from productboard - status code: {response.status_code} - response: {response.text}\"\n                )\n\n            return response.json()\n\n        curr_link = initial_link\n        while True:\n            response_json = fetch(curr_link)\n            for entity in response_json[\"data\"]:\n                yield entity\n\n            curr_link = response_json.get(\"links\", {}).get(\"next\")\n            if not curr_link:\n                break\n\n    def _get_features(self) -> Generator[Document, None, None]:\n        \"\"\"A Feature is like a ticket in Jira\"\"\"\n        for feature in self._fetch_documents(\n            initial_link=f\"{_PRODUCT_BOARD_BASE_URL}/features\"\n        ):\n            owner = self._get_owner_email(feature)\n            experts = [BasicExpertInfo(email=owner)] if owner else None\n\n            metadata: dict[str, str | list[str]] = {}\n            entity_type = feature.get(\"type\", \"feature\")\n            if entity_type:\n                metadata[\"entity_type\"] = str(entity_type)\n\n            status = feature.get(\"status\", {}).get(\"name\")\n            if status:\n                metadata[\"status\"] = str(status)\n\n            yield Document(\n                id=feature[\"id\"],\n                sections=[\n                    TextSection(\n                        link=feature[\"links\"][\"html\"],\n                        text=self._parse_description_html(feature[\"description\"]),\n                    )\n                ],\n                semantic_identifier=feature[\"name\"],\n                source=DocumentSource.PRODUCTBOARD,\n                doc_updated_at=time_str_to_utc(feature[\"updatedAt\"]),\n                primary_owners=experts,\n                metadata=metadata,\n            )\n\n    def _get_components(self) -> Generator[Document, None, None]:\n        \"\"\"A Component is like an epic in Jira. It contains Features\"\"\"\n        for component in self._fetch_documents(\n            initial_link=f\"{_PRODUCT_BOARD_BASE_URL}/components\"\n        ):\n            owner = self._get_owner_email(component)\n            experts = [BasicExpertInfo(email=owner)] if owner else None\n\n            yield Document(\n                id=component[\"id\"],\n                sections=[\n                    TextSection(\n                        link=component[\"links\"][\"html\"],\n                        text=self._parse_description_html(component[\"description\"]),\n                    )\n                ],\n                semantic_identifier=component[\"name\"],\n                source=DocumentSource.PRODUCTBOARD,\n                doc_updated_at=time_str_to_utc(component[\"updatedAt\"]),\n                primary_owners=experts,\n                metadata={\n                    \"entity_type\": \"component\",\n                },\n            )\n\n    def _get_products(self) -> Generator[Document, None, None]:\n        \"\"\"A Product is the highest level of organization.\n        A Product contains components, which contains features.\"\"\"\n        for product in self._fetch_documents(\n            initial_link=f\"{_PRODUCT_BOARD_BASE_URL}/products\"\n        ):\n            owner = self._get_owner_email(product)\n            experts = [BasicExpertInfo(email=owner)] if owner else None\n\n            yield Document(\n                id=product[\"id\"],\n                sections=[\n                    TextSection(\n                        link=product[\"links\"][\"html\"],\n                        text=self._parse_description_html(product[\"description\"]),\n                    )\n                ],\n                semantic_identifier=product[\"name\"],\n                source=DocumentSource.PRODUCTBOARD,\n                doc_updated_at=time_str_to_utc(product[\"updatedAt\"]),\n                primary_owners=experts,\n                metadata={\n                    \"entity_type\": \"product\",\n                },\n            )\n\n    def _get_objectives(self) -> Generator[Document, None, None]:\n        for objective in self._fetch_documents(\n            initial_link=f\"{_PRODUCT_BOARD_BASE_URL}/objectives\"\n        ):\n            owner = self._get_owner_email(objective)\n            experts = [BasicExpertInfo(email=owner)] if owner else None\n\n            metadata: dict[str, str | list[str]] = {\n                \"entity_type\": \"objective\",\n            }\n            if objective.get(\"state\"):\n                metadata[\"state\"] = str(objective[\"state\"])\n\n            yield Document(\n                id=objective[\"id\"],\n                sections=[\n                    TextSection(\n                        link=objective[\"links\"][\"html\"],\n                        text=self._parse_description_html(objective[\"description\"]),\n                    )\n                ],\n                semantic_identifier=objective[\"name\"],\n                source=DocumentSource.PRODUCTBOARD,\n                doc_updated_at=time_str_to_utc(objective[\"updatedAt\"]),\n                primary_owners=experts,\n                metadata=metadata,\n            )\n\n    def _is_updated_at_out_of_time_range(\n        self,\n        document: Document,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n    ) -> bool:\n        updated_at = cast(str, document.metadata.get(\"updated_at\", \"\"))\n        if updated_at:\n            updated_at_datetime = parser.parse(updated_at)\n            if (\n                updated_at_datetime.timestamp() < start\n                or updated_at_datetime.timestamp() > end\n            ):\n                return True\n        else:\n            logger.debug(f\"Unable to find updated_at for document '{document.id}'\")\n\n        return False\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        if self.access_token is None:\n            raise PermissionError(\n                \"Access token is not set up, was load_credentials called?\"\n            )\n\n        document_batch: list[Document | HierarchyNode] = []\n\n        # NOTE: there is a concept of a \"Note\" in productboard, however\n        # there is no read API for it atm. Additionally, comments are not\n        # included with features. Finally, \"Releases\" are not fetched atm,\n        # since they do not provide an updatedAt.\n        feature_documents = self._get_features()\n        component_documents = self._get_components()\n        product_documents = self._get_products()\n        objective_documents = self._get_objectives()\n        for document in chain(\n            feature_documents,\n            component_documents,\n            product_documents,\n            objective_documents,\n        ):\n            # skip documents that are not in the time range\n            if self._is_updated_at_out_of_time_range(document, start, end):\n                continue\n\n            document_batch.append(document)\n            if len(document_batch) >= self.batch_size:\n                yield document_batch\n                document_batch = []\n\n        if document_batch:\n            yield document_batch\n\n\nif __name__ == \"__main__\":\n    import os\n    import time\n\n    connector = ProductboardConnector()\n    connector.load_credentials(\n        {\n            \"productboard_access_token\": os.environ[\"PRODUCTBOARD_ACCESS_TOKEN\"],\n        }\n    )\n\n    current = time.time()\n    one_year_ago = current - 24 * 60 * 60 * 360\n    latest_docs = connector.poll_source(one_year_ago, current)\n    print(next(latest_docs))\n"
  },
  {
    "path": "backend/onyx/connectors/registry.py",
    "content": "\"\"\"Registry mapping for connector classes.\"\"\"\n\nfrom pydantic import BaseModel\n\nfrom onyx.configs.constants import DocumentSource\n\n\nclass ConnectorMapping(BaseModel):\n    module_path: str\n    class_name: str\n\n\n# Mapping of DocumentSource to connector details for lazy loading\nCONNECTOR_CLASS_MAP = {\n    DocumentSource.WEB: ConnectorMapping(\n        module_path=\"onyx.connectors.web.connector\",\n        class_name=\"WebConnector\",\n    ),\n    DocumentSource.FILE: ConnectorMapping(\n        module_path=\"onyx.connectors.file.connector\",\n        class_name=\"LocalFileConnector\",\n    ),\n    DocumentSource.SLACK: ConnectorMapping(\n        module_path=\"onyx.connectors.slack.connector\",\n        class_name=\"SlackConnector\",\n    ),\n    DocumentSource.GITHUB: ConnectorMapping(\n        module_path=\"onyx.connectors.github.connector\",\n        class_name=\"GithubConnector\",\n    ),\n    DocumentSource.GMAIL: ConnectorMapping(\n        module_path=\"onyx.connectors.gmail.connector\",\n        class_name=\"GmailConnector\",\n    ),\n    DocumentSource.GITLAB: ConnectorMapping(\n        module_path=\"onyx.connectors.gitlab.connector\",\n        class_name=\"GitlabConnector\",\n    ),\n    DocumentSource.GITBOOK: ConnectorMapping(\n        module_path=\"onyx.connectors.gitbook.connector\",\n        class_name=\"GitbookConnector\",\n    ),\n    DocumentSource.GOOGLE_DRIVE: ConnectorMapping(\n        module_path=\"onyx.connectors.google_drive.connector\",\n        class_name=\"GoogleDriveConnector\",\n    ),\n    DocumentSource.BOOKSTACK: ConnectorMapping(\n        module_path=\"onyx.connectors.bookstack.connector\",\n        class_name=\"BookstackConnector\",\n    ),\n    DocumentSource.OUTLINE: ConnectorMapping(\n        module_path=\"onyx.connectors.outline.connector\",\n        class_name=\"OutlineConnector\",\n    ),\n    DocumentSource.CONFLUENCE: ConnectorMapping(\n        module_path=\"onyx.connectors.confluence.connector\",\n        class_name=\"ConfluenceConnector\",\n    ),\n    DocumentSource.JIRA: ConnectorMapping(\n        module_path=\"onyx.connectors.jira.connector\",\n        class_name=\"JiraConnector\",\n    ),\n    DocumentSource.PRODUCTBOARD: ConnectorMapping(\n        module_path=\"onyx.connectors.productboard.connector\",\n        class_name=\"ProductboardConnector\",\n    ),\n    DocumentSource.SLAB: ConnectorMapping(\n        module_path=\"onyx.connectors.slab.connector\",\n        class_name=\"SlabConnector\",\n    ),\n    DocumentSource.CODA: ConnectorMapping(\n        module_path=\"onyx.connectors.coda.connector\",\n        class_name=\"CodaConnector\",\n    ),\n    DocumentSource.CANVAS: ConnectorMapping(\n        module_path=\"onyx.connectors.canvas.connector\",\n        class_name=\"CanvasConnector\",\n    ),\n    DocumentSource.NOTION: ConnectorMapping(\n        module_path=\"onyx.connectors.notion.connector\",\n        class_name=\"NotionConnector\",\n    ),\n    DocumentSource.ZULIP: ConnectorMapping(\n        module_path=\"onyx.connectors.zulip.connector\",\n        class_name=\"ZulipConnector\",\n    ),\n    DocumentSource.GURU: ConnectorMapping(\n        module_path=\"onyx.connectors.guru.connector\",\n        class_name=\"GuruConnector\",\n    ),\n    DocumentSource.LINEAR: ConnectorMapping(\n        module_path=\"onyx.connectors.linear.connector\",\n        class_name=\"LinearConnector\",\n    ),\n    DocumentSource.HUBSPOT: ConnectorMapping(\n        module_path=\"onyx.connectors.hubspot.connector\",\n        class_name=\"HubSpotConnector\",\n    ),\n    DocumentSource.DOCUMENT360: ConnectorMapping(\n        module_path=\"onyx.connectors.document360.connector\",\n        class_name=\"Document360Connector\",\n    ),\n    DocumentSource.GONG: ConnectorMapping(\n        module_path=\"onyx.connectors.gong.connector\",\n        class_name=\"GongConnector\",\n    ),\n    DocumentSource.GOOGLE_SITES: ConnectorMapping(\n        module_path=\"onyx.connectors.google_site.connector\",\n        class_name=\"GoogleSitesConnector\",\n    ),\n    DocumentSource.ZENDESK: ConnectorMapping(\n        module_path=\"onyx.connectors.zendesk.connector\",\n        class_name=\"ZendeskConnector\",\n    ),\n    DocumentSource.LOOPIO: ConnectorMapping(\n        module_path=\"onyx.connectors.loopio.connector\",\n        class_name=\"LoopioConnector\",\n    ),\n    DocumentSource.DROPBOX: ConnectorMapping(\n        module_path=\"onyx.connectors.dropbox.connector\",\n        class_name=\"DropboxConnector\",\n    ),\n    DocumentSource.SHAREPOINT: ConnectorMapping(\n        module_path=\"onyx.connectors.sharepoint.connector\",\n        class_name=\"SharepointConnector\",\n    ),\n    DocumentSource.TEAMS: ConnectorMapping(\n        module_path=\"onyx.connectors.teams.connector\",\n        class_name=\"TeamsConnector\",\n    ),\n    DocumentSource.SALESFORCE: ConnectorMapping(\n        module_path=\"onyx.connectors.salesforce.connector\",\n        class_name=\"SalesforceConnector\",\n    ),\n    DocumentSource.DISCOURSE: ConnectorMapping(\n        module_path=\"onyx.connectors.discourse.connector\",\n        class_name=\"DiscourseConnector\",\n    ),\n    DocumentSource.AXERO: ConnectorMapping(\n        module_path=\"onyx.connectors.axero.connector\",\n        class_name=\"AxeroConnector\",\n    ),\n    DocumentSource.CLICKUP: ConnectorMapping(\n        module_path=\"onyx.connectors.clickup.connector\",\n        class_name=\"ClickupConnector\",\n    ),\n    DocumentSource.MEDIAWIKI: ConnectorMapping(\n        module_path=\"onyx.connectors.mediawiki.wiki\",\n        class_name=\"MediaWikiConnector\",\n    ),\n    DocumentSource.WIKIPEDIA: ConnectorMapping(\n        module_path=\"onyx.connectors.wikipedia.connector\",\n        class_name=\"WikipediaConnector\",\n    ),\n    DocumentSource.ASANA: ConnectorMapping(\n        module_path=\"onyx.connectors.asana.connector\",\n        class_name=\"AsanaConnector\",\n    ),\n    DocumentSource.S3: ConnectorMapping(\n        module_path=\"onyx.connectors.blob.connector\",\n        class_name=\"BlobStorageConnector\",\n    ),\n    DocumentSource.R2: ConnectorMapping(\n        module_path=\"onyx.connectors.blob.connector\",\n        class_name=\"BlobStorageConnector\",\n    ),\n    DocumentSource.GOOGLE_CLOUD_STORAGE: ConnectorMapping(\n        module_path=\"onyx.connectors.blob.connector\",\n        class_name=\"BlobStorageConnector\",\n    ),\n    DocumentSource.OCI_STORAGE: ConnectorMapping(\n        module_path=\"onyx.connectors.blob.connector\",\n        class_name=\"BlobStorageConnector\",\n    ),\n    DocumentSource.XENFORO: ConnectorMapping(\n        module_path=\"onyx.connectors.xenforo.connector\",\n        class_name=\"XenforoConnector\",\n    ),\n    DocumentSource.DISCORD: ConnectorMapping(\n        module_path=\"onyx.connectors.discord.connector\",\n        class_name=\"DiscordConnector\",\n    ),\n    DocumentSource.FRESHDESK: ConnectorMapping(\n        module_path=\"onyx.connectors.freshdesk.connector\",\n        class_name=\"FreshdeskConnector\",\n    ),\n    DocumentSource.FIREFLIES: ConnectorMapping(\n        module_path=\"onyx.connectors.fireflies.connector\",\n        class_name=\"FirefliesConnector\",\n    ),\n    DocumentSource.EGNYTE: ConnectorMapping(\n        module_path=\"onyx.connectors.egnyte.connector\",\n        class_name=\"EgnyteConnector\",\n    ),\n    DocumentSource.AIRTABLE: ConnectorMapping(\n        module_path=\"onyx.connectors.airtable.airtable_connector\",\n        class_name=\"AirtableConnector\",\n    ),\n    DocumentSource.HIGHSPOT: ConnectorMapping(\n        module_path=\"onyx.connectors.highspot.connector\",\n        class_name=\"HighspotConnector\",\n    ),\n    DocumentSource.DRUPAL_WIKI: ConnectorMapping(\n        module_path=\"onyx.connectors.drupal_wiki.connector\",\n        class_name=\"DrupalWikiConnector\",\n    ),\n    DocumentSource.IMAP: ConnectorMapping(\n        module_path=\"onyx.connectors.imap.connector\",\n        class_name=\"ImapConnector\",\n    ),\n    DocumentSource.BITBUCKET: ConnectorMapping(\n        module_path=\"onyx.connectors.bitbucket.connector\",\n        class_name=\"BitbucketConnector\",\n    ),\n    DocumentSource.TESTRAIL: ConnectorMapping(\n        module_path=\"onyx.connectors.testrail.connector\",\n        class_name=\"TestRailConnector\",\n    ),\n    # just for integration tests\n    DocumentSource.MOCK_CONNECTOR: ConnectorMapping(\n        module_path=\"onyx.connectors.mock_connector.connector\",\n        class_name=\"MockConnector\",\n    ),\n}\n"
  },
  {
    "path": "backend/onyx/connectors/requesttracker/.gitignore",
    "content": ".env\n"
  },
  {
    "path": "backend/onyx/connectors/requesttracker/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/requesttracker/connector.py",
    "content": "# from datetime import datetime\n# from datetime import timezone\n# from logging import DEBUG as LOG_LVL_DEBUG\n# from typing import Any\n# from typing import List\n# from typing import Optional\n# from rt.rest1 import ALL_QUEUES\n# from rt.rest1 import Rt\n# from onyx.configs.app_configs import INDEX_BATCH_SIZE\n# from onyx.configs.constants import DocumentSource\n# from onyx.connectors.interfaces import GenerateDocumentsOutput\n# from onyx.connectors.interfaces import PollConnector\n# from onyx.connectors.interfaces import SecondsSinceUnixEpoch\n# from onyx.connectors.models import ConnectorMissingCredentialError\n# from onyx.connectors.models import Document\n# from onyx.connectors.models import Section\n# from onyx.utils.logger import setup_logger\n# logger = setup_logger()\n# class RequestTrackerError(Exception):\n#     pass\n# class RequestTrackerConnector(PollConnector):\n#     def __init__(\n#         self,\n#         batch_size: int = INDEX_BATCH_SIZE,\n#     ) -> None:\n#         self.batch_size = batch_size\n#     def txn_link(self, tid: int, txn: int) -> str:\n#         return f\"{self.rt_base_url}/Ticket/Display.html?id={tid}&txn={txn}\"\n#     def build_doc_sections_from_txn(\n#         self, connection: Rt, ticket_id: int\n#     ) -> List[Section]:\n#         Sections: List[Section] = []\n#         get_history_resp = connection.get_history(ticket_id)\n#         if get_history_resp is None:\n#             raise RequestTrackerError(f\"Ticket {ticket_id} cannot be found\")\n#         for tx in get_history_resp:\n#             Sections.append(\n#                 Section(\n#                     link=self.txn_link(ticket_id, int(tx[\"id\"])),\n#                     text=\"\\n\".join(\n#                         [\n#                             f\"{k}:\\n{v}\\n\" if k != \"Attachments\" else \"\"\n#                             for (k, v) in tx.items()\n#                         ]\n#                     ),\n#                 )\n#             )\n#         return Sections\n#     def load_credentials(self, credentials: dict[str, Any]) -> Optional[dict[str, Any]]:\n#         self.rt_username = credentials.get(\"requesttracker_username\")\n#         self.rt_password = credentials.get(\"requesttracker_password\")\n#         self.rt_base_url = credentials.get(\"requesttracker_base_url\")\n#         return None\n#     # This does not include RT file attachments yet.\n#     def _process_tickets(\n#         self, start: datetime, end: datetime\n#     ) -> GenerateDocumentsOutput:\n#         if any([self.rt_username, self.rt_password, self.rt_base_url]) is None:\n#             raise ConnectorMissingCredentialError(\"requesttracker\")\n#         Rt0 = Rt(\n#             f\"{self.rt_base_url}/REST/1.0/\",\n#             self.rt_username,\n#             self.rt_password,\n#         )\n#         Rt0.login()\n#         d0 = start.strftime(\"%Y-%m-%d %H:%M:%S\")\n#         d1 = end.strftime(\"%Y-%m-%d %H:%M:%S\")\n#         tickets = Rt0.search(\n#             Queue=ALL_QUEUES,\n#             raw_query=f\"Updated > '{d0}' AND Updated < '{d1}'\",\n#         )\n#         doc_batch: List[Document] = []\n#         for ticket in tickets:\n#             ticket_keys_to_omit = [\"id\", \"Subject\"]\n#             tid: int = int(ticket[\"numerical_id\"])\n#             ticketLink: str = f\"{self.rt_base_url}/Ticket/Display.html?id={tid}\"\n#             logger.info(f\"Processing ticket {tid}\")\n#             doc = Document(\n#                 id=ticket[\"id\"],\n#                 # Will add title to the first section later in processing\n#                 sections=[Section(link=ticketLink, text=\"\")]\n#                 + self.build_doc_sections_from_txn(Rt0, tid),\n#                 source=DocumentSource.REQUESTTRACKER,\n#                 semantic_identifier=ticket[\"Subject\"],\n#                 metadata={\n#                     key: value\n#                     for key, value in ticket.items()\n#                     if key not in ticket_keys_to_omit\n#                 },\n#             )\n#             doc_batch.append(doc)\n#             if len(doc_batch) >= self.batch_size:\n#                 yield doc_batch\n#                 doc_batch = []\n#         if doc_batch:\n#             yield doc_batch\n#     def poll_source(\n#         self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n#     ) -> GenerateDocumentsOutput:\n#         # Keep query short, only look behind 1 day at maximum\n#         one_day_ago: float = end - (24 * 60 * 60)\n#         _start: float = start if start > one_day_ago else one_day_ago\n#         start_datetime = datetime.fromtimestamp(_start, tz=timezone.utc)\n#         end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)\n#         yield from self._process_tickets(start_datetime, end_datetime)\n# if __name__ == \"__main__\":\n#     import time\n#     import os\n#     from dotenv import load_dotenv\n#     load_dotenv()\n#     logger.setLevel(LOG_LVL_DEBUG)\n#     rt_connector = RequestTrackerConnector()\n#     rt_connector.load_credentials(\n#         {\n#             \"requesttracker_username\": os.getenv(\"RT_USERNAME\"),\n#             \"requesttracker_password\": os.getenv(\"RT_PASSWORD\"),\n#             \"requesttracker_base_url\": os.getenv(\"RT_BASE_URL\"),\n#         }\n#     )\n#     current = time.time()\n#     one_day_ago = current - (24 * 60 * 60)  # 1 days\n#     latest_docs = rt_connector.poll_source(one_day_ago, current)\n#     for doc in latest_docs:\n#         print(doc)\n"
  },
  {
    "path": "backend/onyx/connectors/salesforce/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/salesforce/blacklist.py",
    "content": "# NOTE(rkuo): I can't find an actual API that allows us to distinguish\n# broken/incompatible objects from regular ones.\n# taking hints from\n# https://docs.resco.net/wiki/Salesforce_object_blacklist\n\nSALESFORCE_BLACKLISTED_PREFIXES: set[str] = set(\n    [\n        \"process\",\n        \"aura\",\n        \"app\",\n        \"auth\",\n        \"duplicate\",\n        \"secure\",\n        \"data\",\n        \"listemail\",\n        \"fsl__optimization\",\n        \"fsl_scheduling\",\n        \"feed\",\n        \"chatter\",\n    ]\n)\n\nSALESFORCE_BLACKLISTED_SUFFIXES: set[str] = set(\n    [\n        \"history\",\n        \"share\",\n        \"__tag\",\n        \"__hd\",\n        \"feed\",\n        \"changeevent\",\n        \"__ka\",\n        \"__votestat\",\n        \"__viewstat\",\n        \"__kav\",\n        \"__datacategoryselection\",\n        \"subscription\",\n        \"definition\",\n        \"eventstream\",\n        \"__mdt\",\n    ]\n)\n\nSALESFORCE_BLACKLISTED_OBJECTS: set[str] = set(\n    [\n        \"acceptedeventrelation\",\n        \"accountchangeevent\",\n        \"accountcontactrole\",\n        \"accountcontactrolechangeevent\",\n        \"accounthistory\",\n        \"accountshare\",\n        \"actionlinkgrouptemplate\",\n        \"actionlinktemplate\",\n        \"activityhistory\",\n        \"adminsetupevent\",\n        \"aggregateresult\",\n        \"announcement\",\n        \"apexclass\",\n        \"apexcomponent\",\n        \"apexemailnotification\",\n        \"apexlog\",\n        \"apexpage\",\n        \"apexpageinfo\",\n        \"apextestqueueitem\",\n        \"apextestresult\",\n        \"apextestresultlimits\",\n        \"apextestrunresult\",\n        \"apextestsuite\",\n        \"apextrigger\",\n        \"apievent\",\n        \"apptabmember\",\n        \"assetchangeevent\",\n        \"assethistory\",\n        \"assetrelationshiphistory\",\n        \"assettokenevent\",\n        \"assignmentrule\",\n        \"asyncapexjob\",\n        \"backgroundoperation\",\n        \"backgroundoperationresult\",\n        \"batchapexerrorevent\",\n        \"brandingset\",\n        \"brandingsetproperty\",\n        \"brandtemplate\",\n        \"businessprocess\",\n        \"campaignchangeevent\",\n        \"campaignhistory\",\n        \"campaignshare\",\n        \"casechangeevent\",\n        \"caseexternaldocument\",\n        \"casehistory\",\n        \"caseshare\",\n        \"clientbrowser\",\n        \"collaborationgroup\",\n        \"collaborationgroupmember\",\n        \"collaborationgroupmemberrequest\",\n        \"collaborationinvitation\",\n        \"connectedapplication\",\n        \"contactchangeevent\",\n        \"contacthistory\",\n        \"contactrequest\",\n        \"contactrequestshare\",\n        \"contactshare\",\n        \"contentasset\",\n        \"contentbody\",\n        \"contentdocumenthistory\",\n        \"contenthubrepository\",\n        \"contenttagsubscription\",\n        \"contentusersubscription\",\n        \"contentversionhistory\",\n        \"contracthistory\",\n        \"corswhitelistentry\",\n        \"cronjobdetail\",\n        \"crontrigger\",\n        \"csptrustedsite\",\n        \"custombrand\",\n        \"custombrandasset\",\n        \"customhelpmenuitem\",\n        \"customhelpmenusection\",\n        \"customhttpheader\",\n        \"customobjectuserlicensemetrics\",\n        \"custompermission\",\n        \"custompermissiondependency\",\n        \"dandbcompany\",\n        \"dashboard\",\n        \"dashboardcomponent\",\n        \"digitalsignature\",\n        \"documentattachmentmap\",\n        \"domain\",\n        \"domainsite\",\n        \"emailcapture\",\n        \"emaildomainfilter\",\n        \"emaildomainkey\",\n        \"emailrelay\",\n        \"emailservicesaddress\",\n        \"emailservicesfunction\",\n        \"emailstatus\",\n        \"emailtemplate\",\n        \"embeddedservicedetail\",\n        \"embeddedservicelabel\",\n        \"entityparticle\",\n        \"eventbussubscriber\",\n        \"eventchangeevent\",\n        \"eventlogfile\",\n        \"eventrelationchangeevent\",\n        \"expressionfilter\",\n        \"expressionfiltercriteria\",\n        \"externaldatasource\",\n        \"externaldatauserauth\",\n        \"fieldhistoryarchive\",\n        \"fieldpermissions\",\n        \"fieldservicemobilesettings\",\n        \"filesearchactivity\",\n        \"fiscalyearsettings\",\n        \"flexqueueitem\",\n        \"flowinterview\",\n        \"flowinterviewshare\",\n        \"flowrecordrelation\",\n        \"flowstagerelation\",\n        \"forecastingshare\",\n        \"forecastshare\",\n        \"fsl__criteria__c\",\n        \"fsl__gantt_filter__c\",\n        \"fsl__ganttpalette__c\",\n        \"fsl__service_goal__c\",\n        \"fsl__slr_cache__c\",\n        \"fsl__territory_optimization_request__c\",\n        \"goalhistory\",\n        \"goalshare\",\n        \"grantedbylicense\",\n        \"idpeventlog\",\n        \"iframewhitelisturl\",\n        \"image\",\n        \"imageshare\",\n        \"installedmobileapp\",\n        \"leadchangeevent\",\n        \"leadhistory\",\n        \"leadshare\",\n        \"lightningexitbypagemetrics\",\n        \"lightningexperiencetheme\",\n        \"lightningtogglemetrics\",\n        \"lightningusagebyapptypemetrics\",\n        \"lightningusagebybrowsermetrics\",\n        \"lightningusagebyflexipagemetrics\",\n        \"lightningusagebypagemetrics\",\n        \"linkedarticle\",\n        \"listemailchangeevent\",\n        \"listemailshare\",\n        \"listview\",\n        \"listviewchart\",\n        \"listviewchartinstance\",\n        \"listviewevent\",\n        \"loginasevent\",\n        \"loginevent\",\n        \"logingeo\",\n        \"loginhistory\",\n        \"loginip\",\n        \"logoutevent\",\n        \"lookedupfromactivity\",\n        \"macro\",\n        \"macrohistory\",\n        \"macroinstruction\",\n        \"macroshare\",\n        \"mailmergetemplate\",\n        \"matchingrule\",\n        \"matchingruleitem\",\n        \"metricdatalinkhistory\",\n        \"metrichistory\",\n        \"metricshare\",\n        \"mobilesettingsassignment\",\n        \"mydomaindiscoverablelogin\",\n        \"name\",\n        \"namedcredential\",\n        \"noteandattachment\",\n        \"notificationmember\",\n        \"oauthtoken\",\n        \"objectpermissions\",\n        \"onboardingmetrics\",\n        \"openactivity\",\n        \"opportunitychangeevent\",\n        \"opportunitycontactrolechangeevent\",\n        \"opportunityfieldhistory\",\n        \"opportunityhistory\",\n        \"opportunityshare\",\n        \"orderchangeevent\",\n        \"orderhistory\",\n        \"orderitemchangeevent\",\n        \"orderitemhistory\",\n        \"ordershare\",\n        \"orgdeleterequest\",\n        \"orgdeleterequestshare\",\n        \"orglifecyclenotification\",\n        \"orgwideemailaddress\",\n        \"outgoingemail\",\n        \"outgoingemailrelation\",\n        \"ownerchangeoptioninfo\",\n        \"packagelicense\",\n        \"period\",\n        \"permissionsetlicense\",\n        \"permissionsetlicenseassign\",\n        \"permissionsettabsetting\",\n        \"person\",\n        \"picklistvalueinfo\",\n        \"platformaction\",\n        \"platformcachepartition\",\n        \"platformcachepartitiontype\",\n        \"platformstatusalertevent\",\n        \"pricebook2history\",\n        \"processinstancehistory\",\n        \"product2changeevent\",\n        \"product2history\",\n        \"publisher\",\n        \"pushtopic\",\n        \"pushupgradeexcludedorg\",\n        \"quicktexthistory\",\n        \"quicktextshare\",\n        \"quotetemplaterichtextdata\",\n        \"recordaction\",\n        \"recordactionhistory\",\n        \"recordvisibility\",\n        \"relationshipdomain\",\n        \"relationshipinfo\",\n        \"reportevent\",\n        \"samlssoconfig\",\n        \"scontrol\",\n        \"searchactivity\",\n        \"searchlayout\",\n        \"searchpromotionrule\",\n        \"securitycustombaseline\",\n        \"servicereportlayout\",\n        \"sessionpermsetactivation\",\n        \"setupaudittrail\",\n        \"setupentityaccess\",\n        \"site\",\n        \"sitedetail\",\n        \"sitehistory\",\n        \"siteiframewhitelisturl\",\n        \"solutionhistory\",\n        \"sosdeployment\",\n        \"sossession\",\n        \"sossessionactivity\",\n        \"sossessionhistory\",\n        \"sossessionshare\",\n        \"staticresource\",\n        \"streamingchannel\",\n        \"streamingchannelshare\",\n        \"subscriberpackage\",\n        \"subscriberpackageversion\",\n        \"taskchangeevent\",\n        \"tenantusageentitlement\",\n        \"testsuitemembership\",\n        \"thirdpartyaccountlink\",\n        \"todaygoal\",\n        \"todaygoalshare\",\n        \"transactionsecuritypolicy\",\n        \"twofactorinfo\",\n        \"twofactormethodsinfo\",\n        \"twofactortempcode\",\n        \"urievent\",\n        \"userappinfo\",\n        \"userappmenucustomization\",\n        \"userappmenucustomizationshare\",\n        \"userappmenuitem\",\n        \"userchangeevent\",\n        \"useremailpreferredperson\",\n        \"useremailpreferredpersonshare\",\n        \"userentityaccess\",\n        \"userfieldaccess\",\n        \"userlicense\",\n        \"userlistview\",\n        \"userlistviewcriterion\",\n        \"userlogin\",\n        \"userpackagelicense\",\n        \"userpermissionaccess\",\n        \"userpreference\",\n        \"userprovaccount\",\n        \"userprovaccountstaging\",\n        \"userprovisioningconfig\",\n        \"userprovisioninglog\",\n        \"userprovisioningrequest\",\n        \"userprovisioningrequestshare\",\n        \"userprovmocktarget\",\n        \"userrecordaccess\",\n        \"usershare\",\n        \"verificationhistory\",\n        \"visibilitychangenotification\",\n        \"visualforceaccessmetrics\",\n        \"waveautoinstallrequest\",\n        \"wavecompatibilitycheckitem\",\n        \"weblink\",\n        \"workcoachinghistory\",\n        \"workcoachingshare\",\n        \"workfeedbackhistory\",\n        \"workfeedbackquestion\",\n        \"workfeedbackquestionhistory\",\n        \"workfeedbackquestionsethistory\",\n        \"workfeedbackquestionsetshare\",\n        \"workfeedbackquestionshare\",\n        \"workfeedbackrequesthistory\",\n        \"workfeedbackrequestshare\",\n        \"workfeedbackshare\",\n        \"workfeedbacktemplateshare\",\n        \"workperformancecyclehistory\",\n        \"workperformancecycleshare\",\n    ]\n)\n"
  },
  {
    "path": "backend/onyx/connectors/salesforce/connector.py",
    "content": "import csv\nimport gc\nimport json\nimport os\nimport sys\nimport tempfile\nimport time\nfrom collections import defaultdict\nfrom collections.abc import Callable\nfrom pathlib import Path\nfrom typing import Any\nfrom typing import cast\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.connectors.salesforce.doc_conversion import convert_sf_object_to_doc\nfrom onyx.connectors.salesforce.doc_conversion import convert_sf_query_result_to_doc\nfrom onyx.connectors.salesforce.doc_conversion import ID_PREFIX\nfrom onyx.connectors.salesforce.onyx_salesforce import OnyxSalesforce\nfrom onyx.connectors.salesforce.salesforce_calls import fetch_all_csvs_in_parallel\nfrom onyx.connectors.salesforce.sqlite_functions import OnyxSalesforceSQLite\nfrom onyx.connectors.salesforce.utils import ACCOUNT_OBJECT_TYPE\nfrom onyx.connectors.salesforce.utils import ID_FIELD\nfrom onyx.connectors.salesforce.utils import MODIFIED_FIELD\nfrom onyx.connectors.salesforce.utils import NAME_FIELD\nfrom onyx.connectors.salesforce.utils import USER_OBJECT_TYPE\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\ndef _convert_to_metadata_value(value: Any) -> str | list[str]:\n    \"\"\"Convert a Salesforce field value to a valid metadata value.\n\n    Document metadata expects str | list[str], but Salesforce returns\n    various types (bool, float, int, etc.). This function ensures all\n    values are properly converted to strings.\n    \"\"\"\n    if isinstance(value, list):\n        return [str(item) for item in value]\n    return str(value)\n\n\n_DEFAULT_PARENT_OBJECT_TYPES = [ACCOUNT_OBJECT_TYPE]\n\n_DEFAULT_ATTRIBUTES_TO_KEEP: dict[str, dict[str, str]] = {\n    \"Opportunity\": {\n        ACCOUNT_OBJECT_TYPE: \"account\",\n        \"FiscalQuarter\": \"fiscal_quarter\",\n        \"FiscalYear\": \"fiscal_year\",\n        \"IsClosed\": \"is_closed\",\n        NAME_FIELD: \"name\",\n        \"StageName\": \"stage_name\",\n        \"Type\": \"type\",\n        \"Amount\": \"amount\",\n        \"CloseDate\": \"close_date\",\n        \"Probability\": \"probability\",\n        \"CreatedDate\": \"created_date\",\n        MODIFIED_FIELD: \"last_modified_date\",\n    },\n    \"Contact\": {\n        ACCOUNT_OBJECT_TYPE: \"account\",\n        \"CreatedDate\": \"created_date\",\n        MODIFIED_FIELD: \"last_modified_date\",\n    },\n}\n\n\nclass SalesforceCheckpoint(ConnectorCheckpoint):\n    initial_sync_complete: bool\n    current_timestamp: SecondsSinceUnixEpoch\n\n\nclass SalesforceConnectorContext:\n    parent_types: set[str] = set()\n    child_types: set[str] = set()\n    parent_to_child_types: dict[str, set[str]] = {}  # map from parent to child types\n    child_to_parent_types: dict[str, set[str]] = {}  # map from child to parent types\n    parent_reference_fields_by_type: dict[str, dict[str, list[str]]] = {}\n    type_to_queryable_fields: dict[str, set[str]] = {}\n    prefix_to_type: dict[str, str] = {}  # infer the object type of an id immediately\n\n    parent_to_child_relationships: dict[str, set[str]] = (\n        {}\n    )  # map from parent to child relationships\n    parent_to_relationship_queryable_fields: dict[str, dict[str, set[str]]] = (\n        {}\n    )  # map from relationship to queryable fields\n\n    parent_child_names_to_relationships: dict[str, str] = {}\n\n\ndef _extract_fields_and_associations_from_config(\n    config: dict[str, Any], object_type: str\n) -> tuple[list[str] | None, dict[str, list[str]]]:\n    \"\"\"\n    Extract fields and associations for a specific object type from custom config.\n\n    Returns:\n        tuple of (fields_list, associations_dict)\n        - fields_list: List of fields to query, or None if not specified (use all)\n        - associations_dict: Dict mapping association names to their config\n    \"\"\"\n    if object_type not in config:\n        return None, {}\n\n    obj_config = config[object_type]\n    fields = obj_config.get(\"fields\")\n    associations = obj_config.get(\"associations\", {})\n\n    return fields, associations\n\n\ndef _validate_custom_query_config(config: dict[str, Any]) -> None:\n    \"\"\"\n    Validate the structure of the custom query configuration.\n    \"\"\"\n\n    for object_type, obj_config in config.items():\n        if not isinstance(obj_config, dict):\n            raise ValueError(\n                f\"top level object {object_type} must be mapped to a dictionary\"\n            )\n\n        # Check if fields is a list when present\n        if \"fields\" in obj_config:\n            if not isinstance(obj_config[\"fields\"], list):\n                raise ValueError(\"if fields key exists, value must be a list\")\n            for v in obj_config[\"fields\"]:\n                if not isinstance(v, str):\n                    raise ValueError(f\"if fields list value {v} is not a string\")\n\n        # Check if associations is a dict when present\n        if \"associations\" in obj_config:\n            if not isinstance(obj_config[\"associations\"], dict):\n                raise ValueError(\n                    \"if associations key exists, value must be a dictionary\"\n                )\n            for assoc_name, assoc_fields in obj_config[\"associations\"].items():\n                if not isinstance(assoc_fields, list):\n                    raise ValueError(\n                        f\"associations list value {assoc_fields} for key {assoc_name} is not a list\"\n                    )\n                for v in assoc_fields:\n                    if not isinstance(v, str):\n                        raise ValueError(\n                            f\"if associations list value {v} is not a string\"\n                        )\n\n\nclass SalesforceConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):\n    \"\"\"Approach outline\n\n    Goal\n    - get data for every record of every parent object type\n    - The data should consist of the parent object record and all direct child relationship objects\n\n\n    Initial sync\n    - Does a full sync, then indexes each parent object + children as a document via\n    the local sqlite db\n\n    - get the first level children object types of parent object types\n    - bulk export all object types to CSV\n    -- NOTE: bulk exports of an object type contain parent id's, but not child id's\n    - Load all CSV's to the DB\n    - generate all parent object types as documents and yield them\n\n    - Initial sync's must always be for the entire dataset.\n      Otherwise, you can have cases where some records relate to other records that were\n      updated recently. The more recently updated records will not be pulled down in the query.\n\n    Delta sync's\n    - delta sync's detect changes in parent objects, then perform a full sync of\n    each parent object and its children\n\n    If loading the entire db, this approach is much slower. For deltas, it works well.\n\n    - query all changed records (includes children and parents)\n    - extrapolate all changed parent objects\n    - for each parent object, construct a query and yield the result back\n\n    - Delta sync's can be done object by object by identifying the parent id of any changed\n      record, and querying a single record at a time to get all the updated data.  In this way,\n      we avoid having to keep a locally synchronized copy of the entire salesforce db.\n\n    TODO: verify record to doc conversion\n    figure out why sometimes the field names are missing.\n    \"\"\"\n\n    MAX_BATCH_BYTES = 1024 * 1024\n    LOG_INTERVAL = 10.0  # how often to log stats in loop heavy parts of the connector\n\n    def __init__(\n        self,\n        batch_size: int = INDEX_BATCH_SIZE,\n        requested_objects: list[str] = [],\n        custom_query_config: str | None = None,\n    ) -> None:\n        self.batch_size = batch_size\n        self._sf_client: OnyxSalesforce | None = None\n\n        # Validate and store custom query config\n        if custom_query_config:\n            config_json = json.loads(custom_query_config)\n            self.custom_query_config: dict[str, Any] | None = config_json\n            # If custom query config is provided, use the object types from it\n            self.parent_object_list = list(config_json.keys())\n        else:\n            self.custom_query_config = None\n            # Use the traditional requested_objects approach\n            self.parent_object_list = (\n                [obj.strip().capitalize() for obj in requested_objects]\n                if requested_objects\n                else _DEFAULT_PARENT_OBJECT_TYPES\n            )\n\n    def load_credentials(\n        self,\n        credentials: dict[str, Any],\n    ) -> dict[str, Any] | None:\n        domain = \"test\" if credentials.get(\"is_sandbox\") else None\n        self._sf_client = OnyxSalesforce(\n            username=credentials[\"sf_username\"],\n            password=credentials[\"sf_password\"],\n            security_token=credentials[\"sf_security_token\"],\n            domain=domain,\n        )\n        return None\n\n    @property\n    def sf_client(self) -> OnyxSalesforce:\n        if self._sf_client is None:\n            raise ConnectorMissingCredentialError(\"Salesforce\")\n        return self._sf_client\n\n    @staticmethod\n    def reconstruct_object_types(directory: str) -> dict[str, list[str] | None]:\n        \"\"\"\n        Scans the given directory for all CSV files and reconstructs the available object types.\n        Assumes filenames are formatted as \"ObjectType.filename.csv\" or \"ObjectType.csv\".\n\n        Args:\n            directory (str): The path to the directory containing CSV files.\n\n        Returns:\n            dict[str, list[str]]: A dictionary mapping object types to lists of file paths.\n        \"\"\"\n        object_types = defaultdict(list)\n\n        for filename in os.listdir(directory):\n            if filename.endswith(\".csv\"):\n                parts = filename.split(\".\", 1)  # Split on the first period\n                object_type = parts[0]  # Take the first part as the object type\n                object_types[object_type].append(os.path.join(directory, filename))\n\n        return dict(object_types)\n\n    @staticmethod\n    def _download_object_csvs(\n        all_types_to_filter: dict[str, bool],\n        queryable_fields_by_type: dict[str, set[str]],\n        directory: str,\n        sf_client: OnyxSalesforce,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> None:\n        # checkpoint - we've found all object types, now time to fetch the data\n        logger.info(\"Fetching CSVs for all object types\")\n\n        # This takes like 30 minutes first time and <2 minutes for updates\n        object_type_to_csv_path = fetch_all_csvs_in_parallel(\n            sf_client=sf_client,\n            all_types_to_filter=all_types_to_filter,\n            queryable_fields_by_type=queryable_fields_by_type,\n            start=start,\n            end=end,\n            target_dir=directory,\n        )\n\n        # print useful information\n        num_csvs = 0\n        num_bytes = 0\n        for object_type, csv_paths in object_type_to_csv_path.items():\n            if not csv_paths:\n                continue\n\n            for csv_path in csv_paths:\n                if not csv_path:\n                    continue\n\n                file_path = Path(csv_path)\n                file_size = file_path.stat().st_size\n                num_csvs += 1\n                num_bytes += file_size\n                logger.info(\n                    f\"CSV download: object_type={object_type} path={csv_path} bytes={file_size}\"\n                )\n\n        logger.info(\n            f\"CSV download total: total_csvs={num_csvs} total_bytes={num_bytes}\"\n        )\n\n    @staticmethod\n    def _load_csvs_to_db(\n        csv_directory: str, remove_ids: bool, sf_db: OnyxSalesforceSQLite\n    ) -> dict[str, str]:\n        \"\"\"\n        Returns a dict of id to object type. Each id is a newly seen row in salesforce.\n        \"\"\"\n\n        updated_ids: dict[str, str] = {}\n\n        object_type_to_csv_path = SalesforceConnector.reconstruct_object_types(\n            csv_directory\n        )\n\n        # NOTE(rkuo): this timing note is meaningless without a reference point in terms\n        # of number of records, etc\n        # This takes like 10 seconds\n\n        # This is for testing the rest of the functionality if data has\n        # already been fetched and put in sqlite\n        # from import onyx.connectors.salesforce.sf_db.sqlite_functions find_ids_by_type\n        # for object_type in self.parent_object_list:\n        #     updated_ids.update(list(find_ids_by_type(object_type)))\n\n        # This takes 10-70 minutes first time (idk why the range is so big)\n        total_types = len(object_type_to_csv_path)\n        logger.info(f\"Starting to process {total_types} object types\")\n\n        for i, (object_type, csv_paths) in enumerate(\n            object_type_to_csv_path.items(), 1\n        ):\n            logger.info(f\"Processing object type {object_type} ({i}/{total_types})\")\n            # If path is None, it means it failed to fetch the csv\n            if csv_paths is None:\n                continue\n\n            # Go through each csv path and use it to update the db\n            for csv_path in csv_paths:\n                num_records = 0\n\n                logger.debug(\n                    f\"Processing CSV: object_type={object_type} \"\n                    f\"csv={csv_path} \"\n                    f\"len={Path(csv_path).stat().st_size} \"\n                    f\"records={num_records}\"\n                )\n\n                with open(csv_path, \"r\", newline=\"\", encoding=\"utf-8\") as f:\n                    reader = csv.DictReader(f)\n                    for row in reader:\n                        num_records += 1\n\n                new_ids = sf_db.update_from_csv(\n                    object_type=object_type,\n                    csv_download_path=csv_path,\n                    remove_ids=remove_ids,\n                )\n                for new_id in new_ids:\n                    updated_ids[new_id] = object_type\n\n                sf_db.flush()\n\n                logger.debug(\n                    f\"Added {len(new_ids)} new/updated records for {object_type}\"\n                )\n\n                logger.info(\n                    f\"Processed CSV: object_type={object_type} \"\n                    f\"csv={csv_path} \"\n                    f\"len={Path(csv_path).stat().st_size} \"\n                    f\"records={num_records} \"\n                    f\"db_len={sf_db.file_size}\"\n                )\n                os.remove(csv_path)\n\n        return updated_ids\n\n    # @staticmethod\n    # def _get_child_types(\n    #     parent_types: list[str], sf_client: OnyxSalesforce\n    # ) -> set[str]:\n    #     all_types: set[str] = set(parent_types)\n\n    #     # Step 1 - get all object types\n    #     logger.info(f\"Parent object types: num={len(parent_types)} list={parent_types}\")\n\n    #     # This takes like 20 seconds\n    #     for parent_object_type in parent_types:\n    #         child_types = sf_client.get_children_of_sf_type(parent_object_type)\n    #         logger.debug(\n    #             f\"Found {len(child_types)} child types for {parent_object_type}\"\n    #         )\n\n    #         all_types.update(child_types.keys())\n\n    #     # Always want to make sure user is grabbed for permissioning purposes\n    #     all_types.add(USER_OBJECT_TYPE)\n    #     # Always want to make sure account is grabbed for reference purposes\n    #     all_types.add(ACCOUNT_OBJECT_TYPE)\n\n    #     logger.info(f\"All object types: num={len(all_types)} list={all_types}\")\n\n    #     # gc.collect()\n    #     return all_types\n\n    # @staticmethod\n    # def _get_all_types(parent_types: list[str], sf_client: Salesforce) -> set[str]:\n    #     all_types: set[str] = set(parent_types)\n\n    #     # Step 1 - get all object types\n    #     logger.info(f\"Parent object types: num={len(parent_types)} list={parent_types}\")\n\n    #     # This takes like 20 seconds\n    #     for parent_object_type in parent_types:\n    #         child_types = get_children_of_sf_type(sf_client, parent_object_type)\n    #         logger.debug(\n    #             f\"Found {len(child_types)} child types for {parent_object_type}\"\n    #         )\n\n    #         all_types.update(child_types)\n\n    #     # Always want to make sure user is grabbed for permissioning purposes\n    #     all_types.add(USER_OBJECT_TYPE)\n\n    #     logger.info(f\"All object types: num={len(all_types)} list={all_types}\")\n\n    #     # gc.collect()\n    #     return all_types\n\n    def _yield_doc_batches(\n        self,\n        sf_db: OnyxSalesforceSQLite,\n        type_to_processed: dict[str, int],\n        changed_ids_to_type: dict[str, str],\n        parent_types: set[str],\n        increment_parents_changed: Callable[[], None],\n    ) -> GenerateDocumentsOutput:\n        \"\"\" \"\"\"\n        docs_to_yield: list[Document | HierarchyNode] = []\n        docs_to_yield_bytes = 0\n\n        last_log_time = 0.0\n\n        for (\n            parent_type,\n            parent_id,\n            examined_ids,\n        ) in sf_db.get_changed_parent_ids_by_type(\n            changed_ids=list(changed_ids_to_type.keys()),\n            parent_types=parent_types,\n        ):\n            now = time.monotonic()\n\n            processed = examined_ids - 1\n            if now - last_log_time > SalesforceConnector.LOG_INTERVAL:\n                logger.info(\n                    f\"Processing stats: {type_to_processed} \"\n                    f\"file_size={sf_db.file_size} \"\n                    f\"processed={processed} \"\n                    f\"remaining={len(changed_ids_to_type) - processed}\"\n                )\n                last_log_time = now\n\n            type_to_processed[parent_type] = type_to_processed.get(parent_type, 0) + 1\n\n            parent_object = sf_db.get_record(parent_id, parent_type)\n            if not parent_object:\n                logger.warning(\n                    f\"Failed to get parent object {parent_id} for {parent_type}\"\n                )\n                continue\n\n            # use the db to create a document we can yield\n            doc = convert_sf_object_to_doc(\n                sf_db,\n                sf_object=parent_object,\n                sf_instance=self.sf_client.sf_instance,\n            )\n\n            doc.metadata[\"object_type\"] = parent_type\n\n            # Add default attributes to the metadata\n            for (\n                sf_attribute,\n                canonical_attribute,\n            ) in _DEFAULT_ATTRIBUTES_TO_KEEP.get(parent_type, {}).items():\n                if sf_attribute in parent_object.data:\n                    doc.metadata[canonical_attribute] = _convert_to_metadata_value(\n                        parent_object.data[sf_attribute]\n                    )\n\n            doc_sizeof = sys.getsizeof(doc)\n            docs_to_yield_bytes += doc_sizeof\n            docs_to_yield.append(doc)\n            increment_parents_changed()\n\n            # memory usage is sensitive to the input length, so we're yielding immediately\n            # if the batch exceeds a certain byte length\n            if (\n                len(docs_to_yield) >= self.batch_size\n                or docs_to_yield_bytes > SalesforceConnector.MAX_BATCH_BYTES\n            ):\n                yield docs_to_yield\n                docs_to_yield = []\n                docs_to_yield_bytes = 0\n\n                # observed a memory leak / size issue with the account table if we don't gc.collect here.\n                gc.collect()\n\n        yield docs_to_yield\n\n    def _full_sync(\n        self,\n        temp_dir: str,\n    ) -> GenerateDocumentsOutput:\n        type_to_processed: dict[str, int] = {}\n\n        logger.info(\"_fetch_from_salesforce starting (full sync).\")\n        if not self._sf_client:\n            raise RuntimeError(\"self._sf_client is None!\")\n\n        changed_ids_to_type: dict[str, str] = {}\n        parents_changed = 0\n        examined_ids = 0\n\n        sf_db = OnyxSalesforceSQLite(os.path.join(temp_dir, \"salesforce_db.sqlite\"))\n        sf_db.connect()\n\n        try:\n            sf_db.apply_schema()\n            sf_db.log_stats()\n\n            ctx = self._make_context(\n                None, None, temp_dir, self.parent_object_list, self._sf_client\n            )\n            gc.collect()\n\n            # Step 2 - load CSV's to sqlite\n            object_type_to_csv_paths = SalesforceConnector.reconstruct_object_types(\n                temp_dir\n            )\n\n            total_types = len(object_type_to_csv_paths)\n            logger.info(f\"Starting to process {total_types} object types\")\n\n            for i, (object_type, csv_paths) in enumerate(\n                object_type_to_csv_paths.items(), 1\n            ):\n                logger.info(f\"Processing object type {object_type} ({i}/{total_types})\")\n                # If path is None, it means it failed to fetch the csv\n                if csv_paths is None:\n                    continue\n\n                # Go through each csv path and use it to update the db\n                for csv_path in csv_paths:\n                    num_records = 0\n                    with open(csv_path, \"r\", newline=\"\", encoding=\"utf-8\") as f:\n                        reader = csv.DictReader(f)\n                        for row in reader:\n                            num_records += 1\n\n                    logger.debug(\n                        f\"Processing CSV: object_type={object_type} \"\n                        f\"csv={csv_path} \"\n                        f\"len={Path(csv_path).stat().st_size} \"\n                        f\"records={num_records}\"\n                    )\n\n                    new_ids = sf_db.update_from_csv(\n                        object_type=object_type,\n                        csv_download_path=csv_path,\n                    )\n                    for new_id in new_ids:\n                        changed_ids_to_type[new_id] = object_type\n\n                    sf_db.flush()\n\n                    logger.debug(\n                        f\"Added {len(new_ids)} new/updated records for {object_type}\"\n                    )\n\n                    logger.info(\n                        f\"Processed CSV: object_type={object_type} \"\n                        f\"csv={csv_path} \"\n                        f\"len={Path(csv_path).stat().st_size} \"\n                        f\"records={num_records} \"\n                        f\"db_len={sf_db.file_size}\"\n                    )\n\n                    os.remove(csv_path)\n                    gc.collect()\n\n            gc.collect()\n\n            logger.info(f\"Found {len(changed_ids_to_type)} total updated records\")\n            logger.info(\n                f\"Starting to process parent objects of types: {ctx.parent_types}\"\n            )\n\n            # Step 3 - extract and index docs\n            def increment_parents_changed() -> None:\n                nonlocal parents_changed\n                parents_changed += 1\n\n            yield from self._yield_doc_batches(\n                sf_db,\n                type_to_processed,\n                changed_ids_to_type,\n                ctx.parent_types,\n                increment_parents_changed,\n            )\n        except Exception:\n            logger.exception(\"Unexpected exception\")\n            raise\n        finally:\n            logger.info(\n                f\"Final processing stats: \"\n                f\"examined={examined_ids} \"\n                f\"parents_changed={parents_changed} \"\n                f\"remaining={len(changed_ids_to_type) - examined_ids}\"\n            )\n\n            logger.info(f\"Top level object types processed: {type_to_processed}\")\n\n            sf_db.close()\n\n    def _delta_sync(\n        self,\n        temp_dir: str,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n    ) -> GenerateDocumentsOutput:\n        type_to_processed: dict[str, int] = {}\n\n        logger.info(\"_fetch_from_salesforce starting (delta sync).\")\n        if not self._sf_client:\n            raise RuntimeError(\"self._sf_client is None!\")\n\n        changed_ids_to_type: dict[str, str] = {}\n        parents_changed = 0\n        processed = 0\n\n        sf_db = OnyxSalesforceSQLite(os.path.join(temp_dir, \"salesforce_db.sqlite\"))\n        sf_db.connect()\n\n        try:\n            sf_db.apply_schema()\n            sf_db.log_stats()\n\n            ctx = self._make_context(\n                start, end, temp_dir, self.parent_object_list, self._sf_client\n            )\n            gc.collect()\n\n            # Step 2 - load CSV's to sqlite\n            changed_ids_to_type = SalesforceConnector._load_csvs_to_db(\n                temp_dir, False, sf_db\n            )\n            gc.collect()\n\n            logger.info(f\"Found {len(changed_ids_to_type)} total updated records\")\n            logger.info(\n                f\"Starting to process parent objects of types: {ctx.parent_types}\"\n            )\n\n            # Step 3 - extract and index docs\n            docs_to_yield: list[Document | HierarchyNode] = []\n            docs_to_yield_bytes = 0\n\n            last_log_time = 0.0\n\n            # this is a partial sync, so all changed parent id's must be retrieved from salesforce\n            # NOTE: it may be an option to identify the object type of an id with its prefix\n            # but unfortunately it's possible for an object type to not have a prefix.\n            # so that would work in many important cases, but not all.\n            for (\n                parent_id,\n                actual_parent_type,\n                num_examined,\n            ) in sf_db.get_changed_parent_ids_by_type_2(\n                changed_ids=changed_ids_to_type,\n                parent_types=ctx.parent_types,\n                parent_relationship_fields_by_type=ctx.parent_reference_fields_by_type,\n                prefix_to_type=ctx.prefix_to_type,\n            ):\n                # this yields back each changed parent record, where changed means\n                # the parent record itself or a child record was updated.\n                now = time.monotonic()\n\n                # query salesforce for the changed parent id record\n                # NOTE(rkuo): we only know the record id and its possible types,\n                # so we actually need to check each type until we succeed\n                # to be entirely correct\n                # this may be a source of inefficiency and thinking about\n                # caching the most likely parent record type might be helpful\n\n                # actual_parent_type: str | None = None\n                # for possible_parent_type in possible_parent_types:\n                #     queryable_fields = ctx.queryable_fields_by_type[\n                #         possible_parent_type\n                #     ]\n                #     query = _get_object_by_id_query(\n                #         parent_id, possible_parent_type, queryable_fields\n                #     )\n                #     result = self._sf_client.query(query)\n                #     if result:\n                #         actual_parent_type = possible_parent_type\n                #         print(result)\n                #         break\n\n                # get the parent record fields\n                record = self._sf_client.query_object(\n                    actual_parent_type, parent_id, ctx.type_to_queryable_fields\n                )\n                if not record:\n                    continue\n\n                # queryable_fields = ctx.type_to_queryable_fields[\n                #     actual_parent_type\n                # ]\n                # query = get_object_by_id_query(\n                #     parent_id, actual_parent_type, queryable_fields\n                # )\n                # result = self._sf_client.query(query)\n                # if not result:\n                #     continue\n\n                # # print(result)\n                # record: dict[str, Any] = {}\n\n                # record_0 = result[\"records\"][0]\n                # for record_key, record_value in record_0.items():\n                #     if record_key == \"attributes\":\n                #         continue\n\n                #     record[record_key] = record_value\n\n                # for this parent type, increment the counter on the stats object\n                type_to_processed[actual_parent_type] = (\n                    type_to_processed.get(actual_parent_type, 0) + 1\n                )\n\n                # get the child records\n                child_relationships = ctx.parent_to_child_relationships[\n                    actual_parent_type\n                ]\n                relationship_to_queryable_fields = (\n                    ctx.parent_to_relationship_queryable_fields[actual_parent_type]\n                )\n                child_records = self.sf_client.get_child_objects_by_id(\n                    parent_id,\n                    actual_parent_type,\n                    list(child_relationships),\n                    relationship_to_queryable_fields,\n                )\n\n                # NOTE(rkuo): does using the parent last modified make sense if the update\n                # is being triggered because a child object changed?\n                primary_owner_list: list[BasicExpertInfo] | None = None\n                if \"LastModifiedById\" in record:\n                    try:\n                        last_modified_by_id = record[\"LastModifiedById\"]\n                        user_record = self.sf_client.query_object(\n                            USER_OBJECT_TYPE,\n                            last_modified_by_id,\n                            ctx.type_to_queryable_fields,\n                        )\n                        if user_record:\n                            primary_owner = BasicExpertInfo.from_dict(user_record)\n                            primary_owner_list = [primary_owner]\n                    except Exception:\n                        pass\n\n                # for child_record_key, child_record in child_records.items():\n                #     if not child_record:\n                #         continue\n\n                #     child_text_section = _extract_section(\n                #         child_record,\n                #         f\"https://{self._sf_client.sf_instance}/{child_record_key}\",\n                #     )\n                #     sections.append(child_text_section)\n\n                # for parent_relationship_field in parent_relationship_fields:\n                #     parent_relationship_id\n                # json.loads(parent_object.data)\n\n                # create and yield a document from the salesforce query\n                doc = convert_sf_query_result_to_doc(\n                    parent_id,\n                    record,\n                    child_records,\n                    primary_owner_list,\n                    self._sf_client,\n                )\n\n                # doc = Document(\n                #     id=ID_PREFIX + parent_id,\n                #     sections=cast(list[TextSection | ImageSection], sections),\n                #     source=DocumentSource.SALESFORCE,\n                #     semantic_identifier=parent_semantic_identifier,\n                #     doc_updated_at=time_str_to_utc(parent_last_modified_date),\n                #     primary_owners=primary_owner_list,\n                #     metadata={},\n                # )\n\n                # Add default attributes to the metadata\n                for (\n                    sf_attribute,\n                    canonical_attribute,\n                ) in _DEFAULT_ATTRIBUTES_TO_KEEP.get(actual_parent_type, {}).items():\n                    if sf_attribute in record:\n                        doc.metadata[canonical_attribute] = _convert_to_metadata_value(\n                            record[sf_attribute]\n                        )\n\n                doc_sizeof = sys.getsizeof(doc)\n                docs_to_yield_bytes += doc_sizeof\n                docs_to_yield.append(doc)\n                parents_changed += 1\n\n                # memory usage is sensitive to the input length, so we're yielding immediately\n                # if the batch exceeds a certain byte length\n                if (\n                    len(docs_to_yield) >= self.batch_size\n                    or docs_to_yield_bytes > SalesforceConnector.MAX_BATCH_BYTES\n                ):\n                    yield docs_to_yield\n                    docs_to_yield = []\n                    docs_to_yield_bytes = 0\n\n                    # observed a memory leak / size issue with the account table if we don't gc.collect here.\n                    gc.collect()\n\n                processed = num_examined\n                if now - last_log_time > SalesforceConnector.LOG_INTERVAL:\n                    logger.info(\n                        f\"Processing stats: {type_to_processed} \"\n                        f\"processed={processed} \"\n                        f\"remaining={len(changed_ids_to_type) - processed}\"\n                    )\n                    last_log_time = now\n\n            yield docs_to_yield\n        except Exception:\n            logger.exception(\"Unexpected exception\")\n            raise\n        finally:\n            logger.info(\n                f\"Final processing stats: \"\n                f\"processed={processed} \"\n                f\"remaining={len(changed_ids_to_type) - processed} \"\n                f\"parents_changed={parents_changed}\"\n            )\n\n            logger.info(f\"Top level object types processed: {type_to_processed}\")\n\n            sf_db.close()\n\n    def _make_context(\n        self,\n        start: SecondsSinceUnixEpoch | None,\n        end: SecondsSinceUnixEpoch | None,\n        temp_dir: str,\n        parent_object_list: list[str],\n        sf_client: OnyxSalesforce,\n    ) -> SalesforceConnectorContext:\n        \"\"\"NOTE: I suspect we're doing way too many queries here. Likely fewer queries\n        and just parsing all the info we need in less passes will work.\"\"\"\n\n        parent_types = set(parent_object_list)\n        child_types: set[str] = set()\n        parent_to_child_types: dict[str, set[str]] = (\n            {}\n        )  # map from parent to child types\n        child_to_parent_types: dict[str, set[str]] = (\n            {}\n        )  # map from child to parent types\n\n        parent_reference_fields_by_type: dict[str, dict[str, list[str]]] = (\n            {}\n        )  # for a given object, the fields reference parent objects\n        type_to_queryable_fields: dict[str, set[str]] = {}\n        prefix_to_type: dict[str, str] = {}\n\n        parent_to_child_relationships: dict[str, set[str]] = (\n            {}\n        )  # map from parent to child relationships\n\n        # relationship keys are formatted as \"parent__relationship\"\n        # we have to do this because relationship names are not unique!\n        # values are a dict of relationship names to a list of queryable fields\n        parent_to_relationship_queryable_fields: dict[str, dict[str, set[str]]] = {}\n\n        parent_child_names_to_relationships: dict[str, str] = {}\n\n        full_sync = start is None and end is None\n\n        # Step 1 - make a list of all the types to download (parent + direct child + USER_OBJECT_TYPE)\n        # prefixes = {}\n\n        global_description = sf_client.describe()\n        if not global_description:\n            raise RuntimeError(\"sf_client.describe failed\")\n\n        for sobject in global_description[\"sobjects\"]:\n            if sobject[\"keyPrefix\"]:\n                prefix_to_type[sobject[\"keyPrefix\"]] = sobject[\"name\"]\n                # prefixes[sobject['keyPrefix']] = {\n                #     'object_name': sobject['name'],\n                #     'label': sobject['label'],\n                #     'is_custom': sobject['custom']\n                # }\n\n        logger.info(f\"Describe: num_prefixes={len(prefix_to_type)}\")\n\n        logger.info(f\"Parent object types: num={len(parent_types)} list={parent_types}\")\n        for parent_type in parent_types:\n            # parent_onyx_sf_type = OnyxSalesforceType(parent_type, sf_client)\n\n            custom_fields: list[str] | None = []\n            associations_config: dict[str, list[str]] | None = None\n\n            # Set queryable fields for parent type\n            if self.custom_query_config:\n                custom_fields, associations_config = (\n                    _extract_fields_and_associations_from_config(\n                        self.custom_query_config, parent_type\n                    )\n                )\n                custom_fields = custom_fields or []\n\n                # Get custom fields for parent type\n                field_set = set(custom_fields)\n                # used during doc conversion\n                # field_set.add(NAME_FIELD) # does not always exist\n                field_set.add(ID_FIELD)\n                field_set.add(MODIFIED_FIELD)\n\n                # Use only the specified fields\n                type_to_queryable_fields[parent_type] = field_set\n                logger.info(f\"Using custom fields for {parent_type}: {field_set}\")\n            else:\n                # Use all queryable fields\n                type_to_queryable_fields[parent_type] = (\n                    sf_client.get_queryable_fields_by_type(parent_type)\n                )\n                logger.info(f\"Using all fields for {parent_type}\")\n\n            child_types_all = sf_client.get_children_of_sf_type(parent_type)\n            logger.debug(f\"Found {len(child_types_all)} child types for {parent_type}\")\n            logger.debug(f\"child types: {child_types_all}\")\n\n            child_types_working = child_types_all.copy()\n            if associations_config is not None:\n                child_types_working = {\n                    k: v for k, v in child_types_all.items() if k in associations_config\n                }\n                any_not_found = False\n                for k in associations_config:\n                    if k not in child_types_working:\n                        any_not_found = True\n                        logger.warning(f\"Association {k} not found in {parent_type}\")\n                if any_not_found:\n                    queryable_fields = sf_client.get_queryable_fields_by_type(\n                        parent_type\n                    )\n                    raise RuntimeError(\n                        f\"Associations {associations_config} not found in {parent_type} \"\n                        \"make sure your parent-child associations are in the right order\"\n                        # f\"with child objects {child_types_all}\"\n                        # f\" and fields {queryable_fields}\"\n                    )\n\n            parent_to_child_relationships[parent_type] = set()\n            parent_to_child_types[parent_type] = set()\n            parent_to_relationship_queryable_fields[parent_type] = {}\n\n            for child_type, child_relationship in child_types_working.items():\n                child_type = cast(str, child_type)\n\n                # onyx_sf_type = OnyxSalesforceType(child_type, sf_client)\n\n                # map parent name to child name\n                parent_to_child_types[parent_type].add(child_type)\n\n                # reverse map child name to parent name\n                if child_type not in child_to_parent_types:\n                    child_to_parent_types[child_type] = set()\n                child_to_parent_types[child_type].add(parent_type)\n\n                # map parent name to child relationship\n                parent_to_child_relationships[parent_type].add(child_relationship)\n\n                # map relationship to queryable fields of the target table\n                if config_fields := (\n                    associations_config and associations_config.get(child_type)\n                ):\n                    field_set = set(config_fields)\n                    # these are expected and used during doc conversion\n                    # field_set.add(NAME_FIELD) # does not always exist\n                    field_set.add(ID_FIELD)\n                    field_set.add(MODIFIED_FIELD)\n                    queryable_fields = field_set\n                else:\n                    queryable_fields = sf_client.get_queryable_fields_by_type(\n                        child_type\n                    )\n\n                if child_relationship in parent_to_relationship_queryable_fields:\n                    raise RuntimeError(f\"{child_relationship=} already exists\")\n\n                parent_to_relationship_queryable_fields[parent_type][\n                    child_relationship\n                ] = queryable_fields\n\n                type_to_queryable_fields[child_type] = queryable_fields\n\n                parent_child_names_to_relationships[f\"{parent_type}__{child_type}\"] = (\n                    child_relationship\n                )\n\n            child_types.update(child_types_working.keys())\n            logger.info(\n                f\"Child object types: parent={parent_type} num={len(child_types_working)} list={child_types_working.keys()}\"\n            )\n\n        logger.info(\n            f\"Final child object types: num={len(child_types)} list={child_types}\"\n        )\n\n        all_types: set[str] = set(parent_types)\n        all_types.update(child_types)\n\n        # NOTE(rkuo): should this be an implicit parent type?\n        all_types.add(USER_OBJECT_TYPE)  # Always add User for permissioning purposes\n        all_types.add(ACCOUNT_OBJECT_TYPE)  # Always add Account for reference purposes\n\n        logger.info(f\"All object types: num={len(all_types)} list={all_types}\")\n\n        # Ensure User and Account have queryable fields if they weren't already processed\n        essential_types = [USER_OBJECT_TYPE, ACCOUNT_OBJECT_TYPE]\n        for essential_type in essential_types:\n            if essential_type not in type_to_queryable_fields:\n                type_to_queryable_fields[essential_type] = (\n                    sf_client.get_queryable_fields_by_type(essential_type)\n                )\n\n        # 1.1 - Detect all fields in child types which reference a parent type.\n        # build dicts to detect relationships between parent and child\n        for child_type in child_types.union(essential_types):\n            # onyx_sf_type = OnyxSalesforceType(child_type, sf_client)\n            parent_reference_fields = sf_client.get_parent_reference_fields(\n                child_type, parent_types\n            )\n\n            parent_reference_fields_by_type[child_type] = parent_reference_fields\n\n        # Only add time filter if there is at least one object of the type\n        # in the database. We aren't worried about partially completed object update runs\n        # because this occurs after we check for existing csvs which covers this case\n        # NOTE(rkuo):\n        all_types_to_filter: dict[str, bool] = {}\n        for sf_type in all_types:\n            # onyx_sf_type = OnyxSalesforceType(sf_type, sf_client)\n\n            # NOTE(rkuo): I'm not convinced it makes sense to restrict filtering at all\n            # all_types_to_filter[sf_type] = sf_db.object_type_count(sf_type) > 0\n            all_types_to_filter[sf_type] = not full_sync\n\n        # Step 1.2 - bulk download the CSV's for each object type\n        SalesforceConnector._download_object_csvs(\n            all_types_to_filter,\n            type_to_queryable_fields,\n            temp_dir,\n            sf_client,\n            start,\n            end,\n        )\n\n        return_context = SalesforceConnectorContext()\n        return_context.parent_types = parent_types\n        return_context.child_types = child_types\n        return_context.parent_to_child_types = parent_to_child_types\n        return_context.child_to_parent_types = child_to_parent_types\n        return_context.parent_reference_fields_by_type = parent_reference_fields_by_type\n        return_context.type_to_queryable_fields = type_to_queryable_fields\n        return_context.prefix_to_type = prefix_to_type\n\n        return_context.parent_to_child_relationships = parent_to_child_relationships\n        return_context.parent_to_relationship_queryable_fields = (\n            parent_to_relationship_queryable_fields\n        )\n\n        return_context.parent_child_names_to_relationships = (\n            parent_child_names_to_relationships\n        )\n\n        return return_context\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        # Always use a temp directory for SQLite - the database is rebuilt\n        # from scratch each time via CSV downloads, so there's no caching benefit\n        # from persisting it. Using temp dirs also avoids collisions between\n        # multiple CC pairs and eliminates stale WAL/SHM file issues.\n        # TODO(evan): make this thing checkpointed and persist/load db from filestore\n        with tempfile.TemporaryDirectory() as temp_dir:\n            yield from self._full_sync(temp_dir)\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        \"\"\"Poll source will synchronize updated parent objects one by one.\"\"\"\n        # Always use a temp directory - see comment in load_from_state()\n        with tempfile.TemporaryDirectory() as temp_dir:\n            yield from self._delta_sync(temp_dir, start, end)\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002\n        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002\n        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002\n    ) -> GenerateSlimDocumentOutput:\n        doc_metadata_list: list[SlimDocument | HierarchyNode] = []\n        for parent_object_type in self.parent_object_list:\n            query = f\"SELECT Id FROM {parent_object_type}\"\n            query_result = self.sf_client.safe_query_all(query)\n            doc_metadata_list.extend(\n                SlimDocument(\n                    id=f\"{ID_PREFIX}{instance_dict.get('Id', '')}\",\n                    external_access=None,\n                )\n                for instance_dict in query_result[\"records\"]\n            )\n\n        yield doc_metadata_list\n\n    def validate_connector_settings(self) -> None:\n        \"\"\"\n        Validate that the Salesforce credentials and connector settings are correct.\n        Specifically checks that we can make an authenticated request to Salesforce.\n        \"\"\"\n\n        try:\n            # Attempt to fetch a small batch of objects (arbitrary endpoint) to verify credentials\n            self.sf_client.describe()\n        except Exception as e:\n            raise ConnectorMissingCredentialError(\n                f\"Failed to validate Salesforce credentials. Please check yourcredentials and try again. Error: {e}\"\n            )\n\n        if self.custom_query_config:\n            try:\n                _validate_custom_query_config(self.custom_query_config)\n            except Exception as e:\n                raise ConnectorMissingCredentialError(\n                    f\"Failed to validate Salesforce custom query config. Please check yourconfig and try again. Error: {e}\"\n                )\n\n        logger.info(\"Salesforce credentials validated successfully.\")\n\n    # @override\n    # def load_from_checkpoint(\n    #     self,\n    #     start: SecondsSinceUnixEpoch,\n    #     end: SecondsSinceUnixEpoch,\n    #     checkpoint: SalesforceCheckpoint,\n    # ) -> CheckpointOutput[SalesforceCheckpoint]:\n    #     try:\n    #         return self._fetch_document_batches(checkpoint, start, end)\n    #     except Exception as e:\n    #         if _should_propagate_error(e) and start is not None:\n    #             logger.warning(\n    #                 \"Confluence says we provided an invalid 'updated' field. This may indicate\"\n    #                 \"a real issue, but can also appear during edge cases like daylight\"\n    #                 f\"savings time changes. Retrying with a 1 hour offset. Error: {e}\"\n    #             )\n    #             return self._fetch_document_batches(checkpoint, start - ONE_HOUR, end)\n    #         raise\n\n    # @override\n    # def build_dummy_checkpoint(self) -> SalesforceCheckpoint:\n    #     return SalesforceCheckpoint(last_updated=0, has_more=True, last_seen_doc_ids=[])\n\n    # @override\n    # def validate_checkpoint_json(self, checkpoint_json: str) -> SalesforceCheckpoint:\n    #     return SalesforceCheckpoint.model_validate_json(checkpoint_json)\n\n\nif __name__ == \"__main__\":\n    connector = SalesforceConnector(requested_objects=[ACCOUNT_OBJECT_TYPE])\n\n    connector.load_credentials(\n        {\n            \"sf_username\": os.environ[\"SF_USERNAME\"],\n            \"sf_password\": os.environ[\"SF_PASSWORD\"],\n            \"sf_security_token\": os.environ[\"SF_SECURITY_TOKEN\"],\n        }\n    )\n    start_time = time.monotonic()\n    doc_count = 0\n    section_count = 0\n    text_count = 0\n    for doc_batch in connector.load_from_state():\n        doc_count += len(doc_batch)\n        print(f\"doc_count: {doc_count}\")\n        for doc in doc_batch:\n            if isinstance(doc, HierarchyNode):\n                continue\n            section_count += len(doc.sections)\n            for section in doc.sections:\n                if isinstance(section, TextSection) and section.text is not None:\n                    text_count += len(section.text)\n    end_time = time.monotonic()\n\n    print(f\"Doc count: {doc_count}\")\n    print(f\"Section count: {section_count}\")\n    print(f\"Text count: {text_count}\")\n    print(f\"Time taken: {end_time - start_time}\")\n"
  },
  {
    "path": "backend/onyx/connectors/salesforce/doc_conversion.py",
    "content": "import re\nfrom typing import Any\nfrom typing import cast\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.connectors.salesforce.onyx_salesforce import OnyxSalesforce\nfrom onyx.connectors.salesforce.sqlite_functions import OnyxSalesforceSQLite\nfrom onyx.connectors.salesforce.utils import ID_FIELD\nfrom onyx.connectors.salesforce.utils import MODIFIED_FIELD\nfrom onyx.connectors.salesforce.utils import NAME_FIELD\nfrom onyx.connectors.salesforce.utils import SalesforceObject\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nID_PREFIX = \"SALESFORCE_\"\n\n# All of these types of keys are handled by specific fields in the doc\n# conversion process (E.g. URLs) or are not useful for the user (E.g. UUIDs)\n_SF_JSON_FILTER = r\"Id$|Date$|stamp$|url$\"\n\n\ndef _clean_salesforce_dict(data: dict | list) -> dict | list:\n    \"\"\"Clean and transform Salesforce API response data by recursively:\n    1. Extracting records from the response if present\n    2. Merging attributes into the main dictionary\n    3. Filtering out keys matching certain patterns (Id, Date, stamp, url)\n    4. Removing '__c' suffix from custom field names\n    5. Removing None values and empty containers\n\n    Args:\n        data: A dictionary or list from Salesforce API response\n\n    Returns:\n        Cleaned dictionary or list with transformed keys and filtered values\n    \"\"\"\n    if isinstance(data, dict):\n        if \"records\" in data.keys():\n            data = data[\"records\"]\n    if isinstance(data, dict):\n        if \"attributes\" in data.keys():\n            if isinstance(data[\"attributes\"], dict):\n                data.update(data.pop(\"attributes\"))\n\n    if isinstance(data, dict):\n        filtered_dict = {}\n        for key, value in data.items():\n            if not re.search(_SF_JSON_FILTER, key, re.IGNORECASE):\n                # remove the custom object indicator for display\n                if \"__c\" in key:\n                    key = key[:-3]\n                if isinstance(value, (dict, list)):\n                    filtered_value = _clean_salesforce_dict(value)\n                    # Only add non-empty dictionaries or lists\n                    if filtered_value:\n                        filtered_dict[key] = filtered_value\n                elif value is not None:\n                    filtered_dict[key] = value\n        return filtered_dict\n\n    if isinstance(data, list):\n        filtered_list = []\n        for item in data:\n            filtered_item: dict | list\n            if isinstance(item, (dict, list)):\n                filtered_item = _clean_salesforce_dict(item)\n                # Only add non-empty dictionaries or lists\n                if filtered_item:\n                    filtered_list.append(filtered_item)\n            elif item is not None:\n                filtered_list.append(item)\n        return filtered_list\n\n    return data\n\n\ndef _json_to_natural_language(data: dict | list, indent: int = 0) -> str:\n    \"\"\"Convert a nested dictionary or list into a human-readable string format.\n\n    Recursively traverses the data structure and formats it with:\n    - Key-value pairs on separate lines\n    - Nested structures indented for readability\n    - Lists and dictionaries handled with appropriate formatting\n\n    Args:\n        data: The dictionary or list to convert\n        indent: Number of spaces to indent (default: 0)\n\n    Returns:\n        A formatted string representation of the data structure\n    \"\"\"\n    result = []\n    indent_str = \" \" * indent\n\n    if isinstance(data, dict):\n        for key, value in data.items():\n            if isinstance(value, (dict, list)):\n                result.append(f\"{indent_str}{key}:\")\n                result.append(_json_to_natural_language(value, indent + 2))\n            else:\n                result.append(f\"{indent_str}{key}: {value}\")\n    elif isinstance(data, list):\n        for item in data:\n            result.append(_json_to_natural_language(item, indent + 2))\n\n    return \"\\n\".join(result)\n\n\ndef _extract_section(salesforce_object_data: dict[str, Any], link: str) -> TextSection:\n    \"\"\"Converts a dict to a TextSection\"\"\"\n\n    # Extract text from a Salesforce API response dictionary by:\n    # 1. Cleaning the dictionary\n    # 2. Converting the cleaned dictionary to natural language\n    processed_dict = _clean_salesforce_dict(salesforce_object_data)\n    natural_language_for_dict = _json_to_natural_language(processed_dict)\n\n    return TextSection(\n        text=natural_language_for_dict,\n        link=link,\n    )\n\n\ndef _extract_primary_owner(\n    sf_db: OnyxSalesforceSQLite,\n    sf_object: SalesforceObject,\n) -> BasicExpertInfo | None:\n    object_dict = sf_object.data\n    if not (last_modified_by_id := object_dict.get(\"LastModifiedById\")):\n        logger.warning(f\"No LastModifiedById found for {sf_object.id}\")\n        return None\n    if not (last_modified_by := sf_db.get_record(last_modified_by_id)):\n        logger.warning(f\"No LastModifiedBy found for {last_modified_by_id}\")\n        return None\n\n    user_data = last_modified_by.data\n    expert_info = BasicExpertInfo(\n        first_name=user_data.get(\"FirstName\"),\n        last_name=user_data.get(\"LastName\"),\n        email=user_data.get(\"Email\"),\n        display_name=user_data.get(NAME_FIELD),\n    )\n\n    # Check if all fields are None\n    if (\n        expert_info.first_name is None\n        and expert_info.last_name is None\n        and expert_info.email is None\n        and expert_info.display_name is None\n    ):\n        logger.warning(f\"No identifying information found for user {user_data}\")\n        return None\n\n    return expert_info\n\n\ndef convert_sf_query_result_to_doc(\n    record_id: str,\n    record: dict[str, Any],\n    child_records: dict[str, dict[str, Any]],\n    primary_owner_list: list[BasicExpertInfo] | None,\n    sf_client: OnyxSalesforce,\n) -> Document:\n    \"\"\"Generates a yieldable Document from query results\"\"\"\n\n    base_url = f\"https://{sf_client.sf_instance}\"\n    extracted_doc_updated_at = time_str_to_utc(record[MODIFIED_FIELD])\n    extracted_semantic_identifier = record.get(NAME_FIELD) or record.get(\n        ID_FIELD, \"Unknown Object\"\n    )\n\n    sections = [_extract_section(record, f\"{base_url}/{record_id}\")]\n    for child_record_key, child_record in child_records.items():\n        if not child_record:\n            continue\n\n        key_fields = child_record_key.split(\":\")\n        child_record_id = key_fields[1]\n\n        child_text_section = _extract_section(\n            child_record,\n            f\"{base_url}/{child_record_id}\",\n        )\n        sections.append(child_text_section)\n\n    doc = Document(\n        id=f\"{ID_PREFIX}{record_id}\",\n        sections=cast(list[TextSection | ImageSection], sections),\n        source=DocumentSource.SALESFORCE,\n        semantic_identifier=extracted_semantic_identifier,\n        doc_updated_at=extracted_doc_updated_at,\n        primary_owners=primary_owner_list,\n        metadata={},\n    )\n    return doc\n\n\ndef convert_sf_object_to_doc(\n    sf_db: OnyxSalesforceSQLite,\n    sf_object: SalesforceObject,\n    sf_instance: str,\n) -> Document:\n    \"\"\"Would be nice if this function was documented\"\"\"\n    object_dict = sf_object.data\n    salesforce_id = object_dict[ID_FIELD]\n    onyx_salesforce_id = f\"{ID_PREFIX}{salesforce_id}\"\n    base_url = f\"https://{sf_instance}\"\n    extracted_doc_updated_at = time_str_to_utc(object_dict[MODIFIED_FIELD])\n    extracted_semantic_identifier = object_dict.get(NAME_FIELD) or object_dict.get(\n        ID_FIELD, \"Unknown Object\"\n    )\n\n    sections = [_extract_section(sf_object.data, f\"{base_url}/{sf_object.id}\")]\n    for id in sf_db.get_child_ids(sf_object.id):\n        if not (child_object := sf_db.get_record(id, isChild=True)):\n            continue\n        sections.append(\n            _extract_section(child_object.data, f\"{base_url}/{child_object.id}\")\n        )\n\n    # NOTE(rkuo): does using the parent last modified make sense if the update\n    # is being triggered because a child object changed?\n    primary_owner_list: list[BasicExpertInfo] | None = None\n\n    primary_owner = sf_db.make_basic_expert_info_from_record(sf_object)\n    if primary_owner:\n        primary_owner_list = [primary_owner]\n\n    doc = Document(\n        id=onyx_salesforce_id,\n        sections=cast(list[TextSection | ImageSection], sections),\n        source=DocumentSource.SALESFORCE,\n        semantic_identifier=extracted_semantic_identifier,\n        doc_updated_at=extracted_doc_updated_at,\n        primary_owners=primary_owner_list,\n        metadata={},\n    )\n    return doc\n"
  },
  {
    "path": "backend/onyx/connectors/salesforce/onyx_salesforce.py",
    "content": "import time\nfrom typing import Any\n\nfrom simple_salesforce import Salesforce\nfrom simple_salesforce import SFType\nfrom simple_salesforce.exceptions import SalesforceRefusedRequest\n\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rate_limit_builder,\n)\nfrom onyx.connectors.salesforce.blacklist import SALESFORCE_BLACKLISTED_OBJECTS\nfrom onyx.connectors.salesforce.blacklist import SALESFORCE_BLACKLISTED_PREFIXES\nfrom onyx.connectors.salesforce.blacklist import SALESFORCE_BLACKLISTED_SUFFIXES\nfrom onyx.connectors.salesforce.salesforce_calls import get_object_by_id_query\nfrom onyx.connectors.salesforce.utils import ID_FIELD\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\n\nlogger = setup_logger()\n\n\ndef is_salesforce_rate_limit_error(exception: Exception) -> bool:\n    \"\"\"Check if an exception is a Salesforce rate limit error.\"\"\"\n    return isinstance(\n        exception, SalesforceRefusedRequest\n    ) and \"REQUEST_LIMIT_EXCEEDED\" in str(exception)\n\n\nclass OnyxSalesforce(Salesforce):\n    SOQL_MAX_SUBQUERIES = 20\n\n    def __init__(self, *args: Any, **kwargs: Any) -> None:\n        super().__init__(*args, **kwargs)\n\n        self.parent_types: set[str] = set()\n        self.child_types: set[str] = set()\n        self.parent_to_child_types: dict[str, set[str]] = (\n            {}\n        )  # map from parent to child types\n        self.child_to_parent_types: dict[str, set[str]] = (\n            {}\n        )  # map from child to parent types\n        self.parent_reference_fields_by_type: dict[str, dict[str, list[str]]] = {}\n        self.queryable_fields_by_type: dict[str, list[str]] = {}\n        self.prefix_to_type: dict[str, str] = (\n            {}\n        )  # infer the object type of an id immediately\n\n    def initialize(self) -> bool:\n        \"\"\"Eventually cache all first run client state with this method\"\"\"\n        return True\n\n    def is_blacklisted(self, object_type: str) -> bool:\n        \"\"\"Returns True if the object type is blacklisted.\"\"\"\n        object_type_lower = object_type.lower()\n        if object_type_lower in SALESFORCE_BLACKLISTED_OBJECTS:\n            return True\n        for prefix in SALESFORCE_BLACKLISTED_PREFIXES:\n            if object_type_lower.startswith(prefix):\n                return True\n\n        for suffix in SALESFORCE_BLACKLISTED_SUFFIXES:\n            if object_type_lower.endswith(suffix):\n                return True\n\n        return False\n\n    @retry_builder(\n        tries=6,\n        delay=20,\n        backoff=1.5,\n        max_delay=60,\n        exceptions=(SalesforceRefusedRequest,),\n    )\n    @rate_limit_builder(max_calls=50, period=60)\n    def safe_query(self, query: str, **kwargs: Any) -> dict[str, Any]:\n        \"\"\"Wrapper around the original query method with retry logic and rate limiting.\"\"\"\n        try:\n            return super().query(query, **kwargs)\n        except SalesforceRefusedRequest as e:\n            if is_salesforce_rate_limit_error(e):\n                logger.warning(\n                    f\"Salesforce rate limit exceeded for query: {query[:100]}...\"\n                )\n                # Add additional delay for rate limit errors\n                time.sleep(5)\n            raise\n\n    @retry_builder(\n        tries=5,\n        delay=20,\n        backoff=1.5,\n        max_delay=60,\n        exceptions=(SalesforceRefusedRequest,),\n    )\n    @rate_limit_builder(max_calls=50, period=60)\n    def safe_query_all(self, query: str, **kwargs: Any) -> dict[str, Any]:\n        \"\"\"Wrapper around the original query_all method with retry logic and rate limiting.\"\"\"\n        try:\n            return super().query_all(query, **kwargs)\n        except SalesforceRefusedRequest as e:\n            if is_salesforce_rate_limit_error(e):\n                logger.warning(\n                    f\"Salesforce rate limit exceeded for query_all: {query[:100]}...\"\n                )\n                # Add additional delay for rate limit errors\n                time.sleep(5)\n            raise\n\n    @staticmethod\n    def _make_child_objects_by_id_query(\n        object_id: str,\n        sf_type: str,\n        child_relationships: list[str],\n        relationships_to_fields: dict[str, set[str]],\n    ) -> str:\n        \"\"\"Returns a SOQL query given the object id, type and child relationships.\n\n        object_id: the id of the parent object\n        sf_type: the object name/type of the parent object\n        child_relationships: a list of the child object names/types to retrieve\n        relationships_to_fields: a mapping of objects to their queryable fields\n\n        When the query is executed, it comes back as result.records[0][child_relationship]\n        \"\"\"\n\n        # supposedly the real limit is 200? But we limit to 10 for practical reasons\n        SUBQUERY_LIMIT = 10\n\n        query = \"SELECT \"\n        for child_relationship in child_relationships:\n            # TODO(rkuo): what happens if there is a very large list of child records?\n            # is that possible problem?\n\n            # NOTE: we actually have to list out the subqueries we want.\n            # We can't use the following shortcuts:\n            #   FIELDS(ALL) can include binary fields, so don't use that\n            #   FIELDS(CUSTOM) can include aggregate queries, so don't use that\n            fields = relationships_to_fields[child_relationship]\n            fields_fragment = \",\".join(fields)\n            query += f\"(SELECT {fields_fragment} FROM {child_relationship} LIMIT {SUBQUERY_LIMIT}), \"\n\n        query = query.rstrip(\", \")\n        query += f\" FROM {sf_type} WHERE Id = '{object_id}'\"\n        return query\n\n    def query_object(\n        self,\n        object_type: str,\n        object_id: str,\n        type_to_queryable_fields: dict[str, set[str]],\n    ) -> dict[str, Any] | None:\n        record: dict[str, Any] = {}\n\n        queryable_fields = type_to_queryable_fields[object_type]\n        query = get_object_by_id_query(object_id, object_type, queryable_fields)\n        result = self.safe_query(query)\n        if not result:\n            return None\n\n        record_0 = result[\"records\"][0]\n        for record_key, record_value in record_0.items():\n            if record_key == \"attributes\":\n                continue\n\n            record[record_key] = record_value\n\n        return record\n\n    def get_child_objects_by_id(\n        self,\n        object_id: str,\n        sf_type: str,\n        child_relationships: list[str],\n        relationships_to_fields: dict[str, set[str]],\n    ) -> dict[str, dict[str, Any]]:\n        \"\"\"There's a limit on the number of subqueries we can put in a single query.\"\"\"\n        child_records: dict[str, dict[str, Any]] = {}\n        child_relationships_batch: list[str] = []\n        remaining_child_relationships = list(child_relationships)\n\n        while True:\n            process_batch = False\n\n            if (\n                len(remaining_child_relationships) == 0\n                and len(child_relationships_batch) == 0\n            ):\n                break\n\n            if len(child_relationships_batch) >= OnyxSalesforce.SOQL_MAX_SUBQUERIES:\n                process_batch = True\n\n            if len(remaining_child_relationships) == 0:\n                process_batch = True\n\n            if process_batch:\n                if len(child_relationships_batch) == 0:\n                    break\n\n                query = OnyxSalesforce._make_child_objects_by_id_query(\n                    object_id,\n                    sf_type,\n                    child_relationships_batch,\n                    relationships_to_fields,\n                )\n\n                try:\n                    result = self.safe_query(query)\n                except Exception:\n                    logger.exception(f\"Query failed: {query=}\")\n                else:\n                    for child_record_key, child_result in result[\"records\"][0].items():\n                        if child_record_key == \"attributes\":\n                            continue\n\n                        if not child_result:\n                            continue\n\n                        for child_record in child_result[\"records\"]:\n                            child_record_id = child_record[ID_FIELD]\n                            if not child_record_id:\n                                logger.warning(\"Child record has no id\")\n                                continue\n\n                            child_records[f\"{child_record_key}:{child_record_id}\"] = (\n                                child_record\n                            )\n                finally:\n                    child_relationships_batch.clear()\n\n                continue\n\n            if len(remaining_child_relationships) == 0:\n                break\n\n            child_relationship = remaining_child_relationships.pop(0)\n\n            # this is binary content, skip it\n            if child_relationship == \"Attachments\":\n                continue\n\n            child_relationships_batch.append(child_relationship)\n\n        return child_records\n\n    @retry_builder(\n        tries=3,\n        delay=1,\n        backoff=2,\n        exceptions=(SalesforceRefusedRequest,),\n    )\n    def describe_type(self, name: str) -> Any:\n        sf_object = SFType(name, self.session_id, self.sf_instance)\n        try:\n            result = sf_object.describe()\n            return result\n        except SalesforceRefusedRequest as e:\n            if is_salesforce_rate_limit_error(e):\n                logger.warning(\n                    f\"Salesforce rate limit exceeded for describe_type: {name}\"\n                )\n                # Add additional delay for rate limit errors\n                time.sleep(3)\n            raise\n\n    def get_queryable_fields_by_type(self, name: str) -> set[str]:\n        object_description = self.describe_type(name)\n        if object_description is None:\n            return set()\n\n        fields: list[dict[str, Any]] = object_description[\"fields\"]\n        valid_fields: set[str] = set()\n        field_names_to_remove: set[str] = set()\n        for field in fields:\n            if compound_field_name := field.get(\"compoundFieldName\"):\n                # We do want to get name fields even if they are compound\n                if not field.get(\"nameField\"):\n                    field_names_to_remove.add(compound_field_name)\n\n            field_name = field.get(\"name\")\n            field_type = field.get(\"type\")\n            if field_type in [\"base64\", \"blob\", \"encryptedstring\"]:\n                continue\n\n            if field_name:\n                valid_fields.add(field_name)\n\n        return valid_fields - field_names_to_remove\n\n    def get_children_of_sf_type(self, sf_type: str) -> dict[str, str]:\n        \"\"\"Returns a dict of child object names to relationship names.\n        Relationship names (not object names) are used in subqueries!\n        \"\"\"\n        names_to_relationships: dict[str, str] = {}\n\n        object_description = self.describe_type(sf_type)\n\n        index = 0\n        len_relationships = len(object_description[\"childRelationships\"])\n        for child_relationship in object_description[\"childRelationships\"]:\n            child_name = child_relationship[\"childSObject\"]\n\n            index += 1\n            valid, reason = self._is_valid_child_object(child_relationship)\n            if not valid:\n                logger.debug(\n                    f\"{index}/{len_relationships} - Invalid child object: \"\n                    f\"parent={sf_type} child={child_name} child_field_backreference={child_relationship['field']} {reason=}\"\n                )\n                continue\n\n            logger.debug(\n                f\"{index}/{len_relationships} - Found valid child object: \"\n                f\"parent={sf_type} child={child_name} child_field_backreference={child_relationship['field']}\"\n            )\n\n            name = child_name\n            relationship = child_relationship[\"relationshipName\"]\n\n            names_to_relationships[name] = relationship\n\n        return names_to_relationships\n\n    def _is_valid_child_object(\n        self, child_relationship: dict[str, Any]\n    ) -> tuple[bool, str]:\n\n        if not child_relationship[\"childSObject\"]:\n            return False, \"childSObject is None\"\n\n        child_name = child_relationship[\"childSObject\"]\n\n        if self.is_blacklisted(child_name):\n            return False, f\"{child_name=} is blacklisted.\"\n\n        if not child_relationship[\"relationshipName\"]:\n            return False, f\"{child_name=} has no relationshipName.\"\n\n        object_description = self.describe_type(child_relationship[\"childSObject\"])\n        if not object_description[\"queryable\"]:\n            return False, f\"{child_name=} is not queryable.\"\n\n        if not child_relationship[\"field\"]:\n            return False, f\"{child_name=} has no relationship field.\"\n\n        if child_relationship[\"field\"] == \"RelatedToId\":\n            return False, f\"{child_name=} field is RelatedToId and blacklisted.\"\n\n        return True, \"\"\n\n    def get_parent_reference_fields(\n        self, sf_type: str, parent_types: set[str]\n    ) -> dict[str, list[str]]:\n        \"\"\"\n        sf_type: the type in which to find parent reference fields\n        parent_types: a list of parent reference field types we are actually interested in\n        Other parent types will not be returned.\n\n        Given an object type, returns a dict of field names to a list of referenced parent\n        object types.\n        (Yes, it is possible for a field to reference one of multiple object types,\n        although this seems very unlikely.)\n\n        Returns an empty dict if there are no parent reference fields.\n        \"\"\"\n\n        parent_reference_fields: dict[str, list[str]] = {}\n\n        object_description = self.describe_type(sf_type)\n        for field in object_description[\"fields\"]:\n            if field[\"type\"] == \"reference\":\n                for reference_to in field[\"referenceTo\"]:\n                    if reference_to in parent_types:\n                        if field[\"name\"] not in parent_reference_fields:\n                            parent_reference_fields[field[\"name\"]] = []\n                        parent_reference_fields[field[\"name\"]].append(\n                            field[\"referenceTo\"]\n                        )\n\n        return parent_reference_fields\n"
  },
  {
    "path": "backend/onyx/connectors/salesforce/salesforce_calls.py",
    "content": "import gc\nimport os\nimport time\nfrom concurrent.futures import ThreadPoolExecutor\nfrom datetime import datetime\n\nfrom pytz import UTC\nfrom simple_salesforce import Salesforce\nfrom simple_salesforce.bulk2 import SFBulk2Handler\nfrom simple_salesforce.bulk2 import SFBulk2Type\nfrom simple_salesforce.exceptions import SalesforceRefusedRequest\n\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rate_limit_builder,\n)\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.salesforce.utils import MODIFIED_FIELD\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\nlogger = setup_logger()\n\n\ndef is_salesforce_rate_limit_error(exception: Exception) -> bool:\n    \"\"\"Check if an exception is a Salesforce rate limit error.\"\"\"\n    return isinstance(\n        exception, SalesforceRefusedRequest\n    ) and \"REQUEST_LIMIT_EXCEEDED\" in str(exception)\n\n\ndef _build_last_modified_time_filter_for_salesforce(\n    start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None\n) -> str:\n    if start is None or end is None:\n        return \"\"\n    start_datetime = datetime.fromtimestamp(start, UTC)\n    end_datetime = datetime.fromtimestamp(end, UTC)\n    return f\" WHERE LastModifiedDate > {start_datetime.isoformat()} AND LastModifiedDate < {end_datetime.isoformat()}\"\n\n\ndef _build_created_date_time_filter_for_salesforce(\n    start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None\n) -> str:\n    if start is None or end is None:\n        return \"\"\n    start_datetime = datetime.fromtimestamp(start, UTC)\n    end_datetime = datetime.fromtimestamp(end, UTC)\n    return f\" WHERE CreatedDate > {start_datetime.isoformat()} AND CreatedDate < {end_datetime.isoformat()}\"\n\n\ndef _make_time_filter_for_sf_type(\n    queryable_fields: set[str],\n    start: SecondsSinceUnixEpoch,\n    end: SecondsSinceUnixEpoch,\n) -> str | None:\n\n    if MODIFIED_FIELD in queryable_fields:\n        return _build_last_modified_time_filter_for_salesforce(start, end)\n\n    if \"CreatedDate\" in queryable_fields:\n        return _build_created_date_time_filter_for_salesforce(start, end)\n\n    return None\n\n\ndef _make_time_filtered_query(\n    queryable_fields: set[str], sf_type: str, time_filter: str\n) -> str:\n    query = f\"SELECT {', '.join(queryable_fields)} FROM {sf_type}{time_filter}\"\n    return query\n\n\ndef get_object_by_id_query(\n    object_id: str, sf_type: str, queryable_fields: set[str]\n) -> str:\n    query = (\n        f\"SELECT {', '.join(queryable_fields)} FROM {sf_type} WHERE Id = '{object_id}'\"\n    )\n    return query\n\n\n@retry_builder(\n    tries=5,\n    delay=2,\n    backoff=2,\n    max_delay=60,\n    exceptions=(SalesforceRefusedRequest,),\n)\n@rate_limit_builder(max_calls=50, period=60)\ndef _object_type_has_api_data(\n    sf_client: Salesforce, sf_type: str, time_filter: str\n) -> bool:\n    \"\"\"\n    Use the rest api to check to make sure the query will result in a non-empty response.\n    \"\"\"\n    try:\n        query = f\"SELECT Count() FROM {sf_type}{time_filter} LIMIT 1\"\n        result = sf_client.query(query)\n        if result[\"totalSize\"] == 0:\n            return False\n    except SalesforceRefusedRequest as e:\n        if is_salesforce_rate_limit_error(e):\n            logger.warning(\n                f\"Salesforce rate limit exceeded for object type check: {sf_type}\"\n            )\n            # Add additional delay for rate limit errors\n            time.sleep(3)\n        raise\n\n    except Exception as e:\n        if \"OPERATION_TOO_LARGE\" not in str(e):\n            logger.warning(f\"Object type {sf_type} doesn't support query: {e}\")\n            return False\n    return True\n\n\ndef _bulk_retrieve_from_salesforce(\n    sf_type: str,\n    query: str,\n    target_dir: str,\n    sf_client: Salesforce,\n) -> tuple[str, list[str] | None]:\n    \"\"\"Returns a tuple of\n    1. the salesforce object type (NOTE: seems redundant)\n    2. the list of CSV's written into the target directory\n    \"\"\"\n\n    bulk_2_handler: SFBulk2Handler | None = SFBulk2Handler(\n        session_id=sf_client.session_id,\n        bulk2_url=sf_client.bulk2_url,\n        proxies=sf_client.proxies,\n        session=sf_client.session,\n    )\n    if not bulk_2_handler:\n        return sf_type, None\n\n    # NOTE(rkuo): there are signs this download is allocating large\n    # amounts of memory instead of streaming the results to disk.\n    # we're doing a gc.collect to try and mitigate this.\n\n    # see https://github.com/simple-salesforce/simple-salesforce/issues/428 for a\n    # possible solution\n    bulk_2_type: SFBulk2Type | None = SFBulk2Type(\n        object_name=sf_type,\n        bulk2_url=bulk_2_handler.bulk2_url,\n        headers=bulk_2_handler.headers,\n        session=bulk_2_handler.session,\n    )\n    if not bulk_2_type:\n        return sf_type, None\n\n    logger.info(f\"Downloading {sf_type}\")\n\n    logger.debug(f\"Query: {query}\")\n\n    try:\n        # This downloads the file to a file in the target path with a random name\n        results = bulk_2_type.download(\n            query=query,\n            path=target_dir,\n            max_records=500000,\n        )\n\n        # prepend each downloaded csv with the object type (delimiter = '.')\n        all_download_paths: list[str] = []\n        for result in results:\n            original_file_path = result[\"file\"]\n            directory, filename = os.path.split(original_file_path)\n            new_filename = f\"{sf_type}.{filename}\"\n            new_file_path = os.path.join(directory, new_filename)\n            os.rename(original_file_path, new_file_path)\n            all_download_paths.append(new_file_path)\n    except Exception as e:\n        logger.error(\n            f\"Failed to download salesforce csv for object type {sf_type}: {e}\"\n        )\n        logger.warning(f\"Exceptioning query for object type {sf_type}: {query}\")\n        return sf_type, None\n    finally:\n        bulk_2_handler = None\n        bulk_2_type = None\n        gc.collect()\n\n    logger.info(f\"Downloaded {sf_type} to {all_download_paths}\")\n    return sf_type, all_download_paths\n\n\ndef fetch_all_csvs_in_parallel(\n    sf_client: Salesforce,\n    all_types_to_filter: dict[str, bool],\n    queryable_fields_by_type: dict[str, set[str]],\n    start: SecondsSinceUnixEpoch | None,\n    end: SecondsSinceUnixEpoch | None,\n    target_dir: str,\n) -> dict[str, list[str] | None]:\n    \"\"\"\n    Fetches all the csvs in parallel for the given object types\n    Returns a dict of (sf_type, full_download_path)\n\n    NOTE: We can probably lift object type has api data out of here\n    \"\"\"\n\n    type_to_query = {}\n\n    # query the available fields for each object type and determine how to filter\n    for sf_type, apply_filter in all_types_to_filter.items():\n        queryable_fields = queryable_fields_by_type[sf_type]\n\n        time_filter = \"\"\n        while True:\n            if not apply_filter:\n                break\n\n            if start is not None and end is not None:\n                time_filter_temp = _make_time_filter_for_sf_type(\n                    queryable_fields, start, end\n                )\n                if time_filter_temp is None:\n                    logger.warning(\n                        f\"Object type not filterable: type={sf_type} fields={queryable_fields}\"\n                    )\n                    time_filter = \"\"\n                else:\n                    logger.info(\n                        f\"Object type filterable: type={sf_type} filter={time_filter_temp}\"\n                    )\n                    time_filter = time_filter_temp\n\n            break\n\n        if not _object_type_has_api_data(sf_client, sf_type, time_filter):\n            logger.warning(f\"Object type skipped (no data available): type={sf_type}\")\n            continue\n\n        query = _make_time_filtered_query(queryable_fields, sf_type, time_filter)\n        type_to_query[sf_type] = query\n\n    logger.info(\n        f\"Object types to query: initial={len(all_types_to_filter)} queryable={len(type_to_query)}\"\n    )\n\n    # Run the bulk retrieve in parallel\n    # limit to 4 to help with memory usage\n    with ThreadPoolExecutor(max_workers=4) as executor:\n        results = executor.map(\n            lambda object_type: _bulk_retrieve_from_salesforce(\n                sf_type=object_type,\n                query=type_to_query[object_type],\n                target_dir=target_dir,\n                sf_client=sf_client,\n            ),\n            type_to_query.keys(),\n        )\n        return dict(results)\n"
  },
  {
    "path": "backend/onyx/connectors/salesforce/shelve_stuff/old_test_salesforce_shelves.py",
    "content": "import csv\nimport os\nimport shutil\n\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_functions import find_ids_by_type\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_functions import (\n    get_affected_parent_ids_by_type,\n)\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_functions import get_child_ids\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_functions import get_record\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_functions import (\n    update_sf_db_with_csv,\n)\nfrom onyx.connectors.salesforce.utils import BASE_DATA_PATH\nfrom onyx.connectors.salesforce.utils import get_object_type_path\n\n_VALID_SALESFORCE_IDS = [\n    \"001bm00000fd9Z3AAI\",\n    \"001bm00000fdYTdAAM\",\n    \"001bm00000fdYTeAAM\",\n    \"001bm00000fdYTfAAM\",\n    \"001bm00000fdYTgAAM\",\n    \"001bm00000fdYThAAM\",\n    \"001bm00000fdYTiAAM\",\n    \"001bm00000fdYTjAAM\",\n    \"001bm00000fdYTkAAM\",\n    \"001bm00000fdYTlAAM\",\n    \"001bm00000fdYTmAAM\",\n    \"001bm00000fdYTnAAM\",\n    \"001bm00000fdYToAAM\",\n    \"500bm00000XoOxtAAF\",\n    \"500bm00000XoOxuAAF\",\n    \"500bm00000XoOxvAAF\",\n    \"500bm00000XoOxwAAF\",\n    \"500bm00000XoOxxAAF\",\n    \"500bm00000XoOxyAAF\",\n    \"500bm00000XoOxzAAF\",\n    \"500bm00000XoOy0AAF\",\n    \"500bm00000XoOy1AAF\",\n    \"500bm00000XoOy2AAF\",\n    \"500bm00000XoOy3AAF\",\n    \"500bm00000XoOy4AAF\",\n    \"500bm00000XoOy5AAF\",\n    \"500bm00000XoOy6AAF\",\n    \"500bm00000XoOy7AAF\",\n    \"500bm00000XoOy8AAF\",\n    \"500bm00000XoOy9AAF\",\n    \"500bm00000XoOyAAAV\",\n    \"500bm00000XoOyBAAV\",\n    \"500bm00000XoOyCAAV\",\n    \"500bm00000XoOyDAAV\",\n    \"500bm00000XoOyEAAV\",\n    \"500bm00000XoOyFAAV\",\n    \"500bm00000XoOyGAAV\",\n    \"500bm00000XoOyHAAV\",\n    \"500bm00000XoOyIAAV\",\n    \"003bm00000EjHCjAAN\",\n    \"003bm00000EjHCkAAN\",\n    \"003bm00000EjHClAAN\",\n    \"003bm00000EjHCmAAN\",\n    \"003bm00000EjHCnAAN\",\n    \"003bm00000EjHCoAAN\",\n    \"003bm00000EjHCpAAN\",\n    \"003bm00000EjHCqAAN\",\n    \"003bm00000EjHCrAAN\",\n    \"003bm00000EjHCsAAN\",\n    \"003bm00000EjHCtAAN\",\n    \"003bm00000EjHCuAAN\",\n    \"003bm00000EjHCvAAN\",\n    \"003bm00000EjHCwAAN\",\n    \"003bm00000EjHCxAAN\",\n    \"003bm00000EjHCyAAN\",\n    \"003bm00000EjHCzAAN\",\n    \"003bm00000EjHD0AAN\",\n    \"003bm00000EjHD1AAN\",\n    \"003bm00000EjHD2AAN\",\n    \"550bm00000EXc2tAAD\",\n    \"006bm000006kyDpAAI\",\n    \"006bm000006kyDqAAI\",\n    \"006bm000006kyDrAAI\",\n    \"006bm000006kyDsAAI\",\n    \"006bm000006kyDtAAI\",\n    \"006bm000006kyDuAAI\",\n    \"006bm000006kyDvAAI\",\n    \"006bm000006kyDwAAI\",\n    \"006bm000006kyDxAAI\",\n    \"006bm000006kyDyAAI\",\n    \"006bm000006kyDzAAI\",\n    \"006bm000006kyE0AAI\",\n    \"006bm000006kyE1AAI\",\n    \"006bm000006kyE2AAI\",\n    \"006bm000006kyE3AAI\",\n    \"006bm000006kyE4AAI\",\n    \"006bm000006kyE5AAI\",\n    \"006bm000006kyE6AAI\",\n    \"006bm000006kyE7AAI\",\n    \"006bm000006kyE8AAI\",\n    \"006bm000006kyE9AAI\",\n    \"006bm000006kyEAAAY\",\n    \"006bm000006kyEBAAY\",\n    \"006bm000006kyECAAY\",\n    \"006bm000006kyEDAAY\",\n    \"006bm000006kyEEAAY\",\n    \"006bm000006kyEFAAY\",\n    \"006bm000006kyEGAAY\",\n    \"006bm000006kyEHAAY\",\n    \"006bm000006kyEIAAY\",\n    \"006bm000006kyEJAAY\",\n    \"005bm000009zy0TAAQ\",\n    \"005bm000009zy25AAA\",\n    \"005bm000009zy26AAA\",\n    \"005bm000009zy28AAA\",\n    \"005bm000009zy29AAA\",\n    \"005bm000009zy2AAAQ\",\n    \"005bm000009zy2BAAQ\",\n]\n\n\ndef clear_sf_db() -> None:\n    \"\"\"\n    Clears the SF DB by deleting all files in the data directory.\n    \"\"\"\n    shutil.rmtree(BASE_DATA_PATH)\n\n\ndef create_csv_file(\n    object_type: str, records: list[dict], filename: str = \"test_data.csv\"\n) -> None:\n    \"\"\"\n    Creates a CSV file for the given object type and records.\n\n    Args:\n        object_type: The Salesforce object type (e.g. \"Account\", \"Contact\")\n        records: List of dictionaries containing the record data\n        filename: Name of the CSV file to create (default: test_data.csv)\n    \"\"\"\n    if not records:\n        return\n\n    # Get all unique fields from records\n    fields: set[str] = set()\n    for record in records:\n        fields.update(record.keys())\n    fields = set(sorted(list(fields)))  # Sort for consistent order\n\n    # Create CSV file\n    csv_path = os.path.join(get_object_type_path(object_type), filename)\n    with open(csv_path, \"w\", newline=\"\", encoding=\"utf-8\") as f:\n        writer = csv.DictWriter(f, fieldnames=fields)\n        writer.writeheader()\n        for record in records:\n            writer.writerow(record)\n\n    # Update the database with the CSV\n    update_sf_db_with_csv(object_type, csv_path)\n\n\ndef create_csv_with_example_data() -> None:\n    \"\"\"\n    Creates CSV files with example data, organized by object type.\n    \"\"\"\n    example_data: dict[str, list[dict]] = {\n        \"Account\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[0],\n                \"Name\": \"Acme Inc.\",\n                \"BillingCity\": \"New York\",\n                \"Industry\": \"Technology\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[1],\n                \"Name\": \"Globex Corp\",\n                \"BillingCity\": \"Los Angeles\",\n                \"Industry\": \"Manufacturing\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[2],\n                \"Name\": \"Initech\",\n                \"BillingCity\": \"Austin\",\n                \"Industry\": \"Software\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[3],\n                \"Name\": \"TechCorp Solutions\",\n                \"BillingCity\": \"San Francisco\",\n                \"Industry\": \"Software\",\n                \"AnnualRevenue\": 5000000,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[4],\n                \"Name\": \"BioMed Research\",\n                \"BillingCity\": \"Boston\",\n                \"Industry\": \"Healthcare\",\n                \"AnnualRevenue\": 12000000,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[5],\n                \"Name\": \"Green Energy Co\",\n                \"BillingCity\": \"Portland\",\n                \"Industry\": \"Energy\",\n                \"AnnualRevenue\": 8000000,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[6],\n                \"Name\": \"DataFlow Analytics\",\n                \"BillingCity\": \"Seattle\",\n                \"Industry\": \"Technology\",\n                \"AnnualRevenue\": 3000000,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[7],\n                \"Name\": \"Cloud Nine Services\",\n                \"BillingCity\": \"Denver\",\n                \"Industry\": \"Cloud Computing\",\n                \"AnnualRevenue\": 7000000,\n            },\n        ],\n        \"Contact\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[40],\n                \"FirstName\": \"John\",\n                \"LastName\": \"Doe\",\n                \"Email\": \"john.doe@acme.com\",\n                \"Title\": \"CEO\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[41],\n                \"FirstName\": \"Jane\",\n                \"LastName\": \"Smith\",\n                \"Email\": \"jane.smith@acme.com\",\n                \"Title\": \"CTO\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[42],\n                \"FirstName\": \"Bob\",\n                \"LastName\": \"Johnson\",\n                \"Email\": \"bob.j@globex.com\",\n                \"Title\": \"Sales Director\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[43],\n                \"FirstName\": \"Sarah\",\n                \"LastName\": \"Chen\",\n                \"Email\": \"sarah.chen@techcorp.com\",\n                \"Title\": \"Product Manager\",\n                \"Phone\": \"415-555-0101\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[44],\n                \"FirstName\": \"Michael\",\n                \"LastName\": \"Rodriguez\",\n                \"Email\": \"m.rodriguez@biomed.com\",\n                \"Title\": \"Research Director\",\n                \"Phone\": \"617-555-0202\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[45],\n                \"FirstName\": \"Emily\",\n                \"LastName\": \"Green\",\n                \"Email\": \"emily.g@greenenergy.com\",\n                \"Title\": \"Sustainability Lead\",\n                \"Phone\": \"503-555-0303\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[46],\n                \"FirstName\": \"David\",\n                \"LastName\": \"Kim\",\n                \"Email\": \"david.kim@dataflow.com\",\n                \"Title\": \"Data Scientist\",\n                \"Phone\": \"206-555-0404\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[47],\n                \"FirstName\": \"Rachel\",\n                \"LastName\": \"Taylor\",\n                \"Email\": \"r.taylor@cloudnine.com\",\n                \"Title\": \"Cloud Architect\",\n                \"Phone\": \"303-555-0505\",\n            },\n        ],\n        \"Opportunity\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[62],\n                \"Name\": \"Acme Server Upgrade\",\n                \"Amount\": 50000,\n                \"Stage\": \"Prospecting\",\n                \"CloseDate\": \"2024-06-30\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[63],\n                \"Name\": \"Globex Manufacturing Line\",\n                \"Amount\": 150000,\n                \"Stage\": \"Negotiation\",\n                \"CloseDate\": \"2024-03-15\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[64],\n                \"Name\": \"Initech Software License\",\n                \"Amount\": 75000,\n                \"Stage\": \"Closed Won\",\n                \"CloseDate\": \"2024-01-30\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[65],\n                \"Name\": \"TechCorp AI Implementation\",\n                \"Amount\": 250000,\n                \"Stage\": \"Needs Analysis\",\n                \"CloseDate\": \"2024-08-15\",\n                \"Probability\": 60,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[66],\n                \"Name\": \"BioMed Lab Equipment\",\n                \"Amount\": 500000,\n                \"Stage\": \"Value Proposition\",\n                \"CloseDate\": \"2024-09-30\",\n                \"Probability\": 75,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[67],\n                \"Name\": \"Green Energy Solar Project\",\n                \"Amount\": 750000,\n                \"Stage\": \"Proposal\",\n                \"CloseDate\": \"2024-07-15\",\n                \"Probability\": 80,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[68],\n                \"Name\": \"DataFlow Analytics Platform\",\n                \"Amount\": 180000,\n                \"Stage\": \"Negotiation\",\n                \"CloseDate\": \"2024-05-30\",\n                \"Probability\": 90,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[69],\n                \"Name\": \"Cloud Nine Infrastructure\",\n                \"Amount\": 300000,\n                \"Stage\": \"Qualification\",\n                \"CloseDate\": \"2024-10-15\",\n                \"Probability\": 40,\n            },\n        ],\n    }\n\n    # Create CSV files for each object type\n    for object_type, records in example_data.items():\n        create_csv_file(object_type, records)\n\n\ndef test_query() -> None:\n    \"\"\"\n    Tests querying functionality by verifying:\n    1. All expected Account IDs are found\n    2. Each Account's data matches what was inserted\n    \"\"\"\n    # Expected test data for verification\n    expected_accounts: dict[str, dict[str, str | int]] = {\n        _VALID_SALESFORCE_IDS[0]: {\n            \"Name\": \"Acme Inc.\",\n            \"BillingCity\": \"New York\",\n            \"Industry\": \"Technology\",\n        },\n        _VALID_SALESFORCE_IDS[1]: {\n            \"Name\": \"Globex Corp\",\n            \"BillingCity\": \"Los Angeles\",\n            \"Industry\": \"Manufacturing\",\n        },\n        _VALID_SALESFORCE_IDS[2]: {\n            \"Name\": \"Initech\",\n            \"BillingCity\": \"Austin\",\n            \"Industry\": \"Software\",\n        },\n        _VALID_SALESFORCE_IDS[3]: {\n            \"Name\": \"TechCorp Solutions\",\n            \"BillingCity\": \"San Francisco\",\n            \"Industry\": \"Software\",\n            \"AnnualRevenue\": 5000000,\n        },\n        _VALID_SALESFORCE_IDS[4]: {\n            \"Name\": \"BioMed Research\",\n            \"BillingCity\": \"Boston\",\n            \"Industry\": \"Healthcare\",\n            \"AnnualRevenue\": 12000000,\n        },\n        _VALID_SALESFORCE_IDS[5]: {\n            \"Name\": \"Green Energy Co\",\n            \"BillingCity\": \"Portland\",\n            \"Industry\": \"Energy\",\n            \"AnnualRevenue\": 8000000,\n        },\n        _VALID_SALESFORCE_IDS[6]: {\n            \"Name\": \"DataFlow Analytics\",\n            \"BillingCity\": \"Seattle\",\n            \"Industry\": \"Technology\",\n            \"AnnualRevenue\": 3000000,\n        },\n        _VALID_SALESFORCE_IDS[7]: {\n            \"Name\": \"Cloud Nine Services\",\n            \"BillingCity\": \"Denver\",\n            \"Industry\": \"Cloud Computing\",\n            \"AnnualRevenue\": 7000000,\n        },\n    }\n\n    # Get all Account IDs\n    account_ids = find_ids_by_type(\"Account\")\n\n    # Verify we found all expected accounts\n    assert len(account_ids) == len(\n        expected_accounts\n    ), f\"Expected {len(expected_accounts)} accounts, found {len(account_ids)}\"\n    assert set(account_ids) == set(\n        expected_accounts.keys()\n    ), \"Found account IDs don't match expected IDs\"\n\n    # Verify each account's data\n    for acc_id in account_ids:\n        combined = get_record(acc_id)\n        assert combined is not None, f\"Could not find account {acc_id}\"\n\n        expected = expected_accounts[acc_id]\n\n        # Verify account data matches\n        for key, value in expected.items():\n            value = str(value)\n            assert (\n                combined.data[key] == value\n            ), f\"Account {acc_id} field {key} expected {value}, got {combined.data[key]}\"\n\n    print(\"All query tests passed successfully!\")\n\n\ndef test_upsert() -> None:\n    \"\"\"\n    Tests upsert functionality by:\n    1. Updating an existing account\n    2. Creating a new account\n    3. Verifying both operations were successful\n    \"\"\"\n    # Create CSV for updating an existing account and adding a new one\n    update_data: list[dict[str, str | int]] = [\n        {\n            \"Id\": _VALID_SALESFORCE_IDS[0],\n            \"Name\": \"Acme Inc. Updated\",\n            \"BillingCity\": \"New York\",\n            \"Industry\": \"Technology\",\n            \"Description\": \"Updated company info\",\n        },\n        {\n            \"Id\": _VALID_SALESFORCE_IDS[2],\n            \"Name\": \"New Company Inc.\",\n            \"BillingCity\": \"Miami\",\n            \"Industry\": \"Finance\",\n            \"AnnualRevenue\": 1000000,\n        },\n    ]\n\n    create_csv_file(\"Account\", update_data, \"update_data.csv\")\n\n    # Verify the update worked\n    updated_record = get_record(_VALID_SALESFORCE_IDS[0])\n    assert updated_record is not None, \"Updated record not found\"\n    assert updated_record.data[\"Name\"] == \"Acme Inc. Updated\", \"Name not updated\"\n    assert (\n        updated_record.data[\"Description\"] == \"Updated company info\"\n    ), \"Description not added\"\n\n    # Verify the new record was created\n    new_record = get_record(_VALID_SALESFORCE_IDS[2])\n    assert new_record is not None, \"New record not found\"\n    assert new_record.data[\"Name\"] == \"New Company Inc.\", \"New record name incorrect\"\n    assert new_record.data[\"AnnualRevenue\"] == \"1000000\", \"New record revenue incorrect\"\n\n    print(\"All upsert tests passed successfully!\")\n\n\ndef test_relationships() -> None:\n    \"\"\"\n    Tests relationship shelf updates and queries by:\n    1. Creating test data with relationships\n    2. Verifying the relationships are correctly stored\n    3. Testing relationship queries\n    \"\"\"\n    # Create test data for each object type\n    test_data: dict[str, list[dict[str, str | int]]] = {\n        \"Case\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[13],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"Subject\": \"Test Case 1\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[14],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"Subject\": \"Test Case 2\",\n            },\n        ],\n        \"Contact\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[48],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"FirstName\": \"Test\",\n                \"LastName\": \"Contact\",\n            }\n        ],\n        \"Opportunity\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[62],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"Name\": \"Test Opportunity\",\n                \"Amount\": 100000,\n            }\n        ],\n    }\n\n    # Create and update CSV files for each object type\n    for object_type, records in test_data.items():\n        create_csv_file(object_type, records, \"relationship_test.csv\")\n\n    # Test relationship queries\n    # All these objects should be children of Acme Inc.\n    child_ids = get_child_ids(_VALID_SALESFORCE_IDS[0])\n    assert len(child_ids) == 4, f\"Expected 4 child objects, found {len(child_ids)}\"\n    assert _VALID_SALESFORCE_IDS[13] in child_ids, \"Case 1 not found in relationship\"\n    assert _VALID_SALESFORCE_IDS[14] in child_ids, \"Case 2 not found in relationship\"\n    assert _VALID_SALESFORCE_IDS[48] in child_ids, \"Contact not found in relationship\"\n    assert (\n        _VALID_SALESFORCE_IDS[62] in child_ids\n    ), \"Opportunity not found in relationship\"\n\n    # Test querying relationships for a different account (should be empty)\n    other_account_children = get_child_ids(_VALID_SALESFORCE_IDS[1])\n    assert (\n        len(other_account_children) == 0\n    ), \"Expected no children for different account\"\n\n    print(\"All relationship tests passed successfully!\")\n\n\ndef test_account_with_children() -> None:\n    \"\"\"\n    Tests querying all accounts and retrieving their child objects.\n    This test verifies that:\n    1. All accounts can be retrieved\n    2. Child objects are correctly linked\n    3. Child object data is complete and accurate\n    \"\"\"\n    # First get all account IDs\n    account_ids = find_ids_by_type(\"Account\")\n    assert len(account_ids) > 0, \"No accounts found\"\n\n    # For each account, get its children and verify the data\n    for account_id in account_ids:\n        account = get_record(account_id)\n        assert account is not None, f\"Could not find account {account_id}\"\n\n        # Get all child objects\n        child_ids = get_child_ids(account_id)\n\n        # For Acme Inc., verify specific relationships\n        if account_id == _VALID_SALESFORCE_IDS[0]:  # Acme Inc.\n            assert (\n                len(child_ids) == 4\n            ), f\"Expected 4 children for Acme Inc., found {len(child_ids)}\"\n\n            # Get all child records\n            child_records = []\n            for child_id in child_ids:\n                child_record = get_record(child_id)\n                if child_record is not None:\n                    child_records.append(child_record)\n            # Verify Cases\n            cases = [r for r in child_records if r.type == \"Case\"]\n            assert (\n                len(cases) == 2\n            ), f\"Expected 2 cases for Acme Inc., found {len(cases)}\"\n            case_subjects = {case.data[\"Subject\"] for case in cases}\n            assert \"Test Case 1\" in case_subjects, \"Test Case 1 not found\"\n            assert \"Test Case 2\" in case_subjects, \"Test Case 2 not found\"\n\n            # Verify Contacts\n            contacts = [r for r in child_records if r.type == \"Contact\"]\n            assert (\n                len(contacts) == 1\n            ), f\"Expected 1 contact for Acme Inc., found {len(contacts)}\"\n            contact = contacts[0]\n            assert contact.data[\"FirstName\"] == \"Test\", \"Contact first name mismatch\"\n            assert contact.data[\"LastName\"] == \"Contact\", \"Contact last name mismatch\"\n\n            # Verify Opportunities\n            opportunities = [r for r in child_records if r.type == \"Opportunity\"]\n            assert (\n                len(opportunities) == 1\n            ), f\"Expected 1 opportunity for Acme Inc., found {len(opportunities)}\"\n            opportunity = opportunities[0]\n            assert (\n                opportunity.data[\"Name\"] == \"Test Opportunity\"\n            ), \"Opportunity name mismatch\"\n            assert opportunity.data[\"Amount\"] == \"100000\", \"Opportunity amount mismatch\"\n\n    print(\"All account with children tests passed successfully!\")\n\n\ndef test_relationship_updates() -> None:\n    \"\"\"\n    Tests that relationships are properly updated when a child object's parent reference changes.\n    This test verifies:\n    1. Initial relationship is created correctly\n    2. When parent reference is updated, old relationship is removed\n    3. New relationship is created correctly\n    \"\"\"\n    # Create initial test data - Contact linked to Acme Inc.\n    initial_contact = [\n        {\n            \"Id\": _VALID_SALESFORCE_IDS[40],\n            \"AccountId\": _VALID_SALESFORCE_IDS[0],\n            \"FirstName\": \"Test\",\n            \"LastName\": \"Contact\",\n        }\n    ]\n    create_csv_file(\"Contact\", initial_contact, \"initial_contact.csv\")\n\n    # Verify initial relationship\n    acme_children = get_child_ids(_VALID_SALESFORCE_IDS[0])\n    assert (\n        _VALID_SALESFORCE_IDS[40] in acme_children\n    ), \"Initial relationship not created\"\n\n    # Update contact to be linked to Globex Corp instead\n    updated_contact = [\n        {\n            \"Id\": _VALID_SALESFORCE_IDS[40],\n            \"AccountId\": _VALID_SALESFORCE_IDS[1],\n            \"FirstName\": \"Test\",\n            \"LastName\": \"Contact\",\n        }\n    ]\n    create_csv_file(\"Contact\", updated_contact, \"updated_contact.csv\")\n\n    # Verify old relationship is removed\n    acme_children = get_child_ids(_VALID_SALESFORCE_IDS[0])\n    assert (\n        _VALID_SALESFORCE_IDS[40] not in acme_children\n    ), \"Old relationship not removed\"\n\n    # Verify new relationship is created\n    globex_children = get_child_ids(_VALID_SALESFORCE_IDS[1])\n    assert _VALID_SALESFORCE_IDS[40] in globex_children, \"New relationship not created\"\n\n    print(\"All relationship update tests passed successfully!\")\n\n\ndef test_get_affected_parent_ids() -> None:\n    \"\"\"\n    Tests get_affected_parent_ids functionality by verifying:\n    1. IDs that are directly in the parent_types list are included\n    2. IDs that have children in the updated_ids list are included\n    3. IDs that are neither of the above are not included\n    \"\"\"\n    # Create test data with relationships\n    test_data = {\n        \"Account\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[0],\n                \"Name\": \"Parent Account 1\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[1],\n                \"Name\": \"Parent Account 2\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[2],\n                \"Name\": \"Not Affected Account\",\n            },\n        ],\n        \"Contact\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[40],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"FirstName\": \"Child\",\n                \"LastName\": \"Contact\",\n            }\n        ],\n    }\n\n    # Create and update CSV files for test data\n    for object_type, records in test_data.items():\n        create_csv_file(object_type, records)\n\n    # Test Case 1: Account directly in updated_ids and parent_types\n    updated_ids = {_VALID_SALESFORCE_IDS[1]}  # Parent Account 2\n    parent_types = [\"Account\"]\n    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)\n    assert _VALID_SALESFORCE_IDS[1] in affected_ids, \"Direct parent ID not included\"\n\n    # Test Case 2: Account with child in updated_ids\n    updated_ids = {_VALID_SALESFORCE_IDS[40]}  # Child Contact\n    parent_types = [\"Account\"]\n    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)\n    assert (\n        _VALID_SALESFORCE_IDS[0] in affected_ids\n    ), \"Parent of updated child not included\"\n\n    # Test Case 3: Both direct and indirect affects\n    updated_ids = {_VALID_SALESFORCE_IDS[1], _VALID_SALESFORCE_IDS[40]}  # Both cases\n    parent_types = [\"Account\"]\n    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)\n    assert len(affected_ids) == 2, \"Expected exactly two affected parent IDs\"\n    assert _VALID_SALESFORCE_IDS[0] in affected_ids, \"Parent of child not included\"\n    assert _VALID_SALESFORCE_IDS[1] in affected_ids, \"Direct parent ID not included\"\n    assert (\n        _VALID_SALESFORCE_IDS[2] not in affected_ids\n    ), \"Unaffected ID incorrectly included\"\n\n    # Test Case 4: No matches\n    updated_ids = {_VALID_SALESFORCE_IDS[40]}  # Child Contact\n    parent_types = [\"Opportunity\"]  # Wrong type\n    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)\n    assert len(affected_ids) == 0, \"Should return empty list when no matches\"\n\n    print(\"All get_affected_parent_ids tests passed successfully!\")\n\n\ndef main_build() -> None:\n    clear_sf_db()\n    create_csv_with_example_data()\n    test_query()\n    test_upsert()\n    test_relationships()\n    test_account_with_children()\n    test_relationship_updates()\n    test_get_affected_parent_ids()\n\n\nif __name__ == \"__main__\":\n    main_build()\n"
  },
  {
    "path": "backend/onyx/connectors/salesforce/shelve_stuff/shelve_functions.py",
    "content": "import csv\nimport shelve\n\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_utils import (\n    get_child_to_parent_shelf_path,\n)\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_utils import get_id_type_shelf_path\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_utils import get_object_shelf_path\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_utils import (\n    get_parent_to_child_shelf_path,\n)\nfrom onyx.connectors.salesforce.utils import SalesforceObject\nfrom onyx.connectors.salesforce.utils import validate_salesforce_id\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _update_relationship_shelves(\n    child_id: str,\n    parent_ids: set[str],\n) -> None:\n    \"\"\"Update the relationship shelf when a record is updated.\"\"\"\n    try:\n        # Convert child_id to string once\n        str_child_id = str(child_id)\n\n        # First update child to parent mapping\n        with shelve.open(\n            get_child_to_parent_shelf_path(),\n            flag=\"c\",\n            protocol=None,\n            writeback=True,\n        ) as child_to_parent_db:\n            old_parent_ids = set(child_to_parent_db.get(str_child_id, []))\n            child_to_parent_db[str_child_id] = list(parent_ids)\n\n            # Calculate differences outside the next context manager\n            parent_ids_to_remove = old_parent_ids - parent_ids\n            parent_ids_to_add = parent_ids - old_parent_ids\n\n            # Only sync once at the end\n            child_to_parent_db.sync()\n\n        # Then update parent to child mapping in a single transaction\n        if not parent_ids_to_remove and not parent_ids_to_add:\n            return\n        with shelve.open(\n            get_parent_to_child_shelf_path(),\n            flag=\"c\",\n            protocol=None,\n            writeback=True,\n        ) as parent_to_child_db:\n            # Process all removals first\n            for parent_id in parent_ids_to_remove:\n                str_parent_id = str(parent_id)\n                existing_children = set(parent_to_child_db.get(str_parent_id, []))\n                if str_child_id in existing_children:\n                    existing_children.remove(str_child_id)\n                    parent_to_child_db[str_parent_id] = list(existing_children)\n\n            # Then process all additions\n            for parent_id in parent_ids_to_add:\n                str_parent_id = str(parent_id)\n                existing_children = set(parent_to_child_db.get(str_parent_id, []))\n                existing_children.add(str_child_id)\n                parent_to_child_db[str_parent_id] = list(existing_children)\n\n            # Single sync at the end\n            parent_to_child_db.sync()\n\n    except Exception as e:\n        logger.error(f\"Error updating relationship shelves: {e}\")\n        logger.error(f\"Child ID: {child_id}, Parent IDs: {parent_ids}\")\n        raise\n\n\ndef get_child_ids(parent_id: str) -> set[str]:\n    \"\"\"Get all child IDs for a given parent ID.\n\n    Args:\n        parent_id: The ID of the parent object\n\n    Returns:\n        A set of child object IDs\n    \"\"\"\n    with shelve.open(get_parent_to_child_shelf_path()) as parent_to_child_db:\n        return set(parent_to_child_db.get(parent_id, []))\n\n\ndef update_sf_db_with_csv(\n    object_type: str,\n    csv_download_path: str,\n) -> list[str]:\n    \"\"\"Update the SF DB with a CSV file using shelve storage.\"\"\"\n    updated_ids = []\n    shelf_path = get_object_shelf_path(object_type)\n\n    # First read the CSV to get all the data\n    with open(csv_download_path, \"r\", newline=\"\", encoding=\"utf-8\") as f:\n        reader = csv.DictReader(f)\n        for row in reader:\n            id = row[\"Id\"]\n            parent_ids = set()\n            field_to_remove: set[str] = set()\n            # Update relationship shelves for any parent references\n            for field, value in row.items():\n                if validate_salesforce_id(value) and field != \"Id\":\n                    parent_ids.add(value)\n                    field_to_remove.add(field)\n                if not value:\n                    field_to_remove.add(field)\n            _update_relationship_shelves(id, parent_ids)\n            for field in field_to_remove:\n                # We use this to extract the Primary Owner later\n                if field != \"LastModifiedById\":\n                    del row[field]\n\n            # Update the main object shelf\n            with shelve.open(shelf_path) as object_type_db:\n                object_type_db[id] = row\n            # Update the ID-to-type mapping shelf\n            with shelve.open(get_id_type_shelf_path()) as id_type_db:\n                id_type_db[id] = object_type\n\n            updated_ids.append(id)\n\n    # os.remove(csv_download_path)\n    return updated_ids\n\n\ndef get_type_from_id(object_id: str) -> str | None:\n    \"\"\"Get the type of an object from its ID.\"\"\"\n    # Look up the object type from the ID-to-type mapping\n    with shelve.open(get_id_type_shelf_path()) as id_type_db:\n        if object_id not in id_type_db:\n            logger.warning(f\"Object ID {object_id} not found in ID-to-type mapping\")\n            return None\n        return id_type_db[object_id]\n\n\ndef get_record(\n    object_id: str, object_type: str | None = None\n) -> SalesforceObject | None:\n    \"\"\"\n    Retrieve the record and return it as a SalesforceObject.\n    The object type will be looked up from the ID-to-type mapping shelf.\n    \"\"\"\n    if object_type is None:\n        if not (object_type := get_type_from_id(object_id)):\n            return None\n\n    shelf_path = get_object_shelf_path(object_type)\n    with shelve.open(shelf_path) as db:\n        if object_id not in db:\n            logger.warning(f\"Object ID {object_id} not found in {shelf_path}\")\n            return None\n        data = db[object_id]\n        return SalesforceObject(\n            id=object_id,\n            type=object_type,\n            data=data,\n        )\n\n\ndef find_ids_by_type(object_type: str) -> list[str]:\n    \"\"\"\n    Find all object IDs for rows of the specified type.\n    \"\"\"\n    shelf_path = get_object_shelf_path(object_type)\n    try:\n        with shelve.open(shelf_path) as db:\n            return list(db.keys())\n    except FileNotFoundError:\n        return []\n\n\ndef get_affected_parent_ids_by_type(\n    updated_ids: set[str], parent_types: list[str]\n) -> dict[str, set[str]]:\n    \"\"\"Get IDs of objects that are of the specified parent types and are either in the updated_ids\n    or have children in the updated_ids.\n\n    Args:\n        updated_ids: List of IDs that were updated\n        parent_types: List of object types to filter by\n\n    Returns:\n        A dictionary of IDs that match the criteria\n    \"\"\"\n    affected_ids_by_type: dict[str, set[str]] = {}\n\n    # Check each updated ID\n    for updated_id in updated_ids:\n        # Add the ID itself if it's of a parent type\n        updated_type = get_type_from_id(updated_id)\n        if updated_type in parent_types:\n            affected_ids_by_type.setdefault(updated_type, set()).add(updated_id)\n            continue\n\n        # Get parents of this ID and add them if they're of a parent type\n        with shelve.open(get_child_to_parent_shelf_path()) as child_to_parent_db:\n            parent_ids = child_to_parent_db.get(updated_id, [])\n            for parent_id in parent_ids:\n                parent_type = get_type_from_id(parent_id)\n                if parent_type in parent_types:\n                    affected_ids_by_type.setdefault(parent_type, set()).add(parent_id)\n\n    return affected_ids_by_type\n"
  },
  {
    "path": "backend/onyx/connectors/salesforce/shelve_stuff/shelve_utils.py",
    "content": "import os\n\nfrom onyx.connectors.salesforce.utils import BASE_DATA_PATH\nfrom onyx.connectors.salesforce.utils import get_object_type_path\n\n\ndef get_object_shelf_path(object_type: str) -> str:\n    \"\"\"Get the path to the shelf file for a specific object type.\"\"\"\n    base_path = get_object_type_path(object_type)\n    os.makedirs(base_path, exist_ok=True)\n    return os.path.join(base_path, \"data.shelf\")\n\n\ndef get_id_type_shelf_path() -> str:\n    \"\"\"Get the path to the ID-to-type mapping shelf.\"\"\"\n    os.makedirs(BASE_DATA_PATH, exist_ok=True)\n    return os.path.join(BASE_DATA_PATH, \"id_type_mapping.shelf.4g\")\n\n\ndef get_parent_to_child_shelf_path() -> str:\n    \"\"\"Get the path to the parent-to-child mapping shelf.\"\"\"\n    os.makedirs(BASE_DATA_PATH, exist_ok=True)\n    return os.path.join(BASE_DATA_PATH, \"parent_to_child_mapping.shelf.4g\")\n\n\ndef get_child_to_parent_shelf_path() -> str:\n    \"\"\"Get the path to the child-to-parent mapping shelf.\"\"\"\n    os.makedirs(BASE_DATA_PATH, exist_ok=True)\n    return os.path.join(BASE_DATA_PATH, \"child_to_parent_mapping.shelf.4g\")\n"
  },
  {
    "path": "backend/onyx/connectors/salesforce/shelve_stuff/test_salesforce_shelves.py",
    "content": "import csv\nimport os\nimport shutil\n\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_functions import find_ids_by_type\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_functions import (\n    get_affected_parent_ids_by_type,\n)\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_functions import get_child_ids\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_functions import get_record\nfrom onyx.connectors.salesforce.shelve_stuff.shelve_functions import (\n    update_sf_db_with_csv,\n)\nfrom onyx.connectors.salesforce.utils import BASE_DATA_PATH\nfrom onyx.connectors.salesforce.utils import get_object_type_path\n\n_VALID_SALESFORCE_IDS = [\n    \"001bm00000fd9Z3AAI\",\n    \"001bm00000fdYTdAAM\",\n    \"001bm00000fdYTeAAM\",\n    \"001bm00000fdYTfAAM\",\n    \"001bm00000fdYTgAAM\",\n    \"001bm00000fdYThAAM\",\n    \"001bm00000fdYTiAAM\",\n    \"001bm00000fdYTjAAM\",\n    \"001bm00000fdYTkAAM\",\n    \"001bm00000fdYTlAAM\",\n    \"001bm00000fdYTmAAM\",\n    \"001bm00000fdYTnAAM\",\n    \"001bm00000fdYToAAM\",\n    \"500bm00000XoOxtAAF\",\n    \"500bm00000XoOxuAAF\",\n    \"500bm00000XoOxvAAF\",\n    \"500bm00000XoOxwAAF\",\n    \"500bm00000XoOxxAAF\",\n    \"500bm00000XoOxyAAF\",\n    \"500bm00000XoOxzAAF\",\n    \"500bm00000XoOy0AAF\",\n    \"500bm00000XoOy1AAF\",\n    \"500bm00000XoOy2AAF\",\n    \"500bm00000XoOy3AAF\",\n    \"500bm00000XoOy4AAF\",\n    \"500bm00000XoOy5AAF\",\n    \"500bm00000XoOy6AAF\",\n    \"500bm00000XoOy7AAF\",\n    \"500bm00000XoOy8AAF\",\n    \"500bm00000XoOy9AAF\",\n    \"500bm00000XoOyAAAV\",\n    \"500bm00000XoOyBAAV\",\n    \"500bm00000XoOyCAAV\",\n    \"500bm00000XoOyDAAV\",\n    \"500bm00000XoOyEAAV\",\n    \"500bm00000XoOyFAAV\",\n    \"500bm00000XoOyGAAV\",\n    \"500bm00000XoOyHAAV\",\n    \"500bm00000XoOyIAAV\",\n    \"003bm00000EjHCjAAN\",\n    \"003bm00000EjHCkAAN\",\n    \"003bm00000EjHClAAN\",\n    \"003bm00000EjHCmAAN\",\n    \"003bm00000EjHCnAAN\",\n    \"003bm00000EjHCoAAN\",\n    \"003bm00000EjHCpAAN\",\n    \"003bm00000EjHCqAAN\",\n    \"003bm00000EjHCrAAN\",\n    \"003bm00000EjHCsAAN\",\n    \"003bm00000EjHCtAAN\",\n    \"003bm00000EjHCuAAN\",\n    \"003bm00000EjHCvAAN\",\n    \"003bm00000EjHCwAAN\",\n    \"003bm00000EjHCxAAN\",\n    \"003bm00000EjHCyAAN\",\n    \"003bm00000EjHCzAAN\",\n    \"003bm00000EjHD0AAN\",\n    \"003bm00000EjHD1AAN\",\n    \"003bm00000EjHD2AAN\",\n    \"550bm00000EXc2tAAD\",\n    \"006bm000006kyDpAAI\",\n    \"006bm000006kyDqAAI\",\n    \"006bm000006kyDrAAI\",\n    \"006bm000006kyDsAAI\",\n    \"006bm000006kyDtAAI\",\n    \"006bm000006kyDuAAI\",\n    \"006bm000006kyDvAAI\",\n    \"006bm000006kyDwAAI\",\n    \"006bm000006kyDxAAI\",\n    \"006bm000006kyDyAAI\",\n    \"006bm000006kyDzAAI\",\n    \"006bm000006kyE0AAI\",\n    \"006bm000006kyE1AAI\",\n    \"006bm000006kyE2AAI\",\n    \"006bm000006kyE3AAI\",\n    \"006bm000006kyE4AAI\",\n    \"006bm000006kyE5AAI\",\n    \"006bm000006kyE6AAI\",\n    \"006bm000006kyE7AAI\",\n    \"006bm000006kyE8AAI\",\n    \"006bm000006kyE9AAI\",\n    \"006bm000006kyEAAAY\",\n    \"006bm000006kyEBAAY\",\n    \"006bm000006kyECAAY\",\n    \"006bm000006kyEDAAY\",\n    \"006bm000006kyEEAAY\",\n    \"006bm000006kyEFAAY\",\n    \"006bm000006kyEGAAY\",\n    \"006bm000006kyEHAAY\",\n    \"006bm000006kyEIAAY\",\n    \"006bm000006kyEJAAY\",\n    \"005bm000009zy0TAAQ\",\n    \"005bm000009zy25AAA\",\n    \"005bm000009zy26AAA\",\n    \"005bm000009zy28AAA\",\n    \"005bm000009zy29AAA\",\n    \"005bm000009zy2AAAQ\",\n    \"005bm000009zy2BAAQ\",\n]\n\n\ndef clear_sf_db() -> None:\n    \"\"\"\n    Clears the SF DB by deleting all files in the data directory.\n    \"\"\"\n    shutil.rmtree(BASE_DATA_PATH)\n\n\ndef create_csv_file(\n    object_type: str, records: list[dict], filename: str = \"test_data.csv\"\n) -> None:\n    \"\"\"\n    Creates a CSV file for the given object type and records.\n\n    Args:\n        object_type: The Salesforce object type (e.g. \"Account\", \"Contact\")\n        records: List of dictionaries containing the record data\n        filename: Name of the CSV file to create (default: test_data.csv)\n    \"\"\"\n    if not records:\n        return\n\n    # Get all unique fields from records\n    fields: set[str] = set()\n    for record in records:\n        fields.update(record.keys())\n    fields = set(sorted(list(fields)))  # Sort for consistent order\n\n    # Create CSV file\n    csv_path = os.path.join(get_object_type_path(object_type), filename)\n    with open(csv_path, \"w\", newline=\"\", encoding=\"utf-8\") as f:\n        writer = csv.DictWriter(f, fieldnames=fields)\n        writer.writeheader()\n        for record in records:\n            writer.writerow(record)\n\n    # Update the database with the CSV\n    update_sf_db_with_csv(object_type, csv_path)\n\n\ndef create_csv_with_example_data() -> None:\n    \"\"\"\n    Creates CSV files with example data, organized by object type.\n    \"\"\"\n    example_data: dict[str, list[dict]] = {\n        \"Account\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[0],\n                \"Name\": \"Acme Inc.\",\n                \"BillingCity\": \"New York\",\n                \"Industry\": \"Technology\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[1],\n                \"Name\": \"Globex Corp\",\n                \"BillingCity\": \"Los Angeles\",\n                \"Industry\": \"Manufacturing\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[2],\n                \"Name\": \"Initech\",\n                \"BillingCity\": \"Austin\",\n                \"Industry\": \"Software\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[3],\n                \"Name\": \"TechCorp Solutions\",\n                \"BillingCity\": \"San Francisco\",\n                \"Industry\": \"Software\",\n                \"AnnualRevenue\": 5000000,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[4],\n                \"Name\": \"BioMed Research\",\n                \"BillingCity\": \"Boston\",\n                \"Industry\": \"Healthcare\",\n                \"AnnualRevenue\": 12000000,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[5],\n                \"Name\": \"Green Energy Co\",\n                \"BillingCity\": \"Portland\",\n                \"Industry\": \"Energy\",\n                \"AnnualRevenue\": 8000000,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[6],\n                \"Name\": \"DataFlow Analytics\",\n                \"BillingCity\": \"Seattle\",\n                \"Industry\": \"Technology\",\n                \"AnnualRevenue\": 3000000,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[7],\n                \"Name\": \"Cloud Nine Services\",\n                \"BillingCity\": \"Denver\",\n                \"Industry\": \"Cloud Computing\",\n                \"AnnualRevenue\": 7000000,\n            },\n        ],\n        \"Contact\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[40],\n                \"FirstName\": \"John\",\n                \"LastName\": \"Doe\",\n                \"Email\": \"john.doe@acme.com\",\n                \"Title\": \"CEO\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[41],\n                \"FirstName\": \"Jane\",\n                \"LastName\": \"Smith\",\n                \"Email\": \"jane.smith@acme.com\",\n                \"Title\": \"CTO\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[42],\n                \"FirstName\": \"Bob\",\n                \"LastName\": \"Johnson\",\n                \"Email\": \"bob.j@globex.com\",\n                \"Title\": \"Sales Director\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[43],\n                \"FirstName\": \"Sarah\",\n                \"LastName\": \"Chen\",\n                \"Email\": \"sarah.chen@techcorp.com\",\n                \"Title\": \"Product Manager\",\n                \"Phone\": \"415-555-0101\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[44],\n                \"FirstName\": \"Michael\",\n                \"LastName\": \"Rodriguez\",\n                \"Email\": \"m.rodriguez@biomed.com\",\n                \"Title\": \"Research Director\",\n                \"Phone\": \"617-555-0202\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[45],\n                \"FirstName\": \"Emily\",\n                \"LastName\": \"Green\",\n                \"Email\": \"emily.g@greenenergy.com\",\n                \"Title\": \"Sustainability Lead\",\n                \"Phone\": \"503-555-0303\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[46],\n                \"FirstName\": \"David\",\n                \"LastName\": \"Kim\",\n                \"Email\": \"david.kim@dataflow.com\",\n                \"Title\": \"Data Scientist\",\n                \"Phone\": \"206-555-0404\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[47],\n                \"FirstName\": \"Rachel\",\n                \"LastName\": \"Taylor\",\n                \"Email\": \"r.taylor@cloudnine.com\",\n                \"Title\": \"Cloud Architect\",\n                \"Phone\": \"303-555-0505\",\n            },\n        ],\n        \"Opportunity\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[62],\n                \"Name\": \"Acme Server Upgrade\",\n                \"Amount\": 50000,\n                \"Stage\": \"Prospecting\",\n                \"CloseDate\": \"2024-06-30\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[63],\n                \"Name\": \"Globex Manufacturing Line\",\n                \"Amount\": 150000,\n                \"Stage\": \"Negotiation\",\n                \"CloseDate\": \"2024-03-15\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[64],\n                \"Name\": \"Initech Software License\",\n                \"Amount\": 75000,\n                \"Stage\": \"Closed Won\",\n                \"CloseDate\": \"2024-01-30\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[65],\n                \"Name\": \"TechCorp AI Implementation\",\n                \"Amount\": 250000,\n                \"Stage\": \"Needs Analysis\",\n                \"CloseDate\": \"2024-08-15\",\n                \"Probability\": 60,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[66],\n                \"Name\": \"BioMed Lab Equipment\",\n                \"Amount\": 500000,\n                \"Stage\": \"Value Proposition\",\n                \"CloseDate\": \"2024-09-30\",\n                \"Probability\": 75,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[67],\n                \"Name\": \"Green Energy Solar Project\",\n                \"Amount\": 750000,\n                \"Stage\": \"Proposal\",\n                \"CloseDate\": \"2024-07-15\",\n                \"Probability\": 80,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[68],\n                \"Name\": \"DataFlow Analytics Platform\",\n                \"Amount\": 180000,\n                \"Stage\": \"Negotiation\",\n                \"CloseDate\": \"2024-05-30\",\n                \"Probability\": 90,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[69],\n                \"Name\": \"Cloud Nine Infrastructure\",\n                \"Amount\": 300000,\n                \"Stage\": \"Qualification\",\n                \"CloseDate\": \"2024-10-15\",\n                \"Probability\": 40,\n            },\n        ],\n    }\n\n    # Create CSV files for each object type\n    for object_type, records in example_data.items():\n        create_csv_file(object_type, records)\n\n\ndef test_query() -> None:\n    \"\"\"\n    Tests querying functionality by verifying:\n    1. All expected Account IDs are found\n    2. Each Account's data matches what was inserted\n    \"\"\"\n    # Expected test data for verification\n    expected_accounts: dict[str, dict[str, str | int]] = {\n        _VALID_SALESFORCE_IDS[0]: {\n            \"Name\": \"Acme Inc.\",\n            \"BillingCity\": \"New York\",\n            \"Industry\": \"Technology\",\n        },\n        _VALID_SALESFORCE_IDS[1]: {\n            \"Name\": \"Globex Corp\",\n            \"BillingCity\": \"Los Angeles\",\n            \"Industry\": \"Manufacturing\",\n        },\n        _VALID_SALESFORCE_IDS[2]: {\n            \"Name\": \"Initech\",\n            \"BillingCity\": \"Austin\",\n            \"Industry\": \"Software\",\n        },\n        _VALID_SALESFORCE_IDS[3]: {\n            \"Name\": \"TechCorp Solutions\",\n            \"BillingCity\": \"San Francisco\",\n            \"Industry\": \"Software\",\n            \"AnnualRevenue\": 5000000,\n        },\n        _VALID_SALESFORCE_IDS[4]: {\n            \"Name\": \"BioMed Research\",\n            \"BillingCity\": \"Boston\",\n            \"Industry\": \"Healthcare\",\n            \"AnnualRevenue\": 12000000,\n        },\n        _VALID_SALESFORCE_IDS[5]: {\n            \"Name\": \"Green Energy Co\",\n            \"BillingCity\": \"Portland\",\n            \"Industry\": \"Energy\",\n            \"AnnualRevenue\": 8000000,\n        },\n        _VALID_SALESFORCE_IDS[6]: {\n            \"Name\": \"DataFlow Analytics\",\n            \"BillingCity\": \"Seattle\",\n            \"Industry\": \"Technology\",\n            \"AnnualRevenue\": 3000000,\n        },\n        _VALID_SALESFORCE_IDS[7]: {\n            \"Name\": \"Cloud Nine Services\",\n            \"BillingCity\": \"Denver\",\n            \"Industry\": \"Cloud Computing\",\n            \"AnnualRevenue\": 7000000,\n        },\n    }\n\n    # Get all Account IDs\n    account_ids = find_ids_by_type(\"Account\")\n\n    # Verify we found all expected accounts\n    assert len(account_ids) == len(\n        expected_accounts\n    ), f\"Expected {len(expected_accounts)} accounts, found {len(account_ids)}\"\n    assert set(account_ids) == set(\n        expected_accounts.keys()\n    ), \"Found account IDs don't match expected IDs\"\n\n    # Verify each account's data\n    for acc_id in account_ids:\n        combined = get_record(acc_id)\n        assert combined is not None, f\"Could not find account {acc_id}\"\n\n        expected = expected_accounts[acc_id]\n\n        # Verify account data matches\n        for key, value in expected.items():\n            value = str(value)\n            assert (\n                combined.data[key] == value\n            ), f\"Account {acc_id} field {key} expected {value}, got {combined.data[key]}\"\n\n    print(\"All query tests passed successfully!\")\n\n\ndef test_upsert() -> None:\n    \"\"\"\n    Tests upsert functionality by:\n    1. Updating an existing account\n    2. Creating a new account\n    3. Verifying both operations were successful\n    \"\"\"\n    # Create CSV for updating an existing account and adding a new one\n    update_data: list[dict[str, str | int]] = [\n        {\n            \"Id\": _VALID_SALESFORCE_IDS[0],\n            \"Name\": \"Acme Inc. Updated\",\n            \"BillingCity\": \"New York\",\n            \"Industry\": \"Technology\",\n            \"Description\": \"Updated company info\",\n        },\n        {\n            \"Id\": _VALID_SALESFORCE_IDS[2],\n            \"Name\": \"New Company Inc.\",\n            \"BillingCity\": \"Miami\",\n            \"Industry\": \"Finance\",\n            \"AnnualRevenue\": 1000000,\n        },\n    ]\n\n    create_csv_file(\"Account\", update_data, \"update_data.csv\")\n\n    # Verify the update worked\n    updated_record = get_record(_VALID_SALESFORCE_IDS[0])\n    assert updated_record is not None, \"Updated record not found\"\n    assert updated_record.data[\"Name\"] == \"Acme Inc. Updated\", \"Name not updated\"\n    assert (\n        updated_record.data[\"Description\"] == \"Updated company info\"\n    ), \"Description not added\"\n\n    # Verify the new record was created\n    new_record = get_record(_VALID_SALESFORCE_IDS[2])\n    assert new_record is not None, \"New record not found\"\n    assert new_record.data[\"Name\"] == \"New Company Inc.\", \"New record name incorrect\"\n    assert new_record.data[\"AnnualRevenue\"] == \"1000000\", \"New record revenue incorrect\"\n\n    print(\"All upsert tests passed successfully!\")\n\n\ndef test_relationships() -> None:\n    \"\"\"\n    Tests relationship shelf updates and queries by:\n    1. Creating test data with relationships\n    2. Verifying the relationships are correctly stored\n    3. Testing relationship queries\n    \"\"\"\n    # Create test data for each object type\n    test_data: dict[str, list[dict[str, str | int]]] = {\n        \"Case\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[13],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"Subject\": \"Test Case 1\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[14],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"Subject\": \"Test Case 2\",\n            },\n        ],\n        \"Contact\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[48],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"FirstName\": \"Test\",\n                \"LastName\": \"Contact\",\n            }\n        ],\n        \"Opportunity\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[62],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"Name\": \"Test Opportunity\",\n                \"Amount\": 100000,\n            }\n        ],\n    }\n\n    # Create and update CSV files for each object type\n    for object_type, records in test_data.items():\n        create_csv_file(object_type, records, \"relationship_test.csv\")\n\n    # Test relationship queries\n    # All these objects should be children of Acme Inc.\n    child_ids = get_child_ids(_VALID_SALESFORCE_IDS[0])\n    assert len(child_ids) == 4, f\"Expected 4 child objects, found {len(child_ids)}\"\n    assert _VALID_SALESFORCE_IDS[13] in child_ids, \"Case 1 not found in relationship\"\n    assert _VALID_SALESFORCE_IDS[14] in child_ids, \"Case 2 not found in relationship\"\n    assert _VALID_SALESFORCE_IDS[48] in child_ids, \"Contact not found in relationship\"\n    assert (\n        _VALID_SALESFORCE_IDS[62] in child_ids\n    ), \"Opportunity not found in relationship\"\n\n    # Test querying relationships for a different account (should be empty)\n    other_account_children = get_child_ids(_VALID_SALESFORCE_IDS[1])\n    assert (\n        len(other_account_children) == 0\n    ), \"Expected no children for different account\"\n\n    print(\"All relationship tests passed successfully!\")\n\n\ndef test_account_with_children() -> None:\n    \"\"\"\n    Tests querying all accounts and retrieving their child objects.\n    This test verifies that:\n    1. All accounts can be retrieved\n    2. Child objects are correctly linked\n    3. Child object data is complete and accurate\n    \"\"\"\n    # First get all account IDs\n    account_ids = find_ids_by_type(\"Account\")\n    assert len(account_ids) > 0, \"No accounts found\"\n\n    # For each account, get its children and verify the data\n    for account_id in account_ids:\n        account = get_record(account_id)\n        assert account is not None, f\"Could not find account {account_id}\"\n\n        # Get all child objects\n        child_ids = get_child_ids(account_id)\n\n        # For Acme Inc., verify specific relationships\n        if account_id == _VALID_SALESFORCE_IDS[0]:  # Acme Inc.\n            assert (\n                len(child_ids) == 4\n            ), f\"Expected 4 children for Acme Inc., found {len(child_ids)}\"\n\n            # Get all child records\n            child_records = []\n            for child_id in child_ids:\n                child_record = get_record(child_id)\n                if child_record is not None:\n                    child_records.append(child_record)\n            # Verify Cases\n            cases = [r for r in child_records if r.type == \"Case\"]\n            assert (\n                len(cases) == 2\n            ), f\"Expected 2 cases for Acme Inc., found {len(cases)}\"\n            case_subjects = {case.data[\"Subject\"] for case in cases}\n            assert \"Test Case 1\" in case_subjects, \"Test Case 1 not found\"\n            assert \"Test Case 2\" in case_subjects, \"Test Case 2 not found\"\n\n            # Verify Contacts\n            contacts = [r for r in child_records if r.type == \"Contact\"]\n            assert (\n                len(contacts) == 1\n            ), f\"Expected 1 contact for Acme Inc., found {len(contacts)}\"\n            contact = contacts[0]\n            assert contact.data[\"FirstName\"] == \"Test\", \"Contact first name mismatch\"\n            assert contact.data[\"LastName\"] == \"Contact\", \"Contact last name mismatch\"\n\n            # Verify Opportunities\n            opportunities = [r for r in child_records if r.type == \"Opportunity\"]\n            assert (\n                len(opportunities) == 1\n            ), f\"Expected 1 opportunity for Acme Inc., found {len(opportunities)}\"\n            opportunity = opportunities[0]\n            assert (\n                opportunity.data[\"Name\"] == \"Test Opportunity\"\n            ), \"Opportunity name mismatch\"\n            assert opportunity.data[\"Amount\"] == \"100000\", \"Opportunity amount mismatch\"\n\n    print(\"All account with children tests passed successfully!\")\n\n\ndef test_relationship_updates() -> None:\n    \"\"\"\n    Tests that relationships are properly updated when a child object's parent reference changes.\n    This test verifies:\n    1. Initial relationship is created correctly\n    2. When parent reference is updated, old relationship is removed\n    3. New relationship is created correctly\n    \"\"\"\n    # Create initial test data - Contact linked to Acme Inc.\n    initial_contact = [\n        {\n            \"Id\": _VALID_SALESFORCE_IDS[40],\n            \"AccountId\": _VALID_SALESFORCE_IDS[0],\n            \"FirstName\": \"Test\",\n            \"LastName\": \"Contact\",\n        }\n    ]\n    create_csv_file(\"Contact\", initial_contact, \"initial_contact.csv\")\n\n    # Verify initial relationship\n    acme_children = get_child_ids(_VALID_SALESFORCE_IDS[0])\n    assert (\n        _VALID_SALESFORCE_IDS[40] in acme_children\n    ), \"Initial relationship not created\"\n\n    # Update contact to be linked to Globex Corp instead\n    updated_contact = [\n        {\n            \"Id\": _VALID_SALESFORCE_IDS[40],\n            \"AccountId\": _VALID_SALESFORCE_IDS[1],\n            \"FirstName\": \"Test\",\n            \"LastName\": \"Contact\",\n        }\n    ]\n    create_csv_file(\"Contact\", updated_contact, \"updated_contact.csv\")\n\n    # Verify old relationship is removed\n    acme_children = get_child_ids(_VALID_SALESFORCE_IDS[0])\n    assert (\n        _VALID_SALESFORCE_IDS[40] not in acme_children\n    ), \"Old relationship not removed\"\n\n    # Verify new relationship is created\n    globex_children = get_child_ids(_VALID_SALESFORCE_IDS[1])\n    assert _VALID_SALESFORCE_IDS[40] in globex_children, \"New relationship not created\"\n\n    print(\"All relationship update tests passed successfully!\")\n\n\ndef test_get_affected_parent_ids() -> None:\n    \"\"\"\n    Tests get_affected_parent_ids functionality by verifying:\n    1. IDs that are directly in the parent_types list are included\n    2. IDs that have children in the updated_ids list are included\n    3. IDs that are neither of the above are not included\n    \"\"\"\n    # Create test data with relationships\n    test_data = {\n        \"Account\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[0],\n                \"Name\": \"Parent Account 1\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[1],\n                \"Name\": \"Parent Account 2\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[2],\n                \"Name\": \"Not Affected Account\",\n            },\n        ],\n        \"Contact\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[40],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"FirstName\": \"Child\",\n                \"LastName\": \"Contact\",\n            }\n        ],\n    }\n\n    # Create and update CSV files for test data\n    for object_type, records in test_data.items():\n        create_csv_file(object_type, records)\n\n    # Test Case 1: Account directly in updated_ids and parent_types\n    updated_ids = {_VALID_SALESFORCE_IDS[1]}  # Parent Account 2\n    parent_types = [\"Account\"]\n    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)\n    assert _VALID_SALESFORCE_IDS[1] in affected_ids, \"Direct parent ID not included\"\n\n    # Test Case 2: Account with child in updated_ids\n    updated_ids = {_VALID_SALESFORCE_IDS[40]}  # Child Contact\n    parent_types = [\"Account\"]\n    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)\n    assert (\n        _VALID_SALESFORCE_IDS[0] in affected_ids\n    ), \"Parent of updated child not included\"\n\n    # Test Case 3: Both direct and indirect affects\n    updated_ids = {_VALID_SALESFORCE_IDS[1], _VALID_SALESFORCE_IDS[40]}  # Both cases\n    parent_types = [\"Account\"]\n    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)\n    assert len(affected_ids) == 2, \"Expected exactly two affected parent IDs\"\n    assert _VALID_SALESFORCE_IDS[0] in affected_ids, \"Parent of child not included\"\n    assert _VALID_SALESFORCE_IDS[1] in affected_ids, \"Direct parent ID not included\"\n    assert (\n        _VALID_SALESFORCE_IDS[2] not in affected_ids\n    ), \"Unaffected ID incorrectly included\"\n\n    # Test Case 4: No matches\n    updated_ids = {_VALID_SALESFORCE_IDS[40]}  # Child Contact\n    parent_types = [\"Opportunity\"]  # Wrong type\n    affected_ids = get_affected_parent_ids_by_type(updated_ids, parent_types)\n    assert len(affected_ids) == 0, \"Should return empty list when no matches\"\n\n    print(\"All get_affected_parent_ids tests passed successfully!\")\n\n\ndef main_build() -> None:\n    clear_sf_db()\n    create_csv_with_example_data()\n    test_query()\n    test_upsert()\n    test_relationships()\n    test_account_with_children()\n    test_relationship_updates()\n    test_get_affected_parent_ids()\n\n\nif __name__ == \"__main__\":\n    main_build()\n"
  },
  {
    "path": "backend/onyx/connectors/salesforce/sqlite_functions.py",
    "content": "import csv\nimport json\nimport os\nimport sqlite3\nimport time\nfrom collections.abc import Iterator\nfrom pathlib import Path\nfrom typing import Any\nfrom typing import cast\n\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.salesforce.utils import ACCOUNT_OBJECT_TYPE\nfrom onyx.connectors.salesforce.utils import ID_FIELD\nfrom onyx.connectors.salesforce.utils import NAME_FIELD\nfrom onyx.connectors.salesforce.utils import remove_sqlite_db_files\nfrom onyx.connectors.salesforce.utils import SalesforceObject\nfrom onyx.connectors.salesforce.utils import USER_OBJECT_TYPE\nfrom onyx.connectors.salesforce.utils import validate_salesforce_id\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.utils import batch_list\n\n\nlogger = setup_logger()\n\n\nSQLITE_DISK_IO_ERROR = \"disk I/O error\"\n\n\nclass OnyxSalesforceSQLite:\n    \"\"\"Notes on context management using 'with self.conn':\n\n    Does autocommit / rollback on exit.\n    Does NOT close on exit! .close must be called explicitly.\n    \"\"\"\n\n    # NOTE(rkuo): this string could probably occur naturally. A more unique value\n    # might be appropriate here.\n    NULL_ID_STRING = \"N/A\"\n\n    def __init__(self, filename: str, isolation_level: str | None = None):\n        self.filename = filename\n        self.isolation_level = isolation_level\n        self._conn: sqlite3.Connection | None = None\n\n        # this is only set on connection. This variable does not change\n        # when a new db is initialized with this class.\n        self._existing_db = True\n\n    def __del__(self) -> None:\n        self.close()\n\n    @property\n    def file_size(self) -> int:\n        \"\"\"Returns -1 if the file does not exist.\"\"\"\n        if not self.filename:\n            return -1\n\n        if not os.path.exists(self.filename):\n            return -1\n\n        file_path = Path(self.filename)\n        return file_path.stat().st_size\n\n    def connect(self) -> None:\n        if self._conn is not None:\n            self._conn.close()\n            self._conn = None\n\n        self._existing_db = os.path.exists(self.filename)\n\n        # make the path if it doesn't already exist\n        os.makedirs(os.path.dirname(self.filename), exist_ok=True)\n\n        conn = sqlite3.connect(self.filename, timeout=60.0)\n        if self.isolation_level is not None:\n            conn.isolation_level = self.isolation_level\n\n        self._conn = conn\n\n    def close(self) -> None:\n        if self._conn is None:\n            return\n\n        self._conn.close()\n        self._conn = None\n\n    def cursor(self) -> sqlite3.Cursor:\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        return self._conn.cursor()\n\n    def flush(self) -> None:\n        \"\"\"We're using SQLite in WAL mode sometimes. To flush to the DB we have to\n        call this.\"\"\"\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        with self._conn:\n            cursor = self._conn.cursor()\n            cursor.execute(\"PRAGMA wal_checkpoint(FULL)\")\n\n    def apply_schema(self) -> None:\n        \"\"\"Initialize the SQLite database with required tables if they don't exist.\n\n        Non-destructive operation. If a disk I/O error is encountered (often due\n        to stale WAL/SHM files from a previous crash), this method will attempt\n        to recover by removing the corrupted files and recreating the database.\n        \"\"\"\n        try:\n            self._apply_schema_impl()\n        except sqlite3.OperationalError as e:\n            if SQLITE_DISK_IO_ERROR not in str(e):\n                raise\n\n            logger.warning(f\"SQLite disk I/O error detected, attempting recovery: {e}\")\n            self._recover_from_corruption()\n            self._apply_schema_impl()\n\n    def _recover_from_corruption(self) -> None:\n        \"\"\"Recover from SQLite corruption by removing all database files and reconnecting.\"\"\"\n        logger.info(f\"Removing corrupted SQLite files: {self.filename}\")\n\n        # Close existing connection\n        self.close()\n\n        # Remove all SQLite files (main db, WAL, SHM)\n        remove_sqlite_db_files(self.filename)\n\n        # Reconnect - this will create a fresh database\n        self.connect()\n\n        logger.info(\"SQLite recovery complete, fresh database created\")\n\n    def _apply_schema_impl(self) -> None:\n        \"\"\"Internal implementation of apply_schema.\"\"\"\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        start = time.monotonic()\n\n        with self._conn:\n            cursor = self._conn.cursor()\n\n            if self._existing_db:\n                file_path = Path(self.filename)\n                file_size = file_path.stat().st_size\n                logger.info(f\"init_db - found existing sqlite db: len={file_size}\")\n            else:\n                # NOTE(rkuo): why is this only if the db doesn't exist?\n\n                # Enable WAL mode for better concurrent access and write performance\n                cursor.execute(\"PRAGMA journal_mode=WAL\")\n                cursor.execute(\"PRAGMA synchronous=NORMAL\")\n                cursor.execute(\"PRAGMA temp_store=MEMORY\")\n                cursor.execute(\"PRAGMA cache_size=-2000000\")  # Use 2GB memory for cache\n\n            # Main table for storing Salesforce objects\n            cursor.execute(\n                \"\"\"\n                CREATE TABLE IF NOT EXISTS salesforce_objects (\n                    id TEXT PRIMARY KEY,\n                    object_type TEXT NOT NULL,\n                    data TEXT NOT NULL,  -- JSON serialized data\n                    last_modified INTEGER DEFAULT (strftime('%s', 'now'))  -- Add timestamp for better cache management\n                ) WITHOUT ROWID  -- Optimize for primary key lookups\n            \"\"\"\n            )\n\n            # NOTE(rkuo): this seems completely redundant with relationship_types\n            # Table for parent-child relationships with covering index\n            cursor.execute(\n                \"\"\"\n                CREATE TABLE IF NOT EXISTS relationships (\n                    child_id TEXT NOT NULL,\n                    parent_id TEXT NOT NULL,\n                    PRIMARY KEY (child_id, parent_id)\n                ) WITHOUT ROWID  -- Optimize for primary key lookups\n            \"\"\"\n            )\n\n            # New table for caching parent-child relationships with object types\n            cursor.execute(\n                \"\"\"\n                CREATE TABLE IF NOT EXISTS relationship_types (\n                    child_id TEXT NOT NULL,\n                    parent_id TEXT NOT NULL,\n                    parent_type TEXT NOT NULL,\n                    PRIMARY KEY (child_id, parent_id, parent_type)\n                ) WITHOUT ROWID\n            \"\"\"\n            )\n\n            # Create a table for User email to ID mapping if it doesn't exist\n            cursor.execute(\n                \"\"\"\n                CREATE TABLE IF NOT EXISTS user_email_map (\n                    email TEXT PRIMARY KEY,\n                    user_id TEXT,  -- Nullable to allow for users without IDs\n                    FOREIGN KEY (user_id) REFERENCES salesforce_objects(id)\n                ) WITHOUT ROWID\n            \"\"\"\n            )\n\n            # Create indexes if they don't exist (SQLite ignores IF NOT EXISTS for indexes)\n            def create_index_if_not_exists(\n                index_name: str, create_statement: str\n            ) -> None:\n                cursor.execute(\n                    f\"SELECT name FROM sqlite_master WHERE type='index' AND name='{index_name}'\"\n                )\n                if not cursor.fetchone():\n                    cursor.execute(create_statement)\n\n            create_index_if_not_exists(\n                \"idx_object_type\",\n                \"\"\"\n                CREATE INDEX idx_object_type\n                ON salesforce_objects(object_type, id)\n                WHERE object_type IS NOT NULL\n                \"\"\",\n            )\n\n            create_index_if_not_exists(\n                \"idx_parent_id\",\n                \"\"\"\n                CREATE INDEX idx_parent_id\n                ON relationships(parent_id, child_id)\n                \"\"\",\n            )\n\n            create_index_if_not_exists(\n                \"idx_child_parent\",\n                \"\"\"\n                CREATE INDEX idx_child_parent\n                ON relationships(child_id)\n                WHERE child_id IS NOT NULL\n                \"\"\",\n            )\n\n            create_index_if_not_exists(\n                \"idx_relationship_types_lookup\",\n                \"\"\"\n                CREATE INDEX idx_relationship_types_lookup\n                ON relationship_types(parent_type, child_id, parent_id)\n                \"\"\",\n            )\n\n            elapsed = time.monotonic() - start\n            logger.info(f\"init_db - create tables and indices: elapsed={elapsed:.2f}\")\n\n            # Analyze tables to help query planner\n            # NOTE(rkuo): skip ANALYZE - it takes too long and we likely don't have\n            # complicated queries that need this\n            # start = time.monotonic()\n            # cursor.execute(\"ANALYZE relationships\")\n            # cursor.execute(\"ANALYZE salesforce_objects\")\n            # cursor.execute(\"ANALYZE relationship_types\")\n            # cursor.execute(\"ANALYZE user_email_map\")\n            # elapsed = time.monotonic() - start\n            # logger.info(f\"init_db - analyze: elapsed={elapsed:.2f}\")\n\n            # If database already existed but user_email_map needs to be populated\n            start = time.monotonic()\n            cursor.execute(\"SELECT COUNT(*) FROM user_email_map\")\n            elapsed = time.monotonic() - start\n            logger.info(f\"init_db - count user_email_map: elapsed={elapsed:.2f}\")\n\n            start = time.monotonic()\n            if cursor.fetchone()[0] == 0:\n                OnyxSalesforceSQLite._update_user_email_map(cursor)\n            elapsed = time.monotonic() - start\n            logger.info(f\"init_db - update_user_email_map: elapsed={elapsed:.2f}\")\n\n    def get_user_id_by_email(self, email: str) -> str | None:\n        \"\"\"Get the Salesforce User ID for a given email address.\n\n        Args:\n            email: The email address to look up\n\n        Returns:\n            A tuple of (was_found, user_id):\n                - was_found: True if the email exists in the table, False if not found\n                - user_id: The Salesforce User ID if exists, None otherwise\n        \"\"\"\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        with self._conn:\n            cursor = self._conn.cursor()\n            cursor.execute(\n                \"SELECT user_id FROM user_email_map WHERE email = ?\", (email,)\n            )\n            result = cursor.fetchone()\n            if result is None:\n                return None\n            return result[0]\n\n    def update_email_to_id_table(self, email: str, id: str | None) -> None:\n        \"\"\"Update the email to ID map table with a new email and ID.\"\"\"\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        id_to_use = id or self.NULL_ID_STRING\n        with self._conn:\n            cursor = self._conn.cursor()\n            cursor.execute(\n                \"INSERT OR REPLACE INTO user_email_map (email, user_id) VALUES (?, ?)\",\n                (email, id_to_use),\n            )\n\n    def log_stats(self) -> None:\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        with self._conn:\n            cache_pages = self._conn.execute(\"PRAGMA cache_size\").fetchone()[0]\n            page_size = self._conn.execute(\"PRAGMA page_size\").fetchone()[0]\n            if cache_pages >= 0:\n                cache_bytes = cache_pages * page_size\n            else:\n                cache_bytes = abs(cache_pages * 1024)\n            logger.info(\n                f\"SQLite stats: sqlite_version={sqlite3.sqlite_version} \"\n                f\"cache_pages={cache_pages} \"\n                f\"page_size={page_size} \"\n                f\"cache_bytes={cache_bytes}\"\n            )\n\n    # get_changed_parent_ids_by_type_2 replaces this\n    def get_changed_parent_ids_by_type(\n        self,\n        changed_ids: list[str],\n        parent_types: set[str],\n        batch_size: int = 500,\n    ) -> Iterator[tuple[str, str, int]]:\n        \"\"\"Get IDs of objects that are of the specified parent types and are either in the\n        updated_ids or have children in the updated_ids. Yields tuples of (parent_type, affected_ids, num_examined).\n\n        NOTE(rkuo): This function used to have some interesting behavior ... it created batches of id's\n        and yielded back a list once for each parent type within that batch.\n\n        There's no need to expose the details of the internal batching to the caller, so\n        we're now yielding once per changed parent.\n        \"\"\"\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        updated_parent_ids: set[str] = (\n            set()\n        )  # dedupes parent id's that have already been yielded\n\n        # SQLite typically has a limit of 999 variables\n        num_examined = 0\n        updated_ids_batches = batch_list(changed_ids, batch_size)\n\n        with self._conn:\n            cursor = self._conn.cursor()\n\n            for batch_ids in updated_ids_batches:\n                num_examined += len(batch_ids)\n\n                batch_ids = list(set(batch_ids) - updated_parent_ids)\n                if not batch_ids:\n                    continue\n                id_placeholders = \",\".join([\"?\" for _ in batch_ids])\n\n                for parent_type in parent_types:\n                    affected_ids: set[str] = set()\n\n                    # Get directly updated objects of parent types - using index on object_type\n                    cursor.execute(\n                        f\"\"\"\n                        SELECT id FROM salesforce_objects\n                        WHERE id IN ({id_placeholders})\n                        AND object_type = ?\n                        \"\"\",\n                        batch_ids + [parent_type],\n                    )\n                    affected_ids.update(row[0] for row in cursor.fetchall())\n\n                    # Get parent objects of updated objects - using optimized relationship_types table\n                    cursor.execute(\n                        f\"\"\"\n                        SELECT DISTINCT parent_id\n                        FROM relationship_types\n                        INDEXED BY idx_relationship_types_lookup\n                        WHERE parent_type = ?\n                        AND child_id IN ({id_placeholders})\n                        \"\"\",\n                        [parent_type] + batch_ids,\n                    )\n                    affected_ids.update(row[0] for row in cursor.fetchall())\n\n                    # Remove any parent IDs that have already been processed\n                    newly_affected_ids = affected_ids - updated_parent_ids\n                    # Add the new affected IDs to the set of updated parent IDs\n                    if newly_affected_ids:\n                        # Yield each newly affected ID individually\n                        for parent_id in newly_affected_ids:\n                            yield parent_type, parent_id, num_examined\n\n                        updated_parent_ids.update(newly_affected_ids)\n\n    def get_changed_parent_ids_by_type_2(\n        self,\n        changed_ids: dict[str, str],\n        parent_types: set[str],\n        parent_relationship_fields_by_type: dict[str, dict[str, list[str]]],\n        prefix_to_type: dict[str, str],\n    ) -> Iterator[tuple[str, str, int]]:\n        \"\"\"\n        This function yields back any changed parent id's based on\n        a relationship lookup.\n\n        Yields tuples of (changed_id, parent_type, num_examined)\n        changed_id is the id of the changed parent record\n        parent_type is the object table/type of the id (based on a prefix lookup)\n        num_examined is an integer which signifies our progress through the changed_id's dict\n\n        changed_ids is a list of all id's that changed, both parent and children.\n        parent\n\n        This is much simpler than get_changed_parent_ids_by_type.\n\n        TODO(rkuo): for common entities, the first 3 chars identify the object type\n        see https://help.salesforce.com/s/articleView?id=000385203&type=1\n        \"\"\"\n        changed_parent_ids: set[str] = (\n            set()\n        )  # dedupes parent id's that have already been yielded\n\n        # SQLite typically has a limit of 999 variables\n        num_examined = 0\n\n        for changed_id, changed_type in changed_ids.items():\n            num_examined += 1\n\n            # if we yielded this id already, continue\n            if changed_id in changed_parent_ids:\n                continue\n\n            # if this id is a parent type, yield it directly\n            if changed_type in parent_types:\n                yield changed_id, changed_type, num_examined\n                changed_parent_ids.add(changed_id)\n                continue\n\n            # if this id is a child type, then check the columns\n            # that relate it to the parent id and yield those ids\n            # NOTE: Although unlikely, id's yielded in this way may not be of the\n            # type we're interested in, so the caller must be prepared\n            # for the id to not be present\n\n            # get the child id record\n            sf_object = self.get_record(changed_id, changed_type)\n            if not sf_object:\n                continue\n\n            # get the fields that contain parent id's\n            parent_relationship_fields = parent_relationship_fields_by_type[\n                changed_type\n            ]\n            for field_name, _ in parent_relationship_fields.items():\n                if field_name not in sf_object.data:\n                    logger.warning(f\"{field_name=} not in data for {changed_type=}!\")\n                    continue\n\n                parent_id = cast(str, sf_object.data[field_name])\n                parent_id_prefix = parent_id[:3]\n\n                if parent_id_prefix not in prefix_to_type:\n                    logger.warning(\n                        f\"Could not lookup type for prefix: {parent_id_prefix=}\"\n                    )\n                    continue\n\n                parent_type = prefix_to_type[parent_id_prefix]\n                if parent_type not in parent_types:\n                    continue\n\n                yield parent_id, parent_type, num_examined\n                changed_parent_ids.add(parent_id)\n                break\n\n    def object_type_count(self, object_type: str) -> int:\n        \"\"\"Check if there is at least one object of the specified type in the database.\n\n        Args:\n            object_type: The Salesforce object type to check\n\n        Returns:\n            bool: True if at least one object exists, False otherwise\n        \"\"\"\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        with self._conn:\n            cursor = self._conn.cursor()\n            cursor.execute(\n                \"SELECT COUNT(*) FROM salesforce_objects WHERE object_type = ?\",\n                (object_type,),\n            )\n            count = cursor.fetchone()[0]\n            return count\n\n    @staticmethod\n    def normalize_record(\n        original_record: dict[str, Any],\n        remove_ids: bool = True,\n    ) -> tuple[dict[str, Any], set[str]]:\n        \"\"\"Takes a dict of field names to values and removes fields\n        we don't want.\n\n        This means most parent id field's and any fields with null values.\n\n        Return a json string and a list of parent_id's in the record.\n        \"\"\"\n        parent_ids: set[str] = set()\n        fields_to_remove: set[str] = set()\n\n        record = original_record.copy()\n\n        for field, value in record.items():\n            # remove empty fields\n            if not value:\n                fields_to_remove.add(field)\n                continue\n\n            if field == \"attributes\":\n                fields_to_remove.add(field)\n                continue\n\n            # remove salesforce id's (and add to parent id set)\n            if (\n                field != ID_FIELD\n                and isinstance(value, str)\n                and validate_salesforce_id(value)\n            ):\n                parent_ids.add(value)\n                if remove_ids:\n                    fields_to_remove.add(field)\n                continue\n\n            # this field is real data, leave it alone\n\n        # Remove unwanted fields\n        for field in fields_to_remove:\n            if field != \"LastModifiedById\":\n                del record[field]\n\n        return record, parent_ids\n\n    def update_from_csv(\n        self, object_type: str, csv_download_path: str, remove_ids: bool = True\n    ) -> list[str]:\n        \"\"\"Update the SF DB with a CSV file using SQLite storage.\"\"\"\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        # some customers need this to be larger than the default 128KB, go with 16MB\n        csv.field_size_limit(16 * 1024 * 1024)\n\n        updated_ids = []\n\n        with self._conn:\n            cursor = self._conn.cursor()\n\n            with open(csv_download_path, \"r\", newline=\"\", encoding=\"utf-8\") as f:\n                reader = csv.DictReader(f)\n                uncommitted_rows = 0\n                for row in reader:\n                    if ID_FIELD not in row:\n                        logger.warning(\n                            f\"Row {row} does not have an {ID_FIELD} field in {csv_download_path}\"\n                        )\n                        continue\n\n                    row_id = row[ID_FIELD]\n\n                    normalized_record, parent_ids = (\n                        OnyxSalesforceSQLite.normalize_record(row, remove_ids)\n                    )\n                    normalized_record_json_str = json.dumps(normalized_record)\n\n                    # Update main object data\n                    # NOTE(rkuo): looks like we take a list and dump it as json into the db\n                    cursor.execute(\n                        \"\"\"\n                        INSERT OR REPLACE INTO salesforce_objects (id, object_type, data)\n                        VALUES (?, ?, ?)\n                        \"\"\",\n                        (row_id, object_type, normalized_record_json_str),\n                    )\n\n                    # Update relationships using the same connection\n                    OnyxSalesforceSQLite._update_relationship_tables(\n                        cursor, row_id, parent_ids\n                    )\n                    updated_ids.append(row_id)\n\n                    # periodically commit or else memory will balloon\n                    uncommitted_rows += 1\n                    if uncommitted_rows >= 1024:\n                        self._conn.commit()\n                        uncommitted_rows = 0\n\n            # If we're updating User objects, update the email map\n            if object_type == USER_OBJECT_TYPE:\n                OnyxSalesforceSQLite._update_user_email_map(cursor)\n\n        return updated_ids\n\n    def get_child_ids(self, parent_id: str) -> set[str]:\n        \"\"\"Get all child IDs for a given parent ID.\"\"\"\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        with self._conn:\n            cursor = self._conn.cursor()\n\n            # Force index usage with INDEXED BY\n            cursor.execute(\n                \"SELECT child_id FROM relationships INDEXED BY idx_parent_id WHERE parent_id = ?\",\n                (parent_id,),\n            )\n            child_ids = {row[0] for row in cursor.fetchall()}\n        return child_ids\n\n    def get_type_from_id(self, object_id: str) -> str | None:\n        \"\"\"Get the type of an object from its ID.\"\"\"\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        with self._conn:\n            cursor = self._conn.cursor()\n            cursor.execute(\n                \"SELECT object_type FROM salesforce_objects WHERE id = ?\", (object_id,)\n            )\n            result = cursor.fetchone()\n            if not result:\n                logger.warning(f\"Object ID {object_id} not found\")\n                return None\n            return result[0]\n\n    def get_record(\n        self, object_id: str, object_type: str | None = None, isChild: bool = False\n    ) -> SalesforceObject | None:\n        \"\"\"Retrieve the record and return it as a SalesforceObject.\"\"\"\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        if object_type is None:\n            object_type = self.get_type_from_id(object_id)\n            if not object_type:\n                return None\n\n        with self._conn:\n            cursor = self._conn.cursor()\n            # Get the object data and account data\n            if object_type == ACCOUNT_OBJECT_TYPE or isChild:\n                cursor.execute(\n                    \"SELECT data FROM salesforce_objects WHERE id = ?\", (object_id,)\n                )\n            else:\n                cursor.execute(\n                    \"SELECT pso.data, r.parent_id as parent_id, sso.object_type FROM salesforce_objects pso \\\n                        LEFT JOIN relationships r on r.child_id = pso.id \\\n                        LEFT JOIN salesforce_objects sso on r.parent_id = sso.id \\\n                        WHERE pso.id = ? \",\n                    (object_id,),\n                )\n            result = cursor.fetchall()\n            if not result:\n                logger.warning(f\"Object ID {object_id} not found\")\n                return None\n\n            data = json.loads(result[0][0])\n\n            if object_type != ACCOUNT_OBJECT_TYPE:\n                # convert any account ids of the relationships back into data fields, with name\n                for row in result:\n                    # the following skips Account objects.\n                    if len(row) < 3:\n                        continue\n\n                    if row[1] and row[2] and row[2] == ACCOUNT_OBJECT_TYPE:\n                        data[\"AccountId\"] = row[1]\n                        cursor.execute(\n                            \"SELECT data FROM salesforce_objects WHERE id = ?\",\n                            (row[1],),\n                        )\n                        account_data = json.loads(cursor.fetchone()[0])\n                        data[ACCOUNT_OBJECT_TYPE] = account_data.get(NAME_FIELD, \"\")\n\n            return SalesforceObject(id=object_id, type=object_type, data=data)\n\n    def find_ids_by_type(self, object_type: str) -> list[str]:\n        \"\"\"Find all object IDs for rows of the specified type.\"\"\"\n        if self._conn is None:\n            raise RuntimeError(\"Database connection is closed\")\n\n        with self._conn:\n            cursor = self._conn.cursor()\n            cursor.execute(\n                \"SELECT id FROM salesforce_objects WHERE object_type = ?\",\n                (object_type,),\n            )\n            return [row[0] for row in cursor.fetchall()]\n\n    @staticmethod\n    def _update_relationship_tables(\n        cursor: sqlite3.Cursor, child_id: str, parent_ids: set[str]\n    ) -> None:\n        \"\"\"Given a child id and a set of parent id's, updates the\n        relationships of the child to the parents in the db and removes old relationships.\n\n        Args:\n            conn: The database connection to use (must be in a transaction)\n            child_id: The ID of the child record\n            parent_ids: Set of parent IDs to link to\n        \"\"\"\n\n        try:\n            # Get existing parent IDs\n            cursor.execute(\n                \"SELECT parent_id FROM relationships WHERE child_id = ?\", (child_id,)\n            )\n            old_parent_ids = {row[0] for row in cursor.fetchall()}\n\n            # Calculate differences\n            parent_ids_to_remove = old_parent_ids - parent_ids\n            parent_ids_to_add = parent_ids - old_parent_ids\n\n            # Remove old relationships\n            if parent_ids_to_remove:\n                cursor.executemany(\n                    \"DELETE FROM relationships WHERE child_id = ? AND parent_id = ?\",\n                    [(child_id, parent_id) for parent_id in parent_ids_to_remove],\n                )\n                # Also remove from relationship_types\n                cursor.executemany(\n                    \"DELETE FROM relationship_types WHERE child_id = ? AND parent_id = ?\",\n                    [(child_id, parent_id) for parent_id in parent_ids_to_remove],\n                )\n\n            # Add new relationships\n            if parent_ids_to_add:\n                # First add to relationships table\n                cursor.executemany(\n                    \"INSERT INTO relationships (child_id, parent_id) VALUES (?, ?)\",\n                    [(child_id, parent_id) for parent_id in parent_ids_to_add],\n                )\n\n                # Then get the types of the parent objects and add to relationship_types\n                for parent_id in parent_ids_to_add:\n                    cursor.execute(\n                        \"SELECT object_type FROM salesforce_objects WHERE id = ?\",\n                        (parent_id,),\n                    )\n                    result = cursor.fetchone()\n                    if result:\n                        parent_type = result[0]\n                        cursor.execute(\n                            \"\"\"\n                            INSERT INTO relationship_types (child_id, parent_id, parent_type)\n                            VALUES (?, ?, ?)\n                            \"\"\",\n                            (child_id, parent_id, parent_type),\n                        )\n\n        except Exception:\n            logger.exception(\n                f\"Error updating relationship tables: child_id={child_id} parent_ids={parent_ids}\"\n            )\n            raise\n\n    @staticmethod\n    def _update_user_email_map(cursor: sqlite3.Cursor) -> None:\n        \"\"\"Update the user_email_map table with current User objects.\n        Called internally by update_sf_db_with_csv when User objects are updated.\n        \"\"\"\n\n        cursor.execute(\n            \"\"\"\n            INSERT OR REPLACE INTO user_email_map (email, user_id)\n            SELECT json_extract(data, '$.Email'), id\n            FROM salesforce_objects\n            WHERE object_type = 'User'\n            AND json_extract(data, '$.Email') IS NOT NULL\n            \"\"\"\n        )\n\n    def make_basic_expert_info_from_record(\n        self,\n        sf_object: SalesforceObject,\n    ) -> BasicExpertInfo | None:\n        \"\"\"Parses record for LastModifiedById and returns BasicExpertInfo\n        of the user if possible.\"\"\"\n        object_dict: dict[str, Any] = sf_object.data\n        if not (last_modified_by_id := object_dict.get(\"LastModifiedById\")):\n            logger.warning(f\"No LastModifiedById found for {sf_object.id}\")\n            return None\n        if not (last_modified_by := self.get_record(last_modified_by_id)):\n            logger.warning(f\"No LastModifiedBy found for {last_modified_by_id}\")\n            return None\n\n        try:\n            expert_info = BasicExpertInfo.from_dict(last_modified_by.data)\n        except Exception:\n            return None\n\n        return expert_info\n"
  },
  {
    "path": "backend/onyx/connectors/salesforce/utils.py",
    "content": "import os\nfrom dataclasses import dataclass\nfrom typing import Any\n\nNAME_FIELD = \"Name\"\nMODIFIED_FIELD = \"LastModifiedDate\"\nID_FIELD = \"Id\"\nACCOUNT_OBJECT_TYPE = \"Account\"\nUSER_OBJECT_TYPE = \"User\"\n\n\n@dataclass\nclass SalesforceObject:\n    id: str\n    type: str\n    data: dict[str, Any]\n\n    def to_dict(self) -> dict[str, Any]:\n        return {\n            \"ID\": self.id,\n            \"Type\": self.type,\n            \"Data\": self.data,\n        }\n\n    @classmethod\n    def from_dict(cls, data: dict[str, Any]) -> \"SalesforceObject\":\n        return cls(\n            id=data[ID_FIELD],\n            type=data[\"Type\"],\n            data=data,\n        )\n\n\n# This defines the base path for all data files relative to this file\n# AKA BE CAREFUL WHEN MOVING THIS FILE\nBASE_DATA_PATH = os.path.join(os.path.dirname(__file__), \"data\")\n\n\ndef get_sqlite_db_path(directory: str) -> str:\n    \"\"\"Get the path to the sqlite db file.\"\"\"\n    return os.path.join(directory, \"salesforce_db.sqlite\")\n\n\ndef remove_sqlite_db_files(db_path: str) -> None:\n    \"\"\"Remove SQLite database and all associated files (WAL, SHM).\n\n    SQLite in WAL mode creates additional files:\n    - .sqlite-wal: Write-ahead log\n    - .sqlite-shm: Shared memory file\n\n    If these files become stale (e.g., after a crash), they can cause\n    'disk I/O error' when trying to open the database. This function\n    ensures all related files are removed.\n    \"\"\"\n    files_to_remove = [\n        db_path,\n        f\"{db_path}-wal\",\n        f\"{db_path}-shm\",\n    ]\n    for file_path in files_to_remove:\n        if os.path.exists(file_path):\n            os.remove(file_path)\n\n\n# NOTE: only used with shelves, deprecated at this point\ndef get_object_type_path(object_type: str) -> str:\n    \"\"\"Get the directory path for a specific object type.\"\"\"\n    type_dir = os.path.join(BASE_DATA_PATH, object_type)\n    os.makedirs(type_dir, exist_ok=True)\n    return type_dir\n\n\n_CHECKSUM_CHARS = \"ABCDEFGHIJKLMNOPQRSTUVWXYZ012345\"\n_LOOKUP = {format(i, \"05b\"): _CHECKSUM_CHARS[i] for i in range(32)}\n\n\ndef validate_salesforce_id(salesforce_id: str) -> bool:\n    \"\"\"Validate the checksum portion of an 18-character Salesforce ID.\n\n    Args:\n        salesforce_id: An 18-character Salesforce ID\n\n    Returns:\n        bool: True if the checksum is valid, False otherwise\n    \"\"\"\n    if len(salesforce_id) != 18:\n        return False\n\n    chunks = [salesforce_id[0:5], salesforce_id[5:10], salesforce_id[10:15]]\n\n    checksum = salesforce_id[15:18]\n    calculated_checksum = \"\"\n\n    for chunk in chunks:\n        result_string = \"\".join(\n            \"1\" if char.isupper() else \"0\" for char in reversed(chunk)\n        )\n        calculated_checksum += _LOOKUP[result_string]\n\n    return checksum == calculated_checksum\n"
  },
  {
    "path": "backend/onyx/connectors/sharepoint/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/sharepoint/connector.py",
    "content": "import base64\nimport copy\nimport fnmatch\nimport html\nimport io\nimport os\nimport re\nimport time\nfrom collections import deque\nfrom collections.abc import Generator\nfrom collections.abc import Iterable\nfrom datetime import datetime\nfrom datetime import timezone\nfrom enum import Enum\nfrom typing import Any\nfrom typing import cast\nfrom urllib.parse import quote\nfrom urllib.parse import unquote\nfrom urllib.parse import urlsplit\n\nimport msal  # type: ignore[import-untyped]\nimport requests\nfrom cryptography.hazmat.primitives import hashes\nfrom cryptography.hazmat.primitives import serialization\nfrom cryptography.hazmat.primitives.serialization import pkcs12\nfrom office365.graph_client import GraphClient  # type: ignore[import-untyped]\nfrom office365.onedrive.driveitems.driveItem import DriveItem  # type: ignore[import-untyped]\nfrom office365.onedrive.sites.site import Site  # type: ignore[import-untyped]\nfrom office365.onedrive.sites.sites_with_root import SitesWithRoot  # type: ignore[import-untyped]\nfrom office365.runtime.auth.token_response import TokenResponse  # type: ignore[import-untyped]\nfrom office365.runtime.client_request import ClientRequestException  # type: ignore\nfrom office365.runtime.paths.resource_path import ResourcePath  # type: ignore[import-untyped]\nfrom office365.runtime.queries.client_query import ClientQuery  # type: ignore[import-untyped]\nfrom office365.sharepoint.client_context import ClientContext  # type: ignore[import-untyped]\nfrom pydantic import BaseModel\nfrom pydantic import Field\nfrom requests.exceptions import HTTPError\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS\nfrom onyx.configs.app_configs import SHAREPOINT_CONNECTOR_SIZE_THRESHOLD\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.interfaces import CheckpointedConnectorWithPermSync\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import IndexingHeartbeatInterface\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.microsoft_graph_env import resolve_microsoft_environment\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import EntityFailure\nfrom onyx.connectors.models import ExternalAccess\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.connectors.sharepoint.connector_utils import get_sharepoint_external_access\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.file_processing.extract_file_text import extract_text_and_images\nfrom onyx.file_processing.extract_file_text import get_file_ext\nfrom onyx.file_processing.file_types import OnyxFileExtensions\nfrom onyx.file_processing.file_types import OnyxMimeTypes\nfrom onyx.file_processing.image_utils import store_image_and_create_section\nfrom onyx.utils.b64 import get_image_type_from_bytes\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\nSLIM_BATCH_SIZE = 1000\n_EPOCH = datetime.fromtimestamp(0, tz=timezone.utc)\n\n\nSHARED_DOCUMENTS_MAP = {\n    \"Documents\": \"Shared Documents\",\n    \"Dokumente\": \"Freigegebene Dokumente\",\n    \"Documentos\": \"Documentos compartidos\",\n}\nSHARED_DOCUMENTS_MAP_REVERSE = {v: k for k, v in SHARED_DOCUMENTS_MAP.items()}\n\nASPX_EXTENSION = \".aspx\"\n\n\ndef _is_site_excluded(site_url: str, excluded_site_patterns: list[str]) -> bool:\n    \"\"\"Check if a site URL matches any of the exclusion glob patterns.\"\"\"\n    for pattern in excluded_site_patterns:\n        if fnmatch.fnmatch(site_url, pattern) or fnmatch.fnmatch(\n            site_url.rstrip(\"/\"), pattern.rstrip(\"/\")\n        ):\n            return True\n    return False\n\n\ndef _is_path_excluded(item_path: str, excluded_path_patterns: list[str]) -> bool:\n    \"\"\"Check if a drive item path matches any of the exclusion glob patterns.\n\n    item_path is the relative path within a drive, e.g. \"Engineering/API/report.docx\".\n    Matches are attempted against the full path and the filename alone so that\n    patterns like \"*.tmp\" match files at any depth.\n    \"\"\"\n    filename = item_path.rsplit(\"/\", 1)[-1] if \"/\" in item_path else item_path\n    for pattern in excluded_path_patterns:\n        if fnmatch.fnmatch(item_path, pattern) or fnmatch.fnmatch(filename, pattern):\n            return True\n    return False\n\n\ndef _build_item_relative_path(parent_reference_path: str | None, item_name: str) -> str:\n    \"\"\"Build the relative path of a drive item from its parentReference.path and name.\n\n    Example: parentReference.path=\"/drives/abc/root:/Eng/API\", name=\"report.docx\"\n    => \"Eng/API/report.docx\"\n    \"\"\"\n    if parent_reference_path and \"root:/\" in parent_reference_path:\n        folder = unquote(parent_reference_path.split(\"root:/\", 1)[1])\n        if folder:\n            return f\"{folder}/{item_name}\"\n    return item_name\n\n\nDEFAULT_AUTHORITY_HOST = \"https://login.microsoftonline.com\"\nDEFAULT_GRAPH_API_HOST = \"https://graph.microsoft.com\"\nDEFAULT_SHAREPOINT_DOMAIN_SUFFIX = \"sharepoint.com\"\n\nGRAPH_API_BASE = f\"{DEFAULT_GRAPH_API_HOST}/v1.0\"\nGRAPH_API_MAX_RETRIES = 5\nGRAPH_API_RETRYABLE_STATUSES = frozenset({429, 500, 502, 503, 504})\n\n\nclass DriveItemData(BaseModel):\n    \"\"\"Lightweight representation of a Graph API drive item, parsed from JSON.\n\n    Replaces the SDK DriveItem for fetching/listing so that we can paginate\n    lazily through the Graph API without materialising every item in memory.\n    \"\"\"\n\n    id: str\n    name: str\n    web_url: str\n    size: int | None = None\n    mime_type: str | None = None\n    download_url: str | None = None\n    last_modified_datetime: datetime | None = None\n    last_modified_by_display_name: str | None = None\n    last_modified_by_email: str | None = None\n    parent_reference_path: str | None = None\n    drive_id: str | None = None\n\n    @classmethod\n    def from_graph_json(cls, item: dict[str, Any]) -> \"DriveItemData\":\n        last_mod_raw = item.get(\"lastModifiedDateTime\")\n        last_mod: datetime | None = None\n        if isinstance(last_mod_raw, str):\n            last_mod = datetime.fromisoformat(last_mod_raw.replace(\"Z\", \"+00:00\"))\n\n        last_modified_by = item.get(\"lastModifiedBy\", {}).get(\"user\", {})\n        parent_ref = item.get(\"parentReference\", {})\n\n        return cls(\n            id=item[\"id\"],\n            name=item.get(\"name\", \"\"),\n            web_url=item.get(\"webUrl\", \"\"),\n            size=item.get(\"size\"),\n            mime_type=item.get(\"file\", {}).get(\"mimeType\"),\n            download_url=item.get(\"@microsoft.graph.downloadUrl\"),\n            last_modified_datetime=last_mod,\n            last_modified_by_display_name=last_modified_by.get(\"displayName\"),\n            last_modified_by_email=(\n                last_modified_by.get(\"email\")\n                or last_modified_by.get(\"userPrincipalName\")\n            ),\n            parent_reference_path=parent_ref.get(\"path\"),\n            drive_id=parent_ref.get(\"driveId\"),\n        )\n\n    def to_sdk_driveitem(self, graph_client: GraphClient) -> DriveItem:\n        \"\"\"Construct a lazy SDK DriveItem for permission lookups.\"\"\"\n        if not self.drive_id:\n            raise ValueError(\"drive_id is required to construct SDK DriveItem\")\n        path = ResourcePath(\n            self.id,\n            ResourcePath(\"items\", ResourcePath(self.drive_id, ResourcePath(\"drives\"))),\n        )\n        item = DriveItem(graph_client, path)\n        item.set_property(\"id\", self.id)\n        return item\n\n\n# The office365 library's ClientContext caches the access token from its\n# first request and never re-invokes the token callback.  Microsoft access\n# tokens live ~60-75 minutes, so we recreate the cached ClientContext every\n# 30 minutes to let MSAL transparently handle token refresh.\n_REST_CTX_MAX_AGE_S = 30 * 60\n\n\nclass SiteDescriptor(BaseModel):\n    \"\"\"Data class for storing SharePoint site information.\n\n    Args:\n        url: The base site URL (e.g. https://danswerai.sharepoint.com/sites/sharepoint-tests\n             or https://danswerai.sharepoint.com/teams/team-name)\n        drive_name: The name of the drive to access (e.g. \"Shared Documents\", \"Other Library\")\n                   If None, all drives will be accessed.\n        folder_path: The folder path within the drive to access (e.g. \"test/nested with spaces\")\n                    If None, all folders will be accessed.\n    \"\"\"\n\n    url: str\n    drive_name: str | None\n    folder_path: str | None\n\n\nclass CertificateData(BaseModel):\n    \"\"\"Data class for storing certificate information loaded from PFX file.\"\"\"\n\n    private_key: bytes\n    thumbprint: str\n\n\ndef _site_page_in_time_window(\n    page: dict[str, Any],\n    start: datetime | None,\n    end: datetime | None,\n) -> bool:\n    \"\"\"Return True if the page's lastModifiedDateTime falls within [start, end].\"\"\"\n    if start is None and end is None:\n        return True\n    raw = page.get(\"lastModifiedDateTime\")\n    if not raw:\n        return True\n    if not isinstance(raw, str):\n        raise ValueError(f\"lastModifiedDateTime is not a string: {raw}\")\n    last_modified = datetime.fromisoformat(raw.replace(\"Z\", \"+00:00\"))\n    return (start is None or last_modified >= start) and (\n        end is None or last_modified <= end\n    )\n\n\ndef sleep_and_retry(\n    query_obj: ClientQuery, method_name: str, max_retries: int = 3\n) -> Any:\n    \"\"\"\n    Execute a SharePoint query with retry logic for rate limiting.\n    \"\"\"\n    for attempt in range(max_retries + 1):\n        try:\n            return query_obj.execute_query()\n        except ClientRequestException as e:\n            status = e.response.status_code if e.response is not None else None\n\n            # 429 / 503 — rate limit or transient error.  Back off and retry.\n            if status in (429, 503) and attempt < max_retries:\n                logger.warning(\n                    f\"Rate limit exceeded on {method_name}, attempt {attempt + 1}/{max_retries + 1}, sleeping and retrying\"\n                )\n                retry_after = e.response.headers.get(\"Retry-After\")\n                if retry_after:\n                    sleep_time = int(retry_after)\n                else:\n                    # Exponential backoff: 2^attempt * 5 seconds\n                    sleep_time = min(30, (2**attempt) * 5)\n\n                logger.info(f\"Sleeping for {sleep_time} seconds before retry\")\n                time.sleep(sleep_time)\n                continue\n\n            # Non-retryable error or retries exhausted — log details and raise.\n            if e.response is not None:\n                logger.error(\n                    f\"SharePoint request failed for {method_name}: status={status}, \"\n                )\n            raise e\n\n\nclass SharepointConnectorCheckpoint(ConnectorCheckpoint):\n    cached_site_descriptors: deque[SiteDescriptor] | None = None\n    current_site_descriptor: SiteDescriptor | None = None\n\n    cached_drive_names: deque[str] | None = None\n    current_drive_name: str | None = None\n    # Drive's web_url from the API - used as raw_node_id for DRIVE hierarchy nodes\n    current_drive_web_url: str | None = None\n    # Resolved drive ID — avoids re-resolving on checkpoint resume\n    current_drive_id: str | None = None\n    # Next delta API page URL for per-page checkpointing within a drive.\n    # When set, Phase 3b fetches one page at a time so progress is persisted\n    # between pages.  None means BFS path or no active delta traversal.\n    current_drive_delta_next_link: str | None = None\n\n    process_site_pages: bool = False\n\n    # Track yielded hierarchy nodes by their raw_node_id (URLs) to avoid duplicates\n    seen_hierarchy_node_raw_ids: set[str] = Field(default_factory=set)\n\n    # Track yielded document IDs to avoid processing the same document twice.\n    # The Microsoft Graph delta API can return the same item on multiple pages.\n    seen_document_ids: set[str] = Field(default_factory=set)\n\n\nclass SharepointAuthMethod(Enum):\n    CLIENT_SECRET = \"client_secret\"\n    CERTIFICATE = \"certificate\"\n\n\nclass SizeCapExceeded(Exception):\n    \"\"\"Exception raised when the size cap is exceeded.\"\"\"\n\n\ndef _log_and_raise_for_status(response: requests.Response) -> None:\n    \"\"\"Log the response text and raise for status.\"\"\"\n    try:\n        response.raise_for_status()\n    except Exception:\n        logger.error(f\"HTTP request failed: {response.text}\")\n        raise\n\n\nGRAPH_INVALID_REQUEST_CODE = \"invalidRequest\"\n\n\ndef _is_graph_invalid_request(response: requests.Response) -> bool:\n    \"\"\"Return True if the response body is the generic Graph API\n    ``{\"error\": {\"code\": \"invalidRequest\", \"message\": \"Invalid request\"}}``\n    shape. This particular error has no actionable inner error code and is\n    returned by the site-pages endpoint when a page has a corrupt canvas layout\n    (e.g. duplicate web-part IDs — see SharePoint/sp-dev-docs#8822).\"\"\"\n    try:\n        body = response.json()\n    except Exception:\n        return False\n    error = body.get(\"error\", {})\n    return error.get(\"code\") == GRAPH_INVALID_REQUEST_CODE\n\n\ndef load_certificate_from_pfx(pfx_data: bytes, password: str) -> CertificateData | None:\n    \"\"\"Load certificate from .pfx file for MSAL authentication\"\"\"\n    try:\n        # Load the certificate and private key\n        private_key, certificate, additional_certificates = (\n            pkcs12.load_key_and_certificates(pfx_data, password.encode(\"utf-8\"))\n        )\n\n        # Validate that certificate and private key are not None\n        if certificate is None or private_key is None:\n            raise ValueError(\"Certificate or private key is None\")\n\n        # Convert to PEM format that MSAL expects\n        key_pem = private_key.private_bytes(\n            encoding=serialization.Encoding.PEM,\n            format=serialization.PrivateFormat.PKCS8,\n            encryption_algorithm=serialization.NoEncryption(),\n        )\n\n        return CertificateData(\n            private_key=key_pem,\n            thumbprint=certificate.fingerprint(hashes.SHA1()).hex(),\n        )\n    except Exception as e:\n        logger.error(f\"Error loading certificate: {e}\")\n        return None\n\n\ndef acquire_token_for_rest(\n    msal_app: msal.ConfidentialClientApplication,\n    sp_tenant_domain: str,\n    sharepoint_domain_suffix: str,\n) -> TokenResponse:\n    token = msal_app.acquire_token_for_client(\n        scopes=[f\"https://{sp_tenant_domain}.{sharepoint_domain_suffix}/.default\"]\n    )\n    return TokenResponse.from_json(token)\n\n\ndef _create_document_failure(\n    driveitem: DriveItemData,\n    error_message: str,\n    exception: Exception | None = None,\n) -> ConnectorFailure:\n    \"\"\"Helper method to create a ConnectorFailure for document processing errors.\"\"\"\n    return ConnectorFailure(\n        failed_document=DocumentFailure(\n            document_id=driveitem.id or \"unknown\",\n            document_link=driveitem.web_url,\n        ),\n        failure_message=f\"SharePoint document '{driveitem.name or 'unknown'}': {error_message}\",\n        exception=exception,\n    )\n\n\ndef _create_entity_failure(\n    entity_id: str,\n    error_message: str,\n    time_range: tuple[datetime, datetime] | None = None,\n    exception: Exception | None = None,\n) -> ConnectorFailure:\n    \"\"\"Helper method to create a ConnectorFailure for entity-level errors.\"\"\"\n    return ConnectorFailure(\n        failed_entity=EntityFailure(\n            entity_id=entity_id,\n            missed_time_range=time_range,\n        ),\n        failure_message=f\"SharePoint entity '{entity_id}': {error_message}\",\n        exception=exception,\n    )\n\n\ndef _probe_remote_size(url: str, timeout: int) -> int | None:\n    \"\"\"Determine remote size using HEAD or a range GET probe. Returns None if unknown.\"\"\"\n    try:\n        head_resp = requests.head(url, timeout=timeout, allow_redirects=True)\n        _log_and_raise_for_status(head_resp)\n        cl = head_resp.headers.get(\"Content-Length\")\n        if cl and cl.isdigit():\n            return int(cl)\n    except requests.RequestException:\n        pass\n\n    # Fallback: Range request for first byte to read total from Content-Range\n    try:\n        with requests.get(\n            url,\n            headers={\"Range\": \"bytes=0-0\"},\n            timeout=timeout,\n            stream=True,\n        ) as range_resp:\n            _log_and_raise_for_status(range_resp)\n            cr = range_resp.headers.get(\"Content-Range\")  # e.g., \"bytes 0-0/12345\"\n            if cr and \"/\" in cr:\n                total = cr.split(\"/\")[-1]\n                if total.isdigit():\n                    return int(total)\n    except requests.RequestException:\n        pass\n\n    # If both HEAD and a range GET failed to reveal a size, signal unknown size.\n    # Callers should treat None as \"size unavailable\" and proceed with a safe\n    # streaming path that enforces a hard cap to avoid excessive memory usage.\n    return None\n\n\ndef _download_with_cap(url: str, timeout: int, cap: int) -> bytes:\n    \"\"\"Stream download content with an upper bound on bytes read.\n\n    Behavior:\n    - Checks `Content-Length` first and aborts early if it exceeds `cap`.\n    - Otherwise streams the body in chunks and stops once `cap` is surpassed.\n    - Raises `SizeCapExceeded` when the cap would be exceeded.\n    - Returns the full bytes if the content fits within `cap`.\n    \"\"\"\n    with requests.get(url, stream=True, timeout=timeout) as resp:\n        _log_and_raise_for_status(resp)\n\n        # If the server provides Content-Length, prefer an early decision.\n        cl_header = resp.headers.get(\"Content-Length\")\n        if cl_header and cl_header.isdigit():\n            content_len = int(cl_header)\n            if content_len > cap:\n                logger.warning(\n                    f\"Content-Length {content_len} exceeds cap {cap}; skipping download.\"\n                )\n                raise SizeCapExceeded(\"pre_download\")\n\n        buf = io.BytesIO()\n        # Stream in 64KB chunks; adjust if needed for slower networks.\n        for chunk in resp.iter_content(64 * 1024):\n            if not chunk:\n                continue\n            buf.write(chunk)\n            if buf.tell() > cap:\n                # Avoid keeping a large partial buffer; close and signal caller to skip.\n                logger.warning(\n                    f\"Streaming download exceeded cap {cap} bytes; aborting early.\"\n                )\n                raise SizeCapExceeded(\"during_download\")\n\n        return buf.getvalue()\n\n\ndef _download_via_graph_api(\n    access_token: str,\n    drive_id: str,\n    item_id: str,\n    bytes_allowed: int,\n    graph_api_base: str,\n) -> bytes:\n    \"\"\"Download a drive item via the Graph API /content endpoint with a byte cap.\n\n    Raises SizeCapExceeded if the cap is exceeded.\n    \"\"\"\n    url = f\"{graph_api_base}/drives/{drive_id}/items/{item_id}/content\"\n    headers = {\"Authorization\": f\"Bearer {access_token}\"}\n    with requests.get(\n        url, headers=headers, stream=True, timeout=REQUEST_TIMEOUT_SECONDS\n    ) as resp:\n        _log_and_raise_for_status(resp)\n        buf = io.BytesIO()\n        for chunk in resp.iter_content(64 * 1024):\n            if not chunk:\n                continue\n            buf.write(chunk)\n            if buf.tell() > bytes_allowed:\n                raise SizeCapExceeded(\"during_graph_api_download\")\n        return buf.getvalue()\n\n\ndef _convert_driveitem_to_document_with_permissions(\n    driveitem: DriveItemData,\n    drive_name: str,\n    ctx: ClientContext | None,\n    graph_client: GraphClient,\n    graph_api_base: str,\n    include_permissions: bool = False,\n    parent_hierarchy_raw_node_id: str | None = None,\n    access_token: str | None = None,\n    treat_sharing_link_as_public: bool = False,\n) -> Document | ConnectorFailure | None:\n\n    if not driveitem.name or not driveitem.id:\n        raise ValueError(\"DriveItem name/id is required\")\n\n    if include_permissions and ctx is None:\n        raise ValueError(\"ClientContext is required for permissions\")\n\n    mime_type = driveitem.mime_type\n    if not mime_type or mime_type in OnyxMimeTypes.EXCLUDED_IMAGE_TYPES:\n        logger.debug(\n            f\"Skipping malformed or excluded mime type {mime_type} for {driveitem.name}\"\n        )\n        return None\n\n    file_size = driveitem.size\n    download_url = driveitem.download_url\n\n    if file_size is None and download_url:\n        file_size = _probe_remote_size(download_url, REQUEST_TIMEOUT_SECONDS)\n\n    if file_size is not None and file_size > SHAREPOINT_CONNECTOR_SIZE_THRESHOLD:\n        logger.warning(\n            f\"Skipping '{driveitem.name}' over size threshold ({file_size} > {SHAREPOINT_CONNECTOR_SIZE_THRESHOLD} bytes).\"\n        )\n        return None\n\n    # Prefer downloadUrl streaming with size cap\n    content_bytes: bytes | None = None\n    if download_url:\n        try:\n            content_bytes = _download_with_cap(\n                download_url,\n                REQUEST_TIMEOUT_SECONDS,\n                SHAREPOINT_CONNECTOR_SIZE_THRESHOLD,\n            )\n        except SizeCapExceeded as e:\n            logger.warning(f\"Skipping '{driveitem.name}' exceeded size cap: {str(e)}\")\n            return None\n        except requests.RequestException as e:\n            status = e.response.status_code if e.response is not None else -1\n            logger.warning(\n                f\"Failed to download via downloadUrl for '{driveitem.name}' (status={status}); falling back to Graph API.\"\n            )\n\n    # Fallback: download via Graph API /content endpoint\n    if content_bytes is None and access_token and driveitem.drive_id:\n        try:\n            content_bytes = _download_via_graph_api(\n                access_token,\n                driveitem.drive_id,\n                driveitem.id,\n                SHAREPOINT_CONNECTOR_SIZE_THRESHOLD,\n                graph_api_base=graph_api_base,\n            )\n        except SizeCapExceeded:\n            logger.warning(\n                f\"Skipping '{driveitem.name}' exceeded size cap during Graph API download.\"\n            )\n            return None\n        except Exception as e:\n            logger.warning(\n                f\"Failed to download via Graph API for '{driveitem.name}': {e}\"\n            )\n            return _create_document_failure(\n                driveitem, f\"Failed to download via graph api: {e}\", e\n            )\n\n    sections: list[TextSection | ImageSection] = []\n    file_ext = get_file_ext(driveitem.name)\n\n    if not content_bytes:\n        logger.warning(\n            f\"Zero-length content for '{driveitem.name}'. Skipping text/image extraction.\"\n        )\n    elif file_ext in OnyxFileExtensions.IMAGE_EXTENSIONS:\n        image_section, _ = store_image_and_create_section(\n            image_data=content_bytes,\n            file_id=driveitem.id,\n            display_name=driveitem.name,\n            file_origin=FileOrigin.CONNECTOR,\n        )\n        image_section.link = driveitem.web_url\n        sections.append(image_section)\n    else:\n\n        def _store_embedded_image(img_data: bytes, img_name: str) -> None:\n            try:\n                img_mime = get_image_type_from_bytes(img_data)\n            except ValueError:\n                logger.debug(\n                    \"Skipping embedded image with unknown format for %s\",\n                    driveitem.name,\n                )\n                return\n\n            if img_mime in OnyxMimeTypes.EXCLUDED_IMAGE_TYPES:\n                logger.debug(\n                    \"Skipping embedded image of excluded type %s for %s\",\n                    img_mime,\n                    driveitem.name,\n                )\n                return\n\n            image_section, _ = store_image_and_create_section(\n                image_data=img_data,\n                file_id=f\"{driveitem.id}_img_{len(sections)}\",\n                display_name=img_name or f\"{driveitem.name} - image {len(sections)}\",\n                file_origin=FileOrigin.CONNECTOR,\n            )\n            image_section.link = driveitem.web_url\n            sections.append(image_section)\n\n        extraction_result = extract_text_and_images(\n            file=io.BytesIO(content_bytes),\n            file_name=driveitem.name,\n            image_callback=_store_embedded_image,\n        )\n        if extraction_result.text_content:\n            sections.append(\n                TextSection(link=driveitem.web_url, text=extraction_result.text_content)\n            )\n\n    if include_permissions and ctx is not None:\n        logger.info(f\"Getting external access for {driveitem.name}\")\n        sdk_item = driveitem.to_sdk_driveitem(graph_client)\n        external_access = get_sharepoint_external_access(\n            ctx=ctx,\n            graph_client=graph_client,\n            drive_item=sdk_item,\n            drive_name=drive_name,\n            add_prefix=True,\n            treat_sharing_link_as_public=treat_sharing_link_as_public,\n        )\n    else:\n        external_access = ExternalAccess.empty()\n\n    doc = Document(\n        id=driveitem.id,\n        sections=sections,\n        source=DocumentSource.SHAREPOINT,\n        semantic_identifier=driveitem.name,\n        external_access=external_access,\n        doc_updated_at=(\n            driveitem.last_modified_datetime.replace(tzinfo=timezone.utc)\n            if driveitem.last_modified_datetime\n            else None\n        ),\n        primary_owners=[\n            BasicExpertInfo(\n                display_name=driveitem.last_modified_by_display_name or \"\",\n                email=driveitem.last_modified_by_email or \"\",\n            )\n        ],\n        metadata={\"drive\": drive_name},\n        parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,\n    )\n    return doc\n\n\ndef _convert_sitepage_to_document(\n    site_page: dict[str, Any],\n    site_name: str | None,\n    ctx: ClientContext | None,\n    graph_client: GraphClient,\n    include_permissions: bool = False,\n    parent_hierarchy_raw_node_id: str | None = None,\n    treat_sharing_link_as_public: bool = False,\n) -> Document:\n    \"\"\"Convert a SharePoint site page to a Document object.\"\"\"\n    # Extract text content from the site page\n    page_text = \"\"\n    # Get title and description\n    title = cast(str, site_page.get(\"title\", \"\"))\n    description = cast(str, site_page.get(\"description\", \"\"))\n\n    # Build the text content\n    if title:\n        page_text += f\"# {title}\\n\\n\"\n    if description:\n        page_text += f\"{description}\\n\\n\"\n\n    # Extract content from canvas layout if available\n    canvas_layout = site_page.get(\"canvasLayout\", {})\n    if canvas_layout:\n        horizontal_sections = canvas_layout.get(\"horizontalSections\", [])\n        for section in horizontal_sections:\n            columns = section.get(\"columns\", [])\n            for column in columns:\n                webparts = column.get(\"webparts\", [])\n                for webpart in webparts:\n                    # Extract text from different types of webparts\n                    webpart_type = webpart.get(\"@odata.type\", \"\")\n\n                    # Extract text from text webparts\n                    if webpart_type == \"#microsoft.graph.textWebPart\":\n                        inner_html = webpart.get(\"innerHtml\", \"\")\n                        if inner_html:\n                            # Basic HTML to text conversion\n                            # Remove HTML tags but preserve some structure\n                            text_content = re.sub(r\"<br\\s*/?>\", \"\\n\", inner_html)\n                            text_content = re.sub(r\"<li>\", \"• \", text_content)\n                            text_content = re.sub(r\"</li>\", \"\\n\", text_content)\n                            text_content = re.sub(\n                                r\"<h[1-6][^>]*>\", \"\\n## \", text_content\n                            )\n                            text_content = re.sub(r\"</h[1-6]>\", \"\\n\", text_content)\n                            text_content = re.sub(r\"<p[^>]*>\", \"\\n\", text_content)\n                            text_content = re.sub(r\"</p>\", \"\\n\", text_content)\n                            text_content = re.sub(r\"<[^>]+>\", \"\", text_content)\n                            # Decode HTML entities\n                            text_content = html.unescape(text_content)\n                            # Clean up extra whitespace\n                            text_content = re.sub(\n                                r\"\\n\\s*\\n\", \"\\n\\n\", text_content\n                            ).strip()\n                            if text_content:\n                                page_text += f\"{text_content}\\n\\n\"\n\n                    # Extract text from standard webparts\n                    elif webpart_type == \"#microsoft.graph.standardWebPart\":\n                        data = webpart.get(\"data\", {})\n\n                        # Extract from serverProcessedContent\n                        server_content = data.get(\"serverProcessedContent\", {})\n                        searchable_texts = server_content.get(\n                            \"searchablePlainTexts\", []\n                        )\n\n                        for text_item in searchable_texts:\n                            if isinstance(text_item, dict):\n                                key = text_item.get(\"key\", \"\")\n                                value = text_item.get(\"value\", \"\")\n                                if value:\n                                    # Add context based on key\n                                    if key == \"title\":\n                                        page_text += f\"## {value}\\n\\n\"\n                                    else:\n                                        page_text += f\"{value}\\n\\n\"\n\n                        # Extract description if available\n                        description = data.get(\"description\", \"\")\n                        if description:\n                            page_text += f\"{description}\\n\\n\"\n\n                        # Extract title if available\n                        webpart_title = data.get(\"title\", \"\")\n                        if webpart_title and webpart_title != description:\n                            page_text += f\"## {webpart_title}\\n\\n\"\n\n    page_text = page_text.strip()\n\n    # If no content extracted, use the title as fallback\n    if not page_text and title:\n        page_text = title\n\n    # Parse creation and modification info\n    created_datetime = site_page.get(\"createdDateTime\")\n    if created_datetime:\n        if isinstance(created_datetime, str):\n            created_datetime = datetime.fromisoformat(\n                created_datetime.replace(\"Z\", \"+00:00\")\n            )\n        elif not created_datetime.tzinfo:\n            created_datetime = created_datetime.replace(tzinfo=timezone.utc)\n\n    last_modified_datetime = site_page.get(\"lastModifiedDateTime\")\n    if last_modified_datetime:\n        if isinstance(last_modified_datetime, str):\n            last_modified_datetime = datetime.fromisoformat(\n                last_modified_datetime.replace(\"Z\", \"+00:00\")\n            )\n        elif not last_modified_datetime.tzinfo:\n            last_modified_datetime = last_modified_datetime.replace(tzinfo=timezone.utc)\n\n    # Extract owner information\n    primary_owners = []\n    created_by = site_page.get(\"createdBy\", {}).get(\"user\", {})\n    if created_by.get(\"displayName\"):\n        primary_owners.append(\n            BasicExpertInfo(\n                display_name=created_by.get(\"displayName\"),\n                email=created_by.get(\"email\", \"\"),\n            )\n        )\n\n    web_url = site_page[\"webUrl\"]\n    semantic_identifier = cast(str, site_page.get(\"name\", title))\n    if semantic_identifier.endswith(ASPX_EXTENSION):\n        semantic_identifier = semantic_identifier[: -len(ASPX_EXTENSION)]\n\n    if include_permissions:\n        external_access = get_sharepoint_external_access(\n            ctx=ctx,\n            graph_client=graph_client,\n            site_page=site_page,\n            add_prefix=True,\n            treat_sharing_link_as_public=treat_sharing_link_as_public,\n        )\n    else:\n        external_access = ExternalAccess.empty()\n\n    doc = Document(\n        id=site_page[\"id\"],\n        sections=[TextSection(link=web_url, text=page_text)],\n        source=DocumentSource.SHAREPOINT,\n        external_access=external_access,\n        semantic_identifier=semantic_identifier,\n        doc_updated_at=last_modified_datetime or created_datetime,\n        primary_owners=primary_owners,\n        metadata=(\n            {\n                \"site\": site_name,\n            }\n            if site_name\n            else {}\n        ),\n        parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,\n    )\n    return doc\n\n\ndef _convert_driveitem_to_slim_document(\n    driveitem: DriveItemData,\n    drive_name: str,\n    ctx: ClientContext,\n    graph_client: GraphClient,\n    parent_hierarchy_raw_node_id: str | None = None,\n    treat_sharing_link_as_public: bool = False,\n) -> SlimDocument:\n    if driveitem.id is None:\n        raise ValueError(\"DriveItem ID is required\")\n\n    sdk_item = driveitem.to_sdk_driveitem(graph_client)\n    external_access = get_sharepoint_external_access(\n        ctx=ctx,\n        graph_client=graph_client,\n        drive_item=sdk_item,\n        drive_name=drive_name,\n        treat_sharing_link_as_public=treat_sharing_link_as_public,\n    )\n\n    return SlimDocument(\n        id=driveitem.id,\n        external_access=external_access,\n        parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,\n    )\n\n\ndef _convert_sitepage_to_slim_document(\n    site_page: dict[str, Any],\n    ctx: ClientContext | None,\n    graph_client: GraphClient,\n    parent_hierarchy_raw_node_id: str | None = None,\n    treat_sharing_link_as_public: bool = False,\n) -> SlimDocument:\n    \"\"\"Convert a SharePoint site page to a SlimDocument object.\"\"\"\n    if site_page.get(\"id\") is None:\n        raise ValueError(\"Site page ID is required\")\n\n    external_access = get_sharepoint_external_access(\n        ctx=ctx,\n        graph_client=graph_client,\n        site_page=site_page,\n        treat_sharing_link_as_public=treat_sharing_link_as_public,\n    )\n    id = site_page.get(\"id\")\n    if id is None:\n        raise ValueError(\"Site page ID is required\")\n    return SlimDocument(\n        id=id,\n        external_access=external_access,\n        parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,\n    )\n\n\nclass SharepointConnector(\n    SlimConnectorWithPermSync,\n    CheckpointedConnectorWithPermSync[SharepointConnectorCheckpoint],\n):\n    def __init__(\n        self,\n        batch_size: int = INDEX_BATCH_SIZE,\n        sites: list[str] = [],\n        excluded_sites: list[str] = [],\n        excluded_paths: list[str] = [],\n        include_site_pages: bool = True,\n        include_site_documents: bool = True,\n        treat_sharing_link_as_public: bool = False,\n        authority_host: str = DEFAULT_AUTHORITY_HOST,\n        graph_api_host: str = DEFAULT_GRAPH_API_HOST,\n        sharepoint_domain_suffix: str = DEFAULT_SHAREPOINT_DOMAIN_SUFFIX,\n    ) -> None:\n        self.batch_size = batch_size\n        self.sites = list(sites)\n        self.excluded_sites = [s for p in excluded_sites if (s := p.strip())]\n        self.excluded_paths = [s for p in excluded_paths if (s := p.strip())]\n        self.treat_sharing_link_as_public = treat_sharing_link_as_public\n        self.site_descriptors: list[SiteDescriptor] = self._extract_site_and_drive_info(\n            sites\n        )\n        self._graph_client: GraphClient | None = None\n        self.msal_app: msal.ConfidentialClientApplication | None = None\n        self.include_site_pages = include_site_pages\n        self.include_site_documents = include_site_documents\n        self.sp_tenant_domain: str | None = None\n        self._credential_json: dict[str, Any] | None = None\n        self._cached_rest_ctx: ClientContext | None = None\n        self._cached_rest_ctx_url: str | None = None\n        self._cached_rest_ctx_created_at: float = 0.0\n\n        resolved_env = resolve_microsoft_environment(graph_api_host, authority_host)\n        self._azure_environment = resolved_env.environment\n        self.authority_host = resolved_env.authority_host\n        self.graph_api_host = resolved_env.graph_host\n        self.graph_api_base = f\"{self.graph_api_host}/v1.0\"\n        self.sharepoint_domain_suffix = resolved_env.sharepoint_domain_suffix\n        if sharepoint_domain_suffix != resolved_env.sharepoint_domain_suffix:\n            logger.warning(\n                f\"Configured sharepoint_domain_suffix '{sharepoint_domain_suffix}' \"\n                f\"differs from the expected suffix '{resolved_env.sharepoint_domain_suffix}' \"\n                f\"for the {resolved_env.environment} environment. \"\n                f\"Using '{resolved_env.sharepoint_domain_suffix}'.\"\n            )\n\n    def validate_connector_settings(self) -> None:\n        # Validate that at least one content type is enabled\n        if not self.include_site_documents and not self.include_site_pages:\n            raise ConnectorValidationError(\n                \"At least one content type must be enabled. \"\n                \"Please check either 'Include Site Documents' or 'Include Site Pages' (or both).\"\n            )\n\n        # Ensure sites are sharepoint urls\n        for site_url in self.sites:\n            if not site_url.startswith(\"https://\") or not (\n                \"/sites/\" in site_url or \"/teams/\" in site_url\n            ):\n                raise ConnectorValidationError(\n                    \"Site URLs must be full Sharepoint URLs (e.g. https://your-tenant.sharepoint.com/sites/your-site or https://your-tenant.sharepoint.com/teams/your-team)\"\n                )\n\n    def _extract_tenant_domain_from_sites(self) -> str | None:\n        \"\"\"Extract the tenant domain from configured site URLs.\n\n        Site URLs look like https://{tenant}.sharepoint.com/sites/... so the\n        tenant domain is the first label of the hostname.\n        \"\"\"\n        for site_url in self.sites:\n            try:\n                hostname = urlsplit(site_url.strip()).hostname\n            except ValueError:\n                continue\n            if not hostname:\n                continue\n            tenant = hostname.split(\".\")[0]\n            if tenant:\n                return tenant\n        logger.warning(f\"No tenant domain found from {len(self.sites)} sites\")\n        return None\n\n    def _resolve_tenant_domain_from_root_site(self) -> str:\n        \"\"\"Resolve tenant domain via GET /v1.0/sites/root which only requires\n        Sites.Read.All (a permission the connector already needs).\"\"\"\n        root_site = self.graph_client.sites.root.get().execute_query()\n        hostname = root_site.site_collection.hostname\n        if not hostname:\n            raise ConnectorValidationError(\n                \"Could not determine tenant domain from root site\"\n            )\n        tenant_domain = hostname.split(\".\")[0]\n        logger.info(\n            \"Resolved tenant domain '%s' from root site hostname '%s'\",\n            tenant_domain,\n            hostname,\n        )\n        return tenant_domain\n\n    def _resolve_tenant_domain(self) -> str:\n        \"\"\"Determine the tenant domain, preferring site URLs over a Graph API\n        call to avoid needing extra permissions.\"\"\"\n        from_sites = self._extract_tenant_domain_from_sites()\n        if from_sites:\n            logger.info(\n                \"Resolved tenant domain '%s' from site URLs\",\n                from_sites,\n            )\n            return from_sites\n\n        logger.info(\"No site URLs available; resolving tenant domain from root site\")\n        return self._resolve_tenant_domain_from_root_site()\n\n    @property\n    def graph_client(self) -> GraphClient:\n        if self._graph_client is None:\n            raise ConnectorMissingCredentialError(\"Sharepoint\")\n\n        return self._graph_client\n\n    def _create_rest_client_context(self, site_url: str) -> ClientContext:\n        \"\"\"Return a ClientContext for SharePoint REST API calls, with caching.\n\n        The office365 library's ClientContext caches the access token from its\n        first request and never re-invokes the token callback.  We cache the\n        context and recreate it when the site URL changes or after\n        ``_REST_CTX_MAX_AGE_S``.  On recreation we also call\n        ``load_credentials`` to build a fresh MSAL app with an empty token\n        cache, guaranteeing a brand-new token from Azure AD.\"\"\"\n        elapsed = time.monotonic() - self._cached_rest_ctx_created_at\n        if (\n            self._cached_rest_ctx is not None\n            and self._cached_rest_ctx_url == site_url\n            and elapsed <= _REST_CTX_MAX_AGE_S\n        ):\n            return self._cached_rest_ctx\n\n        if self._credential_json:\n            logger.info(\n                \"Rebuilding SharePoint REST client context (elapsed=%.0fs, site_changed=%s)\",\n                elapsed,\n                self._cached_rest_ctx_url != site_url,\n            )\n            self.load_credentials(self._credential_json)\n\n        if not self.msal_app or not self.sp_tenant_domain:\n            raise RuntimeError(\"MSAL app or tenant domain is not set\")\n\n        msal_app = self.msal_app\n        sp_tenant_domain = self.sp_tenant_domain\n        sp_domain_suffix = self.sharepoint_domain_suffix\n        self._cached_rest_ctx = ClientContext(site_url).with_access_token(\n            lambda: acquire_token_for_rest(msal_app, sp_tenant_domain, sp_domain_suffix)\n        )\n        self._cached_rest_ctx_url = site_url\n        self._cached_rest_ctx_created_at = time.monotonic()\n        return self._cached_rest_ctx\n\n    @staticmethod\n    def _strip_share_link_tokens(path: str) -> list[str]:\n        # Share links often include a token prefix like /:f:/r/ or /:x:/r/.\n        segments = [segment for segment in path.split(\"/\") if segment]\n        if segments and segments[0].startswith(\":\"):\n            segments = segments[1:]\n            if segments and segments[0] in {\"r\", \"s\", \"g\"}:\n                segments = segments[1:]\n        return segments\n\n    @staticmethod\n    def _normalize_sharepoint_url(url: str) -> tuple[str | None, list[str]]:\n        try:\n            parsed = urlsplit(url)\n        except ValueError:\n            logger.warning(f\"Sharepoint URL '{url}' could not be parsed\")\n            return None, []\n\n        if not parsed.scheme or not parsed.netloc:\n            logger.warning(\n                f\"Sharepoint URL '{url}' is not a valid absolute URL (missing scheme or host)\"\n            )\n            return None, []\n\n        path_segments = SharepointConnector._strip_share_link_tokens(parsed.path)\n        return f\"{parsed.scheme}://{parsed.netloc}\", path_segments\n\n    @staticmethod\n    def _extract_site_and_drive_info(site_urls: list[str]) -> list[SiteDescriptor]:\n        site_data_list = []\n        for url in site_urls:\n            base_url, parts = SharepointConnector._normalize_sharepoint_url(url.strip())\n            if base_url is None:\n                continue\n\n            lower_parts = [part.lower() for part in parts]\n            site_type_index = None\n            for site_token in (\"sites\", \"teams\"):\n                if site_token in lower_parts:\n                    site_type_index = lower_parts.index(site_token)\n                    break\n\n            if site_type_index is None or len(parts) <= site_type_index + 1:\n                logger.warning(\n                    f\"Site URL '{url}' is not a valid Sharepoint URL (must contain /sites/<name> or /teams/<name>)\"\n                )\n                continue\n\n            site_path = parts[: site_type_index + 2]\n            remaining_parts = parts[site_type_index + 2 :]\n            site_url = f\"{base_url}/\" + \"/\".join(site_path)\n\n            # Extract drive name and folder path\n            if remaining_parts:\n                drive_name = unquote(remaining_parts[0])\n                folder_path = (\n                    \"/\".join(unquote(part) for part in remaining_parts[1:])\n                    if len(remaining_parts) > 1\n                    else None\n                )\n            else:\n                drive_name = None\n                folder_path = None\n\n            site_data_list.append(\n                SiteDescriptor(\n                    url=site_url,\n                    drive_name=drive_name,\n                    folder_path=folder_path,\n                )\n            )\n        return site_data_list\n\n    def _resolve_drive(\n        self,\n        site_descriptor: SiteDescriptor,\n        drive_name: str,\n    ) -> tuple[str, str | None] | None:\n        \"\"\"Find the drive ID and web_url for a given drive name on a site.\n\n        Returns (drive_id, drive_web_url) or None if the drive was not found.\n        Raises on auth/permission errors so callers can propagate them.\n        \"\"\"\n        site = self.graph_client.sites.get_by_url(site_descriptor.url)\n        drives = site.drives.get().execute_query()\n        logger.info(f\"Found drives: {[d.name for d in drives]}\")\n\n        matched = [\n            d\n            for d in drives\n            if (d.name and d.name.lower() == drive_name.lower())\n            or (\n                d.name in SHARED_DOCUMENTS_MAP\n                and SHARED_DOCUMENTS_MAP[d.name] == drive_name\n            )\n        ]\n        if not matched:\n            logger.warning(f\"Drive '{drive_name}' not found\")\n            return None\n\n        drive = matched[0]\n        drive_web_url: str | None = drive.web_url\n        logger.info(f\"Found drive: {drive.name} (web_url: {drive_web_url})\")\n        return cast(str, drive.id), drive_web_url\n\n    def _get_drive_items_for_drive_id(\n        self,\n        site_descriptor: SiteDescriptor,\n        drive_id: str,\n        start: datetime | None = None,\n        end: datetime | None = None,\n    ) -> Generator[DriveItemData, None, None]:\n        \"\"\"Yield drive items lazily for a given drive name.\n\n        Uses the delta API for whole-drive enumeration (flat, incremental via\n        timestamp token) and falls back to BFS /children traversal when a\n        folder_path is configured, since delta cannot scope to a subtree\n        efficiently.\n\n        Returns:\n            A generator of DriveItemData.\n            The generator paginates through the Graph API so items are never\n            all held in memory at once.\n        \"\"\"\n        try:\n            if site_descriptor.folder_path:\n                yield from self._iter_drive_items_paged(\n                    drive_id=drive_id,\n                    folder_path=site_descriptor.folder_path,\n                    start=start,\n                    end=end,\n                )\n            else:\n                yield from self._iter_drive_items_delta(\n                    drive_id=drive_id,\n                    start=start,\n                    end=end,\n                )\n\n        except Exception as e:\n            err_str = str(e)\n            if (\n                \"403 Client Error\" in err_str\n                or \"404 Client Error\" in err_str\n                or \"invalid_client\" in err_str\n            ):\n                raise e\n\n            logger.warning(f\"Failed to process site: {site_descriptor.url} - {err_str}\")\n\n    def _fetch_driveitems(\n        self,\n        site_descriptor: SiteDescriptor,\n        start: datetime | None = None,\n        end: datetime | None = None,\n    ) -> Generator[tuple[DriveItemData, str, str | None], None, None]:\n        \"\"\"Yield drive items lazily for all drives in a site.\n\n        Yields (DriveItemData, drive_name, drive_web_url) tuples one item at\n        a time, paginating through the Graph API internally.\n        \"\"\"\n        try:\n            site = self.graph_client.sites.get_by_url(site_descriptor.url)\n            drives = site.drives.get().execute_query()\n            logger.debug(f\"Found drives: {[d.name for d in drives]}\")\n\n            if site_descriptor.drive_name:\n                drives = [\n                    drive\n                    for drive in drives\n                    if drive.name == site_descriptor.drive_name\n                    or (\n                        drive.name in SHARED_DOCUMENTS_MAP\n                        and SHARED_DOCUMENTS_MAP[drive.name]\n                        == site_descriptor.drive_name\n                    )\n                ]\n                if not drives:\n                    logger.warning(f\"Drive '{site_descriptor.drive_name}' not found\")\n                    return\n\n            for drive in drives:\n                try:\n                    drive_name = (\n                        SHARED_DOCUMENTS_MAP[drive.name]\n                        if drive.name in SHARED_DOCUMENTS_MAP\n                        else cast(str, drive.name)\n                    )\n                    drive_web_url: str | None = drive.web_url\n\n                    if site_descriptor.folder_path:\n                        item_iter = self._iter_drive_items_paged(\n                            drive_id=cast(str, drive.id),\n                            folder_path=site_descriptor.folder_path,\n                            start=start,\n                            end=end,\n                        )\n                    else:\n                        item_iter = self._iter_drive_items_delta(\n                            drive_id=cast(str, drive.id),\n                            start=start,\n                            end=end,\n                        )\n\n                    for item in item_iter:\n                        yield item, drive_name or \"\", drive_web_url\n\n                except Exception as e:\n                    logger.warning(f\"Failed to process drive '{drive.name}': {str(e)}\")\n\n        except Exception as e:\n            err_str = str(e)\n            if (\n                \"403 Client Error\" in err_str\n                or \"404 Client Error\" in err_str\n                or \"invalid_client\" in err_str\n            ):\n                raise e\n\n            logger.warning(f\"Failed to process site: {err_str}\")\n\n    def _handle_paginated_sites(\n        self, sites: SitesWithRoot\n    ) -> Generator[Site, None, None]:\n        while sites:\n            if sites.current_page:\n                yield from sites.current_page\n            if not sites.has_next:\n                break\n            sites = sites._get_next().execute_query()\n\n    def _is_driveitem_excluded(self, driveitem: DriveItemData) -> bool:\n        \"\"\"Check if a drive item should be excluded based on excluded_paths patterns.\"\"\"\n        if not self.excluded_paths:\n            return False\n        relative_path = _build_item_relative_path(\n            driveitem.parent_reference_path, driveitem.name\n        )\n        return _is_path_excluded(relative_path, self.excluded_paths)\n\n    def _filter_excluded_sites(\n        self, site_descriptors: list[SiteDescriptor]\n    ) -> list[SiteDescriptor]:\n        \"\"\"Remove sites matching any excluded_sites glob pattern.\"\"\"\n        if not self.excluded_sites:\n            return site_descriptors\n        result = []\n        for sd in site_descriptors:\n            if _is_site_excluded(sd.url, self.excluded_sites):\n                logger.info(f\"Excluding site by denylist: {sd.url}\")\n                continue\n            result.append(sd)\n        return result\n\n    def fetch_sites(self) -> list[SiteDescriptor]:\n        sites = self.graph_client.sites.get_all_sites().execute_query()\n\n        if not sites:\n            raise RuntimeError(\"No sites found in the tenant\")\n\n        # OneDrive personal sites should not be indexed with SharepointConnector\n        site_descriptors = [\n            SiteDescriptor(\n                url=site.web_url or \"\",\n                drive_name=None,\n                folder_path=None,\n            )\n            for site in self._handle_paginated_sites(sites)\n            if \"-my.sharepoint\" not in site.web_url\n        ]\n        return self._filter_excluded_sites(site_descriptors)\n\n    def _fetch_site_pages(\n        self,\n        site_descriptor: SiteDescriptor,\n        start: datetime | None = None,\n        end: datetime | None = None,\n    ) -> Generator[dict[str, Any], None, None]:\n        \"\"\"Yield SharePoint site pages (.aspx files) one at a time.\n\n        Pages are fetched via the Graph Pages API and yielded lazily as each\n        API page arrives, so memory stays bounded regardless of total page count.\n        Time-window filtering is applied per-item before yielding.\n        \"\"\"\n        site = self.graph_client.sites.get_by_url(site_descriptor.url)\n        site.execute_query()\n        site_id = site.id\n\n        site_pages_base = (\n            f\"{self.graph_api_base}/sites/{site_id}/pages/microsoft.graph.sitePage\"\n        )\n        page_url: str | None = site_pages_base\n        params: dict[str, str] | None = {\"$expand\": \"canvasLayout\"}\n        total_yielded = 0\n        yielded_ids: set[str] = set()\n\n        while page_url:\n            try:\n                data = self._graph_api_get_json(page_url, params)\n            except HTTPError as e:\n                if e.response is not None and e.response.status_code == 404:\n                    logger.warning(f\"Site page not found: {page_url}\")\n                    break\n                if (\n                    e.response is not None\n                    and e.response.status_code == 400\n                    and _is_graph_invalid_request(e.response)\n                ):\n                    logger.warning(\n                        f\"$expand=canvasLayout on the LIST endpoint returned 400 \"\n                        f\"for site {site_descriptor.url}. Falling back to \"\n                        f\"per-page expansion.\"\n                    )\n                    yield from self._fetch_site_pages_individually(\n                        site_pages_base, start, end, skip_ids=yielded_ids\n                    )\n                    return\n                raise\n\n            params = None  # nextLink already embeds query params\n\n            for page in data.get(\"value\", []):\n                if not _site_page_in_time_window(page, start, end):\n                    continue\n                total_yielded += 1\n                page_id = page.get(\"id\")\n                if page_id:\n                    yielded_ids.add(page_id)\n                yield page\n\n            page_url = data.get(\"@odata.nextLink\")\n\n        logger.debug(f\"Yielded {total_yielded} site pages for {site_descriptor.url}\")\n\n    def _fetch_site_pages_individually(\n        self,\n        site_pages_base: str,\n        start: datetime | None = None,\n        end: datetime | None = None,\n        skip_ids: set[str] | None = None,\n    ) -> Generator[dict[str, Any], None, None]:\n        \"\"\"Fallback for _fetch_site_pages: list pages without $expand, then\n        expand canvasLayout on each page individually.\n\n        The Graph API's LIST endpoint can return 400 when $expand=canvasLayout\n        is used and *any* page in the site has a corrupt canvas layout (e.g.\n        duplicate web part IDs — see SharePoint/sp-dev-docs#8822). Since the\n        LIST expansion is all-or-nothing, a single bad page poisons the entire\n        response. This method works around it by fetching metadata first, then\n        expanding each page individually so only the broken page loses its\n        canvas content.\n\n        ``skip_ids`` contains page IDs already yielded by the caller before the\n        fallback was triggered, preventing duplicates.\n        \"\"\"\n        page_url: str | None = site_pages_base\n        total_yielded = 0\n        _skip_ids = skip_ids or set()\n\n        while page_url:\n            try:\n                data = self._graph_api_get_json(page_url)\n            except HTTPError as e:\n                if e.response is not None and e.response.status_code == 404:\n                    break\n                raise\n\n            for page in data.get(\"value\", []):\n                if not _site_page_in_time_window(page, start, end):\n                    continue\n\n                page_id = page.get(\"id\")\n                if page_id and page_id in _skip_ids:\n                    continue\n\n                if not page_id:\n                    total_yielded += 1\n                    yield page\n                    continue\n\n                expanded = self._try_expand_single_page(site_pages_base, page_id, page)\n                total_yielded += 1\n                yield expanded\n\n            page_url = data.get(\"@odata.nextLink\")\n\n        logger.debug(\n            f\"Yielded {total_yielded} site pages (per-page expansion fallback)\"\n        )\n\n    def _try_expand_single_page(\n        self,\n        site_pages_base: str,\n        page_id: str,\n        fallback_page: dict[str, Any],\n    ) -> dict[str, Any]:\n        \"\"\"Try to GET a single page with $expand=canvasLayout. On 400, return\n        the metadata-only fallback so the page is still indexed (without canvas\n        content).\"\"\"\n        pages_collection = site_pages_base.removesuffix(\"/microsoft.graph.sitePage\")\n        single_url = f\"{pages_collection}/{page_id}/microsoft.graph.sitePage\"\n        try:\n            return self._graph_api_get_json(single_url, {\"$expand\": \"canvasLayout\"})\n        except HTTPError as e:\n            if (\n                e.response is not None\n                and e.response.status_code == 400\n                and _is_graph_invalid_request(e.response)\n            ):\n                page_name = fallback_page.get(\"name\", page_id)\n                logger.warning(\n                    f\"$expand=canvasLayout failed for page '{page_name}' ({page_id}). Indexing metadata only.\"\n                )\n                return fallback_page\n            raise\n\n    def _acquire_token(self) -> dict[str, Any]:\n        \"\"\"\n        Acquire token via MSAL\n        \"\"\"\n        if self.msal_app is None:\n            raise RuntimeError(\"MSAL app is not initialized\")\n\n        token = self.msal_app.acquire_token_for_client(\n            scopes=[f\"{self.graph_api_host}/.default\"]\n        )\n        return token\n\n    def _get_graph_access_token(self) -> str:\n        token_data = self._acquire_token()\n        access_token = token_data.get(\"access_token\")\n        if not access_token:\n            raise RuntimeError(\"Failed to acquire Graph API access token\")\n        return access_token\n\n    def _graph_api_get_json(\n        self,\n        url: str,\n        params: dict[str, str] | None = None,\n    ) -> dict[str, Any]:\n        \"\"\"Make an authenticated GET request to the Graph API with retry.\"\"\"\n        access_token = self._get_graph_access_token()\n        headers = {\"Authorization\": f\"Bearer {access_token}\"}\n\n        for attempt in range(GRAPH_API_MAX_RETRIES + 1):\n            try:\n                response = requests.get(\n                    url,\n                    headers=headers,\n                    params=params,\n                    timeout=REQUEST_TIMEOUT_SECONDS,\n                )\n                if response.status_code in GRAPH_API_RETRYABLE_STATUSES:\n                    if attempt < GRAPH_API_MAX_RETRIES:\n                        retry_after = int(\n                            response.headers.get(\"Retry-After\", str(2**attempt))\n                        )\n                        wait = min(retry_after, 60)\n                        logger.warning(\n                            f\"Graph API {response.status_code} on attempt {attempt + 1}, retrying in {wait}s: {url}\"\n                        )\n                        time.sleep(wait)\n                        # Re-acquire token in case it expired during a long traversal\n                        access_token = self._get_graph_access_token()\n                        headers = {\"Authorization\": f\"Bearer {access_token}\"}\n                        continue\n                _log_and_raise_for_status(response)\n                return response.json()\n            except (requests.ConnectionError, requests.Timeout):\n                if attempt < GRAPH_API_MAX_RETRIES:\n                    wait = min(2**attempt, 60)\n                    logger.warning(\n                        f\"Graph API connection error on attempt {attempt + 1}, retrying in {wait}s: {url}\"\n                    )\n                    time.sleep(wait)\n                    continue\n                raise\n\n        raise RuntimeError(\n            f\"Graph API request failed after {GRAPH_API_MAX_RETRIES + 1} attempts: {url}\"\n        )\n\n    def _iter_drive_items_paged(\n        self,\n        drive_id: str,\n        folder_path: str | None = None,\n        start: datetime | None = None,\n        end: datetime | None = None,\n        page_size: int = 200,\n    ) -> Generator[DriveItemData, None, None]:\n        \"\"\"Yield DriveItemData for every file in a drive via the Graph API.\n\n        Performs BFS folder traversal manually, fetching one page of children\n        at a time so that memory usage stays bounded regardless of drive size.\n        \"\"\"\n        base = f\"{self.graph_api_base}/drives/{drive_id}\"\n        if folder_path:\n            encoded_path = quote(folder_path, safe=\"/\")\n            start_url = f\"{base}/root:/{encoded_path}:/children\"\n        else:\n            start_url = f\"{base}/root/children\"\n\n        folder_queue: deque[str] = deque([start_url])\n\n        while folder_queue:\n            page_url: str | None = folder_queue.popleft()\n            params: dict[str, str] | None = {\"$top\": str(page_size)}\n\n            while page_url:\n                data = self._graph_api_get_json(page_url, params)\n                params = None  # nextLink already embeds query params\n\n                for item in data.get(\"value\", []):\n                    if \"folder\" in item:\n                        child_url = f\"{base}/items/{item['id']}/children\"\n                        folder_queue.append(child_url)\n                        continue\n\n                    # Skip non-file items (e.g. OneNote notebooks without a \"file\" facet)\n                    # but still yield them — the downstream conversion handles filtering\n                    # by extension / mime type.\n\n                    # NOTE: We are now including items without a lastModifiedDateTime,\n                    # and respecting when only one of start or end is set.\n                    if start is not None or end is not None:\n                        raw_ts = item.get(\"lastModifiedDateTime\")\n                        if raw_ts:\n                            mod_dt = datetime.fromisoformat(\n                                raw_ts.replace(\"Z\", \"+00:00\")\n                            )\n                            if start is not None and mod_dt < start:\n                                continue\n                            if end is not None and mod_dt > end:\n                                continue\n\n                    yield DriveItemData.from_graph_json(item)\n\n                page_url = data.get(\"@odata.nextLink\")\n\n    def _iter_drive_items_delta(\n        self,\n        drive_id: str,\n        start: datetime | None = None,\n        end: datetime | None = None,\n        page_size: int = 200,\n    ) -> Generator[DriveItemData, None, None]:\n        \"\"\"Yield DriveItemData for every file in a drive via the Graph delta API.\n\n        Uses the flat delta endpoint instead of recursive folder traversal.\n        On subsequent runs (start > epoch), passes the start timestamp as a\n        delta token so that only changed items are returned.\n\n        Falls back to full enumeration if the API returns 410 Gone (expired token).\n        \"\"\"\n        use_timestamp_token = start is not None and start > _EPOCH\n\n        initial_url = f\"{self.graph_api_base}/drives/{drive_id}/root/delta\"\n        if use_timestamp_token:\n            assert start is not None  # mypy\n            token = quote(start.isoformat(timespec=\"seconds\"))\n            initial_url += f\"?token={token}\"\n\n        yield from self._iter_delta_pages(\n            initial_url=initial_url,\n            drive_id=drive_id,\n            start=start,\n            end=end,\n            page_size=page_size,\n            allow_full_resync=use_timestamp_token,\n        )\n\n    def _iter_delta_pages(\n        self,\n        initial_url: str,\n        drive_id: str,\n        start: datetime | None,\n        end: datetime | None,\n        page_size: int,\n        allow_full_resync: bool,\n    ) -> Generator[DriveItemData, None, None]:\n        \"\"\"Paginate through delta API responses, yielding file DriveItemData.\n\n        If the API responds with 410 Gone and allow_full_resync is True,\n        restarts with a full delta enumeration.\n        \"\"\"\n        page_url: str | None = initial_url\n        params: dict[str, str] | None = {\"$top\": str(page_size)}\n\n        while page_url:\n            try:\n                data = self._graph_api_get_json(page_url, params)\n            except requests.HTTPError as e:\n                # 410 means the delta token expired, so we need to fall back to full enumeration\n                if e.response is not None and e.response.status_code == 410:\n                    if not allow_full_resync:\n                        raise\n                    logger.warning(\n                        \"Delta token expired (410 Gone) for drive '%s'. Falling back to full delta enumeration.\",\n                        drive_id,\n                    )\n                    yield from self._iter_delta_pages(\n                        initial_url=f\"{self.graph_api_base}/drives/{drive_id}/root/delta\",\n                        drive_id=drive_id,\n                        start=start,\n                        end=end,\n                        page_size=page_size,\n                        allow_full_resync=False,\n                    )\n                    return\n                raise\n\n            params = None  # nextLink/deltaLink already embed query params\n\n            for item in data.get(\"value\", []):\n                if \"folder\" in item or \"deleted\" in item:\n                    continue\n\n                if start is not None or end is not None:\n                    raw_ts = item.get(\"lastModifiedDateTime\")\n                    if raw_ts:\n                        mod_dt = datetime.fromisoformat(raw_ts.replace(\"Z\", \"+00:00\"))\n                        if start is not None and mod_dt < start:\n                            continue\n                        if end is not None and mod_dt > end:\n                            continue\n\n                yield DriveItemData.from_graph_json(item)\n\n            page_url = data.get(\"@odata.nextLink\")\n            if not page_url:\n                break\n\n    def _build_delta_start_url(\n        self,\n        drive_id: str,\n        start: datetime | None = None,\n        page_size: int = 200,\n    ) -> str:\n        \"\"\"Build the initial delta API URL with query parameters embedded.\n\n        Embeds ``$top`` (and optionally a timestamp ``token``) directly in the\n        URL so that the returned string is fully self-contained and can be\n        stored in a checkpoint without needing a separate params dict.\n        \"\"\"\n        base_url = f\"{self.graph_api_base}/drives/{drive_id}/root/delta\"\n        params = [f\"$top={page_size}\"]\n        if start is not None and start > _EPOCH:\n            token = quote(start.isoformat(timespec=\"seconds\"))\n            params.append(f\"token={token}\")\n        return f\"{base_url}?{'&'.join(params)}\"\n\n    def _fetch_one_delta_page(\n        self,\n        page_url: str,\n        drive_id: str,\n        start: datetime | None = None,\n        end: datetime | None = None,\n        page_size: int = 200,\n    ) -> tuple[list[DriveItemData], str | None]:\n        \"\"\"Fetch a single page of delta API results.\n\n        Returns ``(items, next_page_url)``.  *next_page_url* is ``None`` when\n        the delta enumeration is complete (deltaLink with no nextLink).\n\n        On 410 Gone (expired token) returns ``([], full_resync_url)`` so\n        the caller can store the resync URL in the checkpoint and retry on\n        the next cycle.\n        \"\"\"\n        try:\n            data = self._graph_api_get_json(page_url)\n        except requests.HTTPError as e:\n            if e.response is not None and e.response.status_code == 410:\n                logger.warning(\n                    \"Delta token expired (410 Gone) for drive '%s'. Will restart with full delta enumeration.\",\n                    drive_id,\n                )\n                full_url = f\"{self.graph_api_base}/drives/{drive_id}/root/delta?$top={page_size}\"\n                return [], full_url\n            raise\n\n        items: list[DriveItemData] = []\n        for item in data.get(\"value\", []):\n            if \"folder\" in item or \"deleted\" in item:\n                continue\n            if start is not None or end is not None:\n                raw_ts = item.get(\"lastModifiedDateTime\")\n                if raw_ts:\n                    mod_dt = datetime.fromisoformat(raw_ts.replace(\"Z\", \"+00:00\"))\n                    if start is not None and mod_dt < start:\n                        continue\n                    if end is not None and mod_dt > end:\n                        continue\n            items.append(DriveItemData.from_graph_json(item))\n\n        next_url = data.get(\"@odata.nextLink\")\n        if next_url:\n            return items, next_url\n        return items, None\n\n    @staticmethod\n    def _clear_drive_checkpoint_state(\n        checkpoint: \"SharepointConnectorCheckpoint\",\n    ) -> None:\n        \"\"\"Reset all drive-level fields in the checkpoint.\"\"\"\n        checkpoint.current_drive_name = None\n        checkpoint.current_drive_id = None\n        checkpoint.current_drive_web_url = None\n        checkpoint.current_drive_delta_next_link = None\n        checkpoint.seen_document_ids.clear()\n\n    def _fetch_slim_documents_from_sharepoint(\n        self,\n        start: datetime | None = None,\n        end: datetime | None = None,\n    ) -> GenerateSlimDocumentOutput:\n        site_descriptors = self._filter_excluded_sites(\n            self.site_descriptors or self.fetch_sites()\n        )\n\n        # Create a temporary checkpoint for hierarchy node tracking\n        temp_checkpoint = SharepointConnectorCheckpoint(has_more=True)\n\n        # goes over all urls, converts them into SlimDocument objects and then yields them in batches\n        doc_batch: list[SlimDocument | HierarchyNode] = []\n        for site_descriptor in site_descriptors:\n            site_url = site_descriptor.url\n\n            # Yield site hierarchy node using helper\n            doc_batch.extend(\n                self._yield_site_hierarchy_node(site_descriptor, temp_checkpoint)\n            )\n\n            # Process site documents if flag is True\n            if self.include_site_documents:\n                for driveitem, drive_name, drive_web_url in self._fetch_driveitems(\n                    site_descriptor=site_descriptor,\n                    start=start,\n                    end=end,\n                ):\n                    if self._is_driveitem_excluded(driveitem):\n                        logger.debug(f\"Excluding by path denylist: {driveitem.web_url}\")\n                        continue\n\n                    if drive_web_url:\n                        doc_batch.extend(\n                            self._yield_drive_hierarchy_node(\n                                site_url, drive_web_url, drive_name, temp_checkpoint\n                            )\n                        )\n\n                    folder_path = self._extract_folder_path_from_parent_reference(\n                        driveitem.parent_reference_path\n                    )\n                    if folder_path and drive_web_url:\n                        doc_batch.extend(\n                            self._yield_folder_hierarchy_nodes(\n                                site_url,\n                                drive_web_url,\n                                drive_name,\n                                folder_path,\n                                temp_checkpoint,\n                            )\n                        )\n\n                    parent_hierarchy_url: str | None = None\n                    if drive_web_url:\n                        parent_hierarchy_url = self._get_parent_hierarchy_url(\n                            site_url, drive_web_url, drive_name, driveitem\n                        )\n\n                    try:\n                        logger.debug(f\"Processing: {driveitem.web_url}\")\n                        ctx = self._create_rest_client_context(site_descriptor.url)\n                        doc_batch.append(\n                            _convert_driveitem_to_slim_document(\n                                driveitem,\n                                drive_name,\n                                ctx,\n                                self.graph_client,\n                                parent_hierarchy_raw_node_id=parent_hierarchy_url,\n                                treat_sharing_link_as_public=self.treat_sharing_link_as_public,\n                            )\n                        )\n                    except Exception as e:\n                        logger.warning(f\"Failed to process driveitem: {str(e)}\")\n\n                    if len(doc_batch) >= SLIM_BATCH_SIZE:\n                        yield doc_batch\n                        doc_batch = []\n\n            # Process site pages if flag is True\n            if self.include_site_pages:\n                site_pages = self._fetch_site_pages(\n                    site_descriptor, start=start, end=end\n                )\n                for site_page in site_pages:\n                    logger.debug(\n                        f\"Processing site page: {site_page.get('webUrl', site_page.get('name', 'Unknown'))}\"\n                    )\n                    ctx = self._create_rest_client_context(site_descriptor.url)\n                    doc_batch.append(\n                        _convert_sitepage_to_slim_document(\n                            site_page,\n                            ctx,\n                            self.graph_client,\n                            parent_hierarchy_raw_node_id=site_descriptor.url,\n                            treat_sharing_link_as_public=self.treat_sharing_link_as_public,\n                        )\n                    )\n                    if len(doc_batch) >= SLIM_BATCH_SIZE:\n                        yield doc_batch\n                        doc_batch = []\n        yield doc_batch\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self._credential_json = credentials\n        auth_method = credentials.get(\n            \"authentication_method\", SharepointAuthMethod.CLIENT_SECRET.value\n        )\n        sp_client_id = credentials.get(\"sp_client_id\")\n        sp_client_secret = credentials.get(\"sp_client_secret\")\n        sp_directory_id = credentials.get(\"sp_directory_id\")\n        sp_private_key = credentials.get(\"sp_private_key\")\n        sp_certificate_password = credentials.get(\"sp_certificate_password\")\n\n        if not sp_client_id:\n            raise ConnectorValidationError(\"Client ID is required\")\n        if not sp_directory_id:\n            raise ConnectorValidationError(\"Directory (tenant) ID is required\")\n\n        authority_url = f\"{self.authority_host}/{sp_directory_id}\"\n\n        if auth_method == SharepointAuthMethod.CERTIFICATE.value:\n            logger.info(\"Using certificate authentication\")\n            if not sp_private_key or not sp_certificate_password:\n                raise ConnectorValidationError(\n                    \"Private key and certificate password are required for certificate authentication\"\n                )\n\n            pfx_data = base64.b64decode(sp_private_key)\n            certificate_data = load_certificate_from_pfx(\n                pfx_data, sp_certificate_password\n            )\n            if certificate_data is None:\n                raise RuntimeError(\"Failed to load certificate\")\n\n            logger.info(f\"Creating MSAL app with authority url {authority_url}\")\n            self.msal_app = msal.ConfidentialClientApplication(\n                authority=authority_url,\n                client_id=sp_client_id,\n                client_credential=certificate_data.model_dump(),\n            )\n        elif auth_method == SharepointAuthMethod.CLIENT_SECRET.value:\n            logger.info(\"Using client secret authentication\")\n            self.msal_app = msal.ConfidentialClientApplication(\n                authority=authority_url,\n                client_id=sp_client_id,\n                client_credential=sp_client_secret,\n            )\n        else:\n            raise ConnectorValidationError(\n                \"Invalid authentication method or missing required credentials\"\n            )\n\n        def _acquire_token_for_graph() -> dict[str, Any]:\n            \"\"\"\n            Acquire token via MSAL\n            \"\"\"\n            if self.msal_app is None:\n                raise ConnectorValidationError(\"MSAL app is not initialized\")\n\n            token = self.msal_app.acquire_token_for_client(\n                scopes=[f\"{self.graph_api_host}/.default\"]\n            )\n            if token is None:\n                raise ConnectorValidationError(\"Failed to acquire token for graph\")\n            return token\n\n        self._graph_client = GraphClient(\n            _acquire_token_for_graph, environment=self._azure_environment\n        )\n        if auth_method == SharepointAuthMethod.CERTIFICATE.value:\n            self.sp_tenant_domain = self._resolve_tenant_domain()\n        return None\n\n    def _get_drive_names_for_site(self, site_url: str) -> list[str]:\n        \"\"\"Return all library/drive names for a given SharePoint site.\"\"\"\n        try:\n            site = self.graph_client.sites.get_by_url(site_url)\n            drives = site.drives.get_all(page_loaded=lambda _: None).execute_query()\n            drive_names: list[str] = []\n            for drive in drives:\n                if drive.name is None:\n                    continue\n                drive_names.append(drive.name)\n\n            return drive_names\n        except Exception as e:\n            logger.warning(f\"Failed to fetch drives for site '{site_url}': {e}\")\n            return []\n\n    def _build_folder_url(\n        self, site_url: str, drive_name: str, folder_path: str\n    ) -> str:\n        \"\"\"Build a URL for a folder to use as raw_node_id.\n\n        NOTE: This constructs an approximate folder URL from components rather than\n        fetching the actual webUrl from the API. The constructed URL may differ\n        slightly from SharePoint's canonical webUrl (e.g., URL encoding differences),\n        but it functions correctly as a unique identifier for hierarchy tracking.\n        We avoid fetching folder metadata to minimize API calls.\n        \"\"\"\n        return f\"{site_url}/{drive_name}/{folder_path}\"\n\n    def _extract_folder_path_from_parent_reference(\n        self, parent_reference_path: str | None\n    ) -> str | None:\n        \"\"\"Extract folder path from DriveItem's parentReference.path.\n\n        Example input: \"/drives/b!abc123/root:/Engineering/API\"\n        Example output: \"Engineering/API\"\n\n        Returns None if the item is at the root of the drive.\n        \"\"\"\n        if not parent_reference_path:\n            return None\n\n        # Path format: /drives/{drive_id}/root:/folder/path\n        if \"root:/\" in parent_reference_path:\n            folder_path = parent_reference_path.split(\"root:/\")[1]\n            return folder_path if folder_path else None\n\n        # Item is at drive root\n        return None\n\n    def _yield_site_hierarchy_node(\n        self,\n        site_descriptor: SiteDescriptor,\n        checkpoint: SharepointConnectorCheckpoint,\n    ) -> Generator[HierarchyNode, None, None]:\n        \"\"\"Yield a hierarchy node for a site if not already yielded.\n\n        Uses site.web_url as the raw_node_id (exact URL from API).\n        \"\"\"\n        site_url = site_descriptor.url\n\n        if site_url in checkpoint.seen_hierarchy_node_raw_ids:\n            return\n\n        checkpoint.seen_hierarchy_node_raw_ids.add(site_url)\n\n        # Extract display name from URL (last path segment)\n        display_name = site_url.rstrip(\"/\").split(\"/\")[-1]\n\n        yield HierarchyNode(\n            raw_node_id=site_url,\n            raw_parent_id=None,  # Parent is SOURCE\n            display_name=display_name,\n            link=site_url,\n            node_type=HierarchyNodeType.SITE,\n        )\n\n    def _yield_drive_hierarchy_node(\n        self,\n        site_url: str,\n        drive_web_url: str,\n        drive_name: str,\n        checkpoint: SharepointConnectorCheckpoint,\n    ) -> Generator[HierarchyNode, None, None]:\n        \"\"\"Yield a hierarchy node for a drive if not already yielded.\n\n        Uses drive.web_url as the raw_node_id (exact URL from API).\n        \"\"\"\n        if drive_web_url in checkpoint.seen_hierarchy_node_raw_ids:\n            return\n\n        checkpoint.seen_hierarchy_node_raw_ids.add(drive_web_url)\n\n        yield HierarchyNode(\n            raw_node_id=drive_web_url,\n            raw_parent_id=site_url,  # Site URL is parent\n            display_name=drive_name,\n            link=drive_web_url,\n            node_type=HierarchyNodeType.DRIVE,\n        )\n\n    def _yield_folder_hierarchy_nodes(\n        self,\n        site_url: str,\n        drive_web_url: str,\n        drive_name: str,\n        folder_path: str,\n        checkpoint: SharepointConnectorCheckpoint,\n    ) -> Generator[HierarchyNode, None, None]:\n        \"\"\"Yield hierarchy nodes for all folders in a path.\n\n        For path \"Engineering/API/v2\", yields nodes for:\n        1. \"Engineering\" (parent = drive)\n        2. \"Engineering/API\" (parent = \"Engineering\")\n        3. \"Engineering/API/v2\" (parent = \"Engineering/API\")\n\n        Nodes are yielded in parent-to-child order.\n\n        Uses constructed URLs as raw_node_id. See _build_folder_url for details\n        on why we construct URLs rather than fetching them from the API.\n        \"\"\"\n        if not folder_path:\n            return\n\n        path_parts = folder_path.split(\"/\")\n\n        for i, part in enumerate(path_parts):\n            current_path = \"/\".join(path_parts[: i + 1])\n            folder_url = self._build_folder_url(site_url, drive_name, current_path)\n\n            if folder_url in checkpoint.seen_hierarchy_node_raw_ids:\n                continue\n\n            checkpoint.seen_hierarchy_node_raw_ids.add(folder_url)\n\n            # Determine parent URL\n            if i == 0:\n                # First folder, parent is the drive\n                parent_url = drive_web_url\n            else:\n                # Parent is the previous folder\n                parent_path = \"/\".join(path_parts[:i])\n                parent_url = self._build_folder_url(site_url, drive_name, parent_path)\n\n            yield HierarchyNode(\n                raw_node_id=folder_url,\n                raw_parent_id=parent_url,\n                display_name=part,  # Just the folder name\n                link=folder_url,\n                node_type=HierarchyNodeType.FOLDER,\n            )\n\n    def _get_parent_hierarchy_url(\n        self,\n        site_url: str,\n        drive_web_url: str,\n        drive_name: str,\n        driveitem: DriveItemData,\n    ) -> str:\n        \"\"\"Determine the parent hierarchy node URL for a document.\n\n        Returns:\n            - Folder URL if document is in a folder\n            - Drive URL if document is at drive root\n        \"\"\"\n        folder_path = self._extract_folder_path_from_parent_reference(\n            driveitem.parent_reference_path\n        )\n\n        if folder_path:\n            return self._build_folder_url(site_url, drive_name, folder_path)\n\n        # Document is at drive root\n        return drive_web_url\n\n    def _load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: SharepointConnectorCheckpoint,\n        include_permissions: bool = False,\n    ) -> CheckpointOutput[SharepointConnectorCheckpoint]:\n\n        if self._graph_client is None:\n            raise ConnectorMissingCredentialError(\"Sharepoint\")\n\n        checkpoint = copy.deepcopy(checkpoint)\n\n        # Phase 1: Initialize cached_site_descriptors if needed\n        if (\n            checkpoint.has_more\n            and checkpoint.cached_site_descriptors is None\n            and not checkpoint.process_site_pages\n        ):\n            logger.info(\"Initializing SharePoint sites for processing\")\n            site_descs = self._filter_excluded_sites(\n                self.site_descriptors or self.fetch_sites()\n            )\n            checkpoint.cached_site_descriptors = deque(site_descs)\n\n            if not checkpoint.cached_site_descriptors:\n                logger.warning(\n                    \"No SharePoint sites found or accessible - nothing to process\"\n                )\n                checkpoint.has_more = False\n                return checkpoint\n\n            logger.info(\n                f\"Found {len(checkpoint.cached_site_descriptors)} sites to process\"\n            )\n            # Set first site and return to allow checkpoint persistence\n            if checkpoint.cached_site_descriptors:\n                checkpoint.current_site_descriptor = (\n                    checkpoint.cached_site_descriptors.popleft()\n                )\n                logger.info(\n                    f\"Starting with site: {checkpoint.current_site_descriptor.url}\"\n                )\n                # Yield site hierarchy node for the first site\n                yield from self._yield_site_hierarchy_node(\n                    checkpoint.current_site_descriptor, checkpoint\n                )\n                return checkpoint\n\n        # Phase 2: Initialize cached_drive_names for current site if needed\n        if checkpoint.current_site_descriptor and checkpoint.cached_drive_names is None:\n            # If site documents flag is False, set empty drive list to skip document processing\n            if not self.include_site_documents:\n                logger.debug(\"Documents disabled, skipping drive initialization\")\n                checkpoint.cached_drive_names = deque()\n                return checkpoint\n\n            logger.info(\n                f\"Initializing drives for site: {checkpoint.current_site_descriptor.url}\"\n            )\n\n            try:\n                # If the user explicitly specified drive(s) for this site, honour that\n                if checkpoint.current_site_descriptor.drive_name:\n                    logger.info(\n                        f\"Using explicitly specified drive: {checkpoint.current_site_descriptor.drive_name}\"\n                    )\n                    checkpoint.cached_drive_names = deque(\n                        [checkpoint.current_site_descriptor.drive_name]\n                    )\n                else:\n                    drive_names = self._get_drive_names_for_site(\n                        checkpoint.current_site_descriptor.url\n                    )\n                    checkpoint.cached_drive_names = deque(drive_names)\n\n                if not checkpoint.cached_drive_names:\n                    logger.warning(\n                        f\"No accessible drives found for site: {checkpoint.current_site_descriptor.url}\"\n                    )\n                else:\n                    logger.info(\n                        f\"Found {len(checkpoint.cached_drive_names)} drives: {list(checkpoint.cached_drive_names)}\"\n                    )\n\n            except Exception as e:\n                logger.error(\n                    f\"Failed to initialize drives for site: {checkpoint.current_site_descriptor.url}: {e}\"\n                )\n                # Yield a ConnectorFailure for site-level access failures\n                start_dt = datetime.fromtimestamp(start, tz=timezone.utc)\n                end_dt = datetime.fromtimestamp(end, tz=timezone.utc)\n                yield _create_entity_failure(\n                    checkpoint.current_site_descriptor.url,\n                    f\"Failed to access site: {str(e)}\",\n                    (start_dt, end_dt),\n                    e,\n                )\n                # Move to next site if available\n                if (\n                    checkpoint.cached_site_descriptors\n                    and len(checkpoint.cached_site_descriptors) > 0\n                ):\n                    checkpoint.current_site_descriptor = (\n                        checkpoint.cached_site_descriptors.popleft()\n                    )\n                    checkpoint.cached_drive_names = None  # Reset for new site\n                    return checkpoint\n                else:\n                    # No more sites - we're done\n                    checkpoint.has_more = False\n                    return checkpoint\n\n            # Return checkpoint to allow persistence after drive initialization\n            return checkpoint\n\n        # Phase 3a: Initialize the next drive for processing\n        if (\n            checkpoint.current_site_descriptor\n            and checkpoint.cached_drive_names\n            and len(checkpoint.cached_drive_names) > 0\n            and checkpoint.current_drive_name is None\n        ):\n            checkpoint.current_drive_name = checkpoint.cached_drive_names.popleft()\n\n            start_dt = datetime.fromtimestamp(start, tz=timezone.utc)\n            end_dt = datetime.fromtimestamp(end, tz=timezone.utc)\n            site_descriptor = checkpoint.current_site_descriptor\n\n            logger.info(\n                f\"Processing drive '{checkpoint.current_drive_name}' in site: {site_descriptor.url}\"\n            )\n            logger.debug(f\"Time range: {start_dt} to {end_dt}\")\n\n            current_drive_name = checkpoint.current_drive_name\n            if current_drive_name is None:\n                logger.warning(\"Current drive name is None, skipping\")\n                return checkpoint\n\n            try:\n                logger.info(\n                    f\"Fetching drive items for drive name: {current_drive_name}\"\n                )\n                result = self._resolve_drive(site_descriptor, current_drive_name)\n                if result is None:\n                    logger.warning(f\"Drive '{current_drive_name}' not found, skipping\")\n                    self._clear_drive_checkpoint_state(checkpoint)\n                    return checkpoint\n\n                drive_id, drive_web_url = result\n                checkpoint.current_drive_id = drive_id\n                checkpoint.current_drive_web_url = drive_web_url\n            except Exception as e:\n                logger.error(\n                    f\"Failed to retrieve items from drive '{current_drive_name}' in site: {site_descriptor.url}: {e}\"\n                )\n                yield _create_entity_failure(\n                    f\"{site_descriptor.url}|{current_drive_name}\",\n                    f\"Failed to access drive '{current_drive_name}' in site '{site_descriptor.url}': {str(e)}\",\n                    (start_dt, end_dt),\n                    e,\n                )\n                self._clear_drive_checkpoint_state(checkpoint)\n                return checkpoint\n\n            display_drive_name = SHARED_DOCUMENTS_MAP.get(\n                current_drive_name, current_drive_name\n            )\n\n            if drive_web_url:\n                yield from self._yield_drive_hierarchy_node(\n                    site_descriptor.url,\n                    drive_web_url,\n                    display_drive_name,\n                    checkpoint,\n                )\n\n            # For non-folder-scoped drives, use delta API with per-page\n            # checkpointing.  Build the initial URL and fall through to 3b.\n            if not site_descriptor.folder_path:\n                checkpoint.current_drive_delta_next_link = self._build_delta_start_url(\n                    drive_id, start_dt\n                )\n            # else: BFS path — delta_next_link stays None;\n            # Phase 3b will use _iter_drive_items_paged.\n\n        # Phase 3b: Process items from the current drive\n        if (\n            checkpoint.current_site_descriptor\n            and checkpoint.current_drive_name is not None\n            and checkpoint.current_drive_id is not None\n        ):\n            site_descriptor = checkpoint.current_site_descriptor\n            start_dt = datetime.fromtimestamp(start, tz=timezone.utc)\n            end_dt = datetime.fromtimestamp(end, tz=timezone.utc)\n            current_drive_name = SHARED_DOCUMENTS_MAP.get(\n                checkpoint.current_drive_name, checkpoint.current_drive_name\n            )\n            drive_web_url = checkpoint.current_drive_web_url\n\n            # --- determine item source ---\n            driveitems: Iterable[DriveItemData]\n            has_more_delta_pages = False\n\n            if checkpoint.current_drive_delta_next_link:\n                # Delta path: fetch one page at a time for checkpointing\n                try:\n                    page_items, next_url = self._fetch_one_delta_page(\n                        page_url=checkpoint.current_drive_delta_next_link,\n                        drive_id=checkpoint.current_drive_id,\n                        start=start_dt,\n                        end=end_dt,\n                    )\n                except Exception as e:\n                    logger.error(\n                        f\"Failed to fetch delta page for drive '{current_drive_name}': {e}\"\n                    )\n                    yield _create_entity_failure(\n                        f\"{site_descriptor.url}|{current_drive_name}\",\n                        f\"Failed to fetch delta page for drive '{current_drive_name}': {str(e)}\",\n                        (start_dt, end_dt),\n                        e,\n                    )\n                    self._clear_drive_checkpoint_state(checkpoint)\n                    return checkpoint\n\n                driveitems = page_items\n                has_more_delta_pages = next_url is not None\n                if next_url:\n                    checkpoint.current_drive_delta_next_link = next_url\n            else:\n                # BFS path (folder-scoped): process all items at once\n                driveitems = self._iter_drive_items_paged(\n                    drive_id=checkpoint.current_drive_id,\n                    folder_path=site_descriptor.folder_path,\n                    start=start_dt,\n                    end=end_dt,\n                )\n\n            item_count = 0\n            for driveitem in driveitems:\n                item_count += 1\n\n                if self._is_driveitem_excluded(driveitem):\n                    logger.debug(f\"Excluding by path denylist: {driveitem.web_url}\")\n                    continue\n\n                if driveitem.id and driveitem.id in checkpoint.seen_document_ids:\n                    logger.debug(\n                        f\"Skipping duplicate document {driveitem.id} ({driveitem.name})\"\n                    )\n                    continue\n\n                driveitem_extension = get_file_ext(driveitem.name)\n                if driveitem_extension not in OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS:\n                    logger.warning(\n                        f\"Skipping {driveitem.web_url} as it is not a supported file type\"\n                    )\n                    continue\n\n                should_yield_if_empty = (\n                    driveitem_extension in OnyxFileExtensions.IMAGE_EXTENSIONS\n                    or driveitem_extension == \".pdf\"\n                )\n\n                folder_path = self._extract_folder_path_from_parent_reference(\n                    driveitem.parent_reference_path\n                )\n                if folder_path and drive_web_url:\n                    yield from self._yield_folder_hierarchy_nodes(\n                        site_descriptor.url,\n                        drive_web_url,\n                        current_drive_name,\n                        folder_path,\n                        checkpoint,\n                    )\n\n                parent_hierarchy_url: str | None = None\n                if drive_web_url:\n                    parent_hierarchy_url = self._get_parent_hierarchy_url(\n                        site_descriptor.url,\n                        drive_web_url,\n                        current_drive_name,\n                        driveitem,\n                    )\n\n                try:\n                    ctx: ClientContext | None = None\n                    if include_permissions:\n                        ctx = self._create_rest_client_context(site_descriptor.url)\n\n                    access_token = self._get_graph_access_token()\n                    doc_or_failure = _convert_driveitem_to_document_with_permissions(\n                        driveitem,\n                        current_drive_name,\n                        ctx,\n                        self.graph_client,\n                        include_permissions=include_permissions,\n                        parent_hierarchy_raw_node_id=parent_hierarchy_url,\n                        graph_api_base=self.graph_api_base,\n                        access_token=access_token,\n                        treat_sharing_link_as_public=self.treat_sharing_link_as_public,\n                    )\n\n                    if isinstance(doc_or_failure, Document):\n                        if doc_or_failure.sections:\n                            checkpoint.seen_document_ids.add(doc_or_failure.id)\n                            yield doc_or_failure\n                        elif should_yield_if_empty:\n                            doc_or_failure.sections = [\n                                TextSection(link=driveitem.web_url, text=\"\")\n                            ]\n                            checkpoint.seen_document_ids.add(doc_or_failure.id)\n                            yield doc_or_failure\n                        else:\n                            logger.warning(\n                                f\"Skipping {driveitem.web_url} as it is empty and not a PDF or image\"\n                            )\n                    elif isinstance(doc_or_failure, ConnectorFailure):\n                        yield doc_or_failure\n                except Exception as e:\n                    logger.warning(\n                        f\"Failed to process driveitem {driveitem.web_url}: {e}\"\n                    )\n                    yield _create_document_failure(\n                        driveitem, f\"Failed to process: {str(e)}\", e\n                    )\n\n            logger.info(f\"Processed {item_count} items in drive '{current_drive_name}'\")\n\n            if has_more_delta_pages:\n                return checkpoint\n\n            self._clear_drive_checkpoint_state(checkpoint)\n\n        # Phase 4: Progression logic - determine next step\n        # If we have more drives in current site, continue with current site\n        if checkpoint.cached_drive_names and len(checkpoint.cached_drive_names) > 0:\n            logger.debug(\n                f\"Continuing with {len(checkpoint.cached_drive_names)} remaining drives in current site\"\n            )\n            return checkpoint\n\n        if (\n            self.include_site_pages\n            and not checkpoint.process_site_pages\n            and checkpoint.current_site_descriptor is not None\n        ):\n            logger.info(\n                f\"Processing site pages for site: {checkpoint.current_site_descriptor.url}\"\n            )\n            checkpoint.process_site_pages = True\n            return checkpoint\n\n        # Phase 5: Process site pages\n        if (\n            checkpoint.process_site_pages\n            and checkpoint.current_site_descriptor is not None\n        ):\n            # Fetch SharePoint site pages (.aspx files)\n            site_descriptor = checkpoint.current_site_descriptor\n            start_dt = datetime.fromtimestamp(start, tz=timezone.utc)\n            end_dt = datetime.fromtimestamp(end, tz=timezone.utc)\n            site_pages = self._fetch_site_pages(\n                site_descriptor, start=start_dt, end=end_dt\n            )\n            for site_page in site_pages:\n                logger.debug(\n                    f\"Processing site page: {site_page.get('webUrl', site_page.get('name', 'Unknown'))}\"\n                )\n                client_ctx: ClientContext | None = None\n                if include_permissions:\n                    client_ctx = self._create_rest_client_context(site_descriptor.url)\n                yield (\n                    _convert_sitepage_to_document(\n                        site_page,\n                        site_descriptor.drive_name,\n                        client_ctx,\n                        self.graph_client,\n                        include_permissions=include_permissions,\n                        # Site pages have the site as their parent\n                        parent_hierarchy_raw_node_id=site_descriptor.url,\n                        treat_sharing_link_as_public=self.treat_sharing_link_as_public,\n                    )\n                )\n            logger.info(\n                f\"Finished processing site pages for site: {site_descriptor.url}\"\n            )\n\n        # If no more drives, move to next site if available\n        if (\n            checkpoint.cached_site_descriptors\n            and len(checkpoint.cached_site_descriptors) > 0\n        ):\n            current_site = (\n                checkpoint.current_site_descriptor.url\n                if checkpoint.current_site_descriptor\n                else \"unknown\"\n            )\n            checkpoint.current_site_descriptor = (\n                checkpoint.cached_site_descriptors.popleft()\n            )\n            checkpoint.cached_drive_names = None  # Reset for new site\n            checkpoint.process_site_pages = False\n            logger.info(\n                f\"Finished site '{current_site}', moving to next site: {checkpoint.current_site_descriptor.url}\"\n            )\n            logger.info(\n                f\"Remaining sites to process: {len(checkpoint.cached_site_descriptors) + 1}\"\n            )\n            # Yield site hierarchy node for the new site\n            yield from self._yield_site_hierarchy_node(\n                checkpoint.current_site_descriptor, checkpoint\n            )\n            return checkpoint\n\n        # No more sites or drives - we're done\n        current_site = (\n            checkpoint.current_site_descriptor.url\n            if checkpoint.current_site_descriptor\n            else \"unknown\"\n        )\n        logger.info(\n            f\"SharePoint processing complete. Finished last site: {current_site}\"\n        )\n        checkpoint.has_more = False\n        return checkpoint\n\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: SharepointConnectorCheckpoint,\n    ) -> CheckpointOutput[SharepointConnectorCheckpoint]:\n        return self._load_from_checkpoint(\n            start, end, checkpoint, include_permissions=False\n        )\n\n    def load_from_checkpoint_with_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: SharepointConnectorCheckpoint,\n    ) -> CheckpointOutput[SharepointConnectorCheckpoint]:\n        return self._load_from_checkpoint(\n            start, end, checkpoint, include_permissions=True\n        )\n\n    def build_dummy_checkpoint(self) -> SharepointConnectorCheckpoint:\n        return SharepointConnectorCheckpoint(has_more=True)\n\n    def validate_checkpoint_json(\n        self, checkpoint_json: str\n    ) -> SharepointConnectorCheckpoint:\n        return SharepointConnectorCheckpoint.model_validate_json(checkpoint_json)\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002\n    ) -> GenerateSlimDocumentOutput:\n        start_dt = (\n            datetime.fromtimestamp(start, tz=timezone.utc)\n            if start is not None\n            else None\n        )\n        end_dt = (\n            datetime.fromtimestamp(end, tz=timezone.utc) if end is not None else None\n        )\n        yield from self._fetch_slim_documents_from_sharepoint(\n            start=start_dt,\n            end=end_dt,\n        )\n\n\nif __name__ == \"__main__\":\n    from onyx.connectors.connector_runner import ConnectorRunner\n\n    connector = SharepointConnector(sites=os.environ[\"SHAREPOINT_SITES\"].split(\",\"))\n\n    connector.load_credentials(\n        {\n            \"sp_client_id\": os.environ[\"SHAREPOINT_CLIENT_ID\"],\n            \"sp_client_secret\": os.environ[\"SHAREPOINT_CLIENT_SECRET\"],\n            \"sp_directory_id\": os.environ[\"SHAREPOINT_CLIENT_DIRECTORY_ID\"],\n        }\n    )\n\n    # Create a time range from epoch to now\n    end_time = datetime.now(timezone.utc)\n    start_time = datetime.fromtimestamp(0, tz=timezone.utc)\n    time_range = (start_time, end_time)\n\n    # Initialize the runner with a batch size of 10\n    runner: ConnectorRunner[SharepointConnectorCheckpoint] = ConnectorRunner(\n        connector, batch_size=10, include_permissions=False, time_range=time_range\n    )\n\n    # Get initial checkpoint\n    checkpoint = connector.build_dummy_checkpoint()\n\n    # Run the connector\n    while checkpoint.has_more:\n        for doc_batch, hierarchy_node_batch, failure, next_checkpoint in runner.run(\n            checkpoint\n        ):\n            if doc_batch:\n                print(f\"Retrieved batch of {len(doc_batch)} documents\")\n                for test_doc in doc_batch:\n                    print(f\"Document: {test_doc.semantic_identifier}\")\n            if failure:\n                print(f\"Failure: {failure.failure_message}\")\n            if next_checkpoint:\n                checkpoint = next_checkpoint\n"
  },
  {
    "path": "backend/onyx/connectors/sharepoint/connector_utils.py",
    "content": "from typing import Any\n\nfrom office365.graph_client import GraphClient  # type: ignore[import-untyped]\nfrom office365.onedrive.driveitems.driveItem import DriveItem  # type: ignore[import-untyped]\nfrom office365.sharepoint.client_context import ClientContext  # type: ignore[import-untyped]\n\nfrom onyx.connectors.models import ExternalAccess\nfrom onyx.utils.variable_functionality import (\n    fetch_versioned_implementation_with_fallback,\n)\n\n\ndef get_sharepoint_external_access(\n    ctx: ClientContext,\n    graph_client: GraphClient,\n    drive_item: DriveItem | None = None,\n    drive_name: str | None = None,\n    site_page: dict[str, Any] | None = None,\n    add_prefix: bool = False,\n    treat_sharing_link_as_public: bool = False,\n) -> ExternalAccess:\n    if drive_item and drive_item.id is None:\n        raise ValueError(\"DriveItem ID is required\")\n\n    # Get external access using the EE implementation\n    def noop_fallback(\n        *args: Any, **kwargs: Any  # noqa: ARG001\n    ) -> ExternalAccess:  # noqa: ARG001\n        return ExternalAccess.empty()\n\n    get_external_access_func = fetch_versioned_implementation_with_fallback(\n        \"onyx.external_permissions.sharepoint.permission_utils\",\n        \"get_external_access_from_sharepoint\",\n        fallback=noop_fallback,\n    )\n\n    external_access = get_external_access_func(\n        ctx,\n        graph_client,\n        drive_name,\n        drive_item,\n        site_page,\n        add_prefix,\n        treat_sharing_link_as_public,\n    )\n\n    return external_access\n"
  },
  {
    "path": "backend/onyx/connectors/slab/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/slab/connector.py",
    "content": "import json\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom urllib.parse import urljoin\n\nimport requests\nfrom dateutil import parser\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\n# Fairly generous retry because it's not understood why occasionally GraphQL requests fail even with timeout > 1 min\nSLAB_GRAPHQL_MAX_TRIES = 10\nSLAB_API_URL = \"https://api.slab.com/v1/graphql\"\n\n_SLIM_BATCH_SIZE = 1000\n\n\ndef run_graphql_request(\n    graphql_query: dict, bot_token: str, max_tries: int = SLAB_GRAPHQL_MAX_TRIES\n) -> str:\n    headers = {\"Authorization\": bot_token, \"Content-Type\": \"application/json\"}\n\n    for try_count in range(max_tries):\n        try:\n            response = requests.post(\n                SLAB_API_URL, headers=headers, json=graphql_query, timeout=60\n            )\n            response.raise_for_status()\n\n            if response.status_code != 200:\n                raise ValueError(f\"GraphQL query failed: {graphql_query}\")\n\n            return response.text\n\n        except (requests.exceptions.Timeout, ValueError) as e:\n            if try_count < max_tries - 1:\n                logger.warning(\"A Slab GraphQL error occurred. Retrying...\")\n                continue\n\n            if isinstance(e, requests.exceptions.Timeout):\n                raise TimeoutError(\"Slab API timed out after 3 attempts\")\n            else:\n                raise ValueError(\"Slab GraphQL query failed after 3 attempts\")\n\n    raise RuntimeError(\n        \"Unexpected execution from Slab Connector. This should not happen.\"\n    )  # for static checker\n\n\ndef get_all_post_ids(bot_token: str) -> list[str]:\n    query = \"\"\"\n        query GetAllPostIds {\n            organization {\n                posts {\n                    id\n                }\n            }\n        }\n        \"\"\"\n\n    graphql_query = {\"query\": query}\n\n    results = json.loads(run_graphql_request(graphql_query, bot_token))\n    posts = results[\"data\"][\"organization\"][\"posts\"]\n    return [post[\"id\"] for post in posts]\n\n\ndef get_post_by_id(post_id: str, bot_token: str) -> dict[str, str]:\n    query = \"\"\"\n        query GetPostById($postId: ID!) {\n            post(id: $postId) {\n                title\n                content\n                linkAccess\n                updatedAt\n            }\n        }\n        \"\"\"\n    graphql_query = {\"query\": query, \"variables\": {\"postId\": post_id}}\n    results = json.loads(run_graphql_request(graphql_query, bot_token))\n    return results[\"data\"][\"post\"]\n\n\ndef iterate_post_batches(\n    batch_size: int, bot_token: str\n) -> Generator[list[dict[str, str]], None, None]:\n    \"\"\"This may not be safe to use, not sure if page edits will change the order of results\"\"\"\n    query = \"\"\"\n        query IteratePostBatches($query: String!, $first: Int, $types: [SearchType], $after: String) {\n            search(query: $query, first: $first, types: $types, after: $after) {\n                edges {\n                    node {\n                        ... on PostSearchResult {\n                            post {\n                                id\n                                title\n                                content\n                                updatedAt\n                            }\n                        }\n                    }\n                }\n                pageInfo {\n                    endCursor\n                    hasNextPage\n                }\n            }\n        }\n    \"\"\"\n    pagination_start = None\n    exists_more_pages = True\n    while exists_more_pages:\n        graphql_query = {\n            \"query\": query,\n            \"variables\": {\n                \"query\": \"\",\n                \"first\": batch_size,\n                \"types\": [\"POST\"],\n                \"after\": pagination_start,\n            },\n        }\n        results = json.loads(run_graphql_request(graphql_query, bot_token))\n        pagination_start = results[\"data\"][\"search\"][\"pageInfo\"][\"endCursor\"]\n        hits = results[\"data\"][\"search\"][\"edges\"]\n\n        posts = [hit[\"node\"] for hit in hits]\n        if posts:\n            yield posts\n\n        exists_more_pages = results[\"data\"][\"search\"][\"pageInfo\"][\"hasNextPage\"]\n\n\ndef get_slab_url_from_title_id(base_url: str, title: str, page_id: str) -> str:\n    \"\"\"This is not a documented approach but seems to be the way it works currently\n    May be subject to change without notification\"\"\"\n    title = (\n        title.replace(\"[\", \"\")\n        .replace(\"]\", \"\")\n        .replace(\":\", \"\")\n        .replace(\" \", \"-\")\n        .lower()\n    )\n    url_id = title + \"-\" + page_id\n    return urljoin(urljoin(base_url, \"posts/\"), url_id)\n\n\nclass SlabConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):\n    def __init__(\n        self,\n        base_url: str,\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        self.base_url = base_url\n        self.batch_size = batch_size\n        self._slab_bot_token: str | None = None\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        self._slab_bot_token = credentials[\"slab_bot_token\"]\n        return None\n\n    @property\n    def slab_bot_token(self) -> str:\n        if self._slab_bot_token is None:\n            raise ConnectorMissingCredentialError(\"Slab\")\n        return self._slab_bot_token\n\n    def _iterate_posts(\n        self, time_filter: Callable[[datetime], bool] | None = None\n    ) -> GenerateDocumentsOutput:\n        doc_batch: list[Document | HierarchyNode] = []\n\n        if self.slab_bot_token is None:\n            raise ConnectorMissingCredentialError(\"Slab\")\n\n        all_post_ids: list[str] = get_all_post_ids(self.slab_bot_token)\n\n        for post_id in all_post_ids:\n            post = get_post_by_id(post_id, self.slab_bot_token)\n            last_modified = parser.parse(post[\"updatedAt\"])\n            if time_filter is not None and not time_filter(last_modified):\n                continue\n\n            page_url = get_slab_url_from_title_id(self.base_url, post[\"title\"], post_id)\n\n            content_text = \"\"\n            contents = json.loads(post[\"content\"])\n            for content_segment in contents:\n                insert = content_segment.get(\"insert\")\n                if insert and isinstance(insert, str):\n                    content_text += insert\n\n            doc_batch.append(\n                Document(\n                    id=post_id,  # can't be url as this changes with the post title\n                    sections=[TextSection(link=page_url, text=content_text)],\n                    source=DocumentSource.SLAB,\n                    semantic_identifier=post[\"title\"],\n                    metadata={},\n                )\n            )\n\n            if len(doc_batch) >= self.batch_size:\n                yield doc_batch\n                doc_batch = []\n\n        if doc_batch:\n            yield doc_batch\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        yield from self._iterate_posts()\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        start_time = datetime.fromtimestamp(start, tz=timezone.utc)\n        end_time = datetime.fromtimestamp(end, tz=timezone.utc)\n\n        yield from self._iterate_posts(\n            time_filter=lambda t: start_time <= t <= end_time\n        )\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002\n        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002\n        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002\n    ) -> GenerateSlimDocumentOutput:\n        slim_doc_batch: list[SlimDocument | HierarchyNode] = []\n        for post_id in get_all_post_ids(self.slab_bot_token):\n            slim_doc_batch.append(\n                SlimDocument(\n                    id=post_id,\n                )\n            )\n            if len(slim_doc_batch) >= _SLIM_BATCH_SIZE:\n                yield slim_doc_batch\n                slim_doc_batch = []\n        if slim_doc_batch:\n            yield slim_doc_batch\n\n    def validate_connector_settings(self) -> None:\n        \"\"\"\n        Very basic validation, we could do more here\n        \"\"\"\n        if not self.base_url.startswith(\"https://\") and not self.base_url.startswith(\n            \"http://\"\n        ):\n            raise ConnectorValidationError(\n                \"Base URL must start with https:// or http://\"\n            )\n\n        try:\n            get_all_post_ids(self.slab_bot_token)\n        except ConnectorMissingCredentialError:\n            raise\n        except Exception as e:\n            raise ConnectorValidationError(f\"Failed to fetch posts from Slab: {e}\")\n"
  },
  {
    "path": "backend/onyx/connectors/slack/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/slack/access.py",
    "content": "from collections.abc import Callable\nfrom typing import cast\n\nfrom slack_sdk import WebClient\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.slack.models import ChannelType\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import global_version\n\n\ndef get_channel_access(\n    client: WebClient,\n    channel: ChannelType,\n    user_cache: dict[str, BasicExpertInfo | None],\n) -> ExternalAccess | None:\n    \"\"\"\n    Get channel access permissions for a Slack channel.\n    This functionality requires Enterprise Edition.\n\n    Args:\n        client: Slack WebClient instance\n        channel: Slack channel object containing channel info\n        user_cache: Cache of user IDs to BasicExpertInfo objects. May be updated in place.\n\n    Returns:\n        ExternalAccess object for the channel. None if EE is not enabled.\n    \"\"\"\n    # Check if EE is enabled\n    if not global_version.is_ee_version():\n        return None\n\n    # Fetch the EE implementation\n    ee_get_channel_access = cast(\n        Callable[\n            [WebClient, ChannelType, dict[str, BasicExpertInfo | None]],\n            ExternalAccess,\n        ],\n        fetch_versioned_implementation(\n            \"onyx.external_permissions.slack.channel_access\", \"get_channel_access\"\n        ),\n    )\n\n    return ee_get_channel_access(client, channel, user_cache)\n"
  },
  {
    "path": "backend/onyx/connectors/slack/connector.py",
    "content": "import contextvars\nimport copy\nimport itertools\nimport re\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom concurrent.futures import as_completed\nfrom concurrent.futures import Future\nfrom concurrent.futures import ThreadPoolExecutor\nfrom datetime import datetime\nfrom datetime import timezone\nfrom enum import Enum\nfrom http.client import IncompleteRead\nfrom http.client import RemoteDisconnected\nfrom typing import Any\nfrom typing import cast\nfrom urllib.error import URLError\nfrom urllib.parse import urlparse\n\nfrom pydantic import BaseModel\nfrom redis import Redis\nfrom slack_sdk import WebClient\nfrom slack_sdk.errors import SlackApiError\nfrom slack_sdk.http_retry import ConnectionErrorRetryHandler\nfrom slack_sdk.http_retry import RetryHandler\nfrom slack_sdk.http_retry.builtin_interval_calculators import (\n    FixedValueRetryIntervalCalculator,\n)\nfrom typing_extensions import override\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.configs.app_configs import ENABLE_EXPENSIVE_EXPERT_CALLS\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.app_configs import SLACK_NUM_THREADS\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.interfaces import CheckpointedConnectorWithPermSync\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import CredentialsConnector\nfrom onyx.connectors.interfaces import CredentialsProviderInterface\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import NormalizationResult\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import EntityFailure\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.connectors.slack.access import get_channel_access\nfrom onyx.connectors.slack.models import ChannelType\nfrom onyx.connectors.slack.models import MessageType\nfrom onyx.connectors.slack.models import ThreadType\nfrom onyx.connectors.slack.onyx_retry_handler import OnyxRedisSlackRetryHandler\nfrom onyx.connectors.slack.onyx_slack_web_client import OnyxSlackWebClient\nfrom onyx.connectors.slack.utils import (\n    expert_info_from_slack_id,\n)\nfrom onyx.connectors.slack.utils import get_message_link\nfrom onyx.connectors.slack.utils import make_paginated_slack_api_call\nfrom onyx.connectors.slack.utils import SlackTextCleaner\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_SLACK_LIMIT = 900\n\n\nclass SlackCheckpoint(ConnectorCheckpoint):\n    channel_ids: list[str] | None  # e.g. C8E6WHE2X\n\n    # channel id mapped to the timestamp we want to retrieve messages up to\n    # NOTE: this is usually the earliest timestamp of all the messages we have\n    # since we walk backwards\n    channel_completion_map: dict[str, str]\n    current_channel: ChannelType | None\n    current_channel_access: ExternalAccess | None\n\n    seen_thread_ts: list[\n        str\n    ]  # apparently we identify threads/messages uniquely by timestamp?\n\n\ndef _collect_paginated_channels(\n    client: WebClient,\n    exclude_archived: bool,\n    channel_types: list[str],\n) -> list[ChannelType]:\n    channels: list[ChannelType] = []\n    for result in make_paginated_slack_api_call(\n        client.conversations_list,\n        exclude_archived=exclude_archived,\n        # also get private channels the bot is added to\n        types=channel_types,\n    ):\n        channels.extend(result[\"channels\"])\n\n    return channels\n\n\ndef get_channels(\n    client: WebClient,\n    exclude_archived: bool = True,\n    get_public: bool = True,\n    get_private: bool = True,\n) -> list[ChannelType]:\n    \"\"\"Get all channels in the workspace.\"\"\"\n    channels: list[ChannelType] = []\n    channel_types = []\n    if get_public:\n        channel_types.append(\"public_channel\")\n    if get_private:\n        channel_types.append(\"private_channel\")\n    # Try fetching both public and private channels first:\n    try:\n        channels = _collect_paginated_channels(\n            client=client,\n            exclude_archived=exclude_archived,\n            channel_types=channel_types,\n        )\n    except SlackApiError as e:\n        msg = f\"Unable to fetch private channels due to: {e}.\"\n        if not get_public:\n            logger.warning(msg + \" Public channels are not enabled.\")\n            return []\n\n        logger.warning(msg + \" Trying again with public channels only.\")\n        channel_types = [\"public_channel\"]\n        channels = _collect_paginated_channels(\n            client=client,\n            exclude_archived=exclude_archived,\n            channel_types=channel_types,\n        )\n    return channels\n\n\ndef get_channel_messages(\n    client: WebClient,\n    channel: ChannelType,\n    oldest: str | None = None,\n    latest: str | None = None,\n    callback: IndexingHeartbeatInterface | None = None,\n) -> Generator[list[MessageType], None, None]:\n    \"\"\"Get all messages in a channel\"\"\"\n    # join so that the bot can access messages\n    if not channel[\"is_member\"]:\n        client.conversations_join(\n            channel=channel[\"id\"],\n            is_private=channel[\"is_private\"],\n        )\n        logger.info(f\"Successfully joined '{channel['name']}'\")\n\n    for result in make_paginated_slack_api_call(\n        client.conversations_history,\n        channel=channel[\"id\"],\n        oldest=oldest,\n        latest=latest,\n    ):\n        if callback:\n            if callback.should_stop():\n                raise RuntimeError(\"get_channel_messages: Stop signal detected\")\n\n            callback.progress(\"get_channel_messages\", 0)\n        yield cast(list[MessageType], result[\"messages\"])\n\n\ndef get_thread(client: WebClient, channel_id: str, thread_id: str) -> ThreadType:\n    \"\"\"Get all messages in a thread\"\"\"\n    threads: list[MessageType] = []\n    for result in make_paginated_slack_api_call(\n        client.conversations_replies, channel=channel_id, ts=thread_id\n    ):\n        threads.extend(result[\"messages\"])\n    return threads\n\n\ndef get_latest_message_time(thread: ThreadType) -> datetime:\n    max_ts = max([float(msg.get(\"ts\", 0)) for msg in thread])\n    return datetime.fromtimestamp(max_ts, tz=timezone.utc)\n\n\ndef _build_doc_id(channel_id: str, thread_ts: str) -> str:\n    return f\"{channel_id}__{thread_ts}\"\n\n\ndef thread_to_doc(\n    channel: ChannelType,\n    thread: ThreadType,\n    slack_cleaner: SlackTextCleaner,\n    client: WebClient,\n    user_cache: dict[str, BasicExpertInfo | None],\n    channel_access: ExternalAccess | None,\n) -> Document:\n    channel_id = channel[\"id\"]\n\n    initial_sender_expert_info = expert_info_from_slack_id(\n        user_id=thread[0].get(\"user\"), client=client, user_cache=user_cache\n    )\n    initial_sender_name = (\n        initial_sender_expert_info.get_semantic_name()\n        if initial_sender_expert_info\n        else \"Unknown\"\n    )\n\n    valid_experts = None\n    if ENABLE_EXPENSIVE_EXPERT_CALLS:\n        all_sender_ids = [m.get(\"user\") for m in thread]\n        experts = [\n            expert_info_from_slack_id(\n                user_id=sender_id, client=client, user_cache=user_cache\n            )\n            for sender_id in all_sender_ids\n            if sender_id\n        ]\n        valid_experts = [expert for expert in experts if expert]\n\n    first_message = slack_cleaner.index_clean(cast(str, thread[0][\"text\"]))\n    snippet = (\n        first_message[:50].rstrip() + \"...\"\n        if len(first_message) > 50\n        else first_message\n    )\n\n    doc_sem_id = f\"{initial_sender_name} in #{channel['name']}: {snippet}\".replace(\n        \"\\n\", \" \"\n    )\n\n    channel_name = channel[\"name\"]\n\n    return Document(\n        id=_build_doc_id(channel_id=channel_id, thread_ts=thread[0][\"ts\"]),\n        sections=[\n            TextSection(\n                link=get_message_link(event=m, client=client, channel_id=channel_id),\n                text=slack_cleaner.index_clean(cast(str, m[\"text\"])),\n            )\n            for m in thread\n        ],\n        source=DocumentSource.SLACK,\n        semantic_identifier=doc_sem_id,\n        doc_updated_at=get_latest_message_time(thread),\n        primary_owners=valid_experts,\n        doc_metadata={\n            \"hierarchy\": {\n                \"source_path\": [channel_name],\n                \"channel_name\": channel_name,\n                \"channel_id\": channel_id,\n            }\n        },\n        metadata={\"Channel\": channel_name},\n        external_access=channel_access,\n        parent_hierarchy_raw_node_id=channel_id,\n    )\n\n\n# list of subtypes can be found here: https://api.slack.com/events/message\n_DISALLOWED_MSG_SUBTYPES = {\n    \"channel_join\",\n    \"channel_leave\",\n    \"channel_archive\",\n    \"channel_unarchive\",\n    \"pinned_item\",\n    \"unpinned_item\",\n    \"ekm_access_denied\",\n    \"channel_posting_permissions\",\n    \"group_join\",\n    \"group_leave\",\n    \"group_archive\",\n    \"group_unarchive\",\n    \"channel_leave\",\n    \"channel_name\",\n    \"channel_join\",\n}\n\n\nclass SlackMessageFilterReason(str, Enum):\n    BOT = \"bot\"\n    DISALLOWED = \"disallowed\"\n\n\ndef default_msg_filter(message: MessageType) -> SlackMessageFilterReason | None:\n    \"\"\"Returns a filter reason if the message should be filtered out.\n    Returns None if the message can be kept.\n    \"\"\"\n\n    # Don't keep messages from bots\n    if message.get(\"bot_id\") or message.get(\"app_id\"):\n        bot_profile_name = message.get(\"bot_profile\", {}).get(\"name\")\n        if bot_profile_name == \"DanswerBot Testing\":\n            return None\n        return SlackMessageFilterReason.BOT\n\n    # Uninformative\n    if message.get(\"subtype\", \"\") in _DISALLOWED_MSG_SUBTYPES:\n        return SlackMessageFilterReason.DISALLOWED\n\n    return None\n\n\ndef _bot_inclusive_msg_filter(\n    message: MessageType,\n) -> SlackMessageFilterReason | None:\n    \"\"\"Like default_msg_filter but allows bot/app messages through.\n    Only filters out disallowed subtypes (channel_join, channel_leave, etc.).\n    \"\"\"\n    if message.get(\"subtype\", \"\") in _DISALLOWED_MSG_SUBTYPES:\n        return SlackMessageFilterReason.DISALLOWED\n\n    return None\n\n\ndef filter_channels(\n    all_channels: list[ChannelType],\n    channels_to_connect: list[str] | None,\n    regex_enabled: bool,\n) -> list[ChannelType]:\n    if not channels_to_connect:\n        return all_channels\n\n    if regex_enabled:\n        return [\n            channel\n            for channel in all_channels\n            if any(\n                re.fullmatch(channel_to_connect, channel[\"name\"])\n                for channel_to_connect in channels_to_connect\n            )\n        ]\n\n    # validate that all channels in `channels_to_connect` are valid\n    # fail loudly in the case of an invalid channel so that the user\n    # knows that one of the channels they've specified is typo'd or private\n    all_channel_names = {channel[\"name\"] for channel in all_channels}\n    for channel in channels_to_connect:\n        if channel not in all_channel_names:\n            raise ValueError(\n                f\"Channel '{channel}' not found in workspace. \"\n                f\"Available channels (Showing {len(all_channel_names)} of \"\n                f\"{min(len(all_channel_names), SlackConnector.MAX_CHANNELS_TO_LOG)}): \"\n                f\"{list(itertools.islice(all_channel_names, SlackConnector.MAX_CHANNELS_TO_LOG))}\"\n            )\n\n    return [\n        channel for channel in all_channels if channel[\"name\"] in channels_to_connect\n    ]\n\n\ndef _channel_to_hierarchy_node(\n    channel: ChannelType,\n    channel_access: ExternalAccess | None,\n    workspace_url: str | None = None,\n) -> HierarchyNode:\n    \"\"\"Convert a Slack channel to a HierarchyNode.\n\n    Args:\n        channel: The Slack channel object\n        channel_access: External access permissions for the channel\n        workspace_url: The workspace URL (e.g., https://myworkspace.slack.com)\n\n    Returns:\n        A HierarchyNode representing the channel\n    \"\"\"\n    # Link format: https://{workspace}.slack.com/archives/{channel_id}\n    link = f\"{workspace_url}/archives/{channel['id']}\" if workspace_url else None\n\n    return HierarchyNode(\n        raw_node_id=channel[\"id\"],\n        raw_parent_id=None,  # Direct child of SOURCE\n        display_name=f\"#{channel['name']}\",\n        link=link,\n        node_type=HierarchyNodeType.CHANNEL,\n        external_access=channel_access,\n    )\n\n\ndef _get_channel_by_id(client: WebClient, channel_id: str) -> ChannelType:\n    \"\"\"Get a channel by its ID.\n\n    Args:\n        client: The Slack WebClient instance\n        channel_id: The ID of the channel to fetch\n\n    Returns:\n        The channel information\n\n    Raises:\n        SlackApiError: If the channel cannot be fetched\n    \"\"\"\n    response = client.conversations_info(\n        channel=channel_id,\n    )\n    return cast(ChannelType, response[\"channel\"])\n\n\ndef _get_messages(\n    channel: ChannelType,\n    client: WebClient,\n    oldest: str | None = None,\n    latest: str | None = None,\n    limit: int = _SLACK_LIMIT,\n) -> tuple[list[MessageType], bool]:\n    \"\"\"Slack goes from newest to oldest.\"\"\"\n\n    # have to be in the channel in order to read messages\n    if not channel[\"is_member\"]:\n        try:\n            client.conversations_join(\n                channel=channel[\"id\"],\n                is_private=channel[\"is_private\"],\n            )\n        except SlackApiError as e:\n            if e.response[\"error\"] == \"is_archived\":\n                logger.warning(f\"Channel {channel['name']} is archived. Skipping.\")\n                return [], False\n\n            logger.exception(f\"Error joining channel {channel['name']}\")\n            raise\n        logger.info(f\"Successfully joined '{channel['name']}'\")\n\n    response = client.conversations_history(\n        channel=channel[\"id\"],\n        oldest=oldest,\n        latest=latest,\n        limit=limit,\n    )\n    response.validate()\n\n    messages = cast(list[MessageType], response.get(\"messages\", []))\n\n    cursor = cast(dict[str, Any], response.get(\"response_metadata\", {})).get(\n        \"next_cursor\", \"\"\n    )\n    has_more = bool(cursor)\n    return messages, has_more\n\n\ndef _message_to_doc(\n    message: MessageType,\n    client: WebClient,\n    channel: ChannelType,\n    slack_cleaner: SlackTextCleaner,\n    user_cache: dict[str, BasicExpertInfo | None],\n    seen_thread_ts: set[str],\n    channel_access: ExternalAccess | None,\n    msg_filter_func: Callable[\n        [MessageType], SlackMessageFilterReason | None\n    ] = default_msg_filter,\n) -> tuple[Document | None, SlackMessageFilterReason | None]:\n    \"\"\"Returns a doc or None.\n    If None is returned, the second element of the tuple may be a filter reason\n    \"\"\"\n    filtered_thread: ThreadType | None = None\n    filter_reason: SlackMessageFilterReason | None = None\n    thread_ts = message.get(\"thread_ts\")\n    if thread_ts:\n        # NOTE: if thread_ts is present, there's a thread we need to process\n        # ... otherwise, we can skip it\n\n        # skip threads we've already seen, since we've already processed all\n        # messages in that thread\n        if thread_ts in seen_thread_ts:\n            return None, None\n\n        thread = get_thread(\n            client=client, channel_id=channel[\"id\"], thread_id=thread_ts\n        )\n\n        # we'll just set and use the last filter reason if\n        # we bomb out later\n        filtered_thread = []\n        for message in thread:\n            filter_reason = msg_filter_func(message)\n            if filter_reason:\n                continue\n\n            filtered_thread.append(message)\n    else:\n        filter_reason = msg_filter_func(message)\n        if filter_reason:\n            return None, filter_reason\n\n        filtered_thread = [message]\n\n    # we'll just set and use the last filter reason if we get an empty list\n    if not filtered_thread:\n        return None, filter_reason\n\n    doc = thread_to_doc(\n        channel=channel,\n        thread=filtered_thread,\n        slack_cleaner=slack_cleaner,\n        client=client,\n        user_cache=user_cache,\n        channel_access=channel_access,\n    )\n    return doc, None\n\n\ndef _get_all_doc_ids(\n    client: WebClient,\n    channels: list[str] | None = None,\n    channel_name_regex_enabled: bool = False,\n    msg_filter_func: Callable[\n        [MessageType], SlackMessageFilterReason | None\n    ] = default_msg_filter,\n    callback: IndexingHeartbeatInterface | None = None,\n    workspace_url: str | None = None,\n    start: SecondsSinceUnixEpoch | None = None,\n    end: SecondsSinceUnixEpoch | None = None,\n) -> GenerateSlimDocumentOutput:\n    \"\"\"\n    Get all document ids in the workspace, channel by channel\n    This is pretty identical to get_all_docs, but it returns a set of ids instead of documents\n    This makes it an order of magnitude faster than get_all_docs\n    \"\"\"\n\n    all_channels = get_channels(client)\n    filtered_channels = filter_channels(\n        all_channels, channels, channel_name_regex_enabled\n    )\n    user_cache: dict[str, BasicExpertInfo | None] = {}\n\n    for channel in filtered_channels:\n        channel_id = channel[\"id\"]\n        # NOTE: external_access is a frozen object, so it's okay to safe to use a single\n        # instance for all documents in the channel\n        external_access = get_channel_access(\n            client=client,\n            channel=channel,\n            user_cache=user_cache,\n        )\n\n        # Yield the channel as a HierarchyNode first (before any documents)\n        yield [_channel_to_hierarchy_node(channel, external_access, workspace_url)]\n\n        channel_message_batches = get_channel_messages(\n            client=client,\n            channel=channel,\n            callback=callback,\n            oldest=str(start) if start else None,  # 0.0 -> None intentionally\n            latest=str(end) if end is not None else None,\n        )\n\n        for message_batch in channel_message_batches:\n            slim_doc_batch: list[SlimDocument | HierarchyNode] = []\n            for message in message_batch:\n                filter_reason = msg_filter_func(message)\n                if filter_reason:\n                    continue\n\n                # The document id is the channel id and the ts of the first message in the thread\n                # Since we already have the first message of the thread, we dont have to\n                # fetch the thread for id retrieval, saving time and API calls\n\n                slim_doc_batch.append(\n                    SlimDocument(\n                        id=_build_doc_id(\n                            channel_id=channel_id, thread_ts=message[\"ts\"]\n                        ),\n                        external_access=external_access,\n                        parent_hierarchy_raw_node_id=channel_id,\n                    )\n                )\n\n            yield slim_doc_batch\n\n\nclass ProcessedSlackMessage(BaseModel):\n    doc: Document | None\n    # if the message is part of a thread, this is the thread_ts\n    # otherwise, this is the message_ts. Either way, will be a unique identifier.\n    # In the future, if the message becomes a thread, then the thread_ts\n    # will be set to the message_ts.\n    thread_or_message_ts: str\n\n    # if doc is None, filter_reason may be populated\n    filter_reason: SlackMessageFilterReason | None\n    failure: ConnectorFailure | None\n\n\ndef _process_message(\n    message: MessageType,\n    client: WebClient,\n    channel: ChannelType,\n    slack_cleaner: SlackTextCleaner,\n    user_cache: dict[str, BasicExpertInfo | None],\n    seen_thread_ts: set[str],\n    channel_access: ExternalAccess | None,\n    msg_filter_func: Callable[\n        [MessageType], SlackMessageFilterReason | None\n    ] = default_msg_filter,\n) -> ProcessedSlackMessage:\n    thread_ts = message.get(\"thread_ts\")\n    thread_or_message_ts = thread_ts or message[\"ts\"]\n    try:\n        # causes random failures for testing checkpointing / continue on failure\n        # import random\n        # if random.random() > 0.95:\n        #     raise RuntimeError(\"Random failure :P\")\n\n        doc, filter_reason = _message_to_doc(\n            message=message,\n            client=client,\n            channel=channel,\n            slack_cleaner=slack_cleaner,\n            user_cache=user_cache,\n            seen_thread_ts=seen_thread_ts,\n            channel_access=channel_access,\n            msg_filter_func=msg_filter_func,\n        )\n        return ProcessedSlackMessage(\n            doc=doc,\n            thread_or_message_ts=thread_or_message_ts,\n            filter_reason=filter_reason,\n            failure=None,\n        )\n    except Exception as e:\n        logger.exception(f\"Error processing message {message['ts']}\")\n        return ProcessedSlackMessage(\n            doc=None,\n            thread_or_message_ts=thread_or_message_ts,\n            filter_reason=None,\n            failure=ConnectorFailure(\n                failed_document=DocumentFailure(\n                    document_id=_build_doc_id(\n                        channel_id=channel[\"id\"], thread_ts=thread_or_message_ts\n                    ),\n                    document_link=get_message_link(message, client, channel[\"id\"]),\n                ),\n                failure_message=str(e),\n                exception=e,\n            ),\n        )\n\n\nclass SlackConnector(\n    SlimConnectorWithPermSync,\n    CredentialsConnector,\n    CheckpointedConnectorWithPermSync[SlackCheckpoint],\n):\n    FAST_TIMEOUT = 1\n\n    MAX_RETRIES = 7  # arbitrarily selected\n\n    MAX_CHANNELS_TO_LOG = 50\n\n    # *** values to use when filtering bot channels ***\n\n    # the number of messages in the batch must be greater than or equal to this number\n    # to consider filtering the channel\n    BOT_CHANNEL_MIN_BATCH_SIZE = 256\n\n    # the percentage of messages in the batch above which the channel will be considered\n    # a bot channel\n    BOT_CHANNEL_PERCENTAGE_THRESHOLD = 0.95\n\n    def __init__(\n        self,\n        channels: list[str] | None = None,\n        # if specified, will treat the specified channel strings as\n        # regexes, and will only index channels that fully match the regexes\n        channel_regex_enabled: bool = False,\n        # if True, messages from bots/apps will be indexed instead of filtered out\n        include_bot_messages: bool = False,\n        batch_size: int = INDEX_BATCH_SIZE,\n        num_threads: int = SLACK_NUM_THREADS,\n        use_redis: bool = True,\n    ) -> None:\n        self.channels = channels\n        self.channel_regex_enabled = channel_regex_enabled\n        self.include_bot_messages = include_bot_messages\n        self.msg_filter_func = (\n            _bot_inclusive_msg_filter if include_bot_messages else default_msg_filter\n        )\n        self.batch_size = batch_size\n        self.num_threads = num_threads\n        self.client: WebClient | None = None\n        self.fast_client: WebClient | None = None\n        # just used for efficiency\n        self.text_cleaner: SlackTextCleaner | None = None\n        self.user_cache: dict[str, BasicExpertInfo | None] = {}\n        self.credentials_provider: CredentialsProviderInterface | None = None\n        self.credential_prefix: str | None = None\n        self.use_redis: bool = use_redis\n        # Workspace URL for building channel links (e.g., https://myworkspace.slack.com)\n        self._workspace_url: str | None = None\n        # self.delay_lock: str | None = None  # the redis key for the shared lock\n        # self.delay_key: str | None = None  # the redis key for the shared delay\n\n    @classmethod\n    @override\n    def normalize_url(cls, url: str) -> NormalizationResult:\n        \"\"\"Normalize a Slack URL to extract channel_id__thread_ts format.\"\"\"\n        parsed = urlparse(url)\n        if \"slack.com\" not in parsed.netloc.lower():\n            return NormalizationResult(normalized_url=None, use_default=False)\n\n        # Slack document IDs are format: channel_id__thread_ts\n        # Extract from URL pattern: .../archives/{channel_id}/p{timestamp}\n        path_parts = parsed.path.split(\"/\")\n        if \"archives\" not in path_parts:\n            return NormalizationResult(normalized_url=None, use_default=False)\n\n        archives_idx = path_parts.index(\"archives\")\n        if archives_idx + 1 >= len(path_parts):\n            return NormalizationResult(normalized_url=None, use_default=False)\n\n        channel_id = path_parts[archives_idx + 1]\n        if archives_idx + 2 >= len(path_parts):\n            return NormalizationResult(normalized_url=None, use_default=False)\n\n        thread_part = path_parts[archives_idx + 2]\n        if not thread_part.startswith(\"p\"):\n            return NormalizationResult(normalized_url=None, use_default=False)\n\n        # Convert p1234567890123456 to 1234567890.123456 format\n        timestamp_str = thread_part[1:]  # Remove 'p' prefix\n        if len(timestamp_str) == 16:\n            # Insert dot at position 10 to match canonical format\n            thread_ts = f\"{timestamp_str[:10]}.{timestamp_str[10:]}\"\n        else:\n            thread_ts = timestamp_str\n\n        normalized = f\"{channel_id}__{thread_ts}\"\n        return NormalizationResult(normalized_url=normalized, use_default=False)\n\n    @staticmethod\n    def make_credential_prefix(key: str) -> str:\n        return f\"connector:slack:credential_{key}\"\n\n    @staticmethod\n    def make_delay_lock(prefix: str) -> str:\n        return f\"{prefix}:delay_lock\"\n\n    @staticmethod\n    def make_delay_key(prefix: str) -> str:\n        return f\"{prefix}:delay\"\n\n    @staticmethod\n    def make_slack_web_client(\n        prefix: str, token: str, max_retry_count: int, r: Redis\n    ) -> WebClient:\n        delay_lock = SlackConnector.make_delay_lock(prefix)\n        delay_key = SlackConnector.make_delay_key(prefix)\n\n        # NOTE: slack has a built in RateLimitErrorRetryHandler, but it isn't designed\n        # for concurrent workers. We've extended it with OnyxRedisSlackRetryHandler.\n        connection_error_retry_handler = ConnectionErrorRetryHandler(\n            max_retry_count=max_retry_count,\n            interval_calculator=FixedValueRetryIntervalCalculator(),\n            error_types=[\n                URLError,\n                ConnectionResetError,\n                RemoteDisconnected,\n                IncompleteRead,\n            ],\n        )\n\n        onyx_rate_limit_error_retry_handler = OnyxRedisSlackRetryHandler(\n            max_retry_count=max_retry_count,\n            delay_key=delay_key,\n            r=r,\n        )\n        custom_retry_handlers: list[RetryHandler] = [\n            connection_error_retry_handler,\n            onyx_rate_limit_error_retry_handler,\n        ]\n\n        client = OnyxSlackWebClient(\n            delay_lock=delay_lock,\n            delay_key=delay_key,\n            r=r,\n            token=token,\n            retry_handlers=custom_retry_handlers,\n        )\n        return client\n\n    @property\n    def channels(self) -> list[str] | None:\n        return self._channels\n\n    @channels.setter\n    def channels(self, channels: list[str] | None) -> None:\n        self._channels = (\n            [channel.removeprefix(\"#\") for channel in channels] if channels else None\n        )\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        raise NotImplementedError(\"Use set_credentials_provider with this connector.\")\n\n    def set_credentials_provider(\n        self, credentials_provider: CredentialsProviderInterface\n    ) -> None:\n        credentials = credentials_provider.get_credentials()\n        tenant_id = credentials_provider.get_tenant_id()\n        if not tenant_id:\n            raise ValueError(\"tenant_id cannot be None!\")\n\n        bot_token = credentials[\"slack_bot_token\"]\n\n        if self.use_redis:\n            self.redis = get_redis_client(tenant_id=tenant_id)\n            self.credential_prefix = SlackConnector.make_credential_prefix(\n                credentials_provider.get_provider_key()\n            )\n\n            self.client = SlackConnector.make_slack_web_client(\n                self.credential_prefix, bot_token, self.MAX_RETRIES, self.redis\n            )\n        else:\n            connection_error_retry_handler = ConnectionErrorRetryHandler(\n                max_retry_count=self.MAX_RETRIES,\n                interval_calculator=FixedValueRetryIntervalCalculator(),\n                error_types=[\n                    URLError,\n                    ConnectionResetError,\n                    RemoteDisconnected,\n                    IncompleteRead,\n                ],\n            )\n\n            self.client = WebClient(\n                token=bot_token, retry_handlers=[connection_error_retry_handler]\n            )\n\n        # use for requests that must return quickly (e.g. realtime flows where user is waiting)\n        self.fast_client = WebClient(\n            token=bot_token, timeout=SlackConnector.FAST_TIMEOUT\n        )\n        self.text_cleaner = SlackTextCleaner(client=self.client)\n        self.credentials_provider = credentials_provider\n\n        # Extract workspace URL from auth_test response for building channel links\n        try:\n            auth_response = self.client.auth_test()\n            self._workspace_url = auth_response.get(\"url\")\n        except Exception as e:\n            logger.warning(f\"Failed to get workspace URL from auth_test: {e}\")\n            self._workspace_url = None\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> GenerateSlimDocumentOutput:\n        if self.client is None:\n            raise ConnectorMissingCredentialError(\"Slack\")\n\n        return _get_all_doc_ids(\n            client=self.client,\n            channels=self.channels,\n            channel_name_regex_enabled=self.channel_regex_enabled,\n            msg_filter_func=self.msg_filter_func,\n            callback=callback,\n            workspace_url=self._workspace_url,\n            start=start,\n            end=end,\n        )\n\n    def _load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: SlackCheckpoint,\n        include_permissions: bool = False,\n    ) -> CheckpointOutput[SlackCheckpoint]:\n        \"\"\"Rough outline:\n\n        Step 1: Get all channels, yield back Checkpoint.\n        Step 2: Loop through each channel. For each channel:\n            Step 2.1: Get messages within the time range.\n            Step 2.2: Process messages in parallel, yield back docs.\n            Step 2.3: Update checkpoint with new_oldest, seen_thread_ts, and current_channel.\n                      Slack returns messages from newest to oldest, so we need to keep track of\n                      the latest message we've seen in each channel.\n            Step 2.4: If there are no more messages in the channel, switch the current\n                      channel to the next channel.\n        \"\"\"\n        num_channels_remaining = 0\n\n        if self.client is None or self.text_cleaner is None:\n            raise ConnectorMissingCredentialError(\"Slack\")\n\n        checkpoint = cast(SlackCheckpoint, copy.deepcopy(checkpoint))\n\n        # if this is the very first time we've called this, need to\n        # get all relevant channels and save them into the checkpoint\n        if checkpoint.channel_ids is None:\n            raw_channels = get_channels(self.client)\n            filtered_channels = filter_channels(\n                raw_channels, self.channels, self.channel_regex_enabled\n            )\n            logger.info(\n                f\"Channels - initial checkpoint: all={len(raw_channels)} post_filtering={len(filtered_channels)}\"\n            )\n\n            checkpoint.channel_ids = [c[\"id\"] for c in filtered_channels]\n            if len(filtered_channels) == 0:\n                checkpoint.has_more = False\n                return checkpoint\n\n            checkpoint.current_channel = filtered_channels[0]\n            if include_permissions:\n                # checkpoint.current_channel is guaranteed to be non-None here since we just assigned it\n                assert checkpoint.current_channel is not None\n                channel_access = get_channel_access(\n                    client=self.client,\n                    channel=checkpoint.current_channel,\n                    user_cache=self.user_cache,\n                )\n                checkpoint.current_channel_access = channel_access\n            checkpoint.has_more = True\n            return checkpoint\n\n        final_channel_ids = checkpoint.channel_ids\n        for channel_id in final_channel_ids:\n            if channel_id not in checkpoint.channel_completion_map:\n                num_channels_remaining += 1\n\n        logger.info(\n            f\"Channels - current status: \"\n            f\"processed={len(final_channel_ids) - num_channels_remaining} \"\n            f\"remaining={num_channels_remaining} \"\n            f\"total={len(final_channel_ids)}\"\n        )\n\n        channel = checkpoint.current_channel\n        if channel is None:\n            raise ValueError(\"current_channel key not set in checkpoint\")\n\n        channel_id = channel[\"id\"]\n        if channel_id not in final_channel_ids:\n            raise ValueError(f\"Channel {channel_id} not found in checkpoint\")\n\n        channel_created = channel[\"created\"]\n\n        seen_thread_ts = set(checkpoint.seen_thread_ts)\n\n        try:\n            num_bot_filtered_messages = 0\n            num_other_filtered_messages = 0\n\n            oldest = str(start) if start else None\n            latest = str(end)\n\n            channel_message_ts = checkpoint.channel_completion_map.get(channel_id)\n            if channel_message_ts:\n                # Set oldest to the checkpoint timestamp to resume from where we left off\n                oldest = channel_message_ts\n            else:\n                # First time processing this channel - yield its hierarchy node\n                yield _channel_to_hierarchy_node(\n                    channel,\n                    checkpoint.current_channel_access,\n                    self._workspace_url,\n                )\n\n            logger.debug(\n                f\"Getting messages for channel {channel} within range {oldest} - {latest}\"\n            )\n\n            message_batch, has_more_in_channel = _get_messages(\n                channel, self.client, oldest, latest\n            )\n\n            logger.info(\n                f\"Retrieved messages: {len(message_batch)=} {channel=} {oldest=} {latest=}\"\n            )\n\n            # message_batch[0] is the newest message (Slack returns newest to oldest)\n            new_oldest = message_batch[0][\"ts\"] if message_batch else latest\n\n            num_threads_start = len(seen_thread_ts)\n\n            # Process messages in parallel using ThreadPoolExecutor\n            with ThreadPoolExecutor(max_workers=self.num_threads) as executor:\n                # NOTE(rkuo): this seems to be assuming the slack sdk is thread safe.\n                # That's a very bold assumption! Haven't seen a direct issue with this\n                # yet, but likely not correct to rely on.\n\n                futures: list[Future[ProcessedSlackMessage]] = []\n                for message in message_batch:\n                    # Capture the current context so that the thread gets the current tenant ID\n                    current_context = contextvars.copy_context()\n                    futures.append(\n                        executor.submit(\n                            current_context.run,\n                            _process_message,\n                            message=message,\n                            client=self.client,\n                            channel=channel,\n                            slack_cleaner=self.text_cleaner,\n                            user_cache=self.user_cache,\n                            seen_thread_ts=seen_thread_ts,\n                            channel_access=checkpoint.current_channel_access,\n                            msg_filter_func=self.msg_filter_func,\n                        )\n                    )\n\n                for future in as_completed(futures):\n                    processed_slack_message = future.result()\n                    doc = processed_slack_message.doc\n                    thread_or_message_ts = processed_slack_message.thread_or_message_ts\n                    failure = processed_slack_message.failure\n                    if doc:\n                        # handle race conditions here since this is single\n                        # threaded. Multi-threaded _process_message reads from this\n                        # but since this is single threaded, we won't run into simul\n                        # writes. At worst, we can duplicate a thread, which will be\n                        # deduped later on.\n                        if thread_or_message_ts not in seen_thread_ts:\n                            yield doc\n\n                        seen_thread_ts.add(thread_or_message_ts)\n                    elif processed_slack_message.filter_reason:\n                        if (\n                            processed_slack_message.filter_reason\n                            == SlackMessageFilterReason.BOT\n                        ):\n                            num_bot_filtered_messages += 1\n                        else:\n                            num_other_filtered_messages += 1\n                    elif failure:\n                        yield failure\n\n            num_threads_processed = len(seen_thread_ts) - num_threads_start\n\n            # calculate a percentage progress for the current channel by determining\n            # how much of the time range we've processed so far\n            new_oldest_seconds_epoch = SecondsSinceUnixEpoch(new_oldest)\n            range_start = start if start else max(0, channel_created)\n            if new_oldest_seconds_epoch < range_start:\n                range_complete = 0.0\n            else:\n                range_complete = new_oldest_seconds_epoch - range_start\n\n            range_total = end - range_start\n            if range_total <= 0:\n                range_total = 1\n            range_percent_complete = range_complete / range_total * 100.0\n\n            num_filtered = num_bot_filtered_messages + num_other_filtered_messages\n            log_func = logger.warning if num_bot_filtered_messages > 0 else logger.info\n            log_func(\n                f\"Message processing stats: \"\n                f\"batch_len={len(message_batch)} \"\n                f\"batch_yielded={num_threads_processed} \"\n                f\"filtered={num_filtered} \"\n                f\"(bot={num_bot_filtered_messages} other={num_other_filtered_messages}) \"\n                f\"total_threads_seen={len(seen_thread_ts)}\"\n            )\n\n            logger.info(\n                f\"Current channel processing stats: {range_start=} range_end={end} percent_complete={range_percent_complete=:.2f}\"\n            )\n\n            checkpoint.seen_thread_ts = list(seen_thread_ts)\n            checkpoint.channel_completion_map[channel[\"id\"]] = new_oldest\n\n            # bypass channels where the first set of messages seen are all\n            # filtered (bots + disallowed subtypes like channel_join)\n            # check at least MIN_BOT_MESSAGE_THRESHOLD messages are in the batch\n            # we shouldn't skip based on a small sampling of messages\n            if (\n                channel_message_ts is None\n                and len(message_batch) > SlackConnector.BOT_CHANNEL_MIN_BATCH_SIZE\n            ):\n                if (\n                    num_filtered\n                    > SlackConnector.BOT_CHANNEL_PERCENTAGE_THRESHOLD\n                    * len(message_batch)\n                ):\n                    logger.warning(\n                        \"Bypassing this channel since it appears to be mostly bot messages\"\n                    )\n                    has_more_in_channel = False\n\n            if not has_more_in_channel:\n                num_channels_remaining -= 1\n\n                new_channel_id = next(\n                    (\n                        channel_id\n                        for channel_id in final_channel_ids\n                        if channel_id not in checkpoint.channel_completion_map\n                    ),\n                    None,\n                )\n\n                if new_channel_id:\n                    new_channel = _get_channel_by_id(self.client, new_channel_id)\n                    checkpoint.current_channel = new_channel\n                    if include_permissions:\n                        channel_access = get_channel_access(\n                            client=self.client,\n                            channel=new_channel,\n                            user_cache=self.user_cache,\n                        )\n                        checkpoint.current_channel_access = channel_access\n                else:\n                    checkpoint.current_channel = None\n\n            checkpoint.has_more = checkpoint.current_channel is not None\n\n            channels_processed = len(final_channel_ids) - num_channels_remaining\n            channels_percent_complete = (\n                channels_processed / len(final_channel_ids) * 100.0\n            )\n            logger.info(\n                f\"All channels processing stats: \"\n                f\"processed={len(final_channel_ids) - num_channels_remaining} \"\n                f\"remaining={num_channels_remaining} \"\n                f\"total={len(final_channel_ids)} \"\n                f\"percent_complete={channels_percent_complete:.2f}\"\n            )\n        except Exception as e:\n            logger.exception(f\"Error processing channel {channel['name']}\")\n            yield ConnectorFailure(\n                failed_entity=EntityFailure(\n                    entity_id=channel[\"id\"],\n                    missed_time_range=(\n                        datetime.fromtimestamp(start, tz=timezone.utc),\n                        datetime.fromtimestamp(end, tz=timezone.utc),\n                    ),\n                ),\n                failure_message=str(e),\n                exception=e,\n            )\n\n        return checkpoint\n\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: SlackCheckpoint,\n    ) -> CheckpointOutput[SlackCheckpoint]:\n        return self._load_from_checkpoint(\n            start, end, checkpoint, include_permissions=False\n        )\n\n    def load_from_checkpoint_with_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: SlackCheckpoint,\n    ) -> CheckpointOutput[SlackCheckpoint]:\n        return self._load_from_checkpoint(\n            start, end, checkpoint, include_permissions=True\n        )\n\n    def validate_connector_settings(self) -> None:\n        \"\"\"\n        1. Verify the bot token is valid for the workspace (via auth_test).\n        2. Ensure the bot has enough scope to list channels.\n        3. Check that every channel specified in self.channels exists (only when regex is not enabled).\n        \"\"\"\n        if self.fast_client is None:\n            raise ConnectorMissingCredentialError(\"Slack credentials not loaded.\")\n\n        try:\n            # 1) Validate connection to workspace\n            auth_response = self.fast_client.auth_test()\n            if not auth_response.get(\"ok\", False):\n                error_msg = auth_response.get(\n                    \"error\", \"Unknown error from Slack auth_test\"\n                )\n                raise ConnectorValidationError(f\"Failed Slack auth_test: {error_msg}\")\n\n            # 2) Minimal test to confirm listing channels works\n            test_resp = self.fast_client.conversations_list(\n                limit=1, types=[\"public_channel\"]\n            )\n            if not test_resp.get(\"ok\", False):\n                error_msg = test_resp.get(\"error\", \"Unknown error from Slack\")\n                if error_msg == \"invalid_auth\":\n                    raise ConnectorValidationError(\n                        f\"Invalid Slack bot token ({error_msg}).\"\n                    )\n                elif error_msg == \"not_authed\":\n                    raise CredentialExpiredError(\n                        f\"Invalid or expired Slack bot token ({error_msg}).\"\n                    )\n                raise UnexpectedValidationError(\n                    f\"Slack API returned a failure: {error_msg}\"\n                )\n\n            # 3) If channels are specified and regex is not enabled, verify each is accessible\n            # NOTE: removed this for now since it may be too slow for large workspaces which may\n            # have some automations which create a lot of channels (100k+)\n\n            # if self.channels and not self.channel_regex_enabled:\n            #     accessible_channels = get_channels(\n            #         client=self.fast_client,\n            #         exclude_archived=True,\n            #         get_public=True,\n            #         get_private=True,\n            #     )\n            #     # For quick lookups by name or ID, build a map:\n            #     accessible_channel_names = {ch[\"name\"] for ch in accessible_channels}\n            #     accessible_channel_ids = {ch[\"id\"] for ch in accessible_channels}\n\n            #     for user_channel in self.channels:\n            #         if (\n            #             user_channel not in accessible_channel_names\n            #             and user_channel not in accessible_channel_ids\n            #         ):\n            #             raise ConnectorValidationError(\n            #                 f\"Channel '{user_channel}' not found or inaccessible in this workspace.\"\n            #             )\n\n        except SlackApiError as e:\n            slack_error = e.response.get(\"error\", \"\")\n            if slack_error == \"ratelimited\":\n                # Handle rate limiting specifically\n                retry_after = int(e.response.headers.get(\"Retry-After\", 1))\n                logger.warning(\n                    f\"Slack API rate limited during validation. Retry suggested after {retry_after} seconds. \"\n                    \"Proceeding with validation, but be aware that connector operations might be throttled.\"\n                )\n                # Continue validation without failing - the connector is likely valid but just rate limited\n                return\n            elif slack_error == \"missing_scope\":\n                raise InsufficientPermissionsError(\n                    \"Slack bot token lacks the necessary scope to list/access channels. \"\n                    \"Please ensure your Slack app has 'channels:read' (and/or 'groups:read' for private channels).\"\n                )\n            elif slack_error == \"invalid_auth\":\n                raise CredentialExpiredError(\n                    f\"Invalid Slack bot token ({slack_error}).\"\n                )\n            elif slack_error == \"not_authed\":\n                raise CredentialExpiredError(\n                    f\"Invalid or expired Slack bot token ({slack_error}).\"\n                )\n            raise UnexpectedValidationError(\n                f\"Unexpected Slack error '{slack_error}' during settings validation.\"\n            )\n        except ConnectorValidationError as e:\n            raise e\n        except Exception as e:\n            raise UnexpectedValidationError(\n                f\"Unexpected error during Slack settings validation: {e}\"\n            )\n\n    @override\n    def build_dummy_checkpoint(self) -> SlackCheckpoint:\n        return SlackCheckpoint(\n            channel_ids=None,\n            channel_completion_map={},\n            current_channel=None,\n            current_channel_access=None,\n            seen_thread_ts=[],\n            has_more=True,\n        )\n\n    @override\n    def validate_checkpoint_json(self, checkpoint_json: str) -> SlackCheckpoint:\n        return SlackCheckpoint.model_validate_json(checkpoint_json)\n\n\nif __name__ == \"__main__\":\n    import os\n    import time\n    from onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider\n    from shared_configs.contextvars import get_current_tenant_id\n\n    slack_channel = os.environ.get(\"SLACK_CHANNEL\")\n    connector = SlackConnector(\n        channels=[slack_channel] if slack_channel else None,\n    )\n\n    provider = OnyxStaticCredentialsProvider(\n        tenant_id=get_current_tenant_id(),\n        connector_name=\"slack\",\n        credential_json={\n            \"slack_bot_token\": os.environ[\"SLACK_BOT_TOKEN\"],\n        },\n    )\n    connector.set_credentials_provider(provider)\n\n    current = time.time()\n    one_day_ago = current - 24 * 60 * 60  # 1 day\n\n    checkpoint = connector.build_dummy_checkpoint()\n\n    gen = connector.load_from_checkpoint(\n        one_day_ago,\n        current,\n        cast(SlackCheckpoint, checkpoint),\n    )\n    try:\n        for document_or_failure in gen:\n            if isinstance(document_or_failure, Document):\n                print(document_or_failure)\n            elif isinstance(document_or_failure, ConnectorFailure):\n                print(document_or_failure)\n    except StopIteration as e:\n        checkpoint = e.value\n        print(\"Next checkpoint:\", checkpoint)\n\n    print(\"Next checkpoint:\", checkpoint)\n"
  },
  {
    "path": "backend/onyx/connectors/slack/models.py",
    "content": "from typing import NotRequired\n\nfrom typing_extensions import TypedDict\n\n\nclass ChannelTopicPurposeType(TypedDict):\n    \"\"\"\n    Represents the topic or purpose of a Slack channel.\n    \"\"\"\n\n    value: str\n    creator: str\n    last_set: int\n\n\nclass ChannelType(TypedDict):\n    \"\"\"\n    Represents a Slack channel.\n    \"\"\"\n\n    id: str\n    name: str\n    is_channel: bool\n    is_group: bool\n    is_im: bool\n    created: int\n    creator: str\n    is_archived: bool\n    is_general: bool\n    unlinked: int\n    name_normalized: str\n    is_shared: bool\n    is_ext_shared: bool\n    is_org_shared: bool\n    pending_shared: list[str]\n    is_pending_ext_shared: bool\n    is_member: bool\n    is_private: bool\n    is_mpim: bool\n    updated: int\n    topic: ChannelTopicPurposeType\n    purpose: ChannelTopicPurposeType\n    previous_names: list[str]\n    num_members: int\n\n\nclass AttachmentType(TypedDict):\n    \"\"\"\n    Represents a Slack message attachment.\n    \"\"\"\n\n    service_name: NotRequired[str]\n    text: NotRequired[str]\n    fallback: NotRequired[str]\n    thumb_url: NotRequired[str]\n    thumb_width: NotRequired[int]\n    thumb_height: NotRequired[int]\n    id: NotRequired[int]\n\n\nclass BotProfileType(TypedDict):\n    \"\"\"\n    Represents a Slack bot profile.\n    \"\"\"\n\n    id: NotRequired[str]\n    deleted: NotRequired[bool]\n    name: NotRequired[str]\n    updated: NotRequired[int]\n    app_id: NotRequired[str]\n    team_id: NotRequired[str]\n\n\nclass MessageType(TypedDict):\n    \"\"\"\n    Represents a Slack message.\n    \"\"\"\n\n    type: str\n    user: str\n    text: str\n    ts: str\n    attachments: NotRequired[list[AttachmentType]]\n    # Bot-related fields\n    bot_id: NotRequired[str]\n    app_id: NotRequired[str]\n    bot_profile: NotRequired[BotProfileType]\n    # Message threading\n    thread_ts: NotRequired[str]\n    # Message subtype (for filtering certain message types)\n    subtype: NotRequired[str]\n\n\n# list of messages in a thread\nThreadType = list[MessageType]\n"
  },
  {
    "path": "backend/onyx/connectors/slack/onyx_retry_handler.py",
    "content": "import random\nfrom typing import cast\nfrom typing import Optional\n\nfrom redis import Redis\nfrom slack_sdk.http_retry.handler import RetryHandler\nfrom slack_sdk.http_retry.request import HttpRequest\nfrom slack_sdk.http_retry.response import HttpResponse\nfrom slack_sdk.http_retry.state import RetryState\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass OnyxRedisSlackRetryHandler(RetryHandler):\n    \"\"\"\n    This class uses Redis to share a rate limit among multiple threads.\n\n    As currently implemented, this code is already surrounded by a lock in Redis\n    via an override of _perform_urllib_http_request in OnyxSlackWebClient.\n\n    This just sets the desired retry delay with TTL in redis. In conjunction with\n    a custom subclass of the client, the value is read and obeyed prior to an API call\n    and also serialized.\n\n    Another way to do this is just to do exponential backoff. Might be easier?\n\n    Adapted from slack's RateLimitErrorRetryHandler.\n    \"\"\"\n\n    \"\"\"RetryHandler that does retries for rate limited errors.\"\"\"\n\n    def __init__(\n        self,\n        max_retry_count: int,\n        delay_key: str,\n        r: Redis,\n    ):\n        \"\"\"\n        delay_lock: the redis key to use with RedisLock (to synchronize access to delay_key)\n        delay_key: the redis key containing a shared TTL\n        \"\"\"\n        super().__init__(max_retry_count=max_retry_count)\n        self._redis: Redis = r\n        self._delay_key = delay_key\n\n    def _can_retry(\n        self,\n        *,\n        state: RetryState,  # noqa: ARG002\n        request: HttpRequest,  # noqa: ARG002\n        response: Optional[HttpResponse] = None,\n        error: Optional[Exception] = None,  # noqa: ARG002\n    ) -> bool:\n        return response is not None and response.status_code == 429\n\n    def prepare_for_next_attempt(\n        self,\n        *,\n        state: RetryState,\n        request: HttpRequest,  # noqa: ARG002\n        response: Optional[HttpResponse] = None,\n        error: Optional[Exception] = None,\n    ) -> None:\n        \"\"\"As initially designed by the SDK authors, this function is responsible for\n        the wait to retry ... aka we actually sleep in this function.\n\n        This doesn't work well with multiple clients because every thread is unaware\n        of the current retry value until it actually calls the endpoint.\n\n        We're combining this with an actual subclass of the slack web client so\n        that the delay is used BEFORE calling an API endpoint. The subclassed client\n        has already taken the lock in redis when this method is called.\n        \"\"\"\n        ttl_ms: int | None = None\n\n        retry_after_value: str | None = None\n        retry_after_header_name: Optional[str] = None\n        duration_s: float = 1.0  # seconds\n\n        if response is None:\n            # NOTE(rkuo): this logic comes from RateLimitErrorRetryHandler.\n            # This reads oddly, as if the caller itself could raise the exception.\n            # We don't have the luxury of changing this.\n            if error:\n                raise error\n\n            return\n\n        state.next_attempt_requested = True  # this signals the caller to retry\n\n        # calculate wait duration based on retry-after + some jitter\n        for k in response.headers.keys():\n            if k.lower() == \"retry-after\":\n                retry_after_header_name = k\n                break\n\n        try:\n            if retry_after_header_name is None:\n                # This situation usually does not arise. Just in case.\n                raise ValueError(\n                    \"OnyxRedisSlackRetryHandler.prepare_for_next_attempt: retry-after header name is None\"\n                )\n\n            retry_after_header_value = response.headers.get(retry_after_header_name)\n            if not retry_after_header_value:\n                raise ValueError(\n                    \"OnyxRedisSlackRetryHandler.prepare_for_next_attempt: retry-after header value is None\"\n                )\n\n            # Handle case where header value might be a list\n            retry_after_value = (\n                retry_after_header_value[0]\n                if isinstance(retry_after_header_value, list)\n                else retry_after_header_value\n            )\n\n            retry_after_value_int = int(\n                retry_after_value\n            )  # will raise ValueError if somehow we can't convert to int\n            jitter = retry_after_value_int * 0.25 * random.random()\n            duration_s = retry_after_value_int + jitter\n        except ValueError:\n            duration_s += random.random()\n\n        # Read and extend the ttl\n        ttl_ms = cast(int, self._redis.pttl(self._delay_key))\n        if ttl_ms < 0:  # negative values are error status codes ... see docs\n            ttl_ms = 0\n        ttl_ms_new = ttl_ms + int(duration_s * 1000.0)\n        self._redis.set(self._delay_key, \"1\", px=ttl_ms_new)\n\n        logger.warning(\n            f\"OnyxRedisSlackRetryHandler.prepare_for_next_attempt setting delay: \"\n            f\"current_attempt={state.current_attempt} \"\n            f\"retry-after={retry_after_value} \"\n            f\"{ttl_ms_new=}\"\n        )\n\n        state.increment_current_attempt()\n"
  },
  {
    "path": "backend/onyx/connectors/slack/onyx_slack_web_client.py",
    "content": "import threading\nimport time\nfrom typing import Any\nfrom typing import cast\nfrom typing import Dict\nfrom urllib.request import Request\n\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom slack_sdk import WebClient\n\nfrom onyx.connectors.slack.utils import ONYX_SLACK_LOCK_BLOCKING_TIMEOUT\nfrom onyx.connectors.slack.utils import ONYX_SLACK_LOCK_TOTAL_BLOCKING_TIMEOUT\nfrom onyx.connectors.slack.utils import ONYX_SLACK_LOCK_TTL\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass OnyxSlackWebClient(WebClient):\n    \"\"\"Use in combination with the Onyx Retry Handler.\n\n    This client wrapper enforces a proper retry delay through redis BEFORE the api call\n    so that multiple clients can synchronize and rate limit properly.\n\n    The retry handler writes the correct delay value to redis so that it is can be used\n    by this wrapper.\n\n    \"\"\"\n\n    def __init__(\n        self, delay_lock: str, delay_key: str, r: Redis, *args: Any, **kwargs: Any\n    ) -> None:\n        super().__init__(*args, **kwargs)\n        self._delay_key = delay_key\n        self._delay_lock = delay_lock\n        self._redis: Redis = r\n        self.num_requests: int = 0\n        self._lock = threading.Lock()\n\n    def _perform_urllib_http_request(\n        self, *, url: str, args: Dict[str, Dict[str, Any]]\n    ) -> Dict[str, Any]:\n        \"\"\"By locking around the base class method, we ensure that both the delay from\n        Redis and parsing/writing of retry values to Redis are handled properly in\n        one place\"\"\"\n        # lock and extend the ttl\n        lock: RedisLock = self._redis.lock(\n            self._delay_lock,\n            timeout=ONYX_SLACK_LOCK_TTL,\n        )\n\n        # try to acquire the lock\n        start = time.monotonic()\n        while True:\n            acquired = lock.acquire(blocking_timeout=ONYX_SLACK_LOCK_BLOCKING_TIMEOUT)\n            if acquired:\n                break\n\n            # if we couldn't acquire the lock but it exists, there's at least some activity\n            # so keep trying...\n            if self._redis.exists(self._delay_lock):\n                continue\n\n            if time.monotonic() - start > ONYX_SLACK_LOCK_TOTAL_BLOCKING_TIMEOUT:\n                raise RuntimeError(\n                    f\"OnyxSlackWebClient._perform_urllib_http_request - \"\n                    f\"timed out waiting for lock: {ONYX_SLACK_LOCK_TOTAL_BLOCKING_TIMEOUT=}\"\n                )\n\n        try:\n            result = super()._perform_urllib_http_request(url=url, args=args)\n        finally:\n            if lock.owned():\n                lock.release()\n            else:\n                logger.warning(\n                    \"OnyxSlackWebClient._perform_urllib_http_request lock not owned on release\"\n                )\n\n        time.monotonic() - start\n        # logger.info(\n        #     f\"OnyxSlackWebClient._perform_urllib_http_request: Releasing lock: {elapsed=}\"\n        # )\n\n        return result\n\n    def _perform_urllib_http_request_internal(\n        self,\n        url: str,\n        req: Request,\n    ) -> Dict[str, Any]:\n        \"\"\"Overrides the internal method which is mostly the direct call to\n        urllib/urlopen ... so this is a good place to perform our delay.\"\"\"\n\n        # read and execute the delay\n        delay_ms = cast(int, self._redis.pttl(self._delay_key))\n        if delay_ms < 0:  # negative values are error status codes ... see docs\n            delay_ms = 0\n\n        if delay_ms > 0:\n            logger.warning(\n                f\"OnyxSlackWebClient._perform_urllib_http_request_internal delay: {delay_ms=} {self.num_requests=}\"\n            )\n\n            time.sleep(delay_ms / 1000.0)\n\n        result = super()._perform_urllib_http_request_internal(url, req)\n\n        with self._lock:\n            self.num_requests += 1\n\n        # the delay key should have naturally expired by this point\n        return result\n"
  },
  {
    "path": "backend/onyx/connectors/slack/utils.py",
    "content": "import re\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom functools import lru_cache\nfrom functools import wraps\nfrom typing import Any\nfrom typing import cast\n\nfrom slack_sdk import WebClient\nfrom slack_sdk.errors import SlackApiError\nfrom slack_sdk.web import SlackResponse\n\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.slack.models import MessageType\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\nlogger = setup_logger()\n\n# retry after 0.1, 1.2, 3.4, 7.8, 16.6, 34.2 seconds\nbasic_retry_wrapper = retry_builder(tries=7)\n# number of messages we request per page when fetching paginated slack messages\n_SLACK_LIMIT = 900\n\n# used to serialize access to the retry TTL\nONYX_SLACK_LOCK_TTL = 1800  # how long the lock is allowed to idle before it expires\nONYX_SLACK_LOCK_BLOCKING_TIMEOUT = 60  # how long to wait for the lock per wait attempt\nONYX_SLACK_LOCK_TOTAL_BLOCKING_TIMEOUT = 3600  # how long to wait for the lock in total\n\n\n@lru_cache()\ndef get_base_url(token: str) -> str:\n    \"\"\"Retrieve and cache the base URL of the Slack workspace based on the client token.\"\"\"\n    client = WebClient(token=token)\n    return client.auth_test()[\"url\"]\n\n\ndef get_message_link(event: MessageType, client: WebClient, channel_id: str) -> str:\n    message_ts = event[\"ts\"]\n    message_ts_without_dot = message_ts.replace(\".\", \"\")\n    thread_ts = event.get(\"thread_ts\")\n    base_url = get_base_url(client.token)\n\n    link = f\"{base_url.rstrip('/')}/archives/{channel_id}/p{message_ts_without_dot}\" + (\n        f\"?thread_ts={thread_ts}\" if thread_ts else \"\"\n    )\n    return link\n\n\ndef make_slack_api_call(\n    call: Callable[..., SlackResponse], **kwargs: Any\n) -> SlackResponse:\n    return call(**kwargs)\n\n\ndef make_paginated_slack_api_call(\n    call: Callable[..., SlackResponse], **kwargs: Any\n) -> Generator[dict[str, Any], None, None]:\n    return _make_slack_api_call_paginated(call)(**kwargs)\n\n\ndef _make_slack_api_call_paginated(\n    call: Callable[..., SlackResponse],\n) -> Callable[..., Generator[dict[str, Any], None, None]]:\n    \"\"\"Wraps calls to slack API so that they automatically handle pagination\"\"\"\n\n    @wraps(call)\n    def paginated_call(**kwargs: Any) -> Generator[dict[str, Any], None, None]:\n        cursor: str | None = None\n        has_more = True\n        while has_more:\n            response = call(cursor=cursor, limit=_SLACK_LIMIT, **kwargs)\n            yield cast(dict[str, Any], response.validate())\n            cursor = cast(dict[str, Any], response.get(\"response_metadata\", {})).get(\n                \"next_cursor\", \"\"\n            )\n            has_more = bool(cursor)\n\n    return paginated_call\n\n\n# NOTE(rkuo): we may not need this any more if the integrated retry handlers work as\n# expected.  Do we want to keep this around?\n\n# def make_slack_api_rate_limited(\n#     call: Callable[..., SlackResponse], max_retries: int = 7\n# ) -> Callable[..., SlackResponse]:\n#     \"\"\"Wraps calls to slack API so that they automatically handle rate limiting\"\"\"\n\n#     @wraps(call)\n#     def rate_limited_call(**kwargs: Any) -> SlackResponse:\n#         last_exception = None\n\n#         for _ in range(max_retries):\n#             try:\n#                 # Make the API call\n#                 response = call(**kwargs)\n\n#                 # Check for errors in the response, will raise `SlackApiError`\n#                 # if anything went wrong\n#                 response.validate()\n#                 return response\n\n#             except SlackApiError as e:\n#                 last_exception = e\n#                 try:\n#                     error = e.response[\"error\"]\n#                 except KeyError:\n#                     error = \"unknown error\"\n\n#                 if error == \"ratelimited\":\n#                     # Handle rate limiting: get the 'Retry-After' header value and sleep for that duration\n#                     retry_after = int(e.response.headers.get(\"Retry-After\", 1))\n#                     logger.info(\n#                         f\"Slack call rate limited, retrying after {retry_after} seconds. Exception: {e}\"\n#                     )\n#                     time.sleep(retry_after)\n#                 elif error in [\"already_reacted\", \"no_reaction\", \"internal_error\"]:\n#                     # Log internal_error and return the response instead of failing\n#                     logger.warning(\n#                         f\"Slack call encountered '{error}', skipping and continuing...\"\n#                     )\n#                     return e.response\n#                 else:\n#                     # Raise the error for non-transient errors\n#                     raise\n\n#         # If the code reaches this point, all retries have been exhausted\n#         msg = f\"Max retries ({max_retries}) exceeded\"\n#         if last_exception:\n#             raise Exception(msg) from last_exception\n#         else:\n#             raise Exception(msg)\n\n#     return rate_limited_call\n\n# temporarily disabling due to using a different retry approach\n# might be permanent if everything works out\n# def make_slack_api_call_w_retries(\n#     call: Callable[..., SlackResponse], **kwargs: Any\n# ) -> SlackResponse:\n#     return basic_retry_wrapper(call)(**kwargs)\n\n\n# def make_paginated_slack_api_call_w_retries(\n#     call: Callable[..., SlackResponse], **kwargs: Any\n# ) -> Generator[dict[str, Any], None, None]:\n#     return _make_slack_api_call_paginated(basic_retry_wrapper(call))(**kwargs)\n\n\ndef expert_info_from_slack_id(\n    user_id: str | None,\n    client: WebClient,\n    user_cache: dict[str, BasicExpertInfo | None],\n) -> BasicExpertInfo | None:\n    if not user_id:\n        return None\n\n    if user_id in user_cache:\n        return user_cache[user_id]\n\n    response = client.users_info(user=user_id)\n\n    if not response[\"ok\"]:\n        user_cache[user_id] = None\n        return None\n\n    user: dict = cast(dict[Any, dict], response.data).get(\"user\", {})\n    profile = user.get(\"profile\", {})\n\n    expert = BasicExpertInfo(\n        display_name=user.get(\"real_name\") or profile.get(\"display_name\"),\n        first_name=profile.get(\"first_name\"),\n        last_name=profile.get(\"last_name\"),\n        email=profile.get(\"email\"),\n    )\n\n    user_cache[user_id] = expert\n\n    return expert\n\n\nclass SlackTextCleaner:\n    \"\"\"Utility class to replace user IDs with usernames in a message.\n    Handles caching, so the same request is not made multiple times\n    for the same user ID\"\"\"\n\n    def __init__(self, client: WebClient) -> None:\n        self._client = client\n        self._id_to_name_map: dict[str, str] = {}\n\n    def _get_slack_name(self, user_id: str) -> str:\n        if user_id not in self._id_to_name_map:\n            try:\n                response = self._client.users_info(user=user_id)\n                # prefer display name if set, since that is what is shown in Slack\n                self._id_to_name_map[user_id] = (\n                    response[\"user\"][\"profile\"][\"display_name\"]\n                    or response[\"user\"][\"profile\"][\"real_name\"]\n                )\n            except SlackApiError as e:\n                logger.exception(\n                    f\"Error fetching data for user {user_id}: {e.response['error']}\"\n                )\n                raise\n\n        return self._id_to_name_map[user_id]\n\n    def _replace_user_ids_with_names(self, message: str) -> str:\n        # Find user IDs in the message\n        user_ids = re.findall(\"<@(.*?)>\", message)\n\n        # Iterate over each user ID found\n        for user_id in user_ids:\n            try:\n                if user_id in self._id_to_name_map:\n                    user_name = self._id_to_name_map[user_id]\n                else:\n                    user_name = self._get_slack_name(user_id)\n\n                # Replace the user ID with the username in the message\n                message = message.replace(f\"<@{user_id}>\", f\"@{user_name}\")\n            except Exception:\n                logger.exception(\n                    f\"Unable to replace user ID with username for user_id '{user_id}'\"\n                )\n\n        return message\n\n    def index_clean(self, message: str) -> str:\n        \"\"\"During indexing, replace pattern sets that may cause confusion to the model\n        Some special patterns are left in as they can provide information\n        ie. links that contain format text|link, both the text and the link may be informative\n        \"\"\"\n        message = self._replace_user_ids_with_names(message)\n        message = self.replace_tags_basic(message)\n        message = self.replace_channels_basic(message)\n        message = self.replace_special_mentions(message)\n        message = self.replace_special_catchall(message)\n        return message\n\n    @staticmethod\n    def replace_tags_basic(message: str) -> str:\n        \"\"\"Simply replaces all tags with `@<USER_ID>` in order to prevent us from\n        tagging users in Slack when we don't want to\"\"\"\n        # Find user IDs in the message\n        user_ids = re.findall(\"<@(.*?)>\", message)\n        for user_id in user_ids:\n            message = message.replace(f\"<@{user_id}>\", f\"@{user_id}\")\n        return message\n\n    @staticmethod\n    def replace_channels_basic(message: str) -> str:\n        \"\"\"Simply replaces all channel mentions with `#<CHANNEL_ID>` in order\n        to make a message work as part of a link\"\"\"\n        # Find user IDs in the message\n        channel_matches = re.findall(r\"<#(.*?)\\|(.*?)>\", message)\n        for channel_id, channel_name in channel_matches:\n            message = message.replace(\n                f\"<#{channel_id}|{channel_name}>\", f\"#{channel_name}\"\n            )\n        return message\n\n    @staticmethod\n    def replace_special_mentions(message: str) -> str:\n        \"\"\"Simply replaces @channel, @here, and @everyone so we don't tag\n        a bunch of people in Slack when we don't want to\"\"\"\n        # Find user IDs in the message\n        message = message.replace(\"<!channel>\", \"@channel\")\n        message = message.replace(\"<!here>\", \"@here\")\n        message = message.replace(\"<!everyone>\", \"@everyone\")\n        return message\n\n    @staticmethod\n    def replace_special_catchall(message: str) -> str:\n        \"\"\"Replaces pattern of <!something|another-thing> with another-thing\n        This is added for <!subteam^TEAM-ID|@team-name> but may match other cases as well\n        \"\"\"\n\n        pattern = r\"<!([^|]+)\\|([^>]+)>\"\n        return re.sub(pattern, r\"\\2\", message)\n\n    @staticmethod\n    def add_zero_width_whitespace_after_tag(message: str) -> str:\n        \"\"\"Add a 0 width whitespace after every @\"\"\"\n        return message.replace(\"@\", \"@\\u200b\")\n"
  },
  {
    "path": "backend/onyx/connectors/teams/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/teams/connector.py",
    "content": "import copy\nimport os\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\n\nimport msal  # type: ignore\nfrom office365.graph_client import GraphClient  # type: ignore\nfrom office365.runtime.client_request_exception import ClientRequestException  # type: ignore\nfrom office365.runtime.http.request_options import RequestOptions  # type: ignore[import-untyped]\nfrom office365.teams.channels.channel import Channel  # type: ignore\nfrom office365.teams.team import Team  # type: ignore\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.interfaces import CheckpointedConnectorWithPermSync\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.microsoft_graph_env import resolve_microsoft_environment\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import EntityFailure\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.connectors.teams.models import Message\nfrom onyx.connectors.teams.utils import fetch_expert_infos\nfrom onyx.connectors.teams.utils import fetch_external_access\nfrom onyx.connectors.teams.utils import fetch_messages\nfrom onyx.connectors.teams.utils import fetch_replies\nfrom onyx.file_processing.html_utils import parse_html_page_basic\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_with_timeout\n\nlogger = setup_logger()\n\n_SLIM_DOC_BATCH_SIZE = 5000\n\n\nclass TeamsCheckpoint(ConnectorCheckpoint):\n    todo_team_ids: list[str] | None = None\n\n\nDEFAULT_AUTHORITY_HOST = \"https://login.microsoftonline.com\"\nDEFAULT_GRAPH_API_HOST = \"https://graph.microsoft.com\"\n\n\nclass TeamsConnector(\n    CheckpointedConnectorWithPermSync[TeamsCheckpoint],\n    SlimConnectorWithPermSync,\n):\n    MAX_WORKERS = 10\n\n    def __init__(\n        self,\n        # TODO: (chris) move from \"Display Names\" to IDs, since display names\n        # are not necessarily guaranteed to be unique\n        teams: list[str] = [],\n        max_workers: int = MAX_WORKERS,\n        authority_host: str = DEFAULT_AUTHORITY_HOST,\n        graph_api_host: str = DEFAULT_GRAPH_API_HOST,\n    ) -> None:\n        self.graph_client: GraphClient | None = None\n        self.msal_app: msal.ConfidentialClientApplication | None = None\n        self.max_workers = max_workers\n        self.requested_team_list: list[str] = teams\n\n        resolved_env = resolve_microsoft_environment(graph_api_host, authority_host)\n        self._azure_environment = resolved_env.environment\n        self.authority_host = resolved_env.authority_host\n        self.graph_api_host = resolved_env.graph_host\n\n    # impls for BaseConnector\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        teams_client_id = credentials[\"teams_client_id\"]\n        teams_client_secret = credentials[\"teams_client_secret\"]\n        teams_directory_id = credentials[\"teams_directory_id\"]\n\n        authority_url = f\"{self.authority_host}/{teams_directory_id}\"\n        self.msal_app = msal.ConfidentialClientApplication(\n            authority=authority_url,\n            client_id=teams_client_id,\n            client_credential=teams_client_secret,\n        )\n\n        def _acquire_token_func() -> dict[str, Any]:\n            \"\"\"\n            Acquire token via MSAL\n            \"\"\"\n            if self.msal_app is None:\n                raise RuntimeError(\"MSAL app is not initialized\")\n\n            token = self.msal_app.acquire_token_for_client(\n                scopes=[f\"{self.graph_api_host}/.default\"]\n            )\n\n            if not isinstance(token, dict):\n                raise RuntimeError(\"`token` instance must be of type dict\")\n\n            return token\n\n        self.graph_client = GraphClient(\n            _acquire_token_func, environment=self._azure_environment\n        )\n        return None\n\n    def validate_connector_settings(self) -> None:\n        if self.graph_client is None:\n            raise ConnectorMissingCredentialError(\"Teams credentials not loaded.\")\n\n        # Check if any requested teams have special characters that need client-side filtering\n        has_special_chars = _has_odata_incompatible_chars(self.requested_team_list)\n        if has_special_chars:\n            logger.info(\n                \"Some requested team names contain special characters (&, (, )) that require \"\n                \"client-side filtering during data retrieval.\"\n            )\n\n        # Minimal validation: just check if we can access the teams endpoint\n        timeout = 10  # Short timeout for basic validation\n\n        try:\n            # For validation, do a lightweight check instead of full team search\n            logger.info(\n                f\"Requested team count: {len(self.requested_team_list) if self.requested_team_list else 0}, \"\n                f\"Has special chars: {has_special_chars}\"\n            )\n\n            validation_query = self.graph_client.teams.get().top(1)\n            run_with_timeout(\n                timeout=timeout,\n                func=lambda: validation_query.execute_query(),\n            )\n\n            logger.info(\n                \"Teams validation successful - Access to teams endpoint confirmed\"\n            )\n\n        except TimeoutError as e:\n            raise ConnectorValidationError(\n                f\"Timeout while validating Teams access (waited {timeout}s). \"\n                f\"This may indicate network issues or authentication problems. \"\n                f\"Error: {e}\"\n            )\n\n        except ClientRequestException as e:\n            if not e.response:\n                raise RuntimeError(f\"No response provided in error; {e=}\")\n            status_code = e.response.status_code\n            if status_code == 401:\n                raise CredentialExpiredError(\n                    \"Invalid or expired Microsoft Teams credentials (401 Unauthorized).\"\n                )\n            elif status_code == 403:\n                raise InsufficientPermissionsError(\n                    \"Your app lacks sufficient permissions to read Teams (403 Forbidden).\"\n                )\n            raise UnexpectedValidationError(f\"Unexpected error retrieving teams: {e}\")\n\n        except Exception as e:\n            error_str = str(e).lower()\n            if (\n                \"unauthorized\" in error_str\n                or \"401\" in error_str\n                or \"invalid_grant\" in error_str\n            ):\n                raise CredentialExpiredError(\n                    \"Invalid or expired Microsoft Teams credentials.\"\n                )\n            elif \"forbidden\" in error_str or \"403\" in error_str:\n                raise InsufficientPermissionsError(\n                    \"App lacks required permissions to read from Microsoft Teams.\"\n                )\n            raise ConnectorValidationError(\n                f\"Unexpected error during Teams validation: {e}\"\n            )\n\n    # impls for CheckpointedConnector\n\n    def build_dummy_checkpoint(self) -> TeamsCheckpoint:\n        return TeamsCheckpoint(\n            has_more=True,\n        )\n\n    def validate_checkpoint_json(self, checkpoint_json: str) -> TeamsCheckpoint:\n        return TeamsCheckpoint.model_validate_json(checkpoint_json)\n\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,  # noqa: ARG002\n        checkpoint: TeamsCheckpoint,\n    ) -> CheckpointOutput[TeamsCheckpoint]:\n        if self.graph_client is None:\n            raise ConnectorMissingCredentialError(\"Teams\")\n\n        checkpoint = cast(TeamsCheckpoint, copy.deepcopy(checkpoint))\n\n        todos = checkpoint.todo_team_ids\n\n        if todos is None:\n            teams = _collect_all_teams(\n                graph_client=self.graph_client,\n                requested=self.requested_team_list,\n            )\n            todo_team_ids = [team.id for team in teams if team.id]\n            return TeamsCheckpoint(\n                todo_team_ids=todo_team_ids,\n                has_more=bool(todo_team_ids),\n            )\n\n        # `todos.pop()` should always return an element. This is because if\n        # `todos` was the empty list, then we would have set `has_more=False`\n        # during the previous invocation of `TeamsConnector.load_from_checkpoint`,\n        # meaning that this function wouldn't have been called in the first place.\n        todo_team_id = todos.pop()\n        team = _get_team_by_id(\n            graph_client=self.graph_client,\n            team_id=todo_team_id,\n        )\n        channels = _collect_all_channels_from_team(\n            team=team,\n        )\n\n        # An iterator of channels, in which each channel is an iterator of docs.\n        channels_docs = [\n            _collect_documents_for_channel(\n                graph_client=self.graph_client,\n                team=team,\n                channel=channel,\n                start=start,\n            )\n            for channel in channels\n        ]\n\n        # Was previously `for doc in parallel_yield(gens=docs, max_workers=self.max_workers): ...`.\n        # However, that lead to some weird exceptions (potentially due to non-thread-safe behaviour in the Teams library).\n        # Reverting back to the non-threaded case for now.\n        for channel_docs in channels_docs:\n            for channel_doc in channel_docs:\n                if channel_doc:\n                    yield channel_doc\n\n        logger.info(\n            f\"Processed team with id {todo_team_id}; {len(todos)} team(s) left to process\"\n        )\n\n        return TeamsCheckpoint(\n            todo_team_ids=todos,\n            has_more=bool(todos),\n        )\n\n    def load_from_checkpoint_with_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: TeamsCheckpoint,\n    ) -> CheckpointOutput[TeamsCheckpoint]:\n        # Teams already fetches external_access (permissions) for each document\n        # in _convert_thread_to_document, so we can just delegate to load_from_checkpoint\n        return self.load_from_checkpoint(start, end, checkpoint)\n\n    # impls for SlimConnectorWithPermSync\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> GenerateSlimDocumentOutput:\n        start = start or 0\n\n        teams = _collect_all_teams(\n            graph_client=self.graph_client,\n            requested=self.requested_team_list,\n        )\n\n        for team in teams:\n            if not team.id:\n                logger.warning(\n                    f\"Expected a team with an id, instead got no id: {team=}\"\n                )\n                continue\n\n            channels = _collect_all_channels_from_team(\n                team=team,\n            )\n\n            for channel in channels:\n                if not channel.id:\n                    logger.warning(\n                        f\"Expected a channel with an id, instead got no id: {channel=}\"\n                    )\n                    continue\n\n                external_access = fetch_external_access(\n                    graph_client=self.graph_client, channel=channel\n                )\n\n                messages = fetch_messages(\n                    graph_client=self.graph_client,\n                    team_id=team.id,\n                    channel_id=channel.id,\n                    start=start,\n                )\n\n                slim_doc_buffer: list[SlimDocument | HierarchyNode] = []\n\n                for message in messages:\n                    slim_doc_buffer.append(\n                        SlimDocument(\n                            id=message.id,\n                            external_access=external_access,\n                        )\n                    )\n\n                    if len(slim_doc_buffer) >= _SLIM_DOC_BATCH_SIZE:\n                        if callback:\n                            if callback.should_stop():\n                                raise RuntimeError(\n                                    \"retrieve_all_slim_docs_perm_sync: Stop signal detected\"\n                                )\n                            callback.progress(\"retrieve_all_slim_docs_perm_sync\", 1)\n                        yield slim_doc_buffer\n                        slim_doc_buffer = []\n\n                # Flush any remaining slim documents collected for this channel\n                if slim_doc_buffer:\n                    yield slim_doc_buffer\n                    slim_doc_buffer = []\n\n\ndef _escape_odata_string(name: str) -> str:\n    \"\"\"Escape special characters for OData string literals.\n\n    Uses proper OData v4 string literal escaping:\n    - Single quotes: ' becomes ''\n    - Other characters are handled by using contains() instead of eq for problematic cases\n    \"\"\"\n    # Escape single quotes for OData syntax (replace ' with '')\n    escaped = name.replace(\"'\", \"''\")\n    return escaped\n\n\ndef _has_odata_incompatible_chars(team_names: list[str] | None) -> bool:\n    \"\"\"Check if any team name contains characters that break Microsoft Graph OData filters.\n\n    The Microsoft Graph Teams API has limited OData support. Characters like\n    &, (, and ) cause parsing errors and require client-side filtering instead.\n    \"\"\"\n    if not team_names:\n        return False\n    return any(char in name for name in team_names for char in [\"&\", \"(\", \")\"])\n\n\ndef _can_use_odata_filter(\n    team_names: list[str] | None,\n) -> tuple[bool, list[str], list[str]]:\n    \"\"\"Determine which teams can use OData filtering vs client-side filtering.\n\n    Microsoft Graph /teams endpoint OData limitations:\n    - Only supports basic 'eq' operators in filters\n    - No 'contains', 'startswith', or other advanced operators\n    - Special characters (&, (, )) break OData parsing\n\n    Returns:\n        tuple: (can_use_odata, safe_names, problematic_names)\n    \"\"\"\n    if not team_names:\n        return False, [], []\n\n    safe_names = []\n    problematic_names = []\n\n    for name in team_names:\n        if any(char in name for char in [\"&\", \"(\", \")\"]):\n            problematic_names.append(name)\n        else:\n            safe_names.append(name)\n\n    return bool(safe_names), safe_names, problematic_names\n\n\ndef _build_simple_odata_filter(safe_names: list[str]) -> str | None:\n    \"\"\"Build simple OData filter using only 'eq' operators for safe names.\"\"\"\n    if not safe_names:\n        return None\n\n    filter_parts = []\n    for name in safe_names:\n        escaped_name = _escape_odata_string(name)\n        filter_parts.append(f\"displayName eq '{escaped_name}'\")\n\n    return \" or \".join(filter_parts)\n\n\ndef _construct_semantic_identifier(channel: Channel, top_message: Message) -> str:\n    top_message_user_name: str\n\n    if top_message.from_ and top_message.from_.user:\n        user_display_name = top_message.from_.user.display_name\n        top_message_user_name = (\n            user_display_name if user_display_name else \"Unknown User\"\n        )\n    else:\n        logger.warning(f\"Message {top_message=} has no `from.user` field\")\n        top_message_user_name = \"Unknown User\"\n\n    top_message_content = top_message.body.content or \"\"\n    top_message_subject = top_message.subject or \"Unknown Subject\"\n    channel_name = channel.properties.get(\"displayName\", \"Unknown\")\n\n    try:\n        snippet = parse_html_page_basic(top_message_content.rstrip())\n        snippet = snippet[:50] + \"...\" if len(snippet) > 50 else snippet\n\n    except Exception:\n        logger.exception(\n            f\"Error parsing snippet for message {top_message.id} with url {top_message.web_url}\"\n        )\n        snippet = \"\"\n\n    semantic_identifier = (\n        f\"{top_message_user_name} in {channel_name} about {top_message_subject}\"\n    )\n    if snippet:\n        semantic_identifier += f\": {snippet}\"\n\n    return semantic_identifier\n\n\ndef _convert_thread_to_document(\n    graph_client: GraphClient,\n    channel: Channel,\n    thread: list[Message],\n) -> Document | None:\n    if len(thread) == 0:\n        return None\n\n    most_recent_message_datetime: datetime | None = None\n    top_message = thread[0]\n    thread_text = \"\"\n\n    sorted_thread = sorted(thread, key=lambda m: m.created_date_time, reverse=True)\n\n    if sorted_thread:\n        most_recent_message_datetime = sorted_thread[0].created_date_time\n\n    for message in thread:\n        # Add text and a newline\n        if message.body.content:\n            thread_text += parse_html_page_basic(message.body.content)\n\n        # If it has a subject, that means its the top level post message, so grab its id, url, and subject\n        if message.subject:\n            top_message = message\n\n    if not thread_text:\n        return None\n\n    semantic_string = _construct_semantic_identifier(channel, top_message)\n    expert_infos = fetch_expert_infos(graph_client=graph_client, channel=channel)\n    external_access = fetch_external_access(\n        graph_client=graph_client, channel=channel, expert_infos=expert_infos\n    )\n\n    return Document(\n        id=top_message.id,\n        sections=[TextSection(link=top_message.web_url, text=thread_text)],\n        source=DocumentSource.TEAMS,\n        semantic_identifier=semantic_string,\n        title=\"\",  # teams threads don't really have a \"title\"\n        doc_updated_at=most_recent_message_datetime,\n        primary_owners=expert_infos,\n        metadata={},\n        external_access=external_access,\n    )\n\n\ndef _update_request_url(request: RequestOptions, next_url: str) -> None:\n    request.url = next_url\n\n\ndef _add_prefer_header(request: RequestOptions) -> None:\n    \"\"\"Add Prefer header to work around Microsoft Graph API ampersand bug.\n    See: https://developer.microsoft.com/en-us/graph/known-issues/?search=18185\n    \"\"\"\n    if not hasattr(request, \"headers\") or request.headers is None:\n        request.headers = {}\n    # Add header to handle properly encoded ampersands in filters\n    request.headers[\"Prefer\"] = \"legacySearch=false\"\n\n\ndef _collect_all_teams(\n    graph_client: GraphClient,\n    requested: list[str] | None = None,\n) -> list[Team]:\n    \"\"\"Collect teams from Microsoft Graph using appropriate filtering strategy.\n\n    For teams with special characters (&, (, )), uses client-side filtering\n    with paginated search. For teams without special characters, uses efficient\n    OData server-side filtering.\n\n    Args:\n        graph_client: Authenticated Microsoft Graph client\n        requested: List of team names to find, or None for all teams\n\n    Returns:\n        List of Team objects matching the requested names\n    \"\"\"\n    teams: list[Team] = []\n    next_url: str | None = None\n\n    # Determine filtering strategy based on Microsoft Graph limitations\n    if not requested:\n        # No specific teams requested - return empty list (avoid fetching all teams)\n        logger.info(\"No specific teams requested - returning empty list\")\n        return []\n\n    _, safe_names, problematic_names = _can_use_odata_filter(requested)\n\n    if problematic_names and not safe_names:\n        # ALL requested teams have special characters - cannot use OData filtering\n        logger.info(\n            f\"All requested team names contain special characters (&, (, )) which require \"\n            f\"client-side filtering. Using basic /teams endpoint with pagination. \"\n            f\"Teams: {problematic_names}\"\n        )\n        # Use unfiltered query with pagination limit to avoid fetching too many teams\n        use_client_side_filtering = True\n        odata_filter = None\n    elif problematic_names and safe_names:\n        # Mixed scenario - need to fetch more teams to find the problematic ones\n        logger.info(\n            f\"Mixed team types: will use client-side filtering for all. \"\n            f\"Safe names: {safe_names}, Special char names: {problematic_names}\"\n        )\n        use_client_side_filtering = True\n        odata_filter = None\n    elif safe_names:\n        # All names are safe - use OData filtering\n        logger.info(f\"Using OData filtering for all requested teams: {safe_names}\")\n        use_client_side_filtering = False\n        odata_filter = _build_simple_odata_filter(safe_names)\n    else:\n        # No valid names\n        return []\n\n    # Track pagination to avoid fetching too many teams for client-side filtering\n    max_pages = 200\n    page_count = 0\n\n    while True:\n        try:\n            if use_client_side_filtering:\n                # Use basic /teams endpoint with top parameter to limit results per page\n                query = graph_client.teams.get().top(50)  # Limit to 50 teams per page\n            else:\n                # Use OData filter with only 'eq' operators\n                query = graph_client.teams.get().filter(odata_filter)\n\n            # Add header to work around Microsoft Graph API issues\n            query.before_execute(lambda req: _add_prefer_header(request=req))\n\n            if next_url:\n                url = next_url\n                query.before_execute(\n                    lambda req: _update_request_url(request=req, next_url=url)\n                )\n\n            team_collection = query.execute_query()\n        except (ClientRequestException, ValueError) as e:\n            # If OData filter fails, fall back to client-side filtering\n            if not use_client_side_filtering and odata_filter:\n                logger.warning(\n                    f\"OData filter failed: {e}. Falling back to client-side filtering.\"\n                )\n                use_client_side_filtering = True\n                odata_filter = None\n                teams = []\n                next_url = None\n                page_count = 0\n                continue\n            # If client-side approach also fails, re-raise\n            logger.error(f\"Teams query failed: {e}\")\n            raise\n\n        filtered_teams = (\n            team\n            for team in team_collection\n            if _filter_team(team=team, requested=requested)\n        )\n        teams.extend(filtered_teams)\n\n        # For client-side filtering, check if we found all requested teams or hit page limit\n        if use_client_side_filtering:\n            page_count += 1\n            found_team_names = {\n                team.display_name for team in teams if team.display_name\n            }\n            requested_set = set(requested)\n\n            # Log progress every 10 pages to avoid excessive logging\n            if page_count % 10 == 0:\n                logger.info(\n                    f\"Searched {page_count} pages, found {len(found_team_names)} matching teams so far\"\n                )\n\n            # Stop if we found all requested teams or hit the page limit\n            if requested_set.issubset(found_team_names):\n                logger.info(f\"Found all requested teams after {page_count} pages\")\n                break\n            elif page_count >= max_pages:\n                logger.warning(\n                    f\"Reached maximum page limit ({max_pages}) while searching for teams. \"\n                    f\"Found: {found_team_names & requested_set}, \"\n                    f\"Missing: {requested_set - found_team_names}\"\n                )\n                break\n\n        if not team_collection.has_next:\n            break\n\n        if not isinstance(team_collection._next_request_url, str):\n            raise ValueError(\n                f\"The next request url field should be a string, instead got {type(team_collection._next_request_url)}\"\n            )\n\n        next_url = team_collection._next_request_url\n\n    return teams\n\n\ndef _normalize_team_name(name: str) -> str:\n    \"\"\"Normalize team name for flexible matching.\"\"\"\n    if not name:\n        return \"\"\n    # Convert to lowercase and strip whitespace for case-insensitive matching\n    return name.lower().strip()\n\n\ndef _matches_requested_team(\n    team_display_name: str, requested: list[str] | None\n) -> bool:\n    \"\"\"Check if team display name matches any of the requested team names.\n\n    Uses flexible matching to handle slight variations in team names.\n    \"\"\"\n    if not requested or not team_display_name:\n        return (\n            not requested\n        )  # If no teams requested, match all; if no name, don't match\n\n    normalized_team_name = _normalize_team_name(team_display_name)\n\n    for requested_name in requested:\n        normalized_requested = _normalize_team_name(requested_name)\n\n        # Exact match after normalization\n        if normalized_team_name == normalized_requested:\n            return True\n\n        # Flexible matching - check if team name contains all significant words\n        # This helps with slight variations in formatting\n        team_words = set(normalized_team_name.split())\n        requested_words = set(normalized_requested.split())\n\n        # If the requested name has special characters, split on those too\n        for char in [\"&\", \"(\", \")\"]:\n            if char in normalized_requested:\n                # Split on special characters and add words\n                parts = normalized_requested.replace(char, \" \").split()\n                requested_words.update(parts)\n\n        # Remove very short words that aren't meaningful\n        meaningful_requested_words = {\n            word for word in requested_words if len(word) >= 3\n        }\n\n        # Check if team name contains most of the meaningful words\n        if (\n            meaningful_requested_words\n            and len(meaningful_requested_words & team_words)\n            >= len(meaningful_requested_words) * 0.7\n        ):\n            return True\n\n    return False\n\n\ndef _filter_team(\n    team: Team,\n    requested: list[str] | None = None,\n) -> bool:\n    \"\"\"\n    Returns the true if:\n        - Team is not expired / deleted\n        - Team has a display-name and ID\n        - Team display-name matches any of the requested teams (with flexible matching)\n\n    Otherwise, returns false.\n    \"\"\"\n\n    if not team.id or not team.display_name:\n        return False\n\n    if not _matches_requested_team(team.display_name, requested):\n        return False\n\n    props = team.properties\n\n    expiration = props.get(\"expirationDateTime\")\n    deleted = props.get(\"deletedDateTime\")\n\n    # We just check for the existence of those two fields, not their actual dates.\n    # This is because if these fields do exist, they have to have occurred in the past, thus making them already\n    # expired / deleted.\n    return not expiration and not deleted\n\n\ndef _get_team_by_id(\n    graph_client: GraphClient,\n    team_id: str,\n) -> Team:\n    team_collection = (\n        graph_client.teams.get().filter(f\"id eq '{team_id}'\").top(1).execute_query()\n    )\n\n    if not team_collection:\n        raise ValueError(f\"No team with {team_id=} was found\")\n    elif team_collection.has_next:\n        # shouldn't happen, but catching it regardless\n        raise RuntimeError(f\"Multiple teams with {team_id=} were found\")\n\n    return team_collection[0]\n\n\ndef _collect_all_channels_from_team(\n    team: Team,\n) -> list[Channel]:\n    if not team.id:\n        raise RuntimeError(f\"The {team=} has an empty `id` field\")\n\n    channels: list[Channel] = []\n    next_url = None\n\n    while True:\n        query = team.channels.get_all(\n            # explicitly needed because of incorrect type definitions provided by the `office365` library\n            page_loaded=lambda _: None\n        )\n        if next_url:\n            url = next_url\n            query = query.before_execute(\n                lambda req: _update_request_url(request=req, next_url=url)\n            )\n\n        channel_collection = query.execute_query()\n        channels.extend(channel for channel in channel_collection if channel.id)\n\n        if not channel_collection.has_next:\n            break\n\n    return channels\n\n\ndef _collect_documents_for_channel(\n    graph_client: GraphClient,\n    team: Team,\n    channel: Channel,\n    start: SecondsSinceUnixEpoch,\n) -> Iterator[Document | None | ConnectorFailure]:\n    \"\"\"\n    This function yields an iterator of `Document`s, where each `Document` corresponds to a \"thread\".\n\n    A \"thread\" is the conjunction of the \"root\" message and all of its replies.\n    \"\"\"\n\n    for message in fetch_messages(\n        graph_client=graph_client,\n        team_id=team.id,\n        channel_id=channel.id,\n        start=start,\n    ):\n        try:\n            replies = list(\n                fetch_replies(\n                    graph_client=graph_client,\n                    team_id=team.id,\n                    channel_id=channel.id,\n                    root_message_id=message.id,\n                )\n            )\n\n            thread = [message]\n            thread.extend(replies[::-1])\n\n            # Note:\n            # We convert an entire *thread* (including the root message and its replies) into one, singular `Document`.\n            # I.e., we don't convert each individual message and each individual reply into their own individual `Document`s.\n            if doc := _convert_thread_to_document(\n                graph_client=graph_client,\n                channel=channel,\n                thread=thread,\n            ):\n                yield doc\n\n        except Exception as e:\n            yield ConnectorFailure(\n                failed_entity=EntityFailure(\n                    entity_id=message.id,\n                ),\n                failure_message=f\"Retrieval of message and its replies failed; {channel.id=} {message.id}\",\n                exception=e,\n            )\n\n\nif __name__ == \"__main__\":\n    from tests.daily.connectors.utils import load_all_from_connector\n\n    app_id = os.environ[\"TEAMS_APPLICATION_ID\"]\n    dir_id = os.environ[\"TEAMS_DIRECTORY_ID\"]\n    secret = os.environ[\"TEAMS_SECRET\"]\n\n    teams_env_var = os.environ.get(\"TEAMS\", None)\n    teams = teams_env_var.split(\",\") if teams_env_var else []\n\n    teams_connector = TeamsConnector(teams=teams)\n    teams_connector.load_credentials(\n        {\n            \"teams_client_id\": app_id,\n            \"teams_directory_id\": dir_id,\n            \"teams_client_secret\": secret,\n        }\n    )\n    teams_connector.validate_connector_settings()\n\n    for slim_doc in teams_connector.retrieve_all_slim_docs_perm_sync():\n        ...\n\n    for doc in load_all_from_connector(\n        connector=teams_connector,\n        start=0.0,\n        end=datetime.now(tz=timezone.utc).timestamp(),\n    ).documents:\n        print(doc)\n"
  },
  {
    "path": "backend/onyx/connectors/teams/models.py",
    "content": "from datetime import datetime\n\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom pydantic import Field\nfrom pydantic.alias_generators import to_camel\n\n\nclass Body(BaseModel):\n    content_type: str\n    content: str | None\n\n    model_config = ConfigDict(\n        alias_generator=to_camel,\n        populate_by_name=True,\n    )\n\n\nclass User(BaseModel):\n    id: str\n    display_name: str\n\n    model_config = ConfigDict(\n        alias_generator=to_camel,\n        populate_by_name=True,\n    )\n\n\nclass From(BaseModel):\n    user: User | None\n\n    model_config = ConfigDict(\n        alias_generator=to_camel,\n        populate_by_name=True,\n    )\n\n\nclass Message(BaseModel):\n    id: str\n    replyToId: str | None\n    subject: str | None\n    from_: From | None = Field(alias=\"from\")\n    body: Body\n    created_date_time: datetime\n    last_modified_date_time: datetime | None\n    last_edited_date_time: datetime | None\n    deleted_date_time: datetime | None\n    web_url: str\n\n    model_config = ConfigDict(\n        alias_generator=to_camel,\n        populate_by_name=True,\n    )\n"
  },
  {
    "path": "backend/onyx/connectors/teams/utils.py",
    "content": "import time\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom http import HTTPStatus\n\nfrom office365.graph_client import GraphClient  # type: ignore[import-untyped]\nfrom office365.teams.channels.channel import Channel  # type: ignore[import-untyped]\nfrom office365.teams.channels.channel import ConversationMember\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.teams.models import Message\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n_PUBLIC_MEMBERSHIP_TYPE = \"standard\"  # public teams channel\n\n\ndef _sanitize_message_user_display_name(value: dict) -> dict:\n    try:\n        from_obj = value.get(\"from\")\n        if isinstance(from_obj, dict):\n            user_obj = from_obj.get(\"user\")\n            if isinstance(user_obj, dict) and user_obj.get(\"displayName\") is None:\n                value = dict(value)\n                from_obj = dict(from_obj)\n                user_obj = dict(user_obj)\n                user_obj[\"displayName\"] = \"Unknown User\"\n                from_obj[\"user\"] = user_obj\n                value[\"from\"] = from_obj\n    except (AttributeError, TypeError, KeyError):\n        pass\n    return value\n\n\ndef _retry(\n    graph_client: GraphClient,\n    request_url: str,\n) -> dict:\n    MAX_RETRIES = 10\n    retry_number = 0\n\n    while retry_number < MAX_RETRIES:\n        response = graph_client.execute_request_direct(request_url)\n        if response.ok:\n            json = response.json()\n            if not isinstance(json, dict):\n                raise RuntimeError(f\"Expected a JSON object, instead got {json=}\")\n\n            return json\n\n        if response.status_code == int(HTTPStatus.TOO_MANY_REQUESTS):\n            retry_number += 1\n\n            cooldown = int(response.headers.get(\"Retry-After\", 10))\n            time.sleep(cooldown)\n\n            continue\n\n        response.raise_for_status()\n\n    raise RuntimeError(\n        f\"Max number of retries for hitting {request_url=} exceeded; unable to fetch data\"\n    )\n\n\ndef _get_next_url(\n    graph_client: GraphClient,\n    json_response: dict,\n) -> str | None:\n    next_url = json_response.get(\"@odata.nextLink\")\n\n    if not next_url:\n        return None\n\n    if not isinstance(next_url, str):\n        raise RuntimeError(\n            f\"Expected a string for the `@odata.nextUrl`, instead got {next_url=}\"\n        )\n\n    return next_url.removeprefix(graph_client.service_root_url()).removeprefix(\"/\")\n\n\ndef _get_or_fetch_email(\n    graph_client: GraphClient,\n    member: ConversationMember,\n) -> str | None:\n    if email := member.properties.get(\"email\"):\n        return email\n\n    user_id = member.properties.get(\"userId\")\n    if not user_id:\n        logger.warn(f\"No user-id found for this member; {member=}\")\n        return None\n\n    json_data = _retry(graph_client=graph_client, request_url=f\"users/{user_id}\")\n    email = json_data.get(\"userPrincipalName\")\n\n    if not isinstance(email, str):\n        logger.warn(f\"Expected email to be of type str, instead got {email=}\")\n        return None\n\n    return email\n\n\ndef _is_channel_public(channel: Channel) -> bool:\n    return (\n        channel.membership_type and channel.membership_type == _PUBLIC_MEMBERSHIP_TYPE\n    )\n\n\ndef fetch_messages(\n    graph_client: GraphClient,\n    team_id: str,\n    channel_id: str,\n    start: SecondsSinceUnixEpoch,\n) -> Generator[Message]:\n    startfmt = datetime.fromtimestamp(start, tz=timezone.utc).strftime(\n        \"%Y-%m-%dT%H:%M:%SZ\"\n    )\n\n    initial_request_url = f\"teams/{team_id}/channels/{channel_id}/messages/delta?$filter=lastModifiedDateTime gt {startfmt}\"\n\n    request_url: str | None = initial_request_url\n\n    while request_url:\n        json_response = _retry(graph_client=graph_client, request_url=request_url)\n\n        for value in json_response.get(\"value\", []):\n            yield Message(**_sanitize_message_user_display_name(value))\n\n        request_url = _get_next_url(\n            graph_client=graph_client, json_response=json_response\n        )\n\n\ndef fetch_replies(\n    graph_client: GraphClient,\n    team_id: str,\n    channel_id: str,\n    root_message_id: str,\n) -> Generator[Message]:\n    initial_request_url = (\n        f\"teams/{team_id}/channels/{channel_id}/messages/{root_message_id}/replies\"\n    )\n\n    request_url: str | None = initial_request_url\n\n    while request_url:\n        json_response = _retry(graph_client=graph_client, request_url=request_url)\n\n        for value in json_response.get(\"value\", []):\n            yield Message(**_sanitize_message_user_display_name(value))\n\n        request_url = _get_next_url(\n            graph_client=graph_client, json_response=json_response\n        )\n\n\ndef fetch_expert_infos(\n    graph_client: GraphClient, channel: Channel\n) -> list[BasicExpertInfo]:\n    members = channel.members.get_all(\n        # explicitly needed because of incorrect type definitions provided by the `office365` library\n        page_loaded=lambda _: None\n    ).execute_query_retry()\n\n    expert_infos = []\n    for member in members:\n        if not member.display_name:\n            logger.warn(f\"Failed to grab the display-name of {member=}; skipping\")\n            continue\n\n        email = _get_or_fetch_email(graph_client=graph_client, member=member)\n        if not email:\n            logger.warn(f\"Failed to grab the email of {member=}; skipping\")\n            continue\n\n        expert_infos.append(\n            BasicExpertInfo(\n                display_name=member.display_name,\n                email=email,\n            )\n        )\n\n    return expert_infos\n\n\ndef fetch_external_access(\n    graph_client: GraphClient,\n    channel: Channel,\n    expert_infos: list[BasicExpertInfo] | None = None,\n) -> ExternalAccess:\n    is_public = _is_channel_public(channel=channel)\n\n    if is_public:\n        return ExternalAccess.public()\n\n    expert_infos = (\n        expert_infos\n        if expert_infos is not None\n        else fetch_expert_infos(graph_client=graph_client, channel=channel)\n    )\n    emails = {expert_info.email for expert_info in expert_infos if expert_info.email}\n\n    return ExternalAccess(\n        external_user_emails=emails,\n        external_user_group_ids=set(),\n        is_public=is_public,\n    )\n"
  },
  {
    "path": "backend/onyx/connectors/testrail/__init__.py",
    "content": "# Package marker for TestRail connector\n"
  },
  {
    "path": "backend/onyx/connectors/testrail/connector.py",
    "content": "from __future__ import annotations\n\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import ClassVar\nfrom typing import Optional\n\nimport requests\nfrom bs4 import BeautifulSoup\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.html_utils import format_document_soup\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.text_processing import remove_markdown_image_references\n\n\nlogger = setup_logger()\n\n\nclass TestRailConnector(LoadConnector, PollConnector):\n    \"\"\"Connector for TestRail.\n\n    Minimal implementation that indexes Test Cases per project.\n    \"\"\"\n\n    document_source_type: ClassVar[DocumentSource] = DocumentSource.TESTRAIL\n\n    # Fields that need ID-to-label value mapping\n    FIELDS_NEEDING_VALUE_MAPPING: ClassVar[set[str]] = {\n        \"priority_id\",\n        \"custom_automation_type\",\n        \"custom_scenario_db_automation\",\n        \"custom_case_golden_canvas_automation\",\n        \"custom_customers\",\n        \"custom_case_environments\",\n        \"custom_case_overall_automation\",\n        \"custom_case_team_ownership\",\n        \"custom_case_unit_or_integration_automation\",\n        \"custom_effort\",\n    }\n\n    def __init__(\n        self,\n        batch_size: int = INDEX_BATCH_SIZE,\n        project_ids: str | list[int] | None = None,\n        cases_page_size: int | None = None,\n        max_pages: int | None = None,\n        skip_doc_absolute_chars: int | None = None,\n    ) -> None:\n        self.base_url: str | None = None\n        self.username: str | None = None\n        self.api_key: str | None = None\n        self.batch_size = batch_size\n        parsed_project_ids: list[int] | None\n\n        # Parse project_ids from string if needed\n        # None = all projects (no filtering), [] = no projects, [1,2,3] = specific projects\n        if isinstance(project_ids, str):\n            if project_ids.strip():\n                parsed_project_ids = [\n                    int(x.strip()) for x in project_ids.split(\",\") if x.strip()\n                ]\n            else:\n                # Empty string from UI means \"all projects\"\n                parsed_project_ids = None\n        elif project_ids is None:\n            parsed_project_ids = None\n        else:\n            parsed_project_ids = [int(pid) for pid in project_ids]\n\n        self.project_ids: list[int] | None = parsed_project_ids\n\n        # Handle empty strings from UI and convert to int with defaults\n        self.cases_page_size = (\n            int(cases_page_size)\n            if cases_page_size and str(cases_page_size).strip()\n            else 250\n        )\n        self.max_pages = (\n            int(max_pages) if max_pages and str(max_pages).strip() else 10000\n        )\n        self.skip_doc_absolute_chars = (\n            int(skip_doc_absolute_chars)\n            if skip_doc_absolute_chars and str(skip_doc_absolute_chars).strip()\n            else 200000\n        )\n\n        # Cache for field labels and value mappings - will be populated on first use\n        self._field_labels: dict[str, str] | None = None\n        self._value_maps: dict[str, dict[str, str]] | None = None\n\n    # --- Rich text sanitization helpers ---\n    # Note: TestRail stores some fields as HTML (e.g. shared test steps).\n    # This function handles both HTML and plain text.\n    @staticmethod\n    def _sanitize_rich_text(value: Any) -> str:\n        if value is None:\n            return \"\"\n        text = str(value)\n\n        # Parse HTML and remove image tags\n        soup = BeautifulSoup(text, \"html.parser\")\n\n        # Remove all img tags and their containers\n        for img_tag in soup.find_all(\"img\"):\n            img_tag.decompose()\n        for span in soup.find_all(\"span\", class_=\"markdown-img-container\"):\n            span.decompose()\n\n        # Use format_document_soup for better HTML-to-text conversion\n        # This preserves document structure (paragraphs, lists, line breaks, etc.)\n        text = format_document_soup(soup)\n\n        # Also remove markdown-style image references (in case any remain)\n        text = remove_markdown_image_references(text)\n\n        return text.strip()\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        # Expected keys from UI credential JSON\n        self.base_url = str(credentials[\"testrail_base_url\"]).rstrip(\"/\")\n        self.username = str(credentials[\"testrail_username\"])  # email or username\n        self.api_key = str(credentials[\"testrail_api_key\"])  # API key (password)\n        return None\n\n    def validate_connector_settings(self) -> None:\n        \"\"\"Lightweight validation to surface common misconfigurations early.\"\"\"\n        projects = self._list_projects()\n        if not projects:\n            logger.warning(\"TestRail: no projects visible to this credential.\")\n\n    # ---- API helpers ----\n    def _api_get(self, endpoint: str, params: Optional[dict[str, Any]] = None) -> Any:\n        if not self.base_url or not self.username or not self.api_key:\n            raise ConnectorMissingCredentialError(\"testrail\")\n\n        # TestRail API base is typically /index.php?/api/v2/<endpoint>\n        url = f\"{self.base_url}/index.php?/api/v2/{endpoint}\"\n        try:\n            response = requests.get(\n                url,\n                auth=(self.username, self.api_key),\n                params=params,\n            )\n            response.raise_for_status()\n        except requests.exceptions.HTTPError as e:\n            status = e.response.status_code if getattr(e, \"response\", None) else None\n            if status == 401:\n                raise CredentialExpiredError(\n                    \"Invalid or expired TestRail credentials (HTTP 401).\"\n                ) from e\n            if status == 403:\n                raise InsufficientPermissionsError(\n                    \"Insufficient permissions to access TestRail resources (HTTP 403).\"\n                ) from e\n            raise UnexpectedValidationError(\n                f\"Unexpected TestRail HTTP error (status={status}).\"\n            ) from e\n        except requests.exceptions.RequestException as e:\n            raise UnexpectedValidationError(f\"TestRail request failed: {e}\") from e\n\n        try:\n            return response.json()\n        except ValueError as e:\n            raise UnexpectedValidationError(\n                \"Invalid JSON returned by TestRail API\"\n            ) from e\n\n    def _list_projects(self) -> list[dict[str, Any]]:\n        projects = self._api_get(\"get_projects\")\n        if isinstance(projects, dict):\n            projects_list = projects.get(\"projects\")\n            return projects_list if isinstance(projects_list, list) else []\n        return []\n\n    def _list_suites(self, project_id: int) -> list[dict[str, Any]]:\n        \"\"\"Return suites for a project. If the project is in single-suite mode,\n        some TestRail instances may return an empty list; callers should\n        gracefully fallback to calling get_cases without suite_id.\n        \"\"\"\n        suites = self._api_get(f\"get_suites/{project_id}\")\n        if isinstance(suites, dict):\n            suites_list = suites.get(\"suites\")\n            return suites_list if isinstance(suites_list, list) else []\n        return []\n\n    def _get_case_fields(self) -> list[dict[str, Any]]:\n        \"\"\"Get case field definitions from TestRail API.\"\"\"\n        try:\n            fields = self._api_get(\"get_case_fields\")\n            return fields if isinstance(fields, list) else []\n        except Exception as e:\n            logger.warning(f\"Failed to fetch case fields from TestRail: {e}\")\n            return []\n\n    def _parse_items_string(self, items_str: str) -> dict[str, str]:\n        \"\"\"Parse items string from field config into ID -> label mapping.\n\n        Format: \"1, Option A\\\\n2, Option B\\\\n3, Option C\"\n        Returns: {\"1\": \"Option A\", \"2\": \"Option B\", \"3\": \"Option C\"}\n        \"\"\"\n        id_to_label: dict[str, str] = {}\n        if not items_str:\n            return id_to_label\n\n        for line in items_str.split(\"\\n\"):\n            line = line.strip()\n            if not line:\n                continue\n            parts = line.split(\",\", 1)\n            if len(parts) == 2:\n                item_id = parts[0].strip()\n                item_label = parts[1].strip()\n                id_to_label[item_id] = item_label\n\n        return id_to_label\n\n    def _build_field_maps(self) -> tuple[dict[str, str], dict[str, dict[str, str]]]:\n        \"\"\"Build both field labels and value mappings in one pass.\n\n        Returns:\n            (field_labels, value_maps) where:\n            - field_labels: system_name -> label\n            - value_maps: system_name -> {id -> label}\n        \"\"\"\n        field_labels = {}\n        value_maps = {}\n\n        try:\n            fields = self._get_case_fields()\n            for field in fields:\n                system_name = field.get(\"system_name\")\n\n                # Build field label map\n                label = field.get(\"label\")\n                if system_name and label:\n                    field_labels[system_name] = label\n\n                # Build value map if needed\n                if system_name in self.FIELDS_NEEDING_VALUE_MAPPING:\n                    configs = field.get(\"configs\", [])\n                    if configs and len(configs) > 0:\n                        options = configs[0].get(\"options\", {})\n                        items_str = options.get(\"items\")\n                        if items_str:\n                            value_maps[system_name] = self._parse_items_string(\n                                items_str\n                            )\n\n        except Exception as e:\n            logger.warning(f\"Failed to build field maps from TestRail: {e}\")\n\n        return field_labels, value_maps\n\n    def _get_field_labels(self) -> dict[str, str]:\n        \"\"\"Get field labels, fetching from API if not cached.\"\"\"\n        if self._field_labels is None:\n            self._field_labels, self._value_maps = self._build_field_maps()\n        return self._field_labels\n\n    def _get_value_maps(self) -> dict[str, dict[str, str]]:\n        \"\"\"Get value maps, fetching from API if not cached.\"\"\"\n        if self._value_maps is None:\n            self._field_labels, self._value_maps = self._build_field_maps()\n        return self._value_maps\n\n    def _map_field_value(self, field_name: str, field_value: Any) -> str:\n        \"\"\"Map a field value using the value map if available.\n\n        Examples:\n        - priority_id: 2 -> \"Medium\"\n        - custom_case_team_ownership: [10] -> \"Sim Platform\"\n        - custom_case_environments: [1, 2] -> \"Local, Cloud\"\n        \"\"\"\n        if field_value is None or field_value == \"\":\n            return \"\"\n\n        # Get value map for this field\n        value_maps = self._get_value_maps()\n        value_map = value_maps.get(field_name, {})\n\n        # Handle list values\n        if isinstance(field_value, list):\n            if not field_value:\n                return \"\"\n            mapped = [value_map.get(str(v), str(v)) for v in field_value]\n            return \", \".join(mapped)\n\n        # Handle single values\n        val_str = str(field_value)\n        return value_map.get(val_str, val_str)\n\n    def _get_cases(\n        self, project_id: int, suite_id: Optional[int], limit: int, offset: int\n    ) -> list[dict[str, Any]]:\n        \"\"\"Get cases for a project from the API.\"\"\"\n        params: dict[str, Any] = {\"limit\": limit, \"offset\": offset}\n        if suite_id is not None:\n            params[\"suite_id\"] = suite_id\n        cases_response = self._api_get(f\"get_cases/{project_id}\", params=params)\n        cases_list: list[dict[str, Any]] = []\n        if isinstance(cases_response, dict):\n            cases_items = cases_response.get(\"cases\")\n            if isinstance(cases_items, list):\n                cases_list = cases_items\n        return cases_list\n\n    def _iter_cases(\n        self,\n        project_id: int,\n        suite_id: Optional[int] = None,\n        start: Optional[SecondsSinceUnixEpoch] = None,\n        end: Optional[SecondsSinceUnixEpoch] = None,\n    ) -> Iterator[dict[str, Any]]:\n        # Pagination: TestRail supports 'limit' and 'offset' for many list endpoints\n        limit = self.cases_page_size\n        # Use a bounded page loop to avoid infinite loops on API anomalies\n        for page_index in range(self.max_pages):\n            offset = page_index * limit\n            cases = self._get_cases(project_id, suite_id, limit, offset)\n\n            if not cases:\n                break\n\n            # Filter by updated window if provided\n            for case in cases:\n                # 'updated_on' is unix timestamp (seconds)\n                updated_on = case.get(\"updated_on\") or case.get(\"created_on\")\n                if start is not None and updated_on is not None and updated_on < start:\n                    continue\n                if end is not None and updated_on is not None and updated_on > end:\n                    continue\n                yield case\n\n            if len(cases) < limit:\n                break\n\n    def _build_case_link(self, project_id: int, case_id: int) -> str:  # noqa: ARG002\n        # Standard UI link to a case\n        return f\"{self.base_url}/index.php?/cases/view/{case_id}\"\n\n    def _doc_from_case(\n        self,\n        project: dict[str, Any],\n        case: dict[str, Any],\n        suite: dict[str, Any] | None = None,  # noqa: ARG002\n    ) -> Document | None:\n        project_id = project.get(\"id\")\n        if not isinstance(project_id, int):\n            logger.warning(\n                \"Skipping TestRail case because project id is missing or invalid: %s\",\n                project_id,\n            )\n            return None\n\n        case_id = case.get(\"id\")\n        if not isinstance(case_id, int):\n            logger.warning(\n                \"Skipping TestRail case because case id is missing or invalid: %s\",\n                case_id,\n            )\n            return None\n\n        title = case.get(\"title\", f\"Case {case_id}\")\n        case_key = f\"C{case_id}\"\n\n        # Convert epoch seconds to aware datetime if available\n        updated = case.get(\"updated_on\") or case.get(\"created_on\")\n        updated_dt = (\n            datetime.fromtimestamp(updated, tz=timezone.utc)\n            if isinstance(updated, (int, float))\n            else None\n        )\n\n        text_lines: list[str] = []\n        if case.get(\"title\"):\n            text_lines.append(f\"Title: {case['title']}\")\n        if case_key:\n            text_lines.append(f\"Case ID: {case_key}\")\n        if case_id is not None:\n            text_lines.append(f\"ID: {case_id}\")\n        doc_link = case.get(\"custom_documentation_link\")\n        if doc_link:\n            text_lines.append(f\"Documentation: {doc_link}\")\n\n        # Add fields that need value mapping\n        field_labels = self._get_field_labels()\n        for field_name in self.FIELDS_NEEDING_VALUE_MAPPING:\n            field_value = case.get(field_name)\n            if field_value is not None and field_value != \"\" and field_value != []:\n                mapped_value = self._map_field_value(field_name, field_value)\n                if mapped_value:\n                    # Get label from TestRail field definition\n                    label = field_labels.get(\n                        field_name, field_name.replace(\"_\", \" \").title()\n                    )\n                    text_lines.append(f\"{label}: {mapped_value}\")\n\n        pre = self._sanitize_rich_text(case.get(\"custom_preconds\"))\n        if pre:\n            text_lines.append(f\"Preconditions: {pre}\")\n\n        # Steps: use separated steps format if available\n        steps_added = False\n        steps_separated = case.get(\"custom_steps_separated\")\n        if isinstance(steps_separated, list) and steps_separated:\n            rendered_steps: list[str] = []\n            for idx, step_item in enumerate(steps_separated, start=1):\n                step_content = self._sanitize_rich_text(step_item.get(\"content\"))\n                step_expected = self._sanitize_rich_text(step_item.get(\"expected\"))\n                parts: list[str] = []\n                if step_content:\n                    parts.append(f\"Step {idx}: {step_content}\")\n                else:\n                    parts.append(f\"Step {idx}:\")\n                if step_expected:\n                    parts.append(f\"Expected: {step_expected}\")\n                rendered_steps.append(\"\\n\".join(parts))\n            if rendered_steps:\n                text_lines.append(\"Steps:\\n\" + \"\\n\".join(rendered_steps))\n                steps_added = True\n\n        # Fallback to custom_steps and custom_expected if no separated steps\n        if not steps_added:\n            custom_steps = self._sanitize_rich_text(case.get(\"custom_steps\"))\n            custom_expected = self._sanitize_rich_text(case.get(\"custom_expected\"))\n            if custom_steps:\n                text_lines.append(f\"Steps: {custom_steps}\")\n            if custom_expected:\n                text_lines.append(f\"Expected: {custom_expected}\")\n\n        link = self._build_case_link(project_id, case_id)\n\n        # Build full text and apply size policies\n        full_text = \"\\n\".join(text_lines)\n        if len(full_text) > self.skip_doc_absolute_chars:\n            logger.warning(\n                f\"Skipping TestRail case {case_id} due to excessive size: {len(full_text)} chars\"\n            )\n            return None\n\n        # Metadata for document identification\n        metadata: dict[str, Any] = {}\n        if case_key:\n            metadata[\"case_key\"] = case_key\n\n        # Include the human-friendly case key in identifiers for easier search\n        display_title = f\"{case_key}: {title}\" if case_key else title\n\n        return Document(\n            id=f\"TESTRAIL_CASE_{case_id}\",\n            source=DocumentSource.TESTRAIL,\n            semantic_identifier=display_title,\n            title=display_title,\n            sections=[TextSection(link=link, text=full_text)],\n            metadata=metadata,\n            doc_updated_at=updated_dt,\n        )\n\n    def _generate_documents(\n        self,\n        start: Optional[SecondsSinceUnixEpoch],\n        end: Optional[SecondsSinceUnixEpoch],\n    ) -> GenerateDocumentsOutput:\n        if not self.base_url or not self.username or not self.api_key:\n            raise ConnectorMissingCredentialError(\"testrail\")\n\n        doc_batch: list[Document | HierarchyNode] = []\n\n        projects = self._list_projects()\n        project_filter: list[int] | None = self.project_ids\n\n        for project in projects:\n            project_id_raw = project.get(\"id\")\n            if not isinstance(project_id_raw, int):\n                logger.warning(\n                    \"Skipping TestRail project with invalid id: %s\", project_id_raw\n                )\n                continue\n            project_id = project_id_raw\n            # None = index all, [] = index none, [1,2,3] = index only those\n            if project_filter is not None and project_id not in project_filter:\n                continue\n\n            suites = self._list_suites(project_id)\n            if suites:\n                for s in suites:\n                    suite_id = s.get(\"id\")\n                    for case in self._iter_cases(project_id, suite_id, start, end):\n                        doc = self._doc_from_case(project, case, s)\n                        if doc is None:\n                            continue\n                        doc_batch.append(doc)\n                        if len(doc_batch) >= self.batch_size:\n                            yield doc_batch\n                            doc_batch = []\n            else:\n                # single-suite mode fallback\n                for case in self._iter_cases(project_id, None, start, end):\n                    doc = self._doc_from_case(project, case, None)\n                    if doc is None:\n                        continue\n                    doc_batch.append(doc)\n                    if len(doc_batch) >= self.batch_size:\n                        yield doc_batch\n                        doc_batch = []\n\n        if doc_batch:\n            yield doc_batch\n\n    # ---- Onyx interfaces ----\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        return self._generate_documents(start=None, end=None)\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        return self._generate_documents(start=start, end=end)\n\n\nif __name__ == \"__main__\":\n    from onyx.configs.app_configs import (\n        TESTRAIL_API_KEY,\n        TESTRAIL_BASE_URL,\n        TESTRAIL_USERNAME,\n    )\n\n    connector = TestRailConnector()\n\n    connector.load_credentials(\n        {\n            \"testrail_base_url\": TESTRAIL_BASE_URL,\n            \"testrail_username\": TESTRAIL_USERNAME,\n            \"testrail_api_key\": TESTRAIL_API_KEY,\n        }\n    )\n\n    connector.validate_connector_settings()\n\n    # Probe a tiny batch from load\n    total = 0\n    for batch in connector.load_from_state():\n        print(f\"Fetched batch: {len(batch)} docs\")\n        total += len(batch)\n        if total >= 10:\n            break\n    print(f\"Total fetched in test: {total}\")\n"
  },
  {
    "path": "backend/onyx/connectors/web/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/web/connector.py",
    "content": "import ipaddress\nimport random\nimport socket\nimport time\nfrom datetime import datetime\nfrom datetime import timezone\nfrom enum import Enum\nfrom typing import Any\nfrom typing import cast\nfrom typing import Tuple\nfrom urllib.parse import urljoin\nfrom urllib.parse import urlparse\n\nimport requests\nfrom bs4 import BeautifulSoup\nfrom oauthlib.oauth2 import BackendApplicationClient\nfrom playwright.sync_api import BrowserContext\nfrom playwright.sync_api import Playwright\nfrom playwright.sync_api import sync_playwright\nfrom playwright.sync_api import TimeoutError\nfrom requests_oauthlib import OAuth2Session  # type:ignore\nfrom urllib3.exceptions import MaxRetryError\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_ID\nfrom onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET\nfrom onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL\nfrom onyx.configs.app_configs import WEB_CONNECTOR_VALIDATE_URLS\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.html_utils import web_html_cleanup\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.sitemap import list_pages_for_site\nfrom onyx.utils.web_content import extract_pdf_text\nfrom onyx.utils.web_content import is_pdf_resource\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n\nclass ScrapeSessionContext:\n    \"\"\"Session level context for scraping\"\"\"\n\n    def __init__(self, base_url: str, to_visit: list[str]):\n        self.base_url = base_url\n        self.to_visit = to_visit\n        self.visited_links: set[str] = set()\n        self.content_hashes: set[int] = set()\n\n        self.doc_batch: list[Document | HierarchyNode] = []\n\n        self.at_least_one_doc: bool = False\n        self.last_error: str | None = None\n        self.needs_retry: bool = False\n\n        self.playwright: Playwright | None = None\n        self.playwright_context: BrowserContext | None = None\n\n    def initialize(self) -> None:\n        self.stop()\n        self.playwright, self.playwright_context = start_playwright()\n\n    def stop(self) -> None:\n        if self.playwright_context:\n            self.playwright_context.close()\n            self.playwright_context = None\n\n        if self.playwright:\n            self.playwright.stop()\n            self.playwright = None\n\n\nclass ScrapeResult:\n    doc: Document | None = None\n    retry: bool = False\n\n\nWEB_CONNECTOR_MAX_SCROLL_ATTEMPTS = 20\n# Threshold for determining when to replace vs append iframe content\nIFRAME_TEXT_LENGTH_THRESHOLD = 700\n# Message indicating JavaScript is disabled, which often appears when scraping fails\nJAVASCRIPT_DISABLED_MESSAGE = \"You have JavaScript disabled in your browser\"\n# Grace period after page navigation to allow bot-detection challenges\n# and SPA content rendering to complete\nPAGE_RENDER_TIMEOUT_MS = 5000\n\n# Define common headers that mimic a real browser\nDEFAULT_USER_AGENT = \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36\"\nDEFAULT_HEADERS = {\n    \"User-Agent\": DEFAULT_USER_AGENT,\n    \"Accept\": (\n        \"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,\"\n        \"image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7\"\n    ),\n    \"Accept-Language\": \"en-US,en;q=0.9\",\n    # Brotli decoding has been flaky in brotlicffi/httpx for certain chunked responses;\n    # stick to gzip/deflate to keep connectivity checks stable.\n    \"Accept-Encoding\": \"gzip, deflate\",\n    \"Connection\": \"keep-alive\",\n    \"Upgrade-Insecure-Requests\": \"1\",\n    \"Sec-Fetch-Dest\": \"document\",\n    \"Sec-Fetch-Mode\": \"navigate\",\n    \"Sec-Fetch-Site\": \"none\",\n    \"Sec-Fetch-User\": \"?1\",\n    \"Sec-CH-UA\": '\"Google Chrome\";v=\"123\", \"Not:A-Brand\";v=\"8\"',\n    \"Sec-CH-UA-Mobile\": \"?0\",\n    \"Sec-CH-UA-Platform\": '\"macOS\"',\n}\n\n\nclass WEB_CONNECTOR_VALID_SETTINGS(str, Enum):\n    # Given a base site, index everything under that path\n    RECURSIVE = \"recursive\"\n    # Given a URL, index only the given page\n    SINGLE = \"single\"\n    # Given a sitemap.xml URL, parse all the pages in it\n    SITEMAP = \"sitemap\"\n    # Given a file upload where every line is a URL, parse all the URLs provided\n    UPLOAD = \"upload\"\n\n\ndef protected_url_check(url: str) -> None:\n    \"\"\"Couple considerations:\n    - DNS mapping changes over time so we don't want to cache the results\n    - Fetching this is assumed to be relatively fast compared to other bottlenecks like reading\n      the page or embedding the contents\n    - To be extra safe, all IPs associated with the URL must be global\n    - This is to prevent misuse and not explicit attacks\n    \"\"\"\n    if not WEB_CONNECTOR_VALIDATE_URLS:\n        return\n\n    parse = urlparse(url)\n    if parse.scheme != \"http\" and parse.scheme != \"https\":\n        raise ValueError(\"URL must be of scheme https?://\")\n\n    if not parse.hostname:\n        raise ValueError(\"URL must include a hostname\")\n\n    try:\n        # This may give a large list of IP addresses for domains with extensive DNS configurations\n        # such as large distributed systems of CDNs\n        info = socket.getaddrinfo(parse.hostname, None)\n    except socket.gaierror as e:\n        raise ConnectionError(f\"DNS resolution failed for {parse.hostname}: {e}\")\n\n    for address in info:\n        ip = address[4][0]\n        if not ipaddress.ip_address(ip).is_global:\n            raise ValueError(\n                f\"Non-global IP address detected: {ip}, skipping page {url}. \"\n                f\"The Web Connector is not allowed to read loopback, link-local, or private ranges\"\n            )\n\n\ndef check_internet_connection(url: str) -> None:\n    try:\n        # Use a more realistic browser-like request\n        session = requests.Session()\n        session.headers.update(DEFAULT_HEADERS)\n\n        response = session.get(url, timeout=5, allow_redirects=True)\n\n        response.raise_for_status()\n    except requests.exceptions.HTTPError as e:\n        # Extract status code from the response, defaulting to -1 if response is None\n        status_code = e.response.status_code if e.response is not None else -1\n\n        # For 403 errors, we do have internet connection, but the request is blocked by the server\n        # this is usually due to bot detection. Future calls (via Playwright) will usually get\n        # around this.\n        if status_code == 403:\n            logger.warning(\n                f\"Received 403 Forbidden for {url}, will retry with browser automation\"\n            )\n            return\n\n        error_msg = {\n            400: \"Bad Request\",\n            401: \"Unauthorized\",\n            403: \"Forbidden\",\n            404: \"Not Found\",\n            500: \"Internal Server Error\",\n            502: \"Bad Gateway\",\n            503: \"Service Unavailable\",\n            504: \"Gateway Timeout\",\n        }.get(status_code, \"HTTP Error\")\n        raise Exception(f\"{error_msg} ({status_code}) for {url} - {e}\")\n    except requests.exceptions.SSLError as e:\n        cause = (\n            e.args[0].reason\n            if isinstance(e.args, tuple) and isinstance(e.args[0], MaxRetryError)\n            else e.args\n        )\n        raise Exception(f\"SSL error {str(cause)}\")\n    except (requests.RequestException, ValueError) as e:\n        raise Exception(f\"Unable to reach {url} - check your internet connection: {e}\")\n\n\ndef is_valid_url(url: str) -> bool:\n    try:\n        result = urlparse(url)\n        return all([result.scheme, result.netloc])\n    except ValueError:\n        return False\n\n\ndef _same_site(base_url: str, candidate_url: str) -> bool:\n    base, candidate = urlparse(base_url), urlparse(candidate_url)\n    base_netloc = base.netloc.lower().removeprefix(\"www.\")\n    candidate_netloc = candidate.netloc.lower().removeprefix(\"www.\")\n    if base_netloc != candidate_netloc:\n        return False\n\n    base_path = (base.path or \"/\").rstrip(\"/\")\n    if base_path in (\"\", \"/\"):\n        return True\n\n    candidate_path = candidate.path or \"/\"\n    if candidate_path == base_path:\n        return True\n\n    boundary = f\"{base_path}/\"\n    return candidate_path.startswith(boundary)\n\n\ndef get_internal_links(\n    base_url: str, url: str, soup: BeautifulSoup, should_ignore_pound: bool = True\n) -> set[str]:\n    internal_links = set()\n    for link in cast(list[dict[str, Any]], soup.find_all(\"a\")):\n        href = cast(str | None, link.get(\"href\"))\n        if not href:\n            continue\n\n        # Account for malformed backslashes in URLs\n        href = href.replace(\"\\\\\", \"/\")\n\n        # \"#!\" indicates the page is using a hashbang URL, which is a client-side routing technique\n        if should_ignore_pound and \"#\" in href and \"#!\" not in href:\n            href = href.split(\"#\")[0]\n\n        if not is_valid_url(href):\n            # Relative path handling\n            href = urljoin(url, href)\n\n        if _same_site(base_url, href):\n            internal_links.add(href)\n    return internal_links\n\n\ndef start_playwright() -> Tuple[Playwright, BrowserContext]:\n    playwright = sync_playwright().start()\n\n    # Launch browser with more realistic settings\n    browser = playwright.chromium.launch(\n        headless=True,\n        args=[\n            \"--disable-blink-features=AutomationControlled\",\n            \"--disable-features=IsolateOrigins,site-per-process\",\n            \"--disable-site-isolation-trials\",\n        ],\n    )\n\n    # Create a context with realistic browser properties\n    context = browser.new_context(\n        user_agent=DEFAULT_USER_AGENT,\n        viewport={\"width\": 1440, \"height\": 900},\n        device_scale_factor=2.0,\n        locale=\"en-US\",\n        timezone_id=\"America/Los_Angeles\",\n        has_touch=False,\n        java_script_enabled=True,\n        color_scheme=\"light\",\n        # Add more realistic browser properties\n        bypass_csp=True,\n        ignore_https_errors=True,\n    )\n\n    # Set additional headers to mimic a real browser\n    context.set_extra_http_headers(\n        {\n            \"Accept\": DEFAULT_HEADERS[\"Accept\"],\n            \"Accept-Language\": DEFAULT_HEADERS[\"Accept-Language\"],\n            \"Sec-Fetch-Dest\": DEFAULT_HEADERS[\"Sec-Fetch-Dest\"],\n            \"Sec-Fetch-Mode\": DEFAULT_HEADERS[\"Sec-Fetch-Mode\"],\n            \"Sec-Fetch-Site\": DEFAULT_HEADERS[\"Sec-Fetch-Site\"],\n            \"Sec-Fetch-User\": DEFAULT_HEADERS[\"Sec-Fetch-User\"],\n            \"Sec-CH-UA\": DEFAULT_HEADERS[\"Sec-CH-UA\"],\n            \"Sec-CH-UA-Mobile\": DEFAULT_HEADERS[\"Sec-CH-UA-Mobile\"],\n            \"Sec-CH-UA-Platform\": DEFAULT_HEADERS[\"Sec-CH-UA-Platform\"],\n            \"Cache-Control\": \"max-age=0\",\n            \"DNT\": \"1\",\n        }\n    )\n\n    # Add a script to modify navigator properties to avoid detection\n    context.add_init_script(\n        \"\"\"\n        Object.defineProperty(navigator, 'webdriver', {\n            get: () => undefined\n        });\n        Object.defineProperty(navigator, 'plugins', {\n            get: () => [1, 2, 3, 4, 5]\n        });\n        Object.defineProperty(navigator, 'languages', {\n            get: () => ['en-US', 'en']\n        });\n    \"\"\"\n    )\n\n    if (\n        WEB_CONNECTOR_OAUTH_CLIENT_ID\n        and WEB_CONNECTOR_OAUTH_CLIENT_SECRET\n        and WEB_CONNECTOR_OAUTH_TOKEN_URL\n    ):\n        client = BackendApplicationClient(client_id=WEB_CONNECTOR_OAUTH_CLIENT_ID)\n        oauth = OAuth2Session(client=client)\n        token = oauth.fetch_token(\n            token_url=WEB_CONNECTOR_OAUTH_TOKEN_URL,\n            client_id=WEB_CONNECTOR_OAUTH_CLIENT_ID,\n            client_secret=WEB_CONNECTOR_OAUTH_CLIENT_SECRET,\n        )\n        context.set_extra_http_headers(\n            {\"Authorization\": \"Bearer {}\".format(token[\"access_token\"])}\n        )\n\n    return playwright, context\n\n\ndef extract_urls_from_sitemap(sitemap_url: str) -> list[str]:\n    # requests should handle brotli compression automatically\n    # as long as the brotli package is available in the venv. Leaving this line here to avoid\n    # a regression as someone says \"Ah, looks like this brotli package isn't used anywhere, let's remove it\"\n    # import brotli\n    try:\n        response = requests.get(sitemap_url, headers=DEFAULT_HEADERS)\n        response.raise_for_status()\n        soup = BeautifulSoup(response.content, \"html.parser\")\n        urls = [\n            _ensure_absolute_url(sitemap_url, loc_tag.text)\n            for loc_tag in soup.find_all(\"loc\")\n        ]\n\n        if len(urls) == 0 and len(soup.find_all(\"urlset\")) == 0:\n            # the given url doesn't look like a sitemap, let's try to find one\n            urls = list_pages_for_site(sitemap_url)\n\n        if len(urls) == 0:\n            raise ValueError(\n                f\"No URLs found in sitemap {sitemap_url}. Try using the 'single' or 'recursive' scraping options instead.\"\n            )\n\n        return urls\n    except requests.RequestException as e:\n        raise RuntimeError(f\"Failed to fetch sitemap from {sitemap_url}: {e}\")\n    except ValueError as e:\n        raise RuntimeError(f\"Error processing sitemap {sitemap_url}: {e}\")\n    except Exception as e:\n        raise RuntimeError(\n            f\"Unexpected error while processing sitemap {sitemap_url}: {e}\"\n        )\n\n\ndef _ensure_absolute_url(source_url: str, maybe_relative_url: str) -> str:\n    if not urlparse(maybe_relative_url).netloc:\n        return urljoin(source_url, maybe_relative_url)\n    return maybe_relative_url\n\n\ndef _ensure_valid_url(url: str) -> str:\n    if \"://\" not in url:\n        return \"https://\" + url\n    return url\n\n\ndef _read_urls_file(location: str) -> list[str]:\n    with open(location, \"r\") as f:\n        urls = [_ensure_valid_url(line.strip()) for line in f if line.strip()]\n    return urls\n\n\ndef _get_datetime_from_last_modified_header(last_modified: str) -> datetime | None:\n    try:\n        return datetime.strptime(last_modified, \"%a, %d %b %Y %H:%M:%S %Z\").replace(\n            tzinfo=timezone.utc\n        )\n    except (ValueError, TypeError):\n        return None\n\n\ndef _handle_cookies(context: BrowserContext, url: str) -> None:\n    \"\"\"Handle cookies for the given URL to help with bot detection\"\"\"\n    try:\n        # Parse the URL to get the domain\n        parsed_url = urlparse(url)\n        domain = parsed_url.netloc\n\n        # Add some common cookies that might help with bot detection\n        cookies: list[dict[str, str]] = [\n            {\n                \"name\": \"cookieconsent\",\n                \"value\": \"accepted\",\n                \"domain\": domain,\n                \"path\": \"/\",\n            },\n            {\n                \"name\": \"consent\",\n                \"value\": \"true\",\n                \"domain\": domain,\n                \"path\": \"/\",\n            },\n            {\n                \"name\": \"session\",\n                \"value\": \"random_session_id\",\n                \"domain\": domain,\n                \"path\": \"/\",\n            },\n        ]\n\n        # Add cookies to the context\n        for cookie in cookies:\n            try:\n                context.add_cookies([cookie])  # type: ignore\n            except Exception as e:\n                logger.debug(f\"Failed to add cookie {cookie['name']} for {domain}: {e}\")\n    except Exception:\n        logger.exception(\n            f\"Unexpected error while handling cookies for Web Connector with URL {url}\"\n        )\n\n\nclass WebConnector(LoadConnector):\n    MAX_RETRIES = 3\n\n    def __init__(\n        self,\n        base_url: str,  # Can't change this without disrupting existing users\n        web_connector_type: str = WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value,\n        mintlify_cleanup: bool = True,  # Mostly ok to apply to other websites as well\n        batch_size: int = INDEX_BATCH_SIZE,\n        scroll_before_scraping: bool = False,\n        **kwargs: Any,  # noqa: ARG002\n    ) -> None:\n        self.mintlify_cleanup = mintlify_cleanup\n        self.batch_size = batch_size\n        self.recursive = False\n        self.scroll_before_scraping = scroll_before_scraping\n        self.web_connector_type = web_connector_type\n        if web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value:\n            self.recursive = True\n            self.to_visit_list = [_ensure_valid_url(base_url)]\n            return\n\n        elif web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value:\n            self.to_visit_list = [_ensure_valid_url(base_url)]\n\n        elif web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP:\n            self.to_visit_list = extract_urls_from_sitemap(_ensure_valid_url(base_url))\n\n        elif web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.UPLOAD:\n            # Explicitly check if running in multi-tenant mode to prevent potential security risks\n            if MULTI_TENANT:\n                raise ValueError(\n                    \"Upload input for web connector is not supported in cloud environments\"\n                )\n\n            logger.warning(\n                \"This is not a UI supported Web Connector flow, are you sure you want to do this?\"\n            )\n            self.to_visit_list = _read_urls_file(base_url)\n\n        else:\n            raise ValueError(\n                \"Invalid Web Connector Config, must choose a valid type between: \"\n            )\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        if credentials:\n            logger.warning(\"Unexpected credentials provided for Web Connector\")\n        return None\n\n    def _do_scrape(\n        self,\n        index: int,\n        initial_url: str,\n        session_ctx: ScrapeSessionContext,\n    ) -> ScrapeResult:\n        \"\"\"Returns a ScrapeResult object with a doc and retry flag.\"\"\"\n\n        if session_ctx.playwright is None:\n            raise RuntimeError(\"scrape_context.playwright is None\")\n\n        if session_ctx.playwright_context is None:\n            raise RuntimeError(\"scrape_context.playwright_context is None\")\n\n        result = ScrapeResult()\n\n        # Handle cookies for the URL\n        _handle_cookies(session_ctx.playwright_context, initial_url)\n\n        # First do a HEAD request to check content type without downloading the entire content\n        head_response = requests.head(\n            initial_url, headers=DEFAULT_HEADERS, allow_redirects=True\n        )\n        content_type = head_response.headers.get(\"content-type\")\n        is_pdf = is_pdf_resource(initial_url, content_type)\n\n        if is_pdf:\n            # PDF files are not checked for links\n            response = requests.get(initial_url, headers=DEFAULT_HEADERS)\n            page_text, metadata = extract_pdf_text(response.content)\n            last_modified = response.headers.get(\"Last-Modified\")\n\n            result.doc = Document(\n                id=initial_url,\n                sections=[TextSection(link=initial_url, text=page_text)],\n                source=DocumentSource.WEB,\n                semantic_identifier=initial_url.rstrip(\"/\").split(\"/\")[-1]\n                or initial_url,\n                metadata=metadata,\n                doc_updated_at=(\n                    _get_datetime_from_last_modified_header(last_modified)\n                    if last_modified\n                    else None\n                ),\n            )\n\n            return result\n\n        page = session_ctx.playwright_context.new_page()\n        try:\n            # Use \"commit\" instead of \"domcontentloaded\" to avoid hanging on bot-detection pages\n            # that may never fire domcontentloaded. \"commit\" waits only for navigation to be\n            # committed (response received), then we add a short wait for initial rendering.\n            page_response = page.goto(\n                initial_url,\n                timeout=30000,  # 30 seconds\n                wait_until=\"commit\",  # Wait for navigation to commit\n            )\n            # Give the page a moment to start rendering after navigation commits.\n            # Allows CloudFlare and other bot-detection challenges to complete.\n            page.wait_for_timeout(PAGE_RENDER_TIMEOUT_MS)\n\n            # Wait for network activity to settle so SPAs that fetch content\n            # asynchronously after the initial JS bundle have time to render.\n            try:\n                # A bit of extra time to account for long-polling, websockets, etc.\n                page.wait_for_load_state(\"networkidle\", timeout=PAGE_RENDER_TIMEOUT_MS)\n            except TimeoutError:\n                pass\n\n            last_modified = (\n                page_response.header_value(\"Last-Modified\") if page_response else None\n            )\n            final_url = page.url\n            if final_url != initial_url:\n                protected_url_check(final_url)\n                initial_url = final_url\n                if initial_url in session_ctx.visited_links:\n                    logger.info(\n                        f\"{index}: {initial_url} redirected to {final_url} - already indexed\"\n                    )\n                    page.close()\n                    return result\n\n                logger.info(f\"{index}: {initial_url} redirected to {final_url}\")\n                session_ctx.visited_links.add(initial_url)\n\n            # If we got here, the request was successful\n            if self.scroll_before_scraping:\n                scroll_attempts = 0\n                previous_height = page.evaluate(\"document.body.scrollHeight\")\n                while scroll_attempts < WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS:\n                    page.evaluate(\"window.scrollTo(0, document.body.scrollHeight)\")\n                    # Wait for content to load, but catch timeout if page never reaches networkidle\n                    # (e.g., CloudFlare protection keeps making requests)\n                    try:\n                        page.wait_for_load_state(\n                            \"networkidle\", timeout=PAGE_RENDER_TIMEOUT_MS\n                        )\n                    except TimeoutError:\n                        # If networkidle times out, just give it a moment for content to render\n                        time.sleep(1)\n                    time.sleep(0.5)  # let javascript run\n\n                    new_height = page.evaluate(\"document.body.scrollHeight\")\n                    if new_height == previous_height:\n                        break  # Stop scrolling when no more content is loaded\n                    previous_height = new_height\n                    scroll_attempts += 1\n\n            content = page.content()\n            soup = BeautifulSoup(content, \"html.parser\")\n\n            if self.recursive:\n                internal_links = get_internal_links(\n                    session_ctx.base_url, initial_url, soup\n                )\n                for link in internal_links:\n                    if link not in session_ctx.visited_links:\n                        session_ctx.to_visit.append(link)\n\n            if page_response and str(page_response.status)[0] in (\"4\", \"5\"):\n                session_ctx.last_error = f\"Skipped indexing {initial_url} due to HTTP {page_response.status} response\"\n                logger.info(session_ctx.last_error)\n                result.retry = True\n                return result\n\n            # after this point, we don't need the caller to retry\n            parsed_html = web_html_cleanup(soup, self.mintlify_cleanup)\n\n            \"\"\"For websites containing iframes that need to be scraped,\n            the code below can extract text from within these iframes.\n            \"\"\"\n            logger.debug(\n                f\"{index}: Length of cleaned text {len(parsed_html.cleaned_text)}\"\n            )\n            if JAVASCRIPT_DISABLED_MESSAGE in parsed_html.cleaned_text:\n                iframe_count = page.frame_locator(\"iframe\").locator(\"html\").count()\n                if iframe_count > 0:\n                    iframe_texts = (\n                        page.frame_locator(\"iframe\").locator(\"html\").all_inner_texts()\n                    )\n                    document_text = \"\\n\".join(iframe_texts)\n                    \"\"\" 700 is the threshold value for the length of the text extracted\n                    from the iframe based on the issue faced \"\"\"\n                    if len(parsed_html.cleaned_text) < IFRAME_TEXT_LENGTH_THRESHOLD:\n                        parsed_html.cleaned_text = document_text\n                    else:\n                        parsed_html.cleaned_text += \"\\n\" + document_text\n\n            # Sometimes pages with #! will serve duplicate content\n            # There are also just other ways this can happen\n            hashed_text = hash((parsed_html.title, parsed_html.cleaned_text))\n            if hashed_text in session_ctx.content_hashes:\n                logger.info(\n                    f\"{index}: Skipping duplicate title + content for {initial_url}\"\n                )\n                return result\n\n            session_ctx.content_hashes.add(hashed_text)\n\n            result.doc = Document(\n                id=initial_url,\n                sections=[TextSection(link=initial_url, text=parsed_html.cleaned_text)],\n                source=DocumentSource.WEB,\n                semantic_identifier=parsed_html.title or initial_url,\n                metadata={},\n                doc_updated_at=(\n                    _get_datetime_from_last_modified_header(last_modified)\n                    if last_modified\n                    else None\n                ),\n            )\n        finally:\n            page.close()\n\n        return result\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        \"\"\"Traverses through all pages found on the website\n        and converts them into documents\"\"\"\n\n        if not self.to_visit_list:\n            raise ValueError(\"No URLs to visit\")\n\n        base_url = self.to_visit_list[0]  # For the recursive case\n        check_internet_connection(base_url)  # make sure we can connect to the base url\n\n        session_ctx = ScrapeSessionContext(base_url, self.to_visit_list)\n        session_ctx.initialize()\n\n        while session_ctx.to_visit:\n            initial_url = session_ctx.to_visit.pop()\n            if initial_url in session_ctx.visited_links:\n                continue\n            session_ctx.visited_links.add(initial_url)\n\n            try:\n                protected_url_check(initial_url)\n            except Exception as e:\n                session_ctx.last_error = f\"Invalid URL {initial_url} due to {e}\"\n                logger.warning(session_ctx.last_error)\n                continue\n\n            index = len(session_ctx.visited_links)\n            logger.info(f\"{index}: Visiting {initial_url}\")\n\n            # Add retry mechanism with exponential backoff\n            retry_count = 0\n\n            while retry_count < self.MAX_RETRIES:\n                if retry_count > 0:\n                    # Add a random delay between retries (exponential backoff)\n                    delay = min(2**retry_count + random.uniform(0, 1), 10)\n                    logger.info(\n                        f\"Retry {retry_count}/{self.MAX_RETRIES} for {initial_url} after {delay:.2f}s delay\"\n                    )\n                    time.sleep(delay)\n\n                try:\n                    result = self._do_scrape(index, initial_url, session_ctx)\n                    if result.retry:\n                        continue\n\n                    if result.doc:\n                        session_ctx.doc_batch.append(result.doc)\n                except Exception as e:\n                    session_ctx.last_error = f\"Failed to fetch '{initial_url}': {e}\"\n                    logger.exception(session_ctx.last_error)\n                    session_ctx.initialize()\n                    continue\n                finally:\n                    retry_count += 1\n\n                break  # success / don't retry\n\n            if len(session_ctx.doc_batch) >= self.batch_size:\n                session_ctx.initialize()\n                session_ctx.at_least_one_doc = True\n                yield session_ctx.doc_batch\n                session_ctx.doc_batch = []\n\n        if session_ctx.doc_batch:\n            session_ctx.stop()\n            session_ctx.at_least_one_doc = True\n            yield session_ctx.doc_batch\n\n        if not session_ctx.at_least_one_doc:\n            if session_ctx.last_error:\n                raise RuntimeError(session_ctx.last_error)\n            raise RuntimeError(\"No valid pages found.\")\n\n        session_ctx.stop()\n\n    def validate_connector_settings(self) -> None:\n        # Make sure we have at least one valid URL to check\n        if not self.to_visit_list:\n            raise ConnectorValidationError(\n                \"No URL configured. Please provide at least one valid URL.\"\n            )\n\n        if (\n            self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP.value\n            or self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value\n        ):\n            return None\n\n        # We'll just test the first URL for connectivity and correctness\n        test_url = self.to_visit_list[0]\n\n        # Check that the URL is allowed and well-formed\n        try:\n            protected_url_check(test_url)\n        except ValueError as e:\n            raise ConnectorValidationError(\n                f\"Protected URL check failed for '{test_url}': {e}\"\n            )\n        except ConnectionError as e:\n            # Typically DNS or other network issues\n            raise ConnectorValidationError(str(e))\n\n        # Make a quick request to see if we get a valid response\n        try:\n            check_internet_connection(test_url)\n        except Exception as e:\n            err_str = str(e)\n            if \"401\" in err_str:\n                raise CredentialExpiredError(\n                    f\"Unauthorized access to '{test_url}': {e}\"\n                )\n            elif \"403\" in err_str:\n                raise InsufficientPermissionsError(\n                    f\"Forbidden access to '{test_url}': {e}\"\n                )\n            elif \"404\" in err_str:\n                raise ConnectorValidationError(f\"Page not found for '{test_url}': {e}\")\n            elif \"Max retries exceeded\" in err_str and \"NameResolutionError\" in err_str:\n                raise ConnectorValidationError(\n                    f\"Unable to resolve hostname for '{test_url}'. Please check the URL and your internet connection.\"\n                )\n            else:\n                # Could be a 5xx or another error, treat as unexpected\n                raise UnexpectedValidationError(\n                    f\"Unexpected error validating '{test_url}': {e}\"\n                )\n\n\nif __name__ == \"__main__\":\n    connector = WebConnector(\"https://docs.onyx.app/\")\n    document_batches = connector.load_from_state()\n    print(next(document_batches))\n"
  },
  {
    "path": "backend/onyx/connectors/wikipedia/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/wikipedia/connector.py",
    "content": "from typing import ClassVar\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.mediawiki import wiki\n\n\nclass WikipediaConnector(wiki.MediaWikiConnector):\n    \"\"\"Connector for Wikipedia.\"\"\"\n\n    document_source_type: ClassVar[DocumentSource] = DocumentSource.WIKIPEDIA\n\n    def __init__(\n        self,\n        categories: list[str],\n        pages: list[str],\n        recurse_depth: int,\n        language_code: str = \"en\",\n        batch_size: int = INDEX_BATCH_SIZE,\n    ) -> None:\n        super().__init__(\n            hostname=\"wikipedia.org\",\n            categories=categories,\n            pages=pages,\n            recurse_depth=recurse_depth,\n            language_code=language_code,\n            batch_size=batch_size,\n        )\n"
  },
  {
    "path": "backend/onyx/connectors/xenforo/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/xenforo/connector.py",
    "content": "\"\"\"\nThis is the XenforoConnector class. It is used to connect to a Xenforo forum and load or update documents from the forum.\n\nTo use this class, you need to provide the URL of the Xenforo forum board you want to connect to when creating an instance\nof the class. The URL should be a string that starts with 'http://' or 'https://', followed by the domain name of the\nforum, followed by the board name. For example:\n\n    base_url = 'https://www.example.com/forum/boards/some-topic/'\n\nThe `load_from_state` method is used to load documents from the forum. It takes an optional `state` parameter, which\ncan be used to specify a state from which to start loading documents.\n\"\"\"\n\nimport re\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\nfrom urllib.parse import urlparse\n\nimport pytz\nimport requests\nfrom bs4 import BeautifulSoup\nfrom bs4 import Tag\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import datetime_to_utc\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef get_title(soup: BeautifulSoup) -> str:\n    el = soup.find(\"h1\", \"p-title-value\")\n    if not el:\n        return \"\"\n    title = el.text\n    for char in (\";\", \":\", \"!\", \"*\", \"/\", \"\\\\\", \"?\", '\"', \"<\", \">\", \"|\"):\n        title = title.replace(char, \"_\")\n    return title\n\n\ndef get_pages(soup: BeautifulSoup, url: str) -> list[str]:\n    page_tags = soup.select(\"li.pageNav-page\")\n    page_numbers = []\n    for button in page_tags:\n        if re.match(r\"^\\d+$\", button.text):\n            page_numbers.append(button.text)\n\n    max_pages = int(max(page_numbers, key=int)) if page_numbers else 1\n\n    all_pages = []\n    for x in range(1, int(max_pages) + 1):\n        all_pages.append(f\"{url}page-{x}\")\n    return all_pages\n\n\ndef parse_post_date(post_element: BeautifulSoup) -> datetime:\n    el = post_element.find(\"time\")\n    if not isinstance(el, Tag) or \"datetime\" not in el.attrs:\n        return datetime.utcfromtimestamp(0).replace(tzinfo=timezone.utc)\n\n    date_value = el[\"datetime\"]\n\n    # Ensure date_value is a string (if it's a list, take the first element)\n    if isinstance(date_value, list):\n        date_value = date_value[0]\n\n    post_date = datetime.strptime(date_value, \"%Y-%m-%dT%H:%M:%S%z\")\n    return datetime_to_utc(post_date)\n\n\ndef scrape_page_posts(\n    soup: BeautifulSoup,\n    page_index: int,\n    url: str,\n    initial_run: bool,\n    start_time: datetime,\n) -> list:\n    title = get_title(soup)\n\n    documents = []\n    for post in soup.find_all(\"div\", class_=\"message-inner\"):\n        post_date = parse_post_date(post)\n        if initial_run or post_date > start_time:\n            el = post.find(\"div\", class_=\"bbWrapper\")\n            if not el:\n                continue\n            post_text = el.get_text(strip=True) + \"\\n\"\n            author_tag = post.find(\"a\", class_=\"username\")\n            if author_tag is None:\n                author_tag = post.find(\"span\", class_=\"username\")\n            author = author_tag.get_text(strip=True) if author_tag else \"Deleted author\"\n            formatted_time = post_date.strftime(\"%Y-%m-%d %H:%M:%S\")\n\n            # TODO: if a caller calls this for each page of a thread, it may see the\n            # same post multiple times if there is a sticky post\n            # that appears on each page of a thread.\n            # it's important to generate unique doc id's, so page index is part of the\n            # id. We may want to de-dupe this stuff inside the indexing service.\n            document = Document(\n                id=f\"{DocumentSource.XENFORO.value}_{title}_{page_index}_{formatted_time}\",\n                sections=[TextSection(link=url, text=post_text)],\n                title=title,\n                source=DocumentSource.XENFORO,\n                semantic_identifier=title,\n                primary_owners=[BasicExpertInfo(display_name=author)],\n                metadata={\n                    \"type\": \"post\",\n                    \"author\": author,\n                    \"time\": formatted_time,\n                },\n                doc_updated_at=post_date,\n            )\n\n            documents.append(document)\n    return documents\n\n\nclass XenforoConnector(LoadConnector):\n    # Class variable to track if the connector has been run before\n    has_been_run_before = False\n\n    def __init__(self, base_url: str) -> None:\n        self.base_url = base_url\n        self.initial_run = not XenforoConnector.has_been_run_before\n        self.start = datetime.utcnow().replace(tzinfo=pytz.utc) - timedelta(days=1)\n        self.cookies: dict[str, str] = {}\n        # mimic user browser to avoid being blocked by the website (see: https://www.useragents.me/)\n        self.headers = {\n            \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) \"\n            \"AppleWebKit/537.36 (KHTML, like Gecko) \"\n            \"Chrome/121.0.0.0 Safari/537.36\"\n        }\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        if credentials:\n            logger.warning(\"Unexpected credentials provided for Xenforo Connector\")\n        return None\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        # Standardize URL to always end in /.\n        if self.base_url[-1] != \"/\":\n            self.base_url += \"/\"\n\n        # Remove all extra parameters from the end such as page, post.\n        matches = (\"threads/\", \"boards/\", \"forums/\")\n        for each in matches:\n            if each in self.base_url:\n                try:\n                    self.base_url = self.base_url[\n                        0 : self.base_url.index(\n                            \"/\", self.base_url.index(each) + len(each)\n                        )\n                        + 1\n                    ]\n                except ValueError:\n                    pass\n\n        doc_batch: list[Document | HierarchyNode] = []\n        all_threads = []\n\n        # If the URL contains \"boards/\" or \"forums/\", find all threads.\n        if \"boards/\" in self.base_url or \"forums/\" in self.base_url:\n            pages = get_pages(self.requestsite(self.base_url), self.base_url)\n\n            # Get all pages on thread_list_page\n            for pre_count, thread_list_page in enumerate(pages, start=1):\n                logger.info(\n                    f\"Getting pages from thread_list_page.. Current: {pre_count}/{len(pages)}\\r\"\n                )\n                all_threads += self.get_threads(thread_list_page)\n        # If the URL contains \"threads/\", add the thread to the list.\n        elif \"threads/\" in self.base_url:\n            all_threads.append(self.base_url)\n\n        # Process all threads\n        for thread_count, thread_url in enumerate(all_threads, start=1):\n            soup = self.requestsite(thread_url)\n            if soup is None:\n                logger.error(f\"Failed to load page: {self.base_url}\")\n                continue\n            pages = get_pages(soup, thread_url)\n            # Getting all pages for all threads\n            for page_index, page in enumerate(pages, start=1):\n                logger.info(\n                    f\"Progress: Page {page_index}/{len(pages)} - Thread {thread_count}/{len(all_threads)}\\r\"\n                )\n                soup_page = self.requestsite(page)\n                doc_batch.extend(\n                    scrape_page_posts(\n                        soup_page, page_index, thread_url, self.initial_run, self.start\n                    )\n                )\n            if doc_batch:\n                yield doc_batch\n\n        # Mark the initial run finished after all threads and pages have been processed\n        XenforoConnector.has_been_run_before = True\n\n    def get_threads(self, url: str) -> list[str]:\n        soup = self.requestsite(url)\n        thread_tags = soup.find_all(class_=\"structItem-title\")\n        base_url = \"{uri.scheme}://{uri.netloc}\".format(uri=urlparse(url))\n        threads = []\n        for x in thread_tags:\n            y = x.find_all(href=True)\n            for element in y:\n                link = element[\"href\"]\n                if \"threads/\" in link:\n                    stripped = link[0 : link.rfind(\"/\") + 1]\n                    if base_url + stripped not in threads:\n                        threads.append(base_url + stripped)\n        return threads\n\n    def requestsite(self, url: str) -> BeautifulSoup:\n        try:\n            response = requests.get(\n                url, cookies=self.cookies, headers=self.headers, timeout=10\n            )\n            if response.status_code != 200:\n                logger.error(\n                    f\"<{url}> Request Error: {response.status_code} - {response.reason}\"\n                )\n            return BeautifulSoup(response.text, \"html.parser\")\n        except TimeoutError:\n            logger.error(\"Timed out Error.\")\n        except Exception as e:\n            logger.error(f\"Error on {url}\")\n            logger.exception(e)\n        return BeautifulSoup(\"\", \"html.parser\")\n\n\nif __name__ == \"__main__\":\n    connector = XenforoConnector(\n        # base_url=\"https://cassiopaea.org/forum/threads/how-to-change-your-emotional-state.41381/\"\n        base_url=\"https://xenforo.com/community/threads/whats-new-with-enhanced-search-resource-manager-and-media-gallery-in-xenforo-2-3.220935/\"\n    )\n    document_batches = connector.load_from_state()\n    print(next(document_batches))\n"
  },
  {
    "path": "backend/onyx/connectors/zendesk/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/zendesk/connector.py",
    "content": "import copy\nimport time\nfrom collections.abc import Callable\nfrom collections.abc import Iterator\nfrom typing import Any\nfrom typing import cast\n\nimport requests\nfrom pydantic import BaseModel\nfrom requests.exceptions import HTTPError\nfrom typing_extensions import override\n\nfrom onyx.configs.app_configs import ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    time_str_to_utc,\n)\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rate_limit_builder,\n)\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.interfaces import CheckpointedConnector\nfrom onyx.connectors.interfaces import CheckpointOutput\nfrom onyx.connectors.interfaces import ConnectorFailure\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.html_utils import parse_html_page_basic\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.utils.retry_wrapper import retry_builder\n\n\nMAX_PAGE_SIZE = 30  # Zendesk API maximum\nMAX_AUTHOR_MAP_SIZE = 50_000  # Reset author map cache if it gets too large\n_SLIM_BATCH_SIZE = 1000\n\n\nclass ZendeskCredentialsNotSetUpError(PermissionError):\n    def __init__(self) -> None:\n        super().__init__(\n            \"Zendesk Credentials are not set up, was load_credentials called?\"\n        )\n\n\nclass ZendeskClient:\n    def __init__(\n        self,\n        subdomain: str,\n        email: str,\n        token: str,\n        calls_per_minute: int | None = None,\n    ):\n        self.base_url = f\"https://{subdomain}.zendesk.com/api/v2\"\n        self.auth = (f\"{email}/token\", token)\n        self.make_request = request_with_rate_limit(self, calls_per_minute)\n\n\ndef request_with_rate_limit(\n    client: ZendeskClient, max_calls_per_minute: int | None = None\n) -> Callable[[str, dict[str, Any]], dict[str, Any]]:\n    @retry_builder()\n    @(\n        rate_limit_builder(max_calls=max_calls_per_minute, period=60)\n        if max_calls_per_minute\n        else lambda x: x\n    )\n    def make_request(endpoint: str, params: dict[str, Any]) -> dict[str, Any]:\n        response = requests.get(\n            f\"{client.base_url}/{endpoint}\", auth=client.auth, params=params\n        )\n\n        if response.status_code == 429:\n            retry_after = response.headers.get(\"Retry-After\")\n            if retry_after is not None:\n                # Sleep for the duration indicated by the Retry-After header\n                time.sleep(int(retry_after))\n\n        elif (\n            response.status_code == 403\n            and response.json().get(\"error\") == \"SupportProductInactive\"\n        ):\n            return response.json()\n\n        response.raise_for_status()\n        return response.json()\n\n    return make_request\n\n\nclass ZendeskPageResponse(BaseModel):\n    data: list[dict[str, Any]]\n    meta: dict[str, Any]\n    has_more: bool\n\n\ndef _get_content_tag_mapping(client: ZendeskClient) -> dict[str, str]:\n    content_tags: dict[str, str] = {}\n    params = {\"page[size]\": MAX_PAGE_SIZE}\n\n    try:\n        while True:\n            data = client.make_request(\"guide/content_tags\", params)\n\n            for tag in data.get(\"records\", []):\n                content_tags[tag[\"id\"]] = tag[\"name\"]\n\n            # Check if there are more pages\n            if data.get(\"meta\", {}).get(\"has_more\", False):\n                params[\"page[after]\"] = data[\"meta\"][\"after_cursor\"]\n            else:\n                break\n\n        return content_tags\n    except Exception as e:\n        raise Exception(f\"Error fetching content tags: {str(e)}\")\n\n\ndef _get_articles(\n    client: ZendeskClient, start_time: int | None = None, page_size: int = MAX_PAGE_SIZE\n) -> Iterator[dict[str, Any]]:\n    params = {\"page[size]\": page_size, \"sort_by\": \"updated_at\", \"sort_order\": \"asc\"}\n    if start_time is not None:\n        params[\"start_time\"] = start_time\n\n    while True:\n        data = client.make_request(\"help_center/articles\", params)\n        for article in data[\"articles\"]:\n            yield article\n\n        if not data.get(\"meta\", {}).get(\"has_more\"):\n            break\n        params[\"page[after]\"] = data[\"meta\"][\"after_cursor\"]\n\n\ndef _get_article_page(\n    client: ZendeskClient,\n    start_time: int | None = None,\n    after_cursor: str | None = None,\n    page_size: int = MAX_PAGE_SIZE,\n) -> ZendeskPageResponse:\n    params = {\"page[size]\": page_size, \"sort_by\": \"updated_at\", \"sort_order\": \"asc\"}\n    if start_time is not None:\n        params[\"start_time\"] = start_time\n    if after_cursor is not None:\n        params[\"page[after]\"] = after_cursor\n\n    data = client.make_request(\"help_center/articles\", params)\n    return ZendeskPageResponse(\n        data=data[\"articles\"],\n        meta=data[\"meta\"],\n        has_more=bool(data[\"meta\"].get(\"has_more\", False)),\n    )\n\n\ndef _get_tickets(\n    client: ZendeskClient, start_time: int | None = None\n) -> Iterator[dict[str, Any]]:\n    params = {\"start_time\": start_time or 0}\n\n    while True:\n        data = client.make_request(\"incremental/tickets.json\", params)\n        for ticket in data[\"tickets\"]:\n            yield ticket\n\n        if not data.get(\"end_of_stream\", False):\n            params[\"start_time\"] = data[\"end_time\"]\n        else:\n            break\n\n\n# TODO: maybe these don't need to be their own functions?\ndef _get_tickets_page(\n    client: ZendeskClient, start_time: int | None = None\n) -> ZendeskPageResponse:\n    params = {\"start_time\": start_time or 0}\n\n    # NOTE: for some reason zendesk doesn't seem to be respecting the start_time param\n    # in my local testing with very few tickets. We'll look into it if this becomes an\n    # issue in larger deployments\n    data = client.make_request(\"incremental/tickets.json\", params)\n    if data.get(\"error\") == \"SupportProductInactive\":\n        raise ValueError(\n            \"Zendesk Support Product is not active for this account, No tickets to index\"\n        )\n    return ZendeskPageResponse(\n        data=data[\"tickets\"],\n        meta={\"end_time\": data[\"end_time\"]},\n        has_more=not bool(data.get(\"end_of_stream\", False)),\n    )\n\n\ndef _fetch_author(\n    client: ZendeskClient, author_id: str | int\n) -> BasicExpertInfo | None:\n    # Skip fetching if author_id is invalid\n    # cast to str to avoid issues with zendesk changing their types\n    if not author_id or str(author_id) == \"-1\":\n        return None\n\n    try:\n        author_data = client.make_request(f\"users/{author_id}\", {})\n        user = author_data.get(\"user\")\n        return (\n            BasicExpertInfo(display_name=user.get(\"name\"), email=user.get(\"email\"))\n            if user and user.get(\"name\") and user.get(\"email\")\n            else None\n        )\n    except requests.exceptions.HTTPError:\n        # Handle any API errors gracefully\n        return None\n\n\ndef _article_to_document(\n    article: dict[str, Any],\n    content_tags: dict[str, str],\n    author_map: dict[str, BasicExpertInfo],\n    client: ZendeskClient,\n) -> tuple[dict[str, BasicExpertInfo] | None, Document]:\n    author_id = article.get(\"author_id\")\n    if not author_id:\n        author = None\n    else:\n        author = (\n            author_map.get(author_id)\n            if author_id in author_map\n            else _fetch_author(client, author_id)\n        )\n\n    new_author_mapping = {author_id: author} if author_id and author else None\n\n    updated_at = article.get(\"updated_at\")\n    update_time = time_str_to_utc(updated_at) if updated_at else None\n\n    # Build metadata\n    metadata: dict[str, str | list[str]] = {\n        \"labels\": [str(label) for label in article.get(\"label_names\", []) if label],\n        \"content_tags\": [\n            content_tags[tag_id]\n            for tag_id in article.get(\"content_tag_ids\", [])\n            if tag_id in content_tags\n        ],\n    }\n\n    # Remove empty values\n    metadata = {k: v for k, v in metadata.items() if v}\n\n    return new_author_mapping, Document(\n        id=f\"article:{article['id']}\",\n        sections=[\n            TextSection(\n                link=cast(str, article.get(\"html_url\")),\n                text=parse_html_page_basic(article[\"body\"]),\n            )\n        ],\n        source=DocumentSource.ZENDESK,\n        semantic_identifier=article[\"title\"],\n        doc_updated_at=update_time,\n        primary_owners=[author] if author else None,\n        metadata=metadata,\n    )\n\n\ndef _get_comment_text(\n    comment: dict[str, Any],\n    author_map: dict[str, BasicExpertInfo],\n    client: ZendeskClient,\n) -> tuple[dict[str, BasicExpertInfo] | None, str]:\n    author_id = comment.get(\"author_id\")\n    if not author_id:\n        author = None\n    else:\n        author = (\n            author_map.get(author_id)\n            if author_id in author_map\n            else _fetch_author(client, author_id)\n        )\n\n    new_author_mapping = {author_id: author} if author_id and author else None\n\n    comment_text = f\"Comment{' by ' + author.display_name if author and author.display_name else ''}\"\n    comment_text += f\"{' at ' + comment['created_at'] if comment.get('created_at') else ''}:\\n{comment['body']}\"\n\n    return new_author_mapping, comment_text\n\n\ndef _ticket_to_document(\n    ticket: dict[str, Any],\n    author_map: dict[str, BasicExpertInfo],\n    client: ZendeskClient,\n    default_subdomain: str,\n) -> tuple[dict[str, BasicExpertInfo] | None, Document]:\n    submitter_id = ticket.get(\"submitter\")\n    if not submitter_id:\n        submitter = None\n    else:\n        submitter = (\n            author_map.get(submitter_id)\n            if submitter_id in author_map\n            else _fetch_author(client, submitter_id)\n        )\n\n    new_author_mapping = (\n        {submitter_id: submitter} if submitter_id and submitter else None\n    )\n\n    updated_at = ticket.get(\"updated_at\")\n    update_time = time_str_to_utc(updated_at) if updated_at else None\n\n    metadata: dict[str, str | list[str]] = {}\n    if status := ticket.get(\"status\"):\n        metadata[\"status\"] = status\n    if priority := ticket.get(\"priority\"):\n        metadata[\"priority\"] = priority\n    if tags := ticket.get(\"tags\"):\n        metadata[\"tags\"] = tags\n    if ticket_type := ticket.get(\"type\"):\n        metadata[\"ticket_type\"] = ticket_type\n\n    # Fetch comments for the ticket\n    comments_data = client.make_request(f\"tickets/{ticket.get('id')}/comments\", {})\n    comments = comments_data.get(\"comments\", [])\n\n    comment_texts = []\n    for comment in comments:\n        new_author_mapping, comment_text = _get_comment_text(\n            comment, author_map, client\n        )\n        if new_author_mapping:\n            author_map.update(new_author_mapping)\n        comment_texts.append(comment_text)\n\n    comments_text = \"\\n\\n\".join(comment_texts)\n\n    subject = ticket.get(\"subject\")\n    full_text = f\"Ticket Subject:\\n{subject}\\n\\nComments:\\n{comments_text}\"\n\n    ticket_url = ticket.get(\"url\")\n    subdomain = (\n        ticket_url.split(\"//\")[1].split(\".zendesk.com\")[0]\n        if ticket_url\n        else default_subdomain\n    )\n\n    ticket_display_url = (\n        f\"https://{subdomain}.zendesk.com/agent/tickets/{ticket.get('id')}\"\n    )\n\n    return new_author_mapping, Document(\n        id=f\"zendesk_ticket_{ticket['id']}\",\n        sections=[TextSection(link=ticket_display_url, text=full_text)],\n        source=DocumentSource.ZENDESK,\n        semantic_identifier=f\"Ticket #{ticket['id']}: {subject or 'No Subject'}\",\n        doc_updated_at=update_time,\n        primary_owners=[submitter] if submitter else None,\n        metadata=metadata,\n    )\n\n\nclass ZendeskConnectorCheckpoint(ConnectorCheckpoint):\n    # We use cursor-based paginated retrieval for articles\n    after_cursor_articles: str | None\n\n    # We use timestamp-based paginated retrieval for tickets\n    next_start_time_tickets: int | None\n\n    cached_author_map: dict[str, BasicExpertInfo] | None\n    cached_content_tags: dict[str, str] | None\n\n\nclass ZendeskConnector(\n    SlimConnectorWithPermSync, CheckpointedConnector[ZendeskConnectorCheckpoint]\n):\n    def __init__(\n        self,\n        content_type: str = \"articles\",\n        calls_per_minute: int | None = None,\n    ) -> None:\n        self.content_type = content_type\n        self.subdomain = \"\"\n        # Fetch all tags ahead of time\n        self.content_tags: dict[str, str] = {}\n        self.calls_per_minute = calls_per_minute\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        # Subdomain is actually the whole URL\n        subdomain = (\n            credentials[\"zendesk_subdomain\"]\n            .replace(\"https://\", \"\")\n            .split(\".zendesk.com\")[0]\n        )\n        self.subdomain = subdomain\n\n        self.client = ZendeskClient(\n            subdomain,\n            credentials[\"zendesk_email\"],\n            credentials[\"zendesk_token\"],\n            calls_per_minute=self.calls_per_minute,\n        )\n        return None\n\n    @override\n    def load_from_checkpoint(\n        self,\n        start: SecondsSinceUnixEpoch,\n        end: SecondsSinceUnixEpoch,\n        checkpoint: ZendeskConnectorCheckpoint,\n    ) -> CheckpointOutput[ZendeskConnectorCheckpoint]:\n        if self.client is None:\n            raise ZendeskCredentialsNotSetUpError()\n\n        if checkpoint.cached_content_tags is None:\n            checkpoint.cached_content_tags = _get_content_tag_mapping(self.client)\n            return checkpoint  # save the content tags to the checkpoint\n        self.content_tags = checkpoint.cached_content_tags\n\n        if self.content_type == \"articles\":\n            checkpoint = yield from self._retrieve_articles(start, end, checkpoint)\n            return checkpoint\n        elif self.content_type == \"tickets\":\n            checkpoint = yield from self._retrieve_tickets(start, end, checkpoint)\n            return checkpoint\n        else:\n            raise ValueError(f\"Unsupported content_type: {self.content_type}\")\n\n    def _retrieve_articles(\n        self,\n        start: SecondsSinceUnixEpoch | None,\n        end: SecondsSinceUnixEpoch | None,\n        checkpoint: ZendeskConnectorCheckpoint,\n    ) -> CheckpointOutput[ZendeskConnectorCheckpoint]:\n        checkpoint = copy.deepcopy(checkpoint)\n        # This one is built on the fly as there may be more many more authors than tags\n        author_map: dict[str, BasicExpertInfo] = checkpoint.cached_author_map or {}\n        after_cursor = checkpoint.after_cursor_articles\n        doc_batch: list[Document] = []\n\n        response = _get_article_page(\n            self.client,\n            start_time=int(start) if start else None,\n            after_cursor=after_cursor,\n        )\n        articles = response.data\n        has_more = response.has_more\n        after_cursor = response.meta.get(\"after_cursor\")\n        for article in articles:\n            if (\n                article.get(\"body\") is None\n                or article.get(\"draft\")\n                or any(\n                    label in ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS\n                    for label in article.get(\"label_names\", [])\n                )\n            ):\n                continue\n\n            try:\n                new_author_map, document = _article_to_document(\n                    article, self.content_tags, author_map, self.client\n                )\n            except Exception as e:\n                yield ConnectorFailure(\n                    failed_document=DocumentFailure(\n                        document_id=f\"{article.get('id')}\",\n                        document_link=article.get(\"html_url\", \"\"),\n                    ),\n                    failure_message=str(e),\n                    exception=e,\n                )\n                continue\n\n            if new_author_map:\n                author_map.update(new_author_map)\n\n            doc_batch.append(document)\n\n        if not has_more:\n            yield from doc_batch\n            checkpoint.has_more = False\n            return checkpoint\n\n        # Sometimes no documents are retrieved, but the cursor\n        # is still updated so the connector makes progress.\n        yield from doc_batch\n        checkpoint.after_cursor_articles = after_cursor\n\n        last_doc_updated_at = doc_batch[-1].doc_updated_at if doc_batch else None\n        checkpoint.has_more = bool(\n            end is None\n            or last_doc_updated_at is None\n            or last_doc_updated_at.timestamp() <= end\n        )\n        checkpoint.cached_author_map = (\n            author_map if len(author_map) <= MAX_AUTHOR_MAP_SIZE else None\n        )\n        return checkpoint\n\n    def _retrieve_tickets(\n        self,\n        start: SecondsSinceUnixEpoch | None,\n        end: SecondsSinceUnixEpoch | None,\n        checkpoint: ZendeskConnectorCheckpoint,\n    ) -> CheckpointOutput[ZendeskConnectorCheckpoint]:\n        checkpoint = copy.deepcopy(checkpoint)\n        if self.client is None:\n            raise ZendeskCredentialsNotSetUpError()\n\n        author_map: dict[str, BasicExpertInfo] = checkpoint.cached_author_map or {}\n\n        doc_batch: list[Document] = []\n        next_start_time = int(checkpoint.next_start_time_tickets or start or 0)\n        ticket_response = _get_tickets_page(self.client, start_time=next_start_time)\n        tickets = ticket_response.data\n        has_more = ticket_response.has_more\n        next_start_time = ticket_response.meta[\"end_time\"]\n        for ticket in tickets:\n            if ticket.get(\"status\") == \"deleted\":\n                continue\n\n            try:\n                new_author_map, document = _ticket_to_document(\n                    ticket=ticket,\n                    author_map=author_map,\n                    client=self.client,\n                    default_subdomain=self.subdomain,\n                )\n            except Exception as e:\n                yield ConnectorFailure(\n                    failed_document=DocumentFailure(\n                        document_id=f\"{ticket.get('id')}\",\n                        document_link=ticket.get(\"url\", \"\"),\n                    ),\n                    failure_message=str(e),\n                    exception=e,\n                )\n                continue\n\n            if new_author_map:\n                author_map.update(new_author_map)\n\n            doc_batch.append(document)\n\n        if not has_more:\n            yield from doc_batch\n            checkpoint.has_more = False\n            return checkpoint\n\n        yield from doc_batch\n        checkpoint.next_start_time_tickets = next_start_time\n        last_doc_updated_at = doc_batch[-1].doc_updated_at if doc_batch else None\n        checkpoint.has_more = bool(\n            end is None\n            or last_doc_updated_at is None\n            or last_doc_updated_at.timestamp() <= end\n        )\n        checkpoint.cached_author_map = (\n            author_map if len(author_map) <= MAX_AUTHOR_MAP_SIZE else None\n        )\n        return checkpoint\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,\n        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002\n        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002\n    ) -> GenerateSlimDocumentOutput:\n        slim_doc_batch: list[SlimDocument | HierarchyNode] = []\n        if self.content_type == \"articles\":\n            articles = _get_articles(\n                self.client, start_time=int(start) if start else None\n            )\n            for article in articles:\n                slim_doc_batch.append(\n                    SlimDocument(\n                        id=f\"article:{article['id']}\",\n                    )\n                )\n                if len(slim_doc_batch) >= _SLIM_BATCH_SIZE:\n                    yield slim_doc_batch\n                    slim_doc_batch = []\n        elif self.content_type == \"tickets\":\n            tickets = _get_tickets(\n                self.client, start_time=int(start) if start else None\n            )\n            for ticket in tickets:\n                slim_doc_batch.append(\n                    SlimDocument(\n                        id=f\"zendesk_ticket_{ticket['id']}\",\n                    )\n                )\n                if len(slim_doc_batch) >= _SLIM_BATCH_SIZE:\n                    yield slim_doc_batch\n                    slim_doc_batch = []\n        else:\n            raise ValueError(f\"Unsupported content_type: {self.content_type}\")\n        if slim_doc_batch:\n            yield slim_doc_batch\n\n    @override\n    def validate_connector_settings(self) -> None:\n        if self.client is None:\n            raise ZendeskCredentialsNotSetUpError()\n\n        try:\n            _get_article_page(self.client, start_time=0)\n        except HTTPError as e:\n            # Check for HTTP status codes\n            if e.response.status_code == 401:\n                raise CredentialExpiredError(\n                    \"Your Zendesk credentials appear to be invalid or expired (HTTP 401).\"\n                ) from e\n            elif e.response.status_code == 403:\n                raise InsufficientPermissionsError(\n                    \"Your Zendesk token does not have sufficient permissions (HTTP 403).\"\n                ) from e\n            elif e.response.status_code == 404:\n                raise ConnectorValidationError(\n                    \"Zendesk resource not found (HTTP 404).\"\n                ) from e\n            else:\n                raise ConnectorValidationError(\n                    f\"Unexpected Zendesk error (status={e.response.status_code}): {e}\"\n                ) from e\n\n    @override\n    def validate_checkpoint_json(\n        self, checkpoint_json: str\n    ) -> ZendeskConnectorCheckpoint:\n        return ZendeskConnectorCheckpoint.model_validate_json(checkpoint_json)\n\n    @override\n    def build_dummy_checkpoint(self) -> ZendeskConnectorCheckpoint:\n        return ZendeskConnectorCheckpoint(\n            after_cursor_articles=None,\n            next_start_time_tickets=None,\n            cached_author_map=None,\n            cached_content_tags=None,\n            has_more=True,\n        )\n\n\nif __name__ == \"__main__\":\n    import os\n\n    connector = ZendeskConnector()\n    connector.load_credentials(\n        {\n            \"zendesk_subdomain\": os.environ[\"ZENDESK_SUBDOMAIN\"],\n            \"zendesk_email\": os.environ[\"ZENDESK_EMAIL\"],\n            \"zendesk_token\": os.environ[\"ZENDESK_TOKEN\"],\n        }\n    )\n\n    current = time.time()\n    one_day_ago = current - 24 * 60 * 60  # 1 day\n    document_batches = connector.load_from_checkpoint(\n        one_day_ago,\n        current,\n        connector.build_dummy_checkpoint(),\n    )\n\n    print(next(document_batches))\n"
  },
  {
    "path": "backend/onyx/connectors/zulip/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/connectors/zulip/connector.py",
    "content": "import os\nimport tempfile\nimport urllib.parse\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import Dict\nfrom typing import List\nfrom typing import Tuple\nfrom typing import Union\n\nfrom zulip import Client\n\nfrom onyx.configs.app_configs import INDEX_BATCH_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.interfaces import GenerateDocumentsOutput\nfrom onyx.connectors.interfaces import LoadConnector\nfrom onyx.connectors.interfaces import PollConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.connectors.zulip.schemas import GetMessagesResponse\nfrom onyx.connectors.zulip.schemas import Message\nfrom onyx.connectors.zulip.utils import build_search_narrow\nfrom onyx.connectors.zulip.utils import call_api\nfrom onyx.connectors.zulip.utils import encode_zulip_narrow_operand\nfrom onyx.utils.logger import setup_logger\n\n# Potential improvements\n# 1. Group documents messages into topics, make 1 document per topic per week\n# 2. Add end date support once https://github.com/zulip/zulip/issues/25436 is solved\n\nlogger = setup_logger()\n\n\nclass ZulipConnector(LoadConnector, PollConnector):\n    def __init__(\n        self, realm_name: str, realm_url: str, batch_size: int = INDEX_BATCH_SIZE\n    ) -> None:\n        self.batch_size = batch_size\n        self.realm_name = realm_name\n\n        # Clean and normalize the URL\n        realm_url = realm_url.strip().lower()\n\n        # Remove any trailing slashes\n        realm_url = realm_url.rstrip(\"/\")\n\n        # Ensure the URL has a scheme\n        if not realm_url.startswith((\"http://\", \"https://\")):\n            realm_url = f\"https://{realm_url}\"\n\n        try:\n            parsed = urllib.parse.urlparse(realm_url)\n\n            # Extract the base domain without any paths or ports\n            netloc = parsed.netloc.split(\":\")[0]  # Remove port if present\n\n            if not netloc:\n                raise ValueError(\n                    f\"Invalid realm URL format: {realm_url}. URL must include a valid domain name.\"\n                )\n\n            # Always use HTTPS for security\n            self.base_url = f\"https://{netloc}\"\n            self.client: Client | None = None\n\n        except Exception as e:\n            raise ValueError(\n                f\"Failed to parse Zulip realm URL: {realm_url}. \"\n                f\"Please provide a URL in the format: domain.com or https://domain.com. \"\n                f\"Error: {str(e)}\"\n            )\n\n    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:\n        contents = credentials[\"zuliprc_content\"]\n        # The input field converts newlines to spaces in the provided\n        # zuliprc file. This reverts them back to newlines.\n        contents_spaces_to_newlines = contents.replace(\" \", \"\\n\")\n        # create a temporary zuliprc file\n        tempdir = tempfile.tempdir\n        if tempdir is None:\n            raise Exception(\"Could not determine tempfile directory\")\n        config_file = os.path.join(tempdir, f\"zuliprc-{self.realm_name}\")\n        with open(config_file, \"w\") as f:\n            f.write(contents_spaces_to_newlines)\n        self.client = Client(config_file=config_file)\n        return None\n\n    def _message_to_narrow_link(self, m: Message) -> str:\n        try:\n            stream_name = m.display_recipient  # assume str\n            stream_operand = encode_zulip_narrow_operand(f\"{m.stream_id}-{stream_name}\")\n            topic_operand = encode_zulip_narrow_operand(m.subject)\n\n            narrow_link = f\"{self.base_url}#narrow/stream/{stream_operand}/topic/{topic_operand}/near/{m.id}\"\n            return narrow_link\n        except Exception as e:\n            logger.error(f\"Error generating Zulip message link: {e}\")\n            # Fallback to a basic link that at least includes the base URL\n            return f\"{self.base_url}#narrow/id/{m.id}\"\n\n    def _get_message_batch(self, anchor: str) -> Tuple[bool, List[Message]]:\n        if self.client is None:\n            raise ConnectorMissingCredentialError(\"Zulip\")\n\n        logger.info(f\"Fetching messages starting with anchor={anchor}\")\n        request = build_search_narrow(\n            limit=INDEX_BATCH_SIZE, anchor=anchor, apply_md=False\n        )\n        response = GetMessagesResponse(**call_api(self.client.get_messages, request))\n\n        end = False\n        if len(response.messages) == 0 or response.found_oldest:\n            end = True\n\n        # reverse, so that the last message is the new anchor\n        # and the order is from newest to oldest\n        return end, response.messages[::-1]\n\n    def _message_to_doc(self, message: Message) -> Document:\n        text = f\"{message.sender_full_name}: {message.content}\"\n\n        try:\n            # Convert timestamps to UTC datetime objects\n            post_time = datetime.fromtimestamp(message.timestamp, tz=timezone.utc)\n            edit_time = (\n                datetime.fromtimestamp(message.last_edit_timestamp, tz=timezone.utc)\n                if message.last_edit_timestamp is not None\n                else None\n            )\n\n            # Use the most recent edit time if available, otherwise use post time\n            doc_time = edit_time if edit_time is not None else post_time\n\n        except (ValueError, TypeError) as e:\n            logger.warning(f\"Failed to parse timestamp for message {message.id}: {e}\")\n            post_time = None\n            edit_time = None\n            doc_time = None\n\n        metadata: Dict[str, Union[str, List[str]]] = {\n            \"stream_name\": str(message.display_recipient),\n            \"topic\": str(message.subject),\n            \"sender_name\": str(message.sender_full_name),\n            \"sender_email\": str(message.sender_email),\n            \"message_timestamp\": str(message.timestamp),\n            \"message_id\": str(message.id),\n            \"stream_id\": str(message.stream_id),\n            \"has_reactions\": str(len(message.reactions) > 0),\n            \"content_type\": str(message.content_type or \"text\"),\n        }\n\n        # Always include edit timestamp in metadata when available\n        if edit_time is not None:\n            metadata[\"edit_timestamp\"] = str(message.last_edit_timestamp)\n\n        return Document(\n            id=f\"{message.stream_id}__{message.id}\",\n            sections=[\n                TextSection(\n                    link=self._message_to_narrow_link(message),\n                    text=text,\n                )\n            ],\n            source=DocumentSource.ZULIP,\n            semantic_identifier=f\"{message.display_recipient} > {message.subject}\",\n            metadata=metadata,\n            doc_updated_at=doc_time,  # Use most recent edit time or post time\n        )\n\n    def _get_docs(\n        self, anchor: str, start: SecondsSinceUnixEpoch | None = None\n    ) -> Generator[Document, None, None]:\n        message: Message | None = None\n        while True:\n            end, message_batch = self._get_message_batch(anchor)\n\n            for message in message_batch:\n                if start is not None and float(message.timestamp) < start:\n                    return\n                yield self._message_to_doc(message)\n\n            if end or message is None:\n                return\n\n            # Last message is oldest, use as next anchor\n            anchor = str(message.id)\n\n    def _poll_source(\n        self,\n        start: SecondsSinceUnixEpoch | None,\n        end: SecondsSinceUnixEpoch | None,  # noqa: ARG002\n    ) -> GenerateDocumentsOutput:\n        # Since Zulip doesn't support searching by timestamp,\n        # we have to always start from the newest message\n        # and go backwards.\n        anchor = \"newest\"\n\n        docs: list[Document | HierarchyNode] = []\n        for doc in self._get_docs(anchor=anchor, start=start):\n            docs.append(doc)\n            if len(docs) == self.batch_size:\n                yield docs\n                docs = []\n        if docs:\n            yield docs\n\n    def load_from_state(self) -> GenerateDocumentsOutput:\n        return self._poll_source(start=None, end=None)\n\n    def poll_source(\n        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch\n    ) -> GenerateDocumentsOutput:\n        return self._poll_source(start, end)\n"
  },
  {
    "path": "backend/onyx/connectors/zulip/schemas.py",
    "content": "from typing import Any\nfrom typing import List\nfrom typing import Optional\nfrom typing import Union\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\n\nclass Message(BaseModel):\n    id: int\n    sender_id: int\n    content: str\n    recipient_id: int\n    timestamp: int\n    client: str\n    is_me_message: bool\n    sender_full_name: str\n    sender_email: str\n    sender_realm_str: str\n    subject: str\n    topic_links: Optional[List[Any]] = None\n    last_edit_timestamp: Optional[int] = None\n    edit_history: Any = None\n    reactions: List[Any]\n    submessages: List[Any]\n    flags: List[str] = Field(default_factory=list)\n    display_recipient: Optional[str] = None\n    type: Optional[str] = None\n    stream_id: int\n    avatar_url: Optional[str]\n    content_type: Optional[str]\n    rendered_content: Optional[str] = None\n\n\nclass GetMessagesResponse(BaseModel):\n    result: str\n    msg: str\n    found_anchor: Optional[bool] = None\n    found_oldest: Optional[bool] = None\n    found_newest: Optional[bool] = None\n    history_limited: Optional[bool] = None\n    anchor: Optional[Union[str, int]] = None\n    messages: List[Message] = Field(default_factory=list)\n"
  },
  {
    "path": "backend/onyx/connectors/zulip/utils.py",
    "content": "import time\nfrom collections.abc import Callable\nfrom typing import Any\nfrom typing import Dict\nfrom typing import Optional\nfrom urllib.parse import quote\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass ZulipAPIError(Exception):\n    def __init__(self, code: Any = None, msg: str | None = None) -> None:\n        self.code = code\n        self.msg = msg\n\n    def __str__(self) -> str:\n        return (\n            f\"Error occurred during Zulip API call: {self.msg}\" + \"\"\n            if self.code is None\n            else f\" ({self.code})\"\n        )\n\n\nclass ZulipHTTPError(ZulipAPIError):\n    def __init__(self, msg: str | None = None, status_code: Any = None) -> None:\n        super().__init__(code=None, msg=msg)\n        self.status_code = status_code\n\n    def __str__(self) -> str:\n        return f\"HTTP error {self.status_code} occurred during Zulip API call\"\n\n\ndef __call_with_retry(fun: Callable, *args: Any, **kwargs: Any) -> Dict[str, Any]:\n    result = fun(*args, **kwargs)\n    if result.get(\"result\") == \"error\":\n        if result.get(\"code\") == \"RATE_LIMIT_HIT\":\n            retry_after = float(result[\"retry-after\"]) + 1\n            logger.warn(f\"Rate limit hit, retrying after {retry_after} seconds\")\n            time.sleep(retry_after)\n            return __call_with_retry(fun, *args)\n    return result\n\n\ndef __raise_if_error(response: dict[str, Any]) -> None:\n    if response.get(\"result\") == \"error\":\n        raise ZulipAPIError(\n            code=response.get(\"code\"),\n            msg=response.get(\"msg\"),\n        )\n    elif response.get(\"result\") == \"http-error\":\n        raise ZulipHTTPError(\n            msg=response.get(\"msg\"), status_code=response.get(\"status_code\")\n        )\n\n\ndef call_api(fun: Callable, *args: Any, **kwargs: Any) -> Dict[str, Any]:\n    response = __call_with_retry(fun, *args, **kwargs)\n    __raise_if_error(response)\n    return response\n\n\ndef build_search_narrow(\n    *,\n    stream: Optional[str] = None,\n    topic: Optional[str] = None,\n    limit: int = 100,\n    content: Optional[str] = None,\n    apply_md: bool = False,\n    anchor: str = \"newest\",\n) -> Dict[str, Any]:\n    narrow_filters = []\n\n    if stream:\n        narrow_filters.append({\"operator\": \"stream\", \"operand\": stream})\n\n    if topic:\n        narrow_filters.append({\"operator\": \"topic\", \"operand\": topic})\n\n    if content:\n        narrow_filters.append({\"operator\": \"has\", \"operand\": content})\n\n    if not stream and not topic and not content:\n        narrow_filters.append({\"operator\": \"streams\", \"operand\": \"public\"})\n\n    narrow = {\n        \"anchor\": anchor,\n        \"num_before\": limit,\n        \"num_after\": 0,\n        \"narrow\": narrow_filters,\n    }\n    narrow[\"apply_markdown\"] = apply_md\n\n    return narrow\n\n\ndef encode_zulip_narrow_operand(value: str) -> str:\n    # like https://github.com/zulip/zulip/blob/1577662a6/static/js/hash_util.js#L18-L25\n    # safe characters necessary to make Python match Javascript's escaping behaviour,\n    # see: https://stackoverflow.com/a/74439601\n    return quote(value, safe=\"!~*'()\").replace(\".\", \"%2E\").replace(\"%\", \".\")\n"
  },
  {
    "path": "backend/onyx/context/search/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/context/search/enums.py",
    "content": "\"\"\"NOTE: this needs to be separate from models.py because of circular imports.\nBoth search/models.py and db/models.py import enums from this file AND\nsearch/models.py imports from db/models.py.\"\"\"\n\nfrom enum import Enum\n\n\nclass RecencyBiasSetting(str, Enum):\n    FAVOR_RECENT = \"favor_recent\"  # 2x decay rate\n    BASE_DECAY = \"base_decay\"\n    NO_DECAY = \"no_decay\"\n    # Determine based on query if to use base_decay or favor_recent\n    AUTO = \"auto\"\n\n\nclass QueryType(str, Enum):\n    \"\"\"\n    The type of first-pass query to use for hybrid search.\n\n    The values of this enum are injected into the ranking profile name which\n    should match the name in the schema.\n    \"\"\"\n\n    KEYWORD = \"keyword\"\n    SEMANTIC = \"semantic\"\n\n\nclass SearchType(str, Enum):\n    KEYWORD = \"keyword\"\n    SEMANTIC = \"semantic\"\n    INTERNET = \"internet\"\n"
  },
  {
    "path": "backend/onyx/context/search/federated/models.py",
    "content": "from datetime import datetime\nfrom typing import TypedDict\n\nfrom pydantic import BaseModel\n\nfrom onyx.onyxbot.slack.models import ChannelType\n\n\nclass ChannelMetadata(TypedDict):\n    \"\"\"Type definition for cached channel metadata.\"\"\"\n\n    name: str\n    type: ChannelType\n    is_private: bool\n    is_member: bool\n\n\nclass SlackMessage(BaseModel):\n    document_id: str\n    channel_id: str\n    message_id: str\n    thread_id: str | None\n    link: str\n    metadata: dict[str, str | list[str]]\n    timestamp: datetime\n    recency_bias: float\n    semantic_identifier: str\n    text: str\n    highlighted_texts: set[str]\n    slack_score: float\n"
  },
  {
    "path": "backend/onyx/context/search/federated/slack_search.py",
    "content": "import json\nimport re\nimport time\nfrom datetime import datetime\nfrom typing import Any\nfrom typing import cast\n\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom pydantic import ValidationError\nfrom slack_sdk import WebClient\nfrom slack_sdk.errors import SlackApiError\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG\nfrom onyx.configs.app_configs import MAX_SLACK_THREAD_CONTEXT_MESSAGES\nfrom onyx.configs.app_configs import SLACK_THREAD_CONTEXT_BATCH_SIZE\nfrom onyx.configs.chat_configs import DOC_TIME_DECAY\nfrom onyx.connectors.models import IndexingDocument\nfrom onyx.connectors.models import TextSection\nfrom onyx.context.search.federated.models import ChannelMetadata\nfrom onyx.context.search.federated.models import SlackMessage\nfrom onyx.context.search.federated.slack_search_utils import ALL_CHANNEL_TYPES\nfrom onyx.context.search.federated.slack_search_utils import build_channel_query_filter\nfrom onyx.context.search.federated.slack_search_utils import build_slack_queries\nfrom onyx.context.search.federated.slack_search_utils import get_channel_type\nfrom onyx.context.search.federated.slack_search_utils import (\n    get_channel_type_for_missing_scope,\n)\nfrom onyx.context.search.federated.slack_search_utils import is_recency_query\nfrom onyx.context.search.federated.slack_search_utils import should_include_message\nfrom onyx.context.search.models import ChunkIndexRequest\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.db.document import DocumentSource\nfrom onyx.db.models import SearchSettings\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.document_index.document_index_utils import (\n    get_multipass_config,\n)\nfrom onyx.federated_connectors.slack.models import SlackEntities\nfrom onyx.indexing.chunker import Chunker\nfrom onyx.indexing.embedder import DefaultIndexingEmbedder\nfrom onyx.indexing.models import DocAwareChunk\nfrom onyx.llm.factory import get_default_llm\nfrom onyx.onyxbot.slack.models import ChannelType\nfrom onyx.onyxbot.slack.models import SlackContext\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.federated.models import FederatedConnectorDetail\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom onyx.utils.timing import log_function_time\nfrom shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE\n\nlogger = setup_logger()\n\nHIGHLIGHT_START_CHAR = \"\\ue000\"\nHIGHLIGHT_END_CHAR = \"\\ue001\"\n\nCHANNEL_METADATA_CACHE_TTL = 60 * 60 * 24  # 24 hours\nUSER_PROFILE_CACHE_TTL = 60 * 60 * 24  # 24 hours\nSLACK_THREAD_CONTEXT_WINDOW = 3  # Number of messages before matched message to include\nCHANNEL_METADATA_MAX_RETRIES = 3  # Maximum retry attempts for channel metadata fetching\nCHANNEL_METADATA_RETRY_DELAY = 1  # Initial retry delay in seconds (exponential backoff)\n\n\ndef fetch_and_cache_channel_metadata(\n    access_token: str, team_id: str, include_private: bool = True\n) -> dict[str, ChannelMetadata]:\n    \"\"\"\n    Fetch ALL channel metadata in one API call and cache it.\n\n    Returns a dict mapping channel_id -> metadata including name, type, etc.\n    This replaces multiple conversations.info calls with a single conversations.list.\n\n    Note: We ALWAYS fetch all channel types (including private) and cache them together.\n    This ensures a single cache entry per team, avoiding duplicate API calls.\n    \"\"\"\n    # Use tenant-specific Redis client\n    redis_client = get_redis_client()\n    # (tenant_id prefix is added automatically by TenantRedis)\n    cache_key = f\"slack_federated_search:{team_id}:channels:metadata\"\n\n    try:\n        cached = redis_client.get(cache_key)\n        if cached:\n            logger.debug(f\"Channel metadata cache HIT for team {team_id}\")\n            cached_str: str = (\n                cached.decode(\"utf-8\") if isinstance(cached, bytes) else str(cached)\n            )\n            cached_data = cast(dict[str, ChannelMetadata], json.loads(cached_str))\n            logger.debug(f\"Loaded {len(cached_data)} channels from cache\")\n            if not include_private:\n                filtered: dict[str, ChannelMetadata] = {\n                    k: v\n                    for k, v in cached_data.items()\n                    if v.get(\"type\") != ChannelType.PRIVATE_CHANNEL.value\n                }\n                logger.debug(f\"Filtered to {len(filtered)} channels (exclude private)\")\n                return filtered\n            return cached_data\n    except Exception as e:\n        logger.warning(f\"Error reading from channel metadata cache: {e}\")\n\n    # Cache miss - fetch from Slack API with retry logic\n    logger.debug(f\"Channel metadata cache MISS for team {team_id} - fetching from API\")\n    slack_client = WebClient(token=access_token)\n    channel_metadata: dict[str, ChannelMetadata] = {}\n\n    # Retry logic with exponential backoff\n    last_exception = None\n    available_channel_types = ALL_CHANNEL_TYPES.copy()\n\n    for attempt in range(CHANNEL_METADATA_MAX_RETRIES):\n        try:\n            # Use available channel types (may be reduced if scopes are missing)\n            channel_types = \",\".join(available_channel_types)\n\n            # Fetch all channels in one call\n            cursor = None\n            channel_count = 0\n            while True:\n                response = slack_client.conversations_list(\n                    types=channel_types,\n                    exclude_archived=True,\n                    limit=1000,\n                    cursor=cursor,\n                )\n                response.validate()\n\n                # Cast response.data to dict for type checking\n                response_data: dict[str, Any] = response.data  # type: ignore\n                for ch in response_data.get(\"channels\", []):\n                    channel_id = ch.get(\"id\")\n                    if not channel_id:\n                        continue\n\n                    # Determine channel type\n                    channel_type_enum = get_channel_type(channel_info=ch)\n                    channel_type = ChannelType(channel_type_enum.value)\n\n                    channel_metadata[channel_id] = {\n                        \"name\": ch.get(\"name\", \"\"),\n                        \"type\": channel_type,\n                        \"is_private\": ch.get(\"is_private\", False),\n                        \"is_member\": ch.get(\"is_member\", False),\n                    }\n                    channel_count += 1\n\n                cursor = response_data.get(\"response_metadata\", {}).get(\"next_cursor\")\n                if not cursor:\n                    break\n\n            logger.info(f\"Fetched {channel_count} channels for team {team_id}\")\n\n            # Cache the results\n            try:\n                redis_client.set(\n                    cache_key,\n                    json.dumps(channel_metadata),\n                    ex=CHANNEL_METADATA_CACHE_TTL,\n                )\n                logger.info(\n                    f\"Cached {channel_count} channels for team {team_id} (TTL: {CHANNEL_METADATA_CACHE_TTL}s, key: {cache_key})\"\n                )\n            except Exception as e:\n                logger.warning(f\"Error caching channel metadata: {e}\")\n\n            return channel_metadata\n\n        except SlackApiError as e:\n            last_exception = e\n\n            # Extract all needed fields from response upfront\n            if e.response:\n                error_response = e.response.get(\"error\", \"\")\n                needed_scope = e.response.get(\"needed\", \"\")\n            else:\n                error_response = \"\"\n                needed_scope = \"\"\n\n            # Check if this is a missing_scope error\n            if error_response == \"missing_scope\":\n                # Get the channel type that requires this scope\n                missing_channel_type = get_channel_type_for_missing_scope(needed_scope)\n\n                if (\n                    missing_channel_type\n                    and missing_channel_type in available_channel_types\n                ):\n                    # Remove the problematic channel type and retry\n                    available_channel_types.remove(missing_channel_type)\n                    logger.warning(\n                        f\"Missing scope '{needed_scope}' for channel type '{missing_channel_type}'. \"\n                        f\"Continuing with reduced channel types: {available_channel_types}\"\n                    )\n                    # Don't count this as a retry attempt, just try again with fewer types\n                    if available_channel_types:  # Only continue if we have types left\n                        continue\n                    # Otherwise fall through to retry logic\n                else:\n                    logger.error(\n                        f\"Missing scope '{needed_scope}' but could not map to channel type or already removed. \"\n                        f\"Response: {e.response}\"\n                    )\n\n            # For other errors, use retry logic\n            if attempt < CHANNEL_METADATA_MAX_RETRIES - 1:\n                retry_delay = CHANNEL_METADATA_RETRY_DELAY * (2**attempt)\n                logger.warning(\n                    f\"Failed to fetch channel metadata (attempt {attempt + 1}/{CHANNEL_METADATA_MAX_RETRIES}): {e}. \"\n                    f\"Retrying in {retry_delay}s...\"\n                )\n                time.sleep(retry_delay)\n            else:\n                logger.error(\n                    f\"Failed to fetch channel metadata after {CHANNEL_METADATA_MAX_RETRIES} attempts: {e}\"\n                )\n\n    # If we have some channel metadata despite errors, return it with a warning\n    if channel_metadata:\n        logger.warning(\n            f\"Returning partial channel metadata ({len(channel_metadata)} channels) despite errors. Last error: {last_exception}\"\n        )\n        return channel_metadata\n\n    # If we exhausted all retries and have no data, raise the last exception\n    if last_exception:\n        raise SlackApiError(\n            f\"Channel metadata fetching failed after {CHANNEL_METADATA_MAX_RETRIES} attempts\",\n            last_exception.response,\n        )\n\n    return {}\n\n\ndef get_available_channels(\n    access_token: str, team_id: str, include_private: bool = False\n) -> list[str]:\n    \"\"\"Fetch list of available channel names using cached metadata.\"\"\"\n    metadata = fetch_and_cache_channel_metadata(access_token, team_id, include_private)\n    return [meta[\"name\"] for meta in metadata.values() if meta[\"name\"]]\n\n\ndef get_cached_user_profile(\n    access_token: str, team_id: str, user_id: str\n) -> str | None:\n    \"\"\"\n    Get a user's display name from cache or fetch from Slack API.\n\n    Uses Redis caching to avoid repeated API calls and rate limiting.\n    Returns the user's real_name or email, or None if not found.\n    \"\"\"\n    redis_client = get_redis_client()\n    cache_key = f\"slack_federated_search:{team_id}:user:{user_id}\"\n\n    # Check cache first\n    try:\n        cached = redis_client.get(cache_key)\n        if cached is not None:\n            cached_str = (\n                cached.decode(\"utf-8\") if isinstance(cached, bytes) else str(cached)\n            )\n            # Empty string means user was not found previously\n            return cached_str if cached_str else None\n    except Exception as e:\n        logger.debug(f\"Error reading user profile cache: {e}\")\n\n    # Cache miss - fetch from Slack API\n    slack_client = WebClient(token=access_token)\n    try:\n        response = slack_client.users_profile_get(user=user_id)\n        response.validate()\n        profile: dict[str, Any] = response.get(\"profile\", {})\n        name: str | None = profile.get(\"real_name\") or profile.get(\"email\")\n\n        # Cache the result (empty string for not found)\n        try:\n            redis_client.set(\n                cache_key,\n                name or \"\",\n                ex=USER_PROFILE_CACHE_TTL,\n            )\n        except Exception as e:\n            logger.debug(f\"Error caching user profile: {e}\")\n\n        return name\n\n    except SlackApiError as e:\n        error_str = str(e)\n        if \"user_not_found\" in error_str:\n            logger.debug(\n                f\"User {user_id} not found in Slack workspace (likely deleted/deactivated)\"\n            )\n        elif \"ratelimited\" in error_str:\n            # Don't cache rate limit errors - we'll retry later\n            logger.debug(f\"Rate limited fetching user {user_id}, will retry later\")\n            return None\n        else:\n            logger.warning(f\"Could not fetch profile for user {user_id}: {e}\")\n\n        # Cache negative result to avoid repeated lookups for missing users\n        try:\n            redis_client.set(cache_key, \"\", ex=USER_PROFILE_CACHE_TTL)\n        except Exception:\n            pass\n\n        return None\n\n\ndef batch_get_user_profiles(\n    access_token: str, team_id: str, user_ids: set[str]\n) -> dict[str, str]:\n    \"\"\"\n    Batch fetch user profiles with caching.\n\n    Returns a dict mapping user_id -> display_name for users that were found.\n    \"\"\"\n    result: dict[str, str] = {}\n\n    for user_id in user_ids:\n        name = get_cached_user_profile(access_token, team_id, user_id)\n        if name:\n            result[user_id] = name\n\n    return result\n\n\ndef _extract_channel_data_from_entities(\n    entities: dict[str, Any] | None,\n    channel_metadata_dict: dict[str, ChannelMetadata] | None,\n) -> list[str] | None:\n    \"\"\"Extract available channels list from metadata based on entity configuration.\n\n    Args:\n        entities: Entity filter configuration dict\n        channel_metadata_dict: Pre-fetched channel metadata dictionary\n\n    Returns:\n        List of available channel names, or None if not needed\n    \"\"\"\n    if not entities or not channel_metadata_dict:\n        return None\n\n    try:\n        parsed_entities = SlackEntities(**entities)\n        # Only extract if we have exclusions or channel filters\n        if parsed_entities.exclude_channels or parsed_entities.channels:\n            # Extract channel names from metadata dict\n            return [\n                meta[\"name\"]\n                for meta in channel_metadata_dict.values()\n                if meta[\"name\"]\n                and (\n                    parsed_entities.include_private_channels\n                    or meta.get(\"type\") != ChannelType.PRIVATE_CHANNEL.value\n                )\n            ]\n    except ValidationError:\n        logger.debug(\"Failed to parse entities for channel data extraction\")\n\n    return None\n\n\ndef _should_skip_channel(\n    channel_id: str,\n    allowed_private_channel: str | None,\n    bot_token: str | None,\n    access_token: str,\n    include_dm: bool,\n    channel_metadata_dict: dict[str, ChannelMetadata] | None = None,\n) -> bool:\n    \"\"\"Bot context filtering: skip private channels unless explicitly allowed.\n\n    Uses pre-fetched channel metadata when available to avoid API calls.\n    \"\"\"\n    if bot_token and not include_dm:\n        try:\n            # First try to use pre-fetched metadata from cache\n            if channel_metadata_dict and channel_id in channel_metadata_dict:\n                channel_meta = channel_metadata_dict[channel_id]\n                channel_type_str = channel_meta.get(\"type\", \"\")\n                is_private_or_dm = channel_type_str in [\n                    ChannelType.PRIVATE_CHANNEL.value,\n                    ChannelType.IM.value,\n                    ChannelType.MPIM.value,\n                ]\n                if is_private_or_dm and channel_id != allowed_private_channel:\n                    return True\n                return False\n\n            # Fallback: API call only if not in cache (should be rare)\n            token_to_use = bot_token or access_token\n            channel_client = WebClient(token=token_to_use)\n            channel_info = channel_client.conversations_info(channel=channel_id)\n\n            if isinstance(channel_info.data, dict):\n                channel_data = channel_info.data.get(\"channel\", {})\n                channel_type = get_channel_type(channel_info=channel_data)\n                is_private_or_dm = channel_type in [\n                    ChannelType.PRIVATE_CHANNEL,\n                    ChannelType.IM,\n                    ChannelType.MPIM,\n                ]\n\n                if is_private_or_dm and channel_id != allowed_private_channel:\n                    return True\n        except Exception as e:\n            logger.warning(\n                f\"Could not determine channel type for {channel_id}, filtering out: {e}\"\n            )\n            return True\n    return False\n\n\nclass SlackQueryResult(BaseModel):\n    \"\"\"Result from a single Slack query including stats.\"\"\"\n\n    model_config = ConfigDict(arbitrary_types_allowed=True)\n\n    messages: list[SlackMessage]\n    filtered_channels: list[str]  # Channels filtered out during this query\n\n\ndef query_slack(\n    query_string: str,\n    access_token: str,\n    limit: int | None = None,\n    allowed_private_channel: str | None = None,\n    bot_token: str | None = None,\n    include_dm: bool = False,\n    entities: dict[str, Any] | None = None,\n    available_channels: list[str] | None = None,\n    channel_metadata_dict: dict[str, ChannelMetadata] | None = None,\n) -> SlackQueryResult:\n\n    # Check if query has channel override (user specified channels in query)\n    has_channel_override = query_string.startswith(\"__CHANNEL_OVERRIDE__\")\n\n    if has_channel_override:\n        # Remove the marker and use the query as-is (already has channel filters)\n        final_query = query_string.replace(\"__CHANNEL_OVERRIDE__\", \"\").strip()\n    else:\n        # Normal flow: build channel filters from entity config\n        channel_filter = \"\"\n        if entities:\n            channel_filter = build_channel_query_filter(entities, available_channels)\n\n        final_query = query_string\n        if channel_filter:\n            # Add channel filter to query\n            final_query = f\"{query_string} {channel_filter}\"\n\n    logger.info(f\"Final query to slack: {final_query}\")\n\n    # Detect if query asks for most recent results\n    sort_by_time = is_recency_query(query_string)\n\n    slack_client = WebClient(token=access_token)\n    try:\n        search_params: dict[str, Any] = {\n            \"query\": final_query,\n            \"count\": limit,\n            \"highlight\": True,\n        }\n\n        # Sort by timestamp for recency-focused queries, otherwise by relevance\n        if sort_by_time:\n            search_params[\"sort\"] = \"timestamp\"\n            search_params[\"sort_dir\"] = \"desc\"\n\n        response = slack_client.search_messages(**search_params)\n        response.validate()\n\n        messages: dict[str, Any] = response.get(\"messages\", {})\n        matches: list[dict[str, Any]] = messages.get(\"matches\", [])\n\n        logger.info(f\"Slack search found {len(matches)} messages\")\n    except SlackApiError as slack_error:\n        logger.error(f\"Slack API error in search_messages: {slack_error}\")\n        logger.error(\n            f\"Slack API error details: status={slack_error.response.status_code}, error={slack_error.response.get('error')}\"\n        )\n        if \"not_allowed_token_type\" in str(slack_error):\n            # Log token type prefix\n            token_prefix = access_token[:4] if len(access_token) >= 4 else \"unknown\"\n            logger.error(f\"TOKEN TYPE ERROR: access_token type: {token_prefix}...\")\n        return SlackQueryResult(messages=[], filtered_channels=[])\n\n    # convert matches to slack messages\n    slack_messages: list[SlackMessage] = []\n    filtered_channels: list[str] = []\n    for match in matches:\n        text: str | None = match.get(\"text\")\n        permalink: str | None = match.get(\"permalink\")\n        message_id: str | None = match.get(\"ts\")\n        channel_id: str | None = match.get(\"channel\", {}).get(\"id\")\n        channel_name: str | None = match.get(\"channel\", {}).get(\"name\")\n        username: str | None = match.get(\"username\")\n        if not username:\n            # Fallback: try to get from user field if username is missing\n            user_info = match.get(\"user\", \"\")\n            if isinstance(user_info, str) and user_info:\n                username = user_info  # Use user ID as fallback\n            else:\n                username = \"unknown_user\"\n        score: float = match.get(\"score\", 0.0)\n        if (  # can't use any() because of type checking :(\n            not text\n            or not permalink\n            or not message_id\n            or not channel_id\n            or not channel_name\n            or not username\n        ):\n            continue\n\n        # Apply channel filtering if needed\n        if _should_skip_channel(\n            channel_id,\n            allowed_private_channel,\n            bot_token,\n            access_token,\n            include_dm,\n            channel_metadata_dict,\n        ):\n            filtered_channels.append(f\"{channel_name}({channel_id})\")\n            continue\n\n        # generate thread id and document id\n        thread_id = (\n            permalink.split(\"?thread_ts=\", 1)[1] if \"?thread_ts=\" in permalink else None\n        )\n        document_id = f\"{channel_id}_{message_id}\"\n\n        decay_factor = DOC_TIME_DECAY\n        doc_time = datetime.fromtimestamp(float(message_id))\n        doc_age_years = (datetime.now() - doc_time).total_seconds() / (\n            365 * 24 * 60 * 60\n        )\n        recency_bias = max(1 / (1 + decay_factor * doc_age_years), 0.75)\n        metadata: dict[str, str | list[str]] = {\n            \"channel\": channel_name,\n            \"time\": doc_time.isoformat(),\n        }\n\n        # extract out the highlighted texts\n        highlighted_texts = set(\n            re.findall(\n                rf\"{re.escape(HIGHLIGHT_START_CHAR)}(.*?){re.escape(HIGHLIGHT_END_CHAR)}\",\n                text,\n            )\n        )\n        cleaned_text = text.replace(HIGHLIGHT_START_CHAR, \"\").replace(\n            HIGHLIGHT_END_CHAR, \"\"\n        )\n\n        # get the semantic identifier\n        snippet = (\n            cleaned_text[:50].rstrip() + \"...\" if len(cleaned_text) > 50 else text\n        ).replace(\"\\n\", \" \")\n        doc_sem_id = f\"{username} in #{channel_name}: {snippet}\"\n\n        slack_messages.append(\n            SlackMessage(\n                document_id=document_id,\n                channel_id=channel_id,\n                message_id=message_id,\n                thread_id=thread_id,\n                link=permalink,\n                metadata=metadata,\n                timestamp=doc_time,\n                recency_bias=recency_bias,\n                semantic_identifier=doc_sem_id,\n                text=f\"{username}: {cleaned_text}\",\n                highlighted_texts=highlighted_texts,\n                slack_score=score,\n            )\n        )\n\n    return SlackQueryResult(\n        messages=slack_messages, filtered_channels=filtered_channels\n    )\n\n\ndef merge_slack_messages(\n    query_results: list[SlackQueryResult],\n) -> tuple[list[SlackMessage], dict[str, SlackMessage], set[str]]:\n    \"\"\"Merge messages from multiple query results, deduplicating by document_id.\n\n    Returns:\n        Tuple of (merged_messages, docid_to_message, all_filtered_channels)\n    \"\"\"\n    merged_messages: list[SlackMessage] = []\n    docid_to_message: dict[str, SlackMessage] = {}\n    all_filtered_channels: set[str] = set()\n\n    for result in query_results:\n        # Collect filtered channels from all queries\n        all_filtered_channels.update(result.filtered_channels)\n\n        for message in result.messages:\n            if message.document_id in docid_to_message:\n                # update the score and highlighted texts, rest should be identical\n                docid_to_message[message.document_id].slack_score = max(\n                    docid_to_message[message.document_id].slack_score,\n                    message.slack_score,\n                )\n                docid_to_message[message.document_id].highlighted_texts.update(\n                    message.highlighted_texts\n                )\n                continue\n\n            # add the message to the list\n            docid_to_message[message.document_id] = message\n            merged_messages.append(message)\n\n    # re-sort by score\n    merged_messages.sort(key=lambda x: x.slack_score, reverse=True)\n\n    return merged_messages, docid_to_message, all_filtered_channels\n\n\nclass SlackRateLimitError(Exception):\n    \"\"\"Raised when Slack API returns a rate limit error (429).\"\"\"\n\n\nclass ThreadContextResult:\n    \"\"\"Result wrapper for thread context fetch that captures error type.\"\"\"\n\n    __slots__ = (\"text\", \"is_rate_limited\", \"is_error\")\n\n    def __init__(\n        self, text: str, is_rate_limited: bool = False, is_error: bool = False\n    ):\n        self.text = text\n        self.is_rate_limited = is_rate_limited\n        self.is_error = is_error\n\n    @classmethod\n    def success(cls, text: str) -> \"ThreadContextResult\":\n        return cls(text)\n\n    @classmethod\n    def rate_limited(cls, original_text: str) -> \"ThreadContextResult\":\n        return cls(original_text, is_rate_limited=True)\n\n    @classmethod\n    def error(cls, original_text: str) -> \"ThreadContextResult\":\n        return cls(original_text, is_error=True)\n\n\ndef _fetch_thread_context(\n    message: SlackMessage, access_token: str, team_id: str | None = None\n) -> ThreadContextResult:\n    \"\"\"\n    Fetch thread context for a message, returning a result object.\n\n    Returns ThreadContextResult with:\n    - success: enriched thread text\n    - rate_limited: original text + flag indicating we should stop\n    - error: original text for other failures (graceful degradation)\n    \"\"\"\n    channel_id = message.channel_id\n    thread_id = message.thread_id\n    message_id = message.message_id\n\n    # If not a thread, return original text as success\n    if thread_id is None:\n        return ThreadContextResult.success(message.text)\n\n    slack_client = WebClient(token=access_token, timeout=30)\n    try:\n        response = slack_client.conversations_replies(\n            channel=channel_id,\n            ts=thread_id,\n        )\n        response.validate()\n        messages: list[dict[str, Any]] = response.get(\"messages\", [])\n    except SlackApiError as e:\n        # Check for rate limit error specifically\n        if e.response and e.response.status_code == 429:\n            logger.warning(\n                f\"Slack rate limit hit while fetching thread context for {channel_id}/{thread_id}\"\n            )\n            return ThreadContextResult.rate_limited(message.text)\n        # For other Slack errors, log and return original text\n        logger.error(f\"Slack API error in thread context fetch: {e}\")\n        return ThreadContextResult.error(message.text)\n    except Exception as e:\n        # Network errors, timeouts, etc - treat as recoverable error\n        logger.error(f\"Unexpected error in thread context fetch: {e}\")\n        return ThreadContextResult.error(message.text)\n\n    # If empty response or single message (not a thread), return original text\n    if len(messages) <= 1:\n        return ThreadContextResult.success(message.text)\n\n    # Build thread text from thread starter + context window around matched message\n    thread_text = _build_thread_text(\n        messages, message_id, thread_id, access_token, team_id, slack_client\n    )\n    return ThreadContextResult.success(thread_text)\n\n\ndef _build_thread_text(\n    messages: list[dict[str, Any]],\n    message_id: str,\n    thread_id: str,\n    access_token: str,\n    team_id: str | None,\n    slack_client: WebClient,\n) -> str:\n    \"\"\"Build the thread text from messages.\"\"\"\n    msg_text = messages[0].get(\"text\", \"\")\n    msg_sender = messages[0].get(\"user\", \"\")\n    thread_text = f\"<@{msg_sender}>: {msg_text}\"\n\n    thread_text += \"\\n\\nReplies:\"\n    if thread_id == message_id:\n        message_id_idx = 0\n    else:\n        message_id_idx = next(\n            (i for i, msg in enumerate(messages) if msg.get(\"ts\") == message_id), 0\n        )\n        if not message_id_idx:\n            return thread_text\n\n        start_idx = max(1, message_id_idx - SLACK_THREAD_CONTEXT_WINDOW)\n\n        if start_idx > 1:\n            thread_text += \"\\n...\"\n\n        for i in range(start_idx, message_id_idx):\n            msg_text = messages[i].get(\"text\", \"\")\n            msg_sender = messages[i].get(\"user\", \"\")\n            thread_text += f\"\\n\\n<@{msg_sender}>: {msg_text}\"\n\n        msg_text = messages[message_id_idx].get(\"text\", \"\")\n        msg_sender = messages[message_id_idx].get(\"user\", \"\")\n        thread_text += f\"\\n\\n<@{msg_sender}>: {msg_text}\"\n\n    # Add following replies\n    len_replies = 0\n    for msg in messages[message_id_idx + 1 :]:\n        msg_text = msg.get(\"text\", \"\")\n        msg_sender = msg.get(\"user\", \"\")\n        reply = f\"\\n\\n<@{msg_sender}>: {msg_text}\"\n        thread_text += reply\n\n        len_replies += len(reply)\n        if len_replies >= DOC_EMBEDDING_CONTEXT_SIZE * 4:\n            thread_text += \"\\n...\"\n            break\n\n    # Replace user IDs with names using cached lookups\n    userids: set[str] = set(re.findall(r\"<@([A-Z0-9]+)>\", thread_text))\n\n    if team_id:\n        user_profiles = batch_get_user_profiles(access_token, team_id, userids)\n        for userid, name in user_profiles.items():\n            thread_text = thread_text.replace(f\"<@{userid}>\", name)\n    else:\n        for userid in userids:\n            try:\n                response = slack_client.users_profile_get(user=userid)\n                response.validate()\n                profile: dict[str, Any] = response.get(\"profile\", {})\n                user_name: str | None = profile.get(\"real_name\") or profile.get(\"email\")\n            except SlackApiError as e:\n                if \"user_not_found\" in str(e):\n                    logger.debug(\n                        f\"User {userid} not found (likely deleted/deactivated)\"\n                    )\n                else:\n                    logger.warning(f\"Could not fetch profile for user {userid}: {e}\")\n                continue\n            if not user_name:\n                continue\n            thread_text = thread_text.replace(f\"<@{userid}>\", user_name)\n\n    return thread_text\n\n\ndef fetch_thread_contexts_with_rate_limit_handling(\n    slack_messages: list[SlackMessage],\n    access_token: str,\n    team_id: str | None,\n    batch_size: int = SLACK_THREAD_CONTEXT_BATCH_SIZE,\n    max_messages: int | None = MAX_SLACK_THREAD_CONTEXT_MESSAGES,\n) -> list[str]:\n    \"\"\"\n    Fetch thread contexts in controlled batches, stopping on rate limit.\n\n    Distinguishes between error types:\n    - Rate limit (429): Stop processing further batches\n    - Other errors: Continue processing (graceful degradation)\n\n    Args:\n        slack_messages: Messages to fetch thread context for (should be sorted by relevance)\n        access_token: Slack OAuth token\n        team_id: Slack team ID for user profile caching\n        batch_size: Number of concurrent API calls per batch\n        max_messages: Maximum messages to fetch thread context for (None = no limit)\n\n    Returns:\n        List of thread texts, one per input message.\n        Messages beyond max_messages or after rate limit get their original text.\n    \"\"\"\n    if not slack_messages:\n        return []\n\n    # Limit how many messages we fetch thread context for (if max_messages is set)\n    if max_messages and max_messages < len(slack_messages):\n        messages_for_context = slack_messages[:max_messages]\n        messages_without_context = slack_messages[max_messages:]\n    else:\n        messages_for_context = slack_messages\n        messages_without_context = []\n\n    logger.info(\n        f\"Fetching thread context for {len(messages_for_context)} of {len(slack_messages)} messages \"\n        f\"(batch_size={batch_size}, max={max_messages or 'unlimited'})\"\n    )\n\n    results: list[str] = []\n    rate_limited = False\n    total_batches = (len(messages_for_context) + batch_size - 1) // batch_size\n    rate_limit_batch = 0\n\n    # Process in batches\n    for i in range(0, len(messages_for_context), batch_size):\n        current_batch = i // batch_size + 1\n\n        if rate_limited:\n            # Skip remaining batches, use original message text\n            remaining = messages_for_context[i:]\n            skipped_batches = total_batches - rate_limit_batch\n            logger.warning(\n                f\"Slack rate limit: skipping {len(remaining)} remaining messages \"\n                f\"({skipped_batches} of {total_batches} batches). \"\n                f\"Successfully enriched {len(results)} messages before rate limit.\"\n            )\n            results.extend([msg.text for msg in remaining])\n            break\n\n        batch = messages_for_context[i : i + batch_size]\n\n        # _fetch_thread_context returns ThreadContextResult (never raises)\n        # allow_failures=True is a safety net for any unexpected exceptions\n        batch_results: list[ThreadContextResult | None] = (\n            run_functions_tuples_in_parallel(\n                [\n                    (\n                        _fetch_thread_context,\n                        (msg, access_token, team_id),\n                    )\n                    for msg in batch\n                ],\n                allow_failures=True,\n                max_workers=batch_size,\n            )\n        )\n\n        # Process results - ThreadContextResult tells us exactly what happened\n        for j, result in enumerate(batch_results):\n            if result is None:\n                # Unexpected exception (shouldn't happen) - use original text, stop\n                logger.error(f\"Unexpected None result for message {j} in batch\")\n                results.append(batch[j].text)\n                rate_limited = True\n                rate_limit_batch = current_batch\n            elif result.is_rate_limited:\n                # Rate limit hit - use original text, stop further batches\n                results.append(result.text)\n                rate_limited = True\n                rate_limit_batch = current_batch\n            else:\n                # Success or recoverable error - use the text (enriched or original)\n                results.append(result.text)\n\n        if rate_limited:\n            logger.warning(\n                f\"Slack rate limit (429) hit at batch {current_batch}/{total_batches} \"\n                f\"while fetching thread context. Stopping further API calls.\"\n            )\n\n    # Add original text for messages we didn't fetch context for\n    results.extend([msg.text for msg in messages_without_context])\n\n    return results\n\n\ndef convert_slack_score(slack_score: float) -> float:\n    \"\"\"\n    Convert slack score to a score between 0 and 1.\n    Will affect UI ordering and LLM ordering, but not the pruning.\n    I.e., should have very little effect on the search/answer quality.\n    \"\"\"\n    return max(0.0, min(1.0, slack_score / 90_000))\n\n\n@log_function_time(print_only=True)\ndef slack_retrieval(\n    query: ChunkIndexRequest,\n    access_token: str,\n    db_session: Session | None = None,\n    connector: FederatedConnectorDetail | None = None,  # noqa: ARG001\n    entities: dict[str, Any] | None = None,\n    limit: int | None = None,\n    slack_event_context: SlackContext | None = None,\n    bot_token: str | None = None,  # Add bot token parameter\n    team_id: str | None = None,\n    # Pre-fetched data — when provided, avoids DB query (no session needed)\n    search_settings: SearchSettings | None = None,\n) -> list[InferenceChunk]:\n    \"\"\"\n    Main entry point for Slack federated search with entity filtering.\n\n    Applies entity filtering including:\n    - Channel selection and exclusion\n    - Date range extraction and enforcement\n    - DM/private channel filtering\n    - Multi-layer caching\n\n    Args:\n        query: Search query object\n        access_token: User OAuth access token\n        db_session: Database session (optional if search_settings provided)\n        connector: Federated connector detail (unused, kept for backwards compat)\n        entities: Connector-level config (entity filtering configuration)\n        limit: Maximum number of results\n        slack_event_context: Context when called from Slack bot\n        bot_token: Bot token for enhanced permissions\n        team_id: Slack team/workspace ID\n\n    Returns:\n        List of InferenceChunk objects\n    \"\"\"\n    # Use connector-level config\n    entities = entities or {}\n\n    if not entities:\n        logger.debug(\"No entity configuration found, using defaults\")\n    else:\n        logger.debug(f\"Using entity configuration: {entities}\")\n\n    # Extract limit from entity config if not explicitly provided\n    query_limit = limit\n    if entities:\n        try:\n            parsed_entities = SlackEntities(**entities)\n            if limit is None:\n                query_limit = parsed_entities.max_messages_per_query\n                logger.debug(f\"Using max_messages_per_query from config: {query_limit}\")\n        except Exception as e:\n            logger.warning(f\"Error parsing entities for limit: {e}\")\n            if limit is None:\n                query_limit = 100  # Fallback default\n    elif limit is None:\n        query_limit = 100  # Default when no entities and no limit provided\n\n    # Pre-fetch channel metadata from Redis cache and extract available channels\n    # This avoids repeated Redis lookups during parallel search execution\n    available_channels = None\n    channel_metadata_dict = None\n    if team_id:\n        # Always fetch all channel types (include_private=True) to ensure single cache entry\n        channel_metadata_dict = fetch_and_cache_channel_metadata(\n            access_token, team_id, include_private=True\n        )\n\n        # Extract available channels list if needed for pattern matching\n        available_channels = _extract_channel_data_from_entities(\n            entities, channel_metadata_dict\n        )\n\n    # Query slack with entity filtering\n    llm = get_default_llm()\n    query_strings = build_slack_queries(query, llm, entities, available_channels)\n\n    # Determine filtering based on entities OR context (bot)\n    include_dm = False\n    allowed_private_channel = None\n\n    # Bot context overrides (if entities not specified)\n    if slack_event_context and not entities:\n        channel_type = slack_event_context.channel_type\n        if channel_type == ChannelType.IM:  # DM with user\n            include_dm = True\n        if channel_type == ChannelType.PRIVATE_CHANNEL:\n            allowed_private_channel = slack_event_context.channel_id\n            logger.debug(\n                f\"Private channel context: will only allow messages from {allowed_private_channel} + public channels\"\n            )\n\n    # Build search tasks\n    search_tasks = [\n        (\n            query_slack,\n            (\n                query_string,\n                access_token,\n                query_limit,\n                allowed_private_channel,\n                bot_token,\n                include_dm,\n                entities,\n                available_channels,\n                channel_metadata_dict,\n            ),\n        )\n        for query_string in query_strings\n    ]\n\n    # If include_dm is True AND we're not already searching all channels,\n    # add additional searches without channel filters.\n    # This allows searching DMs/group DMs while still searching the specified channels.\n    # Skip this if search_all_channels is already True (would be duplicate queries).\n    if (\n        entities\n        and entities.get(\"include_dm\")\n        and not entities.get(\"search_all_channels\")\n    ):\n        # Create a minimal entities dict that won't add channel filters\n        # This ensures we search ALL conversations (DMs, group DMs, private channels)\n        # BUT we still want to exclude channels specified in exclude_channels\n        dm_entities = {\n            \"include_dm\": True,\n            \"include_private_channels\": entities.get(\"include_private_channels\", False),\n            \"default_search_days\": entities.get(\"default_search_days\", 30),\n            \"search_all_channels\": True,\n            \"channels\": None,\n            \"exclude_channels\": entities.get(\n                \"exclude_channels\"\n            ),  # ALWAYS apply exclude_channels\n        }\n\n        for query_string in query_strings:\n            search_tasks.append(\n                (\n                    query_slack,\n                    (\n                        query_string,\n                        access_token,\n                        query_limit,\n                        allowed_private_channel,\n                        bot_token,\n                        include_dm,\n                        dm_entities,\n                        available_channels,\n                        channel_metadata_dict,\n                    ),\n                )\n            )\n\n    # Execute searches in parallel\n    results = run_functions_tuples_in_parallel(search_tasks)\n\n    # Calculate stats for consolidated logging\n    total_raw_messages = sum(len(r.messages) for r in results)\n\n    # Merge and post-filter results\n    slack_messages, docid_to_message, query_filtered_channels = merge_slack_messages(\n        results\n    )\n    messages_after_dedup = len(slack_messages)\n\n    # Post-filter by channel type (DM, private channel, etc.)\n    # NOTE: We must post-filter because Slack's search.messages API only supports\n    # filtering by channel NAME (via in:#channel syntax), not by channel TYPE.\n    # There's no way to specify \"only public channels\" or \"exclude DMs\" in the query.\n    # Start with channels filtered during query execution, then add post-filter channels\n    filtered_out_channels: set[str] = set(query_filtered_channels)\n    if entities and team_id:\n        # Use pre-fetched channel metadata to avoid cache misses\n        # Pass it directly instead of relying on Redis cache\n\n        filtered_messages = []\n        for msg in slack_messages:\n            # Pass pre-fetched metadata to avoid cache lookups\n            channel_type = get_channel_type(\n                channel_id=msg.channel_id,\n                channel_metadata=channel_metadata_dict,\n            )\n            if should_include_message(channel_type, entities):\n                filtered_messages.append(msg)\n            else:\n                # Track unique channel name for summary\n                channel_name = msg.metadata.get(\"channel\", msg.channel_id)\n                filtered_out_channels.add(f\"{channel_name}({msg.channel_id})\")\n\n        slack_messages = filtered_messages\n\n    slack_messages = slack_messages[: limit or len(slack_messages)]\n\n    # Log consolidated summary with request ID for correlation\n    request_id = (\n        slack_event_context.message_ts[:10]\n        if slack_event_context and slack_event_context.message_ts\n        else \"no-ctx\"\n    )\n    logger.info(\n        f\"[req:{request_id}] Slack federated search: {len(search_tasks)} queries, \"\n        f\"{total_raw_messages} raw msgs -> {messages_after_dedup} after dedup -> \"\n        f\"{len(slack_messages)} final\"\n        + (\n            f\", filtered channels: {sorted(filtered_out_channels)}\"\n            if filtered_out_channels\n            else \"\"\n        )\n    )\n\n    if not slack_messages:\n        return []\n\n    # Fetch thread context with rate limit handling and message limiting\n    # Messages are already sorted by relevance (slack_score), so top N get full context\n    thread_texts = fetch_thread_contexts_with_rate_limit_handling(\n        slack_messages=slack_messages,\n        access_token=access_token,\n        team_id=team_id,\n    )\n    for slack_message, thread_text in zip(slack_messages, thread_texts):\n        slack_message.text = thread_text\n\n    # get the highlighted texts from shortest to longest\n    highlighted_texts: set[str] = set()\n    for slack_message in slack_messages:\n        highlighted_texts.update(slack_message.highlighted_texts)\n    sorted_highlighted_texts = sorted(highlighted_texts, key=len)\n\n    # For queries without highlights (e.g., empty recency queries), we should keep all chunks\n    has_highlights = len(sorted_highlighted_texts) > 0\n\n    # convert slack messages to index documents\n    index_docs: list[IndexingDocument] = []\n    for slack_message in slack_messages:\n        section: TextSection = TextSection(\n            text=slack_message.text, link=slack_message.link\n        )\n        index_docs.append(\n            IndexingDocument(\n                id=slack_message.document_id,\n                sections=[section],\n                processed_sections=[section],\n                source=DocumentSource.SLACK,\n                title=slack_message.semantic_identifier,\n                semantic_identifier=slack_message.semantic_identifier,\n                metadata=slack_message.metadata,\n                doc_updated_at=slack_message.timestamp,\n            )\n        )\n\n    # chunk index docs into doc aware chunks\n    # a single index doc can get split into multiple chunks\n    if search_settings is None:\n        if db_session is None:\n            raise ValueError(\"Either db_session or search_settings must be provided\")\n        search_settings = get_current_search_settings(db_session)\n    embedder = DefaultIndexingEmbedder.from_db_search_settings(\n        search_settings=search_settings\n    )\n    multipass_config = get_multipass_config(search_settings)\n    enable_contextual_rag = (\n        search_settings.enable_contextual_rag or ENABLE_CONTEXTUAL_RAG\n    )\n    chunker = Chunker(\n        tokenizer=embedder.embedding_model.tokenizer,\n        enable_multipass=multipass_config.multipass_indexing,\n        enable_large_chunks=multipass_config.enable_large_chunks,\n        enable_contextual_rag=enable_contextual_rag,\n    )\n    chunks = chunker.chunk(index_docs)\n\n    # prune chunks without any highlighted texts\n    # BUT: for recency queries without keywords, keep all chunks\n    relevant_chunks: list[DocAwareChunk] = []\n    chunkid_to_match_highlight: dict[str, str] = {}\n\n    if not has_highlights:\n        # No highlighted terms - keep all chunks (recency query)\n        for chunk in chunks:\n            chunk_id = f\"{chunk.source_document.id}__{chunk.chunk_id}\"\n            relevant_chunks.append(chunk)\n            chunkid_to_match_highlight[chunk_id] = chunk.content  # No highlighting\n            if limit and len(relevant_chunks) >= limit:\n                break\n    else:\n        # Prune chunks that don't contain highlighted terms\n        for chunk in chunks:\n            match_highlight = chunk.content\n            for highlight in sorted_highlighted_texts:  # faster than re sub\n                match_highlight = match_highlight.replace(\n                    highlight, f\"<hi>{highlight}</hi>\"\n                )\n\n            # if nothing got replaced, the chunk is irrelevant\n            if len(match_highlight) == len(chunk.content):\n                continue\n\n            chunk_id = f\"{chunk.source_document.id}__{chunk.chunk_id}\"\n            relevant_chunks.append(chunk)\n            chunkid_to_match_highlight[chunk_id] = match_highlight\n            if limit and len(relevant_chunks) >= limit:\n                break\n\n    # convert to inference chunks\n    top_chunks: list[InferenceChunk] = []\n    for chunk in relevant_chunks:\n        document_id = chunk.source_document.id\n        chunk_id = f\"{document_id}__{chunk.chunk_id}\"\n\n        top_chunks.append(\n            InferenceChunk(\n                chunk_id=chunk.chunk_id,\n                blurb=chunk.blurb,\n                content=chunk.content,\n                source_links=chunk.source_links,\n                image_file_id=chunk.image_file_id,\n                section_continuation=chunk.section_continuation,\n                semantic_identifier=docid_to_message[document_id].semantic_identifier,\n                document_id=document_id,\n                source_type=DocumentSource.SLACK,\n                title=chunk.title_prefix,\n                boost=0,\n                score=convert_slack_score(docid_to_message[document_id].slack_score),\n                hidden=False,\n                is_relevant=None,\n                relevance_explanation=\"\",\n                metadata=docid_to_message[document_id].metadata,\n                match_highlights=[chunkid_to_match_highlight[chunk_id]],\n                doc_summary=\"\",\n                chunk_context=\"\",\n                updated_at=docid_to_message[document_id].timestamp,\n                is_federated=True,\n            )\n        )\n\n    return top_chunks\n"
  },
  {
    "path": "backend/onyx/context/search/federated/slack_search_utils.py",
    "content": "import fnmatch\nimport json\nimport re\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\n\nfrom pydantic import ValidationError\n\nfrom onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS\nfrom onyx.context.search.federated.models import ChannelMetadata\nfrom onyx.context.search.models import ChunkIndexRequest\nfrom onyx.federated_connectors.slack.models import SlackEntities\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.models import UserMessage\nfrom onyx.llm.utils import llm_response_to_string\nfrom onyx.natural_language_processing.english_stopwords import ENGLISH_STOPWORDS_SET\nfrom onyx.onyxbot.slack.models import ChannelType\nfrom onyx.prompts.federated_search import SLACK_DATE_EXTRACTION_PROMPT\nfrom onyx.prompts.federated_search import SLACK_QUERY_EXPANSION_PROMPT\nfrom onyx.tracing.llm_utils import llm_generation_span\nfrom onyx.tracing.llm_utils import record_llm_response\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Constants for date extraction heuristics\nDEFAULT_RECENCY_DAYS = 7\nDEFAULT_LATELY_DAYS = 14\nDAYS_PER_WEEK = 7\nDAYS_PER_MONTH = 30\nMAX_CONTENT_WORDS = 3\n\n# Punctuation to strip from words during analysis\nWORD_PUNCTUATION = \".,!?;:\\\"'#\"\n\nRECENCY_KEYWORDS = [\"recent\", \"latest\", \"newest\", \"last\"]\n\n# All Slack channel types for fetching metadata\nALL_CHANNEL_TYPES = [\n    ChannelType.PUBLIC_CHANNEL.value,\n    ChannelType.IM.value,\n    ChannelType.MPIM.value,\n    ChannelType.PRIVATE_CHANNEL.value,\n]\n\n# Map Slack API scopes to their corresponding channel types\n# This is used for graceful degradation when scopes are missing\nSCOPE_TO_CHANNEL_TYPE_MAP = {\n    \"mpim:read\": ChannelType.MPIM.value,\n    \"mpim:history\": ChannelType.MPIM.value,\n    \"im:read\": ChannelType.IM.value,\n    \"im:history\": ChannelType.IM.value,\n    \"groups:read\": ChannelType.PRIVATE_CHANNEL.value,\n    \"groups:history\": ChannelType.PRIVATE_CHANNEL.value,\n    \"channels:read\": ChannelType.PUBLIC_CHANNEL.value,\n    \"channels:history\": ChannelType.PUBLIC_CHANNEL.value,\n}\n\n\ndef get_channel_type_for_missing_scope(scope: str) -> str | None:\n    \"\"\"Get the channel type that requires a specific Slack scope.\n\n    Args:\n        scope: The Slack API scope (e.g., 'mpim:read', 'im:history')\n\n    Returns:\n        The channel type string if scope is recognized, None otherwise\n\n    Examples:\n        >>> get_channel_type_for_missing_scope('mpim:read')\n        'mpim'\n        >>> get_channel_type_for_missing_scope('im:read')\n        'im'\n        >>> get_channel_type_for_missing_scope('unknown:scope')\n        None\n    \"\"\"\n    return SCOPE_TO_CHANNEL_TYPE_MAP.get(scope)\n\n\ndef _parse_llm_code_block_response(response: str) -> str:\n    \"\"\"Remove code block markers from LLM response if present.\n\n    Handles responses wrapped in triple backticks (```) by removing\n    the opening and closing markers.\n\n    Args:\n        response: Raw LLM response string\n\n    Returns:\n        Cleaned response with code block markers removed\n    \"\"\"\n    response_clean = response.strip()\n    if response_clean.startswith(\"```\"):\n        lines = response_clean.split(\"\\n\")\n        lines = lines[1:]\n        if lines and lines[-1].strip() == \"```\":\n            lines = lines[:-1]\n        response_clean = \"\\n\".join(lines)\n    return response_clean\n\n\ndef is_recency_query(query: str) -> bool:\n    \"\"\"Check if a query is primarily about recency (not content + recency).\n\n    Returns True only for pure recency queries like \"recent messages\" or \"latest updates\",\n    but False for queries with content + recency like \"golf scores last saturday\".\n    \"\"\"\n    # Check if query contains recency keywords\n    has_recency_keyword = any(\n        re.search(rf\"\\b{re.escape(keyword)}\\b\", query, flags=re.IGNORECASE)\n        for keyword in RECENCY_KEYWORDS\n    )\n\n    if not has_recency_keyword:\n        return False\n\n    # Get combined stop words (English + Slack-specific)\n    all_stop_words = _get_combined_stop_words()\n\n    # Extract content words (excluding stop words)\n    query_lower = query.lower()\n    words = query_lower.split()\n\n    # Count content words (not stop words, length > 2)\n    content_word_count = 0\n    for word in words:\n        clean_word = word.strip(WORD_PUNCTUATION)\n        if clean_word and len(clean_word) > 2 and clean_word not in all_stop_words:\n            content_word_count += 1\n\n    # If query has significant content words (>= 2), it's not a pure recency query\n    # Examples:\n    # - \"recent messages\" -> content_word_count = 0 -> pure recency\n    # - \"golf scores last saturday\" -> content_word_count = 3 (golf, scores, saturday) -> not pure recency\n    return content_word_count < 2\n\n\ndef extract_date_range_from_query(\n    query: str,\n    llm: LLM,\n    default_search_days: int,\n) -> int:\n    query_lower = query.lower()\n\n    if re.search(r\"\\btoday(?:\\'?s)?\\b\", query_lower):\n        return 0\n\n    if re.search(r\"\\byesterday\\b\", query_lower):\n        return min(1, default_search_days)\n\n    # Handle \"last [day of week]\" - e.g., \"last monday\", \"last saturday\"\n    days_of_week = [\n        \"monday\",\n        \"tuesday\",\n        \"wednesday\",\n        \"thursday\",\n        \"friday\",\n        \"saturday\",\n        \"sunday\",\n    ]\n    for day in days_of_week:\n        if re.search(rf\"\\b(?:last|this)\\s+{day}\\b\", query_lower):\n            # Assume last occurrence of that day was within the past week\n            return min(DAYS_PER_WEEK, default_search_days)\n\n    match = re.search(r\"\\b(?:last|past)\\s+(\\d+)\\s+days?\\b\", query_lower)\n    if match:\n        days = int(match.group(1))\n        return min(days, default_search_days)\n\n    if re.search(r\"\\b(?:last|past|this)\\s+week\\b\", query_lower):\n        return min(DAYS_PER_WEEK, default_search_days)\n\n    match = re.search(r\"\\b(?:last|past)\\s+(\\d+)\\s+weeks?\\b\", query_lower)\n    if match:\n        weeks = int(match.group(1))\n        return min(weeks * DAYS_PER_WEEK, default_search_days)\n\n    if re.search(r\"\\b(?:last|past|this)\\s+month\\b\", query_lower):\n        return min(DAYS_PER_MONTH, default_search_days)\n\n    match = re.search(r\"\\b(?:last|past)\\s+(\\d+)\\s+months?\\b\", query_lower)\n    if match:\n        months = int(match.group(1))\n        return min(months * DAYS_PER_MONTH, default_search_days)\n\n    if re.search(r\"\\brecent(?:ly)?\\b\", query_lower):\n        return min(DEFAULT_RECENCY_DAYS, default_search_days)\n\n    if re.search(r\"\\blately\\b\", query_lower):\n        return min(DEFAULT_LATELY_DAYS, default_search_days)\n\n    try:\n        prompt = SLACK_DATE_EXTRACTION_PROMPT.format(query=query)\n        prompt_msg = UserMessage(content=prompt)\n\n        # Call LLM with Braintrust tracing\n        with llm_generation_span(\n            llm=llm, flow=\"slack_date_extraction\", input_messages=[prompt_msg]\n        ) as span_generation:\n            llm_response = llm.invoke(prompt_msg)\n            record_llm_response(span_generation, llm_response)\n            response = llm_response_to_string(llm_response)\n\n        response_clean = _parse_llm_code_block_response(response)\n\n        try:\n            data = json.loads(response_clean)\n            if not isinstance(data, dict):\n                logger.debug(\n                    f\"LLM date extraction returned non-dict response for query: \"\n                    f\"'{query}', using default: {default_search_days} days\"\n                )\n                return default_search_days\n\n            days_back = data.get(\"days_back\")\n            if days_back is None:\n                logger.debug(\n                    f\"LLM date extraction returned null for query: '{query}', using default: {default_search_days} days\"\n                )\n                return default_search_days\n\n            if not isinstance(days_back, (int, float)):\n                logger.debug(\n                    f\"LLM date extraction returned non-numeric days_back for \"\n                    f\"query: '{query}', using default: {default_search_days} days\"\n                )\n                return default_search_days\n\n        except json.JSONDecodeError:\n            logger.debug(\n                f\"Failed to parse LLM date extraction response for query: '{query}' \"\n                f\"(response: '{response_clean}'), \"\n                f\"using default: {default_search_days} days\"\n            )\n            return default_search_days\n\n        return min(int(days_back), default_search_days)\n\n    except Exception as e:\n        logger.warning(f\"Error extracting date range with LLM for query '{query}': {e}\")\n        return default_search_days\n\n\ndef matches_exclude_pattern(channel_name: str, patterns: list[str]) -> bool:\n    if not patterns:\n        return False\n\n    channel_norm = channel_name.lower().strip().lstrip(\"#\")\n\n    for pattern in patterns:\n        pattern_norm = pattern.lower().strip().lstrip(\"#\")\n        if fnmatch.fnmatch(channel_norm, pattern_norm):\n            return True\n\n    return False\n\n\ndef build_channel_query_filter(\n    parsed_entities: SlackEntities | dict[str, Any],\n    available_channels: list[str] | None = None,\n) -> str:\n    # Parse entities if dict\n    try:\n        if isinstance(parsed_entities, dict):\n            entities = SlackEntities(**parsed_entities)\n        else:\n            entities = parsed_entities\n    except ValidationError:\n        return \"\"\n\n    search_all_channels = entities.search_all_channels\n\n    if search_all_channels:\n        if not entities.exclude_channels:\n            return \"\"\n\n        # Can't apply exclusions without available_channels\n        if not available_channels:\n            return \"\"\n\n        excluded_channels = [\n            ch\n            for ch in available_channels\n            if matches_exclude_pattern(ch, entities.exclude_channels)\n        ]\n        normalized_excluded = [ch.lstrip(\"#\") for ch in excluded_channels]\n\n        exclusion_filters = [f\"-in:#{channel}\" for channel in normalized_excluded]\n        return \" \".join(exclusion_filters)\n\n    if not entities.channels:\n        return \"\"\n\n    included_channels: list[str] = []\n    for pattern in entities.channels:\n        pattern_norm = pattern.lstrip(\"#\")\n        if \"*\" in pattern_norm or \"?\" in pattern_norm:\n            # Glob patterns require available_channels\n            if available_channels:\n                matching = [\n                    ch\n                    for ch in available_channels\n                    if fnmatch.fnmatch(ch.lstrip(\"#\").lower(), pattern_norm.lower())\n                ]\n                included_channels.extend(matching)\n        else:\n            # Exact match: use directly or verify against available_channels\n            if not available_channels or pattern_norm in [\n                ch.lstrip(\"#\") for ch in available_channels\n            ]:\n                included_channels.append(pattern_norm)\n\n    # Apply exclusions to included channels\n    if entities.exclude_channels:\n        included_channels = [\n            ch\n            for ch in included_channels\n            if not matches_exclude_pattern(ch, entities.exclude_channels)\n        ]\n\n    if not included_channels:\n        return \"\"\n\n    normalized_channels = [ch.lstrip(\"#\") for ch in included_channels]\n    filters = [f\"in:#{channel}\" for channel in normalized_channels]\n    return \" \".join(filters)\n\n\ndef get_channel_type(\n    channel_info: dict[str, Any] | None = None,\n    channel_id: str | None = None,\n    channel_metadata: dict[str, ChannelMetadata] | None = None,\n) -> ChannelType:\n    \"\"\"\n    Determine channel type from channel info dict or by looking up channel_id.\n\n    Args:\n        channel_info: Channel info dict from Slack API (direct mode)\n        channel_id: Channel ID to look up (lookup mode)\n        channel_metadata: Pre-fetched metadata dict (for lookup mode)\n\n    Returns:\n        ChannelType enum\n    \"\"\"\n    if channel_info is not None:\n        if channel_info.get(\"is_im\"):\n            return ChannelType.IM\n        if channel_info.get(\"is_mpim\"):\n            return ChannelType.MPIM\n        if channel_info.get(\"is_private\"):\n            return ChannelType.PRIVATE_CHANNEL\n        return ChannelType.PUBLIC_CHANNEL\n\n    # Lookup mode: get type from pre-fetched metadata\n    if channel_id and channel_metadata:\n        ch_meta = channel_metadata.get(channel_id)\n        if ch_meta:\n            type_str = ch_meta.get(\"type\")\n            if type_str == ChannelType.IM.value:\n                return ChannelType.IM\n            elif type_str == ChannelType.MPIM.value:\n                return ChannelType.MPIM\n            elif type_str == ChannelType.PRIVATE_CHANNEL.value:\n                return ChannelType.PRIVATE_CHANNEL\n            return ChannelType.PUBLIC_CHANNEL\n\n    return ChannelType.PUBLIC_CHANNEL\n\n\ndef should_include_message(channel_type: ChannelType, entities: dict[str, Any]) -> bool:\n    include_dm = entities.get(\"include_dm\", False)\n    include_group_dm = entities.get(\"include_group_dm\", False)\n    include_private = entities.get(\"include_private_channels\", False)\n\n    if channel_type == ChannelType.IM:\n        return include_dm\n    if channel_type == ChannelType.MPIM:\n        return include_group_dm\n    if channel_type == ChannelType.PRIVATE_CHANNEL:\n        return include_private\n    return True\n\n\ndef extract_channel_references_from_query(query_text: str) -> set[str]:\n    \"\"\"Extract channel names referenced in the query text.\n\n    Only matches explicit channel references with prepositions or # symbols:\n    - \"in the office channel\"\n    - \"from the office channel\"\n    - \"in #office\"\n    - \"from #office\"\n\n    Does NOT match generic phrases like \"slack discussions\" or \"team channel\".\n\n    Args:\n        query_text: The user's query text\n\n    Returns:\n        Set of channel names (without # prefix)\n    \"\"\"\n    channel_references = set()\n    query_lower = query_text.lower()\n\n    # Only match channels with explicit prepositions (in/from) or # prefix\n    # This prevents false positives like \"slack discussions\" being interpreted as channel \"slack\"\n    channel_patterns = [\n        r\"\\bin\\s+(?:the\\s+)?([a-z0-9_-]+)\\s+(?:slack\\s+)?channels?\\b\",  # \"in the office channel\"\n        r\"\\bfrom\\s+(?:the\\s+)?([a-z0-9_-]+)\\s+(?:slack\\s+)?channels?\\b\",  # \"from the office channel\"\n        r\"\\bin[:\\s]*#([a-z0-9_-]+)\\b\",  # \"in #office\" or \"in:#office\"\n        r\"\\bfrom[:\\s]*#([a-z0-9_-]+)\\b\",  # \"from #office\" or \"from:#office\"\n    ]\n\n    for pattern in channel_patterns:\n        matches = re.finditer(pattern, query_lower)\n        for match in matches:\n            channel_references.add(match.group(1))\n\n    return channel_references\n\n\ndef validate_channel_references(\n    channel_references: set[str],\n    entities: dict[str, Any],\n    available_channels: list[str] | None,\n) -> None:\n    \"\"\"Validate that referenced channels exist and are allowed by entity config.\n\n    Args:\n        channel_references: Set of channel names extracted from query\n        entities: Entity configuration dict\n        available_channels: List of available channel names in workspace\n\n    Raises:\n        ValueError: If channel doesn't exist, is excluded, or not in inclusion list\n    \"\"\"\n    if not channel_references or not entities:\n        return\n\n    try:\n        parsed_entities = SlackEntities(**entities)\n\n        for channel_name in channel_references:\n            # Check if channel exists\n            if available_channels is not None:\n                # Normalize for comparison (available_channels may or may not have #)\n                normalized_available = [\n                    ch.lstrip(\"#\").lower() for ch in available_channels\n                ]\n                if channel_name.lower() not in normalized_available:\n                    raise ValueError(\n                        f\"Channel '{channel_name}' does not exist in your Slack workspace. \"\n                        f\"Please check the channel name and try again.\"\n                    )\n\n            # Check if channel is in exclusion list\n            if parsed_entities.exclude_channels:\n                if matches_exclude_pattern(\n                    channel_name, parsed_entities.exclude_channels\n                ):\n                    raise ValueError(\n                        f\"Channel '{channel_name}' is excluded from search by your configuration. \"\n                        f\"Please update your connector settings to search this channel.\"\n                    )\n\n            # Check if channel is in inclusion list (when search_all_channels is False)\n            if not parsed_entities.search_all_channels:\n                if parsed_entities.channels:\n                    # Normalize channel lists for comparison\n                    normalized_channels = [\n                        ch.lstrip(\"#\").lower() for ch in parsed_entities.channels\n                    ]\n                    if channel_name.lower() not in normalized_channels:\n                        raise ValueError(\n                            f\"Channel '{channel_name}' is not in your configured channel list. \"\n                            f\"Please update your connector settings to include this channel.\"\n                        )\n\n    except ValidationError:\n        # If entities are malformed, skip validation\n        pass\n\n\ndef build_channel_override_query(channel_references: set[str], time_filter: str) -> str:\n    \"\"\"Build a Slack query with ONLY channel filters and time filter (no keywords).\n\n    Args:\n        channel_references: Set of channel names to search\n        time_filter: Time filter string (e.g., \" after:2025-11-07\")\n\n    Returns:\n        Query string with __CHANNEL_OVERRIDE__ marker\n    \"\"\"\n    normalized_channels = [ch.lstrip(\"#\") for ch in channel_references]\n    channel_filter = \" \".join([f\"in:#{channel}\" for channel in normalized_channels])\n    return f\"__CHANNEL_OVERRIDE__ {channel_filter}{time_filter}\"\n\n\n# Slack-specific stop words (in addition to standard English stop words)\n# These include Slack-specific terms and temporal/recency keywords\nSLACK_SPECIFIC_STOP_WORDS = frozenset(\n    RECENCY_KEYWORDS\n    + [\n        \"dm\",\n        \"dms\",\n        \"message\",\n        \"messages\",\n        \"channel\",\n        \"channels\",\n        \"slack\",\n        \"post\",\n        \"posted\",\n        \"posting\",\n        \"sent\",\n    ]\n)\n\n\ndef _get_combined_stop_words() -> frozenset[str]:\n    \"\"\"Get combined English + Slack-specific stop words.\n\n    Returns a frozenset of stop words for filtering content words.\n\n    Note: Currently only supports English stop words. Non-English queries\n    may have suboptimal content word extraction. Future enhancement could\n    detect query language and load appropriate stop words.\n    \"\"\"\n    return ENGLISH_STOPWORDS_SET | SLACK_SPECIFIC_STOP_WORDS\n\n\ndef extract_content_words_from_recency_query(\n    query_text: str, channel_references: set[str]\n) -> list[str]:\n    \"\"\"Extract meaningful content words from a recency query.\n\n    Filters out English stop words, Slack-specific terms, channel references, and proper nouns.\n\n    Args:\n        query_text: The user's query text\n        channel_references: Channel names to exclude from content words\n\n    Returns:\n        List of content words (up to MAX_CONTENT_WORDS)\n    \"\"\"\n    # Get combined stop words (English + Slack-specific)\n    all_stop_words = _get_combined_stop_words()\n\n    words = query_text.split()\n    content_words = []\n\n    for word in words:\n        clean_word = word.lower().strip(WORD_PUNCTUATION)\n        # Skip if it's a channel reference or a stop word\n        if clean_word in channel_references:\n            continue\n        if clean_word and clean_word not in all_stop_words and len(clean_word) > 2:\n            clean_word_orig = word.strip(WORD_PUNCTUATION)\n            if clean_word_orig.lower() not in all_stop_words:\n                content_words.append(clean_word_orig)\n\n    # Filter out proper nouns (capitalized words)\n    content_words_filtered = [word for word in content_words if not word[0].isupper()]\n\n    return content_words_filtered[:MAX_CONTENT_WORDS]\n\n\ndef _is_valid_keyword_query(line: str) -> bool:\n    \"\"\"Check if a line looks like a valid keyword query vs explanatory text.\n\n    Returns False for lines that appear to be LLM explanations rather than keywords.\n    \"\"\"\n    # Reject lines that start with parentheses (explanatory notes)\n    if line.startswith(\"(\"):\n        return False\n\n    # Reject lines that are too long (likely sentences, not keywords)\n    # Keywords should be short - reject if > 50 chars or > 6 words\n    if len(line) > 50 or len(line.split()) > 6:\n        return False\n\n    return True\n\n\ndef expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:\n    \"\"\"Use LLM to expand query into multiple search variations.\n\n    Args:\n        query_text: The user's original query\n        llm: LLM instance to use for expansion\n\n    Returns:\n        List of rephrased query strings (up to MAX_SLACK_QUERY_EXPANSIONS)\n    \"\"\"\n    prompt = UserMessage(\n        content=SLACK_QUERY_EXPANSION_PROMPT.format(\n            query=query_text, max_queries=MAX_SLACK_QUERY_EXPANSIONS\n        )\n    )\n\n    try:\n        # Call LLM with Braintrust tracing\n        with llm_generation_span(\n            llm=llm, flow=\"slack_query_expansion\", input_messages=[prompt]\n        ) as span_generation:\n            llm_response = llm.invoke(prompt)\n            record_llm_response(span_generation, llm_response)\n            response = llm_response_to_string(llm_response)\n\n        response_clean = _parse_llm_code_block_response(response)\n\n        # Split into lines and filter out empty lines\n        raw_queries = [\n            line.strip() for line in response_clean.split(\"\\n\") if line.strip()\n        ]\n\n        # Filter out lines that look like explanatory text rather than keywords\n        rephrased_queries = [q for q in raw_queries if _is_valid_keyword_query(q)]\n\n        # Log if we filtered out garbage\n        if len(raw_queries) != len(rephrased_queries):\n            filtered_out = set(raw_queries) - set(rephrased_queries)\n            logger.warning(f\"Filtered out non-keyword LLM responses: {filtered_out}\")\n\n        # If no queries generated, use empty query\n        if not rephrased_queries:\n            logger.debug(\"No content keywords extracted from query expansion\")\n            return [\"\"]\n\n        logger.debug(\n            f\"Expanded query into {len(rephrased_queries)} queries: {rephrased_queries}\"\n        )\n        return rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]\n\n    except Exception as e:\n        logger.error(f\"Error expanding query: {e}\")\n        return [query_text]\n\n\ndef build_slack_queries(\n    query: ChunkIndexRequest,\n    llm: LLM,\n    entities: dict[str, Any] | None = None,\n    available_channels: list[str] | None = None,\n) -> list[str]:\n    \"\"\"Build Slack query strings with date filtering and query expansion.\"\"\"\n    default_search_days = 30\n    if entities:\n        try:\n            parsed_entities = SlackEntities(**entities)\n            default_search_days = parsed_entities.default_search_days\n        except ValidationError as e:\n            logger.warning(f\"Invalid entities in build_slack_queries: {e}\")\n\n    days_back = extract_date_range_from_query(\n        query=query.query,\n        llm=llm,\n        default_search_days=default_search_days,\n    )\n\n    # get time filter\n    time_filter = \"\"\n    if days_back is not None and days_back >= 0:\n        if days_back == 0:\n            time_filter = \" on:today\"\n        else:\n            cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)\n            time_filter = f\" after:{cutoff_date.strftime('%Y-%m-%d')}\"\n\n    # ALWAYS extract channel references from the query (not just for recency queries)\n    channel_references = extract_channel_references_from_query(query.query)\n\n    # Validate channel references against available channels and entity config\n    # This will raise ValueError if channels are invalid\n    if channel_references and entities:\n        try:\n            validate_channel_references(\n                channel_references, entities, available_channels\n            )\n            logger.info(\n                f\"Detected and validated channel references: {channel_references}\"\n            )\n\n            # If valid channels detected, use ONLY those channels with NO keywords\n            # Return query with ONLY time filter + channel filter (no keywords)\n            return [build_channel_override_query(channel_references, time_filter)]\n        except ValueError as e:\n            # If validation fails, log the error and continue with normal flow\n            logger.warning(f\"Channel reference validation failed: {e}\")\n            channel_references = set()\n\n    # use llm to generate slack queries (use original query to use same keywords as the user)\n    if is_recency_query(query.query):\n        # For recency queries, extract content words (excluding channel names and stop words)\n        content_words = extract_content_words_from_recency_query(\n            query.query, channel_references\n        )\n        rephrased_queries = [\" \".join(content_words)] if content_words else [\"\"]\n    else:\n        # For other queries, use LLM to expand into multiple variations\n        rephrased_queries = expand_query_with_llm(query.query, llm)\n\n    # Build final query strings with time filters\n    return [\n        rephrased_query.strip() + time_filter\n        for rephrased_query in rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]\n    ]\n"
  },
  {
    "path": "backend/onyx/context/search/models.py",
    "content": "from collections.abc import Sequence\nfrom datetime import datetime\nfrom enum import Enum\nfrom typing import Any\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.models import SearchSettings\nfrom onyx.indexing.models import BaseChunk\nfrom onyx.indexing.models import IndexingSetting\nfrom onyx.tools.tool_implementations.web_search.models import WEB_SEARCH_PREFIX\n\n\nclass QueryExpansions(BaseModel):\n    keywords_expansions: list[str] | None = None\n    semantic_expansions: list[str] | None = None\n\n\nclass QueryExpansionType(Enum):\n    KEYWORD = \"keyword\"\n    SEMANTIC = \"semantic\"\n\n\nclass SearchSettingsCreationRequest(IndexingSetting):\n    @classmethod\n    def from_db_model(\n        cls, search_settings: SearchSettings\n    ) -> \"SearchSettingsCreationRequest\":\n        indexing_setting = IndexingSetting.from_db_model(search_settings)\n        return cls(**indexing_setting.model_dump())\n\n\nclass SavedSearchSettings(IndexingSetting):\n    # Previously this contained also Inference time settings. Keeping this wrapper class around\n    # as there may again be inference time settings that may get added.\n    @classmethod\n    def from_db_model(cls, search_settings: SearchSettings) -> \"SavedSearchSettings\":\n        return cls(\n            # Indexing Setting\n            model_name=search_settings.model_name,\n            model_dim=search_settings.model_dim,\n            normalize=search_settings.normalize,\n            query_prefix=search_settings.query_prefix,\n            passage_prefix=search_settings.passage_prefix,\n            provider_type=search_settings.provider_type,\n            index_name=search_settings.index_name,\n            multipass_indexing=search_settings.multipass_indexing,\n            embedding_precision=search_settings.embedding_precision,\n            reduced_dimension=search_settings.reduced_dimension,\n            switchover_type=search_settings.switchover_type,\n            enable_contextual_rag=search_settings.enable_contextual_rag,\n            contextual_rag_llm_name=search_settings.contextual_rag_llm_name,\n            contextual_rag_llm_provider=search_settings.contextual_rag_llm_provider,\n        )\n\n\nclass Tag(BaseModel):\n    tag_key: str\n    tag_value: str\n\n\nclass BaseFilters(BaseModel):\n    source_type: list[DocumentSource] | None = None\n    document_set: list[str] | None = None\n    time_cutoff: datetime | None = None\n    tags: list[Tag] | None = None\n\n\nclass UserFileFilters(BaseModel):\n    # Scopes search to user files tagged with a given project/persona in Vespa.\n    # These are NOT simply the IDs of the current project or persona — they are\n    # only set when the persona's/project's user files overflowed the LLM\n    # context window and must be searched via vector DB instead of being loaded\n    # directly into the prompt.\n    project_id_filter: int | None = None\n    persona_id_filter: int | None = None\n\n\nclass AssistantKnowledgeFilters(BaseModel):\n    \"\"\"Filters for knowledge attached to an assistant (persona).\n\n    These filters scope search to documents/folders explicitly attached\n    to the assistant. When present, only documents matching these criteria\n    are searched (in addition to ACL filtering).\n    \"\"\"\n\n    # Document IDs explicitly attached to the assistant\n    attached_document_ids: list[str] | None = None\n    # Hierarchy node IDs (folders/spaces) attached to the assistant.\n    # Matches chunks where ancestor_hierarchy_node_ids contains any of these.\n    hierarchy_node_ids: list[int] | None = None\n\n\nclass IndexFilters(BaseFilters, UserFileFilters, AssistantKnowledgeFilters):\n    # NOTE: These strings must be formatted in the same way as the output of\n    # DocumentAccess::to_acl.\n    access_control_list: list[str] | None\n    tenant_id: str | None = None\n\n\nclass BasicChunkRequest(BaseModel):\n    query: str\n\n    # In case the caller wants to override the weighting between semantic and keyword search.\n    hybrid_alpha: float | None = None\n\n    # In case some queries favor recency more than other queries.\n    recency_bias_multiplier: float = 1.0\n\n    limit: int | None = None\n\n\nclass ChunkSearchRequest(BasicChunkRequest):\n    # Final filters are calculated from these\n    user_selected_filters: BaseFilters | None = None\n\n    # Use with caution!\n    bypass_acl: bool = False\n\n\n# From the Chat Session we know what project (if any) this search should include\n# From the user uploads and persona uploaded files, we know which of those to include\nclass ChunkIndexRequest(BasicChunkRequest):\n    # Calculated final filters\n    filters: IndexFilters\n\n    query_keywords: list[str] | None = None\n\n\nclass ContextExpansionType(str, Enum):\n    NOT_RELEVANT = \"not_relevant\"\n    MAIN_SECTION_ONLY = \"main_section_only\"\n    INCLUDE_ADJACENT_SECTIONS = \"include_adjacent_sections\"\n    FULL_DOCUMENT = \"full_document\"\n\n\nclass InferenceChunk(BaseChunk):\n    document_id: str\n    source_type: DocumentSource\n    semantic_identifier: str\n    title: str | None  # Separate from Semantic Identifier though often same\n    boost: int\n    score: float | None\n    hidden: bool\n    is_relevant: bool | None = None\n    relevance_explanation: str | None = None\n    # TODO(andrei): Ideally we could improve this to where each value is just a\n    # list of strings.\n    metadata: dict[str, str | list[str]]\n    # Matched sections in the chunk. Uses Vespa syntax e.g. <hi>TEXT</hi>\n    # to specify that a set of words should be highlighted. For example:\n    # [\"<hi>the</hi> <hi>answer</hi> is 42\", \"he couldn't find an <hi>answer</hi>\"]\n    match_highlights: list[str]\n    doc_summary: str\n    chunk_context: str\n\n    # when the doc was last updated\n    updated_at: datetime | None\n    primary_owners: list[str] | None = None\n    secondary_owners: list[str] | None = None\n    large_chunk_reference_ids: list[int] = Field(default_factory=list)\n\n    is_federated: bool = False\n\n    @property\n    def unique_id(self) -> str:\n        return f\"{self.document_id}__{self.chunk_id}\"\n\n    def __repr__(self) -> str:\n        blurb_words = self.blurb.split()\n        short_blurb = \"\"\n        for word in blurb_words:\n            if not short_blurb:\n                short_blurb = word\n                continue\n            if len(short_blurb) > 25:\n                break\n            short_blurb += \" \" + word\n        return f\"Inference Chunk: {self.document_id} - {short_blurb}...\"\n\n    def __eq__(self, other: Any) -> bool:\n        if not isinstance(other, InferenceChunk):\n            return False\n        return (self.document_id, self.chunk_id) == (other.document_id, other.chunk_id)\n\n    def __hash__(self) -> int:\n        return hash((self.document_id, self.chunk_id))\n\n    def __lt__(self, other: Any) -> bool:\n        if not isinstance(other, InferenceChunk):\n            return NotImplemented\n        if self.score is None:\n            if other.score is None:\n                return self.chunk_id > other.chunk_id\n            return True\n        if other.score is None:\n            return False\n        if self.score == other.score:\n            return self.chunk_id > other.chunk_id\n        return self.score < other.score\n\n    def __gt__(self, other: Any) -> bool:\n        if not isinstance(other, InferenceChunk):\n            return NotImplemented\n        if self.score is None:\n            return False\n        if other.score is None:\n            return True\n        if self.score == other.score:\n            return self.chunk_id < other.chunk_id\n        return self.score > other.score\n\n\nclass InferenceChunkUncleaned(InferenceChunk):\n    metadata_suffix: str | None\n\n    def to_inference_chunk(self) -> InferenceChunk:\n        # Create a dict of all fields except 'metadata_suffix'\n        # Assumes the cleaning has already been applied and just needs to translate to the right type\n        inference_chunk_data = {\n            k: v\n            for k, v in self.model_dump().items()\n            if k\n            not in [\"metadata_suffix\"]  # May be other fields to throw out in the future\n        }\n        return InferenceChunk(**inference_chunk_data)\n\n\nclass InferenceSection(BaseModel):\n    \"\"\"Section list of chunks with a combined content. A section could be a single chunk, several\n    chunks from the same document or the entire document.\"\"\"\n\n    center_chunk: InferenceChunk\n    chunks: list[InferenceChunk]\n    combined_content: str\n\n\nclass SearchDoc(BaseModel):\n    document_id: str\n    chunk_ind: int\n    semantic_identifier: str\n    link: str | None = None\n    blurb: str\n    source_type: DocumentSource\n    boost: int\n    # Whether the document is hidden when doing a standard search\n    # since a standard search will never find a hidden doc, this can only ever\n    # be `True` when doing an admin search\n    hidden: bool\n    metadata: dict[str, str | list[str]]\n    score: float | None = None\n    is_relevant: bool | None = None\n    relevance_explanation: str | None = None\n    # Matched sections in the doc. Uses Vespa syntax e.g. <hi>TEXT</hi>\n    # to specify that a set of words should be highlighted. For example:\n    # [\"<hi>the</hi> <hi>answer</hi> is 42\", \"the answer is <hi>42</hi>\"\"]\n    match_highlights: list[str]\n    # when the doc was last updated\n    updated_at: datetime | None = None\n    primary_owners: list[str] | None = None\n    secondary_owners: list[str] | None = None\n    is_internet: bool = False\n\n    @classmethod\n    def from_chunks_or_sections(\n        cls,\n        items: \"Sequence[InferenceChunk | InferenceSection] | None\",\n    ) -> list[\"SearchDoc\"]:\n        \"\"\"Convert a sequence of InferenceChunk or InferenceSection objects to SearchDoc objects.\"\"\"\n        if not items:\n            return []\n\n        search_docs = [\n            cls(\n                document_id=(\n                    chunk := (\n                        item.center_chunk\n                        if isinstance(item, InferenceSection)\n                        else item\n                    )\n                ).document_id,\n                chunk_ind=chunk.chunk_id,\n                semantic_identifier=chunk.semantic_identifier or \"Unknown\",\n                link=chunk.source_links[0] if chunk.source_links else None,\n                blurb=chunk.blurb,\n                source_type=chunk.source_type,\n                boost=chunk.boost,\n                hidden=chunk.hidden,\n                metadata=chunk.metadata,\n                score=chunk.score,\n                match_highlights=chunk.match_highlights,\n                updated_at=chunk.updated_at,\n                primary_owners=chunk.primary_owners,\n                secondary_owners=chunk.secondary_owners,\n                is_internet=False,\n            )\n            for item in items\n        ]\n\n        return search_docs\n\n    # TODO - there is likely a way to clean this all up and not have the switch between these\n    @classmethod\n    def from_saved_search_doc(cls, saved_search_doc: \"SavedSearchDoc\") -> \"SearchDoc\":\n        \"\"\"Convert a SavedSearchDoc to SearchDoc by dropping the db_doc_id field.\"\"\"\n        saved_search_doc_data = saved_search_doc.model_dump()\n        # Remove db_doc_id as it's not part of SearchDoc\n        saved_search_doc_data.pop(\"db_doc_id\", None)\n        return cls(**saved_search_doc_data)\n\n    @classmethod\n    def from_saved_search_docs(\n        cls, saved_search_docs: list[\"SavedSearchDoc\"]\n    ) -> list[\"SearchDoc\"]:\n        return [\n            cls.from_saved_search_doc(saved_search_doc)\n            for saved_search_doc in saved_search_docs\n        ]\n\n    def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]:  # type: ignore\n        initial_dict = super().model_dump(*args, **kwargs)  # type: ignore\n        initial_dict[\"updated_at\"] = (\n            self.updated_at.isoformat() if self.updated_at else None\n        )\n        return initial_dict\n\n\nclass SearchDocsResponse(BaseModel):\n    search_docs: list[SearchDoc]\n    # Maps the citation number to the document id\n    # Since these are no longer just links on the frontend but instead document cards, mapping it to the\n    # document id is  the most staightforward way.\n    citation_mapping: dict[int, str]\n\n    # For cases where the frontend only needs to display a subset of the search docs\n    # The whole list is typically still needed for later steps but this set should be saved separately\n    displayed_docs: list[SearchDoc] | None = None\n\n\nclass SavedSearchDoc(SearchDoc):\n    db_doc_id: int\n    score: float | None = 0.0\n\n    @classmethod\n    def from_search_doc(\n        cls, search_doc: SearchDoc, db_doc_id: int = 0\n    ) -> \"SavedSearchDoc\":\n        \"\"\"IMPORTANT: careful using this and not providing a db_doc_id If db_doc_id is not\n        provided, it won't be able to actually fetch the saved doc and info later on. So only skip\n        providing this if the SavedSearchDoc will not be used in the future\"\"\"\n        search_doc_data = search_doc.model_dump()\n        search_doc_data[\"score\"] = search_doc_data.get(\"score\") or 0.0\n        return cls(**search_doc_data, db_doc_id=db_doc_id)\n\n    @classmethod\n    def from_dict(cls, data: dict[str, Any]) -> \"SavedSearchDoc\":\n        \"\"\"Create SavedSearchDoc from serialized dictionary data (e.g., from database JSON)\"\"\"\n        return cls(**data)\n\n    @classmethod\n    def from_url(cls, url: str) -> \"SavedSearchDoc\":\n        \"\"\"Create a SavedSearchDoc from a URL for internet search documents.\n\n        Uses the INTERNET_SEARCH_DOC_ prefix for document_id to match the format\n        used by inference sections created from internet content.\n        \"\"\"\n        return cls(\n            # db_doc_id can be a filler value since these docs are not saved to the database.\n            db_doc_id=0,\n            document_id=WEB_SEARCH_PREFIX + url,\n            chunk_ind=0,\n            semantic_identifier=url,\n            link=url,\n            blurb=\"\",\n            source_type=DocumentSource.WEB,\n            boost=1,\n            hidden=False,\n            metadata={},\n            score=0.0,\n            is_relevant=None,\n            relevance_explanation=None,\n            match_highlights=[],\n            updated_at=None,\n            primary_owners=None,\n            secondary_owners=None,\n            is_internet=True,\n        )\n\n    def __lt__(self, other: Any) -> bool:\n        if not isinstance(other, SavedSearchDoc):\n            return NotImplemented\n        self_score = self.score if self.score is not None else 0.0\n        other_score = other.score if other.score is not None else 0.0\n        return self_score < other_score\n\n\nclass SavedSearchDocWithContent(SavedSearchDoc):\n    \"\"\"Used for endpoints that need to return the actual contents of the retrieved\n    section in addition to the match_highlights.\"\"\"\n\n    content: str\n\n\nclass PersonaSearchInfo(BaseModel):\n    \"\"\"Snapshot of persona data needed by the search pipeline.\n\n    Extracted from the ORM Persona before the DB session is released so that\n    SearchTool and search_pipeline never lazy-load relationships post-commit.\n    \"\"\"\n\n    document_set_names: list[str]\n    search_start_date: datetime | None\n    attached_document_ids: list[str]\n    hierarchy_node_ids: list[int]\n"
  },
  {
    "path": "backend/onyx/context/search/pipeline.py",
    "content": "from collections import defaultdict\nfrom datetime import datetime\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.context.search.models import BaseFilters\nfrom onyx.context.search.models import ChunkIndexRequest\nfrom onyx.context.search.models import ChunkSearchRequest\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import InferenceSection\nfrom onyx.context.search.models import PersonaSearchInfo\nfrom onyx.context.search.preprocessing.access_filters import (\n    build_access_filters_for_user,\n)\nfrom onyx.context.search.retrieval.search_runner import search_chunks\nfrom onyx.context.search.utils import inference_section_from_chunks\nfrom onyx.db.models import User\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.federated_connectors.federated_retrieval import FederatedRetrievalInfo\nfrom onyx.llm.interfaces import LLM\nfrom onyx.natural_language_processing.english_stopwords import strip_stopwords\nfrom onyx.natural_language_processing.search_nlp_models import EmbeddingModel\nfrom onyx.secondary_llm_flows.source_filter import extract_source_filter\nfrom onyx.secondary_llm_flows.time_filter import extract_time_filter\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import FunctionCall\nfrom onyx.utils.threadpool_concurrency import run_functions_in_parallel\nfrom onyx.utils.timing import log_function_time\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\n@log_function_time(print_only=True)\ndef _build_index_filters(\n    user_provided_filters: BaseFilters | None,\n    user: User,  # Used for ACLs, anonymous users only see public docs\n    project_id_filter: int | None,\n    persona_id_filter: int | None,\n    persona_document_sets: list[str] | None,\n    persona_time_cutoff: datetime | None,\n    db_session: Session | None = None,\n    auto_detect_filters: bool = False,\n    query: str | None = None,\n    llm: LLM | None = None,\n    bypass_acl: bool = False,\n    # Assistant knowledge filters\n    attached_document_ids: list[str] | None = None,\n    hierarchy_node_ids: list[int] | None = None,\n    # Pre-fetched ACL filters (skips DB query when provided)\n    acl_filters: list[str] | None = None,\n) -> IndexFilters:\n    if auto_detect_filters and (llm is None or query is None):\n        raise RuntimeError(\"LLM and query are required for auto detect filters\")\n\n    base_filters = user_provided_filters or BaseFilters()\n\n    document_set_filter = (\n        base_filters.document_set\n        if base_filters.document_set is not None\n        else persona_document_sets\n    )\n\n    time_filter = base_filters.time_cutoff or persona_time_cutoff\n    source_filter = base_filters.source_type\n\n    detected_time_filter = None\n    detected_source_filter = None\n    if auto_detect_filters:\n        time_filter_fnc = FunctionCall(extract_time_filter, (query, llm), {})\n        if not source_filter:\n            source_filter_fnc = FunctionCall(\n                extract_source_filter, (query, llm, db_session), {}\n            )\n        else:\n            source_filter_fnc = None\n\n        functions_to_run = [fn for fn in [time_filter_fnc, source_filter_fnc] if fn]\n        parallel_results = run_functions_in_parallel(functions_to_run)\n        # Detected favor recent is not used for now\n        detected_time_filter, _detected_favor_recent = parallel_results[\n            time_filter_fnc.result_id\n        ]\n        if source_filter_fnc:\n            detected_source_filter = parallel_results[source_filter_fnc.result_id]\n\n    # If the detected time filter is more recent, use that one\n    if time_filter and detected_time_filter and detected_time_filter > time_filter:\n        time_filter = detected_time_filter\n\n    # If the user has explicitly set a source filter, use that one\n    if not source_filter and detected_source_filter:\n        source_filter = detected_source_filter\n\n    if bypass_acl:\n        user_acl_filters = None\n    elif acl_filters is not None:\n        user_acl_filters = acl_filters\n    else:\n        if db_session is None:\n            raise ValueError(\"Either db_session or acl_filters must be provided\")\n        user_acl_filters = build_access_filters_for_user(user, db_session)\n\n    final_filters = IndexFilters(\n        project_id_filter=project_id_filter,\n        persona_id_filter=persona_id_filter,\n        source_type=source_filter,\n        document_set=document_set_filter,\n        time_cutoff=time_filter,\n        tags=base_filters.tags,\n        access_control_list=user_acl_filters,\n        tenant_id=get_current_tenant_id() if MULTI_TENANT else None,\n        # Assistant knowledge filters\n        attached_document_ids=attached_document_ids,\n        hierarchy_node_ids=hierarchy_node_ids,\n    )\n\n    return final_filters\n\n\ndef merge_individual_chunks(\n    chunks: list[InferenceChunk],\n) -> list[InferenceSection]:\n    \"\"\"Merge adjacent chunks from the same document into sections.\n\n    Chunks are considered adjacent if their chunk_ids differ by 1 and they\n    are from the same document. The section maintains the position of the\n    first chunk in the original list.\n    \"\"\"\n    if not chunks:\n        return []\n\n    # Create a mapping from (document_id, chunk_id) to original index\n    # This helps us find the chunk that appears first in the original list\n    chunk_to_original_index: dict[tuple[str, int], int] = {}\n    for idx, chunk in enumerate(chunks):\n        chunk_to_original_index[(chunk.document_id, chunk.chunk_id)] = idx\n\n    # Group chunks by document_id\n    doc_chunks: dict[str, list[InferenceChunk]] = defaultdict(list)\n    for chunk in chunks:\n        doc_chunks[chunk.document_id].append(chunk)\n\n    # For each document, sort chunks by chunk_id to identify adjacent chunks\n    for doc_id in doc_chunks:\n        doc_chunks[doc_id].sort(key=lambda c: c.chunk_id)\n\n    # Create a mapping from (document_id, chunk_id) to the section it belongs to\n    # This helps us maintain the original order\n    chunk_to_section: dict[tuple[str, int], InferenceSection] = {}\n\n    # Process each document's chunks\n    for doc_id, doc_chunk_list in doc_chunks.items():\n        if not doc_chunk_list:\n            continue\n\n        # Group adjacent chunks into sections\n        current_section_chunks = [doc_chunk_list[0]]\n\n        for i in range(1, len(doc_chunk_list)):\n            prev_chunk = doc_chunk_list[i - 1]\n            curr_chunk = doc_chunk_list[i]\n\n            # Check if chunks are adjacent (chunk_id difference is 1)\n            if curr_chunk.chunk_id == prev_chunk.chunk_id + 1:\n                # Add to current section\n                current_section_chunks.append(curr_chunk)\n            else:\n                # Create section from previous chunks\n                # Find the chunk that appears first in the original list\n                center_chunk = min(\n                    current_section_chunks,\n                    key=lambda c: chunk_to_original_index.get(\n                        (c.document_id, c.chunk_id), float(\"inf\")\n                    ),\n                )\n                section = inference_section_from_chunks(\n                    center_chunk=center_chunk,\n                    chunks=current_section_chunks.copy(),\n                )\n                if section:\n                    for chunk in current_section_chunks:\n                        chunk_to_section[(chunk.document_id, chunk.chunk_id)] = section\n\n                # Start new section\n                current_section_chunks = [curr_chunk]\n\n        # Create section for the last group\n        if current_section_chunks:\n            # Find the chunk that appears first in the original list\n            center_chunk = min(\n                current_section_chunks,\n                key=lambda c: chunk_to_original_index.get(\n                    (c.document_id, c.chunk_id), float(\"inf\")\n                ),\n            )\n            section = inference_section_from_chunks(\n                center_chunk=center_chunk,\n                chunks=current_section_chunks.copy(),\n            )\n            if section:\n                for chunk in current_section_chunks:\n                    chunk_to_section[(chunk.document_id, chunk.chunk_id)] = section\n\n    # Build result list maintaining original order\n    # Use (document_id, chunk_id) of center_chunk as unique identifier for sections\n    seen_section_ids: set[tuple[str, int]] = set()\n    result: list[InferenceSection] = []\n\n    for chunk in chunks:\n        section = chunk_to_section.get((chunk.document_id, chunk.chunk_id))\n        if section:\n            section_id = (\n                section.center_chunk.document_id,\n                section.center_chunk.chunk_id,\n            )\n            if section_id not in seen_section_ids:\n                seen_section_ids.add(section_id)\n                result.append(section)\n        else:\n            # Chunk wasn't part of any merged section, create a single-chunk section\n            single_section = inference_section_from_chunks(\n                center_chunk=chunk,\n                chunks=[chunk],\n            )\n            if single_section:\n                single_section_id = (\n                    single_section.center_chunk.document_id,\n                    single_section.center_chunk.chunk_id,\n                )\n                if single_section_id not in seen_section_ids:\n                    seen_section_ids.add(single_section_id)\n                    result.append(single_section)\n\n    return result\n\n\n@log_function_time(print_only=True, debug_only=True)\ndef search_pipeline(\n    # Query and settings\n    chunk_search_request: ChunkSearchRequest,\n    # Document index to search over\n    # Note that federated sources will also be used (not related to this arg)\n    document_index: DocumentIndex,\n    # Used for ACLs and federated search, anonymous users only see public docs\n    user: User,\n    # Pre-extracted persona search configuration (None when no persona)\n    persona_search_info: PersonaSearchInfo | None,\n    db_session: Session | None = None,\n    auto_detect_filters: bool = False,\n    llm: LLM | None = None,\n    # Vespa metadata filters for overflowing user files.  NOT the raw IDs\n    # of the current project/persona — only set when user files couldn't fit\n    # in the LLM context and need to be searched via vector DB.\n    project_id_filter: int | None = None,\n    persona_id_filter: int | None = None,\n    # Pre-fetched data — when provided, avoids DB queries (no session needed)\n    acl_filters: list[str] | None = None,\n    embedding_model: EmbeddingModel | None = None,\n    prefetched_federated_retrieval_infos: list[FederatedRetrievalInfo] | None = None,\n) -> list[InferenceChunk]:\n    persona_document_sets: list[str] | None = (\n        persona_search_info.document_set_names if persona_search_info else None\n    )\n    persona_time_cutoff: datetime | None = (\n        persona_search_info.search_start_date if persona_search_info else None\n    )\n    attached_document_ids: list[str] | None = (\n        persona_search_info.attached_document_ids or None\n        if persona_search_info\n        else None\n    )\n    hierarchy_node_ids: list[int] | None = (\n        persona_search_info.hierarchy_node_ids or None if persona_search_info else None\n    )\n\n    filters = _build_index_filters(\n        user_provided_filters=chunk_search_request.user_selected_filters,\n        user=user,\n        project_id_filter=project_id_filter,\n        persona_id_filter=persona_id_filter,\n        persona_document_sets=persona_document_sets,\n        persona_time_cutoff=persona_time_cutoff,\n        db_session=db_session,\n        auto_detect_filters=auto_detect_filters,\n        query=chunk_search_request.query,\n        llm=llm,\n        bypass_acl=chunk_search_request.bypass_acl,\n        attached_document_ids=attached_document_ids,\n        hierarchy_node_ids=hierarchy_node_ids,\n        acl_filters=acl_filters,\n    )\n\n    query_keywords = strip_stopwords(chunk_search_request.query)\n\n    query_request = ChunkIndexRequest(\n        query=chunk_search_request.query,\n        hybrid_alpha=chunk_search_request.hybrid_alpha,\n        recency_bias_multiplier=chunk_search_request.recency_bias_multiplier,\n        query_keywords=query_keywords,\n        filters=filters,\n        limit=chunk_search_request.limit,\n    )\n\n    retrieved_chunks = search_chunks(\n        query_request=query_request,\n        user_id=user.id if user else None,\n        document_index=document_index,\n        db_session=db_session,\n        embedding_model=embedding_model,\n        prefetched_federated_retrieval_infos=prefetched_federated_retrieval_infos,\n    )\n\n    # For some specific connectors like Salesforce, a user that has access to an object doesn't mean\n    # that they have access to all of the fields of the object.\n    censored_chunks: list[InferenceChunk] = fetch_ee_implementation_or_noop(\n        \"onyx.external_permissions.post_query_censoring\",\n        \"_post_query_chunk_censoring\",\n        retrieved_chunks,\n    )(\n        chunks=retrieved_chunks,\n        user=user,\n    )\n\n    return censored_chunks\n"
  },
  {
    "path": "backend/onyx/context/search/preprocessing/access_filters.py",
    "content": "from sqlalchemy.orm import Session\n\nfrom onyx.access.access import get_acl_for_user\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.db.models import User\n\n\ndef build_access_filters_for_user(user: User, session: Session) -> list[str]:\n    user_acl = get_acl_for_user(user, session)\n    return list(user_acl)\n\n\ndef build_user_only_filters(user: User, db_session: Session) -> IndexFilters:\n    user_acl_filters = build_access_filters_for_user(user, db_session)\n    return IndexFilters(\n        source_type=None,\n        document_set=None,\n        time_cutoff=None,\n        tags=None,\n        access_control_list=user_acl_filters,\n    )\n"
  },
  {
    "path": "backend/onyx/context/search/retrieval/search_runner.py",
    "content": "from collections.abc import Callable\nfrom uuid import UUID\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.chat_configs import HYBRID_ALPHA\nfrom onyx.configs.chat_configs import NUM_RETURNED_HITS\nfrom onyx.context.search.models import ChunkIndexRequest\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import InferenceSection\nfrom onyx.context.search.models import QueryExpansionType\nfrom onyx.context.search.utils import get_query_embedding\nfrom onyx.context.search.utils import inference_section_from_chunks\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.document_index.interfaces import VespaChunkRequest\nfrom onyx.document_index.interfaces_new import DocumentIndex as NewDocumentIndex\nfrom onyx.document_index.opensearch.opensearch_document_index import (\n    OpenSearchOldDocumentIndex,\n)\nfrom onyx.federated_connectors.federated_retrieval import FederatedRetrievalInfo\nfrom onyx.federated_connectors.federated_retrieval import (\n    get_federated_retrieval_functions,\n)\nfrom onyx.natural_language_processing.search_nlp_models import EmbeddingModel\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\n\nlogger = setup_logger()\n\n\ndef combine_retrieval_results(\n    chunk_sets: list[list[InferenceChunk]],\n) -> list[InferenceChunk]:\n    all_chunks = [chunk for chunk_set in chunk_sets for chunk in chunk_set]\n\n    unique_chunks: dict[tuple[str, int], InferenceChunk] = {}\n    for chunk in all_chunks:\n        key = (chunk.document_id, chunk.chunk_id)\n        if key not in unique_chunks:\n            unique_chunks[key] = chunk\n            continue\n\n        stored_chunk_score = unique_chunks[key].score or 0\n        this_chunk_score = chunk.score or 0\n        if stored_chunk_score < this_chunk_score:\n            unique_chunks[key] = chunk\n\n    sorted_chunks = sorted(\n        unique_chunks.values(), key=lambda x: x.score or 0, reverse=True\n    )\n\n    return sorted_chunks\n\n\ndef _embed_and_hybrid_search(\n    query_request: ChunkIndexRequest,\n    document_index: DocumentIndex,\n    db_session: Session | None = None,\n    embedding_model: EmbeddingModel | None = None,\n) -> list[InferenceChunk]:\n    query_embedding = get_query_embedding(\n        query_request.query,\n        db_session=db_session,\n        embedding_model=embedding_model,\n    )\n\n    hybrid_alpha = query_request.hybrid_alpha or HYBRID_ALPHA\n\n    top_chunks = document_index.hybrid_retrieval(\n        query=query_request.query,\n        query_embedding=query_embedding,\n        final_keywords=query_request.query_keywords,\n        filters=query_request.filters,\n        hybrid_alpha=hybrid_alpha,\n        time_decay_multiplier=query_request.recency_bias_multiplier,\n        num_to_retrieve=query_request.limit or NUM_RETURNED_HITS,\n        ranking_profile_type=(\n            QueryExpansionType.KEYWORD\n            if hybrid_alpha <= 0.3\n            else QueryExpansionType.SEMANTIC\n        ),\n    )\n\n    return top_chunks\n\n\ndef _keyword_search(\n    query_request: ChunkIndexRequest,\n    document_index: NewDocumentIndex,\n) -> list[InferenceChunk]:\n    return document_index.keyword_retrieval(\n        query=query_request.query,\n        filters=query_request.filters,\n        num_to_retrieve=query_request.limit or NUM_RETURNED_HITS,\n    )\n\n\ndef search_chunks(\n    query_request: ChunkIndexRequest,\n    user_id: UUID | None,\n    document_index: DocumentIndex,\n    db_session: Session | None = None,\n    embedding_model: EmbeddingModel | None = None,\n    prefetched_federated_retrieval_infos: list[FederatedRetrievalInfo] | None = None,\n) -> list[InferenceChunk]:\n    run_queries: list[tuple[Callable, tuple]] = []\n\n    source_filters = (\n        set(query_request.filters.source_type)\n        if query_request.filters.source_type\n        else None\n    )\n\n    # Federated retrieval — use pre-fetched if available, otherwise query DB\n    if prefetched_federated_retrieval_infos is not None:\n        federated_retrieval_infos = prefetched_federated_retrieval_infos\n    else:\n        if db_session is None:\n            raise ValueError(\n                \"Either db_session or prefetched_federated_retrieval_infos must be provided\"\n            )\n        federated_retrieval_infos = get_federated_retrieval_functions(\n            db_session=db_session,\n            user_id=user_id,\n            source_types=list(source_filters) if source_filters else None,\n            document_set_names=query_request.filters.document_set,\n        )\n\n    federated_sources = set(\n        federated_retrieval_info.source.to_non_federated_source()\n        for federated_retrieval_info in federated_retrieval_infos\n    )\n    for federated_retrieval_info in federated_retrieval_infos:\n        run_queries.append(\n            (federated_retrieval_info.retrieval_function, (query_request,))\n        )\n\n    # Don't run normal hybrid search if there are no indexed sources to\n    # search over\n    normal_search_enabled = (source_filters is None) or (\n        len(set(source_filters) - federated_sources) > 0\n    )\n\n    if normal_search_enabled:\n        if (\n            query_request.hybrid_alpha is not None\n            and query_request.hybrid_alpha == 0.0\n            and isinstance(document_index, OpenSearchOldDocumentIndex)\n        ):\n            # If hybrid alpha is explicitly set to keyword only, do pure keyword\n            # search without generating an embedding. This is currently only\n            # supported with OpenSearchDocumentIndex.\n            opensearch_new_document_index: NewDocumentIndex = document_index._real_index\n            run_queries.append(\n                (\n                    lambda: _keyword_search(\n                        query_request, opensearch_new_document_index\n                    ),\n                    (),\n                )\n            )\n        else:\n            run_queries.append(\n                (\n                    _embed_and_hybrid_search,\n                    (query_request, document_index, db_session, embedding_model),\n                )\n            )\n\n    parallel_search_results = run_functions_tuples_in_parallel(run_queries)\n    top_chunks = combine_retrieval_results(parallel_search_results)\n\n    if not top_chunks:\n        logger.debug(\n            f\"Search returned no results for query: {query_request.query} with filters: {query_request.filters}.\"\n        )\n\n    return top_chunks\n\n\n# TODO: This is unused code.\ndef inference_sections_from_ids(\n    doc_identifiers: list[tuple[str, int]],\n    document_index: DocumentIndex,\n) -> list[InferenceSection]:\n    # Currently only fetches whole docs\n    doc_ids_set = set(doc_id for doc_id, _ in doc_identifiers)\n\n    chunk_requests: list[VespaChunkRequest] = [\n        VespaChunkRequest(document_id=doc_id) for doc_id in doc_ids_set\n    ]\n\n    # No need for ACL here because the doc ids were validated beforehand\n    filters = IndexFilters(access_control_list=None)\n\n    retrieved_chunks = document_index.id_based_retrieval(\n        chunk_requests=chunk_requests,\n        filters=filters,\n    )\n\n    if not retrieved_chunks:\n        return []\n\n    # Group chunks by document ID\n    chunks_by_doc_id: dict[str, list[InferenceChunk]] = {}\n    for chunk in retrieved_chunks:\n        chunks_by_doc_id.setdefault(chunk.document_id, []).append(chunk)\n\n    inference_sections = [\n        section\n        for chunks in chunks_by_doc_id.values()\n        if chunks\n        and (\n            section := inference_section_from_chunks(\n                # The scores will always be 0 because the fetching by id gives back\n                # no search scores. This is not needed though if the user is explicitly\n                # selecting a document.\n                center_chunk=chunks[0],\n                chunks=chunks,\n            )\n        )\n    ]\n\n    return inference_sections\n"
  },
  {
    "path": "backend/onyx/context/search/utils.py",
    "content": "from typing import TypeVar\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import InferenceSection\nfrom onyx.context.search.models import SavedSearchDoc\nfrom onyx.context.search.models import SavedSearchDocWithContent\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.natural_language_processing.search_nlp_models import EmbeddingModel\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.timing import log_function_time\nfrom shared_configs.configs import MODEL_SERVER_HOST\nfrom shared_configs.configs import MODEL_SERVER_PORT\nfrom shared_configs.enums import EmbedTextType\nfrom shared_configs.model_server_models import Embedding\n\nlogger = setup_logger()\n\n\nT = TypeVar(\n    \"T\",\n    InferenceSection,\n    InferenceChunk,\n    SearchDoc,\n    SavedSearchDoc,\n    SavedSearchDocWithContent,\n)\n\nTSection = TypeVar(\n    \"TSection\",\n    InferenceSection,\n    SearchDoc,\n    SavedSearchDoc,\n    SavedSearchDocWithContent,\n)\n\n\ndef inference_section_from_chunks(\n    center_chunk: InferenceChunk,\n    chunks: list[InferenceChunk],\n) -> InferenceSection | None:\n    if not chunks:\n        return None\n\n    combined_content = \"\\n\".join([chunk.content for chunk in chunks])\n\n    return InferenceSection(\n        center_chunk=center_chunk,\n        chunks=chunks,\n        combined_content=combined_content,\n    )\n\n\n# If it should be a real section, don't use this one\ndef inference_section_from_single_chunk(\n    chunk: InferenceChunk,\n) -> InferenceSection:\n    return InferenceSection(\n        center_chunk=chunk,\n        chunks=[chunk],\n        combined_content=chunk.content,\n    )\n\n\ndef get_query_embeddings(\n    queries: list[str],\n    db_session: Session | None = None,\n    embedding_model: EmbeddingModel | None = None,\n) -> list[Embedding]:\n    if embedding_model is None:\n        if db_session is None:\n            raise ValueError(\"Either db_session or embedding_model must be provided\")\n        search_settings = get_current_search_settings(db_session)\n        embedding_model = EmbeddingModel.from_db_model(\n            search_settings=search_settings,\n            server_host=MODEL_SERVER_HOST,\n            server_port=MODEL_SERVER_PORT,\n        )\n\n    query_embedding = embedding_model.encode(queries, text_type=EmbedTextType.QUERY)\n    return query_embedding\n\n\n@log_function_time(print_only=True, debug_only=True)\ndef get_query_embedding(\n    query: str,\n    db_session: Session | None = None,\n    embedding_model: EmbeddingModel | None = None,\n) -> Embedding:\n    return get_query_embeddings(\n        [query], db_session=db_session, embedding_model=embedding_model\n    )[0]\n\n\ndef convert_inference_sections_to_search_docs(\n    inference_sections: list[InferenceSection],\n    is_internet: bool = False,\n) -> list[SearchDoc]:\n    search_docs = SearchDoc.from_chunks_or_sections(inference_sections)\n    for search_doc in search_docs:\n        search_doc.is_internet = is_internet\n    return search_docs\n"
  },
  {
    "path": "backend/onyx/db/README.md",
    "content": "An explanation of how the history of messages, tool calls, and docs are stored in the database:\n\nMessages are grouped by a chat session, a tree structured is used to allow edits and for the\nuser to switch between branches. Each ChatMessage is either a user message or an assistant message.\nIt should always alternate between the two, System messages, custom agent prompt injections, and\nreminder messages are injected dynamically after the chat session is loaded into memory. The user\nand assistant messages are stored in pairs, though it is ok if the user message is stored and the\nassistant message fails.\n\nThe user chat message is relatively simple and includes the user prompt and any attached documents.\nThe assistant message includes the response, tool calls, feedback, citations, etc.\nThings provided as input are part of the user message, things that happen during the inference and\nLLM loop are part of the assistant message.\n\nReasoning is part of the message or tool call that occured after the reasoning. Really the reasoning\nshould be part of the previous message / tool call because if it branches afterwards as a result of\nthe reasoning, this is somewhat unintuitive. But to not include reasoning as part of the user message,\nit is instead included with the following message or tool call. With parallel tool calls, the reasoning\nwill be included with each of the tool calls.\n\nTool calls are stored in the ToolCall table and can represent all of the following:\n- Parallel tool calls, these will have the same turn number and parent tool call id\n- Sequential tool calls, these will have a different turn number and parent tool call id\n- Tool calls attached to the ChatMessage are top level tool calls directly triggered by the LLM\n- Tool calls that are instead attached to other ToolCalls are tool calls that happen as part of an\n  agent that has been called. The top level tool call is the agent call and the tool calls that have\n  the agent call as a parent are the tool calls that happen as part of the agent.\n\nThe different branches are generated by sending a new search query to an existing parent.\n```\n                 [Empty Root Message]  (This allows the first message to be branched/edited as well)\n              /           |           \\\n[First Message] [First Message Edit 1] [First Message Edit 2]\n       |                  |\n[Second Message]  [Second Message of Edit 1 Branch]\n```\n"
  },
  {
    "path": "backend/onyx/db/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/db/_deprecated/pg_file_store.py",
    "content": "\"\"\"Kept around since it's used in the migration to move to S3/MinIO\"\"\"\n\nimport tempfile\nfrom io import BytesIO\nfrom typing import IO\n\nfrom psycopg2.extensions import connection\nfrom sqlalchemy import text  # NEW: for SQL large-object helpers\nfrom sqlalchemy.orm import Session\n\nfrom onyx.file_store.constants import MAX_IN_MEMORY_SIZE\nfrom onyx.file_store.constants import STANDARD_CHUNK_SIZE\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef get_pg_conn_from_session(db_session: Session) -> connection:\n    return db_session.connection().connection.connection  # type: ignore\n\n\ndef create_populate_lobj(\n    content: IO,\n    db_session: Session,\n) -> int:\n    \"\"\"Create a PostgreSQL large object from *content* and return its OID.\n\n    Preferred approach is to use the psycopg2 ``lobject`` API, but if that is\n    unavailable (e.g. when the underlying connection is an asyncpg adapter)\n    we fall back to PostgreSQL helper functions such as ``lo_from_bytea``.\n\n    NOTE: this function intentionally *does not* commit the surrounding\n    transaction – that is handled by the caller so all work stays atomic.\n    \"\"\"\n\n    pg_conn = None\n    try:\n        pg_conn = get_pg_conn_from_session(db_session)\n        # ``AsyncAdapt_asyncpg_connection`` (asyncpg) has no ``lobject``\n        if not hasattr(pg_conn, \"lobject\"):\n            raise AttributeError  # will be handled by fallback below\n\n        large_object = pg_conn.lobject()\n\n        # write in multiple chunks to avoid loading the whole file into memory\n        while True:\n            chunk = content.read(STANDARD_CHUNK_SIZE)\n            if not chunk:\n                break\n            large_object.write(chunk)\n\n        large_object.close()\n\n        return large_object.oid\n\n    except AttributeError:\n        # Fall back to SQL helper functions – read the full content into memory\n        # (acceptable for the limited number and size of files handled during\n        # migrations).  ``lo_from_bytea`` returns the new OID.\n        byte_data = content.read()\n        result = db_session.execute(\n            text(\"SELECT lo_from_bytea(0, :data) AS oid\"),\n            {\"data\": byte_data},\n        )\n        # ``scalar_one`` is 2.0-style; ``scalar`` works on both 1.4/2.0.\n        lobj_oid = result.scalar()\n        if lobj_oid is None:\n            raise RuntimeError(\"Failed to create large object\")\n        return int(lobj_oid)\n\n\ndef read_lobj(\n    lobj_oid: int,\n    db_session: Session,\n    mode: str | None = None,\n    use_tempfile: bool = False,\n) -> IO:\n    \"\"\"Read a PostgreSQL large object identified by *lobj_oid*.\n\n    Attempts to use the native ``lobject`` API first; if unavailable falls back\n    to ``lo_get`` which returns the large object's contents as *bytea*.\n    \"\"\"\n\n    pg_conn = None\n    try:\n        pg_conn = get_pg_conn_from_session(db_session)\n        if not hasattr(pg_conn, \"lobject\"):\n            raise AttributeError\n\n        # Ensure binary mode by default\n        if mode is None:\n            mode = \"rb\"\n        large_object = (\n            pg_conn.lobject(lobj_oid, mode=mode) if mode else pg_conn.lobject(lobj_oid)\n        )\n\n        if use_tempfile:\n            temp_file = tempfile.SpooledTemporaryFile(max_size=MAX_IN_MEMORY_SIZE)\n            while True:\n                chunk = large_object.read(STANDARD_CHUNK_SIZE)\n                if not chunk:\n                    break\n                temp_file.write(chunk)\n            temp_file.seek(0)\n            return temp_file\n        else:\n            return BytesIO(large_object.read())\n\n    except AttributeError:\n        # Fallback path using ``lo_get``\n        result = db_session.execute(\n            text(\"SELECT lo_get(:oid) AS data\"),\n            {\"oid\": lobj_oid},\n        )\n        byte_data = result.scalar()\n        if byte_data is None:\n            raise RuntimeError(\"Failed to read large object\")\n\n        if use_tempfile:\n            temp_file = tempfile.SpooledTemporaryFile(max_size=MAX_IN_MEMORY_SIZE)\n            temp_file.write(byte_data)\n            temp_file.seek(0)\n            return temp_file\n        return BytesIO(byte_data)\n\n\ndef delete_lobj_by_id(\n    lobj_oid: int,\n    db_session: Session,\n) -> None:\n    \"\"\"Remove a large object by OID, regardless of driver implementation.\"\"\"\n\n    try:\n        pg_conn = get_pg_conn_from_session(db_session)\n        if hasattr(pg_conn, \"lobject\"):\n            pg_conn.lobject(lobj_oid).unlink()\n            return\n        raise AttributeError\n    except AttributeError:\n        # Fallback for drivers without ``lobject`` support\n        db_session.execute(text(\"SELECT lo_unlink(:oid)\"), {\"oid\": lobj_oid})\n        # No explicit result expected\n"
  },
  {
    "path": "backend/onyx/db/api_key.py",
    "content": "import uuid\n\nfrom fastapi_users.password import PasswordHelper\nfrom sqlalchemy import delete\nfrom sqlalchemy import select\nfrom sqlalchemy.ext.asyncio import AsyncSession\nfrom sqlalchemy.orm import joinedload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.api_key import ApiKeyDescriptor\nfrom onyx.auth.api_key import build_displayable_api_key\nfrom onyx.auth.api_key import generate_api_key\nfrom onyx.auth.api_key import hash_api_key\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN\nfrom onyx.configs.constants import DANSWER_API_KEY_PREFIX\nfrom onyx.configs.constants import UNNAMED_KEY_PLACEHOLDER\nfrom onyx.db.enums import AccountType\nfrom onyx.db.models import ApiKey\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserGroup\nfrom onyx.db.permissions import recompute_user_permissions__no_commit\nfrom onyx.db.users import assign_user_to_default_groups__no_commit\nfrom onyx.server.api_key.models import APIKeyArgs\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\ndef get_api_key_email_pattern() -> str:\n    return DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN\n\n\ndef is_api_key_email_address(email: str) -> bool:\n    return email.endswith(get_api_key_email_pattern())\n\n\ndef fetch_api_keys(db_session: Session) -> list[ApiKeyDescriptor]:\n    api_keys = (\n        db_session.scalars(select(ApiKey).options(joinedload(ApiKey.user)))\n        .unique()\n        .all()\n    )\n    return [\n        ApiKeyDescriptor(\n            api_key_id=api_key.id,\n            api_key_role=api_key.user.role,\n            api_key_display=api_key.api_key_display,\n            api_key_name=api_key.name,\n            user_id=api_key.user_id,\n        )\n        for api_key in api_keys\n    ]\n\n\nasync def fetch_user_for_api_key(\n    hashed_api_key: str, async_db_session: AsyncSession\n) -> User | None:\n    \"\"\"NOTE: this is async, since it's used during auth\n    (which is necessarily async due to FastAPI Users)\"\"\"\n    return await async_db_session.scalar(\n        select(User)\n        .join(ApiKey, ApiKey.user_id == User.id)\n        .where(ApiKey.hashed_api_key == hashed_api_key)\n    )\n\n\ndef get_api_key_fake_email(\n    name: str,\n    unique_id: str,\n) -> str:\n    return f\"{DANSWER_API_KEY_PREFIX}{name}@{unique_id}{DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN}\"\n\n\ndef insert_api_key(\n    db_session: Session, api_key_args: APIKeyArgs, user_id: uuid.UUID | None\n) -> ApiKeyDescriptor:\n    std_password_helper = PasswordHelper()\n\n    # Get tenant_id from context var (will be default schema for single tenant)\n    tenant_id = get_current_tenant_id()\n\n    api_key = generate_api_key(tenant_id)\n    api_key_user_id = uuid.uuid4()\n\n    display_name = api_key_args.name or UNNAMED_KEY_PLACEHOLDER\n    api_key_user_row = User(\n        id=api_key_user_id,\n        email=get_api_key_fake_email(display_name, str(api_key_user_id)),\n        # a random password for the \"user\"\n        hashed_password=std_password_helper.hash(std_password_helper.generate()),\n        is_active=True,\n        is_superuser=False,\n        is_verified=True,\n        role=api_key_args.role,\n        account_type=AccountType.SERVICE_ACCOUNT,\n    )\n    db_session.add(api_key_user_row)\n\n    api_key_row = ApiKey(\n        name=api_key_args.name,\n        hashed_api_key=hash_api_key(api_key),\n        api_key_display=build_displayable_api_key(api_key),\n        user_id=api_key_user_id,\n        owner_id=user_id,\n    )\n    db_session.add(api_key_row)\n\n    # Assign the API key virtual user to the appropriate default group\n    # before commit so everything is atomic.\n    # LIMITED role service accounts should have no group membership.\n    if api_key_args.role != UserRole.LIMITED:\n        assign_user_to_default_groups__no_commit(\n            db_session,\n            api_key_user_row,\n            is_admin=(api_key_args.role == UserRole.ADMIN),\n        )\n\n    db_session.commit()\n\n    return ApiKeyDescriptor(\n        api_key_id=api_key_row.id,\n        api_key_role=api_key_user_row.role,\n        api_key_display=api_key_row.api_key_display,\n        api_key=api_key,\n        api_key_name=api_key_args.name,\n        user_id=api_key_user_id,\n    )\n\n\ndef update_api_key(\n    db_session: Session, api_key_id: int, api_key_args: APIKeyArgs\n) -> ApiKeyDescriptor:\n    existing_api_key = db_session.scalar(select(ApiKey).where(ApiKey.id == api_key_id))\n    if existing_api_key is None:\n        raise ValueError(f\"API key with id {api_key_id} does not exist\")\n\n    existing_api_key.name = api_key_args.name\n    api_key_user = db_session.scalar(\n        select(User).where(User.id == existing_api_key.user_id)  # type: ignore\n    )\n    if api_key_user is None:\n        raise RuntimeError(\"API Key does not have associated user.\")\n\n    email_name = api_key_args.name or UNNAMED_KEY_PLACEHOLDER\n    api_key_user.email = get_api_key_fake_email(email_name, str(api_key_user.id))\n\n    old_role = api_key_user.role\n    api_key_user.role = api_key_args.role\n\n    # Reconcile default-group membership when the role changes.\n    if old_role != api_key_args.role:\n        # Remove from all default groups first.\n        delete_stmt = delete(User__UserGroup).where(\n            User__UserGroup.user_id == api_key_user.id,\n            User__UserGroup.user_group_id.in_(\n                select(UserGroup.id).where(UserGroup.is_default.is_(True))\n            ),\n        )\n        db_session.execute(delete_stmt)\n\n        # Re-assign to the correct default group (skip for LIMITED).\n        if api_key_args.role != UserRole.LIMITED:\n            assign_user_to_default_groups__no_commit(\n                db_session,\n                api_key_user,\n                is_admin=(api_key_args.role == UserRole.ADMIN),\n            )\n        else:\n            # No group assigned for LIMITED, but we still need to recompute\n            # since we just removed the old default-group membership above.\n            recompute_user_permissions__no_commit(api_key_user.id, db_session)\n\n    db_session.commit()\n\n    return ApiKeyDescriptor(\n        api_key_id=existing_api_key.id,\n        api_key_display=existing_api_key.api_key_display,\n        api_key_name=api_key_args.name,\n        api_key_role=api_key_user.role,\n        user_id=existing_api_key.user_id,\n    )\n\n\ndef regenerate_api_key(db_session: Session, api_key_id: int) -> ApiKeyDescriptor:\n    \"\"\"NOTE: currently, any admin can regenerate any API key.\"\"\"\n    existing_api_key = db_session.scalar(select(ApiKey).where(ApiKey.id == api_key_id))\n    if existing_api_key is None:\n        raise ValueError(f\"API key with id {api_key_id} does not exist\")\n\n    api_key_user = db_session.scalar(\n        select(User).where(User.id == existing_api_key.user_id)  # type: ignore\n    )\n    if api_key_user is None:\n        raise RuntimeError(\"API Key does not have associated user.\")\n\n    # Get tenant_id from context var (will be default schema for single tenant)\n    tenant_id = get_current_tenant_id()\n\n    new_api_key = generate_api_key(tenant_id)\n    existing_api_key.hashed_api_key = hash_api_key(new_api_key)\n    existing_api_key.api_key_display = build_displayable_api_key(new_api_key)\n    db_session.commit()\n\n    return ApiKeyDescriptor(\n        api_key_id=existing_api_key.id,\n        api_key_display=existing_api_key.api_key_display,\n        api_key=new_api_key,\n        api_key_name=existing_api_key.name,\n        api_key_role=api_key_user.role,\n        user_id=existing_api_key.user_id,\n    )\n\n\ndef remove_api_key(db_session: Session, api_key_id: int) -> None:\n    existing_api_key = db_session.scalar(select(ApiKey).where(ApiKey.id == api_key_id))\n    if existing_api_key is None:\n        raise ValueError(f\"API key with id {api_key_id} does not exist\")\n\n    user_associated_with_key = db_session.scalar(\n        select(User).where(User.id == existing_api_key.user_id)  # type: ignore\n    )\n    if user_associated_with_key is None:\n        raise ValueError(\n            f\"User associated with API key with id {api_key_id} does not exist. This should not happen.\"\n        )\n\n    db_session.delete(existing_api_key)\n    db_session.delete(user_associated_with_key)\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/auth.py",
    "content": "from collections.abc import AsyncGenerator\nfrom collections.abc import Callable\nfrom typing import Any\nfrom typing import Dict\nfrom typing import TypeVar\n\nfrom fastapi import Depends\nfrom fastapi_users.models import ID\nfrom fastapi_users.models import UP\nfrom fastapi_users_db_sqlalchemy import SQLAlchemyUserDatabase\nfrom fastapi_users_db_sqlalchemy.access_token import SQLAlchemyAccessTokenDatabase\nfrom sqlalchemy import func\nfrom sqlalchemy import Select\nfrom sqlalchemy.ext.asyncio import AsyncSession\nfrom sqlalchemy.future import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.constants import ANONYMOUS_USER_EMAIL\nfrom onyx.configs.constants import NO_AUTH_PLACEHOLDER_USER_EMAIL\nfrom onyx.db.api_key import get_api_key_email_pattern\nfrom onyx.db.engine.async_sql_engine import get_async_session\nfrom onyx.db.engine.async_sql_engine import get_async_session_context_manager\nfrom onyx.db.models import AccessToken\nfrom onyx.db.models import OAuthAccount\nfrom onyx.db.models import User\nfrom onyx.utils.variable_functionality import (\n    fetch_versioned_implementation_with_fallback,\n)\n\nT = TypeVar(\"T\", bound=tuple[Any, ...])\n\n\ndef get_default_admin_user_emails() -> list[str]:\n    \"\"\"Returns a list of emails who should default to Admin role.\n    Only used in the EE version. For MIT, just return empty list.\"\"\"\n    get_default_admin_user_emails_fn: Callable[[], list[str]] = (\n        fetch_versioned_implementation_with_fallback(\n            \"onyx.auth.users\", \"get_default_admin_user_emails_\", lambda: list[str]()\n        )\n    )\n    return get_default_admin_user_emails_fn()\n\n\ndef _add_live_user_count_where_clause(\n    select_stmt: Select[T],\n    only_admin_users: bool,\n) -> Select[T]:\n    \"\"\"\n    Builds a SQL column expression that can be used to filter out\n    users who should not be included in the live user count.\n\n    Excludes:\n    - API key users (by email pattern)\n    - System users (anonymous user, no-auth placeholder)\n    - External permission users (unless only_admin_users is True)\n    \"\"\"\n    select_stmt = select_stmt.where(~User.email.endswith(get_api_key_email_pattern()))  # type: ignore\n\n    # Exclude system users (anonymous user, no-auth placeholder)\n    select_stmt = select_stmt.where(User.email != ANONYMOUS_USER_EMAIL)  # type: ignore\n    select_stmt = select_stmt.where(User.email != NO_AUTH_PLACEHOLDER_USER_EMAIL)  # type: ignore\n\n    if only_admin_users:\n        return select_stmt.where(User.role == UserRole.ADMIN)\n\n    return select_stmt.where(\n        User.role != UserRole.EXT_PERM_USER,\n    )\n\n\ndef get_live_users_count(db_session: Session) -> int:\n    \"\"\"\n    Returns the number of users in the system.\n    This does NOT include invited users, \"users\" pulled in\n    from external connectors, or API keys.\n    \"\"\"\n    count_stmt = func.count(User.id)\n    select_stmt = select(count_stmt)\n    select_stmt_w_filters = _add_live_user_count_where_clause(select_stmt, False)\n    user_count = db_session.scalar(select_stmt_w_filters)\n    if user_count is None:\n        raise RuntimeError(\"Was not able to fetch the user count.\")\n    return user_count\n\n\nasync def get_user_count(only_admin_users: bool = False) -> int:\n    async with get_async_session_context_manager() as session:\n        count_stmt = func.count(User.id)\n        stmt = select(count_stmt)\n        stmt_w_filters = _add_live_user_count_where_clause(stmt, only_admin_users)\n        user_count = await session.scalar(stmt_w_filters)\n        if user_count is None:\n            raise RuntimeError(\"Was not able to fetch the user count.\")\n        return user_count\n\n\n# Need to override this because FastAPI Users doesn't give flexibility for backend field creation logic in OAuth flow\nclass SQLAlchemyUserAdminDB(SQLAlchemyUserDatabase[UP, ID]):\n    async def create(\n        self,\n        create_dict: Dict[str, Any],\n    ) -> UP:\n        user_count = await get_user_count()\n        if user_count == 0 or create_dict[\"email\"] in get_default_admin_user_emails():\n            create_dict[\"role\"] = UserRole.ADMIN\n        else:\n            create_dict[\"role\"] = UserRole.BASIC\n        return await super().create(create_dict)\n\n\nasync def get_user_db(\n    session: AsyncSession = Depends(get_async_session),\n) -> AsyncGenerator[SQLAlchemyUserAdminDB, None]:\n    yield SQLAlchemyUserAdminDB(session, User, OAuthAccount)\n\n\nasync def get_access_token_db(\n    session: AsyncSession = Depends(get_async_session),\n) -> AsyncGenerator[SQLAlchemyAccessTokenDatabase, None]:\n    yield SQLAlchemyAccessTokenDatabase(session, AccessToken)\n"
  },
  {
    "path": "backend/onyx/db/background_error.py",
    "content": "from sqlalchemy.orm import Session\n\nfrom onyx.db.models import BackgroundError\n\n\ndef create_background_error(\n    db_session: Session, message: str, cc_pair_id: int | None\n) -> None:\n    db_session.add(BackgroundError(message=message, cc_pair_id=cc_pair_id))\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/chat.py",
    "content": "from collections.abc import Sequence\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Tuple\nfrom uuid import UUID\n\nfrom fastapi import HTTPException\nfrom sqlalchemy import delete\nfrom sqlalchemy import desc\nfrom sqlalchemy import func\nfrom sqlalchemy import nullsfirst\nfrom sqlalchemy import or_\nfrom sqlalchemy import Row\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.exc import MultipleResultsFound\nfrom sqlalchemy.orm import joinedload\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.chat_configs import HARD_DELETE_CHATS\nfrom onyx.configs.constants import MessageType\nfrom onyx.context.search.models import InferenceSection\nfrom onyx.context.search.models import SavedSearchDoc\nfrom onyx.context.search.models import SearchDoc as ServerSearchDoc\nfrom onyx.db.models import ChatMessage\nfrom onyx.db.models import ChatMessage__SearchDoc\nfrom onyx.db.models import ChatSession\nfrom onyx.db.models import ChatSessionSharedStatus\nfrom onyx.db.models import Persona\nfrom onyx.db.models import SearchDoc as DBSearchDoc\nfrom onyx.db.models import ToolCall\nfrom onyx.db.models import User\nfrom onyx.db.persona import get_best_persona_id_for_user\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.file_store.models import FileDescriptor\nfrom onyx.llm.override_models import LLMOverride\nfrom onyx.llm.override_models import PromptOverride\nfrom onyx.server.query_and_chat.models import ChatMessageDetail\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.postgres_sanitization import sanitize_string\n\n\nlogger = setup_logger()\n\n\n# Note: search/streaming packet helpers moved to streaming_utils.py\n\n\ndef get_chat_session_by_id(\n    chat_session_id: UUID,\n    user_id: UUID | None,\n    db_session: Session,\n    include_deleted: bool = False,\n    is_shared: bool = False,\n    eager_load_persona: bool = False,\n) -> ChatSession:\n    stmt = select(ChatSession).where(ChatSession.id == chat_session_id)\n\n    if eager_load_persona:\n        stmt = stmt.options(\n            joinedload(ChatSession.persona).options(\n                selectinload(Persona.tools),\n                selectinload(Persona.user_files),\n                selectinload(Persona.document_sets),\n                selectinload(Persona.attached_documents),\n                selectinload(Persona.hierarchy_nodes),\n            ),\n            joinedload(ChatSession.project),\n        )\n\n    if is_shared:\n        stmt = stmt.where(ChatSession.shared_status == ChatSessionSharedStatus.PUBLIC)\n    else:\n        # if user_id is None, assume this is an admin who should be able\n        # to view all chat sessions\n        if user_id is not None:\n            stmt = stmt.where(\n                or_(ChatSession.user_id == user_id, ChatSession.user_id.is_(None))\n            )\n\n    result = db_session.execute(stmt)\n    chat_session = result.scalar_one_or_none()\n\n    if not chat_session:\n        raise ValueError(\"Invalid Chat Session ID provided\")\n\n    if not include_deleted and chat_session.deleted:\n        raise ValueError(\"Chat session has been deleted\")\n\n    return chat_session\n\n\ndef get_chat_sessions_by_slack_thread_id(\n    slack_thread_id: str,\n    user_id: UUID | None,\n    db_session: Session,\n) -> Sequence[ChatSession]:\n    stmt = select(ChatSession).where(ChatSession.slack_thread_id == slack_thread_id)\n    if user_id is not None:\n        stmt = stmt.where(\n            or_(ChatSession.user_id == user_id, ChatSession.user_id.is_(None))\n        )\n    return db_session.scalars(stmt).all()\n\n\n# Retrieves chat sessions by user\n# Chat sessions do not include onyxbot flows\ndef get_chat_sessions_by_user(\n    user_id: UUID | None,\n    deleted: bool | None,\n    db_session: Session,\n    include_onyxbot_flows: bool = False,\n    limit: int = 50,\n    before: datetime | None = None,\n    project_id: int | None = None,\n    only_non_project_chats: bool = False,\n    include_failed_chats: bool = False,\n) -> list[ChatSession]:\n    stmt = select(ChatSession).where(ChatSession.user_id == user_id)\n\n    if not include_onyxbot_flows:\n        stmt = stmt.where(ChatSession.onyxbot_flow.is_(False))\n\n    stmt = stmt.order_by(desc(ChatSession.time_updated))\n\n    if deleted is not None:\n        stmt = stmt.where(ChatSession.deleted == deleted)\n\n    if before is not None:\n        stmt = stmt.where(ChatSession.time_updated < before)\n\n    if project_id is not None:\n        stmt = stmt.where(ChatSession.project_id == project_id)\n    elif only_non_project_chats:\n        stmt = stmt.where(ChatSession.project_id.is_(None))\n\n    # When filtering out failed chats, we apply the limit in Python after\n    # filtering rather than in SQL, since the post-filter may remove rows.\n    if limit and include_failed_chats:\n        stmt = stmt.limit(limit)\n\n    result = db_session.execute(stmt)\n    chat_sessions = list(result.scalars().all())\n\n    if not include_failed_chats and chat_sessions:\n        # Filter out \"failed\" sessions (those with only SYSTEM messages)\n        # using a separate efficient query instead of a correlated EXISTS\n        # subquery, which causes full sequential scans of chat_message.\n        leeway = datetime.now(timezone.utc) - timedelta(minutes=5)\n        session_ids = [cs.id for cs in chat_sessions if cs.time_created < leeway]\n\n        if session_ids:\n            valid_session_ids_stmt = (\n                select(ChatMessage.chat_session_id)\n                .where(ChatMessage.chat_session_id.in_(session_ids))\n                .where(ChatMessage.message_type != MessageType.SYSTEM)\n                .distinct()\n            )\n            valid_session_ids = set(\n                db_session.execute(valid_session_ids_stmt).scalars().all()\n            )\n\n            chat_sessions = [\n                cs\n                for cs in chat_sessions\n                if cs.time_created >= leeway or cs.id in valid_session_ids\n            ]\n\n        if limit:\n            chat_sessions = chat_sessions[:limit]\n\n    return chat_sessions\n\n\ndef delete_orphaned_search_docs(db_session: Session) -> None:\n    orphaned_docs = (\n        db_session.query(DBSearchDoc)\n        .outerjoin(ChatMessage__SearchDoc)\n        .filter(ChatMessage__SearchDoc.chat_message_id.is_(None))\n        .all()\n    )\n    for doc in orphaned_docs:\n        db_session.delete(doc)\n    db_session.commit()\n\n\ndef delete_messages_and_files_from_chat_session(\n    chat_session_id: UUID, db_session: Session\n) -> None:\n    # Select messages older than cutoff_time with files\n    messages_with_files = db_session.execute(\n        select(ChatMessage.id, ChatMessage.files).where(\n            ChatMessage.chat_session_id == chat_session_id,\n        )\n    ).fetchall()\n\n    for _, files in messages_with_files:\n        file_store = get_default_file_store()\n        for file_info in files or []:\n            file_store.delete_file(file_id=file_info.get(\"id\"))\n\n    # Delete ChatMessage records - CASCADE constraints will automatically handle:\n    # - ChatMessage__StandardAnswer relationship records\n    db_session.execute(\n        delete(ChatMessage).where(ChatMessage.chat_session_id == chat_session_id)\n    )\n    db_session.commit()\n\n    delete_orphaned_search_docs(db_session)\n\n\ndef create_chat_session(\n    db_session: Session,\n    description: str | None,\n    user_id: UUID | None,\n    persona_id: int | None,  # Can be none if temporary persona is used\n    llm_override: LLMOverride | None = None,\n    prompt_override: PromptOverride | None = None,\n    onyxbot_flow: bool = False,\n    slack_thread_id: str | None = None,\n    project_id: int | None = None,\n) -> ChatSession:\n    chat_session = ChatSession(\n        user_id=user_id,\n        persona_id=persona_id,\n        description=description,\n        llm_override=llm_override,\n        prompt_override=prompt_override,\n        onyxbot_flow=onyxbot_flow,\n        slack_thread_id=slack_thread_id,\n        project_id=project_id,\n    )\n\n    db_session.add(chat_session)\n    db_session.commit()\n\n    return chat_session\n\n\ndef duplicate_chat_session_for_user_from_slack(\n    db_session: Session,\n    user: User,\n    chat_session_id: UUID,\n) -> ChatSession:\n    \"\"\"\n    This takes a chat session id for a session in Slack and:\n    - Creates a new chat session in the DB\n    - Tries to copy the persona from the original chat session\n        (if it is available to the user clicking the button)\n    - Sets the user to the given user (if provided)\n    \"\"\"\n    chat_session = get_chat_session_by_id(\n        chat_session_id=chat_session_id,\n        user_id=None,  # Ignore user permissions for this\n        db_session=db_session,\n    )\n    if not chat_session:\n        raise HTTPException(status_code=400, detail=\"Invalid Chat Session ID provided\")\n\n    # This enforces permissions and sets a default\n    new_persona_id = get_best_persona_id_for_user(\n        db_session=db_session,\n        user=user,\n        persona_id=chat_session.persona_id,\n    )\n\n    return create_chat_session(\n        db_session=db_session,\n        user_id=user.id,\n        persona_id=new_persona_id,\n        # Set this to empty string so the frontend will force a rename\n        description=\"\",\n        llm_override=chat_session.llm_override,\n        prompt_override=chat_session.prompt_override,\n        # Chat is in UI now so this is false\n        onyxbot_flow=False,\n        # Maybe we want this in the future to track if it was created from Slack\n        slack_thread_id=None,\n    )\n\n\ndef update_chat_session(\n    db_session: Session,\n    user_id: UUID | None,\n    chat_session_id: UUID,\n    description: str | None = None,\n    sharing_status: ChatSessionSharedStatus | None = None,\n) -> ChatSession:\n    chat_session = get_chat_session_by_id(\n        chat_session_id=chat_session_id, user_id=user_id, db_session=db_session\n    )\n\n    if chat_session.deleted:\n        raise ValueError(\"Trying to rename a deleted chat session\")\n\n    if description is not None:\n        chat_session.description = description\n    if sharing_status is not None:\n        chat_session.shared_status = sharing_status\n\n    db_session.commit()\n\n    return chat_session\n\n\ndef delete_all_chat_sessions_for_user(\n    user: User, db_session: Session, hard_delete: bool = HARD_DELETE_CHATS\n) -> None:\n    user_id = user.id\n\n    chat_sessions = (\n        db_session.query(ChatSession)\n        .filter(ChatSession.user_id == user_id, ChatSession.onyxbot_flow.is_(False))\n        .all()\n    )\n\n    if hard_delete:\n        for chat_session in chat_sessions:\n            delete_messages_and_files_from_chat_session(chat_session.id, db_session)\n        db_session.execute(\n            delete(ChatSession).where(\n                ChatSession.user_id == user_id, ChatSession.onyxbot_flow.is_(False)\n            )\n        )\n    else:\n        db_session.execute(\n            update(ChatSession)\n            .where(ChatSession.user_id == user_id, ChatSession.onyxbot_flow.is_(False))\n            .values(deleted=True)\n        )\n\n    db_session.commit()\n\n\ndef delete_chat_session(\n    user_id: UUID | None,\n    chat_session_id: UUID,\n    db_session: Session,\n    include_deleted: bool = False,\n    hard_delete: bool = HARD_DELETE_CHATS,\n) -> None:\n    chat_session = get_chat_session_by_id(\n        chat_session_id=chat_session_id,\n        user_id=user_id,\n        db_session=db_session,\n        include_deleted=include_deleted,\n    )\n\n    if chat_session.deleted and not include_deleted:\n        raise ValueError(\"Cannot delete an already deleted chat session\")\n\n    if hard_delete:\n        delete_messages_and_files_from_chat_session(chat_session_id, db_session)\n        db_session.execute(delete(ChatSession).where(ChatSession.id == chat_session_id))\n    else:\n        chat_session = get_chat_session_by_id(\n            chat_session_id=chat_session_id, user_id=user_id, db_session=db_session\n        )\n        chat_session.deleted = True\n\n    db_session.commit()\n\n\ndef get_chat_sessions_older_than(\n    days_old: int, db_session: Session\n) -> list[tuple[UUID | None, UUID]]:\n    \"\"\"\n    Retrieves chat sessions older than a specified number of days.\n\n    Args:\n        days_old: The number of days to consider as \"old\".\n        db_session: The database session.\n\n    Returns:\n        A list of tuples, where each tuple contains the user_id (can be None) and the chat_session_id of an old chat session.\n    \"\"\"\n\n    cutoff_time = datetime.utcnow() - timedelta(days=days_old)\n    old_sessions: Sequence[Row[Tuple[UUID | None, UUID]]] = db_session.execute(\n        select(ChatSession.user_id, ChatSession.id).where(\n            ChatSession.time_created < cutoff_time\n        )\n    ).fetchall()\n\n    # convert old_sessions to a conventional list of tuples\n    returned_sessions: list[tuple[UUID | None, UUID]] = [\n        (user_id, session_id) for user_id, session_id in old_sessions\n    ]\n\n    return returned_sessions\n\n\ndef get_chat_message(\n    chat_message_id: int,\n    user_id: UUID | None,\n    db_session: Session,\n) -> ChatMessage:\n    stmt = select(ChatMessage).where(ChatMessage.id == chat_message_id)\n\n    result = db_session.execute(stmt)\n    chat_message = result.scalar_one_or_none()\n\n    if not chat_message:\n        raise ValueError(\"Invalid Chat Message specified\")\n\n    chat_user = chat_message.chat_session.user\n    expected_user_id = chat_user.id if chat_user is not None else None\n\n    if expected_user_id != user_id:\n        logger.error(\n            f\"User {user_id} tried to fetch a chat message that does not belong to them\"\n        )\n        raise ValueError(\"Chat message does not belong to user\")\n\n    return chat_message\n\n\ndef get_chat_session_by_message_id(\n    db_session: Session,\n    message_id: int,\n) -> ChatSession:\n    \"\"\"\n    Should only be used for Slack\n    Get the chat session associated with a specific message ID\n    Note: this ignores permission checks.\n    \"\"\"\n    stmt = select(ChatMessage).where(ChatMessage.id == message_id)\n\n    result = db_session.execute(stmt)\n    chat_message = result.scalar_one_or_none()\n\n    if chat_message is None:\n        raise ValueError(\n            f\"Unable to find chat session associated with message ID: {message_id}\"\n        )\n\n    return chat_message.chat_session\n\n\ndef get_chat_messages_by_sessions(\n    chat_session_ids: list[UUID],\n    user_id: UUID | None,\n    db_session: Session,\n    skip_permission_check: bool = False,\n) -> Sequence[ChatMessage]:\n    if not skip_permission_check:\n        for chat_session_id in chat_session_ids:\n            get_chat_session_by_id(\n                chat_session_id=chat_session_id, user_id=user_id, db_session=db_session\n            )\n    stmt = (\n        select(ChatMessage)\n        .where(ChatMessage.chat_session_id.in_(chat_session_ids))\n        .order_by(nullsfirst(ChatMessage.parent_message_id))\n    )\n    return db_session.execute(stmt).scalars().all()\n\n\ndef add_chats_to_session_from_slack_thread(\n    db_session: Session,\n    slack_chat_session_id: UUID,\n    new_chat_session_id: UUID,\n) -> None:\n    new_root_message = get_or_create_root_message(\n        chat_session_id=new_chat_session_id,\n        db_session=db_session,\n    )\n\n    for chat_message in get_chat_messages_by_sessions(\n        chat_session_ids=[slack_chat_session_id],\n        user_id=None,  # Ignore user permissions for this\n        db_session=db_session,\n        skip_permission_check=True,\n    ):\n        if chat_message.message_type == MessageType.SYSTEM:\n            continue\n        # Duplicate the message\n        new_root_message = create_new_chat_message(\n            db_session=db_session,\n            chat_session_id=new_chat_session_id,\n            parent_message=new_root_message,\n            message=chat_message.message,\n            files=chat_message.files,\n            error=chat_message.error,\n            token_count=chat_message.token_count,\n            message_type=chat_message.message_type,\n            reasoning_tokens=chat_message.reasoning_tokens,\n        )\n\n\ndef add_search_docs_to_chat_message(\n    chat_message_id: int, search_doc_ids: list[int], db_session: Session\n) -> None:\n    \"\"\"\n    Link SearchDocs to a ChatMessage by creating entries in the chat_message__search_doc junction table.\n\n    Args:\n        chat_message_id: The ID of the chat message\n        search_doc_ids: List of search document IDs to link\n        db_session: The database session\n    \"\"\"\n    for search_doc_id in search_doc_ids:\n        chat_message_search_doc = ChatMessage__SearchDoc(\n            chat_message_id=chat_message_id, search_doc_id=search_doc_id\n        )\n        db_session.add(chat_message_search_doc)\n\n\ndef add_search_docs_to_tool_call(\n    tool_call_id: int, search_doc_ids: list[int], db_session: Session\n) -> None:\n    \"\"\"\n    Link SearchDocs to a ToolCall by creating entries in the tool_call__search_doc junction table.\n\n    Args:\n        tool_call_id: The ID of the tool call\n        search_doc_ids: List of search document IDs to link\n        db_session: The database session\n    \"\"\"\n    from onyx.db.models import ToolCall__SearchDoc\n\n    for search_doc_id in search_doc_ids:\n        tool_call_search_doc = ToolCall__SearchDoc(\n            tool_call_id=tool_call_id, search_doc_id=search_doc_id\n        )\n        db_session.add(tool_call_search_doc)\n\n\ndef get_chat_messages_by_session(\n    chat_session_id: UUID,\n    user_id: UUID | None,\n    db_session: Session,\n    skip_permission_check: bool = False,\n    prefetch_top_two_level_tool_calls: bool = True,\n) -> list[ChatMessage]:\n    if not skip_permission_check:\n        # bug if we ever call this expecting the permission check to not be skipped\n        get_chat_session_by_id(\n            chat_session_id=chat_session_id, user_id=user_id, db_session=db_session\n        )\n\n    stmt = (\n        select(ChatMessage)\n        .where(ChatMessage.chat_session_id == chat_session_id)\n        .order_by(nullsfirst(ChatMessage.parent_message_id))\n    )\n\n    # This should handle both the top level tool calls and deep research\n    # If there are future nested agents, this can be extended.\n    if prefetch_top_two_level_tool_calls:\n        # Load tool_calls and their direct children (one level deep)\n        stmt = stmt.options(\n            selectinload(ChatMessage.tool_calls).selectinload(\n                ToolCall.tool_call_children\n            )\n        )\n        result = db_session.scalars(stmt).unique().all()\n    else:\n        result = db_session.scalars(stmt).all()\n\n    return list(result)\n\n\ndef get_or_create_root_message(\n    chat_session_id: UUID,\n    db_session: Session,\n) -> ChatMessage:\n    try:\n        root_message: ChatMessage | None = (\n            db_session.query(ChatMessage)\n            .filter(\n                ChatMessage.chat_session_id == chat_session_id,\n                ChatMessage.parent_message_id.is_(None),\n            )\n            .one_or_none()\n        )\n    except MultipleResultsFound:\n        raise Exception(\n            \"Multiple root messages found for chat session. Data inconsistency detected.\"\n        )\n\n    if root_message is not None:\n        return root_message\n    else:\n        new_root_message = ChatMessage(\n            chat_session_id=chat_session_id,\n            parent_message_id=None,\n            latest_child_message_id=None,\n            message=\"\",\n            token_count=0,\n            message_type=MessageType.SYSTEM,\n        )\n        db_session.add(new_root_message)\n        db_session.commit()\n        return new_root_message\n\n\ndef reserve_message_id(\n    db_session: Session,\n    chat_session_id: UUID,\n    parent_message: int,\n    message_type: MessageType = MessageType.ASSISTANT,\n) -> ChatMessage:\n    # Create an temporary holding chat message to the updated and saved at the end\n    empty_message = ChatMessage(\n        chat_session_id=chat_session_id,\n        parent_message_id=parent_message,\n        latest_child_message_id=None,\n        message=\"Response was terminated prior to completion, try regenerating.\",\n        token_count=15,\n        message_type=message_type,\n    )\n\n    # Add the empty message to the session\n    db_session.add(empty_message)\n    db_session.flush()\n\n    # Get the parent message and set its child pointer to the current message\n    parent_chat_message = (\n        db_session.query(ChatMessage).filter(ChatMessage.id == parent_message).first()\n    )\n    if parent_chat_message:\n        parent_chat_message.latest_child_message_id = empty_message.id\n\n    # Committing because it's ok to recover this state. More clear to the user than it is now.\n    # Ideally there's a special UI for a case like this with a regenerate button but not needed for now.\n    db_session.commit()\n\n    return empty_message\n\n\ndef reserve_multi_model_message_ids(\n    db_session: Session,\n    chat_session_id: UUID,\n    parent_message_id: int,\n    model_display_names: list[str],\n) -> list[ChatMessage]:\n    \"\"\"Reserve N assistant message placeholders for multi-model parallel streaming.\n\n    All messages share the same parent (the user message). The parent's\n    latest_child_message_id points to the LAST reserved message so that the\n    default history-chain walker picks it up.\n    \"\"\"\n    reserved: list[ChatMessage] = []\n    for display_name in model_display_names:\n        msg = ChatMessage(\n            chat_session_id=chat_session_id,\n            parent_message_id=parent_message_id,\n            latest_child_message_id=None,\n            message=\"Response was terminated prior to completion, try regenerating.\",\n            token_count=15,  # placeholder; updated on completion by llm_loop_completion_handle\n            message_type=MessageType.ASSISTANT,\n            model_display_name=display_name,\n        )\n        db_session.add(msg)\n        reserved.append(msg)\n\n    # Flush to assign IDs without committing yet\n    db_session.flush()\n\n    # Point parent's latest_child to the last reserved message\n    parent = (\n        db_session.query(ChatMessage)\n        .filter(ChatMessage.id == parent_message_id)\n        .first()\n    )\n    if parent:\n        parent.latest_child_message_id = reserved[-1].id\n\n    db_session.commit()\n    return reserved\n\n\ndef set_preferred_response(\n    db_session: Session,\n    user_message_id: int,\n    preferred_assistant_message_id: int,\n) -> None:\n    \"\"\"Mark one assistant response as the user's preferred choice in a multi-model turn.\n\n    Also advances ``latest_child_message_id`` so the preferred response becomes\n    the active branch for any subsequent messages in the conversation.\n\n    Args:\n        db_session: Active database session.\n        user_message_id: Primary key of the ``USER``-type ``ChatMessage`` whose\n            preferred response is being set.\n        preferred_assistant_message_id: Primary key of the ``ASSISTANT``-type\n            ``ChatMessage`` to prefer. Must be a direct child of ``user_message_id``.\n\n    Raises:\n        ValueError: If either message is not found, if ``user_message_id`` does not\n            refer to a USER message, or if the assistant message is not a direct child\n            of the user message.\n    \"\"\"\n    user_msg = db_session.get(ChatMessage, user_message_id)\n    if user_msg is None:\n        raise ValueError(f\"User message {user_message_id} not found\")\n    if user_msg.message_type != MessageType.USER:\n        raise ValueError(f\"Message {user_message_id} is not a user message\")\n\n    assistant_msg = db_session.get(ChatMessage, preferred_assistant_message_id)\n    if assistant_msg is None:\n        raise ValueError(\n            f\"Assistant message {preferred_assistant_message_id} not found\"\n        )\n    if assistant_msg.parent_message_id != user_message_id:\n        raise ValueError(\n            f\"Assistant message {preferred_assistant_message_id} is not a child of user message {user_message_id}\"\n        )\n\n    user_msg.preferred_response_id = preferred_assistant_message_id\n    user_msg.latest_child_message_id = preferred_assistant_message_id\n    db_session.commit()\n\n\ndef create_new_chat_message(\n    chat_session_id: UUID,\n    parent_message: ChatMessage,\n    message: str,\n    token_count: int,\n    message_type: MessageType,\n    db_session: Session,\n    files: list[FileDescriptor] | None = None,\n    error: str | None = None,\n    commit: bool = True,\n    reserved_message_id: int | None = None,\n    reasoning_tokens: str | None = None,\n) -> ChatMessage:\n    if reserved_message_id is not None:\n        # Edit existing message\n        existing_message = db_session.query(ChatMessage).get(reserved_message_id)\n        if existing_message is None:\n            raise ValueError(f\"No message found with id {reserved_message_id}\")\n\n        existing_message.chat_session_id = chat_session_id\n        existing_message.parent_message_id = parent_message.id\n        existing_message.message = message\n        existing_message.token_count = token_count\n        existing_message.message_type = message_type\n        existing_message.files = files\n        existing_message.error = error\n        existing_message.reasoning_tokens = reasoning_tokens\n        new_chat_message = existing_message\n    else:\n        # Create new message\n        new_chat_message = ChatMessage(\n            chat_session_id=chat_session_id,\n            parent_message_id=parent_message.id,\n            latest_child_message_id=None,\n            message=message,\n            token_count=token_count,\n            message_type=message_type,\n            files=files,\n            error=error,\n            reasoning_tokens=reasoning_tokens,\n        )\n        db_session.add(new_chat_message)\n\n    # Flush the session to get an ID for the new chat message\n    db_session.flush()\n\n    parent_message.latest_child_message_id = new_chat_message.id\n    if commit:\n        db_session.commit()\n\n    return new_chat_message\n\n\ndef set_as_latest_chat_message(\n    chat_message: ChatMessage,\n    user_id: UUID | None,\n    db_session: Session,\n) -> None:\n    parent_message_id = chat_message.parent_message_id\n\n    if parent_message_id is None:\n        raise RuntimeError(\n            f\"Trying to set a latest message without parent, message id: {chat_message.id}\"\n        )\n\n    parent_message = get_chat_message(\n        chat_message_id=parent_message_id, user_id=user_id, db_session=db_session\n    )\n\n    parent_message.latest_child_message_id = chat_message.id\n\n    db_session.commit()\n\n\ndef create_db_search_doc(\n    server_search_doc: ServerSearchDoc,\n    db_session: Session,\n    commit: bool = True,\n) -> DBSearchDoc:\n    db_search_doc = DBSearchDoc(\n        document_id=sanitize_string(server_search_doc.document_id),\n        chunk_ind=server_search_doc.chunk_ind,\n        semantic_id=sanitize_string(server_search_doc.semantic_identifier),\n        link=(\n            sanitize_string(server_search_doc.link)\n            if server_search_doc.link is not None\n            else None\n        ),\n        blurb=sanitize_string(server_search_doc.blurb),\n        source_type=server_search_doc.source_type,\n        boost=server_search_doc.boost,\n        hidden=server_search_doc.hidden,\n        doc_metadata=server_search_doc.metadata,\n        is_relevant=server_search_doc.is_relevant,\n        relevance_explanation=(\n            sanitize_string(server_search_doc.relevance_explanation)\n            if server_search_doc.relevance_explanation is not None\n            else None\n        ),\n        score=server_search_doc.score or 0.0,\n        match_highlights=[\n            sanitize_string(h) for h in server_search_doc.match_highlights\n        ],\n        updated_at=server_search_doc.updated_at,\n        primary_owners=(\n            [sanitize_string(o) for o in server_search_doc.primary_owners]\n            if server_search_doc.primary_owners is not None\n            else None\n        ),\n        secondary_owners=(\n            [sanitize_string(o) for o in server_search_doc.secondary_owners]\n            if server_search_doc.secondary_owners is not None\n            else None\n        ),\n        is_internet=server_search_doc.is_internet,\n    )\n\n    db_session.add(db_search_doc)\n    if commit:\n        db_session.commit()\n    else:\n        db_session.flush()\n    return db_search_doc\n\n\ndef get_db_search_doc_by_id(doc_id: int, db_session: Session) -> DBSearchDoc | None:\n    \"\"\"There are no safety checks here like user permission etc., use with caution\"\"\"\n    search_doc = db_session.query(DBSearchDoc).filter(DBSearchDoc.id == doc_id).first()\n    return search_doc\n\n\ndef get_db_search_doc_by_document_id(\n    document_id: str, db_session: Session\n) -> DBSearchDoc | None:\n    \"\"\"Get SearchDoc by document_id field. There are no safety checks here like user permission etc., use with caution\"\"\"\n    search_doc = (\n        db_session.query(DBSearchDoc)\n        .filter(DBSearchDoc.document_id == document_id)\n        .first()\n    )\n    return search_doc\n\n\ndef translate_db_search_doc_to_saved_search_doc(\n    db_search_doc: DBSearchDoc,\n    remove_doc_content: bool = False,\n) -> SavedSearchDoc:\n    return SavedSearchDoc(\n        db_doc_id=db_search_doc.id,\n        score=db_search_doc.score,\n        document_id=db_search_doc.document_id,\n        chunk_ind=db_search_doc.chunk_ind,\n        semantic_identifier=db_search_doc.semantic_id,\n        link=db_search_doc.link,\n        blurb=db_search_doc.blurb if not remove_doc_content else \"\",\n        source_type=db_search_doc.source_type,\n        boost=db_search_doc.boost,\n        hidden=db_search_doc.hidden,\n        metadata=db_search_doc.doc_metadata if not remove_doc_content else {},\n        match_highlights=(\n            db_search_doc.match_highlights if not remove_doc_content else []\n        ),\n        relevance_explanation=db_search_doc.relevance_explanation,\n        is_relevant=db_search_doc.is_relevant,\n        updated_at=db_search_doc.updated_at if not remove_doc_content else None,\n        primary_owners=db_search_doc.primary_owners if not remove_doc_content else [],\n        secondary_owners=(\n            db_search_doc.secondary_owners if not remove_doc_content else []\n        ),\n        is_internet=db_search_doc.is_internet,\n    )\n\n\ndef translate_db_message_to_chat_message_detail(\n    chat_message: ChatMessage,\n    remove_doc_content: bool = False,\n) -> ChatMessageDetail:\n    # Get current feedback if any\n    current_feedback = None\n    if chat_message.chat_message_feedbacks:\n        latest_feedback = chat_message.chat_message_feedbacks[-1]\n        if latest_feedback.is_positive is not None:\n            current_feedback = \"like\" if latest_feedback.is_positive else \"dislike\"\n\n    # Convert citations from {citation_num: db_doc_id} to {citation_num: document_id}\n    converted_citations = None\n    if chat_message.citations and chat_message.search_docs:\n        # Build lookup map: db_doc_id -> document_id\n        db_doc_id_to_document_id = {\n            doc.id: doc.document_id for doc in chat_message.search_docs\n        }\n\n        converted_citations = {}\n        for citation_num, db_doc_id in chat_message.citations.items():\n            document_id = db_doc_id_to_document_id.get(db_doc_id)\n            if document_id:\n                converted_citations[citation_num] = document_id\n\n    top_documents = [\n        translate_db_search_doc_to_saved_search_doc(\n            db_doc, remove_doc_content=remove_doc_content\n        )\n        for db_doc in chat_message.search_docs\n    ]\n    top_documents = sorted(\n        top_documents, key=lambda doc: doc.score or 0.0, reverse=True\n    )\n    chat_msg_detail = ChatMessageDetail(\n        chat_session_id=chat_message.chat_session_id,\n        message_id=chat_message.id,\n        parent_message=chat_message.parent_message_id,\n        latest_child_message=chat_message.latest_child_message_id,\n        message=chat_message.message,\n        reasoning_tokens=chat_message.reasoning_tokens,\n        message_type=chat_message.message_type,\n        context_docs=top_documents,\n        citations=converted_citations,\n        time_sent=chat_message.time_sent,\n        files=chat_message.files or [],\n        error=chat_message.error,\n        current_feedback=current_feedback,\n        processing_duration_seconds=chat_message.processing_duration_seconds,\n        preferred_response_id=chat_message.preferred_response_id,\n        model_display_name=chat_message.model_display_name,\n    )\n\n    return chat_msg_detail\n\n\ndef update_chat_session_updated_at_timestamp(\n    chat_session_id: UUID, db_session: Session\n) -> None:\n    \"\"\"\n    Explicitly update the timestamp on a chat session without modifying other fields.\n    This is useful when adding messages to a chat session to reflect recent activity.\n    \"\"\"\n\n    # Direct SQL update to avoid loading the entire object if it's not already loaded\n    db_session.execute(\n        update(ChatSession)\n        .where(ChatSession.id == chat_session_id)\n        .values(time_updated=func.now())\n    )\n    # No commit - the caller is responsible for committing the transaction\n\n\ndef create_search_doc_from_inference_section(\n    inference_section: InferenceSection,\n    is_internet: bool,\n    db_session: Session,\n    score: float = 0.0,\n    is_relevant: bool | None = None,\n    relevance_explanation: str | None = None,\n    commit: bool = False,\n) -> DBSearchDoc:\n    \"\"\"Create a SearchDoc in the database from an InferenceSection.\"\"\"\n\n    db_search_doc = DBSearchDoc(\n        document_id=inference_section.center_chunk.document_id,\n        chunk_ind=inference_section.center_chunk.chunk_id,\n        semantic_id=inference_section.center_chunk.semantic_identifier,\n        link=(\n            inference_section.center_chunk.source_links.get(0)\n            if inference_section.center_chunk.source_links\n            else None\n        ),\n        blurb=inference_section.center_chunk.blurb,\n        source_type=inference_section.center_chunk.source_type,\n        boost=inference_section.center_chunk.boost,\n        hidden=inference_section.center_chunk.hidden,\n        doc_metadata=inference_section.center_chunk.metadata,\n        score=score,\n        is_relevant=is_relevant,\n        relevance_explanation=relevance_explanation,\n        match_highlights=inference_section.center_chunk.match_highlights,\n        updated_at=inference_section.center_chunk.updated_at,\n        primary_owners=inference_section.center_chunk.primary_owners or [],\n        secondary_owners=inference_section.center_chunk.secondary_owners or [],\n        is_internet=is_internet,\n    )\n\n    db_session.add(db_search_doc)\n    if commit:\n        db_session.commit()\n    else:\n        db_session.flush()\n\n    return db_search_doc\n\n\ndef create_search_doc_from_saved_search_doc(\n    saved_search_doc: SavedSearchDoc,\n) -> DBSearchDoc:\n    \"\"\"Convert SavedSearchDoc (server model) into DB SearchDoc with correct field mapping.\"\"\"\n    return DBSearchDoc(\n        document_id=saved_search_doc.document_id,\n        chunk_ind=saved_search_doc.chunk_ind,\n        # Map Pydantic semantic_identifier -> DB semantic_id; ensure non-null\n        semantic_id=saved_search_doc.semantic_identifier or \"Unknown\",\n        link=saved_search_doc.link,\n        blurb=saved_search_doc.blurb,\n        source_type=saved_search_doc.source_type,\n        boost=saved_search_doc.boost,\n        hidden=saved_search_doc.hidden,\n        # Map metadata -> doc_metadata (DB column name)\n        doc_metadata=saved_search_doc.metadata,\n        # SavedSearchDoc.score exists and defaults to 0.0\n        score=saved_search_doc.score or 0.0,\n        match_highlights=saved_search_doc.match_highlights,\n        updated_at=saved_search_doc.updated_at,\n        primary_owners=saved_search_doc.primary_owners,\n        secondary_owners=saved_search_doc.secondary_owners,\n        is_internet=saved_search_doc.is_internet,\n        is_relevant=saved_search_doc.is_relevant,\n        relevance_explanation=saved_search_doc.relevance_explanation,\n    )\n\n\ndef update_db_session_with_messages(\n    db_session: Session,\n    chat_message_id: int,\n    chat_session_id: UUID,\n    message: str | None = None,\n    message_type: str | None = None,\n    token_count: int | None = None,\n    error: str | None = None,\n    update_parent_message: bool = True,\n    files: list[FileDescriptor] | None = None,\n    reasoning_tokens: str | None = None,\n    commit: bool = False,\n) -> ChatMessage:\n    chat_message = (\n        db_session.query(ChatMessage)\n        .filter(\n            ChatMessage.id == chat_message_id,\n            ChatMessage.chat_session_id == chat_session_id,\n        )\n        .first()\n    )\n    if not chat_message:\n        raise ValueError(\"Chat message with id not found\")  # should never happen\n\n    if message:\n        chat_message.message = message\n    if message_type:\n        chat_message.message_type = MessageType(message_type)\n    if token_count:\n        chat_message.token_count = token_count\n    if error:\n        chat_message.error = error\n    if files is not None:\n        chat_message.files = files\n    if reasoning_tokens is not None:\n        chat_message.reasoning_tokens = reasoning_tokens\n\n    if update_parent_message:\n        parent_chat_message = (\n            db_session.query(ChatMessage)\n            .filter(ChatMessage.id == chat_message.parent_message_id)\n            .first()\n        )\n        if parent_chat_message:\n            parent_chat_message.latest_child_message_id = chat_message.id\n\n    if commit:\n        db_session.commit()\n    else:\n        db_session.flush()\n\n    return chat_message\n"
  },
  {
    "path": "backend/onyx/db/chat_search.py",
    "content": "from typing import List\nfrom typing import Optional\nfrom typing import Tuple\nfrom uuid import UUID\n\nfrom sqlalchemy import column\nfrom sqlalchemy import desc\nfrom sqlalchemy import func\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import joinedload\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.sql.expression import ColumnClause\n\nfrom onyx.db.models import ChatMessage\nfrom onyx.db.models import ChatSession\n\n\ndef search_chat_sessions(\n    user_id: UUID | None,\n    db_session: Session,\n    query: Optional[str] = None,\n    page: int = 1,\n    page_size: int = 10,\n    include_deleted: bool = False,\n    include_onyxbot_flows: bool = False,\n) -> Tuple[List[ChatSession], bool]:\n    \"\"\"\n    Fast full-text search on ChatSession + ChatMessage using tsvectors.\n\n    If no query is provided, returns the most recent chat sessions.\n    Otherwise, searches both chat messages and session descriptions.\n\n    Returns a tuple of (sessions, has_more) where has_more indicates if\n    there are additional results beyond the requested page.\n    \"\"\"\n    offset_val = (page - 1) * page_size\n\n    # If no query, just return the most recent sessions\n    if not query or not query.strip():\n        stmt = (\n            select(ChatSession)\n            .order_by(desc(ChatSession.time_created))\n            .offset(offset_val)\n            .limit(page_size + 1)\n        )\n        if user_id is not None:\n            stmt = stmt.where(ChatSession.user_id == user_id)\n        if not include_onyxbot_flows:\n            stmt = stmt.where(ChatSession.onyxbot_flow.is_(False))\n        if not include_deleted:\n            stmt = stmt.where(ChatSession.deleted.is_(False))\n\n        result = db_session.execute(stmt.options(joinedload(ChatSession.persona)))\n        sessions = result.scalars().all()\n\n        has_more = len(sessions) > page_size\n        if has_more:\n            sessions = sessions[:page_size]\n\n        return list(sessions), has_more\n\n    # Otherwise, proceed with full-text search\n    query = query.strip()\n\n    base_conditions = []\n    if user_id is not None:\n        base_conditions.append(ChatSession.user_id == user_id)\n    if not include_onyxbot_flows:\n        base_conditions.append(ChatSession.onyxbot_flow.is_(False))\n    if not include_deleted:\n        base_conditions.append(ChatSession.deleted.is_(False))\n\n    message_tsv: ColumnClause = column(\"message_tsv\")\n    description_tsv: ColumnClause = column(\"description_tsv\")\n\n    ts_query = func.plainto_tsquery(\"english\", query)\n\n    description_session_ids = (\n        select(ChatSession.id)\n        .where(*base_conditions)\n        .where(description_tsv.op(\"@@\")(ts_query))\n    )\n\n    message_session_ids = (\n        select(ChatMessage.chat_session_id)\n        .join(ChatSession, ChatMessage.chat_session_id == ChatSession.id)\n        .where(*base_conditions)\n        .where(message_tsv.op(\"@@\")(ts_query))\n    )\n\n    combined_ids = description_session_ids.union(message_session_ids).alias(\n        \"combined_ids\"\n    )\n\n    final_stmt = (\n        select(ChatSession)\n        .join(combined_ids, ChatSession.id == combined_ids.c.id)\n        .order_by(desc(ChatSession.time_created))\n        .distinct()\n        .offset(offset_val)\n        .limit(page_size + 1)\n        .options(joinedload(ChatSession.persona))\n    )\n\n    session_objs = db_session.execute(final_stmt).scalars().all()\n\n    has_more = len(session_objs) > page_size\n    if has_more:\n        session_objs = session_objs[:page_size]\n\n    return list(session_objs), has_more\n"
  },
  {
    "path": "backend/onyx/db/chunk.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\n\nfrom sqlalchemy import delete\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import ChunkStats\nfrom onyx.indexing.models import UpdatableChunkData\n\n\ndef update_chunk_boost_components__no_commit(\n    chunk_data: list[UpdatableChunkData],\n    db_session: Session,\n) -> None:\n    \"\"\"Updates the chunk_boost_components for chunks in the database.\n\n    Args:\n        chunk_data: List of dicts containing chunk_id, document_id, and boost_score\n        db_session: SQLAlchemy database session\n    \"\"\"\n    if not chunk_data:\n        return\n\n    for data in chunk_data:\n        chunk_in_doc_id = int(data.chunk_id)\n        if chunk_in_doc_id < 0:\n            raise ValueError(f\"Chunk ID is empty for chunk {data}\")\n\n        chunk_document_id = f\"{data.document_id}__{chunk_in_doc_id}\"\n        chunk_stats = (\n            db_session.query(ChunkStats)\n            .filter(\n                ChunkStats.id == chunk_document_id,\n            )\n            .first()\n        )\n\n        score = data.boost_score\n\n        if chunk_stats:\n            chunk_stats.information_content_boost = score\n            chunk_stats.last_modified = datetime.now(timezone.utc)\n            db_session.add(chunk_stats)\n        else:\n            # do not save new chunks with a neutral boost score\n            if score == 1.0:\n                continue\n            # Create new record\n            chunk_stats = ChunkStats(\n                document_id=data.document_id,\n                chunk_in_doc_id=chunk_in_doc_id,\n                information_content_boost=score,\n            )\n            db_session.add(chunk_stats)\n\n\ndef delete_chunk_stats_by_connector_credential_pair__no_commit(\n    db_session: Session, document_ids: list[str]\n) -> None:\n    \"\"\"This deletes just chunk stats in postgres.\"\"\"\n    stmt = delete(ChunkStats).where(ChunkStats.document_id.in_(document_ids))\n\n    db_session.execute(stmt)\n"
  },
  {
    "path": "backend/onyx/db/code_interpreter.py",
    "content": "from sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import CodeInterpreterServer\n\n\ndef fetch_code_interpreter_server(\n    db_session: Session,\n) -> CodeInterpreterServer:\n    server = db_session.scalars(select(CodeInterpreterServer)).one()\n    return server\n\n\ndef update_code_interpreter_server_enabled(\n    db_session: Session,\n    enabled: bool,\n) -> CodeInterpreterServer:\n    server = db_session.scalars(select(CodeInterpreterServer)).one()\n    server.server_enabled = enabled\n    db_session.commit()\n    return server\n"
  },
  {
    "path": "backend/onyx/db/connector.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom typing import cast\n\nfrom sqlalchemy import and_\nfrom sqlalchemy import exists\nfrom sqlalchemy import func\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import aliased\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DEFAULT_PRUNING_FREQ\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import IndexingMode\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import FederatedConnector\nfrom onyx.db.models import IndexAttempt\nfrom onyx.kg.models import KGConnectorData\nfrom onyx.server.documents.models import ConnectorBase\nfrom onyx.server.documents.models import ObjectCreationIdResponse\nfrom onyx.server.models import StatusResponse\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef check_federated_connectors_exist(db_session: Session) -> bool:\n    stmt = select(exists(FederatedConnector))\n    result = db_session.execute(stmt)\n    return result.scalar() or False\n\n\ndef check_connectors_exist(db_session: Session) -> bool:\n    # Connector 0 is created on server startup as a default for ingestion\n    # it will always exist and we don't need to count it for this\n    stmt = select(exists(Connector).where(Connector.id > 0))\n    result = db_session.execute(stmt)\n    return result.scalar() or False\n\n\ndef check_user_files_exist(db_session: Session) -> bool:\n    \"\"\"Check if any user files exist in the system.\n\n    This is used to determine if the search tool should be available\n    when there are no regular connectors but there are user files\n    (User Knowledge mode).\n    \"\"\"\n    from onyx.db.models import UserFile\n    from onyx.db.enums import UserFileStatus\n\n    stmt = select(exists(UserFile).where(UserFile.status == UserFileStatus.COMPLETED))\n    result = db_session.execute(stmt)\n    return result.scalar() or False\n\n\ndef fetch_connectors(\n    db_session: Session,\n    sources: list[DocumentSource] | None = None,\n    input_types: list[InputType] | None = None,\n) -> list[Connector]:\n    stmt = select(Connector)\n    if sources is not None:\n        stmt = stmt.where(Connector.source.in_(sources))\n    if input_types is not None:\n        stmt = stmt.where(Connector.input_type.in_(input_types))\n    results = db_session.scalars(stmt)\n    return list(results.all())\n\n\ndef connector_by_name_source_exists(\n    connector_name: str, source: DocumentSource, db_session: Session\n) -> bool:\n    stmt = select(Connector).where(\n        Connector.name == connector_name, Connector.source == source\n    )\n    result = db_session.execute(stmt)\n    connector = result.scalar_one_or_none()\n    return connector is not None\n\n\ndef fetch_connector_by_id(connector_id: int, db_session: Session) -> Connector | None:\n    stmt = select(Connector).where(Connector.id == connector_id)\n    result = db_session.execute(stmt)\n    connector = result.scalar_one_or_none()\n    return connector\n\n\ndef fetch_ingestion_connector_by_name(\n    connector_name: str, db_session: Session\n) -> Connector | None:\n    stmt = (\n        select(Connector)\n        .where(Connector.name == connector_name)\n        .where(Connector.source == DocumentSource.INGESTION_API)\n    )\n    result = db_session.execute(stmt)\n    connector = result.scalar_one_or_none()\n    return connector\n\n\ndef create_connector(\n    db_session: Session,\n    connector_data: ConnectorBase,\n) -> ObjectCreationIdResponse:\n    if connector_by_name_source_exists(\n        connector_data.name, connector_data.source, db_session\n    ):\n        raise ValueError(\n            \"Connector by this name already exists, duplicate naming not allowed.\"\n        )\n\n    connector = Connector(\n        name=connector_data.name,\n        source=connector_data.source,\n        input_type=connector_data.input_type,\n        connector_specific_config=connector_data.connector_specific_config,\n        refresh_freq=connector_data.refresh_freq,\n        indexing_start=connector_data.indexing_start,\n        prune_freq=connector_data.prune_freq,\n    )\n    db_session.add(connector)\n    db_session.commit()\n\n    return ObjectCreationIdResponse(id=connector.id)\n\n\ndef update_connector(\n    connector_id: int,\n    connector_data: ConnectorBase,\n    db_session: Session,\n) -> Connector | None:\n    connector = fetch_connector_by_id(connector_id, db_session)\n    if connector is None:\n        return None\n\n    if connector_data.name != connector.name and connector_by_name_source_exists(\n        connector_data.name, connector_data.source, db_session\n    ):\n        raise ValueError(\n            \"Connector by this name already exists, duplicate naming not allowed.\"\n        )\n\n    connector.name = connector_data.name\n    connector.source = connector_data.source\n    connector.input_type = connector_data.input_type\n    connector.connector_specific_config = connector_data.connector_specific_config\n    connector.refresh_freq = connector_data.refresh_freq\n    connector.prune_freq = (\n        connector_data.prune_freq\n        if connector_data.prune_freq is not None\n        else DEFAULT_PRUNING_FREQ\n    )\n\n    db_session.commit()\n    return connector\n\n\ndef delete_connector(\n    db_session: Session,\n    connector_id: int,\n) -> StatusResponse[int]:\n    \"\"\"Only used in special cases (e.g. a connector is in a bad state and we need to delete it).\n    Be VERY careful using this, as it could lead to a bad state if not used correctly.\n    \"\"\"\n    connector = fetch_connector_by_id(connector_id, db_session)\n    if connector is None:\n        return StatusResponse(\n            success=True, message=\"Connector was already deleted\", data=connector_id\n        )\n\n    db_session.delete(connector)\n    return StatusResponse(\n        success=True, message=\"Connector deleted successfully\", data=connector_id\n    )\n\n\ndef get_connector_credential_ids(\n    connector_id: int,\n    db_session: Session,\n) -> list[int]:\n    connector = fetch_connector_by_id(connector_id, db_session)\n    if connector is None:\n        raise ValueError(f\"Connector by id {connector_id} does not exist\")\n\n    return [association.credential.id for association in connector.credentials]\n\n\ndef fetch_latest_index_attempt_by_connector(\n    db_session: Session,\n    source: DocumentSource | None = None,\n) -> list[IndexAttempt]:\n    latest_index_attempts: list[IndexAttempt] = []\n\n    if source:\n        connectors = fetch_connectors(db_session, sources=[source])\n    else:\n        connectors = fetch_connectors(db_session)\n\n    if not connectors:\n        return []\n\n    for connector in connectors:\n        latest_index_attempt = (\n            db_session.query(IndexAttempt)\n            .join(ConnectorCredentialPair)\n            .filter(ConnectorCredentialPair.connector_id == connector.id)\n            .order_by(IndexAttempt.time_updated.desc())\n            .first()\n        )\n\n        if latest_index_attempt is not None:\n            latest_index_attempts.append(latest_index_attempt)\n\n    return latest_index_attempts\n\n\ndef fetch_latest_index_attempts_by_status(\n    db_session: Session,\n) -> list[IndexAttempt]:\n    subquery = (\n        db_session.query(\n            IndexAttempt.connector_credential_pair_id,\n            IndexAttempt.status,\n            func.max(IndexAttempt.time_updated).label(\"time_updated\"),\n        )\n        .group_by(IndexAttempt.connector_credential_pair_id)\n        .group_by(IndexAttempt.status)\n        .subquery()\n    )\n\n    alias = aliased(IndexAttempt, subquery)\n\n    query = db_session.query(IndexAttempt).join(\n        alias,\n        and_(\n            IndexAttempt.connector_credential_pair_id\n            == alias.connector_credential_pair_id,\n            IndexAttempt.status == alias.status,\n            IndexAttempt.time_updated == alias.time_updated,\n        ),\n    )\n\n    return cast(list[IndexAttempt], query.all())\n\n\ndef fetch_unique_document_sources(db_session: Session) -> list[DocumentSource]:\n    distinct_sources = db_session.query(Connector.source).distinct().all()\n\n    sources = [\n        source[0]\n        for source in distinct_sources\n        if source[0] != DocumentSource.INGESTION_API\n    ]\n\n    return sources\n\n\ndef create_initial_default_connector(db_session: Session) -> None:\n    default_connector_id = 0\n    default_connector = fetch_connector_by_id(default_connector_id, db_session)\n    if default_connector is not None:\n        if (\n            default_connector.source != DocumentSource.INGESTION_API\n            or default_connector.input_type != InputType.LOAD_STATE\n            or default_connector.refresh_freq is not None\n            or default_connector.name != \"Ingestion API\"\n            or default_connector.connector_specific_config != {}\n            or default_connector.prune_freq is not None\n        ):\n            logger.warning(\n                \"Default connector does not have expected values. Updating to proper state.\"\n            )\n            # Ensure default connector has correct values\n            default_connector.source = DocumentSource.INGESTION_API\n            default_connector.input_type = InputType.LOAD_STATE\n            default_connector.refresh_freq = None\n            default_connector.name = \"Ingestion API\"\n            default_connector.connector_specific_config = {}\n            default_connector.prune_freq = None\n            db_session.commit()\n        return\n\n    # Create a new default connector if it doesn't exist\n    connector = Connector(\n        id=default_connector_id,\n        name=\"Ingestion API\",\n        source=DocumentSource.INGESTION_API,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={},\n        refresh_freq=None,\n        prune_freq=None,\n    )\n    db_session.add(connector)\n    db_session.commit()\n\n\ndef mark_ccpair_as_pruned(cc_pair_id: int, db_session: Session) -> None:\n    stmt = select(ConnectorCredentialPair).where(\n        ConnectorCredentialPair.id == cc_pair_id\n    )\n    cc_pair = db_session.scalar(stmt)\n    if cc_pair is None:\n        raise ValueError(f\"No cc_pair with ID: {cc_pair_id}\")\n\n    cc_pair.last_pruned = datetime.now(timezone.utc)\n    db_session.commit()\n\n\ndef mark_cc_pair_as_hierarchy_fetched(db_session: Session, cc_pair_id: int) -> None:\n    stmt = select(ConnectorCredentialPair).where(\n        ConnectorCredentialPair.id == cc_pair_id\n    )\n    cc_pair = db_session.scalar(stmt)\n    if cc_pair is None:\n        raise ValueError(f\"No cc_pair with ID: {cc_pair_id}\")\n\n    cc_pair.last_time_hierarchy_fetch = datetime.now(timezone.utc)\n    db_session.commit()\n\n\ndef mark_cc_pair_as_permissions_synced(\n    db_session: Session, cc_pair_id: int, start_time: datetime | None\n) -> None:\n    stmt = select(ConnectorCredentialPair).where(\n        ConnectorCredentialPair.id == cc_pair_id\n    )\n    cc_pair = db_session.scalar(stmt)\n    if cc_pair is None:\n        raise ValueError(f\"No cc_pair with ID: {cc_pair_id}\")\n\n    cc_pair.last_time_perm_sync = start_time\n    db_session.commit()\n\n\ndef mark_cc_pair_as_external_group_synced(db_session: Session, cc_pair_id: int) -> None:\n    stmt = select(ConnectorCredentialPair).where(\n        ConnectorCredentialPair.id == cc_pair_id\n    )\n    cc_pair = db_session.scalar(stmt)\n    if cc_pair is None:\n        raise ValueError(f\"No cc_pair with ID: {cc_pair_id}\")\n\n    # The sync time can be marked after it ran because all group syncs\n    # are run in full, not polling for changes.\n    # If this changes, we need to update this function.\n    cc_pair.last_time_external_group_sync = datetime.now(timezone.utc)\n    db_session.commit()\n\n\ndef mark_ccpair_with_indexing_trigger(\n    cc_pair_id: int, indexing_mode: IndexingMode | None, db_session: Session\n) -> None:\n    \"\"\"indexing_mode sets a field which will be picked up by a background task\n    to trigger indexing. Set to None to disable the trigger.\"\"\"\n    try:\n        cc_pair = db_session.execute(\n            select(ConnectorCredentialPair)\n            .where(ConnectorCredentialPair.id == cc_pair_id)\n            .with_for_update()\n        ).scalar_one()\n\n        if cc_pair is None:\n            raise ValueError(f\"No cc_pair with ID: {cc_pair_id}\")\n\n        cc_pair.indexing_trigger = indexing_mode\n        db_session.commit()\n    except Exception:\n        db_session.rollback()\n        raise\n\n\ndef get_kg_enabled_connectors(db_session: Session) -> list[KGConnectorData]:\n    \"\"\"\n    Retrieves a list of connector IDs that have not been KG processed for a given tenant.\n    Args:\n        db_session (Session): The database session to use\n    Returns:\n        list[KGConnectorData]: List of connector IDs with KG extraction enabled but have unprocessed documents\n    \"\"\"\n    try:\n        stmt = select(Connector.id, Connector.source, Connector.kg_coverage_days).where(\n            Connector.kg_processing_enabled\n        )\n        result = db_session.execute(stmt)\n\n        connector_results = [\n            KGConnectorData(id=row[0], source=row[1].lower(), kg_coverage_days=row[2])\n            for row in result.fetchall()\n        ]\n\n        return connector_results\n\n    except Exception as e:\n        logger.error(f\"Error fetching unprocessed connector IDs: {str(e)}\")\n        raise e\n"
  },
  {
    "path": "backend/onyx/db/connector_credential_pair.py",
    "content": "from datetime import datetime\nfrom enum import Enum\nfrom typing import TypeVarTuple\n\nfrom fastapi import HTTPException\nfrom sqlalchemy import delete\nfrom sqlalchemy import desc\nfrom sqlalchemy import exists\nfrom sqlalchemy import Select\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import aliased\nfrom sqlalchemy.orm import joinedload\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.connector import fetch_connector_by_id\nfrom onyx.db.credentials import fetch_credential_by_id\nfrom onyx.db.credentials import fetch_credential_by_id_for_user\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import ProcessingMode\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.models import IndexingStatus\nfrom onyx.db.models import SearchSettings\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserGroup__ConnectorCredentialPair\nfrom onyx.db.models import UserRole\nfrom onyx.server.models import StatusResponse\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\n\nlogger = setup_logger()\n\nR = TypeVarTuple(\"R\")\n\n\nclass ConnectorType(str, Enum):\n    STANDARD = \"standard\"\n    USER_FILE = \"user_file\"\n\n\ndef _add_user_filters(\n    stmt: Select[tuple[*R]], user: User, get_editable: bool = True\n) -> Select[tuple[*R]]:\n    if user.role == UserRole.ADMIN:\n        return stmt\n\n    # If anonymous user, only show public cc_pairs\n    if user.is_anonymous:\n        where_clause = ConnectorCredentialPair.access_type == AccessType.PUBLIC\n        return stmt.where(where_clause)\n\n    stmt = stmt.distinct()\n    UG__CCpair = aliased(UserGroup__ConnectorCredentialPair)\n    User__UG = aliased(User__UserGroup)\n\n    \"\"\"\n    Here we select cc_pairs by relation:\n    User -> User__UserGroup -> UserGroup__ConnectorCredentialPair ->\n    ConnectorCredentialPair\n    \"\"\"\n    stmt = stmt.outerjoin(UG__CCpair).outerjoin(\n        User__UG,\n        User__UG.user_group_id == UG__CCpair.user_group_id,\n    )\n\n    \"\"\"\n    Filter cc_pairs by:\n    - if the user is in the user_group that owns the cc_pair\n    - if the user is not a global_curator, they must also have a curator relationship\n    to the user_group\n    - if editing is being done, we also filter out cc_pairs that are owned by groups\n    that the user isn't a curator for\n    - if we are not editing, we show all cc_pairs in the groups the user is a curator\n    for (as well as public cc_pairs)\n    \"\"\"\n\n    where_clause = User__UG.user_id == user.id\n    if user.role == UserRole.CURATOR and get_editable:\n        where_clause &= User__UG.is_curator == True  # noqa: E712\n    if get_editable:\n        user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id)\n        if user.role == UserRole.CURATOR:\n            user_groups = user_groups.where(\n                User__UserGroup.is_curator == True  # noqa: E712\n            )\n        where_clause &= (\n            ~exists()\n            .where(UG__CCpair.cc_pair_id == ConnectorCredentialPair.id)\n            .where(~UG__CCpair.user_group_id.in_(user_groups))\n            .correlate(ConnectorCredentialPair)\n        )\n        where_clause |= ConnectorCredentialPair.creator_id == user.id\n    else:\n        where_clause |= ConnectorCredentialPair.access_type == AccessType.PUBLIC\n        where_clause |= ConnectorCredentialPair.access_type == AccessType.SYNC\n\n    return stmt.where(where_clause)\n\n\ndef get_connector_credential_pairs_for_user(\n    db_session: Session,\n    user: User,\n    get_editable: bool = True,\n    ids: list[int] | None = None,\n    eager_load_connector: bool = False,\n    eager_load_credential: bool = False,\n    eager_load_user: bool = False,\n    order_by_desc: bool = False,\n    source: DocumentSource | None = None,\n    processing_mode: ProcessingMode | None = ProcessingMode.REGULAR,\n    defer_connector_config: bool = False,\n) -> list[ConnectorCredentialPair]:\n    \"\"\"Get connector credential pairs for a user.\n\n    Args:\n        processing_mode: Filter by processing mode. Defaults to REGULAR to hide\n            FILE_SYSTEM connectors from standard admin UI. Pass None to get all.\n        defer_connector_config: If True, skips loading Connector.connector_specific_config\n            to avoid fetching large JSONB blobs when they aren't needed.\n    \"\"\"\n    if eager_load_user:\n        assert (\n            eager_load_credential\n        ), \"eager_load_credential must be True if eager_load_user is True\"\n    stmt = select(ConnectorCredentialPair).distinct()\n\n    if eager_load_connector:\n        connector_load = selectinload(ConnectorCredentialPair.connector)\n        if defer_connector_config:\n            connector_load = connector_load.defer(Connector.connector_specific_config)\n        stmt = stmt.options(connector_load)\n\n    if eager_load_credential:\n        load_opts = selectinload(ConnectorCredentialPair.credential)\n        if eager_load_user:\n            load_opts = load_opts.joinedload(Credential.user)\n        stmt = stmt.options(load_opts)\n\n    stmt = _add_user_filters(stmt, user, get_editable)\n\n    if source:\n        stmt = stmt.join(ConnectorCredentialPair.connector).where(\n            Connector.source == source.value\n        )\n\n    if ids:\n        stmt = stmt.where(ConnectorCredentialPair.id.in_(ids))\n\n    if processing_mode is not None:\n        stmt = stmt.where(ConnectorCredentialPair.processing_mode == processing_mode)\n\n    if order_by_desc:\n        stmt = stmt.order_by(desc(ConnectorCredentialPair.id))\n\n    return list(db_session.scalars(stmt).unique().all())\n\n\n# For use with our thread-level parallelism utils. Note that any relationships\n# you wish to use MUST be eagerly loaded, as the session will not be available\n# after this function to allow lazy loading.\ndef get_connector_credential_pairs_for_user_parallel(\n    user: User,\n    get_editable: bool = True,\n    ids: list[int] | None = None,\n    eager_load_connector: bool = False,\n    eager_load_credential: bool = False,\n    eager_load_user: bool = False,\n    order_by_desc: bool = False,\n    source: DocumentSource | None = None,\n    processing_mode: ProcessingMode | None = ProcessingMode.REGULAR,\n    defer_connector_config: bool = False,\n) -> list[ConnectorCredentialPair]:\n    with get_session_with_current_tenant() as db_session:\n        return get_connector_credential_pairs_for_user(\n            db_session=db_session,\n            user=user,\n            get_editable=get_editable,\n            ids=ids,\n            eager_load_connector=eager_load_connector,\n            eager_load_credential=eager_load_credential,\n            eager_load_user=eager_load_user,\n            order_by_desc=order_by_desc,\n            source=source,\n            processing_mode=processing_mode,\n            defer_connector_config=defer_connector_config,\n        )\n\n\ndef get_connector_credential_pairs(\n    db_session: Session, ids: list[int] | None = None\n) -> list[ConnectorCredentialPair]:\n    stmt = select(ConnectorCredentialPair).distinct()\n\n    if ids:\n        stmt = stmt.where(ConnectorCredentialPair.id.in_(ids))\n\n    return list(db_session.scalars(stmt).all())\n\n\ndef add_deletion_failure_message(\n    db_session: Session,\n    cc_pair_id: int,\n    failure_message: str,\n) -> None:\n    cc_pair = get_connector_credential_pair_from_id(\n        db_session=db_session,\n        cc_pair_id=cc_pair_id,\n    )\n    if not cc_pair:\n        return\n    cc_pair.deletion_failure_message = failure_message\n    db_session.commit()\n\n\ndef get_cc_pair_groups_for_ids(\n    db_session: Session,\n    cc_pair_ids: list[int],\n) -> list[UserGroup__ConnectorCredentialPair]:\n    stmt = select(UserGroup__ConnectorCredentialPair).distinct()\n    stmt = stmt.outerjoin(\n        ConnectorCredentialPair,\n        UserGroup__ConnectorCredentialPair.cc_pair_id == ConnectorCredentialPair.id,\n    )\n    stmt = stmt.where(UserGroup__ConnectorCredentialPair.cc_pair_id.in_(cc_pair_ids))\n    return list(db_session.scalars(stmt).all())\n\n\n# For use with our thread-level parallelism utils. Note that any relationships\n# you wish to use MUST be eagerly loaded, as the session will not be available\n# after this function to allow lazy loading.\ndef get_cc_pair_groups_for_ids_parallel(\n    cc_pair_ids: list[int],\n) -> list[UserGroup__ConnectorCredentialPair]:\n    with get_session_with_current_tenant() as db_session:\n        return get_cc_pair_groups_for_ids(db_session, cc_pair_ids)\n\n\ndef get_connector_credential_pair_for_user(\n    db_session: Session,\n    connector_id: int,\n    credential_id: int,\n    user: User,\n    get_editable: bool = True,\n) -> ConnectorCredentialPair | None:\n    stmt = select(ConnectorCredentialPair)\n    stmt = _add_user_filters(stmt, user, get_editable)\n    stmt = stmt.where(ConnectorCredentialPair.connector_id == connector_id)\n    stmt = stmt.where(ConnectorCredentialPair.credential_id == credential_id)\n    result = db_session.execute(stmt)\n    return result.scalar_one_or_none()\n\n\ndef get_connector_credential_pair(\n    db_session: Session,\n    connector_id: int,\n    credential_id: int,\n) -> ConnectorCredentialPair | None:\n    stmt = select(ConnectorCredentialPair)\n    stmt = stmt.where(ConnectorCredentialPair.connector_id == connector_id)\n    stmt = stmt.where(ConnectorCredentialPair.credential_id == credential_id)\n    result = db_session.execute(stmt)\n    return result.scalar_one_or_none()\n\n\ndef get_connector_credential_pair_from_id_for_user(\n    cc_pair_id: int,\n    db_session: Session,\n    user: User,\n    get_editable: bool = True,\n) -> ConnectorCredentialPair | None:\n    stmt = select(ConnectorCredentialPair).distinct()\n    stmt = _add_user_filters(stmt, user, get_editable)\n    stmt = stmt.where(ConnectorCredentialPair.id == cc_pair_id)\n    result = db_session.execute(stmt)\n    return result.scalar_one_or_none()\n\n\ndef verify_user_has_access_to_cc_pair(\n    cc_pair_id: int,\n    db_session: Session,\n    user: User,\n    get_editable: bool = True,\n) -> bool:\n    stmt = select(ConnectorCredentialPair.id)\n    stmt = _add_user_filters(stmt, user, get_editable)\n    stmt = stmt.where(ConnectorCredentialPair.id == cc_pair_id)\n    result = db_session.execute(stmt)\n    return result.scalars().first() is not None\n\n\ndef get_connector_credential_pair_from_id(\n    db_session: Session,\n    cc_pair_id: int,\n    eager_load_connector: bool = False,\n    eager_load_credential: bool = False,\n) -> ConnectorCredentialPair | None:\n    stmt = select(ConnectorCredentialPair).distinct()\n    stmt = stmt.where(ConnectorCredentialPair.id == cc_pair_id)\n\n    if eager_load_credential:\n        stmt = stmt.options(joinedload(ConnectorCredentialPair.credential))\n    if eager_load_connector:\n        stmt = stmt.options(joinedload(ConnectorCredentialPair.connector))\n\n    result = db_session.execute(stmt)\n    return result.scalar_one_or_none()\n\n\ndef get_connector_credential_pairs_for_source(\n    db_session: Session,\n    source: DocumentSource,\n) -> list[ConnectorCredentialPair]:\n    stmt = (\n        select(ConnectorCredentialPair)\n        .join(ConnectorCredentialPair.connector)\n        .where(Connector.source == source)\n    )\n    return list(db_session.scalars(stmt).unique().all())\n\n\ndef get_last_successful_attempt_poll_range_end(\n    cc_pair_id: int,\n    earliest_index: float,\n    search_settings: SearchSettings,\n    db_session: Session,\n) -> float:\n    \"\"\"Used to get the latest `poll_range_end` for a given connector and credential.\n\n    This can be used to determine the next \"start\" time for a new index attempt.\n\n    Note that the attempts time_started is not necessarily correct - that gets set\n    separately and is similar but not exactly the same as the `poll_range_end`.\n    \"\"\"\n    latest_successful_index_attempt = (\n        db_session.query(IndexAttempt)\n        .join(\n            ConnectorCredentialPair,\n            IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id,\n        )\n        .filter(\n            ConnectorCredentialPair.id == cc_pair_id,\n            IndexAttempt.search_settings_id == search_settings.id,\n            IndexAttempt.status == IndexingStatus.SUCCESS,\n        )\n        .order_by(IndexAttempt.poll_range_end.desc())\n        .first()\n    )\n    if (\n        not latest_successful_index_attempt\n        or not latest_successful_index_attempt.poll_range_end\n    ):\n        return earliest_index\n\n    return latest_successful_index_attempt.poll_range_end.timestamp()\n\n\n\"\"\"Updates\"\"\"\n\n\ndef _update_connector_credential_pair(\n    db_session: Session,\n    cc_pair: ConnectorCredentialPair,\n    status: ConnectorCredentialPairStatus | None = None,\n    net_docs: int | None = None,\n    run_dt: datetime | None = None,\n) -> None:\n    # simply don't update last_successful_index_time if run_dt is not specified\n    # at worst, this would result in re-indexing documents that were already indexed\n    if run_dt is not None:\n        cc_pair.last_successful_index_time = run_dt\n    if net_docs is not None:\n        cc_pair.total_docs_indexed += net_docs\n    if status is not None:\n        cc_pair.status = status\n\n    db_session.commit()\n\n\ndef update_connector_credential_pair_from_id(\n    db_session: Session,\n    cc_pair_id: int,\n    status: ConnectorCredentialPairStatus | None = None,\n    net_docs: int | None = None,\n    run_dt: datetime | None = None,\n) -> None:\n    cc_pair = get_connector_credential_pair_from_id(\n        db_session=db_session,\n        cc_pair_id=cc_pair_id,\n    )\n    if not cc_pair:\n        logger.warning(\n            f\"Attempted to update pair for Connector Credential Pair '{cc_pair_id}' but it does not exist\"\n        )\n        return\n\n    _update_connector_credential_pair(\n        db_session=db_session,\n        cc_pair=cc_pair,\n        status=status,\n        net_docs=net_docs,\n        run_dt=run_dt,\n    )\n\n\ndef update_connector_credential_pair(\n    db_session: Session,\n    connector_id: int,\n    credential_id: int,\n    status: ConnectorCredentialPairStatus | None = None,\n    net_docs: int | None = None,\n    run_dt: datetime | None = None,\n) -> None:\n    cc_pair = get_connector_credential_pair(\n        db_session=db_session,\n        connector_id=connector_id,\n        credential_id=credential_id,\n    )\n    if not cc_pair:\n        logger.warning(\n            f\"Attempted to update pair for connector id {connector_id} and credential id {credential_id}\"\n        )\n        return\n\n    _update_connector_credential_pair(\n        db_session=db_session,\n        cc_pair=cc_pair,\n        status=status,\n        net_docs=net_docs,\n        run_dt=run_dt,\n    )\n\n\ndef set_cc_pair_repeated_error_state(\n    db_session: Session,\n    cc_pair_id: int,\n    in_repeated_error_state: bool,\n) -> None:\n    stmt = (\n        update(ConnectorCredentialPair)\n        .where(ConnectorCredentialPair.id == cc_pair_id)\n        .values(in_repeated_error_state=in_repeated_error_state)\n    )\n    db_session.execute(stmt)\n    db_session.commit()\n\n\ndef delete_connector_credential_pair__no_commit(\n    db_session: Session,\n    connector_id: int,\n    credential_id: int,\n) -> None:\n    stmt = delete(ConnectorCredentialPair).where(\n        ConnectorCredentialPair.connector_id == connector_id,\n        ConnectorCredentialPair.credential_id == credential_id,\n    )\n    db_session.execute(stmt)\n\n\ndef associate_default_cc_pair(db_session: Session) -> None:\n    existing_association = (\n        db_session.query(ConnectorCredentialPair)\n        .filter(\n            ConnectorCredentialPair.connector_id == 0,\n            ConnectorCredentialPair.credential_id == 0,\n        )\n        .one_or_none()\n    )\n    if existing_association is not None:\n        return\n\n    # DefaultCCPair has id 1 since it is the first CC pair created\n    # It is DEFAULT_CC_PAIR_ID, but can't set it explicitly because it messed with the\n    # auto-incrementing id\n    association = ConnectorCredentialPair(\n        connector_id=0,\n        credential_id=0,\n        access_type=AccessType.PUBLIC,\n        name=\"DefaultCCPair\",\n        status=ConnectorCredentialPairStatus.ACTIVE,\n    )\n    db_session.add(association)\n    db_session.commit()\n\n\ndef _relate_groups_to_cc_pair__no_commit(\n    db_session: Session,\n    cc_pair_id: int,\n    user_group_ids: list[int] | None = None,\n) -> None:\n    if not user_group_ids:\n        return\n\n    for group_id in user_group_ids:\n        db_session.add(\n            UserGroup__ConnectorCredentialPair(\n                user_group_id=group_id, cc_pair_id=cc_pair_id\n            )\n        )\n\n\ndef add_credential_to_connector(\n    db_session: Session,\n    user: User,\n    connector_id: int,\n    credential_id: int,\n    cc_pair_name: str,\n    access_type: AccessType,\n    groups: list[int] | None,\n    auto_sync_options: dict | None = None,\n    initial_status: ConnectorCredentialPairStatus = ConnectorCredentialPairStatus.SCHEDULED,\n    last_successful_index_time: datetime | None = None,\n    seeding_flow: bool = False,\n    processing_mode: ProcessingMode = ProcessingMode.REGULAR,\n) -> StatusResponse:\n    connector = fetch_connector_by_id(connector_id, db_session)\n\n    # If we are in the seeding flow, we shouldn't need to check if the credential belongs to the user\n    if seeding_flow:\n        credential = fetch_credential_by_id(\n            credential_id=credential_id,\n            db_session=db_session,\n        )\n    else:\n        credential = fetch_credential_by_id_for_user(\n            credential_id,\n            user,\n            db_session,\n            get_editable=False,\n        )\n\n    if connector is None:\n        raise HTTPException(status_code=404, detail=\"Connector does not exist\")\n\n    if access_type == AccessType.SYNC:\n        if not fetch_ee_implementation_or_noop(\n            \"onyx.external_permissions.sync_params\",\n            \"check_if_valid_sync_source\",\n            noop_return_value=True,\n        )(connector.source):\n            raise HTTPException(\n                status_code=400,\n                detail=f\"Connector of type {connector.source} does not support SYNC access type\",\n            )\n\n    if credential is None:\n        error_msg = (\n            f\"Credential {credential_id} does not exist or does not belong to user\"\n        )\n        logger.error(error_msg)\n        raise HTTPException(\n            status_code=401,\n            detail=error_msg,\n        )\n\n    existing_association = (\n        db_session.query(ConnectorCredentialPair)\n        .filter(\n            ConnectorCredentialPair.connector_id == connector_id,\n            ConnectorCredentialPair.credential_id == credential_id,\n        )\n        .one_or_none()\n    )\n    if existing_association is not None:\n        return StatusResponse(\n            success=False,\n            message=f\"Connector {connector_id} already has Credential {credential_id}\",\n            data=connector_id,\n        )\n\n    association = ConnectorCredentialPair(\n        creator_id=user.id,\n        connector_id=connector_id,\n        credential_id=credential_id,\n        name=cc_pair_name,\n        status=initial_status,\n        access_type=access_type,\n        auto_sync_options=auto_sync_options,\n        last_successful_index_time=last_successful_index_time,\n        processing_mode=processing_mode,\n    )\n    db_session.add(association)\n    db_session.flush()  # make sure the association has an id\n    db_session.refresh(association)\n\n    _relate_groups_to_cc_pair__no_commit(\n        db_session=db_session,\n        cc_pair_id=association.id,\n        user_group_ids=groups,\n    )\n\n    db_session.commit()\n\n    return StatusResponse(\n        success=True,\n        message=f\"Creating new association between Connector {connector_id} and Credential {credential_id}\",\n        data=association.id,\n    )\n\n\ndef remove_credential_from_connector(\n    connector_id: int,\n    credential_id: int,\n    user: User,\n    db_session: Session,\n) -> StatusResponse[int]:\n    connector = fetch_connector_by_id(connector_id, db_session)\n    credential = fetch_credential_by_id_for_user(\n        credential_id,\n        user,\n        db_session,\n        get_editable=False,\n    )\n\n    if connector is None:\n        raise HTTPException(status_code=404, detail=\"Connector does not exist\")\n\n    if credential is None:\n        raise HTTPException(\n            status_code=404,\n            detail=\"Credential does not exist or does not belong to user\",\n        )\n\n    association = get_connector_credential_pair_for_user(\n        db_session=db_session,\n        connector_id=connector_id,\n        credential_id=credential_id,\n        user=user,\n        get_editable=True,\n    )\n\n    if association is not None:\n        fetch_ee_implementation_or_noop(\n            \"onyx.db.external_perm\",\n            \"delete_user__ext_group_for_cc_pair__no_commit\",\n        )(\n            db_session=db_session,\n            cc_pair_id=association.id,\n        )\n        db_session.delete(association)\n        db_session.commit()\n        return StatusResponse(\n            success=True,\n            message=f\"Credential {credential_id} removed from Connector\",\n            data=connector_id,\n        )\n\n    return StatusResponse(\n        success=False,\n        message=f\"Connector already does not have Credential {credential_id}\",\n        data=connector_id,\n    )\n\n\ndef fetch_indexable_standard_connector_credential_pair_ids(\n    db_session: Session,\n    active_cc_pairs_only: bool = True,\n    limit: int | None = None,\n) -> list[int]:\n    stmt = select(ConnectorCredentialPair.id)\n\n    # For regular indexing checks\n    if active_cc_pairs_only:\n        stmt = stmt.where(\n            ConnectorCredentialPair.status.in_(\n                ConnectorCredentialPairStatus.active_statuses()\n            )\n        )\n    else:\n        # For embedding swap checks, include PAUSED and exclude DELETING or INVALID\n        stmt = stmt.where(\n            ConnectorCredentialPair.status.in_(\n                ConnectorCredentialPairStatus.indexable_statuses()\n            )\n        )\n\n    if limit:\n        stmt = stmt.limit(limit)\n\n    return list(db_session.scalars(stmt))\n\n\ndef fetch_connector_credential_pair_for_connector(\n    db_session: Session,\n    connector_id: int,\n) -> ConnectorCredentialPair | None:\n    stmt = select(ConnectorCredentialPair).where(\n        ConnectorCredentialPair.connector_id == connector_id,\n    )\n    return db_session.scalar(stmt)\n\n\ndef resync_cc_pair(\n    cc_pair: ConnectorCredentialPair,\n    search_settings_id: int,\n    db_session: Session,\n) -> None:\n    \"\"\"\n    Updates state stored in the connector_credential_pair table based on the\n    latest index attempt for the given search settings.\n\n    Args:\n        cc_pair: ConnectorCredentialPair to resync\n        search_settings_id: SearchSettings to use for resync\n        db_session: Database session\n    \"\"\"\n\n    def find_latest_index_attempt(\n        connector_id: int,\n        credential_id: int,\n        only_include_success: bool,\n        db_session: Session,\n    ) -> IndexAttempt | None:\n        query = (\n            db_session.query(IndexAttempt)\n            .join(\n                ConnectorCredentialPair,\n                IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id,\n            )\n            .filter(\n                ConnectorCredentialPair.connector_id == connector_id,\n                ConnectorCredentialPair.credential_id == credential_id,\n                IndexAttempt.search_settings_id == search_settings_id,\n            )\n        )\n\n        if only_include_success:\n            query = query.filter(IndexAttempt.status == IndexingStatus.SUCCESS)\n\n        latest_index_attempt = query.order_by(desc(IndexAttempt.time_started)).first()\n\n        return latest_index_attempt\n\n    last_success = find_latest_index_attempt(\n        connector_id=cc_pair.connector_id,\n        credential_id=cc_pair.credential_id,\n        only_include_success=True,\n        db_session=db_session,\n    )\n\n    cc_pair.last_successful_index_time = (\n        last_success.time_started if last_success else None\n    )\n\n    db_session.commit()\n\n\n# ── Metrics query helpers ──────────────────────────────────────────────\n\n\ndef get_connector_health_for_metrics(\n    db_session: Session,\n) -> list:  # Returns list of Row tuples\n    \"\"\"Return connector health data for Prometheus metrics.\n\n    Each row is (cc_pair_id, status, in_repeated_error_state,\n    last_successful_index_time, name, source).\n    \"\"\"\n    return (\n        db_session.query(\n            ConnectorCredentialPair.id,\n            ConnectorCredentialPair.status,\n            ConnectorCredentialPair.in_repeated_error_state,\n            ConnectorCredentialPair.last_successful_index_time,\n            ConnectorCredentialPair.name,\n            Connector.source,\n        )\n        .join(\n            Connector,\n            ConnectorCredentialPair.connector_id == Connector.id,\n        )\n        .all()\n    )\n"
  },
  {
    "path": "backend/onyx/db/constants.py",
    "content": "SLACK_BOT_PERSONA_PREFIX = \"__slack_bot_persona__\"\nDEFAULT_PERSONA_SLACK_CHANNEL_NAME = \"DEFAULT_SLACK_CHANNEL\"\n\nCONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX = \"ConnectorValidationError:\"\n\n\n# Sentinel value to distinguish between \"not provided\" and \"explicitly set to None\"\nclass UnsetType:\n    def __repr__(self) -> str:\n        return \"<UNSET>\"\n\n\nUNSET = UnsetType()\n"
  },
  {
    "path": "backend/onyx/db/credentials.py",
    "content": "from typing import Any\n\nfrom sqlalchemy import exists\nfrom sqlalchemy import Select\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.sql.expression import and_\nfrom sqlalchemy.sql.expression import or_\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,\n)\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom onyx.db.models import Credential__UserGroup\nfrom onyx.db.models import DocumentByConnectorCredentialPair\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.server.documents.models import CredentialBase\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n# The credentials for these sources are not real so\n# permissions are not enforced for them\nCREDENTIAL_PERMISSIONS_TO_IGNORE = {\n    DocumentSource.FILE,\n    DocumentSource.WEB,\n    DocumentSource.NOT_APPLICABLE,\n    DocumentSource.GOOGLE_SITES,\n    DocumentSource.WIKIPEDIA,\n    DocumentSource.MEDIAWIKI,\n}\n\nPUBLIC_CREDENTIAL_ID = 0\n\n\ndef _add_user_filters(\n    stmt: Select,\n    user: User,\n    get_editable: bool = True,\n) -> Select:\n    \"\"\"Attaches filters to the statement to ensure that the user can only\n    access the appropriate credentials\"\"\"\n    if user.is_anonymous:\n        raise ValueError(\"Anonymous users are not allowed to access credentials\")\n\n    if user.role == UserRole.ADMIN:\n        # Admins can access all credentials that are public or owned by them\n        # or are not associated with any user\n        return stmt.where(\n            or_(\n                Credential.user_id == user.id,\n                Credential.user_id.is_(None),\n                Credential.admin_public == True,  # noqa: E712\n                Credential.source.in_(CREDENTIAL_PERMISSIONS_TO_IGNORE),\n            )\n        )\n    if user.role == UserRole.BASIC:\n        # Basic users can only access credentials that are owned by them\n        return stmt.where(Credential.user_id == user.id)\n\n    stmt = stmt.distinct()\n    \"\"\"\n    THIS PART IS FOR CURATORS AND GLOBAL CURATORS\n    Here we select cc_pairs by relation:\n    User -> User__UserGroup -> Credential__UserGroup -> Credential\n    \"\"\"\n    stmt = stmt.outerjoin(Credential__UserGroup).outerjoin(\n        User__UserGroup,\n        User__UserGroup.user_group_id == Credential__UserGroup.user_group_id,\n    )\n    \"\"\"\n    Filter Credentials by:\n    - if the user is in the user_group that owns the Credential\n    - if the user is a curator, they must also have a curator relationship\n    to the user_group\n    - if editing is being done, we also filter out Credentials that are owned by groups\n    that the user isn't a curator for\n    - if we are not editing, we show all Credentials in the groups the user is a curator\n    for (as well as public Credentials)\n    - if we are not editing, we return all Credentials directly connected to the user\n    \"\"\"\n    where_clause = User__UserGroup.user_id == user.id\n    if user.role == UserRole.CURATOR:\n        where_clause &= User__UserGroup.is_curator == True  # noqa: E712\n\n    if get_editable:\n        user_groups = select(User__UserGroup.user_group_id).where(\n            User__UserGroup.user_id == user.id\n        )\n        if user.role == UserRole.CURATOR:\n            user_groups = user_groups.where(\n                User__UserGroup.is_curator == True  # noqa: E712\n            )\n        where_clause &= (\n            ~exists()\n            .where(Credential__UserGroup.credential_id == Credential.id)\n            .where(~Credential__UserGroup.user_group_id.in_(user_groups))\n            .correlate(Credential)\n        )\n    else:\n        where_clause |= Credential.curator_public == True  # noqa: E712\n        where_clause |= Credential.user_id == user.id  # noqa: E712\n\n    where_clause |= Credential.source.in_(CREDENTIAL_PERMISSIONS_TO_IGNORE)\n\n    return stmt.where(where_clause)\n\n\ndef _relate_credential_to_user_groups__no_commit(\n    db_session: Session,\n    credential_id: int,\n    user_group_ids: list[int],\n) -> None:\n    credential_user_groups = []\n    for group_id in user_group_ids:\n        credential_user_groups.append(\n            Credential__UserGroup(\n                credential_id=credential_id,\n                user_group_id=group_id,\n            )\n        )\n    db_session.add_all(credential_user_groups)\n\n\ndef fetch_credentials_for_user(\n    db_session: Session,\n    user: User,\n    get_editable: bool = True,\n) -> list[Credential]:\n    stmt = select(Credential)\n    stmt = _add_user_filters(stmt, user, get_editable=get_editable)\n    results = db_session.scalars(stmt)\n    return list(results.all())\n\n\ndef fetch_credential_by_id_for_user(\n    credential_id: int,\n    user: User,\n    db_session: Session,\n    get_editable: bool = True,\n) -> Credential | None:\n    stmt = select(Credential).distinct()\n    stmt = stmt.where(Credential.id == credential_id)\n    stmt = _add_user_filters(\n        stmt=stmt,\n        user=user,\n        get_editable=get_editable,\n    )\n    result = db_session.execute(stmt)\n    credential = result.scalar_one_or_none()\n    return credential\n\n\ndef fetch_credential_by_id(\n    credential_id: int,\n    db_session: Session,\n) -> Credential | None:\n    stmt = select(Credential).distinct()\n    stmt = stmt.where(Credential.id == credential_id)\n    result = db_session.execute(stmt)\n    credential = result.scalar_one_or_none()\n    return credential\n\n\ndef fetch_credentials_by_source_for_user(\n    db_session: Session,\n    user: User,\n    document_source: DocumentSource | None = None,\n    get_editable: bool = True,\n) -> list[Credential]:\n    base_query = select(Credential).where(Credential.source == document_source)\n    base_query = _add_user_filters(base_query, user, get_editable=get_editable)\n    credentials = db_session.execute(base_query).scalars().all()\n    return list(credentials)\n\n\ndef fetch_credentials_by_source(\n    db_session: Session,\n    document_source: DocumentSource | None = None,\n) -> list[Credential]:\n    base_query = select(Credential).where(Credential.source == document_source)\n    credentials = db_session.execute(base_query).scalars().all()\n    return list(credentials)\n\n\ndef swap_credentials_connector(\n    new_credential_id: int, connector_id: int, user: User, db_session: Session\n) -> ConnectorCredentialPair:\n    # Check if the user has permission to use the new credential\n    new_credential = fetch_credential_by_id_for_user(\n        new_credential_id, user, db_session\n    )\n    if not new_credential:\n        raise ValueError(\n            f\"No Credential found with id {new_credential_id} or user doesn't have permission to use it\"\n        )\n\n    # Existing pair\n    existing_pair = db_session.execute(\n        select(ConnectorCredentialPair).where(\n            ConnectorCredentialPair.connector_id == connector_id\n        )\n    ).scalar_one_or_none()\n\n    if not existing_pair:\n        raise ValueError(\n            f\"No ConnectorCredentialPair found for connector_id {connector_id}\"\n        )\n\n    # Check if the new credential is compatible with the connector\n    if new_credential.source != existing_pair.connector.source:\n        raise ValueError(\n            f\"New credential source {new_credential.source} does not match connector source {existing_pair.connector.source}\"\n        )\n\n    db_session.execute(\n        update(DocumentByConnectorCredentialPair)\n        .where(\n            and_(\n                DocumentByConnectorCredentialPair.connector_id == connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == existing_pair.credential_id,\n            )\n        )\n        .values(credential_id=new_credential_id)\n    )\n\n    # Update the existing pair with the new credential\n    existing_pair.credential_id = new_credential_id\n    existing_pair.credential = new_credential\n\n    # Update ccpair status if it's in INVALID state\n    if existing_pair.status == ConnectorCredentialPairStatus.INVALID:\n        existing_pair.status = ConnectorCredentialPairStatus.ACTIVE\n\n    # Commit the changes\n    db_session.commit()\n\n    # Refresh the object to ensure all relationships are up-to-date\n    db_session.refresh(existing_pair)\n    return existing_pair\n\n\ndef create_credential(\n    credential_data: CredentialBase,\n    user: User,\n    db_session: Session,\n) -> Credential:\n    credential = Credential(\n        credential_json=credential_data.credential_json,\n        user_id=user.id,\n        admin_public=credential_data.admin_public,\n        source=credential_data.source,\n        name=credential_data.name,\n        curator_public=credential_data.curator_public,\n    )\n    db_session.add(credential)\n    db_session.flush()  # This ensures the credential gets an ID\n    _relate_credential_to_user_groups__no_commit(\n        db_session=db_session,\n        credential_id=credential.id,\n        user_group_ids=credential_data.groups,\n    )\n\n    db_session.commit()\n    # Expire to ensure credential_json is reloaded as SensitiveValue from DB\n    db_session.expire(credential)\n    return credential\n\n\ndef _cleanup_credential__user_group_relationships__no_commit(\n    db_session: Session, credential_id: int\n) -> None:\n    \"\"\"NOTE: does not commit the transaction.\"\"\"\n    db_session.query(Credential__UserGroup).filter(\n        Credential__UserGroup.credential_id == credential_id\n    ).delete(synchronize_session=False)\n\n\ndef alter_credential(\n    credential_id: int,\n    name: str,\n    credential_json: dict[str, Any],\n    user: User,\n    db_session: Session,\n) -> Credential | None:\n    # TODO: add user group relationship update\n    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)\n\n    if credential is None:\n        return None\n\n    credential.name = name\n\n    # Get existing credential_json and merge with new values\n    existing_json = (\n        credential.credential_json.get_value(apply_mask=False)\n        if credential.credential_json\n        else {}\n    )\n    credential.credential_json = {  # type: ignore[assignment]\n        **existing_json,\n        **credential_json,\n    }\n\n    credential.user_id = user.id\n    db_session.commit()\n    # Expire to ensure credential_json is reloaded as SensitiveValue from DB\n    db_session.expire(credential)\n    return credential\n\n\ndef update_credential(\n    credential_id: int,\n    credential_data: CredentialBase,\n    user: User,\n    db_session: Session,\n) -> Credential | None:\n    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)\n    if credential is None:\n        return None\n\n    credential.credential_json = credential_data.credential_json  # type: ignore[assignment]\n    credential.user_id = user.id if user is not None else None\n\n    db_session.commit()\n    # Expire to ensure credential_json is reloaded as SensitiveValue from DB\n    db_session.expire(credential)\n    return credential\n\n\ndef update_credential_json(\n    credential_id: int,\n    credential_json: dict[str, Any],\n    user: User,\n    db_session: Session,\n) -> Credential | None:\n    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)\n    if credential is None:\n        return None\n\n    credential.credential_json = credential_json  # type: ignore[assignment]\n    db_session.commit()\n    # Expire to ensure credential_json is reloaded as SensitiveValue from DB\n    db_session.expire(credential)\n    return credential\n\n\ndef backend_update_credential_json(\n    credential: Credential,\n    credential_json: dict[str, Any],\n    db_session: Session,\n) -> None:\n    \"\"\"This should not be used in any flows involving the frontend or users\"\"\"\n    credential.credential_json = credential_json  # type: ignore[assignment]\n    db_session.commit()\n\n\ndef _delete_credential_internal(\n    credential: Credential,\n    credential_id: int,\n    db_session: Session,\n    force: bool = False,\n) -> None:\n    \"\"\"Internal utility function to handle the actual deletion of a credential\"\"\"\n    associated_connectors = (\n        db_session.query(ConnectorCredentialPair)\n        .filter(ConnectorCredentialPair.credential_id == credential_id)\n        .all()\n    )\n\n    associated_doc_cc_pairs = (\n        db_session.query(DocumentByConnectorCredentialPair)\n        .filter(DocumentByConnectorCredentialPair.credential_id == credential_id)\n        .all()\n    )\n\n    if associated_connectors or associated_doc_cc_pairs:\n        if force:\n            logger.warning(\n                f\"Force deleting credential {credential_id} and its associated records\"\n            )\n\n            # Delete DocumentByConnectorCredentialPair records first\n            for doc_cc_pair in associated_doc_cc_pairs:\n                db_session.delete(doc_cc_pair)\n\n            # Then delete ConnectorCredentialPair records\n            for connector in associated_connectors:\n                db_session.delete(connector)\n\n            # Commit these deletions before deleting the credential\n            db_session.flush()\n        else:\n            raise ValueError(\n                f\"Cannot delete credential as it is still associated with \"\n                f\"{len(associated_connectors)} connector(s) and {len(associated_doc_cc_pairs)} document(s). \"\n            )\n\n    if force:\n        logger.warning(f\"Force deleting credential {credential_id}\")\n    else:\n        logger.notice(f\"Deleting credential {credential_id}\")\n\n    _cleanup_credential__user_group_relationships__no_commit(db_session, credential_id)\n    db_session.delete(credential)\n    db_session.commit()\n\n\ndef delete_credential_for_user(\n    credential_id: int,\n    user: User,\n    db_session: Session,\n    force: bool = False,\n) -> None:\n    \"\"\"Delete a credential that belongs to a specific user\"\"\"\n    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)\n    if credential is None:\n        raise ValueError(\n            f\"Credential by provided id {credential_id} does not exist or does not belong to user\"\n        )\n\n    _delete_credential_internal(credential, credential_id, db_session, force)\n\n\ndef delete_credential(\n    credential_id: int,\n    db_session: Session,\n    force: bool = False,\n) -> None:\n    \"\"\"Delete a credential regardless of ownership (admin function)\"\"\"\n    credential = fetch_credential_by_id(credential_id, db_session)\n    if credential is None:\n        raise ValueError(f\"Credential by provided id {credential_id} does not exist\")\n\n    _delete_credential_internal(credential, credential_id, db_session, force)\n\n\ndef create_initial_public_credential(db_session: Session) -> None:\n    error_msg = (\n        \"DB is not in a valid initial state.\"\n        \"There must exist an empty public credential for data connectors that do not require additional Auth.\"\n    )\n    first_credential = fetch_credential_by_id(\n        credential_id=PUBLIC_CREDENTIAL_ID,\n        db_session=db_session,\n    )\n\n    if first_credential is not None:\n        credential_json_value = (\n            first_credential.credential_json.get_value(apply_mask=False)\n            if first_credential.credential_json\n            else {}\n        )\n        if credential_json_value != {} or first_credential.user is not None:\n            raise ValueError(error_msg)\n        return\n\n    credential = Credential(\n        id=PUBLIC_CREDENTIAL_ID,\n        credential_json={},\n        user_id=None,\n    )\n    db_session.add(credential)\n    db_session.commit()\n\n\ndef cleanup_gmail_credentials(db_session: Session) -> None:\n    gmail_credentials = fetch_credentials_by_source(\n        db_session=db_session, document_source=DocumentSource.GMAIL\n    )\n    for credential in gmail_credentials:\n        db_session.delete(credential)\n    db_session.commit()\n\n\ndef cleanup_google_drive_credentials(db_session: Session) -> None:\n    google_drive_credentials = fetch_credentials_by_source(\n        db_session=db_session, document_source=DocumentSource.GOOGLE_DRIVE\n    )\n    for credential in google_drive_credentials:\n        db_session.delete(credential)\n    db_session.commit()\n\n\ndef delete_service_account_credentials(\n    user: User, db_session: Session, source: DocumentSource\n) -> None:\n    credentials = fetch_credentials_for_user(db_session=db_session, user=user)\n    for credential in credentials:\n        credential_json = (\n            credential.credential_json.get_value(apply_mask=False)\n            if credential.credential_json\n            else {}\n        )\n        if (\n            credential_json.get(DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY)\n            and credential.source == source\n        ):\n            db_session.delete(credential)\n\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/dal.py",
    "content": "\"\"\"Base Data Access Layer (DAL) for database operations.\n\nThe DAL pattern groups related database operations into cohesive classes\nwith explicit session management. It supports two usage modes:\n\n  1. **External session** (FastAPI endpoints) — the caller provides a session\n     whose lifecycle is managed by FastAPI's dependency injection.\n\n  2. **Self-managed session** (Celery tasks, scripts) — the DAL creates its\n     own session via the tenant-aware session factory.\n\nSubclasses add domain-specific query methods while inheriting session\nmanagement. See ``ee.onyx.db.scim.ScimDAL`` for a concrete example.\n\nExample (FastAPI)::\n\n    def get_scim_dal(db_session: Session = Depends(get_session)) -> ScimDAL:\n        return ScimDAL(db_session)\n\n    @router.get(\"/users\")\n    def list_users(dal: ScimDAL = Depends(get_scim_dal)) -> ...:\n        return dal.list_user_mappings(...)\n\nExample (Celery)::\n\n    with ScimDAL.from_tenant(\"tenant_abc\") as dal:\n        dal.create_user_mapping(...)\n        dal.commit()\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom collections.abc import Generator\nfrom contextlib import contextmanager\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\n\n\nclass DAL:\n    \"\"\"Base Data Access Layer.\n\n    Holds a SQLAlchemy session and provides transaction control helpers.\n    Subclasses add domain-specific query methods.\n    \"\"\"\n\n    def __init__(self, db_session: Session) -> None:\n        self._session = db_session\n\n    @property\n    def session(self) -> Session:\n        \"\"\"Direct access to the underlying session for advanced use cases.\"\"\"\n        return self._session\n\n    def commit(self) -> None:\n        self._session.commit()\n\n    def flush(self) -> None:\n        self._session.flush()\n\n    def rollback(self) -> None:\n        self._session.rollback()\n\n    @classmethod\n    @contextmanager\n    def from_tenant(cls, tenant_id: str) -> Generator[\"DAL\", None, None]:\n        \"\"\"Create a DAL with a self-managed session for the given tenant.\n\n        The session is automatically closed when the context manager exits.\n        The caller must explicitly call ``commit()`` to persist changes.\n        \"\"\"\n        with get_session_with_tenant(tenant_id=tenant_id) as session:\n            yield cls(session)\n"
  },
  {
    "path": "backend/onyx/db/deletion_attempt.py",
    "content": "from sqlalchemy.orm import Session\n\nfrom onyx.db.index_attempt import get_last_attempt\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import IndexingStatus\nfrom onyx.db.search_settings import get_current_search_settings\n\n\ndef check_deletion_attempt_is_allowed(\n    connector_credential_pair: ConnectorCredentialPair,\n    db_session: Session,\n    allow_scheduled: bool = False,\n) -> str | None:\n    \"\"\"\n    To be deletable:\n        (1) connector should be paused\n        (2) there should be no in-progress/planned index attempts\n\n    Returns an error message if the deletion attempt is not allowed, otherwise None.\n    \"\"\"\n    base_error_msg = (\n        f\"Connector with ID '{connector_credential_pair.connector_id}' and credential ID \"\n        f\"'{connector_credential_pair.credential_id}' is not deletable.\"\n    )\n\n    if connector_credential_pair.status.is_active():\n        return base_error_msg + \" Connector must be paused.\"\n\n    connector_id = connector_credential_pair.connector_id\n    credential_id = connector_credential_pair.credential_id\n    search_settings = get_current_search_settings(db_session)\n\n    last_indexing = get_last_attempt(\n        connector_id=connector_id,\n        credential_id=credential_id,\n        search_settings_id=search_settings.id,\n        db_session=db_session,\n    )\n\n    if not last_indexing:\n        return None\n\n    if last_indexing.status == IndexingStatus.IN_PROGRESS or (\n        last_indexing.status == IndexingStatus.NOT_STARTED and not allow_scheduled\n    ):\n        return (\n            base_error_msg\n            + \" There is an ongoing / planned indexing attempt. \"\n            + \"The indexing attempt must be completed or cancelled before deletion.\"\n        )\n\n    return None\n"
  },
  {
    "path": "backend/onyx/db/discord_bot.py",
    "content": "\"\"\"CRUD operations for Discord bot models.\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timezone\n\nfrom sqlalchemy import delete\nfrom sqlalchemy import select\nfrom sqlalchemy.exc import IntegrityError\nfrom sqlalchemy.orm import joinedload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.api_key import build_displayable_api_key\nfrom onyx.auth.api_key import generate_api_key\nfrom onyx.auth.api_key import hash_api_key\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.constants import DISCORD_SERVICE_API_KEY_NAME\nfrom onyx.db.api_key import insert_api_key\nfrom onyx.db.models import ApiKey\nfrom onyx.db.models import DiscordBotConfig\nfrom onyx.db.models import DiscordChannelConfig\nfrom onyx.db.models import DiscordGuildConfig\nfrom onyx.db.models import User\nfrom onyx.db.utils import DiscordChannelView\nfrom onyx.server.api_key.models import APIKeyArgs\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n# === DiscordBotConfig ===\n\n\ndef get_discord_bot_config(db_session: Session) -> DiscordBotConfig | None:\n    \"\"\"Get the Discord bot config for this tenant (at most one).\"\"\"\n    return db_session.scalar(select(DiscordBotConfig).limit(1))\n\n\ndef create_discord_bot_config(\n    db_session: Session,\n    bot_token: str,\n) -> DiscordBotConfig:\n    \"\"\"Create the Discord bot config. Raises ValueError if already exists.\n\n    The check constraint on id='SINGLETON' ensures only one config per tenant.\n    \"\"\"\n    existing = get_discord_bot_config(db_session)\n    if existing:\n        raise ValueError(\"Discord bot config already exists\")\n\n    config = DiscordBotConfig(bot_token=bot_token)\n    db_session.add(config)\n    try:\n        db_session.flush()\n    except IntegrityError:\n        # Race condition: another request created the config concurrently\n        db_session.rollback()\n        raise ValueError(\"Discord bot config already exists\")\n    return config\n\n\ndef delete_discord_bot_config(db_session: Session) -> bool:\n    \"\"\"Delete the Discord bot config. Returns True if deleted.\"\"\"\n    result = db_session.execute(delete(DiscordBotConfig))\n    db_session.flush()\n    return result.rowcount > 0  # type: ignore[attr-defined]\n\n\n# === Discord Service API Key ===\n\n\ndef get_discord_service_api_key(db_session: Session) -> ApiKey | None:\n    \"\"\"Get the Discord service API key if it exists.\"\"\"\n    return db_session.scalar(\n        select(ApiKey).where(ApiKey.name == DISCORD_SERVICE_API_KEY_NAME)\n    )\n\n\ndef get_or_create_discord_service_api_key(\n    db_session: Session,\n    tenant_id: str,\n) -> str:\n    \"\"\"Get existing Discord service API key or create one.\n\n    The API key is used by the Discord bot to authenticate with the\n    Onyx API pods when sending chat requests.\n\n    Args:\n        db_session: Database session for the tenant.\n        tenant_id: The tenant ID (used for logging/context).\n\n    Returns:\n        The raw API key string (not hashed).\n\n    Raises:\n        RuntimeError: If API key creation fails.\n    \"\"\"\n    # Check for existing key\n    existing = get_discord_service_api_key(db_session)\n    if existing:\n        # Database only stores the hash, so we must regenerate to get the raw key.\n        # This is safe since the Discord bot is the only consumer of this key.\n        logger.debug(\n            f\"Found existing Discord service API key for tenant {tenant_id} that isn't in cache, regenerating to update cache\"\n        )\n        new_api_key = generate_api_key(tenant_id)\n        existing.hashed_api_key = hash_api_key(new_api_key)\n        existing.api_key_display = build_displayable_api_key(new_api_key)\n        db_session.flush()\n        return new_api_key\n\n    # Create new API key\n    logger.info(f\"Creating Discord service API key for tenant {tenant_id}\")\n    api_key_args = APIKeyArgs(\n        name=DISCORD_SERVICE_API_KEY_NAME,\n        role=UserRole.LIMITED,  # Limited role is sufficient for chat requests\n    )\n    api_key_descriptor = insert_api_key(\n        db_session=db_session,\n        api_key_args=api_key_args,\n        user_id=None,  # Service account, no owner\n    )\n\n    if not api_key_descriptor.api_key:\n        raise RuntimeError(\n            f\"Failed to create Discord service API key for tenant {tenant_id}\"\n        )\n\n    return api_key_descriptor.api_key\n\n\ndef delete_discord_service_api_key(db_session: Session) -> bool:\n    \"\"\"Delete the Discord service API key for a tenant.\n\n    Called when:\n    - Bot config is deleted (self-hosted)\n    - All guild configs are deleted (Cloud)\n\n    Args:\n        db_session: Database session for the tenant.\n\n    Returns:\n        True if the key was deleted, False if it didn't exist.\n    \"\"\"\n    existing_key = get_discord_service_api_key(db_session)\n    if not existing_key:\n        return False\n\n    # Also delete the associated user\n    api_key_user = db_session.scalar(\n        select(User).where(User.id == existing_key.user_id)  # type: ignore[arg-type]\n    )\n\n    db_session.delete(existing_key)\n    if api_key_user:\n        db_session.delete(api_key_user)\n\n    db_session.flush()\n    logger.info(\"Deleted Discord service API key\")\n    return True\n\n\n# === DiscordGuildConfig ===\n\n\ndef get_guild_configs(\n    db_session: Session,\n    include_channels: bool = False,\n) -> list[DiscordGuildConfig]:\n    \"\"\"Get all guild configs for this tenant.\"\"\"\n    stmt = select(DiscordGuildConfig)\n    if include_channels:\n        stmt = stmt.options(joinedload(DiscordGuildConfig.channels))\n    return list(db_session.scalars(stmt).unique().all())\n\n\ndef get_guild_config_by_internal_id(\n    db_session: Session,\n    internal_id: int,\n) -> DiscordGuildConfig | None:\n    \"\"\"Get a specific guild config by its ID.\"\"\"\n    return db_session.scalar(\n        select(DiscordGuildConfig).where(DiscordGuildConfig.id == internal_id)\n    )\n\n\ndef get_guild_config_by_discord_id(\n    db_session: Session,\n    guild_id: int,\n) -> DiscordGuildConfig | None:\n    \"\"\"Get a guild config by Discord guild ID.\"\"\"\n    return db_session.scalar(\n        select(DiscordGuildConfig).where(DiscordGuildConfig.guild_id == guild_id)\n    )\n\n\ndef get_guild_config_by_registration_key(\n    db_session: Session,\n    registration_key: str,\n) -> DiscordGuildConfig | None:\n    \"\"\"Get a guild config by its registration key.\"\"\"\n    return db_session.scalar(\n        select(DiscordGuildConfig).where(\n            DiscordGuildConfig.registration_key == registration_key\n        )\n    )\n\n\ndef create_guild_config(\n    db_session: Session,\n    registration_key: str,\n) -> DiscordGuildConfig:\n    \"\"\"Create a new guild config with a registration key (guild_id=NULL).\"\"\"\n    config = DiscordGuildConfig(registration_key=registration_key)\n    db_session.add(config)\n    db_session.flush()\n    return config\n\n\ndef register_guild(\n    db_session: Session,\n    config: DiscordGuildConfig,\n    guild_id: int,\n    guild_name: str,\n) -> DiscordGuildConfig:\n    \"\"\"Complete registration by setting guild_id and guild_name.\"\"\"\n    config.guild_id = guild_id\n    config.guild_name = guild_name\n    config.registered_at = datetime.now(timezone.utc)\n    db_session.flush()\n    return config\n\n\ndef update_guild_config(\n    db_session: Session,\n    config: DiscordGuildConfig,\n    enabled: bool,\n    default_persona_id: int | None = None,\n) -> DiscordGuildConfig:\n    \"\"\"Update guild config fields.\"\"\"\n    config.enabled = enabled\n    config.default_persona_id = default_persona_id\n    db_session.flush()\n    return config\n\n\ndef delete_guild_config(\n    db_session: Session,\n    internal_id: int,\n) -> bool:\n    \"\"\"Delete guild config (cascades to channel configs). Returns True if deleted.\"\"\"\n    result = db_session.execute(\n        delete(DiscordGuildConfig).where(DiscordGuildConfig.id == internal_id)\n    )\n    db_session.flush()\n    return result.rowcount > 0  # type: ignore[attr-defined]\n\n\n# === DiscordChannelConfig ===\n\n\ndef get_channel_configs(\n    db_session: Session,\n    guild_config_id: int,\n) -> list[DiscordChannelConfig]:\n    \"\"\"Get all channel configs for a guild.\"\"\"\n    return list(\n        db_session.scalars(\n            select(DiscordChannelConfig).where(\n                DiscordChannelConfig.guild_config_id == guild_config_id\n            )\n        ).all()\n    )\n\n\ndef get_channel_config_by_discord_ids(\n    db_session: Session,\n    guild_id: int,\n    channel_id: int,\n) -> DiscordChannelConfig | None:\n    \"\"\"Get a specific channel config by guild_id and channel_id.\"\"\"\n    return db_session.scalar(\n        select(DiscordChannelConfig)\n        .join(DiscordGuildConfig)\n        .where(\n            DiscordGuildConfig.guild_id == guild_id,\n            DiscordChannelConfig.channel_id == channel_id,\n        )\n    )\n\n\ndef get_channel_config_by_internal_ids(\n    db_session: Session,\n    guild_config_id: int,\n    channel_config_id: int,\n) -> DiscordChannelConfig | None:\n    \"\"\"Get a specific channel config by guild_config_id and channel_config_id\"\"\"\n    return db_session.scalar(\n        select(DiscordChannelConfig).where(\n            DiscordChannelConfig.guild_config_id == guild_config_id,\n            DiscordChannelConfig.id == channel_config_id,\n        )\n    )\n\n\ndef update_discord_channel_config(\n    db_session: Session,\n    config: DiscordChannelConfig,\n    channel_name: str,\n    thread_only_mode: bool,\n    require_bot_invocation: bool,\n    enabled: bool,\n    persona_override_id: int | None = None,\n) -> DiscordChannelConfig:\n    \"\"\"Update channel config fields.\"\"\"\n    config.channel_name = channel_name\n    config.require_bot_invocation = require_bot_invocation\n    config.persona_override_id = persona_override_id\n    config.enabled = enabled\n    config.thread_only_mode = thread_only_mode\n    db_session.flush()\n    return config\n\n\ndef delete_discord_channel_config(\n    db_session: Session,\n    guild_config_id: int,\n    channel_config_id: int,\n) -> bool:\n    \"\"\"Delete a channel config. Returns True if deleted.\"\"\"\n    result = db_session.execute(\n        delete(DiscordChannelConfig).where(\n            DiscordChannelConfig.guild_config_id == guild_config_id,\n            DiscordChannelConfig.id == channel_config_id,\n        )\n    )\n    db_session.flush()\n    return result.rowcount > 0  # type: ignore[attr-defined]\n\n\ndef create_channel_config(\n    db_session: Session,\n    guild_config_id: int,\n    channel_view: DiscordChannelView,\n) -> DiscordChannelConfig:\n    \"\"\"Create a new channel config with default settings (disabled by default, admin enables via UI).\"\"\"\n    config = DiscordChannelConfig(\n        guild_config_id=guild_config_id,\n        channel_id=channel_view.channel_id,\n        channel_name=channel_view.channel_name,\n        channel_type=channel_view.channel_type,\n        is_private=channel_view.is_private,\n    )\n    db_session.add(config)\n    db_session.flush()\n    return config\n\n\ndef bulk_create_channel_configs(\n    db_session: Session,\n    guild_config_id: int,\n    channels: list[DiscordChannelView],\n) -> list[DiscordChannelConfig]:\n    \"\"\"Create multiple channel configs at once. Skips existing channels.\"\"\"\n    # Get existing channel IDs for this guild\n    existing_channel_ids = set(\n        db_session.scalars(\n            select(DiscordChannelConfig.channel_id).where(\n                DiscordChannelConfig.guild_config_id == guild_config_id\n            )\n        ).all()\n    )\n\n    # Create configs for new channels only\n    new_configs = []\n    for channel_view in channels:\n        if channel_view.channel_id not in existing_channel_ids:\n            config = DiscordChannelConfig(\n                guild_config_id=guild_config_id,\n                channel_id=channel_view.channel_id,\n                channel_name=channel_view.channel_name,\n                channel_type=channel_view.channel_type,\n                is_private=channel_view.is_private,\n            )\n            db_session.add(config)\n            new_configs.append(config)\n\n    db_session.flush()\n    return new_configs\n\n\ndef sync_channel_configs(\n    db_session: Session,\n    guild_config_id: int,\n    current_channels: list[DiscordChannelView],\n) -> tuple[int, int, int]:\n    \"\"\"Sync channel configs with current Discord channels.\n\n    - Creates configs for new channels (disabled by default)\n    - Removes configs for deleted channels\n    - Updates names and types for existing channels if changed\n\n    Returns: (added_count, removed_count, updated_count)\n    \"\"\"\n    current_channel_map = {\n        channel_view.channel_id: channel_view for channel_view in current_channels\n    }\n    current_channel_ids = set(current_channel_map.keys())\n\n    # Get existing configs\n    existing_configs = get_channel_configs(db_session, guild_config_id)\n    existing_channel_ids = {c.channel_id for c in existing_configs}\n\n    # Find channels to add, remove, and potentially update\n    to_add = current_channel_ids - existing_channel_ids\n    to_remove = existing_channel_ids - current_channel_ids\n\n    # Add new channels\n    added_count = 0\n    for channel_id in to_add:\n        channel_view = current_channel_map[channel_id]\n        create_channel_config(db_session, guild_config_id, channel_view)\n        added_count += 1\n\n    # Remove deleted channels\n    removed_count = 0\n    for config in existing_configs:\n        if config.channel_id in to_remove:\n            db_session.delete(config)\n            removed_count += 1\n\n    # Update names, types, and privacy for existing channels if changed\n    updated_count = 0\n    for config in existing_configs:\n        if config.channel_id in current_channel_ids:\n            channel_view = current_channel_map[config.channel_id]\n            changed = False\n            if config.channel_name != channel_view.channel_name:\n                config.channel_name = channel_view.channel_name\n                changed = True\n            if config.channel_type != channel_view.channel_type:\n                config.channel_type = channel_view.channel_type\n                changed = True\n            if config.is_private != channel_view.is_private:\n                config.is_private = channel_view.is_private\n                changed = True\n            if changed:\n                updated_count += 1\n\n    db_session.flush()\n    return added_count, removed_count, updated_count\n"
  },
  {
    "path": "backend/onyx/db/document.py",
    "content": "import contextlib\nimport time\nfrom collections.abc import Generator\nfrom collections.abc import Iterable\nfrom collections.abc import Sequence\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\nfrom uuid import UUID\n\nfrom sqlalchemy import and_\nfrom sqlalchemy import delete\nfrom sqlalchemy import exists\nfrom sqlalchemy import func\nfrom sqlalchemy import or_\nfrom sqlalchemy import Select\nfrom sqlalchemy import select\nfrom sqlalchemy import tuple_\nfrom sqlalchemy import update\nfrom sqlalchemy.dialects.postgresql import insert\nfrom sqlalchemy.engine.util import TransactionalContext\nfrom sqlalchemy.exc import OperationalError\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.sql.expression import null\n\nfrom onyx.configs.constants import DEFAULT_BOOST\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.kg_configs import KG_SIMPLE_ANSWER_MAX_DISPLAYED_SOURCES\nfrom onyx.db.chunk import delete_chunk_stats_by_connector_credential_pair__no_commit\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.document_access import apply_document_access_filter\nfrom onyx.db.entities import delete_from_kg_entities__no_commit\nfrom onyx.db.entities import delete_from_kg_entities_extraction_staging__no_commit\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.feedback import delete_document_feedback_for_documents__no_commit\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom onyx.db.models import Document as DbDocument\nfrom onyx.db.models import DocumentByConnectorCredentialPair\nfrom onyx.db.models import KGEntity\nfrom onyx.db.models import KGRelationship\nfrom onyx.db.models import User\nfrom onyx.db.relationships import delete_from_kg_relationships__no_commit\nfrom onyx.db.relationships import (\n    delete_from_kg_relationships_extraction_staging__no_commit,\n)\nfrom onyx.db.tag import delete_document_tags_for_documents__no_commit\nfrom onyx.db.utils import DocumentRow\nfrom onyx.db.utils import model_to_dict\nfrom onyx.db.utils import SortOrder\nfrom onyx.document_index.interfaces import DocumentMetadata\nfrom onyx.kg.models import KGStage\nfrom onyx.server.documents.models import ConnectorCredentialPairIdentifier\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nONE_HOUR_IN_SECONDS = 60 * 60\n\n\ndef check_docs_exist(db_session: Session) -> bool:\n    stmt = select(exists(DbDocument))\n    result = db_session.execute(stmt)\n    return result.scalar() or False\n\n\ndef count_documents_by_needs_sync(session: Session) -> int:\n    \"\"\"Get the count of all documents where:\n    1. last_modified is newer than last_synced\n    2. last_synced is null (meaning we've never synced)\n    AND the document has a relationship with a connector/credential pair\n\n    TODO: The documents without a relationship with a connector/credential pair\n    should be cleaned up somehow eventually.\n\n    This function executes the query and returns the count of\n    documents matching the criteria.\"\"\"\n\n    return (\n        session.query(DbDocument.id)\n        .filter(\n            or_(\n                DbDocument.last_modified > DbDocument.last_synced,\n                DbDocument.last_synced.is_(None),\n            )\n        )\n        .count()\n    )\n\n\ndef construct_document_id_select_by_needs_sync() -> Select:\n    \"\"\"Get all document IDs that need syncing across all connector credential pairs.\n\n    Returns a Select statement for documents where:\n    1. last_modified is newer than last_synced\n    2. last_synced is null (meaning we've never synced)\n    AND the document has a relationship with a connector/credential pair\n    \"\"\"\n    return select(DbDocument.id).where(\n        or_(\n            DbDocument.last_modified > DbDocument.last_synced,\n            DbDocument.last_synced.is_(None),\n        )\n    )\n\n\ndef construct_document_id_select_for_connector_credential_pair(\n    connector_id: int, credential_id: int | None = None\n) -> Select:\n    initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(\n        and_(\n            DocumentByConnectorCredentialPair.connector_id == connector_id,\n            DocumentByConnectorCredentialPair.credential_id == credential_id,\n        )\n    )\n    stmt = (\n        select(DbDocument.id).where(DbDocument.id.in_(initial_doc_ids_stmt)).distinct()\n    )\n    return stmt\n\n\ndef construct_document_select_for_connector_credential_pair(\n    connector_id: int, credential_id: int | None = None\n) -> Select:\n    initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(\n        and_(\n            DocumentByConnectorCredentialPair.connector_id == connector_id,\n            DocumentByConnectorCredentialPair.credential_id == credential_id,\n        )\n    )\n    stmt = select(DbDocument).where(DbDocument.id.in_(initial_doc_ids_stmt)).distinct()\n    return stmt\n\n\ndef get_documents_for_cc_pair(\n    db_session: Session,\n    cc_pair_id: int,\n) -> list[DbDocument]:\n    cc_pair = get_connector_credential_pair_from_id(\n        db_session=db_session,\n        cc_pair_id=cc_pair_id,\n    )\n    if not cc_pair:\n        raise ValueError(f\"No CC pair found with ID: {cc_pair_id}\")\n    stmt = construct_document_select_for_connector_credential_pair(\n        connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id\n    )\n    return list(db_session.scalars(stmt).all())\n\n\ndef get_document_ids_for_connector_credential_pair(\n    db_session: Session, connector_id: int, credential_id: int\n) -> list[str]:\n    doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(\n        and_(\n            DocumentByConnectorCredentialPair.connector_id == connector_id,\n            DocumentByConnectorCredentialPair.credential_id == credential_id,\n        )\n    )\n    return list(db_session.execute(doc_ids_stmt).scalars().all())\n\n\ndef get_documents_for_connector_credential_pair_limited_columns(\n    db_session: Session,\n    connector_id: int,\n    credential_id: int,\n    sort_order: SortOrder | None = None,\n) -> Sequence[DocumentRow]:\n\n    doc_ids_subquery = select(DocumentByConnectorCredentialPair.id).where(\n        and_(\n            DocumentByConnectorCredentialPair.connector_id == connector_id,\n            DocumentByConnectorCredentialPair.credential_id == credential_id,\n        )\n    )\n    doc_ids_subquery = doc_ids_subquery.join(\n        DbDocument, DocumentByConnectorCredentialPair.id == DbDocument.id\n    )\n\n    stmt = select(\n        DbDocument.id, DbDocument.doc_metadata, DbDocument.external_user_group_ids\n    )\n\n    stmt = stmt.where(DbDocument.id.in_(doc_ids_subquery))\n\n    if sort_order == SortOrder.ASC:\n        stmt = stmt.order_by(DbDocument.last_modified.asc())\n    elif sort_order == SortOrder.DESC:\n        stmt = stmt.order_by(DbDocument.last_modified.desc())\n\n    rows = db_session.execute(stmt).mappings().all()\n\n    doc_rows: list[DocumentRow] = []\n    for row in rows:\n        doc_row = DocumentRow(\n            id=row.id,\n            doc_metadata=row.doc_metadata,\n            external_user_group_ids=row.external_user_group_ids or [],\n        )\n        doc_rows.append(doc_row)\n    return doc_rows\n\n\ndef get_documents_for_connector_credential_pair(\n    db_session: Session, connector_id: int, credential_id: int, limit: int | None = None\n) -> Sequence[DbDocument]:\n    initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(\n        and_(\n            DocumentByConnectorCredentialPair.connector_id == connector_id,\n            DocumentByConnectorCredentialPair.credential_id == credential_id,\n        )\n    )\n    stmt = select(DbDocument).where(DbDocument.id.in_(initial_doc_ids_stmt)).distinct()\n    if limit:\n        stmt = stmt.limit(limit)\n    return db_session.scalars(stmt).all()\n\n\ndef get_documents_by_ids(\n    db_session: Session,\n    document_ids: list[str],\n) -> list[DbDocument]:\n    stmt = select(DbDocument).where(DbDocument.id.in_(document_ids))\n    documents = db_session.execute(stmt).scalars().all()\n    return list(documents)\n\n\ndef get_documents_by_source(\n    db_session: Session,\n    source: DocumentSource,\n    creator_id: UUID | None = None,\n) -> list[DbDocument]:\n    \"\"\"Get all documents associated with a specific source type.\n\n    This queries through the connector relationship to find all documents\n    that were indexed by connectors of the given source type.\n\n    Args:\n        db_session: Database session\n        source: The document source type to filter by\n        creator_id: If provided, only return documents from connectors\n                    created by this user. Filters via ConnectorCredentialPair.\n    \"\"\"\n    stmt = (\n        select(DbDocument)\n        .join(\n            DocumentByConnectorCredentialPair,\n            DbDocument.id == DocumentByConnectorCredentialPair.id,\n        )\n        .join(\n            ConnectorCredentialPair,\n            and_(\n                DocumentByConnectorCredentialPair.connector_id\n                == ConnectorCredentialPair.connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == ConnectorCredentialPair.credential_id,\n            ),\n        )\n        .join(\n            Connector,\n            ConnectorCredentialPair.connector_id == Connector.id,\n        )\n        .where(Connector.source == source)\n    )\n    if creator_id is not None:\n        stmt = stmt.where(ConnectorCredentialPair.creator_id == creator_id)\n    stmt = stmt.distinct()\n    documents = db_session.execute(stmt).scalars().all()\n    return list(documents)\n\n\ndef _apply_last_updated_cursor_filter(\n    stmt: Select,\n    cursor_last_modified: datetime | None,\n    cursor_last_synced: datetime | None,\n    cursor_document_id: str | None,\n    is_ascending: bool,\n) -> Select:\n    \"\"\"Apply cursor filter for last_updated sorting.\n\n    ASC uses nulls_first (NULLs at start), DESC uses nulls_last (NULLs at end).\n    This affects which extra clauses are needed when the cursor has NULL last_synced\n    vs non-NULL last_synced.\n    \"\"\"\n    if not cursor_last_modified or not cursor_document_id:\n        return stmt\n\n    # Pick comparison operators based on sort direction\n    if is_ascending:\n        modified_cmp = DbDocument.last_modified > cursor_last_modified\n        synced_cmp = DbDocument.last_synced > cursor_last_synced\n        id_cmp = DbDocument.id > cursor_document_id\n    else:\n        modified_cmp = DbDocument.last_modified < cursor_last_modified\n        synced_cmp = DbDocument.last_synced < cursor_last_synced\n        id_cmp = DbDocument.id < cursor_document_id\n\n    if cursor_last_synced is None:\n        # Cursor has NULL last_synced\n        # ASC (nulls_first): NULL is at start, so non-NULL values come after\n        # DESC (nulls_last): NULL is at end, so nothing with non-NULL comes after\n        base_clauses = [\n            modified_cmp,\n            and_(\n                DbDocument.last_modified == cursor_last_modified,\n                DbDocument.last_synced.is_(None),\n                id_cmp,\n            ),\n        ]\n        if is_ascending:\n            # Any non-NULL last_synced comes after NULL when nulls_first\n            base_clauses.append(\n                and_(\n                    DbDocument.last_modified == cursor_last_modified,\n                    DbDocument.last_synced.is_not(None),\n                )\n            )\n        return stmt.where(or_(*base_clauses))\n\n    # Cursor has non-NULL last_synced\n    # ASC (nulls_first): NULLs came before, so no NULL clause needed\n    # DESC (nulls_last): NULLs come after non-NULL values\n    synced_clauses = [\n        synced_cmp,\n        and_(DbDocument.last_synced == cursor_last_synced, id_cmp),\n    ]\n    if not is_ascending:\n        # NULLs come after all non-NULL values when nulls_last\n        synced_clauses.append(DbDocument.last_synced.is_(None))\n\n    return stmt.where(\n        or_(\n            modified_cmp,\n            and_(\n                DbDocument.last_modified == cursor_last_modified,\n                or_(*synced_clauses),\n            ),\n        )\n    )\n\n\ndef _apply_name_cursor_filter_asc(\n    stmt: Select,\n    cursor_name: str | None,\n    cursor_document_id: str | None,\n) -> Select:\n    \"\"\"Apply cursor filter for name ASC sorting.\"\"\"\n    if not cursor_name or not cursor_document_id:\n        return stmt\n    return stmt.where(\n        or_(\n            DbDocument.semantic_id > cursor_name,\n            and_(\n                DbDocument.semantic_id == cursor_name,\n                DbDocument.id > cursor_document_id,\n            ),\n        )\n    )\n\n\ndef _apply_name_cursor_filter_desc(\n    stmt: Select,\n    cursor_name: str | None,\n    cursor_document_id: str | None,\n) -> Select:\n    \"\"\"Apply cursor filter for name DESC sorting.\"\"\"\n    if not cursor_name or not cursor_document_id:\n        return stmt\n    return stmt.where(\n        or_(\n            DbDocument.semantic_id < cursor_name,\n            and_(\n                DbDocument.semantic_id == cursor_name,\n                DbDocument.id < cursor_document_id,\n            ),\n        )\n    )\n\n\ndef get_accessible_documents_for_hierarchy_node_paginated(\n    db_session: Session,\n    parent_hierarchy_node_id: int,\n    user_email: str | None,\n    external_group_ids: list[str],\n    limit: int,\n    # Sort options\n    sort_by_name: bool = False,\n    sort_ascending: bool = False,\n    # Cursor fields for last_updated sorting\n    cursor_last_modified: datetime | None = None,\n    cursor_last_synced: datetime | None = None,\n    # Cursor field for name sorting\n    cursor_name: str | None = None,\n    # Document ID for tie-breaking (used by both sort types)\n    cursor_document_id: str | None = None,\n) -> list[DbDocument]:\n    stmt = select(DbDocument).where(\n        DbDocument.parent_hierarchy_node_id == parent_hierarchy_node_id\n    )\n    stmt = apply_document_access_filter(stmt, user_email, external_group_ids)\n\n    # Apply cursor filter based on sort type and direction\n    if sort_by_name:\n        if sort_ascending:\n            stmt = _apply_name_cursor_filter_asc(stmt, cursor_name, cursor_document_id)\n            stmt = stmt.order_by(DbDocument.semantic_id.asc(), DbDocument.id.asc())\n        else:\n            stmt = _apply_name_cursor_filter_desc(stmt, cursor_name, cursor_document_id)\n            stmt = stmt.order_by(DbDocument.semantic_id.desc(), DbDocument.id.desc())\n    else:\n        # Sort by last_updated\n        if sort_ascending:\n            stmt = _apply_last_updated_cursor_filter(\n                stmt,\n                cursor_last_modified,\n                cursor_last_synced,\n                cursor_document_id,\n                is_ascending=True,\n            )\n            stmt = stmt.order_by(\n                DbDocument.last_modified.asc(),\n                DbDocument.last_synced.asc().nulls_first(),\n                DbDocument.id.asc(),\n            )\n        else:\n            stmt = _apply_last_updated_cursor_filter(\n                stmt,\n                cursor_last_modified,\n                cursor_last_synced,\n                cursor_document_id,\n                is_ascending=False,\n            )\n            stmt = stmt.order_by(\n                DbDocument.last_modified.desc(),\n                DbDocument.last_synced.desc().nulls_last(),\n                DbDocument.id.desc(),\n            )\n\n    # Use distinct to avoid duplicates when a document belongs to multiple cc_pairs\n    stmt = stmt.distinct()\n    stmt = stmt.limit(limit)\n    return list(db_session.execute(stmt).scalars().all())\n\n\ndef filter_existing_document_ids(\n    db_session: Session,\n    document_ids: list[str],\n) -> set[str]:\n    \"\"\"Filter a list of document IDs to only those that exist in the database.\n\n    Args:\n        db_session: Database session\n        document_ids: List of document IDs to check for existence\n\n    Returns:\n        Set of document IDs from the input list that exist in the database\n    \"\"\"\n    if not document_ids:\n        return set()\n    stmt = select(DbDocument.id).where(DbDocument.id.in_(document_ids))\n    return set(db_session.execute(stmt).scalars().all())\n\n\ndef fetch_document_ids_by_links(\n    db_session: Session,\n    links: list[str],\n) -> dict[str, str]:\n    \"\"\"Fetch document IDs for documents whose link matches any of the provided values.\"\"\"\n    if not links:\n        return {}\n\n    stmt = select(DbDocument.link, DbDocument.id).where(DbDocument.link.in_(links))\n    rows = db_session.execute(stmt).all()\n    return {link: doc_id for link, doc_id in rows if link}\n\n\ndef get_document_connector_count(\n    db_session: Session,\n    document_id: str,\n) -> int:\n    results = get_document_connector_counts(db_session, [document_id])\n    if not results or len(results) == 0:\n        return 0\n\n    return results[0][1]\n\n\ndef get_document_connector_counts(\n    db_session: Session,\n    document_ids: list[str],\n) -> Sequence[tuple[str, int]]:\n    stmt = (\n        select(\n            DocumentByConnectorCredentialPair.id,\n            func.count(),\n        )\n        .where(DocumentByConnectorCredentialPair.id.in_(document_ids))\n        .group_by(DocumentByConnectorCredentialPair.id)\n    )\n    return db_session.execute(stmt).all()  # type: ignore\n\n\ndef get_document_counts_for_cc_pairs(\n    db_session: Session, cc_pairs: list[ConnectorCredentialPairIdentifier]\n) -> Sequence[tuple[int, int, int]]:\n    \"\"\"Returns a sequence of tuples of (connector_id, credential_id, document count)\"\"\"\n\n    if not cc_pairs:\n        return []\n\n    # Prepare a list of (connector_id, credential_id) tuples\n    cc_ids = [(x.connector_id, x.credential_id) for x in cc_pairs]\n\n    # Batch to avoid generating extremely large IN clauses that can blow Postgres stack depth\n    batch_size = 1000\n    aggregated_counts: dict[tuple[int, int], int] = {}\n\n    for start_idx in range(0, len(cc_ids), batch_size):\n        batch = cc_ids[start_idx : start_idx + batch_size]\n\n        stmt = (\n            select(\n                DocumentByConnectorCredentialPair.connector_id,\n                DocumentByConnectorCredentialPair.credential_id,\n                func.count(),\n            )\n            .where(\n                and_(\n                    tuple_(\n                        DocumentByConnectorCredentialPair.connector_id,\n                        DocumentByConnectorCredentialPair.credential_id,\n                    ).in_(batch),\n                    DocumentByConnectorCredentialPair.has_been_indexed.is_(True),\n                )\n            )\n            .group_by(\n                DocumentByConnectorCredentialPair.connector_id,\n                DocumentByConnectorCredentialPair.credential_id,\n            )\n        )\n\n        for connector_id, credential_id, cnt in db_session.execute(stmt).all():\n            aggregated_counts[(connector_id, credential_id)] = cnt\n\n    # Convert aggregated results back to the expected sequence of tuples\n    return [\n        (connector_id, credential_id, cnt)\n        for (connector_id, credential_id), cnt in aggregated_counts.items()\n    ]\n\n\ndef get_document_counts_for_all_cc_pairs(\n    db_session: Session,\n) -> Sequence[tuple[int, int, int]]:\n    \"\"\"Return (connector_id, credential_id, count) for ALL CC pairs with indexed docs.\n\n    Executes a single grouped query so Postgres can fully leverage indexes,\n    avoiding large batched IN-lists.\n    \"\"\"\n    stmt = (\n        select(\n            DocumentByConnectorCredentialPair.connector_id,\n            DocumentByConnectorCredentialPair.credential_id,\n            func.count(),\n        )\n        .where(DocumentByConnectorCredentialPair.has_been_indexed.is_(True))\n        .group_by(\n            DocumentByConnectorCredentialPair.connector_id,\n            DocumentByConnectorCredentialPair.credential_id,\n        )\n    )\n    return db_session.execute(stmt).all()  # type: ignore\n\n\ndef get_access_info_for_document(\n    db_session: Session,\n    document_id: str,\n) -> tuple[str, list[str | None], bool] | None:\n    \"\"\"Gets access info for a single document by calling the get_access_info_for_documents function\n    and passing a list with a single document ID.\n    Args:\n        db_session (Session): The database session to use.\n        document_id (str): The document ID to fetch access info for.\n    Returns:\n        Optional[Tuple[str, List[str | None], bool]]: A tuple containing the document ID, a list of user emails,\n        and a boolean indicating if the document is globally public, or None if no results are found.\n    \"\"\"\n    results = get_access_info_for_documents(db_session, [document_id])\n    if not results:\n        return None\n\n    return results[0]\n\n\ndef get_access_info_for_documents(\n    db_session: Session,\n    document_ids: list[str],\n) -> Sequence[tuple[str, list[str | None], bool]]:\n    \"\"\"Gets back all relevant access info for the given documents. This includes\n    the user_ids for cc pairs that the document is associated with + whether any\n    of the associated cc pairs are intending to make the document globally public.\n    Returns the list where each element contains:\n    - Document ID (which is also the ID of the DocumentByConnectorCredentialPair)\n    - List of emails of Onyx users with direct access to the doc (includes a \"None\" element if\n      the connector was set up by an admin when auth was off\n    - bool for whether the document is public (the document later can also be marked public by\n      automatic permission sync step)\n    \"\"\"\n    stmt = select(\n        DocumentByConnectorCredentialPair.id,\n        func.array_agg(func.coalesce(User.email, null())).label(\"user_emails\"),\n        func.bool_or(ConnectorCredentialPair.access_type == AccessType.PUBLIC).label(\n            \"public_doc\"\n        ),\n    ).where(DocumentByConnectorCredentialPair.id.in_(document_ids))\n\n    stmt = (\n        stmt.join(\n            Credential,\n            DocumentByConnectorCredentialPair.credential_id == Credential.id,\n        )\n        .join(\n            ConnectorCredentialPair,\n            and_(\n                DocumentByConnectorCredentialPair.connector_id\n                == ConnectorCredentialPair.connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == ConnectorCredentialPair.credential_id,\n            ),\n        )\n        .outerjoin(\n            User,\n            and_(\n                Credential.user_id == User.id,\n                ConnectorCredentialPair.access_type != AccessType.SYNC,\n            ),\n        )\n        # don't include CC pairs that are being deleted\n        # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them\n        .where(ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING)\n        .group_by(DocumentByConnectorCredentialPair.id)\n    )\n    return db_session.execute(stmt).all()  # type: ignore\n\n\ndef upsert_documents(\n    db_session: Session,\n    document_metadata_batch: list[DocumentMetadata],\n    initial_boost: int = DEFAULT_BOOST,\n) -> None:\n    \"\"\"NOTE: this function is Postgres specific. Not all DBs support the ON CONFLICT clause.\n    Also note, this function should not be used for updating documents, only creating and\n    ensuring that it exists. It IGNORES the doc_updated_at field\"\"\"\n    seen_documents: dict[str, DocumentMetadata] = {}\n    for document_metadata in document_metadata_batch:\n        doc_id = document_metadata.document_id\n        if doc_id not in seen_documents:\n            seen_documents[doc_id] = document_metadata\n\n    if not seen_documents:\n        logger.info(\"No documents to upsert. Skipping.\")\n        return\n\n    includes_permissions = any(doc.external_access for doc in seen_documents.values())\n\n    insert_stmt = insert(DbDocument).values(\n        [\n            model_to_dict(\n                DbDocument(\n                    id=doc.document_id,\n                    from_ingestion_api=doc.from_ingestion_api,\n                    boost=initial_boost,\n                    hidden=False,\n                    semantic_id=doc.semantic_identifier,\n                    link=doc.first_link,\n                    doc_updated_at=None,  # this is intentional\n                    last_modified=datetime.now(timezone.utc),\n                    primary_owners=doc.primary_owners,\n                    secondary_owners=doc.secondary_owners,\n                    kg_stage=KGStage.NOT_STARTED,\n                    parent_hierarchy_node_id=doc.parent_hierarchy_node_id,\n                    **(\n                        {\n                            \"external_user_emails\": list(\n                                doc.external_access.external_user_emails\n                            ),\n                            \"external_user_group_ids\": list(\n                                doc.external_access.external_user_group_ids\n                            ),\n                            \"is_public\": doc.external_access.is_public,\n                        }\n                        if doc.external_access\n                        else {}\n                    ),\n                    doc_metadata=doc.doc_metadata,\n                )\n            )\n            for doc in seen_documents.values()\n        ]\n    )\n\n    update_set = {\n        \"from_ingestion_api\": insert_stmt.excluded.from_ingestion_api,\n        \"boost\": insert_stmt.excluded.boost,\n        \"hidden\": insert_stmt.excluded.hidden,\n        \"semantic_id\": insert_stmt.excluded.semantic_id,\n        \"link\": insert_stmt.excluded.link,\n        \"primary_owners\": insert_stmt.excluded.primary_owners,\n        \"secondary_owners\": insert_stmt.excluded.secondary_owners,\n        \"doc_metadata\": insert_stmt.excluded.doc_metadata,\n        \"parent_hierarchy_node_id\": insert_stmt.excluded.parent_hierarchy_node_id,\n    }\n    if includes_permissions:\n        # Use COALESCE to preserve existing permissions when new values are NULL.\n        # This prevents subsequent indexing runs (which don't fetch permissions)\n        # from overwriting permissions set by permission sync jobs.\n        update_set.update(\n            {\n                \"external_user_emails\": func.coalesce(\n                    insert_stmt.excluded.external_user_emails,\n                    DbDocument.external_user_emails,\n                ),\n                \"external_user_group_ids\": func.coalesce(\n                    insert_stmt.excluded.external_user_group_ids,\n                    DbDocument.external_user_group_ids,\n                ),\n                \"is_public\": func.coalesce(\n                    insert_stmt.excluded.is_public,\n                    DbDocument.is_public,\n                ),\n            }\n        )\n    on_conflict_stmt = insert_stmt.on_conflict_do_update(\n        index_elements=[\"id\"],\n        set_=update_set,  # Conflict target\n    )\n    db_session.execute(on_conflict_stmt)\n    db_session.commit()\n\n\ndef upsert_document_by_connector_credential_pair(\n    db_session: Session, connector_id: int, credential_id: int, document_ids: list[str]\n) -> None:\n    \"\"\"NOTE: this function is Postgres specific. Not all DBs support the ON CONFLICT clause.\"\"\"\n    if not document_ids:\n        logger.info(\"`document_ids` is empty. Skipping.\")\n        return\n\n    insert_stmt = insert(DocumentByConnectorCredentialPair).values(\n        [\n            model_to_dict(\n                DocumentByConnectorCredentialPair(\n                    id=doc_id,\n                    connector_id=connector_id,\n                    credential_id=credential_id,\n                    has_been_indexed=False,\n                )\n            )\n            for doc_id in document_ids\n        ]\n    )\n    # this must be `on_conflict_do_nothing` rather than `on_conflict_do_update`\n    # since we don't want to update the `has_been_indexed` field for documents\n    # that already exist\n    on_conflict_stmt = insert_stmt.on_conflict_do_nothing()\n    db_session.execute(on_conflict_stmt)\n    db_session.commit()\n\n\ndef mark_document_as_indexed_for_cc_pair__no_commit(\n    db_session: Session,\n    connector_id: int,\n    credential_id: int,\n    document_ids: Iterable[str],\n) -> None:\n    \"\"\"Should be called only after a successful index operation for a batch.\"\"\"\n    db_session.execute(\n        update(DocumentByConnectorCredentialPair)\n        .where(\n            and_(\n                DocumentByConnectorCredentialPair.connector_id == connector_id,\n                DocumentByConnectorCredentialPair.credential_id == credential_id,\n                DocumentByConnectorCredentialPair.id.in_(document_ids),\n            )\n        )\n        .values(has_been_indexed=True)\n    )\n\n\ndef update_docs_updated_at__no_commit(\n    ids_to_new_updated_at: dict[str, datetime],\n    db_session: Session,\n) -> None:\n    doc_ids = list(ids_to_new_updated_at.keys())\n    documents_to_update = (\n        db_session.query(DbDocument).filter(DbDocument.id.in_(doc_ids)).all()\n    )\n\n    for document in documents_to_update:\n        document.doc_updated_at = ids_to_new_updated_at[document.id]\n\n\ndef update_docs_last_modified__no_commit(\n    document_ids: list[str],\n    db_session: Session,\n) -> None:\n    documents_to_update = (\n        db_session.query(DbDocument).filter(DbDocument.id.in_(document_ids)).all()\n    )\n\n    now = datetime.now(timezone.utc)\n    for doc in documents_to_update:\n        doc.last_modified = now\n\n\ndef update_docs_chunk_count__no_commit(\n    document_ids: list[str],\n    doc_id_to_chunk_count: dict[str, int],\n    db_session: Session,\n) -> None:\n    documents_to_update = (\n        db_session.query(DbDocument).filter(DbDocument.id.in_(document_ids)).all()\n    )\n    for doc in documents_to_update:\n        doc.chunk_count = doc_id_to_chunk_count[doc.id]\n\n\ndef mark_document_as_modified(\n    document_id: str,\n    db_session: Session,\n) -> None:\n    stmt = select(DbDocument).where(DbDocument.id == document_id)\n    doc = db_session.scalar(stmt)\n    if doc is None:\n        raise ValueError(f\"No document with ID: {document_id}\")\n\n    # update last_synced\n    doc.last_modified = datetime.now(timezone.utc)\n    db_session.commit()\n\n\ndef mark_document_as_synced(document_id: str, db_session: Session) -> None:\n    stmt = select(DbDocument).where(DbDocument.id == document_id)\n    doc = db_session.scalar(stmt)\n    if doc is None:\n        raise ValueError(f\"No document with ID: {document_id}\")\n\n    # update last_synced\n    doc.last_synced = datetime.now(timezone.utc)\n    db_session.commit()\n\n\ndef delete_document_by_connector_credential_pair__no_commit(\n    db_session: Session,\n    document_id: str,\n    connector_credential_pair_identifier: (\n        ConnectorCredentialPairIdentifier | None\n    ) = None,\n) -> None:\n    \"\"\"Deletes a single document by cc pair relationship entry.\n    Foreign key rows are left in place.\n    The implicit assumption is that the document itself still has other cc_pair\n    references and needs to continue existing.\n    \"\"\"\n    delete_documents_by_connector_credential_pair__no_commit(\n        db_session=db_session,\n        document_ids=[document_id],\n        connector_credential_pair_identifier=connector_credential_pair_identifier,\n    )\n\n\ndef delete_documents_by_connector_credential_pair__no_commit(\n    db_session: Session,\n    document_ids: list[str],\n    connector_credential_pair_identifier: (\n        ConnectorCredentialPairIdentifier | None\n    ) = None,\n) -> None:\n    \"\"\"This deletes just the document by cc pair entries for a particular cc pair.\n    Foreign key rows are left in place.\n    The implicit assumption is that the document itself still has other cc_pair\n    references and needs to continue existing.\n    \"\"\"\n    stmt = delete(DocumentByConnectorCredentialPair).where(\n        DocumentByConnectorCredentialPair.id.in_(document_ids)\n    )\n    if connector_credential_pair_identifier:\n        stmt = stmt.where(\n            and_(\n                DocumentByConnectorCredentialPair.connector_id\n                == connector_credential_pair_identifier.connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == connector_credential_pair_identifier.credential_id,\n            )\n        )\n    db_session.execute(stmt)\n\n\ndef delete_all_documents_by_connector_credential_pair__no_commit(\n    db_session: Session,\n    connector_id: int,\n    credential_id: int,\n) -> None:\n    \"\"\"Deletes all document by connector credential pair entries for a specific connector and credential.\n    This is primarily used during connector deletion to ensure all references are removed\n    before deleting the connector itself. This is crucial because connector_id is part of the\n    primary key in DocumentByConnectorCredentialPair, and attempting to delete the Connector\n    would otherwise try to set the foreign key to NULL, which fails for primary keys.\n\n    NOTE: Does not commit the transaction, this must be done by the caller.\n    \"\"\"\n    stmt = delete(DocumentByConnectorCredentialPair).where(\n        and_(\n            DocumentByConnectorCredentialPair.connector_id == connector_id,\n            DocumentByConnectorCredentialPair.credential_id == credential_id,\n        )\n    )\n    db_session.execute(stmt)\n\n\ndef delete_documents__no_commit(db_session: Session, document_ids: list[str]) -> None:\n    db_session.execute(delete(DbDocument).where(DbDocument.id.in_(document_ids)))\n\n\ndef delete_documents_complete__no_commit(\n    db_session: Session, document_ids: list[str]\n) -> None:\n    \"\"\"This completely deletes the documents from the db, including all foreign key relationships\"\"\"\n\n    # Start with the kg references\n\n    delete_from_kg_relationships__no_commit(\n        db_session=db_session,\n        document_ids=document_ids,\n    )\n\n    delete_from_kg_entities__no_commit(\n        db_session=db_session,\n        document_ids=document_ids,\n    )\n\n    delete_from_kg_relationships_extraction_staging__no_commit(\n        db_session=db_session,\n        document_ids=document_ids,\n    )\n\n    delete_from_kg_entities_extraction_staging__no_commit(\n        db_session=db_session,\n        document_ids=document_ids,\n    )\n\n    # Continue with deleting the chunk stats for the documents\n    delete_chunk_stats_by_connector_credential_pair__no_commit(\n        db_session=db_session,\n        document_ids=document_ids,\n    )\n\n    delete_documents_by_connector_credential_pair__no_commit(db_session, document_ids)\n    delete_document_feedback_for_documents__no_commit(\n        document_ids=document_ids, db_session=db_session\n    )\n    delete_document_tags_for_documents__no_commit(\n        document_ids=document_ids, db_session=db_session\n    )\n    delete_documents__no_commit(db_session, document_ids)\n\n\ndef delete_all_documents_for_connector_credential_pair(\n    db_session: Session,\n    connector_id: int,\n    credential_id: int,\n    timeout: int = ONE_HOUR_IN_SECONDS,\n) -> None:\n    \"\"\"Delete all documents for a given connector credential pair.\n    This will delete all documents and their associated data (chunks, feedback, tags, etc.)\n\n    NOTE: a bit inefficient, but it's not a big deal since this is done rarely - only during\n    an index swap. If we wanted to make this more efficient, we could use a single delete\n    statement + cascade.\n    \"\"\"\n    batch_size = 1000\n    start_time = time.monotonic()\n\n    while True:\n        # Get document IDs in batches\n        stmt = (\n            select(DocumentByConnectorCredentialPair.id)\n            .where(\n                DocumentByConnectorCredentialPair.connector_id == connector_id,\n                DocumentByConnectorCredentialPair.credential_id == credential_id,\n            )\n            .limit(batch_size)\n        )\n        document_ids = db_session.scalars(stmt).all()\n\n        if not document_ids:\n            break\n\n        delete_documents_complete__no_commit(\n            db_session=db_session, document_ids=list(document_ids)\n        )\n        db_session.commit()\n\n        if time.monotonic() - start_time > timeout:\n            raise RuntimeError(\"Timeout reached while deleting documents\")\n\n\ndef acquire_document_locks(db_session: Session, document_ids: list[str]) -> bool:\n    \"\"\"Acquire locks for the specified documents. Ideally this shouldn't be\n    called with large list of document_ids (an exception could be made if the\n    length of holding the lock is very short).\n\n    Will simply raise an exception if any of the documents are already locked.\n    This prevents deadlocks (assuming that the caller passes in all required\n    document IDs in a single call).\n    \"\"\"\n    stmt = (\n        select(DbDocument.id)\n        .where(DbDocument.id.in_(document_ids))\n        .with_for_update(nowait=True)\n    )\n    # will raise exception if any of the documents are already locked\n    documents = db_session.scalars(stmt).all()\n\n    # make sure we found every document\n    if len(documents) != len(set(document_ids)):\n        logger.warning(\"Didn't find row for all specified document IDs. Aborting.\")\n        return False\n\n    return True\n\n\n_NUM_LOCK_ATTEMPTS = 10\n_LOCK_RETRY_DELAY = 10\n\n\n@contextlib.contextmanager\ndef prepare_to_modify_documents(\n    db_session: Session, document_ids: list[str], retry_delay: int = _LOCK_RETRY_DELAY\n) -> Generator[TransactionalContext, None, None]:\n    \"\"\"Try and acquire locks for the documents to prevent other jobs from\n    modifying them at the same time (e.g. avoid race conditions). This should be\n    called ahead of any modification to Vespa. Locks should be released by the\n    caller as soon as updates are complete by finishing the transaction.\n\n    NOTE: only one commit is allowed within the context manager returned by this function.\n    Multiple commits will result in a sqlalchemy.exc.InvalidRequestError.\n    NOTE: this function will commit any existing transaction.\n    \"\"\"\n\n    db_session.commit()  # ensure that we're not in a transaction\n\n    lock_acquired = False\n    for i in range(_NUM_LOCK_ATTEMPTS):\n        try:\n            with db_session.begin() as transaction:\n                lock_acquired = acquire_document_locks(\n                    db_session=db_session, document_ids=document_ids\n                )\n                if lock_acquired:\n                    yield transaction\n                    break\n        except OperationalError as e:\n            logger.warning(\n                f\"Failed to acquire locks for documents on attempt {i}, retrying. Error: {e}\"\n            )\n\n        time.sleep(retry_delay)\n\n    if not lock_acquired:\n        raise RuntimeError(\n            f\"Failed to acquire locks after {_NUM_LOCK_ATTEMPTS} attempts for documents: {document_ids}\"\n        )\n\n\ndef get_ingestion_documents(\n    db_session: Session,\n) -> list[DbDocument]:\n    # TODO add the option to filter by DocumentSource\n    stmt = select(DbDocument).where(DbDocument.from_ingestion_api.is_(True))\n    documents = db_session.execute(stmt).scalars().all()\n    return list(documents)\n\n\ndef get_documents_by_cc_pair(\n    cc_pair_id: int,\n    db_session: Session,\n) -> list[DbDocument]:\n    return (\n        db_session.query(DbDocument)\n        .join(\n            DocumentByConnectorCredentialPair,\n            DbDocument.id == DocumentByConnectorCredentialPair.id,\n        )\n        .join(\n            ConnectorCredentialPair,\n            and_(\n                DocumentByConnectorCredentialPair.connector_id\n                == ConnectorCredentialPair.connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == ConnectorCredentialPair.credential_id,\n            ),\n        )\n        .filter(ConnectorCredentialPair.id == cc_pair_id)\n        .all()\n    )\n\n\ndef get_document(\n    document_id: str,\n    db_session: Session,\n) -> DbDocument | None:\n    stmt = select(DbDocument).where(DbDocument.id == document_id)\n    doc: DbDocument | None = db_session.execute(stmt).scalar_one_or_none()\n    return doc\n\n\ndef get_cc_pairs_for_document(\n    db_session: Session,\n    document_id: str,\n) -> list[ConnectorCredentialPair]:\n    stmt = (\n        select(ConnectorCredentialPair)\n        .join(\n            DocumentByConnectorCredentialPair,\n            and_(\n                DocumentByConnectorCredentialPair.connector_id\n                == ConnectorCredentialPair.connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == ConnectorCredentialPair.credential_id,\n            ),\n        )\n        .where(DocumentByConnectorCredentialPair.id == document_id)\n    )\n    return list(db_session.execute(stmt).scalars().all())\n\n\ndef get_document_sources(\n    db_session: Session,\n    document_ids: list[str],\n) -> dict[str, DocumentSource]:\n    \"\"\"Gets the sources for a list of document IDs.\n    Returns a dictionary mapping document ID to its source.\n    If a document has multiple sources (multiple CC pairs), returns the first one found.\n    \"\"\"\n    stmt = (\n        select(\n            DocumentByConnectorCredentialPair.id,\n            Connector.source,\n        )\n        .join(\n            ConnectorCredentialPair,\n            and_(\n                DocumentByConnectorCredentialPair.connector_id\n                == ConnectorCredentialPair.connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == ConnectorCredentialPair.credential_id,\n            ),\n        )\n        .join(\n            Connector,\n            ConnectorCredentialPair.connector_id == Connector.id,\n        )\n        .where(DocumentByConnectorCredentialPair.id.in_(document_ids))\n        .distinct()\n    )\n\n    results = db_session.execute(stmt).all()\n    return {doc_id: source for doc_id, source in results}\n\n\ndef fetch_chunk_counts_for_documents(\n    document_ids: list[str],\n    db_session: Session,\n) -> list[tuple[str, int]]:\n    \"\"\"\n    Return a list of (document_id, chunk_count) tuples.\n    If a document_id is not found in the database, it will be returned with a chunk_count of 0.\n    \"\"\"\n    stmt = select(DbDocument.id, DbDocument.chunk_count).where(\n        DbDocument.id.in_(document_ids)\n    )\n\n    results = db_session.execute(stmt).all()\n\n    # Create a dictionary of document_id to chunk_count\n    chunk_counts = {str(row.id): row.chunk_count or 0 for row in results}\n\n    # Return a list of tuples, preserving `None` for documents not found or with\n    # an unknown chunk count. Callers should handle the `None` case and fall\n    # back to an existence check against the vector DB if necessary.\n    return [(doc_id, chunk_counts.get(doc_id, 0)) for doc_id in document_ids]\n\n\ndef fetch_chunk_count_for_document(\n    document_id: str,\n    db_session: Session,\n) -> int | None:\n    stmt = select(DbDocument.chunk_count).where(DbDocument.id == document_id)\n    return db_session.execute(stmt).scalar_one_or_none()\n\n\ndef get_unprocessed_kg_document_batch_for_connector(\n    db_session: Session,\n    connector_id: int,\n    kg_coverage_start: datetime,\n    kg_max_coverage_days: int,\n    batch_size: int = 100,\n) -> list[DbDocument]:\n    \"\"\"\n    Retrieves a batch of documents that have not been processed for knowledge graph extraction.\n    Args:\n        db_session (Session): The database session to use\n        connector_id (int): The ID of the connector to get documents for\n        batch_size (int): The maximum number of documents to retrieve\n    Returns:\n        list[DbDocument]: List of documents that need KG processing\n    \"\"\"\n\n    stmt = (\n        select(DbDocument)\n        .join(\n            DocumentByConnectorCredentialPair,\n            DbDocument.id == DocumentByConnectorCredentialPair.id,\n        )\n        .where(\n            and_(\n                DocumentByConnectorCredentialPair.connector_id == connector_id,\n                DbDocument.doc_updated_at\n                >= max(\n                    kg_coverage_start,\n                    datetime.now() - timedelta(days=kg_max_coverage_days),\n                ),\n                or_(\n                    DbDocument.kg_stage.is_(None),\n                    DbDocument.kg_stage == KGStage.NOT_STARTED,\n                    DbDocument.doc_updated_at > DbDocument.kg_processing_time,\n                ),\n            )\n        )\n        .distinct()\n        .limit(batch_size)\n    )\n\n    documents = db_session.scalars(stmt).all()\n    db_session.flush()\n\n    return list(documents)\n\n\ndef get_kg_extracted_document_ids(db_session: Session) -> list[str]:\n    \"\"\"\n    Retrieves all document IDs where kg_stage is EXTRACTED.\n    Args:\n        db_session (Session): The database session to use\n    Returns:\n        list[str]: List of document IDs that have been KG processed\n    \"\"\"\n    stmt = select(DbDocument.id).where(DbDocument.kg_stage == KGStage.EXTRACTED)\n\n    return list(db_session.scalars(stmt).all())\n\n\ndef update_document_kg_info(\n    db_session: Session, document_id: str, kg_stage: KGStage\n) -> None:\n    \"\"\"Updates the knowledge graph related information for a document.\n    Args:\n        db_session (Session): The database session to use\n        document_id (str): The ID of the document to update\n        kg_stage (KGStage): The stage of the knowledge graph processing for the document\n    Raises:\n        ValueError: If the document with the given ID is not found\n    \"\"\"\n    stmt = (\n        update(DbDocument)\n        .where(DbDocument.id == document_id)\n        .values(\n            kg_stage=kg_stage,\n            kg_processing_time=datetime.now(timezone.utc),\n        )\n    )\n    db_session.execute(stmt)\n\n\ndef update_document_kg_stage(\n    db_session: Session,\n    document_id: str,\n    kg_stage: KGStage,\n) -> None:\n    stmt = (\n        update(DbDocument).where(DbDocument.id == document_id).values(kg_stage=kg_stage)\n    )\n    db_session.execute(stmt)\n    db_session.flush()\n\n\ndef get_all_kg_extracted_documents_info(\n    db_session: Session,\n) -> list[str]:\n    \"\"\"Retrieves the knowledge graph data for all documents that have been processed.\n    Args:\n        db_session (Session): The database session to use\n    Returns:\n        List[Tuple[str, dict]]: A list of tuples containing:\n            - str: The document ID\n            - dict: The KG data containing 'entities', 'relationships', and 'terms'\n        Only returns documents where kg_stage is EXTRACTED\n    \"\"\"\n    stmt = (\n        select(DbDocument.id)\n        .where(DbDocument.kg_stage == KGStage.EXTRACTED)\n        .order_by(DbDocument.id)\n    )\n\n    results = db_session.execute(stmt).all()\n    return [str(doc_id) for doc_id in results]\n\n\ndef get_base_llm_doc_information(\n    db_session: Session, document_ids: list[str]\n) -> list[str]:\n    stmt = select(DbDocument).where(DbDocument.id.in_(document_ids))\n    results = db_session.execute(stmt).all()\n\n    documents = []\n\n    for doc_nr, doc in enumerate(results):\n        bare_doc = doc[0]\n        documents.append(\n            f\"\"\"* [{bare_doc.semantic_id}]({bare_doc.link}) ({bare_doc.doc_updated_at})\"\"\"\n        )\n\n    return documents[:KG_SIMPLE_ANSWER_MAX_DISPLAYED_SOURCES]\n\n\ndef get_document_updated_at(\n    document_id: str,\n    db_session: Session,\n) -> datetime | None:\n    \"\"\"Retrieves the doc_updated_at timestamp for a given document ID.\n    Args:\n        document_id (str): The ID of the document to query\n        db_session (Session): The database session to use\n    Returns:\n        Optional[datetime]: The doc_updated_at timestamp if found, None if document doesn't exist\n    \"\"\"\n\n    stmt = select(DbDocument.doc_updated_at).where(DbDocument.id == document_id)\n    return db_session.execute(stmt).scalar_one_or_none()\n\n\ndef reset_all_document_kg_stages(db_session: Session) -> int:\n    \"\"\"Reset the KG stage of all documents that are not in NOT_STARTED state to NOT_STARTED.\n\n    Args:\n        db_session (Session): The database session to use\n\n    Returns:\n        int: Number of documents that were reset\n    \"\"\"\n    stmt = (\n        update(DbDocument)\n        .where(DbDocument.kg_stage != KGStage.NOT_STARTED)\n        .values(kg_stage=KGStage.NOT_STARTED)\n    )\n    result = db_session.execute(stmt)\n\n    # The hasattr check is needed for type checking, even though rowcount\n    # is guaranteed to exist at runtime for UPDATE operations\n    return result.rowcount if hasattr(result, \"rowcount\") else 0\n\n\ndef update_document_kg_stages(\n    db_session: Session, source_stage: KGStage, target_stage: KGStage\n) -> int:\n    \"\"\"Reset the KG stage only of documents back to NOT_STARTED.\n    Part of reset flow for documents that have been extracted but not clustered.\n\n    Args:\n        db_session (Session): The database session to use\n\n    Returns:\n        int: Number of documents that were reset\n    \"\"\"\n    stmt = (\n        update(DbDocument)\n        .where(DbDocument.kg_stage == source_stage)\n        .values(kg_stage=target_stage)\n    )\n    result = db_session.execute(stmt)\n    # The hasattr check is needed for type checking, even though rowcount\n    # is guaranteed to exist at runtime for UPDATE operations\n    return result.rowcount if hasattr(result, \"rowcount\") else 0\n\n\ndef get_skipped_kg_documents(db_session: Session) -> list[str]:\n    \"\"\"\n    Retrieves all document IDs where kg_stage is SKIPPED.\n    Args:\n        db_session (Session): The database session to use\n    Returns:\n        list[str]: List of document IDs that have been skipped in KG processing\n    \"\"\"\n    stmt = select(DbDocument.id).where(DbDocument.kg_stage == KGStage.SKIPPED)\n\n    return list(db_session.scalars(stmt).all())\n\n\n# def get_kg_doc_info_for_entity_name(\n#     db_session: Session, document_id: str, entity_type: str\n# ) -> KGEntityDocInfo:\n#     \"\"\"\n#     Get the semantic ID and the link for an entity name.\n#     \"\"\"\n\n#     result = (\n#         db_session.query(Document.semantic_id, Document.link)\n#         .filter(Document.id == document_id)\n#         .first()\n#     )\n\n#     if result is None:\n#         return KGEntityDocInfo(\n#             doc_id=None,\n#             doc_semantic_id=None,\n#             doc_link=None,\n#             semantic_entity_name=f\"{entity_type}:{document_id}\",\n#             semantic_linked_entity_name=f\"{entity_type}:{document_id}\",\n#         )\n\n#     return KGEntityDocInfo(\n#         doc_id=document_id,\n#         doc_semantic_id=result[0],\n#         doc_link=result[1],\n#         semantic_entity_name=f\"{entity_type.upper()}:{result[0]}\",\n#         semantic_linked_entity_name=f\"[{entity_type.upper()}:{result[0]}]({result[1]})\",\n#     )\n\n\ndef check_for_documents_needing_kg_processing(\n    db_session: Session, kg_coverage_start: datetime, kg_max_coverage_days: int\n) -> bool:\n    \"\"\"Check if there are any documents that need KG processing.\n\n    A document needs KG processing if:\n    1. It is associated with a connector that has kg_processing_enabled = true\n    2. AND either:\n       - Its kg_stage is NOT_STARTED or NULL\n       - OR its last_updated timestamp is greater than its kg_processing_time\n\n    Args:\n        db_session (Session): The database session to use\n\n    Returns:\n        bool: True if there are any documents needing KG processing, False otherwise\n    \"\"\"\n\n    stmt = (\n        select(1)\n        .select_from(DbDocument)\n        .join(\n            DocumentByConnectorCredentialPair,\n            DbDocument.id == DocumentByConnectorCredentialPair.id,\n        )\n        .join(\n            Connector,\n            DocumentByConnectorCredentialPair.connector_id == Connector.id,\n        )\n        .where(\n            and_(\n                Connector.kg_processing_enabled.is_(True),\n                DbDocument.doc_updated_at\n                >= max(\n                    kg_coverage_start,\n                    datetime.now() - timedelta(days=kg_max_coverage_days),\n                ),\n                or_(\n                    DbDocument.kg_stage.is_(None),\n                    DbDocument.kg_stage == KGStage.NOT_STARTED,\n                    DbDocument.doc_updated_at > DbDocument.kg_processing_time,\n                ),\n            )\n        )\n        .exists()\n    )\n\n    return db_session.execute(select(stmt)).scalar() or False\n\n\ndef check_for_documents_needing_kg_clustering(db_session: Session) -> bool:\n    \"\"\"Check if there are any documents that need KG clustering.\n\n    A document needs KG clustering if:\n    1. It is associated with a connector that has kg_processing_enabled = true\n    2. AND either:\n       - Its kg_stage is EXTRACTED\n       - OR its last_updated timestamp is greater than its kg_processing_time\n\n    Args:\n        db_session (Session): The database session to use\n\n    Returns:\n        bool: True if there are any documents needing KG clustering, False otherwise\n    \"\"\"\n    stmt = (\n        select(1)\n        .select_from(DbDocument)\n        .join(\n            DocumentByConnectorCredentialPair,\n            DbDocument.id == DocumentByConnectorCredentialPair.id,\n        )\n        .join(\n            ConnectorCredentialPair,\n            and_(\n                DocumentByConnectorCredentialPair.connector_id\n                == ConnectorCredentialPair.connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == ConnectorCredentialPair.credential_id,\n            ),\n        )\n        .join(\n            Connector,\n            ConnectorCredentialPair.connector_id == Connector.id,\n        )\n        .where(\n            and_(\n                Connector.kg_processing_enabled.is_(True),\n                ConnectorCredentialPair.status\n                != ConnectorCredentialPairStatus.DELETING,\n                or_(\n                    DbDocument.kg_stage == KGStage.EXTRACTED,\n                    DbDocument.last_modified > DbDocument.kg_processing_time,\n                ),\n            )\n        )\n        .exists()\n    )\n\n    return db_session.execute(select(stmt)).scalar() or False\n\n\ndef get_document_kg_entities_and_relationships(\n    db_session: Session, document_id: str\n) -> tuple[list[KGEntity], list[KGRelationship]]:\n    \"\"\"\n    Get the KG entities and relationships that references the document.\n    \"\"\"\n    entities = (\n        db_session.query(KGEntity).filter(KGEntity.document_id == document_id).all()\n    )\n    if not entities:\n        return [], []\n    entity_id_names = [entity.id_name for entity in entities]\n\n    relationships = (\n        db_session.query(KGRelationship)\n        .filter(\n            or_(\n                KGRelationship.source_node.in_(entity_id_names),\n                KGRelationship.target_node.in_(entity_id_names),\n                KGRelationship.source_document == document_id,\n            )\n        )\n        .all()\n    )\n    return entities, relationships\n\n\ndef get_num_chunks_for_document(db_session: Session, document_id: str) -> int:\n    stmt = select(DbDocument.chunk_count).where(DbDocument.id == document_id)\n    return db_session.execute(stmt).scalar_one_or_none() or 0\n\n\ndef update_document_metadata__no_commit(\n    db_session: Session,\n    document_id: str,\n    doc_metadata: dict[str, Any],\n) -> None:\n    \"\"\"Update the doc_metadata field for a document.\n\n    Note: Does not commit. Caller is responsible for committing.\n\n    Args:\n        db_session: Database session\n        document_id: The ID of the document to update\n        doc_metadata: The new metadata dictionary to set\n    \"\"\"\n    stmt = (\n        update(DbDocument)\n        .where(DbDocument.id == document_id)\n        .values(doc_metadata=doc_metadata)\n    )\n    db_session.execute(stmt)\n\n\ndef delete_document_by_id__no_commit(\n    db_session: Session,\n    document_id: str,\n) -> None:\n    \"\"\"Delete a single document and its connector credential pair relationships.\n\n    Note: Does not commit. Caller is responsible for committing.\n\n    This uses delete_documents_complete__no_commit which handles\n    all foreign key relationships (KG entities, relationships, chunk stats,\n    cc pair associations, feedback, tags).\n    \"\"\"\n    delete_documents_complete__no_commit(db_session, [document_id])\n"
  },
  {
    "path": "backend/onyx/db/document_access.py",
    "content": "\"\"\"\nDocument access filtering utilities.\n\nThis module provides reusable access filtering logic for documents based on:\n- Connector access type (PUBLIC vs SYNC)\n- Document-level public flag\n- User email matching external_user_emails\n- User group overlap with external_user_group_ids\n\nThis is a standalone module to avoid circular imports between document.py and persona.py.\n\"\"\"\n\nfrom sqlalchemy import and_\nfrom sqlalchemy import any_\nfrom sqlalchemy import cast\nfrom sqlalchemy import or_\nfrom sqlalchemy import Select\nfrom sqlalchemy import select\nfrom sqlalchemy import String\nfrom sqlalchemy.dialects import postgresql\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.sql.elements import ColumnElement\n\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Document\nfrom onyx.db.models import DocumentByConnectorCredentialPair\n\n\ndef apply_document_access_filter(\n    stmt: Select,\n    user_email: str | None,\n    external_group_ids: list[str],\n) -> Select:\n    \"\"\"\n    Apply document access filtering to a query.\n\n    This joins with DocumentByConnectorCredentialPair and ConnectorCredentialPair to:\n    1. Check if the document is from a PUBLIC connector (access_type = PUBLIC)\n    2. Check document-level permissions (is_public, external_user_emails, external_user_group_ids)\n    3. Exclude documents from cc_pairs that are being deleted\n\n    Args:\n        stmt: The SELECT statement to modify (must be selecting from Document)\n        user_email: The user's email for permission checking\n        external_group_ids: List of external group IDs the user belongs to\n\n    Returns:\n        Modified SELECT statement with access filtering applied\n    \"\"\"\n    # Join to get cc_pair info for each document\n    stmt = stmt.join(\n        DocumentByConnectorCredentialPair,\n        Document.id == DocumentByConnectorCredentialPair.id,\n    ).join(\n        ConnectorCredentialPair,\n        and_(\n            DocumentByConnectorCredentialPair.connector_id\n            == ConnectorCredentialPair.connector_id,\n            DocumentByConnectorCredentialPair.credential_id\n            == ConnectorCredentialPair.credential_id,\n        ),\n    )\n\n    # Exclude documents from cc_pairs that are being deleted\n    stmt = stmt.where(\n        ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING\n    )\n\n    # Build access filters\n    access_filters: list[ColumnElement[bool]] = [\n        # Document is from a PUBLIC connector\n        ConnectorCredentialPair.access_type == AccessType.PUBLIC,\n        # Document is marked as public (e.g., \"Anyone with link\" in source)\n        Document.is_public.is_(True),\n    ]\n    if user_email:\n        access_filters.append(any_(Document.external_user_emails) == user_email)\n    if external_group_ids:\n        access_filters.append(\n            Document.external_user_group_ids.overlap(\n                cast(postgresql.array(external_group_ids), postgresql.ARRAY(String))\n            )\n        )\n\n    stmt = stmt.where(or_(*access_filters))\n    return stmt\n\n\ndef get_accessible_documents_by_ids(\n    db_session: Session,\n    document_ids: list[str],\n    user_email: str | None,\n    external_group_ids: list[str],\n) -> list[Document]:\n    \"\"\"\n    Fetch documents by IDs, filtering to only those the user has access to.\n\n    Uses the same access filtering logic as other document queries:\n    - Documents from PUBLIC connectors\n    - Documents marked as public (e.g., \"Anyone with link\")\n    - Documents where user email matches external_user_emails\n    - Documents where user's groups overlap with external_user_group_ids\n\n    Args:\n        db_session: Database session\n        document_ids: List of document IDs to fetch\n        user_email: User's email for permission checking\n        external_group_ids: List of external group IDs the user belongs to\n\n    Returns:\n        List of Document objects from the input that the user has access to\n    \"\"\"\n    if not document_ids:\n        return []\n\n    stmt = select(Document).where(Document.id.in_(document_ids))\n    stmt = apply_document_access_filter(stmt, user_email, external_group_ids)\n    # Use distinct to avoid duplicates when a document belongs to multiple cc_pairs\n    stmt = stmt.distinct()\n    return list(db_session.execute(stmt).scalars().all())\n"
  },
  {
    "path": "backend/onyx/db/document_set.py",
    "content": "from collections.abc import Sequence\nfrom typing import cast\nfrom uuid import UUID\n\nfrom sqlalchemy import and_\nfrom sqlalchemy import delete\nfrom sqlalchemy import exists\nfrom sqlalchemy import func\nfrom sqlalchemy import or_\nfrom sqlalchemy import Select\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import aliased\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.db.connector_credential_pair import get_cc_pair_groups_for_ids\nfrom onyx.db.connector_credential_pair import get_connector_credential_pairs\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.federated import create_federated_connector_document_set_mapping\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Document\nfrom onyx.db.models import DocumentByConnectorCredentialPair\nfrom onyx.db.models import DocumentSet as DocumentSetDBModel\nfrom onyx.db.models import DocumentSet__ConnectorCredentialPair\nfrom onyx.db.models import DocumentSet__UserGroup\nfrom onyx.db.models import FederatedConnector__DocumentSet\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserRole\nfrom onyx.server.features.document_set.models import DocumentSetCreationRequest\nfrom onyx.server.features.document_set.models import DocumentSetUpdateRequest\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\n\nlogger = setup_logger()\n\n\ndef _add_user_filters(stmt: Select, user: User, get_editable: bool = True) -> Select:\n    if user.role == UserRole.ADMIN:\n        return stmt\n\n    stmt = stmt.distinct()\n    DocumentSet__UG = aliased(DocumentSet__UserGroup)\n    User__UG = aliased(User__UserGroup)\n    \"\"\"\n    Here we select cc_pairs by relation:\n    User -> User__UserGroup -> DocumentSet__UserGroup -> DocumentSet\n    \"\"\"\n    stmt = stmt.outerjoin(DocumentSet__UG).outerjoin(\n        User__UserGroup,\n        User__UserGroup.user_group_id == DocumentSet__UG.user_group_id,\n    )\n    \"\"\"\n    Filter DocumentSets by:\n    - if the user is in the user_group that owns the DocumentSet\n    - if the user is not a global_curator, they must also have a curator relationship\n    to the user_group\n    - if editing is being done, we also filter out DocumentSets that are owned by groups\n    that the user isn't a curator for\n    - if we are not editing, we show all DocumentSets in the groups the user is a curator\n    for (as well as public DocumentSets)\n    \"\"\"\n\n    # Anonymous users only see public DocumentSets\n    if user.is_anonymous:\n        where_clause = DocumentSetDBModel.is_public == True  # noqa: E712\n        return stmt.where(where_clause)\n\n    where_clause = User__UserGroup.user_id == user.id\n    if user.role == UserRole.CURATOR and get_editable:\n        where_clause &= User__UserGroup.is_curator == True  # noqa: E712\n    if get_editable:\n        user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id)\n        if user.role == UserRole.CURATOR:\n            user_groups = user_groups.where(User__UG.is_curator == True)  # noqa: E712\n        where_clause &= (\n            ~exists()\n            .where(DocumentSet__UG.document_set_id == DocumentSetDBModel.id)\n            .where(~DocumentSet__UG.user_group_id.in_(user_groups))\n            .correlate(DocumentSetDBModel)\n        )\n        where_clause |= DocumentSetDBModel.user_id == user.id\n    else:\n        where_clause |= DocumentSetDBModel.is_public == True  # noqa: E712\n\n    return stmt.where(where_clause)\n\n\ndef _delete_document_set_cc_pairs__no_commit(\n    db_session: Session, document_set_id: int, is_current: bool | None = None\n) -> None:\n    \"\"\"NOTE: does not commit transaction, this must be done by the caller\"\"\"\n    stmt = delete(DocumentSet__ConnectorCredentialPair).where(\n        DocumentSet__ConnectorCredentialPair.document_set_id == document_set_id\n    )\n    if is_current is not None:\n        stmt = stmt.where(DocumentSet__ConnectorCredentialPair.is_current == is_current)\n    db_session.execute(stmt)\n\n\ndef _mark_document_set_cc_pairs_as_outdated__no_commit(\n    db_session: Session, document_set_id: int\n) -> None:\n    \"\"\"NOTE: does not commit transaction, this must be done by the caller\"\"\"\n    stmt = select(DocumentSet__ConnectorCredentialPair).where(\n        DocumentSet__ConnectorCredentialPair.document_set_id == document_set_id\n    )\n    for row in db_session.scalars(stmt):\n        row.is_current = False\n\n\ndef delete_document_set_privacy__no_commit(\n    document_set_id: int, db_session: Session\n) -> None:\n    \"\"\"No private document sets in Onyx MIT\"\"\"\n\n\ndef get_document_set_by_id_for_user(\n    db_session: Session,\n    document_set_id: int,\n    user: User,\n    get_editable: bool = True,\n) -> DocumentSetDBModel | None:\n    stmt = (\n        select(DocumentSetDBModel)\n        .distinct()\n        .options(selectinload(DocumentSetDBModel.federated_connectors))\n    )\n    stmt = stmt.where(DocumentSetDBModel.id == document_set_id)\n    stmt = _add_user_filters(stmt=stmt, user=user, get_editable=get_editable)\n    return db_session.scalar(stmt)\n\n\ndef get_document_set_by_id(\n    db_session: Session,\n    document_set_id: int,\n) -> DocumentSetDBModel | None:\n    stmt = select(DocumentSetDBModel).distinct()\n    stmt = stmt.where(DocumentSetDBModel.id == document_set_id)\n    return db_session.scalar(stmt)\n\n\ndef get_document_set_by_name(\n    db_session: Session, document_set_name: str\n) -> DocumentSetDBModel | None:\n    return db_session.scalar(\n        select(DocumentSetDBModel).where(DocumentSetDBModel.name == document_set_name)\n    )\n\n\ndef get_document_sets_by_name(\n    db_session: Session, document_set_names: list[str]\n) -> Sequence[DocumentSetDBModel]:\n    return db_session.scalars(\n        select(DocumentSetDBModel).where(\n            DocumentSetDBModel.name.in_(document_set_names)\n        )\n    ).all()\n\n\ndef get_document_sets_by_ids(\n    db_session: Session, document_set_ids: list[int]\n) -> Sequence[DocumentSetDBModel]:\n    if not document_set_ids:\n        return []\n    return db_session.scalars(\n        select(DocumentSetDBModel).where(DocumentSetDBModel.id.in_(document_set_ids))\n    ).all()\n\n\ndef make_doc_set_private(\n    document_set_id: int,  # noqa: ARG001\n    user_ids: list[UUID] | None,\n    group_ids: list[int] | None,\n    db_session: Session,  # noqa: ARG001\n) -> None:\n    # May cause error if someone switches down to MIT from EE\n    if user_ids or group_ids:\n        raise NotImplementedError(\"Onyx MIT does not support private Document Sets\")\n\n\ndef _check_if_cc_pairs_are_owned_by_groups(\n    db_session: Session,\n    cc_pair_ids: list[int],\n    group_ids: list[int],\n) -> None:\n    \"\"\"\n    This function checks if the CC pairs are owned by the specified groups or public.\n    If not, it raises a ValueError.\n    \"\"\"\n    group_cc_pair_relationships = get_cc_pair_groups_for_ids(\n        db_session=db_session,\n        cc_pair_ids=cc_pair_ids,\n    )\n\n    group_cc_pair_relationships_set = {\n        (relationship.cc_pair_id, relationship.user_group_id)\n        for relationship in group_cc_pair_relationships\n    }\n\n    missing_cc_pair_ids = []\n    for cc_pair_id in cc_pair_ids:\n        for group_id in group_ids:\n            if (cc_pair_id, group_id) not in group_cc_pair_relationships_set:\n                missing_cc_pair_ids.append(cc_pair_id)\n                break\n\n    if missing_cc_pair_ids:\n        cc_pairs = get_connector_credential_pairs(\n            db_session=db_session,\n            ids=missing_cc_pair_ids,\n        )\n        for cc_pair in cc_pairs:\n            if cc_pair.access_type == AccessType.PRIVATE:\n                raise ValueError(\n                    f\"Connector Credential Pair with ID: '{cc_pair.id}' is not owned by the specified groups\"\n                )\n\n\ndef insert_document_set(\n    document_set_creation_request: DocumentSetCreationRequest,\n    user_id: UUID | None,\n    db_session: Session,\n) -> tuple[DocumentSetDBModel, list[DocumentSet__ConnectorCredentialPair]]:\n    # Check if we have either CC pairs or federated connectors (or both)\n    if (\n        not document_set_creation_request.cc_pair_ids\n        and not document_set_creation_request.federated_connectors\n    ):\n        raise ValueError(\"Cannot create a document set with no connectors\")\n\n    if not document_set_creation_request.is_public:\n        _check_if_cc_pairs_are_owned_by_groups(\n            db_session=db_session,\n            cc_pair_ids=document_set_creation_request.cc_pair_ids,\n            group_ids=document_set_creation_request.groups or [],\n        )\n\n    new_document_set_row: DocumentSetDBModel\n    ds_cc_pairs: list[DocumentSet__ConnectorCredentialPair]\n    try:\n        new_document_set_row = DocumentSetDBModel(\n            name=document_set_creation_request.name,\n            description=document_set_creation_request.description,\n            user_id=user_id,\n            is_public=document_set_creation_request.is_public,\n            is_up_to_date=DISABLE_VECTOR_DB,\n            time_last_modified_by_user=func.now(),\n        )\n        db_session.add(new_document_set_row)\n        db_session.flush()  # ensure the new document set gets assigned an ID\n\n        # Create CC pair mappings\n        ds_cc_pairs = [\n            DocumentSet__ConnectorCredentialPair(\n                document_set_id=new_document_set_row.id,\n                connector_credential_pair_id=cc_pair_id,\n                is_current=True,\n            )\n            for cc_pair_id in document_set_creation_request.cc_pair_ids\n        ]\n        db_session.add_all(ds_cc_pairs)\n\n        # Create federated connector mappings\n        from onyx.db.federated import create_federated_connector_document_set_mapping\n\n        for fc_config in document_set_creation_request.federated_connectors:\n            create_federated_connector_document_set_mapping(\n                db_session=db_session,\n                federated_connector_id=fc_config.federated_connector_id,\n                document_set_id=new_document_set_row.id,\n                entities=fc_config.entities,\n            )\n\n        versioned_private_doc_set_fn = fetch_versioned_implementation(\n            \"onyx.db.document_set\", \"make_doc_set_private\"\n        )\n\n        # Private Document Sets\n        versioned_private_doc_set_fn(\n            document_set_id=new_document_set_row.id,\n            user_ids=document_set_creation_request.users,\n            group_ids=document_set_creation_request.groups,\n            db_session=db_session,\n        )\n\n        db_session.commit()\n    except Exception as e:\n        db_session.rollback()\n        logger.error(f\"Error creating document set: {e}\")\n        raise\n\n    return new_document_set_row, ds_cc_pairs\n\n\ndef update_document_set(\n    db_session: Session,\n    document_set_update_request: DocumentSetUpdateRequest,\n    user: User,\n) -> tuple[DocumentSetDBModel, list[DocumentSet__ConnectorCredentialPair]]:\n    \"\"\"If successful, this sets document_set_row.is_up_to_date = False.\n    That will be processed via Celery in check_for_vespa_sync_task\n    and trigger a long running background sync to Vespa.\n    \"\"\"\n    # Check if we have either CC pairs or federated connectors (or both)\n    if (\n        not document_set_update_request.cc_pair_ids\n        and not document_set_update_request.federated_connectors\n    ):\n        raise ValueError(\"Cannot update a document set with no connectors\")\n\n    if not document_set_update_request.is_public:\n        _check_if_cc_pairs_are_owned_by_groups(\n            db_session=db_session,\n            cc_pair_ids=document_set_update_request.cc_pair_ids,\n            group_ids=document_set_update_request.groups,\n        )\n\n    try:\n        # update the description\n        document_set_row = get_document_set_by_id_for_user(\n            db_session=db_session,\n            document_set_id=document_set_update_request.id,\n            user=user,\n            get_editable=True,\n        )\n        if document_set_row is None:\n            raise ValueError(\n                f\"No document set with ID '{document_set_update_request.id}'\"\n            )\n        if not document_set_row.is_up_to_date:\n            raise ValueError(\n                \"Cannot update document set while it is syncing. Please wait for it to finish syncing, and then try again.\"\n            )\n\n        document_set_row.description = document_set_update_request.description\n        if not DISABLE_VECTOR_DB:\n            document_set_row.is_up_to_date = False\n        document_set_row.is_public = document_set_update_request.is_public\n        document_set_row.time_last_modified_by_user = func.now()\n        versioned_private_doc_set_fn = fetch_versioned_implementation(\n            \"onyx.db.document_set\", \"make_doc_set_private\"\n        )\n\n        # Private Document Sets\n        versioned_private_doc_set_fn(\n            document_set_id=document_set_row.id,\n            user_ids=document_set_update_request.users,\n            group_ids=document_set_update_request.groups,\n            db_session=db_session,\n        )\n\n        # update the attached CC pairs\n        # first, mark all existing CC pairs as not current\n        _mark_document_set_cc_pairs_as_outdated__no_commit(\n            db_session=db_session, document_set_id=document_set_row.id\n        )\n        # add in rows for the new CC pairs\n        ds_cc_pairs = [\n            DocumentSet__ConnectorCredentialPair(\n                document_set_id=document_set_update_request.id,\n                connector_credential_pair_id=cc_pair_id,\n                is_current=True,\n            )\n            for cc_pair_id in document_set_update_request.cc_pair_ids\n        ]\n        db_session.add_all(ds_cc_pairs)\n\n        # Update federated connector mappings\n        # Delete existing federated connector mappings for this document set\n        delete_stmt = delete(FederatedConnector__DocumentSet).where(\n            FederatedConnector__DocumentSet.document_set_id == document_set_row.id\n        )\n        db_session.execute(delete_stmt)\n\n        # Create new federated connector mappings\n        for fc_config in document_set_update_request.federated_connectors:\n            create_federated_connector_document_set_mapping(\n                db_session=db_session,\n                federated_connector_id=fc_config.federated_connector_id,\n                document_set_id=document_set_row.id,\n                entities=fc_config.entities,\n            )\n\n        db_session.commit()\n    except:\n        db_session.rollback()\n        raise\n\n    return document_set_row, ds_cc_pairs\n\n\ndef mark_document_set_as_synced(document_set_id: int, db_session: Session) -> None:\n    stmt = select(DocumentSetDBModel).where(DocumentSetDBModel.id == document_set_id)\n    document_set = db_session.scalar(stmt)\n    if document_set is None:\n        raise ValueError(f\"No document set with ID: {document_set_id}\")\n\n    # mark as up to date\n    document_set.is_up_to_date = True\n    # delete outdated relationship table rows\n    _delete_document_set_cc_pairs__no_commit(\n        db_session=db_session, document_set_id=document_set_id, is_current=False\n    )\n    db_session.commit()\n\n\ndef delete_document_set(\n    document_set_row: DocumentSetDBModel, db_session: Session\n) -> None:\n    # delete all relationships to CC pairs\n    _delete_document_set_cc_pairs__no_commit(\n        db_session=db_session, document_set_id=document_set_row.id\n    )\n    db_session.delete(document_set_row)\n    db_session.commit()\n\n\ndef mark_document_set_as_to_be_deleted(\n    db_session: Session,\n    document_set_id: int,\n    user: User,\n) -> None:\n    \"\"\"Cleans up all document_set -> cc_pair relationships and marks the document set\n    as needing an update. The actual document set row will be deleted by the background\n    job which syncs these changes to Vespa.\"\"\"\n\n    try:\n        document_set_row = get_document_set_by_id_for_user(\n            db_session=db_session,\n            document_set_id=document_set_id,\n            user=user,\n            get_editable=True,\n        )\n        if document_set_row is None:\n            error_msg = f\"Document set with ID: '{document_set_id}' does not exist \"\n            if user is not None:\n                error_msg += f\"or is not editable by user with email: '{user.email}'\"\n            raise ValueError(error_msg)\n        if not document_set_row.is_up_to_date:\n            raise ValueError(\n                \"Cannot delete document set while it is syncing. Please wait for it to finish syncing, and then try again.\"\n            )\n\n        # delete all relationships to CC pairs\n        _delete_document_set_cc_pairs__no_commit(\n            db_session=db_session, document_set_id=document_set_id\n        )\n\n        # delete all federated connector mappings so the cleanup task can fully\n        # remove the document set once the Vespa sync completes\n        delete_stmt = delete(FederatedConnector__DocumentSet).where(\n            FederatedConnector__DocumentSet.document_set_id == document_set_id\n        )\n        db_session.execute(delete_stmt)\n\n        # delete all private document set information\n        versioned_delete_private_fn = fetch_versioned_implementation(\n            \"onyx.db.document_set\", \"delete_document_set_privacy__no_commit\"\n        )\n        versioned_delete_private_fn(\n            document_set_id=document_set_id, db_session=db_session\n        )\n\n        # mark the row as needing a sync, it will be deleted there since there\n        # are no more relationships to cc pairs\n        document_set_row.is_up_to_date = False\n        db_session.commit()\n    except:\n        db_session.rollback()\n        raise\n\n\ndef delete_document_set_cc_pair_relationship__no_commit(\n    connector_id: int, credential_id: int, db_session: Session\n) -> int:\n    \"\"\"Deletes all rows from DocumentSet__ConnectorCredentialPair where the\n    connector_credential_pair_id matches the given cc_pair_id.\"\"\"\n    delete_stmt = delete(DocumentSet__ConnectorCredentialPair).where(\n        and_(\n            ConnectorCredentialPair.connector_id == connector_id,\n            ConnectorCredentialPair.credential_id == credential_id,\n            DocumentSet__ConnectorCredentialPair.connector_credential_pair_id\n            == ConnectorCredentialPair.id,\n        )\n    )\n    result = db_session.execute(delete_stmt)\n    return result.rowcount  # type: ignore\n\n\ndef fetch_document_sets(\n    user_id: UUID | None,  # noqa: ARG001\n    db_session: Session,\n    include_outdated: bool = False,\n) -> list[tuple[DocumentSetDBModel, list[ConnectorCredentialPair]]]:\n    \"\"\"Return is a list where each element contains a tuple of:\n    1. The document set itself\n    2. All CC pairs associated with the document set\"\"\"\n    stmt = (\n        select(DocumentSetDBModel, ConnectorCredentialPair)\n        .join(\n            DocumentSet__ConnectorCredentialPair,\n            DocumentSetDBModel.id\n            == DocumentSet__ConnectorCredentialPair.document_set_id,\n            isouter=True,  # outer join is needed to also fetch document sets with no cc pairs\n        )\n        .join(\n            ConnectorCredentialPair,\n            ConnectorCredentialPair.id\n            == DocumentSet__ConnectorCredentialPair.connector_credential_pair_id,\n            isouter=True,  # outer join is needed to also fetch document sets with no cc pairs\n        )\n    )\n    if not include_outdated:\n        stmt = stmt.where(\n            or_(\n                DocumentSet__ConnectorCredentialPair.is_current == True,  # noqa: E712\n                # `None` handles case where no CC Pairs exist for a Document Set\n                DocumentSet__ConnectorCredentialPair.is_current.is_(None),\n            )\n        )\n\n    results = cast(\n        list[tuple[DocumentSetDBModel, ConnectorCredentialPair | None]],\n        db_session.execute(stmt).all(),\n    )\n\n    aggregated_results: dict[\n        int, tuple[DocumentSetDBModel, list[ConnectorCredentialPair]]\n    ] = {}\n    for document_set, cc_pair in results:\n        if document_set.id not in aggregated_results:\n            aggregated_results[document_set.id] = (\n                document_set,\n                [cc_pair] if cc_pair else [],\n            )\n        else:\n            if cc_pair:\n                aggregated_results[document_set.id][1].append(cc_pair)\n\n    return [\n        (document_set, cc_pairs)\n        for document_set, cc_pairs in aggregated_results.values()\n    ]\n\n\ndef fetch_all_document_sets_for_user(\n    db_session: Session,\n    user: User,\n    get_editable: bool = True,\n) -> Sequence[DocumentSetDBModel]:\n    stmt = (\n        select(DocumentSetDBModel)\n        .distinct()\n        .options(\n            selectinload(DocumentSetDBModel.connector_credential_pairs).selectinload(\n                ConnectorCredentialPair.connector\n            ),\n            selectinload(DocumentSetDBModel.users),\n            selectinload(DocumentSetDBModel.groups),\n            selectinload(DocumentSetDBModel.federated_connectors).selectinload(\n                FederatedConnector__DocumentSet.federated_connector\n            ),\n        )\n    )\n    stmt = _add_user_filters(stmt, user, get_editable=get_editable)\n    return db_session.scalars(stmt).unique().all()\n\n\ndef fetch_documents_for_document_set_paginated(\n    document_set_id: int,\n    db_session: Session,\n    current_only: bool = True,\n    last_document_id: str | None = None,\n    limit: int = 100,\n) -> tuple[Sequence[Document], str | None]:\n    stmt = (\n        select(Document)\n        .join(\n            DocumentByConnectorCredentialPair,\n            DocumentByConnectorCredentialPair.id == Document.id,\n        )\n        .join(\n            ConnectorCredentialPair,\n            and_(\n                ConnectorCredentialPair.connector_id\n                == DocumentByConnectorCredentialPair.connector_id,\n                ConnectorCredentialPair.credential_id\n                == DocumentByConnectorCredentialPair.credential_id,\n            ),\n        )\n        .join(\n            DocumentSet__ConnectorCredentialPair,\n            DocumentSet__ConnectorCredentialPair.connector_credential_pair_id\n            == ConnectorCredentialPair.id,\n        )\n        .join(\n            DocumentSetDBModel,\n            DocumentSetDBModel.id\n            == DocumentSet__ConnectorCredentialPair.document_set_id,\n        )\n        .where(DocumentSetDBModel.id == document_set_id)\n        .order_by(Document.id)\n        .limit(limit)\n    )\n    if last_document_id is not None:\n        stmt = stmt.where(Document.id > last_document_id)\n    if current_only:\n        stmt = stmt.where(\n            DocumentSet__ConnectorCredentialPair.is_current == True  # noqa: E712\n        )\n    stmt = stmt.distinct()\n\n    documents = db_session.scalars(stmt).all()\n    return documents, documents[-1].id if documents else None\n\n\ndef construct_document_id_select_by_docset(\n    document_set_id: int,\n    current_only: bool = True,\n) -> Select:\n    \"\"\"This returns a statement that should be executed using\n    .yield_per() to minimize overhead. The primary consumers of this function\n    are background processing task generators.\"\"\"\n\n    stmt = (\n        select(Document.id)\n        .join(\n            DocumentByConnectorCredentialPair,\n            DocumentByConnectorCredentialPair.id == Document.id,\n        )\n        .join(\n            ConnectorCredentialPair,\n            and_(\n                ConnectorCredentialPair.connector_id\n                == DocumentByConnectorCredentialPair.connector_id,\n                ConnectorCredentialPair.credential_id\n                == DocumentByConnectorCredentialPair.credential_id,\n            ),\n        )\n        .join(\n            DocumentSet__ConnectorCredentialPair,\n            DocumentSet__ConnectorCredentialPair.connector_credential_pair_id\n            == ConnectorCredentialPair.id,\n        )\n        .join(\n            DocumentSetDBModel,\n            DocumentSetDBModel.id\n            == DocumentSet__ConnectorCredentialPair.document_set_id,\n        )\n        .where(DocumentSetDBModel.id == document_set_id)\n        .order_by(Document.id)\n    )\n\n    if current_only:\n        stmt = stmt.where(\n            DocumentSet__ConnectorCredentialPair.is_current == True  # noqa: E712\n        )\n\n    stmt = stmt.distinct()\n    return stmt\n\n\ndef fetch_document_sets_for_document(\n    document_id: str,\n    db_session: Session,\n) -> list[str]:\n    \"\"\"\n    Fetches the document set names for a single document ID.\n\n    :param document_id: The ID of the document to fetch sets for.\n    :param db_session: The SQLAlchemy session to use for the query.\n    :return: A list of document set names, or None if no result is found.\n    \"\"\"\n    result = fetch_document_sets_for_documents([document_id], db_session)\n    if not result:\n        return []\n    return result[0][1]\n\n\ndef fetch_document_sets_for_documents(\n    document_ids: list[str],\n    db_session: Session,\n) -> Sequence[tuple[str, list[str]]]:\n    \"\"\"Gives back a list of (document_id, list[document_set_names]) tuples\"\"\"\n\n    \"\"\"Building subqueries\"\"\"\n    # NOTE: have to build these subqueries first in order to guarantee that we get one\n    # returned row for each specified document_id. Basically, we want to do the filters first,\n    # then the outer joins.\n\n    # don't include CC pairs that are being deleted\n    # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them\n    # as we can assume their document sets are no longer relevant\n    valid_cc_pairs_subquery = aliased(\n        ConnectorCredentialPair,\n        select(ConnectorCredentialPair)\n        .where(\n            ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING\n        )  # noqa: E712\n        .subquery(),\n    )\n\n    valid_document_set__cc_pairs_subquery = aliased(\n        DocumentSet__ConnectorCredentialPair,\n        select(DocumentSet__ConnectorCredentialPair)\n        .where(DocumentSet__ConnectorCredentialPair.is_current == True)  # noqa: E712\n        .subquery(),\n    )\n    \"\"\"End building subqueries\"\"\"\n\n    stmt = (\n        select(\n            Document.id,\n            func.coalesce(\n                func.array_remove(func.array_agg(DocumentSetDBModel.name), None), []\n            ).label(\"document_set_names\"),\n        )\n        # Here we select document sets by relation:\n        # Document -> DocumentByConnectorCredentialPair -> ConnectorCredentialPair ->\n        # DocumentSet__ConnectorCredentialPair -> DocumentSet\n        .outerjoin(\n            DocumentByConnectorCredentialPair,\n            Document.id == DocumentByConnectorCredentialPair.id,\n        )\n        .outerjoin(\n            valid_cc_pairs_subquery,\n            and_(\n                DocumentByConnectorCredentialPair.connector_id\n                == valid_cc_pairs_subquery.connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == valid_cc_pairs_subquery.credential_id,\n            ),\n        )\n        .outerjoin(\n            valid_document_set__cc_pairs_subquery,\n            valid_cc_pairs_subquery.id\n            == valid_document_set__cc_pairs_subquery.connector_credential_pair_id,\n        )\n        .outerjoin(\n            DocumentSetDBModel,\n            DocumentSetDBModel.id\n            == valid_document_set__cc_pairs_subquery.document_set_id,\n        )\n        .where(Document.id.in_(document_ids))\n        .group_by(Document.id)\n    )\n    return db_session.execute(stmt).all()  # type: ignore\n\n\ndef get_or_create_document_set_by_name(\n    db_session: Session,\n    document_set_name: str,\n    document_set_description: str = \"Default Persona created Document-Set, please update description\",\n) -> DocumentSetDBModel:\n    \"\"\"This is used by the default personas which need to attach to document sets\n    on server startup\"\"\"\n    doc_set = get_document_set_by_name(db_session, document_set_name)\n    if doc_set is not None:\n        return doc_set\n\n    new_doc_set = DocumentSetDBModel(\n        name=document_set_name,\n        description=document_set_description,\n        user_id=None,\n        is_up_to_date=True,\n    )\n\n    db_session.add(new_doc_set)\n    db_session.commit()\n\n    return new_doc_set\n\n\ndef check_document_sets_are_public(\n    db_session: Session,\n    document_set_ids: list[int],\n) -> bool:\n    \"\"\"Checks if any of the CC-Pairs are Non Public (meaning that some documents in this document\n    set is not Public\"\"\"\n    connector_credential_pair_ids = (\n        db_session.query(\n            DocumentSet__ConnectorCredentialPair.connector_credential_pair_id\n        )\n        .filter(\n            DocumentSet__ConnectorCredentialPair.document_set_id.in_(document_set_ids)\n        )\n        .subquery()\n    )\n\n    not_public_exists = (\n        db_session.query(ConnectorCredentialPair.id)\n        .filter(\n            ConnectorCredentialPair.id.in_(\n                connector_credential_pair_ids  # type:ignore\n            ),\n            ConnectorCredentialPair.access_type != AccessType.PUBLIC,\n        )\n        .limit(1)\n        .first()\n        is not None\n    )\n\n    return not not_public_exists\n"
  },
  {
    "path": "backend/onyx/db/engine/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/db/engine/async_sql_engine.py",
    "content": "from collections.abc import AsyncGenerator\nfrom contextlib import asynccontextmanager\nfrom typing import Any\nfrom typing import AsyncContextManager\n\nimport asyncpg  # type: ignore\nfrom fastapi import HTTPException\nfrom sqlalchemy import event\nfrom sqlalchemy import pool\nfrom sqlalchemy.ext.asyncio import AsyncEngine\nfrom sqlalchemy.ext.asyncio import AsyncSession\nfrom sqlalchemy.ext.asyncio import create_async_engine\n\nfrom onyx.configs.app_configs import AWS_REGION_NAME\nfrom onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW\nfrom onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE\nfrom onyx.configs.app_configs import POSTGRES_DB\nfrom onyx.configs.app_configs import POSTGRES_HOST\nfrom onyx.configs.app_configs import POSTGRES_POOL_PRE_PING\nfrom onyx.configs.app_configs import POSTGRES_POOL_RECYCLE\nfrom onyx.configs.app_configs import POSTGRES_PORT\nfrom onyx.configs.app_configs import POSTGRES_USE_NULL_POOL\nfrom onyx.configs.app_configs import POSTGRES_USER\nfrom onyx.db.engine.iam_auth import create_ssl_context_if_iam\nfrom onyx.db.engine.iam_auth import get_iam_auth_token\nfrom onyx.db.engine.sql_engine import ASYNC_DB_API\nfrom onyx.db.engine.sql_engine import build_connection_string\nfrom onyx.db.engine.sql_engine import is_valid_schema_name\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.engine.sql_engine import USE_IAM_AUTH\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE\nfrom shared_configs.contextvars import get_current_tenant_id\n\n\n# Global so we don't create more than one engine per process\n_ASYNC_ENGINE: AsyncEngine | None = None\n\n\nasync def get_async_connection() -> Any:\n    \"\"\"\n    Custom connection function for async engine when using IAM auth.\n    \"\"\"\n    host = POSTGRES_HOST\n    port = POSTGRES_PORT\n    user = POSTGRES_USER\n    db = POSTGRES_DB\n    token = get_iam_auth_token(host, port, user, AWS_REGION_NAME)\n\n    # asyncpg requires 'ssl=\"require\"' if SSL needed\n    return await asyncpg.connect(\n        user=user, password=token, host=host, port=int(port), database=db, ssl=\"require\"\n    )\n\n\ndef get_sqlalchemy_async_engine() -> AsyncEngine:\n    global _ASYNC_ENGINE\n    if _ASYNC_ENGINE is None:\n        app_name = SqlEngine.get_app_name() + \"_async\"\n        connection_string = build_connection_string(\n            db_api=ASYNC_DB_API,\n            use_iam_auth=USE_IAM_AUTH,\n        )\n\n        connect_args: dict[str, Any] = {}\n        if app_name:\n            connect_args[\"server_settings\"] = {\"application_name\": app_name}\n\n        connect_args[\"ssl\"] = create_ssl_context_if_iam()\n\n        engine_kwargs = {\n            \"connect_args\": connect_args,\n            \"pool_pre_ping\": POSTGRES_POOL_PRE_PING,\n            \"pool_recycle\": POSTGRES_POOL_RECYCLE,\n        }\n\n        if POSTGRES_USE_NULL_POOL:\n            engine_kwargs[\"poolclass\"] = pool.NullPool\n        else:\n            engine_kwargs[\"pool_size\"] = POSTGRES_API_SERVER_POOL_SIZE\n            engine_kwargs[\"max_overflow\"] = POSTGRES_API_SERVER_POOL_OVERFLOW\n\n        _ASYNC_ENGINE = create_async_engine(\n            connection_string,\n            **engine_kwargs,\n        )\n\n        if USE_IAM_AUTH:\n\n            @event.listens_for(_ASYNC_ENGINE.sync_engine, \"do_connect\")\n            def provide_iam_token_async(\n                dialect: Any,  # noqa: ARG001\n                conn_rec: Any,  # noqa: ARG001\n                cargs: Any,  # noqa: ARG001\n                cparams: Any,\n            ) -> None:\n                # For async engine using asyncpg, we still need to set the IAM token here.\n                host = POSTGRES_HOST\n                port = POSTGRES_PORT\n                user = POSTGRES_USER\n                token = get_iam_auth_token(host, port, user, AWS_REGION_NAME)\n                cparams[\"password\"] = token\n                cparams[\"ssl\"] = create_ssl_context_if_iam()\n\n    return _ASYNC_ENGINE\n\n\nasync def get_async_session(\n    tenant_id: str | None = None,\n) -> AsyncGenerator[AsyncSession, None]:\n    \"\"\"For use w/ Depends for *async* FastAPI endpoints.\n\n    For standard `async with ... as ...` use, use get_async_session_context_manager.\n    \"\"\"\n\n    if tenant_id is None:\n        tenant_id = get_current_tenant_id()\n\n    if not is_valid_schema_name(tenant_id):\n        raise HTTPException(status_code=400, detail=\"Invalid tenant ID\")\n\n    engine = get_sqlalchemy_async_engine()\n\n    # no need to use the schema translation map for self-hosted + default schema\n    if not MULTI_TENANT and tenant_id == POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE:\n        async with AsyncSession(bind=engine, expire_on_commit=False) as session:\n            yield session\n        return\n\n    # Create connection with schema translation to handle querying the right schema\n    schema_translate_map = {None: tenant_id}\n    async with engine.connect() as connection:\n        connection = await connection.execution_options(\n            schema_translate_map=schema_translate_map\n        )\n        async with AsyncSession(\n            bind=connection, expire_on_commit=False\n        ) as async_session:\n            yield async_session\n\n\ndef get_async_session_context_manager(\n    tenant_id: str | None = None,\n) -> AsyncContextManager[AsyncSession]:\n    return asynccontextmanager(get_async_session)(tenant_id)\n"
  },
  {
    "path": "backend/onyx/db/engine/connection_warmup.py",
    "content": "from sqlalchemy import text\n\nfrom onyx.db.engine.async_sql_engine import get_sqlalchemy_async_engine\nfrom onyx.db.engine.sql_engine import get_sqlalchemy_engine\n\n\nasync def warm_up_connections(\n    sync_connections_to_warm_up: int = 20, async_connections_to_warm_up: int = 20\n) -> None:\n    sync_postgres_engine = get_sqlalchemy_engine()\n    connections = [\n        sync_postgres_engine.connect() for _ in range(sync_connections_to_warm_up)\n    ]\n    for conn in connections:\n        conn.execute(text(\"SELECT 1\"))\n    for conn in connections:\n        conn.close()\n\n    async_postgres_engine = get_sqlalchemy_async_engine()\n    async_connections = [\n        await async_postgres_engine.connect()\n        for _ in range(async_connections_to_warm_up)\n    ]\n    for async_conn in async_connections:\n        await async_conn.execute(text(\"SELECT 1\"))\n    for async_conn in async_connections:\n        await async_conn.close()\n"
  },
  {
    "path": "backend/onyx/db/engine/iam_auth.py",
    "content": "import functools\nimport os\nimport ssl\nfrom typing import Any\n\nimport boto3\n\nfrom onyx.configs.app_configs import POSTGRES_HOST\nfrom onyx.configs.app_configs import POSTGRES_PORT\nfrom onyx.configs.app_configs import POSTGRES_USER\nfrom onyx.configs.app_configs import USE_IAM_AUTH\nfrom onyx.configs.constants import SSL_CERT_FILE\n\n\ndef get_iam_auth_token(\n    host: str, port: str, user: str, region: str = \"us-east-2\"\n) -> str:\n    \"\"\"\n    Generate an IAM authentication token using boto3.\n    \"\"\"\n    client = boto3.client(\"rds\", region_name=region)\n    token = client.generate_db_auth_token(\n        DBHostname=host, Port=int(port), DBUsername=user\n    )\n    return token\n\n\ndef configure_psycopg2_iam_auth(\n    cparams: dict[str, Any], host: str, port: str, user: str, region: str\n) -> None:\n    \"\"\"\n    Configure cparams for psycopg2 with IAM token and SSL.\n    \"\"\"\n    token = get_iam_auth_token(host, port, user, region)\n    cparams[\"password\"] = token\n    cparams[\"sslmode\"] = \"require\"\n    cparams[\"sslrootcert\"] = SSL_CERT_FILE\n\n\ndef provide_iam_token(\n    dialect: Any,  # noqa: ARG001\n    conn_rec: Any,  # noqa: ARG001\n    cargs: Any,  # noqa: ARG001\n    cparams: Any,\n) -> None:\n    if USE_IAM_AUTH:\n        host = POSTGRES_HOST\n        port = POSTGRES_PORT\n        user = POSTGRES_USER\n        region = os.getenv(\"AWS_REGION_NAME\", \"us-east-2\")\n        # Configure for psycopg2 with IAM token\n        configure_psycopg2_iam_auth(cparams, host, port, user, region)\n\n\n@functools.cache\ndef create_ssl_context_if_iam() -> ssl.SSLContext | None:\n    \"\"\"Create an SSL context if IAM authentication is enabled, else return None.\"\"\"\n    if USE_IAM_AUTH:\n        return ssl.create_default_context(cafile=SSL_CERT_FILE)\n    return None\n"
  },
  {
    "path": "backend/onyx/db/engine/sql_engine.py",
    "content": "import os\nimport re\nimport threading\nimport time\nfrom collections.abc import Generator\nfrom contextlib import contextmanager\nfrom typing import Any\n\nfrom fastapi import HTTPException\nfrom sqlalchemy import event\nfrom sqlalchemy import pool\nfrom sqlalchemy.engine import create_engine\nfrom sqlalchemy.engine import Engine\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DB_READONLY_PASSWORD\nfrom onyx.configs.app_configs import DB_READONLY_USER\nfrom onyx.configs.app_configs import LOG_POSTGRES_CONN_COUNTS\nfrom onyx.configs.app_configs import LOG_POSTGRES_LATENCY\nfrom onyx.configs.app_configs import POSTGRES_DB\nfrom onyx.configs.app_configs import POSTGRES_HOST\nfrom onyx.configs.app_configs import POSTGRES_PASSWORD\nfrom onyx.configs.app_configs import POSTGRES_POOL_PRE_PING\nfrom onyx.configs.app_configs import POSTGRES_POOL_RECYCLE\nfrom onyx.configs.app_configs import POSTGRES_PORT\nfrom onyx.configs.app_configs import POSTGRES_USE_NULL_POOL\nfrom onyx.configs.app_configs import POSTGRES_USER\nfrom onyx.configs.constants import POSTGRES_UNKNOWN_APP_NAME\nfrom onyx.db.engine.iam_auth import provide_iam_token\nfrom onyx.server.utils import BasicAuthenticationError\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom shared_configs.contextvars import get_current_tenant_id\n\n# Moved is_valid_schema_name here to avoid circular import\n\n\nlogger = setup_logger()\n\n\n# Schema name validation (moved here to avoid circular import)\nSCHEMA_NAME_REGEX = re.compile(r\"^[a-zA-Z0-9_-]+$\")\n\n\ndef is_valid_schema_name(name: str) -> bool:\n    return SCHEMA_NAME_REGEX.match(name) is not None\n\n\nSYNC_DB_API = \"psycopg2\"\nASYNC_DB_API = \"asyncpg\"\n\n# why isn't this in configs?\nUSE_IAM_AUTH = os.getenv(\"USE_IAM_AUTH\", \"False\").lower() == \"true\"\n\n\ndef build_connection_string(\n    *,\n    db_api: str = ASYNC_DB_API,\n    user: str = POSTGRES_USER,\n    password: str = POSTGRES_PASSWORD,\n    host: str = POSTGRES_HOST,\n    port: str = POSTGRES_PORT,\n    db: str = POSTGRES_DB,\n    app_name: str | None = None,\n    use_iam_auth: bool = USE_IAM_AUTH,\n    region: str = \"us-west-2\",  # noqa: ARG001\n) -> str:\n    if use_iam_auth:\n        base_conn_str = f\"postgresql+{db_api}://{user}@{host}:{port}/{db}\"\n    else:\n        base_conn_str = f\"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}\"\n\n    # For asyncpg, do not include application_name in the connection string\n    if app_name and db_api != \"asyncpg\":\n        if \"?\" in base_conn_str:\n            return f\"{base_conn_str}&application_name={app_name}\"\n        else:\n            return f\"{base_conn_str}?application_name={app_name}\"\n    return base_conn_str\n\n\nif LOG_POSTGRES_LATENCY:\n\n    @event.listens_for(Engine, \"before_cursor_execute\")\n    def before_cursor_execute(  # type: ignore\n        conn,\n        cursor,  # noqa: ARG001\n        statement,  # noqa: ARG001\n        parameters,  # noqa: ARG001\n        context,  # noqa: ARG001\n        executemany,  # noqa: ARG001\n    ):\n        conn.info[\"query_start_time\"] = time.time()\n\n    @event.listens_for(Engine, \"after_cursor_execute\")\n    def after_cursor_execute(  # type: ignore\n        conn,\n        cursor,  # noqa: ARG001\n        statement,\n        parameters,  # noqa: ARG001\n        context,  # noqa: ARG001\n        executemany,  # noqa: ARG001\n    ):\n        total_time = time.time() - conn.info[\"query_start_time\"]\n        if total_time > 0.1:\n            logger.debug(\n                f\"Query Complete: {statement}\\n\\nTotal Time: {total_time:.4f} seconds\"\n            )\n\n\nif LOG_POSTGRES_CONN_COUNTS:\n    checkout_count = 0\n    checkin_count = 0\n\n    @event.listens_for(Engine, \"checkout\")\n    def log_checkout(dbapi_connection, connection_record, connection_proxy):  # type: ignore  # noqa: ARG001\n        global checkout_count\n        checkout_count += 1\n\n        active_connections = connection_proxy._pool.checkedout()\n        idle_connections = connection_proxy._pool.checkedin()\n        pool_size = connection_proxy._pool.size()\n        logger.debug(\n            \"Connection Checkout\\n\"\n            f\"Active Connections: {active_connections};\\n\"\n            f\"Idle: {idle_connections};\\n\"\n            f\"Pool Size: {pool_size};\\n\"\n            f\"Total connection checkouts: {checkout_count}\"\n        )\n\n    @event.listens_for(Engine, \"checkin\")\n    def log_checkin(dbapi_connection, connection_record):  # type: ignore  # noqa: ARG001\n        global checkin_count\n        checkin_count += 1\n        logger.debug(f\"Total connection checkins: {checkin_count}\")\n\n\nclass SqlEngine:\n    _engine: Engine | None = None\n    _readonly_engine: Engine | None = None\n    _lock: threading.Lock = threading.Lock()\n    _readonly_lock: threading.Lock = threading.Lock()\n    _app_name: str = POSTGRES_UNKNOWN_APP_NAME\n\n    @classmethod\n    def init_engine(\n        cls,\n        pool_size: int,\n        # is really `pool_max_overflow`, but calling it `max_overflow` to stay consistent with SQLAlchemy\n        max_overflow: int,\n        app_name: str | None = None,  # noqa: ARG003\n        db_api: str = SYNC_DB_API,\n        use_iam: bool = USE_IAM_AUTH,\n        connection_string: str | None = None,\n        **extra_engine_kwargs: Any,\n    ) -> None:\n        \"\"\"NOTE: enforce that pool_size and pool_max_overflow are passed in. These are\n        important args, and if incorrectly specified, we have run into hitting the pool\n        limit / using too many connections and overwhelming the database.\n\n        Specifying connection_string directly will cause some of the other parameters\n        to be ignored.\n        \"\"\"\n        with cls._lock:\n            if cls._engine:\n                return\n\n            if not connection_string:\n                connection_string = build_connection_string(\n                    db_api=db_api,\n                    app_name=cls._app_name + \"_sync\",\n                    use_iam_auth=use_iam,\n                )\n\n            # Start with base kwargs that are valid for all pool types\n            final_engine_kwargs: dict[str, Any] = {}\n\n            if POSTGRES_USE_NULL_POOL:\n                # if null pool is specified, then we need to make sure that\n                # we remove any passed in kwargs related to pool size that would\n                # cause the initialization to fail\n                final_engine_kwargs.update(extra_engine_kwargs)\n\n                final_engine_kwargs[\"poolclass\"] = pool.NullPool\n                if \"pool_size\" in final_engine_kwargs:\n                    del final_engine_kwargs[\"pool_size\"]\n                if \"max_overflow\" in final_engine_kwargs:\n                    del final_engine_kwargs[\"max_overflow\"]\n            else:\n                final_engine_kwargs[\"pool_size\"] = pool_size\n                final_engine_kwargs[\"max_overflow\"] = max_overflow\n                final_engine_kwargs[\"pool_pre_ping\"] = POSTGRES_POOL_PRE_PING\n                final_engine_kwargs[\"pool_recycle\"] = POSTGRES_POOL_RECYCLE\n\n                # any passed in kwargs override the defaults\n                final_engine_kwargs.update(extra_engine_kwargs)\n\n            logger.info(f\"Creating engine with kwargs: {final_engine_kwargs}\")\n            # echo=True here for inspecting all emitted db queries\n            engine = create_engine(connection_string, **final_engine_kwargs)\n\n            if use_iam:\n                event.listen(engine, \"do_connect\", provide_iam_token)\n\n            cls._engine = engine\n\n    @classmethod\n    def init_readonly_engine(\n        cls,\n        pool_size: int,\n        # is really `pool_max_overflow`, but calling it `max_overflow` to stay consistent with SQLAlchemy\n        max_overflow: int,\n        **extra_engine_kwargs: Any,\n    ) -> None:\n        \"\"\"NOTE: enforce that pool_size and pool_max_overflow are passed in. These are\n        important args, and if incorrectly specified, we have run into hitting the pool\n        limit / using too many connections and overwhelming the database.\"\"\"\n        with cls._readonly_lock:\n            if cls._readonly_engine:\n                return\n\n            if not DB_READONLY_USER or not DB_READONLY_PASSWORD:\n                raise ValueError(\n                    \"Custom database user credentials not configured in environment variables\"\n                )\n\n            # Build connection string with custom user\n            connection_string = build_connection_string(\n                user=DB_READONLY_USER,\n                password=DB_READONLY_PASSWORD,\n                use_iam_auth=False,  # Custom users typically don't use IAM auth\n                db_api=SYNC_DB_API,  # Explicitly use sync DB API\n            )\n\n            # Start with base kwargs that are valid for all pool types\n            final_engine_kwargs: dict[str, Any] = {}\n\n            if POSTGRES_USE_NULL_POOL:\n                # if null pool is specified, then we need to make sure that\n                # we remove any passed in kwargs related to pool size that would\n                # cause the initialization to fail\n                final_engine_kwargs.update(extra_engine_kwargs)\n\n                final_engine_kwargs[\"poolclass\"] = pool.NullPool\n                if \"pool_size\" in final_engine_kwargs:\n                    del final_engine_kwargs[\"pool_size\"]\n                if \"max_overflow\" in final_engine_kwargs:\n                    del final_engine_kwargs[\"max_overflow\"]\n            else:\n                final_engine_kwargs[\"pool_size\"] = pool_size\n                final_engine_kwargs[\"max_overflow\"] = max_overflow\n                final_engine_kwargs[\"pool_pre_ping\"] = POSTGRES_POOL_PRE_PING\n                final_engine_kwargs[\"pool_recycle\"] = POSTGRES_POOL_RECYCLE\n\n                # any passed in kwargs override the defaults\n                final_engine_kwargs.update(extra_engine_kwargs)\n\n            logger.info(f\"Creating engine with kwargs: {final_engine_kwargs}\")\n            # echo=True here for inspecting all emitted db queries\n            engine = create_engine(connection_string, **final_engine_kwargs)\n\n            if USE_IAM_AUTH:\n                event.listen(engine, \"do_connect\", provide_iam_token)\n\n            cls._readonly_engine = engine\n\n    @classmethod\n    def get_engine(cls) -> Engine:\n        if not cls._engine:\n            raise RuntimeError(\"Engine not initialized. Must call init_engine first.\")\n        return cls._engine\n\n    @classmethod\n    def get_readonly_engine(cls) -> Engine:\n        if not cls._readonly_engine:\n            raise RuntimeError(\n                \"Readonly engine not initialized. Must call init_readonly_engine first.\"\n            )\n        return cls._readonly_engine\n\n    @classmethod\n    def set_app_name(cls, app_name: str) -> None:\n        cls._app_name = app_name\n\n    @classmethod\n    def get_app_name(cls) -> str:\n        if not cls._app_name:\n            return \"\"\n        return cls._app_name\n\n    @classmethod\n    def reset_engine(cls) -> None:\n        with cls._lock:\n            if cls._engine:\n                cls._engine.dispose()\n                cls._engine = None\n\n    @classmethod\n    @contextmanager\n    def scoped_engine(cls, **init_kwargs: Any) -> Generator[None, None, None]:\n        \"\"\"Context manager that initializes the engine and guarantees cleanup.\"\"\"\n        cls.init_engine(**init_kwargs)\n        try:\n            yield\n        finally:\n            cls.reset_engine()\n\n\ndef get_sqlalchemy_engine() -> Engine:\n    return SqlEngine.get_engine()\n\n\ndef get_readonly_sqlalchemy_engine() -> Engine:\n    return SqlEngine.get_readonly_engine()\n\n\n@contextmanager\ndef get_session_with_current_tenant() -> Generator[Session, None, None]:\n    \"\"\"Standard way to get a DB session.\"\"\"\n    tenant_id = get_current_tenant_id()\n    with get_session_with_tenant(tenant_id=tenant_id) as session:\n        yield session\n\n\n@contextmanager\ndef get_session_with_current_tenant_if_none(\n    session: Session | None,\n) -> Generator[Session, None, None]:\n    if session is None:\n        tenant_id = get_current_tenant_id()\n        with get_session_with_tenant(tenant_id=tenant_id) as session:\n            yield session\n    else:\n        yield session\n\n\n# Used in multi tenant mode when need to refer to the shared `public` schema\n@contextmanager\ndef get_session_with_shared_schema() -> Generator[Session, None, None]:\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)\n    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as session:\n        yield session\n    CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\n@contextmanager\ndef get_session_with_tenant(*, tenant_id: str) -> Generator[Session, None, None]:\n    \"\"\"\n    Generate a database session for a specific tenant.\n    \"\"\"\n    engine = get_sqlalchemy_engine()\n\n    if not is_valid_schema_name(tenant_id):\n        raise HTTPException(status_code=400, detail=\"Invalid tenant ID\")\n\n    # no need to use the schema translation map for self-hosted + default schema\n    if not MULTI_TENANT and tenant_id == POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE:\n        with Session(bind=engine, expire_on_commit=False) as session:\n            yield session\n        return\n\n    # Create connection with schema translation to handle querying the right schema\n    schema_translate_map = {None: tenant_id}\n    with engine.connect().execution_options(\n        schema_translate_map=schema_translate_map\n    ) as connection:\n        with Session(bind=connection, expire_on_commit=False) as session:\n            yield session\n\n\ndef get_session() -> Generator[Session, None, None]:\n    \"\"\"For use w/ Depends for FastAPI endpoints.\n\n    Has some additional validation, and likely should be merged\n    with get_session_with_current_tenant in the future.\"\"\"\n    tenant_id = get_current_tenant_id()\n    if tenant_id == POSTGRES_DEFAULT_SCHEMA and MULTI_TENANT:\n        raise BasicAuthenticationError(detail=\"User must authenticate\")\n\n    if not is_valid_schema_name(tenant_id):\n        raise HTTPException(status_code=400, detail=\"Invalid tenant ID\")\n\n    with get_session_with_current_tenant() as db_session:\n        yield db_session\n\n\n@contextmanager\ndef get_db_readonly_user_session_with_current_tenant() -> (\n    Generator[Session, None, None]\n):\n    \"\"\"\n    Generate a database session using a custom database user for the current tenant.\n    The custom user credentials are obtained from environment variables.\n    \"\"\"\n    tenant_id = get_current_tenant_id()\n\n    readonly_engine = get_readonly_sqlalchemy_engine()\n\n    if not is_valid_schema_name(tenant_id):\n        raise HTTPException(status_code=400, detail=\"Invalid tenant ID\")\n\n    # no need to use the schema translation map for self-hosted + default schema\n    if not MULTI_TENANT and tenant_id == POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE:\n        with Session(readonly_engine, expire_on_commit=False) as session:\n            yield session\n        return\n\n    schema_translate_map = {None: tenant_id}\n    with readonly_engine.connect().execution_options(\n        schema_translate_map=schema_translate_map\n    ) as connection:\n        with Session(bind=connection, expire_on_commit=False) as session:\n            yield session\n"
  },
  {
    "path": "backend/onyx/db/engine/tenant_utils.py",
    "content": "from sqlalchemy import text\n\nfrom onyx.db.engine.sql_engine import get_session_with_shared_schema\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.configs import TENANT_ID_PREFIX\n\n\ndef get_schemas_needing_migration(\n    tenant_schemas: list[str], head_rev: str\n) -> list[str]:\n    \"\"\"Return only schemas whose current alembic version is not at head.\n\n    Uses a server-side PL/pgSQL loop to collect each schema's alembic version\n    into a temp table one at a time. This avoids building a massive UNION ALL\n    query (which locks the DB and times out at 17k+ schemas) and instead\n    acquires locks sequentially, one schema per iteration.\n    \"\"\"\n    if not tenant_schemas:\n        return []\n\n    engine = SqlEngine.get_engine()\n\n    with engine.connect() as conn:\n        # Populate a temp input table with exactly the schemas we care about.\n        # The DO block reads from this table so it only iterates the requested\n        # schemas instead of every tenant_% schema in the database.\n        conn.execute(text(\"DROP TABLE IF EXISTS _alembic_version_snapshot\"))\n        conn.execute(text(\"DROP TABLE IF EXISTS _tenant_schemas_input\"))\n        conn.execute(text(\"CREATE TEMP TABLE _tenant_schemas_input (schema_name text)\"))\n        conn.execute(\n            text(\n                \"INSERT INTO _tenant_schemas_input (schema_name) SELECT unnest(CAST(:schemas AS text[]))\"\n            ),\n            {\"schemas\": tenant_schemas},\n        )\n        conn.execute(\n            text(\n                \"CREATE TEMP TABLE _alembic_version_snapshot (schema_name text, version_num text)\"\n            )\n        )\n\n        conn.execute(\n            text(\n                \"\"\"\n                DO $$\n                DECLARE\n                    s        text;\n                    schemas  text[];\n                BEGIN\n                    SELECT array_agg(schema_name) INTO schemas\n                    FROM _tenant_schemas_input;\n\n                    IF schemas IS NULL THEN\n                        RAISE NOTICE 'No tenant schemas found.';\n                        RETURN;\n                    END IF;\n\n                    FOREACH s IN ARRAY schemas LOOP\n                        BEGIN\n                            EXECUTE format(\n                                'INSERT INTO _alembic_version_snapshot\n                                 SELECT %L, version_num FROM %I.alembic_version',\n                                s, s\n                            );\n                        EXCEPTION\n                            -- undefined_table: schema exists but has no alembic_version\n                            --   table yet (new tenant, not yet migrated).\n                            -- invalid_schema_name: tenant is registered but its\n                            --   PostgreSQL schema does not exist yet (e.g. provisioning\n                            --   incomplete). Both cases mean no version is available and\n                            --   the schema will be included in the migration list.\n                            WHEN undefined_table THEN NULL;\n                            WHEN invalid_schema_name THEN NULL;\n                        END;\n                    END LOOP;\n                END;\n                $$\n                \"\"\"\n            )\n        )\n\n        rows = conn.execute(\n            text(\"SELECT schema_name, version_num FROM _alembic_version_snapshot\")\n        )\n        version_by_schema = {row[0]: row[1] for row in rows}\n\n        conn.execute(text(\"DROP TABLE IF EXISTS _alembic_version_snapshot\"))\n        conn.execute(text(\"DROP TABLE IF EXISTS _tenant_schemas_input\"))\n\n    # Schemas missing from the snapshot have no alembic_version table yet and\n    # also need migration. version_by_schema.get(s) returns None for those,\n    # and None != head_rev, so they are included automatically.\n    return [s for s in tenant_schemas if version_by_schema.get(s) != head_rev]\n\n\ndef get_all_tenant_ids() -> list[str]:\n    \"\"\"Returning [None] means the only tenant is the 'public' or self hosted tenant.\"\"\"\n\n    tenant_ids: list[str]\n\n    if not MULTI_TENANT:\n        return [POSTGRES_DEFAULT_SCHEMA]\n\n    with get_session_with_shared_schema() as session:\n        result = session.execute(\n            text(\n                f\"\"\"\n                SELECT schema_name\n                FROM information_schema.schemata\n                WHERE schema_name NOT IN ('pg_catalog', 'information_schema', '{POSTGRES_DEFAULT_SCHEMA}')\"\"\"\n            )\n        )\n        tenant_ids = [row[0] for row in result]\n\n    valid_tenants = [\n        tenant\n        for tenant in tenant_ids\n        if tenant is None or tenant.startswith(TENANT_ID_PREFIX)\n    ]\n    return valid_tenants\n"
  },
  {
    "path": "backend/onyx/db/engine/time_utils.py",
    "content": "from datetime import datetime\n\nfrom sqlalchemy import text\nfrom sqlalchemy.orm import Session\n\n\ndef get_db_current_time(db_session: Session) -> datetime:\n    result = db_session.execute(text(\"SELECT NOW()\")).scalar()\n    if result is None:\n        raise ValueError(\"Database did not return a time\")\n    return result\n"
  },
  {
    "path": "backend/onyx/db/entities.py",
    "content": "import uuid\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import List\n\nfrom sqlalchemy import func\nfrom sqlalchemy import literal\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\nfrom sqlalchemy.dialects.postgresql import JSONB\nfrom sqlalchemy.orm import Session\n\nimport onyx.db.document as dbdocument\nfrom onyx.db.entity_type import UNGROUNDED_SOURCE_NAME\nfrom onyx.db.models import Document\nfrom onyx.db.models import KGEntity\nfrom onyx.db.models import KGEntityExtractionStaging\nfrom onyx.db.models import KGEntityType\nfrom onyx.kg.models import KGGroundingType\nfrom onyx.kg.models import KGStage\nfrom onyx.kg.utils.formatting_utils import make_entity_id\n\n\ndef upsert_staging_entity(\n    db_session: Session,\n    name: str,\n    entity_type: str,\n    document_id: str | None = None,\n    occurrences: int = 1,\n    attributes: dict[str, str] | None = None,\n    event_time: datetime | None = None,\n) -> KGEntityExtractionStaging:\n    \"\"\"Add or update a new staging entity to the database.\n\n    Args:\n        db_session: SQLAlchemy session\n        name: Name of the entity\n        entity_type: Type of the entity (must match an existing KGEntityType)\n        document_id: ID of the document the entity belongs to\n        occurrences: Number of times this entity has been found\n        attributes: Attributes of the entity\n        event_time: Time the entity was added to the database\n\n    Returns:\n        KGEntityExtractionStaging: The created entity\n    \"\"\"\n    entity_type = entity_type.upper()\n    name = name.title()\n    id_name = make_entity_id(entity_type, name)\n    attributes = attributes or {}\n\n    entity_key = attributes.get(\"key\")\n    entity_parent = attributes.get(\"parent\")\n\n    keep_attributes = {\n        attr_key: attr_val\n        for attr_key, attr_val in attributes.items()\n        if attr_key not in (\"key\", \"parent\")\n    }\n\n    # Create new entity\n    stmt = (\n        pg_insert(KGEntityExtractionStaging)\n        .values(\n            id_name=id_name,\n            name=name,\n            entity_type_id_name=entity_type,\n            entity_key=entity_key,\n            parent_key=entity_parent,\n            document_id=document_id,\n            occurrences=occurrences,\n            attributes=keep_attributes,\n            event_time=event_time,\n        )\n        .on_conflict_do_update(\n            index_elements=[\"id_name\"],\n            set_=dict(\n                occurrences=KGEntityExtractionStaging.occurrences + occurrences,\n            ),\n        )\n        .returning(KGEntityExtractionStaging)\n    )\n\n    result = db_session.execute(stmt).scalar()\n    if result is None:\n        raise RuntimeError(\n            f\"Failed to create or increment staging entity with id_name: {id_name}\"\n        )\n\n    # Update the document's kg_stage if document_id is provided\n    if document_id is not None:\n        db_session.query(Document).filter(Document.id == document_id).update(\n            {\n                \"kg_stage\": KGStage.EXTRACTED,\n                \"kg_processing_time\": datetime.now(timezone.utc),\n            }\n        )\n    db_session.flush()\n\n    return result\n\n\ndef transfer_entity(\n    db_session: Session,\n    entity: KGEntityExtractionStaging,\n) -> KGEntity:\n    \"\"\"Transfer an entity from the extraction staging table to the normalized table.\n\n    Args:\n        db_session: SQLAlchemy session\n        entity: Entity to transfer\n\n    Returns:\n        KGEntity: The transferred entity\n    \"\"\"\n    # Create the transferred entity\n    stmt = (\n        pg_insert(KGEntity)\n        .values(\n            id_name=make_entity_id(entity.entity_type_id_name, uuid.uuid4().hex[:20]),\n            name=entity.name.casefold(),\n            entity_key=entity.entity_key,\n            parent_key=entity.parent_key,\n            alternative_names=entity.alternative_names or [],\n            entity_type_id_name=entity.entity_type_id_name,\n            document_id=entity.document_id,\n            occurrences=entity.occurrences,\n            attributes=entity.attributes,\n            event_time=entity.event_time,\n        )\n        .on_conflict_do_update(\n            index_elements=[\"name\", \"entity_type_id_name\", \"document_id\"],\n            set_=dict(\n                occurrences=KGEntity.occurrences + entity.occurrences,\n                attributes=KGEntity.attributes.op(\"||\")(\n                    literal(entity.attributes, JSONB)\n                ),\n                entity_key=func.coalesce(KGEntity.entity_key, entity.entity_key),\n                parent_key=func.coalesce(KGEntity.parent_key, entity.parent_key),\n                event_time=entity.event_time,\n                time_updated=datetime.now(),\n            ),\n        )\n        .returning(KGEntity)\n    )\n    new_entity = db_session.execute(stmt).scalar()\n    if new_entity is None:\n        raise RuntimeError(f\"Failed to transfer entity with id_name: {entity.id_name}\")\n\n    # Update the document's kg_stage if document_id is provided\n    if entity.document_id is not None:\n        dbdocument.update_document_kg_info(\n            db_session,\n            document_id=entity.document_id,\n            kg_stage=KGStage.NORMALIZED,\n        )\n\n    # Update transferred\n    db_session.query(KGEntityExtractionStaging).filter(\n        KGEntityExtractionStaging.id_name == entity.id_name\n    ).update({\"transferred_id_name\": new_entity.id_name})\n    db_session.flush()\n\n    return new_entity\n\n\ndef merge_entities(\n    db_session: Session, parent: KGEntity, child: KGEntityExtractionStaging\n) -> KGEntity:\n    \"\"\"Merge an entity from the extraction staging table into\n    an existing entity in the normalized table.\n\n    Args:\n        db_session: SQLAlchemy session\n        parent: Parent entity to merge into\n        child: Child staging entity to merge\n\n    Returns:\n        KGEntity: The merged entity\n    \"\"\"\n    # check we're not merging two entities with different document_ids\n    if (\n        parent.document_id is not None\n        and child.document_id is not None\n        and parent.document_id != child.document_id\n    ):\n        raise ValueError(\n            \"Overwriting the document_id of an entity with a document_id already is not allowed\"\n        )\n\n    # update the parent entity (only document_id, alternative_names, occurrences)\n    setting_doc = parent.document_id is None and child.document_id is not None\n    document_id = child.document_id if setting_doc else parent.document_id\n    alternative_names = set(parent.alternative_names or [])\n    alternative_names.update(child.alternative_names or [])\n    alternative_names.add(child.name.lower())\n    alternative_names.discard(parent.name)\n\n    stmt = (\n        update(KGEntity)\n        .where(KGEntity.id_name == parent.id_name)\n        .values(\n            document_id=document_id,\n            alternative_names=list(alternative_names),\n            occurrences=parent.occurrences + child.occurrences,\n            attributes=parent.attributes | child.attributes,\n            entity_key=parent.entity_key or child.entity_key,\n            parent_key=parent.parent_key or child.parent_key,\n        )\n        .returning(KGEntity)\n    )\n\n    result = db_session.execute(stmt).scalar()\n    if result is None:\n        raise RuntimeError(f\"Failed to merge entities with id_name: {parent.id_name}\")\n\n    # Update the document's kg_stage if document_id is set\n    if setting_doc and child.document_id is not None:\n        dbdocument.update_document_kg_info(\n            db_session,\n            document_id=child.document_id,\n            kg_stage=KGStage.NORMALIZED,\n        )\n\n    # Update transferred\n    db_session.query(KGEntityExtractionStaging).filter(\n        KGEntityExtractionStaging.id_name == child.id_name\n    ).update({\"transferred_id_name\": parent.id_name})\n    db_session.flush()\n\n    return result\n\n\ndef get_kg_entity_by_document(db: Session, document_id: str) -> KGEntity | None:\n    \"\"\"\n    Check if a document_id exists in the kg_entities table and return its id_name if found.\n\n    Args:\n        db: SQLAlchemy database session\n        document_id: The document ID to search for\n\n    Returns:\n        The id_name of the matching KGEntity if found, None otherwise\n    \"\"\"\n    query = select(KGEntity).where(KGEntity.document_id == document_id)\n    result = db.execute(query).scalar()\n    return result\n\n\ndef get_grounded_entities_by_types(\n    db_session: Session, entity_types: List[str], grounding: KGGroundingType\n) -> List[KGEntity]:\n    \"\"\"Get all entities matching an entity_type.\n\n    Args:\n        db_session: SQLAlchemy session\n        entity_types: List of entity types to filter by\n\n    Returns:\n        List of KGEntity objects belonging to the specified entity types\n    \"\"\"\n    return (\n        db_session.query(KGEntity)\n        .join(KGEntityType, KGEntity.entity_type_id_name == KGEntityType.id_name)\n        .filter(KGEntity.entity_type_id_name.in_(entity_types))\n        .filter(KGEntityType.grounding == grounding)\n        .all()\n    )\n\n\ndef get_document_id_for_entity(db_session: Session, entity_id_name: str) -> str | None:\n    \"\"\"Get the document ID associated with an entity.\n\n    Args:\n        db_session: SQLAlchemy database session\n        entity_id_name: The entity id_name to look up\n\n    Returns:\n        The document ID if found, None otherwise\n    \"\"\"\n    entity = (\n        db_session.query(KGEntity).filter(KGEntity.id_name == entity_id_name).first()\n    )\n    return entity.document_id if entity else None\n\n\ndef delete_from_kg_entities_extraction_staging__no_commit(\n    db_session: Session, document_ids: list[str]\n) -> None:\n    \"\"\"Delete entities from the extraction staging table.\"\"\"\n    db_session.query(KGEntityExtractionStaging).filter(\n        KGEntityExtractionStaging.document_id.in_(document_ids)\n    ).delete(synchronize_session=False)\n\n\ndef delete_from_kg_entities__no_commit(\n    db_session: Session, document_ids: list[str]\n) -> None:\n    \"\"\"Delete entities from the normalized table.\"\"\"\n    db_session.query(KGEntity).filter(KGEntity.document_id.in_(document_ids)).delete(\n        synchronize_session=False\n    )\n\n\ndef get_entity_name(db_session: Session, entity_id_name: str) -> str | None:\n    \"\"\"Get the name of an entity.\"\"\"\n    entity = (\n        db_session.query(KGEntity).filter(KGEntity.id_name == entity_id_name).first()\n    )\n    return entity.name if entity else None\n\n\ndef get_entity_stats_by_grounded_source_name(\n    db_session: Session,\n) -> dict[str, tuple[datetime, int]]:\n    \"\"\"\n    Returns a dict mapping each grounded_source_name to a tuple in which:\n        - the first element is the latest update time across all entities with the same entity-type\n        - the second element is the count of `KGEntity`s\n    \"\"\"\n    results = (\n        db_session.query(\n            KGEntityType.grounded_source_name,\n            func.count(KGEntity.id_name).label(\"entities_count\"),\n            func.max(KGEntity.time_updated).label(\"last_updated\"),\n        )\n        .join(KGEntityType, KGEntity.entity_type_id_name == KGEntityType.id_name)\n        .group_by(KGEntityType.grounded_source_name)\n        .all()\n    )\n\n    # `row.grounded_source_name` is NULLABLE in the database schema.\n    # Thus, for all \"ungrounded\" entity-types, we use a default name.\n    return {\n        (row.grounded_source_name or UNGROUNDED_SOURCE_NAME): (\n            row.last_updated,\n            row.entities_count,\n        )\n        for row in results\n    }\n"
  },
  {
    "path": "backend/onyx/db/entity_type.py",
    "content": "from collections import defaultdict\n\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.connector import fetch_unique_document_sources\nfrom onyx.db.document import DocumentSource\nfrom onyx.db.models import Connector\nfrom onyx.db.models import KGEntityType\nfrom onyx.kg.models import KGAttributeEntityOption\nfrom onyx.server.kg.models import EntityType\n\n\nUNGROUNDED_SOURCE_NAME = \"Ungrounded\"\n\n\ndef get_entity_types_with_grounded_source_name(\n    db_session: Session,\n) -> list[KGEntityType]:\n    \"\"\"Get all entity types that have non-null grounded_source_name.\n\n    Args:\n        db_session: SQLAlchemy session\n\n    Returns:\n        List of KGEntityType objects that have grounded_source_name defined\n    \"\"\"\n    return (\n        db_session.query(KGEntityType)\n        .filter(KGEntityType.grounded_source_name.isnot(None))\n        .all()\n    )\n\n\ndef get_entity_types(\n    db_session: Session,\n    active: bool | None = True,\n) -> list[KGEntityType]:\n    # Query the database for all distinct entity types\n\n    if active is None:\n        return db_session.query(KGEntityType).order_by(KGEntityType.id_name).all()\n\n    else:\n        return (\n            db_session.query(KGEntityType)\n            .filter(KGEntityType.active == active)\n            .order_by(KGEntityType.id_name)\n            .all()\n        )\n\n\ndef get_configured_entity_types(db_session: Session) -> dict[str, list[KGEntityType]]:\n    # get entity types from configured sources\n    configured_connector_sources = {\n        source.value.lower()\n        for source in fetch_unique_document_sources(db_session=db_session)\n    }\n    entity_types = (\n        db_session.query(KGEntityType)\n        .filter(KGEntityType.grounded_source_name.in_(configured_connector_sources))\n        .all()\n    )\n    entity_type_set = {et.id_name for et in entity_types}\n\n    # get implied entity types from those entity types\n    for et in entity_types:\n        for prop in et.parsed_attributes.metadata_attribute_conversion.values():\n            if prop.implication_property is None:\n                continue\n\n            implied_et = prop.implication_property.implied_entity_type\n            if implied_et == KGAttributeEntityOption.FROM_EMAIL:\n                if \"ACCOUNT\" not in entity_type_set:\n                    entity_type_set.add(\"ACCOUNT\")\n                if \"EMPLOYEE\" not in entity_type_set:\n                    entity_type_set.add(\"EMPLOYEE\")\n            elif isinstance(implied_et, str):\n                if implied_et not in entity_type_set:\n                    entity_type_set.add(implied_et)\n\n    ets = (\n        db_session.query(KGEntityType)\n        .filter(KGEntityType.id_name.in_(entity_type_set))\n        .all()\n    )\n\n    et_map = defaultdict(list)\n    for et in ets:\n        key = et.grounded_source_name or UNGROUNDED_SOURCE_NAME\n        et_map[key].append(et)\n\n    return et_map\n\n\ndef update_entity_types_and_related_connectors__commit(\n    db_session: Session, updates: list[EntityType]\n) -> None:\n    for upd in updates:\n        db_session.execute(\n            update(KGEntityType)\n            .where(KGEntityType.id_name == upd.name)\n            .values(\n                description=upd.description,\n                active=upd.active,\n            )\n        )\n    db_session.flush()\n\n    # Update connector sources\n\n    configured_entity_types = get_configured_entity_types(db_session=db_session)\n\n    active_entity_type_sources = {\n        et.grounded_source_name\n        for ets in configured_entity_types.values()\n        for et in ets\n        if et.active\n    }\n\n    # Update connectors that should be enabled\n    db_session.execute(\n        update(Connector)\n        .where(\n            Connector.source.in_(\n                [\n                    source\n                    for source in DocumentSource\n                    if source.value.lower() in active_entity_type_sources\n                ]\n            )\n        )\n        .where(~Connector.kg_processing_enabled)\n        .values(kg_processing_enabled=True)\n    )\n\n    # Update connectors that should be disabled\n    db_session.execute(\n        update(Connector)\n        .where(\n            Connector.source.in_(\n                [\n                    source\n                    for source in DocumentSource\n                    if source.value.lower() not in active_entity_type_sources\n                ]\n            )\n        )\n        .where(Connector.kg_processing_enabled)\n        .values(kg_processing_enabled=False)\n    )\n\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/enums.py",
    "content": "from __future__ import annotations\n\nfrom enum import Enum as PyEnum\nfrom typing import ClassVar\n\n\nclass AccountType(str, PyEnum):\n    \"\"\"\n    What kind of account this is — determines whether the user\n    enters the group-based permission system.\n\n    STANDARD + SERVICE_ACCOUNT → participate in group system\n    BOT, EXT_PERM_USER, ANONYMOUS → fixed behavior\n    \"\"\"\n\n    STANDARD = \"STANDARD\"\n    BOT = \"BOT\"\n    EXT_PERM_USER = \"EXT_PERM_USER\"\n    SERVICE_ACCOUNT = \"SERVICE_ACCOUNT\"\n    ANONYMOUS = \"ANONYMOUS\"\n\n    def is_web_login(self) -> bool:\n        \"\"\"Whether this account type supports interactive web login.\"\"\"\n        return self not in (\n            AccountType.BOT,\n            AccountType.EXT_PERM_USER,\n        )\n\n\nclass GrantSource(str, PyEnum):\n    \"\"\"How a permission grant was created.\"\"\"\n\n    USER = \"USER\"\n    SCIM = \"SCIM\"\n    SYSTEM = \"SYSTEM\"\n\n\nclass IndexingStatus(str, PyEnum):\n    NOT_STARTED = \"not_started\"\n    IN_PROGRESS = \"in_progress\"\n    SUCCESS = \"success\"\n    CANCELED = \"canceled\"\n    FAILED = \"failed\"\n    COMPLETED_WITH_ERRORS = \"completed_with_errors\"\n\n    def is_terminal(self) -> bool:\n        terminal_states = {\n            IndexingStatus.SUCCESS,\n            IndexingStatus.COMPLETED_WITH_ERRORS,\n            IndexingStatus.CANCELED,\n            IndexingStatus.FAILED,\n        }\n        return self in terminal_states\n\n    def is_successful(self) -> bool:\n        return (\n            self == IndexingStatus.SUCCESS\n            or self == IndexingStatus.COMPLETED_WITH_ERRORS\n        )\n\n\nclass PermissionSyncStatus(str, PyEnum):\n    \"\"\"Status enum for permission sync attempts\"\"\"\n\n    NOT_STARTED = \"not_started\"\n    IN_PROGRESS = \"in_progress\"\n    SUCCESS = \"success\"\n    CANCELED = \"canceled\"\n    FAILED = \"failed\"\n    COMPLETED_WITH_ERRORS = \"completed_with_errors\"\n\n    def is_terminal(self) -> bool:\n        terminal_states = {\n            PermissionSyncStatus.SUCCESS,\n            PermissionSyncStatus.COMPLETED_WITH_ERRORS,\n            PermissionSyncStatus.CANCELED,\n            PermissionSyncStatus.FAILED,\n        }\n        return self in terminal_states\n\n    def is_successful(self) -> bool:\n        return (\n            self == PermissionSyncStatus.SUCCESS\n            or self == PermissionSyncStatus.COMPLETED_WITH_ERRORS\n        )\n\n\nclass IndexingMode(str, PyEnum):\n    UPDATE = \"update\"\n    REINDEX = \"reindex\"\n\n\nclass ProcessingMode(str, PyEnum):\n    \"\"\"Determines how documents are processed after fetching.\"\"\"\n\n    REGULAR = \"REGULAR\"  # Full pipeline: chunk → embed → Vespa\n    FILE_SYSTEM = \"FILE_SYSTEM\"  # Write to file system only (JSON documents)\n    RAW_BINARY = \"RAW_BINARY\"  # Write raw binary to S3 (no text extraction)\n\n\nclass SyncType(str, PyEnum):\n    DOCUMENT_SET = \"document_set\"\n    USER_GROUP = \"user_group\"\n    CONNECTOR_DELETION = \"connector_deletion\"\n    PRUNING = \"pruning\"  # not really a sync, but close enough\n    EXTERNAL_PERMISSIONS = \"external_permissions\"\n    EXTERNAL_GROUP = \"external_group\"\n\n    def __str__(self) -> str:\n        return self.value\n\n\nclass SyncStatus(str, PyEnum):\n    IN_PROGRESS = \"in_progress\"\n    SUCCESS = \"success\"\n    FAILED = \"failed\"\n    CANCELED = \"canceled\"\n\n    def is_terminal(self) -> bool:\n        terminal_states = {\n            SyncStatus.SUCCESS,\n            SyncStatus.FAILED,\n        }\n        return self in terminal_states\n\n\nclass MCPAuthenticationType(str, PyEnum):\n    NONE = \"NONE\"\n    API_TOKEN = \"API_TOKEN\"\n    OAUTH = \"OAUTH\"\n    PT_OAUTH = \"PT_OAUTH\"  # Pass-Through OAuth\n\n\nclass MCPTransport(str, PyEnum):\n    \"\"\"MCP transport types\"\"\"\n\n    STDIO = \"STDIO\"  # TODO: currently unsupported, need to add a user guide for setup\n    SSE = \"SSE\"  # Server-Sent Events (deprecated but still used)\n    STREAMABLE_HTTP = \"STREAMABLE_HTTP\"  # Modern HTTP streaming\n\n\nclass MCPAuthenticationPerformer(str, PyEnum):\n    ADMIN = \"ADMIN\"\n    PER_USER = \"PER_USER\"\n\n\nclass MCPServerStatus(str, PyEnum):\n    CREATED = \"CREATED\"  # Server created, needs auth configuration\n    AWAITING_AUTH = \"AWAITING_AUTH\"  # Auth configured, pending user authentication\n    FETCHING_TOOLS = \"FETCHING_TOOLS\"  # Auth complete, fetching tools\n    CONNECTED = \"CONNECTED\"  # Fully configured and connected\n    DISCONNECTED = \"DISCONNECTED\"  # Server disconnected, but not deleted\n\n\n# Consistent with Celery task statuses\nclass TaskStatus(str, PyEnum):\n    PENDING = \"PENDING\"\n    STARTED = \"STARTED\"\n    SUCCESS = \"SUCCESS\"\n    FAILURE = \"FAILURE\"\n\n\nclass IndexModelStatus(str, PyEnum):\n    PAST = \"PAST\"\n    PRESENT = \"PRESENT\"\n    FUTURE = \"FUTURE\"\n\n    def is_current(self) -> bool:\n        return self == IndexModelStatus.PRESENT\n\n    def is_future(self) -> bool:\n        return self == IndexModelStatus.FUTURE\n\n\nclass ChatSessionSharedStatus(str, PyEnum):\n    PUBLIC = \"public\"\n    PRIVATE = \"private\"\n\n\nclass ConnectorCredentialPairStatus(str, PyEnum):\n    SCHEDULED = \"SCHEDULED\"\n    INITIAL_INDEXING = \"INITIAL_INDEXING\"\n    ACTIVE = \"ACTIVE\"\n    PAUSED = \"PAUSED\"\n    DELETING = \"DELETING\"\n    INVALID = \"INVALID\"\n\n    @classmethod\n    def active_statuses(cls) -> list[\"ConnectorCredentialPairStatus\"]:\n        return [\n            ConnectorCredentialPairStatus.ACTIVE,\n            ConnectorCredentialPairStatus.SCHEDULED,\n            ConnectorCredentialPairStatus.INITIAL_INDEXING,\n        ]\n\n    @classmethod\n    def indexable_statuses(self) -> list[\"ConnectorCredentialPairStatus\"]:\n        # Superset of active statuses for indexing model swaps\n        return self.active_statuses() + [\n            ConnectorCredentialPairStatus.PAUSED,\n        ]\n\n    def is_active(self) -> bool:\n        return self in self.active_statuses()\n\n\nclass AccessType(str, PyEnum):\n    PUBLIC = \"public\"\n    PRIVATE = \"private\"\n    SYNC = \"sync\"\n\n\nclass EmbeddingPrecision(str, PyEnum):\n    # matches vespa tensor type\n    # only support float / bfloat16 for now, since there's not a\n    # good reason to specify anything else\n    BFLOAT16 = \"bfloat16\"\n    FLOAT = \"float\"\n\n\nclass UserFileStatus(str, PyEnum):\n    PROCESSING = \"PROCESSING\"\n    INDEXING = \"INDEXING\"\n    COMPLETED = \"COMPLETED\"\n    SKIPPED = \"SKIPPED\"\n    FAILED = \"FAILED\"\n    CANCELED = \"CANCELED\"\n    DELETING = \"DELETING\"\n\n\nclass ThemePreference(str, PyEnum):\n    LIGHT = \"light\"\n    DARK = \"dark\"\n    SYSTEM = \"system\"\n\n\nclass DefaultAppMode(str, PyEnum):\n    AUTO = \"AUTO\"\n    CHAT = \"CHAT\"\n    SEARCH = \"SEARCH\"\n\n\nclass SwitchoverType(str, PyEnum):\n    REINDEX = \"reindex\"\n    ACTIVE_ONLY = \"active_only\"\n    INSTANT = \"instant\"\n\n\nclass OpenSearchDocumentMigrationStatus(str, PyEnum):\n    \"\"\"Status for Vespa to OpenSearch migration per document.\"\"\"\n\n    PENDING = \"pending\"\n    COMPLETED = \"completed\"\n    FAILED = \"failed\"\n    PERMANENTLY_FAILED = \"permanently_failed\"\n\n\nclass OpenSearchTenantMigrationStatus(str, PyEnum):\n    \"\"\"Status for tenant-level OpenSearch migration.\"\"\"\n\n    PENDING = \"pending\"\n    COMPLETED = \"completed\"\n\n\n# Onyx Build Mode Enums\nclass BuildSessionStatus(str, PyEnum):\n    ACTIVE = \"active\"\n    IDLE = \"idle\"\n\n\nclass SharingScope(str, PyEnum):\n    PRIVATE = \"private\"\n    PUBLIC_ORG = \"public_org\"\n    PUBLIC_GLOBAL = \"public_global\"\n\n\nclass SandboxStatus(str, PyEnum):\n    PROVISIONING = \"provisioning\"\n    RUNNING = \"running\"\n    SLEEPING = \"sleeping\"  # Pod terminated, snapshots saved to S3\n    TERMINATED = \"terminated\"\n    FAILED = \"failed\"\n\n    def is_active(self) -> bool:\n        \"\"\"Check if sandbox is in an active state (running).\"\"\"\n        return self == SandboxStatus.RUNNING\n\n    def is_terminal(self) -> bool:\n        \"\"\"Check if sandbox is in a terminal state.\"\"\"\n        return self in (SandboxStatus.TERMINATED, SandboxStatus.FAILED)\n\n    def is_sleeping(self) -> bool:\n        \"\"\"Check if sandbox is sleeping (pod terminated but can be restored).\"\"\"\n        return self == SandboxStatus.SLEEPING\n\n\nclass ArtifactType(str, PyEnum):\n    WEB_APP = \"web_app\"\n    PPTX = \"pptx\"\n    DOCX = \"docx\"\n    IMAGE = \"image\"\n    MARKDOWN = \"markdown\"\n    EXCEL = \"excel\"\n\n\nclass HierarchyNodeType(str, PyEnum):\n    \"\"\"Types of hierarchy nodes across different sources\"\"\"\n\n    # Generic\n    FOLDER = \"folder\"\n\n    # Root-level type\n    SOURCE = \"source\"  # Root node for a source (e.g., \"Google Drive\")\n\n    # Google Drive\n    SHARED_DRIVE = \"shared_drive\"\n    MY_DRIVE = \"my_drive\"\n\n    # Confluence\n    SPACE = \"space\"\n    PAGE = \"page\"  # Confluence pages can be both hierarchy nodes AND documents\n\n    # Jira\n    PROJECT = \"project\"\n\n    # Notion\n    DATABASE = \"database\"\n    WORKSPACE = \"workspace\"\n\n    # Sharepoint\n    SITE = \"site\"\n    DRIVE = \"drive\"  # Document library within a site\n\n    # Slack\n    CHANNEL = \"channel\"\n\n\nclass LLMModelFlowType(str, PyEnum):\n    CHAT = \"chat\"\n    VISION = \"vision\"\n    CONTEXTUAL_RAG = \"contextual_rag\"\n\n\nclass HookPoint(str, PyEnum):\n    DOCUMENT_INGESTION = \"document_ingestion\"\n    QUERY_PROCESSING = \"query_processing\"\n\n\nclass HookFailStrategy(str, PyEnum):\n    HARD = \"hard\"  # exception propagates, pipeline aborts\n    SOFT = \"soft\"  # log error, return original input, pipeline continues\n\n\nclass Permission(str, PyEnum):\n    \"\"\"\n    Permission tokens for group-based authorization.\n    19 tokens total. full_admin_panel_access is an override —\n    if present, any permission check passes.\n    \"\"\"\n\n    # Basic (auto-granted to every new group)\n    BASIC_ACCESS = \"basic\"\n\n    # Read tokens — implied only, never granted directly\n    READ_CONNECTORS = \"read:connectors\"\n    READ_DOCUMENT_SETS = \"read:document_sets\"\n    READ_AGENTS = \"read:agents\"\n    READ_USERS = \"read:users\"\n\n    # Add / Manage pairs\n    ADD_AGENTS = \"add:agents\"\n    MANAGE_AGENTS = \"manage:agents\"\n    MANAGE_DOCUMENT_SETS = \"manage:document_sets\"\n    ADD_CONNECTORS = \"add:connectors\"\n    MANAGE_CONNECTORS = \"manage:connectors\"\n    MANAGE_LLMS = \"manage:llms\"\n\n    # Toggle tokens\n    READ_AGENT_ANALYTICS = \"read:agent_analytics\"\n    MANAGE_ACTIONS = \"manage:actions\"\n    READ_QUERY_HISTORY = \"read:query_history\"\n    MANAGE_USER_GROUPS = \"manage:user_groups\"\n    CREATE_USER_API_KEYS = \"create:user_api_keys\"\n    CREATE_SERVICE_ACCOUNT_API_KEYS = \"create:service_account_api_keys\"\n    CREATE_SLACK_DISCORD_BOTS = \"create:slack_discord_bots\"\n\n    # Override — any permission check passes\n    FULL_ADMIN_PANEL_ACCESS = \"admin\"\n\n    # Permissions that are implied by other grants and must never be stored\n    # directly in the permission_grant table.\n    IMPLIED: ClassVar[frozenset[Permission]]\n\n\nPermission.IMPLIED = frozenset(\n    {\n        Permission.READ_CONNECTORS,\n        Permission.READ_DOCUMENT_SETS,\n        Permission.READ_AGENTS,\n        Permission.READ_USERS,\n    }\n)\n"
  },
  {
    "path": "backend/onyx/db/federated.py",
    "content": "from datetime import datetime\nfrom typing import Any\nfrom uuid import UUID\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import joinedload\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import FederatedConnectorSource\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import DocumentSet\nfrom onyx.db.models import FederatedConnector\nfrom onyx.db.models import FederatedConnector__DocumentSet\nfrom onyx.db.models import FederatedConnectorOAuthToken\nfrom onyx.federated_connectors.factory import get_federated_connector\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef fetch_federated_connector_by_id(\n    federated_connector_id: int, db_session: Session\n) -> FederatedConnector | None:\n    \"\"\"Fetch a federated connector by its ID.\"\"\"\n    stmt = select(FederatedConnector).where(\n        FederatedConnector.id == federated_connector_id\n    )\n    result = db_session.execute(stmt)\n    return result.scalar_one_or_none()\n\n\ndef fetch_all_federated_connectors(db_session: Session) -> list[FederatedConnector]:\n    \"\"\"Fetch all federated connectors with their OAuth tokens and document sets.\"\"\"\n    stmt = select(FederatedConnector).options(\n        selectinload(FederatedConnector.oauth_tokens),\n        selectinload(FederatedConnector.document_sets),\n    )\n    result = db_session.execute(stmt)\n    return list(result.scalars().all())\n\n\ndef fetch_all_federated_connectors_parallel() -> list[FederatedConnector]:\n    with get_session_with_current_tenant() as db_session:\n        return fetch_all_federated_connectors(db_session)\n\n\ndef validate_federated_connector_credentials(\n    source: FederatedConnectorSource,\n    credentials: dict[str, Any],\n) -> bool:\n    \"\"\"Validate credentials for a federated connector using the connector's validation logic.\"\"\"\n    try:\n        # the initialization will fail if the credentials are invalid\n        get_federated_connector(source, credentials)\n        return True\n    except Exception as e:\n        logger.error(f\"Error validating credentials for source {source}: {e}\")\n        return False\n\n\ndef create_federated_connector(\n    db_session: Session,\n    source: FederatedConnectorSource,\n    credentials: dict[str, Any],\n    config: dict[str, Any] | None = None,\n) -> FederatedConnector:\n    \"\"\"Create a new federated connector with credential and config validation.\"\"\"\n    # Validate credentials before creating\n    if not validate_federated_connector_credentials(source, credentials):\n        raise ValueError(\n            f\"Invalid credentials for federated connector source: {source}\"\n        )\n\n    # Validate config using connector-specific validation\n    if config:\n        try:\n            # Get connector instance to access validate_config method\n            connector = get_federated_connector(source, credentials)\n            if not connector.validate_config(config):\n                raise ValueError(\n                    f\"Invalid config for federated connector source: {source}\"\n                )\n        except Exception as e:\n            raise ValueError(f\"Config validation failed for {source}: {str(e)}\")\n\n    federated_connector = FederatedConnector(\n        source=source,\n        credentials=credentials,\n        config=config or {},\n    )\n    db_session.add(federated_connector)\n    db_session.commit()\n    return federated_connector\n\n\ndef update_federated_connector_oauth_token(\n    db_session: Session,\n    federated_connector_id: int,\n    user_id: UUID,\n    token: str,\n    expires_at: datetime | None = None,\n) -> FederatedConnectorOAuthToken:\n    \"\"\"Update or create OAuth token for a federated connector and user.\"\"\"\n    # First, try to find existing token for this user and connector\n    stmt = select(FederatedConnectorOAuthToken).where(\n        FederatedConnectorOAuthToken.federated_connector_id == federated_connector_id,\n        FederatedConnectorOAuthToken.user_id == user_id,\n    )\n    existing_token = db_session.execute(stmt).scalar_one_or_none()\n\n    if existing_token:\n        # Update existing token\n        existing_token.token = token  # type: ignore[assignment]\n        existing_token.expires_at = expires_at\n        db_session.commit()\n        return existing_token\n    else:\n        # Create new token\n        oauth_token = FederatedConnectorOAuthToken(\n            federated_connector_id=federated_connector_id,\n            user_id=user_id,\n            token=token,\n            expires_at=expires_at,\n        )\n        db_session.add(oauth_token)\n        db_session.commit()\n        return oauth_token\n\n\ndef get_federated_connector_oauth_token(\n    db_session: Session,\n    federated_connector_id: int,\n    user_id: UUID,\n) -> FederatedConnectorOAuthToken | None:\n    \"\"\"Get OAuth token for a federated connector and user.\"\"\"\n    stmt = select(FederatedConnectorOAuthToken).where(\n        FederatedConnectorOAuthToken.federated_connector_id == federated_connector_id,\n        FederatedConnectorOAuthToken.user_id == user_id,\n    )\n    result = db_session.execute(stmt)\n    return result.scalar_one_or_none()\n\n\ndef list_federated_connector_oauth_tokens(\n    db_session: Session,\n    user_id: UUID,\n) -> list[FederatedConnectorOAuthToken]:\n    \"\"\"List all OAuth tokens for all federated connectors.\"\"\"\n    stmt = (\n        select(FederatedConnectorOAuthToken)\n        .where(\n            FederatedConnectorOAuthToken.user_id == user_id,\n        )\n        .options(\n            joinedload(FederatedConnectorOAuthToken.federated_connector),\n        )\n    )\n    result = db_session.scalars(stmt)\n    return list(result)\n\n\ndef create_federated_connector_document_set_mapping(\n    db_session: Session,\n    federated_connector_id: int,\n    document_set_id: int,\n    entities: dict[str, Any],\n) -> FederatedConnector__DocumentSet:\n    \"\"\"Create a mapping between federated connector and document set with entities.\"\"\"\n    mapping = FederatedConnector__DocumentSet(\n        federated_connector_id=federated_connector_id,\n        document_set_id=document_set_id,\n        entities=entities,\n    )\n    db_session.add(mapping)\n    db_session.commit()\n    return mapping\n\n\ndef update_federated_connector_document_set_entities(\n    db_session: Session,\n    federated_connector_id: int,\n    document_set_id: int,\n    entities: dict[str, Any],\n) -> FederatedConnector__DocumentSet | None:\n    \"\"\"Update entities for a federated connector document set mapping.\"\"\"\n    stmt = select(FederatedConnector__DocumentSet).where(\n        FederatedConnector__DocumentSet.federated_connector_id\n        == federated_connector_id,\n        FederatedConnector__DocumentSet.document_set_id == document_set_id,\n    )\n    mapping = db_session.execute(stmt).scalar_one_or_none()\n\n    if mapping:\n        mapping.entities = entities\n        db_session.commit()\n        return mapping\n\n    return None\n\n\ndef get_federated_connector_document_set_mappings(\n    db_session: Session,\n    federated_connector_id: int,\n) -> list[FederatedConnector__DocumentSet]:\n    \"\"\"Get all document set mappings for a federated connector.\"\"\"\n    stmt = select(FederatedConnector__DocumentSet).where(\n        FederatedConnector__DocumentSet.federated_connector_id == federated_connector_id\n    )\n    result = db_session.execute(stmt)\n    return list(result.scalars().all())\n\n\ndef delete_federated_connector_document_set_mapping(\n    db_session: Session,\n    federated_connector_id: int,\n    document_set_id: int,\n) -> bool:\n    \"\"\"Delete a federated connector document set mapping.\"\"\"\n    stmt = select(FederatedConnector__DocumentSet).where(\n        FederatedConnector__DocumentSet.federated_connector_id\n        == federated_connector_id,\n        FederatedConnector__DocumentSet.document_set_id == document_set_id,\n    )\n    mapping = db_session.execute(stmt).scalar_one_or_none()\n\n    if mapping:\n        db_session.delete(mapping)\n        db_session.commit()\n        return True\n\n    return False\n\n\ndef get_federated_connector_document_set_mappings_by_document_set_names(\n    db_session: Session,\n    document_set_names: list[str],\n) -> list[FederatedConnector__DocumentSet]:\n    \"\"\"Get all document set mappings for a federated connector by document set names.\"\"\"\n    stmt = (\n        select(FederatedConnector__DocumentSet)\n        .join(\n            DocumentSet,\n            FederatedConnector__DocumentSet.document_set_id == DocumentSet.id,\n        )\n        .options(joinedload(FederatedConnector__DocumentSet.federated_connector))\n        .where(DocumentSet.name.in_(document_set_names))\n    )\n    result = db_session.scalars(stmt)\n    # Use unique() because joinedload can cause duplicate rows\n    return list(result.unique())\n\n\ndef update_federated_connector(\n    db_session: Session,\n    federated_connector_id: int,\n    credentials: dict[str, Any] | None = None,\n    config: dict[str, Any] | None = None,\n) -> FederatedConnector | None:\n    \"\"\"Update a federated connector with credential and config validation.\"\"\"\n    federated_connector = fetch_federated_connector_by_id(\n        federated_connector_id, db_session\n    )\n    if not federated_connector:\n        return None\n\n    # Use provided credentials if updating them, otherwise use existing credentials\n    # This is needed to instantiate the connector for config validation when only config is being updated\n    creds_to_use = (\n        credentials\n        if credentials is not None\n        else (\n            federated_connector.credentials.get_value(apply_mask=False)\n            if federated_connector.credentials\n            else {}\n        )\n    )\n\n    if credentials is not None:\n        # Validate credentials before updating\n        if not validate_federated_connector_credentials(\n            federated_connector.source, credentials\n        ):\n            raise ValueError(\n                f\"Invalid credentials for federated connector source: {federated_connector.source}\"\n            )\n        federated_connector.credentials = credentials  # type: ignore[assignment]\n\n    if config is not None:\n        # Validate config using connector-specific validation\n        try:\n            # Get connector instance to access validate_config method\n            connector = get_federated_connector(\n                federated_connector.source, creds_to_use\n            )\n            if not connector.validate_config(config):\n                raise ValueError(\n                    f\"Invalid config for federated connector source: {federated_connector.source}\"\n                )\n        except Exception as e:\n            raise ValueError(\n                f\"Config validation failed for {federated_connector.source}: {str(e)}\"\n            )\n        federated_connector.config = config\n\n    db_session.commit()\n    return federated_connector\n\n\ndef delete_federated_connector(\n    db_session: Session,\n    federated_connector_id: int,\n) -> bool:\n    \"\"\"Delete a federated connector and all its related data.\"\"\"\n    federated_connector = fetch_federated_connector_by_id(\n        federated_connector_id, db_session\n    )\n    if not federated_connector:\n        return False\n\n    # Delete related OAuth tokens (cascade should handle this)\n    # Delete related document set mappings (cascade should handle this)\n    db_session.delete(federated_connector)\n    db_session.commit()\n    return True\n"
  },
  {
    "path": "backend/onyx/db/feedback.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom uuid import UUID\n\nfrom fastapi import HTTPException\nfrom sqlalchemy import and_\nfrom sqlalchemy import asc\nfrom sqlalchemy import delete\nfrom sqlalchemy import desc\nfrom sqlalchemy import exists\nfrom sqlalchemy import Select\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import aliased\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.constants import SearchFeedbackType\nfrom onyx.db.chat import get_chat_message\nfrom onyx.db.enums import AccessType\nfrom onyx.db.models import ChatMessageFeedback\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Document as DbDocument\nfrom onyx.db.models import DocumentByConnectorCredentialPair\nfrom onyx.db.models import DocumentRetrievalFeedback\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserGroup__ConnectorCredentialPair\nfrom onyx.db.models import UserRole\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _fetch_db_doc_by_id(doc_id: str, db_session: Session) -> DbDocument:\n    stmt = select(DbDocument).where(DbDocument.id == doc_id)\n    result = db_session.execute(stmt)\n    doc = result.scalar_one_or_none()\n\n    if not doc:\n        raise ValueError(\"Invalid Document ID Provided\")\n\n    return doc\n\n\ndef _add_user_filters(stmt: Select, user: User, get_editable: bool = True) -> Select:\n    if user.role == UserRole.ADMIN:\n        return stmt\n\n    stmt = stmt.distinct()\n    DocByCC = aliased(DocumentByConnectorCredentialPair)\n    CCPair = aliased(ConnectorCredentialPair)\n    UG__CCpair = aliased(UserGroup__ConnectorCredentialPair)\n    User__UG = aliased(User__UserGroup)\n\n    \"\"\"\n    Here we select documents by relation:\n    User -> User__UserGroup -> UserGroup__ConnectorCredentialPair ->\n    ConnectorCredentialPair -> DocumentByConnectorCredentialPair -> Document\n    \"\"\"\n    stmt = (\n        stmt.outerjoin(DocByCC, DocByCC.id == DbDocument.id)\n        .outerjoin(\n            CCPair,\n            and_(\n                CCPair.connector_id == DocByCC.connector_id,\n                CCPair.credential_id == DocByCC.credential_id,\n            ),\n        )\n        .outerjoin(UG__CCpair, UG__CCpair.cc_pair_id == CCPair.id)\n        .outerjoin(User__UG, User__UG.user_group_id == UG__CCpair.user_group_id)\n    )\n\n    \"\"\"\n    Filter Documents by:\n    - if the user is in the user_group that owns the object\n    - if the user is not a global_curator, they must also have a curator relationship\n    to the user_group\n    - if editing is being done, we also filter out objects that are owned by groups\n    that the user isn't a curator for\n    - if we are not editing, we show all objects in the groups the user is a curator\n    for (as well as public objects as well)\n    \"\"\"\n\n    # Anonymous users only see public documents\n    if user.is_anonymous:\n        where_clause = CCPair.access_type == AccessType.PUBLIC\n        return stmt.where(where_clause)\n\n    where_clause = User__UG.user_id == user.id\n    if user.role == UserRole.CURATOR and get_editable:\n        where_clause &= User__UG.is_curator == True  # noqa: E712\n    if get_editable:\n        user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id)\n        where_clause &= (\n            ~exists()\n            .where(UG__CCpair.cc_pair_id == CCPair.id)\n            .where(~UG__CCpair.user_group_id.in_(user_groups))\n            .correlate(CCPair)\n        )\n    else:\n        where_clause |= CCPair.access_type == AccessType.PUBLIC\n\n    return stmt.where(where_clause)\n\n\ndef fetch_docs_ranked_by_boost_for_user(\n    db_session: Session,\n    user: User,\n    ascending: bool = False,\n    limit: int = 100,\n) -> list[DbDocument]:\n    order_func = asc if ascending else desc\n    stmt = select(DbDocument)\n\n    stmt = _add_user_filters(stmt=stmt, user=user, get_editable=False)\n\n    stmt = stmt.order_by(\n        order_func(DbDocument.boost), order_func(DbDocument.semantic_id)\n    )\n    stmt = stmt.limit(limit)\n    result = db_session.execute(stmt)\n    doc_list = result.scalars().all()\n\n    return list(doc_list)\n\n\ndef update_document_boost_for_user(\n    db_session: Session,\n    document_id: str,\n    boost: int,\n    user: User,\n) -> None:\n    stmt = select(DbDocument).where(DbDocument.id == document_id)\n    stmt = _add_user_filters(stmt, user, get_editable=True)\n    result: DbDocument | None = db_session.execute(stmt).scalar_one_or_none()\n    if result is None:\n        raise HTTPException(\n            status_code=400, detail=\"Document is not editable by this user\"\n        )\n\n    result.boost = boost\n\n    # updating last_modified triggers sync\n    # TODO: Should this submit to the queue directly so that the UI can update?\n    result.last_modified = datetime.now(timezone.utc)\n    db_session.commit()\n\n\ndef update_document_hidden_for_user(\n    db_session: Session,\n    document_id: str,\n    hidden: bool,\n    user: User,\n) -> None:\n    stmt = select(DbDocument).where(DbDocument.id == document_id)\n    stmt = _add_user_filters(stmt, user, get_editable=True)\n    result = db_session.execute(stmt).scalar_one_or_none()\n    if result is None:\n        raise HTTPException(\n            status_code=400, detail=\"Document is not editable by this user\"\n        )\n\n    result.hidden = hidden\n\n    # updating last_modified triggers sync\n    # TODO: Should this submit to the queue directly so that the UI can update?\n    result.last_modified = datetime.now(timezone.utc)\n    db_session.commit()\n\n\ndef create_doc_retrieval_feedback(\n    message_id: int,\n    document_id: str,\n    document_rank: int,\n    db_session: Session,\n    clicked: bool = False,\n    feedback: SearchFeedbackType | None = None,\n) -> None:\n    \"\"\"Creates a new Document feedback row and updates the boost value in Postgres and Vespa\"\"\"\n    db_doc = _fetch_db_doc_by_id(document_id, db_session)\n\n    retrieval_feedback = DocumentRetrievalFeedback(\n        chat_message_id=message_id,\n        document_id=document_id,\n        document_rank=document_rank,\n        clicked=clicked,\n        feedback=feedback,\n    )\n\n    if feedback is not None:\n        if feedback == SearchFeedbackType.ENDORSE:\n            db_doc.boost += 1\n        elif feedback == SearchFeedbackType.REJECT:\n            db_doc.boost -= 1\n        elif feedback == SearchFeedbackType.HIDE:\n            db_doc.hidden = True\n        elif feedback == SearchFeedbackType.UNHIDE:\n            db_doc.hidden = False\n        else:\n            raise ValueError(\"Unhandled document feedback type\")\n\n    if feedback in [\n        SearchFeedbackType.ENDORSE,\n        SearchFeedbackType.REJECT,\n        SearchFeedbackType.HIDE,\n    ]:\n        # updating last_modified triggers sync\n        # TODO: Should this submit to the queue directly so that the UI can update?\n        db_doc.last_modified = datetime.now(timezone.utc)\n\n    db_session.add(retrieval_feedback)\n    db_session.commit()\n\n\ndef delete_document_feedback_for_documents__no_commit(\n    document_ids: list[str], db_session: Session\n) -> None:\n    \"\"\"NOTE: does not commit transaction so that this can be used as part of a\n    larger transaction block.\"\"\"\n    stmt = delete(DocumentRetrievalFeedback).where(\n        DocumentRetrievalFeedback.document_id.in_(document_ids)\n    )\n    db_session.execute(stmt)\n\n\ndef create_chat_message_feedback(\n    is_positive: bool | None,\n    feedback_text: str | None,\n    chat_message_id: int,\n    user_id: UUID | None,\n    db_session: Session,\n    # Slack user requested help from human\n    required_followup: bool | None = None,\n    predefined_feedback: str | None = None,  # Added predefined_feedback parameter\n) -> None:\n    if (\n        is_positive is None\n        and feedback_text is None\n        and required_followup is None\n        and predefined_feedback is None\n    ):\n        raise ValueError(\"No feedback provided\")\n\n    chat_message = get_chat_message(\n        chat_message_id=chat_message_id, user_id=user_id, db_session=db_session\n    )\n\n    if chat_message.message_type != MessageType.ASSISTANT:\n        raise ValueError(\"Can only provide feedback on LLM Outputs\")\n\n    message_feedback = ChatMessageFeedback(\n        chat_message_id=chat_message_id,\n        is_positive=is_positive,\n        feedback_text=feedback_text,\n        required_followup=required_followup,\n        predefined_feedback=predefined_feedback,\n    )\n\n    db_session.add(message_feedback)\n    db_session.commit()\n\n\ndef remove_chat_message_feedback(\n    chat_message_id: int,\n    user_id: UUID | None,\n    db_session: Session,\n) -> None:\n    \"\"\"Remove all feedback for a chat message.\"\"\"\n    chat_message = get_chat_message(\n        chat_message_id=chat_message_id, user_id=user_id, db_session=db_session\n    )\n\n    if chat_message.message_type != MessageType.ASSISTANT:\n        raise ValueError(\"Can only remove feedback from LLM Outputs\")\n\n    # Delete all feedback for this message\n    db_session.query(ChatMessageFeedback).filter(\n        ChatMessageFeedback.chat_message_id == chat_message_id\n    ).delete()\n\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/file_content.py",
    "content": "from sqlalchemy.dialects.postgresql import insert\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import FileContent\n\n\ndef get_file_content_by_file_id(\n    file_id: str,\n    db_session: Session,\n) -> FileContent:\n    record = db_session.query(FileContent).filter_by(file_id=file_id).first()\n    if not record:\n        raise RuntimeError(\n            f\"File content for file_id {file_id} does not exist or was deleted\"\n        )\n    return record\n\n\ndef get_file_content_by_file_id_optional(\n    file_id: str,\n    db_session: Session,\n) -> FileContent | None:\n    return db_session.query(FileContent).filter_by(file_id=file_id).first()\n\n\ndef upsert_file_content(\n    file_id: str,\n    lobj_oid: int,\n    file_size: int,\n    db_session: Session,\n) -> FileContent:\n    \"\"\"Atomic upsert using INSERT ... ON CONFLICT DO UPDATE to avoid\n    race conditions when concurrent calls target the same file_id.\"\"\"\n    stmt = insert(FileContent).values(\n        file_id=file_id,\n        lobj_oid=lobj_oid,\n        file_size=file_size,\n    )\n    stmt = stmt.on_conflict_do_update(\n        index_elements=[FileContent.file_id],\n        set_={\n            \"lobj_oid\": stmt.excluded.lobj_oid,\n            \"file_size\": stmt.excluded.file_size,\n        },\n    )\n    db_session.execute(stmt)\n\n    # Return the merged ORM instance so callers can inspect the result\n    return db_session.get(FileContent, file_id)  # type: ignore[return-value]\n\n\ndef transfer_file_content_file_id(\n    old_file_id: str,\n    new_file_id: str,\n    db_session: Session,\n) -> None:\n    \"\"\"Move a file_content row from old_file_id to new_file_id in-place.\n\n    This avoids creating a duplicate row that shares the same Large Object OID,\n    keeping OID ownership unique at all times.  The caller must ensure that\n    new_file_id already exists in file_record (FK target).\"\"\"\n    rows = (\n        db_session.query(FileContent)\n        .filter_by(file_id=old_file_id)\n        .update({\"file_id\": new_file_id})\n    )\n    if not rows:\n        raise RuntimeError(\n            f\"File content for file_id {old_file_id} does not exist or was deleted\"\n        )\n\n\ndef delete_file_content_by_file_id(\n    file_id: str,\n    db_session: Session,\n) -> None:\n    db_session.query(FileContent).filter_by(file_id=file_id).delete()\n"
  },
  {
    "path": "backend/onyx/db/file_record.py",
    "content": "from sqlalchemy import and_\nfrom sqlalchemy import select\nfrom sqlalchemy.dialects.postgresql import insert\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.task_utils import QUERY_REPORT_NAME_PREFIX\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.configs.constants import FileType\nfrom onyx.db.models import FileRecord\n\n\ndef get_query_history_export_files(\n    db_session: Session,\n) -> list[FileRecord]:\n    return list(\n        db_session.scalars(\n            select(FileRecord).where(\n                and_(\n                    FileRecord.file_id.like(f\"{QUERY_REPORT_NAME_PREFIX}-%\"),\n                    FileRecord.file_type == FileType.CSV,\n                    FileRecord.file_origin == FileOrigin.QUERY_HISTORY_CSV,\n                )\n            )\n        )\n    )\n\n\ndef get_filerecord_by_file_id_optional(\n    file_id: str,\n    db_session: Session,\n) -> FileRecord | None:\n    return db_session.query(FileRecord).filter_by(file_id=file_id).first()\n\n\ndef get_filerecord_by_file_id(\n    file_id: str,\n    db_session: Session,\n) -> FileRecord:\n    filestore = db_session.query(FileRecord).filter_by(file_id=file_id).first()\n\n    if not filestore:\n        raise RuntimeError(f\"File by id {file_id} does not exist or was deleted\")\n\n    return filestore\n\n\ndef get_filerecord_by_prefix(\n    prefix: str,\n    db_session: Session,\n) -> list[FileRecord]:\n    if not prefix:\n        return db_session.query(FileRecord).all()\n    return (\n        db_session.query(FileRecord).filter(FileRecord.file_id.like(f\"{prefix}%\")).all()\n    )\n\n\ndef delete_filerecord_by_file_id(\n    file_id: str,\n    db_session: Session,\n) -> None:\n    db_session.query(FileRecord).filter_by(file_id=file_id).delete()\n\n\ndef upsert_filerecord(\n    file_id: str,\n    display_name: str,\n    file_origin: FileOrigin,\n    file_type: str,\n    bucket_name: str,\n    object_key: str,\n    db_session: Session,\n    file_metadata: dict | None = None,\n) -> FileRecord:\n    \"\"\"Atomic upsert using INSERT ... ON CONFLICT DO UPDATE to avoid\n    race conditions when concurrent calls target the same file_id.\"\"\"\n    stmt = insert(FileRecord).values(\n        file_id=file_id,\n        display_name=display_name,\n        file_origin=file_origin,\n        file_type=file_type,\n        file_metadata=file_metadata,\n        bucket_name=bucket_name,\n        object_key=object_key,\n    )\n    stmt = stmt.on_conflict_do_update(\n        index_elements=[FileRecord.file_id],\n        set_={\n            \"display_name\": stmt.excluded.display_name,\n            \"file_origin\": stmt.excluded.file_origin,\n            \"file_type\": stmt.excluded.file_type,\n            \"file_metadata\": stmt.excluded.file_metadata,\n            \"bucket_name\": stmt.excluded.bucket_name,\n            \"object_key\": stmt.excluded.object_key,\n        },\n    )\n    db_session.execute(stmt)\n\n    return db_session.get(FileRecord, file_id)  # type: ignore[return-value]\n"
  },
  {
    "path": "backend/onyx/db/hierarchy.py",
    "content": "\"\"\"CRUD operations for HierarchyNode.\"\"\"\n\nfrom collections import defaultdict\n\nfrom sqlalchemy import delete\nfrom sqlalchemy import select\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\nfrom sqlalchemy.engine import CursorResult\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import HierarchyNode as PydanticHierarchyNode\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.db.models import Document\nfrom onyx.db.models import HierarchyNode\nfrom onyx.db.models import HierarchyNodeByConnectorCredentialPair\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\n\nlogger = setup_logger()\n\n# Sources where hierarchy nodes can also be documents.\n# For these sources, pages/items can be both a hierarchy node (with children)\n# AND a document with indexed content. For example:\n# - Notion: Pages with child pages are hierarchy nodes, but also documents\n# - Confluence: Pages can have child pages and also contain content\n# Other sources like Google Drive have folders as hierarchy nodes, but folders\n# are not documents themselves.\nSOURCES_WITH_HIERARCHY_NODE_DOCUMENTS: set[DocumentSource] = {\n    DocumentSource.NOTION,\n    DocumentSource.CONFLUENCE,\n}\n\n\ndef _get_source_display_name(source: DocumentSource) -> str:\n    \"\"\"Get a human-readable display name for a source type.\"\"\"\n    return source.value.replace(\"_\", \" \").title()\n\n\ndef get_hierarchy_node_by_raw_id(\n    db_session: Session,\n    raw_node_id: str,\n    source: DocumentSource,\n) -> HierarchyNode | None:\n    \"\"\"Get a hierarchy node by its raw ID and source.\"\"\"\n    stmt = select(HierarchyNode).where(\n        HierarchyNode.raw_node_id == raw_node_id,\n        HierarchyNode.source == source,\n    )\n    return db_session.execute(stmt).scalar_one_or_none()\n\n\ndef get_source_hierarchy_node(\n    db_session: Session,\n    source: DocumentSource,\n) -> HierarchyNode | None:\n    \"\"\"Get the SOURCE-type root node for a given source.\"\"\"\n    stmt = select(HierarchyNode).where(\n        HierarchyNode.source == source,\n        HierarchyNode.node_type == HierarchyNodeType.SOURCE,\n    )\n    return db_session.execute(stmt).scalar_one_or_none()\n\n\ndef ensure_source_node_exists(\n    db_session: Session,\n    source: DocumentSource,\n    commit: bool = True,\n) -> HierarchyNode:\n    \"\"\"\n    Ensure that a SOURCE-type root node exists for the given source.\n\n    This function is idempotent - it will return the existing SOURCE node if one\n    exists, or create a new one if not.\n\n    The SOURCE node is the root of the hierarchy tree for a given source type\n    (e.g., \"Google Drive\", \"Confluence\"). All other hierarchy nodes for that\n    source should ultimately have this node as an ancestor.\n\n    For the SOURCE node:\n    - raw_node_id is set to the source name (e.g., \"google_drive\")\n    - parent_id is None (it's the root)\n    - display_name is a human-readable version (e.g., \"Google Drive\")\n\n    Args:\n        db_session: SQLAlchemy session\n        source: The document source type\n        commit: Whether to commit the transaction\n\n    Returns:\n        The existing or newly created SOURCE-type HierarchyNode\n    \"\"\"\n    # Try to get existing SOURCE node first\n    existing_node = get_source_hierarchy_node(db_session, source)\n    if existing_node:\n        return existing_node\n\n    # Create the SOURCE node\n    display_name = _get_source_display_name(source)\n\n    source_node = HierarchyNode(\n        raw_node_id=source.value,  # Use source name as raw_node_id\n        display_name=display_name,\n        link=None,\n        source=source,\n        node_type=HierarchyNodeType.SOURCE,\n        document_id=None,\n        parent_id=None,  # SOURCE nodes have no parent\n    )\n\n    db_session.add(source_node)\n\n    # Flush to get the ID and detect any race conditions\n    try:\n        db_session.flush()\n    except Exception:\n        # Race condition - another worker created it. Roll back and fetch.\n        db_session.rollback()\n        existing_node = get_source_hierarchy_node(db_session, source)\n        if existing_node:\n            return existing_node\n        # If still not found, re-raise the original exception\n        raise\n\n    if commit:\n        db_session.commit()\n\n    logger.info(\n        f\"Created SOURCE hierarchy node for {source.value}: id={source_node.id}, display_name={display_name}\"\n    )\n\n    return source_node\n\n\ndef resolve_parent_hierarchy_node_id(\n    db_session: Session,\n    raw_parent_id: str | None,\n    source: DocumentSource,\n) -> int | None:\n    \"\"\"\n    Resolve a raw_parent_id to a database HierarchyNode ID.\n\n    If raw_parent_id is None, returns the SOURCE node ID for backward compatibility.\n    If the parent node doesn't exist, returns the SOURCE node ID as fallback.\n    \"\"\"\n    if raw_parent_id is None:\n        # No parent specified - use the SOURCE node\n        source_node = get_source_hierarchy_node(db_session, source)\n        return source_node.id if source_node else None\n\n    parent_node = get_hierarchy_node_by_raw_id(db_session, raw_parent_id, source)\n    if parent_node:\n        return parent_node.id\n\n    # Parent not found - fall back to SOURCE node\n    logger.warning(\n        f\"Parent hierarchy node not found: raw_id={raw_parent_id}, source={source}. Falling back to SOURCE node.\"\n    )\n    source_node = get_source_hierarchy_node(db_session, source)\n    return source_node.id if source_node else None\n\n\ndef upsert_parents(\n    db_session: Session,\n    node: PydanticHierarchyNode,\n    source: DocumentSource,\n    node_by_id: dict[str, PydanticHierarchyNode],\n    done_ids: set[str],\n    is_connector_public: bool = False,\n) -> None:\n    \"\"\"\n    Upsert the parents of a hierarchy node.\n    \"\"\"\n    if (\n        node.node_type == HierarchyNodeType.SOURCE\n        or (node.raw_parent_id not in node_by_id)\n        or (node.raw_parent_id in done_ids)\n    ):\n        return\n    parent_node = node_by_id[node.raw_parent_id]\n    upsert_parents(\n        db_session,\n        parent_node,\n        source,\n        node_by_id,\n        done_ids,\n        is_connector_public=is_connector_public,\n    )\n    upsert_hierarchy_node(\n        db_session,\n        parent_node,\n        source,\n        commit=False,\n        is_connector_public=is_connector_public,\n    )\n    done_ids.add(parent_node.raw_node_id)\n\n\ndef upsert_hierarchy_node(\n    db_session: Session,\n    node: PydanticHierarchyNode,\n    source: DocumentSource,\n    commit: bool = True,\n    is_connector_public: bool = False,\n) -> HierarchyNode:\n    \"\"\"\n    Upsert a hierarchy node from a Pydantic model.\n\n    If a node with the same raw_node_id and source exists, updates it.\n    Otherwise, creates a new node.\n\n    Args:\n        db_session: SQLAlchemy session\n        node: The Pydantic hierarchy node to upsert\n        source: Document source type\n        commit: Whether to commit the transaction\n        is_connector_public: If True, the connector is public (organization-wide access)\n            and all hierarchy nodes should be marked as public regardless of their\n            external_access settings. This ensures nodes from public connectors are\n            accessible to all users.\n    \"\"\"\n    # Resolve parent_id from raw_parent_id\n    parent_id = (\n        None\n        if node.node_type == HierarchyNodeType.SOURCE\n        else resolve_parent_hierarchy_node_id(db_session, node.raw_parent_id, source)\n    )\n\n    # For public connectors, all nodes are public\n    # Otherwise, extract permission fields from external_access if present\n    if is_connector_public:\n        is_public = True\n        external_user_emails: list[str] | None = None\n        external_user_group_ids: list[str] | None = None\n    elif node.external_access:\n        is_public = node.external_access.is_public\n        external_user_emails = (\n            list(node.external_access.external_user_emails)\n            if node.external_access.external_user_emails\n            else None\n        )\n        external_user_group_ids = (\n            list(node.external_access.external_user_group_ids)\n            if node.external_access.external_user_group_ids\n            else None\n        )\n    else:\n        is_public = False\n        external_user_emails = None\n        external_user_group_ids = None\n\n    # Check if node already exists\n    existing_node = get_hierarchy_node_by_raw_id(db_session, node.raw_node_id, source)\n\n    if existing_node:\n        # Update existing node\n        existing_node.display_name = node.display_name\n        existing_node.link = node.link\n        existing_node.node_type = node.node_type\n        existing_node.parent_id = parent_id\n        # Update permission fields\n        existing_node.is_public = is_public\n        existing_node.external_user_emails = external_user_emails\n        existing_node.external_user_group_ids = external_user_group_ids\n        hierarchy_node = existing_node\n    else:\n        # Create new node\n        hierarchy_node = HierarchyNode(\n            raw_node_id=node.raw_node_id,\n            display_name=node.display_name,\n            link=node.link,\n            source=source,\n            node_type=node.node_type,\n            parent_id=parent_id,\n            is_public=is_public,\n            external_user_emails=external_user_emails,\n            external_user_group_ids=external_user_group_ids,\n        )\n        db_session.add(hierarchy_node)\n\n    if commit:\n        db_session.commit()\n    else:\n        db_session.flush()\n\n    return hierarchy_node\n\n\ndef upsert_hierarchy_nodes_batch(\n    db_session: Session,\n    nodes: list[PydanticHierarchyNode],\n    source: DocumentSource,\n    commit: bool = True,\n    is_connector_public: bool = False,\n) -> list[HierarchyNode]:\n    \"\"\"\n    Batch upsert hierarchy nodes.\n\n    Note: This function requires that for each node passed in, all\n    its ancestors exist in either the database or elsewhere in the nodes list.\n    This function handles parent dependencies for you as long as that condition is met\n    (so you don't need to worry about parent nodes appearing before their children in the list).\n\n    Args:\n        db_session: SQLAlchemy session\n        nodes: List of Pydantic hierarchy nodes to upsert\n        source: Document source type\n        commit: Whether to commit the transaction\n        is_connector_public: If True, the connector is public (organization-wide access)\n            and all hierarchy nodes should be marked as public regardless of their\n            external_access settings.\n    \"\"\"\n    node_by_id = {}\n    for node in nodes:\n        if node.node_type != HierarchyNodeType.SOURCE:\n            node_by_id[node.raw_node_id] = node\n    done_ids = set[str]()\n\n    results = []\n    for node in nodes:\n        if node.raw_node_id in done_ids:\n            continue\n        upsert_parents(\n            db_session,\n            node,\n            source,\n            node_by_id,\n            done_ids,\n            is_connector_public=is_connector_public,\n        )\n        hierarchy_node = upsert_hierarchy_node(\n            db_session,\n            node,\n            source,\n            commit=False,\n            is_connector_public=is_connector_public,\n        )\n        done_ids.add(node.raw_node_id)\n        results.append(hierarchy_node)\n\n    if commit:\n        db_session.commit()\n\n    return results\n\n\ndef link_hierarchy_nodes_to_documents(\n    db_session: Session,\n    document_ids: list[str],\n    source: DocumentSource,\n    commit: bool = True,\n) -> int:\n    \"\"\"\n    Link hierarchy nodes to their corresponding documents.\n\n    For connectors like Notion and Confluence where pages can be both hierarchy nodes\n    AND documents, we need to set the document_id field on hierarchy nodes after the\n    documents are created. This is because hierarchy nodes are processed before documents,\n    and the FK constraint on document_id requires the document to exist first.\n\n    Args:\n        db_session: SQLAlchemy session\n        document_ids: List of document IDs that were just created/updated\n        source: The document source (e.g., NOTION, CONFLUENCE)\n        commit: Whether to commit the transaction\n\n    Returns:\n        Number of hierarchy nodes that were linked to documents\n    \"\"\"\n    # Skip for sources where hierarchy nodes cannot also be documents\n    if source not in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS:\n        return 0\n\n    if not document_ids:\n        return 0\n\n    # Find hierarchy nodes where raw_node_id matches a document_id\n    # These are pages that are both hierarchy nodes and documents\n    stmt = select(HierarchyNode).where(\n        HierarchyNode.source == source,\n        HierarchyNode.raw_node_id.in_(document_ids),\n        HierarchyNode.document_id.is_(None),  # Only update if not already linked\n    )\n    nodes_to_update = list(db_session.execute(stmt).scalars().all())\n\n    # Update document_id for each matching node\n    for node in nodes_to_update:\n        node.document_id = node.raw_node_id\n\n    if commit:\n        db_session.commit()\n\n    if nodes_to_update:\n        logger.debug(\n            f\"Linked {len(nodes_to_update)} hierarchy nodes to documents for source {source.value}\"\n        )\n\n    return len(nodes_to_update)\n\n\ndef get_hierarchy_node_children(\n    db_session: Session,\n    parent_id: int,\n    limit: int = 100,\n    offset: int = 0,\n) -> list[HierarchyNode]:\n    \"\"\"Get children of a hierarchy node, paginated.\"\"\"\n    stmt = (\n        select(HierarchyNode)\n        .where(HierarchyNode.parent_id == parent_id)\n        .order_by(HierarchyNode.display_name)\n        .limit(limit)\n        .offset(offset)\n    )\n    return list(db_session.execute(stmt).scalars().all())\n\n\ndef get_hierarchy_node_by_id(\n    db_session: Session,\n    node_id: int,\n) -> HierarchyNode | None:\n    \"\"\"Get a hierarchy node by its database ID.\"\"\"\n    return db_session.get(HierarchyNode, node_id)\n\n\ndef get_root_hierarchy_nodes_for_source(\n    db_session: Session,\n    source: DocumentSource,\n) -> list[HierarchyNode]:\n    \"\"\"Get all root-level hierarchy nodes for a source (children of SOURCE node).\"\"\"\n    source_node = get_source_hierarchy_node(db_session, source)\n    if not source_node:\n        return []\n\n    return get_hierarchy_node_children(db_session, source_node.id)\n\n\ndef get_all_hierarchy_nodes_for_source(\n    db_session: Session,\n    source: DocumentSource,\n) -> list[HierarchyNode]:\n    \"\"\"\n    Get ALL hierarchy nodes for a given source.\n\n    This is used to populate the Redis cache. Returns all nodes including\n    the SOURCE-type root node.\n\n    Args:\n        db_session: SQLAlchemy session\n        source: The document source to get nodes for\n\n    Returns:\n        List of all HierarchyNode objects for the source\n    \"\"\"\n    stmt = select(HierarchyNode).where(HierarchyNode.source == source)\n    return list(db_session.execute(stmt).scalars().all())\n\n\ndef _get_accessible_hierarchy_nodes_for_source(\n    db_session: Session,\n    source: DocumentSource,\n    user_email: str,  # noqa: ARG001\n    external_group_ids: list[str],  # noqa: ARG001\n) -> list[HierarchyNode]:\n    \"\"\"\n    MIT version: Returns all hierarchy nodes for the source without permission filtering.\n\n    In the MIT version, permission checks are not performed on hierarchy nodes.\n    The EE version overrides this to apply permission filtering based on user\n    email and external group IDs.\n\n    Args:\n        db_session: SQLAlchemy session\n        source: Document source type\n        user_email: User's email (unused in MIT version)\n        external_group_ids: User's external group IDs (unused in MIT version)\n\n    Returns:\n        List of all HierarchyNode objects for the source\n    \"\"\"\n    stmt = select(HierarchyNode).where(HierarchyNode.source == source)\n    stmt = stmt.order_by(HierarchyNode.display_name)\n    return list(db_session.execute(stmt).scalars().all())\n\n\ndef get_accessible_hierarchy_nodes_for_source(\n    db_session: Session,\n    source: DocumentSource,\n    user_email: str,\n    external_group_ids: list[str],\n) -> list[HierarchyNode]:\n    \"\"\"\n    Get hierarchy nodes for a source that are accessible to the user.\n\n    Uses fetch_versioned_implementation to get the appropriate version:\n    - MIT version: Returns all nodes (no permission filtering)\n    - EE version: Filters based on user email and external group IDs\n    \"\"\"\n    versioned_fn = fetch_versioned_implementation(\n        \"onyx.db.hierarchy\", \"_get_accessible_hierarchy_nodes_for_source\"\n    )\n    return versioned_fn(db_session, source, user_email, external_group_ids)\n\n\ndef get_document_parent_hierarchy_node_ids(\n    db_session: Session,\n    document_ids: list[str],\n) -> dict[str, int | None]:\n    \"\"\"\n    Get the parent_hierarchy_node_id for multiple documents in a single query.\n\n    Args:\n        db_session: SQLAlchemy session\n        document_ids: List of document IDs to look up\n\n    Returns:\n        Dict mapping document_id -> parent_hierarchy_node_id (or None if not set)\n    \"\"\"\n\n    if not document_ids:\n        return {}\n\n    stmt = select(Document.id, Document.parent_hierarchy_node_id).where(\n        Document.id.in_(document_ids)\n    )\n    results = db_session.execute(stmt).all()\n\n    return {doc_id: parent_id for doc_id, parent_id in results}\n\n\ndef update_document_parent_hierarchy_nodes(\n    db_session: Session,\n    doc_parent_map: dict[str, int | None],\n    commit: bool = True,\n) -> int:\n    \"\"\"Bulk-update Document.parent_hierarchy_node_id for multiple documents.\n\n    Only updates rows whose current value differs from the desired value to\n    avoid unnecessary writes.\n\n    Args:\n        db_session: SQLAlchemy session\n        doc_parent_map: Mapping of document_id → desired parent_hierarchy_node_id\n        commit: Whether to commit the transaction\n\n    Returns:\n        Number of documents actually updated\n    \"\"\"\n    if not doc_parent_map:\n        return 0\n\n    doc_ids = list(doc_parent_map.keys())\n    existing = get_document_parent_hierarchy_node_ids(db_session, doc_ids)\n\n    by_parent: dict[int | None, list[str]] = defaultdict(list)\n    for doc_id, desired_parent_id in doc_parent_map.items():\n        current = existing.get(doc_id)\n        if current == desired_parent_id or doc_id not in existing:\n            continue\n        by_parent[desired_parent_id].append(doc_id)\n\n    updated = 0\n    for desired_parent_id, ids in by_parent.items():\n        db_session.query(Document).filter(Document.id.in_(ids)).update(\n            {Document.parent_hierarchy_node_id: desired_parent_id},\n            synchronize_session=False,\n        )\n        updated += len(ids)\n\n    if commit:\n        db_session.commit()\n    elif updated:\n        db_session.flush()\n\n    return updated\n\n\ndef update_hierarchy_node_permissions(\n    db_session: Session,\n    raw_node_id: str,\n    source: DocumentSource,\n    is_public: bool,\n    external_user_emails: list[str] | None,\n    external_user_group_ids: list[str] | None,\n    commit: bool = True,\n) -> bool:\n    \"\"\"\n    Update permissions for an existing hierarchy node.\n\n    This is used during permission sync to update folder permissions\n    without needing the full Pydantic HierarchyNode model.\n\n    Args:\n        db_session: SQLAlchemy session\n        raw_node_id: Raw node ID from the source system\n        source: Document source type\n        is_public: Whether the node is public\n        external_user_emails: List of user emails with access\n        external_user_group_ids: List of group IDs with access\n        commit: Whether to commit the transaction\n\n    Returns:\n        True if the node was found and updated, False if not found\n    \"\"\"\n    existing_node = get_hierarchy_node_by_raw_id(db_session, raw_node_id, source)\n\n    if not existing_node:\n        logger.warning(\n            f\"Hierarchy node not found for permission update: raw_node_id={raw_node_id}, source={source}\"\n        )\n        return False\n\n    existing_node.is_public = is_public\n    existing_node.external_user_emails = external_user_emails\n    existing_node.external_user_group_ids = external_user_group_ids\n\n    if commit:\n        db_session.commit()\n    else:\n        db_session.flush()\n\n    return True\n\n\ndef upsert_hierarchy_node_cc_pair_entries(\n    db_session: Session,\n    hierarchy_node_ids: list[int],\n    connector_id: int,\n    credential_id: int,\n    commit: bool = True,\n) -> None:\n    \"\"\"Insert rows into HierarchyNodeByConnectorCredentialPair, ignoring conflicts.\n\n    This records that the given cc_pair \"owns\" these hierarchy nodes. Used by\n    indexing, pruning, and hierarchy-fetching paths.\n    \"\"\"\n    if not hierarchy_node_ids:\n        return\n\n    _M = HierarchyNodeByConnectorCredentialPair\n    stmt = pg_insert(_M).values(\n        [\n            {\n                _M.hierarchy_node_id: node_id,\n                _M.connector_id: connector_id,\n                _M.credential_id: credential_id,\n            }\n            for node_id in hierarchy_node_ids\n        ]\n    )\n    stmt = stmt.on_conflict_do_nothing()\n    db_session.execute(stmt)\n\n    if commit:\n        db_session.commit()\n    else:\n        db_session.flush()\n\n\ndef remove_stale_hierarchy_node_cc_pair_entries(\n    db_session: Session,\n    connector_id: int,\n    credential_id: int,\n    live_hierarchy_node_ids: set[int],\n    commit: bool = True,\n) -> int:\n    \"\"\"Delete join-table rows for this cc_pair that are NOT in the live set.\n\n    If ``live_hierarchy_node_ids`` is empty ALL rows for the cc_pair are deleted\n    (i.e. the connector no longer has any hierarchy nodes). Callers that want a\n    no-op when there are no live nodes must guard before calling.\n\n    Returns the number of deleted rows.\n    \"\"\"\n    stmt = delete(HierarchyNodeByConnectorCredentialPair).where(\n        HierarchyNodeByConnectorCredentialPair.connector_id == connector_id,\n        HierarchyNodeByConnectorCredentialPair.credential_id == credential_id,\n    )\n    if live_hierarchy_node_ids:\n        stmt = stmt.where(\n            HierarchyNodeByConnectorCredentialPair.hierarchy_node_id.notin_(\n                live_hierarchy_node_ids\n            )\n        )\n\n    result: CursorResult = db_session.execute(stmt)  # type: ignore[assignment]\n    deleted = result.rowcount\n\n    if commit:\n        db_session.commit()\n    elif deleted:\n        db_session.flush()\n\n    return deleted\n\n\ndef delete_orphaned_hierarchy_nodes(\n    db_session: Session,\n    source: DocumentSource,\n    commit: bool = True,\n) -> list[str]:\n    \"\"\"Delete hierarchy nodes for a source that have zero cc_pair associations.\n\n    SOURCE-type nodes are excluded (they are synthetic roots).\n\n    Returns the list of raw_node_ids that were deleted (for cache eviction).\n    \"\"\"\n    # Find orphaned nodes: no rows in the join table\n    orphan_stmt = (\n        select(HierarchyNode.id, HierarchyNode.raw_node_id)\n        .outerjoin(\n            HierarchyNodeByConnectorCredentialPair,\n            HierarchyNode.id\n            == HierarchyNodeByConnectorCredentialPair.hierarchy_node_id,\n        )\n        .where(\n            HierarchyNode.source == source,\n            HierarchyNode.node_type != HierarchyNodeType.SOURCE,\n            HierarchyNodeByConnectorCredentialPair.hierarchy_node_id.is_(None),\n        )\n    )\n    orphans = db_session.execute(orphan_stmt).all()\n    if not orphans:\n        return []\n\n    orphan_ids = [row[0] for row in orphans]\n    deleted_raw_ids = [row[1] for row in orphans]\n\n    db_session.execute(delete(HierarchyNode).where(HierarchyNode.id.in_(orphan_ids)))\n\n    if commit:\n        db_session.commit()\n    else:\n        db_session.flush()\n\n    return deleted_raw_ids\n\n\ndef reparent_orphaned_hierarchy_nodes(\n    db_session: Session,\n    source: DocumentSource,\n    commit: bool = True,\n) -> list[HierarchyNode]:\n    \"\"\"Re-parent hierarchy nodes whose parent_id is NULL to the SOURCE node.\n\n    After pruning deletes stale nodes, their former children get parent_id=NULL\n    via the SET NULL cascade. This function points them back to the SOURCE root.\n\n    Returns the reparented HierarchyNode objects (with updated parent_id)\n    so callers can refresh downstream caches.\n    \"\"\"\n    source_node = get_source_hierarchy_node(db_session, source)\n    if not source_node:\n        return []\n\n    stmt = select(HierarchyNode).where(\n        HierarchyNode.source == source,\n        HierarchyNode.parent_id.is_(None),\n        HierarchyNode.node_type != HierarchyNodeType.SOURCE,\n    )\n    orphans = list(db_session.execute(stmt).scalars().all())\n    if not orphans:\n        return []\n\n    for node in orphans:\n        node.parent_id = source_node.id\n\n    if commit:\n        db_session.commit()\n    else:\n        db_session.flush()\n\n    return orphans\n"
  },
  {
    "path": "backend/onyx/db/hook.py",
    "content": "import datetime\nfrom uuid import UUID\n\nfrom sqlalchemy import delete\nfrom sqlalchemy import select\nfrom sqlalchemy.engine import CursorResult\nfrom sqlalchemy.exc import IntegrityError\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.constants import UNSET\nfrom onyx.db.constants import UnsetType\nfrom onyx.db.enums import HookFailStrategy\nfrom onyx.db.enums import HookPoint\nfrom onyx.db.models import Hook\nfrom onyx.db.models import HookExecutionLog\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\n\n\n# ── Hook CRUD ────────────────────────────────────────────────────────────\n\n\ndef get_hook_by_id(\n    *,\n    db_session: Session,\n    hook_id: int,\n    include_deleted: bool = False,\n    include_creator: bool = False,\n) -> Hook | None:\n    stmt = select(Hook).where(Hook.id == hook_id)\n    if not include_deleted:\n        stmt = stmt.where(Hook.deleted.is_(False))\n    if include_creator:\n        stmt = stmt.options(selectinload(Hook.creator))\n    return db_session.scalar(stmt)\n\n\ndef get_non_deleted_hook_by_hook_point(\n    *,\n    db_session: Session,\n    hook_point: HookPoint,\n    include_creator: bool = False,\n) -> Hook | None:\n    stmt = (\n        select(Hook).where(Hook.hook_point == hook_point).where(Hook.deleted.is_(False))\n    )\n    if include_creator:\n        stmt = stmt.options(selectinload(Hook.creator))\n    return db_session.scalar(stmt)\n\n\ndef get_hooks(\n    *,\n    db_session: Session,\n    include_deleted: bool = False,\n    include_creator: bool = False,\n) -> list[Hook]:\n    stmt = select(Hook)\n    if not include_deleted:\n        stmt = stmt.where(Hook.deleted.is_(False))\n    if include_creator:\n        stmt = stmt.options(selectinload(Hook.creator))\n    stmt = stmt.order_by(Hook.hook_point, Hook.created_at.desc())\n    return list(db_session.scalars(stmt).all())\n\n\ndef create_hook__no_commit(\n    *,\n    db_session: Session,\n    name: str,\n    hook_point: HookPoint,\n    endpoint_url: str | None = None,\n    api_key: str | None = None,\n    fail_strategy: HookFailStrategy,\n    timeout_seconds: float,\n    is_active: bool = False,\n    is_reachable: bool | None = None,\n    creator_id: UUID | None = None,\n) -> Hook:\n    \"\"\"Create a new hook for the given hook point.\n\n    At most one non-deleted hook per hook point is allowed. Raises\n    OnyxError(CONFLICT) if a hook already exists, including under concurrent\n    duplicate creates where the partial unique index fires an IntegrityError.\n    \"\"\"\n    existing = get_non_deleted_hook_by_hook_point(\n        db_session=db_session, hook_point=hook_point\n    )\n    if existing:\n        raise OnyxError(\n            OnyxErrorCode.CONFLICT,\n            f\"A hook for '{hook_point.value}' already exists (id={existing.id}).\",\n        )\n\n    hook = Hook(\n        name=name,\n        hook_point=hook_point,\n        endpoint_url=endpoint_url,\n        api_key=api_key,\n        fail_strategy=fail_strategy,\n        timeout_seconds=timeout_seconds,\n        is_active=is_active,\n        is_reachable=is_reachable,\n        creator_id=creator_id,\n    )\n    # Use a savepoint so that a failed insert only rolls back this operation,\n    # not the entire outer transaction.\n    savepoint = db_session.begin_nested()\n    try:\n        db_session.add(hook)\n        savepoint.commit()\n    except IntegrityError as exc:\n        savepoint.rollback()\n        if \"ix_hook_one_non_deleted_per_point\" in str(exc.orig):\n            raise OnyxError(\n                OnyxErrorCode.CONFLICT,\n                f\"A hook for '{hook_point.value}' already exists.\",\n            )\n        raise  # re-raise unrelated integrity errors (FK violations, etc.)\n    return hook\n\n\ndef update_hook__no_commit(\n    *,\n    db_session: Session,\n    hook_id: int,\n    name: str | None = None,\n    endpoint_url: str | None | UnsetType = UNSET,\n    api_key: str | None | UnsetType = UNSET,\n    fail_strategy: HookFailStrategy | None = None,\n    timeout_seconds: float | None = None,\n    is_active: bool | None = None,\n    is_reachable: bool | None = None,\n    include_creator: bool = False,\n) -> Hook:\n    \"\"\"Update hook fields.\n\n    Sentinel conventions:\n    - endpoint_url, api_key: pass UNSET to leave unchanged; pass None to clear.\n    - name, fail_strategy, timeout_seconds, is_active, is_reachable: pass None to leave unchanged.\n    \"\"\"\n    hook = get_hook_by_id(\n        db_session=db_session, hook_id=hook_id, include_creator=include_creator\n    )\n    if hook is None:\n        raise OnyxError(OnyxErrorCode.NOT_FOUND, f\"Hook with id {hook_id} not found.\")\n\n    if name is not None:\n        hook.name = name\n    if not isinstance(endpoint_url, UnsetType):\n        hook.endpoint_url = endpoint_url\n    if not isinstance(api_key, UnsetType):\n        hook.api_key = api_key  # type: ignore[assignment]  # EncryptedString coerces str → SensitiveValue at the ORM level\n    if fail_strategy is not None:\n        hook.fail_strategy = fail_strategy\n    if timeout_seconds is not None:\n        hook.timeout_seconds = timeout_seconds\n    if is_active is not None:\n        hook.is_active = is_active\n    if is_reachable is not None:\n        hook.is_reachable = is_reachable\n\n    db_session.flush()\n    return hook\n\n\ndef delete_hook__no_commit(\n    *,\n    db_session: Session,\n    hook_id: int,\n) -> None:\n    hook = get_hook_by_id(db_session=db_session, hook_id=hook_id)\n    if hook is None:\n        raise OnyxError(OnyxErrorCode.NOT_FOUND, f\"Hook with id {hook_id} not found.\")\n\n    hook.deleted = True\n    hook.is_active = False\n    db_session.flush()\n\n\n# ── HookExecutionLog CRUD ────────────────────────────────────────────────\n\n\ndef create_hook_execution_log__no_commit(\n    *,\n    db_session: Session,\n    hook_id: int,\n    is_success: bool,\n    error_message: str | None = None,\n    status_code: int | None = None,\n    duration_ms: int | None = None,\n) -> HookExecutionLog:\n    log = HookExecutionLog(\n        hook_id=hook_id,\n        is_success=is_success,\n        error_message=error_message,\n        status_code=status_code,\n        duration_ms=duration_ms,\n    )\n    db_session.add(log)\n    db_session.flush()\n    return log\n\n\ndef get_hook_execution_logs(\n    *,\n    db_session: Session,\n    hook_id: int,\n    limit: int,\n) -> list[HookExecutionLog]:\n    stmt = (\n        select(HookExecutionLog)\n        .where(HookExecutionLog.hook_id == hook_id)\n        .order_by(HookExecutionLog.created_at.desc())\n        .limit(limit)\n    )\n    return list(db_session.scalars(stmt).all())\n\n\ndef cleanup_old_execution_logs__no_commit(\n    *,\n    db_session: Session,\n    max_age_days: int,\n) -> int:\n    \"\"\"Delete execution logs older than max_age_days. Returns the number of rows deleted.\"\"\"\n    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(\n        days=max_age_days\n    )\n    result: CursorResult = db_session.execute(  # type: ignore[assignment]\n        delete(HookExecutionLog)\n        .where(HookExecutionLog.created_at < cutoff)\n        .execution_options(synchronize_session=False)\n    )\n    return result.rowcount\n"
  },
  {
    "path": "backend/onyx/db/image_generation.py",
    "content": "from sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import ImageGenerationConfig\nfrom onyx.db.models import LLMProvider\nfrom onyx.db.models import ModelConfiguration\nfrom onyx.llm.utils import get_max_input_tokens\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Default image generation config constants\nDEFAULT_IMAGE_PROVIDER_ID = \"openai_gpt_image_1\"\nDEFAULT_IMAGE_MODEL_NAME = \"gpt-image-1\"\nDEFAULT_IMAGE_PROVIDER = \"openai\"\n\n\ndef create_image_generation_config__no_commit(\n    db_session: Session,\n    image_provider_id: str,\n    model_configuration_id: int,\n    is_default: bool = False,\n) -> ImageGenerationConfig:\n    \"\"\"Create a new image generation config.\"\"\"\n    # If setting as default, clear ALL existing defaults in a single atomic update\n    # This is more atomic than select-then-update pattern\n    if is_default:\n        db_session.execute(\n            update(ImageGenerationConfig)\n            .where(ImageGenerationConfig.is_default.is_(True))\n            .values(is_default=False)\n        )\n\n    new_config = ImageGenerationConfig(\n        image_provider_id=image_provider_id,\n        model_configuration_id=model_configuration_id,\n        is_default=is_default,\n    )\n    db_session.add(new_config)\n    db_session.flush()\n    return new_config\n\n\ndef get_all_image_generation_configs(\n    db_session: Session,\n) -> list[ImageGenerationConfig]:\n    \"\"\"Get all image generation configs.\n\n    Returns:\n        List of all ImageGenerationConfig objects\n    \"\"\"\n    stmt = select(ImageGenerationConfig)\n    return list(db_session.scalars(stmt).all())\n\n\ndef get_image_generation_config(\n    db_session: Session,\n    image_provider_id: str,\n) -> ImageGenerationConfig | None:\n    \"\"\"Get a single image generation config by image_provider_id with relationships loaded.\n\n    Args:\n        db_session: Database session\n        image_provider_id: The image provider ID (primary key)\n\n    Returns:\n        The ImageGenerationConfig or None if not found\n    \"\"\"\n    stmt = (\n        select(ImageGenerationConfig)\n        .where(ImageGenerationConfig.image_provider_id == image_provider_id)\n        .options(\n            selectinload(ImageGenerationConfig.model_configuration).selectinload(\n                ModelConfiguration.llm_provider\n            )\n        )\n    )\n    return db_session.scalar(stmt)\n\n\ndef get_default_image_generation_config(\n    db_session: Session,\n) -> ImageGenerationConfig | None:\n    \"\"\"Get the default image generation config.\n\n    Returns:\n        The default ImageGenerationConfig or None if not set\n    \"\"\"\n    stmt = (\n        select(ImageGenerationConfig)\n        .where(ImageGenerationConfig.is_default.is_(True))\n        .options(\n            selectinload(ImageGenerationConfig.model_configuration).selectinload(\n                ModelConfiguration.llm_provider\n            )\n        )\n    )\n    return db_session.scalar(stmt)\n\n\ndef set_default_image_generation_config(\n    db_session: Session,\n    image_provider_id: str,\n) -> None:\n    \"\"\"Set a config as the default (clears previous default).\n\n    Args:\n        db_session: Database session\n        image_provider_id: The image provider ID to set as default\n\n    Raises:\n        ValueError: If config not found\n    \"\"\"\n    # Get the config to set as default\n    new_default = db_session.get(ImageGenerationConfig, image_provider_id)\n    if not new_default:\n        raise ValueError(\n            f\"ImageGenerationConfig with image_provider_id {image_provider_id} not found\"\n        )\n\n    # Clear ALL existing defaults in a single atomic update\n    # This is more atomic than select-then-update pattern\n    db_session.execute(\n        update(ImageGenerationConfig)\n        .where(\n            ImageGenerationConfig.is_default.is_(True),\n            ImageGenerationConfig.image_provider_id != image_provider_id,\n        )\n        .values(is_default=False)\n    )\n\n    # Set new default\n    new_default.is_default = True\n    db_session.commit()\n\n\ndef unset_default_image_generation_config(\n    db_session: Session,\n    image_provider_id: str,\n) -> None:\n    \"\"\"Unset a config as the default.\"\"\"\n    config = db_session.get(ImageGenerationConfig, image_provider_id)\n    if not config:\n        raise ValueError(\n            f\"ImageGenerationConfig with image_provider_id {image_provider_id} not found\"\n        )\n    config.is_default = False\n    db_session.commit()\n\n\ndef delete_image_generation_config__no_commit(\n    db_session: Session,\n    image_provider_id: str,\n) -> None:\n    \"\"\"Delete an image generation config by image_provider_id.\"\"\"\n    config = db_session.get(ImageGenerationConfig, image_provider_id)\n    if not config:\n        raise ValueError(\n            f\"ImageGenerationConfig with image_provider_id {image_provider_id} not found\"\n        )\n\n    db_session.delete(config)\n    db_session.flush()\n\n\ndef create_default_image_gen_config_from_api_key(\n    db_session: Session,\n    api_key: str,\n    provider: str = DEFAULT_IMAGE_PROVIDER,\n    image_provider_id: str = DEFAULT_IMAGE_PROVIDER_ID,\n    model_name: str = DEFAULT_IMAGE_MODEL_NAME,\n) -> ImageGenerationConfig | None:\n    \"\"\"Create default image gen config using an API key directly.\n\n    This function is used during tenant provisioning to automatically create\n    a default image generation config when an OpenAI provider is configured.\n\n    Args:\n        db_session: Database session\n        api_key: API key for the LLM provider\n        provider: Provider name (default: openai)\n        image_provider_id: Static unique key for the config (default: openai_gpt_image_1)\n        model_name: Model name for image generation (default: gpt-image-1)\n\n    Returns:\n        The created ImageGenerationConfig, or None if:\n        - image_generation_config table already has records\n    \"\"\"\n    # Check if any image generation configs already exist (optimization to avoid work)\n    existing_configs = get_all_image_generation_configs(db_session)\n    if existing_configs:\n        logger.info(\"Image generation config already exists, skipping default creation\")\n        return None\n\n    try:\n        # Create new LLM provider for image generation\n        new_provider = LLMProvider(\n            name=f\"Image Gen - {image_provider_id}\",\n            provider=provider,\n            api_key=api_key,\n            api_base=None,\n            api_version=None,\n            deployment_name=None,\n            is_public=True,\n        )\n        db_session.add(new_provider)\n        db_session.flush()\n\n        # Create model configuration\n        max_input_tokens = get_max_input_tokens(\n            model_name=model_name,\n            model_provider=provider,\n        )\n\n        model_config = ModelConfiguration(\n            llm_provider_id=new_provider.id,\n            name=model_name,\n            is_visible=True,\n            max_input_tokens=max_input_tokens,\n        )\n        db_session.add(model_config)\n        db_session.flush()\n\n        # Create image generation config\n        config = create_image_generation_config__no_commit(\n            db_session=db_session,\n            image_provider_id=image_provider_id,\n            model_configuration_id=model_config.id,\n            is_default=True,\n        )\n\n        db_session.commit()\n\n        logger.info(f\"Created default image generation config: {image_provider_id}\")\n\n        return config\n\n    except Exception:\n        db_session.rollback()\n        logger.exception(\n            f\"Failed to create default image generation config {image_provider_id}\"\n        )\n        return None\n"
  },
  {
    "path": "backend/onyx/db/index_attempt.py",
    "content": "from collections.abc import Sequence\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import NamedTuple\nfrom typing import TYPE_CHECKING\nfrom typing import TypeVarTuple\n\nfrom sqlalchemy import and_\nfrom sqlalchemy import delete\nfrom sqlalchemy import desc\nfrom sqlalchemy import func\nfrom sqlalchemy import Select\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import joinedload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.enums import IndexModelStatus\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.models import IndexAttemptError\nfrom onyx.db.models import SearchSettings\nfrom onyx.server.documents.models import ConnectorCredentialPairIdentifier\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.telemetry import optional_telemetry\nfrom onyx.utils.telemetry import RecordType\n\nif TYPE_CHECKING:\n    from onyx.configs.constants import DocumentSource\n\n# from sqlalchemy.sql.selectable import Select\n\n# Comment out unused imports that cause mypy errors\n# from onyx.auth.models import UserRole\n# from onyx.configs.constants import MAX_LAST_VALID_CHECKPOINT_AGE_SECONDS\n# from onyx.db.connector_credential_pair import ConnectorCredentialPairIdentifier\n# from onyx.db.engine import async_query_for_dms\n\nlogger = setup_logger()\n\n\ndef get_last_attempt_for_cc_pair(\n    cc_pair_id: int,\n    search_settings_id: int,\n    db_session: Session,\n) -> IndexAttempt | None:\n    return (\n        db_session.query(IndexAttempt)\n        .filter(\n            IndexAttempt.connector_credential_pair_id == cc_pair_id,\n            IndexAttempt.search_settings_id == search_settings_id,\n        )\n        .order_by(IndexAttempt.time_updated.desc())\n        .first()\n    )\n\n\ndef get_recent_completed_attempts_for_cc_pair(\n    cc_pair_id: int,\n    search_settings_id: int,\n    limit: int,\n    db_session: Session,\n) -> list[IndexAttempt]:\n    \"\"\"Most recent to least recent.\"\"\"\n    return (\n        db_session.query(IndexAttempt)\n        .filter(\n            IndexAttempt.connector_credential_pair_id == cc_pair_id,\n            IndexAttempt.search_settings_id == search_settings_id,\n            IndexAttempt.status.notin_(\n                [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]\n            ),\n        )\n        .order_by(IndexAttempt.time_updated.desc())\n        .limit(limit)\n        .all()\n    )\n\n\ndef get_recent_attempts_for_cc_pair(\n    cc_pair_id: int,\n    search_settings_id: int,\n    limit: int,\n    db_session: Session,\n) -> list[IndexAttempt]:\n    \"\"\"Most recent to least recent.\"\"\"\n    return (\n        db_session.query(IndexAttempt)\n        .filter(\n            IndexAttempt.connector_credential_pair_id == cc_pair_id,\n            IndexAttempt.search_settings_id == search_settings_id,\n        )\n        .order_by(IndexAttempt.time_updated.desc())\n        .limit(limit)\n        .all()\n    )\n\n\ndef get_index_attempt(\n    db_session: Session,\n    index_attempt_id: int,\n    eager_load_cc_pair: bool = False,\n    eager_load_search_settings: bool = False,\n) -> IndexAttempt | None:\n    stmt = select(IndexAttempt).where(IndexAttempt.id == index_attempt_id)\n    if eager_load_cc_pair:\n        stmt = stmt.options(\n            joinedload(IndexAttempt.connector_credential_pair).joinedload(\n                ConnectorCredentialPair.connector\n            )\n        )\n        stmt = stmt.options(\n            joinedload(IndexAttempt.connector_credential_pair).joinedload(\n                ConnectorCredentialPair.credential\n            )\n        )\n    if eager_load_search_settings:\n        stmt = stmt.options(joinedload(IndexAttempt.search_settings))\n    return db_session.scalars(stmt).first()\n\n\ndef count_error_rows_for_index_attempt(\n    index_attempt_id: int,\n    db_session: Session,\n) -> int:\n    return (\n        db_session.query(IndexAttemptError)\n        .filter(IndexAttemptError.index_attempt_id == index_attempt_id)\n        .count()\n    )\n\n\ndef create_index_attempt(\n    connector_credential_pair_id: int,\n    search_settings_id: int,\n    db_session: Session,\n    from_beginning: bool = False,\n    celery_task_id: str | None = None,\n) -> int:\n    new_attempt = IndexAttempt(\n        connector_credential_pair_id=connector_credential_pair_id,\n        search_settings_id=search_settings_id,\n        from_beginning=from_beginning,\n        status=IndexingStatus.NOT_STARTED,\n        celery_task_id=celery_task_id,\n    )\n    db_session.add(new_attempt)\n    db_session.commit()\n\n    return new_attempt.id\n\n\ndef delete_index_attempt(db_session: Session, index_attempt_id: int) -> None:\n    index_attempt = get_index_attempt(db_session, index_attempt_id)\n    if index_attempt:\n        db_session.delete(index_attempt)\n        db_session.commit()\n\n\ndef mock_successful_index_attempt(\n    connector_credential_pair_id: int,\n    search_settings_id: int,\n    docs_indexed: int,\n    db_session: Session,\n) -> int:\n    \"\"\"Should not be used in any user triggered flows\"\"\"\n    db_time = func.now()\n    new_attempt = IndexAttempt(\n        connector_credential_pair_id=connector_credential_pair_id,\n        search_settings_id=search_settings_id,\n        from_beginning=True,\n        status=IndexingStatus.SUCCESS,\n        total_docs_indexed=docs_indexed,\n        new_docs_indexed=docs_indexed,\n        # Need this to be some convincing random looking value and it can't be 0\n        # or the indexing rate would calculate out to infinity\n        time_started=db_time - timedelta(seconds=1.92),\n        time_updated=db_time,\n    )\n    db_session.add(new_attempt)\n    db_session.commit()\n\n    return new_attempt.id\n\n\ndef get_in_progress_index_attempts(\n    connector_id: int | None,\n    db_session: Session,\n) -> list[IndexAttempt]:\n    stmt = select(IndexAttempt)\n    if connector_id is not None:\n        stmt = stmt.where(\n            IndexAttempt.connector_credential_pair.has(connector_id=connector_id)\n        )\n    stmt = stmt.where(IndexAttempt.status == IndexingStatus.IN_PROGRESS)\n\n    incomplete_attempts = db_session.scalars(stmt)\n    return list(incomplete_attempts.all())\n\n\ndef get_all_index_attempts_by_status(\n    status: IndexingStatus, db_session: Session\n) -> list[IndexAttempt]:\n    \"\"\"Returns index attempts with the given status.\n    Only recommend calling this with non-terminal states as the full list of\n    terminal statuses may be quite large.\n\n    Results are ordered by time_created (oldest to newest).\"\"\"\n    stmt = select(IndexAttempt)\n    stmt = stmt.where(IndexAttempt.status == status)\n    stmt = stmt.order_by(IndexAttempt.time_created)\n    new_attempts = db_session.scalars(stmt)\n    return list(new_attempts.all())\n\n\ndef transition_attempt_to_in_progress(\n    index_attempt_id: int,\n    db_session: Session,\n) -> IndexAttempt:\n    \"\"\"Locks the row when we try to update\"\"\"\n    try:\n        attempt = db_session.execute(\n            select(IndexAttempt)\n            .where(IndexAttempt.id == index_attempt_id)\n            .with_for_update()\n        ).scalar_one()\n\n        if attempt is None:\n            raise RuntimeError(\n                f\"Unable to find IndexAttempt for ID '{index_attempt_id}'\"\n            )\n\n        if attempt.status != IndexingStatus.NOT_STARTED:\n            raise RuntimeError(\n                f\"Indexing attempt with ID '{index_attempt_id}' is not in NOT_STARTED status. \"\n                f\"Current status is '{attempt.status}'.\"\n            )\n\n        attempt.status = IndexingStatus.IN_PROGRESS\n        attempt.time_started = attempt.time_started or func.now()  # type: ignore\n        db_session.commit()\n        return attempt\n    except Exception:\n        db_session.rollback()\n        logger.exception(\"transition_attempt_to_in_progress exceptioned.\")\n        raise\n\n\ndef mark_attempt_in_progress(\n    index_attempt: IndexAttempt,\n    db_session: Session,\n) -> None:\n    try:\n        attempt = db_session.execute(\n            select(IndexAttempt)\n            .where(IndexAttempt.id == index_attempt.id)\n            .with_for_update()\n        ).scalar_one()\n\n        attempt.status = IndexingStatus.IN_PROGRESS\n        attempt.time_started = index_attempt.time_started or func.now()  # type: ignore\n        db_session.commit()\n\n        # Add telemetry for index attempt status change\n        optional_telemetry(\n            record_type=RecordType.INDEX_ATTEMPT_STATUS,\n            data={\n                \"index_attempt_id\": index_attempt.id,\n                \"status\": IndexingStatus.IN_PROGRESS.value,\n                \"cc_pair_id\": index_attempt.connector_credential_pair_id,\n            },\n        )\n    except Exception:\n        db_session.rollback()\n        raise\n\n\ndef mark_attempt_succeeded(\n    index_attempt_id: int,\n    db_session: Session,\n) -> IndexAttempt:\n    try:\n        attempt = db_session.execute(\n            select(IndexAttempt)\n            .where(IndexAttempt.id == index_attempt_id)\n            .with_for_update()\n        ).scalar_one()\n\n        attempt.status = IndexingStatus.SUCCESS\n        attempt.celery_task_id = None\n        db_session.commit()\n\n        # Add telemetry for index attempt status change\n        optional_telemetry(\n            record_type=RecordType.INDEX_ATTEMPT_STATUS,\n            data={\n                \"index_attempt_id\": index_attempt_id,\n                \"status\": IndexingStatus.SUCCESS.value,\n                \"cc_pair_id\": attempt.connector_credential_pair_id,\n            },\n        )\n        return attempt\n    except Exception:\n        db_session.rollback()\n        raise\n\n\ndef mark_attempt_partially_succeeded(\n    index_attempt_id: int,\n    db_session: Session,\n) -> IndexAttempt:\n    try:\n        attempt = db_session.execute(\n            select(IndexAttempt)\n            .where(IndexAttempt.id == index_attempt_id)\n            .with_for_update()\n        ).scalar_one()\n\n        attempt.status = IndexingStatus.COMPLETED_WITH_ERRORS\n        attempt.celery_task_id = None\n        db_session.commit()\n\n        # Add telemetry for index attempt status change\n        optional_telemetry(\n            record_type=RecordType.INDEX_ATTEMPT_STATUS,\n            data={\n                \"index_attempt_id\": index_attempt_id,\n                \"status\": IndexingStatus.COMPLETED_WITH_ERRORS.value,\n                \"cc_pair_id\": attempt.connector_credential_pair_id,\n            },\n        )\n        return attempt\n    except Exception:\n        db_session.rollback()\n        raise\n\n\ndef mark_attempt_canceled(\n    index_attempt_id: int,\n    db_session: Session,\n    reason: str = \"Unknown\",\n) -> None:\n    try:\n        attempt = db_session.execute(\n            select(IndexAttempt)\n            .where(IndexAttempt.id == index_attempt_id)\n            .with_for_update()\n        ).scalar_one()\n\n        if not attempt.time_started:\n            attempt.time_started = datetime.now(timezone.utc)\n        attempt.status = IndexingStatus.CANCELED\n        attempt.error_msg = reason\n        db_session.commit()\n\n        # Add telemetry for index attempt status change\n        optional_telemetry(\n            record_type=RecordType.INDEX_ATTEMPT_STATUS,\n            data={\n                \"index_attempt_id\": index_attempt_id,\n                \"status\": IndexingStatus.CANCELED.value,\n                \"cc_pair_id\": attempt.connector_credential_pair_id,\n            },\n        )\n    except Exception:\n        db_session.rollback()\n        raise\n\n\ndef mark_attempt_failed(\n    index_attempt_id: int,\n    db_session: Session,\n    failure_reason: str = \"Unknown\",\n    full_exception_trace: str | None = None,\n) -> None:\n    try:\n        attempt = db_session.execute(\n            select(IndexAttempt)\n            .where(IndexAttempt.id == index_attempt_id)\n            .with_for_update()\n        ).scalar_one()\n\n        if not attempt.time_started:\n            attempt.time_started = datetime.now(timezone.utc)\n        attempt.status = IndexingStatus.FAILED\n        attempt.error_msg = failure_reason\n        attempt.full_exception_trace = full_exception_trace\n        attempt.celery_task_id = None\n        db_session.commit()\n\n        # Add telemetry for index attempt status change\n        optional_telemetry(\n            record_type=RecordType.INDEX_ATTEMPT_STATUS,\n            data={\n                \"index_attempt_id\": index_attempt_id,\n                \"status\": IndexingStatus.FAILED.value,\n                \"cc_pair_id\": attempt.connector_credential_pair_id,\n            },\n        )\n    except Exception:\n        db_session.rollback()\n        raise\n\n\ndef update_docs_indexed(\n    db_session: Session,\n    index_attempt_id: int,\n    total_docs_indexed: int,\n    new_docs_indexed: int,\n    docs_removed_from_index: int,\n) -> None:\n    \"\"\"Updates the docs_indexed and new_docs_indexed fields of an index attempt.\n    Adds the given values to the current values in the db\"\"\"\n    try:\n        attempt = db_session.execute(\n            select(IndexAttempt)\n            .where(IndexAttempt.id == index_attempt_id)\n            .with_for_update()  # Locks the row when we try to update\n        ).scalar_one()\n\n        attempt.total_docs_indexed = (\n            attempt.total_docs_indexed or 0\n        ) + total_docs_indexed\n        attempt.new_docs_indexed = (attempt.new_docs_indexed or 0) + new_docs_indexed\n        attempt.docs_removed_from_index = (\n            attempt.docs_removed_from_index or 0\n        ) + docs_removed_from_index\n        db_session.commit()\n    except Exception:\n        db_session.rollback()\n        logger.exception(\"update_docs_indexed exceptioned.\")\n        raise\n\n\ndef get_last_attempt(\n    connector_id: int,\n    credential_id: int,\n    search_settings_id: int | None,\n    db_session: Session,\n) -> IndexAttempt | None:\n    stmt = (\n        select(IndexAttempt)\n        .join(ConnectorCredentialPair)\n        .where(\n            ConnectorCredentialPair.connector_id == connector_id,\n            ConnectorCredentialPair.credential_id == credential_id,\n            IndexAttempt.search_settings_id == search_settings_id,\n        )\n    )\n\n    # Note, the below is using time_created instead of time_updated\n    stmt = stmt.order_by(desc(IndexAttempt.time_created))\n\n    return db_session.execute(stmt).scalars().first()\n\n\ndef get_latest_index_attempts_by_status(\n    secondary_index: bool,\n    db_session: Session,\n    status: IndexingStatus,\n) -> Sequence[IndexAttempt]:\n    \"\"\"\n    Retrieves the most recent index attempt with the specified status for each connector_credential_pair.\n    Filters attempts based on the secondary_index flag to get either future or present index attempts.\n    Returns a sequence of IndexAttempt objects, one for each unique connector_credential_pair.\n    \"\"\"\n    latest_failed_attempts = (\n        select(\n            IndexAttempt.connector_credential_pair_id,\n            func.max(IndexAttempt.id).label(\"max_failed_id\"),\n        )\n        .join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)\n        .where(\n            SearchSettings.status\n            == (\n                IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT\n            ),\n            IndexAttempt.status == status,\n        )\n        .group_by(IndexAttempt.connector_credential_pair_id)\n        .subquery()\n    )\n\n    stmt = select(IndexAttempt).join(\n        latest_failed_attempts,\n        (\n            IndexAttempt.connector_credential_pair_id\n            == latest_failed_attempts.c.connector_credential_pair_id\n        )\n        & (IndexAttempt.id == latest_failed_attempts.c.max_failed_id),\n    )\n\n    return db_session.execute(stmt).scalars().all()\n\n\nT = TypeVarTuple(\"T\")\n\n\ndef _add_only_finished_clause(stmt: Select[tuple[*T]]) -> Select[tuple[*T]]:\n    return stmt.where(\n        IndexAttempt.status.not_in(\n            [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]\n        ),\n    )\n\n\ndef get_latest_index_attempts(\n    secondary_index: bool,\n    db_session: Session,\n    eager_load_cc_pair: bool = False,\n    only_finished: bool = False,\n) -> Sequence[IndexAttempt]:\n    ids_stmt = select(\n        IndexAttempt.connector_credential_pair_id,\n        func.max(IndexAttempt.id).label(\"max_id\"),\n    ).join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)\n\n    status = IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT\n    ids_stmt = ids_stmt.where(SearchSettings.status == status)\n\n    if only_finished:\n        ids_stmt = _add_only_finished_clause(ids_stmt)\n\n    ids_stmt = ids_stmt.group_by(IndexAttempt.connector_credential_pair_id)\n    ids_subquery = ids_stmt.subquery()\n\n    stmt = (\n        select(IndexAttempt)\n        .join(\n            ids_subquery,\n            IndexAttempt.connector_credential_pair_id\n            == ids_subquery.c.connector_credential_pair_id,\n        )\n        .where(IndexAttempt.id == ids_subquery.c.max_id)\n    )\n\n    if only_finished:\n        stmt = _add_only_finished_clause(stmt)\n\n    if eager_load_cc_pair:\n        stmt = stmt.options(\n            joinedload(IndexAttempt.connector_credential_pair),\n            joinedload(IndexAttempt.error_rows),\n        )\n\n    return db_session.execute(stmt).scalars().unique().all()\n\n\n# For use with our thread-level parallelism utils. Note that any relationships\n# you wish to use MUST be eagerly loaded, as the session will not be available\n# after this function to allow lazy loading.\ndef get_latest_index_attempts_parallel(\n    secondary_index: bool,\n    eager_load_cc_pair: bool = False,\n    only_finished: bool = False,\n) -> Sequence[IndexAttempt]:\n    with get_session_with_current_tenant() as db_session:\n        return get_latest_index_attempts(\n            secondary_index,\n            db_session,\n            eager_load_cc_pair,\n            only_finished,\n        )\n\n\ndef get_latest_index_attempt_for_cc_pair_id(\n    db_session: Session,\n    connector_credential_pair_id: int,\n    secondary_index: bool,\n    only_finished: bool = True,\n) -> IndexAttempt | None:\n    stmt = select(IndexAttempt)\n    stmt = stmt.where(\n        IndexAttempt.connector_credential_pair_id == connector_credential_pair_id,\n    )\n    if only_finished:\n        stmt = _add_only_finished_clause(stmt)\n\n    status = IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT\n    stmt = stmt.join(SearchSettings).where(SearchSettings.status == status)\n    stmt = stmt.order_by(desc(IndexAttempt.time_created))\n    stmt = stmt.limit(1)\n    return db_session.execute(stmt).scalar_one_or_none()\n\n\ndef get_latest_successful_index_attempt_for_cc_pair_id(\n    db_session: Session,\n    connector_credential_pair_id: int,\n    secondary_index: bool = False,\n) -> IndexAttempt | None:\n    \"\"\"Returns the most recent successful index attempt for the given cc pair,\n    filtered to the current (or future) search settings.\n    Uses MAX(id) semantics to match get_latest_index_attempts_by_status.\"\"\"\n    status = IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT\n    stmt = (\n        select(IndexAttempt)\n        .where(\n            IndexAttempt.connector_credential_pair_id == connector_credential_pair_id,\n            IndexAttempt.status.in_(\n                [IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]\n            ),\n        )\n        .join(SearchSettings)\n        .where(SearchSettings.status == status)\n        .order_by(desc(IndexAttempt.id))\n        .limit(1)\n    )\n    return db_session.execute(stmt).scalar_one_or_none()\n\n\ndef get_latest_successful_index_attempts_parallel(\n    secondary_index: bool = False,\n) -> Sequence[IndexAttempt]:\n    \"\"\"Batch version: returns the latest successful index attempt per cc pair.\n    Covers both SUCCESS and COMPLETED_WITH_ERRORS (matching is_successful()).\"\"\"\n    model_status = (\n        IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT\n    )\n    with get_session_with_current_tenant() as db_session:\n        latest_ids = (\n            select(\n                IndexAttempt.connector_credential_pair_id,\n                func.max(IndexAttempt.id).label(\"max_id\"),\n            )\n            .join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)\n            .where(\n                SearchSettings.status == model_status,\n                IndexAttempt.status.in_(\n                    [IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]\n                ),\n            )\n            .group_by(IndexAttempt.connector_credential_pair_id)\n            .subquery()\n        )\n\n        stmt = select(IndexAttempt).join(\n            latest_ids,\n            (\n                IndexAttempt.connector_credential_pair_id\n                == latest_ids.c.connector_credential_pair_id\n            )\n            & (IndexAttempt.id == latest_ids.c.max_id),\n        )\n        return db_session.execute(stmt).scalars().all()\n\n\ndef count_index_attempts_for_cc_pair(\n    db_session: Session,\n    cc_pair_id: int,\n    only_current: bool = True,\n    disinclude_finished: bool = False,\n) -> int:\n    stmt = select(IndexAttempt).where(\n        IndexAttempt.connector_credential_pair_id == cc_pair_id\n    )\n    if disinclude_finished:\n        stmt = stmt.where(\n            IndexAttempt.status.in_(\n                [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]\n            )\n        )\n    if only_current:\n        stmt = stmt.join(SearchSettings).where(\n            SearchSettings.status == IndexModelStatus.PRESENT\n        )\n    # Count total items for pagination\n    count_stmt = stmt.with_only_columns(func.count()).order_by(None)\n    total_count = db_session.execute(count_stmt).scalar_one()\n    return total_count\n\n\ndef get_paginated_index_attempts_for_cc_pair_id(\n    db_session: Session,\n    cc_pair_id: int,\n    page: int,\n    page_size: int,\n    only_current: bool = True,\n    disinclude_finished: bool = False,\n) -> list[IndexAttempt]:\n    stmt = select(IndexAttempt).where(\n        IndexAttempt.connector_credential_pair_id == cc_pair_id\n    )\n    if disinclude_finished:\n        stmt = stmt.where(\n            IndexAttempt.status.in_(\n                [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]\n            )\n        )\n    if only_current:\n        stmt = stmt.join(SearchSettings).where(\n            SearchSettings.status == IndexModelStatus.PRESENT\n        )\n\n    stmt = stmt.order_by(IndexAttempt.time_started.desc())\n\n    # Apply pagination\n    stmt = stmt.offset(page * page_size).limit(page_size)\n\n    return list(db_session.execute(stmt).scalars().unique().all())\n\n\ndef get_index_attempts_for_cc_pair(\n    db_session: Session,\n    cc_pair_identifier: ConnectorCredentialPairIdentifier,\n    only_current: bool = True,\n    disinclude_finished: bool = False,\n) -> Sequence[IndexAttempt]:\n    stmt = (\n        select(IndexAttempt)\n        .join(ConnectorCredentialPair)\n        .where(\n            and_(\n                ConnectorCredentialPair.connector_id == cc_pair_identifier.connector_id,\n                ConnectorCredentialPair.credential_id\n                == cc_pair_identifier.credential_id,\n            )\n        )\n    )\n    if disinclude_finished:\n        stmt = stmt.where(\n            IndexAttempt.status.in_(\n                [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]\n            )\n        )\n    if only_current:\n        stmt = stmt.join(SearchSettings).where(\n            SearchSettings.status == IndexModelStatus.PRESENT\n        )\n\n    stmt = stmt.order_by(IndexAttempt.time_created.desc())\n    return db_session.execute(stmt).scalars().all()\n\n\ndef delete_index_attempts(\n    cc_pair_id: int,\n    db_session: Session,\n) -> None:\n    # First, delete related entries in IndexAttemptErrors\n    stmt_errors = delete(IndexAttemptError).where(\n        IndexAttemptError.index_attempt_id.in_(\n            select(IndexAttempt.id).where(\n                IndexAttempt.connector_credential_pair_id == cc_pair_id\n            )\n        )\n    )\n    db_session.execute(stmt_errors)\n\n    stmt = delete(IndexAttempt).where(\n        IndexAttempt.connector_credential_pair_id == cc_pair_id,\n    )\n\n    db_session.execute(stmt)\n\n\ndef expire_index_attempts(\n    search_settings_id: int,\n    db_session: Session,\n) -> None:\n    not_started_query = (\n        update(IndexAttempt)\n        .where(IndexAttempt.search_settings_id == search_settings_id)\n        .where(IndexAttempt.status == IndexingStatus.NOT_STARTED)\n        .values(\n            status=IndexingStatus.CANCELED,\n            error_msg=\"Canceled, likely due to model swap\",\n        )\n    )\n    db_session.execute(not_started_query)\n\n    update_query = (\n        update(IndexAttempt)\n        .where(IndexAttempt.search_settings_id == search_settings_id)\n        .where(IndexAttempt.status != IndexingStatus.SUCCESS)\n        .values(\n            status=IndexingStatus.FAILED,\n            error_msg=\"Canceled due to embedding model swap\",\n        )\n    )\n    db_session.execute(update_query)\n\n    db_session.commit()\n\n\ndef cancel_indexing_attempts_for_ccpair(\n    cc_pair_id: int,\n    db_session: Session,\n    include_secondary_index: bool = False,\n) -> None:\n    stmt = (\n        update(IndexAttempt)\n        .where(IndexAttempt.connector_credential_pair_id == cc_pair_id)\n        .where(IndexAttempt.status == IndexingStatus.NOT_STARTED)\n        .values(\n            status=IndexingStatus.CANCELED,\n            error_msg=\"Canceled by user\",\n            time_started=datetime.now(timezone.utc),\n        )\n    )\n\n    if not include_secondary_index:\n        subquery = select(SearchSettings.id).where(\n            SearchSettings.status != IndexModelStatus.FUTURE\n        )\n        stmt = stmt.where(IndexAttempt.search_settings_id.in_(subquery))\n\n    db_session.execute(stmt)\n\n\ndef cancel_indexing_attempts_past_model(\n    db_session: Session,\n) -> None:\n    \"\"\"Stops all indexing attempts that are in progress or not started for\n    any embedding model that not present/future\"\"\"\n\n    db_session.execute(\n        update(IndexAttempt)\n        .where(\n            IndexAttempt.status.in_(\n                [IndexingStatus.IN_PROGRESS, IndexingStatus.NOT_STARTED]\n            ),\n            IndexAttempt.search_settings_id == SearchSettings.id,\n            SearchSettings.status == IndexModelStatus.PAST,\n        )\n        .values(status=IndexingStatus.FAILED)\n    )\n\n\ndef cancel_indexing_attempts_for_search_settings(\n    search_settings_id: int,\n    db_session: Session,\n) -> None:\n    \"\"\"Stops all indexing attempts that are in progress or not started for\n    the specified search settings.\"\"\"\n\n    db_session.execute(\n        update(IndexAttempt)\n        .where(\n            IndexAttempt.status.in_(\n                [IndexingStatus.IN_PROGRESS, IndexingStatus.NOT_STARTED]\n            ),\n            IndexAttempt.search_settings_id == search_settings_id,\n        )\n        .values(status=IndexingStatus.FAILED)\n    )\n\n\ndef count_unique_cc_pairs_with_successful_index_attempts(\n    search_settings_id: int | None,\n    db_session: Session,\n) -> int:\n    \"\"\"Collect all of the Index Attempts that are successful and for the specified embedding model\n    Then do distinct by connector_id and credential_id which is equivalent to the cc-pair. Finally,\n    do a count to get the total number of unique cc-pairs with successful attempts\"\"\"\n    unique_pairs_count = (\n        db_session.query(IndexAttempt.connector_credential_pair_id)\n        .join(ConnectorCredentialPair)\n        .filter(\n            IndexAttempt.search_settings_id == search_settings_id,\n            IndexAttempt.status == IndexingStatus.SUCCESS,\n        )\n        .distinct()\n        .count()\n    )\n\n    return unique_pairs_count\n\n\ndef count_unique_active_cc_pairs_with_successful_index_attempts(\n    search_settings_id: int | None,\n    db_session: Session,\n) -> int:\n    \"\"\"Collect all of the Index Attempts that are successful and for the specified embedding model,\n    but only for non-paused connector-credential pairs. Then do distinct by connector_id and credential_id\n    which is equivalent to the cc-pair. Finally, do a count to get the total number of unique non-paused\n    cc-pairs with successful attempts.\"\"\"\n    unique_pairs_count = (\n        db_session.query(IndexAttempt.connector_credential_pair_id)\n        .join(ConnectorCredentialPair)\n        .filter(\n            IndexAttempt.search_settings_id == search_settings_id,\n            IndexAttempt.status == IndexingStatus.SUCCESS,\n            ConnectorCredentialPair.status != ConnectorCredentialPairStatus.PAUSED,\n        )\n        .distinct()\n        .count()\n    )\n\n    return unique_pairs_count\n\n\ndef create_index_attempt_error(\n    index_attempt_id: int | None,\n    connector_credential_pair_id: int,\n    failure: ConnectorFailure,\n    db_session: Session,\n) -> int:\n    new_error = IndexAttemptError(\n        index_attempt_id=index_attempt_id,\n        connector_credential_pair_id=connector_credential_pair_id,\n        document_id=(\n            failure.failed_document.document_id if failure.failed_document else None\n        ),\n        document_link=(\n            failure.failed_document.document_link if failure.failed_document else None\n        ),\n        entity_id=(failure.failed_entity.entity_id if failure.failed_entity else None),\n        failed_time_range_start=(\n            failure.failed_entity.missed_time_range[0]\n            if failure.failed_entity and failure.failed_entity.missed_time_range\n            else None\n        ),\n        failed_time_range_end=(\n            failure.failed_entity.missed_time_range[1]\n            if failure.failed_entity and failure.failed_entity.missed_time_range\n            else None\n        ),\n        failure_message=failure.failure_message,\n        is_resolved=False,\n    )\n    db_session.add(new_error)\n    db_session.commit()\n\n    return new_error.id\n\n\ndef get_index_attempt_errors(\n    index_attempt_id: int,\n    db_session: Session,\n) -> list[IndexAttemptError]:\n    stmt = select(IndexAttemptError).where(\n        IndexAttemptError.index_attempt_id == index_attempt_id\n    )\n\n    errors = db_session.scalars(stmt)\n    return list(errors.all())\n\n\ndef count_index_attempt_errors_for_cc_pair(\n    cc_pair_id: int,\n    unresolved_only: bool,\n    db_session: Session,\n) -> int:\n    stmt = (\n        select(func.count())\n        .select_from(IndexAttemptError)\n        .where(IndexAttemptError.connector_credential_pair_id == cc_pair_id)\n    )\n    if unresolved_only:\n        stmt = stmt.where(IndexAttemptError.is_resolved.is_(False))\n\n    result = db_session.scalar(stmt)\n    return 0 if result is None else result\n\n\ndef get_index_attempt_errors_for_cc_pair(\n    cc_pair_id: int,\n    unresolved_only: bool,\n    db_session: Session,\n    page: int | None = None,\n    page_size: int | None = None,\n) -> list[IndexAttemptError]:\n    stmt = select(IndexAttemptError).where(\n        IndexAttemptError.connector_credential_pair_id == cc_pair_id\n    )\n    if unresolved_only:\n        stmt = stmt.where(IndexAttemptError.is_resolved.is_(False))\n\n    # Order by most recent first\n    stmt = stmt.order_by(desc(IndexAttemptError.time_created))\n\n    if page is not None and page_size is not None:\n        stmt = stmt.offset(page * page_size).limit(page_size)\n\n    return list(db_session.scalars(stmt).all())\n\n\n# ── Metrics query helpers ──────────────────────────────────────────────\n\n\nclass ActiveIndexAttemptMetric(NamedTuple):\n    \"\"\"Row returned by get_active_index_attempts_for_metrics.\"\"\"\n\n    status: IndexingStatus\n    source: \"DocumentSource\"\n    cc_pair_id: int\n    cc_pair_name: str | None\n    attempt_count: int\n\n\ndef get_active_index_attempts_for_metrics(\n    db_session: Session,\n) -> list[ActiveIndexAttemptMetric]:\n    \"\"\"Return non-terminal index attempts grouped by status, source, and connector.\n\n    Each row is (status, source, cc_pair_id, cc_pair_name, attempt_count).\n    \"\"\"\n    from onyx.db.models import Connector\n\n    terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]\n    rows = (\n        db_session.query(\n            IndexAttempt.status,\n            Connector.source,\n            ConnectorCredentialPair.id,\n            ConnectorCredentialPair.name,\n            func.count(),\n        )\n        .join(\n            ConnectorCredentialPair,\n            IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id,\n        )\n        .join(\n            Connector,\n            ConnectorCredentialPair.connector_id == Connector.id,\n        )\n        .filter(IndexAttempt.status.notin_(terminal_statuses))\n        .group_by(\n            IndexAttempt.status,\n            Connector.source,\n            ConnectorCredentialPair.id,\n            ConnectorCredentialPair.name,\n        )\n        .all()\n    )\n    return [ActiveIndexAttemptMetric(*row) for row in rows]\n\n\ndef get_failed_attempt_counts_by_cc_pair(\n    db_session: Session,\n    since: datetime | None = None,\n) -> dict[int, int]:\n    \"\"\"Return {cc_pair_id: failed_attempt_count} for all connectors.\n\n    When ``since`` is provided, only attempts created after that timestamp\n    are counted. Defaults to the last 90 days to avoid unbounded historical\n    aggregation.\n    \"\"\"\n    if since is None:\n        since = datetime.now(timezone.utc) - timedelta(days=90)\n\n    rows = (\n        db_session.query(\n            IndexAttempt.connector_credential_pair_id,\n            func.count(),\n        )\n        .filter(IndexAttempt.status == IndexingStatus.FAILED)\n        .filter(IndexAttempt.time_created >= since)\n        .group_by(IndexAttempt.connector_credential_pair_id)\n        .all()\n    )\n    return {cc_id: count for cc_id, count in rows}\n\n\ndef get_docs_indexed_by_cc_pair(\n    db_session: Session,\n    since: datetime | None = None,\n) -> dict[int, int]:\n    \"\"\"Return {cc_pair_id: total_new_docs_indexed} across successful attempts.\n\n    Only counts attempts with status SUCCESS to avoid inflating counts with\n    partial results from failed attempts. When ``since`` is provided, only\n    attempts created after that timestamp are included.\n    \"\"\"\n    if since is None:\n        since = datetime.now(timezone.utc) - timedelta(days=90)\n\n    query = (\n        db_session.query(\n            IndexAttempt.connector_credential_pair_id,\n            func.sum(func.coalesce(IndexAttempt.new_docs_indexed, 0)),\n        )\n        .filter(IndexAttempt.status == IndexingStatus.SUCCESS)\n        .filter(IndexAttempt.time_created >= since)\n        .group_by(IndexAttempt.connector_credential_pair_id)\n    )\n    rows = query.all()\n    return {cc_id: int(total or 0) for cc_id, total in rows}\n"
  },
  {
    "path": "backend/onyx/db/indexing_coordination.py",
    "content": "\"\"\"Database-based indexing coordination to replace Redis fencing.\"\"\"\n\nfrom pydantic import BaseModel\nfrom sqlalchemy import select\nfrom sqlalchemy.exc import SQLAlchemyError\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.engine.time_utils import get_db_current_time\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.index_attempt import count_error_rows_for_index_attempt\nfrom onyx.db.index_attempt import create_index_attempt\nfrom onyx.db.index_attempt import get_index_attempt\nfrom onyx.db.models import IndexAttempt\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nINDEXING_PROGRESS_TIMEOUT_HOURS = 6\n\n\nclass CoordinationStatus(BaseModel):\n    \"\"\"Status of an indexing attempt's coordination.\"\"\"\n\n    found: bool\n    total_batches: int | None\n    completed_batches: int\n    total_failures: int\n    total_docs: int\n    total_chunks: int\n    status: IndexingStatus | None = None\n    cancellation_requested: bool = False\n\n\nclass IndexingCoordination:\n    \"\"\"Database-based coordination for indexing tasks, replacing Redis fencing.\"\"\"\n\n    @staticmethod\n    def try_create_index_attempt(\n        db_session: Session,\n        cc_pair_id: int,\n        search_settings_id: int,\n        celery_task_id: str,\n        from_beginning: bool = False,\n    ) -> int | None:\n        \"\"\"\n        Try to create a new index attempt for the given CC pair and search settings.\n        Returns the index_attempt_id if successful, None if another attempt is already running.\n\n        This replaces the Redis fencing mechanism by using database constraints\n        and transactions to prevent duplicate attempts.\n        \"\"\"\n        try:\n            # Check for existing active attempts (this is the \"fence\" check)\n            existing_attempt = db_session.execute(\n                select(IndexAttempt)\n                .where(\n                    IndexAttempt.connector_credential_pair_id == cc_pair_id,\n                    IndexAttempt.search_settings_id == search_settings_id,\n                    IndexAttempt.status.in_(\n                        [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]\n                    ),\n                )\n                .with_for_update(nowait=True)\n            ).first()\n\n            if existing_attempt:\n                logger.info(\n                    f\"Indexing already in progress: \"\n                    f\"cc_pair={cc_pair_id} \"\n                    f\"search_settings={search_settings_id} \"\n                    f\"existing_attempt={existing_attempt[0].id}\"\n                )\n                return None\n\n            # Create new index attempt (this is setting the \"fence\")\n            attempt_id = create_index_attempt(\n                connector_credential_pair_id=cc_pair_id,\n                search_settings_id=search_settings_id,\n                from_beginning=from_beginning,\n                db_session=db_session,\n                celery_task_id=celery_task_id,\n            )\n\n            logger.info(\n                f\"Created Index Attempt: \"\n                f\"cc_pair={cc_pair_id} \"\n                f\"search_settings={search_settings_id} \"\n                f\"attempt_id={attempt_id} \"\n                f\"celery_task_id={celery_task_id}\"\n            )\n\n            return attempt_id\n\n        except SQLAlchemyError as e:\n            logger.info(\n                f\"Failed to create index attempt (likely race condition): \"\n                f\"cc_pair={cc_pair_id} \"\n                f\"search_settings={search_settings_id} \"\n                f\"error={str(e)}\"\n            )\n            db_session.rollback()\n            return None\n\n    @staticmethod\n    def check_cancellation_requested(\n        db_session: Session,\n        index_attempt_id: int,\n    ) -> bool:\n        \"\"\"\n        Check if cancellation has been requested for this indexing attempt.\n        This replaces Redis termination signals.\n        \"\"\"\n        attempt = get_index_attempt(db_session, index_attempt_id)\n        return attempt.cancellation_requested if attempt else False\n\n    @staticmethod\n    def request_cancellation(\n        db_session: Session,\n        index_attempt_id: int,\n    ) -> None:\n        \"\"\"\n        Request cancellation of an indexing attempt.\n        This replaces Redis termination signals.\n        \"\"\"\n        attempt = get_index_attempt(db_session, index_attempt_id)\n        if attempt:\n            attempt.cancellation_requested = True\n            db_session.commit()\n\n            logger.info(f\"Requested cancellation for attempt {index_attempt_id}\")\n\n    @staticmethod\n    def set_total_batches(\n        db_session: Session,\n        index_attempt_id: int,\n        total_batches: int,\n    ) -> None:\n        \"\"\"\n        Set the total number of batches for this indexing attempt.\n        Called by docfetching when extraction is complete.\n        \"\"\"\n        attempt = get_index_attempt(db_session, index_attempt_id)\n        if attempt:\n            attempt.total_batches = total_batches\n            db_session.commit()\n\n            logger.info(\n                f\"Set total batches: attempt={index_attempt_id} total={total_batches}\"\n            )\n\n    @staticmethod\n    def update_batch_completion_and_docs(\n        db_session: Session,\n        index_attempt_id: int,\n        total_docs_indexed: int,\n        new_docs_indexed: int,\n        total_chunks: int,\n    ) -> tuple[int, int | None]:\n        \"\"\"\n        Update batch completion and document counts atomically.\n        Returns (completed_batches, total_batches).\n        This extends the existing update_docs_indexed pattern.\n        \"\"\"\n        try:\n            attempt = db_session.execute(\n                select(IndexAttempt)\n                .where(IndexAttempt.id == index_attempt_id)\n                .with_for_update()  # Same pattern as existing update_docs_indexed\n            ).scalar_one()\n\n            # Existing document count updates\n            attempt.total_docs_indexed = (\n                attempt.total_docs_indexed or 0\n            ) + total_docs_indexed\n            attempt.new_docs_indexed = (\n                attempt.new_docs_indexed or 0\n            ) + new_docs_indexed\n\n            # New coordination updates\n            attempt.completed_batches = (attempt.completed_batches or 0) + 1\n            attempt.total_chunks = (attempt.total_chunks or 0) + total_chunks\n\n            db_session.commit()\n\n            logger.info(\n                f\"Updated batch completion: \"\n                f\"attempt={index_attempt_id} \"\n                f\"completed={attempt.completed_batches} \"\n                f\"total={attempt.total_batches} \"\n                f\"docs={total_docs_indexed} \"\n            )\n\n            return attempt.completed_batches, attempt.total_batches\n\n        except Exception:\n            db_session.rollback()\n            logger.exception(\n                f\"Failed to update batch completion for attempt {index_attempt_id}\"\n            )\n            raise\n\n    @staticmethod\n    def get_coordination_status(\n        db_session: Session,\n        index_attempt_id: int,\n    ) -> CoordinationStatus:\n        \"\"\"\n        Get the current coordination status for an indexing attempt.\n        This replaces reading FileStore state files.\n        \"\"\"\n        attempt = get_index_attempt(db_session, index_attempt_id)\n        if not attempt:\n            return CoordinationStatus(\n                found=False,\n                total_batches=None,\n                completed_batches=0,\n                total_failures=0,\n                total_docs=0,\n                total_chunks=0,\n                status=None,\n                cancellation_requested=False,\n            )\n\n        return CoordinationStatus(\n            found=True,\n            total_batches=attempt.total_batches,\n            completed_batches=attempt.completed_batches,\n            total_failures=count_error_rows_for_index_attempt(\n                index_attempt_id, db_session\n            ),\n            total_docs=attempt.total_docs_indexed or 0,\n            total_chunks=attempt.total_chunks,\n            status=attempt.status,\n            cancellation_requested=attempt.cancellation_requested,\n        )\n\n    @staticmethod\n    def get_orphaned_index_attempt_ids(db_session: Session) -> list[int]:\n        \"\"\"\n        Gets a list of potentially orphaned index attempts.\n        These are attempts in non-terminal state that have task IDs but may have died.\n\n        This replaces the old get_unfenced_index_attempt_ids function.\n        The actual orphan detection requires checking with Celery, which should be\n        done by the caller.\n        \"\"\"\n        # Find attempts that are active and have task IDs\n        # The caller needs to check each one with Celery to confirm orphaned status\n        active_attempts = (\n            db_session.execute(\n                select(IndexAttempt).where(\n                    IndexAttempt.status.in_(\n                        [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]\n                    ),\n                    IndexAttempt.celery_task_id.isnot(None),\n                )\n            )\n            .scalars()\n            .all()\n        )\n\n        return [attempt.id for attempt in active_attempts]\n\n    @staticmethod\n    def update_progress_tracking(\n        db_session: Session,\n        index_attempt_id: int,\n        current_batches_completed: int,\n        timeout_hours: int = INDEXING_PROGRESS_TIMEOUT_HOURS,\n        force_update_progress: bool = False,\n    ) -> bool:\n        \"\"\"\n        Update progress tracking for stall detection.\n        Returns True if sufficient progress was made, False if stalled.\n        \"\"\"\n\n        attempt = get_index_attempt(db_session, index_attempt_id)\n        if not attempt:\n            logger.error(f\"Index attempt {index_attempt_id} not found in database\")\n            return False\n\n        current_time = get_db_current_time(db_session)\n\n        # No progress - check if this is the first time tracking\n        # or if the caller wants to simulate guaranteed progress\n        if attempt.last_progress_time is None or force_update_progress:\n            # First time tracking - initialize\n            attempt.last_progress_time = current_time\n            attempt.last_batches_completed_count = current_batches_completed\n            db_session.commit()\n            return True\n\n        time_elapsed = (current_time - attempt.last_progress_time).total_seconds()\n        # only actually write to db every timeout_hours/2\n        # this ensure thats at most timeout_hours will pass with no activity\n        if time_elapsed < timeout_hours * 1800:\n            return True\n\n        # Check if progress has been made\n        if current_batches_completed <= attempt.last_batches_completed_count:\n            # if between timeout_hours/2 and timeout_hours has passed\n            # without an update, we consider the attempt stalled\n            return False\n\n        # Progress made - update tracking\n        attempt.last_progress_time = current_time\n        attempt.last_batches_completed_count = current_batches_completed\n        db_session.commit()\n        return True\n"
  },
  {
    "path": "backend/onyx/db/input_prompt.py",
    "content": "from uuid import UUID\n\nfrom fastapi import HTTPException\nfrom sqlalchemy import or_\nfrom sqlalchemy import select\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\nfrom sqlalchemy.exc import IntegrityError\nfrom sqlalchemy.orm import aliased\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import InputPrompt\nfrom onyx.db.models import InputPrompt__User\nfrom onyx.db.models import User\nfrom onyx.server.features.input_prompt.models import InputPromptSnapshot\nfrom onyx.server.manage.models import UserInfo\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef insert_input_prompt(\n    prompt: str,\n    content: str,\n    is_public: bool,\n    user: User | None,\n    db_session: Session,\n) -> InputPrompt:\n    user_id = user.id if user else None\n\n    # Use atomic INSERT ... ON CONFLICT DO NOTHING with RETURNING\n    # to avoid race conditions with the uniqueness check\n    stmt = pg_insert(InputPrompt).values(\n        prompt=prompt,\n        content=content,\n        active=True,\n        is_public=is_public,\n        user_id=user_id,\n    )\n\n    # Use the appropriate constraint based on whether this is a user-owned or public prompt\n    if user_id is not None:\n        stmt = stmt.on_conflict_do_nothing(constraint=\"uq_inputprompt_prompt_user_id\")\n    else:\n        # Partial unique indexes cannot be targeted by constraint name;\n        # must use index_elements + index_where\n        stmt = stmt.on_conflict_do_nothing(\n            index_elements=[InputPrompt.prompt],\n            index_where=InputPrompt.user_id.is_(None),\n        )\n\n    stmt = stmt.returning(InputPrompt)\n\n    result = db_session.execute(stmt)\n    input_prompt = result.scalar_one_or_none()\n\n    if input_prompt is None:\n        raise HTTPException(\n            status_code=409,\n            detail=f\"A prompt shortcut with the name '{prompt}' already exists\",\n        )\n\n    db_session.commit()\n    return input_prompt\n\n\ndef update_input_prompt(\n    user: User,\n    input_prompt_id: int,\n    prompt: str,\n    content: str,\n    active: bool,\n    db_session: Session,\n) -> InputPrompt:\n    input_prompt = db_session.scalar(\n        select(InputPrompt).where(InputPrompt.id == input_prompt_id)\n    )\n    if input_prompt is None:\n        raise ValueError(f\"No input prompt with id {input_prompt_id}\")\n\n    if not validate_user_prompt_authorization(user, input_prompt):\n        raise HTTPException(status_code=401, detail=\"You don't own this prompt\")\n\n    input_prompt.prompt = prompt\n    input_prompt.content = content\n    input_prompt.active = active\n\n    try:\n        db_session.commit()\n    except IntegrityError:\n        db_session.rollback()\n        raise HTTPException(\n            status_code=409,\n            detail=f\"A prompt shortcut with the name '{prompt}' already exists\",\n        )\n\n    return input_prompt\n\n\ndef validate_user_prompt_authorization(user: User, input_prompt: InputPrompt) -> bool:\n    prompt = InputPromptSnapshot.from_model(input_prompt=input_prompt)\n\n    # Public prompts cannot be modified via the user API (only admins via admin endpoints)\n    if prompt.is_public or prompt.user_id is None:\n        return False\n\n    # Anonymous users cannot modify user-owned prompts\n    if user.is_anonymous:\n        return False\n\n    # User must own the prompt\n    user_details = UserInfo.from_model(user)\n    return str(user_details.id) == str(prompt.user_id)\n\n\ndef remove_public_input_prompt(input_prompt_id: int, db_session: Session) -> None:\n    input_prompt = db_session.scalar(\n        select(InputPrompt).where(InputPrompt.id == input_prompt_id)\n    )\n\n    if input_prompt is None:\n        raise ValueError(f\"No input prompt with id {input_prompt_id}\")\n\n    if not input_prompt.is_public:\n        raise HTTPException(status_code=400, detail=\"This prompt is not public\")\n\n    db_session.delete(input_prompt)\n    db_session.commit()\n\n\ndef remove_input_prompt(\n    user: User,\n    input_prompt_id: int,\n    db_session: Session,\n    delete_public: bool = False,\n) -> None:\n    input_prompt = db_session.scalar(\n        select(InputPrompt).where(InputPrompt.id == input_prompt_id)\n    )\n    if input_prompt is None:\n        raise ValueError(f\"No input prompt with id {input_prompt_id}\")\n\n    if input_prompt.is_public and not delete_public:\n        raise HTTPException(\n            status_code=400, detail=\"Cannot delete public prompts with this method\"\n        )\n\n    if not validate_user_prompt_authorization(user, input_prompt):\n        raise HTTPException(status_code=401, detail=\"You do not own this prompt\")\n\n    db_session.delete(input_prompt)\n    db_session.commit()\n\n\ndef fetch_input_prompt_by_id(\n    id: int, user_id: UUID | None, db_session: Session\n) -> InputPrompt:\n    query = select(InputPrompt).where(InputPrompt.id == id)\n\n    if user_id:\n        query = query.where(\n            (InputPrompt.user_id == user_id) | (InputPrompt.user_id is None)\n        )\n    else:\n        # If no user_id is provided, only fetch prompts without a user_id (aka public)\n        query = query.where(InputPrompt.user_id == None)  # noqa\n\n    result = db_session.scalar(query)\n\n    if result is None:\n        raise HTTPException(422, \"No input prompt found\")\n\n    return result\n\n\ndef fetch_public_input_prompts(\n    db_session: Session,\n) -> list[InputPrompt]:\n    query = select(InputPrompt).where(InputPrompt.is_public)\n    return list(db_session.scalars(query).all())\n\n\ndef fetch_input_prompts_by_user(\n    db_session: Session,\n    user_id: UUID | None,\n    active: bool | None = None,\n    include_public: bool = False,\n) -> list[InputPrompt]:\n    \"\"\"\n    Returns all prompts belonging to the user or public prompts,\n    excluding those the user has specifically disabled.\n    \"\"\"\n\n    query = select(InputPrompt)\n\n    if user_id is not None:\n        # If we have a user, left join to InputPrompt__User to check \"disabled\"\n        IPU = aliased(InputPrompt__User)\n        query = query.join(\n            IPU,\n            (IPU.input_prompt_id == InputPrompt.id) & (IPU.user_id == user_id),\n            isouter=True,\n        )\n\n        # Exclude disabled prompts\n        query = query.where(or_(IPU.disabled.is_(None), IPU.disabled.is_(False)))\n\n        if include_public:\n            # Return both user-owned and public prompts\n            query = query.where(\n                or_(\n                    InputPrompt.user_id == user_id,\n                    InputPrompt.is_public,\n                )\n            )\n        else:\n            # Return only user-owned prompts\n            query = query.where(InputPrompt.user_id == user_id)\n\n    else:\n        # user_id is None - anonymous usage\n        if include_public:\n            query = query.where(InputPrompt.is_public)\n        else:\n            # No user and not requesting public prompts - return nothing\n            return []\n\n    if active is not None:\n        query = query.where(InputPrompt.active == active)\n\n    return list(db_session.scalars(query).all())\n\n\ndef disable_input_prompt_for_user(\n    input_prompt_id: int,\n    user_id: UUID,\n    db_session: Session,\n) -> None:\n    \"\"\"\n    Sets (or creates) a record in InputPrompt__User with disabled=True\n    so that this prompt is hidden for the user.\n    \"\"\"\n    ipu = (\n        db_session.query(InputPrompt__User)\n        .filter_by(input_prompt_id=input_prompt_id, user_id=user_id)\n        .first()\n    )\n\n    if ipu is None:\n        # Create a new association row\n        ipu = InputPrompt__User(\n            input_prompt_id=input_prompt_id, user_id=user_id, disabled=True\n        )\n        db_session.add(ipu)\n    else:\n        # Just update the existing record\n        ipu.disabled = True\n\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/kg_config.py",
    "content": "from onyx.configs.constants import KV_KG_CONFIG_KEY\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.kg.models import KGConfigSettings\nfrom onyx.server.kg.models import EnableKGConfigRequest\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef set_kg_config_settings(kg_config_settings: KGConfigSettings) -> None:\n    kv_store = get_kv_store()\n    kv_store.store(KV_KG_CONFIG_KEY, kg_config_settings.model_dump())\n\n\ndef get_kg_config_settings() -> KGConfigSettings:\n    kv_store = get_kv_store()\n    try:\n        # refresh cache True until beta is over as we may manually update the config in the db\n        stored_config = kv_store.load(KV_KG_CONFIG_KEY, refresh_cache=True)\n        return KGConfigSettings.model_validate(stored_config or {})\n    except KvKeyNotFoundError:\n        # Default to empty kg config if no config have been set yet\n        logger.debug(f\"No kg config found in KV store for key: {KV_KG_CONFIG_KEY}\")\n        return KGConfigSettings()\n    except Exception as e:\n        logger.error(f\"Error loading kg config from KV store: {str(e)}\")\n        return KGConfigSettings()\n\n\ndef validate_kg_settings(kg_config_settings: KGConfigSettings) -> None:\n    if not kg_config_settings.KG_ENABLED:\n        raise ValueError(\"KG is not enabled\")\n    if not kg_config_settings.KG_VENDOR:\n        raise ValueError(\"KG_VENDOR is not set\")\n    if not kg_config_settings.KG_VENDOR_DOMAINS:\n        raise ValueError(\"KG_VENDOR_DOMAINS is not set\")\n\n\ndef is_kg_config_settings_enabled_valid(kg_config_settings: KGConfigSettings) -> bool:\n    try:\n        validate_kg_settings(kg_config_settings)\n        return True\n    except Exception:\n        return False\n\n\ndef enable_kg(enable_req: EnableKGConfigRequest) -> None:\n    kg_config_settings = get_kg_config_settings()\n    kg_config_settings.KG_ENABLED = True\n    kg_config_settings.KG_VENDOR = enable_req.vendor\n    kg_config_settings.KG_VENDOR_DOMAINS = enable_req.vendor_domains\n    kg_config_settings.KG_IGNORE_EMAIL_DOMAINS = enable_req.ignore_domains\n    kg_config_settings.KG_COVERAGE_START = enable_req.coverage_start.strftime(\n        \"%Y-%m-%d\"\n    )\n    kg_config_settings.KG_MAX_COVERAGE_DAYS = 10000  # TODO: revisit after public beta\n\n    validate_kg_settings(kg_config_settings)\n    set_kg_config_settings(kg_config_settings)\n\n\ndef disable_kg() -> None:\n    kg_config_settings = get_kg_config_settings()\n    kg_config_settings.KG_ENABLED = False\n    set_kg_config_settings(kg_config_settings)\n"
  },
  {
    "path": "backend/onyx/db/kg_temp_view.py",
    "content": "# import random\n\n# from sqlalchemy import text\n# from sqlalchemy.ext.declarative import declarative_base\n# from sqlalchemy.orm import Session\n\n# from onyx.agents.agent_search.kb_search.models import KGViewNames\n# from onyx.configs.app_configs import DB_READONLY_USER\n# from onyx.configs.kg_configs import KG_TEMP_ALLOWED_DOCS_VIEW_NAME_PREFIX\n# from onyx.configs.kg_configs import KG_TEMP_KG_ENTITIES_VIEW_NAME_PREFIX\n# from onyx.configs.kg_configs import KG_TEMP_KG_RELATIONSHIPS_VIEW_NAME_PREFIX\n# from onyx.db.engine.sql_engine import get_session_with_current_tenant\n\n\n# Base = declarative_base()\n\n\n# def get_user_view_names(\n#     user_email: str, tenant_id: str\n# ) -> KGViewNames:\n#     user_email_cleaned = (\n#         user_email.replace(\"@\", \"__\")\n#         .replace(\".\", \"_\")\n#         .replace(\"+\", \"_\")\n#     )\n#     random_suffix_str = str(\n#         random.randint(1000000, 9999999)\n#     )\n#     return KGViewNames(\n#         allowed_docs_view_name=(\n#             f'\"{tenant_id}\".'\n#             f\"{KG_TEMP_ALLOWED_DOCS_VIEW_NAME_PREFIX}_\"\n#             f\"{user_email_cleaned}_{random_suffix_str}\"\n#         ),\n#         kg_relationships_view_name=(\n#             f'\"{tenant_id}\".'\n#             f\"{KG_TEMP_KG_RELATIONSHIPS_VIEW_NAME_PREFIX}_\"\n#             f\"{user_email_cleaned}_{random_suffix_str}\"\n#         ),\n#         kg_entity_view_name=(\n#             f'\"{tenant_id}\".'\n#             f\"{KG_TEMP_KG_ENTITIES_VIEW_NAME_PREFIX}_\"\n#             f\"{user_email_cleaned}_{random_suffix_str}\"\n#         ),\n#     )\n\n\n# # First, create the view definition\n# def create_views(\n#     db_session: Session,\n#     tenant_id: str,\n#     user_email: str,\n#     allowed_docs_view_name: str,\n#     kg_relationships_view_name: str,\n#     kg_entity_view_name: str,\n# ) -> None:\n\n#     # Create ALLOWED_DOCS view\n#     allowed_docs_view = text(\n#         f\"\"\"\n#     CREATE OR REPLACE VIEW {allowed_docs_view_name} AS\n#     WITH kg_used_docs AS (\n#         SELECT document_id as kg_used_doc_id\n#         FROM \"{tenant_id}\".kg_entity d\n#         WHERE document_id IS NOT NULL\n#     ),\n\n#     base_public_docs AS (\n#         SELECT d.id as allowed_doc_id\n#         FROM \"{tenant_id}\".document d\n#         INNER JOIN kg_used_docs kud ON kud.kg_used_doc_id = d.id\n#         WHERE d.is_public\n#     ),\n#     user_owned_and_public_docs AS (\n#         SELECT d.id as allowed_doc_id\n#         FROM \"{tenant_id}\".document_by_connector_credential_pair d\n#         JOIN \"{tenant_id}\".credential c ON d.credential_id = c.id\n#         JOIN \"{tenant_id}\".connector_credential_pair ccp ON\n#             d.connector_id = ccp.connector_id AND\n#             d.credential_id = ccp.credential_id\n#         JOIN \"{tenant_id}\".user u ON c.user_id = u.id\n#         INNER JOIN kg_used_docs kud ON kud.kg_used_doc_id = d.id\n#         WHERE ccp.status != 'DELETING'\n#         AND ccp.access_type != 'SYNC'\n#         AND (u.email = :user_email or ccp.access_type::text = 'PUBLIC')\n#     ),\n#     user_group_accessible_docs AS (\n#         SELECT d.id as allowed_doc_id\n#         FROM \"{tenant_id}\".document_by_connector_credential_pair d\n#         JOIN \"{tenant_id}\".connector_credential_pair ccp ON\n#             d.connector_id = ccp.connector_id AND\n#             d.credential_id = ccp.credential_id\n#         JOIN \"{tenant_id}\".user_group__connector_credential_pair ugccp ON\n#             ccp.id = ugccp.cc_pair_id\n#         JOIN \"{tenant_id}\".user__user_group uug ON\n#             uug.user_group_id = ugccp.user_group_id\n#         JOIN \"{tenant_id}\".user u ON uug.user_id = u.id\n#         INNER JOIN kg_used_docs kud ON kud.kg_used_doc_id = d.id\n#         WHERE kud.kg_used_doc_id IS NOT NULL\n#         AND ccp.status != 'DELETING'\n#         AND ccp.access_type != 'SYNC'\n#         AND u.email = :user_email\n#     ),\n#     external_user_docs AS (\n#         SELECT d.id as allowed_doc_id\n#         FROM \"{tenant_id}\".document d\n#         INNER JOIN kg_used_docs kud ON kud.kg_used_doc_id = d.id\n#         WHERE kud.kg_used_doc_id IS NOT NULL\n#         AND :user_email = ANY(external_user_emails)\n#     ),\n#     external_group_docs AS (\n#         SELECT d.id as allowed_doc_id\n#         FROM \"{tenant_id}\".document d\n#         INNER JOIN kg_used_docs kud ON kud.kg_used_doc_id = d.id\n#         JOIN \"{tenant_id}\".user__external_user_group_id ueg ON ueg.external_user_group_id = ANY(d.external_user_group_ids)\n#         JOIN \"{tenant_id}\".user u ON ueg.user_id = u.id\n#         WHERE kud.kg_used_doc_id IS NOT NULL\n#         AND u.email = :user_email\n#     )\n#     SELECT DISTINCT allowed_doc_id FROM (\n#         SELECT allowed_doc_id FROM base_public_docs\n#         UNION\n#         SELECT allowed_doc_id FROM user_owned_and_public_docs\n#         UNION\n#         SELECT allowed_doc_id FROM user_group_accessible_docs\n#         UNION\n#         SELECT allowed_doc_id FROM external_user_docs\n#         UNION\n#         SELECT allowed_doc_id FROM external_group_docs\n#     ) combined_docs\n#     \"\"\"\n#     ).bindparams(user_email=user_email)\n\n#     # Create the main view that uses ALLOWED_DOCS for Relationships\n#     kg_relationships_view = text(\n#         f\"\"\"\n#     CREATE OR REPLACE VIEW {kg_relationships_view_name} AS\n#     SELECT kgr.id_name as relationship,\n#            kgr.source_node as source_entity,\n#            kgr.target_node as target_entity,\n#            kgr.source_node_type as source_entity_type,\n#            kgr.target_node_type as target_entity_type,\n#            kgr.type as relationship_description,\n#            kgr.relationship_type_id_name as relationship_type,\n#            kgr.source_document as source_document,\n#            d.doc_updated_at as source_date,\n#            se.attributes as source_entity_attributes,\n#            te.attributes as target_entity_attributes\n#     FROM \"{tenant_id}\".kg_relationship kgr\n#     INNER JOIN {allowed_docs_view_name} AD on AD.allowed_doc_id = kgr.source_document\n#     JOIN \"{tenant_id}\".document d on d.id = kgr.source_document\n#     JOIN \"{tenant_id}\".kg_entity se on se.id_name = kgr.source_node\n#     JOIN \"{tenant_id}\".kg_entity te on te.id_name = kgr.target_node\n#     \"\"\"\n#     )\n\n#     # Create the main view that uses ALLOWED_DOCS for Entities\n#     kg_entity_view = text(\n#         f\"\"\"\n#     CREATE OR REPLACE VIEW {kg_entity_view_name} AS\n#     SELECT kge.id_name as entity,\n#            kge.entity_type_id_name as entity_type,\n#            kge.attributes as entity_attributes,\n#            kge.document_id as source_document,\n#            d.doc_updated_at as source_date\n#     FROM \"{tenant_id}\".kg_entity kge\n#     INNER JOIN {allowed_docs_view_name} AD on AD.allowed_doc_id = kge.document_id\n#     JOIN \"{tenant_id}\".document d on d.id = kge.document_id\n#     \"\"\"\n#     )\n\n#     # Execute the views using the session\n#     db_session.execute(allowed_docs_view)\n#     db_session.execute(kg_relationships_view)\n#     db_session.execute(kg_entity_view)\n\n#     # Grant permissions on view to readonly user\n\n#     db_session.execute(\n#         text(f\"GRANT SELECT ON {kg_relationships_view_name} TO {DB_READONLY_USER}\")\n#     )\n#     db_session.execute(\n#         text(f\"GRANT SELECT ON {kg_entity_view_name} TO {DB_READONLY_USER}\")\n#     )\n\n#     db_session.commit()\n\n#     return None\n\n\n# def drop_views(\n#     allowed_docs_view_name: str | None = None,\n#     kg_relationships_view_name: str | None = None,\n#     kg_entity_view_name: str | None = None,\n# ) -> None:\n#     \"\"\"\n#     Drops the temporary views created by create_views.\n\n#     Args:\n#         db_session: SQLAlchemy session\n#         allowed_docs_view_name: Name of the allowed_docs view\n#         kg_relationships_view_name: Name of the allowed kg_relationships view\n#         kg_entity_view_name: Name of the allowed kg_entity view\n#     \"\"\"\n\n#     with get_session_with_current_tenant() as db_drop_session:\n#         if kg_relationships_view_name:\n#             revoke_kg_relationships = text(\n#                 f\"REVOKE SELECT ON {kg_relationships_view_name} FROM {DB_READONLY_USER}\"\n#             )\n#             db_drop_session.execute(revoke_kg_relationships)\n#             drop_kg_relationships = text(\n#                 f\"DROP VIEW IF EXISTS {kg_relationships_view_name}\"\n#             )\n#             db_drop_session.execute(drop_kg_relationships)\n\n#         if kg_entity_view_name:\n#             revoke_kg_entities = text(\n#                 f\"REVOKE SELECT ON {kg_entity_view_name} FROM {DB_READONLY_USER}\"\n#             )\n#             db_drop_session.execute(revoke_kg_entities)\n#             drop_kg_entities = text(f\"DROP VIEW IF EXISTS {kg_entity_view_name}\")\n#             db_drop_session.execute(drop_kg_entities)\n\n#         if allowed_docs_view_name:\n#             drop_allowed_docs = text(f\"DROP VIEW IF EXISTS {allowed_docs_view_name}\")\n#             db_drop_session.execute(drop_allowed_docs)\n\n#         db_drop_session.commit()\n#     return None\n"
  },
  {
    "path": "backend/onyx/db/llm.py",
    "content": "from sqlalchemy import delete\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.dialects.postgresql import insert\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.enums import LLMModelFlowType\nfrom onyx.db.models import CloudEmbeddingProvider as CloudEmbeddingProviderModel\nfrom onyx.db.models import DocumentSet\nfrom onyx.db.models import ImageGenerationConfig\nfrom onyx.db.models import LLMModelFlow\nfrom onyx.db.models import LLMProvider as LLMProviderModel\nfrom onyx.db.models import LLMProvider__Persona\nfrom onyx.db.models import LLMProvider__UserGroup\nfrom onyx.db.models import ModelConfiguration\nfrom onyx.db.models import Persona\nfrom onyx.db.models import SearchSettings\nfrom onyx.db.models import Tool as ToolModel\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.llm.utils import model_supports_image_input\nfrom onyx.llm.well_known_providers.auto_update_models import LLMRecommendations\nfrom onyx.server.manage.embedding.models import CloudEmbeddingProvider\nfrom onyx.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import LLMProviderView\nfrom onyx.server.manage.llm.models import SyncModelEntry\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.enums import EmbeddingProvider\n\nlogger = setup_logger()\n\n\ndef update_group_llm_provider_relationships__no_commit(\n    llm_provider_id: int,\n    group_ids: list[int] | None,\n    db_session: Session,\n) -> None:\n    # Delete existing relationships\n    db_session.query(LLMProvider__UserGroup).filter(\n        LLMProvider__UserGroup.llm_provider_id == llm_provider_id\n    ).delete(synchronize_session=\"fetch\")\n\n    # Add new relationships from given group_ids\n    if group_ids:\n        new_relationships = [\n            LLMProvider__UserGroup(\n                llm_provider_id=llm_provider_id,\n                user_group_id=group_id,\n            )\n            for group_id in group_ids\n        ]\n        db_session.add_all(new_relationships)\n\n\ndef update_llm_provider_persona_relationships__no_commit(\n    db_session: Session,\n    llm_provider_id: int,\n    persona_ids: list[int] | None,\n) -> None:\n    \"\"\"Replace the persona restrictions for a provider within an open transaction.\"\"\"\n    db_session.execute(\n        delete(LLMProvider__Persona).where(\n            LLMProvider__Persona.llm_provider_id == llm_provider_id\n        )\n    )\n\n    if persona_ids:\n        db_session.add_all(\n            LLMProvider__Persona(\n                llm_provider_id=llm_provider_id,\n                persona_id=persona_id,\n            )\n            for persona_id in persona_ids\n        )\n\n\ndef fetch_user_group_ids(db_session: Session, user: User) -> set[int]:\n    \"\"\"Fetch the set of user group IDs for a given user.\n\n    Args:\n        db_session: Database session\n        user: User to fetch groups for\n\n    Returns:\n        Set of user group IDs. Empty set for anonymous users.\n    \"\"\"\n    if user.is_anonymous:\n        return set()\n\n    return set(\n        db_session.scalars(\n            select(User__UserGroup.user_group_id).where(\n                User__UserGroup.user_id == user.id\n            )\n        ).all()\n    )\n\n\ndef can_user_access_llm_provider(\n    provider: LLMProviderModel,\n    user_group_ids: set[int],\n    persona: Persona | None,\n    is_admin: bool = False,\n) -> bool:\n    \"\"\"Check if a user may use an LLM provider.\n\n    Args:\n        provider: The LLM provider to check access for\n        user_group_ids: Set of user group IDs the user belongs to\n        persona: The persona being used (if any)\n        is_admin: If True, bypass user group restrictions but still respect persona restrictions\n\n    Access logic:\n    - is_public controls USER access (group bypass): when True, all users can access\n      regardless of group membership. When False, user must be in a whitelisted group\n      (or be admin).\n    - Persona restrictions are ALWAYS enforced when set, regardless of is_public.\n      This allows admins to make a provider available to all users while still\n      restricting which personas (assistants) can use it.\n\n    Decision matrix:\n    1. is_public=True, no personas set → everyone has access\n    2. is_public=True, personas set → all users, but only whitelisted personas\n    3. is_public=False, groups+personas set → must satisfy BOTH (admins bypass groups)\n    4. is_public=False, only groups set → must be in group (admins bypass)\n    5. is_public=False, only personas set → must use whitelisted persona\n    6. is_public=False, neither set → admin-only (locked)\n    \"\"\"\n    provider_group_ids = {g.id for g in (provider.groups or [])}\n    provider_persona_ids = {p.id for p in (provider.personas or [])}\n    has_groups = bool(provider_group_ids)\n    has_personas = bool(provider_persona_ids)\n\n    # Persona restrictions are always enforced when set, regardless of is_public\n    if has_personas and not (persona and persona.id in provider_persona_ids):\n        return False\n\n    if provider.is_public:\n        return True\n\n    if has_groups:\n        return is_admin or bool(user_group_ids & provider_group_ids)\n\n    # No groups: either persona-whitelisted (already passed) or admin-only if locked\n    return has_personas or is_admin\n\n\ndef validate_persona_ids_exist(\n    db_session: Session, persona_ids: list[int]\n) -> tuple[set[int], list[int]]:\n    \"\"\"Validate that persona IDs exist in the database.\n\n    Returns:\n        Tuple of (fetched_persona_ids, missing_personas)\n    \"\"\"\n    fetched_persona_ids = set(\n        db_session.scalars(select(Persona.id).where(Persona.id.in_(persona_ids))).all()\n    )\n    missing_personas = sorted(set(persona_ids) - fetched_persona_ids)\n    return fetched_persona_ids, missing_personas\n\n\ndef get_personas_using_provider(\n    db_session: Session, provider_name: str\n) -> list[Persona]:\n    \"\"\"Get all non-deleted personas that use a specific LLM provider.\"\"\"\n    return list(\n        db_session.scalars(\n            select(Persona).where(\n                Persona.llm_model_provider_override == provider_name,\n                Persona.deleted == False,  # noqa: E712\n            )\n        ).all()\n    )\n\n\ndef fetch_persona_with_groups(db_session: Session, persona_id: int) -> Persona | None:\n    \"\"\"Fetch a persona with its groups eagerly loaded.\"\"\"\n    return db_session.scalar(\n        select(Persona)\n        .options(selectinload(Persona.groups))\n        .where(Persona.id == persona_id, Persona.deleted == False)  # noqa: E712\n    )\n\n\ndef upsert_cloud_embedding_provider(\n    db_session: Session, provider: CloudEmbeddingProviderCreationRequest\n) -> CloudEmbeddingProvider:\n    existing_provider = (\n        db_session.query(CloudEmbeddingProviderModel)\n        .filter_by(provider_type=provider.provider_type)\n        .first()\n    )\n    if existing_provider:\n        for key, value in provider.model_dump().items():\n            setattr(existing_provider, key, value)\n    else:\n        new_provider = CloudEmbeddingProviderModel(**provider.model_dump())\n\n        db_session.add(new_provider)\n        existing_provider = new_provider\n    db_session.commit()\n    db_session.refresh(existing_provider)\n    return CloudEmbeddingProvider.from_request(existing_provider)\n\n\ndef upsert_llm_provider(\n    llm_provider_upsert_request: LLMProviderUpsertRequest,\n    db_session: Session,\n) -> LLMProviderView:\n    existing_llm_provider: LLMProviderModel | None = None\n    if llm_provider_upsert_request.id:\n        existing_llm_provider = fetch_existing_llm_provider_by_id(\n            id=llm_provider_upsert_request.id, db_session=db_session\n        )\n        if not existing_llm_provider:\n            raise ValueError(\n                f\"LLM provider with id {llm_provider_upsert_request.id} not found\"\n            )\n\n        if existing_llm_provider.name != llm_provider_upsert_request.name:\n            raise ValueError(\n                f\"LLM provider with id {llm_provider_upsert_request.id} name change not allowed\"\n            )\n    else:\n        existing_llm_provider = fetch_existing_llm_provider(\n            name=llm_provider_upsert_request.name, db_session=db_session\n        )\n        if existing_llm_provider:\n            raise ValueError(\n                f\"LLM provider with name '{llm_provider_upsert_request.name}' already exists\"\n            )\n        existing_llm_provider = LLMProviderModel(name=llm_provider_upsert_request.name)\n        db_session.add(existing_llm_provider)\n\n    # Filter out empty strings and None values from custom_config to allow\n    # providers like Bedrock to fall back to IAM roles when credentials are not provided\n    custom_config = llm_provider_upsert_request.custom_config\n    if custom_config:\n        custom_config = {\n            k: v for k, v in custom_config.items() if v is not None and v.strip() != \"\"\n        }\n        # Set to None if the dict is empty after filtering\n        custom_config = custom_config or None\n\n    api_base = llm_provider_upsert_request.api_base or None\n    existing_llm_provider.provider = llm_provider_upsert_request.provider\n    # EncryptedString accepts str for writes, returns SensitiveValue for reads\n    existing_llm_provider.api_key = llm_provider_upsert_request.api_key  # type: ignore[assignment]\n    existing_llm_provider.api_base = api_base\n    existing_llm_provider.api_version = llm_provider_upsert_request.api_version\n    existing_llm_provider.custom_config = custom_config\n\n    existing_llm_provider.is_public = llm_provider_upsert_request.is_public\n    existing_llm_provider.is_auto_mode = llm_provider_upsert_request.is_auto_mode\n    existing_llm_provider.deployment_name = llm_provider_upsert_request.deployment_name\n\n    if not existing_llm_provider.id:\n        # If its not already in the db, we need to generate an ID by flushing\n        db_session.flush()\n\n    # Build a lookup of existing model configurations by name (single iteration)\n    existing_by_name = {\n        mc.name: mc for mc in existing_llm_provider.model_configurations\n    }\n\n    models_to_exist = {\n        mc.name for mc in llm_provider_upsert_request.model_configurations\n    }\n\n    # Build a lookup of requested visibility by model name\n    requested_visibility = {\n        mc.name: mc.is_visible\n        for mc in llm_provider_upsert_request.model_configurations\n    }\n\n    # Delete removed models\n    removed_ids = [\n        mc.id for name, mc in existing_by_name.items() if name not in models_to_exist\n    ]\n\n    default_model = fetch_default_llm_model(db_session)\n\n    # Prevent removing and hiding the default model\n    if default_model:\n        for name, mc in existing_by_name.items():\n            if mc.id == default_model.id:\n                if default_model.id in removed_ids:\n                    raise ValueError(\n                        f\"Cannot remove the default model '{name}'. Please change the default model before removing.\"\n                    )\n                if not requested_visibility.get(name, True):\n                    raise ValueError(\n                        f\"Cannot hide the default model '{name}'. Please change the default model before hiding.\"\n                    )\n                break\n\n    if removed_ids:\n        db_session.query(ModelConfiguration).filter(\n            ModelConfiguration.id.in_(removed_ids)\n        ).delete(synchronize_session=\"fetch\")\n        db_session.flush()\n\n    # Import here to avoid circular imports\n    from onyx.llm.utils import get_max_input_tokens\n\n    for model_config in llm_provider_upsert_request.model_configurations:\n        max_input_tokens = model_config.max_input_tokens\n        if max_input_tokens is None:\n            max_input_tokens = get_max_input_tokens(\n                model_name=model_config.name,\n                model_provider=llm_provider_upsert_request.provider,\n            )\n\n        supported_flows = [LLMModelFlowType.CHAT]\n        if model_config.supports_image_input:\n            supported_flows.append(LLMModelFlowType.VISION)\n\n        existing = existing_by_name.get(model_config.name)\n        if existing:\n            update_model_configuration__no_commit(\n                db_session=db_session,\n                model_configuration_id=existing.id,\n                supported_flows=supported_flows,\n                is_visible=model_config.is_visible,\n                max_input_tokens=max_input_tokens,\n                display_name=model_config.display_name,\n            )\n        else:\n            insert_new_model_configuration__no_commit(\n                db_session=db_session,\n                llm_provider_id=existing_llm_provider.id,\n                model_name=model_config.name,\n                supported_flows=supported_flows,\n                is_visible=model_config.is_visible,\n                max_input_tokens=max_input_tokens,\n                display_name=model_config.display_name,\n            )\n\n    # Make sure the relationship table stays up to date\n    update_group_llm_provider_relationships__no_commit(\n        llm_provider_id=existing_llm_provider.id,\n        group_ids=llm_provider_upsert_request.groups,\n        db_session=db_session,\n    )\n    update_llm_provider_persona_relationships__no_commit(\n        db_session=db_session,\n        llm_provider_id=existing_llm_provider.id,\n        persona_ids=llm_provider_upsert_request.personas,\n    )\n\n    db_session.flush()\n    db_session.refresh(existing_llm_provider)\n\n    try:\n        db_session.commit()\n    except Exception as e:\n        db_session.rollback()\n        raise ValueError(f\"Failed to save LLM provider: {str(e)}\") from e\n\n    full_llm_provider = LLMProviderView.from_model(existing_llm_provider)\n    return full_llm_provider\n\n\ndef sync_model_configurations(\n    db_session: Session,\n    provider_name: str,\n    models: list[SyncModelEntry],\n) -> int:\n    \"\"\"Sync model configurations for a dynamic provider (OpenRouter, Bedrock, Ollama, etc.).\n\n    This inserts NEW models from the source API without overwriting existing ones.\n    User preferences (is_visible, max_input_tokens) are preserved for existing models.\n\n    Args:\n        db_session: Database session\n        provider_name: Name of the LLM provider\n        models: List of SyncModelEntry objects describing the fetched models\n\n    Returns:\n        Number of new models added\n    \"\"\"\n    provider = fetch_existing_llm_provider(name=provider_name, db_session=db_session)\n    if not provider:\n        raise ValueError(f\"LLM Provider '{provider_name}' not found\")\n\n    # Get existing model names to count new additions\n    existing_names = {mc.name for mc in provider.model_configurations}\n\n    new_count = 0\n    for model in models:\n        if model.name not in existing_names:\n            # Insert new model with is_visible=False (user must explicitly enable)\n            supported_flows = [LLMModelFlowType.CHAT]\n            if model.supports_image_input:\n                supported_flows.append(LLMModelFlowType.VISION)\n\n            insert_new_model_configuration__no_commit(\n                db_session=db_session,\n                llm_provider_id=provider.id,\n                model_name=model.name,\n                supported_flows=supported_flows,\n                is_visible=False,\n                max_input_tokens=model.max_input_tokens,\n                display_name=model.display_name,\n            )\n            new_count += 1\n\n    if new_count > 0:\n        db_session.commit()\n\n    return new_count\n\n\ndef fetch_existing_embedding_providers(\n    db_session: Session,\n) -> list[CloudEmbeddingProviderModel]:\n    return list(db_session.scalars(select(CloudEmbeddingProviderModel)).all())\n\n\ndef fetch_existing_doc_sets(\n    db_session: Session, doc_ids: list[int]\n) -> list[DocumentSet]:\n    return list(\n        db_session.scalars(select(DocumentSet).where(DocumentSet.id.in_(doc_ids))).all()\n    )\n\n\ndef fetch_existing_tools(db_session: Session, tool_ids: list[int]) -> list[ToolModel]:\n    return list(\n        db_session.scalars(select(ToolModel).where(ToolModel.id.in_(tool_ids))).all()\n    )\n\n\ndef fetch_existing_models(\n    db_session: Session,\n    flow_types: list[LLMModelFlowType],\n) -> list[ModelConfiguration]:\n    models = (\n        select(ModelConfiguration)\n        .join(LLMModelFlow)\n        .where(LLMModelFlow.llm_model_flow_type.in_(flow_types))\n        .options(\n            selectinload(ModelConfiguration.llm_provider),\n            selectinload(ModelConfiguration.llm_model_flows),\n        )\n    )\n\n    return list(db_session.scalars(models).all())\n\n\ndef fetch_existing_llm_providers(\n    db_session: Session,\n    flow_type_filter: list[LLMModelFlowType],\n    only_public: bool = False,\n    exclude_image_generation_providers: bool = True,\n) -> list[LLMProviderModel]:\n    \"\"\"Fetch all LLM providers with optional filtering.\n\n    Args:\n        db_session: Database session\n        flow_type_filter: List of flow types to filter by, empty list for no filter\n        only_public: If True, only return public providers\n        exclude_image_generation_providers: If True, exclude providers that are\n            used for image generation configs\n    \"\"\"\n    stmt = select(LLMProviderModel)\n\n    if flow_type_filter:\n        providers_with_flows = (\n            select(ModelConfiguration.llm_provider_id)\n            .join(LLMModelFlow)\n            .where(LLMModelFlow.llm_model_flow_type.in_(flow_type_filter))\n            .distinct()\n        )\n        stmt = stmt.where(LLMProviderModel.id.in_(providers_with_flows))\n\n    if exclude_image_generation_providers:\n        image_gen_provider_ids = select(ModelConfiguration.llm_provider_id).join(\n            ImageGenerationConfig\n        )\n        stmt = stmt.where(~LLMProviderModel.id.in_(image_gen_provider_ids))\n\n    stmt = stmt.options(\n        selectinload(LLMProviderModel.model_configurations),\n        selectinload(LLMProviderModel.groups),\n        selectinload(LLMProviderModel.personas),\n    )\n\n    providers = list(db_session.scalars(stmt).all())\n    if only_public:\n        return [provider for provider in providers if provider.is_public]\n    return providers\n\n\ndef fetch_existing_llm_provider(\n    name: str, db_session: Session\n) -> LLMProviderModel | None:\n    provider_model = db_session.scalar(\n        select(LLMProviderModel)\n        .where(LLMProviderModel.name == name)\n        .options(\n            selectinload(LLMProviderModel.model_configurations),\n            selectinload(LLMProviderModel.groups),\n            selectinload(LLMProviderModel.personas),\n        )\n    )\n\n    return provider_model\n\n\ndef fetch_existing_llm_provider_by_id(\n    id: int, db_session: Session\n) -> LLMProviderModel | None:\n    provider_model = db_session.scalar(\n        select(LLMProviderModel)\n        .where(LLMProviderModel.id == id)\n        .options(\n            selectinload(LLMProviderModel.model_configurations),\n            selectinload(LLMProviderModel.groups),\n            selectinload(LLMProviderModel.personas),\n        )\n    )\n\n    return provider_model\n\n\ndef fetch_embedding_provider(\n    db_session: Session, provider_type: EmbeddingProvider\n) -> CloudEmbeddingProviderModel | None:\n    return db_session.scalar(\n        select(CloudEmbeddingProviderModel).where(\n            CloudEmbeddingProviderModel.provider_type == provider_type\n        )\n    )\n\n\ndef fetch_default_llm_model(db_session: Session) -> ModelConfiguration | None:\n    return fetch_default_model(db_session, LLMModelFlowType.CHAT)\n\n\ndef fetch_default_vision_model(db_session: Session) -> ModelConfiguration | None:\n    return fetch_default_model(db_session, LLMModelFlowType.VISION)\n\n\ndef fetch_default_contextual_rag_model(\n    db_session: Session,\n) -> ModelConfiguration | None:\n    return fetch_default_model(db_session, LLMModelFlowType.CONTEXTUAL_RAG)\n\n\ndef fetch_default_model(\n    db_session: Session,\n    flow_type: LLMModelFlowType,\n) -> ModelConfiguration | None:\n    model_config = db_session.scalar(\n        select(ModelConfiguration)\n        .options(selectinload(ModelConfiguration.llm_provider))\n        .join(LLMModelFlow)\n        .where(\n            LLMModelFlow.llm_model_flow_type == flow_type,\n            LLMModelFlow.is_default == True,  # noqa: E712\n        )\n    )\n\n    return model_config\n\n\ndef fetch_llm_provider_view(\n    db_session: Session, provider_name: str\n) -> LLMProviderView | None:\n    provider_model = fetch_existing_llm_provider(\n        name=provider_name, db_session=db_session\n    )\n    if not provider_model:\n        return None\n    return LLMProviderView.from_model(provider_model)\n\n\ndef remove_embedding_provider(\n    db_session: Session, provider_type: EmbeddingProvider\n) -> None:\n    db_session.execute(\n        delete(SearchSettings).where(SearchSettings.provider_type == provider_type)\n    )\n\n    # Delete the embedding provider\n    db_session.execute(\n        delete(CloudEmbeddingProviderModel).where(\n            CloudEmbeddingProviderModel.provider_type == provider_type\n        )\n    )\n\n    db_session.commit()\n\n\ndef remove_llm_provider(db_session: Session, provider_id: int) -> None:\n    provider = db_session.get(LLMProviderModel, provider_id)\n    if not provider:\n        raise ValueError(\"LLM Provider not found\")\n\n    # Clear the provider override from any personas using it\n    # This causes them to fall back to the default provider\n    personas_using_provider = get_personas_using_provider(db_session, provider.name)\n    for persona in personas_using_provider:\n        persona.llm_model_provider_override = None\n\n    db_session.execute(\n        delete(LLMProvider__UserGroup).where(\n            LLMProvider__UserGroup.llm_provider_id == provider_id\n        )\n    )\n    # Remove LLMProvider\n    db_session.execute(\n        delete(LLMProviderModel).where(LLMProviderModel.id == provider_id)\n    )\n    db_session.commit()\n\n\ndef remove_llm_provider__no_commit(db_session: Session, provider_id: int) -> None:\n    \"\"\"Remove LLM provider.\"\"\"\n    provider = db_session.get(LLMProviderModel, provider_id)\n    if not provider:\n        raise ValueError(\"LLM Provider not found\")\n\n    # Clear the provider override from any personas using it\n    # This causes them to fall back to the default provider\n    personas_using_provider = get_personas_using_provider(db_session, provider.name)\n    for persona in personas_using_provider:\n        persona.llm_model_provider_override = None\n\n    db_session.execute(\n        delete(LLMProvider__UserGroup).where(\n            LLMProvider__UserGroup.llm_provider_id == provider_id\n        )\n    )\n    # Remove LLMProvider\n    db_session.execute(\n        delete(LLMProviderModel).where(LLMProviderModel.id == provider_id)\n    )\n    db_session.flush()\n\n\ndef update_default_provider(\n    provider_id: int, model_name: str, db_session: Session\n) -> None:\n    _update_default_model(\n        db_session,\n        provider_id,\n        model_name,\n        LLMModelFlowType.CHAT,\n    )\n\n\ndef update_default_vision_provider(\n    provider_id: int, vision_model: str, db_session: Session\n) -> None:\n    provider = db_session.scalar(\n        select(LLMProviderModel).where(\n            LLMProviderModel.id == provider_id,\n        )\n    )\n\n    if provider is None:\n        raise ValueError(f\"LLM Provider with id={provider_id} does not exist\")\n\n    if not model_supports_image_input(vision_model, provider.provider):\n        raise ValueError(\n            f\"Model '{vision_model}' for provider '{provider.provider} does not support image input\"\n        )\n\n    _update_default_model(\n        db_session=db_session,\n        provider_id=provider_id,\n        model=vision_model,\n        flow_type=LLMModelFlowType.VISION,\n    )\n\n\ndef update_no_default_contextual_rag_provider(\n    db_session: Session,\n) -> None:\n    db_session.execute(\n        update(LLMModelFlow)\n        .where(\n            LLMModelFlow.llm_model_flow_type == LLMModelFlowType.CONTEXTUAL_RAG,\n            LLMModelFlow.is_default == True,  # noqa: E712\n        )\n        .values(is_default=False)\n    )\n    db_session.commit()\n\n\ndef update_default_contextual_model(\n    db_session: Session,\n    enable_contextual_rag: bool,\n    contextual_rag_llm_provider: str | None,\n    contextual_rag_llm_name: str | None,\n) -> None:\n    \"\"\"Sets or clears the default contextual RAG model.\n\n    Should be called whenever the PRESENT search settings change\n    (e.g. inline update or FUTURE → PRESENT swap).\n    \"\"\"\n    if (\n        not enable_contextual_rag\n        or not contextual_rag_llm_name\n        or not contextual_rag_llm_provider\n    ):\n        update_no_default_contextual_rag_provider(db_session=db_session)\n        return\n\n    provider = fetch_existing_llm_provider(\n        name=contextual_rag_llm_provider, db_session=db_session\n    )\n    if not provider:\n        raise ValueError(f\"Provider '{contextual_rag_llm_provider}' not found\")\n\n    model_config = next(\n        (\n            mc\n            for mc in provider.model_configurations\n            if mc.name == contextual_rag_llm_name\n        ),\n        None,\n    )\n    if not model_config:\n        raise ValueError(\n            f\"Model '{contextual_rag_llm_name}' not found for provider '{contextual_rag_llm_provider}'\"\n        )\n\n    add_model_to_flow(\n        db_session=db_session,\n        model_configuration_id=model_config.id,\n        flow_type=LLMModelFlowType.CONTEXTUAL_RAG,\n    )\n    _update_default_model(\n        db_session=db_session,\n        provider_id=provider.id,\n        model=contextual_rag_llm_name,\n        flow_type=LLMModelFlowType.CONTEXTUAL_RAG,\n    )\n\n    return\n\n\ndef fetch_auto_mode_providers(db_session: Session) -> list[LLMProviderModel]:\n    \"\"\"Fetch all LLM providers that are in Auto mode.\"\"\"\n    query = (\n        select(LLMProviderModel)\n        .where(LLMProviderModel.is_auto_mode.is_(True))\n        .options(selectinload(LLMProviderModel.model_configurations))\n    )\n    return list(db_session.scalars(query).all())\n\n\ndef sync_auto_mode_models(\n    db_session: Session,\n    provider: LLMProviderModel,\n    llm_recommendations: LLMRecommendations,\n) -> int:\n    \"\"\"Sync models from GitHub config to a provider in Auto mode.\n\n    In Auto mode, the model list and default are controlled by GitHub config.\n    The schema has:\n    - default_model: The default model config (always visible)\n    - additional_visible_models: List of additional visible models\n\n    Admin only provides API credentials.\n\n    Args:\n        db_session: Database session\n        provider: LLM provider in Auto mode\n        github_config: Configuration from GitHub\n\n    Returns:\n        The number of changes made.\n    \"\"\"\n    changes = 0\n\n    # Build the list of all visible models from the config\n    # All models in the config are visible (default + additional_visible_models)\n    recommended_visible_models = llm_recommendations.get_visible_models(\n        provider.provider\n    )\n    recommended_visible_model_names = [\n        model.name for model in recommended_visible_models\n    ]\n\n    # Get existing models\n    existing_models: dict[str, ModelConfiguration] = {\n        mc.name: mc\n        for mc in db_session.scalars(\n            select(ModelConfiguration).where(\n                ModelConfiguration.llm_provider_id == provider.id\n            )\n        ).all()\n    }\n\n    # Mark models that are no longer in GitHub config as not visible\n    for model_name, model in existing_models.items():\n        if model_name not in recommended_visible_model_names:\n            if model.is_visible:\n                model.is_visible = False\n                changes += 1\n\n    # Add or update models from GitHub config\n    for model_config in recommended_visible_models:\n        if model_config.name in existing_models:\n            # Update existing model\n            existing = existing_models[model_config.name]\n            # Check each field for changes\n            updated = False\n            if existing.display_name != model_config.display_name:\n                existing.display_name = model_config.display_name\n                updated = True\n            # All models in the config are visible\n            if not existing.is_visible:\n                existing.is_visible = True\n                updated = True\n            if updated:\n                changes += 1\n        else:\n            # Add new model - all models from GitHub config are visible\n            insert_new_model_configuration__no_commit(\n                db_session=db_session,\n                llm_provider_id=provider.id,\n                model_name=model_config.name,\n                supported_flows=[LLMModelFlowType.CHAT],\n                is_visible=True,\n                max_input_tokens=None,\n                display_name=model_config.display_name,\n            )\n            changes += 1\n\n    # Update the default if this provider currently holds the global CHAT default.\n    # We flush (but don't commit) so that _update_default_model can see the new\n    # model rows, then commit everything atomically to avoid a window where the\n    # old default is invisible but still pointed-to.\n    db_session.flush()\n\n    recommended_default = llm_recommendations.get_default_model(provider.provider)\n    if recommended_default:\n        current_default = fetch_default_llm_model(db_session)\n\n        if (\n            current_default\n            and current_default.llm_provider_id == provider.id\n            and current_default.name != recommended_default.name\n        ):\n            _update_default_model__no_commit(\n                db_session=db_session,\n                provider_id=provider.id,\n                model=recommended_default.name,\n                flow_type=LLMModelFlowType.CHAT,\n            )\n            changes += 1\n\n    db_session.commit()\n    return changes\n\n\ndef create_new_flow_mapping__no_commit(\n    db_session: Session,\n    model_configuration_id: int,\n    flow_type: LLMModelFlowType,\n) -> LLMModelFlow:\n    result = db_session.execute(\n        insert(LLMModelFlow)\n        .values(\n            model_configuration_id=model_configuration_id,\n            llm_model_flow_type=flow_type,\n            is_default=False,\n        )\n        .on_conflict_do_nothing()\n        .returning(LLMModelFlow)\n    )\n\n    flow = result.scalar()\n    if not flow:\n        # Row already exists — fetch it\n        flow = db_session.scalar(\n            select(LLMModelFlow).where(\n                LLMModelFlow.model_configuration_id == model_configuration_id,\n                LLMModelFlow.llm_model_flow_type == flow_type,\n            )\n        )\n    if not flow:\n        raise ValueError(\n            f\"Failed to create or find flow mapping for model_configuration_id={model_configuration_id} and flow_type={flow_type}\"\n        )\n\n    return flow\n\n\ndef insert_new_model_configuration__no_commit(\n    db_session: Session,\n    llm_provider_id: int,\n    model_name: str,\n    supported_flows: list[LLMModelFlowType],\n    is_visible: bool,\n    max_input_tokens: int | None,\n    display_name: str | None,\n) -> int | None:\n    result = db_session.execute(\n        insert(ModelConfiguration)\n        .values(\n            llm_provider_id=llm_provider_id,\n            name=model_name,\n            is_visible=is_visible,\n            max_input_tokens=max_input_tokens,\n            display_name=display_name,\n            supports_image_input=LLMModelFlowType.VISION in supported_flows,\n        )\n        .on_conflict_do_nothing()\n        .returning(ModelConfiguration.id)\n    )\n\n    model_config_id = result.scalar()\n\n    if not model_config_id:\n        return None\n\n    for flow_type in supported_flows:\n        create_new_flow_mapping__no_commit(\n            db_session=db_session,\n            model_configuration_id=model_config_id,\n            flow_type=flow_type,\n        )\n\n    return model_config_id\n\n\ndef update_model_configuration__no_commit(\n    db_session: Session,\n    model_configuration_id: int,\n    supported_flows: list[LLMModelFlowType],\n    is_visible: bool,\n    max_input_tokens: int | None,\n    display_name: str | None,\n) -> None:\n    result = db_session.execute(\n        update(ModelConfiguration)\n        .values(\n            is_visible=is_visible,\n            max_input_tokens=max_input_tokens,\n            display_name=display_name,\n            supports_image_input=LLMModelFlowType.VISION in supported_flows,\n        )\n        .where(ModelConfiguration.id == model_configuration_id)\n        .returning(ModelConfiguration)\n    )\n\n    model_configuration = result.scalar()\n    if not model_configuration:\n        raise ValueError(\n            f\"Failed to update model configuration with id={model_configuration_id}\"\n        )\n\n    new_flows = {\n        flow_type\n        for flow_type in supported_flows\n        if flow_type not in model_configuration.llm_model_flow_types\n    }\n    removed_flows = {\n        flow_type\n        for flow_type in model_configuration.llm_model_flow_types\n        if flow_type not in supported_flows\n    }\n\n    for flow_type in new_flows:\n        create_new_flow_mapping__no_commit(\n            db_session=db_session,\n            model_configuration_id=model_configuration_id,\n            flow_type=flow_type,\n        )\n\n    for flow_type in removed_flows:\n        db_session.execute(\n            delete(LLMModelFlow).where(\n                LLMModelFlow.model_configuration_id == model_configuration_id,\n                LLMModelFlow.llm_model_flow_type == flow_type,\n            )\n        )\n\n    db_session.flush()\n\n\ndef _update_default_model__no_commit(\n    db_session: Session,\n    provider_id: int,\n    model: str,\n    flow_type: LLMModelFlowType,\n) -> None:\n    result = db_session.execute(\n        select(ModelConfiguration, LLMModelFlow)\n        .join(\n            LLMModelFlow, LLMModelFlow.model_configuration_id == ModelConfiguration.id\n        )\n        .where(\n            ModelConfiguration.llm_provider_id == provider_id,\n            ModelConfiguration.name == model,\n            LLMModelFlow.llm_model_flow_type == flow_type,\n        )\n    ).first()\n\n    if not result:\n        raise ValueError(\n            f\"Model '{model}' is not a valid model for provider_id={provider_id}\"\n        )\n\n    model_config, new_default = result\n\n    # Clear existing default and set in an atomic operation\n    db_session.execute(\n        update(LLMModelFlow)\n        .where(\n            LLMModelFlow.llm_model_flow_type == flow_type,\n            LLMModelFlow.is_default == True,  # noqa: E712\n        )\n        .values(is_default=False)\n    )\n\n    new_default.is_default = True\n    model_config.is_visible = True\n\n\ndef _update_default_model(\n    db_session: Session,\n    provider_id: int,\n    model: str,\n    flow_type: LLMModelFlowType,\n) -> None:\n    _update_default_model__no_commit(db_session, provider_id, model, flow_type)\n    db_session.commit()\n\n\ndef add_model_to_flow(\n    db_session: Session,\n    model_configuration_id: int,\n    flow_type: LLMModelFlowType,\n) -> None:\n    # Function does nothing on conflict\n    create_new_flow_mapping__no_commit(\n        db_session=db_session,\n        model_configuration_id=model_configuration_id,\n        flow_type=flow_type,\n    )\n\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/mcp.py",
    "content": "import datetime\nfrom typing import cast\nfrom uuid import UUID\n\nfrom sqlalchemy import and_\nfrom sqlalchemy import delete\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.orm.attributes import flag_modified\n\nfrom onyx.db.enums import MCPAuthenticationPerformer\nfrom onyx.db.enums import MCPServerStatus\nfrom onyx.db.enums import MCPTransport\nfrom onyx.db.models import MCPAuthenticationType\nfrom onyx.db.models import MCPConnectionConfig\nfrom onyx.db.models import MCPServer\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Tool\nfrom onyx.db.models import User\nfrom onyx.server.features.mcp.models import MCPConnectionData\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.sensitive import SensitiveValue\n\nlogger = setup_logger()\n\n\n# MCPServer operations\ndef get_all_mcp_servers(db_session: Session) -> list[MCPServer]:\n    \"\"\"Get all MCP servers\"\"\"\n    return list(\n        db_session.scalars(select(MCPServer).order_by(MCPServer.created_at)).all()\n    )\n\n\ndef get_mcp_server_by_id(server_id: int, db_session: Session) -> MCPServer:\n    \"\"\"Get MCP server by ID\"\"\"\n    server = db_session.scalar(select(MCPServer).where(MCPServer.id == server_id))\n    if not server:\n        raise ValueError(\"MCP server by specified id does not exist\")\n    return server\n\n\ndef get_mcp_servers_by_owner(owner_email: str, db_session: Session) -> list[MCPServer]:\n    \"\"\"Get all MCP servers owned by a specific user\"\"\"\n    return list(\n        db_session.scalars(\n            select(MCPServer).where(MCPServer.owner == owner_email)\n        ).all()\n    )\n\n\ndef get_mcp_servers_for_persona(\n    persona_id: int,\n    db_session: Session,\n    user: User,  # noqa: ARG001\n) -> list[MCPServer]:\n    \"\"\"Get all MCP servers associated with a persona via its tools\"\"\"\n    # Get the persona and its tools\n    persona = db_session.query(Persona).filter(Persona.id == persona_id).first()\n    if not persona:\n        return []\n\n    # Collect unique MCP server IDs from the persona's tools\n    mcp_server_ids = set()\n    for tool in persona.tools:\n        if tool.mcp_server_id:\n            mcp_server_ids.add(tool.mcp_server_id)\n\n    if not mcp_server_ids:\n        return []\n\n    # Fetch the MCP servers\n    mcp_servers = (\n        db_session.query(MCPServer).filter(MCPServer.id.in_(mcp_server_ids)).all()\n    )\n\n    return list(mcp_servers)\n\n\ndef get_mcp_servers_accessible_to_user(\n    user_id: UUID, db_session: Session\n) -> list[MCPServer]:\n    \"\"\"Get all MCP servers accessible to a user (directly or through groups)\"\"\"\n    user = db_session.scalar(select(User).where(User.id == user_id))  # type: ignore\n    if not user:\n        return []\n    user = cast(User, user)\n    # Get servers accessible directly to user\n    user_servers = list(user.accessible_mcp_servers)\n\n    # TODO: Add group-based access once relationships are fully implemented\n    # For now, just return direct user access\n    return user_servers\n\n\ndef create_mcp_server__no_commit(\n    owner_email: str,\n    name: str,\n    description: str | None,\n    server_url: str,\n    auth_type: MCPAuthenticationType | None,\n    transport: MCPTransport | None,\n    auth_performer: MCPAuthenticationPerformer | None,\n    db_session: Session,\n    admin_connection_config_id: int | None = None,\n) -> MCPServer:\n    \"\"\"Create a new MCP server\"\"\"\n    new_server = MCPServer(\n        owner=owner_email,\n        name=name,\n        description=description,\n        server_url=server_url,\n        transport=transport,\n        auth_type=auth_type,\n        auth_performer=auth_performer,\n        admin_connection_config_id=admin_connection_config_id,\n    )\n    db_session.add(new_server)\n    db_session.flush()  # Get the ID without committing\n    return new_server\n\n\ndef update_mcp_server__no_commit(\n    server_id: int,\n    db_session: Session,\n    name: str | None = None,\n    description: str | None = None,\n    server_url: str | None = None,\n    auth_type: MCPAuthenticationType | None = None,\n    admin_connection_config_id: int | None = None,\n    auth_performer: MCPAuthenticationPerformer | None = None,\n    transport: MCPTransport | None = None,\n    status: MCPServerStatus | None = None,\n    last_refreshed_at: datetime.datetime | None = None,\n) -> MCPServer:\n    \"\"\"Update an existing MCP server\"\"\"\n    server = get_mcp_server_by_id(server_id, db_session)\n\n    if name is not None:\n        server.name = name\n    if description is not None:\n        server.description = description\n    if server_url is not None:\n        server.server_url = server_url\n    if auth_type is not None:\n        server.auth_type = auth_type\n    if admin_connection_config_id is not None:\n        server.admin_connection_config_id = admin_connection_config_id\n    if auth_performer is not None:\n        server.auth_performer = auth_performer\n    if transport is not None:\n        server.transport = transport\n    if status is not None:\n        server.status = status\n    if last_refreshed_at is not None:\n        server.last_refreshed_at = last_refreshed_at\n\n    db_session.flush()  # Don't commit yet, let caller decide when to commit\n    return server\n\n\ndef delete_mcp_server(server_id: int, db_session: Session) -> None:\n    \"\"\"Delete an MCP server and all associated tools (via CASCADE)\"\"\"\n    server = get_mcp_server_by_id(server_id, db_session)\n\n    # Count tools that will be deleted\n    tools_count = db_session.query(Tool).filter(Tool.mcp_server_id == server_id).count()\n    logger.info(f\"Deleting MCP server {server_id} with {tools_count} associated tools\")\n\n    db_session.delete(server)\n    db_session.commit()\n\n    logger.info(f\"Successfully deleted MCP server {server_id} and its tools\")\n\n\ndef get_all_mcp_tools_for_server(server_id: int, db_session: Session) -> list[Tool]:\n    \"\"\"Get all MCP tools for a server\"\"\"\n    return list(\n        db_session.scalars(select(Tool).where(Tool.mcp_server_id == server_id)).all()\n    )\n\n\ndef add_user_to_mcp_server(server_id: int, user_id: UUID, db_session: Session) -> None:\n    \"\"\"Grant a user access to an MCP server\"\"\"\n    server = get_mcp_server_by_id(server_id, db_session)\n    user = db_session.scalar(select(User).where(User.id == user_id))  # type: ignore\n    if not user:\n        raise ValueError(\"User not found\")\n\n    if user not in server.users:\n        server.users.append(user)\n        db_session.commit()\n\n\ndef remove_user_from_mcp_server(\n    server_id: int, user_id: UUID, db_session: Session\n) -> None:\n    \"\"\"Remove a user's access to an MCP server\"\"\"\n    server = get_mcp_server_by_id(server_id, db_session)\n    user = db_session.scalar(select(User).where(User.id == user_id))  # type: ignore\n    if not user:\n        raise ValueError(\"User not found\")\n\n    if user in server.users:\n        server.users.remove(user)\n        db_session.commit()\n\n\n# MCPConnectionConfig operations\ndef extract_connection_data(\n    config: MCPConnectionConfig | None, apply_mask: bool = False\n) -> MCPConnectionData:\n    \"\"\"Extract MCPConnectionData from a connection config, with proper typing.\n\n    This helper encapsulates the cast from the JSON column's dict[str, Any]\n    to the typed MCPConnectionData structure.\n    \"\"\"\n    if config is None or config.config is None:\n        return MCPConnectionData(headers={})\n    if isinstance(config.config, SensitiveValue):\n        return cast(MCPConnectionData, config.config.get_value(apply_mask=apply_mask))\n    return cast(MCPConnectionData, config.config)\n\n\ndef get_connection_config_by_id(\n    config_id: int, db_session: Session\n) -> MCPConnectionConfig:\n    \"\"\"Get connection config by ID\"\"\"\n    config = db_session.scalar(\n        select(MCPConnectionConfig).where(MCPConnectionConfig.id == config_id)\n    )\n    if not config:\n        raise ValueError(\"Connection config by specified id does not exist\")\n    return config\n\n\ndef get_user_connection_config(\n    server_id: int, user_email: str, db_session: Session\n) -> MCPConnectionConfig | None:\n    \"\"\"Get a user's connection config for a specific MCP server\"\"\"\n    return db_session.scalar(\n        select(MCPConnectionConfig).where(\n            and_(\n                MCPConnectionConfig.mcp_server_id == server_id,\n                MCPConnectionConfig.user_email == user_email,\n            )\n        )\n    )\n\n\ndef get_user_connection_configs_for_server(\n    server_id: int, db_session: Session\n) -> list[MCPConnectionConfig]:\n    \"\"\"Get all user connection configs for a specific MCP server\"\"\"\n    return list(\n        db_session.scalars(\n            select(MCPConnectionConfig).where(\n                MCPConnectionConfig.mcp_server_id == server_id\n            )\n        ).all()\n    )\n\n\ndef create_connection_config(\n    config_data: MCPConnectionData,\n    db_session: Session,\n    mcp_server_id: int | None = None,\n    user_email: str = \"\",\n) -> MCPConnectionConfig:\n    \"\"\"Create a new connection config\"\"\"\n    new_config = MCPConnectionConfig(\n        mcp_server_id=mcp_server_id,\n        user_email=user_email,\n        config=config_data,\n    )\n    db_session.add(new_config)\n    db_session.flush()  # Don't commit yet, let caller decide when to commit\n    return new_config\n\n\ndef update_connection_config(\n    config_id: int,\n    db_session: Session,\n    config_data: MCPConnectionData | None = None,\n) -> MCPConnectionConfig:\n    \"\"\"Update an existing connection config\"\"\"\n    config = get_connection_config_by_id(config_id, db_session)\n\n    if config_data is not None:\n        config.config = config_data  # type: ignore[assignment]\n        # Force SQLAlchemy to detect the change by marking the field as modified\n        flag_modified(config, \"config\")\n\n    db_session.commit()\n    return config\n\n\ndef upsert_user_connection_config(\n    server_id: int,\n    user_email: str,\n    config_data: MCPConnectionData,\n    db_session: Session,\n) -> MCPConnectionConfig:\n    \"\"\"Create or update a user's connection config for an MCP server\"\"\"\n    existing_config = get_user_connection_config(server_id, user_email, db_session)\n\n    if existing_config:\n        existing_config.config = config_data  # type: ignore[assignment]\n        db_session.flush()  # Don't commit yet, let caller decide when to commit\n        return existing_config\n    else:\n        return create_connection_config(\n            config_data=config_data,\n            mcp_server_id=server_id,\n            user_email=user_email,\n            db_session=db_session,\n        )\n\n\n# TODO: do this in one db call\ndef get_server_auth_template(\n    server_id: int, db_session: Session\n) -> MCPConnectionConfig | None:\n    \"\"\"Get the authentication template for a server (from the admin connection config)\"\"\"\n    server = get_mcp_server_by_id(server_id, db_session)\n    if not server.admin_connection_config_id:\n        return None\n\n    if server.auth_performer == MCPAuthenticationPerformer.ADMIN:\n        return None  # admin server implies no template\n    return server.admin_connection_config\n\n\ndef delete_connection_config(config_id: int, db_session: Session) -> None:\n    \"\"\"Delete a connection config\"\"\"\n    config = get_connection_config_by_id(config_id, db_session)\n    db_session.delete(config)\n    db_session.flush()  # Don't commit yet, let caller decide when to commit\n\n\ndef delete_user_connection_configs_for_server(\n    server_id: int, user_email: str, db_session: Session\n) -> None:\n    \"\"\"Delete all connection configs for a user on a specific server\"\"\"\n    configs = db_session.scalars(\n        select(MCPConnectionConfig).where(\n            and_(\n                MCPConnectionConfig.mcp_server_id == server_id,\n                MCPConnectionConfig.user_email == user_email,\n            )\n        )\n    ).all()\n\n    for config in configs:\n        db_session.delete(config)\n\n    db_session.commit()\n\n\ndef delete_all_user_connection_configs_for_server_no_commit(\n    server_id: int, db_session: Session\n) -> None:\n    \"\"\"Delete all user connection configs for a specific MCP server\"\"\"\n    db_session.execute(\n        delete(MCPConnectionConfig).where(\n            MCPConnectionConfig.mcp_server_id == server_id\n        )\n    )\n    db_session.flush()  # Don't commit yet, let caller decide when to commit\n"
  },
  {
    "path": "backend/onyx/db/memory.py",
    "content": "from uuid import UUID\n\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import Memory\nfrom onyx.db.models import User\n\nMAX_MEMORIES_PER_USER = 10\n\n\nclass UserInfo(BaseModel):\n    name: str | None = None\n    role: str | None = None\n    email: str | None = None\n\n    def to_dict(self) -> dict:\n        return {\n            \"name\": self.name,\n            \"role\": self.role,\n            \"email\": self.email,\n        }\n\n\nclass UserMemoryContext(BaseModel):\n    model_config = ConfigDict(frozen=True)\n\n    user_id: UUID | None = None\n    user_info: UserInfo\n    user_preferences: str | None = None\n    memories: tuple[str, ...] = ()\n\n    def without_memories(self) -> \"UserMemoryContext\":\n        \"\"\"Return a copy with memories cleared but user info/preferences intact.\"\"\"\n        return UserMemoryContext(\n            user_id=self.user_id,\n            user_info=self.user_info,\n            user_preferences=self.user_preferences,\n            memories=(),\n        )\n\n    def as_formatted_list(self) -> list[str]:\n        \"\"\"Returns combined list of user info, preferences, and memories.\"\"\"\n        result = []\n        if self.user_info.name:\n            result.append(f\"User's name: {self.user_info.name}\")\n        if self.user_info.role:\n            result.append(f\"User's role: {self.user_info.role}\")\n        if self.user_info.email:\n            result.append(f\"User's email: {self.user_info.email}\")\n        if self.user_preferences:\n            result.append(f\"User preferences: {self.user_preferences}\")\n        result.extend(self.memories)\n        return result\n\n\ndef get_memories(user: User, db_session: Session) -> UserMemoryContext:\n    user_info = UserInfo(\n        name=user.personal_name,\n        role=user.personal_role,\n        email=user.email,\n    )\n\n    user_preferences = None\n    if user.user_preferences:\n        user_preferences = user.user_preferences\n\n    memory_rows = db_session.scalars(\n        select(Memory).where(Memory.user_id == user.id).order_by(Memory.id.asc())\n    ).all()\n    memories = tuple(memory.memory_text for memory in memory_rows if memory.memory_text)\n\n    return UserMemoryContext(\n        user_id=user.id,\n        user_info=user_info,\n        user_preferences=user_preferences,\n        memories=memories,\n    )\n\n\ndef add_memory(\n    user_id: UUID,\n    memory_text: str,\n    db_session: Session,\n) -> Memory:\n    \"\"\"Insert a new Memory row for the given user.\n\n    If the user already has MAX_MEMORIES_PER_USER memories, the oldest\n    one (lowest id) is deleted before inserting the new one.\n    \"\"\"\n    existing = db_session.scalars(\n        select(Memory).where(Memory.user_id == user_id).order_by(Memory.id.asc())\n    ).all()\n\n    if len(existing) >= MAX_MEMORIES_PER_USER:\n        db_session.delete(existing[0])\n\n    memory = Memory(\n        user_id=user_id,\n        memory_text=memory_text,\n    )\n    db_session.add(memory)\n    db_session.commit()\n    return memory\n\n\ndef update_memory_at_index(\n    user_id: UUID,\n    index: int,\n    new_text: str,\n    db_session: Session,\n) -> Memory | None:\n    \"\"\"Update the memory at the given 0-based index (ordered by id ASC, matching get_memories()).\n\n    Returns the updated Memory row, or None if the index is out of range.\n    \"\"\"\n    memory_rows = db_session.scalars(\n        select(Memory).where(Memory.user_id == user_id).order_by(Memory.id.asc())\n    ).all()\n\n    if index < 0 or index >= len(memory_rows):\n        return None\n\n    memory = memory_rows[index]\n    memory.memory_text = new_text\n    db_session.commit()\n    return memory\n"
  },
  {
    "path": "backend/onyx/db/models.py",
    "content": "import datetime\nimport json\nfrom typing import Any\nfrom typing import Literal\nfrom typing import NotRequired\nfrom uuid import uuid4\n\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import validates\n\nfrom typing_extensions import TypedDict  # noreorder\nfrom uuid import UUID\nfrom pydantic import ValidationError\n\nfrom sqlalchemy.dialects.postgresql import JSONB as PGJSONB\nfrom sqlalchemy.dialects.postgresql import UUID as PGUUID\n\nfrom fastapi_users_db_sqlalchemy import SQLAlchemyBaseOAuthAccountTableUUID\nfrom fastapi_users_db_sqlalchemy import SQLAlchemyBaseUserTableUUID\nfrom fastapi_users_db_sqlalchemy.access_token import SQLAlchemyBaseAccessTokenTableUUID\nfrom fastapi_users_db_sqlalchemy.generics import TIMESTAMPAware\nfrom sqlalchemy import Boolean\nfrom sqlalchemy import DateTime\nfrom sqlalchemy import desc\nfrom sqlalchemy import Enum\nfrom sqlalchemy import Float\nfrom sqlalchemy import ForeignKey\nfrom sqlalchemy import ForeignKeyConstraint\nfrom sqlalchemy import func\nfrom sqlalchemy import Index\nfrom sqlalchemy import Integer\nfrom sqlalchemy import BigInteger\n\nfrom sqlalchemy import Sequence\nfrom sqlalchemy import String\nfrom sqlalchemy import Text\nfrom sqlalchemy import text\nfrom sqlalchemy import UniqueConstraint\nfrom sqlalchemy.dialects import postgresql\nfrom sqlalchemy import event\nfrom sqlalchemy.engine.interfaces import Dialect\nfrom sqlalchemy.orm import DeclarativeBase\nfrom sqlalchemy.orm import Mapped\nfrom sqlalchemy.orm import Mapper\nfrom sqlalchemy.orm import mapped_column\nfrom sqlalchemy.orm import relationship\nfrom sqlalchemy.types import LargeBinary\nfrom sqlalchemy.types import TypeDecorator\nfrom sqlalchemy import PrimaryKeyConstraint\n\nfrom onyx.db.enums import AccountType\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.constants import (\n    ANONYMOUS_USER_UUID,\n    DEFAULT_BOOST,\n    FederatedConnectorSource,\n    MilestoneRecordType,\n)\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.configs.constants import MessageType\nfrom onyx.db.enums import (\n    AccessType,\n    ArtifactType,\n    BuildSessionStatus,\n    EmbeddingPrecision,\n    HierarchyNodeType,\n    HookFailStrategy,\n    HookPoint,\n    IndexingMode,\n    OpenSearchDocumentMigrationStatus,\n    OpenSearchTenantMigrationStatus,\n    ProcessingMode,\n    SandboxStatus,\n    SyncType,\n    SyncStatus,\n    MCPAuthenticationType,\n    UserFileStatus,\n    MCPAuthenticationPerformer,\n    MCPTransport,\n    MCPServerStatus,\n    Permission,\n    GrantSource,\n    LLMModelFlowType,\n    ThemePreference,\n    DefaultAppMode,\n    SwitchoverType,\n    SharingScope,\n)\nfrom onyx.configs.constants import NotificationType\nfrom onyx.configs.constants import SearchFeedbackType\nfrom onyx.configs.constants import TokenRateLimitScope\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import ChatSessionSharedStatus\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.enums import IndexModelStatus\nfrom onyx.db.enums import PermissionSyncStatus\nfrom onyx.db.enums import TaskStatus\nfrom onyx.db.pydantic_type import PydanticListType, PydanticType\nfrom onyx.kg.models import KGEntityTypeAttributes\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.special_types import JSON_ro\nfrom onyx.file_store.models import FileDescriptor\nfrom onyx.llm.override_models import LLMOverride\nfrom onyx.llm.override_models import PromptOverride\nfrom onyx.kg.models import KGStage\nfrom onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig\nfrom onyx.utils.encryption import decrypt_bytes_to_string\nfrom onyx.utils.encryption import encrypt_string_to_bytes\nfrom onyx.utils.sensitive import SensitiveValue\nfrom onyx.utils.headers import HeaderItemDict\nfrom shared_configs.enums import EmbeddingProvider\n\n# TODO: After anonymous user migration has been deployed, make user_id columns NOT NULL\n# and update Mapped[User | None] relationships to Mapped[User] where needed.\n\n\nlogger = setup_logger()\n\nPROMPT_LENGTH = 5_000_000\n\n\nclass Base(DeclarativeBase):\n    __abstract__ = True\n\n\nclass _EncryptedBase(TypeDecorator):\n    \"\"\"Base for encrypted column types that wrap values in SensitiveValue.\"\"\"\n\n    impl = LargeBinary\n    cache_ok = True\n    _is_json: bool = False\n\n    def wrap_raw(self, value: Any) -> SensitiveValue:\n        \"\"\"Encrypt a raw value and wrap it in SensitiveValue.\n\n        Called by the attribute set event so the Python-side type is always\n        SensitiveValue, regardless of whether the value was loaded from the DB\n        or assigned in application code.\n        \"\"\"\n        if self._is_json:\n            if not isinstance(value, dict):\n                raise TypeError(\n                    f\"EncryptedJson column expected dict, got {type(value).__name__}\"\n                )\n            raw_str = json.dumps(value)\n        else:\n            if not isinstance(value, str):\n                raise TypeError(\n                    f\"EncryptedString column expected str, got {type(value).__name__}\"\n                )\n            raw_str = value\n        return SensitiveValue(\n            encrypted_bytes=encrypt_string_to_bytes(raw_str),\n            decrypt_fn=decrypt_bytes_to_string,\n            is_json=self._is_json,\n        )\n\n    def compare_values(self, x: Any, y: Any) -> bool:\n        if x is None or y is None:\n            return x == y\n        if isinstance(x, SensitiveValue):\n            x = x.get_value(apply_mask=False)\n        if isinstance(y, SensitiveValue):\n            y = y.get_value(apply_mask=False)\n        return x == y\n\n\nclass EncryptedString(_EncryptedBase):\n    # Must redeclare cache_ok in this child class since we explicitly redeclare _is_json\n    cache_ok = True\n    _is_json: bool = False\n\n    def process_bind_param(\n        self,\n        value: str | SensitiveValue[str] | None,\n        dialect: Dialect,  # noqa: ARG002\n    ) -> bytes | None:\n        if value is not None:\n            # Handle both raw strings and SensitiveValue wrappers\n            if isinstance(value, SensitiveValue):\n                # Get raw value for storage\n                value = value.get_value(apply_mask=False)\n            return encrypt_string_to_bytes(value)\n        return value\n\n    def process_result_value(\n        self,\n        value: bytes | None,\n        dialect: Dialect,  # noqa: ARG002\n    ) -> SensitiveValue[str] | None:\n        if value is not None:\n            return SensitiveValue(\n                encrypted_bytes=value,\n                decrypt_fn=decrypt_bytes_to_string,\n                is_json=False,\n            )\n        return None\n\n\nclass EncryptedJson(_EncryptedBase):\n    cache_ok = True\n    _is_json: bool = True\n\n    def process_bind_param(\n        self,\n        value: dict[str, Any] | SensitiveValue[dict[str, Any]] | None,\n        dialect: Dialect,  # noqa: ARG002\n    ) -> bytes | None:\n        if value is not None:\n            if isinstance(value, SensitiveValue):\n                value = value.get_value(apply_mask=False)\n            json_str = json.dumps(value)\n            return encrypt_string_to_bytes(json_str)\n        return value\n\n    def process_result_value(\n        self,\n        value: bytes | None,\n        dialect: Dialect,  # noqa: ARG002\n    ) -> SensitiveValue[dict[str, Any]] | None:\n        if value is not None:\n            return SensitiveValue(\n                encrypted_bytes=value,\n                decrypt_fn=decrypt_bytes_to_string,\n                is_json=True,\n            )\n        return None\n\n\n_REGISTERED_ATTRS: set[str] = set()\n\n\n@event.listens_for(Mapper, \"mapper_configured\")\ndef _register_sensitive_value_set_events(\n    mapper: Mapper,\n    class_: type,\n) -> None:\n    \"\"\"Auto-wrap raw values in SensitiveValue when assigned to encrypted columns.\"\"\"\n    for prop in mapper.column_attrs:\n        for col in prop.columns:\n            if isinstance(col.type, _EncryptedBase):\n                col_type = col.type\n                attr = getattr(class_, prop.key)\n\n                # Guard against double-registration (e.g. if mapper is\n                # re-configured in test setups)\n                attr_key = f\"{class_.__qualname__}.{prop.key}\"\n                if attr_key in _REGISTERED_ATTRS:\n                    continue\n                _REGISTERED_ATTRS.add(attr_key)\n\n                @event.listens_for(attr, \"set\", retval=True)\n                def _wrap_value(\n                    target: Any,  # noqa: ARG001\n                    value: Any,\n                    oldvalue: Any,  # noqa: ARG001\n                    initiator: Any,  # noqa: ARG001\n                    _col_type: _EncryptedBase = col_type,\n                ) -> Any:\n                    if value is not None and not isinstance(value, SensitiveValue):\n                        return _col_type.wrap_raw(value)\n                    return value\n\n\nclass NullFilteredString(TypeDecorator):\n    impl = String\n    # This type's behavior is fully deterministic and doesn't depend on any external factors.\n    cache_ok = True\n\n    def process_bind_param(\n        self,\n        value: str | None,\n        dialect: Dialect,  # noqa: ARG002\n    ) -> str | None:\n        if value is not None and \"\\x00\" in value:\n            logger.warning(f\"NUL characters found in value: {value}\")\n            return value.replace(\"\\x00\", \"\")\n        return value\n\n    def process_result_value(\n        self,\n        value: str | None,\n        dialect: Dialect,  # noqa: ARG002\n    ) -> str | None:\n        return value\n\n\n\"\"\"\nAuth/Authz (users, permissions, access) Tables\n\"\"\"\n\n\nclass OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base):\n    # even an almost empty token from keycloak will not fit the default 1024 bytes\n    access_token: Mapped[str] = mapped_column(Text, nullable=False)  # type: ignore\n    refresh_token: Mapped[str] = mapped_column(Text, nullable=False)  # type: ignore\n\n\nclass User(SQLAlchemyBaseUserTableUUID, Base):\n    oauth_accounts: Mapped[list[OAuthAccount]] = relationship(\n        \"OAuthAccount\", lazy=\"joined\", cascade=\"all, delete-orphan\"\n    )\n    role: Mapped[UserRole] = mapped_column(\n        Enum(UserRole, native_enum=False, default=UserRole.BASIC)\n    )\n    account_type: Mapped[AccountType] = mapped_column(\n        Enum(AccountType, native_enum=False),\n        nullable=False,\n        default=AccountType.STANDARD,\n        server_default=\"STANDARD\",\n    )\n\n    \"\"\"\n    Preferences probably should be in a separate table at some point, but for now\n    putting here for simpicity\n    \"\"\"\n\n    temperature_override_enabled: Mapped[bool | None] = mapped_column(\n        Boolean, default=None\n    )\n    auto_scroll: Mapped[bool | None] = mapped_column(Boolean, default=None)\n    shortcut_enabled: Mapped[bool] = mapped_column(Boolean, default=False)\n    theme_preference: Mapped[ThemePreference | None] = mapped_column(\n        Enum(ThemePreference, native_enum=False),\n        nullable=True,\n        default=None,\n    )\n    chat_background: Mapped[str | None] = mapped_column(String, nullable=True)\n    default_app_mode: Mapped[DefaultAppMode] = mapped_column(\n        Enum(DefaultAppMode, native_enum=False),\n        nullable=False,\n        default=DefaultAppMode.CHAT,\n    )\n    # personalization fields are exposed via the chat user settings \"Personalization\" tab\n    personal_name: Mapped[str | None] = mapped_column(String, nullable=True)\n    personal_role: Mapped[str | None] = mapped_column(String, nullable=True)\n    use_memories: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)\n    enable_memory_tool: Mapped[bool] = mapped_column(\n        Boolean, nullable=False, default=True\n    )\n    user_preferences: Mapped[str | None] = mapped_column(Text, nullable=True)\n\n    chosen_assistants: Mapped[list[int] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True, default=None\n    )\n    visible_assistants: Mapped[list[int]] = mapped_column(\n        postgresql.JSONB(), nullable=False, default=[]\n    )\n    hidden_assistants: Mapped[list[int]] = mapped_column(\n        postgresql.JSONB(), nullable=False, default=[]\n    )\n\n    pinned_assistants: Mapped[list[int] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True, default=None\n    )\n\n    effective_permissions: Mapped[list[str]] = mapped_column(\n        postgresql.JSONB(),\n        nullable=False,\n        default=list,\n        server_default=text(\"'[]'::jsonb\"),\n    )\n\n    oidc_expiry: Mapped[datetime.datetime] = mapped_column(\n        TIMESTAMPAware(timezone=True), nullable=True\n    )\n\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n        nullable=False,\n    )\n\n    default_model: Mapped[str] = mapped_column(Text, nullable=True)\n    # organized in typical structured fashion\n    # formatted as `displayName__provider__modelName`\n\n    # Voice preferences\n    voice_auto_send: Mapped[bool] = mapped_column(Boolean, default=False)\n    voice_auto_playback: Mapped[bool] = mapped_column(Boolean, default=False)\n    voice_playback_speed: Mapped[float] = mapped_column(Float, default=1.0)\n\n    # relationships\n    credentials: Mapped[list[\"Credential\"]] = relationship(\n        \"Credential\", back_populates=\"user\"\n    )\n    chat_sessions: Mapped[list[\"ChatSession\"]] = relationship(\n        \"ChatSession\", back_populates=\"user\"\n    )\n\n    input_prompts: Mapped[list[\"InputPrompt\"]] = relationship(\n        \"InputPrompt\", back_populates=\"user\"\n    )\n    # Personas owned by this user\n    personas: Mapped[list[\"Persona\"]] = relationship(\"Persona\", back_populates=\"user\")\n    # Custom tools created by this user\n    custom_tools: Mapped[list[\"Tool\"]] = relationship(\"Tool\", back_populates=\"user\")\n    # Notifications for the UI\n    notifications: Mapped[list[\"Notification\"]] = relationship(\n        \"Notification\", back_populates=\"user\"\n    )\n    cc_pairs: Mapped[list[\"ConnectorCredentialPair\"]] = relationship(\n        \"ConnectorCredentialPair\",\n        back_populates=\"creator\",\n        primaryjoin=\"User.id == foreign(ConnectorCredentialPair.creator_id)\",\n    )\n    projects: Mapped[list[\"UserProject\"]] = relationship(\n        \"UserProject\", back_populates=\"user\"\n    )\n    files: Mapped[list[\"UserFile\"]] = relationship(\"UserFile\", back_populates=\"user\")\n    # MCP servers accessible to this user\n    accessible_mcp_servers: Mapped[list[\"MCPServer\"]] = relationship(\n        \"MCPServer\", secondary=\"mcp_server__user\", back_populates=\"users\"\n    )\n    memories: Mapped[list[\"Memory\"]] = relationship(\n        \"Memory\",\n        back_populates=\"user\",\n        cascade=\"all, delete-orphan\",\n        order_by=\"desc(Memory.id)\",\n    )\n    oauth_user_tokens: Mapped[list[\"OAuthUserToken\"]] = relationship(\n        \"OAuthUserToken\",\n        back_populates=\"user\",\n        cascade=\"all, delete-orphan\",\n    )\n\n    @validates(\"email\")\n    def validate_email(self, key: str, value: str) -> str:  # noqa: ARG002\n        return value.lower() if value else value\n\n    @property\n    def password_configured(self) -> bool:\n        \"\"\"\n        Returns True if the user has at least one OAuth (or OIDC) account.\n        \"\"\"\n        return not bool(self.oauth_accounts)\n\n    @property\n    def is_anonymous(self) -> bool:\n        \"\"\"Returns True if this is the anonymous user.\"\"\"\n        return str(self.id) == ANONYMOUS_USER_UUID\n\n\nclass AccessToken(SQLAlchemyBaseAccessTokenTableUUID, Base):\n    pass\n\n\nclass Memory(Base):\n    __tablename__ = \"memory\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)\n    user_id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=False\n    )\n    memory_text: Mapped[str] = mapped_column(Text, nullable=False)\n    conversation_id: Mapped[UUID | None] = mapped_column(\n        PGUUID(as_uuid=True), nullable=True\n    )\n    message_id: Mapped[int | None] = mapped_column(Integer, nullable=True)\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n        nullable=False,\n    )\n\n    user: Mapped[\"User\"] = relationship(\"User\", back_populates=\"memories\")\n\n\nclass ApiKey(Base):\n    __tablename__ = \"api_key\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    name: Mapped[str | None] = mapped_column(String, nullable=True)\n    hashed_api_key: Mapped[str] = mapped_column(String, unique=True)\n    api_key_display: Mapped[str] = mapped_column(String, unique=True)\n    # the ID of the \"user\" who represents the access credentials for the API key\n    user_id: Mapped[UUID] = mapped_column(ForeignKey(\"user.id\"), nullable=False)\n    # the ID of the user who owns the key\n    owner_id: Mapped[UUID | None] = mapped_column(ForeignKey(\"user.id\"), nullable=True)\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    # Add this relationship to access the User object via user_id\n    user: Mapped[\"User\"] = relationship(\"User\", foreign_keys=[user_id])\n\n\nclass PersonalAccessToken(Base):\n    __tablename__ = \"personal_access_token\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    name: Mapped[str] = mapped_column(String, nullable=False)  # User-provided label\n    hashed_token: Mapped[str] = mapped_column(\n        String(64), unique=True, nullable=False\n    )  # SHA256 = 64 hex chars\n    token_display: Mapped[str] = mapped_column(String, nullable=False)\n\n    user_id: Mapped[UUID] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=False\n    )\n\n    expires_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True, index=True\n    )  # NULL = no expiration. Revocation sets this to NOW() for immediate expiry.\n\n    # Audit fields\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    last_used_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n    is_revoked: Mapped[bool] = mapped_column(\n        Boolean, server_default=text(\"false\"), nullable=False\n    )  # True if user explicitly revoked (vs naturally expired)\n\n    user: Mapped[\"User\"] = relationship(\"User\", foreign_keys=[user_id])\n\n    # Indexes for performance\n    __table_args__ = (\n        Index(\n            \"ix_pat_user_created\", user_id, created_at.desc()\n        ),  # Fast user token listing\n    )\n\n\nclass Notification(Base):\n    __tablename__ = \"notification\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    notif_type: Mapped[NotificationType] = mapped_column(\n        Enum(NotificationType, native_enum=False)\n    )\n    user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    dismissed: Mapped[bool] = mapped_column(Boolean, default=False)\n    last_shown: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))\n    first_shown: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))\n    title: Mapped[str] = mapped_column(String)\n    description: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    user: Mapped[User] = relationship(\"User\", back_populates=\"notifications\")\n    additional_data: Mapped[dict | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n\n    # Unique constraint ix_notification_user_type_data on (user_id, notif_type, additional_data)\n    # ensures notification deduplication for batch inserts. Defined in migration 8405ca81cc83.\n    __table_args__ = (\n        Index(\n            \"ix_notification_user_sort\",\n            \"user_id\",\n            \"dismissed\",\n            desc(\"first_shown\"),\n        ),\n    )\n\n\n\"\"\"\nAssociation Tables\nNOTE: must be at the top since they are referenced by other tables\n\"\"\"\n\n\nclass Persona__DocumentSet(Base):\n    __tablename__ = \"persona__document_set\"\n\n    persona_id: Mapped[int] = mapped_column(ForeignKey(\"persona.id\"), primary_key=True)\n    document_set_id: Mapped[int] = mapped_column(\n        ForeignKey(\"document_set.id\"), primary_key=True\n    )\n\n\nclass Persona__User(Base):\n    __tablename__ = \"persona__user\"\n\n    persona_id: Mapped[int] = mapped_column(ForeignKey(\"persona.id\"), primary_key=True)\n    user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), primary_key=True, nullable=True\n    )\n\n\nclass DocumentSet__User(Base):\n    __tablename__ = \"document_set__user\"\n\n    document_set_id: Mapped[int] = mapped_column(\n        ForeignKey(\"document_set.id\"), primary_key=True\n    )\n    user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), primary_key=True, nullable=True\n    )\n\n\nclass DocumentSet__ConnectorCredentialPair(Base):\n    __tablename__ = \"document_set__connector_credential_pair\"\n\n    document_set_id: Mapped[int] = mapped_column(\n        ForeignKey(\"document_set.id\"), primary_key=True\n    )\n    connector_credential_pair_id: Mapped[int] = mapped_column(\n        ForeignKey(\"connector_credential_pair.id\"), primary_key=True\n    )\n    # if `True`, then is part of the current state of the document set\n    # if `False`, then is a part of the prior state of the document set\n    # rows with `is_current=False` should be deleted when the document\n    # set is updated and should not exist for a given document set if\n    # `DocumentSet.is_up_to_date == True`\n    is_current: Mapped[bool] = mapped_column(\n        Boolean,\n        nullable=False,\n        default=True,\n        primary_key=True,\n    )\n\n    document_set: Mapped[\"DocumentSet\"] = relationship(\"DocumentSet\")\n\n\nclass ChatMessage__SearchDoc(Base):\n    __tablename__ = \"chat_message__search_doc\"\n\n    chat_message_id: Mapped[int] = mapped_column(\n        ForeignKey(\"chat_message.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    search_doc_id: Mapped[int] = mapped_column(\n        ForeignKey(\"search_doc.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n\n\nclass ToolCall__SearchDoc(Base):\n    __tablename__ = \"tool_call__search_doc\"\n\n    tool_call_id: Mapped[int] = mapped_column(\n        ForeignKey(\"tool_call.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    search_doc_id: Mapped[int] = mapped_column(\n        ForeignKey(\"search_doc.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n\n\nclass Document__Tag(Base):\n    __tablename__ = \"document__tag\"\n\n    document_id: Mapped[str] = mapped_column(\n        ForeignKey(\"document.id\"), primary_key=True\n    )\n    tag_id: Mapped[int] = mapped_column(\n        ForeignKey(\"tag.id\"), primary_key=True, index=True\n    )\n\n\nclass Persona__Tool(Base):\n    \"\"\"An entry in this table represents a tool that is **available** to a persona.\n    It does NOT necessarily mean that the tool is actually usable to the persona.\n\n    For example, a persona may have the image generation tool attached to it, even though\n    the image generation tool is not set up / enabled. In this case, the tool should not\n    show up in the UI for the persona + it should not be usable by the persona in chat.\n    \"\"\"\n\n    __tablename__ = \"persona__tool\"\n\n    persona_id: Mapped[int] = mapped_column(\n        ForeignKey(\"persona.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    tool_id: Mapped[int] = mapped_column(\n        ForeignKey(\"tool.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n\n\nclass StandardAnswer__StandardAnswerCategory(Base):\n    __tablename__ = \"standard_answer__standard_answer_category\"\n\n    standard_answer_id: Mapped[int] = mapped_column(\n        ForeignKey(\"standard_answer.id\"), primary_key=True\n    )\n    standard_answer_category_id: Mapped[int] = mapped_column(\n        ForeignKey(\"standard_answer_category.id\"), primary_key=True\n    )\n\n\nclass SlackChannelConfig__StandardAnswerCategory(Base):\n    __tablename__ = \"slack_channel_config__standard_answer_category\"\n\n    slack_channel_config_id: Mapped[int] = mapped_column(\n        ForeignKey(\"slack_channel_config.id\"), primary_key=True\n    )\n    standard_answer_category_id: Mapped[int] = mapped_column(\n        ForeignKey(\"standard_answer_category.id\"), primary_key=True\n    )\n\n\nclass ChatMessage__StandardAnswer(Base):\n    __tablename__ = \"chat_message__standard_answer\"\n\n    chat_message_id: Mapped[int] = mapped_column(\n        ForeignKey(\"chat_message.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    standard_answer_id: Mapped[int] = mapped_column(\n        ForeignKey(\"standard_answer.id\"), primary_key=True\n    )\n\n\n\"\"\"\nDocuments/Indexing Tables\n\"\"\"\n\n\nclass ConnectorCredentialPair(Base):\n    \"\"\"Connectors and Credentials can have a many-to-many relationship\n    I.e. A Confluence Connector may have multiple admin users who can run it with their own credentials\n    I.e. An admin user may use the same credential to index multiple Confluence Spaces\n    \"\"\"\n\n    __tablename__ = \"connector_credential_pair\"\n    # NOTE: this `id` column has to use `Sequence` instead of `autoincrement=True`\n    # due to some SQLAlchemy quirks + this not being a primary key column\n    id: Mapped[int] = mapped_column(\n        Integer,\n        Sequence(\"connector_credential_pair_id_seq\"),\n        unique=True,\n        nullable=False,\n    )\n    name: Mapped[str] = mapped_column(String, nullable=False)\n    status: Mapped[ConnectorCredentialPairStatus] = mapped_column(\n        Enum(ConnectorCredentialPairStatus, native_enum=False), nullable=False\n    )\n    # this is separate from the `status` above, since a connector can be `INITIAL_INDEXING`, `ACTIVE`,\n    # or `PAUSED` and still be in a repeated error state.\n    in_repeated_error_state: Mapped[bool] = mapped_column(Boolean, default=False)\n    connector_id: Mapped[int] = mapped_column(\n        ForeignKey(\"connector.id\"), primary_key=True\n    )\n\n    deletion_failure_message: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    credential_id: Mapped[int] = mapped_column(\n        ForeignKey(\"credential.id\"), primary_key=True\n    )\n    # controls whether the documents indexed by this CC pair are visible to all\n    # or if they are only visible to those with that are given explicit access\n    # (e.g. via owning the credential or being a part of a group that is given access)\n    access_type: Mapped[AccessType] = mapped_column(\n        Enum(AccessType, native_enum=False), nullable=False\n    )\n\n    # special info needed for the auto-sync feature. The exact structure depends on the\n\n    # source type (defined in the connector's `source` field)\n    # E.g. for google_drive perm sync:\n    # {\"customer_id\": \"123567\", \"company_domain\": \"@onyx.app\"}\n    auto_sync_options: Mapped[dict[str, Any] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n    last_time_perm_sync: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n    last_time_external_group_sync: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n    # Time finished, not used for calculating backend jobs which uses time started (created)\n    last_successful_index_time: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), default=None\n    )\n\n    # last successful prune\n    last_pruned: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True, index=True\n    )\n\n    # last successful hierarchy fetch\n    last_time_hierarchy_fetch: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n\n    total_docs_indexed: Mapped[int] = mapped_column(Integer, default=0)\n\n    indexing_trigger: Mapped[IndexingMode | None] = mapped_column(\n        Enum(IndexingMode, native_enum=False), nullable=True\n    )\n\n    # Determines how documents are processed after fetching:\n    # REGULAR: Full pipeline (chunk → embed → Vespa)\n    # FILE_SYSTEM: Write to file system only (for CLI agent sandbox)\n    processing_mode: Mapped[ProcessingMode] = mapped_column(\n        Enum(ProcessingMode, native_enum=False),\n        nullable=False,\n        default=ProcessingMode.REGULAR,\n        server_default=\"REGULAR\",\n    )\n\n    connector: Mapped[\"Connector\"] = relationship(\n        \"Connector\", back_populates=\"credentials\"\n    )\n    credential: Mapped[\"Credential\"] = relationship(\n        \"Credential\", back_populates=\"connectors\"\n    )\n    document_sets: Mapped[list[\"DocumentSet\"]] = relationship(\n        \"DocumentSet\",\n        secondary=DocumentSet__ConnectorCredentialPair.__table__,\n        primaryjoin=(\n            (DocumentSet__ConnectorCredentialPair.connector_credential_pair_id == id)\n            & (DocumentSet__ConnectorCredentialPair.is_current.is_(True))\n        ),\n        back_populates=\"connector_credential_pairs\",\n        overlaps=\"document_set\",\n    )\n    index_attempts: Mapped[list[\"IndexAttempt\"]] = relationship(\n        \"IndexAttempt\", back_populates=\"connector_credential_pair\"\n    )\n\n    # the user id of the user that created this cc pair\n    creator_id: Mapped[UUID | None] = mapped_column(nullable=True)\n    creator: Mapped[\"User\"] = relationship(\n        \"User\",\n        back_populates=\"cc_pairs\",\n        primaryjoin=\"foreign(ConnectorCredentialPair.creator_id) == remote(User.id)\",\n    )\n\n    background_errors: Mapped[list[\"BackgroundError\"]] = relationship(\n        \"BackgroundError\", back_populates=\"cc_pair\", cascade=\"all, delete-orphan\"\n    )\n\n\nclass HierarchyNode(Base):\n    \"\"\"\n    Represents a structural node in a connected source's hierarchy.\n    Examples: folders, drives, spaces, projects, channels.\n\n    Stores hierarchy structure WITH permission information, using the same\n    permission model as Documents (external_user_emails, external_user_group_ids,\n    is_public). This enables user-scoped hierarchy browsing in the UI.\n\n    Some hierarchy nodes (e.g., Confluence pages) can also be documents.\n    In these cases, `document_id` will be set.\n    \"\"\"\n\n    __tablename__ = \"hierarchy_node\"\n\n    # Primary key - Integer for simplicity\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n\n    # Raw identifier from the source system\n    # e.g., \"1h7uWUR2BYZjtMfEXFt43tauj-Gp36DTPtwnsNuA665I\" for Google Drive\n    # For SOURCE nodes, this is the source name (e.g., \"google_drive\")\n    raw_node_id: Mapped[str] = mapped_column(String, nullable=False)\n\n    # Human-readable name for display\n    # e.g., \"Engineering\", \"Q4 Planning\", \"Google Drive\"\n    display_name: Mapped[str] = mapped_column(String, nullable=False)\n\n    # Link to view this node in the source system\n    link: Mapped[str | None] = mapped_column(NullFilteredString, nullable=True)\n\n    # Source type (google_drive, confluence, etc.)\n    source: Mapped[DocumentSource] = mapped_column(\n        Enum(DocumentSource, native_enum=False), nullable=False\n    )\n\n    # What kind of structural node this is\n    node_type: Mapped[HierarchyNodeType] = mapped_column(\n        Enum(HierarchyNodeType, native_enum=False), nullable=False\n    )\n\n    # ============= PERMISSION FIELDS (same pattern as Document) =============\n    # Email addresses of external users with access to this node in the source system\n    external_user_emails: Mapped[list[str] | None] = mapped_column(\n        postgresql.ARRAY(String), nullable=True\n    )\n    # External group IDs with access (prefixed by source type)\n    external_user_group_ids: Mapped[list[str] | None] = mapped_column(\n        postgresql.ARRAY(String), nullable=True\n    )\n    # Whether this node is publicly accessible (org-wide or world-public)\n    # SOURCE nodes are always public. Other nodes get this from source permissions.\n    is_public: Mapped[bool] = mapped_column(Boolean, default=False)\n    # ==========================================================================\n\n    # Foreign keys\n    # For hierarchy nodes that are also documents (e.g., Confluence pages)\n    # SET NULL when document is deleted - node can exist without its document\n    document_id: Mapped[str | None] = mapped_column(\n        ForeignKey(\"document.id\", ondelete=\"SET NULL\"), nullable=True\n    )\n\n    # Self-referential FK for tree structure\n    # SET NULL when parent is deleted - orphan children for cleanup via pruning\n    parent_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"hierarchy_node.id\", ondelete=\"SET NULL\"), nullable=True, index=True\n    )\n\n    # Relationships\n    document: Mapped[\"Document | None\"] = relationship(\n        \"Document\", back_populates=\"hierarchy_node\", foreign_keys=[document_id]\n    )\n    parent: Mapped[\"HierarchyNode | None\"] = relationship(\n        \"HierarchyNode\", remote_side=[id], back_populates=\"children\"\n    )\n    children: Mapped[list[\"HierarchyNode\"]] = relationship(\n        \"HierarchyNode\", back_populates=\"parent\", passive_deletes=True\n    )\n    child_documents: Mapped[list[\"Document\"]] = relationship(\n        \"Document\",\n        back_populates=\"parent_hierarchy_node\",\n        foreign_keys=\"Document.parent_hierarchy_node_id\",\n        passive_deletes=True,\n    )\n    # Personas that have this hierarchy node attached for scoped search\n    personas: Mapped[list[\"Persona\"]] = relationship(\n        \"Persona\",\n        secondary=\"persona__hierarchy_node\",\n        back_populates=\"hierarchy_nodes\",\n        viewonly=True,\n    )\n\n    __table_args__ = (\n        # Unique constraint: same raw_node_id + source should not exist twice\n        UniqueConstraint(\n            \"raw_node_id\", \"source\", name=\"uq_hierarchy_node_raw_id_source\"\n        ),\n        Index(\"ix_hierarchy_node_source_type\", source, node_type),\n    )\n\n\nclass Document(Base):\n    __tablename__ = \"document\"\n    # NOTE: if more sensitive data is added here for display, make sure to add user/group permission\n\n    # this should correspond to the ID of the document\n    # (as is passed around in Onyx)\n    id: Mapped[str] = mapped_column(NullFilteredString, primary_key=True)\n    from_ingestion_api: Mapped[bool] = mapped_column(\n        Boolean, default=False, nullable=True\n    )\n    # 0 for neutral, positive for mostly endorse, negative for mostly reject\n    boost: Mapped[int] = mapped_column(Integer, default=DEFAULT_BOOST)\n    hidden: Mapped[bool] = mapped_column(Boolean, default=False)\n    semantic_id: Mapped[str] = mapped_column(NullFilteredString)\n    # First Section's link\n    link: Mapped[str | None] = mapped_column(NullFilteredString, nullable=True)\n\n    # The updated time is also used as a measure of the last successful state of the doc\n    # pulled from the source (to help skip reindexing already updated docs in case of\n    # connector retries)\n    # TODO: rename this column because it conflates the time of the source doc\n    # with the local last modified time of the doc and any associated metadata\n    # it should just be the server timestamp of the source doc\n    doc_updated_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n\n    # Number of chunks in the document (in Vespa)\n    # Only null for documents indexed prior to this change\n    chunk_count: Mapped[int | None] = mapped_column(Integer, nullable=True)\n\n    # last time any vespa relevant row metadata or the doc changed.\n    # does not include last_synced\n    last_modified: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=False, index=True, default=func.now()\n    )\n\n    # last successful sync to vespa\n    last_synced: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True, index=True\n    )\n    # The following are not attached to User because the account/email may not be known\n    # within Onyx\n    # Something like the document creator\n    primary_owners: Mapped[list[str] | None] = mapped_column(\n        postgresql.ARRAY(String), nullable=True\n    )\n    secondary_owners: Mapped[list[str] | None] = mapped_column(\n        postgresql.ARRAY(String), nullable=True\n    )\n    # Permission sync columns\n    # Email addresses are saved at the document level for externally synced permissions\n    # This is becuase the normal flow of assigning permissions is through the cc_pair\n    # doesn't apply here\n    external_user_emails: Mapped[list[str] | None] = mapped_column(\n        postgresql.ARRAY(String), nullable=True\n    )\n    # These group ids have been prefixed by the source type\n    external_user_group_ids: Mapped[list[str] | None] = mapped_column(\n        postgresql.ARRAY(String), nullable=True\n    )\n    is_public: Mapped[bool] = mapped_column(Boolean, default=False)\n\n    # Reference to parent hierarchy node (the folder/space containing this doc)\n    # If None, document's hierarchy position is unknown or connector doesn't support hierarchy\n    # SET NULL when hierarchy node is deleted - document should not be blocked by node deletion\n    parent_hierarchy_node_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"hierarchy_node.id\", ondelete=\"SET NULL\"), nullable=True, index=True\n    )\n\n    # tables for the knowledge graph data\n    kg_stage: Mapped[KGStage] = mapped_column(\n        Enum(KGStage, native_enum=False),\n        comment=\"Status of knowledge graph extraction for this document\",\n        index=True,\n    )\n\n    kg_processing_time: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n\n    retrieval_feedbacks: Mapped[list[\"DocumentRetrievalFeedback\"]] = relationship(\n        \"DocumentRetrievalFeedback\", back_populates=\"document\"\n    )\n\n    doc_metadata: Mapped[dict[str, Any] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True, default=None\n    )\n    tags = relationship(\n        \"Tag\",\n        secondary=Document__Tag.__table__,\n        back_populates=\"documents\",\n    )\n\n    # Relationship to parent hierarchy node (the folder/space containing this doc)\n    parent_hierarchy_node: Mapped[\"HierarchyNode | None\"] = relationship(\n        \"HierarchyNode\",\n        back_populates=\"child_documents\",\n        foreign_keys=[parent_hierarchy_node_id],\n    )\n\n    # For documents that ARE hierarchy nodes (e.g., Confluence pages with children)\n    hierarchy_node: Mapped[\"HierarchyNode | None\"] = relationship(\n        \"HierarchyNode\",\n        back_populates=\"document\",\n        foreign_keys=\"HierarchyNode.document_id\",\n        passive_deletes=True,\n    )\n    # Personas that have this document directly attached for scoped search\n    attached_personas: Mapped[list[\"Persona\"]] = relationship(\n        \"Persona\",\n        secondary=\"persona__document\",\n        back_populates=\"attached_documents\",\n        viewonly=True,\n    )\n\n    __table_args__ = (\n        Index(\n            \"ix_document_sync_status\",\n            last_modified,\n            last_synced,\n        ),\n    )\n\n\nclass OpenSearchDocumentMigrationRecord(Base):\n    \"\"\"Tracks the migration status of documents from Vespa to OpenSearch.\n\n    This table can be dropped when the migration is complete for all Onyx\n    instances.\n    \"\"\"\n\n    __tablename__ = \"opensearch_document_migration_record\"\n\n    document_id: Mapped[str] = mapped_column(\n        String,\n        ForeignKey(\"document.id\", ondelete=\"CASCADE\"),\n        primary_key=True,\n        nullable=False,\n        index=True,\n    )\n    status: Mapped[OpenSearchDocumentMigrationStatus] = mapped_column(\n        Enum(OpenSearchDocumentMigrationStatus, native_enum=False),\n        default=OpenSearchDocumentMigrationStatus.PENDING,\n        nullable=False,\n        index=True,\n    )\n    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)\n    attempts_count: Mapped[int] = mapped_column(\n        Integer, default=0, nullable=False, index=True\n    )\n    last_attempt_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        nullable=False,\n        index=True,\n    )\n\n    document: Mapped[\"Document\"] = relationship(\"Document\")\n\n\nclass OpenSearchTenantMigrationRecord(Base):\n    \"\"\"Tracks the state of the OpenSearch migration for a tenant.\n\n    Should only contain one row.\n\n    This table can be dropped when the migration is complete for all Onyx\n    instances.\n    \"\"\"\n\n    __tablename__ = \"opensearch_tenant_migration_record\"\n    __table_args__ = (\n        # Singleton pattern - unique index on constant ensures only one row.\n        Index(\"idx_opensearch_tenant_migration_singleton\", text(\"(true)\"), unique=True),\n    )\n\n    id: Mapped[int] = mapped_column(primary_key=True, nullable=False)\n    document_migration_record_table_population_status: Mapped[\n        OpenSearchTenantMigrationStatus\n    ] = mapped_column(\n        Enum(OpenSearchTenantMigrationStatus, native_enum=False),\n        default=OpenSearchTenantMigrationStatus.PENDING,\n        nullable=False,\n    )\n    num_times_observed_no_additional_docs_to_populate_migration_table: Mapped[int] = (\n        mapped_column(Integer, default=0, nullable=False)\n    )\n    overall_document_migration_status: Mapped[OpenSearchTenantMigrationStatus] = (\n        mapped_column(\n            Enum(OpenSearchTenantMigrationStatus, native_enum=False),\n            default=OpenSearchTenantMigrationStatus.PENDING,\n            nullable=False,\n        )\n    )\n    num_times_observed_no_additional_docs_to_migrate: Mapped[int] = mapped_column(\n        Integer,\n        default=0,\n        nullable=False,\n    )\n    last_updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n        nullable=False,\n    )\n    # Opaque continuation token from Vespa's Visit API.\n    # NULL means \"not started\".\n    # Otherwise contains a serialized mapping between slice ID and continuation\n    # token for that slice.\n    vespa_visit_continuation_token: Mapped[str | None] = mapped_column(\n        Text, nullable=True\n    )\n    total_chunks_migrated: Mapped[int] = mapped_column(\n        Integer, default=0, nullable=False\n    )\n    total_chunks_errored: Mapped[int] = mapped_column(\n        Integer, default=0, nullable=False\n    )\n    total_chunks_in_vespa: Mapped[int] = mapped_column(\n        Integer, default=0, nullable=False\n    )\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        nullable=False,\n    )\n    migration_completed_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n    enable_opensearch_retrieval: Mapped[bool] = mapped_column(\n        Boolean, nullable=False, default=False\n    )\n    approx_chunk_count_in_vespa: Mapped[int | None] = mapped_column(\n        Integer, nullable=True\n    )\n\n\nclass KGEntityType(Base):\n    __tablename__ = \"kg_entity_type\"\n\n    # Primary identifier\n    id_name: Mapped[str] = mapped_column(\n        String, primary_key=True, nullable=False, index=True\n    )\n\n    description: Mapped[str | None] = mapped_column(NullFilteredString, nullable=True)\n\n    grounding: Mapped[str] = mapped_column(\n        NullFilteredString, nullable=False, index=False\n    )\n\n    attributes: Mapped[dict | None] = mapped_column(\n        postgresql.JSONB,\n        nullable=True,\n        default=dict,\n        server_default=\"{}\",\n        comment=\"Filtering based on document attribute\",\n    )\n\n    @property\n    def parsed_attributes(self) -> KGEntityTypeAttributes:\n        if self.attributes is None:\n            return KGEntityTypeAttributes()\n\n        try:\n            return KGEntityTypeAttributes(**self.attributes)\n        except ValidationError:\n            return KGEntityTypeAttributes()\n\n    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)\n\n    active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n\n    deep_extraction: Mapped[bool] = mapped_column(\n        Boolean, nullable=False, default=False\n    )\n\n    # Tracking fields\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n    )\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    grounded_source_name: Mapped[str | None] = mapped_column(\n        NullFilteredString, nullable=True, index=False\n    )\n\n    entity_values: Mapped[list[str]] = mapped_column(\n        postgresql.ARRAY(String), nullable=True, default=None\n    )\n\n    clustering: Mapped[dict] = mapped_column(\n        postgresql.JSONB,\n        nullable=False,\n        default=dict,\n        server_default=\"{}\",\n        comment=\"Clustering information for this entity type\",\n    )\n\n\nclass KGRelationshipType(Base):\n    __tablename__ = \"kg_relationship_type\"\n\n    # Primary identifier\n    id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        primary_key=True,\n        nullable=False,\n        index=True,\n    )\n\n    name: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)\n\n    source_entity_type_id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_entity_type.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    target_entity_type_id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_entity_type.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    definition: Mapped[bool] = mapped_column(\n        Boolean,\n        nullable=False,\n        default=False,\n        comment=\"Whether this relationship type represents a definition\",\n    )\n\n    clustering: Mapped[dict] = mapped_column(\n        postgresql.JSONB,\n        nullable=False,\n        default=dict,\n        server_default=\"{}\",\n        comment=\"Clustering information for this relationship type\",\n    )\n\n    type: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)\n\n    active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)\n\n    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)\n\n    # Tracking fields\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n    )\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    # Relationships to EntityType\n    source_type: Mapped[\"KGEntityType\"] = relationship(\n        \"KGEntityType\",\n        foreign_keys=[source_entity_type_id_name],\n        backref=\"source_relationship_type\",\n    )\n    target_type: Mapped[\"KGEntityType\"] = relationship(\n        \"KGEntityType\",\n        foreign_keys=[target_entity_type_id_name],\n        backref=\"target_relationship_type\",\n    )\n\n\nclass KGRelationshipTypeExtractionStaging(Base):\n    __tablename__ = \"kg_relationship_type_extraction_staging\"\n\n    # Primary identifier\n    id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        primary_key=True,\n        nullable=False,\n        index=True,\n    )\n\n    name: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)\n\n    source_entity_type_id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_entity_type.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    target_entity_type_id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_entity_type.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    definition: Mapped[bool] = mapped_column(\n        Boolean,\n        nullable=False,\n        default=False,\n        comment=\"Whether this relationship type represents a definition\",\n    )\n\n    clustering: Mapped[dict] = mapped_column(\n        postgresql.JSONB,\n        nullable=False,\n        default=dict,\n        server_default=\"{}\",\n        comment=\"Clustering information for this relationship type\",\n    )\n\n    type: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)\n\n    active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)\n\n    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)\n\n    transferred: Mapped[bool] = mapped_column(\n        Boolean,\n        nullable=False,\n        default=False,\n    )\n\n    # Tracking fields\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    # Relationships to EntityType\n    source_type: Mapped[\"KGEntityType\"] = relationship(\n        \"KGEntityType\",\n        foreign_keys=[source_entity_type_id_name],\n        backref=\"source_relationship_type_staging\",\n    )\n    target_type: Mapped[\"KGEntityType\"] = relationship(\n        \"KGEntityType\",\n        foreign_keys=[target_entity_type_id_name],\n        backref=\"target_relationship_type_staging\",\n    )\n\n\nclass KGEntity(Base):\n    __tablename__ = \"kg_entity\"\n\n    # Primary identifier\n    id_name: Mapped[str] = mapped_column(\n        NullFilteredString, primary_key=True, index=True\n    )\n\n    # Basic entity information\n    name: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)\n    entity_key: Mapped[str] = mapped_column(\n        NullFilteredString, nullable=True, index=True\n    )\n    parent_key: Mapped[str | None] = mapped_column(\n        NullFilteredString, nullable=True, index=True\n    )\n\n    name_trigrams: Mapped[list[str]] = mapped_column(\n        postgresql.ARRAY(String(3)),\n        nullable=True,\n    )\n\n    attributes: Mapped[dict] = mapped_column(\n        postgresql.JSONB,\n        nullable=False,\n        default=dict,\n        server_default=\"{}\",\n        comment=\"Attributes for this entity\",\n    )\n\n    document_id: Mapped[str | None] = mapped_column(\n        NullFilteredString, nullable=True, index=True\n    )\n\n    alternative_names: Mapped[list[str]] = mapped_column(\n        postgresql.ARRAY(String), nullable=False, default=list\n    )\n\n    # Reference to KGEntityType\n    entity_type_id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_entity_type.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    # Relationship to KGEntityType\n    entity_type: Mapped[\"KGEntityType\"] = relationship(\"KGEntityType\", backref=\"entity\")\n\n    description: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    keywords: Mapped[list[str]] = mapped_column(\n        postgresql.ARRAY(String), nullable=False, default=list\n    )\n\n    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)\n\n    # Access control\n    acl: Mapped[list[str]] = mapped_column(\n        postgresql.ARRAY(String), nullable=False, default=list\n    )\n\n    # Boosts - using JSON for flexibility\n    boosts: Mapped[dict] = mapped_column(postgresql.JSONB, nullable=False, default=dict)\n\n    event_time: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True),\n        nullable=True,\n        comment=\"Time of the event being processed\",\n    )\n\n    # Tracking fields\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n    )\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    __table_args__ = (\n        # Fixed column names in indexes\n        Index(\"ix_entity_type_acl\", entity_type_id_name, acl),\n        Index(\"ix_entity_name_search\", name, entity_type_id_name),\n    )\n\n\nclass KGEntityExtractionStaging(Base):\n    __tablename__ = \"kg_entity_extraction_staging\"\n\n    # Primary identifier\n    id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        primary_key=True,\n        nullable=False,\n        index=True,\n    )\n\n    # Basic entity information\n    name: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)\n\n    attributes: Mapped[dict] = mapped_column(\n        postgresql.JSONB,\n        nullable=False,\n        default=dict,\n        server_default=\"{}\",\n        comment=\"Attributes for this entity\",\n    )\n\n    document_id: Mapped[str | None] = mapped_column(\n        NullFilteredString, nullable=True, index=True\n    )\n\n    alternative_names: Mapped[list[str]] = mapped_column(\n        postgresql.ARRAY(String), nullable=False, default=list\n    )\n\n    # Reference to KGEntityType\n    entity_type_id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_entity_type.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    # Relationship to KGEntityType\n    entity_type: Mapped[\"KGEntityType\"] = relationship(\n        \"KGEntityType\", backref=\"entity_staging\"\n    )\n\n    description: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    keywords: Mapped[list[str]] = mapped_column(\n        postgresql.ARRAY(String), nullable=False, default=list\n    )\n\n    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)\n\n    # Access control\n    acl: Mapped[list[str]] = mapped_column(\n        postgresql.ARRAY(String), nullable=False, default=list\n    )\n\n    # Boosts - using JSON for flexibility\n    boosts: Mapped[dict] = mapped_column(postgresql.JSONB, nullable=False, default=dict)\n\n    transferred_id_name: Mapped[str | None] = mapped_column(\n        NullFilteredString,\n        nullable=True,\n    )\n\n    # Parent Child Information\n    entity_key: Mapped[str] = mapped_column(\n        NullFilteredString, nullable=True, index=True\n    )\n    parent_key: Mapped[str | None] = mapped_column(\n        NullFilteredString, nullable=True, index=True\n    )\n\n    event_time: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True),\n        nullable=True,\n        comment=\"Time of the event being processed\",\n    )\n\n    # Tracking fields\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    __table_args__ = (\n        # Fixed column names in indexes\n        Index(\"ix_entity_type_acl\", entity_type_id_name, acl),\n        Index(\"ix_entity_name_search\", name, entity_type_id_name),\n    )\n\n\nclass KGRelationship(Base):\n    __tablename__ = \"kg_relationship\"\n\n    # Primary identifier - now part of composite key\n    id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        nullable=False,\n        index=True,\n    )\n\n    source_document: Mapped[str | None] = mapped_column(\n        NullFilteredString, ForeignKey(\"document.id\"), nullable=True, index=True\n    )\n\n    # Source and target nodes (foreign keys to Entity table)\n    source_node: Mapped[str] = mapped_column(\n        NullFilteredString, ForeignKey(\"kg_entity.id_name\"), nullable=False, index=True\n    )\n\n    target_node: Mapped[str] = mapped_column(\n        NullFilteredString, ForeignKey(\"kg_entity.id_name\"), nullable=False, index=True\n    )\n\n    source_node_type: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_entity_type.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    target_node_type: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_entity_type.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    # Relationship type\n    type: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)\n\n    # Add new relationship type reference\n    relationship_type_id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_relationship_type.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    # Add the SQLAlchemy relationship property\n    relationship_type: Mapped[\"KGRelationshipType\"] = relationship(\n        \"KGRelationshipType\", backref=\"relationship\"\n    )\n\n    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)\n\n    # Tracking fields\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n    )\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    # Relationships to Entity table\n    source: Mapped[\"KGEntity\"] = relationship(\"KGEntity\", foreign_keys=[source_node])\n    target: Mapped[\"KGEntity\"] = relationship(\"KGEntity\", foreign_keys=[target_node])\n    document: Mapped[\"Document\"] = relationship(\n        \"Document\", foreign_keys=[source_document]\n    )\n\n    __table_args__ = (\n        # Composite primary key\n        PrimaryKeyConstraint(\"id_name\", \"source_document\"),\n        # Index for querying relationships by type\n        Index(\"ix_kg_relationship_type\", type),\n        # Composite index for source/target queries\n        Index(\"ix_kg_relationship_nodes\", source_node, target_node),\n        # Ensure unique relationships between nodes of a specific type\n        UniqueConstraint(\n            \"source_node\",\n            \"target_node\",\n            \"type\",\n            name=\"uq_kg_relationship_source_target_type\",\n        ),\n    )\n\n\nclass KGRelationshipExtractionStaging(Base):\n    __tablename__ = \"kg_relationship_extraction_staging\"\n\n    # Primary identifier - now part of composite key\n    id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        nullable=False,\n        index=True,\n    )\n\n    source_document: Mapped[str | None] = mapped_column(\n        NullFilteredString, ForeignKey(\"document.id\"), nullable=True, index=True\n    )\n\n    # Source and target nodes (foreign keys to Entity table)\n    source_node: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_entity_extraction_staging.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    target_node: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_entity_extraction_staging.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    source_node_type: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_entity_type.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    target_node_type: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_entity_type.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    # Relationship type\n    type: Mapped[str] = mapped_column(NullFilteredString, nullable=False, index=True)\n\n    # Add new relationship type reference\n    relationship_type_id_name: Mapped[str] = mapped_column(\n        NullFilteredString,\n        ForeignKey(\"kg_relationship_type_extraction_staging.id_name\"),\n        nullable=False,\n        index=True,\n    )\n\n    # Add the SQLAlchemy relationship property\n    relationship_type: Mapped[\"KGRelationshipTypeExtractionStaging\"] = relationship(\n        \"KGRelationshipTypeExtractionStaging\", backref=\"relationship_staging\"\n    )\n\n    occurrences: Mapped[int] = mapped_column(Integer, nullable=False, default=1)\n\n    transferred: Mapped[bool] = mapped_column(\n        Boolean,\n        nullable=False,\n        default=False,\n    )\n\n    # Tracking fields\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    # Relationships to Entity table\n    source: Mapped[\"KGEntityExtractionStaging\"] = relationship(\n        \"KGEntityExtractionStaging\", foreign_keys=[source_node]\n    )\n    target: Mapped[\"KGEntityExtractionStaging\"] = relationship(\n        \"KGEntityExtractionStaging\", foreign_keys=[target_node]\n    )\n    document: Mapped[\"Document\"] = relationship(\n        \"Document\", foreign_keys=[source_document]\n    )\n\n    __table_args__ = (\n        # Composite primary key\n        PrimaryKeyConstraint(\"id_name\", \"source_document\"),\n        # Index for querying relationships by type\n        Index(\"ix_kg_relationship_type\", type),\n        # Composite index for source/target queries\n        Index(\"ix_kg_relationship_nodes\", source_node, target_node),\n        # Ensure unique relationships between nodes of a specific type\n        UniqueConstraint(\n            \"source_node\",\n            \"target_node\",\n            \"type\",\n            name=\"uq_kg_relationship_source_target_type\",\n        ),\n    )\n\n\nclass KGTerm(Base):\n    __tablename__ = \"kg_term\"\n\n    # Make id_term the primary key\n    id_term: Mapped[str] = mapped_column(\n        NullFilteredString, primary_key=True, nullable=False, index=True\n    )\n\n    # List of entity types this term applies to\n    entity_types: Mapped[list[str]] = mapped_column(\n        postgresql.ARRAY(String), nullable=False, default=list\n    )\n\n    # Tracking fields\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n    )\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    __table_args__ = (\n        # Index for searching terms with specific entity types\n        Index(\"ix_search_term_entities\", entity_types),\n        # Index for term lookups\n        Index(\"ix_search_term_term\", id_term),\n    )\n\n\nclass ChunkStats(Base):\n    __tablename__ = \"chunk_stats\"\n    # NOTE: if more sensitive data is added here for display, make sure to add user/group permission\n\n    # this should correspond to the ID of the document\n    # (as is passed around in Onyx)x\n    id: Mapped[str] = mapped_column(\n        NullFilteredString,\n        primary_key=True,\n        default=lambda context: (\n            f\"{context.get_current_parameters()['document_id']}__{context.get_current_parameters()['chunk_in_doc_id']}\"\n        ),\n        index=True,\n    )\n\n    # Reference to parent document\n    document_id: Mapped[str] = mapped_column(\n        NullFilteredString, ForeignKey(\"document.id\"), nullable=False, index=True\n    )\n\n    chunk_in_doc_id: Mapped[int] = mapped_column(\n        Integer,\n        nullable=False,\n    )\n\n    information_content_boost: Mapped[float | None] = mapped_column(\n        Float, nullable=True\n    )\n\n    last_modified: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=False, index=True, default=func.now()\n    )\n    last_synced: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True, index=True\n    )\n\n    __table_args__ = (\n        Index(\n            \"ix_chunk_sync_status\",\n            last_modified,\n            last_synced,\n        ),\n        UniqueConstraint(\n            \"document_id\", \"chunk_in_doc_id\", name=\"uq_chunk_stats_doc_chunk\"\n        ),\n    )\n\n\nclass Tag(Base):\n    __tablename__ = \"tag\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    tag_key: Mapped[str] = mapped_column(String)\n    tag_value: Mapped[str] = mapped_column(String)\n    source: Mapped[DocumentSource] = mapped_column(\n        Enum(DocumentSource, native_enum=False)\n    )\n    is_list: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n\n    documents = relationship(\n        \"Document\",\n        secondary=Document__Tag.__table__,\n        back_populates=\"tags\",\n    )\n\n    __table_args__ = (\n        UniqueConstraint(\n            \"tag_key\",\n            \"tag_value\",\n            \"source\",\n            \"is_list\",\n            name=\"_tag_key_value_source_list_uc\",\n        ),\n    )\n\n\nclass Connector(Base):\n    __tablename__ = \"connector\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    name: Mapped[str] = mapped_column(String)\n    source: Mapped[DocumentSource] = mapped_column(\n        Enum(DocumentSource, native_enum=False)\n    )\n    input_type = mapped_column(Enum(InputType, native_enum=False))\n    connector_specific_config: Mapped[dict[str, Any]] = mapped_column(\n        postgresql.JSONB()\n    )\n    indexing_start: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime, nullable=True\n    )\n\n    kg_processing_enabled: Mapped[bool] = mapped_column(\n        Boolean,\n        nullable=False,\n        default=False,\n        comment=\"Whether this connector should extract knowledge graph entities\",\n    )\n\n    kg_coverage_days: Mapped[int | None] = mapped_column(Integer, nullable=True)\n\n    refresh_freq: Mapped[int | None] = mapped_column(Integer, nullable=True)\n    prune_freq: Mapped[int | None] = mapped_column(Integer, nullable=True)\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()\n    )\n\n    credentials: Mapped[list[\"ConnectorCredentialPair\"]] = relationship(\n        \"ConnectorCredentialPair\",\n        back_populates=\"connector\",\n        cascade=\"all, delete-orphan\",\n    )\n    documents_by_connector: Mapped[list[\"DocumentByConnectorCredentialPair\"]] = (\n        relationship(\n            \"DocumentByConnectorCredentialPair\",\n            back_populates=\"connector\",\n            passive_deletes=True,\n        )\n    )\n\n    # synchronize this validation logic with RefreshFrequencySchema etc on front end\n    # until we have a centralized validation schema\n\n    # TODO(rkuo): experiment with SQLAlchemy validators rather than manual checks\n    # https://docs.sqlalchemy.org/en/20/orm/mapped_attributes.html\n    def validate_refresh_freq(self) -> None:\n        if self.refresh_freq is not None:\n            if self.refresh_freq < 60:\n                raise ValueError(\n                    \"refresh_freq must be greater than or equal to 1 minute.\"\n                )\n\n    def validate_prune_freq(self) -> None:\n        if self.prune_freq is not None:\n            if self.prune_freq < 300:\n                raise ValueError(\n                    \"prune_freq must be greater than or equal to 5 minutes.\"\n                )\n\n\nclass Credential(Base):\n    __tablename__ = \"credential\"\n\n    name: Mapped[str] = mapped_column(String, nullable=True)\n\n    source: Mapped[DocumentSource] = mapped_column(\n        Enum(DocumentSource, native_enum=False)\n    )\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    credential_json: Mapped[SensitiveValue[dict[str, Any]] | None] = mapped_column(\n        EncryptedJson()\n    )\n    user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    # if `true`, then all Admins will have access to the credential\n    admin_public: Mapped[bool] = mapped_column(Boolean, default=True)\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()\n    )\n\n    curator_public: Mapped[bool] = mapped_column(Boolean, default=False)\n\n    connectors: Mapped[list[\"ConnectorCredentialPair\"]] = relationship(\n        \"ConnectorCredentialPair\",\n        back_populates=\"credential\",\n        cascade=\"all, delete-orphan\",\n    )\n    documents_by_credential: Mapped[list[\"DocumentByConnectorCredentialPair\"]] = (\n        relationship(\n            \"DocumentByConnectorCredentialPair\",\n            back_populates=\"credential\",\n            passive_deletes=True,\n        )\n    )\n\n    user: Mapped[User | None] = relationship(\"User\", back_populates=\"credentials\")\n\n\nclass FederatedConnector(Base):\n    __tablename__ = \"federated_connector\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    source: Mapped[FederatedConnectorSource] = mapped_column(\n        Enum(FederatedConnectorSource, native_enum=False)\n    )\n    credentials: Mapped[SensitiveValue[dict[str, Any]] | None] = mapped_column(\n        EncryptedJson(), nullable=False\n    )\n    config: Mapped[dict[str, Any]] = mapped_column(\n        postgresql.JSONB(), default=dict, nullable=False, server_default=\"{}\"\n    )\n\n    oauth_tokens: Mapped[list[\"FederatedConnectorOAuthToken\"]] = relationship(\n        \"FederatedConnectorOAuthToken\",\n        back_populates=\"federated_connector\",\n        cascade=\"all, delete-orphan\",\n    )\n    document_sets: Mapped[list[\"FederatedConnector__DocumentSet\"]] = relationship(\n        \"FederatedConnector__DocumentSet\",\n        back_populates=\"federated_connector\",\n        cascade=\"all, delete-orphan\",\n    )\n\n\nclass FederatedConnectorOAuthToken(Base):\n    __tablename__ = \"federated_connector_oauth_token\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    federated_connector_id: Mapped[int] = mapped_column(\n        ForeignKey(\"federated_connector.id\", ondelete=\"CASCADE\"), nullable=False\n    )\n    user_id: Mapped[UUID] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=False\n    )\n    token: Mapped[SensitiveValue[str] | None] = mapped_column(\n        EncryptedString(), nullable=False\n    )\n    expires_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime, nullable=True\n    )\n\n    federated_connector: Mapped[\"FederatedConnector\"] = relationship(\n        \"FederatedConnector\", back_populates=\"oauth_tokens\"\n    )\n    user: Mapped[\"User\"] = relationship(\"User\")\n\n\nclass FederatedConnector__DocumentSet(Base):\n    __tablename__ = \"federated_connector__document_set\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    federated_connector_id: Mapped[int] = mapped_column(\n        ForeignKey(\"federated_connector.id\", ondelete=\"CASCADE\"), nullable=False\n    )\n    document_set_id: Mapped[int] = mapped_column(\n        ForeignKey(\"document_set.id\", ondelete=\"CASCADE\"), nullable=False\n    )\n    # unique per source type. Validated before insertion.\n    entities: Mapped[dict[str, Any]] = mapped_column(postgresql.JSONB(), nullable=False)\n\n    federated_connector: Mapped[\"FederatedConnector\"] = relationship(\n        \"FederatedConnector\", back_populates=\"document_sets\"\n    )\n    document_set: Mapped[\"DocumentSet\"] = relationship(\n        \"DocumentSet\", back_populates=\"federated_connectors\"\n    )\n\n    __table_args__ = (\n        UniqueConstraint(\n            \"federated_connector_id\",\n            \"document_set_id\",\n            name=\"uq_federated_connector_document_set\",\n        ),\n    )\n\n\nclass SearchSettings(Base):\n    __tablename__ = \"search_settings\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    model_name: Mapped[str] = mapped_column(String)\n    model_dim: Mapped[int] = mapped_column(Integer)\n    normalize: Mapped[bool] = mapped_column(Boolean)\n    query_prefix: Mapped[str | None] = mapped_column(String, nullable=True)\n    passage_prefix: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    status: Mapped[IndexModelStatus] = mapped_column(\n        Enum(IndexModelStatus, native_enum=False)\n    )\n    index_name: Mapped[str] = mapped_column(String)\n    provider_type: Mapped[EmbeddingProvider | None] = mapped_column(\n        ForeignKey(\"embedding_provider.provider_type\"), nullable=True\n    )\n\n    # Type of switchover to perform when switching embedding models\n    # REINDEX: waits for all connectors to complete\n    # ACTIVE_ONLY: waits for only non-paused connectors to complete\n    # INSTANT: swaps immediately without waiting\n    switchover_type: Mapped[SwitchoverType] = mapped_column(\n        Enum(SwitchoverType, native_enum=False), default=SwitchoverType.REINDEX\n    )\n\n    # allows for quantization -> less memory usage for a small performance hit\n    embedding_precision: Mapped[EmbeddingPrecision] = mapped_column(\n        Enum(EmbeddingPrecision, native_enum=False)\n    )\n\n    # can be used to reduce dimensionality of vectors and save memory with\n    # a small performance hit. More details in the `Reducing embedding dimensions`\n    # section here:\n    # https://platform.openai.com/docs/guides/embeddings#embedding-models\n    # If not specified, will just use the model_dim without any reduction.\n    # NOTE: this is only currently available for OpenAI models\n    reduced_dimension: Mapped[int | None] = mapped_column(Integer, nullable=True)\n\n    # Mini and Large Chunks (large chunk also checks for model max context)\n    multipass_indexing: Mapped[bool] = mapped_column(Boolean, default=True)\n\n    # Contextual RAG\n    enable_contextual_rag: Mapped[bool] = mapped_column(Boolean, default=False)\n\n    # Contextual RAG LLM\n    contextual_rag_llm_name: Mapped[str | None] = mapped_column(String, nullable=True)\n    contextual_rag_llm_provider: Mapped[str | None] = mapped_column(\n        String, nullable=True\n    )\n\n    multilingual_expansion: Mapped[list[str]] = mapped_column(\n        postgresql.ARRAY(String), default=[]\n    )\n\n    cloud_provider: Mapped[\"CloudEmbeddingProvider\"] = relationship(\n        \"CloudEmbeddingProvider\",\n        back_populates=\"search_settings\",\n        foreign_keys=[provider_type],\n    )\n\n    index_attempts: Mapped[list[\"IndexAttempt\"]] = relationship(\n        \"IndexAttempt\", back_populates=\"search_settings\"\n    )\n\n    __table_args__ = (\n        Index(\n            \"ix_embedding_model_present_unique\",\n            \"status\",\n            unique=True,\n            postgresql_where=(status == IndexModelStatus.PRESENT),\n        ),\n        Index(\n            \"ix_embedding_model_future_unique\",\n            \"status\",\n            unique=True,\n            postgresql_where=(status == IndexModelStatus.FUTURE),\n        ),\n    )\n\n    def __repr__(self) -> str:\n        return f\"<EmbeddingModel(model_name='{self.model_name}', status='{self.status}',\\\n          cloud_provider='{self.cloud_provider.provider_type if self.cloud_provider else 'None'}')>\"\n\n    @property\n    def api_version(self) -> str | None:\n        return (\n            self.cloud_provider.api_version if self.cloud_provider is not None else None\n        )\n\n    @property\n    def deployment_name(self) -> str | None:\n        return (\n            self.cloud_provider.deployment_name\n            if self.cloud_provider is not None\n            else None\n        )\n\n    @property\n    def api_url(self) -> str | None:\n        return self.cloud_provider.api_url if self.cloud_provider is not None else None\n\n    @property\n    def api_key(self) -> str | None:\n        if self.cloud_provider is None or self.cloud_provider.api_key is None:\n            return None\n        return self.cloud_provider.api_key.get_value(apply_mask=False)\n\n    @property\n    def large_chunks_enabled(self) -> bool:\n        \"\"\"\n        Given multipass usage and an embedder, decides whether large chunks are allowed\n        based on model/provider constraints.\n        \"\"\"\n        # Only local models that support a larger context are from Nomic\n        # Cohere does not support larger contexts (they recommend not going above ~512 tokens)\n        return SearchSettings.can_use_large_chunks(\n            self.multipass_indexing, self.model_name, self.provider_type\n        )\n\n    @property\n    def final_embedding_dim(self) -> int:\n        return self.reduced_dimension or self.model_dim\n\n    @staticmethod\n    def can_use_large_chunks(\n        multipass: bool, model_name: str, provider_type: EmbeddingProvider | None\n    ) -> bool:\n        \"\"\"\n        Given multipass usage and an embedder, decides whether large chunks are allowed\n        based on model/provider constraints.\n        \"\"\"\n        # Only local models that support a larger context are from Nomic\n        # Cohere does not support larger contexts (they recommend not going above ~512 tokens)\n        return (\n            multipass\n            and model_name.startswith(\"nomic-ai\")\n            and provider_type != EmbeddingProvider.COHERE\n        )\n\n\nclass IndexAttempt(Base):\n    \"\"\"\n    Represents an attempt to index a group of 0 or more documents from a\n    source. For example, a single pull from Google Drive, a single event from\n    slack event API, or a single website crawl.\n    \"\"\"\n\n    __tablename__ = \"index_attempt\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n\n    connector_credential_pair_id: Mapped[int] = mapped_column(\n        ForeignKey(\"connector_credential_pair.id\"),\n        nullable=False,\n    )\n\n    # Some index attempts that run from beginning will still have this as False\n    # This is only for attempts that are explicitly marked as from the start via\n    # the run once API\n    from_beginning: Mapped[bool] = mapped_column(Boolean)\n    status: Mapped[IndexingStatus] = mapped_column(\n        Enum(IndexingStatus, native_enum=False, index=True)\n    )\n    # The two below may be slightly out of sync if user switches Embedding Model\n    new_docs_indexed: Mapped[int | None] = mapped_column(Integer, default=0)\n    total_docs_indexed: Mapped[int | None] = mapped_column(Integer, default=0)\n    docs_removed_from_index: Mapped[int | None] = mapped_column(Integer, default=0)\n    # only filled if status = \"failed\"\n    error_msg: Mapped[str | None] = mapped_column(Text, default=None)\n    # only filled if status = \"failed\" AND an unhandled exception caused the failure\n    full_exception_trace: Mapped[str | None] = mapped_column(Text, default=None)\n    # Nullable because in the past, we didn't allow swapping out embedding models live\n    search_settings_id: Mapped[int] = mapped_column(\n        ForeignKey(\"search_settings.id\", ondelete=\"SET NULL\"),\n        nullable=True,\n    )\n\n    # for polling connectors, the start and end time of the poll window\n    # will be set when the index attempt starts\n    poll_range_start: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True, default=None\n    )\n    poll_range_end: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True, default=None\n    )\n\n    # Points to the last checkpoint that was saved for this run. The pointer here\n    # can be taken to the FileStore to grab the actual checkpoint value\n    checkpoint_pointer: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    # Database-based coordination fields (replacing Redis fencing)\n    celery_task_id: Mapped[str | None] = mapped_column(String, nullable=True)\n    cancellation_requested: Mapped[bool] = mapped_column(Boolean, default=False)\n\n    # Batch coordination fields\n    # Once this is set, docfetching has completed\n    total_batches: Mapped[int | None] = mapped_column(Integer, nullable=True)\n    # batches that are fully indexed (i.e. have completed docfetching and docprocessing)\n    completed_batches: Mapped[int] = mapped_column(Integer, default=0)\n    # TODO: unused, remove this column\n    total_failures_batch_level: Mapped[int] = mapped_column(Integer, default=0)\n    total_chunks: Mapped[int] = mapped_column(Integer, default=0)\n\n    # Progress tracking for stall detection\n    last_progress_time: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n    last_batches_completed_count: Mapped[int] = mapped_column(Integer, default=0)\n\n    # Heartbeat tracking for worker liveness detection\n    heartbeat_counter: Mapped[int] = mapped_column(Integer, default=0)\n    last_heartbeat_value: Mapped[int] = mapped_column(Integer, default=0)\n    last_heartbeat_time: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        index=True,\n    )\n    # when the actual indexing run began\n    # NOTE: will use the api_server clock rather than DB server clock\n    time_started: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), default=None\n    )\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n    )\n\n    connector_credential_pair: Mapped[ConnectorCredentialPair] = relationship(\n        \"ConnectorCredentialPair\", back_populates=\"index_attempts\"\n    )\n\n    search_settings: Mapped[SearchSettings | None] = relationship(\n        \"SearchSettings\", back_populates=\"index_attempts\"\n    )\n\n    error_rows = relationship(\n        \"IndexAttemptError\",\n        back_populates=\"index_attempt\",\n        cascade=\"all, delete-orphan\",\n    )\n\n    __table_args__ = (\n        Index(\n            \"ix_index_attempt_latest_for_connector_credential_pair\",\n            \"connector_credential_pair_id\",\n            \"time_created\",\n        ),\n        Index(\n            \"ix_index_attempt_ccpair_search_settings_time_updated\",\n            \"connector_credential_pair_id\",\n            \"search_settings_id\",\n            desc(\"time_updated\"),\n            unique=False,\n        ),\n        Index(\n            \"ix_index_attempt_cc_pair_settings_poll\",\n            \"connector_credential_pair_id\",\n            \"search_settings_id\",\n            \"status\",\n            desc(\"time_updated\"),\n        ),\n        # NEW: Index for coordination queries\n        Index(\n            \"ix_index_attempt_active_coordination\",\n            \"connector_credential_pair_id\",\n            \"search_settings_id\",\n            \"status\",\n        ),\n    )\n\n    def __repr__(self) -> str:\n        return (\n            f\"<IndexAttempt(id={self.id!r}, \"\n            f\"status={self.status!r}, \"\n            f\"error_msg={self.error_msg!r})>\"\n            f\"time_created={self.time_created!r}, \"\n            f\"time_updated={self.time_updated!r}, \"\n        )\n\n    def is_finished(self) -> bool:\n        return self.status.is_terminal()\n\n    def is_coordination_complete(self) -> bool:\n        \"\"\"Check if all batches have been processed\"\"\"\n        return (\n            self.total_batches is not None\n            and self.completed_batches >= self.total_batches\n        )\n\n\nclass HierarchyFetchAttempt(Base):\n    \"\"\"Tracks attempts to fetch hierarchy nodes from a source\"\"\"\n\n    __tablename__ = \"hierarchy_fetch_attempt\"\n\n    id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), primary_key=True, default=uuid4\n    )\n\n    connector_credential_pair_id: Mapped[int] = mapped_column(\n        ForeignKey(\"connector_credential_pair.id\", ondelete=\"CASCADE\"),\n        nullable=False,\n    )\n\n    status: Mapped[IndexingStatus] = mapped_column(\n        Enum(IndexingStatus, native_enum=False), nullable=False, index=True\n    )\n\n    # Statistics\n    nodes_fetched: Mapped[int | None] = mapped_column(Integer, default=0)\n    nodes_updated: Mapped[int | None] = mapped_column(Integer, default=0)\n\n    # Error information (only filled if status = \"failed\")\n    error_msg: Mapped[str | None] = mapped_column(Text, default=None)\n    full_exception_trace: Mapped[str | None] = mapped_column(Text, default=None)\n\n    # Timestamps\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        index=True,\n    )\n    time_started: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), default=None\n    )\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n    )\n\n    # Relationships\n    connector_credential_pair: Mapped[\"ConnectorCredentialPair\"] = relationship(\n        \"ConnectorCredentialPair\"\n    )\n\n    __table_args__ = (\n        Index(\n            \"ix_hierarchy_fetch_attempt_cc_pair\",\n            connector_credential_pair_id,\n        ),\n    )\n\n\nclass IndexAttemptError(Base):\n    __tablename__ = \"index_attempt_errors\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n\n    index_attempt_id: Mapped[int] = mapped_column(\n        ForeignKey(\"index_attempt.id\"),\n        nullable=False,\n    )\n    connector_credential_pair_id: Mapped[int] = mapped_column(\n        ForeignKey(\"connector_credential_pair.id\"),\n        nullable=False,\n    )\n\n    document_id: Mapped[str | None] = mapped_column(String, nullable=True)\n    document_link: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    entity_id: Mapped[str | None] = mapped_column(String, nullable=True)\n    failed_time_range_start: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n    failed_time_range_end: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n\n    failure_message: Mapped[str] = mapped_column(Text)\n    is_resolved: Mapped[bool] = mapped_column(Boolean, default=False)\n\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n    )\n\n    # This is the reverse side of the relationship\n    index_attempt = relationship(\"IndexAttempt\", back_populates=\"error_rows\")\n\n\nclass SyncRecord(Base):\n    \"\"\"\n    Represents the status of a \"sync\" operation (e.g. document set, user group, deletion).\n\n    A \"sync\" operation is an operation which needs to update a set of documents within\n    Vespa, usually to match the state of Postgres.\n    \"\"\"\n\n    __tablename__ = \"sync_record\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    # document set id, user group id, or deletion id\n    entity_id: Mapped[int] = mapped_column(Integer)\n\n    sync_type: Mapped[SyncType] = mapped_column(Enum(SyncType, native_enum=False))\n    sync_status: Mapped[SyncStatus] = mapped_column(Enum(SyncStatus, native_enum=False))\n\n    num_docs_synced: Mapped[int] = mapped_column(Integer, default=0)\n\n    sync_start_time: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))\n    sync_end_time: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n\n    __table_args__ = (\n        Index(\n            \"ix_sync_record_entity_id_sync_type_sync_start_time\",\n            \"entity_id\",\n            \"sync_type\",\n            \"sync_start_time\",\n        ),\n        Index(\n            \"ix_sync_record_entity_id_sync_type_sync_status\",\n            \"entity_id\",\n            \"sync_type\",\n            \"sync_status\",\n        ),\n    )\n\n\nclass HierarchyNodeByConnectorCredentialPair(Base):\n    \"\"\"Tracks which cc_pairs reference each hierarchy node.\n\n    During pruning, stale entries are removed for the current cc_pair.\n    Hierarchy nodes with zero remaining entries are then deleted.\n    \"\"\"\n\n    __tablename__ = \"hierarchy_node_by_connector_credential_pair\"\n\n    hierarchy_node_id: Mapped[int] = mapped_column(\n        ForeignKey(\"hierarchy_node.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    connector_id: Mapped[int] = mapped_column(primary_key=True)\n    credential_id: Mapped[int] = mapped_column(primary_key=True)\n\n    __table_args__ = (\n        ForeignKeyConstraint(\n            [\"connector_id\", \"credential_id\"],\n            [\n                \"connector_credential_pair.connector_id\",\n                \"connector_credential_pair.credential_id\",\n            ],\n            ondelete=\"CASCADE\",\n        ),\n        Index(\n            \"ix_hierarchy_node_cc_pair_connector_credential\",\n            \"connector_id\",\n            \"credential_id\",\n        ),\n    )\n\n\nclass DocumentByConnectorCredentialPair(Base):\n    \"\"\"Represents an indexing of a document by a specific connector / credential pair\"\"\"\n\n    __tablename__ = \"document_by_connector_credential_pair\"\n\n    id: Mapped[str] = mapped_column(ForeignKey(\"document.id\"), primary_key=True)\n    # TODO: transition this to use the ConnectorCredentialPair id directly\n    connector_id: Mapped[int] = mapped_column(\n        ForeignKey(\"connector.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    credential_id: Mapped[int] = mapped_column(\n        ForeignKey(\"credential.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n\n    # used to better keep track of document counts at a connector level\n    # e.g. if a document is added as part of permission syncing, it should\n    # not be counted as part of the connector's document count until\n    # the actual indexing is complete\n    has_been_indexed: Mapped[bool] = mapped_column(Boolean)\n\n    connector: Mapped[Connector] = relationship(\n        \"Connector\", back_populates=\"documents_by_connector\", passive_deletes=True\n    )\n    credential: Mapped[Credential] = relationship(\n        \"Credential\", back_populates=\"documents_by_credential\", passive_deletes=True\n    )\n\n    __table_args__ = (\n        Index(\n            \"idx_document_cc_pair_connector_credential\",\n            \"connector_id\",\n            \"credential_id\",\n            unique=False,\n        ),\n        # Index to optimize get_document_counts_for_cc_pairs query pattern\n        Index(\n            \"idx_document_cc_pair_counts\",\n            \"connector_id\",\n            \"credential_id\",\n            \"has_been_indexed\",\n            unique=False,\n        ),\n    )\n\n\n\"\"\"\nMessages Tables\n\"\"\"\n\n\nclass ChatSession(Base):\n    __tablename__ = \"chat_session\"\n\n    id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), primary_key=True, default=uuid4\n    )\n    user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    persona_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"persona.id\"), nullable=True\n    )\n    description: Mapped[str | None] = mapped_column(Text, nullable=True)\n    # This chat created by OnyxBot\n    onyxbot_flow: Mapped[bool] = mapped_column(Boolean, default=False)\n    # Only ever set to True if system is set to not hard-delete chats\n    deleted: Mapped[bool] = mapped_column(Boolean, default=False)\n    # controls whether or not this conversation is viewable by others\n    shared_status: Mapped[ChatSessionSharedStatus] = mapped_column(\n        Enum(ChatSessionSharedStatus, native_enum=False),\n        default=ChatSessionSharedStatus.PRIVATE,\n    )\n\n    current_alternate_model: Mapped[str | None] = mapped_column(String, default=None)\n\n    slack_thread_id: Mapped[str | None] = mapped_column(\n        String, nullable=True, default=None\n    )\n\n    project_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"user_project.id\"), nullable=True\n    )\n\n    project: Mapped[\"UserProject\"] = relationship(\n        \"UserProject\", back_populates=\"chat_sessions\", foreign_keys=[project_id]\n    )\n\n    # the latest \"overrides\" specified by the user. These take precedence over\n    # the attached persona. However, overrides specified directly in the\n    # `send-message` call will take precedence over these.\n    # NOTE: currently only used by the chat seeding flow, will be used in the\n    # future once we allow users to override default values via the Chat UI\n    # itself\n    llm_override: Mapped[LLMOverride | None] = mapped_column(\n        PydanticType(LLMOverride), nullable=True\n    )\n\n    # The latest temperature override specified by the user\n    temperature_override: Mapped[float | None] = mapped_column(Float, nullable=True)\n\n    prompt_override: Mapped[PromptOverride | None] = mapped_column(\n        PydanticType(PromptOverride), nullable=True\n    )\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n    )\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    user: Mapped[User] = relationship(\"User\", back_populates=\"chat_sessions\")\n    messages: Mapped[list[\"ChatMessage\"]] = relationship(\n        \"ChatMessage\",\n        back_populates=\"chat_session\",\n        cascade=\"all, delete-orphan\",\n        foreign_keys=\"ChatMessage.chat_session_id\",\n    )\n    persona: Mapped[\"Persona\"] = relationship(\"Persona\")\n\n\nclass ChatMessage(Base):\n    \"\"\"Note, the first message in a chain has no contents, it's a workaround to allow edits\n    on the first message of a session, an empty root node basically\n\n    Since every user message is followed by a LLM response, chat messages generally come in pairs.\n    Keeping them as separate messages however for future Agentification extensions\n    Fields will be largely duplicated in the pair.\n    \"\"\"\n\n    __tablename__ = \"chat_message\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n\n    # Where is this message located\n    chat_session_id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), ForeignKey(\"chat_session.id\")\n    )\n\n    # Parent message pointer for the tree structure, nullable because the first message is\n    # an empty root node to allow edits on the first message of a session.\n    parent_message_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"chat_message.id\"), nullable=True\n    )\n    # This only maps to the latest because only that message chain is needed.\n    # It can be updated as needed to trace other branches.\n    latest_child_message_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"chat_message.id\"), nullable=True\n    )\n\n    # Only set on summary messages - the ID of the last message included in this summary\n    # Used for chat history compression\n    last_summarized_message_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"chat_message.id\", ondelete=\"SET NULL\"),\n        nullable=True,\n    )\n\n    # For multi-model turns: the user message points to which assistant response\n    # was selected as the preferred one to continue the conversation with.\n    preferred_response_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"chat_message.id\", ondelete=\"SET NULL\"), nullable=True\n    )\n\n    # The display name of the model that generated this assistant message\n    model_display_name: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    # What does this message contain\n    reasoning_tokens: Mapped[str | None] = mapped_column(Text, nullable=True)\n    message: Mapped[str] = mapped_column(Text)\n    token_count: Mapped[int] = mapped_column(Integer)\n    message_type: Mapped[MessageType] = mapped_column(\n        Enum(MessageType, native_enum=False)\n    )\n    # Files attached to the message, when parsed into history, it becomes a separate message\n    files: Mapped[list[FileDescriptor] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n\n    # Maps the citation numbers to a SearchDoc id\n    citations: Mapped[dict[int, int] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n\n    # Metadata\n    error: Mapped[str | None] = mapped_column(Text, nullable=True)\n    time_sent: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    # True if this assistant message is a clarification question (deep research flow)\n    is_clarification: Mapped[bool] = mapped_column(Boolean, default=False)\n    # Duration in seconds for processing this message (assistant messages only)\n    processing_duration_seconds: Mapped[float | None] = mapped_column(\n        Float, nullable=True\n    )\n\n    # Relationships\n    chat_session: Mapped[ChatSession] = relationship(\n        \"ChatSession\",\n        back_populates=\"messages\",\n        foreign_keys=[chat_session_id],\n    )\n\n    chat_message_feedbacks: Mapped[list[\"ChatMessageFeedback\"]] = relationship(\n        \"ChatMessageFeedback\",\n        back_populates=\"chat_message\",\n    )\n\n    document_feedbacks: Mapped[list[\"DocumentRetrievalFeedback\"]] = relationship(\n        \"DocumentRetrievalFeedback\",\n        back_populates=\"chat_message\",\n    )\n\n    # Even though search docs come from tool calls, the answer has a final set of saved search docs that we will show\n    search_docs: Mapped[list[\"SearchDoc\"]] = relationship(\n        \"SearchDoc\",\n        secondary=ChatMessage__SearchDoc.__table__,\n        back_populates=\"chat_messages\",\n        cascade=\"all, delete-orphan\",\n        single_parent=True,\n    )\n\n    parent_message: Mapped[\"ChatMessage | None\"] = relationship(\n        \"ChatMessage\",\n        foreign_keys=[parent_message_id],\n        remote_side=\"ChatMessage.id\",\n    )\n\n    latest_child_message: Mapped[\"ChatMessage | None\"] = relationship(\n        \"ChatMessage\",\n        foreign_keys=[latest_child_message_id],\n        remote_side=\"ChatMessage.id\",\n    )\n\n    preferred_response: Mapped[\"ChatMessage | None\"] = relationship(\n        \"ChatMessage\",\n        foreign_keys=[preferred_response_id],\n        remote_side=\"ChatMessage.id\",\n    )\n\n    # Chat messages only need to know their immediate tool call children\n    # If there are nested tool calls, they are stored in the tool_call_children relationship.\n    tool_calls: Mapped[list[\"ToolCall\"] | None] = relationship(\n        \"ToolCall\",\n        back_populates=\"chat_message\",\n    )\n\n    standard_answers: Mapped[list[\"StandardAnswer\"]] = relationship(\n        \"StandardAnswer\",\n        secondary=ChatMessage__StandardAnswer.__table__,\n        back_populates=\"chat_messages\",\n    )\n\n\nclass ToolCall(Base):\n    \"\"\"Represents a Tool Call and Tool Response\"\"\"\n\n    __tablename__ = \"tool_call\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n\n    chat_session_id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), ForeignKey(\"chat_session.id\", ondelete=\"CASCADE\")\n    )\n\n    # If this is not None, it's a top level tool call from the user message\n    # If this is None, it's a lower level call from another tool/agent\n    parent_chat_message_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"chat_message.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    # If this is not None, this tool call is a child of another tool call\n    parent_tool_call_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"tool_call.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    # The tools with the same turn number (and parent) were called in parallel\n    # Ones with different turn numbers (and same parent) were called sequentially\n    turn_number: Mapped[int] = mapped_column(Integer)\n    # Index order of tool calls from the LLM for parallel tool calls\n    tab_index: Mapped[int] = mapped_column(Integer, default=0)\n\n    # Not a FK because we want to be able to delete the tool without deleting\n    # this entry\n    tool_id: Mapped[int] = mapped_column(Integer())\n    # This is needed because LLMs expect the tool call and the response to have matching IDs\n    # This is better than just regenerating one randomly\n    tool_call_id: Mapped[str] = mapped_column(String())\n    # Preceeding reasoning tokens for this tool call, not included in the history\n    reasoning_tokens: Mapped[str | None] = mapped_column(Text, nullable=True)\n    # For \"Agents\" like the Research Agent for Deep Research -\n    # the argument and final report are stored as the argument and response.\n    tool_call_arguments: Mapped[dict[str, JSON_ro]] = mapped_column(postgresql.JSONB())\n    tool_call_response: Mapped[str] = mapped_column(Text)\n    # This just counts the number of tokens in the arg because it's all that's kept for the history\n    # Only the top level tools (the ones with a parent_chat_message_id) have token counts that are counted\n    # towards the session total.\n    tool_call_tokens: Mapped[int] = mapped_column(Integer())\n    # For image generation tool - stores GeneratedImage objects for replay\n    generated_images: Mapped[list[dict] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n\n    # Relationships\n    chat_session: Mapped[ChatSession] = relationship(\"ChatSession\")\n\n    chat_message: Mapped[\"ChatMessage | None\"] = relationship(\n        \"ChatMessage\",\n        foreign_keys=[parent_chat_message_id],\n        back_populates=\"tool_calls\",\n    )\n    parent_tool_call: Mapped[\"ToolCall | None\"] = relationship(\n        \"ToolCall\",\n        foreign_keys=[parent_tool_call_id],\n        remote_side=\"ToolCall.id\",\n    )\n    tool_call_children: Mapped[list[\"ToolCall\"]] = relationship(\n        \"ToolCall\",\n        foreign_keys=[parent_tool_call_id],\n        back_populates=\"parent_tool_call\",\n    )\n    # Other tools may need to save other things, might need to figure out a more generic way to store\n    # rich tool returns\n    search_docs: Mapped[list[\"SearchDoc\"]] = relationship(\n        \"SearchDoc\",\n        secondary=ToolCall__SearchDoc.__table__,\n        back_populates=\"tool_calls\",\n        cascade=\"all, delete-orphan\",\n        single_parent=True,\n    )\n\n\nclass SearchDoc(Base):\n    \"\"\"Different from Document table. This one stores the state of a document from a retrieval.\n    This allows chat sessions to be replayed with the searched docs\n\n    Notably, this does not include the contents of the Document/Chunk, during inference if a stored\n    SearchDoc is selected, an inference must be remade to retrieve the contents\n    \"\"\"\n\n    __tablename__ = \"search_doc\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    document_id: Mapped[str] = mapped_column(String)\n    chunk_ind: Mapped[int] = mapped_column(Integer)\n    semantic_id: Mapped[str] = mapped_column(String)\n    link: Mapped[str | None] = mapped_column(String, nullable=True)\n    blurb: Mapped[str] = mapped_column(String)\n    boost: Mapped[int] = mapped_column(Integer)\n    source_type: Mapped[DocumentSource] = mapped_column(\n        Enum(DocumentSource, native_enum=False)\n    )\n    hidden: Mapped[bool] = mapped_column(Boolean)\n    doc_metadata: Mapped[dict[str, str | list[str]]] = mapped_column(postgresql.JSONB())\n    score: Mapped[float] = mapped_column(Float)\n    match_highlights: Mapped[list[str]] = mapped_column(postgresql.ARRAY(String))\n    # This is for the document, not this row in the table\n    updated_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n    primary_owners: Mapped[list[str] | None] = mapped_column(\n        postgresql.ARRAY(String), nullable=True\n    )\n    secondary_owners: Mapped[list[str] | None] = mapped_column(\n        postgresql.ARRAY(String), nullable=True\n    )\n    is_internet: Mapped[bool] = mapped_column(Boolean, default=False, nullable=True)\n\n    is_relevant: Mapped[bool | None] = mapped_column(Boolean, nullable=True)\n    relevance_explanation: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    chat_messages: Mapped[list[\"ChatMessage\"]] = relationship(\n        \"ChatMessage\",\n        secondary=ChatMessage__SearchDoc.__table__,\n        back_populates=\"search_docs\",\n    )\n\n    tool_calls: Mapped[list[\"ToolCall\"]] = relationship(\n        \"ToolCall\",\n        secondary=ToolCall__SearchDoc.__table__,\n        back_populates=\"search_docs\",\n    )\n\n\nclass SearchQuery(Base):\n    # This table contains search queries for the Search UI. There are no followups and less is stored because the reply\n    # functionality is simply to rerun the search query again as things may have changed and this is more common for search.\n    __tablename__ = \"search_query\"\n    id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), primary_key=True, default=uuid4\n    )\n    user_id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), ForeignKey(\"user.id\", ondelete=\"CASCADE\")\n    )\n    query: Mapped[str] = mapped_column(String)\n    query_expansions: Mapped[list[str] | None] = mapped_column(\n        postgresql.ARRAY(String), nullable=True\n    )\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n\n\"\"\"\nFeedback, Logging, Metrics Tables\n\"\"\"\n\n\nclass DocumentRetrievalFeedback(Base):\n    __tablename__ = \"document_retrieval_feedback\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    chat_message_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"chat_message.id\", ondelete=\"SET NULL\"), nullable=True\n    )\n    document_id: Mapped[str] = mapped_column(ForeignKey(\"document.id\"))\n    # How high up this document is in the results, 1 for first\n    document_rank: Mapped[int] = mapped_column(Integer)\n    clicked: Mapped[bool] = mapped_column(Boolean, default=False)\n    feedback: Mapped[SearchFeedbackType | None] = mapped_column(\n        Enum(SearchFeedbackType, native_enum=False), nullable=True\n    )\n\n    chat_message: Mapped[ChatMessage] = relationship(\n        \"ChatMessage\",\n        back_populates=\"document_feedbacks\",\n        foreign_keys=[chat_message_id],\n    )\n    document: Mapped[Document] = relationship(\n        \"Document\", back_populates=\"retrieval_feedbacks\"\n    )\n\n\nclass ChatMessageFeedback(Base):\n    __tablename__ = \"chat_feedback\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    chat_message_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"chat_message.id\", ondelete=\"SET NULL\"), nullable=True\n    )\n    is_positive: Mapped[bool | None] = mapped_column(Boolean, nullable=True)\n    required_followup: Mapped[bool | None] = mapped_column(Boolean, nullable=True)\n    feedback_text: Mapped[str | None] = mapped_column(Text, nullable=True)\n    predefined_feedback: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    chat_message: Mapped[ChatMessage] = relationship(\n        \"ChatMessage\",\n        back_populates=\"chat_message_feedbacks\",\n        foreign_keys=[chat_message_id],\n    )\n\n\nclass LLMProvider(Base):\n    __tablename__ = \"llm_provider\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    name: Mapped[str] = mapped_column(String, unique=True)\n    provider: Mapped[str] = mapped_column(String)\n    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(\n        EncryptedString(), nullable=True\n    )\n    api_base: Mapped[str | None] = mapped_column(String, nullable=True)\n    api_version: Mapped[str | None] = mapped_column(String, nullable=True)\n    # custom configs that should be passed to the LLM provider at inference time\n    # (e.g. `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, etc. for bedrock)\n    custom_config: Mapped[dict[str, str] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n\n    # Deprecated: use LLMModelFlow with CHAT flow type instead\n    default_model_name: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    deployment_name: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    # Deprecated: use LLMModelFlow.is_default with CHAT flow type instead\n    is_default_provider: Mapped[bool | None] = mapped_column(Boolean, nullable=True)\n    # Deprecated: use LLMModelFlow.is_default with VISION flow type instead\n    is_default_vision_provider: Mapped[bool | None] = mapped_column(Boolean)\n    # Deprecated: use LLMModelFlow with VISION flow type instead\n    default_vision_model: Mapped[str | None] = mapped_column(String, nullable=True)\n    # EE only\n    is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)\n    # Auto mode: models, visibility, and defaults are managed by GitHub config\n    is_auto_mode: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n    groups: Mapped[list[\"UserGroup\"]] = relationship(\n        \"UserGroup\",\n        secondary=\"llm_provider__user_group\",\n        viewonly=True,\n    )\n    personas: Mapped[list[\"Persona\"]] = relationship(\n        \"Persona\",\n        secondary=\"llm_provider__persona\",\n        back_populates=\"allowed_by_llm_providers\",\n        viewonly=True,\n    )\n    model_configurations: Mapped[list[\"ModelConfiguration\"]] = relationship(\n        \"ModelConfiguration\",\n        back_populates=\"llm_provider\",\n        foreign_keys=\"ModelConfiguration.llm_provider_id\",\n    )\n\n\nclass ModelConfiguration(Base):\n    __tablename__ = \"model_configuration\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    llm_provider_id: Mapped[int] = mapped_column(\n        ForeignKey(\"llm_provider.id\", ondelete=\"CASCADE\"),\n        nullable=False,\n    )\n    name: Mapped[str] = mapped_column(String, nullable=False)\n\n    # Represents whether or not a given model will be usable by the end user or not.\n    # This field is primarily used for \"Well Known LLM Providers\", since for them,\n    # we have a pre-defined list of LLM models that we allow them to choose from.\n    # For example, for OpenAI, we allow the end-user to choose multiple models from\n    # `[\"gpt-4\", \"gpt-4o\", etc.]`. Once they make their selections, we set each\n    # selected model to `is_visible = True`.\n    #\n    # For \"Custom LLM Providers\", we don't provide a comprehensive list of models\n    # for the end-user to choose from; *they provide it themselves*. Therefore,\n    # for Custom LLM Providers, `is_visible` will always be True.\n    is_visible: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n\n    # Max input tokens can be null when:\n    # - The end-user configures models through a \"Well Known LLM Provider\".\n    # - The end-user is configuring a model and chooses not to set a max-input-tokens limit.\n    max_input_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)\n\n    # Deprecated: use LLMModelFlow with VISION flow type instead\n    supports_image_input: Mapped[bool | None] = mapped_column(Boolean, nullable=True)\n\n    # Human-readable display name for the model.\n    # For dynamic providers (OpenRouter, Bedrock, Ollama), this comes from the source API.\n    # For static providers (OpenAI, Anthropic), this may be null and will fall back to LiteLLM.\n    display_name: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    llm_provider: Mapped[\"LLMProvider\"] = relationship(\n        \"LLMProvider\",\n        back_populates=\"model_configurations\",\n    )\n\n    llm_model_flows: Mapped[list[\"LLMModelFlow\"]] = relationship(\n        \"LLMModelFlow\",\n        back_populates=\"model_configuration\",\n        cascade=\"all, delete-orphan\",\n        passive_deletes=True,\n    )\n\n    @property\n    def llm_model_flow_types(self) -> list[LLMModelFlowType]:\n        return [flow.llm_model_flow_type for flow in self.llm_model_flows]\n\n\nclass LLMModelFlow(Base):\n    __tablename__ = \"llm_model_flow\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n\n    llm_model_flow_type: Mapped[LLMModelFlowType] = mapped_column(\n        Enum(LLMModelFlowType, native_enum=False), nullable=False\n    )\n    model_configuration_id: Mapped[int] = mapped_column(\n        ForeignKey(\"model_configuration.id\", ondelete=\"CASCADE\"),\n        nullable=False,\n    )\n    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n\n    model_configuration: Mapped[\"ModelConfiguration\"] = relationship(\n        \"ModelConfiguration\",\n        back_populates=\"llm_model_flows\",\n    )\n\n    __table_args__ = (\n        UniqueConstraint(\n            \"llm_model_flow_type\",\n            \"model_configuration_id\",\n            name=\"uq_model_config_per_llm_model_flow_type\",\n        ),\n        Index(\n            \"ix_one_default_per_llm_model_flow\",\n            \"llm_model_flow_type\",\n            unique=True,\n            postgresql_where=(is_default == True),  # noqa: E712\n        ),\n    )\n\n\nclass ImageGenerationConfig(Base):\n    __tablename__ = \"image_generation_config\"\n\n    image_provider_id: Mapped[str] = mapped_column(String, primary_key=True)\n    model_configuration_id: Mapped[int] = mapped_column(\n        ForeignKey(\"model_configuration.id\", ondelete=\"CASCADE\"),\n        nullable=False,\n    )\n    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n\n    model_configuration: Mapped[\"ModelConfiguration\"] = relationship(\n        \"ModelConfiguration\"\n    )\n\n    __table_args__ = (\n        Index(\"ix_image_generation_config_is_default\", \"is_default\"),\n        Index(\n            \"ix_image_generation_config_model_configuration_id\",\n            \"model_configuration_id\",\n        ),\n    )\n\n\nclass VoiceProvider(Base):\n    \"\"\"Configuration for voice services (STT and TTS).\"\"\"\n\n    __tablename__ = \"voice_provider\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    name: Mapped[str] = mapped_column(String, unique=True)\n    provider_type: Mapped[str] = mapped_column(\n        String\n    )  # \"openai\", \"azure\", \"elevenlabs\"\n    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(\n        EncryptedString(), nullable=True\n    )\n    api_base: Mapped[str | None] = mapped_column(String, nullable=True)\n    custom_config: Mapped[dict[str, Any] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n\n    # Model/voice configuration\n    stt_model: Mapped[str | None] = mapped_column(\n        String, nullable=True\n    )  # e.g., \"whisper-1\"\n    tts_model: Mapped[str | None] = mapped_column(\n        String, nullable=True\n    )  # e.g., \"tts-1\", \"tts-1-hd\"\n    default_voice: Mapped[str | None] = mapped_column(\n        String, nullable=True\n    )  # e.g., \"alloy\", \"echo\"\n\n    # STT and TTS can use different providers - only one provider per type\n    is_default_stt: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n    is_default_tts: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()\n    )\n\n    # Enforce only one default STT provider and one default TTS provider at DB level\n    __table_args__ = (\n        Index(\n            \"ix_voice_provider_one_default_stt\",\n            \"is_default_stt\",\n            unique=True,\n            postgresql_where=(is_default_stt == True),  # noqa: E712\n        ),\n        Index(\n            \"ix_voice_provider_one_default_tts\",\n            \"is_default_tts\",\n            unique=True,\n            postgresql_where=(is_default_tts == True),  # noqa: E712\n        ),\n    )\n\n\nclass CloudEmbeddingProvider(Base):\n    __tablename__ = \"embedding_provider\"\n\n    provider_type: Mapped[EmbeddingProvider] = mapped_column(\n        Enum(EmbeddingProvider), primary_key=True\n    )\n    api_url: Mapped[str | None] = mapped_column(String, nullable=True)\n    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(EncryptedString())\n    api_version: Mapped[str | None] = mapped_column(String, nullable=True)\n    deployment_name: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    search_settings: Mapped[list[\"SearchSettings\"]] = relationship(\n        \"SearchSettings\",\n        back_populates=\"cloud_provider\",\n    )\n\n    def __repr__(self) -> str:\n        return f\"<EmbeddingProvider(type='{self.provider_type}')>\"\n\n\nclass InternetSearchProvider(Base):\n    __tablename__ = \"internet_search_provider\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    name: Mapped[str] = mapped_column(String, unique=True, nullable=False)\n    provider_type: Mapped[str] = mapped_column(String, nullable=False)\n    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(\n        EncryptedString(), nullable=True\n    )\n    config: Mapped[dict[str, str] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n    is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()\n    )\n\n    def __repr__(self) -> str:\n        return f\"<InternetSearchProvider(name='{self.name}', provider_type='{self.provider_type}')>\"\n\n\nclass InternetContentProvider(Base):\n    __tablename__ = \"internet_content_provider\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    name: Mapped[str] = mapped_column(String, unique=True, nullable=False)\n    provider_type: Mapped[str] = mapped_column(String, nullable=False)\n    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(\n        EncryptedString(), nullable=True\n    )\n    config: Mapped[WebContentProviderConfig | None] = mapped_column(\n        PydanticType(WebContentProviderConfig), nullable=True\n    )\n    is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    time_updated: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()\n    )\n\n    def __repr__(self) -> str:\n        return f\"<InternetContentProvider(name='{self.name}', provider_type='{self.provider_type}')>\"\n\n\nclass DocumentSet(Base):\n    __tablename__ = \"document_set\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    name: Mapped[str] = mapped_column(String, unique=True)\n    description: Mapped[str | None] = mapped_column(String)\n    user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    # Whether changes to the document set have been propagated\n    is_up_to_date: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n    # If `False`, then the document set is not visible to users who are not explicitly\n    # given access to it either via the `users` or `groups` relationships\n    is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)\n\n    # Last time a user updated this document set\n    time_last_modified_by_user: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    connector_credential_pairs: Mapped[list[ConnectorCredentialPair]] = relationship(\n        \"ConnectorCredentialPair\",\n        secondary=DocumentSet__ConnectorCredentialPair.__table__,\n        primaryjoin=(\n            (DocumentSet__ConnectorCredentialPair.document_set_id == id)\n            & (DocumentSet__ConnectorCredentialPair.is_current.is_(True))\n        ),\n        secondaryjoin=(\n            DocumentSet__ConnectorCredentialPair.connector_credential_pair_id\n            == ConnectorCredentialPair.id\n        ),\n        back_populates=\"document_sets\",\n        overlaps=\"document_set\",\n    )\n    personas: Mapped[list[\"Persona\"]] = relationship(\n        \"Persona\",\n        secondary=Persona__DocumentSet.__table__,\n        back_populates=\"document_sets\",\n    )\n    # Other users with access\n    users: Mapped[list[User]] = relationship(\n        \"User\",\n        secondary=DocumentSet__User.__table__,\n        viewonly=True,\n    )\n    # EE only\n    groups: Mapped[list[\"UserGroup\"]] = relationship(\n        \"UserGroup\",\n        secondary=\"document_set__user_group\",\n        viewonly=True,\n    )\n    federated_connectors: Mapped[list[\"FederatedConnector__DocumentSet\"]] = (\n        relationship(\n            \"FederatedConnector__DocumentSet\",\n            back_populates=\"document_set\",\n            cascade=\"all, delete-orphan\",\n        )\n    )\n\n\nclass Tool(Base):\n    __tablename__ = \"tool\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    # The name of the tool that the LLM will see\n    name: Mapped[str] = mapped_column(String, nullable=False)\n    description: Mapped[str] = mapped_column(Text, nullable=True)\n    # ID of the tool in the codebase, only applies for in-code tools.\n    # tools defined via the UI will have this as None\n    in_code_tool_id: Mapped[str | None] = mapped_column(String, nullable=True)\n    display_name: Mapped[str] = mapped_column(String, nullable=True)\n\n    # OpenAPI scheme for the tool. Only applies to tools defined via the UI.\n    openapi_schema: Mapped[dict[str, Any] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n    # MCP tool input schema. Only applies to MCP tools.\n    mcp_input_schema: Mapped[dict[str, Any] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n    custom_headers: Mapped[list[HeaderItemDict] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n    # user who created / owns the tool. Will be None for built-in tools.\n    user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    # whether to pass through the user's OAuth token as Authorization header\n    passthrough_auth: Mapped[bool] = mapped_column(Boolean, default=False)\n    # MCP server this tool is associated with (null for non-MCP tools)\n    mcp_server_id: Mapped[int | None] = mapped_column(\n        Integer, ForeignKey(\"mcp_server.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    # OAuth configuration for this tool (null for tools without OAuth)\n    oauth_config_id: Mapped[int | None] = mapped_column(\n        Integer, ForeignKey(\"oauth_config.id\", ondelete=\"SET NULL\"), nullable=True\n    )\n    enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)\n\n    user: Mapped[User | None] = relationship(\"User\", back_populates=\"custom_tools\")\n    oauth_config: Mapped[\"OAuthConfig | None\"] = relationship(\n        \"OAuthConfig\", back_populates=\"tools\"\n    )\n    # Relationship to Persona through the association table\n    personas: Mapped[list[\"Persona\"]] = relationship(\n        \"Persona\",\n        secondary=Persona__Tool.__table__,\n        back_populates=\"tools\",\n    )\n    # MCP server relationship\n    mcp_server: Mapped[\"MCPServer | None\"] = relationship(\n        \"MCPServer\", back_populates=\"current_actions\"\n    )\n\n\nclass OAuthConfig(Base):\n    \"\"\"OAuth provider configuration that can be shared across multiple tools\"\"\"\n\n    __tablename__ = \"oauth_config\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    name: Mapped[str] = mapped_column(String, unique=True, nullable=False)\n\n    # OAuth provider endpoints\n    authorization_url: Mapped[str] = mapped_column(Text, nullable=False)\n    token_url: Mapped[str] = mapped_column(Text, nullable=False)\n\n    # Client credentials (encrypted)\n    client_id: Mapped[SensitiveValue[str] | None] = mapped_column(\n        EncryptedString(), nullable=False\n    )\n    client_secret: Mapped[SensitiveValue[str] | None] = mapped_column(\n        EncryptedString(), nullable=False\n    )\n\n    # Optional configurations\n    scopes: Mapped[list[str] | None] = mapped_column(postgresql.JSONB(), nullable=True)\n    additional_params: Mapped[dict[str, Any] | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n\n    # Metadata\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n        nullable=False,\n    )\n\n    # Relationships\n    tools: Mapped[list[\"Tool\"]] = relationship(\"Tool\", back_populates=\"oauth_config\")\n    user_tokens: Mapped[list[\"OAuthUserToken\"]] = relationship(\n        \"OAuthUserToken\", back_populates=\"oauth_config\", cascade=\"all, delete-orphan\"\n    )\n\n\nclass OAuthUserToken(Base):\n    \"\"\"Per-user OAuth tokens for a specific OAuth configuration\"\"\"\n\n    __tablename__ = \"oauth_user_token\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    oauth_config_id: Mapped[int] = mapped_column(\n        ForeignKey(\"oauth_config.id\", ondelete=\"CASCADE\"), nullable=False\n    )\n    user_id: Mapped[UUID] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=False\n    )\n\n    # Token data (encrypted)\n    # Structure: {\n    #   \"access_token\": \"...\",\n    #   \"refresh_token\": \"...\",  # Optional\n    #   \"token_type\": \"Bearer\",\n    #   \"expires_at\": 1234567890,  # Unix timestamp, optional\n    #   \"scope\": \"repo user\"  # Optional\n    # }\n    token_data: Mapped[SensitiveValue[dict[str, Any]] | None] = mapped_column(\n        EncryptedJson(), nullable=False\n    )\n\n    # Metadata\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n        nullable=False,\n    )\n\n    # Relationships\n    oauth_config: Mapped[\"OAuthConfig\"] = relationship(\n        \"OAuthConfig\", back_populates=\"user_tokens\"\n    )\n    user: Mapped[\"User\"] = relationship(\"User\")\n\n    # Unique constraint: One token per user per OAuth config\n    __table_args__ = (\n        UniqueConstraint(\"oauth_config_id\", \"user_id\", name=\"uq_oauth_user_token\"),\n    )\n\n\nclass StarterMessage(BaseModel):\n    \"\"\"Starter message for a persona.\"\"\"\n\n    name: str\n    message: str\n\n\nclass Persona__PersonaLabel(Base):\n    __tablename__ = \"persona__persona_label\"\n\n    persona_id: Mapped[int] = mapped_column(ForeignKey(\"persona.id\"), primary_key=True)\n    persona_label_id: Mapped[int] = mapped_column(\n        ForeignKey(\"persona_label.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n\n\nclass Persona(Base):\n    __tablename__ = \"persona\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    name: Mapped[str] = mapped_column(String)\n    description: Mapped[str] = mapped_column(String)\n\n    # Allows the persona to specify a specific default LLM model\n    # NOTE: only is applied on the actual response generation - is not used for things like\n    # auto-detected time filters, relevance filters, etc.\n    llm_model_provider_override: Mapped[str | None] = mapped_column(\n        String, nullable=True\n    )\n    llm_model_version_override: Mapped[str | None] = mapped_column(\n        String, nullable=True\n    )\n    default_model_configuration_id: Mapped[int | None] = mapped_column(\n        Integer,\n        ForeignKey(\"model_configuration.id\", ondelete=\"SET NULL\"),\n        nullable=True,\n    )\n\n    starter_messages: Mapped[list[StarterMessage] | None] = mapped_column(\n        PydanticListType(StarterMessage), nullable=True\n    )\n    search_start_date: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), default=None\n    )\n    # Built-in personas are configured via backend during deployment\n    # Treated specially (cannot be user edited etc.)\n    builtin_persona: Mapped[bool] = mapped_column(Boolean, default=False)\n\n    # Featured personas are highlighted in the UI\n    is_featured: Mapped[bool] = mapped_column(Boolean, default=False)\n    # controls whether the persona is listed in user-facing agent lists\n    is_listed: Mapped[bool] = mapped_column(Boolean, default=True)\n    # controls the ordering of personas in the UI\n    # higher priority personas are displayed first, ties are resolved by the ID,\n    # where lower value IDs (e.g. created earlier) are displayed first\n    display_priority: Mapped[int | None] = mapped_column(\n        Integer, nullable=True, default=None\n    )\n    deleted: Mapped[bool] = mapped_column(Boolean, default=False)\n\n    # Custom Agent Prompt\n    system_prompt: Mapped[str | None] = mapped_column(\n        String(length=PROMPT_LENGTH), nullable=True\n    )\n    replace_base_system_prompt: Mapped[bool] = mapped_column(Boolean, default=False)\n    task_prompt: Mapped[str | None] = mapped_column(\n        String(length=PROMPT_LENGTH), nullable=True\n    )\n    datetime_aware: Mapped[bool] = mapped_column(Boolean, default=True)\n\n    uploaded_image_id: Mapped[str | None] = mapped_column(String, nullable=True)\n    icon_name: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    # These are only defaults, users can select from all if desired\n    document_sets: Mapped[list[DocumentSet]] = relationship(\n        \"DocumentSet\",\n        secondary=Persona__DocumentSet.__table__,\n        back_populates=\"personas\",\n    )\n    tools: Mapped[list[Tool]] = relationship(\n        \"Tool\",\n        secondary=Persona__Tool.__table__,\n        back_populates=\"personas\",\n    )\n    # Owner\n    user: Mapped[User | None] = relationship(\"User\", back_populates=\"personas\")\n    # Other users with access\n    users: Mapped[list[User]] = relationship(\n        \"User\",\n        secondary=Persona__User.__table__,\n        viewonly=True,\n    )\n    # EE only\n    is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)\n    groups: Mapped[list[\"UserGroup\"]] = relationship(\n        \"UserGroup\",\n        secondary=\"persona__user_group\",\n        viewonly=True,\n    )\n    allowed_by_llm_providers: Mapped[list[\"LLMProvider\"]] = relationship(\n        \"LLMProvider\",\n        secondary=\"llm_provider__persona\",\n        back_populates=\"personas\",\n        viewonly=True,\n    )\n    # Relationship to UserFile\n    user_files: Mapped[list[\"UserFile\"]] = relationship(\n        \"UserFile\",\n        secondary=\"persona__user_file\",\n        back_populates=\"assistants\",\n    )\n    labels: Mapped[list[\"PersonaLabel\"]] = relationship(\n        \"PersonaLabel\",\n        secondary=Persona__PersonaLabel.__table__,\n        back_populates=\"personas\",\n    )\n    # Hierarchy nodes attached to this persona for scoped search\n    hierarchy_nodes: Mapped[list[\"HierarchyNode\"]] = relationship(\n        \"HierarchyNode\",\n        secondary=\"persona__hierarchy_node\",\n        back_populates=\"personas\",\n    )\n    # Individual documents attached to this persona for scoped search\n    attached_documents: Mapped[list[\"Document\"]] = relationship(\n        \"Document\",\n        secondary=\"persona__document\",\n        back_populates=\"attached_personas\",\n    )\n\n    # Default personas loaded via yaml cannot have the same name\n    __table_args__ = (\n        Index(\n            \"_builtin_persona_name_idx\",\n            \"name\",\n            unique=True,\n            postgresql_where=(builtin_persona == True),  # noqa: E712\n        ),\n    )\n\n\nclass Persona__UserFile(Base):\n    __tablename__ = \"persona__user_file\"\n\n    persona_id: Mapped[int] = mapped_column(\n        ForeignKey(\"persona.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    user_file_id: Mapped[UUID] = mapped_column(\n        ForeignKey(\"user_file.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n\n\nclass Persona__HierarchyNode(Base):\n    \"\"\"Association table linking personas to hierarchy nodes.\n\n    This allows assistants to be configured with specific hierarchy nodes\n    (folders, spaces, channels, etc.) for scoped search/retrieval.\n    \"\"\"\n\n    __tablename__ = \"persona__hierarchy_node\"\n\n    persona_id: Mapped[int] = mapped_column(\n        ForeignKey(\"persona.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    hierarchy_node_id: Mapped[int] = mapped_column(\n        ForeignKey(\"hierarchy_node.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n\n\nclass Persona__Document(Base):\n    \"\"\"Association table linking personas to individual documents.\n\n    This allows assistants to be configured with specific documents\n    for scoped search/retrieval. Complements hierarchy_nodes which\n    allow attaching folders/spaces.\n    \"\"\"\n\n    __tablename__ = \"persona__document\"\n\n    persona_id: Mapped[int] = mapped_column(\n        ForeignKey(\"persona.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    document_id: Mapped[str] = mapped_column(\n        ForeignKey(\"document.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n\n\nclass PersonaLabel(Base):\n    __tablename__ = \"persona_label\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    name: Mapped[str] = mapped_column(String, unique=True)\n    personas: Mapped[list[\"Persona\"]] = relationship(\n        \"Persona\",\n        secondary=Persona__PersonaLabel.__table__,\n        back_populates=\"labels\",\n    )\n\n\nclass Assistant__UserSpecificConfig(Base):\n    __tablename__ = \"assistant__user_specific_config\"\n\n    assistant_id: Mapped[int] = mapped_column(\n        ForeignKey(\"persona.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    user_id: Mapped[UUID] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    disabled_tool_ids: Mapped[list[int]] = mapped_column(\n        postgresql.ARRAY(Integer), nullable=False\n    )\n\n\nAllowedAnswerFilters = (\n    Literal[\"well_answered_postfilter\"] | Literal[\"questionmark_prefilter\"]\n)\n\n\nclass ChannelConfig(TypedDict):\n    \"\"\"NOTE: is a `TypedDict` so it can be used as a type hint for a JSONB column\n    in Postgres\"\"\"\n\n    channel_name: str | None  # None for default channel config\n    respond_tag_only: NotRequired[bool]  # defaults to False\n    respond_to_bots: NotRequired[bool]  # defaults to False\n    is_ephemeral: NotRequired[bool]  # defaults to False\n    respond_member_group_list: NotRequired[list[str]]\n    answer_filters: NotRequired[list[AllowedAnswerFilters]]\n    # If None then no follow up\n    # If empty list, follow up with no tags\n    follow_up_tags: NotRequired[list[str]]\n    show_continue_in_web_ui: NotRequired[bool]  # defaults to False\n    disabled: NotRequired[bool]  # defaults to False\n\n\nclass SlackChannelConfig(Base):\n    __tablename__ = \"slack_channel_config\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    slack_bot_id: Mapped[int] = mapped_column(\n        ForeignKey(\"slack_bot.id\"), nullable=False\n    )\n    persona_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"persona.id\"), nullable=True\n    )\n    channel_config: Mapped[ChannelConfig] = mapped_column(\n        postgresql.JSONB(), nullable=False\n    )\n\n    enable_auto_filters: Mapped[bool] = mapped_column(\n        Boolean, nullable=False, default=False\n    )\n\n    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n\n    persona: Mapped[Persona | None] = relationship(\"Persona\")\n\n    slack_bot: Mapped[\"SlackBot\"] = relationship(\n        \"SlackBot\",\n        back_populates=\"slack_channel_configs\",\n    )\n    standard_answer_categories: Mapped[list[\"StandardAnswerCategory\"]] = relationship(\n        \"StandardAnswerCategory\",\n        secondary=SlackChannelConfig__StandardAnswerCategory.__table__,\n        back_populates=\"slack_channel_configs\",\n    )\n\n    __table_args__ = (\n        UniqueConstraint(\n            \"slack_bot_id\",\n            \"is_default\",\n            name=\"uq_slack_channel_config_slack_bot_id_default\",\n        ),\n        Index(\n            \"ix_slack_channel_config_slack_bot_id_default\",\n            \"slack_bot_id\",\n            \"is_default\",\n            unique=True,\n            postgresql_where=(is_default is True),\n        ),\n    )\n\n\nclass SlackBot(Base):\n    __tablename__ = \"slack_bot\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    name: Mapped[str] = mapped_column(String)\n    enabled: Mapped[bool] = mapped_column(Boolean, default=True)\n\n    bot_token: Mapped[SensitiveValue[str] | None] = mapped_column(\n        EncryptedString(), unique=True\n    )\n    app_token: Mapped[SensitiveValue[str] | None] = mapped_column(\n        EncryptedString(), unique=True\n    )\n    user_token: Mapped[SensitiveValue[str] | None] = mapped_column(\n        EncryptedString(), nullable=True\n    )\n\n    slack_channel_configs: Mapped[list[SlackChannelConfig]] = relationship(\n        \"SlackChannelConfig\",\n        back_populates=\"slack_bot\",\n        cascade=\"all, delete-orphan\",\n    )\n\n\nclass DiscordBotConfig(Base):\n    \"\"\"Global Discord bot configuration (one per tenant).\n\n    Stores the bot token when not provided via DISCORD_BOT_TOKEN env var.\n    Uses a fixed ID with check constraint to enforce only one row per tenant.\n    \"\"\"\n\n    __tablename__ = \"discord_bot_config\"\n\n    id: Mapped[str] = mapped_column(\n        String, primary_key=True, server_default=text(\"'SINGLETON'\")\n    )\n    bot_token: Mapped[SensitiveValue[str] | None] = mapped_column(\n        EncryptedString(), nullable=False\n    )\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n\n\nclass DiscordGuildConfig(Base):\n    \"\"\"Configuration for a Discord guild (server) connected to this tenant.\n\n    registration_key is a one-time key used to link a Discord server to this tenant.\n    Format: discord_<tenant_id>.<random_token>\n    guild_id is NULL until the Discord admin runs !register with the key.\n    \"\"\"\n\n    __tablename__ = \"discord_guild_config\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n\n    # Discord snowflake - NULL until registered via command in Discord\n    guild_id: Mapped[int | None] = mapped_column(BigInteger, nullable=True, unique=True)\n    guild_name: Mapped[str | None] = mapped_column(String(256), nullable=True)\n\n    # One-time registration key: discord_<tenant_id>.<random_token>\n    registration_key: Mapped[str] = mapped_column(String, unique=True, nullable=False)\n\n    registered_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n\n    # Configuration\n    default_persona_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"persona.id\", ondelete=\"SET NULL\"), nullable=True\n    )\n    enabled: Mapped[bool] = mapped_column(\n        Boolean, server_default=text(\"true\"), nullable=False\n    )\n\n    # Relationships\n    default_persona: Mapped[\"Persona | None\"] = relationship(\n        \"Persona\", foreign_keys=[default_persona_id]\n    )\n    channels: Mapped[list[\"DiscordChannelConfig\"]] = relationship(\n        back_populates=\"guild_config\", cascade=\"all, delete-orphan\"\n    )\n\n\nclass DiscordChannelConfig(Base):\n    \"\"\"Per-channel configuration for Discord bot behavior.\n\n    Used to whitelist specific channels and configure per-channel behavior.\n    \"\"\"\n\n    __tablename__ = \"discord_channel_config\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    guild_config_id: Mapped[int] = mapped_column(\n        ForeignKey(\"discord_guild_config.id\", ondelete=\"CASCADE\"), nullable=False\n    )\n\n    # Discord snowflake\n    channel_id: Mapped[int] = mapped_column(BigInteger, nullable=False)\n    channel_name: Mapped[str] = mapped_column(String(), nullable=False)\n\n    # Channel type from Discord (text, forum)\n    channel_type: Mapped[str] = mapped_column(\n        String(20), server_default=text(\"'text'\"), nullable=False\n    )\n\n    # True if @everyone cannot view the channel\n    is_private: Mapped[bool] = mapped_column(\n        Boolean, server_default=text(\"false\"), nullable=False\n    )\n\n    # If true, bot only responds to messages in threads\n    # Otherwise, will reply in channel\n    thread_only_mode: Mapped[bool] = mapped_column(\n        Boolean, server_default=text(\"false\"), nullable=False\n    )\n\n    # If true (default), bot only responds when @mentioned\n    # If false, bot responds to ALL messages in this channel\n    require_bot_invocation: Mapped[bool] = mapped_column(\n        Boolean, server_default=text(\"true\"), nullable=False\n    )\n\n    # Override the guild's default persona for this channel\n    persona_override_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"persona.id\", ondelete=\"SET NULL\"), nullable=True\n    )\n\n    enabled: Mapped[bool] = mapped_column(\n        Boolean, server_default=text(\"false\"), nullable=False\n    )\n\n    # Relationships\n    guild_config: Mapped[\"DiscordGuildConfig\"] = relationship(back_populates=\"channels\")\n    persona_override: Mapped[\"Persona | None\"] = relationship()\n\n    # Constraints\n    __table_args__ = (\n        UniqueConstraint(\n            \"guild_config_id\", \"channel_id\", name=\"uq_discord_channel_guild_channel\"\n        ),\n    )\n\n\nclass Milestone(Base):\n    # This table is used to track significant events for a deployment towards finding value\n    # The table is currently not used for features but it may be used in the future to inform\n    # users about the product features and encourage usage/exploration.\n    __tablename__ = \"milestone\"\n\n    id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), primary_key=True, default=uuid4\n    )\n    user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    event_type: Mapped[MilestoneRecordType] = mapped_column(String)\n    # Need to track counts and specific ids of certain events to know if the Milestone has been reached\n    event_tracker: Mapped[dict | None] = mapped_column(\n        postgresql.JSONB(), nullable=True\n    )\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    user: Mapped[User | None] = relationship(\"User\")\n\n    __table_args__ = (UniqueConstraint(\"event_type\", name=\"uq_milestone_event_type\"),)\n\n\nclass TaskQueueState(Base):\n    # Currently refers to Celery Tasks\n    __tablename__ = \"task_queue_jobs\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    # Celery task id. currently only for readability/diagnostics\n    task_id: Mapped[str] = mapped_column(String)\n    # For any job type, this would be the same\n    task_name: Mapped[str] = mapped_column(String)\n    # Note that if the task dies, this won't necessarily be marked FAILED correctly\n    status: Mapped[TaskStatus] = mapped_column(Enum(TaskStatus, native_enum=False))\n    start_time: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True)\n    )\n    register_time: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n\nclass KVStore(Base):\n    __tablename__ = \"key_value_store\"\n\n    key: Mapped[str] = mapped_column(String, primary_key=True)\n    value: Mapped[JSON_ro] = mapped_column(postgresql.JSONB(), nullable=True)\n    encrypted_value: Mapped[SensitiveValue[dict[str, Any]] | None] = mapped_column(\n        EncryptedJson(), nullable=True\n    )\n\n\nclass FileRecord(Base):\n    __tablename__ = \"file_record\"\n\n    # Internal file ID, must be unique across all files.\n    file_id: Mapped[str] = mapped_column(String, primary_key=True)\n\n    display_name: Mapped[str] = mapped_column(String, nullable=True)\n    file_origin: Mapped[FileOrigin] = mapped_column(Enum(FileOrigin, native_enum=False))\n    file_type: Mapped[str] = mapped_column(String, default=\"text/plain\")\n    file_metadata: Mapped[JSON_ro] = mapped_column(postgresql.JSONB(), nullable=True)\n\n    # External storage support (S3, MinIO, Azure Blob, etc.)\n    bucket_name: Mapped[str] = mapped_column(String)\n    object_key: Mapped[str] = mapped_column(String)\n\n    # Timestamps for external storage\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()\n    )\n\n\nclass FileContent(Base):\n    \"\"\"Stores file content in PostgreSQL using Large Objects.\n    Used when FILE_STORE_BACKEND=postgres to avoid needing S3/MinIO.\"\"\"\n\n    __tablename__ = \"file_content\"\n\n    file_id: Mapped[str] = mapped_column(\n        String,\n        ForeignKey(\"file_record.file_id\", ondelete=\"CASCADE\"),\n        primary_key=True,\n    )\n    # PostgreSQL Large Object OID referencing pg_largeobject\n    lobj_oid: Mapped[int] = mapped_column(BigInteger, nullable=False)\n    file_size: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)\n\n\n\"\"\"\n************************************************************************\nEnterprise Edition Models\n************************************************************************\n\nThese models are only used in Enterprise Edition only features in Onyx.\nThey are kept here to simplify the codebase and avoid having different assumptions\non the shape of data being passed around between the MIT and EE versions of Onyx.\n\nIn the MIT version of Onyx, assume these tables are always empty.\n\"\"\"\n\n\nclass SamlAccount(Base):\n    __tablename__ = \"saml\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    user_id: Mapped[int] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), unique=True\n    )\n    encrypted_cookie: Mapped[str] = mapped_column(Text, unique=True)\n    expires_at: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()\n    )\n\n    user: Mapped[User] = relationship(\"User\")\n\n\nclass User__UserGroup(Base):\n    __tablename__ = \"user__user_group\"\n\n    __table_args__ = (Index(\"ix_user__user_group_user_id\", \"user_id\"),)\n\n    is_curator: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n\n    user_group_id: Mapped[int] = mapped_column(\n        ForeignKey(\"user_group.id\"), primary_key=True\n    )\n    user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), primary_key=True, nullable=True\n    )\n\n\nclass PermissionGrant(Base):\n    __tablename__ = \"permission_grant\"\n\n    __table_args__ = (\n        UniqueConstraint(\n            \"group_id\", \"permission\", name=\"uq_permission_grant_group_permission\"\n        ),\n    )\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)\n    group_id: Mapped[int] = mapped_column(\n        ForeignKey(\"user_group.id\", ondelete=\"CASCADE\"), nullable=False\n    )\n    permission: Mapped[Permission] = mapped_column(\n        Enum(\n            Permission,\n            native_enum=False,\n            values_callable=lambda x: [e.value for e in x],\n        ),\n        nullable=False,\n    )\n    grant_source: Mapped[GrantSource] = mapped_column(\n        Enum(GrantSource, native_enum=False), nullable=False\n    )\n    granted_by: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"SET NULL\"), nullable=True\n    )\n    granted_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    is_deleted: Mapped[bool] = mapped_column(\n        Boolean, nullable=False, default=False, server_default=text(\"false\")\n    )\n\n    group: Mapped[\"UserGroup\"] = relationship(\n        \"UserGroup\", back_populates=\"permission_grants\"\n    )\n\n    @validates(\"permission\")\n    def _validate_permission(self, _key: str, value: Permission) -> Permission:\n        if value in Permission.IMPLIED:\n            raise ValueError(\n                f\"{value!r} is an implied permission and cannot be granted directly\"\n            )\n        return value\n\n\nclass UserGroup__ConnectorCredentialPair(Base):\n    __tablename__ = \"user_group__connector_credential_pair\"\n\n    user_group_id: Mapped[int] = mapped_column(\n        ForeignKey(\"user_group.id\"), primary_key=True\n    )\n    cc_pair_id: Mapped[int] = mapped_column(\n        ForeignKey(\"connector_credential_pair.id\"), primary_key=True\n    )\n    # if `True`, then is part of the current state of the UserGroup\n    # if `False`, then is a part of the prior state of the UserGroup\n    # rows with `is_current=False` should be deleted when the UserGroup\n    # is updated and should not exist for a given UserGroup if\n    # `UserGroup.is_up_to_date == True`\n    is_current: Mapped[bool] = mapped_column(\n        Boolean,\n        default=True,\n        primary_key=True,\n    )\n\n    cc_pair: Mapped[ConnectorCredentialPair] = relationship(\n        \"ConnectorCredentialPair\",\n    )\n\n\nclass Persona__UserGroup(Base):\n    __tablename__ = \"persona__user_group\"\n\n    persona_id: Mapped[int] = mapped_column(ForeignKey(\"persona.id\"), primary_key=True)\n    user_group_id: Mapped[int] = mapped_column(\n        ForeignKey(\"user_group.id\"), primary_key=True\n    )\n\n\nclass LLMProvider__Persona(Base):\n    \"\"\"Association table restricting LLM providers to specific personas.\n\n    If no such rows exist for a given LLM provider, then it is accessible by all personas.\n    \"\"\"\n\n    __tablename__ = \"llm_provider__persona\"\n\n    llm_provider_id: Mapped[int] = mapped_column(\n        ForeignKey(\"llm_provider.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    persona_id: Mapped[int] = mapped_column(\n        ForeignKey(\"persona.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n\n\nclass LLMProvider__UserGroup(Base):\n    __tablename__ = \"llm_provider__user_group\"\n\n    llm_provider_id: Mapped[int] = mapped_column(\n        ForeignKey(\"llm_provider.id\"), primary_key=True\n    )\n    user_group_id: Mapped[int] = mapped_column(\n        ForeignKey(\"user_group.id\"), primary_key=True\n    )\n\n\nclass DocumentSet__UserGroup(Base):\n    __tablename__ = \"document_set__user_group\"\n\n    document_set_id: Mapped[int] = mapped_column(\n        ForeignKey(\"document_set.id\"), primary_key=True\n    )\n    user_group_id: Mapped[int] = mapped_column(\n        ForeignKey(\"user_group.id\"), primary_key=True\n    )\n\n\nclass Credential__UserGroup(Base):\n    __tablename__ = \"credential__user_group\"\n\n    credential_id: Mapped[int] = mapped_column(\n        ForeignKey(\"credential.id\"), primary_key=True\n    )\n    user_group_id: Mapped[int] = mapped_column(\n        ForeignKey(\"user_group.id\"), primary_key=True\n    )\n\n\nclass UserGroup(Base):\n    __tablename__ = \"user_group\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    name: Mapped[str] = mapped_column(String, unique=True)\n    # whether or not changes to the UserGroup have been propagated to Vespa\n    is_up_to_date: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n    # tell the sync job to clean up the group\n    is_up_for_deletion: Mapped[bool] = mapped_column(\n        Boolean, nullable=False, default=False\n    )\n    # whether this is a default group (e.g. \"Basic\", \"Admins\") that cannot be deleted\n    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n\n    # Last time a user updated this user group\n    time_last_modified_by_user: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    users: Mapped[list[User]] = relationship(\n        \"User\",\n        secondary=User__UserGroup.__table__,\n    )\n    user_group_relationships: Mapped[list[User__UserGroup]] = relationship(\n        \"User__UserGroup\",\n        viewonly=True,\n    )\n    cc_pairs: Mapped[list[ConnectorCredentialPair]] = relationship(\n        \"ConnectorCredentialPair\",\n        secondary=UserGroup__ConnectorCredentialPair.__table__,\n        viewonly=True,\n    )\n    cc_pair_relationships: Mapped[list[UserGroup__ConnectorCredentialPair]] = (\n        relationship(\n            \"UserGroup__ConnectorCredentialPair\",\n            viewonly=True,\n        )\n    )\n    personas: Mapped[list[Persona]] = relationship(\n        \"Persona\",\n        secondary=Persona__UserGroup.__table__,\n        viewonly=True,\n    )\n    document_sets: Mapped[list[DocumentSet]] = relationship(\n        \"DocumentSet\",\n        secondary=DocumentSet__UserGroup.__table__,\n        viewonly=True,\n    )\n    credentials: Mapped[list[Credential]] = relationship(\n        \"Credential\",\n        secondary=Credential__UserGroup.__table__,\n    )\n    # MCP servers accessible to this user group\n    accessible_mcp_servers: Mapped[list[\"MCPServer\"]] = relationship(\n        \"MCPServer\", secondary=\"mcp_server__user_group\", back_populates=\"user_groups\"\n    )\n    permission_grants: Mapped[list[\"PermissionGrant\"]] = relationship(\n        \"PermissionGrant\", back_populates=\"group\", cascade=\"all, delete-orphan\"\n    )\n\n\n\"\"\"Tables related to Token Rate Limiting\nNOTE: `TokenRateLimit` is partially an MIT feature (global rate limit)\n\"\"\"\n\n\nclass TokenRateLimit(Base):\n    __tablename__ = \"token_rate_limit\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)\n    token_budget: Mapped[int] = mapped_column(Integer, nullable=False)\n    period_hours: Mapped[int] = mapped_column(Integer, nullable=False)\n    scope: Mapped[TokenRateLimitScope] = mapped_column(\n        Enum(TokenRateLimitScope, native_enum=False)\n    )\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n\nclass TokenRateLimit__UserGroup(Base):\n    __tablename__ = \"token_rate_limit__user_group\"\n\n    rate_limit_id: Mapped[int] = mapped_column(\n        ForeignKey(\"token_rate_limit.id\"), primary_key=True\n    )\n    user_group_id: Mapped[int] = mapped_column(\n        ForeignKey(\"user_group.id\"), primary_key=True\n    )\n\n\nclass StandardAnswerCategory(Base):\n    __tablename__ = \"standard_answer_category\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    name: Mapped[str] = mapped_column(String, unique=True)\n    standard_answers: Mapped[list[\"StandardAnswer\"]] = relationship(\n        \"StandardAnswer\",\n        secondary=StandardAnswer__StandardAnswerCategory.__table__,\n        back_populates=\"categories\",\n    )\n    slack_channel_configs: Mapped[list[\"SlackChannelConfig\"]] = relationship(\n        \"SlackChannelConfig\",\n        secondary=SlackChannelConfig__StandardAnswerCategory.__table__,\n        back_populates=\"standard_answer_categories\",\n    )\n\n\nclass StandardAnswer(Base):\n    __tablename__ = \"standard_answer\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    keyword: Mapped[str] = mapped_column(String)\n    answer: Mapped[str] = mapped_column(String)\n    active: Mapped[bool] = mapped_column(Boolean)\n    match_regex: Mapped[bool] = mapped_column(Boolean)\n    match_any_keywords: Mapped[bool] = mapped_column(Boolean)\n\n    __table_args__ = (\n        Index(\n            \"unique_keyword_active\",\n            keyword,\n            active,\n            unique=True,\n            postgresql_where=(active == True),  # noqa: E712\n        ),\n    )\n\n    categories: Mapped[list[StandardAnswerCategory]] = relationship(\n        \"StandardAnswerCategory\",\n        secondary=StandardAnswer__StandardAnswerCategory.__table__,\n        back_populates=\"standard_answers\",\n    )\n    chat_messages: Mapped[list[ChatMessage]] = relationship(\n        \"ChatMessage\",\n        secondary=ChatMessage__StandardAnswer.__table__,\n        back_populates=\"standard_answers\",\n    )\n\n\nclass BackgroundError(Base):\n    \"\"\"Important background errors. Serves to:\n    1. Ensure that important logs are kept around and not lost on rotation/container restarts\n    2. A trail for high-signal events so that the debugger doesn't need to remember/know every\n       possible relevant log line.\n    \"\"\"\n\n    __tablename__ = \"background_error\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    message: Mapped[str] = mapped_column(String)\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    # option to link the error to a specific CC Pair\n    cc_pair_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"connector_credential_pair.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n\n    cc_pair: Mapped[\"ConnectorCredentialPair | None\"] = relationship(\n        \"ConnectorCredentialPair\", back_populates=\"background_errors\"\n    )\n\n\n\"\"\"Tables related to Permission Sync\"\"\"\n\n\nclass User__ExternalUserGroupId(Base):\n    \"\"\"Maps user info both internal and external to the name of the external group\n    This maps the user to all of their external groups so that the external group name can be\n    attached to the ACL list matching during query time. User level permissions can be handled by\n    directly adding the Onyx user to the doc ACL list\"\"\"\n\n    __tablename__ = \"user__external_user_group_id\"\n\n    user_id: Mapped[UUID] = mapped_column(ForeignKey(\"user.id\"), primary_key=True)\n    # These group ids have been prefixed by the source type\n    external_user_group_id: Mapped[str] = mapped_column(String, primary_key=True)\n    cc_pair_id: Mapped[int] = mapped_column(\n        ForeignKey(\"connector_credential_pair.id\"), primary_key=True\n    )\n\n    # Signifies whether or not the group should be cleaned up at the end of a\n    # group sync run.\n    stale: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n\n    __table_args__ = (\n        Index(\n            \"ix_user_external_group_cc_pair_stale\",\n            \"cc_pair_id\",\n            \"stale\",\n        ),\n        Index(\n            \"ix_user_external_group_stale\",\n            \"stale\",\n        ),\n    )\n\n\nclass PublicExternalUserGroup(Base):\n    \"\"\"Stores all public external user \"groups\".\n\n    For example, things like Google Drive folders that are marked\n    as `Anyone with the link` or `Anyone in the domain`\n    \"\"\"\n\n    __tablename__ = \"public_external_user_group\"\n\n    external_user_group_id: Mapped[str] = mapped_column(String, primary_key=True)\n    cc_pair_id: Mapped[int] = mapped_column(\n        ForeignKey(\"connector_credential_pair.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n\n    # Signifies whether or not the group should be cleaned up at the end of a\n    # group sync run.\n    stale: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n\n    __table_args__ = (\n        Index(\n            \"ix_public_external_group_cc_pair_stale\",\n            \"cc_pair_id\",\n            \"stale\",\n        ),\n        Index(\n            \"ix_public_external_group_stale\",\n            \"stale\",\n        ),\n    )\n\n\nclass UsageReport(Base):\n    \"\"\"This stores metadata about usage reports generated by admin including user who generated\n    them as well as the period they cover. The actual zip file of the report is stored as a lo\n    using the FileRecord\n    \"\"\"\n\n    __tablename__ = \"usage_reports\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    report_name: Mapped[str] = mapped_column(ForeignKey(\"file_record.file_id\"))\n\n    # if None, report was auto-generated\n    requestor_user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    period_from: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True)\n    )\n    period_to: Mapped[datetime.datetime | None] = mapped_column(DateTime(timezone=True))\n\n    requestor = relationship(\"User\")\n    file = relationship(\"FileRecord\")\n\n\nclass InputPrompt(Base):\n    __tablename__ = \"inputprompt\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)\n    prompt: Mapped[str] = mapped_column(String)\n    content: Mapped[str] = mapped_column(String)\n    active: Mapped[bool] = mapped_column(Boolean)\n    user: Mapped[User | None] = relationship(\"User\", back_populates=\"input_prompts\")\n    is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)\n    user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n\n    __table_args__ = (\n        # Unique constraint on (prompt, user_id) for user-owned prompts\n        UniqueConstraint(\"prompt\", \"user_id\", name=\"uq_inputprompt_prompt_user_id\"),\n        # Partial unique index for public prompts (user_id IS NULL)\n        Index(\n            \"uq_inputprompt_prompt_public\",\n            \"prompt\",\n            unique=True,\n            postgresql_where=text(\"user_id IS NULL\"),\n        ),\n    )\n\n\nclass InputPrompt__User(Base):\n    __tablename__ = \"inputprompt__user\"\n\n    input_prompt_id: Mapped[int] = mapped_column(\n        ForeignKey(\"inputprompt.id\"), primary_key=True\n    )\n    user_id: Mapped[UUID | None] = mapped_column(\n        ForeignKey(\"user.id\"), primary_key=True\n    )\n    disabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n\n\nclass Project__UserFile(Base):\n    __tablename__ = \"project__user_file\"\n\n    project_id: Mapped[int] = mapped_column(\n        ForeignKey(\"user_project.id\"), primary_key=True\n    )\n    user_file_id: Mapped[UUID] = mapped_column(\n        ForeignKey(\"user_file.id\"), primary_key=True\n    )\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n\n    __table_args__ = (\n        Index(\n            \"ix_project__user_file_project_id_created_at\",\n            project_id,\n            created_at.desc(),\n        ),\n    )\n\n\nclass UserProject(Base):\n    __tablename__ = \"user_project\"\n\n    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)\n    user_id: Mapped[UUID | None] = mapped_column(ForeignKey(\"user.id\"), nullable=False)\n    name: Mapped[str] = mapped_column(nullable=False)\n    description: Mapped[str] = mapped_column(nullable=True)\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    user: Mapped[\"User\"] = relationship(back_populates=\"projects\")\n    user_files: Mapped[list[\"UserFile\"]] = relationship(\n        \"UserFile\",\n        secondary=Project__UserFile.__table__,\n        back_populates=\"projects\",\n    )\n    chat_sessions: Mapped[list[\"ChatSession\"]] = relationship(\n        \"ChatSession\", back_populates=\"project\", lazy=\"selectin\"\n    )\n    instructions: Mapped[str] = mapped_column(String)\n\n\nclass UserDocument(str, Enum):\n    CHAT = \"chat\"\n    RECENT = \"recent\"\n    FILE = \"file\"\n\n\nclass UserFile(Base):\n    __tablename__ = \"user_file\"\n\n    id: Mapped[UUID] = mapped_column(PGUUID(as_uuid=True), primary_key=True)\n    user_id: Mapped[UUID | None] = mapped_column(ForeignKey(\"user.id\"), nullable=False)\n    assistants: Mapped[list[\"Persona\"]] = relationship(\n        \"Persona\",\n        secondary=Persona__UserFile.__table__,\n        back_populates=\"user_files\",\n    )\n    file_id: Mapped[str] = mapped_column(nullable=False)\n    name: Mapped[str] = mapped_column(nullable=False)\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        default=datetime.datetime.utcnow\n    )\n    user: Mapped[\"User\"] = relationship(back_populates=\"files\")\n    token_count: Mapped[int | None] = mapped_column(Integer, nullable=True)\n\n    file_type: Mapped[str] = mapped_column(String, nullable=False)\n\n    status: Mapped[UserFileStatus] = mapped_column(\n        Enum(UserFileStatus, native_enum=False, name=\"userfilestatus\"),\n        nullable=False,\n        default=UserFileStatus.PROCESSING,\n    )\n    needs_project_sync: Mapped[bool] = mapped_column(\n        Boolean, nullable=False, default=False\n    )\n    needs_persona_sync: Mapped[bool] = mapped_column(\n        Boolean, nullable=False, default=False\n    )\n    last_project_sync_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n    chunk_count: Mapped[int | None] = mapped_column(Integer, nullable=True)\n    last_accessed_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n\n    link_url: Mapped[str | None] = mapped_column(String, nullable=True)\n    content_type: Mapped[str | None] = mapped_column(String, nullable=True)\n\n    projects: Mapped[list[\"UserProject\"]] = relationship(\n        \"UserProject\",\n        secondary=Project__UserFile.__table__,\n        back_populates=\"user_files\",\n        lazy=\"selectin\",\n    )\n\n\n\"\"\"\nMulti-tenancy related tables\n\"\"\"\n\n\nclass PublicBase(DeclarativeBase):\n    __abstract__ = True\n\n\n# Strictly keeps track of the tenant that a given user will authenticate to.\nclass UserTenantMapping(Base):\n    __tablename__ = \"user_tenant_mapping\"\n    __table_args__ = ({\"schema\": \"public\"},)\n\n    email: Mapped[str] = mapped_column(String, nullable=False, primary_key=True)\n    tenant_id: Mapped[str] = mapped_column(String, nullable=False, primary_key=True)\n    active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)\n\n    @validates(\"email\")\n    def validate_email(self, key: str, value: str) -> str:  # noqa: ARG002\n        return value.lower() if value else value\n\n\nclass AvailableTenant(Base):\n    __tablename__ = \"available_tenant\"\n    \"\"\"\n    These entries will only exist ephemerally and are meant to be picked up by new users on registration.\n    \"\"\"\n\n    tenant_id: Mapped[str] = mapped_column(String, primary_key=True, nullable=False)\n    alembic_version: Mapped[str] = mapped_column(String, nullable=False)\n    date_created: Mapped[datetime.datetime] = mapped_column(DateTime, nullable=False)\n\n\n# This is a mapping from tenant IDs to anonymous user paths\nclass TenantAnonymousUserPath(Base):\n    __tablename__ = \"tenant_anonymous_user_path\"\n\n    tenant_id: Mapped[str] = mapped_column(String, primary_key=True, nullable=False)\n    anonymous_user_path: Mapped[str] = mapped_column(\n        String, nullable=False, unique=True\n    )\n\n\nclass MCPServer(Base):\n    \"\"\"Model for storing MCP server configurations\"\"\"\n\n    __tablename__ = \"mcp_server\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    # Owner email of user who configured this server\n    owner: Mapped[str] = mapped_column(String, nullable=False)\n    name: Mapped[str] = mapped_column(String, nullable=False)\n    description: Mapped[str | None] = mapped_column(String, nullable=True)\n    server_url: Mapped[str] = mapped_column(String, nullable=False)\n    # Transport type for connecting to the MCP server\n    transport: Mapped[MCPTransport | None] = mapped_column(\n        Enum(MCPTransport, native_enum=False), nullable=True\n    )\n    # Auth type: \"none\", \"api_token\", or \"oauth\"\n    auth_type: Mapped[MCPAuthenticationType | None] = mapped_column(\n        Enum(MCPAuthenticationType, native_enum=False), nullable=True\n    )\n    # Who performs authentication for this server (ADMIN or PER_USER)\n    auth_performer: Mapped[MCPAuthenticationPerformer | None] = mapped_column(\n        Enum(MCPAuthenticationPerformer, native_enum=False), nullable=True\n    )\n    # Status tracking for configuration flow\n    status: Mapped[MCPServerStatus] = mapped_column(\n        Enum(MCPServerStatus, native_enum=False),\n        nullable=False,\n        server_default=\"CREATED\",\n    )\n    # Admin connection config - used for the config page\n    # and (when applicable) admin-managed auth\n    # and (when applicable) per-user auth\n    admin_connection_config_id: Mapped[int | None] = mapped_column(\n        Integer,\n        ForeignKey(\"mcp_connection_config.id\", ondelete=\"SET NULL\"),\n        nullable=True,\n    )\n\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()\n    )\n    last_refreshed_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n\n    # Relationships\n    admin_connection_config: Mapped[\"MCPConnectionConfig | None\"] = relationship(\n        \"MCPConnectionConfig\",\n        foreign_keys=[admin_connection_config_id],\n        back_populates=\"admin_servers\",\n    )\n\n    user_connection_configs: Mapped[list[\"MCPConnectionConfig\"]] = relationship(\n        \"MCPConnectionConfig\",\n        foreign_keys=\"MCPConnectionConfig.mcp_server_id\",\n        back_populates=\"mcp_server\",\n        passive_deletes=True,\n    )\n    current_actions: Mapped[list[\"Tool\"]] = relationship(\n        \"Tool\", back_populates=\"mcp_server\", cascade=\"all, delete-orphan\"\n    )\n    # Many-to-many relationships for access control\n    users: Mapped[list[\"User\"]] = relationship(\n        \"User\", secondary=\"mcp_server__user\", back_populates=\"accessible_mcp_servers\"\n    )\n    user_groups: Mapped[list[\"UserGroup\"]] = relationship(\n        \"UserGroup\",\n        secondary=\"mcp_server__user_group\",\n        back_populates=\"accessible_mcp_servers\",\n    )\n\n\nclass MCPServer__User(Base):\n    __tablename__ = \"mcp_server__user\"\n    mcp_server_id: Mapped[int] = mapped_column(\n        ForeignKey(\"mcp_server.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n    user_id: Mapped[UUID] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), primary_key=True\n    )\n\n\nclass MCPServer__UserGroup(Base):\n    __tablename__ = \"mcp_server__user_group\"\n    mcp_server_id: Mapped[int] = mapped_column(\n        ForeignKey(\"mcp_server.id\"), primary_key=True\n    )\n    user_group_id: Mapped[int] = mapped_column(\n        ForeignKey(\"user_group.id\"), primary_key=True\n    )\n\n\nclass MCPConnectionConfig(Base):\n    \"\"\"Model for storing MCP connection configurations (credentials, auth data)\"\"\"\n\n    __tablename__ = \"mcp_connection_config\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    # Server this config is for (nullable for template configs)\n    mcp_server_id: Mapped[int | None] = mapped_column(\n        Integer, ForeignKey(\"mcp_server.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    # User email this config is for (empty for admin configs and templates)\n    user_email: Mapped[str] = mapped_column(String, nullable=False, default=\"\")\n    # Config data stored as JSON\n    # Format: {\n    #   \"refresh_token\": \"<token>\",  # OAuth only\n    #   \"access_token\": \"<token>\",   # OAuth only\n    #   \"headers\": {\"key\": \"value\", \"key2\": \"value2\"},\n    #   \"header_substitutions\": {\"<key>\": \"<value>\"}, # stored header template substitutions\n    #   \"request_body\": [\"path/in/body:value\", \"path2/in2/body2:value2\"] # TBD\n    #   \"client_id\": \"<id>\",  # For dynamically registered OAuth clients\n    #   \"client_secret\": \"<secret>\",  # For confidential clients\n    #   \"registration_access_token\": \"<token>\",  # For managing registration\n    #   \"registration_client_uri\": \"<uri>\",  # For managing registration\n    # }\n    config: Mapped[SensitiveValue[dict[str, Any]] | None] = mapped_column(\n        EncryptedJson(), nullable=False, default=dict\n    )\n\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()\n    )\n\n    # Relationships\n    mcp_server: Mapped[\"MCPServer | None\"] = relationship(\n        \"MCPServer\",\n        foreign_keys=[mcp_server_id],\n        back_populates=\"user_connection_configs\",\n    )\n    admin_servers: Mapped[list[\"MCPServer\"]] = relationship(\n        \"MCPServer\",\n        foreign_keys=\"MCPServer.admin_connection_config_id\",\n        back_populates=\"admin_connection_config\",\n    )\n\n    __table_args__ = (\n        Index(\"ix_mcp_connection_config_user_email\", \"user_email\"),\n        Index(\"ix_mcp_connection_config_server_user\", \"mcp_server_id\", \"user_email\"),\n    )\n\n\n\"\"\"\nPermission Sync Tables\n\"\"\"\n\n\nclass DocPermissionSyncAttempt(Base):\n    \"\"\"\n    Represents an attempt to sync document permissions for a connector credential pair.\n    Similar to IndexAttempt but specifically for document permission syncing operations.\n    \"\"\"\n\n    __tablename__ = \"doc_permission_sync_attempt\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n\n    connector_credential_pair_id: Mapped[int] = mapped_column(\n        ForeignKey(\"connector_credential_pair.id\"),\n        nullable=False,\n    )\n\n    # Status of the sync attempt\n    status: Mapped[PermissionSyncStatus] = mapped_column(\n        Enum(PermissionSyncStatus, native_enum=False, index=True)\n    )\n\n    # Counts for tracking progress\n    total_docs_synced: Mapped[int | None] = mapped_column(Integer, default=0)\n    docs_with_permission_errors: Mapped[int | None] = mapped_column(Integer, default=0)\n\n    # Error message if sync fails\n    error_message: Mapped[str | None] = mapped_column(Text, default=None)\n\n    # Timestamps\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        index=True,\n    )\n    time_started: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), default=None\n    )\n    time_finished: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), default=None\n    )\n\n    # Relationships\n    connector_credential_pair: Mapped[ConnectorCredentialPair] = relationship(\n        \"ConnectorCredentialPair\"\n    )\n\n    __table_args__ = (\n        Index(\n            \"ix_permission_sync_attempt_latest_for_cc_pair\",\n            \"connector_credential_pair_id\",\n            \"time_created\",\n        ),\n        Index(\n            \"ix_permission_sync_attempt_status_time\",\n            \"status\",\n            desc(\"time_finished\"),\n        ),\n    )\n\n    def __repr__(self) -> str:\n        return f\"<DocPermissionSyncAttempt(id={self.id!r}, status={self.status!r})>\"\n\n    def is_finished(self) -> bool:\n        return self.status.is_terminal()\n\n\nclass ExternalGroupPermissionSyncAttempt(Base):\n    \"\"\"\n    Represents an attempt to sync external group memberships for users.\n    This tracks the syncing of user-to-external-group mappings across connectors.\n    \"\"\"\n\n    __tablename__ = \"external_group_permission_sync_attempt\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n\n    # Can be tied to a specific connector or be a global group sync\n    connector_credential_pair_id: Mapped[int | None] = mapped_column(\n        ForeignKey(\"connector_credential_pair.id\"),\n        nullable=True,  # Nullable for global group syncs across all connectors\n    )\n\n    # Status of the group sync attempt\n    status: Mapped[PermissionSyncStatus] = mapped_column(\n        Enum(PermissionSyncStatus, native_enum=False, index=True)\n    )\n\n    # Counts for tracking progress\n    total_users_processed: Mapped[int | None] = mapped_column(Integer, default=0)\n    total_groups_processed: Mapped[int | None] = mapped_column(Integer, default=0)\n    total_group_memberships_synced: Mapped[int | None] = mapped_column(\n        Integer, default=0\n    )\n\n    # Error message if sync fails\n    error_message: Mapped[str | None] = mapped_column(Text, default=None)\n\n    # Timestamps\n    time_created: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        index=True,\n    )\n    time_started: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), default=None\n    )\n    time_finished: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), default=None\n    )\n\n    # Relationships\n    connector_credential_pair: Mapped[ConnectorCredentialPair | None] = relationship(\n        \"ConnectorCredentialPair\"\n    )\n\n    __table_args__ = (\n        Index(\n            \"ix_group_sync_attempt_cc_pair_time\",\n            \"connector_credential_pair_id\",\n            \"time_created\",\n        ),\n        Index(\n            \"ix_group_sync_attempt_status_time\",\n            \"status\",\n            desc(\"time_finished\"),\n        ),\n    )\n\n    def __repr__(self) -> str:\n        return f\"<ExternalGroupPermissionSyncAttempt(id={self.id!r}, status={self.status!r})>\"\n\n    def is_finished(self) -> bool:\n        return self.status.is_terminal()\n\n\nclass License(Base):\n    \"\"\"Stores the signed license blob (singleton pattern - only one row).\"\"\"\n\n    __tablename__ = \"license\"\n    __table_args__ = (\n        # Singleton pattern - unique index on constant ensures only one row\n        Index(\"idx_license_singleton\", text(\"(true)\"), unique=True),\n    )\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n    license_data: Mapped[str] = mapped_column(Text, nullable=False)\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now()\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()\n    )\n\n\nclass TenantUsage(Base):\n    \"\"\"\n    Tracks per-tenant usage statistics within a time window for cloud usage limits.\n\n    Each row represents usage for a specific tenant during a specific time window.\n    A new row is created when the window rolls over (typically weekly).\n    \"\"\"\n\n    __tablename__ = \"tenant_usage\"\n\n    id: Mapped[int] = mapped_column(primary_key=True)\n\n    # The start of the usage tracking window (e.g., start of the week in UTC)\n    window_start: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), nullable=False, index=True\n    )\n\n    # Cumulative LLM usage cost in cents for the window\n    llm_cost_cents: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)\n\n    # Number of chunks indexed during the window\n    chunks_indexed: Mapped[int] = mapped_column(Integer, nullable=False, default=0)\n\n    # Number of API calls using API keys or Personal Access Tokens\n    api_calls: Mapped[int] = mapped_column(Integer, nullable=False, default=0)\n\n    # Number of non-streaming API calls (more expensive operations)\n    non_streaming_api_calls: Mapped[int] = mapped_column(\n        Integer, nullable=False, default=0\n    )\n\n    # Last updated timestamp for tracking freshness\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()\n    )\n\n    __table_args__ = (\n        # Ensure only one row per window start (tenant_id is in the schema name)\n        UniqueConstraint(\"window_start\", name=\"uq_tenant_usage_window\"),\n    )\n\n\n\"\"\"Tables related to Build Mode (CLI Agent Platform)\"\"\"\n\n\nclass BuildSession(Base):\n    \"\"\"Stores metadata about CLI agent build sessions.\"\"\"\n\n    __tablename__ = \"build_session\"\n\n    id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), primary_key=True, default=uuid4\n    )\n    user_id: Mapped[UUID | None] = mapped_column(\n        PGUUID(as_uuid=True), ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=True\n    )\n    name: Mapped[str | None] = mapped_column(String, nullable=True)\n    status: Mapped[BuildSessionStatus] = mapped_column(\n        Enum(BuildSessionStatus, native_enum=False, name=\"buildsessionstatus\"),\n        nullable=False,\n        default=BuildSessionStatus.ACTIVE,\n    )\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    last_activity_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n        nullable=False,\n    )\n    nextjs_port: Mapped[int | None] = mapped_column(Integer, nullable=True)\n    demo_data_enabled: Mapped[bool] = mapped_column(\n        Boolean, nullable=False, server_default=text(\"true\")\n    )\n    sharing_scope: Mapped[SharingScope] = mapped_column(\n        String,\n        nullable=False,\n        default=SharingScope.PRIVATE,\n        server_default=\"private\",\n    )\n\n    # Relationships\n    user: Mapped[User | None] = relationship(\"User\", foreign_keys=[user_id])\n    artifacts: Mapped[list[\"Artifact\"]] = relationship(\n        \"Artifact\", back_populates=\"session\", cascade=\"all, delete-orphan\"\n    )\n    messages: Mapped[list[\"BuildMessage\"]] = relationship(\n        \"BuildMessage\", back_populates=\"session\", cascade=\"all, delete-orphan\"\n    )\n    snapshots: Mapped[list[\"Snapshot\"]] = relationship(\n        \"Snapshot\", back_populates=\"session\", cascade=\"all, delete-orphan\"\n    )\n\n    __table_args__ = (\n        Index(\"ix_build_session_user_created\", \"user_id\", desc(\"created_at\")),\n        Index(\"ix_build_session_status\", \"status\"),\n    )\n\n\nclass Sandbox(Base):\n    \"\"\"Stores sandbox container metadata for users (one sandbox per user).\"\"\"\n\n    __tablename__ = \"sandbox\"\n\n    id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), primary_key=True, default=uuid4\n    )\n    user_id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True),\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"),\n        nullable=False,\n        unique=True,\n    )\n    container_id: Mapped[str | None] = mapped_column(String, nullable=True)\n    status: Mapped[SandboxStatus] = mapped_column(\n        Enum(SandboxStatus, native_enum=False, name=\"sandboxstatus\"),\n        nullable=False,\n        default=SandboxStatus.PROVISIONING,\n    )\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    last_heartbeat: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n\n    # Relationships\n    user: Mapped[User] = relationship(\"User\")\n\n    __table_args__ = (\n        Index(\"ix_sandbox_status\", \"status\"),\n        Index(\"ix_sandbox_container_id\", \"container_id\"),\n    )\n\n\nclass Artifact(Base):\n    \"\"\"Stores metadata about artifacts generated by CLI agents.\"\"\"\n\n    __tablename__ = \"artifact\"\n\n    id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), primary_key=True, default=uuid4\n    )\n    session_id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True),\n        ForeignKey(\"build_session.id\", ondelete=\"CASCADE\"),\n        nullable=False,\n    )\n    type: Mapped[ArtifactType] = mapped_column(\n        Enum(ArtifactType, native_enum=False, name=\"artifacttype\"), nullable=False\n    )\n    # path of artifact in sandbox relative to outputs/\n    path: Mapped[str] = mapped_column(String, nullable=False)\n    name: Mapped[str] = mapped_column(String, nullable=False)\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n        nullable=False,\n    )\n\n    # Relationships\n    session: Mapped[BuildSession] = relationship(\n        \"BuildSession\", back_populates=\"artifacts\"\n    )\n\n    __table_args__ = (\n        Index(\"ix_artifact_session_created\", \"session_id\", desc(\"created_at\")),\n        Index(\"ix_artifact_type\", \"type\"),\n    )\n\n\nclass Snapshot(Base):\n    \"\"\"Stores metadata about session output snapshots.\"\"\"\n\n    __tablename__ = \"snapshot\"\n\n    id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), primary_key=True, default=uuid4\n    )\n    session_id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True),\n        ForeignKey(\"build_session.id\", ondelete=\"CASCADE\"),\n        nullable=False,\n    )\n    storage_path: Mapped[str] = mapped_column(String, nullable=False)\n    size_bytes: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n\n    # Relationships\n    session: Mapped[BuildSession] = relationship(\n        \"BuildSession\", back_populates=\"snapshots\"\n    )\n\n    __table_args__ = (\n        Index(\"ix_snapshot_session_created\", \"session_id\", desc(\"created_at\")),\n    )\n\n\nclass BuildMessage(Base):\n    \"\"\"Stores messages exchanged in build sessions.\n\n    All message data is stored in message_metadata as JSON (the raw ACP packet).\n    The turn_index groups all assistant responses under the user prompt they respond to.\n\n    Packet types stored in message_metadata:\n    - user_message: {type: \"user_message\", content: {...}}\n    - agent_message: {type: \"agent_message\", content: {...}} (accumulated from chunks)\n    - agent_thought: {type: \"agent_thought\", content: {...}} (accumulated from chunks)\n    - tool_call_progress: {type: \"tool_call_progress\", status: \"completed\", ...} (only completed)\n    - agent_plan_update: {type: \"agent_plan_update\", entries: [...]} (upserted, latest only)\n    \"\"\"\n\n    __tablename__ = \"build_message\"\n\n    id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True), primary_key=True, default=uuid4\n    )\n    session_id: Mapped[UUID] = mapped_column(\n        PGUUID(as_uuid=True),\n        ForeignKey(\"build_session.id\", ondelete=\"CASCADE\"),\n        nullable=False,\n    )\n    turn_index: Mapped[int] = mapped_column(Integer, nullable=False)\n    type: Mapped[MessageType] = mapped_column(\n        Enum(MessageType, native_enum=False, name=\"messagetype\"), nullable=False\n    )\n    message_metadata: Mapped[dict[str, Any]] = mapped_column(PGJSONB, nullable=False)\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n\n    # Relationships\n    session: Mapped[BuildSession] = relationship(\n        \"BuildSession\", back_populates=\"messages\"\n    )\n\n    __table_args__ = (\n        Index(\n            \"ix_build_message_session_turn\", \"session_id\", \"turn_index\", \"created_at\"\n        ),\n    )\n\n\n\"\"\"\nSCIM 2.0 Provisioning Models (Enterprise Edition only)\nUsed for automated user/group provisioning from identity providers (Okta, Azure AD).\n\"\"\"\n\n\nclass ScimToken(Base):\n    \"\"\"Bearer tokens for IdP SCIM authentication.\"\"\"\n\n    __tablename__ = \"scim_token\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    name: Mapped[str] = mapped_column(String, nullable=False)\n    hashed_token: Mapped[str] = mapped_column(\n        String(64), unique=True, nullable=False\n    )  # SHA256 = 64 hex chars\n    token_display: Mapped[str] = mapped_column(\n        String, nullable=False\n    )  # Last 4 chars for UI identification\n\n    created_by_id: Mapped[UUID] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), nullable=False\n    )\n\n    is_active: Mapped[bool] = mapped_column(\n        Boolean, server_default=text(\"true\"), nullable=False\n    )\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    last_used_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n\n    created_by: Mapped[User] = relationship(\"User\", foreign_keys=[created_by_id])\n\n\nclass ScimUserMapping(Base):\n    \"\"\"Maps SCIM externalId from the IdP to an Onyx User.\"\"\"\n\n    __tablename__ = \"scim_user_mapping\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    external_id: Mapped[str | None] = mapped_column(\n        String, unique=True, index=True, nullable=True\n    )\n    user_id: Mapped[UUID] = mapped_column(\n        ForeignKey(\"user.id\", ondelete=\"CASCADE\"), unique=True, nullable=False\n    )\n    scim_username: Mapped[str | None] = mapped_column(String, nullable=True)\n    department: Mapped[str | None] = mapped_column(String, nullable=True)\n    manager: Mapped[str | None] = mapped_column(String, nullable=True)\n    given_name: Mapped[str | None] = mapped_column(String, nullable=True)\n    family_name: Mapped[str | None] = mapped_column(String, nullable=True)\n    scim_emails_json: Mapped[str | None] = mapped_column(Text, nullable=True)\n\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n        nullable=False,\n    )\n\n    user: Mapped[User] = relationship(\"User\", foreign_keys=[user_id])\n\n\nclass ScimGroupMapping(Base):\n    \"\"\"Maps SCIM externalId from the IdP to an Onyx UserGroup.\"\"\"\n\n    __tablename__ = \"scim_group_mapping\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    external_id: Mapped[str] = mapped_column(String, unique=True, index=True)\n    user_group_id: Mapped[int] = mapped_column(\n        ForeignKey(\"user_group.id\", ondelete=\"CASCADE\"), unique=True, nullable=False\n    )\n\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n        nullable=False,\n    )\n\n    user_group: Mapped[UserGroup] = relationship(\n        \"UserGroup\", foreign_keys=[user_group_id]\n    )\n\n\nclass CodeInterpreterServer(Base):\n    \"\"\"Details about the code interpreter server\"\"\"\n\n    __tablename__ = \"code_interpreter_server\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    server_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)\n\n\nclass CacheStore(Base):\n    \"\"\"Key-value cache table used by ``PostgresCacheBackend``.\n\n    Replaces Redis for simple KV caching, locks, and list operations\n    when ``CACHE_BACKEND=postgres`` (NO_VECTOR_DB deployments).\n\n    Intentionally separate from ``KVStore``:\n    - Stores raw bytes (LargeBinary) vs JSONB, matching Redis semantics.\n    - Has ``expires_at`` for TTL; rows are periodically garbage-collected.\n    - Holds ephemeral data (tokens, stop signals, lock state) not\n      persistent application config, so cleanup can be aggressive.\n    \"\"\"\n\n    __tablename__ = \"cache_store\"\n\n    key: Mapped[str] = mapped_column(String, primary_key=True)\n    value: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)\n    expires_at: Mapped[datetime.datetime | None] = mapped_column(\n        DateTime(timezone=True), nullable=True\n    )\n\n\nclass Hook(Base):\n    \"\"\"Pairs a HookPoint with a customer-provided API endpoint.\n\n    At most one non-deleted Hook per HookPoint is allowed, enforced by a\n    partial unique index on (hook_point) where deleted=false.\n    \"\"\"\n\n    __tablename__ = \"hook\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    name: Mapped[str] = mapped_column(String, nullable=False)\n    hook_point: Mapped[HookPoint] = mapped_column(\n        Enum(HookPoint, native_enum=False), nullable=False\n    )\n    endpoint_url: Mapped[str | None] = mapped_column(Text, nullable=True)\n    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(\n        EncryptedString(), nullable=True\n    )\n    is_reachable: Mapped[bool | None] = mapped_column(\n        Boolean, nullable=True, default=None\n    )  # null = never validated, true = last check passed, false = last check failed\n    fail_strategy: Mapped[HookFailStrategy] = mapped_column(\n        Enum(HookFailStrategy, native_enum=False),\n        nullable=False,\n        default=HookFailStrategy.HARD,\n    )\n    timeout_seconds: Mapped[float] = mapped_column(Float, nullable=False, default=30.0)\n    is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n    deleted: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n    creator_id: Mapped[UUID | None] = mapped_column(\n        PGUUID(as_uuid=True),\n        ForeignKey(\"user.id\", ondelete=\"SET NULL\"),\n        nullable=True,\n    )\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True),\n        server_default=func.now(),\n        onupdate=func.now(),\n        nullable=False,\n    )\n\n    creator: Mapped[\"User | None\"] = relationship(\"User\", foreign_keys=[creator_id])\n    execution_logs: Mapped[list[\"HookExecutionLog\"]] = relationship(\n        \"HookExecutionLog\", back_populates=\"hook\", cascade=\"all, delete-orphan\"\n    )\n\n    __table_args__ = (\n        Index(\n            \"ix_hook_one_non_deleted_per_point\",\n            \"hook_point\",\n            unique=True,\n            postgresql_where=(deleted == False),  # noqa: E712\n        ),\n    )\n\n\nclass HookExecutionLog(Base):\n    \"\"\"Records hook executions for health monitoring and debugging.\n\n    Currently only failures are logged; the is_success column exists so\n    success logging can be added later without a schema change.\n    Retention: rows older than 30 days are deleted by a nightly Celery task.\n    \"\"\"\n\n    __tablename__ = \"hook_execution_log\"\n\n    id: Mapped[int] = mapped_column(Integer, primary_key=True)\n    hook_id: Mapped[int] = mapped_column(\n        Integer,\n        ForeignKey(\"hook.id\", ondelete=\"CASCADE\"),\n        nullable=False,\n        index=True,\n    )\n    is_success: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)\n    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)\n    status_code: Mapped[int | None] = mapped_column(Integer, nullable=True)\n    duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False, index=True\n    )\n\n    hook: Mapped[\"Hook\"] = relationship(\"Hook\", back_populates=\"execution_logs\")\n"
  },
  {
    "path": "backend/onyx/db/notification.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom uuid import UUID\n\nfrom sqlalchemy import cast\nfrom sqlalchemy import select\nfrom sqlalchemy.dialects import postgresql\nfrom sqlalchemy.dialects.postgresql import insert\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.sql import func\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.constants import NotificationType\nfrom onyx.db.models import Notification\nfrom onyx.db.models import User\n\n\ndef create_notification(\n    user_id: UUID | None,\n    notif_type: NotificationType,\n    db_session: Session,\n    title: str,\n    description: str | None = None,\n    additional_data: dict | None = None,\n    autocommit: bool = True,\n) -> Notification:\n    # Previously, we only matched the first identical, undismissed notification\n    # Now, we assume some uniqueness to notifications\n    # If we previously issued a notification that was dismissed, we no longer issue a new one\n\n    # Normalize additional_data to match the unique index behavior\n    # The index uses COALESCE(additional_data, '{}'::jsonb)\n    # We need to match this logic in our query\n    additional_data_normalized = additional_data if additional_data is not None else {}\n\n    existing_notification = (\n        db_session.query(Notification)\n        .filter_by(user_id=user_id, notif_type=notif_type)\n        .filter(\n            func.coalesce(Notification.additional_data, cast({}, postgresql.JSONB))\n            == additional_data_normalized\n        )\n        .first()\n    )\n\n    if existing_notification:\n        # Update the last_shown timestamp if the notification is not dismissed\n        if not existing_notification.dismissed:\n            existing_notification.last_shown = func.now()\n            if autocommit:\n                db_session.commit()\n        return existing_notification\n\n    # Create a new notification if none exists\n    notification = Notification(\n        user_id=user_id,\n        notif_type=notif_type,\n        title=title,\n        description=description,\n        dismissed=False,\n        last_shown=func.now(),\n        first_shown=func.now(),\n        additional_data=additional_data,\n    )\n    db_session.add(notification)\n    if autocommit:\n        db_session.commit()\n    return notification\n\n\ndef get_notification_by_id(\n    notification_id: int, user: User, db_session: Session\n) -> Notification:\n    user_id = user.id\n    notif = db_session.get(Notification, notification_id)\n    if not notif:\n        raise ValueError(f\"No notification found with id {notification_id}\")\n    if notif.user_id != user_id and not (\n        notif.user_id is None and user is not None and user.role == UserRole.ADMIN\n    ):\n        raise PermissionError(\n            f\"User {user_id} is not authorized to access notification {notification_id}\"\n        )\n    return notif\n\n\ndef get_notifications(\n    user: User | None,\n    db_session: Session,\n    notif_type: NotificationType | None = None,\n    include_dismissed: bool = True,\n) -> list[Notification]:\n    query = select(Notification).where(\n        Notification.user_id == user.id if user else Notification.user_id.is_(None)\n    )\n    if not include_dismissed:\n        query = query.where(Notification.dismissed.is_(False))\n    if notif_type:\n        query = query.where(Notification.notif_type == notif_type)\n    # Sort: undismissed first, then by date (newest first)\n    query = query.order_by(\n        Notification.dismissed.asc(),\n        Notification.first_shown.desc(),\n    )\n    return list(db_session.execute(query).scalars().all())\n\n\ndef dismiss_all_notifications(\n    notif_type: NotificationType,\n    db_session: Session,\n) -> None:\n    db_session.query(Notification).filter(Notification.notif_type == notif_type).update(\n        {\"dismissed\": True}\n    )\n    db_session.commit()\n\n\ndef dismiss_notification(notification: Notification, db_session: Session) -> None:\n    notification.dismissed = True\n    db_session.commit()\n\n\ndef batch_dismiss_notifications(\n    notifications: list[Notification],\n    db_session: Session,\n) -> None:\n    for notification in notifications:\n        notification.dismissed = True\n    db_session.commit()\n\n\ndef batch_create_notifications(\n    user_ids: list[UUID],\n    notif_type: NotificationType,\n    db_session: Session,\n    title: str,\n    description: str | None = None,\n    additional_data: dict | None = None,\n) -> int:\n    \"\"\"\n    Create notifications for multiple users in a single batch operation.\n    Uses ON CONFLICT DO NOTHING for atomic idempotent inserts - if a user already\n    has a notification with the same (user_id, notif_type, additional_data), the\n    insert is silently skipped.\n\n    Returns the number of notifications created.\n\n    Relies on unique index on (user_id, notif_type, COALESCE(additional_data, '{}'))\n    \"\"\"\n    if not user_ids:\n        return 0\n\n    now = datetime.now(timezone.utc)\n    # Use empty dict instead of None to match COALESCE behavior in the unique index\n    additional_data_normalized = additional_data if additional_data is not None else {}\n\n    values = [\n        {\n            \"user_id\": uid,\n            \"notif_type\": notif_type.value,\n            \"title\": title,\n            \"description\": description,\n            \"dismissed\": False,\n            \"last_shown\": now,\n            \"first_shown\": now,\n            \"additional_data\": additional_data_normalized,\n        }\n        for uid in user_ids\n    ]\n\n    stmt = insert(Notification).values(values).on_conflict_do_nothing()\n    result = db_session.execute(stmt)\n    db_session.commit()\n\n    # rowcount returns number of rows inserted (excludes conflicts)\n    # CursorResult has rowcount but session.execute type hints are too broad\n    return result.rowcount if result.rowcount >= 0 else 0  # type: ignore[attr-defined]\n\n\ndef update_notification_last_shown(\n    notification: Notification, db_session: Session\n) -> None:\n    notification.last_shown = func.now()\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/oauth_config.py",
    "content": "from typing import Any\nfrom uuid import UUID\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import OAuthConfig\nfrom onyx.db.models import OAuthUserToken\nfrom onyx.db.models import Tool\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\n# OAuth Config CRUD operations\n\n\ndef create_oauth_config(\n    name: str,\n    authorization_url: str,\n    token_url: str,\n    client_id: str,\n    client_secret: str,\n    scopes: list[str] | None,\n    additional_params: dict[str, str] | None,\n    db_session: Session,\n) -> OAuthConfig:\n    \"\"\"Create a new OAuth configuration\"\"\"\n    oauth_config = OAuthConfig(\n        name=name,\n        authorization_url=authorization_url,\n        token_url=token_url,\n        client_id=client_id,\n        client_secret=client_secret,\n        scopes=scopes,\n        additional_params=additional_params,\n    )\n    db_session.add(oauth_config)\n    db_session.commit()\n    return oauth_config\n\n\ndef get_oauth_config(oauth_config_id: int, db_session: Session) -> OAuthConfig | None:\n    \"\"\"Get OAuth configuration by ID\"\"\"\n    return db_session.scalar(\n        select(OAuthConfig).where(OAuthConfig.id == oauth_config_id)\n    )\n\n\ndef get_oauth_configs(db_session: Session) -> list[OAuthConfig]:\n    \"\"\"Get all OAuth configurations\"\"\"\n    return list(db_session.scalars(select(OAuthConfig)).all())\n\n\ndef update_oauth_config(\n    oauth_config_id: int,\n    db_session: Session,\n    name: str | None = None,\n    authorization_url: str | None = None,\n    token_url: str | None = None,\n    client_id: str | None = None,\n    client_secret: str | None = None,\n    scopes: list[str] | None = None,\n    additional_params: dict[str, Any] | None = None,\n    clear_client_id: bool = False,\n    clear_client_secret: bool = False,\n) -> OAuthConfig:\n    \"\"\"\n    Update OAuth configuration.\n\n    NOTE: If client_id or client_secret are None, existing values are preserved.\n    To clear these values, set clear_client_id or clear_client_secret to True.\n    This allows partial updates without re-entering secrets.\n    \"\"\"\n    oauth_config = db_session.scalar(\n        select(OAuthConfig).where(OAuthConfig.id == oauth_config_id)\n    )\n    if oauth_config is None:\n        raise ValueError(f\"OAuth config with id {oauth_config_id} does not exist\")\n\n    # Update only provided fields\n    if name is not None:\n        oauth_config.name = name\n    if authorization_url is not None:\n        oauth_config.authorization_url = authorization_url\n    if token_url is not None:\n        oauth_config.token_url = token_url\n    if clear_client_id:\n        oauth_config.client_id = \"\"  # type: ignore[assignment]\n    elif client_id is not None:\n        oauth_config.client_id = client_id  # type: ignore[assignment]\n    if clear_client_secret:\n        oauth_config.client_secret = \"\"  # type: ignore[assignment]\n    elif client_secret is not None:\n        oauth_config.client_secret = client_secret  # type: ignore[assignment]\n    if scopes is not None:\n        oauth_config.scopes = scopes\n    if additional_params is not None:\n        oauth_config.additional_params = additional_params\n\n    db_session.commit()\n    return oauth_config\n\n\ndef delete_oauth_config(oauth_config_id: int, db_session: Session) -> None:\n    \"\"\"\n    Delete OAuth configuration.\n\n    Sets oauth_config_id to NULL for associated tools due to SET NULL foreign key.\n    Cascades delete to user tokens.\n    \"\"\"\n    oauth_config = db_session.scalar(\n        select(OAuthConfig).where(OAuthConfig.id == oauth_config_id)\n    )\n    if oauth_config is None:\n        raise ValueError(f\"OAuth config with id {oauth_config_id} does not exist\")\n\n    db_session.delete(oauth_config)\n    db_session.commit()\n\n\n# User Token operations\n\n\ndef get_user_oauth_token(\n    oauth_config_id: int, user_id: UUID, db_session: Session\n) -> OAuthUserToken | None:\n    \"\"\"Get user's OAuth token for a specific configuration\"\"\"\n    return db_session.scalar(\n        select(OAuthUserToken).where(\n            OAuthUserToken.oauth_config_id == oauth_config_id,\n            OAuthUserToken.user_id == user_id,\n        )\n    )\n\n\ndef get_all_user_oauth_tokens(\n    user_id: UUID, db_session: Session\n) -> list[OAuthUserToken]:\n    \"\"\"\n    Get all user OAuth tokens.\n    \"\"\"\n    stmt = select(OAuthUserToken).where(OAuthUserToken.user_id == user_id)\n\n    return list(db_session.scalars(stmt).all())\n\n\ndef upsert_user_oauth_token(\n    oauth_config_id: int, user_id: UUID, token_data: dict, db_session: Session\n) -> OAuthUserToken:\n    \"\"\"Insert or update user's OAuth token for a specific configuration\"\"\"\n    existing_token = get_user_oauth_token(oauth_config_id, user_id, db_session)\n\n    if existing_token:\n        # Update existing token\n        existing_token.token_data = token_data  # type: ignore[assignment]\n        db_session.commit()\n        return existing_token\n    else:\n        # Create new token\n        new_token = OAuthUserToken(\n            oauth_config_id=oauth_config_id,\n            user_id=user_id,\n            token_data=token_data,\n        )\n        db_session.add(new_token)\n        db_session.commit()\n        return new_token\n\n\ndef delete_user_oauth_token(\n    oauth_config_id: int, user_id: UUID, db_session: Session\n) -> None:\n    \"\"\"Delete user's OAuth token for a specific configuration\"\"\"\n    user_token = get_user_oauth_token(oauth_config_id, user_id, db_session)\n    if user_token is None:\n        raise ValueError(\n            f\"OAuth token for user {user_id} and config {oauth_config_id} does not exist\"\n        )\n\n    db_session.delete(user_token)\n    db_session.commit()\n\n\n# Helper operations\n\n\ndef get_tools_by_oauth_config(oauth_config_id: int, db_session: Session) -> list[Tool]:\n    \"\"\"Get all tools that use a specific OAuth configuration\"\"\"\n    return list(\n        db_session.scalars(\n            select(Tool).where(Tool.oauth_config_id == oauth_config_id)\n        ).all()\n    )\n"
  },
  {
    "path": "backend/onyx/db/opensearch_migration.py",
    "content": "\"\"\"Database operations for OpenSearch migration tracking.\n\nThis module provides functions to track the progress of migrating documents\nfrom Vespa to OpenSearch.\n\"\"\"\n\nimport json\nfrom datetime import datetime\nfrom datetime import timezone\n\nfrom sqlalchemy import select\nfrom sqlalchemy import text\nfrom sqlalchemy.dialects.postgresql import insert\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.tasks.opensearch_migration.constants import (\n    GET_VESPA_CHUNKS_SLICE_COUNT,\n)\nfrom onyx.background.celery.tasks.opensearch_migration.constants import (\n    TOTAL_ALLOWABLE_DOC_MIGRATION_ATTEMPTS_BEFORE_PERMANENT_FAILURE,\n)\nfrom onyx.configs.app_configs import ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX\nfrom onyx.db.enums import OpenSearchDocumentMigrationStatus\nfrom onyx.db.models import Document\nfrom onyx.db.models import OpenSearchDocumentMigrationRecord\nfrom onyx.db.models import OpenSearchTenantMigrationRecord\nfrom onyx.document_index.vespa.shared_utils.utils import (\n    replace_invalid_doc_id_characters,\n)\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef get_paginated_document_batch(\n    db_session: Session,\n    limit: int,\n    prev_ending_document_id: str | None = None,\n) -> list[str]:\n    \"\"\"Gets a paginated batch of document IDs from the Document table.\n\n    We need some deterministic ordering to ensure that we don't miss any\n    documents when paginating. This function uses the document ID. It is\n    possible a document is inserted above a spot this function has already\n    passed. In that event we assume that the document will be indexed into\n    OpenSearch anyway and we don't need to migrate.\n    TODO(andrei): Consider ordering on last_modified in addition to ID to better\n    match get_opensearch_migration_records_needing_migration.\n\n    Args:\n        db_session: SQLAlchemy session.\n        limit: Number of document IDs to fetch.\n        prev_ending_document_id: Document ID to start after (for pagination). If\n            None, returns the first batch of documents. If not None, this should\n            be the last ordered ID which was fetched in a previous batch.\n            Defaults to None.\n\n    Returns:\n        List of document IDs.\n    \"\"\"\n    stmt = select(Document.id).order_by(Document.id.asc()).limit(limit)\n    if prev_ending_document_id is not None:\n        stmt = stmt.where(Document.id > prev_ending_document_id)\n    return list(db_session.scalars(stmt).all())\n\n\ndef get_last_opensearch_migration_document_id(\n    db_session: Session,\n) -> str | None:\n    \"\"\"\n    Gets the last document ID in the OpenSearchDocumentMigrationRecord table.\n\n    Returns None if no records are found.\n    \"\"\"\n    stmt = (\n        select(OpenSearchDocumentMigrationRecord.document_id)\n        .order_by(OpenSearchDocumentMigrationRecord.document_id.desc())\n        .limit(1)\n    )\n    return db_session.scalars(stmt).first()\n\n\ndef create_opensearch_migration_records_with_commit(\n    db_session: Session,\n    document_ids: list[str],\n) -> None:\n    \"\"\"Creates new OpenSearchDocumentMigrationRecord records.\n\n    Silently skips any document IDs that already have records.\n    \"\"\"\n    if not document_ids:\n        return\n\n    values = [\n        {\n            \"document_id\": document_id,\n            \"status\": OpenSearchDocumentMigrationStatus.PENDING,\n        }\n        for document_id in document_ids\n    ]\n\n    stmt = insert(OpenSearchDocumentMigrationRecord).values(values)\n    stmt = stmt.on_conflict_do_nothing(index_elements=[\"document_id\"])\n\n    db_session.execute(stmt)\n    db_session.commit()\n\n\ndef get_opensearch_migration_records_needing_migration(\n    db_session: Session,\n    limit: int,\n) -> list[OpenSearchDocumentMigrationRecord]:\n    \"\"\"Gets records of documents that need to be migrated.\n\n    Properties:\n    - First tries documents with status PENDING.\n    - Of these, orders documents with the oldest last_modified to prioritize\n      documents that were modified a long time ago, as they are presumed to be\n      stable. This column is modified in many flows so is not a guarantee of the\n      document having been indexed.\n    - Then if there's room in the result, tries documents with status FAILED.\n    - Of these, first orders documents on the least attempts_count so as to have\n      a backoff for recently-failed docs. Then orders on last_modified as\n      before.\n    \"\"\"\n    result: list[OpenSearchDocumentMigrationRecord] = []\n\n    # Step 1: Fetch as many PENDING status records as possible ordered by\n    # last_modified (oldest first). last_modified lives on Document, so we join.\n    stmt_pending = (\n        select(OpenSearchDocumentMigrationRecord)\n        .join(Document, OpenSearchDocumentMigrationRecord.document_id == Document.id)\n        .where(\n            OpenSearchDocumentMigrationRecord.status\n            == OpenSearchDocumentMigrationStatus.PENDING\n        )\n        .order_by(Document.last_modified.asc())\n        .limit(limit)\n    )\n    result.extend(list(db_session.scalars(stmt_pending).all()))\n    remaining = limit - len(result)\n\n    # Step 2: If more are needed, fetch records with status FAILED, ordered by\n    # attempts_count (lowest first), then last_modified (oldest first).\n    if remaining > 0:\n        stmt_failed = (\n            select(OpenSearchDocumentMigrationRecord)\n            .join(\n                Document,\n                OpenSearchDocumentMigrationRecord.document_id == Document.id,\n            )\n            .where(\n                OpenSearchDocumentMigrationRecord.status\n                == OpenSearchDocumentMigrationStatus.FAILED\n            )\n            .order_by(\n                OpenSearchDocumentMigrationRecord.attempts_count.asc(),\n                Document.last_modified.asc(),\n            )\n            .limit(remaining)\n        )\n        result.extend(list(db_session.scalars(stmt_failed).all()))\n\n    return result\n\n\ndef get_total_opensearch_migration_record_count(\n    db_session: Session,\n) -> int:\n    \"\"\"Gets the total number of OpenSearch migration records.\n\n    Used to check whether every document has been tracked for migration.\n    \"\"\"\n    return db_session.query(OpenSearchDocumentMigrationRecord).count()\n\n\ndef get_total_document_count(db_session: Session) -> int:\n    \"\"\"Gets the total number of documents.\n\n    Used to check whether every document has been tracked for migration.\n    \"\"\"\n    return db_session.query(Document).count()\n\n\ndef try_insert_opensearch_tenant_migration_record_with_commit(\n    db_session: Session,\n) -> None:\n    \"\"\"Tries to insert the singleton row on OpenSearchTenantMigrationRecord.\n\n    Does nothing if the row already exists.\n    \"\"\"\n    stmt = insert(OpenSearchTenantMigrationRecord).on_conflict_do_nothing(\n        index_elements=[text(\"(true)\")]\n    )\n    db_session.execute(stmt)\n    db_session.commit()\n\n\ndef increment_num_times_observed_no_additional_docs_to_migrate_with_commit(\n    db_session: Session,\n) -> None:\n    \"\"\"Increments the number of times observed no additional docs to migrate.\n\n    Requires the OpenSearchTenantMigrationRecord to exist.\n\n    Used to track when to stop the migration task.\n    \"\"\"\n    record = db_session.query(OpenSearchTenantMigrationRecord).first()\n    if record is None:\n        raise RuntimeError(\"OpenSearchTenantMigrationRecord not found.\")\n    record.num_times_observed_no_additional_docs_to_migrate += 1\n    db_session.commit()\n\n\ndef increment_num_times_observed_no_additional_docs_to_populate_migration_table_with_commit(\n    db_session: Session,\n) -> None:\n    \"\"\"\n    Increments the number of times observed no additional docs to populate the\n    migration table.\n\n    Requires the OpenSearchTenantMigrationRecord to exist.\n\n    Used to track when to stop the migration check task.\n    \"\"\"\n    record = db_session.query(OpenSearchTenantMigrationRecord).first()\n    if record is None:\n        raise RuntimeError(\"OpenSearchTenantMigrationRecord not found.\")\n    record.num_times_observed_no_additional_docs_to_populate_migration_table += 1\n    db_session.commit()\n\n\ndef should_document_migration_be_permanently_failed(\n    opensearch_document_migration_record: OpenSearchDocumentMigrationRecord,\n) -> bool:\n    return (\n        opensearch_document_migration_record.status\n        == OpenSearchDocumentMigrationStatus.PERMANENTLY_FAILED\n        or (\n            opensearch_document_migration_record.status\n            == OpenSearchDocumentMigrationStatus.FAILED\n            and opensearch_document_migration_record.attempts_count\n            >= TOTAL_ALLOWABLE_DOC_MIGRATION_ATTEMPTS_BEFORE_PERMANENT_FAILURE\n        )\n    )\n\n\ndef get_vespa_visit_state(\n    db_session: Session,\n) -> tuple[dict[int, str | None], int]:\n    \"\"\"Gets the current Vespa migration state from the tenant migration record.\n\n    Requires the OpenSearchTenantMigrationRecord to exist.\n\n    Returns:\n        Tuple of (continuation_token_map, total_chunks_migrated).\n    \"\"\"\n    record = db_session.query(OpenSearchTenantMigrationRecord).first()\n    if record is None:\n        raise RuntimeError(\"OpenSearchTenantMigrationRecord not found.\")\n    if record.vespa_visit_continuation_token is None:\n        continuation_token_map: dict[int, str | None] = {\n            slice_id: None for slice_id in range(GET_VESPA_CHUNKS_SLICE_COUNT)\n        }\n    else:\n        json_loaded_continuation_token_map = json.loads(\n            record.vespa_visit_continuation_token\n        )\n        continuation_token_map = {\n            int(key): value for key, value in json_loaded_continuation_token_map.items()\n        }\n    return continuation_token_map, record.total_chunks_migrated\n\n\ndef update_vespa_visit_progress_with_commit(\n    db_session: Session,\n    continuation_token_map: dict[int, str | None],\n    chunks_processed: int,\n    chunks_errored: int,\n    approx_chunk_count_in_vespa: int | None,\n) -> None:\n    \"\"\"Updates the Vespa migration progress and commits.\n\n    Requires the OpenSearchTenantMigrationRecord to exist.\n\n    Args:\n        db_session: SQLAlchemy session.\n        continuation_token_map: The new continuation token map. None entry means\n            the visit is complete for that slice.\n        chunks_processed: Number of chunks processed in this batch (added to\n            the running total).\n        chunks_errored: Number of chunks errored in this batch (added to the\n            running errored total).\n        approx_chunk_count_in_vespa: Approximate number of chunks in Vespa. If\n            None, the existing value is used.\n    \"\"\"\n    record = db_session.query(OpenSearchTenantMigrationRecord).first()\n    if record is None:\n        raise RuntimeError(\"OpenSearchTenantMigrationRecord not found.\")\n    record.vespa_visit_continuation_token = json.dumps(continuation_token_map)\n    record.total_chunks_migrated += chunks_processed\n    record.total_chunks_errored += chunks_errored\n    record.approx_chunk_count_in_vespa = (\n        approx_chunk_count_in_vespa\n        if approx_chunk_count_in_vespa is not None\n        else record.approx_chunk_count_in_vespa\n    )\n    db_session.commit()\n\n\ndef mark_migration_completed_time_if_not_set_with_commit(\n    db_session: Session,\n) -> None:\n    \"\"\"Marks the migration completed time if not set.\n\n    Requires the OpenSearchTenantMigrationRecord to exist.\n    \"\"\"\n    record = db_session.query(OpenSearchTenantMigrationRecord).first()\n    if record is None:\n        raise RuntimeError(\"OpenSearchTenantMigrationRecord not found.\")\n    if record.migration_completed_at is not None:\n        return\n    record.migration_completed_at = datetime.now(timezone.utc)\n    db_session.commit()\n\n\ndef is_migration_completed(db_session: Session) -> bool:\n    \"\"\"Returns True if the migration is completed.\n\n    Can be run even if the migration record does not exist.\n    \"\"\"\n    record = db_session.query(OpenSearchTenantMigrationRecord).first()\n    return record is not None and record.migration_completed_at is not None\n\n\ndef build_sanitized_to_original_doc_id_mapping(\n    db_session: Session,\n) -> dict[str, str]:\n    \"\"\"Pre-computes a mapping of sanitized -> original document IDs.\n\n    Only includes documents whose ID contains single quotes (the only character\n    that gets sanitized by replace_invalid_doc_id_characters). For all other\n    documents, sanitized == original and no mapping entry is needed.\n\n    Scans over all documents.\n\n    Checks if the sanitized ID already exists as a genuine separate document in\n    the Document table. If so, raises as there is no way of resolving the\n    conflict in the migration. The user will need to reindex.\n\n    Args:\n        db_session: SQLAlchemy session.\n\n    Returns:\n        Dict mapping sanitized_id -> original_id, only for documents where\n        the IDs differ. Empty dict means no documents have single quotes\n        in their IDs.\n    \"\"\"\n    # Find all documents with single quotes in their ID.\n    stmt = select(Document.id).where(Document.id.contains(\"'\"))\n    ids_with_quotes = list(db_session.scalars(stmt).all())\n\n    result: dict[str, str] = {}\n    for original_id in ids_with_quotes:\n        sanitized_id = replace_invalid_doc_id_characters(original_id)\n        if sanitized_id != original_id:\n            result[sanitized_id] = original_id\n\n    # See if there are any documents whose ID is a sanitized ID of another\n    # document. If there is even one match, we cannot proceed.\n    stmt = select(Document.id).where(Document.id.in_(result.keys()))\n    ids_with_matches = list(db_session.scalars(stmt).all())\n    if ids_with_matches:\n        raise RuntimeError(\n            f\"Documents with IDs {ids_with_matches} have sanitized IDs that match other documents. \"\n            \"This is not supported and the user will need to reindex.\"\n        )\n\n    return result\n\n\ndef get_opensearch_migration_state(\n    db_session: Session,\n) -> tuple[int, datetime | None, datetime | None, int | None]:\n    \"\"\"Returns the state of the Vespa to OpenSearch migration.\n\n    If the tenant migration record is not found, returns defaults of 0, None,\n    None, None.\n\n    Args:\n        db_session: SQLAlchemy session.\n\n    Returns:\n        Tuple of (total_chunks_migrated, created_at, migration_completed_at,\n            approx_chunk_count_in_vespa).\n    \"\"\"\n    record = db_session.query(OpenSearchTenantMigrationRecord).first()\n    if record is None:\n        return 0, None, None, None\n    return (\n        record.total_chunks_migrated,\n        record.created_at,\n        record.migration_completed_at,\n        record.approx_chunk_count_in_vespa,\n    )\n\n\ndef get_opensearch_retrieval_state(\n    db_session: Session,\n) -> bool:\n    \"\"\"Returns the state of the OpenSearch retrieval.\n\n    If the tenant migration record is not found, defaults to\n    ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX.\n    \"\"\"\n    record = db_session.query(OpenSearchTenantMigrationRecord).first()\n    if record is None:\n        return ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX\n    return record.enable_opensearch_retrieval\n\n\ndef set_enable_opensearch_retrieval_with_commit(\n    db_session: Session,\n    enable: bool,\n) -> None:\n    \"\"\"Sets the enable_opensearch_retrieval flag on the singleton record.\n\n    Creates the record if it doesn't exist yet.\n    \"\"\"\n    try_insert_opensearch_tenant_migration_record_with_commit(db_session)\n    record = db_session.query(OpenSearchTenantMigrationRecord).first()\n    if record is None:\n        raise RuntimeError(\"OpenSearchTenantMigrationRecord not found.\")\n    record.enable_opensearch_retrieval = enable\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/pat.py",
    "content": "\"\"\"Database operations for Personal Access Tokens.\"\"\"\n\nimport asyncio\nfrom datetime import datetime\nfrom datetime import timezone\nfrom uuid import UUID\n\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.ext.asyncio import AsyncSession\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.pat import build_displayable_pat\nfrom onyx.auth.pat import calculate_expiration\nfrom onyx.auth.pat import generate_pat\nfrom onyx.auth.pat import hash_pat\nfrom onyx.db.engine.async_sql_engine import get_async_session_context_manager\nfrom onyx.db.models import PersonalAccessToken\nfrom onyx.db.models import User\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\n\nlogger = setup_logger()\n\n\nasync def fetch_user_for_pat(\n    hashed_token: str, async_db_session: AsyncSession\n) -> User | None:\n    \"\"\"Fetch user associated with PAT. Returns None if invalid, expired, or inactive user.\n\n    NOTE: This is async since it's used during auth (which is necessarily async due to FastAPI Users).\n    NOTE: Expired includes both naturally expired and user-revoked tokens (revocation sets expires_at=NOW()).\n\n    Uses select(User) as primary entity so that joined-eager relationships (e.g. oauth_accounts)\n    are loaded correctly — matching the pattern in fetch_user_for_api_key.\n    \"\"\"\n    now = datetime.now(timezone.utc)\n\n    user = await async_db_session.scalar(\n        select(User)\n        .join(PersonalAccessToken, PersonalAccessToken.user_id == User.id)\n        .where(PersonalAccessToken.hashed_token == hashed_token)\n        .where(User.is_active)  # type: ignore\n        .where(\n            (PersonalAccessToken.expires_at.is_(None))\n            | (PersonalAccessToken.expires_at > now)\n        )\n    )\n    if not user:\n        return None\n\n    _schedule_pat_last_used_update(hashed_token, now)\n    return user\n\n\ndef _schedule_pat_last_used_update(hashed_token: str, now: datetime) -> None:\n    \"\"\"Fire-and-forget update of last_used_at, throttled to 5-minute granularity.\"\"\"\n\n    async def _update() -> None:\n        try:\n            tenant_id = get_current_tenant_id()\n            async with get_async_session_context_manager(tenant_id) as session:\n                pat = await session.scalar(\n                    select(PersonalAccessToken).where(\n                        PersonalAccessToken.hashed_token == hashed_token\n                    )\n                )\n                if not pat:\n                    return\n                if (\n                    pat.last_used_at is not None\n                    and (now - pat.last_used_at).total_seconds() <= 300\n                ):\n                    return\n                await session.execute(\n                    update(PersonalAccessToken)\n                    .where(PersonalAccessToken.hashed_token == hashed_token)\n                    .values(last_used_at=now)\n                )\n                await session.commit()\n        except Exception as e:\n            logger.warning(f\"Failed to update last_used_at for PAT: {e}\")\n\n    asyncio.create_task(_update())\n\n\ndef create_pat(\n    db_session: Session,\n    user_id: UUID,\n    name: str,\n    expiration_days: int | None,\n) -> tuple[PersonalAccessToken, str]:\n    \"\"\"Create new PAT. Returns (db_record, raw_token).\n\n    Raises ValueError if user is inactive or not found.\n    \"\"\"\n    user = db_session.scalar(select(User).where(User.id == user_id))  # type: ignore\n    if not user or not user.is_active:\n        raise ValueError(\"Cannot create PAT for inactive or non-existent user\")\n\n    tenant_id = get_current_tenant_id()\n    raw_token = generate_pat(tenant_id)\n\n    pat = PersonalAccessToken(\n        name=name,\n        hashed_token=hash_pat(raw_token),\n        token_display=build_displayable_pat(raw_token),\n        user_id=user_id,\n        expires_at=calculate_expiration(expiration_days),\n    )\n    db_session.add(pat)\n    db_session.commit()\n\n    return pat, raw_token\n\n\ndef list_user_pats(db_session: Session, user_id: UUID) -> list[PersonalAccessToken]:\n    \"\"\"List all active (non-expired) PATs for a user.\"\"\"\n    return list(\n        db_session.scalars(\n            select(PersonalAccessToken)\n            .where(PersonalAccessToken.user_id == user_id)\n            .where(\n                (PersonalAccessToken.expires_at.is_(None))\n                | (PersonalAccessToken.expires_at > datetime.now(timezone.utc))\n            )\n            .order_by(PersonalAccessToken.created_at.desc())\n        ).all()\n    )\n\n\ndef revoke_pat(db_session: Session, pat_id: int, user_id: UUID) -> bool:\n    \"\"\"Revoke PAT by setting expires_at=NOW() for immediate expiry.\n\n    Returns True if revoked, False if not found, not owned by user, or already expired.\n    \"\"\"\n    now = datetime.now(timezone.utc)\n    pat = db_session.scalar(\n        select(PersonalAccessToken)\n        .where(PersonalAccessToken.id == pat_id)\n        .where(PersonalAccessToken.user_id == user_id)\n        .where(\n            (PersonalAccessToken.expires_at.is_(None))\n            | (PersonalAccessToken.expires_at > now)\n        )  # Only revoke active (non-expired) tokens\n    )\n    if not pat:\n        return False\n\n    # Revoke by setting expires_at to NOW() and marking as revoked for audit trail\n    pat.expires_at = now\n    pat.is_revoked = True\n    db_session.commit()\n    return True\n"
  },
  {
    "path": "backend/onyx/db/permission_sync_attempt.py",
    "content": "\"\"\"Permission sync attempt CRUD operations and utilities.\n\nThis module contains all CRUD operations for both DocPermissionSyncAttempt\nand ExternalGroupPermissionSyncAttempt models, along with shared utilities.\n\"\"\"\n\nfrom typing import Any\nfrom typing import cast\n\nfrom sqlalchemy import delete\nfrom sqlalchemy import func\nfrom sqlalchemy import select\nfrom sqlalchemy.engine.cursor import CursorResult\nfrom sqlalchemy.orm import joinedload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.enums import PermissionSyncStatus\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import DocPermissionSyncAttempt\nfrom onyx.db.models import ExternalGroupPermissionSyncAttempt\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.telemetry import optional_telemetry\nfrom onyx.utils.telemetry import RecordType\n\nlogger = setup_logger()\n\n\n# =============================================================================\n# DOC PERMISSION SYNC ATTEMPT CRUD\n# =============================================================================\n\n\ndef create_doc_permission_sync_attempt(\n    connector_credential_pair_id: int,\n    db_session: Session,\n) -> int:\n    \"\"\"Create a new doc permission sync attempt.\n\n    Args:\n        connector_credential_pair_id: The ID of the connector credential pair\n        db_session: The database session\n\n    Returns:\n        The ID of the created attempt\n    \"\"\"\n    attempt = DocPermissionSyncAttempt(\n        connector_credential_pair_id=connector_credential_pair_id,\n        status=PermissionSyncStatus.NOT_STARTED,\n    )\n    db_session.add(attempt)\n    db_session.commit()\n\n    return attempt.id\n\n\ndef get_doc_permission_sync_attempt(\n    db_session: Session,\n    attempt_id: int,\n    eager_load_connector: bool = False,\n) -> DocPermissionSyncAttempt | None:\n    \"\"\"Get a doc permission sync attempt by ID.\n\n    Args:\n        db_session: The database session\n        attempt_id: The ID of the attempt\n        eager_load_connector: If True, eagerly loads the connector and cc_pair relationships\n\n    Returns:\n        The attempt if found, None otherwise\n    \"\"\"\n    stmt = select(DocPermissionSyncAttempt).where(\n        DocPermissionSyncAttempt.id == attempt_id\n    )\n\n    if eager_load_connector:\n        stmt = stmt.options(\n            joinedload(DocPermissionSyncAttempt.connector_credential_pair).joinedload(\n                ConnectorCredentialPair.connector\n            )\n        )\n\n    return db_session.scalars(stmt).first()\n\n\ndef get_latest_doc_permission_sync_attempt_for_cc_pair(\n    db_session: Session,\n    connector_credential_pair_id: int,\n) -> DocPermissionSyncAttempt | None:\n    \"\"\"Get the latest doc permission sync attempt for a connector credential pair.\"\"\"\n    return db_session.execute(\n        select(DocPermissionSyncAttempt)\n        .where(\n            DocPermissionSyncAttempt.connector_credential_pair_id\n            == connector_credential_pair_id\n        )\n        .order_by(DocPermissionSyncAttempt.time_created.desc())\n        .limit(1)\n    ).scalar_one_or_none()\n\n\ndef get_recent_doc_permission_sync_attempts_for_cc_pair(\n    cc_pair_id: int,\n    limit: int,\n    db_session: Session,\n) -> list[DocPermissionSyncAttempt]:\n    \"\"\"Get recent doc permission sync attempts for a cc pair, most recent first.\"\"\"\n    return list(\n        db_session.execute(\n            select(DocPermissionSyncAttempt)\n            .where(DocPermissionSyncAttempt.connector_credential_pair_id == cc_pair_id)\n            .order_by(DocPermissionSyncAttempt.time_created.desc())\n            .limit(limit)\n        ).scalars()\n    )\n\n\ndef mark_doc_permission_sync_attempt_in_progress(\n    attempt_id: int,\n    db_session: Session,\n) -> DocPermissionSyncAttempt:\n    \"\"\"Mark a doc permission sync attempt as IN_PROGRESS.\n    Locks the row during update.\"\"\"\n    try:\n        attempt = db_session.execute(\n            select(DocPermissionSyncAttempt)\n            .where(DocPermissionSyncAttempt.id == attempt_id)\n            .with_for_update()\n        ).scalar_one()\n\n        if attempt.status != PermissionSyncStatus.NOT_STARTED:\n            raise RuntimeError(\n                f\"Doc permission sync attempt with ID '{attempt_id}' is not in NOT_STARTED status. \"\n                f\"Current status is '{attempt.status}'.\"\n            )\n\n        attempt.status = PermissionSyncStatus.IN_PROGRESS\n        attempt.time_started = func.now()  # type: ignore\n        db_session.commit()\n        return attempt\n    except Exception:\n        db_session.rollback()\n        logger.exception(\"mark_doc_permission_sync_attempt_in_progress exceptioned.\")\n        raise\n\n\ndef mark_doc_permission_sync_attempt_failed(\n    attempt_id: int,\n    db_session: Session,\n    error_message: str,\n) -> None:\n    \"\"\"Mark a doc permission sync attempt as failed.\"\"\"\n    try:\n        attempt = db_session.execute(\n            select(DocPermissionSyncAttempt)\n            .where(DocPermissionSyncAttempt.id == attempt_id)\n            .with_for_update()\n        ).scalar_one()\n\n        if not attempt.time_started:\n            attempt.time_started = func.now()  # type: ignore\n        attempt.status = PermissionSyncStatus.FAILED\n        attempt.time_finished = func.now()  # type: ignore\n        attempt.error_message = error_message\n        db_session.commit()\n\n        # Add telemetry for permission sync attempt status change\n        optional_telemetry(\n            record_type=RecordType.PERMISSION_SYNC_COMPLETE,\n            data={\n                \"doc_permission_sync_attempt_id\": attempt_id,\n                \"status\": PermissionSyncStatus.FAILED.value,\n                \"cc_pair_id\": attempt.connector_credential_pair_id,\n            },\n        )\n    except Exception:\n        db_session.rollback()\n        raise\n\n\ndef complete_doc_permission_sync_attempt(\n    db_session: Session,\n    attempt_id: int,\n    total_docs_synced: int,\n    docs_with_permission_errors: int,\n) -> DocPermissionSyncAttempt:\n    \"\"\"Complete a doc permission sync attempt by updating progress and setting final status.\n\n    This combines the progress update and final status marking into a single operation.\n    If there were permission errors, the attempt is marked as COMPLETED_WITH_ERRORS,\n    otherwise it's marked as SUCCESS.\n\n    Args:\n        db_session: The database session\n        attempt_id: The ID of the attempt\n        total_docs_synced: Total number of documents synced\n        docs_with_permission_errors: Number of documents that had permission errors\n\n    Returns:\n        The completed attempt\n    \"\"\"\n    try:\n        attempt = db_session.execute(\n            select(DocPermissionSyncAttempt)\n            .where(DocPermissionSyncAttempt.id == attempt_id)\n            .with_for_update()\n        ).scalar_one()\n\n        # Update progress counters\n        attempt.total_docs_synced = (attempt.total_docs_synced or 0) + total_docs_synced\n        attempt.docs_with_permission_errors = (\n            attempt.docs_with_permission_errors or 0\n        ) + docs_with_permission_errors\n\n        # Set final status based on whether there were errors\n        if docs_with_permission_errors > 0:\n            attempt.status = PermissionSyncStatus.COMPLETED_WITH_ERRORS\n        else:\n            attempt.status = PermissionSyncStatus.SUCCESS\n\n        attempt.time_finished = func.now()  # type: ignore\n        db_session.commit()\n\n        # Add telemetry\n        optional_telemetry(\n            record_type=RecordType.PERMISSION_SYNC_COMPLETE,\n            data={\n                \"doc_permission_sync_attempt_id\": attempt_id,\n                \"status\": attempt.status.value,\n                \"cc_pair_id\": attempt.connector_credential_pair_id,\n            },\n        )\n        return attempt\n    except Exception:\n        db_session.rollback()\n        logger.exception(\"complete_doc_permission_sync_attempt exceptioned.\")\n        raise\n\n\n# =============================================================================\n# EXTERNAL GROUP PERMISSION SYNC ATTEMPT CRUD\n# =============================================================================\n\n\ndef create_external_group_sync_attempt(\n    connector_credential_pair_id: int | None,\n    db_session: Session,\n) -> int:\n    \"\"\"Create a new external group sync attempt.\n\n    Args:\n        connector_credential_pair_id: The ID of the connector credential pair, or None for global syncs\n        db_session: The database session\n\n    Returns:\n        The ID of the created attempt\n    \"\"\"\n    attempt = ExternalGroupPermissionSyncAttempt(\n        connector_credential_pair_id=connector_credential_pair_id,\n        status=PermissionSyncStatus.NOT_STARTED,\n    )\n    db_session.add(attempt)\n    db_session.commit()\n\n    return attempt.id\n\n\ndef get_external_group_sync_attempt(\n    db_session: Session,\n    attempt_id: int,\n    eager_load_connector: bool = False,\n) -> ExternalGroupPermissionSyncAttempt | None:\n    \"\"\"Get an external group sync attempt by ID.\n\n    Args:\n        db_session: The database session\n        attempt_id: The ID of the attempt\n        eager_load_connector: If True, eagerly loads the connector and cc_pair relationships\n\n    Returns:\n        The attempt if found, None otherwise\n    \"\"\"\n    stmt = select(ExternalGroupPermissionSyncAttempt).where(\n        ExternalGroupPermissionSyncAttempt.id == attempt_id\n    )\n\n    if eager_load_connector:\n        stmt = stmt.options(\n            joinedload(\n                ExternalGroupPermissionSyncAttempt.connector_credential_pair\n            ).joinedload(ConnectorCredentialPair.connector)\n        )\n\n    return db_session.scalars(stmt).first()\n\n\ndef get_recent_external_group_sync_attempts_for_cc_pair(\n    cc_pair_id: int | None,\n    limit: int,\n    db_session: Session,\n) -> list[ExternalGroupPermissionSyncAttempt]:\n    \"\"\"Get recent external group sync attempts for a cc pair, most recent first.\n    If cc_pair_id is None, gets global group sync attempts.\"\"\"\n    stmt = select(ExternalGroupPermissionSyncAttempt)\n\n    if cc_pair_id is not None:\n        stmt = stmt.where(\n            ExternalGroupPermissionSyncAttempt.connector_credential_pair_id\n            == cc_pair_id\n        )\n    else:\n        stmt = stmt.where(\n            ExternalGroupPermissionSyncAttempt.connector_credential_pair_id.is_(None)\n        )\n\n    return list(\n        db_session.execute(\n            stmt.order_by(ExternalGroupPermissionSyncAttempt.time_created.desc()).limit(\n                limit\n            )\n        ).scalars()\n    )\n\n\ndef mark_external_group_sync_attempt_in_progress(\n    attempt_id: int,\n    db_session: Session,\n) -> ExternalGroupPermissionSyncAttempt:\n    \"\"\"Mark an external group sync attempt as IN_PROGRESS.\n    Locks the row during update.\"\"\"\n    try:\n        attempt = db_session.execute(\n            select(ExternalGroupPermissionSyncAttempt)\n            .where(ExternalGroupPermissionSyncAttempt.id == attempt_id)\n            .with_for_update()\n        ).scalar_one()\n\n        if attempt.status != PermissionSyncStatus.NOT_STARTED:\n            raise RuntimeError(\n                f\"External group sync attempt with ID '{attempt_id}' is not in NOT_STARTED status. \"\n                f\"Current status is '{attempt.status}'.\"\n            )\n\n        attempt.status = PermissionSyncStatus.IN_PROGRESS\n        attempt.time_started = func.now()  # type: ignore\n        db_session.commit()\n        return attempt\n    except Exception:\n        db_session.rollback()\n        logger.exception(\"mark_external_group_sync_attempt_in_progress exceptioned.\")\n        raise\n\n\ndef mark_external_group_sync_attempt_failed(\n    attempt_id: int,\n    db_session: Session,\n    error_message: str,\n) -> None:\n    \"\"\"Mark an external group sync attempt as failed.\"\"\"\n    try:\n        attempt = db_session.execute(\n            select(ExternalGroupPermissionSyncAttempt)\n            .where(ExternalGroupPermissionSyncAttempt.id == attempt_id)\n            .with_for_update()\n        ).scalar_one()\n\n        if not attempt.time_started:\n            attempt.time_started = func.now()  # type: ignore\n        attempt.status = PermissionSyncStatus.FAILED\n        attempt.time_finished = func.now()  # type: ignore\n        attempt.error_message = error_message\n        db_session.commit()\n\n        # Add telemetry for permission sync attempt status change\n        optional_telemetry(\n            record_type=RecordType.PERMISSION_SYNC_COMPLETE,\n            data={\n                \"external_group_sync_attempt_id\": attempt_id,\n                \"status\": PermissionSyncStatus.FAILED.value,\n                \"cc_pair_id\": attempt.connector_credential_pair_id,\n            },\n        )\n    except Exception:\n        db_session.rollback()\n        raise\n\n\ndef complete_external_group_sync_attempt(\n    db_session: Session,\n    attempt_id: int,\n    total_users_processed: int,\n    total_groups_processed: int,\n    total_group_memberships_synced: int,\n    errors_encountered: int = 0,\n) -> ExternalGroupPermissionSyncAttempt:\n    \"\"\"Complete an external group sync attempt by updating progress and setting final status.\n\n    This combines the progress update and final status marking into a single operation.\n    If there were errors, the attempt is marked as COMPLETED_WITH_ERRORS,\n    otherwise it's marked as SUCCESS.\n\n    Args:\n        db_session: The database session\n        attempt_id: The ID of the attempt\n        total_users_processed: Total users processed\n        total_groups_processed: Total groups processed\n        total_group_memberships_synced: Total group memberships synced\n        errors_encountered: Number of errors encountered (determines if COMPLETED_WITH_ERRORS)\n\n    Returns:\n        The completed attempt\n    \"\"\"\n    try:\n        attempt = db_session.execute(\n            select(ExternalGroupPermissionSyncAttempt)\n            .where(ExternalGroupPermissionSyncAttempt.id == attempt_id)\n            .with_for_update()\n        ).scalar_one()\n\n        # Update progress counters\n        attempt.total_users_processed = (\n            attempt.total_users_processed or 0\n        ) + total_users_processed\n        attempt.total_groups_processed = (\n            attempt.total_groups_processed or 0\n        ) + total_groups_processed\n        attempt.total_group_memberships_synced = (\n            attempt.total_group_memberships_synced or 0\n        ) + total_group_memberships_synced\n\n        # Set final status based on whether there were errors\n        if errors_encountered > 0:\n            attempt.status = PermissionSyncStatus.COMPLETED_WITH_ERRORS\n        else:\n            attempt.status = PermissionSyncStatus.SUCCESS\n\n        attempt.time_finished = func.now()  # type: ignore\n        db_session.commit()\n\n        # Add telemetry\n        optional_telemetry(\n            record_type=RecordType.PERMISSION_SYNC_COMPLETE,\n            data={\n                \"external_group_sync_attempt_id\": attempt_id,\n                \"status\": attempt.status.value,\n                \"cc_pair_id\": attempt.connector_credential_pair_id,\n            },\n        )\n        return attempt\n    except Exception:\n        db_session.rollback()\n        logger.exception(\"complete_external_group_sync_attempt exceptioned.\")\n        raise\n\n\n# =============================================================================\n# DELETION FUNCTIONS\n# =============================================================================\n\n\ndef delete_doc_permission_sync_attempts__no_commit(\n    db_session: Session,\n    cc_pair_id: int,\n) -> int:\n    \"\"\"Delete all doc permission sync attempts for a connector credential pair.\n\n    This does not commit the transaction. It should be used within an existing transaction.\n\n    Args:\n        db_session: The database session\n        cc_pair_id: The connector credential pair ID\n\n    Returns:\n        The number of attempts deleted\n    \"\"\"\n    stmt = delete(DocPermissionSyncAttempt).where(\n        DocPermissionSyncAttempt.connector_credential_pair_id == cc_pair_id\n    )\n    result = cast(CursorResult[Any], db_session.execute(stmt))\n    return result.rowcount or 0\n\n\ndef delete_external_group_permission_sync_attempts__no_commit(\n    db_session: Session,\n    cc_pair_id: int,\n) -> int:\n    \"\"\"Delete all external group permission sync attempts for a connector credential pair.\n\n    This does not commit the transaction. It should be used within an existing transaction.\n\n    Args:\n        db_session: The database session\n        cc_pair_id: The connector credential pair ID\n\n    Returns:\n        The number of attempts deleted\n    \"\"\"\n    stmt = delete(ExternalGroupPermissionSyncAttempt).where(\n        ExternalGroupPermissionSyncAttempt.connector_credential_pair_id == cc_pair_id\n    )\n    result = cast(CursorResult[Any], db_session.execute(stmt))\n    return result.rowcount or 0\n"
  },
  {
    "path": "backend/onyx/db/permissions.py",
    "content": "\"\"\"\nDB operations for recomputing user effective_permissions.\n\nThese live in onyx/db/ (not onyx/auth/) because they are pure DB operations\nthat query PermissionGrant rows and update the User.effective_permissions\nJSONB column.  Keeping them here avoids circular imports when called from\nother onyx/db/ modules such as users.py.\n\"\"\"\n\nfrom collections import defaultdict\nfrom uuid import UUID\n\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import PermissionGrant\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\n\n\ndef recompute_user_permissions__no_commit(\n    user_ids: UUID | str | list[UUID] | list[str], db_session: Session\n) -> None:\n    \"\"\"Recompute granted permissions for one or more users.\n\n    Accepts a single UUID or a list.  Uses a single query regardless of\n    how many users are passed, avoiding N+1 issues.\n\n    Stores only directly granted permissions — implication expansion\n    happens at read time via get_effective_permissions().\n\n    Does NOT commit — caller must commit the session.\n    \"\"\"\n    if isinstance(user_ids, (UUID, str)):\n        uid_list = [user_ids]\n    else:\n        uid_list = list(user_ids)\n\n    if not uid_list:\n        return\n\n    # Single query to fetch ALL permissions for these users across ALL their\n    # groups (a user may belong to multiple groups with different grants).\n    rows = db_session.execute(\n        select(User__UserGroup.user_id, PermissionGrant.permission)\n        .join(\n            PermissionGrant,\n            PermissionGrant.group_id == User__UserGroup.user_group_id,\n        )\n        .where(\n            User__UserGroup.user_id.in_(uid_list),\n            PermissionGrant.is_deleted.is_(False),\n        )\n    ).all()\n\n    # Group permissions by user; users with no grants get an empty set.\n    perms_by_user: dict[UUID | str, set[str]] = defaultdict(set)\n    for uid in uid_list:\n        perms_by_user[uid]  # ensure every user has an entry\n    for uid, perm in rows:\n        perms_by_user[uid].add(perm.value)\n\n    for uid, perms in perms_by_user.items():\n        db_session.execute(\n            update(User)\n            .where(User.id == uid)  # type: ignore[arg-type]\n            .values(effective_permissions=sorted(perms))\n        )\n\n\ndef recompute_permissions_for_group__no_commit(\n    group_id: int, db_session: Session\n) -> None:\n    \"\"\"Recompute granted permissions for all users in a group.\n\n    Does NOT commit — caller must commit the session.\n    \"\"\"\n    user_ids: list[UUID] = [\n        uid\n        for uid in db_session.execute(\n            select(User__UserGroup.user_id).where(\n                User__UserGroup.user_group_id == group_id,\n                User__UserGroup.user_id.isnot(None),\n            )\n        )\n        .scalars()\n        .all()\n        if uid is not None\n    ]\n\n    if not user_ids:\n        return\n\n    recompute_user_permissions__no_commit(user_ids, db_session)\n"
  },
  {
    "path": "backend/onyx/db/persona.py",
    "content": "from collections.abc import Sequence\nfrom datetime import datetime\nfrom enum import Enum\nfrom uuid import UUID\n\nfrom fastapi import HTTPException\nfrom sqlalchemy import exists\nfrom sqlalchemy import func\nfrom sqlalchemy import not_\nfrom sqlalchemy import or_\nfrom sqlalchemy import Select\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import aliased\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.access.hierarchy_access import get_user_external_group_ids\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.app_configs import CURATORS_CANNOT_VIEW_OR_EDIT_NON_OWNED_ASSISTANTS\nfrom onyx.configs.constants import DEFAULT_PERSONA_ID\nfrom onyx.configs.constants import NotificationType\nfrom onyx.db.constants import SLACK_BOT_PERSONA_PREFIX\nfrom onyx.db.document_access import get_accessible_documents_by_ids\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Document\nfrom onyx.db.models import DocumentSet\nfrom onyx.db.models import FederatedConnector__DocumentSet\nfrom onyx.db.models import HierarchyNode\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Persona__User\nfrom onyx.db.models import Persona__UserGroup\nfrom onyx.db.models import PersonaLabel\nfrom onyx.db.models import StarterMessage\nfrom onyx.db.models import Tool\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserFile\nfrom onyx.db.models import UserGroup\nfrom onyx.db.notification import create_notification\nfrom onyx.server.features.persona.models import FullPersonaSnapshot\nfrom onyx.server.features.persona.models import MinimalPersonaSnapshot\nfrom onyx.server.features.persona.models import PersonaSharedNotificationData\nfrom onyx.server.features.persona.models import PersonaSnapshot\nfrom onyx.server.features.persona.models import PersonaUpsertRequest\nfrom onyx.server.features.tool.tool_visibility import should_expose_tool_to_fe\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\n\nlogger = setup_logger()\n\n\ndef get_default_behavior_persona(\n    db_session: Session,\n    eager_load_for_tools: bool = False,\n) -> Persona | None:\n    stmt = select(Persona).where(Persona.id == DEFAULT_PERSONA_ID)\n    if eager_load_for_tools:\n        stmt = stmt.options(\n            selectinload(Persona.tools),\n            selectinload(Persona.document_sets),\n            selectinload(Persona.attached_documents),\n            selectinload(Persona.hierarchy_nodes),\n        )\n    return db_session.scalars(stmt).first()\n\n\nclass PersonaLoadType(Enum):\n    NONE = \"none\"\n    MINIMAL = \"minimal\"\n    FULL = \"full\"\n\n\ndef _add_user_filters(\n    stmt: Select[tuple[Persona]], user: User, get_editable: bool = True\n) -> Select[tuple[Persona]]:\n    if user.role == UserRole.ADMIN:\n        return stmt\n\n    stmt = stmt.distinct()\n    Persona__UG = aliased(Persona__UserGroup)\n    User__UG = aliased(User__UserGroup)\n    \"\"\"\n    Here we select cc_pairs by relation:\n    User -> User__UserGroup -> Persona__UserGroup -> Persona\n    \"\"\"\n    stmt = (\n        stmt.outerjoin(Persona__UG)\n        .outerjoin(\n            User__UserGroup,\n            User__UserGroup.user_group_id == Persona__UG.user_group_id,\n        )\n        .outerjoin(\n            Persona__User,\n            Persona__User.persona_id == Persona.id,\n        )\n    )\n    \"\"\"\n    Filter Personas by:\n    - if the user is in the user_group that owns the Persona\n    - if the user is not a global_curator, they must also have a curator relationship\n    to the user_group\n    - if editing is being done, we also filter out Personas that are owned by groups\n    that the user isn't a curator for\n    - if we are not editing, we show all Personas in the groups the user is a curator\n    for (as well as public Personas)\n    - if we are not editing, we return all Personas directly connected to the user\n    \"\"\"\n\n    # Anonymous users only see public Personas\n    if user.is_anonymous:\n        where_clause = Persona.is_public == True  # noqa: E712\n        return stmt.where(where_clause)\n\n    # If curator ownership restriction is enabled, curators can only access their own assistants\n    if CURATORS_CANNOT_VIEW_OR_EDIT_NON_OWNED_ASSISTANTS and user.role in [\n        UserRole.CURATOR,\n        UserRole.GLOBAL_CURATOR,\n    ]:\n        where_clause = (Persona.user_id == user.id) | (Persona.user_id.is_(None))\n        return stmt.where(where_clause)\n\n    where_clause = User__UserGroup.user_id == user.id\n    if user.role == UserRole.CURATOR and get_editable:\n        where_clause &= User__UserGroup.is_curator == True  # noqa: E712\n    if get_editable:\n        user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id)\n        if user.role == UserRole.CURATOR:\n            user_groups = user_groups.where(User__UG.is_curator == True)  # noqa: E712\n        where_clause &= (\n            ~exists()\n            .where(Persona__UG.persona_id == Persona.id)\n            .where(~Persona__UG.user_group_id.in_(user_groups))\n            .correlate(Persona)\n        )\n    else:\n        # Group the public persona conditions\n        public_condition = (Persona.is_public == True) & (  # noqa: E712\n            Persona.is_listed == True  # noqa: E712\n        )\n\n        where_clause |= public_condition\n        where_clause |= Persona__User.user_id == user.id\n\n    where_clause |= Persona.user_id == user.id\n\n    return stmt.where(where_clause)\n\n\ndef fetch_persona_by_id_for_user(\n    db_session: Session, persona_id: int, user: User, get_editable: bool = True\n) -> Persona:\n    stmt = select(Persona).where(Persona.id == persona_id).distinct()\n    stmt = _add_user_filters(stmt=stmt, user=user, get_editable=get_editable)\n    persona = db_session.scalars(stmt).one_or_none()\n    if not persona:\n        raise HTTPException(\n            status_code=403,\n            detail=f\"Persona with ID {persona_id} does not exist or user is not authorized to access it\",\n        )\n    return persona\n\n\ndef get_best_persona_id_for_user(\n    db_session: Session, user: User, persona_id: int | None = None\n) -> int | None:\n    if persona_id is not None:\n        stmt = select(Persona).where(Persona.id == persona_id).distinct()\n        stmt = _add_user_filters(\n            stmt=stmt,\n            user=user,\n            # We don't want to filter by editable here, we just want to see if the\n            # persona is usable by the user\n            get_editable=False,\n        )\n        persona = db_session.scalars(stmt).one_or_none()\n        if persona:\n            return persona.id\n\n    # If the persona is not found, or the slack bot is using doc sets instead of personas,\n    # we need to find the best persona for the user\n    # This is the persona with the highest display priority that the user has access to\n    stmt = select(Persona).order_by(Persona.display_priority.desc()).distinct()\n    stmt = _add_user_filters(stmt=stmt, user=user, get_editable=True)\n    persona = db_session.scalars(stmt).one_or_none()\n    return persona.id if persona else None\n\n\ndef _get_persona_by_name(\n    persona_name: str, user: User | None, db_session: Session\n) -> Persona | None:\n    \"\"\"Fetch a persona by name with access control.\n\n    Access rules:\n    - user=None (system operations): can see all personas\n    - Admin users: can see all personas\n    - Non-admin users: can only see their own personas\n    \"\"\"\n    stmt = select(Persona).where(Persona.name == persona_name)\n    if user and user.role != UserRole.ADMIN:\n        stmt = stmt.where(Persona.user_id == user.id)\n    result = db_session.execute(stmt).scalar_one_or_none()\n    return result\n\n\ndef update_persona_access(\n    persona_id: int,\n    creator_user_id: UUID | None,\n    db_session: Session,\n    is_public: bool | None = None,\n    user_ids: list[UUID] | None = None,\n    group_ids: list[int] | None = None,\n) -> None:\n    \"\"\"Updates the access settings for a persona including public status and user shares.\n\n    NOTE: Callers are responsible for committing.\"\"\"\n\n    needs_sync = False\n    if is_public is not None:\n        needs_sync = True\n        persona = db_session.query(Persona).filter(Persona.id == persona_id).first()\n        if persona:\n            persona.is_public = is_public\n\n    # NOTE: For user-ids and group-ids, `None` means \"leave unchanged\", `[]` means \"clear all shares\",\n    # and a non-empty list means \"replace with these shares\".\n    if user_ids is not None:\n        needs_sync = True\n        db_session.query(Persona__User).filter(\n            Persona__User.persona_id == persona_id\n        ).delete(synchronize_session=\"fetch\")\n\n        for user_uuid in user_ids:\n            db_session.add(Persona__User(persona_id=persona_id, user_id=user_uuid))\n            if user_uuid != creator_user_id:\n                create_notification(\n                    user_id=user_uuid,\n                    notif_type=NotificationType.PERSONA_SHARED,\n                    title=\"A new agent was shared with you!\",\n                    db_session=db_session,\n                    additional_data=PersonaSharedNotificationData(\n                        persona_id=persona_id,\n                    ).model_dump(),\n                )\n\n    # MIT doesn't support group-based sharing, so we allow clearing (no-op since\n    # there shouldn't be any) but raise an error if trying to add actual groups.\n    if group_ids is not None:\n        needs_sync = True\n        db_session.query(Persona__UserGroup).filter(\n            Persona__UserGroup.persona_id == persona_id\n        ).delete(synchronize_session=\"fetch\")\n\n        if group_ids:\n            raise NotImplementedError(\"Onyx MIT does not support group-based sharing\")\n\n    # When sharing changes, user file ACLs need to be updated in the vector DB\n    if needs_sync:\n        mark_persona_user_files_for_sync(persona_id, db_session)\n\n\ndef create_update_persona(\n    persona_id: int | None,\n    create_persona_request: PersonaUpsertRequest,\n    user: User,\n    db_session: Session,\n) -> FullPersonaSnapshot:\n    \"\"\"Higher level function than upsert_persona, although either is valid to use.\"\"\"\n    # Permission to actually use these is checked later\n\n    try:\n        # Featured persona validation\n        if create_persona_request.is_featured:\n            # Curators can edit featured personas, but not make them\n            # TODO this will be reworked soon with RBAC permissions feature\n            if user.role == UserRole.CURATOR or user.role == UserRole.GLOBAL_CURATOR:\n                pass\n            elif user.role != UserRole.ADMIN:\n                raise ValueError(\"Only admins can make a featured persona\")\n\n        # Convert incoming string UUIDs to UUID objects for DB operations\n        converted_user_file_ids = None\n        if create_persona_request.user_file_ids is not None:\n            try:\n                converted_user_file_ids = [\n                    UUID(str_id) for str_id in create_persona_request.user_file_ids\n                ]\n            except Exception:\n                raise ValueError(\"Invalid user_file_ids; must be UUID strings\")\n\n        persona = upsert_persona(\n            persona_id=persona_id,\n            user=user,\n            db_session=db_session,\n            description=create_persona_request.description,\n            name=create_persona_request.name,\n            document_set_ids=create_persona_request.document_set_ids,\n            tool_ids=create_persona_request.tool_ids,\n            is_public=create_persona_request.is_public,\n            llm_model_provider_override=create_persona_request.llm_model_provider_override,\n            llm_model_version_override=create_persona_request.llm_model_version_override,\n            starter_messages=create_persona_request.starter_messages,\n            system_prompt=create_persona_request.system_prompt,\n            task_prompt=create_persona_request.task_prompt,\n            datetime_aware=create_persona_request.datetime_aware,\n            replace_base_system_prompt=create_persona_request.replace_base_system_prompt,\n            uploaded_image_id=create_persona_request.uploaded_image_id,\n            icon_name=create_persona_request.icon_name,\n            display_priority=create_persona_request.display_priority,\n            remove_image=create_persona_request.remove_image,\n            search_start_date=create_persona_request.search_start_date,\n            label_ids=create_persona_request.label_ids,\n            is_featured=create_persona_request.is_featured,\n            user_file_ids=converted_user_file_ids,\n            commit=False,\n            hierarchy_node_ids=create_persona_request.hierarchy_node_ids,\n            document_ids=create_persona_request.document_ids,\n        )\n\n        versioned_update_persona_access = fetch_versioned_implementation(\n            \"onyx.db.persona\", \"update_persona_access\"\n        )\n\n        versioned_update_persona_access(\n            persona_id=persona.id,\n            creator_user_id=user.id,\n            db_session=db_session,\n            user_ids=create_persona_request.users,\n            group_ids=create_persona_request.groups,\n        )\n        db_session.commit()\n\n    except ValueError as e:\n        logger.exception(\"Failed to create persona\")\n        raise HTTPException(status_code=400, detail=str(e))\n\n    return FullPersonaSnapshot.from_model(persona)\n\n\ndef update_persona_shared(\n    persona_id: int,\n    user_ids: list[UUID] | None,\n    user: User,\n    db_session: Session,\n    group_ids: list[int] | None = None,\n    is_public: bool | None = None,\n    label_ids: list[int] | None = None,\n) -> None:\n    \"\"\"Simplified version of `create_update_persona` which only touches the\n    accessibility rather than any of the logic (e.g. prompt, connected data sources,\n    etc.).\"\"\"\n    persona = fetch_persona_by_id_for_user(\n        db_session=db_session, persona_id=persona_id, user=user, get_editable=True\n    )\n\n    if user and user.role != UserRole.ADMIN and persona.user_id != user.id:\n        raise PermissionError(\"You don't have permission to modify this persona\")\n\n    versioned_update_persona_access = fetch_versioned_implementation(\n        \"onyx.db.persona\", \"update_persona_access\"\n    )\n    versioned_update_persona_access(\n        persona_id=persona_id,\n        creator_user_id=user.id,\n        db_session=db_session,\n        is_public=is_public,\n        user_ids=user_ids,\n        group_ids=group_ids,\n    )\n\n    if label_ids is not None:\n        labels = (\n            db_session.query(PersonaLabel).filter(PersonaLabel.id.in_(label_ids)).all()\n        )\n        if len(labels) != len(label_ids):\n            raise ValueError(\"Some label IDs were not found in the database\")\n        persona.labels.clear()\n        persona.labels = labels\n\n    db_session.commit()\n\n\ndef update_persona_public_status(\n    persona_id: int,\n    is_public: bool,\n    db_session: Session,\n    user: User,\n) -> None:\n    persona = fetch_persona_by_id_for_user(\n        db_session=db_session, persona_id=persona_id, user=user, get_editable=True\n    )\n    if user.role != UserRole.ADMIN and persona.user_id != user.id:\n        raise ValueError(\"You don't have permission to modify this persona\")\n\n    persona.is_public = is_public\n    db_session.commit()\n\n\ndef _build_persona_filters(\n    stmt: Select[tuple[Persona]],\n    include_default: bool,\n    include_slack_bot_personas: bool,\n    include_deleted: bool,\n) -> Select[tuple[Persona]]:\n    \"\"\"Filters which Personas are included in the query.\n\n    Args:\n        stmt: The base query to filter.\n        include_default: If True, includes builtin/default personas.\n        include_slack_bot_personas: If True, includes Slack bot personas.\n        include_deleted: If True, includes deleted personas.\n\n    Returns:\n        The modified query with the filters applied.\n    \"\"\"\n    if not include_default:\n        stmt = stmt.where(Persona.builtin_persona.is_(False))\n    if not include_slack_bot_personas:\n        stmt = stmt.where(not_(Persona.name.startswith(SLACK_BOT_PERSONA_PREFIX)))\n    if not include_deleted:\n        stmt = stmt.where(Persona.deleted.is_(False))\n    return stmt\n\n\ndef get_minimal_persona_snapshots_for_user(\n    user: User,\n    db_session: Session,\n    get_editable: bool = True,\n    include_default: bool = True,\n    include_slack_bot_personas: bool = False,\n    include_deleted: bool = False,\n) -> list[MinimalPersonaSnapshot]:\n    stmt = select(Persona)\n    stmt = _add_user_filters(stmt, user, get_editable)\n    stmt = _build_persona_filters(\n        stmt, include_default, include_slack_bot_personas, include_deleted\n    )\n    stmt = stmt.options(\n        selectinload(Persona.tools),\n        selectinload(Persona.labels),\n        selectinload(Persona.document_sets).options(\n            selectinload(DocumentSet.connector_credential_pairs).selectinload(\n                ConnectorCredentialPair.connector\n            ),\n            selectinload(DocumentSet.users),\n            selectinload(DocumentSet.groups),\n            selectinload(DocumentSet.federated_connectors).selectinload(\n                FederatedConnector__DocumentSet.federated_connector\n            ),\n        ),\n        selectinload(Persona.hierarchy_nodes),\n        selectinload(Persona.attached_documents).selectinload(\n            Document.parent_hierarchy_node\n        ),\n        selectinload(Persona.user),\n    )\n    results = db_session.scalars(stmt).all()\n    return [MinimalPersonaSnapshot.from_model(persona) for persona in results]\n\n\ndef get_persona_snapshots_for_user(\n    user: User,\n    db_session: Session,\n    get_editable: bool = True,\n    include_default: bool = True,\n    include_slack_bot_personas: bool = False,\n    include_deleted: bool = False,\n) -> list[PersonaSnapshot]:\n    stmt = select(Persona)\n    stmt = _add_user_filters(stmt, user, get_editable)\n    stmt = _build_persona_filters(\n        stmt, include_default, include_slack_bot_personas, include_deleted\n    )\n    stmt = stmt.options(\n        selectinload(Persona.tools),\n        selectinload(Persona.hierarchy_nodes),\n        selectinload(Persona.attached_documents).selectinload(\n            Document.parent_hierarchy_node\n        ),\n        selectinload(Persona.labels),\n        selectinload(Persona.document_sets).options(\n            selectinload(DocumentSet.connector_credential_pairs).selectinload(\n                ConnectorCredentialPair.connector\n            ),\n            selectinload(DocumentSet.users),\n            selectinload(DocumentSet.groups),\n            selectinload(DocumentSet.federated_connectors).selectinload(\n                FederatedConnector__DocumentSet.federated_connector\n            ),\n        ),\n        selectinload(Persona.user),\n        selectinload(Persona.user_files),\n        selectinload(Persona.users),\n        selectinload(Persona.groups),\n    )\n\n    results = db_session.scalars(stmt).all()\n    return [PersonaSnapshot.from_model(persona) for persona in results]\n\n\ndef get_persona_count_for_user(\n    user: User,\n    db_session: Session,\n    get_editable: bool = True,\n    include_default: bool = True,\n    include_slack_bot_personas: bool = False,\n    include_deleted: bool = False,\n) -> int:\n    \"\"\"Counts the total number of personas accessible to the user.\n\n    Args:\n        user: The user to filter personas for. If None and auth is disabled,\n            assumes the user is an admin. Otherwise, if None shows only public\n            personas.\n        db_session: Database session for executing queries.\n        get_editable: If True, only returns personas the user can edit.\n        include_default: If True, includes builtin/default personas.\n        include_slack_bot_personas: If True, includes Slack bot personas.\n        include_deleted: If True, includes deleted personas.\n\n    Returns:\n        Total count of personas matching the filters and user permissions.\n    \"\"\"\n    stmt = _build_persona_base_query(\n        user=user,\n        get_editable=get_editable,\n        include_default=include_default,\n        include_slack_bot_personas=include_slack_bot_personas,\n        include_deleted=include_deleted,\n    )\n    # Convert to count query.\n    count_stmt = stmt.with_only_columns(func.count(func.distinct(Persona.id))).order_by(\n        None\n    )\n    return db_session.scalar(count_stmt) or 0\n\n\ndef get_minimal_persona_snapshots_paginated(\n    user: User,\n    db_session: Session,\n    page_num: int,\n    page_size: int,\n    get_editable: bool = True,\n    include_default: bool = True,\n    include_slack_bot_personas: bool = False,\n    include_deleted: bool = False,\n) -> list[MinimalPersonaSnapshot]:\n    \"\"\"Gets a single page of minimal persona snapshots with ordering.\n\n    Personas are ordered by display_priority (ASC, nulls last) then by ID (ASC\n    distance from 0).\n\n    Args:\n        user: The user to filter personas for. If None and auth is disabled,\n            assumes the user is an admin. Otherwise, if None shows only public\n            personas.\n        db_session: Database session for executing queries.\n        page_num: Zero-indexed page number (e.g., 0 for the first page).\n        page_size: Number of items per page.\n        get_editable: If True, only returns personas the user can edit.\n        include_default: If True, includes builtin/default personas.\n        include_slack_bot_personas: If True, includes Slack bot personas.\n        include_deleted: If True, includes deleted personas.\n\n    Returns:\n        List of MinimalPersonaSnapshot objects for the requested page, ordered\n        by display_priority (nulls last) then ID.\n    \"\"\"\n    stmt = _get_paginated_persona_query(\n        user,\n        page_num,\n        page_size,\n        get_editable,\n        include_default,\n        include_slack_bot_personas,\n        include_deleted,\n    )\n    # Do eager loading of columns we know MinimalPersonaSnapshot.from_model will\n    # need.\n    stmt = stmt.options(\n        selectinload(Persona.tools),\n        selectinload(Persona.hierarchy_nodes),\n        selectinload(Persona.attached_documents).selectinload(\n            Document.parent_hierarchy_node\n        ),\n        selectinload(Persona.labels),\n        selectinload(Persona.document_sets).options(\n            selectinload(DocumentSet.connector_credential_pairs).selectinload(\n                ConnectorCredentialPair.connector\n            ),\n            selectinload(DocumentSet.users),\n            selectinload(DocumentSet.groups),\n            selectinload(DocumentSet.federated_connectors).selectinload(\n                FederatedConnector__DocumentSet.federated_connector\n            ),\n        ),\n        selectinload(Persona.user),\n    )\n\n    results = db_session.scalars(stmt).all()\n    return [MinimalPersonaSnapshot.from_model(persona) for persona in results]\n\n\ndef get_persona_snapshots_paginated(\n    user: User,\n    db_session: Session,\n    page_num: int,\n    page_size: int,\n    get_editable: bool = True,\n    include_default: bool = True,\n    include_slack_bot_personas: bool = False,\n    include_deleted: bool = False,\n) -> list[PersonaSnapshot]:\n    \"\"\"Gets a single page of persona snapshots (admin view) with ordering.\n\n    Personas are ordered by display_priority (ASC, nulls last) then by ID (ASC\n    distance from 0).\n\n    This function returns PersonaSnapshot objects which contain more detailed\n    information than MinimalPersonaSnapshot, used for admin views.\n\n    Args:\n        user: The user to filter personas for. If None and auth is disabled,\n            assumes the user is an admin. Otherwise, if None shows only public\n            personas.\n        db_session: Database session for executing queries.\n        page_num: Zero-indexed page number (e.g., 0 for the first page).\n        page_size: Number of items per page.\n        get_editable: If True, only returns personas the user can edit.\n        include_default: If True, includes builtin/default personas.\n        include_slack_bot_personas: If True, includes Slack bot personas.\n        include_deleted: If True, includes deleted personas.\n\n    Returns:\n        List of PersonaSnapshot objects for the requested page, ordered by\n        display_priority (nulls last) then ID.\n    \"\"\"\n    stmt = _get_paginated_persona_query(\n        user,\n        page_num,\n        page_size,\n        get_editable,\n        include_default,\n        include_slack_bot_personas,\n        include_deleted,\n    )\n    # Do eager loading of columns we know PersonaSnapshot.from_model will need.\n    stmt = stmt.options(\n        selectinload(Persona.tools),\n        selectinload(Persona.hierarchy_nodes),\n        selectinload(Persona.attached_documents).selectinload(\n            Document.parent_hierarchy_node\n        ),\n        selectinload(Persona.labels),\n        selectinload(Persona.document_sets).options(\n            selectinload(DocumentSet.connector_credential_pairs).selectinload(\n                ConnectorCredentialPair.connector\n            ),\n            selectinload(DocumentSet.users),\n            selectinload(DocumentSet.groups),\n            selectinload(DocumentSet.federated_connectors).selectinload(\n                FederatedConnector__DocumentSet.federated_connector\n            ),\n        ),\n        selectinload(Persona.user),\n        selectinload(Persona.user_files),\n        selectinload(Persona.users),\n        selectinload(Persona.groups),\n    )\n\n    results = db_session.scalars(stmt).all()\n    return [PersonaSnapshot.from_model(persona) for persona in results]\n\n\ndef _get_paginated_persona_query(\n    user: User,\n    page_num: int,\n    page_size: int,\n    get_editable: bool = True,\n    include_default: bool = True,\n    include_slack_bot_personas: bool = False,\n    include_deleted: bool = False,\n) -> Select[tuple[Persona]]:\n    \"\"\"Builds a paginated query on personas ordered on display_priority and id.\n\n    Personas are ordered by display_priority (ASC, nulls last) then by ID (ASC\n    distance from 0) to match the frontend personaComparator() logic.\n\n    Args:\n        user: The user to filter personas for. If None and auth is disabled,\n            assumes the user is an admin. Otherwise, if None shows only public\n            personas.\n        page_num: Zero-indexed page number (e.g., 0 for the first page).\n        page_size: Number of items per page.\n        get_editable: If True, only returns personas the user can edit.\n        include_default: If True, includes builtin/default personas.\n        include_slack_bot_personas: If True, includes Slack bot personas.\n        include_deleted: If True, includes deleted personas.\n\n    Returns:\n        SQLAlchemy Select statement with all filters, ordering, and pagination\n        applied.\n    \"\"\"\n    stmt = _build_persona_base_query(\n        user=user,\n        get_editable=get_editable,\n        include_default=include_default,\n        include_slack_bot_personas=include_slack_bot_personas,\n        include_deleted=include_deleted,\n    )\n    # Add the abs(id) expression to the SELECT list (required for DISTINCT +\n    # ORDER BY).\n    stmt = stmt.add_columns(func.abs(Persona.id).label(\"abs_id\"))\n    # Apply ordering.\n    stmt = stmt.order_by(\n        Persona.display_priority.asc().nullslast(),\n        func.abs(Persona.id).asc(),\n    )\n    # Apply pagination.\n    stmt = stmt.offset(page_num * page_size).limit(page_size)\n    return stmt\n\n\ndef _build_persona_base_query(\n    user: User,\n    get_editable: bool = True,\n    include_default: bool = True,\n    include_slack_bot_personas: bool = False,\n    include_deleted: bool = False,\n) -> Select[tuple[Persona]]:\n    \"\"\"Builds a base persona query with all user and persona filters applied.\n\n    This helper constructs a filtered query that can then be customized for\n    counting, pagination, or full retrieval.\n\n    Args:\n        user: The user to filter personas for. If None and auth is disabled,\n            assumes the user is an admin. Otherwise, if None shows only public\n            personas.\n        get_editable: If True, only returns personas the user can edit.\n        include_default: If True, includes builtin/default personas.\n        include_slack_bot_personas: If True, includes Slack bot personas.\n        include_deleted: If True, includes deleted personas.\n\n    Returns:\n        SQLAlchemy Select statement with all filters applied.\n    \"\"\"\n    stmt = select(Persona)\n    stmt = _add_user_filters(stmt, user, get_editable)\n    stmt = _build_persona_filters(\n        stmt, include_default, include_slack_bot_personas, include_deleted\n    )\n    return stmt\n\n\ndef get_raw_personas_for_user(\n    user: User,\n    db_session: Session,\n    get_editable: bool = True,\n    include_default: bool = True,\n    include_slack_bot_personas: bool = False,\n    include_deleted: bool = False,\n) -> Sequence[Persona]:\n    stmt = _build_persona_base_query(\n        user, get_editable, include_default, include_slack_bot_personas, include_deleted\n    )\n    return db_session.scalars(stmt).all()\n\n\ndef get_personas(db_session: Session) -> Sequence[Persona]:\n    \"\"\"WARNING: Unsafe, can fetch personas from all users.\"\"\"\n    stmt = select(Persona).distinct()\n    stmt = stmt.where(not_(Persona.name.startswith(SLACK_BOT_PERSONA_PREFIX)))\n    stmt = stmt.where(Persona.deleted.is_(False))\n    return db_session.execute(stmt).unique().scalars().all()\n\n\ndef mark_persona_as_deleted(\n    persona_id: int,\n    user: User,\n    db_session: Session,\n) -> None:\n    persona = get_persona_by_id(persona_id=persona_id, user=user, db_session=db_session)\n    persona.deleted = True\n    affected_file_ids = [uf.id for uf in persona.user_files]\n    if affected_file_ids:\n        _mark_files_need_persona_sync(db_session, affected_file_ids)\n    db_session.commit()\n\n\ndef mark_persona_as_not_deleted(\n    persona_id: int,\n    user: User,\n    db_session: Session,\n) -> None:\n    persona = get_persona_by_id(\n        persona_id=persona_id, user=user, db_session=db_session, include_deleted=True\n    )\n    if not persona.deleted:\n        raise ValueError(f\"Persona with ID {persona_id} is not deleted.\")\n    persona.deleted = False\n    affected_file_ids = [uf.id for uf in persona.user_files]\n    if affected_file_ids:\n        _mark_files_need_persona_sync(db_session, affected_file_ids)\n    db_session.commit()\n\n\ndef mark_delete_persona_by_name(\n    persona_name: str, db_session: Session, is_default: bool = True\n) -> None:\n    stmt = (\n        update(Persona)\n        .where(Persona.name == persona_name, Persona.builtin_persona == is_default)\n        .values(deleted=True)\n    )\n\n    db_session.execute(stmt)\n    db_session.commit()\n\n\ndef update_personas_display_priority(\n    display_priority_map: dict[int, int],\n    db_session: Session,\n    user: User,\n    commit_db_txn: bool = False,\n) -> None:\n    \"\"\"Updates the display priorities of the specified Personas.\n\n    Args:\n        display_priority_map: A map of persona IDs to intended display\n            priorities.\n        db_session: Database session for executing queries.\n        user: The user to filter personas for. If None and auth is disabled,\n            assumes the user is an admin. Otherwise, if None shows only public\n            personas.\n        commit_db_txn: If True, commits the database transaction after\n            updating the display priorities. Defaults to False.\n\n    Raises:\n        ValueError: The caller tried to update a persona for which the user does\n            not have access.\n    \"\"\"\n    # No-op to save a query if it is not necessary.\n    if len(display_priority_map) == 0:\n        return\n\n    personas = get_raw_personas_for_user(\n        user,\n        db_session,\n        get_editable=False,\n        include_default=True,\n        include_slack_bot_personas=True,\n        include_deleted=True,\n    )\n    available_personas_map: dict[int, Persona] = {\n        persona.id: persona for persona in personas\n    }\n\n    for persona_id, priority in display_priority_map.items():\n        if persona_id not in available_personas_map:\n            raise ValueError(\n                f\"Invalid persona ID provided: Persona with ID {persona_id} was not found for this user.\"\n            )\n\n        available_personas_map[persona_id].display_priority = priority\n\n    if commit_db_txn:\n        db_session.commit()\n\n\ndef mark_persona_user_files_for_sync(\n    persona_id: int,\n    db_session: Session,\n) -> None:\n    \"\"\"When persona sharing changes, mark all of its user files for sync\n    so that their ACLs get updated in the vector DB.\"\"\"\n    persona = (\n        db_session.query(Persona)\n        .options(selectinload(Persona.user_files))\n        .filter(Persona.id == persona_id)\n        .first()\n    )\n    if not persona:\n        return\n    file_ids = [uf.id for uf in persona.user_files]\n    _mark_files_need_persona_sync(db_session, file_ids)\n\n\ndef _mark_files_need_persona_sync(\n    db_session: Session,\n    user_file_ids: list[UUID],\n) -> None:\n    \"\"\"Flag the given UserFile rows so the background sync task picks them up\n    and updates their persona metadata in the vector DB.\"\"\"\n    if not user_file_ids:\n        return\n    db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids)).update(\n        {UserFile.needs_persona_sync: True},\n        synchronize_session=False,\n    )\n\n\ndef upsert_persona(\n    user: User | None,\n    name: str,\n    description: str,\n    llm_model_provider_override: str | None,\n    llm_model_version_override: str | None,\n    starter_messages: list[StarterMessage] | None,\n    # Embedded prompt fields\n    system_prompt: str | None,\n    task_prompt: str | None,\n    datetime_aware: bool | None,\n    is_public: bool,\n    db_session: Session,\n    document_set_ids: list[int] | None = None,\n    tool_ids: list[int] | None = None,\n    persona_id: int | None = None,\n    commit: bool = True,\n    uploaded_image_id: str | None = None,\n    icon_name: str | None = None,\n    display_priority: int | None = None,\n    is_listed: bool = True,\n    remove_image: bool | None = None,\n    search_start_date: datetime | None = None,\n    builtin_persona: bool = False,\n    is_featured: bool | None = None,\n    label_ids: list[int] | None = None,\n    user_file_ids: list[UUID] | None = None,\n    hierarchy_node_ids: list[int] | None = None,\n    document_ids: list[str] | None = None,\n    replace_base_system_prompt: bool = False,\n) -> Persona:\n    \"\"\"\n    NOTE: This operation cannot update persona configuration options that\n    are core to the persona, such as its display priority and\n    whether or not the assistant is a built-in / default assistant\n    \"\"\"\n\n    if persona_id is not None:\n        existing_persona = db_session.query(Persona).filter_by(id=persona_id).first()\n    else:\n        existing_persona = _get_persona_by_name(\n            persona_name=name, user=user, db_session=db_session\n        )\n\n        # Check for duplicate names when creating new personas\n        # Deleted personas are allowed to be overwritten\n        if existing_persona and not existing_persona.deleted:\n            raise ValueError(\n                f\"Assistant with name '{name}' already exists. Please rename your assistant.\"\n            )\n\n    if existing_persona and user:\n        # this checks if the user has permission to edit the persona\n        # will raise an Exception if the user does not have permission\n        # Skip check if user is None (system/admin operation)\n        existing_persona = fetch_persona_by_id_for_user(\n            db_session=db_session,\n            persona_id=existing_persona.id,\n            user=user,\n            get_editable=True,\n        )\n\n    # Fetch and attach tools by IDs\n    tools = None\n    if tool_ids is not None:\n        tools = db_session.query(Tool).filter(Tool.id.in_(tool_ids)).all()\n        if not tools and tool_ids:\n            raise ValueError(\"Tools not found\")\n\n    # Fetch and attach document_sets by IDs\n    document_sets = None\n    if document_set_ids is not None:\n        document_sets = (\n            db_session.query(DocumentSet)\n            .filter(DocumentSet.id.in_(document_set_ids))\n            .all()\n        )\n        if not document_sets and document_set_ids:\n            raise ValueError(\"document_sets not found\")\n\n    # Fetch and attach user_files by IDs\n    user_files = None\n    if user_file_ids is not None:\n        user_files = (\n            db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids)).all()\n        )\n        if not user_files and user_file_ids:\n            raise ValueError(\"user_files not found\")\n\n    labels = None\n    if label_ids is not None:\n        labels = (\n            db_session.query(PersonaLabel).filter(PersonaLabel.id.in_(label_ids)).all()\n        )\n        if len(labels) != len(label_ids):\n            raise ValueError(\"Some label IDs were not found in the database\")\n\n    # Fetch and attach hierarchy_nodes by IDs\n    hierarchy_nodes = None\n    if hierarchy_node_ids:\n        hierarchy_nodes = (\n            db_session.query(HierarchyNode)\n            .filter(HierarchyNode.id.in_(hierarchy_node_ids))\n            .all()\n        )\n        if not hierarchy_nodes and hierarchy_node_ids:\n            raise ValueError(\"hierarchy_nodes not found\")\n\n    # Fetch and attach documents by IDs, filtering for access permissions\n    attached_documents = None\n    if document_ids is not None:\n        user_email = user.email if user else None\n        external_group_ids = (\n            get_user_external_group_ids(db_session, user) if user else []\n        )\n        attached_documents = get_accessible_documents_by_ids(\n            db_session=db_session,\n            document_ids=document_ids,\n            user_email=user_email,\n            external_group_ids=external_group_ids,\n        )\n        if not attached_documents and document_ids:\n            raise ValueError(\"documents not found or not accessible\")\n\n    # ensure all specified tools are valid\n    if tools:\n        validate_persona_tools(tools, db_session)\n\n    if existing_persona:\n        # Built-in personas can only be updated through YAML configuration.\n        # This ensures that core system personas are not modified unintentionally.\n        if existing_persona.builtin_persona and not builtin_persona:\n            raise ValueError(\"Cannot update builtin persona with non-builtin.\")\n\n        # The following update excludes `default`, `built-in`, and display priority.\n        # Display priority is handled separately in the `display-priority` endpoint.\n        # `default` and `built-in` properties can only be set when creating a persona.\n        existing_persona.name = name\n        existing_persona.description = description\n        existing_persona.llm_model_provider_override = llm_model_provider_override\n        existing_persona.llm_model_version_override = llm_model_version_override\n        existing_persona.starter_messages = starter_messages\n        existing_persona.deleted = False  # Un-delete if previously deleted\n        existing_persona.is_public = is_public\n        if remove_image or uploaded_image_id:\n            existing_persona.uploaded_image_id = uploaded_image_id\n        existing_persona.icon_name = icon_name\n        existing_persona.is_listed = is_listed\n        existing_persona.search_start_date = search_start_date\n        if label_ids is not None:\n            existing_persona.labels.clear()\n            existing_persona.labels = labels or []\n        existing_persona.is_featured = (\n            is_featured if is_featured is not None else existing_persona.is_featured\n        )\n        # Update embedded prompt fields if provided\n        if system_prompt is not None:\n            existing_persona.system_prompt = system_prompt\n        if task_prompt is not None:\n            existing_persona.task_prompt = task_prompt\n        if datetime_aware is not None:\n            existing_persona.datetime_aware = datetime_aware\n        existing_persona.replace_base_system_prompt = replace_base_system_prompt\n\n        # Do not delete any associations manually added unless\n        # a new updated list is provided\n        if document_sets is not None:\n            existing_persona.document_sets.clear()\n            existing_persona.document_sets = document_sets or []\n\n        # Note: prompts are now embedded in personas - no separate prompts relationship\n\n        if tools is not None:\n            existing_persona.tools = tools or []\n\n        if user_file_ids is not None:\n            old_file_ids = {uf.id for uf in existing_persona.user_files}\n            new_file_ids = {uf.id for uf in (user_files or [])}\n            affected_file_ids = old_file_ids | new_file_ids\n            existing_persona.user_files.clear()\n            existing_persona.user_files = user_files or []\n            if affected_file_ids:\n                _mark_files_need_persona_sync(db_session, list(affected_file_ids))\n\n        if hierarchy_node_ids is not None:\n            existing_persona.hierarchy_nodes.clear()\n            existing_persona.hierarchy_nodes = hierarchy_nodes or []\n\n        if document_ids is not None:\n            existing_persona.attached_documents.clear()\n            existing_persona.attached_documents = attached_documents or []\n\n        # We should only update display priority if it is not already set\n        if existing_persona.display_priority is None:\n            existing_persona.display_priority = display_priority\n\n        persona = existing_persona\n\n    else:\n        # Create new persona - prompt configuration will be set separately if needed\n        new_persona = Persona(\n            id=persona_id,\n            user_id=user.id if user else None,\n            is_public=is_public,\n            name=name,\n            description=description,\n            builtin_persona=builtin_persona,\n            system_prompt=system_prompt or \"\",\n            task_prompt=task_prompt or \"\",\n            datetime_aware=(datetime_aware if datetime_aware is not None else True),\n            replace_base_system_prompt=replace_base_system_prompt,\n            document_sets=document_sets or [],\n            llm_model_provider_override=llm_model_provider_override,\n            llm_model_version_override=llm_model_version_override,\n            starter_messages=starter_messages,\n            tools=tools or [],\n            uploaded_image_id=uploaded_image_id,\n            icon_name=icon_name,\n            display_priority=display_priority,\n            is_listed=is_listed,\n            search_start_date=search_start_date,\n            is_featured=(is_featured if is_featured is not None else False),\n            user_files=user_files or [],\n            labels=labels or [],\n            hierarchy_nodes=hierarchy_nodes or [],\n            attached_documents=attached_documents or [],\n        )\n        db_session.add(new_persona)\n        if user_files:\n            _mark_files_need_persona_sync(db_session, [uf.id for uf in user_files])\n        persona = new_persona\n    if commit:\n        db_session.commit()\n    else:\n        # flush the session so that the persona has an ID\n        db_session.flush()\n\n    return persona\n\n\ndef delete_old_default_personas(\n    db_session: Session,\n) -> None:\n    \"\"\"Note, this locks out the Summarize and Paraphrase personas for now\n    Need a more graceful fix later or those need to never have IDs.\n\n    This function is idempotent, so it can be run multiple times without issue.\n    \"\"\"\n    OLD_SUFFIX = \"_old\"\n    stmt = (\n        update(Persona)\n        .where(\n            Persona.builtin_persona,\n            Persona.id > 0,\n            or_(\n                Persona.deleted.is_(False),\n                not_(Persona.name.endswith(OLD_SUFFIX)),\n            ),\n        )\n        .values(deleted=True, name=func.concat(Persona.name, OLD_SUFFIX))\n    )\n\n    db_session.execute(stmt)\n    db_session.commit()\n\n\ndef update_persona_featured(\n    persona_id: int,\n    is_featured: bool,\n    db_session: Session,\n    user: User,\n) -> None:\n    persona = fetch_persona_by_id_for_user(\n        db_session=db_session, persona_id=persona_id, user=user, get_editable=True\n    )\n\n    persona.is_featured = is_featured\n    db_session.commit()\n\n\ndef update_persona_visibility(\n    persona_id: int,\n    is_listed: bool,\n    db_session: Session,\n    user: User,\n) -> None:\n    persona = fetch_persona_by_id_for_user(\n        db_session=db_session, persona_id=persona_id, user=user, get_editable=True\n    )\n\n    persona.is_listed = is_listed\n    db_session.commit()\n\n\ndef validate_persona_tools(tools: list[Tool], db_session: Session) -> None:\n    # local import to avoid circular import. DB layer should not depend on tools layer.\n    from onyx.tools.built_in_tools import get_built_in_tool_by_id\n\n    for tool in tools:\n        if tool.in_code_tool_id is not None:\n            tool_cls = get_built_in_tool_by_id(tool.in_code_tool_id)\n            if not tool_cls.is_available(db_session):\n                raise ValueError(f\"Tool {tool.in_code_tool_id} is not available\")\n\n\n# TODO: since this gets called with every chat message, could it be more efficient to pregenerate\n# a direct mapping indicating whether a user has access to a specific persona?\ndef get_persona_by_id(\n    persona_id: int,\n    user: User | None,\n    db_session: Session,\n    include_deleted: bool = False,\n    is_for_edit: bool = True,  # NOTE: assume true for safety\n) -> Persona:\n    persona_stmt = (\n        select(Persona)\n        .distinct()\n        .outerjoin(Persona.groups)\n        .outerjoin(Persona.users)\n        .outerjoin(UserGroup.user_group_relationships)\n        .where(Persona.id == persona_id)\n    )\n\n    if not include_deleted:\n        persona_stmt = persona_stmt.where(Persona.deleted.is_(False))\n\n    if not user or user.role == UserRole.ADMIN:\n        result = db_session.execute(persona_stmt)\n        persona = result.scalar_one_or_none()\n        if persona is None:\n            raise ValueError(f\"Persona with ID {persona_id} does not exist\")\n        return persona\n\n    # or check if user owns persona\n    or_conditions = Persona.user_id == user.id\n    # allow access if persona user id is None\n    or_conditions |= Persona.user_id == None  # noqa: E711\n    if not is_for_edit:\n        # if the user is in a group related to the persona\n        or_conditions |= User__UserGroup.user_id == user.id\n        # if the user is in the .users of the persona\n        or_conditions |= User.id == user.id\n        or_conditions |= Persona.is_public == True  # noqa: E712\n    elif user.role == UserRole.GLOBAL_CURATOR:\n        # global curators can edit personas for the groups they are in\n        or_conditions |= User__UserGroup.user_id == user.id\n    elif user.role == UserRole.CURATOR:\n        # curators can edit personas for the groups they are curators of\n        or_conditions |= (User__UserGroup.user_id == user.id) & (\n            User__UserGroup.is_curator == True  # noqa: E712\n        )\n\n    persona_stmt = persona_stmt.where(or_conditions)\n    result = db_session.execute(persona_stmt)\n    persona = result.scalar_one_or_none()\n    if persona is None:\n        raise ValueError(\n            f\"Persona with ID {persona_id} does not exist or does not belong to user\"\n        )\n    return persona\n\n\ndef get_personas_by_ids(\n    persona_ids: list[int], db_session: Session\n) -> Sequence[Persona]:\n    \"\"\"WARNING: Unsafe, can fetch personas from all users.\"\"\"\n    if not persona_ids:\n        return []\n    personas = db_session.scalars(\n        select(Persona).where(Persona.id.in_(persona_ids))\n    ).all()\n\n    return personas\n\n\ndef delete_persona_by_name(\n    persona_name: str, db_session: Session, is_default: bool = True\n) -> None:\n    stmt = (\n        update(Persona)\n        .where(Persona.name == persona_name, Persona.builtin_persona == is_default)\n        .values(deleted=True)\n    )\n\n    db_session.execute(stmt)\n    db_session.commit()\n\n\ndef get_assistant_labels(db_session: Session) -> list[PersonaLabel]:\n    return db_session.query(PersonaLabel).all()\n\n\ndef create_assistant_label(db_session: Session, name: str) -> PersonaLabel:\n    label = PersonaLabel(name=name)\n    db_session.add(label)\n    db_session.commit()\n    return label\n\n\ndef update_persona_label(\n    label_id: int,\n    label_name: str,\n    db_session: Session,\n) -> None:\n    persona_label = (\n        db_session.query(PersonaLabel).filter(PersonaLabel.id == label_id).one_or_none()\n    )\n    if persona_label is None:\n        raise ValueError(f\"Persona label with ID {label_id} does not exist\")\n    persona_label.name = label_name\n    db_session.commit()\n\n\ndef delete_persona_label(label_id: int, db_session: Session) -> None:\n    db_session.query(PersonaLabel).filter(PersonaLabel.id == label_id).delete()\n    db_session.commit()\n\n\ndef persona_has_search_tool(persona_id: int, db_session: Session) -> bool:\n    persona = (\n        db_session.query(Persona)\n        .options(selectinload(Persona.tools))\n        .filter(Persona.id == persona_id)\n        .one_or_none()\n    )\n    if persona is None:\n        raise ValueError(f\"Persona with ID {persona_id} does not exist\")\n    return any(tool.in_code_tool_id == \"run_search\" for tool in persona.tools)\n\n\ndef get_default_assistant(db_session: Session) -> Persona | None:\n    \"\"\"Fetch the default assistant (persona with builtin_persona=True).\"\"\"\n    return (\n        db_session.query(Persona)\n        .options(selectinload(Persona.tools))\n        .filter(Persona.builtin_persona.is_(True))\n        # NOTE: need to add this since we had prior builtin personas\n        # that have since been deleted\n        .filter(Persona.deleted.is_(False))\n        .one_or_none()\n    )\n\n\ndef update_default_assistant_configuration(\n    db_session: Session,\n    tool_ids: list[int] | None = None,\n    system_prompt: str | None = None,\n    update_system_prompt: bool = False,\n) -> Persona:\n    \"\"\"Update only tools and system_prompt for the default assistant.\n\n    Args:\n        db_session: Database session\n        tool_ids: List of tool IDs to enable (if None, tools are not updated)\n        system_prompt: New system prompt value (None means use default)\n        update_system_prompt: If True, update the system_prompt field (allows setting to None)\n\n    Returns:\n        Updated Persona object\n\n    Raises:\n        ValueError: If default assistant not found or invalid tool IDs provided\n    \"\"\"\n    # Get the default assistant\n    persona = get_default_assistant(db_session)\n    if not persona:\n        raise ValueError(\"Default assistant not found\")\n\n    # Update system prompt if explicitly requested\n    if update_system_prompt:\n        persona.system_prompt = system_prompt\n\n    # Update tools if provided\n    if tool_ids is not None:\n        # Clear existing tool associations\n        persona.tools = []\n\n        # Add new tool associations\n        for tool_id in tool_ids:\n            tool = db_session.query(Tool).filter(Tool.id == tool_id).one_or_none()\n            if not tool:\n                raise ValueError(f\"Tool with ID {tool_id} not found\")\n\n            if not should_expose_tool_to_fe(tool):\n                raise ValueError(f\"Tool with ID {tool_id} cannot be assigned\")\n\n            if not tool.enabled:\n                raise ValueError(\n                    f\"Enable tool {tool.display_name or tool.name} before assigning it\"\n                )\n\n            persona.tools.append(tool)\n\n    db_session.commit()\n    return persona\n\n\ndef user_can_access_persona(\n    db_session: Session, persona_id: int, user: User, get_editable: bool = False\n) -> bool:\n    \"\"\"Check if a user has access to a specific persona.\n\n    Args:\n        db_session: Database session\n        persona_id: ID of the persona to check\n        user: User to check access for\n        get_editable: If True, check for edit access; if False, check for view access\n\n    Returns:\n        True if user can access the persona, False otherwise\n    \"\"\"\n    stmt = select(Persona).where(Persona.id == persona_id, Persona.deleted.is_(False))\n    stmt = _add_user_filters(stmt, user, get_editable=get_editable)\n    return db_session.scalar(stmt) is not None\n"
  },
  {
    "path": "backend/onyx/db/projects.py",
    "content": "import datetime\nimport uuid\nfrom typing import List\nfrom uuid import UUID\n\nfrom fastapi import HTTPException\nfrom fastapi import UploadFile\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom pydantic import Field\nfrom sqlalchemy import func\nfrom sqlalchemy.orm import Session\nfrom starlette.background import BackgroundTasks\n\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.enums import UserFileStatus\nfrom onyx.db.models import Project__UserFile\nfrom onyx.db.models import User\nfrom onyx.db.models import UserFile\nfrom onyx.db.models import UserProject\nfrom onyx.server.documents.connector import upload_files\nfrom onyx.server.features.projects.projects_file_utils import categorize_uploaded_files\nfrom onyx.server.features.projects.projects_file_utils import RejectedFile\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\nclass CategorizedFilesResult(BaseModel):\n    user_files: list[UserFile]\n    rejected_files: list[RejectedFile]\n    id_to_temp_id: dict[str, str]\n    # Filenames that should be stored but not indexed.\n    skip_indexing_filenames: set[str] = Field(default_factory=set)\n    # Allow SQLAlchemy ORM models inside this result container\n    model_config = ConfigDict(arbitrary_types_allowed=True)\n\n    @property\n    def indexable_files(self) -> list[UserFile]:\n        return [\n            uf\n            for uf in self.user_files\n            if (uf.name or \"\") not in self.skip_indexing_filenames\n        ]\n\n\ndef build_hashed_file_key(file: UploadFile) -> str:\n    name_prefix = (file.filename or \"\")[:50]\n    return f\"{file.size}|{name_prefix}\"\n\n\ndef create_user_files(\n    files: List[UploadFile],\n    project_id: int | None,\n    user: User,\n    db_session: Session,\n    link_url: str | None = None,\n    temp_id_map: dict[str, str] | None = None,\n) -> CategorizedFilesResult:\n\n    # Categorize the files\n    categorized_files = categorize_uploaded_files(files, db_session)\n    # NOTE: At the moment, zip metadata is not used for user files.\n    # Should revisit to decide whether this should be a feature.\n    upload_response = upload_files(categorized_files.acceptable, FileOrigin.USER_FILE)\n    user_files = []\n    rejected_files = categorized_files.rejected\n    id_to_temp_id: dict[str, str] = {}\n    # Pair returned storage paths with the same set of acceptable files we uploaded\n    for file_path, file in zip(\n        upload_response.file_paths, categorized_files.acceptable\n    ):\n        new_id = uuid.uuid4()\n        new_temp_id = (\n            temp_id_map.get(build_hashed_file_key(file)) if temp_id_map else None\n        )\n        if new_temp_id is not None:\n            id_to_temp_id[str(new_id)] = new_temp_id\n        should_skip = (file.filename or \"\") in categorized_files.skip_indexing\n        new_file = UserFile(\n            id=new_id,\n            user_id=user.id,\n            file_id=file_path,\n            name=file.filename,\n            token_count=categorized_files.acceptable_file_to_token_count[\n                file.filename or \"\"\n            ],\n            link_url=link_url,\n            content_type=file.content_type,\n            file_type=file.content_type,\n            status=UserFileStatus.SKIPPED if should_skip else UserFileStatus.PROCESSING,\n            last_accessed_at=datetime.datetime.now(datetime.timezone.utc),\n        )\n        # Persist the UserFile first to satisfy FK constraints for association table\n        db_session.add(new_file)\n        db_session.flush()\n        if project_id:\n            project_to_user_file = Project__UserFile(\n                project_id=project_id,\n                user_file_id=new_file.id,\n            )\n            db_session.add(project_to_user_file)\n        user_files.append(new_file)\n    db_session.commit()\n    return CategorizedFilesResult(\n        user_files=user_files,\n        rejected_files=rejected_files,\n        id_to_temp_id=id_to_temp_id,\n        skip_indexing_filenames=categorized_files.skip_indexing,\n    )\n\n\ndef upload_files_to_user_files_with_indexing(\n    files: List[UploadFile],\n    project_id: int | None,\n    user: User,\n    temp_id_map: dict[str, str] | None,\n    db_session: Session,\n    background_tasks: BackgroundTasks | None = None,\n) -> CategorizedFilesResult:\n    if project_id is not None and user is not None:\n        if not check_project_ownership(project_id, user.id, db_session):\n            raise HTTPException(status_code=404, detail=\"Project not found\")\n\n    categorized_files_result = create_user_files(\n        files,\n        project_id,\n        user,\n        db_session,\n        temp_id_map=temp_id_map,\n    )\n    user_files = categorized_files_result.user_files\n    rejected_files = categorized_files_result.rejected_files\n    id_to_temp_id = categorized_files_result.id_to_temp_id\n    indexable_files = categorized_files_result.indexable_files\n    # Trigger per-file processing immediately for the current tenant\n    tenant_id = get_current_tenant_id()\n    for rejected_file in rejected_files:\n        logger.warning(\n            f\"File {rejected_file.filename} rejected for {rejected_file.reason}\"\n        )\n\n    if DISABLE_VECTOR_DB and background_tasks is not None:\n        from onyx.background.task_utils import drain_processing_loop\n\n        background_tasks.add_task(drain_processing_loop, tenant_id)\n        for user_file in indexable_files:\n            logger.info(f\"Queued in-process processing for user_file_id={user_file.id}\")\n    else:\n        from onyx.background.celery.versioned_apps.client import app as client_app\n\n        for user_file in indexable_files:\n            task = client_app.send_task(\n                OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,\n                kwargs={\"user_file_id\": user_file.id, \"tenant_id\": tenant_id},\n                queue=OnyxCeleryQueues.USER_FILE_PROCESSING,\n                priority=OnyxCeleryPriority.HIGH,\n                expires=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,\n            )\n            logger.info(\n                f\"Triggered indexing for user_file_id={user_file.id} with task_id={task.id}\"\n            )\n\n    return CategorizedFilesResult(\n        user_files=user_files,\n        rejected_files=rejected_files,\n        id_to_temp_id=id_to_temp_id,\n        skip_indexing_filenames=categorized_files_result.skip_indexing_filenames,\n    )\n\n\ndef check_project_ownership(\n    project_id: int, user_id: UUID | None, db_session: Session\n) -> bool:\n    # In no-auth mode, all projects are accessible\n    if user_id is None:\n        # Verify project exists\n        return (\n            db_session.query(UserProject).filter(UserProject.id == project_id).first()\n            is not None\n        )\n\n    return (\n        db_session.query(UserProject)\n        .filter(UserProject.id == project_id, UserProject.user_id == user_id)\n        .first()\n        is not None\n    )\n\n\ndef get_user_files_from_project(\n    project_id: int, user_id: UUID | None, db_session: Session\n) -> list[UserFile]:\n    # First check if the user owns the project\n    if not check_project_ownership(project_id, user_id, db_session):\n        return []\n\n    return (\n        db_session.query(UserFile)\n        .join(Project__UserFile)\n        .filter(Project__UserFile.project_id == project_id)\n        .all()\n    )\n\n\ndef get_project_instructions(db_session: Session, project_id: int | None) -> str | None:\n    \"\"\"Return the project's instruction text from the project, else None.\n\n    Safe helper that swallows DB errors and returns None on any failure.\n    \"\"\"\n    if not project_id:\n        return None\n    try:\n        project = (\n            db_session.query(UserProject)\n            .filter(UserProject.id == project_id)\n            .one_or_none()\n        )\n        if not project or not project.instructions:\n            return None\n        instructions = project.instructions.strip()\n        return instructions or None\n    except Exception:\n        return None\n\n\ndef get_project_token_count(\n    project_id: int | None,\n    user_id: UUID | None,\n    db_session: Session,\n) -> int:\n    \"\"\"Return sum of token_count for all user files in the given project.\n\n    If project_id is None, returns 0.\n    \"\"\"\n    if project_id is None:\n        return 0\n\n    total_tokens = (\n        db_session.query(func.coalesce(func.sum(UserFile.token_count), 0))\n        .filter(\n            UserFile.user_id == user_id,\n            UserFile.projects.any(id=project_id),\n        )\n        .scalar()\n        or 0\n    )\n\n    return int(total_tokens)\n"
  },
  {
    "path": "backend/onyx/db/pydantic_type.py",
    "content": "import json\nfrom typing import Any\nfrom typing import Optional\nfrom typing import Type\n\nfrom pydantic import BaseModel\nfrom sqlalchemy.dialects.postgresql import JSONB\nfrom sqlalchemy.types import TypeDecorator\n\n\nclass PydanticType(TypeDecorator):\n    impl = JSONB\n\n    def __init__(\n        self, pydantic_model: Type[BaseModel], *args: Any, **kwargs: Any\n    ) -> None:\n        super().__init__(*args, **kwargs)\n        self.pydantic_model = pydantic_model\n\n    def process_bind_param(\n        self,\n        value: Optional[BaseModel],\n        dialect: Any,  # noqa: ARG002\n    ) -> Optional[dict]:\n        if value is not None:\n            return json.loads(value.json())\n        return None\n\n    def process_result_value(\n        self,\n        value: Optional[dict],\n        dialect: Any,  # noqa: ARG002\n    ) -> Optional[BaseModel]:\n        if value is not None:\n            return self.pydantic_model.parse_obj(value)\n        return None\n\n\nclass PydanticListType(TypeDecorator):\n    impl = JSONB\n\n    def __init__(\n        self, pydantic_model: Type[BaseModel], *args: Any, **kwargs: Any\n    ) -> None:\n        super().__init__(*args, **kwargs)\n        self.pydantic_model = pydantic_model\n\n    def process_bind_param(\n        self,\n        value: Optional[list[BaseModel]],\n        dialect: Any,  # noqa: ARG002\n    ) -> Optional[list[dict]]:\n        if value is not None:\n            return [json.loads(item.model_dump_json()) for item in value]\n        return None\n\n    def process_result_value(\n        self,\n        value: Optional[list[dict]],\n        dialect: Any,  # noqa: ARG002\n    ) -> Optional[list[BaseModel]]:\n        if value is not None:\n            return [self.pydantic_model.model_validate(item) for item in value]\n        return None\n"
  },
  {
    "path": "backend/onyx/db/relationships.py",
    "content": "from typing import List\n\nfrom sqlalchemy import or_\nfrom sqlalchemy.dialects import postgresql\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\nfrom sqlalchemy.orm import Session\n\nimport onyx.db.document as dbdocument\nfrom onyx.db.models import KGEntity\nfrom onyx.db.models import KGEntityExtractionStaging\nfrom onyx.db.models import KGRelationship\nfrom onyx.db.models import KGRelationshipExtractionStaging\nfrom onyx.db.models import KGRelationshipType\nfrom onyx.db.models import KGRelationshipTypeExtractionStaging\nfrom onyx.db.models import KGStage\nfrom onyx.kg.utils.formatting_utils import extract_relationship_type_id\nfrom onyx.kg.utils.formatting_utils import format_relationship_id\nfrom onyx.kg.utils.formatting_utils import get_entity_type\nfrom onyx.kg.utils.formatting_utils import make_relationship_id\nfrom onyx.kg.utils.formatting_utils import make_relationship_type_id\nfrom onyx.kg.utils.formatting_utils import split_relationship_id\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef upsert_staging_relationship(\n    db_session: Session,\n    relationship_id_name: str,\n    source_document_id: str | None,\n    occurrences: int = 1,\n) -> KGRelationshipExtractionStaging:\n    \"\"\"\n    Add or update a new staging relationship to the database.\n\n    Args:\n        db_session: SQLAlchemy database session\n        relationship_id_name: The ID name of the relationship in format \"source__relationship__target\"\n        source_document_id: ID of the source document\n        occurrences: Number of times this relationship has been found\n    Returns:\n        The created or updated KGRelationshipExtractionStaging object\n\n    Raises:\n        sqlalchemy.exc.IntegrityError: If there's an error with the database operation\n    \"\"\"\n    # Generate a unique ID for the relationship\n    relationship_id_name = format_relationship_id(relationship_id_name)\n    (\n        source_entity_id_name,\n        relationship_string,\n        target_entity_id_name,\n    ) = split_relationship_id(relationship_id_name)\n\n    source_entity_type = get_entity_type(source_entity_id_name)\n    target_entity_type = get_entity_type(target_entity_id_name)\n    relationship_type = extract_relationship_type_id(relationship_id_name)\n\n    # Insert the new relationship\n    stmt = (\n        postgresql.insert(KGRelationshipExtractionStaging)\n        .values(\n            {\n                \"id_name\": relationship_id_name,\n                \"source_node\": source_entity_id_name,\n                \"target_node\": target_entity_id_name,\n                \"source_node_type\": source_entity_type,\n                \"target_node_type\": target_entity_type,\n                \"type\": relationship_string.lower(),\n                \"relationship_type_id_name\": relationship_type,\n                \"source_document\": source_document_id,\n                \"occurrences\": occurrences,\n            }\n        )\n        .on_conflict_do_update(\n            index_elements=[\"id_name\", \"source_document\"],\n            set_=dict(\n                occurrences=KGRelationshipExtractionStaging.occurrences + occurrences,\n            ),\n        )\n        .returning(KGRelationshipExtractionStaging)\n    )\n\n    result = db_session.execute(stmt).scalar()\n    if result is None:\n        raise RuntimeError(\n            f\"Failed to create or increment staging relationship with id_name: {relationship_id_name}\"\n        )\n\n    # Update the document's kg_stage if source_document is provided\n    if source_document_id is not None:\n        dbdocument.update_document_kg_info(\n            db_session,\n            document_id=source_document_id,\n            kg_stage=KGStage.EXTRACTED,\n        )\n    db_session.flush()  # Flush to get any DB errors early\n\n    return result\n\n\ndef upsert_relationship(\n    db_session: Session,\n    relationship_id_name: str,\n    source_document_id: str | None,\n    occurrences: int = 1,\n) -> KGRelationship:\n    \"\"\"\n    Upsert a new relationship directly to the database.\n\n    Args:\n        db_session: SQLAlchemy database session\n        relationship_id_name: The ID name of the relationship in format \"source__relationship__target\"\n        source_document_id: ID of the source document\n        occurrences: Number of times this relationship has been found\n    Returns:\n        The created or updated KGRelationship object\n\n    Raises:\n        sqlalchemy.exc.IntegrityError: If there's an error with the database operation\n    \"\"\"\n    # Generate a unique ID for the relationship\n    relationship_id_name = format_relationship_id(relationship_id_name)\n    (\n        source_entity_id_name,\n        relationship_string,\n        target_entity_id_name,\n    ) = split_relationship_id(relationship_id_name)\n\n    source_entity_type = get_entity_type(source_entity_id_name)\n    target_entity_type = get_entity_type(target_entity_id_name)\n    relationship_type = extract_relationship_type_id(relationship_id_name)\n\n    # Insert the new relationship\n    stmt = (\n        postgresql.insert(KGRelationship)\n        .values(\n            {\n                \"id_name\": relationship_id_name,\n                \"source_node\": source_entity_id_name,\n                \"target_node\": target_entity_id_name,\n                \"source_node_type\": source_entity_type,\n                \"target_node_type\": target_entity_type,\n                \"type\": relationship_string.lower(),\n                \"relationship_type_id_name\": relationship_type,\n                \"source_document\": source_document_id,\n                \"occurrences\": occurrences,\n            }\n        )\n        .on_conflict_do_update(\n            index_elements=[\"id_name\", \"source_document\"],\n            set_=dict(\n                occurrences=KGRelationship.occurrences + occurrences,\n            ),\n        )\n        .returning(KGRelationship)\n    )\n\n    new_relationship = db_session.execute(stmt).scalar()\n    if new_relationship is None:\n        raise RuntimeError(\n            f\"Failed to upsert relationship with id_name: {relationship_id_name}\"\n        )\n    db_session.flush()\n    return new_relationship\n\n\ndef transfer_relationship(\n    db_session: Session,\n    relationship: KGRelationshipExtractionStaging,\n    entity_translations: dict[str, str],\n) -> KGRelationship:\n    \"\"\"\n    Transfer a relationship from the staging table to the normalized table.\n    \"\"\"\n    # Translate the source and target nodes\n    source_node = entity_translations[relationship.source_node]\n    target_node = entity_translations[relationship.target_node]\n    relationship_id_name = make_relationship_id(\n        source_node, relationship.type, target_node\n    )\n\n    # Create the transferred relationship\n    stmt = (\n        pg_insert(KGRelationship)\n        .values(\n            id_name=relationship_id_name,\n            source_node=source_node,\n            target_node=target_node,\n            source_node_type=relationship.source_node_type,\n            target_node_type=relationship.target_node_type,\n            type=relationship.type,\n            relationship_type_id_name=relationship.relationship_type_id_name,\n            source_document=relationship.source_document,\n            occurrences=relationship.occurrences,\n        )\n        .on_conflict_do_update(\n            index_elements=[\"id_name\", \"source_document\"],\n            set_=dict(\n                occurrences=KGRelationship.occurrences + relationship.occurrences,\n            ),\n        )\n        .returning(KGRelationship)\n    )\n\n    new_relationship = db_session.execute(stmt).scalar()\n    if new_relationship is None:\n        raise RuntimeError(\n            f\"Failed to transfer relationship with id_name: {relationship.id_name}\"\n        )\n\n    # Update transferred\n    db_session.query(KGRelationshipExtractionStaging).filter(\n        KGRelationshipExtractionStaging.id_name == relationship.id_name,\n        KGRelationshipExtractionStaging.source_document == relationship.source_document,\n    ).update({\"transferred\": True})\n    db_session.flush()\n\n    return new_relationship\n\n\ndef upsert_staging_relationship_type(\n    db_session: Session,\n    source_entity_type: str,\n    relationship_type: str,\n    target_entity_type: str,\n    definition: bool = False,\n    extraction_count: int = 1,\n) -> KGRelationshipTypeExtractionStaging:\n    \"\"\"\n    Add a new relationship type to the database.\n\n    Args:\n        db_session: SQLAlchemy session\n        source_entity_type: Type of the source entity\n        relationship_type: Type of relationship\n        target_entity_type: Type of the target entity\n        definition: Whether this relationship type represents a definition (default False)\n\n    Returns:\n        The created KGRelationshipTypeExtractionStaging object\n    \"\"\"\n\n    id_name = make_relationship_type_id(\n        source_entity_type, relationship_type, target_entity_type\n    )\n\n    # Create new relationship type\n    stmt = (\n        postgresql.insert(KGRelationshipTypeExtractionStaging)\n        .values(\n            {\n                \"id_name\": id_name,\n                \"name\": relationship_type,\n                \"source_entity_type_id_name\": source_entity_type.upper(),\n                \"target_entity_type_id_name\": target_entity_type.upper(),\n                \"definition\": definition,\n                \"occurrences\": extraction_count,\n                \"type\": relationship_type,  # Using the relationship_type as the type\n                \"active\": True,  # Setting as active by default\n            }\n        )\n        .on_conflict_do_update(\n            index_elements=[\"id_name\"],\n            set_=dict(\n                occurrences=KGRelationshipTypeExtractionStaging.occurrences\n                + extraction_count,\n            ),\n        )\n        .returning(KGRelationshipTypeExtractionStaging)\n    )\n\n    result = db_session.execute(stmt).scalar()\n    if result is None:\n        raise RuntimeError(\n            f\"Failed to create or increment staging relationship type with id_name: {id_name}\"\n        )\n    db_session.flush()  # Flush to get any DB errors early\n\n    return result\n\n\ndef upsert_relationship_type(\n    db_session: Session,\n    source_entity_type: str,\n    relationship_type: str,\n    target_entity_type: str,\n    definition: bool = False,\n    extraction_count: int = 1,\n) -> KGRelationshipType:\n    \"\"\"\n    Upsert a new relationship type directly to the database.\n\n    Args:\n        db_session: SQLAlchemy session\n        source_entity_type: Type of the source entity\n        relationship_type: Type of relationship\n        target_entity_type: Type of the target entity\n        definition: Whether this relationship type represents a definition (default False)\n\n    Returns:\n        The created KGRelationshipType object\n    \"\"\"\n\n    id_name = make_relationship_type_id(\n        source_entity_type, relationship_type, target_entity_type\n    )\n\n    # Create new relationship type\n    stmt = (\n        postgresql.insert(KGRelationshipType)\n        .values(\n            {\n                \"id_name\": id_name,\n                \"name\": relationship_type,\n                \"source_entity_type_id_name\": source_entity_type.upper(),\n                \"target_entity_type_id_name\": target_entity_type.upper(),\n                \"definition\": definition,\n                \"occurrences\": extraction_count,\n                \"type\": relationship_type,  # Using the relationship_type as the type\n                \"active\": True,  # Setting as active by default\n            }\n        )\n        .on_conflict_do_update(\n            index_elements=[\"id_name\"],\n            set_=dict(\n                occurrences=KGRelationshipType.occurrences + extraction_count,\n            ),\n        )\n        .returning(KGRelationshipType)\n    )\n\n    new_relationship_type = db_session.execute(stmt).scalar()\n    if new_relationship_type is None:\n        raise RuntimeError(\n            f\"Failed to upsert relationship type with id_name: {id_name}\"\n        )\n    db_session.flush()\n    return new_relationship_type\n\n\ndef transfer_relationship_type(\n    db_session: Session,\n    relationship_type: KGRelationshipTypeExtractionStaging,\n) -> KGRelationshipType:\n    \"\"\"\n    Transfer a relationship type from the staging table to the normalized table.\n    \"\"\"\n    stmt = (\n        pg_insert(KGRelationshipType)\n        .values(\n            id_name=relationship_type.id_name,\n            name=relationship_type.name,\n            source_entity_type_id_name=relationship_type.source_entity_type_id_name,\n            target_entity_type_id_name=relationship_type.target_entity_type_id_name,\n            definition=relationship_type.definition,\n            occurrences=relationship_type.occurrences,\n            type=relationship_type.type,\n            active=relationship_type.active,\n        )\n        .on_conflict_do_update(\n            index_elements=[\"id_name\"],\n            set_=dict(\n                occurrences=KGRelationshipType.occurrences\n                + relationship_type.occurrences,\n            ),\n        )\n        .returning(KGRelationshipType)\n    )\n\n    new_relationship_type = db_session.execute(stmt).scalar()\n    if new_relationship_type is None:\n        raise RuntimeError(\n            f\"Failed to transfer relationship type with id_name: {relationship_type.id_name}\"\n        )\n\n    # Update transferred\n    db_session.query(KGRelationshipTypeExtractionStaging).filter(\n        KGRelationshipTypeExtractionStaging.id_name == relationship_type.id_name\n    ).update({\"transferred\": True})\n    db_session.flush()\n\n    return new_relationship_type\n\n\ndef delete_relationships_by_id_names(\n    db_session: Session, id_names: list[str], kg_stage: KGStage\n) -> int:\n    \"\"\"\n    Delete relationships from the database based on a list of id_names.\n\n    Args:\n        db_session: SQLAlchemy database session\n        id_names: List of relationship id_names to delete\n\n    Returns:\n        Number of relationships deleted\n\n    Raises:\n        sqlalchemy.exc.SQLAlchemyError: If there's an error during deletion\n    \"\"\"\n\n    deleted_count = 0\n\n    if kg_stage == KGStage.EXTRACTED:\n        deleted_count = (\n            db_session.query(KGRelationshipExtractionStaging)\n            .filter(KGRelationshipExtractionStaging.id_name.in_(id_names))\n            .delete(synchronize_session=False)\n        )\n    elif kg_stage == KGStage.NORMALIZED:\n        deleted_count = (\n            db_session.query(KGRelationship)\n            .filter(KGRelationship.id_name.in_(id_names))\n            .delete(synchronize_session=False)\n        )\n\n    db_session.flush()  # Flush to ensure deletion is processed\n    return deleted_count\n\n\ndef delete_relationship_types_by_id_names(\n    db_session: Session, id_names: list[str], kg_stage: KGStage\n) -> int:\n    \"\"\"\n    Delete relationship types from the database based on a list of id_names.\n\n    Args:\n        db_session: SQLAlchemy database session\n        id_names: List of relationship type id_names to delete\n\n    Returns:\n        Number of relationship types deleted\n\n    Raises:\n        sqlalchemy.exc.SQLAlchemyError: If there's an error during deletion\n    \"\"\"\n    deleted_count = 0\n\n    if kg_stage == KGStage.EXTRACTED:\n        deleted_count = (\n            db_session.query(KGRelationshipTypeExtractionStaging)\n            .filter(KGRelationshipTypeExtractionStaging.id_name.in_(id_names))\n            .delete(synchronize_session=False)\n        )\n    elif kg_stage == KGStage.NORMALIZED:\n        deleted_count = (\n            db_session.query(KGRelationshipType)\n            .filter(KGRelationshipType.id_name.in_(id_names))\n            .delete(synchronize_session=False)\n        )\n\n    db_session.flush()  # Flush to ensure deletion is processed\n    return deleted_count\n\n\ndef get_relationships_for_entity_type_pairs(\n    db_session: Session, entity_type_pairs: list[tuple[str, str]]\n) -> list[\"KGRelationshipType\"]:\n    \"\"\"\n    Get relationship types from the database based on a list of entity type pairs.\n\n    Args:\n        db_session: SQLAlchemy database session\n        entity_type_pairs: List of tuples where each tuple contains (source_entity_type, target_entity_type)\n\n    Returns:\n        List of KGRelationshipType objects where source and target types match the provided pairs\n    \"\"\"\n\n    conditions = [\n        (\n            (KGRelationshipType.source_entity_type_id_name == source_type)\n            & (KGRelationshipType.target_entity_type_id_name == target_type)\n        )\n        for source_type, target_type in entity_type_pairs\n    ]\n\n    return db_session.query(KGRelationshipType).filter(or_(*conditions)).all()\n\n\ndef get_allowed_relationship_type_pairs(\n    db_session: Session, entities: list[str]\n) -> list[str]:\n    \"\"\"\n    Get the allowed relationship pairs for the given entities.\n\n    Args:\n        db_session: SQLAlchemy database session\n        entities: List of entity type ID names to filter by\n\n    Returns:\n        List of id_names from KGRelationshipType where source or target entity types\n        are in the provided entities list. We also filter out for now the catch-all\n        relationship types 'VENDOR__<relationship>__<target entity type>'\n    \"\"\"\n\n    entity_types = list({get_entity_type(entity) for entity in entities})\n\n    return [\n        row[0]\n        for row in (\n            db_session.query(KGRelationshipType.id_name)\n            .filter(\n                or_(\n                    KGRelationshipType.source_entity_type_id_name.in_(entity_types),\n                    KGRelationshipType.target_entity_type_id_name.in_(entity_types),\n                )\n            )\n            .filter(~KGRelationshipType.source_entity_type_id_name.like(\"VENDOR::%\"))\n            .distinct()\n            .all()\n        )\n    ]\n\n\ndef get_relationships_of_entity(db_session: Session, entity_id: str) -> List[str]:\n    \"\"\"Get all relationship ID names where the given entity is either the source or target node.\n\n    Args:\n        db_session: SQLAlchemy session\n        entity_id: ID of the entity to find relationships for\n\n    Returns:\n        List of relationship ID names where the entity is either source or target\n    \"\"\"\n    return [\n        row[0]\n        for row in (\n            db_session.query(KGRelationship.id_name)\n            .filter(\n                or_(\n                    KGRelationship.source_node == entity_id,\n                    KGRelationship.target_node == entity_id,\n                )\n            )\n            .all()\n        )\n    ]\n\n\ndef get_relationship_types_of_entity_types(\n    db_session: Session, entity_types_id: str\n) -> List[str]:\n    \"\"\"Get all relationship ID names where the given entity is either the source or target node.\n\n    Args:\n        db_session: SQLAlchemy session\n        entity_types_id: ID of the entity to find relationships for\n\n    Returns:\n        List of relationship ID names where the entity is either source or target\n    \"\"\"\n\n    if entity_types_id.endswith(\":*\"):\n        entity_types_id = entity_types_id[:-2]\n\n    return [\n        row[0]\n        for row in (\n            db_session.query(KGRelationshipType.id_name)\n            .filter(\n                or_(\n                    KGRelationshipType.source_entity_type_id_name == entity_types_id,\n                    KGRelationshipType.target_entity_type_id_name == entity_types_id,\n                )\n            )\n            .all()\n        )\n    ]\n\n\ndef delete_document_references_from_kg(db_session: Session, document_id: str) -> None:\n    # Delete relationships from normalized stage\n    db_session.query(KGRelationship).filter(\n        KGRelationship.source_document == document_id\n    ).delete(synchronize_session=False)\n\n    # Delete relationships from extraction staging\n    db_session.query(KGRelationshipExtractionStaging).filter(\n        KGRelationshipExtractionStaging.source_document == document_id\n    ).delete(synchronize_session=False)\n\n    # Delete entities from normalized stage\n    db_session.query(KGEntity).filter(KGEntity.document_id == document_id).delete(\n        synchronize_session=False\n    )\n\n    # Delete entities from extraction staging\n    db_session.query(KGEntityExtractionStaging).filter(\n        KGEntityExtractionStaging.document_id == document_id\n    ).delete(synchronize_session=False)\n\n    db_session.flush()\n\n\ndef delete_from_kg_relationships_extraction_staging__no_commit(\n    db_session: Session, document_ids: list[str]\n) -> None:\n    \"\"\"Delete relationships from the extraction staging table.\"\"\"\n    db_session.query(KGRelationshipExtractionStaging).filter(\n        KGRelationshipExtractionStaging.source_document.in_(document_ids)\n    ).delete(synchronize_session=False)\n\n\ndef delete_from_kg_relationships__no_commit(\n    db_session: Session, document_ids: list[str]\n) -> None:\n    \"\"\"Delete relationships from the normalized table.\"\"\"\n    db_session.query(KGRelationship).filter(\n        KGRelationship.source_document.in_(document_ids)\n    ).delete(synchronize_session=False)\n"
  },
  {
    "path": "backend/onyx/db/release_notes.py",
    "content": "\"\"\"Database functions for release notes functionality.\"\"\"\n\nfrom urllib.parse import urlencode\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import INSTANCE_TYPE\nfrom onyx.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN\nfrom onyx.configs.constants import NotificationType\nfrom onyx.configs.constants import ONYX_UTM_SOURCE\nfrom onyx.db.enums import AccountType\nfrom onyx.db.models import User\nfrom onyx.db.notification import batch_create_notifications\nfrom onyx.server.features.release_notes.constants import DOCS_CHANGELOG_BASE_URL\nfrom onyx.server.features.release_notes.models import ReleaseNoteEntry\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef create_release_notifications_for_versions(\n    db_session: Session,\n    release_note_entries: list[ReleaseNoteEntry],\n) -> int:\n    \"\"\"\n    Create release notes notifications for each release note entry.\n    Uses batch_create_notifications for efficient bulk insertion.\n\n    If a user already has a notification for a specific version (dismissed or not),\n    no new one is created (handled by unique constraint on additional_data).\n\n    Note: Entries should already be filtered by app_version before calling this\n    function. The filtering happens in _parse_mdx_to_release_note_entries().\n\n    Args:\n        db_session: Database session\n        release_note_entries: List of release note entries to notify about (pre-filtered)\n\n    Returns:\n        Total number of notifications created across all versions.\n    \"\"\"\n    if not release_note_entries:\n        logger.debug(\"No release note entries to notify about\")\n        return 0\n\n    # Get active users and exclude API key users\n    user_ids = list(\n        db_session.scalars(\n            select(User.id).where(  # type: ignore\n                User.is_active == True,  # noqa: E712\n                User.account_type.notin_([AccountType.BOT, AccountType.EXT_PERM_USER]),\n                User.email.endswith(DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN).is_(False),  # type: ignore[attr-defined]\n            )\n        ).all()\n    )\n\n    total_created = 0\n    for entry in release_note_entries:\n        # Convert version to anchor format for external docs links\n        # v2.7.0 -> v2-7-0\n        version_anchor = entry.version.replace(\".\", \"-\")\n\n        # Build UTM parameters for tracking\n        utm_params = {\n            \"utm_source\": ONYX_UTM_SOURCE,\n            \"utm_medium\": \"notification\",\n            \"utm_campaign\": INSTANCE_TYPE,\n            \"utm_content\": f\"release_notes-{entry.version}\",\n        }\n\n        link = f\"{DOCS_CHANGELOG_BASE_URL}#{version_anchor}?{urlencode(utm_params)}\"\n\n        additional_data: dict[str, str] = {\n            \"version\": entry.version,\n            \"link\": link,\n        }\n\n        created_count = batch_create_notifications(\n            user_ids,\n            NotificationType.RELEASE_NOTES,\n            db_session,\n            title=entry.title,\n            description=f\"Check out what's new in {entry.version}\",\n            additional_data=additional_data,\n        )\n        total_created += created_count\n\n        logger.debug(\n            f\"Created {created_count} release notes notifications (version {entry.version}, {len(user_ids)} eligible users)\"\n        )\n\n    return total_created\n"
  },
  {
    "path": "backend/onyx/db/rotate_encryption_key.py",
    "content": "\"\"\"Rotate encryption key for all encrypted columns.\n\nDynamically discovers all columns using EncryptedString / EncryptedJson,\ndecrypts each value with the old key, and re-encrypts with the current\nENCRYPTION_KEY_SECRET.\n\nThe operation is idempotent: rows already encrypted with the current key\nare skipped. Commits are made in batches so a crash mid-rotation can be\nsafely resumed by re-running.\n\"\"\"\n\nimport json\nfrom typing import Any\n\nfrom sqlalchemy import LargeBinary\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import ENCRYPTION_KEY_SECRET\nfrom onyx.db.models import Base\nfrom onyx.db.models import EncryptedJson\nfrom onyx.db.models import EncryptedString\nfrom onyx.utils.encryption import decrypt_bytes_to_string\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import global_version\n\nlogger = setup_logger()\n\n_BATCH_SIZE = 500\n\n\ndef _can_decrypt_with_current_key(data: bytes) -> bool:\n    \"\"\"Check if data is already encrypted with the current key.\n\n    Passes the key explicitly so the fallback-to-raw-decode path in\n    _decrypt_bytes is NOT triggered — a clean success/failure signal.\n    \"\"\"\n    try:\n        decrypt_bytes_to_string(data, key=ENCRYPTION_KEY_SECRET)\n        return True\n    except Exception:\n        return False\n\n\ndef _discover_encrypted_columns() -> list[tuple[type, str, list[str], bool]]:\n    \"\"\"Walk all ORM models and find columns using EncryptedString/EncryptedJson.\n\n    Returns list of (ModelClass, column_attr_name, [pk_attr_names], is_json).\n    \"\"\"\n    results: list[tuple[type, str, list[str], bool]] = []\n\n    for mapper in Base.registry.mappers:\n        model_cls = mapper.class_\n        pk_names = [col.key for col in mapper.primary_key]\n\n        for prop in mapper.column_attrs:\n            for col in prop.columns:\n                if isinstance(col.type, EncryptedJson):\n                    results.append((model_cls, prop.key, pk_names, True))\n                elif isinstance(col.type, EncryptedString):\n                    results.append((model_cls, prop.key, pk_names, False))\n\n    return results\n\n\ndef rotate_encryption_key(\n    db_session: Session,\n    old_key: str | None,\n    dry_run: bool = False,\n) -> dict[str, int]:\n    \"\"\"Decrypt all encrypted columns with old_key and re-encrypt with the current key.\n\n    Args:\n        db_session: Active database session.\n        old_key: The previous encryption key. Pass None or \"\" if values were\n                 not previously encrypted with a key.\n        dry_run: If True, count rows that need rotation without modifying data.\n\n    Returns:\n        Dict of \"table.column\" -> number of rows re-encrypted (or would be).\n\n    Commits every _BATCH_SIZE rows so that locks are held briefly and progress\n    is preserved on crash. Already-rotated rows are detected and skipped,\n    making the operation safe to re-run.\n    \"\"\"\n    if not global_version.is_ee_version():\n        raise RuntimeError(\"EE mode is not enabled — rotation requires EE encryption.\")\n\n    if not ENCRYPTION_KEY_SECRET:\n        raise RuntimeError(\n            \"ENCRYPTION_KEY_SECRET is not set — cannot rotate. Set the target encryption key in the environment before running.\"\n        )\n\n    encrypted_columns = _discover_encrypted_columns()\n    totals: dict[str, int] = {}\n\n    for model_cls, col_name, pk_names, is_json in encrypted_columns:\n        table_name: str = model_cls.__tablename__  # type: ignore[attr-defined]\n        col_attr = getattr(model_cls, col_name)\n        pk_attrs = [getattr(model_cls, pk) for pk in pk_names]\n\n        # Read raw bytes directly, bypassing the TypeDecorator\n        raw_col = col_attr.property.columns[0]\n\n        stmt = select(*pk_attrs, raw_col.cast(LargeBinary)).where(col_attr.is_not(None))\n        rows = db_session.execute(stmt).all()\n\n        reencrypted = 0\n        batch_pending = 0\n        for row in rows:\n            raw_bytes: bytes | None = row[-1]\n            if raw_bytes is None:\n                continue\n\n            if _can_decrypt_with_current_key(raw_bytes):\n                continue\n\n            try:\n                if not old_key:\n                    decrypted_str = raw_bytes.decode(\"utf-8\")\n                else:\n                    decrypted_str = decrypt_bytes_to_string(raw_bytes, key=old_key)\n\n                # For EncryptedJson, parse back to dict so the TypeDecorator\n                # can json.dumps() it cleanly (avoids double-encoding).\n                value: Any = json.loads(decrypted_str) if is_json else decrypted_str\n            except (ValueError, UnicodeDecodeError) as e:\n                pk_vals = [row[i] for i in range(len(pk_names))]\n                logger.warning(\n                    f\"Could not decrypt/parse {table_name}.{col_name} row {pk_vals} — skipping: {e}\"\n                )\n                continue\n\n            if not dry_run:\n                pk_filters = [pk_attr == row[i] for i, pk_attr in enumerate(pk_attrs)]\n                update_stmt = (\n                    update(model_cls).where(*pk_filters).values({col_name: value})\n                )\n                db_session.execute(update_stmt)\n                batch_pending += 1\n\n                if batch_pending >= _BATCH_SIZE:\n                    db_session.commit()\n                    batch_pending = 0\n            reencrypted += 1\n\n        # Flush remaining rows in this column\n        if batch_pending > 0:\n            db_session.commit()\n\n        if reencrypted > 0:\n            totals[f\"{table_name}.{col_name}\"] = reencrypted\n            logger.info(\n                f\"{'[DRY RUN] Would re-encrypt' if dry_run else 'Re-encrypted'} {reencrypted} value(s) in {table_name}.{col_name}\"\n            )\n\n    return totals\n"
  },
  {
    "path": "backend/onyx/db/saml.py",
    "content": "import datetime\nfrom typing import cast\nfrom uuid import UUID\n\nfrom sqlalchemy import and_\nfrom sqlalchemy import func\nfrom sqlalchemy import select\nfrom sqlalchemy.ext.asyncio import AsyncSession\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS\nfrom onyx.db.models import SamlAccount\n\n\ndef upsert_saml_account(\n    user_id: UUID,\n    cookie: str,\n    db_session: Session,\n    expiration_offset: int = SESSION_EXPIRE_TIME_SECONDS,\n) -> datetime.datetime:\n    expires_at = func.now() + datetime.timedelta(seconds=expiration_offset)\n\n    existing_saml_acc = (\n        db_session.query(SamlAccount)\n        .filter(SamlAccount.user_id == user_id)\n        .one_or_none()\n    )\n\n    if existing_saml_acc:\n        existing_saml_acc.encrypted_cookie = cookie\n        existing_saml_acc.expires_at = cast(datetime.datetime, expires_at)\n        existing_saml_acc.updated_at = func.now()\n        saml_acc = existing_saml_acc\n    else:\n        saml_acc = SamlAccount(\n            user_id=user_id,\n            encrypted_cookie=cookie,\n            expires_at=expires_at,\n        )\n        db_session.add(saml_acc)\n\n    db_session.commit()\n\n    return saml_acc.expires_at\n\n\nasync def get_saml_account(\n    cookie: str, async_db_session: AsyncSession\n) -> SamlAccount | None:\n    \"\"\"NOTE: this is async, since it's used during auth\n    (which is necessarily async due to FastAPI Users)\"\"\"\n    stmt = (\n        select(SamlAccount)\n        .options(selectinload(SamlAccount.user))  # Use selectinload for collections\n        .where(\n            and_(\n                SamlAccount.encrypted_cookie == cookie,\n                SamlAccount.expires_at > func.now(),\n            )\n        )\n    )\n\n    result = await async_db_session.execute(stmt)\n    return result.scalars().unique().one_or_none()\n\n\nasync def expire_saml_account(\n    saml_account: SamlAccount, async_db_session: AsyncSession\n) -> None:\n    saml_account.expires_at = func.now()\n    await async_db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/search_settings.py",
    "content": "from sqlalchemy import and_\nfrom sqlalchemy import delete\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.model_configs import DEFAULT_DOCUMENT_ENCODER_MODEL\nfrom onyx.configs.model_configs import DOCUMENT_ENCODER_MODEL\nfrom onyx.context.search.models import SavedSearchSettings\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.llm import fetch_embedding_provider\nfrom onyx.db.models import CloudEmbeddingProvider\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.models import IndexModelStatus\nfrom onyx.db.models import SearchSettings\nfrom onyx.server.manage.embedding.models import (\n    CloudEmbeddingProvider as ServerCloudEmbeddingProvider,\n)\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import PRESERVED_SEARCH_FIELDS\nfrom shared_configs.enums import EmbeddingProvider\n\n\nlogger = setup_logger()\n\n\nclass ActiveSearchSettings:\n    primary: SearchSettings\n    secondary: SearchSettings | None\n\n    def __init__(\n        self, primary: SearchSettings, secondary: SearchSettings | None\n    ) -> None:\n        self.primary = primary\n        self.secondary = secondary\n\n\ndef create_search_settings(\n    search_settings: SavedSearchSettings,\n    db_session: Session,\n    status: IndexModelStatus = IndexModelStatus.FUTURE,\n) -> SearchSettings:\n    embedding_model = SearchSettings(\n        model_name=search_settings.model_name,\n        model_dim=search_settings.model_dim,\n        normalize=search_settings.normalize,\n        query_prefix=search_settings.query_prefix,\n        passage_prefix=search_settings.passage_prefix,\n        status=status,\n        index_name=search_settings.index_name,\n        provider_type=search_settings.provider_type,\n        multipass_indexing=search_settings.multipass_indexing,\n        embedding_precision=search_settings.embedding_precision,\n        reduced_dimension=search_settings.reduced_dimension,\n        enable_contextual_rag=search_settings.enable_contextual_rag,\n        contextual_rag_llm_name=search_settings.contextual_rag_llm_name,\n        contextual_rag_llm_provider=search_settings.contextual_rag_llm_provider,\n        switchover_type=search_settings.switchover_type,\n    )\n\n    db_session.add(embedding_model)\n    db_session.commit()\n\n    return embedding_model\n\n\ndef get_embedding_provider_from_provider_type(\n    db_session: Session, provider_type: EmbeddingProvider\n) -> CloudEmbeddingProvider | None:\n    query = select(CloudEmbeddingProvider).where(\n        CloudEmbeddingProvider.provider_type == provider_type\n    )\n    provider = db_session.execute(query).scalars().first()\n    return provider if provider else None\n\n\ndef get_current_db_embedding_provider(\n    db_session: Session,\n) -> ServerCloudEmbeddingProvider | None:\n    search_settings = get_current_search_settings(db_session=db_session)\n\n    if search_settings.provider_type is None:\n        return None\n\n    embedding_provider = fetch_embedding_provider(\n        db_session=db_session,\n        provider_type=search_settings.provider_type,\n    )\n    if embedding_provider is None:\n        raise RuntimeError(\"No embedding provider exists for this model.\")\n\n    current_embedding_provider = ServerCloudEmbeddingProvider.from_request(\n        cloud_provider_model=embedding_provider\n    )\n\n    return current_embedding_provider\n\n\ndef delete_search_settings(db_session: Session, search_settings_id: int) -> None:\n    current_settings = get_current_search_settings(db_session)\n\n    if current_settings.id == search_settings_id:\n        raise ValueError(\"Cannot delete currently active search settings\")\n\n    # First, delete associated index attempts\n    index_attempts_query = delete(IndexAttempt).where(\n        IndexAttempt.search_settings_id == search_settings_id\n    )\n    db_session.execute(index_attempts_query)\n\n    # Then, delete the search settings\n    search_settings_query = delete(SearchSettings).where(\n        and_(\n            SearchSettings.id == search_settings_id,\n            SearchSettings.status != IndexModelStatus.PRESENT,\n        )\n    )\n\n    db_session.execute(search_settings_query)\n    db_session.commit()\n\n\ndef get_current_search_settings(db_session: Session) -> SearchSettings:\n    query = (\n        select(SearchSettings)\n        .where(SearchSettings.status == IndexModelStatus.PRESENT)\n        .order_by(SearchSettings.id.desc())\n    )\n    result = db_session.execute(query)\n    latest_settings = result.scalars().first()\n\n    if not latest_settings:\n        raise RuntimeError(\"No search settings specified; DB is not in a valid state.\")\n    return latest_settings\n\n\ndef get_secondary_search_settings(db_session: Session) -> SearchSettings | None:\n    query = (\n        select(SearchSettings)\n        .where(SearchSettings.status == IndexModelStatus.FUTURE)\n        .order_by(SearchSettings.id.desc())\n    )\n    result = db_session.execute(query)\n    latest_settings = result.scalars().first()\n\n    return latest_settings\n\n\ndef get_active_search_settings(db_session: Session) -> ActiveSearchSettings:\n    \"\"\"Returns active search settings. Secondary search settings may be None.\"\"\"\n\n    # Get the primary and secondary search settings\n    primary_search_settings = get_current_search_settings(db_session)\n    secondary_search_settings = get_secondary_search_settings(db_session)\n    return ActiveSearchSettings(\n        primary=primary_search_settings, secondary=secondary_search_settings\n    )\n\n\ndef get_active_search_settings_list(db_session: Session) -> list[SearchSettings]:\n    \"\"\"Returns active search settings as a list. Primary settings are the first element,\n    and if secondary search settings exist, they will be the second element.\"\"\"\n\n    search_settings_list: list[SearchSettings] = []\n\n    active_search_settings = get_active_search_settings(db_session)\n    search_settings_list.append(active_search_settings.primary)\n    if active_search_settings.secondary:\n        search_settings_list.append(active_search_settings.secondary)\n\n    return search_settings_list\n\n\ndef get_all_search_settings(db_session: Session) -> list[SearchSettings]:\n    query = select(SearchSettings).order_by(SearchSettings.id.desc())\n    result = db_session.execute(query)\n    all_settings = result.scalars().all()\n    return list(all_settings)\n\n\ndef get_multilingual_expansion(db_session: Session | None = None) -> list[str]:\n    if db_session is None:\n        with get_session_with_current_tenant() as db_session:\n            search_settings = get_current_search_settings(db_session)\n    else:\n        search_settings = get_current_search_settings(db_session)\n    if not search_settings:\n        return []\n    return search_settings.multilingual_expansion\n\n\ndef update_search_settings(\n    current_settings: SearchSettings,\n    updated_settings: SavedSearchSettings,\n    preserved_fields: list[str],\n) -> None:\n    for field, value in updated_settings.dict().items():\n        if field not in preserved_fields:\n            setattr(current_settings, field, value)\n\n\ndef update_current_search_settings(\n    db_session: Session,\n    search_settings: SavedSearchSettings,\n    preserved_fields: list[str] = PRESERVED_SEARCH_FIELDS,\n) -> None:\n    current_settings = get_current_search_settings(db_session)\n    if not current_settings:\n        logger.warning(\"No current search settings found to update\")\n        return\n\n    update_search_settings(current_settings, search_settings, preserved_fields)\n    db_session.commit()\n    logger.info(\"Current search settings updated successfully\")\n\n\ndef update_secondary_search_settings(\n    db_session: Session,\n    search_settings: SavedSearchSettings,\n    preserved_fields: list[str] = PRESERVED_SEARCH_FIELDS,\n) -> None:\n    secondary_settings = get_secondary_search_settings(db_session)\n    if not secondary_settings:\n        logger.warning(\"No secondary search settings found to update\")\n        return\n\n    preserved_fields = PRESERVED_SEARCH_FIELDS\n    update_search_settings(secondary_settings, search_settings, preserved_fields)\n\n    db_session.commit()\n    logger.info(\"Secondary search settings updated successfully\")\n\n\ndef update_search_settings_status(\n    search_settings: SearchSettings, new_status: IndexModelStatus, db_session: Session\n) -> None:\n    search_settings.status = new_status\n    db_session.commit()\n\n\ndef user_has_overridden_embedding_model() -> bool:\n    return DOCUMENT_ENCODER_MODEL != DEFAULT_DOCUMENT_ENCODER_MODEL\n"
  },
  {
    "path": "backend/onyx/db/seeding/chat_history_seeding.py",
    "content": "import random\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom logging import getLogger\nfrom uuid import UUID\n\nfrom onyx.configs.constants import MessageType\nfrom onyx.db.chat import create_chat_session\nfrom onyx.db.chat import create_new_chat_message\nfrom onyx.db.chat import get_or_create_root_message\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import ChatSession\n\nlogger = getLogger(__name__)\n\n\ndef seed_chat_history(\n    num_sessions: int,\n    num_messages: int,\n    days: int,\n    user_id: UUID | None = None,\n    persona_id: int | None = None,\n) -> None:\n    \"\"\"Utility function to seed chat history for testing.\n\n    num_sessions: the number of sessions to seed\n    num_messages: the number of messages to seed per sessions\n    days: the number of days looking backwards from the current time over which to randomize\n    the times.\n    user_id: optional user to associate with sessions\n    persona_id: optional persona/assistant to associate with sessions\n    \"\"\"\n    with get_session_with_current_tenant() as db_session:\n        logger.info(f\"Seeding {num_sessions} sessions.\")\n        for y in range(0, num_sessions):\n            create_chat_session(db_session, f\"pytest_session_{y}\", user_id, persona_id)\n\n        # randomize all session times\n        logger.info(f\"Seeding {num_messages} messages per session.\")\n        rows = db_session.query(ChatSession).all()\n        for x in range(0, len(rows)):\n            if x % 1024 == 0:\n                logger.info(f\"Seeded messages for {x} sessions so far.\")\n\n            row = rows[x]\n            row.time_created = datetime.utcnow() - timedelta(\n                days=random.randint(0, days)\n            )\n            row.time_updated = row.time_created + timedelta(\n                minutes=random.randint(0, 10)\n            )\n\n            root_message = get_or_create_root_message(row.id, db_session)\n\n            current_message_type = MessageType.USER\n            parent_message = root_message\n            for x in range(0, num_messages):\n                if current_message_type == MessageType.USER:\n                    msg = f\"pytest_message_user_{x}\"\n                else:\n                    msg = f\"pytest_message_assistant_{x}\"\n\n                chat_message = create_new_chat_message(\n                    chat_session_id=row.id,\n                    parent_message=parent_message,\n                    message=msg,\n                    token_count=0,\n                    message_type=current_message_type,\n                    commit=False,\n                    db_session=db_session,\n                )\n\n                chat_message.time_sent = row.time_created + timedelta(\n                    minutes=random.randint(0, 10)\n                )\n\n                db_session.commit()\n\n                current_message_type = (\n                    MessageType.ASSISTANT\n                    if current_message_type == MessageType.USER\n                    else MessageType.USER\n                )\n                parent_message = chat_message\n\n        db_session.commit()\n\n        logger.info(f\"Seeded messages for {len(rows)} sessions. Finished.\")\n"
  },
  {
    "path": "backend/onyx/db/slack_bot.py",
    "content": "from collections.abc import Sequence\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import SlackBot\n\n\ndef insert_slack_bot(\n    db_session: Session,\n    name: str,\n    enabled: bool,\n    bot_token: str,\n    app_token: str,\n    user_token: str | None = None,\n) -> SlackBot:\n    slack_bot = SlackBot(\n        name=name,\n        enabled=enabled,\n        bot_token=bot_token,\n        app_token=app_token,\n        user_token=user_token,\n    )\n    db_session.add(slack_bot)\n    db_session.commit()\n\n    return slack_bot\n\n\ndef update_slack_bot(\n    db_session: Session,\n    slack_bot_id: int,\n    name: str,\n    enabled: bool,\n    bot_token: str,\n    app_token: str,\n    user_token: str | None = None,\n) -> SlackBot:\n    slack_bot = db_session.scalar(select(SlackBot).where(SlackBot.id == slack_bot_id))\n    if slack_bot is None:\n        raise ValueError(f\"Unable to find Slack Bot with ID {slack_bot_id}\")\n\n    # update the app\n    slack_bot.name = name\n    slack_bot.enabled = enabled\n    slack_bot.bot_token = bot_token  # type: ignore[assignment]\n    slack_bot.app_token = app_token  # type: ignore[assignment]\n    slack_bot.user_token = user_token  # type: ignore[assignment]\n\n    db_session.commit()\n\n    return slack_bot\n\n\ndef fetch_slack_bot(\n    db_session: Session,\n    slack_bot_id: int,\n) -> SlackBot:\n    slack_bot = db_session.scalar(select(SlackBot).where(SlackBot.id == slack_bot_id))\n    if slack_bot is None:\n        raise ValueError(f\"Unable to find Slack Bot with ID {slack_bot_id}\")\n\n    return slack_bot\n\n\ndef remove_slack_bot(\n    db_session: Session,\n    slack_bot_id: int,\n) -> None:\n    slack_bot = fetch_slack_bot(\n        db_session=db_session,\n        slack_bot_id=slack_bot_id,\n    )\n\n    db_session.delete(slack_bot)\n    db_session.commit()\n\n\ndef fetch_slack_bots(db_session: Session) -> Sequence[SlackBot]:\n    return db_session.scalars(select(SlackBot)).all()\n"
  },
  {
    "path": "backend/onyx/db/slack_channel_config.py",
    "content": "from collections.abc import Sequence\nfrom typing import Any\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import joinedload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.constants import DEFAULT_PERSONA_SLACK_CHANNEL_NAME\nfrom onyx.db.constants import SLACK_BOT_PERSONA_PREFIX\nfrom onyx.db.models import ChannelConfig\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Persona__DocumentSet\nfrom onyx.db.models import SlackChannelConfig\nfrom onyx.db.models import User\nfrom onyx.db.persona import mark_persona_as_deleted\nfrom onyx.db.persona import upsert_persona\nfrom onyx.db.tools import get_builtin_tool\nfrom onyx.tools.tool_implementations.search.search_tool import SearchTool\nfrom onyx.utils.errors import EERequiredError\nfrom onyx.utils.variable_functionality import (\n    fetch_versioned_implementation_with_fallback,\n)\n\n\ndef _build_persona_name(channel_name: str | None) -> str:\n    return f\"{SLACK_BOT_PERSONA_PREFIX}{channel_name if channel_name else DEFAULT_PERSONA_SLACK_CHANNEL_NAME}\"\n\n\ndef _cleanup_relationships(db_session: Session, persona_id: int) -> None:\n    \"\"\"NOTE: does not commit changes\"\"\"\n    # delete existing persona-document_set relationships\n    existing_relationships = db_session.scalars(\n        select(Persona__DocumentSet).where(\n            Persona__DocumentSet.persona_id == persona_id\n        )\n    )\n    for rel in existing_relationships:\n        db_session.delete(rel)\n\n\ndef create_slack_channel_persona(\n    db_session: Session,\n    channel_name: str | None,\n    document_set_ids: list[int],\n    existing_persona_id: int | None = None,\n) -> Persona:\n    \"\"\"NOTE: does not commit changes\"\"\"\n\n    search_tool = get_builtin_tool(db_session=db_session, tool_type=SearchTool)\n\n    # create/update persona associated with the Slack channel\n    persona_name = _build_persona_name(channel_name)\n    persona_id_to_update = existing_persona_id\n    if persona_id_to_update is None:\n        # Reuse any previous Slack persona for this channel (even if the config was\n        # temporarily switched to a different persona) so we don't trip duplicate name\n        # validation inside `upsert_persona`.\n        existing_persona = db_session.scalar(\n            select(Persona).where(Persona.name == persona_name)\n        )\n        if existing_persona:\n            persona_id_to_update = existing_persona.id\n\n    persona = upsert_persona(\n        user=None,  # Slack channel Personas are not attached to users\n        persona_id=persona_id_to_update,\n        name=persona_name,\n        description=\"\",\n        system_prompt=\"\",\n        task_prompt=\"\",\n        datetime_aware=True,\n        tool_ids=[search_tool.id],\n        document_set_ids=document_set_ids,\n        llm_model_provider_override=None,\n        llm_model_version_override=None,\n        starter_messages=None,\n        is_public=True,\n        is_featured=False,\n        db_session=db_session,\n        commit=False,\n    )\n\n    return persona\n\n\ndef _no_ee_standard_answer_categories(\n    *args: Any,  # noqa: ARG001\n    **kwargs: Any,  # noqa: ARG001\n) -> list:\n    return []\n\n\ndef insert_slack_channel_config(\n    db_session: Session,\n    slack_bot_id: int,\n    persona_id: int | None,\n    channel_config: ChannelConfig,\n    standard_answer_category_ids: list[int],\n    enable_auto_filters: bool,\n    is_default: bool = False,\n) -> SlackChannelConfig:\n    versioned_fetch_standard_answer_categories_by_ids = (\n        fetch_versioned_implementation_with_fallback(\n            \"onyx.db.standard_answer\",\n            \"fetch_standard_answer_categories_by_ids\",\n            _no_ee_standard_answer_categories,\n        )\n    )\n    existing_standard_answer_categories = (\n        versioned_fetch_standard_answer_categories_by_ids(\n            standard_answer_category_ids=standard_answer_category_ids,\n            db_session=db_session,\n        )\n    )\n\n    if len(existing_standard_answer_categories) != len(standard_answer_category_ids):\n        if len(existing_standard_answer_categories) == 0:\n            raise EERequiredError(\n                \"Standard answers are a paid Enterprise Edition feature - enable EE or remove standard answer categories\"\n            )\n        else:\n            raise ValueError(\n                f\"Some or all categories with ids {standard_answer_category_ids} do not exist\"\n            )\n\n    if is_default:\n        existing_default = db_session.scalar(\n            select(SlackChannelConfig).where(\n                SlackChannelConfig.slack_bot_id == slack_bot_id,\n                SlackChannelConfig.is_default is True,  # type: ignore\n            )\n        )\n        if existing_default:\n            raise ValueError(\"A default config already exists for this Slack bot.\")\n    else:\n        if \"channel_name\" not in channel_config:\n            raise ValueError(\"Channel name is required for non-default configs.\")\n\n    slack_channel_config = SlackChannelConfig(\n        slack_bot_id=slack_bot_id,\n        persona_id=persona_id,\n        channel_config=channel_config,\n        standard_answer_categories=existing_standard_answer_categories,\n        enable_auto_filters=enable_auto_filters,\n        is_default=is_default,\n    )\n    db_session.add(slack_channel_config)\n    db_session.commit()\n\n    return slack_channel_config\n\n\ndef update_slack_channel_config(\n    db_session: Session,\n    slack_channel_config_id: int,\n    persona_id: int | None,\n    channel_config: ChannelConfig,\n    standard_answer_category_ids: list[int],\n    enable_auto_filters: bool,\n    disabled: bool,  # noqa: ARG001\n) -> SlackChannelConfig:\n    slack_channel_config = db_session.scalar(\n        select(SlackChannelConfig).where(\n            SlackChannelConfig.id == slack_channel_config_id\n        )\n    )\n    if slack_channel_config is None:\n        raise ValueError(\n            f\"Unable to find Slack channel config with ID {slack_channel_config_id}\"\n        )\n\n    versioned_fetch_standard_answer_categories_by_ids = (\n        fetch_versioned_implementation_with_fallback(\n            \"onyx.db.standard_answer\",\n            \"fetch_standard_answer_categories_by_ids\",\n            _no_ee_standard_answer_categories,\n        )\n    )\n    existing_standard_answer_categories = (\n        versioned_fetch_standard_answer_categories_by_ids(\n            standard_answer_category_ids=standard_answer_category_ids,\n            db_session=db_session,\n        )\n    )\n    if len(existing_standard_answer_categories) != len(standard_answer_category_ids):\n        raise ValueError(\n            f\"Some or all categories with ids {standard_answer_category_ids} do not exist\"\n        )\n\n    # update the config\n    slack_channel_config.persona_id = persona_id\n    slack_channel_config.channel_config = channel_config\n    slack_channel_config.standard_answer_categories = list(\n        existing_standard_answer_categories\n    )\n    slack_channel_config.enable_auto_filters = enable_auto_filters\n\n    db_session.commit()\n\n    return slack_channel_config\n\n\ndef remove_slack_channel_config(\n    db_session: Session,\n    slack_channel_config_id: int,\n    user: User,\n) -> None:\n    slack_channel_config = db_session.scalar(\n        select(SlackChannelConfig).where(\n            SlackChannelConfig.id == slack_channel_config_id\n        )\n    )\n    if slack_channel_config is None:\n        raise ValueError(\n            f\"Unable to find Slack channel config with ID {slack_channel_config_id}\"\n        )\n\n    existing_persona_id = slack_channel_config.persona_id\n    if existing_persona_id:\n        existing_persona = db_session.scalar(\n            select(Persona).where(Persona.id == existing_persona_id)\n        )\n        # if the existing persona was one created just for use with this Slack channel,\n        # then clean it up\n        if existing_persona and existing_persona.name.startswith(\n            SLACK_BOT_PERSONA_PREFIX\n        ):\n            _cleanup_relationships(\n                db_session=db_session, persona_id=existing_persona_id\n            )\n            mark_persona_as_deleted(\n                persona_id=existing_persona_id, user=user, db_session=db_session\n            )\n\n    db_session.delete(slack_channel_config)\n    db_session.commit()\n\n\ndef fetch_slack_channel_configs(\n    db_session: Session, slack_bot_id: int | None = None\n) -> Sequence[SlackChannelConfig]:\n    if not slack_bot_id:\n        return db_session.scalars(select(SlackChannelConfig)).all()\n\n    return db_session.scalars(\n        select(SlackChannelConfig).where(\n            SlackChannelConfig.slack_bot_id == slack_bot_id\n        )\n    ).all()\n\n\ndef fetch_slack_channel_config(\n    db_session: Session, slack_channel_config_id: int\n) -> SlackChannelConfig | None:\n    return db_session.scalar(\n        select(SlackChannelConfig).where(\n            SlackChannelConfig.id == slack_channel_config_id\n        )\n    )\n\n\ndef fetch_slack_channel_config_for_channel_or_default(\n    db_session: Session, slack_bot_id: int, channel_name: str | None\n) -> SlackChannelConfig | None:\n    # attempt to find channel-specific config first\n    if channel_name is not None:\n        sc_config = db_session.scalar(\n            select(SlackChannelConfig)\n            .options(joinedload(SlackChannelConfig.persona))\n            .where(\n                SlackChannelConfig.slack_bot_id == slack_bot_id,\n                SlackChannelConfig.channel_config[\"channel_name\"].astext\n                == channel_name,\n            )\n        )\n    else:\n        sc_config = None\n\n    if sc_config:\n        return sc_config\n\n    # if none found, see if there is a default\n    default_sc = db_session.scalar(\n        select(SlackChannelConfig)\n        .options(joinedload(SlackChannelConfig.persona))\n        .where(\n            SlackChannelConfig.slack_bot_id == slack_bot_id,\n            SlackChannelConfig.is_default == True,  # noqa: E712\n        )\n    )\n\n    return default_sc\n"
  },
  {
    "path": "backend/onyx/db/swap_index.py",
    "content": "import time\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP\nfrom onyx.configs.constants import KV_REINDEX_KEY\nfrom onyx.db.connector_credential_pair import get_connector_credential_pairs\nfrom onyx.db.connector_credential_pair import resync_cc_pair\nfrom onyx.db.document import delete_all_documents_for_connector_credential_pair\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import IndexModelStatus\nfrom onyx.db.enums import SwitchoverType\nfrom onyx.db.index_attempt import cancel_indexing_attempts_for_search_settings\nfrom onyx.db.index_attempt import (\n    count_unique_active_cc_pairs_with_successful_index_attempts,\n)\nfrom onyx.db.index_attempt import count_unique_cc_pairs_with_successful_index_attempts\nfrom onyx.db.llm import update_default_contextual_model\nfrom onyx.db.llm import update_no_default_contextual_rag_provider\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import SearchSettings\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.db.search_settings import get_secondary_search_settings\nfrom onyx.db.search_settings import update_search_settings_status\nfrom onyx.document_index.factory import get_all_document_indices\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\ndef _perform_index_swap(\n    db_session: Session,\n    new_search_settings: SearchSettings,\n    all_cc_pairs: list[ConnectorCredentialPair],\n    cleanup_documents: bool = False,\n) -> SearchSettings | None:\n    \"\"\"Swap the indices and expire the old one.\n\n    Returns the old search settings if the swap was successful, otherwise None.\n    \"\"\"\n    current_search_settings = get_current_search_settings(db_session)\n    if len(all_cc_pairs) > 0:\n        kv_store = get_kv_store()\n        kv_store.store(KV_REINDEX_KEY, False)\n\n        # Expire jobs for the now past index/embedding model\n        cancel_indexing_attempts_for_search_settings(\n            search_settings_id=current_search_settings.id,\n            db_session=db_session,\n        )\n\n        # Recount aggregates\n        for cc_pair in all_cc_pairs:\n            resync_cc_pair(\n                cc_pair=cc_pair,\n                # sync based on the new search settings\n                search_settings_id=new_search_settings.id,\n                db_session=db_session,\n            )\n\n        if cleanup_documents:\n            # clean up all DocumentByConnectorCredentialPair / Document rows, since we're\n            # doing an instant swap and no documents will exist in the new index.\n            for cc_pair in all_cc_pairs:\n                delete_all_documents_for_connector_credential_pair(\n                    db_session=db_session,\n                    connector_id=cc_pair.connector_id,\n                    credential_id=cc_pair.credential_id,\n                )\n\n    # swap over search settings\n    update_search_settings_status(\n        search_settings=current_search_settings,\n        new_status=IndexModelStatus.PAST,\n        db_session=db_session,\n    )\n    update_search_settings_status(\n        search_settings=new_search_settings,\n        new_status=IndexModelStatus.PRESENT,\n        db_session=db_session,\n    )\n\n    # Update the default contextual model to match the newly promoted settings\n    try:\n        update_default_contextual_model(\n            db_session=db_session,\n            enable_contextual_rag=new_search_settings.enable_contextual_rag,\n            contextual_rag_llm_provider=new_search_settings.contextual_rag_llm_provider,\n            contextual_rag_llm_name=new_search_settings.contextual_rag_llm_name,\n        )\n    except ValueError as e:\n        logger.error(f\"Model not found, defaulting to no contextual model: {e}\")\n        update_no_default_contextual_rag_provider(\n            db_session=db_session,\n        )\n        new_search_settings.enable_contextual_rag = False\n        new_search_settings.contextual_rag_llm_provider = None\n        new_search_settings.contextual_rag_llm_name = None\n        db_session.commit()\n\n    # This flow is for checking and possibly creating an index so we get all\n    # indices.\n    document_indices = get_all_document_indices(new_search_settings, None, None)\n\n    WAIT_SECONDS = 5\n\n    for document_index in document_indices:\n        success = False\n        for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP):\n            try:\n                logger.notice(\n                    f\"Document index {document_index.__class__.__name__} swap (attempt {x + 1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})...\"\n                )\n                document_index.ensure_indices_exist(\n                    primary_embedding_dim=new_search_settings.final_embedding_dim,\n                    primary_embedding_precision=new_search_settings.embedding_precision,\n                    # just finished swap, no more secondary index\n                    secondary_index_embedding_dim=None,\n                    secondary_index_embedding_precision=None,\n                )\n\n                logger.notice(\"Document index swap complete.\")\n                success = True\n                break\n            except Exception:\n                logger.exception(\n                    f\"Document index swap for {document_index.__class__.__name__} did not succeed. \"\n                    f\"The document index services may not be ready yet. Retrying in {WAIT_SECONDS} seconds.\"\n                )\n                time.sleep(WAIT_SECONDS)\n\n        if not success:\n            logger.error(\n                f\"Document index swap for {document_index.__class__.__name__} did not succeed. \"\n                f\"Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})\"\n            )\n            return None\n\n    return current_search_settings\n\n\ndef check_and_perform_index_swap(db_session: Session) -> SearchSettings | None:\n    \"\"\"Get count of cc-pairs and count of successful index_attempts for the\n    new model grouped by connector + credential, if it's the same, then assume\n    new index is done building. If so, swap the indices and expire the old one.\n\n    Returns None if search settings did not change, or the old search settings if they\n    did change.\n    \"\"\"\n    if DISABLE_VECTOR_DB:\n        return None\n\n    # Default CC-pair created for Ingestion API unused here\n    all_cc_pairs = get_connector_credential_pairs(db_session)\n    cc_pair_count = max(len(all_cc_pairs) - 1, 0)\n    new_search_settings = get_secondary_search_settings(db_session)\n\n    if not new_search_settings:\n        return None\n\n    # Handle switchover based on switchover_type\n    switchover_type = new_search_settings.switchover_type\n\n    # INSTANT: Swap immediately without waiting\n    if switchover_type == SwitchoverType.INSTANT:\n        return _perform_index_swap(\n            db_session=db_session,\n            new_search_settings=new_search_settings,\n            all_cc_pairs=all_cc_pairs,\n            # clean up all DocumentByConnectorCredentialPair / Document rows, since we're\n            # doing an instant swap.\n            cleanup_documents=True,\n        )\n\n    # REINDEX: Wait for all connectors to complete\n    elif switchover_type == SwitchoverType.REINDEX:\n        unique_cc_indexings = count_unique_cc_pairs_with_successful_index_attempts(\n            search_settings_id=new_search_settings.id, db_session=db_session\n        )\n\n        # Index Attempts are cleaned up as well when the cc-pair is deleted so the logic in this\n        # function is correct. The unique_cc_indexings are specifically for the existing cc-pairs\n        if unique_cc_indexings > cc_pair_count:\n            logger.error(\"More unique indexings than cc pairs, should not occur\")\n\n        if cc_pair_count == 0 or cc_pair_count == unique_cc_indexings:\n            # Swap indices\n            return _perform_index_swap(\n                db_session=db_session,\n                new_search_settings=new_search_settings,\n                all_cc_pairs=all_cc_pairs,\n            )\n\n        return None\n\n    # ACTIVE_ONLY: Wait for only non-paused connectors to complete\n    elif switchover_type == SwitchoverType.ACTIVE_ONLY:\n        # Count non-paused cc_pairs (excluding the default Ingestion API cc_pair)\n        active_cc_pairs = [\n            cc_pair\n            for cc_pair in all_cc_pairs\n            if cc_pair.status != ConnectorCredentialPairStatus.PAUSED\n        ]\n        active_cc_pair_count = max(len(active_cc_pairs) - 1, 0)\n\n        unique_active_cc_indexings = (\n            count_unique_active_cc_pairs_with_successful_index_attempts(\n                search_settings_id=new_search_settings.id, db_session=db_session\n            )\n        )\n\n        if unique_active_cc_indexings > active_cc_pair_count:\n            logger.error(\n                \"More unique active indexings than active cc pairs, should not occur\"\n            )\n\n        if (\n            active_cc_pair_count == 0\n            or active_cc_pair_count == unique_active_cc_indexings\n        ):\n            # Swap indices\n            return _perform_index_swap(\n                db_session=db_session,\n                new_search_settings=new_search_settings,\n                all_cc_pairs=all_cc_pairs,\n            )\n\n        return None\n\n    # Should not reach here, but handle gracefully\n    logger.error(f\"Unknown switchover_type: {switchover_type}\")\n    return None\n"
  },
  {
    "path": "backend/onyx/db/sync_record.py",
    "content": "from sqlalchemy import and_\nfrom sqlalchemy import desc\nfrom sqlalchemy import func\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.enums import SyncStatus\nfrom onyx.db.enums import SyncType\nfrom onyx.db.models import SyncRecord\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef insert_sync_record(\n    db_session: Session,\n    entity_id: int,\n    sync_type: SyncType,\n) -> SyncRecord:\n    \"\"\"Insert a new sync record into the database, cancelling any existing in-progress records.\n\n    Args:\n        db_session: The database session to use\n        entity_id: The ID of the entity being synced (document set ID, user group ID, etc.)\n        sync_type: The type of sync operation\n    \"\"\"\n    # If an existing in-progress sync record exists, mark as cancelled\n    existing_in_progress_sync_record = fetch_latest_sync_record(\n        db_session, entity_id, sync_type, sync_status=SyncStatus.IN_PROGRESS\n    )\n\n    if existing_in_progress_sync_record is not None:\n        logger.info(\n            f\"Cancelling existing in-progress sync record {existing_in_progress_sync_record.id} \"\n            f\"for entity_id={entity_id} sync_type={sync_type}\"\n        )\n        mark_sync_records_as_cancelled(db_session, entity_id, sync_type)\n\n    return _create_sync_record(db_session, entity_id, sync_type)\n\n\ndef mark_sync_records_as_cancelled(\n    db_session: Session,\n    entity_id: int | None,\n    sync_type: SyncType,\n) -> None:\n    stmt = (\n        update(SyncRecord)\n        .where(\n            and_(\n                SyncRecord.entity_id == entity_id,\n                SyncRecord.sync_type == sync_type,\n                SyncRecord.sync_status == SyncStatus.IN_PROGRESS,\n            )\n        )\n        .values(sync_status=SyncStatus.CANCELED)\n    )\n    db_session.execute(stmt)\n    db_session.commit()\n\n\ndef _create_sync_record(\n    db_session: Session,\n    entity_id: int | None,\n    sync_type: SyncType,\n) -> SyncRecord:\n    \"\"\"Create and insert a new sync record into the database.\"\"\"\n    sync_record = SyncRecord(\n        entity_id=entity_id,\n        sync_type=sync_type,\n        sync_status=SyncStatus.IN_PROGRESS,\n        num_docs_synced=0,\n        sync_start_time=func.now(),\n    )\n    db_session.add(sync_record)\n    db_session.commit()\n\n    return sync_record\n\n\ndef fetch_latest_sync_record(\n    db_session: Session,\n    entity_id: int,\n    sync_type: SyncType,\n    sync_status: SyncStatus | None = None,\n) -> SyncRecord | None:\n    \"\"\"Fetch the most recent sync record for a given entity ID and status.\n\n    Args:\n        db_session: The database session to use\n        entity_id: The ID of the entity to fetch sync record for\n        sync_type: The type of sync operation\n    \"\"\"\n    stmt = (\n        select(SyncRecord)\n        .where(\n            and_(\n                SyncRecord.entity_id == entity_id,\n                SyncRecord.sync_type == sync_type,\n            )\n        )\n        .order_by(desc(SyncRecord.sync_start_time))\n        .limit(1)\n    )\n\n    if sync_status is not None:\n        stmt = stmt.where(SyncRecord.sync_status == sync_status)\n\n    result = db_session.execute(stmt)\n    return result.scalar_one_or_none()\n\n\ndef update_sync_record_status(\n    db_session: Session,\n    entity_id: int,\n    sync_type: SyncType,\n    sync_status: SyncStatus,\n    num_docs_synced: int | None = None,\n) -> None:\n    \"\"\"Update the status of a sync record.\n\n    Args:\n        db_session: The database session to use\n        entity_id: The ID of the entity being synced\n        sync_type: The type of sync operation\n        sync_status: The new status to set\n        num_docs_synced: Optional number of documents synced to update\n    \"\"\"\n    sync_record = fetch_latest_sync_record(db_session, entity_id, sync_type)\n    if sync_record is None:\n        raise ValueError(\n            f\"No sync record found for entity_id={entity_id} sync_type={sync_type}\"\n        )\n\n    sync_record.sync_status = sync_status\n    if num_docs_synced is not None:\n        sync_record.num_docs_synced = num_docs_synced\n\n    if sync_status.is_terminal():\n        sync_record.sync_end_time = func.now()  # type: ignore\n\n    db_session.commit()\n\n\ndef cleanup_sync_records(\n    db_session: Session, entity_id: int, sync_type: SyncType\n) -> None:\n    \"\"\"Cleanup sync records for a given entity ID and sync type by marking them as failed.\"\"\"\n    stmt = (\n        update(SyncRecord)\n        .where(SyncRecord.entity_id == entity_id)\n        .where(SyncRecord.sync_type == sync_type)\n        .where(SyncRecord.sync_status == SyncStatus.IN_PROGRESS)\n        .values(sync_status=SyncStatus.CANCELED, sync_end_time=func.now())\n    )\n    db_session.execute(stmt)\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/tag.py",
    "content": "from typing import Any\n\nfrom sqlalchemy import and_\nfrom sqlalchemy import delete\nfrom sqlalchemy import or_\nfrom sqlalchemy import select\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.models import Document\nfrom onyx.db.models import Document__Tag\nfrom onyx.db.models import Tag\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef check_tag_validity(tag_key: str, tag_value: str) -> bool:\n    \"\"\"If a tag is too long, it should not be used (it will cause an error in Postgres\n    as the unique constraint can only apply to entries that are less than 2704 bytes).\n\n    Additionally, extremely long tags are not really usable / useful.\"\"\"\n    if len(tag_key) + len(tag_value) > 255:\n        logger.error(\n            f\"Tag with key '{tag_key}' and value '{tag_value}' is too long, cannot be used\"\n        )\n        return False\n\n    return True\n\n\ndef create_or_add_document_tag(\n    tag_key: str,\n    tag_value: str,\n    source: DocumentSource,\n    document_id: str,\n    db_session: Session,\n) -> Tag | None:\n    if not check_tag_validity(tag_key, tag_value):\n        return None\n\n    document = db_session.get(Document, document_id)\n    if not document:\n        raise ValueError(\"Invalid Document, cannot attach Tags\")\n\n    # Use upsert to avoid race condition when multiple workers try to create the same tag\n    insert_stmt = pg_insert(Tag).values(\n        tag_key=tag_key,\n        tag_value=tag_value,\n        source=source,\n        is_list=False,\n    )\n    insert_stmt = insert_stmt.on_conflict_do_nothing(\n        constraint=\"_tag_key_value_source_list_uc\"\n    )\n    db_session.execute(insert_stmt)\n\n    # Now fetch the tag (either just inserted or already existed)\n    tag_stmt = select(Tag).where(\n        Tag.tag_key == tag_key,\n        Tag.tag_value == tag_value,\n        Tag.source == source,\n        Tag.is_list.is_(False),\n    )\n    tag = db_session.execute(tag_stmt).scalar_one()\n\n    if tag not in document.tags:\n        document.tags.append(tag)\n\n    db_session.commit()\n    return tag\n\n\ndef create_or_add_document_tag_list(\n    tag_key: str,\n    tag_values: list[str],\n    source: DocumentSource,\n    document_id: str,\n    db_session: Session,\n) -> list[Tag]:\n    valid_tag_values = [\n        tag_value for tag_value in tag_values if check_tag_validity(tag_key, tag_value)\n    ]\n    if not valid_tag_values:\n        return []\n\n    document = db_session.get(Document, document_id)\n    if not document:\n        raise ValueError(\"Invalid Document, cannot attach Tags\")\n\n    # Use upsert to avoid race condition when multiple workers try to create the same tags\n    for tag_value in valid_tag_values:\n        insert_stmt = pg_insert(Tag).values(\n            tag_key=tag_key,\n            tag_value=tag_value,\n            source=source,\n            is_list=True,\n        )\n        insert_stmt = insert_stmt.on_conflict_do_nothing(\n            constraint=\"_tag_key_value_source_list_uc\"\n        )\n        db_session.execute(insert_stmt)\n\n    # Now fetch all tags (either just inserted or already existed)\n    all_tags_stmt = select(Tag).where(\n        Tag.tag_key == tag_key,\n        Tag.tag_value.in_(valid_tag_values),\n        Tag.source == source,\n        Tag.is_list.is_(True),\n    )\n    all_tags = list(db_session.execute(all_tags_stmt).scalars().all())\n\n    for tag in all_tags:\n        if tag not in document.tags:\n            document.tags.append(tag)\n\n    db_session.commit()\n    return all_tags\n\n\ndef upsert_document_tags(\n    document_id: str,\n    source: DocumentSource,\n    metadata: dict[str, str | list[str]],\n    db_session: Session,\n) -> list[Tag]:\n    document = db_session.get(Document, document_id)\n    if not document:\n        raise ValueError(\"Invalid Document, cannot attach Tags\")\n\n    old_tag_ids: set[int] = {tag.id for tag in document.tags}\n\n    new_tags: list[Tag] = []\n    new_tag_ids: set[int] = set()\n    for k, v in metadata.items():\n        if isinstance(v, list):\n            new_tags.extend(\n                create_or_add_document_tag_list(k, v, source, document_id, db_session)\n            )\n            new_tag_ids.update({tag.id for tag in new_tags})\n            continue\n\n        new_tag = create_or_add_document_tag(k, v, source, document_id, db_session)\n        if new_tag:\n            new_tag_ids.add(new_tag.id)\n            new_tags.append(new_tag)\n\n    delete_tags = old_tag_ids - new_tag_ids\n    if delete_tags:\n        delete_stmt = delete(Document__Tag).where(\n            Document__Tag.document_id == document_id,\n            Document__Tag.tag_id.in_(delete_tags),\n        )\n        db_session.execute(delete_stmt)\n        db_session.commit()\n\n    return new_tags\n\n\ndef find_tags(\n    tag_key_prefix: str | None,\n    tag_value_prefix: str | None,\n    sources: list[DocumentSource] | None,\n    limit: int | None,\n    db_session: Session,\n    # if set, both tag_key_prefix and tag_value_prefix must be a match\n    require_both_to_match: bool = False,\n) -> list[Tag]:\n    query = select(Tag)\n\n    if tag_key_prefix or tag_value_prefix:\n        conditions = []\n        if tag_key_prefix:\n            conditions.append(Tag.tag_key.ilike(f\"{tag_key_prefix}%\"))\n        if tag_value_prefix:\n            conditions.append(Tag.tag_value.ilike(f\"{tag_value_prefix}%\"))\n\n        final_prefix_condition = (\n            and_(*conditions) if require_both_to_match else or_(*conditions)\n        )\n        query = query.where(final_prefix_condition)\n\n    if sources:\n        query = query.where(Tag.source.in_(sources))\n\n    if limit:\n        query = query.limit(limit)\n\n    result = db_session.execute(query)\n\n    tags = result.scalars().all()\n    return list(tags)\n\n\ndef get_structured_tags_for_document(\n    document_id: str, db_session: Session\n) -> dict[str, str | list[str]]:\n    \"\"\"Essentially returns the document metadata from postgres.\"\"\"\n    document = db_session.get(Document, document_id)\n    if not document:\n        raise ValueError(\"Invalid Document, cannot find tags\")\n\n    document_metadata: dict[str, Any] = {}\n    for tag in document.tags:\n        if tag.is_list:\n            document_metadata.setdefault(tag.tag_key, [])\n            # should always be a list (if tag.is_list is always True for this key), but just in case\n            if not isinstance(document_metadata[tag.tag_key], list):\n                logger.warning(\n                    \"Inconsistent is_list for document %s, tag_key %s\",\n                    document_id,\n                    tag.tag_key,\n                )\n                document_metadata[tag.tag_key] = [document_metadata[tag.tag_key]]\n            document_metadata[tag.tag_key].append(tag.tag_value)\n            continue\n\n        # set value (ignore duplicate keys, though there should be none)\n        document_metadata.setdefault(tag.tag_key, tag.tag_value)\n\n        # should always be a value, but just in case (treat it as a list in this case)\n        if isinstance(document_metadata[tag.tag_key], list):\n            logger.warning(\n                \"Inconsistent is_list for document %s, tag_key %s\",\n                document_id,\n                tag.tag_key,\n            )\n            document_metadata[tag.tag_key] = [document_metadata[tag.tag_key]]\n    return document_metadata\n\n\ndef delete_document_tags_for_documents__no_commit(\n    document_ids: list[str], db_session: Session\n) -> None:\n    stmt = delete(Document__Tag).where(Document__Tag.document_id.in_(document_ids))\n    db_session.execute(stmt)\n\n\ndef delete_orphan_tags__no_commit(db_session: Session) -> None:\n    orphan_tags_query = select(Tag.id).where(\n        ~db_session.query(Document__Tag.tag_id)\n        .filter(Document__Tag.tag_id == Tag.id)\n        .exists()\n    )\n\n    orphan_tags = db_session.execute(orphan_tags_query).scalars().all()\n\n    if orphan_tags:\n        delete_orphan_tags_stmt = delete(Tag).where(Tag.id.in_(orphan_tags))\n        db_session.execute(delete_orphan_tags_stmt)\n"
  },
  {
    "path": "backend/onyx/db/tasks.py",
    "content": "from datetime import datetime\n\nfrom sqlalchemy import desc\nfrom sqlalchemy import func\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.sql import delete\n\nfrom onyx.configs.app_configs import JOB_TIMEOUT\nfrom onyx.db.engine.time_utils import get_db_current_time\nfrom onyx.db.models import TaskQueueState\nfrom onyx.db.models import TaskStatus\n\n\ndef get_latest_task(\n    task_name: str,\n    db_session: Session,\n) -> TaskQueueState | None:\n    stmt = (\n        select(TaskQueueState)\n        .where(TaskQueueState.task_name == task_name)\n        .order_by(desc(TaskQueueState.id))\n        .limit(1)\n    )\n\n    result = db_session.execute(stmt)\n    latest_task = result.scalars().first()\n\n    return latest_task\n\n\ndef get_latest_task_by_type(\n    task_name: str,\n    db_session: Session,\n) -> TaskQueueState | None:\n    stmt = (\n        select(TaskQueueState)\n        .where(TaskQueueState.task_name.like(f\"%{task_name}%\"))\n        .order_by(desc(TaskQueueState.id))\n        .limit(1)\n    )\n\n    result = db_session.execute(stmt)\n    latest_task = result.scalars().first()\n\n    return latest_task\n\n\ndef register_task(\n    task_name: str,\n    db_session: Session,\n    task_id: str = \"\",\n    status: TaskStatus = TaskStatus.PENDING,\n    start_time: datetime | None = None,\n) -> TaskQueueState:\n    new_task = TaskQueueState(\n        task_id=task_id,\n        task_name=task_name,\n        status=status,\n        start_time=start_time,\n    )\n\n    db_session.add(new_task)\n    db_session.commit()\n\n    return new_task\n\n\ndef get_task_with_id(\n    db_session: Session,\n    task_id: str,\n) -> TaskQueueState | None:\n    return db_session.scalar(\n        select(TaskQueueState).where(TaskQueueState.task_id == task_id)\n    )\n\n\ndef delete_task_with_id(\n    db_session: Session,\n    task_id: str,\n) -> None:\n    db_session.execute(delete(TaskQueueState).where(TaskQueueState.task_id == task_id))\n    db_session.commit()\n\n\ndef get_all_tasks_with_prefix(\n    db_session: Session, task_name_prefix: str\n) -> list[TaskQueueState]:\n    return list(\n        db_session.scalars(\n            select(TaskQueueState).where(\n                TaskQueueState.task_name.like(f\"{task_name_prefix}_%\")\n            )\n        )\n    )\n\n\ndef mark_task_as_started_with_id(\n    db_session: Session,\n    task_id: str,\n) -> None:\n    task = get_task_with_id(db_session=db_session, task_id=task_id)\n    if not task:\n        raise RuntimeError(f\"A task with the task-id {task_id=} does not exist\")\n\n    task.status = TaskStatus.STARTED\n    db_session.commit()\n\n\ndef mark_task_as_finished_with_id(\n    db_session: Session,\n    task_id: str,\n    success: bool = True,\n) -> None:\n    task = get_task_with_id(db_session=db_session, task_id=task_id)\n    if not task:\n        raise RuntimeError(f\"A task with the task-id {task_id=} does not exist\")\n\n    task.status = TaskStatus.SUCCESS if success else TaskStatus.FAILURE\n    db_session.commit()\n\n\ndef mark_task_start(\n    task_name: str,\n    db_session: Session,\n) -> None:\n    task = get_latest_task(task_name, db_session)\n    if not task:\n        raise ValueError(f\"No task found with name {task_name}\")\n\n    task.start_time = func.now()  # type: ignore\n    db_session.commit()\n\n\ndef mark_task_finished(\n    task_name: str,\n    db_session: Session,\n    success: bool = True,\n) -> None:\n    latest_task = get_latest_task(task_name, db_session)\n    if latest_task is None:\n        raise ValueError(f\"tasks for {task_name} do not exist\")\n\n    latest_task.status = TaskStatus.SUCCESS if success else TaskStatus.FAILURE\n    db_session.commit()\n\n\ndef check_task_is_live_and_not_timed_out(\n    task: TaskQueueState,\n    db_session: Session,\n    timeout: int = JOB_TIMEOUT,\n) -> bool:\n    # We only care for live tasks to not create new periodic tasks\n    if task.status in [TaskStatus.SUCCESS, TaskStatus.FAILURE]:\n        return False\n\n    current_db_time = get_db_current_time(db_session=db_session)\n\n    last_update_time = task.register_time\n    if task.start_time:\n        last_update_time = max(task.register_time, task.start_time)\n\n    time_elapsed = current_db_time - last_update_time\n    return time_elapsed.total_seconds() < timeout\n"
  },
  {
    "path": "backend/onyx/db/token_limit.py",
    "content": "from collections.abc import Sequence\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import TokenRateLimitScope\nfrom onyx.db.models import TokenRateLimit\nfrom onyx.db.models import TokenRateLimit__UserGroup\nfrom onyx.server.token_rate_limits.models import TokenRateLimitArgs\n\n\ndef fetch_all_user_token_rate_limits(\n    db_session: Session,\n    enabled_only: bool = False,\n    ordered: bool = True,\n) -> Sequence[TokenRateLimit]:\n    query = select(TokenRateLimit).where(\n        TokenRateLimit.scope == TokenRateLimitScope.USER\n    )\n\n    if enabled_only:\n        query = query.where(TokenRateLimit.enabled.is_(True))\n\n    if ordered:\n        query = query.order_by(TokenRateLimit.created_at.desc())\n\n    return db_session.scalars(query).all()\n\n\ndef fetch_all_global_token_rate_limits(\n    db_session: Session,\n    enabled_only: bool = False,\n    ordered: bool = True,\n) -> Sequence[TokenRateLimit]:\n    query = select(TokenRateLimit).where(\n        TokenRateLimit.scope == TokenRateLimitScope.GLOBAL\n    )\n\n    if enabled_only:\n        query = query.where(TokenRateLimit.enabled.is_(True))\n\n    if ordered:\n        query = query.order_by(TokenRateLimit.created_at.desc())\n\n    token_rate_limits = db_session.scalars(query).all()\n    return token_rate_limits\n\n\ndef insert_user_token_rate_limit(\n    db_session: Session,\n    token_rate_limit_settings: TokenRateLimitArgs,\n) -> TokenRateLimit:\n    token_limit = TokenRateLimit(\n        enabled=token_rate_limit_settings.enabled,\n        token_budget=token_rate_limit_settings.token_budget,\n        period_hours=token_rate_limit_settings.period_hours,\n        scope=TokenRateLimitScope.USER,\n    )\n    db_session.add(token_limit)\n    db_session.commit()\n\n    return token_limit\n\n\ndef insert_global_token_rate_limit(\n    db_session: Session,\n    token_rate_limit_settings: TokenRateLimitArgs,\n) -> TokenRateLimit:\n    token_limit = TokenRateLimit(\n        enabled=token_rate_limit_settings.enabled,\n        token_budget=token_rate_limit_settings.token_budget,\n        period_hours=token_rate_limit_settings.period_hours,\n        scope=TokenRateLimitScope.GLOBAL,\n    )\n    db_session.add(token_limit)\n    db_session.commit()\n\n    return token_limit\n\n\ndef update_token_rate_limit(\n    db_session: Session,\n    token_rate_limit_id: int,\n    token_rate_limit_settings: TokenRateLimitArgs,\n) -> TokenRateLimit:\n    token_limit = db_session.get(TokenRateLimit, token_rate_limit_id)\n    if token_limit is None:\n        raise ValueError(f\"TokenRateLimit with id '{token_rate_limit_id}' not found\")\n\n    token_limit.enabled = token_rate_limit_settings.enabled\n    token_limit.token_budget = token_rate_limit_settings.token_budget\n    token_limit.period_hours = token_rate_limit_settings.period_hours\n    db_session.commit()\n\n    return token_limit\n\n\ndef delete_token_rate_limit(\n    db_session: Session,\n    token_rate_limit_id: int,\n) -> None:\n    token_limit = db_session.get(TokenRateLimit, token_rate_limit_id)\n    if token_limit is None:\n        raise ValueError(f\"TokenRateLimit with id '{token_rate_limit_id}' not found\")\n\n    db_session.query(TokenRateLimit__UserGroup).filter(\n        TokenRateLimit__UserGroup.rate_limit_id == token_rate_limit_id\n    ).delete()\n\n    db_session.delete(token_limit)\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/tools.py",
    "content": "from typing import Any\nfrom typing import cast\nfrom typing import Type\nfrom typing import TYPE_CHECKING\nfrom uuid import UUID\n\nfrom sqlalchemy import func\nfrom sqlalchemy import or_\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.constants import UNSET\nfrom onyx.db.constants import UnsetType\nfrom onyx.db.enums import MCPServerStatus\nfrom onyx.db.models import MCPServer\nfrom onyx.db.models import OAuthConfig\nfrom onyx.db.models import Tool\nfrom onyx.db.models import ToolCall\nfrom onyx.server.features.tool.models import Header\nfrom onyx.tools.built_in_tools import BUILT_IN_TOOL_TYPES\nfrom onyx.utils.headers import HeaderItemDict\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.postgres_sanitization import sanitize_json_like\nfrom onyx.utils.postgres_sanitization import sanitize_string\n\nif TYPE_CHECKING:\n    pass\n\nlogger = setup_logger()\n\n\ndef get_tools(\n    db_session: Session,\n    *,\n    only_enabled: bool = False,\n    only_connected_mcp: bool = False,\n    only_openapi: bool = False,\n) -> list[Tool]:\n    query = select(Tool)\n\n    if only_connected_mcp:\n        # Keep tools that either:\n        # 1. Don't have an MCP server (mcp_server_id IS NULL) - Non-MCP tools\n        # 2. Have an MCP server that is connected - Connected MCP tools\n        query = query.outerjoin(MCPServer, Tool.mcp_server_id == MCPServer.id).where(\n            or_(\n                Tool.mcp_server_id.is_(None),  # Non-MCP tools (built-in, custom)\n                MCPServer.status == MCPServerStatus.CONNECTED,  # MCP tools connected\n            )\n        )\n\n    if only_enabled:\n        query = query.where(Tool.enabled.is_(True))\n\n    if only_openapi:\n        query = query.where(\n            Tool.openapi_schema.is_not(None),\n            # To avoid showing rows that have JSON literal `null` stored in the column to the user.\n            # tools from mcp servers will not have an openapi schema but it has `null`, so we need to exclude them.\n            func.jsonb_typeof(Tool.openapi_schema) == \"object\",\n            # Exclude built-in tools that happen to have an openapi_schema\n            Tool.in_code_tool_id.is_(None),\n        )\n\n    return list(db_session.scalars(query).all())\n\n\ndef get_tools_by_mcp_server_id(\n    mcp_server_id: int,\n    db_session: Session,\n    *,\n    only_enabled: bool = False,\n    order_by_id: bool = False,\n) -> list[Tool]:\n    query = select(Tool).where(Tool.mcp_server_id == mcp_server_id)\n    if only_enabled:\n        query = query.where(Tool.enabled.is_(True))\n    if order_by_id:\n        query = query.order_by(Tool.id)\n    return list(db_session.scalars(query).all())\n\n\ndef get_tools_by_ids(tool_ids: list[int], db_session: Session) -> list[Tool]:\n    if not tool_ids:\n        return []\n    stmt = select(Tool).where(Tool.id.in_(tool_ids))\n    return list(db_session.scalars(stmt).all())\n\n\ndef get_tool_by_id(tool_id: int, db_session: Session) -> Tool:\n    tool = db_session.scalar(select(Tool).where(Tool.id == tool_id))\n    if not tool:\n        raise ValueError(\"Tool by specified id does not exist\")\n    return tool\n\n\ndef get_tool_by_name(tool_name: str, db_session: Session) -> Tool:\n    tool = db_session.scalar(select(Tool).where(Tool.name == tool_name))\n    if not tool:\n        raise ValueError(\"Tool by specified name does not exist\")\n    return tool\n\n\ndef create_tool__no_commit(\n    name: str,\n    description: str | None,\n    openapi_schema: dict[str, Any] | None,\n    custom_headers: list[Header] | None,\n    user_id: UUID | None,\n    db_session: Session,\n    passthrough_auth: bool,\n    *,\n    mcp_server_id: int | None = None,\n    oauth_config_id: int | None = None,\n    enabled: bool = True,\n) -> Tool:\n    new_tool = Tool(\n        name=name,\n        description=description,\n        in_code_tool_id=None,\n        openapi_schema=openapi_schema,\n        custom_headers=(\n            [header.model_dump() for header in custom_headers] if custom_headers else []\n        ),\n        user_id=user_id,\n        passthrough_auth=passthrough_auth,\n        mcp_server_id=mcp_server_id,\n        oauth_config_id=oauth_config_id,\n        enabled=enabled,\n    )\n    db_session.add(new_tool)\n    db_session.flush()  # Don't commit yet, let caller decide when to commit\n    return new_tool\n\n\ndef update_tool(\n    tool_id: int,\n    name: str | None,\n    description: str | None,\n    openapi_schema: dict[str, Any] | None,\n    custom_headers: list[Header] | None,\n    user_id: UUID | None,\n    db_session: Session,\n    passthrough_auth: bool | None,\n    oauth_config_id: int | None | UnsetType = UNSET,\n) -> Tool:\n    tool = get_tool_by_id(tool_id, db_session)\n    if tool is None:\n        raise ValueError(f\"Tool with ID {tool_id} does not exist\")\n\n    if name is not None:\n        tool.name = name\n    if description is not None:\n        tool.description = description\n    if openapi_schema is not None:\n        tool.openapi_schema = openapi_schema\n    if user_id is not None:\n        tool.user_id = user_id\n    if custom_headers is not None:\n        tool.custom_headers = [\n            cast(HeaderItemDict, header.model_dump()) for header in custom_headers\n        ]\n    if passthrough_auth is not None:\n        tool.passthrough_auth = passthrough_auth\n    old_oauth_config_id = tool.oauth_config_id\n    if not isinstance(oauth_config_id, UnsetType):\n        tool.oauth_config_id = oauth_config_id\n        db_session.flush()\n\n    # Clean up orphaned OAuthConfig if the oauth_config_id was changed\n    if (\n        old_oauth_config_id is not None\n        and not isinstance(oauth_config_id, UnsetType)\n        and old_oauth_config_id != oauth_config_id\n    ):\n        other_tools = db_session.scalars(\n            select(Tool).where(Tool.oauth_config_id == old_oauth_config_id)\n        ).all()\n        if not other_tools:\n            oauth_config = db_session.get(OAuthConfig, old_oauth_config_id)\n            if oauth_config:\n                db_session.delete(oauth_config)\n\n    db_session.commit()\n    return tool\n\n\ndef delete_tool__no_commit(tool_id: int, db_session: Session) -> None:\n    tool = get_tool_by_id(tool_id, db_session)\n    if tool is None:\n        raise ValueError(f\"Tool with ID {tool_id} does not exist\")\n\n    oauth_config_id = tool.oauth_config_id\n\n    db_session.delete(tool)\n    db_session.flush()\n\n    # Clean up orphaned OAuthConfig if no other tools reference it\n    if oauth_config_id is not None:\n        other_tools = db_session.scalars(\n            select(Tool).where(Tool.oauth_config_id == oauth_config_id)\n        ).all()\n        if not other_tools:\n            oauth_config = db_session.get(OAuthConfig, oauth_config_id)\n            if oauth_config:\n                db_session.delete(oauth_config)\n                db_session.flush()\n\n\ndef get_builtin_tool(\n    db_session: Session,\n    tool_type: Type[BUILT_IN_TOOL_TYPES],\n) -> Tool:\n    \"\"\"\n    Retrieves a built-in tool from the database based on the tool type.\n    \"\"\"\n    # local import to avoid circular import. DB layer should not depend on tools layer.\n    from onyx.tools.built_in_tools import BUILT_IN_TOOL_MAP\n\n    tool_id = next(\n        (\n            in_code_tool_id\n            for in_code_tool_id, tool_cls in BUILT_IN_TOOL_MAP.items()\n            if tool_cls.__name__ == tool_type.__name__\n        ),\n        None,\n    )\n\n    if not tool_id:\n        raise RuntimeError(\n            f\"Tool type {tool_type.__name__} not found in the BUILT_IN_TOOLS list.\"\n        )\n\n    db_tool = db_session.execute(\n        select(Tool).where(Tool.in_code_tool_id == tool_id)\n    ).scalar_one_or_none()\n\n    if not db_tool:\n        raise RuntimeError(f\"Tool type {tool_type.__name__} not found in the database.\")\n\n    return db_tool\n\n\ndef create_tool_call_no_commit(\n    chat_session_id: UUID,\n    parent_chat_message_id: int | None,\n    turn_number: int,\n    tool_id: int,\n    tool_call_id: str,\n    tool_call_arguments: dict[str, Any],\n    tool_call_response: Any,\n    tool_call_tokens: int,\n    db_session: Session,\n    *,\n    parent_tool_call_id: int | None = None,\n    reasoning_tokens: str | None = None,\n    generated_images: list[dict] | None = None,\n    tab_index: int = 0,\n    add_only: bool = True,\n) -> ToolCall:\n    \"\"\"\n    Create a ToolCall entry in the database.\n\n    Args:\n        chat_session_id: The chat session ID\n        parent_chat_message_id: The parent chat message ID\n        turn_number: The turn number for this tool call\n        tool_id: The tool ID\n        tool_call_id: The tool call ID (string identifier from LLM)\n        tool_call_arguments: The tool call arguments\n        tool_call_response: The tool call response\n        tool_call_tokens: The number of tokens in the tool call arguments\n        db_session: The database session\n        parent_tool_call_id: Optional parent tool call ID (for nested tool calls)\n        reasoning_tokens: Optional reasoning tokens\n        generated_images: Optional list of generated image metadata for replay\n        tab_index: Index order of tool calls from the LLM for parallel tool calls\n        commit: If True, commit the transaction; if False, flush only\n\n    Returns:\n        The created ToolCall object\n    \"\"\"\n    tool_call = ToolCall(\n        chat_session_id=chat_session_id,\n        parent_chat_message_id=parent_chat_message_id,\n        parent_tool_call_id=parent_tool_call_id,\n        turn_number=turn_number,\n        tab_index=tab_index,\n        tool_id=tool_id,\n        tool_call_id=tool_call_id,\n        reasoning_tokens=(\n            sanitize_string(reasoning_tokens) if reasoning_tokens else reasoning_tokens\n        ),\n        tool_call_arguments=sanitize_json_like(tool_call_arguments),\n        tool_call_response=sanitize_json_like(tool_call_response),\n        tool_call_tokens=tool_call_tokens,\n        generated_images=sanitize_json_like(generated_images),\n    )\n\n    db_session.add(tool_call)\n    if not add_only:\n        db_session.add(tool_call)\n    else:\n        db_session.flush()\n    return tool_call\n"
  },
  {
    "path": "backend/onyx/db/usage.py",
    "content": "\"\"\"Database interactions for tenant usage tracking (cloud usage limits).\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timezone\nfrom enum import Enum\n\nfrom pydantic import BaseModel\nfrom sqlalchemy import select\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import TenantUsage\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import USAGE_LIMIT_WINDOW_SECONDS\n\nlogger = setup_logger()\n\n\nclass UsageType(str, Enum):\n    \"\"\"Types of usage that can be tracked and limited.\"\"\"\n\n    LLM_COST = \"llm_cost_cents\"\n    CHUNKS_INDEXED = \"chunks_indexed\"\n    API_CALLS = \"api_calls\"\n    NON_STREAMING_API_CALLS = \"non_streaming_api_calls\"\n\n\nclass TenantUsageStats(BaseModel):\n    \"\"\"Current usage statistics for a tenant.\"\"\"\n\n    window_start: datetime\n    llm_cost_cents: float\n    chunks_indexed: int\n    api_calls: int\n    non_streaming_api_calls: int\n\n\nclass UsageLimitExceededError(Exception):\n    \"\"\"Raised when a tenant exceeds their usage limit.\"\"\"\n\n    def __init__(self, usage_type: UsageType, current: float, limit: float):\n        self.usage_type = usage_type\n        self.current = current\n        self.limit = limit\n        super().__init__(\n            f\"Usage limit exceeded for {usage_type.value}: current usage {current}, limit {limit}\"\n        )\n\n\ndef get_current_window_start() -> datetime:\n    \"\"\"\n    Calculate the start of the current usage window.\n\n    Uses fixed windows aligned to Monday 00:00 UTC for predictability.\n    The window duration is configured via USAGE_LIMIT_WINDOW_SECONDS.\n    \"\"\"\n    now = datetime.now(timezone.utc)\n    # For weekly windows (default), align to Monday 00:00 UTC\n    if USAGE_LIMIT_WINDOW_SECONDS == 604800:  # 1 week\n        # Get the start of the current week (Monday)\n        days_since_monday = now.weekday()\n        window_start = now.replace(\n            hour=0, minute=0, second=0, microsecond=0\n        ) - __import__(\"datetime\").timedelta(days=days_since_monday)\n        return window_start\n\n    # For other window sizes, use epoch-aligned windows\n    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)\n    seconds_since_epoch = int((now - epoch).total_seconds())\n    window_number = seconds_since_epoch // USAGE_LIMIT_WINDOW_SECONDS\n    window_start_seconds = window_number * USAGE_LIMIT_WINDOW_SECONDS\n    return epoch + __import__(\"datetime\").timedelta(seconds=window_start_seconds)\n\n\ndef get_or_create_tenant_usage(\n    db_session: Session,\n    window_start: datetime | None = None,\n) -> TenantUsage:\n    \"\"\"\n    Get or create the usage record for the current window.\n\n    Uses INSERT ... ON CONFLICT DO UPDATE to atomically create or get the record,\n    avoiding TOCTOU race conditions where two concurrent requests could both\n    attempt to insert a new record.\n    \"\"\"\n    if window_start is None:\n        window_start = get_current_window_start()\n\n    # Atomic upsert: insert if not exists, or update a field to itself if exists\n    # This ensures we always get back a valid row without race conditions\n    stmt = (\n        pg_insert(TenantUsage)\n        .values(\n            window_start=window_start,\n            llm_cost_cents=0.0,\n            chunks_indexed=0,\n            api_calls=0,\n            non_streaming_api_calls=0,\n        )\n        .on_conflict_do_update(\n            index_elements=[\"window_start\"],\n            # No-op update: just set a field to its current value\n            # This ensures the row is returned even on conflict\n            set_={\"llm_cost_cents\": TenantUsage.llm_cost_cents},\n        )\n        .returning(TenantUsage)\n    )\n\n    result = db_session.execute(stmt).scalar_one()\n    db_session.flush()\n\n    return result\n\n\ndef get_tenant_usage_stats(\n    db_session: Session,\n    window_start: datetime | None = None,\n) -> TenantUsageStats:\n    \"\"\"Get the current usage statistics for the tenant (read-only, no lock).\"\"\"\n    if window_start is None:\n        window_start = get_current_window_start()\n\n    usage = db_session.execute(\n        select(TenantUsage).where(TenantUsage.window_start == window_start)\n    ).scalar_one_or_none()\n\n    if usage is None:\n        # No usage recorded yet for this window\n        return TenantUsageStats(\n            window_start=window_start,\n            llm_cost_cents=0.0,\n            chunks_indexed=0,\n            api_calls=0,\n            non_streaming_api_calls=0,\n        )\n\n    return TenantUsageStats(\n        window_start=usage.window_start,\n        llm_cost_cents=usage.llm_cost_cents,\n        chunks_indexed=usage.chunks_indexed,\n        api_calls=usage.api_calls,\n        non_streaming_api_calls=usage.non_streaming_api_calls,\n    )\n\n\ndef increment_usage(\n    db_session: Session,\n    usage_type: UsageType,\n    amount: float | int,\n) -> None:\n    \"\"\"\n    Atomically increment a usage counter.\n\n    Uses row-level locking to prevent race conditions.\n    The caller should handle the transaction commit.\n    \"\"\"\n    usage = get_or_create_tenant_usage(db_session)\n\n    if usage_type == UsageType.LLM_COST:\n        usage.llm_cost_cents += float(amount)\n    elif usage_type == UsageType.CHUNKS_INDEXED:\n        usage.chunks_indexed += int(amount)\n    elif usage_type == UsageType.API_CALLS:\n        usage.api_calls += int(amount)\n    elif usage_type == UsageType.NON_STREAMING_API_CALLS:\n        usage.non_streaming_api_calls += int(amount)\n\n    db_session.flush()\n\n\ndef check_usage_limit(\n    db_session: Session,\n    usage_type: UsageType,\n    limit: float | int,\n    pending_amount: float | int = 0,\n) -> None:\n    \"\"\"\n    Check if the current usage plus pending amount would exceed the limit.\n\n    Args:\n        db_session: Database session\n        usage_type: Type of usage to check\n        limit: The maximum allowed usage\n        pending_amount: Amount about to be used (to check before committing)\n\n    Raises:\n        UsageLimitExceededError: If usage would exceed the limit\n    \"\"\"\n    stats = get_tenant_usage_stats(db_session)\n\n    current_value: float\n    if usage_type == UsageType.LLM_COST:\n        current_value = stats.llm_cost_cents\n    elif usage_type == UsageType.CHUNKS_INDEXED:\n        current_value = float(stats.chunks_indexed)\n    elif usage_type == UsageType.API_CALLS:\n        current_value = float(stats.api_calls)\n    elif usage_type == UsageType.NON_STREAMING_API_CALLS:\n        current_value = float(stats.non_streaming_api_calls)\n    else:\n        current_value = 0.0\n\n    if current_value + pending_amount > limit:\n        raise UsageLimitExceededError(\n            usage_type=usage_type,\n            current=current_value + pending_amount,\n            limit=float(limit),\n        )\n"
  },
  {
    "path": "backend/onyx/db/user_file.py",
    "content": "import datetime\nfrom uuid import UUID\n\nfrom sqlalchemy import func\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import joinedload\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Project__UserFile\nfrom onyx.db.models import UserFile\n\n\ndef fetch_chunk_counts_for_user_files(\n    user_file_ids: list[str],\n    db_session: Session,\n) -> list[tuple[str, int]]:\n    \"\"\"\n    Return a list of (user_file_id, chunk_count) tuples.\n    If a user_file_id is not found in the database, it will be returned with a chunk_count of 0.\n    \"\"\"\n    stmt = select(UserFile.id, UserFile.chunk_count).where(\n        UserFile.id.in_(user_file_ids)\n    )\n\n    results = db_session.execute(stmt).all()\n\n    # Create a dictionary of user_file_id to chunk_count\n    chunk_counts = {str(row.id): row.chunk_count or 0 for row in results}\n\n    # Return a list of tuples, preserving `None` for documents not found or with\n    # an unknown chunk count. Callers should handle the `None` case and fall\n    # back to an existence check against the vector DB if necessary.\n    return [\n        (user_file_id, chunk_counts.get(user_file_id, 0))\n        for user_file_id in user_file_ids\n    ]\n\n\ndef calculate_user_files_token_count(file_ids: list[UUID], db_session: Session) -> int:\n    \"\"\"Calculate total token count for specified files\"\"\"\n    total_tokens = 0\n\n    # Get tokens from individual files\n    if file_ids:\n        file_tokens = (\n            db_session.query(func.sum(UserFile.token_count))\n            .filter(UserFile.id.in_(file_ids))\n            .scalar()\n            or 0\n        )\n        total_tokens += file_tokens\n\n    return total_tokens\n\n\ndef fetch_user_project_ids_for_user_files(\n    user_file_ids: list[str],\n    db_session: Session,\n) -> dict[str, list[int]]:\n    \"\"\"Fetch user project ids for specified user files\"\"\"\n    user_file_uuid_ids = [UUID(user_file_id) for user_file_id in user_file_ids]\n    stmt = select(Project__UserFile.user_file_id, Project__UserFile.project_id).where(\n        Project__UserFile.user_file_id.in_(user_file_uuid_ids)\n    )\n    rows = db_session.execute(stmt).all()\n\n    user_file_id_to_project_ids: dict[str, list[int]] = {\n        user_file_id: [] for user_file_id in user_file_ids\n    }\n    for user_file_id, project_id in rows:\n        user_file_id_to_project_ids[str(user_file_id)].append(project_id)\n\n    return user_file_id_to_project_ids\n\n\ndef fetch_persona_ids_for_user_files(\n    user_file_ids: list[str],\n    db_session: Session,\n) -> dict[str, list[int]]:\n    \"\"\"Fetch persona (assistant) ids for specified user files.\"\"\"\n    stmt = (\n        select(UserFile)\n        .where(UserFile.id.in_(user_file_ids))\n        .options(selectinload(UserFile.assistants))\n    )\n    results = db_session.execute(stmt).scalars().all()\n    return {\n        str(user_file.id): [persona.id for persona in user_file.assistants]\n        for user_file in results\n    }\n\n\ndef update_last_accessed_at_for_user_files(\n    user_file_ids: list[UUID],\n    db_session: Session,\n) -> None:\n    \"\"\"Update `last_accessed_at` to now (UTC) for the given user files.\"\"\"\n    if not user_file_ids:\n        return\n    now = datetime.datetime.now(datetime.timezone.utc)\n    (\n        db_session.query(UserFile)\n        .filter(UserFile.id.in_(user_file_ids))\n        .update({UserFile.last_accessed_at: now}, synchronize_session=False)\n    )\n    db_session.commit()\n\n\ndef get_file_id_by_user_file_id(user_file_id: str, db_session: Session) -> str | None:\n    user_file = db_session.query(UserFile).filter(UserFile.id == user_file_id).first()\n    if user_file:\n        return user_file.file_id\n    return None\n\n\ndef get_file_ids_by_user_file_ids(\n    user_file_ids: list[UUID], db_session: Session\n) -> list[str]:\n    user_files = db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids)).all()\n    return [user_file.file_id for user_file in user_files]\n\n\ndef fetch_user_files_with_access_relationships(\n    user_file_ids: list[str],\n    db_session: Session,\n    eager_load_groups: bool = False,\n) -> list[UserFile]:\n    \"\"\"Fetch user files with the owner and assistant relationships\n    eagerly loaded (needed for computing access control).\n\n    When eager_load_groups is True, Persona.groups is also loaded so that\n    callers can extract user-group names without a second DB round-trip.\"\"\"\n    persona_sub_options = [\n        selectinload(Persona.users),\n        selectinload(Persona.user),\n    ]\n    if eager_load_groups:\n        persona_sub_options.append(selectinload(Persona.groups))\n\n    return (\n        db_session.query(UserFile)\n        .options(\n            joinedload(UserFile.user),\n            selectinload(UserFile.assistants).options(*persona_sub_options),\n        )\n        .filter(UserFile.id.in_(user_file_ids))\n        .all()\n    )\n"
  },
  {
    "path": "backend/onyx/db/user_preferences.py",
    "content": "from collections.abc import Sequence\nfrom uuid import UUID\n\nfrom sqlalchemy import Column\nfrom sqlalchemy import delete\nfrom sqlalchemy import desc\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.db.enums import AccountType\nfrom onyx.db.enums import DefaultAppMode\nfrom onyx.db.enums import ThemePreference\nfrom onyx.db.models import AccessToken\nfrom onyx.db.models import Assistant__UserSpecificConfig\nfrom onyx.db.models import Memory\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserGroup\nfrom onyx.db.permissions import recompute_user_permissions__no_commit\nfrom onyx.db.users import assign_user_to_default_groups__no_commit\nfrom onyx.server.manage.models import MemoryItem\nfrom onyx.server.manage.models import UserSpecificAssistantPreference\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\n_ROLE_TO_ACCOUNT_TYPE: dict[UserRole, AccountType] = {\n    UserRole.SLACK_USER: AccountType.BOT,\n    UserRole.EXT_PERM_USER: AccountType.EXT_PERM_USER,\n}\n\n\ndef update_user_role(\n    user: User,\n    new_role: UserRole,\n    db_session: Session,\n) -> None:\n    \"\"\"Update a user's role in the database.\n    Dual-writes account_type to keep it in sync with role and\n    reconciles default-group membership (Admin / Basic).\"\"\"\n    old_role = user.role\n    user.role = new_role\n    # Note: setting account_type to BOT or EXT_PERM_USER causes\n    # assign_user_to_default_groups__no_commit to early-return, which is\n    # intentional — these account types should not be in default groups.\n    if new_role in _ROLE_TO_ACCOUNT_TYPE:\n        user.account_type = _ROLE_TO_ACCOUNT_TYPE[new_role]\n    elif user.account_type in (AccountType.BOT, AccountType.EXT_PERM_USER):\n        # Upgrading from a non-web-login account type to a web role\n        user.account_type = AccountType.STANDARD\n\n    # Reconcile default-group membership when the role changes.\n    if old_role != new_role:\n        # Remove from all default groups first.\n        db_session.execute(\n            delete(User__UserGroup).where(\n                User__UserGroup.user_id == user.id,\n                User__UserGroup.user_group_id.in_(\n                    select(UserGroup.id).where(UserGroup.is_default.is_(True))\n                ),\n            )\n        )\n\n        # Re-assign to the correct default group (skip for LIMITED).\n        if new_role != UserRole.LIMITED:\n            assign_user_to_default_groups__no_commit(\n                db_session,\n                user,\n                is_admin=(new_role == UserRole.ADMIN),\n            )\n\n        recompute_user_permissions__no_commit(user.id, db_session)\n\n    db_session.commit()\n\n\ndef deactivate_user(\n    user: User,\n    db_session: Session,\n) -> None:\n    \"\"\"Deactivate a user by setting is_active to False.\"\"\"\n    user.is_active = False\n    db_session.add(user)\n    db_session.commit()\n\n\ndef activate_user(\n    user: User,\n    db_session: Session,\n) -> None:\n    \"\"\"Activate a user by setting is_active to True.\n\n    Also reconciles default-group membership — the user may have been\n    created while inactive or deactivated before the backfill migration.\n    \"\"\"\n    user.is_active = True\n    if user.role != UserRole.LIMITED:\n        assign_user_to_default_groups__no_commit(\n            db_session, user, is_admin=(user.role == UserRole.ADMIN)\n        )\n    db_session.add(user)\n    db_session.commit()\n\n\ndef get_latest_access_token_for_user(\n    user_id: UUID,\n    db_session: Session,\n) -> AccessToken | None:\n    \"\"\"Get the most recent access token for a user.\"\"\"\n    try:\n        result = db_session.execute(\n            select(AccessToken)\n            .where(AccessToken.user_id == user_id)  # type: ignore\n            .order_by(desc(Column(\"created_at\")))\n            .limit(1)\n        )\n        return result.scalar_one_or_none()\n    except Exception as e:\n        logger.error(f\"Error fetching AccessToken: {e}\")\n        return None\n\n\ndef update_user_temperature_override_enabled(\n    user_id: UUID,\n    temperature_override_enabled: bool,\n    db_session: Session,\n) -> None:\n    \"\"\"Update user's temperature override enabled setting.\"\"\"\n    db_session.execute(\n        update(User)\n        .where(User.id == user_id)  # type: ignore\n        .values(temperature_override_enabled=temperature_override_enabled)\n    )\n    db_session.commit()\n\n\ndef update_user_shortcut_enabled(\n    user_id: UUID,\n    shortcut_enabled: bool,\n    db_session: Session,\n) -> None:\n    \"\"\"Update user's shortcut enabled setting.\"\"\"\n    db_session.execute(\n        update(User)\n        .where(User.id == user_id)  # type: ignore\n        .values(shortcut_enabled=shortcut_enabled)\n    )\n    db_session.commit()\n\n\ndef update_user_auto_scroll(\n    user_id: UUID,\n    auto_scroll: bool | None,\n    db_session: Session,\n) -> None:\n    \"\"\"Update user's auto scroll setting.\"\"\"\n    db_session.execute(\n        update(User)\n        .where(User.id == user_id)  # type: ignore\n        .values(auto_scroll=auto_scroll)\n    )\n    db_session.commit()\n\n\ndef update_user_default_model(\n    user_id: UUID,\n    default_model: str | None,\n    db_session: Session,\n) -> None:\n    \"\"\"Update user's default model setting.\"\"\"\n    db_session.execute(\n        update(User)\n        .where(User.id == user_id)  # type: ignore\n        .values(default_model=default_model)\n    )\n    db_session.commit()\n\n\ndef update_user_theme_preference(\n    user_id: UUID,\n    theme_preference: ThemePreference,\n    db_session: Session,\n) -> None:\n    \"\"\"Update user's theme preference setting.\"\"\"\n    db_session.execute(\n        update(User)\n        .where(User.id == user_id)  # type: ignore\n        .values(theme_preference=theme_preference)\n    )\n    db_session.commit()\n\n\ndef update_user_chat_background(\n    user_id: UUID,\n    chat_background: str | None,\n    db_session: Session,\n) -> None:\n    \"\"\"Update user's chat background setting.\"\"\"\n    db_session.execute(\n        update(User)\n        .where(User.id == user_id)  # type: ignore\n        .values(chat_background=chat_background)\n    )\n    db_session.commit()\n\n\ndef update_user_default_app_mode(\n    user_id: UUID,\n    default_app_mode: DefaultAppMode,\n    db_session: Session,\n) -> None:\n    \"\"\"Update user's default app mode setting.\"\"\"\n    db_session.execute(\n        update(User)\n        .where(User.id == user_id)  # type: ignore\n        .values(default_app_mode=default_app_mode)\n    )\n    db_session.commit()\n\n\ndef update_user_personalization(\n    user_id: UUID,\n    *,\n    personal_name: str | None,\n    personal_role: str | None,\n    use_memories: bool,\n    enable_memory_tool: bool,\n    memories: list[MemoryItem],\n    user_preferences: str | None,\n    db_session: Session,\n) -> None:\n    db_session.execute(\n        update(User)\n        .where(User.id == user_id)  # type: ignore\n        .values(\n            personal_name=personal_name,\n            personal_role=personal_role,\n            use_memories=use_memories,\n            enable_memory_tool=enable_memory_tool,\n            user_preferences=user_preferences,\n        )\n    )\n\n    # ID-based upsert: use real DB IDs from the frontend to match memories.\n    incoming_ids = {m.id for m in memories if m.id is not None}\n\n    # Delete existing rows not in the incoming set (scoped to user_id)\n    existing_memories = list(\n        db_session.scalars(select(Memory).where(Memory.user_id == user_id)).all()\n    )\n    existing_ids = {mem.id for mem in existing_memories}\n    ids_to_delete = existing_ids - incoming_ids\n    if ids_to_delete:\n        db_session.execute(\n            delete(Memory).where(\n                Memory.id.in_(ids_to_delete),\n                Memory.user_id == user_id,\n            )\n        )\n\n    # Update existing rows whose IDs match\n    existing_by_id = {mem.id: mem for mem in existing_memories}\n    for item in memories:\n        if item.id is not None and item.id in existing_by_id:\n            existing_by_id[item.id].memory_text = item.content\n\n    # Create new rows for items without an ID\n    new_items = [m for m in memories if m.id is None]\n    if new_items:\n        db_session.add_all(\n            [Memory(user_id=user_id, memory_text=item.content) for item in new_items]\n        )\n\n    db_session.commit()\n\n\ndef get_memories_for_user(\n    user_id: UUID,\n    db_session: Session,\n) -> Sequence[Memory]:\n    return db_session.scalars(\n        select(Memory).where(Memory.user_id == user_id).order_by(Memory.id.desc())\n    ).all()\n\n\ndef update_user_pinned_assistants(\n    user_id: UUID,\n    pinned_assistants: list[int],\n    db_session: Session,\n) -> None:\n    \"\"\"Update user's pinned assistants list.\"\"\"\n    db_session.execute(\n        update(User)\n        .where(User.id == user_id)  # type: ignore\n        .values(pinned_assistants=pinned_assistants)\n    )\n    db_session.commit()\n\n\ndef update_user_assistant_visibility(\n    user_id: UUID,\n    hidden_assistants: list[int] | None,\n    visible_assistants: list[int] | None,\n    chosen_assistants: list[int] | None,\n    db_session: Session,\n) -> None:\n    \"\"\"Update user's assistant visibility settings.\"\"\"\n    db_session.execute(\n        update(User)\n        .where(User.id == user_id)  # type: ignore\n        .values(\n            hidden_assistants=hidden_assistants,\n            visible_assistants=visible_assistants,\n            chosen_assistants=chosen_assistants,\n        )\n    )\n    db_session.commit()\n\n\ndef get_all_user_assistant_specific_configs(\n    user_id: UUID,\n    db_session: Session,\n) -> Sequence[Assistant__UserSpecificConfig]:\n    \"\"\"Get the full user assistant specific config for a specific assistant and user.\"\"\"\n    return db_session.scalars(\n        select(Assistant__UserSpecificConfig).where(\n            Assistant__UserSpecificConfig.user_id == user_id\n        )\n    ).all()\n\n\ndef update_assistant_preferences(\n    assistant_id: int,\n    user_id: UUID,\n    new_assistant_preference: UserSpecificAssistantPreference,\n    db_session: Session,\n) -> None:\n    \"\"\"Update the disabled tools for a specific assistant for a specific user.\"\"\"\n    # First check if a config already exists\n    result = db_session.execute(\n        select(Assistant__UserSpecificConfig)\n        .where(Assistant__UserSpecificConfig.assistant_id == assistant_id)\n        .where(Assistant__UserSpecificConfig.user_id == user_id)\n    )\n    config = result.scalar_one_or_none()\n\n    if config:\n        # Update existing config\n        config.disabled_tool_ids = new_assistant_preference.disabled_tool_ids\n    else:\n        # Create new config\n        config = Assistant__UserSpecificConfig(\n            assistant_id=assistant_id,\n            user_id=user_id,\n            disabled_tool_ids=new_assistant_preference.disabled_tool_ids,\n        )\n        db_session.add(config)\n\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/db/users.py",
    "content": "from collections.abc import Sequence\nfrom typing import Any\nfrom uuid import UUID\n\nfrom fastapi import HTTPException\nfrom fastapi_users.password import PasswordHelper\nfrom sqlalchemy import case\nfrom sqlalchemy import func\nfrom sqlalchemy import select\nfrom sqlalchemy.exc import IntegrityError\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.sql import expression\nfrom sqlalchemy.sql.elements import ColumnElement\nfrom sqlalchemy.sql.elements import KeyedColumnElement\nfrom sqlalchemy.sql.expression import or_\n\nfrom onyx.auth.invited_users import remove_user_from_invited_users\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.constants import ANONYMOUS_USER_EMAIL\nfrom onyx.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN\nfrom onyx.configs.constants import NO_AUTH_PLACEHOLDER_USER_EMAIL\nfrom onyx.db.enums import AccountType\nfrom onyx.db.models import DocumentSet\nfrom onyx.db.models import DocumentSet__User\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Persona__User\nfrom onyx.db.models import SamlAccount\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserGroup\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\n\nlogger = setup_logger()\n\n\ndef validate_user_role_update(\n    requested_role: UserRole,\n    current_role: UserRole,\n    current_account_type: AccountType,\n    explicit_override: bool = False,\n) -> None:\n    \"\"\"\n    Validate that a user role update is valid.\n    Assumed only admins can hit this endpoint.\n    raise if:\n    - requested role is a curator\n    - requested role is a slack user\n    - requested role is an external permissioned user\n    - requested role is a limited user\n    - current account type is BOT (slack user)\n    - current account type is EXT_PERM_USER\n    - current role is a limited user\n    \"\"\"\n\n    if current_account_type == AccountType.BOT:\n        raise HTTPException(\n            status_code=400,\n            detail=\"To change a Slack User's role, they must first login to Onyx via the web app.\",\n        )\n\n    if current_account_type == AccountType.EXT_PERM_USER:\n        raise HTTPException(\n            status_code=400,\n            detail=\"To change an External Permissioned User's role, they must first login to Onyx via the web app.\",\n        )\n\n    if current_role == UserRole.LIMITED:\n        raise HTTPException(\n            status_code=400,\n            detail=\"To change a Limited User's role, they must first login to Onyx via the web app.\",\n        )\n\n    if explicit_override:\n        return\n\n    if requested_role == UserRole.CURATOR:\n        # This shouldn't happen, but just in case\n        raise HTTPException(\n            status_code=400,\n            detail=\"Curator role must be set via the User Group Menu\",\n        )\n\n    if requested_role == UserRole.LIMITED:\n        # This shouldn't happen, but just in case\n        raise HTTPException(\n            status_code=400,\n            detail=(\n                \"A user cannot be set to a Limited User role. \"\n                \"This role is automatically assigned to users through certain endpoints in the API.\"\n            ),\n        )\n\n    if requested_role == UserRole.SLACK_USER:\n        # This shouldn't happen, but just in case\n        raise HTTPException(\n            status_code=400,\n            detail=(\n                \"A user cannot be set to a Slack User role. \"\n                \"This role is automatically assigned to users who only use Onyx via Slack.\"\n            ),\n        )\n\n    if requested_role == UserRole.EXT_PERM_USER:\n        # This shouldn't happen, but just in case\n        raise HTTPException(\n            status_code=400,\n            detail=(\n                \"A user cannot be set to an External Permissioned User role. \"\n                \"This role is automatically assigned to users who have been \"\n                \"pulled in to the system via an external permissions system.\"\n            ),\n        )\n\n\ndef get_all_users(\n    db_session: Session,\n    email_filter_string: str | None = None,\n    include_external: bool = False,\n) -> Sequence[User]:\n    \"\"\"List all users. No pagination as of now, as the # of users\n    is assumed to be relatively small (<< 1 million)\"\"\"\n    stmt = select(User)\n\n    # Exclude system users (anonymous user, no-auth placeholder)\n    stmt = stmt.where(User.email != ANONYMOUS_USER_EMAIL)  # type: ignore\n    stmt = stmt.where(User.email != NO_AUTH_PLACEHOLDER_USER_EMAIL)  # type: ignore\n\n    if not include_external:\n        stmt = stmt.where(User.role != UserRole.EXT_PERM_USER)\n\n    if email_filter_string is not None:\n        stmt = stmt.where(User.email.ilike(f\"%{email_filter_string}%\"))  # type: ignore\n\n    return db_session.scalars(stmt).unique().all()\n\n\ndef _get_accepted_user_where_clause(\n    email_filter_string: str | None = None,\n    roles_filter: list[UserRole] = [],\n    include_external: bool = False,\n    is_active_filter: bool | None = None,\n) -> list[ColumnElement[bool]]:\n    \"\"\"\n    Generates a SQLAlchemy where clause for filtering users based on the provided parameters.\n    This is used to build the filters for the function that retrieves the users for the users table in the admin panel.\n\n    Parameters:\n    - email_filter_string: A substring to filter user emails. Only users whose emails contain this substring will be included.\n    - is_active_filter: When True, only active users will be included. When False, only inactive users will be included.\n    - roles_filter: A list of user roles to filter by. Only users with roles in this list will be included.\n    - include_external: If False, external permissioned users will be excluded.\n\n    Returns:\n    - list: A list of conditions to be used in a SQLAlchemy query to filter users.\n    \"\"\"\n\n    # Access table columns directly via __table__.c to get proper SQLAlchemy column types\n    # This ensures type checking works correctly for SQL operations like ilike, endswith, and is_\n    email_col: KeyedColumnElement[Any] = User.__table__.c.email\n    is_active_col: KeyedColumnElement[Any] = User.__table__.c.is_active\n\n    where_clause: list[ColumnElement[bool]] = [\n        expression.not_(email_col.endswith(DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN)),\n        # Exclude system users (anonymous user, no-auth placeholder)\n        email_col != ANONYMOUS_USER_EMAIL,\n        email_col != NO_AUTH_PLACEHOLDER_USER_EMAIL,\n    ]\n\n    if not include_external:\n        where_clause.append(User.role != UserRole.EXT_PERM_USER)\n\n    if email_filter_string is not None:\n        personal_name_col: KeyedColumnElement[Any] = User.__table__.c.personal_name\n        where_clause.append(\n            or_(\n                email_col.ilike(f\"%{email_filter_string}%\"),\n                personal_name_col.ilike(f\"%{email_filter_string}%\"),\n            )\n        )\n\n    if roles_filter:\n        where_clause.append(User.role.in_(roles_filter))\n\n    if is_active_filter is not None:\n        where_clause.append(is_active_col.is_(is_active_filter))\n\n    return where_clause\n\n\ndef get_all_accepted_users(\n    db_session: Session,\n    include_external: bool = False,\n) -> Sequence[User]:\n    \"\"\"Returns all accepted users without pagination.\n    Uses the same filtering as the paginated endpoint but without\n    search, role, or active filters.\"\"\"\n    stmt = select(User)\n    where_clause = _get_accepted_user_where_clause(\n        include_external=include_external,\n    )\n    stmt = stmt.where(*where_clause).order_by(User.email)\n    return db_session.scalars(stmt).unique().all()\n\n\ndef get_page_of_filtered_users(\n    db_session: Session,\n    page_size: int,\n    page_num: int,\n    email_filter_string: str | None = None,\n    is_active_filter: bool | None = None,\n    roles_filter: list[UserRole] = [],\n    include_external: bool = False,\n) -> Sequence[User]:\n    users_stmt = select(User)\n\n    where_clause = _get_accepted_user_where_clause(\n        email_filter_string=email_filter_string,\n        roles_filter=roles_filter,\n        include_external=include_external,\n        is_active_filter=is_active_filter,\n    )\n    # Apply pagination\n    users_stmt = users_stmt.offset((page_num) * page_size).limit(page_size)\n    # Apply filtering\n    users_stmt = users_stmt.where(*where_clause)\n\n    return db_session.scalars(users_stmt).unique().all()\n\n\ndef get_total_filtered_users_count(\n    db_session: Session,\n    email_filter_string: str | None = None,\n    is_active_filter: bool | None = None,\n    roles_filter: list[UserRole] = [],\n    include_external: bool = False,\n) -> int:\n    where_clause = _get_accepted_user_where_clause(\n        email_filter_string=email_filter_string,\n        roles_filter=roles_filter,\n        include_external=include_external,\n        is_active_filter=is_active_filter,\n    )\n    total_count_stmt = select(func.count()).select_from(User)\n    # Apply filtering\n    total_count_stmt = total_count_stmt.where(*where_clause)\n\n    return db_session.scalar(total_count_stmt) or 0\n\n\ndef get_user_counts_by_role_and_status(\n    db_session: Session,\n) -> dict[str, dict[str, int]]:\n    \"\"\"Returns user counts grouped by role and by active/inactive status.\n\n    Excludes API key users, anonymous users, and no-auth placeholder users.\n    Uses a single query with conditional aggregation.\n    \"\"\"\n    base_where = _get_accepted_user_where_clause()\n    role_col = User.__table__.c.role\n    is_active_col = User.__table__.c.is_active\n\n    stmt = (\n        select(\n            role_col,\n            func.count().label(\"total\"),\n            func.sum(case((is_active_col.is_(True), 1), else_=0)).label(\"active\"),\n            func.sum(case((is_active_col.is_(False), 1), else_=0)).label(\"inactive\"),\n        )\n        .where(*base_where)\n        .group_by(role_col)\n    )\n\n    role_counts: dict[str, int] = {}\n    status_counts: dict[str, int] = {\"active\": 0, \"inactive\": 0}\n\n    for role_val, total, active, inactive in db_session.execute(stmt).all():\n        key = role_val.value if hasattr(role_val, \"value\") else str(role_val)\n        role_counts[key] = total\n        status_counts[\"active\"] += active or 0\n        status_counts[\"inactive\"] += inactive or 0\n\n    return {\"role_counts\": role_counts, \"status_counts\": status_counts}\n\n\ndef get_user_by_email(email: str, db_session: Session) -> User | None:\n    user = (\n        db_session.query(User)\n        .filter(func.lower(User.email) == func.lower(email))\n        .first()\n    )\n    return user\n\n\ndef fetch_user_by_id(db_session: Session, user_id: UUID) -> User | None:\n    return db_session.query(User).filter(User.id == user_id).first()  # type: ignore\n\n\ndef _generate_slack_user(email: str) -> User:\n    fastapi_users_pw_helper = PasswordHelper()\n    password = fastapi_users_pw_helper.generate()\n    hashed_pass = fastapi_users_pw_helper.hash(password)\n    return User(\n        email=email,\n        hashed_password=hashed_pass,\n        role=UserRole.SLACK_USER,\n        account_type=AccountType.BOT,\n    )\n\n\ndef add_slack_user_if_not_exists(db_session: Session, email: str) -> User:\n    email = email.lower()\n    user = get_user_by_email(email, db_session)\n    if user is not None:\n        # If the user is an external permissioned user, we update it to a slack user\n        if user.account_type == AccountType.EXT_PERM_USER:\n            user.role = UserRole.SLACK_USER\n            user.account_type = AccountType.BOT\n            db_session.commit()\n        return user\n\n    user = _generate_slack_user(email=email)\n    db_session.add(user)\n    db_session.commit()\n    return user\n\n\ndef _get_users_by_emails(\n    db_session: Session, lower_emails: list[str]\n) -> tuple[list[User], list[str]]:\n    \"\"\"given a list of lowercase emails,\n    returns a list[User] of Users whose emails match and a list[str]\n    the missing emails that had no User\"\"\"\n    stmt = select(User).filter(func.lower(User.email).in_(lower_emails))\n    found_users = list(db_session.scalars(stmt).unique().all())  # Convert to list\n\n    # Extract found emails and convert to lowercase to avoid case sensitivity issues\n    found_users_emails = [user.email.lower() for user in found_users]\n\n    # Separate emails for users that were not found\n    missing_user_emails = [\n        email for email in lower_emails if email not in found_users_emails\n    ]\n    return found_users, missing_user_emails\n\n\ndef _generate_ext_permissioned_user(email: str) -> User:\n    fastapi_users_pw_helper = PasswordHelper()\n    password = fastapi_users_pw_helper.generate()\n    hashed_pass = fastapi_users_pw_helper.hash(password)\n    return User(\n        email=email,\n        hashed_password=hashed_pass,\n        role=UserRole.EXT_PERM_USER,\n        account_type=AccountType.EXT_PERM_USER,\n    )\n\n\ndef batch_add_ext_perm_user_if_not_exists(\n    db_session: Session, emails: list[str], continue_on_error: bool = False\n) -> list[User]:\n    lower_emails = [email.lower() for email in emails]\n    found_users, missing_lower_emails = _get_users_by_emails(db_session, lower_emails)\n\n    # Use savepoints (begin_nested) so that a failed insert only rolls back\n    # that single user, not the entire transaction. A plain rollback() would\n    # discard all previously flushed users in the same transaction.\n    # We also avoid add_all() because SQLAlchemy 2.0's insertmanyvalues\n    # batch path hits a UUID sentinel mismatch with server_default columns.\n    for email in missing_lower_emails:\n        user = _generate_ext_permissioned_user(email=email)\n        savepoint = db_session.begin_nested()\n        try:\n            db_session.add(user)\n            savepoint.commit()\n        except IntegrityError:\n            savepoint.rollback()\n            if not continue_on_error:\n                raise\n\n    db_session.commit()\n    # Fetch all users again to ensure we have the most up-to-date list\n    all_users, _ = _get_users_by_emails(db_session, lower_emails)\n    return all_users\n\n\ndef assign_user_to_default_groups__no_commit(\n    db_session: Session,\n    user: User,\n    is_admin: bool = False,\n) -> None:\n    \"\"\"Assign a newly created user to the appropriate default group.\n\n    Does NOT commit — callers must commit the session themselves so that\n    group assignment can be part of the same transaction as user creation.\n\n    Args:\n        is_admin: If True, assign to Admin default group; otherwise Basic.\n            Callers determine this from their own context (e.g. user_count,\n            admin email list, explicit choice). Defaults to False (Basic).\n    \"\"\"\n    if user.account_type in (\n        AccountType.BOT,\n        AccountType.EXT_PERM_USER,\n        AccountType.ANONYMOUS,\n    ):\n        return\n\n    target_group_name = \"Admin\" if is_admin else \"Basic\"\n\n    default_group = (\n        db_session.query(UserGroup)\n        .filter(\n            UserGroup.name == target_group_name,\n            UserGroup.is_default.is_(True),\n        )\n        .first()\n    )\n\n    if default_group is None:\n        raise RuntimeError(\n            f\"Default group '{target_group_name}' not found. \"\n            f\"Cannot assign user {user.email} to a group. \"\n            f\"Ensure the seed_default_groups migration has run.\"\n        )\n\n    # Check if the user is already in the group\n    existing = (\n        db_session.query(User__UserGroup)\n        .filter(\n            User__UserGroup.user_id == user.id,\n            User__UserGroup.user_group_id == default_group.id,\n        )\n        .first()\n    )\n    if existing is not None:\n        return\n\n    savepoint = db_session.begin_nested()\n    try:\n        db_session.add(\n            User__UserGroup(\n                user_id=user.id,\n                user_group_id=default_group.id,\n            )\n        )\n        db_session.flush()\n    except IntegrityError:\n        # Race condition: another transaction inserted this membership\n        # between our SELECT and INSERT. The savepoint isolates the failure\n        # so the outer transaction (user creation) stays intact.\n        savepoint.rollback()\n        return\n\n    from onyx.db.permissions import recompute_user_permissions__no_commit\n\n    recompute_user_permissions__no_commit(user.id, db_session)\n\n    logger.info(f\"Assigned user {user.email} to default group '{default_group.name}'\")\n\n\ndef delete_user_from_db(\n    user_to_delete: User,\n    db_session: Session,\n) -> None:\n    for oauth_account in user_to_delete.oauth_accounts:\n        db_session.delete(oauth_account)\n\n    fetch_ee_implementation_or_noop(\n        \"onyx.db.external_perm\",\n        \"delete_user__ext_group_for_user__no_commit\",\n    )(\n        db_session=db_session,\n        user_id=user_to_delete.id,\n    )\n    db_session.query(SamlAccount).filter(\n        SamlAccount.user_id == user_to_delete.id\n    ).delete()\n    # Null out ownership on document sets and personas so they're\n    # preserved for other users instead of being cascade-deleted\n    db_session.query(DocumentSet).filter(\n        DocumentSet.user_id == user_to_delete.id\n    ).update({DocumentSet.user_id: None})\n    db_session.query(Persona).filter(Persona.user_id == user_to_delete.id).update(\n        {Persona.user_id: None}\n    )\n\n    db_session.query(DocumentSet__User).filter(\n        DocumentSet__User.user_id == user_to_delete.id\n    ).delete()\n    db_session.query(Persona__User).filter(\n        Persona__User.user_id == user_to_delete.id\n    ).delete()\n    db_session.query(User__UserGroup).filter(\n        User__UserGroup.user_id == user_to_delete.id\n    ).delete()\n    db_session.delete(user_to_delete)\n    db_session.commit()\n\n    # NOTE: edge case may exist with race conditions\n    # with this `invited user` scheme generally.\n    remove_user_from_invited_users(user_to_delete.email)\n\n\ndef batch_get_user_groups(\n    db_session: Session,\n    user_ids: list[UUID],\n    include_default: bool = False,\n) -> dict[UUID, list[tuple[int, str]]]:\n    \"\"\"Fetch group memberships for a batch of users in a single query.\n    Returns a mapping of user_id -> list of (group_id, group_name) tuples.\"\"\"\n    if not user_ids:\n        return {}\n\n    stmt = (\n        select(\n            User__UserGroup.user_id,\n            UserGroup.id,\n            UserGroup.name,\n        )\n        .join(UserGroup, UserGroup.id == User__UserGroup.user_group_id)\n        .where(User__UserGroup.user_id.in_(user_ids))\n    )\n    if not include_default:\n        stmt = stmt.where(UserGroup.is_default == False)  # noqa: E712\n\n    rows = db_session.execute(stmt).all()\n\n    result: dict[UUID, list[tuple[int, str]]] = {uid: [] for uid in user_ids}\n    for user_id, group_id, group_name in rows:\n        result[user_id].append((group_id, group_name))\n    return result\n"
  },
  {
    "path": "backend/onyx/db/utils.py",
    "content": "from enum import Enum\nfrom typing import Any\n\nfrom psycopg2 import errorcodes\nfrom psycopg2 import OperationalError\nfrom pydantic import BaseModel\nfrom sqlalchemy import inspect\n\nfrom onyx.db.models import Base\n\n\ndef model_to_dict(model: Base) -> dict[str, Any]:\n    return {c.key: getattr(model, c.key) for c in inspect(model).mapper.column_attrs}  # type: ignore\n\n\nRETRYABLE_PG_CODES = {\n    errorcodes.SERIALIZATION_FAILURE,  # '40001'\n    errorcodes.DEADLOCK_DETECTED,  # '40P01'\n    errorcodes.CONNECTION_EXCEPTION,  # '08000'\n    errorcodes.CONNECTION_DOES_NOT_EXIST,  # '08003'\n    errorcodes.CONNECTION_FAILURE,  # '08006'\n    errorcodes.TRANSACTION_ROLLBACK,  # '40000'\n}\n\n\ndef is_retryable_sqlalchemy_error(exc: BaseException) -> bool:\n    \"\"\"Helper function for use with tenacity's retry_if_exception as the callback\"\"\"\n    if isinstance(exc, OperationalError):\n        pgcode = getattr(getattr(exc, \"orig\", None), \"pgcode\", None)\n        return pgcode in RETRYABLE_PG_CODES\n    return False\n\n\nclass DocumentRow(BaseModel):\n    id: str\n    doc_metadata: dict[str, Any]\n    external_user_group_ids: list[str]\n\n\nclass SortOrder(str, Enum):\n    ASC = \"asc\"\n    DESC = \"desc\"\n\n\nclass DiscordChannelView(BaseModel):\n    channel_id: int\n    channel_name: str\n    channel_type: str = \"text\"  # text, forum\n    is_private: bool = False  # True if @everyone cannot view the channel\n"
  },
  {
    "path": "backend/onyx/db/voice.py",
    "content": "from typing import Any\nfrom uuid import UUID\n\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import User\nfrom onyx.db.models import VoiceProvider\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\n\nMIN_VOICE_PLAYBACK_SPEED = 0.5\nMAX_VOICE_PLAYBACK_SPEED = 2.0\n\n\ndef fetch_voice_providers(db_session: Session) -> list[VoiceProvider]:\n    \"\"\"Fetch all voice providers.\"\"\"\n    return list(\n        db_session.scalars(select(VoiceProvider).order_by(VoiceProvider.name)).all()\n    )\n\n\ndef fetch_voice_provider_by_id(\n    db_session: Session, provider_id: int\n) -> VoiceProvider | None:\n    \"\"\"Fetch a voice provider by ID.\"\"\"\n    return db_session.scalar(\n        select(VoiceProvider).where(VoiceProvider.id == provider_id)\n    )\n\n\ndef fetch_default_stt_provider(db_session: Session) -> VoiceProvider | None:\n    \"\"\"Fetch the default STT provider.\"\"\"\n    return db_session.scalar(\n        select(VoiceProvider).where(VoiceProvider.is_default_stt.is_(True))\n    )\n\n\ndef fetch_default_tts_provider(db_session: Session) -> VoiceProvider | None:\n    \"\"\"Fetch the default TTS provider.\"\"\"\n    return db_session.scalar(\n        select(VoiceProvider).where(VoiceProvider.is_default_tts.is_(True))\n    )\n\n\ndef fetch_voice_provider_by_type(\n    db_session: Session, provider_type: str\n) -> VoiceProvider | None:\n    \"\"\"Fetch a voice provider by type.\"\"\"\n    return db_session.scalar(\n        select(VoiceProvider).where(VoiceProvider.provider_type == provider_type)\n    )\n\n\ndef upsert_voice_provider(\n    *,\n    db_session: Session,\n    provider_id: int | None,\n    name: str,\n    provider_type: str,\n    api_key: str | None,\n    api_key_changed: bool,\n    api_base: str | None = None,\n    custom_config: dict[str, Any] | None = None,\n    stt_model: str | None = None,\n    tts_model: str | None = None,\n    default_voice: str | None = None,\n    activate_stt: bool = False,\n    activate_tts: bool = False,\n) -> VoiceProvider:\n    \"\"\"Create or update a voice provider.\"\"\"\n    provider: VoiceProvider | None = None\n\n    if provider_id is not None:\n        provider = fetch_voice_provider_by_id(db_session, provider_id)\n        if provider is None:\n            raise OnyxError(\n                OnyxErrorCode.NOT_FOUND,\n                f\"No voice provider with id {provider_id} exists.\",\n            )\n    else:\n        provider = VoiceProvider()\n        db_session.add(provider)\n\n    # Apply updates\n    provider.name = name\n    provider.provider_type = provider_type\n    provider.api_base = api_base\n    provider.custom_config = custom_config\n    provider.stt_model = stt_model\n    provider.tts_model = tts_model\n    provider.default_voice = default_voice\n\n    # Only update API key if explicitly changed or if provider has no key\n    if api_key_changed or provider.api_key is None:\n        provider.api_key = api_key  # type: ignore[assignment]\n\n    db_session.flush()\n\n    if activate_stt:\n        set_default_stt_provider(db_session=db_session, provider_id=provider.id)\n    if activate_tts:\n        set_default_tts_provider(db_session=db_session, provider_id=provider.id)\n\n    db_session.refresh(provider)\n    return provider\n\n\ndef delete_voice_provider(db_session: Session, provider_id: int) -> None:\n    \"\"\"Delete a voice provider by ID.\"\"\"\n    provider = fetch_voice_provider_by_id(db_session, provider_id)\n    if provider:\n        db_session.delete(provider)\n        db_session.flush()\n\n\ndef set_default_stt_provider(*, db_session: Session, provider_id: int) -> VoiceProvider:\n    \"\"\"Set a voice provider as the default STT provider.\"\"\"\n    provider = fetch_voice_provider_by_id(db_session, provider_id)\n    if provider is None:\n        raise OnyxError(\n            OnyxErrorCode.NOT_FOUND,\n            f\"No voice provider with id {provider_id} exists.\",\n        )\n\n    # Deactivate all other STT providers\n    db_session.execute(\n        update(VoiceProvider)\n        .where(\n            VoiceProvider.is_default_stt.is_(True),\n            VoiceProvider.id != provider_id,\n        )\n        .values(is_default_stt=False)\n    )\n\n    # Activate this provider\n    provider.is_default_stt = True\n\n    db_session.flush()\n    db_session.refresh(provider)\n    return provider\n\n\ndef set_default_tts_provider(\n    *, db_session: Session, provider_id: int, tts_model: str | None = None\n) -> VoiceProvider:\n    \"\"\"Set a voice provider as the default TTS provider.\"\"\"\n    provider = fetch_voice_provider_by_id(db_session, provider_id)\n    if provider is None:\n        raise OnyxError(\n            OnyxErrorCode.NOT_FOUND,\n            f\"No voice provider with id {provider_id} exists.\",\n        )\n\n    # Deactivate all other TTS providers\n    db_session.execute(\n        update(VoiceProvider)\n        .where(\n            VoiceProvider.is_default_tts.is_(True),\n            VoiceProvider.id != provider_id,\n        )\n        .values(is_default_tts=False)\n    )\n\n    # Activate this provider\n    provider.is_default_tts = True\n\n    # Update the TTS model if specified\n    if tts_model is not None:\n        provider.tts_model = tts_model\n\n    db_session.flush()\n    db_session.refresh(provider)\n    return provider\n\n\ndef deactivate_stt_provider(*, db_session: Session, provider_id: int) -> VoiceProvider:\n    \"\"\"Remove the default STT status from a voice provider.\"\"\"\n    provider = fetch_voice_provider_by_id(db_session, provider_id)\n    if provider is None:\n        raise OnyxError(\n            OnyxErrorCode.NOT_FOUND,\n            f\"No voice provider with id {provider_id} exists.\",\n        )\n\n    provider.is_default_stt = False\n\n    db_session.flush()\n    db_session.refresh(provider)\n    return provider\n\n\ndef deactivate_tts_provider(*, db_session: Session, provider_id: int) -> VoiceProvider:\n    \"\"\"Remove the default TTS status from a voice provider.\"\"\"\n    provider = fetch_voice_provider_by_id(db_session, provider_id)\n    if provider is None:\n        raise OnyxError(\n            OnyxErrorCode.NOT_FOUND,\n            f\"No voice provider with id {provider_id} exists.\",\n        )\n\n    provider.is_default_tts = False\n\n    db_session.flush()\n    db_session.refresh(provider)\n    return provider\n\n\n# User voice preferences\n\n\ndef update_user_voice_settings(\n    db_session: Session,\n    user_id: UUID,\n    auto_send: bool | None = None,\n    auto_playback: bool | None = None,\n    playback_speed: float | None = None,\n) -> None:\n    \"\"\"Update user's voice settings.\n\n    For all fields, None means \"don't update this field\".\n    \"\"\"\n    values: dict[str, bool | float] = {}\n\n    if auto_send is not None:\n        values[\"voice_auto_send\"] = auto_send\n    if auto_playback is not None:\n        values[\"voice_auto_playback\"] = auto_playback\n    if playback_speed is not None:\n        values[\"voice_playback_speed\"] = max(\n            MIN_VOICE_PLAYBACK_SPEED, min(MAX_VOICE_PLAYBACK_SPEED, playback_speed)\n        )\n\n    if values:\n        db_session.execute(update(User).where(User.id == user_id).values(**values))  # type: ignore[arg-type]\n        db_session.flush()\n"
  },
  {
    "path": "backend/onyx/db/web_search.py",
    "content": "from __future__ import annotations\n\nfrom sqlalchemy import select\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import InternetContentProvider\nfrom onyx.db.models import InternetSearchProvider\nfrom onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig\nfrom shared_configs.enums import WebContentProviderType\nfrom shared_configs.enums import WebSearchProviderType\n\n\ndef fetch_web_search_providers(db_session: Session) -> list[InternetSearchProvider]:\n    stmt = select(InternetSearchProvider).order_by(InternetSearchProvider.id.asc())\n    return list(db_session.scalars(stmt).all())\n\n\ndef fetch_web_content_providers(db_session: Session) -> list[InternetContentProvider]:\n    stmt = select(InternetContentProvider).order_by(InternetContentProvider.id.asc())\n    return list(db_session.scalars(stmt).all())\n\n\ndef fetch_active_web_search_provider(\n    db_session: Session,\n) -> InternetSearchProvider | None:\n    stmt = select(InternetSearchProvider).where(\n        InternetSearchProvider.is_active.is_(True)\n    )\n    return db_session.scalars(stmt).first()\n\n\ndef fetch_web_search_provider_by_id(\n    provider_id: int, db_session: Session\n) -> InternetSearchProvider | None:\n    return db_session.get(InternetSearchProvider, provider_id)\n\n\ndef fetch_web_search_provider_by_name(\n    name: str, db_session: Session\n) -> InternetSearchProvider | None:\n    stmt = select(InternetSearchProvider).where(InternetSearchProvider.name.ilike(name))\n    return db_session.scalars(stmt).first()\n\n\ndef fetch_web_search_provider_by_type(\n    provider_type: WebSearchProviderType, db_session: Session\n) -> InternetSearchProvider | None:\n    stmt = select(InternetSearchProvider).where(\n        InternetSearchProvider.provider_type == provider_type.value\n    )\n    return db_session.scalars(stmt).first()\n\n\ndef _ensure_unique_search_name(\n    name: str, provider_id: int | None, db_session: Session\n) -> None:\n    existing = fetch_web_search_provider_by_name(name=name, db_session=db_session)\n    if existing and existing.id != provider_id:\n        raise ValueError(f\"A web search provider named '{name}' already exists.\")\n\n\ndef _apply_search_provider_updates(\n    provider: InternetSearchProvider,\n    *,\n    name: str,\n    provider_type: WebSearchProviderType,\n    api_key: str | None,\n    api_key_changed: bool,\n    config: dict[str, str] | None,\n) -> None:\n    provider.name = name\n    provider.provider_type = provider_type.value\n    provider.config = config\n    if api_key_changed or provider.api_key is None:\n        # EncryptedString accepts str for writes, returns SensitiveValue for reads\n        provider.api_key = api_key  # type: ignore[assignment]\n\n\ndef upsert_web_search_provider(\n    *,\n    provider_id: int | None,\n    name: str,\n    provider_type: WebSearchProviderType,\n    api_key: str | None,\n    api_key_changed: bool,\n    config: dict[str, str] | None,\n    activate: bool,\n    db_session: Session,\n) -> InternetSearchProvider:\n    _ensure_unique_search_name(\n        name=name, provider_id=provider_id, db_session=db_session\n    )\n\n    provider: InternetSearchProvider | None = None\n    if provider_id is not None:\n        provider = fetch_web_search_provider_by_id(provider_id, db_session)\n        if provider is None:\n            raise ValueError(f\"No web search provider with id {provider_id} exists.\")\n    else:\n        provider = InternetSearchProvider()\n        db_session.add(provider)\n\n    _apply_search_provider_updates(\n        provider,\n        name=name,\n        provider_type=provider_type,\n        api_key=api_key,\n        api_key_changed=api_key_changed,\n        config=config,\n    )\n\n    db_session.flush()\n\n    if activate:\n        set_active_web_search_provider(provider_id=provider.id, db_session=db_session)\n\n    db_session.refresh(provider)\n    return provider\n\n\ndef set_active_web_search_provider(\n    *, provider_id: int | None, db_session: Session\n) -> InternetSearchProvider:\n    if provider_id is None:\n        raise ValueError(\"Cannot activate a provider without an id.\")\n\n    provider = fetch_web_search_provider_by_id(provider_id, db_session)\n    if provider is None:\n        raise ValueError(f\"No web search provider with id {provider_id} exists.\")\n\n    db_session.execute(\n        update(InternetSearchProvider)\n        .where(\n            InternetSearchProvider.is_active.is_(True),\n            InternetSearchProvider.id != provider_id,\n        )\n        .values(is_active=False)\n    )\n    provider.is_active = True\n\n    db_session.flush()\n    db_session.refresh(provider)\n    return provider\n\n\ndef deactivate_web_search_provider(\n    *, provider_id: int | None, db_session: Session\n) -> InternetSearchProvider:\n    if provider_id is None:\n        raise ValueError(\"Cannot deactivate a provider without an id.\")\n\n    provider = fetch_web_search_provider_by_id(provider_id, db_session)\n    if provider is None:\n        raise ValueError(f\"No web search provider with id {provider_id} exists.\")\n\n    provider.is_active = False\n\n    db_session.flush()\n    db_session.refresh(provider)\n    return provider\n\n\ndef delete_web_search_provider(provider_id: int, db_session: Session) -> None:\n    provider = fetch_web_search_provider_by_id(provider_id, db_session)\n    if provider is None:\n        raise ValueError(f\"No web search provider with id {provider_id} exists.\")\n\n    db_session.delete(provider)\n    db_session.flush()\n\n    db_session.commit()\n\n\n# Content provider helpers\n\n\ndef fetch_active_web_content_provider(\n    db_session: Session,\n) -> InternetContentProvider | None:\n    stmt = select(InternetContentProvider).where(\n        InternetContentProvider.is_active.is_(True)\n    )\n    return db_session.scalars(stmt).first()\n\n\ndef fetch_web_content_provider_by_id(\n    provider_id: int, db_session: Session\n) -> InternetContentProvider | None:\n    return db_session.get(InternetContentProvider, provider_id)\n\n\ndef fetch_web_content_provider_by_name(\n    name: str, db_session: Session\n) -> InternetContentProvider | None:\n    stmt = select(InternetContentProvider).where(\n        InternetContentProvider.name.ilike(name)\n    )\n    return db_session.scalars(stmt).first()\n\n\ndef fetch_web_content_provider_by_type(\n    provider_type: WebContentProviderType, db_session: Session\n) -> InternetContentProvider | None:\n    stmt = select(InternetContentProvider).where(\n        InternetContentProvider.provider_type == provider_type.value\n    )\n    return db_session.scalars(stmt).first()\n\n\ndef _ensure_unique_content_name(\n    name: str, provider_id: int | None, db_session: Session\n) -> None:\n    existing = fetch_web_content_provider_by_name(name=name, db_session=db_session)\n    if existing and existing.id != provider_id:\n        raise ValueError(f\"A web content provider named '{name}' already exists.\")\n\n\ndef _apply_content_provider_updates(\n    provider: InternetContentProvider,\n    *,\n    name: str,\n    provider_type: WebContentProviderType,\n    api_key: str | None,\n    api_key_changed: bool,\n    config: WebContentProviderConfig | None,\n) -> None:\n    provider.name = name\n    provider.provider_type = provider_type.value\n    provider.config = config\n    if api_key_changed or provider.api_key is None:\n        # EncryptedString accepts str for writes, returns SensitiveValue for reads\n        provider.api_key = api_key  # type: ignore[assignment]\n\n\ndef upsert_web_content_provider(\n    *,\n    provider_id: int | None,\n    name: str,\n    provider_type: WebContentProviderType,\n    api_key: str | None,\n    api_key_changed: bool,\n    config: WebContentProviderConfig | None,\n    activate: bool,\n    db_session: Session,\n) -> InternetContentProvider:\n    _ensure_unique_content_name(\n        name=name, provider_id=provider_id, db_session=db_session\n    )\n\n    provider: InternetContentProvider | None = None\n    if provider_id is not None:\n        provider = fetch_web_content_provider_by_id(provider_id, db_session)\n        if provider is None:\n            raise ValueError(f\"No web content provider with id {provider_id} exists.\")\n    else:\n        provider = InternetContentProvider()\n        db_session.add(provider)\n\n    _apply_content_provider_updates(\n        provider,\n        name=name,\n        provider_type=provider_type,\n        api_key=api_key,\n        api_key_changed=api_key_changed,\n        config=config,\n    )\n\n    db_session.flush()\n\n    if activate:\n        set_active_web_content_provider(provider_id=provider.id, db_session=db_session)\n\n    db_session.refresh(provider)\n    return provider\n\n\ndef set_active_web_content_provider(\n    *, provider_id: int | None, db_session: Session\n) -> InternetContentProvider:\n    if provider_id is None:\n        raise ValueError(\"Cannot activate a provider without an id.\")\n\n    provider = fetch_web_content_provider_by_id(provider_id, db_session)\n    if provider is None:\n        raise ValueError(f\"No web content provider with id {provider_id} exists.\")\n\n    db_session.execute(\n        update(InternetContentProvider)\n        .where(\n            InternetContentProvider.is_active.is_(True),\n            InternetContentProvider.id != provider_id,\n        )\n        .values(is_active=False)\n    )\n    provider.is_active = True\n\n    db_session.flush()\n    db_session.refresh(provider)\n    return provider\n\n\ndef deactivate_web_content_provider(\n    *, provider_id: int | None, db_session: Session\n) -> InternetContentProvider:\n    if provider_id is None:\n        raise ValueError(\"Cannot deactivate a provider without an id.\")\n\n    provider = fetch_web_content_provider_by_id(provider_id, db_session)\n    if provider is None:\n        raise ValueError(f\"No web content provider with id {provider_id} exists.\")\n\n    provider.is_active = False\n\n    db_session.flush()\n    db_session.refresh(provider)\n    return provider\n\n\ndef delete_web_content_provider(provider_id: int, db_session: Session) -> None:\n    provider = fetch_web_content_provider_by_id(provider_id, db_session)\n    if provider is None:\n        raise ValueError(f\"No web content provider with id {provider_id} exists.\")\n\n    db_session.delete(provider)\n    db_session.flush()\n\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/deep_research/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/deep_research/dr_loop.py",
    "content": "# TODO: Notes for potential extensions and future improvements:\n# 1. Allow tools that aren't search specific tools\n# 2. Use user provided custom prompts\n# 3. Save the plan for replay\n\nimport time\nfrom collections.abc import Callable\nfrom typing import cast\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.chat_state import ChatStateContainer\nfrom onyx.chat.citation_processor import CitationMapping\nfrom onyx.chat.citation_processor import DynamicCitationProcessor\nfrom onyx.chat.emitter import Emitter\nfrom onyx.chat.llm_loop import construct_message_history\nfrom onyx.chat.llm_step import run_llm_step\nfrom onyx.chat.llm_step import run_llm_step_pkt_generator\nfrom onyx.chat.models import ChatMessageSimple\nfrom onyx.chat.models import FileToolMetadata\nfrom onyx.chat.models import LlmStepResult\nfrom onyx.chat.models import ToolCallSimple\nfrom onyx.configs.chat_configs import SKIP_DEEP_RESEARCH_CLARIFICATION\nfrom onyx.configs.constants import MessageType\nfrom onyx.db.tools import get_tool_by_name\nfrom onyx.deep_research.dr_mock_tools import get_clarification_tool_definitions\nfrom onyx.deep_research.dr_mock_tools import get_orchestrator_tools\nfrom onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_TOOL_NAME\nfrom onyx.deep_research.dr_mock_tools import THINK_TOOL_RESPONSE_MESSAGE\nfrom onyx.deep_research.dr_mock_tools import THINK_TOOL_RESPONSE_TOKEN_COUNT\nfrom onyx.deep_research.utils import check_special_tool_calls\nfrom onyx.deep_research.utils import create_think_tool_token_processor\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.interfaces import LLMUserIdentity\nfrom onyx.llm.models import ToolChoiceOptions\nfrom onyx.llm.utils import model_is_reasoning_model\nfrom onyx.prompts.deep_research.orchestration_layer import CLARIFICATION_PROMPT\nfrom onyx.prompts.deep_research.orchestration_layer import FINAL_REPORT_PROMPT\nfrom onyx.prompts.deep_research.orchestration_layer import FIRST_CYCLE_REMINDER\nfrom onyx.prompts.deep_research.orchestration_layer import FIRST_CYCLE_REMINDER_TOKENS\nfrom onyx.prompts.deep_research.orchestration_layer import (\n    INTERNAL_SEARCH_CLARIFICATION_GUIDANCE,\n)\nfrom onyx.prompts.deep_research.orchestration_layer import (\n    INTERNAL_SEARCH_RESEARCH_TASK_GUIDANCE,\n)\nfrom onyx.prompts.deep_research.orchestration_layer import ORCHESTRATOR_PROMPT\nfrom onyx.prompts.deep_research.orchestration_layer import ORCHESTRATOR_PROMPT_REASONING\nfrom onyx.prompts.deep_research.orchestration_layer import RESEARCH_PLAN_PROMPT\nfrom onyx.prompts.deep_research.orchestration_layer import RESEARCH_PLAN_REMINDER\nfrom onyx.prompts.deep_research.orchestration_layer import USER_FINAL_REPORT_QUERY\nfrom onyx.prompts.prompt_utils import get_current_llm_day_time\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseDelta\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseStart\nfrom onyx.server.query_and_chat.streaming_models import DeepResearchPlanDelta\nfrom onyx.server.query_and_chat.streaming_models import DeepResearchPlanStart\nfrom onyx.server.query_and_chat.streaming_models import OverallStop\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import SectionEnd\nfrom onyx.server.query_and_chat.streaming_models import TopLevelBranching\nfrom onyx.tools.fake_tools.research_agent import run_research_agent_calls\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import ToolCallInfo\nfrom onyx.tools.models import ToolCallKickoff\nfrom onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool\nfrom onyx.tools.tool_implementations.search.search_tool import SearchTool\nfrom onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool\nfrom onyx.tracing.framework.create import function_span\nfrom onyx.tracing.framework.create import trace\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.timing import log_function_time\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nMAX_USER_MESSAGES_FOR_CONTEXT = 5\nMAX_FINAL_REPORT_TOKENS = 20000\n\n# 30 minute timeout before forcing final report generation\n# NOTE: The overall execution may be much longer still because it could run a research cycle at minute 29\n# and that runs for another nearly 30 minutes.\nDEEP_RESEARCH_FORCE_REPORT_SECONDS = 30 * 60\n\n# Might be something like (this gives a lot of leeway for change but typically the models don't do this):\n# 0. Research topics 1-3\n# 1. Think\n# 2. Research topics 4-5\n# 3. Think\n# 4. Research topics 6 + something new or different from the plan\n# 5. Think\n# 6. Research, possibly something new or different from the plan\n# 7. Think\n# 8. Generate report\nMAX_ORCHESTRATOR_CYCLES = 8\n\n# Similar but without the 4 thinking tool calls\nMAX_ORCHESTRATOR_CYCLES_REASONING = 4\n\n\ndef generate_final_report(\n    history: list[ChatMessageSimple],\n    research_plan: str,\n    llm: LLM,\n    token_counter: Callable[[str], int],\n    state_container: ChatStateContainer,\n    emitter: Emitter,\n    turn_index: int,\n    citation_mapping: CitationMapping,\n    user_identity: LLMUserIdentity | None,\n    saved_reasoning: str | None = None,\n    pre_answer_processing_time: float | None = None,\n    all_injected_file_metadata: dict[str, FileToolMetadata] | None = None,\n) -> bool:\n    \"\"\"Generate the final research report.\n\n    Returns:\n        bool: True if reasoning occurred during report generation (turn_index was incremented),\n              False otherwise.\n    \"\"\"\n    with function_span(\"generate_report\") as span:\n        span.span_data.input = f\"history_length={len(history)}, turn_index={turn_index}\"\n        final_report_prompt = FINAL_REPORT_PROMPT.format(\n            current_datetime=get_current_llm_day_time(full_sentence=False),\n        )\n        system_prompt = ChatMessageSimple(\n            message=final_report_prompt,\n            token_count=token_counter(final_report_prompt),\n            message_type=MessageType.SYSTEM,\n        )\n        final_reminder = USER_FINAL_REPORT_QUERY.format(research_plan=research_plan)\n        reminder_message = ChatMessageSimple(\n            message=final_reminder,\n            token_count=token_counter(final_reminder),\n            message_type=MessageType.USER_REMINDER,\n        )\n        final_report_history = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=history,\n            reminder_message=reminder_message,\n            context_files=None,\n            available_tokens=llm.config.max_input_tokens,\n            all_injected_file_metadata=all_injected_file_metadata,\n        )\n\n        citation_processor = DynamicCitationProcessor()\n        citation_processor.update_citation_mapping(citation_mapping)\n\n        # Only passing in the cited documents as the whole list would be too long\n        final_documents = list(citation_processor.citation_to_doc.values())\n\n        llm_step_result, has_reasoned = run_llm_step(\n            emitter=emitter,\n            history=final_report_history,\n            tool_definitions=[],\n            tool_choice=ToolChoiceOptions.NONE,\n            llm=llm,\n            placement=Placement(turn_index=turn_index),\n            citation_processor=citation_processor,\n            state_container=state_container,\n            final_documents=final_documents,\n            user_identity=user_identity,\n            max_tokens=MAX_FINAL_REPORT_TOKENS,\n            is_deep_research=True,\n            pre_answer_processing_time=pre_answer_processing_time,\n            timeout_override=300,  # 5 minute read timeout for long report generation\n        )\n\n        # Save citation mapping to state_container so citations are persisted\n        state_container.set_citation_mapping(citation_processor.citation_to_doc)\n\n        final_report = llm_step_result.answer\n        if final_report is None:\n            raise ValueError(\"LLM failed to generate the final deep research report\")\n\n        if saved_reasoning:\n            # The reasoning we want to save with the message is more about calling this\n            # generate report and why it's done. Also some models don't have reasoning\n            # but we'd still want to capture the reasoning from the think_tool of theprevious turn.\n            state_container.set_reasoning_tokens(saved_reasoning)\n\n        span.span_data.output = final_report if final_report else None\n        return has_reasoned\n\n\n@log_function_time(print_only=True)\ndef run_deep_research_llm_loop(\n    emitter: Emitter,\n    state_container: ChatStateContainer,\n    simple_chat_history: list[ChatMessageSimple],\n    tools: list[Tool],\n    custom_agent_prompt: str | None,  # noqa: ARG001\n    llm: LLM,\n    token_counter: Callable[[str], int],\n    db_session: Session,\n    skip_clarification: bool = False,\n    user_identity: LLMUserIdentity | None = None,\n    chat_session_id: str | None = None,\n    all_injected_file_metadata: dict[str, FileToolMetadata] | None = None,\n) -> None:\n    with trace(\n        \"run_deep_research_llm_loop\",\n        group_id=chat_session_id,\n        metadata={\n            \"tenant_id\": get_current_tenant_id(),\n            \"chat_session_id\": chat_session_id,\n        },\n    ):\n        # Here for lazy load LiteLLM\n        from onyx.llm.litellm_singleton.config import initialize_litellm\n\n        # An approximate limit. In extreme cases it may still fail but this should allow deep research\n        # to work in most cases.\n        if llm.config.max_input_tokens < 50000:\n            raise RuntimeError(\n                \"Cannot run Deep Research with an LLM that has less than 50,000 max input tokens\"\n            )\n\n        initialize_litellm()\n\n        # Track processing start time for tool duration calculation\n        processing_start_time = time.monotonic()\n\n        available_tokens = llm.config.max_input_tokens\n\n        llm_step_result: LlmStepResult | None = None\n\n        # Filter tools to only allow web search, internal search, and open URL\n        allowed_tool_names = {SearchTool.NAME, WebSearchTool.NAME, OpenURLTool.NAME}\n        allowed_tools = [tool for tool in tools if tool.name in allowed_tool_names]\n        include_internal_search_tunings = SearchTool.NAME in allowed_tool_names\n        orchestrator_start_turn_index = 1\n\n        #########################################################\n        # CLARIFICATION STEP (optional)\n        #########################################################\n        internal_search_clarification_guidance = (\n            INTERNAL_SEARCH_CLARIFICATION_GUIDANCE\n            if include_internal_search_tunings\n            else \"\"\n        )\n        if not SKIP_DEEP_RESEARCH_CLARIFICATION and not skip_clarification:\n            with function_span(\"clarification_step\") as span:\n                clarification_prompt = CLARIFICATION_PROMPT.format(\n                    current_datetime=get_current_llm_day_time(full_sentence=False),\n                    internal_search_clarification_guidance=internal_search_clarification_guidance,\n                )\n                system_prompt = ChatMessageSimple(\n                    message=clarification_prompt,\n                    token_count=300,  # Skips the exact token count but has enough leeway\n                    message_type=MessageType.SYSTEM,\n                )\n\n                truncated_message_history = construct_message_history(\n                    system_prompt=system_prompt,\n                    custom_agent_prompt=None,\n                    simple_chat_history=simple_chat_history,\n                    reminder_message=None,\n                    context_files=None,\n                    available_tokens=available_tokens,\n                    last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,\n                    all_injected_file_metadata=all_injected_file_metadata,\n                )\n\n                # Calculate tool processing duration for clarification step\n                # (used if the LLM emits a clarification question instead of calling tools)\n                clarification_tool_duration = time.monotonic() - processing_start_time\n                llm_step_result, _ = run_llm_step(\n                    emitter=emitter,\n                    history=truncated_message_history,\n                    tool_definitions=get_clarification_tool_definitions(),\n                    tool_choice=ToolChoiceOptions.AUTO,\n                    llm=llm,\n                    placement=Placement(turn_index=0),\n                    # No citations in this step, it should just pass through all\n                    # tokens directly so initialized as an empty citation processor\n                    citation_processor=None,\n                    state_container=state_container,\n                    final_documents=None,\n                    user_identity=user_identity,\n                    is_deep_research=True,\n                    pre_answer_processing_time=clarification_tool_duration,\n                )\n\n                if not llm_step_result.tool_calls:\n                    # Mark this turn as a clarification question\n                    state_container.set_is_clarification(True)\n                    span.span_data.output = \"clarification_required\"\n\n                    emitter.emit(\n                        Packet(\n                            placement=Placement(turn_index=0),\n                            obj=OverallStop(type=\"stop\"),\n                        )\n                    )\n\n                    # If a clarification is asked, we need to end this turn and wait on user input\n                    return\n\n        #########################################################\n        # RESEARCH PLAN STEP\n        #########################################################\n        with function_span(\"research_plan_step\") as span:\n            system_prompt = ChatMessageSimple(\n                message=RESEARCH_PLAN_PROMPT.format(\n                    current_datetime=get_current_llm_day_time(full_sentence=False)\n                ),\n                token_count=300,\n                message_type=MessageType.SYSTEM,\n            )\n            # Note this is fine to use a USER message type here as it can just be interpretered as a\n            # user's message directly to the LLM.\n            reminder_message = ChatMessageSimple(\n                message=RESEARCH_PLAN_REMINDER,\n                token_count=100,\n                message_type=MessageType.USER,\n            )\n            truncated_message_history = construct_message_history(\n                system_prompt=system_prompt,\n                custom_agent_prompt=None,\n                simple_chat_history=simple_chat_history + [reminder_message],\n                reminder_message=None,\n                context_files=None,\n                available_tokens=available_tokens,\n                last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT + 1,\n                all_injected_file_metadata=all_injected_file_metadata,\n            )\n\n            research_plan_generator = run_llm_step_pkt_generator(\n                history=truncated_message_history,\n                tool_definitions=[],\n                tool_choice=ToolChoiceOptions.NONE,\n                llm=llm,\n                placement=Placement(turn_index=0),\n                citation_processor=None,\n                state_container=state_container,\n                final_documents=None,\n                user_identity=user_identity,\n                is_deep_research=True,\n            )\n\n            while True:\n                try:\n                    packet = next(research_plan_generator)\n                    # Translate AgentResponseStart/Delta packets to DeepResearchPlanStart/Delta\n                    # The LLM response from this prompt is the research plan\n                    if isinstance(packet.obj, AgentResponseStart):\n                        emitter.emit(\n                            Packet(\n                                placement=packet.placement,\n                                obj=DeepResearchPlanStart(),\n                            )\n                        )\n                    elif isinstance(packet.obj, AgentResponseDelta):\n                        emitter.emit(\n                            Packet(\n                                placement=packet.placement,\n                                obj=DeepResearchPlanDelta(content=packet.obj.content),\n                            )\n                        )\n                    else:\n                        # Pass through other packet types (e.g., ReasoningStart, ReasoningDelta, etc.)\n                        emitter.emit(packet)\n                except StopIteration as e:\n                    llm_step_result, reasoned = e.value\n                    emitter.emit(\n                        Packet(\n                            # Marks the last turn end which should be the plan generation\n                            placement=Placement(\n                                turn_index=1 if reasoned else 0,\n                            ),\n                            obj=SectionEnd(),\n                        )\n                    )\n                    if reasoned:\n                        orchestrator_start_turn_index += 1\n                    break\n            llm_step_result = cast(LlmStepResult, llm_step_result)\n\n            research_plan = llm_step_result.answer\n            if research_plan is None:\n                raise RuntimeError(\"Deep Research failed to generate a research plan\")\n            span.span_data.output = research_plan if research_plan else None\n\n        #########################################################\n        # RESEARCH EXECUTION STEP\n        #########################################################\n        with function_span(\"research_execution_step\") as span:\n            is_reasoning_model = model_is_reasoning_model(\n                llm.config.model_name, llm.config.model_provider\n            )\n\n            max_orchestrator_cycles = (\n                MAX_ORCHESTRATOR_CYCLES\n                if not is_reasoning_model\n                else MAX_ORCHESTRATOR_CYCLES_REASONING\n            )\n\n            orchestrator_prompt_template = (\n                ORCHESTRATOR_PROMPT\n                if not is_reasoning_model\n                else ORCHESTRATOR_PROMPT_REASONING\n            )\n\n            internal_search_research_task_guidance = (\n                INTERNAL_SEARCH_RESEARCH_TASK_GUIDANCE\n                if include_internal_search_tunings\n                else \"\"\n            )\n            token_count_prompt = orchestrator_prompt_template.format(\n                current_datetime=get_current_llm_day_time(full_sentence=False),\n                current_cycle_count=1,\n                max_cycles=max_orchestrator_cycles,\n                research_plan=research_plan,\n                internal_search_research_task_guidance=internal_search_research_task_guidance,\n            )\n            orchestration_tokens = token_counter(token_count_prompt)\n\n            reasoning_cycles = 0\n            most_recent_reasoning: str | None = None\n            citation_mapping: CitationMapping = {}\n            final_turn_index: int = (\n                orchestrator_start_turn_index  # Track the final turn_index for stop packet\n            )\n            for cycle in range(max_orchestrator_cycles):\n                # Check if we've exceeded the time limit or reached the last cycle\n                # - if so, skip LLM and generate final report\n                elapsed_seconds = time.monotonic() - processing_start_time\n                timed_out = elapsed_seconds > DEEP_RESEARCH_FORCE_REPORT_SECONDS\n                is_last_cycle = cycle == max_orchestrator_cycles - 1\n\n                if timed_out or is_last_cycle:\n                    if timed_out:\n                        logger.info(\n                            f\"Deep research exceeded {DEEP_RESEARCH_FORCE_REPORT_SECONDS}s \"\n                            f\"(elapsed: {elapsed_seconds:.1f}s), forcing final report generation\"\n                        )\n                    report_turn_index = (\n                        orchestrator_start_turn_index + cycle + reasoning_cycles\n                    )\n                    report_reasoned = generate_final_report(\n                        history=simple_chat_history,\n                        research_plan=research_plan,\n                        llm=llm,\n                        token_counter=token_counter,\n                        state_container=state_container,\n                        emitter=emitter,\n                        turn_index=report_turn_index,\n                        citation_mapping=citation_mapping,\n                        user_identity=user_identity,\n                        pre_answer_processing_time=elapsed_seconds,\n                        all_injected_file_metadata=all_injected_file_metadata,\n                    )\n                    final_turn_index = report_turn_index + (1 if report_reasoned else 0)\n                    break\n\n                if cycle == 1:\n                    first_cycle_reminder_message = ChatMessageSimple(\n                        message=FIRST_CYCLE_REMINDER,\n                        token_count=FIRST_CYCLE_REMINDER_TOKENS,\n                        message_type=MessageType.USER_REMINDER,\n                    )\n                else:\n                    first_cycle_reminder_message = None\n\n                research_agent_calls: list[ToolCallKickoff] = []\n\n                orchestrator_prompt = orchestrator_prompt_template.format(\n                    current_datetime=get_current_llm_day_time(full_sentence=False),\n                    current_cycle_count=cycle,\n                    max_cycles=max_orchestrator_cycles,\n                    research_plan=research_plan,\n                    internal_search_research_task_guidance=internal_search_research_task_guidance,\n                )\n\n                system_prompt = ChatMessageSimple(\n                    message=orchestrator_prompt,\n                    token_count=orchestration_tokens,\n                    message_type=MessageType.SYSTEM,\n                )\n\n                truncated_message_history = construct_message_history(\n                    system_prompt=system_prompt,\n                    custom_agent_prompt=None,\n                    simple_chat_history=simple_chat_history,\n                    reminder_message=first_cycle_reminder_message,\n                    context_files=None,\n                    available_tokens=available_tokens,\n                    last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,\n                    all_injected_file_metadata=all_injected_file_metadata,\n                )\n\n                # Use think tool processor for non-reasoning models to convert\n                # think_tool calls to reasoning content\n                custom_processor = (\n                    create_think_tool_token_processor()\n                    if not is_reasoning_model\n                    else None\n                )\n\n                llm_step_result, has_reasoned = run_llm_step(\n                    emitter=emitter,\n                    history=truncated_message_history,\n                    tool_definitions=get_orchestrator_tools(\n                        include_think_tool=not is_reasoning_model\n                    ),\n                    tool_choice=ToolChoiceOptions.REQUIRED,\n                    llm=llm,\n                    placement=Placement(\n                        turn_index=orchestrator_start_turn_index\n                        + cycle\n                        + reasoning_cycles\n                    ),\n                    # No citations in this step, it should just pass through all\n                    # tokens directly so initialized as an empty citation processor\n                    citation_processor=DynamicCitationProcessor(),\n                    state_container=state_container,\n                    final_documents=None,\n                    user_identity=user_identity,\n                    custom_token_processor=custom_processor,\n                    is_deep_research=True,\n                    # Even for the reasoning tool, this should be plenty\n                    # The generation here should never be very long as it's just the tool calls.\n                    # This prevents timeouts where the model gets into an endless loop of null or bad tokens.\n                    max_tokens=1024,\n                )\n                if has_reasoned:\n                    reasoning_cycles += 1\n\n                tool_calls = llm_step_result.tool_calls or []\n\n                if not tool_calls and cycle == 0:\n                    raise RuntimeError(\n                        \"Deep Research failed to generate any research tasks for the agents.\"\n                    )\n\n                if not tool_calls:\n                    # Basically hope that this is an infrequent occurence and hopefully multiple research\n                    # cycles have already ran\n                    logger.warning(\"No tool calls found, this should not happen.\")\n                    report_turn_index = (\n                        orchestrator_start_turn_index + cycle + reasoning_cycles\n                    )\n                    report_reasoned = generate_final_report(\n                        history=simple_chat_history,\n                        research_plan=research_plan,\n                        llm=llm,\n                        token_counter=token_counter,\n                        state_container=state_container,\n                        emitter=emitter,\n                        turn_index=report_turn_index,\n                        citation_mapping=citation_mapping,\n                        user_identity=user_identity,\n                        pre_answer_processing_time=time.monotonic()\n                        - processing_start_time,\n                        all_injected_file_metadata=all_injected_file_metadata,\n                    )\n                    final_turn_index = report_turn_index + (1 if report_reasoned else 0)\n                    break\n\n                special_tool_calls = check_special_tool_calls(tool_calls=tool_calls)\n\n                if special_tool_calls.generate_report_tool_call:\n                    report_turn_index = (\n                        special_tool_calls.generate_report_tool_call.placement.turn_index\n                    )\n                    report_reasoned = generate_final_report(\n                        history=simple_chat_history,\n                        research_plan=research_plan,\n                        llm=llm,\n                        token_counter=token_counter,\n                        state_container=state_container,\n                        emitter=emitter,\n                        turn_index=report_turn_index,\n                        citation_mapping=citation_mapping,\n                        user_identity=user_identity,\n                        saved_reasoning=most_recent_reasoning,\n                        pre_answer_processing_time=time.monotonic()\n                        - processing_start_time,\n                        all_injected_file_metadata=all_injected_file_metadata,\n                    )\n                    final_turn_index = report_turn_index + (1 if report_reasoned else 0)\n                    break\n                elif special_tool_calls.think_tool_call:\n                    think_tool_call = special_tool_calls.think_tool_call\n                    # Only process the THINK_TOOL and skip all other tool calls\n                    # This will not actually get saved to the db as a tool call but we'll attach it to the tool(s) called after\n                    # it as if it were just a reasoning model doing it. In the chat history, because it happens in 2 steps,\n                    # we will show it as a separate message.\n                    # NOTE: This does not need to increment the reasoning cycles because the custom token processor causes\n                    # the LLM step to handle this\n                    with function_span(\"think_tool\") as span:\n                        span.span_data.input = str(think_tool_call.tool_args)\n                        most_recent_reasoning = state_container.reasoning_tokens\n                        tool_call_message = think_tool_call.to_msg_str()\n                        tool_call_token_count = token_counter(tool_call_message)\n\n                        # Create ASSISTANT message with tool_calls (OpenAI parallel format)\n                        think_tool_simple = ToolCallSimple(\n                            tool_call_id=think_tool_call.tool_call_id,\n                            tool_name=think_tool_call.tool_name,\n                            tool_arguments=think_tool_call.tool_args,\n                            token_count=tool_call_token_count,\n                        )\n                        think_assistant_msg = ChatMessageSimple(\n                            message=\"\",\n                            token_count=tool_call_token_count,\n                            message_type=MessageType.ASSISTANT,\n                            tool_calls=[think_tool_simple],\n                            image_files=None,\n                        )\n                        simple_chat_history.append(think_assistant_msg)\n\n                        think_tool_response_msg = ChatMessageSimple(\n                            message=THINK_TOOL_RESPONSE_MESSAGE,\n                            token_count=THINK_TOOL_RESPONSE_TOKEN_COUNT,\n                            message_type=MessageType.TOOL_CALL_RESPONSE,\n                            tool_call_id=think_tool_call.tool_call_id,\n                            image_files=None,\n                        )\n                        simple_chat_history.append(think_tool_response_msg)\n                        span.span_data.output = THINK_TOOL_RESPONSE_MESSAGE\n                    continue\n                else:\n                    for tool_call in tool_calls:\n                        if tool_call.tool_name != RESEARCH_AGENT_TOOL_NAME:\n                            logger.warning(\n                                f\"Unexpected tool call: {tool_call.tool_name}\"\n                            )\n                            continue\n\n                        research_agent_calls.append(tool_call)\n\n                    if not research_agent_calls:\n                        logger.warning(\n                            \"No research agent tool calls found, this should not happen.\"\n                        )\n                        report_turn_index = (\n                            orchestrator_start_turn_index + cycle + reasoning_cycles\n                        )\n                        report_reasoned = generate_final_report(\n                            history=simple_chat_history,\n                            research_plan=research_plan,\n                            llm=llm,\n                            token_counter=token_counter,\n                            state_container=state_container,\n                            emitter=emitter,\n                            turn_index=report_turn_index,\n                            citation_mapping=citation_mapping,\n                            user_identity=user_identity,\n                            pre_answer_processing_time=time.monotonic()\n                            - processing_start_time,\n                            all_injected_file_metadata=all_injected_file_metadata,\n                        )\n                        final_turn_index = report_turn_index + (\n                            1 if report_reasoned else 0\n                        )\n                        break\n\n                    if len(research_agent_calls) > 1:\n                        emitter.emit(\n                            Packet(\n                                placement=Placement(\n                                    turn_index=research_agent_calls[\n                                        0\n                                    ].placement.turn_index\n                                ),\n                                obj=TopLevelBranching(\n                                    num_parallel_branches=len(research_agent_calls)\n                                ),\n                            )\n                        )\n\n                    research_results = run_research_agent_calls(\n                        # The tool calls here contain the placement information\n                        research_agent_calls=research_agent_calls,\n                        parent_tool_call_ids=[\n                            tool_call.tool_call_id for tool_call in tool_calls\n                        ],\n                        tools=allowed_tools,\n                        emitter=emitter,\n                        state_container=state_container,\n                        llm=llm,\n                        is_reasoning_model=is_reasoning_model,\n                        token_counter=token_counter,\n                        citation_mapping=citation_mapping,\n                        user_identity=user_identity,\n                    )\n\n                    citation_mapping = research_results.citation_mapping\n\n                    # Build ONE ASSISTANT message with all tool calls (OpenAI parallel format)\n                    tool_calls_simple: list[ToolCallSimple] = []\n                    for current_tool_call in research_agent_calls:\n                        tool_call_message = current_tool_call.to_msg_str()\n                        tool_call_token_count = token_counter(tool_call_message)\n                        tool_calls_simple.append(\n                            ToolCallSimple(\n                                tool_call_id=current_tool_call.tool_call_id,\n                                tool_name=current_tool_call.tool_name,\n                                tool_arguments=current_tool_call.tool_args,\n                                token_count=tool_call_token_count,\n                            )\n                        )\n\n                    total_tool_call_tokens = sum(\n                        tc.token_count for tc in tool_calls_simple\n                    )\n                    assistant_with_tools = ChatMessageSimple(\n                        message=\"\",\n                        token_count=total_tool_call_tokens,\n                        message_type=MessageType.ASSISTANT,\n                        tool_calls=tool_calls_simple,\n                        image_files=None,\n                    )\n                    simple_chat_history.append(assistant_with_tools)\n\n                    # Now add TOOL_CALL_RESPONSE messages and tool call info for each result\n                    for tab_index, report in enumerate(\n                        research_results.intermediate_reports\n                    ):\n                        if report is None:\n                            # The LLM will not see that this research was even attempted, it may try\n                            # something similar again but this is not bad.\n                            logger.error(\n                                f\"Research agent call at tab_index {tab_index} failed, skipping\"\n                            )\n                            continue\n\n                        current_tool_call = research_agent_calls[tab_index]\n                        tool_call_info = ToolCallInfo(\n                            parent_tool_call_id=None,\n                            turn_index=orchestrator_start_turn_index\n                            + cycle\n                            + reasoning_cycles,\n                            tab_index=tab_index,\n                            tool_name=current_tool_call.tool_name,\n                            tool_call_id=current_tool_call.tool_call_id,\n                            tool_id=get_tool_by_name(\n                                tool_name=RESEARCH_AGENT_TOOL_NAME,\n                                db_session=db_session,\n                            ).id,\n                            reasoning_tokens=llm_step_result.reasoning\n                            or most_recent_reasoning,\n                            tool_call_arguments=current_tool_call.tool_args,\n                            tool_call_response=report,\n                            search_docs=None,  # Intermediate docs are not saved/shown\n                            generated_images=None,\n                        )\n                        state_container.add_tool_call(tool_call_info)\n\n                        tool_call_response_msg = ChatMessageSimple(\n                            message=report,\n                            token_count=token_counter(report),\n                            message_type=MessageType.TOOL_CALL_RESPONSE,\n                            tool_call_id=current_tool_call.tool_call_id,\n                            image_files=None,\n                        )\n                        simple_chat_history.append(tool_call_response_msg)\n\n                # If it reached this point, it did not call reasoning, so here we wipe it to not save it to multiple turns\n                most_recent_reasoning = None\n\n        emitter.emit(\n            Packet(\n                placement=Placement(turn_index=final_turn_index),\n                obj=OverallStop(type=\"stop\"),\n            )\n        )\n"
  },
  {
    "path": "backend/onyx/deep_research/dr_mock_tools.py",
    "content": "GENERATE_PLAN_TOOL_NAME = \"generate_plan\"\n\nRESEARCH_AGENT_IN_CODE_ID = \"ResearchAgent\"\nRESEARCH_AGENT_TOOL_NAME = \"research_agent\"\nRESEARCH_AGENT_TASK_KEY = \"task\"\n\nGENERATE_REPORT_TOOL_NAME = \"generate_report\"\n\nTHINK_TOOL_NAME = \"think_tool\"\n\n\n# ruff: noqa: E501, W605 start\nGENERATE_PLAN_TOOL_DESCRIPTION = {\n    \"type\": \"function\",\n    \"function\": {\n        \"name\": GENERATE_PLAN_TOOL_NAME,\n        \"description\": \"No clarification needed, generate a research plan for the user's query.\",\n        \"parameters\": {\n            \"type\": \"object\",\n            \"properties\": {},\n            \"required\": [],\n        },\n    },\n}\n\n\nRESEARCH_AGENT_TOOL_DESCRIPTION = {\n    \"type\": \"function\",\n    \"function\": {\n        \"name\": RESEARCH_AGENT_TOOL_NAME,\n        \"description\": \"Conduct research on a specific topic.\",\n        \"parameters\": {\n            \"type\": \"object\",\n            \"properties\": {\n                RESEARCH_AGENT_TASK_KEY: {\n                    \"type\": \"string\",\n                    \"description\": \"The research task to investigate, should be 1-2 descriptive sentences outlining the direction of investigation.\",\n                }\n            },\n            \"required\": [RESEARCH_AGENT_TASK_KEY],\n        },\n    },\n}\n\n\nGENERATE_REPORT_TOOL_DESCRIPTION = {\n    \"type\": \"function\",\n    \"function\": {\n        \"name\": GENERATE_REPORT_TOOL_NAME,\n        \"description\": \"Generate the final research report from all of the findings. Should be called when all aspects of the user's query have been researched, or maximum cycles are reached.\",\n        \"parameters\": {\n            \"type\": \"object\",\n            \"properties\": {},\n            \"required\": [],\n        },\n    },\n}\n\n\nTHINK_TOOL_DESCRIPTION = {\n    \"type\": \"function\",\n    \"function\": {\n        \"name\": THINK_TOOL_NAME,\n        \"description\": \"Use this for reasoning between research_agent calls and before calling generate_report. Think deeply about key results, identify knowledge gaps, and plan next steps.\",\n        \"parameters\": {\n            \"type\": \"object\",\n            \"properties\": {\n                \"reasoning\": {\n                    \"type\": \"string\",\n                    \"description\": \"Your chain of thought reasoning, use paragraph format, no lists.\",\n                }\n            },\n            \"required\": [\"reasoning\"],\n        },\n    },\n}\n\n\nRESEARCH_AGENT_THINK_TOOL_DESCRIPTION = {\n    \"type\": \"function\",\n    \"function\": {\n        \"name\": \"think_tool\",\n        \"description\": \"Use this for reasoning between research steps. Think deeply about key results, identify knowledge gaps, and plan next steps.\",\n        \"parameters\": {\n            \"type\": \"object\",\n            \"properties\": {\n                \"reasoning\": {\n                    \"type\": \"string\",\n                    \"description\": \"Your chain of thought reasoning, can be as long as a lengthy paragraph.\",\n                }\n            },\n            \"required\": [\"reasoning\"],\n        },\n    },\n}\n\n\nRESEARCH_AGENT_GENERATE_REPORT_TOOL_DESCRIPTION = {\n    \"type\": \"function\",\n    \"function\": {\n        \"name\": \"generate_report\",\n        \"description\": \"Generate the final research report from all findings. Should be called when research is complete.\",\n        \"parameters\": {\n            \"type\": \"object\",\n            \"properties\": {},\n            \"required\": [],\n        },\n    },\n}\n\n\nTHINK_TOOL_RESPONSE_MESSAGE = \"Acknowledged, please continue.\"\nTHINK_TOOL_RESPONSE_TOKEN_COUNT = 10\n\n\ndef get_clarification_tool_definitions() -> list[dict]:\n    return [GENERATE_PLAN_TOOL_DESCRIPTION]\n\n\ndef get_orchestrator_tools(include_think_tool: bool) -> list[dict]:\n    tools = [\n        RESEARCH_AGENT_TOOL_DESCRIPTION,\n        GENERATE_REPORT_TOOL_DESCRIPTION,\n    ]\n    if include_think_tool:\n        tools.append(THINK_TOOL_DESCRIPTION)\n    return tools\n\n\ndef get_research_agent_additional_tool_definitions(\n    include_think_tool: bool,\n) -> list[dict]:\n    tools = [GENERATE_REPORT_TOOL_DESCRIPTION]\n    if include_think_tool:\n        tools.append(RESEARCH_AGENT_THINK_TOOL_DESCRIPTION)\n    return tools\n\n\n# ruff: noqa: E501, W605 end\n"
  },
  {
    "path": "backend/onyx/deep_research/models.py",
    "content": "from pydantic import BaseModel\n\nfrom onyx.chat.citation_processor import CitationMapping\nfrom onyx.tools.models import ToolCallKickoff\n\n\nclass SpecialToolCalls(BaseModel):\n    think_tool_call: ToolCallKickoff | None = None\n    generate_report_tool_call: ToolCallKickoff | None = None\n\n\nclass ResearchAgentCallResult(BaseModel):\n    intermediate_report: str\n    citation_mapping: CitationMapping\n\n\nclass CombinedResearchAgentCallResult(BaseModel):\n    # The None is needed here to keep the mappings consistent\n    # we later skip the failed research results but we need to know\n    # which ones failed\n    intermediate_reports: list[str | None]\n    citation_mapping: CitationMapping\n"
  },
  {
    "path": "backend/onyx/deep_research/utils.py",
    "content": "from collections.abc import Callable\nfrom typing import Any\n\nfrom pydantic import BaseModel\n\nfrom onyx.deep_research.dr_mock_tools import GENERATE_REPORT_TOOL_NAME\nfrom onyx.deep_research.dr_mock_tools import THINK_TOOL_NAME\nfrom onyx.deep_research.models import SpecialToolCalls\nfrom onyx.llm.model_response import ChatCompletionDeltaToolCall\nfrom onyx.llm.model_response import Delta\nfrom onyx.llm.model_response import FunctionCall\nfrom onyx.tools.models import ToolCallKickoff\n\n\n# JSON prefixes to detect in think_tool arguments\n# The schema is: {\"reasoning\": \"...content...\"}\nJSON_PREFIX_WITH_SPACE = '{\"reasoning\": \"'\nJSON_PREFIX_NO_SPACE = '{\"reasoning\":\"'\n\n\nclass ThinkToolProcessorState(BaseModel):\n    \"\"\"State for tracking think tool processing across streaming deltas.\"\"\"\n\n    think_tool_found: bool = False\n    think_tool_index: int | None = None\n    think_tool_id: str | None = None\n    full_arguments: str = \"\"  # Full accumulated arguments for final tool call\n    accumulated_args: str = \"\"  # Working buffer for JSON parsing\n    json_prefix_stripped: bool = False\n    # Buffer holds content that might be the JSON suffix \"}\n    # We hold back 2 chars to avoid emitting the closing \"}\n    buffer: str = \"\"\n\n\ndef _unescape_json_string(s: str) -> str:\n    \"\"\"\n    Unescape JSON string escape sequences.\n\n    JSON strings use backslash escapes like \\\\n for newlines, \\\\t for tabs, etc.\n    When we extract content from JSON by string manipulation (without json.loads),\n    we need to manually decode these escape sequences.\n\n    Note: We use a placeholder approach to handle escaped backslashes correctly.\n    For example, \"\\\\\\\\n\" (escaped backslash + n) should become \"\\\\n\" (literal backslash + n),\n    not a newline character.\n    \"\"\"\n    # First, protect escaped backslashes with a placeholder\n    placeholder = \"\\x00ESCAPED_BACKSLASH\\x00\"\n    result = s.replace(\"\\\\\\\\\", placeholder)\n\n    # Now unescape common JSON escape sequences\n    result = result.replace(\"\\\\n\", \"\\n\")\n    result = result.replace(\"\\\\r\", \"\\r\")\n    result = result.replace(\"\\\\t\", \"\\t\")\n    result = result.replace('\\\\\"', '\"')\n\n    # Finally, restore escaped backslashes as single backslashes\n    result = result.replace(placeholder, \"\\\\\")\n\n    return result\n\n\ndef _extract_reasoning_chunk(state: ThinkToolProcessorState) -> str | None:\n    \"\"\"\n    Extract reasoning content from accumulated arguments, stripping JSON wrapper.\n\n    Returns the next chunk of reasoning to emit, or None if nothing to emit yet.\n    \"\"\"\n    # If we haven't found the JSON prefix yet, look for it\n    if not state.json_prefix_stripped:\n        # Try both prefix variants\n        for prefix in [JSON_PREFIX_WITH_SPACE, JSON_PREFIX_NO_SPACE]:\n            prefix_pos = state.accumulated_args.find(prefix)\n            if prefix_pos != -1:\n                # Found prefix - extract content after it\n                content_start = prefix_pos + len(prefix)\n                state.buffer = state.accumulated_args[content_start:]\n                state.accumulated_args = \"\"\n                state.json_prefix_stripped = True\n                break\n\n        if not state.json_prefix_stripped:\n            # Haven't seen full prefix yet, keep accumulating\n            return None\n    else:\n        # Already stripped prefix, add new content to buffer\n        state.buffer += state.accumulated_args\n        state.accumulated_args = \"\"\n\n    # Hold back enough chars to avoid splitting escape sequences AND the JSON suffix \"}\n    # We need at least 2 for the suffix, but we also need to ensure escape sequences\n    # like \\n, \\t, \\\\, \\\" don't get split. The longest escape is \\\\ (2 chars).\n    # So we hold back 3 chars to be safe: if the last char is \\, we don't want to\n    # emit it without knowing what follows.\n    holdback = 3\n    if len(state.buffer) <= holdback:\n        return None\n\n    # Check if there's a trailing backslash that could be part of an escape sequence\n    # If so, hold back one more character to avoid splitting the escape\n    to_emit = state.buffer[:-holdback]\n    remaining = state.buffer[-holdback:]\n\n    # If to_emit ends with a backslash, it might be the start of an escape sequence\n    # Move it to the remaining buffer to process with the next chunk\n    # If to_emit ends with a backslash, it might be the start of an escape sequence\n    # Move it to the remaining buffer to process with the next chunk\n    if to_emit and to_emit[-1] == \"\\\\\":\n        remaining = to_emit[-1] + remaining\n        to_emit = to_emit[:-1]\n\n    state.buffer = remaining\n\n    # Unescape JSON escape sequences (e.g., \\\\n -> \\n)\n    if to_emit:\n        to_emit = _unescape_json_string(to_emit)\n\n    return to_emit if to_emit else None\n\n\ndef create_think_tool_token_processor() -> (\n    Callable[[Delta | None, Any], tuple[Delta | None, Any]]\n):\n    \"\"\"\n    Create a custom token processor that converts think_tool calls to reasoning content.\n\n    When the think_tool is detected:\n    - Tool call arguments are converted to reasoning_content (JSON wrapper stripped)\n    - All other deltas (content, other tool calls) are dropped\n\n    This allows non-reasoning models to emit chain-of-thought via the think_tool,\n    which gets displayed as reasoning tokens in the UI.\n\n    Returns:\n        A function compatible with run_llm_step_pkt_generator's custom_token_processor parameter.\n        The function takes (Delta, state) and returns (modified Delta | None, new state).\n    \"\"\"\n\n    def process_token(delta: Delta | None, state: Any) -> tuple[Delta | None, Any]:\n        if state is None:\n            state = ThinkToolProcessorState()\n\n        # Handle flush signal (delta=None) - emit the complete tool call\n        if delta is None:\n            if state.think_tool_found and state.think_tool_id:\n                # Return the complete think tool call\n                complete_tool_call = ChatCompletionDeltaToolCall(\n                    id=state.think_tool_id,\n                    index=state.think_tool_index or 0,\n                    type=\"function\",\n                    function=FunctionCall(\n                        name=THINK_TOOL_NAME,\n                        arguments=state.full_arguments,\n                    ),\n                )\n                return Delta(tool_calls=[complete_tool_call]), state\n            return None, state\n\n        # Check for think tool in tool_calls\n        if delta.tool_calls:\n            for tool_call in delta.tool_calls:\n                # Detect think tool by name\n                if tool_call.function and tool_call.function.name == THINK_TOOL_NAME:\n                    state.think_tool_found = True\n                    state.think_tool_index = tool_call.index\n\n                # Capture tool call id when available\n                if (\n                    state.think_tool_found\n                    and tool_call.index == state.think_tool_index\n                    and tool_call.id\n                ):\n                    state.think_tool_id = tool_call.id\n\n                # Accumulate arguments for the think tool\n                if (\n                    state.think_tool_found\n                    and tool_call.index == state.think_tool_index\n                    and tool_call.function\n                    and tool_call.function.arguments\n                ):\n                    # Track full arguments for final tool call\n                    state.full_arguments += tool_call.function.arguments\n                    # Also accumulate for JSON parsing\n                    state.accumulated_args += tool_call.function.arguments\n\n                    # Try to extract reasoning content\n                    reasoning_chunk = _extract_reasoning_chunk(state)\n                    if reasoning_chunk:\n                        # Return delta with reasoning_content to trigger reasoning streaming\n                        return Delta(reasoning_content=reasoning_chunk), state\n\n        # If think tool found, drop all other content\n        if state.think_tool_found:\n            return None, state\n\n        # No think tool detected, pass through original delta\n        return delta, state\n\n    return process_token\n\n\ndef check_special_tool_calls(tool_calls: list[ToolCallKickoff]) -> SpecialToolCalls:\n    think_tool_call: ToolCallKickoff | None = None\n    generate_report_tool_call: ToolCallKickoff | None = None\n\n    for tool_call in tool_calls:\n        if tool_call.tool_name == THINK_TOOL_NAME:\n            think_tool_call = tool_call\n        elif tool_call.tool_name == GENERATE_REPORT_TOOL_NAME:\n            generate_report_tool_call = tool_call\n\n    return SpecialToolCalls(\n        think_tool_call=think_tool_call,\n        generate_report_tool_call=generate_report_tool_call,\n    )\n"
  },
  {
    "path": "backend/onyx/document_index/FILTER_SEMANTICS.md",
    "content": "# Vector DB Filter Semantics\n\nHow `IndexFilters` fields combine into the final query filter. Applies to both Vespa and OpenSearch.\n\n## Filter categories\n\n| Category | Fields | Join logic |\n|---|---|---|\n| **Visibility** | `hidden` | Always applied (unless `include_hidden`) |\n| **Tenant** | `tenant_id` | AND (multi-tenant only) |\n| **ACL** | `access_control_list` | OR within, AND with rest |\n| **Narrowing** | `source_type`, `tags`, `time_cutoff` | Each OR within, AND with rest |\n| **Knowledge scope** | `document_set`, `attached_document_ids`, `hierarchy_node_ids`, `persona_id_filter` | OR within group, AND with rest |\n| **Additive scope** | `project_id_filter` | OR'd into knowledge scope **only when** a knowledge scope filter already exists |\n\n## How filters combine\n\nAll categories are AND'd together. Within the knowledge scope category, individual filters are OR'd.\n\n```\nNOT hidden\nAND tenant = T                          -- if multi-tenant\nAND (acl contains A1 OR acl contains A2)\nAND (source_type = S1 OR ...)           -- if set\nAND (tag = T1 OR ...)                   -- if set\nAND <knowledge scope>                   -- see below\nAND time >= cutoff                      -- if set\n```\n\n## Knowledge scope rules\n\nThe knowledge scope filter controls **what knowledge an assistant can access**.\n\n### Primary vs additive triggers\n\n- **`persona_id_filter`** is a **primary** trigger. A persona with user files IS explicit\n  knowledge, so `persona_id_filter` alone can start a knowledge scope. Note: this is\n  NOT the raw ID of the persona being used — it is only set when the persona's\n  user files overflowed the LLM context window.\n- **`project_id_filter`** is **additive**. It widens an existing scope to include project\n  files but never restricts on its own — a chat inside a project should still search\n  team knowledge when no other knowledge is attached.\n\n### No explicit knowledge attached\n\nWhen `document_set`, `attached_document_ids`, `hierarchy_node_ids`, and `persona_id_filter` are all empty/None:\n\n- **No knowledge scope filter is applied.** The assistant can see everything (subject to ACL).\n- `project_id_filter` is ignored — it never restricts on its own.\n\n### One explicit knowledge type\n\n```\n-- Only document sets\nAND (document_sets contains \"Engineering\" OR document_sets contains \"Legal\")\n\n-- Only persona user files (overflowed context)\nAND (personas contains 42)\n```\n\n### Multiple explicit knowledge types (OR'd)\n\n```\n-- Document sets + persona user files\nAND (\n    document_sets contains \"Engineering\"\n    OR personas contains 42\n)\n```\n\n### Explicit knowledge + overflowing project files\n\nWhen an explicit knowledge restriction is in effect **and** `project_id_filter` is set (project files overflowed the LLM context window), `project_id_filter` widens the filter:\n\n```\n-- Document sets + project files overflowed\nAND (\n    document_sets contains \"Engineering\"\n    OR user_project contains 7\n)\n\n-- Persona user files + project files (won't happen in practice;\n-- custom personas ignore project files per the precedence rule)\nAND (\n    personas contains 42\n    OR user_project contains 7\n)\n```\n\n### Only project_id_filter (no explicit knowledge)\n\nNo knowledge scope filter. The assistant searches everything.\n\n```\n-- Just ACL, no restriction\nNOT hidden\nAND (acl contains ...)\n```\n\n## Field reference\n\n| Filter field | Vespa field | Vespa type | Purpose |\n|---|---|---|---|\n| `document_set` | `document_sets` | `weightedset<string>` | Connector doc sets attached to assistant |\n| `attached_document_ids` | `document_id` | `string` | Documents explicitly attached (OpenSearch only) |\n| `hierarchy_node_ids` | `ancestor_hierarchy_node_ids` | `array<int>` | Folder/space nodes (OpenSearch only) |\n| `persona_id_filter` | `personas` | `array<int>` | Persona tag for overflowing user files (**primary** trigger) |\n| `project_id_filter` | `user_project` | `array<int>` | Project tag for overflowing project files (**additive** only) |\n| `access_control_list` | `access_control_list` | `weightedset<string>` | ACL entries for the requesting user |\n| `source_type` | `source_type` | `string` | Connector source type (e.g. `web`, `jira`) |\n| `tags` | `metadata_list` | `array<string>` | Document metadata tags |\n| `time_cutoff` | `doc_updated_at` | `long` | Minimum document update timestamp |\n| `tenant_id` | `tenant_id` | `string` | Tenant isolation (multi-tenant) |\n"
  },
  {
    "path": "backend/onyx/document_index/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/document_index/chunk_content_enrichment.py",
    "content": "from onyx.configs.app_configs import BLURB_SIZE\nfrom onyx.configs.constants import RETURN_SEPARATOR\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import InferenceChunkUncleaned\nfrom onyx.indexing.models import DocAwareChunk\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\n\n\ndef generate_enriched_content_for_chunk_text(chunk: DocMetadataAwareIndexChunk) -> str:\n    return f\"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_keyword}\"\n\n\ndef generate_enriched_content_for_chunk_embedding(chunk: DocAwareChunk) -> str:\n    return f\"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_semantic}\"\n\n\ndef cleanup_content_for_chunks(\n    chunks: list[InferenceChunkUncleaned],\n) -> list[InferenceChunk]:\n    \"\"\"\n    Removes indexing-time content additions from chunks. Inverse of\n    generate_enriched_content_for_chunk.\n\n    During indexing, chunks are augmented with additional text to improve search\n    quality:\n    - Title prepended to content (for better keyword/semantic matching)\n    - Metadata suffix appended to content\n    - Contextual RAG: doc_summary (beginning) and chunk_context (end)\n\n    This function strips these additions before returning chunks to users,\n    restoring the original document content. Cleaning is applied in sequence:\n    1. Title removal:\n        - Full match: Strips exact title from beginning\n        - Partial match: If content starts with title[:BLURB_SIZE], splits on\n          RETURN_SEPARATOR to remove title section\n    2. Metadata suffix removal:\n        - Strips metadata_suffix from end, plus trailing RETURN_SEPARATOR\n    3. Contextual RAG removal:\n        - Strips doc_summary from beginning (if present)\n        - Strips chunk_context from end (if present)\n\n    TODO(andrei): This entire function is not that fantastic, clean it up during\n    QA before rolling out OpenSearch.\n\n    Args:\n        chunks: Chunks as retrieved from the document index with indexing\n            augmentations intact.\n\n    Returns:\n        Clean InferenceChunk objects with augmentations removed, containing only\n            the original document content that should be shown to users.\n    \"\"\"\n\n    def _remove_title(chunk: InferenceChunkUncleaned) -> str:\n        # TODO(andrei): This was ported over from\n        # backend/onyx/document_index/vespa/vespa_document_index.py but I don't\n        # think this logic is correct. In Vespa at least we set the title field\n        # from the output of get_title_for_document_index, which is not\n        # necessarily the same data that is prepended to the content; that comes\n        # from title_prefix.\n        # This was added in\n        # https://github.com/onyx-dot-app/onyx/commit/e90c66c1b61c5b7da949652d703f7c906863e6e4#diff-2a2a29d5929de75cdaea77867a397934d9f8b785ce40a861c0d704033e3663ab,\n        # see postprocessing.py. At that time the content enrichment logic was\n        # also added in that commit, see\n        # https://github.com/onyx-dot-app/onyx/commit/e90c66c1b61c5b7da949652d703f7c906863e6e4#diff-d807718aa263a15c1d991a4ab063c360c8419eaad210b4ba70e1e9f47d2aa6d2R77\n        # chunker.py.\n        if not chunk.title or not chunk.content:\n            return chunk.content\n\n        if chunk.content.startswith(chunk.title):\n            return chunk.content[len(chunk.title) :].lstrip()\n\n        # BLURB SIZE is by token instead of char but each token is at least 1 char\n        # If this prefix matches the content, it's assumed the title was prepended\n        if chunk.content.startswith(chunk.title[:BLURB_SIZE]):\n            return (\n                chunk.content.split(RETURN_SEPARATOR, 1)[-1]\n                if RETURN_SEPARATOR in chunk.content\n                else chunk.content\n            )\n        return chunk.content\n\n    def _remove_metadata_suffix(chunk: InferenceChunkUncleaned) -> str:\n        if not chunk.metadata_suffix:\n            return chunk.content\n        return chunk.content.removesuffix(chunk.metadata_suffix).rstrip(\n            RETURN_SEPARATOR\n        )\n\n    def _remove_contextual_rag(chunk: InferenceChunkUncleaned) -> str:\n        # remove document summary\n        if chunk.doc_summary and chunk.content.startswith(chunk.doc_summary):\n            chunk.content = chunk.content[len(chunk.doc_summary) :].lstrip()\n        # remove chunk context\n        if chunk.chunk_context and chunk.content.endswith(chunk.chunk_context):\n            chunk.content = chunk.content[\n                : len(chunk.content) - len(chunk.chunk_context)\n            ].rstrip()\n        return chunk.content\n\n    for chunk in chunks:\n        chunk.content = _remove_title(chunk)\n        chunk.content = _remove_metadata_suffix(chunk)\n        chunk.content = _remove_contextual_rag(chunk)\n\n    return [chunk.to_inference_chunk() for chunk in chunks]\n"
  },
  {
    "path": "backend/onyx/document_index/disabled.py",
    "content": "\"\"\"A DocumentIndex implementation that raises on every operation.\n\nUsed as a safety net when DISABLE_VECTOR_DB is True. Any code path that\naccidentally reaches the vector DB layer will fail loudly instead of timing\nout against a nonexistent Vespa/OpenSearch instance.\n\"\"\"\n\nfrom collections.abc import Iterable\nfrom typing import Any\n\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import QueryExpansionType\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.document_index.interfaces import DocumentInsertionRecord\nfrom onyx.document_index.interfaces import IndexBatchParams\nfrom onyx.document_index.interfaces import VespaChunkRequest\nfrom onyx.document_index.interfaces import VespaDocumentFields\nfrom onyx.document_index.interfaces import VespaDocumentUserFields\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom shared_configs.model_server_models import Embedding\n\nVECTOR_DB_DISABLED_ERROR = \"Vector DB is disabled (DISABLE_VECTOR_DB=true). This operation requires a vector database.\"\n\n\nclass DisabledDocumentIndex(DocumentIndex):\n    \"\"\"A DocumentIndex where every method raises RuntimeError.\n\n    Returned by the factory when DISABLE_VECTOR_DB is True so that any\n    accidental vector-DB call surfaces immediately.\n    \"\"\"\n\n    def __init__(\n        self,\n        index_name: str = \"disabled\",\n        secondary_index_name: str | None = None,\n        *args: Any,  # noqa: ARG002\n        **kwargs: Any,  # noqa: ARG002\n    ) -> None:\n        self.index_name = index_name\n        self.secondary_index_name = secondary_index_name\n\n    # ------------------------------------------------------------------\n    # Verifiable\n    # ------------------------------------------------------------------\n    def ensure_indices_exist(\n        self,\n        primary_embedding_dim: int,  # noqa: ARG002\n        primary_embedding_precision: EmbeddingPrecision,  # noqa: ARG002\n        secondary_index_embedding_dim: int | None,  # noqa: ARG002\n        secondary_index_embedding_precision: EmbeddingPrecision | None,  # noqa: ARG002\n    ) -> None:\n        # No-op: there are no indices to create when the vector DB is disabled.\n        pass\n\n    @staticmethod\n    def register_multitenant_indices(\n        indices: list[str],  # noqa: ARG002, ARG004\n        embedding_dims: list[int],  # noqa: ARG002, ARG004\n        embedding_precisions: list[EmbeddingPrecision],  # noqa: ARG002, ARG004\n    ) -> None:\n        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)\n\n    # ------------------------------------------------------------------\n    # Indexable\n    # ------------------------------------------------------------------\n    def index(\n        self,\n        chunks: Iterable[DocMetadataAwareIndexChunk],  # noqa: ARG002\n        index_batch_params: IndexBatchParams,  # noqa: ARG002\n    ) -> set[DocumentInsertionRecord]:\n        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)\n\n    # ------------------------------------------------------------------\n    # Deletable\n    # ------------------------------------------------------------------\n    def delete_single(\n        self,\n        doc_id: str,  # noqa: ARG002\n        *,\n        tenant_id: str,  # noqa: ARG002\n        chunk_count: int | None,  # noqa: ARG002\n    ) -> int:\n        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)\n\n    # ------------------------------------------------------------------\n    # Updatable\n    # ------------------------------------------------------------------\n    def update_single(\n        self,\n        doc_id: str,  # noqa: ARG002\n        *,\n        tenant_id: str,  # noqa: ARG002\n        chunk_count: int | None,  # noqa: ARG002\n        fields: VespaDocumentFields | None,  # noqa: ARG002\n        user_fields: VespaDocumentUserFields | None,  # noqa: ARG002\n    ) -> None:\n        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)\n\n    # ------------------------------------------------------------------\n    # IdRetrievalCapable\n    # ------------------------------------------------------------------\n    def id_based_retrieval(\n        self,\n        chunk_requests: list[VespaChunkRequest],  # noqa: ARG002\n        filters: IndexFilters,  # noqa: ARG002\n        batch_retrieval: bool = False,  # noqa: ARG002\n    ) -> list[InferenceChunk]:\n        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)\n\n    # ------------------------------------------------------------------\n    # HybridCapable\n    # ------------------------------------------------------------------\n    def hybrid_retrieval(\n        self,\n        query: str,  # noqa: ARG002\n        query_embedding: Embedding,  # noqa: ARG002\n        final_keywords: list[str] | None,  # noqa: ARG002\n        filters: IndexFilters,  # noqa: ARG002\n        hybrid_alpha: float,  # noqa: ARG002\n        time_decay_multiplier: float,  # noqa: ARG002\n        num_to_retrieve: int,  # noqa: ARG002\n        ranking_profile_type: QueryExpansionType,  # noqa: ARG002\n        title_content_ratio: float | None = None,  # noqa: ARG002\n    ) -> list[InferenceChunk]:\n        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)\n\n    # ------------------------------------------------------------------\n    # AdminCapable\n    # ------------------------------------------------------------------\n    def admin_retrieval(\n        self,\n        query: str,  # noqa: ARG002\n        query_embedding: Embedding,  # noqa: ARG002\n        filters: IndexFilters,  # noqa: ARG002\n        num_to_retrieve: int = 10,  # noqa: ARG002\n    ) -> list[InferenceChunk]:\n        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)\n\n    # ------------------------------------------------------------------\n    # RandomCapable\n    # ------------------------------------------------------------------\n    def random_retrieval(\n        self,\n        filters: IndexFilters,  # noqa: ARG002\n        num_to_retrieve: int = 10,  # noqa: ARG002\n    ) -> list[InferenceChunk]:\n        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)\n"
  },
  {
    "path": "backend/onyx/document_index/document_index_utils.py",
    "content": "import math\nimport uuid\nfrom uuid import UUID\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import ENABLE_MULTIPASS_INDEXING\nfrom onyx.db.models import SearchSettings\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.db.search_settings import get_secondary_search_settings\nfrom onyx.document_index.interfaces import EnrichedDocumentIndexingInfo\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom onyx.indexing.models import MultipassConfig\nfrom shared_configs.configs import MULTI_TENANT\n\nDEFAULT_BATCH_SIZE = 30\nDEFAULT_INDEX_NAME = \"danswer_chunk\"\n\n\ndef should_use_multipass(search_settings: SearchSettings | None) -> bool:\n    \"\"\"\n    Determines whether multipass should be used based on the search settings\n    or the default config if settings are unavailable.\n    \"\"\"\n    if search_settings is not None:\n        return search_settings.multipass_indexing\n    return ENABLE_MULTIPASS_INDEXING\n\n\ndef get_multipass_config(search_settings: SearchSettings) -> MultipassConfig:\n    \"\"\"\n    Determines whether to enable multipass and large chunks by examining\n    the current search settings and the embedder configuration.\n    \"\"\"\n    multipass = should_use_multipass(search_settings)\n    enable_large_chunks = SearchSettings.can_use_large_chunks(\n        multipass, search_settings.model_name, search_settings.provider_type\n    )\n    return MultipassConfig(\n        multipass_indexing=multipass, enable_large_chunks=enable_large_chunks\n    )\n\n\ndef get_both_index_properties(\n    db_session: Session,\n) -> tuple[str, str | None, bool, bool | None]:\n    search_settings = get_current_search_settings(db_session)\n    config_1 = get_multipass_config(search_settings)\n\n    search_settings_new = get_secondary_search_settings(db_session)\n    if not search_settings_new:\n        return search_settings.index_name, None, config_1.enable_large_chunks, None\n\n    config_2 = get_multipass_config(search_settings)\n    return (\n        search_settings.index_name,\n        search_settings_new.index_name,\n        config_1.enable_large_chunks,\n        config_2.enable_large_chunks,\n    )\n\n\ndef translate_boost_count_to_multiplier(boost: int) -> float:\n    \"\"\"Mapping boost integer values to a multiplier according to a sigmoid curve\n    Piecewise such that at many downvotes, its 0.5x the score and with many upvotes\n    it is 2x the score. This should be in line with the Vespa calculation.\"\"\"\n    # 3 in the equation below stretches it out to hit asymptotes slower\n    if boost < 0:\n        # 0.5 + sigmoid -> range of 0.5 to 1\n        return 0.5 + (1 / (1 + math.exp(-1 * boost / 3)))\n\n    # 2 x sigmoid -> range of 1 to 2\n    return 2 / (1 + math.exp(-1 * boost / 3))\n\n\n# Assembles a list of Vespa chunk IDs for a document\n# given the required context. This can be used to directly query\n# Vespa's Document API.\ndef get_document_chunk_ids(\n    enriched_document_info_list: list[EnrichedDocumentIndexingInfo],\n    tenant_id: str,\n    large_chunks_enabled: bool,\n) -> list[UUID]:\n    doc_chunk_ids = []\n\n    for enriched_document_info in enriched_document_info_list:\n        for chunk_index in range(\n            enriched_document_info.chunk_start_index,\n            enriched_document_info.chunk_end_index,\n        ):\n            if not enriched_document_info.old_version:\n                doc_chunk_ids.append(\n                    get_uuid_from_chunk_info(\n                        document_id=enriched_document_info.doc_id,\n                        chunk_id=chunk_index,\n                        tenant_id=tenant_id,\n                    )\n                )\n            else:\n                doc_chunk_ids.append(\n                    get_uuid_from_chunk_info_old(\n                        document_id=enriched_document_info.doc_id,\n                        chunk_id=chunk_index,\n                    )\n                )\n\n            if large_chunks_enabled and chunk_index % 4 == 0:\n                large_chunk_id = int(chunk_index / 4)\n                large_chunk_reference_ids = [\n                    large_chunk_id + i\n                    for i in range(4)\n                    if large_chunk_id + i < enriched_document_info.chunk_end_index\n                ]\n                if enriched_document_info.old_version:\n                    doc_chunk_ids.append(\n                        get_uuid_from_chunk_info_old(\n                            document_id=enriched_document_info.doc_id,\n                            chunk_id=large_chunk_id,\n                            large_chunk_reference_ids=large_chunk_reference_ids,\n                        )\n                    )\n                else:\n                    doc_chunk_ids.append(\n                        get_uuid_from_chunk_info(\n                            document_id=enriched_document_info.doc_id,\n                            chunk_id=large_chunk_id,\n                            tenant_id=tenant_id,\n                            large_chunk_id=large_chunk_id,\n                        )\n                    )\n\n    return doc_chunk_ids\n\n\ndef get_uuid_from_chunk_info(\n    *,\n    document_id: str,\n    chunk_id: int,\n    tenant_id: str,\n    large_chunk_id: int | None = None,\n) -> UUID:\n    \"\"\"NOTE: be VERY carefuly about changing this function. If changed without a migration,\n    this can cause deletion/update/insertion to function incorrectly.\"\"\"\n    doc_str = document_id\n\n    # Web parsing URL duplicate catching\n    if doc_str and doc_str[-1] == \"/\":\n        doc_str = doc_str[:-1]\n\n    chunk_index = (\n        \"large_\" + str(large_chunk_id) if large_chunk_id is not None else str(chunk_id)\n    )\n    unique_identifier_string = \"_\".join([doc_str, chunk_index])\n    if MULTI_TENANT:\n        unique_identifier_string += \"_\" + tenant_id\n\n    uuid_value = uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string)\n    return uuid_value\n\n\ndef get_uuid_from_chunk_info_old(\n    *, document_id: str, chunk_id: int, large_chunk_reference_ids: list[int] = []\n) -> UUID:\n    doc_str = document_id\n\n    # Web parsing URL duplicate catching\n    if doc_str and doc_str[-1] == \"/\":\n        doc_str = doc_str[:-1]\n    unique_identifier_string = \"_\".join([doc_str, str(chunk_id), \"0\"])\n    if large_chunk_reference_ids:\n        unique_identifier_string += \"_large\" + \"_\".join(\n            [\n                str(referenced_chunk_id)\n                for referenced_chunk_id in large_chunk_reference_ids\n            ]\n        )\n    return uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string)\n\n\ndef get_uuid_from_chunk(chunk: DocMetadataAwareIndexChunk) -> uuid.UUID:\n    return get_uuid_from_chunk_info(\n        document_id=chunk.source_document.id,\n        chunk_id=chunk.chunk_id,\n        tenant_id=chunk.tenant_id,\n        large_chunk_id=chunk.large_chunk_id,\n    )\n\n\ndef get_uuid_from_chunk_old(\n    chunk: DocMetadataAwareIndexChunk, large_chunk_reference_ids: list[int] = []\n) -> UUID:\n    return get_uuid_from_chunk_info_old(\n        document_id=chunk.source_document.id,\n        chunk_id=chunk.chunk_id,\n        large_chunk_reference_ids=large_chunk_reference_ids,\n    )\n"
  },
  {
    "path": "backend/onyx/document_index/factory.py",
    "content": "import httpx\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\nfrom onyx.db.models import SearchSettings\nfrom onyx.db.opensearch_migration import get_opensearch_retrieval_state\nfrom onyx.document_index.disabled import DisabledDocumentIndex\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.document_index.opensearch.opensearch_document_index import (\n    OpenSearchOldDocumentIndex,\n)\nfrom onyx.document_index.vespa.index import VespaIndex\nfrom onyx.indexing.models import IndexingSetting\nfrom shared_configs.configs import MULTI_TENANT\n\n\ndef get_default_document_index(\n    search_settings: SearchSettings,\n    secondary_search_settings: SearchSettings | None,\n    db_session: Session,\n    httpx_client: httpx.Client | None = None,\n) -> DocumentIndex:\n    \"\"\"Gets the default document index from env vars.\n\n    To be used for retrieval only. Indexing should be done through both indices\n    until Vespa is deprecated.\n\n    Primary index is the index that is used for querying/updating etc. Secondary\n    index is for when both the currently used index and the upcoming index both\n    need to be updated. Updates are applied to both indices.\n    WARNING: In that case, get_all_document_indices should be used.\n    \"\"\"\n    if DISABLE_VECTOR_DB:\n        return DisabledDocumentIndex(\n            index_name=search_settings.index_name,\n            secondary_index_name=(\n                secondary_search_settings.index_name\n                if secondary_search_settings\n                else None\n            ),\n        )\n\n    secondary_index_name: str | None = None\n    secondary_large_chunks_enabled: bool | None = None\n    if secondary_search_settings:\n        secondary_index_name = secondary_search_settings.index_name\n        secondary_large_chunks_enabled = secondary_search_settings.large_chunks_enabled\n\n    opensearch_retrieval_enabled = get_opensearch_retrieval_state(db_session)\n    if opensearch_retrieval_enabled:\n        indexing_setting = IndexingSetting.from_db_model(search_settings)\n        secondary_indexing_setting = (\n            IndexingSetting.from_db_model(secondary_search_settings)\n            if secondary_search_settings\n            else None\n        )\n        return OpenSearchOldDocumentIndex(\n            index_name=search_settings.index_name,\n            embedding_dim=indexing_setting.final_embedding_dim,\n            embedding_precision=indexing_setting.embedding_precision,\n            secondary_index_name=secondary_index_name,\n            secondary_embedding_dim=(\n                secondary_indexing_setting.final_embedding_dim\n                if secondary_indexing_setting\n                else None\n            ),\n            secondary_embedding_precision=(\n                secondary_indexing_setting.embedding_precision\n                if secondary_indexing_setting\n                else None\n            ),\n            large_chunks_enabled=search_settings.large_chunks_enabled,\n            secondary_large_chunks_enabled=secondary_large_chunks_enabled,\n            multitenant=MULTI_TENANT,\n            httpx_client=httpx_client,\n        )\n    else:\n        return VespaIndex(\n            index_name=search_settings.index_name,\n            secondary_index_name=secondary_index_name,\n            large_chunks_enabled=search_settings.large_chunks_enabled,\n            secondary_large_chunks_enabled=secondary_large_chunks_enabled,\n            multitenant=MULTI_TENANT,\n            httpx_client=httpx_client,\n        )\n\n\ndef get_all_document_indices(\n    search_settings: SearchSettings,\n    secondary_search_settings: SearchSettings | None,\n    httpx_client: httpx.Client | None = None,\n) -> list[DocumentIndex]:\n    \"\"\"Gets all document indices.\n\n    NOTE: Will only return an OpenSearch index interface if\n    ENABLE_OPENSEARCH_INDEXING_FOR_ONYX is True. This is so we don't break flows\n    where we know it won't be enabled.\n\n    Used for indexing only. Until Vespa is deprecated we will index into both\n    document indices. Retrieval is done through only one index however.\n\n    Large chunks are not currently supported so we hardcode appropriate values.\n\n    NOTE: Make sure the Vespa index object is returned first. In the rare event\n    that there is some conflict between indexing and the migration task, it is\n    assumed that the state of Vespa is more up-to-date than the state of\n    OpenSearch.\n    \"\"\"\n    if DISABLE_VECTOR_DB:\n        return [\n            DisabledDocumentIndex(\n                index_name=search_settings.index_name,\n                secondary_index_name=(\n                    secondary_search_settings.index_name\n                    if secondary_search_settings\n                    else None\n                ),\n            )\n        ]\n\n    vespa_document_index = VespaIndex(\n        index_name=search_settings.index_name,\n        secondary_index_name=(\n            secondary_search_settings.index_name if secondary_search_settings else None\n        ),\n        large_chunks_enabled=search_settings.large_chunks_enabled,\n        secondary_large_chunks_enabled=(\n            secondary_search_settings.large_chunks_enabled\n            if secondary_search_settings\n            else None\n        ),\n        multitenant=MULTI_TENANT,\n        httpx_client=httpx_client,\n    )\n    opensearch_document_index: OpenSearchOldDocumentIndex | None = None\n    if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:\n        indexing_setting = IndexingSetting.from_db_model(search_settings)\n        secondary_indexing_setting = (\n            IndexingSetting.from_db_model(secondary_search_settings)\n            if secondary_search_settings\n            else None\n        )\n        opensearch_document_index = OpenSearchOldDocumentIndex(\n            index_name=search_settings.index_name,\n            embedding_dim=indexing_setting.final_embedding_dim,\n            embedding_precision=indexing_setting.embedding_precision,\n            secondary_index_name=(\n                secondary_search_settings.index_name\n                if secondary_search_settings\n                else None\n            ),\n            secondary_embedding_dim=(\n                secondary_indexing_setting.final_embedding_dim\n                if secondary_indexing_setting\n                else None\n            ),\n            secondary_embedding_precision=(\n                secondary_indexing_setting.embedding_precision\n                if secondary_indexing_setting\n                else None\n            ),\n            large_chunks_enabled=search_settings.large_chunks_enabled,\n            secondary_large_chunks_enabled=(\n                secondary_search_settings.large_chunks_enabled\n                if secondary_search_settings\n                else None\n            ),\n            multitenant=MULTI_TENANT,\n            httpx_client=httpx_client,\n        )\n    result: list[DocumentIndex] = [vespa_document_index]\n    if opensearch_document_index:\n        result.append(opensearch_document_index)\n    return result\n"
  },
  {
    "path": "backend/onyx/document_index/interfaces.py",
    "content": "import abc\nfrom collections.abc import Iterable\nfrom dataclasses import dataclass\nfrom datetime import datetime\nfrom typing import Any\n\nfrom onyx.access.models import DocumentAccess\nfrom onyx.access.models import ExternalAccess\nfrom onyx.configs.chat_configs import NUM_RETURNED_HITS\nfrom onyx.configs.chat_configs import TITLE_CONTENT_RATIO\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import QueryExpansionType\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom shared_configs.model_server_models import Embedding\n\n\n@dataclass(frozen=True)\nclass DocumentInsertionRecord:\n    document_id: str\n    already_existed: bool\n\n\n@dataclass(frozen=True)\nclass VespaChunkRequest:\n    document_id: str\n    min_chunk_ind: int | None = None\n    max_chunk_ind: int | None = None\n\n    @property\n    def is_capped(self) -> bool:\n        # If the max chunk index is not None, then the chunk request is capped\n        # If the min chunk index is None, we can assume the min is 0\n        return self.max_chunk_ind is not None\n\n    @property\n    def range(self) -> int | None:\n        if self.max_chunk_ind is not None:\n            return (self.max_chunk_ind - (self.min_chunk_ind or 0)) + 1\n        return None\n\n\n@dataclass\nclass IndexBatchParams:\n    \"\"\"\n    Information necessary for efficiently indexing a batch of documents\n    \"\"\"\n\n    doc_id_to_previous_chunk_cnt: dict[str, int]\n    doc_id_to_new_chunk_cnt: dict[str, int]\n    tenant_id: str\n    large_chunks_enabled: bool\n\n\n@dataclass\nclass MinimalDocumentIndexingInfo:\n    \"\"\"\n    Minimal information necessary for indexing a document\n    \"\"\"\n\n    doc_id: str\n    chunk_start_index: int\n\n\n@dataclass\nclass EnrichedDocumentIndexingInfo(MinimalDocumentIndexingInfo):\n    \"\"\"\n    Enriched information necessary for indexing a document, including version and chunk range.\n    \"\"\"\n\n    old_version: bool\n    chunk_end_index: int\n\n\n@dataclass\nclass DocumentMetadata:\n    \"\"\"\n    Document information that needs to be inserted into Postgres on first time encountering this\n    document during indexing across any of the connectors.\n    \"\"\"\n\n    connector_id: int\n    credential_id: int\n    document_id: str\n    semantic_identifier: str\n    first_link: str\n    doc_updated_at: datetime | None = None\n    # Emails, not necessarily attached to users\n    # Users may not be in Onyx\n    primary_owners: list[str] | None = None\n    secondary_owners: list[str] | None = None\n    from_ingestion_api: bool = False\n\n    external_access: ExternalAccess | None = None\n    doc_metadata: dict[str, Any] | None = None\n\n    # The resolved database ID of the parent hierarchy node (folder/container)\n    parent_hierarchy_node_id: int | None = None\n\n\n@dataclass\nclass VespaDocumentFields:\n    \"\"\"\n    Specifies fields in Vespa for a document.  Fields set to None will be ignored.\n    Perhaps we should name this in an implementation agnostic fashion, but it's more\n    understandable like this for now.\n    \"\"\"\n\n    # all other fields except these 4 will always be left alone by the update request\n    access: DocumentAccess | None = None\n    document_sets: set[str] | None = None\n    boost: float | None = None\n    hidden: bool | None = None\n    aggregated_chunk_boost_factor: float | None = None\n\n\n@dataclass\nclass VespaDocumentUserFields:\n    \"\"\"\n    Fields that are specific to the user who is indexing the document.\n    \"\"\"\n\n    user_projects: list[int] | None = None\n    personas: list[int] | None = None\n\n\n@dataclass\nclass UpdateRequest:\n    \"\"\"\n    For all document_ids, update the allowed_users and the boost to the new values\n    Does not update any of the None fields\n    \"\"\"\n\n    minimal_document_indexing_info: list[MinimalDocumentIndexingInfo]\n    # all other fields except these 4 will always be left alone by the update request\n    access: DocumentAccess | None = None\n    document_sets: set[str] | None = None\n    boost: float | None = None\n    hidden: bool | None = None\n\n\nclass Verifiable(abc.ABC):\n    \"\"\"\n    Class must implement document index schema verification. For example, verify that all of the\n    necessary attributes for indexing, querying, filtering, and fields to return from search are\n    all valid in the schema.\n\n    Parameters:\n    - index_name: The name of the primary index currently used for querying\n    - secondary_index_name: The name of the secondary index being built in the background, if it\n            currently exists. Some functions on the document index act on both the primary and\n            secondary index, some act on just one.\n    \"\"\"\n\n    @abc.abstractmethod\n    def __init__(\n        self,\n        index_name: str,\n        secondary_index_name: str | None,\n        *args: Any,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__(*args, **kwargs)\n        self.index_name = index_name\n        self.secondary_index_name = secondary_index_name\n\n    @abc.abstractmethod\n    def ensure_indices_exist(\n        self,\n        primary_embedding_dim: int,\n        primary_embedding_precision: EmbeddingPrecision,\n        secondary_index_embedding_dim: int | None,\n        secondary_index_embedding_precision: EmbeddingPrecision | None,\n    ) -> None:\n        \"\"\"\n        Verify that the document index exists and is consistent with the expectations in the code.\n\n        Parameters:\n        - primary_embedding_dim: Vector dimensionality for the vector similarity part of the search\n        - primary_embedding_precision: Precision of the vector similarity part of the search\n        - secondary_index_embedding_dim: Vector dimensionality of the secondary index being built\n                behind the scenes. The secondary index should only be built when switching\n                embedding models therefore this dim should be different from the primary index.\n        - secondary_index_embedding_precision: Precision of the vector similarity part of the secondary index\n        \"\"\"\n        raise NotImplementedError\n\n    @staticmethod\n    @abc.abstractmethod\n    def register_multitenant_indices(\n        indices: list[str],\n        embedding_dims: list[int],\n        embedding_precisions: list[EmbeddingPrecision],\n    ) -> None:\n        \"\"\"\n        Register multitenant indices with the document index.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass Indexable(abc.ABC):\n    \"\"\"\n    Class must implement the ability to index document chunks\n    \"\"\"\n\n    @abc.abstractmethod\n    def index(\n        self,\n        chunks: Iterable[DocMetadataAwareIndexChunk],\n        index_batch_params: IndexBatchParams,\n    ) -> set[DocumentInsertionRecord]:\n        \"\"\"\n        Takes a list of document chunks and indexes them in the document index\n\n        NOTE: When a document is reindexed/updated here, it must clear all of the existing document\n        chunks before reindexing. This is because the document may have gotten shorter since the\n        last run. Therefore, upserting the first 0 through n chunks may leave some old chunks that\n        have not been written over.\n\n        NOTE: The chunks of a document are never separated into separate index() calls. So there is\n        no worry of receiving the first 0 through n chunks in one index call and the next n through\n        m chunks of a docu in the next index call.\n\n        NOTE: Due to some asymmetry between the primary and secondary indexing logic, this function\n        only needs to index chunks into the PRIMARY index. Do not update the secondary index here,\n        it is done automatically outside of this code.\n\n        Parameters:\n        - chunks: Document chunks with all of the information needed for\n                indexing to the document index.\n        - tenant_id: The tenant id of the user whose chunks are being indexed\n        - large_chunks_enabled: Whether large chunks are enabled\n\n        Returns:\n            List of document ids which map to unique documents and are used for deduping chunks\n            when updating, as well as if the document is newly indexed or already existed and\n            just updated\n        \"\"\"\n        raise NotImplementedError\n\n\nclass Deletable(abc.ABC):\n    \"\"\"\n    Class must implement the ability to delete document by a given unique document id.\n    \"\"\"\n\n    @abc.abstractmethod\n    def delete_single(\n        self,\n        doc_id: str,\n        *,\n        tenant_id: str,\n        chunk_count: int | None,\n    ) -> int:\n        \"\"\"\n        Given a single document id, hard delete it from the document index\n\n        Parameters:\n        - doc_id: document id as specified by the connector\n        \"\"\"\n        raise NotImplementedError\n\n\nclass Updatable(abc.ABC):\n    \"\"\"\n    Class must implement the ability to update certain attributes of a document without needing to\n    update all of the fields. Specifically, needs to be able to update:\n    - Access Control List\n    - Document-set membership\n    - Boost value (learning from feedback mechanism)\n    - Whether the document is hidden or not, hidden documents are not returned from search\n    \"\"\"\n\n    @abc.abstractmethod\n    def update_single(\n        self,\n        doc_id: str,\n        *,\n        tenant_id: str,\n        chunk_count: int | None,\n        fields: VespaDocumentFields | None,\n        user_fields: VespaDocumentUserFields | None,\n    ) -> None:\n        \"\"\"\n        Updates all chunks for a document with the specified fields.\n        None values mean that the field does not need an update.\n\n        The rationale for a single update function is that it allows retries and parallelism\n        to happen at a higher / more strategic level, is simpler to read, and allows\n        us to individually handle error conditions per document.\n\n        Parameters:\n        - fields: the fields to update in the document. Any field set to None will not be changed.\n\n        Return:\n            None\n        \"\"\"\n        raise NotImplementedError\n\n\nclass IdRetrievalCapable(abc.ABC):\n    \"\"\"\n    Class must implement the ability to retrieve either:\n    - all of the chunks of a document IN ORDER given a document id.\n    - a specific chunk given a document id and a chunk index (0 based)\n    \"\"\"\n\n    @abc.abstractmethod\n    def id_based_retrieval(\n        self,\n        chunk_requests: list[VespaChunkRequest],\n        filters: IndexFilters,\n        batch_retrieval: bool = False,\n    ) -> list[InferenceChunk]:\n        \"\"\"\n        Fetch chunk(s) based on document id\n\n        NOTE: This is used to reconstruct a full document or an extended (multi-chunk) section\n        of a document. Downstream currently assumes that the chunking does not introduce overlaps\n        between the chunks. If there are overlaps for the chunks, then the reconstructed document\n        or extended section will have duplicate segments.\n\n        Parameters:\n        - chunk_requests: requests containing the document id and the chunk range to retrieve\n        - filters: Filters to apply to retrieval\n        - batch_retrieval: If True, perform a batch retrieval\n\n        Returns:\n            list of chunks for the document id or the specific chunk by the specified chunk index\n            and document id\n        \"\"\"\n        raise NotImplementedError\n\n\nclass HybridCapable(abc.ABC):\n    \"\"\"\n    Class must implement hybrid (keyword + vector) search functionality\n    \"\"\"\n\n    @abc.abstractmethod\n    def hybrid_retrieval(\n        self,\n        query: str,\n        query_embedding: Embedding,\n        final_keywords: list[str] | None,\n        filters: IndexFilters,\n        hybrid_alpha: float,\n        time_decay_multiplier: float,\n        num_to_retrieve: int,\n        ranking_profile_type: QueryExpansionType,\n        title_content_ratio: float | None = TITLE_CONTENT_RATIO,\n    ) -> list[InferenceChunk]:\n        \"\"\"\n        Run hybrid search and return a list of inference chunks.\n\n        NOTE: the query passed in here is the unprocessed plain text query. Preprocessing is\n        expected to be handled by this function as it may depend on the index implementation.\n        Things like query expansion, synonym injection, stop word removal, lemmatization, etc. are\n        done here.\n\n        Parameters:\n        - query: unmodified user query. This is needed for getting the matching highlighted\n                keywords\n        - query_embedding: vector representation of the query, must be of the correct\n                dimensionality for the primary index\n        - final_keywords: Final keywords to be used from the query, defaults to query if not set\n        - filters: standard filter object\n        - hybrid_alpha: weighting between the keyword and vector search results. It is important\n                that the two scores are normalized to the same range so that a meaningful\n                comparison can be made. 1 for 100% weighting on vector score, 0 for 100% weighting\n                on keyword score.\n        - time_decay_multiplier: how much to decay the document scores as they age. Some queries\n                based on the persona settings, will have this be a 2x or 3x of the default\n        - num_to_retrieve: number of highest matching chunks to return\n\n        Returns:\n            best matching chunks based on weighted sum of keyword and vector/semantic search scores\n        \"\"\"\n        raise NotImplementedError\n\n\nclass AdminCapable(abc.ABC):\n    \"\"\"\n    Class must implement a search for the admin \"Explorer\" page. The assumption here is that the\n    admin is not \"searching\" for knowledge but has some document already in mind. They are either\n    looking to positively boost it because they know it's a good reference document, looking to\n    negatively boost it as a way of \"deprecating\", or hiding the document.\n\n    Assuming the admin knows the document name, this search has high emphasis on the title match.\n\n    Suggested implementation:\n    Keyword only, BM25 search with 5x weighting on the title field compared to the contents\n    \"\"\"\n\n    @abc.abstractmethod\n    def admin_retrieval(\n        self,\n        query: str,\n        query_embedding: Embedding,\n        filters: IndexFilters,\n        num_to_retrieve: int = NUM_RETURNED_HITS,\n    ) -> list[InferenceChunk]:\n        \"\"\"\n        Run the special search for the admin document explorer page\n\n        Parameters:\n        - query: unmodified user query. Though in this flow probably unmodified is best\n        - filters: standard filter object\n        - num_to_retrieve: number of highest matching chunks to return\n\n        Returns:\n            list of best matching chunks for the explorer page query\n        \"\"\"\n        raise NotImplementedError\n\n\nclass RandomCapable(abc.ABC):\n    \"\"\"Class must implement random document retrieval capability\"\"\"\n\n    @abc.abstractmethod\n    def random_retrieval(\n        self,\n        filters: IndexFilters,\n        num_to_retrieve: int = 10,\n    ) -> list[InferenceChunk]:\n        \"\"\"Retrieve random chunks matching the filters\"\"\"\n        raise NotImplementedError\n\n\nclass BaseIndex(\n    Verifiable,\n    Indexable,\n    Updatable,\n    Deletable,\n    AdminCapable,\n    IdRetrievalCapable,\n    RandomCapable,\n    abc.ABC,\n):\n    \"\"\"\n    All basic document index functionalities excluding the actual querying approach.\n\n    As a summary, document indices need to be able to\n    - Verify the schema definition is valid\n    - Index new documents\n    - Update specific attributes of existing documents\n    - Delete documents\n    - Provide a search for the admin document explorer page\n    - Retrieve documents based on document id\n    \"\"\"\n\n\nclass DocumentIndex(HybridCapable, BaseIndex, abc.ABC):\n    \"\"\"\n    A valid document index that can plug into all Onyx flows must implement all of these\n    functionalities, though \"technically\" it does not need to be keyword or vector capable as\n    currently all default search flows use Hybrid Search.\n    \"\"\"\n"
  },
  {
    "path": "backend/onyx/document_index/interfaces_new.py",
    "content": "import abc\nfrom collections.abc import Iterable\nfrom typing import Self\n\nfrom pydantic import BaseModel\nfrom pydantic import model_validator\n\nfrom onyx.access.models import DocumentAccess\nfrom onyx.configs.constants import PUBLIC_DOC_PAT\nfrom onyx.context.search.enums import QueryType\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom shared_configs.model_server_models import Embedding\n\n# NOTE: \"Document\" in the naming convention is used to refer to the entire\n# document as represented in Onyx. What is actually stored in the index is the\n# document chunks. By the terminology of most search engines / vector databases,\n# the individual objects stored are called documents, but in this case it refers\n# to a chunk.\n\n\n__all__ = [\n    # Main interfaces - these are what you should inherit from\n    \"DocumentIndex\",\n    # Data models - used in method signatures\n    \"DocumentInsertionRecord\",\n    \"DocumentSectionRequest\",\n    \"IndexingMetadata\",\n    \"MetadataUpdateRequest\",\n    # Capability mixins - for custom compositions or type checking\n    \"SchemaVerifiable\",\n    \"Indexable\",\n    \"Deletable\",\n    \"Updatable\",\n    \"IdRetrievalCapable\",\n    \"HybridCapable\",\n    \"RandomCapable\",\n]\n\n\nclass TenantState(BaseModel):\n    \"\"\"\n    Captures the tenant-related state for an instance of DocumentIndex.\n\n    NOTE: Tenant ID must be set in multitenant mode.\n    \"\"\"\n\n    model_config = {\"frozen\": True}\n\n    tenant_id: str\n    multitenant: bool\n\n    def __str__(self) -> str:\n        return (\n            f\"TenantState(tenant_id={self.tenant_id}, multitenant={self.multitenant})\"\n        )\n\n    @model_validator(mode=\"after\")\n    def check_tenant_id_is_set_in_multitenant_mode(self) -> Self:\n        if self.multitenant and not self.tenant_id:\n            raise ValueError(\"Bug: Tenant ID must be set in multitenant mode.\")\n        return self\n\n\nclass DocumentInsertionRecord(BaseModel):\n    \"\"\"\n    Result of indexing a document.\n    \"\"\"\n\n    model_config = {\"frozen\": True}\n\n    document_id: str\n    already_existed: bool\n\n\nclass DocumentSectionRequest(BaseModel):\n    \"\"\"Request for a document section or whole document.\n\n    If no min_chunk_ind is provided it should start at the beginning of the\n    document.\n    If no max_chunk_ind is provided it should go to the end of the document.\n    \"\"\"\n\n    model_config = {\"frozen\": True}\n\n    document_id: str\n    min_chunk_ind: int | None = None\n    max_chunk_ind: int | None = None\n    # A given document can have multiple chunking strategies.\n    max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE\n\n    @model_validator(mode=\"after\")\n    def check_chunk_index_range_is_valid(self) -> Self:\n        if (\n            self.min_chunk_ind is not None\n            and self.max_chunk_ind is not None\n            and self.min_chunk_ind > self.max_chunk_ind\n        ):\n            raise ValueError(\n                \"Bug: Min chunk index must be less than or equal to max chunk index.\"\n            )\n        return self\n\n\nclass IndexingMetadata(BaseModel):\n    \"\"\"\n    Information about chunk counts for efficient cleaning / updating of document\n    chunks.\n\n    A common pattern to ensure that no chunks are left over is to delete all of\n    the chunks for a document and then re-index the document. This information\n    allows us to only delete the extra \"tail\" chunks when the document has\n    gotten shorter.\n    \"\"\"\n\n    class ChunkCounts(BaseModel):\n        model_config = {\"frozen\": True}\n\n        old_chunk_cnt: int\n        new_chunk_cnt: int\n\n    model_config = {\"frozen\": True}\n\n    doc_id_to_chunk_cnt_diff: dict[str, ChunkCounts]\n\n\nclass MetadataUpdateRequest(BaseModel):\n    \"\"\"\n    Updates to the documents that can happen without there being an update to\n    the contents of the document.\n    \"\"\"\n\n    model_config = {\"frozen\": True}\n\n    document_ids: list[str]\n    # Passed in to help with potential optimizations of the implementation. The\n    # keys should be redundant with document_ids.\n    # NOTE: Generally the chunk count should always be known, however for\n    # documents still using the legacy chunk ID system it may not be. Any chunk\n    # count value < 0 should represent an unknown chunk count.\n    doc_id_to_chunk_cnt: dict[str, int]\n    # For the ones that are None, there is no update required to that field.\n    access: DocumentAccess | None = None\n    document_sets: set[str] | None = None\n    boost: float | None = None\n    hidden: bool | None = None\n    secondary_index_updated: bool | None = None\n    project_ids: set[int] | None = None\n    persona_ids: set[int] | None = None\n\n\nclass IndexRetrievalFilters(BaseModel):\n    \"\"\"\n    Filters for retrieving chunks from the index.\n\n    Used to filter on permissions and other Onyx-specific metadata rather than\n    chunk content. Should be passed in for every retrieval method.\n\n    TODO(andrei): Currently unused, use this when making retrieval methods more\n    strict.\n    \"\"\"\n\n    model_config = {\"frozen\": True}\n\n    # frozenset gets around the issue of python's mutable defaults.\n    # WARNING: Falls back to only public docs as default for security. If\n    # callers want no access filtering they must explicitly supply an empty set.\n    # Doing so should be done sparingly.\n    access_control_list: frozenset[str] = frozenset({PUBLIC_DOC_PAT})\n\n\nclass SchemaVerifiable(abc.ABC):\n    \"\"\"\n    Class must implement document index schema verification. For example, verify\n    that all of the necessary attributes for indexing, querying, filtering, and\n    fields to return from search are all valid in the schema.\n    \"\"\"\n\n    @abc.abstractmethod\n    def verify_and_create_index_if_necessary(\n        self,\n        embedding_dim: int,\n        embedding_precision: EmbeddingPrecision,\n    ) -> None:\n        \"\"\"\n        Verifies that the document index exists and is consistent with the\n        expectations in the code.\n\n        For certain search engines, the schema needs to be created before\n        indexing can happen. This call should create the schema if it does not\n        exist.\n\n        Args:\n            embedding_dim: Vector dimensionality for the vector similarity part\n                of the search.\n            embedding_precision: Precision of the values of the vectors for the\n                similarity part of the search.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass Indexable(abc.ABC):\n    \"\"\"\n    Class must implement the ability to index document chunks.\n    \"\"\"\n\n    @abc.abstractmethod\n    def index(\n        self,\n        chunks: Iterable[DocMetadataAwareIndexChunk],\n        indexing_metadata: IndexingMetadata,\n    ) -> list[DocumentInsertionRecord]:\n        \"\"\"Indexes an iterable of document chunks into the document index.\n\n        This is often a batch operation including chunks from multiple\n        documents.\n\n        NOTE: When a document is reindexed/updated here and has gotten shorter,\n        it is important to delete the extra chunks at the end to ensure there\n        are no stale chunks in the index. The implementation should do this.\n\n        NOTE: The chunks of a document are never separated into separate index()\n        calls. So there is no worry of receiving the first 0 through n chunks in\n        one index call and the next n through m chunks of a document in the next\n        index call.\n\n        Args:\n            chunks: Document chunks with all of the information needed for\n                indexing to the document index.\n            indexing_metadata: Information about chunk counts for efficient\n                cleaning / updating.\n\n        Returns:\n            List of document IDs which map to unique documents as well as if the\n                document is newly indexed or had already existed and was just\n                updated.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass Deletable(abc.ABC):\n    \"\"\"\n    Class must implement the ability to delete a document by a given unique\n    document ID.\n    \"\"\"\n\n    @abc.abstractmethod\n    def delete(\n        self,\n        # TODO(andrei): Fine for now but this can probably be a batch operation\n        # that takes in a list of IDs.\n        document_id: str,\n        chunk_count: int | None = None,\n        # TODO(andrei): Shouldn't this also have some acl filtering at minimum?\n    ) -> int:\n        \"\"\"\n        Hard deletes all of the chunks for the corresponding document in the\n        document index.\n\n        TODO(andrei): Not a pressing issue now but think about what we want the\n        contract of this method to be in the event the specified document ID\n        does not exist.\n\n        Args:\n            document_id: The unique identifier for the document as represented\n                in Onyx, not necessarily in the document index.\n            chunk_count: The number of chunks in the document. May be useful for\n                improving the efficiency of the delete operation. Defaults to\n                None.\n\n        Returns:\n            The number of chunks deleted.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass Updatable(abc.ABC):\n    \"\"\"\n    Class must implement the ability to update certain attributes of a document\n    without needing to update all of the fields. Specifically, needs to be able\n    to update:\n    - Access Control List\n    - Document-set membership\n    - Boost value (learning from feedback mechanism)\n    - Whether the document is hidden or not; hidden documents are not returned\n      from search\n    - Which Projects the document is a part of\n    \"\"\"\n\n    @abc.abstractmethod\n    def update(\n        self,\n        update_requests: list[MetadataUpdateRequest],\n    ) -> None:\n        \"\"\"Updates some set of chunks.\n\n        The document and fields to update are specified in the update requests.\n        Each update request in the list applies its changes to a list of\n        document IDs. None values mean that the field does not need an update.\n\n        Args:\n            update_requests: A list of update requests, each containing a list\n                of document IDs and the fields to update. The field updates\n                apply to all of the specified documents in each update request.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass IdRetrievalCapable(abc.ABC):\n    \"\"\"\n    Class must implement the ability to retrieve either:\n    - All of the chunks of a document IN ORDER given a document ID.\n    - A specific section (continuous set of chunks) for some document.\n    \"\"\"\n\n    @abc.abstractmethod\n    def id_based_retrieval(\n        self,\n        chunk_requests: list[DocumentSectionRequest],\n        # TODO(andrei): Make this more strict w.r.t. acl, temporary for now.\n        filters: IndexFilters,\n        # TODO(andrei): This is temporary, we will not expose this in the long\n        # run.\n        batch_retrieval: bool = False,\n        # TODO(andrei): Add a param for whether to retrieve hidden docs.\n    ) -> list[InferenceChunk]:\n        \"\"\"Fetches chunk(s) based on document ID.\n\n        NOTE: This is used to reconstruct a full document or an extended\n        (multi-chunk) section of a document. Downstream currently assumes that\n        the chunking does not introduce overlaps between the chunks. If there\n        are overlaps for the chunks, then the reconstructed document or extended\n        section will have duplicate segments.\n\n        Args:\n            chunk_requests: Requests containing the document ID and the chunk\n                range to retrieve.\n\n        Returns:\n            List of sections from the documents specified.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass HybridCapable(abc.ABC):\n    \"\"\"\n    Class must implement hybrid (keyword + vector) search functionality.\n    \"\"\"\n\n    @abc.abstractmethod\n    def hybrid_retrieval(\n        self,\n        query: str,\n        query_embedding: Embedding,\n        # TODO(andrei): This param is not great design, get rid of it.\n        final_keywords: list[str] | None,\n        query_type: QueryType,\n        # TODO(andrei): Make this more strict w.r.t. acl, temporary for now.\n        filters: IndexFilters,\n        num_to_retrieve: int,\n    ) -> list[InferenceChunk]:\n        \"\"\"Runs hybrid search and returns a list of inference chunks.\n\n        Args:\n            query: Unmodified user query. This may be needed for getting the\n                matching highlighted keywords or for logging purposes.\n            query_embedding: Vector representation of the query. Must be of the\n                correct dimensionality for the primary index.\n            final_keywords: Final keywords to be used from the query; defaults\n                to query if not set.\n            query_type: Semantic or keyword type query; may use different\n                scoring logic for each.\n            filters: Filters for things like permissions, source type, time,\n                etc.\n            num_to_retrieve: Number of highest matching chunks to return.\n\n        Returns:\n            Score-ranked (highest first) list of highest matching chunks.\n        \"\"\"\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def keyword_retrieval(\n        self,\n        query: str,\n        filters: IndexFilters,\n        num_to_retrieve: int,\n    ) -> list[InferenceChunk]:\n        \"\"\"Runs keyword-only search and returns a list of inference chunks.\n\n        Args:\n            query: User query.\n            filters: Filters for things like permissions, source type, time,\n                etc.\n            num_to_retrieve: Number of highest matching chunks to return.\n\n        Returns:\n            Score-ranked (highest first) list of highest matching chunks.\n        \"\"\"\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def semantic_retrieval(\n        self,\n        query_embedding: Embedding,\n        filters: IndexFilters,\n        num_to_retrieve: int,\n    ) -> list[InferenceChunk]:\n        \"\"\"Runs semantic-only search and returns a list of inference chunks.\n\n        Args:\n            query_embedding: Vector representation of the query. Must be of the\n                correct dimensionality for the primary index.\n            filters: Filters for things like permissions, source type, time,\n                etc.\n            num_to_retrieve: Number of highest matching chunks to return.\n\n        Returns:\n            Score-ranked (highest first) list of highest matching chunks.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass RandomCapable(abc.ABC):\n    \"\"\"\n    Class must implement random document retrieval.\n    \"\"\"\n\n    @abc.abstractmethod\n    def random_retrieval(\n        self,\n        # TODO(andrei): Make this more strict w.r.t. acl, temporary for now.\n        filters: IndexFilters,\n        num_to_retrieve: int = 10,\n        dirty: bool | None = None,\n    ) -> list[InferenceChunk]:\n        \"\"\"Retrieves random chunks matching the filters.\n\n        Args:\n            filters: Filters for things like permissions, source type, time,\n                etc.\n            num_to_retrieve: Number of chunks to retrieve. Defaults to 10.\n            dirty: If set, retrieve chunks whose \"dirty\" flag matches this\n                argument. If None, there is no restriction on retrieved chunks\n                with respect to that flag. A chunk is considered dirty if there\n                is a secondary index but the chunk's state has not been ported\n                over to it yet. Defaults to None.\n\n        Returns:\n            List of chunks matching the filters.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass DocumentIndex(\n    SchemaVerifiable,\n    Indexable,\n    Updatable,\n    Deletable,\n    HybridCapable,\n    IdRetrievalCapable,\n    RandomCapable,\n    abc.ABC,\n):\n    \"\"\"\n    A valid document index that can plug into all Onyx flows must implement all\n    of these functionalities.\n\n    As a high-level summary, document indices need to be able to:\n    - Verify the schema definition is valid\n    - Index new documents\n    - Update specific attributes of existing documents\n    - Delete documents\n    - Run hybrid search\n    - Retrieve document or sections of documents based on document id\n    - Retrieve sets of random documents\n    \"\"\"\n"
  },
  {
    "path": "backend/onyx/document_index/opensearch/README.md",
    "content": "# Opensearch Idiosyncrasies\n\n## How it works at a high level\nOpensearch has 2 phases, a `Search` phase and a `Fetch` phase. The `Search` phase works by getting the document scores on each\nshard separately, then typically a fetch phase grabs all of the relevant fields/data for returning to the user. There is also\nan intermediate phase (seemingly built specifically to handle hybrid search queries) which can run in between as a processor.\nReferences:\nhttps://docs.opensearch.org/latest/search-plugins/search-pipelines/search-processors/\nhttps://docs.opensearch.org/latest/search-plugins/search-pipelines/normalization-processor/\nhttps://docs.opensearch.org/latest/query-dsl/compound/hybrid/\n\n## How Hybrid queries work\nHybrid queries are basically parallel queries that each run through their own `Search` phase and do not interact in any way.\nThey also run across all the shards. It is not entirely clear what happens if a combination pipeline is not specified for them,\nperhaps the scores are just summed.\n\nWhen the normalization processor is applied to keyword/vector hybrid searches, documents that show up due to keyword match may\nnot also have showed up in the vector search and vice versa. In these situations, it just receives a 0 score for the missing\nquery component. Opensearch does not run another phase to recapture those missing values. The impact of this is that after\nnormalizing, the missing scores are 0 but this is a higher score than if it actually received a non-zero score.\n\nThis may not be immediately obvious so an explanation is included here. If it got a non-zero score instead, it must be lower\nthan all of the other scores of the list (otherwise it would have shown up). Therefore it would impact the normalization and\npush the other scores higher so that it's not only the lowest score still, but now it's a differentiated lowest score. This is\nnot strictly the case in a multi-node setup but the high level concept approximately holds. So basically the 0 score is a form\nof \"minimum value clipping\".\n\n## On time decay and boosting\nEmbedding models do not have a uniform distribution from 0 to 1. The values typically cluster strongly around 0.6 to 0.8 but also\nvaries between models and even the query. It is not a safe assumption to pre-normalize the scores so we also cannot apply any\nadditive or multiplicative boost to it. i.e. if results of a doc cluster around 0.6 to 0.8 and I give a 50% penalty to the score,\nit doesn't bring a result from the top of the range to 50th percentile, it brings it under the 0.6 and is now the worst match.\nSame logic applies to additive boosting.\n\nSo these boosts can only be applied after normalization. Unfortunately with Opensearch, the normalization processor runs last\nand only applies to the results of the completely independent `Search` phase queries. So if a time based boost (a separate\nquery which filters on recently updated documents) is added, it would not be able to introduce any new documents\nto the set (since the new documents would have no keyword/vector score or already be present) since the 0 scores on keyword\nand vector would make the docs which only came because of time filter very low scoring. This can however make some of the lower\nscored documents from the union of all the `Search` phase documents to show up higher and potentially not get dropped before\nbeing fetched and returned to the user. But there are other issues of including these:\n- There is no way to sort by this field, only a filter, so there's no way to guarantee the best docs even irrespective of the\ncontents. If there are lots of updates, this may miss.\n- There is not a good way to normalize this field, the best is to clip it on the bottom.\n- This would require using min-max norm but z-score norm is better for the other functions due to things like it being less\nsensitive to outliers, better handles distribution drifts (min-max assumes stable meaningful ranges), better for comparing\n\"unusual-ness\" across distributions.\n\nSo while it is possible to apply time based boosting at the normalization stage (or specifically to the keyword score), we have\ndecided it is better to not apply it during the OpenSearch query.\n\nBecause of these limitations, Onyx in code applies further refinements, boostings, etc. based on OpenSearch providing an initial\nfiltering. The impact of time decay and boost should not be so big that we would need orders of magnitude more results back\nfrom OpenSearch.\n\n## Other concepts to be aware of\nWithin the `Search` phase, there are optional steps like Rescore but these are not useful for the combination/normalization\nwork that is relevant for the hybrid search. Since the Rescore happens prior to normalization, it's not able to provide any\nmeaningful operations to the query for our usage.\n\nBecause the Title is included in the Contents for both embedding and keyword searches, the Title scores are very low relative to\nthe actual full contents scoring. It is seen as a boost rather than a core scoring component. Time decay works similarly.\n"
  },
  {
    "path": "backend/onyx/document_index/opensearch/client.py",
    "content": "import json\nimport logging\nimport time\nfrom contextlib import AbstractContextManager\nfrom contextlib import nullcontext\nfrom typing import Any\nfrom typing import Generic\nfrom typing import TypeVar\n\nfrom opensearchpy import OpenSearch\nfrom opensearchpy import TransportError\nfrom opensearchpy.helpers import bulk\nfrom pydantic import BaseModel\n\nfrom onyx.configs.app_configs import DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S\nfrom onyx.configs.app_configs import OPENSEARCH_ADMIN_PASSWORD\nfrom onyx.configs.app_configs import OPENSEARCH_ADMIN_USERNAME\nfrom onyx.configs.app_configs import OPENSEARCH_HOST\nfrom onyx.configs.app_configs import OPENSEARCH_REST_API_PORT\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.constants import OpenSearchSearchType\nfrom onyx.document_index.opensearch.schema import DocumentChunk\nfrom onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors\nfrom onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id\nfrom onyx.document_index.opensearch.search import DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW\nfrom onyx.server.metrics.opensearch_search import observe_opensearch_search\nfrom onyx.server.metrics.opensearch_search import track_opensearch_search_in_progress\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.timing import log_function_time\n\n\nCLIENT_THRESHOLD_TO_LOG_SLOW_SEARCH_MS = 2000\n\n\nlogger = setup_logger(__name__)\n# Set the logging level to WARNING to ignore INFO and DEBUG logs from\n# opensearch. By default it emits INFO-level logs for every request.\n# The opensearch-py library uses \"opensearch\" as the logger name for HTTP\n# requests (see opensearchpy/connection/base.py)\nopensearch_logger = logging.getLogger(\"opensearch\")\nopensearch_logger.setLevel(logging.WARNING)\n\n\nSchemaDocumentModel = TypeVar(\"SchemaDocumentModel\")\n\n\nclass SearchHit(BaseModel, Generic[SchemaDocumentModel]):\n    \"\"\"Represents a hit from OpenSearch in response to a query.\n\n    Templated on the specific document model as defined by a schema.\n    \"\"\"\n\n    model_config = {\"frozen\": True}\n\n    # The document chunk source retrieved from OpenSearch.\n    document_chunk: SchemaDocumentModel\n    # The match score for the document chunk as calculated by OpenSearch. Only\n    # relevant for \"fuzzy searches\"; this will be None for direct queries where\n    # score is not relevant like direct retrieval on ID.\n    score: float | None = None\n    # Maps schema property name to a list of highlighted snippets with match\n    # terms wrapped in tags (e.g. \"something <hi>keyword</hi> other thing\").\n    match_highlights: dict[str, list[str]] = {}\n    # Score explanation from OpenSearch when \"explain\": true is set in the\n    # query. Contains detailed breakdown of how the score was calculated.\n    explanation: dict[str, Any] | None = None\n\n\nclass IndexInfo(BaseModel):\n    \"\"\"\n    Represents information about an OpenSearch index.\n    \"\"\"\n\n    model_config = {\"frozen\": True}\n\n    name: str\n    health: str\n    status: str\n    num_primary_shards: str\n    num_replica_shards: str\n    docs_count: str\n    docs_deleted: str\n    created_at: str\n    total_size: str\n    primary_shards_size: str\n\n\ndef get_new_body_without_vectors(body: dict[str, Any]) -> dict[str, Any]:\n    \"\"\"Recursively replaces vectors in the body with their length.\n\n    TODO(andrei): Do better.\n\n    Args:\n        body: The body to replace the vectors.\n\n    Returns:\n        A copy of body with vectors replaced with their length.\n    \"\"\"\n    new_body: dict[str, Any] = {}\n    for k, v in body.items():\n        if k == \"vector\":\n            new_body[k] = len(v)\n        elif isinstance(v, dict):\n            new_body[k] = get_new_body_without_vectors(v)\n        elif isinstance(v, list) and len(v) > 0 and isinstance(v[0], dict):\n            new_body[k] = [get_new_body_without_vectors(item) for item in v]\n        else:\n            new_body[k] = v\n    return new_body\n\n\nclass OpenSearchClient(AbstractContextManager):\n    \"\"\"Client for interacting with OpenSearch for cluster-level operations.\n\n    Args:\n        host: The host of the OpenSearch cluster.\n        port: The port of the OpenSearch cluster.\n        auth: The authentication credentials for the OpenSearch cluster. A tuple\n            of (username, password).\n        use_ssl: Whether to use SSL for the OpenSearch cluster. Defaults to\n            True.\n        verify_certs: Whether to verify the SSL certificates for the OpenSearch\n            cluster. Defaults to False.\n        ssl_show_warn: Whether to show warnings for SSL certificates. Defaults\n            to False.\n        timeout: The timeout for the OpenSearch cluster. Defaults to\n            DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S.\n    \"\"\"\n\n    def __init__(\n        self,\n        host: str = OPENSEARCH_HOST,\n        port: int = OPENSEARCH_REST_API_PORT,\n        auth: tuple[str, str] = (OPENSEARCH_ADMIN_USERNAME, OPENSEARCH_ADMIN_PASSWORD),\n        use_ssl: bool = True,\n        verify_certs: bool = False,\n        ssl_show_warn: bool = False,\n        timeout: int = DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S,\n    ):\n        logger.debug(\n            f\"Creating OpenSearch client with host {host}, port {port} and timeout {timeout} seconds.\"\n        )\n        self._client = OpenSearch(\n            hosts=[{\"host\": host, \"port\": port}],\n            http_auth=auth,\n            use_ssl=use_ssl,\n            verify_certs=verify_certs,\n            ssl_show_warn=ssl_show_warn,\n            # NOTE: This timeout applies to all requests the client makes,\n            # including bulk indexing. When exceeded, the client will raise a\n            # ConnectionTimeout and return no useful results. The OpenSearch\n            # server will log that the client cancelled the request. To get\n            # partial results from OpenSearch, pass in a timeout parameter to\n            # your request body that is less than this value.\n            timeout=timeout,\n        )\n\n    def __exit__(self, *_: Any) -> None:\n        self.close()\n\n    def __del__(self) -> None:\n        try:\n            self.close()\n        except Exception:\n            pass\n\n    @log_function_time(print_only=True, debug_only=True, include_args=True)\n    def create_search_pipeline(\n        self,\n        pipeline_id: str,\n        pipeline_body: dict[str, Any],\n    ) -> None:\n        \"\"\"Creates a search pipeline.\n\n        See the OpenSearch documentation for more information on the search\n        pipeline body.\n        https://docs.opensearch.org/latest/search-plugins/search-pipelines/index/\n\n        Args:\n            pipeline_id: The ID of the search pipeline to create.\n            pipeline_body: The body of the search pipeline to create.\n\n        Raises:\n            Exception: There was an error creating the search pipeline.\n        \"\"\"\n        response = self._client.search_pipeline.put(id=pipeline_id, body=pipeline_body)\n        if not response.get(\"acknowledged\", False):\n            raise RuntimeError(f\"Failed to create search pipeline {pipeline_id}.\")\n\n    @log_function_time(print_only=True, debug_only=True, include_args=True)\n    def delete_search_pipeline(self, pipeline_id: str) -> None:\n        \"\"\"Deletes a search pipeline.\n\n        Args:\n            pipeline_id: The ID of the search pipeline to delete.\n\n        Raises:\n            Exception: There was an error deleting the search pipeline.\n        \"\"\"\n        response = self._client.search_pipeline.delete(id=pipeline_id)\n        if not response.get(\"acknowledged\", False):\n            raise RuntimeError(f\"Failed to delete search pipeline {pipeline_id}.\")\n\n    @log_function_time(print_only=True, debug_only=True, include_args=True)\n    def put_cluster_settings(self, settings: dict[str, Any]) -> bool:\n        \"\"\"Puts cluster settings.\n\n        Args:\n            settings: The settings to put.\n\n        Raises:\n            Exception: There was an error putting the cluster settings.\n\n        Returns:\n            True if the settings were put successfully, False otherwise.\n        \"\"\"\n        response = self._client.cluster.put_settings(body=settings)\n        if response.get(\"acknowledged\", False):\n            logger.info(\"Successfully put cluster settings.\")\n            return True\n        else:\n            logger.error(f\"Failed to put cluster settings: {response}.\")\n            return False\n\n    @log_function_time(print_only=True, debug_only=True)\n    def list_indices_with_info(self) -> list[IndexInfo]:\n        \"\"\"\n        Lists the indices in the OpenSearch cluster with information about each\n        index.\n\n        Returns:\n            A list of IndexInfo objects for each index.\n        \"\"\"\n        response = self._client.cat.indices(format=\"json\")\n        indices: list[IndexInfo] = []\n        for raw_index_info in response:\n            indices.append(\n                IndexInfo(\n                    name=raw_index_info.get(\"index\", \"\"),\n                    health=raw_index_info.get(\"health\", \"\"),\n                    status=raw_index_info.get(\"status\", \"\"),\n                    num_primary_shards=raw_index_info.get(\"pri\", \"\"),\n                    num_replica_shards=raw_index_info.get(\"rep\", \"\"),\n                    docs_count=raw_index_info.get(\"docs.count\", \"\"),\n                    docs_deleted=raw_index_info.get(\"docs.deleted\", \"\"),\n                    created_at=raw_index_info.get(\"creation.date.string\", \"\"),\n                    total_size=raw_index_info.get(\"store.size\", \"\"),\n                    primary_shards_size=raw_index_info.get(\"pri.store.size\", \"\"),\n                )\n            )\n        return indices\n\n    @log_function_time(print_only=True, debug_only=True)\n    def ping(self) -> bool:\n        \"\"\"Pings the OpenSearch cluster.\n\n        Returns:\n            True if OpenSearch could be reached, False if it could not.\n        \"\"\"\n        return self._client.ping()\n\n    def close(self) -> None:\n        \"\"\"Closes the client.\n\n        Raises:\n            Exception: There was an error closing the client.\n        \"\"\"\n        self._client.close()\n\n\nclass OpenSearchIndexClient(OpenSearchClient):\n    \"\"\"Client for interacting with OpenSearch for index-level operations.\n\n    OpenSearch's Python module has pretty bad typing support so this client\n    attempts to protect the rest of the codebase from this. As a consequence,\n    most methods here return the minimum data needed for the rest of Onyx, and\n    tend to rely on Exceptions to handle errors.\n\n    TODO(andrei): This class currently assumes the structure of the database\n    schema when it returns a DocumentChunk. Make the class, or at least the\n    search method, templated on the structure the caller can expect.\n\n    Args:\n        index_name: The name of the index to interact with.\n        host: The host of the OpenSearch cluster.\n        port: The port of the OpenSearch cluster.\n        auth: The authentication credentials for the OpenSearch cluster. A tuple\n            of (username, password).\n        use_ssl: Whether to use SSL for the OpenSearch cluster. Defaults to\n            True.\n        verify_certs: Whether to verify the SSL certificates for the OpenSearch\n            cluster. Defaults to False.\n        ssl_show_warn: Whether to show warnings for SSL certificates. Defaults\n            to False.\n        timeout: The timeout for the OpenSearch cluster. Defaults to\n            DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S.\n    \"\"\"\n\n    def __init__(\n        self,\n        index_name: str,\n        host: str = OPENSEARCH_HOST,\n        port: int = OPENSEARCH_REST_API_PORT,\n        auth: tuple[str, str] = (OPENSEARCH_ADMIN_USERNAME, OPENSEARCH_ADMIN_PASSWORD),\n        use_ssl: bool = True,\n        verify_certs: bool = False,\n        ssl_show_warn: bool = False,\n        timeout: int = DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S,\n        emit_metrics: bool = True,\n    ):\n        super().__init__(\n            host=host,\n            port=port,\n            auth=auth,\n            use_ssl=use_ssl,\n            verify_certs=verify_certs,\n            ssl_show_warn=ssl_show_warn,\n            timeout=timeout,\n        )\n        self._index_name = index_name\n        self._emit_metrics = emit_metrics\n        logger.debug(\n            f\"OpenSearch client created successfully for index {self._index_name}.\"\n        )\n\n    @log_function_time(print_only=True, debug_only=True, include_args=True)\n    def create_index(self, mappings: dict[str, Any], settings: dict[str, Any]) -> None:\n        \"\"\"Creates the index.\n\n        See the OpenSearch documentation for more information on mappings and\n        settings.\n\n        Args:\n            mappings: The mappings for the index to create.\n            settings: The settings for the index to create.\n\n        Raises:\n            Exception: There was an error creating the index.\n        \"\"\"\n        body: dict[str, Any] = {\n            \"mappings\": mappings,\n            \"settings\": settings,\n        }\n        logger.debug(f\"Creating index {self._index_name} with body {body}.\")\n        response = self._client.indices.create(index=self._index_name, body=body)\n        if not response.get(\"acknowledged\", False):\n            raise RuntimeError(f\"Failed to create index {self._index_name}.\")\n        response_index = response.get(\"index\", \"\")\n        if response_index != self._index_name:\n            raise RuntimeError(\n                f\"OpenSearch responded with index name {response_index} when creating index {self._index_name}.\"\n            )\n        logger.debug(f\"Index {self._index_name} created successfully.\")\n\n    @log_function_time(print_only=True, debug_only=True)\n    def delete_index(self) -> bool:\n        \"\"\"Deletes the index.\n\n        Raises:\n            Exception: There was an error deleting the index.\n\n        Returns:\n            True if the index was deleted, False if it did not exist.\n        \"\"\"\n        if not self._client.indices.exists(index=self._index_name):\n            logger.warning(\n                f\"Tried to delete index {self._index_name} but it does not exist.\"\n            )\n            return False\n\n        logger.debug(f\"Deleting index {self._index_name}.\")\n        response = self._client.indices.delete(index=self._index_name)\n        if not response.get(\"acknowledged\", False):\n            raise RuntimeError(f\"Failed to delete index {self._index_name}.\")\n        return True\n\n    @log_function_time(print_only=True, debug_only=True)\n    def index_exists(self) -> bool:\n        \"\"\"Checks if the index exists.\n\n        Raises:\n            Exception: There was an error checking if the index exists.\n\n        Returns:\n            True if the index exists, False if it does not.\n        \"\"\"\n        return self._client.indices.exists(index=self._index_name)\n\n    @log_function_time(print_only=True, debug_only=True, include_args=True)\n    def put_mapping(self, mappings: dict[str, Any]) -> None:\n        \"\"\"Updates the index mapping in an idempotent manner.\n\n        - Existing fields with the same definition: No-op (succeeds silently).\n        - New fields: Added to the index.\n        - Existing fields with different types: Raises exception (requires\n          reindex).\n\n        See the OpenSearch documentation for more information:\n        https://docs.opensearch.org/latest/api-reference/index-apis/put-mapping/\n\n        Args:\n            mappings: The complete mapping definition to apply. This will be\n                merged with existing mappings in the index.\n\n        Raises:\n            Exception: There was an error updating the mappings, such as\n                attempting to change the type of an existing field.\n        \"\"\"\n        logger.debug(\n            f\"Putting mappings for index {self._index_name} with mappings {mappings}.\"\n        )\n        response = self._client.indices.put_mapping(\n            index=self._index_name, body=mappings\n        )\n        if not response.get(\"acknowledged\", False):\n            raise RuntimeError(\n                f\"Failed to put the mapping update for index {self._index_name}.\"\n            )\n        logger.debug(f\"Successfully put mappings for index {self._index_name}.\")\n\n    @log_function_time(print_only=True, debug_only=True, include_args=True)\n    def validate_index(self, expected_mappings: dict[str, Any]) -> bool:\n        \"\"\"Validates the index.\n\n        Short-circuit returns False on the first mismatch. Logs the mismatch.\n\n        See the OpenSearch documentation for more information on the index\n        mappings.\n        https://docs.opensearch.org/latest/mappings/\n\n        Args:\n            mappings: The expected mappings of the index to validate.\n\n        Raises:\n            Exception: There was an error validating the index.\n\n        Returns:\n            True if the index is valid, False if it is not based on the mappings\n                supplied.\n        \"\"\"\n        # OpenSearch's documentation makes no mention of what happens when you\n        # invoke client.indices.get on an index that does not exist, so we check\n        # for existence explicitly just to be sure.\n        exists_response = self.index_exists()\n        if not exists_response:\n            logger.warning(\n                f\"Tried to validate index {self._index_name} but it does not exist.\"\n            )\n            return False\n        logger.debug(\n            f\"Validating index {self._index_name} with expected mappings {expected_mappings}.\"\n        )\n\n        get_result = self._client.indices.get(index=self._index_name)\n        index_info: dict[str, Any] = get_result.get(self._index_name, {})\n        if not index_info:\n            raise ValueError(\n                f\"Bug: OpenSearch did not return any index info for index {self._index_name}, \"\n                \"even though it confirmed that the index exists.\"\n            )\n        index_mapping_properties: dict[str, Any] = index_info.get(\"mappings\", {}).get(\n            \"properties\", {}\n        )\n        expected_mapping_properties: dict[str, Any] = expected_mappings.get(\n            \"properties\", {}\n        )\n        assert (\n            expected_mapping_properties\n        ), \"Bug: No properties were found in the provided expected mappings.\"\n\n        for property in expected_mapping_properties:\n            if property not in index_mapping_properties:\n                logger.warning(\n                    f'The field \"{property}\" was not found in the index {self._index_name}.'\n                )\n                return False\n\n            expected_property_type = expected_mapping_properties[property].get(\n                \"type\", \"\"\n            )\n            assert (\n                expected_property_type\n            ), f'Bug: The field \"{property}\" in the supplied expected schema mappings has no type.'\n\n            index_property_type = index_mapping_properties[property].get(\"type\", \"\")\n            if expected_property_type != index_property_type:\n                logger.warning(\n                    f'The field \"{property}\" in the index {self._index_name} has type {index_property_type} '\n                    f\"but the expected type is {expected_property_type}.\"\n                )\n                return False\n\n        logger.debug(f\"Index {self._index_name} validated successfully.\")\n        return True\n\n    @log_function_time(print_only=True, debug_only=True, include_args=True)\n    def update_settings(self, settings: dict[str, Any]) -> None:\n        \"\"\"Updates the settings of the index.\n\n        See the OpenSearch documentation for more information on the index\n        settings.\n        https://docs.opensearch.org/latest/install-and-configure/configuring-opensearch/index-settings/\n\n        Args:\n            settings: The settings to update the index with.\n\n        Raises:\n            Exception: There was an error updating the settings of the index.\n        \"\"\"\n        # TODO(andrei): Implement this.\n        raise NotImplementedError\n\n    @log_function_time(\n        print_only=True,\n        debug_only=True,\n        include_args_subset={\n            \"document\": str,\n            \"tenant_state\": str,\n            \"update_if_exists\": str,\n        },\n    )\n    def index_document(\n        self,\n        document: DocumentChunk,\n        tenant_state: TenantState,\n        update_if_exists: bool = False,\n    ) -> None:\n        \"\"\"Indexes a document.\n\n        Args:\n            document: The document to index. In Onyx this is a chunk of a\n                document, OpenSearch simply refers to this as a document as\n                well.\n            tenant_state: The tenant state of the caller.\n            update_if_exists: Whether to update the document if it already\n                exists. If False, will raise an exception if the document\n                already exists. Defaults to False.\n\n        Raises:\n            Exception: There was an error indexing the document. This includes\n                the case where a document with the same ID already exists if\n                update_if_exists is False.\n        \"\"\"\n        logger.debug(\n            f\"Trying to index document ID {document.document_id} for tenant {tenant_state.tenant_id}. \"\n            f\"update_if_exists={update_if_exists}.\"\n        )\n        document_chunk_id: str = get_opensearch_doc_chunk_id(\n            tenant_state=tenant_state,\n            document_id=document.document_id,\n            chunk_index=document.chunk_index,\n            max_chunk_size=document.max_chunk_size,\n        )\n        body: dict[str, Any] = document.model_dump(exclude_none=True)\n        # client.create will raise if a doc with the same ID exists.\n        # client.index does not do this.\n        if update_if_exists:\n            result = self._client.index(\n                index=self._index_name, id=document_chunk_id, body=body\n            )\n        else:\n            result = self._client.create(\n                index=self._index_name, id=document_chunk_id, body=body\n            )\n        result_id = result.get(\"_id\", \"\")\n        # Sanity check.\n        if result_id != document_chunk_id:\n            raise RuntimeError(\n                f'Upon trying to index a document, OpenSearch responded with ID \"{result_id}\" '\n                f'instead of \"{document_chunk_id}\" which is the ID it was given.'\n            )\n        result_string: str = result.get(\"result\", \"\")\n        match result_string:\n            # Sanity check.\n            case \"created\":\n                pass\n            case \"updated\":\n                if not update_if_exists:\n                    raise RuntimeError(\n                        f'The OpenSearch client returned result \"updated\" for indexing document chunk \"{document_chunk_id}\". '\n                        \"This indicates that a document chunk with that ID already exists, which is not expected.\"\n                    )\n            case _:\n                raise RuntimeError(\n                    f'Unknown OpenSearch indexing result: \"{result_string}\".'\n                )\n        logger.debug(f\"Successfully indexed {document_chunk_id}.\")\n\n    @log_function_time(\n        print_only=True,\n        debug_only=True,\n        include_args_subset={\n            \"documents\": len,\n            \"tenant_state\": str,\n            \"update_if_exists\": str,\n        },\n    )\n    def bulk_index_documents(\n        self,\n        documents: list[DocumentChunk],\n        tenant_state: TenantState,\n        update_if_exists: bool = False,\n    ) -> None:\n        \"\"\"Bulk indexes documents.\n\n        Raises if there are any errors during the bulk index. It should be\n        assumed that no documents in the batch were indexed successfully if\n        there is an error.\n\n        Retries on 429 too many requests.\n\n        Args:\n            documents: The documents to index. In Onyx this is a chunk of a\n                document, OpenSearch simply refers to this as a document as\n                well.\n            tenant_state: The tenant state of the caller.\n            update_if_exists: Whether to update the document if it already\n                exists. If False, will raise an exception if the document\n                already exists. Defaults to False.\n\n        Raises:\n            Exception: There was an error during the bulk index. This\n                includes the case where a document with the same ID already\n                exists if update_if_exists is False.\n        \"\"\"\n        if not documents:\n            return\n        logger.debug(\n            f\"Bulk indexing {len(documents)} documents for tenant {tenant_state.tenant_id}. update_if_exists={update_if_exists}.\"\n        )\n        data = []\n        for document in documents:\n            document_chunk_id: str = get_opensearch_doc_chunk_id(\n                tenant_state=tenant_state,\n                document_id=document.document_id,\n                chunk_index=document.chunk_index,\n                max_chunk_size=document.max_chunk_size,\n            )\n            body: dict[str, Any] = document.model_dump(exclude_none=True)\n            data_for_document: dict[str, Any] = {\n                \"_index\": self._index_name,\n                \"_id\": document_chunk_id,\n                \"_op_type\": \"index\" if update_if_exists else \"create\",\n                \"_source\": body,\n            }\n            data.append(data_for_document)\n        # max_retries is the number of times to retry a request if we get a 429.\n        success, errors = bulk(self._client, data, max_retries=3)\n        if errors:\n            raise RuntimeError(\n                f\"Failed to bulk index documents for index {self._index_name}. Errors: {errors}\"\n            )\n        if success != len(documents):\n            raise RuntimeError(\n                f\"OpenSearch reported no errors during bulk index but the number of successful operations \"\n                f\"({success}) does not match the number of documents ({len(documents)}).\"\n            )\n        logger.debug(f\"Successfully bulk indexed {len(documents)} documents.\")\n\n    @log_function_time(print_only=True, debug_only=True, include_args=True)\n    def delete_document(self, document_chunk_id: str) -> bool:\n        \"\"\"Deletes a document.\n\n        Args:\n            document_chunk_id: The OpenSearch ID of the document chunk to\n                delete.\n\n        Raises:\n            Exception: There was an error deleting the document.\n\n        Returns:\n            True if the document was deleted, False if it was not found.\n        \"\"\"\n        try:\n            logger.debug(\n                f\"Trying to delete document chunk {document_chunk_id} from index {self._index_name}.\"\n            )\n            result = self._client.delete(index=self._index_name, id=document_chunk_id)\n        except TransportError as e:\n            if e.status_code == 404:\n                logger.debug(\n                    f\"Document chunk {document_chunk_id} not found in index {self._index_name}.\"\n                )\n                return False\n            else:\n                raise e\n\n        result_string: str = result.get(\"result\", \"\")\n        match result_string:\n            case \"deleted\":\n                logger.debug(\n                    f\"Successfully deleted document chunk {document_chunk_id} from index {self._index_name}.\"\n                )\n                return True\n            case \"not_found\":\n                logger.debug(\n                    f\"Document chunk {document_chunk_id} not found in index {self._index_name}.\"\n                )\n                return False\n            case _:\n                raise RuntimeError(\n                    f'Unknown OpenSearch deletion result: \"{result_string}\".'\n                )\n\n    @log_function_time(print_only=True, debug_only=True)\n    def delete_by_query(self, query_body: dict[str, Any]) -> int:\n        \"\"\"Deletes documents by a query.\n\n        Args:\n            query_body: The body of the query to delete documents by.\n\n        Raises:\n            Exception: There was an error deleting the documents.\n\n        Returns:\n            The number of documents deleted.\n        \"\"\"\n        logger.debug(\n            f\"Trying to delete documents by query for index {self._index_name}.\"\n        )\n        result = self._client.delete_by_query(index=self._index_name, body=query_body)\n        if result.get(\"timed_out\", False):\n            raise RuntimeError(\n                f\"Delete by query timed out for index {self._index_name}.\"\n            )\n        if len(result.get(\"failures\", [])) > 0:\n            raise RuntimeError(\n                f\"Failed to delete some or all of the documents for index {self._index_name}.\"\n            )\n\n        num_deleted = result.get(\"deleted\", 0)\n        num_processed = result.get(\"total\", 0)\n        if num_deleted != num_processed:\n            raise RuntimeError(\n                f\"Failed to delete some or all of the documents for index {self._index_name}. \"\n                f\"{num_deleted} documents were deleted out of {num_processed} documents that were processed.\"\n            )\n\n        logger.debug(\n            f\"Successfully deleted {num_deleted} documents by query for index {self._index_name}.\"\n        )\n        return num_deleted\n\n    @log_function_time(\n        print_only=True,\n        debug_only=True,\n        include_args_subset={\n            \"document_chunk_id\": str,\n            \"properties_to_update\": lambda x: x.keys(),\n        },\n    )\n    def update_document(\n        self, document_chunk_id: str, properties_to_update: dict[str, Any]\n    ) -> None:\n        \"\"\"Updates an OpenSearch document chunk's properties.\n\n        Args:\n            document_chunk_id: The OpenSearch ID of the document chunk to\n                update.\n            properties_to_update: The properties of the document to update. Each\n                property should exist in the schema.\n\n        Raises:\n            Exception: There was an error updating the document.\n        \"\"\"\n        logger.debug(\n            f\"Trying to update document chunk {document_chunk_id} for index {self._index_name}.\"\n        )\n        update_body: dict[str, Any] = {\"doc\": properties_to_update}\n        result = self._client.update(\n            index=self._index_name,\n            id=document_chunk_id,\n            body=update_body,\n            _source=False,\n        )\n        result_id = result.get(\"_id\", \"\")\n        # Sanity check.\n        if result_id != document_chunk_id:\n            raise RuntimeError(\n                f'Upon trying to update a document, OpenSearch responded with ID \"{result_id}\" '\n                f'instead of \"{document_chunk_id}\" which is the ID it was given.'\n            )\n        result_string: str = result.get(\"result\", \"\")\n        match result_string:\n            # Sanity check.\n            case \"updated\":\n                logger.debug(\n                    f\"Successfully updated document chunk {document_chunk_id} for index {self._index_name}.\"\n                )\n                return\n            case \"noop\":\n                logger.warning(\n                    f'OpenSearch reported a no-op when trying to update document with ID \"{document_chunk_id}\".'\n                )\n                return\n            case _:\n                raise RuntimeError(\n                    f'The OpenSearch client returned result \"{result_string}\" for updating document chunk \"{document_chunk_id}\". '\n                    \"This is unexpected.\"\n                )\n\n    @log_function_time(print_only=True, debug_only=True, include_args=True)\n    def get_document(self, document_chunk_id: str) -> DocumentChunk:\n        \"\"\"Gets an OpenSearch document chunk.\n\n        Will raise an exception if the document chunk is not found.\n\n        Args:\n            document_chunk_id: The OpenSearch ID of the document chunk to get.\n\n        Raises:\n            Exception: There was an error getting the document. This includes\n                the case where the document is not found.\n\n        Returns:\n            The document chunk.\n        \"\"\"\n        logger.debug(\n            f\"Trying to get document chunk {document_chunk_id} from index {self._index_name}.\"\n        )\n        result = self._client.get(index=self._index_name, id=document_chunk_id)\n        found_result: bool = result.get(\"found\", False)\n        if not found_result:\n            raise RuntimeError(\n                f'Document chunk with ID \"{document_chunk_id}\" was not found.'\n            )\n\n        document_chunk_source: dict[str, Any] | None = result.get(\"_source\")\n        if not document_chunk_source:\n            raise RuntimeError(\n                f'Document chunk with ID \"{document_chunk_id}\" has no data.'\n            )\n\n        logger.debug(\n            f\"Successfully got document chunk {document_chunk_id} from index {self._index_name}.\"\n        )\n        return DocumentChunk.model_validate(document_chunk_source)\n\n    @log_function_time(print_only=True, debug_only=True)\n    def search(\n        self,\n        body: dict[str, Any],\n        search_pipeline_id: str | None,\n        search_type: OpenSearchSearchType = OpenSearchSearchType.UNKNOWN,\n    ) -> list[SearchHit[DocumentChunkWithoutVectors]]:\n        \"\"\"Searches the index.\n\n        NOTE: Does not return vector fields. In order to take advantage of\n        performance benefits, the search body should exclude the schema's vector\n        fields.\n\n        TODO(andrei): Ideally we could check that every field in the body is\n        present in the index, to avoid a class of runtime bugs that could easily\n        be caught during development. Or change the function signature to accept\n        a predefined pydantic model of allowed fields.\n\n        Args:\n            body: The body of the search request. See the OpenSearch\n                documentation for more information on search request bodies.\n            search_pipeline_id: The ID of the search pipeline to use. If None,\n                the default search pipeline will be used.\n            search_type: Label for Prometheus metrics. Does not affect search\n                behavior.\n\n        Raises:\n            Exception: There was an error searching the index.\n\n        Returns:\n            List of search hits that match the search request.\n        \"\"\"\n        logger.debug(\n            f\"Trying to search index {self._index_name} with search pipeline {search_pipeline_id}.\"\n        )\n        result: dict[str, Any]\n        params = {\"phase_took\": \"true\"}\n        ctx = self._get_emit_metrics_context_manager(search_type)\n        t0 = time.perf_counter()\n        with ctx:\n            if search_pipeline_id:\n                result = self._client.search(\n                    index=self._index_name,\n                    search_pipeline=search_pipeline_id,\n                    body=body,\n                    params=params,\n                )\n            else:\n                result = self._client.search(\n                    index=self._index_name, body=body, params=params\n                )\n        client_duration_s = time.perf_counter() - t0\n\n        hits, time_took, timed_out, phase_took, profile = (\n            self._get_hits_and_profile_from_search_result(result)\n        )\n        if self._emit_metrics:\n            observe_opensearch_search(search_type, client_duration_s, time_took)\n        self._log_search_result_perf(\n            time_took=time_took,\n            timed_out=timed_out,\n            phase_took=phase_took,\n            profile=profile,\n            body=body,\n            search_pipeline_id=search_pipeline_id,\n            raise_on_timeout=True,\n        )\n\n        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = []\n        for hit in hits:\n            document_chunk_source: dict[str, Any] | None = hit.get(\"_source\")\n            if not document_chunk_source:\n                raise RuntimeError(\n                    f'Document chunk with ID \"{hit.get(\"_id\", \"\")}\" has no data.'\n                )\n            document_chunk_score = hit.get(\"_score\", None)\n            match_highlights: dict[str, list[str]] = hit.get(\"highlight\", {})\n            explanation: dict[str, Any] | None = hit.get(\"_explanation\", None)\n            search_hit = SearchHit[DocumentChunkWithoutVectors](\n                document_chunk=DocumentChunkWithoutVectors.model_validate(\n                    document_chunk_source\n                ),\n                score=document_chunk_score,\n                match_highlights=match_highlights,\n                explanation=explanation,\n            )\n            search_hits.append(search_hit)\n        logger.debug(\n            f\"Successfully searched index {self._index_name} and got {len(search_hits)} hits.\"\n        )\n        return search_hits\n\n    @log_function_time(print_only=True, debug_only=True)\n    def search_for_document_ids(\n        self,\n        body: dict[str, Any],\n        search_type: OpenSearchSearchType = OpenSearchSearchType.UNKNOWN,\n    ) -> list[str]:\n        \"\"\"Searches the index and returns only document chunk IDs.\n\n        In order to take advantage of the performance benefits of only returning\n        IDs, the body should have a key, value pair of \"_source\": False.\n        Otherwise, OpenSearch will return the entire document body and this\n        method's performance will be the same as the search method's.\n\n        TODO(andrei): Ideally we could check that every field in the body is\n        present in the index, to avoid a class of runtime bugs that could easily\n        be caught during development.\n\n        Args:\n            body: The body of the search request. See the OpenSearch\n                documentation for more information on search request bodies.\n                TODO(andrei): Make this a more deep interface; callers shouldn't\n                need to know to set _source: False for example.\n            search_type: Label for Prometheus metrics. Does not affect search\n                behavior.\n\n        Raises:\n            Exception: There was an error searching the index.\n\n        Returns:\n            List of document chunk IDs that match the search request.\n        \"\"\"\n        logger.debug(\n            f\"Trying to search for document chunk IDs in index {self._index_name}.\"\n        )\n        if \"_source\" not in body or body[\"_source\"] is not False:\n            logger.warning(\n                \"The body of the search request for document chunk IDs is missing the key, value pair of \"\n                '\"_source\": False. This query will therefore be inefficient.'\n            )\n\n        params = {\"phase_took\": \"true\"}\n        ctx = self._get_emit_metrics_context_manager(search_type)\n        t0 = time.perf_counter()\n        with ctx:\n            result: dict[str, Any] = self._client.search(\n                index=self._index_name, body=body, params=params\n            )\n        client_duration_s = time.perf_counter() - t0\n\n        hits, time_took, timed_out, phase_took, profile = (\n            self._get_hits_and_profile_from_search_result(result)\n        )\n        if self._emit_metrics:\n            observe_opensearch_search(search_type, client_duration_s, time_took)\n        self._log_search_result_perf(\n            time_took=time_took,\n            timed_out=timed_out,\n            phase_took=phase_took,\n            profile=profile,\n            body=body,\n            raise_on_timeout=True,\n        )\n\n        # TODO(andrei): Implement scroll/point in time for results so that we\n        # can return arbitrarily-many IDs.\n        if len(hits) == DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW:\n            logger.warning(\n                \"The search request for document chunk IDs returned the maximum number of results. \"\n                \"It is extremely likely that there are more hits in OpenSearch than the returned results.\"\n            )\n\n        # Extract only the _id field from each hit.\n        document_chunk_ids: list[str] = []\n        for hit in hits:\n            document_chunk_id = hit.get(\"_id\")\n            if not document_chunk_id:\n                raise RuntimeError(\n                    \"Received a hit from OpenSearch but the _id field is missing.\"\n                )\n            document_chunk_ids.append(document_chunk_id)\n        logger.debug(\n            f\"Successfully searched for document chunk IDs in index {self._index_name} and got {len(document_chunk_ids)} hits.\"\n        )\n        return document_chunk_ids\n\n    @log_function_time(print_only=True, debug_only=True)\n    def refresh_index(self) -> None:\n        \"\"\"Refreshes the index to make recent changes searchable.\n\n        In OpenSearch, documents are not immediately searchable after indexing.\n        This method forces a refresh to make them available for search.\n\n        Raises:\n            Exception: There was an error refreshing the index.\n        \"\"\"\n        self._client.indices.refresh(index=self._index_name)\n\n    def _get_hits_and_profile_from_search_result(\n        self, result: dict[str, Any]\n    ) -> tuple[list[Any], int | None, bool | None, dict[str, Any], dict[str, Any]]:\n        \"\"\"Extracts the hits and profiling information from a search result.\n\n        Args:\n            result: The search result to extract the hits from.\n\n        Raises:\n            Exception: There was an error extracting the hits from the search\n                result.\n\n        Returns:\n            A tuple containing the hits from the search result, the time taken\n                to execute the search in milliseconds, whether the search timed\n                out, the time taken to execute each phase of the search, and the\n                profile.\n        \"\"\"\n        time_took: int | None = result.get(\"took\")\n        timed_out: bool | None = result.get(\"timed_out\")\n        phase_took: dict[str, Any] = result.get(\"phase_took\", {})\n        profile: dict[str, Any] = result.get(\"profile\", {})\n\n        hits_first_layer: dict[str, Any] = result.get(\"hits\", {})\n        if not hits_first_layer:\n            raise RuntimeError(\n                f\"Hits field missing from response when trying to search index {self._index_name}.\"\n            )\n        hits_second_layer: list[Any] = hits_first_layer.get(\"hits\", [])\n\n        return hits_second_layer, time_took, timed_out, phase_took, profile\n\n    def _log_search_result_perf(\n        self,\n        time_took: int | None,\n        timed_out: bool | None,\n        phase_took: dict[str, Any],\n        profile: dict[str, Any],\n        body: dict[str, Any],\n        search_pipeline_id: str | None = None,\n        raise_on_timeout: bool = False,\n    ) -> None:\n        \"\"\"Logs the performance of a search result.\n\n        Args:\n            time_took: The time taken to execute the search in milliseconds.\n            timed_out: Whether the search timed out.\n            phase_took: The time taken to execute each phase of the search.\n            profile: The profile for the search.\n            body: The body of the search request for logging.\n            search_pipeline_id: The ID of the search pipeline used for the\n                search, if any, for logging. Defaults to None.\n            raise_on_timeout: Whether to raise an exception if the search timed\n                out. Note that the result may still contain useful partial\n                results. Defaults to False.\n\n        Raises:\n            Exception: If raise_on_timeout is True and the search timed out.\n        \"\"\"\n        if time_took and time_took > CLIENT_THRESHOLD_TO_LOG_SLOW_SEARCH_MS:\n            logger.warning(\n                f\"OpenSearch client warning: Search for index {self._index_name} took {time_took} milliseconds.\\n\"\n                f\"Body: {get_new_body_without_vectors(body)}\\n\"\n                f\"Search pipeline ID: {search_pipeline_id}\\n\"\n                f\"Phase took: {phase_took}\\n\"\n                f\"Profile: {json.dumps(profile, indent=2)}\\n\"\n            )\n        if timed_out:\n            error_str = f\"OpenSearch client error: Search timed out for index {self._index_name}.\"\n            logger.error(error_str)\n            if raise_on_timeout:\n                raise RuntimeError(error_str)\n\n    def _get_emit_metrics_context_manager(\n        self, search_type: OpenSearchSearchType\n    ) -> AbstractContextManager[None]:\n        \"\"\"\n        Returns a context manager that tracks in-flight OpenSearch searches via\n        a Gauge if emit_metrics is True, otherwise returns a null context\n        manager.\n        \"\"\"\n        return (\n            track_opensearch_search_in_progress(search_type)\n            if self._emit_metrics\n            else nullcontext()\n        )\n\n\ndef wait_for_opensearch_with_timeout(\n    wait_interval_s: int = 5,\n    wait_limit_s: int = 60,\n    client: OpenSearchClient | None = None,\n) -> bool:\n    \"\"\"Waits for OpenSearch to become ready subject to a timeout.\n\n    Will create a new dummy client if no client is provided. Will close this\n    client at the end of the function. Will not close the client if it was\n    supplied.\n\n    Args:\n        wait_interval_s: The interval in seconds to wait between checks.\n            Defaults to 5.\n        wait_limit_s: The total timeout in seconds to wait for OpenSearch to\n            become ready. Defaults to 60.\n        client: The OpenSearch client to use for pinging. If None, a new dummy\n            client will be created. Defaults to None.\n\n    Returns:\n        True if OpenSearch is ready, False otherwise.\n    \"\"\"\n    with nullcontext(client) if client else OpenSearchClient() as client:\n        time_start = time.monotonic()\n        while True:\n            if client.ping():\n                logger.info(\"[OpenSearch] Readiness probe succeeded. Continuing...\")\n                return True\n            time_elapsed = time.monotonic() - time_start\n            if time_elapsed > wait_limit_s:\n                logger.info(\n                    f\"[OpenSearch] Readiness probe did not succeed within the timeout ({wait_limit_s} seconds).\"\n                )\n                return False\n            logger.info(\n                f\"[OpenSearch] Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={wait_limit_s:.1f}\"\n            )\n            time.sleep(wait_interval_s)\n"
  },
  {
    "path": "backend/onyx/document_index/opensearch/cluster_settings.py",
    "content": "from typing import Any\n\nOPENSEARCH_CLUSTER_SETTINGS: dict[str, Any] = {\n    \"persistent\": {\n        # By default, when you index a document to a non-existent index,\n        # OpenSearch will automatically create the index. This behavior is\n        # undesirable so this function exposes the ability to disable it.\n        # See\n        # https://docs.opensearch.org/latest/install-and-configure/configuring-opensearch/index/#updating-cluster-settings-using-the-api\n        \"action.auto_create_index\": False,\n        # Thresholds for OpenSearch to log slow queries at the server level.\n        \"cluster.search.request.slowlog.level\": \"INFO\",\n        \"cluster.search.request.slowlog.threshold.warn\": \"5s\",\n        \"cluster.search.request.slowlog.threshold.info\": \"2s\",\n        \"cluster.search.request.slowlog.threshold.debug\": \"1s\",\n        \"cluster.search.request.slowlog.threshold.trace\": \"500ms\",\n    }\n}\n"
  },
  {
    "path": "backend/onyx/document_index/opensearch/constants.py",
    "content": "# Default value for the maximum number of tokens a chunk can hold, if none is\n# specified when creating an index.\nimport os\nfrom enum import Enum\n\n\nDEFAULT_MAX_CHUNK_SIZE = 512\n\n\n# By default OpenSearch will only return a maximum of this many results in a\n# given search. This value is configurable in the index settings.\nDEFAULT_OPENSEARCH_MAX_RESULT_WINDOW = 10_000\n\n\n# For documents which do not have a value for LAST_UPDATED_FIELD_NAME, we assume\n# that the document was last updated this many days ago for the purpose of time\n# cutoff filtering during retrieval.\nASSUMED_DOCUMENT_AGE_DAYS = 90\n\n\n# Size of the dynamic list used to consider elements during kNN graph creation.\n# Higher values improve search quality but increase indexing time. Values\n# typically range between 100 - 512.\nEF_CONSTRUCTION = 256\n# Number of bi-directional links per element. Higher values improve search\n# quality but increase memory footprint. Values typically range between 12 - 48.\nM = 32  # Set relatively high for better accuracy.\n\n# When performing hybrid search, we need to consider more candidates than the\n# number of results to be returned. This is because the scoring is hybrid and\n# the results are reordered due to the hybrid scoring. Higher = more candidates\n# for hybrid fusion = better retrieval accuracy, but results in more computation\n# per query. Imagine a simple case with a single keyword query and a single\n# vector query and we want 10 final docs. If we only fetch 10 candidates from\n# each of keyword and vector, they would have to have perfect overlap to get a\n# good hybrid ranking for the 10 results. If we fetch 1000 candidates from each,\n# we have a much higher chance of all 10 of the final desired docs showing up\n# and getting scored. In worse situations, the final 10 docs don't even show up\n# as the final 10 (worse than just a miss at the reranking step).\n# Defaults to 500 for now. Initially this defaulted to 750 but we were seeing\n# poor search performance; bumped from 100 to 500 to improve recall.\nDEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES = int(\n    os.environ.get(\"DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES\", 500)\n)\n\n# Number of vectors to examine to decide the top k neighbors for the HNSW\n# method.\n# NOTE: \"When creating a search query, you must specify k. If you provide both k\n# and ef_search, then the larger value is passed to the engine. If ef_search is\n# larger than k, you can provide the size parameter to limit the final number of\n# results to k.\" from\n# https://docs.opensearch.org/latest/query-dsl/specialized/k-nn/index/#ef_search\nEF_SEARCH = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES\n\n\nclass OpenSearchSearchType(str, Enum):\n    \"\"\"Search type label used for Prometheus metrics.\"\"\"\n\n    HYBRID = \"hybrid\"\n    KEYWORD = \"keyword\"\n    SEMANTIC = \"semantic\"\n    RANDOM = \"random\"\n    DOC_ID_RETRIEVAL = \"doc_id_retrieval\"\n    UNKNOWN = \"unknown\"\n\n\nclass HybridSearchSubqueryConfiguration(Enum):\n    TITLE_VECTOR_CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD = 1\n    # Current default.\n    CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD = 2\n\n\n# Will raise and block application start if HYBRID_SEARCH_SUBQUERY_CONFIGURATION\n# is set but not a valid value. If not set, defaults to\n# CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD.\nHYBRID_SEARCH_SUBQUERY_CONFIGURATION: HybridSearchSubqueryConfiguration = (\n    HybridSearchSubqueryConfiguration(\n        int(os.environ[\"HYBRID_SEARCH_SUBQUERY_CONFIGURATION\"])\n    )\n    if os.environ.get(\"HYBRID_SEARCH_SUBQUERY_CONFIGURATION\", None) is not None\n    else HybridSearchSubqueryConfiguration.CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD\n)\n\n\nclass HybridSearchNormalizationPipeline(Enum):\n    # Current default.\n    MIN_MAX = 1\n    # NOTE: Using z-score normalization is better for hybrid search from a\n    # theoretical standpoint. Empirically on a small dataset of up to 10K docs,\n    # it's not very different. Likely more impactful at scale.\n    # https://opensearch.org/blog/introducing-the-z-score-normalization-technique-for-hybrid-search/\n    ZSCORE = 2\n\n\n# Will raise and block application start if HYBRID_SEARCH_NORMALIZATION_PIPELINE\n# is set but not a valid value. If not set, defaults to MIN_MAX.\nHYBRID_SEARCH_NORMALIZATION_PIPELINE: HybridSearchNormalizationPipeline = (\n    HybridSearchNormalizationPipeline(\n        int(os.environ[\"HYBRID_SEARCH_NORMALIZATION_PIPELINE\"])\n    )\n    if os.environ.get(\"HYBRID_SEARCH_NORMALIZATION_PIPELINE\", None) is not None\n    else HybridSearchNormalizationPipeline.MIN_MAX\n)\n"
  },
  {
    "path": "backend/onyx/document_index/opensearch/opensearch_document_index.py",
    "content": "import json\nfrom collections.abc import Iterable\nfrom typing import Any\n\nimport httpx\nfrom opensearchpy import NotFoundError\n\nfrom onyx.access.models import DocumentAccess\nfrom onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH\nfrom onyx.configs.app_configs import VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT\nfrom onyx.configs.chat_configs import NUM_RETURNED_HITS\nfrom onyx.configs.chat_configs import TITLE_CONTENT_RATIO\nfrom onyx.configs.constants import PUBLIC_DOC_PAT\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    get_experts_stores_representations,\n)\nfrom onyx.connectors.models import convert_metadata_list_of_strings_to_dict\nfrom onyx.context.search.enums import QueryType\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import InferenceChunkUncleaned\nfrom onyx.context.search.models import QueryExpansionType\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.db.models import DocumentSource\nfrom onyx.document_index.chunk_content_enrichment import cleanup_content_for_chunks\nfrom onyx.document_index.chunk_content_enrichment import (\n    generate_enriched_content_for_chunk_text,\n)\nfrom onyx.document_index.interfaces import DocumentIndex as OldDocumentIndex\nfrom onyx.document_index.interfaces import (\n    DocumentInsertionRecord as OldDocumentInsertionRecord,\n)\nfrom onyx.document_index.interfaces import IndexBatchParams\nfrom onyx.document_index.interfaces import VespaChunkRequest\nfrom onyx.document_index.interfaces import VespaDocumentFields\nfrom onyx.document_index.interfaces import VespaDocumentUserFields\nfrom onyx.document_index.interfaces_new import DocumentIndex\nfrom onyx.document_index.interfaces_new import DocumentInsertionRecord\nfrom onyx.document_index.interfaces_new import DocumentSectionRequest\nfrom onyx.document_index.interfaces_new import IndexingMetadata\nfrom onyx.document_index.interfaces_new import MetadataUpdateRequest\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.client import OpenSearchClient\nfrom onyx.document_index.opensearch.client import OpenSearchIndexClient\nfrom onyx.document_index.opensearch.client import SearchHit\nfrom onyx.document_index.opensearch.cluster_settings import OPENSEARCH_CLUSTER_SETTINGS\nfrom onyx.document_index.opensearch.constants import OpenSearchSearchType\nfrom onyx.document_index.opensearch.schema import ACCESS_CONTROL_LIST_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import CONTENT_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import DOCUMENT_SETS_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import DocumentChunk\nfrom onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors\nfrom onyx.document_index.opensearch.schema import DocumentSchema\nfrom onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id\nfrom onyx.document_index.opensearch.schema import GLOBAL_BOOST_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import HIDDEN_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import PERSONAS_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import USER_PROJECTS_FIELD_NAME\nfrom onyx.document_index.opensearch.search import DocumentQuery\nfrom onyx.document_index.opensearch.search import (\n    get_min_max_normalization_pipeline_name_and_config,\n)\nfrom onyx.document_index.opensearch.search import (\n    get_normalization_pipeline_name_and_config,\n)\nfrom onyx.document_index.opensearch.search import (\n    get_zscore_normalization_pipeline_name_and_config,\n)\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom onyx.indexing.models import Document\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.text_processing import remove_invalid_unicode_chars\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\nfrom shared_configs.model_server_models import Embedding\n\n\nlogger = setup_logger(__name__)\n\n\nclass ChunkCountNotFoundError(ValueError):\n    \"\"\"Raised when a document has no chunk count.\"\"\"\n\n\ndef generate_opensearch_filtered_access_control_list(\n    access: DocumentAccess,\n) -> list[str]:\n    \"\"\"Generates an access control list with PUBLIC_DOC_PAT removed.\n\n    In the OpenSearch schema this is represented by PUBLIC_FIELD_NAME.\n    \"\"\"\n    access_control_list = access.to_acl()\n    access_control_list.discard(PUBLIC_DOC_PAT)\n    return list(access_control_list)\n\n\ndef set_cluster_state(client: OpenSearchClient) -> None:\n    if not client.put_cluster_settings(settings=OPENSEARCH_CLUSTER_SETTINGS):\n        logger.error(\n            \"Failed to put cluster settings. If the settings have never been set before, \"\n            \"this may cause unexpected index creation when indexing documents into an \"\n            \"index that does not exist, or may cause expected logs to not appear. If this \"\n            \"is not the first time running Onyx against this instance of OpenSearch, these \"\n            \"settings have likely already been set. Not taking any further action...\"\n        )\n    min_max_normalization_pipeline_name, min_max_normalization_pipeline_config = (\n        get_min_max_normalization_pipeline_name_and_config()\n    )\n    zscore_normalization_pipeline_name, zscore_normalization_pipeline_config = (\n        get_zscore_normalization_pipeline_name_and_config()\n    )\n    client.create_search_pipeline(\n        pipeline_id=min_max_normalization_pipeline_name,\n        pipeline_body=min_max_normalization_pipeline_config,\n    )\n    client.create_search_pipeline(\n        pipeline_id=zscore_normalization_pipeline_name,\n        pipeline_body=zscore_normalization_pipeline_config,\n    )\n\n\ndef _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(\n    chunk: DocumentChunkWithoutVectors,\n    score: float | None,\n    highlights: dict[str, list[str]],\n) -> InferenceChunkUncleaned:\n    \"\"\"\n    Generates an inference chunk from an OpenSearch document chunk, its score,\n    and its match highlights.\n\n    Args:\n        chunk: The document chunk returned by OpenSearch.\n        score: The document chunk match score as calculated by OpenSearch. Only\n            relevant for searches like hybrid search. It is acceptable for this\n            value to be None for results from other queries like ID-based\n            retrieval as a match score makes no sense in those contexts.\n        highlights: Maps schema property name to a list of highlighted snippets\n            with match terms wrapped in tags (e.g. \"something <hi>keyword</hi>\n            other thing\").\n\n    Returns:\n        An Onyx inference chunk representation.\n    \"\"\"\n    return InferenceChunkUncleaned(\n        chunk_id=chunk.chunk_index,\n        blurb=chunk.blurb,\n        # Includes extra content prepended/appended during indexing.\n        content=chunk.content,\n        # When we read a string and turn it into a dict the keys will be\n        # strings, but in this case they need to be ints.\n        source_links=(\n            {int(k): v for k, v in json.loads(chunk.source_links).items()}\n            if chunk.source_links\n            else None\n        ),\n        image_file_id=chunk.image_file_id,\n        # Deprecated. Fill in some reasonable default.\n        section_continuation=False,\n        document_id=chunk.document_id,\n        source_type=DocumentSource(chunk.source_type),\n        semantic_identifier=chunk.semantic_identifier,\n        title=chunk.title,\n        boost=chunk.global_boost,\n        score=score,\n        hidden=chunk.hidden,\n        metadata=(\n            convert_metadata_list_of_strings_to_dict(chunk.metadata_list)\n            if chunk.metadata_list\n            else {}\n        ),\n        # Extract highlighted snippets from the content field, if available. In\n        # the future we may want to match on other fields too, currently we only\n        # use the content field.\n        match_highlights=highlights.get(CONTENT_FIELD_NAME, []),\n        # TODO(andrei) Consider storing a chunk content index instead of a full\n        # string when working on chunk content augmentation.\n        doc_summary=chunk.doc_summary,\n        # TODO(andrei) Same thing as above.\n        chunk_context=chunk.chunk_context,\n        updated_at=chunk.last_updated,\n        primary_owners=chunk.primary_owners,\n        secondary_owners=chunk.secondary_owners,\n        # TODO(andrei) Same thing as chunk_context above.\n        metadata_suffix=chunk.metadata_suffix,\n    )\n\n\ndef _convert_onyx_chunk_to_opensearch_document(\n    chunk: DocMetadataAwareIndexChunk,\n) -> DocumentChunk:\n    filtered_blurb = remove_invalid_unicode_chars(chunk.blurb)\n    _title = chunk.source_document.get_title_for_document_index()\n    filtered_title = remove_invalid_unicode_chars(_title) if _title else None\n    filtered_content = remove_invalid_unicode_chars(\n        generate_enriched_content_for_chunk_text(chunk)\n    )\n    filtered_semantic_identifier = remove_invalid_unicode_chars(\n        chunk.source_document.semantic_identifier\n    )\n    filtered_metadata_suffix = remove_invalid_unicode_chars(\n        chunk.metadata_suffix_keyword\n    )\n    _metadata_list = chunk.source_document.get_metadata_str_attributes()\n    filtered_metadata_list = (\n        [remove_invalid_unicode_chars(metadata) for metadata in _metadata_list]\n        if _metadata_list\n        else None\n    )\n    return DocumentChunk(\n        document_id=chunk.source_document.id,\n        chunk_index=chunk.chunk_id,\n        # Use get_title_for_document_index to match the logic used when creating\n        # the title_embedding in the embedder. This method falls back to\n        # semantic_identifier when title is None (but not empty string).\n        title=filtered_title,\n        title_vector=chunk.title_embedding,\n        content=filtered_content,\n        content_vector=chunk.embeddings.full_embedding,\n        source_type=chunk.source_document.source.value,\n        metadata_list=filtered_metadata_list,\n        metadata_suffix=filtered_metadata_suffix,\n        last_updated=chunk.source_document.doc_updated_at,\n        public=chunk.access.is_public,\n        access_control_list=generate_opensearch_filtered_access_control_list(\n            chunk.access\n        ),\n        global_boost=chunk.boost,\n        semantic_identifier=filtered_semantic_identifier,\n        image_file_id=chunk.image_file_id,\n        # Small optimization, if this list is empty we can supply None to\n        # OpenSearch and it will not store any data at all for this field, which\n        # is different from supplying an empty list.\n        source_links=json.dumps(chunk.source_links) if chunk.source_links else None,\n        blurb=filtered_blurb,\n        doc_summary=chunk.doc_summary,\n        chunk_context=chunk.chunk_context,\n        # Small optimization, if this list is empty we can supply None to\n        # OpenSearch and it will not store any data at all for this field, which\n        # is different from supplying an empty list.\n        document_sets=list(chunk.document_sets) if chunk.document_sets else None,\n        # Small optimization, if this list is empty we can supply None to\n        # OpenSearch and it will not store any data at all for this field, which\n        # is different from supplying an empty list.\n        user_projects=chunk.user_project or None,\n        personas=chunk.personas or None,\n        primary_owners=get_experts_stores_representations(\n            chunk.source_document.primary_owners\n        ),\n        secondary_owners=get_experts_stores_representations(\n            chunk.source_document.secondary_owners\n        ),\n        # TODO(andrei): Consider not even getting this from\n        # DocMetadataAwareIndexChunk and instead using OpenSearchDocumentIndex's\n        # instance variable. One source of truth -> less chance of a very bad\n        # bug in prod.\n        tenant_id=TenantState(tenant_id=chunk.tenant_id, multitenant=MULTI_TENANT),\n        # Store ancestor hierarchy node IDs for hierarchy-based filtering.\n        ancestor_hierarchy_node_ids=chunk.ancestor_hierarchy_node_ids or None,\n    )\n\n\nclass OpenSearchOldDocumentIndex(OldDocumentIndex):\n    \"\"\"\n    Wrapper for OpenSearch to adapt the new DocumentIndex interface with\n    invocations to the old DocumentIndex interface in the hotpath.\n\n    The analogous class for Vespa is VespaIndex which calls to\n    VespaDocumentIndex.\n\n    TODO(andrei): This is very dumb and purely temporary until there are no more\n    references to the old interface in the hotpath.\n    \"\"\"\n\n    def __init__(\n        self,\n        index_name: str,\n        embedding_dim: int,\n        embedding_precision: EmbeddingPrecision,\n        secondary_index_name: str | None,\n        secondary_embedding_dim: int | None,\n        secondary_embedding_precision: EmbeddingPrecision | None,\n        # NOTE: We do not support large chunks right now.\n        large_chunks_enabled: bool,  # noqa: ARG002\n        secondary_large_chunks_enabled: bool | None,  # noqa: ARG002\n        multitenant: bool = False,\n        httpx_client: httpx.Client | None = None,  # noqa: ARG002\n    ) -> None:\n        super().__init__(\n            index_name=index_name,\n            secondary_index_name=secondary_index_name,\n        )\n        if multitenant != MULTI_TENANT:\n            raise ValueError(\n                \"Bug: Multitenant mismatch when initializing an OpenSearchDocumentIndex. \"\n                f\"Expected {MULTI_TENANT}, got {multitenant}.\"\n            )\n        tenant_id = get_current_tenant_id()\n        tenant_state = TenantState(tenant_id=tenant_id, multitenant=multitenant)\n        self._real_index = OpenSearchDocumentIndex(\n            tenant_state=tenant_state,\n            index_name=index_name,\n            embedding_dim=embedding_dim,\n            embedding_precision=embedding_precision,\n        )\n        self._secondary_real_index: OpenSearchDocumentIndex | None = None\n        if self.secondary_index_name:\n            if secondary_embedding_dim is None or secondary_embedding_precision is None:\n                raise ValueError(\n                    \"Bug: Secondary index embedding dimension and precision are not set.\"\n                )\n            self._secondary_real_index = OpenSearchDocumentIndex(\n                tenant_state=tenant_state,\n                index_name=self.secondary_index_name,\n                embedding_dim=secondary_embedding_dim,\n                embedding_precision=secondary_embedding_precision,\n            )\n\n    @staticmethod\n    def register_multitenant_indices(\n        indices: list[str],\n        embedding_dims: list[int],\n        embedding_precisions: list[EmbeddingPrecision],\n    ) -> None:\n        raise NotImplementedError(\n            \"Bug: Multitenant index registration is not supported for OpenSearch.\"\n        )\n\n    def ensure_indices_exist(\n        self,\n        primary_embedding_dim: int,\n        primary_embedding_precision: EmbeddingPrecision,\n        secondary_index_embedding_dim: int | None,\n        secondary_index_embedding_precision: EmbeddingPrecision | None,\n    ) -> None:\n        self._real_index.verify_and_create_index_if_necessary(\n            primary_embedding_dim, primary_embedding_precision\n        )\n        if self.secondary_index_name:\n            if (\n                secondary_index_embedding_dim is None\n                or secondary_index_embedding_precision is None\n            ):\n                raise ValueError(\n                    \"Bug: Secondary index embedding dimension and precision are not set.\"\n                )\n            assert (\n                self._secondary_real_index is not None\n            ), \"Bug: Secondary index is not initialized.\"\n            self._secondary_real_index.verify_and_create_index_if_necessary(\n                secondary_index_embedding_dim, secondary_index_embedding_precision\n            )\n\n    def index(\n        self,\n        chunks: Iterable[DocMetadataAwareIndexChunk],\n        index_batch_params: IndexBatchParams,\n    ) -> set[OldDocumentInsertionRecord]:\n        \"\"\"\n        NOTE: Do NOT consider the secondary index here. A separate indexing\n        pipeline will be responsible for indexing to the secondary index. This\n        design is not ideal and we should reconsider this when revamping index\n        swapping.\n        \"\"\"\n        # Convert IndexBatchParams to IndexingMetadata.\n        chunk_counts: dict[str, IndexingMetadata.ChunkCounts] = {}\n        for doc_id in index_batch_params.doc_id_to_new_chunk_cnt:\n            old_count = index_batch_params.doc_id_to_previous_chunk_cnt[doc_id]\n            new_count = index_batch_params.doc_id_to_new_chunk_cnt[doc_id]\n            chunk_counts[doc_id] = IndexingMetadata.ChunkCounts(\n                old_chunk_cnt=old_count,\n                new_chunk_cnt=new_count,\n            )\n\n        indexing_metadata = IndexingMetadata(doc_id_to_chunk_cnt_diff=chunk_counts)\n\n        results = self._real_index.index(chunks, indexing_metadata)\n\n        # Convert list[DocumentInsertionRecord] to\n        # set[OldDocumentInsertionRecord].\n        return {\n            OldDocumentInsertionRecord(\n                document_id=record.document_id,\n                already_existed=record.already_existed,\n            )\n            for record in results\n        }\n\n    def delete_single(\n        self,\n        doc_id: str,\n        *,\n        tenant_id: str,  # noqa: ARG002\n        chunk_count: int | None,\n    ) -> int:\n        \"\"\"\n        NOTE: Remember to handle the secondary index here. There is no separate\n        pipeline for deleting chunks in the secondary index. This design is not\n        ideal and we should reconsider this when revamping index swapping.\n        \"\"\"\n        total_chunks_deleted = self._real_index.delete(doc_id, chunk_count)\n        if self.secondary_index_name:\n            assert (\n                self._secondary_real_index is not None\n            ), \"Bug: Secondary index is not initialized.\"\n            total_chunks_deleted += self._secondary_real_index.delete(\n                doc_id, chunk_count\n            )\n        return total_chunks_deleted\n\n    def update_single(\n        self,\n        doc_id: str,\n        *,\n        tenant_id: str,  # noqa: ARG002\n        chunk_count: int | None,\n        fields: VespaDocumentFields | None,\n        user_fields: VespaDocumentUserFields | None,\n    ) -> None:\n        \"\"\"\n        NOTE: Remember to handle the secondary index here. There is no separate\n        pipeline for updating chunks in the secondary index. This design is not\n        ideal and we should reconsider this when revamping index swapping.\n        \"\"\"\n        if fields is None and user_fields is None:\n            logger.warning(\n                f\"Tried to update document {doc_id} with no updated fields or user fields.\"\n            )\n            return\n\n        # Convert VespaDocumentFields to MetadataUpdateRequest.\n        update_request = MetadataUpdateRequest(\n            document_ids=[doc_id],\n            doc_id_to_chunk_cnt={\n                doc_id: chunk_count if chunk_count is not None else -1\n            },\n            access=fields.access if fields else None,\n            document_sets=fields.document_sets if fields else None,\n            boost=fields.boost if fields else None,\n            hidden=fields.hidden if fields else None,\n            project_ids=(\n                set(user_fields.user_projects)\n                # NOTE: Empty user_projects is semantically different from None\n                # user_projects.\n                if user_fields and user_fields.user_projects is not None\n                else None\n            ),\n            persona_ids=(\n                set(user_fields.personas)\n                # NOTE: Empty personas is semantically different from None\n                # personas.\n                if user_fields and user_fields.personas is not None\n                else None\n            ),\n        )\n\n        try:\n            self._real_index.update([update_request])\n            if self.secondary_index_name:\n                assert (\n                    self._secondary_real_index is not None\n                ), \"Bug: Secondary index is not initialized.\"\n                self._secondary_real_index.update([update_request])\n        except NotFoundError:\n            logger.exception(\n                f\"Tried to update document {doc_id} but at least one of its chunks was not found in OpenSearch. \"\n                \"This is likely due to it not having been indexed yet. Skipping update for now...\"\n            )\n            return\n        except ChunkCountNotFoundError:\n            logger.exception(\n                f\"Tried to update document {doc_id} but its chunk count is not known. We tolerate this for now \"\n                \"but this will not be an acceptable state once OpenSearch is the primary document index and the \"\n                \"indexing/updating race condition is fixed.\"\n            )\n            return\n\n    def id_based_retrieval(\n        self,\n        chunk_requests: list[VespaChunkRequest],\n        filters: IndexFilters,\n        batch_retrieval: bool = False,\n        get_large_chunks: bool = False,  # noqa: ARG002\n    ) -> list[InferenceChunk]:\n        section_requests = [\n            DocumentSectionRequest(\n                document_id=req.document_id,\n                min_chunk_ind=req.min_chunk_ind,\n                max_chunk_ind=req.max_chunk_ind,\n            )\n            for req in chunk_requests\n        ]\n\n        return self._real_index.id_based_retrieval(\n            section_requests, filters, batch_retrieval\n        )\n\n    def hybrid_retrieval(\n        self,\n        query: str,\n        query_embedding: Embedding,\n        final_keywords: list[str] | None,\n        filters: IndexFilters,\n        hybrid_alpha: float,\n        time_decay_multiplier: float,  # noqa: ARG002\n        num_to_retrieve: int,\n        ranking_profile_type: QueryExpansionType = QueryExpansionType.SEMANTIC,  # noqa: ARG002\n        title_content_ratio: float | None = TITLE_CONTENT_RATIO,  # noqa: ARG002\n    ) -> list[InferenceChunk]:\n        # Determine query type based on hybrid_alpha.\n        if hybrid_alpha >= 0.8:\n            query_type = QueryType.SEMANTIC\n        elif hybrid_alpha <= 0.2:\n            query_type = QueryType.KEYWORD\n        else:\n            query_type = QueryType.SEMANTIC  # Default to semantic for hybrid.\n\n        return self._real_index.hybrid_retrieval(\n            query=query,\n            query_embedding=query_embedding,\n            final_keywords=final_keywords,\n            query_type=query_type,\n            filters=filters,\n            num_to_retrieve=num_to_retrieve,\n        )\n\n    def admin_retrieval(\n        self,\n        query: str,\n        query_embedding: Embedding,\n        filters: IndexFilters,\n        num_to_retrieve: int = NUM_RETURNED_HITS,\n    ) -> list[InferenceChunk]:\n        return self._real_index.hybrid_retrieval(\n            query=query,\n            query_embedding=query_embedding,\n            final_keywords=None,\n            query_type=QueryType.KEYWORD,\n            filters=filters,\n            num_to_retrieve=num_to_retrieve,\n        )\n\n    def random_retrieval(\n        self,\n        filters: IndexFilters,\n        num_to_retrieve: int = 10,\n    ) -> list[InferenceChunk]:\n        return self._real_index.random_retrieval(\n            filters=filters,\n            num_to_retrieve=num_to_retrieve,\n            dirty=None,\n        )\n\n\nclass OpenSearchDocumentIndex(DocumentIndex):\n    \"\"\"OpenSearch-specific implementation of the DocumentIndex interface.\n\n    This class provides document indexing, retrieval, and management operations\n    for an OpenSearch search engine instance. It handles the complete lifecycle\n    of document chunks within a specific OpenSearch index/schema.\n\n    Each kind of embedding used should correspond to a different instance of\n    this class, and therefore a different index in OpenSearch.\n\n    If in a multitenant environment and\n    VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT, will verify and create the index\n    if necessary on initialization. This is because there is no logic which runs\n    on cluster restart which scans through all search settings over all tenants\n    and creates the relevant indices.\n\n    Args:\n        tenant_state: The tenant state of the caller.\n        index_name: The name of the index to interact with.\n        embedding_dim: The dimensionality of the embeddings used for the index.\n        embedding_precision: The precision of the embeddings used for the index.\n    \"\"\"\n\n    def __init__(\n        self,\n        tenant_state: TenantState,\n        index_name: str,\n        embedding_dim: int,\n        embedding_precision: EmbeddingPrecision,\n    ) -> None:\n        self._index_name: str = index_name\n        self._tenant_state: TenantState = tenant_state\n        self._client = OpenSearchIndexClient(index_name=self._index_name)\n\n        if self._tenant_state.multitenant and VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT:\n            self.verify_and_create_index_if_necessary(\n                embedding_dim=embedding_dim, embedding_precision=embedding_precision\n            )\n\n    def verify_and_create_index_if_necessary(\n        self,\n        embedding_dim: int,\n        embedding_precision: EmbeddingPrecision,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Verifies and creates the index if necessary.\n\n        Also puts the desired cluster settings if not in a multitenant\n        environment.\n\n        Also puts the desired search pipeline state if not in a multitenant\n        environment, creating the pipelines if they do not exist and updating\n        them otherwise.\n\n        In a multitenant environment, the above steps happen explicitly on\n        setup.\n\n        Args:\n            embedding_dim: Vector dimensionality for the vector similarity part\n                of the search.\n            embedding_precision: Precision of the values of the vectors for the\n                similarity part of the search.\n\n        Raises:\n            Exception: There was an error verifying or creating the index or\n                search pipelines.\n        \"\"\"\n        logger.debug(\n            f\"[OpenSearchDocumentIndex] Verifying and creating index {self._index_name} if \"\n            f\"necessary, with embedding dimension {embedding_dim}.\"\n        )\n\n        if not self._tenant_state.multitenant:\n            set_cluster_state(self._client)\n\n        expected_mappings = DocumentSchema.get_document_schema(\n            embedding_dim, self._tenant_state.multitenant\n        )\n\n        if not self._client.index_exists():\n            index_settings = DocumentSchema.get_index_settings_based_on_environment()\n            self._client.create_index(\n                mappings=expected_mappings,\n                settings=index_settings,\n            )\n        else:\n            # Ensure schema is up to date by applying the current mappings.\n            try:\n                self._client.put_mapping(expected_mappings)\n            except Exception as e:\n                logger.error(\n                    f\"Failed to update mappings for index {self._index_name}. This likely means a \"\n                    f\"field type was changed which requires reindexing. Error: {e}\"\n                )\n                raise\n\n    def index(\n        self,\n        chunks: Iterable[DocMetadataAwareIndexChunk],\n        indexing_metadata: IndexingMetadata,\n    ) -> list[DocumentInsertionRecord]:\n        \"\"\"Indexes an iterable of document chunks into the document index.\n\n        Groups chunks by document ID and for each document, deletes existing\n        chunks and indexes the new chunks in bulk.\n\n        NOTE: It is assumed that chunks for a given document are not spread out\n        over multiple index() calls.\n\n        Args:\n            chunks: Document chunks with all of the information needed for\n                indexing to the document index.\n            indexing_metadata: Information about chunk counts for efficient\n                cleaning / updating.\n\n        Raises:\n            Exception: Failed to index some or all of the chunks for the\n                specified documents.\n\n        Returns:\n            List of document IDs which map to unique documents as well as if the\n                document is newly indexed or had already existed and was just\n                updated.\n        \"\"\"\n        total_chunks = sum(\n            cc.new_chunk_cnt\n            for cc in indexing_metadata.doc_id_to_chunk_cnt_diff.values()\n        )\n        logger.debug(\n            f\"[OpenSearchDocumentIndex] Indexing {total_chunks} chunks from {len(indexing_metadata.doc_id_to_chunk_cnt_diff)} \"\n            f\"documents for index {self._index_name}.\"\n        )\n\n        document_indexing_results: list[DocumentInsertionRecord] = []\n        deleted_doc_ids: set[str] = set()\n        # Buffer chunks per document as they arrive from the iterable.\n        # When the document ID changes flush the buffered chunks.\n        current_doc_id: str | None = None\n        current_chunks: list[DocMetadataAwareIndexChunk] = []\n\n        def _flush_chunks(doc_chunks: list[DocMetadataAwareIndexChunk]) -> None:\n            assert len(doc_chunks) > 0, \"doc_chunks is empty\"\n\n            # Create a batch of OpenSearch-formatted chunks for bulk insertion.\n            # Since we are doing this in batches, an error occurring midway\n            # can result in a state where chunks are deleted and not all the\n            # new chunks have been indexed.\n            chunk_batch: list[DocumentChunk] = [\n                _convert_onyx_chunk_to_opensearch_document(chunk)\n                for chunk in doc_chunks\n            ]\n            onyx_document: Document = doc_chunks[0].source_document\n            # First delete the doc's chunks from the index. This is so that\n            # there are no dangling chunks in the index, in the event that the\n            # new document's content contains fewer chunks than the previous\n            # content.\n            # TODO(andrei): This can possibly be made more efficient by checking\n            # if the chunk count has actually decreased. This assumes that\n            # overlapping chunks are perfectly overwritten. If we can't\n            # guarantee that then we need the code as-is.\n            if onyx_document.id not in deleted_doc_ids:\n                num_chunks_deleted = self.delete(\n                    onyx_document.id, onyx_document.chunk_count\n                )\n                deleted_doc_ids.add(onyx_document.id)\n                # If we see that chunks were deleted we assume the doc already\n                # existed. We record the result before bulk_index_documents\n                # runs. If indexing raises, this entire result list is discarded\n                # by the caller's retry logic, so early recording is safe.\n                document_indexing_results.append(\n                    DocumentInsertionRecord(\n                        document_id=onyx_document.id,\n                        already_existed=num_chunks_deleted > 0,\n                    )\n                )\n            # Now index. This will raise if a chunk of the same ID exists, which\n            # we do not expect because we should have deleted all chunks.\n            self._client.bulk_index_documents(\n                documents=chunk_batch,\n                tenant_state=self._tenant_state,\n            )\n\n        for chunk in chunks:\n            doc_id = chunk.source_document.id\n            if doc_id != current_doc_id:\n                if current_chunks:\n                    _flush_chunks(current_chunks)\n                current_doc_id = doc_id\n                current_chunks = [chunk]\n            elif len(current_chunks) >= MAX_CHUNKS_PER_DOC_BATCH:\n                _flush_chunks(current_chunks)\n                current_chunks = [chunk]\n            else:\n                current_chunks.append(chunk)\n\n        if current_chunks:\n            _flush_chunks(current_chunks)\n\n        return document_indexing_results\n\n    def delete(\n        self,\n        document_id: str,\n        chunk_count: int | None = None,  # noqa: ARG002\n    ) -> int:\n        \"\"\"Deletes all chunks for a given document.\n\n        Does nothing if the specified document ID does not exist.\n\n        TODO(andrei): Consider implementing this method to delete on document\n        chunk IDs vs querying for matching document chunks. Unclear if this is\n        any better though.\n\n        Args:\n            document_id: The unique identifier for the document as represented\n                in Onyx, not necessarily in the document index.\n            chunk_count: The number of chunks in OpenSearch for the document.\n                Defaults to None.\n\n        Raises:\n            Exception: Failed to delete some or all of the chunks for the\n                document.\n\n        Returns:\n            The number of chunks successfully deleted.\n        \"\"\"\n        logger.debug(\n            f\"[OpenSearchDocumentIndex] Deleting document {document_id} from index {self._index_name}.\"\n        )\n        query_body = DocumentQuery.delete_from_document_id_query(\n            document_id=document_id,\n            tenant_state=self._tenant_state,\n        )\n\n        return self._client.delete_by_query(query_body)\n\n    def update(\n        self,\n        update_requests: list[MetadataUpdateRequest],\n    ) -> None:\n        \"\"\"Updates some set of chunks.\n\n        NOTE: Will raise if one of the specified document chunks do not exist.\n        This may be due to a concurrent ongoing indexing operation. In that\n        event callers are expected to retry after a bit once the state of the\n        document index is updated.\n        NOTE: Requires document chunk count be known; will raise if it is not.\n        This may be caused by the same situation outlined above.\n        NOTE: Will no-op if an update request has no fields to update.\n\n        TODO(andrei): Consider exploring a batch API for OpenSearch for this\n        operation.\n\n        Args:\n            update_requests: A list of update requests, each containing a list\n                of document IDs and the fields to update. The field updates\n                apply to all of the specified documents in each update request.\n\n        Raises:\n            Exception: Failed to update some or all of the chunks for the\n                specified documents.\n        \"\"\"\n        logger.debug(\n            f\"[OpenSearchDocumentIndex] Updating {len(update_requests)} chunks for index {self._index_name}.\"\n        )\n        for update_request in update_requests:\n            properties_to_update: dict[str, Any] = dict()\n            # TODO(andrei): Nit but consider if we can use DocumentChunk\n            # here so we don't have to think about passing in the\n            # appropriate types into this dict.\n            if update_request.access is not None:\n                properties_to_update[ACCESS_CONTROL_LIST_FIELD_NAME] = (\n                    generate_opensearch_filtered_access_control_list(\n                        update_request.access\n                    )\n                )\n            if update_request.document_sets is not None:\n                properties_to_update[DOCUMENT_SETS_FIELD_NAME] = list(\n                    update_request.document_sets\n                )\n            if update_request.boost is not None:\n                properties_to_update[GLOBAL_BOOST_FIELD_NAME] = int(\n                    update_request.boost\n                )\n            if update_request.hidden is not None:\n                properties_to_update[HIDDEN_FIELD_NAME] = update_request.hidden\n            if update_request.project_ids is not None:\n                properties_to_update[USER_PROJECTS_FIELD_NAME] = list(\n                    update_request.project_ids\n                )\n            if update_request.persona_ids is not None:\n                properties_to_update[PERSONAS_FIELD_NAME] = list(\n                    update_request.persona_ids\n                )\n\n            if not properties_to_update:\n                if len(update_request.document_ids) > 1:\n                    update_string = f\"{len(update_request.document_ids)} documents\"\n                else:\n                    update_string = f\"document {update_request.document_ids[0]}\"\n                logger.warning(\n                    f\"[OpenSearchDocumentIndex] Tried to update {update_string} \"\n                    \"with no specified update fields. This will be a no-op.\"\n                )\n                continue\n\n            for doc_id in update_request.document_ids:\n                doc_chunk_count = update_request.doc_id_to_chunk_cnt.get(doc_id, -1)\n                if doc_chunk_count < 0:\n                    # This means the chunk count is not known. This is due to a\n                    # race condition between doc indexing and updating steps\n                    # which run concurrently when a doc is indexed. The indexing\n                    # step should update chunk count shortly. This could also\n                    # have been due to an older version of the indexing pipeline\n                    # which did not compute chunk count, but that codepath has\n                    # since been deprecated and should no longer be the case\n                    # here.\n                    # TODO(andrei): Fix the aforementioned race condition.\n                    raise ChunkCountNotFoundError(\n                        f\"Tried to update document {doc_id} but its chunk count is not known. \"\n                        \"Older versions of the application used to permit this but is not a \"\n                        \"supported state for a document when using OpenSearch. The document was \"\n                        \"likely just added to the indexing pipeline and the chunk count will be \"\n                        \"updated shortly.\"\n                    )\n                if doc_chunk_count == 0:\n                    raise ValueError(\n                        f\"Bug: Tried to update document {doc_id} but its chunk count was 0.\"\n                    )\n\n                for chunk_index in range(doc_chunk_count):\n                    document_chunk_id = get_opensearch_doc_chunk_id(\n                        tenant_state=self._tenant_state,\n                        document_id=doc_id,\n                        chunk_index=chunk_index,\n                    )\n                    self._client.update_document(\n                        document_chunk_id=document_chunk_id,\n                        properties_to_update=properties_to_update,\n                    )\n\n    def id_based_retrieval(\n        self,\n        chunk_requests: list[DocumentSectionRequest],\n        filters: IndexFilters,\n        # TODO(andrei): Remove this from the new interface at some point; we\n        # should not be exposing this.\n        batch_retrieval: bool = False,  # noqa: ARG002\n        # TODO(andrei): Add a param for whether to retrieve hidden docs.\n    ) -> list[InferenceChunk]:\n        \"\"\"\n        TODO(andrei): Consider implementing this method to retrieve on document\n        chunk IDs vs querying for matching document chunks.\n        \"\"\"\n        logger.debug(\n            f\"[OpenSearchDocumentIndex] Retrieving {len(chunk_requests)} chunks for index {self._index_name}.\"\n        )\n        results: list[InferenceChunk] = []\n        for chunk_request in chunk_requests:\n            search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = []\n            query_body = DocumentQuery.get_from_document_id_query(\n                document_id=chunk_request.document_id,\n                tenant_state=self._tenant_state,\n                # NOTE: Index filters includes metadata tags which were filtered\n                # for invalid unicode at indexing time. In theory it would be\n                # ideal to do filtering here as well, in practice we never did\n                # that in the Vespa codepath and have not seen issues in\n                # production, so we deliberately conform to the existing logic\n                # in order to not unknowningly introduce a possible bug.\n                index_filters=filters,\n                include_hidden=False,\n                max_chunk_size=chunk_request.max_chunk_size,\n                min_chunk_index=chunk_request.min_chunk_ind,\n                max_chunk_index=chunk_request.max_chunk_ind,\n            )\n            search_hits = self._client.search(\n                body=query_body,\n                search_pipeline_id=None,\n                search_type=OpenSearchSearchType.DOC_ID_RETRIEVAL,\n            )\n            inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [\n                _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(\n                    search_hit.document_chunk, None, {}\n                )\n                for search_hit in search_hits\n            ]\n            inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(\n                inference_chunks_uncleaned\n            )\n            results.extend(inference_chunks)\n        return results\n\n    def hybrid_retrieval(\n        self,\n        query: str,\n        query_embedding: Embedding,\n        # TODO(andrei): This param is not great design, get rid of it.\n        final_keywords: list[str] | None,\n        query_type: QueryType,  # noqa: ARG002\n        filters: IndexFilters,\n        num_to_retrieve: int,\n    ) -> list[InferenceChunk]:\n        # TODO(andrei): There is some duplicated logic in this function with\n        # others in this file.\n        logger.debug(\n            f\"[OpenSearchDocumentIndex] Hybrid retrieving {num_to_retrieve} chunks for index {self._index_name}.\"\n        )\n        # TODO(andrei): This could be better, the caller should just make this\n        # decision when passing in the query param. See the above comment in the\n        # function signature.\n        final_query = \" \".join(final_keywords) if final_keywords else query\n        query_body = DocumentQuery.get_hybrid_search_query(\n            query_text=final_query,\n            query_vector=query_embedding,\n            num_hits=num_to_retrieve,\n            tenant_state=self._tenant_state,\n            # NOTE: Index filters includes metadata tags which were filtered\n            # for invalid unicode at indexing time. In theory it would be\n            # ideal to do filtering here as well, in practice we never did\n            # that in the Vespa codepath and have not seen issues in\n            # production, so we deliberately conform to the existing logic\n            # in order to not unknowningly introduce a possible bug.\n            index_filters=filters,\n            include_hidden=False,\n        )\n        normalization_pipeline_name, _ = get_normalization_pipeline_name_and_config()\n        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(\n            body=query_body,\n            search_pipeline_id=normalization_pipeline_name,\n            search_type=OpenSearchSearchType.HYBRID,\n        )\n\n        # Good place for a breakpoint to inspect the search hits if you have\n        # \"explain\" enabled.\n        inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [\n            _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(\n                search_hit.document_chunk, search_hit.score, search_hit.match_highlights\n            )\n            for search_hit in search_hits\n        ]\n        inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(\n            inference_chunks_uncleaned\n        )\n\n        return inference_chunks\n\n    def keyword_retrieval(\n        self,\n        query: str,\n        filters: IndexFilters,\n        num_to_retrieve: int,\n    ) -> list[InferenceChunk]:\n        # TODO(andrei): There is some duplicated logic in this function with\n        # others in this file.\n        logger.debug(\n            f\"[OpenSearchDocumentIndex] Keyword retrieving {num_to_retrieve} chunks for index {self._index_name}.\"\n        )\n        query_body = DocumentQuery.get_keyword_search_query(\n            query_text=query,\n            num_hits=num_to_retrieve,\n            tenant_state=self._tenant_state,\n            # NOTE: Index filters includes metadata tags which were filtered\n            # for invalid unicode at indexing time. In theory it would be\n            # ideal to do filtering here as well, in practice we never did\n            # that in the Vespa codepath and have not seen issues in\n            # production, so we deliberately conform to the existing logic\n            # in order to not unknowningly introduce a possible bug.\n            index_filters=filters,\n            include_hidden=False,\n        )\n        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(\n            body=query_body,\n            search_pipeline_id=None,\n            search_type=OpenSearchSearchType.KEYWORD,\n        )\n\n        inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [\n            _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(\n                search_hit.document_chunk, search_hit.score, search_hit.match_highlights\n            )\n            for search_hit in search_hits\n        ]\n        inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(\n            inference_chunks_uncleaned\n        )\n\n        return inference_chunks\n\n    def semantic_retrieval(\n        self,\n        query_embedding: Embedding,\n        filters: IndexFilters,\n        num_to_retrieve: int,\n    ) -> list[InferenceChunk]:\n        # TODO(andrei): There is some duplicated logic in this function with\n        # others in this file.\n        logger.debug(\n            f\"[OpenSearchDocumentIndex] Semantic retrieving {num_to_retrieve} chunks for index {self._index_name}.\"\n        )\n        query_body = DocumentQuery.get_semantic_search_query(\n            query_embedding=query_embedding,\n            num_hits=num_to_retrieve,\n            tenant_state=self._tenant_state,\n            # NOTE: Index filters includes metadata tags which were filtered\n            # for invalid unicode at indexing time. In theory it would be\n            # ideal to do filtering here as well, in practice we never did\n            # that in the Vespa codepath and have not seen issues in\n            # production, so we deliberately conform to the existing logic\n            # in order to not unknowningly introduce a possible bug.\n            index_filters=filters,\n            include_hidden=False,\n        )\n        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(\n            body=query_body,\n            search_pipeline_id=None,\n            search_type=OpenSearchSearchType.SEMANTIC,\n        )\n\n        inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [\n            _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(\n                search_hit.document_chunk, search_hit.score, search_hit.match_highlights\n            )\n            for search_hit in search_hits\n        ]\n        inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(\n            inference_chunks_uncleaned\n        )\n\n        return inference_chunks\n\n    def random_retrieval(\n        self,\n        filters: IndexFilters,\n        num_to_retrieve: int = 10,\n        dirty: bool | None = None,  # noqa: ARG002\n    ) -> list[InferenceChunk]:\n        logger.debug(\n            f\"[OpenSearchDocumentIndex] Randomly retrieving {num_to_retrieve} chunks for index {self._index_name}.\"\n        )\n        query_body = DocumentQuery.get_random_search_query(\n            tenant_state=self._tenant_state,\n            index_filters=filters,\n            num_to_retrieve=num_to_retrieve,\n        )\n        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(\n            body=query_body,\n            search_pipeline_id=None,\n            search_type=OpenSearchSearchType.RANDOM,\n        )\n        inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [\n            _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(\n                search_hit.document_chunk, search_hit.score, search_hit.match_highlights\n            )\n            for search_hit in search_hits\n        ]\n        inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(\n            inference_chunks_uncleaned\n        )\n\n        return inference_chunks\n\n    def index_raw_chunks(self, chunks: list[DocumentChunk]) -> None:\n        \"\"\"Indexes raw document chunks into OpenSearch.\n\n        Used in the Vespa migration task. Can be deleted after migrations are\n        complete.\n        \"\"\"\n        logger.debug(\n            f\"[OpenSearchDocumentIndex] Indexing {len(chunks)} raw chunks for index {self._index_name}.\"\n        )\n        # Do not raise if the document already exists, just update. This is\n        # because the document may already have been indexed during the\n        # OpenSearch transition period.\n        self._client.bulk_index_documents(\n            documents=chunks, tenant_state=self._tenant_state, update_if_exists=True\n        )\n"
  },
  {
    "path": "backend/onyx/document_index/opensearch/schema.py",
    "content": "import hashlib\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import Self\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\nfrom pydantic import field_serializer\nfrom pydantic import field_validator\nfrom pydantic import model_serializer\nfrom pydantic import model_validator\nfrom pydantic import SerializerFunctionWrapHandler\n\nfrom onyx.configs.app_configs import OPENSEARCH_INDEX_NUM_REPLICAS\nfrom onyx.configs.app_configs import OPENSEARCH_INDEX_NUM_SHARDS\nfrom onyx.configs.app_configs import OPENSEARCH_TEXT_ANALYZER\nfrom onyx.configs.app_configs import USING_AWS_MANAGED_OPENSEARCH\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE\nfrom onyx.document_index.opensearch.constants import EF_CONSTRUCTION\nfrom onyx.document_index.opensearch.constants import EF_SEARCH\nfrom onyx.document_index.opensearch.constants import M\nfrom onyx.document_index.opensearch.string_filtering import DocumentIDTooLongError\nfrom onyx.document_index.opensearch.string_filtering import (\n    filter_and_validate_document_id,\n)\nfrom onyx.document_index.opensearch.string_filtering import (\n    MAX_DOCUMENT_ID_ENCODED_LENGTH,\n)\nfrom onyx.utils.tenant import get_tenant_id_short_string\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\n\n\nTITLE_FIELD_NAME = \"title\"\nTITLE_VECTOR_FIELD_NAME = \"title_vector\"\nCONTENT_FIELD_NAME = \"content\"\nCONTENT_VECTOR_FIELD_NAME = \"content_vector\"\nSOURCE_TYPE_FIELD_NAME = \"source_type\"\nMETADATA_LIST_FIELD_NAME = \"metadata_list\"\nLAST_UPDATED_FIELD_NAME = \"last_updated\"\nPUBLIC_FIELD_NAME = \"public\"\nACCESS_CONTROL_LIST_FIELD_NAME = \"access_control_list\"\nHIDDEN_FIELD_NAME = \"hidden\"\nGLOBAL_BOOST_FIELD_NAME = \"global_boost\"\nSEMANTIC_IDENTIFIER_FIELD_NAME = \"semantic_identifier\"\nIMAGE_FILE_ID_FIELD_NAME = \"image_file_id\"\nSOURCE_LINKS_FIELD_NAME = \"source_links\"\nDOCUMENT_SETS_FIELD_NAME = \"document_sets\"\nUSER_PROJECTS_FIELD_NAME = \"user_projects\"\nPERSONAS_FIELD_NAME = \"personas\"\nDOCUMENT_ID_FIELD_NAME = \"document_id\"\nCHUNK_INDEX_FIELD_NAME = \"chunk_index\"\nMAX_CHUNK_SIZE_FIELD_NAME = \"max_chunk_size\"\nTENANT_ID_FIELD_NAME = \"tenant_id\"\nBLURB_FIELD_NAME = \"blurb\"\nDOC_SUMMARY_FIELD_NAME = \"doc_summary\"\nCHUNK_CONTEXT_FIELD_NAME = \"chunk_context\"\nMETADATA_SUFFIX_FIELD_NAME = \"metadata_suffix\"\nPRIMARY_OWNERS_FIELD_NAME = \"primary_owners\"\nSECONDARY_OWNERS_FIELD_NAME = \"secondary_owners\"\n# Hierarchy filtering - list of ancestor hierarchy node IDs\nANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME = \"ancestor_hierarchy_node_ids\"\n\n\n# Faiss was also tried but it didn't have any benefits\n# NMSLIB is deprecated, not recommended\nOPENSEARCH_KNN_ENGINE = \"lucene\"\n\n\ndef get_opensearch_doc_chunk_id(\n    tenant_state: TenantState,\n    document_id: str,\n    chunk_index: int,\n    max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE,\n) -> str:\n    \"\"\"\n    Returns a unique identifier for the chunk.\n\n    This will be the string used to identify the chunk in OpenSearch. Any direct\n    chunk queries should use this function.\n\n    If the document ID is too long, a hash of the ID is used instead.\n    \"\"\"\n    opensearch_doc_chunk_id_suffix: str = f\"__{max_chunk_size}__{chunk_index}\"\n    encoded_suffix_length: int = len(opensearch_doc_chunk_id_suffix.encode(\"utf-8\"))\n    max_encoded_permissible_doc_id_length: int = (\n        MAX_DOCUMENT_ID_ENCODED_LENGTH - encoded_suffix_length\n    )\n    opensearch_doc_chunk_id_tenant_prefix: str = \"\"\n    if tenant_state.multitenant:\n        short_tenant_id: str = get_tenant_id_short_string(tenant_state.tenant_id)\n        # Use tenant ID because in multitenant mode each tenant has its own\n        # Documents table, so there is a very small chance that doc IDs are not\n        # actually unique across all tenants.\n        opensearch_doc_chunk_id_tenant_prefix = f\"{short_tenant_id}__\"\n        encoded_prefix_length: int = len(\n            opensearch_doc_chunk_id_tenant_prefix.encode(\"utf-8\")\n        )\n        max_encoded_permissible_doc_id_length -= encoded_prefix_length\n\n    try:\n        sanitized_document_id: str = filter_and_validate_document_id(\n            document_id, max_encoded_length=max_encoded_permissible_doc_id_length\n        )\n    except DocumentIDTooLongError:\n        # If the document ID is too long, use a hash instead.\n        # We use blake2b because it is faster and equally secure as SHA256, and\n        # accepts digest_size which controls the number of bytes returned in the\n        # hash.\n        # digest_size is the size of the returned hash in bytes. Since we're\n        # decoding the hash bytes as a hex string, the digest_size should be\n        # half the max target size of the hash string.\n        # Subtract 1 because filter_and_validate_document_id compares on >= on\n        # max_encoded_length.\n        # 64 is the max digest_size blake2b returns.\n        digest_size: int = min((max_encoded_permissible_doc_id_length - 1) // 2, 64)\n        sanitized_document_id = hashlib.blake2b(\n            document_id.encode(\"utf-8\"), digest_size=digest_size\n        ).hexdigest()\n\n    opensearch_doc_chunk_id: str = (\n        f\"{opensearch_doc_chunk_id_tenant_prefix}{sanitized_document_id}{opensearch_doc_chunk_id_suffix}\"\n    )\n\n    # Do one more validation to ensure we haven't exceeded the max length.\n    opensearch_doc_chunk_id = filter_and_validate_document_id(opensearch_doc_chunk_id)\n    return opensearch_doc_chunk_id\n\n\ndef set_or_convert_timezone_to_utc(value: datetime) -> datetime:\n    if value.tzinfo is None:\n        # astimezone will raise if value does not have a timezone set.\n        value = value.replace(tzinfo=timezone.utc)\n    else:\n        # Does appropriate time conversion if value was set in a different\n        # timezone.\n        value = value.astimezone(timezone.utc)\n    return value\n\n\nclass DocumentChunkWithoutVectors(BaseModel):\n    \"\"\"\n    Represents a chunk of a document in the OpenSearch index without vectors.\n\n    The names of these fields are based on the OpenSearch schema. Changes to the\n    schema require changes here. See get_document_schema.\n\n    WARNING: Relies on MULTI_TENANT which is global state. Also uses\n    get_current_tenant_id. Generally relying on global state is bad, in this\n    case we accept it because of the importance of validating tenant logic.\n    \"\"\"\n\n    model_config = {\"frozen\": True}\n\n    document_id: str\n    chunk_index: int\n    # The maximum number of tokens this chunk's content can hold. Previously\n    # there was a concept of large chunks, this is a generic concept of that. We\n    # can choose to have any size of chunks in the index and they should be\n    # distinct from one another.\n    max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE\n\n    # Either both should be None or both should be non-None.\n    title: str | None = None\n    content: str\n\n    source_type: str\n    # A list of key-value pairs separated by INDEX_SEPARATOR. See\n    # convert_metadata_dict_to_list_of_strings.\n    metadata_list: list[str] | None = None\n    # If it exists, time zone should always be UTC.\n    last_updated: datetime | None = None\n\n    public: bool\n    access_control_list: list[str]\n    # Defaults to False, currently gets written during update not index.\n    hidden: bool = False\n\n    global_boost: int\n\n    semantic_identifier: str\n    image_file_id: str | None = None\n    # Contains a string representation of a dict which maps offset into the raw\n    # chunk text to the link corresponding to that point.\n    source_links: str | None = None\n    blurb: str\n    # doc_summary, chunk_context, and metadata_suffix are all stored simply to\n    # reverse the augmentations to content. Ideally these would just be start\n    # and stop indices into the content string. For legacy reasons they are not\n    # right now.\n    doc_summary: str\n    chunk_context: str\n    metadata_suffix: str | None = None\n\n    document_sets: list[str] | None = None\n    user_projects: list[int] | None = None\n    personas: list[int] | None = None\n    primary_owners: list[str] | None = None\n    secondary_owners: list[str] | None = None\n\n    # List of ancestor hierarchy node IDs for hierarchy-based filtering.\n    # None means no hierarchy info (document will be excluded from\n    # hierarchy-filtered searches).\n    ancestor_hierarchy_node_ids: list[int] | None = None\n\n    tenant_id: TenantState = Field(\n        default_factory=lambda: TenantState(\n            tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT\n        )\n    )\n\n    def __str__(self) -> str:\n        return (\n            f\"DocumentChunk(document_id={self.document_id}, chunk_index={self.chunk_index}, \"\n            f\"content length={len(self.content)}, tenant_id={self.tenant_id.tenant_id}).\"\n        )\n\n    @model_serializer(mode=\"wrap\")\n    def serialize_model(\n        self, handler: SerializerFunctionWrapHandler\n    ) -> dict[str, object]:\n        \"\"\"Invokes pydantic's serialization logic, then excludes Nones.\n\n        We do this because .model_dump(exclude_none=True) does not work after\n        @field_serializer logic, so for some field serializers which return None\n        and which we would like to exclude from the final dump, they would be\n        included without this.\n\n        Args:\n            handler: Callable from pydantic which takes the instance of the\n                model as an argument and performs standard serialization.\n\n        Returns:\n            The return of handler but with None items excluded.\n        \"\"\"\n        serialized: dict[str, object] = handler(self)\n        serialized_exclude_none = {k: v for k, v in serialized.items() if v is not None}\n        return serialized_exclude_none\n\n    @field_serializer(\"last_updated\", mode=\"wrap\")\n    def serialize_datetime_fields_to_epoch_seconds(\n        self,\n        value: datetime | None,\n        handler: SerializerFunctionWrapHandler,  # noqa: ARG002\n    ) -> int | None:\n        \"\"\"\n        Serializes datetime fields to seconds since the Unix epoch.\n\n        If there is no datetime, returns None.\n        \"\"\"\n        if value is None:\n            return None\n        value = set_or_convert_timezone_to_utc(value)\n        return int(value.timestamp())\n\n    @field_validator(\"last_updated\", mode=\"before\")\n    @classmethod\n    def parse_epoch_seconds_to_datetime(cls, value: Any) -> datetime | None:\n        \"\"\"Parses seconds since the Unix epoch to a datetime object.\n\n        If the input is None, returns None.\n\n        The datetime returned will be in UTC.\n        \"\"\"\n        if value is None:\n            return None\n        if isinstance(value, datetime):\n            value = set_or_convert_timezone_to_utc(value)\n            return value\n        if not isinstance(value, int):\n            raise ValueError(\n                f\"Bug: Expected an int for the last_updated property from OpenSearch, got {type(value)} instead.\"\n            )\n        return datetime.fromtimestamp(value, tz=timezone.utc)\n\n    @field_serializer(\"tenant_id\", mode=\"wrap\")\n    def serialize_tenant_state(\n        self,\n        value: TenantState,\n        handler: SerializerFunctionWrapHandler,  # noqa: ARG002\n    ) -> str | None:\n        \"\"\"\n        Serializes tenant_state to the tenant str if multitenant, or None if\n        not.\n\n        The idea is that in single tenant mode, the schema does not have a\n        tenant_id field, so we don't want to supply it in our serialized\n        DocumentChunk. This assumes the final serialized model excludes None\n        fields, which serialize_model should enforce.\n        \"\"\"\n        if not value.multitenant:\n            return None\n        else:\n            return value.tenant_id\n\n    @field_validator(\"tenant_id\", mode=\"before\")\n    @classmethod\n    def parse_tenant_id(cls, value: Any) -> TenantState:\n        \"\"\"\n        Generates a TenantState from OpenSearch's tenant_id if it exists, or\n        generates a default state if it does not (implies we are in single\n        tenant mode).\n        \"\"\"\n        if value is None:\n            if MULTI_TENANT:\n                raise ValueError(\n                    \"Bug: No tenant_id was supplied but multi-tenant mode is enabled.\"\n                )\n            return TenantState(\n                tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT\n            )\n        elif isinstance(value, TenantState):\n            if MULTI_TENANT != value.multitenant:\n                raise ValueError(\n                    f\"Bug: An existing TenantState object was supplied to the DocumentChunk model \"\n                    f\"but its multi-tenant mode ({value.multitenant}) does not match the program's \"\n                    \"current global tenancy state.\"\n                )\n            return value\n        elif not isinstance(value, str):\n            raise ValueError(\n                f\"Bug: Expected a str for the tenant_id property from OpenSearch, got {type(value)} instead.\"\n            )\n        else:\n            if not MULTI_TENANT:\n                raise ValueError(\n                    \"Bug: Got a non-null str for the tenant_id property from OpenSearch but \"\n                    \"multi-tenant mode is not enabled. This is unexpected because in single-tenant \"\n                    \"mode we don't expect to see a tenant_id.\"\n                )\n            return TenantState(tenant_id=value, multitenant=MULTI_TENANT)\n\n\nclass DocumentChunk(DocumentChunkWithoutVectors):\n    \"\"\"Represents a chunk of a document in the OpenSearch index.\n\n    The names of these fields are based on the OpenSearch schema. Changes to the\n    schema require changes here. See get_document_schema.\n    \"\"\"\n\n    model_config = {\"frozen\": True}\n\n    title_vector: list[float] | None = None\n    content_vector: list[float]\n\n    def __str__(self) -> str:\n        return (\n            f\"DocumentChunk(document_id={self.document_id}, chunk_index={self.chunk_index}, \"\n            f\"content length={len(self.content)}, content vector length={len(self.content_vector)}, \"\n            f\"tenant_id={self.tenant_id.tenant_id})\"\n        )\n\n    @model_validator(mode=\"after\")\n    def check_title_and_title_vector_are_consistent(self) -> Self:\n        # title and title_vector should both either be None or not.\n        if self.title is not None and self.title_vector is None:\n            raise ValueError(\"Bug: Title vector must not be None if title is not None.\")\n        if self.title_vector is not None and self.title is None:\n            raise ValueError(\"Bug: Title must not be None if title vector is not None.\")\n        return self\n\n\nclass DocumentSchema:\n    \"\"\"\n    Represents the schema and indexing strategies of the OpenSearch index.\n\n    TODO(andrei): Implement multi-phase indexing strategies.\n    \"\"\"\n\n    @staticmethod\n    def get_document_schema(vector_dimension: int, multitenant: bool) -> dict[str, Any]:\n        \"\"\"Returns the document schema for the OpenSearch index.\n\n        WARNING: Changes / additions to field names here require changes to the\n        DocumentChunk class above.\n\n        Notes:\n          - By default all fields have indexing enabled.\n          - By default almost all fields except text fields have doc_values\n            enabled, enabling operations like sorting and aggregations.\n          - By default all fields are nullable.\n          - \"type\": \"keyword\" fields are stored as-is, used for exact matches,\n            filtering, etc.\n          - \"type\": \"text\" fields are OpenSearch-processed strings, used for\n            full-text searches.\n          - \"store\": True fields are stored and can be returned on their own,\n            independent of the parent document.\n          - \"index\": True fields can be queried on.\n          - \"doc_values\": True fields can be sorted and aggregated efficiently.\n            Not supported for \"text\" type fields.\n          - \"store\": True fields are stored separately from the source document\n            and can thus be returned from a query separately from _source.\n            Generally this is not necessary.\n\n        Args:\n            vector_dimension: The dimension of vector embeddings. Must be a\n                positive integer.\n            multitenant: Whether the index is multitenant.\n\n        Returns:\n            A dictionary representing the document schema, to be supplied to the\n                OpenSearch client. The structure of this dictionary is\n                determined by OpenSearch documentation.\n        \"\"\"\n        schema: dict[str, Any] = {\n            # By default OpenSearch allows dynamically adding new properties\n            # based on indexed documents. This is awful and we disable it here.\n            # An exception will be raised if you try to index a new doc which\n            # contains unexpected fields.\n            \"dynamic\": \"strict\",\n            \"properties\": {\n                TITLE_FIELD_NAME: {\n                    \"type\": \"text\",\n                    # Language analyzer (e.g. english) stems at index and search\n                    # time for variant matching. Configure via\n                    # OPENSEARCH_TEXT_ANALYZER. Existing indices need reindexing\n                    # after a change.\n                    \"analyzer\": OPENSEARCH_TEXT_ANALYZER,\n                    \"fields\": {\n                        # Subfield accessed as title.keyword. Not indexed for\n                        # values longer than 256 chars.\n                        # TODO(andrei): Ask Yuhong do we want this?\n                        \"keyword\": {\"type\": \"keyword\", \"ignore_above\": 256}\n                    },\n                    # This makes highlighting text during queries more efficient\n                    # at the cost of disk space. See\n                    # https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/#methods-of-obtaining-offsets\n                    \"index_options\": \"offsets\",\n                },\n                CONTENT_FIELD_NAME: {\n                    \"type\": \"text\",\n                    \"store\": True,\n                    \"analyzer\": OPENSEARCH_TEXT_ANALYZER,\n                    \"index_options\": \"offsets\",\n                },\n                TITLE_VECTOR_FIELD_NAME: {\n                    \"type\": \"knn_vector\",\n                    \"dimension\": vector_dimension,\n                    \"method\": {\n                        \"name\": \"hnsw\",\n                        \"space_type\": \"cosinesimil\",\n                        \"engine\": OPENSEARCH_KNN_ENGINE,\n                        \"parameters\": {\"ef_construction\": EF_CONSTRUCTION, \"m\": M},\n                    },\n                },\n                # TODO(andrei): This is a tensor in Vespa. Also look at feature\n                # parity for these other method fields.\n                CONTENT_VECTOR_FIELD_NAME: {\n                    \"type\": \"knn_vector\",\n                    \"dimension\": vector_dimension,\n                    \"method\": {\n                        \"name\": \"hnsw\",\n                        \"space_type\": \"cosinesimil\",\n                        \"engine\": OPENSEARCH_KNN_ENGINE,\n                        \"parameters\": {\"ef_construction\": EF_CONSTRUCTION, \"m\": M},\n                    },\n                },\n                SOURCE_TYPE_FIELD_NAME: {\"type\": \"keyword\"},\n                METADATA_LIST_FIELD_NAME: {\"type\": \"keyword\"},\n                LAST_UPDATED_FIELD_NAME: {\n                    \"type\": \"date\",\n                    \"format\": \"epoch_second\",\n                    # For some reason date defaults to False, even though it\n                    # would make sense to sort by date.\n                    \"doc_values\": True,\n                },\n                # Access control fields.\n                # Whether the doc is public. Could have fallen under access\n                # control list but is such a broad and critical filter that it\n                # is its own field. If true, ACCESS_CONTROL_LIST_FIELD_NAME\n                # should have no effect on queries.\n                PUBLIC_FIELD_NAME: {\"type\": \"boolean\"},\n                # Access control list for the doc, excluding public access,\n                # which is covered above.\n                # If a user's access set contains at least one entry from this\n                # set, the user should be able to retrieve this document. This\n                # only applies if public is set to false; public non-hidden\n                # documents are always visible to anyone in a given tenancy\n                # regardless of this field.\n                ACCESS_CONTROL_LIST_FIELD_NAME: {\"type\": \"keyword\"},\n                # Whether the doc is hidden from search results.\n                # Should clobber all other access search filters, namely\n                # PUBLIC_FIELD_NAME and ACCESS_CONTROL_LIST_FIELD_NAME; up to\n                # search implementations to guarantee this.\n                HIDDEN_FIELD_NAME: {\"type\": \"boolean\"},\n                GLOBAL_BOOST_FIELD_NAME: {\"type\": \"integer\"},\n                # This field is only used for displaying a useful name for the\n                # doc in the UI and is not used for searching. Disabling these\n                # features to increase perf. This field is therefore essentially\n                # just metadata.\n                SEMANTIC_IDENTIFIER_FIELD_NAME: {\n                    \"type\": \"keyword\",\n                    \"index\": False,\n                    \"doc_values\": False,\n                    # Generally False by default; just making sure.\n                    \"store\": False,\n                },\n                # Same as above; used to display an image along with the doc.\n                IMAGE_FILE_ID_FIELD_NAME: {\n                    \"type\": \"keyword\",\n                    \"index\": False,\n                    \"doc_values\": False,\n                    # Generally False by default; just making sure.\n                    \"store\": False,\n                },\n                # Same as above; used to link to the source doc.\n                SOURCE_LINKS_FIELD_NAME: {\n                    \"type\": \"keyword\",\n                    \"index\": False,\n                    \"doc_values\": False,\n                    # Generally False by default; just making sure.\n                    \"store\": False,\n                },\n                # Same as above; used to quickly summarize the doc in the UI.\n                BLURB_FIELD_NAME: {\n                    \"type\": \"keyword\",\n                    \"index\": False,\n                    \"doc_values\": False,\n                    # Generally False by default; just making sure.\n                    \"store\": False,\n                },\n                # Same as above.\n                # TODO(andrei): If we want to search on this this needs to be\n                # changed.\n                DOC_SUMMARY_FIELD_NAME: {\n                    \"type\": \"keyword\",\n                    \"index\": False,\n                    \"doc_values\": False,\n                    # Generally False by default; just making sure.\n                    \"store\": False,\n                },\n                # Same as above.\n                # TODO(andrei): If we want to search on this this needs to be\n                # changed.\n                CHUNK_CONTEXT_FIELD_NAME: {\n                    \"type\": \"keyword\",\n                    \"index\": False,\n                    \"doc_values\": False,\n                    # Generally False by default; just making sure.\n                    \"store\": False,\n                },\n                # Same as above.\n                METADATA_SUFFIX_FIELD_NAME: {\n                    \"type\": \"keyword\",\n                    \"index\": False,\n                    \"doc_values\": False,\n                    \"store\": False,\n                },\n                # Product-specific fields.\n                DOCUMENT_SETS_FIELD_NAME: {\"type\": \"keyword\"},\n                USER_PROJECTS_FIELD_NAME: {\"type\": \"integer\"},\n                PERSONAS_FIELD_NAME: {\"type\": \"integer\"},\n                PRIMARY_OWNERS_FIELD_NAME: {\"type\": \"keyword\"},\n                SECONDARY_OWNERS_FIELD_NAME: {\"type\": \"keyword\"},\n                # OpenSearch metadata fields.\n                DOCUMENT_ID_FIELD_NAME: {\"type\": \"keyword\"},\n                CHUNK_INDEX_FIELD_NAME: {\"type\": \"integer\"},\n                # The maximum number of tokens this chunk's content can hold.\n                MAX_CHUNK_SIZE_FIELD_NAME: {\"type\": \"integer\"},\n                # Hierarchy filtering - list of ancestor hierarchy node IDs.\n                # Used for scoped search within folder/space hierarchies.\n                # OpenSearch's terms query with value_type: \"bitmap\" can\n                # efficiently check if any value in this array matches a\n                # query bitmap.\n                ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME: {\"type\": \"integer\"},\n            },\n        }\n\n        if multitenant:\n            schema[\"properties\"][TENANT_ID_FIELD_NAME] = {\"type\": \"keyword\"}\n\n        return schema\n\n    @staticmethod\n    def get_index_settings_based_on_environment() -> dict[str, Any]:\n        \"\"\"\n        Returns the index settings based on the environment.\n        \"\"\"\n        if USING_AWS_MANAGED_OPENSEARCH:\n            # NOTE: The number of data copies, including the primary (not a\n            # replica) copy, must be divisible by the number of AZs.\n            if MULTI_TENANT:\n                number_of_shards = 324\n                number_of_replicas = 2\n            else:\n                number_of_shards = 3\n                number_of_replicas = 2\n        else:\n            number_of_shards = 1\n            number_of_replicas = 1\n\n        if OPENSEARCH_INDEX_NUM_SHARDS is not None:\n            number_of_shards = OPENSEARCH_INDEX_NUM_SHARDS\n        if OPENSEARCH_INDEX_NUM_REPLICAS is not None:\n            number_of_replicas = OPENSEARCH_INDEX_NUM_REPLICAS\n\n        return {\n            \"index\": {\n                \"number_of_shards\": number_of_shards,\n                \"number_of_replicas\": number_of_replicas,\n                # Required for vector search.\n                \"knn\": True,\n                \"knn.algo_param.ef_search\": EF_SEARCH,\n            }\n        }\n"
  },
  {
    "path": "backend/onyx/document_index/opensearch/search.py",
    "content": "import random\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import TypeAlias\nfrom typing import TypeVar\n\nfrom onyx.configs.app_configs import DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S\nfrom onyx.configs.app_configs import OPENSEARCH_EXPLAIN_ENABLED\nfrom onyx.configs.app_configs import OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED\nfrom onyx.configs.app_configs import OPENSEARCH_PROFILING_DISABLED\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import INDEX_SEPARATOR\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import Tag\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.constants import ASSUMED_DOCUMENT_AGE_DAYS\nfrom onyx.document_index.opensearch.constants import (\n    DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES,\n)\nfrom onyx.document_index.opensearch.constants import (\n    DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW,\n)\nfrom onyx.document_index.opensearch.constants import (\n    HYBRID_SEARCH_NORMALIZATION_PIPELINE,\n)\nfrom onyx.document_index.opensearch.constants import (\n    HYBRID_SEARCH_SUBQUERY_CONFIGURATION,\n)\nfrom onyx.document_index.opensearch.constants import HybridSearchNormalizationPipeline\nfrom onyx.document_index.opensearch.constants import HybridSearchSubqueryConfiguration\nfrom onyx.document_index.opensearch.schema import ACCESS_CONTROL_LIST_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import CHUNK_INDEX_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import CONTENT_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import CONTENT_VECTOR_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import DOCUMENT_ID_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import DOCUMENT_SETS_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import HIDDEN_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import LAST_UPDATED_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import MAX_CHUNK_SIZE_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import METADATA_LIST_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import PERSONAS_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import PUBLIC_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import set_or_convert_timezone_to_utc\nfrom onyx.document_index.opensearch.schema import SOURCE_TYPE_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import TENANT_ID_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import TITLE_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import TITLE_VECTOR_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import USER_PROJECTS_FIELD_NAME\n\n# See https://docs.opensearch.org/latest/query-dsl/term/terms/.\nMAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY = 65_536\n\n\n_T = TypeVar(\"_T\")\nTermsQuery: TypeAlias = dict[str, dict[str, list[_T]]]\nTermQuery: TypeAlias = dict[str, dict[str, dict[str, _T]]]\n\n\n# TODO(andrei): Turn all magic dictionaries to pydantic models.\n\n\n# Normalization pipelines combine document scores from multiple query clauses.\n# The number and ordering of weights should match the query clauses. The values\n# of the weights should sum to 1.\ndef _get_hybrid_search_normalization_weights() -> list[float]:\n    if (\n        HYBRID_SEARCH_SUBQUERY_CONFIGURATION\n        is HybridSearchSubqueryConfiguration.TITLE_VECTOR_CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD\n    ):\n        # Since the titles are included in the contents, the embedding matches\n        # are heavily downweighted as they act as a boost rather than an\n        # independent scoring component.\n        search_title_vector_weight = 0.1\n        search_content_vector_weight = 0.45\n        # Single keyword weight for both title and content (merged from former\n        # title keyword + content keyword).\n        search_keyword_weight = 0.45\n\n        # NOTE: It is critical that the order of these weights matches the order\n        # of the sub-queries in the hybrid search.\n        hybrid_search_normalization_weights = [\n            search_title_vector_weight,\n            search_content_vector_weight,\n            search_keyword_weight,\n        ]\n    elif (\n        HYBRID_SEARCH_SUBQUERY_CONFIGURATION\n        is HybridSearchSubqueryConfiguration.CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD\n    ):\n        search_content_vector_weight = 0.5\n        # Single keyword weight for both title and content (merged from former\n        # title keyword + content keyword).\n        search_keyword_weight = 0.5\n\n        # NOTE: It is critical that the order of these weights matches the order\n        # of the sub-queries in the hybrid search.\n        hybrid_search_normalization_weights = [\n            search_content_vector_weight,\n            search_keyword_weight,\n        ]\n    else:\n        raise ValueError(\n            f\"Bug: Unhandled hybrid search subquery configuration: {HYBRID_SEARCH_SUBQUERY_CONFIGURATION}.\"\n        )\n\n    assert (\n        sum(hybrid_search_normalization_weights) == 1.0\n    ), \"Bug: Hybrid search normalization weights do not sum to 1.0.\"\n\n    return hybrid_search_normalization_weights\n\n\ndef get_min_max_normalization_pipeline_name_and_config() -> tuple[str, dict[str, Any]]:\n    min_max_normalization_pipeline_name = \"normalization_pipeline_min_max\"\n    min_max_normalization_pipeline_config: dict[str, Any] = {\n        \"description\": \"Normalization for keyword and vector scores using min-max\",\n        \"phase_results_processors\": [\n            {\n                # https://docs.opensearch.org/latest/search-plugins/search-pipelines/normalization-processor/\n                \"normalization-processor\": {\n                    \"normalization\": {\"technique\": \"min_max\"},\n                    \"combination\": {\n                        \"technique\": \"arithmetic_mean\",\n                        \"parameters\": {\n                            \"weights\": _get_hybrid_search_normalization_weights()\n                        },\n                    },\n                }\n            }\n        ],\n    }\n    return min_max_normalization_pipeline_name, min_max_normalization_pipeline_config\n\n\ndef get_zscore_normalization_pipeline_name_and_config() -> tuple[str, dict[str, Any]]:\n    zscore_normalization_pipeline_name = \"normalization_pipeline_zscore\"\n    zscore_normalization_pipeline_config: dict[str, Any] = {\n        \"description\": \"Normalization for keyword and vector scores using z-score\",\n        \"phase_results_processors\": [\n            {\n                # https://docs.opensearch.org/latest/search-plugins/search-pipelines/normalization-processor/\n                \"normalization-processor\": {\n                    \"normalization\": {\"technique\": \"z_score\"},\n                    \"combination\": {\n                        \"technique\": \"arithmetic_mean\",\n                        \"parameters\": {\n                            \"weights\": _get_hybrid_search_normalization_weights()\n                        },\n                    },\n                }\n            }\n        ],\n    }\n    return zscore_normalization_pipeline_name, zscore_normalization_pipeline_config\n\n\ndef get_normalization_pipeline_name_and_config() -> tuple[str, dict[str, Any]]:\n    if (\n        HYBRID_SEARCH_NORMALIZATION_PIPELINE\n        is HybridSearchNormalizationPipeline.MIN_MAX\n    ):\n        return get_min_max_normalization_pipeline_name_and_config()\n    elif (\n        HYBRID_SEARCH_NORMALIZATION_PIPELINE is HybridSearchNormalizationPipeline.ZSCORE\n    ):\n        return get_zscore_normalization_pipeline_name_and_config()\n    else:\n        raise ValueError(\n            f\"Bug: Unhandled hybrid search normalization pipeline: {HYBRID_SEARCH_NORMALIZATION_PIPELINE}.\"\n        )\n\n\nclass DocumentQuery:\n    \"\"\"\n    TODO(andrei): Implement multi-phase search strategies.\n    TODO(andrei): Implement document boost.\n    TODO(andrei): Implement document age.\n    \"\"\"\n\n    @staticmethod\n    def get_from_document_id_query(\n        document_id: str,\n        tenant_state: TenantState,\n        index_filters: IndexFilters,\n        include_hidden: bool,\n        max_chunk_size: int,\n        min_chunk_index: int | None,\n        max_chunk_index: int | None,\n        get_full_document: bool = True,\n    ) -> dict[str, Any]:\n        \"\"\"\n        Returns a final search query which gets chunks from a given document ID.\n\n        This query can be directly supplied to the OpenSearch client.\n\n        TODO(andrei): Currently capped at 10k results. Implement scroll/point in\n        time for results so that we can return arbitrarily-many IDs.\n\n        Args:\n            document_id: Onyx document ID. Notably not an OpenSearch document\n                ID, which points to what Onyx would refer to as a chunk.\n            tenant_state: Tenant state containing the tenant ID.\n            index_filters: Filters for the document retrieval query.\n            include_hidden: Whether to include hidden documents.\n            max_chunk_size: Document chunks are categorized by the maximum\n                number of tokens they can hold. This parameter specifies the\n                maximum size category of document chunks to retrieve.\n            min_chunk_index: The minimum chunk index to retrieve, inclusive. If\n                None, no minimum chunk index will be applied.\n            max_chunk_index: The maximum chunk index to retrieve, inclusive. If\n                None, no maximum chunk index will be applied.\n            get_full_document: Whether to get the full document body. If False,\n                OpenSearch will only return the matching document chunk IDs plus\n                metadata; the source data will be omitted from the response. Use\n                this for performance optimization if OpenSearch IDs are\n                sufficient. Defaults to True.\n\n        Returns:\n            A dictionary representing the final ID search query.\n        \"\"\"\n        filter_clauses = DocumentQuery._get_search_filters(\n            tenant_state=tenant_state,\n            include_hidden=include_hidden,\n            access_control_list=index_filters.access_control_list,\n            source_types=index_filters.source_type or [],\n            tags=index_filters.tags or [],\n            document_sets=index_filters.document_set or [],\n            project_id_filter=index_filters.project_id_filter,\n            persona_id_filter=index_filters.persona_id_filter,\n            time_cutoff=index_filters.time_cutoff,\n            min_chunk_index=min_chunk_index,\n            max_chunk_index=max_chunk_index,\n            max_chunk_size=max_chunk_size,\n            document_id=document_id,\n            attached_document_ids=index_filters.attached_document_ids,\n            hierarchy_node_ids=index_filters.hierarchy_node_ids,\n        )\n        final_get_ids_query: dict[str, Any] = {\n            \"query\": {\"bool\": {\"filter\": filter_clauses}},\n            # We include this to make sure OpenSearch does not revert to\n            # returning some number of results less than the index max allowed\n            # return size.\n            \"size\": DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW,\n            # By default exclude retrieving the vector fields in order to save\n            # on retrieval cost as we don't need them upstream.\n            \"_source\": {\n                \"excludes\": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]\n            },\n            \"timeout\": f\"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s\",\n        }\n        if not get_full_document:\n            # If we explicitly do not want the underlying document, we will only\n            # retrieve IDs.\n            final_get_ids_query[\"_source\"] = False\n        if not OPENSEARCH_PROFILING_DISABLED:\n            final_get_ids_query[\"profile\"] = True\n\n        return final_get_ids_query\n\n    @staticmethod\n    def delete_from_document_id_query(\n        document_id: str,\n        tenant_state: TenantState,\n    ) -> dict[str, Any]:\n        \"\"\"\n        Returns a final search query which deletes chunks from a given document\n        ID.\n\n        This query can be directly supplied to the OpenSearch client.\n\n        Intended to be supplied to the OpenSearch client's delete_by_query\n        method.\n\n        TODO(andrei): There is no limit to the number of document chunks that\n        can be deleted by this query. This could get expensive. Consider\n        implementing batching.\n\n        Args:\n            document_id: Onyx document ID. Notably not an OpenSearch document\n                ID, which points to what Onyx would refer to as a chunk.\n            tenant_state: Tenant state containing the tenant ID.\n\n        Returns:\n            A dictionary representing the final delete query.\n        \"\"\"\n        filter_clauses = DocumentQuery._get_search_filters(\n            tenant_state=tenant_state,\n            # Delete hidden docs too.\n            include_hidden=True,\n            access_control_list=None,\n            source_types=[],\n            tags=[],\n            document_sets=[],\n            project_id_filter=None,\n            persona_id_filter=None,\n            time_cutoff=None,\n            min_chunk_index=None,\n            max_chunk_index=None,\n            max_chunk_size=None,\n            document_id=document_id,\n        )\n        final_delete_query: dict[str, Any] = {\n            \"query\": {\"bool\": {\"filter\": filter_clauses}},\n            \"timeout\": f\"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s\",\n        }\n        if not OPENSEARCH_PROFILING_DISABLED:\n            final_delete_query[\"profile\"] = True\n\n        return final_delete_query\n\n    @staticmethod\n    def get_hybrid_search_query(\n        query_text: str,\n        query_vector: list[float],\n        num_hits: int,\n        tenant_state: TenantState,\n        index_filters: IndexFilters,\n        include_hidden: bool,\n    ) -> dict[str, Any]:\n        \"\"\"Returns a final hybrid search query.\n\n        NOTE: This query can be directly supplied to the OpenSearch client, but\n        it MUST be supplied in addition to a search pipeline. The results from\n        hybrid search are not meaningful without that step.\n\n        TODO(andrei): There is some duplicated logic in this function with\n        others in this file.\n\n        Args:\n            query_text: The text to query for.\n            query_vector: The vector embedding of the text to query for.\n            num_hits: The final number of hits to return.\n            tenant_state: Tenant state containing the tenant ID.\n            index_filters: Filters for the hybrid search query.\n            include_hidden: Whether to include hidden documents.\n\n        Returns:\n            A dictionary representing the final hybrid search query.\n        \"\"\"\n        # WARNING: Profiling does not work with hybrid search; do not add it at\n        # this level. See https://github.com/opensearch-project/neural-search/issues/1255\n\n        if num_hits > DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW:\n            raise ValueError(\n                f\"Bug: num_hits ({num_hits}) is greater than the current maximum allowed \"\n                f\"result window ({DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW}).\"\n            )\n\n        # TODO(andrei, yuhong): We can tune this more dynamically based on\n        # num_hits.\n        max_results_per_subquery = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES\n\n        hybrid_search_subqueries = DocumentQuery._get_hybrid_search_subqueries(\n            query_text, query_vector, vector_candidates=max_results_per_subquery\n        )\n        hybrid_search_filters = DocumentQuery._get_search_filters(\n            tenant_state=tenant_state,\n            include_hidden=include_hidden,\n            # TODO(andrei): We've done no filtering for PUBLIC_DOC_PAT up to\n            # now. This should not cause any issues but it can introduce\n            # redundant filters in queries that may affect performance.\n            access_control_list=index_filters.access_control_list,\n            source_types=index_filters.source_type or [],\n            tags=index_filters.tags or [],\n            document_sets=index_filters.document_set or [],\n            project_id_filter=index_filters.project_id_filter,\n            persona_id_filter=index_filters.persona_id_filter,\n            time_cutoff=index_filters.time_cutoff,\n            min_chunk_index=None,\n            max_chunk_index=None,\n            attached_document_ids=index_filters.attached_document_ids,\n            hierarchy_node_ids=index_filters.hierarchy_node_ids,\n        )\n\n        # See https://docs.opensearch.org/latest/query-dsl/compound/hybrid/\n        hybrid_search_query: dict[str, Any] = {\n            \"hybrid\": {\n                \"queries\": hybrid_search_subqueries,\n                # Max results per subquery per shard before aggregation. Ensures\n                # keyword and vector subqueries contribute equally to the\n                # candidate pool for hybrid fusion.\n                # Sources:\n                # https://docs.opensearch.org/latest/vector-search/ai-search/hybrid-search/pagination/\n                # https://opensearch.org/blog/navigating-pagination-in-hybrid-queries-with-the-pagination_depth-parameter/\n                \"pagination_depth\": max_results_per_subquery,\n                # Applied to all the sub-queries independently (this avoids\n                # subqueries having a lot of results thrown out during\n                # aggregation).\n                # Sources:\n                # https://docs.opensearch.org/latest/query-dsl/compound/hybrid/\n                # https://opensearch.org/blog/introducing-common-filter-support-for-hybrid-search-queries\n                # Does AND for each filter in the list.\n                \"filter\": {\"bool\": {\"filter\": hybrid_search_filters}},\n            }\n        }\n\n        final_hybrid_search_body: dict[str, Any] = {\n            \"query\": hybrid_search_query,\n            \"size\": num_hits,\n            \"timeout\": f\"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s\",\n            # Exclude retrieving the vector fields in order to save on\n            # retrieval cost as we don't need them upstream.\n            \"_source\": {\n                \"excludes\": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]\n            },\n        }\n\n        if not OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED:\n            final_hybrid_search_body[\"highlight\"] = (\n                DocumentQuery._get_match_highlights_configuration()\n            )\n\n        # Explain is for scoring breakdowns. Setting this significantly\n        # increases query latency.\n        if OPENSEARCH_EXPLAIN_ENABLED:\n            final_hybrid_search_body[\"explain\"] = True\n\n        return final_hybrid_search_body\n\n    @staticmethod\n    def get_keyword_search_query(\n        query_text: str,\n        num_hits: int,\n        tenant_state: TenantState,\n        index_filters: IndexFilters,\n        include_hidden: bool,\n    ) -> dict[str, Any]:\n        \"\"\"Returns a final keyword search query.\n\n        This query can be directly supplied to the OpenSearch client.\n\n        TODO(andrei): There is some duplicated logic in this function with\n        others in this file.\n\n        Args:\n            query_text: The text to query for.\n            num_hits: The final number of hits to return.\n            tenant_state: Tenant state containing the tenant ID.\n            index_filters: Filters for the keyword search query.\n            include_hidden: Whether to include hidden documents.\n\n        Returns:\n            A dictionary representing the final keyword search query.\n        \"\"\"\n        if num_hits > DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW:\n            raise ValueError(\n                f\"Bug: num_hits ({num_hits}) is greater than the current maximum allowed \"\n                f\"result window ({DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW}).\"\n            )\n\n        keyword_search_filters = DocumentQuery._get_search_filters(\n            tenant_state=tenant_state,\n            include_hidden=include_hidden,\n            # TODO(andrei): We've done no filtering for PUBLIC_DOC_PAT up to\n            # now. This should not cause any issues but it can introduce\n            # redundant filters in queries that may affect performance.\n            access_control_list=index_filters.access_control_list,\n            source_types=index_filters.source_type or [],\n            tags=index_filters.tags or [],\n            document_sets=index_filters.document_set or [],\n            project_id_filter=index_filters.project_id_filter,\n            persona_id_filter=index_filters.persona_id_filter,\n            time_cutoff=index_filters.time_cutoff,\n            min_chunk_index=None,\n            max_chunk_index=None,\n            attached_document_ids=index_filters.attached_document_ids,\n            hierarchy_node_ids=index_filters.hierarchy_node_ids,\n        )\n\n        keyword_search_query = (\n            DocumentQuery._get_title_content_combined_keyword_search_query(\n                query_text, search_filters=keyword_search_filters\n            )\n        )\n\n        final_keyword_search_query: dict[str, Any] = {\n            \"query\": keyword_search_query,\n            \"size\": num_hits,\n            \"timeout\": f\"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s\",\n            # Exclude retrieving the vector fields in order to save on\n            # retrieval cost as we don't need them upstream.\n            \"_source\": {\n                \"excludes\": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]\n            },\n        }\n\n        if not OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED:\n            final_keyword_search_query[\"highlight\"] = (\n                DocumentQuery._get_match_highlights_configuration()\n            )\n\n        if not OPENSEARCH_PROFILING_DISABLED:\n            final_keyword_search_query[\"profile\"] = True\n\n        # Explain is for scoring breakdowns. Setting this significantly\n        # increases query latency.\n        if OPENSEARCH_EXPLAIN_ENABLED:\n            final_keyword_search_query[\"explain\"] = True\n\n        return final_keyword_search_query\n\n    @staticmethod\n    def get_semantic_search_query(\n        query_embedding: list[float],\n        num_hits: int,\n        tenant_state: TenantState,\n        index_filters: IndexFilters,\n        include_hidden: bool,\n    ) -> dict[str, Any]:\n        \"\"\"Returns a final semantic search query.\n\n        This query can be directly supplied to the OpenSearch client.\n\n        TODO(andrei): There is some duplicated logic in this function with\n        others in this file.\n\n        Args:\n            query_embedding: The vector embedding of the text to query for.\n            num_hits: The final number of hits to return.\n            tenant_state: Tenant state containing the tenant ID.\n            index_filters: Filters for the semantic search query.\n            include_hidden: Whether to include hidden documents.\n\n        Returns:\n            A dictionary representing the final semantic search query.\n        \"\"\"\n        if num_hits > DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW:\n            raise ValueError(\n                f\"Bug: num_hits ({num_hits}) is greater than the current maximum allowed \"\n                f\"result window ({DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW}).\"\n            )\n\n        semantic_search_filters = DocumentQuery._get_search_filters(\n            tenant_state=tenant_state,\n            include_hidden=include_hidden,\n            # TODO(andrei): We've done no filtering for PUBLIC_DOC_PAT up to\n            # now. This should not cause any issues but it can introduce\n            # redundant filters in queries that may affect performance.\n            access_control_list=index_filters.access_control_list,\n            source_types=index_filters.source_type or [],\n            tags=index_filters.tags or [],\n            document_sets=index_filters.document_set or [],\n            project_id_filter=index_filters.project_id_filter,\n            persona_id_filter=index_filters.persona_id_filter,\n            time_cutoff=index_filters.time_cutoff,\n            min_chunk_index=None,\n            max_chunk_index=None,\n            attached_document_ids=index_filters.attached_document_ids,\n            hierarchy_node_ids=index_filters.hierarchy_node_ids,\n        )\n\n        semantic_search_query = (\n            DocumentQuery._get_content_vector_similarity_search_query(\n                query_embedding,\n                vector_candidates=num_hits,\n                search_filters=semantic_search_filters,\n            )\n        )\n\n        final_semantic_search_query: dict[str, Any] = {\n            \"query\": semantic_search_query,\n            \"size\": num_hits,\n            \"timeout\": f\"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s\",\n            # Exclude retrieving the vector fields in order to save on\n            # retrieval cost as we don't need them upstream.\n            \"_source\": {\n                \"excludes\": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]\n            },\n        }\n\n        if not OPENSEARCH_PROFILING_DISABLED:\n            final_semantic_search_query[\"profile\"] = True\n\n        # Explain is for scoring breakdowns. Setting this significantly\n        # increases query latency.\n        if OPENSEARCH_EXPLAIN_ENABLED:\n            final_semantic_search_query[\"explain\"] = True\n\n        return final_semantic_search_query\n\n    @staticmethod\n    def get_random_search_query(\n        tenant_state: TenantState,\n        index_filters: IndexFilters,\n        num_to_retrieve: int,\n    ) -> dict[str, Any]:\n        \"\"\"Returns a final search query that gets document chunks randomly.\n\n        Args:\n            tenant_state: Tenant state containing the tenant ID.\n            index_filters: Filters for the random search query.\n            num_to_retrieve: Number of document chunks to retrieve.\n\n        Returns:\n            A dictionary representing the final random search query.\n        \"\"\"\n        search_filters = DocumentQuery._get_search_filters(\n            tenant_state=tenant_state,\n            include_hidden=False,\n            access_control_list=index_filters.access_control_list,\n            source_types=index_filters.source_type or [],\n            tags=index_filters.tags or [],\n            document_sets=index_filters.document_set or [],\n            project_id_filter=index_filters.project_id_filter,\n            persona_id_filter=index_filters.persona_id_filter,\n            time_cutoff=index_filters.time_cutoff,\n            min_chunk_index=None,\n            max_chunk_index=None,\n            attached_document_ids=index_filters.attached_document_ids,\n            hierarchy_node_ids=index_filters.hierarchy_node_ids,\n        )\n        final_random_search_query = {\n            \"query\": {\n                \"function_score\": {\n                    \"query\": {\"bool\": {\"filter\": search_filters}},\n                    # See\n                    # https://docs.opensearch.org/latest/query-dsl/compound/function-score/#the-random-score-function\n                    \"random_score\": {\n                        # We'll use a different seed per invocation.\n                        \"seed\": random.randint(0, 1_000_000),\n                        # Some field which has a unique value per document\n                        # chunk.\n                        \"field\": \"_seq_no\",\n                    },\n                    # Replaces whatever score was computed in the query.\n                    \"boost_mode\": \"replace\",\n                }\n            },\n            \"size\": num_to_retrieve,\n            \"timeout\": f\"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s\",\n            # Exclude retrieving the vector fields in order to save on\n            # retrieval cost as we don't need them upstream.\n            \"_source\": {\n                \"excludes\": [TITLE_VECTOR_FIELD_NAME, CONTENT_VECTOR_FIELD_NAME]\n            },\n        }\n        if not OPENSEARCH_PROFILING_DISABLED:\n            final_random_search_query[\"profile\"] = True\n\n        return final_random_search_query\n\n    @staticmethod\n    def _get_hybrid_search_subqueries(\n        query_text: str,\n        query_vector: list[float],\n        # The default number of neighbors to consider for knn vector similarity\n        # search. This is higher than the number of results because the scoring\n        # is hybrid. For a detailed breakdown, see where the default value is\n        # set.\n        vector_candidates: int = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES,\n    ) -> list[dict[str, Any]]:\n        \"\"\"Returns subqueries for hybrid search.\n\n        Each of these subqueries are the \"hybrid\" component of this search. We\n        search on various things and combine results.\n\n        The return of this function is not sufficient to be directly supplied to\n        the OpenSearch client. See get_hybrid_search_query.\n\n        Normalization is not performed here.\n        The weights of each of these subqueries should be configured in a search\n        pipeline.\n\n        The exact subqueries executed depend on the\n        HYBRID_SEARCH_SUBQUERY_CONFIGURATION setting.\n\n        NOTE: For OpenSearch, 5 is the maximum number of query clauses allowed\n        in a single hybrid query. Source:\n        https://docs.opensearch.org/latest/query-dsl/compound/hybrid/\n\n        NOTE: Each query is independent during the search phase; there is no\n        backfilling of scores for missing query components. What this means is\n        that if a document was a good vector match but did not show up for\n        keyword, it gets a score of 0 for the keyword component of the hybrid\n        scoring. This is not as bad as just disregarding a score though as there\n        is normalization applied after. So really it is \"increasing\" the missing\n        score compared to if it was included and the range was renormalized.\n        This does however mean that between docs that have high scores for say\n        the vector field, the keyword scores between them are completely ignored\n        unless they also showed up in the keyword query as a reasonably high\n        match. TLDR, this is a bit of unique funky behavior but it seems ok.\n\n        NOTE: Options considered and rejected:\n        - minimum_should_match: Since it's hybrid search and users often provide\n          semantic queries, there is often a lot of terms, and very low number\n          of meaningful keywords (and a low ratio of keywords).\n        - fuzziness AUTO: Typo tolerance (0/1/2 edit distance by term length).\n          It's mostly for typos as the analyzer (\"english\" by default) already\n          does some stemming and tokenization. In testing datasets, this makes\n          recall slightly worse. It also is less performant so not really any\n          reason to do it.\n\n        Args:\n            query_text: The text of the query to search for.\n            query_vector: The vector embedding of the query to search for.\n            num_candidates: The number of candidates to consider for vector\n                similarity search.\n        \"\"\"\n        # Build sub-queries for hybrid search. Order must match normalization\n        # pipeline weights.\n        if (\n            HYBRID_SEARCH_SUBQUERY_CONFIGURATION\n            is HybridSearchSubqueryConfiguration.TITLE_VECTOR_CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD\n        ):\n            return [\n                DocumentQuery._get_title_vector_similarity_search_query(\n                    query_vector, vector_candidates\n                ),\n                DocumentQuery._get_content_vector_similarity_search_query(\n                    query_vector, vector_candidates\n                ),\n                DocumentQuery._get_title_content_combined_keyword_search_query(\n                    query_text\n                ),\n            ]\n        elif (\n            HYBRID_SEARCH_SUBQUERY_CONFIGURATION\n            is HybridSearchSubqueryConfiguration.CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD\n        ):\n            return [\n                DocumentQuery._get_content_vector_similarity_search_query(\n                    query_vector, vector_candidates\n                ),\n                DocumentQuery._get_title_content_combined_keyword_search_query(\n                    query_text\n                ),\n            ]\n        else:\n            raise ValueError(\n                f\"Bug: Unhandled hybrid search subquery configuration: {HYBRID_SEARCH_SUBQUERY_CONFIGURATION}\"\n            )\n\n    @staticmethod\n    def _get_title_vector_similarity_search_query(\n        query_vector: list[float],\n        vector_candidates: int = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES,\n    ) -> dict[str, Any]:\n        return {\n            \"knn\": {\n                TITLE_VECTOR_FIELD_NAME: {\n                    \"vector\": query_vector,\n                    \"k\": vector_candidates,\n                }\n            }\n        }\n\n    @staticmethod\n    def _get_content_vector_similarity_search_query(\n        query_vector: list[float],\n        vector_candidates: int = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES,\n        search_filters: list[dict[str, Any]] | None = None,\n    ) -> dict[str, Any]:\n        query = {\n            \"knn\": {\n                CONTENT_VECTOR_FIELD_NAME: {\n                    \"vector\": query_vector,\n                    \"k\": vector_candidates,\n                }\n            }\n        }\n\n        if search_filters is not None:\n            query[\"knn\"][CONTENT_VECTOR_FIELD_NAME][\"filter\"] = {\n                \"bool\": {\"filter\": search_filters}\n            }\n\n        return query\n\n    @staticmethod\n    def _get_title_content_combined_keyword_search_query(\n        query_text: str,\n        search_filters: list[dict[str, Any]] | None = None,\n    ) -> dict[str, Any]:\n        query = {\n            \"bool\": {\n                \"should\": [\n                    {\n                        \"match\": {\n                            TITLE_FIELD_NAME: {\n                                \"query\": query_text,\n                                \"operator\": \"or\",\n                                # The title fields are strongly discounted as\n                                # they are included in the content. This just\n                                # acts as a minor boost.\n                                \"boost\": 0.1,\n                            }\n                        }\n                    },\n                    {\n                        \"match_phrase\": {\n                            TITLE_FIELD_NAME: {\n                                \"query\": query_text,\n                                \"slop\": 1,\n                                \"boost\": 0.2,\n                            }\n                        }\n                    },\n                    {\n                        # Analyzes the query and returns results which match any\n                        # of the query's terms. More matches result in higher\n                        # scores.\n                        \"match\": {\n                            CONTENT_FIELD_NAME: {\n                                \"query\": query_text,\n                                \"operator\": \"or\",\n                                \"boost\": 1.0,\n                            }\n                        }\n                    },\n                    {\n                        # Matches an exact phrase in a specified order.\n                        \"match_phrase\": {\n                            CONTENT_FIELD_NAME: {\n                                \"query\": query_text,\n                                # The number of words permitted between words of\n                                # a query phrase and still result in a match.\n                                \"slop\": 1,\n                                \"boost\": 1.5,\n                            }\n                        }\n                    },\n                ],\n                # Ensures at least one match subquery from the query is present\n                # in the document. This defaults to 1, unless a filter or must\n                # clause is supplied, in which case it defaults to 0.\n                \"minimum_should_match\": 1,\n            }\n        }\n\n        if search_filters is not None:\n            query[\"bool\"][\"filter\"] = search_filters\n\n        return query\n\n    @staticmethod\n    def _get_search_filters(\n        tenant_state: TenantState,\n        include_hidden: bool,\n        access_control_list: list[str] | None,\n        source_types: list[DocumentSource],\n        tags: list[Tag],\n        document_sets: list[str],\n        project_id_filter: int | None,\n        persona_id_filter: int | None,\n        time_cutoff: datetime | None,\n        min_chunk_index: int | None,\n        max_chunk_index: int | None,\n        max_chunk_size: int | None = None,\n        document_id: str | None = None,\n        # Assistant knowledge filters\n        attached_document_ids: list[str] | None = None,\n        hierarchy_node_ids: list[int] | None = None,\n    ) -> list[dict[str, Any]]:\n        \"\"\"Returns filters to be passed into the \"filter\" key of a search query.\n\n        The \"filter\" key applies a logical AND operator to its elements, so\n        every subfilter must evaluate to true in order for the document to be\n        retrieved. This function returns a list of such subfilters.\n        See https://docs.opensearch.org/latest/query-dsl/compound/bool/.\n\n        TODO(ENG-3874): The terms queries returned by this function can be made\n        more performant for large cardinality sets by sorting the values by\n        their UTF-8 byte order.\n\n        TODO(ENG-3875): This function can take even better advantage of filter\n        caching by grouping \"static\" filters together into one sub-clause.\n\n        Args:\n            tenant_state: Tenant state containing the tenant ID.\n            include_hidden: Whether to include hidden documents.\n            access_control_list: Access control list for the documents to\n                retrieve. If None, there is no restriction on the documents that\n                can be retrieved. If not None, only public documents can be\n                retrieved, or non-public documents where at least one acl\n                provided here is present in the document's acl list.\n            source_types: If supplied, only documents of one of these source\n                types will be retrieved.\n            tags: If supplied, only documents with an entry in their metadata\n                list corresponding to a tag will be retrieved.\n            document_sets: If supplied, only documents with at least one\n                document set ID from this list will be retrieved.\n            project_id_filter: If not None, only documents with this project ID\n                in user projects will be retrieved. Additive — only applied\n                when a knowledge scope already exists.\n            persona_id_filter: If not None, only documents whose personas array\n                contains this persona ID will be retrieved. Primary — creates\n                a knowledge scope on its own.\n            time_cutoff: Time cutoff for the documents to retrieve. If not None,\n                Documents which were last updated before this date will not be\n                returned. For documents which do not have a value for their last\n                updated time, we assume some default age of\n                ASSUMED_DOCUMENT_AGE_DAYS for when the document was last\n                updated.\n            min_chunk_index: The minimum chunk index to retrieve, inclusive. If\n                None, no minimum chunk index will be applied.\n            max_chunk_index: The maximum chunk index to retrieve, inclusive. If\n                None, no maximum chunk index will be applied.\n            max_chunk_size: The type of chunk to retrieve, specified by the\n                maximum number of tokens it can hold. If None, no filter will be\n                applied for this. Defaults to None.\n                NOTE: See DocumentChunk.max_chunk_size.\n            document_id: The document ID to retrieve. If None, no filter will be\n                applied for this. Defaults to None.\n            attached_document_ids: Document IDs explicitly attached to the\n                assistant. If provided along with hierarchy_node_ids, documents\n                matching EITHER criteria will be retrieved (OR logic).\n            hierarchy_node_ids: Hierarchy node IDs (folders/spaces) attached to\n                the assistant. Matches chunks where ancestor_hierarchy_node_ids\n                contains any of these values.\n\n        Raises:\n            ValueError: document_id and attached_document_ids were supplied\n                together. This is not allowed because they operate on the same\n                schema field, and it does not semantically make sense to use\n                them together.\n            ValueError: Too many of one of the collection arguments was\n                supplied.\n\n        Returns:\n            A list of filters to be passed into the \"filter\" key of a search\n                query.\n        \"\"\"\n\n        def _get_acl_visibility_filter(\n            access_control_list: list[str],\n        ) -> dict[str, dict[str, list[TermQuery[bool] | TermsQuery[str]] | int]]:\n            \"\"\"Returns a filter for the access control list.\n\n            Since this returns an isolated bool should clause, it can be cached\n            in OpenSearch independently of other clauses in _get_search_filters.\n\n            Args:\n                access_control_list: The access control list to restrict\n                    documents to.\n\n            Raises:\n                ValueError: The number of access control list entries is greater\n                    than MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY.\n\n            Returns:\n                A filter for the access control list.\n            \"\"\"\n            # Logical OR operator on its elements.\n            acl_visibility_filter: dict[str, dict[str, Any]] = {\n                \"bool\": {\n                    \"should\": [{\"term\": {PUBLIC_FIELD_NAME: {\"value\": True}}}],\n                    \"minimum_should_match\": 1,\n                }\n            }\n            if access_control_list:\n                if len(access_control_list) > MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY:\n                    raise ValueError(\n                        f\"Too many access control list entries: {len(access_control_list)}. Max allowed: {MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY}.\"\n                    )\n                # Use terms instead of a list of term within a should clause\n                # because Lucene will optimize the filtering for large sets of\n                # terms. Small sets of terms are not expected to perform any\n                # differently than individual term clauses.\n                acl_subclause: TermsQuery[str] = {\n                    \"terms\": {ACCESS_CONTROL_LIST_FIELD_NAME: list(access_control_list)}\n                }\n                acl_visibility_filter[\"bool\"][\"should\"].append(acl_subclause)\n            return acl_visibility_filter\n\n        def _get_source_type_filter(\n            source_types: list[DocumentSource],\n        ) -> TermsQuery[str]:\n            \"\"\"Returns a filter for the source types.\n\n            Since this returns an isolated terms clause, it can be cached in\n            OpenSearch independently of other clauses in _get_search_filters.\n\n            Args:\n                source_types: The source types to restrict documents to.\n\n            Raises:\n                ValueError: The number of source types is greater than\n                    MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY.\n                ValueError: An empty list was supplied.\n\n            Returns:\n                A filter for the source types.\n            \"\"\"\n            if not source_types:\n                raise ValueError(\n                    \"source_types cannot be empty if trying to create a source type filter.\"\n                )\n            if len(source_types) > MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY:\n                raise ValueError(\n                    f\"Too many source types: {len(source_types)}. Max allowed: {MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY}.\"\n                )\n            # Use terms instead of a list of term within a should clause because\n            # Lucene will optimize the filtering for large sets of terms. Small\n            # sets of terms are not expected to perform any differently than\n            # individual term clauses.\n            return {\n                \"terms\": {\n                    SOURCE_TYPE_FIELD_NAME: [\n                        source_type.value for source_type in source_types\n                    ]\n                }\n            }\n\n        def _get_tag_filter(tags: list[Tag]) -> TermsQuery[str]:\n            \"\"\"Returns a filter for the tags.\n\n            Since this returns an isolated terms clause, it can be cached in\n            OpenSearch independently of other clauses in _get_search_filters.\n\n            Args:\n                tags: The tags to restrict documents to.\n\n            Raises:\n                ValueError: The number of tags is greater than\n                    MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY.\n                ValueError: An empty list was supplied.\n\n            Returns:\n                A filter for the tags.\n            \"\"\"\n            if not tags:\n                raise ValueError(\n                    \"tags cannot be empty if trying to create a tag filter.\"\n                )\n            if len(tags) > MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY:\n                raise ValueError(\n                    f\"Too many tags: {len(tags)}. Max allowed: {MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY}.\"\n                )\n            # Kind of an abstraction leak, see\n            # convert_metadata_dict_to_list_of_strings for why metadata list\n            # entries are expected to look this way.\n            tag_str_list = [\n                f\"{tag.tag_key}{INDEX_SEPARATOR}{tag.tag_value}\" for tag in tags\n            ]\n            # Use terms instead of a list of term within a should clause because\n            # Lucene will optimize the filtering for large sets of terms. Small\n            # sets of terms are not expected to perform any differently than\n            # individual term clauses.\n            return {\"terms\": {METADATA_LIST_FIELD_NAME: tag_str_list}}\n\n        def _get_document_set_filter(document_sets: list[str]) -> TermsQuery[str]:\n            \"\"\"Returns a filter for the document sets.\n\n            Since this returns an isolated terms clause, it can be cached in\n            OpenSearch independently of other clauses in _get_search_filters.\n\n            Args:\n                document_sets: The document sets to restrict documents to.\n\n            Raises:\n                ValueError: The number of document sets is greater than\n                    MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY.\n                ValueError: An empty list was supplied.\n\n            Returns:\n                A filter for the document sets.\n            \"\"\"\n            if not document_sets:\n                raise ValueError(\n                    \"document_sets cannot be empty if trying to create a document set filter.\"\n                )\n            if len(document_sets) > MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY:\n                raise ValueError(\n                    f\"Too many document sets: {len(document_sets)}. Max allowed: {MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY}.\"\n                )\n            # Use terms instead of a list of term within a should clause because\n            # Lucene will optimize the filtering for large sets of terms. Small\n            # sets of terms are not expected to perform any differently than\n            # individual term clauses.\n            return {\"terms\": {DOCUMENT_SETS_FIELD_NAME: list(document_sets)}}\n\n        def _get_user_project_filter(project_id: int) -> TermQuery[int]:\n            return {\"term\": {USER_PROJECTS_FIELD_NAME: {\"value\": project_id}}}\n\n        def _get_persona_filter(persona_id: int) -> TermQuery[int]:\n            return {\"term\": {PERSONAS_FIELD_NAME: {\"value\": persona_id}}}\n\n        def _get_time_cutoff_filter(time_cutoff: datetime) -> dict[str, Any]:\n            # Convert to UTC if not already so the cutoff is comparable to the\n            # document data.\n            time_cutoff = set_or_convert_timezone_to_utc(time_cutoff)\n            # Logical OR operator on its elements.\n            time_cutoff_filter: dict[str, Any] = {\n                \"bool\": {\"should\": [], \"minimum_should_match\": 1}\n            }\n            time_cutoff_filter[\"bool\"][\"should\"].append(\n                {\n                    \"range\": {\n                        LAST_UPDATED_FIELD_NAME: {\"gte\": int(time_cutoff.timestamp())}\n                    }\n                }\n            )\n            if time_cutoff < datetime.now(timezone.utc) - timedelta(\n                days=ASSUMED_DOCUMENT_AGE_DAYS\n            ):\n                # Since the time cutoff is older than ASSUMED_DOCUMENT_AGE_DAYS\n                # ago, we include documents which have no\n                # LAST_UPDATED_FIELD_NAME value.\n                time_cutoff_filter[\"bool\"][\"should\"].append(\n                    {\n                        \"bool\": {\n                            \"must_not\": {\"exists\": {\"field\": LAST_UPDATED_FIELD_NAME}}\n                        }\n                    }\n                )\n            return time_cutoff_filter\n\n        def _get_chunk_index_filter(\n            min_chunk_index: int | None, max_chunk_index: int | None\n        ) -> dict[str, Any]:\n            range_clause: dict[str, Any] = {\"range\": {CHUNK_INDEX_FIELD_NAME: {}}}\n            if min_chunk_index is not None:\n                range_clause[\"range\"][CHUNK_INDEX_FIELD_NAME][\"gte\"] = min_chunk_index\n            if max_chunk_index is not None:\n                range_clause[\"range\"][CHUNK_INDEX_FIELD_NAME][\"lte\"] = max_chunk_index\n            return range_clause\n\n        def _get_attached_document_id_filter(\n            doc_ids: list[str],\n        ) -> TermsQuery[str]:\n            \"\"\"\n            Returns a filter for documents explicitly attached to an assistant.\n\n            Since this returns an isolated terms clause, it can be cached in\n            OpenSearch independently of other clauses in _get_search_filters.\n\n            Args:\n                doc_ids: The document IDs to restrict documents to.\n\n            Raises:\n                ValueError: The number of document IDs is greater than\n                    MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY.\n                ValueError: An empty list was supplied.\n\n            Returns:\n                A filter for the document IDs.\n            \"\"\"\n            if not doc_ids:\n                raise ValueError(\n                    \"doc_ids cannot be empty if trying to create a document ID filter.\"\n                )\n            if len(doc_ids) > MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY:\n                raise ValueError(\n                    f\"Too many document IDs: {len(doc_ids)}. Max allowed: {MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY}.\"\n                )\n            # Use terms instead of a list of term within a should clause because\n            # Lucene will optimize the filtering for large sets of terms. Small\n            # sets of terms are not expected to perform any differently than\n            # individual term clauses.\n            return {\"terms\": {DOCUMENT_ID_FIELD_NAME: list(doc_ids)}}\n\n        def _get_hierarchy_node_filter(\n            node_ids: list[int],\n        ) -> TermsQuery[int]:\n            \"\"\"\n            Returns a filter for chunks whose ancestors include any of the given\n            hierarchy nodes.\n\n            Since this returns an isolated terms clause, it can be cached in\n            OpenSearch independently of other clauses in _get_search_filters.\n\n            Args:\n                node_ids: The hierarchy node IDs to restrict documents to.\n\n            Raises:\n                ValueError: The number of hierarchy node IDs is greater than\n                    MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY.\n                ValueError: An empty list was supplied.\n\n            Returns:\n                A filter for the hierarchy node IDs.\n            \"\"\"\n            if not node_ids:\n                raise ValueError(\n                    \"node_ids cannot be empty if trying to create a hierarchy node ID filter.\"\n                )\n            if len(node_ids) > MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY:\n                raise ValueError(\n                    f\"Too many hierarchy node IDs: {len(node_ids)}. Max allowed: {MAX_NUM_TERMS_ALLOWED_IN_TERMS_QUERY}.\"\n                )\n            # Use terms instead of a list of term within a should clause because\n            # Lucene will optimize the filtering for large sets of terms. Small\n            # sets of terms are not expected to perform any differently than\n            # individual term clauses.\n            return {\"terms\": {ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME: list(node_ids)}}\n\n        if document_id is not None and attached_document_ids is not None:\n            raise ValueError(\n                \"document_id and attached_document_ids cannot be used together.\"\n            )\n\n        filter_clauses: list[dict[str, Any]] = []\n\n        if not include_hidden:\n            filter_clauses.append({\"term\": {HIDDEN_FIELD_NAME: {\"value\": False}}})\n\n        if access_control_list is not None:\n            # If an access control list is provided, the caller can only\n            # retrieve public documents, and non-public documents where at least\n            # one acl provided here is present in the document's acl list. If\n            # there is explicitly no list provided, we make no restrictions on\n            # the documents that can be retrieved.\n            filter_clauses.append(_get_acl_visibility_filter(access_control_list))\n\n        if source_types:\n            # If at least one source type is provided, the caller will only\n            # retrieve documents whose source type is present in this input\n            # list.\n            filter_clauses.append(_get_source_type_filter(source_types))\n\n        if tags:\n            # If at least one tag is provided, the caller will only retrieve\n            # documents where at least one tag provided here is present in the\n            # document's metadata list.\n            filter_clauses.append(_get_tag_filter(tags))\n\n        # Knowledge scope: explicit knowledge attachments restrict what an\n        # assistant can see. When none are set the assistant searches\n        # everything.\n        #\n        # persona_id_filter is a primary trigger — a persona with user files IS\n        # explicit knowledge, so it can start a knowledge scope on its own.\n        #\n        # project_id_filter is additive — it widens the scope to also cover\n        # overflowing project files but never restricts on its own (a chat\n        # inside a project should still search team knowledge).\n        has_knowledge_scope = (\n            attached_document_ids\n            or hierarchy_node_ids\n            or document_sets\n            or persona_id_filter is not None\n        )\n\n        if has_knowledge_scope:\n            # Since this returns an isolated bool should clause, it can be\n            # cached in OpenSearch independently of other clauses in\n            # _get_search_filters.\n            knowledge_filter: dict[str, Any] = {\n                \"bool\": {\"should\": [], \"minimum_should_match\": 1}\n            }\n            if attached_document_ids:\n                knowledge_filter[\"bool\"][\"should\"].append(\n                    _get_attached_document_id_filter(attached_document_ids)\n                )\n            if hierarchy_node_ids:\n                knowledge_filter[\"bool\"][\"should\"].append(\n                    _get_hierarchy_node_filter(hierarchy_node_ids)\n                )\n            if document_sets:\n                knowledge_filter[\"bool\"][\"should\"].append(\n                    _get_document_set_filter(document_sets)\n                )\n            if persona_id_filter is not None:\n                knowledge_filter[\"bool\"][\"should\"].append(\n                    _get_persona_filter(persona_id_filter)\n                )\n            if project_id_filter is not None:\n                knowledge_filter[\"bool\"][\"should\"].append(\n                    _get_user_project_filter(project_id_filter)\n                )\n            filter_clauses.append(knowledge_filter)\n\n        if time_cutoff is not None:\n            # If a time cutoff is provided, the caller will only retrieve\n            # documents where the document was last updated at or after the time\n            # cutoff. For documents which do not have a value for\n            # LAST_UPDATED_FIELD_NAME, we assume some default age for the\n            # purposes of time cutoff.\n            filter_clauses.append(_get_time_cutoff_filter(time_cutoff))\n\n        if min_chunk_index is not None or max_chunk_index is not None:\n            filter_clauses.append(\n                _get_chunk_index_filter(min_chunk_index, max_chunk_index)\n            )\n\n        if document_id is not None:\n            filter_clauses.append(\n                {\"term\": {DOCUMENT_ID_FIELD_NAME: {\"value\": document_id}}}\n            )\n\n        if max_chunk_size is not None:\n            filter_clauses.append(\n                {\"term\": {MAX_CHUNK_SIZE_FIELD_NAME: {\"value\": max_chunk_size}}}\n            )\n\n        if tenant_state.multitenant:\n            filter_clauses.append(\n                {\"term\": {TENANT_ID_FIELD_NAME: {\"value\": tenant_state.tenant_id}}}\n            )\n\n        return filter_clauses\n\n    @staticmethod\n    def _get_match_highlights_configuration() -> dict[str, Any]:\n        \"\"\"\n        Gets configuration for returning match highlights for a hit.\n        \"\"\"\n        match_highlights_configuration: dict[str, Any] = {\n            \"fields\": {\n                CONTENT_FIELD_NAME: {\n                    # See https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/#highlighter-types\n                    \"type\": \"unified\",\n                    # The length in chars of a match snippet. Somewhat\n                    # arbitrarily-chosen. The Vespa codepath limited total\n                    # highlights length to 400 chars. fragment_size *\n                    # number_of_fragments = 400 should be good enough.\n                    \"fragment_size\": 100,\n                    # The number of snippets to return per field per document\n                    # hit.\n                    \"number_of_fragments\": 4,\n                    # These tags wrap matched keywords and they match what Vespa\n                    # used to return. Use them to minimize changes to our code.\n                    \"pre_tags\": [\"<hi>\"],\n                    \"post_tags\": [\"</hi>\"],\n                }\n            }\n        }\n\n        return match_highlights_configuration\n"
  },
  {
    "path": "backend/onyx/document_index/opensearch/string_filtering.py",
    "content": "import re\n\nMAX_DOCUMENT_ID_ENCODED_LENGTH: int = 512\n\n\nclass DocumentIDTooLongError(ValueError):\n    \"\"\"Raised when a document ID is too long for OpenSearch after filtering.\"\"\"\n\n\ndef filter_and_validate_document_id(\n    document_id: str, max_encoded_length: int = MAX_DOCUMENT_ID_ENCODED_LENGTH\n) -> str:\n    \"\"\"\n    Filters and validates a document ID such that it can be used as an ID in\n    OpenSearch.\n\n    OpenSearch imposes the following restrictions on IDs:\n    - Must not be an empty string.\n    - Must not exceed 512 bytes.\n    - Must not contain any control characters (newline, etc.).\n    - Must not contain URL-unsafe characters (#, ?, /, %, &, etc.).\n\n    For extra resilience, this function simply removes all characters that are\n    not alphanumeric or one of _.-~.\n\n    Any query on document ID should use this function.\n\n    Args:\n        document_id: The document ID to filter and validate.\n        max_encoded_length: The maximum length of the document ID after\n            filtering in bytes. Compared with >= for extra resilience, so\n            encoded values of this length will fail.\n\n    Raises:\n        DocumentIDTooLongError: If the document ID is too long after filtering.\n        ValueError: If the document ID is empty after filtering.\n\n    Returns:\n        str: The filtered document ID.\n    \"\"\"\n    filtered_document_id = re.sub(r\"[^A-Za-z0-9_.\\-~]\", \"\", document_id)\n    if not filtered_document_id:\n        raise ValueError(f\"Document ID {document_id} is empty after filtering.\")\n    if len(filtered_document_id.encode(\"utf-8\")) >= max_encoded_length:\n        raise DocumentIDTooLongError(\n            f\"Document ID {document_id} is too long after filtering.\"\n        )\n    return filtered_document_id\n"
  },
  {
    "path": "backend/onyx/document_index/vespa/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd.jinja",
    "content": "schema {{ schema_name }} {\n    # source, type, target triplets for kg_relationships\n    struct kg_relationship {\n        field source type string {}\n        field rel_type type string {}\n        field target type string {}\n    }\n\n    document {{ schema_name }} {\n        {% if multi_tenant %}\n        field tenant_id type string {\n            indexing: summary | attribute\n            rank: filter\n            attribute: fast-search\n        }\n        {% endif %}\n        # Not to be confused with the UUID generated for this chunk which is called documentid by default\n        field document_id type string {\n            indexing: summary | attribute\n            rank: filter\n            attribute: fast-search\n        }\n        field chunk_id type int {\n            indexing: summary | attribute\n        }\n        # Displayed in the UI as the main identifier for the doc\n        field semantic_identifier type string {\n            indexing: summary | attribute\n        }\n        # Must have an additional field for whether to skip title embeddings\n        # This information cannot be extracted from either the title field nor title embedding\n        field skip_title type bool {\n            indexing: attribute\n        }\n        # May not always match the `semantic_identifier` e.g. for Slack docs the\n        # `semantic_identifier` will be the channel name, but the `title` will be empty\n        field title type string {\n            indexing: summary | index | attribute\n            index: enable-bm25\n        }\n        field content type string {\n            indexing: summary | index\n            index: enable-bm25\n        }\n        # duplication of `content` is far from ideal, but is needed for\n        # non-gram based highlighting for now. If the capability to re-use a\n        # single field to do both is added, `content_summary` should be removed\n        field content_summary type string {\n            indexing: summary | index\n            summary: dynamic\n        }\n        # Title embedding (x1)\n        field title_embedding type tensor<{{ embedding_precision }}>(x[{{ dim }}]) {\n            indexing: attribute | index\n            attribute {\n                distance-metric: angular\n            }\n        }\n        # Content embeddings (chunk + optional mini chunks embeddings)\n        # \"t\" and \"x\" are arbitrary names, not special keywords\n        field embeddings type tensor<{{ embedding_precision }}>(t{},x[{{ dim }}]) {\n            indexing: attribute | index\n            attribute {\n                distance-metric: angular\n            }\n        }\n        # Starting section of the doc, currently unused as it has been replaced by match highlighting\n        field blurb type string {\n            indexing: summary | attribute\n        }\n        field image_file_name type string {\n            indexing: summary | attribute\n        }\n        # https://docs.vespa.ai/en/attributes.html potential enum store for speed, but probably not worth it\n        field source_type type string {\n            indexing: summary | attribute\n            rank: filter\n            attribute: fast-search\n        }\n        # Can also index links https://docs.vespa.ai/en/reference/schema-reference.html#attribute\n        # URL type matching\n        field source_links type string {\n            indexing: summary | attribute\n        }\n        field section_continuation type bool {\n            indexing: summary | attribute\n        }\n        # Technically this one should be int, but can't change without causing breaks to existing index\n        field boost type float {\n            indexing: summary | attribute\n        }\n        field hidden type bool {\n            indexing: summary | attribute\n            rank: filter\n        }\n        # Field to indicate whether a short chunk is a low content chunk\n        field aggregated_chunk_boost_factor type float {\n            indexing: attribute\n        }\n\n        # Separate array fields for knowledge graph data\n        field kg_entities type array<string> {\n            indexing: summary | attribute\n            attribute: fast-search\n        }\n\n        field kg_relationships type array<kg_relationship> {\n            indexing: summary\n            struct-field source {\n                indexing: attribute\n                attribute: fast-search\n            }\n            struct-field rel_type {\n                indexing: attribute\n                attribute: fast-search\n            }\n            struct-field target {\n                indexing: attribute\n                attribute: fast-search\n            }\n        }\n\n        field kg_terms type array<string> {\n            indexing: summary | attribute\n            attribute: fast-search\n        }\n\n        # Needs to have a separate Attribute list for efficient filtering\n        field metadata_list type array<string> {\n            indexing: summary | attribute\n            rank:filter\n            attribute: fast-search\n        }\n        # If chunk is a large chunk, this will contain the ids of the smaller chunks\n        field large_chunk_reference_ids type array<int> {\n            indexing: summary | attribute\n        }\n        field metadata type string {\n            indexing: summary | attribute\n        }\n        field chunk_context type string {\n            indexing: summary | attribute\n        }\n        field doc_summary type string {\n            indexing: summary | attribute\n        }\n        field metadata_suffix type string {\n            indexing: summary | attribute\n        }\n        field doc_updated_at type int {\n            indexing: summary | attribute\n        }\n        field primary_owners type array<string> {\n            indexing: summary | attribute\n        }\n        field secondary_owners type array<string> {\n            indexing: summary | attribute\n        }\n        field access_control_list type weightedset<string> {\n            indexing: summary | attribute\n            rank: filter\n            attribute: fast-search\n        }\n        field document_sets type weightedset<string> {\n            indexing: summary | attribute\n            rank: filter\n            attribute: fast-search\n        }\n        field user_file type int {\n            indexing: summary | attribute\n            rank: filter\n            attribute: fast-search\n        }\n        field user_folder type int {\n            indexing: summary | attribute\n            rank: filter\n            attribute: fast-search\n        }\n        field user_project type array<int> {\n            indexing: summary | attribute\n            rank: filter\n            attribute: fast-search\n        }\n        field personas type array<int> {\n            indexing: summary | attribute\n            rank: filter\n            attribute: fast-search\n        }\n    }\n\n    # If using different tokenization settings, the fieldset has to be removed, and the field must\n    # be specified in the yql like:\n    # + 'or ({grammar: \"weakAnd\", defaultIndex:\"title\"}userInput(@query)) '\n    # + 'or ({grammar: \"weakAnd\", defaultIndex:\"content\"}userInput(@query)) '\n    # Note: for BM-25, the ngram size (and whether ngrams are used) changes the range of the scores\n    fieldset default {\n        fields: content, title\n    }\n\n    rank-profile default_rank {\n        inputs {\n            query(decay_factor) double\n        }\n\n        function inline document_boost() {\n            # 0.5 to 2x score: piecewise sigmoid function stretched out by factor of 3\n            # meaning requires 3x the number of feedback votes to have default sigmoid effect\n            expression: if(attribute(boost) < 0, 0.5 + (1 / (1 + exp(-attribute(boost) / 3))), 2 / (1 + exp(-attribute(boost) / 3)))\n        }\n\n        function inline document_age() {\n            # Time in years (91.3 days ~= 3 Months ~= 1 fiscal quarter if no age found)\n            expression: max(if(isNan(attribute(doc_updated_at)) == 1, 7890000, now() - attribute(doc_updated_at)) / 31536000, 0)\n        }\n\n        function inline aggregated_chunk_boost() {\n            # Aggregated boost factor, currently only used for information content classification\n            expression: if(isNan(attribute(aggregated_chunk_boost_factor)) == 1, 1.0, attribute(aggregated_chunk_boost_factor))\n        }\n\n        # Document score decays from 1 to 0.75 as age of last updated time increases\n        function inline recency_bias() {\n            expression: max(1 / (1 + query(decay_factor) * document_age), 0.75)\n        }\n\n        match-features: recency_bias\n    }\n\n    rank-profile hybrid_search_semantic_base_{{ dim }} inherits default, default_rank {\n        inputs {\n            query(query_embedding) tensor<float>(x[{{ dim }}])\n        }\n\n        function title_vector_score() {\n            expression {\n                # If no good matching titles, then it should use the context embeddings rather than having some\n                # irrelevant title have a vector score of 1. This way at least it will be the doc with the highest\n                # matching content score getting the full score\n                max(closeness(field, embeddings), closeness(field, title_embedding))\n            }\n        }\n\n        # First phase must be vector to allow hits that have no keyword matches\n        first-phase {\n            expression: query(title_content_ratio) * closeness(field, title_embedding) + (1 - query(title_content_ratio)) * closeness(field, embeddings)\n        }\n\n        # Weighted average between Vector Search and BM-25\n        global-phase {\n            expression {\n                (\n                    # Weighted Vector Similarity Score\n                    (\n                        query(alpha) * (\n                            (query(title_content_ratio) * normalize_linear(title_vector_score))\n                            +\n                            ((1 - query(title_content_ratio)) * normalize_linear(closeness(field, embeddings)))\n                        )\n                    )\n\n                    +\n\n                    # Weighted Keyword Similarity Score\n                    # Note: for the BM25 Title score, it requires decent stopword removal in the query\n                    # This needs to be the case so there aren't irrelevant titles being normalized to a score of 1\n                    (\n                        (1 - query(alpha)) * (\n                            (query(title_content_ratio) * normalize_linear(bm25(title)))\n                            +\n                            ((1 - query(title_content_ratio)) * normalize_linear(bm25(content)))\n                        )\n                    )\n                )\n                # Boost based on user feedback\n                * document_boost\n                # Decay factor based on time document was last updated\n                * recency_bias\n                # Boost based on aggregated boost calculation\n                * aggregated_chunk_boost\n            }\n            # Target hits for hybrid retrieval should be at least this value.\n            rerank-count: 1000\n        }\n\n        match-features {\n            bm25(title)\n            bm25(content)\n            closeness(field, title_embedding)\n            closeness(field, embeddings)\n            document_boost\n            recency_bias\n            aggregated_chunk_boost\n            closest(embeddings)\n        }\n    }\n\n\n    rank-profile hybrid_search_keyword_base_{{ dim }} inherits default, default_rank {\n        inputs {\n            query(query_embedding) tensor<float>(x[{{ dim }}])\n        }\n\n        function title_vector_score() {\n            expression {\n                # If no good matching titles, then it should use the context embeddings rather than having some\n                # irrelevant title have a vector score of 1. This way at least it will be the doc with the highest\n                # matching content score getting the full score\n                max(closeness(field, embeddings), closeness(field, title_embedding))\n            }\n        }\n\n        # First phase must be vector to allow hits that have no keyword matches\n        first-phase {\n            expression: query(title_content_ratio) * bm25(title) + (1 - query(title_content_ratio)) * bm25(content)\n        }\n\n        # Weighted average between Vector Search and BM-25\n        global-phase {\n            expression {\n                (\n                    # Weighted Vector Similarity Score\n                    (\n                        query(alpha) * (\n                            (query(title_content_ratio) * normalize_linear(title_vector_score))\n                            +\n                            ((1 - query(title_content_ratio)) * normalize_linear(closeness(field, embeddings)))\n                        )\n                    )\n\n                    +\n\n                    # Weighted Keyword Similarity Score\n                    # Note: for the BM25 Title score, it requires decent stopword removal in the query\n                    # This needs to be the case so there aren't irrelevant titles being normalized to a score of 1\n                    (\n                        (1 - query(alpha)) * (\n                            (query(title_content_ratio) * normalize_linear(bm25(title)))\n                            +\n                            ((1 - query(title_content_ratio)) * normalize_linear(bm25(content)))\n                        )\n                    )\n                )\n                # Boost based on user feedback\n                * document_boost\n                # Decay factor based on time document was last updated\n                * recency_bias\n                # Boost based on aggregated boost calculation\n                * aggregated_chunk_boost\n            }\n            # Target hits for hybrid retrieval should be at least this value.\n            rerank-count: 1000\n        }\n\n        match-features {\n            bm25(title)\n            bm25(content)\n            closeness(field, title_embedding)\n            closeness(field, embeddings)\n            document_boost\n            recency_bias\n            aggregated_chunk_boost\n            closest(embeddings)\n        }\n    }\n\n    # Used when searching from the admin UI for a specific doc to hide / boost\n    # Very heavily prioritize title\n    rank-profile admin_search inherits default, default_rank {\n        first-phase {\n            expression: bm25(content) + (5 * bm25(title))\n        }\n    }\n\n    rank-profile random_ inherits default {\n        first-phase {\n            expression: random\n        }\n    }\n}\n"
  },
  {
    "path": "backend/onyx/document_index/vespa/app_config/services.xml.jinja",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<services version=\"1.0\">\n    <container id=\"default\" version=\"1.0\">\n        <document-api/>\n        <search/>\n        <http>\n            <server id=\"default\" port=\"8081\"/>\n        </http>\n        <nodes>\n            <node hostalias=\"danswer-node\" />\n        </nodes>\n    </container>\n    <content id=\"danswer_index\" version=\"1.0\">\n        <redundancy>1</redundancy>\n        <documents>\n            <!-- <document type=\"danswer_chunk\" mode=\"index\" /> -->\n            {{ document_elements }}\n        </documents>\n        <nodes>\n            <node hostalias=\"danswer-node\" distribution-key=\"0\" />\n        </nodes>\n        <tuning>\n            <resource-limits>\n                <!-- Default is 75% but this can be increased for Dockerized deployments -->\n                <!-- https://docs.vespa.ai/en/operations/feed-block.html -->\n                <disk>0.85</disk>\n            </resource-limits>\n        </tuning>\n        <engine>    \n            <proton>\n                <tuning>\n                    <searchnode>\n                        <requestthreads>\n                            <persearch>{{ num_search_threads }}</persearch>\n                        </requestthreads>\n                    </searchnode>\n                </tuning>\n            </proton>\n        </engine>\n        <config name=\"vespa.config.search.summary.juniperrc\">\n            <max_matches>3</max_matches>\n            <length>750</length>\n            <surround_max>350</surround_max>\n            <min_length>300</min_length>\n        </config>\n    </content>\n</services>"
  },
  {
    "path": "backend/onyx/document_index/vespa/app_config/validation-overrides.xml.jinja",
    "content": "<validation-overrides>\n    <allow\n        until=\"{{ until_date }}\"\n        comment=\"We need to be able to create/delete indices for swapping models\">schema-removal</allow>\n    <allow\n        until=\"{{ until_date }}\"\n        comment=\"We need to be able to update the schema for updates to the Onyx schema\">indexing-change</allow>\n    <allow \n        until=\"{{ until_date }}\"\n        comment=\"Prevents old alt indices from interfering with changes\">field-type-change</allow>\n</validation-overrides>\n"
  },
  {
    "path": "backend/onyx/document_index/vespa/chunk_retrieval.py",
    "content": "import json\nimport string\nimport time\nfrom collections.abc import Callable\nfrom collections.abc import Mapping\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\n\nimport httpx\nfrom retry import retry\n\nfrom onyx.background.celery.tasks.opensearch_migration.constants import (\n    FINISHED_VISITING_SLICE_CONTINUATION_TOKEN,\n)\nfrom onyx.background.celery.tasks.opensearch_migration.transformer import (\n    FIELDS_NEEDED_FOR_TRANSFORMATION,\n)\nfrom onyx.configs.app_configs import LOG_VESPA_TIMING_INFORMATION\nfrom onyx.configs.app_configs import VESPA_LANGUAGE_OVERRIDE\nfrom onyx.configs.app_configs import VESPA_MIGRATION_REQUEST_TIMEOUT_S\nfrom onyx.configs.app_configs import VESPA_MIGRATION_SERVER_SIDE_REQUEST_TIMEOUT\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import InferenceChunkUncleaned\nfrom onyx.document_index.interfaces import VespaChunkRequest\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client\nfrom onyx.document_index.vespa.shared_utils.vespa_request_builders import (\n    build_vespa_filters,\n)\nfrom onyx.document_index.vespa.shared_utils.vespa_request_builders import (\n    build_vespa_id_based_retrieval_yql,\n)\nfrom onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST\nfrom onyx.document_index.vespa_constants import BLURB\nfrom onyx.document_index.vespa_constants import BOOST\nfrom onyx.document_index.vespa_constants import CHUNK_CONTEXT\nfrom onyx.document_index.vespa_constants import CHUNK_ID\nfrom onyx.document_index.vespa_constants import CONTENT\nfrom onyx.document_index.vespa_constants import CONTENT_SUMMARY\nfrom onyx.document_index.vespa_constants import DOC_SUMMARY\nfrom onyx.document_index.vespa_constants import DOC_UPDATED_AT\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT\nfrom onyx.document_index.vespa_constants import HIDDEN\nfrom onyx.document_index.vespa_constants import IMAGE_FILE_NAME\nfrom onyx.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS\nfrom onyx.document_index.vespa_constants import MAX_ID_SEARCH_QUERY_SIZE\nfrom onyx.document_index.vespa_constants import MAX_OR_CONDITIONS\nfrom onyx.document_index.vespa_constants import METADATA\nfrom onyx.document_index.vespa_constants import METADATA_SUFFIX\nfrom onyx.document_index.vespa_constants import PRIMARY_OWNERS\nfrom onyx.document_index.vespa_constants import SEARCH_ENDPOINT\nfrom onyx.document_index.vespa_constants import SECONDARY_OWNERS\nfrom onyx.document_index.vespa_constants import SECTION_CONTINUATION\nfrom onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER\nfrom onyx.document_index.vespa_constants import SOURCE_LINKS\nfrom onyx.document_index.vespa_constants import SOURCE_TYPE\nfrom onyx.document_index.vespa_constants import TENANT_ID\nfrom onyx.document_index.vespa_constants import TITLE\nfrom onyx.document_index.vespa_constants import YQL_BASE\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n\ndef _process_dynamic_summary(\n    dynamic_summary: str, max_summary_length: int = 400\n) -> list[str]:\n    if not dynamic_summary:\n        return []\n\n    current_length = 0\n    processed_summary: list[str] = []\n    for summary_section in dynamic_summary.split(\"<sep />\"):\n        # if we're past the desired max length, break at the last word\n        if current_length + len(summary_section) >= max_summary_length:\n            summary_section = summary_section[: max_summary_length - current_length]\n            summary_section = summary_section.lstrip()  # remove any leading whitespace\n\n            # handle the case where the truncated section is either just a\n            # single (partial) word or if it's empty\n            first_space = summary_section.find(\" \")\n            if first_space == -1:\n                # add ``...`` to previous section\n                if processed_summary:\n                    processed_summary[-1] += \"...\"\n                break\n\n            # handle the valid truncated section case\n            summary_section = summary_section.rsplit(\" \", 1)[0]\n            if summary_section[-1] in string.punctuation:\n                summary_section = summary_section[:-1]\n            summary_section += \"...\"\n            processed_summary.append(summary_section)\n            break\n\n        processed_summary.append(summary_section)\n        current_length += len(summary_section)\n\n    return processed_summary\n\n\ndef _vespa_hit_to_inference_chunk(\n    hit: dict[str, Any], null_score: bool = False\n) -> InferenceChunkUncleaned:\n    fields = cast(dict[str, Any], hit[\"fields\"])\n\n    # parse fields that are stored as strings, but are really json / datetime\n    metadata = json.loads(fields[METADATA]) if METADATA in fields else {}\n    updated_at = (\n        datetime.fromtimestamp(fields[DOC_UPDATED_AT], tz=timezone.utc)\n        if DOC_UPDATED_AT in fields\n        else None\n    )\n\n    match_highlights = _process_dynamic_summary(\n        # fallback to regular `content` if the `content_summary` field\n        # isn't present\n        dynamic_summary=hit[\"fields\"].get(CONTENT_SUMMARY, hit[\"fields\"][CONTENT]),\n    )\n    semantic_identifier = fields.get(SEMANTIC_IDENTIFIER, \"\")\n    if not semantic_identifier:\n        logger.error(\n            f\"Chunk with blurb: {fields.get(BLURB, 'Unknown')[:50]}... has no Semantic Identifier\"\n        )\n\n    source_links = fields.get(SOURCE_LINKS, {})\n    source_links_dict_unprocessed = (\n        json.loads(source_links) if isinstance(source_links, str) else source_links\n    )\n    source_links_dict = {\n        int(k): v\n        for k, v in cast(dict[str, str], source_links_dict_unprocessed).items()\n    }\n\n    return InferenceChunkUncleaned(\n        chunk_id=fields[CHUNK_ID],\n        blurb=fields.get(BLURB, \"\"),  # Unused\n        content=fields[CONTENT],  # Includes extra title prefix and metadata suffix;\n        # also sometimes context for contextual rag\n        source_links=source_links_dict or {0: \"\"},\n        section_continuation=fields[SECTION_CONTINUATION],\n        document_id=fields[DOCUMENT_ID],\n        source_type=fields[SOURCE_TYPE],\n        # still called `image_file_name` in Vespa for backwards compatibility\n        image_file_id=fields.get(IMAGE_FILE_NAME),\n        title=fields.get(TITLE),\n        semantic_identifier=fields[SEMANTIC_IDENTIFIER],\n        boost=fields.get(BOOST, 1),\n        score=None if null_score else hit.get(\"relevance\", 0),\n        hidden=fields.get(HIDDEN, False),\n        primary_owners=fields.get(PRIMARY_OWNERS),\n        secondary_owners=fields.get(SECONDARY_OWNERS),\n        large_chunk_reference_ids=fields.get(LARGE_CHUNK_REFERENCE_IDS, []),\n        metadata=metadata,\n        metadata_suffix=fields.get(METADATA_SUFFIX),\n        doc_summary=fields.get(DOC_SUMMARY, \"\"),\n        chunk_context=fields.get(CHUNK_CONTEXT, \"\"),\n        match_highlights=match_highlights,\n        updated_at=updated_at,\n    )\n\n\ndef get_chunks_via_visit_api(\n    chunk_request: VespaChunkRequest,\n    index_name: str,\n    filters: IndexFilters,\n    field_names: list[str] | None = None,\n    get_large_chunks: bool = False,\n    short_tensor_format: bool = False,\n) -> list[dict]:\n    # Constructing the URL for the Visit API\n    # NOTE: visit API uses the same URL as the document API, but with different params\n    url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)\n\n    # build the list of fields to retrieve\n    field_set_list = (\n        [f\"{field_name}\" for field_name in field_names] if field_names else []\n    )\n    acl_fieldset_entry = f\"{ACCESS_CONTROL_LIST}\"\n    if (\n        field_set_list\n        and filters.access_control_list\n        and acl_fieldset_entry not in field_set_list\n    ):\n        field_set_list.append(acl_fieldset_entry)\n\n    if MULTI_TENANT:\n        tenant_id_fieldset_entry = f\"{TENANT_ID}\"\n        if field_set_list and tenant_id_fieldset_entry not in field_set_list:\n            field_set_list.append(tenant_id_fieldset_entry)\n\n    if field_set_list:\n        field_set = f\"{index_name}:\" + \",\".join(field_set_list)\n    else:\n        field_set = None\n\n    # build filters\n    selection = f\"{index_name}.document_id=='{chunk_request.document_id}'\"\n\n    if chunk_request.is_capped:\n        selection += f\" and {index_name}.chunk_id>={chunk_request.min_chunk_ind or 0}\"\n        selection += f\" and {index_name}.chunk_id<={chunk_request.max_chunk_ind}\"\n    if not get_large_chunks:\n        selection += f\" and {index_name}.large_chunk_reference_ids == null\"\n\n    # enforcing tenant_id through a == condition\n    if MULTI_TENANT:\n        if filters.tenant_id:\n            selection += f\" and {index_name}.tenant_id=='{filters.tenant_id}'\"\n        else:\n            raise ValueError(\"Tenant ID is required for multi-tenant\")\n\n    # Setting up the selection criteria in the query parameters\n    params = {\n        # NOTE: Document Selector Language doesn't allow `contains`, so we can't check\n        # for the ACL in the selection. Instead, we have to check as a postfilter\n        \"selection\": selection,\n        \"continuation\": None,\n        \"wantedDocumentCount\": 1_000,\n        \"fieldSet\": field_set,\n    }\n    # Vespa can supply tensors in various different formats. This explicitly\n    # asks to retrieve tensor data in \"short-value\" format.\n    if short_tensor_format:\n        params[\"format.tensors\"] = \"short-value\"\n\n    document_chunks: list[dict] = []\n    while True:\n        try:\n            filtered_params = {k: v for k, v in params.items() if v is not None}\n            with get_vespa_http_client() as http_client:\n                response = http_client.get(url, params=filtered_params)\n                response.raise_for_status()\n        except httpx.HTTPError as e:\n            error_base = \"Failed to query Vespa\"\n            logger.error(\n                f\"{error_base}:\\n\"\n                f\"Request URL: {e.request.url}\\n\"\n                f\"Request Headers: {e.request.headers}\\n\"\n                f\"Request Payload: {params}\\n\"\n                f\"Exception: {str(e)}\"\n            )\n            raise httpx.HTTPError(error_base) from e\n\n        # Check if the response contains any documents\n        response_data = response.json()\n\n        if \"documents\" in response_data:\n            for document in response_data[\"documents\"]:\n                if filters.access_control_list:\n                    document_acl = document[\"fields\"].get(ACCESS_CONTROL_LIST)\n                    if not document_acl or not any(\n                        user_acl_entry in document_acl\n                        for user_acl_entry in filters.access_control_list\n                    ):\n                        continue\n\n                if MULTI_TENANT:\n                    if not filters.tenant_id:\n                        raise ValueError(\"Tenant ID is required for multi-tenant\")\n                    document_tenant_id = document[\"fields\"].get(TENANT_ID)\n                    if document_tenant_id != filters.tenant_id:\n                        logger.error(\n                            f\"Skipping document {document['document_id']} because \"\n                            f\"it does not belong to tenant {filters.tenant_id}. \"\n                            \"This should never happen.\"\n                        )\n                        continue\n\n                document_chunks.append(document)\n\n        # Check for continuation token to handle pagination\n        if \"continuation\" in response_data and response_data[\"continuation\"]:\n            params[\"continuation\"] = response_data[\"continuation\"]\n        else:\n            break  # Exit loop if no continuation token\n\n    return document_chunks\n\n\ndef get_all_chunks_paginated(\n    index_name: str,\n    tenant_state: TenantState,\n    continuation_token_map: dict[int, str | None],\n    page_size: int,\n) -> tuple[list[dict], dict[int, str | None]]:\n    \"\"\"Gets all chunks in Vespa matching the filters, paginated.\n\n    Uses the Visit API with slicing. Each continuation token map entry is for a\n    different slice. The number of entries determines the number of slices.\n\n    Args:\n        index_name: The name of the Vespa index to visit.\n        tenant_state: The tenant state to filter by.\n        continuation_token_map: Map of slice ID to a token returned by Vespa\n            representing a page offset. None to start from the beginning of the\n            slice.\n        page_size: Best-effort batch size for the visit. Defaults to 1,000.\n\n    Returns:\n        Tuple of (list of chunk dicts, next continuation token or None). The\n            continuation token is None when the visit is complete.\n    \"\"\"\n\n    def _get_all_chunks_paginated_for_slice(\n        index_name: str,\n        tenant_state: TenantState,\n        slice_id: int,\n        total_slices: int,\n        continuation_token: str | None,\n        page_size: int,\n    ) -> tuple[list[dict], str | None]:\n        if continuation_token == FINISHED_VISITING_SLICE_CONTINUATION_TOKEN:\n            logger.debug(\n                f\"Slice {slice_id} has finished visiting. Returning empty list and {FINISHED_VISITING_SLICE_CONTINUATION_TOKEN}.\"\n            )\n            return [], FINISHED_VISITING_SLICE_CONTINUATION_TOKEN\n\n        url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)\n\n        selection: str = f\"{index_name}.large_chunk_reference_ids == null\"\n        if MULTI_TENANT:\n            selection += f\" and {index_name}.tenant_id=='{tenant_state.tenant_id}'\"\n\n        field_set = f\"{index_name}:\" + \",\".join(FIELDS_NEEDED_FOR_TRANSFORMATION)\n\n        params: dict[str, str | int | None] = {\n            \"selection\": selection,\n            \"fieldSet\": field_set,\n            \"wantedDocumentCount\": page_size,\n            \"format.tensors\": \"short-value\",\n            \"slices\": total_slices,\n            \"sliceId\": slice_id,\n            # When exceeded, Vespa should return gracefully with partial\n            # results. Even if no hits are returned, Vespa should still return a\n            # new continuation token representing a new spot in the linear\n            # traversal.\n            \"timeout\": VESPA_MIGRATION_SERVER_SIDE_REQUEST_TIMEOUT,\n        }\n        if continuation_token is not None:\n            params[\"continuation\"] = continuation_token\n\n        response: httpx.Response | None = None\n        start_time = time.monotonic()\n        try:\n            with get_vespa_http_client(\n                # When exceeded, an exception is raised in our code. No progress\n                # is saved, and the task will retry this spot in the traversal\n                # later.\n                timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S\n            ) as http_client:\n                response = http_client.get(url, params=params)\n                response.raise_for_status()\n        except httpx.HTTPError as e:\n            error_base = (\n                f\"Failed to get chunks from Vespa slice {slice_id} with continuation token \"\n                f\"{continuation_token} in {time.monotonic() - start_time:.3f} seconds.\"\n            )\n            logger.exception(\n                f\"Request URL: {e.request.url}\\nRequest Headers: {e.request.headers}\\nRequest Payload: {params}\\n\"\n            )\n            error_message = (\n                response.json().get(\"message\") if response else \"No response\"\n            )\n            logger.error(\"Error message from response: %s\", error_message)\n            raise httpx.HTTPError(error_base) from e\n\n        response_data = response.json()\n\n        # NOTE: If we see a falsey value for \"continuation\" in the response we\n        # assume we are done and return\n        # FINISHED_VISITING_SLICE_CONTINUATION_TOKEN instead.\n        next_continuation_token = (\n            response_data.get(\"continuation\")\n            or FINISHED_VISITING_SLICE_CONTINUATION_TOKEN\n        )\n        chunks = [chunk[\"fields\"] for chunk in response_data.get(\"documents\", [])]\n        if next_continuation_token == FINISHED_VISITING_SLICE_CONTINUATION_TOKEN:\n            logger.debug(\n                f\"Slice {slice_id} has finished visiting. Returning {len(chunks)} chunks and {next_continuation_token}.\"\n            )\n        return chunks, next_continuation_token\n\n    total_slices = len(continuation_token_map)\n    if total_slices < 1:\n        raise ValueError(\"continuation_token_map must have at least one entry.\")\n    # We want to guarantee that these invocations are ordered by slice_id,\n    # because we read in the same order below when parsing parallel_results.\n    functions_with_args: list[tuple[Callable, tuple]] = [\n        (\n            _get_all_chunks_paginated_for_slice,\n            (\n                index_name,\n                tenant_state,\n                slice_id,\n                total_slices,\n                continuation_token,\n                page_size,\n            ),\n        )\n        for slice_id, continuation_token in sorted(continuation_token_map.items())\n    ]\n\n    parallel_results = run_functions_tuples_in_parallel(\n        functions_with_args, allow_failures=True\n    )\n    if len(parallel_results) != total_slices:\n        raise RuntimeError(\n            f\"Expected {total_slices} parallel results, but got {len(parallel_results)}.\"\n        )\n\n    chunks: list[dict] = []\n    next_continuation_token_map: dict[int, str | None] = {\n        key: value for key, value in continuation_token_map.items()\n    }\n    for i, parallel_result in enumerate(parallel_results):\n        if i not in next_continuation_token_map:\n            raise RuntimeError(f\"Slice {i} is not in the continuation token map.\")\n        if parallel_result is None:\n            logger.error(\n                f\"Failed to get chunks for slice {i} of {total_slices}. \"\n                \"The continuation token for this slice will not be updated.\"\n            )\n            continue\n        chunks.extend(parallel_result[0])\n        next_continuation_token_map[i] = parallel_result[1]\n\n    return chunks, next_continuation_token_map\n\n\n# TODO(rkuo): candidate for removal if not being used\n# @retry(tries=10, delay=1, backoff=2)\n# def get_all_vespa_ids_for_document_id(\n#     document_id: str,\n#     index_name: str,\n#     filters: IndexFilters | None = None,\n#     get_large_chunks: bool = False,\n# ) -> list[str]:\n#     document_chunks = get_chunks_via_visit_api(\n#         chunk_request=VespaChunkRequest(document_id=document_id),\n#         index_name=index_name,\n#         filters=filters or IndexFilters(access_control_list=None),\n#         field_names=[DOCUMENT_ID],\n#         get_large_chunks=get_large_chunks,\n#     )\n#     return [chunk[\"id\"].split(\"::\", 1)[-1] for chunk in document_chunks]\n\n\ndef parallel_visit_api_retrieval(\n    index_name: str,\n    chunk_requests: list[VespaChunkRequest],\n    filters: IndexFilters,\n    get_large_chunks: bool = False,\n) -> list[InferenceChunkUncleaned]:\n    functions_with_args: list[tuple[Callable, tuple]] = [\n        (\n            get_chunks_via_visit_api,\n            (chunk_request, index_name, filters, get_large_chunks),\n        )\n        for chunk_request in chunk_requests\n    ]\n\n    parallel_results = run_functions_tuples_in_parallel(\n        functions_with_args, allow_failures=True\n    )\n\n    # Any failures to retrieve would give a None, drop the Nones and empty lists\n    vespa_chunk_sets = [res for res in parallel_results if res]\n\n    flattened_vespa_chunks = []\n    for chunk_set in vespa_chunk_sets:\n        flattened_vespa_chunks.extend(chunk_set)\n\n    inference_chunks = [\n        _vespa_hit_to_inference_chunk(chunk, null_score=True)\n        for chunk in flattened_vespa_chunks\n    ]\n\n    return inference_chunks\n\n\n@retry(tries=3, delay=1, backoff=2)\ndef query_vespa(\n    query_params: Mapping[str, str | int | float],\n) -> list[InferenceChunkUncleaned]:\n    if \"query\" in query_params and not cast(str, query_params[\"query\"]).strip():\n        raise ValueError(\"No/empty query received\")\n\n    params = dict(\n        **query_params,\n        **(\n            {\n                \"presentation.timing\": True,\n            }\n            if LOG_VESPA_TIMING_INFORMATION\n            else {}\n        ),\n    )\n\n    if VESPA_LANGUAGE_OVERRIDE:\n        params[\"language\"] = VESPA_LANGUAGE_OVERRIDE\n\n    try:\n        with get_vespa_http_client() as http_client:\n            response = http_client.post(SEARCH_ENDPOINT, json=params)\n            response.raise_for_status()\n    except httpx.HTTPError as e:\n        response_text = (\n            e.response.text if isinstance(e, httpx.HTTPStatusError) else None\n        )\n        status_code = (\n            e.response.status_code if isinstance(e, httpx.HTTPStatusError) else None\n        )\n        yql_value = params.get(\"yql\", \"\")\n        yql_length = len(str(yql_value))\n\n        # Log each detail on its own line so log collectors capture them\n        # as separate entries rather than truncating a single multiline msg\n        logger.error(\n            f\"Failed to query Vespa | \"\n            f\"status={status_code} | \"\n            f\"yql_length={yql_length} | \"\n            f\"exception={str(e)}\"\n        )\n        if response_text:\n            logger.error(f\"Vespa error response: {response_text[:1000]}\")\n        logger.error(f\"Vespa request URL: {e.request.url}\")\n\n        # Re-raise with diagnostics so callers see what actually went wrong\n        raise httpx.HTTPError(\n            f\"Failed to query Vespa (status={status_code}, \" f\"yql_length={yql_length})\"\n        ) from e\n\n    response_json: dict[str, Any] = response.json()\n\n    if LOG_VESPA_TIMING_INFORMATION:\n        logger.debug(\"Vespa timing info: %s\", response_json.get(\"timing\"))\n    hits = response_json[\"root\"].get(\"children\", [])\n\n    if not hits:\n        logger.warning(\n            f\"No hits found for YQL Query: {query_params.get('yql', 'No YQL Query')}\"\n        )\n        logger.debug(f\"Vespa Response: {response.text}\")\n\n    for hit in hits:\n        if hit[\"fields\"].get(CONTENT) is None:\n            identifier = hit[\"fields\"].get(\"documentid\") or hit[\"id\"]\n            logger.error(\n                f\"Vespa Index with Vespa ID {identifier} has no contents. \"\n                f\"This is invalid because the vector is not meaningful and keywordsearch cannot \"\n                f\"fetch this document\"\n            )\n\n    filtered_hits = [hit for hit in hits if hit[\"fields\"].get(CONTENT) is not None]\n\n    inference_chunks = [_vespa_hit_to_inference_chunk(hit) for hit in filtered_hits]\n\n    try:\n        num_retrieved_inference_chunks = len(inference_chunks)\n        num_retrieved_document_ids = len(\n            set([chunk.document_id for chunk in inference_chunks])\n        )\n        logger.info(\n            f\"Retrieved {num_retrieved_inference_chunks} inference chunks for {num_retrieved_document_ids} documents\"\n        )\n    except Exception as e:\n        # Debug logging only, should not fail the retrieval\n        logger.error(f\"Error logging retrieval statistics: {e}\")\n\n    # Good Debugging Spot\n    return inference_chunks\n\n\ndef _get_chunks_via_batch_search(\n    index_name: str,\n    chunk_requests: list[VespaChunkRequest],\n    filters: IndexFilters,\n    get_large_chunks: bool = False,\n) -> list[InferenceChunkUncleaned]:\n    if not chunk_requests:\n        return []\n\n    filters_str = build_vespa_filters(filters=filters, include_hidden=True)\n\n    yql = (\n        YQL_BASE.format(index_name=index_name)\n        + filters_str\n        + build_vespa_id_based_retrieval_yql(chunk_requests[0])\n    )\n    chunk_requests.pop(0)\n\n    for request in chunk_requests:\n        yql += \" or \" + build_vespa_id_based_retrieval_yql(request)\n    params: dict[str, str | int | float] = {\n        \"yql\": yql,\n        \"hits\": MAX_ID_SEARCH_QUERY_SIZE,\n    }\n\n    inference_chunks = query_vespa(params)\n    if not get_large_chunks:\n        inference_chunks = [\n            chunk for chunk in inference_chunks if not chunk.large_chunk_reference_ids\n        ]\n    inference_chunks.sort(key=lambda chunk: chunk.chunk_id)\n    return inference_chunks\n\n\ndef batch_search_api_retrieval(\n    index_name: str,\n    chunk_requests: list[VespaChunkRequest],\n    filters: IndexFilters,\n    get_large_chunks: bool = False,\n) -> list[InferenceChunkUncleaned]:\n    retrieved_chunks: list[InferenceChunkUncleaned] = []\n    capped_requests: list[VespaChunkRequest] = []\n    uncapped_requests: list[VespaChunkRequest] = []\n    chunk_count = 0\n    for req_ind, request in enumerate(chunk_requests, start=1):\n        # All requests without a chunk range are uncapped\n        # Uncapped requests are retrieved using the Visit API\n        range = request.range\n        if range is None:\n            uncapped_requests.append(request)\n            continue\n\n        if (\n            chunk_count + range > MAX_ID_SEARCH_QUERY_SIZE\n            or req_ind % MAX_OR_CONDITIONS == 0\n        ):\n            retrieved_chunks.extend(\n                _get_chunks_via_batch_search(\n                    index_name=index_name,\n                    chunk_requests=capped_requests,\n                    filters=filters,\n                    get_large_chunks=get_large_chunks,\n                )\n            )\n            capped_requests = []\n            chunk_count = 0\n        capped_requests.append(request)\n        chunk_count += range\n\n    if capped_requests:\n        retrieved_chunks.extend(\n            _get_chunks_via_batch_search(\n                index_name=index_name,\n                chunk_requests=capped_requests,\n                filters=filters,\n                get_large_chunks=get_large_chunks,\n            )\n        )\n\n    if uncapped_requests:\n        logger.debug(f\"Retrieving {len(uncapped_requests)} uncapped requests\")\n        retrieved_chunks.extend(\n            parallel_visit_api_retrieval(\n                index_name, uncapped_requests, filters, get_large_chunks\n            )\n        )\n\n    return retrieved_chunks\n"
  },
  {
    "path": "backend/onyx/document_index/vespa/deletion.py",
    "content": "import concurrent.futures\nfrom uuid import UUID\n\nimport httpx\nfrom retry import retry\n\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT\nfrom onyx.document_index.vespa_constants import NUM_THREADS\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nCONTENT_SUMMARY = \"content_summary\"\n\n\n@retry(tries=10, delay=1, backoff=2)\ndef _retryable_http_delete(http_client: httpx.Client, url: str) -> None:\n    res = http_client.delete(url)\n    res.raise_for_status()\n\n\ndef _delete_vespa_chunk(\n    doc_chunk_id: UUID, index_name: str, http_client: httpx.Client\n) -> None:\n    try:\n        _retryable_http_delete(\n            http_client,\n            f\"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}\",\n        )\n    except httpx.HTTPStatusError as e:\n        logger.error(f\"Failed to delete chunk, details: {e.response.text}\")\n        raise\n\n\ndef delete_vespa_chunks(\n    doc_chunk_ids: list[UUID],\n    index_name: str,\n    http_client: httpx.Client,\n    executor: concurrent.futures.ThreadPoolExecutor | None = None,\n) -> None:\n    \"\"\"Deletes a list of chunks from a Vespa index in parallel.\n\n    Args:\n        doc_chunk_ids: List of chunk IDs to delete.\n        index_name: Name of the index to delete from.\n        http_client: HTTP client to use for the request.\n        executor: Executor to use for the request.\n    \"\"\"\n    external_executor = True\n\n    if not executor:\n        external_executor = False\n        executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS)\n\n    try:\n        chunk_deletion_future = {\n            executor.submit(\n                _delete_vespa_chunk, doc_chunk_id, index_name, http_client\n            ): doc_chunk_id\n            for doc_chunk_id in doc_chunk_ids\n        }\n        for future in concurrent.futures.as_completed(chunk_deletion_future):\n            # Will raise exception if the deletion raised an exception\n            future.result()\n\n    finally:\n        if not external_executor:\n            executor.shutdown(wait=True)\n"
  },
  {
    "path": "backend/onyx/document_index/vespa/index.py",
    "content": "import concurrent.futures\nimport io\nimport logging\nimport os\nimport re\nimport time\nimport urllib\nimport zipfile\nfrom collections.abc import Iterable\nfrom dataclasses import dataclass\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom typing import BinaryIO\nfrom typing import cast\nfrom typing import List\n\nimport httpx\nimport jinja2\nimport requests\nfrom pydantic import BaseModel\nfrom retry import retry\n\nfrom onyx.configs.app_configs import BLURB_SIZE\nfrom onyx.configs.chat_configs import NUM_RETURNED_HITS\nfrom onyx.configs.chat_configs import TITLE_CONTENT_RATIO\nfrom onyx.configs.chat_configs import VESPA_SEARCHER_THREADS\nfrom onyx.configs.constants import KV_REINDEX_KEY\nfrom onyx.configs.constants import RETURN_SEPARATOR\nfrom onyx.context.search.enums import QueryType\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import InferenceChunkUncleaned\nfrom onyx.context.search.models import QueryExpansionType\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.document_index.document_index_utils import get_uuid_from_chunk_info\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.document_index.interfaces import (\n    DocumentInsertionRecord as OldDocumentInsertionRecord,\n)\nfrom onyx.document_index.interfaces import EnrichedDocumentIndexingInfo\nfrom onyx.document_index.interfaces import IndexBatchParams\nfrom onyx.document_index.interfaces import MinimalDocumentIndexingInfo\nfrom onyx.document_index.interfaces import VespaChunkRequest\nfrom onyx.document_index.interfaces import VespaDocumentFields\nfrom onyx.document_index.interfaces import VespaDocumentUserFields\nfrom onyx.document_index.interfaces_new import DocumentSectionRequest\nfrom onyx.document_index.interfaces_new import IndexingMetadata\nfrom onyx.document_index.interfaces_new import MetadataUpdateRequest\nfrom onyx.document_index.vespa.chunk_retrieval import query_vespa\nfrom onyx.document_index.vespa.indexing_utils import BaseHTTPXClientContext\nfrom onyx.document_index.vespa.indexing_utils import check_for_final_chunk_existence\nfrom onyx.document_index.vespa.indexing_utils import GlobalHTTPXClientContext\nfrom onyx.document_index.vespa.indexing_utils import TemporaryHTTPXClientContext\nfrom onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client\nfrom onyx.document_index.vespa.shared_utils.vespa_request_builders import (\n    build_vespa_filters,\n)\nfrom onyx.document_index.vespa.vespa_document_index import TenantState\nfrom onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex\nfrom onyx.document_index.vespa_constants import BATCH_SIZE\nfrom onyx.document_index.vespa_constants import CONTENT_SUMMARY\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT\nfrom onyx.document_index.vespa_constants import NUM_THREADS\nfrom onyx.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT\nfrom onyx.document_index.vespa_constants import VESPA_TIMEOUT\nfrom onyx.document_index.vespa_constants import YQL_BASE\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom onyx.key_value_store.factory import get_shared_kv_store\nfrom onyx.kg.utils.formatting_utils import split_relationship_id\nfrom onyx.utils.batching import batch_generator\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.timing import log_function_time\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\nfrom shared_configs.model_server_models import Embedding\n\nlogger = setup_logger()\n\n# Set the logging level to WARNING to ignore INFO and DEBUG logs\nhttpx_logger = logging.getLogger(\"httpx\")\nhttpx_logger.setLevel(logging.WARNING)\n\n\n@dataclass\nclass _VespaUpdateRequest:\n    document_id: str\n    url: str\n    update_request: dict[str, dict]\n\n\nclass KGVespaChunkUpdateRequest(BaseModel):\n    document_id: str\n    chunk_id: int\n    url: str\n    update_request: dict[str, dict]\n\n\nclass KGUChunkUpdateRequest(BaseModel):\n    \"\"\"\n    Update KG fields for a document\n    \"\"\"\n\n    document_id: str\n    chunk_id: int\n    core_entity: str\n    entities: set[str] | None = None\n    relationships: set[str] | None = None\n    terms: set[str] | None = None\n\n\nclass KGUDocumentUpdateRequest(BaseModel):\n    \"\"\"\n    Update KG fields for a document\n    \"\"\"\n\n    document_id: str\n    entities: set[str]\n    relationships: set[str]\n    terms: set[str]\n\n\ndef generate_kg_update_request(\n    kg_update_request: KGUChunkUpdateRequest,\n) -> dict[str, dict]:\n    kg_update_dict: dict[str, dict] = {}\n\n    if kg_update_request.entities is not None:\n        kg_update_dict[\"kg_entities\"] = {\"assign\": list(kg_update_request.entities)}\n\n    if kg_update_request.relationships is not None:\n        kg_update_dict[\"kg_relationships\"] = {\"assign\": []}\n        for relationship in kg_update_request.relationships:\n            source, rel_type, target = split_relationship_id(relationship)\n            kg_update_dict[\"kg_relationships\"][\"assign\"].append(\n                {\n                    \"source\": source,\n                    \"rel_type\": rel_type,\n                    \"target\": target,\n                }\n            )\n\n    return kg_update_dict\n\n\ndef in_memory_zip_from_file_bytes(file_contents: dict[str, bytes]) -> BinaryIO:\n    zip_buffer = io.BytesIO()\n    with zipfile.ZipFile(zip_buffer, \"w\", zipfile.ZIP_DEFLATED) as zipf:\n        for filename, content in file_contents.items():\n            zipf.writestr(filename, content)\n    zip_buffer.seek(0)\n    return zip_buffer\n\n\ndef _create_document_xml_lines(doc_names: list[str | None] | list[str]) -> str:\n    doc_lines = [\n        f'<document type=\"{doc_name}\" mode=\"index\" />'\n        for doc_name in doc_names\n        if doc_name\n    ]\n    return \"\\n\".join(doc_lines)\n\n\ndef add_ngrams_to_schema(schema_content: str) -> str:\n    # Add the match blocks containing gram and gram-size to title and content fields\n    schema_content = re.sub(\n        r\"(field title type string \\{[^}]*indexing: summary \\| index \\| attribute)\",\n        r\"\\1\\n            match {\\n                gram\\n                gram-size: 3\\n            }\",\n        schema_content,\n    )\n    schema_content = re.sub(\n        r\"(field content type string \\{[^}]*indexing: summary \\| index)\",\n        r\"\\1\\n            match {\\n                gram\\n                gram-size: 3\\n            }\",\n        schema_content,\n    )\n    return schema_content\n\n\ndef cleanup_chunks(chunks: list[InferenceChunkUncleaned]) -> list[InferenceChunk]:\n    def _remove_title(chunk: InferenceChunkUncleaned) -> str:\n        if not chunk.title or not chunk.content:\n            return chunk.content\n\n        if chunk.content.startswith(chunk.title):\n            return chunk.content[len(chunk.title) :].lstrip()\n\n        # BLURB SIZE is by token instead of char but each token is at least 1 char\n        # If this prefix matches the content, it's assumed the title was prepended\n        if chunk.content.startswith(chunk.title[:BLURB_SIZE]):\n            return (\n                chunk.content.split(RETURN_SEPARATOR, 1)[-1]\n                if RETURN_SEPARATOR in chunk.content\n                else chunk.content\n            )\n\n        return chunk.content\n\n    def _remove_metadata_suffix(chunk: InferenceChunkUncleaned) -> str:\n        if not chunk.metadata_suffix:\n            return chunk.content\n        return chunk.content.removesuffix(chunk.metadata_suffix).rstrip(\n            RETURN_SEPARATOR\n        )\n\n    def _remove_contextual_rag(chunk: InferenceChunkUncleaned) -> str:\n        # remove document summary\n        if chunk.content.startswith(chunk.doc_summary):\n            chunk.content = chunk.content[len(chunk.doc_summary) :].lstrip()\n        # remove chunk context\n        if chunk.content.endswith(chunk.chunk_context):\n            chunk.content = chunk.content[\n                : len(chunk.content) - len(chunk.chunk_context)\n            ].rstrip()\n        return chunk.content\n\n    for chunk in chunks:\n        chunk.content = _remove_title(chunk)\n        chunk.content = _remove_metadata_suffix(chunk)\n        chunk.content = _remove_contextual_rag(chunk)\n\n    return [chunk.to_inference_chunk() for chunk in chunks]\n\n\nclass VespaIndex(DocumentIndex):\n    VESPA_SCHEMA_JINJA_FILENAME = \"danswer_chunk.sd.jinja\"\n\n    def __init__(\n        self,\n        index_name: str,\n        secondary_index_name: str | None,\n        large_chunks_enabled: bool,\n        secondary_large_chunks_enabled: bool | None,\n        multitenant: bool = False,\n        httpx_client: httpx.Client | None = None,\n    ) -> None:\n        self.index_name = index_name\n        self.secondary_index_name = secondary_index_name\n\n        self.large_chunks_enabled = large_chunks_enabled\n        self.secondary_large_chunks_enabled = secondary_large_chunks_enabled\n\n        self.multitenant = multitenant\n\n        # Temporary until we refactor the entirety of this class.\n        self.httpx_client = httpx_client\n\n        self.httpx_client_context: BaseHTTPXClientContext\n        if httpx_client:\n            self.httpx_client_context = GlobalHTTPXClientContext(httpx_client)\n        else:\n            self.httpx_client_context = TemporaryHTTPXClientContext(\n                get_vespa_http_client\n            )\n\n        self.index_to_large_chunks_enabled: dict[str, bool] = {}\n        self.index_to_large_chunks_enabled[index_name] = large_chunks_enabled\n        if secondary_index_name and secondary_large_chunks_enabled:\n            self.index_to_large_chunks_enabled[secondary_index_name] = (\n                secondary_large_chunks_enabled\n            )\n\n    def ensure_indices_exist(\n        self,\n        primary_embedding_dim: int,\n        primary_embedding_precision: EmbeddingPrecision,\n        secondary_index_embedding_dim: int | None,\n        secondary_index_embedding_precision: EmbeddingPrecision | None,\n    ) -> None:\n        if MULTI_TENANT:\n            logger.info(\n                \"Skipping Vespa index setup for multitenant (would wipe all indices)\"\n            )\n            return None\n\n        jinja_env = jinja2.Environment()\n\n        deploy_url = f\"{VESPA_APPLICATION_ENDPOINT}/tenant/default/prepareandactivate\"\n        logger.notice(f\"Deploying Vespa application package to {deploy_url}\")\n\n        vespa_schema_path = os.path.join(\n            os.getcwd(), \"onyx\", \"document_index\", \"vespa\", \"app_config\"\n        )\n        schema_jinja_file = os.path.join(\n            vespa_schema_path, \"schemas\", VespaIndex.VESPA_SCHEMA_JINJA_FILENAME\n        )\n        services_jinja_file = os.path.join(vespa_schema_path, \"services.xml.jinja\")\n        overrides_jinja_file = os.path.join(\n            vespa_schema_path, \"validation-overrides.xml.jinja\"\n        )\n\n        with open(services_jinja_file, \"r\") as services_f:\n            schema_names = [self.index_name, self.secondary_index_name]\n            doc_lines = _create_document_xml_lines(schema_names)\n\n            services_template_str = services_f.read()\n            services_template = jinja_env.from_string(services_template_str)\n            services = services_template.render(\n                document_elements=doc_lines,\n                num_search_threads=str(VESPA_SEARCHER_THREADS),\n            )\n\n        kv_store = get_shared_kv_store()\n\n        needs_reindexing = False\n        try:\n            needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY))\n        except Exception:\n            logger.debug(\"Could not load the reindexing flag. Using ngrams\")\n\n        # Vespa requires an override to erase data including the indices we're no longer using\n        # It also has a 30 day cap from current so we set it to 7 dynamically\n        with open(overrides_jinja_file, \"r\") as overrides_f:\n            overrides_template_str = overrides_f.read()\n            overrides_template = jinja_env.from_string(overrides_template_str)\n\n            now = datetime.now()\n            date_in_7_days = now + timedelta(days=7)\n            formatted_date = date_in_7_days.strftime(\"%Y-%m-%d\")\n            overrides = overrides_template.render(\n                until_date=formatted_date,\n            )\n\n        zip_dict = {\n            \"services.xml\": services.encode(\"utf-8\"),\n            \"validation-overrides.xml\": overrides.encode(\"utf-8\"),\n        }\n\n        with open(schema_jinja_file, \"r\") as schema_f:\n            template_str = schema_f.read()\n\n        template = jinja_env.from_string(template_str)\n        schema = template.render(\n            multi_tenant=MULTI_TENANT,\n            schema_name=self.index_name,\n            dim=primary_embedding_dim,\n            embedding_precision=primary_embedding_precision.value,\n        )\n\n        schema = add_ngrams_to_schema(schema) if needs_reindexing else schema\n        zip_dict[f\"schemas/{schema_names[0]}.sd\"] = schema.encode(\"utf-8\")\n\n        if self.secondary_index_name:\n            if secondary_index_embedding_dim is None:\n                raise ValueError(\"Secondary index embedding dimension is required\")\n            if secondary_index_embedding_precision is None:\n                raise ValueError(\"Secondary index embedding precision is required\")\n\n            upcoming_schema = template.render(\n                multi_tenant=MULTI_TENANT,\n                schema_name=self.secondary_index_name,\n                dim=secondary_index_embedding_dim,\n                embedding_precision=secondary_index_embedding_precision.value,\n            )\n\n            zip_dict[f\"schemas/{schema_names[1]}.sd\"] = upcoming_schema.encode(\"utf-8\")\n\n        zip_file = in_memory_zip_from_file_bytes(zip_dict)\n\n        headers = {\"Content-Type\": \"application/zip\"}\n        response = requests.post(deploy_url, headers=headers, data=zip_file)\n        if response.status_code != 200:\n            logger.error(\n                f\"Failed to prepare Vespa Onyx Index. Response: {response.text}\"\n            )\n            raise RuntimeError(\n                f\"Failed to prepare Vespa Onyx Index. Response: {response.text}\"\n            )\n\n    @staticmethod\n    def register_multitenant_indices(\n        indices: list[str],\n        embedding_dims: list[int],\n        embedding_precisions: list[EmbeddingPrecision],\n    ) -> None:\n        if not MULTI_TENANT:\n            raise ValueError(\"Multi-tenant is not enabled\")\n\n        deploy_url = f\"{VESPA_APPLICATION_ENDPOINT}/tenant/default/prepareandactivate\"\n        logger.info(f\"Deploying Vespa application package to {deploy_url}\")\n\n        vespa_schema_path = os.path.join(\n            os.getcwd(), \"onyx\", \"document_index\", \"vespa\", \"app_config\"\n        )\n        schema_jinja_file = os.path.join(\n            vespa_schema_path, \"schemas\", VespaIndex.VESPA_SCHEMA_JINJA_FILENAME\n        )\n        services_jinja_file = os.path.join(vespa_schema_path, \"services.xml.jinja\")\n        overrides_jinja_file = os.path.join(\n            vespa_schema_path, \"validation-overrides.xml.jinja\"\n        )\n\n        jinja_env = jinja2.Environment()\n\n        # Generate schema names from index settings\n        with open(services_jinja_file, \"r\") as services_f:\n            schema_names = [index_name for index_name in indices]\n            doc_lines = _create_document_xml_lines(schema_names)\n\n            services_template_str = services_f.read()\n            services_template = jinja_env.from_string(services_template_str)\n            services = services_template.render(\n                document_elements=doc_lines,\n                num_search_threads=str(VESPA_SEARCHER_THREADS),\n            )\n\n        kv_store = get_shared_kv_store()\n\n        needs_reindexing = False\n        try:\n            needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY))\n        except Exception:\n            logger.debug(\"Could not load the reindexing flag. Using ngrams\")\n\n        # Vespa requires an override to erase data including the indices we're no longer using\n        # It also has a 30 day cap from current so we set it to 7 dynamically\n        with open(overrides_jinja_file, \"r\") as overrides_f:\n            overrides_template_str = overrides_f.read()\n            overrides_template = jinja_env.from_string(overrides_template_str)\n\n            now = datetime.now()\n            date_in_7_days = now + timedelta(days=7)\n            formatted_date = date_in_7_days.strftime(\"%Y-%m-%d\")\n            overrides = overrides_template.render(\n                until_date=formatted_date,\n            )\n\n        zip_dict = {\n            \"services.xml\": services.encode(\"utf-8\"),\n            \"validation-overrides.xml\": overrides.encode(\"utf-8\"),\n        }\n\n        with open(schema_jinja_file, \"r\") as schema_f:\n            schema_template_str = schema_f.read()\n\n        schema_template = jinja_env.from_string(schema_template_str)\n\n        for i, index_name in enumerate(indices):\n            embedding_dim = embedding_dims[i]\n            embedding_precision = embedding_precisions[i]\n            logger.info(\n                f\"Creating index: {index_name} with embedding dimension: {embedding_dim}\"\n            )\n\n            schema = schema_template.render(\n                multi_tenant=MULTI_TENANT,\n                schema_name=index_name,\n                dim=embedding_dim,\n                embedding_precision=embedding_precision.value,\n            )\n\n            schema = add_ngrams_to_schema(schema) if needs_reindexing else schema\n            zip_dict[f\"schemas/{index_name}.sd\"] = schema.encode(\"utf-8\")\n\n        zip_file = in_memory_zip_from_file_bytes(zip_dict)\n\n        headers = {\"Content-Type\": \"application/zip\"}\n        response = requests.post(deploy_url, headers=headers, data=zip_file)\n\n        if response.status_code != 200:\n            raise RuntimeError(\n                f\"Failed to prepare Vespa Onyx Indexes. Response: {response.text}\"\n            )\n\n    def index(\n        self,\n        chunks: Iterable[DocMetadataAwareIndexChunk],\n        index_batch_params: IndexBatchParams,\n    ) -> set[OldDocumentInsertionRecord]:\n        \"\"\"\n        NOTE: Do NOT consider the secondary index here. A separate indexing\n        pipeline will be responsible for indexing to the secondary index. This\n        design is not ideal and we should reconsider this when revamping index\n        swapping.\n        \"\"\"\n        if len(index_batch_params.doc_id_to_previous_chunk_cnt) != len(\n            index_batch_params.doc_id_to_new_chunk_cnt\n        ):\n            raise ValueError(\"Bug: Length of doc ID to chunk maps does not match.\")\n        doc_id_to_chunk_cnt_diff = {\n            doc_id: IndexingMetadata.ChunkCounts(\n                old_chunk_cnt=index_batch_params.doc_id_to_previous_chunk_cnt[doc_id],\n                new_chunk_cnt=index_batch_params.doc_id_to_new_chunk_cnt[doc_id],\n            )\n            for doc_id in index_batch_params.doc_id_to_previous_chunk_cnt.keys()\n        }\n        indexing_metadata = IndexingMetadata(\n            doc_id_to_chunk_cnt_diff=doc_id_to_chunk_cnt_diff,\n        )\n        tenant_state = TenantState(\n            tenant_id=get_current_tenant_id(),\n            multitenant=MULTI_TENANT,\n        )\n        if tenant_state.multitenant != self.multitenant:\n            raise ValueError(\n                f\"Bug: Multitenant mismatch. Expected {tenant_state.multitenant}, got {self.multitenant}.\"\n            )\n        if (\n            tenant_state.multitenant\n            and tenant_state.tenant_id != index_batch_params.tenant_id\n        ):\n            raise ValueError(\n                f\"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {index_batch_params.tenant_id}.\"\n            )\n        vespa_document_index = VespaDocumentIndex(\n            index_name=self.index_name,\n            tenant_state=tenant_state,\n            large_chunks_enabled=self.large_chunks_enabled,\n            httpx_client=self.httpx_client,\n        )\n        # This conversion from list to set only to be converted again to a list\n        # upstream is suboptimal and only temporary until we refactor the\n        # entirety of this class.\n        document_insertion_records = vespa_document_index.index(\n            chunks, indexing_metadata\n        )\n        return set(\n            [\n                OldDocumentInsertionRecord(\n                    document_id=doc_insertion_record.document_id,\n                    already_existed=doc_insertion_record.already_existed,\n                )\n                for doc_insertion_record in document_insertion_records\n            ]\n        )\n\n    @classmethod\n    def _apply_updates_batched(\n        cls,\n        updates: list[_VespaUpdateRequest],\n        httpx_client: httpx.Client,\n        batch_size: int = BATCH_SIZE,\n    ) -> None:\n        \"\"\"Runs a batch of updates in parallel via the ThreadPoolExecutor.\"\"\"\n\n        def _update_chunk(\n            update: _VespaUpdateRequest, http_client: httpx.Client\n        ) -> httpx.Response:\n            logger.debug(\n                f\"Updating with request to {update.url} with body {update.update_request}\"\n            )\n            return http_client.put(\n                update.url,\n                headers={\"Content-Type\": \"application/json\"},\n                json=update.update_request,\n            )\n\n        # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficient for\n        # indexing / updates / deletes since we have to make a large volume of requests.\n\n        with (\n            concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,\n            httpx_client as http_client,\n        ):\n            for update_batch in batch_generator(updates, batch_size):\n                future_to_document_id = {\n                    executor.submit(\n                        _update_chunk,\n                        update,\n                        http_client,\n                    ): update.document_id\n                    for update in update_batch\n                }\n                for future in concurrent.futures.as_completed(future_to_document_id):\n                    res = future.result()\n                    try:\n                        res.raise_for_status()\n                    except requests.HTTPError as e:\n                        failure_msg = f\"Failed to update document: {future_to_document_id[future]}\"\n                        raise requests.HTTPError(failure_msg) from e\n\n    @classmethod\n    def _apply_kg_chunk_updates_batched(\n        cls,\n        updates: list[KGVespaChunkUpdateRequest],\n        httpx_client: httpx.Client,\n        batch_size: int = BATCH_SIZE,\n    ) -> None:\n        \"\"\"Runs a batch of updates in parallel via the ThreadPoolExecutor.\"\"\"\n\n        @retry(tries=3, delay=1, backoff=2, jitter=(0.0, 1.0))\n        def _kg_update_chunk(\n            update: KGVespaChunkUpdateRequest, http_client: httpx.Client\n        ) -> httpx.Response:\n            return http_client.put(\n                update.url,\n                headers={\"Content-Type\": \"application/json\"},\n                json=update.update_request,\n            )\n\n        # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficient for\n        # indexing / updates / deletes since we have to make a large volume of requests.\n\n        with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:\n            for update_batch in batch_generator(updates, batch_size):\n                future_to_document_id = {\n                    executor.submit(\n                        _kg_update_chunk,\n                        update,\n                        httpx_client,\n                    ): update.document_id\n                    for update in update_batch\n                }\n                for future in concurrent.futures.as_completed(future_to_document_id):\n                    res = future.result()\n                    try:\n                        res.raise_for_status()\n                    except requests.HTTPError as e:\n                        failure_msg = f\"Failed to update document {future_to_document_id[future]}\\nResponse: {res.text}\"\n                        raise requests.HTTPError(failure_msg) from e\n\n    def kg_chunk_updates(\n        self, kg_update_requests: list[KGUChunkUpdateRequest], tenant_id: str\n    ) -> None:\n\n        processed_updates_requests: list[KGVespaChunkUpdateRequest] = []\n        update_start = time.monotonic()\n\n        # Build the _VespaUpdateRequest objects\n\n        for kg_update_request in kg_update_requests:\n            kg_update_dict: dict[str, dict] = {\n                \"fields\": generate_kg_update_request(kg_update_request)\n            }\n            if not kg_update_dict[\"fields\"]:\n                logger.error(\"Update request received but nothing to update\")\n                continue\n\n            doc_chunk_id = get_uuid_from_chunk_info(\n                document_id=kg_update_request.document_id,\n                chunk_id=kg_update_request.chunk_id,\n                tenant_id=tenant_id,\n                large_chunk_id=None,\n            )\n\n            processed_updates_requests.append(\n                KGVespaChunkUpdateRequest(\n                    document_id=kg_update_request.document_id,\n                    chunk_id=kg_update_request.chunk_id,\n                    url=f\"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}/{doc_chunk_id}\",\n                    update_request=kg_update_dict,\n                )\n            )\n\n        with self.httpx_client_context as httpx_client:\n            self._apply_kg_chunk_updates_batched(\n                processed_updates_requests, httpx_client\n            )\n        logger.debug(\n            \"Updated %d vespa documents in %.2f seconds\",\n            len(processed_updates_requests),\n            time.monotonic() - update_start,\n        )\n\n    def update_single(\n        self,\n        doc_id: str,\n        *,\n        chunk_count: int | None,\n        tenant_id: str,\n        fields: VespaDocumentFields | None,\n        user_fields: VespaDocumentUserFields | None,\n    ) -> None:\n        \"\"\"Note: if the document id does not exist, the update will be a no-op and the\n        function will complete with no errors or exceptions.\n        Handle other exceptions if you wish to implement retry behavior\n\n        NOTE: Remember to handle the secondary index here. There is no separate\n        pipeline for updating chunks in the secondary index. This design is not\n        ideal and we should reconsider this when revamping index swapping.\n        \"\"\"\n        if fields is None and user_fields is None:\n            logger.warning(\n                f\"Tried to update document {doc_id} with no updated fields or user fields.\"\n            )\n            return\n\n        tenant_state = TenantState(\n            tenant_id=get_current_tenant_id(),\n            multitenant=MULTI_TENANT,\n        )\n        if tenant_state.multitenant != self.multitenant:\n            raise ValueError(\n                f\"Bug: Multitenant mismatch. Expected {tenant_state.multitenant}, got {self.multitenant}.\"\n            )\n        if tenant_state.multitenant and tenant_state.tenant_id != tenant_id:\n            raise ValueError(\n                f\"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}.\"\n            )\n\n        project_ids: set[int] | None = None\n        # NOTE: Empty user_projects is semantically different from None\n        # user_projects.\n        if user_fields is not None and user_fields.user_projects is not None:\n            project_ids = set(user_fields.user_projects)\n        persona_ids: set[int] | None = None\n        # NOTE: Empty personas is semantically different from None personas.\n        if user_fields is not None and user_fields.personas is not None:\n            persona_ids = set(user_fields.personas)\n        update_request = MetadataUpdateRequest(\n            document_ids=[doc_id],\n            doc_id_to_chunk_cnt={\n                doc_id: chunk_count if chunk_count is not None else -1\n            },  # NOTE: -1 represents an unknown chunk count.\n            access=fields.access if fields is not None else None,\n            document_sets=fields.document_sets if fields is not None else None,\n            boost=fields.boost if fields is not None else None,\n            hidden=fields.hidden if fields is not None else None,\n            project_ids=project_ids,\n            persona_ids=persona_ids,\n        )\n\n        indices = [self.index_name]\n        if self.secondary_index_name:\n            indices.append(self.secondary_index_name)\n\n        for index_name in indices:\n            vespa_document_index = VespaDocumentIndex(\n                index_name=index_name,\n                tenant_state=tenant_state,\n                large_chunks_enabled=self.index_to_large_chunks_enabled.get(\n                    index_name, False\n                ),\n                httpx_client=self.httpx_client,\n            )\n            vespa_document_index.update([update_request])\n\n    def delete_single(\n        self,\n        doc_id: str,\n        *,\n        tenant_id: str,\n        chunk_count: int | None,\n    ) -> int:\n        \"\"\"\n        NOTE: Remember to handle the secondary index here. There is no separate\n        pipeline for deleting chunks in the secondary index. This design is not\n        ideal and we should reconsider this when revamping index swapping.\n        \"\"\"\n        tenant_state = TenantState(\n            tenant_id=get_current_tenant_id(),\n            multitenant=MULTI_TENANT,\n        )\n        if tenant_state.multitenant != self.multitenant:\n            raise ValueError(\n                f\"Bug: Multitenant mismatch. Expected {tenant_state.multitenant}, got {self.multitenant}.\"\n            )\n        if tenant_state.multitenant and tenant_state.tenant_id != tenant_id:\n            raise ValueError(\n                f\"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}.\"\n            )\n        indices = [self.index_name]\n        if self.secondary_index_name:\n            indices.append(self.secondary_index_name)\n\n        total_chunks_deleted = 0\n        for index_name in indices:\n            vespa_document_index = VespaDocumentIndex(\n                index_name=index_name,\n                tenant_state=tenant_state,\n                large_chunks_enabled=self.index_to_large_chunks_enabled.get(\n                    index_name, False\n                ),\n                httpx_client=self.httpx_client,\n            )\n            total_chunks_deleted += vespa_document_index.delete(\n                document_id=doc_id, chunk_count=chunk_count\n            )\n\n        return total_chunks_deleted\n\n    def id_based_retrieval(\n        self,\n        chunk_requests: list[VespaChunkRequest],\n        filters: IndexFilters,\n        batch_retrieval: bool = False,\n        get_large_chunks: bool = False,  # noqa: ARG002\n    ) -> list[InferenceChunk]:\n        tenant_state = TenantState(\n            tenant_id=get_current_tenant_id(),\n            multitenant=MULTI_TENANT,\n        )\n        vespa_document_index = VespaDocumentIndex(\n            index_name=self.index_name,\n            tenant_state=tenant_state,\n            large_chunks_enabled=self.large_chunks_enabled,\n            httpx_client=self.httpx_client,\n        )\n        generic_chunk_requests: list[DocumentSectionRequest] = []\n        for chunk_request in chunk_requests:\n            generic_chunk_requests.append(\n                DocumentSectionRequest(\n                    document_id=chunk_request.document_id,\n                    min_chunk_ind=chunk_request.min_chunk_ind,\n                    max_chunk_ind=chunk_request.max_chunk_ind,\n                )\n            )\n        return vespa_document_index.id_based_retrieval(\n            chunk_requests=generic_chunk_requests,\n            filters=filters,\n            batch_retrieval=batch_retrieval,\n        )\n\n    @log_function_time(print_only=True, debug_only=True)\n    def hybrid_retrieval(\n        self,\n        query: str,\n        query_embedding: Embedding,\n        final_keywords: list[str] | None,\n        filters: IndexFilters,\n        hybrid_alpha: float,  # noqa: ARG002\n        time_decay_multiplier: float,  # noqa: ARG002\n        num_to_retrieve: int,\n        ranking_profile_type: QueryExpansionType = QueryExpansionType.SEMANTIC,\n        title_content_ratio: float | None = TITLE_CONTENT_RATIO,  # noqa: ARG002\n    ) -> list[InferenceChunk]:\n        tenant_state = TenantState(\n            tenant_id=get_current_tenant_id(),\n            multitenant=MULTI_TENANT,\n        )\n        vespa_document_index = VespaDocumentIndex(\n            index_name=self.index_name,\n            tenant_state=tenant_state,\n            large_chunks_enabled=self.large_chunks_enabled,\n            httpx_client=self.httpx_client,\n        )\n        if not (\n            ranking_profile_type == QueryExpansionType.KEYWORD\n            or ranking_profile_type == QueryExpansionType.SEMANTIC\n        ):\n            raise ValueError(\n                f\"Bug: Received invalid ranking profile type: {ranking_profile_type}\"\n            )\n        query_type = (\n            QueryType.KEYWORD\n            if ranking_profile_type == QueryExpansionType.KEYWORD\n            else QueryType.SEMANTIC\n        )\n        return vespa_document_index.hybrid_retrieval(\n            query,\n            query_embedding,\n            final_keywords,\n            query_type,\n            filters,\n            num_to_retrieve,\n        )\n\n    def admin_retrieval(\n        self,\n        query: str,\n        query_embedding: Embedding,  # noqa: ARG002\n        filters: IndexFilters,\n        num_to_retrieve: int = NUM_RETURNED_HITS,\n    ) -> list[InferenceChunk]:\n        vespa_where_clauses = build_vespa_filters(filters, include_hidden=True)\n        yql = (\n            YQL_BASE.format(index_name=self.index_name)\n            + vespa_where_clauses\n            + '({grammar: \"weakAnd\"}userInput(@query) '\n            # `({defaultIndex: \"content_summary\"}userInput(@query))` section is\n            # needed for highlighting while the N-gram highlighting is broken /\n            # not working as desired\n            + f'or ({{defaultIndex: \"{CONTENT_SUMMARY}\"}}userInput(@query)))'\n        )\n\n        params: dict[str, str | int] = {\n            \"yql\": yql,\n            \"query\": query,\n            \"hits\": num_to_retrieve,\n            \"ranking.profile\": \"admin_search\",\n            \"timeout\": VESPA_TIMEOUT,\n        }\n\n        return cleanup_chunks(query_vespa(params))\n\n    # Retrieves chunk information for a document:\n    # - Determines the last indexed chunk\n    # - Identifies if the document uses the old or new chunk ID system\n    # This data is crucial for Vespa document updates without relying on the visit API.\n    @classmethod\n    def enrich_basic_chunk_info(\n        cls,\n        index_name: str,\n        http_client: httpx.Client,\n        document_id: str,\n        previous_chunk_count: int | None = None,\n        new_chunk_count: int = 0,\n    ) -> EnrichedDocumentIndexingInfo:\n        last_indexed_chunk = previous_chunk_count\n\n        # If the document has no `chunk_count` in the database, we know that it\n        # has the old chunk ID system and we must check for the final chunk index\n        is_old_version = False\n        if last_indexed_chunk is None:\n            is_old_version = True\n            minimal_doc_info = MinimalDocumentIndexingInfo(\n                doc_id=document_id, chunk_start_index=new_chunk_count\n            )\n            last_indexed_chunk = check_for_final_chunk_existence(\n                minimal_doc_info=minimal_doc_info,\n                start_index=new_chunk_count,\n                index_name=index_name,\n                http_client=http_client,\n            )\n\n        enriched_doc_info = EnrichedDocumentIndexingInfo(\n            doc_id=document_id,\n            chunk_start_index=new_chunk_count,\n            chunk_end_index=last_indexed_chunk,\n            old_version=is_old_version,\n        )\n        return enriched_doc_info\n\n    @classmethod\n    def delete_entries_by_tenant_id(\n        cls,\n        *,\n        tenant_id: str,\n        index_name: str,\n    ) -> int:\n        \"\"\"\n        Deletes all entries in the specified index with the given tenant_id.\n\n        Currently unused, but we anticipate this being useful. The entire flow does not\n        use the httpx connection pool of an instance.\n\n        Parameters:\n            tenant_id (str): The tenant ID whose documents are to be deleted.\n            index_name (str): The name of the index from which to delete documents.\n\n        Returns:\n            int: The number of documents deleted.\n        \"\"\"\n        logger.info(\n            f\"Deleting entries with tenant_id: {tenant_id} from index: {index_name}\"\n        )\n\n        # Step 1: Retrieve all document IDs with the given tenant_id\n        document_ids = cls._get_all_document_ids_by_tenant_id(tenant_id, index_name)\n\n        if not document_ids:\n            logger.info(\n                f\"No documents found with tenant_id: {tenant_id} in index: {index_name}\"\n            )\n            return 0\n\n        # Step 2: Delete documents in batches\n        delete_requests = [\n            _VespaDeleteRequest(document_id=doc_id, index_name=index_name)\n            for doc_id in document_ids\n        ]\n\n        cls._apply_deletes_batched(delete_requests)\n        return len(document_ids)\n\n    @classmethod\n    def _get_all_document_ids_by_tenant_id(\n        cls, tenant_id: str, index_name: str\n    ) -> List[str]:\n        \"\"\"\n        Retrieves all document IDs with the specified tenant_id, handling pagination.\n\n        Internal helper function for delete_entries_by_tenant_id.\n\n        Parameters:\n            tenant_id (str): The tenant ID to search for.\n            index_name (str): The name of the index to search in.\n\n        Returns:\n            List[str]: A list of document IDs matching the tenant_id.\n        \"\"\"\n        offset = 0\n        limit = 1000  # Vespa's maximum hits per query\n        document_ids = []\n\n        logger.debug(\n            f\"Starting document ID retrieval for tenant_id: {tenant_id} in index: {index_name}\"\n        )\n\n        while True:\n            # Construct the query to fetch document IDs\n            query_params = {\n                \"yql\": f'select id from sources * where tenant_id contains \"{tenant_id}\";',\n                \"offset\": str(offset),\n                \"hits\": str(limit),\n                \"timeout\": \"10s\",\n                \"format\": \"json\",\n                \"summary\": \"id\",\n            }\n\n            url = f\"{VESPA_APPLICATION_ENDPOINT}/search/\"\n\n            logger.debug(\n                f\"Querying for document IDs with tenant_id: {tenant_id}, offset: {offset}\"\n            )\n\n            with get_vespa_http_client() as http_client:\n                response = http_client.get(url, params=query_params, timeout=None)\n                response.raise_for_status()\n\n                search_result = response.json()\n                hits = search_result.get(\"root\", {}).get(\"children\", [])\n\n                if not hits:\n                    break\n\n                for hit in hits:\n                    doc_id = hit.get(\"id\")\n                    if doc_id:\n                        document_ids.append(doc_id)\n\n                offset += limit  # Move to the next page\n\n        logger.debug(\n            f\"Retrieved {len(document_ids)} document IDs for tenant_id: {tenant_id}\"\n        )\n        return document_ids\n\n    @classmethod\n    def _apply_deletes_batched(\n        cls,\n        delete_requests: List[\"_VespaDeleteRequest\"],\n        batch_size: int = BATCH_SIZE,\n    ) -> None:\n        \"\"\"\n        Deletes documents in batches using multiple threads.\n\n        Internal helper function for delete_entries_by_tenant_id.\n\n        This is a class method and does not use the httpx pool of the instance.\n        This is OK because we don't use this method often.\n\n        Parameters:\n            delete_requests (List[_VespaDeleteRequest]): The list of delete requests.\n            batch_size (int): The number of documents to delete in each batch.\n        \"\"\"\n\n        def _delete_document(\n            delete_request: \"_VespaDeleteRequest\", http_client: httpx.Client\n        ) -> None:\n            logger.debug(f\"Deleting document with ID {delete_request.document_id}\")\n            response = http_client.delete(\n                delete_request.url,\n                headers={\"Content-Type\": \"application/json\"},\n                timeout=None,\n            )\n            response.raise_for_status()\n\n        logger.debug(f\"Starting batch deletion for {len(delete_requests)} documents\")\n\n        with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:\n            with get_vespa_http_client() as http_client:\n                for batch_start in range(0, len(delete_requests), batch_size):\n                    batch = delete_requests[batch_start : batch_start + batch_size]\n\n                    future_to_document_id = {\n                        executor.submit(\n                            _delete_document,\n                            delete_request,\n                            http_client,\n                        ): delete_request.document_id\n                        for delete_request in batch\n                    }\n\n                    for future in concurrent.futures.as_completed(\n                        future_to_document_id\n                    ):\n                        doc_id = future_to_document_id[future]\n                        try:\n                            future.result()\n                            logger.debug(f\"Successfully deleted document: {doc_id}\")\n                        except httpx.HTTPError as e:\n                            logger.error(f\"Failed to delete document {doc_id}: {e}\")\n                            # Optionally, implement retry logic or error handling here\n\n        logger.info(\"Batch deletion completed\")\n\n    def random_retrieval(\n        self,\n        filters: IndexFilters,\n        num_to_retrieve: int = 10,\n    ) -> list[InferenceChunk]:\n        \"\"\"Retrieve random chunks matching the filters using Vespa's random ranking\n\n        This method is currently used for random chunk retrieval in the context of\n        assistant starter message creation (passed as sample context for usage by the assistant).\n        \"\"\"\n        tenant_state = TenantState(\n            tenant_id=get_current_tenant_id(),\n            multitenant=MULTI_TENANT,\n        )\n        vespa_document_index = VespaDocumentIndex(\n            index_name=self.index_name,\n            tenant_state=tenant_state,\n            large_chunks_enabled=self.large_chunks_enabled,\n            httpx_client=self.httpx_client,\n        )\n        return vespa_document_index.random_retrieval(\n            filters=filters,\n            num_to_retrieve=num_to_retrieve,\n        )\n\n\nclass _VespaDeleteRequest:\n    def __init__(self, document_id: str, index_name: str) -> None:\n        self.document_id = document_id\n        # Encode the document ID to ensure it's safe for use in the URL\n        encoded_doc_id = urllib.parse.quote_plus(self.document_id)\n        self.url = f\"{VESPA_APPLICATION_ENDPOINT}/document/v1/{index_name}/{index_name}/docid/{encoded_doc_id}\"\n"
  },
  {
    "path": "backend/onyx/document_index/vespa/indexing_utils.py",
    "content": "import concurrent.futures\nimport json\nimport random\nimport time\nimport uuid\nfrom abc import ABC\nfrom abc import abstractmethod\nfrom collections.abc import Callable\nfrom datetime import datetime\nfrom datetime import timezone\nfrom http import HTTPStatus\n\nimport httpx\nfrom retry import retry\n\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    get_experts_stores_representations,\n)\nfrom onyx.document_index.chunk_content_enrichment import (\n    generate_enriched_content_for_chunk_text,\n)\nfrom onyx.document_index.document_index_utils import get_uuid_from_chunk\nfrom onyx.document_index.document_index_utils import get_uuid_from_chunk_info_old\nfrom onyx.document_index.interfaces import MinimalDocumentIndexingInfo\nfrom onyx.document_index.vespa.shared_utils.utils import (\n    replace_invalid_doc_id_characters,\n)\nfrom onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST\nfrom onyx.document_index.vespa_constants import AGGREGATED_CHUNK_BOOST_FACTOR\nfrom onyx.document_index.vespa_constants import BLURB\nfrom onyx.document_index.vespa_constants import BOOST\nfrom onyx.document_index.vespa_constants import CHUNK_CONTEXT\nfrom onyx.document_index.vespa_constants import CHUNK_ID\nfrom onyx.document_index.vespa_constants import CONTENT\nfrom onyx.document_index.vespa_constants import CONTENT_SUMMARY\nfrom onyx.document_index.vespa_constants import DOC_SUMMARY\nfrom onyx.document_index.vespa_constants import DOC_UPDATED_AT\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT\nfrom onyx.document_index.vespa_constants import DOCUMENT_SETS\nfrom onyx.document_index.vespa_constants import EMBEDDINGS\nfrom onyx.document_index.vespa_constants import FULL_CHUNK_EMBEDDING_KEY\nfrom onyx.document_index.vespa_constants import IMAGE_FILE_NAME\nfrom onyx.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS\nfrom onyx.document_index.vespa_constants import METADATA\nfrom onyx.document_index.vespa_constants import METADATA_LIST\nfrom onyx.document_index.vespa_constants import METADATA_SUFFIX\nfrom onyx.document_index.vespa_constants import NUM_THREADS\nfrom onyx.document_index.vespa_constants import PERSONAS\nfrom onyx.document_index.vespa_constants import PRIMARY_OWNERS\nfrom onyx.document_index.vespa_constants import SECONDARY_OWNERS\nfrom onyx.document_index.vespa_constants import SECTION_CONTINUATION\nfrom onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER\nfrom onyx.document_index.vespa_constants import SKIP_TITLE_EMBEDDING\nfrom onyx.document_index.vespa_constants import SOURCE_LINKS\nfrom onyx.document_index.vespa_constants import SOURCE_TYPE\nfrom onyx.document_index.vespa_constants import TENANT_ID\nfrom onyx.document_index.vespa_constants import TITLE\nfrom onyx.document_index.vespa_constants import TITLE_EMBEDDING\nfrom onyx.document_index.vespa_constants import USER_PROJECT\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.text_processing import remove_invalid_unicode_chars\n\n\nlogger = setup_logger()\n\n# Retry configuration constants\nINDEXING_MAX_RETRIES = 5\nINDEXING_BASE_DELAY = 1.0\nINDEXING_MAX_DELAY = 60.0\n\n\n@retry(tries=3, delay=1, backoff=2)\ndef _does_doc_chunk_exist(\n    doc_chunk_id: uuid.UUID, index_name: str, http_client: httpx.Client\n) -> bool:\n    doc_url = f\"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}\"\n    doc_fetch_response = http_client.get(doc_url)\n    if doc_fetch_response.status_code == 404:\n        return False\n\n    if doc_fetch_response.status_code != 200:\n        logger.debug(f\"Failed to check for document with URL {doc_url}\")\n        raise RuntimeError(\n            f\"Unexpected fetch document by ID value from Vespa: \"\n            f\"error={doc_fetch_response.status_code} \"\n            f\"index={index_name} \"\n            f\"doc_chunk_id={doc_chunk_id}\"\n        )\n    return True\n\n\ndef _vespa_get_updated_at_attribute(t: datetime | None) -> int | None:\n    if not t:\n        return None\n\n    if t.tzinfo != timezone.utc:\n        raise ValueError(\"Connectors must provide document update time in UTC\")\n\n    return int(t.timestamp())\n\n\ndef get_existing_documents_from_chunks(\n    chunks: list[DocMetadataAwareIndexChunk],\n    index_name: str,\n    http_client: httpx.Client,\n    executor: concurrent.futures.ThreadPoolExecutor | None = None,\n) -> set[str]:\n    external_executor = True\n\n    if not executor:\n        external_executor = False\n        executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS)\n\n    document_ids: set[str] = set()\n    try:\n        chunk_existence_future = {\n            executor.submit(\n                _does_doc_chunk_exist,\n                get_uuid_from_chunk(chunk),\n                index_name,\n                http_client,\n            ): chunk\n            for chunk in chunks\n        }\n        for future in concurrent.futures.as_completed(chunk_existence_future):\n            chunk = chunk_existence_future[future]\n            chunk_already_existed = future.result()\n            if chunk_already_existed:\n                document_ids.add(chunk.source_document.id)\n\n    finally:\n        if not external_executor:\n            executor.shutdown(wait=True)\n\n    return document_ids\n\n\ndef _index_vespa_chunk(\n    chunk: DocMetadataAwareIndexChunk,\n    index_name: str,\n    http_client: httpx.Client,\n    multitenant: bool,\n) -> None:\n    json_header = {\n        \"Content-Type\": \"application/json\",\n    }\n    document = chunk.source_document\n\n    # No minichunk documents in vespa, minichunk vectors are stored in the chunk itself\n\n    vespa_chunk_id = str(get_uuid_from_chunk(chunk))\n\n    embeddings = chunk.embeddings\n\n    embeddings_name_vector_map = {FULL_CHUNK_EMBEDDING_KEY: embeddings.full_embedding}\n\n    if embeddings.mini_chunk_embeddings:\n        for ind, m_c_embed in enumerate(embeddings.mini_chunk_embeddings):\n            embeddings_name_vector_map[f\"mini_chunk_{ind}\"] = m_c_embed\n\n    title = document.get_title_for_document_index()\n\n    metadata_json = document.metadata\n    cleaned_metadata_json: dict[str, str | list[str]] = {}\n    for key, value in metadata_json.items():\n        cleaned_key = remove_invalid_unicode_chars(key)\n        if isinstance(value, list):\n            cleaned_metadata_json[cleaned_key] = [\n                remove_invalid_unicode_chars(item) for item in value\n            ]\n        else:\n            cleaned_metadata_json[cleaned_key] = remove_invalid_unicode_chars(value)\n\n    metadata_list = document.get_metadata_str_attributes()\n    if metadata_list:\n        metadata_list = [\n            remove_invalid_unicode_chars(metadata) for metadata in metadata_list\n        ]\n\n    vespa_document_fields = {\n        DOCUMENT_ID: document.id,\n        CHUNK_ID: chunk.chunk_id,\n        BLURB: remove_invalid_unicode_chars(chunk.blurb),\n        TITLE: remove_invalid_unicode_chars(title) if title else None,\n        SKIP_TITLE_EMBEDDING: not title,\n        # For the BM25 index, the keyword suffix is used, the vector is already generated with the more\n        # natural language representation of the metadata section\n        CONTENT: remove_invalid_unicode_chars(\n            generate_enriched_content_for_chunk_text(chunk)\n        ),\n        # This duplication of `content` is needed for keyword highlighting\n        # Note that it's not exactly the same as the actual content\n        # which contains the title prefix and metadata suffix\n        CONTENT_SUMMARY: remove_invalid_unicode_chars(chunk.content),\n        SOURCE_TYPE: str(document.source.value),\n        SOURCE_LINKS: json.dumps(chunk.source_links),\n        SEMANTIC_IDENTIFIER: remove_invalid_unicode_chars(document.semantic_identifier),\n        SECTION_CONTINUATION: chunk.section_continuation,\n        LARGE_CHUNK_REFERENCE_IDS: chunk.large_chunk_reference_ids,\n        METADATA: json.dumps(cleaned_metadata_json),\n        # Save as a list for efficient extraction as an Attribute\n        METADATA_LIST: metadata_list,\n        METADATA_SUFFIX: remove_invalid_unicode_chars(chunk.metadata_suffix_keyword),\n        CHUNK_CONTEXT: chunk.chunk_context,\n        DOC_SUMMARY: chunk.doc_summary,\n        EMBEDDINGS: embeddings_name_vector_map,\n        TITLE_EMBEDDING: chunk.title_embedding,\n        DOC_UPDATED_AT: _vespa_get_updated_at_attribute(document.doc_updated_at),\n        PRIMARY_OWNERS: get_experts_stores_representations(document.primary_owners),\n        SECONDARY_OWNERS: get_experts_stores_representations(document.secondary_owners),\n        # the only `set` vespa has is `weightedset`, so we have to give each\n        # element an arbitrary weight\n        # rkuo: acl, docset and boost metadata are also updated through the metadata sync queue\n        # which only calls VespaIndex.update\n        ACCESS_CONTROL_LIST: {acl_entry: 1 for acl_entry in chunk.access.to_acl()},\n        DOCUMENT_SETS: {document_set: 1 for document_set in chunk.document_sets},\n        # still called `image_file_name` in Vespa for backwards compatibility\n        IMAGE_FILE_NAME: chunk.image_file_id,\n        USER_PROJECT: chunk.user_project if chunk.user_project is not None else [],\n        PERSONAS: chunk.personas if chunk.personas is not None else [],\n        BOOST: chunk.boost,\n        AGGREGATED_CHUNK_BOOST_FACTOR: chunk.aggregated_chunk_boost_factor,\n    }\n\n    if multitenant:\n        if chunk.tenant_id:\n            vespa_document_fields[TENANT_ID] = chunk.tenant_id\n    vespa_url = f\"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_chunk_id}\"\n    logger.debug(f'Indexing to URL \"{vespa_url}\"')\n\n    # Retry logic with exponential backoff for rate limiting\n    for attempt in range(INDEXING_MAX_RETRIES):\n        try:\n            res = http_client.post(\n                vespa_url, headers=json_header, json={\"fields\": vespa_document_fields}\n            )\n            res.raise_for_status()\n            return  # Success, exit the function\n        except httpx.HTTPStatusError as e:\n            # Handle 429 rate limiting specifically\n            if e.response.status_code == HTTPStatus.TOO_MANY_REQUESTS:\n                if attempt < INDEXING_MAX_RETRIES - 1:\n                    # Calculate exponential backoff with jitter\n                    delay = min(\n                        INDEXING_BASE_DELAY * (2**attempt), INDEXING_MAX_DELAY\n                    ) * random.uniform(0.5, 1.0)\n                    logger.warning(\n                        f\"Rate limited while indexing document '{document.id}' \"\n                        f\"(attempt {attempt + 1}/{INDEXING_MAX_RETRIES}). \"\n                        f\"Vespa response: '{e.response.text}'. \"\n                        f\"Backing off for {delay:.2f} seconds.\"\n                    )\n                    time.sleep(delay)\n                    continue\n                else:\n                    raise RuntimeError(\n                        f\"Failed to index document '{document.id}' after {INDEXING_MAX_RETRIES} attempts due to rate limiting\"\n                    ) from e\n            elif e.response.status_code == HTTPStatus.INSUFFICIENT_STORAGE:\n                logger.error(\n                    f\"Failed to index document: '{document.id}'. Got response: '{e.response.text}'\"\n                )\n                logger.error(\n                    \"NOTE: HTTP Status 507 Insufficient Storage usually means \"\n                    \"you need to allocate more memory or disk space to the \"\n                    \"Vespa/index container.\"\n                )\n                raise\n            else:\n                # For other HTTP errors, check if retryable\n                if e.response.status_code in (\n                    HTTPStatus.BAD_REQUEST,\n                    HTTPStatus.UNAUTHORIZED,\n                    HTTPStatus.FORBIDDEN,\n                    HTTPStatus.NOT_FOUND,\n                ):\n                    # Non-retryable errors - fail immediately\n                    logger.error(\n                        f\"Non-retryable HTTP {e.response.status_code} error for document '{document.id}'\"\n                    )\n                    raise\n                # Retry other errors with shorter backoff\n                if attempt < INDEXING_MAX_RETRIES - 1:\n                    delay = INDEXING_BASE_DELAY * (1.5**attempt)\n                    logger.warning(\n                        f\"HTTP error {e.response.status_code} while indexing document '{document.id}' \"\n                        f\"(attempt {attempt + 1}/{INDEXING_MAX_RETRIES}). Retrying in {delay:.2f} seconds.\"\n                    )\n                    time.sleep(delay)\n                    continue\n                else:\n                    logger.exception(\n                        f\"Failed to index document: '{document.id}'. Got response: '{e.response.text}'\"\n                    )\n                    raise\n        except Exception as e:\n            # For non-HTTP errors, use simple retry logic\n            if attempt < INDEXING_MAX_RETRIES - 1:\n                delay = INDEXING_BASE_DELAY * (1.5**attempt)\n                logger.warning(\n                    f\"Error while indexing document '{document.id}' \"\n                    f\"(attempt {attempt + 1}/{INDEXING_MAX_RETRIES}): {str(e)}. \"\n                    f\"Retrying in {delay:.2f} seconds.\"\n                )\n                time.sleep(delay)\n                continue\n            else:\n                logger.exception(f\"Failed to index document: '{document.id}'\")\n                raise\n\n\ndef batch_index_vespa_chunks(\n    chunks: list[DocMetadataAwareIndexChunk],\n    index_name: str,\n    http_client: httpx.Client,\n    multitenant: bool,\n    executor: concurrent.futures.ThreadPoolExecutor | None = None,\n) -> None:\n    \"\"\"Indexes a list of chunks in a Vespa index in parallel.\n\n    Args:\n        chunks: List of chunks to index.\n        index_name: Name of the index to index into.\n        http_client: HTTP client to use for the request.\n        multitenant: Whether the index is multitenant.\n        executor: Executor to use for the request.\n    \"\"\"\n    external_executor = True\n\n    if not executor:\n        external_executor = False\n        executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS)\n\n    try:\n        chunk_index_future = {\n            executor.submit(\n                _index_vespa_chunk, chunk, index_name, http_client, multitenant\n            ): chunk\n            for chunk in chunks\n        }\n        for future in concurrent.futures.as_completed(chunk_index_future):\n            # Will raise exception if any indexing raised an exception\n            future.result()\n\n    finally:\n        if not external_executor:\n            executor.shutdown(wait=True)\n\n\ndef clean_chunk_id_copy(\n    chunk: DocMetadataAwareIndexChunk,\n) -> DocMetadataAwareIndexChunk:\n    clean_chunk = chunk.model_copy(\n        update={\n            \"source_document\": chunk.source_document.model_copy(\n                update={\n                    \"id\": replace_invalid_doc_id_characters(chunk.source_document.id)\n                }\n            )\n        }\n    )\n    return clean_chunk\n\n\ndef check_for_final_chunk_existence(\n    minimal_doc_info: MinimalDocumentIndexingInfo,\n    start_index: int,\n    index_name: str,\n    http_client: httpx.Client,\n) -> int:\n    index = start_index\n    while True:\n        doc_chunk_id = get_uuid_from_chunk_info_old(\n            document_id=minimal_doc_info.doc_id,\n            chunk_id=index,\n            large_chunk_reference_ids=[],\n        )\n        if not _does_doc_chunk_exist(doc_chunk_id, index_name, http_client):\n            return index\n        index += 1\n\n\nclass BaseHTTPXClientContext(ABC):\n    \"\"\"Abstract base class for an HTTPX client context manager.\"\"\"\n\n    @abstractmethod\n    def __enter__(self) -> httpx.Client:\n        pass\n\n    @abstractmethod\n    def __exit__(self, exc_type, exc_value, traceback):  # type: ignore\n        pass\n\n\nclass GlobalHTTPXClientContext(BaseHTTPXClientContext):\n    \"\"\"Context manager for a global HTTPX client that does not close it.\"\"\"\n\n    def __init__(self, client: httpx.Client):\n        self._client = client\n\n    def __enter__(self) -> httpx.Client:\n        return self._client  # Reuse the global client\n\n    def __exit__(self, exc_type, exc_value, traceback):  # type: ignore\n        pass  # Do nothing; don't close the global client\n\n\nclass TemporaryHTTPXClientContext(BaseHTTPXClientContext):\n    \"\"\"Context manager for a temporary HTTPX client that closes it after use.\"\"\"\n\n    def __init__(self, client_factory: Callable[[], httpx.Client]):\n        self._client_factory = client_factory\n        self._client: httpx.Client | None = None  # Client will be created in __enter__\n\n    def __enter__(self) -> httpx.Client:\n        self._client = self._client_factory()  # Create a new client\n        return self._client\n\n    def __exit__(self, exc_type, exc_value, traceback):  # type: ignore\n        if self._client:\n            self._client.close()\n"
  },
  {
    "path": "backend/onyx/document_index/vespa/kg_interactions.py",
    "content": "from onyx.db.document import get_document_kg_entities_and_relationships\nfrom onyx.db.document import get_num_chunks_for_document\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.document_index.vespa.index import KGUChunkUpdateRequest\nfrom onyx.document_index.vespa.index import VespaIndex\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n\ndef update_kg_chunks_vespa_info(\n    kg_update_requests: list[KGUChunkUpdateRequest],\n    index_name: str,\n    tenant_id: str,\n) -> None:\n    \"\"\" \"\"\"\n    # Use the existing visit API infrastructure\n    vespa_index = VespaIndex(\n        index_name=index_name,\n        secondary_index_name=None,\n        large_chunks_enabled=False,\n        secondary_large_chunks_enabled=False,\n        multitenant=MULTI_TENANT,\n        httpx_client=None,\n    )\n\n    vespa_index.kg_chunk_updates(\n        kg_update_requests=kg_update_requests, tenant_id=tenant_id\n    )\n\n\ndef get_kg_vespa_info_update_requests_for_document(\n    document_id: str,\n) -> list[KGUChunkUpdateRequest]:\n    \"\"\"Get the kg_info update requests for a document.\"\"\"\n    # get all entities and relationships tied to the document\n    with get_session_with_current_tenant() as db_session:\n        entities, relationships = get_document_kg_entities_and_relationships(\n            db_session, document_id\n        )\n\n    # create the kg vespa info\n    kg_entities = {entity.id_name for entity in entities}\n    kg_relationships = {relationship.id_name for relationship in relationships}\n\n    # get chunks in the document\n    with get_session_with_current_tenant() as db_session:\n        num_chunks = get_num_chunks_for_document(db_session, document_id)\n\n    # get vespa update requests\n    return [\n        KGUChunkUpdateRequest(\n            document_id=document_id,\n            chunk_id=chunk_id,\n            core_entity=\"unused\",\n            entities=kg_entities,\n            relationships=kg_relationships or None,\n        )\n        for chunk_id in range(num_chunks)\n    ]\n"
  },
  {
    "path": "backend/onyx/document_index/vespa/shared_utils/utils.py",
    "content": "import time\nfrom typing import cast\n\nimport httpx\n\nfrom onyx.configs.app_configs import MANAGED_VESPA\nfrom onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH\nfrom onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH\nfrom onyx.configs.app_configs import VESPA_REQUEST_TIMEOUT\nfrom onyx.document_index.vespa_constants import VESPA_APP_CONTAINER_URL\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# NOTE: This does not seem to be used in reality despite the Vespa Docs pointing to this code\n# See here for reference: https://docs.vespa.ai/en/documents.html\n# https://github.com/vespa-engine/vespa/blob/master/vespajlib/src/main/java/com/yahoo/text/Text.java\n\n# Define allowed ASCII characters\nALLOWED_ASCII_CHARS: list[bool] = [False] * 0x80\nALLOWED_ASCII_CHARS[0x9] = True  # tab\nALLOWED_ASCII_CHARS[0xA] = True  # newline\nALLOWED_ASCII_CHARS[0xD] = True  # carriage return\nfor i in range(0x20, 0x7F):\n    ALLOWED_ASCII_CHARS[i] = True  # printable ASCII chars\nALLOWED_ASCII_CHARS[0x7F] = True  # del - discouraged, but allowed\n\n\ndef is_text_character(codepoint: int) -> bool:\n    \"\"\"Returns whether the given codepoint is a valid text character.\"\"\"\n    if codepoint < 0x80:\n        return ALLOWED_ASCII_CHARS[codepoint]\n    if codepoint < 0xD800:\n        return True\n    if codepoint <= 0xDFFF:\n        return False\n    if codepoint < 0xFDD0:\n        return True\n    if codepoint <= 0xFDEF:\n        return False\n    if codepoint >= 0x10FFFE:\n        return False\n    return (codepoint & 0xFFFF) < 0xFFFE\n\n\ndef replace_invalid_doc_id_characters(text: str) -> str:\n    \"\"\"Replaces invalid document ID characters in text.\n    NOTE: this must be called at the start of every vespa-related operation or else we\n    risk discrepancies -> silent failures on deletion/update/insertion.\"\"\"\n    # There may be a more complete set of replacements that need to be made but Vespa docs are unclear\n    # and users only seem to be running into this error with single quotes\n    return text.replace(\"'\", \"_\")\n\n\ndef get_vespa_http_client(\n    no_timeout: bool = False, http2: bool = True, timeout: int | None = None\n) -> httpx.Client:\n    \"\"\"\n    Configures and returns an HTTP client for communicating with Vespa,\n    including authentication if needed.\n    \"\"\"\n    return httpx.Client(\n        cert=(\n            cast(tuple[str, str], (VESPA_CLOUD_CERT_PATH, VESPA_CLOUD_KEY_PATH))\n            if MANAGED_VESPA\n            else None\n        ),\n        verify=False if not MANAGED_VESPA else True,\n        timeout=None if no_timeout else (timeout or VESPA_REQUEST_TIMEOUT),\n        http2=http2,\n    )\n\n\ndef wait_for_vespa_with_timeout(wait_interval: int = 5, wait_limit: int = 60) -> bool:\n    \"\"\"Waits for Vespa to become ready subject to a timeout.\n    Returns True if Vespa is ready, False otherwise.\"\"\"\n\n    time_start = time.monotonic()\n    logger.info(\"Vespa: Readiness probe starting.\")\n    while True:\n        url = f\"{VESPA_APP_CONTAINER_URL}/state/v1/health\"\n        try:\n            client = get_vespa_http_client()\n            response = client.get(url)\n            response.raise_for_status()\n\n            response_dict = response.json()\n            if response_dict[\"status\"][\"code\"] == \"up\":\n                logger.info(\"Vespa: Readiness probe succeeded. Continuing...\")\n                return True\n        except Exception as e:\n            logger.warning(\n                f\"Vespa: Readiness probe failed trying to connect to {url}. Exception: {e}\"\n            )\n\n        time_elapsed = time.monotonic() - time_start\n        if time_elapsed > wait_limit:\n            logger.info(\n                f\"Vespa: Readiness probe did not succeed within the timeout ({wait_limit} seconds).\"\n            )\n            return False\n\n        logger.info(\n            f\"Vespa: Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={wait_limit:.1f}\"\n        )\n\n        time.sleep(wait_interval)\n"
  },
  {
    "path": "backend/onyx/document_index/vespa/shared_utils/vespa_request_builders.py",
    "content": "from datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nfrom onyx.configs.constants import INDEX_SEPARATOR\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.document_index.interfaces import VespaChunkRequest\nfrom onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST\nfrom onyx.document_index.vespa_constants import CHUNK_ID\nfrom onyx.document_index.vespa_constants import DOC_UPDATED_AT\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID\nfrom onyx.document_index.vespa_constants import DOCUMENT_SETS\nfrom onyx.document_index.vespa_constants import HIDDEN\nfrom onyx.document_index.vespa_constants import METADATA_LIST\nfrom onyx.document_index.vespa_constants import PERSONAS\nfrom onyx.document_index.vespa_constants import SOURCE_TYPE\nfrom onyx.document_index.vespa_constants import TENANT_ID\nfrom onyx.document_index.vespa_constants import USER_PROJECT\nfrom onyx.kg.utils.formatting_utils import split_relationship_id\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n\ndef build_tenant_id_filter(tenant_id: str) -> str:\n    return f'({TENANT_ID} contains \"{tenant_id}\")'\n\n\ndef build_vespa_filters(\n    filters: IndexFilters,\n    *,\n    include_hidden: bool = False,\n    remove_trailing_and: bool = False,  # Set to True when using as a complete Vespa query\n) -> str:\n    def _build_or_filters(key: str, vals: list[str] | None) -> str:\n        \"\"\"For string-based 'contains' filters, e.g. WSET fields or array<string> fields.\n        Returns a bare clause like '(key contains \"v1\" or key contains \"v2\")' or \"\".\"\"\"\n        if not key or not vals:\n            return \"\"\n        eq_elems = [f'{key} contains \"{val}\"' for val in vals if val]\n        if not eq_elems:\n            return \"\"\n        return f\"({' or '.join(eq_elems)})\"\n\n    def _build_weighted_set_filter(key: str, vals: list[str] | None) -> str:\n        \"\"\"Build a Vespa weightedSet filter for large value lists.\n\n        Uses Vespa's native weightedSet() operator instead of OR-chained\n        'contains' clauses.  This is critical for fields like\n        access_control_list where a single user may have tens of thousands\n        of ACL entries — OR clauses at that scale cause Vespa to reject\n        the query with HTTP 400.\"\"\"\n        if not key or not vals:\n            return \"\"\n        filtered = [val for val in vals if val]\n        if not filtered:\n            return \"\"\n        items = \", \".join(f'\"{val}\":1' for val in filtered)\n        return f\"weightedSet({key}, {{{items}}})\"\n\n    def _build_int_or_filters(key: str, vals: list[int] | None) -> str:\n        \"\"\"For an integer field filter.\n        Returns a bare clause or \"\".\"\"\"\n        if vals is None or not vals:\n            return \"\"\n        eq_elems = [f\"{key} = {val}\" for val in vals]\n        return f\"({' or '.join(eq_elems)})\"\n\n    def _build_kg_filter(\n        kg_entities: list[str] | None,\n        kg_relationships: list[str] | None,\n        kg_terms: list[str] | None,\n    ) -> str:\n        if not kg_entities and not kg_relationships and not kg_terms:\n            return \"\"\n\n        combined_filter_parts = []\n\n        def _build_kge(entity: str) -> str:\n            GENERAL = \"::*\"\n            if entity.endswith(GENERAL):\n                return f'({{prefix: true}}\"{entity.split(GENERAL, 1)[0]}\")'\n            else:\n                return f'\"{entity}\"'\n\n        if kg_entities:\n            filter_parts = []\n            for kg_entity in kg_entities:\n                filter_parts.append(f\"(kg_entities contains {_build_kge(kg_entity)})\")\n            combined_filter_parts.append(f\"({' or '.join(filter_parts)})\")\n\n        # TODO: handle complex nested relationship logic (e.g., A participated, and B or C participated)\n        if kg_relationships:\n            filter_parts = []\n            for kg_relationship in kg_relationships:\n                source, rel_type, target = split_relationship_id(kg_relationship)\n                filter_parts.append(\n                    \"(kg_relationships contains sameElement(\"\n                    f\"source contains {_build_kge(source)},\"\n                    f'rel_type contains \"{rel_type}\",'\n                    f\"target contains {_build_kge(target)}))\"\n                )\n            combined_filter_parts.append(f\"{' and '.join(filter_parts)}\")\n\n        # TODO: remove kg terms entirely from prompts and codebase\n\n        return f\"({' and '.join(combined_filter_parts)})\"\n\n    def _build_kg_source_filters(\n        kg_sources: list[str] | None,\n    ) -> str:\n        if not kg_sources:\n            return \"\"\n\n        source_phrases = [f'{DOCUMENT_ID} contains \"{source}\"' for source in kg_sources]\n        return f\"({' or '.join(source_phrases)})\"\n\n    def _build_kg_chunk_id_zero_only_filter(\n        kg_chunk_id_zero_only: bool,\n    ) -> str:\n        if not kg_chunk_id_zero_only:\n            return \"\"\n        return \"(chunk_id = 0)\"\n\n    def _build_time_filter(\n        cutoff: datetime | None,\n        untimed_doc_cutoff: timedelta = timedelta(days=92),\n    ) -> str:\n        if not cutoff:\n            return \"\"\n        include_untimed = datetime.now(timezone.utc) - untimed_doc_cutoff > cutoff\n        cutoff_secs = int(cutoff.timestamp())\n\n        if include_untimed:\n            return f\"!({DOC_UPDATED_AT} < {cutoff_secs})\"\n        return f\"({DOC_UPDATED_AT} >= {cutoff_secs})\"\n\n    def _build_user_project_filter(\n        project_id: int | None,\n    ) -> str:\n        if project_id is None:\n            return \"\"\n        try:\n            pid = int(project_id)\n        except Exception:\n            return \"\"\n        return f'({USER_PROJECT} contains \"{pid}\")'\n\n    def _build_persona_filter(\n        persona_id: int | None,\n    ) -> str:\n        if persona_id is None:\n            return \"\"\n        try:\n            pid = int(persona_id)\n        except Exception:\n            logger.warning(f\"Invalid persona ID: {persona_id}\")\n            return \"\"\n        return f'({PERSONAS} contains \"{pid}\")'\n\n    def _append(parts: list[str], clause: str) -> None:\n        if clause:\n            parts.append(clause)\n\n    # Collect all top-level filter clauses, then join with \" and \" at the end.\n    filter_parts: list[str] = []\n\n    if not include_hidden:\n        filter_parts.append(f\"!({HIDDEN}=true)\")\n\n    # TODO: add error condition if MULTI_TENANT and no tenant_id filter is set\n    if filters.tenant_id and MULTI_TENANT:\n        filter_parts.append(build_tenant_id_filter(filters.tenant_id))\n\n    # ACL filters — use weightedSet for efficient matching against the\n    # access_control_list weightedset<string> field.  OR-chaining thousands\n    # of 'contains' clauses causes Vespa to reject the query (HTTP 400)\n    # for users with large numbers of external permission groups.\n    if filters.access_control_list is not None:\n        _append(\n            filter_parts,\n            _build_weighted_set_filter(\n                ACCESS_CONTROL_LIST, filters.access_control_list\n            ),\n        )\n\n    # Source type filters\n    source_strs = (\n        [s.value for s in filters.source_type] if filters.source_type else None\n    )\n    _append(filter_parts, _build_or_filters(SOURCE_TYPE, source_strs))\n\n    # Tag filters\n    tag_attributes = None\n    if filters.tags:\n        tag_attributes = [\n            f\"{tag.tag_key}{INDEX_SEPARATOR}{tag.tag_value}\" for tag in filters.tags\n        ]\n    _append(filter_parts, _build_or_filters(METADATA_LIST, tag_attributes))\n\n    # Knowledge scope: explicit knowledge attachments restrict what an\n    # assistant can see.  When none are set, the assistant can see\n    # everything.\n    #\n    # persona_id_filter is a primary trigger — a persona with user files IS\n    # explicit knowledge, so it can start a knowledge scope on its own.\n    #\n    # project_id_filter is additive — it widens the scope to also cover\n    # overflowing project files but never restricts on its own (a chat\n    # inside a project should still search team knowledge).\n    knowledge_scope_parts: list[str] = []\n\n    _append(\n        knowledge_scope_parts, _build_or_filters(DOCUMENT_SETS, filters.document_set)\n    )\n    _append(knowledge_scope_parts, _build_persona_filter(filters.persona_id_filter))\n\n    # project_id_filter only widens an existing scope.\n    if knowledge_scope_parts:\n        _append(\n            knowledge_scope_parts,\n            _build_user_project_filter(filters.project_id_filter),\n        )\n\n    if len(knowledge_scope_parts) > 1:\n        filter_parts.append(\"(\" + \" or \".join(knowledge_scope_parts) + \")\")\n    elif len(knowledge_scope_parts) == 1:\n        filter_parts.append(knowledge_scope_parts[0])\n\n    # Time filter\n    _append(filter_parts, _build_time_filter(filters.time_cutoff))\n\n    # # Knowledge Graph Filters\n    # _append(filter_parts, _build_kg_filter(\n    #     kg_entities=filters.kg_entities,\n    #     kg_relationships=filters.kg_relationships,\n    #     kg_terms=filters.kg_terms,\n    # ))\n\n    # _append(filter_parts, _build_kg_source_filters(filters.kg_sources))\n\n    # _append(filter_parts, _build_kg_chunk_id_zero_only_filter(\n    #     filters.kg_chunk_id_zero_only or False\n    # ))\n\n    filter_str = \" and \".join(filter_parts)\n\n    if filter_str and not remove_trailing_and:\n        filter_str += \" and \"\n\n    return filter_str\n\n\ndef build_vespa_id_based_retrieval_yql(\n    chunk_request: VespaChunkRequest,\n) -> str:\n    id_based_retrieval_yql_section = (\n        f'({DOCUMENT_ID} contains \"{chunk_request.document_id}\"'\n    )\n\n    if chunk_request.is_capped:\n        id_based_retrieval_yql_section += (\n            f\" and {CHUNK_ID} >= {chunk_request.min_chunk_ind or 0}\"\n        )\n        id_based_retrieval_yql_section += (\n            f\" and {CHUNK_ID} <= {chunk_request.max_chunk_ind}\"\n        )\n\n    id_based_retrieval_yql_section += \")\"\n    return id_based_retrieval_yql_section\n"
  },
  {
    "path": "backend/onyx/document_index/vespa/vespa_document_index.py",
    "content": "import concurrent.futures\nimport logging\nimport random\nfrom collections.abc import Generator\nfrom collections.abc import Iterable\nfrom typing import Any\nfrom uuid import UUID\n\nimport httpx\nfrom pydantic import BaseModel\nfrom retry import retry\n\nfrom onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH\nfrom onyx.configs.app_configs import RECENCY_BIAS_MULTIPLIER\nfrom onyx.configs.app_configs import RERANK_COUNT\nfrom onyx.configs.chat_configs import DOC_TIME_DECAY\nfrom onyx.configs.chat_configs import HYBRID_ALPHA\nfrom onyx.configs.chat_configs import TITLE_CONTENT_RATIO\nfrom onyx.context.search.enums import QueryType\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.document_index.chunk_content_enrichment import cleanup_content_for_chunks\nfrom onyx.document_index.document_index_utils import get_document_chunk_ids\nfrom onyx.document_index.document_index_utils import get_uuid_from_chunk_info\nfrom onyx.document_index.interfaces import EnrichedDocumentIndexingInfo\nfrom onyx.document_index.interfaces import MinimalDocumentIndexingInfo\nfrom onyx.document_index.interfaces import VespaChunkRequest\nfrom onyx.document_index.interfaces_new import DocumentIndex\nfrom onyx.document_index.interfaces_new import DocumentInsertionRecord\nfrom onyx.document_index.interfaces_new import DocumentSectionRequest\nfrom onyx.document_index.interfaces_new import IndexingMetadata\nfrom onyx.document_index.interfaces_new import MetadataUpdateRequest\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.vespa.chunk_retrieval import batch_search_api_retrieval\nfrom onyx.document_index.vespa.chunk_retrieval import get_all_chunks_paginated\nfrom onyx.document_index.vespa.chunk_retrieval import get_chunks_via_visit_api\nfrom onyx.document_index.vespa.chunk_retrieval import (\n    parallel_visit_api_retrieval,\n)\nfrom onyx.document_index.vespa.chunk_retrieval import query_vespa\nfrom onyx.document_index.vespa.deletion import delete_vespa_chunks\nfrom onyx.document_index.vespa.indexing_utils import BaseHTTPXClientContext\nfrom onyx.document_index.vespa.indexing_utils import batch_index_vespa_chunks\nfrom onyx.document_index.vespa.indexing_utils import check_for_final_chunk_existence\nfrom onyx.document_index.vespa.indexing_utils import clean_chunk_id_copy\nfrom onyx.document_index.vespa.indexing_utils import GlobalHTTPXClientContext\nfrom onyx.document_index.vespa.indexing_utils import TemporaryHTTPXClientContext\nfrom onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client\nfrom onyx.document_index.vespa.shared_utils.utils import (\n    replace_invalid_doc_id_characters,\n)\nfrom onyx.document_index.vespa.shared_utils.vespa_request_builders import (\n    build_vespa_filters,\n)\nfrom onyx.document_index.vespa_constants import BATCH_SIZE\nfrom onyx.document_index.vespa_constants import CHUNK_ID\nfrom onyx.document_index.vespa_constants import CONTENT_SUMMARY\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT\nfrom onyx.document_index.vespa_constants import NUM_THREADS\nfrom onyx.document_index.vespa_constants import SEARCH_ENDPOINT\nfrom onyx.document_index.vespa_constants import VESPA_TIMEOUT\nfrom onyx.document_index.vespa_constants import YQL_BASE\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom onyx.tools.tool_implementations.search.constants import KEYWORD_QUERY_HYBRID_ALPHA\nfrom onyx.utils.batching import batch_generator\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.model_server_models import Embedding\n\n\nlogger = setup_logger(__name__)\n# Set the logging level to WARNING to ignore INFO and DEBUG logs from httpx. By\n# default it emits INFO-level logs for every request.\nhttpx_logger = logging.getLogger(\"httpx\")\nhttpx_logger.setLevel(logging.WARNING)\n\n\ndef _enrich_basic_chunk_info(\n    index_name: str,\n    http_client: httpx.Client,\n    document_id: str,\n    previous_chunk_count: int | None,\n    new_chunk_count: int,\n) -> EnrichedDocumentIndexingInfo:\n    \"\"\"Determines which chunks need to be deleted during document reindexing.\n\n    When a document is reindexed, it may have fewer chunks than before. This\n    function identifies the range of old chunks that need to be deleted by\n    comparing the new chunk count with the previous chunk count.\n\n    Example:\n        If a document previously had 10 chunks (0-9) and now has 7 chunks (0-6),\n        this function identifies that chunks 7-9 need to be deleted.\n\n    Args:\n        index_name: The Vespa index/schema name.\n        http_client: HTTP client for making requests to Vespa.\n        document_id: The Vespa-sanitized ID of the document being reindexed.\n        previous_chunk_count: The total number of chunks the document had before\n            reindexing. None for documents using the legacy chunk ID system.\n        new_chunk_count: The total number of chunks the document has after\n            reindexing. This becomes the starting index for deletion since\n            chunks are 0-indexed.\n\n    Returns:\n        EnrichedDocumentIndexingInfo with chunk_start_index set to\n        new_chunk_count (where deletion begins) and chunk_end_index set to\n        previous_chunk_count (where deletion ends).\n    \"\"\"\n    # Technically last indexed chunk index +1.\n    last_indexed_chunk = previous_chunk_count\n    # If the document has no `chunk_count` in the database, we know that it\n    # has the old chunk ID system and we must check for the final chunk index.\n    is_old_version = False\n    if last_indexed_chunk is None:\n        is_old_version = True\n        minimal_doc_info = MinimalDocumentIndexingInfo(\n            doc_id=document_id, chunk_start_index=new_chunk_count\n        )\n        last_indexed_chunk = check_for_final_chunk_existence(\n            minimal_doc_info=minimal_doc_info,\n            start_index=new_chunk_count,\n            index_name=index_name,\n            http_client=http_client,\n        )\n\n    assert (\n        last_indexed_chunk is not None and last_indexed_chunk >= 0\n    ), f\"Bug: Last indexed chunk index is None or less than 0 for document: {document_id}.\"\n\n    enriched_doc_info = EnrichedDocumentIndexingInfo(\n        doc_id=document_id,\n        chunk_start_index=new_chunk_count,\n        chunk_end_index=last_indexed_chunk,\n        old_version=is_old_version,\n    )\n    return enriched_doc_info\n\n\n@retry(\n    tries=3,\n    delay=1,\n    backoff=2,\n    exceptions=httpx.HTTPError,\n)\ndef _update_single_chunk(\n    doc_chunk_id: UUID,\n    index_name: str,\n    doc_id: str,\n    http_client: httpx.Client,\n    update_request: MetadataUpdateRequest,\n) -> None:\n    \"\"\"Updates a single document chunk in Vespa.\n\n    TODO(andrei): Couldn't this be batched?\n\n    Args:\n        doc_chunk_id: The ID of the chunk to update.\n        index_name: The index the chunk belongs to.\n        doc_id: The ID of the document the chunk belongs to. Used only for\n            logging.\n        http_client: The HTTP client to use to make the request.\n        update_request: Metadata update request object received in the bulk\n            update method containing fields to update.\n    \"\"\"\n\n    class _Boost(BaseModel):\n        model_config = {\"frozen\": True}\n        assign: float\n\n    class _DocumentSets(BaseModel):\n        model_config = {\"frozen\": True}\n        assign: dict[str, int]\n\n    class _AccessControl(BaseModel):\n        model_config = {\"frozen\": True}\n        assign: dict[str, int]\n\n    class _Hidden(BaseModel):\n        model_config = {\"frozen\": True}\n        assign: bool\n\n    class _UserProjects(BaseModel):\n        model_config = {\"frozen\": True}\n        assign: list[int]\n\n    class _Personas(BaseModel):\n        model_config = {\"frozen\": True}\n        assign: list[int]\n\n    class _VespaPutFields(BaseModel):\n        model_config = {\"frozen\": True}\n        # The names of these fields are based the Vespa schema. Changes to the\n        # schema require changes here. These names were originally found in\n        # backend/onyx/document_index/vespa_constants.py.\n        boost: _Boost | None = None\n        document_sets: _DocumentSets | None = None\n        access_control_list: _AccessControl | None = None\n        hidden: _Hidden | None = None\n        user_project: _UserProjects | None = None\n        personas: _Personas | None = None\n\n    class _VespaPutRequest(BaseModel):\n        model_config = {\"frozen\": True}\n        fields: _VespaPutFields\n\n    boost_update: _Boost | None = (\n        _Boost(assign=update_request.boost)\n        if update_request.boost is not None\n        else None\n    )\n    document_sets_update: _DocumentSets | None = (\n        _DocumentSets(\n            assign={document_set: 1 for document_set in update_request.document_sets}\n        )\n        if update_request.document_sets is not None\n        else None\n    )\n    access_update: _AccessControl | None = (\n        _AccessControl(\n            assign={acl_entry: 1 for acl_entry in update_request.access.to_acl()}\n        )\n        if update_request.access is not None\n        else None\n    )\n    hidden_update: _Hidden | None = (\n        _Hidden(assign=update_request.hidden)\n        if update_request.hidden is not None\n        else None\n    )\n    user_projects_update: _UserProjects | None = (\n        _UserProjects(assign=list(update_request.project_ids))\n        if update_request.project_ids is not None\n        else None\n    )\n    personas_update: _Personas | None = (\n        _Personas(assign=list(update_request.persona_ids))\n        if update_request.persona_ids is not None\n        else None\n    )\n\n    vespa_put_fields = _VespaPutFields(\n        boost=boost_update,\n        document_sets=document_sets_update,\n        access_control_list=access_update,\n        hidden=hidden_update,\n        user_project=user_projects_update,\n        personas=personas_update,\n    )\n\n    vespa_put_request = _VespaPutRequest(\n        fields=vespa_put_fields,\n    )\n\n    vespa_url = f\"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}?create=true\"\n\n    try:\n        resp = http_client.put(\n            vespa_url,\n            headers={\"Content-Type\": \"application/json\"},\n            json=vespa_put_request.model_dump(\n                exclude_none=True\n            ),  # NOTE: Important to not produce null fields in the json.\n        )\n        resp.raise_for_status()\n    except httpx.HTTPStatusError as e:\n        logger.error(\n            f\"Failed to update doc chunk {doc_chunk_id} (doc_id={doc_id}). \"\n            f\"Code: {e.response.status_code}. Details: {e.response.text}\"\n        )\n        # Re-raise so the @retry decorator will catch and retry, unless the\n        # status code is < 5xx, in which case wrap the exception in something\n        # other than an HTTPError to skip retries.\n        if e.response.status_code >= 500:\n            raise\n        raise RuntimeError(\n            f\"Non-retryable error updating chunk {doc_chunk_id}: {e}\"\n        ) from e\n\n\nclass VespaDocumentIndex(DocumentIndex):\n    \"\"\"Vespa-specific implementation of the DocumentIndex interface.\n\n    This class provides document indexing, retrieval, and management operations\n    for a Vespa search engine instance. It handles the complete lifecycle of\n    document chunks within a specific Vespa index/schema.\n    \"\"\"\n\n    def __init__(\n        self,\n        index_name: str,\n        tenant_state: TenantState,\n        large_chunks_enabled: bool,\n        httpx_client: httpx.Client | None = None,\n    ) -> None:\n        self._index_name = index_name\n        self._tenant_id = tenant_state.tenant_id\n        self._large_chunks_enabled = large_chunks_enabled\n        # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This\n        # is beneficial for indexing / updates / deletes since we have to make a\n        # large volume of requests.\n        self._httpx_client_context: BaseHTTPXClientContext\n        if httpx_client:\n            # Use the provided client. Because this client is presumed global,\n            # it does not close after exiting a context manager.\n            self._httpx_client_context = GlobalHTTPXClientContext(httpx_client)\n        else:\n            # We did not receive a client, so create one what will close after\n            # exiting a context manager.\n            self._httpx_client_context = TemporaryHTTPXClientContext(\n                get_vespa_http_client\n            )\n        self._multitenant = tenant_state.multitenant\n\n    def verify_and_create_index_if_necessary(\n        self, embedding_dim: int, embedding_precision: EmbeddingPrecision\n    ) -> None:\n        raise NotImplementedError\n\n    def index(\n        self,\n        chunks: Iterable[DocMetadataAwareIndexChunk],\n        indexing_metadata: IndexingMetadata,\n    ) -> list[DocumentInsertionRecord]:\n        doc_id_to_chunk_cnt_diff = indexing_metadata.doc_id_to_chunk_cnt_diff\n        doc_id_to_previous_chunk_cnt = {\n            doc_id: chunk_cnt_diff.old_chunk_cnt\n            for doc_id, chunk_cnt_diff in doc_id_to_chunk_cnt_diff.items()\n        }\n        doc_id_to_new_chunk_cnt = {\n            doc_id: chunk_cnt_diff.new_chunk_cnt\n            for doc_id, chunk_cnt_diff in doc_id_to_chunk_cnt_diff.items()\n        }\n        assert (\n            len(doc_id_to_chunk_cnt_diff)\n            == len(doc_id_to_previous_chunk_cnt)\n            == len(doc_id_to_new_chunk_cnt)\n        ), \"Bug: Doc ID to chunk maps have different lengths.\"\n\n        # Vespa has restrictions on valid characters, yet document IDs come from\n        # external w.r.t. this class. We need to sanitize them.\n        #\n        # Instead of materializing all cleaned chunks upfront, we stream them\n        # through a generator that cleans IDs and builds the original-ID mapping\n        # incrementally as chunks flow into Vespa.\n        def _clean_and_track(\n            chunks_iter: Iterable[DocMetadataAwareIndexChunk],\n            id_map: dict[str, str],\n            seen_ids: set[str],\n        ) -> Generator[DocMetadataAwareIndexChunk, None, None]:\n            \"\"\"Cleans chunk IDs and builds the original-ID mapping\n            incrementally as chunks flow through, avoiding a separate\n            materialization pass.\"\"\"\n            for chunk in chunks_iter:\n                original_id = chunk.source_document.id\n                cleaned = clean_chunk_id_copy(chunk)\n                cleaned_id = cleaned.source_document.id\n                # Needed so the final DocumentInsertionRecord returned can have\n                # the original document ID. cleaned_chunks might not contain IDs\n                # exactly as callers supplied them.\n                id_map[cleaned_id] = original_id\n                seen_ids.add(cleaned_id)\n                yield cleaned\n\n        new_document_id_to_original_document_id: dict[str, str] = {}\n        all_cleaned_doc_ids: set[str] = set()\n\n        existing_docs: set[str] = set()\n\n        with (\n            concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,\n            self._httpx_client_context as http_client,\n        ):\n            # We require the start and end index for each document in order to\n            # know precisely which chunks to delete. This information exists for\n            # documents that have `chunk_count` in the database, but not for\n            # `old_version` documents.\n            enriched_doc_infos: list[EnrichedDocumentIndexingInfo] = [\n                _enrich_basic_chunk_info(\n                    index_name=self._index_name,\n                    http_client=http_client,\n                    document_id=doc_id,\n                    previous_chunk_count=doc_id_to_previous_chunk_cnt[doc_id],\n                    new_chunk_count=doc_id_to_new_chunk_cnt[doc_id],\n                )\n                for doc_id in doc_id_to_chunk_cnt_diff.keys()\n                # TODO(andrei), WARNING: Don't we need to sanitize these doc IDs?\n            ]\n\n            for enriched_doc_info in enriched_doc_infos:\n                # If the document has previously indexed chunks, we know it\n                # previously existed and this is a reindex.\n                if enriched_doc_info.chunk_end_index:\n                    existing_docs.add(enriched_doc_info.doc_id)\n\n            # Now, for each doc, we know exactly where to start and end our\n            # deletion. So let's generate the chunk IDs for each chunk to\n            # delete.\n            # WARNING: This code seems to use\n            # indexing_metadata.doc_id_to_chunk_cnt_diff as the source of truth\n            # for which chunks to delete. This implies that the onus is on the\n            # caller to ensure doc_id_to_chunk_cnt_diff only contains docs\n            # relevant to the chunks argument to this method. This should not be\n            # the contract of DocumentIndex; and this code is only a refactor\n            # from old code. It would seem we should use all_cleaned_doc_ids as\n            # the source of truth.\n            chunks_to_delete = get_document_chunk_ids(\n                enriched_document_info_list=enriched_doc_infos,\n                tenant_id=self._tenant_id,\n                large_chunks_enabled=self._large_chunks_enabled,\n            )\n\n            # Delete old Vespa documents.\n            for doc_chunk_ids_batch in batch_generator(chunks_to_delete, BATCH_SIZE):\n                delete_vespa_chunks(\n                    doc_chunk_ids=doc_chunk_ids_batch,\n                    index_name=self._index_name,\n                    http_client=http_client,\n                    executor=executor,\n                )\n\n            # Insert new Vespa documents, streaming through the cleaning\n            # pipeline so chunks are never fully materialized.\n            cleaned_chunks = _clean_and_track(\n                chunks,\n                new_document_id_to_original_document_id,\n                all_cleaned_doc_ids,\n            )\n            for chunk_batch in batch_generator(\n                cleaned_chunks, min(BATCH_SIZE, MAX_CHUNKS_PER_DOC_BATCH)\n            ):\n                batch_index_vespa_chunks(\n                    chunks=chunk_batch,\n                    index_name=self._index_name,\n                    http_client=http_client,\n                    multitenant=self._multitenant,\n                    executor=executor,\n                )\n\n        return [\n            DocumentInsertionRecord(\n                document_id=new_document_id_to_original_document_id[cleaned_doc_id],\n                already_existed=cleaned_doc_id in existing_docs,\n            )\n            for cleaned_doc_id in all_cleaned_doc_ids\n        ]\n\n    def delete(self, document_id: str, chunk_count: int | None = None) -> int:\n        total_chunks_deleted = 0\n\n        sanitized_doc_id = replace_invalid_doc_id_characters(document_id)\n\n        with (\n            concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,\n            self._httpx_client_context as http_client,\n        ):\n            enriched_doc_info = _enrich_basic_chunk_info(\n                index_name=self._index_name,\n                http_client=http_client,\n                document_id=sanitized_doc_id,\n                previous_chunk_count=chunk_count,\n                new_chunk_count=0,\n            )\n            chunks_to_delete = get_document_chunk_ids(\n                enriched_document_info_list=[enriched_doc_info],\n                tenant_id=self._tenant_id,\n                large_chunks_enabled=self._large_chunks_enabled,\n            )\n\n            for doc_chunk_ids_batch in batch_generator(chunks_to_delete, BATCH_SIZE):\n                total_chunks_deleted += len(doc_chunk_ids_batch)\n                delete_vespa_chunks(\n                    doc_chunk_ids=doc_chunk_ids_batch,\n                    index_name=self._index_name,\n                    http_client=http_client,\n                    executor=executor,\n                )\n\n        return total_chunks_deleted\n\n    def update(\n        self,\n        update_requests: list[MetadataUpdateRequest],\n    ) -> None:\n        # WARNING: This method can be called by vespa_metadata_sync_task, which\n        # is kicked off by check_for_vespa_sync_task, notably before a document\n        # has finished indexing. In this way, chunk_count below could be unknown\n        # even for chunks not on the \"old\" chunk ID system; i.e. there could be\n        # a race condition. Passing in None to _enrich_basic_chunk_info should\n        # handle this, but a higher level TODO might be to not run update at all\n        # on connectors that are still indexing, and therefore do not yet have a\n        # chunk count because update_docs_chunk_count__no_commit has not been\n        # run yet.\n        with self._httpx_client_context as httpx_client:\n            # Each invocation of this method can contain multiple update requests.\n            for update_request in update_requests:\n                # Each update request can correspond to multiple documents.\n                for doc_id in update_request.document_ids:\n                    # NOTE: -1 represents an unknown chunk count.\n                    chunk_count = update_request.doc_id_to_chunk_cnt[doc_id]\n                    sanitized_doc_id = replace_invalid_doc_id_characters(doc_id)\n                    enriched_doc_info = _enrich_basic_chunk_info(\n                        index_name=self._index_name,\n                        http_client=httpx_client,\n                        document_id=sanitized_doc_id,\n                        previous_chunk_count=chunk_count if chunk_count >= 0 else None,\n                        new_chunk_count=0,  # WARNING: This semantically makes no sense and is misusing this function.\n                    )\n\n                    doc_chunk_ids = get_document_chunk_ids(\n                        enriched_document_info_list=[enriched_doc_info],\n                        tenant_id=self._tenant_id,\n                        large_chunks_enabled=self._large_chunks_enabled,\n                    )\n\n                    for doc_chunk_id in doc_chunk_ids:\n                        _update_single_chunk(\n                            doc_chunk_id,\n                            self._index_name,\n                            # NOTE: Used only for logging, raw ID is ok here.\n                            doc_id,\n                            httpx_client,\n                            update_request,\n                        )\n\n                    logger.info(\n                        f\"Updated {len(doc_chunk_ids)} chunks for document {doc_id}.\"\n                    )\n\n    def id_based_retrieval(\n        self,\n        chunk_requests: list[DocumentSectionRequest],\n        filters: IndexFilters,\n        batch_retrieval: bool = False,\n    ) -> list[InferenceChunk]:\n        sanitized_chunk_requests = [\n            VespaChunkRequest(\n                document_id=replace_invalid_doc_id_characters(\n                    chunk_request.document_id\n                ),\n                min_chunk_ind=chunk_request.min_chunk_ind,\n                max_chunk_ind=chunk_request.max_chunk_ind,\n            )\n            for chunk_request in chunk_requests\n        ]\n\n        if batch_retrieval:\n            return cleanup_content_for_chunks(\n                batch_search_api_retrieval(\n                    index_name=self._index_name,\n                    chunk_requests=sanitized_chunk_requests,\n                    filters=filters,\n                    # No one was passing in this parameter in the legacy\n                    # interface, it always defaulted to False.\n                    get_large_chunks=False,\n                )\n            )\n        return cleanup_content_for_chunks(\n            parallel_visit_api_retrieval(\n                index_name=self._index_name,\n                chunk_requests=sanitized_chunk_requests,\n                filters=filters,\n                # No one was passing in this parameter in the legacy interface,\n                # it always defaulted to False.\n                get_large_chunks=False,\n            )\n        )\n\n    def hybrid_retrieval(\n        self,\n        query: str,\n        query_embedding: Embedding,\n        final_keywords: list[str] | None,\n        query_type: QueryType,\n        filters: IndexFilters,\n        num_to_retrieve: int,\n    ) -> list[InferenceChunk]:\n        vespa_where_clauses = build_vespa_filters(filters)\n        # Avoid over-fetching a very large candidate set for global-phase reranking.\n        # Keep enough headroom for quality while capping cost on larger indices.\n        target_hits = min(max(4 * num_to_retrieve, 100), RERANK_COUNT)\n\n        yql = (\n            YQL_BASE.format(index_name=self._index_name)\n            + vespa_where_clauses\n            + f\"(({{targetHits: {target_hits}}}nearestNeighbor(embeddings, query_embedding)) \"\n            + f\"or ({{targetHits: {target_hits}}}nearestNeighbor(title_embedding, query_embedding)) \"\n            + 'or ({grammar: \"weakAnd\"}userInput(@query)) '\n            + f'or ({{defaultIndex: \"{CONTENT_SUMMARY}\"}}userInput(@query)))'\n        )\n\n        final_query = \" \".join(final_keywords) if final_keywords else query\n\n        ranking_profile = (\n            f\"hybrid_search_{query_type.value}_base_{len(query_embedding)}\"\n        )\n\n        logger.info(f\"Selected ranking profile: {ranking_profile}\")\n\n        logger.debug(f\"Query YQL: {yql}\")\n\n        # In this interface we do not pass in hybrid alpha. Tracing the codepath\n        # of the legacy Vespa interface, it so happens that KEYWORD always\n        # corresponds to an alpha of 0.2 (from KEYWORD_QUERY_HYBRID_ALPHA), and\n        # SEMANTIC to 0.5 (from HYBRID_ALPHA). HYBRID_ALPHA_KEYWORD was only\n        # used in dead code so we do not use it here.\n        hybrid_alpha = (\n            KEYWORD_QUERY_HYBRID_ALPHA\n            if query_type == QueryType.KEYWORD\n            else HYBRID_ALPHA\n        )\n\n        params: dict[str, str | int | float] = {\n            \"yql\": yql,\n            \"query\": final_query,\n            \"input.query(query_embedding)\": str(query_embedding),\n            \"input.query(decay_factor)\": str(DOC_TIME_DECAY * RECENCY_BIAS_MULTIPLIER),\n            \"input.query(alpha)\": hybrid_alpha,\n            \"input.query(title_content_ratio)\": TITLE_CONTENT_RATIO,\n            \"hits\": num_to_retrieve,\n            \"ranking.profile\": ranking_profile,\n            \"timeout\": VESPA_TIMEOUT,\n        }\n\n        return cleanup_content_for_chunks(query_vespa(params))\n\n    def keyword_retrieval(\n        self,\n        query: str,\n        filters: IndexFilters,\n        num_to_retrieve: int,\n    ) -> list[InferenceChunk]:\n        raise NotImplementedError\n\n    def semantic_retrieval(\n        self,\n        query_embedding: Embedding,\n        filters: IndexFilters,\n        num_to_retrieve: int,\n    ) -> list[InferenceChunk]:\n        raise NotImplementedError\n\n    def random_retrieval(\n        self,\n        filters: IndexFilters,\n        num_to_retrieve: int = 100,\n        dirty: bool | None = None,  # noqa: ARG002\n    ) -> list[InferenceChunk]:\n        vespa_where_clauses = build_vespa_filters(filters, remove_trailing_and=True)\n\n        yql = YQL_BASE.format(index_name=self._index_name) + vespa_where_clauses\n\n        random_seed = random.randint(0, 1_000_000)\n\n        params: dict[str, str | int | float] = {\n            \"yql\": yql,\n            \"hits\": num_to_retrieve,\n            \"timeout\": VESPA_TIMEOUT,\n            \"ranking.profile\": \"random_\",\n            \"ranking.properties.random.seed\": random_seed,\n        }\n\n        return cleanup_content_for_chunks(query_vespa(params))\n\n    def get_raw_document_chunks(self, document_id: str) -> list[dict[str, Any]]:\n        \"\"\"Gets all raw document chunks for a document as returned by Vespa.\n\n        Used in the Vespa migration task.\n\n        Args:\n            document_id: The ID of the document to get chunks for.\n\n        Returns:\n            List of raw document chunks.\n        \"\"\"\n        # Vespa doc IDs are sanitized using replace_invalid_doc_id_characters.\n        sanitized_document_id = replace_invalid_doc_id_characters(document_id)\n        chunk_request = VespaChunkRequest(document_id=sanitized_document_id)\n        raw_chunks = get_chunks_via_visit_api(\n            chunk_request=chunk_request,\n            index_name=self._index_name,\n            filters=IndexFilters(access_control_list=None, tenant_id=self._tenant_id),\n            get_large_chunks=False,\n            short_tensor_format=True,\n        )\n        # Vespa returns other metadata around the actual document chunk. The raw\n        # chunk we're interested in is in the \"fields\" field.\n        raw_document_chunks = [chunk[\"fields\"] for chunk in raw_chunks]\n        return raw_document_chunks\n\n    def get_all_raw_document_chunks_paginated(\n        self,\n        continuation_token_map: dict[int, str | None],\n        page_size: int,\n    ) -> tuple[list[dict[str, Any]], dict[int, str | None]]:\n        \"\"\"Gets all the chunks in Vespa, paginated.\n\n        Used in the chunk-level Vespa-to-OpenSearch migration task.\n\n        Args:\n            continuation_token: Token returned by Vespa representing a page\n                offset. None to start from the beginning. Defaults to None.\n            page_size: Best-effort batch size for the visit.\n\n        Returns:\n            Tuple of (list of chunk dicts, next continuation token or None). The\n                continuation token is None when the visit is complete.\n        \"\"\"\n        raw_chunks, next_continuation_token_map = get_all_chunks_paginated(\n            index_name=self._index_name,\n            tenant_state=TenantState(\n                tenant_id=self._tenant_id, multitenant=MULTI_TENANT\n            ),\n            continuation_token_map=continuation_token_map,\n            page_size=page_size,\n        )\n        return raw_chunks, next_continuation_token_map\n\n    def index_raw_chunks(self, chunks: list[dict[str, Any]]) -> None:\n        \"\"\"Indexes raw document chunks into Vespa.\n\n        To only be used in tests. Not for production.\n        \"\"\"\n        json_header = {\n            \"Content-Type\": \"application/json\",\n        }\n        with self._httpx_client_context as http_client:\n            for chunk in chunks:\n                chunk_id = str(\n                    get_uuid_from_chunk_info(\n                        document_id=chunk[DOCUMENT_ID],\n                        chunk_id=chunk[CHUNK_ID],\n                        tenant_id=self._tenant_id,\n                    )\n                )\n                vespa_url = f\"{DOCUMENT_ID_ENDPOINT.format(index_name=self._index_name)}/{chunk_id}\"\n                response = http_client.post(\n                    vespa_url,\n                    headers=json_header,\n                    json={\"fields\": chunk},\n                )\n                response.raise_for_status()\n\n    def get_chunk_count(self) -> int:\n        \"\"\"Returns the exact number of document chunks in Vespa for this tenant.\n\n        Uses the Vespa Search API with `limit 0` and `ranking.profile=unranked`\n        to get an exact count without fetching any document data.\n\n        Includes large chunks. There is no way to filter these out using the\n        Search API.\n        \"\"\"\n        where_clause = (\n            f'tenant_id contains \"{self._tenant_id}\"' if self._multitenant else \"true\"\n        )\n        yql = f\"select documentid from {self._index_name} where {where_clause} limit 0\"\n        params: dict[str, str | int] = {\n            \"yql\": yql,\n            \"ranking.profile\": \"unranked\",\n            \"timeout\": VESPA_TIMEOUT,\n        }\n\n        with get_vespa_http_client() as http_client:\n            response = http_client.post(SEARCH_ENDPOINT, json=params)\n            response.raise_for_status()\n            response_data = response.json()\n        return response_data[\"root\"][\"fields\"][\"totalCount\"]\n"
  },
  {
    "path": "backend/onyx/document_index/vespa_constants.py",
    "content": "from onyx.configs.app_configs import VESPA_CLOUD_URL\nfrom onyx.configs.app_configs import VESPA_CONFIG_SERVER_HOST\nfrom onyx.configs.app_configs import VESPA_HOST\nfrom onyx.configs.app_configs import VESPA_PORT\nfrom onyx.configs.app_configs import VESPA_TENANT_PORT\nfrom onyx.configs.constants import SOURCE_TYPE\n\n# config server\n\n\nVESPA_CONFIG_SERVER_URL = (\n    VESPA_CLOUD_URL or f\"http://{VESPA_CONFIG_SERVER_HOST}:{VESPA_TENANT_PORT}\"\n)\nVESPA_APPLICATION_ENDPOINT = f\"{VESPA_CONFIG_SERVER_URL}/application/v2\"\n\n# main search application\nVESPA_APP_CONTAINER_URL = VESPA_CLOUD_URL or f\"http://{VESPA_HOST}:{VESPA_PORT}\"\n\n\n# danswer_chunk below is defined in vespa/app_configs/schemas/danswer_chunk.sd.jinja\nDOCUMENT_ID_ENDPOINT = (\n    f\"{VESPA_APP_CONTAINER_URL}/document/v1/default/{{index_name}}/docid\"\n)\n\n# the default document id endpoint is http://localhost:8080/document/v1/default/danswer_chunk/docid\n\nSEARCH_ENDPOINT = f\"{VESPA_APP_CONTAINER_URL}/search/\"\n\n# Since Vespa doesn't allow batching of inserts / updates, we use threads to\n# parallelize the operations.\nNUM_THREADS = 32\nMAX_ID_SEARCH_QUERY_SIZE = 400\n# Suspect that adding too many \"or\" conditions will cause Vespa to timeout and return\n# an empty list of hits (with no error status and coverage: 0 and degraded)\nMAX_OR_CONDITIONS = 10\n# up from 500ms for now, since we've seen quite a few timeouts\n# in the long term, we are looking to improve the performance of Vespa\n# so that we can bring this back to default\nVESPA_TIMEOUT = \"10s\"\n# The size of the batch to use for batched operations like inserts / updates.\n# The batch will likely be sent to a threadpool of size NUM_THREADS.\nBATCH_SIZE = 128\n\nTENANT_ID = \"tenant_id\"\nDOCUMENT_ID = \"document_id\"\nCHUNK_ID = \"chunk_id\"\nBLURB = \"blurb\"\nCONTENT = \"content\"\nSOURCE_LINKS = \"source_links\"\nSEMANTIC_IDENTIFIER = \"semantic_identifier\"\nTITLE = \"title\"\nSKIP_TITLE_EMBEDDING = \"skip_title\"\nSECTION_CONTINUATION = \"section_continuation\"\nEMBEDDINGS = \"embeddings\"\nTITLE_EMBEDDING = \"title_embedding\"\nACCESS_CONTROL_LIST = \"access_control_list\"\nDOCUMENT_SETS = \"document_sets\"\nUSER_FILE = \"user_file\"\nUSER_FOLDER = \"user_folder\"\nUSER_PROJECT = \"user_project\"\nPERSONAS = \"personas\"\nLARGE_CHUNK_REFERENCE_IDS = \"large_chunk_reference_ids\"\nMETADATA = \"metadata\"\nMETADATA_LIST = \"metadata_list\"\nMETADATA_SUFFIX = \"metadata_suffix\"\nDOC_SUMMARY = \"doc_summary\"\nCHUNK_CONTEXT = \"chunk_context\"\nBOOST = \"boost\"\nAGGREGATED_CHUNK_BOOST_FACTOR = \"aggregated_chunk_boost_factor\"\nDOC_UPDATED_AT = \"doc_updated_at\"  # Indexed as seconds since epoch\nPRIMARY_OWNERS = \"primary_owners\"\nSECONDARY_OWNERS = \"secondary_owners\"\nRECENCY_BIAS = \"recency_bias\"\nHIDDEN = \"hidden\"\n# for legacy reasons, called `name` in Vespa despite it really being an ID\nIMAGE_FILE_NAME = \"image_file_name\"\n\n# Specific to Vespa, needed for highlighting matching keywords / section\nCONTENT_SUMMARY = \"content_summary\"\n\nFULL_CHUNK_EMBEDDING_KEY = \"full_chunk\"\n\n\nYQL_BASE = (\n    f\"select \"\n    f\"documentid, \"\n    f\"{DOCUMENT_ID}, \"\n    f\"{CHUNK_ID}, \"\n    f\"{BLURB}, \"\n    f\"{CONTENT}, \"\n    f\"{SOURCE_TYPE}, \"\n    f\"{SOURCE_LINKS}, \"\n    f\"{SEMANTIC_IDENTIFIER}, \"\n    f\"{TITLE}, \"\n    f\"{SECTION_CONTINUATION}, \"\n    f\"{IMAGE_FILE_NAME}, \"\n    f\"{BOOST}, \"\n    f\"{AGGREGATED_CHUNK_BOOST_FACTOR}, \"\n    f\"{HIDDEN}, \"\n    f\"{DOC_UPDATED_AT}, \"\n    f\"{PRIMARY_OWNERS}, \"\n    f\"{SECONDARY_OWNERS}, \"\n    f\"{LARGE_CHUNK_REFERENCE_IDS}, \"\n    f\"{METADATA}, \"\n    f\"{METADATA_SUFFIX}, \"\n    f\"{DOC_SUMMARY}, \"\n    f\"{CHUNK_CONTEXT}, \"\n    f\"{CONTENT_SUMMARY} \"\n    f\"from {{index_name}} where \"\n)\n"
  },
  {
    "path": "backend/onyx/error_handling/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/error_handling/error_codes.py",
    "content": "\"\"\"\nStandardized error codes for the Onyx backend.\n\nUsage:\n    from onyx.error_handling.error_codes import OnyxErrorCode\n    from onyx.error_handling.exceptions import OnyxError\n\n    raise OnyxError(OnyxErrorCode.UNAUTHENTICATED, \"Token expired\")\n\"\"\"\n\nfrom enum import Enum\n\n\nclass OnyxErrorCode(Enum):\n    \"\"\"\n    Each member is a tuple of (error_code_string, http_status_code).\n\n    The error_code_string is a stable, machine-readable identifier that\n    API consumers can match on. The http_status_code is the default HTTP\n    status to return.\n    \"\"\"\n\n    # ------------------------------------------------------------------\n    # Authentication (401)\n    # ------------------------------------------------------------------\n    UNAUTHENTICATED = (\"UNAUTHENTICATED\", 401)\n    INVALID_TOKEN = (\"INVALID_TOKEN\", 401)\n    TOKEN_EXPIRED = (\"TOKEN_EXPIRED\", 401)\n    CSRF_FAILURE = (\"CSRF_FAILURE\", 403)\n\n    # ------------------------------------------------------------------\n    # Authorization (403)\n    # ------------------------------------------------------------------\n    UNAUTHORIZED = (\"UNAUTHORIZED\", 403)\n    INSUFFICIENT_PERMISSIONS = (\"INSUFFICIENT_PERMISSIONS\", 403)\n    ADMIN_ONLY = (\"ADMIN_ONLY\", 403)\n    EE_REQUIRED = (\"EE_REQUIRED\", 403)\n    SINGLE_TENANT_ONLY = (\"SINGLE_TENANT_ONLY\", 403)\n    ENV_VAR_GATED = (\"ENV_VAR_GATED\", 403)\n\n    # ------------------------------------------------------------------\n    # Validation / Bad Request (400)\n    # ------------------------------------------------------------------\n    VALIDATION_ERROR = (\"VALIDATION_ERROR\", 400)\n    INVALID_INPUT = (\"INVALID_INPUT\", 400)\n    MISSING_REQUIRED_FIELD = (\"MISSING_REQUIRED_FIELD\", 400)\n    QUERY_REJECTED = (\"QUERY_REJECTED\", 400)\n\n    # ------------------------------------------------------------------\n    # Not Found (404)\n    # ------------------------------------------------------------------\n    NOT_FOUND = (\"NOT_FOUND\", 404)\n    CONNECTOR_NOT_FOUND = (\"CONNECTOR_NOT_FOUND\", 404)\n    CREDENTIAL_NOT_FOUND = (\"CREDENTIAL_NOT_FOUND\", 404)\n    PERSONA_NOT_FOUND = (\"PERSONA_NOT_FOUND\", 404)\n    DOCUMENT_NOT_FOUND = (\"DOCUMENT_NOT_FOUND\", 404)\n    SESSION_NOT_FOUND = (\"SESSION_NOT_FOUND\", 404)\n    USER_NOT_FOUND = (\"USER_NOT_FOUND\", 404)\n\n    # ------------------------------------------------------------------\n    # Conflict (409)\n    # ------------------------------------------------------------------\n    CONFLICT = (\"CONFLICT\", 409)\n    DUPLICATE_RESOURCE = (\"DUPLICATE_RESOURCE\", 409)\n\n    # ------------------------------------------------------------------\n    # Rate Limiting / Quotas (429 / 402)\n    # ------------------------------------------------------------------\n    RATE_LIMITED = (\"RATE_LIMITED\", 429)\n    SEAT_LIMIT_EXCEEDED = (\"SEAT_LIMIT_EXCEEDED\", 402)\n\n    # ------------------------------------------------------------------\n    # Payload (413)\n    # ------------------------------------------------------------------\n    PAYLOAD_TOO_LARGE = (\"PAYLOAD_TOO_LARGE\", 413)\n\n    # ------------------------------------------------------------------\n    # Connector / Credential Errors (400-range)\n    # ------------------------------------------------------------------\n    CONNECTOR_VALIDATION_FAILED = (\"CONNECTOR_VALIDATION_FAILED\", 400)\n    CREDENTIAL_INVALID = (\"CREDENTIAL_INVALID\", 400)\n    CREDENTIAL_EXPIRED = (\"CREDENTIAL_EXPIRED\", 401)\n\n    # ------------------------------------------------------------------\n    # Server Errors (5xx)\n    # ------------------------------------------------------------------\n    INTERNAL_ERROR = (\"INTERNAL_ERROR\", 500)\n    NOT_IMPLEMENTED = (\"NOT_IMPLEMENTED\", 501)\n    SERVICE_UNAVAILABLE = (\"SERVICE_UNAVAILABLE\", 503)\n    BAD_GATEWAY = (\"BAD_GATEWAY\", 502)\n    LLM_PROVIDER_ERROR = (\"LLM_PROVIDER_ERROR\", 502)\n    HOOK_EXECUTION_FAILED = (\"HOOK_EXECUTION_FAILED\", 502)\n    GATEWAY_TIMEOUT = (\"GATEWAY_TIMEOUT\", 504)\n\n    def __init__(self, code: str, status_code: int) -> None:\n        self.code = code\n        self.status_code = status_code\n\n    def detail(self, message: str | None = None) -> dict[str, str]:\n        \"\"\"Build a structured error detail dict.\n\n        Returns a dict like:\n            {\"error_code\": \"UNAUTHENTICATED\", \"detail\": \"Token expired\"}\n\n        If no message is supplied, the error code itself is used as the detail.\n        \"\"\"\n        return {\n            \"error_code\": self.code,\n            \"detail\": message or self.code,\n        }\n"
  },
  {
    "path": "backend/onyx/error_handling/exceptions.py",
    "content": "\"\"\"OnyxError — the single exception type for all Onyx business errors.\n\nRaise ``OnyxError`` instead of ``HTTPException`` in business code.  A global\nFastAPI exception handler (registered via ``register_onyx_exception_handlers``)\nconverts it into a JSON response with the standard\n``{\"error_code\": \"...\", \"detail\": \"...\"}`` shape.\n\nUsage::\n\n    from onyx.error_handling.error_codes import OnyxErrorCode\n    from onyx.error_handling.exceptions import OnyxError\n\n    raise OnyxError(OnyxErrorCode.NOT_FOUND, \"Session not found\")\n\nFor upstream errors with a dynamic HTTP status (e.g. billing service),\nuse ``status_code_override``::\n\n    raise OnyxError(\n        OnyxErrorCode.BAD_GATEWAY,\n        detail,\n        status_code_override=upstream_status,\n    )\n\"\"\"\n\nfrom fastapi import FastAPI\nfrom fastapi import Request\nfrom fastapi.responses import JSONResponse\n\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass OnyxError(Exception):\n    \"\"\"Structured error that maps to a specific ``OnyxErrorCode``.\n\n    Attributes:\n        error_code: The ``OnyxErrorCode`` enum member.\n        detail: Human-readable detail (defaults to the error code string).\n        status_code: HTTP status — either overridden or from the error code.\n    \"\"\"\n\n    def __init__(\n        self,\n        error_code: OnyxErrorCode,\n        detail: str | None = None,\n        *,\n        status_code_override: int | None = None,\n    ) -> None:\n        resolved_detail = detail or error_code.code\n        super().__init__(resolved_detail)\n        self.error_code = error_code\n        self.detail = resolved_detail\n        self._status_code_override = status_code_override\n\n    @property\n    def status_code(self) -> int:\n        return self._status_code_override or self.error_code.status_code\n\n\ndef log_onyx_error(exc: OnyxError) -> None:\n    detail = exc.detail\n    status_code = exc.status_code\n    if status_code >= 500:\n        logger.error(f\"OnyxError {exc.error_code.code}: {detail}\")\n    elif status_code >= 400:\n        logger.warning(f\"OnyxError {exc.error_code.code}: {detail}\")\n\n\ndef onyx_error_to_json_response(exc: OnyxError) -> JSONResponse:\n    return JSONResponse(\n        status_code=exc.status_code,\n        content=exc.error_code.detail(exc.detail),\n    )\n\n\ndef register_onyx_exception_handlers(app: FastAPI) -> None:\n    \"\"\"Register a global handler that converts ``OnyxError`` to JSON responses.\n\n    Must be called *after* the app is created but *before* it starts serving.\n    The handler logs at WARNING for 4xx and ERROR for 5xx.\n    \"\"\"\n\n    @app.exception_handler(OnyxError)\n    async def _handle_onyx_error(\n        request: Request,  # noqa: ARG001\n        exc: OnyxError,\n    ) -> JSONResponse:\n        log_onyx_error(exc)\n        return onyx_error_to_json_response(exc)\n"
  },
  {
    "path": "backend/onyx/evals/README.md",
    "content": "# Onyx Evaluations\n\nThis directory contains the evaluation framework for testing and measuring the performance of Onyx's chat and retrieval systems.\n\n## Overview\n\nThe evaluation system uses [Braintrust](https://www.braintrust.dev/) to run automated evaluations against test datasets. It measures the quality of responses generated by Onyx's chat system and can be used to track performance improvements over time.\n\n## Prerequisites\n\n**Important**: The model server must be running in order for evals to work properly. Make sure your model server is up and running before executing any evaluations.\n\n## Running Evaluations\n\nKick off a remote job\n```bash\nonyx/backend$ python -m dotenv -f .vscode/.env run -- python onyx/evals/eval_cli.py --remote --api-key <SUPER_CLOUD_USER_API_KEY> --search-permissions-email <email account to reference> --remote --remote-dataset-name Simple\n```\n\nYou can also run the CLI directly from the command line:\n\n```bash\nonyx$ python -m dotenv -f .vscode/.env run -- python backend/onyx/evals/eval_cli.py --local-dataset-path backend/onyx/evals/data/eval.json --search-permissions-email richard@onyx.app\n```\nSave the env var ONYX_EVAL_API_KEY in your .env file so you don't have to specify it every time for triggering remote runs.\nYou'll need to create an API key in the admin panel to run evals.\n\n\n### Production Environment\n\n### Local Development\n\nFor local development, use the `eval_cli.py` script. We recommend starting it from the VS Code launch configuration for the best debugging experience.\n\n#### Using VS Code Launch Configuration\n\n1. Open VS Code in the project root\n2. Go to the \"Run and Debug\" panel (Ctrl/Cmd + Shift + D)\n3. Select \"Eval CLI\" from the dropdown\n4. Click the play button or press F5\n\nThis will run the evaluation with the following default settings:\n- Uses the local data file at `evals/data/data.json`\n- Enables verbose output\n- Sets up proper environment variables and Python path\n\n#### CLI Options\n\n- `--local-data-path`: Path to local JSON file containing test data (defaults to `evals/data/data.json`)\n- `--remote-dataset-name`: Name of remote Braintrust dataset\n- `--braintrust-project`: Braintrust project name (overrides `BRAINTRUST_PROJECT` env var)\n- `--verbose`: Enable verbose output\n- `--no-send-logs`: Skip sending logs to Braintrust (useful for local testing)\n- `--local-only`: Run evals locally without Braintrust, output results to CLI only\n\n## Test Data\n\nThe evaluation system uses test data stored in `evals/data/data.json`. This file contains a list of test cases, each with:\n- `input`: The question or prompt to test\n\nExample test case:\n```json\n{\n    \"input\": {\n      \"message\": \"What is the capital of France?\"\n    }\n}\n```\n\n### Per-Test Configuration\n\nConfigure tool forcing, assertions, and model settings per-test by adding optional fields to each test case.\n\n#### Tool Configuration\n\n- `force_tools`: List of tool type names to force for this specific test\n- `expected_tools`: List of tool type names expected to be called\n- `require_all_tools`: If true, all expected tools must be called (default: false)\n\n#### Model Configuration\n\n- `model`: Model version to use (e.g., \"gpt-4o\", \"claude-3-5-sonnet\")\n- `model_provider`: Model provider (e.g., \"openai\", \"anthropic\")\n- `temperature`: Temperature for the model (default: 0.0)\n\nExample with tool and model configuration:\n```json\n[\n  {\n    \"input\": {\n      \"message\": \"Find information about Python programming\"\n    },\n    \"expected_tools\": [\"SearchTool\"],\n    \"force_tools\": [\"SearchTool\"],\n    \"model\": \"gpt-4o\"\n  },\n  {\n    \"input\": {\n      \"message\": \"Search the web for recent news about AI\"\n    },\n    \"expected_tools\": [\"WebSearchTool\"],\n    \"model\": \"claude-3-5-sonnet\",\n    \"model_provider\": \"anthropic\"\n  },\n  {\n    \"input\": {\n      \"message\": \"Calculate 2 + 2\"\n    },\n    \"expected_tools\": [\"PythonTool\"],\n    \"temperature\": 0.5\n  }\n]\n```\n\n### Multi-Turn Evaluations\n\nFor testing realistic multi-turn conversations where each turn may require different tools, use the `messages` array format instead of a single `message`:\n\n```json\n{\n  \"input\": {\n    \"messages\": [\n      {\n        \"message\": \"What's the latest news about OpenAI today?\",\n        \"expected_tools\": [\"WebSearchTool\", \"OpenURLTool\"]\n      },\n      {\n        \"message\": \"Now search our internal docs for our OpenAI integration guide\",\n        \"expected_tools\": [\"SearchTool\"]\n      },\n      {\n        \"message\": \"Thanks, that's helpful!\",\n        \"expected_tools\": []\n      }\n    ]\n  }\n}\n```\n\nEach message in the `messages` array can have its own configuration:\n- `message`: The user message text (required)\n- `expected_tools`: List of tool types expected to be called for this turn\n- `require_all_tools`: If true, all expected tools must be called (default: false)\n- `force_tools`: List of tool types to force for this turn\n- `model`: Model version override for this turn\n- `model_provider`: Model provider override for this turn\n- `temperature`: Temperature override for this turn\n\nMulti-turn evals run within a single chat session, so the model has full context of previous turns when responding.\n\n### Available Tool Types\n\nThe following built-in tool types can be used:\n- `SearchTool`: Internal document search\n- `WebSearchTool`: Internet/web search\n- `ImageGenerationTool`: Image generation\n- `PythonTool`: Python code execution\n- `OpenURLTool`: Open and read URLs\n\n### Braintrust Dashboard\n\nAfter running evaluations, you can view results in the Braintrust dashboard. The evaluation will report:\n- `tool_assertion`: Score of 1.0 if tool assertions passed (or no assertions configured), 0.0 if failed\n- Metadata including `tools_called`, `tools_called_count`, and assertion details\n"
  },
  {
    "path": "backend/onyx/evals/eval.py",
    "content": "import time\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom contextlib import contextmanager\nfrom typing import Any\n\nfrom sqlalchemy import Engine\nfrom sqlalchemy import event\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.orm import sessionmaker\nfrom sqlalchemy.orm.session import SessionTransaction\n\nfrom onyx.chat.chat_state import ChatStateContainer\nfrom onyx.chat.models import ChatFullResponse\nfrom onyx.chat.process_message import gather_stream_full\nfrom onyx.chat.process_message import handle_stream_message_objects\nfrom onyx.configs.constants import DEFAULT_PERSONA_ID\nfrom onyx.db.chat import create_chat_session\nfrom onyx.db.engine.sql_engine import get_sqlalchemy_engine\nfrom onyx.db.users import get_user_by_email\nfrom onyx.evals.models import ChatFullEvalResult\nfrom onyx.evals.models import EvalationAck\nfrom onyx.evals.models import EvalConfigurationOptions\nfrom onyx.evals.models import EvalMessage\nfrom onyx.evals.models import EvalProvider\nfrom onyx.evals.models import EvalTimings\nfrom onyx.evals.models import EvalToolResult\nfrom onyx.evals.models import MultiTurnEvalResult\nfrom onyx.evals.models import ToolAssertion\nfrom onyx.evals.provider import get_provider\nfrom onyx.llm.override_models import LLMOverride\nfrom onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE\nfrom onyx.server.query_and_chat.models import ChatSessionCreationRequest\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\n@contextmanager\ndef isolated_ephemeral_session_factory(\n    engine: Engine,\n) -> Generator[Callable[[], Session], None, None]:\n    \"\"\"\n    Create a session factory that creates sessions that run in a transaction that gets rolled back.\n    This is useful for running evals without any lasting db side effects.\n    \"\"\"\n    tenant_id = get_current_tenant_id()\n    schema_translate_map = {None: tenant_id}\n    conn = engine.connect().execution_options(schema_translate_map=schema_translate_map)\n    outer_tx = conn.begin()\n    Maker = sessionmaker(bind=conn, expire_on_commit=False, future=True)\n\n    def make_session() -> Session:\n        s = Maker()\n        s.begin_nested()\n\n        @event.listens_for(s, \"after_transaction_end\")\n        def _restart_savepoint(\n            session: Session, transaction: SessionTransaction\n        ) -> None:\n            if transaction.nested and not (\n                transaction._parent is not None and transaction._parent.nested\n            ):\n                session.begin_nested()\n\n        return s\n\n    try:\n        yield make_session\n    finally:\n        outer_tx.rollback()\n        conn.close()\n\n\ndef _chat_full_response_to_eval_result(\n    full: ChatFullResponse,\n    stream_start_time: float,\n) -> ChatFullEvalResult:\n    \"\"\"Map ChatFullResponse from gather_stream_full to eval result components.\"\"\"\n    tools_called = [tc.tool_name for tc in full.tool_calls]\n    tool_call_details: list[dict[str, Any]] = [\n        {\"tool_name\": tc.tool_name, \"tool_arguments\": tc.tool_arguments}\n        for tc in full.tool_calls\n    ]\n    stream_end_time = time.time()\n    total_ms = (stream_end_time - stream_start_time) * 1000\n    timings = EvalTimings(\n        total_ms=total_ms,\n        llm_first_token_ms=None,\n        tool_execution_ms={},\n        stream_processing_ms=total_ms,\n    )\n    return ChatFullEvalResult(\n        answer=full.answer,\n        tools_called=tools_called,\n        tool_call_details=tool_call_details,\n        citations=full.citation_info,\n        timings=timings,\n    )\n\n\ndef evaluate_tool_assertions(\n    tools_called: list[str],\n    assertions: ToolAssertion | None,\n) -> tuple[bool | None, str | None]:\n    \"\"\"\n    Evaluate tool assertions against the tools that were called.\n\n    Args:\n        tools_called: List of tool names that were called during evaluation\n        assertions: Tool assertions to check, or None if no assertions\n\n    Returns:\n        Tuple of (passed, details) where:\n        - passed: True if assertions passed, False if failed, None if no assertions\n        - details: Human-readable explanation of the result\n    \"\"\"\n    if assertions is None:\n        return None, None\n\n    expected_tools = set(assertions.expected_tools)\n    called_tools = set(tools_called)\n\n    if assertions.require_all:\n        # All expected tools must be called\n        missing_tools = expected_tools - called_tools\n        if missing_tools:\n            return False, (\n                f\"Missing expected tools: {sorted(missing_tools)}. Called tools: {sorted(called_tools)}\"\n            )\n        return True, (\n            f\"All expected tools called: {sorted(expected_tools)}. Called tools: {sorted(called_tools)}\"\n        )\n    else:\n        # At least one expected tool must be called\n        matched_tools = expected_tools & called_tools\n        if not matched_tools:\n            return False, (\n                f\"None of expected tools called. Expected one of: {sorted(expected_tools)}. Called tools: {sorted(called_tools)}\"\n            )\n        return True, (\n            f\"Expected tool(s) called: {sorted(matched_tools)}. Called tools: {sorted(called_tools)}\"\n        )\n\n\ndef _get_answer_with_tools(\n    eval_input: dict[str, Any],\n    configuration: EvalConfigurationOptions,\n) -> EvalToolResult:\n    \"\"\"\n    Get answer from the chat system with full tool call tracking.\n\n    Args:\n        eval_input: Dictionary containing:\n            - 'message': The user message to send\n            - 'force_tools' (optional): List of tool types to force for this input\n            - 'expected_tools' (optional): List of tool types expected to be called\n            - 'require_all_tools' (optional): If true, all expected tools must be called\n            - 'model' (optional): Model version to use (e.g., \"gpt-4o\", \"claude-3-5-sonnet\")\n            - 'model_provider' (optional): Model provider (e.g., \"openai\", \"anthropic\")\n            - 'temperature' (optional): Temperature for the model\n        configuration: Evaluation configuration options\n\n    Returns:\n        EvalToolResult containing the answer and tool call information\n    \"\"\"\n    engine = get_sqlalchemy_engine()\n    with isolated_ephemeral_session_factory(engine) as SessionLocal:\n        with SessionLocal() as db_session:\n            full_configuration = configuration.get_configuration(db_session)\n\n            # Handle per-input tool forcing (from data file)\n            forced_tool_ids: list[int] = []\n            input_force_tools = eval_input.get(\"force_tools\", [])\n            if input_force_tools:\n                from onyx.db.tools import get_builtin_tool\n                from onyx.tools.built_in_tools import BUILT_IN_TOOL_MAP\n\n                for tool_type in input_force_tools:\n                    if tool_type in BUILT_IN_TOOL_MAP:\n                        tool_id = get_builtin_tool(\n                            db_session, BUILT_IN_TOOL_MAP[tool_type]\n                        ).id\n                        if tool_id not in forced_tool_ids:\n                            forced_tool_ids.append(tool_id)\n\n            # Build tool assertions from per-input config\n            tool_assertions: ToolAssertion | None = None\n            input_expected_tools = eval_input.get(\"expected_tools\", [])\n            if input_expected_tools:\n                tool_assertions = ToolAssertion(\n                    expected_tools=input_expected_tools,\n                    require_all=eval_input.get(\"require_all_tools\", False),\n                )\n\n            # Handle per-input model configuration\n            llm_override = full_configuration.llm\n            input_model = eval_input.get(\"model\")\n            input_model_provider = eval_input.get(\"model_provider\")\n            input_temperature = eval_input.get(\"temperature\")\n\n            if input_model or input_model_provider or input_temperature is not None:\n                # Create a new LLMOverride with per-input values, falling back to config\n                llm_override = LLMOverride(\n                    model_provider=input_model_provider or llm_override.model_provider,\n                    model_version=input_model or llm_override.model_version,\n                    temperature=(\n                        input_temperature\n                        if input_temperature is not None\n                        else llm_override.temperature\n                    ),\n                )\n\n            user = get_user_by_email(configuration.search_permissions_email, db_session)\n            if not user:\n                raise ValueError(\n                    f\"User not found for email: {configuration.search_permissions_email}\"\n                )\n\n            forced_tool_id = forced_tool_ids[0] if forced_tool_ids else None\n            request = SendMessageRequest(\n                message=eval_input[\"message\"],\n                llm_override=llm_override,\n                allowed_tool_ids=full_configuration.allowed_tool_ids,\n                forced_tool_id=forced_tool_id,\n                chat_session_info=ChatSessionCreationRequest(\n                    persona_id=DEFAULT_PERSONA_ID,\n                    description=\"Eval session\",\n                ),\n            )\n\n            stream_start_time = time.time()\n            state_container = ChatStateContainer()\n            packets = handle_stream_message_objects(\n                new_msg_req=request,\n                user=user,\n                db_session=db_session,\n                external_state_container=state_container,\n            )\n            full = gather_stream_full(packets, state_container)\n\n            result = _chat_full_response_to_eval_result(full, stream_start_time)\n\n            # Evaluate tool assertions\n            assertion_passed, assertion_details = evaluate_tool_assertions(\n                result.tools_called, tool_assertions\n            )\n\n            logger.info(\n                f\"Eval completed. Tools called: {result.tools_called}.\\n\"\n                f\"Assertion passed: {assertion_passed}. Details: {assertion_details}\\n\"\n            )\n\n            return EvalToolResult(\n                answer=result.answer,\n                tools_called=result.tools_called,\n                tool_call_details=result.tool_call_details,\n                citations=result.citations,\n                assertion_passed=assertion_passed,\n                assertion_details=assertion_details,\n                timings=result.timings,\n            )\n\n\ndef _get_multi_turn_answer_with_tools(\n    eval_input: dict[str, Any],\n    configuration: EvalConfigurationOptions,\n) -> MultiTurnEvalResult:\n    \"\"\"\n    Get answers from a multi-turn conversation with tool call tracking for each turn.\n\n    Args:\n        eval_input: Dictionary containing:\n            - 'messages': List of message dicts, each with:\n                - 'message': The user message text\n                - 'expected_tools' (optional): List of expected tool types\n                - 'require_all_tools' (optional): If true, all expected tools must be called\n                - 'model' (optional): Model version override for this turn\n                - 'model_provider' (optional): Provider override for this turn\n                - 'temperature' (optional): Temperature override for this turn\n                - 'force_tools' (optional): List of tool types to force\n        configuration: Evaluation configuration options\n\n    Returns:\n        MultiTurnEvalResult containing per-turn results and aggregate metrics\n    \"\"\"\n    messages_data = eval_input.get(\"messages\", [])\n    if not messages_data:\n        raise ValueError(\"Multi-turn eval requires 'messages' array in input\")\n\n    # Parse messages into EvalMessage objects\n    messages: list[EvalMessage] = []\n    for msg_data in messages_data:\n        messages.append(\n            EvalMessage(\n                message=msg_data[\"message\"],\n                expected_tools=msg_data.get(\"expected_tools\", []),\n                require_all_tools=msg_data.get(\"require_all_tools\", False),\n                model=msg_data.get(\"model\"),\n                model_provider=msg_data.get(\"model_provider\"),\n                temperature=msg_data.get(\"temperature\"),\n                force_tools=msg_data.get(\"force_tools\", []),\n            )\n        )\n\n    turn_results: list[EvalToolResult] = []\n\n    engine = get_sqlalchemy_engine()\n    with isolated_ephemeral_session_factory(engine) as SessionLocal:\n        with SessionLocal() as db_session:\n            full_configuration = configuration.get_configuration(db_session)\n\n            user = get_user_by_email(configuration.search_permissions_email, db_session)\n            if not user:\n                raise ValueError(\n                    f\"User not found for email: {configuration.search_permissions_email}\"\n                )\n            # Cache user_id to avoid SQLAlchemy expiration issues\n            user_id = user.id\n\n            # Create a single chat session for all turns\n            chat_session = create_chat_session(\n                db_session=db_session,\n                description=\"Multi-turn eval session\",\n                user_id=user_id,\n                persona_id=DEFAULT_PERSONA_ID,\n                onyxbot_flow=True,\n            )\n            chat_session_id = chat_session.id\n\n            # Process each turn sequentially\n            for turn_idx, msg in enumerate(messages):\n                logger.info(\n                    f\"Processing turn {turn_idx + 1}/{len(messages)}: {msg.message[:50]}...\"\n                )\n\n                # Handle per-turn tool forcing\n                forced_tool_ids: list[int] = []\n                if msg.force_tools:\n                    from onyx.db.tools import get_builtin_tool\n                    from onyx.tools.built_in_tools import BUILT_IN_TOOL_MAP\n\n                    for tool_type in msg.force_tools:\n                        if tool_type in BUILT_IN_TOOL_MAP:\n                            tool_id = get_builtin_tool(\n                                db_session, BUILT_IN_TOOL_MAP[tool_type]\n                            ).id\n                            if tool_id not in forced_tool_ids:\n                                forced_tool_ids.append(tool_id)\n\n                # Build tool assertions for this turn\n                tool_assertions: ToolAssertion | None = None\n                if msg.expected_tools:\n                    tool_assertions = ToolAssertion(\n                        expected_tools=msg.expected_tools,\n                        require_all=msg.require_all_tools,\n                    )\n\n                # Handle per-turn model configuration\n                llm_override = full_configuration.llm\n                if msg.model or msg.model_provider or msg.temperature is not None:\n                    llm_override = LLMOverride(\n                        model_provider=msg.model_provider\n                        or llm_override.model_provider,\n                        model_version=msg.model or llm_override.model_version,\n                        temperature=(\n                            msg.temperature\n                            if msg.temperature is not None\n                            else llm_override.temperature\n                        ),\n                    )\n\n                # Create request for this turn using SendMessageRequest (same API as handle_stream_message_objects)\n                # Use AUTO_PLACE_AFTER_LATEST_MESSAGE to chain messages\n                forced_tool_id = forced_tool_ids[0] if forced_tool_ids else None\n                request = SendMessageRequest(\n                    chat_session_id=chat_session_id,\n                    parent_message_id=AUTO_PLACE_AFTER_LATEST_MESSAGE,\n                    message=msg.message,\n                    llm_override=llm_override,\n                    allowed_tool_ids=full_configuration.allowed_tool_ids,\n                    forced_tool_id=forced_tool_id,\n                )\n\n                # Stream and gather results for this turn via handle_stream_message_objects + gather_stream_full\n                stream_start_time = time.time()\n                state_container = ChatStateContainer()\n                packets = handle_stream_message_objects(\n                    new_msg_req=request,\n                    user=user,\n                    db_session=db_session,\n                    external_state_container=state_container,\n                )\n                full = gather_stream_full(packets, state_container)\n\n                result = _chat_full_response_to_eval_result(full, stream_start_time)\n\n                # Evaluate tool assertions for this turn\n                assertion_passed, assertion_details = evaluate_tool_assertions(\n                    result.tools_called, tool_assertions\n                )\n\n                logger.info(\n                    f\"Turn {turn_idx + 1} completed. Tools called: {result.tools_called}.\\n\"\n                    f\"Assertion passed: {assertion_passed}. Details: {assertion_details}\\n\"\n                )\n\n                turn_results.append(\n                    EvalToolResult(\n                        answer=result.answer,\n                        tools_called=result.tools_called,\n                        tool_call_details=result.tool_call_details,\n                        citations=result.citations,\n                        assertion_passed=assertion_passed,\n                        assertion_details=assertion_details,\n                        timings=result.timings,\n                    )\n                )\n\n    # Calculate aggregate metrics\n    pass_count = sum(1 for r in turn_results if r.assertion_passed is True)\n    fail_count = sum(1 for r in turn_results if r.assertion_passed is False)\n    # Consider \"all passed\" only if there are no failures\n    # (turns with no assertions don't count as failures)\n    all_passed = fail_count == 0\n\n    return MultiTurnEvalResult(\n        turn_results=turn_results,\n        all_passed=all_passed,\n        pass_count=pass_count,\n        fail_count=fail_count,\n        total_turns=len(turn_results),\n    )\n\n\ndef run_eval(\n    configuration: EvalConfigurationOptions,\n    data: list[dict[str, Any]] | None = None,\n    remote_dataset_name: str | None = None,\n    provider: EvalProvider = get_provider(),\n) -> EvalationAck:\n    if data is not None and remote_dataset_name is not None:\n        raise ValueError(\"Cannot specify both data and remote_dataset_name\")\n\n    if data is None and remote_dataset_name is None:\n        raise ValueError(\"Must specify either data or remote_dataset_name\")\n\n    return provider.eval(\n        task=lambda eval_input: _get_answer_with_tools(eval_input, configuration),\n        configuration=configuration,\n        data=data,\n        remote_dataset_name=remote_dataset_name,\n        multi_turn_task=lambda eval_input: _get_multi_turn_answer_with_tools(\n            eval_input, configuration\n        ),\n    )\n"
  },
  {
    "path": "backend/onyx/evals/eval_cli.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nCLI for running evaluations with local configurations.\n\"\"\"\n\nimport argparse\nimport json\nimport logging\nimport os\nfrom typing import Any\n\nimport braintrust\nimport requests\n\nfrom onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW\nfrom onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE\nfrom onyx.configs.constants import POSTGRES_WEB_APP_NAME\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.evals.eval import run_eval\nfrom onyx.evals.models import EvalationAck\nfrom onyx.evals.models import EvalConfigurationOptions\nfrom onyx.evals.provider import get_provider\nfrom onyx.tracing.setup import setup_tracing\n\n\ndef setup_session_factory() -> None:\n    SqlEngine.set_app_name(POSTGRES_WEB_APP_NAME)\n    SqlEngine.init_engine(\n        pool_size=POSTGRES_API_SERVER_POOL_SIZE,\n        max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW,\n    )\n\n\ndef load_data_local(\n    local_data_path: str,\n) -> list[dict[str, Any]]:\n    if not os.path.isfile(local_data_path):\n        raise ValueError(f\"Local data file does not exist: {local_data_path}\")\n    with open(local_data_path, \"r\") as f:\n        return json.load(f)\n\n\ndef configure_logging_for_evals(verbose: bool) -> None:\n    \"\"\"Set logging level to WARNING to reduce noise during evals.\"\"\"\n    if verbose:\n        return\n\n    # Set environment variable for any future logger creation\n    os.environ[\"LOG_LEVEL\"] = \"WARNING\"\n\n    # Force WARNING level for root logger and its handlers\n    root = logging.getLogger()\n    root.setLevel(logging.WARNING)\n    for handler in root.handlers:\n        handler.setLevel(logging.WARNING)\n\n    # Force WARNING level for all existing loggers and their handlers\n    for name in list(logging.Logger.manager.loggerDict.keys()):\n        logger = logging.getLogger(name)\n        logger.setLevel(logging.WARNING)\n        for handler in logger.handlers:\n            handler.setLevel(logging.WARNING)\n\n    # Set a basic config to ensure new loggers also use WARNING\n    logging.basicConfig(level=logging.WARNING, force=True)\n\n\ndef run_local(\n    local_data_path: str | None,\n    remote_dataset_name: str | None,\n    search_permissions_email: str | None = None,\n    no_send_logs: bool = False,\n    local_only: bool = False,\n    verbose: bool = False,\n) -> EvalationAck:\n    \"\"\"\n    Run evaluation with local configurations.\n\n    Tool forcing and assertions are configured per-test in the data file using:\n    - force_tools: List of tool type names to force\n    - expected_tools: List of tool type names expected to be called\n    - require_all_tools: If true, all expected tools must be called\n\n    Args:\n        local_data_path: Path to local JSON file\n        remote_dataset_name: Name of remote Braintrust dataset\n        search_permissions_email: Optional email address to impersonate for the evaluation\n        no_send_logs: Whether to skip sending logs to Braintrust\n        local_only: If True, use LocalEvalProvider (CLI output only, no Braintrust)\n\n    Returns:\n        EvalationAck: The evaluation result\n    \"\"\"\n    setup_session_factory()\n    configure_logging_for_evals(\n        verbose=verbose,\n    )\n    # Only setup tracing if not running in local-only mode\n    if not local_only:\n        setup_tracing()\n\n    if search_permissions_email is None:\n        raise ValueError(\"search_permissions_email is required for local evaluation\")\n\n    configuration = EvalConfigurationOptions(\n        search_permissions_email=search_permissions_email,\n        dataset_name=remote_dataset_name or \"local\",\n        no_send_logs=no_send_logs,\n    )\n\n    # Get the appropriate provider\n    provider = get_provider(local_only=local_only)\n\n    if remote_dataset_name:\n        score = run_eval(\n            configuration=configuration,\n            remote_dataset_name=remote_dataset_name,\n            provider=provider,\n        )\n    else:\n        if local_data_path is None:\n            raise ValueError(\n                \"local_data_path or remote_dataset_name is required for local evaluation\"\n            )\n        data = load_data_local(local_data_path)\n        score = run_eval(configuration=configuration, data=data, provider=provider)\n\n    return score\n\n\ndef run_remote(\n    base_url: str,\n    api_key: str,\n    remote_dataset_name: str,\n    search_permissions_email: str,\n    payload: dict[str, Any] | None = None,\n) -> dict[str, Any]:\n    \"\"\"\n    Trigger an eval pipeline execution on a remote server.\n\n    Tool forcing and assertions are configured per-test in the dataset.\n\n    Args:\n        base_url: Base URL of the remote server (e.g., \"https://test.onyx.app\")\n        api_key: API key for authentication\n        remote_dataset_name: Name of remote Braintrust dataset\n        search_permissions_email: Email address to use for the evaluation.\n        payload: Optional payload to send with the request\n\n    Returns:\n        Response from the remote server\n\n    Raises:\n        requests.RequestException: If the request fails\n    \"\"\"\n    if payload is None:\n        payload = {}\n\n    payload[\"search_permissions_email\"] = search_permissions_email\n    payload[\"dataset_name\"] = remote_dataset_name\n\n    url = f\"{base_url}/api/evals/eval_run\"\n    headers = {\n        \"Authorization\": f\"Bearer {api_key}\",\n        \"Content-Type\": \"application/json\",\n    }\n    response = requests.post(url, headers=headers, json=payload)\n\n    response.raise_for_status()\n    return response.json()\n\n\ndef main() -> None:\n    \"\"\"Main CLI entry point.\"\"\"\n    parser = argparse.ArgumentParser(\n        description=\"Run evaluations with local configurations\"\n    )\n\n    parser.add_argument(\n        \"--local-data-path\",\n        type=str,\n        help=\"Path to local JSON file containing test data\",\n    )\n\n    parser.add_argument(\n        \"--remote-dataset-name\",\n        type=str,\n        help=\"Name of remote Braintrust dataset\",\n    )\n\n    parser.add_argument(\n        \"--braintrust-project\",\n        type=str,\n        help=\"Braintrust project name\",\n        default=\"Onyx\",\n    )\n\n    parser.add_argument(\"--verbose\", action=\"store_true\", help=\"Enable verbose output\")\n\n    # Remote eval arguments\n    parser.add_argument(\n        \"--base-url\",\n        type=str,\n        default=\"https://test.onyx.app\",\n        help=\"Base URL of the remote server (default: https://test.onyx.app)\",\n    )\n\n    parser.add_argument(\n        \"--api-key\",\n        type=str,\n        help=\"API key for authentication with the remote server\",\n    )\n\n    parser.add_argument(\n        \"--remote\",\n        action=\"store_true\",\n        help=\"Run evaluation on remote server instead of locally\",\n    )\n\n    parser.add_argument(\n        \"--search-permissions-email\",\n        type=str,\n        help=\"Email address to impersonate for the evaluation\",\n    )\n\n    parser.add_argument(\n        \"--no-send-logs\",\n        action=\"store_true\",\n        help=\"Do not send logs to the remote server\",\n        default=False,\n    )\n\n    parser.add_argument(\n        \"--local-only\",\n        action=\"store_true\",\n        help=\"Run evals locally without Braintrust, output results to CLI only\",\n        default=False,\n    )\n\n    args = parser.parse_args()\n\n    if args.local_data_path:\n        print(f\"Loading data from local file: {args.local_data_path}\")\n    elif args.remote_dataset_name:\n        if args.local_only:\n            raise ValueError(\n                \"--local-only cannot be used with --remote-dataset-name. Use --local-data-path with a local JSON file instead.\"\n            )\n        print(f\"Loading data from remote dataset: {args.remote_dataset_name}\")\n        dataset = braintrust.init_dataset(\n            project=args.braintrust_project, name=args.remote_dataset_name\n        )\n        dataset_size = len(list(dataset.fetch()))\n        print(f\"Dataset size: {dataset_size}\")\n    if args.remote:\n        if not args.api_key:\n            print(\"Using API Key from ONYX_EVAL_API_KEY\")\n        api_key: str = (\n            args.api_key if args.api_key else os.environ.get(\"ONYX_EVAL_API_KEY\", \"\")\n        )\n        print(f\"Running evaluation on remote server: {args.base_url}\")\n\n        if args.search_permissions_email:\n            print(f\"Using search permissions email: {args.search_permissions_email}\")\n\n        try:\n            result = run_remote(\n                args.base_url,\n                api_key,\n                args.remote_dataset_name,\n                search_permissions_email=args.search_permissions_email,\n            )\n            print(f\"Remote evaluation triggered successfully: {result}\")\n        except requests.RequestException as e:\n            print(f\"Error triggering remote evaluation: {e}\")\n            return\n    else:\n        if args.local_only:\n            print(\"Running in local-only mode (no Braintrust)\")\n        else:\n            print(f\"Using Braintrust project: {args.braintrust_project}\")\n\n        if args.search_permissions_email:\n            print(f\"Using search permissions email: {args.search_permissions_email}\")\n\n        run_local(\n            local_data_path=args.local_data_path,\n            remote_dataset_name=args.remote_dataset_name,\n            search_permissions_email=args.search_permissions_email,\n            no_send_logs=args.no_send_logs,\n            local_only=args.local_only,\n            verbose=args.verbose,\n        )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/onyx/evals/models.py",
    "content": "from abc import ABC\nfrom abc import abstractmethod\nfrom collections.abc import Callable\nfrom typing import Any\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.tools import get_builtin_tool\nfrom onyx.llm.override_models import LLMOverride\nfrom onyx.server.query_and_chat.streaming_models import CitationInfo\nfrom onyx.tools.built_in_tools import BUILT_IN_TOOL_MAP\n\n\nclass ToolAssertion(BaseModel):\n    \"\"\"Assertion about expected tool usage during evaluation.\"\"\"\n\n    expected_tools: list[str]  # Tool type names that should be called\n    require_all: bool = False  # If True, ALL expected tools must be called\n\n\nclass EvalTimings(BaseModel):\n    \"\"\"Timing information for eval execution.\"\"\"\n\n    total_ms: float  # Total time for the eval\n    llm_first_token_ms: float | None = None  # Time to first token from LLM\n    tool_execution_ms: dict[str, float] = Field(\n        default_factory=dict\n    )  # Per-tool timings\n    stream_processing_ms: float | None = None  # Time to process the stream\n\n\nclass ChatFullEvalResult(BaseModel):\n    \"\"\"Raw eval components from ChatFullResponse (before tool assertions).\"\"\"\n\n    answer: str\n    tools_called: list[str]\n    tool_call_details: list[dict[str, Any]]\n    citations: list[CitationInfo]\n    timings: EvalTimings\n\n\nclass EvalToolResult(BaseModel):\n    \"\"\"Result of a single eval with tool call information.\"\"\"\n\n    answer: str\n    tools_called: list[str]  # Names of tools that were called\n    tool_call_details: list[dict[str, Any]]  # Full tool call info\n    citations: list[CitationInfo]  # Citations used in the answer\n    assertion_passed: bool | None = None  # None if no assertion configured\n    assertion_details: str | None = None  # Explanation of pass/fail\n    timings: EvalTimings | None = None  # Timing information for the eval\n\n\nclass EvalMessage(BaseModel):\n    \"\"\"Single message in a multi-turn evaluation conversation.\"\"\"\n\n    message: str  # The message text to send\n    expected_tools: list[str] = Field(\n        default_factory=list\n    )  # Expected tools for this turn\n    require_all_tools: bool = False  # If True, ALL expected tools must be called\n    # Per-message model configuration overrides\n    model: str | None = None\n    model_provider: str | None = None\n    temperature: float | None = None\n    force_tools: list[str] = Field(default_factory=list)  # Tools to force for this turn\n\n\nclass MultiTurnEvalResult(BaseModel):\n    \"\"\"Result of a multi-turn evaluation containing per-message results.\"\"\"\n\n    turn_results: list[EvalToolResult]  # Results for each turn/message\n    all_passed: bool  # True if all turn assertions passed\n    pass_count: int  # Number of turns that passed\n    fail_count: int  # Number of turns that failed\n    total_turns: int  # Total number of turns\n\n\nclass EvalConfiguration(BaseModel):\n    llm: LLMOverride = Field(default_factory=LLMOverride)\n    search_permissions_email: str\n    allowed_tool_ids: list[int]\n\n\nclass EvalConfigurationOptions(BaseModel):\n    builtin_tool_types: list[str] = list(BUILT_IN_TOOL_MAP.keys())\n    llm: LLMOverride = LLMOverride(\n        model_provider=None,\n        model_version=\"gpt-4o\",\n        temperature=0.0,\n    )\n    search_permissions_email: str\n    dataset_name: str\n    no_send_logs: bool = False\n    # Optional override for Braintrust project (defaults to BRAINTRUST_PROJECT env var)\n    braintrust_project: str | None = None\n    # Optional experiment name for the eval run (shows in Braintrust UI)\n    experiment_name: str | None = None\n\n    def get_configuration(self, db_session: Session) -> EvalConfiguration:\n        return EvalConfiguration(\n            llm=self.llm,\n            search_permissions_email=self.search_permissions_email,\n            allowed_tool_ids=[\n                get_builtin_tool(db_session, BUILT_IN_TOOL_MAP[tool]).id\n                for tool in self.builtin_tool_types\n            ],\n        )\n\n\nclass EvalationAck(BaseModel):\n    success: bool\n\n\nclass EvalProvider(ABC):\n    @abstractmethod\n    def eval(\n        self,\n        task: Callable[[dict[str, Any]], EvalToolResult],\n        configuration: EvalConfigurationOptions,\n        data: list[dict[str, Any]] | None = None,\n        remote_dataset_name: str | None = None,\n        multi_turn_task: \"Callable[[dict[str, Any]], MultiTurnEvalResult] | None\" = None,\n    ) -> EvalationAck:\n        pass\n"
  },
  {
    "path": "backend/onyx/evals/one_off/create_braintrust_dataset.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nScript to create a Braintrust dataset from the DR Master Question & Metric Sheet CSV.\n\nThis script:\n1. Parses the CSV file\n2. Filters records where \"Should we use it\" is TRUE and \"web-only\" is in categories\n3. Creates a Braintrust dataset with Question as input and research_type metadata\n\nUsage:\n    python create_braintrust_dataset.py --dataset-name \"MyDataset\"\n    python create_braintrust_dataset.py --dataset-name \"MyDataset\" --csv-path \"/path/to/csv\"\n\"\"\"\n\nimport argparse\nimport csv\nimport os\nimport sys\nfrom typing import Any\nfrom typing import Dict\nfrom typing import List\n\nfrom onyx.configs.app_configs import BRAINTRUST_API_KEY\n\ntry:\n    from braintrust import init_dataset\nexcept ImportError:\n    print(\n        \"Error: braintrust package not found. Please install it with: pip install braintrust\"\n    )\n    sys.exit(1)\n\n\ndef column_letter_to_index(column_letter: str) -> int:\n    \"\"\"Convert Google Sheets column letter (A, B, C, etc.) to 0-based index.\"\"\"\n    result = 0\n    for char in column_letter.upper():\n        result = result * 26 + (ord(char) - ord(\"A\") + 1)\n    return result - 1\n\n\ndef parse_csv_file(csv_path: str) -> List[Dict[str, Any]]:\n    \"\"\"Parse the CSV file and extract relevant records.\"\"\"\n    records = []\n\n    with open(csv_path, \"r\", encoding=\"utf-8\") as file:\n        # Skip the first few header rows and read the actual data\n        lines = file.readlines()\n\n        # Find the actual data start (skip header rows)\n        data_start = 0\n        for i, line in enumerate(lines):\n            if \"Should we use it?\" in line:\n                data_start = i + 1\n                break\n\n        # Parse the CSV data starting from the data_start line\n        csv_reader = csv.reader(lines[data_start:])\n\n        # Define Google Sheets column references for easy modification\n        SHOULD_USE_COL = \"C\"  # \"Should we use it?\"\n        QUESTION_COL = \"H\"  # \"Question\"\n        EXPECTED_DEPTH_COL = \"J\"  # \"Expected Depth\"\n        CATEGORIES_COL = \"M\"  # \"Categories\"\n        OPENAI_DEEP_COL = \"AA\"  # \"OpenAI Deep Answer\"\n        OPENAI_THINKING_COL = \"O\"  # \"OpenAI Thinking Answer\"\n\n        for row_num, row in enumerate(csv_reader, start=data_start + 1):\n            if len(row) < 15:  # Ensure we have enough columns\n                continue\n\n            # Extract relevant fields using Google Sheets column references\n            should_use = (\n                row[column_letter_to_index(SHOULD_USE_COL)].strip().upper()\n                if len(row) > column_letter_to_index(SHOULD_USE_COL)\n                else \"\"\n            )\n            question = (\n                row[column_letter_to_index(QUESTION_COL)].strip()\n                if len(row) > column_letter_to_index(QUESTION_COL)\n                else \"\"\n            )\n            expected_depth = (\n                row[column_letter_to_index(EXPECTED_DEPTH_COL)].strip()\n                if len(row) > column_letter_to_index(EXPECTED_DEPTH_COL)\n                else \"\"\n            )\n            categories = (\n                row[column_letter_to_index(CATEGORIES_COL)].strip()\n                if len(row) > column_letter_to_index(CATEGORIES_COL)\n                else \"\"\n            )\n            openai_deep_answer = (\n                row[column_letter_to_index(OPENAI_DEEP_COL)].strip()\n                if len(row) > column_letter_to_index(OPENAI_DEEP_COL)\n                else \"\"\n            )\n            openai_thinking_answer = (\n                row[column_letter_to_index(OPENAI_THINKING_COL)].strip()\n                if len(row) > column_letter_to_index(OPENAI_THINKING_COL)\n                else \"\"\n            )\n\n            # Filter records: should_use = TRUE and categories contains \"web-only\"\n            if (\n                should_use == \"TRUE\" and \"web-only\" in categories and question\n            ):  # Ensure question is not empty\n                if expected_depth == \"Deep\":\n                    records.extend(\n                        [\n                            {\n                                \"question\": question\n                                + \". All info is contained in the quesiton. DO NOT ask any clarifying questions.\",\n                                \"research_type\": \"DEEP\",\n                                \"categories\": categories,\n                                \"expected_depth\": expected_depth,\n                                \"expected_answer\": openai_deep_answer,\n                                \"row_number\": row_num,\n                            }\n                        ]\n                    )\n                else:\n                    records.extend(\n                        [\n                            {\n                                \"question\": question,\n                                \"research_type\": \"THOUGHTFUL\",\n                                \"categories\": categories,\n                                \"expected_depth\": expected_depth,\n                                \"expected_answer\": openai_thinking_answer,\n                                \"row_number\": row_num,\n                            }\n                        ]\n                    )\n\n    return records\n\n\ndef create_braintrust_dataset(records: List[Dict[str, Any]], dataset_name: str) -> None:\n    \"\"\"Create a Braintrust dataset with the filtered records.\"\"\"\n\n    # Check if BRAINTRUST_API_KEY is set\n    if BRAINTRUST_API_KEY == \"\":\n        print(\"WARNING: BRAINTRUST_API_KEY environment variable is not set.\")\n        print(\n            \"The script will show what would be inserted but won't actually create the dataset.\"\n        )\n        print(\n            \"To actually create the dataset, set your BRAINTRUST_API_KEY environment variable.\"\n        )\n        print()\n\n        # Show what would be inserted\n        print(\n            f\"Would create Braintrust dataset '{dataset_name}' with {len(records)} records:\"\n        )\n        for i, record in enumerate(records, 1):\n            print(f\"Record {i}/{len(records)}:\")\n            print(f\"  Question: {record['question'][:100]}...\")\n            print(f\"  Research Type: {record['research_type']}\")\n            print(f\"  Expected Answer: {record['expected_answer'][:100]}...\")\n            print()\n        return\n\n    # Initialize the dataset\n    dataset = init_dataset(\"Onyx\", dataset_name, api_key=BRAINTRUST_API_KEY)\n\n    print(f\"Creating Braintrust dataset with {len(records)} records...\")\n\n    # Insert records into the dataset\n    for i, record in enumerate(records, 1):\n        record_id = dataset.insert(\n            {\"message\": record[\"question\"], \"research_type\": record[\"research_type\"]},\n            expected=record[\"expected_answer\"],\n        )\n        print(f\"Inserted record {i}/{len(records)}: ID {record_id}\")\n        print(f\"  Question: {record['question'][:100]}...\")\n        print(f\"  Research Type: {record['research_type']}\")\n        print(f\"  Expected Answer: {record['expected_answer'][:100]}...\")\n        print()\n\n    # Flush to ensure all records are sent\n    dataset.flush()\n    print(f\"Successfully created dataset with {len(records)} records!\")\n\n\ndef main() -> None:\n    \"\"\"Main function to run the script.\"\"\"\n    parser = argparse.ArgumentParser(\n        description=\"Create a Braintrust dataset from the DR Master Question & Metric Sheet CSV\"\n    )\n    parser.add_argument(\n        \"--dataset-name\", required=True, help=\"Name of the Braintrust dataset to create\"\n    )\n    parser.add_argument(\n        \"--csv-path\",\n        default=\"/Users/richardguan/onyx/backend/onyx/evals/data/DR Master Question & Metric Sheet - Sheet1.csv\",\n        help=\"Path to the CSV file (default: %(default)s)\",\n    )\n\n    args = parser.parse_args()\n\n    csv_path = args.csv_path\n    dataset_name = args.dataset_name\n\n    if not os.path.exists(csv_path):\n        print(f\"Error: CSV file not found at {csv_path}\")\n        sys.exit(1)\n\n    print(\"Parsing CSV file...\")\n    records = parse_csv_file(csv_path)\n\n    print(f\"Found {len(records)} records matching criteria:\")\n    print(\"- Should we use it = TRUE\")\n    print(\"- Categories contains 'web-only'\")\n    print(\"- Question is not empty\")\n    print()\n\n    if not records:\n        print(\"No records found matching the criteria!\")\n        sys.exit(1)\n\n    # Show summary of research types\n    deep_count = sum(1 for r in records if r[\"research_type\"] == \"DEEP\")\n    thoughtful_count = sum(1 for r in records if r[\"research_type\"] == \"THOUGHTFUL\")\n\n    print(\"Research type breakdown:\")\n    print(f\"  DEEP: {deep_count}\")\n    print(f\"  THOUGHTFUL: {thoughtful_count}\")\n    print()\n\n    # Create the Braintrust dataset\n    create_braintrust_dataset(records, dataset_name)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/onyx/evals/provider.py",
    "content": "from onyx.evals.models import EvalProvider\nfrom onyx.evals.providers.braintrust import BraintrustEvalProvider\nfrom onyx.evals.providers.local import LocalEvalProvider\n\n\ndef get_provider(local_only: bool = False) -> EvalProvider:\n    \"\"\"\n    Get the appropriate eval provider.\n\n    Args:\n        local_only: If True, use LocalEvalProvider (CLI output only, no Braintrust).\n                   If False, use BraintrustEvalProvider.\n\n    Returns:\n        The appropriate EvalProvider instance.\n    \"\"\"\n    if local_only:\n        return LocalEvalProvider()\n    return BraintrustEvalProvider()\n"
  },
  {
    "path": "backend/onyx/evals/providers/braintrust.py",
    "content": "from collections.abc import Callable\nfrom typing import Any\nfrom typing import Union\n\nfrom braintrust import Eval\nfrom braintrust import EvalCase\nfrom braintrust import init_dataset\nfrom braintrust import Score\n\nfrom onyx.configs.app_configs import BRAINTRUST_MAX_CONCURRENCY\nfrom onyx.configs.app_configs import BRAINTRUST_PROJECT\nfrom onyx.evals.models import EvalationAck\nfrom onyx.evals.models import EvalConfigurationOptions\nfrom onyx.evals.models import EvalProvider\nfrom onyx.evals.models import EvalToolResult\nfrom onyx.evals.models import MultiTurnEvalResult\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Union type for both single and multi-turn results\nEvalResult = Union[EvalToolResult, MultiTurnEvalResult]\n\n\ndef tool_assertion_scorer(\n    input: dict[str, Any], output: EvalResult, expected: EvalResult | None\n) -> Score:\n    \"\"\"\n    Scorer that checks if tool assertions passed.\n\n    Handles both single-turn (EvalToolResult) and multi-turn (MultiTurnEvalResult) outputs.\n\n    Args:\n        input: The input data for the evaluation case.\n        output: The actual output from the task.\n        expected: The expected output (unused for this scorer).\n\n    Returns:\n        Score with value 1.0 if passed or no assertions, 0.0 if failed.\n    \"\"\"\n    # input and expected are unused but required by Braintrust scorer signature\n    _ = input, expected\n\n    # Handle multi-turn results\n    if isinstance(output, MultiTurnEvalResult):\n        # Calculate score based on pass rate\n        if output.total_turns == 0:\n            score = 1.0\n        else:\n            # Score is the ratio of passed assertions\n            assertions_evaluated = output.pass_count + output.fail_count\n            if assertions_evaluated == 0:\n                score = 1.0  # No assertions configured\n            else:\n                score = output.pass_count / assertions_evaluated\n\n        return Score(\n            name=\"tool_assertion\",\n            score=score,\n            metadata={\n                \"is_multi_turn\": True,\n                \"total_turns\": output.total_turns,\n                \"pass_count\": output.pass_count,\n                \"fail_count\": output.fail_count,\n                \"all_passed\": output.all_passed,\n                \"turn_details\": [\n                    {\n                        \"tools_called\": r.tools_called,\n                        \"assertion_passed\": r.assertion_passed,\n                        \"assertion_details\": r.assertion_details,\n                    }\n                    for r in output.turn_results\n                ],\n            },\n        )\n\n    # Handle single-turn results (EvalToolResult)\n    if output.assertion_passed is None:\n        # No assertions configured - return passing score\n        return Score(\n            name=\"tool_assertion\",\n            score=1.0,\n            metadata={\n                \"is_multi_turn\": False,\n                \"tools_called\": output.tools_called,\n                \"tools_called_count\": len(output.tools_called),\n                \"assertion_configured\": False,\n            },\n        )\n\n    return Score(\n        name=\"tool_assertion\",\n        score=1.0 if output.assertion_passed else 0.0,\n        metadata={\n            \"is_multi_turn\": False,\n            \"tools_called\": output.tools_called,\n            \"tools_called_count\": len(output.tools_called),\n            \"assertion_passed\": output.assertion_passed,\n            \"assertion_details\": output.assertion_details,\n            \"tool_call_details\": output.tool_call_details,\n        },\n    )\n\n\nclass BraintrustEvalProvider(EvalProvider):\n    def eval(\n        self,\n        task: Callable[[dict[str, Any]], EvalToolResult],\n        configuration: EvalConfigurationOptions,\n        data: list[dict[str, Any]] | None = None,\n        remote_dataset_name: str | None = None,\n        multi_turn_task: Callable[[dict[str, Any]], MultiTurnEvalResult] | None = None,\n    ) -> EvalationAck:\n        if data is not None and remote_dataset_name is not None:\n            raise ValueError(\"Cannot specify both data and remote_dataset_name\")\n        if data is None and remote_dataset_name is None:\n            raise ValueError(\"Must specify either data or remote_dataset_name\")\n\n        # Create a wrapper task that dispatches to the appropriate handler\n        def dispatch_task(eval_input: dict[str, Any]) -> EvalResult:\n            if \"messages\" in eval_input and multi_turn_task is not None:\n                return multi_turn_task(eval_input)\n            return task(eval_input)\n\n        project_name = configuration.braintrust_project or BRAINTRUST_PROJECT\n        experiment_name = configuration.experiment_name\n\n        eval_data: Any = None\n        if remote_dataset_name is not None:\n            eval_data = init_dataset(project=project_name, name=remote_dataset_name)\n        else:\n            if data:\n                eval_data = [\n                    EvalCase(\n                        input={\n                            **item.get(\"input\", {}),\n                            # Pass through per-test tool configuration (for single-turn)\n                            \"force_tools\": item.get(\"force_tools\", []),\n                            \"expected_tools\": item.get(\"expected_tools\", []),\n                            \"require_all_tools\": item.get(\"require_all_tools\", False),\n                            # Pass through per-test model configuration\n                            \"model\": item.get(\"model\"),\n                            \"model_provider\": item.get(\"model_provider\"),\n                            \"temperature\": item.get(\"temperature\"),\n                        },\n                        expected=item.get(\"expected\"),\n                    )\n                    for item in data\n                ]\n\n        metadata = configuration.model_dump()\n\n        Eval(  # type: ignore[misc]\n            name=project_name,\n            experiment_name=experiment_name,\n            data=eval_data,\n            task=dispatch_task,\n            scores=[tool_assertion_scorer],\n            metadata=metadata,\n            max_concurrency=BRAINTRUST_MAX_CONCURRENCY,\n            no_send_logs=configuration.no_send_logs,\n        )\n        return EvalationAck(success=True)\n"
  },
  {
    "path": "backend/onyx/evals/providers/local.py",
    "content": "\"\"\"\nLocal eval provider that runs evaluations and outputs results to the CLI.\nNo external dependencies like Braintrust required.\n\"\"\"\n\nfrom collections.abc import Callable\nfrom typing import Any\n\nfrom onyx.evals.models import EvalationAck\nfrom onyx.evals.models import EvalConfigurationOptions\nfrom onyx.evals.models import EvalProvider\nfrom onyx.evals.models import EvalToolResult\nfrom onyx.evals.models import MultiTurnEvalResult\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# ANSI color codes\nGREEN = \"\\033[92m\"\nRED = \"\\033[91m\"\nYELLOW = \"\\033[93m\"\nBLUE = \"\\033[94m\"\nBOLD = \"\\033[1m\"\nRESET = \"\\033[0m\"\nDIM = \"\\033[2m\"\n\n\ndef _display_single_turn_result(\n    result: EvalToolResult,\n    passed_count: list[int],\n    failed_count: list[int],\n    no_assertion_count: list[int],\n) -> None:\n    \"\"\"Display results for a single turn and update counters.\"\"\"\n    # Display timing trace\n    if result.timings:\n        print(f\"  {BOLD}Trace:{RESET}\")\n        print(f\"    Total: {result.timings.total_ms:.0f}ms\")\n        if result.timings.llm_first_token_ms is not None:\n            print(f\"    First token: {result.timings.llm_first_token_ms:.0f}ms\")\n        if result.timings.tool_execution_ms:\n            for tool_name, duration_ms in result.timings.tool_execution_ms.items():\n                print(f\"    {tool_name}: {duration_ms:.0f}ms\")\n\n    # Display tools called\n    tools_str = \", \".join(result.tools_called) if result.tools_called else \"(none)\"\n    print(f\"  Tools called: {BLUE}{tools_str}{RESET}\")\n\n    # Display assertion result\n    if result.assertion_passed is None:\n        print(f\"  Assertion: {YELLOW}N/A{RESET} - No assertion configured\")\n        no_assertion_count[0] += 1\n    elif result.assertion_passed:\n        print(f\"  Assertion: {GREEN}PASS{RESET} - {result.assertion_details}\")\n        passed_count[0] += 1\n    else:\n        print(f\"  Assertion: {RED}FAIL{RESET} - {result.assertion_details}\")\n        failed_count[0] += 1\n\n    # Display truncated answer\n    answer = result.answer\n    truncated_answer = answer[:200] + \"...\" if len(answer) > 200 else answer\n    truncated_answer = truncated_answer.replace(\"\\n\", \" \")\n    print(f\"  Answer: {truncated_answer}\")\n\n\nclass LocalEvalProvider(EvalProvider):\n    \"\"\"\n    Eval provider that runs evaluations locally and prints results to the CLI.\n    Does not require Braintrust or any external service.\n    \"\"\"\n\n    def eval(\n        self,\n        task: Callable[[dict[str, Any]], EvalToolResult],\n        configuration: EvalConfigurationOptions,  # noqa: ARG002\n        data: list[dict[str, Any]] | None = None,\n        remote_dataset_name: str | None = None,\n        multi_turn_task: Callable[[dict[str, Any]], MultiTurnEvalResult] | None = None,\n    ) -> EvalationAck:\n        if remote_dataset_name is not None:\n            raise ValueError(\n                \"LocalEvalProvider does not support remote datasets. Use --local-data-path with a local JSON file.\"\n            )\n\n        if data is None:\n            raise ValueError(\"data is required for LocalEvalProvider\")\n\n        total = len(data)\n        # Use lists to allow mutation in helper function\n        passed = [0]\n        failed = [0]\n        no_assertion = [0]\n\n        print(f\"\\n{BOLD}Running {total} evaluation(s)...{RESET}\\n\")\n        print(\"=\" * 60)\n\n        for i, item in enumerate(data, 1):\n            input_data = item.get(\"input\", {})\n\n            # Check if this is a multi-turn eval (has 'messages' array)\n            if \"messages\" in input_data:\n                self._run_multi_turn_eval(\n                    i, total, item, multi_turn_task, passed, failed, no_assertion\n                )\n            else:\n                self._run_single_turn_eval(\n                    i, total, item, task, passed, failed, no_assertion\n                )\n\n        # Summary\n        print(\"\\n\" + \"=\" * 60)\n        total_with_assertions = passed[0] + failed[0]\n        if total_with_assertions > 0:\n            pass_rate = (passed[0] / total_with_assertions) * 100\n            print(\n                f\"{BOLD}Summary:{RESET} {passed[0]}/{total_with_assertions} passed ({pass_rate:.1f}%)\"\n            )\n        else:\n            print(f\"{BOLD}Summary:{RESET} No assertions configured\")\n\n        print(f\"  {GREEN}Passed:{RESET} {passed[0]}\")\n        print(f\"  {RED}Failed:{RESET} {failed[0]}\")\n        if no_assertion[0] > 0:\n            print(f\"  {YELLOW}No assertion:{RESET} {no_assertion[0]}\")\n        print(\"=\" * 60 + \"\\n\")\n\n        # Return success if no failures\n        return EvalationAck(success=(failed[0] == 0))\n\n    def _run_single_turn_eval(\n        self,\n        i: int,\n        total: int,\n        item: dict[str, Any],\n        task: Callable[[dict[str, Any]], EvalToolResult],\n        passed: list[int],\n        failed: list[int],\n        no_assertion: list[int],\n    ) -> None:\n        \"\"\"Run a single-turn evaluation.\"\"\"\n        # Build input with tool and model config\n        eval_input = {\n            **item.get(\"input\", {}),\n            # Tool configuration\n            \"force_tools\": item.get(\"force_tools\", []),\n            \"expected_tools\": item.get(\"expected_tools\", []),\n            \"require_all_tools\": item.get(\"require_all_tools\", False),\n            # Model configuration\n            \"model\": item.get(\"model\"),\n            \"model_provider\": item.get(\"model_provider\"),\n            \"temperature\": item.get(\"temperature\"),\n        }\n\n        message = eval_input.get(\"message\", \"(no message)\")\n        truncated_message = message[:50] + \"...\" if len(message) > 50 else message\n\n        # Show model if specified\n        model_info = \"\"\n        if item.get(\"model\"):\n            model_info = f\" [{item.get('model')}]\"\n\n        print(f'\\n{BOLD}[{i}/{total}]{RESET} \"{truncated_message}\"{model_info}')\n\n        try:\n            result = task(eval_input)\n            _display_single_turn_result(result, passed, failed, no_assertion)\n        except Exception as e:\n            print(f\"  {RED}ERROR:{RESET} {e}\")\n            failed[0] += 1\n            logger.exception(f\"Error running eval for input: {message}\")\n\n    def _run_multi_turn_eval(\n        self,\n        i: int,\n        total: int,\n        item: dict[str, Any],\n        multi_turn_task: Callable[[dict[str, Any]], MultiTurnEvalResult] | None,\n        passed: list[int],\n        failed: list[int],\n        no_assertion: list[int],\n    ) -> None:\n        \"\"\"Run a multi-turn evaluation.\"\"\"\n        if multi_turn_task is None:\n            print(\n                f\"\\n{BOLD}[{i}/{total}]{RESET} {RED}ERROR:{RESET} Multi-turn task not configured\"\n            )\n            failed[0] += 1\n            return\n\n        input_data = item.get(\"input\", {})\n        messages = input_data.get(\"messages\", [])\n        num_turns = len(messages)\n\n        # Show first message as preview\n        first_msg = (\n            messages[0].get(\"message\", \"(no message)\") if messages else \"(no messages)\"\n        )\n        truncated_first = first_msg[:40] + \"...\" if len(first_msg) > 40 else first_msg\n\n        print(f\"\\n{BOLD}[{i}/{total}] Multi-turn ({num_turns} turns){RESET}\")\n        print(f'  First: \"{truncated_first}\"')\n\n        try:\n            # Pass the full input with messages\n            eval_input = {**input_data}\n            result = multi_turn_task(eval_input)\n\n            # Display each turn's result\n            for turn_idx, turn_result in enumerate(result.turn_results):\n                turn_msg = messages[turn_idx].get(\"message\", \"\")\n                truncated_turn = (\n                    turn_msg[:40] + \"...\" if len(turn_msg) > 40 else turn_msg\n                )\n                print(f'\\n  {DIM}Turn {turn_idx + 1}:{RESET} \"{truncated_turn}\"')\n                _display_single_turn_result(turn_result, passed, failed, no_assertion)\n\n            # Show multi-turn summary\n            status = (\n                f\"{GREEN}ALL PASSED{RESET}\"\n                if result.all_passed\n                else f\"{RED}SOME FAILED{RESET}\"\n            )\n            print(\n                f\"\\n  {BOLD}Multi-turn result:{RESET} {status} ({result.pass_count}/{result.total_turns} turns passed)\"\n            )\n\n        except Exception as e:\n            print(f\"  {RED}ERROR:{RESET} {e}\")\n            failed[0] += 1\n            logger.exception(f\"Error running multi-turn eval: {first_msg}\")\n"
  },
  {
    "path": "backend/onyx/feature_flags/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/feature_flags/factory.py",
    "content": "from onyx.configs.app_configs import DEV_MODE\nfrom onyx.feature_flags.interface import FeatureFlagProvider\nfrom onyx.feature_flags.interface import NoOpFeatureFlagProvider\nfrom onyx.utils.variable_functionality import (\n    fetch_versioned_implementation_with_fallback,\n)\nfrom shared_configs.configs import MULTI_TENANT\n\n\ndef get_default_feature_flag_provider() -> FeatureFlagProvider:\n    \"\"\"\n    Get the default feature flag provider implementation.\n\n    Returns the PostHog-based provider in Enterprise Edition when available,\n    otherwise returns a no-op provider that always returns False.\n\n    This function is designed for dependency injection - callers should\n    use this factory rather than directly instantiating providers.\n\n    Returns:\n        FeatureFlagProvider: The configured feature flag provider instance\n    \"\"\"\n    if MULTI_TENANT or DEV_MODE:\n        return fetch_versioned_implementation_with_fallback(\n            module=\"onyx.feature_flags.factory\",\n            attribute=\"get_posthog_feature_flag_provider\",\n            fallback=lambda: NoOpFeatureFlagProvider(),\n        )()\n    return NoOpFeatureFlagProvider()\n"
  },
  {
    "path": "backend/onyx/feature_flags/feature_flags_keys.py",
    "content": "\"\"\"\nFeature flag keys used throughout the application.\nCentralizes feature flag key definitions to avoid magic strings.\n\"\"\"\n"
  },
  {
    "path": "backend/onyx/feature_flags/flags.py",
    "content": ""
  },
  {
    "path": "backend/onyx/feature_flags/interface.py",
    "content": "import abc\nfrom typing import Any\nfrom uuid import UUID\n\nfrom onyx.db.models import User\nfrom shared_configs.configs import ENVIRONMENT\n\n\nclass FeatureFlagProvider(abc.ABC):\n    \"\"\"\n    Abstract base class for feature flag providers.\n\n    Implementations should provide vendor-specific logic for checking\n    whether a feature flag is enabled for a given user.\n    \"\"\"\n\n    @abc.abstractmethod\n    def feature_enabled(\n        self,\n        flag_key: str,\n        user_id: UUID,\n        user_properties: dict[str, Any] | None = None,\n    ) -> bool:\n        \"\"\"\n        Check if a feature flag is enabled for a user.\n\n        Args:\n            flag_key: The identifier for the feature flag to check\n            user_id: The unique identifier for the user\n            user_properties: Optional dictionary of user properties/attributes\n                           that may influence flag evaluation\n\n        Returns:\n            True if the feature is enabled for the user, False otherwise\n        \"\"\"\n        raise NotImplementedError\n\n    def feature_enabled_for_user_tenant(\n        self, flag_key: str, user: User, tenant_id: str\n    ) -> bool:\n        \"\"\"\n        Check if a feature flag is enabled for a user.\n        \"\"\"\n        return self.feature_enabled(\n            flag_key,\n            # For anonymous/unauthenticated users, use a fixed UUID as fallback\n            user.id if user else UUID(\"caa1e0cd-6ee6-4550-b1ec-8affaef4bf83\"),\n            user_properties={\n                \"tenant_id\": tenant_id,\n                \"email\": user.email if user else \"anonymous@onyx.app\",\n            },\n        )\n\n\nclass NoOpFeatureFlagProvider(FeatureFlagProvider):\n    \"\"\"\n    No-operation feature flag provider that always returns False.\n\n    Used as a fallback when no real feature flag provider is available\n    (e.g., in MIT version without PostHog).\n    \"\"\"\n\n    def feature_enabled(\n        self,\n        flag_key: str,  # noqa: ARG002\n        user_id: UUID,  # noqa: ARG002\n        user_properties: dict[str, Any] | None = None,  # noqa: ARG002\n    ) -> bool:\n        environment = ENVIRONMENT\n        if environment == \"local\":\n            return True\n        return False\n"
  },
  {
    "path": "backend/onyx/federated_connectors/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/federated_connectors/factory.py",
    "content": "\"\"\"Factory for creating federated connector instances.\"\"\"\n\nimport importlib\nfrom typing import Any\nfrom typing import Type\n\nfrom onyx.configs.constants import FederatedConnectorSource\nfrom onyx.federated_connectors.interfaces import FederatedConnector\nfrom onyx.federated_connectors.registry import FEDERATED_CONNECTOR_CLASS_MAP\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass FederatedConnectorMissingException(Exception):\n    pass\n\n\n# Cache for already imported federated connector classes\n_federated_connector_cache: dict[FederatedConnectorSource, Type[FederatedConnector]] = (\n    {}\n)\n\n\ndef _load_federated_connector_class(\n    source: FederatedConnectorSource,\n) -> Type[FederatedConnector]:\n    \"\"\"Dynamically load and cache a federated connector class.\"\"\"\n    if source in _federated_connector_cache:\n        return _federated_connector_cache[source]\n\n    if source not in FEDERATED_CONNECTOR_CLASS_MAP:\n        raise FederatedConnectorMissingException(\n            f\"Federated connector not found for source={source}\"\n        )\n\n    mapping = FEDERATED_CONNECTOR_CLASS_MAP[source]\n\n    try:\n        module = importlib.import_module(mapping.module_path)\n        connector_class = getattr(module, mapping.class_name)\n        _federated_connector_cache[source] = connector_class\n        return connector_class\n    except (ImportError, AttributeError) as e:\n        raise FederatedConnectorMissingException(\n            f\"Failed to import {mapping.class_name} from {mapping.module_path}: {e}\"\n        )\n\n\ndef get_federated_connector(\n    source: FederatedConnectorSource,\n    credentials: dict[str, Any],\n) -> FederatedConnector:\n    \"\"\"Get an instance of the appropriate federated connector.\"\"\"\n    connector_cls = get_federated_connector_cls(source)\n    return connector_cls(credentials)\n\n\ndef get_federated_connector_cls(\n    source: FederatedConnectorSource,\n) -> Type[FederatedConnector]:\n    \"\"\"Get the class of the appropriate federated connector.\"\"\"\n    return _load_federated_connector_class(source)\n"
  },
  {
    "path": "backend/onyx/federated_connectors/federated_retrieval.py",
    "content": "from collections import defaultdict\nfrom collections.abc import Callable\nfrom typing import Any\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import FederatedConnectorSource\nfrom onyx.context.search.models import ChunkIndexRequest\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.db.federated import (\n    get_federated_connector_document_set_mappings_by_document_set_names,\n)\nfrom onyx.db.federated import list_federated_connector_oauth_tokens\nfrom onyx.db.models import FederatedConnector__DocumentSet\nfrom onyx.db.slack_bot import fetch_slack_bots\nfrom onyx.federated_connectors.factory import get_federated_connector\nfrom onyx.federated_connectors.interfaces import FederatedConnector\nfrom onyx.onyxbot.slack.models import SlackContext\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass FederatedRetrievalInfo(BaseModel):\n    model_config = ConfigDict(arbitrary_types_allowed=True)\n\n    retrieval_function: Callable[[ChunkIndexRequest], list[InferenceChunk]]\n    source: FederatedConnectorSource\n\n\ndef get_federated_retrieval_functions(\n    db_session: Session,\n    user_id: UUID | None,\n    source_types: list[DocumentSource] | None,\n    document_set_names: list[str] | None,\n    slack_context: SlackContext | None = None,\n) -> list[FederatedRetrievalInfo]:\n\n    # Check for Slack bot context first (regardless of user_id)\n    if slack_context:\n        logger.debug(\"Slack context detected, checking for Slack bot setup...\")\n\n        # Slack federated search requires a Slack federated connector to be linked\n        # via document sets. If no document sets are provided, skip Slack federated search.\n        if not document_set_names:\n            logger.debug(\n                \"Skipping Slack federated search: no document sets provided, \"\n                \"Slack federated connector must be linked via document sets\"\n            )\n            return []\n\n        # Check if any Slack federated connector is associated with the document sets\n        # and extract its config (entities) for channel filtering\n        slack_federated_connector_config: dict[str, Any] | None = None\n        slack_federated_mappings = (\n            get_federated_connector_document_set_mappings_by_document_set_names(\n                db_session, document_set_names\n            )\n        )\n        for mapping in slack_federated_mappings:\n            if (\n                mapping.federated_connector is not None\n                and mapping.federated_connector.source\n                == FederatedConnectorSource.FEDERATED_SLACK\n            ):\n                slack_federated_connector_config = (\n                    mapping.federated_connector.config or {}\n                )\n                logger.debug(\n                    f\"Found Slack federated connector config: {slack_federated_connector_config}\"\n                )\n                break\n\n        if slack_federated_connector_config is None:\n            logger.debug(\n                f\"Skipping Slack federated search: document sets {document_set_names} \"\n                \"are not associated with any Slack federated connector\"\n            )\n            # Return empty list - no Slack federated search for this context\n            return []\n\n        try:\n            slack_bots = fetch_slack_bots(db_session)\n            logger.debug(f\"Found {len(slack_bots)} Slack bots\")\n\n            # First try to find a bot with user token\n            tenant_slack_bot = next(\n                (bot for bot in slack_bots if bot.enabled and bot.user_token), None\n            )\n            if tenant_slack_bot:\n                logger.debug(f\"Selected bot with user_token: {tenant_slack_bot.name}\")\n            else:\n                # Fall back to any enabled bot without user token\n                tenant_slack_bot = next(\n                    (bot for bot in slack_bots if bot.enabled), None\n                )\n                if tenant_slack_bot:\n                    logger.debug(\n                        f\"Selected bot without user_token: {tenant_slack_bot.name} (limited functionality)\"\n                    )\n                else:\n                    logger.warning(\"No enabled Slack bots found\")\n\n            if tenant_slack_bot:\n                federated_retrieval_infos_slack = []\n\n                # Use user_token if available, otherwise fall back to bot_token\n                # Unwrap SensitiveValue for backend API calls\n                access_token = (\n                    tenant_slack_bot.user_token.get_value(apply_mask=False)\n                    if tenant_slack_bot.user_token\n                    else (\n                        tenant_slack_bot.bot_token.get_value(apply_mask=False)\n                        if tenant_slack_bot.bot_token\n                        else \"\"\n                    )\n                )\n                if not tenant_slack_bot.user_token:\n                    logger.warning(\n                        f\"Using bot_token for Slack search (limited functionality): {tenant_slack_bot.name}\"\n                    )\n\n                # For bot context, we don't need real OAuth credentials\n                credentials = {\n                    \"client_id\": \"bot-context\",  # Placeholder for bot context\n                    \"client_secret\": \"bot-context\",  # Placeholder for bot context\n                }\n\n                # Create Slack federated connector\n                connector = get_federated_connector(\n                    FederatedConnectorSource.FEDERATED_SLACK,\n                    credentials,\n                )\n\n                # Capture variables by value to avoid lambda closure issues\n                # Unwrap SensitiveValue for backend API calls\n                bot_token = (\n                    tenant_slack_bot.bot_token.get_value(apply_mask=False)\n                    if tenant_slack_bot.bot_token\n                    else \"\"\n                )\n\n                # Use connector config for channel filtering (guaranteed to exist at this point)\n                connector_entities = slack_federated_connector_config\n                logger.debug(\n                    f\"Using Slack federated connector entities for bot context: {connector_entities}\"\n                )\n\n                def create_slack_retrieval_function(\n                    conn: FederatedConnector,\n                    token: str,\n                    ctx: SlackContext,\n                    bot_tok: str,\n                    entities: dict[str, Any],\n                ) -> Callable[[ChunkIndexRequest], list[InferenceChunk]]:\n                    def retrieval_fn(query: ChunkIndexRequest) -> list[InferenceChunk]:\n                        return conn.search(\n                            query,\n                            entities,  # Use connector-level entities for channel filtering\n                            access_token=token,\n                            limit=None,  # Let connector use its own max_messages_per_query config\n                            slack_event_context=ctx,\n                            bot_token=bot_tok,\n                        )\n\n                    return retrieval_fn\n\n                federated_retrieval_infos_slack.append(\n                    FederatedRetrievalInfo(\n                        retrieval_function=create_slack_retrieval_function(\n                            connector,\n                            access_token,\n                            slack_context,\n                            bot_token,\n                            connector_entities,\n                        ),\n                        source=FederatedConnectorSource.FEDERATED_SLACK,\n                    )\n                )\n                logger.debug(\n                    f\"Added Slack federated search for bot, returning {len(federated_retrieval_infos_slack)} retrieval functions\"\n                )\n                return federated_retrieval_infos_slack\n\n        except Exception as e:\n            logger.warning(f\"Could not setup Slack bot federated search: {e}\")\n            # Fall through to regular federated connector logic\n\n    if user_id is None:\n        # No user ID provided and no Slack context, return empty\n        logger.warning(\n            \"No user ID provided and no Slack context, returning empty retrieval functions\"\n        )\n        return []\n\n    federated_connector__document_set_pairs = (\n        (\n            get_federated_connector_document_set_mappings_by_document_set_names(\n                db_session, document_set_names\n            )\n        )\n        if document_set_names\n        else []\n    )\n    federated_connector_id_to_document_sets: dict[\n        int, list[FederatedConnector__DocumentSet]\n    ] = defaultdict(list)\n    for pair in federated_connector__document_set_pairs:\n        federated_connector_id_to_document_sets[pair.federated_connector_id].append(\n            pair\n        )\n\n    # At this point, user_id is guaranteed to be not None since we're in the else branch\n    assert user_id is not None\n\n    # If no source types are specified, don't use any federated connectors\n    if source_types is None:\n        logger.debug(\"No source types specified, skipping all federated connectors\")\n        return []\n\n    federated_retrieval_infos: list[FederatedRetrievalInfo] = []\n    federated_oauth_tokens = list_federated_connector_oauth_tokens(db_session, user_id)\n    for oauth_token in federated_oauth_tokens:\n        # Slack is handled separately inside SearchTool\n        if (\n            oauth_token.federated_connector.source\n            == FederatedConnectorSource.FEDERATED_SLACK\n        ):\n            logger.debug(\n                \"Skipping Slack federated connector in user OAuth path - handled by SearchTool\"\n            )\n            continue\n\n        if (\n            oauth_token.federated_connector.source.to_non_federated_source()\n            not in source_types\n        ):\n            continue\n\n        document_set_associations = federated_connector_id_to_document_sets[\n            oauth_token.federated_connector_id\n        ]\n\n        # if document set names are specified by the user, skip federated connectors that are\n        # not associated with any of the document sets\n        if document_set_names and not document_set_associations:\n            continue\n\n        # Only use connector-level config (no junction table entities)\n        entities = oauth_token.federated_connector.config or {}\n\n        connector = get_federated_connector(\n            oauth_token.federated_connector.source,\n            oauth_token.federated_connector.credentials.get_value(apply_mask=False),\n        )\n\n        # Capture variables by value to avoid lambda closure issues\n        access_token = oauth_token.token.get_value(apply_mask=False)\n\n        def create_retrieval_function(\n            conn: FederatedConnector,\n            ent: dict[str, Any],\n            token: str,\n        ) -> Callable[[ChunkIndexRequest], list[InferenceChunk]]:\n            return lambda query: conn.search(\n                query,\n                ent,\n                access_token=token,\n                limit=None,  # Let connector use its own max_messages_per_query config\n            )\n\n        federated_retrieval_infos.append(\n            FederatedRetrievalInfo(\n                retrieval_function=create_retrieval_function(\n                    connector, entities, access_token\n                ),\n                source=oauth_token.federated_connector.source,\n            )\n        )\n    return federated_retrieval_infos\n"
  },
  {
    "path": "backend/onyx/federated_connectors/interfaces.py",
    "content": "from abc import ABC\nfrom abc import abstractmethod\nfrom typing import Any\nfrom typing import Dict\n\nfrom onyx.context.search.models import ChunkIndexRequest\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.federated_connectors.models import CredentialField\nfrom onyx.federated_connectors.models import EntityField\nfrom onyx.federated_connectors.models import OAuthResult\nfrom onyx.onyxbot.slack.models import SlackContext\n\n\nclass FederatedConnector(ABC):\n    \"\"\"Base interface that all federated connectors must implement.\"\"\"\n\n    @abstractmethod\n    def __init__(self, credentials: dict[str, Any]):\n        \"\"\"\n        Initialize the connector with credentials + validate their structure.\n\n        Args:\n            credentials: Dictionary of credentials to initialize the connector with\n        \"\"\"\n        self.credentials = credentials\n\n    @abstractmethod\n    def validate_entities(self, entities: Dict[str, Any]) -> bool:\n        \"\"\"\n        Validate that the provided entities match the expected structure.\n\n        Args:\n            entities: Dictionary of entities to validate\n\n        Returns:\n            True if entities are valid, False otherwise\n\n        Note: This method is used for backward compatibility with document-set level entities.\n        For connector-level config validation, use validate_config() instead.\n        \"\"\"\n\n    def validate_config(self, config: Dict[str, Any]) -> bool:\n        \"\"\"\n        Validate that the provided config matches the expected structure.\n\n        This is an alias for validate_entities() to provide clearer semantics\n        when validating connector-level configuration.\n\n        Args:\n            config: Dictionary of configuration to validate\n\n        Returns:\n            True if config is valid, False otherwise\n        \"\"\"\n        return self.validate_entities(config)\n\n    @classmethod\n    @abstractmethod\n    def configuration_schema(cls) -> Dict[str, EntityField]:\n        \"\"\"\n        Return the specification of what configuration fields are available for this connector.\n\n        Returns:\n            Dictionary where keys are configuration field names and values are EntityField objects\n            describing the expected structure and constraints.\n        \"\"\"\n\n    @classmethod\n    @abstractmethod\n    def credentials_schema(cls) -> Dict[str, CredentialField]:\n        \"\"\"\n        Return the specification of what credentials are required for this connector.\n\n        Returns:\n            Dictionary where keys are credential field names and values are CredentialField objects\n            describing the expected structure, validation rules, and security properties.\n        \"\"\"\n\n    @abstractmethod\n    def authorize(self, redirect_uri: str) -> str:\n        \"\"\"\n        Generate the OAuth authorization URL.\n\n        Returns:\n            The URL where users should be redirected to authorize the application\n        \"\"\"\n\n    @abstractmethod\n    def callback(self, callback_data: Dict[str, Any], redirect_uri: str) -> OAuthResult:\n        \"\"\"\n        Handle the OAuth callback and exchange the authorization code for tokens.\n\n        Args:\n            callback_data: The data received from the OAuth callback (query params, etc.)\n            redirect_uri: The OAuth redirect URI used in the authorization request\n\n        Returns:\n            Standardized OAuthResult containing tokens and metadata\n        \"\"\"\n\n    @abstractmethod\n    def search(\n        self,\n        query: ChunkIndexRequest,\n        entities: dict[str, Any],\n        access_token: str,\n        limit: int | None = None,\n        # Slack-specific parameters\n        slack_event_context: SlackContext | None = None,\n        bot_token: str | None = None,\n    ) -> list[InferenceChunk]:\n        \"\"\"\n        Perform a federated search using the provided query and entities.\n\n        Args:\n            query: The search query\n            entities: Connector-level config (entity filtering configuration)\n            access_token: The OAuth access token\n            limit: Maximum number of results to return\n            slack_event_context: Slack-specific context (only used by Slack bot)\n            bot_token: Slack bot token (only used by Slack bot)\n\n        Returns:\n            Search results in a standardized format\n        \"\"\"\n"
  },
  {
    "path": "backend/onyx/federated_connectors/models.py",
    "content": "from datetime import datetime\nfrom typing import Any\nfrom typing import Dict\nfrom typing import Optional\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\n\nclass FieldSpec(BaseModel):\n    \"\"\"Model for describing a field specification.\"\"\"\n\n    type: str = Field(\n        ..., description=\"The type of the field (e.g., 'str', 'bool', 'list[str]')\"\n    )\n    description: str = Field(\n        ..., description=\"Description of what this field represents\"\n    )\n    required: bool = Field(default=False, description=\"Whether this field is required\")\n    default: Optional[Any] = Field(\n        default=None, description=\"Default value if not provided\"\n    )\n    example: Optional[Any] = Field(\n        default=None, description=\"Example value for documentation\"\n    )\n    secret: bool = Field(\n        default=False, description=\"Whether this field contains sensitive data\"\n    )\n\n\nclass EntityField(FieldSpec):\n    \"\"\"Model for describing an entity field in the entities specification.\"\"\"\n\n\nclass CredentialField(FieldSpec):\n    \"\"\"Model for describing a credential field in the credentials specification.\"\"\"\n\n\nclass OAuthResult(BaseModel):\n    \"\"\"Standardized OAuth result that all federated connectors should return from callback.\"\"\"\n\n    access_token: Optional[str] = Field(\n        default=None, description=\"The bot access token for bot operations\"\n    )\n    user_token: Optional[str] = Field(\n        default=None,\n        description=\"The user access token for user-scoped operations like federated search\",\n    )\n    token_type: Optional[str] = Field(\n        default=None, description=\"Token type (usually 'bearer')\"\n    )\n    scope: Optional[str] = Field(default=None, description=\"Granted scopes\")\n    expires_at: Optional[datetime] = Field(\n        default=None, description=\"When the token expires\"\n    )\n    refresh_token: Optional[str] = Field(\n        default=None, description=\"Refresh token if applicable\"\n    )\n\n    # Additional fields that might be useful\n    team: Optional[Dict[str, Any]] = Field(\n        default=None, description=\"Team/workspace information\"\n    )\n    user: Optional[Dict[str, Any]] = Field(default=None, description=\"User information\")\n    raw_response: Optional[Dict[str, Any]] = Field(\n        default=None, description=\"Raw response for debugging\"\n    )\n\n    # Pydantic V2 automatically serializes datetime to ISO format, so no custom encoder needed\n"
  },
  {
    "path": "backend/onyx/federated_connectors/oauth_utils.py",
    "content": "\"\"\"Generic OAuth utilities for federated connectors API layer.\"\"\"\n\nimport base64\nimport json\nimport uuid\nfrom typing import Any\n\nfrom onyx.cache.factory import get_cache_backend\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nOAUTH_STATE_PREFIX = \"federated_oauth\"\nOAUTH_STATE_TTL = 300  # 5 minutes\n\n\nclass OAuthSession:\n    \"\"\"Represents an OAuth session stored in the cache backend.\"\"\"\n\n    def __init__(\n        self,\n        federated_connector_id: int,\n        user_id: str,\n        redirect_uri: str | None = None,\n        additional_data: dict[str, Any] | None = None,\n    ):\n        self.federated_connector_id = federated_connector_id\n        self.user_id = user_id\n        self.redirect_uri = redirect_uri\n        self.additional_data = additional_data or {}\n\n    def to_dict(self) -> dict[str, Any]:\n        return {\n            \"federated_connector_id\": self.federated_connector_id,\n            \"user_id\": self.user_id,\n            \"redirect_uri\": self.redirect_uri,\n            \"additional_data\": self.additional_data,\n        }\n\n    @classmethod\n    def from_dict(cls, data: dict[str, Any]) -> \"OAuthSession\":\n        return cls(\n            federated_connector_id=data[\"federated_connector_id\"],\n            user_id=data[\"user_id\"],\n            redirect_uri=data.get(\"redirect_uri\"),\n            additional_data=data.get(\"additional_data\", {}),\n        )\n\n\ndef generate_oauth_state(\n    federated_connector_id: int,\n    user_id: str,\n    redirect_uri: str | None = None,\n    additional_data: dict[str, Any] | None = None,\n    ttl: int = OAUTH_STATE_TTL,\n) -> str:\n    \"\"\"\n    Generate a secure state parameter and store session data in the cache backend.\n\n    Args:\n        federated_connector_id: ID of the federated connector\n        user_id: ID of the user initiating OAuth\n        redirect_uri: Optional redirect URI after OAuth completion\n        additional_data: Any additional data to store with the session\n        ttl: Time-to-live in seconds for the cache key\n\n    Returns:\n        Base64-encoded state parameter\n    \"\"\"\n    # Generate a random UUID for the state\n    state_uuid = uuid.uuid4()\n    state_b64 = base64.urlsafe_b64encode(state_uuid.bytes).decode(\"utf-8\").rstrip(\"=\")\n\n    session = OAuthSession(\n        federated_connector_id=federated_connector_id,\n        user_id=user_id,\n        redirect_uri=redirect_uri,\n        additional_data=additional_data,\n    )\n\n    cache = get_cache_backend()\n    cache_key = f\"{OAUTH_STATE_PREFIX}:{state_uuid}\"\n    cache.set(cache_key, json.dumps(session.to_dict()), ex=ttl)\n\n    logger.info(\n        f\"Generated OAuth state for federated_connector_id={federated_connector_id}, user_id={user_id}, state={state_b64}\"\n    )\n\n    return state_b64\n\n\ndef verify_oauth_state(state: str) -> OAuthSession:\n    \"\"\"\n    Verify OAuth state parameter and retrieve session data.\n\n    Args:\n        state: Base64-encoded state parameter from OAuth callback\n\n    Returns:\n        OAuthSession if state is valid, None otherwise\n    \"\"\"\n    # Add padding if needed for base64 decoding\n    padded_state = state + \"=\" * (-len(state) % 4)\n\n    # Decode base64 to get UUID bytes\n    state_bytes = base64.urlsafe_b64decode(padded_state)\n    state_uuid = uuid.UUID(bytes=state_bytes)\n\n    cache = get_cache_backend()\n    cache_key = f\"{OAUTH_STATE_PREFIX}:{state_uuid}\"\n\n    session_data = cache.get(cache_key)\n    if not session_data:\n        raise ValueError(f\"OAuth state not found: {state}\")\n\n    cache.delete(cache_key)\n\n    session_dict = json.loads(session_data)\n    return OAuthSession.from_dict(session_dict)\n\n\ndef get_oauth_callback_uri() -> str:\n    \"\"\"\n    Generate the OAuth callback URI for a federated connector.\n\n    Returns:\n        The callback URI\n    \"\"\"\n    # Use the frontend callback page as the OAuth redirect URI\n    # The frontend will then make an API call to process the callback\n    return f\"{WEB_DOMAIN}/federated/oauth/callback\"\n\n\ndef add_state_to_oauth_url(base_oauth_url: str, state: str) -> str:\n    \"\"\"\n    Add state parameter to an OAuth URL.\n\n    Args:\n        base_oauth_url: The base OAuth URL from the connector\n        state: The state parameter to add\n\n    Returns:\n        The OAuth URL with state parameter added\n    \"\"\"\n    # Check if URL already has query parameters\n    separator = \"&\" if \"?\" in base_oauth_url else \"?\"\n    return f\"{base_oauth_url}{separator}state={state}\"\n"
  },
  {
    "path": "backend/onyx/federated_connectors/registry.py",
    "content": "\"\"\"Registry mapping for federated connector classes.\"\"\"\n\nfrom pydantic import BaseModel\n\nfrom onyx.configs.constants import FederatedConnectorSource\n\n\nclass FederatedConnectorMapping(BaseModel):\n    module_path: str\n    class_name: str\n\n\n# Mapping of FederatedConnectorSource to connector details for lazy loading\nFEDERATED_CONNECTOR_CLASS_MAP = {\n    FederatedConnectorSource.FEDERATED_SLACK: FederatedConnectorMapping(\n        module_path=\"onyx.federated_connectors.slack.federated_connector\",\n        class_name=\"SlackFederatedConnector\",\n    ),\n}\n"
  },
  {
    "path": "backend/onyx/federated_connectors/slack/__init__.py",
    "content": "# Slack federated connector module\n"
  },
  {
    "path": "backend/onyx/federated_connectors/slack/federated_connector.py",
    "content": "from datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\nfrom urllib.parse import urlencode\n\nimport requests\nfrom pydantic import ValidationError\nfrom slack_sdk import WebClient\nfrom typing_extensions import override\n\nfrom onyx.context.search.federated.slack_search import slack_retrieval\nfrom onyx.context.search.models import ChunkIndexRequest\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.federated_connectors.interfaces import FederatedConnector\nfrom onyx.federated_connectors.models import CredentialField\nfrom onyx.federated_connectors.models import EntityField\nfrom onyx.federated_connectors.models import OAuthResult\nfrom onyx.federated_connectors.slack.models import SlackCredentials\nfrom onyx.federated_connectors.slack.models import SlackEntities\nfrom onyx.onyxbot.slack.models import SlackContext\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nSCOPES = [\n    \"channels:read\",\n    \"groups:read\",\n    \"im:read\",\n    \"mpim:read\",\n    \"search:read\",\n    \"channels:history\",\n    \"groups:history\",\n    \"im:history\",\n    \"mpim:history\",\n    \"users:read\",\n    \"users.profile:read\",\n]\n\n\nclass SlackFederatedConnector(FederatedConnector):\n    def __init__(self, credentials: dict[str, Any]):\n        self.slack_credentials = SlackCredentials(**credentials)\n\n    @override\n    def validate_entities(self, entities: dict[str, Any]) -> bool:\n        \"\"\"Check the entities and verify that they match the expected structure/all values are valid.\n\n        For Slack federated search, we expect:\n        - channels: list[str] (list of channel names or IDs)\n        - include_dm: bool (whether to include direct messages)\n        \"\"\"\n        try:\n            # Use Pydantic model for validation\n            SlackEntities(**entities)\n            return True\n        except ValidationError as e:\n            logger.warning(f\"Validation error for Slack entities: {e}\")\n            return False\n        except Exception as e:\n            logger.error(f\"Error validating Slack entities: {e}\")\n            return False\n\n    @classmethod\n    def entities_schema(cls) -> dict[str, EntityField]:\n        \"\"\"Return the specifications of what entity configuration fields are available for Slack.\n\n        This is the canonical schema definition for Slack entities.\n        \"\"\"\n        return {\n            \"exclude_channels\": EntityField(\n                type=\"list[str]\",\n                description=\"Exclude the following channels from search. Glob patterns are supported.\",\n                required=False,\n                example=[\"secure-channel\", \"private-*\", \"customer*\"],\n            ),\n            \"search_all_channels\": EntityField(\n                type=\"bool\",\n                description=\"Search all accessible channels. If not set, must specify channels below.\",\n                required=False,\n                default=False,\n                example=False,\n            ),\n            \"channels\": EntityField(\n                type=\"list[str]\",\n                description=\"Search the following channels\",\n                required=False,\n                example=[\"general\", \"eng*\", \"product-*\"],\n            ),\n            \"include_dm\": EntityField(\n                type=\"bool\",\n                description=\"Include user direct messages in search results\",\n                required=False,\n                default=False,\n                example=False,\n            ),\n            \"include_group_dm\": EntityField(\n                type=\"bool\",\n                description=\"Include group direct messages (multi-person DMs) in search results\",\n                required=False,\n                default=False,\n                example=False,\n            ),\n            \"include_private_channels\": EntityField(\n                type=\"bool\",\n                description=\"Include private channels in search results (user must have access)\",\n                required=False,\n                default=False,\n                example=False,\n            ),\n            \"default_search_days\": EntityField(\n                type=\"int\",\n                description=\"Maximum number of days to search back. Increasing this value degrades answer quality.\",\n                required=False,\n                default=30,\n                example=30,\n            ),\n            \"max_messages_per_query\": EntityField(\n                type=\"int\",\n                description=(\n                    \"Maximum number of messages to retrieve per search query. \"\n                    \"Higher values provide more context but may be slower.\"\n                ),\n                required=False,\n                default=25,\n                example=25,\n            ),\n        }\n\n    @classmethod\n    def configuration_schema(cls) -> dict[str, EntityField]:\n        \"\"\"Wrapper for backwards compatibility - delegates to entities_schema().\"\"\"\n        return cls.entities_schema()\n\n    @classmethod\n    @override\n    def credentials_schema(cls) -> dict[str, CredentialField]:\n        \"\"\"Return the specification of what credentials are required for Slack connector.\"\"\"\n        return {\n            \"client_id\": CredentialField(\n                type=\"str\",\n                description=\"Slack app client ID from your Slack app configuration\",\n                required=True,\n                example=\"1234567890.1234567890123\",\n                secret=False,\n            ),\n            \"client_secret\": CredentialField(\n                type=\"str\",\n                description=\"Slack app client secret from your Slack app configuration\",\n                required=True,\n                example=\"1a2b3c4d5e6f7g8h9i0j1k2l3m4n5o6p\",\n                secret=True,\n            ),\n        }\n\n    @override\n    def authorize(self, redirect_uri: str) -> str:\n        \"\"\"Get back the OAuth URL for Slack authorization.\n\n        Returns the URL where users should be redirected to authorize the application.\n        Note: State parameter will be added by the API layer.\n        \"\"\"\n        # Build OAuth URL with proper parameters (no state - handled by API layer)\n        params = {\n            \"client_id\": self.slack_credentials.client_id,\n            \"user_scope\": \" \".join(SCOPES),\n            \"redirect_uri\": redirect_uri,\n        }\n\n        # Build query string\n        oauth_url = f\"https://slack.com/oauth/v2/authorize?{urlencode(params)}\"\n\n        logger.info(\"Generated Slack OAuth authorization URL\")\n        return oauth_url\n\n    @override\n    def callback(self, callback_data: dict[str, Any], redirect_uri: str) -> OAuthResult:\n        \"\"\"Handle the response from the OAuth flow and return it in a standard format.\n\n        Args:\n            callback_data: The data received from the OAuth callback (state already validated by API layer)\n\n        Returns:\n            Standardized OAuthResult\n        \"\"\"\n        # Extract authorization code from callback\n        auth_code = callback_data.get(\"code\")\n        error = callback_data.get(\"error\")\n\n        if error:\n            raise RuntimeError(f\"OAuth error received: {error}\")\n\n        if not auth_code:\n            raise ValueError(\"No authorization code received\")\n\n        # Exchange authorization code for access token\n        token_response = self._exchange_code_for_token(auth_code, redirect_uri)\n\n        if not token_response.get(\"ok\"):\n            raise RuntimeError(\n                f\"Failed to exchange authorization code for token: {token_response.get('error')}\"\n            )\n\n        # Build team info\n        team_info = None\n        if \"team\" in token_response:\n            team_info = {\n                \"id\": token_response[\"team\"][\"id\"],\n                \"name\": token_response[\"team\"][\"name\"],\n            }\n\n        # Build user info and extract OAuth tokens\n        if \"authed_user\" not in token_response:\n            raise RuntimeError(\"Missing authed_user in OAuth response from Slack\")\n\n        authed_user = token_response[\"authed_user\"]\n        user_info = {\n            \"id\": authed_user[\"id\"],\n            \"scope\": authed_user.get(\"scope\"),\n            \"token_type\": authed_user.get(\"token_type\"),\n        }\n\n        # Extract OAuth tokens - bot token from root, user token from authed_user\n        user_token = authed_user.get(\"access_token\")  # User token\n        refresh_token = authed_user.get(\"refresh_token\")\n        token_type = authed_user.get(\"token_type\", \"bearer\")\n        scope = authed_user.get(\"scope\")\n\n        # Calculate expires_at from expires_in if present\n        expires_at = None\n        if \"expires_in\" in authed_user:\n            expires_at = datetime.now(timezone.utc) + timedelta(\n                seconds=authed_user[\"expires_in\"]\n            )\n\n        return OAuthResult(\n            access_token=user_token,  # Bot token for bot operations\n            token_type=token_type,\n            scope=scope,\n            expires_at=expires_at,\n            refresh_token=refresh_token,\n            team=team_info,\n            user=user_info,\n            raw_response=token_response,\n        )\n\n    def _exchange_code_for_token(self, code: str, redirect_uri: str) -> dict[str, Any]:\n        \"\"\"Exchange authorization code for access token.\n\n        Args:\n            code: Authorization code from OAuth callback\n\n        Returns:\n            Token response from Slack API\n        \"\"\"\n        response = requests.post(\n            \"https://slack.com/api/oauth.v2.access\",\n            data={\n                \"client_id\": self.slack_credentials.client_id,\n                \"client_secret\": self.slack_credentials.client_secret,\n                \"code\": code,\n                \"redirect_uri\": redirect_uri,\n            },\n        )\n        response.raise_for_status()\n        return response.json()\n\n    @override\n    def search(\n        self,\n        query: ChunkIndexRequest,\n        entities: dict[str, Any],\n        access_token: str,\n        limit: int | None = None,\n        slack_event_context: SlackContext | None = None,\n        bot_token: str | None = None,\n    ) -> list[InferenceChunk]:\n        \"\"\"Perform a federated search on Slack.\n\n        Args:\n            query: The search query\n            entities: Connector-level config (entity filtering configuration)\n            access_token: The OAuth access token\n            limit: Maximum number of results to return\n            slack_event_context: Optional Slack context for slack bot\n            bot_token: Optional bot token for slack bot\n\n        Returns:\n            Search results in SlackSearchResponse format\n        \"\"\"\n        logger.debug(f\"Slack federated search called with entities: {entities}\")\n\n        # Get team_id from Slack API for caching and filtering\n        team_id = None\n        try:\n            slack_client = WebClient(token=access_token)\n            auth_response = slack_client.auth_test()\n            auth_response.validate()\n\n            # Cast response.data to dict for type checking\n            auth_data: dict[str, Any] = auth_response.data  # type: ignore\n            team_id = auth_data.get(\"team_id\")\n            logger.debug(f\"Slack team_id: {team_id}\")\n        except Exception as e:\n            logger.warning(f\"Could not fetch team_id from Slack API: {e}\")\n\n        with get_session_with_current_tenant() as db_session:\n            return slack_retrieval(\n                query,\n                access_token,\n                db_session,\n                entities=entities,\n                limit=limit,\n                slack_event_context=slack_event_context,\n                bot_token=bot_token,\n                team_id=team_id,\n            )\n"
  },
  {
    "path": "backend/onyx/federated_connectors/slack/models.py",
    "content": "from typing import Optional\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\nfrom pydantic import field_validator\nfrom pydantic import model_validator\n\n\nclass SlackEntities(BaseModel):\n    \"\"\"Pydantic model for Slack federated search entities.\"\"\"\n\n    # Channel filtering\n    search_all_channels: bool = Field(\n        default=True,\n        description=\"Search all accessible channels. If not set, must specify channels below.\",\n    )\n    channels: Optional[list[str]] = Field(\n        default=None,\n        description=\"List of Slack channel names to search across.\",\n    )\n    exclude_channels: Optional[list[str]] = Field(\n        default=None,\n        description=\"List of channel names or patterns to exclude e.g. 'private-*, customer-*, secure-channel'.\",\n    )\n\n    # Direct message filtering\n    include_dm: bool = Field(\n        default=True,\n        description=\"Include user direct messages in search results\",\n    )\n    include_group_dm: bool = Field(\n        default=True,\n        description=\"Include group direct messages (multi-person DMs) in search results\",\n    )\n\n    # Private channel filtering\n    include_private_channels: bool = Field(\n        default=True,\n        description=\"Include private channels in search results (user must have access)\",\n    )\n\n    # Date range filtering\n    default_search_days: int = Field(\n        default=30,\n        description=\"Maximum number of days to search back. Increasing this value degrades answer quality.\",\n    )\n\n    # Message count per slack request\n    max_messages_per_query: int = Field(\n        default=10,\n        description=(\n            \"Maximum number of messages to retrieve per search query. \"\n            \"Higher values increase API calls and may trigger rate limits.\"\n        ),\n    )\n\n    @field_validator(\"default_search_days\")\n    @classmethod\n    def validate_default_search_days(cls, v: int) -> int:\n        \"\"\"Validate default_search_days is positive and reasonable\"\"\"\n        if v < 1:\n            raise ValueError(\"default_search_days must be at least 1\")\n        if v > 365:\n            raise ValueError(\"default_search_days cannot exceed 365 days\")\n        return v\n\n    @field_validator(\"max_messages_per_query\")\n    @classmethod\n    def validate_max_messages_per_query(cls, v: int) -> int:\n        \"\"\"Validate max_messages_per_query is positive and reasonable\"\"\"\n        if v < 1:\n            raise ValueError(\"max_messages_per_query must be at least 1\")\n        if v > 100:\n            raise ValueError(\"max_messages_per_query cannot exceed 100\")\n        return v\n\n    @field_validator(\"channels\")\n    @classmethod\n    def validate_channels(cls, v: Optional[list[str]]) -> Optional[list[str]]:\n        \"\"\"Validate each channel is a non-empty string\"\"\"\n        if v is not None:\n            if not isinstance(v, list):\n                raise ValueError(\"channels must be a list\")\n            for channel in v:\n                if not isinstance(channel, str) or not channel.strip():\n                    raise ValueError(\"Each channel must be a non-empty string\")\n        return v\n\n    @field_validator(\"exclude_channels\")\n    @classmethod\n    def validate_exclude_patterns(cls, v: Optional[list[str]]) -> Optional[list[str]]:\n        \"\"\"Validate each exclude pattern is a non-empty string\"\"\"\n        if v is None:\n            return v\n\n        for pattern in v:\n            if not isinstance(pattern, str) or not pattern.strip():\n                raise ValueError(\"Each exclude pattern must be a non-empty string\")\n\n        return v\n\n    @model_validator(mode=\"after\")\n    def validate_channel_config(self) -> \"SlackEntities\":\n        \"\"\"Validate search_all_channels configuration\"\"\"\n        # If search_all_channels is False, channels list must be provided\n        if not self.search_all_channels:\n            if self.channels is None or len(self.channels) == 0:\n                raise ValueError(\n                    \"Must specify at least one channel when search_all_channels is False\"\n                )\n\n        return self\n\n\nclass SlackCredentials(BaseModel):\n    \"\"\"Slack federated connector credentials.\"\"\"\n\n    client_id: str = Field(..., description=\"Slack app client ID\")\n    client_secret: str = Field(..., description=\"Slack app client secret\")\n\n    @field_validator(\"client_id\")\n    @classmethod\n    def validate_client_id(cls, v: str) -> str:\n        if not v or not v.strip():\n            raise ValueError(\"Client ID cannot be empty\")\n        return v.strip()\n\n    @field_validator(\"client_secret\")\n    @classmethod\n    def validate_client_secret(cls, v: str) -> str:\n        if not v or not v.strip():\n            raise ValueError(\"Client secret cannot be empty\")\n        return v.strip()\n\n\nclass SlackTeamInfo(BaseModel):\n    \"\"\"Information about a Slack team/workspace.\"\"\"\n\n    id: str = Field(..., description=\"Team ID\")\n    name: str = Field(..., description=\"Team name\")\n    domain: Optional[str] = Field(default=None, description=\"Team domain\")\n\n\nclass SlackUserInfo(BaseModel):\n    \"\"\"Information about a Slack user.\"\"\"\n\n    id: str = Field(..., description=\"User ID\")\n    team_id: Optional[str] = Field(default=None, description=\"Team ID\")\n    name: Optional[str] = Field(default=None, description=\"User name\")\n    email: Optional[str] = Field(default=None, description=\"User email\")\n\n\nclass SlackSearchResult(BaseModel):\n    \"\"\"Individual search result from Slack.\"\"\"\n\n    channel: str = Field(..., description=\"Channel where the message was found\")\n    timestamp: str = Field(..., description=\"Message timestamp\")\n    user: Optional[str] = Field(default=None, description=\"User who sent the message\")\n    text: str = Field(..., description=\"Message text\")\n    permalink: Optional[str] = Field(\n        default=None, description=\"Permalink to the message\"\n    )\n    score: Optional[float] = Field(default=None, description=\"Search relevance score\")\n\n    # Additional context\n    thread_ts: Optional[str] = Field(\n        default=None, description=\"Thread timestamp if in a thread\"\n    )\n    reply_count: Optional[int] = Field(\n        default=None, description=\"Number of replies if it's a thread\"\n    )\n\n\nclass SlackSearchResponse(BaseModel):\n    \"\"\"Response from Slack federated search.\"\"\"\n\n    query: str = Field(..., description=\"The search query\")\n    total_count: int = Field(..., description=\"Total number of results\")\n    results: list[SlackSearchResult] = Field(..., description=\"Search results\")\n    next_cursor: Optional[str] = Field(\n        default=None, description=\"Cursor for pagination\"\n    )\n\n    # Metadata\n    channels_searched: Optional[list[str]] = Field(\n        default=None, description=\"Channels that were searched\"\n    )\n    search_time_ms: Optional[int] = Field(\n        default=None, description=\"Time taken to search in milliseconds\"\n    )\n"
  },
  {
    "path": "backend/onyx/file_processing/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/file_processing/enums.py",
    "content": "from enum import Enum\n\n\nclass HtmlBasedConnectorTransformLinksStrategy(str, Enum):\n    # remove links entirely\n    STRIP = \"strip\"\n    # turn HTML links into markdown links\n    MARKDOWN = \"markdown\"\n"
  },
  {
    "path": "backend/onyx/file_processing/extract_file_text.py",
    "content": "import csv\nimport gc\nimport io\nimport json\nimport os\nimport re\nimport zipfile\nfrom collections.abc import Callable\nfrom collections.abc import Iterator\nfrom collections.abc import Sequence\nfrom email.parser import Parser as EmailParser\nfrom io import BytesIO\nfrom pathlib import Path\nfrom typing import Any\nfrom typing import IO\nfrom typing import NamedTuple\nfrom typing import Optional\nfrom typing import TYPE_CHECKING\nfrom zipfile import BadZipFile\n\nimport chardet\nimport openpyxl\nfrom openpyxl.worksheet.worksheet import Worksheet\nfrom PIL import Image\n\nfrom onyx.configs.constants import ONYX_METADATA_FILENAME\nfrom onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled\nfrom onyx.file_processing.file_types import OnyxFileExtensions\nfrom onyx.file_processing.file_types import OnyxMimeTypes\nfrom onyx.file_processing.file_types import PRESENTATION_MIME_TYPE\nfrom onyx.file_processing.file_types import WORD_PROCESSING_MIME_TYPE\nfrom onyx.file_processing.html_utils import parse_html_page_basic\nfrom onyx.file_processing.unstructured import get_unstructured_api_key\nfrom onyx.file_processing.unstructured import unstructured_to_text\nfrom onyx.utils.logger import setup_logger\n\nif TYPE_CHECKING:\n    from markitdown import MarkItDown\nlogger = setup_logger()\n\nTEXT_SECTION_SEPARATOR = \"\\n\\n\"\n\n_MARKITDOWN_CONVERTER: Optional[\"MarkItDown\"] = None\n\nKNOWN_OPENPYXL_BUGS = [\n    \"Value must be either numerical or a string containing a wildcard\",\n    \"File contains no valid workbook part\",\n    \"Unable to read workbook: could not read stylesheet from None\",\n    \"Colors must be aRGB hex values\",\n]\n\n\ndef get_markitdown_converter() -> \"MarkItDown\":\n    global _MARKITDOWN_CONVERTER\n    from markitdown import MarkItDown\n\n    if _MARKITDOWN_CONVERTER is None:\n        _MARKITDOWN_CONVERTER = MarkItDown(enable_plugins=False)\n    return _MARKITDOWN_CONVERTER\n\n\ndef get_file_ext(file_path_or_name: str | Path) -> str:\n    _, extension = os.path.splitext(file_path_or_name)\n    return extension.lower()\n\n\ndef is_text_file(file: IO[bytes]) -> bool:\n    \"\"\"\n    checks if the first 1024 bytes only contain printable or whitespace characters\n    if it does, then we say it's a plaintext file\n    \"\"\"\n    raw_data = file.read(1024)\n    file.seek(0)\n    text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F})\n    return all(c in text_chars for c in raw_data)\n\n\ndef detect_encoding(file: IO[bytes]) -> str:\n    raw_data = file.read(50000)\n    file.seek(0)\n    encoding = chardet.detect(raw_data)[\"encoding\"] or \"utf-8\"\n    return encoding\n\n\ndef is_macos_resource_fork_file(file_name: str) -> bool:\n    return os.path.basename(file_name).startswith(\"._\") and file_name.startswith(\n        \"__MACOSX\"\n    )\n\n\ndef to_bytesio(stream: IO[bytes]) -> BytesIO:\n    if isinstance(stream, BytesIO):\n        return stream\n    data = stream.read()  # consumes the stream!\n    return BytesIO(data)\n\n\ndef load_files_from_zip(\n    zip_file_io: IO,\n    ignore_macos_resource_fork_files: bool = True,\n    ignore_dirs: bool = True,\n) -> Iterator[tuple[zipfile.ZipInfo, IO[Any]]]:\n    \"\"\"\n    Iterates through files in a zip archive, yielding (ZipInfo, file handle) pairs.\n    \"\"\"\n    with zipfile.ZipFile(zip_file_io, \"r\") as zip_file:\n        for file_info in zip_file.infolist():\n            if ignore_dirs and file_info.is_dir():\n                continue\n\n            if (\n                ignore_macos_resource_fork_files\n                and is_macos_resource_fork_file(file_info.filename)\n            ) or file_info.filename == ONYX_METADATA_FILENAME:\n                continue\n\n            with zip_file.open(file_info.filename, \"r\") as subfile:\n                # Try to match by exact filename first\n                yield file_info, subfile\n\n\ndef _extract_onyx_metadata(line: str) -> dict | None:\n    \"\"\"\n    Example: first line has:\n        <!-- ONYX_METADATA={\"title\": \"...\"} -->\n      or\n        #ONYX_METADATA={\"title\":\"...\"}\n    \"\"\"\n    html_comment_pattern = r\"<!--\\s*ONYX_METADATA=\\{(.*?)\\}\\s*-->\"\n    hashtag_pattern = r\"#ONYX_METADATA=\\{(.*?)\\}\"\n\n    html_comment_match = re.search(html_comment_pattern, line)\n    hashtag_match = re.search(hashtag_pattern, line)\n\n    if html_comment_match:\n        json_str = html_comment_match.group(1)\n    elif hashtag_match:\n        json_str = hashtag_match.group(1)\n    else:\n        return None\n\n    try:\n        return json.loads(\"{\" + json_str + \"}\")\n    except json.JSONDecodeError:\n        return None\n\n\ndef read_text_file(\n    file: IO,\n    encoding: str = \"utf-8\",\n    errors: str = \"replace\",\n    ignore_onyx_metadata: bool = True,\n) -> tuple[str, dict]:\n    \"\"\"\n    For plain text files. Optionally extracts Onyx metadata from the first line.\n    \"\"\"\n    metadata = {}\n    file_content_raw = \"\"\n    for ind, line in enumerate(file):\n        # decode\n        try:\n            line = line.decode(encoding) if isinstance(line, bytes) else line\n        except UnicodeDecodeError:\n            line = (\n                line.decode(encoding, errors=errors)\n                if isinstance(line, bytes)\n                else line\n            )\n\n        # optionally parse metadata in the first line\n        if ind == 0 and not ignore_onyx_metadata:\n            potential_meta = _extract_onyx_metadata(line)\n            if potential_meta is not None:\n                metadata = potential_meta\n                continue\n\n        file_content_raw += line\n\n    return file_content_raw, metadata\n\n\ndef pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:\n    \"\"\"\n    Extract text from a PDF. For embedded images, a more complex approach is needed.\n    This is a minimal approach returning text only.\n    \"\"\"\n    text, _, _ = read_pdf_file(file, pdf_pass)\n    return text\n\n\ndef read_pdf_file(\n    file: IO[Any],\n    pdf_pass: str | None = None,\n    extract_images: bool = False,\n    image_callback: Callable[[bytes, str], None] | None = None,\n) -> tuple[str, dict[str, Any], Sequence[tuple[bytes, str]]]:\n    \"\"\"\n    Returns the text, basic PDF metadata, and optionally extracted images.\n    \"\"\"\n    from pypdf import PdfReader\n    from pypdf.errors import PdfStreamError\n\n    metadata: dict[str, Any] = {}\n    extracted_images: list[tuple[bytes, str]] = []\n    try:\n        pdf_reader = PdfReader(file)\n\n        if pdf_reader.is_encrypted and pdf_pass is not None:\n            decrypt_success = False\n            try:\n                decrypt_success = pdf_reader.decrypt(pdf_pass) != 0\n            except Exception:\n                logger.error(\"Unable to decrypt pdf\")\n\n            if not decrypt_success:\n                return \"\", metadata, []\n        elif pdf_reader.is_encrypted:\n            logger.warning(\"No Password for an encrypted PDF, returning empty text.\")\n            return \"\", metadata, []\n\n        # Basic PDF metadata\n        if pdf_reader.metadata is not None:\n            for key, value in pdf_reader.metadata.items():\n                clean_key = key.lstrip(\"/\")\n                if isinstance(value, str) and value.strip():\n                    metadata[clean_key] = value\n                elif isinstance(value, list) and all(\n                    isinstance(item, str) for item in value\n                ):\n                    metadata[clean_key] = \", \".join(value)\n\n        text = TEXT_SECTION_SEPARATOR.join(\n            page.extract_text() for page in pdf_reader.pages\n        )\n\n        if extract_images:\n            for page_num, page in enumerate(pdf_reader.pages):\n                for image_file_object in page.images:\n                    image = Image.open(io.BytesIO(image_file_object.data))\n                    img_byte_arr = io.BytesIO()\n                    image.save(img_byte_arr, format=image.format)\n                    img_bytes = img_byte_arr.getvalue()\n\n                    image_format = image.format.lower() if image.format else \"png\"\n                    image_name = f\"page_{page_num + 1}_image_{image_file_object.name}.{image_format}\"\n                    if image_callback is not None:\n                        # Stream image out immediately\n                        image_callback(img_bytes, image_name)\n                    else:\n                        extracted_images.append((img_bytes, image_name))\n\n        return text, metadata, extracted_images\n\n    except PdfStreamError:\n        logger.exception(\"Invalid PDF file\")\n    except Exception:\n        logger.exception(\"Failed to read PDF\")\n\n    return \"\", metadata, []\n\n\ndef extract_docx_images(docx_bytes: IO[Any]) -> Iterator[tuple[bytes, str]]:\n    \"\"\"\n    Given the bytes of a docx file, extract all the images.\n    Returns a list of tuples (image_bytes, image_name).\n    \"\"\"\n    try:\n        with zipfile.ZipFile(docx_bytes) as z:\n            for name in z.namelist():\n                if name.startswith(\"word/media/\"):\n                    yield (z.read(name), name.split(\"/\")[-1])\n    except Exception:\n        logger.exception(\"Failed to extract all docx images\")\n\n\ndef read_docx_file(\n    file: IO[Any],\n    file_name: str = \"\",\n    extract_images: bool = False,\n    image_callback: Callable[[bytes, str], None] | None = None,\n) -> tuple[str, Sequence[tuple[bytes, str]]]:\n    \"\"\"\n    Extract text from a docx.\n    Return (text_content, list_of_images).\n\n    The caller can choose to provide a callback to handle images with the intent\n    of avoiding materializing the list of images in memory.\n    The images list returned is empty in this case.\n    \"\"\"\n    md = get_markitdown_converter()\n    from markitdown import (\n        StreamInfo,\n        FileConversionException,\n        UnsupportedFormatException,\n    )\n\n    try:\n        doc = md.convert(\n            to_bytesio(file), stream_info=StreamInfo(mimetype=WORD_PROCESSING_MIME_TYPE)\n        )\n    except (\n        BadZipFile,\n        ValueError,\n        FileConversionException,\n        UnsupportedFormatException,\n    ) as e:\n        logger.warning(\n            f\"Failed to extract docx {file_name or 'docx file'}: {e}. Attempting to read as text file.\"\n        )\n\n        # May be an invalid docx, but still a valid text file\n        file.seek(0)\n        encoding = detect_encoding(file)\n        text_content_raw, _ = read_text_file(\n            file, encoding=encoding, ignore_onyx_metadata=False\n        )\n        return text_content_raw or \"\", []\n\n    file.seek(0)\n\n    if extract_images:\n        if image_callback is None:\n            return doc.markdown, list(extract_docx_images(to_bytesio(file)))\n        # If a callback is provided, iterate and stream images without accumulating\n        try:\n            for img_file_bytes, img_file_name in extract_docx_images(to_bytesio(file)):\n                image_callback(img_file_bytes, img_file_name)\n        except Exception:\n            logger.exception(\"Failed to stream docx images\")\n    return doc.markdown, []\n\n\ndef pptx_to_text(file: IO[Any], file_name: str = \"\") -> str:\n    md = get_markitdown_converter()\n    from markitdown import (\n        StreamInfo,\n        FileConversionException,\n        UnsupportedFormatException,\n    )\n\n    stream_info = StreamInfo(\n        mimetype=PRESENTATION_MIME_TYPE, filename=file_name or None, extension=\".pptx\"\n    )\n    try:\n        presentation = md.convert(to_bytesio(file), stream_info=stream_info)\n    except (\n        BadZipFile,\n        ValueError,\n        FileConversionException,\n        UnsupportedFormatException,\n    ) as e:\n        error_str = f\"Failed to extract text from {file_name or 'pptx file'}: {e}\"\n        logger.warning(error_str)\n        return \"\"\n    return presentation.markdown\n\n\ndef _worksheet_to_matrix(\n    worksheet: Worksheet,\n) -> list[list[str]]:\n    \"\"\"\n    Converts a singular worksheet to a matrix of values\n    \"\"\"\n    rows: list[list[str]] = []\n    for worksheet_row in worksheet.iter_rows(min_row=1, values_only=True):\n        row = [\"\" if cell is None else str(cell) for cell in worksheet_row]\n        rows.append(row)\n\n    return rows\n\n\ndef _clean_worksheet_matrix(matrix: list[list[str]]) -> list[list[str]]:\n    \"\"\"\n    Cleans a worksheet matrix by removing rows if there are N consecutive empty\n    rows and removing cols if there are M consecutive empty columns\n    \"\"\"\n    MAX_EMPTY_ROWS = 2  # Runs longer than this are capped to max_empty; shorter runs are preserved as-is\n    MAX_EMPTY_COLS = 2\n\n    # Row cleanup\n    matrix = _remove_empty_runs(matrix, max_empty=MAX_EMPTY_ROWS)\n\n    if not matrix:\n        return matrix\n\n    # Column cleanup — determine which columns to keep without transposing.\n    num_cols = len(matrix[0])\n    keep_cols = _columns_to_keep(matrix, num_cols, max_empty=MAX_EMPTY_COLS)\n    if len(keep_cols) < num_cols:\n        matrix = [[row[c] for c in keep_cols] for row in matrix]\n\n    return matrix\n\n\ndef _columns_to_keep(\n    matrix: list[list[str]], num_cols: int, max_empty: int\n) -> list[int]:\n    \"\"\"Return the indices of columns to keep after removing empty-column runs.\n\n    Uses the same logic as ``_remove_empty_runs`` but operates on column\n    indices so no transpose is needed.\n    \"\"\"\n    kept: list[int] = []\n    empty_buffer: list[int] = []\n\n    for col_idx in range(num_cols):\n        col_is_empty = all(not row[col_idx] for row in matrix)\n        if col_is_empty:\n            empty_buffer.append(col_idx)\n        else:\n            kept.extend(empty_buffer[:max_empty])\n            kept.append(col_idx)\n            empty_buffer = []\n\n    return kept\n\n\ndef _remove_empty_runs(\n    rows: list[list[str]],\n    max_empty: int,\n) -> list[list[str]]:\n    \"\"\"Removes entire runs of empty rows when the run length exceeds max_empty.\n\n    Leading empty runs are capped to max_empty, just like interior runs.\n    Trailing empty rows are always dropped since there is no subsequent\n    non-empty row to flush them.\n    \"\"\"\n    result: list[list[str]] = []\n    empty_buffer: list[list[str]] = []\n\n    for row in rows:\n        # Check if empty\n        if not any(row):\n            if len(empty_buffer) < max_empty:\n                empty_buffer.append(row)\n        else:\n            # Add upto max empty rows onto the result - that's what we allow\n            result.extend(empty_buffer[:max_empty])\n            # Add the new non-empty row\n            result.append(row)\n            empty_buffer = []\n\n    return result\n\n\ndef xlsx_to_text(file: IO[Any], file_name: str = \"\") -> str:\n    # TODO: switch back to this approach in a few months when markitdown\n    # fixes their handling of excel files\n\n    # md = get_markitdown_converter()\n    # stream_info = StreamInfo(\n    #     mimetype=SPREADSHEET_MIME_TYPE, filename=file_name or None, extension=\".xlsx\"\n    # )\n    # try:\n    #     workbook = md.convert(to_bytesio(file), stream_info=stream_info)\n    # except (\n    #     BadZipFile,\n    #     ValueError,\n    #     FileConversionException,\n    #     UnsupportedFormatException,\n    # ) as e:\n    #     error_str = f\"Failed to extract text from {file_name or 'xlsx file'}: {e}\"\n    #     if file_name.startswith(\"~\"):\n    #         logger.debug(error_str + \" (this is expected for files with ~)\")\n    #     else:\n    #         logger.warning(error_str)\n    #     return \"\"\n    # return workbook.markdown\n    try:\n        workbook = openpyxl.load_workbook(file, read_only=True)\n    except BadZipFile as e:\n        error_str = f\"Failed to extract text from {file_name or 'xlsx file'}: {e}\"\n        if file_name.startswith(\"~\"):\n            logger.debug(error_str + \" (this is expected for files with ~)\")\n        else:\n            logger.warning(error_str)\n        return \"\"\n    except Exception as e:\n        if any(s in str(e) for s in KNOWN_OPENPYXL_BUGS):\n            logger.error(\n                f\"Failed to extract text from {file_name or 'xlsx file'}. This happens due to a bug in openpyxl. {e}\"\n            )\n            return \"\"\n        raise\n\n    text_content = []\n    for sheet in workbook.worksheets:\n        sheet_matrix = _clean_worksheet_matrix(_worksheet_to_matrix(sheet))\n        buf = io.StringIO()\n        writer = csv.writer(buf, lineterminator=\"\\n\")\n        writer.writerows(sheet_matrix)\n        text_content.append(buf.getvalue().rstrip(\"\\n\"))\n    return TEXT_SECTION_SEPARATOR.join(text_content)\n\n\ndef eml_to_text(file: IO[Any]) -> str:\n    encoding = detect_encoding(file)\n    text_file = io.TextIOWrapper(file, encoding=encoding)\n    parser = EmailParser()\n    try:\n        message = parser.parse(text_file)\n    finally:\n        try:\n            # Keep underlying upload handle open for downstream consumers.\n            raw_file = text_file.detach()\n        except Exception as detach_error:\n            logger.warning(\n                f\"Failed to detach TextIOWrapper for EML upload, using original file: {detach_error}\"\n            )\n            raw_file = file\n        try:\n            raw_file.seek(0)\n        except Exception:\n            pass\n\n    text_content = []\n    for part in message.walk():\n        if part.get_content_type().startswith(\"text/plain\"):\n            payload = part.get_payload()\n            if isinstance(payload, str):\n                text_content.append(payload)\n            elif isinstance(payload, list):\n                text_content.extend(item for item in payload if isinstance(item, str))\n            else:\n                logger.warning(f\"Unexpected payload type: {type(payload)}\")\n    return TEXT_SECTION_SEPARATOR.join(text_content)\n\n\ndef epub_to_text(file: IO[Any]) -> str:\n    with zipfile.ZipFile(file) as epub:\n        text_content = []\n        for item in epub.infolist():\n            if item.filename.endswith(\".xhtml\") or item.filename.endswith(\".html\"):\n                with epub.open(item) as html_file:\n                    text_content.append(parse_html_page_basic(html_file))\n        return TEXT_SECTION_SEPARATOR.join(text_content)\n\n\ndef file_io_to_text(file: IO[Any]) -> str:\n    encoding = detect_encoding(file)\n    file_content, _ = read_text_file(file, encoding=encoding)\n    return file_content\n\n\ndef extract_file_text(\n    file: IO[Any],\n    file_name: str,\n    break_on_unprocessable: bool = True,\n    extension: str | None = None,\n) -> str:\n    \"\"\"\n    Legacy function that returns *only text*, ignoring embedded images.\n    For backward-compatibility in code that only wants text.\n\n    NOTE: Ignoring seems to be defined as returning an empty string for files it can't\n    handle (such as images).\n    \"\"\"\n    extension_to_function: dict[str, Callable[[IO[Any]], str]] = {\n        \".pdf\": pdf_to_text,\n        \".docx\": lambda f: read_docx_file(f, file_name)[0],  # no images\n        \".pptx\": lambda f: pptx_to_text(f, file_name),\n        \".xlsx\": lambda f: xlsx_to_text(f, file_name),\n        \".eml\": eml_to_text,\n        \".epub\": epub_to_text,\n        \".html\": parse_html_page_basic,\n    }\n\n    try:\n        if get_unstructured_api_key():\n            try:\n                return unstructured_to_text(file, file_name)\n            except Exception as unstructured_error:\n                logger.error(\n                    f\"Failed to process with Unstructured: {str(unstructured_error)}. Falling back to normal processing.\"\n                )\n        if extension is None:\n            extension = get_file_ext(file_name)\n\n        if extension in OnyxFileExtensions.TEXT_AND_DOCUMENT_EXTENSIONS:\n            func = extension_to_function.get(extension, file_io_to_text)\n            file.seek(0)\n            return func(file)\n\n        # If unknown extension, maybe it's a text file\n        file.seek(0)\n        if is_text_file(file):\n            return file_io_to_text(file)\n\n        raise ValueError(\"Unknown file extension or not recognized as text data\")\n\n    except Exception as e:\n        if break_on_unprocessable:\n            raise RuntimeError(\n                f\"Failed to process file {file_name or 'Unknown'}: {str(e)}\"\n            ) from e\n        logger.warning(f\"Failed to process file {file_name or 'Unknown'}: {str(e)}\")\n        return \"\"\n\n\nclass ExtractionResult(NamedTuple):\n    \"\"\"Structured result from text and image extraction from various file types.\"\"\"\n\n    text_content: str\n    embedded_images: Sequence[tuple[bytes, str]]\n    metadata: dict[str, Any]\n\n\ndef extract_result_from_text_file(file: IO[Any]) -> ExtractionResult:\n    encoding = detect_encoding(file)\n    text_content_raw, file_metadata = read_text_file(\n        file, encoding=encoding, ignore_onyx_metadata=False\n    )\n    return ExtractionResult(\n        text_content=text_content_raw,\n        embedded_images=[],\n        metadata=file_metadata,\n    )\n\n\ndef extract_text_and_images(\n    file: IO[Any],\n    file_name: str,\n    pdf_pass: str | None = None,\n    content_type: str | None = None,\n    image_callback: Callable[[bytes, str], None] | None = None,\n) -> ExtractionResult:\n    \"\"\"\n    Primary new function for the updated connector.\n    Returns structured extraction result with text content, embedded images, and metadata.\n\n    Args:\n        file: File-like object to extract content from.\n        file_name: Name of the file (used to determine extension/type).\n        pdf_pass: Optional password for encrypted PDFs.\n        content_type: Optional MIME type override for the file.\n        image_callback: Optional callback for streaming image extraction. When provided,\n            embedded images are passed to this callback one at a time as (bytes, filename)\n            instead of being accumulated in the returned ExtractionResult.embedded_images\n            list. This is a memory optimization for large documents with many images -\n            the caller can process/store each image immediately rather than holding all\n            images in memory. When using a callback, ExtractionResult.embedded_images\n            will be an empty list.\n\n    Returns:\n        ExtractionResult containing text_content, embedded_images (empty if callback used),\n        and metadata extracted from the file.\n    \"\"\"\n    res = _extract_text_and_images(\n        file, file_name, pdf_pass, content_type, image_callback\n    )\n    # Clean up any temporary objects and force garbage collection\n    unreachable = gc.collect()\n    logger.info(f\"Unreachable objects: {unreachable}\")\n\n    return res\n\n\ndef _extract_text_and_images(\n    file: IO[Any],\n    file_name: str,\n    pdf_pass: str | None = None,\n    content_type: str | None = None,\n    image_callback: Callable[[bytes, str], None] | None = None,\n) -> ExtractionResult:\n    file.seek(0)\n\n    if get_unstructured_api_key():\n        try:\n            text_content = unstructured_to_text(file, file_name)\n            return ExtractionResult(\n                text_content=text_content, embedded_images=[], metadata={}\n            )\n        except Exception as e:\n            logger.error(\n                f\"Failed to process with Unstructured: {str(e)}. Falling back to normal processing.\"\n            )\n            file.seek(0)  # Reset file pointer just in case\n\n    # When we upload a document via a connector or MyDocuments, we extract and store the content of files\n    # with content types in UploadMimeTypes.DOCUMENT_MIME_TYPES as plain text files.\n    # As a result, the file name extension may differ from the original content type.\n    # We process files with a plain text content type first to handle this scenario.\n    if content_type in OnyxMimeTypes.TEXT_MIME_TYPES:\n        return extract_result_from_text_file(file)\n\n    # Default processing\n    try:\n        extension = get_file_ext(file_name)\n        # docx example for embedded images\n        if extension == \".docx\":\n            text_content, images = read_docx_file(\n                file, file_name, extract_images=True, image_callback=image_callback\n            )\n            return ExtractionResult(\n                text_content=text_content, embedded_images=images, metadata={}\n            )\n\n        # PDF example: we do not show complicated PDF image extraction here\n        # so we simply extract text for now and skip images.\n        if extension == \".pdf\":\n            text_content, pdf_metadata, images = read_pdf_file(\n                file,\n                pdf_pass,\n                extract_images=get_image_extraction_and_analysis_enabled(),\n                image_callback=image_callback,\n            )\n            return ExtractionResult(\n                text_content=text_content, embedded_images=images, metadata=pdf_metadata\n            )\n\n        # For PPTX, XLSX, EML, etc., we do not show embedded image logic here.\n        # You can do something similar to docx if needed.\n        if extension == \".pptx\":\n            return ExtractionResult(\n                text_content=pptx_to_text(file, file_name=file_name),\n                embedded_images=[],\n                metadata={},\n            )\n\n        if extension == \".xlsx\":\n            return ExtractionResult(\n                text_content=xlsx_to_text(file, file_name=file_name),\n                embedded_images=[],\n                metadata={},\n            )\n\n        if extension == \".eml\":\n            return ExtractionResult(\n                text_content=eml_to_text(file), embedded_images=[], metadata={}\n            )\n\n        if extension == \".epub\":\n            return ExtractionResult(\n                text_content=epub_to_text(file), embedded_images=[], metadata={}\n            )\n\n        if extension == \".html\":\n            return ExtractionResult(\n                text_content=parse_html_page_basic(file),\n                embedded_images=[],\n                metadata={},\n            )\n\n        # If we reach here and it's a recognized text extension\n        if extension in OnyxFileExtensions.PLAIN_TEXT_EXTENSIONS:\n            return extract_result_from_text_file(file)\n\n        # If it's an image file or something else, we do not parse embedded images from them\n        # just return empty text\n        return ExtractionResult(text_content=\"\", embedded_images=[], metadata={})\n\n    except Exception as e:\n        logger.exception(f\"Failed to extract text/images from {file_name}: {e}\")\n        return ExtractionResult(text_content=\"\", embedded_images=[], metadata={})\n\n\ndef docx_to_txt_filename(file_path: str) -> str:\n    return file_path.rsplit(\".\", 1)[0] + \".txt\"\n"
  },
  {
    "path": "backend/onyx/file_processing/file_types.py",
    "content": "PRESENTATION_MIME_TYPE = (\n    \"application/vnd.openxmlformats-officedocument.presentationml.presentation\"\n)\n\nSPREADSHEET_MIME_TYPE = (\n    \"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\"\n)\nWORD_PROCESSING_MIME_TYPE = (\n    \"application/vnd.openxmlformats-officedocument.wordprocessingml.document\"\n)\nPDF_MIME_TYPE = \"application/pdf\"\nPLAIN_TEXT_MIME_TYPE = \"text/plain\"\n\n\nclass OnyxMimeTypes:\n    IMAGE_MIME_TYPES = {\"image/jpg\", \"image/jpeg\", \"image/png\", \"image/webp\"}\n    CSV_MIME_TYPES = {\"text/csv\"}\n    TABULAR_MIME_TYPES = CSV_MIME_TYPES | {SPREADSHEET_MIME_TYPE}\n    TEXT_MIME_TYPES = {\n        PLAIN_TEXT_MIME_TYPE,\n        \"text/markdown\",\n        \"text/x-markdown\",\n        \"text/x-log\",\n        \"text/x-config\",\n        \"text/tab-separated-values\",\n        \"application/json\",\n        \"application/xml\",\n        \"text/xml\",\n        \"application/x-yaml\",\n        \"application/yaml\",\n        \"text/yaml\",\n        \"text/x-yaml\",\n    }\n    DOCUMENT_MIME_TYPES = {\n        PDF_MIME_TYPE,\n        WORD_PROCESSING_MIME_TYPE,\n        PRESENTATION_MIME_TYPE,\n        \"message/rfc822\",\n        \"application/epub+zip\",\n    }\n\n    ALLOWED_MIME_TYPES = IMAGE_MIME_TYPES.union(\n        TEXT_MIME_TYPES, DOCUMENT_MIME_TYPES, TABULAR_MIME_TYPES\n    )\n\n    EXCLUDED_IMAGE_TYPES = {\n        \"image/bmp\",\n        \"image/tiff\",\n        \"image/gif\",\n        \"image/svg+xml\",\n        \"image/avif\",\n    }\n\n\nclass OnyxFileExtensions:\n    TABULAR_EXTENSIONS = {\n        \".csv\",\n        \".tsv\",\n        \".xlsx\",\n    }\n    PLAIN_TEXT_EXTENSIONS = {\n        \".txt\",\n        \".md\",\n        \".mdx\",\n        \".conf\",\n        \".log\",\n        \".json\",\n        \".csv\",\n        \".tsv\",\n        \".xml\",\n        \".yml\",\n        \".yaml\",\n        \".sql\",\n    }\n    DOCUMENT_EXTENSIONS = {\n        \".pdf\",\n        \".docx\",\n        \".pptx\",\n        \".xlsx\",\n        \".eml\",\n        \".epub\",\n        \".html\",\n    }\n    IMAGE_EXTENSIONS = {\n        \".png\",\n        \".jpg\",\n        \".jpeg\",\n        \".webp\",\n    }\n\n    TEXT_AND_DOCUMENT_EXTENSIONS = PLAIN_TEXT_EXTENSIONS.union(DOCUMENT_EXTENSIONS)\n\n    ALL_ALLOWED_EXTENSIONS = TEXT_AND_DOCUMENT_EXTENSIONS.union(IMAGE_EXTENSIONS)\n"
  },
  {
    "path": "backend/onyx/file_processing/html_utils.py",
    "content": "import re\nfrom copy import copy\nfrom dataclasses import dataclass\nfrom io import BytesIO\nfrom typing import IO\n\nimport bs4\n\nfrom onyx.configs.app_configs import HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY\nfrom onyx.configs.app_configs import PARSE_WITH_TRAFILATURA\nfrom onyx.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES\nfrom onyx.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS\nfrom onyx.file_processing.enums import HtmlBasedConnectorTransformLinksStrategy\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nMINTLIFY_UNWANTED = [\"sticky\", \"hidden\"]\n\n\n@dataclass\nclass ParsedHTML:\n    title: str | None\n    cleaned_text: str\n\n\ndef strip_excessive_newlines_and_spaces(document: str) -> str:\n    # collapse repeated spaces into one\n    document = re.sub(r\" +\", \" \", document)\n    # remove trailing spaces\n    document = re.sub(r\" +[\\n\\r]\", \"\\n\", document)\n    # remove repeated newlines\n    document = re.sub(r\"[\\n\\r]+\", \"\\n\", document)\n    return document.strip()\n\n\ndef strip_newlines(document: str) -> str:\n    # HTML might contain newlines which are just whitespaces to a browser\n    return re.sub(r\"[\\n\\r]+\", \" \", document)\n\n\ndef format_element_text(element_text: str, link_href: str | None) -> str:\n    element_text_no_newlines = strip_newlines(element_text)\n\n    if (\n        not link_href\n        or HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY\n        == HtmlBasedConnectorTransformLinksStrategy.STRIP\n    ):\n        return element_text_no_newlines\n\n    return f\"[{element_text_no_newlines}]({link_href})\"\n\n\ndef parse_html_with_trafilatura(html_content: str) -> str:\n    \"\"\"Parse HTML content using trafilatura.\"\"\"\n    import trafilatura  # type: ignore\n    from trafilatura.settings import use_config  # type: ignore\n\n    config = use_config()\n    config.set(\"DEFAULT\", \"include_links\", \"True\")\n    config.set(\"DEFAULT\", \"include_tables\", \"True\")\n    config.set(\"DEFAULT\", \"include_images\", \"True\")\n    config.set(\"DEFAULT\", \"include_formatting\", \"True\")\n\n    extracted_text = trafilatura.extract(html_content, config=config)\n    return strip_excessive_newlines_and_spaces(extracted_text) if extracted_text else \"\"\n\n\ndef format_document_soup(\n    document: bs4.BeautifulSoup, table_cell_separator: str = \"\\t\"\n) -> str:\n    \"\"\"Format html to a flat text document.\n\n    The following goals:\n    - Newlines from within the HTML are removed (as browser would ignore them as well).\n    - Repeated newlines/spaces are removed (as browsers would ignore them).\n    - Newlines only before and after headlines and paragraphs or when explicit (br or pre tag)\n    - Table columns/rows are separated by newline\n    - List elements are separated by newline and start with a hyphen\n    \"\"\"\n    text = \"\"\n    list_element_start = False\n    verbatim_output = 0\n    in_table = False\n    last_added_newline = False\n    link_href: str | None = None\n\n    for e in document.descendants:\n        verbatim_output -= 1\n        if isinstance(e, bs4.element.NavigableString):\n            if isinstance(e, (bs4.element.Comment, bs4.element.Doctype)):\n                continue\n            element_text = e.text\n            if in_table:\n                # Tables are represented in natural language with rows separated by newlines\n                # Can't have newlines then in the table elements\n                element_text = element_text.replace(\"\\n\", \" \").strip()\n\n            # Some tags are translated to spaces but in the logic underneath this section, we\n            # translate them to newlines as a browser should render them such as with br\n            # This logic here avoids a space after newline when it shouldn't be there.\n            if last_added_newline and element_text.startswith(\" \"):\n                element_text = element_text[1:]\n                last_added_newline = False\n\n            if element_text:\n                content_to_add = (\n                    element_text\n                    if verbatim_output > 0\n                    else format_element_text(element_text, link_href)\n                )\n\n                # Don't join separate elements without any spacing\n                if (text and not text[-1].isspace()) and (\n                    content_to_add and not content_to_add[0].isspace()\n                ):\n                    text += \" \"\n\n                text += content_to_add\n\n                list_element_start = False\n        elif isinstance(e, bs4.element.Tag):\n            # table is standard HTML element\n            if e.name == \"table\":\n                in_table = True\n            # tr is for rows\n            elif e.name == \"tr\" and in_table:\n                text += \"\\n\"\n            # td for data cell, th for header\n            elif e.name in [\"td\", \"th\"] and in_table:\n                text += table_cell_separator\n            elif e.name == \"/table\":\n                in_table = False\n            elif in_table:\n                # don't handle other cases while in table\n                pass\n            elif e.name == \"a\":\n                href_value = e.get(\"href\", None)\n                # mostly for typing, having multiple hrefs is not valid HTML\n                link_href = (\n                    href_value[0] if isinstance(href_value, list) else href_value\n                )\n            elif e.name == \"/a\":\n                link_href = None\n            elif e.name in [\"p\", \"div\"]:\n                if not list_element_start:\n                    text += \"\\n\"\n            elif e.name in [\"h1\", \"h2\", \"h3\", \"h4\"]:\n                text += \"\\n\"\n                list_element_start = False\n                last_added_newline = True\n            elif e.name == \"br\":\n                text += \"\\n\"\n                list_element_start = False\n                last_added_newline = True\n            elif e.name == \"li\":\n                text += \"\\n- \"\n                list_element_start = True\n            elif e.name == \"pre\":\n                if verbatim_output <= 0:\n                    verbatim_output = len(list(e.childGenerator()))\n    return strip_excessive_newlines_and_spaces(text)\n\n\ndef parse_html_page_basic(text: str | BytesIO | IO[bytes]) -> str:\n    soup = bs4.BeautifulSoup(text, \"lxml\")\n    return format_document_soup(soup)\n\n\ndef web_html_cleanup(\n    page_content: str | bs4.BeautifulSoup,\n    mintlify_cleanup_enabled: bool = True,\n    additional_element_types_to_discard: list[str] | None = None,\n) -> ParsedHTML:\n    if isinstance(page_content, str):\n        soup = bs4.BeautifulSoup(page_content, \"lxml\")\n    else:\n        soup = page_content\n\n    title_tag = soup.find(\"title\")\n    title = None\n    if title_tag and title_tag.text:\n        title = title_tag.text\n        title_tag.extract()\n\n    # Heuristics based cleaning of elements based on css classes\n    unwanted_classes = copy(WEB_CONNECTOR_IGNORED_CLASSES)\n    if mintlify_cleanup_enabled:\n        unwanted_classes.extend(MINTLIFY_UNWANTED)\n    for undesired_element in unwanted_classes:\n        [\n            tag.extract()\n            for tag in soup.find_all(\n                class_=lambda x: x and undesired_element in x.split()\n            )\n        ]\n\n    for undesired_tag in WEB_CONNECTOR_IGNORED_ELEMENTS:\n        [tag.extract() for tag in soup.find_all(undesired_tag)]\n\n    if additional_element_types_to_discard:\n        for undesired_tag in additional_element_types_to_discard:\n            [tag.extract() for tag in soup.find_all(undesired_tag)]\n\n    soup_string = str(soup)\n    page_text = \"\"\n\n    if PARSE_WITH_TRAFILATURA:\n        try:\n            page_text = parse_html_with_trafilatura(soup_string)\n            if not page_text:\n                raise ValueError(\"Empty content returned by trafilatura.\")\n        except Exception as e:\n            logger.info(f\"Trafilatura parsing failed: {e}. Falling back on bs4.\")\n            page_text = format_document_soup(soup)\n    else:\n        page_text = format_document_soup(soup)\n\n    # 200B is ZeroWidthSpace which we don't care for\n    cleaned_text = page_text.replace(\"\\u200b\", \"\")\n\n    return ParsedHTML(title=title, cleaned_text=cleaned_text)\n"
  },
  {
    "path": "backend/onyx/file_processing/image_summarization.py",
    "content": "import base64\nfrom io import BytesIO\n\nfrom PIL import Image\n\nfrom onyx.configs.app_configs import IMAGE_SUMMARIZATION_SYSTEM_PROMPT\nfrom onyx.configs.app_configs import IMAGE_SUMMARIZATION_USER_PROMPT\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.models import ChatCompletionMessage\nfrom onyx.llm.models import ContentPart\nfrom onyx.llm.models import ImageContentPart\nfrom onyx.llm.models import ImageUrlDetail\nfrom onyx.llm.models import SystemMessage\nfrom onyx.llm.models import TextContentPart\nfrom onyx.llm.models import UserMessage\nfrom onyx.llm.utils import llm_response_to_string\nfrom onyx.tracing.llm_utils import llm_generation_span\nfrom onyx.tracing.llm_utils import record_llm_response\nfrom onyx.utils.b64 import get_image_type_from_bytes\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass UnsupportedImageFormatError(ValueError):\n    \"\"\"Raised when an image uses a MIME type unsupported by the summarization flow.\"\"\"\n\n\ndef prepare_image_bytes(image_data: bytes) -> str:\n    \"\"\"Prepare image bytes for summarization.\n    Resizes image if it's larger than 20MB. Encodes image as a base64 string.\"\"\"\n    image_data = _resize_image_if_needed(image_data)\n\n    # encode image (base64)\n    encoded_image = _encode_image_for_llm_prompt(image_data)\n\n    return encoded_image\n\n\ndef summarize_image_pipeline(\n    llm: LLM,\n    image_data: bytes,\n    query: str | None = None,\n    system_prompt: str | None = None,\n) -> str:\n    \"\"\"Pipeline to generate a summary of an image.\n    Resizes images if it is bigger than 20MB. Encodes image as a base64 string.\n    And finally uses the Default LLM to generate a textual summary of the image.\"\"\"\n    # resize image if it's bigger than 20MB\n    encoded_image = prepare_image_bytes(image_data)\n\n    summary = _summarize_image(\n        encoded_image,\n        llm,\n        query,\n        system_prompt,\n    )\n\n    return summary\n\n\ndef summarize_image_with_error_handling(\n    llm: LLM | None,\n    image_data: bytes,\n    context_name: str,\n    system_prompt: str = IMAGE_SUMMARIZATION_SYSTEM_PROMPT,\n    user_prompt_template: str = IMAGE_SUMMARIZATION_USER_PROMPT,\n) -> str | None:\n    \"\"\"Wrapper function that handles error cases and configuration consistently.\n\n    Args:\n        llm: The LLM with vision capabilities to use for summarization\n        image_data: The raw image bytes\n        context_name: Name or title of the image for context\n        system_prompt: System prompt to use for the LLM\n        user_prompt_template: User prompt to use (without title)\n\n    Returns:\n        The image summary text, or None if summarization failed or is disabled\n    \"\"\"\n    if llm is None:\n        return None\n\n    # Prepend the image filename to the user prompt\n    user_prompt = (\n        f\"The image has the file name '{context_name}'.\\n{user_prompt_template}\"\n    )\n    try:\n        return summarize_image_pipeline(llm, image_data, user_prompt, system_prompt)\n    except UnsupportedImageFormatError:\n        magic_hex = image_data[:8].hex() if image_data else \"empty\"\n        logger.info(\n            \"Skipping image summarization due to unsupported MIME type \"\n            \"for %s (magic_bytes=%s, size=%d bytes)\",\n            context_name,\n            magic_hex,\n            len(image_data),\n        )\n        return None\n\n\ndef _summarize_image(\n    encoded_image: str,\n    llm: LLM,\n    query: str | None = None,\n    system_prompt: str | None = None,\n) -> str:\n    \"\"\"Use default LLM (if it is multimodal) to generate a summary of an image.\"\"\"\n\n    messages: list[ChatCompletionMessage] = []\n\n    if system_prompt:\n        messages.append(SystemMessage(content=system_prompt))\n\n    content: list[ContentPart] = []\n    if query:\n        content.append(TextContentPart(text=query))\n    content.append(ImageContentPart(image_url=ImageUrlDetail(url=encoded_image)))\n\n    messages.append(\n        UserMessage(\n            content=content,\n        ),\n    )\n\n    try:\n        # Call LLM with Braintrust tracing\n        with llm_generation_span(\n            llm=llm,\n            flow=\"image_summarization\",\n            input_messages=[{\"type\": \"image_summarization_request\"}],\n        ) as span_generation:\n            # Note: We don't include the actual image in the span input to avoid bloating traces\n            response = llm.invoke(messages)\n            record_llm_response(span_generation, response)\n            summary = llm_response_to_string(response)\n\n        return summary\n\n    except Exception as e:\n        # Extract structured details from LiteLLM exceptions when available,\n        # rather than dumping the full messages payload (which contains base64\n        # image data and produces enormous, unreadable error logs).\n        str_e = str(e)\n        if len(str_e) > 512:\n            str_e = str_e[:512] + \"... (truncated)\"\n        parts = [f\"Summarization failed: {type(e).__name__}: {str_e}\"]\n        status_code = getattr(e, \"status_code\", None)\n        llm_provider = getattr(e, \"llm_provider\", None)\n        model = getattr(e, \"model\", None)\n        if status_code is not None:\n            parts.append(f\"status_code={status_code}\")\n        if llm_provider is not None:\n            parts.append(f\"llm_provider={llm_provider}\")\n        if model is not None:\n            parts.append(f\"model={model}\")\n        raise ValueError(\" | \".join(parts)) from e\n\n\ndef _encode_image_for_llm_prompt(image_data: bytes) -> str:\n    \"\"\"Prepare a data URL with the correct MIME type for the LLM message.\"\"\"\n    try:\n        mime_type = get_image_type_from_bytes(image_data)\n    except ValueError as exc:\n        raise UnsupportedImageFormatError(\n            \"Unsupported image format for summarization\"\n        ) from exc\n\n    base64_encoded_data = base64.b64encode(image_data).decode(\"utf-8\")\n\n    return f\"data:{mime_type};base64,{base64_encoded_data}\"\n\n\ndef _resize_image_if_needed(image_data: bytes, max_size_mb: int = 20) -> bytes:\n    \"\"\"Resize image if it's larger than the specified max size in MB.\"\"\"\n    max_size_bytes = max_size_mb * 1024 * 1024\n\n    if len(image_data) > max_size_bytes:\n        with Image.open(BytesIO(image_data)) as img:\n            # Reduce dimensions for better size reduction\n            img.thumbnail((1024, 1024), Image.Resampling.LANCZOS)\n            output = BytesIO()\n\n            # Save with lower quality for compression\n            img.save(output, format=\"JPEG\", quality=85)\n            resized_data = output.getvalue()\n\n            return resized_data\n\n    return image_data\n"
  },
  {
    "path": "backend/onyx/file_processing/image_utils.py",
    "content": "from io import BytesIO\nfrom typing import Tuple\n\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.connectors.models import ImageSection\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef store_image_and_create_section(\n    image_data: bytes,\n    file_id: str,\n    display_name: str,\n    link: str | None = None,\n    media_type: str = \"application/octet-stream\",\n    file_origin: FileOrigin = FileOrigin.OTHER,\n) -> Tuple[ImageSection, str | None]:\n    \"\"\"\n    Stores an image in FileStore and creates an ImageSection object without summarization.\n\n    Args:\n        image_data: Raw image bytes\n        file_id: Base identifier for the file\n        display_name: Human-readable name for the image\n        media_type: MIME type of the image\n        file_origin: Origin of the file (e.g., CONFLUENCE, GOOGLE_DRIVE, etc.)\n\n    Returns:\n        Tuple containing:\n        - ImageSection object with image reference\n        - The file_id in FileStore or None if storage failed\n    \"\"\"\n    # Storage logic\n    try:\n        file_store = get_default_file_store()\n        file_id = file_store.save_file(\n            content=BytesIO(image_data),\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=media_type,\n            file_id=file_id,\n        )\n    except Exception as e:\n        logger.error(f\"Failed to store image: {e}\")\n        raise e\n\n    # Create an ImageSection with empty text (will be filled by LLM later in the pipeline)\n    return (\n        ImageSection(image_file_id=file_id, link=link),\n        file_id,\n    )\n"
  },
  {
    "path": "backend/onyx/file_processing/password_validation.py",
    "content": "from collections.abc import Callable\nfrom collections.abc import Generator\nfrom contextlib import contextmanager\nfrom typing import Any\nfrom typing import IO\n\nfrom onyx.file_processing.extract_file_text import get_file_ext\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nPASSWORD_PROTECTED_FILES = [\n    \".pdf\",\n    \".docx\",\n    \".pptx\",\n    \".xlsx\",\n]\n\n\n@contextmanager\ndef preserve_position(file: IO[Any]) -> Generator[IO[Any], None, None]:\n    \"\"\"Preserves the file's cursor position\"\"\"\n    pos = file.tell()\n    try:\n        file.seek(0)\n        yield file\n    finally:\n        file.seek(pos)\n\n\ndef is_pdf_protected(file: IO[Any]) -> bool:\n    from pypdf import PdfReader\n\n    with preserve_position(file):\n        reader = PdfReader(file)\n\n    return bool(reader.is_encrypted)\n\n\ndef is_docx_protected(file: IO[Any]) -> bool:\n    return is_office_file_protected(file)\n\n\ndef is_pptx_protected(file: IO[Any]) -> bool:\n    return is_office_file_protected(file)\n\n\ndef is_xlsx_protected(file: IO[Any]) -> bool:\n    return is_office_file_protected(file)\n\n\ndef is_office_file_protected(file: IO[Any]) -> bool:\n    import msoffcrypto  # type: ignore[import-untyped]\n\n    with preserve_position(file):\n        office = msoffcrypto.OfficeFile(file)\n\n    return office.is_encrypted()\n\n\ndef is_file_password_protected(\n    file: IO[Any],\n    file_name: str,\n    extension: str | None = None,\n) -> bool:\n    extension_to_function: dict[str, Callable[[IO[Any]], bool]] = {\n        \".pdf\": is_pdf_protected,\n        \".docx\": is_docx_protected,\n        \".pptx\": is_pptx_protected,\n        \".xlsx\": is_xlsx_protected,\n    }\n\n    if not extension:\n        extension = get_file_ext(file_name)\n\n    if extension not in PASSWORD_PROTECTED_FILES:\n        return False\n\n    if extension not in extension_to_function:\n        logger.warning(\n            f\"Extension={extension} can be password protected, but no function found\"\n        )\n        return False\n\n    func = extension_to_function[extension]\n\n    return func(file)\n"
  },
  {
    "path": "backend/onyx/file_processing/unstructured.py",
    "content": "from typing import Any\nfrom typing import cast\nfrom typing import IO\nfrom typing import TYPE_CHECKING\n\nfrom onyx.configs.constants import KV_UNSTRUCTURED_API_KEY\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.utils.logger import setup_logger\n\nif TYPE_CHECKING:\n    from unstructured_client.models import operations\n\n\nlogger = setup_logger()\n\n\ndef get_unstructured_api_key() -> str | None:\n    kv_store = get_kv_store()\n    try:\n        return cast(str, kv_store.load(KV_UNSTRUCTURED_API_KEY))\n    except KvKeyNotFoundError:\n        return None\n\n\ndef update_unstructured_api_key(api_key: str) -> None:\n    kv_store = get_kv_store()\n    kv_store.store(KV_UNSTRUCTURED_API_KEY, api_key)\n\n\ndef delete_unstructured_api_key() -> None:\n    kv_store = get_kv_store()\n    kv_store.delete(KV_UNSTRUCTURED_API_KEY)\n\n\ndef _sdk_partition_request(\n    file: IO[Any], file_name: str, **kwargs: Any\n) -> \"operations.PartitionRequest\":\n    from unstructured_client.models import operations\n    from unstructured_client.models import shared\n\n    file.seek(0, 0)\n    try:\n        request = operations.PartitionRequest(\n            partition_parameters=shared.PartitionParameters(\n                files=shared.Files(content=file.read(), file_name=file_name),\n                **kwargs,\n            ),\n        )\n        return request\n    except Exception as e:\n        logger.error(f\"Error creating partition request for file {file_name}: {str(e)}\")\n        raise\n\n\ndef unstructured_to_text(file: IO[Any], file_name: str) -> str:\n    from unstructured.staging.base import dict_to_elements\n    from unstructured_client import UnstructuredClient\n\n    logger.debug(f\"Starting to read file: {file_name}\")\n    req = _sdk_partition_request(file, file_name, strategy=\"fast\")\n\n    unstructured_client = UnstructuredClient(api_key_auth=get_unstructured_api_key())\n\n    response = unstructured_client.general.partition(request=req)\n\n    if response.status_code != 200:\n        err = f\"Received unexpected status code {response.status_code} from Unstructured API.\"\n        logger.error(err)\n        raise ValueError(err)\n\n    elements = dict_to_elements(response.elements or [])\n    return \"\\n\\n\".join(str(el) for el in elements)\n"
  },
  {
    "path": "backend/onyx/file_store/README.md",
    "content": "# Onyx File Store\n\nThe Onyx file store provides a unified interface for storing files and large binary objects in S3-compatible storage systems. It supports AWS S3, MinIO, Azure Blob Storage, Digital Ocean Spaces, and other S3-compatible services.\n\n## Architecture\n\nThe file store uses a single database table (`file_record`) to store file metadata while the actual file content is stored in external S3-compatible storage. This approach provides scalability, cost-effectiveness, and decouples file storage from the database.\n\n### Database Schema\n\nThe `file_record` table contains the following columns:\n\n- `file_id` (primary key): Unique identifier for the file\n- `display_name`: Human-readable name for the file\n- `file_origin`: Origin/source of the file (enum)\n- `file_type`: MIME type of the file\n- `file_metadata`: Additional metadata as JSON\n- `bucket_name`: External storage bucket/container name\n- `object_key`: External storage object key/path\n- `created_at`: Timestamp when the file was created\n- `updated_at`: Timestamp when the file was last updated\n\n## Storage Backend\n\n### S3-Compatible Storage\n\nStores files in external S3-compatible storage systems while keeping metadata in the database.\n\n**Pros:**\n- Scalable storage\n- Cost-effective for large files\n- CDN integration possible\n- Decoupled from database\n- Wide ecosystem support\n\n**Cons:**\n- Additional infrastructure required\n- Network dependency\n- Eventual consistency considerations\n\n## Configuration\n\nAll configuration is handled via environment variables. The system requires S3-compatible storage to be configured.\n\n### AWS S3\n\n```bash\nS3_FILE_STORE_BUCKET_NAME=your-bucket-name  # Defaults to 'onyx-file-store-bucket'\nS3_FILE_STORE_PREFIX=onyx-files  # Optional, defaults to 'onyx-files'\n\n# AWS credentials (use one of these methods):\n# 1. Environment variables\nS3_AWS_ACCESS_KEY_ID=your-access-key\nS3_AWS_SECRET_ACCESS_KEY=your-secret-key\nAWS_REGION_NAME=us-east-2  # Optional, defaults to 'us-east-2'\n\n# 2. IAM roles (recommended for EC2/ECS deployments)\n# No additional configuration needed if using IAM roles\n```\n\n### MinIO\n\n```bash\nS3_FILE_STORE_BUCKET_NAME=your-bucket-name\nS3_ENDPOINT_URL=http://localhost:9000  # MinIO endpoint\nS3_AWS_ACCESS_KEY_ID=minioadmin\nS3_AWS_SECRET_ACCESS_KEY=minioadmin\nAWS_REGION_NAME=us-east-1  # Any region name\nS3_VERIFY_SSL=false  # Optional, defaults to false\n```\n\n### Digital Ocean Spaces\n\n```bash\nS3_FILE_STORE_BUCKET_NAME=your-space-name\nS3_ENDPOINT_URL=https://nyc3.digitaloceanspaces.com\nS3_AWS_ACCESS_KEY_ID=your-spaces-key\nS3_AWS_SECRET_ACCESS_KEY=your-spaces-secret\nAWS_REGION_NAME=nyc3\n```\n\n### Other S3-Compatible Services\n\nThe file store works with any S3-compatible service. Simply configure:\n- `S3_FILE_STORE_BUCKET_NAME`: Your bucket/container name\n- `S3_ENDPOINT_URL`: The service endpoint URL\n- `S3_AWS_ACCESS_KEY_ID` and `S3_AWS_SECRET_ACCESS_KEY`: Your credentials\n- `AWS_REGION_NAME`: The region (any valid region name)\n\n## Implementation\n\nThe system uses the `S3BackedFileStore` class that implements the abstract `FileStore` interface. The database uses generic column names (`bucket_name`, `object_key`) to maintain compatibility with different S3-compatible services.\n\n### File Store Interface\n\nThe `FileStore` abstract base class defines the following methods:\n\n- `initialize()`: Initialize the storage backend (create bucket if needed)\n- `has_file(file_id, file_origin, file_type)`: Check if a file exists\n- `save_file(content, display_name, file_origin, file_type, file_metadata, file_id)`: Save a file\n- `read_file(file_id, mode, use_tempfile)`: Read file content\n- `read_file_record(file_id)`: Get file metadata from database\n- `delete_file(file_id)`: Delete a file and its metadata\n- `get_file_with_mime_type(file_id)`: Get file with parsed MIME type\n\n## Usage Example\n\n```python\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.configs.constants import FileOrigin\n\n# Get the configured file store\nfile_store = get_default_file_store(db_session)\n\n# Initialize the storage backend (creates bucket if needed)\nfile_store.initialize()\n\n# Save a file\nwith open(\"example.pdf\", \"rb\") as f:\n    file_id = file_store.save_file(\n        content=f,\n        display_name=\"Important Document.pdf\",\n        file_origin=FileOrigin.OTHER,\n        file_type=\"application/pdf\",\n        file_metadata={\"department\": \"engineering\", \"version\": \"1.0\"}\n    )\n\n# Check if a file exists\nexists = file_store.has_file(\n    file_id=file_id,\n    file_origin=FileOrigin.OTHER,\n    file_type=\"application/pdf\"\n)\n\n# Read a file\nfile_content = file_store.read_file(file_id)\n\n# Read file with temporary file (for large files)\nfile_content = file_store.read_file(file_id, use_tempfile=True)\n\n# Get file metadata\nfile_record = file_store.read_file_record(file_id)\n\n# Get file with MIME type detection\nfile_with_mime = file_store.get_file_with_mime_type(file_id)\n\n# Delete a file\nfile_store.delete_file(file_id)\n```\n\n## Initialization\n\nWhen deploying the application, ensure that:\n\n1. The S3-compatible storage service is accessible\n2. Credentials are properly configured\n3. The bucket specified in `S3_FILE_STORE_BUCKET_NAME` exists or the service account has permissions to create it\n4. Call `file_store.initialize()` during application startup to ensure the bucket exists\n\nThe file store will automatically create the bucket if it doesn't exist and the credentials have sufficient permissions.\n "
  },
  {
    "path": "backend/onyx/file_store/constants.py",
    "content": "MAX_IN_MEMORY_SIZE = 30 * 1024 * 1024  # 30MB\nSTANDARD_CHUNK_SIZE = 10 * 1024 * 1024  # 10MB chunks\n"
  },
  {
    "path": "backend/onyx/file_store/document_batch_storage.py",
    "content": "import json\nfrom abc import ABC\nfrom abc import abstractmethod\nfrom enum import Enum\nfrom io import StringIO\nfrom typing import List\nfrom typing import Optional\nfrom typing import TypeAlias\n\nfrom pydantic import BaseModel\n\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.connectors.models import DocExtractionContext\nfrom onyx.connectors.models import DocIndexingContext\nfrom onyx.connectors.models import Document\nfrom onyx.file_store.file_store import FileStore\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass DocumentBatchStorageStateType(str, Enum):\n    EXTRACTION = \"extraction\"\n    INDEXING = \"indexing\"\n\n\nDocumentStorageState: TypeAlias = DocExtractionContext | DocIndexingContext\n\nSTATE_TYPE_TO_MODEL: dict[str, type[DocumentStorageState]] = {\n    DocumentBatchStorageStateType.EXTRACTION.value: DocExtractionContext,\n    DocumentBatchStorageStateType.INDEXING.value: DocIndexingContext,\n}\n\n\nclass BatchStoragePathInfo(BaseModel):\n    cc_pair_id: int\n    index_attempt_id: int\n    batch_num: int\n\n\nclass DocumentBatchStorage(ABC):\n    \"\"\"Abstract base class for document batch storage implementations.\"\"\"\n\n    def __init__(self, cc_pair_id: int, index_attempt_id: int):\n        self.cc_pair_id = cc_pair_id\n        self.index_attempt_id = index_attempt_id\n        self.base_path = f\"{self._per_cc_pair_base_path()}/{index_attempt_id}\"\n\n    @abstractmethod\n    def store_batch(self, batch_num: int, documents: List[Document]) -> None:\n        \"\"\"Store a batch of documents.\"\"\"\n\n    @abstractmethod\n    def get_batch(self, batch_num: int) -> Optional[List[Document]]:\n        \"\"\"Retrieve a batch of documents.\"\"\"\n\n    @abstractmethod\n    def delete_batch_by_name(self, batch_file_name: str) -> None:\n        \"\"\"Delete a specific batch.\"\"\"\n\n    @abstractmethod\n    def delete_batch_by_num(self, batch_num: int) -> None:\n        \"\"\"Delete a specific batch.\"\"\"\n\n    @abstractmethod\n    def cleanup_all_batches(self) -> None:\n        \"\"\"Clean up all batches and state for this index attempt.\"\"\"\n\n    @abstractmethod\n    def get_all_batches_for_cc_pair(self) -> list[str]:\n        \"\"\"Get all IDs of batches stored in the file store.\"\"\"\n\n    @abstractmethod\n    def update_old_batches_to_new_index_attempt(self, batch_names: list[str]) -> None:\n        \"\"\"Update all batches to the new index attempt.\"\"\"\n        \"\"\"\n        This is used when we need to re-issue docprocessing tasks for a new index attempt.\n        We need to update the batch file names to the new index attempt ID.\n        \"\"\"\n\n    @abstractmethod\n    def extract_path_info(self, path: str) -> BatchStoragePathInfo | None:\n        \"\"\"Extract path info from a path.\"\"\"\n\n    def _serialize_documents(self, documents: list[Document]) -> str:\n        \"\"\"Serialize documents to JSON string.\"\"\"\n        # Use mode='json' to properly serialize datetime and other complex types\n        return json.dumps([doc.model_dump(mode=\"json\") for doc in documents], indent=2)\n\n    def _deserialize_documents(self, data: str) -> list[Document]:\n        \"\"\"Deserialize documents from JSON string.\"\"\"\n        doc_dicts = json.loads(data)\n        return [\n            Document.model_validate(self._normalize_doc_dict(doc_dict))\n            for doc_dict in doc_dicts\n        ]\n\n    def _normalize_doc_dict(self, doc_dict: dict) -> dict:\n        \"\"\"Normalize document dict to handle legacy data with non-string metadata values.\n\n        Before the _convert_to_metadata_value fix, Salesforce connector stored raw\n        types (bool, float, None) in metadata. This converts them to strings for\n        backward compatibility.\n        \"\"\"\n        if \"metadata\" not in doc_dict:\n            return doc_dict\n\n        metadata = doc_dict[\"metadata\"]\n        if not isinstance(metadata, dict):\n            return doc_dict\n\n        normalized_metadata: dict[str, str | list[str]] = {}\n        converted_keys: list[str] = []\n        for key, value in metadata.items():\n            if isinstance(value, list):\n                normalized_metadata[key] = [str(item) for item in value]\n            elif isinstance(value, str):\n                normalized_metadata[key] = value\n            else:\n                # Convert bool, int, float, None to string\n                converted_keys.append(f\"{key}={type(value).__name__}\")\n                normalized_metadata[key] = str(value)\n\n        if converted_keys:\n            doc_id = doc_dict.get(\"id\", \"unknown\")\n            logger.warning(\n                f\"Normalized legacy metadata for document {doc_id}: {converted_keys}\"\n            )\n\n        doc_dict[\"metadata\"] = normalized_metadata\n        return doc_dict\n\n    def _per_cc_pair_base_path(self) -> str:\n        \"\"\"Get the base path for the cc pair.\"\"\"\n        return f\"iab/{self.cc_pair_id}\"\n\n\nclass FileStoreDocumentBatchStorage(DocumentBatchStorage):\n    \"\"\"FileStore-based implementation of document batch storage.\"\"\"\n\n    def __init__(self, cc_pair_id: int, index_attempt_id: int, file_store: FileStore):\n        super().__init__(cc_pair_id, index_attempt_id)\n        self.file_store = file_store\n\n    def _get_batch_file_name(self, batch_num: int) -> str:\n        \"\"\"Generate file name for a document batch.\"\"\"\n        return f\"{self.base_path}/{batch_num}.json\"\n\n    def store_batch(self, batch_num: int, documents: list[Document]) -> None:\n        \"\"\"Store a batch of documents using FileStore.\"\"\"\n        file_name = self._get_batch_file_name(batch_num)\n        try:\n            data = self._serialize_documents(documents)\n            content = StringIO(data)\n\n            self.file_store.save_file(\n                file_id=file_name,\n                content=content,\n                display_name=f\"Document Batch {batch_num}\",\n                file_origin=FileOrigin.OTHER,\n                file_type=\"application/json\",\n                file_metadata={\n                    \"batch_num\": batch_num,\n                    \"document_count\": str(len(documents)),\n                },\n            )\n\n            logger.debug(\n                f\"Stored batch {batch_num} with {len(documents)} documents to FileStore as {file_name}\"\n            )\n        except Exception as e:\n            logger.error(f\"Failed to store batch {batch_num}: {e}\")\n            raise\n\n    def get_batch(self, batch_num: int) -> list[Document] | None:\n        \"\"\"Retrieve a batch of documents from FileStore.\"\"\"\n        file_name = self._get_batch_file_name(batch_num)\n        try:\n            # Check if file exists\n            if not self.file_store.has_file(\n                file_id=file_name,\n                file_origin=FileOrigin.OTHER,\n                file_type=\"application/json\",\n            ):\n                logger.warning(\n                    f\"Batch {batch_num} not found in FileStore with name {file_name}\"\n                )\n                return None\n\n            content_io = self.file_store.read_file(file_name)\n            data = content_io.read().decode(\"utf-8\")\n\n            documents = self._deserialize_documents(data)\n            logger.debug(\n                f\"Retrieved batch {batch_num} with {len(documents)} documents from FileStore\"\n            )\n            return documents\n        except Exception as e:\n            logger.error(f\"Failed to retrieve batch {batch_num}: {e}\")\n            raise\n\n    def delete_batch_by_name(self, batch_file_name: str) -> None:\n        \"\"\"Delete a specific batch from FileStore.\"\"\"\n        self.file_store.delete_file(batch_file_name)\n        logger.debug(f\"Deleted batch {batch_file_name} from FileStore\")\n\n    def delete_batch_by_num(self, batch_num: int) -> None:\n        \"\"\"Delete a specific batch from FileStore.\"\"\"\n        batch_file_name = self._get_batch_file_name(batch_num)\n        self.delete_batch_by_name(batch_file_name)\n        logger.debug(f\"Deleted batch num {batch_num} {batch_file_name} from FileStore\")\n\n    def cleanup_all_batches(self) -> None:\n        \"\"\"Clean up all batches for this index attempt.\"\"\"\n        for batch_file_name in self.get_all_batches_for_cc_pair():\n            self.delete_batch_by_name(batch_file_name)\n\n    def get_all_batches_for_cc_pair(self) -> list[str]:\n        \"\"\"Get all IDs of batches stored in the file store for the cc pair\n        this batch store was initialized with.\n        This includes any batches left over from a previous\n        indexing attempt that need to be processed.\n        \"\"\"\n        return [\n            file.file_id\n            for file in self.file_store.list_files_by_prefix(\n                self._per_cc_pair_base_path()\n            )\n        ]\n\n    def update_old_batches_to_new_index_attempt(self, batch_names: list[str]) -> None:\n        \"\"\"Update all batches to the new index attempt.\"\"\"\n        for batch_file_name in batch_names:\n            path_info = self.extract_path_info(batch_file_name)\n            if path_info is None:\n                logger.warning(\n                    f\"Could not extract path info from batch file: {batch_file_name}\"\n                )\n                continue\n            new_batch_file_name = self._get_batch_file_name(path_info.batch_num)\n            self.file_store.change_file_id(batch_file_name, new_batch_file_name)\n\n    def extract_path_info(self, path: str) -> BatchStoragePathInfo | None:\n        \"\"\"Extract path info from a path.\"\"\"\n        path_spl = path.split(\"/\")\n        # TODO: remove this in a few months, just for backwards compatibility\n        if len(path_spl) == 3:\n            path_spl = [\"iab\"] + path_spl\n        try:\n            _, cc_pair_id, index_attempt_id, batch_num = path_spl\n            return BatchStoragePathInfo(\n                cc_pair_id=int(cc_pair_id),\n                index_attempt_id=int(index_attempt_id),\n                batch_num=int(batch_num.split(\".\")[0]),  # remove .json\n            )\n        except Exception as e:\n            logger.error(f\"Failed to extract path info from {path}: {e}\")\n            return None\n\n\ndef get_document_batch_storage(\n    cc_pair_id: int, index_attempt_id: int\n) -> DocumentBatchStorage:\n    \"\"\"Factory function to get the configured document batch storage implementation.\"\"\"\n    # The get_default_file_store will now correctly use S3BackedFileStore\n    # or other configured stores based on environment variables\n    file_store = get_default_file_store()\n    return FileStoreDocumentBatchStorage(cc_pair_id, index_attempt_id, file_store)\n"
  },
  {
    "path": "backend/onyx/file_store/file_store.py",
    "content": "import hashlib\nimport tempfile\nimport uuid\nfrom abc import ABC\nfrom abc import abstractmethod\nfrom io import BytesIO\nfrom typing import Any\nfrom typing import cast\nfrom typing import IO\nfrom typing import NotRequired\nfrom typing import TypedDict\n\nimport boto3\nimport puremagic\nfrom botocore.config import Config\nfrom botocore.exceptions import ClientError\nfrom mypy_boto3_s3 import S3Client\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import AWS_REGION_NAME\nfrom onyx.configs.app_configs import S3_AWS_ACCESS_KEY_ID\nfrom onyx.configs.app_configs import S3_AWS_SECRET_ACCESS_KEY\nfrom onyx.configs.app_configs import S3_ENDPOINT_URL\nfrom onyx.configs.app_configs import S3_FILE_STORE_BUCKET_NAME\nfrom onyx.configs.app_configs import S3_FILE_STORE_PREFIX\nfrom onyx.configs.app_configs import S3_GENERATE_LOCAL_CHECKSUM\nfrom onyx.configs.app_configs import S3_VERIFY_SSL\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant_if_none\nfrom onyx.db.file_record import delete_filerecord_by_file_id\nfrom onyx.db.file_record import get_filerecord_by_file_id\nfrom onyx.db.file_record import get_filerecord_by_file_id_optional\nfrom onyx.db.file_record import get_filerecord_by_prefix\nfrom onyx.db.file_record import upsert_filerecord\nfrom onyx.db.models import FileRecord\nfrom onyx.db.models import FileRecord as FileStoreModel\nfrom onyx.file_store.s3_key_utils import generate_s3_key\nfrom onyx.utils.file import FileWithMimeType\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\nclass S3PutKwargs(TypedDict):\n    ChecksumSHA256: NotRequired[str]\n\n\nclass FileStore(ABC):\n    \"\"\"\n    An abstraction for storing files and large binary objects.\n    \"\"\"\n\n    @abstractmethod\n    def initialize(self) -> None:\n        \"\"\"\n        Should generally be called once before any other methods are called.\n        \"\"\"\n        raise NotImplementedError\n\n    @abstractmethod\n    def has_file(\n        self,\n        file_id: str,\n        file_origin: FileOrigin,\n        file_type: str,\n    ) -> bool:\n        \"\"\"\n        Check if a file exists in the blob store\n\n        Parameters:\n        - file_id: Unique ID of the file to check for\n        - file_origin: Origin of the file\n        - file_type: Type of the file\n        \"\"\"\n        raise NotImplementedError\n\n    @abstractmethod\n    def save_file(\n        self,\n        content: IO,\n        display_name: str | None,\n        file_origin: FileOrigin,\n        file_type: str,\n        file_metadata: dict[str, Any] | None = None,\n        file_id: str | None = None,\n    ) -> str:\n        \"\"\"\n        Save a file to the blob store\n\n        Parameters:\n        - content: Contents of the file\n        - display_name: Display name of the file to save\n        - file_origin: Origin of the file\n        - file_type: Type of the file\n        - file_metadata: Additional metadata for the file\n        - file_id: Unique ID of the file to save. If not provided, a random UUID will be generated.\n                   It is generally NOT recommended to provide this.\n\n        Returns:\n            The unique ID of the file that was saved.\n        \"\"\"\n        raise NotImplementedError\n\n    @abstractmethod\n    def read_file(\n        self, file_id: str, mode: str | None = None, use_tempfile: bool = False\n    ) -> IO[bytes]:\n        \"\"\"\n        Read the content of a given file by the ID\n\n        Parameters:\n        - file_id: Unique ID of file to read\n        - mode: Mode to open the file (e.g. 'b' for binary)\n        - use_tempfile: Whether to use a temporary file to store the contents\n                        in order to avoid loading the entire file into memory\n\n        Returns:\n            Contents of the file and metadata dict\n        \"\"\"\n\n    @abstractmethod\n    def read_file_record(self, file_id: str) -> FileStoreModel:\n        \"\"\"\n        Read the file record by the ID\n        \"\"\"\n\n    @abstractmethod\n    def get_file_size(\n        self, file_id: str, db_session: Session | None = None\n    ) -> int | None:\n        \"\"\"\n        Get the size of a file in bytes.\n        Optionally provide a db_session for database access.\n        \"\"\"\n\n    @abstractmethod\n    def delete_file(self, file_id: str) -> None:\n        \"\"\"\n        Delete a file by its ID.\n\n        Parameters:\n        - file_name: Name of file to delete\n        \"\"\"\n\n    @abstractmethod\n    def get_file_with_mime_type(self, file_id: str) -> FileWithMimeType | None:\n        \"\"\"\n        Get the file + parse out the mime type.\n        \"\"\"\n\n    @abstractmethod\n    def change_file_id(self, old_file_id: str, new_file_id: str) -> None:\n        \"\"\"\n        Change the file ID of an existing file.\n\n        Parameters:\n        - old_file_id: Current file ID\n        - new_file_id: New file ID to assign\n        \"\"\"\n        raise NotImplementedError\n\n    @abstractmethod\n    def list_files_by_prefix(self, prefix: str) -> list[FileRecord]:\n        \"\"\"\n        List all file IDs that start with the given prefix.\n        \"\"\"\n\n\nclass S3BackedFileStore(FileStore):\n    \"\"\"Isn't necessarily S3, but is any S3-compatible storage (e.g. MinIO)\"\"\"\n\n    def __init__(\n        self,\n        bucket_name: str,\n        aws_access_key_id: str | None = None,\n        aws_secret_access_key: str | None = None,\n        aws_region_name: str | None = None,\n        s3_endpoint_url: str | None = None,\n        s3_prefix: str | None = None,\n        s3_verify_ssl: bool = True,\n    ) -> None:\n        self._s3_client: S3Client | None = None\n        self._bucket_name = bucket_name\n        self._aws_access_key_id = aws_access_key_id\n        self._aws_secret_access_key = aws_secret_access_key\n        self._aws_region_name = aws_region_name or \"us-east-2\"\n        self._s3_endpoint_url = s3_endpoint_url\n        self._s3_prefix = s3_prefix or \"onyx-files\"\n        self._s3_verify_ssl = s3_verify_ssl\n\n    def _get_s3_client(self) -> S3Client:\n        \"\"\"Initialize S3 client if not already done\"\"\"\n        if self._s3_client is None:\n            try:\n                client_kwargs: dict[str, Any] = {\n                    \"service_name\": \"s3\",\n                    \"region_name\": self._aws_region_name,\n                }\n\n                # Add endpoint URL if specified (for MinIO, etc.)\n                if self._s3_endpoint_url:\n                    client_kwargs[\"endpoint_url\"] = self._s3_endpoint_url\n                    client_kwargs[\"config\"] = Config(\n                        signature_version=\"s3v4\",\n                        s3={\"addressing_style\": \"path\"},  # Required for MinIO\n                    )\n                    # Disable SSL verification if requested (for local development)\n                    if not self._s3_verify_ssl:\n                        import urllib3\n\n                        urllib3.disable_warnings(\n                            urllib3.exceptions.InsecureRequestWarning\n                        )\n                        client_kwargs[\"verify\"] = False\n\n                if self._aws_access_key_id and self._aws_secret_access_key:\n                    # Use explicit credentials\n                    client_kwargs.update(\n                        {\n                            \"aws_access_key_id\": self._aws_access_key_id,\n                            \"aws_secret_access_key\": self._aws_secret_access_key,\n                        }\n                    )\n                    self._s3_client = boto3.client(**client_kwargs)\n                else:\n                    # Use IAM role or default credentials (not typically used with MinIO)\n                    self._s3_client = boto3.client(**client_kwargs)\n\n            except Exception as e:\n                logger.error(f\"Failed to initialize S3 client: {e}\")\n                raise RuntimeError(f\"Failed to initialize S3 client: {e}\")\n\n        return self._s3_client\n\n    def _get_bucket_name(self) -> str:\n        \"\"\"Get S3 bucket name from configuration\"\"\"\n        if not self._bucket_name:\n            raise RuntimeError(\"S3 bucket name is required for S3 file store\")\n        return self._bucket_name\n\n    def _get_s3_key(self, file_name: str) -> str:\n        \"\"\"Generate S3 key from file name with tenant ID prefix\"\"\"\n        tenant_id = get_current_tenant_id()\n\n        s3_key = generate_s3_key(\n            file_name=file_name,\n            prefix=self._s3_prefix,\n            tenant_id=tenant_id,\n            max_key_length=1024,\n        )\n\n        # Log if truncation occurred (when the key is exactly at the limit)\n        if len(s3_key) == 1024:\n            logger.info(f\"File name was too long and was truncated: {file_name}\")\n\n        return s3_key\n\n    def initialize(self) -> None:\n        \"\"\"Initialize the S3 file store by ensuring the bucket exists\"\"\"\n        s3_client = self._get_s3_client()\n        bucket_name = self._get_bucket_name()\n\n        # Check if bucket exists\n        try:\n            s3_client.head_bucket(Bucket=bucket_name)\n            logger.info(f\"S3 bucket '{bucket_name}' already exists\")\n        except ClientError as e:\n            error_code = e.response[\"Error\"][\"Code\"]\n            if error_code == \"404\":\n                # Bucket doesn't exist, create it\n                logger.info(f\"Creating S3 bucket '{bucket_name}'\")\n\n                # For AWS S3, we need to handle region-specific bucket creation\n                region = (\n                    s3_client._client_config.region_name\n                    if hasattr(s3_client, \"_client_config\")\n                    else None\n                )\n\n                if region and region != \"us-east-1\":\n                    # For regions other than us-east-1, we need to specify LocationConstraint\n                    s3_client.create_bucket(\n                        Bucket=bucket_name,\n                        CreateBucketConfiguration={\"LocationConstraint\": region},\n                    )\n                else:\n                    # For us-east-1 or MinIO/other S3-compatible services\n                    s3_client.create_bucket(Bucket=bucket_name)\n\n                logger.info(f\"Successfully created S3 bucket '{bucket_name}'\")\n            elif error_code == \"403\":\n                # Bucket exists but we don't have permission to access it\n                logger.warning(\n                    f\"S3 bucket '{bucket_name}' exists but access is forbidden\"\n                )\n                raise RuntimeError(\n                    f\"Access denied to S3 bucket '{bucket_name}'. Check credentials and permissions.\"\n                )\n            else:\n                # Some other error occurred\n                logger.error(f\"Failed to check S3 bucket '{bucket_name}': {e}\")\n                raise RuntimeError(f\"Failed to check S3 bucket '{bucket_name}': {e}\")\n\n    def has_file(\n        self,\n        file_id: str,\n        file_origin: FileOrigin,\n        file_type: str,\n        db_session: Session | None = None,\n    ) -> bool:\n        with get_session_with_current_tenant_if_none(db_session) as db_session:\n            file_record = get_filerecord_by_file_id_optional(\n                file_id=file_id, db_session=db_session\n            )\n        return (\n            file_record is not None\n            and file_record.file_origin == file_origin\n            and file_record.file_type == file_type\n        )\n\n    def save_file(\n        self,\n        content: IO,\n        display_name: str | None,\n        file_origin: FileOrigin,\n        file_type: str,\n        file_metadata: dict[str, Any] | None = None,\n        file_id: str | None = None,\n        db_session: Session | None = None,\n    ) -> str:\n        if file_id is None:\n            file_id = str(uuid.uuid4())\n\n        s3_client = self._get_s3_client()\n        bucket_name = self._get_bucket_name()\n        s3_key = self._get_s3_key(file_id)\n\n        hash256 = \"\"\n        sha256_hash = hashlib.sha256()\n        kwargs: S3PutKwargs = {}\n\n        # FIX: Optimize checksum generation to avoid creating extra copies in memory\n        # Read content from IO object\n        if hasattr(content, \"read\"):\n            file_content = content.read()\n            if S3_GENERATE_LOCAL_CHECKSUM:\n                # FIX: Don't convert to string first (creates unnecessary copy)\n                # Work directly with bytes\n                if isinstance(file_content, bytes):\n                    sha256_hash.update(file_content)\n                else:\n                    sha256_hash.update(str(file_content).encode())\n                hash256 = sha256_hash.hexdigest()\n                kwargs[\"ChecksumSHA256\"] = hash256\n            if hasattr(content, \"seek\"):\n                content.seek(0)  # Reset position for potential re-reads\n        else:\n            file_content = content\n\n        # Upload to S3\n\n        s3_client.put_object(\n            Bucket=bucket_name,\n            Key=s3_key,\n            Body=file_content,\n            ContentType=file_type,\n            **kwargs,\n        )\n\n        with get_session_with_current_tenant_if_none(db_session) as db_session:\n            # Save metadata to database\n            upsert_filerecord(\n                file_id=file_id,\n                display_name=display_name or file_id,\n                file_origin=file_origin,\n                file_type=file_type,\n                bucket_name=bucket_name,\n                object_key=s3_key,\n                db_session=db_session,\n                file_metadata=file_metadata,\n            )\n            db_session.commit()\n\n        return file_id\n\n    def read_file(\n        self,\n        file_id: str,\n        mode: str | None = None,  # noqa: ARG002\n        use_tempfile: bool = False,\n        db_session: Session | None = None,\n    ) -> IO[bytes]:\n        with get_session_with_current_tenant_if_none(db_session) as db_session:\n            file_record = get_filerecord_by_file_id(\n                file_id=file_id, db_session=db_session\n            )\n\n        s3_client = self._get_s3_client()\n        try:\n            response = s3_client.get_object(\n                Bucket=file_record.bucket_name, Key=file_record.object_key\n            )\n        except ClientError:\n            logger.error(f\"Failed to read file {file_id} from S3\")\n            raise\n\n        # FIX: Stream file content instead of loading entire file into memory\n        # This prevents OOM issues with large files (500MB+ PDFs, etc.)\n        if use_tempfile:\n            # Stream directly to temp file to avoid holding entire file in memory\n            temp_file = tempfile.NamedTemporaryFile(mode=\"w+b\", delete=True)\n            # Stream in 8MB chunks to reduce memory footprint\n            for chunk in response[\"Body\"].iter_chunks(chunk_size=8 * 1024 * 1024):\n                temp_file.write(chunk)\n            temp_file.seek(0)\n            return temp_file\n        else:\n            # For BytesIO, we still need to read into memory (legacy behavior)\n            # but at least we're not creating duplicate copies\n            file_content = response[\"Body\"].read()\n            return BytesIO(file_content)\n\n    def read_file_record(\n        self, file_id: str, db_session: Session | None = None\n    ) -> FileStoreModel:\n        with get_session_with_current_tenant_if_none(db_session) as db_session:\n            file_record = get_filerecord_by_file_id(\n                file_id=file_id, db_session=db_session\n            )\n        return file_record\n\n    def get_file_size(\n        self, file_id: str, db_session: Session | None = None\n    ) -> int | None:\n        \"\"\"\n        Get the size of a file in bytes by querying S3 metadata.\n        \"\"\"\n        try:\n            with get_session_with_current_tenant_if_none(db_session) as db_session:\n                file_record = get_filerecord_by_file_id(\n                    file_id=file_id, db_session=db_session\n                )\n\n            s3_client = self._get_s3_client()\n            response = s3_client.head_object(\n                Bucket=file_record.bucket_name, Key=file_record.object_key\n            )\n            return response.get(\"ContentLength\")\n        except Exception as e:\n            logger.warning(f\"Error getting file size for {file_id}: {e}\")\n            return None\n\n    def delete_file(self, file_id: str, db_session: Session | None = None) -> None:\n        with get_session_with_current_tenant_if_none(db_session) as db_session:\n            try:\n                file_record = get_filerecord_by_file_id(\n                    file_id=file_id, db_session=db_session\n                )\n                if not file_record.bucket_name:\n                    logger.error(\n                        f\"File record {file_id} with key {file_record.object_key} \"\n                        \"has no bucket name, cannot delete from filestore\"\n                    )\n                    delete_filerecord_by_file_id(file_id=file_id, db_session=db_session)\n                    db_session.commit()\n                    return\n\n                # Delete from external storage\n                s3_client = self._get_s3_client()\n                try:\n                    s3_client.delete_object(\n                        Bucket=file_record.bucket_name, Key=file_record.object_key\n                    )\n                except ClientError as e:\n                    # If the object doesn't exist in file store, treat it as success\n                    # since the end goal (object not existing) is achieved\n                    if e.response.get(\"Error\", {}).get(\"Code\") == \"NoSuchKey\":\n                        logger.warning(\n                            f\"delete_file: File {file_id} not found in file store (key: {file_record.object_key}), \"\n                            \"cleaning up database record.\"\n                        )\n                    else:\n                        raise\n\n                # Delete metadata from database\n                delete_filerecord_by_file_id(file_id=file_id, db_session=db_session)\n\n                db_session.commit()\n\n            except Exception:\n                db_session.rollback()\n                raise\n\n    def change_file_id(\n        self, old_file_id: str, new_file_id: str, db_session: Session | None = None\n    ) -> None:\n        with get_session_with_current_tenant_if_none(db_session) as db_session:\n            try:\n                # Get the existing file record\n                old_file_record = get_filerecord_by_file_id(\n                    file_id=old_file_id, db_session=db_session\n                )\n\n                # Generate new S3 key for the new file ID\n                new_s3_key = self._get_s3_key(new_file_id)\n\n                # Copy S3 object to new key\n                s3_client = self._get_s3_client()\n                bucket_name = self._get_bucket_name()\n\n                copy_source = (\n                    f\"{old_file_record.bucket_name}/{old_file_record.object_key}\"\n                )\n\n                s3_client.copy_object(\n                    CopySource=copy_source,\n                    Bucket=bucket_name,\n                    Key=new_s3_key,\n                    MetadataDirective=\"COPY\",\n                )\n\n                # Create new file record with new file_id\n                # Cast file_metadata to the expected type\n                file_metadata = cast(\n                    dict[Any, Any] | None, old_file_record.file_metadata\n                )\n\n                upsert_filerecord(\n                    file_id=new_file_id,\n                    display_name=old_file_record.display_name,\n                    file_origin=old_file_record.file_origin,\n                    file_type=old_file_record.file_type,\n                    bucket_name=bucket_name,\n                    object_key=new_s3_key,\n                    db_session=db_session,\n                    file_metadata=file_metadata,\n                )\n\n                # Delete old S3 object\n                s3_client.delete_object(\n                    Bucket=old_file_record.bucket_name, Key=old_file_record.object_key\n                )\n\n                # Delete old file record\n                delete_filerecord_by_file_id(file_id=old_file_id, db_session=db_session)\n\n                db_session.commit()\n\n            except Exception as e:\n                db_session.rollback()\n                logger.exception(\n                    f\"Failed to change file ID from {old_file_id} to {new_file_id}: {e}\"\n                )\n                raise\n\n    def get_file_with_mime_type(self, file_id: str) -> FileWithMimeType | None:\n        mime_type: str = \"application/octet-stream\"\n        try:\n            file_io = self.read_file(file_id, mode=\"b\")\n            file_content = file_io.read()\n            matches = puremagic.magic_string(file_content)\n            if matches:\n                mime_type = cast(str, matches[0].mime_type)\n            return FileWithMimeType(data=file_content, mime_type=mime_type)\n        except Exception:\n            return None\n\n    def list_files_by_prefix(self, prefix: str) -> list[FileRecord]:\n        \"\"\"\n        List all file IDs that start with the given prefix.\n        \"\"\"\n        with get_session_with_current_tenant() as db_session:\n            file_records = get_filerecord_by_prefix(\n                prefix=prefix, db_session=db_session\n            )\n        return file_records\n\n\ndef get_s3_file_store() -> S3BackedFileStore:\n    \"\"\"\n    Returns the S3 file store implementation.\n    \"\"\"\n\n    # Get bucket name - this is required\n    bucket_name = S3_FILE_STORE_BUCKET_NAME\n    if not bucket_name:\n        raise RuntimeError(\n            \"S3_FILE_STORE_BUCKET_NAME configuration is required for S3 file store\"\n        )\n\n    return S3BackedFileStore(\n        bucket_name=bucket_name,\n        aws_access_key_id=S3_AWS_ACCESS_KEY_ID,\n        aws_secret_access_key=S3_AWS_SECRET_ACCESS_KEY,\n        aws_region_name=AWS_REGION_NAME,\n        s3_endpoint_url=S3_ENDPOINT_URL,\n        s3_prefix=S3_FILE_STORE_PREFIX,\n        s3_verify_ssl=S3_VERIFY_SSL,\n    )\n\n\ndef get_default_file_store() -> FileStore:\n    \"\"\"\n    Returns the configured file store implementation based on FILE_STORE_BACKEND.\n\n    When FILE_STORE_BACKEND=postgres (default):\n    - Files are stored in PostgreSQL using Large Objects.\n    - No external storage service (S3/MinIO) is required.\n\n    When FILE_STORE_BACKEND=s3:\n    - Supports AWS S3, MinIO, and other S3-compatible storage.\n    - Configuration via environment variables:\n      - S3_FILE_STORE_BUCKET_NAME, S3_ENDPOINT_URL, S3_AWS_ACCESS_KEY_ID, etc.\n    \"\"\"\n    from onyx.configs.app_configs import FILE_STORE_BACKEND\n    from onyx.configs.constants import FileStoreType\n\n    if FileStoreType(FILE_STORE_BACKEND) == FileStoreType.POSTGRES:\n        from onyx.file_store.postgres_file_store import PostgresBackedFileStore\n\n        return PostgresBackedFileStore()\n\n    return get_s3_file_store()\n"
  },
  {
    "path": "backend/onyx/file_store/models.py",
    "content": "import base64\nfrom enum import Enum\nfrom typing import NotRequired\nfrom typing_extensions import TypedDict  # noreorder\n\nfrom pydantic import BaseModel\n\n\nclass ChatFileType(str, Enum):\n    # Image types only contain the binary data\n    IMAGE = \"image\"\n    # Doc types are saved as both the binary, and the parsed text\n    DOC = \"document\"\n    # Plain text only contain the text\n    PLAIN_TEXT = \"plain_text\"\n    # Tabular data files (CSV, XLSX)\n    TABULAR = \"tabular\"\n\n    def is_text_file(self) -> bool:\n        return self in (\n            ChatFileType.PLAIN_TEXT,\n            ChatFileType.DOC,\n            ChatFileType.TABULAR,\n        )\n\n    def use_metadata_only(self) -> bool:\n        \"\"\"File types where we can ignore the file content\n        and only use the metadata.\"\"\"\n        return self in (ChatFileType.TABULAR,)\n\n\nclass FileDescriptor(TypedDict):\n    \"\"\"NOTE: is a `TypedDict` so it can be used as a type hint for a JSONB column\n    in Postgres\"\"\"\n\n    id: str\n    type: ChatFileType\n    name: NotRequired[str | None]\n    user_file_id: NotRequired[str | None]\n\n\nclass InMemoryChatFile(BaseModel):\n    file_id: str\n    content: bytes\n    file_type: ChatFileType\n    filename: str | None = None\n\n    def to_base64(self) -> str:\n        if self.file_type == ChatFileType.IMAGE:\n            return base64.b64encode(self.content).decode()\n        else:\n            raise RuntimeError(\n                \"Should not be trying to convert a non-image file to base64\"\n            )\n\n    def to_file_descriptor(self) -> FileDescriptor:\n        return {\n            \"id\": str(self.file_id),\n            \"type\": self.file_type,\n            \"name\": self.filename,\n            \"user_file_id\": str(self.file_id) if self.file_id else None,\n        }\n"
  },
  {
    "path": "backend/onyx/file_store/postgres_file_store.py",
    "content": "\"\"\"PostgreSQL-backed file store using Large Objects.\n\nStores file content directly in PostgreSQL via the Large Object facility,\neliminating the need for an external S3/MinIO service.\n\"\"\"\n\nimport tempfile\nimport uuid\nfrom io import BytesIO\nfrom typing import Any\nfrom typing import cast\nfrom typing import IO\n\nimport puremagic\nfrom psycopg2.extensions import connection as Psycopg2Connection\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant_if_none\nfrom onyx.db.file_content import delete_file_content_by_file_id\nfrom onyx.db.file_content import get_file_content_by_file_id\nfrom onyx.db.file_content import get_file_content_by_file_id_optional\nfrom onyx.db.file_content import transfer_file_content_file_id\nfrom onyx.db.file_content import upsert_file_content\nfrom onyx.db.file_record import delete_filerecord_by_file_id\nfrom onyx.db.file_record import get_filerecord_by_file_id\nfrom onyx.db.file_record import get_filerecord_by_file_id_optional\nfrom onyx.db.file_record import get_filerecord_by_prefix\nfrom onyx.db.file_record import upsert_filerecord\nfrom onyx.db.models import FileRecord\nfrom onyx.db.models import FileRecord as FileStoreModel\nfrom onyx.file_store.file_store import FileStore\nfrom onyx.utils.file import FileWithMimeType\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nPOSTGRES_BUCKET_SENTINEL = \"postgres\"\nSTREAM_CHUNK_SIZE = 8 * 1024 * 1024  # 8 MB\n\n\ndef _get_raw_connection(db_session: Session) -> Psycopg2Connection:\n    \"\"\"Extract the raw psycopg2 connection from a SQLAlchemy session.\"\"\"\n    raw_conn = db_session.connection().connection.dbapi_connection\n    if raw_conn is None:\n        raise ValueError(\"Failed to get raw connection from session\")\n    return cast(Psycopg2Connection, raw_conn)\n\n\ndef _create_large_object(raw_conn: Psycopg2Connection, data: bytes) -> int:\n    \"\"\"Create a new Large Object, write data, and return the OID.\"\"\"\n    lobj = raw_conn.lobject(0, \"wb\")\n    lobj.write(data)\n    oid: int = lobj.oid\n    lobj.close()\n    return oid\n\n\ndef _read_large_object(raw_conn: Psycopg2Connection, oid: int) -> bytes:\n    \"\"\"Read all bytes from a Large Object.\"\"\"\n    lobj = raw_conn.lobject(oid, \"rb\")\n    data: bytes = lobj.read()\n    lobj.close()\n    return data\n\n\ndef _read_large_object_to_tempfile(raw_conn: Psycopg2Connection, oid: int) -> IO[bytes]:\n    \"\"\"Stream a Large Object into a temporary file to avoid OOM on large files.\"\"\"\n    lobj = raw_conn.lobject(oid, \"rb\")\n    temp = tempfile.NamedTemporaryFile(mode=\"w+b\", delete=True)\n    while True:\n        chunk = lobj.read(STREAM_CHUNK_SIZE)\n        if not chunk:\n            break\n        temp.write(chunk)\n    lobj.close()\n    temp.seek(0)\n    return temp\n\n\ndef _delete_large_object(raw_conn: Any, oid: int) -> None:\n    \"\"\"Unlink (delete) a Large Object by OID.\"\"\"\n    lobj = raw_conn.lobject(oid, \"n\")\n    lobj.unlink()\n\n\nclass PostgresBackedFileStore(FileStore):\n    \"\"\"File store backed entirely by PostgreSQL.\n\n    Metadata lives in `file_record`, content lives in PostgreSQL Large Objects\n    with OID references tracked in `file_content`.\n    \"\"\"\n\n    def initialize(self) -> None:\n        # Nothing to do — tables are created by Alembic migrations.\n        pass\n\n    def has_file(\n        self,\n        file_id: str,\n        file_origin: FileOrigin,\n        file_type: str,\n        db_session: Session | None = None,\n    ) -> bool:\n        with get_session_with_current_tenant_if_none(db_session) as session:\n            record = get_filerecord_by_file_id_optional(\n                file_id=file_id, db_session=session\n            )\n        return (\n            record is not None\n            and record.file_origin == file_origin\n            and record.file_type == file_type\n        )\n\n    def save_file(\n        self,\n        content: IO,\n        display_name: str | None,\n        file_origin: FileOrigin,\n        file_type: str,\n        file_metadata: dict[str, Any] | None = None,\n        file_id: str | None = None,\n        db_session: Session | None = None,\n    ) -> str:\n        if file_id is None:\n            file_id = str(uuid.uuid4())\n\n        file_bytes = self._read_content_bytes(content)\n        created_lo = False\n\n        with get_session_with_current_tenant_if_none(db_session) as session:\n            raw_conn, oid = None, None\n            try:\n                raw_conn = _get_raw_connection(session)\n\n                # Look up existing content so we can unlink the old\n                # Large Object after a successful overwrite.\n                existing = get_file_content_by_file_id_optional(\n                    file_id=file_id, db_session=session\n                )\n                old_oid = existing.lobj_oid if existing else None\n\n                oid = _create_large_object(raw_conn, file_bytes)\n                created_lo = True\n\n                upsert_filerecord(\n                    file_id=file_id,\n                    display_name=display_name or file_id,\n                    file_origin=file_origin,\n                    file_type=file_type,\n                    bucket_name=POSTGRES_BUCKET_SENTINEL,\n                    object_key=str(oid),\n                    db_session=session,\n                    file_metadata=file_metadata,\n                )\n                upsert_file_content(\n                    file_id=file_id,\n                    lobj_oid=oid,\n                    file_size=len(file_bytes),\n                    db_session=session,\n                )\n\n                # Unlink the previous Large Object to avoid orphans\n                if old_oid is not None and old_oid != oid:\n                    try:\n                        _delete_large_object(raw_conn, old_oid)\n                    except Exception:\n                        logger.warning(\n                            f\"Failed to unlink old large object {old_oid} for file {file_id}\"\n                        )\n\n                session.commit()\n            except Exception as e:\n                session.rollback()\n                try:\n                    if created_lo and raw_conn is not None and oid is not None:\n                        _delete_large_object(raw_conn, oid)\n                except Exception:\n                    logger.exception(\n                        f\"Failed to delete large object {oid} for file {file_id}\"\n                    )\n                raise e\n\n        return file_id\n\n    def read_file(\n        self,\n        file_id: str,\n        mode: str | None = None,  # noqa: ARG002\n        use_tempfile: bool = False,\n        db_session: Session | None = None,\n    ) -> IO[bytes]:\n        with get_session_with_current_tenant_if_none(db_session) as session:\n            file_content = get_file_content_by_file_id(\n                file_id=file_id, db_session=session\n            )\n            raw_conn = _get_raw_connection(session)\n\n            if use_tempfile:\n                return _read_large_object_to_tempfile(raw_conn, file_content.lobj_oid)\n\n            data = _read_large_object(raw_conn, file_content.lobj_oid)\n            return BytesIO(data)\n\n    def read_file_record(\n        self, file_id: str, db_session: Session | None = None\n    ) -> FileStoreModel:\n        with get_session_with_current_tenant_if_none(db_session) as session:\n            return get_filerecord_by_file_id(file_id=file_id, db_session=session)\n\n    def get_file_size(\n        self, file_id: str, db_session: Session | None = None\n    ) -> int | None:\n        try:\n            with get_session_with_current_tenant_if_none(db_session) as session:\n                record = get_file_content_by_file_id(\n                    file_id=file_id, db_session=session\n                )\n                return record.file_size\n        except Exception as e:\n            logger.warning(f\"Error getting file size for {file_id}: {e}\")\n            return None\n\n    def delete_file(self, file_id: str, db_session: Session | None = None) -> None:\n        with get_session_with_current_tenant_if_none(db_session) as session:\n            try:\n                file_content = get_file_content_by_file_id(\n                    file_id=file_id, db_session=session\n                )\n                raw_conn = _get_raw_connection(session)\n\n                try:\n                    _delete_large_object(raw_conn, file_content.lobj_oid)\n                except Exception:\n                    logger.warning(\n                        f\"Large object {file_content.lobj_oid} for file {file_id} not found, cleaning up records only.\"\n                    )\n\n                delete_file_content_by_file_id(file_id=file_id, db_session=session)\n                delete_filerecord_by_file_id(file_id=file_id, db_session=session)\n                session.commit()\n            except Exception:\n                session.rollback()\n                raise\n\n    def get_file_with_mime_type(self, file_id: str) -> FileWithMimeType | None:\n        mime_type = \"application/octet-stream\"\n        try:\n            file_io = self.read_file(file_id, mode=\"b\")\n        except Exception:\n            return None\n\n        file_content = file_io.read()\n        try:\n            matches = puremagic.magic_string(file_content)\n            if matches:\n                mime_type = cast(str, matches[0].mime_type)\n        except puremagic.PureError:\n            pass\n\n        return FileWithMimeType(data=file_content, mime_type=mime_type)\n\n    def change_file_id(\n        self, old_file_id: str, new_file_id: str, db_session: Session | None = None\n    ) -> None:\n        with get_session_with_current_tenant_if_none(db_session) as session:\n            try:\n                old_record = get_filerecord_by_file_id(\n                    file_id=old_file_id, db_session=session\n                )\n                file_metadata = cast(dict[Any, Any] | None, old_record.file_metadata)\n\n                # 1. Create the new file_record so the FK target exists\n                upsert_filerecord(\n                    file_id=new_file_id,\n                    display_name=old_record.display_name,\n                    file_origin=old_record.file_origin,\n                    file_type=old_record.file_type,\n                    bucket_name=POSTGRES_BUCKET_SENTINEL,\n                    object_key=old_record.object_key,\n                    db_session=session,\n                    file_metadata=file_metadata,\n                )\n\n                # 2. Move file_content in-place — the LO OID is never\n                #    shared between two rows.\n                transfer_file_content_file_id(\n                    old_file_id=old_file_id,\n                    new_file_id=new_file_id,\n                    db_session=session,\n                )\n\n                # 3. Remove the now-orphaned old file_record\n                delete_filerecord_by_file_id(file_id=old_file_id, db_session=session)\n\n                session.commit()\n            except Exception as e:\n                session.rollback()\n                logger.exception(\n                    f\"Failed to change file ID from {old_file_id} to {new_file_id}: {e}\"\n                )\n                raise\n\n    def list_files_by_prefix(self, prefix: str) -> list[FileRecord]:\n        with get_session_with_current_tenant() as session:\n            return get_filerecord_by_prefix(prefix=prefix, db_session=session)\n\n    @staticmethod\n    def _read_content_bytes(content: IO) -> bytes:\n        \"\"\"Normalize an IO object into raw bytes.\"\"\"\n        if hasattr(content, \"read\"):\n            raw = content.read()\n        else:\n            raw = content\n\n        if isinstance(raw, str):\n            return raw.encode(\"utf-8\")\n        return raw\n"
  },
  {
    "path": "backend/onyx/file_store/s3_key_utils.py",
    "content": "\"\"\"\nS3 key sanitization utilities for ensuring AWS S3 compatibility.\n\nThis module provides utilities for sanitizing file names to be compatible with\nAWS S3 object key naming guidelines while ensuring uniqueness when significant\nsanitization occurs.\n\nReference: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html\n\"\"\"\n\nimport hashlib\nimport re\nimport urllib.parse\nfrom re import Match\n\n# Constants for S3 key generation\nHASH_LENGTH = 64  # SHA256 hex digest length\nHASH_SEPARATOR_LENGTH = 1  # Length of underscore separator\nHASH_WITH_SEPARATOR_LENGTH = HASH_LENGTH + HASH_SEPARATOR_LENGTH\n\n\ndef _encode_special_char(match: Match[str]) -> str:\n    \"\"\"Helper function to URL encode special characters.\"\"\"\n    return urllib.parse.quote(match.group(0), safe=\"\")\n\n\ndef sanitize_s3_key_name(file_name: str) -> str:\n    \"\"\"\n    Sanitize file name to be S3-compatible according to AWS guidelines.\n\n    This method:\n    1. Replaces problematic characters with safe alternatives\n    2. URL-encodes characters that might require special handling\n    3. Ensures the result is safe for S3 object keys\n    4. Adds uniqueness when significant sanitization occurs\n\n    Args:\n        file_name: The original file name to sanitize\n\n    Returns:\n        A sanitized file name that is S3-compatible\n\n    Reference: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html\n    \"\"\"\n    if not file_name:\n        return \"unnamed_file\"\n\n    original_name = file_name\n\n    # Characters to avoid completely (replace with underscore)\n    # These are characters that AWS recommends avoiding\n    avoid_chars = r'[\\\\{}^%`\\[\\]\"<>#|~/]'\n\n    # Replace avoided characters with underscore\n    sanitized = re.sub(avoid_chars, \"_\", file_name)\n    # Characters that might require special handling but are allowed\n    # We'll URL encode these to be safe\n    special_chars = r\"[&$@=;:+,?\\s]\"\n\n    sanitized = re.sub(special_chars, _encode_special_char, sanitized)\n\n    # Handle non-ASCII characters by URL encoding them\n    # This ensures Unicode characters are properly handled\n    needs_unicode_encoding = False\n    try:\n        # Try to encode as ASCII to check if it contains non-ASCII chars\n        sanitized.encode(\"ascii\")\n    except UnicodeEncodeError:\n        needs_unicode_encoding = True\n        # Contains non-ASCII characters, URL encode the entire string\n        # but preserve safe ASCII characters\n        sanitized = urllib.parse.quote(\n            sanitized,\n            safe=\"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.()!*\",\n        )\n\n    # Ensure we don't have consecutive periods at the start (relative path issue)\n    sanitized = re.sub(r\"^\\.+\", \"\", sanitized)\n\n    # Remove any trailing periods to avoid download issues\n    sanitized = sanitized.rstrip(\".\")\n\n    # Remove multiple separators\n    sanitized = re.sub(r\"[-_]{2,}\", \"-\", sanitized)\n\n    # If sanitization resulted in empty string, use a default\n    if not sanitized:\n        sanitized = \"sanitized_file\"\n\n    # Check if significant sanitization occurred and add uniqueness if needed\n    significant_changes = (\n        # Check if we replaced many characters\n        len(re.findall(avoid_chars, original_name)) > 3\n        or\n        # Check if we had to URL encode Unicode characters\n        needs_unicode_encoding\n        or\n        # Check if the sanitized name is very different in length (expansion due to encoding)\n        len(sanitized) > len(original_name) * 2\n        or\n        # Check if the original had many special characters\n        len(re.findall(special_chars, original_name)) > 5\n    )\n\n    if significant_changes:\n        # Add a short hash to ensure uniqueness while keeping some readability\n        name_hash = hashlib.sha256(original_name.encode(\"utf-8\")).hexdigest()[:8]\n\n        # Try to preserve file extension if it exists and is reasonable\n        if \".\" in sanitized and len(sanitized.split(\".\")[-1]) <= 10:\n            name_parts = sanitized.rsplit(\".\", 1)\n            sanitized = f\"{name_parts[0]}_{name_hash}.{name_parts[1]}\"\n        else:\n            sanitized = f\"{sanitized}_{name_hash}\"\n\n    return sanitized\n\n\ndef generate_s3_key(\n    file_name: str, prefix: str, tenant_id: str, max_key_length: int = 1024\n) -> str:\n    \"\"\"\n    Generate a complete S3 key from file name with prefix and tenant ID.\n\n    Args:\n        file_name: The original file name\n        prefix: S3 key prefix (e.g., 'onyx-files')\n        tenant_id: Tenant identifier\n        max_key_length: Maximum allowed S3 key length (default: 1024)\n\n    Returns:\n        A complete S3 key that fits within the length limit\n    \"\"\"\n    # Strip slashes from prefix and tenant_id to avoid double slashes\n    prefix_clean = prefix.strip(\"/\")\n    tenant_clean = tenant_id.strip(\"/\")\n\n    # Sanitize the file name first\n    sanitized_file_name = sanitize_s3_key_name(file_name)\n\n    # Handle long file names that could exceed S3's key limit\n    # S3 key format: {prefix}/{tenant_id}/{file_name}\n    prefix_and_tenant_parts = [prefix_clean, tenant_clean]\n    prefix_and_tenant = \"/\".join(prefix_and_tenant_parts) + \"/\"\n    max_file_name_length = max_key_length - len(prefix_and_tenant)\n\n    if len(sanitized_file_name) < max_file_name_length:\n        return \"/\".join(prefix_and_tenant_parts + [sanitized_file_name])\n\n    # For very long file names, use hash-based approach to ensure uniqueness\n    # Use the original file name for the hash to maintain consistency\n    file_hash = hashlib.sha256(file_name.encode(\"utf-8\")).hexdigest()\n\n    # Calculate how much space we have for the readable part\n    # Reserve space for hash (64 chars) + underscore separator (1 char)\n    readable_part_max_length = max(0, max_file_name_length - HASH_WITH_SEPARATOR_LENGTH)\n\n    if readable_part_max_length > 0:\n        # Use first part of sanitized name + hash to maintain some readability\n        readable_part = sanitized_file_name[:readable_part_max_length]\n        truncated_name = f\"{readable_part}_{file_hash}\"\n    else:\n        # If no space for readable part, just use hash\n        truncated_name = file_hash\n\n    return \"/\".join(prefix_and_tenant_parts + [truncated_name])\n"
  },
  {
    "path": "backend/onyx/file_store/utils.py",
    "content": "import base64\nfrom collections.abc import Callable\nfrom io import BytesIO\nfrom typing import cast\nfrom uuid import UUID\n\nimport requests\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.db.models import UserFile\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.file_store.models import FileDescriptor\nfrom onyx.file_store.models import InMemoryChatFile\nfrom onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type\nfrom onyx.utils.b64 import get_image_type\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom onyx.utils.timing import log_function_time\n\nlogger = setup_logger()\n\n\ndef plaintext_file_name_for_id(file_id: str) -> str:\n    \"\"\"Generate a consistent file name for storing plaintext content of a file.\"\"\"\n    return f\"plaintext_{file_id}\"\n\n\ndef store_plaintext(file_id: str, plaintext_content: str) -> bool:\n    \"\"\"\n    Store plaintext content for a file in the file store.\n\n    Args:\n        file_id: The ID of the file (user_file or artifact_file)\n        plaintext_content: The plaintext content to store\n\n    Returns:\n        bool: True if storage was successful, False otherwise\n    \"\"\"\n    if not plaintext_content:\n        return False\n\n    plaintext_file_name = plaintext_file_name_for_id(file_id)\n    try:\n        file_store = get_default_file_store()\n        file_content = BytesIO(plaintext_content.encode(\"utf-8\"))\n        file_store.save_file(\n            content=file_content,\n            display_name=f\"Plaintext for {file_id}\",\n            file_origin=FileOrigin.PLAINTEXT_CACHE,\n            file_type=\"text/plain\",\n            file_id=plaintext_file_name,\n        )\n        return True\n    except Exception as e:\n        logger.warning(f\"Failed to store plaintext for {file_id}: {e}\")\n        return False\n\n\n# --- Convenience wrappers for callers that use user-file UUIDs ---\n\n\ndef user_file_id_to_plaintext_file_name(user_file_id: UUID) -> str:\n    \"\"\"Generate a consistent file name for storing plaintext content of a user file.\"\"\"\n    return plaintext_file_name_for_id(str(user_file_id))\n\n\ndef store_user_file_plaintext(user_file_id: UUID, plaintext_content: str) -> bool:\n    \"\"\"Store plaintext content for a user file (delegates to :func:`store_plaintext`).\"\"\"\n    return store_plaintext(str(user_file_id), plaintext_content)\n\n\ndef load_chat_file_by_id(file_id: str) -> InMemoryChatFile:\n    \"\"\"Load a file directly from the file store using its file_record ID.\n\n    This is the fallback path for chat-attached files that don't have a\n    corresponding row in the ``user_file`` table.\"\"\"\n    file_store = get_default_file_store()\n    file_record = file_store.read_file_record(file_id)\n    chat_file_type = mime_type_to_chat_file_type(file_record.file_type)\n\n    file_io = file_store.read_file(file_id, mode=\"b\")\n    return InMemoryChatFile(\n        file_id=file_id,\n        content=file_io.read(),\n        file_type=chat_file_type,\n        filename=file_record.display_name,\n    )\n\n\ndef load_user_file(file_id: UUID, db_session: Session) -> InMemoryChatFile:\n    status = \"not_loaded\"\n\n    user_file = db_session.query(UserFile).filter(UserFile.id == file_id).first()\n    if not user_file:\n        raise ValueError(f\"User file with id {file_id} not found\")\n\n    # Get the file record to determine the appropriate chat file type\n    file_store = get_default_file_store()\n    file_record = file_store.read_file_record(user_file.file_id)\n\n    # Determine appropriate chat file type based on the original file's MIME type\n    chat_file_type = mime_type_to_chat_file_type(file_record.file_type)\n\n    # Try to load plaintext version first\n    plaintext_file_name = user_file_id_to_plaintext_file_name(file_id)\n\n    # check for plain text normalized version first, then use original file otherwise\n    try:\n        file_io = file_store.read_file(plaintext_file_name, mode=\"b\")\n        # Metadata-only file types preserve their original type so\n        # downstream injection paths can route them correctly.\n        if chat_file_type.use_metadata_only():\n            plaintext_chat_file_type = chat_file_type\n        elif file_io is not None:\n            # if we have plaintext for image (which happens when image\n            # extraction is enabled), we use PLAIN_TEXT type\n            plaintext_chat_file_type = ChatFileType.PLAIN_TEXT\n        else:\n            plaintext_chat_file_type = (\n                ChatFileType.PLAIN_TEXT\n                if chat_file_type != ChatFileType.IMAGE\n                else chat_file_type\n            )\n\n        chat_file = InMemoryChatFile(\n            file_id=str(user_file.file_id),\n            content=file_io.read(),\n            file_type=plaintext_chat_file_type,\n            filename=user_file.name,\n        )\n        status = \"plaintext\"\n        return chat_file\n    except Exception as e:\n        logger.warning(f\"Failed to load plaintext for user file {user_file.id}: {e}\")\n        # Fall back to original file if plaintext not available\n        file_io = file_store.read_file(user_file.file_id, mode=\"b\")\n\n        chat_file = InMemoryChatFile(\n            file_id=str(user_file.file_id),\n            content=file_io.read(),\n            file_type=chat_file_type,\n            filename=user_file.name,\n        )\n        status = \"original\"\n        return chat_file\n    finally:\n        logger.debug(\n            f\"load_user_file finished: file_id={user_file.file_id} chat_file_type={chat_file_type} status={status}\"\n        )\n\n\ndef load_in_memory_chat_files(\n    user_file_ids: list[UUID],\n    db_session: Session,\n) -> list[InMemoryChatFile]:\n    \"\"\"\n    Loads the actual content of user files specified by individual IDs and those\n    within specified project IDs into memory.\n\n    Args:\n        user_file_ids: A list of specific UserFile IDs to load.\n        db_session: The SQLAlchemy database session.\n\n    Returns:\n        A list of InMemoryChatFile objects, each containing the file content (as bytes),\n        file ID, file type, and filename. Prioritizes loading plaintext versions if available.\n    \"\"\"\n    # Use parallel execution to load files concurrently\n    return cast(\n        list[InMemoryChatFile],\n        run_functions_tuples_in_parallel(\n            # 1. Load files specified by individual IDs\n            [(load_user_file, (file_id, db_session)) for file_id in user_file_ids]\n        ),\n    )\n\n\ndef get_user_files(\n    user_file_ids: list[UUID],\n    db_session: Session,\n) -> list[UserFile]:\n    \"\"\"\n    Fetches UserFile database records based on provided file and project IDs.\n\n    Args:\n        user_file_ids: A list of specific UserFile IDs to fetch.\n        db_session: The SQLAlchemy database session.\n\n    Returns:\n        A list containing UserFile SQLAlchemy model objects corresponding to the\n        specified file IDs and all files within the specified project IDs.\n        It does NOT return the actual file content.\n    \"\"\"\n    user_files: list[UserFile] = []\n\n    # 1. Fetch UserFile records for specific file IDs\n    for user_file_id in user_file_ids:\n        # Query the database for a UserFile with the matching ID\n        user_file = (\n            db_session.query(UserFile).filter(UserFile.id == user_file_id).first()\n        )\n        # If found, add it to the list\n        if user_file is not None:\n            user_files.append(user_file)\n\n    # 3. Return the combined list of UserFile database objects\n    return user_files\n\n\ndef validate_user_files_ownership(\n    user_file_ids: list[UUID],\n    user_id: UUID | None,\n    db_session: Session,\n) -> list[UserFile]:\n    \"\"\"\n    Fetches all UserFile database records for a given user.\n    \"\"\"\n    user_files = get_user_files(user_file_ids, db_session)\n    current_user_files = []\n    for user_file in user_files:\n        # Note: if user_id is None, then all files should be None as well\n        # (since auth must be disabled in this case)\n        if user_file.user_id != user_id:\n            raise ValueError(\n                f\"User {user_id} does not have access to file {user_file.id}\"\n            )\n        current_user_files.append(user_file)\n\n    return current_user_files\n\n\ndef save_file_from_url(url: str) -> str:\n    response = requests.get(url)\n    response.raise_for_status()\n\n    file_io = BytesIO(response.content)\n    file_store = get_default_file_store()\n    file_id = file_store.save_file(\n        content=file_io,\n        display_name=\"GeneratedImage\",\n        file_origin=FileOrigin.CHAT_IMAGE_GEN,\n        file_type=\"image/png;base64\",\n    )\n    return file_id\n\n\ndef save_file_from_base64(base64_string: str) -> str:\n    file_store = get_default_file_store()\n    file_id = file_store.save_file(\n        content=BytesIO(base64.b64decode(base64_string)),\n        display_name=\"GeneratedImage\",\n        file_origin=FileOrigin.CHAT_IMAGE_GEN,\n        file_type=get_image_type(base64_string),\n    )\n    return file_id\n\n\ndef save_file(\n    url: str | None = None,\n    base64_data: str | None = None,\n) -> str:\n    \"\"\"Save a file from either a URL or base64 encoded string.\n\n    Args:\n        url: URL to download file from\n        base64_data: Base64 encoded file data\n\n    Returns:\n        The unique ID of the saved file\n\n    Raises:\n        ValueError: If neither url nor base64_data is provided, or if both are provided\n    \"\"\"\n    if url is not None and base64_data is not None:\n        raise ValueError(\"Cannot specify both url and base64_data\")\n\n    if url is not None:\n        return save_file_from_url(url)\n    elif base64_data is not None:\n        return save_file_from_base64(base64_data)\n    else:\n        raise ValueError(\"Must specify either url or base64_data\")\n\n\ndef save_files(urls: list[str], base64_files: list[str]) -> list[str]:\n    # NOTE: be explicit about typing so that if we change things, we get notified\n    funcs: list[\n        tuple[\n            Callable[[str | None, str | None], str],\n            tuple[str | None, str | None],\n        ]\n    ] = [(save_file, (url, None)) for url in urls] + [\n        (save_file, (None, base64_file)) for base64_file in base64_files\n    ]\n\n    return run_functions_tuples_in_parallel(funcs)\n\n\n@log_function_time(print_only=True)\ndef verify_user_files(\n    user_files: list[FileDescriptor],\n    user_id: UUID | None,\n    db_session: Session,\n    project_id: int | None = None,\n) -> None:\n    \"\"\"\n    Verify that all provided file descriptors belong to the specified user.\n    For project files (those without user_file_id), verifies access through project ownership.\n\n    Args:\n        user_files: List of file descriptors to verify\n        user_id: The user ID to check ownership against\n        db_session: The SQLAlchemy database session\n        project_id: Optional project ID to verify project file access against\n\n    Raises:\n        ValueError: If any file does not belong to the user or is not found\n    \"\"\"\n    from onyx.db.models import Project__UserFile\n    from onyx.db.projects import check_project_ownership\n\n    # Extract user_file_ids and project file_ids from the file descriptors\n    user_file_ids = []\n    project_file_ids = []\n\n    for file_descriptor in user_files:\n        # Check if this file descriptor has a user_file_id\n        if file_descriptor.get(\"user_file_id\"):\n            try:\n                user_file_ids.append(UUID(file_descriptor[\"user_file_id\"]))\n            except (ValueError, TypeError):\n                logger.warning(\n                    f\"Invalid user_file_id in file descriptor: {file_descriptor['user_file_id']}\"\n                )\n                continue\n        else:\n            # This is a project file - use the 'id' field which is the file_id\n            if file_descriptor.get(\"id\"):\n                project_file_ids.append(file_descriptor[\"id\"])\n\n    # Verify user files (existing logic)\n    if user_file_ids:\n        validate_user_files_ownership(user_file_ids, user_id, db_session)\n\n    # Verify project files\n    if project_file_ids:\n        if project_id is None:\n            raise ValueError(\n                \"Project files provided but no project_id specified for verification\"\n            )\n\n        # Verify user owns the project\n        if not check_project_ownership(project_id, user_id, db_session):\n            raise ValueError(\n                f\"User {user_id} does not have access to project {project_id}\"\n            )\n\n        # Verify all project files belong to the specified project\n        user_files_in_project = (\n            db_session.query(UserFile)\n            .join(Project__UserFile)\n            .filter(\n                Project__UserFile.project_id == project_id,\n                UserFile.file_id.in_(project_file_ids),\n            )\n            .all()\n        )\n\n        # Check if all files were found in the project\n        found_file_ids = {uf.file_id for uf in user_files_in_project}\n        missing_files = set(project_file_ids) - found_file_ids\n\n        if missing_files:\n            raise ValueError(\n                f\"Files {missing_files} are not associated with project {project_id}\"\n            )\n\n\ndef build_frontend_file_url(file_id: str) -> str:\n    return f\"/api/chat/file/{file_id}\"\n\n\ndef build_full_frontend_file_url(file_id: str) -> str:\n    return f\"{WEB_DOMAIN}/api/chat/file/{file_id}\"\n"
  },
  {
    "path": "backend/onyx/hooks/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/hooks/api_dependencies.py",
    "content": "from onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom shared_configs.configs import MULTI_TENANT\n\n\ndef require_hook_enabled() -> None:\n    \"\"\"FastAPI dependency that gates all hook management endpoints.\n\n    Hooks are only available in single-tenant / self-hosted EE deployments.\n\n    Use as: Depends(require_hook_enabled)\n    \"\"\"\n    if MULTI_TENANT:\n        raise OnyxError(\n            OnyxErrorCode.SINGLE_TENANT_ONLY,\n            \"Hooks are not available in multi-tenant deployments\",\n        )\n"
  },
  {
    "path": "backend/onyx/hooks/executor.py",
    "content": "\"\"\"CE hook executor.\n\nHookSkipped and HookSoftFailed are real classes kept here because\nprocess_message.py (CE code) uses isinstance checks against them.\n\nexecute_hook is the public entry point. It dispatches to _execute_hook_impl\nvia fetch_versioned_implementation so that:\n  - CE: onyx.hooks.executor._execute_hook_impl → no-op, returns HookSkipped()\n  - EE: ee.onyx.hooks.executor._execute_hook_impl → real HTTP call\n\"\"\"\n\nfrom typing import Any\nfrom typing import TypeVar\n\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.enums import HookPoint\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\n\n\nclass HookSkipped:\n    \"\"\"No active hook configured for this hook point.\"\"\"\n\n\nclass HookSoftFailed:\n    \"\"\"Hook was called but failed with SOFT fail strategy — continuing.\"\"\"\n\n\nT = TypeVar(\"T\", bound=BaseModel)\n\n\ndef _execute_hook_impl(\n    *,\n    db_session: Session,  # noqa: ARG001\n    hook_point: HookPoint,  # noqa: ARG001\n    payload: dict[str, Any],  # noqa: ARG001\n    response_type: type[T],  # noqa: ARG001\n) -> T | HookSkipped | HookSoftFailed:\n    \"\"\"CE no-op — hooks are not available without EE.\"\"\"\n    return HookSkipped()\n\n\ndef execute_hook(\n    *,\n    db_session: Session,\n    hook_point: HookPoint,\n    payload: dict[str, Any],\n    response_type: type[T],\n) -> T | HookSkipped | HookSoftFailed:\n    \"\"\"Execute the hook for the given hook point.\n\n    Dispatches to the versioned implementation so EE gets the real executor\n    and CE gets the no-op stub, without any changes at the call site.\n    \"\"\"\n    impl = fetch_versioned_implementation(\"onyx.hooks.executor\", \"_execute_hook_impl\")\n    return impl(\n        db_session=db_session,\n        hook_point=hook_point,\n        payload=payload,\n        response_type=response_type,\n    )\n"
  },
  {
    "path": "backend/onyx/hooks/models.py",
    "content": "from datetime import datetime\nfrom enum import Enum\nfrom typing import Annotated\nfrom typing import Any\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\nfrom pydantic import field_validator\nfrom pydantic import model_validator\nfrom pydantic import SecretStr\n\nfrom onyx.db.enums import HookFailStrategy\nfrom onyx.db.enums import HookPoint\n\nNonEmptySecretStr = Annotated[SecretStr, Field(min_length=1)]\n\n\n# ---------------------------------------------------------------------------\n# Request models\n# ---------------------------------------------------------------------------\n\n\nclass HookCreateRequest(BaseModel):\n    name: str = Field(min_length=1)\n    hook_point: HookPoint\n    endpoint_url: str = Field(min_length=1)\n    api_key: NonEmptySecretStr | None = None\n    fail_strategy: HookFailStrategy | None = None  # if None, uses HookPointSpec default\n    timeout_seconds: float | None = Field(\n        default=None, gt=0\n    )  # if None, uses HookPointSpec default\n\n    @field_validator(\"name\", \"endpoint_url\")\n    @classmethod\n    def no_whitespace_only(cls, v: str) -> str:\n        if not v.strip():\n            raise ValueError(\"cannot be whitespace-only.\")\n        return v\n\n\nclass HookUpdateRequest(BaseModel):\n    name: str | None = None\n    endpoint_url: str | None = None\n    api_key: NonEmptySecretStr | None = None\n    fail_strategy: HookFailStrategy | None = None\n    timeout_seconds: float | None = Field(default=None, gt=0)\n\n    @model_validator(mode=\"after\")\n    def require_at_least_one_field(self) -> \"HookUpdateRequest\":\n        if not self.model_fields_set:\n            raise ValueError(\"At least one field must be provided for an update.\")\n        if \"name\" in self.model_fields_set and not (self.name or \"\").strip():\n            raise ValueError(\"name cannot be cleared.\")\n        if (\n            \"endpoint_url\" in self.model_fields_set\n            and not (self.endpoint_url or \"\").strip()\n        ):\n            raise ValueError(\"endpoint_url cannot be cleared.\")\n        if \"fail_strategy\" in self.model_fields_set and self.fail_strategy is None:\n            raise ValueError(\n                \"fail_strategy cannot be null; omit the field to leave it unchanged.\"\n            )\n        if \"timeout_seconds\" in self.model_fields_set and self.timeout_seconds is None:\n            raise ValueError(\n                \"timeout_seconds cannot be null; omit the field to leave it unchanged.\"\n            )\n        return self\n\n\n# ---------------------------------------------------------------------------\n# Response models\n# ---------------------------------------------------------------------------\n\n\nclass HookPointMetaResponse(BaseModel):\n    hook_point: HookPoint\n    display_name: str\n    description: str\n    docs_url: str | None\n    input_schema: dict[str, Any]\n    output_schema: dict[str, Any]\n    default_timeout_seconds: float\n    default_fail_strategy: HookFailStrategy\n    fail_hard_description: str\n\n\nclass HookResponse(BaseModel):\n    id: int\n    name: str\n    hook_point: HookPoint\n    # Nullable to match the DB column — endpoint_url is required on creation but\n    # future hook point types may not use an external endpoint (e.g. built-in handlers).\n    endpoint_url: str | None\n    # Partially-masked API key (e.g. \"abcd••••••••wxyz\"), or None if no key is set.\n    api_key_masked: str | None\n    fail_strategy: HookFailStrategy\n    timeout_seconds: float  # always resolved — None from request is replaced with spec default before DB write\n    is_active: bool\n    is_reachable: bool | None\n    creator_email: str | None\n    created_at: datetime\n    updated_at: datetime\n\n\nclass HookValidateStatus(str, Enum):\n    passed = \"passed\"  # server responded (any status except 401/403)\n    auth_failed = \"auth_failed\"  # server responded with 401 or 403\n    timeout = (\n        \"timeout\"  # TCP connected, but read/write timed out (server exists but slow)\n    )\n    cannot_connect = \"cannot_connect\"  # could not connect to the server\n\n\nclass HookValidateResponse(BaseModel):\n    status: HookValidateStatus\n    error_message: str | None = None\n\n\nclass HookExecutionRecord(BaseModel):\n    error_message: str | None = None\n    status_code: int | None = None\n    duration_ms: int | None = None\n    created_at: datetime\n"
  },
  {
    "path": "backend/onyx/hooks/points/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/hooks/points/base.py",
    "content": "from typing import Any\nfrom typing import ClassVar\n\nfrom pydantic import BaseModel\n\nfrom onyx.db.enums import HookFailStrategy\nfrom onyx.db.enums import HookPoint\n\n\n_REQUIRED_ATTRS = (\n    \"hook_point\",\n    \"display_name\",\n    \"description\",\n    \"default_timeout_seconds\",\n    \"fail_hard_description\",\n    \"default_fail_strategy\",\n    \"payload_model\",\n    \"response_model\",\n)\n\n\nclass HookPointSpec:\n    \"\"\"Static metadata and contract for a pipeline hook point.\n\n    Each concrete subclass represents exactly one hook point and is instantiated\n    once at startup, registered in onyx.hooks.registry._REGISTRY. Prefer\n    get_hook_point_spec() or get_all_specs() from the registry over direct\n    instantiation.\n\n    Each hook point is a concrete subclass of this class. Onyx engineers\n    own these definitions — customers never touch this code.\n\n    Subclasses must define all attributes as class-level constants.\n    payload_model and response_model must be Pydantic BaseModel subclasses;\n    input_schema and output_schema are derived from them automatically.\n    \"\"\"\n\n    hook_point: HookPoint\n    display_name: str\n    description: str\n    default_timeout_seconds: float\n    fail_hard_description: str\n    default_fail_strategy: HookFailStrategy\n    docs_url: str | None = None\n\n    payload_model: ClassVar[type[BaseModel]]\n    response_model: ClassVar[type[BaseModel]]\n\n    # Computed once at class definition time from payload_model / response_model.\n    input_schema: ClassVar[dict[str, Any]]\n    output_schema: ClassVar[dict[str, Any]]\n\n    def __init_subclass__(cls, **kwargs: object) -> None:\n        \"\"\"Enforce that every subclass declares all required class attributes.\n\n        Called automatically by Python whenever a class inherits from HookPointSpec.\n        Raises TypeError at import time if any required attribute is missing or if\n        payload_model / response_model are not Pydantic BaseModel subclasses.\n        input_schema and output_schema are derived automatically from the models.\n        \"\"\"\n        super().__init_subclass__(**kwargs)\n        missing = [attr for attr in _REQUIRED_ATTRS if not hasattr(cls, attr)]\n        if missing:\n            raise TypeError(f\"{cls.__name__} must define class attributes: {missing}\")\n        for attr in (\"payload_model\", \"response_model\"):\n            val = getattr(cls, attr, None)\n            if val is None or not (\n                isinstance(val, type) and issubclass(val, BaseModel)\n            ):\n                raise TypeError(\n                    f\"{cls.__name__}.{attr} must be a Pydantic BaseModel subclass, got {val!r}\"\n                )\n        cls.input_schema = cls.payload_model.model_json_schema()\n        cls.output_schema = cls.response_model.model_json_schema()\n"
  },
  {
    "path": "backend/onyx/hooks/points/document_ingestion.py",
    "content": "from pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.db.enums import HookFailStrategy\nfrom onyx.db.enums import HookPoint\nfrom onyx.hooks.points.base import HookPointSpec\n\n\nclass DocumentIngestionSection(BaseModel):\n    \"\"\"Represents a single section of a document — either text or image, not both.\n\n    Text section: set `text`, leave `image_file_id` null.\n    Image section: set `image_file_id`, leave `text` null.\n    \"\"\"\n\n    text: str | None = Field(\n        default=None,\n        description=\"Text content of this section. Set for text sections, null for image sections.\",\n    )\n    link: str | None = Field(\n        default=None,\n        description=\"Optional URL associated with this section. Preserve the original link from the payload if you want it retained.\",\n    )\n    image_file_id: str | None = Field(\n        default=None,\n        description=(\n            \"Opaque identifier for an image stored in the file store. \"\n            \"The image content is not included — this field signals that the section is an image. \"\n            \"Hooks can use its presence to reorder or drop image sections, but cannot read or modify the image itself.\"\n        ),\n    )\n\n\nclass DocumentIngestionOwner(BaseModel):\n    display_name: str | None = Field(\n        default=None,\n        description=\"Human-readable name of the owner.\",\n    )\n    email: str | None = Field(\n        default=None,\n        description=\"Email address of the owner.\",\n    )\n\n\nclass DocumentIngestionPayload(BaseModel):\n    document_id: str = Field(\n        description=\"Unique identifier for the document. Read-only — changes are ignored.\"\n    )\n    title: str | None = Field(description=\"Title of the document.\")\n    semantic_identifier: str = Field(\n        description=\"Human-readable identifier used for display (e.g. file name, page title).\"\n    )\n    source: str = Field(\n        description=(\n            \"Connector source type (e.g. confluence, slack, google_drive). \"\n            \"Read-only — changes are ignored. \"\n            \"Full list of values: https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/configs/constants.py#L195\"\n        )\n    )\n    sections: list[DocumentIngestionSection] = Field(\n        description=\"Sections of the document. Includes both text sections (text set, image_file_id null) and image sections (image_file_id set, text null).\"\n    )\n    metadata: dict[str, list[str]] = Field(\n        description=\"Key-value metadata attached to the document. Values are always a list of strings.\"\n    )\n    doc_updated_at: str | None = Field(\n        description=\"ISO 8601 UTC timestamp of the last update at the source, or null if unknown. Example: '2024-03-15T10:30:00+00:00'.\"\n    )\n    primary_owners: list[DocumentIngestionOwner] | None = Field(\n        description=\"Primary owners of the document, or null if not available.\"\n    )\n    secondary_owners: list[DocumentIngestionOwner] | None = Field(\n        description=\"Secondary owners of the document, or null if not available.\"\n    )\n\n\nclass DocumentIngestionResponse(BaseModel):\n    # Intentionally permissive — customer endpoints may return extra fields.\n    sections: list[DocumentIngestionSection] | None = Field(\n        description=\"The sections to index, in the desired order. Reorder, drop, or modify sections freely. Null or empty list drops the document.\"\n    )\n    rejection_reason: str | None = Field(\n        default=None,\n        description=\"Logged when sections is null or empty. Falls back to a generic message if omitted.\",\n    )\n\n\nclass DocumentIngestionSpec(HookPointSpec):\n    \"\"\"Hook point that runs on every document before it enters the indexing pipeline.\n\n    Call site: immediately after Onyx's internal validation and before the\n    indexing pipeline begins — no partial writes have occurred yet.\n\n    If a Document Ingestion hook is configured, it takes precedence —\n    Document Ingestion Light will not run. Configure only one per deployment.\n\n    Supported use cases:\n    - Document filtering: drop documents based on content or metadata\n    - Content rewriting: redact PII or normalize text before indexing\n    \"\"\"\n\n    hook_point = HookPoint.DOCUMENT_INGESTION\n    display_name = \"Document Ingestion\"\n    description = (\n        \"Runs on every document before it enters the indexing pipeline. \"\n        \"Allows filtering, rewriting, or dropping documents.\"\n    )\n    default_timeout_seconds = 30.0\n    fail_hard_description = \"The document will not be indexed.\"\n    default_fail_strategy = HookFailStrategy.HARD\n    docs_url = \"https://docs.onyx.app/admins/advanced_configs/hook_extensions#document-ingestion\"\n\n    payload_model = DocumentIngestionPayload\n    response_model = DocumentIngestionResponse\n"
  },
  {
    "path": "backend/onyx/hooks/points/query_processing.py",
    "content": "from pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom pydantic import Field\n\nfrom onyx.db.enums import HookFailStrategy\nfrom onyx.db.enums import HookPoint\nfrom onyx.hooks.points.base import HookPointSpec\n\n\nclass QueryProcessingPayload(BaseModel):\n    model_config = ConfigDict(extra=\"forbid\")\n\n    query: str = Field(description=\"The raw query string exactly as the user typed it.\")\n    user_email: str | None = Field(\n        description=\"Email of the user submitting the query, or null if unauthenticated.\"\n    )\n    chat_session_id: str = Field(\n        description=\"UUID of the chat session, formatted as a hyphenated lowercase string (e.g. '550e8400-e29b-41d4-a716-446655440000'). Always present — the session is guaranteed to exist by the time this hook fires.\"\n    )\n\n\nclass QueryProcessingResponse(BaseModel):\n    # Intentionally permissive — customer endpoints may return extra fields.\n    query: str | None = Field(\n        default=None,\n        description=(\n            \"The query to use in the pipeline. \"\n            \"Null, empty string, whitespace-only, or absent = reject the query.\"\n        ),\n    )\n    rejection_message: str | None = Field(\n        default=None,\n        description=\"Message shown to the user when the query is rejected. Falls back to a generic message if not provided.\",\n    )\n\n\nclass QueryProcessingSpec(HookPointSpec):\n    \"\"\"Hook point that runs on every user query before it enters the pipeline.\n\n    Call site: inside handle_stream_message_objects() in\n    backend/onyx/chat/process_message.py, immediately after message_text is\n    assigned from the request and before create_new_chat_message() saves it.\n\n    This is the earliest possible point in the query pipeline:\n    - Raw query — unmodified, exactly as the user typed it\n    - No side effects yet — message has not been saved to DB\n    - User identity is available for user-specific logic\n\n    Supported use cases:\n    - Query rejection: block queries based on content or user context\n    - Query rewriting: normalize, expand, or modify the query\n    - PII removal: scrub sensitive data before the LLM sees it\n    - Access control: reject queries from certain users or groups\n    - Query auditing: log or track queries based on business rules\n    \"\"\"\n\n    hook_point = HookPoint.QUERY_PROCESSING\n    display_name = \"Query Processing\"\n    description = (\n        \"Runs on every user query before it enters the pipeline. \"\n        \"Allows rewriting, filtering, or rejecting queries.\"\n    )\n    default_timeout_seconds = 5.0  # user is actively waiting — keep tight\n    fail_hard_description = (\n        \"The query will be blocked and the user will see an error message.\"\n    )\n    default_fail_strategy = HookFailStrategy.HARD\n    docs_url = (\n        \"https://docs.onyx.app/admins/advanced_configs/hook_extensions#query-processing\"\n    )\n\n    payload_model = QueryProcessingPayload\n    response_model = QueryProcessingResponse\n"
  },
  {
    "path": "backend/onyx/hooks/registry.py",
    "content": "from onyx.db.enums import HookPoint\nfrom onyx.hooks.points.base import HookPointSpec\nfrom onyx.hooks.points.document_ingestion import DocumentIngestionSpec\nfrom onyx.hooks.points.query_processing import QueryProcessingSpec\n\n# Internal: use `monkeypatch.setattr(registry_module, \"_REGISTRY\", {...})` to override in tests.\n_REGISTRY: dict[HookPoint, HookPointSpec] = {\n    HookPoint.DOCUMENT_INGESTION: DocumentIngestionSpec(),\n    HookPoint.QUERY_PROCESSING: QueryProcessingSpec(),\n}\n\n\ndef validate_registry() -> None:\n    \"\"\"Assert that every HookPoint enum value has a registered spec.\n\n    Call once at application startup (e.g. from the FastAPI lifespan hook).\n    Raises RuntimeError if any hook point is missing a spec.\n    \"\"\"\n    missing = set(HookPoint) - set(_REGISTRY)\n    if missing:\n        raise RuntimeError(\n            f\"Hook point(s) have no registered spec: {missing}. \"\n            \"Add an entry to onyx.hooks.registry._REGISTRY.\"\n        )\n\n\ndef get_hook_point_spec(hook_point: HookPoint) -> HookPointSpec:\n    \"\"\"Returns the spec for a given hook point.\n\n    Raises ValueError if the hook point has no registered spec — this is a\n    programmer error; every HookPoint enum value must have a corresponding spec\n    in _REGISTRY.\n    \"\"\"\n    try:\n        return _REGISTRY[hook_point]\n    except KeyError:\n        raise ValueError(\n            f\"No spec registered for hook point {hook_point!r}. \"\n            \"Add an entry to onyx.hooks.registry._REGISTRY.\"\n        )\n\n\ndef get_all_specs() -> list[HookPointSpec]:\n    \"\"\"Returns the specs for all registered hook points.\"\"\"\n    return list(_REGISTRY.values())\n"
  },
  {
    "path": "backend/onyx/httpx/httpx_pool.py",
    "content": "import threading\nfrom typing import Any\n\nimport httpx\n\n\ndef make_default_kwargs() -> dict[str, Any]:\n    return {\n        \"http2\": True,\n        \"limits\": httpx.Limits(),\n    }\n\n\nclass HttpxPool:\n    \"\"\"Class to manage a global httpx Client instance\"\"\"\n\n    _clients: dict[str, httpx.Client] = {}\n    _lock: threading.Lock = threading.Lock()\n\n    # Default parameters for creation\n\n    def __init__(self) -> None:\n        pass\n\n    @classmethod\n    def _init_client(cls, **kwargs: Any) -> httpx.Client:\n        \"\"\"Private helper method to create and return an httpx.Client.\"\"\"\n        merged_kwargs = {**(make_default_kwargs()), **kwargs}\n        return httpx.Client(**merged_kwargs)\n\n    @classmethod\n    def init_client(cls, name: str, **kwargs: Any) -> None:\n        \"\"\"Allow the caller to init the client with extra params.\"\"\"\n        with cls._lock:\n            if name not in cls._clients:\n                cls._clients[name] = cls._init_client(**kwargs)\n\n    @classmethod\n    def close_client(cls, name: str) -> None:\n        \"\"\"Allow the caller to close the client.\"\"\"\n        with cls._lock:\n            client = cls._clients.pop(name, None)\n            if client:\n                client.close()\n\n    @classmethod\n    def close_all(cls) -> None:\n        \"\"\"Close all registered clients.\"\"\"\n        with cls._lock:\n            for client in cls._clients.values():\n                client.close()\n            cls._clients.clear()\n\n    @classmethod\n    def get(cls, name: str) -> httpx.Client:\n        \"\"\"Gets the httpx.Client. Will init to default settings if not init'd.\"\"\"\n        with cls._lock:\n            if name not in cls._clients:\n                cls._clients[name] = cls._init_client()\n            return cls._clients[name]\n"
  },
  {
    "path": "backend/onyx/image_gen/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/image_gen/exceptions.py",
    "content": "class ImageProviderError(Exception):\n    pass\n\n\nclass ImageProviderCredentialsError(ImageProviderError):\n    pass\n"
  },
  {
    "path": "backend/onyx/image_gen/factory.py",
    "content": "from enum import Enum\n\nfrom onyx.image_gen.interfaces import ImageGenerationProvider\nfrom onyx.image_gen.interfaces import ImageGenerationProviderCredentials\nfrom onyx.image_gen.providers.azure_img_gen import AzureImageGenerationProvider\nfrom onyx.image_gen.providers.openai_img_gen import OpenAIImageGenerationProvider\nfrom onyx.image_gen.providers.vertex_img_gen import VertexImageGenerationProvider\n\n\nclass ImageGenerationProviderName(str, Enum):\n    AZURE = \"azure\"\n    OPENAI = \"openai\"\n    VERTEX_AI = \"vertex_ai\"\n\n\nPROVIDERS: dict[ImageGenerationProviderName, type[ImageGenerationProvider]] = {\n    ImageGenerationProviderName.AZURE: AzureImageGenerationProvider,\n    ImageGenerationProviderName.OPENAI: OpenAIImageGenerationProvider,\n    ImageGenerationProviderName.VERTEX_AI: VertexImageGenerationProvider,\n}\n\n\ndef get_image_generation_provider(\n    provider: str,\n    credentials: ImageGenerationProviderCredentials,\n) -> ImageGenerationProvider:\n    provider_cls = _get_provider_cls(provider)\n    return provider_cls.build_from_credentials(credentials)\n\n\ndef validate_credentials(\n    provider: str,\n    credentials: ImageGenerationProviderCredentials,\n) -> bool:\n    provider_cls = _get_provider_cls(provider)\n    return provider_cls.validate_credentials(credentials)\n\n\ndef _get_provider_cls(provider: str) -> type[ImageGenerationProvider]:\n    try:\n        provider_enum = ImageGenerationProviderName(provider)\n    except ValueError:\n        raise ValueError(f\"Invalid image generation provider: {provider}\")\n    return PROVIDERS[provider_enum]\n"
  },
  {
    "path": "backend/onyx/image_gen/interfaces.py",
    "content": "from __future__ import annotations\n\nimport abc\nfrom typing import Any\nfrom typing import TYPE_CHECKING\n\nfrom pydantic import BaseModel\n\nfrom onyx.image_gen.exceptions import ImageProviderCredentialsError\n\nif TYPE_CHECKING:\n    from litellm.types.utils import ImageResponse as ImageGenerationResponse\n\n\nclass ImageGenerationProviderCredentials(BaseModel):\n    api_key: str | None = None\n    api_base: str | None = None\n    api_version: str | None = None\n    deployment_name: str | None = None\n    custom_config: dict[str, str] | None = None\n\n\nclass ReferenceImage(BaseModel):\n    data: bytes\n    mime_type: str\n\n\nclass ImageGenerationProvider(abc.ABC):\n    @property\n    def supports_reference_images(self) -> bool:\n        return False\n\n    @property\n    def max_reference_images(self) -> int:\n        return 0\n\n    @classmethod\n    @abc.abstractmethod\n    def validate_credentials(\n        cls,\n        credentials: ImageGenerationProviderCredentials,\n    ) -> bool:\n        \"\"\"Returns true if sufficient credentials are given to build this provider.\"\"\"\n        raise NotImplementedError(\"validate_credentials not implemented\")\n\n    @classmethod\n    def build_from_credentials(\n        cls,\n        credentials: ImageGenerationProviderCredentials,\n    ) -> ImageGenerationProvider:\n        if not cls.validate_credentials(credentials):\n            raise ImageProviderCredentialsError(\n                f\"Invalid image generation credentials: {credentials}\"\n            )\n        return cls._build_from_credentials(credentials)\n\n    @classmethod\n    @abc.abstractmethod\n    def _build_from_credentials(\n        cls,\n        credentials: ImageGenerationProviderCredentials,\n    ) -> ImageGenerationProvider:\n        \"\"\"\n        Given credentials, builds an instance of the provider.\n        Should NOT be called directly - use build_from_credentials instead.\n\n        AssertionError if credentials are invalid.\n        \"\"\"\n        raise NotImplementedError(\"build_from_credentials not implemented\")\n\n    @abc.abstractmethod\n    def generate_image(\n        self,\n        prompt: str,\n        model: str,\n        size: str,\n        n: int,\n        quality: str | None = None,\n        reference_images: list[ReferenceImage] | None = None,\n        **kwargs: Any,\n    ) -> ImageGenerationResponse:\n        \"\"\"Generates an image based on a prompt.\"\"\"\n        raise NotImplementedError(\"generate_image not implemented\")\n"
  },
  {
    "path": "backend/onyx/image_gen/providers/azure_img_gen.py",
    "content": "from __future__ import annotations\n\nfrom typing import Any\nfrom typing import TYPE_CHECKING\n\nfrom onyx.image_gen.interfaces import ImageGenerationProvider\nfrom onyx.image_gen.interfaces import ImageGenerationProviderCredentials\nfrom onyx.image_gen.interfaces import ReferenceImage\n\nif TYPE_CHECKING:\n    from onyx.image_gen.interfaces import ImageGenerationResponse\n\n\nclass AzureImageGenerationProvider(ImageGenerationProvider):\n    _GPT_IMAGE_MODEL_PREFIX = \"gpt-image-\"\n    _DALL_E_2_MODEL_NAME = \"dall-e-2\"\n\n    def __init__(\n        self,\n        api_key: str,\n        api_base: str,\n        api_version: str,\n        deployment_name: str | None = None,\n    ):\n        self._api_key = api_key\n        self._api_base = api_base\n        self._api_version = api_version\n        self._deployment_name = deployment_name\n\n    @classmethod\n    def validate_credentials(\n        cls,\n        credentials: ImageGenerationProviderCredentials,\n    ) -> bool:\n        return all(\n            [\n                credentials.api_key,\n                credentials.api_base,\n                credentials.api_version,\n            ]\n        )\n\n    @classmethod\n    def _build_from_credentials(\n        cls,\n        credentials: ImageGenerationProviderCredentials,\n    ) -> AzureImageGenerationProvider:\n        assert credentials.api_key\n        assert credentials.api_base\n        assert credentials.api_version\n\n        return cls(\n            api_key=credentials.api_key,\n            api_base=credentials.api_base,\n            api_version=credentials.api_version,\n            deployment_name=credentials.deployment_name,\n        )\n\n    @property\n    def supports_reference_images(self) -> bool:\n        return True\n\n    @property\n    def max_reference_images(self) -> int:\n        # Azure GPT image models support up to 16 input images for edits.\n        return 16\n\n    def _normalize_model_name(self, model: str) -> str:\n        return model.rsplit(\"/\", 1)[-1]\n\n    def _model_supports_image_edits(self, model: str) -> bool:\n        normalized_model = self._normalize_model_name(model)\n        return (\n            normalized_model.startswith(self._GPT_IMAGE_MODEL_PREFIX)\n            or normalized_model == self._DALL_E_2_MODEL_NAME\n        )\n\n    def generate_image(\n        self,\n        prompt: str,\n        model: str,\n        size: str,\n        n: int,\n        quality: str | None = None,\n        reference_images: list[ReferenceImage] | None = None,\n        **kwargs: Any,\n    ) -> ImageGenerationResponse:\n        deployment = self._deployment_name or model\n        model_name = f\"azure/{deployment}\"\n\n        if reference_images:\n            if not self._model_supports_image_edits(model):\n                raise ValueError(\n                    f\"Model '{model}' does not support image edits with reference images.\"\n                )\n\n            normalized_model = self._normalize_model_name(model)\n            if (\n                normalized_model == self._DALL_E_2_MODEL_NAME\n                and len(reference_images) > 1\n            ):\n                raise ValueError(\n                    \"Model 'dall-e-2' only supports a single reference image for edits.\"\n                )\n\n            from litellm import image_edit\n\n            return image_edit(\n                image=[image.data for image in reference_images],\n                prompt=prompt,\n                model=model_name,\n                api_key=self._api_key,\n                api_base=self._api_base,\n                api_version=self._api_version,\n                size=size,\n                n=n,\n                quality=quality,\n                **kwargs,\n            )\n\n        from litellm import image_generation\n\n        return image_generation(\n            prompt=prompt,\n            model=model_name,\n            api_key=self._api_key,\n            api_base=self._api_base,\n            api_version=self._api_version,\n            size=size,\n            n=n,\n            quality=quality,\n            **kwargs,\n        )\n"
  },
  {
    "path": "backend/onyx/image_gen/providers/openai_img_gen.py",
    "content": "from __future__ import annotations\n\nfrom typing import Any\nfrom typing import TYPE_CHECKING\n\nfrom onyx.image_gen.interfaces import ImageGenerationProvider\nfrom onyx.image_gen.interfaces import ImageGenerationProviderCredentials\nfrom onyx.image_gen.interfaces import ReferenceImage\n\nif TYPE_CHECKING:\n    from onyx.image_gen.interfaces import ImageGenerationResponse\n\n\nclass OpenAIImageGenerationProvider(ImageGenerationProvider):\n    _GPT_IMAGE_MODEL_PREFIX = \"gpt-image-\"\n    _DALL_E_2_MODEL_NAME = \"dall-e-2\"\n\n    def __init__(\n        self,\n        api_key: str,\n        api_base: str | None = None,\n    ):\n        self._api_key = api_key\n        self._api_base = api_base\n\n    @classmethod\n    def validate_credentials(\n        cls,\n        credentials: ImageGenerationProviderCredentials,\n    ) -> bool:\n        return bool(credentials.api_key)\n\n    @classmethod\n    def _build_from_credentials(\n        cls,\n        credentials: ImageGenerationProviderCredentials,\n    ) -> OpenAIImageGenerationProvider:\n        assert credentials.api_key\n\n        return cls(\n            api_key=credentials.api_key,\n            api_base=credentials.api_base,\n        )\n\n    @property\n    def supports_reference_images(self) -> bool:\n        return True\n\n    @property\n    def max_reference_images(self) -> int:\n        # GPT image models support up to 16 input images for edits.\n        return 16\n\n    def _normalize_model_name(self, model: str) -> str:\n        return model.rsplit(\"/\", 1)[-1]\n\n    def _model_supports_image_edits(self, model: str) -> bool:\n        normalized_model = self._normalize_model_name(model)\n        return (\n            normalized_model.startswith(self._GPT_IMAGE_MODEL_PREFIX)\n            or normalized_model == self._DALL_E_2_MODEL_NAME\n        )\n\n    def generate_image(\n        self,\n        prompt: str,\n        model: str,\n        size: str,\n        n: int,\n        quality: str | None = None,\n        reference_images: list[ReferenceImage] | None = None,\n        **kwargs: Any,\n    ) -> ImageGenerationResponse:\n        if reference_images:\n            if not self._model_supports_image_edits(model):\n                raise ValueError(\n                    f\"Model '{model}' does not support image edits with reference images.\"\n                )\n\n            normalized_model = self._normalize_model_name(model)\n            if (\n                normalized_model == self._DALL_E_2_MODEL_NAME\n                and len(reference_images) > 1\n            ):\n                raise ValueError(\n                    \"Model 'dall-e-2' only supports a single reference image for edits.\"\n                )\n\n            from litellm import image_edit\n\n            return image_edit(\n                image=[image.data for image in reference_images],\n                prompt=prompt,\n                model=model,\n                api_key=self._api_key,\n                api_base=self._api_base,\n                size=size,\n                n=n,\n                quality=quality,\n                **kwargs,\n            )\n\n        from litellm import image_generation\n\n        return image_generation(\n            prompt=prompt,\n            model=model,\n            api_key=self._api_key,\n            api_base=self._api_base,\n            size=size,\n            n=n,\n            quality=quality,\n            **kwargs,\n        )\n"
  },
  {
    "path": "backend/onyx/image_gen/providers/vertex_img_gen.py",
    "content": "from __future__ import annotations\n\nimport base64\nimport json\nfrom datetime import datetime\nfrom typing import Any\nfrom typing import TYPE_CHECKING\n\nfrom pydantic import BaseModel\n\nfrom onyx.image_gen.exceptions import ImageProviderCredentialsError\nfrom onyx.image_gen.interfaces import ImageGenerationProvider\nfrom onyx.image_gen.interfaces import ImageGenerationProviderCredentials\nfrom onyx.image_gen.interfaces import ReferenceImage\n\nif TYPE_CHECKING:\n    from onyx.image_gen.interfaces import ImageGenerationResponse\n\n\nclass VertexCredentials(BaseModel):\n    vertex_credentials: str\n    vertex_location: str\n    project_id: str\n\n\nclass VertexImageGenerationProvider(ImageGenerationProvider):\n    def __init__(\n        self,\n        vertex_credentials: VertexCredentials,\n    ):\n        self._vertex_credentials = vertex_credentials.vertex_credentials\n        self._vertex_location = vertex_credentials.vertex_location\n        self._vertex_project = vertex_credentials.project_id\n\n    @classmethod\n    def validate_credentials(\n        cls,\n        credentials: ImageGenerationProviderCredentials,\n    ) -> bool:\n        try:\n            _parse_to_vertex_credentials(credentials)\n            return True\n        except ImageProviderCredentialsError:\n            return False\n\n    @classmethod\n    def _build_from_credentials(\n        cls,\n        credentials: ImageGenerationProviderCredentials,\n    ) -> VertexImageGenerationProvider:\n        vertex_credentials = _parse_to_vertex_credentials(credentials)\n\n        return cls(\n            vertex_credentials=vertex_credentials,\n        )\n\n    @property\n    def supports_reference_images(self) -> bool:\n        return True\n\n    @property\n    def max_reference_images(self) -> int:\n        # Gemini image editing supports up to 14 input images.\n        return 14\n\n    def generate_image(\n        self,\n        prompt: str,\n        model: str,\n        size: str,\n        n: int,\n        quality: str | None = None,\n        reference_images: list[ReferenceImage] | None = None,\n        **kwargs: Any,\n    ) -> ImageGenerationResponse:\n        if reference_images:\n            return self._generate_image_with_reference_images(\n                prompt=prompt,\n                model=model,\n                size=size,\n                n=n,\n                reference_images=reference_images,\n            )\n\n        from litellm import image_generation\n\n        return image_generation(\n            prompt=prompt,\n            model=model,\n            size=size,\n            n=n,\n            quality=quality,\n            vertex_location=self._vertex_location,\n            vertex_credentials=self._vertex_credentials,\n            vertex_project=self._vertex_project,\n            **kwargs,\n        )\n\n    def _generate_image_with_reference_images(\n        self,\n        prompt: str,\n        model: str,\n        size: str,\n        n: int,\n        reference_images: list[ReferenceImage],\n    ) -> ImageGenerationResponse:\n        from google import genai\n        from google.genai import types as genai_types\n        from google.oauth2 import service_account\n        from litellm.types.utils import ImageObject\n        from litellm.types.utils import ImageResponse\n\n        service_account_info = json.loads(self._vertex_credentials)\n        credentials = service_account.Credentials.from_service_account_info(\n            service_account_info,\n            scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n        )\n\n        client = genai.Client(\n            vertexai=True,\n            project=self._vertex_project,\n            location=self._vertex_location,\n            credentials=credentials,\n        )\n\n        parts: list[genai_types.Part] = [\n            genai_types.Part.from_bytes(data=image.data, mime_type=image.mime_type)\n            for image in reference_images\n        ]\n        parts.append(genai_types.Part.from_text(text=prompt))\n\n        config = genai_types.GenerateContentConfig(\n            response_modalities=[\"TEXT\", \"IMAGE\"],\n            candidate_count=max(1, n),\n            image_config=genai_types.ImageConfig(\n                aspect_ratio=_map_size_to_aspect_ratio(size)\n            ),\n        )\n        model_name = model.replace(\"vertex_ai/\", \"\")\n        response = client.models.generate_content(\n            model=model_name,\n            contents=genai_types.Content(\n                role=\"user\",\n                parts=parts,\n            ),\n            config=config,\n        )\n\n        generated_data: list[ImageObject] = []\n        for candidate in response.candidates or []:\n            candidate_content = candidate.content\n            if not candidate_content:\n                continue\n\n            for part in candidate_content.parts or []:\n                inline_data = part.inline_data\n                if not inline_data or inline_data.data is None:\n                    continue\n\n                if isinstance(inline_data.data, bytes):\n                    b64_json = base64.b64encode(inline_data.data).decode(\"utf-8\")\n                elif isinstance(inline_data.data, str):\n                    b64_json = inline_data.data\n                else:\n                    continue\n\n                generated_data.append(\n                    ImageObject(\n                        b64_json=b64_json,\n                        revised_prompt=prompt,\n                    )\n                )\n\n        if not generated_data:\n            raise RuntimeError(\"No image data returned from Vertex AI.\")\n\n        return ImageResponse(\n            created=int(datetime.now().timestamp()),\n            data=generated_data,\n        )\n\n\ndef _map_size_to_aspect_ratio(size: str) -> str:\n    return {\n        \"1024x1024\": \"1:1\",\n        \"1792x1024\": \"16:9\",\n        \"1024x1792\": \"9:16\",\n        \"1536x1024\": \"3:2\",\n        \"1024x1536\": \"2:3\",\n    }.get(size, \"1:1\")\n\n\ndef _parse_to_vertex_credentials(\n    credentials: ImageGenerationProviderCredentials,\n) -> VertexCredentials:\n    custom_config = credentials.custom_config\n\n    if not custom_config:\n        raise ImageProviderCredentialsError(\"Custom config is required\")\n\n    vertex_credentials = custom_config.get(\"vertex_credentials\")\n    vertex_location = custom_config.get(\"vertex_location\")\n\n    if not vertex_credentials:\n        raise ImageProviderCredentialsError(\"Vertex credentials are required\")\n\n    if not vertex_location:\n        raise ImageProviderCredentialsError(\"Vertex location is required\")\n\n    vertex_json = json.loads(vertex_credentials)\n    vertex_project = vertex_json.get(\"project_id\")\n\n    if not vertex_project:\n        raise ImageProviderCredentialsError(\"Project ID is required\")\n\n    return VertexCredentials(\n        vertex_credentials=vertex_credentials,\n        vertex_location=vertex_location,\n        project_id=vertex_project,\n    )\n"
  },
  {
    "path": "backend/onyx/indexing/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/indexing/adapters/document_indexing_adapter.py",
    "content": "import contextlib\nfrom collections.abc import Generator\n\nfrom sqlalchemy.engine.util import TransactionalContext\nfrom sqlalchemy.orm import Session\n\nfrom onyx.access.access import get_access_for_documents\nfrom onyx.access.models import DocumentAccess\nfrom onyx.configs.constants import DEFAULT_BOOST\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import IndexAttemptMetadata\nfrom onyx.db.chunk import update_chunk_boost_components__no_commit\nfrom onyx.db.document import fetch_chunk_counts_for_documents\nfrom onyx.db.document import mark_document_as_indexed_for_cc_pair__no_commit\nfrom onyx.db.document import prepare_to_modify_documents\nfrom onyx.db.document import update_docs_chunk_count__no_commit\nfrom onyx.db.document import update_docs_last_modified__no_commit\nfrom onyx.db.document import update_docs_updated_at__no_commit\nfrom onyx.db.document_set import fetch_document_sets_for_documents\nfrom onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext\nfrom onyx.indexing.indexing_pipeline import index_doc_batch_prepare\nfrom onyx.indexing.models import ChunkEnrichmentContext\nfrom onyx.indexing.models import DocAwareChunk\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom onyx.indexing.models import IndexChunk\nfrom onyx.indexing.models import UpdatableChunkData\nfrom onyx.redis.redis_hierarchy import get_ancestors_from_raw_id\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass DocumentIndexingBatchAdapter:\n    \"\"\"Default adapter: handles DB prep, locking, metadata enrichment, and finalize.\n\n    Keeps orchestration logic in the pipeline and side-effects in the adapter.\n    \"\"\"\n\n    def __init__(\n        self,\n        db_session: Session,\n        connector_id: int,\n        credential_id: int,\n        tenant_id: str,\n        index_attempt_metadata: IndexAttemptMetadata,\n    ):\n        self.db_session = db_session\n        self.connector_id = connector_id\n        self.credential_id = credential_id\n        self.tenant_id = tenant_id\n        self.index_attempt_metadata = index_attempt_metadata\n\n    def prepare(\n        self, documents: list[Document], ignore_time_skip: bool\n    ) -> DocumentBatchPrepareContext | None:\n        \"\"\"Upsert docs, map CC pairs, return context or mark as indexed if no-op.\"\"\"\n        context = index_doc_batch_prepare(\n            documents=documents,\n            index_attempt_metadata=self.index_attempt_metadata,\n            db_session=self.db_session,\n            ignore_time_skip=ignore_time_skip,\n        )\n\n        if not context:\n            # even though we didn't actually index anything, we should still\n            # mark them as \"completed\" for the CC Pair in order to make the\n            # counts match\n            mark_document_as_indexed_for_cc_pair__no_commit(\n                connector_id=self.index_attempt_metadata.connector_id,\n                credential_id=self.index_attempt_metadata.credential_id,\n                document_ids=[doc.id for doc in documents],\n                db_session=self.db_session,\n            )\n            self.db_session.commit()\n\n        return context\n\n    @contextlib.contextmanager\n    def lock_context(\n        self, documents: list[Document]\n    ) -> Generator[TransactionalContext, None, None]:\n        \"\"\"Acquire transaction/row locks on docs for the critical section.\"\"\"\n        with prepare_to_modify_documents(\n            db_session=self.db_session, document_ids=[doc.id for doc in documents]\n        ) as transaction:\n            yield transaction\n\n    def prepare_enrichment(\n        self,\n        context: DocumentBatchPrepareContext,\n        tenant_id: str,\n        chunks: list[DocAwareChunk],\n    ) -> \"DocumentChunkEnricher\":\n        \"\"\"Do all DB lookups once and return a per-chunk enricher.\"\"\"\n        updatable_ids = [doc.id for doc in context.updatable_docs]\n\n        doc_id_to_new_chunk_cnt: dict[str, int] = {\n            doc_id: 0 for doc_id in updatable_ids\n        }\n        for chunk in chunks:\n            if chunk.source_document.id in doc_id_to_new_chunk_cnt:\n                doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1\n\n        no_access = DocumentAccess.build(\n            user_emails=[],\n            user_groups=[],\n            external_user_emails=[],\n            external_user_group_ids=[],\n            is_public=False,\n        )\n\n        return DocumentChunkEnricher(\n            doc_id_to_access_info=get_access_for_documents(\n                document_ids=updatable_ids, db_session=self.db_session\n            ),\n            doc_id_to_document_set={\n                document_id: document_sets\n                for document_id, document_sets in fetch_document_sets_for_documents(\n                    document_ids=updatable_ids, db_session=self.db_session\n                )\n            },\n            doc_id_to_ancestor_ids=self._get_ancestor_ids_for_documents(\n                context.updatable_docs, tenant_id\n            ),\n            id_to_boost_map=context.id_to_boost_map,\n            doc_id_to_previous_chunk_cnt={\n                document_id: chunk_count\n                for document_id, chunk_count in fetch_chunk_counts_for_documents(\n                    document_ids=updatable_ids,\n                    db_session=self.db_session,\n                )\n            },\n            doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),\n            no_access=no_access,\n            tenant_id=tenant_id,\n        )\n\n    def _get_ancestor_ids_for_documents(\n        self,\n        documents: list[Document],\n        tenant_id: str,\n    ) -> dict[str, list[int]]:\n        \"\"\"\n        Get ancestor hierarchy node IDs for a batch of documents.\n\n        Uses Redis cache for fast lookups - no DB calls are made unless\n        there's a cache miss. Documents provide parent_hierarchy_raw_node_id\n        directly from the connector.\n\n        Returns a mapping from document_id to list of ancestor node IDs.\n        \"\"\"\n        if not documents:\n            return {}\n\n        redis_client = get_redis_client(tenant_id=tenant_id)\n        result: dict[str, list[int]] = {}\n\n        for doc in documents:\n            # Use parent_hierarchy_raw_node_id directly from the document\n            # If None, get_ancestors_from_raw_id will return just the SOURCE node\n            ancestors = get_ancestors_from_raw_id(\n                redis_client=redis_client,\n                source=doc.source,\n                parent_hierarchy_raw_node_id=doc.parent_hierarchy_raw_node_id,\n                db_session=self.db_session,\n            )\n            result[doc.id] = ancestors\n\n        return result\n\n    def post_index(\n        self,\n        context: DocumentBatchPrepareContext,\n        updatable_chunk_data: list[UpdatableChunkData],\n        filtered_documents: list[Document],\n        enrichment: ChunkEnrichmentContext,\n    ) -> None:\n        \"\"\"Finalize DB updates, store plaintext, and mark docs as indexed.\"\"\"\n        updatable_ids = [doc.id for doc in context.updatable_docs]\n        last_modified_ids = []\n        ids_to_new_updated_at = {}\n        for doc in context.updatable_docs:\n            last_modified_ids.append(doc.id)\n            # doc_updated_at is the source's idea (on the other end of the connector)\n            # of when the doc was last modified\n            if doc.doc_updated_at is None:\n                continue\n            ids_to_new_updated_at[doc.id] = doc.doc_updated_at\n\n        update_docs_updated_at__no_commit(\n            ids_to_new_updated_at=ids_to_new_updated_at, db_session=self.db_session\n        )\n\n        update_docs_last_modified__no_commit(\n            document_ids=last_modified_ids, db_session=self.db_session\n        )\n\n        update_docs_chunk_count__no_commit(\n            document_ids=updatable_ids,\n            doc_id_to_chunk_count=enrichment.doc_id_to_new_chunk_cnt,\n            db_session=self.db_session,\n        )\n\n        # these documents can now be counted as part of the CC Pairs\n        # document count, so we need to mark them as indexed\n        # NOTE: even documents we skipped since they were already up\n        # to date should be counted here in order to maintain parity\n        # between CC Pair and index attempt counts\n        mark_document_as_indexed_for_cc_pair__no_commit(\n            connector_id=self.index_attempt_metadata.connector_id,\n            credential_id=self.index_attempt_metadata.credential_id,\n            document_ids=[doc.id for doc in filtered_documents],\n            db_session=self.db_session,\n        )\n\n        # save the chunk boost components to postgres\n        update_chunk_boost_components__no_commit(\n            chunk_data=updatable_chunk_data, db_session=self.db_session\n        )\n\n        self.db_session.commit()\n\n\nclass DocumentChunkEnricher:\n    \"\"\"Pre-computed metadata for per-chunk enrichment of connector documents.\"\"\"\n\n    def __init__(\n        self,\n        doc_id_to_access_info: dict[str, DocumentAccess],\n        doc_id_to_document_set: dict[str, list[str]],\n        doc_id_to_ancestor_ids: dict[str, list[int]],\n        id_to_boost_map: dict[str, int],\n        doc_id_to_previous_chunk_cnt: dict[str, int],\n        doc_id_to_new_chunk_cnt: dict[str, int],\n        no_access: DocumentAccess,\n        tenant_id: str,\n    ) -> None:\n        self._doc_id_to_access_info = doc_id_to_access_info\n        self._doc_id_to_document_set = doc_id_to_document_set\n        self._doc_id_to_ancestor_ids = doc_id_to_ancestor_ids\n        self._id_to_boost_map = id_to_boost_map\n        self._no_access = no_access\n        self._tenant_id = tenant_id\n        self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt\n        self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt\n\n    def enrich_chunk(\n        self, chunk: IndexChunk, score: float\n    ) -> DocMetadataAwareIndexChunk:\n        return DocMetadataAwareIndexChunk.from_index_chunk(\n            index_chunk=chunk,\n            access=self._doc_id_to_access_info.get(\n                chunk.source_document.id, self._no_access\n            ),\n            document_sets=set(\n                self._doc_id_to_document_set.get(chunk.source_document.id, [])\n            ),\n            user_project=[],\n            personas=[],\n            boost=(\n                self._id_to_boost_map[chunk.source_document.id]\n                if chunk.source_document.id in self._id_to_boost_map\n                else DEFAULT_BOOST\n            ),\n            tenant_id=self._tenant_id,\n            aggregated_chunk_boost_factor=score,\n            ancestor_hierarchy_node_ids=self._doc_id_to_ancestor_ids[\n                chunk.source_document.id\n            ],\n        )\n"
  },
  {
    "path": "backend/onyx/indexing/adapters/user_file_indexing_adapter.py",
    "content": "from __future__ import annotations\n\nimport contextlib\nimport datetime\nimport time\nfrom collections import defaultdict\nfrom collections.abc import Generator\nfrom uuid import UUID\n\nfrom sqlalchemy import select\nfrom sqlalchemy.exc import OperationalError\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.orm.session import TransactionalContext\n\nfrom onyx.access.access import get_access_for_user_files\nfrom onyx.access.models import DocumentAccess\nfrom onyx.configs.constants import DEFAULT_BOOST\nfrom onyx.configs.constants import NotificationType\nfrom onyx.connectors.models import Document\nfrom onyx.db.enums import UserFileStatus\nfrom onyx.db.models import Persona\nfrom onyx.db.models import UserFile\nfrom onyx.db.notification import create_notification\nfrom onyx.db.user_file import fetch_chunk_counts_for_user_files\nfrom onyx.db.user_file import fetch_persona_ids_for_user_files\nfrom onyx.db.user_file import fetch_user_project_ids_for_user_files\nfrom onyx.file_store.utils import store_user_file_plaintext\nfrom onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext\nfrom onyx.indexing.models import ChunkEnrichmentContext\nfrom onyx.indexing.models import DocAwareChunk\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom onyx.indexing.models import IndexChunk\nfrom onyx.indexing.models import UpdatableChunkData\nfrom onyx.llm.factory import get_default_llm\nfrom onyx.natural_language_processing.utils import count_tokens\nfrom onyx.natural_language_processing.utils import get_tokenizer\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_NUM_LOCK_ATTEMPTS = 3\nretry_delay = 0.5\n\n\ndef _acquire_user_file_locks(db_session: Session, user_file_ids: list[str]) -> bool:\n    \"\"\"Acquire locks for the specified user files.\"\"\"\n    # Convert to UUIDs for the DB comparison\n    user_file_uuid_list = [UUID(user_file_id) for user_file_id in user_file_ids]\n    stmt = (\n        select(UserFile.id)\n        .where(UserFile.id.in_(user_file_uuid_list))\n        .with_for_update(nowait=True)\n    )\n    # will raise exception if any of the documents are already locked\n    documents = db_session.scalars(stmt).all()\n\n    # make sure we found every document\n    if len(documents) != len(set(user_file_ids)):\n        logger.warning(\"Didn't find row for all specified user file IDs. Aborting.\")\n        return False\n\n    return True\n\n\nclass UserFileIndexingAdapter:\n    def __init__(self, tenant_id: str, db_session: Session):\n        self.tenant_id = tenant_id\n        self.db_session = db_session\n\n    def prepare(\n        self,\n        documents: list[Document],\n        ignore_time_skip: bool,  # noqa: ARG002\n    ) -> DocumentBatchPrepareContext:\n        return DocumentBatchPrepareContext(\n            updatable_docs=documents,\n            id_to_boost_map={},  # TODO(subash): add boost map\n        )\n\n    @contextlib.contextmanager\n    def lock_context(\n        self, documents: list[Document]\n    ) -> Generator[TransactionalContext, None, None]:\n        self.db_session.commit()  # ensure that we're not in a transaction\n        lock_acquired = False\n        for i in range(_NUM_LOCK_ATTEMPTS):\n            try:\n                with self.db_session.begin() as transaction:\n                    lock_acquired = _acquire_user_file_locks(\n                        db_session=self.db_session,\n                        user_file_ids=[doc.id for doc in documents],\n                    )\n                    if lock_acquired:\n                        yield transaction\n                        break\n            except OperationalError as e:\n                logger.warning(\n                    f\"Failed to acquire locks for user files on attempt {i}, retrying. Error: {e}\"\n                )\n\n            time.sleep(retry_delay)\n\n        if not lock_acquired:\n            raise RuntimeError(\n                f\"Failed to acquire locks after {_NUM_LOCK_ATTEMPTS} attempts for user files: {[doc.id for doc in documents]}\"\n            )\n\n    def prepare_enrichment(\n        self,\n        context: DocumentBatchPrepareContext,\n        tenant_id: str,\n        chunks: list[DocAwareChunk],\n    ) -> UserFileChunkEnricher:\n        \"\"\"Do all DB lookups and pre-compute file metadata from chunks.\"\"\"\n        updatable_ids = [doc.id for doc in context.updatable_docs]\n\n        doc_id_to_new_chunk_cnt: dict[str, int] = defaultdict(int)\n        content_by_file: dict[str, list[str]] = defaultdict(list)\n        for chunk in chunks:\n            doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1\n            content_by_file[chunk.source_document.id].append(chunk.content)\n\n        no_access = DocumentAccess.build(\n            user_emails=[],\n            user_groups=[],\n            external_user_emails=[],\n            external_user_group_ids=[],\n            is_public=False,\n        )\n\n        user_file_id_to_project_ids = fetch_user_project_ids_for_user_files(\n            user_file_ids=updatable_ids,\n            db_session=self.db_session,\n        )\n        user_file_id_to_persona_ids = fetch_persona_ids_for_user_files(\n            user_file_ids=updatable_ids,\n            db_session=self.db_session,\n        )\n        user_file_id_to_access: dict[str, DocumentAccess] = get_access_for_user_files(\n            user_file_ids=updatable_ids,\n            db_session=self.db_session,\n        )\n        user_file_id_to_previous_chunk_cnt: dict[str, int] = {\n            user_file_id: chunk_count\n            for user_file_id, chunk_count in fetch_chunk_counts_for_user_files(\n                user_file_ids=updatable_ids,\n                db_session=self.db_session,\n            )\n        }\n\n        # Initialize tokenizer used for token count calculation\n        try:\n            llm = get_default_llm()\n            llm_tokenizer = get_tokenizer(\n                model_name=llm.config.model_name,\n                provider_type=llm.config.model_provider,\n            )\n        except Exception as e:\n            logger.error(f\"Error getting tokenizer: {e}\")\n            llm_tokenizer = None\n\n        user_file_id_to_raw_text: dict[str, str] = {}\n        user_file_id_to_token_count: dict[str, int | None] = {}\n        for user_file_id in updatable_ids:\n            contents = content_by_file.get(user_file_id)\n            if contents:\n                combined_content = \" \".join(contents)\n                user_file_id_to_raw_text[str(user_file_id)] = combined_content\n                token_count: int = (\n                    count_tokens(combined_content, llm_tokenizer)\n                    if llm_tokenizer\n                    else 0\n                )\n                user_file_id_to_token_count[str(user_file_id)] = token_count\n            else:\n                user_file_id_to_raw_text[str(user_file_id)] = \"\"\n                user_file_id_to_token_count[str(user_file_id)] = None\n\n        return UserFileChunkEnricher(\n            user_file_id_to_access=user_file_id_to_access,\n            user_file_id_to_project_ids=user_file_id_to_project_ids,\n            user_file_id_to_persona_ids=user_file_id_to_persona_ids,\n            doc_id_to_previous_chunk_cnt=user_file_id_to_previous_chunk_cnt,\n            doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),\n            user_file_id_to_raw_text=user_file_id_to_raw_text,\n            user_file_id_to_token_count=user_file_id_to_token_count,\n            no_access=no_access,\n            tenant_id=tenant_id,\n        )\n\n    def _notify_assistant_owners_if_files_ready(\n        self, user_files: list[UserFile]\n    ) -> None:\n        \"\"\"\n        Check if all files for associated assistants are processed and notify owners.\n        Only sends notification when all files for an assistant are COMPLETED.\n        \"\"\"\n        for user_file in user_files:\n            if user_file.status == UserFileStatus.COMPLETED:\n                for assistant in user_file.assistants:\n                    # Skip assistants without owners\n                    if assistant.user_id is None:\n                        continue\n\n                    # Check if all OTHER files for this assistant are completed\n                    # (we already know current file is completed from the outer check)\n                    all_files_completed = all(\n                        f.status == UserFileStatus.COMPLETED\n                        for f in assistant.user_files\n                        if f.id != user_file.id\n                    )\n\n                    if all_files_completed:\n                        create_notification(\n                            user_id=assistant.user_id,\n                            notif_type=NotificationType.ASSISTANT_FILES_READY,\n                            db_session=self.db_session,\n                            title=\"Your files are ready!\",\n                            description=f\"All files for agent {assistant.name} have been processed and are now available.\",\n                            additional_data={\n                                \"persona_id\": assistant.id,\n                                \"link\": f\"/assistants/{assistant.id}\",\n                            },\n                            autocommit=False,\n                        )\n\n    def post_index(\n        self,\n        context: DocumentBatchPrepareContext,\n        updatable_chunk_data: list[UpdatableChunkData],  # noqa: ARG002\n        filtered_documents: list[Document],  # noqa: ARG002\n        enrichment: ChunkEnrichmentContext,\n    ) -> None:\n        assert isinstance(enrichment, UserFileChunkEnricher)\n        user_file_ids = [doc.id for doc in context.updatable_docs]\n\n        user_files = (\n            self.db_session.query(UserFile)\n            .options(selectinload(UserFile.assistants).selectinload(Persona.user_files))\n            .filter(UserFile.id.in_(user_file_ids))\n            .all()\n        )\n        for user_file in user_files:\n            # don't update the status if the user file is being deleted\n            if user_file.status != UserFileStatus.DELETING:\n                user_file.status = UserFileStatus.COMPLETED\n            user_file.last_project_sync_at = datetime.datetime.now(\n                datetime.timezone.utc\n            )\n            user_file.chunk_count = enrichment.doc_id_to_new_chunk_cnt.get(\n                str(user_file.id), 0\n            )\n            user_file.token_count = enrichment.user_file_id_to_token_count[\n                str(user_file.id)\n            ]\n\n        # Notify assistant owners if all their files are now processed\n        self._notify_assistant_owners_if_files_ready(user_files)\n\n        self.db_session.commit()\n\n        # Store the plaintext in the file store for faster retrieval\n        # NOTE: this creates its own session to avoid committing the overall\n        # transaction.\n        for user_file_id, raw_text in enrichment.user_file_id_to_raw_text.items():\n            store_user_file_plaintext(\n                user_file_id=UUID(user_file_id),\n                plaintext_content=raw_text,\n            )\n\n\nclass UserFileChunkEnricher:\n    \"\"\"Pre-computed metadata for per-chunk enrichment of user-uploaded files.\"\"\"\n\n    def __init__(\n        self,\n        user_file_id_to_access: dict[str, DocumentAccess],\n        user_file_id_to_project_ids: dict[str, list[int]],\n        user_file_id_to_persona_ids: dict[str, list[int]],\n        doc_id_to_previous_chunk_cnt: dict[str, int],\n        doc_id_to_new_chunk_cnt: dict[str, int],\n        user_file_id_to_raw_text: dict[str, str],\n        user_file_id_to_token_count: dict[str, int | None],\n        no_access: DocumentAccess,\n        tenant_id: str,\n    ) -> None:\n        self._user_file_id_to_access = user_file_id_to_access\n        self._user_file_id_to_project_ids = user_file_id_to_project_ids\n        self._user_file_id_to_persona_ids = user_file_id_to_persona_ids\n        self._no_access = no_access\n        self._tenant_id = tenant_id\n        self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt\n        self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt\n        self.user_file_id_to_raw_text = user_file_id_to_raw_text\n        self.user_file_id_to_token_count = user_file_id_to_token_count\n\n    def enrich_chunk(\n        self, chunk: IndexChunk, score: float\n    ) -> DocMetadataAwareIndexChunk:\n        return DocMetadataAwareIndexChunk.from_index_chunk(\n            index_chunk=chunk,\n            access=self._user_file_id_to_access.get(\n                chunk.source_document.id, self._no_access\n            ),\n            document_sets=set(),\n            user_project=self._user_file_id_to_project_ids.get(\n                chunk.source_document.id, []\n            ),\n            personas=self._user_file_id_to_persona_ids.get(\n                chunk.source_document.id, []\n            ),\n            boost=DEFAULT_BOOST,\n            tenant_id=self._tenant_id,\n            aggregated_chunk_boost_factor=score,\n        )\n"
  },
  {
    "path": "backend/onyx/indexing/chunk_batch_store.py",
    "content": "import pickle\nimport shutil\nimport tempfile\nfrom collections.abc import Iterator\nfrom pathlib import Path\n\nfrom onyx.indexing.models import IndexChunk\n\n\nclass ChunkBatchStore:\n    \"\"\"Manages serialization of embedded chunks to a temporary directory.\n\n    Owns the temp directory lifetime and provides save/load/stream/scrub\n    operations.\n\n    Use as a context manager to ensure cleanup::\n\n        with ChunkBatchStore() as store:\n            store.save(chunks, batch_idx=0)\n            for chunk in store.stream():\n                ...\n    \"\"\"\n\n    _EXT = \".pkl\"\n\n    def __init__(self) -> None:\n        self._tmpdir: Path | None = None\n\n    # -- context manager -----------------------------------------------------\n\n    def __enter__(self) -> \"ChunkBatchStore\":\n        self._tmpdir = Path(tempfile.mkdtemp(prefix=\"onyx_embeddings_\"))\n        return self\n\n    def __exit__(self, *_exc: object) -> None:\n        if self._tmpdir is not None:\n            shutil.rmtree(self._tmpdir, ignore_errors=True)\n            self._tmpdir = None\n\n    @property\n    def _dir(self) -> Path:\n        assert self._tmpdir is not None, \"ChunkBatchStore used outside context manager\"\n        return self._tmpdir\n\n    # -- storage primitives --------------------------------------------------\n\n    def save(self, chunks: list[IndexChunk], batch_idx: int) -> None:\n        \"\"\"Serialize a batch of embedded chunks to disk.\"\"\"\n        with open(self._dir / f\"batch_{batch_idx}{self._EXT}\", \"wb\") as f:\n            pickle.dump(chunks, f)\n\n    def _load(self, batch_file: Path) -> list[IndexChunk]:\n        \"\"\"Deserialize a batch of embedded chunks from a file.\"\"\"\n        with open(batch_file, \"rb\") as f:\n            return pickle.load(f)\n\n    def _batch_files(self) -> list[Path]:\n        \"\"\"Return batch files sorted by numeric index.\"\"\"\n        return sorted(\n            self._dir.glob(f\"batch_*{self._EXT}\"),\n            key=lambda p: int(p.stem.removeprefix(\"batch_\")),\n        )\n\n    # -- higher-level operations ---------------------------------------------\n\n    def stream(self) -> Iterator[IndexChunk]:\n        \"\"\"Yield all chunks across all batch files.\n\n        Each call returns a fresh generator, so the data can be iterated\n        multiple times (e.g. once per document index).\n        \"\"\"\n        for batch_file in self._batch_files():\n            yield from self._load(batch_file)\n\n    def scrub_failed_docs(self, failed_doc_ids: set[str]) -> None:\n        \"\"\"Remove chunks belonging to *failed_doc_ids* from all batch files.\n\n        When a document fails embedding in batch N, earlier batches may\n        already contain successfully embedded chunks for that document.\n        This ensures the output is all-or-nothing per document.\n        \"\"\"\n        for batch_file in self._batch_files():\n            batch_chunks = self._load(batch_file)\n            cleaned = [\n                c for c in batch_chunks if c.source_document.id not in failed_doc_ids\n            ]\n            if len(cleaned) != len(batch_chunks):\n                with open(batch_file, \"wb\") as f:\n                    pickle.dump(cleaned, f)\n"
  },
  {
    "path": "backend/onyx/indexing/chunker.py",
    "content": "from typing import cast\n\nfrom chonkie import SentenceChunker\n\nfrom onyx.configs.app_configs import AVERAGE_SUMMARY_EMBEDDINGS\nfrom onyx.configs.app_configs import BLURB_SIZE\nfrom onyx.configs.app_configs import LARGE_CHUNK_RATIO\nfrom onyx.configs.app_configs import MINI_CHUNK_SIZE\nfrom onyx.configs.app_configs import SKIP_METADATA_IN_CHUNK\nfrom onyx.configs.app_configs import USE_CHUNK_SUMMARY\nfrom onyx.configs.app_configs import USE_DOCUMENT_SUMMARY\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import RETURN_SEPARATOR\nfrom onyx.configs.constants import SECTION_SEPARATOR\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    get_metadata_keys_to_ignore,\n)\nfrom onyx.connectors.models import IndexingDocument\nfrom onyx.connectors.models import Section\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.indexing.models import DocAwareChunk\nfrom onyx.llm.utils import MAX_CONTEXT_TOKENS\nfrom onyx.natural_language_processing.utils import BaseTokenizer\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.text_processing import clean_text\nfrom onyx.utils.text_processing import shared_precompare_cleanup\nfrom shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE\nfrom shared_configs.configs import STRICT_CHUNK_TOKEN_LIMIT\n\n# Not supporting overlaps, we need a clean combination of chunks and it is unclear if overlaps\n# actually help quality at all\nCHUNK_OVERLAP = 0\n# Fairly arbitrary numbers but the general concept is we don't want the title/metadata to\n# overwhelm the actual contents of the chunk\nMAX_METADATA_PERCENTAGE = 0.25\nCHUNK_MIN_CONTENT = 256\n\nlogger = setup_logger()\n\n\ndef _get_metadata_suffix_for_document_index(\n    metadata: dict[str, str | list[str]], include_separator: bool = False\n) -> tuple[str, str]:\n    \"\"\"\n    Returns the metadata as a natural language string representation with all of the keys and values\n    for the vector embedding and a string of all of the values for the keyword search.\n    \"\"\"\n    if not metadata:\n        return \"\", \"\"\n\n    metadata_str = \"Metadata:\\n\"\n    metadata_values = []\n    for key, value in metadata.items():\n        if key in get_metadata_keys_to_ignore():\n            continue\n\n        value_str = \", \".join(value) if isinstance(value, list) else value\n\n        if isinstance(value, list):\n            metadata_values.extend(value)\n        else:\n            metadata_values.append(value)\n\n        metadata_str += f\"\\t{key} - {value_str}\\n\"\n\n    metadata_semantic = metadata_str.strip()\n    metadata_keyword = \" \".join(metadata_values)\n\n    if include_separator:\n        return RETURN_SEPARATOR + metadata_semantic, RETURN_SEPARATOR + metadata_keyword\n    return metadata_semantic, metadata_keyword\n\n\ndef _combine_chunks(chunks: list[DocAwareChunk], large_chunk_id: int) -> DocAwareChunk:\n    \"\"\"\n    Combines multiple DocAwareChunks into one large chunk (for \"multipass\" mode),\n    appending the content and adjusting source_links accordingly.\n    \"\"\"\n    merged_chunk = DocAwareChunk(\n        source_document=chunks[0].source_document,\n        chunk_id=chunks[0].chunk_id,\n        blurb=chunks[0].blurb,\n        content=chunks[0].content,\n        source_links=chunks[0].source_links or {},\n        image_file_id=None,\n        section_continuation=(chunks[0].chunk_id > 0),\n        title_prefix=chunks[0].title_prefix,\n        metadata_suffix_semantic=chunks[0].metadata_suffix_semantic,\n        metadata_suffix_keyword=chunks[0].metadata_suffix_keyword,\n        large_chunk_reference_ids=[chunk.chunk_id for chunk in chunks],\n        mini_chunk_texts=None,\n        large_chunk_id=large_chunk_id,\n        chunk_context=\"\",\n        doc_summary=\"\",\n        contextual_rag_reserved_tokens=0,\n    )\n\n    offset = 0\n    for i in range(1, len(chunks)):\n        merged_chunk.content += SECTION_SEPARATOR + chunks[i].content\n\n        offset += len(SECTION_SEPARATOR) + len(chunks[i - 1].content)\n        for link_offset, link_text in (chunks[i].source_links or {}).items():\n            if merged_chunk.source_links is None:\n                merged_chunk.source_links = {}\n            merged_chunk.source_links[link_offset + offset] = link_text\n\n    return merged_chunk\n\n\ndef generate_large_chunks(chunks: list[DocAwareChunk]) -> list[DocAwareChunk]:\n    \"\"\"\n    Generates larger \"grouped\" chunks by combining sets of smaller chunks.\n    \"\"\"\n    large_chunks = []\n    for idx, i in enumerate(range(0, len(chunks), LARGE_CHUNK_RATIO)):\n        chunk_group = chunks[i : i + LARGE_CHUNK_RATIO]\n        if len(chunk_group) > 1:\n            large_chunk = _combine_chunks(chunk_group, idx)\n            large_chunks.append(large_chunk)\n    return large_chunks\n\n\nclass Chunker:\n    \"\"\"\n    Chunks documents into smaller chunks for indexing.\n    \"\"\"\n\n    def __init__(\n        self,\n        tokenizer: BaseTokenizer,\n        enable_multipass: bool = False,\n        enable_large_chunks: bool = False,\n        enable_contextual_rag: bool = False,\n        blurb_size: int = BLURB_SIZE,\n        include_metadata: bool = not SKIP_METADATA_IN_CHUNK,\n        chunk_token_limit: int = DOC_EMBEDDING_CONTEXT_SIZE,\n        chunk_overlap: int = CHUNK_OVERLAP,\n        mini_chunk_size: int = MINI_CHUNK_SIZE,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> None:\n        self.include_metadata = include_metadata\n        self.chunk_token_limit = chunk_token_limit\n        self.enable_multipass = enable_multipass\n        self.enable_large_chunks = enable_large_chunks\n        self.enable_contextual_rag = enable_contextual_rag\n        if enable_contextual_rag:\n            assert (\n                USE_CHUNK_SUMMARY or USE_DOCUMENT_SUMMARY\n            ), \"Contextual RAG requires at least one of chunk summary and document summary enabled\"\n        self.default_contextual_rag_reserved_tokens = MAX_CONTEXT_TOKENS * (\n            int(USE_CHUNK_SUMMARY) + int(USE_DOCUMENT_SUMMARY)\n        )\n        self.tokenizer = tokenizer\n        self.callback = callback\n\n        self.max_context = 0\n        self.prompt_tokens = 0\n\n        # Create a token counter function that returns the count instead of the tokens\n        def token_counter(text: str) -> int:\n            return len(tokenizer.encode(text))\n\n        self.blurb_splitter = SentenceChunker(\n            tokenizer_or_token_counter=token_counter,\n            chunk_size=blurb_size,\n            chunk_overlap=0,\n            return_type=\"texts\",\n        )\n\n        self.chunk_splitter = SentenceChunker(\n            tokenizer_or_token_counter=token_counter,\n            chunk_size=chunk_token_limit,\n            chunk_overlap=chunk_overlap,\n            return_type=\"texts\",\n        )\n\n        self.mini_chunk_splitter = (\n            SentenceChunker(\n                tokenizer_or_token_counter=token_counter,\n                chunk_size=mini_chunk_size,\n                chunk_overlap=0,\n                return_type=\"texts\",\n            )\n            if enable_multipass\n            else None\n        )\n\n    def _split_oversized_chunk(self, text: str, content_token_limit: int) -> list[str]:\n        \"\"\"\n        Splits the text into smaller chunks based on token count to ensure\n        no chunk exceeds the content_token_limit.\n        \"\"\"\n        tokens = self.tokenizer.tokenize(text)\n        chunks = []\n        start = 0\n        total_tokens = len(tokens)\n        while start < total_tokens:\n            end = min(start + content_token_limit, total_tokens)\n            token_chunk = tokens[start:end]\n            chunk_text = \" \".join(token_chunk)\n            chunks.append(chunk_text)\n            start = end\n        return chunks\n\n    def _extract_blurb(self, text: str) -> str:\n        \"\"\"\n        Extract a short blurb from the text (first chunk of size `blurb_size`).\n        \"\"\"\n        # chunker is in `text` mode\n        texts = cast(list[str], self.blurb_splitter.chunk(text))\n        if not texts:\n            return \"\"\n        return texts[0]\n\n    def _get_mini_chunk_texts(self, chunk_text: str) -> list[str] | None:\n        \"\"\"\n        For \"multipass\" mode: additional sub-chunks (mini-chunks) for use in certain embeddings.\n        \"\"\"\n        if self.mini_chunk_splitter and chunk_text.strip():\n            # chunker is in `text` mode\n            return cast(list[str], self.mini_chunk_splitter.chunk(chunk_text))\n        return None\n\n    # ADDED: extra param image_url to store in the chunk\n    def _create_chunk(\n        self,\n        document: IndexingDocument,\n        chunks_list: list[DocAwareChunk],\n        text: str,\n        links: dict[int, str],\n        is_continuation: bool = False,\n        title_prefix: str = \"\",\n        metadata_suffix_semantic: str = \"\",\n        metadata_suffix_keyword: str = \"\",\n        image_file_id: str | None = None,\n    ) -> None:\n        \"\"\"\n        Helper to create a new DocAwareChunk, append it to chunks_list.\n        \"\"\"\n        new_chunk = DocAwareChunk(\n            source_document=document,\n            chunk_id=len(chunks_list),\n            blurb=self._extract_blurb(text),\n            content=text,\n            source_links=links or {0: \"\"},\n            image_file_id=image_file_id,\n            section_continuation=is_continuation,\n            title_prefix=title_prefix,\n            metadata_suffix_semantic=metadata_suffix_semantic,\n            metadata_suffix_keyword=metadata_suffix_keyword,\n            mini_chunk_texts=self._get_mini_chunk_texts(text),\n            large_chunk_id=None,\n            doc_summary=\"\",\n            chunk_context=\"\",\n            contextual_rag_reserved_tokens=0,  # set per-document in _handle_single_document\n        )\n        chunks_list.append(new_chunk)\n\n    def _chunk_document_with_sections(\n        self,\n        document: IndexingDocument,\n        sections: list[Section],\n        title_prefix: str,\n        metadata_suffix_semantic: str,\n        metadata_suffix_keyword: str,\n        content_token_limit: int,\n    ) -> list[DocAwareChunk]:\n        \"\"\"\n        Loops through sections of the document, converting them into one or more chunks.\n        Works with processed sections that are base Section objects.\n        \"\"\"\n        chunks: list[DocAwareChunk] = []\n        link_offsets: dict[int, str] = {}\n        chunk_text = \"\"\n\n        for section_idx, section in enumerate(sections):\n            # Get section text and other attributes\n            section_text = clean_text(str(section.text or \"\"))\n            section_link_text = section.link or \"\"\n            image_url = section.image_file_id\n\n            # If there is no useful content, skip\n            if not section_text and (not document.title or section_idx > 0):\n                logger.warning(\n                    f\"Skipping empty or irrelevant section in doc {document.semantic_identifier}, link={section_link_text}\"\n                )\n                continue\n\n            # CASE 1: If this section has an image, force a separate chunk\n            if image_url:\n                # First, if we have any partially built text chunk, finalize it\n                if chunk_text.strip():\n                    self._create_chunk(\n                        document,\n                        chunks,\n                        chunk_text,\n                        link_offsets,\n                        is_continuation=False,\n                        title_prefix=title_prefix,\n                        metadata_suffix_semantic=metadata_suffix_semantic,\n                        metadata_suffix_keyword=metadata_suffix_keyword,\n                    )\n                    chunk_text = \"\"\n                    link_offsets = {}\n\n                # Create a chunk specifically for this image section\n                # (Using the text summary that was generated during processing)\n                self._create_chunk(\n                    document,\n                    chunks,\n                    section_text,\n                    links={0: section_link_text} if section_link_text else {},\n                    image_file_id=image_url,\n                    title_prefix=title_prefix,\n                    metadata_suffix_semantic=metadata_suffix_semantic,\n                    metadata_suffix_keyword=metadata_suffix_keyword,\n                )\n                # Continue to next section\n                continue\n\n            # CASE 2: Normal text section\n            section_token_count = len(self.tokenizer.encode(section_text))\n\n            # If the section is large on its own, split it separately\n            if section_token_count > content_token_limit:\n                if chunk_text.strip():\n                    self._create_chunk(\n                        document,\n                        chunks,\n                        chunk_text,\n                        link_offsets,\n                        False,\n                        title_prefix,\n                        metadata_suffix_semantic,\n                        metadata_suffix_keyword,\n                    )\n                    chunk_text = \"\"\n                    link_offsets = {}\n\n                # chunker is in `text` mode\n                split_texts = cast(list[str], self.chunk_splitter.chunk(section_text))\n                for i, split_text in enumerate(split_texts):\n                    # If even the split_text is bigger than strict limit, further split\n                    if (\n                        STRICT_CHUNK_TOKEN_LIMIT\n                        and len(self.tokenizer.encode(split_text)) > content_token_limit\n                    ):\n                        smaller_chunks = self._split_oversized_chunk(\n                            split_text, content_token_limit\n                        )\n                        for j, small_chunk in enumerate(smaller_chunks):\n                            self._create_chunk(\n                                document,\n                                chunks,\n                                small_chunk,\n                                {0: section_link_text},\n                                is_continuation=(j != 0),\n                                title_prefix=title_prefix,\n                                metadata_suffix_semantic=metadata_suffix_semantic,\n                                metadata_suffix_keyword=metadata_suffix_keyword,\n                            )\n                    else:\n                        self._create_chunk(\n                            document,\n                            chunks,\n                            split_text,\n                            {0: section_link_text},\n                            is_continuation=(i != 0),\n                            title_prefix=title_prefix,\n                            metadata_suffix_semantic=metadata_suffix_semantic,\n                            metadata_suffix_keyword=metadata_suffix_keyword,\n                        )\n                continue\n\n            # If we can still fit this section into the current chunk, do so\n            current_token_count = len(self.tokenizer.encode(chunk_text))\n            current_offset = len(shared_precompare_cleanup(chunk_text))\n            next_section_tokens = (\n                len(self.tokenizer.encode(SECTION_SEPARATOR)) + section_token_count\n            )\n\n            if next_section_tokens + current_token_count <= content_token_limit:\n                if chunk_text:\n                    chunk_text += SECTION_SEPARATOR\n                chunk_text += section_text\n                link_offsets[current_offset] = section_link_text\n            else:\n                # finalize the existing chunk\n                self._create_chunk(\n                    document,\n                    chunks,\n                    chunk_text,\n                    link_offsets,\n                    False,\n                    title_prefix,\n                    metadata_suffix_semantic,\n                    metadata_suffix_keyword,\n                )\n                # start a new chunk\n                link_offsets = {0: section_link_text}\n                chunk_text = section_text\n\n        # finalize any leftover text chunk\n        if chunk_text.strip() or not chunks:\n            self._create_chunk(\n                document,\n                chunks,\n                chunk_text,\n                link_offsets or {0: \"\"},  # safe default\n                False,\n                title_prefix,\n                metadata_suffix_semantic,\n                metadata_suffix_keyword,\n            )\n        return chunks\n\n    def _handle_single_document(\n        self, document: IndexingDocument\n    ) -> list[DocAwareChunk]:\n        # Specifically for reproducing an issue with gmail\n        if document.source == DocumentSource.GMAIL:\n            logger.debug(f\"Chunking {document.semantic_identifier}\")\n\n        # Title prep\n        title = self._extract_blurb(document.get_title_for_document_index() or \"\")\n        title_prefix = title + RETURN_SEPARATOR if title else \"\"\n        title_tokens = len(self.tokenizer.encode(title_prefix))\n\n        # Metadata prep\n        metadata_suffix_semantic = \"\"\n        metadata_suffix_keyword = \"\"\n        metadata_tokens = 0\n        if self.include_metadata:\n            (\n                metadata_suffix_semantic,\n                metadata_suffix_keyword,\n            ) = _get_metadata_suffix_for_document_index(\n                document.metadata, include_separator=True\n            )\n            metadata_tokens = len(self.tokenizer.encode(metadata_suffix_semantic))\n\n        # If metadata is too large, skip it in the semantic content\n        if metadata_tokens >= self.chunk_token_limit * MAX_METADATA_PERCENTAGE:\n            metadata_suffix_semantic = \"\"\n            metadata_tokens = 0\n\n        single_chunk_fits = True\n        doc_token_count = 0\n        if self.enable_contextual_rag:\n            doc_content = document.get_text_content()\n            tokenized_doc = self.tokenizer.tokenize(doc_content)\n            doc_token_count = len(tokenized_doc)\n\n            # check if doc + title + metadata fits in a single chunk. If so, no need for contextual RAG\n            single_chunk_fits = (\n                doc_token_count + title_tokens + metadata_tokens\n                <= self.chunk_token_limit\n            )\n\n        # expand the size of the context used for contextual rag based on whether chunk context and doc summary are used\n        context_size = 0\n        if (\n            self.enable_contextual_rag\n            and not single_chunk_fits\n            and not AVERAGE_SUMMARY_EMBEDDINGS\n        ):\n            context_size += self.default_contextual_rag_reserved_tokens\n\n        # Adjust content token limit to accommodate title + metadata\n        content_token_limit = (\n            self.chunk_token_limit - title_tokens - metadata_tokens - context_size\n        )\n\n        # first check: if there is not enough actual chunk content when including contextual rag,\n        # then don't do contextual rag\n        if content_token_limit <= CHUNK_MIN_CONTENT:\n            context_size = 0  # Don't do contextual RAG\n            # revert to previous content token limit\n            content_token_limit = (\n                self.chunk_token_limit - title_tokens - metadata_tokens\n            )\n\n        # If there is not enough context remaining then just index the chunk with no prefix/suffix\n        if content_token_limit <= CHUNK_MIN_CONTENT:\n            # Not enough space left, so revert to full chunk without the prefix\n            content_token_limit = self.chunk_token_limit\n            title_prefix = \"\"\n            metadata_suffix_semantic = \"\"\n\n        # Use processed_sections if available (IndexingDocument), otherwise use original sections\n        sections_to_chunk = document.processed_sections\n\n        normal_chunks = self._chunk_document_with_sections(\n            document,\n            sections_to_chunk,\n            title_prefix,\n            metadata_suffix_semantic,\n            metadata_suffix_keyword,\n            content_token_limit,\n        )\n\n        # Optional \"multipass\" large chunk creation\n        if self.enable_multipass and self.enable_large_chunks:\n            large_chunks = generate_large_chunks(normal_chunks)\n            normal_chunks.extend(large_chunks)\n\n        for chunk in normal_chunks:\n            chunk.contextual_rag_reserved_tokens = context_size\n\n        return normal_chunks\n\n    def chunk(self, documents: list[IndexingDocument]) -> list[DocAwareChunk]:\n        \"\"\"\n        Takes in a list of documents and chunks them into smaller chunks for indexing\n        while persisting the document metadata.\n\n        Works with both standard Document objects and IndexingDocument objects with processed_sections.\n        \"\"\"\n        final_chunks: list[DocAwareChunk] = []\n        for document in documents:\n            if self.callback and self.callback.should_stop():\n                raise RuntimeError(\"Chunker.chunk: Stop signal detected\")\n\n            chunks = self._handle_single_document(document)\n            final_chunks.extend(chunks)\n\n            if self.callback:\n                self.callback.progress(\"Chunker.chunk\", len(chunks))\n\n        return final_chunks\n"
  },
  {
    "path": "backend/onyx/indexing/content_classification.py",
    "content": ""
  },
  {
    "path": "backend/onyx/indexing/embedder.py",
    "content": "import time\nfrom abc import ABC\nfrom abc import abstractmethod\nfrom collections import defaultdict\n\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import ConnectorStopSignal\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.db.models import SearchSettings\nfrom onyx.document_index.chunk_content_enrichment import (\n    generate_enriched_content_for_chunk_embedding,\n)\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.indexing.models import ChunkEmbedding\nfrom onyx.indexing.models import DocAwareChunk\nfrom onyx.indexing.models import IndexChunk\nfrom onyx.natural_language_processing.search_nlp_models import EmbeddingModel\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.pydantic_util import shallow_model_dump\nfrom onyx.utils.timing import log_function_time\nfrom shared_configs.configs import INDEXING_MODEL_SERVER_HOST\nfrom shared_configs.configs import INDEXING_MODEL_SERVER_PORT\nfrom shared_configs.enums import EmbeddingProvider\nfrom shared_configs.enums import EmbedTextType\nfrom shared_configs.model_server_models import Embedding\n\n\nlogger = setup_logger()\n\n\nclass IndexingEmbedder(ABC):\n    \"\"\"Converts chunks into chunks with embeddings. Note that one chunk may have\n    multiple embeddings associated with it.\"\"\"\n\n    def __init__(\n        self,\n        model_name: str,\n        normalize: bool,\n        query_prefix: str | None,\n        passage_prefix: str | None,\n        provider_type: EmbeddingProvider | None,\n        api_key: str | None,\n        api_url: str | None,\n        api_version: str | None,\n        deployment_name: str | None,\n        reduced_dimension: int | None,\n        callback: IndexingHeartbeatInterface | None,\n    ):\n        self.model_name = model_name\n        self.normalize = normalize\n        self.query_prefix = query_prefix\n        self.passage_prefix = passage_prefix\n        self.provider_type = provider_type\n        self.api_key = api_key\n        self.api_url = api_url\n        self.api_version = api_version\n        self.deployment_name = deployment_name\n\n        self.embedding_model = EmbeddingModel(\n            model_name=model_name,\n            query_prefix=query_prefix,\n            passage_prefix=passage_prefix,\n            normalize=normalize,\n            api_key=api_key,\n            provider_type=provider_type,\n            api_url=api_url,\n            api_version=api_version,\n            deployment_name=deployment_name,\n            reduced_dimension=reduced_dimension,\n            # The below are globally set, this flow always uses the indexing one\n            server_host=INDEXING_MODEL_SERVER_HOST,\n            server_port=INDEXING_MODEL_SERVER_PORT,\n            retrim_content=True,\n            callback=callback,\n        )\n\n    @abstractmethod\n    def embed_chunks(\n        self,\n        chunks: list[DocAwareChunk],\n        tenant_id: str | None = None,\n        request_id: str | None = None,\n    ) -> list[IndexChunk]:\n        raise NotImplementedError\n\n\nclass DefaultIndexingEmbedder(IndexingEmbedder):\n    def __init__(\n        self,\n        model_name: str,\n        normalize: bool,\n        query_prefix: str | None,\n        passage_prefix: str | None,\n        provider_type: EmbeddingProvider | None = None,\n        api_key: str | None = None,\n        api_url: str | None = None,\n        api_version: str | None = None,\n        deployment_name: str | None = None,\n        reduced_dimension: int | None = None,\n        callback: IndexingHeartbeatInterface | None = None,\n    ):\n        super().__init__(\n            model_name,\n            normalize,\n            query_prefix,\n            passage_prefix,\n            provider_type,\n            api_key,\n            api_url,\n            api_version,\n            deployment_name,\n            reduced_dimension,\n            callback,\n        )\n\n    @log_function_time()\n    def embed_chunks(\n        self,\n        chunks: list[DocAwareChunk],\n        tenant_id: str | None = None,\n        request_id: str | None = None,\n    ) -> list[IndexChunk]:\n        \"\"\"Adds embeddings to the chunks, the title and metadata suffixes are added to the chunk as well\n        if they exist. If there is no space for it, it would have been thrown out at the chunking step.\n        \"\"\"\n        # All chunks at this point must have some non-empty content\n        flat_chunk_texts: list[str] = []\n        large_chunks_present = False\n        for chunk in chunks:\n            if chunk.large_chunk_reference_ids:\n                large_chunks_present = True\n            chunk_text = (\n                generate_enriched_content_for_chunk_embedding(chunk)\n            ) or chunk.source_document.get_title_for_document_index()\n\n            if not chunk_text:\n                # This should never happen, the document would have been dropped\n                # before getting to this point\n                raise ValueError(f\"Chunk has no content: {chunk.to_short_descriptor()}\")\n\n            flat_chunk_texts.append(chunk_text)\n\n            if chunk.mini_chunk_texts:\n                if chunk.large_chunk_reference_ids:\n                    # A large chunk does not contain mini chunks, if it matches the large chunk\n                    # with a high score, then mini chunks would not be used anyway\n                    # otherwise it should match the normal chunk\n                    raise RuntimeError(\"Large chunk contains mini chunks\")\n                flat_chunk_texts.extend(chunk.mini_chunk_texts)\n\n        embeddings = self.embedding_model.encode(\n            texts=flat_chunk_texts,\n            text_type=EmbedTextType.PASSAGE,\n            large_chunks_present=large_chunks_present,\n            tenant_id=tenant_id,\n            request_id=request_id,\n        )\n\n        chunk_titles = {\n            chunk.source_document.get_title_for_document_index() for chunk in chunks\n        }\n\n        # Drop any None or empty strings\n        # If there is no title or the title is empty, the title embedding field will be null\n        # which is ok, it just won't contribute at all to the scoring.\n        chunk_titles_list = [title for title in chunk_titles if title]\n\n        # Cache the Title embeddings to only have to do it once\n        title_embed_dict: dict[str, Embedding] = {}\n        if chunk_titles_list:\n            title_embeddings = self.embedding_model.encode(\n                chunk_titles_list,\n                text_type=EmbedTextType.PASSAGE,\n                tenant_id=tenant_id,\n                request_id=request_id,\n            )\n            title_embed_dict.update(\n                {\n                    title: vector\n                    for title, vector in zip(chunk_titles_list, title_embeddings)\n                }\n            )\n\n        # Mapping embeddings to chunks\n        embedded_chunks: list[IndexChunk] = []\n        embedding_ind_start = 0\n        for chunk in chunks:\n            num_embeddings = 1 + (\n                len(chunk.mini_chunk_texts) if chunk.mini_chunk_texts else 0\n            )\n            chunk_embeddings = embeddings[\n                embedding_ind_start : embedding_ind_start + num_embeddings\n            ]\n\n            title = chunk.source_document.get_title_for_document_index()\n\n            title_embedding = None\n            if title:\n                if title in title_embed_dict:\n                    # Using cached value to avoid recalculating for every chunk\n                    title_embedding = title_embed_dict[title]\n                else:\n                    logger.error(\n                        \"Title had to be embedded separately, this should not happen!\"\n                    )\n                    title_embedding = self.embedding_model.encode(\n                        [title],\n                        text_type=EmbedTextType.PASSAGE,\n                        tenant_id=tenant_id,\n                        request_id=request_id,\n                    )[0]\n                    title_embed_dict[title] = title_embedding\n\n            new_embedded_chunk = IndexChunk.model_construct(\n                **shallow_model_dump(chunk),\n                embeddings=ChunkEmbedding(\n                    full_embedding=chunk_embeddings[0],\n                    mini_chunk_embeddings=chunk_embeddings[1:],\n                ),\n                title_embedding=title_embedding,\n            )\n            embedded_chunks.append(new_embedded_chunk)\n            embedding_ind_start += num_embeddings\n\n        return embedded_chunks\n\n    @classmethod\n    def from_db_search_settings(\n        cls,\n        search_settings: SearchSettings,\n        callback: IndexingHeartbeatInterface | None = None,\n    ) -> \"DefaultIndexingEmbedder\":\n        return cls(\n            model_name=search_settings.model_name,\n            normalize=search_settings.normalize,\n            query_prefix=search_settings.query_prefix,\n            passage_prefix=search_settings.passage_prefix,\n            provider_type=search_settings.provider_type,\n            api_key=search_settings.api_key,\n            api_url=search_settings.api_url,\n            api_version=search_settings.api_version,\n            deployment_name=search_settings.deployment_name,\n            reduced_dimension=search_settings.reduced_dimension,\n            callback=callback,\n        )\n\n\ndef embed_chunks_with_failure_handling(\n    chunks: list[DocAwareChunk],\n    embedder: IndexingEmbedder,\n    tenant_id: str | None = None,\n    request_id: str | None = None,\n) -> tuple[list[IndexChunk], list[ConnectorFailure]]:\n    \"\"\"Tries to embed all chunks in one large batch. If that batch fails for any reason,\n    goes document by document to isolate the failure(s).\n    \"\"\"\n\n    # TODO(rkuo): this doesn't disambiguate calls to the model server on retries.\n    # Improve this if needed.\n\n    # First try to embed all chunks in one batch\n    try:\n        return (\n            embedder.embed_chunks(\n                chunks=chunks, tenant_id=tenant_id, request_id=request_id\n            ),\n            [],\n        )\n    except ConnectorStopSignal as e:\n        logger.warning(\n            \"Connector stop signal detected in embed_chunks_with_failure_handling\"\n        )\n        raise e\n    except Exception:\n        logger.exception(\"Failed to embed chunk batch. Trying individual docs.\")\n        # wait a couple seconds to let any rate limits or temporary issues resolve\n        time.sleep(2)\n\n    # Try embedding each document's chunks individually\n    chunks_by_doc: dict[str, list[DocAwareChunk]] = defaultdict(list)\n    for chunk in chunks:\n        chunks_by_doc[chunk.source_document.id].append(chunk)\n\n    embedded_chunks: list[IndexChunk] = []\n    failures: list[ConnectorFailure] = []\n\n    for doc_id, chunks_for_doc in chunks_by_doc.items():\n        try:\n            doc_embedded_chunks = embedder.embed_chunks(\n                chunks=chunks_for_doc, tenant_id=tenant_id, request_id=request_id\n            )\n            embedded_chunks.extend(doc_embedded_chunks)\n        except Exception as e:\n            logger.exception(f\"Failed to embed chunks for document '{doc_id}'\")\n            failures.append(\n                ConnectorFailure(\n                    failed_document=DocumentFailure(\n                        document_id=doc_id,\n                        document_link=(\n                            chunks_for_doc[0].get_link() if chunks_for_doc else None\n                        ),\n                    ),\n                    failure_message=str(e),\n                    exception=e,\n                )\n            )\n\n    return embedded_chunks, failures\n"
  },
  {
    "path": "backend/onyx/indexing/indexing_heartbeat.py",
    "content": "from abc import ABC\nfrom abc import abstractmethod\n\n\nclass IndexingHeartbeatInterface(ABC):\n    \"\"\"Defines a callback interface to be passed to\n    to run_indexing_entrypoint.\"\"\"\n\n    @abstractmethod\n    def should_stop(self) -> bool:\n        \"\"\"Signal to stop the looping function in flight.\"\"\"\n\n    @abstractmethod\n    def progress(self, tag: str, amount: int) -> None:\n        \"\"\"Send progress updates to the caller.\n        Amount can be a positive number to indicate progress or <= 0\n        just to act as a keep-alive.\n        \"\"\"\n"
  },
  {
    "path": "backend/onyx/indexing/indexing_pipeline.py",
    "content": "from collections import defaultdict\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom collections.abc import Iterator\nfrom contextlib import contextmanager\nfrom typing import Protocol\n\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_NAME\nfrom onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_PROVIDER\nfrom onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG\nfrom onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH\nfrom onyx.configs.app_configs import MAX_DOCUMENT_CHARS\nfrom onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION\nfrom onyx.configs.app_configs import USE_CHUNK_SUMMARY\nfrom onyx.configs.app_configs import USE_DOCUMENT_SUMMARY\nfrom onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import (\n    get_experts_stores_representations,\n)\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import ConnectorStopSignal\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import IndexAttemptMetadata\nfrom onyx.connectors.models import IndexingDocument\nfrom onyx.connectors.models import Section\nfrom onyx.connectors.models import TextSection\nfrom onyx.db.document import get_documents_by_ids\nfrom onyx.db.document import upsert_document_by_connector_credential_pair\nfrom onyx.db.document import upsert_documents\nfrom onyx.db.enums import HookPoint\nfrom onyx.db.hierarchy import link_hierarchy_nodes_to_documents\nfrom onyx.db.models import Document as DBDocument\nfrom onyx.db.models import IndexModelStatus\nfrom onyx.db.search_settings import get_active_search_settings\nfrom onyx.db.tag import upsert_document_tags\nfrom onyx.document_index.document_index_utils import (\n    get_multipass_config,\n)\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.document_index.interfaces import DocumentInsertionRecord\nfrom onyx.document_index.interfaces import DocumentMetadata\nfrom onyx.document_index.interfaces import IndexBatchParams\nfrom onyx.file_processing.image_summarization import summarize_image_with_error_handling\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.hooks.executor import execute_hook\nfrom onyx.hooks.executor import HookSkipped\nfrom onyx.hooks.executor import HookSoftFailed\nfrom onyx.hooks.points.document_ingestion import DocumentIngestionOwner\nfrom onyx.hooks.points.document_ingestion import DocumentIngestionPayload\nfrom onyx.hooks.points.document_ingestion import DocumentIngestionResponse\nfrom onyx.hooks.points.document_ingestion import DocumentIngestionSection\nfrom onyx.indexing.chunk_batch_store import ChunkBatchStore\nfrom onyx.indexing.chunker import Chunker\nfrom onyx.indexing.embedder import embed_chunks_with_failure_handling\nfrom onyx.indexing.embedder import IndexingEmbedder\nfrom onyx.indexing.models import DocAwareChunk\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom onyx.indexing.models import IndexingBatchAdapter\nfrom onyx.indexing.models import UpdatableChunkData\nfrom onyx.indexing.vector_db_insertion import write_chunks_to_vector_db_with_backoff\nfrom onyx.llm.factory import get_default_llm_with_vision\nfrom onyx.llm.factory import get_llm_for_contextual_rag\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.models import UserMessage\nfrom onyx.llm.multi_llm import LLMRateLimitError\nfrom onyx.llm.utils import llm_response_to_string\nfrom onyx.llm.utils import MAX_CONTEXT_TOKENS\nfrom onyx.natural_language_processing.utils import BaseTokenizer\nfrom onyx.natural_language_processing.utils import get_tokenizer\nfrom onyx.natural_language_processing.utils import tokenizer_trim_middle\nfrom onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT1\nfrom onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT2\nfrom onyx.prompts.contextual_retrieval import DOCUMENT_SUMMARY_PROMPT\nfrom onyx.utils.batching import batch_generator\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.postgres_sanitization import sanitize_documents_for_postgres\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom onyx.utils.timing import log_function_time\n\n\nlogger = setup_logger()\n\n\nclass DocumentBatchPrepareContext(BaseModel):\n    updatable_docs: list[Document]\n    id_to_boost_map: dict[str, int]\n    indexable_docs: list[IndexingDocument] = []\n    model_config = ConfigDict(arbitrary_types_allowed=True)\n\n\nclass IndexingPipelineResult(BaseModel):\n    # number of documents that are completely new (e.g. did\n    # not exist as a part of this OR any other connector)\n    new_docs: int\n    # NOTE: need total_docs, since the pipeline can skip some docs\n    # (e.g. not even insert them into Postgres)\n    total_docs: int\n    # number of chunks that were inserted into Vespa\n    total_chunks: int\n\n    failures: list[ConnectorFailure]\n\n    @classmethod\n    def empty(cls, total_docs: int) -> \"IndexingPipelineResult\":\n        return cls(\n            new_docs=0,\n            total_docs=total_docs,\n            total_chunks=0,\n            failures=[],\n        )\n\n\nclass ChunkEmbeddingResult(BaseModel):\n    successful_chunk_ids: list[tuple[int, str]]  # (chunk_id, document_id)\n    connector_failures: list[ConnectorFailure]\n\n\nclass IndexingPipelineProtocol(Protocol):\n    def __call__(\n        self,\n        document_batch: list[Document],\n        index_attempt_metadata: IndexAttemptMetadata,\n    ) -> IndexingPipelineResult: ...\n\n\ndef _upsert_documents_in_db(\n    documents: list[Document],\n    index_attempt_metadata: IndexAttemptMetadata,\n    db_session: Session,\n) -> None:\n    # Metadata here refers to basic document info, not metadata about the actual content\n    document_metadata_list: list[DocumentMetadata] = []\n    for doc in documents:\n        first_link = next(\n            (section.link for section in doc.sections if section.link), \"\"\n        )\n        db_doc_metadata = DocumentMetadata(\n            connector_id=index_attempt_metadata.connector_id,\n            credential_id=index_attempt_metadata.credential_id,\n            document_id=doc.id,\n            semantic_identifier=doc.semantic_identifier,\n            first_link=first_link,\n            primary_owners=get_experts_stores_representations(doc.primary_owners),\n            secondary_owners=get_experts_stores_representations(doc.secondary_owners),\n            from_ingestion_api=doc.from_ingestion_api,\n            external_access=doc.external_access,\n            doc_metadata=doc.doc_metadata,\n            # parent_hierarchy_node_id is resolved in docfetching using Redis cache\n            parent_hierarchy_node_id=doc.parent_hierarchy_node_id,\n        )\n        document_metadata_list.append(db_doc_metadata)\n\n    upsert_documents(db_session, document_metadata_list)\n\n    # Insert document content metadata\n    for doc in documents:\n        upsert_document_tags(\n            document_id=doc.id,\n            source=doc.source,\n            metadata=doc.metadata,\n            db_session=db_session,\n        )\n\n\ndef _get_failed_doc_ids(failures: list[ConnectorFailure]) -> set[str]:\n    \"\"\"Extract document IDs from a list of connector failures.\"\"\"\n    return {f.failed_document.document_id for f in failures if f.failed_document}\n\n\ndef _embed_chunks_to_store(\n    chunks: list[DocAwareChunk],\n    embedder: IndexingEmbedder,\n    tenant_id: str,\n    request_id: str | None,\n    store: ChunkBatchStore,\n) -> ChunkEmbeddingResult:\n    \"\"\"Embed chunks in batches, spilling each batch to *store*.\n\n    If a document fails embedding in any batch, its chunks are excluded from\n    all batches (including earlier ones already written) so that the output\n    is all-or-nothing per document.\n    \"\"\"\n    successful_chunk_ids: list[tuple[int, str]] = []\n    all_embedding_failures: list[ConnectorFailure] = []\n    # Track failed doc IDs across all batches so that a failure in batch N\n    # causes chunks for that doc to be skipped in batch N+1 and stripped\n    # from earlier batches.\n    all_failed_doc_ids: set[str] = set()\n\n    for batch_idx, chunk_batch in enumerate(\n        batch_generator(chunks, MAX_CHUNKS_PER_DOC_BATCH)\n    ):\n        # Skip chunks belonging to documents that failed in earlier batches.\n        chunk_batch = [\n            c for c in chunk_batch if c.source_document.id not in all_failed_doc_ids\n        ]\n        if not chunk_batch:\n            continue\n\n        logger.debug(f\"Embedding batch {batch_idx}: {len(chunk_batch)} chunks\")\n\n        chunks_with_embeddings, embedding_failures = embed_chunks_with_failure_handling(\n            chunks=chunk_batch,\n            embedder=embedder,\n            tenant_id=tenant_id,\n            request_id=request_id,\n        )\n        all_embedding_failures.extend(embedding_failures)\n        all_failed_doc_ids.update(_get_failed_doc_ids(embedding_failures))\n\n        # Only keep successfully embedded chunks for non-failed docs.\n        chunks_with_embeddings = [\n            c\n            for c in chunks_with_embeddings\n            if c.source_document.id not in all_failed_doc_ids\n        ]\n\n        successful_chunk_ids.extend(\n            (c.chunk_id, c.source_document.id) for c in chunks_with_embeddings\n        )\n\n        store.save(chunks_with_embeddings, batch_idx)\n        del chunks_with_embeddings\n\n    # Scrub earlier batches for docs that failed in later batches.\n    if all_failed_doc_ids:\n        store.scrub_failed_docs(all_failed_doc_ids)\n        successful_chunk_ids = [\n            (chunk_id, doc_id)\n            for chunk_id, doc_id in successful_chunk_ids\n            if doc_id not in all_failed_doc_ids\n        ]\n\n    return ChunkEmbeddingResult(\n        successful_chunk_ids=successful_chunk_ids,\n        connector_failures=all_embedding_failures,\n    )\n\n\n@contextmanager\ndef embed_and_stream(\n    chunks: list[DocAwareChunk],\n    embedder: IndexingEmbedder,\n    tenant_id: str,\n    request_id: str | None,\n) -> Generator[tuple[ChunkEmbeddingResult, ChunkBatchStore], None, None]:\n    \"\"\"Embed chunks to disk and yield a ``(result, store)`` pair.\n\n    The store owns the temp directory — files are cleaned up when the context\n    manager exits.\n\n    Usage::\n\n        with embed_and_stream(chunks, embedder, tenant_id, req_id) as (result, store):\n            for chunk in store.stream():\n                ...\n    \"\"\"\n    with ChunkBatchStore() as store:\n        result = _embed_chunks_to_store(\n            chunks=chunks,\n            embedder=embedder,\n            tenant_id=tenant_id,\n            request_id=request_id,\n            store=store,\n        )\n        yield result, store\n\n\ndef get_doc_ids_to_update(\n    documents: list[Document], db_docs: list[DBDocument]\n) -> list[Document]:\n    \"\"\"Figures out which documents actually need to be updated. If a document is already present\n    and the `updated_at` hasn't changed, we shouldn't need to do anything with it.\n\n    NB: Still need to associate the document in the DB if multiple connectors are\n    indexing the same doc.\"\"\"\n    id_update_time_map = {\n        doc.id: doc.doc_updated_at for doc in db_docs if doc.doc_updated_at\n    }\n\n    updatable_docs: list[Document] = []\n    for doc in documents:\n        if (\n            doc.id in id_update_time_map\n            and doc.doc_updated_at\n            and doc.doc_updated_at <= id_update_time_map[doc.id]\n        ):\n            continue\n        updatable_docs.append(doc)\n\n    return updatable_docs\n\n\ndef index_doc_batch_with_handler(\n    *,\n    chunker: Chunker,\n    embedder: IndexingEmbedder,\n    document_indices: list[DocumentIndex],\n    document_batch: list[Document],\n    request_id: str | None,\n    tenant_id: str,\n    db_session: Session,\n    adapter: IndexingBatchAdapter,\n    ignore_time_skip: bool = False,\n    enable_contextual_rag: bool = False,\n    llm: LLM | None = None,\n) -> IndexingPipelineResult:\n    try:\n        index_pipeline_result = index_doc_batch(\n            chunker=chunker,\n            embedder=embedder,\n            document_indices=document_indices,\n            document_batch=document_batch,\n            request_id=request_id,\n            tenant_id=tenant_id,\n            db_session=db_session,\n            adapter=adapter,\n            ignore_time_skip=ignore_time_skip,\n            enable_contextual_rag=enable_contextual_rag,\n            llm=llm,\n        )\n\n    except ConnectorStopSignal as e:\n        logger.warning(\"Connector stop signal detected in index_doc_batch_with_handler\")\n        raise e\n    except Exception as e:\n        # don't log the batch directly, it's too much text\n        document_ids = [doc.id for doc in document_batch]\n        logger.exception(f\"Failed to index document batch: {document_ids}\")\n\n        index_pipeline_result = IndexingPipelineResult(\n            new_docs=0,\n            total_docs=len(document_batch),\n            total_chunks=0,\n            failures=[\n                ConnectorFailure(\n                    failed_document=DocumentFailure(\n                        document_id=document.id,\n                        document_link=(\n                            document.sections[0].link if document.sections else None\n                        ),\n                    ),\n                    failure_message=str(e),\n                    exception=e,\n                )\n                for document in document_batch\n            ],\n        )\n\n    return index_pipeline_result\n\n\ndef index_doc_batch_prepare(\n    documents: list[Document],\n    index_attempt_metadata: IndexAttemptMetadata,\n    db_session: Session,\n    ignore_time_skip: bool = False,\n) -> DocumentBatchPrepareContext | None:\n    \"\"\"Sets up the documents in the relational DB (source of truth) for permissions, metadata, etc.\n    This preceeds indexing it into the actual document index.\"\"\"\n    documents = sanitize_documents_for_postgres(documents)\n\n    # Create a trimmed list of docs that don't have a newer updated at\n    # Shortcuts the time-consuming flow on connector index retries\n    document_ids: list[str] = [document.id for document in documents]\n    db_docs: list[DBDocument] = get_documents_by_ids(\n        db_session=db_session,\n        document_ids=document_ids,\n    )\n\n    updatable_docs = (\n        get_doc_ids_to_update(documents=documents, db_docs=db_docs)\n        if not ignore_time_skip\n        else documents\n    )\n    if len(updatable_docs) != len(documents):\n        updatable_doc_ids = [doc.id for doc in updatable_docs]\n        skipped_doc_ids = [\n            doc.id for doc in documents if doc.id not in updatable_doc_ids\n        ]\n        logger.info(\n            f\"Skipping {len(skipped_doc_ids)} documents because they are up to date. Skipped doc IDs: {skipped_doc_ids}\"\n        )\n\n    # for all updatable docs, upsert into the DB\n    # Does not include doc_updated_at which is also used to indicate a successful update\n    if updatable_docs:\n        _upsert_documents_in_db(\n            documents=updatable_docs,\n            index_attempt_metadata=index_attempt_metadata,\n            db_session=db_session,\n        )\n\n    logger.info(\n        f\"Upserted {len(updatable_docs)} changed docs out of {len(documents)} total docs into the DB\"\n    )\n\n    # for all docs, upsert the document to cc pair relationship\n    upsert_document_by_connector_credential_pair(\n        db_session,\n        index_attempt_metadata.connector_id,\n        index_attempt_metadata.credential_id,\n        document_ids,\n    )\n\n    # Link hierarchy nodes to documents for sources where pages can be both\n    # hierarchy nodes AND documents (e.g., Notion, Confluence).\n    # This must happen after documents are upserted due to FK constraint.\n    if documents:\n        link_hierarchy_nodes_to_documents(\n            db_session=db_session,\n            document_ids=document_ids,\n            source=documents[0].source,\n            commit=False,  # We'll commit with the rest of the transaction\n        )\n\n    # No docs to process because the batch is empty or every doc was already indexed\n    if not updatable_docs:\n        return None\n\n    id_to_boost_map = {doc.id: doc.boost for doc in db_docs}\n    return DocumentBatchPrepareContext(\n        updatable_docs=updatable_docs, id_to_boost_map=id_to_boost_map\n    )\n\n\ndef filter_documents(document_batch: list[Document]) -> list[Document]:\n    documents: list[Document] = []\n    total_chars_in_batch = 0\n    skipped_too_long = []\n\n    for document in document_batch:\n        empty_contents = not any(\n            isinstance(section, TextSection)\n            and section.text is not None\n            and section.text.strip()\n            for section in document.sections\n        )\n        if (\n            (not document.title or not document.title.strip())\n            and not document.semantic_identifier.strip()\n            and empty_contents\n        ):\n            # Skip documents that have neither title nor content\n            # If the document doesn't have either, then there is no useful information in it\n            # This is again verified later in the pipeline after chunking but at that point there should\n            # already be no documents that are empty.\n            logger.warning(\n                f\"Skipping document with ID {document.id} as it has neither title nor content.\"\n            )\n            continue\n\n        if document.title is not None and not document.title.strip() and empty_contents:\n            # The title is explicitly empty (\"\" and not None) and the document is empty\n            # so when building the chunk text representation, it will be empty and unuseable\n            logger.warning(\n                f\"Skipping document with ID {document.id} as the chunks will be empty.\"\n            )\n            continue\n\n        section_chars = sum(\n            (\n                len(section.text)\n                if isinstance(section, TextSection) and section.text is not None\n                else 0\n            )\n            for section in document.sections\n        )\n        doc_total_chars = (\n            len(document.title or document.semantic_identifier) + section_chars\n        )\n\n        if MAX_DOCUMENT_CHARS and doc_total_chars > MAX_DOCUMENT_CHARS:\n            # Skip documents that are too long, later on there are more memory intensive steps done on the text\n            # and the container will run out of memory and crash. Several other checks are included upstream but\n            # those are at the connector level so a catchall is still needed.\n            # Assumption here is that files that are that long, are generated files and not the type users\n            # generally care for.\n            logger.warning(\n                f\"Skipping document with ID {document.id} as it is too long \"\n                f\"({doc_total_chars:,} chars, max={MAX_DOCUMENT_CHARS:,})\"\n            )\n            skipped_too_long.append((document.id, doc_total_chars))\n            continue\n\n        total_chars_in_batch += doc_total_chars\n        documents.append(document)\n\n    # Log batch statistics for OOM debugging\n    if documents:\n        avg_chars = total_chars_in_batch / len(documents)\n        # Get the source from the first document (all in batch should be same source)\n        source = documents[0].source.value if documents[0].source else \"unknown\"\n        logger.debug(\n            f\"Document batch filter [{source}]: {len(documents)} docs kept, {len(skipped_too_long)} skipped (too long). \"\n            f\"Total chars: {total_chars_in_batch:,}, Avg: {avg_chars:,.0f} chars/doc\"\n        )\n        if skipped_too_long:\n            logger.warning(\n                f\"Skipped oversized documents [{source}]: {skipped_too_long[:5]}\"\n            )  # Log first 5\n\n    return documents\n\n\ndef process_image_sections(documents: list[Document]) -> list[IndexingDocument]:\n    \"\"\"\n    Process all sections in documents by:\n    1. Converting both TextSection and ImageSection objects to base Section objects\n    2. Processing ImageSections to generate text summaries using a vision-capable LLM\n    3. Returning IndexingDocument objects with both original and processed sections\n\n    Args:\n        documents: List of documents with TextSection | ImageSection objects\n\n    Returns:\n        List of IndexingDocument objects with processed_sections as list[Section]\n    \"\"\"\n    # Check if image extraction and analysis is enabled before trying to get a vision LLM\n    if not get_image_extraction_and_analysis_enabled():\n        llm = None\n    else:\n        # Only get the vision LLM if image processing is enabled\n        llm = get_default_llm_with_vision()\n\n    if not llm:\n        if get_image_extraction_and_analysis_enabled():\n            logger.warning(\n                \"Image analysis is enabled but no vision-capable LLM is \"\n                \"available — images will not be summarized. Configure a \"\n                \"vision model in the admin LLM settings.\"\n            )\n        # Even without LLM, we still convert to IndexingDocument with base Sections\n        return [\n            IndexingDocument(\n                **document.model_dump(),\n                processed_sections=[\n                    Section(\n                        text=section.text if isinstance(section, TextSection) else \"\",\n                        link=section.link,\n                        image_file_id=(\n                            section.image_file_id\n                            if isinstance(section, ImageSection)\n                            else None\n                        ),\n                    )\n                    for section in document.sections\n                ],\n            )\n            for document in documents\n        ]\n\n    indexed_documents: list[IndexingDocument] = []\n\n    for document in documents:\n        processed_sections: list[Section] = []\n\n        for section in document.sections:\n            # For ImageSection, process and create base Section with both text and image_file_id\n            if isinstance(section, ImageSection):\n                # Default section with image path preserved - ensure text is always a string\n                processed_section = Section(\n                    link=section.link,\n                    image_file_id=section.image_file_id,\n                    text=\"\",  # Initialize with empty string\n                )\n\n                # Try to get image summary\n                try:\n                    file_store = get_default_file_store()\n\n                    file_record = file_store.read_file_record(\n                        file_id=section.image_file_id\n                    )\n                    if not file_record:\n                        logger.warning(\n                            f\"Image file {section.image_file_id} not found in FileStore\"\n                        )\n\n                        processed_section.text = \"[Image could not be processed]\"\n                    else:\n                        # Get the image data\n                        image_data_io = file_store.read_file(\n                            file_id=section.image_file_id\n                        )\n                        image_data = image_data_io.read()\n                        summary = summarize_image_with_error_handling(\n                            llm=llm,\n                            image_data=image_data,\n                            context_name=file_record.display_name or \"Image\",\n                        )\n\n                        if summary:\n                            processed_section.text = summary\n                        else:\n                            processed_section.text = \"[Image could not be summarized]\"\n                except Exception as e:\n                    logger.error(f\"Error processing image section: {e}\")\n                    processed_section.text = \"[Error processing image]\"\n\n                processed_sections.append(processed_section)\n\n            # For TextSection, create a base Section with text and link\n            elif isinstance(section, TextSection):\n                processed_section = Section(\n                    text=section.text or \"\",  # Ensure text is always a string, not None\n                    link=section.link,\n                    image_file_id=None,\n                )\n                processed_sections.append(processed_section)\n\n        # Create IndexingDocument with original sections and processed_sections\n        indexed_document = IndexingDocument(\n            **document.model_dump(), processed_sections=processed_sections\n        )\n        indexed_documents.append(indexed_document)\n\n    return indexed_documents\n\n\ndef add_document_summaries(\n    chunks_by_doc: list[DocAwareChunk],\n    llm: LLM,\n    tokenizer: BaseTokenizer,\n    trunc_doc_tokens: int,\n) -> list[int] | None:\n    \"\"\"\n    Adds a document summary to a list of chunks from the same document.\n    Returns the number of tokens in the document.\n    \"\"\"\n\n    doc_tokens = []\n    # this is value is the same for each chunk in the document; 0 indicates\n    # There is not enough space for contextual RAG (the chunk content\n    # and possibly metadata took up too much space)\n    if chunks_by_doc[0].contextual_rag_reserved_tokens == 0:\n        return None\n\n    doc_tokens = tokenizer.encode(chunks_by_doc[0].source_document.get_text_content())\n    doc_content = tokenizer_trim_middle(doc_tokens, trunc_doc_tokens, tokenizer)\n\n    # Apply prompt caching: cache the static prompt, document content is the suffix\n    # Note: For document summarization, there's no cacheable prefix since the document changes\n    # So we just pass the full prompt without caching\n    summary_prompt = DOCUMENT_SUMMARY_PROMPT.format(document=doc_content)\n    prompt_msg = UserMessage(content=summary_prompt)\n\n    response = llm.invoke(prompt_msg, max_tokens=MAX_CONTEXT_TOKENS)\n    doc_summary = llm_response_to_string(response)\n\n    for chunk in chunks_by_doc:\n        chunk.doc_summary = doc_summary\n\n    return doc_tokens\n\n\ndef add_chunk_summaries(\n    chunks_by_doc: list[DocAwareChunk],\n    llm: LLM,\n    tokenizer: BaseTokenizer,\n    trunc_doc_chunk_tokens: int,\n    doc_tokens: list[int] | None,\n) -> None:\n    \"\"\"\n    Adds chunk summaries to the chunks grouped by document id.\n    Chunk summaries look at the chunk as well as the entire document (or a summary,\n    if the document is too long) and describe how the chunk relates to the document.\n    \"\"\"\n    # all chunks within a document have the same contextual_rag_reserved_tokens\n    if chunks_by_doc[0].contextual_rag_reserved_tokens == 0:\n        return\n\n    # use values computed in above doc summary section if available\n    doc_tokens = doc_tokens or tokenizer.encode(\n        chunks_by_doc[0].source_document.get_text_content()\n    )\n    doc_content = tokenizer_trim_middle(doc_tokens, trunc_doc_chunk_tokens, tokenizer)\n\n    # only compute doc summary if needed\n    doc_info = (\n        doc_content\n        if len(doc_tokens) <= MAX_TOKENS_FOR_FULL_INCLUSION\n        else chunks_by_doc[0].doc_summary\n    )\n    if not doc_info:\n        # This happens if the document is too long AND document summaries are turned off\n        # In this case we compute a doc summary using the LLM\n        fallback_prompt = UserMessage(\n            content=DOCUMENT_SUMMARY_PROMPT.format(document=doc_content)\n        )\n        response = llm.invoke(fallback_prompt, max_tokens=MAX_CONTEXT_TOKENS)\n        doc_info = llm_response_to_string(response)\n\n    from onyx.llm.prompt_cache.processor import process_with_prompt_cache\n\n    context_prompt1 = CONTEXTUAL_RAG_PROMPT1.format(document=doc_info)\n\n    def assign_context(chunk: DocAwareChunk) -> None:\n        context_prompt2 = CONTEXTUAL_RAG_PROMPT2.format(chunk=chunk.content)\n        try:\n            # Apply prompt caching: cache the document context (prompt1), chunk content is the suffix\n            # For string inputs with continuation=True, the result will be a concatenated string\n            processed_prompt, _ = process_with_prompt_cache(\n                llm_config=llm.config,\n                cacheable_prefix=UserMessage(content=context_prompt1),\n                suffix=UserMessage(content=context_prompt2),\n                continuation=True,  # Append chunk to the document context\n            )\n\n            response = llm.invoke(processed_prompt, max_tokens=MAX_CONTEXT_TOKENS)\n            chunk.chunk_context = llm_response_to_string(response)\n\n        except LLMRateLimitError as e:\n            # Erroring during chunker is undesirable, so we log the error and continue\n            # TODO: for v2, add robust retry logic\n            logger.exception(f\"Rate limit adding chunk summary: {e}\", exc_info=e)\n            chunk.chunk_context = \"\"\n        except Exception as e:\n            logger.exception(f\"Error adding chunk summary: {e}\", exc_info=e)\n            chunk.chunk_context = \"\"\n\n    run_functions_tuples_in_parallel(\n        [(assign_context, (chunk,)) for chunk in chunks_by_doc]\n    )\n\n\ndef add_contextual_summaries(\n    chunks: list[DocAwareChunk],\n    llm: LLM,\n    tokenizer: BaseTokenizer,\n    chunk_token_limit: int,\n) -> list[DocAwareChunk]:\n    \"\"\"\n    Adds Document summary and chunk-within-document context to the chunks\n    based on which environment variables are set.\n    \"\"\"\n    doc2chunks = defaultdict(list)\n    for chunk in chunks:\n        doc2chunks[chunk.source_document.id].append(chunk)\n\n    # The number of tokens allowed for the document when computing a document summary\n    trunc_doc_summary_tokens = llm.config.max_input_tokens - len(\n        tokenizer.encode(DOCUMENT_SUMMARY_PROMPT)\n    )\n\n    prompt_tokens = len(\n        tokenizer.encode(CONTEXTUAL_RAG_PROMPT1 + CONTEXTUAL_RAG_PROMPT2)\n    )\n    # The number of tokens allowed for the document when computing a\n    # \"chunk in context of document\" summary\n    trunc_doc_chunk_tokens = (\n        llm.config.max_input_tokens - prompt_tokens - chunk_token_limit\n    )\n    for chunks_by_doc in doc2chunks.values():\n        doc_tokens = None\n        if USE_DOCUMENT_SUMMARY:\n            doc_tokens = add_document_summaries(\n                chunks_by_doc, llm, tokenizer, trunc_doc_summary_tokens\n            )\n\n        if USE_CHUNK_SUMMARY:\n            add_chunk_summaries(\n                chunks_by_doc, llm, tokenizer, trunc_doc_chunk_tokens, doc_tokens\n            )\n\n    return chunks\n\n\ndef _verify_indexing_completeness(\n    insertion_records: list[DocumentInsertionRecord],\n    write_failures: list[ConnectorFailure],\n    embedding_failed_doc_ids: set[str],\n    updatable_ids: list[str],\n    document_index_name: str,\n) -> None:\n    \"\"\"Verify that every updatable document was either indexed or reported as failed.\"\"\"\n    all_returned_doc_ids = (\n        {r.document_id for r in insertion_records}\n        | {f.failed_document.document_id for f in write_failures if f.failed_document}\n        | embedding_failed_doc_ids\n    )\n    if all_returned_doc_ids != set(updatable_ids):\n        raise RuntimeError(\n            f\"Some documents were not successfully indexed. \"\n            f\"Updatable IDs: {updatable_ids}, \"\n            f\"Returned IDs: {all_returned_doc_ids}. \"\n            f\"This should never happen. \"\n            f\"This occured for document index {document_index_name}\"\n        )\n\n\ndef _apply_document_ingestion_hook(\n    documents: list[Document],\n    db_session: Session,\n) -> list[Document]:\n    \"\"\"Apply the Document Ingestion hook to each document in the batch.\n\n    - HookSkipped / HookSoftFailed → document passes through unchanged.\n    - Response with sections=None → document is dropped (logged).\n    - Response with sections → document sections are replaced with the hook's output.\n    \"\"\"\n\n    def _build_payload(doc: Document) -> DocumentIngestionPayload:\n        return DocumentIngestionPayload(\n            document_id=doc.id or \"\",\n            title=doc.title,\n            semantic_identifier=doc.semantic_identifier,\n            source=doc.source.value if doc.source is not None else \"\",\n            sections=[\n                DocumentIngestionSection(\n                    text=s.text if isinstance(s, TextSection) else None,\n                    link=s.link,\n                    image_file_id=(\n                        s.image_file_id if isinstance(s, ImageSection) else None\n                    ),\n                )\n                for s in doc.sections\n            ],\n            metadata={\n                k: v if isinstance(v, list) else [v] for k, v in doc.metadata.items()\n            },\n            doc_updated_at=(\n                doc.doc_updated_at.isoformat() if doc.doc_updated_at else None\n            ),\n            primary_owners=(\n                [\n                    DocumentIngestionOwner(\n                        display_name=o.get_semantic_name() or None,\n                        email=o.email,\n                    )\n                    for o in doc.primary_owners\n                ]\n                if doc.primary_owners\n                else None\n            ),\n            secondary_owners=(\n                [\n                    DocumentIngestionOwner(\n                        display_name=o.get_semantic_name() or None,\n                        email=o.email,\n                    )\n                    for o in doc.secondary_owners\n                ]\n                if doc.secondary_owners\n                else None\n            ),\n        )\n\n    def _apply_result(\n        doc: Document,\n        hook_result: DocumentIngestionResponse | HookSkipped | HookSoftFailed,\n    ) -> Document | None:\n        \"\"\"Return the modified doc, original doc (skip/soft-fail), or None (drop).\"\"\"\n        if isinstance(hook_result, (HookSkipped, HookSoftFailed)):\n            return doc\n        if not hook_result.sections:\n            reason = hook_result.rejection_reason or \"Document rejected by hook\"\n            logger.info(\n                f\"Document ingestion hook dropped document doc_id={doc.id!r}: {reason}\"\n            )\n            return None\n        new_sections: list[TextSection | ImageSection] = []\n        for s in hook_result.sections:\n            if s.image_file_id is not None:\n                new_sections.append(\n                    ImageSection(image_file_id=s.image_file_id, link=s.link)\n                )\n            elif s.text is not None:\n                new_sections.append(TextSection(text=s.text, link=s.link))\n            else:\n                logger.warning(\n                    f\"Document ingestion hook returned a section with neither text nor \"\n                    f\"image_file_id for doc_id={doc.id!r} — skipping section.\"\n                )\n        if not new_sections:\n            logger.info(\n                f\"Document ingestion hook produced no valid sections for doc_id={doc.id!r} — dropping document.\"\n            )\n            return None\n        return doc.model_copy(update={\"sections\": new_sections})\n\n    if not documents:\n        return documents\n\n    # Run the hook for the first document. If it returns HookSkipped the hook\n    # is not configured — skip the remaining N-1 DB lookups.\n    first_doc = documents[0]\n    first_payload = _build_payload(first_doc).model_dump()\n    first_hook_result = execute_hook(\n        db_session=db_session,\n        hook_point=HookPoint.DOCUMENT_INGESTION,\n        payload=first_payload,\n        response_type=DocumentIngestionResponse,\n    )\n    if isinstance(first_hook_result, HookSkipped):\n        return documents\n\n    result: list[Document] = []\n    first_applied = _apply_result(first_doc, first_hook_result)\n    if first_applied is not None:\n        result.append(first_applied)\n\n    for doc in documents[1:]:\n        payload = _build_payload(doc).model_dump()\n        hook_result = execute_hook(\n            db_session=db_session,\n            hook_point=HookPoint.DOCUMENT_INGESTION,\n            payload=payload,\n            response_type=DocumentIngestionResponse,\n        )\n        applied = _apply_result(doc, hook_result)\n        if applied is not None:\n            result.append(applied)\n\n    return result\n\n\n@log_function_time(debug_only=True)\ndef index_doc_batch(\n    *,\n    document_batch: list[Document],\n    chunker: Chunker,\n    embedder: IndexingEmbedder,\n    document_indices: list[DocumentIndex],\n    request_id: str | None,\n    tenant_id: str,\n    db_session: Session,\n    adapter: IndexingBatchAdapter,\n    enable_contextual_rag: bool = False,\n    llm: LLM | None = None,\n    ignore_time_skip: bool = False,\n    filter_fnc: Callable[[list[Document]], list[Document]] = filter_documents,\n) -> IndexingPipelineResult:\n    \"\"\"End-to-end indexing for a pre-batched set of documents.\"\"\"\n    \"\"\"Takes different pieces of the indexing pipeline and applies it to a batch of documents\n    Note that the documents should already be batched at this point so that it does not inflate the\n    memory requirements\n\n    Returns a tuple where the first element is the number of new docs and the\n    second element is the number of chunks.\"\"\"\n\n    # Log connector info for debugging OOM issues\n    connector_id = getattr(adapter, \"connector_id\", None)\n    credential_id = getattr(adapter, \"credential_id\", None)\n    logger.debug(\n        f\"Starting index_doc_batch: connector_id={connector_id}, \"\n        f\"credential_id={credential_id}, tenant_id={tenant_id}, \"\n        f\"num_docs={len(document_batch)}\"\n    )\n\n    filtered_documents = filter_fnc(document_batch)\n    filtered_documents = _apply_document_ingestion_hook(filtered_documents, db_session)\n    context = adapter.prepare(filtered_documents, ignore_time_skip)\n    if not context:\n        return IndexingPipelineResult.empty(len(filtered_documents))\n\n    # Convert documents to IndexingDocument objects with processed section\n    # logger.debug(\"Processing image sections\")\n    context.indexable_docs = process_image_sections(context.updatable_docs)\n\n    doc_descriptors = [\n        {\n            \"doc_id\": doc.id,\n            \"doc_length\": doc.get_total_char_length(),\n        }\n        for doc in context.indexable_docs\n    ]\n    logger.debug(f\"Starting indexing process for documents: {doc_descriptors}\")\n\n    logger.debug(\"Starting chunking\")\n    # NOTE: no special handling for failures here, since the chunker is not\n    # a common source of failure for the indexing pipeline\n    chunks: list[DocAwareChunk] = chunker.chunk(context.indexable_docs)\n    llm_tokenizer: BaseTokenizer | None = None\n\n    # contextual RAG\n    if enable_contextual_rag:\n        assert llm is not None, \"must provide an LLM for contextual RAG\"\n        llm_tokenizer = get_tokenizer(\n            model_name=llm.config.model_name,\n            provider_type=llm.config.model_provider,\n        )\n\n        # Because the chunker's tokens are different from the LLM's tokens,\n        # We add a fudge factor to ensure we truncate prompts to the LLM's token limit\n        chunks = add_contextual_summaries(\n            chunks=chunks,\n            llm=llm,\n            tokenizer=llm_tokenizer,\n            chunk_token_limit=chunker.chunk_token_limit * 2,\n        )\n\n    logger.debug(\"Starting embedding\")\n    with embed_and_stream(chunks, embedder, tenant_id, request_id) as (\n        embedding_result,\n        chunk_store,\n    ):\n        updatable_ids = [doc.id for doc in context.updatable_docs]\n        updatable_chunk_data = [\n            UpdatableChunkData(\n                chunk_id=chunk_id,\n                document_id=document_id,\n                boost_score=1.0,\n            )\n            for chunk_id, document_id in embedding_result.successful_chunk_ids\n        ]\n\n        embedding_failed_doc_ids = _get_failed_doc_ids(\n            embedding_result.connector_failures\n        )\n\n        # Filter to only successfully embedded chunks so\n        # doc_id_to_new_chunk_cnt reflects what's actually written to Vespa.\n        embedded_chunks = [\n            c for c in chunks if c.source_document.id not in embedding_failed_doc_ids\n        ]\n\n        # Acquires a lock on the documents so that no other process can modify\n        # them.  Not needed until here, since this is when the actual race\n        # condition with vector db can occur.\n        with adapter.lock_context(context.updatable_docs):\n            enricher = adapter.prepare_enrichment(\n                context=context,\n                tenant_id=tenant_id,\n                chunks=embedded_chunks,\n            )\n\n            index_batch_params = IndexBatchParams(\n                doc_id_to_previous_chunk_cnt=enricher.doc_id_to_previous_chunk_cnt,\n                doc_id_to_new_chunk_cnt=enricher.doc_id_to_new_chunk_cnt,\n                tenant_id=tenant_id,\n                large_chunks_enabled=chunker.enable_large_chunks,\n            )\n\n            primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = (\n                None\n            )\n            primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = (\n                None\n            )\n\n            for document_index in document_indices:\n\n                def _enriched_stream() -> Iterator[DocMetadataAwareIndexChunk]:\n                    for chunk in chunk_store.stream():\n                        yield enricher.enrich_chunk(chunk, 1.0)\n\n                insertion_records, write_failures = (\n                    write_chunks_to_vector_db_with_backoff(\n                        document_index=document_index,\n                        make_chunks=_enriched_stream,\n                        index_batch_params=index_batch_params,\n                    )\n                )\n\n                _verify_indexing_completeness(\n                    insertion_records=insertion_records,\n                    write_failures=write_failures,\n                    embedding_failed_doc_ids=embedding_failed_doc_ids,\n                    updatable_ids=updatable_ids,\n                    document_index_name=document_index.__class__.__name__,\n                )\n                # We treat the first document index we got as the primary one used\n                # for reporting the state of indexing.\n                if primary_doc_idx_insertion_records is None:\n                    primary_doc_idx_insertion_records = insertion_records\n                if primary_doc_idx_vector_db_write_failures is None:\n                    primary_doc_idx_vector_db_write_failures = write_failures\n\n            adapter.post_index(\n                context=context,\n                updatable_chunk_data=updatable_chunk_data,\n                filtered_documents=filtered_documents,\n                enrichment=enricher,\n            )\n\n    assert primary_doc_idx_insertion_records is not None\n    assert primary_doc_idx_vector_db_write_failures is not None\n    return IndexingPipelineResult(\n        new_docs=sum(\n            1 for r in primary_doc_idx_insertion_records if not r.already_existed\n        ),\n        total_docs=len(filtered_documents),\n        total_chunks=len(embedding_result.successful_chunk_ids),\n        failures=primary_doc_idx_vector_db_write_failures\n        + embedding_result.connector_failures,\n    )\n\n\ndef run_indexing_pipeline(\n    *,\n    document_batch: list[Document],\n    request_id: str | None,\n    embedder: IndexingEmbedder,\n    document_indices: list[DocumentIndex],\n    db_session: Session,\n    tenant_id: str,\n    adapter: IndexingBatchAdapter,\n    chunker: Chunker | None = None,\n    ignore_time_skip: bool = False,\n) -> IndexingPipelineResult:\n    \"\"\"Builds a pipeline which takes in a list (batch) of docs and indexes them.\"\"\"\n    all_search_settings = get_active_search_settings(db_session)\n    if (\n        all_search_settings.secondary\n        and all_search_settings.secondary.status == IndexModelStatus.FUTURE\n    ):\n        search_settings = all_search_settings.secondary\n    else:\n        search_settings = all_search_settings.primary\n\n    multipass_config = get_multipass_config(search_settings)\n\n    enable_contextual_rag = (\n        search_settings.enable_contextual_rag or ENABLE_CONTEXTUAL_RAG\n    )\n    llm = None\n    if enable_contextual_rag:\n        llm = get_llm_for_contextual_rag(\n            search_settings.contextual_rag_llm_name or DEFAULT_CONTEXTUAL_RAG_LLM_NAME,\n            search_settings.contextual_rag_llm_provider\n            or DEFAULT_CONTEXTUAL_RAG_LLM_PROVIDER,\n        )\n\n    chunker = chunker or Chunker(\n        tokenizer=embedder.embedding_model.tokenizer,\n        enable_multipass=multipass_config.multipass_indexing,\n        enable_large_chunks=multipass_config.enable_large_chunks,\n        enable_contextual_rag=enable_contextual_rag,\n        # after every doc, update status in case there are a bunch of really long docs\n    )\n\n    return index_doc_batch_with_handler(\n        chunker=chunker,\n        embedder=embedder,\n        document_indices=document_indices,\n        document_batch=document_batch,\n        request_id=request_id,\n        tenant_id=tenant_id,\n        db_session=db_session,\n        adapter=adapter,\n        enable_contextual_rag=enable_contextual_rag,\n        llm=llm,\n        ignore_time_skip=ignore_time_skip,\n    )\n"
  },
  {
    "path": "backend/onyx/indexing/models.py",
    "content": "import contextlib\nfrom collections.abc import Generator\nfrom typing import Optional\nfrom typing import Protocol\nfrom typing import TYPE_CHECKING\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.access.models import DocumentAccess\nfrom onyx.connectors.models import Document\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.db.enums import SwitchoverType\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.pydantic_util import shallow_model_dump\nfrom shared_configs.enums import EmbeddingProvider\nfrom shared_configs.model_server_models import Embedding\n\nif TYPE_CHECKING:\n    from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext\nfrom sqlalchemy.engine.util import TransactionalContext\n\nif TYPE_CHECKING:\n    from onyx.db.models import SearchSettings\n\n\nlogger = setup_logger()\n\n\nclass ChunkEmbedding(BaseModel):\n    full_embedding: Embedding\n    mini_chunk_embeddings: list[Embedding]\n\n\nclass BaseChunk(BaseModel):\n    chunk_id: int\n    # The first sentence(s) of the first Section of the chunk\n    blurb: str\n    content: str\n    # Holds the link and the offsets into the raw Chunk text\n    source_links: dict[int, str] | None\n    image_file_id: str | None\n    # True if this Chunk's start is not at the start of a Section\n    # TODO(andrei): This is deprecated as of the OpenSearch migration. Remove.\n    # Do not use.\n    section_continuation: bool\n\n\nclass DocAwareChunk(BaseChunk):\n    # During indexing flow, we have access to a complete \"Document\"\n    # During inference we only have access to the document id and do not reconstruct the Document\n    source_document: Document\n\n    # This could be an empty string if the title is too long and taking up too much of the chunk\n    # This does not mean necessarily that the document does not have a title\n    title_prefix: str\n\n    # During indexing we also (optionally) build a metadata string from the metadata dict\n    # This is also indexed so that we can strip it out after indexing, this way it supports\n    # multiple iterations of metadata representation for backwards compatibility\n    metadata_suffix_semantic: str\n    metadata_suffix_keyword: str\n\n    # This is the number of tokens reserved for contextual RAG\n    # in the chunk. doc_summary and chunk_context conbined should\n    # contain at most this many tokens.\n    contextual_rag_reserved_tokens: int\n    # This is the summary for the document generated for contextual RAG\n    doc_summary: str\n    # This is the context for this chunk generated for contextual RAG\n    chunk_context: str\n\n    mini_chunk_texts: list[str] | None\n\n    large_chunk_id: int | None\n\n    large_chunk_reference_ids: list[int] = Field(default_factory=list)\n\n    def to_short_descriptor(self) -> str:\n        \"\"\"Used when logging the identity of a chunk\"\"\"\n        return f\"{self.source_document.to_short_descriptor()} Chunk ID: {self.chunk_id}\"\n\n    def get_link(self) -> str | None:\n        return (\n            self.source_document.sections[0].link\n            if self.source_document.sections\n            else None\n        )\n\n\nclass IndexChunk(DocAwareChunk):\n    embeddings: ChunkEmbedding\n    title_embedding: Embedding | None\n\n\n# TODO(rkuo): currently, this extra metadata sent during indexing is just for speed,\n# but full consistency happens on background sync\nclass DocMetadataAwareIndexChunk(IndexChunk):\n    \"\"\"An `IndexChunk` that contains all necessary metadata to be indexed. This includes\n    the following:\n\n    access: holds all information about which users should have access to the\n            source document for this chunk.\n    document_sets: all document sets the source document for this chunk is a part\n                   of. This is used for filtering / personas.\n    boost: influences the ranking of this chunk at query time. Positive -> ranked higher,\n           negative -> ranked lower. Not included in aggregated boost calculation\n           for legacy reasons.\n    aggregated_chunk_boost_factor: represents the aggregated chunk-level boost (currently: information content)\n    \"\"\"\n\n    tenant_id: str\n    access: \"DocumentAccess\"\n    document_sets: set[str]\n    user_project: list[int]\n    personas: list[int]\n    boost: int\n    aggregated_chunk_boost_factor: float\n    # Full ancestor path from root hierarchy node to document's parent.\n    # Stored as an integer array in OpenSearch for hierarchy-based filtering.\n    # Empty list means no hierarchy info (document excluded from hierarchy searches).\n    ancestor_hierarchy_node_ids: list[int]\n\n    @classmethod\n    def from_index_chunk(\n        cls,\n        index_chunk: IndexChunk,\n        access: \"DocumentAccess\",\n        document_sets: set[str],\n        user_project: list[int],\n        personas: list[int],\n        boost: int,\n        aggregated_chunk_boost_factor: float,\n        tenant_id: str,\n        ancestor_hierarchy_node_ids: list[int] | None = None,\n    ) -> \"DocMetadataAwareIndexChunk\":\n        return cls.model_construct(\n            **shallow_model_dump(index_chunk),\n            access=access,\n            document_sets=document_sets,\n            user_project=user_project,\n            personas=personas,\n            boost=boost,\n            aggregated_chunk_boost_factor=aggregated_chunk_boost_factor,\n            tenant_id=tenant_id,\n            ancestor_hierarchy_node_ids=ancestor_hierarchy_node_ids or [],\n        )\n\n\nclass EmbeddingModelDetail(BaseModel):\n    id: int | None = None\n    model_name: str\n    normalize: bool\n    query_prefix: str | None\n    passage_prefix: str | None\n    api_url: str | None = None\n    provider_type: EmbeddingProvider | None = None\n    api_key: str | None = None\n\n    # This disables the \"model_\" protected namespace for pydantic\n    model_config = {\"protected_namespaces\": ()}\n\n    @classmethod\n    def from_db_model(\n        cls,\n        search_settings: \"SearchSettings\",\n    ) -> \"EmbeddingModelDetail\":\n        api_key = None\n        if (\n            search_settings.cloud_provider is not None\n            and search_settings.cloud_provider.api_key is not None\n        ):\n            api_key = search_settings.cloud_provider.api_key.get_value(apply_mask=True)\n\n        return cls(\n            id=search_settings.id,\n            model_name=search_settings.model_name,\n            normalize=search_settings.normalize,\n            query_prefix=search_settings.query_prefix,\n            passage_prefix=search_settings.passage_prefix,\n            provider_type=search_settings.provider_type,\n            api_key=api_key,\n            api_url=search_settings.api_url,\n        )\n\n\n# Additional info needed for indexing time\nclass IndexingSetting(EmbeddingModelDetail):\n    model_dim: int\n    index_name: str | None\n    multipass_indexing: bool\n    embedding_precision: EmbeddingPrecision\n    reduced_dimension: int | None = None\n\n    switchover_type: SwitchoverType = SwitchoverType.REINDEX\n    enable_contextual_rag: bool\n    contextual_rag_llm_name: str | None = None\n    contextual_rag_llm_provider: str | None = None\n\n    # This disables the \"model_\" protected namespace for pydantic\n    model_config = {\"protected_namespaces\": ()}\n\n    @property\n    def final_embedding_dim(self) -> int:\n        if self.reduced_dimension:\n            return self.reduced_dimension\n        return self.model_dim\n\n    @classmethod\n    def from_db_model(cls, search_settings: \"SearchSettings\") -> \"IndexingSetting\":\n        return cls(\n            model_name=search_settings.model_name,\n            model_dim=search_settings.model_dim,\n            normalize=search_settings.normalize,\n            query_prefix=search_settings.query_prefix,\n            passage_prefix=search_settings.passage_prefix,\n            provider_type=search_settings.provider_type,\n            index_name=search_settings.index_name,\n            multipass_indexing=search_settings.multipass_indexing,\n            embedding_precision=search_settings.embedding_precision,\n            reduced_dimension=search_settings.reduced_dimension,\n            switchover_type=search_settings.switchover_type,\n            enable_contextual_rag=search_settings.enable_contextual_rag,\n        )\n\n\nclass MultipassConfig(BaseModel):\n    multipass_indexing: bool\n    enable_large_chunks: bool\n\n\nclass UpdatableChunkData(BaseModel):\n    chunk_id: int\n    document_id: str\n    boost_score: float\n\n\nclass ChunkEnrichmentContext(Protocol):\n    \"\"\"Returned by prepare_enrichment. Holds pre-computed metadata lookups\n    and provides per-chunk enrichment.\"\"\"\n\n    doc_id_to_previous_chunk_cnt: dict[str, int]\n    doc_id_to_new_chunk_cnt: dict[str, int]\n\n    def enrich_chunk(\n        self, chunk: IndexChunk, score: float\n    ) -> DocMetadataAwareIndexChunk: ...\n\n\nclass IndexingBatchAdapter(Protocol):\n    def prepare(\n        self, documents: list[Document], ignore_time_skip: bool\n    ) -> Optional[\"DocumentBatchPrepareContext\"]: ...\n\n    @contextlib.contextmanager\n    def lock_context(\n        self, documents: list[Document]\n    ) -> Generator[TransactionalContext, None, None]:\n        \"\"\"Provide a transaction/row-lock context for critical updates.\"\"\"\n\n    def prepare_enrichment(\n        self,\n        context: \"DocumentBatchPrepareContext\",\n        tenant_id: str,\n        chunks: list[DocAwareChunk],\n    ) -> ChunkEnrichmentContext:\n        \"\"\"Prepare per-chunk enrichment data (access, document sets, boost, etc.).\n\n        Precondition: ``chunks`` have already been through the embedding step\n        (i.e. they are ``IndexChunk`` instances with populated embeddings,\n        passed here as the base ``DocAwareChunk`` type).\n        \"\"\"\n        ...\n\n    def post_index(\n        self,\n        context: \"DocumentBatchPrepareContext\",\n        updatable_chunk_data: list[UpdatableChunkData],\n        filtered_documents: list[Document],\n        enrichment: ChunkEnrichmentContext,\n    ) -> None: ...\n"
  },
  {
    "path": "backend/onyx/indexing/vector_db_insertion.py",
    "content": "import time\nfrom collections.abc import Callable\nfrom collections.abc import Iterable\nfrom http import HTTPStatus\nfrom itertools import chain\nfrom itertools import groupby\n\nimport httpx\n\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.document_index.interfaces import DocumentInsertionRecord\nfrom onyx.document_index.interfaces import IndexBatchParams\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\ndef _log_insufficient_storage_error(e: Exception) -> None:\n    if isinstance(e, httpx.HTTPStatusError):\n        if e.response.status_code == HTTPStatus.INSUFFICIENT_STORAGE:\n            logger.error(\n                \"NOTE: HTTP Status 507 Insufficient Storage indicates \"\n                \"you need to allocate more memory or disk space to the \"\n                \"Vespa/index container.\"\n            )\n\n\ndef write_chunks_to_vector_db_with_backoff(\n    document_index: DocumentIndex,\n    make_chunks: Callable[[], Iterable[DocMetadataAwareIndexChunk]],\n    index_batch_params: IndexBatchParams,\n) -> tuple[list[DocumentInsertionRecord], list[ConnectorFailure]]:\n    \"\"\"Tries to insert all chunks in one large batch. If that batch fails for any reason,\n    goes document by document to isolate the failure(s).\n\n    IMPORTANT: must pass in whole documents at a time not individual chunks, since the\n    vector DB interface assumes that all chunks for a single document are present. The\n    chunks must also be in contiguous batches\n    \"\"\"\n    # first try to write the chunks to the vector db\n    try:\n        return (\n            list(\n                document_index.index(\n                    chunks=make_chunks(),\n                    index_batch_params=index_batch_params,\n                )\n            ),\n            [],\n        )\n    except Exception as e:\n        logger.exception(\n            \"Failed to write chunk batch to vector db. Trying individual docs.\"\n        )\n\n        # give some specific logging on this common failure case.\n        _log_insufficient_storage_error(e)\n\n        # wait a couple seconds just to give the vector db a chance to recover\n        time.sleep(2)\n\n    insertion_records: list[DocumentInsertionRecord] = []\n    failures: list[ConnectorFailure] = []\n\n    def key(chunk: DocMetadataAwareIndexChunk) -> str:\n        return chunk.source_document.id\n\n    seen_doc_ids: set[str] = set()\n    for doc_id, chunks_for_doc in groupby(make_chunks(), key=key):\n        if doc_id in seen_doc_ids:\n            raise RuntimeError(\n                f\"Doc chunks are not arriving in order. Current doc_id={doc_id}, seen_doc_ids={list(seen_doc_ids)}\"\n            )\n        seen_doc_ids.add(doc_id)\n\n        first_chunk = next(chunks_for_doc)\n        chunks_for_doc = chain([first_chunk], chunks_for_doc)\n\n        try:\n            insertion_records.extend(\n                document_index.index(\n                    chunks=chunks_for_doc,\n                    index_batch_params=index_batch_params,\n                )\n            )\n        except Exception as e:\n            logger.exception(\n                f\"Failed to write document chunks for '{doc_id}' to vector db\"\n            )\n\n            # give some specific logging on this common failure case.\n            _log_insufficient_storage_error(e)\n\n            failures.append(\n                ConnectorFailure(\n                    failed_document=DocumentFailure(\n                        document_id=doc_id,\n                        document_link=first_chunk.get_link(),\n                    ),\n                    failure_message=str(e),\n                    exception=e,\n                )\n            )\n\n    return insertion_records, failures\n"
  },
  {
    "path": "backend/onyx/key_value_store/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/key_value_store/factory.py",
    "content": "from onyx.key_value_store.interface import KeyValueStore\nfrom onyx.key_value_store.store import PgRedisKVStore\nfrom shared_configs.configs import DEFAULT_REDIS_PREFIX\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n\ndef get_kv_store() -> KeyValueStore:\n    # In the Multi Tenant case, the tenant context is picked up automatically, it does not need to be passed in\n    # It's read from the global thread level variable\n    return PgRedisKVStore()\n\n\ndef get_shared_kv_store() -> KeyValueStore:\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(DEFAULT_REDIS_PREFIX)\n    try:\n        return get_kv_store()\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n"
  },
  {
    "path": "backend/onyx/key_value_store/interface.py",
    "content": "import abc\nfrom typing import cast\n\nfrom onyx.utils.special_types import JSON_ro\n\n\nclass KvKeyNotFoundError(Exception):\n    pass\n\n\ndef unwrap_str(val: JSON_ro) -> str:\n    \"\"\"Unwrap a string stored as {\"value\": str} in the encrypted KV store.\n    Also handles legacy plain-string values cached in Redis.\"\"\"\n    if isinstance(val, dict):\n        try:\n            return cast(str, val[\"value\"])\n        except KeyError:\n            raise ValueError(\n                f\"Expected dict with 'value' key, got keys: {list(val.keys())}\"\n            )\n    return cast(str, val)\n\n\nclass KeyValueStore:\n    # In the Multi Tenant case, the tenant context is picked up automatically, it does not need to be passed in\n    # It's read from the global thread level variable\n    @abc.abstractmethod\n    def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def load(self, key: str, refresh_cache: bool = False) -> JSON_ro:\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def delete(self, key: str) -> None:\n        raise NotImplementedError\n"
  },
  {
    "path": "backend/onyx/key_value_store/store.py",
    "content": "import json\nfrom typing import cast\n\nfrom onyx.cache.interface import CacheBackend\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import KVStore\nfrom onyx.key_value_store.interface import KeyValueStore\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.special_types import JSON_ro\n\n\nlogger = setup_logger()\n\n\nREDIS_KEY_PREFIX = \"onyx_kv_store:\"\nKV_REDIS_KEY_EXPIRATION = 60 * 60 * 24  # 1 Day\n\n\nclass PgRedisKVStore(KeyValueStore):\n    def __init__(self, cache: CacheBackend | None = None) -> None:\n        self._cache = cache\n\n    def _get_cache(self) -> CacheBackend:\n        if self._cache is None:\n            from onyx.cache.factory import get_cache_backend\n\n            self._cache = get_cache_backend()\n        return self._cache\n\n    def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None:\n        # Not encrypted in Cache backend (typically Redis), but encrypted in Postgres\n        try:\n            self._get_cache().set(\n                REDIS_KEY_PREFIX + key, json.dumps(val), ex=KV_REDIS_KEY_EXPIRATION\n            )\n        except Exception as e:\n            # Fallback gracefully to Postgres if Cache backend fails\n            logger.error(\n                f\"Failed to set value in Cache backend for key '{key}': {str(e)}\"\n            )\n\n        encrypted_val = val if encrypt else None\n        plain_val = val if not encrypt else None\n        with get_session_with_current_tenant() as db_session:\n            obj = db_session.query(KVStore).filter_by(key=key).first()\n            if obj:\n                obj.value = plain_val\n                obj.encrypted_value = encrypted_val  # type: ignore[assignment]\n            else:\n                obj = KVStore(key=key, value=plain_val, encrypted_value=encrypted_val)\n                db_session.query(KVStore).filter_by(key=key).delete()  # just in case\n                db_session.add(obj)\n            db_session.commit()\n\n    def load(self, key: str, refresh_cache: bool = False) -> JSON_ro:\n        if not refresh_cache:\n            try:\n                cached = self._get_cache().get(REDIS_KEY_PREFIX + key)\n                if cached is not None:\n                    return json.loads(cached.decode(\"utf-8\"))\n            except Exception as e:\n                logger.error(\n                    f\"Failed to get value from cache for key '{key}': {str(e)}\"\n                )\n\n        with get_session_with_current_tenant() as db_session:\n            obj = db_session.query(KVStore).filter_by(key=key).first()\n            if not obj:\n                raise KvKeyNotFoundError\n\n            if obj.value is not None:\n                value = obj.value\n            elif obj.encrypted_value is not None:\n                # Unwrap SensitiveValue - this is internal backend use\n                value = obj.encrypted_value.get_value(apply_mask=False)\n            else:\n                value = None\n\n            try:\n                self._get_cache().set(\n                    REDIS_KEY_PREFIX + key,\n                    json.dumps(value),\n                    ex=KV_REDIS_KEY_EXPIRATION,\n                )\n            except Exception as e:\n                logger.error(f\"Failed to set value in cache for key '{key}': {str(e)}\")\n\n            return cast(JSON_ro, value)\n\n    def delete(self, key: str) -> None:\n        try:\n            self._get_cache().delete(REDIS_KEY_PREFIX + key)\n        except Exception as e:\n            logger.error(f\"Failed to delete value from cache for key '{key}': {str(e)}\")\n\n        with get_session_with_current_tenant() as db_session:\n            result = db_session.query(KVStore).filter_by(key=key).delete()\n            if result == 0:\n                raise KvKeyNotFoundError\n            db_session.commit()\n"
  },
  {
    "path": "backend/onyx/kg/clustering/clustering.py",
    "content": "import time\nfrom collections.abc import Generator\nfrom typing import cast\n\nfrom rapidfuzz.fuzz import ratio\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy import func\nfrom sqlalchemy import text\n\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.kg_configs import KG_CLUSTERING_RETRIEVE_THRESHOLD\nfrom onyx.configs.kg_configs import KG_CLUSTERING_THRESHOLD\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.entities import KGEntity\nfrom onyx.db.entities import KGEntityExtractionStaging\nfrom onyx.db.entities import merge_entities\nfrom onyx.db.entities import transfer_entity\nfrom onyx.db.kg_config import get_kg_config_settings\nfrom onyx.db.kg_config import validate_kg_settings\nfrom onyx.db.models import Document\nfrom onyx.db.models import KGEntityType\nfrom onyx.db.models import KGRelationshipExtractionStaging\nfrom onyx.db.models import KGRelationshipTypeExtractionStaging\nfrom onyx.db.relationships import transfer_relationship\nfrom onyx.db.relationships import transfer_relationship_type\nfrom onyx.db.relationships import upsert_relationship\nfrom onyx.db.relationships import upsert_relationship_type\nfrom onyx.document_index.vespa.kg_interactions import (\n    get_kg_vespa_info_update_requests_for_document,\n)\nfrom onyx.document_index.vespa.kg_interactions import update_kg_chunks_vespa_info\nfrom onyx.kg.models import KGGroundingType\nfrom onyx.kg.utils.formatting_utils import make_relationship_id\nfrom onyx.kg.utils.lock_utils import extend_lock\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\nlogger = setup_logger()\n\n\ndef _get_batch_untransferred_grounded_entities(\n    batch_size: int,\n) -> Generator[list[KGEntityExtractionStaging], None, None]:\n    while True:\n        with get_session_with_current_tenant() as db_session:\n            batch = (\n                db_session.query(KGEntityExtractionStaging)\n                .join(\n                    KGEntityType,\n                    KGEntityExtractionStaging.entity_type_id_name\n                    == KGEntityType.id_name,\n                )\n                .filter(\n                    KGEntityType.grounding == KGGroundingType.GROUNDED,\n                    KGEntityExtractionStaging.transferred_id_name.is_(None),\n                )\n                .limit(batch_size)\n                .all()\n            )\n            if not batch:\n                break\n            yield batch\n\n\ndef _get_batch_untransferred_relationship_types(\n    batch_size: int,\n) -> Generator[list[KGRelationshipTypeExtractionStaging], None, None]:\n    while True:\n        with get_session_with_current_tenant() as db_session:\n            batch = (\n                db_session.query(KGRelationshipTypeExtractionStaging)\n                .filter(KGRelationshipTypeExtractionStaging.transferred.is_(False))\n                .limit(batch_size)\n                .all()\n            )\n            if not batch:\n                break\n            yield batch\n\n\ndef _get_batch_untransferred_relationships(\n    batch_size: int,\n) -> Generator[list[KGRelationshipExtractionStaging], None, None]:\n    while True:\n        with get_session_with_current_tenant() as db_session:\n            batch = (\n                db_session.query(KGRelationshipExtractionStaging)\n                .filter(KGRelationshipExtractionStaging.transferred.is_(False))\n                .limit(batch_size)\n                .all()\n            )\n            if not batch:\n                break\n            yield batch\n\n\ndef _get_batch_entities_with_parent(\n    batch_size: int,\n) -> Generator[list[KGEntityExtractionStaging], None, None]:\n    offset = 0\n\n    while True:\n        with get_session_with_current_tenant() as db_session:\n            batch = (\n                db_session.query(KGEntityExtractionStaging)\n                .filter(KGEntityExtractionStaging.parent_key.isnot(None))\n                .order_by(KGEntityExtractionStaging.id_name)\n                .offset(offset)\n                .limit(batch_size)\n                .all()\n            )\n            if not batch:\n                break\n            # we can't filter out \"\"s earlier as it will mess up the pagination\n            yield [entity for entity in batch if entity.parent_key != \"\"]\n            offset += batch_size\n\n\ndef _get_batch_kg_processed_documents(\n    batch_size: int,\n) -> Generator[list[Document], None, None]:\n    offset = 0\n\n    while True:\n        with get_session_with_current_tenant() as db_session:\n            batch = (\n                db_session.query(Document)\n                .join(\n                    KGEntityExtractionStaging,\n                    Document.id == KGEntityExtractionStaging.document_id,\n                )\n                .filter(\n                    KGEntityExtractionStaging.transferred_id_name.is_not(None),\n                )\n                .order_by(Document.id)\n                .offset(offset)\n                .limit(batch_size)\n                .all()\n            )\n            if not batch:\n                break\n            yield batch\n            offset += batch_size\n\n\ndef _cluster_one_grounded_entity(\n    entity: KGEntityExtractionStaging,\n) -> tuple[KGEntity, bool]:\n    \"\"\"\n    Cluster a single grounded entity.\n    \"\"\"\n    with get_session_with_current_tenant() as db_session:\n        # get entity name and filtering conditions\n        if entity.document_id is not None:\n            entity_name = cast(\n                str,\n                db_session.query(Document.semantic_id)\n                .filter(Document.id == entity.document_id)\n                .scalar(),\n            ).lower()\n            filtering = [KGEntity.document_id.is_(None)]\n        else:\n            entity_name = entity.name.lower()\n            filtering = []\n\n        # skip those with numbers so we don't cluster version1 and version2, etc.\n        similar_entities: list[KGEntity] = []\n        if not any(char.isdigit() for char in entity_name):\n            # find similar entities, uses GIN index, very efficient\n            db_session.execute(\n                text(\n                    \"SET pg_trgm.similarity_threshold = \"\n                    + str(KG_CLUSTERING_RETRIEVE_THRESHOLD)\n                )\n            )\n            similar_entities = (\n                db_session.query(KGEntity)\n                .filter(\n                    # find entities of the same type with a similar name\n                    *filtering,\n                    KGEntity.entity_type_id_name == entity.entity_type_id_name,\n                    getattr(func, POSTGRES_DEFAULT_SCHEMA).similarity_op(\n                        KGEntity.name, entity_name\n                    ),\n                )\n                .all()\n            )\n\n    # find best match\n    best_score = -1.0\n    best_entity = None\n    for similar in similar_entities:\n        # skip those with numbers so we don't cluster version1 and version2, etc.\n        if any(char.isdigit() for char in similar.name):\n            continue\n        score = ratio(similar.name, entity_name)\n        if score >= KG_CLUSTERING_THRESHOLD * 100 and score > best_score:\n            best_score = score\n            best_entity = similar\n\n    # if there is a match, update the entity, otherwise create a new one\n    with get_session_with_current_tenant() as db_session:\n        if best_entity:\n            logger.debug(f\"Merged {entity.name} with {best_entity.name}\")\n            update_vespa = (\n                best_entity.document_id is None and entity.document_id is not None\n            )\n            transferred_entity = merge_entities(\n                db_session=db_session, parent=best_entity, child=entity\n            )\n        else:\n            update_vespa = entity.document_id is not None\n            transferred_entity = transfer_entity(db_session=db_session, entity=entity)\n\n        db_session.commit()\n\n    return transferred_entity, update_vespa\n\n\ndef _create_one_parent_child_relationship(entity: KGEntityExtractionStaging) -> None:\n    \"\"\"\n    Creates a relationship between the entity and its parent, if it exists.\n    Then, updates the entity's parent to the next ancestor.\n    \"\"\"\n    with get_session_with_current_tenant() as db_session:\n        # find the next ancestor\n        parent = (\n            db_session.query(KGEntity)\n            .filter(KGEntity.entity_key == entity.parent_key)\n            .first()\n        )\n\n        if parent is not None:\n            # create parent child relationship and relationship type\n            upsert_relationship_type(\n                db_session=db_session,\n                source_entity_type=parent.entity_type_id_name,\n                relationship_type=\"has_subcomponent\",\n                target_entity_type=entity.entity_type_id_name,\n            )\n            relationship_id_name = make_relationship_id(\n                parent.id_name,\n                \"has_subcomponent\",\n                cast(str, entity.transferred_id_name),\n            )\n            upsert_relationship(\n                db_session=db_session,\n                relationship_id_name=relationship_id_name,\n                source_document_id=entity.document_id,\n            )\n\n            next_ancestor = parent.parent_key or \"\"\n        else:\n            next_ancestor = \"\"\n\n        # set the staging entity's parent to the next ancestor\n        # if there is no parent or next ancestor, set to \"\" to differentiate from None\n        # None will mess up the pagination in _get_batch_entities_with_parent\n        db_session.query(KGEntityExtractionStaging).filter(\n            KGEntityExtractionStaging.id_name == entity.id_name\n        ).update({\"parent_key\": next_ancestor})\n        db_session.commit()\n\n\ndef _transfer_one_relationship(\n    relationship: KGRelationshipExtractionStaging,\n) -> None:\n    with get_session_with_current_tenant() as db_session:\n        # get the translations\n        staging_entity_id_names = {\n            relationship.source_node,\n            relationship.target_node,\n        }\n        entity_translations: dict[str, str] = {\n            entity.id_name: entity.transferred_id_name\n            for entity in db_session.query(KGEntityExtractionStaging)\n            .filter(KGEntityExtractionStaging.id_name.in_(staging_entity_id_names))\n            .all()\n            if entity.transferred_id_name is not None\n        }\n        if len(entity_translations) != len(staging_entity_id_names):\n            logger.error(\n                f\"Missing entity translations for {staging_entity_id_names - entity_translations.keys()}\"\n            )\n            return\n\n        # transfer the relationship\n        transfer_relationship(\n            db_session=db_session,\n            relationship=relationship,\n            entity_translations=entity_translations,\n        )\n        db_session.commit()\n\n\ndef kg_clustering(\n    tenant_id: str,\n    index_name: str,\n    lock: RedisLock,\n    processing_chunk_batch_size: int = 16,\n) -> None:\n    \"\"\"\n    Here we will cluster the extractions based on their cluster frameworks.\n    Initially, this will only focus on grounded entities with pre-determined\n    relationships, so 'clustering' is actually not yet required.\n    However, we may need to reconcile entities coming from different sources.\n\n    The primary purpose of this function is to populate the actual KG tables\n    from the temp_extraction tables.\n\n    This will change with deep extraction, where grounded-sourceless entities\n    can be extracted and then need to be clustered.\n    \"\"\"\n    logger.info(f\"Starting kg clustering for tenant {tenant_id}\")\n\n    kg_config_settings = get_kg_config_settings()\n    validate_kg_settings(kg_config_settings)\n\n    last_lock_time = time.monotonic()\n\n    # Cluster and transfer grounded entities sequentially\n    start_time = time.monotonic()\n    i_batch = 0\n    for i_batch, untransferred_grounded_entities in enumerate(\n        _get_batch_untransferred_grounded_entities(\n            batch_size=processing_chunk_batch_size\n        )\n    ):\n        for entity in untransferred_grounded_entities:\n            _cluster_one_grounded_entity(entity)\n        last_lock_time = extend_lock(\n            lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time\n        )\n        # logger.debug(f\"Transferred entities batch {i}\")\n    # NOTE: we assume every entity is transferred, as we currently only have grounded entities\n    time_delta = time.monotonic() - start_time\n    logger.info(\n        f\"Finished transferring {i_batch + 1} entity batches in {time_delta:.2f}s\"\n    )\n\n    # Create parent-child relationships in parallel\n    for _ in range(kg_config_settings.KG_MAX_PARENT_RECURSION_DEPTH):\n        for root_entities in _get_batch_entities_with_parent(\n            batch_size=processing_chunk_batch_size\n        ):\n            run_functions_tuples_in_parallel(\n                [\n                    (_create_one_parent_child_relationship, (root_entity,))\n                    for root_entity in root_entities\n                ]\n            )\n            last_lock_time = extend_lock(\n                lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time\n            )\n    logger.info(\"Finished creating all parent-child relationships\")\n\n    # Transfer the relationship types (no need to do in parallel as there's only a few)\n    start_time = time.monotonic()\n    i_batch = 0\n    for i_batch, relationship_types in enumerate(\n        _get_batch_untransferred_relationship_types(\n            batch_size=processing_chunk_batch_size\n        )\n    ):\n        with get_session_with_current_tenant() as db_session:\n            for relationship_type in relationship_types:\n                transfer_relationship_type(db_session, relationship_type)\n            db_session.commit()\n        last_lock_time = extend_lock(\n            lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time\n        )\n        # logger.debug(f\"Transferred relationship types batch {i}\")\n    time_delta = time.monotonic() - start_time\n    logger.info(\n        f\"Finished transferring {i_batch + 1} relationship type batches in {time_delta:.2f}s\"\n    )\n\n    # Transfer the relationships in parallel\n    start_time = time.monotonic()\n    i_batch = 0\n    for i_batch, relationships in enumerate(\n        _get_batch_untransferred_relationships(batch_size=processing_chunk_batch_size)\n    ):\n        run_functions_tuples_in_parallel(\n            [\n                (_transfer_one_relationship, (relationship,))\n                for relationship in relationships\n            ]\n        )\n        last_lock_time = extend_lock(\n            lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time\n        )\n        # logger.debug(f\"Transferred relationships batch {i}\")\n    time_delta = time.monotonic() - start_time\n    logger.info(\n        f\"Finished transferring {i_batch + 1} relationship batches in {time_delta:.2f}s\"\n    )\n\n    # Update vespa for each document\n    start_time = time.monotonic()\n    i_batch = 0\n    for i_batch, documents in enumerate(\n        _get_batch_kg_processed_documents(batch_size=processing_chunk_batch_size)\n    ):\n        batch_update_requests = run_functions_tuples_in_parallel(\n            [\n                (get_kg_vespa_info_update_requests_for_document, (document.id,))\n                for document in documents\n            ]\n        )\n        for update_requests, document in zip(batch_update_requests, documents):\n            try:\n                update_kg_chunks_vespa_info(update_requests, index_name, tenant_id)\n            except Exception as e:\n                logger.error(f\"Error updating vespa for document {document.id}: {e}\")\n        last_lock_time = extend_lock(\n            lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time\n        )\n        # logger.debug(f\"Updated vespa for documents batch {i}\")\n    time_delta = time.monotonic() - start_time\n    logger.info(\n        f\"Finished updating {i_batch + 1} document batches in {time_delta:.2f}s\"\n    )\n\n    # Delete the transferred objects from the staging tables\n    try:\n        with get_session_with_current_tenant() as db_session:\n            db_session.query(KGRelationshipExtractionStaging).filter(\n                KGRelationshipExtractionStaging.transferred.is_(True)\n            ).delete(synchronize_session=False)\n            db_session.commit()\n    except Exception as e:\n        logger.error(f\"Error deleting relationships: {e}\")\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            db_session.query(KGRelationshipTypeExtractionStaging).filter(\n                KGRelationshipTypeExtractionStaging.transferred.is_(True)\n            ).delete(synchronize_session=False)\n            db_session.commit()\n    except Exception as e:\n        logger.error(f\"Error deleting relationship types: {e}\")\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            db_session.query(KGEntityExtractionStaging).filter(\n                KGEntityExtractionStaging.transferred_id_name.is_not(None)\n            ).delete(synchronize_session=False)\n            db_session.commit()\n    except Exception as e:\n        logger.error(f\"Error deleting entities: {e}\")\n    logger.info(\"Finished deleting all transferred staging entries\")\n"
  },
  {
    "path": "backend/onyx/kg/clustering/normalizations.py",
    "content": "import re\nfrom collections import defaultdict\nfrom typing import cast\n\nimport numpy as np\nfrom rapidfuzz.distance.DamerauLevenshtein import normalized_similarity\nfrom sqlalchemy import desc\nfrom sqlalchemy import Float\nfrom sqlalchemy import func\nfrom sqlalchemy import MetaData\nfrom sqlalchemy import select\nfrom sqlalchemy import String\nfrom sqlalchemy import Table\nfrom sqlalchemy.dialects.postgresql import ARRAY\n\nfrom onyx.configs.kg_configs import KG_NORMALIZATION_RERANK_LEVENSHTEIN_WEIGHT\nfrom onyx.configs.kg_configs import KG_NORMALIZATION_RERANK_NGRAM_WEIGHTS\nfrom onyx.configs.kg_configs import KG_NORMALIZATION_RERANK_THRESHOLD\nfrom onyx.configs.kg_configs import KG_NORMALIZATION_RETRIEVE_ENTITIES_LIMIT\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import KGEntity\nfrom onyx.db.relationships import get_relationships_for_entity_type_pairs\nfrom onyx.kg.models import NormalizedEntities\nfrom onyx.kg.models import NormalizedRelationships\nfrom onyx.kg.utils.embeddings import encode_string_batch\nfrom onyx.kg.utils.formatting_utils import format_entity_id_for_models\nfrom onyx.kg.utils.formatting_utils import get_attributes\nfrom onyx.kg.utils.formatting_utils import get_entity_type\nfrom onyx.kg.utils.formatting_utils import make_entity_w_attributes\nfrom onyx.kg.utils.formatting_utils import make_relationship_id\nfrom onyx.kg.utils.formatting_utils import split_entity_id\nfrom onyx.kg.utils.formatting_utils import split_relationship_id\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\nlogger = setup_logger()\n\n\nalphanum_regex = re.compile(r\"[^a-z0-9]+\")\nrem_email_regex = re.compile(r\"(?<=\\S)@([a-z0-9-]+)\\.([a-z]{2,6})$\")\n\n\ndef _ngrams(sequence: str, n: int) -> list[tuple[str, ...]]:\n    \"\"\"Generate n-grams from a sequence.\"\"\"\n    return [tuple(sequence[i : i + n]) for i in range(len(sequence) - n + 1)]\n\n\ndef _clean_name(entity_name: str) -> str:\n    \"\"\"\n    Clean an entity string by removing non-alphanumeric characters and email addresses.\n    If the name after cleaning is empty, return the original name in lowercase.\n    \"\"\"\n    cleaned_entity = entity_name.casefold()\n    return (\n        alphanum_regex.sub(\"\", rem_email_regex.sub(\"\", cleaned_entity))\n        or cleaned_entity\n    )\n\n\ndef _normalize_one_entity(\n    entity: str,\n    attributes: dict[str, str],\n    allowed_docs_temp_view_name: str | None = None,\n) -> str | None:\n    \"\"\"\n    Matches a single entity to the best matching entity of the same type.\n    \"\"\"\n    entity_type, entity_name = split_entity_id(entity)\n    if entity_name == \"*\":\n        return entity\n\n    cleaned_entity = _clean_name(entity_name)\n\n    # narrow filter to subtype if requested\n    type_filters = [KGEntity.entity_type_id_name == entity_type]\n    if \"subtype\" in attributes:\n        type_filters.append(\n            KGEntity.attributes.op(\"@>\")({\"subtype\": attributes[\"subtype\"]})\n        )\n\n    # step 1: find entities containing the entity_name or something similar\n    with get_session_with_current_tenant() as db_session:\n        # get allowed documents\n        metadata = MetaData()\n        if allowed_docs_temp_view_name is None:\n            raise ValueError(\"allowed_docs_temp_view_name is not available\")\n\n        effective_schema_allowed_docs_temp_view_name = (\n            allowed_docs_temp_view_name.split(\".\")[-1]\n        )\n\n        allowed_docs_temp_view = Table(\n            effective_schema_allowed_docs_temp_view_name,\n            metadata,\n            autoload_with=db_session.get_bind(),\n        )\n\n        # generate trigrams of the queried entity Q\n        query_trigrams = db_session.query(\n            getattr(func, POSTGRES_DEFAULT_SCHEMA)\n            .show_trgm(cleaned_entity)\n            .cast(ARRAY(String(3)))\n            .label(\"trigrams\")\n        ).cte(\"query\")\n\n        candidates = cast(\n            list[tuple[str, str, float]],\n            db_session.query(\n                KGEntity.id_name,\n                KGEntity.name,\n                (\n                    # for each entity E, compute score = | Q ∩ E | / min(|Q|, |E|)\n                    func.cardinality(\n                        func.array(\n                            select(func.unnest(KGEntity.name_trigrams))\n                            .correlate(KGEntity)\n                            .intersect(\n                                select(\n                                    func.unnest(query_trigrams.c.trigrams)\n                                ).correlate(query_trigrams)\n                            )\n                            .scalar_subquery()\n                        )\n                    ).cast(Float)\n                    / func.least(\n                        func.cardinality(query_trigrams.c.trigrams),\n                        func.cardinality(KGEntity.name_trigrams),\n                    )\n                ).label(\"score\"),\n            )\n            .select_from(KGEntity, query_trigrams)\n            .outerjoin(\n                allowed_docs_temp_view,\n                KGEntity.document_id == allowed_docs_temp_view.c.allowed_doc_id,\n            )\n            .filter(\n                *type_filters,\n                KGEntity.name_trigrams.overlap(query_trigrams.c.trigrams),\n                # Add filter for allowed docs - either document_id is NULL or it's in allowed_docs\n                (\n                    KGEntity.document_id.is_(None)\n                    | allowed_docs_temp_view.c.allowed_doc_id.isnot(None)\n                ),\n            )\n            .order_by(desc(\"score\"))\n            .limit(KG_NORMALIZATION_RETRIEVE_ENTITIES_LIMIT)\n            .all(),\n        )\n    if not candidates:\n        return None\n\n    # step 2: do a weighted ngram analysis and damerau levenshtein distance to rerank\n    n1, n2, n3 = (\n        set(_ngrams(cleaned_entity, 1)),\n        set(_ngrams(cleaned_entity, 2)),\n        set(_ngrams(cleaned_entity, 3)),\n    )\n    for i, (candidate_id_name, candidate_name, _) in enumerate(candidates):\n        cleaned_candidate = _clean_name(candidate_name)\n        h_n1, h_n2, h_n3 = (\n            set(_ngrams(cleaned_candidate, 1)),\n            set(_ngrams(cleaned_candidate, 2)),\n            set(_ngrams(cleaned_candidate, 3)),\n        )\n\n        # compute ngram overlap, renormalize scores if the names are too short for larger ngrams\n        grams_used = min(2, len(cleaned_entity) - 1, len(cleaned_candidate) - 1)\n        W_n1, W_n2, W_n3 = KG_NORMALIZATION_RERANK_NGRAM_WEIGHTS\n        ngram_score = (\n            # compute | Q ∩ E | / min(|Q|, |E|) for unigrams and bigrams (trigrams already computed)\n            W_n1 * len(n1 & h_n1) / max(1, min(len(n1), len(h_n1)))\n            + W_n2 * len(n2 & h_n2) / max(1, min(len(n2), len(h_n2)))\n            + W_n3 * len(n3 & h_n3) / max(1, min(len(n3), len(h_n3)))\n        ) / (W_n1, W_n1 + W_n2, 1.0)[grams_used]\n\n        # compute damerau levenshtein distance to fuzzy match against typos\n        W_leven = KG_NORMALIZATION_RERANK_LEVENSHTEIN_WEIGHT\n        leven_score = normalized_similarity(cleaned_entity, cleaned_candidate)\n\n        # combine scores\n        score = (1.0 - W_leven) * ngram_score + W_leven * leven_score\n        candidates[i] = (candidate_id_name, candidate_name, score)\n    candidates = list(\n        sorted(\n            filter(lambda x: x[2] > KG_NORMALIZATION_RERANK_THRESHOLD, candidates),\n            key=lambda x: x[2],\n            reverse=True,\n        )\n    )\n    if not candidates:\n        return None\n\n    return candidates[0][0]\n\n\ndef _get_existing_normalized_relationships(\n    raw_relationships: list[str],\n) -> dict[str, dict[str, list[str]]]:\n    \"\"\"\n    Get existing normalized relationships from the database.\n    \"\"\"\n\n    relationship_type_map: dict[str, dict[str, list[str]]] = defaultdict(\n        lambda: defaultdict(list)\n    )\n    relationship_pairs = list(\n        {\n            (\n                get_entity_type(split_relationship_id(relationship)[0]),\n                get_entity_type(split_relationship_id(relationship)[2]),\n            )\n            for relationship in raw_relationships\n        }\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        relationships = get_relationships_for_entity_type_pairs(\n            db_session, relationship_pairs\n        )\n\n    for relationship in relationships:\n        relationship_type_map[relationship.source_entity_type_id_name][\n            relationship.target_entity_type_id_name\n        ].append(relationship.id_name)\n\n    return relationship_type_map\n\n\ndef normalize_entities(\n    raw_entities: list[str],\n    raw_entities_w_attributes: list[str],\n    allowed_docs_temp_view_name: str | None = None,\n) -> NormalizedEntities:\n    \"\"\"\n    Match each entity against a list of normalized entities using fuzzy matching.\n    Returns the best matching normalized entity for each input entity.\n\n    Args:\n        raw_entities: list of entity strings to normalize, w/o attributes\n        raw_entities_w_attributes: list of entity strings to normalize, w/ attributes\n\n    Returns:\n        list of normalized entity strings\n    \"\"\"\n    normalized_entities: list[str] = []\n    normalized_entities_w_attributes: list[str] = []\n    normalized_map: dict[str, str] = {}\n\n    entity_attributes = [\n        get_attributes(attr_entity) for attr_entity in raw_entities_w_attributes\n    ]\n\n    mapping: list[str | None] = run_functions_tuples_in_parallel(\n        [\n            (_normalize_one_entity, (entity, attributes, allowed_docs_temp_view_name))\n            for entity, attributes in zip(raw_entities, entity_attributes)\n        ]\n    )\n    for entity, attributes, normalized_entity in zip(\n        raw_entities, entity_attributes, mapping\n    ):\n        if normalized_entity is not None:\n            normalized_entities.append(normalized_entity)\n            normalized_entities_w_attributes.append(\n                make_entity_w_attributes(normalized_entity, attributes)\n            )\n            normalized_map[entity] = format_entity_id_for_models(normalized_entity)\n        else:\n            logger.warning(f\"No normalized entity found for {entity}\")\n            normalized_map[entity] = format_entity_id_for_models(entity)\n\n    return NormalizedEntities(\n        entities=normalized_entities,\n        entities_w_attributes=normalized_entities_w_attributes,\n        entity_normalization_map=normalized_map,\n    )\n\n\ndef normalize_relationships(\n    raw_relationships: list[str], entity_normalization_map: dict[str, str]\n) -> NormalizedRelationships:\n    \"\"\"\n    Normalize relationships using entity mappings and relationship string matching.\n\n    Args:\n        relationships: list of relationships in format \"source__relation__target\"\n        entity_normalization_map: Mapping of raw entities to normalized ones (or None)\n\n    Returns:\n        NormalizedRelationships containing normalized relationships and mapping\n    \"\"\"\n    # Placeholder for normalized relationship structure\n    nor_relationships = _get_existing_normalized_relationships(raw_relationships)\n\n    normalized_rels: list[str] = []\n    normalization_map: dict[str, str] = {}\n\n    for raw_rel in raw_relationships:\n        # 1. Split and normalize entities\n        try:\n            source, rel_string, target = split_relationship_id(raw_rel)\n        except ValueError:\n            raise ValueError(f\"Invalid relationship format: {raw_rel}\")\n\n        # Check if entities are in normalization map and not None\n        norm_source = entity_normalization_map.get(source)\n        norm_target = entity_normalization_map.get(target)\n\n        if norm_source is None or norm_target is None:\n            logger.warning(f\"No normalized entities found for {raw_rel}\")\n            continue\n\n        # 2. Find candidate normalized relationships\n        candidate_rels = []\n        norm_source_type = get_entity_type(format_entity_id_for_models(norm_source))\n        norm_target_type = get_entity_type(format_entity_id_for_models(norm_target))\n        if (\n            norm_source_type in nor_relationships\n            and norm_target_type in nor_relationships[norm_source_type]\n        ):\n            candidate_rels = [\n                split_relationship_id(rel)[1]\n                for rel in nor_relationships[norm_source_type][norm_target_type]\n            ]\n\n        if not candidate_rels:\n            logger.warning(f\"No candidate relationships found for {raw_rel}\")\n            continue\n\n        # 3. Encode and find best match\n        strings_to_encode = [rel_string] + candidate_rels\n        vectors = encode_string_batch(strings_to_encode)\n\n        # Get raw relation vector and candidate vectors\n        raw_vector = vectors[0]\n        candidate_vectors = vectors[1:]\n\n        # Calculate dot products\n        dot_products = np.dot(candidate_vectors, raw_vector)\n        best_match_idx = np.argmax(dot_products)\n\n        # Create normalized relationship\n        norm_rel = make_relationship_id(\n            norm_source, candidate_rels[best_match_idx], norm_target\n        )\n        normalized_rels.append(norm_rel)\n        normalization_map[raw_rel] = norm_rel\n\n    return NormalizedRelationships(\n        relationships=normalized_rels, relationship_normalization_map=normalization_map\n    )\n"
  },
  {
    "path": "backend/onyx/kg/extractions/extraction_processing.py",
    "content": "import time\nfrom typing import Any\n\nfrom redis.lock import Lock as RedisLock\n\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.db.connector import get_kg_enabled_connectors\nfrom onyx.db.document import get_document_updated_at\nfrom onyx.db.document import get_skipped_kg_documents\nfrom onyx.db.document import get_unprocessed_kg_document_batch_for_connector\nfrom onyx.db.document import update_document_kg_info\nfrom onyx.db.document import update_document_kg_stage\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.entities import delete_from_kg_entities__no_commit\nfrom onyx.db.entities import upsert_staging_entity\nfrom onyx.db.entity_type import get_entity_types\nfrom onyx.db.kg_config import get_kg_config_settings\nfrom onyx.db.kg_config import validate_kg_settings\nfrom onyx.db.models import Document\nfrom onyx.db.models import KGStage\nfrom onyx.db.relationships import delete_from_kg_relationships__no_commit\nfrom onyx.db.relationships import upsert_staging_relationship\nfrom onyx.db.relationships import upsert_staging_relationship_type\nfrom onyx.kg.models import KGClassificationInstructions\nfrom onyx.kg.models import KGDocumentDeepExtractionResults\nfrom onyx.kg.models import KGEnhancedDocumentMetadata\nfrom onyx.kg.models import KGEntityTypeInstructions\nfrom onyx.kg.models import KGExtractionInstructions\nfrom onyx.kg.models import KGImpliedExtractionResults\nfrom onyx.kg.utils.extraction_utils import EntityTypeMetadataTracker\nfrom onyx.kg.utils.extraction_utils import (\n    get_batch_documents_metadata,\n)\nfrom onyx.kg.utils.extraction_utils import kg_deep_extraction\nfrom onyx.kg.utils.extraction_utils import (\n    kg_implied_extraction,\n)\nfrom onyx.kg.utils.formatting_utils import extract_relationship_type_id\nfrom onyx.kg.utils.formatting_utils import get_entity_type\nfrom onyx.kg.utils.formatting_utils import split_entity_id\nfrom onyx.kg.utils.formatting_utils import split_relationship_id\nfrom onyx.kg.utils.lock_utils import extend_lock\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\n\nlogger = setup_logger()\n\n\ndef _get_classification_extraction_instructions() -> (\n    dict[str | None, dict[str, KGEntityTypeInstructions]]\n):\n    \"\"\"\n    Prepare the classification instructions for the given source.\n    \"\"\"\n\n    classification_instructions_dict: dict[\n        str | None, dict[str, KGEntityTypeInstructions]\n    ] = {}\n\n    with get_session_with_current_tenant() as db_session:\n        entity_types = get_entity_types(db_session, active=True)\n\n    for entity_type in entity_types:\n        grounded_source_name = entity_type.grounded_source_name\n\n        if grounded_source_name not in classification_instructions_dict:\n            classification_instructions_dict[grounded_source_name] = {}\n\n        if grounded_source_name is None:\n            continue\n\n        attributes = entity_type.parsed_attributes\n        classification_attributes = {\n            option: info\n            for option, info in attributes.classification_attributes.items()\n            if info.extraction\n        }\n        classification_options = \", \".join(classification_attributes.keys())\n        classification_enabled = (\n            len(classification_options) > 0 and len(classification_attributes) > 0\n        )\n\n        classification_instructions_dict[grounded_source_name][entity_type.id_name] = (\n            KGEntityTypeInstructions(\n                metadata_attribute_conversion=attributes.metadata_attribute_conversion,\n                classification_instructions=KGClassificationInstructions(\n                    classification_enabled=classification_enabled,\n                    classification_options=classification_options,\n                    classification_class_definitions=classification_attributes,\n                ),\n                extraction_instructions=KGExtractionInstructions(\n                    deep_extraction=entity_type.deep_extraction,\n                    active=entity_type.active,\n                ),\n                entity_filter_attributes=attributes.entity_filter_attributes,\n            )\n        )\n\n    return classification_instructions_dict\n\n\ndef _get_batch_documents_enhanced_metadata(\n    unprocessed_document_batch: list[Document],\n    source_type_classification_extraction_instructions: dict[\n        str, KGEntityTypeInstructions\n    ],\n    connector_source: str,\n) -> dict[str, KGEnhancedDocumentMetadata]:\n    \"\"\"\n    Get the entity types for the given unprocessed documents.\n    \"\"\"\n\n    kg_document_meta_data_dict: dict[str, KGEnhancedDocumentMetadata] = {\n        document.id: KGEnhancedDocumentMetadata(\n            entity_type=None,\n            metadata_attribute_conversion=None,\n            document_metadata=None,\n            deep_extraction=False,\n            classification_enabled=False,\n            classification_instructions=None,\n            skip=True,\n        )\n        for document in unprocessed_document_batch\n    }\n\n    batch_entity = None\n    if len(source_type_classification_extraction_instructions) == 1:\n        # if source only has one entity type, the document must be of that type\n        batch_entity = list(source_type_classification_extraction_instructions.keys())[\n            0\n        ]\n\n    # the documents can be of multiple entity types. We need to identify the entity type for each document\n    batch_metadata = get_batch_documents_metadata(\n        [\n            unprocessed_document.id\n            for unprocessed_document in unprocessed_document_batch\n        ],\n        connector_source,\n    )\n\n    for metadata in batch_metadata:\n        document_id = metadata.document_id\n        doc_entity = None\n\n        if not isinstance(document_id, str):\n            continue\n\n        chunk_metadata = metadata.source_metadata\n\n        if batch_entity:\n            doc_entity = batch_entity\n        else:\n            # TODO: make this a helper function\n            if not chunk_metadata:\n                continue\n\n            for (\n                potential_entity_type\n            ) in source_type_classification_extraction_instructions.keys():\n                potential_entity_type_attribute_filters = (\n                    source_type_classification_extraction_instructions[\n                        potential_entity_type\n                    ].entity_filter_attributes\n                    or {}\n                )\n\n                if not potential_entity_type_attribute_filters:\n                    continue\n\n                if all(\n                    chunk_metadata.get(attribute)\n                    == potential_entity_type_attribute_filters.get(attribute)\n                    for attribute in potential_entity_type_attribute_filters\n                ):\n                    doc_entity = potential_entity_type\n                    break\n\n        if doc_entity is None:\n            continue\n\n        entity_instructions = source_type_classification_extraction_instructions[\n            doc_entity\n        ]\n\n        kg_document_meta_data_dict[document_id] = KGEnhancedDocumentMetadata(\n            entity_type=doc_entity,\n            metadata_attribute_conversion=(\n                source_type_classification_extraction_instructions[\n                    doc_entity\n                ].metadata_attribute_conversion\n            ),\n            document_metadata=chunk_metadata,\n            deep_extraction=entity_instructions.extraction_instructions.deep_extraction,\n            classification_enabled=entity_instructions.classification_instructions.classification_enabled,\n            classification_instructions=entity_instructions.classification_instructions,\n            skip=False,\n        )\n\n    return kg_document_meta_data_dict\n\n\ndef kg_extraction(\n    tenant_id: str,\n    index_name: str,\n    lock: RedisLock,\n    processing_chunk_batch_size: int = 8,\n) -> None:\n    \"\"\"\n    This extraction will try to extract from all chunks that have not been kg-processed yet.\n\n    Approach:\n    - Get all connectors that are enabled for KG extraction\n    - For each enabled connector:\n        - Get unprocessed documents (using a generator)\n        - For each batch of unprocessed documents:\n            - Classify each document to select proper ones\n            - Get and extract from chunks\n            - Update chunks in Vespa\n            - Update temporary KG extraction tables\n            - Update document table to set kg_extracted = True\n    \"\"\"\n\n    logger.info(f\"Starting kg extraction for tenant {tenant_id}\")\n\n    kg_config_settings = get_kg_config_settings()\n    validate_kg_settings(kg_config_settings)\n\n    # get connector ids that are enabled for KG extraction\n    with get_session_with_current_tenant() as db_session:\n        kg_enabled_connectors = get_kg_enabled_connectors(db_session)\n\n    document_classification_extraction_instructions = (\n        _get_classification_extraction_instructions()\n    )\n\n    # get entity type info\n    with get_session_with_current_tenant() as db_session:\n        all_entity_types = get_entity_types(db_session)\n        active_entity_types = {\n            entity_type.id_name\n            for entity_type in get_entity_types(db_session, active=True)\n        }\n\n        # entity_type: (metadata: conversion property)\n        entity_metadata_conversion_instructions = {\n            entity_type.id_name: entity_type.parsed_attributes.metadata_attribute_conversion\n            for entity_type in all_entity_types\n        }\n\n    # Track which metadata attributes are possible for each entity type\n    metadata_tracker = EntityTypeMetadataTracker()\n    metadata_tracker.import_typeinfo()\n\n    last_lock_time = time.monotonic()\n\n    # Iterate over connectors that are enabled for KG extraction\n    for kg_enabled_connector in kg_enabled_connectors:\n        connector_id = kg_enabled_connector.id\n        connector_coverage_days = kg_enabled_connector.kg_coverage_days\n        connector_source = kg_enabled_connector.source\n\n        document_batch_counter = 0\n\n        # iterate over un-kg-processed documents in connector\n        while True:\n            # get a batch of unprocessed documents\n            with get_session_with_current_tenant() as db_session:\n                unprocessed_document_batch = (\n                    get_unprocessed_kg_document_batch_for_connector(\n                        db_session,\n                        connector_id,\n                        kg_coverage_start=kg_config_settings.KG_COVERAGE_START_DATE,\n                        kg_max_coverage_days=connector_coverage_days\n                        or kg_config_settings.KG_MAX_COVERAGE_DAYS,\n                        batch_size=processing_chunk_batch_size,\n                    )\n                )\n\n            if len(unprocessed_document_batch) == 0:\n                logger.info(\n                    f\"No unprocessed documents found for connector {connector_id}. Processed {document_batch_counter} batches.\"\n                )\n                break\n\n            document_batch_counter += 1\n            last_lock_time = extend_lock(\n                lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time\n            )\n            logger.info(f\"Processing document batch {document_batch_counter}\")\n\n            # Get the document attributes and entity types\n            batch_metadata = _get_batch_documents_enhanced_metadata(\n                unprocessed_document_batch,\n                document_classification_extraction_instructions.get(\n                    connector_source, {}\n                ),\n                connector_source,\n            )\n\n            # mark docs in unprocessed_document_batch as EXTRACTING\n            for unprocessed_document in unprocessed_document_batch:\n                if batch_metadata[unprocessed_document.id].entity_type is None:\n                    # info for after the connector has been processed\n                    kg_stage = KGStage.SKIPPED\n                    logger.debug(\n                        f\"Document {unprocessed_document.id} is not of any entity type\"\n                    )\n                elif batch_metadata[unprocessed_document.id].skip:\n                    # info for after the connector has been processed. But no message as there may be many\n                    # purposefully skipped documents\n                    kg_stage = KGStage.SKIPPED\n                else:\n                    kg_stage = KGStage.EXTRACTING\n\n                with get_session_with_current_tenant() as db_session:\n                    update_document_kg_stage(\n                        db_session,\n                        unprocessed_document.id,\n                        kg_stage,\n                    )\n\n                    if kg_stage == KGStage.EXTRACTING:\n                        delete_from_kg_relationships__no_commit(\n                            db_session, [unprocessed_document.id]\n                        )\n                        delete_from_kg_entities__no_commit(\n                            db_session, [unprocessed_document.id]\n                        )\n                    db_session.commit()\n\n            # Iterate over batches of unprocessed documents\n            # For each document:\n            #   - extract implied entities and relationships\n            #   - if deep extraction is enabled, extract entities and relationships with LLM\n            #   - if deep extraction and classification are enabled, classify document\n            #   - update postgres with\n            #     - extracted entities (with classification) and relationships\n            #     - kg_stage of the processed document\n\n            documents_to_process = [x.id for x in unprocessed_document_batch]\n            batch_implied_extraction: dict[str, KGImpliedExtractionResults] = {}\n            batch_deep_extraction_args: list[\n                tuple[str, KGEnhancedDocumentMetadata, KGImpliedExtractionResults]\n            ] = []\n\n            for unprocessed_document in unprocessed_document_batch:\n                if (\n                    unprocessed_document.id not in documents_to_process\n                    or batch_metadata[unprocessed_document.id].entity_type is None\n                    or batch_metadata[unprocessed_document.id].skip\n                ):\n                    with get_session_with_current_tenant() as db_session:\n                        update_document_kg_stage(\n                            db_session,\n                            unprocessed_document.id,\n                            KGStage.SKIPPED,\n                        )\n                        db_session.commit()\n                    continue\n\n                # 1. perform (implicit) KG 'extractions' on the documents that should be processed\n                # This is really about assigning document meta-data to KG entities/relationships or KG entity attributes\n                # General approach:\n                #    - vendor emails to Employee-type entities + relationship to current primary grounded entity\n                #    - external account emails to Account-type entities + relationship to current primary grounded entity\n                #    - non-email owners to KG current entity's attributes, no relationships\n                # We also collect email addresses of vendors and external accounts to inform chunk processing\n                batch_implied_extraction[unprocessed_document.id] = (\n                    kg_implied_extraction(\n                        unprocessed_document,\n                        batch_metadata[unprocessed_document.id],\n                        active_entity_types,\n                        kg_config_settings,\n                    )\n                )\n\n                # 2. prepare inputs for deep extraction and classification\n                if batch_metadata[unprocessed_document.id].deep_extraction:\n                    batch_deep_extraction_args.append(\n                        (\n                            unprocessed_document.id,\n                            batch_metadata[unprocessed_document.id],\n                            batch_implied_extraction[unprocessed_document.id],\n                        )\n                    )\n\n            # 2. perform deep extraction and classification in parallel\n            batch_deep_extraction_func_calls = [\n                (\n                    kg_deep_extraction,\n                    (\n                        *arg,\n                        tenant_id,\n                        index_name,\n                        kg_config_settings,\n                    ),\n                )\n                for arg in batch_deep_extraction_args\n            ]\n            batch_deep_extractions: dict[str, KGDocumentDeepExtractionResults] = {\n                document_id: result\n                for document_id, result in zip(\n                    documents_to_process,\n                    run_functions_tuples_in_parallel(batch_deep_extraction_func_calls),\n                )\n            }\n\n            # Collect entities and relationships to upsert\n            batch_entities: list[tuple[str | None, str]] = []\n            batch_relationships: list[tuple[str, str]] = []\n            entity_classification: dict[str, str] = {}\n\n            for document_id, implied_metadata in batch_implied_extraction.items():\n                batch_entities += [\n                    (None, entity) for entity in implied_metadata.implied_entities\n                ]\n                batch_entities.append((document_id, implied_metadata.document_entity))\n                batch_relationships += [\n                    (document_id, relationship)\n                    for relationship in implied_metadata.implied_relationships\n                ]\n\n            for document_id, deep_extraction_result in batch_deep_extractions.items():\n                batch_entities += [\n                    (None, entity)\n                    for entity in deep_extraction_result.deep_extracted_entities\n                ]\n                for relationship in deep_extraction_result.deep_extracted_relationships:\n                    source_entity, _, target_entity = split_relationship_id(\n                        relationship\n                    )\n                    if (\n                        source_entity in active_entity_types\n                        and target_entity in active_entity_types\n                    ):\n                        batch_relationships += [(document_id, relationship)]\n\n                classification_result = deep_extraction_result.classification_result\n                if not classification_result:\n                    continue\n                entity_classification[classification_result.document_entity] = (\n                    classification_result.classification_class\n                )\n\n            # Populate the KG database with the extracted entities, relationships, and terms\n            for potential_document_id, entity in batch_entities:\n                # verify the entity is valid\n                parts = split_entity_id(entity)\n                if len(parts) != 2:\n                    logger.error(\n                        f\"Invalid entity {entity} in aggregated_kg_extractions.entities\"\n                    )\n                    continue\n\n                entity_type, entity_name = parts\n                entity_type = entity_type.upper()\n                entity_name = entity_name.capitalize()\n\n                if entity_type not in active_entity_types:\n                    continue\n\n                try:\n                    with get_session_with_current_tenant() as db_session:\n                        entity_attributes: dict[str, Any] = {}\n\n                        if potential_document_id:\n                            entity_attributes = (\n                                batch_metadata[potential_document_id].document_metadata\n                                or {}\n                            )\n\n                        # only keep selected attributes (and translate the attribute names)\n                        metadata_attributes = entity_metadata_conversion_instructions[\n                            entity_type\n                        ]\n                        keep_attributes = {\n                            metadata_attributes[attr_name].name: attr_val\n                            for attr_name, attr_val in entity_attributes.items()\n                            if (\n                                attr_name in metadata_attributes\n                                and metadata_attributes[attr_name].keep\n                            )\n                        }\n\n                        # add the classification result to the attributes\n                        if entity in entity_classification:\n                            keep_attributes[\"classification\"] = entity_classification[\n                                entity\n                            ]\n\n                        event_time = None\n                        if potential_document_id:\n                            event_time = get_document_updated_at(\n                                potential_document_id, db_session\n                            )\n\n                        upserted_entity = upsert_staging_entity(\n                            db_session=db_session,\n                            name=entity_name,\n                            entity_type=entity_type,\n                            document_id=potential_document_id,\n                            occurrences=1,\n                            attributes=keep_attributes,\n                            event_time=event_time,\n                        )\n                        metadata_tracker.track_metadata(\n                            entity_type, upserted_entity.attributes\n                        )\n\n                        db_session.commit()\n                except Exception as e:\n                    logger.error(f\"Error adding entity {entity}. Error message: {e}\")\n\n            for document_id, relationship in batch_relationships:\n                relationship_split = split_relationship_id(relationship)\n\n                if len(relationship_split) != 3:\n                    logger.error(\n                        f\"Invalid relationship {relationship} in aggregated_kg_extractions.relationships\"\n                    )\n                    continue\n\n                source_entity, relationship_type, target_entity = relationship_split\n\n                source_entity_type = get_entity_type(source_entity)\n                target_entity_type = get_entity_type(target_entity)\n\n                if (\n                    source_entity_type not in active_entity_types\n                    or target_entity_type not in active_entity_types\n                ):\n                    continue\n\n                relationship_type_id_name = extract_relationship_type_id(relationship)\n\n                with get_session_with_current_tenant() as db_session:\n                    try:\n                        upsert_staging_relationship_type(\n                            db_session=db_session,\n                            source_entity_type=source_entity_type.upper(),\n                            relationship_type=relationship_type,\n                            target_entity_type=target_entity_type.upper(),\n                            definition=False,\n                            extraction_count=1,\n                        )\n                        db_session.commit()\n                    except Exception as e:\n                        logger.error(\n                            f\"Error adding relationship type {relationship_type_id_name} to the database: {e}\"\n                        )\n\n                    with get_session_with_current_tenant() as db_session:\n                        try:\n                            upsert_staging_relationship(\n                                db_session=db_session,\n                                relationship_id_name=relationship,\n                                source_document_id=document_id,\n                                occurrences=1,\n                            )\n                            db_session.commit()\n                        except Exception as e:\n                            logger.error(\n                                f\"Error adding relationship {relationship} to the database: {e}\"\n                            )\n\n            # Populate the Documents table with the kg information for the documents\n\n            for processed_document in documents_to_process:\n                with get_session_with_current_tenant() as db_session:\n                    update_document_kg_info(\n                        db_session,\n                        processed_document,\n                        KGStage.EXTRACTED,\n                    )\n                    db_session.commit()\n\n        # Update the the Skipped Docs back to Not Started\n        with get_session_with_current_tenant() as db_session:\n            skipped_documents = get_skipped_kg_documents(db_session)\n            for document_id in skipped_documents:\n                update_document_kg_stage(\n                    db_session,\n                    document_id,\n                    KGStage.NOT_STARTED,\n                )\n                db_session.commit()\n\n    metadata_tracker.export_typeinfo()\n"
  },
  {
    "path": "backend/onyx/kg/models.py",
    "content": "from datetime import datetime\nfrom enum import Enum\nfrom typing import Any\n\nfrom pydantic import BaseModel\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.kg_configs import KG_DEFAULT_MAX_PARENT_RECURSION_DEPTH\n\n\n# Note: make sure to write a migration if adding a non-nullable field or removing a field\nclass KGConfigSettings(BaseModel):\n    KG_EXPOSED: bool = False\n    KG_ENABLED: bool = False\n    KG_VENDOR: str | None = None\n    KG_VENDOR_DOMAINS: list[str] = []\n    KG_IGNORE_EMAIL_DOMAINS: list[str] = []\n    KG_COVERAGE_START: str = datetime(1970, 1, 1).strftime(\"%Y-%m-%d\")\n    KG_MAX_COVERAGE_DAYS: int = 10000\n    KG_MAX_PARENT_RECURSION_DEPTH: int = KG_DEFAULT_MAX_PARENT_RECURSION_DEPTH\n    KG_BETA_PERSONA_ID: int | None = None\n\n    @property\n    def KG_COVERAGE_START_DATE(self) -> datetime:\n        return datetime.strptime(self.KG_COVERAGE_START, \"%Y-%m-%d\")\n\n\nclass KGGroundingType(str, Enum):\n    UNGROUNDED = \"ungrounded\"\n    GROUNDED = \"grounded\"\n\n\nclass KGAttributeTrackType(str, Enum):\n    VALUE = \"value\"\n    LIST = \"list\"\n\n\nclass KGAttributeTrackInfo(BaseModel):\n    type: KGAttributeTrackType\n    values: set[str] | None\n\n\nclass KGAttributeEntityOption(str, Enum):\n    FROM_EMAIL = \"from_email\"  # use email to determine type (ACCOUNT or EMPLOYEE)\n\n\nclass KGAttributeImplicationProperty(BaseModel):\n    # type of implied entity to create\n    # if str, will create an implied entity of that type\n    # if KGAttributeEntityOption, will determine the type based on the option\n    implied_entity_type: str | KGAttributeEntityOption\n    # name of the implied relationship to create (from implied entity to this entity)\n    implied_relationship_name: str\n\n\nclass KGAttributeProperty(BaseModel):\n    # name of attribute to map metadata to\n    name: str\n    # whether to keep this attribute in the entity\n    keep: bool\n    # properties for creating implied entities and relations from this metadata\n    implication_property: KGAttributeImplicationProperty | None = None\n\n\nclass KGEntityTypeClassificationInfo(BaseModel):\n    extraction: bool\n    description: str\n\n\nclass KGEntityTypeAttributes(BaseModel):\n    # information on how to use the metadata to extract attributes, implied entities, and relations\n    metadata_attribute_conversion: dict[str, KGAttributeProperty] = {}\n    # a metadata key: value pair to match for to differentiate entities from the same source\n    entity_filter_attributes: dict[str, Any] = {}\n    # mapping of classification names to their corresponding classification info\n    classification_attributes: dict[str, KGEntityTypeClassificationInfo] = {}\n\n    # mapping of attribute names to their allowed values, populated during extraction\n    attribute_values: dict[str, KGAttributeTrackInfo | None] = {}\n\n\nclass KGEntityTypeDefinition(BaseModel):\n    description: str\n    grounding: KGGroundingType\n    grounded_source_name: DocumentSource | None\n    active: bool = False\n    attributes: KGEntityTypeAttributes = KGEntityTypeAttributes()\n    entity_values: list[str] = []\n\n\nclass KGChunkFormat(BaseModel):\n    connector_id: int | None = None\n    document_id: str\n    chunk_id: int\n    title: str\n    content: str\n    primary_owners: list[str]\n    secondary_owners: list[str]\n    source_type: str\n    metadata: dict[str, str | list[str]] | None = None\n\n\nclass KGPerson(BaseModel):\n    name: str\n    company: str\n    employee: bool\n\n\nclass NormalizedEntities(BaseModel):\n    entities: list[str]\n    entities_w_attributes: list[str]\n    entity_normalization_map: dict[str, str]\n\n\nclass NormalizedRelationships(BaseModel):\n    relationships: list[str]\n    relationship_normalization_map: dict[str, str]\n\n\nclass KGMetadataContent(BaseModel):\n    document_id: str\n    source_type: str\n    source_metadata: dict[str, Any] | None = None\n\n\nclass KGClassificationInstructions(BaseModel):\n    classification_enabled: bool\n    classification_options: str\n    classification_class_definitions: dict[str, KGEntityTypeClassificationInfo]\n\n\nclass KGExtractionInstructions(BaseModel):\n    deep_extraction: bool\n    active: bool\n\n\nclass KGEntityTypeInstructions(BaseModel):\n    metadata_attribute_conversion: dict[str, KGAttributeProperty]\n    classification_instructions: KGClassificationInstructions\n    extraction_instructions: KGExtractionInstructions\n    entity_filter_attributes: dict[str, Any] | None = None\n\n\nclass KGEnhancedDocumentMetadata(BaseModel):\n    entity_type: str | None\n    metadata_attribute_conversion: dict[str, KGAttributeProperty] | None\n    document_metadata: dict[str, Any] | None\n    deep_extraction: bool\n    classification_enabled: bool\n    classification_instructions: KGClassificationInstructions | None\n    skip: bool\n\n\nclass KGConnectorData(BaseModel):\n    id: int\n    source: str\n    kg_coverage_days: int | None\n\n\nclass KGStage(str, Enum):\n    EXTRACTED = \"extracted\"\n    NORMALIZED = \"normalized\"\n    FAILED = \"failed\"\n    SKIPPED = \"skipped\"\n    NOT_STARTED = \"not_started\"\n    EXTRACTING = \"extracting\"\n    DO_NOT_EXTRACT = \"do_not_extract\"\n\n\nclass KGClassificationResult(BaseModel):\n    document_entity: str\n    classification_class: str\n\n\nclass KGImpliedExtractionResults(BaseModel):\n    document_entity: str\n    implied_entities: set[str]\n    implied_relationships: set[str]\n    company_participant_emails: set[str]\n    account_participant_emails: set[str]\n\n\nclass KGDocumentDeepExtractionResults(BaseModel):\n    classification_result: KGClassificationResult | None\n    deep_extracted_entities: set[str]\n    deep_extracted_relationships: set[str]\n\n\nclass KGException(Exception):\n    pass\n"
  },
  {
    "path": "backend/onyx/kg/resets/reset_index.py",
    "content": "from sqlalchemy.orm import Session\n\nfrom onyx.db.document import reset_all_document_kg_stages\nfrom onyx.db.models import Connector\nfrom onyx.db.models import KGEntity\nfrom onyx.db.models import KGEntityExtractionStaging\nfrom onyx.db.models import KGEntityType\nfrom onyx.db.models import KGRelationship\nfrom onyx.db.models import KGRelationshipExtractionStaging\nfrom onyx.db.models import KGRelationshipType\nfrom onyx.db.models import KGRelationshipTypeExtractionStaging\n\n\ndef reset_full_kg_index__commit(db_session: Session) -> None:\n    \"\"\"\n    Resets the knowledge graph index.\n    \"\"\"\n\n    db_session.query(KGRelationship).delete()\n    db_session.query(KGRelationshipType).delete()\n    db_session.query(KGEntity).delete()\n    db_session.query(KGRelationshipExtractionStaging).delete()\n    db_session.query(KGEntityExtractionStaging).delete()\n    db_session.query(KGRelationshipTypeExtractionStaging).delete()\n    # Update all connectors to disable KG processing\n    db_session.query(Connector).update({\"kg_processing_enabled\": False})\n\n    # Only reset grounded entity types\n    db_session.query(KGEntityType).filter(\n        KGEntityType.grounded_source_name.isnot(None)\n    ).update({\"active\": False})\n\n    reset_all_document_kg_stages(db_session)\n\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/kg/resets/reset_source.py",
    "content": "from redis.lock import Lock as RedisLock\nfrom sqlalchemy import or_\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import Connector\nfrom onyx.db.models import Document\nfrom onyx.db.models import DocumentByConnectorCredentialPair\nfrom onyx.db.models import KGEntity\nfrom onyx.db.models import KGEntityExtractionStaging\nfrom onyx.db.models import KGEntityType\nfrom onyx.db.models import KGRelationship\nfrom onyx.db.models import KGRelationshipExtractionStaging\nfrom onyx.db.models import KGRelationshipType\nfrom onyx.db.models import KGRelationshipTypeExtractionStaging\nfrom onyx.db.models import KGStage\nfrom onyx.kg.resets.reset_index import reset_full_kg_index__commit\nfrom onyx.kg.resets.reset_vespa import reset_vespa_kg_index\n\n\ndef reset_source_kg_index(\n    source_name: str | None, tenant_id: str, index_name: str, lock: RedisLock\n) -> None:\n    \"\"\"\n    Resets the knowledge graph index and vespa for a source.\n    \"\"\"\n    # reset vespa for the source\n    reset_vespa_kg_index(tenant_id, index_name, lock, source_name)\n\n    with get_session_with_current_tenant() as db_session:\n        if source_name is None:\n            reset_full_kg_index__commit(db_session)\n            return\n\n        # get all the entity types for the given source\n        entity_types = [\n            et.id_name\n            for et in db_session.query(KGEntityType)\n            .filter(KGEntityType.grounded_source_name == source_name)\n            .all()\n        ]\n        if not entity_types:\n            raise ValueError(f\"There are no entity types for the source {source_name}\")\n\n        # delete the entity type from the knowledge graph\n        for entity_type in entity_types:\n            db_session.query(KGRelationship).filter(\n                or_(\n                    KGRelationship.source_node_type == entity_type,\n                    KGRelationship.target_node_type == entity_type,\n                )\n            ).delete()\n            db_session.query(KGRelationshipType).filter(\n                or_(\n                    KGRelationshipType.source_entity_type_id_name == entity_type,\n                    KGRelationshipType.target_entity_type_id_name == entity_type,\n                )\n            ).delete()\n            db_session.query(KGEntity).filter(\n                KGEntity.entity_type_id_name == entity_type\n            ).delete()\n            db_session.query(KGRelationshipExtractionStaging).filter(\n                or_(\n                    KGRelationshipExtractionStaging.source_node_type == entity_type,\n                    KGRelationshipExtractionStaging.target_node_type == entity_type,\n                )\n            ).delete()\n            db_session.query(KGEntityExtractionStaging).filter(\n                KGEntityExtractionStaging.entity_type_id_name == entity_type\n            ).delete()\n            db_session.query(KGRelationshipTypeExtractionStaging).filter(\n                or_(\n                    KGRelationshipTypeExtractionStaging.source_entity_type_id_name\n                    == entity_type,\n                    KGRelationshipTypeExtractionStaging.target_entity_type_id_name\n                    == entity_type,\n                )\n            ).delete()\n        db_session.commit()\n\n    with get_session_with_current_tenant() as db_session:\n        # get all the documents for the given source\n        kg_connectors = [\n            connector.id\n            for connector in db_session.query(Connector)\n            .filter(Connector.source == DocumentSource(source_name))\n            .all()\n        ]\n        document_ids = [\n            cc_pair.id\n            for cc_pair in db_session.query(DocumentByConnectorCredentialPair)\n            .filter(DocumentByConnectorCredentialPair.connector_id.in_(kg_connectors))\n            .all()\n        ]\n\n        # reset the kg stage for the documents\n        db_session.query(Document).filter(Document.id.in_(document_ids)).update(\n            {\"kg_stage\": KGStage.NOT_STARTED}\n        )\n        db_session.commit()\n"
  },
  {
    "path": "backend/onyx/kg/resets/reset_vespa.py",
    "content": "import time\nfrom typing import Any\n\nfrom redis.lock import Lock as RedisLock\n\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.document import get_num_chunks_for_document\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import Connector\nfrom onyx.db.models import DocumentByConnectorCredentialPair\nfrom onyx.db.models import KGEntityType\nfrom onyx.document_index.document_index_utils import get_uuid_from_chunk_info\nfrom onyx.document_index.vespa.index import KGVespaChunkUpdateRequest\nfrom onyx.document_index.vespa.index import VespaIndex\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT\nfrom onyx.kg.utils.lock_utils import extend_lock\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\n\ndef _reset_vespa_for_doc(document_id: str, tenant_id: str, index_name: str) -> None:\n    vespa_index = VespaIndex(\n        index_name=index_name,\n        secondary_index_name=None,\n        large_chunks_enabled=False,\n        secondary_large_chunks_enabled=False,\n        multitenant=MULTI_TENANT,\n        httpx_client=None,\n    )\n\n    reset_update_dict: dict[str, Any] = {\n        \"fields\": {\n            \"kg_entities\": {\"assign\": []},\n            \"kg_relationships\": {\"assign\": []},\n            \"kg_terms\": {\"assign\": []},\n        }\n    }\n\n    with get_session_with_current_tenant() as db_session:\n        num_chunks = get_num_chunks_for_document(db_session, document_id)\n\n    vespa_requests: list[KGVespaChunkUpdateRequest] = []\n    for chunk_num in range(num_chunks):\n        doc_chunk_id = get_uuid_from_chunk_info(\n            document_id=document_id,\n            chunk_id=chunk_num,\n            tenant_id=tenant_id,\n            large_chunk_id=None,\n        )\n        vespa_requests.append(\n            KGVespaChunkUpdateRequest(\n                document_id=document_id,\n                chunk_id=chunk_num,\n                url=f\"{DOCUMENT_ID_ENDPOINT.format(index_name=vespa_index.index_name)}/{doc_chunk_id}\",\n                update_request=reset_update_dict,\n            )\n        )\n\n    with vespa_index.httpx_client_context as httpx_client:\n        vespa_index._apply_kg_chunk_updates_batched(vespa_requests, httpx_client)\n\n\ndef reset_vespa_kg_index(\n    tenant_id: str, index_name: str, lock: RedisLock, source_name: str | None = None\n) -> None:\n    \"\"\"\n    Reset the kg info in vespa for all documents of a given source name,\n    or all documents from kg grounded sources if source_name is None.\n    \"\"\"\n    logger.info(\n        f\"Resetting kg vespa index {index_name} for tenant {tenant_id}, source: {source_name if source_name else 'all'}\"\n    )\n\n    last_lock_time = time.monotonic()\n\n    # Get all documents that need a vespa reset\n    with get_session_with_current_tenant() as db_session:\n        if source_name:\n            # get all connectors of the given source name\n            kg_connectors = [\n                connector.id\n                for connector in db_session.query(Connector)\n                .filter(Connector.source == DocumentSource(source_name))\n                .all()\n            ]\n        else:\n            # get all connectors that have kg enabled\n            kg_sources = [\n                DocumentSource(et.grounded_source_name)\n                for et in db_session.query(KGEntityType)\n                .filter(\n                    KGEntityType.grounded_source_name.is_not(None),\n                    KGEntityType.active.is_(True),\n                )\n                .distinct()\n                .all()\n            ]\n            kg_connectors = [\n                connector.id\n                for connector in db_session.query(Connector)\n                .filter(Connector.source.in_(kg_sources))\n                .all()\n            ]\n\n        # Get all the documents for the given connectors\n        document_ids = [\n            cc_pair.id\n            for cc_pair in db_session.query(DocumentByConnectorCredentialPair)\n            .filter(DocumentByConnectorCredentialPair.connector_id.in_(kg_connectors))\n            .all()\n        ]\n\n    # Reset the kg fields\n    for document_id in document_ids:\n        _reset_vespa_for_doc(document_id, tenant_id, index_name)\n        last_lock_time = extend_lock(\n            lock, CELERY_GENERIC_BEAT_LOCK_TIMEOUT, last_lock_time\n        )\n\n    logger.info(\n        f\"Finished resetting kg vespa index {index_name} for tenant {tenant_id}, source: {source_name if source_name else 'all'}\"\n    )\n"
  },
  {
    "path": "backend/onyx/kg/setup/kg_default_entity_definitions.py",
    "content": "from typing import cast\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.entity_type import KGEntityType\nfrom onyx.db.kg_config import get_kg_config_settings\nfrom onyx.db.kg_config import validate_kg_settings\nfrom onyx.kg.models import KGAttributeEntityOption\nfrom onyx.kg.models import KGAttributeImplicationProperty\nfrom onyx.kg.models import KGAttributeProperty\nfrom onyx.kg.models import KGEntityTypeAttributes\nfrom onyx.kg.models import KGEntityTypeClassificationInfo\nfrom onyx.kg.models import KGEntityTypeDefinition\nfrom onyx.kg.models import KGGroundingType\n\n\ndef get_default_entity_types(vendor_name: str) -> dict[str, KGEntityTypeDefinition]:\n    return {\n        \"LINEAR\": KGEntityTypeDefinition(\n            description=\"A formal Linear ticket about a product issue or improvement request.\",\n            attributes=KGEntityTypeAttributes(\n                metadata_attribute_conversion={\n                    \"team\": KGAttributeProperty(name=\"team\", keep=True),\n                    \"state\": KGAttributeProperty(name=\"state\", keep=True),\n                    \"priority\": KGAttributeProperty(name=\"priority\", keep=True),\n                    \"estimate\": KGAttributeProperty(name=\"estimate\", keep=True),\n                    \"created_at\": KGAttributeProperty(name=\"created_at\", keep=True),\n                    \"started_at\": KGAttributeProperty(name=\"started_at\", keep=True),\n                    \"completed_at\": KGAttributeProperty(name=\"completed_at\", keep=True),\n                    \"due_date\": KGAttributeProperty(name=\"due_date\", keep=True),\n                    \"creator\": KGAttributeProperty(\n                        name=\"creator\",\n                        keep=False,\n                        implication_property=KGAttributeImplicationProperty(\n                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,\n                            implied_relationship_name=\"is_creator_of\",\n                        ),\n                    ),\n                    \"assignee\": KGAttributeProperty(\n                        name=\"assignee\",\n                        keep=False,\n                        implication_property=KGAttributeImplicationProperty(\n                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,\n                            implied_relationship_name=\"is_assignee_of\",\n                        ),\n                    ),\n                },\n            ),\n            grounding=KGGroundingType.GROUNDED,\n            grounded_source_name=DocumentSource.LINEAR,\n        ),\n        \"JIRA\": KGEntityTypeDefinition(\n            description=(\n                \"A formal Jira ticket about a product issue or improvement request.\"\n            ),\n            attributes=KGEntityTypeAttributes(\n                metadata_attribute_conversion={\n                    \"issuetype\": KGAttributeProperty(name=\"subtype\", keep=True),\n                    \"status\": KGAttributeProperty(name=\"status\", keep=True),\n                    \"priority\": KGAttributeProperty(name=\"priority\", keep=True),\n                    \"project_name\": KGAttributeProperty(name=\"project\", keep=True),\n                    \"created\": KGAttributeProperty(name=\"created_at\", keep=True),\n                    \"updated\": KGAttributeProperty(name=\"updated_at\", keep=True),\n                    \"resolution_date\": KGAttributeProperty(\n                        name=\"completed_at\", keep=True\n                    ),\n                    \"duedate\": KGAttributeProperty(name=\"due_date\", keep=True),\n                    \"reporter_email\": KGAttributeProperty(\n                        name=\"creator\",\n                        keep=False,\n                        implication_property=KGAttributeImplicationProperty(\n                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,\n                            implied_relationship_name=\"is_creator_of\",\n                        ),\n                    ),\n                    \"assignee_email\": KGAttributeProperty(\n                        name=\"assignee\",\n                        keep=False,\n                        implication_property=KGAttributeImplicationProperty(\n                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,\n                            implied_relationship_name=\"is_assignee_of\",\n                        ),\n                    ),\n                    # not using implication property as that only captures 1 depth\n                    \"key\": KGAttributeProperty(name=\"key\", keep=True),\n                    \"parent\": KGAttributeProperty(name=\"parent\", keep=True),\n                },\n            ),\n            grounding=KGGroundingType.GROUNDED,\n            grounded_source_name=DocumentSource.JIRA,\n        ),\n        \"GITHUB_PR\": KGEntityTypeDefinition(\n            description=\"A formal engineering request to merge proposed changes into the codebase.\",\n            attributes=KGEntityTypeAttributes(\n                metadata_attribute_conversion={\n                    \"repo\": KGAttributeProperty(name=\"repository\", keep=True),\n                    \"state\": KGAttributeProperty(name=\"state\", keep=True),\n                    \"num_commits\": KGAttributeProperty(name=\"num_commits\", keep=True),\n                    \"num_files_changed\": KGAttributeProperty(\n                        name=\"num_files_changed\", keep=True\n                    ),\n                    \"labels\": KGAttributeProperty(name=\"labels\", keep=True),\n                    \"merged\": KGAttributeProperty(name=\"merged\", keep=True),\n                    \"merged_at\": KGAttributeProperty(name=\"merged_at\", keep=True),\n                    \"closed_at\": KGAttributeProperty(name=\"closed_at\", keep=True),\n                    \"created_at\": KGAttributeProperty(name=\"created_at\", keep=True),\n                    \"updated_at\": KGAttributeProperty(name=\"updated_at\", keep=True),\n                    \"user\": KGAttributeProperty(\n                        name=\"creator\",\n                        keep=False,\n                        implication_property=KGAttributeImplicationProperty(\n                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,\n                            implied_relationship_name=\"is_creator_of\",\n                        ),\n                    ),\n                    \"assignees\": KGAttributeProperty(\n                        name=\"assignees\",\n                        keep=False,\n                        implication_property=KGAttributeImplicationProperty(\n                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,\n                            implied_relationship_name=\"is_assignee_of\",\n                        ),\n                    ),\n                },\n                entity_filter_attributes={\"object_type\": \"PullRequest\"},\n            ),\n            grounding=KGGroundingType.GROUNDED,\n            grounded_source_name=DocumentSource.GITHUB,\n        ),\n        \"GITHUB_ISSUE\": KGEntityTypeDefinition(\n            description=\"A formal engineering ticket about an issue, idea, inquiry, or task.\",\n            attributes=KGEntityTypeAttributes(\n                metadata_attribute_conversion={\n                    \"repo\": KGAttributeProperty(name=\"repository\", keep=True),\n                    \"state\": KGAttributeProperty(name=\"state\", keep=True),\n                    \"labels\": KGAttributeProperty(name=\"labels\", keep=True),\n                    \"closed_at\": KGAttributeProperty(name=\"closed_at\", keep=True),\n                    \"created_at\": KGAttributeProperty(name=\"created_at\", keep=True),\n                    \"updated_at\": KGAttributeProperty(name=\"updated_at\", keep=True),\n                    \"user\": KGAttributeProperty(\n                        name=\"creator\",\n                        keep=False,\n                        implication_property=KGAttributeImplicationProperty(\n                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,\n                            implied_relationship_name=\"is_creator_of\",\n                        ),\n                    ),\n                    \"assignees\": KGAttributeProperty(\n                        name=\"assignees\",\n                        keep=False,\n                        implication_property=KGAttributeImplicationProperty(\n                            implied_entity_type=KGAttributeEntityOption.FROM_EMAIL,\n                            implied_relationship_name=\"is_assignee_of\",\n                        ),\n                    ),\n                },\n                entity_filter_attributes={\"object_type\": \"Issue\"},\n            ),\n            grounding=KGGroundingType.GROUNDED,\n            grounded_source_name=DocumentSource.GITHUB,\n        ),\n        \"FIREFLIES\": KGEntityTypeDefinition(\n            description=(\n                f\"A phone call transcript between us ({vendor_name}) and another account or individuals, or an internal meeting.\"\n            ),\n            attributes=KGEntityTypeAttributes(\n                classification_attributes={\n                    \"customer\": KGEntityTypeClassificationInfo(\n                        extraction=True,\n                        description=\"a call with representatives of one or more customers prospects\",\n                    ),\n                    \"internal\": KGEntityTypeClassificationInfo(\n                        extraction=True,\n                        description=\"a call between employees of the vendor's company (a vendor-internal call)\",\n                    ),\n                    \"interview\": KGEntityTypeClassificationInfo(\n                        extraction=True,\n                        description=(\n                            \"a call with an individual who is interviewed or is discussing potential employment with the vendor\"\n                        ),\n                    ),\n                    \"other\": KGEntityTypeClassificationInfo(\n                        extraction=True,\n                        description=(\n                            \"a call with representatives of companies having a different reason for the call \"\n                            \"(investment, partnering, etc.)\"\n                        ),\n                    ),\n                },\n            ),\n            grounding=KGGroundingType.GROUNDED,\n            grounded_source_name=DocumentSource.FIREFLIES,\n        ),\n        \"ACCOUNT\": KGEntityTypeDefinition(\n            description=(\n                \"A company that was, is, or potentially could be a customer of the vendor \"\n                f\"('us, {vendor_name}'). Note that {vendor_name} can never be an ACCOUNT.\"\n            ),\n            attributes=KGEntityTypeAttributes(\n                entity_filter_attributes={\"object_type\": \"Account\"},\n            ),\n            grounding=KGGroundingType.GROUNDED,\n            grounded_source_name=DocumentSource.SALESFORCE,\n        ),\n        \"OPPORTUNITY\": KGEntityTypeDefinition(\n            description=\"A sales opportunity.\",\n            attributes=KGEntityTypeAttributes(\n                metadata_attribute_conversion={\n                    \"name\": KGAttributeProperty(name=\"name\", keep=True),\n                    \"stage_name\": KGAttributeProperty(name=\"stage\", keep=True),\n                    \"type\": KGAttributeProperty(name=\"type\", keep=True),\n                    \"amount\": KGAttributeProperty(name=\"amount\", keep=True),\n                    \"fiscal_year\": KGAttributeProperty(name=\"fiscal_year\", keep=True),\n                    \"fiscal_quarter\": KGAttributeProperty(\n                        name=\"fiscal_quarter\", keep=True\n                    ),\n                    \"is_closed\": KGAttributeProperty(name=\"is_closed\", keep=True),\n                    \"close_date\": KGAttributeProperty(name=\"close_date\", keep=True),\n                    \"probability\": KGAttributeProperty(\n                        name=\"close_probability\", keep=True\n                    ),\n                    \"created_date\": KGAttributeProperty(name=\"created_at\", keep=True),\n                    \"last_modified_date\": KGAttributeProperty(\n                        name=\"updated_at\", keep=True\n                    ),\n                    \"account\": KGAttributeProperty(\n                        name=\"account\",\n                        keep=False,\n                        implication_property=KGAttributeImplicationProperty(\n                            implied_entity_type=\"ACCOUNT\",\n                            implied_relationship_name=\"is_account_of\",\n                        ),\n                    ),\n                },\n                entity_filter_attributes={\"object_type\": \"Opportunity\"},\n            ),\n            grounding=KGGroundingType.GROUNDED,\n            grounded_source_name=DocumentSource.SALESFORCE,\n        ),\n        \"VENDOR\": KGEntityTypeDefinition(\n            description=f\"The Vendor {vendor_name}, 'us'\",\n            grounding=KGGroundingType.GROUNDED,\n            active=True,\n            grounded_source_name=None,\n        ),\n        \"EMPLOYEE\": KGEntityTypeDefinition(\n            description=(\n                f\"A person who speaks on behalf of 'our' company (the VENDOR {vendor_name}), \"\n                \"NOT of another account. Therefore, employees of other companies \"\n                \"are NOT included here. If in doubt, do NOT extract.\"\n            ),\n            grounding=KGGroundingType.GROUNDED,\n            active=False,\n            grounded_source_name=None,\n        ),\n    }\n\n\ndef populate_missing_default_entity_types__commit(db_session: Session) -> None:\n    \"\"\"\n    Populates the database with the missing default entity types.\n    \"\"\"\n    kg_config_settings = get_kg_config_settings()\n    validate_kg_settings(kg_config_settings)\n\n    vendor_name = cast(str, kg_config_settings.KG_VENDOR)\n\n    existing_entity_types = {et.id_name for et in db_session.query(KGEntityType).all()}\n\n    default_entity_types = get_default_entity_types(vendor_name=vendor_name)\n    for entity_type_id_name, entity_type_definition in default_entity_types.items():\n        if entity_type_id_name in existing_entity_types:\n            continue\n\n        grounded_source_name = (\n            entity_type_definition.grounded_source_name.value\n            if entity_type_definition.grounded_source_name\n            else None\n        )\n        kg_entity_type = KGEntityType(\n            id_name=entity_type_id_name,\n            description=entity_type_definition.description,\n            attributes=entity_type_definition.attributes.model_dump(),\n            grounding=entity_type_definition.grounding,\n            grounded_source_name=grounded_source_name,\n            active=entity_type_definition.active,\n        )\n        db_session.add(kg_entity_type)\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/kg/utils/embeddings.py",
    "content": "from typing import List\n\nimport numpy as np\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.natural_language_processing.search_nlp_models import EmbeddingModel\nfrom onyx.natural_language_processing.search_nlp_models import EmbedTextType\nfrom shared_configs.configs import MODEL_SERVER_HOST\nfrom shared_configs.configs import MODEL_SERVER_PORT\n\n\ndef encode_string_batch(strings: List[str]) -> np.ndarray:\n    with get_session_with_current_tenant() as db_session:\n        current_search_settings = get_current_search_settings(db_session)\n        model = EmbeddingModel.from_db_model(\n            search_settings=current_search_settings,\n            server_host=MODEL_SERVER_HOST,\n            server_port=MODEL_SERVER_PORT,\n        )\n        # Get embeddings while session is still open\n        embedding = model.encode(strings, text_type=EmbedTextType.QUERY)\n    return np.array(embedding)\n"
  },
  {
    "path": "backend/onyx/kg/utils/extraction_utils.py",
    "content": "import json\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import OnyxCallTypes\nfrom onyx.configs.kg_configs import KG_METADATA_TRACKING_THRESHOLD\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.entities import get_kg_entity_by_document\nfrom onyx.db.entity_type import get_entity_types\nfrom onyx.db.kg_config import KGConfigSettings\nfrom onyx.db.models import Document\nfrom onyx.db.models import KGEntityType\nfrom onyx.db.models import KGRelationshipType\nfrom onyx.db.tag import get_structured_tags_for_document\nfrom onyx.kg.models import KGAttributeEntityOption\nfrom onyx.kg.models import KGAttributeTrackInfo\nfrom onyx.kg.models import KGAttributeTrackType\nfrom onyx.kg.models import KGChunkFormat\nfrom onyx.kg.models import KGClassificationInstructions\nfrom onyx.kg.models import KGClassificationResult\nfrom onyx.kg.models import KGDocumentDeepExtractionResults\nfrom onyx.kg.models import KGEnhancedDocumentMetadata\nfrom onyx.kg.models import KGImpliedExtractionResults\nfrom onyx.kg.models import KGMetadataContent\nfrom onyx.kg.utils.formatting_utils import extract_email\nfrom onyx.kg.utils.formatting_utils import get_entity_type\nfrom onyx.kg.utils.formatting_utils import kg_email_processing\nfrom onyx.kg.utils.formatting_utils import make_entity_id\nfrom onyx.kg.utils.formatting_utils import make_relationship_id\nfrom onyx.kg.utils.formatting_utils import make_relationship_type_id\nfrom onyx.kg.vespa.vespa_interactions import get_document_vespa_contents\nfrom onyx.llm.factory import get_default_llm\nfrom onyx.llm.models import UserMessage\nfrom onyx.llm.utils import llm_response_to_string\nfrom onyx.prompts.kg_prompts import CALL_CHUNK_PREPROCESSING_PROMPT\nfrom onyx.prompts.kg_prompts import CALL_DOCUMENT_CLASSIFICATION_PROMPT\nfrom onyx.prompts.kg_prompts import GENERAL_CHUNK_PREPROCESSING_PROMPT\nfrom onyx.prompts.kg_prompts import MASTER_EXTRACTION_PROMPT\nfrom onyx.tracing.llm_utils import llm_generation_span\nfrom onyx.tracing.llm_utils import record_llm_response\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef get_entity_types_str(active: bool | None = None) -> str:\n    \"\"\"\n    Format the entity types into a string for the LLM.\n    \"\"\"\n    with get_session_with_current_tenant() as db_session:\n        entity_types = get_entity_types(db_session, active)\n\n        entity_types_list: list[str] = []\n        for entity_type in entity_types:\n            if entity_type.description:\n                entity_description = \"\\n  - Description: \" + entity_type.description\n            else:\n                entity_description = \"\"\n\n            if entity_type.entity_values:\n                allowed_values = \"\\n  - Allowed Values: \" + \", \".join(\n                    entity_type.entity_values\n                )\n            else:\n                allowed_values = \"\"\n\n            attributes = entity_type.parsed_attributes\n\n            entity_type_attribute_list: list[str] = []\n            for attribute, values in attributes.attribute_values.items():\n                entity_type_attribute_list.append(\n                    f\"{attribute}: {trackinfo_to_str(values)}\"\n                )\n\n            if attributes.classification_attributes:\n                entity_type_attribute_list.append(\n                    # TODO: restructure classification attribute to be a dict of attribute name to classification info\n                    # e.g., {scope: {internal: prompt, external: prompt}, sentiment: {positive: prompt, negative: prompt}}\n                    \"classification: one of: \"\n                    + \", \".join(attributes.classification_attributes.keys())\n                )\n            if entity_type_attribute_list:\n                entity_attributes = \"\\n  - Attributes:\\n    - \" + \"\\n    - \".join(\n                    entity_type_attribute_list\n                )\n            else:\n                entity_attributes = \"\"\n\n            entity_types_list.append(\n                entity_type.id_name\n                + entity_description\n                + allowed_values\n                + entity_attributes\n            )\n\n    return \"\\n\".join(entity_types_list)\n\n\ndef get_relationship_types_str(active: bool | None = None) -> str:\n    \"\"\"\n    Format the relationship types into a string for the LLM.\n    \"\"\"\n    with get_session_with_current_tenant() as db_session:\n        active_filters = []\n        if active is not None:\n            active_filters.append(KGRelationshipType.active == active)\n\n        relationship_types = (\n            db_session.query(KGRelationshipType).filter(*active_filters).all()\n        )\n\n        relationship_types_list = []\n        for rel_type in relationship_types:\n            # Format as \"source_type__relationship_type__target_type\"\n            formatted_type = make_relationship_type_id(\n                rel_type.source_entity_type_id_name,\n                rel_type.type,\n                rel_type.target_entity_type_id_name,\n            )\n            relationship_types_list.append(formatted_type)\n\n    return \"\\n\".join(relationship_types_list)\n\n\ndef kg_process_owners(\n    owner_emails: list[str],\n    document_entity_id: str,\n    relationship_type: str,\n    kg_config_settings: KGConfigSettings,\n    active_entity_types: set[str],\n) -> tuple[set[str], set[str], set[str], set[str]]:\n    owner_entities: set[str] = set()\n    owner_relationships: set[str] = set()\n    company_participant_emails: set[str] = set()\n    account_participant_emails: set[str] = set()\n\n    for owner_email in owner_emails:\n        if extract_email(owner_email) is None:\n            continue\n\n        process_results = kg_process_person(\n            owner_email,\n            document_entity_id,\n            relationship_type,\n            kg_config_settings,\n            active_entity_types,\n        )\n        if process_results is None:\n            continue\n\n        (\n            owner_entity,\n            owner_relationship,\n            company_participant_email,\n            account_participant_email,\n        ) = process_results\n\n        owner_entities.add(owner_entity)\n        owner_relationships.add(owner_relationship)\n        if company_participant_email:\n            company_participant_emails.add(company_participant_email)\n        if account_participant_email:\n            account_participant_emails.add(account_participant_email)\n\n    return (\n        owner_entities,\n        owner_relationships,\n        company_participant_emails,\n        account_participant_emails,\n    )\n\n\ndef kg_implied_extraction(\n    document: Document,\n    doc_metadata: KGEnhancedDocumentMetadata,\n    active_entity_types: set[str],\n    kg_config_settings: KGConfigSettings,\n) -> KGImpliedExtractionResults:\n    \"\"\"\n    Generate entities, relationships, and attributes for a document.\n    \"\"\"\n\n    # Get document entity and metadata stuff from the KGEnhancedDocumentMetadata\n    document_entity_type = doc_metadata.entity_type\n    document_metadata = doc_metadata.document_metadata or {}\n    metadata_attribute_conversion = doc_metadata.metadata_attribute_conversion\n    if document_entity_type is None or metadata_attribute_conversion is None:\n        raise ValueError(\"Entity type and metadata attributes are required\")\n\n    implied_entities: set[str] = set()\n    implied_relationships: set[str] = set()\n\n    # Quantity needed for call processing - participants from vendor\n    company_participant_emails: set[str] = set()\n    # Quantity needed for call processing - external participants\n    account_participant_emails: set[str] = set()\n\n    # Chunk treatment variables\n\n    document_is_from_call = document_entity_type.lower() in (\n        call_type.value.lower() for call_type in OnyxCallTypes\n    )\n\n    # Get core entity\n\n    document_id = document.id\n    primary_owners = document.primary_owners\n    secondary_owners = document.secondary_owners\n\n    with get_session_with_current_tenant() as db_session:\n        document_entity = get_kg_entity_by_document(db_session, document_id)\n\n    if document_entity:\n        document_entity_id = document_entity.id_name\n    else:\n        document_entity_id = make_entity_id(document_entity_type, document_id)\n\n    # Get implied entities and relationships from primary/secondary owners\n\n    if document_is_from_call:\n        (\n            implied_entities,\n            implied_relationships,\n            company_participant_emails,\n            account_participant_emails,\n        ) = kg_process_owners(\n            owner_emails=(primary_owners or []) + (secondary_owners or []),\n            document_entity_id=document_entity_id,\n            relationship_type=\"participates_in\",\n            kg_config_settings=kg_config_settings,\n            active_entity_types=active_entity_types,\n        )\n    else:\n        (\n            implied_entities,\n            implied_relationships,\n            company_participant_emails,\n            account_participant_emails,\n        ) = kg_process_owners(\n            owner_emails=primary_owners or [],\n            document_entity_id=document_entity_id,\n            relationship_type=\"leads\",\n            kg_config_settings=kg_config_settings,\n            active_entity_types=active_entity_types,\n        )\n\n        (\n            participant_entities,\n            participant_relationships,\n            company_emails,\n            account_emails,\n        ) = kg_process_owners(\n            owner_emails=secondary_owners or [],\n            document_entity_id=document_entity_id,\n            relationship_type=\"participates_in\",\n            kg_config_settings=kg_config_settings,\n            active_entity_types=active_entity_types,\n        )\n        implied_entities.update(participant_entities)\n        implied_relationships.update(participant_relationships)\n        company_participant_emails.update(company_emails)\n        account_participant_emails.update(account_emails)\n\n    # Get implied entities and relationships from document metadata\n    for metadata, value in document_metadata.items():\n        # get implication property for this metadata\n        if metadata not in metadata_attribute_conversion:\n            continue\n        if (\n            implication_property := metadata_attribute_conversion[\n                metadata\n            ].implication_property\n        ) is None:\n            continue\n\n        if not isinstance(value, str) and not isinstance(value, list):\n            continue\n        values: list[str] = [value] if isinstance(value, str) else value\n\n        # create implied entities and relationships\n        for item in values:\n            if (\n                implication_property.implied_entity_type\n                == KGAttributeEntityOption.FROM_EMAIL\n            ):\n                # determine entity type from email\n                email = extract_email(item)\n                if email is None:\n                    continue\n                process_results = kg_process_person(\n                    email=email,\n                    document_entity_id=document_entity_id,\n                    relationship_type=implication_property.implied_relationship_name,\n                    kg_config_settings=kg_config_settings,\n                    active_entity_types=active_entity_types,\n                )\n                if process_results is None:\n                    continue\n\n                (implied_entity, implied_relationship, _, _) = process_results\n                implied_entities.add(implied_entity)\n                implied_relationships.add(implied_relationship)\n            else:\n                # use the given entity type\n                entity_type = implication_property.implied_entity_type\n                if entity_type not in active_entity_types:\n                    continue\n\n                implied_entity = make_entity_id(entity_type, item)\n                implied_entities.add(implied_entity)\n                implied_relationships.add(\n                    make_relationship_id(\n                        implied_entity,\n                        implication_property.implied_relationship_name,\n                        document_entity_id,\n                    )\n                )\n\n    return KGImpliedExtractionResults(\n        document_entity=document_entity_id,\n        implied_entities=implied_entities,\n        implied_relationships=implied_relationships,\n        company_participant_emails=company_participant_emails,\n        account_participant_emails=account_participant_emails,\n    )\n\n\ndef kg_deep_extraction(\n    document_id: str,\n    metadata: KGEnhancedDocumentMetadata,\n    implied_extraction: KGImpliedExtractionResults,\n    tenant_id: str,\n    index_name: str,\n    kg_config_settings: KGConfigSettings,\n) -> KGDocumentDeepExtractionResults:\n    \"\"\"\n    Perform deep extraction and classification on the document.\n    \"\"\"\n    result = KGDocumentDeepExtractionResults(\n        classification_result=None,\n        deep_extracted_entities=set(),\n        deep_extracted_relationships=set(),\n    )\n\n    entity_types_str = get_entity_types_str(active=True)\n    relationship_types_str = get_relationship_types_str(active=True)\n\n    for i, chunk_batch in enumerate(\n        get_document_vespa_contents(document_id, index_name, tenant_id)\n    ):\n        # use first batch for classification\n        if i == 0 and metadata.classification_enabled:\n            if not metadata.classification_instructions:\n                raise ValueError(\n                    \"Classification is enabled but no instructions are provided\"\n                )\n            result.classification_result = kg_classify_document(\n                document_entity=implied_extraction.document_entity,\n                chunk_batch=chunk_batch,\n                implied_extraction=implied_extraction,\n                classification_instructions=metadata.classification_instructions,\n                kg_config_settings=kg_config_settings,\n            )\n\n        # deep extract from this chunk batch\n        chunk_batch_results = kg_deep_extract_chunks(\n            document_entity=implied_extraction.document_entity,\n            chunk_batch=chunk_batch,\n            implied_extraction=implied_extraction,\n            kg_config_settings=kg_config_settings,\n            entity_types_str=entity_types_str,\n            relationship_types_str=relationship_types_str,\n        )\n        if chunk_batch_results is not None:\n            result.deep_extracted_entities.update(\n                chunk_batch_results.deep_extracted_entities\n            )\n            result.deep_extracted_relationships.update(\n                chunk_batch_results.deep_extracted_relationships\n            )\n\n    return result\n\n\ndef kg_classify_document(\n    document_entity: str,\n    chunk_batch: list[KGChunkFormat],\n    implied_extraction: KGImpliedExtractionResults,\n    classification_instructions: KGClassificationInstructions,\n    kg_config_settings: KGConfigSettings,\n) -> KGClassificationResult | None:\n    # currently, classification is only done for calls\n    # TODO: add support (or use same prompt and format) for non-call documents\n    entity_type = get_entity_type(document_entity)\n    if entity_type not in (call_type.value for call_type in OnyxCallTypes):\n        return None\n\n    # prepare prompt\n    implied_extraction.document_entity\n    company_participants = implied_extraction.company_participant_emails\n    account_participants = implied_extraction.account_participant_emails\n    content = (\n        f\"Title: {chunk_batch[0].title}:\\nVendor Participants:\\n\"\n        + \"\".join(f\" - {participant}\\n\" for participant in company_participants)\n        + \"Other Participants:\\n\"\n        + \"\".join(f\" - {participant}\\n\" for participant in account_participants)\n        + \"Call Content:\\n\"\n        + \"\\n\".join(chunk.content for chunk in chunk_batch)\n    )\n    category_list = {\n        cls: definition.description\n        for cls, definition in classification_instructions.classification_class_definitions.items()\n    }\n    prompt = CALL_DOCUMENT_CLASSIFICATION_PROMPT.format(\n        beginning_of_call_content=content,\n        category_list=category_list,\n        category_options=classification_instructions.classification_options,\n        vendor=kg_config_settings.KG_VENDOR,\n    )\n\n    # classify with LLM with Braintrust tracing\n    llm = get_default_llm()\n    try:\n        prompt_msg = UserMessage(content=prompt)\n        with llm_generation_span(\n            llm=llm, flow=\"kg_document_classification\", input_messages=[prompt_msg]\n        ) as span_generation:\n            response = llm.invoke(prompt_msg)\n            record_llm_response(span_generation, response)\n            raw_classification_result = llm_response_to_string(response)\n\n        classification_result = (\n            raw_classification_result.replace(\"```json\", \"\").replace(\"```\", \"\").strip()\n        )\n        # no json parsing here because of reasoning output\n        classification_class = classification_result.split(\"CATEGORY:\")[1].strip()\n\n        if (\n            classification_class\n            in classification_instructions.classification_class_definitions\n        ):\n            return KGClassificationResult(\n                document_entity=document_entity,\n                classification_class=classification_class,\n            )\n    except Exception as e:\n        logger.error(f\"Failed to classify document {document_entity}. Error: {str(e)}\")\n    return None\n\n\ndef kg_deep_extract_chunks(\n    document_entity: str,\n    chunk_batch: list[KGChunkFormat],\n    implied_extraction: KGImpliedExtractionResults,\n    kg_config_settings: KGConfigSettings,\n    entity_types_str: str,\n    relationship_types_str: str,\n) -> KGDocumentDeepExtractionResults | None:\n    # currently, calls are treated differently\n    # TODO: either treat some other documents differently too, or ideally all the same way\n    entity_type = get_entity_type(document_entity)\n    is_call = entity_type in (call_type.value for call_type in OnyxCallTypes)\n\n    content = \"\\n\".join(chunk.content for chunk in chunk_batch)\n\n    # prepare prompt\n    if is_call:\n        company_participants_str = \"\".join(\n            f\" - {participant}\\n\"\n            for participant in implied_extraction.company_participant_emails\n        )\n        account_participants_str = \"\".join(\n            f\" - {participant}\\n\"\n            for participant in implied_extraction.account_participant_emails\n        )\n        llm_context = CALL_CHUNK_PREPROCESSING_PROMPT.format(\n            participant_string=company_participants_str,\n            account_participant_string=account_participants_str,\n            vendor=kg_config_settings.KG_VENDOR,\n            content=content,\n        )\n    else:\n        llm_context = GENERAL_CHUNK_PREPROCESSING_PROMPT.format(\n            vendor=kg_config_settings.KG_VENDOR,\n            content=content,\n        )\n    prompt = MASTER_EXTRACTION_PROMPT.format(\n        entity_types=entity_types_str,\n        relationship_types=relationship_types_str,\n    ).replace(\"---content---\", llm_context)\n\n    # extract with LLM with Braintrust tracing\n    llm = get_default_llm()\n    try:\n        prompt_msg = UserMessage(content=prompt)\n        with llm_generation_span(\n            llm=llm, flow=\"kg_deep_extraction\", input_messages=[prompt_msg]\n        ) as span_generation:\n            response = llm.invoke(prompt_msg)\n            record_llm_response(span_generation, response)\n            raw_extraction_result = llm_response_to_string(response)\n\n        cleaned_response = (\n            raw_extraction_result.replace(\"{{\", \"{\")\n            .replace(\"}}\", \"}\")\n            .replace(\"```json\\n\", \"\")\n            .replace(\"\\n```\", \"\")\n            .replace(\"\\n\", \"\")\n        )\n        first_bracket = cleaned_response.find(\"{\")\n        last_bracket = cleaned_response.rfind(\"}\")\n        cleaned_response = cleaned_response[first_bracket : last_bracket + 1]\n        parsed_result = json.loads(cleaned_response)\n        return KGDocumentDeepExtractionResults(\n            classification_result=None,\n            deep_extracted_entities=set(parsed_result.get(\"entities\", [])),\n            deep_extracted_relationships={\n                rel.replace(\" \", \"_\") for rel in parsed_result.get(\"relationships\", [])\n            },\n        )\n    except Exception as e:\n        failed_chunks = [chunk.chunk_id for chunk in chunk_batch]\n        logger.error(\n            f\"Failed to process chunks {failed_chunks} from document {document_entity}. Error: {str(e)}\"\n        )\n    return None\n\n\ndef kg_process_person(\n    email: str,\n    document_entity_id: str,\n    relationship_type: str,\n    kg_config_settings: KGConfigSettings,\n    active_entity_types: set[str],\n) -> tuple[str, str, str, str] | None:\n    \"\"\"\n    Create an employee or account entity from an email address, and a relationship to\n    the entity from the document that the email is from.\n\n    Returns:\n        tuple containing (person_entity, person_relationship, company_participant_email,\n        and account_participant_email), or None if the created entity is not of an\n        active entity type or is from an ignored email domain.\n    \"\"\"\n    kg_person = kg_email_processing(email, kg_config_settings)\n    if any(\n        domain.lower() in kg_person.company.lower()\n        for domain in kg_config_settings.KG_IGNORE_EMAIL_DOMAINS\n    ):\n        return None\n\n    person_entity = None\n    if kg_person.employee and \"EMPLOYEE\" in active_entity_types:\n        person_entity = make_entity_id(\"EMPLOYEE\", kg_person.name)\n    elif not kg_person.employee and \"ACCOUNT\" in active_entity_types:\n        person_entity = make_entity_id(\"ACCOUNT\", kg_person.company)\n\n    if person_entity:\n        is_account = person_entity.startswith(\"ACCOUNT\")\n        participant_email = f\"{kg_person.name} -- ({kg_person.company})\"\n        return (\n            person_entity,\n            make_relationship_id(person_entity, relationship_type, document_entity_id),\n            participant_email if not is_account else \"\",\n            participant_email if is_account else \"\",\n        )\n\n    return None\n\n\ndef get_batch_documents_metadata(\n    document_ids: list[str], connector_source: str\n) -> list[KGMetadataContent]:\n    \"\"\"\n    Gets the metadata for a batch of documents.\n    \"\"\"\n    batch_metadata: list[KGMetadataContent] = []\n    source_type = DocumentSource(connector_source).value\n\n    with get_session_with_current_tenant() as db_session:\n        for document_id in document_ids:\n            # get document metadata\n            metadata = get_structured_tags_for_document(document_id, db_session)\n\n            batch_metadata.append(\n                KGMetadataContent(\n                    document_id=document_id,\n                    source_type=source_type,\n                    source_metadata=metadata,\n                )\n            )\n    return batch_metadata\n\n\ndef trackinfo_to_str(trackinfo: KGAttributeTrackInfo | None) -> str:\n    \"\"\"Convert trackinfo to an LLM friendly string\"\"\"\n    if trackinfo is None:\n        return \"\"\n\n    if trackinfo.type == KGAttributeTrackType.LIST:\n        if trackinfo.values is None:\n            return \"a list of any suitable values\"\n        return \"a list with possible values: \" + \", \".join(trackinfo.values)\n    elif trackinfo.type == KGAttributeTrackType.VALUE:\n        if trackinfo.values is None:\n            return \"any suitable value\"\n        return \"one of: \" + \", \".join(trackinfo.values)\n\n\ndef trackinfo_to_dict(trackinfo: KGAttributeTrackInfo | None) -> dict | None:\n    if trackinfo is None:\n        return None\n    return {\n        \"type\": trackinfo.type,\n        \"values\": (list(trackinfo.values) if trackinfo.values else None),\n    }\n\n\nclass EntityTypeMetadataTracker:\n    def __init__(self) -> None:\n        \"\"\"\n        Tracks the possible values the metadata attributes can take for each entity type.\n        \"\"\"\n        # entity type -> attribute -> trackinfo\n        self.entity_attr_info: dict[str, dict[str, KGAttributeTrackInfo | None]] = {}\n        self.entity_allowed_attrs: dict[str, set[str]] = {}\n\n    def import_typeinfo(self) -> None:\n        \"\"\"\n        Loads the metadata tracking information from the database.\n        \"\"\"\n        with get_session_with_current_tenant() as db_session:\n            entity_types = db_session.query(KGEntityType).all()\n\n        for entity_type in entity_types:\n            self.entity_attr_info[entity_type.id_name] = (\n                entity_type.parsed_attributes.attribute_values\n            )\n            self.entity_allowed_attrs[entity_type.id_name] = {\n                attr.name\n                for attr in entity_type.parsed_attributes.metadata_attribute_conversion.values()\n            }\n\n    def export_typeinfo(self) -> None:\n        \"\"\"\n        Exports the metadata tracking information to the database.\n        \"\"\"\n        with get_session_with_current_tenant() as db_session:\n            for entity_type_id_name, attribute_values in self.entity_attr_info.items():\n                db_session.query(KGEntityType).filter(\n                    KGEntityType.id_name == entity_type_id_name\n                ).update(\n                    {\n                        KGEntityType.attributes: KGEntityType.attributes.op(\"||\")(\n                            {\n                                \"attribute_values\": {\n                                    attr: trackinfo_to_dict(info)\n                                    for attr, info in attribute_values.items()\n                                }\n                            }\n                        )\n                    },\n                    synchronize_session=False,\n                )\n            db_session.commit()\n\n    def track_metadata(\n        self, entity_type: str, attributes: dict[str, str | list[str]]\n    ) -> None:\n        \"\"\"\n        Tracks which values are possible for the given attributes.\n        If the attribute value is a list, we track the values in the list rather than the list itself.\n        If we see to many different values, we stop tracking the attribute.\n        \"\"\"\n        for attribute, value in attributes.items():\n            # ignore types/metadata we are not tracking\n            if entity_type not in self.entity_attr_info:\n                continue\n            if attribute not in self.entity_allowed_attrs[entity_type]:\n                continue\n\n            # determine if the attribute is a list or a value\n            trackinfo = self.entity_attr_info[entity_type].get(attribute, None)\n            if trackinfo is None:\n                trackinfo = KGAttributeTrackInfo(\n                    type=(\n                        KGAttributeTrackType.VALUE\n                        if isinstance(value, str)\n                        else KGAttributeTrackType.LIST\n                    ),\n                    values=set(),\n                )\n                self.entity_attr_info[entity_type][attribute] = trackinfo\n\n            # None means marked as don't track\n            if trackinfo.values is None:\n                continue\n\n            # track the value\n            if isinstance(value, str):\n                trackinfo.values.add(value)\n            else:\n                trackinfo.type = KGAttributeTrackType.LIST\n                trackinfo.values.update(value)\n\n            # if we see to many different values, we stop tracking\n            if len(trackinfo.values) > KG_METADATA_TRACKING_THRESHOLD:\n                trackinfo.values = None\n"
  },
  {
    "path": "backend/onyx/kg/utils/formatting_utils.py",
    "content": "import re\n\nfrom onyx.db.kg_config import KGConfigSettings\nfrom onyx.kg.models import KGPerson\n\n\ndef format_entity_id(entity_id_name: str) -> str:\n    return make_entity_id(*split_entity_id(entity_id_name))\n\n\ndef make_entity_id(entity_type: str, entity_name: str) -> str:\n    return f\"{entity_type.upper()}::{entity_name.lower()}\"\n\n\ndef split_entity_id(entity_id_name: str) -> list[str]:\n    return entity_id_name.split(\"::\")\n\n\ndef get_entity_type(entity_id_name: str) -> str:\n    return entity_id_name.split(\"::\", 1)[0].upper()\n\n\ndef format_entity_id_for_models(entity_id_name: str) -> str:\n    entity_split = entity_id_name.split(\"::\")\n    if len(entity_split) == 2:\n        entity_type, entity_name = entity_split\n        separator = \"::\"\n    elif len(entity_split) > 2:\n        raise ValueError(f\"Entity {entity_id_name} is not in the correct format\")\n    else:\n        entity_name = entity_id_name\n        separator = entity_type = \"\"\n\n    formatted_entity_type = entity_type.strip().upper()\n    formatted_entity_name = entity_name.strip().replace('\"', \"\").replace(\"'\", \"\")\n\n    return f\"{formatted_entity_type}{separator}{formatted_entity_name}\"\n\n\ndef get_attributes(entity_w_attributes: str) -> dict[str, str]:\n    \"\"\"\n    Extract attributes from an entity string.\n    E.g., \"TYPE::Entity--[attr1: value1, attr2: value2]\" -> {\"attr1\": \"value1\", \"attr2\": \"value2\"}\n    \"\"\"\n    attr_split = entity_w_attributes.split(\"--\")\n    if len(attr_split) != 2:\n        raise ValueError(f\"Invalid entity with attributes: {entity_w_attributes}\")\n\n    match = re.search(r\"\\[(.*)\\]\", attr_split[1])\n    if not match:\n        return {}\n\n    attr_list_str = match.group(1)\n    return {\n        attr_split[0].strip(): attr_split[1].strip()\n        for attr in attr_list_str.split(\",\")\n        if len(attr_split := attr.split(\":\", 1)) == 2\n    }\n\n\ndef make_entity_w_attributes(entity: str, attributes: dict[str, str]) -> str:\n    return f\"{entity}--[{', '.join(f'{k}: {v}' for k, v in attributes.items())}]\"\n\n\ndef format_relationship_id(relationship_id_name: str) -> str:\n    return make_relationship_id(*split_relationship_id(relationship_id_name))\n\n\ndef make_relationship_id(\n    source_node: str, relationship_type: str, target_node: str\n) -> str:\n    return f\"{format_entity_id(source_node)}__{relationship_type.lower()}__{format_entity_id(target_node)}\"\n\n\ndef split_relationship_id(relationship_id_name: str) -> list[str]:\n    return relationship_id_name.split(\"__\")\n\n\ndef format_relationship_type_id(relationship_type_id_name: str) -> str:\n    return make_relationship_type_id(\n        *split_relationship_type_id(relationship_type_id_name)\n    )\n\n\ndef make_relationship_type_id(\n    source_node_type: str, relationship_type: str, target_node_type: str\n) -> str:\n    return f\"{source_node_type.upper()}__{relationship_type.lower()}__{target_node_type.upper()}\"\n\n\ndef split_relationship_type_id(relationship_type_id_name: str) -> list[str]:\n    return relationship_type_id_name.split(\"__\")\n\n\ndef extract_relationship_type_id(relationship_id_name: str) -> str:\n    source_node, relationship_type, target_node = split_relationship_id(\n        relationship_id_name\n    )\n    return make_relationship_type_id(\n        get_entity_type(source_node), relationship_type, get_entity_type(target_node)\n    )\n\n\ndef extract_email(email: str) -> str | None:\n    \"\"\"\n    Extract an email from an arbitrary string (if any).\n    Only the first email is returned.\n    \"\"\"\n    match = re.search(r\"([A-Za-z0-9._+-]+@[A-Za-z0-9-]+(?:\\.[A-Za-z0-9-]+)+)\", email)\n    return match.group(0) if match else None\n\n\ndef kg_email_processing(email: str, kg_config_settings: KGConfigSettings) -> KGPerson:\n    \"\"\"\n    Process the email.\n    \"\"\"\n    name, company_domain = email.split(\"@\")\n    assert isinstance(company_domain, str)\n    assert isinstance(kg_config_settings.KG_VENDOR_DOMAINS, list)\n    assert isinstance(kg_config_settings.KG_VENDOR, str)\n\n    employee = any(\n        domain in company_domain for domain in kg_config_settings.KG_VENDOR_DOMAINS\n    )\n    if employee:\n        company = kg_config_settings.KG_VENDOR\n    else:\n        # TODO: maybe store a list of domains for each account and use that to match\n        # right now, gmail and other random domains are being converted into accounts\n        company = company_domain.title()\n\n    return KGPerson(name=name, company=company, employee=employee)\n"
  },
  {
    "path": "backend/onyx/kg/utils/lock_utils.py",
    "content": "import time\n\nfrom redis.lock import Lock as RedisLock\n\n\ndef extend_lock(lock: RedisLock, timeout: int, last_lock_time: float) -> float:\n    current_time = time.monotonic()\n    if current_time - last_lock_time >= (timeout / 4):\n        lock.reacquire()\n        last_lock_time = current_time\n\n    return last_lock_time\n"
  },
  {
    "path": "backend/onyx/kg/vespa/vespa_interactions.py",
    "content": "import json\nfrom collections.abc import Generator\n\nfrom onyx.document_index.vespa.chunk_retrieval import get_chunks_via_visit_api\nfrom onyx.document_index.vespa.chunk_retrieval import VespaChunkRequest\nfrom onyx.document_index.vespa.index import IndexFilters\nfrom onyx.kg.models import KGChunkFormat\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef get_document_vespa_contents(\n    document_id: str,\n    index_name: str,\n    tenant_id: str,\n    batch_size: int = 8,\n) -> Generator[list[KGChunkFormat], None, None]:\n    \"\"\"\n    Retrieves chunks from Vespa for the given document IDs and converts them to KGChunks.\n\n    Args:\n        document_id (str): ID of the document to fetch chunks for\n        index_name (str): Name of the Vespa index\n        tenant_id (str): ID of the tenant\n        batch_size (int): Number of chunks to fetch per batch\n\n    Yields:\n        list[KGChunk]: Batches of chunks ready for KG processing\n    \"\"\"\n\n    current_batch: list[KGChunkFormat] = []\n\n    # get all chunks for the document\n    # TODO: revisit the visit function\n    chunks = get_chunks_via_visit_api(\n        chunk_request=VespaChunkRequest(document_id=document_id),\n        index_name=index_name,\n        filters=IndexFilters(access_control_list=None, tenant_id=tenant_id),\n        field_names=[\n            \"document_id\",\n            \"chunk_id\",\n            \"title\",\n            \"content\",\n            \"metadata\",\n            \"primary_owners\",\n            \"secondary_owners\",\n            \"source_type\",\n        ],\n        get_large_chunks=False,\n    )\n\n    # Convert Vespa chunks to KGChunks\n    # kg_chunks: list[KGChunkFormat] = []\n\n    for i, chunk in enumerate(chunks):\n        fields = chunk[\"fields\"]\n        if isinstance(fields.get(\"metadata\", {}), str):\n            fields[\"metadata\"] = json.loads(fields[\"metadata\"])\n        current_batch.append(\n            KGChunkFormat(\n                connector_id=None,  # We may need to adjust this\n                document_id=fields.get(\"document_id\"),\n                chunk_id=fields.get(\"chunk_id\"),\n                primary_owners=fields.get(\"primary_owners\", []),\n                secondary_owners=fields.get(\"secondary_owners\", []),\n                source_type=fields.get(\"source_type\", \"\"),\n                title=fields.get(\"title\", \"\"),\n                content=fields.get(\"content\", \"\"),\n                metadata=fields.get(\"metadata\", {}),\n            )\n        )\n\n        if len(current_batch) >= batch_size:\n            yield current_batch\n            current_batch = []\n\n    # Yield any remaining chunks\n    if current_batch:\n        yield current_batch\n"
  },
  {
    "path": "backend/onyx/llm/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/llm/constants.py",
    "content": "\"\"\"\nLLM Constants\n\nCentralized constants for LLM providers, vendors, and display names.\n\"\"\"\n\nfrom enum import Enum\n\n\n# Provider names\nclass LlmProviderNames(str, Enum):\n    \"\"\"\n    Canonical string identifiers for LLM providers.\n    \"\"\"\n\n    OPENAI = \"openai\"\n    ANTHROPIC = \"anthropic\"\n    GOOGLE = \"google\"\n    BEDROCK = \"bedrock\"\n    BEDROCK_CONVERSE = \"bedrock_converse\"\n    VERTEX_AI = \"vertex_ai\"\n    OPENROUTER = \"openrouter\"\n    AZURE = \"azure\"\n    OLLAMA_CHAT = \"ollama_chat\"\n    LM_STUDIO = \"lm_studio\"\n    MISTRAL = \"mistral\"\n    LITELLM_PROXY = \"litellm_proxy\"\n    BIFROST = \"bifrost\"\n\n    def __str__(self) -> str:\n        \"\"\"Needed so things like:\n\n        f\"{LlmProviderNames.OPENAI}/\" gives back \"openai/\" instead of \"LlmProviderNames.OPENAI/\"\n        \"\"\"\n        return self.value\n\n\nWELL_KNOWN_PROVIDER_NAMES = [\n    LlmProviderNames.OPENAI,\n    LlmProviderNames.ANTHROPIC,\n    LlmProviderNames.VERTEX_AI,\n    LlmProviderNames.BEDROCK,\n    LlmProviderNames.OPENROUTER,\n    LlmProviderNames.AZURE,\n    LlmProviderNames.OLLAMA_CHAT,\n    LlmProviderNames.LM_STUDIO,\n    LlmProviderNames.LITELLM_PROXY,\n    LlmProviderNames.BIFROST,\n]\n\n\n# Proper capitalization for known providers and vendors\nPROVIDER_DISPLAY_NAMES: dict[str, str] = {\n    LlmProviderNames.OPENAI: \"OpenAI\",\n    LlmProviderNames.ANTHROPIC: \"Anthropic\",\n    LlmProviderNames.GOOGLE: \"Google\",\n    LlmProviderNames.BEDROCK: \"Bedrock\",\n    LlmProviderNames.BEDROCK_CONVERSE: \"Bedrock\",\n    LlmProviderNames.VERTEX_AI: \"Vertex AI\",\n    LlmProviderNames.OPENROUTER: \"OpenRouter\",\n    LlmProviderNames.AZURE: \"Azure\",\n    \"ollama\": \"Ollama\",\n    LlmProviderNames.OLLAMA_CHAT: \"Ollama\",\n    LlmProviderNames.LM_STUDIO: \"LM Studio\",\n    LlmProviderNames.LITELLM_PROXY: \"LiteLLM Proxy\",\n    LlmProviderNames.BIFROST: \"Bifrost\",\n    \"groq\": \"Groq\",\n    \"anyscale\": \"Anyscale\",\n    \"deepseek\": \"DeepSeek\",\n    \"xai\": \"xAI\",\n    LlmProviderNames.MISTRAL: \"Mistral\",\n    \"mistralai\": \"Mistral\",  # Alias used by some providers\n    \"cohere\": \"Cohere\",\n    \"perplexity\": \"Perplexity\",\n    \"amazon\": \"Amazon\",\n    \"meta\": \"Meta\",\n    \"meta-llama\": \"Meta\",  # Alias used by some providers\n    \"ai21\": \"AI21\",\n    \"nvidia\": \"NVIDIA\",\n    \"databricks\": \"Databricks\",\n    \"alibaba\": \"Alibaba\",\n    \"qwen\": \"Qwen\",\n    \"microsoft\": \"Microsoft\",\n    \"gemini\": \"Gemini\",\n    \"stability\": \"Stability\",\n    \"writer\": \"Writer\",\n}\n\n# Map vendors to their brand names (used for provider_display_name generation)\nVENDOR_BRAND_NAMES: dict[str, str] = {\n    \"anthropic\": \"Claude\",\n    \"openai\": \"GPT\",\n    \"google\": \"Gemini\",\n    \"amazon\": \"Nova\",\n    \"meta\": \"Llama\",\n    \"mistral\": \"Mistral\",\n    \"cohere\": \"Command\",\n    \"deepseek\": \"DeepSeek\",\n    \"xai\": \"Grok\",\n    \"perplexity\": \"Sonar\",\n    \"ai21\": \"Jamba\",\n    \"nvidia\": \"Nemotron\",\n    \"qwen\": \"Qwen\",\n    \"alibaba\": \"Qwen\",\n    \"writer\": \"Palmyra\",\n}\n\n# Aggregator providers that host models from multiple vendors\nAGGREGATOR_PROVIDERS: set[str] = {\n    LlmProviderNames.BEDROCK,\n    LlmProviderNames.BEDROCK_CONVERSE,\n    LlmProviderNames.OPENROUTER,\n    LlmProviderNames.OLLAMA_CHAT,\n    LlmProviderNames.LM_STUDIO,\n    LlmProviderNames.VERTEX_AI,\n    LlmProviderNames.AZURE,\n    LlmProviderNames.LITELLM_PROXY,\n    LlmProviderNames.BIFROST,\n}\n\n# Model family name mappings for display name generation\n# Used by Bedrock display name generator\nBEDROCK_MODEL_NAME_MAPPINGS: dict[str, str] = {\n    \"claude\": \"Claude\",\n    \"llama\": \"Llama\",\n    \"mistral\": \"Mistral\",\n    \"mixtral\": \"Mixtral\",\n    \"titan\": \"Titan\",\n    \"nova\": \"Nova\",\n    \"jamba\": \"Jamba\",\n    \"command\": \"Command\",\n    \"deepseek\": \"DeepSeek\",\n}\n\n# Used by Ollama display name generator\nOLLAMA_MODEL_NAME_MAPPINGS: dict[str, str] = {\n    \"llama\": \"Llama\",\n    \"qwen\": \"Qwen\",\n    \"mistral\": \"Mistral\",\n    \"deepseek\": \"DeepSeek\",\n    \"gemma\": \"Gemma\",\n    \"phi\": \"Phi\",\n    \"codellama\": \"Code Llama\",\n    \"starcoder\": \"StarCoder\",\n    \"wizardcoder\": \"WizardCoder\",\n    \"vicuna\": \"Vicuna\",\n    \"orca\": \"Orca\",\n    \"dolphin\": \"Dolphin\",\n    \"nous\": \"Nous\",\n    \"neural\": \"Neural\",\n    \"mixtral\": \"Mixtral\",\n    \"falcon\": \"Falcon\",\n    \"yi\": \"Yi\",\n    \"command\": \"Command\",\n    \"zephyr\": \"Zephyr\",\n    \"openchat\": \"OpenChat\",\n    \"solar\": \"Solar\",\n}\n\n# Bedrock model token limits (AWS doesn't expose this via API)\n# Note: Many Bedrock model IDs include context length suffix (e.g., \":200k\")\n# which is parsed first. This mapping is for models without suffixes.\n# Sources:\n# - LiteLLM model_prices_and_context_window.json\n# - AWS Bedrock documentation and announcement blogs\nBEDROCK_MODEL_TOKEN_LIMITS: dict[str, int] = {\n    # Anthropic Claude models (new naming: claude-{tier}-{version})\n    \"claude-opus-4\": 200000,\n    \"claude-sonnet-4\": 200000,\n    \"claude-haiku-4\": 200000,\n    # Anthropic Claude models (old naming: claude-{version})\n    \"claude-4\": 200000,\n    \"claude-3-7\": 200000,\n    \"claude-3-5\": 200000,\n    \"claude-3\": 200000,\n    \"claude-v2\": 100000,\n    \"claude-instant\": 100000,\n    # Amazon Nova models (from LiteLLM)\n    \"nova-premier\": 1000000,\n    \"nova-pro\": 300000,\n    \"nova-lite\": 300000,\n    \"nova-2-lite\": 1000000,  # Nova 2 Lite has 1M context\n    \"nova-2-sonic\": 128000,\n    \"nova-micro\": 128000,\n    # Amazon Titan models (from LiteLLM: all text models are 42K)\n    \"titan-text-premier\": 42000,\n    \"titan-text-express\": 42000,\n    \"titan-text-lite\": 42000,\n    \"titan-tg1\": 8000,\n    # Meta Llama models (Llama 3 base = 8K, Llama 3.1+ = 128K)\n    \"llama4\": 128000,\n    \"llama3-3\": 128000,\n    \"llama3-2\": 128000,\n    \"llama3-1\": 128000,\n    \"llama3-8b\": 8000,\n    \"llama3-70b\": 8000,\n    # Mistral models (Large 2+ = 128K, original Large/Small = 32K)\n    \"mistral-large-3\": 128000,\n    \"mistral-large-2407\": 128000,  # Mistral Large 2\n    \"mistral-large-2402\": 32000,  # Original Mistral Large\n    \"mistral-large\": 128000,  # Default to newer version\n    \"mistral-small\": 32000,\n    \"mistral-7b\": 32000,\n    \"mixtral-8x7b\": 32000,\n    \"pixtral\": 128000,\n    \"ministral\": 128000,\n    \"magistral\": 128000,\n    \"voxtral\": 32000,\n    # Cohere models\n    \"command-r-plus\": 128000,\n    \"command-r\": 128000,\n    # DeepSeek models\n    \"deepseek\": 64000,\n    # Google Gemma models\n    \"gemma-3\": 128000,\n    \"gemma-2\": 8000,\n    \"gemma\": 8000,\n    # Qwen models\n    \"qwen3\": 128000,\n    \"qwen2\": 128000,\n    # NVIDIA models\n    \"nemotron\": 128000,\n    # Writer Palmyra models\n    \"palmyra\": 128000,\n    # Moonshot Kimi\n    \"kimi\": 128000,\n    # Minimax\n    \"minimax\": 128000,\n    # OpenAI (via Bedrock)\n    \"gpt-oss\": 128000,\n    # AI21 models (from LiteLLM: Jamba 1.5 = 256K, Jamba Instruct = 70K)\n    \"jamba-1-5\": 256000,\n    \"jamba-instruct\": 70000,\n    \"jamba\": 256000,  # Default to newer version\n}\n\n\n# Models that should keep their hyphenated format in display names\n# These are model families where the hyphen is part of the brand name\nHYPHENATED_MODEL_NAMES: set[str] = {\n    \"gpt-oss\",\n}\n\n\n# General model prefix to vendor mapping (used as fallback when enrichment data is missing)\n# This covers common model families across all providers\nMODEL_PREFIX_TO_VENDOR: dict[str, str] = {\n    # Google\n    \"gemini\": \"google\",\n    \"gemma\": \"google\",\n    \"palm\": \"google\",\n    # Anthropic\n    \"claude\": \"anthropic\",\n    # OpenAI\n    \"gpt\": \"openai\",\n    \"o1\": \"openai\",\n    \"o3\": \"openai\",\n    \"o4\": \"openai\",\n    \"chatgpt\": \"openai\",\n    # Meta\n    \"llama\": \"meta\",\n    \"codellama\": \"meta\",\n    # Mistral\n    \"mistral\": \"mistral\",\n    \"mixtral\": \"mistral\",\n    \"codestral\": \"mistral\",\n    \"ministral\": \"mistral\",\n    \"pixtral\": \"mistral\",\n    \"magistral\": \"mistral\",\n    # Cohere\n    \"command\": \"cohere\",\n    \"aya\": \"cohere\",\n    # Amazon\n    \"nova\": \"amazon\",\n    \"titan\": \"amazon\",\n    # AI21\n    \"jamba\": \"ai21\",\n    # DeepSeek\n    \"deepseek\": \"deepseek\",\n    # Alibaba/Qwen\n    \"qwen\": \"alibaba\",\n    \"qwq\": \"alibaba\",\n    # Microsoft\n    \"phi\": \"microsoft\",\n    # NVIDIA\n    \"nemotron\": \"nvidia\",\n    # xAI\n    \"grok\": \"xai\",\n}\n\n\n# Ollama model prefix to vendor mapping (for grouping models by vendor)\nOLLAMA_MODEL_TO_VENDOR: dict[str, str] = {\n    \"llama\": \"Meta\",\n    \"codellama\": \"Meta\",\n    \"qwen\": \"Alibaba\",\n    \"qwq\": \"Alibaba\",\n    \"mistral\": \"Mistral\",\n    \"ministral\": \"Mistral\",\n    \"mixtral\": \"Mistral\",\n    \"deepseek\": \"DeepSeek\",\n    \"gemma\": \"Google\",\n    \"phi\": \"Microsoft\",\n    \"command\": \"Cohere\",\n    \"aya\": \"Cohere\",\n    \"falcon\": \"TII\",\n    \"yi\": \"01.AI\",\n    \"starcoder\": \"BigCode\",\n    \"wizardcoder\": \"WizardLM\",\n    \"vicuna\": \"LMSYS\",\n    \"openchat\": \"OpenChat\",\n    \"solar\": \"Upstage\",\n    \"orca\": \"Microsoft\",\n    \"dolphin\": \"Cognitive Computations\",\n    \"nous\": \"Nous Research\",\n    \"neural\": \"Intel\",\n    \"zephyr\": \"HuggingFace\",\n    \"granite\": \"IBM\",\n    \"nemotron\": \"NVIDIA\",\n    \"smollm\": \"HuggingFace\",\n}\n"
  },
  {
    "path": "backend/onyx/llm/cost.py",
    "content": "\"\"\"LLM cost calculation utilities.\"\"\"\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef calculate_llm_cost_cents(\n    model_name: str,\n    prompt_tokens: int,\n    completion_tokens: int,\n) -> float:\n    \"\"\"\n    Calculate the cost in cents for an LLM API call.\n\n    Uses litellm's cost_per_token function to get current pricing.\n    Returns 0 if the model is not found or on any error.\n    \"\"\"\n    try:\n        import litellm\n\n        # cost_per_token returns (prompt_cost, completion_cost) in USD\n        prompt_cost_usd, completion_cost_usd = litellm.cost_per_token(\n            model=model_name,\n            prompt_tokens=prompt_tokens,\n            completion_tokens=completion_tokens,\n        )\n\n        # Convert to cents (multiply by 100)\n        total_cost_cents = (prompt_cost_usd + completion_cost_usd) * 100\n        return total_cost_cents\n\n    except Exception as e:\n        # Log but don't fail - unknown models or errors shouldn't block usage\n        logger.debug(\n            f\"Could not calculate cost for model {model_name}: {e}. Assuming cost is 0.\"\n        )\n        return 0.0\n"
  },
  {
    "path": "backend/onyx/llm/factory.py",
    "content": "from collections.abc import Callable\nfrom typing import Any\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.model_configs import GEN_AI_TEMPERATURE\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import LLMModelFlowType\nfrom onyx.db.llm import can_user_access_llm_provider\nfrom onyx.db.llm import fetch_default_llm_model\nfrom onyx.db.llm import fetch_default_vision_model\nfrom onyx.db.llm import fetch_existing_llm_provider\nfrom onyx.db.llm import fetch_existing_models\nfrom onyx.db.llm import fetch_llm_provider_view\nfrom onyx.db.llm import fetch_user_group_ids\nfrom onyx.db.models import Persona\nfrom onyx.db.models import User\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.multi_llm import LitellmLLM\nfrom onyx.llm.override_models import LLMOverride\nfrom onyx.llm.utils import get_max_input_tokens_from_llm_provider\nfrom onyx.llm.utils import model_supports_image_input\nfrom onyx.llm.well_known_providers.constants import (\n    PROVIDERS_WITH_SPECIAL_API_KEY_HANDLING,\n)\nfrom onyx.natural_language_processing.utils import get_tokenizer\nfrom onyx.server.manage.llm.models import LLMProviderView\nfrom onyx.utils.headers import build_llm_extra_headers\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _build_provider_extra_headers(\n    provider: str, custom_config: dict[str, str] | None\n) -> dict[str, str]:\n    if provider in PROVIDERS_WITH_SPECIAL_API_KEY_HANDLING and custom_config:\n        raw = custom_config.get(PROVIDERS_WITH_SPECIAL_API_KEY_HANDLING[provider])\n        api_key = raw.strip() if raw else None\n        if not api_key:\n            return {}\n        return {\n            \"Authorization\": (\n                api_key\n                if api_key.lower().startswith(\"bearer \")\n                else f\"Bearer {api_key}\"\n            )\n        }\n\n    # Passing these will put Onyx on the OpenRouter leaderboard\n    elif provider == LlmProviderNames.OPENROUTER:\n        return {\n            \"HTTP-Referer\": \"https://onyx.app\",\n            \"X-Title\": \"Onyx\",\n        }\n\n    return {}\n\n\ndef _get_model_configured_max_input_tokens(\n    llm_provider: LLMProviderView,\n    model_name: str,\n) -> int | None:\n    for model_configuration in llm_provider.model_configurations:\n        if model_configuration.name == model_name:\n            return model_configuration.max_input_tokens\n    return None\n\n\ndef _build_model_kwargs(\n    provider: str,\n    configured_max_input_tokens: int | None,\n) -> dict[str, Any]:\n    model_kwargs: dict[str, Any] = {}\n    if (\n        provider == LlmProviderNames.OLLAMA_CHAT\n        and configured_max_input_tokens\n        and configured_max_input_tokens > 0\n    ):\n        model_kwargs[\"num_ctx\"] = configured_max_input_tokens\n    return model_kwargs\n\n\ndef get_llm_for_persona(\n    persona: Persona | None,\n    user: User,\n    llm_override: LLMOverride | None = None,\n    additional_headers: dict[str, str] | None = None,\n) -> LLM:\n    if persona is None:\n        logger.warning(\"No persona provided, using default LLM\")\n        return get_default_llm()\n\n    provider_name_override = llm_override.model_provider if llm_override else None\n    model_version_override = llm_override.model_version if llm_override else None\n    temperature_override = llm_override.temperature if llm_override else None\n\n    provider_name = provider_name_override or persona.llm_model_provider_override\n    if not provider_name:\n        return get_default_llm(\n            temperature=temperature_override or GEN_AI_TEMPERATURE,\n            additional_headers=additional_headers,\n        )\n\n    with get_session_with_current_tenant() as db_session:\n        provider_model = fetch_existing_llm_provider(provider_name, db_session)\n        if not provider_model:\n            raise ValueError(\"No LLM provider found\")\n\n        # Fetch user group IDs for access control check\n        user_group_ids = fetch_user_group_ids(db_session, user)\n\n        if not can_user_access_llm_provider(\n            provider_model, user_group_ids, persona, user.role == UserRole.ADMIN\n        ):\n            logger.warning(\n                \"User %s with persona %s cannot access provider %s. Falling back to default provider.\",\n                user.id,\n                persona.id,\n                provider_model.name,\n            )\n            return get_default_llm(\n                temperature=temperature_override or GEN_AI_TEMPERATURE,\n                additional_headers=additional_headers,\n            )\n\n        llm_provider = LLMProviderView.from_model(provider_model)\n\n    model = model_version_override or persona.llm_model_version_override\n    if not model:\n        raise ValueError(\"No model name found\")\n\n    return llm_from_provider(\n        model_name=model,\n        llm_provider=llm_provider,\n        temperature=temperature_override,\n        additional_headers=additional_headers,\n    )\n\n\ndef get_default_llm_with_vision(\n    timeout: int | None = None,\n    temperature: float | None = None,\n    additional_headers: dict[str, str] | None = None,\n) -> LLM | None:\n    \"\"\"Get an LLM that supports image input, with the following priority:\n    1. Use the designated default vision provider if it exists and supports image input\n    2. Fall back to the first LLM provider that supports image input\n\n    Returns None if no providers exist or if no provider supports images.\n    \"\"\"\n\n    def create_vision_llm(provider: LLMProviderView, model: str) -> LLM:\n        \"\"\"Helper to create an LLM if the provider supports image input.\"\"\"\n        return llm_from_provider(\n            model_name=model,\n            llm_provider=provider,\n            timeout=timeout,\n            temperature=temperature,\n            additional_headers=additional_headers,\n        )\n\n    provider_map = {}\n    with get_session_with_current_tenant() as db_session:\n        # Try the default vision provider first\n        default_model = fetch_default_vision_model(db_session)\n        if default_model:\n            if model_supports_image_input(\n                default_model.name, default_model.llm_provider.provider\n            ):\n                logger.info(\n                    \"Using default vision model: %s (provider=%s)\",\n                    default_model.name,\n                    default_model.llm_provider.provider,\n                )\n                return create_vision_llm(\n                    LLMProviderView.from_model(default_model.llm_provider),\n                    default_model.name,\n                )\n            else:\n                logger.warning(\n                    \"Default vision model %s (provider=%s) does not support \"\n                    \"image input — falling back to searching all providers\",\n                    default_model.name,\n                    default_model.llm_provider.provider,\n                )\n\n        # Fall back to searching all providers\n        models = fetch_existing_models(\n            db_session=db_session,\n            flow_types=[LLMModelFlowType.VISION, LLMModelFlowType.CHAT],\n        )\n\n        if not models:\n            logger.warning(\n                \"No LLM models with VISION or CHAT flow type found — \"\n                \"image summarization will be disabled\"\n            )\n            return None\n\n        for model in models:\n            if model.llm_provider_id not in provider_map:\n                provider_map[model.llm_provider_id] = LLMProviderView.from_model(\n                    model.llm_provider\n                )\n\n    # Search for viable vision model followed by chat models\n    # Sort models from VISION to CHAT priority\n    sorted_models = sorted(\n        models,\n        key=lambda x: (\n            LLMModelFlowType.VISION in x.llm_model_flow_types,\n            LLMModelFlowType.CHAT in x.llm_model_flow_types,\n        ),\n        reverse=True,\n    )\n\n    for model in sorted_models:\n        if model_supports_image_input(model.name, model.llm_provider.provider):\n            logger.info(\n                \"Using fallback vision model: %s (provider=%s)\",\n                model.name,\n                model.llm_provider.provider,\n            )\n            return create_vision_llm(\n                provider_map[model.llm_provider_id],\n                model.name,\n            )\n\n    checked_models = [\n        f\"{m.name} (provider={m.llm_provider.provider})\" for m in sorted_models\n    ]\n    logger.warning(\n        \"No vision-capable model found among %d candidates: %s — \"\n        \"image summarization will be disabled\",\n        len(sorted_models),\n        \", \".join(checked_models),\n    )\n    return None\n\n\ndef llm_from_provider(\n    model_name: str,\n    llm_provider: LLMProviderView,\n    timeout: int | None = None,\n    temperature: float | None = None,\n    additional_headers: dict[str, str] | None = None,\n) -> LLM:\n    configured_max_input_tokens = _get_model_configured_max_input_tokens(\n        llm_provider=llm_provider, model_name=model_name\n    )\n    model_kwargs = _build_model_kwargs(\n        provider=llm_provider.provider,\n        configured_max_input_tokens=configured_max_input_tokens,\n    )\n    max_input_tokens = (\n        configured_max_input_tokens\n        if configured_max_input_tokens\n        else get_max_input_tokens_from_llm_provider(\n            llm_provider=llm_provider, model_name=model_name\n        )\n    )\n    return get_llm(\n        provider=llm_provider.provider,\n        model=model_name,\n        deployment_name=llm_provider.deployment_name,\n        api_key=llm_provider.api_key,\n        api_base=llm_provider.api_base,\n        api_version=llm_provider.api_version,\n        custom_config=llm_provider.custom_config,\n        timeout=timeout,\n        temperature=temperature,\n        additional_headers=additional_headers,\n        max_input_tokens=max_input_tokens,\n        model_kwargs=model_kwargs,\n    )\n\n\ndef get_llm_for_contextual_rag(model_name: str, model_provider: str) -> LLM:\n    with get_session_with_current_tenant() as db_session:\n        llm_provider = fetch_llm_provider_view(db_session, model_provider)\n    if not llm_provider:\n        raise ValueError(\"No LLM provider with name {} found\".format(model_provider))\n    return llm_from_provider(\n        model_name=model_name,\n        llm_provider=llm_provider,\n    )\n\n\ndef get_default_llm(\n    timeout: int | None = None,\n    temperature: float | None = None,\n    additional_headers: dict[str, str] | None = None,\n) -> LLM:\n    with get_session_with_current_tenant() as db_session:\n        model = fetch_default_llm_model(db_session)\n\n        if not model:\n            raise ValueError(\"No default LLM model found\")\n\n        return llm_from_provider(\n            model_name=model.name,\n            llm_provider=LLMProviderView.from_model(model.llm_provider),\n            timeout=timeout,\n            temperature=temperature,\n            additional_headers=additional_headers,\n        )\n\n\ndef get_llm(\n    provider: str,\n    model: str,\n    max_input_tokens: int,\n    deployment_name: str | None,\n    api_key: str | None = None,\n    api_base: str | None = None,\n    api_version: str | None = None,\n    custom_config: dict[str, str] | None = None,\n    temperature: float | None = None,\n    timeout: int | None = None,\n    additional_headers: dict[str, str] | None = None,\n    model_kwargs: dict[str, Any] | None = None,\n) -> LLM:\n    if temperature is None:\n        temperature = GEN_AI_TEMPERATURE\n\n    extra_headers = build_llm_extra_headers(additional_headers)\n\n    # NOTE: this is needed since Ollama API key is optional\n    # User may access Ollama cloud via locally hosted instance (logged in)\n    # or just via the cloud API (not logged in, using API key)\n    provider_extra_headers = _build_provider_extra_headers(provider, custom_config)\n    if provider_extra_headers:\n        extra_headers.update(provider_extra_headers)\n\n    return LitellmLLM(\n        model_provider=provider,\n        model_name=model,\n        deployment_name=deployment_name,\n        api_key=api_key,\n        api_base=api_base,\n        api_version=api_version,\n        timeout=timeout,\n        temperature=temperature,\n        custom_config=custom_config,\n        extra_headers=extra_headers,\n        model_kwargs=model_kwargs or {},\n        max_input_tokens=max_input_tokens,\n    )\n\n\ndef get_llm_tokenizer_encode_func(llm: LLM) -> Callable[[str], list[int]]:\n    \"\"\"Get the tokenizer encode function for an LLM.\n\n    Args:\n        llm: The LLM instance to get the tokenizer for\n\n    Returns:\n        A callable that encodes a string into a list of token IDs\n    \"\"\"\n    llm_provider = llm.config.model_provider\n    llm_model_name = llm.config.model_name\n\n    llm_tokenizer = get_tokenizer(\n        model_name=llm_model_name,\n        provider_type=llm_provider,\n    )\n    return llm_tokenizer.encode\n\n\ndef get_llm_token_counter(llm: LLM) -> Callable[[str], int]:\n    tokenizer_encode_func = get_llm_tokenizer_encode_func(llm)\n    return lambda text: len(tokenizer_encode_func(text))\n"
  },
  {
    "path": "backend/onyx/llm/interfaces.py",
    "content": "import abc\nfrom collections.abc import Iterator\n\nfrom braintrust import traced\nfrom pydantic import BaseModel\n\nfrom onyx.llm.model_response import ModelResponse\nfrom onyx.llm.model_response import ModelResponseStream\nfrom onyx.llm.models import LanguageModelInput\nfrom onyx.llm.models import ReasoningEffort\nfrom onyx.llm.models import ToolChoiceOptions\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass LLMUserIdentity(BaseModel):\n    user_id: str | None = None\n    session_id: str | None = None\n\n\nclass LLMConfig(BaseModel):\n    model_provider: str\n    model_name: str\n    temperature: float\n    api_key: str | None = None\n    api_base: str | None = None\n    api_version: str | None = None\n    deployment_name: str | None = None\n    custom_config: dict[str, str] | None = None\n    max_input_tokens: int\n    # This disables the \"model_\" protected namespace for pydantic\n    model_config = {\"protected_namespaces\": ()}\n\n\nclass LLM(abc.ABC):\n    @property\n    @abc.abstractmethod\n    def config(self) -> LLMConfig:\n        raise NotImplementedError\n\n    @traced(name=\"invoke llm\", type=\"llm\")\n    def invoke(\n        self,\n        prompt: LanguageModelInput,\n        tools: list[dict] | None = None,\n        tool_choice: ToolChoiceOptions | None = None,\n        structured_response_format: dict | None = None,\n        timeout_override: int | None = None,\n        max_tokens: int | None = None,\n        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,\n        user_identity: LLMUserIdentity | None = None,\n    ) -> \"ModelResponse\":\n        raise NotImplementedError\n\n    def stream(\n        self,\n        prompt: LanguageModelInput,\n        tools: list[dict] | None = None,\n        tool_choice: ToolChoiceOptions | None = None,\n        structured_response_format: dict | None = None,\n        timeout_override: int | None = None,\n        max_tokens: int | None = None,\n        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,\n        user_identity: LLMUserIdentity | None = None,\n    ) -> Iterator[ModelResponseStream]:\n        raise NotImplementedError\n"
  },
  {
    "path": "backend/onyx/llm/litellm_singleton/__init__.py",
    "content": "\"\"\"\nSingleton module for litellm configuration.\nThis ensures litellm is configured exactly once when first imported.\nAll other modules should import litellm from here instead of directly.\n\"\"\"\n\nimport litellm\n\nfrom .config import initialize_litellm\nfrom .monkey_patches import apply_monkey_patches\n\ninitialize_litellm()\napply_monkey_patches()\n\n# Export the configured litellm module and model\n__all__ = [\"litellm\"]\n"
  },
  {
    "path": "backend/onyx/llm/litellm_singleton/config.py",
    "content": "import json\nfrom pathlib import Path\n\nimport litellm\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef configure_litellm_settings() -> None:\n    # If a user configures a different model and it doesn't support all the same\n    # parameters like frequency and presence, just ignore them\n    litellm.drop_params = True\n    litellm.telemetry = False\n    litellm.modify_params = True\n    litellm.add_function_to_prompt = False\n    litellm.suppress_debug_info = True\n\n\n# TODO: We might not need to register ollama_chat in addition to ollama but let's just do it for good measure for now.\ndef register_ollama_models() -> None:\n    litellm.register_model(\n        model_cost={\n            # GPT-OSS models\n            \"ollama_chat/gpt-oss:120b-cloud\": {\"supports_function_calling\": True},\n            \"ollama_chat/gpt-oss:120b\": {\"supports_function_calling\": True},\n            \"ollama_chat/gpt-oss:20b-cloud\": {\"supports_function_calling\": True},\n            \"ollama_chat/gpt-oss:20b\": {\"supports_function_calling\": True},\n            \"ollama/gpt-oss:120b-cloud\": {\"supports_function_calling\": True},\n            \"ollama/gpt-oss:120b\": {\"supports_function_calling\": True},\n            \"ollama/gpt-oss:20b-cloud\": {\"supports_function_calling\": True},\n            \"ollama/gpt-oss:20b\": {\"supports_function_calling\": True},\n            # DeepSeek models\n            \"ollama_chat/deepseek-r1:latest\": {\"supports_function_calling\": True},\n            \"ollama_chat/deepseek-r1:1.5b\": {\"supports_function_calling\": True},\n            \"ollama_chat/deepseek-r1:7b\": {\"supports_function_calling\": True},\n            \"ollama_chat/deepseek-r1:8b\": {\"supports_function_calling\": True},\n            \"ollama_chat/deepseek-r1:14b\": {\"supports_function_calling\": True},\n            \"ollama_chat/deepseek-r1:32b\": {\"supports_function_calling\": True},\n            \"ollama_chat/deepseek-r1:70b\": {\"supports_function_calling\": True},\n            \"ollama_chat/deepseek-r1:671b\": {\"supports_function_calling\": True},\n            \"ollama_chat/deepseek-v3.1:latest\": {\"supports_function_calling\": True},\n            \"ollama_chat/deepseek-v3.1:671b\": {\"supports_function_calling\": True},\n            \"ollama_chat/deepseek-v3.1:671b-cloud\": {\"supports_function_calling\": True},\n            \"ollama/deepseek-r1:latest\": {\"supports_function_calling\": True},\n            \"ollama/deepseek-r1:1.5b\": {\"supports_function_calling\": True},\n            \"ollama/deepseek-r1:7b\": {\"supports_function_calling\": True},\n            \"ollama/deepseek-r1:8b\": {\"supports_function_calling\": True},\n            \"ollama/deepseek-r1:14b\": {\"supports_function_calling\": True},\n            \"ollama/deepseek-r1:32b\": {\"supports_function_calling\": True},\n            \"ollama/deepseek-r1:70b\": {\"supports_function_calling\": True},\n            \"ollama/deepseek-r1:671b\": {\"supports_function_calling\": True},\n            \"ollama/deepseek-v3.1:latest\": {\"supports_function_calling\": True},\n            \"ollama/deepseek-v3.1:671b\": {\"supports_function_calling\": True},\n            \"ollama/deepseek-v3.1:671b-cloud\": {\"supports_function_calling\": True},\n            # Gemma3 models\n            \"ollama_chat/gemma3:latest\": {\"supports_function_calling\": True},\n            \"ollama_chat/gemma3:270m\": {\"supports_function_calling\": True},\n            \"ollama_chat/gemma3:1b\": {\"supports_function_calling\": True},\n            \"ollama_chat/gemma3:4b\": {\"supports_function_calling\": True},\n            \"ollama_chat/gemma3:12b\": {\"supports_function_calling\": True},\n            \"ollama_chat/gemma3:27b\": {\"supports_function_calling\": True},\n            \"ollama/gemma3:latest\": {\"supports_function_calling\": True},\n            \"ollama/gemma3:270m\": {\"supports_function_calling\": True},\n            \"ollama/gemma3:1b\": {\"supports_function_calling\": True},\n            \"ollama/gemma3:4b\": {\"supports_function_calling\": True},\n            \"ollama/gemma3:12b\": {\"supports_function_calling\": True},\n            \"ollama/gemma3:27b\": {\"supports_function_calling\": True},\n            # Qwen models\n            \"ollama_chat/qwen3-coder:latest\": {\"supports_function_calling\": True},\n            \"ollama_chat/qwen3-coder:30b\": {\"supports_function_calling\": True},\n            \"ollama_chat/qwen3-coder:480b\": {\"supports_function_calling\": True},\n            \"ollama_chat/qwen3-coder:480b-cloud\": {\"supports_function_calling\": True},\n            \"ollama_chat/qwen3-vl:latest\": {\"supports_function_calling\": True},\n            \"ollama_chat/qwen3-vl:2b\": {\"supports_function_calling\": True},\n            \"ollama_chat/qwen3-vl:4b\": {\"supports_function_calling\": True},\n            \"ollama_chat/qwen3-vl:8b\": {\"supports_function_calling\": True},\n            \"ollama_chat/qwen3-vl:30b\": {\"supports_function_calling\": True},\n            \"ollama_chat/qwen3-vl:32b\": {\"supports_function_calling\": True},\n            \"ollama_chat/qwen3-vl:235b\": {\"supports_function_calling\": True},\n            \"ollama_chat/qwen3-vl:235b-cloud\": {\"supports_function_calling\": True},\n            \"ollama_chat/qwen3-vl:235b-instruct-cloud\": {\n                \"supports_function_calling\": True\n            },\n            \"ollama/qwen3-coder:latest\": {\"supports_function_calling\": True},\n            \"ollama/qwen3-coder:30b\": {\"supports_function_calling\": True},\n            \"ollama/qwen3-coder:480b\": {\"supports_function_calling\": True},\n            \"ollama/qwen3-coder:480b-cloud\": {\"supports_function_calling\": True},\n            \"ollama/qwen3-vl:latest\": {\"supports_function_calling\": True},\n            \"ollama/qwen3-vl:2b\": {\"supports_function_calling\": True},\n            \"ollama/qwen3-vl:4b\": {\"supports_function_calling\": True},\n            \"ollama/qwen3-vl:8b\": {\"supports_function_calling\": True},\n            \"ollama/qwen3-vl:30b\": {\"supports_function_calling\": True},\n            \"ollama/qwen3-vl:32b\": {\"supports_function_calling\": True},\n            \"ollama/qwen3-vl:235b\": {\"supports_function_calling\": True},\n            \"ollama/qwen3-vl:235b-cloud\": {\"supports_function_calling\": True},\n            \"ollama/qwen3-vl:235b-instruct-cloud\": {\"supports_function_calling\": True},\n            # Kimi\n            \"ollama_chat/kimi-k2:1t\": {\"supports_function_calling\": True},\n            \"ollama_chat/kimi-k2:1t-cloud\": {\"supports_function_calling\": True},\n            \"ollama/kimi-k2:1t\": {\"supports_function_calling\": True},\n            \"ollama/kimi-k2:1t-cloud\": {\"supports_function_calling\": True},\n            # GLM\n            \"ollama_chat/glm-4.6:cloud\": {\"supports_function_calling\": True},\n            \"ollama_chat/glm-4.6\": {\"supports_function_calling\": True},\n            \"ollama/glm-4.6\": {\"supports_function_calling\": True},\n            \"ollama/glm-4.6-cloud\": {\"supports_function_calling\": True},\n        }\n    )\n\n\ndef load_model_metadata_enrichments() -> None:\n    \"\"\"\n    Load model metadata enrichments from JSON file and merge into litellm.model_cost.\n\n    This adds model_vendor, display_name, and model_version fields\n    to litellm's model_cost dict. These fields are used by the UI to display\n    models grouped by vendor with human-friendly names.\n\n    Once LiteLLM accepts our upstream PR to add these fields natively,\n    this function and the JSON file can be removed.\n    \"\"\"\n    enrichments_path = Path(__file__).parent.parent / \"model_metadata_enrichments.json\"\n\n    if not enrichments_path.exists():\n        logger.warning(f\"Model metadata enrichments file not found: {enrichments_path}\")\n        return\n\n    try:\n        with open(enrichments_path) as f:\n            enrichments = json.load(f)\n\n        # Merge enrichments into litellm.model_cost\n        for model_key, metadata in enrichments.items():\n            if model_key in litellm.model_cost:\n                # Update existing entry with our metadata\n                litellm.model_cost[model_key].update(metadata)\n            else:\n                # Model not in litellm.model_cost - add it with just our metadata\n                litellm.model_cost[model_key] = metadata\n\n        logger.info(f\"Loaded model metadata enrichments for {len(enrichments)} models\")\n\n        # Clear the model name parser cache since enrichments are now loaded\n        # This ensures any parsing done before enrichments were loaded gets refreshed\n        try:\n            from onyx.llm.model_name_parser import parse_litellm_model_name\n\n            parse_litellm_model_name.cache_clear()\n        except ImportError:\n            pass  # Parser not yet imported, no cache to clear\n    except Exception as e:\n        logger.error(f\"Failed to load model metadata enrichments: {e}\")\n\n\ndef initialize_litellm() -> None:\n    configure_litellm_settings()\n    register_ollama_models()\n    load_model_metadata_enrichments()\n"
  },
  {
    "path": "backend/onyx/llm/litellm_singleton/monkey_patches.py",
    "content": "\"\"\"\nLiteLLM Monkey Patches\n\nThis module addresses the following issues in LiteLLM:\n\nStatus checked against LiteLLM v1.81.6-nightly (2026-02-02):\n\n1. Ollama Streaming Reasoning Content (_patch_ollama_chunk_parser):\n   - LiteLLM's chunk_parser doesn't properly handle reasoning content in streaming\n     responses from Ollama\n   - Processes native \"thinking\" field from Ollama responses\n   - Also handles <think>...</think> tags in content for models that use that format\n   - Tracks reasoning state to properly separate thinking from regular content\n   STATUS: STILL NEEDED - LiteLLM has a bug where it only yields thinking content on\n           the first two chunks, then stops (lines 504-510). Our patch correctly yields\n           ALL thinking chunks. The upstream logic sets finished_reasoning_content=True\n           on the second chunk instead of when regular content starts.\n\n2. OpenAI Responses API Parallel Tool Calls (_patch_openai_responses_parallel_tool_calls):\n   - LiteLLM's translate_responses_chunk_to_openai_stream hardcodes index=0 for all tool calls\n   - This breaks parallel tool calls where multiple functions are called simultaneously\n   - The OpenAI Responses API provides output_index in streaming events to track which\n     tool call each event belongs to\n   STATUS: STILL NEEDED - LiteLLM hardcodes index=0 in translate_responses_chunk_to_openai_stream\n           for response.output_item.added (line 962), response.function_call_arguments.delta\n           (line 989), and response.output_item.done (line 1033). Our patch uses output_index\n           from the event to properly track parallel tool calls.\n\n3. OpenAI Responses API Non-Streaming (_patch_openai_responses_transform_response):\n   - LiteLLM's transform_response doesn't properly concatenate multiple reasoning\n     summary parts in non-streaming responses\n   - Multiple ReasoningSummaryItem objects should be joined with newlines\n   STATUS: STILL NEEDED - LiteLLM's _convert_response_output_to_choices (lines 366-370)\n           only keeps the LAST summary item text, discarding earlier parts. Our patch\n           concatenates all summary texts with double newlines.\n\n4. Azure Responses API Fake Streaming (_patch_azure_responses_should_fake_stream):\n   - LiteLLM uses \"fake streaming\" (MockResponsesAPIStreamingIterator) for models\n     not in its database, which buffers the entire response before yielding\n   - This causes poor time-to-first-token for Azure custom model deployments\n   - Azure's Responses API supports native streaming, so we force real streaming\n   STATUS: STILL NEEDED - AzureOpenAIResponsesAPIConfig does NOT override should_fake_stream,\n           so it inherits from OpenAIResponsesAPIConfig which returns True for models not\n           in litellm.utils.supports_native_streaming(). Custom Azure deployments will\n           still use fake streaming without this patch.\n\n# Note: 5 and 6 are to supress a warning and may fix usage info but is not strictly required for the app to run\n5. Responses API Usage Format Mismatch (_patch_responses_api_usage_format):\n   - LiteLLM uses model_construct as a fallback in multiple places when\n     ResponsesAPIResponse validation fails\n   - This bypasses the usage validator, allowing chat completion format usage\n     (completion_tokens, prompt_tokens) to be stored instead of Responses API format\n     (input_tokens, output_tokens)\n   - When model_dump() is later called, Pydantic emits a serialization warning\n   STATUS: STILL NEEDED - Multiple files use model_construct which bypasses validation:\n           openai/responses/transformation.py, chatgpt/responses/transformation.py,\n           manus/responses/transformation.py, volcengine/responses/transformation.py,\n           and handler.py. Our patch wraps ResponsesAPIResponse.model_construct itself\n           to transform usage in all code paths.\n\n6. Logging Usage Transformation Warning (_patch_logging_assembled_streaming_response):\n   - LiteLLM's _get_assembled_streaming_response in litellm_logging.py transforms\n     ResponseAPIUsage to chat completion format and sets it as a dict on the\n     ResponsesAPIResponse.usage field\n   - This replaces the proper ResponseAPIUsage object with a dict, causing Pydantic\n     to emit a serialization warning when model_dump() is called later\n   STATUS: STILL NEEDED - litellm_core_utils/litellm_logging.py lines 3185-3199 set\n           usage as a dict with chat completion format instead of keeping it as\n           ResponseAPIUsage. Our patch creates a deep copy before modification.\n\n7. Responses API metadata=None TypeError (_patch_responses_metadata_none):\n   - LiteLLM's @client decorator wrapper in utils.py uses kwargs.get(\"metadata\", {})\n     to check for router calls, but when metadata is explicitly None (key exists with\n     value None), the default {} is not used\n   - This causes \"argument of type 'NoneType' is not iterable\" TypeError which swallows\n     the real exception (e.g. AuthenticationError for wrong API key)\n   - Surfaces as: APIConnectionError: OpenAIException - argument of type 'NoneType' is\n     not iterable\n   STATUS: STILL NEEDED - litellm/utils.py wrapper function (line 1721) does not guard\n           against metadata being explicitly None. Triggered when Responses API bridge\n           passes **litellm_params containing metadata=None.\n\"\"\"\n\nimport time\nimport uuid\nfrom typing import Any\nfrom typing import cast\nfrom typing import List\nfrom typing import Optional\n\nfrom litellm.completion_extras.litellm_responses_transformation.transformation import (\n    LiteLLMResponsesTransformationHandler,\n)\nfrom litellm.completion_extras.litellm_responses_transformation.transformation import (\n    OpenAiResponsesToChatCompletionStreamIterator,\n)\nfrom litellm.llms.ollama.chat.transformation import OllamaChatCompletionResponseIterator\nfrom litellm.llms.ollama.common_utils import OllamaError\nfrom litellm.types.utils import ChatCompletionUsageBlock\nfrom litellm.types.utils import ModelResponseStream\n\n\ndef _patch_ollama_chunk_parser() -> None:\n    \"\"\"\n    Patches OllamaChatCompletionResponseIterator.chunk_parser to properly handle\n    reasoning content and content in streaming responses.\n    \"\"\"\n    if (\n        getattr(OllamaChatCompletionResponseIterator.chunk_parser, \"__name__\", \"\")\n        == \"_patched_chunk_parser\"\n    ):\n        return\n\n    def _patched_chunk_parser(self: Any, chunk: dict) -> ModelResponseStream:\n        try:\n            \"\"\"\n            Expected chunk format:\n            {\n                \"model\": \"llama3.1\",\n                \"created_at\": \"2025-05-24T02:12:05.859654Z\",\n                \"message\": {\n                    \"role\": \"assistant\",\n                    \"content\": \"\",\n                    \"tool_calls\": [{\n                        \"function\": {\n                            \"name\": \"get_latest_album_ratings\",\n                            \"arguments\": {\n                                \"artist_name\": \"Taylor Swift\"\n                            }\n                        }\n                    }]\n                },\n                \"done_reason\": \"stop\",\n                \"done\": true,\n                ...\n            }\n            Need to:\n            - convert 'message' to 'delta'\n            - return finish_reason when done is true\n            - return usage when done is true\n            \"\"\"\n            from litellm.types.utils import Delta\n            from litellm.types.utils import StreamingChoices\n\n            # process tool calls - if complete function arg - add id to tool call\n            tool_calls = chunk[\"message\"].get(\"tool_calls\")\n            if tool_calls is not None:\n                for tool_call in tool_calls:\n                    function_args = tool_call.get(\"function\").get(\"arguments\")\n                    if function_args is not None and len(function_args) > 0:\n                        is_function_call_complete = self._is_function_call_complete(\n                            function_args\n                        )\n                        if is_function_call_complete:\n                            tool_call[\"id\"] = str(uuid.uuid4())\n\n            # PROCESS REASONING CONTENT\n            reasoning_content: Optional[str] = None\n            content: Optional[str] = None\n            thinking_content = chunk[\"message\"].get(\"thinking\")\n            if thinking_content:  # Truthy check: skips None and empty string \"\"\n                reasoning_content = thinking_content\n                if self.started_reasoning_content is False:\n                    self.started_reasoning_content = True\n            if chunk[\"message\"].get(\"content\") is not None:\n                message_content = chunk[\"message\"].get(\"content\")\n                # Track whether we are inside <think>...</think> tagged content.\n                in_think_tag_block = bool(getattr(self, \"_in_think_tag_block\", False))\n                if \"<think>\" in message_content:\n                    message_content = message_content.replace(\"<think>\", \"\")\n                    self.started_reasoning_content = True\n                    self.finished_reasoning_content = False\n                    in_think_tag_block = True\n                if \"</think>\" in message_content and self.started_reasoning_content:\n                    message_content = message_content.replace(\"</think>\", \"\")\n                    self.finished_reasoning_content = True\n                    in_think_tag_block = False\n\n                # For native Ollama \"thinking\" streams, content without active\n                # think tags indicates a transition into regular assistant output.\n                if (\n                    self.started_reasoning_content\n                    and not self.finished_reasoning_content\n                    and not in_think_tag_block\n                    and not thinking_content\n                ):\n                    self.finished_reasoning_content = True\n\n                self._in_think_tag_block = in_think_tag_block\n\n                # When Ollama returns both \"thinking\" and \"content\" in the same\n                # chunk, preserve both instead of classifying content as reasoning.\n                if thinking_content and not in_think_tag_block:\n                    content = message_content\n                elif (\n                    self.started_reasoning_content\n                    and not self.finished_reasoning_content\n                ):\n                    reasoning_content = message_content\n                else:\n                    content = message_content\n\n            delta = Delta(\n                content=content,\n                reasoning_content=reasoning_content,\n                tool_calls=tool_calls,\n            )\n            if chunk[\"done\"] is True:\n                finish_reason = chunk.get(\"done_reason\", \"stop\")\n                choices = [\n                    StreamingChoices(\n                        delta=delta,\n                        finish_reason=finish_reason,\n                    )\n                ]\n            else:\n                choices = [\n                    StreamingChoices(\n                        delta=delta,\n                    )\n                ]\n\n            usage = ChatCompletionUsageBlock(\n                prompt_tokens=chunk.get(\"prompt_eval_count\", 0),\n                completion_tokens=chunk.get(\"eval_count\", 0),\n                total_tokens=chunk.get(\"prompt_eval_count\", 0)\n                + chunk.get(\"eval_count\", 0),\n            )\n\n            return ModelResponseStream(\n                id=str(uuid.uuid4()),\n                object=\"chat.completion.chunk\",\n                created=int(time.time()),  # ollama created_at is in UTC\n                usage=usage,\n                model=chunk[\"model\"],\n                choices=choices,\n            )\n        except KeyError as e:\n            raise OllamaError(\n                message=f\"KeyError: {e}, Got unexpected response from Ollama: {chunk}\",\n                status_code=400,\n                headers={\"Content-Type\": \"application/json\"},\n            )\n        except Exception as e:\n            raise e\n\n    OllamaChatCompletionResponseIterator.chunk_parser = _patched_chunk_parser  # type: ignore[method-assign]\n\n\ndef _patch_openai_responses_parallel_tool_calls() -> None:\n    \"\"\"\n    Patches OpenAiResponsesToChatCompletionStreamIterator to properly handle:\n    1. Parallel tool calls by using output_index from streaming events\n    2. Reasoning summary sections by inserting newlines between different summary indices\n\n    LiteLLM's implementation hardcodes index=0 for all tool calls, breaking parallel tool calls.\n    The OpenAI Responses API provides output_index in each event to track which tool call\n    the event belongs to.\n\n    STATUS: STILL NEEDED - LiteLLM hardcodes index=0 in translate_responses_chunk_to_openai_stream\n            for response.output_item.added (line 962), response.function_call_arguments.delta\n            (line 989), and response.output_item.done (line 1033). Our patch uses output_index\n            from the event to properly track parallel tool calls.\n    \"\"\"\n    if (\n        getattr(\n            OpenAiResponsesToChatCompletionStreamIterator.chunk_parser,\n            \"__name__\",\n            \"\",\n        )\n        == \"_patched_responses_chunk_parser\"\n    ):\n        return\n\n    def _patched_responses_chunk_parser(\n        self: Any, chunk: dict\n    ) -> \"ModelResponseStream\":\n        from pydantic import BaseModel\n\n        from litellm.types.llms.openai import (\n            ChatCompletionToolCallFunctionChunk,\n            ResponsesAPIStreamEvents,\n        )\n        from litellm.types.utils import (\n            ChatCompletionToolCallChunk,\n            Delta,\n            ModelResponseStream,\n            StreamingChoices,\n        )\n\n        parsed_chunk = chunk\n        if not parsed_chunk:\n            raise ValueError(\"Chat provider: Empty parsed_chunk\")\n\n        if isinstance(parsed_chunk, BaseModel):\n            parsed_chunk = parsed_chunk.model_dump()\n        if not isinstance(parsed_chunk, dict):\n            raise ValueError(f\"Chat provider: Invalid chunk type {type(parsed_chunk)}\")\n\n        event_type = parsed_chunk.get(\"type\")\n        if isinstance(event_type, ResponsesAPIStreamEvents):\n            event_type = event_type.value\n\n        # Get the output_index for proper parallel tool call tracking\n        output_index = parsed_chunk.get(\"output_index\", 0)\n\n        if event_type == \"response.output_item.added\":\n            output_item = parsed_chunk.get(\"item\", {})\n            if output_item.get(\"type\") == \"function_call\":\n                provider_specific_fields = output_item.get(\"provider_specific_fields\")\n                if provider_specific_fields and not isinstance(\n                    provider_specific_fields, dict\n                ):\n                    provider_specific_fields = (\n                        dict(provider_specific_fields)\n                        if hasattr(provider_specific_fields, \"__dict__\")\n                        else {}\n                    )\n\n                function_chunk = ChatCompletionToolCallFunctionChunk(\n                    name=output_item.get(\"name\", None),\n                    arguments=parsed_chunk.get(\"arguments\", \"\"),\n                )\n                if provider_specific_fields:\n                    function_chunk[\"provider_specific_fields\"] = (\n                        provider_specific_fields\n                    )\n\n                tool_call_chunk = ChatCompletionToolCallChunk(\n                    id=output_item.get(\"call_id\"),\n                    index=output_index,  # Use output_index for parallel tool calls\n                    type=\"function\",\n                    function=function_chunk,\n                )\n                if provider_specific_fields:\n                    tool_call_chunk.provider_specific_fields = provider_specific_fields  # type: ignore\n\n                return ModelResponseStream(\n                    choices=[\n                        StreamingChoices(\n                            index=0,\n                            delta=Delta(tool_calls=[tool_call_chunk]),\n                            finish_reason=None,\n                        )\n                    ]\n                )\n\n        elif event_type == \"response.function_call_arguments.delta\":\n            content_part: Optional[str] = parsed_chunk.get(\"delta\", None)\n            if content_part:\n                return ModelResponseStream(\n                    choices=[\n                        StreamingChoices(\n                            index=0,\n                            delta=Delta(\n                                tool_calls=[\n                                    ChatCompletionToolCallChunk(\n                                        id=None,\n                                        index=output_index,  # Use output_index for parallel tool calls\n                                        type=\"function\",\n                                        function=ChatCompletionToolCallFunctionChunk(\n                                            name=None, arguments=content_part\n                                        ),\n                                    )\n                                ]\n                            ),\n                            finish_reason=None,\n                        )\n                    ]\n                )\n            else:\n                raise ValueError(\n                    f\"Chat provider: Invalid function argument delta {parsed_chunk}\"\n                )\n\n        elif event_type == \"response.output_item.done\":\n            output_item = parsed_chunk.get(\"item\", {})\n            if output_item.get(\"type\") == \"function_call\":\n                provider_specific_fields = output_item.get(\"provider_specific_fields\")\n                if provider_specific_fields and not isinstance(\n                    provider_specific_fields, dict\n                ):\n                    provider_specific_fields = (\n                        dict(provider_specific_fields)\n                        if hasattr(provider_specific_fields, \"__dict__\")\n                        else {}\n                    )\n\n                function_chunk = ChatCompletionToolCallFunctionChunk(\n                    name=output_item.get(\"name\", None),\n                    arguments=\"\",  # responses API sends everything again, we don't need it\n                )\n                if provider_specific_fields:\n                    function_chunk[\"provider_specific_fields\"] = (\n                        provider_specific_fields\n                    )\n\n                tool_call_chunk = ChatCompletionToolCallChunk(\n                    id=output_item.get(\"call_id\"),\n                    index=output_index,  # Use output_index for parallel tool calls\n                    type=\"function\",\n                    function=function_chunk,\n                )\n                if provider_specific_fields:\n                    tool_call_chunk.provider_specific_fields = provider_specific_fields  # type: ignore\n\n                return ModelResponseStream(\n                    choices=[\n                        StreamingChoices(\n                            index=0,\n                            delta=Delta(tool_calls=[tool_call_chunk]),\n                            finish_reason=\"tool_calls\",\n                        )\n                    ]\n                )\n\n        elif event_type == \"response.reasoning_summary_text.delta\":\n            # Handle reasoning summary with newlines between sections\n            content_part = parsed_chunk.get(\"delta\", None)\n            if content_part:\n                summary_index = parsed_chunk.get(\"summary_index\", 0)\n\n                # Track the last summary index to insert newlines between parts\n                last_summary_index = getattr(\n                    self, \"_last_reasoning_summary_index\", None\n                )\n                if (\n                    last_summary_index is not None\n                    and summary_index != last_summary_index\n                ):\n                    # New summary part started, prepend newlines to separate them\n                    content_part = \"\\n\\n\" + content_part\n                self._last_reasoning_summary_index = summary_index\n\n                return ModelResponseStream(\n                    choices=[\n                        StreamingChoices(\n                            index=cast(int, summary_index),\n                            delta=Delta(reasoning_content=content_part),\n                        )\n                    ]\n                )\n\n        # For all other event types, use the original static method\n        return OpenAiResponsesToChatCompletionStreamIterator.translate_responses_chunk_to_openai_stream(\n            parsed_chunk\n        )\n\n    _patched_responses_chunk_parser.__name__ = \"_patched_responses_chunk_parser\"\n    OpenAiResponsesToChatCompletionStreamIterator.chunk_parser = _patched_responses_chunk_parser  # type: ignore[method-assign]\n\n\ndef _patch_openai_responses_transform_response() -> None:\n    \"\"\"\n    Patches LiteLLMResponsesTransformationHandler.transform_response to properly\n    concatenate multiple reasoning summary parts with newlines in non-streaming responses.\n    \"\"\"\n    # Store the original method\n    original_transform_response = (\n        LiteLLMResponsesTransformationHandler.transform_response\n    )\n\n    if (\n        getattr(\n            original_transform_response,\n            \"__name__\",\n            \"\",\n        )\n        == \"_patched_transform_response\"\n    ):\n        return\n\n    def _patched_transform_response(\n        self: Any,\n        model: str,\n        raw_response: Any,\n        model_response: Any,\n        logging_obj: Any,\n        request_data: dict,\n        messages: List[Any],\n        optional_params: dict,\n        litellm_params: dict,\n        encoding: Any,\n        api_key: Optional[str] = None,\n        json_mode: Optional[bool] = None,\n    ) -> Any:\n        \"\"\"\n        Patched transform_response that properly concatenates reasoning summary parts\n        with newlines.\n        \"\"\"\n        from openai.types.responses.response import Response as ResponsesAPIResponse\n        from openai.types.responses.response_reasoning_item import ResponseReasoningItem\n\n        # Check if raw_response has reasoning items that need concatenation\n        if isinstance(raw_response, ResponsesAPIResponse) and raw_response.output:\n            for item in raw_response.output:\n                if isinstance(item, ResponseReasoningItem) and item.summary:\n                    # Concatenate summary texts with double newlines\n                    summary_texts = []\n                    for summary_item in item.summary:\n                        text = getattr(summary_item, \"text\", \"\")\n                        if text:\n                            summary_texts.append(text)\n\n                    if len(summary_texts) > 1:\n                        # Modify the first summary item to contain all concatenated text\n                        combined_text = \"\\n\\n\".join(summary_texts)\n                        if hasattr(item.summary[0], \"text\"):\n                            # Create a modified copy of the response with concatenated text\n                            # Since OpenAI types are typically frozen, we need to work around this\n                            # by modifying the object after the fact or using the result\n                            pass  # The fix is applied in the result processing below\n\n        # Call the original method\n        result = original_transform_response(\n            self,\n            model,\n            raw_response,\n            model_response,\n            logging_obj,\n            request_data,\n            messages,\n            optional_params,\n            litellm_params,\n            encoding,\n            api_key,\n            json_mode,\n        )\n\n        # Post-process: If there are multiple summary items, fix the reasoning_content\n        if isinstance(raw_response, ResponsesAPIResponse) and raw_response.output:\n            for item in raw_response.output:\n                if isinstance(item, ResponseReasoningItem) and item.summary:\n                    if len(item.summary) > 1:\n                        # Concatenate all summary texts with double newlines\n                        summary_texts = []\n                        for summary_item in item.summary:\n                            text = getattr(summary_item, \"text\", \"\")\n                            if text:\n                                summary_texts.append(text)\n\n                        if summary_texts:\n                            combined_text = \"\\n\\n\".join(summary_texts)\n                            # Update the reasoning_content in the result choices\n                            if hasattr(result, \"choices\"):\n                                for choice in result.choices:\n                                    if hasattr(choice, \"message\") and hasattr(\n                                        choice.message, \"reasoning_content\"\n                                    ):\n                                        choice.message.reasoning_content = combined_text\n                    break  # Only process the first reasoning item\n\n        return result\n\n    _patched_transform_response.__name__ = \"_patched_transform_response\"\n    LiteLLMResponsesTransformationHandler.transform_response = _patched_transform_response  # type: ignore[method-assign]\n\n\ndef _patch_azure_responses_should_fake_stream() -> None:\n    \"\"\"\n    Patches AzureOpenAIResponsesAPIConfig.should_fake_stream to always return False.\n\n    By default, LiteLLM uses \"fake streaming\" (MockResponsesAPIStreamingIterator) for models\n    not in its database. This causes Azure custom model deployments to buffer the entire\n    response before yielding, resulting in poor time-to-first-token.\n\n    Azure's Responses API supports native streaming, so we override this to always use\n    real streaming (SyncResponsesAPIStreamingIterator).\n    \"\"\"\n    from litellm.llms.azure.responses.transformation import (\n        AzureOpenAIResponsesAPIConfig,\n    )\n\n    if (\n        getattr(AzureOpenAIResponsesAPIConfig.should_fake_stream, \"__name__\", \"\")\n        == \"_patched_should_fake_stream\"\n    ):\n        return\n\n    def _patched_should_fake_stream(\n        self: Any,  # noqa: ARG001\n        model: Optional[str],  # noqa: ARG001\n        stream: Optional[bool],  # noqa: ARG001\n        custom_llm_provider: Optional[str] = None,  # noqa: ARG001\n    ) -> bool:\n        # Azure Responses API supports native streaming - never fake it\n        return False\n\n    _patched_should_fake_stream.__name__ = \"_patched_should_fake_stream\"\n    AzureOpenAIResponsesAPIConfig.should_fake_stream = _patched_should_fake_stream  # type: ignore[method-assign]\n\n\ndef _patch_responses_api_usage_format() -> None:\n    \"\"\"\n    Patches ResponsesAPIResponse.model_construct to properly transform usage data\n    from chat completion format to Responses API format.\n\n    LiteLLM uses model_construct as a fallback in multiple places when ResponsesAPIResponse\n    validation fails. This bypasses the usage validator, allowing usage data in chat\n    completion format (completion_tokens, prompt_tokens) to be stored instead of Responses\n    API format (input_tokens, output_tokens), causing Pydantic serialization warnings.\n\n    This patch wraps model_construct to transform usage before construction, ensuring\n    the correct type regardless of which code path calls model_construct.\n\n    Affected locations in LiteLLM:\n    - litellm/llms/openai/responses/transformation.py (lines 183, 563)\n    - litellm/llms/chatgpt/responses/transformation.py (line 153)\n    - litellm/llms/manus/responses/transformation.py (lines 243, 334)\n    - litellm/llms/volcengine/responses/transformation.py (line 280)\n    - litellm/completion_extras/litellm_responses_transformation/handler.py (line 51)\n    \"\"\"\n    from litellm.types.llms.openai import ResponseAPIUsage, ResponsesAPIResponse\n\n    original_model_construct = ResponsesAPIResponse.model_construct\n\n    if getattr(original_model_construct, \"_is_patched\", False):\n        return\n\n    @classmethod  # type: ignore[misc]\n    def _patched_model_construct(\n        cls: Any,\n        _fields_set: Optional[set[str]] = None,\n        **values: Any,\n    ) -> \"ResponsesAPIResponse\":\n        \"\"\"\n        Patched model_construct that ensures usage is a ResponseAPIUsage object.\n        \"\"\"\n        # Transform usage if present and not already the correct type\n        if \"usage\" in values and values[\"usage\"] is not None:\n            usage = values[\"usage\"]\n            if not isinstance(usage, ResponseAPIUsage):\n                if isinstance(usage, dict):\n                    values = dict(values)  # Don't mutate original\n                    # Check if it's in chat completion format\n                    if \"prompt_tokens\" in usage or \"completion_tokens\" in usage:\n                        # Transform from chat completion format\n                        values[\"usage\"] = ResponseAPIUsage(\n                            input_tokens=usage.get(\"prompt_tokens\", 0),\n                            output_tokens=usage.get(\"completion_tokens\", 0),\n                            total_tokens=usage.get(\"total_tokens\", 0),\n                        )\n                    elif \"input_tokens\" in usage or \"output_tokens\" in usage:\n                        # Already in Responses API format, just convert to proper type\n                        values[\"usage\"] = ResponseAPIUsage(\n                            input_tokens=usage.get(\"input_tokens\", 0),\n                            output_tokens=usage.get(\"output_tokens\", 0),\n                            total_tokens=usage.get(\"total_tokens\", 0),\n                        )\n\n        # Call original model_construct (need to call it as unbound method)\n        return original_model_construct.__func__(cls, _fields_set, **values)  # type: ignore[attr-defined]\n\n    _patched_model_construct._is_patched = True  # type: ignore[attr-defined]\n    ResponsesAPIResponse.model_construct = _patched_model_construct  # type: ignore[method-assign, assignment]\n\n\ndef _patch_logging_assembled_streaming_response() -> None:\n    \"\"\"\n    Patches LiteLLMLoggingObj._get_assembled_streaming_response to create a deep copy\n    of the ResponsesAPIResponse before modifying its usage field.\n\n    The original code transforms usage to chat completion format and sets it as a dict\n    directly on the ResponsesAPIResponse.usage field. This mutates the original object,\n    causing Pydantic serialization warnings when model_dump() is called later because\n    the usage field contains a dict instead of the expected ResponseAPIUsage type.\n\n    This patch creates a copy of the response before modification, preserving the\n    original object with its proper ResponseAPIUsage type.\n    \"\"\"\n    from litellm import LiteLLMLoggingObj\n    from litellm.responses.utils import ResponseAPILoggingUtils\n    from litellm.types.llms.openai import (\n        ResponseAPIUsage,\n        ResponseCompletedEvent,\n        ResponsesAPIResponse,\n    )\n    from litellm.types.utils import ModelResponse, TextCompletionResponse\n\n    original_method = LiteLLMLoggingObj._get_assembled_streaming_response\n\n    if getattr(original_method, \"_is_patched\", False):\n        return\n\n    def _patched_get_assembled_streaming_response(\n        self: Any,  # noqa: ARG001\n        result: Any,\n        start_time: Any,  # noqa: ARG001\n        end_time: Any,  # noqa: ARG001\n        is_async: bool,  # noqa: ARG001\n        streaming_chunks: List[Any],  # noqa: ARG001\n    ) -> Any:\n        \"\"\"\n        Patched version that creates a copy before modifying usage.\n\n        The original LiteLLM code transforms usage to chat completion format and\n        sets it directly as a dict, which causes Pydantic serialization warnings.\n        This patch uses model_construct to rebuild the response with the transformed\n        usage, ensuring proper typing.\n        \"\"\"\n        if isinstance(result, ModelResponse):\n            return result\n        elif isinstance(result, TextCompletionResponse):\n            return result\n        elif isinstance(result, ResponseCompletedEvent):\n            # Get the original response data\n            original_response = result.response\n            response_data = original_response.model_dump()\n\n            # Transform usage if present\n            if isinstance(original_response.usage, ResponseAPIUsage):\n                transformed_usage = (\n                    ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(\n                        original_response.usage\n                    )\n                )\n                # Put the transformed usage (in chat completion format) into response_data\n                # Our patched model_construct will convert it back to ResponseAPIUsage\n                response_data[\"usage\"] = (\n                    transformed_usage.model_dump()\n                    if hasattr(transformed_usage, \"model_dump\")\n                    else dict(transformed_usage)\n                )\n\n            # Rebuild using model_construct - our patch ensures usage is properly typed\n            response_copy = ResponsesAPIResponse.model_construct(**response_data)\n\n            # Copy hidden params\n            if hasattr(original_response, \"_hidden_params\"):\n                response_copy._hidden_params = dict(original_response._hidden_params)\n\n            return response_copy\n        else:\n            return None\n\n    _patched_get_assembled_streaming_response._is_patched = True  # type: ignore[attr-defined]\n    LiteLLMLoggingObj._get_assembled_streaming_response = _patched_get_assembled_streaming_response  # type: ignore[method-assign]\n\n\ndef _patch_responses_metadata_none() -> None:\n    \"\"\"\n    Patches litellm.responses to normalize metadata=None to metadata={} in kwargs.\n\n    LiteLLM's @client decorator wrapper in utils.py (line 1721) does:\n        _is_litellm_router_call = \"model_group\" in kwargs.get(\"metadata\", {})\n    When metadata is explicitly None in kwargs, kwargs.get(\"metadata\", {}) returns\n    None (the key exists, so the default is not used), causing:\n        TypeError: argument of type 'NoneType' is not iterable\n\n    This swallows the real exception (e.g. AuthenticationError) and surfaces as:\n        APIConnectionError: OpenAIException - argument of type 'NoneType' is not iterable\n\n    This happens when the Responses API bridge calls litellm.responses() with\n    **litellm_params which may contain metadata=None.\n\n    STATUS: STILL NEEDED - litellm/utils.py wrapper function uses kwargs.get(\"metadata\", {})\n            which does not guard against metadata being explicitly None. Same pattern exists\n            on line 1407 for async path.\n    \"\"\"\n    import litellm as _litellm\n    from functools import wraps\n\n    original_responses = _litellm.responses\n\n    if getattr(original_responses, \"_metadata_patched\", False):\n        return\n\n    @wraps(original_responses)\n    def _patched_responses(*args: Any, **kwargs: Any) -> Any:\n        if kwargs.get(\"metadata\") is None:\n            kwargs[\"metadata\"] = {}\n        return original_responses(*args, **kwargs)\n\n    _patched_responses._metadata_patched = True  # type: ignore[attr-defined]\n    _litellm.responses = _patched_responses\n\n\ndef apply_monkey_patches() -> None:\n    \"\"\"\n    Apply all necessary monkey patches to LiteLLM for compatibility.\n\n    This includes:\n    - Patching OllamaChatCompletionResponseIterator.chunk_parser for streaming content\n    - Patching translate_responses_chunk_to_openai_stream for parallel tool calls\n    - Patching LiteLLMResponsesTransformationHandler.transform_response for non-streaming responses\n    - Patching AzureOpenAIResponsesAPIConfig.should_fake_stream to enable native streaming\n    - Patching ResponsesAPIResponse.model_construct to fix usage format in all code paths\n    - Patching LiteLLMLoggingObj._get_assembled_streaming_response to avoid mutating original response\n    - Patching litellm.responses to fix metadata=None causing TypeError in error handling\n    \"\"\"\n    _patch_ollama_chunk_parser()\n    _patch_openai_responses_parallel_tool_calls()\n    _patch_openai_responses_transform_response()\n    _patch_azure_responses_should_fake_stream()\n    _patch_responses_api_usage_format()\n    _patch_logging_assembled_streaming_response()\n    _patch_responses_metadata_none()\n"
  },
  {
    "path": "backend/onyx/llm/model_metadata_enrichments.json",
    "content": "{\n  \"ai21.j2-mid-v1\": {\n    \"display_name\": \"J2 Mid\",\n    \"model_vendor\": \"ai21\",\n    \"model_version\": \"v1\"\n  },\n  \"ai21.j2-ultra-v1\": {\n    \"display_name\": \"J2 Ultra\",\n    \"model_vendor\": \"ai21\",\n    \"model_version\": \"v1\"\n  },\n  \"ai21.jamba-1-5-large-v1:0\": {\n    \"display_name\": \"Jamba 1.5 Large\",\n    \"model_vendor\": \"ai21\",\n    \"model_version\": \"v1:0\"\n  },\n  \"ai21.jamba-1-5-mini-v1:0\": {\n    \"display_name\": \"Jamba 1.5 Mini\",\n    \"model_vendor\": \"ai21\",\n    \"model_version\": \"v1:0\"\n  },\n  \"ai21.jamba-instruct-v1:0\": {\n    \"display_name\": \"Jamba Instruct\",\n    \"model_vendor\": \"ai21\",\n    \"model_version\": \"v1:0\"\n  },\n  \"amazon.nova-lite-v1:0\": {\n    \"display_name\": \"Nova Lite\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"amazon.nova-micro-v1:0\": {\n    \"display_name\": \"Nova Micro\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"amazon.nova-pro-v1:0\": {\n    \"display_name\": \"Nova Pro\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"amazon.titan-text-express-v1\": {\n    \"display_name\": \"Titan Text Express\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1\"\n  },\n  \"amazon.titan-text-lite-v1\": {\n    \"display_name\": \"Titan Text Lite\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1\"\n  },\n  \"amazon.titan-text-premier-v1:0\": {\n    \"display_name\": \"Titan Text Premier\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"anthropic.claude-3-5-sonnet-20240620-v1:0\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240620-v1:0\"\n  },\n  \"anthropic.claude-3-5-sonnet-20241022-v2:0\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20241022-v2:0\"\n  },\n  \"anthropic.claude-3-sonnet-20240229-v1:0\": {\n    \"display_name\": \"Claude Sonnet 3\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240229-v1:0\"\n  },\n  \"anthropic.claude-haiku-4-5-20251001-v1:0\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251001-v1:0\"\n  },\n  \"anthropic.claude-haiku-4-5@20251001\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251001\"\n  },\n  \"anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"anthropic.claude-opus-4-1-20250805-v1:0\": {\n    \"display_name\": \"Claude Opus 4.1\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250805-v1:0\"\n  },\n  \"anthropic.claude-opus-4-20250514-v1:0\": {\n    \"display_name\": \"Claude Opus 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514-v1:0\"\n  },\n  \"anthropic.claude-opus-4-5-20251101-v1:0\": {\n    \"display_name\": \"Claude Opus 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251101-v1:0\"\n  },\n  \"anthropic.claude-sonnet-4-20250514-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514-v1:0\"\n  },\n  \"anthropic.claude-sonnet-4-5-20250929-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250929-v1:0\"\n  },\n  \"anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"apac.amazon.nova-lite-v1:0\": {\n    \"display_name\": \"Nova Lite\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"apac.amazon.nova-micro-v1:0\": {\n    \"display_name\": \"Nova Micro\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"apac.amazon.nova-pro-v1:0\": {\n    \"display_name\": \"Nova Pro\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"apac.anthropic.claude-3-5-sonnet-20240620-v1:0\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240620-v1:0\"\n  },\n  \"apac.anthropic.claude-3-5-sonnet-20241022-v2:0\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20241022-v2:0\"\n  },\n  \"apac.anthropic.claude-3-sonnet-20240229-v1:0\": {\n    \"display_name\": \"Claude Sonnet 3\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240229-v1:0\"\n  },\n  \"apac.anthropic.claude-haiku-4-5-20251001-v1:0\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251001-v1:0\"\n  },\n  \"apac.anthropic.claude-sonnet-4-20250514-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514-v1:0\"\n  },\n  \"au.anthropic.claude-haiku-4-5-20251001-v1:0\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251001-v1:0\"\n  },\n  \"au.anthropic.claude-sonnet-4-5-20250929-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250929-v1:0\"\n  },\n  \"azure/claude-haiku-4-5\": {\n    \"display_name\": \"Claude Haiku\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"azure/claude-opus-4-1\": {\n    \"display_name\": \"Claude Opus\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"azure/claude-sonnet-4-5\": {\n    \"display_name\": \"Claude Sonnet\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"azure/codex-mini\": {\n    \"display_name\": \"Codex Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/command-r-plus\": {\n    \"display_name\": \"Command R Plus\",\n    \"model_vendor\": \"cohere\",\n    \"model_version\": \"latest\"\n  },\n  \"azure/computer-use-preview\": {\n    \"display_name\": \"Computer Use Preview\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"preview\"\n  },\n  \"azure/container\": {\n    \"display_name\": \"Container\",\n    \"model_vendor\": \"azure\",\n    \"model_version\": \"latest\"\n  },\n  \"azure/eu/gpt-4o-2024-08-06\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-08-06\"\n  },\n  \"azure/eu/gpt-4o-2024-11-20\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-11-20\"\n  },\n  \"azure/eu/gpt-4o-mini-2024-07-18\": {\n    \"display_name\": \"GPT-4o Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-07-18\"\n  },\n  \"azure/eu/gpt-4o-mini-realtime-preview-2024-12-17\": {\n    \"display_name\": \"GPT-4o Mini Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"azure/eu/gpt-4o-realtime-preview-2024-10-01\": {\n    \"display_name\": \"GPT-4o Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-10-01\"\n  },\n  \"azure/eu/gpt-4o-realtime-preview-2024-12-17\": {\n    \"display_name\": \"GPT-4o Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"azure/eu/gpt-5-2025-08-07\": {\n    \"display_name\": \"GPT-5\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-07\"\n  },\n  \"azure/eu/gpt-5-mini-2025-08-07\": {\n    \"display_name\": \"GPT-5 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-07\"\n  },\n  \"azure/eu/gpt-5-nano-2025-08-07\": {\n    \"display_name\": \"GPT 5 Nano\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-07\"\n  },\n  \"azure/eu/gpt-5.1\": {\n    \"display_name\": \"GPT-5.1\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/eu/gpt-5.1-chat\": {\n    \"display_name\": \"GPT-5.1 Chat\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/eu/gpt-5.1-codex\": {\n    \"display_name\": \"GPT-5.1 Codex\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/eu/gpt-5.1-codex-mini\": {\n    \"display_name\": \"GPT-5.1 Codex Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/eu/o1-2024-12-17\": {\n    \"display_name\": \"o1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"azure/eu/o1-mini-2024-09-12\": {\n    \"display_name\": \"o1 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-09-12\"\n  },\n  \"azure/eu/o1-preview-2024-09-12\": {\n    \"display_name\": \"o1 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-09-12\"\n  },\n  \"azure/eu/o3-mini-2025-01-31\": {\n    \"display_name\": \"o3 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-01-31\"\n  },\n  \"azure/global-standard/gpt-4o-2024-08-06\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-08-06\"\n  },\n  \"azure/global-standard/gpt-4o-2024-11-20\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-11-20\"\n  },\n  \"azure/global-standard/gpt-4o-mini\": {\n    \"display_name\": \"GPT-4o Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/global/gpt-4o-2024-08-06\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-08-06\"\n  },\n  \"azure/global/gpt-4o-2024-11-20\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-11-20\"\n  },\n  \"azure/global/gpt-5.1\": {\n    \"display_name\": \"GPT-5.1\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/global/gpt-5.1-chat\": {\n    \"display_name\": \"GPT-5.1 Chat\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/global/gpt-5.1-codex\": {\n    \"display_name\": \"GPT-5.1 Codex\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/global/gpt-5.1-codex-mini\": {\n    \"display_name\": \"GPT-5.1 Codex Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-3.5-turbo\": {\n    \"display_name\": \"GPT-3.5 Turbo\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-3.5-turbo-0125\": {\n    \"display_name\": \"GPT 3.5 Turbo 0125\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0125\"\n  },\n  \"azure/gpt-3.5-turbo-instruct-0914\": {\n    \"display_name\": \"GPT-3.5 Turbo\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0914\"\n  },\n  \"azure/gpt-35-turbo\": {\n    \"display_name\": \"GPT-3.5 Turbo\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-35-turbo-0125\": {\n    \"display_name\": \"GPT-3.5 Turbo\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0125\"\n  },\n  \"azure/gpt-35-turbo-0301\": {\n    \"display_name\": \"GPT-3.5 Turbo\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0301\"\n  },\n  \"azure/gpt-35-turbo-0613\": {\n    \"display_name\": \"GPT-3.5 Turbo\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0613\"\n  },\n  \"azure/gpt-35-turbo-1106\": {\n    \"display_name\": \"GPT-3.5 Turbo\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"1106\"\n  },\n  \"azure/gpt-35-turbo-16k\": {\n    \"display_name\": \"GPT-3.5 Turbo 16K\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-35-turbo-16k-0613\": {\n    \"display_name\": \"GPT-3.5 Turbo 16K\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0613\"\n  },\n  \"azure/gpt-35-turbo-instruct\": {\n    \"display_name\": \"GPT-3.5 Turbo Instruct\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-35-turbo-instruct-0914\": {\n    \"display_name\": \"GPT-3.5 Turbo\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0914\"\n  },\n  \"azure/gpt-4\": {\n    \"display_name\": \"GPT-4\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4-0125-preview\": {\n    \"display_name\": \"GPT 4 0125 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0125\"\n  },\n  \"azure/gpt-4-0613\": {\n    \"display_name\": \"GPT 4 0613\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0613\"\n  },\n  \"azure/gpt-4-1106-preview\": {\n    \"display_name\": \"GPT 4 1106 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"1106\"\n  },\n  \"azure/gpt-4-32k\": {\n    \"display_name\": \"GPT-4 32K\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4-32k-0613\": {\n    \"display_name\": \"GPT 4 32k 0613\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0613\"\n  },\n  \"azure/gpt-4-turbo\": {\n    \"display_name\": \"GPT-4 Turbo\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4-turbo-2024-04-09\": {\n    \"display_name\": \"GPT-4 Turbo\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-04-09\"\n  },\n  \"azure/gpt-4-turbo-vision-preview\": {\n    \"display_name\": \"GPT-4 Turbo Vision Preview\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4.1\": {\n    \"display_name\": \"GPT-4.1\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4.1-2025-04-14\": {\n    \"display_name\": \"GPT-4.1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-14\"\n  },\n  \"azure/gpt-4.1-mini\": {\n    \"display_name\": \"GPT-4.1 Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4.1-mini-2025-04-14\": {\n    \"display_name\": \"GPT-4.1 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-14\"\n  },\n  \"azure/gpt-4.1-nano\": {\n    \"display_name\": \"GPT-4.1 Nano\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4.1-nano-2025-04-14\": {\n    \"display_name\": \"GPT-4.1 Nano\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-14\"\n  },\n  \"azure/gpt-4.5-preview\": {\n    \"display_name\": \"GPT-4.5 Preview\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4o\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4o-2024-05-13\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-05-13\"\n  },\n  \"azure/gpt-4o-2024-08-06\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-08-06\"\n  },\n  \"azure/gpt-4o-2024-11-20\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-11-20\"\n  },\n  \"azure/gpt-4o-audio-preview-2024-12-17\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"azure/gpt-4o-mini\": {\n    \"display_name\": \"GPT-4o Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4o-mini-2024-07-18\": {\n    \"display_name\": \"GPT-4o Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-07-18\"\n  },\n  \"azure/gpt-4o-mini-audio-preview-2024-12-17\": {\n    \"display_name\": \"GPT-4o Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"azure/gpt-4o-mini-realtime-preview-2024-12-17\": {\n    \"display_name\": \"GPT-4o Mini Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"azure/gpt-4o-mini-transcribe\": {\n    \"display_name\": \"GPT-4o Mini Transcribe\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4o-mini-tts\": {\n    \"display_name\": \"GPT-4o Mini TTS\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4o-realtime-preview-2024-10-01\": {\n    \"display_name\": \"GPT-4o Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-10-01\"\n  },\n  \"azure/gpt-4o-realtime-preview-2024-12-17\": {\n    \"display_name\": \"GPT-4o Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"azure/gpt-4o-transcribe\": {\n    \"display_name\": \"GPT-4o Transcribe\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-4o-transcribe-diarize\": {\n    \"display_name\": \"GPT-4o Transcribe Diarize\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-5\": {\n    \"display_name\": \"GPT-5\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-5-2025-08-07\": {\n    \"display_name\": \"GPT-5\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-07\"\n  },\n  \"azure/gpt-5-chat\": {\n    \"display_name\": \"GPT-5 Chat\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-5-chat-latest\": {\n    \"display_name\": \"GPT 5 Chat\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure/gpt-5-codex\": {\n    \"display_name\": \"GPT-5 Codex\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-5-mini\": {\n    \"display_name\": \"GPT-5 Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-5-mini-2025-08-07\": {\n    \"display_name\": \"GPT-5 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-07\"\n  },\n  \"azure/gpt-5-nano\": {\n    \"display_name\": \"GPT-5 Nano\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-5-nano-2025-08-07\": {\n    \"display_name\": \"GPT 5 Nano\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-07\"\n  },\n  \"azure/gpt-5-pro\": {\n    \"display_name\": \"GPT-5 Pro\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-5.1\": {\n    \"display_name\": \"GPT-5.1\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-5.1-2025-11-13\": {\n    \"display_name\": \"GPT 5.1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-11-13\"\n  },\n  \"azure/gpt-5.1-chat\": {\n    \"display_name\": \"GPT-5.1 Chat\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-5.1-chat-2025-11-13\": {\n    \"display_name\": \"GPT 5.1 Chat\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-11-13\"\n  },\n  \"azure/gpt-5.1-codex\": {\n    \"display_name\": \"GPT-5.1 Codex\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-5.1-codex-2025-11-13\": {\n    \"display_name\": \"GPT-5.1 Codex\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-11-13\"\n  },\n  \"azure/gpt-5.1-codex-mini\": {\n    \"display_name\": \"GPT-5.1 Codex Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/gpt-5.1-codex-mini-2025-11-13\": {\n    \"display_name\": \"GPT-5.1 Codex Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-11-13\"\n  },\n  \"azure/gpt-audio-2025-08-28\": {\n    \"display_name\": \"GPT Audio\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-28\"\n  },\n  \"azure/gpt-audio-mini-2025-10-06\": {\n    \"display_name\": \"GPT Audio Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-10-06\"\n  },\n  \"azure/gpt-realtime-2025-08-28\": {\n    \"display_name\": \"GPT Realtime\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-28\"\n  },\n  \"azure/gpt-realtime-mini-2025-10-06\": {\n    \"display_name\": \"GPT Realtime Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-10-06\"\n  },\n  \"azure/mistral-large-2402\": {\n    \"display_name\": \"Mistral Large 24.02\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"2402\"\n  },\n  \"azure/mistral-large-latest\": {\n    \"display_name\": \"Mistral Large\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"azure/o1\": {\n    \"display_name\": \"o1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure/o1-2024-12-17\": {\n    \"display_name\": \"o1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"azure/o1-mini\": {\n    \"display_name\": \"o1 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure/o1-mini-2024-09-12\": {\n    \"display_name\": \"o1 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-09-12\"\n  },\n  \"azure/o1-preview\": {\n    \"display_name\": \"o1 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure/o1-preview-2024-09-12\": {\n    \"display_name\": \"o1 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-09-12\"\n  },\n  \"azure/o3\": {\n    \"display_name\": \"o3\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure/o3-2025-04-16\": {\n    \"display_name\": \"o3\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-16\"\n  },\n  \"azure/o3-deep-research\": {\n    \"display_name\": \"O3\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure/o3-mini\": {\n    \"display_name\": \"o3 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure/o3-mini-2025-01-31\": {\n    \"display_name\": \"o3 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-01-31\"\n  },\n  \"azure/o3-pro\": {\n    \"display_name\": \"O3\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure/o3-pro-2025-06-10\": {\n    \"display_name\": \"O3\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-06-10\"\n  },\n  \"azure/o4-mini\": {\n    \"display_name\": \"o4 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure/o4-mini-2025-04-16\": {\n    \"display_name\": \"o4 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-16\"\n  },\n  \"azure/us/gpt-4.1-2025-04-14\": {\n    \"display_name\": \"GPT-4.1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-14\"\n  },\n  \"azure/us/gpt-4.1-mini-2025-04-14\": {\n    \"display_name\": \"GPT-4.1 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-14\"\n  },\n  \"azure/us/gpt-4.1-nano-2025-04-14\": {\n    \"display_name\": \"GPT-4.1 Nano\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-14\"\n  },\n  \"azure/us/gpt-4o-2024-08-06\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-08-06\"\n  },\n  \"azure/us/gpt-4o-2024-11-20\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-11-20\"\n  },\n  \"azure/us/gpt-4o-mini-2024-07-18\": {\n    \"display_name\": \"GPT-4o Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-07-18\"\n  },\n  \"azure/us/gpt-4o-mini-realtime-preview-2024-12-17\": {\n    \"display_name\": \"GPT-4o Mini Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"azure/us/gpt-4o-realtime-preview-2024-10-01\": {\n    \"display_name\": \"GPT-4o Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-10-01\"\n  },\n  \"azure/us/gpt-4o-realtime-preview-2024-12-17\": {\n    \"display_name\": \"GPT-4o Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"azure/us/gpt-5-2025-08-07\": {\n    \"display_name\": \"GPT-5\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-07\"\n  },\n  \"azure/us/gpt-5-mini-2025-08-07\": {\n    \"display_name\": \"GPT-5 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-07\"\n  },\n  \"azure/us/gpt-5-nano-2025-08-07\": {\n    \"display_name\": \"GPT 5 Nano\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-07\"\n  },\n  \"azure/us/gpt-5.1\": {\n    \"display_name\": \"GPT-5.1\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/us/gpt-5.1-chat\": {\n    \"display_name\": \"GPT-5.1 Chat\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/us/gpt-5.1-codex\": {\n    \"display_name\": \"GPT-5.1 Codex\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/us/gpt-5.1-codex-mini\": {\n    \"display_name\": \"GPT-5.1 Codex Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"azure/us/o1-2024-12-17\": {\n    \"display_name\": \"o1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"azure/us/o1-mini-2024-09-12\": {\n    \"display_name\": \"o1 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-09-12\"\n  },\n  \"azure/us/o1-preview-2024-09-12\": {\n    \"display_name\": \"o1 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-09-12\"\n  },\n  \"azure/us/o3-2025-04-16\": {\n    \"display_name\": \"o3\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-16\"\n  },\n  \"azure/us/o3-mini-2025-01-31\": {\n    \"display_name\": \"o3 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-01-31\"\n  },\n  \"azure/us/o4-mini-2025-04-16\": {\n    \"display_name\": \"o4 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-16\"\n  },\n  \"azure_ai/Llama-3.2-11B-Vision-Instruct\": {\n    \"display_name\": \"Llama 3.2 11B Vision Instruct\",\n    \"model_vendor\": \"meta\"\n  },\n  \"azure_ai/Llama-3.2-90B-Vision-Instruct\": {\n    \"display_name\": \"Llama 3.2 90B Vision Instruct\",\n    \"model_vendor\": \"meta\"\n  },\n  \"azure_ai/Llama-3.3-70B-Instruct\": {\n    \"display_name\": \"Llama 3.3 70B Instruct\",\n    \"model_vendor\": \"meta\"\n  },\n  \"azure_ai/Llama-4-Maverick-17B-128E-Instruct-FP8\": {\n    \"display_name\": \"Llama 4 Maverick 17B 128E Instruct FP8\",\n    \"model_vendor\": \"meta\"\n  },\n  \"azure_ai/Llama-4-Scout-17B-16E-Instruct\": {\n    \"display_name\": \"Llama 4 Scout 17B 16E Instruct\",\n    \"model_vendor\": \"meta\"\n  },\n  \"azure_ai/MAI-DS-R1\": {\n    \"display_name\": \"MAI-DS-R1\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Meta-Llama-3-70B-Instruct\": {\n    \"display_name\": \"Llama 3 70B Instruct\",\n    \"model_vendor\": \"meta\"\n  },\n  \"azure_ai/Meta-Llama-3.1-405B-Instruct\": {\n    \"display_name\": \"Llama 3.1 405B Instruct\",\n    \"model_vendor\": \"meta\"\n  },\n  \"azure_ai/Meta-Llama-3.1-70B-Instruct\": {\n    \"display_name\": \"Llama 3.1 70B Instruct\",\n    \"model_vendor\": \"meta\"\n  },\n  \"azure_ai/Meta-Llama-3.1-8B-Instruct\": {\n    \"display_name\": \"Llama 3.1 8B Instruct\",\n    \"model_vendor\": \"meta\"\n  },\n  \"azure_ai/Phi-3-medium-128k-instruct\": {\n    \"display_name\": \"Phi 3 Medium 128k Instruct\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-3-medium-4k-instruct\": {\n    \"display_name\": \"Phi 3 Medium 4k Instruct\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-3-mini-128k-instruct\": {\n    \"display_name\": \"Phi 3 Mini 128k Instruct\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-3-mini-4k-instruct\": {\n    \"display_name\": \"Phi 3 Mini 4k Instruct\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-3-small-128k-instruct\": {\n    \"display_name\": \"Phi 3 Small 128k Instruct\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-3-small-8k-instruct\": {\n    \"display_name\": \"Phi 3 Small 8k Instruct\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-3.5-MoE-instruct\": {\n    \"display_name\": \"Phi 3.5 MOE Instruct\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-3.5-mini-instruct\": {\n    \"display_name\": \"Phi 3.5 Mini Instruct\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-3.5-vision-instruct\": {\n    \"display_name\": \"Phi 3.5 Vision Instruct\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-4\": {\n    \"display_name\": \"Phi 4\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-4-mini-instruct\": {\n    \"display_name\": \"Phi 4 Mini Instruct\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-4-mini-reasoning\": {\n    \"display_name\": \"Phi 4 Mini Reasoning\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-4-multimodal-instruct\": {\n    \"display_name\": \"Phi 4 Multimodal Instruct\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/Phi-4-reasoning\": {\n    \"display_name\": \"Phi 4 Reasoning\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/deepseek-r1\": {\n    \"display_name\": \"DeepSeek R1\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/deepseek-v3\": {\n    \"display_name\": \"DeepSeek V3\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"v3\"\n  },\n  \"azure_ai/deepseek-v3-0324\": {\n    \"display_name\": \"DeepSeek v3 0324\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"0324\"\n  },\n  \"azure_ai/global/grok-3\": {\n    \"display_name\": \"Grok 3\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/global/grok-3-mini\": {\n    \"display_name\": \"Grok 3 Mini\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/grok-3\": {\n    \"display_name\": \"Grok 3\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/grok-3-mini\": {\n    \"display_name\": \"Grok 3 Mini\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/grok-4\": {\n    \"display_name\": \"Grok 4\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/grok-4-fast-non-reasoning\": {\n    \"display_name\": \"Grok 4 Fast Non Reasoning\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/grok-4-fast-reasoning\": {\n    \"display_name\": \"Grok 4 Fast Reasoning\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/grok-code-fast-1\": {\n    \"display_name\": \"Grok Code Fast 1\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/jais-30b-chat\": {\n    \"display_name\": \"Jais 30B Chat\",\n    \"model_vendor\": \"g42\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/jamba-instruct\": {\n    \"display_name\": \"Jamba Instruct\",\n    \"model_vendor\": \"ai21\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/ministral-3b\": {\n    \"display_name\": \"Ministral 3B\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/mistral-large\": {\n    \"display_name\": \"Mistral Large\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/mistral-large-2407\": {\n    \"display_name\": \"Mistral Large 24.07\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"2407\"\n  },\n  \"azure_ai/mistral-large-latest\": {\n    \"display_name\": \"Mistral Large\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/mistral-medium-2505\": {\n    \"display_name\": \"Mistral Medium 2505\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"2505\"\n  },\n  \"azure_ai/mistral-nemo\": {\n    \"display_name\": \"Mistral Nemo\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/mistral-small\": {\n    \"display_name\": \"Mistral Small\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"azure_ai/mistral-small-2503\": {\n    \"display_name\": \"Mistral Small 2503\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"2503\"\n  },\n  \"bedrock/*/1-month-commitment/cohere.command-light-text-v14\": {\n    \"display_name\": \"Command Light Text\",\n    \"model_vendor\": \"cohere\",\n    \"model_version\": \"v14\"\n  },\n  \"bedrock/*/1-month-commitment/cohere.command-text-v14\": {\n    \"display_name\": \"Command Text\",\n    \"model_vendor\": \"cohere\",\n    \"model_version\": \"v14\"\n  },\n  \"bedrock/*/6-month-commitment/cohere.command-light-text-v14\": {\n    \"display_name\": \"Command Light Text\",\n    \"model_vendor\": \"cohere\",\n    \"model_version\": \"v14\"\n  },\n  \"bedrock/*/6-month-commitment/cohere.command-text-v14\": {\n    \"display_name\": \"Command Text\",\n    \"model_vendor\": \"cohere\",\n    \"model_version\": \"v14\"\n  },\n  \"bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"bedrock/ap-northeast-1/anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/ap-northeast-1/anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/ap-northeast-1/anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/ap-south-1/meta.llama3-8b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 8B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 8B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"bedrock/eu-central-1/6-month-commitment/anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/eu-central-1/6-month-commitment/anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"bedrock/eu-central-1/anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/eu-central-1/anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/eu-central-1/anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/eu-west-1/meta.llama3-8b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 8B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/eu-west-2/meta.llama3-8b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 8B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/eu-west-3/mistral.mistral-7b-instruct-v0:2\": {\n    \"display_name\": \"Mistral 7B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"v0:2\"\n  },\n  \"bedrock/eu-west-3/mistral.mistral-large-2402-v1:0\": {\n    \"display_name\": \"Mistral Large 24.02\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"2402-v1:0\"\n  },\n  \"bedrock/eu-west-3/mistral.mixtral-8x7b-instruct-v0:1\": {\n    \"display_name\": \"Mixtral 8x7B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"v0:1\"\n  },\n  \"bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240620-v1:0\"\n  },\n  \"bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/sa-east-1/meta.llama3-8b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 8B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-east-1/1-month-commitment/anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-east-1/1-month-commitment/anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"bedrock/us-east-1/6-month-commitment/anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-east-1/6-month-commitment/anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-east-1/6-month-commitment/anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"bedrock/us-east-1/anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-east-1/anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-east-1/anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"bedrock/us-east-1/meta.llama3-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-east-1/meta.llama3-8b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 8B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-east-1/mistral.mistral-7b-instruct-v0:2\": {\n    \"display_name\": \"Mistral 7B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"v0:2\"\n  },\n  \"bedrock/us-east-1/mistral.mistral-large-2402-v1:0\": {\n    \"display_name\": \"Mistral Large 24.02\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"2402-v1:0\"\n  },\n  \"bedrock/us-east-1/mistral.mixtral-8x7b-instruct-v0:1\": {\n    \"display_name\": \"Mixtral 8x7B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"v0:1\"\n  },\n  \"bedrock/us-gov-east-1/amazon.nova-pro-v1:0\": {\n    \"display_name\": \"Nova Pro\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-gov-east-1/amazon.titan-text-express-v1\": {\n    \"display_name\": \"Titan Text Express\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-gov-east-1/amazon.titan-text-lite-v1\": {\n    \"display_name\": \"Titan Text Lite\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-gov-east-1/amazon.titan-text-premier-v1:0\": {\n    \"display_name\": \"Titan Text Premier\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-gov-east-1/anthropic.claude-3-5-sonnet-20240620-v1:0\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240620-v1:0\"\n  },\n  \"bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250929-v1:0\"\n  },\n  \"bedrock/us-gov-east-1/meta.llama3-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-gov-east-1/meta.llama3-8b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 8B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-gov-west-1/amazon.nova-pro-v1:0\": {\n    \"display_name\": \"Nova Pro\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-gov-west-1/amazon.titan-text-express-v1\": {\n    \"display_name\": \"Titan Text Express\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-gov-west-1/amazon.titan-text-lite-v1\": {\n    \"display_name\": \"Titan Text Lite\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-gov-west-1/amazon.titan-text-premier-v1:0\": {\n    \"display_name\": \"Titan Text Premier\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-gov-west-1/anthropic.claude-3-5-sonnet-20240620-v1:0\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240620-v1:0\"\n  },\n  \"bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250929-v1:0\"\n  },\n  \"bedrock/us-gov-west-1/meta.llama3-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-gov-west-1/meta.llama3-8b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 8B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-west-1/meta.llama3-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-west-1/meta.llama3-8b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 8B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"bedrock/us-west-2/1-month-commitment/anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-west-2/1-month-commitment/anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-west-2/1-month-commitment/anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"bedrock/us-west-2/6-month-commitment/anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-west-2/6-month-commitment/anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-west-2/6-month-commitment/anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"bedrock/us-west-2/anthropic.claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-west-2/anthropic.claude-v1\": {\n    \"display_name\": \"Claude\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"bedrock/us-west-2/anthropic.claude-v2:1\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v2:1\"\n  },\n  \"bedrock/us-west-2/mistral.mistral-7b-instruct-v0:2\": {\n    \"display_name\": \"Mistral 7B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"v0:2\"\n  },\n  \"bedrock/us-west-2/mistral.mistral-large-2402-v1:0\": {\n    \"display_name\": \"Mistral Large 24.02\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"2402-v1:0\"\n  },\n  \"bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1\": {\n    \"display_name\": \"Mixtral 8x7B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"v0:1\"\n  },\n  \"chat-bison\": {\n    \"display_name\": \"Chat Bison\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"chat-bison-32k\": {\n    \"display_name\": \"Chat Bison 32k\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"chat-bison-32k@002\": {\n    \"display_name\": \"Chat Bison 32k\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"002\"\n  },\n  \"chat-bison@001\": {\n    \"display_name\": \"Chat Bison\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"chat-bison@002\": {\n    \"display_name\": \"Chat Bison\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"002\"\n  },\n  \"chatgpt-4o-latest\": {\n    \"display_name\": \"ChatGPT 4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"claude-3-5-sonnet-20240620\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240620\"\n  },\n  \"claude-3-5-sonnet-20241022\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20241022\"\n  },\n  \"claude-3-5-sonnet-latest\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"latest\"\n  },\n  \"claude-4-opus-20250514\": {\n    \"display_name\": \"Claude Opus 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514\"\n  },\n  \"claude-4-sonnet-20250514\": {\n    \"display_name\": \"Claude Sonnet 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514\"\n  },\n  \"claude-haiku-4-5\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"claude-haiku-4-5-20251001\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251001\"\n  },\n  \"claude-opus-4-1\": {\n    \"display_name\": \"Claude Opus 4.1\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"claude-opus-4-1-20250805\": {\n    \"display_name\": \"Claude Opus 4.1\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250805\"\n  },\n  \"claude-opus-4-1@20250805\": {\n    \"display_name\": \"Claude Opus 4.1\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250805\"\n  },\n  \"claude-opus-4-20250514\": {\n    \"display_name\": \"Claude Opus 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514\"\n  },\n  \"claude-opus-4-5\": {\n    \"display_name\": \"Claude Opus 4.5\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"claude-opus-4-6\": {\n    \"display_name\": \"Claude Opus 4.6\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"claude-opus-4-5-20251101\": {\n    \"display_name\": \"Claude Opus 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251101\"\n  },\n  \"claude-sonnet-4-20250514\": {\n    \"display_name\": \"Claude Sonnet 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514\"\n  },\n  \"claude-sonnet-4-5\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"claude-sonnet-4-6\": {\n    \"display_name\": \"Claude Sonnet 4.6\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"claude-sonnet-4-5-20250929\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250929\"\n  },\n  \"claude-sonnet-4-5-20250929-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250929-v1:0\"\n  },\n  \"codechat-bison\": {\n    \"display_name\": \"Codechat Bison\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"codechat-bison-32k\": {\n    \"display_name\": \"Codechat Bison 32k\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"codechat-bison-32k@002\": {\n    \"display_name\": \"Codechat Bison 32k\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"002\"\n  },\n  \"codechat-bison@001\": {\n    \"display_name\": \"Codechat Bison\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"codechat-bison@002\": {\n    \"display_name\": \"Codechat Bison\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"002\"\n  },\n  \"codechat-bison@latest\": {\n    \"display_name\": \"Codechat Bison\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"codex-mini-latest\": {\n    \"display_name\": \"Codex Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"cohere.command-light-text-v14\": {\n    \"display_name\": \"Command Light Text\",\n    \"model_vendor\": \"cohere\",\n    \"model_version\": \"v14\"\n  },\n  \"cohere.command-r-plus-v1:0\": {\n    \"display_name\": \"Command R Plus\",\n    \"model_vendor\": \"cohere\",\n    \"model_version\": \"v1:0\"\n  },\n  \"cohere.command-r-v1:0\": {\n    \"display_name\": \"Command R\",\n    \"model_vendor\": \"cohere\",\n    \"model_version\": \"v1:0\"\n  },\n  \"cohere.command-text-v14\": {\n    \"display_name\": \"Command Text\",\n    \"model_vendor\": \"cohere\",\n    \"model_version\": \"v14\"\n  },\n  \"computer-use-preview\": {\n    \"display_name\": \"Computer Use Preview\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"preview\"\n  },\n  \"deepseek.v3-v1:0\": {\n    \"display_name\": \"DeepSeek V3\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"v1:0\"\n  },\n  \"deepseek/deepseek-chat\": {\n    \"display_name\": \"DeepSeek Chat\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"deepseek/deepseek-coder\": {\n    \"display_name\": \"DeepSeek Coder\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"deepseek/deepseek-r1\": {\n    \"display_name\": \"DeepSeek R1\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"deepseek/deepseek-reasoner\": {\n    \"display_name\": \"DeepSeek Reasoner\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"deepseek/deepseek-v3\": {\n    \"display_name\": \"DeepSeek V3\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"v3\"\n  },\n  \"eu.amazon.nova-lite-v1:0\": {\n    \"display_name\": \"Nova Lite\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"eu.amazon.nova-micro-v1:0\": {\n    \"display_name\": \"Nova Micro\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"eu.amazon.nova-pro-v1:0\": {\n    \"display_name\": \"Nova Pro\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"eu.anthropic.claude-3-5-sonnet-20240620-v1:0\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240620-v1:0\"\n  },\n  \"eu.anthropic.claude-3-5-sonnet-20241022-v2:0\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20241022-v2:0\"\n  },\n  \"eu.anthropic.claude-3-sonnet-20240229-v1:0\": {\n    \"display_name\": \"Claude Sonnet 3\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240229-v1:0\"\n  },\n  \"eu.anthropic.claude-haiku-4-5-20251001-v1:0\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251001-v1:0\"\n  },\n  \"eu.anthropic.claude-opus-4-1-20250805-v1:0\": {\n    \"display_name\": \"Claude Opus 4.1\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250805-v1:0\"\n  },\n  \"eu.anthropic.claude-opus-4-20250514-v1:0\": {\n    \"display_name\": \"Claude Opus 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514-v1:0\"\n  },\n  \"eu.anthropic.claude-sonnet-4-20250514-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514-v1:0\"\n  },\n  \"eu.anthropic.claude-sonnet-4-5-20250929-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250929-v1:0\"\n  },\n  \"eu.meta.llama3-2-1b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.2 1B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"eu.meta.llama3-2-3b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.2 3B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1:0\"\n  },\n  \"eu.mistral.pixtral-large-2502-v1:0\": {\n    \"display_name\": \"Pixtral Large 25.02\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"2502-v1:0\"\n  },\n  \"eu.twelvelabs.pegasus-1-2-v1:0\": {\n    \"display_name\": \"Pegasus 1.2\",\n    \"model_vendor\": \"twelvelabs\",\n    \"model_version\": \"1.2-v1:0\"\n  },\n  \"ft:gpt-3.5-turbo\": {\n    \"display_name\": \"Ft:gpt 3.5 Turbo\",\n    \"model_vendor\": \"openai\"\n  },\n  \"ft:gpt-3.5-turbo-0125\": {\n    \"display_name\": \"GPT-3.5 Turbo (Fine-tuned)\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0125\"\n  },\n  \"ft:gpt-3.5-turbo-0613\": {\n    \"display_name\": \"GPT-3.5 Turbo (Fine-tuned)\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0613\"\n  },\n  \"ft:gpt-3.5-turbo-1106\": {\n    \"display_name\": \"GPT-3.5 Turbo (Fine-tuned)\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"1106\"\n  },\n  \"ft:gpt-4-0613\": {\n    \"display_name\": \"GPT-4 (Fine-tuned)\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0613\"\n  },\n  \"ft:gpt-4o-2024-08-06\": {\n    \"display_name\": \"GPT-4o (Fine-tuned)\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-08-06\"\n  },\n  \"ft:gpt-4o-2024-11-20\": {\n    \"display_name\": \"GPT-4o (Fine-tuned)\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-11-20\"\n  },\n  \"ft:gpt-4o-mini-2024-07-18\": {\n    \"display_name\": \"GPT-4o Mini (Fine-tuned)\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-07-18\"\n  },\n  \"gemini-1.0-pro\": {\n    \"display_name\": \"Gemini 1.0 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-1.0-pro-001\": {\n    \"display_name\": \"Gemini 1.0 Pro 001\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini-1.0-pro-002\": {\n    \"display_name\": \"Gemini 1.0 Pro 002\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"002\"\n  },\n  \"gemini-1.0-ultra\": {\n    \"display_name\": \"Gemini 1.0 Ultra\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-1.0-ultra-001\": {\n    \"display_name\": \"Gemini 1.0 Ultra 001\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini-1.5-flash\": {\n    \"display_name\": \"Gemini 1.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-1.5-flash-001\": {\n    \"display_name\": \"Gemini 1.5 Flash 001\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini-1.5-flash-002\": {\n    \"display_name\": \"Gemini 1.5 Flash 002\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"002\"\n  },\n  \"gemini-1.5-flash-exp-0827\": {\n    \"display_name\": \"Gemini 1.5 Flash Exp 0827\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"0827\"\n  },\n  \"gemini-1.5-flash-preview-0514\": {\n    \"display_name\": \"Gemini 1.5 Flash Preview 0514\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"0514\"\n  },\n  \"gemini-1.5-pro\": {\n    \"display_name\": \"Gemini 1.5 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-1.5-pro-001\": {\n    \"display_name\": \"Gemini 1.5 Pro 001\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini-1.5-pro-002\": {\n    \"display_name\": \"Gemini 1.5 Pro 002\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"002\"\n  },\n  \"gemini-1.5-pro-preview-0215\": {\n    \"display_name\": \"Gemini 1.5 Pro Preview 0215\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"0215\"\n  },\n  \"gemini-1.5-pro-preview-0409\": {\n    \"display_name\": \"Gemini 1.5 Pro Preview 0409\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"0409\"\n  },\n  \"gemini-1.5-pro-preview-0514\": {\n    \"display_name\": \"Gemini 1.5 Pro Preview 0514\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"0514\"\n  },\n  \"gemini-2.0-flash\": {\n    \"display_name\": \"Gemini 2.0 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.0-flash-001\": {\n    \"display_name\": \"Gemini 2.0 Flash 001\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini-2.0-flash-exp\": {\n    \"display_name\": \"Gemini 2.0 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.0-flash-lite\": {\n    \"display_name\": \"Gemini 2.0 Flash Lite\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.0-flash-lite-001\": {\n    \"display_name\": \"Gemini 2.0 Flash Lite 001\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini-2.0-flash-live-preview-04-09\": {\n    \"display_name\": \"Gemini 2.0 Flash Live Preview 04 09\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.0-flash-thinking-exp\": {\n    \"display_name\": \"Gemini 2.0 Flash Thinking\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.0-flash-thinking-exp-01-21\": {\n    \"display_name\": \"Gemini 2.0 Flash Thinking Exp 01 21\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.0-pro-exp-02-05\": {\n    \"display_name\": \"Gemini 2.0 Pro Exp 02 05\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.5-flash\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.5-flash-lite\": {\n    \"display_name\": \"Gemini 2.5 Flash Lite\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.5-flash-lite-preview-06-17\": {\n    \"display_name\": \"Gemini 2.5 Flash Lite Preview 06 17\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.5-flash-lite-preview-09-2025\": {\n    \"display_name\": \"Gemini 2.5 Flash Lite Preview 09 2025\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"2025\"\n  },\n  \"gemini-2.5-flash-preview-04-17\": {\n    \"display_name\": \"Gemini 2.5 Flash Preview 04 17\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.5-flash-preview-05-20\": {\n    \"display_name\": \"Gemini 2.5 Flash Preview 05 20\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.5-flash-preview-09-2025\": {\n    \"display_name\": \"Gemini 2.5 Flash Preview 09 2025\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"2025\"\n  },\n  \"gemini-2.5-pro\": {\n    \"display_name\": \"Gemini 2.5 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.5-pro-exp-03-25\": {\n    \"display_name\": \"Gemini 2.5 Pro Exp 03 25\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.5-pro-preview-03-25\": {\n    \"display_name\": \"Gemini 2.5 Pro Preview 03 25\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.5-pro-preview-05-06\": {\n    \"display_name\": \"Gemini 2.5 Pro Preview 05 06\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-2.5-pro-preview-06-05\": {\n    \"display_name\": \"Gemini 2.5 Pro Preview 06 05\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-3-pro-preview\": {\n    \"display_name\": \"Gemini 3 Pro Preview\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"preview\"\n  },\n  \"gemini-3-flash-preview\": {\n    \"display_name\": \"Gemini 3 Flash Preview\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"preview\"\n  },\n  \"gemini-flash-experimental\": {\n    \"display_name\": \"Gemini Flash Experimental\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"experimental\"\n  },\n  \"gemini-pro\": {\n    \"display_name\": \"Gemini Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini-pro-experimental\": {\n    \"display_name\": \"Gemini Pro Experimental\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-1.5-flash\": {\n    \"display_name\": \"Gemini 1.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-1.5-flash-001\": {\n    \"display_name\": \"Gemini 1.5 Flash\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini/gemini-1.5-flash-002\": {\n    \"display_name\": \"Gemini 1.5 Flash\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"002\"\n  },\n  \"gemini/gemini-1.5-flash-8b\": {\n    \"display_name\": \"Gemini 1.5 Flash 8B\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-1.5-flash-8b-exp-0827\": {\n    \"display_name\": \"Gemini 1.5 Flash 8B\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"0827\"\n  },\n  \"gemini/gemini-1.5-flash-8b-exp-0924\": {\n    \"display_name\": \"Gemini 1.5 Flash 8B\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"0924\"\n  },\n  \"gemini/gemini-1.5-flash-exp-0827\": {\n    \"display_name\": \"Gemini 1.5 Flash\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"0827\"\n  },\n  \"gemini/gemini-1.5-flash-latest\": {\n    \"display_name\": \"Gemini 1.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-1.5-pro\": {\n    \"display_name\": \"Gemini 1.5 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-1.5-pro-001\": {\n    \"display_name\": \"Gemini 1.5 Pro\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini/gemini-1.5-pro-002\": {\n    \"display_name\": \"Gemini 1.5 Pro\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"002\"\n  },\n  \"gemini/gemini-1.5-pro-exp-0801\": {\n    \"display_name\": \"Gemini 1.5 Pro\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"0801\"\n  },\n  \"gemini/gemini-1.5-pro-exp-0827\": {\n    \"display_name\": \"Gemini 1.5 Pro\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"0827\"\n  },\n  \"gemini/gemini-1.5-pro-latest\": {\n    \"display_name\": \"Gemini 1.5 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.0-flash\": {\n    \"display_name\": \"Gemini 2.0 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.0-flash-001\": {\n    \"display_name\": \"Gemini 2.0 Flash\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini/gemini-2.0-flash-exp\": {\n    \"display_name\": \"Gemini 2.0 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.0-flash-lite\": {\n    \"display_name\": \"Gemini 2.0 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.0-flash-lite-preview-02-05\": {\n    \"display_name\": \"Gemini 2.0 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.0-flash-live-001\": {\n    \"display_name\": \"Gemini 2.0 Flash\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini/gemini-2.0-flash-preview-image-generation\": {\n    \"display_name\": \"Gemini 2.0 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.0-flash-thinking-exp\": {\n    \"display_name\": \"Gemini 2.0 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.0-flash-thinking-exp-01-21\": {\n    \"display_name\": \"Gemini 2.0 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.0-pro-exp-02-05\": {\n    \"display_name\": \"Gemini 2.0\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-flash\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-flash-image\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-flash-image-preview\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-flash-lite\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-flash-lite-preview-06-17\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-flash-lite-preview-09-2025\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"2025\"\n  },\n  \"gemini/gemini-2.5-flash-preview-04-17\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-flash-preview-05-20\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-flash-preview-09-2025\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"2025\"\n  },\n  \"gemini/gemini-2.5-flash-preview-tts\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-pro\": {\n    \"display_name\": \"Gemini 2.5 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-pro-exp-03-25\": {\n    \"display_name\": \"Gemini 2.5 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-pro-preview-03-25\": {\n    \"display_name\": \"Gemini 2.5 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-pro-preview-05-06\": {\n    \"display_name\": \"Gemini 2.5 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-pro-preview-06-05\": {\n    \"display_name\": \"Gemini 2.5 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-2.5-pro-preview-tts\": {\n    \"display_name\": \"Gemini 2.5 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-3-pro-image-preview\": {\n    \"display_name\": \"Gemini 1.0 Pro\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"preview\"\n  },\n  \"gemini/gemini-3-pro-preview\": {\n    \"display_name\": \"Gemini 1.0 Pro\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"preview\"\n  },\n  \"gemini/gemini-embedding-001\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini/gemini-exp-1114\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"experimental\"\n  },\n  \"gemini/gemini-exp-1206\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"experimental\"\n  },\n  \"gemini/gemini-flash-latest\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"gemini/gemini-flash-lite-latest\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"gemini/gemini-gemma-2-27b-it\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"gemini/gemini-gemma-2-9b-it\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"gemini/gemini-live-2.5-flash-preview-native-audio-09-2025\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"preview\"\n  },\n  \"gemini/gemini-pro\": {\n    \"display_name\": \"Gemini 1.0 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemini-pro-vision\": {\n    \"display_name\": \"Gemini 1.0 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"gemini/gemma-3-27b-it\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"gemini/imagen-3.0-fast-generate-001\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini/imagen-3.0-generate-001\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini/imagen-3.0-generate-002\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"002\"\n  },\n  \"gemini/imagen-4.0-fast-generate-001\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini/imagen-4.0-generate-001\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini/imagen-4.0-ultra-generate-001\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini/learnlm-1.5-pro-experimental\": {\n    \"display_name\": \"Gemini 1.5 Pro\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"experimental\"\n  },\n  \"gemini/veo-2.0-generate-001\": {\n    \"display_name\": \"Gemini 2.0\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"gemini/veo-3.0-fast-generate-preview\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"preview\"\n  },\n  \"gemini/veo-3.0-generate-preview\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"preview\"\n  },\n  \"gemini/veo-3.1-fast-generate-preview\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"preview\"\n  },\n  \"gemini/veo-3.1-generate-preview\": {\n    \"display_name\": \"Gemini\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"preview\"\n  },\n  \"global.anthropic.claude-haiku-4-5-20251001-v1:0\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251001\"\n  },\n  \"global.anthropic.claude-sonnet-4-20250514-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514\"\n  },\n  \"global.anthropic.claude-sonnet-4-5-20250929-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250929\"\n  },\n  \"gpt-3.5-turbo\": {\n    \"display_name\": \"GPT-3.5 Turbo\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-3.5-turbo-0125\": {\n    \"display_name\": \"GPT 3.5 Turbo 0125\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0125\"\n  },\n  \"gpt-3.5-turbo-0301\": {\n    \"display_name\": \"GPT 3.5 Turbo 0301\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0301\"\n  },\n  \"gpt-3.5-turbo-0613\": {\n    \"display_name\": \"GPT 3.5 Turbo 0613\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0613\"\n  },\n  \"gpt-3.5-turbo-1106\": {\n    \"display_name\": \"GPT 3.5 Turbo 1106\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"1106\"\n  },\n  \"gpt-3.5-turbo-16k\": {\n    \"display_name\": \"GPT-3.5 Turbo 16K\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-3.5-turbo-16k-0613\": {\n    \"display_name\": \"GPT 3.5 Turbo 16k 0613\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0613\"\n  },\n  \"gpt-4\": {\n    \"display_name\": \"GPT-4\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4-0125-preview\": {\n    \"display_name\": \"GPT-4 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0125\"\n  },\n  \"gpt-4-0314\": {\n    \"display_name\": \"GPT-4\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0314\"\n  },\n  \"gpt-4-0613\": {\n    \"display_name\": \"GPT-4\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0613\"\n  },\n  \"gpt-4-1106-preview\": {\n    \"display_name\": \"GPT-4 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"1106\"\n  },\n  \"gpt-4-1106-vision-preview\": {\n    \"display_name\": \"GPT-4 Vision Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"1106\"\n  },\n  \"gpt-4-32k\": {\n    \"display_name\": \"GPT-4 32K\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4-32k-0314\": {\n    \"display_name\": \"GPT-4 32K\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0314\"\n  },\n  \"gpt-4-32k-0613\": {\n    \"display_name\": \"GPT-4 32K\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"0613\"\n  },\n  \"gpt-4-turbo\": {\n    \"display_name\": \"GPT-4 Turbo\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4-turbo-2024-04-09\": {\n    \"display_name\": \"GPT-4 Turbo\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-04-09\"\n  },\n  \"gpt-4-turbo-preview\": {\n    \"display_name\": \"GPT-4 Turbo Preview\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4-vision-preview\": {\n    \"display_name\": \"GPT-4 Vision Preview\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4.1\": {\n    \"display_name\": \"GPT-4.1\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4.1-2025-04-14\": {\n    \"display_name\": \"GPT-4.1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-14\"\n  },\n  \"gpt-4.1-mini\": {\n    \"display_name\": \"GPT-4.1 Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4.1-mini-2025-04-14\": {\n    \"display_name\": \"GPT-4.1 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-14\"\n  },\n  \"gpt-4.1-nano\": {\n    \"display_name\": \"GPT-4.1 Nano\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4.1-nano-2025-04-14\": {\n    \"display_name\": \"GPT-4.1 Nano\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-14\"\n  },\n  \"gpt-4.5-preview\": {\n    \"display_name\": \"GPT-4.5 Preview\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4.5-preview-2025-02-27\": {\n    \"display_name\": \"GPT-4.5 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-02-27\"\n  },\n  \"gpt-4o\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4o-2024-05-13\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-05-13\"\n  },\n  \"gpt-4o-2024-08-06\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-08-06\"\n  },\n  \"gpt-4o-2024-11-20\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-11-20\"\n  },\n  \"gpt-4o-audio-preview\": {\n    \"display_name\": \"GPT-4o Audio Preview\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4o-audio-preview-2024-10-01\": {\n    \"display_name\": \"GPT-4o Audio Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-10-01\"\n  },\n  \"gpt-4o-audio-preview-2024-12-17\": {\n    \"display_name\": \"GPT-4o Audio Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"gpt-4o-audio-preview-2025-06-03\": {\n    \"display_name\": \"GPT-4o Audio Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-06-03\"\n  },\n  \"gpt-4o-mini\": {\n    \"display_name\": \"GPT-4o Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4o-mini-2024-07-18\": {\n    \"display_name\": \"GPT-4o Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-07-18\"\n  },\n  \"gpt-4o-mini-audio-preview\": {\n    \"display_name\": \"GPT-4o Mini Audio Preview\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4o-mini-audio-preview-2024-12-17\": {\n    \"display_name\": \"GPT-4o Mini Audio Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"gpt-4o-mini-realtime-preview\": {\n    \"display_name\": \"GPT-4o Mini Realtime Preview\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4o-mini-realtime-preview-2024-12-17\": {\n    \"display_name\": \"GPT-4o Mini Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"gpt-4o-mini-search-preview\": {\n    \"display_name\": \"GPT 4o Mini Search Preview\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4o-mini-search-preview-2025-03-11\": {\n    \"display_name\": \"GPT 4o Mini Search Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-03-11\"\n  },\n  \"gpt-4o-realtime-preview\": {\n    \"display_name\": \"GPT-4o Realtime Preview\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4o-realtime-preview-2024-10-01\": {\n    \"display_name\": \"GPT-4o Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-10-01\"\n  },\n  \"gpt-4o-realtime-preview-2024-12-17\": {\n    \"display_name\": \"GPT-4o Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"gpt-4o-realtime-preview-2025-06-03\": {\n    \"display_name\": \"GPT-4o Realtime Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-06-03\"\n  },\n  \"gpt-4o-search-preview\": {\n    \"display_name\": \"GPT 4o Search Preview\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-4o-search-preview-2025-03-11\": {\n    \"display_name\": \"GPT 4o Search Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-03-11\"\n  },\n  \"gpt-5\": {\n    \"display_name\": \"GPT-5\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5-2025-08-07\": {\n    \"display_name\": \"GPT-5\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-07\"\n  },\n  \"gpt-5-chat\": {\n    \"display_name\": \"GPT 5 Chat\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5-chat-latest\": {\n    \"display_name\": \"GPT 5 Chat\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5-codex\": {\n    \"display_name\": \"GPT-5 Codex\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5-mini\": {\n    \"display_name\": \"GPT-5 Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5-mini-2025-08-07\": {\n    \"display_name\": \"GPT-5 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-07\"\n  },\n  \"gpt-5-nano\": {\n    \"display_name\": \"GPT 5 Nano\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5-nano-2025-08-07\": {\n    \"display_name\": \"GPT 5 Nano\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-07\"\n  },\n  \"gpt-5-pro\": {\n    \"display_name\": \"GPT-5 Pro\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5-pro-2025-10-06\": {\n    \"display_name\": \"GPT-5 Pro\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-10-06\"\n  },\n  \"gpt-5.4\": {\n    \"display_name\": \"GPT-5.4\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5.2-pro-2025-12-11\": {\n    \"display_name\": \"GPT-5.2 Pro\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-12-11\"\n  },\n  \"gpt-5.2-pro\": {\n    \"display_name\": \"GPT-5.2 Pro\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5.2-chat-latest\": {\n    \"display_name\": \"GPT 5.2 Chat\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5.2-2025-12-11\": {\n    \"display_name\": \"GPT 5.2\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-12-11\"\n  },\n  \"gpt-5.2\": {\n    \"display_name\": \"GPT 5.2\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5.1\": {\n    \"display_name\": \"GPT 5.1\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5.1-2025-11-13\": {\n    \"display_name\": \"GPT 5.1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-11-13\"\n  },\n  \"gpt-5.1-chat-latest\": {\n    \"display_name\": \"GPT 5.1 Chat\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5.1-codex\": {\n    \"display_name\": \"GPT-5.1 Codex\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-5.1-codex-mini\": {\n    \"display_name\": \"GPT-5.1 Codex Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-image-1-mini\": {\n    \"display_name\": \"GPT Image 1 Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"gpt-realtime\": {\n    \"display_name\": \"GPT Realtime\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"gpt-realtime-2025-08-28\": {\n    \"display_name\": \"GPT Realtime\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-08-28\"\n  },\n  \"gpt-realtime-mini\": {\n    \"display_name\": \"GPT Realtime Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"jp.anthropic.claude-haiku-4-5-20251001-v1:0\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251001\"\n  },\n  \"jp.anthropic.claude-sonnet-4-5-20250929-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250929\"\n  },\n  \"medlm-large\": {\n    \"display_name\": \"MedLM Large\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"medlm-medium\": {\n    \"display_name\": \"MedLM Medium\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"meta.llama2-13b-chat-v1\": {\n    \"display_name\": \"Llama 2 13B Chat\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1\"\n  },\n  \"meta.llama2-70b-chat-v1\": {\n    \"display_name\": \"Llama 2 70B Chat\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"v1\"\n  },\n  \"meta.llama3-1-405b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.1 405B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"meta.llama3-1-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.1 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"meta.llama3-1-8b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.1 8B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"meta.llama3-2-11b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.2 11B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"meta.llama3-2-1b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.2 1B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"meta.llama3-2-3b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.2 3B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"meta.llama3-2-90b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.2 90B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"meta.llama3-3-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.3 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"meta.llama3-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"meta.llama3-8b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3 8B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"meta.llama4-maverick-17b-instruct-v1:0\": {\n    \"display_name\": \"Llama 4 Maverick 17B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"meta.llama4-scout-17b-instruct-v1:0\": {\n    \"display_name\": \"Llama 4 Scout 17B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"mistral.mistral-7b-instruct-v0:2\": {\n    \"display_name\": \"Mistral 7B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"v0:2\"\n  },\n  \"mistral.mistral-large-2402-v1:0\": {\n    \"display_name\": \"Mistral Large 24.02\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"1:0\"\n  },\n  \"mistral.mistral-large-2407-v1:0\": {\n    \"display_name\": \"Mistral Large 24.07\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"1:0\"\n  },\n  \"mistral.mistral-small-2402-v1:0\": {\n    \"display_name\": \"Mistral Small 24.02\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"1:0\"\n  },\n  \"mistral.mixtral-8x7b-instruct-v0:1\": {\n    \"display_name\": \"Mixtral 8x7B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"0:1\"\n  },\n  \"mistral/codestral-2405\": {\n    \"display_name\": \"Codestral\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/codestral-embed\": {\n    \"display_name\": \"Codestral Embed\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/codestral-embed-2505\": {\n    \"display_name\": \"Codestral Embed\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/codestral-latest\": {\n    \"display_name\": \"Codestral\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/codestral-mamba-latest\": {\n    \"display_name\": \"Codestral Mamba\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/devstral-medium-2507\": {\n    \"display_name\": \"Devstral Medium\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/devstral-small-2505\": {\n    \"display_name\": \"Devstral Small\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/devstral-small-2507\": {\n    \"display_name\": \"Devstral Small\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/magistral-medium-2506\": {\n    \"display_name\": \"Magistral Medium\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/magistral-medium-2509\": {\n    \"display_name\": \"Magistral Medium\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/magistral-medium-latest\": {\n    \"display_name\": \"Magistral Medium\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/magistral-small-2506\": {\n    \"display_name\": \"Magistral Small\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/magistral-small-latest\": {\n    \"display_name\": \"Magistral Small\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-embed\": {\n    \"display_name\": \"Mistral Embed\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-large-2402\": {\n    \"display_name\": \"Mistral Large\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-large-2407\": {\n    \"display_name\": \"Mistral Large\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-large-2411\": {\n    \"display_name\": \"Mistral Large\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-large-latest\": {\n    \"display_name\": \"Mistral Large\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-medium\": {\n    \"display_name\": \"Mistral Medium\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-medium-2312\": {\n    \"display_name\": \"Mistral Medium\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-medium-2505\": {\n    \"display_name\": \"Mistral Medium\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-medium-latest\": {\n    \"display_name\": \"Mistral Medium\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-ocr-2505-completion\": {\n    \"display_name\": \"Mistral\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-ocr-latest\": {\n    \"display_name\": \"Mistral\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-small\": {\n    \"display_name\": \"Mistral Small\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-small-latest\": {\n    \"display_name\": \"Mistral Small\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/mistral-tiny\": {\n    \"display_name\": \"Mistral Tiny\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/open-codestral-mamba\": {\n    \"display_name\": \"Codestral Mamba\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/open-mistral-7b\": {\n    \"display_name\": \"Open Mistral\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/open-mistral-nemo\": {\n    \"display_name\": \"Open Mistral Nemo\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/open-mistral-nemo-2407\": {\n    \"display_name\": \"Open Mistral Nemo\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/open-mixtral-8x22b\": {\n    \"display_name\": \"Open Mixtral 8x22B\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/open-mixtral-8x7b\": {\n    \"display_name\": \"Open Mixtral 8x7B\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/pixtral-12b-2409\": {\n    \"display_name\": \"Pixtral\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/pixtral-large-2411\": {\n    \"display_name\": \"Pixtral Large\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"mistral/pixtral-large-latest\": {\n    \"display_name\": \"Pixtral Large\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"o1\": {\n    \"display_name\": \"o1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"o1-2024-12-17\": {\n    \"display_name\": \"o1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-12-17\"\n  },\n  \"o1-mini\": {\n    \"display_name\": \"o1 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"o1-mini-2024-09-12\": {\n    \"display_name\": \"o1 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"o1-preview\": {\n    \"display_name\": \"o1 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"o1-preview-2024-09-12\": {\n    \"display_name\": \"o1 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"o1-pro\": {\n    \"display_name\": \"o1 Pro\",\n    \"model_vendor\": \"openai\"\n  },\n  \"o1-pro-2025-03-19\": {\n    \"display_name\": \"o1 Pro\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-03-19\"\n  },\n  \"o3\": {\n    \"display_name\": \"o3\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"o3-2025-04-16\": {\n    \"display_name\": \"o3\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"o3-deep-research\": {\n    \"display_name\": \"o3 Deep Research\",\n    \"model_vendor\": \"openai\"\n  },\n  \"o3-deep-research-2025-06-26\": {\n    \"display_name\": \"o3 Deep Research\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-06-26\"\n  },\n  \"o3-mini\": {\n    \"display_name\": \"o3 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"o3-mini-2025-01-31\": {\n    \"display_name\": \"o3 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"o3-pro\": {\n    \"display_name\": \"o3 Pro\",\n    \"model_vendor\": \"openai\"\n  },\n  \"o3-pro-2025-06-10\": {\n    \"display_name\": \"o3 Pro\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-06-10\"\n  },\n  \"o4-mini\": {\n    \"display_name\": \"o4 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"o4-mini-2025-04-16\": {\n    \"display_name\": \"o4 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"o4-mini-deep-research\": {\n    \"display_name\": \"o4 Mini Deep Research\",\n    \"model_vendor\": \"openai\"\n  },\n  \"o4-mini-deep-research-2025-06-26\": {\n    \"display_name\": \"o4 Mini Deep Research\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-06-26\"\n  },\n  \"ollama/codegeex4\": {\n    \"display_name\": \"CodeGeeX4\",\n    \"model_vendor\": \"zhipu\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/codegemma\": {\n    \"display_name\": \"Codegemma\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/codellama\": {\n    \"display_name\": \"CodeLlama\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/deepseek-coder-v2-base\": {\n    \"display_name\": \"DeepSeek Coder v2 Base\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/deepseek-coder-v2-instruct\": {\n    \"display_name\": \"DeepSeek Coder v2 Instruct\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/deepseek-coder-v2-lite-base\": {\n    \"display_name\": \"DeepSeek Coder v2 Lite Base\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/deepseek-coder-v2-lite-instruct\": {\n    \"display_name\": \"DeepSeek Coder v2 Lite Instruct\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/deepseek-v3.1:671b-cloud\": {\n    \"display_name\": \"DeepSeek V3.1:671B Cloud\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/gpt-oss:120b-cloud\": {\n    \"display_name\": \"GPT Open-Source 120B\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/gpt-oss:20b-cloud\": {\n    \"display_name\": \"GPT Open-Source 20B\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/internlm2_5-20b-chat\": {\n    \"display_name\": \"InternLM 2.5 20B Chat\",\n    \"model_vendor\": \"shanghai-ai-lab\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/llama2\": {\n    \"display_name\": \"Llama 2\",\n    \"model_vendor\": \"meta\"\n  },\n  \"ollama/llama2-uncensored\": {\n    \"display_name\": \"Llama 2 Uncensored\",\n    \"model_vendor\": \"meta\"\n  },\n  \"ollama/llama2:13b\": {\n    \"display_name\": \"Llama 2:13B\",\n    \"model_vendor\": \"meta\"\n  },\n  \"ollama/llama2:70b\": {\n    \"display_name\": \"Llama 2:70B\",\n    \"model_vendor\": \"meta\"\n  },\n  \"ollama/llama2:7b\": {\n    \"display_name\": \"Llama 2:7B\",\n    \"model_vendor\": \"meta\"\n  },\n  \"ollama/llama3\": {\n    \"display_name\": \"Llama 3\",\n    \"model_vendor\": \"meta\"\n  },\n  \"ollama/llama3.1\": {\n    \"display_name\": \"Llama 3.1\",\n    \"model_vendor\": \"meta\"\n  },\n  \"ollama/llama3:70b\": {\n    \"display_name\": \"Llama 3:70B\",\n    \"model_vendor\": \"meta\"\n  },\n  \"ollama/llama3:8b\": {\n    \"display_name\": \"Llama 3:8B\",\n    \"model_vendor\": \"meta\"\n  },\n  \"ollama/mistral\": {\n    \"display_name\": \"Mistral\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/mistral-7B-Instruct-v0.1\": {\n    \"display_name\": \"Mistral 7B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"v0.1\"\n  },\n  \"ollama/mistral-7B-Instruct-v0.2\": {\n    \"display_name\": \"Mistral 7B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"v0.2\"\n  },\n  \"ollama/mistral-large-instruct-2407\": {\n    \"display_name\": \"Mistral Large Instruct 24.07\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/mixtral-8x22B-Instruct-v0.1\": {\n    \"display_name\": \"Mixtral 8x22B Instruct V0.1\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/mixtral-8x7B-Instruct-v0.1\": {\n    \"display_name\": \"Mixtral 8x7B Instruct V0.1\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/orca-mini\": {\n    \"display_name\": \"Orca Mini\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/qwen3-coder:480b-cloud\": {\n    \"display_name\": \"Qwen3 Coder:480B Cloud\",\n    \"model_vendor\": \"alibaba\",\n    \"model_version\": \"latest\"\n  },\n  \"ollama/vicuna\": {\n    \"display_name\": \"Vicuna\",\n    \"model_vendor\": \"lmsys\",\n    \"model_version\": \"latest\"\n  },\n  \"openai.gpt-oss-120b-1:0\": {\n    \"display_name\": \"GPT Open-Source 120B\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"v1:0\"\n  },\n  \"openai.gpt-oss-20b-1:0\": {\n    \"display_name\": \"GPT Open-Source 20B\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"v1:0\"\n  },\n  \"openai/container\": {\n    \"display_name\": \"Container\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/agentica-org/deepcoder-14b-preview\": {\n    \"display_name\": \"DeepCoder 14B Preview\",\n    \"model_vendor\": \"agentica\"\n  },\n  \"openrouter/ai21/jamba-1-5-large\": {\n    \"display_name\": \"Jamba 1.5 Large\",\n    \"model_vendor\": \"ai21\"\n  },\n  \"openrouter/ai21/jamba-1-5-mini\": {\n    \"display_name\": \"Jamba 1.5 Mini\",\n    \"model_vendor\": \"ai21\"\n  },\n  \"openrouter/ai21/jamba-large-1.7\": {\n    \"display_name\": \"Jamba Large 1.7\",\n    \"model_vendor\": \"ai21\"\n  },\n  \"openrouter/aion-labs/aion-1.0\": {\n    \"display_name\": \"AION 1.0\",\n    \"model_vendor\": \"aion-labs\"\n  },\n  \"openrouter/alibaba/qwen-2.5-72b-instruct\": {\n    \"display_name\": \"Qwen 2.5 72B Instruct\",\n    \"model_vendor\": \"alibaba\"\n  },\n  \"openrouter/alibaba/qwen-2.5-coder-32b-instruct\": {\n    \"display_name\": \"Qwen 2.5 Coder 32B\",\n    \"model_vendor\": \"alibaba\"\n  },\n  \"openrouter/alibaba/tongyi-deepresearch-30b-a3b\": {\n    \"display_name\": \"Tongyi DeepResearch 30B\",\n    \"model_vendor\": \"alibaba\"\n  },\n  \"openrouter/alibaba/tongyi-deepresearch-30b-a3b:free\": {\n    \"display_name\": \"Tongyi DeepResearch 30B (Free)\",\n    \"model_vendor\": \"alibaba\"\n  },\n  \"openrouter/anthropic/claude-2\": {\n    \"display_name\": \"Claude 2\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/anthropic/claude-3-sonnet\": {\n    \"display_name\": \"Claude Sonnet 3\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"openrouter/anthropic/claude-3.5-sonnet\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/anthropic/claude-3.5-sonnet:beta\": {\n    \"display_name\": \"Claude Sonnet 3.5:beta\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/anthropic/claude-haiku-4.5\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/anthropic/claude-instant-v1\": {\n    \"display_name\": \"Claude Instant\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"v1\"\n  },\n  \"openrouter/anthropic/claude-opus-4\": {\n    \"display_name\": \"Claude Opus 4\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"openrouter/anthropic/claude-opus-4.1\": {\n    \"display_name\": \"Claude Opus 4.1\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"openrouter/anthropic/claude-opus-4.5\": {\n    \"display_name\": \"Claude Opus 4.5\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"openrouter/anthropic/claude-sonnet-4\": {\n    \"display_name\": \"Claude Sonnet 4\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"openrouter/anthropic/claude-sonnet-4.5\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"openrouter/baidu/ernie-4.5-300b-a47b\": {\n    \"display_name\": \"ERNIE 4.5 300B\",\n    \"model_vendor\": \"baidu\"\n  },\n  \"openrouter/baidu/ernie-4.5-vl-28b-a3b\": {\n    \"display_name\": \"ERNIE 4.5 VL 28B\",\n    \"model_vendor\": \"baidu\"\n  },\n  \"openrouter/bytedance/ui-tars-1.5-7b\": {\n    \"display_name\": \"UI-TARS 1.5 7B\",\n    \"model_vendor\": \"bytedance\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/cognitivecomputations/dolphin-mixtral-8x7b\": {\n    \"display_name\": \"Dolphin Mixtral 8x7B\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/cohere/command-a\": {\n    \"display_name\": \"Command A\",\n    \"model_vendor\": \"cohere\"\n  },\n  \"openrouter/cohere/command-r\": {\n    \"display_name\": \"Command R\",\n    \"model_vendor\": \"cohere\"\n  },\n  \"openrouter/cohere/command-r-08-2024\": {\n    \"display_name\": \"Command R\",\n    \"model_vendor\": \"cohere\",\n    \"model_version\": \"08-2024\"\n  },\n  \"openrouter/cohere/command-r-plus\": {\n    \"display_name\": \"Command R Plus\",\n    \"model_vendor\": \"cohere\"\n  },\n  \"openrouter/cohere/command-r-plus-08-2024\": {\n    \"display_name\": \"Command R Plus\",\n    \"model_vendor\": \"cohere\",\n    \"model_version\": \"08-2024\"\n  },\n  \"openrouter/databricks/dbrx-instruct\": {\n    \"display_name\": \"DBRX Instruct\",\n    \"model_vendor\": \"databricks\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/deepcogito/cogito-v2-preview-deepseek-671b\": {\n    \"display_name\": \"Cogito V2 Preview DeepSeek 671B\",\n    \"model_vendor\": \"deepcogito\"\n  },\n  \"openrouter/deepcogito/cogito-v2-preview-llama-109b-moe\": {\n    \"display_name\": \"Cogito V2 Preview Llama 109B MoE\",\n    \"model_vendor\": \"deepcogito\"\n  },\n  \"openrouter/deepseek/deepseek-chat\": {\n    \"display_name\": \"DeepSeek Chat\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/deepseek/deepseek-chat-v3-0324\": {\n    \"display_name\": \"DeepSeek Chat v3 0324\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/deepseek/deepseek-chat-v3.1\": {\n    \"display_name\": \"DeepSeek Chat V3.1\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/deepseek/deepseek-coder\": {\n    \"display_name\": \"DeepSeek Coder\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/deepseek/deepseek-r1\": {\n    \"display_name\": \"DeepSeek R1\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/deepseek/deepseek-r1-0528\": {\n    \"display_name\": \"DeepSeek R1 0528\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/deepseek/deepseek-v3.2-exp\": {\n    \"display_name\": \"DeepSeek V3.2\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"experimental\"\n  },\n  \"openrouter/fireworks/firellava-13b\": {\n    \"display_name\": \"FireLLaVA 13B\",\n    \"model_vendor\": \"fireworks\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/google/gemini-2.0-flash-001\": {\n    \"display_name\": \"Gemini 2.0 Flash\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"001\"\n  },\n  \"openrouter/google/gemini-2.5-flash\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"openrouter/google/gemini-2.5-pro\": {\n    \"display_name\": \"Gemini 2.5 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"openrouter/google/gemini-3-pro-preview\": {\n    \"display_name\": \"Gemini 3 Pro Preview\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"preview\"\n  },\n  \"openrouter/google/gemini-pro-1.5\": {\n    \"display_name\": \"Gemini Pro 1.5\",\n    \"model_vendor\": \"google\"\n  },\n  \"openrouter/google/gemini-pro-vision\": {\n    \"display_name\": \"Gemini Pro Vision\",\n    \"model_vendor\": \"google\"\n  },\n  \"openrouter/google/gemma-2-27b-it\": {\n    \"display_name\": \"Gemma 2 27B\",\n    \"model_vendor\": \"google\"\n  },\n  \"openrouter/google/gemma-2-9b-it\": {\n    \"display_name\": \"Gemma 2 9B\",\n    \"model_vendor\": \"google\"\n  },\n  \"openrouter/google/gemma-2-9b-it:free\": {\n    \"display_name\": \"Gemma 2 9B (Free)\",\n    \"model_vendor\": \"google\"\n  },\n  \"openrouter/google/gemma-3n-e4b-it\": {\n    \"display_name\": \"Gemma 3N E4B\",\n    \"model_vendor\": \"google\"\n  },\n  \"openrouter/google/gemma-3n-e4b-it:free\": {\n    \"display_name\": \"Gemma 3N E4B (Free)\",\n    \"model_vendor\": \"google\"\n  },\n  \"openrouter/google/palm-2-chat-bison\": {\n    \"display_name\": \"PaLM 2 Chat Bison\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/google/palm-2-codechat-bison\": {\n    \"display_name\": \"PaLM 2 Codechat Bison\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/gryphe/mythomax-l2-13b\": {\n    \"display_name\": \"MythoMax L2 13B\",\n    \"model_vendor\": \"gryphe\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/inclusionai/ring-1t\": {\n    \"display_name\": \"Ring 1T\",\n    \"model_vendor\": \"inclusionai\"\n  },\n  \"openrouter/jondurbin/airoboros-l2-70b-2.1\": {\n    \"display_name\": \"Airoboros L2 70B\",\n    \"model_vendor\": \"jondurbin\"\n  },\n  \"openrouter/mancer/weaver\": {\n    \"display_name\": \"Weaver\",\n    \"model_vendor\": \"mancer\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/meta-llama/codellama-34b-instruct\": {\n    \"display_name\": \"CodeLlama 34B Instruct\",\n    \"model_vendor\": \"meta\"\n  },\n  \"openrouter/meta-llama/llama-2-13b-chat\": {\n    \"display_name\": \"Llama 2 13B Chat\",\n    \"model_vendor\": \"meta\"\n  },\n  \"openrouter/meta-llama/llama-2-70b-chat\": {\n    \"display_name\": \"Llama 2 70B Chat\",\n    \"model_vendor\": \"meta\"\n  },\n  \"openrouter/meta-llama/llama-3-70b-instruct\": {\n    \"display_name\": \"Llama 3 70B Instruct\",\n    \"model_vendor\": \"meta\"\n  },\n  \"openrouter/meta-llama/llama-3-70b-instruct:nitro\": {\n    \"display_name\": \"Llama 3 70B Instruct:nitro\",\n    \"model_vendor\": \"meta\"\n  },\n  \"openrouter/meta-llama/llama-3-8b-instruct:extended\": {\n    \"display_name\": \"Llama 3 8B Instruct:extended\",\n    \"model_vendor\": \"meta\"\n  },\n  \"openrouter/meta-llama/llama-3-8b-instruct:free\": {\n    \"display_name\": \"Llama 3 8B Instruct:free\",\n    \"model_vendor\": \"meta\"\n  },\n  \"openrouter/microsoft/wizardlm-2-8x22b:nitro\": {\n    \"display_name\": \"WizardLM 2 8x22B\",\n    \"model_vendor\": \"microsoft\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/minimax/minimax-m2\": {\n    \"display_name\": \"MiniMax M2\",\n    \"model_vendor\": \"minimax\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/mistralai/mistral-7b-instruct\": {\n    \"display_name\": \"Mistral 7B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/mistralai/mistral-7b-instruct:free\": {\n    \"display_name\": \"Mistral 7B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/mistralai/mistral-large\": {\n    \"display_name\": \"Mistral Large\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/mistralai/mistral-small-3.1-24b-instruct\": {\n    \"display_name\": \"Mistral Small 3.1 24B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/mistralai/mistral-small-3.2-24b-instruct\": {\n    \"display_name\": \"Mistral Small 3.2 24B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/mistralai/mixtral-8x22b-instruct\": {\n    \"display_name\": \"Mixtral 8x22B Instruct\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/nousresearch/nous-hermes-llama2-13b\": {\n    \"display_name\": \"Nous Hermes Llama 2 13B\",\n    \"model_vendor\": \"meta\"\n  },\n  \"openrouter/openai/gpt-3.5-turbo\": {\n    \"display_name\": \"GPT-3.5 Turbo\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-3.5-turbo-16k\": {\n    \"display_name\": \"GPT-3.5 Turbo 16K\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-4\": {\n    \"display_name\": \"GPT-4\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-4-vision-preview\": {\n    \"display_name\": \"GPT-4 Vision Preview\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-4.1\": {\n    \"display_name\": \"GPT-4.1\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-4.1-2025-04-14\": {\n    \"display_name\": \"GPT-4.1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-14\"\n  },\n  \"openrouter/openai/gpt-4.1-mini\": {\n    \"display_name\": \"GPT-4.1 Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-4.1-mini-2025-04-14\": {\n    \"display_name\": \"GPT-4.1 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-14\"\n  },\n  \"openrouter/openai/gpt-4.1-nano\": {\n    \"display_name\": \"GPT-4.1 Nano\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-4.1-nano-2025-04-14\": {\n    \"display_name\": \"GPT-4.1 Nano\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2025-04-14\"\n  },\n  \"openrouter/openai/gpt-4o\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-4o-2024-05-13\": {\n    \"display_name\": \"GPT-4o\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"2024-05-13\"\n  },\n  \"openrouter/openai/gpt-5\": {\n    \"display_name\": \"GPT-5\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-5-chat\": {\n    \"display_name\": \"GPT 5 Chat\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-5-codex\": {\n    \"display_name\": \"GPT-5 Codex\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-5-mini\": {\n    \"display_name\": \"GPT-5 Mini\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-5-nano\": {\n    \"display_name\": \"GPT 5 Nano\",\n    \"model_vendor\": \"openai\"\n  },\n  \"openrouter/openai/gpt-oss-120b\": {\n    \"display_name\": \"GPT Open-Source 120B\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/openai/gpt-oss-20b\": {\n    \"display_name\": \"GPT Open-Source 20B\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/openai/o1\": {\n    \"display_name\": \"o1\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/openai/o1-mini\": {\n    \"display_name\": \"o1 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/openai/o1-mini-2024-09-12\": {\n    \"display_name\": \"o1 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/openai/o1-preview\": {\n    \"display_name\": \"o1 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/openai/o1-preview-2024-09-12\": {\n    \"display_name\": \"o1 Preview\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/openai/o3-mini\": {\n    \"display_name\": \"o3 Mini\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/openai/o3-mini-high\": {\n    \"display_name\": \"O3 Mini High\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/pygmalionai/mythalion-13b\": {\n    \"display_name\": \"Mythalion 13B\",\n    \"model_vendor\": \"pygmalionai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/qwen/qwen-2.5-coder-32b-instruct\": {\n    \"display_name\": \"Qwen 2.5 Coder 32B Instruct\",\n    \"model_vendor\": \"alibaba\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/qwen/qwen-vl-plus\": {\n    \"display_name\": \"Qwen Vl Plus\",\n    \"model_vendor\": \"alibaba\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/qwen/qwen3-coder\": {\n    \"display_name\": \"Qwen3 Coder\",\n    \"model_vendor\": \"alibaba\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/switchpoint/router\": {\n    \"display_name\": \"SwitchPoint Router\",\n    \"model_vendor\": \"switchpoint\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/undi95/remm-slerp-l2-13b\": {\n    \"display_name\": \"ReMM SLERP L2 13B\",\n    \"model_vendor\": \"undi95\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/x-ai/grok-4\": {\n    \"display_name\": \"Grok 4\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/x-ai/grok-4-fast:free\": {\n    \"display_name\": \"Grok 4 Fast:free\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/z-ai/glm-4.6\": {\n    \"display_name\": \"GLM 4.6\",\n    \"model_vendor\": \"zhipu\",\n    \"model_version\": \"latest\"\n  },\n  \"openrouter/z-ai/glm-4.6:exacto\": {\n    \"display_name\": \"GLM 4.6 Exacto\",\n    \"model_vendor\": \"zhipu\",\n    \"model_version\": \"latest\"\n  },\n  \"qwen.qwen3-235b-a22b-2507-v1:0\": {\n    \"display_name\": \"Qwen.qwen3 235B A22b 2507\",\n    \"model_vendor\": \"alibaba\",\n    \"model_version\": \"1:0\"\n  },\n  \"qwen.qwen3-32b-v1:0\": {\n    \"display_name\": \"Qwen.qwen3 32B\",\n    \"model_vendor\": \"alibaba\",\n    \"model_version\": \"1:0\"\n  },\n  \"qwen.qwen3-coder-30b-a3b-v1:0\": {\n    \"display_name\": \"Qwen.qwen3 Coder 30B A3b\",\n    \"model_vendor\": \"alibaba\",\n    \"model_version\": \"1:0\"\n  },\n  \"qwen.qwen3-coder-480b-a35b-v1:0\": {\n    \"display_name\": \"Qwen.qwen3 Coder 480B A35b\",\n    \"model_vendor\": \"alibaba\",\n    \"model_version\": \"1:0\"\n  },\n  \"twelvelabs.pegasus-1-2-v1:0\": {\n    \"display_name\": \"Pegasus 1.2\",\n    \"model_vendor\": \"twelvelabs\",\n    \"model_version\": \"v1:0\"\n  },\n  \"us.amazon.nova-lite-v1:0\": {\n    \"display_name\": \"Nova Lite\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.amazon.nova-micro-v1:0\": {\n    \"display_name\": \"Nova Micro\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.amazon.nova-premier-v1:0\": {\n    \"display_name\": \"Nova Premier\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"v1:0\"\n  },\n  \"us.amazon.nova-pro-v1:0\": {\n    \"display_name\": \"Nova Pro\",\n    \"model_vendor\": \"amazon\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.anthropic.claude-3-5-sonnet-20240620-v1:0\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240620\"\n  },\n  \"us.anthropic.claude-3-5-sonnet-20241022-v2:0\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20241022\"\n  },\n  \"us.anthropic.claude-3-sonnet-20240229-v1:0\": {\n    \"display_name\": \"Claude Sonnet 3\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240229\"\n  },\n  \"us.anthropic.claude-haiku-4-5-20251001-v1:0\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251001\"\n  },\n  \"us.anthropic.claude-opus-4-1-20250805-v1:0\": {\n    \"display_name\": \"Claude Opus 4.1\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250805\"\n  },\n  \"us.anthropic.claude-opus-4-20250514-v1:0\": {\n    \"display_name\": \"Claude Opus 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514\"\n  },\n  \"us.anthropic.claude-opus-4-5-20251101-v1:0\": {\n    \"display_name\": \"Claude Opus 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251101\"\n  },\n  \"us.anthropic.claude-sonnet-4-20250514-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514\"\n  },\n  \"us.anthropic.claude-sonnet-4-5-20250929-v1:0\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250929\"\n  },\n  \"us.deepseek.r1-v1:0\": {\n    \"display_name\": \"DeepSeek R1\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"v1:0\"\n  },\n  \"us.meta.llama3-1-405b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.1 405B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.meta.llama3-1-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.1 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.meta.llama3-1-8b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.1 8B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.meta.llama3-2-11b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.2 11B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.meta.llama3-2-1b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.2 1B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.meta.llama3-2-3b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.2 3B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.meta.llama3-2-90b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.2 90B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.meta.llama3-3-70b-instruct-v1:0\": {\n    \"display_name\": \"Llama 3.3 70B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.meta.llama4-maverick-17b-instruct-v1:0\": {\n    \"display_name\": \"Llama 4 Maverick 17B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.meta.llama4-scout-17b-instruct-v1:0\": {\n    \"display_name\": \"Llama 4 Scout 17B Instruct\",\n    \"model_vendor\": \"meta\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.mistral.pixtral-large-2502-v1:0\": {\n    \"display_name\": \"Pixtral Large 25.02\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"1:0\"\n  },\n  \"us.twelvelabs.pegasus-1-2-v1:0\": {\n    \"display_name\": \"Pegasus 1.2\",\n    \"model_vendor\": \"twelvelabs\",\n    \"model_version\": \"v1:0\"\n  },\n  \"vertex_ai/claude-3-5-sonnet\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"vertex_ai/claude-3-5-sonnet@20240620\": {\n    \"display_name\": \"Claude Sonnet 3.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240620\"\n  },\n  \"vertex_ai/claude-3-sonnet\": {\n    \"display_name\": \"Claude Sonnet 3\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"vertex_ai/claude-3-sonnet@20240229\": {\n    \"display_name\": \"Claude Sonnet 3\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20240229\"\n  },\n  \"vertex_ai/claude-haiku-4-5\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"vertex_ai/claude-haiku-4-5@20251001\": {\n    \"display_name\": \"Claude Haiku 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251001\"\n  },\n  \"vertex_ai/claude-opus-4\": {\n    \"display_name\": \"Claude Opus 4\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"vertex_ai/claude-opus-4-1\": {\n    \"display_name\": \"Claude Opus 4.1\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"vertex_ai/claude-opus-4-1@20250805\": {\n    \"display_name\": \"Claude Opus 4.1\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250805\"\n  },\n  \"vertex_ai/claude-opus-4-5\": {\n    \"display_name\": \"Claude Opus 4.5\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"vertex_ai/claude-opus-4-5@20251101\": {\n    \"display_name\": \"Claude Opus 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20251101\"\n  },\n  \"vertex_ai/claude-opus-4@20250514\": {\n    \"display_name\": \"Claude Opus 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514\"\n  },\n  \"vertex_ai/claude-sonnet-4\": {\n    \"display_name\": \"Claude Sonnet 4\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"vertex_ai/claude-sonnet-4-5\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\"\n  },\n  \"vertex_ai/claude-sonnet-4-5@20250929\": {\n    \"display_name\": \"Claude Sonnet 4.5\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250929\"\n  },\n  \"vertex_ai/claude-sonnet-4@20250514\": {\n    \"display_name\": \"Claude Sonnet 4\",\n    \"model_vendor\": \"anthropic\",\n    \"model_version\": \"20250514\"\n  },\n  \"vertex_ai/codestral-2\": {\n    \"display_name\": \"Codestral 2\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/codestral-2501\": {\n    \"display_name\": \"Codestral 25.01\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/codestral-2@001\": {\n    \"display_name\": \"Codestral 2@001\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/codestral@2405\": {\n    \"display_name\": \"Codestral@2405\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/codestral@latest\": {\n    \"display_name\": \"Codestral@latest\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/deepseek-ai/deepseek-r1-0528-maas\": {\n    \"display_name\": \"DeepSeek R1 0528 Maas\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/deepseek-ai/deepseek-v3.1-maas\": {\n    \"display_name\": \"DeepSeek V3.1 Maas\",\n    \"model_vendor\": \"deepseek\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/gemini-2.5-flash\": {\n    \"display_name\": \"Gemini 2.5 Flash\",\n    \"model_vendor\": \"google\"\n  },\n  \"vertex_ai/gemini-2.5-flash-lite\": {\n    \"display_name\": \"Gemini 2.5 Flash Lite\",\n    \"model_vendor\": \"google\"\n  },\n  \"vertex_ai/gemini-2.5-pro\": {\n    \"display_name\": \"Gemini 2.5 Pro\",\n    \"model_vendor\": \"google\"\n  },\n  \"vertex_ai/gemini-3-pro-preview\": {\n    \"display_name\": \"Gemini 3 Pro Preview\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"preview\"\n  },\n  \"vertex_ai/gemini-3-flash-preview\": {\n    \"display_name\": \"Gemini 3 Flash Preview\",\n    \"model_vendor\": \"google\",\n    \"model_version\": \"preview\"\n  },\n  \"vertex_ai/jamba-1.5\": {\n    \"display_name\": \"Jamba 1.5\",\n    \"model_vendor\": \"ai21\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/jamba-1.5-large\": {\n    \"display_name\": \"Jamba 1.5 Large\",\n    \"model_vendor\": \"ai21\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/jamba-1.5-large@001\": {\n    \"display_name\": \"Jamba 1.5 Large@001\",\n    \"model_vendor\": \"ai21\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/jamba-1.5-mini\": {\n    \"display_name\": \"Jamba 1.5 Mini\",\n    \"model_vendor\": \"ai21\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/jamba-1.5-mini@001\": {\n    \"display_name\": \"Jamba 1.5 Mini@001\",\n    \"model_vendor\": \"ai21\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/meta/llama-3.1-405b-instruct-maas\": {\n    \"display_name\": \"Llama 3.1 405B Instruct Maas\",\n    \"model_vendor\": \"meta\"\n  },\n  \"vertex_ai/meta/llama-3.1-70b-instruct-maas\": {\n    \"display_name\": \"Llama 3.1 70B Instruct Maas\",\n    \"model_vendor\": \"meta\"\n  },\n  \"vertex_ai/meta/llama-3.1-8b-instruct-maas\": {\n    \"display_name\": \"Llama 3.1 8B Instruct Maas\",\n    \"model_vendor\": \"meta\"\n  },\n  \"vertex_ai/meta/llama-3.2-90b-vision-instruct-maas\": {\n    \"display_name\": \"Llama 3.2 90B Vision Instruct Maas\",\n    \"model_vendor\": \"meta\"\n  },\n  \"vertex_ai/meta/llama-4-maverick-17b-128e-instruct-maas\": {\n    \"display_name\": \"Llama 4 Maverick 17B 128e Instruct Maas\",\n    \"model_vendor\": \"meta\"\n  },\n  \"vertex_ai/meta/llama-4-maverick-17b-16e-instruct-maas\": {\n    \"display_name\": \"Llama 4 Maverick 17B 16e Instruct Maas\",\n    \"model_vendor\": \"meta\"\n  },\n  \"vertex_ai/meta/llama-4-scout-17b-128e-instruct-maas\": {\n    \"display_name\": \"Llama 4 Scout 17B 128e Instruct Maas\",\n    \"model_vendor\": \"meta\"\n  },\n  \"vertex_ai/meta/llama-4-scout-17b-16e-instruct-maas\": {\n    \"display_name\": \"Llama 4 Scout 17B 16e Instruct Maas\",\n    \"model_vendor\": \"meta\"\n  },\n  \"vertex_ai/meta/llama3-405b-instruct-maas\": {\n    \"display_name\": \"Llama 3 405B Instruct Maas\",\n    \"model_vendor\": \"meta\"\n  },\n  \"vertex_ai/meta/llama3-70b-instruct-maas\": {\n    \"display_name\": \"Llama 3 70B Instruct Maas\",\n    \"model_vendor\": \"meta\"\n  },\n  \"vertex_ai/meta/llama3-8b-instruct-maas\": {\n    \"display_name\": \"Llama 3 8B Instruct Maas\",\n    \"model_vendor\": \"meta\"\n  },\n  \"vertex_ai/minimaxai/minimax-m2-maas\": {\n    \"display_name\": \"MiniMax M2\",\n    \"model_vendor\": \"minimax\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistral-large-2411\": {\n    \"display_name\": \"Mistral Large 24.11\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistral-large@2407\": {\n    \"display_name\": \"Mistral Large@24.07\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistral-large@2411-001\": {\n    \"display_name\": \"Mistral Large@24.11 001\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"001\"\n  },\n  \"vertex_ai/mistral-large@latest\": {\n    \"display_name\": \"Mistral Large@latest\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistral-medium-3\": {\n    \"display_name\": \"Mistral Medium 3\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistral-medium-3@001\": {\n    \"display_name\": \"Mistral Medium 3@001\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistral-nemo@2407\": {\n    \"display_name\": \"Mistral Nemo@24.07\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistral-nemo@latest\": {\n    \"display_name\": \"Mistral Nemo@latest\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistral-small-2503\": {\n    \"display_name\": \"Mistral Small 2503\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistral-small-2503@001\": {\n    \"display_name\": \"Mistral Small 2503@001\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistralai/codestral-2\": {\n    \"display_name\": \"Codestral 2\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistralai/codestral-2@001\": {\n    \"display_name\": \"Codestral 2@001\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistralai/mistral-medium-3\": {\n    \"display_name\": \"Mistral Medium 3\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/mistralai/mistral-medium-3@001\": {\n    \"display_name\": \"Mistral Medium 3@001\",\n    \"model_vendor\": \"mistral\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/moonshotai/kimi-k2-thinking-maas\": {\n    \"display_name\": \"Kimi K2 Thinking\",\n    \"model_vendor\": \"moonshot\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/openai/gpt-oss-120b-maas\": {\n    \"display_name\": \"GPT Open-Source 120B\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/openai/gpt-oss-20b-maas\": {\n    \"display_name\": \"GPT Open-Source 20B\",\n    \"model_vendor\": \"openai\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/qwen/qwen3-235b-a22b-instruct-2507-maas\": {\n    \"display_name\": \"Qwen3 235B A22b Instruct 2507 Maas\",\n    \"model_vendor\": \"alibaba\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas\": {\n    \"display_name\": \"Qwen3 Coder 480B A35b Instruct Maas\",\n    \"model_vendor\": \"alibaba\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/qwen/qwen3-next-80b-a3b-instruct-maas\": {\n    \"display_name\": \"Qwen3 Next 80B A3b Instruct Maas\",\n    \"model_vendor\": \"alibaba\",\n    \"model_version\": \"latest\"\n  },\n  \"vertex_ai/qwen/qwen3-next-80b-a3b-thinking-maas\": {\n    \"display_name\": \"Qwen3 Next 80B A3b Thinking Maas\",\n    \"model_vendor\": \"alibaba\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-2\": {\n    \"display_name\": \"Grok 2\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-2-1212\": {\n    \"display_name\": \"Grok 2\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-2-latest\": {\n    \"display_name\": \"Grok 2\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-2-vision\": {\n    \"display_name\": \"Grok 2 Vision\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-2-vision-1212\": {\n    \"display_name\": \"Grok 2 Vision\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-2-vision-latest\": {\n    \"display_name\": \"Grok 2 Vision\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-3\": {\n    \"display_name\": \"Grok 3\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-3-beta\": {\n    \"display_name\": \"Grok 3\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-3-fast-beta\": {\n    \"display_name\": \"Grok 3\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-3-fast-latest\": {\n    \"display_name\": \"Grok 3\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-3-latest\": {\n    \"display_name\": \"Grok 3\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-3-mini\": {\n    \"display_name\": \"Grok 3 Mini\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-3-mini-beta\": {\n    \"display_name\": \"Grok 3 Mini\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-3-mini-fast\": {\n    \"display_name\": \"Grok 3 Mini\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-3-mini-fast-beta\": {\n    \"display_name\": \"Grok 3 Mini\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-3-mini-fast-latest\": {\n    \"display_name\": \"Grok 3 Mini\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-3-mini-latest\": {\n    \"display_name\": \"Grok 3 Mini\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-4\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-4-0709\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-4-1-fast\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-4-1-fast-non-reasoning\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-4-1-fast-non-reasoning-latest\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-4-1-fast-reasoning\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-4-1-fast-reasoning-latest\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-4-fast-non-reasoning\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-4-fast-reasoning\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-4-latest\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-beta\": {\n    \"display_name\": \"Grok Beta\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-code-fast\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-code-fast-1\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-code-fast-1-0825\": {\n    \"display_name\": \"Grok\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  },\n  \"xai/grok-vision-beta\": {\n    \"display_name\": \"Grok Vision\",\n    \"model_vendor\": \"xai\",\n    \"model_version\": \"latest\"\n  }\n}\n"
  },
  {
    "path": "backend/onyx/llm/model_name_parser.py",
    "content": "\"\"\"\nLiteLLM Model Name Parser\n\nParses LiteLLM model strings and returns structured metadata for UI display.\nAll metadata comes from litellm's model_cost dictionary. Until this upstream patch to LiteLLM\nis merged (https://github.com/BerriAI/litellm/pull/17330), we use the model_metadata_enrichments.json\nto add these fields at server startup.\n\nEnrichment fields:\n- display_name: Human-friendly name (e.g., \"Claude 3.5 Sonnet\")\n- model_vendor: The company that made the model (anthropic, openai, meta, etc.)\n- model_version: Version string (e.g., \"20241022-v2:0\", \"v1:0\")\n\nThe parser only extracts provider and region from the model key - everything\nelse comes from enrichment.\n\"\"\"\n\nimport re\nfrom functools import lru_cache\n\nfrom pydantic import BaseModel\n\nfrom onyx.llm.constants import AGGREGATOR_PROVIDERS\nfrom onyx.llm.constants import HYPHENATED_MODEL_NAMES\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.constants import MODEL_PREFIX_TO_VENDOR\nfrom onyx.llm.constants import PROVIDER_DISPLAY_NAMES\nfrom onyx.llm.constants import VENDOR_BRAND_NAMES\n\n\nclass ParsedModelName(BaseModel):\n    \"\"\"Structured representation of a parsed LiteLLM model name.\"\"\"\n\n    raw_name: str  # Original: \"bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0\"\n    provider: str  # \"bedrock\", \"azure\", \"openai\", etc. (the API route)\n    vendor: str | None = None  # From enrichment: \"anthropic\", \"openai\", \"meta\", etc.\n    version: str | None = None  # From enrichment: \"20241022-v2:0\", \"v1:0\", etc.\n    region: str | None = None  # Extracted: \"us\", \"eu\", or None\n    display_name: str  # From enrichment: \"Claude 3.5 Sonnet\"\n    provider_display_name: str  # Generated: \"Claude (Bedrock - Anthropic)\"\n\n\ndef _get_model_info(model_key: str) -> dict:\n    \"\"\"Get model info from litellm.model_cost.\"\"\"\n    from onyx.llm.litellm_singleton import litellm\n\n    # Try exact key first\n    info = litellm.model_cost.get(model_key)\n    if info:\n        return info\n\n    # Try without provider prefix (e.g., \"bedrock/anthropic.claude-...\" -> \"anthropic.claude-...\")\n    if \"/\" in model_key:\n        return litellm.model_cost.get(model_key.split(\"/\", 1)[-1], {})\n\n    return {}\n\n\ndef _extract_provider(model_key: str) -> str:\n    \"\"\"Extract provider from model key prefix.\"\"\"\n    from onyx.llm.litellm_singleton import litellm\n\n    if \"/\" in model_key:\n        return model_key.split(\"/\")[0]\n\n    # No prefix - try to get from litellm.model_cost\n    info = litellm.model_cost.get(model_key, {})\n    litellm_provider = info.get(\"litellm_provider\", \"\")\n\n    if litellm_provider:\n        # Normalize vertex_ai variants\n        if litellm_provider.startswith(LlmProviderNames.VERTEX_AI):\n            return LlmProviderNames.VERTEX_AI\n        return litellm_provider\n\n    return \"unknown\"\n\n\ndef _extract_region(model_key: str) -> str | None:\n    \"\"\"Extract region from model key (e.g., us., eu., apac. prefix).\"\"\"\n    base = model_key.split(\"/\")[-1].lower()\n\n    for prefix in [\"us.\", \"eu.\", \"apac.\", \"global.\", \"us-gov.\"]:\n        if base.startswith(prefix):\n            return prefix.rstrip(\".\")\n\n    return None\n\n\ndef _format_name(name: str | None) -> str:\n    \"\"\"Format provider or vendor name with proper capitalization.\"\"\"\n    if not name:\n        return \"Unknown\"\n    return PROVIDER_DISPLAY_NAMES.get(name.lower(), name.replace(\"_\", \" \").title())\n\n\ndef _infer_vendor_from_model_name(model_name: str) -> str | None:\n    \"\"\"\n    Infer vendor from model name patterns when enrichment data is missing.\n\n    Uses MODEL_PREFIX_TO_VENDOR mapping to match model name prefixes.\n    Returns lowercase vendor name for consistency with enrichment data.\n\n    Examples:\n        \"gemini-3-flash-preview\" → \"google\"\n        \"claude-3-5-sonnet\" → \"anthropic\"\n        \"llama-3.1-70b\" → \"meta\"\n    \"\"\"\n    try:\n        # Get the base model name (remove provider prefix if present)\n        base_name = model_name.split(\"/\")[-1].lower()\n\n        # Try to match against known prefixes (sorted by length to match longest first)\n        for prefix in sorted(MODEL_PREFIX_TO_VENDOR.keys(), key=len, reverse=True):\n            if base_name.startswith(prefix):\n                return MODEL_PREFIX_TO_VENDOR[prefix]\n    except Exception:\n        pass\n\n    return None\n\n\ndef _generate_display_name_from_model(model_name: str) -> str:\n    \"\"\"\n    Generate a human-friendly display name from a model identifier.\n\n    Used as fallback when the model is not in enrichment data.\n    Cleans up the raw model name by removing provider prefixes and\n    formatting version numbers nicely.\n\n    Examples:\n        \"vertex_ai/gemini-3-flash-preview\" → \"Gemini 3 Flash Preview\"\n        \"gemini-2.5-pro-exp-03-25\" → \"Gemini 2.5 Pro\"\n        \"claude-3-5-sonnet-20241022\" → \"Claude 3.5 Sonnet\"\n        \"gpt-oss:120b\" → \"GPT-OSS 120B\" (hyphenated exception)\n    \"\"\"\n    try:\n        # Remove provider prefix if present\n        base_name = model_name.split(\"/\")[-1]\n\n        # Remove tag suffix (e.g., :14b, :latest) - handle separately\n        size_suffix = \"\"\n        if \":\" in base_name:\n            base_name, tag = base_name.rsplit(\":\", 1)\n            # Keep size tags like \"14b\", \"70b\", \"120b\"\n            if re.match(r\"^\\d+[bBmM]$\", tag):\n                size_suffix = f\" {tag.upper()}\"\n\n        # Check if this is a hyphenated model that should keep its format\n        base_name_lower = base_name.lower()\n        for hyphenated in HYPHENATED_MODEL_NAMES:\n            if base_name_lower.startswith(hyphenated):\n                # Keep the hyphenated prefix, uppercase it\n                return hyphenated.upper() + size_suffix\n\n        # Remove common suffixes: date stamps, version numbers\n        cleaned = base_name\n        # Remove date stamps like -20241022, @20250219, -2024-08-06\n        cleaned = re.sub(r\"[-@]\\d{4}-?\\d{2}-?\\d{2}\", \"\", cleaned)\n        # Remove experimental/preview date suffixes like -exp-03-25\n        cleaned = re.sub(r\"-exp-\\d{2}-\\d{2}\", \"\", cleaned)\n        # Remove version suffixes like -v1, -v2\n        cleaned = re.sub(r\"-v\\d+$\", \"\", cleaned)\n\n        # Convert separators to spaces\n        cleaned = cleaned.replace(\"-\", \" \").replace(\"_\", \" \")\n\n        # Clean up version numbers: \"3 5\" → \"3.5\", \"2 5\" → \"2.5\"\n        # But only for single digits that look like version numbers\n        cleaned = re.sub(r\"(\\d) (\\d)(?!\\d)\", r\"\\1.\\2\", cleaned)\n\n        # Title case each word, preserving version numbers\n        words = cleaned.split()\n        result_words = []\n        for word in words:\n            if word.isdigit() or re.match(r\"^\\d+\\.?\\d*$\", word):\n                # Keep numbers as-is\n                result_words.append(word)\n            elif word.lower() in (\"pro\", \"lite\", \"mini\", \"flash\", \"preview\", \"ultra\"):\n                # Common suffixes get title case\n                result_words.append(word.title())\n            else:\n                # Title case other words\n                result_words.append(word.title())\n\n        return \" \".join(result_words) + size_suffix\n    except Exception:\n        return model_name\n\n\ndef _generate_provider_display_name(provider: str, vendor: str | None) -> str:\n    \"\"\"\n    Generate provider display name with model brand and vendor info.\n\n    Examples:\n        - Direct OpenAI: \"GPT (OpenAI)\"\n        - Bedrock via Anthropic: \"Claude (Bedrock - Anthropic)\"\n        - Vertex AI via Google: \"Gemini (Vertex AI - Google)\"\n    \"\"\"\n    provider_nice = _format_name(provider)\n    vendor_nice = _format_name(vendor) if vendor else None\n    brand = VENDOR_BRAND_NAMES.get(vendor.lower()) if vendor else None\n\n    # For aggregator providers, show: Brand (Provider - Vendor)\n    if provider.lower() in AGGREGATOR_PROVIDERS:\n        if brand and vendor_nice:\n            return f\"{brand} ({provider_nice} - {vendor_nice})\"\n        elif vendor_nice:\n            return f\"{provider_nice} - {vendor_nice}\"\n        return provider_nice\n\n    # For direct providers, show: Brand (Provider)\n    if brand:\n        return f\"{brand} ({provider_nice})\"\n\n    return provider_nice\n\n\n@lru_cache(maxsize=1024)\ndef parse_litellm_model_name(raw_name: str) -> ParsedModelName:\n    \"\"\"\n    Parse a LiteLLM model string into structured data.\n\n    Metadata comes from enrichment when available, with fallback logic\n    for models not in the enrichment data.\n\n    Args:\n        raw_name: The LiteLLM model string\n\n    Returns:\n        ParsedModelName with all components from enrichment or fallback\n    \"\"\"\n    model_info = _get_model_info(raw_name)\n\n    # Extract from key (not in enrichment)\n    provider = _extract_provider(raw_name)\n    region = _extract_region(raw_name)\n\n    # Get from enrichment, with fallbacks for unenriched models\n    vendor = model_info.get(\"model_vendor\") or _infer_vendor_from_model_name(raw_name)\n    version = model_info.get(\"model_version\")\n    display_name = model_info.get(\"display_name\") or _generate_display_name_from_model(\n        raw_name\n    )\n\n    # Generate provider display name\n    provider_display_name = _generate_provider_display_name(provider, vendor)\n\n    return ParsedModelName(\n        raw_name=raw_name,\n        provider=provider,\n        vendor=vendor,\n        version=version,\n        region=region,\n        display_name=display_name,\n        provider_display_name=provider_display_name,\n    )\n"
  },
  {
    "path": "backend/onyx/llm/model_response.py",
    "content": "from __future__ import annotations\n\nfrom typing import Any\nfrom typing import List\nfrom typing import TYPE_CHECKING\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\n\nclass FunctionCall(BaseModel):\n    arguments: str | None = None\n    name: str | None = None\n\n\nclass ChatCompletionMessageToolCall(BaseModel):\n    id: str\n    type: str = \"function\"\n    function: FunctionCall\n\n\nclass ChatCompletionDeltaToolCall(BaseModel):\n    id: str | None = None\n    index: int = 0\n    type: str = \"function\"\n    function: FunctionCall | None = None\n\n\nclass Delta(BaseModel):\n    content: str | None = None\n    reasoning_content: str | None = None\n    tool_calls: List[ChatCompletionDeltaToolCall] = Field(default_factory=list)\n\n\nclass StreamingChoice(BaseModel):\n    finish_reason: str | None = None\n    index: int = 0\n    delta: Delta = Field(default_factory=Delta)\n\n\nclass Usage(BaseModel):\n    completion_tokens: int\n    prompt_tokens: int\n    total_tokens: int\n    cache_creation_input_tokens: int\n    cache_read_input_tokens: int\n\n\nclass ModelResponseStream(BaseModel):\n    id: str\n    created: str\n    choice: StreamingChoice\n    usage: Usage | None = None\n\n\nif TYPE_CHECKING:\n    from litellm.types.utils import ModelResponseStream as LiteLLMModelResponseStream\n\n\nclass Message(BaseModel):\n    content: str | None = None\n    role: str = \"assistant\"\n    tool_calls: List[ChatCompletionMessageToolCall] | None = None\n    reasoning_content: str | None = None\n\n\nclass Choice(BaseModel):\n    finish_reason: str | None = None\n    index: int = 0\n    message: Message = Field(default_factory=Message)\n\n\nclass ModelResponse(BaseModel):\n    id: str\n    created: str\n    choice: Choice\n    usage: Usage | None = None\n\n\nif TYPE_CHECKING:\n    from litellm.types.utils import (\n        ModelResponse as LiteLLMModelResponse,\n        ModelResponseStream as LiteLLMModelResponseStream,\n    )\n\n\ndef _parse_function_call(\n    function_payload: dict[str, Any] | None,\n) -> FunctionCall | None:\n    \"\"\"Parse a function call payload into a FunctionCall object.\"\"\"\n    if not function_payload or not isinstance(function_payload, dict):\n        return None\n    return FunctionCall(\n        arguments=function_payload.get(\"arguments\"),\n        name=function_payload.get(\"name\"),\n    )\n\n\ndef _parse_delta_tool_calls(\n    tool_calls: list[dict[str, Any]] | None,\n) -> list[ChatCompletionDeltaToolCall]:\n    \"\"\"Parse tool calls for streaming responses (delta format).\"\"\"\n    if not tool_calls:\n        return []\n\n    parsed_tool_calls: list[ChatCompletionDeltaToolCall] = []\n    for tool_call in tool_calls:\n        parsed_tool_calls.append(\n            ChatCompletionDeltaToolCall(\n                id=tool_call.get(\"id\"),\n                index=tool_call.get(\"index\", 0),\n                type=tool_call.get(\"type\", \"function\"),\n                function=_parse_function_call(tool_call.get(\"function\")),\n            )\n        )\n    return parsed_tool_calls\n\n\ndef _parse_message_tool_calls(\n    tool_calls: list[dict[str, Any]] | None,\n) -> list[ChatCompletionMessageToolCall]:\n    \"\"\"Parse tool calls for non-streaming responses (message format).\"\"\"\n    if not tool_calls:\n        return []\n\n    parsed_tool_calls: list[ChatCompletionMessageToolCall] = []\n    for tool_call in tool_calls:\n        function_call = _parse_function_call(tool_call.get(\"function\"))\n        if not function_call:\n            continue\n\n        parsed_tool_calls.append(\n            ChatCompletionMessageToolCall(\n                id=tool_call.get(\"id\", \"\"),\n                type=tool_call.get(\"type\", \"function\"),\n                function=function_call,\n            )\n        )\n    return parsed_tool_calls\n\n\ndef _validate_and_extract_base_fields(\n    response_data: dict[str, Any], error_prefix: str\n) -> tuple[str, str, dict[str, Any]]:\n    \"\"\"\n    Validate and extract common fields (id, created, first choice) from a LiteLLM response.\n\n    Returns:\n        Tuple of (id, created, choice_data)\n    \"\"\"\n    response_id = response_data.get(\"id\")\n    created = response_data.get(\"created\")\n    if response_id is None or created is None:\n        raise ValueError(f\"{error_prefix} must include 'id' and 'created'.\")\n\n    choices: list[dict[str, Any]] = response_data.get(\"choices\") or []\n    if not choices:\n        raise ValueError(f\"{error_prefix} must include at least one choice.\")\n\n    return str(response_id), str(created), choices[0] or {}\n\n\ndef _usage_from_usage_data(usage_data: dict[str, Any]) -> Usage:\n    # NOTE: sometimes the usage data dictionary has these keys and the values are None\n    # hence the \"or 0\" instead of just using default values\n    return Usage(\n        completion_tokens=usage_data.get(\"completion_tokens\") or 0,\n        prompt_tokens=usage_data.get(\"prompt_tokens\") or 0,\n        total_tokens=usage_data.get(\"total_tokens\") or 0,\n        cache_creation_input_tokens=usage_data.get(\"cache_creation_input_tokens\") or 0,\n        cache_read_input_tokens=usage_data.get(\n            \"cache_read_input_tokens\",\n            (usage_data.get(\"prompt_tokens_details\") or {}).get(\"cached_tokens\"),\n        )\n        or 0,\n    )\n\n\ndef from_litellm_model_response_stream(\n    response: \"LiteLLMModelResponseStream\",\n) -> ModelResponseStream:\n    \"\"\"\n    Convert a LiteLLM ModelResponseStream into the simplified Onyx representation.\n    \"\"\"\n    response_data = response.model_dump()\n    response_id, created, choice_data = _validate_and_extract_base_fields(\n        response_data, \"LiteLLM response stream\"\n    )\n\n    delta_data: dict[str, Any] = choice_data.get(\"delta\") or {}\n    parsed_delta = Delta(\n        content=delta_data.get(\"content\"),\n        reasoning_content=delta_data.get(\"reasoning_content\"),\n        tool_calls=_parse_delta_tool_calls(delta_data.get(\"tool_calls\")),\n    )\n\n    streaming_choice = StreamingChoice(\n        finish_reason=choice_data.get(\"finish_reason\"),\n        index=choice_data.get(\"index\", 0),\n        delta=parsed_delta,\n    )\n\n    usage_data = response_data.get(\"usage\")\n    return ModelResponseStream(\n        id=response_id,\n        created=created,\n        choice=streaming_choice,\n        usage=(_usage_from_usage_data(usage_data) if usage_data else None),\n    )\n\n\ndef from_litellm_model_response(\n    response: \"LiteLLMModelResponse\",\n) -> ModelResponse:\n    \"\"\"\n    Convert a LiteLLM ModelResponse into the simplified Onyx representation.\n    \"\"\"\n    response_data = response.model_dump()\n    response_id, created, choice_data = _validate_and_extract_base_fields(\n        response_data, \"LiteLLM response\"\n    )\n\n    message_data: dict[str, Any] = choice_data.get(\"message\") or {}\n    parsed_tool_calls = _parse_message_tool_calls(message_data.get(\"tool_calls\"))\n\n    message = Message(\n        content=message_data.get(\"content\"),\n        role=message_data.get(\"role\", \"assistant\"),\n        tool_calls=parsed_tool_calls if parsed_tool_calls else None,\n        reasoning_content=message_data.get(\"reasoning_content\"),\n    )\n\n    choice = Choice(\n        finish_reason=choice_data.get(\"finish_reason\"),\n        index=choice_data.get(\"index\", 0),\n        message=message,\n    )\n\n    usage_data = response_data.get(\"usage\")\n    return ModelResponse(\n        id=response_id,\n        created=created,\n        choice=choice,\n        usage=(_usage_from_usage_data(usage_data) if usage_data else None),\n    )\n"
  },
  {
    "path": "backend/onyx/llm/models.py",
    "content": "from enum import Enum\nfrom typing import Literal\n\nfrom pydantic import BaseModel\n\n\nclass ToolChoiceOptions(str, Enum):\n    REQUIRED = \"required\"\n    AUTO = \"auto\"\n    NONE = \"none\"\n\n\nclass ReasoningEffort(str, Enum):\n    \"\"\"Reasoning effort levels for models that support extended thinking.\n\n    Different providers map these values differently:\n    - OpenAI: Uses \"low\", \"medium\", \"high\" directly for reasoning_effort. Recently added \"none\" for 5 series\n              which is like \"minimal\"\n    - Claude: Uses budget_tokens with different values for each level\n    - Gemini: Uses \"none\", \"low\", \"medium\", \"high\" for thinking_budget (via litellm mapping)\n    \"\"\"\n\n    AUTO = \"auto\"\n    OFF = \"off\"\n    LOW = \"low\"\n    MEDIUM = \"medium\"\n    HIGH = \"high\"\n\n\n# OpenAI reasoning effort mapping\n# Note: OpenAI API does not support \"auto\" - valid values are: none, minimal, low, medium, high, xhigh\nOPENAI_REASONING_EFFORT: dict[ReasoningEffort, str] = {\n    ReasoningEffort.AUTO: \"medium\",  # Default to medium when auto is requested\n    ReasoningEffort.OFF: \"none\",\n    ReasoningEffort.LOW: \"low\",\n    ReasoningEffort.MEDIUM: \"medium\",\n    ReasoningEffort.HIGH: \"high\",\n}\n\n# Anthropic reasoning effort to budget tokens mapping\n# Loosely based on budgets from LiteLLM but this ensures it's not updated without our knowing from a version bump.\nANTHROPIC_REASONING_EFFORT_BUDGET: dict[ReasoningEffort, int] = {\n    ReasoningEffort.AUTO: 2048,\n    ReasoningEffort.LOW: 1024,\n    ReasoningEffort.MEDIUM: 2048,\n    ReasoningEffort.HIGH: 4096,\n}\n\n\n# Content part structures for multimodal messages\n# The classes in this mirror the OpenAI Chat Completions message types and work well with routers like LiteLLM\nclass TextContentPart(BaseModel):\n    type: Literal[\"text\"] = \"text\"\n    text: str\n    # Some providers (e.g. Anthropic/Gemini) support prompt caching controls on content blocks.\n    cache_control: dict | None = None\n\n\nclass ImageUrlDetail(BaseModel):\n    url: str\n    detail: Literal[\"auto\", \"low\", \"high\"] | None = None\n\n\nclass ImageContentPart(BaseModel):\n    type: Literal[\"image_url\"] = \"image_url\"\n    image_url: ImageUrlDetail\n\n\nContentPart = TextContentPart | ImageContentPart\n\n\n# Tool call structures\nclass FunctionCall(BaseModel):\n    name: str\n    arguments: str\n\n\nclass ToolCall(BaseModel):\n    type: Literal[\"function\"] = \"function\"\n    id: str\n    function: FunctionCall\n\n\n# Message types\n\n\n# Base class for all cacheable messages\nclass CacheableMessage(BaseModel):\n    # Some providers support prompt caching controls at the message level (passed through via LiteLLM).\n    cache_control: dict | None = None\n\n\nclass SystemMessage(CacheableMessage):\n    role: Literal[\"system\"] = \"system\"\n    content: str\n\n\nclass UserMessage(CacheableMessage):\n    role: Literal[\"user\"] = \"user\"\n    content: str | list[ContentPart]\n\n\nclass AssistantMessage(CacheableMessage):\n    role: Literal[\"assistant\"] = \"assistant\"\n    content: str | None = None\n    tool_calls: list[ToolCall] | None = None\n\n\nclass ToolMessage(CacheableMessage):\n    role: Literal[\"tool\"] = \"tool\"\n    content: str\n    tool_call_id: str\n\n\n# Union type for all OpenAI Chat Completions messages\nChatCompletionMessage = SystemMessage | UserMessage | AssistantMessage | ToolMessage\n# Allows for passing in a string directly. This is provided for convenience and is wrapped as a UserMessage.\nLanguageModelInput = list[ChatCompletionMessage] | ChatCompletionMessage\n"
  },
  {
    "path": "backend/onyx/llm/multi_llm.py",
    "content": "import os\nimport threading\nfrom collections.abc import Iterator\nfrom contextlib import contextmanager\nfrom contextlib import nullcontext\nfrom typing import Any\nfrom typing import cast\nfrom typing import TYPE_CHECKING\nfrom typing import Union\n\nfrom onyx.configs.app_configs import MOCK_LLM_RESPONSE\nfrom onyx.configs.chat_configs import LLM_SOCKET_READ_TIMEOUT\nfrom onyx.configs.model_configs import GEN_AI_TEMPERATURE\nfrom onyx.configs.model_configs import LITELLM_EXTRA_BODY\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.cost import calculate_llm_cost_cents\nfrom onyx.llm.interfaces import LanguageModelInput\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.interfaces import LLMConfig\nfrom onyx.llm.interfaces import LLMUserIdentity\nfrom onyx.llm.interfaces import ReasoningEffort\nfrom onyx.llm.interfaces import ToolChoiceOptions\nfrom onyx.llm.model_response import ModelResponse\nfrom onyx.llm.model_response import ModelResponseStream\nfrom onyx.llm.model_response import Usage\nfrom onyx.llm.models import ANTHROPIC_REASONING_EFFORT_BUDGET\nfrom onyx.llm.models import OPENAI_REASONING_EFFORT\nfrom onyx.llm.request_context import get_llm_mock_response\nfrom onyx.llm.utils import build_litellm_passthrough_kwargs\nfrom onyx.llm.utils import is_true_openai_model\nfrom onyx.llm.utils import model_is_reasoning_model\nfrom onyx.llm.well_known_providers.constants import AWS_ACCESS_KEY_ID_KWARG\nfrom onyx.llm.well_known_providers.constants import (\n    AWS_ACCESS_KEY_ID_KWARG_ENV_VAR_FORMAT,\n)\nfrom onyx.llm.well_known_providers.constants import (\n    AWS_BEARER_TOKEN_BEDROCK_KWARG_ENV_VAR_FORMAT,\n)\nfrom onyx.llm.well_known_providers.constants import AWS_REGION_NAME_KWARG\nfrom onyx.llm.well_known_providers.constants import AWS_REGION_NAME_KWARG_ENV_VAR_FORMAT\nfrom onyx.llm.well_known_providers.constants import AWS_SECRET_ACCESS_KEY_KWARG\nfrom onyx.llm.well_known_providers.constants import (\n    AWS_SECRET_ACCESS_KEY_KWARG_ENV_VAR_FORMAT,\n)\nfrom onyx.llm.well_known_providers.constants import LM_STUDIO_API_KEY_CONFIG_KEY\nfrom onyx.llm.well_known_providers.constants import OLLAMA_API_KEY_CONFIG_KEY\nfrom onyx.llm.well_known_providers.constants import VERTEX_CREDENTIALS_FILE_KWARG\nfrom onyx.llm.well_known_providers.constants import (\n    VERTEX_CREDENTIALS_FILE_KWARG_ENV_VAR_FORMAT,\n)\nfrom onyx.llm.well_known_providers.constants import VERTEX_LOCATION_KWARG\nfrom onyx.utils.encryption import mask_string\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_env_lock = threading.Lock()\n\nif TYPE_CHECKING:\n    from litellm import CustomStreamWrapper\n    from litellm import HTTPHandler\n\n\n_LLM_PROMPT_LONG_TERM_LOG_CATEGORY = \"llm_prompt\"\nLEGACY_MAX_TOKENS_KWARG = \"max_tokens\"\nSTANDARD_MAX_TOKENS_KWARG = \"max_completion_tokens\"\n_VERTEX_ANTHROPIC_MODELS_REJECTING_OUTPUT_CONFIG = (\n    \"claude-opus-4-5\",\n    \"claude-opus-4-6\",\n)\n\n\nclass LLMTimeoutError(Exception):\n    \"\"\"\n    Exception raised when an LLM call times out.\n    \"\"\"\n\n\nclass LLMRateLimitError(Exception):\n    \"\"\"\n    Exception raised when an LLM call is rate limited.\n    \"\"\"\n\n\ndef _prompt_to_dicts(prompt: LanguageModelInput) -> list[dict[str, Any]]:\n    \"\"\"Convert Pydantic message models to dictionaries for LiteLLM.\n\n    LiteLLM expects messages to be dictionaries (with .get() method),\n    not Pydantic models. This function serializes the messages.\n    \"\"\"\n    if isinstance(prompt, list):\n        return [msg.model_dump(exclude_none=True) for msg in prompt]\n    return [prompt.model_dump(exclude_none=True)]\n\n\ndef _normalize_content(raw: Any) -> str:\n    \"\"\"Normalize a message content field to a plain string.\n\n    Content can be a string, None, or a list of content-block dicts\n    (e.g. [{\"type\": \"text\", \"text\": \"...\"}]).\n    \"\"\"\n    if raw is None:\n        return \"\"\n    if isinstance(raw, str):\n        return raw\n    if isinstance(raw, list):\n        return \"\\n\".join(\n            block.get(\"text\", \"\") if isinstance(block, dict) else str(block)\n            for block in raw\n        )\n    return str(raw)\n\n\ndef _strip_tool_content_from_messages(\n    messages: list[dict[str, Any]],\n) -> list[dict[str, Any]]:\n    \"\"\"Convert tool-related messages to plain text.\n\n    Bedrock's Converse API requires toolConfig when messages contain\n    toolUse/toolResult content blocks. When no tools are provided for the\n    current request, we must convert any tool-related history into plain text\n    to avoid the \"toolConfig field must be defined\" error.\n\n    This is the same approach used by _OllamaHistoryMessageFormatter.\n    \"\"\"\n    result: list[dict[str, Any]] = []\n    for msg in messages:\n        role = msg.get(\"role\")\n        tool_calls = msg.get(\"tool_calls\")\n\n        if role == \"assistant\" and tool_calls:\n            # Convert structured tool calls to text representation\n            tool_call_lines = []\n            for tc in tool_calls:\n                func = tc.get(\"function\", {})\n                name = func.get(\"name\", \"unknown\")\n                args = func.get(\"arguments\", \"{}\")\n                tc_id = tc.get(\"id\", \"\")\n                tool_call_lines.append(\n                    f\"[Tool Call] name={name} id={tc_id} args={args}\"\n                )\n\n            existing_content = _normalize_content(msg.get(\"content\"))\n            parts = (\n                [existing_content] + tool_call_lines\n                if existing_content\n                else tool_call_lines\n            )\n            new_msg = {\n                \"role\": \"assistant\",\n                \"content\": \"\\n\".join(parts),\n            }\n            result.append(new_msg)\n\n        elif role == \"tool\":\n            # Convert tool response to user message with text content\n            tool_call_id = msg.get(\"tool_call_id\", \"\")\n            content = _normalize_content(msg.get(\"content\"))\n            tool_result_text = f\"[Tool Result] id={tool_call_id}\\n{content}\"\n            # Merge into previous user message if it is also a converted\n            # tool result to avoid consecutive user messages (Bedrock requires\n            # strict user/assistant alternation).\n            if (\n                result\n                and result[-1][\"role\"] == \"user\"\n                and \"[Tool Result]\" in result[-1].get(\"content\", \"\")\n            ):\n                result[-1][\"content\"] += \"\\n\\n\" + tool_result_text\n            else:\n                result.append({\"role\": \"user\", \"content\": tool_result_text})\n\n        else:\n            result.append(msg)\n\n    return result\n\n\ndef _fix_tool_user_message_ordering(\n    messages: list[dict[str, Any]],\n) -> list[dict[str, Any]]:\n    \"\"\"Insert a synthetic assistant message between tool and user messages.\n\n    Some models (e.g. Mistral on Azure) require strict message ordering where\n    a user message cannot immediately follow a tool message. This function\n    inserts a minimal assistant message to bridge the gap.\n    \"\"\"\n    if len(messages) < 2:\n        return messages\n\n    result: list[dict[str, Any]] = [messages[0]]\n    for msg in messages[1:]:\n        prev_role = result[-1].get(\"role\")\n        curr_role = msg.get(\"role\")\n        if prev_role == \"tool\" and curr_role == \"user\":\n            result.append({\"role\": \"assistant\", \"content\": \"Noted. Continuing.\"})\n        result.append(msg)\n    return result\n\n\ndef _messages_contain_tool_content(messages: list[dict[str, Any]]) -> bool:\n    \"\"\"Check if any messages contain tool-related content blocks.\"\"\"\n    for msg in messages:\n        if msg.get(\"role\") == \"tool\":\n            return True\n        if msg.get(\"role\") == \"assistant\" and msg.get(\"tool_calls\"):\n            return True\n    return False\n\n\ndef _prompt_contains_tool_call_history(prompt: LanguageModelInput) -> bool:\n    \"\"\"Check if the prompt contains any assistant messages with tool_calls.\n\n    When Anthropic's extended thinking is enabled, the API requires every\n    assistant message to start with a thinking block before any tool_use\n    blocks.  Since we don't preserve thinking_blocks (they carry\n    cryptographic signatures that can't be reconstructed), we must skip\n    the thinking param whenever history contains prior tool-calling turns.\n    \"\"\"\n    from onyx.llm.models import AssistantMessage\n\n    msgs = prompt if isinstance(prompt, list) else [prompt]\n    return any(isinstance(msg, AssistantMessage) and msg.tool_calls for msg in msgs)\n\n\ndef _is_vertex_model_rejecting_output_config(model_name: str) -> bool:\n    normalized_model_name = model_name.lower()\n    return any(\n        blocked_model in normalized_model_name\n        for blocked_model in _VERTEX_ANTHROPIC_MODELS_REJECTING_OUTPUT_CONFIG\n    )\n\n\nclass LitellmLLM(LLM):\n    \"\"\"Uses Litellm library to allow easy configuration to use a multitude of LLMs\n    See https://python.langchain.com/docs/integrations/chat/litellm\"\"\"\n\n    def __init__(\n        self,\n        api_key: str | None,\n        model_provider: str,\n        model_name: str,\n        max_input_tokens: int,\n        timeout: int | None = None,\n        api_base: str | None = None,\n        api_version: str | None = None,\n        deployment_name: str | None = None,\n        custom_llm_provider: str | None = None,\n        temperature: float | None = None,\n        custom_config: dict[str, str] | None = None,\n        extra_headers: dict[str, str] | None = None,\n        extra_body: dict | None = LITELLM_EXTRA_BODY,\n        model_kwargs: dict[str, Any] | None = None,\n    ):\n        # Timeout in seconds for each socket read operation (i.e., max time between\n        # receiving data chunks/tokens). This is NOT a total request timeout - a\n        # request can run indefinitely as long as data keeps arriving within this\n        # window. If the LLM pauses for longer than this timeout between chunks,\n        # a ReadTimeout is raised.\n        self._timeout = timeout\n        if timeout is None:\n            self._timeout = LLM_SOCKET_READ_TIMEOUT\n\n        self._temperature = GEN_AI_TEMPERATURE if temperature is None else temperature\n\n        self._model_provider = model_provider\n        self._model_version = model_name\n        self._api_key = api_key\n        self._deployment_name = deployment_name\n        self._api_base = api_base\n        self._api_version = api_version\n        self._custom_llm_provider = custom_llm_provider\n        self._max_input_tokens = max_input_tokens\n        self._custom_config = custom_config\n\n        # Create a dictionary for model-specific arguments if it's None\n        model_kwargs = model_kwargs or {}\n\n        if custom_config:\n            for k, v in custom_config.items():\n                if model_provider == LlmProviderNames.VERTEX_AI:\n                    if k == VERTEX_CREDENTIALS_FILE_KWARG:\n                        model_kwargs[k] = v\n                    elif k == VERTEX_CREDENTIALS_FILE_KWARG_ENV_VAR_FORMAT:\n                        model_kwargs[VERTEX_CREDENTIALS_FILE_KWARG] = v\n                    elif k == VERTEX_LOCATION_KWARG:\n                        model_kwargs[k] = v\n                elif model_provider == LlmProviderNames.OLLAMA_CHAT:\n                    if k == OLLAMA_API_KEY_CONFIG_KEY:\n                        model_kwargs[\"api_key\"] = v\n                elif model_provider == LlmProviderNames.LM_STUDIO:\n                    if k == LM_STUDIO_API_KEY_CONFIG_KEY:\n                        model_kwargs[\"api_key\"] = v\n                elif model_provider == LlmProviderNames.BEDROCK:\n                    if k == AWS_REGION_NAME_KWARG:\n                        model_kwargs[k] = v\n                    elif k == AWS_REGION_NAME_KWARG_ENV_VAR_FORMAT:\n                        model_kwargs[AWS_REGION_NAME_KWARG] = v\n                    elif k == AWS_BEARER_TOKEN_BEDROCK_KWARG_ENV_VAR_FORMAT:\n                        model_kwargs[\"api_key\"] = v\n                    elif k == AWS_ACCESS_KEY_ID_KWARG:\n                        model_kwargs[k] = v\n                    elif k == AWS_ACCESS_KEY_ID_KWARG_ENV_VAR_FORMAT:\n                        model_kwargs[AWS_ACCESS_KEY_ID_KWARG] = v\n                    elif k == AWS_SECRET_ACCESS_KEY_KWARG:\n                        model_kwargs[k] = v\n                    elif k == AWS_SECRET_ACCESS_KEY_KWARG_ENV_VAR_FORMAT:\n                        model_kwargs[AWS_SECRET_ACCESS_KEY_KWARG] = v\n\n        # LM Studio: LiteLLM defaults to \"fake-api-key\" when no key is provided,\n        # which LM Studio rejects. Ensure we always pass an explicit key (or empty\n        # string) to prevent LiteLLM from injecting its fake default.\n        if model_provider == LlmProviderNames.LM_STUDIO:\n            model_kwargs.setdefault(\"api_key\", \"\")\n\n            # Users provide the server root (e.g. http://localhost:1234) but LiteLLM\n            # needs /v1 for OpenAI-compatible calls.\n            if self._api_base is not None:\n                base = self._api_base.rstrip(\"/\")\n                self._api_base = base if base.endswith(\"/v1\") else f\"{base}/v1\"\n                model_kwargs[\"api_base\"] = self._api_base\n\n        # Default vertex_location to \"global\" if not provided for Vertex AI\n        # Latest gemini models are only available through the global region\n        if (\n            model_provider == LlmProviderNames.VERTEX_AI\n            and VERTEX_LOCATION_KWARG not in model_kwargs\n        ):\n            model_kwargs[VERTEX_LOCATION_KWARG] = \"global\"\n\n        # Bifrost: OpenAI-compatible proxy that expects model names in\n        # provider/model format (e.g. \"anthropic/claude-sonnet-4-6\").\n        # We route through LiteLLM's openai provider with the Bifrost base URL,\n        # and ensure /v1 is appended.\n        if model_provider == LlmProviderNames.BIFROST:\n            self._custom_llm_provider = \"openai\"\n            if self._api_base is not None:\n                base = self._api_base.rstrip(\"/\")\n                self._api_base = base if base.endswith(\"/v1\") else f\"{base}/v1\"\n                model_kwargs[\"api_base\"] = self._api_base\n\n        # This is needed for Ollama to do proper function calling\n        if model_provider == LlmProviderNames.OLLAMA_CHAT and api_base is not None:\n            model_kwargs[\"api_base\"] = api_base\n        if extra_headers:\n            model_kwargs.update({\"extra_headers\": extra_headers})\n        if extra_body:\n            model_kwargs.update({\"extra_body\": extra_body})\n\n        self._model_kwargs = model_kwargs\n\n    def _safe_model_config(self) -> dict:\n        dump = self.config.model_dump()\n        dump[\"api_key\"] = mask_string(dump.get(\"api_key\") or \"\")\n        custom_config = dump.get(\"custom_config\")\n        if isinstance(custom_config, dict):\n            # Mask sensitive values in custom_config\n            masked_config = {}\n            for k, v in custom_config.items():\n                masked_config[k] = mask_string(v) if v else v\n            dump[\"custom_config\"] = masked_config\n        return dump\n\n    def _track_llm_cost(self, usage: Usage) -> None:\n        \"\"\"\n        Track LLM usage cost for Onyx-managed API keys.\n\n        This is called after every LLM call completes (streaming or non-streaming).\n        Cost is only tracked if:\n        1. Usage limits are enabled for this deployment\n        2. The API key is one of Onyx's managed default keys\n        \"\"\"\n\n        from onyx.server.usage_limits import is_usage_limits_enabled\n\n        if not is_usage_limits_enabled():\n            return\n\n        from onyx.server.usage_limits import is_onyx_managed_api_key\n\n        if not is_onyx_managed_api_key(self._api_key):\n            return\n        # Import here to avoid circular imports\n        from onyx.db.engine.sql_engine import get_session_with_current_tenant\n        from onyx.db.usage import increment_usage\n        from onyx.db.usage import UsageType\n\n        # Calculate cost in cents\n        cost_cents = calculate_llm_cost_cents(\n            model_name=self._model_version,\n            prompt_tokens=usage.prompt_tokens,\n            completion_tokens=usage.completion_tokens,\n        )\n\n        if cost_cents <= 0:\n            return\n\n        try:\n            with get_session_with_current_tenant() as db_session:\n                increment_usage(db_session, UsageType.LLM_COST, cost_cents)\n                db_session.commit()\n        except Exception as e:\n            # Log but don't fail the LLM call if tracking fails\n            logger.warning(f\"Failed to track LLM cost: {e}\")\n\n    def _completion(\n        self,\n        prompt: LanguageModelInput,\n        tools: list[dict] | None,\n        tool_choice: ToolChoiceOptions | None,\n        stream: bool,\n        parallel_tool_calls: bool,\n        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,\n        structured_response_format: dict | None = None,\n        timeout_override: int | None = None,\n        max_tokens: int | None = None,\n        user_identity: LLMUserIdentity | None = None,\n        client: \"HTTPHandler | None\" = None,\n    ) -> Union[\"ModelResponse\", \"CustomStreamWrapper\"]:\n        # Lazy loading to avoid memory bloat for non-inference flows\n        from onyx.llm.litellm_singleton import litellm\n        from litellm.exceptions import Timeout, RateLimitError\n\n        #########################\n        # Flags that modify the final arguments\n        #########################\n        is_claude_model = \"claude\" in self.config.model_name.lower()\n        is_reasoning = model_is_reasoning_model(\n            self.config.model_name, self.config.model_provider\n        )\n        # All OpenAI models will use responses API for consistency\n        # Responses API is needed to get reasoning packets from OpenAI models\n        is_openai_model = is_true_openai_model(\n            self.config.model_provider, self.config.model_name\n        )\n        is_ollama = self._model_provider == LlmProviderNames.OLLAMA_CHAT\n        is_mistral = self._model_provider == LlmProviderNames.MISTRAL\n        is_vertex_ai = self._model_provider == LlmProviderNames.VERTEX_AI\n        # Some Vertex Anthropic models reject output_config.\n        # Keep this guard until LiteLLM/Vertex accept the field for these models.\n        is_vertex_model_rejecting_output_config = (\n            is_vertex_ai\n            and _is_vertex_model_rejecting_output_config(self.config.model_name)\n        )\n\n        #########################\n        # Build arguments\n        #########################\n        # Optional kwargs - should only be passed to LiteLLM under certain conditions\n        optional_kwargs: dict[str, Any] = {}\n\n        # Model name\n        is_bifrost = self._model_provider == LlmProviderNames.BIFROST\n        model_provider = (\n            f\"{self.config.model_provider}/responses\"\n            if is_openai_model  # Uses litellm's completions -> responses bridge\n            else self.config.model_provider\n        )\n        if is_bifrost:\n            # Bifrost expects model names in provider/model format\n            # (e.g. \"anthropic/claude-sonnet-4-6\") sent directly to its\n            # OpenAI-compatible endpoint. We use custom_llm_provider=\"openai\"\n            # so LiteLLM doesn't try to route based on the provider prefix.\n            model = self.config.deployment_name or self.config.model_name\n        else:\n            model = f\"{model_provider}/{self.config.deployment_name or self.config.model_name}\"\n\n        # Tool choice\n        if is_claude_model and tool_choice == ToolChoiceOptions.REQUIRED:\n            # Claude models will not use reasoning if tool_choice is required\n            # let it choose tools automatically so reasoning can still be used\n            tool_choice = ToolChoiceOptions.AUTO\n\n        # If no tools are provided, tool_choice should be None\n        if not tools:\n            tool_choice = None\n\n        # Temperature\n        temperature = 1 if is_reasoning else self._temperature\n\n        if stream and not is_vertex_model_rejecting_output_config:\n            optional_kwargs[\"stream_options\"] = {\"include_usage\": True}\n\n        # Note, there is a reasoning_effort parameter in LiteLLM but it is completely jank and does not work for any\n        # of the major providers. Not setting it sets it to OFF.\n        if (\n            is_reasoning\n            # The default of this parameter not set is surprisingly not the equivalent of an Auto but is actually Off\n            and reasoning_effort != ReasoningEffort.OFF\n            and not is_vertex_model_rejecting_output_config\n        ):\n            if is_openai_model:\n                # OpenAI API does not accept reasoning params for GPT 5 chat models\n                # (neither reasoning nor reasoning_effort are accepted)\n                # even though they are reasoning models (bug in OpenAI)\n                if \"-chat\" not in model:\n                    optional_kwargs[\"reasoning\"] = {\n                        \"effort\": OPENAI_REASONING_EFFORT[reasoning_effort],\n                        \"summary\": \"auto\",\n                    }\n\n            elif is_claude_model:\n                budget_tokens: int | None = ANTHROPIC_REASONING_EFFORT_BUDGET.get(\n                    reasoning_effort\n                )\n\n                # Anthropic requires every assistant message with tool_use\n                # blocks to start with a thinking block that carries a\n                # cryptographic signature.  We don't preserve those blocks\n                # across turns, so skip thinking when the history already\n                # contains tool-calling assistant messages.  LiteLLM's\n                # modify_params workaround doesn't cover all providers\n                # (notably Bedrock).\n                can_enable_thinking = (\n                    budget_tokens is not None\n                    and not _prompt_contains_tool_call_history(prompt)\n                )\n\n                if can_enable_thinking:\n                    assert budget_tokens is not None  # mypy\n                    if max_tokens is not None:\n                        # Anthropic has a weird rule where max token has to be at least as much as budget tokens if set\n                        # and the minimum budget tokens is 1024\n                        # Will note that overwriting a developer set max tokens is not ideal but is the best we can do for now\n                        # It is better to allow the LLM to output more reasoning tokens even if it results in a fairly small tool\n                        # call as compared to reducing the budget for reasoning.\n                        max_tokens = max(budget_tokens + 1, max_tokens)\n                    optional_kwargs[\"thinking\"] = {\n                        \"type\": \"enabled\",\n                        \"budget_tokens\": budget_tokens,\n                    }\n\n                # LiteLLM just does some mapping like this anyway but is incomplete for Anthropic\n                optional_kwargs.pop(\"reasoning_effort\", None)\n\n            else:\n                # Hope for the best from LiteLLM\n                if reasoning_effort in [\n                    ReasoningEffort.LOW,\n                    ReasoningEffort.MEDIUM,\n                    ReasoningEffort.HIGH,\n                ]:\n                    optional_kwargs[\"reasoning_effort\"] = reasoning_effort.value\n                else:\n                    optional_kwargs[\"reasoning_effort\"] = ReasoningEffort.MEDIUM.value\n\n        if tools:\n            # OpenAI will error if parallel_tool_calls is True and tools are not specified\n            optional_kwargs[\"parallel_tool_calls\"] = parallel_tool_calls\n\n        if structured_response_format:\n            optional_kwargs[\"response_format\"] = structured_response_format\n\n        if not (is_claude_model or is_ollama or is_mistral) or is_bifrost:\n            # Litellm bug: tool_choice is dropped silently if not specified here for OpenAI\n            # However, this param breaks Anthropic and Mistral models,\n            # so it must be conditionally included unless the request is\n            # routed through Bifrost's OpenAI-compatible endpoint.\n            # Additionally, tool_choice is not supported by Ollama and causes warnings if included.\n            # See also, https://github.com/ollama/ollama/issues/11171\n            optional_kwargs[\"allowed_openai_params\"] = [\"tool_choice\"]\n\n        # Passthrough kwargs\n        passthrough_kwargs = build_litellm_passthrough_kwargs(\n            model_kwargs=self._model_kwargs,\n            user_identity=user_identity,\n        )\n\n        try:\n            # NOTE: must pass in None instead of empty strings otherwise litellm\n            # can have some issues with bedrock.\n            # NOTE: Sometimes _model_kwargs may have an \"api_key\" kwarg\n            # depending on what the caller passes in for custom_config. If it\n            # does we allow it to clobber _api_key.\n            if \"api_key\" not in passthrough_kwargs:\n                passthrough_kwargs[\"api_key\"] = self._api_key or None\n\n            # We only need to set environment variables if custom config is set\n            env_ctx = (\n                temporary_env_and_lock(self._custom_config)\n                if self._custom_config\n                else nullcontext()\n            )\n            with env_ctx:\n                messages = _prompt_to_dicts(prompt)\n\n                # Bedrock's Converse API requires toolConfig when messages\n                # contain toolUse/toolResult content blocks. When no tools are\n                # provided for this request but the history contains tool\n                # content from previous turns, strip it to plain text.\n                is_bedrock = self._model_provider in {\n                    LlmProviderNames.BEDROCK,\n                    LlmProviderNames.BEDROCK_CONVERSE,\n                }\n                if (\n                    is_bedrock\n                    and not tools\n                    and _messages_contain_tool_content(messages)\n                ):\n                    messages = _strip_tool_content_from_messages(messages)\n\n                # Some models (e.g. Mistral) reject a user message\n                # immediately after a tool message. Insert a synthetic\n                # assistant bridge message to satisfy the ordering\n                # constraint. Check both the provider and the deployment/\n                # model name to catch Mistral hosted on Azure.\n                model_or_deployment = (\n                    self._deployment_name or self._model_version or \"\"\n                ).lower()\n                is_mistral_model = is_mistral or \"mistral\" in model_or_deployment\n                if is_mistral_model:\n                    messages = _fix_tool_user_message_ordering(messages)\n\n                # Only pass tool_choice when tools are present — some providers (e.g. Fireworks)\n                # reject requests where tool_choice is explicitly null.\n                if tools and tool_choice is not None:\n                    optional_kwargs[\"tool_choice\"] = tool_choice\n\n                response = litellm.completion(\n                    mock_response=get_llm_mock_response() or MOCK_LLM_RESPONSE,\n                    model=model,\n                    base_url=self._api_base or None,\n                    api_version=self._api_version or None,\n                    custom_llm_provider=self._custom_llm_provider or None,\n                    messages=messages,\n                    tools=tools,\n                    stream=stream,\n                    temperature=temperature,\n                    timeout=timeout_override or self._timeout,\n                    max_tokens=max_tokens,\n                    client=client,\n                    **optional_kwargs,\n                    **passthrough_kwargs,\n                )\n            return response\n        except Exception as e:\n            # for break pointing\n            if isinstance(e, Timeout):\n                raise LLMTimeoutError(e)\n\n            elif isinstance(e, RateLimitError):\n                raise LLMRateLimitError(e)\n\n            raise e\n\n    @property\n    def config(self) -> LLMConfig:\n        return LLMConfig(\n            model_provider=self._model_provider,\n            model_name=self._model_version,\n            temperature=self._temperature,\n            api_key=self._api_key,\n            api_base=self._api_base,\n            api_version=self._api_version,\n            deployment_name=self._deployment_name,\n            custom_config=self._custom_config,\n            max_input_tokens=self._max_input_tokens,\n        )\n\n    def invoke(\n        self,\n        prompt: LanguageModelInput,\n        tools: list[dict] | None = None,\n        tool_choice: ToolChoiceOptions | None = None,\n        structured_response_format: dict | None = None,\n        timeout_override: int | None = None,\n        max_tokens: int | None = None,\n        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,\n        user_identity: LLMUserIdentity | None = None,\n    ) -> ModelResponse:\n        from litellm import HTTPHandler\n        from litellm import ModelResponse as LiteLLMModelResponse\n\n        from onyx.llm.model_response import from_litellm_model_response\n\n        # HTTPHandler Threading & Connection Pool Notes:\n        # =============================================\n        # We create an isolated HTTPHandler ONLY for true OpenAI models (not OpenAI-compatible\n        # providers like glm-4.7, DeepSeek, etc.). This distinction is critical:\n        #\n        # 1. WHY ONLY TRUE OPENAI MODELS:\n        #    - True OpenAI models use litellm's \"responses API\" path which expects HTTPHandler\n        #    - OpenAI-compatible providers (model_provider=\"openai\" with non-OpenAI models)\n        #      use the standard completion path which expects OpenAI SDK client objects\n        #    - Passing HTTPHandler to OpenAI-compatible providers causes:\n        #      AttributeError: 'HTTPHandler' object has no attribute 'api_key'\n        #      (because _get_openai_client() calls openai_client.api_key on line ~929)\n        #\n        # 2. WHY ISOLATED HTTPHandler FOR OPENAI:\n        #    - Prevents \"Bad file descriptor\" errors when multiple threads stream concurrently\n        #    - Shared connection pools can have stale connections or abandoned streams that\n        #      corrupt the pool state for other threads\n        #    - Each request gets its own fresh httpx.Client via HTTPHandler\n        #\n        # 3. WHY OTHER PROVIDERS DON'T NEED THIS:\n        #    - Other providers (Anthropic, Bedrock, etc.) use litellm.module_level_client\n        #      which handles concurrency appropriately\n        #    - httpx.Client itself IS thread-safe for concurrent requests\n        #    - The issue is specific to OpenAI's responses API path and connection reuse\n        #\n        # 4. PITFALL - is_true_openai_model() CHECK:\n        #    - Must use is_true_openai_model() NOT just check model_provider == \"openai\"\n        #    - Many OpenAI-compatible providers set model_provider=\"openai\" but are NOT true\n        #      OpenAI models (glm-4.7, DeepSeek, local proxies, etc.)\n        #    - is_true_openai_model() checks both provider AND model name patterns\n        #\n        # This note may not be entirely accurate as there is a lot of complexity in the LiteLLM codebase around this\n        # and not every model path was traced thoroughly. It is also possible that in future versions of LiteLLM\n        # they will realize that their OpenAI handling is not threadsafe. Hope they will just fix it.\n        client = None\n        if is_true_openai_model(self.config.model_provider, self.config.model_name):\n            client = HTTPHandler(timeout=timeout_override or self._timeout)\n\n        try:\n            # When custom_config is set, env vars are temporarily injected\n            # under a global lock. Using stream=True here means the lock is\n            # only held during connection setup (not the full inference).\n            # The chunks are then collected outside the lock and reassembled\n            # into a single ModelResponse via stream_chunk_builder.\n            from litellm import stream_chunk_builder\n            from litellm import CustomStreamWrapper as LiteLLMCustomStreamWrapper\n\n            stream_response = cast(\n                LiteLLMCustomStreamWrapper,\n                self._completion(\n                    prompt=prompt,\n                    tools=tools,\n                    tool_choice=tool_choice,\n                    stream=True,\n                    structured_response_format=structured_response_format,\n                    timeout_override=timeout_override,\n                    max_tokens=max_tokens,\n                    parallel_tool_calls=True,\n                    reasoning_effort=reasoning_effort,\n                    user_identity=user_identity,\n                    client=client,\n                ),\n            )\n            chunks = list(stream_response)\n            response = cast(\n                LiteLLMModelResponse,\n                stream_chunk_builder(chunks),\n            )\n\n            model_response = from_litellm_model_response(response)\n\n            # Track LLM cost for Onyx-managed API keys\n            if model_response.usage:\n                self._track_llm_cost(model_response.usage)\n\n            return model_response\n        finally:\n            if client is not None:\n                client.close()\n\n    def stream(\n        self,\n        prompt: LanguageModelInput,\n        tools: list[dict] | None = None,\n        tool_choice: ToolChoiceOptions | None = None,\n        structured_response_format: dict | None = None,\n        timeout_override: int | None = None,\n        max_tokens: int | None = None,\n        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,\n        user_identity: LLMUserIdentity | None = None,\n    ) -> Iterator[ModelResponseStream]:\n        from litellm import CustomStreamWrapper as LiteLLMCustomStreamWrapper\n        from litellm import HTTPHandler\n\n        from onyx.llm.model_response import from_litellm_model_response_stream\n\n        # HTTPHandler Threading & Connection Pool Notes:\n        # =============================================\n        # See invoke() method for full explanation. Key points for streaming:\n        #\n        # 1. SAME RESTRICTIONS APPLY:\n        #    - HTTPHandler ONLY for true OpenAI models (use is_true_openai_model())\n        #    - OpenAI-compatible providers will fail with AttributeError on api_key\n        #\n        # 2. STREAMING-SPECIFIC CONCERNS:\n        #    - \"Bad file descriptor\" errors are MORE common during streaming because:\n        #      a) Streams hold connections open longer, increasing conflict window\n        #      b) Multiple concurrent streams (e.g., deep research) share the pool\n        #      c) Abandoned/interrupted streams can leave connections in bad state\n        #\n        # 3. ABANDONED STREAM PITFALL:\n        #    - If callers abandon this generator without fully consuming it (e.g.,\n        #      early return, exception, or break), the finally block won't execute\n        #      until the generator is garbage collected\n        #    - This is acceptable because:\n        #      a) CPython's refcounting typically finalizes generators promptly\n        #      b) Each HTTPHandler has its own isolated connection pool\n        #      c) httpx has built-in connection timeouts as a fallback\n        #    - If abandoned streams become problematic, consider using contextlib\n        #      or explicit stream.close() at call sites\n        #\n        # 4. WHY NOT USE SHARED HTTPHandler:\n        #    - litellm's InMemoryCache (used for client caching) is NOT thread-safe\n        #    - Shared pools can have connections corrupted by other threads\n        #    - Per-request HTTPHandler eliminates cross-thread interference\n        client = None\n        if is_true_openai_model(self.config.model_provider, self.config.model_name):\n            client = HTTPHandler(timeout=timeout_override or self._timeout)\n\n        try:\n            response = cast(\n                LiteLLMCustomStreamWrapper,\n                self._completion(\n                    prompt=prompt,\n                    tools=tools,\n                    tool_choice=tool_choice,\n                    stream=True,\n                    structured_response_format=structured_response_format,\n                    timeout_override=timeout_override,\n                    max_tokens=max_tokens,\n                    parallel_tool_calls=True,\n                    reasoning_effort=reasoning_effort,\n                    user_identity=user_identity,\n                    client=client,\n                ),\n            )\n\n            for chunk in response:\n                model_response = from_litellm_model_response_stream(chunk)\n\n                # Track LLM cost when usage info is available (typically in the last chunk)\n                if model_response.usage:\n                    self._track_llm_cost(model_response.usage)\n\n                yield model_response\n        finally:\n            if client is not None:\n                client.close()\n\n\n@contextmanager\ndef temporary_env_and_lock(env_variables: dict[str, str]) -> Iterator[None]:\n    \"\"\"\n    Temporarily sets the environment variables to the given values.\n    Code path is locked while the environment variables are set.\n    Then cleans up the environment and frees the lock.\n    \"\"\"\n    with _env_lock:\n        logger.debug(\"Acquired lock in temporary_env_and_lock\")\n        # Store original values (None if key didn't exist)\n        original_values: dict[str, str | None] = {\n            key: os.environ.get(key) for key in env_variables\n        }\n        try:\n            os.environ.update(env_variables)\n            yield\n        finally:\n            for key, original_value in original_values.items():\n                if original_value is None:\n                    os.environ.pop(key, None)  # Remove if it didn't exist before\n                else:\n                    os.environ[key] = original_value  # Restore original value\n\n    logger.debug(\"Released lock in temporary_env_and_lock\")\n"
  },
  {
    "path": "backend/onyx/llm/override_models.py",
    "content": "\"\"\"Overrides sent over the wire / stored in the DB\n\nNOTE: these models are used in many places, so have to be\nkepy in a separate file to avoid circular imports.\n\"\"\"\n\nfrom pydantic import BaseModel\n\n\nclass LLMOverride(BaseModel):\n    \"\"\"Per-request LLM settings that override persona defaults.\n\n    All fields are optional — only the fields that differ from the persona's\n    configured LLM need to be supplied. Used both over the wire (API requests)\n    and for multi-model comparison, where one override is supplied per model.\n\n    Attributes:\n        model_provider: LLM provider slug (e.g. ``\"openai\"``, ``\"anthropic\"``).\n            When ``None``, the persona's default provider is used.\n        model_version: Specific model version string (e.g. ``\"gpt-4o\"``).\n            When ``None``, the persona's default model is used.\n        temperature: Sampling temperature in ``[0, 2]``. When ``None``, the\n            persona's default temperature is used.\n        display_name: Human-readable label shown in the UI for this model,\n            e.g. ``\"GPT-4 Turbo\"``. Optional; falls back to ``model_version``\n            when not set.\n    \"\"\"\n\n    model_provider: str | None = None\n    model_version: str | None = None\n    temperature: float | None = None\n    display_name: str | None = None\n\n    # This disables the \"model_\" protected namespace for pydantic\n    model_config = {\"protected_namespaces\": ()}\n\n\nclass PromptOverride(BaseModel):\n    system_prompt: str | None = None\n    task_prompt: str | None = None\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/README.md",
    "content": "# Prompt Caching Framework\n\nA comprehensive prompt-caching mechanism for enabling cost savings across multiple LLM providers by leveraging provider-side prompt token caching.\n\n## Overview\n\nThe prompt caching framework provides a unified interface for enabling prompt caching across different LLM providers. It supports both **implicit caching** (automatic provider-side caching) and **explicit caching** (with cache control parameters).\n\n## Features\n\n- **Provider Support**: OpenAI (implicit), Anthropic (explicit), Vertex AI (explicit)\n- **Flexible Input**: Supports both `str` and `Sequence[ChatCompletionMessage]` inputs\n- **Continuation Handling**: Smart merging of cacheable prefix and suffix messages\n- **Best-Effort**: Gracefully degrades if caching fails\n- **Tenant-Aware**: Automatic tenant isolation for multi-tenant deployments\n- **Configurable**: Enable/disable via environment variable\n\n## Quick Start\n\n### Basic Usage\n\n```python\nfrom onyx.llm.prompt_cache import process_with_prompt_cache\nfrom onyx.llm.models import SystemMessage, UserMessage\n\n# Assume you have an LLM instance with a config property\n# llm = get_your_llm_instance()\n\n# Define cacheable prefix (static context) using Pydantic message models\ncacheable_prefix = [\n    SystemMessage(role=\"system\", content=\"You are a helpful assistant.\"),\n    UserMessage(role=\"user\", content=\"Context: ...\")  # Static context\n]\n\n# Define suffix (dynamic user input)\nsuffix = [UserMessage(role=\"user\", content=\"What is the weather?\")]\n\n# Process with caching - pass llm_config, not the llm instance\nprocessed_prompt, cache_metadata = process_with_prompt_cache(\n    llm_config=llm.config,\n    cacheable_prefix=cacheable_prefix,\n    suffix=suffix,\n    continuation=False,\n)\n\n# Make LLM call with processed prompt\nresponse = llm.invoke(processed_prompt)\n```\n\n### Using String Inputs\n\n```python\n# Both prefix and suffix can be strings\ncacheable_prefix = \"You are a helpful assistant. Context: ...\"\nsuffix = \"What is the weather?\"\n\nprocessed_prompt, cache_metadata = process_with_prompt_cache(\n    llm_config=llm.config,\n    cacheable_prefix=cacheable_prefix,\n    suffix=suffix,\n    continuation=False,\n)\n\nresponse = llm.invoke(processed_prompt)\n```\n\n### Continuation Flag\n\nWhen `continuation=True`, the suffix is appended to the last message of the cacheable prefix:\n\n```python\n# Without continuation (default)\n# Result: [system_msg, prefix_user_msg, suffix_user_msg]\n\n# With continuation=True\n# Result: [system_msg, prefix_user_msg + suffix_user_msg]\nprocessed_prompt, _ = process_with_prompt_cache(\n    llm_config=llm.config,\n    cacheable_prefix=cacheable_prefix,\n    suffix=suffix,\n    continuation=True,  # Merge suffix into last prefix message\n)\n```\n\n**Note**: If `cacheable_prefix` is a string, it remains in its own content block even when `continuation=True`.\n\n## Provider-Specific Behavior\n\n### OpenAI\n- **Caching Type**: Implicit (automatic)\n- **Behavior**: No special parameters needed. Provider automatically caches prefixes >1024 tokens.\n- **Cache Lifetime**: Up to 1 hour\n- **Cost Savings**: 50% discount on cached tokens\n\n### Anthropic\n- **Caching Type**: Explicit (requires `cache_control` parameter)\n- **Behavior**: Automatically adds `cache_control={\"type\": \"ephemeral\"}` to the **last message** of the cacheable prefix\n- **Cache Lifetime**: 5 minutes (default)\n- **Limitations**: Supports up to 4 cache breakpoints\n\n### Vertex AI\n- **Caching Type**: Explicit (with `cache_control` parameter)\n- **Behavior**: Adds `cache_control={\"type\": \"ephemeral\"}` to **all content blocks** in cacheable messages. String content is converted to array format with the cache control attached.\n- **Cache Lifetime**: 5 minutes\n- **Future**: Full context caching with block number management (deferred to future PR)\n\n## Configuration\n\n### Environment Variables\n\n- `ENABLE_PROMPT_CACHING`: Enable/disable prompt caching (default: `true`)\n  ```bash\n  export ENABLE_PROMPT_CACHING=false  # Disable caching\n  ```\n\n## Architecture\n\n### Core Components\n\n1. **`processor.py`**: Main entry point (`process_with_prompt_cache`)\n2. **`cache_manager.py`**: Cache metadata storage and retrieval\n3. **`models.py`**: Pydantic models for cache metadata (`CacheMetadata`)\n4. **`providers/`**: Provider-specific adapters\n5. **`utils.py`**: Shared utility functions\n\n### Provider Adapters\n\nEach provider has its own adapter in `providers/`:\n\n| File | Class | Description |\n|------|-------|-------------|\n| `base.py` | `PromptCacheProvider` | Abstract base class for all providers |\n| `openai.py` | `OpenAIPromptCacheProvider` | Implicit caching (no transformation) |\n| `anthropic.py` | `AnthropicPromptCacheProvider` | Explicit caching with `cache_control` on last message |\n| `vertex.py` | `VertexAIPromptCacheProvider` | Explicit caching with `cache_control` on all content blocks |\n| `noop.py` | `NoOpPromptCacheProvider` | Fallback for unsupported providers |\n\nEach adapter implements:\n- `supports_caching()`: Whether caching is supported\n- `prepare_messages_for_caching()`: Transform messages for caching\n- `extract_cache_metadata()`: Extract metadata from responses\n- `get_cache_ttl_seconds()`: Cache TTL\n\n## Best Practices\n\n1. **Cache Static Content**: Use cacheable prefix for system prompts, static context, and instructions that don't change between requests.\n\n2. **Keep Dynamic Content in Suffix**: User queries, search results, and other dynamic content should be in the suffix.\n\n3. **Monitor Cache Effectiveness**: Check logs for cache hits/misses and adjust your caching strategy accordingly.\n\n4. **Provider Selection**: Different providers have different caching characteristics - choose based on your use case.\n\n## Error Handling\n\nThe framework is **best-effort** - if caching fails, it gracefully falls back to non-cached behavior:\n\n- Cache lookup failures: Logged and continue without caching\n- Provider adapter failures: Fall back to no-op adapter\n- Cache storage failures: Logged and continue (caching is best-effort)\n- Invalid cache metadata: Cleared and proceed without cache\n\n## Future Enhancements\n\n- **Explicit Caching for Vertex AI**: Full block number tracking and management\n- **Cache Analytics**: Detailed metrics on cache effectiveness and cost savings\n- **Advanced Strategies**: More sophisticated cache key generation and invalidation\n- **Distributed Caching**: Shared caches across instances\n\n## Examples\n\nSee `backend/tests/external_dependency_unit/llm/test_prompt_caching.py` for detailed integration test examples.\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/__init__.py",
    "content": "\"\"\"Prompt caching framework for LLM providers.\n\nThis module provides a framework for enabling prompt caching across different\nLLM providers. It supports both implicit caching (automatic provider-side caching)\nand explicit caching (with cache metadata management).\n\"\"\"\n\nfrom onyx.llm.prompt_cache.cache_manager import CacheManager\nfrom onyx.llm.prompt_cache.cache_manager import generate_cache_key_hash\nfrom onyx.llm.prompt_cache.models import CacheMetadata\nfrom onyx.llm.prompt_cache.processor import process_with_prompt_cache\nfrom onyx.llm.prompt_cache.providers.anthropic import AnthropicPromptCacheProvider\nfrom onyx.llm.prompt_cache.providers.base import PromptCacheProvider\nfrom onyx.llm.prompt_cache.providers.factory import get_provider_adapter\nfrom onyx.llm.prompt_cache.providers.noop import NoOpPromptCacheProvider\nfrom onyx.llm.prompt_cache.providers.openai import OpenAIPromptCacheProvider\nfrom onyx.llm.prompt_cache.providers.vertex import VertexAIPromptCacheProvider\nfrom onyx.llm.prompt_cache.utils import combine_messages_with_continuation\nfrom onyx.llm.prompt_cache.utils import prepare_messages_with_cacheable_transform\n\n__all__ = [\n    \"AnthropicPromptCacheProvider\",\n    \"CacheManager\",\n    \"CacheMetadata\",\n    \"combine_messages_with_continuation\",\n    \"generate_cache_key_hash\",\n    \"get_provider_adapter\",\n    \"NoOpPromptCacheProvider\",\n    \"OpenAIPromptCacheProvider\",\n    \"prepare_messages_with_cacheable_transform\",\n    \"process_with_prompt_cache\",\n    \"PromptCacheProvider\",\n    \"VertexAIPromptCacheProvider\",\n]\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/cache_manager.py",
    "content": "\"\"\"Cache manager for storing and retrieving prompt cache metadata.\"\"\"\n\nimport hashlib\nimport json\nfrom datetime import datetime\nfrom datetime import timezone\n\nfrom onyx.configs.model_configs import PROMPT_CACHE_REDIS_TTL_MULTIPLIER\nfrom onyx.key_value_store.store import PgRedisKVStore\nfrom onyx.llm.interfaces import LanguageModelInput\nfrom onyx.llm.prompt_cache.models import CacheMetadata\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nREDIS_KEY_PREFIX = \"prompt_cache:\"\n# Cache TTL multiplier - store caches slightly longer than provider TTL\n# This allows for some clock skew and ensures we don't lose cache metadata prematurely\n# Value is configurable via PROMPT_CACHE_REDIS_TTL_MULTIPLIER env var (default: 1.2)\nCACHE_TTL_MULTIPLIER = PROMPT_CACHE_REDIS_TTL_MULTIPLIER\n\n\nclass CacheManager:\n    \"\"\"Manages storage and retrieval of prompt cache metadata.\"\"\"\n\n    def __init__(self, kv_store: PgRedisKVStore | None = None) -> None:\n        \"\"\"Initialize the cache manager.\n\n        Args:\n            kv_store: Optional key-value store. If None, creates a new PgRedisKVStore.\n        \"\"\"\n        self._kv_store = kv_store or PgRedisKVStore()\n\n    def _build_cache_key(\n        self,\n        provider: str,\n        model_name: str,\n        cache_key_hash: str,\n        tenant_id: str | None = None,\n    ) -> str:\n        \"\"\"Build a Redis/PostgreSQL key for cache metadata.\n\n        Args:\n            provider: LLM provider name (e.g., \"openai\", \"anthropic\")\n            model_name: Model name\n            cache_key_hash: Hash of the cacheable prefix content\n            tenant_id: Tenant ID. If None, uses current tenant from context.\n\n        Returns:\n            Cache key string\n        \"\"\"\n        if tenant_id is None:\n            tenant_id = get_current_tenant_id()\n        return f\"{REDIS_KEY_PREFIX}{tenant_id}:{provider}:{model_name}:{cache_key_hash}\"\n\n    def store_cache_metadata(\n        self,\n        metadata: CacheMetadata,\n    ) -> None:\n        \"\"\"Store cache metadata.\n\n        Args:\n            metadata: Cache metadata to store\n            ttl_seconds: Optional TTL in seconds. If None, uses provider default.\n        \"\"\"\n        try:\n            cache_key = self._build_cache_key(\n                metadata.provider,\n                metadata.model_name,\n                metadata.cache_key,\n                metadata.tenant_id,\n            )\n\n            # Update last_accessed timestamp\n            metadata.last_accessed = datetime.now(timezone.utc)\n\n            # Serialize metadata\n            metadata_dict = metadata.model_dump(mode=\"json\")\n\n            # Store in key-value store\n            # Note: PgRedisKVStore doesn't support TTL directly, but Redis will\n            # handle expiration. For PostgreSQL persistence, we rely on cleanup\n            # based on last_accessed timestamp.\n            self._kv_store.store(cache_key, metadata_dict, encrypt=False)\n\n            logger.debug(\n                f\"Stored cache metadata: provider={metadata.provider}, \"\n                f\"model={metadata.model_name}, cache_key={metadata.cache_key[:16]}..., \"\n                f\"tenant_id={metadata.tenant_id}\"\n            )\n        except Exception as e:\n            # Best-effort: log and continue\n            logger.warning(f\"Failed to store cache metadata: {str(e)}\")\n\n    def retrieve_cache_metadata(\n        self,\n        provider: str,\n        model_name: str,\n        cache_key_hash: str,\n        tenant_id: str | None = None,\n    ) -> CacheMetadata | None:\n        \"\"\"Retrieve cache metadata.\n\n        Args:\n            provider: LLM provider name\n            model_name: Model name\n            cache_key_hash: Hash of the cacheable prefix content\n            tenant_id: Tenant ID. If None, uses current tenant from context.\n\n        Returns:\n            CacheMetadata if found, None otherwise\n        \"\"\"\n        try:\n            cache_key = self._build_cache_key(\n                provider, model_name, cache_key_hash, tenant_id\n            )\n            metadata_dict = self._kv_store.load(cache_key, refresh_cache=False)\n\n            # Deserialize metadata\n            metadata = CacheMetadata.model_validate(metadata_dict)\n\n            # Update last_accessed timestamp\n            metadata.last_accessed = datetime.now(timezone.utc)\n            self.store_cache_metadata(metadata)\n\n            logger.debug(\n                f\"Retrieved cache metadata: provider={provider}, \"\n                f\"model={model_name}, cache_key={cache_key_hash[:16]}..., \"\n                f\"tenant_id={tenant_id}\"\n            )\n            return metadata\n        except Exception as e:\n            # Best-effort: log and continue\n            logger.debug(f\"Cache metadata not found or error retrieving: {str(e)}\")\n            return None\n\n    def delete_cache_metadata(\n        self,\n        provider: str,\n        model_name: str,\n        cache_key_hash: str,\n        tenant_id: str | None = None,\n    ) -> None:\n        \"\"\"Delete cache metadata.\n\n        Args:\n            provider: LLM provider name\n            model_name: Model name\n            cache_key_hash: Hash of the cacheable prefix content\n            tenant_id: Tenant ID. If None, uses current tenant from context.\n        \"\"\"\n        try:\n            cache_key = self._build_cache_key(\n                provider, model_name, cache_key_hash, tenant_id\n            )\n            self._kv_store.delete(cache_key)\n            logger.debug(\n                f\"Deleted cache metadata for provider={provider}, model={model_name}, cache_key={cache_key_hash[:16]}...\"\n            )\n        except Exception as e:\n            # Best-effort: log and continue\n            logger.warning(f\"Failed to delete cache metadata: {str(e)}\")\n\n\ndef _make_json_serializable(obj: object) -> object:\n    \"\"\"Recursively convert objects to JSON-serializable types.\n\n    Handles Pydantic models, dicts, lists, and other common types.\n    \"\"\"\n    if hasattr(obj, \"model_dump\"):\n        # Pydantic v2 model\n        return obj.model_dump(mode=\"json\")\n    elif hasattr(obj, \"dict\"):\n        # Pydantic v1 model or similar\n        return _make_json_serializable(obj.dict())\n    elif isinstance(obj, dict):\n        return {k: _make_json_serializable(v) for k, v in obj.items()}\n    elif isinstance(obj, (list, tuple)):\n        return [_make_json_serializable(item) for item in obj]\n    elif isinstance(obj, (str, int, float, bool, type(None))):\n        return obj\n    else:\n        # Fallback: convert to string representation\n        return str(obj)\n\n\ndef generate_cache_key_hash(\n    cacheable_prefix: LanguageModelInput,\n    provider: str,\n    model_name: str,\n    tenant_id: str,\n) -> str:\n    \"\"\"Generate a deterministic cache key hash from cacheable prefix.\n\n    Args:\n        cacheable_prefix: Single message or list of messages to hash\n        provider: LLM provider name\n        model_name: Model name\n        tenant_id: Tenant ID\n\n    Returns:\n        SHA256 hash as hex string\n    \"\"\"\n    # Normalize to list for consistent hashing; _make_json_serializable handles Pydantic models\n    messages = (\n        cacheable_prefix if isinstance(cacheable_prefix, list) else [cacheable_prefix]\n    )\n    messages_dict = [_make_json_serializable(msg) for msg in messages]\n\n    # Serialize messages in a deterministic way\n    # Include only content, roles, and order - exclude timestamps or dynamic fields\n    serialized = json.dumps(\n        {\n            \"messages\": messages_dict,\n            \"provider\": provider,\n            \"model\": model_name,\n            \"tenant_id\": tenant_id,\n        },\n        sort_keys=True,\n        separators=(\",\", \":\"),\n    )\n    return hashlib.sha256(serialized.encode(\"utf-8\")).hexdigest()\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/models.py",
    "content": "\"\"\"Interfaces and data structures for prompt caching.\"\"\"\n\nfrom datetime import datetime\n\nfrom pydantic import BaseModel\n\n\nclass CacheMetadata(BaseModel):\n    \"\"\"Metadata for cached prompt prefixes.\"\"\"\n\n    cache_key: str\n    provider: str\n    model_name: str\n    tenant_id: str\n    created_at: datetime\n    last_accessed: datetime\n    # Provider-specific metadata\n    # TODO: Add explicit caching support in future PR\n    # vertex_block_numbers: dict[str, str] | None = None  # message_hash -> block_number\n    # anthropic_cache_id: str | None = None\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/processor.py",
    "content": "\"\"\"Main processor for prompt caching.\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timezone\n\nfrom onyx.configs.model_configs import ENABLE_PROMPT_CACHING\nfrom onyx.llm.interfaces import LLMConfig\nfrom onyx.llm.models import LanguageModelInput\nfrom onyx.llm.prompt_cache.cache_manager import generate_cache_key_hash\nfrom onyx.llm.prompt_cache.models import CacheMetadata\nfrom onyx.llm.prompt_cache.providers.factory import get_provider_adapter\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\n# TODO: test with a history containing images\ndef process_with_prompt_cache(\n    llm_config: LLMConfig,\n    cacheable_prefix: LanguageModelInput | None,\n    suffix: LanguageModelInput,\n    continuation: bool = False,\n) -> tuple[LanguageModelInput, CacheMetadata | None]:\n    \"\"\"Process prompt with caching support.\n\n    This function takes a cacheable prefix and suffix, processes them according to\n    the LLM provider's caching capabilities, and returns the combined messages\n    ready for LLM API calls along with optional cache metadata.\n\n    Args:\n        llm: The LLM instance (used to determine provider and model)\n        cacheable_prefix: Optional cacheable prefix. If None, no caching is attempted.\n        suffix: The non-cacheable suffix to append\n        continuation: If True, suffix should be appended to the last message\n            of cacheable_prefix rather than being separate messages\n\n    Returns:\n        Tuple of (processed_prompt, cache_metadata_to_store)\n        - processed_prompt: Combined and transformed messages ready for LLM API call\n        - cache_metadata_to_store: Optional cache metadata for post-processing\n            (currently None for implicit caching, will be populated in future PR\n            for explicit caching)\n    \"\"\"\n    # Check if prompt caching is enabled\n    if not ENABLE_PROMPT_CACHING:\n        logger.debug(\"Prompt caching is disabled via configuration\")\n        # Fall back to no-op behavior\n        from onyx.llm.prompt_cache.providers.noop import NoOpPromptCacheProvider\n\n        noop_adapter = NoOpPromptCacheProvider()\n        combined = noop_adapter.prepare_messages_for_caching(\n            cacheable_prefix=cacheable_prefix,\n            suffix=suffix,\n            continuation=continuation,\n            cache_metadata=None,\n        )\n        return combined, None\n\n    # If no cacheable prefix, return suffix unchanged\n    if cacheable_prefix is None:\n        logger.debug(\"No cacheable prefix provided, skipping caching\")\n        return suffix, None\n\n    # Get provider adapter\n    provider_adapter = get_provider_adapter(llm_config)\n\n    # If provider doesn't support caching, combine and return unchanged\n    if not provider_adapter.supports_caching():\n        logger.debug(\n            f\"Provider {llm_config.model_provider} does not support caching, combining messages without caching\"\n        )\n        # Use no-op adapter to combine messages\n        from onyx.llm.prompt_cache.providers.noop import NoOpPromptCacheProvider\n\n        noop_adapter = NoOpPromptCacheProvider()\n        combined = noop_adapter.prepare_messages_for_caching(\n            cacheable_prefix=cacheable_prefix,\n            suffix=suffix,\n            continuation=continuation,\n            cache_metadata=None,\n        )\n        return combined, None\n\n    # Generate cache key for cacheable prefix\n    tenant_id = get_current_tenant_id()\n    cache_key_hash = generate_cache_key_hash(\n        cacheable_prefix=cacheable_prefix,\n        provider=llm_config.model_provider,\n        model_name=llm_config.model_name,\n        tenant_id=tenant_id,\n    )\n\n    # For implicit caching: Skip cache lookup (providers handle caching automatically)\n    # TODO (explicit caching - future PR): Look up cache metadata in CacheManager\n    cache_metadata: CacheMetadata | None = None\n\n    # Use provider adapter to prepare messages with caching\n    try:\n        processed_prompt = provider_adapter.prepare_messages_for_caching(\n            cacheable_prefix=cacheable_prefix,\n            suffix=suffix,\n            continuation=continuation,\n            cache_metadata=cache_metadata,\n        )\n\n        logger.debug(\n            f\"Processed prompt with caching: provider={llm_config.model_provider}, \"\n            f\"model={llm_config.model_name}, cache_key={cache_key_hash[:16]}..., \"\n            f\"continuation={continuation}\"\n        )\n\n        # Create cache metadata for tracking (even for implicit caching)\n        # This allows us to track cache usage and effectiveness\n        cache_metadata = CacheMetadata(\n            cache_key=cache_key_hash,\n            provider=llm_config.model_provider,\n            model_name=llm_config.model_name,\n            tenant_id=tenant_id,\n            created_at=datetime.now(timezone.utc),\n            last_accessed=datetime.now(timezone.utc),\n        )\n\n        return processed_prompt, cache_metadata\n\n    except Exception as e:\n        # Best-effort: log error and fall back to no-op behavior\n        logger.warning(\n            f\"Error processing prompt with caching for provider={llm_config.model_provider}: {str(e)}. \"\n            \"Falling back to non-cached behavior.\"\n        )\n        # Fall back to no-op adapter\n        from onyx.llm.prompt_cache.providers.noop import NoOpPromptCacheProvider\n\n        noop_adapter = NoOpPromptCacheProvider()\n        combined = noop_adapter.prepare_messages_for_caching(\n            cacheable_prefix=cacheable_prefix,\n            suffix=suffix,\n            continuation=continuation,\n            cache_metadata=None,\n        )\n        return combined, None\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/providers/__init__.py",
    "content": "\"\"\"Provider adapters for prompt caching.\"\"\"\n\nfrom onyx.llm.prompt_cache.providers.anthropic import AnthropicPromptCacheProvider\nfrom onyx.llm.prompt_cache.providers.base import PromptCacheProvider\nfrom onyx.llm.prompt_cache.providers.factory import get_provider_adapter\nfrom onyx.llm.prompt_cache.providers.noop import NoOpPromptCacheProvider\nfrom onyx.llm.prompt_cache.providers.openai import OpenAIPromptCacheProvider\nfrom onyx.llm.prompt_cache.providers.vertex import VertexAIPromptCacheProvider\n\n__all__ = [\n    \"AnthropicPromptCacheProvider\",\n    \"get_provider_adapter\",\n    \"NoOpPromptCacheProvider\",\n    \"OpenAIPromptCacheProvider\",\n    \"PromptCacheProvider\",\n    \"VertexAIPromptCacheProvider\",\n]\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/providers/anthropic.py",
    "content": "\"\"\"Anthropic provider adapter for prompt caching.\"\"\"\n\nfrom collections.abc import Sequence\n\nfrom onyx.llm.interfaces import LanguageModelInput\nfrom onyx.llm.models import ChatCompletionMessage\nfrom onyx.llm.prompt_cache.models import CacheMetadata\nfrom onyx.llm.prompt_cache.providers.base import PromptCacheProvider\nfrom onyx.llm.prompt_cache.utils import prepare_messages_with_cacheable_transform\nfrom onyx.llm.prompt_cache.utils import revalidate_message_from_original\n\n\ndef _add_anthropic_cache_control(\n    messages: Sequence[ChatCompletionMessage],\n) -> Sequence[ChatCompletionMessage]:\n    \"\"\"Add cache_control parameter to messages for Anthropic caching.\n\n    Args:\n        messages: Messages to transform\n\n    Returns:\n        Messages with cache_control added\n    \"\"\"\n    last_message_dict = dict(messages[-1])\n    last_message_dict[\"cache_control\"] = {\"type\": \"ephemeral\"}\n    last_message = revalidate_message_from_original(\n        original=messages[-1], mutated=last_message_dict\n    )\n    return list(messages[:-1]) + [last_message]\n\n\nclass AnthropicPromptCacheProvider(PromptCacheProvider):\n    \"\"\"Anthropic adapter for prompt caching (explicit caching with cache_control).\n    implicit caching = just need to ensure byte-equivalent prefixes, and the provider\n                       auto-detects and reuses them.\n    explicit caching = the caller must do _something_ to enable provider-side caching.\n    In this case, anthropic supports explicit caching via the cache_control parameter:\n    https://platform.claude.com/docs/en/build-with-claude/prompt-caching\n    \"\"\"\n\n    def supports_caching(self) -> bool:\n        \"\"\"Anthropic supports explicit prompt caching.\"\"\"\n        return True\n\n    def prepare_messages_for_caching(\n        self,\n        cacheable_prefix: LanguageModelInput | None,\n        suffix: LanguageModelInput,\n        continuation: bool,\n        cache_metadata: CacheMetadata | None,  # noqa: ARG002\n    ) -> LanguageModelInput:\n        \"\"\"Prepare messages for Anthropic caching.\n\n        Anthropic requires cache_control parameter on cacheable messages.\n        We add cache_control={\"type\": \"ephemeral\"} to all cacheable prefix messages.\n\n        Args:\n            cacheable_prefix: Optional cacheable prefix\n            suffix: Non-cacheable suffix\n            continuation: Whether to append suffix to last prefix message\n            cache_metadata: Cache metadata (for future explicit caching support)\n\n        Returns:\n            Combined messages with cache_control on cacheable messages\n        \"\"\"\n        return prepare_messages_with_cacheable_transform(\n            cacheable_prefix=cacheable_prefix,\n            suffix=suffix,\n            continuation=continuation,\n            transform_cacheable=_add_anthropic_cache_control,\n        )\n\n    def extract_cache_metadata(\n        self,\n        response: dict,  # noqa: ARG002\n        cache_key: str,  # noqa: ARG002\n    ) -> CacheMetadata | None:\n        \"\"\"Extract cache metadata from Anthropic response.\n\n        Anthropic may return cache identifiers in the response.\n        For now, we don't extract detailed metadata (future explicit caching support).\n\n        Args:\n            response: Anthropic API response dictionary\n            cache_key: Cache key used for this request\n\n        Returns:\n            CacheMetadata if extractable, None otherwise\n        \"\"\"\n        # TODO: Extract cache identifiers from response when implementing explicit caching\n        return None\n\n    def get_cache_ttl_seconds(self) -> int:\n        \"\"\"Get cache TTL for Anthropic (5 minutes default).\"\"\"\n        return 300\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/providers/base.py",
    "content": "\"\"\"Base interface for provider-specific prompt caching adapters.\"\"\"\n\nfrom abc import ABC\nfrom abc import abstractmethod\n\nfrom onyx.llm.interfaces import LanguageModelInput\nfrom onyx.llm.prompt_cache.models import CacheMetadata\n\n\nclass PromptCacheProvider(ABC):\n    \"\"\"Abstract base class for provider-specific prompt caching logic.\"\"\"\n\n    @abstractmethod\n    def supports_caching(self) -> bool:\n        \"\"\"Whether this provider supports prompt caching.\n\n        Returns:\n            True if caching is supported, False otherwise\n        \"\"\"\n        raise NotImplementedError\n\n    @abstractmethod\n    def prepare_messages_for_caching(\n        self,\n        cacheable_prefix: LanguageModelInput | None,\n        suffix: LanguageModelInput,\n        continuation: bool,\n        cache_metadata: CacheMetadata | None,\n    ) -> LanguageModelInput:\n        \"\"\"Transform messages to enable caching.\n\n        Args:\n            cacheable_prefix: Optional cacheable prefix (can be str or Sequence[ChatCompletionMessage])\n            suffix: Non-cacheable suffix (can be str or Sequence[ChatCompletionMessage])\n            continuation: If True, suffix should be appended to the last message\n                of cacheable_prefix rather than being separate messages.\n                Note: When cacheable_prefix is a string, it should remain in its own\n                content block even if continuation=True.\n            cache_metadata: Optional cache metadata from previous requests\n\n        Returns:\n            Combined and transformed messages ready for LLM API call\n        \"\"\"\n        raise NotImplementedError\n\n    @abstractmethod\n    def extract_cache_metadata(\n        self,\n        response: dict,  # Provider-specific response object\n        cache_key: str,\n    ) -> CacheMetadata | None:\n        \"\"\"Extract cache metadata from API response.\n\n        Args:\n            response: Provider-specific response dictionary\n            cache_key: Cache key used for this request\n\n        Returns:\n            CacheMetadata if extractable, None otherwise\n        \"\"\"\n        raise NotImplementedError\n\n    @abstractmethod\n    def get_cache_ttl_seconds(self) -> int:\n        \"\"\"Get cache TTL in seconds for this provider.\n\n        Returns:\n            TTL in seconds\n        \"\"\"\n        raise NotImplementedError\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/providers/factory.py",
    "content": "\"\"\"Factory for creating provider-specific prompt cache adapters.\"\"\"\n\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.interfaces import LLMConfig\nfrom onyx.llm.prompt_cache.providers.anthropic import AnthropicPromptCacheProvider\nfrom onyx.llm.prompt_cache.providers.base import PromptCacheProvider\nfrom onyx.llm.prompt_cache.providers.noop import NoOpPromptCacheProvider\nfrom onyx.llm.prompt_cache.providers.openai import OpenAIPromptCacheProvider\nfrom onyx.llm.prompt_cache.providers.vertex import VertexAIPromptCacheProvider\n\nANTHROPIC_BEDROCK_TAG = \"anthropic.\"\n\n\ndef get_provider_adapter(llm_config: LLMConfig) -> PromptCacheProvider:\n    \"\"\"Get the appropriate prompt cache provider adapter for a given provider.\n\n    Args:\n        provider: Provider name (e.g., \"openai\", \"anthropic\", \"vertex_ai\")\n\n    Returns:\n        PromptCacheProvider instance for the given provider\n    \"\"\"\n    if llm_config.model_provider == LlmProviderNames.OPENAI:\n        return OpenAIPromptCacheProvider()\n    elif llm_config.model_provider == LlmProviderNames.ANTHROPIC or (\n        llm_config.model_provider == LlmProviderNames.BEDROCK\n        and ANTHROPIC_BEDROCK_TAG in llm_config.model_name\n    ):\n        return AnthropicPromptCacheProvider()\n    elif llm_config.model_provider == LlmProviderNames.VERTEX_AI:\n        return VertexAIPromptCacheProvider()\n    else:\n        # Default to no-op for providers without caching support\n        return NoOpPromptCacheProvider()\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/providers/noop.py",
    "content": "\"\"\"No-op provider adapter for providers without caching support.\"\"\"\n\nfrom onyx.llm.models import LanguageModelInput\nfrom onyx.llm.prompt_cache.models import CacheMetadata\nfrom onyx.llm.prompt_cache.providers.base import PromptCacheProvider\nfrom onyx.llm.prompt_cache.utils import prepare_messages_with_cacheable_transform\n\n\nclass NoOpPromptCacheProvider(PromptCacheProvider):\n    \"\"\"No-op adapter for providers that don't support prompt caching.\"\"\"\n\n    def supports_caching(self) -> bool:\n        \"\"\"No-op providers don't support caching.\"\"\"\n        return False\n\n    def prepare_messages_for_caching(\n        self,\n        cacheable_prefix: LanguageModelInput | None,\n        suffix: LanguageModelInput,\n        continuation: bool,\n        cache_metadata: CacheMetadata | None,  # noqa: ARG002\n    ) -> LanguageModelInput:\n        \"\"\"Return messages unchanged (no caching support).\n\n        Args:\n            cacheable_prefix: Optional cacheable prefix (can be str or Sequence[ChatCompletionMessage])\n            suffix: Non-cacheable suffix (can be str or Sequence[ChatCompletionMessage])\n            continuation: Whether to append suffix to last prefix message.\n                Note: When cacheable_prefix is a string, it remains in its own content block.\n            cache_metadata: Cache metadata (ignored)\n\n        Returns:\n            Combined messages (prefix + suffix)\n        \"\"\"\n        # No transformation needed for no-op provider\n        return prepare_messages_with_cacheable_transform(\n            cacheable_prefix=cacheable_prefix,\n            suffix=suffix,\n            continuation=continuation,\n            transform_cacheable=None,\n        )\n\n    def extract_cache_metadata(\n        self,\n        response: dict,  # noqa: ARG002\n        cache_key: str,  # noqa: ARG002\n    ) -> CacheMetadata | None:\n        \"\"\"No cache metadata to extract.\"\"\"\n        return None\n\n    def get_cache_ttl_seconds(self) -> int:\n        \"\"\"Return default TTL (not used for no-op).\"\"\"\n        return 0\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/providers/openai.py",
    "content": "\"\"\"OpenAI provider adapter for prompt caching.\"\"\"\n\nfrom onyx.llm.interfaces import LanguageModelInput\nfrom onyx.llm.prompt_cache.models import CacheMetadata\nfrom onyx.llm.prompt_cache.providers.base import PromptCacheProvider\nfrom onyx.llm.prompt_cache.utils import prepare_messages_with_cacheable_transform\n\n\nclass OpenAIPromptCacheProvider(PromptCacheProvider):\n    \"\"\"OpenAI adapter for prompt caching (implicit caching).\"\"\"\n\n    def supports_caching(self) -> bool:\n        \"\"\"OpenAI supports automatic prompt caching.\"\"\"\n        return True\n\n    def prepare_messages_for_caching(\n        self,\n        cacheable_prefix: LanguageModelInput | None,\n        suffix: LanguageModelInput,\n        continuation: bool,\n        cache_metadata: CacheMetadata | None,  # noqa: ARG002\n    ) -> LanguageModelInput:\n        \"\"\"Prepare messages for OpenAI caching.\n\n        OpenAI handles caching automatically, so we just normalize and combine\n        the messages. The provider will automatically cache prefixes >1024 tokens.\n\n        Args:\n            cacheable_prefix: Optional cacheable prefix\n            suffix: Non-cacheable suffix\n            continuation: Whether to append suffix to last prefix message\n            cache_metadata: Cache metadata (ignored for implicit caching)\n\n        Returns:\n            Combined messages ready for LLM API call\n        \"\"\"\n        # No transformation needed for OpenAI (implicit caching)\n        return prepare_messages_with_cacheable_transform(\n            cacheable_prefix=cacheable_prefix,\n            suffix=suffix,\n            continuation=continuation,\n            transform_cacheable=None,\n        )\n\n    def extract_cache_metadata(\n        self,\n        response: dict,  # noqa: ARG002\n        cache_key: str,  # noqa: ARG002\n    ) -> CacheMetadata | None:\n        \"\"\"Extract cache metadata from OpenAI response.\n\n        OpenAI responses may include cached_tokens in the usage field.\n        For implicit caching, we don't need to store much metadata.\n\n        Args:\n            response: OpenAI API response dictionary\n            cache_key: Cache key used for this request\n\n        Returns:\n            CacheMetadata if extractable, None otherwise\n        \"\"\"\n        # For implicit caching, OpenAI handles everything automatically\n        # We could extract cached_tokens from response.get(\"usage\", {}).get(\"cached_tokens\")\n        # but for now, we don't need to store metadata for implicit caching\n        return None\n\n    def get_cache_ttl_seconds(self) -> int:\n        \"\"\"Get cache TTL for OpenAI (1 hour max).\"\"\"\n        return 3600\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/providers/vertex.py",
    "content": "\"\"\"Vertex AI provider adapter for prompt caching.\"\"\"\n\nfrom collections.abc import Sequence\n\nfrom onyx.llm.interfaces import LanguageModelInput\nfrom onyx.llm.models import ChatCompletionMessage\nfrom onyx.llm.prompt_cache.models import CacheMetadata\nfrom onyx.llm.prompt_cache.providers.base import PromptCacheProvider\nfrom onyx.llm.prompt_cache.utils import prepare_messages_with_cacheable_transform\nfrom onyx.llm.prompt_cache.utils import revalidate_message_from_original\n\n\nclass VertexAIPromptCacheProvider(PromptCacheProvider):\n    \"\"\"Vertex AI adapter for prompt caching (implicit caching for this PR).\"\"\"\n\n    def supports_caching(self) -> bool:\n        \"\"\"Vertex AI supports prompt caching (implicit and explicit).\"\"\"\n        return True\n\n    def prepare_messages_for_caching(\n        self,\n        cacheable_prefix: LanguageModelInput | None,\n        suffix: LanguageModelInput,\n        continuation: bool,\n        cache_metadata: CacheMetadata | None,  # noqa: ARG002\n    ) -> LanguageModelInput:\n        \"\"\"Prepare messages for Vertex AI caching.\n\n        For implicit caching we attach cache_control={\"type\": \"ephemeral\"} to every\n        cacheable prefix message so Vertex/Gemini can reuse them automatically.\n        Explicit context caching (with cache blocks) will be added in a future PR.\n\n        Args:\n            cacheable_prefix: Optional cacheable prefix\n            suffix: Non-cacheable suffix\n            continuation: Whether to append suffix to last prefix message\n            cache_metadata: Cache metadata (for future explicit caching support)\n\n        Returns:\n            Combined messages ready for LLM API call\n        \"\"\"\n        # For implicit caching, no transformation needed (Vertex handles caching automatically)\n        # TODO (explicit caching - future PR):\n        # - Check cache_metadata for vertex_block_numbers\n        # - Create transform function that replaces messages with cache_block_id if available\n        # - Or adds cache_control parameter if not using cached blocks\n        return prepare_messages_with_cacheable_transform(\n            cacheable_prefix=cacheable_prefix,\n            suffix=suffix,\n            continuation=continuation,\n            transform_cacheable=None,  # TODO: support explicit caching\n        )\n\n    def extract_cache_metadata(\n        self,\n        response: dict,  # noqa: ARG002\n        cache_key: str,  # noqa: ARG002\n    ) -> CacheMetadata | None:\n        \"\"\"Extract cache metadata from Vertex AI response.\n\n        For this PR (implicit caching): Extract basic cache usage info if available.\n        TODO (explicit caching - future PR): Extract block numbers from response\n        and store in metadata.\n\n        Args:\n            response: Vertex AI API response dictionary\n            cache_key: Cache key used for this request\n\n        Returns:\n            CacheMetadata if extractable, None otherwise\n        \"\"\"\n        # For implicit caching, Vertex handles everything automatically\n        # TODO (explicit caching - future PR):\n        # - Extract cache block numbers from response\n        # - Store in cache_metadata.vertex_block_numbers\n        return None\n\n    def get_cache_ttl_seconds(self) -> int:\n        \"\"\"Get cache TTL for Vertex AI (5 minutes).\"\"\"\n        return 300\n\n\ndef _add_vertex_cache_control(\n    messages: Sequence[ChatCompletionMessage],\n) -> Sequence[ChatCompletionMessage]:\n    \"\"\"Add cache_control inside content blocks for Vertex AI/Gemini caching.\n\n    Gemini requires cache_control to be on a content block within the content array,\n    not at the message level. This function converts string content to the array format\n    and adds cache_control to the last content block in each cacheable message.\n    \"\"\"\n    # NOTE: unfortunately we need a much more sophisticated mechnism to support\n    # explict caching with vertex in the presence of tools and system messages\n    # (since they're supposed to be stripped out when setting cache_control)\n    # so we're deferring this to a future PR.\n    updated: list[ChatCompletionMessage] = []\n    for message in messages:\n        mutated = dict(message)\n        content = mutated.get(\"content\")\n\n        if isinstance(content, str):\n            # Convert string content to array format with cache_control\n            mutated[\"content\"] = [\n                {\n                    \"type\": \"text\",\n                    \"text\": content,\n                    \"cache_control\": {\"type\": \"ephemeral\"},\n                }\n            ]\n        elif isinstance(content, list) and content:\n            # Content is already an array - add cache_control to last block\n            new_content = []\n            for i, block in enumerate(content):\n                if isinstance(block, dict):\n                    block_copy = dict(block)\n                    # Add cache_control to the last content block\n                    if i == len(content) - 1:\n                        block_copy[\"cache_control\"] = {\"type\": \"ephemeral\"}\n                    new_content.append(block_copy)\n                else:\n                    new_content.append(block)\n            mutated[\"content\"] = new_content\n\n        updated.append(revalidate_message_from_original(message, mutated))\n    return updated\n"
  },
  {
    "path": "backend/onyx/llm/prompt_cache/utils.py",
    "content": "# pyright: reportMissingTypeStubs=false\n\"\"\"Utility functions for prompt caching.\"\"\"\n\nimport json\nfrom collections.abc import Callable\nfrom collections.abc import Sequence\nfrom typing import Any\n\nfrom onyx.llm.models import ChatCompletionMessage\nfrom onyx.llm.models import LanguageModelInput\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\ndef combine_messages_with_continuation(\n    prefix_msgs: Sequence[ChatCompletionMessage],\n    suffix_msgs: Sequence[ChatCompletionMessage],\n    continuation: bool,\n) -> list[ChatCompletionMessage]:\n    \"\"\"Combine prefix and suffix messages, handling continuation flag.\n\n    Args:\n        prefix_msgs: Normalized cacheable prefix messages\n        suffix_msgs: Normalized suffix messages\n        continuation: If True, append suffix content to the last message of prefix\n        was_prefix_string: Deprecated, no longer used\n\n    Returns:\n        Combined messages\n    \"\"\"\n    if not continuation or not prefix_msgs:\n        return list(prefix_msgs) + list(suffix_msgs)\n    # Append suffix content to last message of prefix\n    result = list(prefix_msgs)\n    last_msg = dict(result[-1])\n    suffix_first = dict(suffix_msgs[0]) if suffix_msgs else {}\n\n    # Combine content\n    if \"content\" in last_msg and \"content\" in suffix_first:\n        if isinstance(last_msg[\"content\"], str) and isinstance(\n            suffix_first[\"content\"], str\n        ):\n            last_msg[\"content\"] = last_msg[\"content\"] + suffix_first[\"content\"]\n        else:\n            # Handle list content (multimodal)\n            prefix_content = (\n                last_msg[\"content\"]\n                if isinstance(last_msg[\"content\"], list)\n                else [{\"type\": \"text\", \"text\": last_msg[\"content\"]}]\n            )\n            suffix_content = (\n                suffix_first[\"content\"]\n                if isinstance(suffix_first[\"content\"], list)\n                else [{\"type\": \"text\", \"text\": suffix_first[\"content\"]}]\n            )\n            last_msg[\"content\"] = prefix_content + suffix_content\n\n    result[-1] = revalidate_message_from_original(original=result[-1], mutated=last_msg)\n    result.extend(suffix_msgs[1:])\n    return result\n\n\ndef revalidate_message_from_original(\n    original: ChatCompletionMessage,\n    mutated: dict[str, Any],\n) -> ChatCompletionMessage:\n    \"\"\"Rebuild a mutated message using the original BaseModel type.\n\n    Some providers need to add cache metadata to messages. Re-run validation against\n    the original message's Pydantic class so union discrimination (by role) stays\n    intact.\n    \"\"\"\n    cls = original.__class__\n    try:\n        return cls.model_validate_json(json.dumps(mutated))\n    except Exception:\n        return cls.model_validate(mutated)\n\n\ndef prepare_messages_with_cacheable_transform(\n    cacheable_prefix: LanguageModelInput | None,\n    suffix: LanguageModelInput,\n    continuation: bool,\n    transform_cacheable: (\n        Callable[[Sequence[ChatCompletionMessage]], Sequence[ChatCompletionMessage]]\n        | None\n    ) = None,\n) -> LanguageModelInput:\n    \"\"\"Prepare messages for caching with optional transformation of cacheable prefix.\n\n    This is a shared utility that handles the common flow:\n    1. Normalize inputs\n    2. Optionally transform cacheable messages\n    3. Combine with continuation handling\n\n    Args:\n        cacheable_prefix: Optional cacheable prefix\n        suffix: Non-cacheable suffix\n        continuation: Whether to append suffix to last prefix message\n        transform_cacheable: Optional function to transform cacheable messages\n            (e.g., add cache_control parameter). If None, messages are used as-is.\n\n    Returns:\n        Combined messages ready for LLM API call\n    \"\"\"\n    if cacheable_prefix is None:\n        return suffix\n\n    prefix_msgs = (\n        cacheable_prefix if isinstance(cacheable_prefix, list) else [cacheable_prefix]\n    )\n    suffix_msgs = suffix if isinstance(suffix, list) else [suffix]\n\n    # Apply transformation to cacheable messages if provided\n    if transform_cacheable is not None:\n        prefix_msgs = list(transform_cacheable(prefix_msgs))\n\n    return combine_messages_with_continuation(\n        prefix_msgs=prefix_msgs, suffix_msgs=suffix_msgs, continuation=continuation\n    )\n"
  },
  {
    "path": "backend/onyx/llm/request_context.py",
    "content": "import contextvars\n\n\n_LLM_MOCK_RESPONSE_CONTEXTVAR: contextvars.ContextVar[str | None] = (\n    contextvars.ContextVar(\"llm_mock_response\", default=None)\n)\n\n\ndef get_llm_mock_response() -> str | None:\n    return _LLM_MOCK_RESPONSE_CONTEXTVAR.get()\n\n\ndef set_llm_mock_response(mock_response: str | None) -> contextvars.Token[str | None]:\n    return _LLM_MOCK_RESPONSE_CONTEXTVAR.set(mock_response)\n\n\ndef reset_llm_mock_response(token: contextvars.Token[str | None]) -> None:\n    try:\n        _LLM_MOCK_RESPONSE_CONTEXTVAR.reset(token)\n    except ValueError:\n        # Streaming requests can cross execution contexts.\n        # Best effort clear to avoid crashing request teardown in integration mode.\n        _LLM_MOCK_RESPONSE_CONTEXTVAR.set(None)\n"
  },
  {
    "path": "backend/onyx/llm/utils.py",
    "content": "import copy\nimport re\nfrom collections.abc import Callable\nfrom functools import lru_cache\nfrom typing import Any\nfrom typing import cast\nfrom typing import TYPE_CHECKING\n\nfrom sqlalchemy import select\n\nfrom onyx.configs.app_configs import LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS\nfrom onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION\nfrom onyx.configs.app_configs import SEND_USER_METADATA_TO_LLM_PROVIDER\nfrom onyx.configs.app_configs import USE_CHUNK_SUMMARY\nfrom onyx.configs.app_configs import USE_DOCUMENT_SUMMARY\nfrom onyx.configs.model_configs import GEN_AI_MAX_TOKENS\nfrom onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS\nfrom onyx.configs.model_configs import GEN_AI_NUM_RESERVED_OUTPUT_TOKENS\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import LLMModelFlowType\nfrom onyx.db.models import LLMProvider\nfrom onyx.db.models import ModelConfiguration\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.interfaces import LLMUserIdentity\nfrom onyx.llm.model_response import ModelResponse\nfrom onyx.llm.models import UserMessage\nfrom onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_TOKEN_ESTIMATE\nfrom onyx.prompts.contextual_retrieval import DOCUMENT_SUMMARY_TOKEN_ESTIMATE\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE\n\n\nif TYPE_CHECKING:\n    from onyx.server.manage.llm.models import LLMProviderView\n\n\nlogger = setup_logger()\n\nMAX_CONTEXT_TOKENS = 100\nONE_MILLION = 1_000_000\nCHUNKS_PER_DOC_ESTIMATE = 5\nMAX_LITELLM_USER_ID_LENGTH = 64\n_TWELVE_LABS_PEGASUS_MODEL_NAMES = [\n    \"us.twelvelabs.pegasus-1-2-v1:0\",\n    \"us.twelvelabs.pegasus-1-2-v1\",\n    \"twelvelabs/us.twelvelabs.pegasus-1-2-v1:0\",\n    \"twelvelabs/us.twelvelabs.pegasus-1-2-v1\",\n]\n_TWELVE_LABS_PEGASUS_OUTPUT_TOKENS = max(512, GEN_AI_MODEL_FALLBACK_MAX_TOKENS // 4)\nCUSTOM_LITELLM_MODEL_OVERRIDES: dict[str, dict[str, Any]] = {\n    model_name: {\n        \"max_input_tokens\": GEN_AI_MODEL_FALLBACK_MAX_TOKENS,\n        \"max_output_tokens\": _TWELVE_LABS_PEGASUS_OUTPUT_TOKENS,\n        \"max_tokens\": GEN_AI_MODEL_FALLBACK_MAX_TOKENS,\n        \"supports_reasoning\": False,\n        \"supports_vision\": False,\n    }\n    for model_name in _TWELVE_LABS_PEGASUS_MODEL_NAMES\n}\n\n\ndef truncate_litellm_user_id(user_id: str) -> str:\n    \"\"\"Truncate the LiteLLM `user` field maximum length.\"\"\"\n    if len(user_id) <= MAX_LITELLM_USER_ID_LENGTH:\n        return user_id\n    logger.warning(\n        \"User's ID exceeds %d chars (len=%d); truncating for Litellm logging compatibility.\",\n        MAX_LITELLM_USER_ID_LENGTH,\n        len(user_id),\n    )\n    return user_id[:MAX_LITELLM_USER_ID_LENGTH]\n\n\ndef build_litellm_passthrough_kwargs(\n    model_kwargs: dict[str, Any],\n    user_identity: LLMUserIdentity | None,\n) -> dict[str, Any]:\n    \"\"\"Build kwargs passed through directly to LiteLLM.\n\n    Returns `model_kwargs` unchanged unless we need to add user/session metadata,\n    in which case a copy is returned to avoid cross-request mutation.\n    \"\"\"\n\n    if not (SEND_USER_METADATA_TO_LLM_PROVIDER and user_identity):\n        return model_kwargs\n\n    passthrough_kwargs = copy.deepcopy(model_kwargs)\n\n    if user_identity.user_id:\n        passthrough_kwargs[\"user\"] = truncate_litellm_user_id(user_identity.user_id)\n\n    if user_identity.session_id:\n        existing_metadata = passthrough_kwargs.get(\"metadata\")\n        metadata: dict[str, Any] | None\n        if existing_metadata is None:\n            metadata = {}\n        elif isinstance(existing_metadata, dict):\n            metadata = copy.deepcopy(existing_metadata)\n        else:\n            metadata = None\n\n        if metadata is not None:\n            metadata[\"session_id\"] = user_identity.session_id\n            passthrough_kwargs[\"metadata\"] = metadata\n\n    return passthrough_kwargs\n\n\ndef _unwrap_nested_exception(error: Exception) -> Exception:\n    \"\"\"\n    Traverse common exception wrappers to surface the underlying LiteLLM error.\n    \"\"\"\n    visited: set[int] = set()\n    current = error\n    for _ in range(100):\n        visited.add(id(current))\n        candidate: Exception | None = None\n        cause = getattr(current, \"__cause__\", None)\n        if isinstance(cause, Exception):\n            candidate = cause\n        elif (\n            hasattr(current, \"args\")\n            and len(getattr(current, \"args\")) == 1\n            and isinstance(current.args[0], Exception)\n        ):\n            candidate = current.args[0]\n        if candidate is None or id(candidate) in visited:\n            break\n        current = candidate\n    return current\n\n\ndef litellm_exception_to_error_msg(\n    e: Exception,\n    llm: LLM,\n    fallback_to_error_msg: bool = False,\n    custom_error_msg_mappings: (\n        dict[str, str] | None\n    ) = LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS,\n) -> tuple[str, str, bool]:\n    \"\"\"Convert a LiteLLM exception to a user-friendly error message with classification.\n\n    Returns:\n        tuple: (error_message, error_code, is_retryable)\n            - error_message: User-friendly error description\n            - error_code: Categorized error code for frontend display\n            - is_retryable: Whether the user should try again\n    \"\"\"\n    from litellm.exceptions import BadRequestError\n    from litellm.exceptions import AuthenticationError\n    from litellm.exceptions import PermissionDeniedError\n    from litellm.exceptions import NotFoundError\n    from litellm.exceptions import UnprocessableEntityError\n    from litellm.exceptions import RateLimitError\n    from litellm.exceptions import ContextWindowExceededError\n    from litellm.exceptions import APIConnectionError\n    from litellm.exceptions import APIError\n    from litellm.exceptions import Timeout\n    from litellm.exceptions import ContentPolicyViolationError\n    from litellm.exceptions import BudgetExceededError\n    from litellm.exceptions import ServiceUnavailableError\n\n    core_exception = _unwrap_nested_exception(e)\n    error_msg = str(core_exception)\n    error_code = \"UNKNOWN_ERROR\"\n    is_retryable = True\n\n    if custom_error_msg_mappings:\n        for error_msg_pattern, custom_error_msg in custom_error_msg_mappings.items():\n            if error_msg_pattern in error_msg:\n                return custom_error_msg, \"CUSTOM_ERROR\", True\n\n    if isinstance(core_exception, BadRequestError):\n        error_msg = \"Bad request: The server couldn't process your request. Please check your input.\"\n        error_code = \"BAD_REQUEST\"\n        is_retryable = True\n    elif isinstance(core_exception, AuthenticationError):\n        error_msg = \"Authentication failed: Please check your API key and credentials.\"\n        error_code = \"AUTH_ERROR\"\n        is_retryable = False\n    elif isinstance(core_exception, PermissionDeniedError):\n        error_msg = (\n            \"Permission denied: You don't have the necessary permissions for this operation. \"\n            \"Ensure you have access to this model.\"\n        )\n        error_code = \"PERMISSION_DENIED\"\n        is_retryable = False\n    elif isinstance(core_exception, NotFoundError):\n        error_msg = \"Resource not found: The requested resource doesn't exist.\"\n        error_code = \"NOT_FOUND\"\n        is_retryable = False\n    elif isinstance(core_exception, UnprocessableEntityError):\n        error_msg = \"Unprocessable entity: The server couldn't process your request due to semantic errors.\"\n        error_code = \"UNPROCESSABLE_ENTITY\"\n        is_retryable = True\n    elif isinstance(core_exception, RateLimitError):\n        provider_name = (\n            llm.config.model_provider\n            if llm is not None and llm.config.model_provider\n            else \"The LLM provider\"\n        )\n        upstream_detail: str | None = None\n        message_attr = getattr(core_exception, \"message\", None)\n        if message_attr:\n            upstream_detail = str(message_attr)\n        elif hasattr(core_exception, \"api_error\"):\n            api_error = core_exception.api_error\n            if isinstance(api_error, dict):\n                upstream_detail = (\n                    api_error.get(\"message\")\n                    or api_error.get(\"detail\")\n                    or api_error.get(\"error\")\n                )\n        if not upstream_detail:\n            upstream_detail = str(core_exception)\n        upstream_detail = str(upstream_detail).strip()\n        if \":\" in upstream_detail and upstream_detail.lower().startswith(\n            \"ratelimiterror\"\n        ):\n            upstream_detail = upstream_detail.split(\":\", 1)[1].strip()\n        upstream_detail_lower = upstream_detail.lower()\n        if (\n            \"insufficient_quota\" in upstream_detail_lower\n            or \"exceeded your current quota\" in upstream_detail_lower\n        ):\n            error_msg = (\n                f\"{provider_name} quota exceeded: {upstream_detail}\"\n                if upstream_detail\n                else f\"{provider_name} quota exceeded: Verify billing and quota for this API key.\"\n            )\n            error_code = \"BUDGET_EXCEEDED\"\n            is_retryable = False\n        else:\n            error_msg = (\n                f\"{provider_name} rate limit: {upstream_detail}\"\n                if upstream_detail\n                else f\"{provider_name} rate limit exceeded: Please slow down your requests and try again later.\"\n            )\n            error_code = \"RATE_LIMIT\"\n            is_retryable = True\n    elif isinstance(core_exception, ServiceUnavailableError):\n        provider_name = (\n            llm.config.model_provider\n            if llm is not None and llm.config.model_provider\n            else \"The LLM provider\"\n        )\n        # Check if this is specifically the Bedrock \"Too many connections\" error\n        if \"Too many connections\" in error_msg or \"BedrockException\" in error_msg:\n            error_msg = (\n                f\"{provider_name} is experiencing high connection volume and cannot process your request right now. \"\n                \"This typically happens when there are too many simultaneous requests to the AI model. \"\n                \"Please wait a moment and try again. If this persists, contact your system administrator \"\n                \"to review connection limits and retry configurations.\"\n            )\n        else:\n            # Generic 503 Service Unavailable\n            error_msg = f\"{provider_name} service error: {str(core_exception)}\"\n        error_code = \"SERVICE_UNAVAILABLE\"\n        is_retryable = True\n    elif isinstance(core_exception, ContextWindowExceededError):\n        error_msg = (\n            \"Context window exceeded: Your input is too long for the model to process.\"\n        )\n        if llm is not None:\n            try:\n                max_context = get_max_input_tokens(\n                    model_name=llm.config.model_name,\n                    model_provider=llm.config.model_provider,\n                )\n                error_msg += f\" Your invoked model ({llm.config.model_name}) has a maximum context size of {max_context}.\"\n            except Exception:\n                logger.warning(\n                    \"Unable to get maximum input token for LiteLLM exception handling\"\n                )\n        error_code = \"CONTEXT_TOO_LONG\"\n        is_retryable = False\n    elif isinstance(core_exception, ContentPolicyViolationError):\n        error_msg = \"Content policy violation: Your request violates the content policy. Please revise your input.\"\n        error_code = \"CONTENT_POLICY\"\n        is_retryable = False\n    elif isinstance(core_exception, APIConnectionError):\n        error_msg = \"API connection error: Failed to connect to the API. Please check your internet connection.\"\n        error_code = \"CONNECTION_ERROR\"\n        is_retryable = True\n    elif isinstance(core_exception, BudgetExceededError):\n        error_msg = (\n            \"Budget exceeded: You've exceeded your allocated budget for API usage.\"\n        )\n        error_code = \"BUDGET_EXCEEDED\"\n        is_retryable = False\n    elif isinstance(core_exception, Timeout):\n        error_msg = \"Request timed out: The operation took too long to complete. Please try again.\"\n        error_code = \"CONNECTION_ERROR\"\n        is_retryable = True\n    elif isinstance(core_exception, APIError):\n        error_msg = f\"API error: An error occurred while communicating with the API. Details: {str(core_exception)}\"\n        error_code = \"API_ERROR\"\n        is_retryable = True\n    elif not fallback_to_error_msg:\n        error_msg = \"An unexpected error occurred while processing your request. Please try again later.\"\n        error_code = \"UNKNOWN_ERROR\"\n        is_retryable = True\n\n    return error_msg, error_code, is_retryable\n\n\ndef llm_response_to_string(message: ModelResponse) -> str:\n    if not isinstance(message.choice.message.content, str):\n        raise RuntimeError(\"LLM message not in expected format.\")\n\n    return message.choice.message.content\n\n\ndef check_number_of_tokens(\n    text: str, encode_fn: Callable[[str], list] | None = None\n) -> int:\n    \"\"\"Gets the number of tokens in the provided text, using the provided encoding\n    function. If none is provided, default to the tiktoken encoder used by GPT-3.5\n    and GPT-4.\n    \"\"\"\n    import tiktoken\n\n    if encode_fn is None:\n        encode_fn = tiktoken.get_encoding(\"cl100k_base\").encode\n\n    return len(encode_fn(text))\n\n\ndef test_llm(llm: LLM) -> str | None:\n    # try for up to 2 timeouts (e.g. 10 seconds in total)\n    error_msg = None\n    for _ in range(2):\n        try:\n            llm.invoke(UserMessage(content=\"Do not respond\"), max_tokens=50)\n            return None\n        except Exception as e:\n            error_msg = str(e)\n            logger.warning(f\"Failed to call LLM with the following error: {error_msg}\")\n\n    return error_msg\n\n\n@lru_cache(maxsize=1)  # the copy.deepcopy is expensive, so we cache the result\ndef get_model_map() -> dict:\n    import litellm\n\n    DIVIDER = \"/\"\n\n    original_map = cast(dict[str, dict], litellm.model_cost)\n    starting_map = copy.deepcopy(original_map)\n    for key in original_map:\n        if DIVIDER in key:\n            truncated_key = key.split(DIVIDER)[-1]\n            # make sure not to overwrite an original key\n            if truncated_key in original_map:\n                continue\n\n            # if there are multiple possible matches, choose the most \"detailed\"\n            # one as a heuristic. \"detailed\" = the description of the model\n            # has the most filled out fields.\n            existing_truncated_value = starting_map.get(truncated_key)\n            potential_truncated_value = original_map[key]\n            if not existing_truncated_value or len(potential_truncated_value) > len(\n                existing_truncated_value\n            ):\n                starting_map[truncated_key] = potential_truncated_value\n\n    for model_name, model_metadata in CUSTOM_LITELLM_MODEL_OVERRIDES.items():\n        if model_name in starting_map:\n            continue\n        starting_map[model_name] = copy.deepcopy(model_metadata)\n\n    # NOTE: outside of the explicit CUSTOM_LITELLM_MODEL_OVERRIDES,\n    # we avoid hard-coding additional models here. Ollama, for example,\n    # allows the user to specify their desired max context window, and it's\n    # unlikely to be standard across users even for the same model\n    # (it heavily depends on their hardware). For those cases, we rely on\n    # GEN_AI_MODEL_FALLBACK_MAX_TOKENS to cover this.\n    # for model_name in [\n    #     \"llama3.2\",\n    #     \"llama3.2:1b\",\n    #     \"llama3.2:3b\",\n    #     \"llama3.2:11b\",\n    #     \"llama3.2:90b\",\n    # ]:\n    #     starting_map[f\"ollama/{model_name}\"] = {\n    #         \"max_tokens\": 128000,\n    #         \"max_input_tokens\": 128000,\n    #         \"max_output_tokens\": 128000,\n    #     }\n\n    return starting_map\n\n\ndef _strip_extra_provider_from_model_name(model_name: str) -> str:\n    return model_name.split(\"/\")[1] if \"/\" in model_name else model_name\n\n\ndef _strip_colon_from_model_name(model_name: str) -> str:\n    return \":\".join(model_name.split(\":\")[:-1]) if \":\" in model_name else model_name\n\n\ndef find_model_obj(model_map: dict, provider: str, model_name: str) -> dict | None:\n    stripped_model_name = _strip_extra_provider_from_model_name(model_name)\n\n    model_names = [\n        model_name,\n        _strip_extra_provider_from_model_name(model_name),\n        # Remove leading extra provider. Usually for cases where user has a\n        # customer model proxy which appends another prefix\n        # remove :XXXX from the end, if present. Needed for ollama.\n        _strip_colon_from_model_name(model_name),\n        _strip_colon_from_model_name(stripped_model_name),\n    ]\n\n    # Filter out None values and deduplicate model names\n    filtered_model_names = [name for name in model_names if name]\n\n    # First try all model names with provider prefix\n    for model_name in filtered_model_names:\n        model_obj = model_map.get(f\"{provider}/{model_name}\")\n        if model_obj:\n            return model_obj\n\n    # Then try all model names without provider prefix\n    for model_name in filtered_model_names:\n        model_obj = model_map.get(model_name)\n        if model_obj:\n            return model_obj\n\n    return None\n\n\ndef get_llm_contextual_cost(\n    llm: LLM,\n) -> float:\n    \"\"\"\n    Approximate the cost of using the given LLM for indexing with Contextual RAG.\n\n    We use a precomputed estimate for the number of tokens in the contextualizing prompts,\n    and we assume that every chunk is maximized in terms of content and context.\n    We also assume that every document is maximized in terms of content, as currently if\n    a document is longer than a certain length, its summary is used instead of the full content.\n\n    We expect that the first assumption will overestimate more than the second one\n    underestimates, so this should be a fairly conservative price estimate. Also,\n    this does not account for the cost of documents that fit within a single chunk\n    which do not get contextualized.\n    \"\"\"\n\n    import litellm\n\n    # calculate input costs\n    num_tokens = ONE_MILLION\n    num_input_chunks = num_tokens // DOC_EMBEDDING_CONTEXT_SIZE\n\n    # We assume that the documents are MAX_TOKENS_FOR_FULL_INCLUSION tokens long\n    # on average.\n    num_docs = num_tokens // MAX_TOKENS_FOR_FULL_INCLUSION\n\n    num_input_tokens = 0\n    num_output_tokens = 0\n\n    if not USE_CHUNK_SUMMARY and not USE_DOCUMENT_SUMMARY:\n        return 0\n\n    if USE_CHUNK_SUMMARY:\n        # Each per-chunk prompt includes:\n        # - The prompt tokens\n        # - the document tokens\n        # - the chunk tokens\n\n        # for each chunk, we prompt the LLM with the contextual RAG prompt\n        # and the full document content (or the doc summary, so this is an overestimate)\n        num_input_tokens += num_input_chunks * (\n            CONTEXTUAL_RAG_TOKEN_ESTIMATE + MAX_TOKENS_FOR_FULL_INCLUSION\n        )\n\n        # in aggregate, each chunk content is used as a prompt input once\n        # so the full input size is covered\n        num_input_tokens += num_tokens\n\n        # A single MAX_CONTEXT_TOKENS worth of output is generated per chunk\n        num_output_tokens += num_input_chunks * MAX_CONTEXT_TOKENS\n\n    # going over each doc once means all the tokens, plus the prompt tokens for\n    # the summary prompt. This CAN happen even when USE_DOCUMENT_SUMMARY is false,\n    # since doc summaries are used for longer documents when USE_CHUNK_SUMMARY is true.\n    # So, we include this unconditionally to overestimate.\n    num_input_tokens += num_tokens + num_docs * DOCUMENT_SUMMARY_TOKEN_ESTIMATE\n    num_output_tokens += num_docs * MAX_CONTEXT_TOKENS\n\n    try:\n        usd_per_prompt, usd_per_completion = litellm.cost_per_token(\n            model=llm.config.model_name,\n            prompt_tokens=num_input_tokens,\n            completion_tokens=num_output_tokens,\n        )\n    except Exception:\n        logger.exception(\n            \"An unexpected error occurred while calculating cost for model \"\n            f\"{llm.config.model_name} (potentially due to malformed name). \"\n            \"Assuming cost is 0.\"\n        )\n        return 0\n\n    # Costs are in USD dollars per million tokens\n    return usd_per_prompt + usd_per_completion\n\n\ndef llm_max_input_tokens(\n    model_map: dict,\n    model_name: str,\n    model_provider: str,\n) -> int:\n    \"\"\"Best effort attempt to get the max input tokens for the LLM.\"\"\"\n    if GEN_AI_MAX_TOKENS:\n        # This is an override, so always return this\n        logger.info(f\"Using override GEN_AI_MAX_TOKENS: {GEN_AI_MAX_TOKENS}\")\n        return GEN_AI_MAX_TOKENS\n\n    model_obj = find_model_obj(\n        model_map,\n        model_provider,\n        model_name,\n    )\n    if not model_obj:\n        logger.warning(\n            f\"Model '{model_name}' not found in LiteLLM. Falling back to {GEN_AI_MODEL_FALLBACK_MAX_TOKENS} tokens.\"\n        )\n        return GEN_AI_MODEL_FALLBACK_MAX_TOKENS\n\n    if \"max_input_tokens\" in model_obj:\n        return model_obj[\"max_input_tokens\"]\n\n    if \"max_tokens\" in model_obj:\n        return model_obj[\"max_tokens\"]\n\n    logger.warning(\n        f\"No max tokens found for '{model_name}'. Falling back to {GEN_AI_MODEL_FALLBACK_MAX_TOKENS} tokens.\"\n    )\n    return GEN_AI_MODEL_FALLBACK_MAX_TOKENS\n\n\ndef get_llm_max_output_tokens(\n    model_map: dict,\n    model_name: str,\n    model_provider: str,\n) -> int:\n    \"\"\"Best effort attempt to get the max output tokens for the LLM.\"\"\"\n    default_output_tokens = int(GEN_AI_MODEL_FALLBACK_MAX_TOKENS)\n\n    model_obj = model_map.get(f\"{model_provider}/{model_name}\")\n    if not model_obj:\n        model_obj = model_map.get(model_name)\n\n    if not model_obj:\n        logger.warning(\n            f\"Model '{model_name}' not found in LiteLLM. Falling back to {default_output_tokens} output tokens.\"\n        )\n        return default_output_tokens\n\n    if \"max_output_tokens\" in model_obj:\n        return model_obj[\"max_output_tokens\"]\n\n    # Fallback to a fraction of max_tokens if max_output_tokens is not specified\n    if \"max_tokens\" in model_obj:\n        return int(model_obj[\"max_tokens\"] * 0.1)\n\n    logger.warning(\n        f\"No max output tokens found for '{model_name}'. Falling back to {default_output_tokens} output tokens.\"\n    )\n    return default_output_tokens\n\n\ndef get_max_input_tokens(\n    model_name: str,\n    model_provider: str,\n    output_tokens: int = GEN_AI_NUM_RESERVED_OUTPUT_TOKENS,\n) -> int:\n    # NOTE: we previously used `litellm.get_max_tokens()`, but despite the name, this actually\n    # returns the max OUTPUT tokens. Under the hood, this uses the `litellm.model_cost` dict,\n    # and there is no other interface to get what we want. This should be okay though, since the\n    # `model_cost` dict is a named public interface:\n    # https://litellm.vercel.app/docs/completion/token_usage#7-model_cost\n    # model_map is  litellm.model_cost\n    litellm_model_map = get_model_map()\n\n    input_toks = (\n        llm_max_input_tokens(\n            model_name=model_name,\n            model_provider=model_provider,\n            model_map=litellm_model_map,\n        )\n        - output_tokens\n    )\n\n    if input_toks <= 0:\n        return GEN_AI_MODEL_FALLBACK_MAX_TOKENS\n\n    return input_toks\n\n\ndef get_max_input_tokens_from_llm_provider(\n    llm_provider: \"LLMProviderView\",\n    model_name: str,\n) -> int:\n    \"\"\"Get max input tokens for a model, with fallback chain.\n\n    Fallback order:\n    1. Use max_input_tokens from model_configuration (populated from source APIs\n       like OpenRouter, Ollama, or our Bedrock mapping)\n    2. Look up in litellm.model_cost dictionary\n    3. Fall back to GEN_AI_MODEL_FALLBACK_MAX_TOKENS (32000)\n\n    Most dynamic providers (OpenRouter, Ollama) provide context_length via their\n    APIs. Bedrock doesn't expose this, so we parse from model ID suffix (:200k)\n    or use BEDROCK_MODEL_TOKEN_LIMITS mapping. The 32000 fallback is only hit for\n    unknown models not in any of these sources.\n    \"\"\"\n    max_input_tokens = None\n    for model_configuration in llm_provider.model_configurations:\n        if model_configuration.name == model_name:\n            max_input_tokens = model_configuration.max_input_tokens\n    return (\n        max_input_tokens\n        if max_input_tokens\n        else get_max_input_tokens(\n            model_provider=llm_provider.name,\n            model_name=model_name,\n        )\n    )\n\n\ndef get_bedrock_token_limit(model_id: str) -> int:\n    \"\"\"Look up token limit for a Bedrock model.\n\n    AWS Bedrock API doesn't expose token limits directly. This function\n    attempts to determine the limit from multiple sources.\n\n    Lookup order:\n    1. Parse from model ID suffix (e.g., \":200k\" → 200000)\n    2. Check LiteLLM's model_cost dictionary\n    3. Fall back to our hardcoded BEDROCK_MODEL_TOKEN_LIMITS mapping\n    4. Default to 32000 if not found anywhere\n    \"\"\"\n    from onyx.llm.constants import BEDROCK_MODEL_TOKEN_LIMITS\n\n    model_id_lower = model_id.lower()\n\n    # 1. Try to parse context length from model ID suffix\n    # Format: \"model-name:version:NNNk\" where NNN is the context length in thousands\n    # Examples: \":200k\", \":128k\", \":1000k\", \":8k\", \":4k\"\n    context_match = re.search(r\":(\\d+)k\\b\", model_id_lower)\n    if context_match:\n        return int(context_match.group(1)) * 1000\n\n    # 2. Check LiteLLM's model_cost dictionary\n    try:\n        model_map = get_model_map()\n        # Try with bedrock/ prefix first, then without\n        for key in [f\"bedrock/{model_id}\", model_id]:\n            if key in model_map:\n                model_info = model_map[key]\n                if \"max_input_tokens\" in model_info:\n                    return model_info[\"max_input_tokens\"]\n                if \"max_tokens\" in model_info:\n                    return model_info[\"max_tokens\"]\n    except Exception:\n        pass  # Fall through to mapping\n\n    # 3. Try our hardcoded mapping (longest match first)\n    for pattern, limit in sorted(\n        BEDROCK_MODEL_TOKEN_LIMITS.items(), key=lambda x: -len(x[0])\n    ):\n        if pattern in model_id_lower:\n            return limit\n\n    # 4. Default fallback\n    return GEN_AI_MODEL_FALLBACK_MAX_TOKENS\n\n\ndef model_supports_image_input(model_name: str, model_provider: str) -> bool:\n    # First, try to read an explicit configuration from the model_configuration table\n    try:\n        with get_session_with_current_tenant() as db_session:\n            model_config = db_session.scalar(\n                select(ModelConfiguration)\n                .join(\n                    LLMProvider,\n                    ModelConfiguration.llm_provider_id == LLMProvider.id,\n                )\n                .where(\n                    ModelConfiguration.name == model_name,\n                    LLMProvider.provider == model_provider,\n                )\n            )\n            if (\n                model_config\n                and LLMModelFlowType.VISION in model_config.llm_model_flow_types\n            ):\n                return True\n    except Exception as e:\n        logger.warning(\n            f\"Failed to query database for {model_provider} model {model_name} image support: {e}\"\n        )\n\n    # Fallback to looking up the model in the litellm model_cost dict\n    return litellm_thinks_model_supports_image_input(model_name, model_provider)\n\n\ndef litellm_thinks_model_supports_image_input(\n    model_name: str, model_provider: str\n) -> bool:\n    \"\"\"Generally should call `model_supports_image_input` unless you already know that\n    `model_supports_image_input` from the DB is not set OR you need to avoid the performance\n    hit of querying the DB.\"\"\"\n    try:\n        model_obj = find_model_obj(get_model_map(), model_provider, model_name)\n        if not model_obj:\n            logger.warning(\n                f\"No litellm entry found for {model_provider}/{model_name}, this model may or may not support image input.\"\n            )\n            return False\n        # The or False here is because sometimes the dict contains the key but the value is None\n        return model_obj.get(\"supports_vision\", False) or False\n    except Exception:\n        logger.exception(\n            f\"Failed to get model object for {model_provider}/{model_name}\"\n        )\n        return False\n\n\ndef model_is_reasoning_model(model_name: str, model_provider: str) -> bool:\n    import litellm\n\n    model_map = get_model_map()\n    try:\n        model_obj = find_model_obj(\n            model_map,\n            model_provider,\n            model_name,\n        )\n        if model_obj and \"supports_reasoning\" in model_obj:\n            return model_obj[\"supports_reasoning\"]\n\n        # Fallback: try using litellm.supports_reasoning() for newer models\n        try:\n            # logger.debug(\"Falling back to `litellm.supports_reasoning`\")\n            full_model_name = (\n                f\"{model_provider}/{model_name}\"\n                if model_provider not in model_name\n                else model_name\n            )\n            return litellm.supports_reasoning(model=full_model_name)\n        except Exception:\n            logger.exception(\n                f\"Failed to check if {model_provider}/{model_name} supports reasoning\"\n            )\n            return False\n\n    except Exception:\n        logger.exception(\n            f\"Failed to get model object for {model_provider}/{model_name}\"\n        )\n        return False\n\n\ndef is_true_openai_model(model_provider: str, model_name: str) -> bool:\n    \"\"\"\n    Determines if a model is a true OpenAI model or just using OpenAI-compatible API.\n\n    LiteLLM uses the \"openai\" provider for any OpenAI-compatible server (e.g. vLLM, LiteLLM proxy),\n    but this function checks if the model is actually from OpenAI's model registry.\n\n    This function is used primarily to determine if we should use the responses API.\n    OpenAI models from OpenAI and Azure should use responses.\n    \"\"\"\n\n    if model_provider not in {\n        LlmProviderNames.OPENAI,\n        LlmProviderNames.LITELLM_PROXY,\n        LlmProviderNames.AZURE,\n    }:\n        return False\n\n    model_map = get_model_map()\n\n    def _check_if_model_name_is_openai_provider(model_name: str) -> bool:\n        if model_name not in model_map:\n            return False\n        return model_map[model_name].get(\"litellm_provider\") == LlmProviderNames.OPENAI\n\n    try:\n        # Check if any model exists in litellm's registry with openai prefix\n        # If it's registered as \"openai/model-name\", it's a real OpenAI model\n        if f\"{LlmProviderNames.OPENAI}/{model_name}\" in model_map:\n            return True\n\n        if _check_if_model_name_is_openai_provider(model_name):\n            return True\n\n        if model_name.startswith(f\"{LlmProviderNames.AZURE}/\"):\n            model_name_with_azure_removed = \"/\".join(model_name.split(\"/\")[1:])\n            if _check_if_model_name_is_openai_provider(model_name_with_azure_removed):\n                return True\n\n        return False\n\n    except Exception:\n        logger.exception(\n            f\"Failed to determine if {model_provider}/{model_name} is a true OpenAI model\"\n        )\n        return False\n\n\ndef model_needs_formatting_reenabled(model_name: str) -> bool:\n    # See https://simonwillison.net/tags/markdown/ for context on why this is needed\n    # for OpenAI reasoning models to have correct markdown generation\n\n    # Models that need formatting re-enabled\n    model_names = [\"gpt-5.1\", \"gpt-5\", \"o3\", \"o1\"]\n\n    # Pattern matches if any of these model names appear with word boundaries\n    # Word boundaries include: start/end of string, space, hyphen, or forward slash\n    pattern = (\n        r\"(?:^|[\\s\\-/])(\"\n        + \"|\".join(re.escape(name) for name in model_names)\n        + r\")(?:$|[\\s\\-/])\"\n    )\n\n    if re.search(pattern, model_name):\n        return True\n\n    return False\n"
  },
  {
    "path": "backend/onyx/llm/well_known_providers/auto_update_models.py",
    "content": "\"\"\"Pydantic models for GitHub-hosted Auto LLM configuration.\"\"\"\n\nfrom datetime import datetime\nfrom typing import Any\n\nfrom pydantic import BaseModel\nfrom pydantic import field_validator\n\nfrom onyx.llm.well_known_providers.models import SimpleKnownModel\n\n\nclass LLMProviderRecommendation(BaseModel):\n    \"\"\"Configuration for a single provider in the GitHub config.\n\n    Schema matches the plan:\n    - default_model: The default model config (can be string or object with name)\n    - additional_visible_models: List of additional visible model configs\n    \"\"\"\n\n    default_model: SimpleKnownModel\n    additional_visible_models: list[SimpleKnownModel] = []\n\n    @field_validator(\"default_model\", mode=\"before\")\n    @classmethod\n    def normalize_default_model(cls, v: Any) -> dict[str, Any]:\n        \"\"\"Allow default_model to be a string (model name) or object.\"\"\"\n        if isinstance(v, str):\n            return {\"name\": v}\n        return v\n\n\nclass LLMRecommendations(BaseModel):\n    \"\"\"Root configuration object fetched from GitHub.\"\"\"\n\n    version: str\n    updated_at: datetime\n    providers: dict[str, LLMProviderRecommendation]\n\n    def get_visible_models(self, provider_name: str) -> list[SimpleKnownModel]:\n        \"\"\"Get the set of models that should be visible by default for a provider.\"\"\"\n        if provider_name in self.providers:\n            provider_config = self.providers[provider_name]\n            return [provider_config.default_model] + list(\n                provider_config.additional_visible_models\n            )\n        return []\n\n    def get_default_model(self, provider_name: str) -> SimpleKnownModel | None:\n        \"\"\"Get the default model for a provider.\"\"\"\n        if provider_name in self.providers:\n            provider_config = self.providers[provider_name]\n            return provider_config.default_model\n        return None\n"
  },
  {
    "path": "backend/onyx/llm/well_known_providers/auto_update_service.py",
    "content": "\"\"\"Service for fetching and syncing LLM model configurations from GitHub.\n\nThis service manages Auto mode LLM providers, where models and configuration\nare managed centrally via a GitHub-hosted JSON file. In Auto mode:\n- Model list is controlled by GitHub config\n- Model visibility is controlled by GitHub config\n- Default model is controlled by GitHub config\n- Admin only needs to provide API credentials\n\"\"\"\n\nfrom datetime import datetime\n\nimport httpx\nfrom sqlalchemy.orm import Session\n\nfrom onyx.cache.factory import get_cache_backend\nfrom onyx.configs.app_configs import AUTO_LLM_CONFIG_URL\nfrom onyx.db.llm import fetch_auto_mode_providers\nfrom onyx.db.llm import sync_auto_mode_models\nfrom onyx.llm.well_known_providers.auto_update_models import LLMRecommendations\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_CACHE_KEY_LAST_UPDATED_AT = \"auto_llm_update:last_updated_at\"\n_CACHE_TTL_SECONDS = 60 * 60 * 24  # 24 hours\n\n\ndef _get_cached_last_updated_at() -> datetime | None:\n    try:\n        value = get_cache_backend().get(_CACHE_KEY_LAST_UPDATED_AT)\n        if value is not None:\n            return datetime.fromisoformat(value.decode(\"utf-8\"))\n    except Exception as e:\n        logger.warning(f\"Failed to get cached last_updated_at: {e}\")\n    return None\n\n\ndef _set_cached_last_updated_at(updated_at: datetime) -> None:\n    try:\n        get_cache_backend().set(\n            _CACHE_KEY_LAST_UPDATED_AT,\n            updated_at.isoformat(),\n            ex=_CACHE_TTL_SECONDS,\n        )\n    except Exception as e:\n        logger.warning(f\"Failed to set cached last_updated_at: {e}\")\n\n\ndef fetch_llm_recommendations_from_github(\n    timeout: float = 30.0,\n) -> LLMRecommendations | None:\n    \"\"\"Fetch LLM configuration from GitHub.\n\n    Returns:\n        GitHubLLMConfig if successful, None on error.\n    \"\"\"\n    if not AUTO_LLM_CONFIG_URL:\n        logger.debug(\"AUTO_LLM_CONFIG_URL not configured, skipping fetch\")\n        return None\n\n    try:\n        with httpx.Client(timeout=timeout) as client:\n            response = client.get(AUTO_LLM_CONFIG_URL)\n            response.raise_for_status()\n\n            data = response.json()\n            return LLMRecommendations.model_validate(data)\n    except httpx.HTTPError as e:\n        logger.error(f\"Failed to fetch LLM config from GitHub: {e}\")\n        return None\n    except Exception as e:\n        logger.error(f\"Error parsing LLM config: {e}\")\n        return None\n\n\ndef sync_llm_models_from_github(\n    db_session: Session,\n    force: bool = False,\n) -> dict[str, int]:\n    \"\"\"Sync models from GitHub config to database for all Auto mode providers.\n\n    In Auto mode, EVERYTHING is controlled by GitHub config:\n    - Model list\n    - Model visibility (is_visible)\n    - Default model\n    - Fast default model\n\n    Args:\n        db_session: Database session\n        config: GitHub LLM configuration\n        force: If True, skip the updated_at check and force sync\n\n    Returns:\n        Dict of provider_name -> number of changes made.\n    \"\"\"\n    results: dict[str, int] = {}\n\n    # Get all providers in Auto mode\n    auto_providers = fetch_auto_mode_providers(db_session)\n    if not auto_providers:\n        logger.debug(\"No providers in Auto mode found\")\n        return {}\n\n    # Fetch config from GitHub\n    config = fetch_llm_recommendations_from_github()\n    if not config:\n        logger.warning(\"Failed to fetch GitHub config\")\n        return {}\n\n    # Skip if we've already processed this version (unless forced)\n    last_updated_at = _get_cached_last_updated_at()\n    if not force and last_updated_at and config.updated_at <= last_updated_at:\n        logger.debug(\"GitHub config unchanged, skipping sync\")\n        _set_cached_last_updated_at(config.updated_at)\n        return {}\n\n    for provider in auto_providers:\n        provider_type = provider.provider  # e.g., \"openai\", \"anthropic\"\n\n        if provider_type not in config.providers:\n            logger.debug(\n                f\"No config for provider type '{provider_type}' in GitHub config\"\n            )\n            continue\n\n        # Sync models - this replaces the model list entirely for Auto mode\n        changes = sync_auto_mode_models(\n            db_session=db_session,\n            provider=provider,\n            llm_recommendations=config,\n        )\n\n        if changes > 0:\n            results[provider.name] = changes\n            logger.info(\n                f\"Applied {changes} model changes to provider '{provider.name}'\"\n            )\n\n    _set_cached_last_updated_at(config.updated_at)\n    return results\n\n\ndef reset_cache() -> None:\n    \"\"\"Reset the cache timestamp. Useful for testing.\"\"\"\n    try:\n        get_cache_backend().delete(_CACHE_KEY_LAST_UPDATED_AT)\n    except Exception as e:\n        logger.warning(f\"Failed to reset cache: {e}\")\n"
  },
  {
    "path": "backend/onyx/llm/well_known_providers/constants.py",
    "content": "from onyx.llm.constants import LlmProviderNames\n\nOPENAI_PROVIDER_NAME = \"openai\"\n\nBEDROCK_PROVIDER_NAME = \"bedrock\"\n\n\nOLLAMA_PROVIDER_NAME = \"ollama_chat\"\nOLLAMA_API_KEY_CONFIG_KEY = \"OLLAMA_API_KEY\"\n\nLM_STUDIO_PROVIDER_NAME = \"lm_studio\"\nLM_STUDIO_API_KEY_CONFIG_KEY = \"LM_STUDIO_API_KEY\"\n\nLITELLM_PROXY_PROVIDER_NAME = \"litellm_proxy\"\n\nBIFROST_PROVIDER_NAME = \"bifrost\"\n\n# Providers that use optional Bearer auth from custom_config\nPROVIDERS_WITH_SPECIAL_API_KEY_HANDLING: dict[str, str] = {\n    LlmProviderNames.OLLAMA_CHAT: OLLAMA_API_KEY_CONFIG_KEY,\n    LlmProviderNames.LM_STUDIO: LM_STUDIO_API_KEY_CONFIG_KEY,\n}\n\n# OpenRouter\nOPENROUTER_PROVIDER_NAME = \"openrouter\"\n\nANTHROPIC_PROVIDER_NAME = \"anthropic\"\n\nAZURE_PROVIDER_NAME = \"azure\"\n\n\nVERTEXAI_PROVIDER_NAME = \"vertex_ai\"\nVERTEX_CREDENTIALS_FILE_KWARG = \"vertex_credentials\"\nVERTEX_CREDENTIALS_FILE_KWARG_ENV_VAR_FORMAT = \"CREDENTIALS_FILE\"\nVERTEX_LOCATION_KWARG = \"vertex_location\"\n\nAWS_REGION_NAME_KWARG = \"aws_region_name\"\nAWS_REGION_NAME_KWARG_ENV_VAR_FORMAT = \"AWS_REGION_NAME\"\nAWS_BEARER_TOKEN_BEDROCK_KWARG_ENV_VAR_FORMAT = \"AWS_BEARER_TOKEN_BEDROCK\"\nAWS_ACCESS_KEY_ID_KWARG = \"aws_access_key_id\"\nAWS_ACCESS_KEY_ID_KWARG_ENV_VAR_FORMAT = \"AWS_ACCESS_KEY_ID\"\nAWS_SECRET_ACCESS_KEY_KWARG = \"aws_secret_access_key\"\nAWS_SECRET_ACCESS_KEY_KWARG_ENV_VAR_FORMAT = \"AWS_SECRET_ACCESS_KEY\"\n"
  },
  {
    "path": "backend/onyx/llm/well_known_providers/llm_provider_options.py",
    "content": "import json\nimport pathlib\nimport threading\nimport time\n\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.constants import PROVIDER_DISPLAY_NAMES\nfrom onyx.llm.constants import WELL_KNOWN_PROVIDER_NAMES\nfrom onyx.llm.utils import get_max_input_tokens\nfrom onyx.llm.utils import model_supports_image_input\nfrom onyx.llm.well_known_providers.auto_update_models import LLMRecommendations\nfrom onyx.llm.well_known_providers.auto_update_service import (\n    fetch_llm_recommendations_from_github,\n)\nfrom onyx.llm.well_known_providers.constants import ANTHROPIC_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import AZURE_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import BEDROCK_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import BIFROST_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import LITELLM_PROXY_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import LM_STUDIO_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import OLLAMA_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import OPENAI_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import OPENROUTER_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import VERTEXAI_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.models import WellKnownLLMProviderDescriptor\nfrom onyx.server.manage.llm.models import ModelConfigurationView\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_RECOMMENDATIONS_CACHE_TTL_SECONDS = 300\n_recommendations_cache_lock = threading.Lock()\n_cached_recommendations: LLMRecommendations | None = None\n_cached_recommendations_time: float = 0.0\n\n\ndef _get_provider_to_models_map() -> dict[str, list[str]]:\n    \"\"\"Lazy-load provider model mappings to avoid importing litellm at module level.\n\n    Dynamic providers (Bedrock, Ollama, OpenRouter) return empty lists here\n    because their models are fetched directly from the source API, which is\n    more up-to-date than LiteLLM's static lists.\n    \"\"\"\n    return {\n        OPENAI_PROVIDER_NAME: get_openai_model_names(),\n        BEDROCK_PROVIDER_NAME: [],  # Dynamic - fetched from AWS API\n        ANTHROPIC_PROVIDER_NAME: get_anthropic_model_names(),\n        VERTEXAI_PROVIDER_NAME: get_vertexai_model_names(),\n        OLLAMA_PROVIDER_NAME: [],  # Dynamic - fetched from Ollama API\n        LM_STUDIO_PROVIDER_NAME: [],  # Dynamic - fetched from LM Studio API\n        OPENROUTER_PROVIDER_NAME: [],  # Dynamic - fetched from OpenRouter API\n        LITELLM_PROXY_PROVIDER_NAME: [],  # Dynamic - fetched from LiteLLM proxy API\n        BIFROST_PROVIDER_NAME: [],  # Dynamic - fetched from Bifrost API\n    }\n\n\ndef _load_bundled_recommendations() -> LLMRecommendations:\n    json_path = pathlib.Path(__file__).parent / \"recommended-models.json\"\n    with open(json_path, \"r\") as f:\n        json_config = json.load(f)\n    return LLMRecommendations.model_validate(json_config)\n\n\ndef get_recommendations() -> LLMRecommendations:\n    \"\"\"Get the recommendations, with an in-memory cache to avoid\n    hitting GitHub on every API request.\"\"\"\n    global _cached_recommendations, _cached_recommendations_time\n\n    now = time.monotonic()\n    if (\n        _cached_recommendations is not None\n        and (now - _cached_recommendations_time) < _RECOMMENDATIONS_CACHE_TTL_SECONDS\n    ):\n        return _cached_recommendations\n\n    with _recommendations_cache_lock:\n        # Double-check after acquiring lock\n        if (\n            _cached_recommendations is not None\n            and (time.monotonic() - _cached_recommendations_time)\n            < _RECOMMENDATIONS_CACHE_TTL_SECONDS\n        ):\n            return _cached_recommendations\n\n        recommendations_from_github = fetch_llm_recommendations_from_github()\n        result = recommendations_from_github or _load_bundled_recommendations()\n\n        _cached_recommendations = result\n        _cached_recommendations_time = time.monotonic()\n        return result\n\n\ndef is_obsolete_model(model_name: str, provider: str) -> bool:\n    \"\"\"Check if a model is obsolete and should be filtered out.\n\n    Filters models that are 2+ major versions behind or deprecated.\n    This is the single source of truth for obsolete model detection.\n    \"\"\"\n    model_lower = model_name.lower()\n\n    # OpenAI obsolete models\n    if provider == LlmProviderNames.OPENAI:\n        # GPT-3 models are obsolete\n        if \"gpt-3\" in model_lower:\n            return True\n        # Legacy models\n        deprecated = {\n            \"text-davinci-003\",\n            \"text-davinci-002\",\n            \"text-curie-001\",\n            \"text-babbage-001\",\n            \"text-ada-001\",\n            \"davinci\",\n            \"curie\",\n            \"babbage\",\n            \"ada\",\n        }\n        if model_lower in deprecated:\n            return True\n\n    # Anthropic obsolete models\n    if provider == LlmProviderNames.ANTHROPIC:\n        if \"claude-2\" in model_lower or \"claude-instant\" in model_lower:\n            return True\n\n    # Vertex AI obsolete models\n    if provider == LlmProviderNames.VERTEX_AI:\n        if \"gemini-1.0\" in model_lower:\n            return True\n        if \"palm\" in model_lower or \"bison\" in model_lower:\n            return True\n\n    return False\n\n\ndef get_openai_model_names() -> list[str]:\n    \"\"\"Get OpenAI model names dynamically from litellm.\"\"\"\n    import re\n    import litellm\n\n    # TODO: remove these lists once we have a comprehensive model configuration page\n    # The ideal flow should be: fetch all available models --> filter by type\n    # --> allow user to modify filters and select models based on current context\n    non_chat_model_terms = {\n        \"embed\",\n        \"audio\",\n        \"tts\",\n        \"whisper\",\n        \"dall-e\",\n        \"image\",\n        \"moderation\",\n        \"sora\",\n        \"container\",\n    }\n    deprecated_model_terms = {\"babbage\", \"davinci\", \"gpt-3.5\", \"gpt-4-\"}\n    excluded_terms = non_chat_model_terms | deprecated_model_terms\n\n    # NOTE: We are explicitly excluding all \"timestamped\" models\n    # because they are mostly just noise in the admin configuration panel\n    # e.g. gpt-4o-2025-07-16, gpt-3.5-turbo-0613, etc.\n    date_pattern = re.compile(r\"-\\d{4}\")\n\n    def is_valid_model(model: str) -> bool:\n        model_lower = model.lower()\n        return not any(\n            ex in model_lower for ex in excluded_terms\n        ) and not date_pattern.search(model)\n\n    return sorted(\n        (\n            model.removeprefix(\"openai/\")\n            for model in litellm.open_ai_chat_completion_models\n            if is_valid_model(model)\n        ),\n        reverse=True,\n    )\n\n\ndef get_anthropic_model_names() -> list[str]:\n    \"\"\"Get Anthropic model names dynamically from litellm.\"\"\"\n    import litellm\n\n    # Models to exclude from Anthropic's model list (deprecated or duplicates)\n    _IGNORABLE_ANTHROPIC_MODELS = {\n        \"claude-2\",\n        \"claude-instant-1\",\n        \"anthropic/claude-3-5-sonnet-20241022\",\n    }\n\n    return sorted(\n        [\n            model\n            for model in litellm.anthropic_models\n            if model not in _IGNORABLE_ANTHROPIC_MODELS\n            and not is_obsolete_model(model, LlmProviderNames.ANTHROPIC)\n        ],\n        reverse=True,\n    )\n\n\ndef get_vertexai_model_names() -> list[str]:\n    \"\"\"Get Vertex AI model names dynamically from litellm model_cost.\"\"\"\n    import litellm\n\n    # Combine all vertex model sets\n    vertex_models: set[str] = set()\n    vertex_model_sets = [\n        \"vertex_chat_models\",\n        \"vertex_language_models\",\n        \"vertex_anthropic_models\",\n        \"vertex_llama3_models\",\n        \"vertex_mistral_models\",\n        \"vertex_ai_ai21_models\",\n        \"vertex_deepseek_models\",\n    ]\n    for attr in vertex_model_sets:\n        if hasattr(litellm, attr):\n            vertex_models.update(getattr(litellm, attr))\n\n    # Also extract from model_cost for any models not in the sets\n    for key in litellm.model_cost.keys():\n        if key.startswith(\"vertex_ai/\"):\n            model_name = key.replace(\"vertex_ai/\", \"\")\n            vertex_models.add(model_name)\n\n    return sorted(\n        [\n            model\n            for model in vertex_models\n            if \"embed\" not in model.lower()\n            and \"image\" not in model.lower()\n            and \"video\" not in model.lower()\n            and \"code\" not in model.lower()\n            and \"veo\" not in model.lower()  # video generation\n            and \"live\" not in model.lower()  # live/streaming models\n            and \"tts\" not in model.lower()  # text-to-speech\n            and \"native-audio\" not in model.lower()  # audio models\n            and \"/\" not in model  # filter out prefixed models like openai/gpt-oss\n            and \"search_api\" not in model.lower()  # not a model\n            and \"-maas\" not in model.lower()  # marketplace models\n            and not is_obsolete_model(model, LlmProviderNames.VERTEX_AI)\n        ],\n        reverse=True,\n    )\n\n\ndef model_configurations_for_provider(\n    provider_name: str, llm_recommendations: LLMRecommendations\n) -> list[ModelConfigurationView]:\n    recommended_visible_models = llm_recommendations.get_visible_models(provider_name)\n    recommended_visible_models_names = [m.name for m in recommended_visible_models]\n\n    # Preserve provider-defined ordering while de-duplicating.\n    model_names: list[str] = []\n    seen_model_names: set[str] = set()\n    for model_name in (\n        fetch_models_for_provider(provider_name) + recommended_visible_models_names\n    ):\n        if model_name in seen_model_names:\n            continue\n        seen_model_names.add(model_name)\n        model_names.append(model_name)\n\n    # Vertex model list can be large and mixed-vendor; alphabetical ordering\n    # makes model discovery easier in admin selection UIs.\n    if provider_name == VERTEXAI_PROVIDER_NAME:\n        model_names = sorted(model_names, key=str.lower)\n\n    return [\n        ModelConfigurationView(\n            name=model_name,\n            is_visible=model_name in recommended_visible_models_names,\n            max_input_tokens=get_max_input_tokens(model_name, provider_name),\n            supports_image_input=model_supports_image_input(model_name, provider_name),\n        )\n        for model_name in model_names\n    ]\n\n\ndef fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]:\n    llm_recommendations = get_recommendations()\n\n    well_known_llms = []\n    for provider_name in WELL_KNOWN_PROVIDER_NAMES:\n        model_configurations = model_configurations_for_provider(\n            provider_name, llm_recommendations\n        )\n        well_known_llms.append(\n            WellKnownLLMProviderDescriptor(\n                name=provider_name,\n                known_models=model_configurations,\n                recommended_default_model=llm_recommendations.get_default_model(\n                    provider_name\n                ),\n            )\n        )\n    return well_known_llms\n\n\ndef fetch_models_for_provider(provider_name: str) -> list[str]:\n    return _get_provider_to_models_map().get(provider_name, [])\n\n\ndef fetch_model_names_for_provider_as_set(provider_name: str) -> set[str] | None:\n    model_names = fetch_models_for_provider(provider_name)\n    return set(model_names) if model_names else None\n\n\ndef fetch_visible_model_names_for_provider_as_set(\n    provider_name: str,\n) -> set[str] | None:\n    \"\"\"Get visible model names for a provider.\n\n    Note: Since we no longer maintain separate visible model lists,\n    this returns all models (same as fetch_model_names_for_provider_as_set).\n    Kept for backwards compatibility with alembic migrations.\n    \"\"\"\n    return fetch_model_names_for_provider_as_set(provider_name)\n\n\ndef get_provider_display_name(provider_name: str) -> str:\n    \"\"\"Get human-friendly display name for an Onyx-supported provider.\n\n    First checks Onyx-specific display names, then falls back to\n    PROVIDER_DISPLAY_NAMES from constants.\n    \"\"\"\n    # Display names for Onyx-supported LLM providers (used in admin UI provider selection).\n    # These override PROVIDER_DISPLAY_NAMES for Onyx-specific branding.\n    _ONYX_PROVIDER_DISPLAY_NAMES: dict[str, str] = {\n        OPENAI_PROVIDER_NAME: \"ChatGPT (OpenAI)\",\n        OLLAMA_PROVIDER_NAME: \"Ollama\",\n        LM_STUDIO_PROVIDER_NAME: \"LM Studio\",\n        ANTHROPIC_PROVIDER_NAME: \"Claude (Anthropic)\",\n        AZURE_PROVIDER_NAME: \"Azure OpenAI\",\n        BEDROCK_PROVIDER_NAME: \"Amazon Bedrock\",\n        VERTEXAI_PROVIDER_NAME: \"Google Vertex AI\",\n        OPENROUTER_PROVIDER_NAME: \"OpenRouter\",\n        LITELLM_PROXY_PROVIDER_NAME: \"LiteLLM Proxy\",\n    }\n\n    if provider_name in _ONYX_PROVIDER_DISPLAY_NAMES:\n        return _ONYX_PROVIDER_DISPLAY_NAMES[provider_name]\n    return PROVIDER_DISPLAY_NAMES.get(\n        provider_name.lower(), provider_name.replace(\"_\", \" \").title()\n    )\n\n\ndef fetch_default_model_for_provider(provider_name: str) -> str | None:\n    \"\"\"Fetch the default model for a provider.\n\n    First checks the GitHub-hosted recommended-models.json config (via fetch_github_config),\n    then falls back to hardcoded defaults if unavailable.\n    \"\"\"\n    llm_recommendations = get_recommendations()\n    default_model = llm_recommendations.get_default_model(provider_name)\n    return default_model.name if default_model else None\n"
  },
  {
    "path": "backend/onyx/llm/well_known_providers/models.py",
    "content": "from enum import Enum\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.server.manage.llm.models import ModelConfigurationView\n\n\nclass CustomConfigKeyType(str, Enum):\n    # used for configuration values that require manual input\n    # i.e., textual API keys (e.g., \"abcd1234\")\n    TEXT_INPUT = \"text_input\"\n\n    # used for configuration values that require a file to be selected/drag-and-dropped\n    # i.e., file based credentials (e.g., \"/path/to/credentials/file.json\")\n    FILE_INPUT = \"file_input\"\n\n    # used for configuration values that require a selection from predefined options\n    SELECT = \"select\"\n\n\nclass SimpleKnownModel(BaseModel):\n    name: str\n    display_name: str | None = None\n\n\nclass WellKnownLLMProviderDescriptor(BaseModel):\n    name: str\n\n    # NOTE: the recommended visible models are encoded in the known_models list\n    known_models: list[ModelConfigurationView] = Field(default_factory=list)\n    recommended_default_model: SimpleKnownModel | None = None\n"
  },
  {
    "path": "backend/onyx/llm/well_known_providers/recommended-models.json",
    "content": "{\n  \"version\": \"1.1\",\n  \"updated_at\": \"2026-03-05T00:00:00Z\",\n  \"providers\": {\n    \"openai\": {\n      \"default_model\": { \"name\": \"gpt-5.4\" },\n      \"additional_visible_models\": [\n        { \"name\": \"gpt-5.4\" },\n        { \"name\": \"gpt-5.2\" }\n      ]\n    },\n    \"anthropic\": {\n      \"default_model\": \"claude-opus-4-6\",\n      \"additional_visible_models\": [\n        {\n          \"name\": \"claude-opus-4-6\",\n          \"display_name\": \"Claude Opus 4.6\"\n        },\n        {\n          \"name\": \"claude-sonnet-4-6\",\n          \"display_name\": \"Claude Sonnet 4.6\"\n        },\n        {\n          \"name\": \"claude-opus-4-5\",\n          \"display_name\": \"Claude Opus 4.5\"\n        },\n        {\n          \"name\": \"claude-sonnet-4-5\",\n          \"display_name\": \"Claude Sonnet 4.5\"\n        }\n      ]\n    },\n    \"vertex_ai\": {\n      \"default_model\": \"gemini-3-pro-preview\",\n      \"additional_visible_models\": [\n        {\n          \"name\": \"gemini-3-pro-preview\",\n          \"display_name\": \"Gemini 3 Pro\"\n        },\n        {\n          \"name\": \"gemini-3-flash-preview\",\n          \"display_name\": \"Gemini 3 Flash\"\n        }\n      ]\n    },\n    \"openrouter\": {\n      \"default_model\": \"z-ai/glm-4.7\",\n      \"additional_visible_models\": [\n        {\n          \"name\": \"z-ai/glm-4.7\",\n          \"display_name\": \"GLM 4.7\"\n        },\n        {\n          \"name\": \"deepseek/deepseek-v3.2\",\n          \"display_name\": \"DeepSeek V3.2\"\n        },\n        {\n          \"name\": \"qwen/qwen3-235b-a22b-2507\",\n          \"display_name\": \"Qwen3 235B A22B Instruct 2507\"\n        },\n        {\n          \"name\": \"moonshotai/kimi-k2-0905\",\n          \"display_name\": \"Kimi K2 0905\"\n        }\n      ]\n    }\n  }\n}\n"
  },
  {
    "path": "backend/onyx/main.py",
    "content": "import logging\nimport sys\nimport traceback\nimport warnings\nfrom collections.abc import AsyncGenerator\nfrom contextlib import asynccontextmanager\nfrom typing import Any\nfrom typing import cast\n\nimport sentry_sdk\nimport uvicorn\nfrom fastapi import APIRouter\nfrom fastapi import FastAPI\nfrom fastapi import HTTPException\nfrom fastapi import Request\nfrom fastapi import status\nfrom fastapi.exceptions import RequestValidationError\nfrom fastapi.middleware.cors import CORSMiddleware\nfrom fastapi.responses import JSONResponse\nfrom fastapi.routing import APIRoute\nfrom httpx_oauth.clients.google import GoogleOAuth2\nfrom httpx_oauth.clients.openid import BASE_SCOPES\nfrom httpx_oauth.clients.openid import OpenID\nfrom sentry_sdk.integrations.fastapi import FastApiIntegration\nfrom sentry_sdk.integrations.starlette import StarletteIntegration\nfrom starlette.types import Lifespan\n\nfrom onyx import __version__\nfrom onyx.auth.schemas import UserCreate\nfrom onyx.auth.schemas import UserRead\nfrom onyx.auth.schemas import UserUpdate\nfrom onyx.auth.users import auth_backend\nfrom onyx.auth.users import create_onyx_oauth_router\nfrom onyx.auth.users import fastapi_users\nfrom onyx.cache.interface import CacheBackendType\nfrom onyx.configs.app_configs import APP_API_PREFIX\nfrom onyx.configs.app_configs import APP_HOST\nfrom onyx.configs.app_configs import APP_PORT\nfrom onyx.configs.app_configs import AUTH_RATE_LIMITING_ENABLED\nfrom onyx.configs.app_configs import AUTH_TYPE\nfrom onyx.configs.app_configs import CACHE_BACKEND\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.app_configs import LOG_ENDPOINT_LATENCY\nfrom onyx.configs.app_configs import OAUTH_CLIENT_ID\nfrom onyx.configs.app_configs import OAUTH_CLIENT_SECRET\nfrom onyx.configs.app_configs import OAUTH_ENABLED\nfrom onyx.configs.app_configs import OIDC_PKCE_ENABLED\nfrom onyx.configs.app_configs import OIDC_SCOPE_OVERRIDE\nfrom onyx.configs.app_configs import OPENID_CONFIG_URL\nfrom onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW\nfrom onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE\nfrom onyx.configs.app_configs import POSTGRES_API_SERVER_READ_ONLY_POOL_OVERFLOW\nfrom onyx.configs.app_configs import POSTGRES_API_SERVER_READ_ONLY_POOL_SIZE\nfrom onyx.configs.app_configs import SYSTEM_RECURSION_LIMIT\nfrom onyx.configs.app_configs import USER_AUTH_SECRET\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.constants import AuthType\nfrom onyx.configs.constants import POSTGRES_WEB_APP_NAME\nfrom onyx.db.engine.async_sql_engine import get_sqlalchemy_async_engine\nfrom onyx.db.engine.connection_warmup import warm_up_connections\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.error_handling.exceptions import register_onyx_exception_handlers\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.hooks.registry import validate_registry\nfrom onyx.server.api_key.api import router as api_key_router\nfrom onyx.server.auth_check import check_router_auth\nfrom onyx.server.documents.cc_pair import router as cc_pair_router\nfrom onyx.server.documents.connector import router as connector_router\nfrom onyx.server.documents.credential import router as credential_router\nfrom onyx.server.documents.document import router as document_router\nfrom onyx.server.documents.standard_oauth import router as standard_oauth_router\nfrom onyx.server.features.build.api.api import public_build_router\nfrom onyx.server.features.build.api.api import router as build_router\nfrom onyx.server.features.default_assistant.api import (\n    router as default_assistant_router,\n)\nfrom onyx.server.features.document_set.api import router as document_set_router\nfrom onyx.server.features.hierarchy.api import router as hierarchy_router\nfrom onyx.server.features.input_prompt.api import (\n    admin_router as admin_input_prompt_router,\n)\nfrom onyx.server.features.input_prompt.api import (\n    basic_router as input_prompt_router,\n)\nfrom onyx.server.features.mcp.api import admin_router as mcp_admin_router\nfrom onyx.server.features.mcp.api import router as mcp_router\nfrom onyx.server.features.notifications.api import router as notification_router\nfrom onyx.server.features.oauth_config.api import (\n    admin_router as admin_oauth_config_router,\n)\nfrom onyx.server.features.oauth_config.api import router as oauth_config_router\nfrom onyx.server.features.password.api import router as password_router\nfrom onyx.server.features.persona.api import admin_agents_router\nfrom onyx.server.features.persona.api import admin_router as admin_persona_router\nfrom onyx.server.features.persona.api import agents_router\nfrom onyx.server.features.persona.api import basic_router as persona_router\nfrom onyx.server.features.projects.api import router as projects_router\nfrom onyx.server.features.tool.api import admin_router as admin_tool_router\nfrom onyx.server.features.tool.api import router as tool_router\nfrom onyx.server.features.user_oauth_token.api import router as user_oauth_token_router\nfrom onyx.server.features.web_search.api import router as web_search_router\nfrom onyx.server.federated.api import router as federated_router\nfrom onyx.server.kg.api import admin_router as kg_admin_router\nfrom onyx.server.manage.administrative import router as admin_router\nfrom onyx.server.manage.code_interpreter.api import (\n    admin_router as code_interpreter_admin_router,\n)\nfrom onyx.server.manage.discord_bot.api import router as discord_bot_router\nfrom onyx.server.manage.embedding.api import admin_router as embedding_admin_router\nfrom onyx.server.manage.embedding.api import basic_router as embedding_router\nfrom onyx.server.manage.get_state import router as state_router\nfrom onyx.server.manage.image_generation.api import (\n    admin_router as image_generation_admin_router,\n)\nfrom onyx.server.manage.llm.api import admin_router as llm_admin_router\nfrom onyx.server.manage.llm.api import basic_router as llm_router\nfrom onyx.server.manage.opensearch_migration.api import (\n    admin_router as opensearch_migration_admin_router,\n)\nfrom onyx.server.manage.search_settings import router as search_settings_router\nfrom onyx.server.manage.slack_bot import router as slack_bot_management_router\nfrom onyx.server.manage.users import router as user_router\nfrom onyx.server.manage.voice.api import admin_router as voice_admin_router\nfrom onyx.server.manage.voice.user_api import router as voice_router\nfrom onyx.server.manage.voice.websocket_api import router as voice_websocket_router\nfrom onyx.server.manage.web_search.api import (\n    admin_router as web_search_admin_router,\n)\nfrom onyx.server.metrics.postgres_connection_pool import (\n    setup_postgres_connection_pool_metrics,\n)\nfrom onyx.server.metrics.prometheus_setup import setup_prometheus_metrics\nfrom onyx.server.middleware.latency_logging import add_latency_logging_middleware\nfrom onyx.server.middleware.rate_limiting import close_auth_limiter\nfrom onyx.server.middleware.rate_limiting import get_auth_rate_limiters\nfrom onyx.server.middleware.rate_limiting import setup_auth_limiter\nfrom onyx.server.onyx_api.ingestion import router as onyx_api_router\nfrom onyx.server.pat.api import router as pat_router\nfrom onyx.server.query_and_chat.chat_backend import router as chat_router\nfrom onyx.server.query_and_chat.query_backend import (\n    admin_router as admin_query_router,\n)\nfrom onyx.server.query_and_chat.query_backend import basic_router as query_router\nfrom onyx.server.saml import router as saml_router\nfrom onyx.server.settings.api import admin_router as settings_admin_router\nfrom onyx.server.settings.api import basic_router as settings_router\nfrom onyx.server.token_rate_limits.api import (\n    router as token_rate_limit_settings_router,\n)\nfrom onyx.server.utils import BasicAuthenticationError\nfrom onyx.setup import setup_multitenant_onyx\nfrom onyx.setup import setup_onyx\nfrom onyx.tracing.setup import setup_tracing\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.logger import setup_uvicorn_logger\nfrom onyx.utils.middleware import add_endpoint_context_middleware\nfrom onyx.utils.middleware import add_onyx_request_id_middleware\nfrom onyx.utils.telemetry import get_or_generate_uuid\nfrom onyx.utils.telemetry import optional_telemetry\nfrom onyx.utils.telemetry import RecordType\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import global_version\nfrom onyx.utils.variable_functionality import set_is_ee_based_on_env_variable\nfrom shared_configs.configs import CORS_ALLOWED_ORIGIN\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.configs import SENTRY_DSN\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\nwarnings.filterwarnings(\n    \"ignore\", category=ResourceWarning, message=r\"Unclosed client session\"\n)\nwarnings.filterwarnings(\n    \"ignore\", category=ResourceWarning, message=r\"Unclosed connector\"\n)\n\nlogger = setup_logger()\n\nfile_handlers = [\n    h for h in logger.logger.handlers if isinstance(h, logging.FileHandler)\n]\n\nsetup_uvicorn_logger(shared_file_handlers=file_handlers)\n\n\ndef validation_exception_handler(request: Request, exc: Exception) -> JSONResponse:\n    if not isinstance(exc, RequestValidationError):\n        logger.error(\n            f\"Unexpected exception type in validation_exception_handler - {type(exc)}\"\n        )\n        raise exc\n\n    exc_str = f\"{exc}\".replace(\"\\n\", \" \").replace(\"   \", \" \")\n    logger.exception(f\"{request}: {exc_str}\")\n    content = {\"status_code\": 422, \"message\": exc_str, \"data\": None}\n    return JSONResponse(content=content, status_code=422)\n\n\ndef value_error_handler(_: Request, exc: Exception) -> JSONResponse:\n    if not isinstance(exc, ValueError):\n        logger.error(f\"Unexpected exception type in value_error_handler - {type(exc)}\")\n        raise exc\n\n    try:\n        raise (exc)\n    except Exception:\n        # log stacktrace\n        logger.exception(\"ValueError\")\n    return JSONResponse(\n        status_code=400,\n        content={\"message\": str(exc)},\n    )\n\n\ndef use_route_function_names_as_operation_ids(app: FastAPI) -> None:\n    \"\"\"\n    OpenAPI generation defaults to naming the operation with the\n    function + route + HTTP method, which usually looks very redundant.\n\n    This function changes the operation IDs to be just the function name.\n\n    Should be called only after all routes have been added.\n    \"\"\"\n    for route in app.routes:\n        if isinstance(route, APIRoute):\n            route.operation_id = route.name\n\n\ndef include_router_with_global_prefix_prepended(\n    application: FastAPI, router: APIRouter, **kwargs: Any\n) -> None:\n    \"\"\"Adds the global prefix to all routes in the router.\"\"\"\n    processed_global_prefix = f\"/{APP_API_PREFIX.strip('/')}\" if APP_API_PREFIX else \"\"\n\n    passed_in_prefix = cast(str | None, kwargs.get(\"prefix\"))\n    if passed_in_prefix:\n        final_prefix = f\"{processed_global_prefix}/{passed_in_prefix.strip('/')}\"\n    else:\n        final_prefix = f\"{processed_global_prefix}\"\n    final_kwargs: dict[str, Any] = {\n        **kwargs,\n        \"prefix\": final_prefix,\n    }\n\n    application.include_router(router, **final_kwargs)\n\n\ndef include_auth_router_with_prefix(\n    application: FastAPI,\n    router: APIRouter,\n    prefix: str | None = None,\n    tags: list[str] | None = None,\n) -> None:\n    \"\"\"Wrapper function to include an 'auth' router with prefix + rate-limiting dependencies.\"\"\"\n    final_tags = tags or [\"auth\"]\n    include_router_with_global_prefix_prepended(\n        application,\n        router,\n        prefix=prefix,\n        tags=final_tags,\n        dependencies=get_auth_rate_limiters(),\n    )\n\n\ndef validate_cache_backend_settings() -> None:\n    \"\"\"Validate that CACHE_BACKEND=postgres is only used with DISABLE_VECTOR_DB.\n\n    The Postgres cache backend eliminates the Redis dependency, but only works\n    when Celery is not running (which requires DISABLE_VECTOR_DB=true).\n    \"\"\"\n    if CACHE_BACKEND == CacheBackendType.POSTGRES and not DISABLE_VECTOR_DB:\n        raise RuntimeError(\n            \"CACHE_BACKEND=postgres requires DISABLE_VECTOR_DB=true. \"\n            \"The Postgres cache backend is only supported in no-vector-DB \"\n            \"deployments where Celery is replaced by the in-process task runner.\"\n        )\n\n\ndef validate_no_vector_db_settings() -> None:\n    \"\"\"Validate that DISABLE_VECTOR_DB is not combined with incompatible settings.\n\n    Raises RuntimeError if DISABLE_VECTOR_DB is set alongside MULTI_TENANT or ENABLE_CRAFT,\n    since these modes require infrastructure that is removed in no-vector-DB deployments.\n    \"\"\"\n    if not DISABLE_VECTOR_DB:\n        return\n\n    if MULTI_TENANT:\n        raise RuntimeError(\n            \"DISABLE_VECTOR_DB cannot be used with MULTI_TENANT. \"\n            \"Multi-tenant deployments require the vector database for \"\n            \"per-tenant document indexing and search. Run in single-tenant \"\n            \"mode when disabling the vector database.\"\n        )\n\n    from onyx.server.features.build.configs import ENABLE_CRAFT\n\n    if ENABLE_CRAFT:\n        raise RuntimeError(\n            \"DISABLE_VECTOR_DB cannot be used with ENABLE_CRAFT. \"\n            \"Onyx Craft requires background workers for sandbox lifecycle \"\n            \"management, which are removed in no-vector-DB deployments. \"\n            \"Disable Craft (ENABLE_CRAFT=false) when disabling the vector database.\"\n        )\n\n\n@asynccontextmanager\nasync def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:  # noqa: ARG001\n    validate_no_vector_db_settings()\n    validate_cache_backend_settings()\n    validate_registry()\n\n    # Set recursion limit\n    if SYSTEM_RECURSION_LIMIT is not None:\n        sys.setrecursionlimit(SYSTEM_RECURSION_LIMIT)\n        logger.notice(f\"System recursion limit set to {SYSTEM_RECURSION_LIMIT}\")\n\n    SqlEngine.set_app_name(POSTGRES_WEB_APP_NAME)\n\n    SqlEngine.init_engine(\n        pool_size=POSTGRES_API_SERVER_POOL_SIZE,\n        max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW,\n    )\n    SqlEngine.get_engine()\n\n    SqlEngine.init_readonly_engine(\n        pool_size=POSTGRES_API_SERVER_READ_ONLY_POOL_SIZE,\n        max_overflow=POSTGRES_API_SERVER_READ_ONLY_POOL_OVERFLOW,\n    )\n\n    # Register pool metrics now that engines are created.\n    # HTTP instrumentation is set up earlier in get_application() since it\n    # adds middleware (which Starlette forbids after the app has started).\n    setup_postgres_connection_pool_metrics(\n        engines={\n            \"sync\": SqlEngine.get_engine(),\n            \"async\": get_sqlalchemy_async_engine(),\n            \"readonly\": SqlEngine.get_readonly_engine(),\n        },\n    )\n\n    verify_auth = fetch_versioned_implementation(\n        \"onyx.auth.users\", \"verify_auth_setting\"\n    )\n\n    # Will throw exception if an issue is found\n    verify_auth()\n\n    if OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET:\n        logger.notice(\"Both OAuth Client ID and Secret are configured.\")\n\n    # Initialize tracing if credentials are provided\n    setup_tracing()\n\n    # fill up Postgres connection pools\n    await warm_up_connections()\n\n    if not MULTI_TENANT:\n        # We cache this at the beginning so there is no delay in the first telemetry\n        CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)\n        get_or_generate_uuid()\n\n        # If we are multi-tenant, we need to only set up initial public tables\n        with get_session_with_current_tenant() as db_session:\n            setup_onyx(db_session, POSTGRES_DEFAULT_SCHEMA)\n            # set up the file store (e.g. create bucket if needed). On multi-tenant,\n            # this is done via IaC\n            get_default_file_store().initialize()\n    else:\n        setup_multitenant_onyx()\n\n    if not MULTI_TENANT:\n        # don't emit a metric for every pod rollover/restart\n        optional_telemetry(\n            record_type=RecordType.VERSION, data={\"version\": __version__}\n        )\n\n    if AUTH_RATE_LIMITING_ENABLED:\n        await setup_auth_limiter()\n\n    if DISABLE_VECTOR_DB:\n        from onyx.background.periodic_poller import recover_stuck_user_files\n        from onyx.background.periodic_poller import start_periodic_poller\n\n        recover_stuck_user_files(POSTGRES_DEFAULT_SCHEMA)\n        start_periodic_poller(POSTGRES_DEFAULT_SCHEMA)\n\n    yield\n\n    if DISABLE_VECTOR_DB:\n        from onyx.background.periodic_poller import stop_periodic_poller\n\n        stop_periodic_poller()\n\n    SqlEngine.reset_engine()\n\n    if AUTH_RATE_LIMITING_ENABLED:\n        await close_auth_limiter()\n\n\ndef log_http_error(request: Request, exc: Exception) -> JSONResponse:\n    status_code = getattr(exc, \"status_code\", 500)\n\n    if isinstance(exc, BasicAuthenticationError):\n        # For BasicAuthenticationError, just log a brief message without stack trace\n        # (almost always spammy)\n        logger.debug(f\"Authentication failed: {str(exc)}\")\n\n    elif status_code == 404 and request.url.path == \"/metrics\":\n        # Log 404 errors for the /metrics endpoint with debug level\n        logger.debug(f\"404 error for /metrics endpoint: {str(exc)}\")\n\n    elif status_code >= 400:\n        error_msg = f\"{str(exc)}\\n\"\n        error_msg += \"\".join(traceback.format_tb(exc.__traceback__))\n        logger.error(error_msg)\n\n    detail = exc.detail if isinstance(exc, HTTPException) else str(exc)\n    return JSONResponse(\n        status_code=status_code,\n        content={\"detail\": detail},\n    )\n\n\ndef get_application(lifespan_override: Lifespan | None = None) -> FastAPI:\n    application = FastAPI(\n        title=\"Onyx Backend\",\n        version=__version__,\n        description=\"Onyx API for AI-powered chat with search, document indexing, agents, actions, and more\",\n        servers=[\n            {\"url\": f\"{WEB_DOMAIN.rstrip('/')}/api\", \"description\": \"Onyx API Server\"}\n        ],\n        lifespan=lifespan_override or lifespan,\n    )\n    if SENTRY_DSN:\n        sentry_sdk.init(\n            dsn=SENTRY_DSN,\n            integrations=[StarletteIntegration(), FastApiIntegration()],\n            traces_sample_rate=0.1,\n            release=__version__,\n        )\n        logger.info(\"Sentry initialized\")\n    else:\n        logger.debug(\"Sentry DSN not provided, skipping Sentry initialization\")\n\n    application.add_exception_handler(status.HTTP_400_BAD_REQUEST, log_http_error)\n    application.add_exception_handler(status.HTTP_401_UNAUTHORIZED, log_http_error)\n    application.add_exception_handler(status.HTTP_403_FORBIDDEN, log_http_error)\n    application.add_exception_handler(status.HTTP_404_NOT_FOUND, log_http_error)\n    application.add_exception_handler(\n        status.HTTP_500_INTERNAL_SERVER_ERROR, log_http_error\n    )\n\n    register_onyx_exception_handlers(application)\n\n    include_router_with_global_prefix_prepended(application, password_router)\n    include_router_with_global_prefix_prepended(application, chat_router)\n    include_router_with_global_prefix_prepended(application, query_router)\n    include_router_with_global_prefix_prepended(application, document_router)\n    include_router_with_global_prefix_prepended(application, user_router)\n    include_router_with_global_prefix_prepended(application, admin_query_router)\n    include_router_with_global_prefix_prepended(application, admin_router)\n    include_router_with_global_prefix_prepended(application, connector_router)\n    include_router_with_global_prefix_prepended(application, credential_router)\n    include_router_with_global_prefix_prepended(application, input_prompt_router)\n    include_router_with_global_prefix_prepended(application, admin_input_prompt_router)\n    include_router_with_global_prefix_prepended(application, cc_pair_router)\n    include_router_with_global_prefix_prepended(application, projects_router)\n    include_router_with_global_prefix_prepended(application, public_build_router)\n    include_router_with_global_prefix_prepended(application, build_router)\n    include_router_with_global_prefix_prepended(application, document_set_router)\n    include_router_with_global_prefix_prepended(application, hierarchy_router)\n    include_router_with_global_prefix_prepended(application, search_settings_router)\n    include_router_with_global_prefix_prepended(\n        application, slack_bot_management_router\n    )\n    include_router_with_global_prefix_prepended(application, discord_bot_router)\n    include_router_with_global_prefix_prepended(application, persona_router)\n    include_router_with_global_prefix_prepended(application, admin_persona_router)\n    include_router_with_global_prefix_prepended(application, agents_router)\n    include_router_with_global_prefix_prepended(application, admin_agents_router)\n    include_router_with_global_prefix_prepended(application, default_assistant_router)\n    include_router_with_global_prefix_prepended(application, notification_router)\n    include_router_with_global_prefix_prepended(application, tool_router)\n    include_router_with_global_prefix_prepended(application, admin_tool_router)\n    include_router_with_global_prefix_prepended(application, oauth_config_router)\n    include_router_with_global_prefix_prepended(application, admin_oauth_config_router)\n    include_router_with_global_prefix_prepended(application, user_oauth_token_router)\n    include_router_with_global_prefix_prepended(application, state_router)\n    include_router_with_global_prefix_prepended(application, onyx_api_router)\n    include_router_with_global_prefix_prepended(application, settings_router)\n    include_router_with_global_prefix_prepended(application, settings_admin_router)\n    include_router_with_global_prefix_prepended(application, llm_admin_router)\n    include_router_with_global_prefix_prepended(application, kg_admin_router)\n    include_router_with_global_prefix_prepended(application, llm_router)\n    include_router_with_global_prefix_prepended(\n        application, code_interpreter_admin_router\n    )\n    include_router_with_global_prefix_prepended(\n        application, image_generation_admin_router\n    )\n    include_router_with_global_prefix_prepended(application, embedding_admin_router)\n    include_router_with_global_prefix_prepended(application, embedding_router)\n    include_router_with_global_prefix_prepended(application, web_search_router)\n    include_router_with_global_prefix_prepended(application, web_search_admin_router)\n    include_router_with_global_prefix_prepended(application, voice_admin_router)\n    include_router_with_global_prefix_prepended(application, voice_router)\n    include_router_with_global_prefix_prepended(application, voice_websocket_router)\n    include_router_with_global_prefix_prepended(\n        application, opensearch_migration_admin_router\n    )\n    include_router_with_global_prefix_prepended(\n        application, token_rate_limit_settings_router\n    )\n    include_router_with_global_prefix_prepended(application, api_key_router)\n    include_router_with_global_prefix_prepended(application, standard_oauth_router)\n    include_router_with_global_prefix_prepended(application, federated_router)\n    include_router_with_global_prefix_prepended(application, mcp_router)\n    include_router_with_global_prefix_prepended(application, mcp_admin_router)\n\n    include_router_with_global_prefix_prepended(application, pat_router)\n\n    if AUTH_TYPE == AuthType.BASIC or AUTH_TYPE == AuthType.CLOUD:\n        include_auth_router_with_prefix(\n            application,\n            fastapi_users.get_auth_router(auth_backend),\n            prefix=\"/auth\",\n        )\n\n        include_auth_router_with_prefix(\n            application,\n            fastapi_users.get_register_router(UserRead, UserCreate),\n            prefix=\"/auth\",\n        )\n\n        include_auth_router_with_prefix(\n            application,\n            fastapi_users.get_reset_password_router(),\n            prefix=\"/auth\",\n        )\n        include_auth_router_with_prefix(\n            application,\n            fastapi_users.get_verify_router(UserRead),\n            prefix=\"/auth\",\n        )\n        include_auth_router_with_prefix(\n            application,\n            fastapi_users.get_users_router(UserRead, UserUpdate),\n            prefix=\"/users\",\n        )\n\n    # Register Google OAuth when AUTH_TYPE is GOOGLE_OAUTH, or when\n    # AUTH_TYPE is BASIC and OAuth credentials are configured\n    if AUTH_TYPE == AuthType.GOOGLE_OAUTH or (\n        AUTH_TYPE == AuthType.BASIC and OAUTH_ENABLED\n    ):\n        oauth_client = GoogleOAuth2(\n            OAUTH_CLIENT_ID,\n            OAUTH_CLIENT_SECRET,\n            scopes=[\"openid\", \"email\", \"profile\"],\n        )\n        include_auth_router_with_prefix(\n            application,\n            create_onyx_oauth_router(\n                oauth_client,\n                auth_backend,\n                USER_AUTH_SECRET,\n                associate_by_email=True,\n                is_verified_by_default=True,\n                redirect_url=f\"{WEB_DOMAIN}/auth/oauth/callback\",\n            ),\n            prefix=\"/auth/oauth\",\n        )\n\n        # Need logout router for GOOGLE_OAUTH only (BASIC already has it from above)\n        if AUTH_TYPE == AuthType.GOOGLE_OAUTH:\n            include_auth_router_with_prefix(\n                application,\n                fastapi_users.get_logout_router(auth_backend),\n                prefix=\"/auth\",\n            )\n\n    if AUTH_TYPE == AuthType.OIDC:\n        # Ensure we request offline_access for refresh tokens\n        try:\n            oidc_scopes = list(OIDC_SCOPE_OVERRIDE or BASE_SCOPES)\n            if \"offline_access\" not in oidc_scopes:\n                oidc_scopes.append(\"offline_access\")\n        except Exception as e:\n            logger.warning(f\"Error configuring OIDC scopes: {e}\")\n            # Fall back to default scopes if there's an error\n            oidc_scopes = BASE_SCOPES\n\n        include_auth_router_with_prefix(\n            application,\n            create_onyx_oauth_router(\n                OpenID(\n                    OAUTH_CLIENT_ID,\n                    OAUTH_CLIENT_SECRET,\n                    OPENID_CONFIG_URL,\n                    # Use the configured scopes\n                    base_scopes=oidc_scopes,\n                ),\n                auth_backend,\n                USER_AUTH_SECRET,\n                associate_by_email=True,\n                is_verified_by_default=True,\n                redirect_url=f\"{WEB_DOMAIN}/auth/oidc/callback\",\n                enable_pkce=OIDC_PKCE_ENABLED,\n            ),\n            prefix=\"/auth/oidc\",\n        )\n\n        # need basic auth router for `logout` endpoint\n        include_auth_router_with_prefix(\n            application,\n            fastapi_users.get_auth_router(auth_backend),\n            prefix=\"/auth\",\n        )\n\n    elif AUTH_TYPE == AuthType.SAML:\n        include_auth_router_with_prefix(\n            application,\n            saml_router,\n        )\n\n    if (\n        AUTH_TYPE == AuthType.CLOUD\n        or AUTH_TYPE == AuthType.BASIC\n        or AUTH_TYPE == AuthType.GOOGLE_OAUTH\n        or AUTH_TYPE == AuthType.OIDC\n    ):\n        # Add refresh token endpoint for OAuth as well\n        include_auth_router_with_prefix(\n            application,\n            fastapi_users.get_refresh_router(auth_backend),\n            prefix=\"/auth\",\n        )\n\n    application.add_exception_handler(\n        RequestValidationError, validation_exception_handler\n    )\n\n    application.add_exception_handler(ValueError, value_error_handler)\n\n    application.add_middleware(\n        CORSMiddleware,\n        allow_origins=CORS_ALLOWED_ORIGIN,  # Configurable via environment variable\n        allow_credentials=True,\n        allow_methods=[\"*\"],\n        allow_headers=[\"*\"],\n    )\n    if LOG_ENDPOINT_LATENCY:\n        add_latency_logging_middleware(application, logger)\n\n    add_onyx_request_id_middleware(application, \"API\", logger)\n\n    # Set endpoint context for per-endpoint DB pool attribution metrics.\n    # Must be registered after all routes are added.\n    add_endpoint_context_middleware(application)\n\n    # HTTP request metrics (latency histograms, in-progress gauge, slow request\n    # counter). Must be called here — before the app starts — because the\n    # instrumentator adds middleware via app.add_middleware().\n    setup_prometheus_metrics(application)\n\n    # Ensure all routes have auth enabled or are explicitly marked as public\n    check_router_auth(application)\n\n    use_route_function_names_as_operation_ids(application)\n\n    return application\n\n\n# NOTE: needs to be outside of the `if __name__ == \"__main__\"` block so that the\n# app is exportable\nset_is_ee_based_on_env_variable()\napp = fetch_versioned_implementation(module=\"onyx.main\", attribute=\"get_application\")\n\n\nif __name__ == \"__main__\":\n    logger.notice(\n        f\"Starting Onyx Backend version {__version__} on http://{APP_HOST}:{str(APP_PORT)}/\"\n    )\n\n    if global_version.is_ee_version():\n        logger.notice(\"Running Enterprise Edition\")\n\n    uvicorn.run(app, host=APP_HOST, port=APP_PORT)\n"
  },
  {
    "path": "backend/onyx/mcp_server/README.md",
    "content": "# Onyx MCP Server\n\n## Overview\n\nThe Onyx MCP server allows LLMs to connect to your Onyx instance and access its knowledge base and search capabilities through the [Model Context Protocol (MCP)](https://modelcontextprotocol.io/).\n\nWith the Onyx MCP Server, you can search your knowledgebase,\ngive your LLMs web search, and upload and manage documents in Onyx.\n\nAll access controls are managed within the main Onyx application.\n\n### Authentication\n\nProvide an Onyx Personal Access Token or API Key in the `Authorization` header as a Bearer token.\nThe MCP server quickly validates and passes through the token on every request.\n\nDepending on usage, the MCP Server may support OAuth and stdio in the future.\n\n### Default Configuration\n- **Transport**: HTTP POST (MCP over HTTP)\n- **Port**: 8090 (shares domain with API server)\n- **Framework**: FastMCP with FastAPI wrapper\n- **Database**: None (all work delegates to the API server)\n\n### Architecture\n\nThe MCP server is built on [FastMCP](https://github.com/jlowin/fastmcp) and runs alongside the main Onyx API server:\n\n```\n┌─────────────────┐\n│  LLM Client     │\n│  (Claude, etc)  │\n└────────┬────────┘\n         │ MCP over HTTP\n         │ (POST with bearer)\n         ▼\n┌─────────────────┐\n│  MCP Server     │\n│  Port 8090      │\n│  ├─ Auth        │\n│  ├─ Tools       │\n│  └─ Resources   │\n└────────┬────────┘\n         │ Internal HTTP\n         │ (authenticated)\n         ▼\n┌─────────────────┐\n│  API Server     │\n│  Port 8080      │\n│  ├─ /me (auth)  │\n│  ├─ Search APIs │\n│  └─ ACL checks  │\n└─────────────────┘\n```\n\n## Configuring MCP Clients\n\n### Claude Desktop\n\nAdd to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):\n\n```json\n{\n  \"mcpServers\": {\n    \"onyx\": {\n      \"url\": \"https://[YOUR_ONYX_DOMAIN]:8090/\",\n      \"transport\": \"http\",\n      \"headers\": {\n        \"Authorization\": \"Bearer YOUR_ONYX_TOKEN_HERE\"\n      }\n    }\n  }\n}\n```\n\n### Other MCP Clients\n\nMost MCP clients support HTTP transport with custom headers. Refer to your client's documentation for configuration details.\n\n## Capabilities\n\n### Tools\n\nThe server provides three tools for searching and retrieving information:\n\n1. `search_indexed_documents`\nSearch the user's private knowledge base indexed in Onyx. Returns ranked documents with content snippets, scores, and metadata.\n\n2. `search_web`\nSearch the public internet for current events and general knowledge. Returns web search results with titles, URLs, and snippets.\n\n3. `open_urls`\nRetrieve the complete text content from specific web URLs. Useful for fetching full page content after finding relevant URLs via `search_web`.\n\n### Resources\n\n1. `indexed_sources`\nLists all document sources currently indexed in the tenant (e.g., `\"confluence\"`, `\"github\"`). Use these values to filter results when calling `search_indexed_documents`.\n\n## Local Development\n\n### Running the MCP Server\n\nThe MCP Server automatically launches with the `Run All Onyx Services` task from the default launch.json.\n\nYou can also independently launch the Server via the vscode debugger.\n\n### Testing with MCP Inspector\n\nThe [MCP Inspector](https://github.com/modelcontextprotocol/inspector) is a debugging tool for MCP servers:\n\n```bash\nnpx @modelcontextprotocol/inspector http://localhost:8090/\n```\n\n**Setup in Inspector:**\n\n1. Ignore the OAuth configuration menus\n2. Open the **Authentication** tab\n3. Select **Bearer Token** authentication\n4. Paste your Onyx bearer token\n5. Click **Connect**\n\nOnce connected, you can:\n- Browse available tools\n- Test tool calls with different parameters\n- View request/response payloads\n- Debug authentication issues\n\n### Health Check\n\nVerify the server is running:\n\n```bash\ncurl http://localhost:8090/health\n```\n\nExpected response:\n```json\n{\n  \"status\": \"healthy\",\n  \"service\": \"mcp_server\"\n}\n```\n\n### Environment Variables\n\n**MCP Server Configuration:**\n- `MCP_SERVER_ENABLED`: Enable MCP server (set to \"true\" to enable, default: disabled)\n- `MCP_SERVER_PORT`: Port for MCP server (default: 8090)\n- `MCP_SERVER_CORS_ORIGINS`: Comma-separated CORS origins (optional)\n\n**API Server Connection:**\n- `API_SERVER_PROTOCOL`: Protocol for API server connection (default: \"http\")\n- `API_SERVER_HOST`: Hostname for API server connection (default: \"127.0.0.1\")\n- `API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS`: Optional override URL. If set, takes precedence over the protocol/host variables. Used for self-hosting the MCP server with Onyx Cloud as the backend."
  },
  {
    "path": "backend/onyx/mcp_server/api.py",
    "content": "\"\"\"MCP server with FastAPI wrapper.\"\"\"\n\nfrom collections.abc import AsyncGenerator\nfrom contextlib import asynccontextmanager\n\nfrom fastapi import FastAPI\nfrom fastapi import Request\nfrom fastapi.middleware.cors import CORSMiddleware\nfrom fastapi.responses import JSONResponse\nfrom fastapi.responses import Response\nfrom fastmcp import FastMCP\nfrom starlette.datastructures import MutableHeaders\nfrom starlette.middleware.base import RequestResponseEndpoint\nfrom starlette.types import Receive\nfrom starlette.types import Scope\nfrom starlette.types import Send\n\nfrom onyx.configs.app_configs import MCP_SERVER_CORS_ORIGINS\nfrom onyx.mcp_server.auth import OnyxTokenVerifier\nfrom onyx.mcp_server.utils import shutdown_http_client\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nlogger.info(\"Creating Onyx MCP Server...\")\n\nmcp_server = FastMCP(\n    name=\"Onyx MCP Server\",\n    version=\"1.0.0\",\n    auth=OnyxTokenVerifier(),\n)\n\n# Import tools and resources AFTER mcp_server is created to avoid circular imports\n# Components register themselves via decorators on the shared mcp_server instance\nfrom onyx.mcp_server.tools import search  # noqa: E402, F401\nfrom onyx.mcp_server.resources import indexed_sources  # noqa: E402, F401\n\nlogger.info(\"MCP server instance created\")\n\n\ndef create_mcp_fastapi_app() -> FastAPI:\n    \"\"\"Create FastAPI app wrapping MCP server with auth and shared client lifecycle.\"\"\"\n    mcp_asgi_app = mcp_server.http_app(path=\"/\")\n\n    async def _ensure_streamable_accept_header(\n        scope: Scope, receive: Receive, send: Send\n    ) -> None:\n        \"\"\"Ensure Accept header includes types required by FastMCP streamable HTTP.\"\"\"\n        if scope.get(\"type\") == \"http\":\n            headers = MutableHeaders(scope=scope)\n            accept = headers.get(\"accept\", \"\")\n            accept_lower = accept.lower()\n\n            if (\n                not accept\n                or accept == \"*/*\"\n                or \"application/json\" not in accept_lower\n                or \"text/event-stream\" not in accept_lower\n            ):\n                headers[\"accept\"] = \"application/json, text/event-stream\"\n\n        await mcp_asgi_app(scope, receive, send)\n\n    @asynccontextmanager\n    async def combined_lifespan(app: FastAPI) -> AsyncGenerator[None, None]:\n        \"\"\"Initializes MCP session manager.\"\"\"\n        logger.info(\"MCP server starting up\")\n\n        try:\n            async with mcp_asgi_app.lifespan(app):\n                yield\n        finally:\n            logger.info(\"MCP server shutting down\")\n            await shutdown_http_client()\n\n    app = FastAPI(\n        title=\"Onyx MCP Server\",\n        description=\"HTTP POST transport with bearer auth delegated to API /me\",\n        version=\"1.0.0\",\n        lifespan=combined_lifespan,\n    )\n\n    # Public health check endpoint (bypasses MCP auth)\n    @app.middleware(\"http\")\n    async def health_check(\n        request: Request, call_next: RequestResponseEndpoint\n    ) -> Response:\n        if request.url.path.rstrip(\"/\") == \"/health\":\n            return JSONResponse({\"status\": \"healthy\", \"service\": \"mcp_server\"})\n        return await call_next(request)\n\n    # Authentication is handled by FastMCP's OnyxTokenVerifier (see auth.py)\n\n    if MCP_SERVER_CORS_ORIGINS:\n        logger.info(f\"CORS origins: {MCP_SERVER_CORS_ORIGINS}\")\n        app.add_middleware(\n            CORSMiddleware,\n            allow_origins=MCP_SERVER_CORS_ORIGINS,\n            allow_credentials=True,\n            allow_methods=[\"*\"],\n            allow_headers=[\"*\"],\n        )\n\n    app.mount(\"/\", _ensure_streamable_accept_header)\n\n    return app\n\n\nmcp_app = create_mcp_fastapi_app()\n"
  },
  {
    "path": "backend/onyx/mcp_server/auth.py",
    "content": "\"\"\"Authentication helpers for the Onyx MCP server.\"\"\"\n\nfrom typing import Optional\n\nfrom fastmcp.server.auth.auth import AccessToken\nfrom fastmcp.server.auth.auth import TokenVerifier\n\nfrom onyx.mcp_server.utils import get_http_client\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import build_api_server_url_for_http_requests\n\nlogger = setup_logger()\n\n\nclass OnyxTokenVerifier(TokenVerifier):\n    \"\"\"Validates bearer tokens by delegating to the API server.\"\"\"\n\n    async def verify_token(self, token: str) -> Optional[AccessToken]:\n        \"\"\"Call API /me to verify the token, return minimal AccessToken on success.\"\"\"\n        try:\n            response = await get_http_client().get(\n                f\"{build_api_server_url_for_http_requests(respect_env_override_if_set=True)}/me\",\n                headers={\"Authorization\": f\"Bearer {token}\"},\n            )\n        except Exception as exc:\n            logger.error(\n                \"MCP server failed to reach API /me for authentication: %s\",\n                exc,\n                exc_info=True,\n            )\n            return None\n\n        if response.status_code != 200:\n            logger.warning(\n                \"API server rejected MCP auth token with status %s\",\n                response.status_code,\n            )\n            return None\n\n        return AccessToken(\n            token=token,\n            client_id=\"mcp\",\n            scopes=[\"mcp:use\"],\n            expires_at=None,\n            resource=None,\n            claims={},\n        )\n"
  },
  {
    "path": "backend/onyx/mcp_server/mcp.json.template",
    "content": "{\n    \"mcpServers\": {\n      \"Onyx\": {\n        \"url\": \"https://cloud.onyx.app/mcp\",\n        \"headers\": {\n          \"Authorization\": \"Bearer [YOUR PAT OR API KEY HERE]\"\n        }\n      }\n    }\n  }"
  },
  {
    "path": "backend/onyx/mcp_server/resources/__init__.py",
    "content": "\"\"\"Resource registrations for the Onyx MCP server.\"\"\"\n\n# Import resource modules so decorators execute when the package loads.\nfrom onyx.mcp_server.resources import indexed_sources  # noqa: F401\n"
  },
  {
    "path": "backend/onyx/mcp_server/resources/indexed_sources.py",
    "content": "\"\"\"Resources that expose metadata for the Onyx MCP server.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom onyx.mcp_server.api import mcp_server\nfrom onyx.mcp_server.utils import get_indexed_sources\nfrom onyx.mcp_server.utils import require_access_token\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n@mcp_server.resource(\n    \"resource://indexed_sources\",\n    name=\"indexed_sources\",\n    description=(\n        \"Enumerate the user's document sources that are currently indexed in Onyx.\"\n        \"This can be used to discover filters for the `search_indexed_documents` tool.\"\n    ),\n    mime_type=\"application/json\",\n)\nasync def indexed_sources_resource() -> dict[str, Any]:\n    \"\"\"Return the list of indexed source types for search filtering.\"\"\"\n\n    access_token = require_access_token()\n\n    sources = await get_indexed_sources(access_token)\n\n    logger.info(\n        \"Onyx MCP Server: indexed_sources resource returning %s entries\",\n        len(sources),\n    )\n\n    return {\n        \"indexed_sources\": sorted(sources),\n    }\n"
  },
  {
    "path": "backend/onyx/mcp_server/tools/__init__.py",
    "content": "\"\"\"Tool registrations for the Onyx MCP server.\"\"\"\n\n# Import tool modules so decorators execute when the package is imported.\nfrom onyx.mcp_server.tools import search  # noqa: F401\n"
  },
  {
    "path": "backend/onyx/mcp_server/tools/search.py",
    "content": "\"\"\"Search tools for MCP server - document and web search.\"\"\"\n\nfrom datetime import datetime\nfrom typing import Any\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.mcp_server.api import mcp_server\nfrom onyx.mcp_server.utils import get_http_client\nfrom onyx.mcp_server.utils import get_indexed_sources\nfrom onyx.mcp_server.utils import require_access_token\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import build_api_server_url_for_http_requests\nfrom onyx.utils.variable_functionality import global_version\n\nlogger = setup_logger()\n\n\n@mcp_server.tool()\nasync def search_indexed_documents(\n    query: str,\n    source_types: list[str] | None = None,\n    time_cutoff: str | None = None,\n    limit: int = 10,\n) -> dict[str, Any]:\n    \"\"\"\n    Search the user's knowledge base indexed in Onyx.\n    Use this tool for information that is not public knowledge and specific to the user,\n    their team, their work, or their organization/company.\n\n    Note: In CE mode, this tool uses the chat endpoint internally which invokes an LLM\n    on every call, consuming tokens and adding latency.\n    Additionally, CE callers receive a truncated snippet (blurb) instead of a full document chunk,\n    but this should still be sufficient for most use cases. CE mode functionality should be swapped\n    when a dedicated CE search endpoint is implemented.\n\n    In EE mode, the dedicated search endpoint is used instead.\n\n    To find a list of available sources, use the `indexed_sources` resource.\n    Returns chunks of text as search results with snippets, scores, and metadata.\n\n    Example usage:\n    ```\n    {\n        \"query\": \"What is the latest status of PROJ-1234 and what is the next development item?\",\n        \"source_types\": [\"jira\", \"google_drive\", \"github\"],\n        \"time_cutoff\": \"2025-11-24T00:00:00Z\",\n        \"limit\": 10,\n    }\n    ```\n    \"\"\"\n    logger.info(\n        f\"Onyx MCP Server: document search: query='{query}', sources={source_types}, limit={limit}\"\n    )\n\n    # Parse time_cutoff string to datetime if provided\n    time_cutoff_dt: datetime | None = None\n    if time_cutoff:\n        try:\n            time_cutoff_dt = datetime.fromisoformat(time_cutoff.replace(\"Z\", \"+00:00\"))\n        except ValueError as e:\n            logger.warning(\n                f\"Onyx MCP Server: Invalid time_cutoff format '{time_cutoff}': {e}. Continuing without time filter.\"\n            )\n            # Continue with no time_cutoff instead of returning an error\n            time_cutoff_dt = None\n\n    # Initialize source_type_enums early to avoid UnboundLocalError\n    source_type_enums: list[DocumentSource] | None = None\n\n    # Get authenticated user from FastMCP's access token\n    access_token = require_access_token()\n\n    try:\n        sources = await get_indexed_sources(access_token)\n    except Exception as e:\n        # Error fetching sources (network error, API failure, etc.)\n        logger.error(\n            \"Onyx MCP Server: Error checking indexed sources: %s\",\n            e,\n            exc_info=True,\n        )\n        return {\n            \"documents\": [],\n            \"total_results\": 0,\n            \"query\": query,\n            \"error\": (f\"Failed to check indexed sources: {str(e)}. \"),\n        }\n\n    if not sources:\n        logger.info(\"Onyx MCP Server: No indexed sources available for tenant\")\n        return {\n            \"documents\": [],\n            \"total_results\": 0,\n            \"query\": query,\n            \"message\": (\n                \"No document sources are indexed yet. Add connectors or upload data \"\n                \"through Onyx before calling onyx_search_documents.\"\n            ),\n        }\n\n    # Convert source_types strings to DocumentSource enums if provided\n    # Invalid values will be handled by the API server\n    if source_types is not None:\n        source_type_enums = []\n        for src in source_types:\n            try:\n                source_type_enums.append(DocumentSource(src.lower()))\n            except ValueError:\n                logger.warning(\n                    f\"Onyx MCP Server: Invalid source type '{src}' - will be ignored by server\"\n                )\n\n    # Build filters dict only with non-None values\n    filters: dict[str, Any] | None = None\n    if source_type_enums or time_cutoff_dt:\n        filters = {}\n        if source_type_enums:\n            filters[\"source_type\"] = [src.value for src in source_type_enums]\n        if time_cutoff_dt:\n            filters[\"time_cutoff\"] = time_cutoff_dt.isoformat()\n\n    is_ee = global_version.is_ee_version()\n    base_url = build_api_server_url_for_http_requests(respect_env_override_if_set=True)\n    auth_headers = {\"Authorization\": f\"Bearer {access_token.token}\"}\n\n    search_request: dict[str, Any]\n    if is_ee:\n        # EE: use the dedicated search endpoint (no LLM invocation)\n        search_request = {\n            \"search_query\": query,\n            \"filters\": filters,\n            \"num_docs_fed_to_llm_selection\": limit,\n            \"run_query_expansion\": False,\n            \"include_content\": True,\n            \"stream\": False,\n        }\n        endpoint = f\"{base_url}/search/send-search-message\"\n        error_key = \"error\"\n        docs_key = \"search_docs\"\n        content_field = \"content\"\n    else:\n        # CE: fall back to the chat endpoint (invokes LLM, consumes tokens)\n        search_request = {\n            \"message\": query,\n            \"stream\": False,\n            \"chat_session_info\": {},\n        }\n        if filters:\n            search_request[\"internal_search_filters\"] = filters\n        endpoint = f\"{base_url}/chat/send-chat-message\"\n        error_key = \"error_msg\"\n        docs_key = \"top_documents\"\n        content_field = \"blurb\"\n\n    try:\n        response = await get_http_client().post(\n            endpoint,\n            json=search_request,\n            headers=auth_headers,\n        )\n        response.raise_for_status()\n        result = response.json()\n\n        # Check for error in response\n        if result.get(error_key):\n            return {\n                \"documents\": [],\n                \"total_results\": 0,\n                \"query\": query,\n                \"error\": result.get(error_key),\n            }\n\n        documents = [\n            {\n                \"semantic_identifier\": doc.get(\"semantic_identifier\"),\n                \"content\": doc.get(content_field),\n                \"source_type\": doc.get(\"source_type\"),\n                \"link\": doc.get(\"link\"),\n                \"score\": doc.get(\"score\"),\n            }\n            for doc in result.get(docs_key, [])\n        ]\n\n        # NOTE: search depth is controlled by the backend persona defaults, not `limit`.\n        # `limit` only caps the returned list; fewer results may be returned if the\n        # backend retrieves fewer documents than requested.\n        documents = documents[:limit]\n\n        logger.info(\n            f\"Onyx MCP Server: Internal search returned {len(documents)} results\"\n        )\n        return {\n            \"documents\": documents,\n            \"total_results\": len(documents),\n            \"query\": query,\n        }\n    except Exception as e:\n        logger.error(f\"Onyx MCP Server: Document search error: {e}\", exc_info=True)\n        return {\n            \"error\": f\"Document search failed: {str(e)}\",\n            \"documents\": [],\n            \"query\": query,\n        }\n\n\n@mcp_server.tool()\nasync def search_web(\n    query: str,\n    limit: int = 5,\n) -> dict[str, Any]:\n    \"\"\"\n    Search the public internet for general knowledge, current events, and publicly available information.\n    Use this tool for information that is publicly available on the web,\n    such as news, documentation, general facts, or when the user's private knowledge base doesn't contain relevant information.\n\n    Returns web search results with titles, URLs, and snippets (NOT full content). Use `open_urls` to fetch full page content.\n\n    Example usage:\n    ```\n    {\n        \"query\": \"React 19 migration guide to use react compiler\",\n        \"limit\": 5\n    }\n    ```\n    \"\"\"\n    logger.info(f\"Onyx MCP Server: Web search: query='{query}', limit={limit}\")\n\n    access_token = require_access_token()\n\n    try:\n        request_payload = {\"queries\": [query], \"max_results\": limit}\n        response = await get_http_client().post(\n            f\"{build_api_server_url_for_http_requests(respect_env_override_if_set=True)}/web-search/search-lite\",\n            json=request_payload,\n            headers={\"Authorization\": f\"Bearer {access_token.token}\"},\n        )\n        response.raise_for_status()\n        response_payload = response.json()\n        results = response_payload.get(\"results\", [])\n        return {\n            \"results\": results,\n            \"query\": query,\n        }\n    except Exception as e:\n        logger.error(f\"Onyx MCP Server: Web search error: {e}\", exc_info=True)\n        return {\n            \"error\": f\"Web search failed: {str(e)}\",\n            \"results\": [],\n            \"query\": query,\n        }\n\n\n@mcp_server.tool()\nasync def open_urls(\n    urls: list[str],\n) -> dict[str, Any]:\n    \"\"\"\n    Retrieve the complete text content from specific web URLs.\n    Use this tool when you need to access full content from known URLs,\n    such as documentation pages or articles returned by the `search_web` tool.\n\n    Useful for following up on web search results when snippets do not provide enough information.\n\n    Returns the full text content of each URL along with metadata like title and content type.\n\n    Example usage:\n    ```\n    {\n        \"urls\": [\"https://react.dev/versions\", \"https://react.dev/learn/react-compiler\",\"https://react.dev/learn/react-compiler/introduction\"]\n    }\n    ```\n    \"\"\"\n    logger.info(f\"Onyx MCP Server: Open URL: fetching {len(urls)} URLs\")\n\n    access_token = require_access_token()\n\n    try:\n        response = await get_http_client().post(\n            f\"{build_api_server_url_for_http_requests(respect_env_override_if_set=True)}/web-search/open-urls\",\n            json={\"urls\": urls},\n            headers={\"Authorization\": f\"Bearer {access_token.token}\"},\n        )\n        response.raise_for_status()\n        response_payload = response.json()\n        results = response_payload.get(\"results\", [])\n        return {\n            \"results\": results,\n        }\n    except Exception as e:\n        logger.error(f\"Onyx MCP Server: URL fetch error: {e}\", exc_info=True)\n        return {\n            \"error\": f\"URL fetch failed: {str(e)}\",\n            \"results\": [],\n        }\n"
  },
  {
    "path": "backend/onyx/mcp_server/utils.py",
    "content": "\"\"\"Utility helpers for the Onyx MCP server.\"\"\"\n\nfrom __future__ import annotations\n\nimport httpx\nfrom fastmcp.server.auth.auth import AccessToken\nfrom fastmcp.server.dependencies import get_access_token\n\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import build_api_server_url_for_http_requests\n\nlogger = setup_logger()\n\n# Shared HTTP client reused across requests\n_http_client: httpx.AsyncClient | None = None\n\n\ndef require_access_token() -> AccessToken:\n    \"\"\"\n    Get and validate the access token from the current request.\n\n    Raises:\n        ValueError: If no access token is present in the request.\n\n    Returns:\n        AccessToken: The validated access token.\n    \"\"\"\n    access_token = get_access_token()\n    if not access_token:\n        raise ValueError(\n            \"MCP Server requires an Onyx access token to authenticate your request\"\n        )\n    return access_token\n\n\ndef get_http_client() -> httpx.AsyncClient:\n    \"\"\"Return a shared async HTTP client.\"\"\"\n    global _http_client\n    if _http_client is None:\n        _http_client = httpx.AsyncClient(timeout=60.0)\n    return _http_client\n\n\nasync def shutdown_http_client() -> None:\n    \"\"\"Close the shared HTTP client when the server shuts down.\"\"\"\n    global _http_client\n    if _http_client is not None:\n        await _http_client.aclose()\n        _http_client = None\n\n\nasync def get_indexed_sources(\n    access_token: AccessToken,\n) -> list[str]:\n    \"\"\"\n    Fetch indexed document sources for the current user/tenant.\n\n    Returns:\n        List of indexed source strings. Empty list if no sources are indexed.\n    \"\"\"\n    headers = {\"Authorization\": f\"Bearer {access_token.token}\"}\n    try:\n        response = await get_http_client().get(\n            f\"{build_api_server_url_for_http_requests(respect_env_override_if_set=True)}/manage/indexed-sources\",\n            headers=headers,\n        )\n        response.raise_for_status()\n        payload = response.json()\n        sources = payload.get(\"sources\", [])\n        if not isinstance(sources, list):\n            raise ValueError(\"Unexpected response shape for indexed sources\")\n        return [str(source) for source in sources]\n    except (httpx.HTTPStatusError, httpx.RequestError, ValueError):\n        # Re-raise known exception types (httpx errors and validation errors)\n        logger.error(\n            \"Onyx MCP Server: Failed to fetch indexed sources\",\n            exc_info=True,\n        )\n        raise\n    except Exception as exc:\n        # Wrap unexpected exceptions\n        logger.error(\n            \"Onyx MCP Server: Unexpected error fetching indexed sources\",\n            exc_info=True,\n        )\n        raise RuntimeError(f\"Failed to fetch indexed sources: {exc}\") from exc\n"
  },
  {
    "path": "backend/onyx/mcp_server_main.py",
    "content": "\"\"\"Entry point for MCP server - HTTP POST transport with API key auth.\"\"\"\n\nimport uvicorn\n\nfrom onyx.configs.app_configs import MCP_SERVER_ENABLED\nfrom onyx.configs.app_configs import MCP_SERVER_HOST\nfrom onyx.configs.app_configs import MCP_SERVER_PORT\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef main() -> None:\n    \"\"\"Run the MCP server.\"\"\"\n    if not MCP_SERVER_ENABLED:\n        logger.info(\"MCP server is disabled (MCP_SERVER_ENABLED=false)\")\n        return\n\n    logger.info(f\"Starting MCP server on {MCP_SERVER_HOST}:{MCP_SERVER_PORT}\")\n\n    from onyx.mcp_server.api import mcp_app\n\n    uvicorn.run(\n        mcp_app,\n        host=MCP_SERVER_HOST,\n        port=MCP_SERVER_PORT,\n        log_config=None,\n    )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/onyx/natural_language_processing/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/natural_language_processing/constants.py",
    "content": "\"\"\"\nConstants for natural language processing, including embedding and reranking models.\n\nThis file contains constants moved from model_server to support the gradual migration\nof API-based calls to bypass the model server.\n\"\"\"\n\nfrom shared_configs.enums import EmbeddingProvider\nfrom shared_configs.enums import EmbedTextType\n\n\n# Default model names for different providers\nDEFAULT_OPENAI_MODEL = \"text-embedding-3-small\"\nDEFAULT_COHERE_MODEL = \"embed-english-light-v3.0\"\nDEFAULT_VOYAGE_MODEL = \"voyage-large-2-instruct\"\nDEFAULT_VERTEX_MODEL = \"text-embedding-005\"\n\n\nclass EmbeddingModelTextType:\n    \"\"\"Mapping of Onyx text types to provider-specific text types.\"\"\"\n\n    PROVIDER_TEXT_TYPE_MAP = {\n        EmbeddingProvider.COHERE: {\n            EmbedTextType.QUERY: \"search_query\",\n            EmbedTextType.PASSAGE: \"search_document\",\n        },\n        EmbeddingProvider.VOYAGE: {\n            EmbedTextType.QUERY: \"query\",\n            EmbedTextType.PASSAGE: \"document\",\n        },\n        EmbeddingProvider.GOOGLE: {\n            EmbedTextType.QUERY: \"RETRIEVAL_QUERY\",\n            EmbedTextType.PASSAGE: \"RETRIEVAL_DOCUMENT\",\n        },\n    }\n\n    @staticmethod\n    def get_type(provider: EmbeddingProvider, text_type: EmbedTextType) -> str:\n        \"\"\"Get provider-specific text type string.\"\"\"\n        return EmbeddingModelTextType.PROVIDER_TEXT_TYPE_MAP[provider][text_type]\n"
  },
  {
    "path": "backend/onyx/natural_language_processing/english_stopwords.py",
    "content": "import re\n\nENGLISH_STOPWORDS = [\n    \"a\",\n    \"about\",\n    \"above\",\n    \"after\",\n    \"again\",\n    \"against\",\n    \"ain\",\n    \"all\",\n    \"am\",\n    \"an\",\n    \"and\",\n    \"any\",\n    \"are\",\n    \"aren\",\n    \"aren't\",\n    \"as\",\n    \"at\",\n    \"be\",\n    \"because\",\n    \"been\",\n    \"before\",\n    \"being\",\n    \"below\",\n    \"between\",\n    \"both\",\n    \"but\",\n    \"by\",\n    \"can\",\n    \"couldn\",\n    \"couldn't\",\n    \"d\",\n    \"did\",\n    \"didn\",\n    \"didn't\",\n    \"do\",\n    \"does\",\n    \"doesn\",\n    \"doesn't\",\n    \"doing\",\n    \"don\",\n    \"don't\",\n    \"down\",\n    \"during\",\n    \"each\",\n    \"few\",\n    \"for\",\n    \"from\",\n    \"further\",\n    \"had\",\n    \"hadn\",\n    \"hadn't\",\n    \"has\",\n    \"hasn\",\n    \"hasn't\",\n    \"have\",\n    \"haven\",\n    \"haven't\",\n    \"having\",\n    \"he\",\n    \"he'd\",\n    \"he'll\",\n    \"he's\",\n    \"her\",\n    \"here\",\n    \"hers\",\n    \"herself\",\n    \"him\",\n    \"himself\",\n    \"his\",\n    \"how\",\n    \"i\",\n    \"i'd\",\n    \"i'll\",\n    \"i'm\",\n    \"i've\",\n    \"if\",\n    \"in\",\n    \"into\",\n    \"is\",\n    \"isn\",\n    \"isn't\",\n    \"it\",\n    \"it'd\",\n    \"it'll\",\n    \"it's\",\n    \"its\",\n    \"itself\",\n    \"just\",\n    \"ll\",\n    \"m\",\n    \"ma\",\n    \"me\",\n    \"mightn\",\n    \"mightn't\",\n    \"more\",\n    \"most\",\n    \"mustn\",\n    \"mustn't\",\n    \"my\",\n    \"myself\",\n    \"needn\",\n    \"needn't\",\n    \"no\",\n    \"nor\",\n    \"not\",\n    \"now\",\n    \"o\",\n    \"of\",\n    \"off\",\n    \"on\",\n    \"once\",\n    \"only\",\n    \"or\",\n    \"other\",\n    \"our\",\n    \"ours\",\n    \"ourselves\",\n    \"out\",\n    \"over\",\n    \"own\",\n    \"re\",\n    \"s\",\n    \"same\",\n    \"shan\",\n    \"shan't\",\n    \"she\",\n    \"she'd\",\n    \"she'll\",\n    \"she's\",\n    \"should\",\n    \"should've\",\n    \"shouldn\",\n    \"shouldn't\",\n    \"so\",\n    \"some\",\n    \"such\",\n    \"t\",\n    \"than\",\n    \"that\",\n    \"that'll\",\n    \"the\",\n    \"their\",\n    \"theirs\",\n    \"them\",\n    \"themselves\",\n    \"then\",\n    \"there\",\n    \"these\",\n    \"they\",\n    \"they'd\",\n    \"they'll\",\n    \"they're\",\n    \"they've\",\n    \"this\",\n    \"those\",\n    \"through\",\n    \"to\",\n    \"too\",\n    \"under\",\n    \"until\",\n    \"up\",\n    \"ve\",\n    \"very\",\n    \"was\",\n    \"wasn\",\n    \"wasn't\",\n    \"we\",\n    \"we'd\",\n    \"we'll\",\n    \"we're\",\n    \"we've\",\n    \"were\",\n    \"weren\",\n    \"weren't\",\n    \"what\",\n    \"when\",\n    \"where\",\n    \"which\",\n    \"while\",\n    \"who\",\n    \"whom\",\n    \"why\",\n    \"will\",\n    \"with\",\n    \"won\",\n    \"won't\",\n    \"wouldn\",\n    \"wouldn't\",\n    \"y\",\n    \"you\",\n    \"you'd\",\n    \"you'll\",\n    \"you're\",\n    \"you've\",\n    \"your\",\n    \"yours\",\n    \"yourself\",\n    \"yourselves\",\n]\n\nENGLISH_STOPWORDS_SET = frozenset(ENGLISH_STOPWORDS)\n\n\ndef strip_stopwords(text: str) -> list[str]:\n    \"\"\"Remove English stopwords from text.\n\n    Matching is case-insensitive and ignores leading/trailing punctuation\n    on each word. Internal punctuation (like apostrophes in contractions)\n    is preserved for matching, so \"you're\" matches the stopword \"you're\"\n    but \"youre\" would not.\n    \"\"\"\n    words = text.split()\n    result = []\n\n    for word in words:\n        # Strip leading/trailing punctuation to get the core word for comparison\n        # This preserves internal punctuation like apostrophes\n        core = re.sub(r\"^[^\\w']+|[^\\w']+$\", \"\", word)\n        if core.lower() not in ENGLISH_STOPWORDS_SET:\n            result.append(word)\n\n    return result\n"
  },
  {
    "path": "backend/onyx/natural_language_processing/exceptions.py",
    "content": "class ModelServerRateLimitError(Exception):\n    \"\"\"\n    Exception raised for rate limiting errors from the model server.\n    \"\"\"\n\n\nclass CohereBillingLimitError(Exception):\n    \"\"\"\n    Raised when Cohere rejects requests because the billing cap is reached.\n    \"\"\"\n"
  },
  {
    "path": "backend/onyx/natural_language_processing/search_nlp_models.py",
    "content": "import asyncio\nimport json\nimport os\nimport threading\nimport time\nfrom collections.abc import Callable\nfrom concurrent.futures import as_completed\nfrom concurrent.futures import ThreadPoolExecutor\nfrom functools import partial\nfrom functools import wraps\nfrom types import TracebackType\nfrom typing import Any\nfrom typing import cast\n\nimport aioboto3  # type: ignore\nimport httpx\nimport requests\nimport voyageai  # type: ignore[import-untyped]\nfrom cohere import AsyncClient as CohereAsyncClient\nfrom cohere.core.api_error import ApiError\nfrom google.oauth2 import service_account\nfrom httpx import HTTPError\nfrom requests import JSONDecodeError\nfrom requests import RequestException\nfrom requests import Response\nfrom retry import retry\n\nfrom onyx.configs.app_configs import INDEXING_EMBEDDING_MODEL_NUM_THREADS\nfrom onyx.configs.app_configs import LARGE_CHUNK_RATIO\nfrom onyx.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS\nfrom onyx.configs.model_configs import (\n    BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES,\n)\nfrom onyx.connectors.models import ConnectorStopSignal\nfrom onyx.db.models import SearchSettings\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.natural_language_processing.constants import DEFAULT_COHERE_MODEL\nfrom onyx.natural_language_processing.constants import DEFAULT_OPENAI_MODEL\nfrom onyx.natural_language_processing.constants import DEFAULT_VERTEX_MODEL\nfrom onyx.natural_language_processing.constants import DEFAULT_VOYAGE_MODEL\nfrom onyx.natural_language_processing.constants import EmbeddingModelTextType\nfrom onyx.natural_language_processing.exceptions import CohereBillingLimitError\nfrom onyx.natural_language_processing.exceptions import ModelServerRateLimitError\nfrom onyx.natural_language_processing.utils import get_tokenizer\nfrom onyx.natural_language_processing.utils import tokenizer_trim_content\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.search_nlp_models_utils import pass_aws_key\nfrom onyx.utils.text_processing import remove_invalid_unicode_chars\nfrom onyx.utils.timing import log_function_time\nfrom shared_configs.configs import API_BASED_EMBEDDING_TIMEOUT\nfrom shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE\nfrom shared_configs.configs import INDEXING_ONLY\nfrom shared_configs.configs import MODEL_SERVER_HOST\nfrom shared_configs.configs import MODEL_SERVER_PORT\nfrom shared_configs.configs import OPENAI_EMBEDDING_TIMEOUT\nfrom shared_configs.configs import SKIP_WARM_UP\nfrom shared_configs.configs import VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE\nfrom shared_configs.enums import EmbeddingProvider\nfrom shared_configs.enums import EmbedTextType\nfrom shared_configs.enums import RerankerProvider\nfrom shared_configs.model_server_models import Embedding\nfrom shared_configs.model_server_models import EmbedRequest\nfrom shared_configs.model_server_models import EmbedResponse\nfrom shared_configs.model_server_models import IntentRequest\nfrom shared_configs.model_server_models import IntentResponse\nfrom shared_configs.model_server_models import RerankRequest\nfrom shared_configs.model_server_models import RerankResponse\nfrom shared_configs.utils import batch_list\n\nlogger = setup_logger()\n\n# If we are not only indexing, dont want retry very long\n_RETRY_DELAY = 10 if INDEXING_ONLY else 0.1\n_RETRY_TRIES = 10 if INDEXING_ONLY else 2\n\n# OpenAI only allows 2048 embeddings to be computed at once\n_OPENAI_MAX_INPUT_LEN = 2048\n# Cohere allows up to 96 embeddings in a single embedding calling\n_COHERE_MAX_INPUT_LEN = 96\n\n# Authentication error string constants\n_AUTH_ERROR_401 = \"401\"\n_AUTH_ERROR_UNAUTHORIZED = \"unauthorized\"\n_AUTH_ERROR_INVALID_API_KEY = \"invalid api key\"\n_AUTH_ERROR_PERMISSION = \"permission\"\n\n# Thread-local storage for event loops\n# This prevents creating thousands of event loops during batch processing,\n# which was causing severe memory leaks with API-based embedding providers\n_thread_local = threading.local()\n\n\ndef _get_or_create_event_loop() -> asyncio.AbstractEventLoop:\n    \"\"\"Get or create a thread-local event loop for API embedding calls.\n\n    This prevents creating a new event loop for every batch during embedding,\n    which was causing memory leaks. Instead, each thread reuses the same loop.\n\n    Returns:\n        asyncio.AbstractEventLoop: The thread-local event loop\n    \"\"\"\n    if (\n        not hasattr(_thread_local, \"loop\")\n        or _thread_local.loop is None\n        or _thread_local.loop.is_closed()\n    ):\n        _thread_local.loop = asyncio.new_event_loop()\n        asyncio.set_event_loop(_thread_local.loop)\n    return _thread_local.loop\n\n\ndef cleanup_embedding_thread_locals() -> None:\n    \"\"\"Clean up thread-local event loops to prevent memory leaks.\n\n    This should be called after each task completes to ensure that\n    event loops and their associated resources are properly released.\n    Thread-local storage persists across Celery tasks when using the\n    thread pool, so explicit cleanup is necessary.\n\n    NOTE: This must be called from the SAME thread that created the event loop.\n    For ThreadPoolExecutor-based embedding, this cleanup happens automatically\n    via the _cleanup_thread_local wrapper.\n    \"\"\"\n    if hasattr(_thread_local, \"loop\") and _thread_local.loop is not None:\n        loop = _thread_local.loop\n        if not loop.is_closed():\n            # Cancel all pending tasks in the event loop\n            try:\n                # Ensure loop is set as current event loop before accessing tasks\n                asyncio.set_event_loop(loop)\n                pending = asyncio.all_tasks(loop)\n                if pending:\n                    logger.debug(\n                        f\"Cleaning up event loop with {len(pending)} pending tasks in thread {threading.current_thread().name}\"\n                    )\n                    for task in pending:\n                        task.cancel()\n                    # Run the loop briefly to allow cancelled tasks to complete\n                    loop.run_until_complete(\n                        asyncio.gather(*pending, return_exceptions=True)\n                    )\n            except Exception as e:\n                # If gathering tasks fails, just close the loop\n                logger.debug(f\"Error gathering tasks during cleanup: {e}\")\n\n            # Close the event loop\n            loop.close()\n            logger.debug(\n                f\"Closed event loop in thread {threading.current_thread().name}\"\n            )\n\n        # Clear the thread-local reference\n        _thread_local.loop = None\n\n\ndef _cleanup_thread_local(func: Callable) -> Callable:\n    \"\"\"Decorator to ensure thread-local cleanup after function execution.\n\n    This wraps functions that run in ThreadPoolExecutor threads to ensure\n    that thread-local event loops are cleaned up after each execution,\n    preventing memory leaks from persistent thread-local storage.\n    \"\"\"\n\n    @wraps(func)\n    def wrapper(*args: Any, **kwargs: Any) -> Any:\n        try:\n            return func(*args, **kwargs)\n        finally:\n            # Clean up thread-local event loop after this thread's work is done\n            cleanup_embedding_thread_locals()\n\n    return wrapper\n\n\nWARM_UP_STRINGS = [\n    \"Onyx is amazing!\",\n    \"Check out our easy deployment guide at\",\n    \"https://docs.onyx.app/deployment/getting_started/quickstart\",\n]\n\n\ndef clean_model_name(model_str: str) -> str:\n    return model_str.replace(\"/\", \"_\").replace(\"-\", \"_\").replace(\".\", \"_\")\n\n\ndef build_model_server_url(\n    model_server_host: str,\n    model_server_port: int,\n) -> str:\n    model_server_url = f\"{model_server_host}:{model_server_port}\"\n\n    # use protocol if provided\n    if \"http\" in model_server_url:\n        return model_server_url\n\n    # otherwise default to http\n    return f\"http://{model_server_url}\"\n\n\ndef is_authentication_error(error: Exception) -> bool:\n    \"\"\"Check if an exception is related to authentication issues.\n\n    Args:\n        error: The exception to check\n\n    Returns:\n        bool: True if the error appears to be authentication-related\n    \"\"\"\n    error_str = str(error).lower()\n    return (\n        _AUTH_ERROR_401 in error_str\n        or _AUTH_ERROR_UNAUTHORIZED in error_str\n        or _AUTH_ERROR_INVALID_API_KEY in error_str\n        or _AUTH_ERROR_PERMISSION in error_str\n    )\n\n\ndef format_embedding_error(\n    error: Exception,\n    service_name: str,\n    model: str | None,\n    provider: EmbeddingProvider,\n    sanitized_api_key: str | None = None,\n    status_code: int | None = None,\n) -> str:\n    \"\"\"\n    Format a standardized error string for embedding errors.\n    \"\"\"\n    detail = f\"Status {status_code}\" if status_code else f\"{type(error)}\"\n\n    return (\n        f\"{'HTTP error' if status_code else 'Exception'} embedding text with {service_name} - {detail}: \"\n        f\"Model: {model} \"\n        f\"Provider: {provider} \"\n        f\"API Key: {sanitized_api_key} \"\n        f\"Exception: {error}\"\n    )\n\n\n# Custom exception for authentication errors\nclass AuthenticationError(Exception):\n    \"\"\"Raised when authentication fails with a provider.\"\"\"\n\n    def __init__(self, provider: str, message: str = \"API key is invalid or expired\"):\n        self.provider = provider\n        self.message = message\n        super().__init__(f\"{provider} authentication failed: {message}\")\n\n\nclass CloudEmbedding:\n    def __init__(\n        self,\n        api_key: str,\n        provider: EmbeddingProvider,\n        api_url: str | None = None,\n        api_version: str | None = None,\n        timeout: int = API_BASED_EMBEDDING_TIMEOUT,\n    ) -> None:\n        self.provider = provider\n        self.api_key = api_key\n        self.api_url = api_url\n        self.api_version = api_version\n        self.timeout = timeout\n        self.http_client = httpx.AsyncClient(timeout=timeout)\n        self._closed = False\n        self.sanitized_api_key = api_key[:4] + \"********\" + api_key[-4:]\n\n    async def _embed_openai(\n        self, texts: list[str], model: str | None, reduced_dimension: int | None\n    ) -> list[Embedding]:\n        if not model:\n            model = DEFAULT_OPENAI_MODEL\n\n        import openai\n\n        # Use the OpenAI specific timeout for this one\n        client = openai.AsyncOpenAI(\n            api_key=self.api_key, timeout=OPENAI_EMBEDDING_TIMEOUT\n        )\n\n        final_embeddings: list[Embedding] = []\n\n        for text_batch in batch_list(texts, _OPENAI_MAX_INPUT_LEN):\n            response = await client.embeddings.create(\n                input=text_batch,\n                model=model,\n                dimensions=reduced_dimension or openai.omit,\n            )\n            final_embeddings.extend(\n                [embedding.embedding for embedding in response.data]\n            )\n        return final_embeddings\n\n    async def _embed_cohere(\n        self, texts: list[str], model: str | None, embedding_type: str\n    ) -> list[Embedding]:\n        if not model:\n            model = DEFAULT_COHERE_MODEL\n\n        client = CohereAsyncClient(api_key=self.api_key)\n\n        final_embeddings: list[Embedding] = []\n        for text_batch in batch_list(texts, _COHERE_MAX_INPUT_LEN):\n            # Does not use the same tokenizer as the Onyx API server but it's approximately the same\n            # empirically it's only off by a very few tokens so it's not a big deal\n            response = await client.embed(\n                texts=text_batch,\n                model=model,\n                input_type=embedding_type,\n                truncate=\"END\",\n            )\n            final_embeddings.extend(cast(list[Embedding], response.embeddings))\n        return final_embeddings\n\n    async def _embed_voyage(\n        self, texts: list[str], model: str | None, embedding_type: str\n    ) -> list[Embedding]:\n        if not model:\n            model = DEFAULT_VOYAGE_MODEL\n\n        client = voyageai.AsyncClient(\n            api_key=self.api_key, timeout=API_BASED_EMBEDDING_TIMEOUT\n        )\n\n        response = await client.embed(\n            texts=texts,\n            model=model,\n            input_type=embedding_type,\n            truncation=True,\n        )\n        return response.embeddings\n\n    async def _embed_azure(\n        self, texts: list[str], model: str | None\n    ) -> list[Embedding]:\n        from litellm import aembedding\n\n        response = await aembedding(\n            model=model,\n            input=texts,\n            timeout=API_BASED_EMBEDDING_TIMEOUT,\n            api_key=self.api_key,\n            api_base=self.api_url,\n            api_version=self.api_version,\n        )\n        embeddings = [embedding[\"embedding\"] for embedding in response.data]\n        return embeddings\n\n    async def _embed_vertex(\n        self,\n        texts: list[str],\n        model: str | None,\n        embedding_type: str,\n        reduced_dimension: int | None,\n    ) -> list[Embedding]:\n        from google import genai\n        from google.genai import types as genai_types\n\n        if not model:\n            model = DEFAULT_VERTEX_MODEL\n\n        service_account_info = json.loads(self.api_key)\n        credentials = service_account.Credentials.from_service_account_info(\n            service_account_info,\n            scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n        )\n        project_id = service_account_info[\"project_id\"]\n        location = (\n            service_account_info.get(\"location\")\n            or os.environ.get(\"GOOGLE_CLOUD_LOCATION\")\n            or \"us-central1\"\n        )\n\n        client = genai.Client(\n            vertexai=True,\n            project=project_id,\n            location=location,\n            credentials=credentials,\n        )\n\n        embed_config = genai_types.EmbedContentConfig(\n            task_type=embedding_type,\n            output_dimensionality=reduced_dimension,\n            auto_truncate=True,\n        )\n\n        async def _embed_batch(batch_texts: list[str]) -> list[Embedding]:\n            content_requests: list[Any] = [\n                genai_types.Content(parts=[genai_types.Part(text=text)])\n                for text in batch_texts\n            ]\n            response = await client.aio.models.embed_content(\n                model=model,\n                contents=content_requests,\n                config=embed_config,\n            )\n\n            if not response.embeddings:\n                raise RuntimeError(\"Received empty embeddings from Google GenAI.\")\n\n            embeddings: list[Embedding] = []\n            for idx, embedding in enumerate(response.embeddings):\n                if embedding.values is None:\n                    raise RuntimeError(\n                        f\"Missing embedding values for input at index {idx}.\"\n                    )\n                embeddings.append(embedding.values)\n            return embeddings\n\n        # Process VertexAI batches sequentially to avoid additional intra-task fanout.\n        # The higher-level thread pool already provides concurrency; running these\n        # requests in parallel here was causing excessive memory usage.\n        batches = [\n            texts[i : i + VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE]\n            for i in range(0, len(texts), VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE)\n        ]\n        all_embeddings: list[Embedding] = []\n\n        logger.debug(\n            f\"VertexAI embedding: processing {len(texts)} texts in {len(batches)} batches \"\n            f\"(batch_size={VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE})\"\n        )\n\n        try:\n            for batch_idx, batch in enumerate(batches):\n                batch_embeddings = await _embed_batch(batch)\n                all_embeddings.extend(batch_embeddings)\n\n                # Log progress for large batches to track memory usage patterns\n                if batch_idx % 10 == 0 and batch_idx > 0:\n                    logger.debug(\n                        f\"VertexAI embedding progress: batch {batch_idx}/{len(batches)}, total_embeddings={len(all_embeddings)}\"\n                    )\n\n            logger.debug(\n                f\"VertexAI embedding completed: {len(all_embeddings)} embeddings generated\"\n            )\n            return all_embeddings\n        finally:\n            # Ensure client is closed with a timeout to prevent hanging on stuck sessions\n            try:\n                await asyncio.wait_for(client.aio.aclose(), timeout=5.0)\n            except asyncio.TimeoutError:\n                logger.warning(\"Google GenAI client aclose() timed out after 5s\")\n            except Exception as e:\n                logger.warning(f\"Error closing Google GenAI client: {e}\")\n\n    async def _embed_litellm_proxy(\n        self, texts: list[str], model_name: str | None\n    ) -> list[Embedding]:\n        if not model_name:\n            raise ValueError(\"Model name is required for LiteLLM proxy embedding.\")\n\n        if not self.api_url:\n            raise ValueError(\"API URL is required for LiteLLM proxy embedding.\")\n\n        headers = (\n            {} if not self.api_key else {\"Authorization\": f\"Bearer {self.api_key}\"}\n        )\n\n        response = await self.http_client.post(\n            self.api_url,\n            json={\n                \"model\": model_name,\n                \"input\": texts,\n            },\n            headers=headers,\n        )\n        response.raise_for_status()\n        result = response.json()\n        return [embedding[\"embedding\"] for embedding in result[\"data\"]]\n\n    @retry(tries=_RETRY_TRIES, delay=_RETRY_DELAY)\n    async def embed(\n        self,\n        *,\n        texts: list[str],\n        text_type: EmbedTextType,\n        model_name: str | None = None,\n        deployment_name: str | None = None,\n        reduced_dimension: int | None = None,\n    ) -> list[Embedding]:\n        import openai\n\n        try:\n            if self.provider == EmbeddingProvider.OPENAI:\n                return await self._embed_openai(texts, model_name, reduced_dimension)\n            elif self.provider == EmbeddingProvider.AZURE:\n                return await self._embed_azure(texts, f\"azure/{deployment_name}\")\n            elif self.provider == EmbeddingProvider.LITELLM:\n                return await self._embed_litellm_proxy(texts, model_name)\n\n            embedding_type = EmbeddingModelTextType.get_type(self.provider, text_type)\n            if self.provider == EmbeddingProvider.COHERE:\n                return await self._embed_cohere(texts, model_name, embedding_type)\n            elif self.provider == EmbeddingProvider.VOYAGE:\n                return await self._embed_voyage(texts, model_name, embedding_type)\n            elif self.provider == EmbeddingProvider.GOOGLE:\n                return await self._embed_vertex(\n                    texts, model_name, embedding_type, reduced_dimension\n                )\n            else:\n                raise ValueError(f\"Unsupported provider: {self.provider}\")\n        except openai.AuthenticationError:\n            raise AuthenticationError(provider=\"OpenAI\")\n        except httpx.HTTPStatusError as e:\n            if e.response.status_code == 401:\n                raise AuthenticationError(provider=str(self.provider))\n\n            error_string = format_embedding_error(\n                e,\n                str(self.provider),\n                model_name or deployment_name,\n                self.provider,\n                sanitized_api_key=self.sanitized_api_key,\n                status_code=e.response.status_code,\n            )\n            logger.error(error_string)\n            logger.debug(f\"Exception texts: {texts}\")\n\n            raise RuntimeError(error_string)\n        except Exception as e:\n            if is_authentication_error(e):\n                raise AuthenticationError(provider=str(self.provider))\n\n            error_string = format_embedding_error(\n                e,\n                str(self.provider),\n                model_name or deployment_name,\n                self.provider,\n                sanitized_api_key=self.sanitized_api_key,\n            )\n            logger.error(error_string)\n            logger.debug(f\"Exception texts: {texts}\")\n\n            raise RuntimeError(error_string)\n\n    @staticmethod\n    def create(\n        api_key: str,\n        provider: EmbeddingProvider,\n        api_url: str | None = None,\n        api_version: str | None = None,\n    ) -> \"CloudEmbedding\":\n        logger.debug(f\"Creating Embedding instance for provider: {provider}\")\n        return CloudEmbedding(api_key, provider, api_url, api_version)\n\n    async def aclose(self) -> None:\n        \"\"\"Explicitly close the client.\"\"\"\n        if not self._closed:\n            await self.http_client.aclose()\n            self._closed = True\n\n    async def __aenter__(self) -> \"CloudEmbedding\":\n        return self\n\n    async def __aexit__(\n        self,\n        exc_type: type[BaseException] | None,\n        exc_val: BaseException | None,\n        exc_tb: TracebackType | None,\n    ) -> None:\n        await self.aclose()\n\n    def __del__(self) -> None:\n        \"\"\"Finalizer to warn about unclosed clients.\"\"\"\n        if not self._closed:\n            logger.warning(\n                \"CloudEmbedding was not properly closed. Use 'async with' or call aclose()\"\n            )\n\n\n# API-based reranking functions (moved from model server)\nasync def cohere_rerank_api(\n    query: str, docs: list[str], model_name: str, api_key: str\n) -> list[float]:\n    cohere_client = CohereAsyncClient(api_key=api_key)\n    try:\n        response = await cohere_client.rerank(\n            query=query, documents=docs, model=model_name\n        )\n    except ApiError as err:\n        if err.status_code == 402:\n            logger.warning(\n                \"Cohere rerank request rejected due to billing cap. Falling back to retrieval ordering until billing resets.\"\n            )\n            raise CohereBillingLimitError(\n                \"Cohere billing limit reached for reranking\"\n            ) from err\n        raise\n    results = response.results\n    sorted_results = sorted(results, key=lambda item: item.index)\n    return [result.relevance_score for result in sorted_results]\n\n\nasync def cohere_rerank_aws(\n    query: str,\n    docs: list[str],\n    model_name: str,\n    region_name: str,\n    aws_access_key_id: str,\n    aws_secret_access_key: str,\n) -> list[float]:\n    session = aioboto3.Session(\n        aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key\n    )\n    async with session.client(\n        \"bedrock-runtime\", region_name=region_name\n    ) as bedrock_client:\n        body = json.dumps(\n            {\n                \"query\": query,\n                \"documents\": docs,\n                \"api_version\": 2,\n            }\n        )\n        # Invoke the Bedrock model asynchronously\n        response = await bedrock_client.invoke_model(\n            modelId=model_name,\n            accept=\"application/json\",\n            contentType=\"application/json\",\n            body=body,\n        )\n\n        # Read the response asynchronously\n        response_body = json.loads(await response[\"body\"].read())\n\n        # Extract and sort the results\n        results = response_body.get(\"results\", [])\n        sorted_results = sorted(results, key=lambda item: item[\"index\"])\n\n        return [result[\"relevance_score\"] for result in sorted_results]\n\n\nasync def litellm_rerank(\n    query: str, docs: list[str], api_url: str, model_name: str, api_key: str | None\n) -> list[float]:\n    headers = {} if not api_key else {\"Authorization\": f\"Bearer {api_key}\"}\n    async with httpx.AsyncClient() as client:\n        response = await client.post(\n            api_url,\n            json={\n                \"model\": model_name,\n                \"query\": query,\n                \"documents\": docs,\n            },\n            headers=headers,\n        )\n        response.raise_for_status()\n        result = response.json()\n        return [\n            item[\"relevance_score\"]\n            for item in sorted(result[\"results\"], key=lambda x: x[\"index\"])\n        ]\n\n\nclass EmbeddingModel:\n    def __init__(\n        self,\n        server_host: str,  # Changes depending on indexing or inference\n        server_port: int,\n        model_name: str | None,\n        normalize: bool,\n        query_prefix: str | None,\n        passage_prefix: str | None,\n        api_key: str | None,\n        api_url: str | None,\n        provider_type: EmbeddingProvider | None,\n        retrim_content: bool = False,\n        callback: IndexingHeartbeatInterface | None = None,\n        api_version: str | None = None,\n        deployment_name: str | None = None,\n        reduced_dimension: int | None = None,\n    ) -> None:\n        self.api_key = api_key\n        self.provider_type = provider_type\n        self.query_prefix = query_prefix\n        self.passage_prefix = passage_prefix\n        self.normalize = normalize\n        self.model_name = model_name\n        self.retrim_content = retrim_content\n        self.api_url = api_url\n        self.api_version = api_version\n        self.deployment_name = deployment_name\n        self.reduced_dimension = reduced_dimension\n        self.tokenizer = get_tokenizer(\n            model_name=model_name, provider_type=provider_type\n        )\n        self.callback = callback\n\n        # Only build model server endpoint for local models\n        if self.provider_type is None:\n            model_server_url = build_model_server_url(server_host, server_port)\n            self.embed_server_endpoint: str | None = (\n                f\"{model_server_url}/encoder/bi-encoder-embed\"\n            )\n        else:\n            # API providers don't need model server endpoint\n            self.embed_server_endpoint = None\n\n    async def _make_direct_api_call(\n        self,\n        embed_request: EmbedRequest,\n        tenant_id: str | None = None,  # noqa: ARG002\n        request_id: str | None = None,  # noqa: ARG002\n    ) -> EmbedResponse:\n        \"\"\"Make direct API call to cloud provider, bypassing model server.\"\"\"\n        if self.provider_type is None:\n            raise ValueError(\"Provider type is required for direct API calls\")\n\n        if self.api_key is None:\n            logger.error(\"API key not provided for cloud model\")\n            raise RuntimeError(\"API key not provided for cloud model\")\n\n        # Check for prefix usage with cloud models\n        if embed_request.manual_query_prefix or embed_request.manual_passage_prefix:\n            logger.warning(\"Prefix provided for cloud model, which is not supported\")\n            raise ValueError(\n                \"Prefix string is not valid for cloud models. Cloud models take an explicit text type instead.\"\n            )\n\n        if not all(embed_request.texts):\n            logger.error(\"Empty strings provided for embedding\")\n            raise ValueError(\"Empty strings are not allowed for embedding.\")\n\n        if not embed_request.texts:\n            logger.error(\"No texts provided for embedding\")\n            raise ValueError(\"No texts provided for embedding.\")\n\n        start_time = time.monotonic()\n        total_chars = sum(len(text) for text in embed_request.texts)\n\n        logger.info(\n            f\"Embedding {len(embed_request.texts)} texts with {total_chars} total characters with provider: {self.provider_type}\"\n        )\n\n        async with CloudEmbedding(\n            api_key=self.api_key,\n            provider=self.provider_type,\n            api_url=self.api_url,\n            api_version=self.api_version,\n        ) as cloud_model:\n            embeddings = await cloud_model.embed(\n                texts=embed_request.texts,\n                model_name=embed_request.model_name,\n                deployment_name=embed_request.deployment_name,\n                text_type=embed_request.text_type,\n                reduced_dimension=embed_request.reduced_dimension,\n            )\n\n        if any(embedding is None for embedding in embeddings):\n            error_message = \"Embeddings contain None values\\n\"\n            error_message += \"Corresponding texts:\\n\"\n            error_message += \"\\n\".join(embed_request.texts)\n            logger.error(error_message)\n            raise ValueError(error_message)\n\n        elapsed = time.monotonic() - start_time\n        logger.info(\n            f\"event=embedding_provider \"\n            f\"texts={len(embed_request.texts)} \"\n            f\"chars={total_chars} \"\n            f\"provider={self.provider_type} \"\n            f\"elapsed={elapsed:.2f}\"\n        )\n\n        return EmbedResponse(embeddings=embeddings)\n\n    def _make_model_server_request(\n        self,\n        embed_request: EmbedRequest,\n        tenant_id: str | None = None,\n        request_id: str | None = None,\n    ) -> EmbedResponse:\n        if self.embed_server_endpoint is None:\n            raise ValueError(\"Model server endpoint is not configured for local models\")\n\n        # Store the endpoint in a local variable to help mypy understand it's not None\n        endpoint = self.embed_server_endpoint\n\n        def _make_request() -> Response:\n            headers = {}\n            if tenant_id:\n                headers[\"X-Onyx-Tenant-ID\"] = tenant_id\n\n            if request_id:\n                headers[\"X-Onyx-Request-ID\"] = request_id\n\n            response = requests.post(\n                endpoint,\n                headers=headers,\n                json=embed_request.model_dump(),\n            )\n            # signify that this is a rate limit error\n            if response.status_code == 429:\n                raise ModelServerRateLimitError(response.text)\n\n            response.raise_for_status()\n            return response\n\n        final_make_request_func = _make_request\n\n        # if the text type is a passage, add some default\n        # retries + handling for rate limiting\n        if embed_request.text_type == EmbedTextType.PASSAGE:\n            final_make_request_func = retry(\n                tries=3,\n                delay=5,\n                exceptions=(RequestException, ValueError, JSONDecodeError),\n            )(final_make_request_func)\n            # use 10 second delay as per Azure suggestion\n            final_make_request_func = retry(\n                tries=10, delay=10, exceptions=ModelServerRateLimitError\n            )(final_make_request_func)\n\n        response: Response | None = None\n\n        try:\n            response = final_make_request_func()\n            return EmbedResponse(**response.json())\n        except requests.HTTPError as e:\n            if not response:\n                raise HTTPError(\"HTTP error occurred - response is None.\") from e\n\n            try:\n                error_detail = response.json().get(\"detail\", str(e))\n            except Exception:\n                error_detail = response.text\n            raise HTTPError(f\"HTTP error occurred: {error_detail}\") from e\n        except requests.RequestException as e:\n            raise HTTPError(f\"Request failed: {str(e)}\") from e\n\n    def _batch_encode_texts(\n        self,\n        texts: list[str],\n        text_type: EmbedTextType,\n        batch_size: int,\n        max_seq_length: int,\n        num_threads: int = INDEXING_EMBEDDING_MODEL_NUM_THREADS,\n        tenant_id: str | None = None,\n        request_id: str | None = None,\n    ) -> list[Embedding]:\n        text_batches = batch_list(texts, batch_size)\n\n        logger.debug(f\"Encoding {len(texts)} texts in {len(text_batches)} batches\")\n\n        embeddings: list[Embedding] = []\n\n        @_cleanup_thread_local\n        def process_batch(\n            batch_idx: int,\n            batch_len: int,\n            text_batch: list[str],\n            tenant_id: str | None = None,\n            request_id: str | None = None,\n        ) -> tuple[int, list[Embedding]]:\n            if self.callback:\n                if self.callback.should_stop():\n                    raise ConnectorStopSignal(\n                        \"_batch_encode_texts detected stop signal\"\n                    )\n\n            embed_request = EmbedRequest(\n                model_name=self.model_name,\n                texts=text_batch,\n                api_version=self.api_version,\n                deployment_name=self.deployment_name,\n                max_context_length=max_seq_length,\n                normalize_embeddings=self.normalize,\n                api_key=self.api_key,\n                provider_type=self.provider_type,\n                text_type=text_type,\n                manual_query_prefix=self.query_prefix,\n                manual_passage_prefix=self.passage_prefix,\n                api_url=self.api_url,\n                reduced_dimension=self.reduced_dimension,\n            )\n\n            start_time = time.monotonic()\n\n            # Route between direct API calls and model server calls\n            if self.provider_type is not None:\n                # For API providers, make direct API call\n                # Use thread-local event loop to prevent memory leaks from creating\n                # thousands of event loops during batch processing\n                loop = _get_or_create_event_loop()\n                response = loop.run_until_complete(\n                    self._make_direct_api_call(\n                        embed_request, tenant_id=tenant_id, request_id=request_id\n                    )\n                )\n            else:\n                # For local models, use model server\n                response = self._make_model_server_request(\n                    embed_request, tenant_id=tenant_id, request_id=request_id\n                )\n\n            end_time = time.monotonic()\n\n            processing_time = end_time - start_time\n            logger.debug(\n                f\"EmbeddingModel.process_batch: Batch {batch_idx}/{batch_len} processing time: {processing_time:.2f} seconds\"\n            )\n\n            return batch_idx, response.embeddings\n\n        # only multi thread if:\n        #   1. num_threads is greater than 1\n        #   2. we are using an API-based embedding model (provider_type is not None)\n        #   3. there are more than 1 batch (no point in threading if only 1)\n        if num_threads >= 1 and self.provider_type and len(text_batches) > 1:\n            with ThreadPoolExecutor(max_workers=num_threads) as executor:\n                future_to_batch = {\n                    executor.submit(\n                        partial(\n                            process_batch,\n                            idx,\n                            len(text_batches),\n                            batch,\n                            tenant_id=tenant_id,\n                            request_id=request_id,\n                        )\n                    ): idx\n                    for idx, batch in enumerate(text_batches, start=1)\n                }\n\n                # Collect results in order\n                batch_results: list[tuple[int, list[Embedding]]] = []\n                for future in as_completed(future_to_batch):\n                    try:\n                        result = future.result()\n                        batch_results.append(result)\n                    except Exception as e:\n                        logger.exception(\"Embedding model failed to process batch\")\n                        raise e\n\n                # Sort by batch index and extend embeddings\n                batch_results.sort(key=lambda x: x[0])\n                for _, batch_embeddings in batch_results:\n                    embeddings.extend(batch_embeddings)\n        else:\n            # Original sequential processing\n            for idx, text_batch in enumerate(text_batches, start=1):\n                _, batch_embeddings = process_batch(\n                    idx,\n                    len(text_batches),\n                    text_batch,\n                    tenant_id=tenant_id,\n                    request_id=request_id,\n                )\n                embeddings.extend(batch_embeddings)\n\n        return embeddings\n\n    @log_function_time(print_only=True, debug_only=True)\n    def encode(\n        self,\n        texts: list[str],\n        text_type: EmbedTextType,\n        large_chunks_present: bool = False,\n        local_embedding_batch_size: int = BATCH_SIZE_ENCODE_CHUNKS,\n        api_embedding_batch_size: int = BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES,\n        max_seq_length: int = DOC_EMBEDDING_CONTEXT_SIZE,\n        tenant_id: str | None = None,\n        request_id: str | None = None,\n    ) -> list[Embedding]:\n        if not texts or not all(texts):\n            raise ValueError(f\"Empty or missing text for embedding: {texts}\")\n\n        if large_chunks_present:\n            max_seq_length *= LARGE_CHUNK_RATIO\n\n        if self.retrim_content:\n            # This is applied during indexing as a catchall for overly long titles (or other uncapped fields)\n            # Note that this uses just the default tokenizer which may also lead to very minor miscountings\n            # However this slight miscounting is very unlikely to have any material impact.\n            texts = [\n                tokenizer_trim_content(\n                    content=text,\n                    desired_length=max_seq_length,\n                    tokenizer=self.tokenizer,\n                )\n                for text in texts\n            ]\n\n        # Remove invalid Unicode characters (e.g., unpaired surrogates from malformed documents)\n        # that would cause UTF-8 encoding errors when sent to embedding providers\n        texts = [remove_invalid_unicode_chars(text) or \"<>\" for text in texts]\n\n        batch_size = (\n            api_embedding_batch_size\n            if self.provider_type\n            else local_embedding_batch_size\n        )\n\n        return self._batch_encode_texts(\n            texts=texts,\n            text_type=text_type,\n            batch_size=batch_size,\n            max_seq_length=max_seq_length,\n            tenant_id=tenant_id,\n            request_id=request_id,\n        )\n\n    @classmethod\n    def from_db_model(\n        cls,\n        search_settings: SearchSettings,\n        server_host: str,  # Changes depending on indexing or inference\n        server_port: int,\n        retrim_content: bool = False,\n    ) -> \"EmbeddingModel\":\n        return cls(\n            server_host=server_host,\n            server_port=server_port,\n            model_name=search_settings.model_name,\n            normalize=search_settings.normalize,\n            query_prefix=search_settings.query_prefix,\n            passage_prefix=search_settings.passage_prefix,\n            api_key=search_settings.api_key,\n            provider_type=search_settings.provider_type,\n            api_url=search_settings.api_url,\n            retrim_content=retrim_content,\n            api_version=search_settings.api_version,\n            deployment_name=search_settings.deployment_name,\n            reduced_dimension=search_settings.reduced_dimension,\n        )\n\n\nclass RerankingModel:\n    def __init__(\n        self,\n        model_name: str,\n        provider_type: RerankerProvider | None,\n        api_key: str | None,\n        api_url: str | None,\n        model_server_host: str = MODEL_SERVER_HOST,\n        model_server_port: int = MODEL_SERVER_PORT,\n    ) -> None:\n        self.model_name = model_name\n        self.provider_type = provider_type\n        self.api_key = api_key\n        self.api_url = api_url\n\n        # Only build model server endpoint for local models\n        if self.provider_type is None:\n            model_server_url = build_model_server_url(\n                model_server_host, model_server_port\n            )\n            self.rerank_server_endpoint: str | None = (\n                model_server_url + \"/encoder/cross-encoder-scores\"\n            )\n        else:\n            # API providers don't need model server endpoint\n            self.rerank_server_endpoint = None\n\n    async def _make_direct_rerank_call(\n        self, query: str, passages: list[str]\n    ) -> list[float]:\n        \"\"\"Make direct API call to cloud provider, bypassing model server.\"\"\"\n        if self.provider_type is None:\n            raise ValueError(\"Provider type is required for direct API calls\")\n\n        if self.api_key is None:\n            raise ValueError(\"API key is required for cloud provider\")\n\n        if self.provider_type == RerankerProvider.COHERE:\n            return await cohere_rerank_api(\n                query, passages, self.model_name, self.api_key\n            )\n        elif self.provider_type == RerankerProvider.BEDROCK:\n            aws_access_key_id, aws_secret_access_key, aws_region = pass_aws_key(\n                self.api_key\n            )\n            return await cohere_rerank_aws(\n                query,\n                passages,\n                self.model_name,\n                aws_region,\n                aws_access_key_id,\n                aws_secret_access_key,\n            )\n        elif self.provider_type == RerankerProvider.LITELLM:\n            if self.api_url is None:\n                raise ValueError(\"API URL is required for LiteLLM reranking.\")\n            return await litellm_rerank(\n                query, passages, self.api_url, self.model_name, self.api_key\n            )\n        else:\n            raise ValueError(f\"Unsupported reranking provider: {self.provider_type}\")\n\n    def predict(self, query: str, passages: list[str]) -> list[float]:\n        # Route between direct API calls and model server calls\n        if self.provider_type is not None:\n            # For API providers, make direct API call\n            loop = asyncio.new_event_loop()\n            try:\n                asyncio.set_event_loop(loop)\n                return loop.run_until_complete(\n                    self._make_direct_rerank_call(query, passages)\n                )\n            finally:\n                loop.close()\n        else:\n            # For local models, use model server\n            if self.rerank_server_endpoint is None:\n                raise ValueError(\n                    \"Rerank server endpoint is not configured for local models\"\n                )\n\n            rerank_request = RerankRequest(\n                query=query,\n                documents=passages,\n                model_name=self.model_name,\n                provider_type=self.provider_type,\n                api_key=self.api_key,\n                api_url=self.api_url,\n            )\n\n            response = requests.post(\n                self.rerank_server_endpoint, json=rerank_request.model_dump()\n            )\n            response.raise_for_status()\n\n            return RerankResponse(**response.json()).scores\n\n\nclass QueryAnalysisModel:\n    def __init__(\n        self,\n        model_server_host: str = MODEL_SERVER_HOST,\n        model_server_port: int = MODEL_SERVER_PORT,\n        # Lean heavily towards not throwing out keywords\n        keyword_percent_threshold: float = 0.1,\n        # Lean towards semantic which is the default\n        semantic_percent_threshold: float = 0.4,\n    ) -> None:\n        model_server_url = build_model_server_url(model_server_host, model_server_port)\n        self.intent_server_endpoint = model_server_url + \"/custom/query-analysis\"\n        self.keyword_percent_threshold = keyword_percent_threshold\n        self.semantic_percent_threshold = semantic_percent_threshold\n\n    def predict(\n        self,\n        query: str,\n    ) -> tuple[bool, list[str]]:\n        intent_request = IntentRequest(\n            query=query,\n            keyword_percent_threshold=self.keyword_percent_threshold,\n            semantic_percent_threshold=self.semantic_percent_threshold,\n        )\n\n        response = requests.post(\n            self.intent_server_endpoint, json=intent_request.model_dump()\n        )\n        response.raise_for_status()\n\n        response_model = IntentResponse(**response.json())\n\n        return response_model.is_keyword, response_model.keywords\n\n\ndef warm_up_retry(\n    func: Callable[..., Any],\n    tries: int = 20,\n    delay: int = 5,\n    *args: Any,  # noqa: ARG001\n    **kwargs: Any,  # noqa: ARG001\n) -> Callable[..., Any]:\n    @wraps(func)\n    def wrapper(*args: Any, **kwargs: Any) -> Any:\n        exceptions = []\n        for attempt in range(tries):\n            try:\n                return func(*args, **kwargs)\n            except Exception as e:\n                exceptions.append(e)\n                logger.info(\n                    f\"Attempt {attempt + 1}/{tries} failed; retrying in {delay} seconds...\"\n                )\n                time.sleep(delay)\n        raise Exception(f\"All retries failed: {exceptions}\")\n\n    return wrapper\n\n\ndef warm_up_bi_encoder(\n    embedding_model: EmbeddingModel,\n    non_blocking: bool = False,\n) -> None:\n    if SKIP_WARM_UP:\n        return\n\n    warm_up_str = \" \".join(WARM_UP_STRINGS)\n\n    logger.debug(f\"Warming up encoder model: {embedding_model.model_name}\")\n    get_tokenizer(\n        model_name=embedding_model.model_name,\n        provider_type=embedding_model.provider_type,\n    ).encode(warm_up_str)\n\n    def _warm_up() -> None:\n        try:\n            embedding_model.encode(texts=[warm_up_str], text_type=EmbedTextType.QUERY)\n            logger.debug(\n                f\"Warm-up complete for encoder model: {embedding_model.model_name}\"\n            )\n        except Exception as e:\n            logger.warning(\n                f\"Warm-up request failed for encoder model {embedding_model.model_name}: {e}\"\n            )\n\n    if non_blocking:\n        threading.Thread(target=_warm_up, daemon=True).start()\n        logger.debug(\n            f\"Started non-blocking warm-up for encoder model: {embedding_model.model_name}\"\n        )\n    else:\n        retry_encode = warm_up_retry(embedding_model.encode)\n        retry_encode(texts=[warm_up_str], text_type=EmbedTextType.QUERY)\n\n\n# No longer used\ndef warm_up_cross_encoder(\n    rerank_model_name: str,\n    non_blocking: bool = False,\n) -> None:\n    if SKIP_WARM_UP:\n        return\n\n    logger.debug(f\"Warming up reranking model: {rerank_model_name}\")\n\n    reranking_model = RerankingModel(\n        model_name=rerank_model_name,\n        provider_type=None,\n        api_url=None,\n        api_key=None,\n    )\n\n    def _warm_up() -> None:\n        try:\n            reranking_model.predict(WARM_UP_STRINGS[0], WARM_UP_STRINGS[1:])\n            logger.debug(f\"Warm-up complete for reranking model: {rerank_model_name}\")\n        except Exception as e:\n            logger.warning(\n                f\"Warm-up request failed for reranking model {rerank_model_name}: {e}\"\n            )\n\n    if non_blocking:\n        threading.Thread(target=_warm_up, daemon=True).start()\n        logger.debug(\n            f\"Started non-blocking warm-up for reranking model: {rerank_model_name}\"\n        )\n    else:\n        retry_rerank = warm_up_retry(reranking_model.predict)\n        retry_rerank(WARM_UP_STRINGS[0], WARM_UP_STRINGS[1:])\n"
  },
  {
    "path": "backend/onyx/natural_language_processing/utils.py",
    "content": "import os\nfrom abc import ABC\nfrom abc import abstractmethod\nfrom copy import copy\n\nfrom tokenizers import Encoding  # type: ignore[import-untyped]\nfrom tokenizers import Tokenizer\n\nfrom onyx.configs.model_configs import DOCUMENT_ENCODER_MODEL\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE\nfrom shared_configs.enums import EmbeddingProvider\n\nTRIM_SEP_PAT = \"\\n... {n} tokens removed...\\n\"\n\nlogger = setup_logger()\nos.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\nos.environ[\"HF_HUB_DISABLE_TELEMETRY\"] = \"1\"\n\n\nclass BaseTokenizer(ABC):\n    @abstractmethod\n    def encode(self, string: str) -> list[int]:\n        pass\n\n    @abstractmethod\n    def tokenize(self, string: str) -> list[str]:\n        pass\n\n    @abstractmethod\n    def decode(self, tokens: list[int]) -> str:\n        pass\n\n\nclass TiktokenTokenizer(BaseTokenizer):\n    _instances: dict[str, \"TiktokenTokenizer\"] = {}\n\n    def __new__(cls, model_name: str) -> \"TiktokenTokenizer\":\n        if model_name not in cls._instances:\n            cls._instances[model_name] = super(TiktokenTokenizer, cls).__new__(cls)\n        return cls._instances[model_name]\n\n    def __init__(self, model_name: str):\n        if not hasattr(self, \"encoder\"):\n            import tiktoken\n\n            self.encoder = tiktoken.encoding_for_model(model_name)\n\n    def encode(self, string: str) -> list[int]:\n        # this ignores special tokens that the model is trained on, see encode_ordinary for details\n        return self.encoder.encode_ordinary(string)\n\n    def tokenize(self, string: str) -> list[str]:\n        encoded = self.encode(string)\n        decoded = [self.encoder.decode([token]) for token in encoded]\n\n        if len(decoded) != len(encoded):\n            logger.warning(\n                f\"OpenAI tokenized length {len(decoded)} does not match encoded length {len(encoded)} for string: {string}\"\n            )\n\n        return decoded\n\n    def decode(self, tokens: list[int]) -> str:\n        return self.encoder.decode(tokens)\n\n\nclass HuggingFaceTokenizer(BaseTokenizer):\n    def __init__(self, model_name: str):\n        self.encoder: Tokenizer = Tokenizer.from_pretrained(model_name)\n\n    def _safer_encode(self, string: str) -> Encoding:\n        \"\"\"\n        Encode a string using the HuggingFaceTokenizer, but if it fails,\n        encode the string as ASCII and decode it back to a string. This helps\n        in cases where the string has weird characters like \\udeb4.\n        \"\"\"\n        try:\n            return self.encoder.encode(string, add_special_tokens=False)\n        except Exception:\n            return self.encoder.encode(\n                string.encode(\"ascii\", \"ignore\").decode(), add_special_tokens=False\n            )\n\n    def encode(self, string: str) -> list[int]:\n        # this returns no special tokens\n        return self._safer_encode(string).ids\n\n    def tokenize(self, string: str) -> list[str]:\n        return self._safer_encode(string).tokens\n\n    def decode(self, tokens: list[int]) -> str:\n        return self.encoder.decode(tokens)\n\n\n_TOKENIZER_CACHE: dict[tuple[EmbeddingProvider | None, str | None], BaseTokenizer] = {}\n\n\ndef _check_tokenizer_cache(\n    model_provider: EmbeddingProvider | None, model_name: str | None\n) -> BaseTokenizer:\n    global _TOKENIZER_CACHE\n    id_tuple = (model_provider, model_name)\n\n    if id_tuple not in _TOKENIZER_CACHE:\n        tokenizer = None\n\n        if model_name:\n            tokenizer = _try_initialize_tokenizer(model_name, model_provider)\n\n        if not tokenizer:\n            logger.info(\n                f\"Falling back to default embedding model tokenizer: {DOCUMENT_ENCODER_MODEL}\"\n            )\n            tokenizer = _get_default_tokenizer()\n\n        _TOKENIZER_CACHE[id_tuple] = tokenizer\n\n    return _TOKENIZER_CACHE[id_tuple]\n\n\ndef _try_initialize_tokenizer(\n    model_name: str, model_provider: EmbeddingProvider | None\n) -> BaseTokenizer | None:\n    tokenizer: BaseTokenizer | None = None\n\n    if model_provider is not None:\n        # Try using TiktokenTokenizer first if model_provider exists\n        try:\n            tokenizer = TiktokenTokenizer(model_name)\n            logger.info(f\"Initialized TiktokenTokenizer for: {model_name}\")\n            return tokenizer\n        except Exception as tiktoken_error:\n            logger.debug(\n                f\"TiktokenTokenizer not available for model {model_name}: {tiktoken_error}\"\n            )\n    else:\n        # If no provider specified, try HuggingFaceTokenizer\n        try:\n            tokenizer = HuggingFaceTokenizer(model_name)\n            logger.info(f\"Initialized HuggingFaceTokenizer for: {model_name}\")\n            return tokenizer\n        except Exception as hf_error:\n            logger.warning(\n                f\"Failed to initialize HuggingFaceTokenizer for {model_name}: {hf_error}\"\n            )\n\n    # If both initializations fail, return None\n    return None\n\n\n_DEFAULT_TOKENIZER: BaseTokenizer | None = None\n\n\ndef _get_default_tokenizer() -> BaseTokenizer:\n    \"\"\"Lazy-load the default tokenizer to avoid loading it at module import time.\"\"\"\n    global _DEFAULT_TOKENIZER\n    if _DEFAULT_TOKENIZER is None:\n        _DEFAULT_TOKENIZER = HuggingFaceTokenizer(DOCUMENT_ENCODER_MODEL)\n    return _DEFAULT_TOKENIZER\n\n\ndef get_tokenizer(\n    model_name: str | None, provider_type: EmbeddingProvider | str | None\n) -> BaseTokenizer:\n    if isinstance(provider_type, str):\n        try:\n            provider_type = EmbeddingProvider(provider_type)\n        except ValueError:\n            logger.debug(\n                f\"Invalid provider_type '{provider_type}'. Falling back to default tokenizer.\"\n            )\n            return _get_default_tokenizer()\n    return _check_tokenizer_cache(provider_type, model_name)\n\n\n# Max characters per encode() call.\n_ENCODE_CHUNK_SIZE = 500_000\n\n\ndef count_tokens(\n    text: str,\n    tokenizer: BaseTokenizer,\n    token_limit: int | None = None,\n) -> int:\n    \"\"\"Count tokens, chunking the input to avoid tiktoken stack overflow.\n\n    If token_limit is provided and the text is large enough to require\n    multiple chunks (> 500k chars), stops early once the count exceeds it.\n    When early-exiting, the returned value exceeds token_limit but may be\n    less than the true full token count.\n    \"\"\"\n    if len(text) <= _ENCODE_CHUNK_SIZE:\n        return len(tokenizer.encode(text))\n    total = 0\n    for start in range(0, len(text), _ENCODE_CHUNK_SIZE):\n        total += len(tokenizer.encode(text[start : start + _ENCODE_CHUNK_SIZE]))\n        if token_limit is not None and total > token_limit:\n            return total  # Already over — skip remaining chunks\n    return total\n\n\ndef tokenizer_trim_content(\n    content: str, desired_length: int, tokenizer: BaseTokenizer\n) -> str:\n    tokens = tokenizer.encode(content)\n    if len(tokens) <= desired_length:\n        return content\n\n    return tokenizer.decode(tokens[:desired_length])\n\n\ndef tokenizer_trim_middle(\n    tokens: list[int], desired_length: int, tokenizer: BaseTokenizer\n) -> str:\n    if len(tokens) <= desired_length:\n        return tokenizer.decode(tokens)\n    sep_str = TRIM_SEP_PAT.format(n=len(tokens) - desired_length)\n    sep_tokens = tokenizer.encode(sep_str)\n    slice_size = (desired_length - len(sep_tokens)) // 2\n    assert slice_size > 0, \"Slice size is not positive, desired length is too short\"\n    return (\n        tokenizer.decode(tokens[:slice_size])\n        + sep_str\n        + tokenizer.decode(tokens[-slice_size:])\n    )\n\n\ndef tokenizer_trim_chunks(\n    chunks: list[InferenceChunk],\n    tokenizer: BaseTokenizer,\n    max_chunk_toks: int = DOC_EMBEDDING_CONTEXT_SIZE,\n) -> list[InferenceChunk]:\n    new_chunks = copy(chunks)\n    for ind, chunk in enumerate(new_chunks):\n        new_content = tokenizer_trim_content(chunk.content, max_chunk_toks, tokenizer)\n        if len(new_content) != len(chunk.content):\n            new_chunk = copy(chunk)\n            new_chunk.content = new_content\n            new_chunks[ind] = new_chunk\n    return new_chunks\n"
  },
  {
    "path": "backend/onyx/onyxbot/discord/DISCORD_MULTITENANT_README.md",
    "content": "# Discord Bot Multitenant Architecture\n\nThis document analyzes how the Discord cache manager and API client coordinate to handle multitenant API keys from a single Discord client.\n\n## Overview\n\nThe Discord bot uses a **single-client, multi-tenant** architecture where one `OnyxDiscordClient` instance serves multiple tenants (organizations) simultaneously. Tenant isolation is achieved through:\n\n- **Cache Manager**: Maps Discord guilds to tenants and stores per-tenant API keys\n- **API Client**: Stateless HTTP client that accepts dynamic API keys per request\n\n```\n┌─────────────────────────────────────────────────────────────────────┐\n│                      OnyxDiscordClient                              │\n│                                                                     │\n│  ┌─────────────────────────┐    ┌─────────────────────────────┐    │\n│  │   DiscordCacheManager   │    │      OnyxAPIClient          │    │\n│  │                         │    │                             │    │\n│  │  guild_id → tenant_id   │───▶│  send_chat_message(         │    │\n│  │  tenant_id → api_key    │    │    message,                 │    │\n│  │                         │    │    api_key=<per-tenant>,    │    │\n│  └─────────────────────────┘    │    persona_id=...           │    │\n│                                 │  )                          │    │\n│                                 └─────────────────────────────┘    │\n└─────────────────────────────────────────────────────────────────────┘\n```\n\n---\n\n## Component Details\n\n### 1. Cache Manager (`backend/onyx/onyxbot/discord/cache.py`)\n\nThe `DiscordCacheManager` maintains two critical in-memory mappings:\n\n```python\nclass DiscordCacheManager:\n    _guild_tenants: dict[int, str]   # guild_id → tenant_id\n    _api_keys: dict[str, str]        # tenant_id → api_key\n    _lock: asyncio.Lock              # Concurrency control\n```\n\n#### Key Responsibilities\n\n| Function | Purpose |\n|----------|---------|\n| `get_tenant(guild_id)` | O(1) lookup: guild → tenant |\n| `get_api_key(tenant_id)` | O(1) lookup: tenant → API key |\n| `refresh_all()` | Full cache rebuild from database |\n| `refresh_guild()` | Incremental update for single guild |\n\n#### API Key Provisioning Strategy\n\nAPI keys are **lazily provisioned** - only created when first needed:\n\n```python\nasync def _load_tenant_data(self, tenant_id: str) -> tuple[list[int], str | None]:\n    needs_key = tenant_id not in self._api_keys\n\n    with get_session_with_tenant(tenant_id) as db:\n        # Load guild configs\n        configs = get_discord_bot_configs(db)\n        guild_ids = [c.guild_id for c in configs if c.enabled]\n\n        # Only provision API key if not already cached\n        api_key = None\n        if needs_key:\n            api_key = get_or_create_discord_service_api_key(db, tenant_id)\n\n    return guild_ids, api_key\n```\n\nThis optimization avoids repeated database calls for API key generation.\n\n#### Concurrency Control\n\nAll write operations acquire an async lock to prevent race conditions:\n\n```python\nasync def refresh_all(self) -> None:\n    async with self._lock:\n        # Safe to modify _guild_tenants and _api_keys\n        for tenant_id in get_all_tenant_ids():\n            guild_ids, api_key = await self._load_tenant_data(tenant_id)\n            # Update mappings...\n```\n\nRead operations (`get_tenant`, `get_api_key`) are lock-free since Python dict lookups are atomic.\n\n---\n\n### 2. API Client (`backend/onyx/onyxbot/discord/api_client.py`)\n\nThe `OnyxAPIClient` is a **stateless async HTTP client** that communicates with Onyx API pods.\n\n#### Key Design: Per-Request API Key Injection\n\n```python\nclass OnyxAPIClient:\n    async def send_chat_message(\n        self,\n        message: str,\n        api_key: str,           # Injected per-request\n        persona_id: int | None,\n        ...\n    ) -> ChatFullResponse:\n        headers = {\n            \"Content-Type\": \"application/json\",\n            \"Authorization\": f\"Bearer {api_key}\",  # Tenant-specific auth\n        }\n        # Make request...\n```\n\nThe client accepts `api_key` as a parameter to each method, enabling **dynamic tenant selection at request time**. This design allows a single client instance to serve multiple tenants:\n\n```python\n# Same client, different tenants\nawait api_client.send_chat_message(msg, api_key=key_for_tenant_1, ...)\nawait api_client.send_chat_message(msg, api_key=key_for_tenant_2, ...)\n```\n\n---\n\n## Coordination Flow\n\n### Message Processing Pipeline\n\nWhen a Discord message arrives, the client coordinates cache and API client:\n\n```python\nasync def on_message(self, message: Message) -> None:\n    guild_id = message.guild.id\n\n    # Step 1: Cache lookup - guild → tenant\n    tenant_id = self.cache.get_tenant(guild_id)\n    if not tenant_id:\n        return  # Guild not registered\n\n    # Step 2: Cache lookup - tenant → API key\n    api_key = self.cache.get_api_key(tenant_id)\n    if not api_key:\n        logger.warning(f\"No API key for tenant {tenant_id}\")\n        return\n\n    # Step 3: API call with tenant-specific credentials\n    await process_chat_message(\n        message=message,\n        api_key=api_key,              # Tenant-specific\n        persona_id=persona_id,         # Tenant-specific\n        api_client=self.api_client,\n    )\n```\n\n### Startup Sequence\n\n```python\nasync def setup_hook(self) -> None:\n    # 1. Initialize API client (create aiohttp session)\n    await self.api_client.initialize()\n\n    # 2. Populate cache with all tenants\n    await self.cache.refresh_all()\n\n    # 3. Start background refresh task\n    self._cache_refresh_task = self.loop.create_task(\n        self._periodic_cache_refresh()  # Every 60 seconds\n    )\n```\n\n### Shutdown Sequence\n\n```python\nasync def close(self) -> None:\n    # 1. Cancel background refresh\n    if self._cache_refresh_task:\n        self._cache_refresh_task.cancel()\n\n    # 2. Close Discord connection\n    await super().close()\n\n    # 3. Close API client session\n    await self.api_client.close()\n\n    # 4. Clear cache\n    self.cache.clear()\n```\n\n---\n\n## Tenant Isolation Mechanisms\n\n### 1. Per-Tenant API Keys\n\nEach tenant has a dedicated service API key:\n\n```python\n# backend/onyx/db/discord_bot.py\ndef get_or_create_discord_service_api_key(db_session: Session, tenant_id: str) -> str:\n    existing = get_discord_service_api_key(db_session)\n    if existing:\n        return regenerate_key(existing)\n\n    # Create LIMITED role key (chat-only permissions)\n    return insert_api_key(\n        db_session=db_session,\n        api_key_args=APIKeyArgs(\n            name=DISCORD_SERVICE_API_KEY_NAME,\n            role=UserRole.LIMITED,  # Minimal permissions\n        ),\n        user_id=None,  # Service account (system-owned)\n    ).api_key\n```\n\n### 2. Database Context Variables\n\nThe cache uses context variables for proper tenant-scoped DB sessions:\n\n```python\ncontext_token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\ntry:\n    with get_session_with_tenant(tenant_id) as db:\n        # All DB operations scoped to this tenant\n        ...\nfinally:\n    CURRENT_TENANT_ID_CONTEXTVAR.reset(context_token)\n```\n\n### 3. Enterprise Gating Support\n\nGated tenants are filtered during cache refresh:\n\n```python\ngated_tenants = fetch_ee_implementation_or_noop(\n    \"onyx.server.tenants.product_gating\",\n    \"get_gated_tenants\",\n    set(),\n)()\n\nfor tenant_id in get_all_tenant_ids():\n    if tenant_id in gated_tenants:\n        continue  # Skip gated tenants\n```\n\n---\n\n## Cache Refresh Strategy\n\n| Trigger | Method | Scope |\n|---------|--------|-------|\n| Startup | `refresh_all()` | All tenants |\n| Periodic (60s) | `refresh_all()` | All tenants |\n| Guild registration | `refresh_guild()` | Single tenant |\n\n### Error Handling\n\n- **Tenant-level errors**: Logged and skipped (doesn't stop other tenants)\n- **Missing API key**: Bot silently ignores messages from that guild\n- **Network errors**: Logged, cache continues with stale data until next refresh\n\n---\n\n## Key Design Insights\n\n1. **Single Client, Multiple Tenants**: One `OnyxAPIClient` and one `DiscordCacheManager` instance serves all tenants via dynamic API key injection.\n\n2. **Cache-First Architecture**: Guild lookups are O(1) in-memory; API keys are cached after first provisioning to avoid repeated DB calls.\n\n3. **Graceful Degradation**: If an API key is missing or stale, the bot simply doesn't respond (no crash or error propagation).\n\n4. **Thread Safety Without Blocking**: `asyncio.Lock` prevents race conditions while maintaining async concurrency for reads.\n\n5. **Lazy Provisioning**: API keys are only created when first needed, then cached for performance.\n\n6. **Stateless API Client**: The HTTP client holds no tenant state - all tenant context is injected per-request via the `api_key` parameter.\n\n---\n\n## File References\n\n| Component | Path |\n|-----------|------|\n| Cache Manager | `backend/onyx/onyxbot/discord/cache.py` |\n| API Client | `backend/onyx/onyxbot/discord/api_client.py` |\n| Discord Client | `backend/onyx/onyxbot/discord/client.py` |\n| API Key DB Operations | `backend/onyx/db/discord_bot.py` |\n| Cache Manager Tests | `backend/tests/unit/onyx/onyxbot/discord/test_cache_manager.py` |\n| API Client Tests | `backend/tests/unit/onyx/onyxbot/discord/test_api_client.py` |"
  },
  {
    "path": "backend/onyx/onyxbot/discord/api_client.py",
    "content": "\"\"\"Async HTTP client for communicating with Onyx API pods.\"\"\"\n\nimport aiohttp\n\nfrom onyx.chat.models import ChatFullResponse\nfrom onyx.onyxbot.discord.constants import API_REQUEST_TIMEOUT\nfrom onyx.onyxbot.discord.exceptions import APIConnectionError\nfrom onyx.onyxbot.discord.exceptions import APIResponseError\nfrom onyx.onyxbot.discord.exceptions import APITimeoutError\nfrom onyx.server.query_and_chat.models import ChatSessionCreationRequest\nfrom onyx.server.query_and_chat.models import MessageOrigin\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import build_api_server_url_for_http_requests\n\nlogger = setup_logger()\n\n\nclass OnyxAPIClient:\n    \"\"\"Async HTTP client for sending chat requests to Onyx API pods.\n\n    This client manages an aiohttp session for making non-blocking HTTP\n    requests to the Onyx API server. It handles authentication with per-tenant\n    API keys and multi-tenant routing.\n\n    Usage:\n        client = OnyxAPIClient()\n        await client.initialize()\n        try:\n            response = await client.send_chat_message(\n                message=\"What is our deployment process?\",\n                tenant_id=\"tenant_123\",\n                api_key=\"dn_xxx...\",\n                persona_id=1,\n            )\n            print(response.answer)\n        finally:\n            await client.close()\n    \"\"\"\n\n    def __init__(\n        self,\n        timeout: int = API_REQUEST_TIMEOUT,\n    ) -> None:\n        \"\"\"Initialize the API client.\n\n        Args:\n            timeout: Request timeout in seconds.\n        \"\"\"\n        # Helm chart uses API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS to set the base URL\n        # TODO: Ideally, this override is only used when someone is launching an Onyx service independently\n        self._base_url = build_api_server_url_for_http_requests(\n            respect_env_override_if_set=True\n        ).rstrip(\"/\")\n        self._timeout = timeout\n        self._session: aiohttp.ClientSession | None = None\n\n    async def initialize(self) -> None:\n        \"\"\"Create the aiohttp session.\n\n        Must be called before making any requests. The session is created\n        with a total timeout and connection timeout.\n        \"\"\"\n        if self._session is not None:\n            logger.warning(\"API client session already initialized\")\n            return\n\n        timeout = aiohttp.ClientTimeout(\n            total=self._timeout,\n            connect=30,  # 30 seconds to establish connection\n        )\n        self._session = aiohttp.ClientSession(timeout=timeout)\n        logger.info(f\"API client initialized with base URL: {self._base_url}\")\n\n    async def close(self) -> None:\n        \"\"\"Close the aiohttp session.\n\n        Should be called when shutting down the bot to properly release\n        resources.\n        \"\"\"\n        if self._session is not None:\n            await self._session.close()\n            self._session = None\n            logger.info(\"API client session closed\")\n\n    @property\n    def is_initialized(self) -> bool:\n        \"\"\"Check if the session is initialized.\"\"\"\n        return self._session is not None\n\n    async def send_chat_message(\n        self,\n        message: str,\n        api_key: str,\n        persona_id: int | None = None,\n    ) -> ChatFullResponse:\n        \"\"\"Send a chat message to the Onyx API server and get a response.\n\n        This method sends a non-streaming chat request to the API server. The response\n        contains the complete answer with any citations and metadata.\n\n        Args:\n            message: The user's message to process.\n            api_key: The API key for authentication.\n            persona_id: Optional persona ID to use for the response.\n\n        Returns:\n            ChatFullResponse containing the answer, citations, and metadata.\n\n        Raises:\n            APIConnectionError: If unable to connect to the API.\n            APITimeoutError: If the request times out.\n            APIResponseError: If the API returns an error response.\n        \"\"\"\n        if self._session is None:\n            raise APIConnectionError(\n                \"API client not initialized. Call initialize() first.\"\n            )\n\n        url = f\"{self._base_url}/chat/send-chat-message\"\n\n        # Build request payload\n        request = SendMessageRequest(\n            message=message,\n            stream=False,\n            origin=MessageOrigin.DISCORDBOT,\n            chat_session_info=ChatSessionCreationRequest(\n                persona_id=persona_id if persona_id is not None else 0,\n            ),\n        )\n\n        # Build headers\n        headers = {\n            \"Content-Type\": \"application/json\",\n            \"Authorization\": f\"Bearer {api_key}\",\n        }\n\n        try:\n            async with self._session.post(\n                url,\n                json=request.model_dump(mode=\"json\"),\n                headers=headers,\n            ) as response:\n                if response.status == 401:\n                    raise APIResponseError(\n                        \"Authentication failed - invalid API key\",\n                        status_code=401,\n                    )\n                elif response.status == 403:\n                    raise APIResponseError(\n                        \"Access denied - insufficient permissions\",\n                        status_code=403,\n                    )\n                elif response.status == 404:\n                    raise APIResponseError(\n                        \"API endpoint not found\",\n                        status_code=404,\n                    )\n                elif response.status >= 500:\n                    error_text = await response.text()\n                    raise APIResponseError(\n                        f\"Server error: {error_text}\",\n                        status_code=response.status,\n                    )\n                elif response.status >= 400:\n                    error_text = await response.text()\n                    raise APIResponseError(\n                        f\"Request error: {error_text}\",\n                        status_code=response.status,\n                    )\n\n                # Parse successful response\n                data = await response.json()\n                response_obj = ChatFullResponse.model_validate(data)\n\n                if response_obj.error_msg:\n                    logger.warning(f\"Chat API returned error: {response_obj.error_msg}\")\n\n                return response_obj\n\n        except aiohttp.ClientConnectorError as e:\n            logger.error(f\"Failed to connect to API: {e}\")\n            raise APIConnectionError(\n                f\"Failed to connect to API at {self._base_url}: {e}\"\n            ) from e\n\n        except TimeoutError as e:\n            logger.error(f\"API request timed out after {self._timeout}s\")\n            raise APITimeoutError(\n                f\"Request timed out after {self._timeout} seconds\"\n            ) from e\n\n        except aiohttp.ClientError as e:\n            logger.error(f\"HTTP client error: {e}\")\n            raise APIConnectionError(f\"HTTP client error: {e}\") from e\n\n    async def health_check(self) -> bool:\n        \"\"\"Check if the API server is healthy.\n\n        Returns:\n            True if the API server is reachable and healthy, False otherwise.\n        \"\"\"\n        if self._session is None:\n            logger.warning(\"API client not initialized. Call initialize() first.\")\n            return False\n\n        try:\n            url = f\"{self._base_url}/health\"\n            async with self._session.get(\n                url, timeout=aiohttp.ClientTimeout(total=10)\n            ) as response:\n                return response.status == 200\n        except Exception as e:\n            logger.warning(f\"API server health check failed: {e}\")\n            return False\n"
  },
  {
    "path": "backend/onyx/onyxbot/discord/cache.py",
    "content": "\"\"\"Multi-tenant cache for Discord bot guild-tenant mappings and API keys.\"\"\"\n\nimport asyncio\n\nfrom onyx.db.discord_bot import get_guild_configs\nfrom onyx.db.discord_bot import get_or_create_discord_service_api_key\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.engine.tenant_utils import get_all_tenant_ids\nfrom onyx.onyxbot.discord.exceptions import CacheError\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\nlogger = setup_logger()\n\n\nclass DiscordCacheManager:\n    \"\"\"Caches guild->tenant mappings and tenant->API key mappings.\n\n    Refreshed on startup, periodically (every 60s), and when guilds register.\n    \"\"\"\n\n    def __init__(self) -> None:\n        self._guild_tenants: dict[int, str] = {}  # guild_id -> tenant_id\n        self._api_keys: dict[str, str] = {}  # tenant_id -> api_key\n        self._lock = asyncio.Lock()\n        self._initialized = False\n\n    @property\n    def is_initialized(self) -> bool:\n        return self._initialized\n\n    async def refresh_all(self) -> None:\n        \"\"\"Full cache refresh from all tenants.\"\"\"\n        async with self._lock:\n            logger.info(\"Starting Discord cache refresh\")\n\n            new_guild_tenants: dict[int, str] = {}\n            new_api_keys: dict[str, str] = {}\n\n            try:\n                gated = fetch_ee_implementation_or_noop(\n                    \"onyx.server.tenants.product_gating\",\n                    \"get_gated_tenants\",\n                    set(),\n                )()\n\n                tenant_ids = await asyncio.to_thread(get_all_tenant_ids)\n                for tenant_id in tenant_ids:\n                    if tenant_id in gated:\n                        continue\n\n                    context_token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n                    try:\n                        guild_ids, api_key = await self._load_tenant_data(tenant_id)\n                        if not guild_ids:\n                            logger.debug(f\"No guilds found for tenant {tenant_id}\")\n                            continue\n\n                        if not api_key:\n                            logger.warning(\n                                \"Discord service API key missing for tenant that has registered guilds. \"\n                                f\"{tenant_id} will not be handled in this refresh cycle.\"\n                            )\n                            continue\n\n                        for guild_id in guild_ids:\n                            new_guild_tenants[guild_id] = tenant_id\n\n                        new_api_keys[tenant_id] = api_key\n                    except Exception as e:\n                        logger.warning(f\"Failed to refresh tenant {tenant_id}: {e}\")\n                    finally:\n                        CURRENT_TENANT_ID_CONTEXTVAR.reset(context_token)\n\n                self._guild_tenants = new_guild_tenants\n                self._api_keys = new_api_keys\n                self._initialized = True\n\n                logger.info(\n                    f\"Cache refresh complete: {len(new_guild_tenants)} guilds, {len(new_api_keys)} tenants\"\n                )\n\n            except Exception as e:\n                logger.error(f\"Cache refresh failed: {e}\")\n                raise CacheError(f\"Failed to refresh cache: {e}\") from e\n\n    async def refresh_guild(self, guild_id: int, tenant_id: str) -> None:\n        \"\"\"Add a single guild to cache after registration.\"\"\"\n        async with self._lock:\n            logger.info(f\"Refreshing cache for guild {guild_id} (tenant: {tenant_id})\")\n\n            guild_ids, api_key = await self._load_tenant_data(tenant_id)\n\n            if guild_id in guild_ids:\n                self._guild_tenants[guild_id] = tenant_id\n                if api_key:\n                    self._api_keys[tenant_id] = api_key\n                logger.info(f\"Cache updated for guild {guild_id}\")\n            else:\n                logger.warning(f\"Guild {guild_id} not found or disabled\")\n\n    async def _load_tenant_data(self, tenant_id: str) -> tuple[list[int], str | None]:\n        \"\"\"Load guild IDs and provision API key if needed.\n\n        Returns:\n            (active_guild_ids, api_key) - api_key is the cached key if available,\n            otherwise a newly created key. Returns None if no guilds found.\n        \"\"\"\n        cached_key = self._api_keys.get(tenant_id)\n\n        def _sync() -> tuple[list[int], str | None]:\n            with get_session_with_tenant(tenant_id=tenant_id) as db:\n                configs = get_guild_configs(db)\n                guild_ids = [\n                    config.guild_id\n                    for config in configs\n                    if config.enabled and config.guild_id is not None\n                ]\n\n                if not guild_ids:\n                    return [], None\n\n                if not cached_key:\n                    new_key = get_or_create_discord_service_api_key(db, tenant_id)\n                    db.commit()\n                    return guild_ids, new_key\n\n                return guild_ids, cached_key\n\n        return await asyncio.to_thread(_sync)\n\n    def get_tenant(self, guild_id: int) -> str | None:\n        \"\"\"Get tenant ID for a guild.\"\"\"\n        return self._guild_tenants.get(guild_id)\n\n    def get_api_key(self, tenant_id: str) -> str | None:\n        \"\"\"Get API key for a tenant.\"\"\"\n        return self._api_keys.get(tenant_id)\n\n    def remove_guild(self, guild_id: int) -> None:\n        \"\"\"Remove a guild from cache.\"\"\"\n        self._guild_tenants.pop(guild_id, None)\n\n    def get_all_guild_ids(self) -> list[int]:\n        \"\"\"Get all cached guild IDs.\"\"\"\n        return list(self._guild_tenants.keys())\n\n    def clear(self) -> None:\n        \"\"\"Clear all caches.\"\"\"\n        self._guild_tenants.clear()\n        self._api_keys.clear()\n        self._initialized = False\n"
  },
  {
    "path": "backend/onyx/onyxbot/discord/client.py",
    "content": "\"\"\"Discord bot client with integrated message handling.\"\"\"\n\nimport asyncio\nimport time\n\nimport discord\nfrom discord.ext import commands\n\nfrom onyx.configs.app_configs import DISCORD_BOT_INVOKE_CHAR\nfrom onyx.onyxbot.discord.api_client import OnyxAPIClient\nfrom onyx.onyxbot.discord.cache import DiscordCacheManager\nfrom onyx.onyxbot.discord.constants import CACHE_REFRESH_INTERVAL\nfrom onyx.onyxbot.discord.handle_commands import handle_dm\nfrom onyx.onyxbot.discord.handle_commands import handle_registration_command\nfrom onyx.onyxbot.discord.handle_commands import handle_sync_channels_command\nfrom onyx.onyxbot.discord.handle_message import process_chat_message\nfrom onyx.onyxbot.discord.handle_message import should_respond\nfrom onyx.onyxbot.discord.utils import get_bot_token\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass OnyxDiscordClient(commands.Bot):\n    \"\"\"Discord bot client with integrated cache, API client, and message handling.\n\n    This client handles:\n    - Guild registration via !register command\n    - Message processing with persona-based responses\n    - Thread context for conversation continuity\n    - Multi-tenant support via cached API keys\n    \"\"\"\n\n    def __init__(self, command_prefix: str = DISCORD_BOT_INVOKE_CHAR) -> None:\n        intents = discord.Intents.default()\n        intents.message_content = True\n        intents.members = True\n\n        super().__init__(command_prefix=command_prefix, intents=intents)\n\n        self.ready = False\n        self.cache = DiscordCacheManager()\n        self.api_client = OnyxAPIClient()\n        self._cache_refresh_task: asyncio.Task | None = None\n\n    # -------------------------------------------------------------------------\n    # Lifecycle Methods\n    # -------------------------------------------------------------------------\n\n    async def setup_hook(self) -> None:\n        \"\"\"Called before on_ready. Initialize components.\"\"\"\n        logger.info(\"Initializing Discord bot components...\")\n\n        # Initialize API client\n        await self.api_client.initialize()\n\n        # Initial cache load\n        await self.cache.refresh_all()\n\n        # Start periodic cache refresh\n        self._cache_refresh_task = self.loop.create_task(self._periodic_cache_refresh())\n\n        logger.info(\"Discord bot components initialized\")\n\n    async def _periodic_cache_refresh(self) -> None:\n        \"\"\"Background task to refresh cache periodically.\"\"\"\n        while not self.is_closed():\n            await asyncio.sleep(CACHE_REFRESH_INTERVAL)\n            try:\n                await self.cache.refresh_all()\n            except Exception as e:\n                logger.error(f\"Cache refresh failed: {e}\")\n\n    async def on_ready(self) -> None:\n        \"\"\"Bot connected and ready.\"\"\"\n        if self.ready:\n            return\n\n        if not self.user:\n            raise RuntimeError(\"Critical error: Discord Bot user not found\")\n\n        logger.info(f\"Discord Bot connected as {self.user} (ID: {self.user.id})\")\n        logger.info(f\"Connected to {len(self.guilds)} guild(s)\")\n        logger.info(f\"Cached {len(self.cache.get_all_guild_ids())} registered guild(s)\")\n\n        self.ready = True\n\n    async def close(self) -> None:\n        \"\"\"Graceful shutdown.\"\"\"\n        logger.info(\"Shutting down Discord bot...\")\n\n        # Cancel cache refresh task\n        if self._cache_refresh_task:\n            self._cache_refresh_task.cancel()\n            try:\n                await self._cache_refresh_task\n            except asyncio.CancelledError:\n                pass\n\n        # Close Discord connection first - stops new commands from triggering cache ops\n        if not self.is_closed():\n            await super().close()\n\n        # Close API client\n        await self.api_client.close()\n\n        # Clear cache (safe now - no concurrent operations possible)\n        self.cache.clear()\n\n        self.ready = False\n        logger.info(\"Discord bot shutdown complete\")\n\n    # -------------------------------------------------------------------------\n    # Message Handling\n    # -------------------------------------------------------------------------\n\n    async def on_message(self, message: discord.Message) -> None:\n        \"\"\"Main message handler.\"\"\"\n        # mypy\n        if not self.user:\n            raise RuntimeError(\"Critical error: Discord Bot user not found\")\n\n        try:\n            # Ignore bot messages\n            if message.author.bot:\n                return\n\n            # Ignore thread starter messages (empty reference nodes that don't contain content)\n            if message.type == discord.MessageType.thread_starter_message:\n                return\n\n            # Handle DMs\n            if isinstance(message.channel, discord.DMChannel):\n                await handle_dm(message)\n                return\n\n            # Must have a guild\n            if not message.guild or not message.guild.id:\n                return\n\n            guild_id = message.guild.id\n\n            # Check for registration command first\n            if await handle_registration_command(message, self.cache):\n                return\n\n            # Look up guild in cache\n            tenant_id = self.cache.get_tenant(guild_id)\n\n            # Check for sync-channels command (requires registered guild)\n            if await handle_sync_channels_command(message, tenant_id, self):\n                return\n\n            if not tenant_id:\n                # Guild not registered, ignore\n                return\n\n            # Get API key\n            api_key = self.cache.get_api_key(tenant_id)\n            if not api_key:\n                logger.warning(f\"No API key cached for tenant {tenant_id}\")\n                return\n\n            # Check if bot should respond\n            should_respond_context = await should_respond(message, tenant_id, self.user)\n\n            if not should_respond_context.should_respond:\n                return\n\n            logger.debug(\n                f\"Processing message: '{message.content[:50]}' in \"\n                f\"#{getattr(message.channel, 'name', 'unknown')} ({message.guild.name}), \"\n                f\"persona_id={should_respond_context.persona_id}\"\n            )\n\n            # Process the message\n            await process_chat_message(\n                message=message,\n                api_key=api_key,\n                persona_id=should_respond_context.persona_id,\n                thread_only_mode=should_respond_context.thread_only_mode,\n                api_client=self.api_client,\n                bot_user=self.user,\n            )\n\n        except Exception as e:\n            logger.exception(f\"Error processing message: {e}\")\n\n\n# -----------------------------------------------------------------------------\n# Entry Point\n# -----------------------------------------------------------------------------\n\n\ndef main() -> None:\n    \"\"\"Main entry point for Discord bot.\"\"\"\n    from onyx.db.engine.sql_engine import SqlEngine\n    from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable\n\n    logger.info(\"Starting Onyx Discord Bot...\")\n\n    # Initialize the database engine (required before any DB operations)\n    SqlEngine.init_engine(pool_size=20, max_overflow=5)\n\n    # Initialize EE features based on environment\n    set_is_ee_based_on_env_variable()\n\n    counter = 0\n    while True:\n        token = get_bot_token()\n        if not token:\n            if counter % 180 == 0:\n                logger.info(\n                    \"Discord bot is dormant. Waiting for token configuration...\"\n                )\n            counter += 1\n            time.sleep(5)\n            continue\n        counter = 0\n        bot = OnyxDiscordClient()\n\n        try:\n            # bot.run() handles SIGINT/SIGTERM and calls close() automatically\n            bot.run(token)\n\n        except Exception:\n            logger.exception(\"Fatal error in Discord bot\")\n            raise\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/onyx/onyxbot/discord/constants.py",
    "content": "\"\"\"Discord bot constants.\"\"\"\n\n# API settings\nAPI_REQUEST_TIMEOUT: int = 3 * 60  # 3 minutes\n\n# Cache settings\nCACHE_REFRESH_INTERVAL: int = 60  # 1 minute\n\n# Message settings\nMAX_MESSAGE_LENGTH: int = 2000  # Discord's character limit\nMAX_CONTEXT_MESSAGES: int = 10  # Max messages to include in conversation context\n# Note: Discord.py's add_reaction() requires unicode emoji, not :name: format\nTHINKING_EMOJI: str = \"🤔\"  # U+1F914 - Thinking Face\nSUCCESS_EMOJI: str = \"✅\"  # U+2705 - White Heavy Check Mark\nERROR_EMOJI: str = \"❌\"  # U+274C - Cross Mark\n\n# Command prefix\nREGISTER_COMMAND: str = \"register\"\nSYNC_CHANNELS_COMMAND: str = \"sync-channels\"\n"
  },
  {
    "path": "backend/onyx/onyxbot/discord/exceptions.py",
    "content": "\"\"\"Custom exception classes for Discord bot.\"\"\"\n\n\nclass DiscordBotError(Exception):\n    \"\"\"Base exception for Discord bot errors.\"\"\"\n\n\nclass RegistrationError(DiscordBotError):\n    \"\"\"Error during guild registration.\"\"\"\n\n\nclass SyncChannelsError(DiscordBotError):\n    \"\"\"Error during channel sync.\"\"\"\n\n\nclass APIError(DiscordBotError):\n    \"\"\"Base API error.\"\"\"\n\n\nclass CacheError(DiscordBotError):\n    \"\"\"Error during cache operations.\"\"\"\n\n\nclass APIConnectionError(APIError):\n    \"\"\"Failed to connect to API.\"\"\"\n\n\nclass APITimeoutError(APIError):\n    \"\"\"Request timed out.\"\"\"\n\n\nclass APIResponseError(APIError):\n    \"\"\"API returned an error response.\"\"\"\n\n    def __init__(self, message: str, status_code: int | None = None):\n        super().__init__(message)\n        self.status_code = status_code\n"
  },
  {
    "path": "backend/onyx/onyxbot/discord/handle_commands.py",
    "content": "\"\"\"Discord bot command handlers for registration and channel sync.\"\"\"\n\nimport asyncio\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport discord\n\nfrom onyx.configs.app_configs import DISCORD_BOT_INVOKE_CHAR\nfrom onyx.configs.constants import ONYX_DISCORD_URL\nfrom onyx.db.discord_bot import bulk_create_channel_configs\nfrom onyx.db.discord_bot import get_guild_config_by_discord_id\nfrom onyx.db.discord_bot import get_guild_config_by_internal_id\nfrom onyx.db.discord_bot import get_guild_config_by_registration_key\nfrom onyx.db.discord_bot import sync_channel_configs\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.utils import DiscordChannelView\nfrom onyx.onyxbot.discord.cache import DiscordCacheManager\nfrom onyx.onyxbot.discord.constants import REGISTER_COMMAND\nfrom onyx.onyxbot.discord.constants import SYNC_CHANNELS_COMMAND\nfrom onyx.onyxbot.discord.exceptions import RegistrationError\nfrom onyx.onyxbot.discord.exceptions import SyncChannelsError\nfrom onyx.server.manage.discord_bot.utils import parse_discord_registration_key\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\nlogger = setup_logger()\n\n\nasync def handle_dm(message: discord.Message) -> None:\n    \"\"\"Handle direct messages.\"\"\"\n    dm_response = (\n        \"**I can't respond to DMs** :sweat:\\n\\n\"\n        f\"Please chat with me in a server channel, or join the official \"\n        f\"[Onyx Discord]({ONYX_DISCORD_URL}) for help!\"\n    )\n    await message.channel.send(dm_response)\n\n\n# -------------------------------------------------------------------------\n# Helper functions for error handling\n# -------------------------------------------------------------------------\n\n\nasync def _try_dm_author(message: discord.Message, content: str) -> bool:\n    \"\"\"Attempt to DM the message author. Returns True if successful.\"\"\"\n    logger.debug(f\"Responding in Discord DM with {content}\")\n    try:\n        await message.author.send(content)\n        return True\n    except (discord.Forbidden, discord.HTTPException) as e:\n        # User has DMs disabled or other error\n        logger.warning(f\"Failed to DM author {message.author.id}: {e}\")\n    except Exception as e:\n        logger.exception(f\"Unexpected error DMing author {message.author.id}: {e}\")\n    return False\n\n\nasync def _try_delete_message(message: discord.Message) -> bool:\n    \"\"\"Attempt to delete a message. Returns True if successful.\"\"\"\n    logger.debug(f\"Deleting potentially sensitive message {message.id}\")\n    try:\n        await message.delete()\n        return True\n    except (discord.Forbidden, discord.HTTPException) as e:\n        # Bot lacks permission or other error\n        logger.warning(f\"Failed to delete message {message.id}: {e}\")\n    except Exception as e:\n        logger.exception(f\"Unexpected error deleting message {message.id}: {e}\")\n    return False\n\n\nasync def _try_react_x(message: discord.Message) -> bool:\n    \"\"\"Attempt to react to a message with ❌. Returns True if successful.\"\"\"\n    try:\n        await message.add_reaction(\"❌\")\n        return True\n    except (discord.Forbidden, discord.HTTPException) as e:\n        # Bot lacks permission or other error\n        logger.warning(f\"Failed to react to message {message.id}: {e}\")\n    except Exception as e:\n        logger.exception(f\"Unexpected error reacting to message {message.id}: {e}\")\n    return False\n\n\n# -------------------------------------------------------------------------\n# Registration\n# -------------------------------------------------------------------------\n\n\nasync def handle_registration_command(\n    message: discord.Message,\n    cache: DiscordCacheManager,\n) -> bool:\n    \"\"\"Handle !register command. Returns True if command was handled.\"\"\"\n    content = message.content.strip()\n\n    # Check for !register command\n    if not content.startswith(f\"{DISCORD_BOT_INVOKE_CHAR}{REGISTER_COMMAND}\"):\n        return False\n\n    # Must be in a server\n    if not message.guild:\n        await _try_dm_author(\n            message, \"This command can only be used in a server channel.\"\n        )\n        return True\n\n    guild_name = message.guild.name\n    logger.info(f\"Registration command received: {guild_name}\")\n\n    try:\n        # Parse the registration key\n        parts = content.split(maxsplit=1)\n        if len(parts) < 2:\n            raise RegistrationError(\n                \"Invalid registration key format. Please check the key and try again.\"\n            )\n\n        registration_key = parts[1].strip()\n\n        if not message.author or not isinstance(message.author, discord.Member):\n            raise RegistrationError(\n                \"You need to be a server administrator to register the bot.\"\n            )\n\n        # Check permissions - require admin or manage_guild\n        if not message.author.guild_permissions.administrator:\n            if not message.author.guild_permissions.manage_guild:\n                raise RegistrationError(\n                    \"You need **Administrator** or **Manage Server** permissions to register this bot.\"\n                )\n\n        await _register_guild(message, registration_key, cache)\n        logger.info(f\"Registration successful: {guild_name}\")\n        await message.reply(\n            \":white_check_mark: **Successfully registered!**\\n\\n\"\n            \"This server is now connected to Onyx. \"\n            \"I'll respond to messages based on your server and channel settings set in Onyx.\"\n        )\n    except RegistrationError as e:\n        logger.debug(f\"Registration failed: {guild_name}, error={e}\")\n        await _try_dm_author(message, f\":x: **Registration failed.**\\n\\n{e}\")\n        await _try_delete_message(message)\n    except Exception:\n        logger.exception(f\"Registration failed unexpectedly: {guild_name}\")\n        await _try_dm_author(\n            message,\n            \":x: **Registration failed.**\\n\\nAn unexpected error occurred. Please try again later.\",\n        )\n        await _try_delete_message(message)\n\n    return True\n\n\nasync def _register_guild(\n    message: discord.Message,\n    registration_key: str,\n    cache: DiscordCacheManager,\n) -> None:\n    \"\"\"Register a guild with a registration key.\"\"\"\n    if not message.guild:\n        # mypy, even though we already know that message.guild is not None\n        raise RegistrationError(\"This command can only be used in a server.\")\n\n    logger.info(f\"Guild '{message.guild.name}' attempting to register Discord bot\")\n    registration_key = registration_key.strip()\n\n    # Parse tenant_id from registration key\n    parsed = parse_discord_registration_key(registration_key)\n    if parsed is None:\n        raise RegistrationError(\n            \"Invalid registration key format. Please check the key and try again.\"\n        )\n\n    tenant_id = parsed\n\n    logger.info(f\"Parsed tenant_id {tenant_id} from registration key\")\n\n    # Check if this guild is already registered to any tenant\n    guild_id = message.guild.id\n    existing_tenant = cache.get_tenant(guild_id)\n    if existing_tenant is not None:\n        logger.warning(\n            f\"Guild {guild_id} is already registered to tenant {existing_tenant}\"\n        )\n        raise RegistrationError(\n            \"This server is already registered.\\n\\nOnyxBot can only connect one Discord server to one Onyx workspace.\"\n        )\n\n    context_token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n    try:\n        guild = message.guild\n        guild_name = guild.name\n\n        # Collect all text channels from the guild\n        channels = get_text_channels(guild)\n        logger.info(f\"Found {len(channels)} text channels in guild '{guild_name}'\")\n\n        # Validate and update in database\n        def _sync_register() -> int:\n            with get_session_with_tenant(tenant_id=tenant_id) as db:\n                # Find the guild config by registration key\n                config = get_guild_config_by_registration_key(db, registration_key)\n                if not config:\n                    raise RegistrationError(\n                        \"Registration key not found.\\n\\n\"\n                        \"The key may have expired or been deleted. \"\n                        \"Please generate a new one from the Onyx admin panel.\"\n                    )\n\n                # Check if already used\n                if config.guild_id is not None:\n                    raise RegistrationError(\n                        \"This registration key has already been used.\\n\\n\"\n                        \"Each key can only be used once. \"\n                        \"Please generate a new key from the Onyx admin panel.\"\n                    )\n\n                # Update the guild config\n                config.guild_id = guild_id\n                config.guild_name = guild_name\n                config.registered_at = datetime.now(timezone.utc)\n\n                # Create channel configs for all text channels\n                bulk_create_channel_configs(db, config.id, channels)\n\n                db.commit()\n                return config.id\n\n        await asyncio.to_thread(_sync_register)\n\n        # Refresh cache for this guild\n        await cache.refresh_guild(guild_id, tenant_id)\n\n        logger.info(\n            f\"Guild '{guild_name}' registered with {len(channels)} channel configs\"\n        )\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(context_token)\n\n\ndef get_text_channels(guild: discord.Guild) -> list[DiscordChannelView]:\n    \"\"\"Get all text channels from a guild as DiscordChannelView objects.\"\"\"\n    channels: list[DiscordChannelView] = []\n    for channel in guild.channels:\n        # Include text channels and forum channels (where threads can be created)\n        if isinstance(channel, (discord.TextChannel, discord.ForumChannel)):\n            # Check if channel is private (not visible to @everyone)\n            everyone_perms = channel.permissions_for(guild.default_role)\n            is_private = not everyone_perms.view_channel\n\n            logger.debug(\n                f\"Found channel: #{channel.name}, type={channel.type.name}, is_private={is_private}\"\n            )\n\n            channels.append(\n                DiscordChannelView(\n                    channel_id=channel.id,\n                    channel_name=channel.name,\n                    channel_type=channel.type.name,  # \"text\" or \"forum\"\n                    is_private=is_private,\n                )\n            )\n\n    logger.debug(f\"Retrieved {len(channels)} channels from guild '{guild.name}'\")\n    return channels\n\n\n# -------------------------------------------------------------------------\n# Sync Channels\n# -------------------------------------------------------------------------\n\n\nasync def handle_sync_channels_command(\n    message: discord.Message,\n    tenant_id: str | None,\n    bot: discord.Client,\n) -> bool:\n    \"\"\"Handle !sync-channels command. Returns True if command was handled.\"\"\"\n    content = message.content.strip()\n\n    # Check for !sync-channels command\n    if not content.startswith(f\"{DISCORD_BOT_INVOKE_CHAR}{SYNC_CHANNELS_COMMAND}\"):\n        return False\n\n    # Must be in a server\n    if not message.guild:\n        await _try_dm_author(\n            message, \"This command can only be used in a server channel.\"\n        )\n        return True\n\n    guild_name = message.guild.name\n    logger.info(f\"Sync-channels command received: {guild_name}\")\n\n    try:\n        # Must be registered\n        if not tenant_id:\n            raise SyncChannelsError(\n                \"This server is not registered. Please register it first.\"\n            )\n\n        # Check permissions - require admin or manage_guild\n        if not message.author or not isinstance(message.author, discord.Member):\n            raise SyncChannelsError(\n                \"You need to be a server administrator to sync channels.\"\n            )\n\n        if not message.author.guild_permissions.administrator:\n            if not message.author.guild_permissions.manage_guild:\n                raise SyncChannelsError(\n                    \"You need **Administrator** or **Manage Server** permissions to sync channels.\"\n                )\n\n        # Get guild config ID\n        def _get_guild_config_id() -> int | None:\n            with get_session_with_tenant(tenant_id=tenant_id) as db:\n                if not message.guild:\n                    raise SyncChannelsError(\n                        \"Server not found. This shouldn't happen. Please contact Onyx support.\"\n                    )\n                config = get_guild_config_by_discord_id(db, message.guild.id)\n                return config.id if config else None\n\n        guild_config_id = await asyncio.to_thread(_get_guild_config_id)\n\n        if not guild_config_id:\n            raise SyncChannelsError(\n                \"Server config not found. This shouldn't happen. Please contact Onyx support.\"\n            )\n\n        # Perform the sync\n        added, removed, updated = await sync_guild_channels(\n            guild_config_id, tenant_id, bot\n        )\n        logger.info(\n            f\"Sync-channels successful: {guild_name}, added={added}, removed={removed}, updated={updated}\"\n        )\n        await message.reply(\n            f\":white_check_mark: **Channel sync complete!**\\n\\n\"\n            f\"* **{added}** new channel(s) added\\n\"\n            f\"* **{removed}** deleted channel(s) removed\\n\"\n            f\"* **{updated}** channel name(s) updated\\n\\n\"\n            \"New channels are disabled by default. Enable them in the Onyx admin panel.\"\n        )\n    except SyncChannelsError as e:\n        logger.debug(f\"Sync-channels failed: {guild_name}, error={e}\")\n        await _try_dm_author(message, f\":x: **Channel sync failed.**\\n\\n{e}\")\n        await _try_react_x(message)\n    except Exception:\n        logger.exception(f\"Sync-channels failed unexpectedly: {guild_name}\")\n        await _try_dm_author(\n            message,\n            \":x: **Channel sync failed.**\\n\\nAn unexpected error occurred. Please try again later.\",\n        )\n        await _try_react_x(message)\n\n    return True\n\n\nasync def sync_guild_channels(\n    guild_config_id: int,\n    tenant_id: str,\n    bot: discord.Client,\n) -> tuple[int, int, int]:\n    \"\"\"Sync channel configs with current Discord channels for a guild.\n\n    Fetches current channels from Discord and syncs with database:\n    - Creates configs for new channels (disabled by default)\n    - Removes configs for deleted channels\n    - Updates names for existing channels if changed\n\n    Args:\n        guild_config_id: Internal ID of the guild config\n        tenant_id: Tenant ID for database access\n        bot: Discord bot client\n\n    Returns:\n        (added_count, removed_count, updated_count)\n\n    Raises:\n        ValueError: If guild config not found or guild not registered\n    \"\"\"\n    context_token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n    try:\n        # Get guild_id from config\n        def _get_guild_id() -> int | None:\n            with get_session_with_tenant(tenant_id=tenant_id) as db:\n                config = get_guild_config_by_internal_id(db, guild_config_id)\n                if not config:\n                    return None\n                return config.guild_id\n\n        guild_id = await asyncio.to_thread(_get_guild_id)\n\n        if guild_id is None:\n            raise ValueError(\n                f\"Guild config {guild_config_id} not found or not registered\"\n            )\n\n        # Get the guild from Discord\n        guild = bot.get_guild(guild_id)\n        if not guild:\n            raise ValueError(f\"Guild {guild_id} not found in Discord cache\")\n\n        # Get current channels from Discord\n        channels = get_text_channels(guild)\n        logger.info(f\"Syncing {len(channels)} channels for guild '{guild.name}'\")\n\n        # Sync with database\n        def _sync() -> tuple[int, int, int]:\n            with get_session_with_tenant(tenant_id=tenant_id) as db:\n                added, removed, updated = sync_channel_configs(\n                    db, guild_config_id, channels\n                )\n                db.commit()\n                return added, removed, updated\n\n        added, removed, updated = await asyncio.to_thread(_sync)\n\n        logger.info(\n            f\"Channel sync complete for guild '{guild.name}': added={added}, removed={removed}, updated={updated}\"\n        )\n\n        return added, removed, updated\n\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(context_token)\n"
  },
  {
    "path": "backend/onyx/onyxbot/discord/handle_message.py",
    "content": "\"\"\"Discord bot message handling and response logic.\"\"\"\n\nimport asyncio\n\nimport discord\nfrom pydantic import BaseModel\n\nfrom onyx.chat.models import ChatFullResponse\nfrom onyx.db.discord_bot import get_channel_config_by_discord_ids\nfrom onyx.db.discord_bot import get_guild_config_by_discord_id\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.models import DiscordChannelConfig\nfrom onyx.db.models import DiscordGuildConfig\nfrom onyx.onyxbot.discord.api_client import OnyxAPIClient\nfrom onyx.onyxbot.discord.constants import MAX_CONTEXT_MESSAGES\nfrom onyx.onyxbot.discord.constants import MAX_MESSAGE_LENGTH\nfrom onyx.onyxbot.discord.constants import THINKING_EMOJI\nfrom onyx.onyxbot.discord.exceptions import APIError\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Message types with actual content (excludes system notifications like \"user joined\")\nCONTENT_MESSAGE_TYPES = (\n    discord.MessageType.default,\n    discord.MessageType.reply,\n    discord.MessageType.thread_starter_message,\n)\n\n\nclass ShouldRespondContext(BaseModel):\n    \"\"\"Context for whether the bot should respond to a message.\"\"\"\n\n    should_respond: bool\n    persona_id: int | None\n    thread_only_mode: bool\n\n\n# -------------------------------------------------------------------------\n# Response Logic\n# -------------------------------------------------------------------------\n\n\nasync def should_respond(\n    message: discord.Message,\n    tenant_id: str,\n    bot_user: discord.ClientUser,\n) -> ShouldRespondContext:\n    \"\"\"Determine if bot should respond and which persona to use.\"\"\"\n    if not message.guild:\n        logger.warning(\"Received a message that isn't in a server.\")\n        return ShouldRespondContext(\n            should_respond=False, persona_id=None, thread_only_mode=False\n        )\n\n    guild_id = message.guild.id\n    channel_id = message.channel.id\n    bot_mentioned = bot_user in message.mentions\n\n    def _get_configs() -> tuple[DiscordGuildConfig | None, DiscordChannelConfig | None]:\n        with get_session_with_tenant(tenant_id=tenant_id) as db:\n            guild_config = get_guild_config_by_discord_id(db, guild_id)\n            if not guild_config or not guild_config.enabled:\n                return None, None\n\n            # For threads, use parent channel ID\n            actual_channel_id = channel_id\n            if isinstance(message.channel, discord.Thread) and message.channel.parent:\n                actual_channel_id = message.channel.parent.id\n\n            channel_config = get_channel_config_by_discord_ids(\n                db, guild_id, actual_channel_id\n            )\n            return guild_config, channel_config\n\n    guild_config, channel_config = await asyncio.to_thread(_get_configs)\n\n    if not guild_config or not channel_config or not channel_config.enabled:\n        return ShouldRespondContext(\n            should_respond=False, persona_id=None, thread_only_mode=False\n        )\n\n    # Determine persona (channel override or guild default)\n    persona_id = channel_config.persona_override_id or guild_config.default_persona_id\n\n    # Check mention requirement (with exceptions for implicit invocation)\n    if channel_config.require_bot_invocation and not bot_mentioned:\n        if not await check_implicit_invocation(message, bot_user):\n            return ShouldRespondContext(\n                should_respond=False, persona_id=None, thread_only_mode=False\n            )\n\n    return ShouldRespondContext(\n        should_respond=True,\n        persona_id=persona_id,\n        thread_only_mode=channel_config.thread_only_mode,\n    )\n\n\nasync def check_implicit_invocation(\n    message: discord.Message,\n    bot_user: discord.ClientUser,\n) -> bool:\n    \"\"\"Check if the bot should respond without explicit mention.\n\n    Returns True if:\n    1. User is replying to a bot message\n    2. User is in a thread owned by the bot\n    3. User is in a thread created from a bot message\n    \"\"\"\n    # Check if replying to a bot message\n    if message.reference and message.reference.message_id:\n        try:\n            referenced_msg = await message.channel.fetch_message(\n                message.reference.message_id\n            )\n            if referenced_msg.author.id == bot_user.id:\n                logger.debug(\n                    f\"Implicit invocation via reply: '{message.content[:50]}...'\"\n                )\n                return True\n        except (discord.NotFound, discord.HTTPException):\n            pass\n\n    # Check thread-related conditions\n    if isinstance(message.channel, discord.Thread):\n        thread = message.channel\n\n        # Bot owns the thread\n        if thread.owner_id == bot_user.id:\n            logger.debug(\n                f\"Implicit invocation via bot-owned thread: '{message.content[:50]}...' in #{thread.name}\"\n            )\n            return True\n\n        # Thread was created from a bot message\n        if thread.parent and not isinstance(thread.parent, discord.ForumChannel):\n            try:\n                starter = await thread.parent.fetch_message(thread.id)\n                if starter.author.id == bot_user.id:\n                    logger.debug(\n                        f\"Implicit invocation via bot-started thread: '{message.content[:50]}...' in #{thread.name}\"\n                    )\n                    return True\n            except (discord.NotFound, discord.HTTPException):\n                pass\n\n    return False\n\n\n# -------------------------------------------------------------------------\n# Message Processing\n# -------------------------------------------------------------------------\n\n\nasync def process_chat_message(\n    message: discord.Message,\n    api_key: str,\n    persona_id: int | None,\n    thread_only_mode: bool,\n    api_client: OnyxAPIClient,\n    bot_user: discord.ClientUser,\n) -> None:\n    \"\"\"Process a message and send response.\"\"\"\n    try:\n        await message.add_reaction(THINKING_EMOJI)\n    except discord.DiscordException:\n        logger.warning(\n            f\"Failed to add thinking reaction to message: '{message.content[:50]}...'\"\n        )\n\n    try:\n        # Build conversation context\n        context = await _build_conversation_context(message, bot_user)\n\n        # Prepare full message content\n        parts = []\n        if context:\n            parts.append(context)\n        if isinstance(message.channel, discord.Thread):\n            if isinstance(message.channel.parent, discord.ForumChannel):\n                parts.append(f\"Forum post title: {message.channel.name}\")\n        parts.append(\n            f\"Current message from @{message.author.display_name}: {format_message_content(message)}\"\n        )\n\n        # Send to API\n        response = await api_client.send_chat_message(\n            message=\"\\n\\n\".join(parts),\n            api_key=api_key,\n            persona_id=persona_id,\n        )\n\n        # Format response with citations\n        answer = response.answer or \"I couldn't generate a response.\"\n        answer = _append_citations(answer, response)\n\n        await send_response(message, answer, thread_only_mode)\n\n        try:\n            await message.remove_reaction(THINKING_EMOJI, bot_user)\n        except discord.DiscordException:\n            pass\n\n    except APIError as e:\n        logger.error(f\"API error processing message: {e}\")\n        await send_error_response(message, bot_user)\n    except Exception as e:\n        logger.exception(f\"Error processing chat message: {e}\")\n        await send_error_response(message, bot_user)\n\n\nasync def _build_conversation_context(\n    message: discord.Message,\n    bot_user: discord.ClientUser,\n) -> str | None:\n    \"\"\"Build conversation context from thread history or reply chain.\"\"\"\n    if isinstance(message.channel, discord.Thread):\n        return await _build_thread_context(message, bot_user)\n    elif message.reference:\n        return await _build_reply_chain_context(message, bot_user)\n    return None\n\n\ndef _append_citations(answer: str, response: ChatFullResponse) -> str:\n    \"\"\"Append citation sources to the answer if present.\"\"\"\n    if not response.citation_info or not response.top_documents:\n        return answer\n\n    cited_docs: list[tuple[int, str, str | None]] = []\n    for citation in response.citation_info:\n        doc = next(\n            (\n                d\n                for d in response.top_documents\n                if d.document_id == citation.document_id\n            ),\n            None,\n        )\n        if doc:\n            cited_docs.append(\n                (\n                    citation.citation_number,\n                    doc.semantic_identifier or \"Source\",\n                    doc.link,\n                )\n            )\n\n    if not cited_docs:\n        return answer\n\n    cited_docs.sort(key=lambda x: x[0])\n    citations = \"\\n\\n**Sources:**\\n\"\n    for num, name, link in cited_docs[:5]:\n        if link:\n            citations += f\"{num}. [{name}](<{link}>)\\n\"\n        else:\n            citations += f\"{num}. {name}\\n\"\n\n    return answer + citations\n\n\n# -------------------------------------------------------------------------\n# Context Building\n# -------------------------------------------------------------------------\n\n\nasync def _build_reply_chain_context(\n    message: discord.Message,\n    bot_user: discord.ClientUser,\n) -> str | None:\n    \"\"\"Build context by following the reply chain backwards.\"\"\"\n    if not message.reference or not message.reference.message_id:\n        return None\n\n    try:\n        messages: list[discord.Message] = []\n        current = message\n\n        # Follow reply chain backwards up to MAX_CONTEXT_MESSAGES\n        while (\n            current.reference\n            and current.reference.message_id\n            and len(messages) < MAX_CONTEXT_MESSAGES\n        ):\n            try:\n                parent = await message.channel.fetch_message(\n                    current.reference.message_id\n                )\n                messages.append(parent)\n                current = parent\n            except (discord.NotFound, discord.HTTPException):\n                break\n\n        if not messages:\n            return None\n\n        messages.reverse()  # Chronological order\n\n        logger.debug(\n            f\"Built reply chain context: {len(messages)} messages in #{getattr(message.channel, 'name', 'unknown')}\"\n        )\n\n        return _format_messages_as_context(messages, bot_user)\n\n    except Exception as e:\n        logger.warning(f\"Failed to build reply chain context: {e}\")\n        return None\n\n\nasync def _build_thread_context(\n    message: discord.Message,\n    bot_user: discord.ClientUser,\n) -> str | None:\n    \"\"\"Build context from thread message history.\"\"\"\n    if not isinstance(message.channel, discord.Thread):\n        return None\n\n    try:\n        thread = message.channel\n        messages: list[discord.Message] = []\n\n        # Fetch recent messages (excluding current)\n        async for msg in thread.history(limit=MAX_CONTEXT_MESSAGES, oldest_first=False):\n            if msg.id != message.id:\n                messages.append(msg)\n\n        # Include thread starter message and its reply chain if not already present\n        if thread.parent and not isinstance(thread.parent, discord.ForumChannel):\n            try:\n                starter = await thread.parent.fetch_message(thread.id)\n                if starter.id != message.id and not any(\n                    m.id == starter.id for m in messages\n                ):\n                    messages.append(starter)\n\n                # Trace back through the starter's reply chain for more context\n                current = starter\n                while (\n                    current.reference\n                    and current.reference.message_id\n                    and len(messages) < MAX_CONTEXT_MESSAGES\n                ):\n                    try:\n                        parent = await thread.parent.fetch_message(\n                            current.reference.message_id\n                        )\n                        if not any(m.id == parent.id for m in messages):\n                            messages.append(parent)\n                        current = parent\n                    except (discord.NotFound, discord.HTTPException):\n                        break\n            except (discord.NotFound, discord.HTTPException):\n                pass\n\n        if not messages:\n            return None\n\n        messages.sort(key=lambda m: m.id)  # Chronological order\n        logger.debug(\n            f\"Built thread context: {len(messages)} messages in #{thread.name}\"\n        )\n\n        return _format_messages_as_context(messages, bot_user)\n\n    except Exception as e:\n        logger.warning(f\"Failed to build thread context: {e}\")\n        return None\n\n\ndef _format_messages_as_context(\n    messages: list[discord.Message],\n    bot_user: discord.ClientUser,\n) -> str | None:\n    \"\"\"Format a list of messages into a conversation context string.\"\"\"\n    formatted = []\n    for msg in messages:\n        if msg.type not in CONTENT_MESSAGE_TYPES:\n            continue\n\n        sender = (\n            \"OnyxBot\" if msg.author.id == bot_user.id else f\"@{msg.author.display_name}\"\n        )\n        formatted.append(f\"{sender}: {format_message_content(msg)}\")\n\n    if not formatted:\n        return None\n\n    return (\n        \"You are a Discord bot named OnyxBot.\\n\"\n        'Always assume that [user] is the same as the \"Current message\" author.'\n        \"Conversation history:\\n\"\n        \"---\\n\" + \"\\n\".join(formatted) + \"\\n---\"\n    )\n\n\n# -------------------------------------------------------------------------\n# Message Formatting\n# -------------------------------------------------------------------------\n\n\ndef format_message_content(message: discord.Message) -> str:\n    \"\"\"Format message content with readable mentions.\"\"\"\n    content = message.content\n\n    for user in message.mentions:\n        content = content.replace(f\"<@{user.id}>\", f\"@{user.display_name}\")\n        content = content.replace(f\"<@!{user.id}>\", f\"@{user.display_name}\")\n\n    for role in message.role_mentions:\n        content = content.replace(f\"<@&{role.id}>\", f\"@{role.name}\")\n\n    for channel in message.channel_mentions:\n        content = content.replace(f\"<#{channel.id}>\", f\"#{channel.name}\")\n\n    return content\n\n\n# -------------------------------------------------------------------------\n# Response Sending\n# -------------------------------------------------------------------------\n\n\nasync def send_response(\n    message: discord.Message,\n    content: str,\n    thread_only_mode: bool,\n) -> None:\n    \"\"\"Send response based on thread_only_mode setting.\"\"\"\n    chunks = _split_message(content)\n\n    if isinstance(message.channel, discord.Thread):\n        for chunk in chunks:\n            await message.channel.send(chunk)\n    elif thread_only_mode:\n        thread_name = f\"OnyxBot <> {message.author.display_name}\"[:100]\n        thread = await message.create_thread(name=thread_name)\n        for chunk in chunks:\n            await thread.send(chunk)\n    else:\n        for i, chunk in enumerate(chunks):\n            if i == 0:\n                await message.reply(chunk)\n            else:\n                await message.channel.send(chunk)\n\n\ndef _split_message(content: str) -> list[str]:\n    \"\"\"Split content into chunks that fit Discord's message limit.\"\"\"\n    chunks = []\n    while content:\n        if len(content) <= MAX_MESSAGE_LENGTH:\n            chunks.append(content)\n            break\n\n        # Find a good split point\n        split_at = MAX_MESSAGE_LENGTH\n        for sep in [\"\\n\\n\", \"\\n\", \". \", \" \"]:\n            idx = content.rfind(sep, 0, MAX_MESSAGE_LENGTH)\n            if idx > MAX_MESSAGE_LENGTH // 2:\n                split_at = idx + len(sep)\n                break\n\n        chunks.append(content[:split_at])\n        content = content[split_at:]\n\n    return chunks\n\n\nasync def send_error_response(\n    message: discord.Message,\n    bot_user: discord.ClientUser,\n) -> None:\n    \"\"\"Send error response and clean up reaction.\"\"\"\n    try:\n        await message.remove_reaction(THINKING_EMOJI, bot_user)\n    except discord.DiscordException:\n        pass\n\n    error_msg = \"Sorry, I encountered an error processing your message. You may want to contact Onyx for support :sweat_smile:\"\n\n    try:\n        if isinstance(message.channel, discord.Thread):\n            await message.channel.send(error_msg)\n        else:\n            thread = await message.create_thread(\n                name=f\"Response to {message.author.display_name}\"[:100]\n            )\n            await thread.send(error_msg)\n    except discord.DiscordException:\n        pass\n"
  },
  {
    "path": "backend/onyx/onyxbot/discord/utils.py",
    "content": "from onyx.configs.app_configs import AUTH_TYPE\nfrom onyx.configs.app_configs import DISCORD_BOT_TOKEN\nfrom onyx.configs.constants import AuthType\nfrom onyx.db.discord_bot import get_discord_bot_config\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.sensitive import SensitiveValue\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\nlogger = setup_logger()\n\n\ndef get_bot_token() -> str | None:\n    \"\"\"Get Discord bot token from env var or database.\n\n    Priority:\n    1. DISCORD_BOT_TOKEN env var (always takes precedence)\n    2. For self-hosted: DiscordBotConfig in database (default tenant)\n    3. For Cloud: should always have env var set\n\n    Returns:\n        Bot token string, or None if not configured.\n    \"\"\"\n    # Environment variable takes precedence\n    if DISCORD_BOT_TOKEN:\n        return DISCORD_BOT_TOKEN\n\n    # Cloud should always have env var; if not, return None\n    if AUTH_TYPE == AuthType.CLOUD:\n        logger.warning(\"Cloud deployment missing DISCORD_BOT_TOKEN env var\")\n        return None\n\n    # Self-hosted: check database for bot config\n    try:\n        with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db:\n            config = get_discord_bot_config(db)\n    except Exception as e:\n        logger.error(f\"Failed to get bot token from database: {e}\")\n        return None\n    if config and config.bot_token:\n        if isinstance(config.bot_token, SensitiveValue):\n            return config.bot_token.get_value(apply_mask=False)\n        return config.bot_token\n    return None\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/blocks.py",
    "content": "from datetime import datetime\nfrom typing import cast\n\nimport pytz\nimport timeago  # type: ignore\nfrom slack_sdk.models.blocks import ActionsBlock\nfrom slack_sdk.models.blocks import Block\nfrom slack_sdk.models.blocks import ButtonElement\nfrom slack_sdk.models.blocks import ContextBlock\nfrom slack_sdk.models.blocks import DividerBlock\nfrom slack_sdk.models.blocks import HeaderBlock\nfrom slack_sdk.models.blocks import Option\nfrom slack_sdk.models.blocks import RadioButtonsElement\nfrom slack_sdk.models.blocks import SectionBlock\nfrom slack_sdk.models.blocks.basic_components import MarkdownTextObject\nfrom slack_sdk.models.blocks.block_elements import ImageElement\n\nfrom onyx.chat.models import ChatBasicResponse\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import SearchFeedbackType\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_NUM_DOCS_TO_DISPLAY\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.db.chat import get_chat_session_by_message_id\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import ChannelConfig\nfrom onyx.onyxbot.slack.constants import CONTINUE_IN_WEB_UI_ACTION_ID\nfrom onyx.onyxbot.slack.constants import DISLIKE_BLOCK_ACTION_ID\nfrom onyx.onyxbot.slack.constants import FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID\nfrom onyx.onyxbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID\nfrom onyx.onyxbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID\nfrom onyx.onyxbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID\nfrom onyx.onyxbot.slack.constants import KEEP_TO_YOURSELF_ACTION_ID\nfrom onyx.onyxbot.slack.constants import LIKE_BLOCK_ACTION_ID\nfrom onyx.onyxbot.slack.constants import SHOW_EVERYONE_ACTION_ID\nfrom onyx.onyxbot.slack.formatting import format_slack_message\nfrom onyx.onyxbot.slack.icons import source_to_github_img_link\nfrom onyx.onyxbot.slack.models import ActionValuesEphemeralMessage\nfrom onyx.onyxbot.slack.models import ActionValuesEphemeralMessageChannelConfig\nfrom onyx.onyxbot.slack.models import ActionValuesEphemeralMessageMessageInfo\nfrom onyx.onyxbot.slack.models import SlackMessageInfo\nfrom onyx.onyxbot.slack.utils import build_continue_in_web_ui_id\nfrom onyx.onyxbot.slack.utils import build_feedback_id\nfrom onyx.onyxbot.slack.utils import build_publish_ephemeral_message_id\nfrom onyx.onyxbot.slack.utils import remove_slack_text_interactions\nfrom onyx.onyxbot.slack.utils import translate_vespa_highlight_to_slack\nfrom onyx.utils.text_processing import decode_escapes\n\n_MAX_BLURB_LEN = 45\n\n\ndef _format_doc_updated_at(updated_at: datetime | None) -> str | None:\n    \"\"\"Convert document timestamps to a human friendly relative string.\"\"\"\n    if updated_at is None:\n        return None\n\n    if updated_at.tzinfo is None or updated_at.tzinfo.utcoffset(updated_at) is None:\n        aware_updated_at = updated_at.replace(tzinfo=pytz.utc)\n    else:\n        aware_updated_at = updated_at.astimezone(pytz.utc)\n\n    return timeago.format(aware_updated_at, datetime.now(pytz.utc))\n\n\ndef get_feedback_reminder_blocks(thread_link: str, include_followup: bool) -> Block:\n    text = (\n        f\"Please provide feedback on <{thread_link}|this answer>. \"\n        \"This is essential to help us to improve the quality of the answers. \"\n        \"Please rate it by clicking the `Helpful` or `Not helpful` button. \"\n    )\n    if include_followup:\n        text += \"\\n\\nIf you need more help, click the `I need more help from a human!` button. \"\n\n    text += \"\\n\\nThanks!\"\n\n    return SectionBlock(text=text)\n\n\ndef _split_text(text: str, limit: int = 3000) -> list[str]:\n    if len(text) <= limit:\n        return [text]\n\n    chunks = []\n    while text:\n        if len(text) <= limit:\n            chunks.append(text)\n            break\n\n        # Find the nearest space before the limit to avoid splitting a word\n        split_at = text.rfind(\" \", 0, limit)\n        if split_at == -1:  # No spaces found, force split\n            split_at = limit\n\n        chunk = text[:split_at]\n        chunks.append(chunk)\n        text = text[split_at:].lstrip()  # Remove leading spaces from the next chunk\n\n    return chunks\n\n\ndef _clean_markdown_link_text(text: str) -> str:\n    # Remove any newlines within the text\n    return format_slack_message(text).replace(\"\\n\", \" \").strip()\n\n\ndef _build_qa_feedback_block(\n    message_id: int, feedback_reminder_id: str | None = None\n) -> Block:\n    return ActionsBlock(\n        block_id=build_feedback_id(message_id),\n        elements=[\n            ButtonElement(\n                action_id=LIKE_BLOCK_ACTION_ID,\n                text=\"👍 Helpful\",\n                value=feedback_reminder_id,\n            ),\n            ButtonElement(\n                action_id=DISLIKE_BLOCK_ACTION_ID,\n                text=\"👎 Not helpful\",\n                value=feedback_reminder_id,\n            ),\n        ],\n    )\n\n\ndef _build_ephemeral_publication_block(\n    channel_id: str,  # noqa: ARG001\n    chat_message_id: int,\n    message_info: SlackMessageInfo,\n    original_question_ts: str,\n    channel_conf: ChannelConfig,\n    feedback_reminder_id: str | None = None,\n) -> Block:\n    # check whether the message is in a thread\n    if (\n        message_info is not None\n        and message_info.msg_to_respond is not None\n        and message_info.thread_to_respond is not None\n        and (message_info.msg_to_respond == message_info.thread_to_respond)\n    ):\n        respond_ts = None\n    else:\n        respond_ts = original_question_ts\n\n    action_values_ephemeral_message_channel_config = (\n        ActionValuesEphemeralMessageChannelConfig(\n            channel_name=channel_conf.get(\"channel_name\"),\n            respond_tag_only=channel_conf.get(\"respond_tag_only\"),\n            respond_to_bots=channel_conf.get(\"respond_to_bots\"),\n            is_ephemeral=channel_conf.get(\"is_ephemeral\", False),\n            respond_member_group_list=channel_conf.get(\"respond_member_group_list\"),\n            answer_filters=channel_conf.get(\"answer_filters\"),\n            follow_up_tags=channel_conf.get(\"follow_up_tags\"),\n            show_continue_in_web_ui=channel_conf.get(\"show_continue_in_web_ui\", False),\n        )\n    )\n\n    action_values_ephemeral_message_message_info = (\n        ActionValuesEphemeralMessageMessageInfo(\n            bypass_filters=message_info.bypass_filters,\n            channel_to_respond=message_info.channel_to_respond,\n            msg_to_respond=message_info.msg_to_respond,\n            email=message_info.email,\n            sender_id=message_info.sender_id,\n            thread_messages=[],\n            is_slash_command=message_info.is_slash_command,\n            is_bot_dm=message_info.is_bot_dm,\n            thread_to_respond=respond_ts,\n        )\n    )\n\n    action_values_ephemeral_message = ActionValuesEphemeralMessage(\n        original_question_ts=original_question_ts,\n        feedback_reminder_id=feedback_reminder_id,\n        chat_message_id=chat_message_id,\n        message_info=action_values_ephemeral_message_message_info,\n        channel_conf=action_values_ephemeral_message_channel_config,\n    )\n\n    return ActionsBlock(\n        block_id=build_publish_ephemeral_message_id(original_question_ts),\n        elements=[\n            ButtonElement(\n                action_id=SHOW_EVERYONE_ACTION_ID,\n                text=\"📢 Share with Everyone\",\n                value=action_values_ephemeral_message.model_dump_json(),\n            ),\n            ButtonElement(\n                action_id=KEEP_TO_YOURSELF_ACTION_ID,\n                text=\"🤫  Keep to Yourself\",\n                value=action_values_ephemeral_message.model_dump_json(),\n            ),\n        ],\n    )\n\n\ndef get_document_feedback_blocks() -> Block:\n    return SectionBlock(\n        text=(\n            \"- 'Up-Boost' if this document is a good source of information and should be \"\n            \"shown more often.\\n\"\n            \"- 'Down-boost' if this document is a poor source of information and should be \"\n            \"shown less often.\\n\"\n            \"- 'Hide' if this document is deprecated and should never be shown anymore.\"\n        ),\n        accessory=RadioButtonsElement(\n            options=[\n                Option(\n                    text=\":thumbsup: Up-Boost\",\n                    value=SearchFeedbackType.ENDORSE.value,\n                ),\n                Option(\n                    text=\":thumbsdown: Down-Boost\",\n                    value=SearchFeedbackType.REJECT.value,\n                ),\n                Option(\n                    text=\":x: Hide\",\n                    value=SearchFeedbackType.HIDE.value,\n                ),\n            ]\n        ),\n    )\n\n\ndef _build_doc_feedback_block(\n    message_id: int,\n    document_id: str,\n    document_rank: int,\n) -> ButtonElement:\n    feedback_id = build_feedback_id(message_id, document_id, document_rank)\n    return ButtonElement(\n        action_id=FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID,\n        value=feedback_id,\n        text=\"Give Feedback\",\n    )\n\n\ndef get_restate_blocks(\n    msg: str,\n    is_slash_command: bool,\n) -> list[Block]:\n    # Only the slash command needs this context because the user doesn't see their own input\n    if not is_slash_command:\n        return []\n\n    return [\n        HeaderBlock(text=\"Responding to the Query\"),\n        SectionBlock(text=f\"```{msg}```\"),\n    ]\n\n\ndef _build_documents_blocks(\n    documents: list[SearchDoc],\n    message_id: int | None,\n    num_docs_to_display: int = ONYX_BOT_NUM_DOCS_TO_DISPLAY,\n) -> list[Block]:\n    header_text = \"Reference Documents\"\n    seen_docs_identifiers = set()\n    section_blocks: list[Block] = [HeaderBlock(text=header_text)]\n    included_docs = 0\n    for rank, d in enumerate(documents):\n        if d.document_id in seen_docs_identifiers:\n            continue\n        seen_docs_identifiers.add(d.document_id)\n\n        # Strip newlines from the semantic identifier for Slackbot formatting\n        doc_sem_id = d.semantic_identifier.replace(\"\\n\", \" \")\n        if d.source_type == DocumentSource.SLACK.value:\n            doc_sem_id = \"#\" + doc_sem_id\n\n        used_chars = len(doc_sem_id) + 3\n        match_str = translate_vespa_highlight_to_slack(d.match_highlights, used_chars)\n\n        included_docs += 1\n\n        header_line = f\"{doc_sem_id}\\n\"\n        if d.link:\n            header_line = f\"<{d.link}|{doc_sem_id}>\\n\"\n\n        updated_at_line = \"\"\n        updated_at_str = _format_doc_updated_at(d.updated_at)\n        if updated_at_str:\n            updated_at_line = f\"_Updated {updated_at_str}_\\n\"\n\n        body_text = f\">{remove_slack_text_interactions(match_str)}\"\n\n        block_text = header_line + updated_at_line + body_text\n\n        feedback: ButtonElement | dict = {}\n        if message_id is not None:\n            feedback = _build_doc_feedback_block(\n                message_id=message_id,\n                document_id=d.document_id,\n                document_rank=rank,\n            )\n\n        section_blocks.append(\n            SectionBlock(text=block_text, accessory=feedback),\n        )\n\n        section_blocks.append(DividerBlock())\n\n        if included_docs >= num_docs_to_display:\n            break\n\n    return section_blocks\n\n\ndef _build_sources_blocks(\n    cited_documents: list[tuple[int, SearchDoc]],\n    num_docs_to_display: int = ONYX_BOT_NUM_DOCS_TO_DISPLAY,\n) -> list[Block]:\n    if not cited_documents:\n        return [\n            SectionBlock(\n                text=\"*Warning*: no sources were cited for this answer, so it may be unreliable 😔\"\n            )\n        ]\n\n    seen_docs_identifiers = set()\n    section_blocks: list[Block] = [SectionBlock(text=\"*Sources:*\")]\n    included_docs = 0\n    for citation_num, d in cited_documents:\n        if d.document_id in seen_docs_identifiers:\n            continue\n        seen_docs_identifiers.add(d.document_id)\n\n        doc_sem_id = d.semantic_identifier\n        if d.source_type == DocumentSource.SLACK.value:\n            # for legacy reasons, before the switch to how Slack semantic identifiers are constructed\n            if \"#\" not in doc_sem_id:\n                doc_sem_id = \"#\" + doc_sem_id\n\n        # this is needed to try and prevent the line from overflowing\n        # if it does overflow, the image gets placed above the title and it\n        # looks bad\n        doc_sem_id = (\n            doc_sem_id[:_MAX_BLURB_LEN] + \"...\"\n            if len(doc_sem_id) > _MAX_BLURB_LEN\n            else doc_sem_id\n        )\n\n        owner_str = f\"By {d.primary_owners[0]}\" if d.primary_owners else None\n        days_ago_str = _format_doc_updated_at(d.updated_at)\n        final_metadata_str = \" | \".join(\n            ([owner_str] if owner_str else [])\n            + ([days_ago_str] if days_ago_str else [])\n        )\n\n        document_title = _clean_markdown_link_text(doc_sem_id)\n        img_link = source_to_github_img_link(d.source_type)\n\n        section_blocks.append(\n            ContextBlock(\n                elements=(\n                    [\n                        ImageElement(\n                            image_url=img_link,\n                            alt_text=f\"{d.source_type.value} logo\",\n                        )\n                    ]\n                    if img_link\n                    else []\n                )\n                + [\n                    (\n                        MarkdownTextObject(text=f\"{document_title}\")\n                        if d.link == \"\"\n                        else MarkdownTextObject(\n                            text=f\"*<{d.link}|[{citation_num}] {document_title}>*\\n{final_metadata_str}\"\n                        )\n                    ),\n                ]\n            )\n        )\n\n        if included_docs >= num_docs_to_display:\n            break\n\n    return section_blocks\n\n\ndef _priority_ordered_documents_blocks(\n    answer: ChatBasicResponse,\n) -> list[Block]:\n    top_docs = answer.top_documents if answer.top_documents else None\n    if not top_docs:\n        return []\n\n    document_blocks = _build_documents_blocks(\n        documents=top_docs,\n        message_id=answer.message_id,\n    )\n    if document_blocks:\n        document_blocks = [DividerBlock()] + document_blocks\n    return document_blocks\n\n\ndef _build_citations_blocks(\n    answer: ChatBasicResponse,\n) -> list[Block]:\n    top_docs = answer.top_documents\n    citations = answer.citation_info or []\n    cited_docs: list[tuple[int, SearchDoc]] = []\n    for citation_info in citations:\n        matching_doc = next(\n            (d for d in top_docs if d.document_id == citation_info.document_id),\n            None,\n        )\n        if matching_doc:\n            cited_docs.append((citation_info.citation_number, matching_doc))\n\n    cited_docs.sort()\n    citations_block = _build_sources_blocks(cited_documents=cited_docs)\n    return citations_block\n\n\ndef _build_main_response_blocks(\n    answer: ChatBasicResponse,\n) -> list[Block]:\n    # TODO: add back in later when auto-filtering is implemented\n    # if (\n    #     retrieval_info.applied_time_cutoff\n    #     or retrieval_info.recency_bias_multiplier > 1\n    #     or retrieval_info.applied_source_filters\n    # ):\n    #     filter_text = \"Filters: \"\n    #     if retrieval_info.applied_source_filters:\n    #         sources_str = \", \".join(\n    #             [s.value for s in retrieval_info.applied_source_filters]\n    #         )\n    #         filter_text += f\"`Sources in [{sources_str}]`\"\n    #         if (\n    #             retrieval_info.applied_time_cutoff\n    #             or retrieval_info.recency_bias_multiplier > 1\n    #         ):\n    #             filter_text += \" and \"\n    #     if retrieval_info.applied_time_cutoff is not None:\n    #         time_str = retrieval_info.applied_time_cutoff.strftime(\"%b %d, %Y\")\n    #         filter_text += f\"`Docs Updated >= {time_str}` \"\n    #     if retrieval_info.recency_bias_multiplier > 1:\n    #         if retrieval_info.applied_time_cutoff is not None:\n    #             filter_text += \"+ \"\n    #         filter_text += \"`Prioritize Recently Updated Docs`\"\n\n    #     filter_block = SectionBlock(text=f\"_{filter_text}_\")\n\n    # replaces markdown links with slack format links\n    formatted_answer = format_slack_message(answer.answer)\n    answer_processed = decode_escapes(remove_slack_text_interactions(formatted_answer))\n    answer_blocks = [SectionBlock(text=text) for text in _split_text(answer_processed)]\n\n    return cast(list[Block], answer_blocks)\n\n\ndef _build_continue_in_web_ui_block(\n    message_id: int | None,\n) -> Block:\n    if message_id is None:\n        raise ValueError(\"No message id provided to build continue in web ui block\")\n    with get_session_with_current_tenant() as db_session:\n        chat_session = get_chat_session_by_message_id(\n            db_session=db_session,\n            message_id=message_id,\n        )\n        return ActionsBlock(\n            block_id=build_continue_in_web_ui_id(message_id),\n            elements=[\n                ButtonElement(\n                    action_id=CONTINUE_IN_WEB_UI_ACTION_ID,\n                    text=\"Continue Chat in Onyx!\",\n                    style=\"primary\",\n                    url=f\"{WEB_DOMAIN}/chat?slackChatId={chat_session.id}\",\n                ),\n            ],\n        )\n\n\ndef _build_follow_up_block(message_id: int | None) -> ActionsBlock:\n    return ActionsBlock(\n        block_id=build_feedback_id(message_id) if message_id is not None else None,\n        elements=[\n            ButtonElement(\n                action_id=IMMEDIATE_RESOLVED_BUTTON_ACTION_ID,\n                style=\"primary\",\n                text=\"I'm all set!\",\n            ),\n            ButtonElement(\n                action_id=FOLLOWUP_BUTTON_ACTION_ID,\n                style=\"danger\",\n                text=\"I need more help from a human!\",\n            ),\n        ],\n    )\n\n\ndef build_follow_up_resolved_blocks(\n    tag_ids: list[str], group_ids: list[str]\n) -> list[Block]:\n    tag_str = \" \".join([f\"<@{tag}>\" for tag in tag_ids])\n    if tag_str:\n        tag_str += \" \"\n\n    group_str = \" \".join([f\"<!subteam^{group_id}|>\" for group_id in group_ids])\n    if group_str:\n        group_str += \" \"\n\n    text = (\n        tag_str\n        + group_str\n        + \"Someone has requested more help.\\n\\n:point_down:Please mark this resolved after answering!\"\n    )\n    text_block = SectionBlock(text=text)\n    button_block = ActionsBlock(\n        elements=[\n            ButtonElement(\n                action_id=FOLLOWUP_BUTTON_RESOLVED_ACTION_ID,\n                style=\"primary\",\n                text=\"Mark Resolved\",\n            )\n        ]\n    )\n    return [text_block, button_block]\n\n\ndef build_slack_response_blocks(\n    answer: ChatBasicResponse,\n    message_info: SlackMessageInfo,\n    channel_conf: ChannelConfig | None,\n    feedback_reminder_id: str | None,\n    skip_ai_feedback: bool = False,\n    offer_ephemeral_publication: bool = False,\n    skip_restated_question: bool = False,\n) -> list[Block]:\n    \"\"\"\n    This function is a top level function that builds all the blocks for the Slack response.\n    It also handles combining all the blocks together.\n    \"\"\"\n    # If called with the OnyxBot slash command, the question is lost so we have to reshow it\n    if not skip_restated_question:\n        restate_question_block = get_restate_blocks(\n            message_info.thread_messages[-1].message, message_info.is_slash_command\n        )\n    else:\n        restate_question_block = []\n\n    answer_blocks = _build_main_response_blocks(answer)\n\n    web_follow_up_block = []\n    if channel_conf and channel_conf.get(\"show_continue_in_web_ui\"):\n        web_follow_up_block.append(\n            _build_continue_in_web_ui_block(\n                message_id=answer.message_id,\n            )\n        )\n\n    follow_up_block = []\n    if (\n        channel_conf\n        and channel_conf.get(\"follow_up_tags\") is not None\n        and not channel_conf.get(\"is_ephemeral\", False)\n    ):\n        follow_up_block.append(_build_follow_up_block(message_id=answer.message_id))\n\n    publish_ephemeral_message_block = []\n\n    if (\n        offer_ephemeral_publication\n        and answer.message_id is not None\n        and message_info.msg_to_respond is not None\n        and channel_conf is not None\n    ):\n        publish_ephemeral_message_block.append(\n            _build_ephemeral_publication_block(\n                channel_id=message_info.channel_to_respond,\n                chat_message_id=answer.message_id,\n                original_question_ts=message_info.msg_to_respond,\n                message_info=message_info,\n                channel_conf=channel_conf,\n                feedback_reminder_id=feedback_reminder_id,\n            )\n        )\n\n    ai_feedback_block: list[Block] = []\n\n    if answer.message_id is not None and not skip_ai_feedback:\n        ai_feedback_block.append(\n            _build_qa_feedback_block(\n                message_id=answer.message_id,\n                feedback_reminder_id=feedback_reminder_id,\n            )\n        )\n\n    citations_blocks = []\n    if answer.citation_info:\n        citations_blocks = _build_citations_blocks(answer)\n\n    citations_divider = [DividerBlock()] if citations_blocks else []\n    buttons_divider = [DividerBlock()] if web_follow_up_block or follow_up_block else []\n\n    all_blocks = (\n        restate_question_block\n        + answer_blocks\n        + publish_ephemeral_message_block\n        + ai_feedback_block\n        + citations_divider\n        + citations_blocks\n        + buttons_divider\n        + web_follow_up_block\n        + follow_up_block\n    )\n\n    return all_blocks\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/config.py",
    "content": "import os\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import SlackChannelConfig\nfrom onyx.db.slack_channel_config import (\n    fetch_slack_channel_config_for_channel_or_default,\n)\nfrom onyx.db.slack_channel_config import fetch_slack_channel_configs\n\nVALID_SLACK_FILTERS = [\n    \"answerable_prefilter\",\n    \"well_answered_postfilter\",\n    \"questionmark_prefilter\",\n]\n\n\ndef get_slack_channel_config_for_bot_and_channel(\n    db_session: Session,\n    slack_bot_id: int,\n    channel_name: str | None,\n) -> SlackChannelConfig:\n    slack_bot_config = fetch_slack_channel_config_for_channel_or_default(\n        db_session=db_session, slack_bot_id=slack_bot_id, channel_name=channel_name\n    )\n    if not slack_bot_config:\n        raise ValueError(\n            \"No default configuration has been set for this Slack bot. This should not be possible.\"\n        )\n\n    return slack_bot_config\n\n\ndef validate_channel_name(\n    db_session: Session,\n    current_slack_bot_id: int,\n    channel_name: str,\n    current_slack_channel_config_id: int | None,\n) -> str:\n    \"\"\"Make sure that this channel_name does not exist in other Slack channel configs.\n    Returns a cleaned up channel name (e.g. '#' removed if present)\"\"\"\n    slack_bot_configs = fetch_slack_channel_configs(\n        db_session=db_session,\n        slack_bot_id=current_slack_bot_id,\n    )\n    cleaned_channel_name = channel_name.lstrip(\"#\").lower()\n    for slack_channel_config in slack_bot_configs:\n        if slack_channel_config.id == current_slack_channel_config_id:\n            continue\n\n        if cleaned_channel_name == slack_channel_config.channel_config[\"channel_name\"]:\n            raise ValueError(\n                f\"Channel name '{channel_name}' already exists in \"\n                \"another Slack channel config with in Slack Bot with name: \"\n                f\"{slack_channel_config.slack_bot.name}\"\n            )\n\n    return cleaned_channel_name\n\n\n# Scaling configurations for multi-tenant Slack channel handling\nTENANT_LOCK_EXPIRATION = 1800  # How long a pod can hold exclusive access to a tenant before other pods can acquire it\nTENANT_HEARTBEAT_INTERVAL = (\n    15  # How often pods send heartbeats to indicate they are still processing a tenant\n)\nTENANT_HEARTBEAT_EXPIRATION = (\n    60  # How long before a tenant's heartbeat expires, allowing other pods to take over\n)\nTENANT_ACQUISITION_INTERVAL = 60  # How often pods attempt to acquire unprocessed tenants and checks for new tokens\n\nMAX_TENANTS_PER_POD = int(os.getenv(\"MAX_TENANTS_PER_POD\", 50))\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/constants.py",
    "content": "import re\nfrom enum import Enum\n\n# Matches Slack channel references like <#C097NBWMY8Y> or <#C097NBWMY8Y|channel-name>\nSLACK_CHANNEL_REF_PATTERN = re.compile(r\"<#([A-Z0-9]+)(?:\\|([^>]+))?>\")\n\nLIKE_BLOCK_ACTION_ID = \"feedback-like\"\nDISLIKE_BLOCK_ACTION_ID = \"feedback-dislike\"\nSHOW_EVERYONE_ACTION_ID = \"show-everyone\"\nKEEP_TO_YOURSELF_ACTION_ID = \"keep-to-yourself\"\nCONTINUE_IN_WEB_UI_ACTION_ID = \"continue-in-web-ui\"\nFEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID = \"feedback-doc-button\"\nIMMEDIATE_RESOLVED_BUTTON_ACTION_ID = \"immediate-resolved-button\"\nFOLLOWUP_BUTTON_ACTION_ID = \"followup-button\"\nFOLLOWUP_BUTTON_RESOLVED_ACTION_ID = \"followup-resolved-button\"\nVIEW_DOC_FEEDBACK_ID = \"view-doc-feedback\"\nGENERATE_ANSWER_BUTTON_ACTION_ID = \"generate-answer-button\"\n\n\nclass FeedbackVisibility(str, Enum):\n    PRIVATE = \"private\"\n    ANONYMOUS = \"anonymous\"\n    PUBLIC = \"public\"\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/formatting.py",
    "content": "import re\nfrom collections.abc import Callable\nfrom typing import Any\n\nfrom mistune import create_markdown\nfrom mistune import HTMLRenderer\n\n# Tags that should be replaced with a newline (line-break and block-level elements)\n_HTML_NEWLINE_TAG_PATTERN = re.compile(\n    r\"<br\\s*/?>|</(?:p|div|li|h[1-6]|tr|blockquote|section|article)>\",\n    re.IGNORECASE,\n)\n\n# Strips HTML tags but excludes autolinks like <https://...> and <mailto:...>\n_HTML_TAG_PATTERN = re.compile(\n    r\"<(?!https?://|mailto:)/?[a-zA-Z][^>]*>\",\n)\n\n# Matches fenced code blocks (``` ... ```) so we can skip sanitization inside them\n_FENCED_CODE_BLOCK_PATTERN = re.compile(r\"```[\\s\\S]*?```\")\n\n# Matches the start of any markdown link: [text]( or [[n]](\n# The inner group handles nested brackets for citation links like [[1]](.\n_MARKDOWN_LINK_PATTERN = re.compile(r\"\\[(?:[^\\[\\]]|\\[[^\\]]*\\])*\\]\\(\")\n\n# Matches Slack-style links <url|text> that LLMs sometimes output directly.\n# Mistune doesn't recognise this syntax, so text() would escape the angle\n# brackets and Slack would render them as literal text instead of links.\n_SLACK_LINK_PATTERN = re.compile(r\"<(https?://[^|>]+)\\|([^>]+)>\")\n\n\ndef _sanitize_html(text: str) -> str:\n    \"\"\"Strip HTML tags from a text fragment.\n\n    Block-level closing tags and <br> are converted to newlines.\n    All other HTML tags are removed. Autolinks (<https://...>) are preserved.\n    \"\"\"\n    text = _HTML_NEWLINE_TAG_PATTERN.sub(\"\\n\", text)\n    text = _HTML_TAG_PATTERN.sub(\"\", text)\n    return text\n\n\ndef _transform_outside_code_blocks(\n    message: str, transform: Callable[[str], str]\n) -> str:\n    \"\"\"Apply *transform* only to text outside fenced code blocks.\"\"\"\n    parts = _FENCED_CODE_BLOCK_PATTERN.split(message)\n    code_blocks = _FENCED_CODE_BLOCK_PATTERN.findall(message)\n\n    result: list[str] = []\n    for i, part in enumerate(parts):\n        result.append(transform(part))\n        if i < len(code_blocks):\n            result.append(code_blocks[i])\n\n    return \"\".join(result)\n\n\ndef _extract_link_destination(message: str, start_idx: int) -> tuple[str, int | None]:\n    \"\"\"Extract markdown link destination, allowing nested parentheses in the URL.\"\"\"\n    depth = 0\n    i = start_idx\n\n    while i < len(message):\n        curr = message[i]\n        if curr == \"\\\\\":\n            i += 2\n            continue\n\n        if curr == \"(\":\n            depth += 1\n        elif curr == \")\":\n            if depth == 0:\n                return message[start_idx:i], i\n            depth -= 1\n        i += 1\n\n    return message[start_idx:], None\n\n\ndef _normalize_link_destinations(message: str) -> str:\n    \"\"\"Wrap markdown link URLs in angle brackets so the parser handles special chars safely.\n\n    Markdown link syntax [text](url) breaks when the URL contains unescaped\n    parentheses, spaces, or other special characters. Wrapping the URL in angle\n    brackets — [text](<url>) — tells the parser to treat everything inside as\n    a literal URL. This applies to all links, not just citations.\n    \"\"\"\n    if \"](\" not in message:\n        return message\n\n    normalized_parts: list[str] = []\n    cursor = 0\n\n    while match := _MARKDOWN_LINK_PATTERN.search(message, cursor):\n        normalized_parts.append(message[cursor : match.end()])\n        destination_start = match.end()\n        destination, end_idx = _extract_link_destination(message, destination_start)\n        if end_idx is None:\n            normalized_parts.append(message[destination_start:])\n            return \"\".join(normalized_parts)\n\n        already_wrapped = destination.startswith(\"<\") and destination.endswith(\">\")\n        if destination and not already_wrapped:\n            destination = f\"<{destination}>\"\n\n        normalized_parts.append(destination)\n        normalized_parts.append(\")\")\n        cursor = end_idx + 1\n\n    normalized_parts.append(message[cursor:])\n    return \"\".join(normalized_parts)\n\n\ndef _convert_slack_links_to_markdown(message: str) -> str:\n    \"\"\"Convert Slack-style <url|text> links to standard markdown [text](url).\n\n    LLMs sometimes emit Slack mrkdwn link syntax directly. Mistune doesn't\n    recognise it, so the angle brackets would be escaped by text() and Slack\n    would render the link as literal text instead of a clickable link.\n    \"\"\"\n    return _transform_outside_code_blocks(\n        message, lambda text: _SLACK_LINK_PATTERN.sub(r\"[\\2](\\1)\", text)\n    )\n\n\ndef format_slack_message(message: str | None) -> str:\n    if message is None:\n        return \"\"\n    message = _transform_outside_code_blocks(message, _sanitize_html)\n    message = _convert_slack_links_to_markdown(message)\n    normalized_message = _normalize_link_destinations(message)\n    md = create_markdown(renderer=SlackRenderer(), plugins=[\"strikethrough\", \"table\"])\n    result = md(normalized_message)\n    # With HTMLRenderer, result is always str (not AST list)\n    assert isinstance(result, str)\n    return result.rstrip(\"\\n\")\n\n\nclass SlackRenderer(HTMLRenderer):\n    \"\"\"Renders markdown as Slack mrkdwn format instead of HTML.\n\n    Overrides all HTMLRenderer methods that produce HTML tags to ensure\n    no raw HTML ever appears in Slack messages.\n    \"\"\"\n\n    SPECIALS: dict[str, str] = {\"&\": \"&amp;\", \"<\": \"&lt;\", \">\": \"&gt;\"}\n\n    def __init__(self) -> None:\n        super().__init__()\n        self._table_headers: list[str] = []\n        self._current_row_cells: list[str] = []\n\n    def escape_special(self, text: str) -> str:\n        for special, replacement in self.SPECIALS.items():\n            text = text.replace(special, replacement)\n        return text\n\n    def heading(self, text: str, level: int, **attrs: Any) -> str:  # noqa: ARG002\n        return f\"*{text}*\\n\\n\"\n\n    def emphasis(self, text: str) -> str:\n        return f\"_{text}_\"\n\n    def strong(self, text: str) -> str:\n        return f\"*{text}*\"\n\n    def strikethrough(self, text: str) -> str:\n        return f\"~{text}~\"\n\n    def list(self, text: str, ordered: bool, **attrs: Any) -> str:  # noqa: ARG002\n        lines = text.split(\"\\n\")\n        count = 0\n        for i, line in enumerate(lines):\n            if line.startswith(\"li: \"):\n                count += 1\n                prefix = f\"{count}. \" if ordered else \"• \"\n                lines[i] = f\"{prefix}{line[4:]}\"\n        return \"\\n\".join(lines) + \"\\n\"\n\n    def list_item(self, text: str) -> str:\n        return f\"li: {text}\\n\"\n\n    def link(self, text: str, url: str, title: str | None = None) -> str:\n        escaped_url = self.escape_special(url)\n        if text:\n            return f\"<{escaped_url}|{text}>\"\n        if title:\n            return f\"<{escaped_url}|{title}>\"\n        return f\"<{escaped_url}>\"\n\n    def image(self, text: str, url: str, title: str | None = None) -> str:\n        escaped_url = self.escape_special(url)\n        display_text = title or text\n        return f\"<{escaped_url}|{display_text}>\" if display_text else f\"<{escaped_url}>\"\n\n    def codespan(self, text: str) -> str:\n        return f\"`{text}`\"\n\n    def block_code(self, code: str, info: str | None = None) -> str:  # noqa: ARG002\n        return f\"```\\n{code.rstrip(chr(10))}\\n```\\n\\n\"\n\n    def linebreak(self) -> str:\n        return \"\\n\"\n\n    def thematic_break(self) -> str:\n        return \"---\\n\\n\"\n\n    def block_quote(self, text: str) -> str:\n        lines = text.strip().split(\"\\n\")\n        quoted = \"\\n\".join(f\">{line}\" for line in lines)\n        return quoted + \"\\n\\n\"\n\n    def block_html(self, html: str) -> str:\n        return _sanitize_html(html) + \"\\n\\n\"\n\n    def block_error(self, text: str) -> str:\n        return f\"```\\n{text}\\n```\\n\\n\"\n\n    def text(self, text: str) -> str:\n        # Only escape the three entities Slack recognizes: & < >\n        # HTMLRenderer.text() also escapes \" to &quot; which Slack renders\n        # as literal &quot; text since Slack doesn't recognize that entity.\n        return self.escape_special(text)\n\n    # -- Table rendering (converts markdown tables to vertical cards) --\n\n    def table_cell(\n        self,\n        text: str,\n        align: str | None = None,  # noqa: ARG002\n        head: bool = False,  # noqa: ARG002\n    ) -> str:\n        if head:\n            self._table_headers.append(text.strip())\n        else:\n            self._current_row_cells.append(text.strip())\n        return \"\"\n\n    def table_head(self, text: str) -> str:  # noqa: ARG002\n        self._current_row_cells = []\n        return \"\"\n\n    def table_row(self, text: str) -> str:  # noqa: ARG002\n        cells = self._current_row_cells\n        self._current_row_cells = []\n        # First column becomes the bold title, remaining columns are bulleted fields\n        lines: list[str] = []\n        if cells:\n            title = cells[0]\n            if title:\n                # Avoid double-wrapping if cell already contains bold markup\n                if title.startswith(\"*\") and title.endswith(\"*\") and len(title) > 1:\n                    lines.append(title)\n                else:\n                    lines.append(f\"*{title}*\")\n            for i, cell in enumerate(cells[1:], start=1):\n                if i < len(self._table_headers):\n                    lines.append(f\"  • {self._table_headers[i]}: {cell}\")\n                else:\n                    lines.append(f\"  • {cell}\")\n        return \"\\n\".join(lines) + \"\\n\\n\"\n\n    def table_body(self, text: str) -> str:\n        return text\n\n    def table(self, text: str) -> str:\n        self._table_headers = []\n        self._current_row_cells = []\n        return text + \"\\n\"\n\n    def paragraph(self, text: str) -> str:\n        return f\"{text}\\n\\n\"\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/handlers/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/onyxbot/slack/handlers/handle_buttons.py",
    "content": "import json\nfrom typing import Any\nfrom typing import cast\n\nfrom slack_sdk import WebClient\nfrom slack_sdk.models.blocks import SectionBlock\nfrom slack_sdk.models.views import View\nfrom slack_sdk.socket_mode.request import SocketModeRequest\nfrom slack_sdk.webhook import WebhookClient\n\nfrom onyx.chat.models import ChatBasicResponse\nfrom onyx.chat.process_message import remove_answer_citations\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.constants import SearchFeedbackType\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_FOLLOWUP_EMOJI\nfrom onyx.connectors.slack.utils import expert_info_from_slack_id\nfrom onyx.context.search.models import SavedSearchDoc\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.db.chat import get_chat_message\nfrom onyx.db.chat import translate_db_message_to_chat_message_detail\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.feedback import create_chat_message_feedback\nfrom onyx.db.feedback import create_doc_retrieval_feedback\nfrom onyx.db.users import get_user_by_email\nfrom onyx.onyxbot.slack.blocks import build_follow_up_resolved_blocks\nfrom onyx.onyxbot.slack.blocks import build_slack_response_blocks\nfrom onyx.onyxbot.slack.blocks import get_document_feedback_blocks\nfrom onyx.onyxbot.slack.config import get_slack_channel_config_for_bot_and_channel\nfrom onyx.onyxbot.slack.constants import DISLIKE_BLOCK_ACTION_ID\nfrom onyx.onyxbot.slack.constants import FeedbackVisibility\nfrom onyx.onyxbot.slack.constants import KEEP_TO_YOURSELF_ACTION_ID\nfrom onyx.onyxbot.slack.constants import LIKE_BLOCK_ACTION_ID\nfrom onyx.onyxbot.slack.constants import SHOW_EVERYONE_ACTION_ID\nfrom onyx.onyxbot.slack.constants import VIEW_DOC_FEEDBACK_ID\nfrom onyx.onyxbot.slack.handlers.handle_message import (\n    remove_scheduled_feedback_reminder,\n)\nfrom onyx.onyxbot.slack.handlers.handle_regular_answer import (\n    handle_regular_answer,\n)\nfrom onyx.onyxbot.slack.models import SlackMessageInfo\nfrom onyx.onyxbot.slack.utils import build_feedback_id\nfrom onyx.onyxbot.slack.utils import decompose_action_id\nfrom onyx.onyxbot.slack.utils import fetch_group_ids_from_names\nfrom onyx.onyxbot.slack.utils import fetch_slack_user_ids_from_emails\nfrom onyx.onyxbot.slack.utils import get_channel_name_from_id\nfrom onyx.onyxbot.slack.utils import get_feedback_visibility\nfrom onyx.onyxbot.slack.utils import read_slack_thread\nfrom onyx.onyxbot.slack.utils import respond_in_thread_or_channel\nfrom onyx.onyxbot.slack.utils import TenantSocketModeClient\nfrom onyx.onyxbot.slack.utils import update_emote_react\nfrom onyx.server.query_and_chat.models import ChatMessageDetail\nfrom onyx.server.query_and_chat.streaming_models import CitationInfo\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\ndef _convert_document_ids_to_citation_info(\n    citation_dict: dict[int, str], top_documents: list[SavedSearchDoc]\n) -> list[CitationInfo]:\n    citation_list_with_document_id = []\n    # Build a set of valid document_ids from top_documents for validation\n    valid_document_ids = {doc.document_id for doc in top_documents}\n\n    for citation_num, document_id in citation_dict.items():\n        if document_id is not None and document_id in valid_document_ids:\n            citation_list_with_document_id.append(\n                CitationInfo(\n                    citation_number=citation_num,\n                    document_id=document_id,\n                )\n            )\n    return citation_list_with_document_id\n\n\ndef _build_citation_list(chat_message_detail: ChatMessageDetail) -> list[CitationInfo]:\n    citation_dict = chat_message_detail.citations\n    if citation_dict is None:\n        return []\n    else:\n        top_documents = (\n            chat_message_detail.context_docs if chat_message_detail.context_docs else []\n        )\n        citation_list = _convert_document_ids_to_citation_info(\n            citation_dict, top_documents\n        )\n        return citation_list\n\n\ndef handle_doc_feedback_button(\n    req: SocketModeRequest,\n    client: TenantSocketModeClient,\n) -> None:\n    if not (actions := req.payload.get(\"actions\")):\n        logger.error(\"Missing actions. Unable to build the source feedback view\")\n        return\n\n    # Extracts the feedback_id coming from the 'source feedback' button\n    # and generates a new one for the View, to keep track of the doc info\n    query_event_id, doc_id, doc_rank = decompose_action_id(actions[0].get(\"value\"))\n    external_id = build_feedback_id(query_event_id, doc_id, doc_rank)\n\n    channel_id = req.payload[\"container\"][\"channel_id\"]\n    thread_ts = req.payload[\"container\"].get(\"thread_ts\", None)\n\n    data = View(\n        type=\"modal\",\n        callback_id=VIEW_DOC_FEEDBACK_ID,\n        external_id=external_id,\n        # We use the private metadata to keep track of the channel id and thread ts\n        private_metadata=f\"{channel_id}_{thread_ts}\",\n        title=\"Give Feedback\",\n        blocks=[get_document_feedback_blocks()],\n        submit=\"send\",\n        close=\"cancel\",\n    )\n\n    client.web_client.views_open(\n        trigger_id=req.payload[\"trigger_id\"], view=data.to_dict()\n    )\n\n\ndef handle_generate_answer_button(\n    req: SocketModeRequest,\n    client: TenantSocketModeClient,\n) -> None:\n    channel_id = req.payload[\"channel\"][\"id\"]\n    channel_name = req.payload[\"channel\"][\"name\"]\n    message_ts = req.payload[\"message\"][\"ts\"]\n    thread_ts = req.payload[\"container\"].get(\"thread_ts\", None)\n    user_id = req.payload[\"user\"][\"id\"]\n    expert_info = expert_info_from_slack_id(user_id, client.web_client, user_cache={})\n    email = expert_info.email if expert_info else None\n\n    if not thread_ts:\n        raise ValueError(\"Missing thread_ts in the payload\")\n\n    thread_messages = read_slack_thread(\n        tenant_id=client._tenant_id,\n        channel=channel_id,\n        thread=thread_ts,\n        client=client.web_client,\n    )\n    # remove all assistant messages till we get to the last user message\n    # we want the new answer to be generated off of the last \"question\" in\n    # the thread\n    for i in range(len(thread_messages) - 1, -1, -1):\n        if thread_messages[i].role == MessageType.USER:\n            break\n        if thread_messages[i].role == MessageType.ASSISTANT:\n            thread_messages.pop(i)\n\n    # tell the user that we're working on it\n    # Send an ephemeral message to the user that we're generating the answer\n    respond_in_thread_or_channel(\n        client=client.web_client,\n        channel=channel_id,\n        receiver_ids=[user_id],\n        text=\"I'm working on generating a full answer for you. This may take a moment...\",\n        thread_ts=thread_ts,\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        slack_channel_config = get_slack_channel_config_for_bot_and_channel(\n            db_session=db_session,\n            slack_bot_id=client.slack_bot_id,\n            channel_name=channel_name,\n        )\n\n        handle_regular_answer(\n            message_info=SlackMessageInfo(\n                thread_messages=thread_messages,\n                channel_to_respond=channel_id,\n                msg_to_respond=cast(str, message_ts or thread_ts),\n                thread_to_respond=cast(str, thread_ts or message_ts),\n                sender_id=user_id or None,\n                email=email or None,\n                bypass_filters=True,\n                is_slash_command=False,\n                is_bot_dm=False,\n            ),\n            slack_channel_config=slack_channel_config,\n            receiver_ids=None,\n            client=client.web_client,\n            channel=channel_id,\n            logger=logger,\n            feedback_reminder_id=None,\n        )\n\n\ndef handle_publish_ephemeral_message_button(\n    req: SocketModeRequest,\n    client: TenantSocketModeClient,\n    action_id: str,\n) -> None:\n    \"\"\"\n    This function handles the Share with Everyone/Keep for Yourself buttons\n    for ephemeral messages.\n    \"\"\"\n    channel_id = req.payload[\"channel\"][\"id\"]\n    ephemeral_message_ts = req.payload[\"container\"][\"message_ts\"]\n\n    slack_sender_id = req.payload[\"user\"][\"id\"]\n    response_url = req.payload[\"response_url\"]\n    webhook = WebhookClient(url=response_url)\n\n    # The additional data required that was added to buttons.\n    # Specifically, this contains the message_info, channel_conf information\n    # and some additional attributes.\n    value_dict = json.loads(req.payload[\"actions\"][0][\"value\"])\n\n    original_question_ts = value_dict.get(\"original_question_ts\")\n    if not original_question_ts:\n        raise ValueError(\"Missing original_question_ts in the payload\")\n    if not ephemeral_message_ts:\n        raise ValueError(\"Missing ephemeral_message_ts in the payload\")\n\n    feedback_reminder_id = value_dict.get(\"feedback_reminder_id\")\n\n    slack_message_info = SlackMessageInfo(**value_dict[\"message_info\"])\n    channel_conf = value_dict.get(\"channel_conf\")\n\n    user_email = value_dict.get(\"message_info\", {}).get(\"email\")\n\n    chat_message_id = value_dict.get(\"chat_message_id\")\n\n    # Obtain onyx_user and chat_message information\n    if not chat_message_id:\n        raise ValueError(\"Missing chat_message_id in the payload\")\n\n    with get_session_with_current_tenant() as db_session:\n        onyx_user = get_user_by_email(user_email, db_session)\n        if not onyx_user:\n            raise ValueError(\"Cannot determine onyx_user_id from email in payload\")\n        try:\n            chat_message = get_chat_message(chat_message_id, onyx_user.id, db_session)\n        except ValueError:\n            chat_message = get_chat_message(\n                chat_message_id, None, db_session\n            )  # is this good idea?\n        except Exception as e:\n            logger.error(f\"Failed to get chat message: {e}\")\n            raise e\n\n        chat_message_detail = translate_db_message_to_chat_message_detail(chat_message)\n\n        # construct the proper citation format and then the answer in the suitable format\n        # we need to construct the blocks.\n        citation_list = _build_citation_list(chat_message_detail)\n\n        if chat_message_detail.context_docs:\n            top_documents: list[SearchDoc] = [\n                SearchDoc.from_saved_search_doc(doc)\n                for doc in chat_message_detail.context_docs\n            ]\n        else:\n            top_documents = []\n\n        onyx_bot_answer = ChatBasicResponse(\n            answer=chat_message_detail.message,\n            answer_citationless=remove_answer_citations(chat_message_detail.message),\n            top_documents=top_documents,\n            message_id=chat_message_id,\n            error_msg=None,\n            citation_info=citation_list,\n        )\n\n    # Note: we need to use the webhook and the respond_url to update/delete ephemeral messages\n    if action_id == SHOW_EVERYONE_ACTION_ID:\n        # Convert to non-ephemeral message in thread\n        try:\n            webhook.send(\n                response_type=\"ephemeral\",\n                text=\"\",\n                blocks=[],\n                replace_original=True,\n                delete_original=True,\n            )\n        except Exception as e:\n            logger.error(f\"Failed to send webhook: {e}\")\n\n        # remove handling of empheremal block and add AI feedback.\n        all_blocks = build_slack_response_blocks(\n            answer=onyx_bot_answer,\n            message_info=slack_message_info,\n            channel_conf=channel_conf,\n            feedback_reminder_id=feedback_reminder_id,\n            skip_ai_feedback=False,\n            offer_ephemeral_publication=False,\n            skip_restated_question=True,\n        )\n        try:\n            # Post in thread as non-ephemeral message\n            respond_in_thread_or_channel(\n                client=client.web_client,\n                channel=channel_id,\n                receiver_ids=None,  # If respond_member_group_list is set, send to them. TODO: check!\n                text=\"Hello! Onyx has some results for you!\",\n                blocks=all_blocks,\n                thread_ts=original_question_ts,\n                # don't unfurl, since otherwise we will have 5+ previews which makes the message very long\n                unfurl=False,\n                send_as_ephemeral=False,\n            )\n        except Exception as e:\n            logger.error(f\"Failed to publish ephemeral message: {e}\")\n            raise e\n\n    elif action_id == KEEP_TO_YOURSELF_ACTION_ID:\n        # Keep as ephemeral message in channel or thread, but remove the publish button and add feedback button\n\n        changed_blocks = build_slack_response_blocks(\n            answer=onyx_bot_answer,\n            message_info=slack_message_info,\n            channel_conf=channel_conf,\n            feedback_reminder_id=feedback_reminder_id,\n            skip_ai_feedback=False,\n            offer_ephemeral_publication=False,\n            skip_restated_question=True,\n        )\n\n        try:\n            if slack_message_info.thread_to_respond is not None:\n                # There seems to be a bug in slack where an update within the thread\n                # actually leads to the update to be posted in the channel. Therefore,\n                # for now we delete the original ephemeral message and post a new one\n                # if the ephemeral message is in a thread.\n                webhook.send(\n                    response_type=\"ephemeral\",\n                    text=\"\",\n                    blocks=[],\n                    replace_original=True,\n                    delete_original=True,\n                )\n\n                respond_in_thread_or_channel(\n                    client=client.web_client,\n                    channel=channel_id,\n                    receiver_ids=[slack_sender_id],\n                    text=\"Your personal response, sent as an ephemeral message.\",\n                    blocks=changed_blocks,\n                    thread_ts=original_question_ts,\n                    # don't unfurl, since otherwise we will have 5+ previews which makes the message very long\n                    unfurl=False,\n                    send_as_ephemeral=True,\n                )\n            else:\n                # This works fine if the ephemeral message is in the channel\n                webhook.send(\n                    response_type=\"ephemeral\",\n                    text=\"Your personal response, sent as an ephemeral message.\",\n                    blocks=changed_blocks,\n                    replace_original=True,\n                    delete_original=False,\n                )\n        except Exception as e:\n            logger.error(f\"Failed to send webhook: {e}\")\n\n\ndef handle_slack_feedback(\n    feedback_id: str,\n    feedback_type: str,\n    feedback_msg_reminder: str,\n    client: WebClient,\n    user_id_to_post_confirmation: str,\n    channel_id_to_post_confirmation: str,\n    thread_ts_to_post_confirmation: str,\n) -> None:\n    message_id, doc_id, doc_rank = decompose_action_id(feedback_id)\n\n    # Get Onyx user from Slack ID\n    expert_info = expert_info_from_slack_id(\n        user_id_to_post_confirmation, client, user_cache={}\n    )\n    email = expert_info.email if expert_info else None\n\n    with get_session_with_current_tenant() as db_session:\n        onyx_user = get_user_by_email(email, db_session) if email else None\n        if feedback_type in [LIKE_BLOCK_ACTION_ID, DISLIKE_BLOCK_ACTION_ID]:\n            create_chat_message_feedback(\n                is_positive=feedback_type == LIKE_BLOCK_ACTION_ID,\n                feedback_text=\"\",\n                chat_message_id=message_id,\n                user_id=onyx_user.id if onyx_user else None,\n                db_session=db_session,\n            )\n            remove_scheduled_feedback_reminder(\n                client=client,\n                channel=user_id_to_post_confirmation,\n                msg_id=feedback_msg_reminder,\n            )\n        elif feedback_type in [\n            SearchFeedbackType.ENDORSE.value,\n            SearchFeedbackType.REJECT.value,\n            SearchFeedbackType.HIDE.value,\n        ]:\n            if doc_id is None or doc_rank is None:\n                raise ValueError(\"Missing information for Document Feedback\")\n\n            if feedback_type == SearchFeedbackType.ENDORSE.value:\n                feedback = SearchFeedbackType.ENDORSE\n            elif feedback_type == SearchFeedbackType.REJECT.value:\n                feedback = SearchFeedbackType.REJECT\n            else:\n                feedback = SearchFeedbackType.HIDE\n\n            create_doc_retrieval_feedback(\n                message_id=message_id,\n                document_id=doc_id,\n                document_rank=doc_rank,\n                db_session=db_session,\n                clicked=False,  # Not tracking this for Slack\n                feedback=feedback,\n            )\n        else:\n            logger.error(f\"Feedback type '{feedback_type}' not supported\")\n\n    if get_feedback_visibility() == FeedbackVisibility.PRIVATE or feedback_type not in [\n        LIKE_BLOCK_ACTION_ID,\n        DISLIKE_BLOCK_ACTION_ID,\n    ]:\n        client.chat_postEphemeral(\n            channel=channel_id_to_post_confirmation,\n            user=user_id_to_post_confirmation,\n            thread_ts=thread_ts_to_post_confirmation,\n            text=\"Thanks for your feedback!\",\n        )\n    else:\n        feedback_response_txt = (\n            \"liked\" if feedback_type == LIKE_BLOCK_ACTION_ID else \"disliked\"\n        )\n\n        if get_feedback_visibility() == FeedbackVisibility.ANONYMOUS:\n            msg = f\"A user has {feedback_response_txt} the AI Answer\"\n        else:\n            msg = f\"<@{user_id_to_post_confirmation}> has {feedback_response_txt} the AI Answer\"\n\n        respond_in_thread_or_channel(\n            client=client,\n            channel=channel_id_to_post_confirmation,\n            text=msg,\n            thread_ts=thread_ts_to_post_confirmation,\n            unfurl=False,\n        )\n\n\ndef handle_followup_button(\n    req: SocketModeRequest,\n    client: TenantSocketModeClient,\n) -> None:\n    action_id = None\n    if actions := req.payload.get(\"actions\"):\n        action = cast(dict[str, Any], actions[0])\n        action_id = cast(str, action.get(\"block_id\"))\n\n    channel_id = req.payload[\"container\"][\"channel_id\"]\n    thread_ts = req.payload[\"container\"].get(\"thread_ts\", None)\n\n    update_emote_react(\n        emoji=ONYX_BOT_FOLLOWUP_EMOJI,\n        channel=channel_id,\n        message_ts=thread_ts,\n        remove=False,\n        client=client.web_client,\n    )\n\n    tag_ids: list[str] = []\n    group_ids: list[str] = []\n    with get_session_with_current_tenant() as db_session:\n        channel_name, is_dm = get_channel_name_from_id(\n            client=client.web_client, channel_id=channel_id\n        )\n        slack_channel_config = get_slack_channel_config_for_bot_and_channel(\n            db_session=db_session,\n            slack_bot_id=client.slack_bot_id,\n            channel_name=channel_name,\n        )\n        if slack_channel_config:\n            tag_names = slack_channel_config.channel_config.get(\"follow_up_tags\")\n            remaining = None\n            if tag_names:\n                tag_ids, remaining = fetch_slack_user_ids_from_emails(\n                    tag_names, client.web_client\n                )\n            if remaining:\n                group_ids, _ = fetch_group_ids_from_names(remaining, client.web_client)\n\n    blocks = build_follow_up_resolved_blocks(tag_ids=tag_ids, group_ids=group_ids)\n\n    respond_in_thread_or_channel(\n        client=client.web_client,\n        channel=channel_id,\n        text=\"Received your request for more help\",\n        blocks=blocks,\n        thread_ts=thread_ts,\n        unfurl=False,\n    )\n\n    if action_id is not None:\n        message_id, _, _ = decompose_action_id(action_id)\n\n        create_chat_message_feedback(\n            is_positive=None,\n            feedback_text=\"\",\n            chat_message_id=message_id,\n            user_id=None,  # no \"user\" for Slack bot for now\n            db_session=db_session,\n            required_followup=True,\n        )\n\n\ndef get_clicker_name(\n    req: SocketModeRequest,\n    client: TenantSocketModeClient,\n) -> str:\n    clicker_name = req.payload.get(\"user\", {}).get(\"name\", \"Someone\")\n    clicker_real_name = None\n    try:\n        clicker = client.web_client.users_info(user=req.payload[\"user\"][\"id\"])\n        clicker_real_name = (\n            cast(dict, clicker.data).get(\"user\", {}).get(\"profile\", {}).get(\"real_name\")\n        )\n    except Exception:\n        # Likely a scope issue\n        pass\n\n    if clicker_real_name:\n        clicker_name = clicker_real_name\n\n    return clicker_name\n\n\ndef handle_followup_resolved_button(\n    req: SocketModeRequest,\n    client: TenantSocketModeClient,\n    immediate: bool = False,\n) -> None:\n    channel_id = req.payload[\"container\"][\"channel_id\"]\n    message_ts = req.payload[\"container\"][\"message_ts\"]\n    thread_ts = req.payload[\"container\"].get(\"thread_ts\", None)\n\n    clicker_name = get_clicker_name(req, client)\n\n    update_emote_react(\n        emoji=ONYX_BOT_FOLLOWUP_EMOJI,\n        channel=channel_id,\n        message_ts=thread_ts,\n        remove=True,\n        client=client.web_client,\n    )\n\n    # Delete the message with the option to mark resolved\n    if not immediate:\n        response = client.web_client.chat_delete(\n            channel=channel_id,\n            ts=message_ts,\n        )\n\n        if not response.get(\"ok\"):\n            logger.error(\"Unable to delete message for resolved\")\n\n    if immediate:\n        msg_text = f\"{clicker_name} has marked this question as resolved!\"\n    else:\n        msg_text = (\n            f\"{clicker_name} has marked this question as resolved! \"\n            f'\\n\\n You can always click the \"I need more help button\" to let the team '\n            f\"know that your problem still needs attention.\"\n        )\n\n    resolved_block = SectionBlock(text=msg_text)\n\n    respond_in_thread_or_channel(\n        client=client.web_client,\n        channel=channel_id,\n        text=\"Your request for help as been addressed!\",\n        blocks=[resolved_block],\n        thread_ts=thread_ts,\n        unfurl=False,\n    )\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/handlers/handle_message.py",
    "content": "import datetime\n\nfrom slack_sdk import WebClient\nfrom slack_sdk.errors import SlackApiError\n\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_FEEDBACK_REMINDER\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_REACT_EMOJI\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import AccountType\nfrom onyx.db.models import SlackChannelConfig\nfrom onyx.db.user_preferences import activate_user\nfrom onyx.db.users import add_slack_user_if_not_exists\nfrom onyx.db.users import get_user_by_email\nfrom onyx.onyxbot.slack.blocks import get_feedback_reminder_blocks\nfrom onyx.onyxbot.slack.handlers.handle_regular_answer import (\n    handle_regular_answer,\n)\nfrom onyx.onyxbot.slack.handlers.handle_standard_answers import (\n    handle_standard_answers,\n)\nfrom onyx.onyxbot.slack.models import SlackMessageInfo\nfrom onyx.onyxbot.slack.utils import fetch_slack_user_ids_from_emails\nfrom onyx.onyxbot.slack.utils import fetch_user_ids_from_groups\nfrom onyx.onyxbot.slack.utils import respond_in_thread_or_channel\nfrom onyx.onyxbot.slack.utils import slack_usage_report\nfrom onyx.onyxbot.slack.utils import update_emote_react\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\nfrom shared_configs.configs import SLACK_CHANNEL_ID\n\nlogger_base = setup_logger()\n\n\ndef send_msg_ack_to_user(details: SlackMessageInfo, client: WebClient) -> None:\n    if details.is_slash_command and details.sender_id:\n        respond_in_thread_or_channel(\n            client=client,\n            channel=details.channel_to_respond,\n            thread_ts=details.msg_to_respond,\n            receiver_ids=[details.sender_id],\n            text=\"Hi, we're evaluating your query :face_with_monocle:\",\n        )\n        return\n\n    update_emote_react(\n        emoji=ONYX_BOT_REACT_EMOJI,\n        channel=details.channel_to_respond,\n        message_ts=details.msg_to_respond,\n        remove=False,\n        client=client,\n    )\n\n\ndef schedule_feedback_reminder(\n    details: SlackMessageInfo, include_followup: bool, client: WebClient\n) -> str | None:\n    logger = setup_logger(extra={SLACK_CHANNEL_ID: details.channel_to_respond})\n\n    if not ONYX_BOT_FEEDBACK_REMINDER:\n        logger.info(\"Scheduled feedback reminder disabled...\")\n        return None\n\n    try:\n        permalink = client.chat_getPermalink(\n            channel=details.channel_to_respond,\n            message_ts=details.msg_to_respond,  # type:ignore\n        )\n    except SlackApiError as e:\n        logger.error(f\"Unable to generate the feedback reminder permalink: {e}\")\n        return None\n\n    now = datetime.datetime.now()\n    future = now + datetime.timedelta(minutes=ONYX_BOT_FEEDBACK_REMINDER)\n\n    try:\n        response = client.chat_scheduleMessage(\n            channel=details.sender_id,  # type:ignore\n            post_at=int(future.timestamp()),\n            blocks=[\n                get_feedback_reminder_blocks(\n                    thread_link=permalink.data[\"permalink\"],  # type:ignore\n                    include_followup=include_followup,\n                )\n            ],\n            text=\"\",\n        )\n        logger.info(\"Scheduled feedback reminder configured\")\n        return response.data[\"scheduled_message_id\"]  # type:ignore\n    except SlackApiError as e:\n        logger.error(f\"Unable to generate the feedback reminder message: {e}\")\n        return None\n\n\ndef remove_scheduled_feedback_reminder(\n    client: WebClient, channel: str | None, msg_id: str\n) -> None:\n    logger = setup_logger(extra={SLACK_CHANNEL_ID: channel})\n\n    try:\n        client.chat_deleteScheduledMessage(\n            channel=channel,  # type:ignore\n            scheduled_message_id=msg_id,\n        )\n        logger.info(\"Scheduled feedback reminder deleted\")\n    except SlackApiError as e:\n        if e.response[\"error\"] == \"invalid_scheduled_message_id\":\n            logger.info(\n                \"Unable to delete the scheduled message. It must have already been posted\"\n            )\n\n\ndef handle_message(\n    message_info: SlackMessageInfo,\n    slack_channel_config: SlackChannelConfig,\n    client: WebClient,\n    feedback_reminder_id: str | None,\n) -> bool:\n    \"\"\"Potentially respond to the user message depending on filters and if an answer was generated\n\n    Returns True if need to respond with an additional message to the user(s) after this\n    function is finished. True indicates an unexpected failure that needs to be communicated\n    Query thrown out by filters due to config does not count as a failure that should be notified\n    Onyx failing to answer/retrieve docs does count and should be notified\n    \"\"\"\n    channel = message_info.channel_to_respond\n\n    logger = setup_logger(extra={SLACK_CHANNEL_ID: channel})\n\n    messages = message_info.thread_messages\n    sender_id = message_info.sender_id\n    bypass_filters = message_info.bypass_filters\n    is_slash_command = message_info.is_slash_command\n    is_bot_dm = message_info.is_bot_dm\n\n    action = \"slack_message\"\n    if is_slash_command:\n        action = \"slack_slash_message\"\n    elif bypass_filters:\n        action = \"slack_tag_message\"\n    elif is_bot_dm:\n        action = \"slack_dm_message\"\n    slack_usage_report(action=action, sender_id=sender_id, client=client)\n\n    document_set_names: list[str] | None = None\n    persona = slack_channel_config.persona if slack_channel_config else None\n    if persona:\n        document_set_names = [\n            document_set.name for document_set in persona.document_sets\n        ]\n\n    respond_tag_only = False\n    respond_member_group_list = None\n\n    channel_conf = None\n    if slack_channel_config and slack_channel_config.channel_config:\n        channel_conf = slack_channel_config.channel_config\n        if not bypass_filters and \"answer_filters\" in channel_conf:\n            if (\n                \"questionmark_prefilter\" in channel_conf[\"answer_filters\"]\n                and \"?\" not in messages[-1].message\n            ):\n                logger.info(\n                    \"Skipping message since it does not contain a question mark\"\n                )\n                return False\n\n        logger.info(\n            \"Found slack bot config for channel. Restricting bot to use document \"\n            f\"sets: {document_set_names}, \"\n            f\"validity checks enabled: {channel_conf.get('answer_filters', 'NA')}\"\n        )\n\n        respond_tag_only = channel_conf.get(\"respond_tag_only\") or False\n        respond_member_group_list = channel_conf.get(\"respond_member_group_list\", None)\n\n    # Only default config can be disabled.\n    # If channel config is disabled, bot should not respond to this message (including DMs)\n    if slack_channel_config.channel_config.get(\"disabled\"):\n        logger.info(\"Skipping message: OnyxBot is disabled for this channel\")\n        return False\n\n    # If bot should only respond to tags and is not tagged nor in a DM, skip message\n    if respond_tag_only and not bypass_filters and not is_bot_dm:\n        logger.info(\"Skipping message: OnyxBot only responds to tags in this channel\")\n        return False\n\n    # List of user id to send message to, if None, send to everyone in channel\n    send_to: list[str] | None = None\n    missing_users: list[str] | None = None\n    if respond_member_group_list:\n        send_to, missing_ids = fetch_slack_user_ids_from_emails(\n            respond_member_group_list, client\n        )\n\n        user_ids, missing_users = fetch_user_ids_from_groups(missing_ids, client)\n        send_to = list(set(send_to + user_ids)) if send_to else user_ids\n\n        if missing_users:\n            logger.warning(f\"Failed to find these users/groups: {missing_users}\")\n\n    # If configured to respond to team members only, then cannot be used with a /OnyxBot command\n    # which would just respond to the sender\n    if send_to and is_slash_command:\n        if sender_id:\n            respond_in_thread_or_channel(\n                client=client,\n                channel=channel,\n                receiver_ids=[sender_id],\n                text=\"The OnyxBot slash command is not enabled for this channel\",\n                thread_ts=None,\n            )\n\n    try:\n        send_msg_ack_to_user(message_info, client)\n    except SlackApiError as e:\n        logger.error(f\"Was not able to react to user message due to: {e}\")\n\n    with get_session_with_current_tenant() as db_session:\n        if message_info.email:\n            existing_user = get_user_by_email(message_info.email, db_session)\n            if existing_user is None:\n                # New user — check seat availability before creating\n                check_seat_fn = fetch_ee_implementation_or_noop(\n                    \"onyx.db.license\",\n                    \"check_seat_availability\",\n                    None,\n                )\n                # noop returns None when called; real function returns SeatAvailabilityResult\n                seat_result = check_seat_fn(db_session=db_session)\n                if seat_result is not None and not seat_result.available:\n                    logger.info(\n                        f\"Blocked new Slack user {message_info.email}: {seat_result.error_message}\"\n                    )\n                    respond_in_thread_or_channel(\n                        client=client,\n                        channel=channel,\n                        thread_ts=message_info.msg_to_respond,\n                        text=(\n                            \"We weren't able to respond because your organization \"\n                            \"has reached its user seat limit. Since this is your \"\n                            \"first time interacting with the bot, a new account \"\n                            \"could not be created for you. Please contact your \"\n                            \"Onyx administrator to add more seats.\"\n                        ),\n                    )\n                    return False\n\n            elif (\n                not existing_user.is_active\n                and existing_user.account_type == AccountType.BOT\n            ):\n                check_seat_fn = fetch_ee_implementation_or_noop(\n                    \"onyx.db.license\",\n                    \"check_seat_availability\",\n                    None,\n                )\n                seat_result = check_seat_fn(db_session=db_session)\n                if seat_result is not None and not seat_result.available:\n                    logger.info(\n                        f\"Blocked inactive Slack user {message_info.email}: {seat_result.error_message}\"\n                    )\n                    respond_in_thread_or_channel(\n                        client=client,\n                        channel=channel,\n                        thread_ts=message_info.msg_to_respond,\n                        text=(\n                            \"We weren't able to respond because your organization \"\n                            \"has reached its user seat limit. Your account is \"\n                            \"currently deactivated and cannot be reactivated \"\n                            \"until more seats are available. Please contact \"\n                            \"your Onyx administrator.\"\n                        ),\n                    )\n                    return False\n\n                activate_user(existing_user, db_session)\n                invalidate_license_cache_fn = fetch_ee_implementation_or_noop(\n                    \"onyx.db.license\",\n                    \"invalidate_license_cache\",\n                    None,\n                )\n                invalidate_license_cache_fn()\n                logger.info(f\"Reactivated inactive Slack user {message_info.email}\")\n\n            add_slack_user_if_not_exists(db_session, message_info.email)\n\n        # first check if we need to respond with a standard answer\n        # standard answers should be published in a thread\n        used_standard_answer = handle_standard_answers(\n            message_info=message_info,\n            receiver_ids=send_to,\n            slack_channel_config=slack_channel_config,\n            logger=logger,\n            client=client,\n            db_session=db_session,\n        )\n        if used_standard_answer:\n            return False\n\n        # if no standard answer applies, try a regular answer\n        issue_with_regular_answer = handle_regular_answer(\n            message_info=message_info,\n            slack_channel_config=slack_channel_config,\n            receiver_ids=send_to,\n            client=client,\n            channel=channel,\n            logger=logger,\n            feedback_reminder_id=feedback_reminder_id,\n        )\n        return issue_with_regular_answer\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/handlers/handle_regular_answer.py",
    "content": "import functools\nfrom collections.abc import Callable\nfrom typing import Any\nfrom typing import Optional\nfrom typing import TypeVar\n\nfrom retry import retry\nfrom slack_sdk import WebClient\n\nfrom onyx.auth.users import get_anonymous_user\nfrom onyx.chat.models import ChatBasicResponse\nfrom onyx.chat.process_message import gather_stream\nfrom onyx.chat.process_message import handle_stream_message_objects\nfrom onyx.configs.constants import DEFAULT_PERSONA_ID\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_DISPLAY_ERROR_MSGS\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_NUM_RETRIES\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_REACT_EMOJI\nfrom onyx.context.search.models import BaseFilters\nfrom onyx.context.search.models import Tag\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import SlackChannelConfig\nfrom onyx.db.models import User\nfrom onyx.db.persona import get_persona_by_id\nfrom onyx.db.users import get_user_by_email\nfrom onyx.onyxbot.slack.blocks import build_slack_response_blocks\nfrom onyx.onyxbot.slack.constants import SLACK_CHANNEL_REF_PATTERN\nfrom onyx.onyxbot.slack.handlers.utils import send_team_member_message\nfrom onyx.onyxbot.slack.models import SlackMessageInfo\nfrom onyx.onyxbot.slack.models import ThreadMessage\nfrom onyx.onyxbot.slack.utils import get_channel_from_id\nfrom onyx.onyxbot.slack.utils import get_channel_name_from_id\nfrom onyx.onyxbot.slack.utils import respond_in_thread_or_channel\nfrom onyx.onyxbot.slack.utils import SlackRateLimiter\nfrom onyx.onyxbot.slack.utils import update_emote_react\nfrom onyx.server.query_and_chat.models import ChatSessionCreationRequest\nfrom onyx.server.query_and_chat.models import MessageOrigin\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.utils.logger import OnyxLoggingAdapter\n\nsrl = SlackRateLimiter()\n\nRT = TypeVar(\"RT\")  # return type\n\n\ndef resolve_channel_references(\n    message: str,\n    client: WebClient,\n    logger: OnyxLoggingAdapter,\n) -> tuple[str, list[Tag]]:\n    \"\"\"Parse Slack channel references from a message, resolve IDs to names,\n    replace the raw markup with readable #channel-name, and return channel tags\n    for search filtering.\"\"\"\n    tags: list[Tag] = []\n    channel_matches = SLACK_CHANNEL_REF_PATTERN.findall(message)\n    seen_channel_ids: set[str] = set()\n\n    for channel_id, channel_name_from_markup in channel_matches:\n        if channel_id in seen_channel_ids:\n            continue\n        seen_channel_ids.add(channel_id)\n\n        channel_name = channel_name_from_markup or None\n\n        if not channel_name:\n            try:\n                channel_info = get_channel_from_id(client=client, channel_id=channel_id)\n                channel_name = channel_info.get(\"name\") or None\n            except Exception:\n                logger.warning(f\"Failed to resolve channel name for ID: {channel_id}\")\n\n            if not channel_name:\n                continue\n\n        # Replace raw Slack markup with readable channel name\n        if channel_name_from_markup:\n            message = message.replace(\n                f\"<#{channel_id}|{channel_name_from_markup}>\",\n                f\"#{channel_name}\",\n            )\n        else:\n            message = message.replace(\n                f\"<#{channel_id}>\",\n                f\"#{channel_name}\",\n            )\n        tags.append(Tag(tag_key=\"Channel\", tag_value=channel_name))\n\n    return message, tags\n\n\ndef rate_limits(\n    client: WebClient, channel: str, thread_ts: Optional[str]\n) -> Callable[[Callable[..., RT]], Callable[..., RT]]:\n    def decorator(func: Callable[..., RT]) -> Callable[..., RT]:\n        @functools.wraps(func)\n        def wrapper(*args: Any, **kwargs: Any) -> RT:\n            if not srl.is_available():\n                func_randid, position = srl.init_waiter()\n                srl.notify(client, channel, position, thread_ts)\n                while not srl.is_available():\n                    srl.waiter(func_randid)\n            srl.acquire_slot()\n            return func(*args, **kwargs)\n\n        return wrapper\n\n    return decorator\n\n\ndef build_slack_context_str(\n    messages: list[ThreadMessage], channel_name: str | None\n) -> str | None:\n    if not messages:\n        return None\n\n    if channel_name:\n        slack_context_str = f\"The following is a thread in Slack in channel {channel_name}:\\n====================\\n\"\n    else:\n        slack_context_str = (\n            \"The following is a thread from Slack:\\n====================\\n\"\n        )\n\n    message_strs: list[str] = []\n    for message in messages:\n        if message.role == MessageType.USER:\n            message_text = f\"{message.sender or 'Unknown User'}:\\n{message.message}\"\n        elif message.role == MessageType.ASSISTANT:\n            message_text = f\"AI:\\n{message.message}\"\n        else:\n            message_text = f\"{message.role.value.upper()}:\\n{message.message}\"\n        message_strs.append(message_text)\n\n    return slack_context_str + \"\\n\\n\".join(message_strs)\n\n\ndef handle_regular_answer(\n    message_info: SlackMessageInfo,\n    slack_channel_config: SlackChannelConfig,\n    receiver_ids: list[str] | None,\n    client: WebClient,\n    channel: str,\n    logger: OnyxLoggingAdapter,\n    feedback_reminder_id: str | None,\n    num_retries: int = ONYX_BOT_NUM_RETRIES,\n    should_respond_with_error_msgs: bool = ONYX_BOT_DISPLAY_ERROR_MSGS,\n    disable_docs_only_answer: bool = ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER,\n) -> bool:\n    channel_conf = slack_channel_config.channel_config\n\n    messages = message_info.thread_messages\n\n    message_ts_to_respond_to = message_info.msg_to_respond\n    is_slash_command = message_info.is_slash_command\n\n    # Capture whether response mode for channel is ephemeral. Even if the channel is set\n    # to respond with an ephemeral message, we still send as non-ephemeral if\n    # the message is a dm with the Onyx bot.\n    send_as_ephemeral = (\n        slack_channel_config.channel_config.get(\"is_ephemeral\", False)\n        or message_info.is_slash_command\n    ) and not message_info.is_bot_dm\n\n    # If the channel is configured to respond with an ephemeral message,\n    # or the message is a dm to the Onyx bot, we should use the proper onyx user from the email.\n    # This will make documents privately accessible to the user available to Onyx Bot answers.\n    # Otherwise - if not ephemeral or DM to Onyx Bot - we use anonymous user to restrict\n    # to public docs.\n\n    if message_info.email:\n        with get_session_with_current_tenant() as db_session:\n            found_user = get_user_by_email(message_info.email, db_session)\n            user = found_user if found_user else get_anonymous_user()\n    else:\n        user = get_anonymous_user()\n\n    target_thread_ts = (\n        None\n        if send_as_ephemeral and len(message_info.thread_messages) < 2\n        else message_ts_to_respond_to\n    )\n    target_receiver_ids = (\n        [message_info.sender_id]\n        if message_info.sender_id and send_as_ephemeral\n        else receiver_ids\n    )\n\n    document_set_names: list[str] | None = None\n    # If no persona is specified, use the default search based persona\n    # This way slack flow always has a persona\n    persona = slack_channel_config.persona\n    if not persona:\n        logger.warning(\"No persona found for channel config, using default persona\")\n        with get_session_with_current_tenant() as db_session:\n            persona = get_persona_by_id(DEFAULT_PERSONA_ID, user, db_session)\n            document_set_names = [\n                document_set.name for document_set in persona.document_sets\n            ]\n    else:\n        logger.info(f\"Using persona {persona.name} for channel config\")\n        document_set_names = [\n            document_set.name for document_set in persona.document_sets\n        ]\n\n    user_message = messages[-1]\n    history_messages = messages[:-1]\n\n    # Resolve any <#CHANNEL_ID> references in the user message to readable\n    # channel names and extract channel tags for search filtering\n    resolved_message, channel_tags = resolve_channel_references(\n        message=user_message.message,\n        client=client,\n        logger=logger,\n    )\n\n    user_message = ThreadMessage(\n        message=resolved_message,\n        sender=user_message.sender,\n        role=user_message.role,\n    )\n\n    channel_name, _ = get_channel_name_from_id(\n        client=client,\n        channel_id=channel,\n    )\n\n    # NOTE: only the message history will contain the person asking. This is likely\n    # fine since the most common use case for this info is when referring to a user\n    # who previously posted in the thread.\n    slack_context_str = build_slack_context_str(history_messages, channel_name)\n\n    if not message_ts_to_respond_to and not is_slash_command:\n        # if the message is not \"/onyx\" command, then it should have a message ts to respond to\n        raise RuntimeError(\n            \"No message timestamp to respond to in `handle_message`. This should never happen.\"\n        )\n\n    @retry(\n        tries=num_retries,\n        delay=0.25,\n        backoff=2,\n    )\n    @rate_limits(client=client, channel=channel, thread_ts=message_ts_to_respond_to)\n    def _get_slack_answer(\n        new_message_request: SendMessageRequest,\n        slack_context_str: str | None,\n        onyx_user: User,\n    ) -> ChatBasicResponse:\n        with get_session_with_current_tenant() as db_session:\n            packets = handle_stream_message_objects(\n                new_msg_req=new_message_request,\n                user=onyx_user,\n                db_session=db_session,\n                bypass_acl=False,\n                additional_context=slack_context_str,\n                slack_context=message_info.slack_context,\n            )\n            answer = gather_stream(packets)\n\n        if answer.error_msg:\n            raise RuntimeError(answer.error_msg)\n\n        return answer\n\n    try:\n        # By leaving time_cutoff and favor_recent as None, and setting enable_auto_detect_filters\n        # it allows the slack flow to extract out filters from the user query\n        filters = BaseFilters(\n            source_type=None,\n            document_set=document_set_names,\n            time_cutoff=None,\n            tags=channel_tags if channel_tags else None,\n        )\n\n        new_message_request = SendMessageRequest(\n            message=user_message.message,\n            allowed_tool_ids=None,\n            forced_tool_id=None,\n            file_descriptors=[],\n            internal_search_filters=filters,\n            deep_research=False,\n            origin=MessageOrigin.SLACKBOT,\n            chat_session_info=ChatSessionCreationRequest(\n                persona_id=persona.id,\n            ),\n        )\n\n        # if it's a DM or ephemeral message, answer based on private documents.\n        # otherwise, answer based on public documents ONLY as to not leak information.\n        can_search_over_private_docs = message_info.is_bot_dm or send_as_ephemeral\n        answer = _get_slack_answer(\n            new_message_request=new_message_request,\n            onyx_user=user if can_search_over_private_docs else get_anonymous_user(),\n            slack_context_str=slack_context_str,\n        )\n\n        # If a channel filter was applied but no results were found, override\n        # the LLM response to avoid hallucinated answers about unindexed channels\n        if channel_tags and not answer.citation_info and not answer.top_documents:\n            channel_names = \", \".join(f\"#{tag.tag_value}\" for tag in channel_tags)\n            answer.answer = (\n                f\"No indexed data found for {channel_names}. \"\n                \"This channel may not be indexed, or there may be no messages \"\n                \"matching your query within it.\"\n            )\n\n    except Exception as e:\n        logger.exception(\n            f\"Unable to process message - did not successfully answer in {num_retries} attempts\"\n        )\n        # Optionally, respond in thread with the error message, Used primarily\n        # for debugging purposes\n        if should_respond_with_error_msgs:\n            respond_in_thread_or_channel(\n                client=client,\n                channel=channel,\n                receiver_ids=target_receiver_ids,\n                text=f\"Encountered exception when trying to answer: \\n\\n```{e}```\",\n                thread_ts=target_thread_ts,\n                send_as_ephemeral=send_as_ephemeral,\n            )\n\n        # In case of failures, don't keep the reaction there permanently\n        update_emote_react(\n            emoji=ONYX_BOT_REACT_EMOJI,\n            channel=message_info.channel_to_respond,\n            message_ts=message_info.msg_to_respond,\n            remove=True,\n            client=client,\n        )\n\n        return True\n\n    # Got an answer at this point, can remove reaction and give results\n    if not is_slash_command:  # Slash commands don't have reactions\n        update_emote_react(\n            emoji=ONYX_BOT_REACT_EMOJI,\n            channel=message_info.channel_to_respond,\n            message_ts=message_info.msg_to_respond,\n            remove=True,\n            client=client,\n        )\n\n    if not answer.answer and disable_docs_only_answer:\n        logger.notice(\n            \"Unable to find answer - not responding since the `ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER` env variable is set\"\n        )\n        return True\n\n    only_respond_if_citations = (\n        channel_conf\n        and \"well_answered_postfilter\" in channel_conf.get(\"answer_filters\", [])\n    )\n\n    if (\n        only_respond_if_citations\n        and not answer.citation_info\n        and not message_info.bypass_filters\n        and not channel_tags\n    ):\n        logger.error(\n            f\"Unable to find citations to answer: '{answer.answer}' - not answering!\"\n        )\n        # Optionally, respond in thread with the error message\n        # Used primarily for debugging purposes\n        if should_respond_with_error_msgs:\n            respond_in_thread_or_channel(\n                client=client,\n                channel=channel,\n                receiver_ids=target_receiver_ids,\n                text=\"Found no citations or quotes when trying to answer.\",\n                thread_ts=target_thread_ts,\n                send_as_ephemeral=send_as_ephemeral,\n            )\n        return True\n\n    if (\n        send_as_ephemeral\n        and target_receiver_ids is not None\n        and len(target_receiver_ids) == 1\n    ):\n        offer_ephemeral_publication = True\n        skip_ai_feedback = True\n    else:\n        offer_ephemeral_publication = False\n        skip_ai_feedback = False\n\n    all_blocks = build_slack_response_blocks(\n        message_info=message_info,\n        answer=answer,\n        channel_conf=channel_conf,\n        feedback_reminder_id=feedback_reminder_id,\n        offer_ephemeral_publication=offer_ephemeral_publication,\n        skip_ai_feedback=skip_ai_feedback,\n    )\n\n    # NOTE(rkuo): Slack has a maximum block list size of 50.\n    # we should modify build_slack_response_blocks to respect the max\n    # but enforcing the hard limit here is the last resort.\n    all_blocks = all_blocks[:50]\n\n    try:\n        respond_in_thread_or_channel(\n            client=client,\n            channel=channel,\n            receiver_ids=target_receiver_ids,\n            text=\"Hello! Onyx has some results for you!\",\n            blocks=all_blocks,\n            thread_ts=target_thread_ts,\n            # don't unfurl, since otherwise we will have 5+ previews which makes the message very long\n            unfurl=False,\n            send_as_ephemeral=send_as_ephemeral,\n        )\n\n        # For DM (ephemeral message), we need to create a thread via a normal message so the user can see\n        # the ephemeral message. This also will give the user a notification which ephemeral message does not.\n        # if there is no message_ts_to_respond_to, and we have made it this far, then this is a /onyx message\n        # so we shouldn't send_team_member_message\n        if (\n            target_receiver_ids\n            and message_ts_to_respond_to is not None\n            and not send_as_ephemeral\n            and target_thread_ts is not None\n        ):\n            send_team_member_message(\n                client=client,\n                channel=channel,\n                thread_ts=target_thread_ts,\n                receiver_ids=target_receiver_ids,\n                send_as_ephemeral=send_as_ephemeral,\n            )\n\n        return False\n\n    except Exception:\n        logger.exception(\n            f\"Unable to process message - could not respond in slack in {num_retries} attempts\"\n        )\n        return True\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/handlers/handle_standard_answers.py",
    "content": "from slack_sdk import WebClient\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import SlackChannelConfig\nfrom onyx.onyxbot.slack.models import SlackMessageInfo\nfrom onyx.utils.logger import OnyxLoggingAdapter\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\n\nlogger = setup_logger()\n\n\ndef handle_standard_answers(\n    message_info: SlackMessageInfo,\n    receiver_ids: list[str] | None,\n    slack_channel_config: SlackChannelConfig,\n    logger: OnyxLoggingAdapter,\n    client: WebClient,\n    db_session: Session,\n) -> bool:\n    \"\"\"Returns whether one or more Standard Answer message blocks were\n    emitted by the Slack bot\"\"\"\n    versioned_handle_standard_answers = fetch_versioned_implementation(\n        \"onyx.onyxbot.slack.handlers.handle_standard_answers\",\n        \"_handle_standard_answers\",\n    )\n    return versioned_handle_standard_answers(\n        message_info=message_info,\n        receiver_ids=receiver_ids,\n        slack_channel_config=slack_channel_config,\n        logger=logger,\n        client=client,\n        db_session=db_session,\n    )\n\n\ndef _handle_standard_answers(\n    message_info: SlackMessageInfo,  # noqa: ARG001\n    receiver_ids: list[str] | None,  # noqa: ARG001\n    slack_channel_config: SlackChannelConfig,  # noqa: ARG001\n    logger: OnyxLoggingAdapter,  # noqa: ARG001\n    client: WebClient,  # noqa: ARG001\n    db_session: Session,  # noqa: ARG001\n) -> bool:\n    \"\"\"\n    Standard Answers are a paid Enterprise Edition feature. This is the fallback\n    function handling the case where EE features are not enabled.\n\n    Always returns false i.e. since EE features are not enabled, we NEVER create any\n    Slack message blocks.\n    \"\"\"\n    return False\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/handlers/utils.py",
    "content": "from slack_sdk import WebClient\n\nfrom onyx.onyxbot.slack.utils import respond_in_thread_or_channel\n\n\ndef send_team_member_message(\n    client: WebClient,\n    channel: str,\n    thread_ts: str,\n    receiver_ids: list[str] | None = None,  # noqa: ARG001\n    send_as_ephemeral: bool = False,\n) -> None:\n    respond_in_thread_or_channel(\n        client=client,\n        channel=channel,\n        text=(\n            \"👋 Hi, we've just gathered and forwarded the relevant \"\n            + \"information to the team. They'll get back to you shortly!\"\n        ),\n        thread_ts=thread_ts,\n        receiver_ids=None,\n        send_as_ephemeral=send_as_ephemeral,\n    )\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/icons.py",
    "content": "from onyx.configs.constants import DocumentSource\n\n\ndef source_to_github_img_link(source: DocumentSource) -> str | None:\n    # TODO: store these images somewhere better\n    if source == DocumentSource.WEB.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/Web.png\"\n    if source == DocumentSource.FILE.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/File.png\"\n    if source == DocumentSource.GOOGLE_SITES.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/GoogleSites.png\"\n    if source == DocumentSource.SLACK.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Slack.png\"\n    if source == DocumentSource.GMAIL.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Gmail.png\"\n    if source == DocumentSource.GOOGLE_DRIVE.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/GoogleDrive.png\"\n    if source == DocumentSource.GITHUB.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Github.png\"\n    if source == DocumentSource.GITLAB.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Gitlab.png\"\n    if source == DocumentSource.CONFLUENCE.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/Confluence.png\"\n    if source == DocumentSource.JIRA.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/Jira.png\"\n    if source == DocumentSource.NOTION.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Notion.png\"\n    if source == DocumentSource.ZENDESK.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/Zendesk.png\"\n    if source == DocumentSource.GONG.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Gong.png\"\n    if source == DocumentSource.LINEAR.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Linear.png\"\n    if source == DocumentSource.PRODUCTBOARD.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Productboard.webp\"\n    if source == DocumentSource.SLAB.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/SlabLogo.png\"\n    if source == DocumentSource.ZULIP.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Zulip.png\"\n    if source == DocumentSource.GURU.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/Guru.png\"\n    if source == DocumentSource.HUBSPOT.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/HubSpot.png\"\n    if source == DocumentSource.DOCUMENT360.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Document360.png\"\n    if source == DocumentSource.BOOKSTACK.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Bookstack.png\"\n    if source == DocumentSource.OUTLINE.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Outline.png\"\n    if source == DocumentSource.LOOPIO.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Loopio.png\"\n    if source == DocumentSource.SHAREPOINT.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/web/public/Sharepoint.png\"\n    if source == DocumentSource.REQUESTTRACKER.value:\n        # just use file icon for now\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/File.png\"\n    if source == DocumentSource.INGESTION_API.value:\n        return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/File.png\"\n\n    return \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/backend/slackbot_images/File.png\"\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/listener.py",
    "content": "import os\nimport signal\nimport sys\nimport threading\nimport time\nfrom collections.abc import Callable\nfrom contextvars import Token\nfrom threading import Event\nfrom types import FrameType\nfrom typing import Any\nfrom typing import cast\nfrom typing import Dict\n\nimport psycopg2.errors\nfrom prometheus_client import Gauge\nfrom prometheus_client import start_http_server\nfrom redis.lock import Lock\nfrom redis.lock import Lock as RedisLock\nfrom slack_sdk import WebClient\nfrom slack_sdk.errors import SlackApiError\nfrom slack_sdk.http_retry import ConnectionErrorRetryHandler\nfrom slack_sdk.http_retry import RateLimitErrorRetryHandler\nfrom slack_sdk.http_retry import RetryHandler\nfrom slack_sdk.socket_mode.request import SocketModeRequest\nfrom slack_sdk.socket_mode.response import SocketModeResponse\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DEV_MODE\nfrom onyx.configs.app_configs import POD_NAME\nfrom onyx.configs.app_configs import POD_NAMESPACE\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.configs.onyxbot_configs import NOTIFY_SLACKBOT_NO_ANSWER\nfrom onyx.connectors.slack.utils import expert_info_from_slack_id\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.engine.tenant_utils import get_all_tenant_ids\nfrom onyx.db.models import SlackBot\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.db.slack_bot import fetch_slack_bot\nfrom onyx.db.slack_bot import fetch_slack_bots\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.natural_language_processing.search_nlp_models import EmbeddingModel\nfrom onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder\nfrom onyx.onyxbot.slack.config import get_slack_channel_config_for_bot_and_channel\nfrom onyx.onyxbot.slack.config import MAX_TENANTS_PER_POD\nfrom onyx.onyxbot.slack.config import TENANT_ACQUISITION_INTERVAL\nfrom onyx.onyxbot.slack.config import TENANT_HEARTBEAT_EXPIRATION\nfrom onyx.onyxbot.slack.config import TENANT_HEARTBEAT_INTERVAL\nfrom onyx.onyxbot.slack.config import TENANT_LOCK_EXPIRATION\nfrom onyx.onyxbot.slack.constants import DISLIKE_BLOCK_ACTION_ID\nfrom onyx.onyxbot.slack.constants import FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID\nfrom onyx.onyxbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID\nfrom onyx.onyxbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID\nfrom onyx.onyxbot.slack.constants import GENERATE_ANSWER_BUTTON_ACTION_ID\nfrom onyx.onyxbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID\nfrom onyx.onyxbot.slack.constants import KEEP_TO_YOURSELF_ACTION_ID\nfrom onyx.onyxbot.slack.constants import LIKE_BLOCK_ACTION_ID\nfrom onyx.onyxbot.slack.constants import SHOW_EVERYONE_ACTION_ID\nfrom onyx.onyxbot.slack.constants import VIEW_DOC_FEEDBACK_ID\nfrom onyx.onyxbot.slack.handlers.handle_buttons import handle_doc_feedback_button\nfrom onyx.onyxbot.slack.handlers.handle_buttons import handle_followup_button\nfrom onyx.onyxbot.slack.handlers.handle_buttons import (\n    handle_followup_resolved_button,\n)\nfrom onyx.onyxbot.slack.handlers.handle_buttons import (\n    handle_generate_answer_button,\n)\nfrom onyx.onyxbot.slack.handlers.handle_buttons import (\n    handle_publish_ephemeral_message_button,\n)\nfrom onyx.onyxbot.slack.handlers.handle_buttons import handle_slack_feedback\nfrom onyx.onyxbot.slack.handlers.handle_message import handle_message\nfrom onyx.onyxbot.slack.handlers.handle_message import (\n    remove_scheduled_feedback_reminder,\n)\nfrom onyx.onyxbot.slack.handlers.handle_message import schedule_feedback_reminder\nfrom onyx.onyxbot.slack.models import SlackContext\nfrom onyx.onyxbot.slack.models import SlackMessageInfo\nfrom onyx.onyxbot.slack.models import ThreadMessage\nfrom onyx.onyxbot.slack.utils import check_message_limit\nfrom onyx.onyxbot.slack.utils import decompose_action_id\nfrom onyx.onyxbot.slack.utils import get_channel_name_from_id\nfrom onyx.onyxbot.slack.utils import get_channel_type_from_id\nfrom onyx.onyxbot.slack.utils import get_onyx_bot_auth_ids\nfrom onyx.onyxbot.slack.utils import read_slack_thread\nfrom onyx.onyxbot.slack.utils import remove_onyx_bot_tag\nfrom onyx.onyxbot.slack.utils import respond_in_thread_or_channel\nfrom onyx.onyxbot.slack.utils import TenantSocketModeClient\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.manage.models import SlackBotTokens\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\nfrom onyx.utils.variable_functionality import set_is_ee_based_on_env_variable\nfrom shared_configs.configs import DISALLOWED_SLACK_BOT_TENANT_LIST\nfrom shared_configs.configs import MODEL_SERVER_HOST\nfrom shared_configs.configs import MODEL_SERVER_PORT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.configs import SLACK_CHANNEL_ID\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n# Prometheus metric for HPA\nactive_tenants_gauge = Gauge(\n    \"active_tenants\",\n    \"Number of active tenants handled by this pod\",\n    [\"namespace\", \"pod\"],\n)\n\n# In rare cases, some users have been experiencing a massive amount of trivial messages coming through\n# to the Slack Bot with trivial messages. Adding this to avoid exploding LLM costs while we track down\n# the cause.\n_SLACK_GREETINGS_TO_IGNORE = {\n    \"Welcome back!\",\n    \"It's going to be a great day.\",\n    \"Salutations!\",\n    \"Greetings!\",\n    \"Feeling great!\",\n    \"Hi there\",\n    \":wave:\",\n}\n\n# This is always (currently) the user id of Slack's official slackbot\n_OFFICIAL_SLACKBOT_USER_ID = \"USLACKBOT\"\n\n# Fields to exclude from Slack payload logging\n# Intention is to not log slack message content\n_EXCLUDED_SLACK_PAYLOAD_FIELDS = {\"text\", \"blocks\"}\n\n\nclass SlackbotHandler:\n    def __init__(self) -> None:\n        logger.info(\"Initializing SlackbotHandler\")\n        self.tenant_ids: set[str] = set()\n        # The keys for these dictionaries are tuples of (tenant_id, slack_bot_id)\n        self.socket_clients: Dict[tuple[str, int], TenantSocketModeClient] = {}\n        self.slack_bot_tokens: Dict[tuple[str, int], SlackBotTokens] = {}\n\n        # Store Redis lock objects here so we can release them properly\n        self.redis_locks: Dict[str, Lock] = {}\n\n        self.running = True\n        self.pod_id = os.environ.get(\"HOSTNAME\", \"unknown_pod\")\n        self._shutdown_event = Event()\n\n        self._lock = threading.Lock()\n\n        logger.info(f\"Pod ID: {self.pod_id}\")\n\n        # Set up signal handlers for graceful shutdown\n        signal.signal(signal.SIGTERM, self.shutdown)\n        signal.signal(signal.SIGINT, self.shutdown)\n        logger.info(\"Signal handlers registered\")\n\n        # Start the Prometheus metrics server\n        logger.info(\"Starting Prometheus metrics server\")\n        start_http_server(8000)\n        logger.info(\"Prometheus metrics server started\")\n\n        # Start background threads\n        logger.info(\"Starting background threads\")\n        self.acquire_thread = threading.Thread(\n            target=self.acquire_tenants_loop, daemon=True\n        )\n        self.heartbeat_thread = threading.Thread(\n            target=self.heartbeat_loop, daemon=True\n        )\n\n        self.acquire_thread.start()\n        self.heartbeat_thread.start()\n\n        logger.info(\"Background threads started\")\n\n    def acquire_tenants_loop(self) -> None:\n        while not self._shutdown_event.is_set():\n            try:\n                self.acquire_tenants()\n\n                # After we finish acquiring and managing Slack bots,\n                # set the gauge to the number of active tenants (those with Slack bots).\n                active_tenants_gauge.labels(namespace=POD_NAMESPACE, pod=POD_NAME).set(\n                    len(self.tenant_ids)\n                )\n                logger.debug(\n                    f\"Current active tenants with Slack bots: {len(self.tenant_ids)}\"\n                )\n            except Exception as e:\n                logger.exception(f\"Error in Slack acquisition: {e}\")\n            self._shutdown_event.wait(timeout=TENANT_ACQUISITION_INTERVAL)\n\n    def heartbeat_loop(self) -> None:\n        \"\"\"This heartbeats into redis.\n\n        NOTE(rkuo): this is not thread-safe with acquire_tenants_loop and will\n        occasionally exception. Fix it!\n        \"\"\"\n        while not self._shutdown_event.is_set():\n            try:\n                with self._lock:\n                    tenant_ids = self.tenant_ids.copy()\n\n                SlackbotHandler.send_heartbeats(self.pod_id, tenant_ids)\n                logger.debug(f\"Sent heartbeats for {len(tenant_ids)} active tenants\")\n            except Exception as e:\n                logger.exception(f\"Error in heartbeat loop: {e}\")\n            self._shutdown_event.wait(timeout=TENANT_HEARTBEAT_INTERVAL)\n\n    def _manage_clients_per_tenant(\n        self, db_session: Session, tenant_id: str, bot: SlackBot\n    ) -> None:\n        \"\"\"\n        - If the tokens are missing or empty, close the socket client and remove them.\n        - If the tokens have changed, close the existing socket client and reconnect.\n        - If the tokens are new, warm up the model and start a new socket client.\n        \"\"\"\n        tenant_bot_pair = (tenant_id, bot.id)\n\n        # If the tokens are missing or empty, close the socket client and remove them.\n        if not bot.bot_token or not bot.app_token:\n            logger.debug(\n                f\"No Slack bot tokens found for tenant={tenant_id}, bot {bot.id}\"\n            )\n            if tenant_bot_pair in self.socket_clients:\n                self.socket_clients[tenant_bot_pair].close()\n                del self.socket_clients[tenant_bot_pair]\n                del self.slack_bot_tokens[tenant_bot_pair]\n            return\n\n        slack_bot_tokens = SlackBotTokens(\n            bot_token=bot.bot_token.get_value(apply_mask=False),\n            app_token=bot.app_token.get_value(apply_mask=False),\n        )\n\n        tokens_exist = tenant_bot_pair in self.slack_bot_tokens\n        tokens_changed = (\n            tokens_exist and slack_bot_tokens != self.slack_bot_tokens[tenant_bot_pair]\n        )\n        if not tokens_exist or tokens_changed:\n            if tokens_exist:\n                logger.info(\n                    f\"Slack Bot tokens changed for tenant={tenant_id}, bot {bot.id}; reconnecting\"\n                )\n            else:\n                # Warm up the model if needed\n                search_settings = get_current_search_settings(db_session)\n                embedding_model = EmbeddingModel.from_db_model(\n                    search_settings=search_settings,\n                    server_host=MODEL_SERVER_HOST,\n                    server_port=MODEL_SERVER_PORT,\n                )\n                warm_up_bi_encoder(embedding_model=embedding_model)\n\n            self.slack_bot_tokens[tenant_bot_pair] = slack_bot_tokens\n\n            # Close any existing connection first\n            if tenant_bot_pair in self.socket_clients:\n                self.socket_clients[tenant_bot_pair].close()\n\n            socket_client = self.start_socket_client(\n                bot.id, tenant_id, slack_bot_tokens\n            )\n            if socket_client:\n                # Ensure tenant is tracked as active\n                self.socket_clients[tenant_id, bot.id] = socket_client\n\n                logger.info(\n                    f\"Started SocketModeClient: {tenant_id=} {socket_client.bot_name=} {bot.id=}\"\n                )\n\n            self.tenant_ids.add(tenant_id)\n\n    def acquire_tenants(self) -> None:\n        \"\"\"\n        - Attempt to acquire a Redis lock for each tenant.\n        - If acquired, check if that tenant actually has Slack bots.\n        - If yes, store them in self.tenant_ids and manage the socket connections.\n        - If a tenant in self.tenant_ids no longer has Slack bots, remove it (and release the lock in this scope).\n        \"\"\"\n\n        token: Token[str | None]\n\n        # tenants that are disabled (e.g. their trial is over and haven't subscribed)\n        # for non-cloud, this will return an empty set\n        gated_tenants = fetch_ee_implementation_or_noop(\n            \"onyx.server.tenants.product_gating\",\n            \"get_gated_tenants\",\n            set(),\n        )()\n        all_active_tenants = [\n            tenant_id\n            for tenant_id in get_all_tenant_ids()\n            if tenant_id not in gated_tenants\n        ]\n\n        # 1) Try to acquire locks for new tenants\n        for tenant_id in all_active_tenants:\n            if (\n                DISALLOWED_SLACK_BOT_TENANT_LIST is not None\n                and tenant_id in DISALLOWED_SLACK_BOT_TENANT_LIST\n            ):\n                logger.debug(f\"Tenant {tenant_id} is disallowed; skipping.\")\n                continue\n\n            # Already acquired in a previous loop iteration?\n            if tenant_id in self.tenant_ids:\n                continue\n\n            # Respect max tenant limit per pod\n            if len(self.tenant_ids) >= MAX_TENANTS_PER_POD:\n                logger.info(\n                    f\"Max tenants per pod reached, not acquiring more: {MAX_TENANTS_PER_POD=}\"\n                )\n                break\n\n            redis_client = get_redis_client(tenant_id=tenant_id)\n            # Acquire a Redis lock (non-blocking)\n            # thread_local=False because the shutdown event is handled\n            # on an arbitrary thread\n            rlock: RedisLock = redis_client.lock(\n                OnyxRedisLocks.SLACK_BOT_LOCK,\n                timeout=TENANT_LOCK_EXPIRATION,\n                thread_local=False,\n            )\n            lock_acquired = rlock.acquire(blocking=False)\n\n            if not lock_acquired and not DEV_MODE:\n                logger.debug(\n                    f\"Another pod holds the lock for tenant {tenant_id}, skipping.\"\n                )\n                continue\n\n            if lock_acquired:\n                logger.debug(f\"Acquired lock for tenant {tenant_id}.\")\n                self.redis_locks[tenant_id] = rlock\n            else:\n                # DEV_MODE will skip the lock acquisition guard\n                logger.debug(\n                    f\"Running in DEV_MODE. Not enforcing lock for {tenant_id}.\"\n                )\n\n            # Now check if this tenant actually has Slack bots\n            token = CURRENT_TENANT_ID_CONTEXTVAR.set(\n                tenant_id or POSTGRES_DEFAULT_SCHEMA\n            )\n            try:\n                with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n                    bots: list[SlackBot] = []\n                    try:\n                        bots = list(fetch_slack_bots(db_session=db_session))\n                    except KvKeyNotFoundError:\n                        # No Slackbot tokens, pass\n                        pass\n                    except psycopg2.errors.UndefinedTable:\n                        logger.error(\n                            \"Undefined table error in fetch_slack_bots. Tenant schema may need fixing.\"\n                        )\n                    except Exception as e:\n                        logger.exception(\n                            f\"Error fetching Slack bots for tenant {tenant_id}: {e}\"\n                        )\n\n                    if bots:\n                        # Mark as active tenant\n                        self.tenant_ids.add(tenant_id)\n                        for bot in bots:\n                            self._manage_clients_per_tenant(\n                                db_session=db_session,\n                                tenant_id=tenant_id,\n                                bot=bot,\n                            )\n                    else:\n                        # If no Slack bots, release lock immediately (unless in DEV_MODE)\n                        if lock_acquired and not DEV_MODE:\n                            rlock.release()\n                            del self.redis_locks[tenant_id]\n                        logger.debug(\n                            f\"No Slack bots for tenant {tenant_id}; lock released (if held).\"\n                        )\n            finally:\n                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n        # 2) Make sure tenants we're handling still have Slack bots\n        #    and haven't been suspended (gated)\n        for tenant_id in list(self.tenant_ids):\n            if tenant_id in gated_tenants:\n                logger.info(\n                    f\"Tenant {tenant_id} is now gated (suspended). Disconnecting.\"\n                )\n                self._remove_tenant(tenant_id)\n                if tenant_id in self.redis_locks and not DEV_MODE:\n                    try:\n                        self.redis_locks[tenant_id].release()\n                        del self.redis_locks[tenant_id]\n                    except Exception as e:\n                        logger.error(\n                            f\"Error releasing lock for gated tenant {tenant_id}: {e}\"\n                        )\n                continue\n\n            token = CURRENT_TENANT_ID_CONTEXTVAR.set(\n                tenant_id or POSTGRES_DEFAULT_SCHEMA\n            )\n            redis_client = get_redis_client(tenant_id=tenant_id)\n\n            try:\n                with get_session_with_current_tenant() as db_session:\n                    # Attempt to fetch Slack bots\n                    try:\n                        bots = list(fetch_slack_bots(db_session=db_session))\n                    except KvKeyNotFoundError:\n                        # No Slackbot tokens, pass (and remove below)\n                        bots = []\n                    except Exception as e:\n                        logger.exception(f\"Error handling tenant {tenant_id}: {e}\")\n                        bots = []\n\n                    if not bots:\n                        logger.info(\n                            f\"Tenant {tenant_id} no longer has Slack bots. Removing.\"\n                        )\n                        self._remove_tenant(tenant_id)\n\n                        # NOTE: We release the lock here (in the same scope it was acquired)\n                        if tenant_id in self.redis_locks and not DEV_MODE:\n                            try:\n                                self.redis_locks[tenant_id].release()\n                                del self.redis_locks[tenant_id]\n                                logger.info(f\"Released lock for tenant {tenant_id}\")\n                            except Exception as e:\n                                logger.error(\n                                    f\"Error releasing lock for tenant {tenant_id}: {e}\"\n                                )\n                    else:\n                        # Manage or reconnect Slack bot sockets\n                        for bot in bots:\n                            self._manage_clients_per_tenant(\n                                db_session=db_session,\n                                tenant_id=tenant_id,\n                                bot=bot,\n                            )\n            finally:\n                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n    def _remove_tenant(self, tenant_id: str) -> None:\n        \"\"\"\n        Helper to remove a tenant from `self.tenant_ids` and close any socket clients.\n        (Lock release now happens in `acquire_tenants()`, not here.)\n        \"\"\"\n        socket_client_list = list(self.socket_clients.items())\n        # Close all socket clients for this tenant\n        for (t_id, slack_bot_id), client in socket_client_list:\n            if t_id == tenant_id:\n                client.close()\n                del self.socket_clients[(t_id, slack_bot_id)]\n                del self.slack_bot_tokens[(t_id, slack_bot_id)]\n                logger.info(\n                    f\"Stopped SocketModeClient for tenant: {t_id}, app: {slack_bot_id}\"\n                )\n\n        # Remove from active set\n        if tenant_id in self.tenant_ids:\n            self.tenant_ids.remove(tenant_id)\n\n    @staticmethod\n    def send_heartbeats(pod_id: str, tenant_ids: set[str]) -> None:\n        current_time = int(time.time())\n        logger.debug(f\"Sending heartbeats for {len(tenant_ids)} active tenants\")\n        for tenant_id in tenant_ids:\n            redis_client = get_redis_client(tenant_id=tenant_id)\n            heartbeat_key = f\"{OnyxRedisLocks.SLACK_BOT_HEARTBEAT_PREFIX}:{pod_id}\"\n            redis_client.set(\n                heartbeat_key, current_time, ex=TENANT_HEARTBEAT_EXPIRATION\n            )\n\n    @staticmethod\n    def start_socket_client(\n        slack_bot_id: int, tenant_id: str, slack_bot_tokens: SlackBotTokens\n    ) -> TenantSocketModeClient | None:\n        \"\"\"Returns the socket client if this succeeds\"\"\"\n        socket_client: TenantSocketModeClient = _get_socket_client(\n            slack_bot_tokens, tenant_id, slack_bot_id\n        )\n\n        try:\n            bot_info = socket_client.web_client.auth_test()\n\n            if bot_info[\"ok\"]:\n                bot_user_id = bot_info[\"user_id\"]\n                user_info = socket_client.web_client.users_info(user=bot_user_id)\n                if user_info[\"ok\"]:\n                    bot_name = (\n                        user_info[\"user\"][\"real_name\"] or user_info[\"user\"][\"name\"]\n                    )\n                    socket_client.bot_name = bot_name\n                    # logger.info(\n                    #     f\"Started socket client for Slackbot with name '{bot_name}' (tenant: {tenant_id}, app: {slack_bot_id})\"\n                    # )\n        except SlackApiError as e:\n            # Only error out if we get a not_authed error\n            if \"not_authed\" in str(e):\n                # for some reason we want to add the tenant to the list when this happens?\n                logger.error(\n                    f\"Authentication error - Invalid or expired credentials: {tenant_id=} {slack_bot_id=}. Error: {e}\"\n                )\n                return None\n\n            # Log other Slack API errors but continue\n            logger.error(\n                f\"Slack API error fetching bot info: {e} for tenant: {tenant_id}, app: {slack_bot_id}\"\n            )\n        except Exception as e:\n            # Log other exceptions but continue\n            logger.error(\n                f\"Error fetching bot info: {e} for tenant: {tenant_id}, app: {slack_bot_id}\"\n            )\n\n        # Append the event handler\n        process_slack_event = create_process_slack_event()\n        socket_client.socket_mode_request_listeners.append(process_slack_event)  # type: ignore\n\n        # Establish a WebSocket connection to the Socket Mode servers\n        # logger.debug(\n        #     f\"Connecting socket client for tenant: {tenant_id}, app: {slack_bot_id}\"\n        # )\n        socket_client.connect()\n        # logger.info(\n        #     f\"Started SocketModeClient for tenant: {tenant_id}, app: {slack_bot_id}\"\n        # )\n\n        return socket_client\n\n    @staticmethod\n    def stop_socket_clients(\n        pod_id: str, socket_clients: Dict[tuple[str, int], TenantSocketModeClient]\n    ) -> None:\n        socket_client_list = list(socket_clients.items())\n        length = len(socket_client_list)\n\n        x = 0\n        for (tenant_id, slack_bot_id), client in socket_client_list:\n            x += 1\n            client.close()\n            logger.info(\n                f\"Stopped SocketModeClient {x}/{length}: {pod_id=} {tenant_id=} {slack_bot_id=}\"\n            )\n\n    def shutdown(\n        self,\n        signum: int | None,  # noqa: ARG002\n        frame: FrameType | None,  # noqa: ARG002\n    ) -> None:\n        if not self.running:\n            return\n\n        logger.info(\"Shutting down gracefully\")\n        self.running = False\n        self._shutdown_event.set()  # set the shutdown event\n\n        # wait for threads to detect the event and exit\n        self.acquire_thread.join(timeout=60.0)\n        self.heartbeat_thread.join(timeout=60.0)\n\n        # Stop all socket clients\n        logger.info(f\"Stopping {len(self.socket_clients)} socket clients\")\n        SlackbotHandler.stop_socket_clients(self.pod_id, self.socket_clients)\n\n        # Release locks for all tenants we currently hold\n        logger.info(f\"Releasing locks for {len(self.tenant_ids)} tenants\")\n        for tenant_id in list(self.tenant_ids):\n            if tenant_id in self.redis_locks:\n                try:\n                    self.redis_locks[tenant_id].release()\n                    logger.info(f\"Released lock for tenant {tenant_id}\")\n                except Exception as e:\n                    logger.error(f\"Error releasing lock for tenant {tenant_id}: {e}\")\n                finally:\n                    del self.redis_locks[tenant_id]\n\n        # Wait for background threads to finish (with a timeout)\n        logger.info(\"Waiting for background threads to finish...\")\n        self.acquire_thread.join(timeout=5)\n        self.heartbeat_thread.join(timeout=5)\n\n        logger.info(\"Shutdown complete\")\n        sys.exit(0)\n\n\ndef sanitize_slack_payload(payload: dict) -> dict:\n    \"\"\"Remove message content from Slack payload for logging\"\"\"\n    sanitized = {\n        k: v for k, v in payload.items() if k not in _EXCLUDED_SLACK_PAYLOAD_FIELDS\n    }\n    if \"event\" in sanitized and isinstance(sanitized[\"event\"], dict):\n        sanitized[\"event\"] = {\n            k: v\n            for k, v in sanitized[\"event\"].items()\n            if k not in _EXCLUDED_SLACK_PAYLOAD_FIELDS\n        }\n    return sanitized\n\n\ndef prefilter_requests(req: SocketModeRequest, client: TenantSocketModeClient) -> bool:\n    \"\"\"True to keep going, False to ignore this Slack request\"\"\"\n\n    # skip cases where the bot is disabled in the web UI\n    tenant_id = get_current_tenant_id()\n\n    bot_token_user_id, bot_token_bot_id = get_onyx_bot_auth_ids(\n        tenant_id, client.web_client\n    )\n    logger.info(f\"prefilter_requests: {bot_token_user_id=} {bot_token_bot_id=}\")\n\n    with get_session_with_current_tenant() as db_session:\n        slack_bot = fetch_slack_bot(\n            db_session=db_session, slack_bot_id=client.slack_bot_id\n        )\n        if not slack_bot:\n            logger.error(\n                f\"Slack bot with ID '{client.slack_bot_id}' not found. Skipping request.\"\n            )\n            return False\n\n        if not slack_bot.enabled:\n            logger.info(\n                f\"Slack bot with ID '{client.slack_bot_id}' is disabled. Skipping request.\"\n            )\n            return False\n\n    if req.type == \"events_api\":\n        # Verify channel is valid\n        event = cast(dict[str, Any], req.payload.get(\"event\", {}))\n        msg = cast(str | None, event.get(\"text\"))\n        channel = cast(str | None, event.get(\"channel\"))\n        channel_specific_logger = setup_logger(extra={SLACK_CHANNEL_ID: channel})\n\n        # This should never happen, but we can't continue without a channel since\n        # we can't send a response without it\n        if not channel:\n            channel_specific_logger.warning(\"Found message without channel - skipping\")\n            return False\n\n        if not msg:\n            channel_specific_logger.warning(\n                \"Cannot respond to empty message - skipping\"\n            )\n            return False\n\n        if (\n            req.payload.setdefault(\"event\", {}).get(\"user\", \"\")\n            == _OFFICIAL_SLACKBOT_USER_ID\n        ):\n            channel_specific_logger.info(\n                \"Ignoring messages from Slack's official Slackbot\"\n            )\n            return False\n\n        if (\n            msg in _SLACK_GREETINGS_TO_IGNORE\n            or remove_onyx_bot_tag(tenant_id, msg, client=client.web_client)\n            in _SLACK_GREETINGS_TO_IGNORE\n        ):\n            channel_specific_logger.error(\n                f\"Ignoring weird Slack greeting message: '{msg}'\"\n            )\n            channel_specific_logger.error(\n                f\"Weird Slack greeting message payload: '{req.payload}'\"\n            )\n            return False\n\n        # Ensure that the message is a new message of expected type\n        event_type = event.get(\"type\")\n        event.get(\"channel_type\")\n\n        if event_type not in [\"app_mention\", \"message\"]:\n            return False\n\n        bot_token_user_id, bot_token_bot_id = get_onyx_bot_auth_ids(\n            tenant_id, client.web_client\n        )\n        if event_type == \"message\":\n            is_onyx_bot_msg = False\n            is_tagged = False\n\n            event_user = event.get(\"user\", \"\")\n            event_bot_id = event.get(\"bot_id\", \"\")\n\n            is_dm = event.get(\"channel_type\") == \"im\"\n            if bot_token_user_id and f\"<@{bot_token_user_id}>\" in msg:\n                is_tagged = True\n\n            if bot_token_user_id and bot_token_user_id in event_user:\n                is_onyx_bot_msg = True\n\n            if bot_token_bot_id and bot_token_bot_id in event_bot_id:\n                is_onyx_bot_msg = True\n\n            # OnyxBot should never respond to itself\n            if is_onyx_bot_msg:\n                logger.info(\"Ignoring message from OnyxBot (self-message)\")\n                return False\n\n            # DMs with the bot don't pick up the @OnyxBot so we have to keep the\n            # caught events_api\n            if is_tagged and not is_dm:\n                # Let the tag flow handle this case, don't reply twice\n                return False\n\n        # Check if this is a bot message (either via bot_profile or bot_message subtype)\n        is_bot_message = bool(\n            event.get(\"bot_profile\") or event.get(\"subtype\") == \"bot_message\"\n        )\n        if is_bot_message:\n            channel_name, _ = get_channel_name_from_id(\n                client=client.web_client, channel_id=channel\n            )\n            with get_session_with_current_tenant() as db_session:\n                slack_channel_config = get_slack_channel_config_for_bot_and_channel(\n                    db_session=db_session,\n                    slack_bot_id=client.slack_bot_id,\n                    channel_name=channel_name,\n                )\n\n            # If OnyxBot is not specifically tagged and the channel is not set to respond to bots, ignore the message\n            if (not bot_token_user_id or bot_token_user_id not in msg) and (\n                not slack_channel_config\n                or not slack_channel_config.channel_config.get(\"respond_to_bots\")\n            ):\n                channel_specific_logger.info(\n                    \"Ignoring message from bot since respond_to_bots is disabled\"\n                )\n                return False\n\n        # Ignore things like channel_join, channel_leave, etc.\n        # NOTE: \"file_share\" is just a message with a file attachment, so we\n        # should not ignore it\n        message_subtype = event.get(\"subtype\")\n        if message_subtype not in [None, \"file_share\", \"bot_message\"]:\n            channel_specific_logger.info(\n                f\"Ignoring message with subtype '{message_subtype}' since it is a special message type\"\n            )\n            return False\n\n        message_ts = event.get(\"ts\")\n        thread_ts = event.get(\"thread_ts\")\n        # Pick the root of the thread (if a thread exists)\n        # Can respond in thread if it's an \"im\" directly to Onyx or @OnyxBot is tagged\n        if (\n            thread_ts\n            and message_ts != thread_ts\n            and event_type != \"app_mention\"\n            and event.get(\"channel_type\") != \"im\"\n        ):\n            channel_specific_logger.debug(\n                \"Skipping message since it is not the root of a thread\"\n            )\n            return False\n\n        msg = cast(str, event.get(\"text\", \"\"))\n        if not msg:\n            channel_specific_logger.error(\"Unable to process empty message\")\n            return False\n\n    if req.type == \"slash_commands\":\n        # Verify that there's an associated channel\n        channel = req.payload.get(\"channel_id\")\n        channel_specific_logger = setup_logger(extra={SLACK_CHANNEL_ID: channel})\n\n        if not channel:\n            channel_specific_logger.error(\n                \"Received OnyxBot command without channel - skipping\"\n            )\n            return False\n\n        sender = req.payload.get(\"user_id\")\n        if not sender:\n            channel_specific_logger.error(\n                \"Cannot respond to OnyxBot command without sender to respond to.\"\n            )\n            return False\n\n    if not check_message_limit():\n        return False\n\n    # Don't log Slack message content\n    logger.debug(\n        f\"Handling Slack request: {client.bot_name=} '{sanitize_slack_payload(req.payload)=}'\"\n    )\n    return True\n\n\ndef process_feedback(req: SocketModeRequest, client: TenantSocketModeClient) -> None:\n    if actions := req.payload.get(\"actions\"):\n        action = cast(dict[str, Any], actions[0])\n        feedback_type = cast(str, action.get(\"action_id\"))\n        feedback_msg_reminder = cast(str, action.get(\"value\"))\n        feedback_id = cast(str, action.get(\"block_id\"))\n        channel_id = cast(str, req.payload[\"container\"][\"channel_id\"])\n        thread_ts = cast(\n            str,\n            req.payload[\"container\"].get(\"thread_ts\")\n            or req.payload[\"container\"].get(\"message_ts\"),\n        )\n    else:\n        logger.error(\"Unable to process feedback. Action not found\")\n        return\n\n    user_id = cast(str, req.payload[\"user\"][\"id\"])\n\n    handle_slack_feedback(\n        feedback_id=feedback_id,\n        feedback_type=feedback_type,\n        feedback_msg_reminder=feedback_msg_reminder,\n        client=client.web_client,\n        user_id_to_post_confirmation=user_id,\n        channel_id_to_post_confirmation=channel_id,\n        thread_ts_to_post_confirmation=thread_ts,\n    )\n\n    query_event_id, _, _ = decompose_action_id(feedback_id)\n    logger.info(f\"Successfully handled QA feedback for event: {query_event_id}\")\n\n\ndef build_request_details(\n    req: SocketModeRequest, client: TenantSocketModeClient\n) -> SlackMessageInfo:\n    tagged: bool = False\n\n    tenant_id = get_current_tenant_id()\n    if req.type == \"events_api\":\n        event = cast(dict[str, Any], req.payload[\"event\"])\n        msg = cast(str, event[\"text\"])\n        channel = cast(str, event[\"channel\"])\n\n        # Check for both app_mention events and messages containing bot tag\n        bot_token_user_id, _ = get_onyx_bot_auth_ids(tenant_id, client.web_client)\n        message_ts = event.get(\"ts\")\n        thread_ts = event.get(\"thread_ts\")\n        sender_id = event.get(\"user\") or None\n        expert_info = expert_info_from_slack_id(\n            sender_id, client.web_client, user_cache={}\n        )\n        email = expert_info.email if expert_info else None\n\n        msg = remove_onyx_bot_tag(tenant_id, msg, client=client.web_client)\n\n        logger.info(f\"Received Slack message: {msg}\")\n\n        event_type = event.get(\"type\")\n        if event_type == \"app_mention\":\n            tagged = True\n\n        if event_type == \"message\":\n            if bot_token_user_id:\n                if f\"<@{bot_token_user_id}>\" in msg:\n                    tagged = True\n\n        if tagged:\n            logger.debug(\"User tagged OnyxBot\")\n\n        # Build Slack context for federated search\n        # Get proper channel type from Slack API instead of relying on event.channel_type\n        channel_type = get_channel_type_from_id(client.web_client, channel)\n\n        slack_context = SlackContext(\n            channel_type=channel_type,\n            channel_id=channel,\n            user_id=sender_id or \"unknown\",\n            message_ts=message_ts,\n        )\n        logger.info(\n            f\"build_request_details: Capturing Slack context: \"\n            f\"channel_type={channel_type} channel_id={channel} message_ts={message_ts}\"\n        )\n\n        if thread_ts != message_ts and thread_ts is not None:\n            thread_messages: list[ThreadMessage] = read_slack_thread(\n                tenant_id=tenant_id,\n                channel=channel,\n                thread=thread_ts,\n                client=client.web_client,\n            )\n        else:\n            sender_display_name = None\n            if expert_info:\n                sender_display_name = expert_info.display_name\n                if sender_display_name is None:\n                    sender_display_name = (\n                        f\"{expert_info.first_name} {expert_info.last_name}\"\n                        if expert_info.last_name\n                        else expert_info.first_name\n                    )\n                if sender_display_name is None:\n                    sender_display_name = expert_info.email\n            thread_messages = [\n                ThreadMessage(\n                    message=msg, sender=sender_display_name, role=MessageType.USER\n                )\n            ]\n\n        return SlackMessageInfo(\n            thread_messages=thread_messages,\n            channel_to_respond=channel,\n            msg_to_respond=cast(str, message_ts or thread_ts),\n            thread_to_respond=cast(str, thread_ts or message_ts),\n            sender_id=sender_id,\n            email=email,\n            bypass_filters=tagged,\n            is_slash_command=False,\n            is_bot_dm=event.get(\"channel_type\") == \"im\",\n            slack_context=slack_context,  # Add Slack context for federated search\n        )\n\n    elif req.type == \"slash_commands\":\n        channel = req.payload[\"channel_id\"]\n        channel_name = req.payload[\"channel_name\"]\n        msg = req.payload[\"text\"]\n        sender = req.payload[\"user_id\"]\n        expert_info = expert_info_from_slack_id(\n            sender, client.web_client, user_cache={}\n        )\n        email = expert_info.email if expert_info else None\n\n        # Get proper channel type for slash commands too\n        channel_type = get_channel_type_from_id(client.web_client, channel)\n\n        slack_context = SlackContext(\n            channel_type=channel_type,\n            channel_id=channel,\n            user_id=sender,\n            message_ts=None,  # Slash commands don't have a message timestamp\n        )\n        logger.info(\n            f\"build_request_details: Capturing Slack context for slash command: channel_type={channel_type} channel_id={channel}\"\n        )\n\n        single_msg = ThreadMessage(message=msg, sender=None, role=MessageType.USER)\n\n        return SlackMessageInfo(\n            thread_messages=[single_msg],\n            channel_to_respond=channel,\n            msg_to_respond=None,\n            thread_to_respond=None,\n            sender_id=sender,\n            email=email,\n            bypass_filters=True,\n            is_slash_command=True,\n            is_bot_dm=channel_name == \"directmessage\",\n            slack_context=slack_context,  # Add Slack context for federated search\n        )\n\n    raise RuntimeError(\"Programming fault, this should never happen.\")\n\n\ndef apologize_for_fail(\n    details: SlackMessageInfo,\n    client: TenantSocketModeClient,\n) -> None:\n    respond_in_thread_or_channel(\n        client=client.web_client,\n        channel=details.channel_to_respond,\n        thread_ts=details.msg_to_respond,\n        text=\"Sorry, we weren't able to find anything relevant :cold_sweat:\",\n    )\n\n\ndef process_message(\n    req: SocketModeRequest,\n    client: TenantSocketModeClient,\n    notify_no_answer: bool = NOTIFY_SLACKBOT_NO_ANSWER,\n) -> None:\n    tenant_id = get_current_tenant_id()\n    if req.type == \"events_api\":\n        event = cast(dict[str, Any], req.payload[\"event\"])\n        event_type = event.get(\"type\")\n        logger.info(\n            f\"process_message start: {tenant_id=} {req.type=} {req.envelope_id=} {event_type=}\"\n        )\n    else:\n        logger.info(\n            f\"process_message start: {tenant_id=} {req.type=} {req.envelope_id=}\"\n        )\n\n    # Throw out requests that can't or shouldn't be handled\n    if not prefilter_requests(req, client):\n        logger.info(\n            f\"process_message prefiltered: {tenant_id=} {req.type=} {req.envelope_id=}\"\n        )\n        return\n\n    details = build_request_details(req, client)\n    channel = details.channel_to_respond\n    channel_name, is_dm = get_channel_name_from_id(\n        client=client.web_client, channel_id=channel\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        slack_channel_config = get_slack_channel_config_for_bot_and_channel(\n            db_session=db_session,\n            slack_bot_id=client.slack_bot_id,\n            channel_name=channel_name,\n        )\n\n        follow_up = bool(\n            slack_channel_config.channel_config\n            and slack_channel_config.channel_config.get(\"follow_up_tags\") is not None\n        )\n\n        feedback_reminder_id = schedule_feedback_reminder(\n            details=details, client=client.web_client, include_followup=follow_up\n        )\n\n        failed = handle_message(\n            message_info=details,\n            slack_channel_config=slack_channel_config,\n            client=client.web_client,\n            feedback_reminder_id=feedback_reminder_id,\n        )\n\n        if failed:\n            if feedback_reminder_id:\n                remove_scheduled_feedback_reminder(\n                    client=client.web_client,\n                    channel=details.sender_id,\n                    msg_id=feedback_reminder_id,\n                )\n            # Skipping answering due to pre-filtering is not considered a failure\n            if notify_no_answer:\n                apologize_for_fail(details, client)\n\n    logger.info(\n        f\"process_message finished: success={not failed} {tenant_id=} {req.type=} {req.envelope_id=}\"\n    )\n\n\ndef acknowledge_message(req: SocketModeRequest, client: TenantSocketModeClient) -> None:\n    response = SocketModeResponse(envelope_id=req.envelope_id)\n    client.send_socket_mode_response(response)\n\n\ndef action_routing(req: SocketModeRequest, client: TenantSocketModeClient) -> None:\n    if actions := req.payload.get(\"actions\"):\n        action = cast(dict[str, Any], actions[0])\n\n        if action[\"action_id\"] in [DISLIKE_BLOCK_ACTION_ID, LIKE_BLOCK_ACTION_ID]:\n            # AI Answer feedback\n            return process_feedback(req, client)\n        elif action[\"action_id\"] in [\n            SHOW_EVERYONE_ACTION_ID,\n            KEEP_TO_YOURSELF_ACTION_ID,\n        ]:\n            # Publish ephemeral message or keep hidden in main channel\n            return handle_publish_ephemeral_message_button(\n                req, client, action[\"action_id\"]\n            )\n        elif action[\"action_id\"] == FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID:\n            # Activation of the \"source feedback\" button\n            return handle_doc_feedback_button(req, client)\n        elif action[\"action_id\"] == FOLLOWUP_BUTTON_ACTION_ID:\n            return handle_followup_button(req, client)\n        elif action[\"action_id\"] == IMMEDIATE_RESOLVED_BUTTON_ACTION_ID:\n            return handle_followup_resolved_button(req, client, immediate=True)\n        elif action[\"action_id\"] == FOLLOWUP_BUTTON_RESOLVED_ACTION_ID:\n            return handle_followup_resolved_button(req, client, immediate=False)\n        elif action[\"action_id\"] == GENERATE_ANSWER_BUTTON_ACTION_ID:\n            return handle_generate_answer_button(req, client)\n\n\ndef view_routing(req: SocketModeRequest, client: TenantSocketModeClient) -> None:\n    if view := req.payload.get(\"view\"):\n        if view[\"callback_id\"] == VIEW_DOC_FEEDBACK_ID:\n            return process_feedback(req, client)\n\n\ndef _extract_channel_from_request(req: SocketModeRequest) -> str | None:\n    \"\"\"Best-effort channel extraction from any Slack request type.\"\"\"\n    if req.type == \"events_api\":\n        return cast(dict[str, Any], req.payload.get(\"event\", {})).get(\"channel\")\n    elif req.type == \"slash_commands\":\n        return req.payload.get(\"channel_id\")\n    elif req.type == \"interactive\":\n        container = req.payload.get(\"container\", {})\n        return container.get(\"channel_id\") or req.payload.get(\"channel\", {}).get(\"id\")\n    return None\n\n\ndef _check_tenant_gated(client: TenantSocketModeClient, req: SocketModeRequest) -> bool:\n    \"\"\"Check if the current tenant is gated (suspended or license expired).\n\n    Multi-tenant: checks the gated tenants Redis set (populated by control plane).\n    Self-hosted: checks the cached license metadata for expiry.\n\n    Returns True if blocked.\n    \"\"\"\n    from onyx.server.settings.models import ApplicationStatus\n\n    # Multi-tenant path: control plane marks gated tenants in Redis\n    is_gated: bool = fetch_ee_implementation_or_noop(\n        \"onyx.server.tenants.product_gating\",\n        \"is_tenant_gated\",\n        False,\n    )(get_current_tenant_id())\n\n    # Self-hosted path: check license metadata cache\n    if not is_gated:\n        get_cached_metadata = fetch_ee_implementation_or_noop(\n            \"onyx.db.license\",\n            \"get_cached_license_metadata\",\n            None,\n        )\n        metadata = get_cached_metadata()\n        if metadata is not None:\n            if metadata.status == ApplicationStatus.GATED_ACCESS:\n                is_gated = True\n\n    if not is_gated:\n        return False\n\n    # Only notify once per user action:\n    # - Skip bot messages (avoids feedback loop from our own response)\n    # - Skip app_mention events (Slack fires both app_mention AND message\n    #   for @mentions; we respond on the message event only)\n    event = req.payload.get(\"event\", {}) if req.type == \"events_api\" else {}\n    is_bot_event = bool(\n        event.get(\"bot_id\")\n        or event.get(\"bot_profile\")\n        or event.get(\"subtype\") == \"bot_message\"\n    )\n    is_duplicate_mention = event.get(\"type\") == \"app_mention\"\n    if not is_bot_event and not is_duplicate_mention:\n        channel = _extract_channel_from_request(req)\n        thread_ts = event.get(\"thread_ts\") or event.get(\"ts\")\n        if channel:\n            respond_in_thread_or_channel(\n                client=client.web_client,\n                channel=channel,\n                thread_ts=thread_ts,\n                text=(\n                    \"Your organization's subscription has expired. Please contact your Onyx administrator to restore access.\"\n                ),\n            )\n    logger.info(f\"Blocked Slack request for gated tenant {get_current_tenant_id()}\")\n    return True\n\n\ndef create_process_slack_event() -> (\n    Callable[[TenantSocketModeClient, SocketModeRequest], None]\n):\n    def process_slack_event(\n        client: TenantSocketModeClient, req: SocketModeRequest\n    ) -> None:\n        # Always respond right away, if Slack doesn't receive these frequently enough\n        # it will assume the Bot is DEAD!!! :(\n        acknowledge_message(req, client)\n\n        if _check_tenant_gated(client, req):\n            return\n\n        try:\n            if req.type == \"interactive\":\n                if req.payload.get(\"type\") == \"block_actions\":\n                    return action_routing(req, client)\n                elif req.payload.get(\"type\") == \"view_submission\":\n                    return view_routing(req, client)\n            elif req.type == \"events_api\" or req.type == \"slash_commands\":\n                return process_message(req, client)\n        except Exception:\n            logger.exception(\"Failed to process slack event\")\n\n    return process_slack_event\n\n\ndef _get_socket_client(\n    slack_bot_tokens: SlackBotTokens, tenant_id: str, slack_bot_id: int\n) -> TenantSocketModeClient:\n    # For more info on how to set this up, checkout the docs:\n    # https://docs.onyx.app/admins/getting_started/slack_bot_setup\n\n    # use the retry handlers built into the slack sdk\n    connection_error_retry_handler = ConnectionErrorRetryHandler()\n    rate_limit_error_retry_handler = RateLimitErrorRetryHandler(max_retry_count=7)\n    slack_retry_handlers: list[RetryHandler] = [\n        connection_error_retry_handler,\n        rate_limit_error_retry_handler,\n    ]\n\n    return TenantSocketModeClient(\n        # This app-level token will be used only for establishing a connection\n        app_token=slack_bot_tokens.app_token,\n        web_client=WebClient(\n            token=slack_bot_tokens.bot_token, retry_handlers=slack_retry_handlers\n        ),\n        tenant_id=tenant_id,\n        slack_bot_id=slack_bot_id,\n    )\n\n\nif __name__ == \"__main__\":\n    # Initialize the SqlEngine\n    SqlEngine.init_engine(pool_size=20, max_overflow=5)\n\n    # Initialize the tenant handler which will manage tenant connections\n    logger.info(\"Starting SlackbotHandler\")\n    tenant_handler = SlackbotHandler()\n\n    set_is_ee_based_on_env_variable()\n\n    try:\n        # Keep the main thread alive\n        while tenant_handler.running:\n            time.sleep(1)\n\n    except Exception:\n        logger.exception(\"Fatal error in main thread\")\n        tenant_handler.shutdown(None, None)\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/models.py",
    "content": "from enum import Enum\nfrom typing import Literal\n\nfrom pydantic import BaseModel\n\nfrom onyx.configs.constants import MessageType\n\n\nclass ChannelType(str, Enum):\n    \"\"\"Slack channel types.\"\"\"\n\n    IM = \"im\"  # Direct message\n    MPIM = \"mpim\"  # Multi-person direct message\n    PRIVATE_CHANNEL = \"private_channel\"  # Private channel\n    PUBLIC_CHANNEL = \"public_channel\"  # Public channel\n    UNKNOWN = \"unknown\"  # Unknown channel type\n\n\nclass SlackContext(BaseModel):\n    \"\"\"Context information for Slack bot interactions.\"\"\"\n\n    channel_type: ChannelType\n    channel_id: str\n    user_id: str\n    message_ts: str | None = None  # Used as request ID for log correlation\n\n\nclass ThreadMessage(BaseModel):\n    message: str\n    sender: str | None = None\n    role: MessageType = MessageType.USER\n\n\nclass SlackMessageInfo(BaseModel):\n    thread_messages: list[ThreadMessage]\n    channel_to_respond: str\n    msg_to_respond: str | None\n    thread_to_respond: str | None\n    sender_id: str | None\n    email: str | None\n    bypass_filters: bool  # User has tagged @OnyxBot\n    is_slash_command: bool  # User is using /OnyxBot\n    is_bot_dm: bool  # User is direct messaging to OnyxBot\n    slack_context: SlackContext | None = None\n\n\n# Models used to encode the relevant data for the ephemeral message actions\nclass ActionValuesEphemeralMessageMessageInfo(BaseModel):\n    bypass_filters: bool | None\n    channel_to_respond: str | None\n    msg_to_respond: str | None\n    email: str | None\n    sender_id: str | None\n    thread_messages: list[ThreadMessage] | None\n    is_slash_command: bool | None\n    is_bot_dm: bool | None\n    thread_to_respond: str | None\n\n\nclass ActionValuesEphemeralMessageChannelConfig(BaseModel):\n    channel_name: str | None\n    respond_tag_only: bool | None\n    respond_to_bots: bool | None\n    is_ephemeral: bool\n    respond_member_group_list: list[str] | None\n    answer_filters: (\n        list[Literal[\"well_answered_postfilter\", \"questionmark_prefilter\"]] | None\n    )\n    follow_up_tags: list[str] | None\n    show_continue_in_web_ui: bool\n\n\nclass ActionValuesEphemeralMessage(BaseModel):\n    original_question_ts: str | None\n    feedback_reminder_id: str | None\n    chat_message_id: int\n    message_info: ActionValuesEphemeralMessageMessageInfo\n    channel_conf: ActionValuesEphemeralMessageChannelConfig\n"
  },
  {
    "path": "backend/onyx/onyxbot/slack/utils.py",
    "content": "import logging\nimport random\nimport re\nimport string\nimport threading\nimport time\nimport uuid\nfrom collections.abc import Generator\nfrom contextlib import contextmanager\nfrom typing import Any\nfrom typing import cast\n\nfrom retry import retry\nfrom slack_sdk import WebClient\nfrom slack_sdk.errors import SlackApiError\nfrom slack_sdk.models.blocks import Block\nfrom slack_sdk.models.blocks import SectionBlock\nfrom slack_sdk.models.metadata import Metadata\nfrom slack_sdk.socket_mode import SocketModeClient\n\nfrom onyx.configs.app_configs import DISABLE_TELEMETRY\nfrom onyx.configs.constants import ID_SEPARATOR\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_FEEDBACK_VISIBILITY\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_MAX_QPM\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_MAX_WAIT_TIME\nfrom onyx.configs.onyxbot_configs import ONYX_BOT_NUM_RETRIES\nfrom onyx.configs.onyxbot_configs import (\n    ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD,\n)\nfrom onyx.configs.onyxbot_configs import (\n    ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS,\n)\nfrom onyx.connectors.slack.utils import SlackTextCleaner\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.users import get_user_by_email\nfrom onyx.onyxbot.slack.constants import FeedbackVisibility\nfrom onyx.onyxbot.slack.models import ChannelType\nfrom onyx.onyxbot.slack.models import ThreadMessage\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.telemetry import optional_telemetry\nfrom onyx.utils.telemetry import RecordType\nfrom onyx.utils.text_processing import replace_whitespaces_w_space\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\nlogger = setup_logger()\n\nslack_token_user_ids: dict[str, str | None] = {}\nslack_token_bot_ids: dict[str, str | None] = {}\nslack_token_lock = threading.Lock()\n\n_ONYX_BOT_MESSAGE_COUNT: int = 0\n_ONYX_BOT_COUNT_START_TIME: float = time.time()\n\n\ndef get_onyx_bot_auth_ids(\n    tenant_id: str, web_client: WebClient\n) -> tuple[str | None, str | None]:\n    \"\"\"Returns a tuple of user_id and bot_id.\"\"\"\n\n    user_id: str | None\n    bot_id: str | None\n\n    global slack_token_user_ids\n    global slack_token_bot_ids\n\n    with slack_token_lock:\n        user_id = slack_token_user_ids.get(tenant_id)\n        bot_id = slack_token_bot_ids.get(tenant_id)\n\n    if user_id is None or bot_id is None:\n        response = web_client.auth_test()\n        user_id = response.get(\"user_id\")\n        bot_id = response.get(\"bot_id\")\n        with slack_token_lock:\n            slack_token_user_ids[tenant_id] = user_id\n            slack_token_bot_ids[tenant_id] = bot_id\n\n    return user_id, bot_id\n\n\ndef get_channel_type_from_id(web_client: WebClient, channel_id: str) -> ChannelType:\n    \"\"\"\n    Get the channel type from a channel ID using Slack API.\n    Returns: ChannelType enum value\n    \"\"\"\n    try:\n        channel_info = web_client.conversations_info(channel=channel_id)\n        if channel_info.get(\"ok\") and channel_info.get(\"channel\"):\n            channel: dict[str, Any] = channel_info.get(\"channel\", {})\n\n            if channel.get(\"is_im\"):\n                return ChannelType.IM  # Direct message\n            elif channel.get(\"is_mpim\"):\n                return ChannelType.MPIM  # Multi-person direct message\n            elif channel.get(\"is_private\"):\n                return ChannelType.PRIVATE_CHANNEL  # Private channel\n            elif channel.get(\"is_channel\"):\n                return ChannelType.PUBLIC_CHANNEL  # Public channel\n            else:\n                logger.warning(\n                    f\"Could not determine channel type for {channel_id}, defaulting to unknown\"\n                )\n                return ChannelType.UNKNOWN\n        else:\n            logger.warning(f\"Invalid channel info response for {channel_id}\")\n            return ChannelType.UNKNOWN\n    except Exception as e:\n        logger.warning(\n            f\"Error getting channel info for {channel_id}, defaulting to unknown: {e}\"\n        )\n        return ChannelType.UNKNOWN\n\n\ndef check_message_limit() -> bool:\n    \"\"\"\n    This isnt a perfect solution.\n    High traffic at the end of one period and start of another could cause\n    the limit to be exceeded.\n    \"\"\"\n    if ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD <= 0:\n        return True\n    global _ONYX_BOT_MESSAGE_COUNT\n    global _ONYX_BOT_COUNT_START_TIME\n    time_since_start = time.time() - _ONYX_BOT_COUNT_START_TIME\n    if time_since_start > ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS:\n        _ONYX_BOT_MESSAGE_COUNT = 0\n        _ONYX_BOT_COUNT_START_TIME = time.time()\n    if (_ONYX_BOT_MESSAGE_COUNT + 1) > ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD:\n        logger.error(\n            f\"OnyxBot has reached the message limit {ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD}\"\n            f\" for the time period {ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS} seconds.\"\n            \" These limits are configurable in backend/onyx/configs/onyxbot_configs.py\"\n        )\n        return False\n    _ONYX_BOT_MESSAGE_COUNT += 1\n    return True\n\n\ndef update_emote_react(\n    emoji: str,\n    channel: str,\n    message_ts: str | None,\n    remove: bool,\n    client: WebClient,\n) -> None:\n    if not message_ts:\n        action = \"remove\" if remove else \"add\"\n        logger.error(f\"update_emote_react - no message specified: {channel=} {action=}\")\n        return\n\n    if remove:\n        try:\n            client.reactions_remove(\n                name=emoji,\n                channel=channel,\n                timestamp=message_ts,\n            )\n        except SlackApiError as e:\n            logger.error(f\"Failed to remove Reaction due to: {e}\")\n\n        return\n\n    try:\n        client.reactions_add(\n            name=emoji,\n            channel=channel,\n            timestamp=message_ts,\n        )\n    except SlackApiError as e:\n        logger.error(f\"Was not able to react to user message due to: {e}\")\n\n    return\n\n\ndef remove_onyx_bot_tag(tenant_id: str, message_str: str, client: WebClient) -> str:\n    bot_token_user_id, _ = get_onyx_bot_auth_ids(tenant_id, web_client=client)\n    return re.sub(rf\"<@{bot_token_user_id}>\\s*\", \"\", message_str)\n\n\ndef _check_for_url_in_block(block: Block) -> bool:\n    \"\"\"\n    Check if the block has a key that contains \"url\" in it\n    \"\"\"\n    block_dict = block.to_dict()\n\n    def check_dict_for_url(d: dict) -> bool:\n        for key, value in d.items():\n            if \"url\" in key.lower():\n                return True\n            if isinstance(value, dict):\n                if check_dict_for_url(value):\n                    return True\n            elif isinstance(value, list):\n                for item in value:\n                    if isinstance(item, dict) and check_dict_for_url(item):\n                        return True\n        return False\n\n    return check_dict_for_url(block_dict)\n\n\ndef _build_error_block(error_message: str) -> Block:\n    \"\"\"\n    Build an error block to display in slack so that the user can see\n    the error without completely breaking\n    \"\"\"\n    display_text = (\n        \"There was an error displaying all of the Onyx answers.\"\n        f\" Please let an admin or an onyx developer know. Error: {error_message}\"\n    )\n    return SectionBlock(text=display_text)\n\n\n@retry(\n    tries=ONYX_BOT_NUM_RETRIES,\n    delay=0.25,\n    backoff=2,\n    logger=cast(logging.Logger, logger),\n)\ndef respond_in_thread_or_channel(\n    client: WebClient,\n    channel: str,\n    thread_ts: str | None,\n    text: str | None = None,\n    blocks: list[Block] | None = None,\n    receiver_ids: list[str] | None = None,\n    metadata: Metadata | None = None,\n    unfurl: bool = True,\n    send_as_ephemeral: bool | None = True,  # noqa: ARG001\n) -> list[str]:\n    if not text and not blocks:\n        raise ValueError(\"One of `text` or `blocks` must be provided\")\n\n    message_ids: list[str] = []\n    if not receiver_ids:\n        try:\n            response = client.chat_postMessage(\n                channel=channel,\n                text=text,\n                blocks=blocks,\n                thread_ts=thread_ts,\n                metadata=metadata,\n                unfurl_links=unfurl,\n                unfurl_media=unfurl,\n            )\n        except Exception as e:\n            blocks_str = str(blocks)[:1024]  # truncate block logging\n            logger.warning(f\"Failed to post message: {e} \\n blocks: {blocks_str}\")\n            logger.warning(\"Trying again without blocks that have urls\")\n\n            if not blocks:\n                raise e\n\n            blocks_without_urls = [\n                block for block in blocks if not _check_for_url_in_block(block)\n            ]\n            blocks_without_urls.append(_build_error_block(str(e)))\n\n            # Try again wtihout blocks containing url\n            response = client.chat_postMessage(\n                channel=channel,\n                text=text,\n                blocks=blocks_without_urls,\n                thread_ts=thread_ts,\n                metadata=metadata,\n                unfurl_links=unfurl,\n                unfurl_media=unfurl,\n            )\n\n        message_ids.append(response[\"message_ts\"])\n    else:\n        for receiver in receiver_ids:\n            try:\n                response = client.chat_postEphemeral(\n                    channel=channel,\n                    user=receiver,\n                    text=text,\n                    blocks=blocks,\n                    thread_ts=thread_ts,\n                    metadata=metadata,\n                    unfurl_links=unfurl,\n                    unfurl_media=unfurl,\n                )\n            except Exception as e:\n                blocks_str = str(blocks)[:1024]  # truncate block logging\n                logger.warning(f\"Failed to post message: {e} \\n blocks: {blocks_str}\")\n                logger.warning(\"Trying again without blocks that have urls\")\n\n                if not blocks:\n                    raise e\n\n                blocks_without_urls = [\n                    block for block in blocks if not _check_for_url_in_block(block)\n                ]\n                blocks_without_urls.append(_build_error_block(str(e)))\n\n                # Try again wtihout blocks containing url\n                response = client.chat_postEphemeral(\n                    channel=channel,\n                    user=receiver,\n                    text=text,\n                    blocks=blocks_without_urls,\n                    thread_ts=thread_ts,\n                    metadata=metadata,\n                    unfurl_links=unfurl,\n                    unfurl_media=unfurl,\n                )\n\n            message_ids.append(response[\"message_ts\"])\n\n    return message_ids\n\n\ndef build_feedback_id(\n    message_id: int,\n    document_id: str | None = None,\n    document_rank: int | None = None,\n) -> str:\n    unique_prefix = \"\".join(random.choice(string.ascii_letters) for _ in range(10))\n    if document_id is not None:\n        if not document_id or document_rank is None:\n            raise ValueError(\"Invalid document, missing information\")\n        if ID_SEPARATOR in document_id:\n            raise ValueError(\n                \"Separator pattern should not already exist in document id\"\n            )\n        feedback_id = ID_SEPARATOR.join(\n            [str(message_id), document_id, str(document_rank)]\n        )\n    else:\n        feedback_id = str(message_id)\n\n    return unique_prefix + ID_SEPARATOR + feedback_id\n\n\ndef build_publish_ephemeral_message_id(\n    original_question_ts: str,\n) -> str:\n    return \"publish_ephemeral_message__\" + original_question_ts\n\n\ndef build_continue_in_web_ui_id(\n    message_id: int,\n) -> str:\n    unique_prefix = str(uuid.uuid4())[:10]\n    return unique_prefix + ID_SEPARATOR + str(message_id)\n\n\ndef decompose_action_id(feedback_id: str) -> tuple[int, str | None, int | None]:\n    \"\"\"Decompose into query_id, document_id, document_rank, see above function\"\"\"\n    try:\n        components = feedback_id.split(ID_SEPARATOR)\n        if len(components) != 2 and len(components) != 4:\n            raise ValueError(\"Feedback ID does not contain right number of elements\")\n\n        if len(components) == 2:\n            return int(components[-1]), None, None\n\n        return int(components[1]), components[2], int(components[3])\n\n    except Exception as e:\n        logger.error(e)\n        raise ValueError(\"Received invalid Feedback Identifier\")\n\n\ndef get_view_values(state_values: dict[str, Any]) -> dict[str, str]:\n    \"\"\"Extract view values\n\n    Args:\n        state_values (dict): The Slack view-submission values\n\n    Returns:\n        dict: keys/values of the view state content\n    \"\"\"\n    view_values = {}\n    for _, view_data in state_values.items():\n        for k, v in view_data.items():\n            if (\n                \"selected_option\" in v\n                and isinstance(v[\"selected_option\"], dict)\n                and \"value\" in v[\"selected_option\"]\n            ):\n                view_values[k] = v[\"selected_option\"][\"value\"]\n            elif \"selected_options\" in v and isinstance(v[\"selected_options\"], list):\n                view_values[k] = [\n                    x[\"value\"] for x in v[\"selected_options\"] if \"value\" in x\n                ]\n            elif \"selected_date\" in v:\n                view_values[k] = v[\"selected_date\"]\n            elif \"value\" in v:\n                view_values[k] = v[\"value\"]\n    return view_values\n\n\ndef translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) -> str:\n    def _replace_highlight(s: str) -> str:\n        s = re.sub(r\"(?<=[^\\s])<hi>(.*?)</hi>\", r\"\\1\", s)\n        s = s.replace(\"</hi>\", \"*\").replace(\"<hi>\", \"*\")\n        return s\n\n    final_matches = [\n        replace_whitespaces_w_space(_replace_highlight(match_str)).strip()\n        for match_str in match_strs\n        if match_str\n    ]\n    combined = \"... \".join(final_matches)\n\n    # Slack introduces \"Show More\" after 300 on desktop which is ugly\n    # But don't trim the message if there is still a highlight after 300 chars\n    remaining = 300 - used_chars\n    if len(combined) > remaining and \"*\" not in combined[remaining:]:\n        combined = combined[: remaining - 3] + \"...\"\n\n    return combined\n\n\ndef remove_slack_text_interactions(slack_str: str) -> str:\n    slack_str = SlackTextCleaner.replace_tags_basic(slack_str)\n    slack_str = SlackTextCleaner.replace_channels_basic(slack_str)\n    slack_str = SlackTextCleaner.replace_special_mentions(slack_str)\n    slack_str = SlackTextCleaner.replace_special_catchall(slack_str)\n    slack_str = SlackTextCleaner.add_zero_width_whitespace_after_tag(slack_str)\n    return slack_str\n\n\ndef get_channel_from_id(client: WebClient, channel_id: str) -> dict[str, Any]:\n    response = client.conversations_info(channel=channel_id)\n    response.validate()\n    return response[\"channel\"]\n\n\ndef get_channel_name_from_id(\n    client: WebClient, channel_id: str\n) -> tuple[str | None, bool]:\n    try:\n        channel_info = get_channel_from_id(client, channel_id)\n        name = channel_info.get(\"name\")\n        is_dm = any([channel_info.get(\"is_im\"), channel_info.get(\"is_mpim\")])\n        return name, is_dm\n    except SlackApiError as e:\n        logger.exception(f\"Couldn't fetch channel name from id: {channel_id}\")\n        raise e\n\n\ndef fetch_slack_user_ids_from_emails(\n    user_emails: list[str], client: WebClient\n) -> tuple[list[str], list[str]]:\n    user_ids: list[str] = []\n    failed_to_find: list[str] = []\n    for email in user_emails:\n        try:\n            user = client.users_lookupByEmail(email=email)\n            user_ids.append(user.data[\"user\"][\"id\"])  # type: ignore\n        except Exception:\n            logger.error(f\"Was not able to find slack user by email: {email}\")\n            failed_to_find.append(email)\n\n    return user_ids, failed_to_find\n\n\ndef fetch_user_ids_from_groups(\n    given_names: list[str], client: WebClient\n) -> tuple[list[str], list[str]]:\n    user_ids: list[str] = []\n    failed_to_find: list[str] = []\n    try:\n        response = client.usergroups_list()\n        if not isinstance(response.data, dict):\n            logger.error(\"Error fetching user groups\")\n            return user_ids, given_names\n\n        all_group_data = response.data.get(\"usergroups\", [])\n        name_id_map = {d[\"name\"]: d[\"id\"] for d in all_group_data}\n        handle_id_map = {d[\"handle\"]: d[\"id\"] for d in all_group_data}\n        for given_name in given_names:\n            group_id = name_id_map.get(given_name) or handle_id_map.get(\n                given_name.lstrip(\"@\")\n            )\n            if not group_id:\n                failed_to_find.append(given_name)\n                continue\n            try:\n                response = client.usergroups_users_list(usergroup=group_id)\n                if isinstance(response.data, dict):\n                    user_ids.extend(response.data.get(\"users\", []))\n                else:\n                    failed_to_find.append(given_name)\n            except Exception as e:\n                logger.error(f\"Error fetching user group ids: {str(e)}\")\n                failed_to_find.append(given_name)\n    except Exception as e:\n        logger.error(f\"Error fetching user groups: {str(e)}\")\n        failed_to_find = given_names\n\n    return user_ids, failed_to_find\n\n\ndef fetch_group_ids_from_names(\n    given_names: list[str], client: WebClient\n) -> tuple[list[str], list[str]]:\n    group_data: list[str] = []\n    failed_to_find: list[str] = []\n\n    try:\n        response = client.usergroups_list()\n        if not isinstance(response.data, dict):\n            logger.error(\"Error fetching user groups\")\n            return group_data, given_names\n\n        all_group_data = response.data.get(\"usergroups\", [])\n\n        name_id_map = {d[\"name\"]: d[\"id\"] for d in all_group_data}\n        handle_id_map = {d[\"handle\"]: d[\"id\"] for d in all_group_data}\n\n        for given_name in given_names:\n            id = handle_id_map.get(given_name.lstrip(\"@\"))\n            id = id or name_id_map.get(given_name)\n            if id:\n                group_data.append(id)\n            else:\n                failed_to_find.append(given_name)\n    except Exception as e:\n        failed_to_find = given_names\n        logger.error(f\"Error fetching user groups: {str(e)}\")\n\n    return group_data, failed_to_find\n\n\ndef fetch_user_semantic_id_from_id(\n    user_id: str | None, client: WebClient\n) -> str | None:\n    if not user_id:\n        return None\n\n    response = client.users_info(user=user_id)\n    if not response[\"ok\"]:\n        return None\n\n    user: dict = cast(dict[Any, dict], response.data).get(\"user\", {})\n\n    return (\n        user.get(\"real_name\")\n        or user.get(\"name\")\n        or user.get(\"profile\", {}).get(\"email\")\n    )\n\n\ndef read_slack_thread(\n    tenant_id: str, channel: str, thread: str, client: WebClient\n) -> list[ThreadMessage]:\n    thread_messages: list[ThreadMessage] = []\n    response = client.conversations_replies(channel=channel, ts=thread)\n    replies = cast(dict, response.data).get(\"messages\", [])\n    for reply in replies:\n        if \"user\" in reply and \"bot_id\" not in reply:\n            message = reply[\"text\"]\n            user_sem_id = (\n                fetch_user_semantic_id_from_id(reply.get(\"user\"), client)\n                or \"Unknown User\"\n            )\n            message_type = MessageType.USER\n        else:\n            blocks: Any\n            is_onyx_bot_response = False\n\n            reply_user = reply.get(\"user\")\n            reply_bot_id = reply.get(\"bot_id\")\n\n            self_slack_bot_user_id, self_slack_bot_bot_id = get_onyx_bot_auth_ids(\n                tenant_id, client\n            )\n            if reply_user is not None and reply_user == self_slack_bot_user_id:\n                is_onyx_bot_response = True\n\n            if reply_bot_id is not None and reply_bot_id == self_slack_bot_bot_id:\n                is_onyx_bot_response = True\n\n            if is_onyx_bot_response:\n                # OnyxBot response\n                message_type = MessageType.ASSISTANT\n                user_sem_id = \"Assistant\"\n\n                # OnyxBot responses have both text and blocks\n                # The useful content is in the blocks, specifically the first block unless there are\n                # auto-detected filters\n                blocks = reply.get(\"blocks\")\n                if not blocks:\n                    logger.warning(f\"OnyxBot response has no blocks: {reply}\")\n                    continue\n\n                message = blocks[0].get(\"text\", {}).get(\"text\")\n\n                # If auto-detected filters are on, use the second block for the actual answer\n                # The first block is the auto-detected filters\n                if message is not None and message.startswith(\"_Filters\"):\n                    if len(blocks) < 2:\n                        logger.warning(f\"Only filter blocks found: {reply}\")\n                        continue\n                    # This is the OnyxBot answer format, if there is a change to how we respond,\n                    # this will need to be updated to get the correct \"answer\" portion\n                    message = reply[\"blocks\"][1].get(\"text\", {}).get(\"text\")\n            else:\n                # Other bots are not counted as the LLM response which only comes from Onyx\n                message_type = MessageType.USER\n                bot_user_name = fetch_user_semantic_id_from_id(\n                    reply.get(\"user\"), client\n                )\n                user_sem_id = bot_user_name or \"Unknown\" + \" Bot\"\n\n                # For other bots, just use the text as we have no way of knowing that the\n                # useful portion is\n                message = reply.get(\"text\")\n                if not message:\n                    message = blocks[0].get(\"text\", {}).get(\"text\")\n\n            if not message:\n                logger.warning(\"Skipping Slack thread message, no text found\")\n                continue\n\n        message = remove_onyx_bot_tag(tenant_id, message, client=client)\n        thread_messages.append(\n            ThreadMessage(message=message, sender=user_sem_id, role=message_type)\n        )\n\n    return thread_messages\n\n\ndef slack_usage_report(action: str, sender_id: str | None, client: WebClient) -> None:\n    if DISABLE_TELEMETRY:\n        return\n\n    onyx_user = None\n    sender_email = None\n    try:\n        sender_email = client.users_info(user=sender_id).data[\"user\"][\"profile\"][\"email\"]  # type: ignore\n    except Exception:\n        logger.warning(\"Unable to find sender email\")\n\n    if sender_email is not None:\n        with get_session_with_current_tenant() as db_session:\n            onyx_user = get_user_by_email(email=sender_email, db_session=db_session)\n\n    optional_telemetry(\n        record_type=RecordType.USAGE,\n        data={\"action\": action},\n        user_id=str(onyx_user.id) if onyx_user else \"Non-Onyx-Or-No-Auth-User\",\n    )\n\n\nclass SlackRateLimiter:\n    def __init__(self) -> None:\n        self.max_qpm: int | None = ONYX_BOT_MAX_QPM\n        self.max_wait_time = ONYX_BOT_MAX_WAIT_TIME\n        self.active_question = 0\n        self.last_reset_time = time.time()\n        self.waiting_questions: list[int] = []\n\n    def refill(self) -> None:\n        # If elapsed time is greater than the period, reset the active question count\n        if (time.time() - self.last_reset_time) > 60:\n            self.active_question = 0\n            self.last_reset_time = time.time()\n\n    def notify(\n        self, client: WebClient, channel: str, position: int, thread_ts: str | None\n    ) -> None:\n        respond_in_thread_or_channel(\n            client=client,\n            channel=channel,\n            receiver_ids=None,\n            text=f\"Your question has been queued. You are in position {position}.\\nPlease wait a moment :hourglass_flowing_sand:\",\n            thread_ts=thread_ts,\n        )\n\n    def is_available(self) -> bool:\n        if self.max_qpm is None:\n            return True\n\n        self.refill()\n        return self.active_question < self.max_qpm\n\n    def acquire_slot(self) -> None:\n        self.active_question += 1\n\n    def init_waiter(self) -> tuple[int, int]:\n        func_randid = random.getrandbits(128)\n        self.waiting_questions.append(func_randid)\n        position = self.waiting_questions.index(func_randid) + 1\n\n        return func_randid, position\n\n    def waiter(self, func_randid: int) -> None:\n        if self.max_qpm is None:\n            return\n\n        wait_time = 0\n        while (\n            self.active_question >= self.max_qpm\n            or self.waiting_questions[0] != func_randid\n        ):\n            if wait_time > self.max_wait_time:\n                raise TimeoutError\n            time.sleep(2)\n            wait_time += 2\n            self.refill()\n\n        del self.waiting_questions[0]\n\n\ndef get_feedback_visibility() -> FeedbackVisibility:\n    try:\n        return FeedbackVisibility(ONYX_BOT_FEEDBACK_VISIBILITY.lower())\n    except ValueError:\n        return FeedbackVisibility.PRIVATE\n\n\nclass TenantSocketModeClient(SocketModeClient):\n    def __init__(self, tenant_id: str, slack_bot_id: int, *args: Any, **kwargs: Any):\n        super().__init__(*args, **kwargs)\n        self._tenant_id = tenant_id\n        self.slack_bot_id = slack_bot_id\n        self.bot_name: str = \"Unnamed\"\n\n    @contextmanager\n    def _set_tenant_context(self) -> Generator[None, None, None]:\n        token = None\n        try:\n            if self._tenant_id:\n                token = CURRENT_TENANT_ID_CONTEXTVAR.set(self._tenant_id)\n            yield\n        finally:\n            if token:\n                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n    def enqueue_message(self, message: str) -> None:\n        with self._set_tenant_context():\n            super().enqueue_message(message)\n\n    def process_message(self) -> None:\n        with self._set_tenant_context():\n            super().process_message()\n\n    def run_message_listeners(self, message: dict, raw_message: str) -> None:\n        with self._set_tenant_context():\n            super().run_message_listeners(message, raw_message)\n"
  },
  {
    "path": "backend/onyx/prompts/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/prompts/basic_memory.py",
    "content": "# ruff: noqa: E501, W605 start\n\n# Note that the user_basic_information is only included if we have at least 1 of the following: user_name, user_email, user_role\n# This is included because sometimes we need to know the user's name or basic info to best generate the memory.\nFULL_MEMORY_UPDATE_PROMPT = \"\"\"\nYou are a memory update agent that helps the user add or update memories. You are given a list of existing memories and a new memory to add. \\\nJust as context, you are also given the last few user messages from the conversation which generated the new memory. You must determine if the memory is brand new or if it is related to an existing memory. \\\nIf the new memory is an update to an existing memory or contradicts an existing memory, it should be treated as an update and you should reference the existing memory by memory_id (see below). \\\nThe memory should omit the user's name and direct reference to the user - for example, a memory like \"Yuhong prefers dark mode.\" should be modified to \"Prefers dark mode.\" (if the user's name is Yuhong).\n\n# Truncated chat history\n{chat_history}{user_basic_information}\n\n# User's existing memories\n{existing_memories}\n\n# New memory the user wants to insert\n{new_memory}\n\n# Response Style\nYou MUST respond in a json which follows the following format and keys:\n```json\n{{\n    \"operation\": \"add or update\",\n    \"memory_id\": \"if the operation is update, the id of the memory to update, otherwise null\",\n    \"memory_text\": \"the text of the memory to add or update\"\n}}\n```\n\"\"\".strip()\n# ruff: noqa: E501, W605 end\n\nMEMORY_USER_BASIC_INFORMATION_PROMPT = \"\"\"\n\n# User Basic Information\nUser name: {user_name}\nUser email: {user_email}\nUser role: {user_role}\n\"\"\"\n"
  },
  {
    "path": "backend/onyx/prompts/chat_prompts.py",
    "content": "# ruff: noqa: E501, W605 start\n\nfrom onyx.prompts.constants import REMINDER_TAG_NO_HEADER\n\n\nDATETIME_REPLACEMENT_PAT = \"{{CURRENT_DATETIME}}\"\nCITATION_GUIDANCE_REPLACEMENT_PAT = \"{{CITATION_GUIDANCE}}\"\nREMINDER_TAG_REPLACEMENT_PAT = \"{{REMINDER_TAG_DESCRIPTION}}\"\n\n\n# Note this uses a string pattern replacement so the user can also include it in their custom prompts. Keeps the replacement logic simple\n# This is editable by the user in the admin UI.\n# The first line is intended to help guide the general feel/behavior of the system.\nDEFAULT_SYSTEM_PROMPT = f\"\"\"\nYou are an expert assistant who is truthful, nuanced, insightful, and efficient. \\\nYour goal is to deeply understand the user's intent, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. \\\nWhenever there is any ambiguity around the user's query (or more information would be helpful), you use available tools (if any) to get more context.\n\nThe current date is {DATETIME_REPLACEMENT_PAT}.{CITATION_GUIDANCE_REPLACEMENT_PAT}\n\n# Response Style\nYou use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.\nYou use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\\\n[expression]\\\\n$$' for standalone cases and '\\\\( [expression] \\\\)' when inline.\nFor code you prefer to use Markdown and specify the language.\nYou can use horizontal rules (---) to separate sections of your responses.\nYou can use Markdown tables to format your responses for data, lists, and other structured information.\n\n{REMINDER_TAG_REPLACEMENT_PAT}\n\"\"\".lstrip()\n\n\nCOMPANY_NAME_BLOCK = \"\"\"\nThe user is at an organization called `{company_name}`.\n\"\"\"\n\nCOMPANY_DESCRIPTION_BLOCK = \"\"\"\nOrganization description: {company_description}\n\"\"\"\n\n# This is added to the system prompt prior to the tools section and is applied only if search tools have been run\nREQUIRE_CITATION_GUIDANCE = \"\"\"\n\nCRITICAL: If referencing knowledge from searches, cite relevant statements INLINE using the format [1], [2], [3], etc. to reference the \"document\" field. \\\nDO NOT provide any links following the citations. Cite inline as opposed to leaving all citations until the very end of the response.\n\"\"\"\n\n\n# Reminder message if any search tool has been run anytime in the chat turn\nCITATION_REMINDER = \"\"\"\nRemember to provide inline citations in the format [1], [2], [3], etc. based on the \"document\" field of the documents.\n\"\"\".strip()\n\nLAST_CYCLE_CITATION_REMINDER = \"\"\"\nYou are on your last cycle and no longer have any tool calls available. You must answer the query now to the best of your ability.\n\"\"\".strip()\n\n\n# Reminder message that replaces the usual reminder if web_search was the last tool call\nOPEN_URL_REMINDER = \"\"\"\nRemember that after using web_search, you are encouraged to open some pages to get more context unless the query is completely answered by the snippets.\nOpen the pages that look the most promising and high quality by calling the open_url tool with an array of URLs. Open as many as you want.\n\nIf you do have enough to answer, remember to provide INLINE citations using the \"document\" field in the format [1], [2], [3], etc.\n\"\"\".strip()\n\n\nIMAGE_GEN_REMINDER = \"\"\"\nVery briefly describe the image(s) generated. Do not include any links or attachments.\n\"\"\".strip()\n\n\nFILE_REMINDER = \"\"\"\nYour code execution generated file(s) with download links.\nIf you reference or share these files, use the exact markdown format [filename](file_link) with the file_link from the execution result.\n\"\"\".strip()\n\n\n# Specifically for OpenAI models, this prefix needs to be in place for the model to output markdown and correct styling\nCODE_BLOCK_MARKDOWN = \"Formatting re-enabled. \"\n\n# This is just for Slack context today\nADDITIONAL_CONTEXT_PROMPT = \"\"\"\nHere is some additional context which may be relevant to the user query:\n\n{additional_context}\n\"\"\".strip()\n\n\nTOOL_CALL_RESPONSE_CROSS_MESSAGE = \"\"\"\nThis tool call completed but the results are no longer accessible.\n\"\"\".strip()\n\n# This is used to add the current date and time to the prompt in the case where the Agent should be aware of the current\n# date and time but the replacement pattern is not present in the prompt.\nADDITIONAL_INFO = \"\\n\\nAdditional Information:\\n\\t- {datetime_info}.\"\n\n\nCHAT_NAMING_SYSTEM_PROMPT = f\"\"\"\nGiven the conversation history, provide a SHORT name for the conversation. Focus the name on the important keywords to convey the topic of the conversation. \\\nMake sure the name is in the same language as the user's first message.\n\n{REMINDER_TAG_NO_HEADER}\n\nIMPORTANT: DO NOT OUTPUT ANYTHING ASIDE FROM THE NAME. MAKE IT AS CONCISE AS POSSIBLE. NEVER USE MORE THAN 5 WORDS, LESS IS FINE.\n\"\"\".strip()\n\n\nCHAT_NAMING_REMINDER = \"\"\"\nProvide a short name for the conversation. Refer to other messages in the conversation (not including this one) to determine the language of the name.\n\nIMPORTANT: DO NOT OUTPUT ANYTHING ASIDE FROM THE NAME. MAKE IT AS CONCISE AS POSSIBLE. NEVER USE MORE THAN 5 WORDS, LESS IS FINE.\n\"\"\".strip()\n# ruff: noqa: E501, W605 end\n"
  },
  {
    "path": "backend/onyx/prompts/chat_tools.py",
    "content": "# These prompts are to support tool calling. Currently not used in the main flow or via any configs\n# The current generation of LLM is too unreliable for this task.\n# Onyx retrieval call as a tool option\nDANSWER_TOOL_NAME = \"Current Search\"\nDANSWER_TOOL_DESCRIPTION = \"A search tool that can find information on any topic including up to date and proprietary knowledge.\"\n\n\n# Tool calling format inspired from LangChain\nTOOL_TEMPLATE = \"\"\"\nTOOLS\n------\nYou can use tools to look up information that may be helpful in answering the user's \\\noriginal question. The available tools are:\n\n{tool_overviews}\n\nRESPONSE FORMAT INSTRUCTIONS\n----------------------------\nWhen responding to me, please output a response in one of two formats:\n\n**Option 1:**\nUse this if you want to use a tool. Markdown code snippet formatted in the following schema:\n\n```json\n{{\n    \"action\": string, \\\\ The action to take. {tool_names}\n    \"action_input\": string \\\\ The input to the action\n}}\n```\n\n**Option #2:**\nUse this if you want to respond directly to the user. Markdown code snippet formatted in the following schema:\n\n```json\n{{\n    \"action\": \"Final Answer\",\n    \"action_input\": string \\\\ You should put what you want to return to use here\n}}\n```\n\"\"\"\n\n# For the case where the user has not configured any tools to call, but still using the tool-flow\n# expected format\nTOOL_LESS_PROMPT = \"\"\"\nRespond with a markdown code snippet in the following schema:\n\n```json\n{{\n    \"action\": \"Final Answer\",\n    \"action_input\": string \\\\ You should put what you want to return to use here\n}}\n```\n\"\"\"\n\n\n# Second part of the prompt to include the user query\nUSER_INPUT = \"\"\"\nUSER'S INPUT\n--------------------\nHere is the user's input \\\n(remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else):\n\n{user_input}\n\"\"\"\n\n\n# After the tool call, this is the following message to get a final answer\n# Tools are not chained currently, the system must provide an answer after calling a tool\nTOOL_FOLLOWUP = \"\"\"\nTOOL RESPONSE:\n---------------------\n{tool_output}\n\nUSER'S INPUT\n--------------------\nOkay, so what is the response to my last comment? If using information obtained from the tools you must \\\nmention it explicitly without mentioning the tool names - I have forgotten all TOOL RESPONSES!\nIf the tool response is not useful, ignore it completely.\n{optional_reminder}{hint}\nIMPORTANT! You MUST respond with a markdown code snippet of a json blob with a single action, and NOTHING else.\n\"\"\"\n\n\n# If no tools were used, but retrieval is enabled, then follow up with this message to get the final answer\nTOOL_LESS_FOLLOWUP = \"\"\"\nRefer to the following documents when responding to my final query. Ignore any documents that are not relevant.\n\nCONTEXT DOCUMENTS:\n---------------------\n{context_str}\n\nFINAL QUERY:\n--------------------\n{user_query}\n\n{hint_text}\n\"\"\"\n"
  },
  {
    "path": "backend/onyx/prompts/compression_prompts.py",
    "content": "# Prompts for chat history compression via summarization.\n\n# ruff: noqa: E501, W605 start\n# Cutoff marker helps the LLM focus on summarizing only messages before this point.\n# This improves \"needle in haystack\" accuracy by explicitly marking where to stop with an exact pattern which is also placed in locations easily attended to by the LLM (last user message and system prompt).\nCONTEXT_CUTOFF_START_MARKER = \"<context_cutoff>\"\nCONTEXT_CUTOFF_END_MARKER = \"</context_cutoff>\"\n\nSUMMARIZATION_CUTOFF_MARKER = f\"{CONTEXT_CUTOFF_START_MARKER} Stop summarizing the rest of the conversation past this point. {CONTEXT_CUTOFF_END_MARKER}\"\n\nSUMMARIZATION_PROMPT = f\"\"\"\nYou are a summarization system. Your task is to produce a detailed and accurate summary of a chat conversation up to a specified cutoff message. The cutoff will be marked by the string {CONTEXT_CUTOFF_START_MARKER}. \\\nIMPORTANT: Do not explicitly mention anything about the cutoff in your response. Do not situate the summary with respect to the cutoff. The context cutoff is only a system injected marker.\n\n# Guidelines\n- Only consider messages that occur at or before the cutoff point. Use the messages after it purely as context without including any of it in the summary.\n- Preserve factual correctness and intent; do not infer or speculate.\n- The summary should be information dense and detailed.\n- The summary should be in paragraph format and long enough to capture all of the most prominent details.\n\n# Focus on\n- Key topics discussed.\n- Decisions made, tools used, and conclusions reached.\n- Open questions or unresolved items.\n- Important constraints, preferences, or assumptions stated.\n- Omit small talk, repetition, and stylistic filler unless it affects meaning.\n\"\"\".strip()\n\nPROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK = \"\"\"\n\n# Existing summary\nThere is a previous summary of the conversation. Build on top of this when constructing the new overall summary of the conversation:\n{previous_summary}\n\"\"\".rstrip()\n\nUSER_REMINDER = f\"Help summarize the conversation up to the cutoff point (do not mention anything related to the cutoff directly in your response). It should be a long form summary of the conversation up to the cutoff point as marked by {CONTEXT_CUTOFF_START_MARKER}. Be thorough.\"\n\nPROGRESSIVE_USER_REMINDER = f\"Update the existing summary by incorporating the new messages up to the cutoff point as marked by {CONTEXT_CUTOFF_START_MARKER} (do not mention anything related to the cutoff directly in your response). Be thorough and maintain the long form summary format.\"\n# ruff: noqa: E501, W605 end\n"
  },
  {
    "path": "backend/onyx/prompts/constants.py",
    "content": "# ruff: noqa: E501, W605 start\nCODE_BLOCK_PAT = \"```\\n{}\\n```\"\nTRIPLE_BACKTICK = \"```\"\nSYSTEM_REMINDER_TAG_OPEN = \"<system-reminder>\"\nSYSTEM_REMINDER_TAG_CLOSE = \"</system-reminder>\"\n\n# Tags format inspired by Anthropic and OpenCode\nREMINDER_TAG_NO_HEADER = f\"\"\"\nUser messages may include {SYSTEM_REMINDER_TAG_OPEN} and {SYSTEM_REMINDER_TAG_CLOSE} tags. These {SYSTEM_REMINDER_TAG_OPEN} tags contain useful information and reminders. \\\nThey are automatically added by the system and are not actual user inputs. Behave in accordance to these instructions if relevant, and continue normally if they are not.\n\"\"\".strip()\n\nREMINDER_TAG_DESCRIPTION = f\"\"\"\n# System Reminders\n{REMINDER_TAG_NO_HEADER}\n\"\"\".strip()\n# ruff: noqa: E501, W605 end\n"
  },
  {
    "path": "backend/onyx/prompts/contextual_retrieval.py",
    "content": "# NOTE: the prompt separation is partially done for efficiency; previously I tried\n# to do it all in one prompt with sequential format() calls but this will cause a backend\n# error when the document contains any {} as python will expect the {} to be filled by\n# format() arguments\n\n# ruff: noqa: E501, W605 start\nCONTEXTUAL_RAG_PROMPT1 = \"\"\"<document>\n{document}\n</document>\nHere is the chunk we want to situate within the whole document\"\"\"\n\nCONTEXTUAL_RAG_PROMPT2 = \"\"\"<chunk>\n{chunk}\n</chunk>\nPlease give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct context and nothing else.\n\"\"\".rstrip()\n\nCONTEXTUAL_RAG_TOKEN_ESTIMATE = 64  # 19 + 45\n\nDOCUMENT_SUMMARY_PROMPT = \"\"\"<document>\n{document}\n</document>\nPlease give a short succinct summary of the entire document. Answer only with the succinct summary and nothing else.\n\"\"\".rstrip()\n\nDOCUMENT_SUMMARY_TOKEN_ESTIMATE = 50\n# ruff: noqa: E501, W605 end\n"
  },
  {
    "path": "backend/onyx/prompts/deep_research/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/prompts/deep_research/dr_tool_prompts.py",
    "content": "GENERATE_PLAN_TOOL_NAME = \"generate_plan\"\n\n\nGENERATE_REPORT_TOOL_NAME = \"generate_report\"\n\n\nRESEARCH_AGENT_TOOL_NAME = \"research_agent\"\n\n\n# This is to ensure that even the non-reasoning models can have an ok time with this more complex flow.\nTHINK_TOOL_NAME = \"think_tool\"\n\n\n# ruff: noqa: E501, W605 start\n\n# Hard for the open_url tool to be called for a ton of search results all at once so limit to 3\nWEB_SEARCH_TOOL_DESCRIPTION = \"\"\"\n\n## web_search\nUse the `web_search` tool to get search results from the web. You should use this tool to get context for your research. These should be optimized for search engines like Google. \\\nUse concise and specific queries and avoid merging multiple queries into one. You can call web_search with multiple queries at once (3 max) but generally only do this when there is a clear opportunity for parallel searching. \\\nIf you use multiple queries, ensure that the queries are related in topic but not similar such that the results would be redundant.\n\"\"\"\n\n# This one is mostly similar to the one for the main flow but there won't be any user specified URLs to open.\nOPEN_URLS_TOOL_DESCRIPTION = f\"\"\"\n\n## open_urls\nUse the `open_urls` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your searches. \\\nYou can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources. \\\nYou should almost always use open_urls after a web_search call and sometimes after reasoning with the {THINK_TOOL_NAME} tool.\n\"\"\"\n\nOPEN_URLS_TOOL_DESCRIPTION_REASONING = \"\"\"\n\n## open_urls\nUse the `open_urls` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your searches. \\\nYou can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources. \\\nYou should almost always use open_urls after a web_search call.\n\"\"\"\n\n# NOTE: Internal search tool uses the same description as the default flow, not duplicating here.\n\n# ruff: noqa: E501, W605 end\n"
  },
  {
    "path": "backend/onyx/prompts/deep_research/orchestration_layer.py",
    "content": "from onyx.prompts.deep_research.dr_tool_prompts import GENERATE_PLAN_TOOL_NAME\nfrom onyx.prompts.deep_research.dr_tool_prompts import GENERATE_REPORT_TOOL_NAME\nfrom onyx.prompts.deep_research.dr_tool_prompts import RESEARCH_AGENT_TOOL_NAME\nfrom onyx.prompts.deep_research.dr_tool_prompts import THINK_TOOL_NAME\n\n\n# ruff: noqa: E501, W605 start\nCLARIFICATION_PROMPT = f\"\"\"\nYou are a clarification agent that runs prior to deep research. Assess whether you need to ask clarifying questions, or if the user has already provided enough information for you to start research. \\\nCRITICAL - Never directly answer the user's query, you must only ask clarifying questions or call the `{GENERATE_PLAN_TOOL_NAME}` tool.\n\nIf the user query is already very detailed or lengthy (more than 3 sentences), do not ask for clarification and instead call the `{GENERATE_PLAN_TOOL_NAME}` tool.\n\nFor context, the date is {{current_datetime}}.\n\nBe conversational and friendly, prefer saying \"could you\" rather than \"I need\" etc.\n\nIf you need to ask questions, follow these guidelines:\n- Be concise and do not ask more than 5 questions.\n- If there are ambiguous terms or questions, ask the user to clarify.\n- Your questions should be a numbered list for clarity.\n- Respond in the same language as the user's query.\n- Make sure to gather all the information needed to carry out the research task in a concise, well-structured manner.{{internal_search_clarification_guidance}}\n- Wrap up with a quick sentence on what the clarification will help with, it's ok to reference the user query closely here.\n\"\"\".strip()\n\n\nINTERNAL_SEARCH_CLARIFICATION_GUIDANCE = \"\"\"\n- The deep research system is connected with organization internal document search and web search capabilities. In cases where it is unclear which source is more appropriate, ask the user to clarify.\n\"\"\"\n\n# Here there is a bit of combating model behavior which during alignment may be overly tuned to be cautious about access to data and feasibility.\n# Sometimes the model will just apologize and claim the task is not possible, hence the long section following CRITICAL.\nRESEARCH_PLAN_PROMPT = \"\"\"\nYou are a research planner agent that generates the high level approach for deep research on a user query. Analyze the query carefully and break it down into main concepts and areas of exploration. \\\nStick closely to the user query and stay on topic but be curious and avoid duplicate or overlapping exploration directions. \\\nBe sure to take into account the time sensitive aspects of the research topic and make sure to emphasize up to date information where appropriate. \\\nFocus on providing thorough research of the user's query over being helpful.\n\nCRITICAL - You MUST only output the research plan for the deep research flow and nothing else, you are not responding to the user. \\\nDo not worry about the feasibility of the plan or access to data or tools, a different deep research flow will handle that.\n\nFor context, the date is {current_datetime}.\n\nThe research plan should be formatted as a numbered list of steps and have 6 or less individual steps.\n\nEach step should be a standalone exploration question or topic that can be researched independently but may build on previous steps. The plan should be in the same language as the user's query.\n\nOutput only the numbered list of steps with no additional prefix or suffix.\n\"\"\".strip()\n\n\n# Specifically for some models, it really struggles to not just answer the user when there are questions about internal knowledge.\n# A reminder (specifically the fact that it's also a User type message) helps to prevent this.\nRESEARCH_PLAN_REMINDER = \"\"\"\nRemember to only output the research plan and nothing else. Do not worry about the feasibility of the plan or data access.\n\nYour response must only be a numbered list of steps with no additional prefix or suffix.\n\"\"\".strip()\n\n\nORCHESTRATOR_PROMPT = f\"\"\"\nYou are an orchestrator agent for deep research. Your job is to conduct research by calling the {RESEARCH_AGENT_TOOL_NAME} tool with high level research tasks. \\\nThis delegates the lower level research work to the {RESEARCH_AGENT_TOOL_NAME} which will provide back the results of the research.\n\nFor context, the date is {{current_datetime}}.\n\nBefore calling {GENERATE_REPORT_TOOL_NAME}, reason to double check that all aspects of the user's query have been well researched and that all key topics around the plan have been researched. \\\nThere are cases where new discoveries from research may lead to a deviation from the original research plan.\nIn these cases, ensure that the new directions are thoroughly investigated prior to calling {GENERATE_REPORT_TOOL_NAME}.\n\nNEVER output normal response tokens, you must only call tools.\n\n# Tools\nYou have currently used {{current_cycle_count}} of {{max_cycles}} max research cycles. You do not need to use all cycles.\n\n## {RESEARCH_AGENT_TOOL_NAME}\nThe research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level with a clear direction for investigation. \\\nIt should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation. \\\nThe research task should be in the same language as the overall research plan.\n\nCRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, other research agents, or message history. \\\nYou absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.{{internal_search_research_task_guidance}}\n\nYou should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.\n\nYou are encouraged to call the {RESEARCH_AGENT_TOOL_NAME} in parallel if the research tasks are not dependent on each other, which is typically the case. NEVER call more than 3 {RESEARCH_AGENT_TOOL_NAME} calls in parallel.\n\n## {GENERATE_REPORT_TOOL_NAME}\nYou should call the {GENERATE_REPORT_TOOL_NAME} tool if any of the following conditions are met:\n- You have researched all of the relevant topics of the research plan.\n- You have shifted away from the original research plan and believe that you are done.\n- You have all of the information needed to thoroughly answer all aspects of the user's query.\n- The last research cycle yielded minimal new information and future cycles are unlikely to yield more information.\n\n## {THINK_TOOL_NAME}\nCRITICAL - use the {THINK_TOOL_NAME} to reason between every call to the {RESEARCH_AGENT_TOOL_NAME} and before calling {GENERATE_REPORT_TOOL_NAME}. You should treat this as chain-of-thought reasoning to think deeply on what to do next. \\\nBe curious, identify knowledge gaps and consider new potential directions of research. Use paragraph format, do not use bullet points or lists.\n\nNEVER use the {THINK_TOOL_NAME} in parallel with other {RESEARCH_AGENT_TOOL_NAME} or {GENERATE_REPORT_TOOL_NAME}.\n\nBefore calling {GENERATE_REPORT_TOOL_NAME}, double check that all aspects of the user's query have been researched and that all key topics around the plan have been researched (unless you have gone in a different direction).\n\n# Research Plan\n{{research_plan}}\n\"\"\".strip()\n\n\nINTERNAL_SEARCH_RESEARCH_TASK_GUIDANCE = \"\"\"\n If necessary, clarify if the research agent should focus mostly on organization internal searches, web searches, or a combination of both. If the task doesn't require a clear priority, don't add sourcing guidance.\n\"\"\".strip(\n    \"\\n\"\n)\n\n\nUSER_ORCHESTRATOR_PROMPT = \"\"\"\nRemember to refer to the system prompt and follow how to use the tools. Call the {THINK_TOOL_NAME} between every call to the {RESEARCH_AGENT_TOOL_NAME} and before calling {GENERATE_REPORT_TOOL_NAME}. Never run more than 3 {RESEARCH_AGENT_TOOL_NAME} calls in parallel.\n\nDon't mention this reminder or underlying details about the system.\n\"\"\".strip()\n\n\nFINAL_REPORT_PROMPT = \"\"\"\nYou are the final answer generator for a deep research task. Your job is to produce a thorough, balanced, and comprehensive answer on the research question provided by the user. \\\nYou have access to high-quality, diverse sources collected by secondary research agents as well as their analysis of the sources.\n\nIMPORTANT - You get straight to the point, never providing a title and avoiding lengthy introductions/preambles.\n\nFor context, the date is {current_datetime}.\n\nUsers have explicitly selected the deep research mode and will expect a long and detailed answer. It is ok and encouraged that your response is several pages long. \\\nStructure your response logically into relevant sections. You may find it helpful to reference the research plan to help structure your response but do not limit yourself to what is contained in the plan.\n\nYou use different text styles and formatting to make the response easier to read. You may use markdown rarely when necessary to make the response more digestible.\n\nProvide inline citations in the format [1], [2], [3], etc. based on the citations included by the research agents.\n\"\"\".strip()\n\n\nUSER_FINAL_REPORT_QUERY = f\"\"\"\nThe original research plan is included below (use it as a helpful reference but do not limit yourself to this):\n```\n{{research_plan}}\n```\n\nBased on all of the context provided in the research history, provide a comprehensive, well structured, and insightful answer to the user's previous query. \\\nCRITICAL: be extremely thorough in your response and address all relevant aspects of the query.\n\nIgnore the format styles of the intermediate {RESEARCH_AGENT_TOOL_NAME} reports, those are not end user facing and different from your task.\n\nProvide inline citations in the format [1], [2], [3], etc. based on the citations included by the research agents. The citations should be just a number in a bracket, nothing additional.\n\"\"\".strip()\n\n\n# Reasoning Model Variants of the prompts\nORCHESTRATOR_PROMPT_REASONING = f\"\"\"\nYou are an orchestrator agent for deep research. Your job is to conduct research by calling the {RESEARCH_AGENT_TOOL_NAME} tool with high level research tasks. \\\nThis delegates the lower level research work to the {RESEARCH_AGENT_TOOL_NAME} which will provide back the results of the research.\n\nFor context, the date is {{current_datetime}}.\n\nBefore calling {GENERATE_REPORT_TOOL_NAME}, reason to double check that all aspects of the user's query have been well researched and that all key topics around the plan have been researched.\nThere are cases where new discoveries from research may lead to a deviation from the original research plan. In these cases, ensure that the new directions are thoroughly investigated prior to calling {GENERATE_REPORT_TOOL_NAME}.\n\nBetween calls, think deeply on what to do next. Be curious, identify knowledge gaps and consider new potential directions of research. Use paragraph format for your reasoning, do not use bullet points or lists.\n\nNEVER output normal response tokens, you must only call tools.\n\n# Tools\nYou have currently used {{current_cycle_count}} of {{max_cycles}} max research cycles. You do not need to use all cycles.\n\n## {RESEARCH_AGENT_TOOL_NAME}\nThe research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level with a clear direction for investigation. \\\nIt should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation. \\\nThe research task should be in the same language as the overall research plan.\n\nCRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, or message history. \\\nYou absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.{{internal_search_research_task_guidance}}\n\nYou should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.\n\nYou are encouraged to call the {RESEARCH_AGENT_TOOL_NAME} in parallel if the research tasks are not dependent on each other, which is typically the case. NEVER call more than 3 {RESEARCH_AGENT_TOOL_NAME} calls in parallel.\n\n## {GENERATE_REPORT_TOOL_NAME}\nYou should call the {GENERATE_REPORT_TOOL_NAME} tool if any of the following conditions are met:\n- You have researched all of the relevant topics of the research plan.\n- You have shifted away from the original research plan and believe that you are done.\n- You have all of the information needed to thoroughly answer all aspects of the user's query.\n- The last research cycle yielded minimal new information and future cycles are unlikely to yield more information.\n\n# Research Plan\n{{research_plan}}\n\"\"\".strip()\n\n\nUSER_ORCHESTRATOR_PROMPT_REASONING = \"\"\"\nRemember to refer to the system prompt and follow how to use the tools. \\\nYou are encouraged to call the {RESEARCH_AGENT_TOOL_NAME} in parallel when the research tasks are not dependent on each other, but never call more than 3 {RESEARCH_AGENT_TOOL_NAME} calls in parallel.\n\nDon't mention this reminder or underlying details about the system.\n\"\"\".strip()\n\n\n# Only for the first cycle, we encourage the model to research more, since it is unlikely that it has already addressed all parts of the plan at this point.\nFIRST_CYCLE_REMINDER_TOKENS = 100\nFIRST_CYCLE_REMINDER = \"\"\"\nMake sure all parts of the user question and the plan have been thoroughly explored before calling generate_report. If new interesting angles have been revealed from the research, you may deviate from the plan to research new directions.\n\"\"\".strip()\n# ruff: noqa: E501, W605 end\n"
  },
  {
    "path": "backend/onyx/prompts/deep_research/research_agent.py",
    "content": "from onyx.prompts.deep_research.dr_tool_prompts import GENERATE_REPORT_TOOL_NAME\nfrom onyx.prompts.deep_research.dr_tool_prompts import THINK_TOOL_NAME\n\n\nMAX_RESEARCH_CYCLES = 8\n\n# ruff: noqa: E501, W605 start\nRESEARCH_AGENT_PROMPT = f\"\"\"\nYou are a highly capable, thoughtful, and precise research agent that conducts research on a specific topic. Prefer being thorough in research over being helpful. Be curious but stay strictly on topic. \\\nYou iteratively call the tools available to you including {{available_tools}} until you have completed your research at which point you call the {GENERATE_REPORT_TOOL_NAME} tool.\n\nNEVER output normal response tokens, you must only call tools.\n\nFor context, the date is {{current_datetime}}.\n\n# Tools\nYou have a limited number of cycles to complete your research and you do not have to use all cycles. You are on cycle {{current_cycle_count}} of {MAX_RESEARCH_CYCLES}.\\\n{{optional_internal_search_tool_description}}\\\n{{optional_web_search_tool_description}}\\\n{{optional_open_url_tool_description}}\n## {THINK_TOOL_NAME}\nCRITICAL - use the think tool after every set of searches and reads (so search, read some pages, then think and repeat). \\\nYou MUST use the {THINK_TOOL_NAME} before calling the web_search tool for all calls to web_search except for the first call. \\\nUse the {THINK_TOOL_NAME} before calling the {GENERATE_REPORT_TOOL_NAME} tool.\n\nAfter a set of searches + reads, use the {THINK_TOOL_NAME} to analyze the results and plan the next steps.\n- Reflect on the key information found with relation to the task.\n- Reason thoroughly about what could be missing, the knowledge gaps, and what queries might address them, \\\nor why there is enough information to answer the research task comprehensively.\n\n## {GENERATE_REPORT_TOOL_NAME}\nOnce you have completed your research, call the `{GENERATE_REPORT_TOOL_NAME}` tool. \\\nYou should only call this tool after you have fully researched the topic. \\\nConsider other potential areas of research and weigh that against the materials already gathered before calling this tool.\n\"\"\".strip()\n\n\nRESEARCH_REPORT_PROMPT = \"\"\"\nYou are a highly capable and precise research sub-agent that has conducted research on a specific topic. \\\nYour job is now to organize the findings to return a comprehensive report that preserves all relevant statements and information that has been gathered in the existing messages. \\\nThe report will be seen by another agent instead of a user so keep it free of formatting or commentary and instead focus on the facts only. \\\nDo not give it a title, do not break it down into sections, and do not provide any of your own conclusions/analysis.\n\nYou may see a list of tool calls in the history but you do not have access to tools anymore. You should only use the information in the history to create the report.\n\nCRITICAL - This report should be as long as necessary to return ALL of the information that the researcher has gathered. It should be several pages long so as to capture as much detail as possible from the research. \\\nIt cannot be stressed enough that this report must be EXTREMELY THOROUGH and COMPREHENSIVE. Only this report is going to be returned, so it's CRUCIAL that you don't lose any details from the raw messages.\n\nRemove any obviously irrelevant or duplicative information.\n\nIf a statement seems not trustworthy or is contradictory to other statements, it is important to flag it.\n\nWrite the report in the same language as the provided task.\n\nCite all sources INLINE using the format [1], [2], [3], etc. based on the `document` field of the source. \\\nCite inline as opposed to leaving all citations until the very end of the response.\n\"\"\"\n\n\nUSER_REPORT_QUERY = \"\"\"\nPlease write me a comprehensive report on the research topic given the context above. As a reminder, the original topic was:\n{research_topic}\n\nRemember to include AS MUCH INFORMATION AS POSSIBLE and as faithful to the original sources as possible. \\\nKeep it free of formatting and focus on the facts only. Be sure to include all context for each fact to avoid misinterpretation or misattribution. \\\nRespond in the same language as the topic provided above.\n\nCite every fact INLINE using the format [1], [2], [3], etc. based on the `document` field of the source.\n\nCRITICAL - BE EXTREMELY THOROUGH AND COMPREHENSIVE, YOUR RESPONSE SHOULD BE SEVERAL PAGES LONG.\n\"\"\"\n\n\n# Reasoning Model Variants of the prompts\nRESEARCH_AGENT_PROMPT_REASONING = f\"\"\"\nYou are a highly capable, thoughtful, and precise research agent that conducts research on a specific topic. Prefer being thorough in research over being helpful. Be curious but stay strictly on topic. \\\nYou iteratively call the tools available to you including {{available_tools}} until you have completed your research at which point you call the {GENERATE_REPORT_TOOL_NAME} tool. Between calls, think about the results of the previous tool call and plan the next steps. \\\nReason thoroughly about what could be missing, identify knowledge gaps, and what queries might address them. Or consider why there is enough information to answer the research task comprehensively.\n\nOnce you have completed your research, call the `{GENERATE_REPORT_TOOL_NAME}` tool.\n\nNEVER output normal response tokens, you must only call tools.\n\nFor context, the date is {{current_datetime}}.\n\n# Tools\nYou have a limited number of cycles to complete your research and you do not have to use all cycles. You are on cycle {{current_cycle_count}} of {MAX_RESEARCH_CYCLES}.\\\n{{optional_internal_search_tool_description}}\\\n{{optional_web_search_tool_description}}\\\n{{optional_open_url_tool_description}}\n## {GENERATE_REPORT_TOOL_NAME}\nOnce you have completed your research, call the `{GENERATE_REPORT_TOOL_NAME}` tool. You should only call this tool after you have fully researched the topic.\n\"\"\".strip()\n\n\nOPEN_URL_REMINDER_RESEARCH_AGENT = \"\"\"\nRemember that after using web_search, you are encouraged to open some pages to get more context unless the query is completely answered by the snippets.\nOpen the pages that look the most promising and high quality by calling the open_url tool with an array of URLs.\n\"\"\".strip()\n# ruff: noqa: E501, W605 end\n"
  },
  {
    "path": "backend/onyx/prompts/federated_search.py",
    "content": "from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS\n\nSLACK_QUERY_EXPANSION_PROMPT = f\"\"\"\nRewrite the user's query into at most {MAX_SLACK_QUERY_EXPANSIONS} keyword-only queries for Slack's keyword search.\n\nSlack search behavior:\n- Pure keyword AND search (no semantics)\n- More words = fewer matches, so keep queries concise (1-3 words)\n\nALWAYS include:\n- Person names (e.g., \"Sarah Chen\", \"Mike Johnson\") - people search for messages from/about specific people\n- Project/product names, technical terms, proper nouns\n- Actual content words: \"performance\", \"bug\", \"deployment\", \"API\", \"error\"\n\nDO NOT include:\n- Meta-words: \"topics\", \"conversations\", \"discussed\", \"summary\", \"messages\"\n- Temporal: \"today\", \"yesterday\", \"week\", \"month\", \"recent\", \"last\"\n- Channel names: \"general\", \"eng-general\", \"random\"\n\nExamples:\n\nQuery: \"what are the big topics in eng-general this week?\"\nOutput:\n\nQuery: \"messages with Sarah about the deployment\"\nOutput:\nSarah deployment\nSarah\ndeployment\n\nQuery: \"what did Mike say about the budget?\"\nOutput:\nMike budget\nMike\nbudget\n\nQuery: \"performance issues in eng-general\"\nOutput:\nperformance issues\nperformance\nissues\n\nQuery: \"what did we discuss about the API migration?\"\nOutput:\nAPI migration\nAPI\nmigration\n\nNow process this query:\n\n{{query}}\n\nOutput (keywords only, one per line, NO explanations or commentary):\n\"\"\"\n\nSLACK_DATE_EXTRACTION_PROMPT = \"\"\"\nExtract the date range from the user's query and return it in a structured format.\n\nCurrent date context:\n- Today: {today}\n- Current time: {current_time}\n\nGuidelines:\n1. Return a JSON object with \"days_back\" (integer) indicating how many days back to search\n2. If no date/time is mentioned, return {{\"days_back\": null}}\n3. Interpret relative dates accurately:\n   - \"today\" or \"today's\" = 0 days back\n   - \"yesterday\" = 1 day back\n   - \"last week\" = 7 days back\n   - \"last month\" = 30 days back\n   - \"last X days\" = X days back\n   - \"past X days\" = X days back\n   - \"this week\" = 7 days back\n   - \"this month\" = 30 days back\n4. For creative expressions, interpret intent:\n   - \"recent\" = 7 days back\n   - \"recently\" = 7 days back\n   - \"lately\" = 14 days back\n5. Always be conservative - if uncertain, use a longer time range\n\nUser query: {query}\n\nReturn ONLY a valid JSON object in this format: {{\"days_back\": <integer or null>}}\nNothing else.\n\"\"\"\n"
  },
  {
    "path": "backend/onyx/prompts/filter_extration.py",
    "content": "# The following prompts are used for extracting filters to apply along with the query in the\n# document index. For example, a filter for dates or a filter by source type such as GitHub\n# or Slack\nSOURCES_KEY = \"sources\"\n\n# Smaller followup prompts in time_filter.py\nTIME_FILTER_PROMPT = \"\"\"\nYou are a tool to identify time filters to apply to a user query for a downstream search \\\napplication. The downstream application is able to use a recency bias or apply a hard cutoff to \\\nremove all documents before the cutoff. Identify the correct filters to apply for the user query.\n\nThe current day and time is {current_day_time_str}.\n\nAlways answer with ONLY a json which contains the keys \"filter_type\", \"filter_value\", \\\n\"value_multiple\" and \"date\".\n\nThe valid values for \"filter_type\" are \"hard cutoff\", \"favors recent\", or \"not time sensitive\".\nThe valid values for \"filter_value\" are \"day\", \"week\", \"month\", \"quarter\", \"half\", or \"year\".\nThe valid values for \"value_multiple\" is any number.\nThe valid values for \"date\" is a date in format MM/DD/YYYY, ALWAYS follow this format.\n\"\"\".strip()\n\n\n# Smaller followup prompts in source_filter.py\n# Known issue: LLMs like GPT-3.5 try to generalize. If the valid sources contains \"web\" but not\n# \"confluence\" and the user asks for confluence related things, the LLM will select \"web\" since\n# confluence is accessed as a website. This cannot be fixed without also reducing the capability\n# to match things like repository->github, website->web, etc.\n# This is generally not a big issue though as if the company has confluence, hopefully they add\n# a connector for it or the user is aware that confluence has not been added.\nSOURCE_FILTER_PROMPT = f\"\"\"\nGiven a user query, extract relevant source filters for use in a downstream search tool.\nRespond with a json containing the source filters or null if no specific sources are referenced.\nONLY extract sources when the user is explicitly limiting the scope of where information is \\\ncoming from.\nThe user may provide invalid source filters, ignore those.\n\nThe valid sources are:\n{{valid_sources}}\n{{web_source_warning}}\n{{file_source_warning}}\n\n\nALWAYS answer with ONLY a json with the key \"{SOURCES_KEY}\". \\\nThe value for \"{SOURCES_KEY}\" must be null or a list of valid sources.\n\nSample Response:\n{{sample_response}}\n\"\"\".strip()\n\nWEB_SOURCE_WARNING = \"\"\"\nNote: The \"web\" source only applies to when the user specifies \"website\" in the query. \\\nIt does not apply to tools such as Confluence, GitHub, etc. that have a website.\n\"\"\".strip()\n\nFILE_SOURCE_WARNING = \"\"\"\nNote: The \"file\" source only applies to when the user refers to uploaded files in the query.\n\"\"\".strip()\n\n\n# Use the following for easy viewing of prompts\nif __name__ == \"__main__\":\n    print(TIME_FILTER_PROMPT)\n    print(\"------------------\")\n    print(SOURCE_FILTER_PROMPT)\n"
  },
  {
    "path": "backend/onyx/prompts/image_analysis.py",
    "content": "# Used for creating embeddings of images for vector search\nDEFAULT_IMAGE_SUMMARIZATION_SYSTEM_PROMPT = \"\"\"\nYou are an assistant for summarizing images for retrieval.\nSummarize the content of the following image and be as precise as possible.\nThe summary will be embedded and used to retrieve the original image.\nTherefore, write a concise summary of the image that is optimized for retrieval.\n\"\"\"\n\n# Prompt for generating image descriptions with filename context\nDEFAULT_IMAGE_SUMMARIZATION_USER_PROMPT = \"\"\"\nDescribe precisely and concisely what the image shows.\n\"\"\"\n\n\n# Used for analyzing images in response to user queries at search time\nDEFAULT_IMAGE_ANALYSIS_SYSTEM_PROMPT = (\n    \"You are an AI assistant specialized in describing images.\\n\"\n    \"You will receive a user question plus an image URL. Provide a concise textual answer.\\n\"\n    \"Focus on aspects of the image that are relevant to the user's question.\\n\"\n    \"Be specific and detailed about visual elements that directly address the query.\\n\"\n)\n"
  },
  {
    "path": "backend/onyx/prompts/kg_prompts.py",
    "content": "# Standards\nSEPARATOR_LINE = \"-------\"\nSEPARATOR_LINE_LONG = \"---------------\"\nNO_EXTRACTION = \"No extraction of knowledge graph objects was feasible.\"\nYES = \"yes\"\nNO = \"no\"\n\n# Framing/Support/Template Prompts\nENTITY_TYPE_SETTING_PROMPT = f\"\"\"\n{SEPARATOR_LINE}\n{{entity_types}}\n{SEPARATOR_LINE}\n\"\"\".strip()\n\nRELATIONSHIP_TYPE_SETTING_PROMPT = f\"\"\"\nHere are the types of relationships:\n{SEPARATOR_LINE}\n{{relationship_types}}\n{SEPARATOR_LINE}\n\"\"\".strip()\n\nEXTRACTION_FORMATTING_PROMPT = r\"\"\"\n{{\"entities\": [<a list of entities of the prescribed entity types that you can reliably identify in the text, \\\nformatted as '<ENTITY_TYPE_NAME>::<entity_name>' (please use that capitalization). If allowed options \\\nare provided above, you can only extract those types of entities! Again, there should be an 'Other' \\\noption. Pick this if none of the others apply.>],\n\"relationships\": [<a list of IMPORTANT relationships between the identified entities, formatted as \\\n'<SOURCE_ENTITY_TYPE_NAME>::<source_entity_name>__<a word or two that captures the nature \\\nof the relationship (if appropriate, include a judgment, as in 'likes' or 'dislikes' vs. 'uses', etc.). \\\nCommon relationships may be: 'likes', 'dislikes', 'uses', 'is interested in', 'mentions', 'addresses', \\\n'participates in', etc., but look at the text to find the most appropriate relationship. \\\nUse spaces here for word separation. DO NOT INCLUDE RELATIONSHIPS THAT ARE SIMPLY MENTIONED, BUT ONLY \\\nTHOSE THAT ARE CENTRAL TO THE CONTENT! >\\\n__<TARGET_ENTITY_TYPE_NAME>::<target_entity_name>'>],\n\"terms\": [<a comma-separated list of high-level terms (each one one or two words) that you can reliably \\\nidentify in the text, each formatted simply as '<term>'>]\n}}\n\"\"\".strip()\n\nQUERY_ENTITY_EXTRACTION_FORMATTING_PROMPT = r\"\"\"\n{{\"entities\": [<a list of entities of the prescribed entity types that you can reliably identify in the text, \\\nformatted as '<ENTITY_TYPE_NAME>::<entity_name>' (please use that capitalization)>. Each entity \\\nalso should be followed by a list of comma-separated attribute filters for the entity, if referred to in the \\\nquestion for that entity. CRITICAL: you can only use attributes that are mentioned above for the \\\nentity type in question. Example: 'ACCOUNT::* -- [account_type: customer, status: active]' if the question is \\\n'list all customer accounts', and ACCOUNT was an entity type with these attribute key/values allowed.] \\\n\"time_filter\": <if needed, a SQL-like filter for a field called 'event_date'. Do not select anything here \\\nunless you are sure that the question asks for that filter. Only apply a time_filter if the question explicitly \\\nmentions a specific date, time period, or event that can be directly translated into a date filter. Do not assume \\\nthe current date, if given, as the event date or to imply that it should be a filter. Do not make assumptions here \\\nbut only use the information provided to infer whether there should be a time_filter, and if so, what it should be.>\n}}\n\"\"\".strip()\n\nQUERY_RELATIONSHIP_EXTRACTION_FORMATTING_PROMPT = r\"\"\"\n{{\"relationships\": [<a list of relationships between the identified entities, formatted as \\\n'<SOURCE_ENTITY_TYPE_NAME>::<source_entity_name>__<a word or two that captures the nature \\\nof the relationship (if appropriate, include a judgment, as in 'likes' or 'dislikes' vs. 'uses', etc.)>\\\n__<TARGET_ENTITY_TYPE_NAME>::<target_entity_name>'>]\n}}\n\"\"\".strip()\n\nEXAMPLE_1 = r\"\"\"\n{{\"entities\": [\"ACCOUNT::Nike\", \"CONCERN::*\"],\n    \"relationships\": [\"ACCOUNT::Nike__had__CONCERN::*\"], \"terms\": []}}\n\"\"\".strip()\n\nEXAMPLE_2 = r\"\"\"\n{{\"entities\": [\"ACCOUNT::Nike\", \"CONCERN::performance\"],\n    \"relationships\": [\"ACCOUNT::*__had_issues__CONCERN::performance\"], \"terms\": [\"performance issue\"]}}\n\"\"\".strip()\n\nEXAMPLE_3 = r\"\"\"\n{{\"entities\": [\"ACCOUNT::Nike\", \"CONCERN::performance\", \"CONCERN::user_experience\"],\n    \"relationships\": [\"ACCOUNT::Nike__had__CONCERN::performance\",\n                      \"ACCOUNT::Nike__solved__CONCERN::user_experience\"],\n    \"terms\": [\"performance\", \"user experience\"]}}\n\"\"\".strip()\n\nEXAMPLE_4 = r\"\"\"\n{{\"entities\": [\"ACCOUNT::Nike\", \"FEATURE::dashboard\", \"CONCERN::performance\"],\n    \"relationships\": [\"ACCOUNT::Nike__had__CONCERN::performance\",\n                      \"ACCOUNT::Nike__had_issues__FEATURE::dashboard\",\n                      \"ACCOUNT::NIKE__gets_value_from__FEATURE::dashboard\"],\n    \"terms\": [\"value\", \"performance\"]}}\n\"\"\".strip()\n\nRELATIONSHIP_EXAMPLE_1 = r\"\"\"\n'Which issues did Nike report?' and the extracted entities were found to be:\n\n  \"ACCOUNT::Nike\", \"CONCERN::*\"\n\nthen a valid relationship extraction could be:\n\n{{\"relationships\": [\"ACCOUNT::Nike__had__CONCERN::*\"]}}\n\"\"\".strip()\n\nRELATIONSHIP_EXAMPLE_2 = r\"\"\"\n'Did Nike say anything about performance issues?' and the extracted entities were found to be:\n\n\"ACCOUNT::Nike\", \"CONCERN::performance\"\n\nthen a much more suitable relationship extraction could be:\n{{\"relationships\": [\"ACCOUNT::*__had_issues__CONCERN::performance\"]}}\n\"\"\".strip()\n\nRELATIONSHIP_EXAMPLE_3 = r\"\"\"\n'Did Nike report some performance issues with our solution? And were they happy that the user experience issue got solved?', \\\nand the extracted entities were found to be:\n\n\"ACCOUNT::Nike\", \"CONCERN::performance\", \"CONCERN::user_experience\"\n\nthen a valid relationship extraction could be:\n\n{{\"relationships\": [\"ACCOUNT::Nike__had__CONCERN::performance\",\n                      \"ACCOUNT::Nike__solved__CONCERN::user_experience\"]}}\n\"\"\".strip()\n\nRELATIONSHIP_EXAMPLE_4 = r\"\"\"\n'Nike reported some performance issues with our dashboard solution, but do they think it delivers great value nevertheless?' \\\nand the extracted entities were found to be:\n\n\"ACCOUNT::Nike\", \"FEATURE::dashboard\", \"CONCERN::performance\"\n\nthen a valid relationship extraction could be:\nExample 4:\n\n{{\"relationships\": [\"ACCOUNT::Nike__had__CONCERN::performance\",\n                      \"ACCOUNT::Nike__had_issues__FEATURE::dashboard\",\n                      \"ACCOUNT::NIKE__gets_value_from__FEATURE::dashboard\"]}}\n\nExplanation:\n - Nike did report performance concerns\n - Nike had problems with the dashboard, which is a feature\n - We are interested in the value relationship between Nike and the dashboard feature\n\n\"\"\".strip()\n\nRELATIONSHIP_EXAMPLE_5 = r\"\"\"\n'In which emails did Nike discuss their issues with the dashboard?' \\\nand the extracted entities were found to be:\n\n\"ACCOUNT::Nike\", \"FEATURE::dashboard\", \"EMAIL::*\"\n\nthen a valid relationship extraction could be:\n\n{{\"relationships\": [\"ACCOUNT::Nike__had__CONCERN::*\",\n                      \"ACCOUNT::Nike__had_issues__FEATURE::dashboard\",\n                      \"ACCOUNT::NIKE__in__EMAIL::*\",\n                      \"EMAIL::*__discusses__FEATURE::dashboard\",\n                      \"EMAIL::*Nike__had__CONCERN::* \"]}}\nExplanation:\n - Nike did report unspecified concerns\n - Nike had problems with the dashboard, which is a feature\n - We are interested in emails that Nike exchanged with us\n\"\"\".strip()\n\nRELATIONSHIP_EXAMPLE_6 = r\"\"\"\n'List the last 5 emails that Lisa exchanged with Nike:' \\\nand the extracted entities were found to be:\n\n\"ACCOUNT::Nike\", \"EMAIL::*\", \"EMPLOYEE::Lisa\"\n\nthen a valid relationship extraction could be:\n\n{{\"relationships\": [\"ACCOUNT::Nike__had__CONCERN::*\",\n                      \"ACCOUNT::Nike__had_issues__FEATURE::dashboard\",\n                      \"ACCOUNT::NIKE__in__EMAIL::*\"]}}\nExplanation:\n - Nike did report unspecified concerns\n - Nike had problems with the dashboard, which is a feature\n - We are interested in emails that Nike exchanged with us\n\"\"\".strip()\n\n\nENTITY_EXAMPLE_1 = r\"\"\"\n{{\"entities\": [\"ACCOUNT::Nike--[]\", \"CONCERN::*--[]\"]}}\n\"\"\".strip()\n\nENTITY_EXAMPLE_2 = r\"\"\"\n{{\"entities\": [\"ACCOUNT::Nike--[]\", \"CONCERN::performance--[]\"]}}\n\"\"\".strip()\n\nENTITY_EXAMPLE_3 = r\"\"\"\n{{\"entities\": [\"ACCOUNT::*--[]\", \"CONCERN::performance--[]\", \"CONCERN::user_experience--[]\"]}}\n\"\"\".strip()\n\nENTITY_EXAMPLE_4 = r\"\"\"\n{{\"entities\": [\"ACCOUNT::*--[]\", \"CONCERN::performance--[degree: severe]\"]}}\n\"\"\".strip()\n\nMASTER_EXTRACTION_PROMPT = f\"\"\"\nYou are an expert in the area of knowledge extraction in order to construct a knowledge graph. You are given a text \\\nand asked to extract entities, relationships, and terms from it that you can reliably identify.\n\nHere are the entity types that are available for extraction. Some of them may have a description, others \\\nshould be obvious. Also, for a given entity allowed options may be provided. If allowed options are provided, \\\nyou can only extract those types of entities! If no allowed options are provided, take your best guess.\n\nYou can ONLY extract entities of these types and relationships between objects of these types:\n{SEPARATOR_LINE}\n{ENTITY_TYPE_SETTING_PROMPT}\n{SEPARATOR_LINE}\nPlease format your answer in this format:\n{SEPARATOR_LINE}\n{EXTRACTION_FORMATTING_PROMPT}\n{SEPARATOR_LINE}\n\nThe list above here is the exclusive, only list of entities you can choose from!\n\nHere are some important additional instructions. (For the purpose of illustration, assume that ]\n \"ACCOUNT\", \"CONCERN\", and \"FEATURE\" are all in the list of entity types above, and shown actual \\\nentities fall into allowed options. Note that this \\\nis just assumed for these examples, but you MUST use only the entities above for the actual extraction!)\n\n- You can either extract specific entities if a specific entity is referred to, or you can refer to the entity type.\n* if the entity type is referred to in general, you would use '*' as the entity name in the extraction.\nAs an example, if the text would say:\n 'Nike reported that they had issues'\nthen a valid extraction could be:\nExample 1:\n{EXAMPLE_1}\n\n* If on the other hand the text would say:\n'Nike reported that they had performance issues'\nthen a much more suitable extraction could be:\nExample 2:\n{EXAMPLE_2}\n\n- You can extract multiple relationships between the same two entity types.\nAs an example, if the text would say:\n'Nike reported some performance issues with our solution, but they are very happy that the user experience issue got solved.'\nthen a valid extraction could be:\nExample 3:\n{EXAMPLE_3}\n\n- You can extract multiple relationships between the same two actual entities if you think that \\\nthere are multiple relationships between them based on the text.\nAs an example, if the text would say:\n'Nike reported some performance issues with our dashboard solution, but they think it delivers great value.'\nthen a valid extraction could be:\nExample 4:\n{EXAMPLE_4}\n\nNote that effectively a three-way relationship (Nike - performance issues - dashboard) extracted as two individual \\\nrelationships.\n\n- Again,\n   -  you should only extract entities belonging to the entity types above - but do extract all that you \\\ncan reliably identify in the text\n   - use refer to 'all' entities in an entity type listed above by using '*' as the entity name\n   - only extract important relationships that signify something non-trivial, expressing things like \\\nneeds, wants, likes, dislikes, plans, interests, lack of interests, problems the account is having, etc.\n   - you MUST only use the initial list of entities provided! Ignore the entities in the examples unless \\\nthey are also part of the initial list of entities! This is essential!\n   - only extract relationships between the entities extracted first!\n\n\n{SEPARATOR_LINE}\n\nHere is the text you are asked to extract knowledge from, if needed with additional information about any participants:\n{SEPARATOR_LINE}\n---content---\n{SEPARATOR_LINE}\n\"\"\".strip()\n\n\nQUERY_ENTITY_EXTRACTION_PROMPT = f\"\"\"\nYou are an expert in the area of knowledge extraction and using knowledge graphs. You are given a question \\\nand asked to extract entities (with attributes if applicable) that you can reliably identify, which will then\nbe matched with a known entity in the knowledge graph. You are also asked to extract time constraints information \\\nfrom the QUESTION. Some time constraints will be captured by entity attributes if \\\nthe entity type has a fitting attribute (example: 'created_at' could be a candidate for that), other times\nwe will extract an explicit time filter if no attribute fits. (Note regarding 'last', 'first', etc.: DO NOT \\\nimply the need for a time filter just because the question asks for something that is not the current date. \\\nThey will relate to ordering that we will handle separately later).\n\nIn case useful, today is ---today_date--- and the user asking is ---user_name---, which may or may not be relevant.\nHere are the entity types that are available for extraction. Some of them may have \\\na description, others should be obvious. Also, notice that some may have attributes associated with them, which will \\\nbe important later.\nYou can ONLY extract entities of these types:\n{SEPARATOR_LINE}\n{ENTITY_TYPE_SETTING_PROMPT}\n{SEPARATOR_LINE}\n\nThe list above here is the exclusive, only list of entities you can choose from!\n\nAlso, note that there are fixed relationship types between these entities. Please consider those \\\nas well so to make sure that you are not missing implicit entities! Implicit entities are often \\\nin verbs ('emailed to', 'talked to', ...). Also, they may be used to connect entities that are \\\nclearly in the question.\n\n{SEPARATOR_LINE}\n{RELATIONSHIP_TYPE_SETTING_PROMPT}\n{SEPARATOR_LINE}\n\nHere are some important additional instructions. (For the purpose of illustration, assume that \\\n \"ACCOUNT\", \"CONCERN\", \"EMAIL\", and \"FEATURE\" are all in the list of entity types above, and the \\\nattribute options for \"CONCERN\" include 'degree' with possible values that include 'severe'. Note that this \\\nis just assumed for these examples, but you MUST use only the entities above for the actual extraction!)\n\n- You can either extract specific entities if a specific entity is referred to, or you can refer to the entity type.\n* if the entity type is referred to in general, you would use '*' as the entity name in the extraction.\nAs an example, if the question would say:\n 'Which issues did Nike report?'\nthen a valid entity and term extraction could be:\nExample 1:\n{ENTITY_EXAMPLE_1}\n\n* If on the other hand the question would say:\n'Did Nike say anything about performance issues?'\nthen a much more suitable entity and term extraction could be:\nExample 2:\n{ENTITY_EXAMPLE_2}\n\n* Then, if the question is:\n'Who reported performance issues?'\nthen a suitable entity and term extraction could be:\nExample 3:\n{ENTITY_EXAMPLE_3}\n\n* Then, if we inquire about an entity with a specific attribute :\n'Who reported severe performance issues?'\nthen a suitable entity and term extraction could be:\nExample 3:\n{ENTITY_EXAMPLE_4}\n\n- Again,\n   -  you should only extract entities belonging to the entity types above - but do extract all that you \\\ncan reliably identify in the text\n   - if you refer to all/any/an unspecified entity of an entity type listed above, use '*' as the entity name\n   - similarly, if a specific entity type is referred to in general, you should use '*' as the entity name\n   - you MUST only use the initial list of entities provided! Ignore the entities in the examples unless \\\nthey are also part of the initial list of entities! This is essential!\n   - don't forget to provide answers also to the event filtering and whether documents need to be inspected!\n   - 'who' often refers to individuals or accounts.\n   - see whether any of the entities are supposed to be narrowed down by an attribute value. The precise attribute \\\nand the value would need to be taken from the specification, as the question may use different words and the \\\nactual attribute may be implied.\n   - don't just look at the entities that are mentioned in the question but also those that the question \\\nmay be about.\n  - be very careful that you only extract attributes that are listed above for the entity type in question! Do \\\nnot make up attributes even if they are implied! Particularly if there is a relationship type that would \\\nactually represent that information, you MUST not extract the information as an attribute. We \\\nwill extract the relationship type later.\n  - For the values of attributes, look at the possible values above! For example 'open' may refer to \\\n'backlog', 'todo', 'in progress', etc. In cases like that construct a ';'-separated list of values that you think may fit \\\nwhat is implied in the question (in the exanple: 'open; backlog; todo; in progress').\n\nAlso, if you think the name or the title of an entity is given but name or title are not mentioned \\\nexplicitly as an attribute, then you should indeed extract the name/title as the entity name.\n\n{SEPARATOR_LINE}\n\nHere is the question you are asked to extract desired entities and time filters from:\n{SEPARATOR_LINE}\n---content---\n{SEPARATOR_LINE}\n\nPlease format your answer in this format:\n{SEPARATOR_LINE}\n{QUERY_ENTITY_EXTRACTION_FORMATTING_PROMPT}\n{SEPARATOR_LINE}\n\n\"\"\".strip()\n\n\nQUERY_RELATIONSHIP_EXTRACTION_PROMPT = f\"\"\"\nYou are an expert in the area of knowledge extraction and using knowledge graphs. You are given a question \\\nand previously you were asked to identify known entities in the question. Now you are asked to extract \\\nthe relationships between the entities you have identified earlier.\n\nFirst off as background, here are the entity types that are known to the system:\n{SEPARATOR_LINE}\n---entity_types---\n{SEPARATOR_LINE}\n\n\nHere are the entities you have identified earlier:\n{SEPARATOR_LINE}\n---identified_entities---\n{SEPARATOR_LINE}\n\nNote that the notation for the entities is <ENTITY_TYPE>::<ENTITY_NAME>.\n\nHere are the options for the relationship types(!) between the entities you have identified earlier \\\nas well as relationship types between the identified entities and other entities \\\nnot explicitly mentioned:\n{SEPARATOR_LINE}\n---relationship_type_options---\n{SEPARATOR_LINE}\n\nThese types are, if any were identified, formatted as \\\n<SOURCE_ENTITY_TYPE>__<RELATIONSHIP_SHORTHAND>__<TARGET_ENTITY_TYPE>, and they \\\nlimit the allowed relationships that you can extract. You would then though use the actual full entities as in:\n\n<SOURCE_ENTITY_TYPE>::<SOURCE_ENTITY_NAME>__<RELATIONSHIP_SHORTHAND>__<TARGET_ENTITY_TYPE>::<TARGET_ENTITY_NAME>.\n\nNote: <RELATIONSHIP_SHORTHAND> should be a word or two that captures the nature \\\nof the relationship. Common relationships may be: 'likes', 'dislikes', 'uses', 'is interested in', 'mentions', \\\n'addresses', 'participates in', etc., but look at the text to find the most appropriate relationship. \\\nUse spaces here for word separation.\n\nPlease format your answer in this format:\n{SEPARATOR_LINE}\n{QUERY_RELATIONSHIP_EXTRACTION_FORMATTING_PROMPT}\n{SEPARATOR_LINE}\n\nThe list above here is the exclusive, only list of entities and relationship types you can choose from!\n\nHere are some important additional instructions. (For the purpose of illustration, assume that ]\n \"ACCOUNT\", \"CONCERN\", and \"FEATURE\" are all in the list of entity types above. Note that this \\\nis just assumed for these examples, but you MUST use only the entities above for the actual extraction!)\n\n- You can either extract specific entities if a specific entity is referred to, or you can refer to the entity type.\n* if the entity type is referred to in general, you would use '*' as the entity name in the extraction.\n\nAs an example, if the question would say:\n\n{RELATIONSHIP_EXAMPLE_1}\n\n* If on the other hand the question would say:\n\n{RELATIONSHIP_EXAMPLE_2}\n\n- You can extract multiple relationships between the same two entity types.\nFor example 3, if the question would say:\n\n{RELATIONSHIP_EXAMPLE_3}\n\n- You can extract multiple relationships between the same two actual entities if you think that \\\nthere are multiple relationships between them based on the question.\nAs an example, if the question would say:\n\n{RELATIONSHIP_EXAMPLE_4}\n\nNote that effectively a three-way relationship (Nike - performance issues - dashboard) extracted as two individual \\\nrelationships.\n\n- Again,\n   - you can only extract relationships between the entities extracted earlier\n   - you can only extract the relationships that match the listed relationship types\n   - if in doubt and there are multiple relationships between the same two entities, you can extract \\\nall of those that may fit with the question.\n   - be really thinking through the question which type of relationships should be extracted and which should not.\n\nOther important notes:\n - For questions that really try to explore in general what a certain entity was involved in like 'what did Paul Smith do \\\nin the last 3 months?', and Paul Smith has been extracted i.e. as an entity of type 'EMPLOYEE', then you need to extract \\\nall of the possible relationships an empoyee Paul Smith could have.\n - You are not forced to use all or any of the relationship types listed above. Really look at the question to \\\n determine which relationships are explicitly or implicitly referred to in the question.\n\n{SEPARATOR_LINE}\n\nHere is the question you are asked to extract desired entities, relationships, and terms from:\n{SEPARATOR_LINE}\n---question---\n{SEPARATOR_LINE}\n\"\"\".strip()\n\n\nGENERAL_CHUNK_PREPROCESSING_PROMPT = \"\"\"\nThis is a part of a document that you need to extract information (entities, relationships) from.\n\nNote: when you extract relationships, please make sure that:\n  - if you see a relationship for one of our employees, you should extract the relationship both for the employee AND \\\n    VENDOR::{vendor}.\n  - if you see a relationship for one of the representatives of other accounts, you should extract the relationship \\\nonly for the account ACCOUNT::<account_name>!\n\n--\nAnd here is the content:\n{content}\n\"\"\".strip()\n\n\n### Source-specific prompts\n\nCALL_CHUNK_PREPROCESSING_PROMPT = \"\"\"\nThis is a call between employees of the VENDOR's company and representatives of one or more ACCOUNTs (usually one). \\\nWhen you extract information based on the instructions, please make sure that you properly attribute the information \\\nto the correct employee and account. \\\n\nHere are the participants (name component of email) from us ({vendor}):\n{participant_string}\n\nHere are the participants (name component of email) from the other account(s):\n{account_participant_string}\n\nIn the text it should be easy to associate a name with the email, and then with the account ('us' vs 'them'). If in doubt, \\\nlook at the context and try to identify whether the statement comes from the other account. If you are not sure, ignore.\n\nNote: when you extract relationships, please make sure that:\n  - if you see a relationship for one of our employees, you should extract the relationship both for the employee AND \\\n    VENDOR::{vendor}.\n  - if you see a relationship for one of the representatives of other accounts, you should extract the relationship \\\nonly for the account ACCOUNT::<account_name>!\n\n--\nAnd here is the content:\n{content}\n\"\"\".strip()\n\n\nCALL_DOCUMENT_CLASSIFICATION_PROMPT = \"\"\"\nThis is the beginning of a call between employees of the VENDOR's company ({vendor}) and other participants.\n\nYour task is to classify the call into one of the following categories:\n{category_options}\n\nPlease also consider the participants when you perform your classification task - they can be important indicators \\\nfor the category.\n\nPlease format your answer as a string in the format:\n\nREASONING: <your reasoning for the classification> - CATEGORY: <the category you have chosen. Only use {category_list}>\n\n--\nAnd here is the beginning of the call, including title and participants:\n\n{beginning_of_call_content}\n\"\"\".strip()\n\n\nSTRATEGY_GENERATION_PROMPT = f\"\"\"\nNow you need to decide what type of strategy to use to answer a given question, how ultimately \\\nthe answer should be formatted to match the user's expectation, and what an appropriate question \\\nto/about 'one object or one set of objects' may be, should the answer logically benefit from a divide \\\nand conquer strategy, or it naturally relates to one or few individual objects. Also, you are \\\nsupposed to determine whether a divide and conquer strategy would be appropriate.\n\n\nHere are the entity types that are available in the knowledge graph:\n{SEPARATOR_LINE}\n---possible_entities---\n{SEPARATOR_LINE}\n\nHere are the relationship types that are available in the knowledge graph:\n{SEPARATOR_LINE}\n---possible_relationships---\n{SEPARATOR_LINE}\n\nHere is the question whose answer is ultimately sought:\n{SEPARATOR_LINE}\n---question---\n{SEPARATOR_LINE}\n\nAnd here are the entities and relationships that have been extracted earlier from this question:\n{SEPARATOR_LINE}\n---entities---\n---relationships---\n{SEPARATOR_LINE}\n\nHere are more instructions:\n\na) Regarding the strategy, there are three aspects to it:\n\na1) \"Search Type\":\nShould the question be answered as a SEARCH ('filtered search'), or as a SQL ('SQL query search')?\n\nThe options are:\n1. SEARCH: A filtered search simply uses the entities and relationships that you extracted earlier and \\\napplies them as filters to search the underlying documents, which are properly indexed. Examples are \\\n'what did Nike say about the Analyzer product?', or 'what did I say in my calls with Nike about pricing?'. So this \\\nis used really when there is *no implicit or explicit constraint or requirements* on underlying source documents \\\noutside of filters, and there is no ordering, no limiting their number, etc. So use this for a question that \\\ntries to get information *across* documents which may be filtered by their related relationships and entities, but without \\\nother constraints.\n\n2. SQL: Choose this option if the question either requires counting of entities (e.g. 'how many calls...'), or \\\nif the query refers to specific entities that first need to be identified and then analyzed/searched/listed. \\\nExamples here are 'what did I say about pricing in my call with Nike last week?' (the specific call needs to \\\nbe identified first and then analyzed),  \\\n'what are the next steps of our two largest opportunities?', or 'summarize my 3 most recent customer calls'. So \\\nthis is used if there *are implicit constraints* on the underlying source documents beyond filtering, including \\\nordering, limiting, etc. Use this also if the answer expects to analyze each source independently as part \\\nof the overall answer.\n\nNote:\n - here, you should look at the extracted entities and relationships and judge whether using them as filters \\\n(using an *and*) would be appropriate to identify the range of underlying sources, or whether more \\\ncalculations would be needed to find the underlying sources ('last 2...', etc.) .\n - It is also *critical* to look at the attributes of the entities! You only can use the given attributes (and their\n values, if given) as where conditions etc in a SQL statement. So if you think you would 'want\n to' have a where condition but there is not appropriate attribute, then you should not use the SQL strategy\n but the SEARCH strategy. (A Search can always look through data and see what is the best fit, SQL needs to\n be more specific.). On the other hand, if the question maps well to the entities and attributes, then\n SQL may be a good choice.\n - Likely, if there are questions 'about something', then this only is used in a SQL statement or a filter \\\n if it shows up as an entity or relationship in the extracted entities and relationships. Otherwise, it will \\\n be part of the analysis/search. not the document identification.\n - again, note that we can only FILTER (SEARCH) or COMPUTE (SQL) using the extracted entities (and their attributes)\n and relationships. \\\n So do not think that if there is another term in the question, it should be included in the SQL statement. \\\n It cannot.\n\n\na2) \"Search Strategy\":\nIf a SQL search is chosen, i.e., documents have to be identified first, there are two approaches:\n1. SIMPLE: You think you can answer the question using a database that is aware of the entities, relationships \\\nabove, and is generally suitable if it is enough to either list or count entities, return dates, etc. Usually, \\\n'SIMPLE' is chosen for questions of the form 'how many...' (always), or 'list the...' (often), 'when was...', \\\n'what did (someone) work on...'etc. Often it is also used in cases like 'what did John work on since April?'. Here, \\\nthe user would expect to just see the list. So chose 'SIMPLE' here unless there are REALLY CLEAR \\\nfollow-up instructions for each item (like 'summarize...' , 'analyze...', 'what are the main points of...'.) If \\\nit is a 'what did...'-type question, choose 'SIMPLE'!\n\n2. DEEP: You think you really should ALSO leverage the actual text of sources to answer the question, which sits \\\nin a vector database. Examples are 'what is discussed in...', 'summarize', 'what is the discussion about...',\\\n'how does... relate to...', 'are there any mentions of... in..', 'what are the main points in...', \\\n'what are the next steps...', etc. Those are usually questions 'about' \\\nthe entities retrieved from the knowledge graph, or questions about the underlying sources.\n\nYour task is to decide which of the two strategies to use.\n\na3) \"Relationship Detection\":\nYou need to evaluate whether the question involves any relationships between entities (of the same type) \\\nor between entities and relationships.  Respond with 'RELATIONSHIPS' or 'NO_RELATIONSHIPS'.\n\nb) Regarding the format of the answer: there are also two types of formats available to you:\n\n1. LIST: The user would expect an answer as a bullet point list of objects, likely with text associated with each \\\nbullet point (or sub-bullet). This will be clearer once the data is available.\n2. TEXT: The user would expect the questions to be answered in text form.\n\nYour task is to decide which of the two formats to use.\n\n\nc) Regarding the broken down question for one object:\n\nAlways generate a broken_down_question if the question pertains ultimately to a specific objects, even if it seems to be \\\na singular object.\n\n- If the question is of type 'how many...', or similar, then imagine that the individual objects have been \\\nfound and you want to ask each object something that illustrates why/in what what that object relates to the \\\nquestion. (question: 'How many cars are fast?' -> broken_down_question: 'How fast is this car?')\n\n- Assume the answer would either i) best be generated by first analyzing one object at a time, then aggregating \\\nthe results, or ii) directly relates to one or few objects found through matching suitable criteria.\n\n- The key is to drop any filtering/criteria matching as the objects are already filtered by the criteria. Also, do not \\\ntry to verify here whether the object in question actually satisfies a filter criteria, but rather see \\\nwhat it says/does etc. In other words, use this to identify more details about the object, as it relates \\\nto the original question.\n(Example: question: 'What did our oil & gas customers say about the new product?' -> broken_down_question: \\\n'What did this customer say about the new product?',\nor:\nquestion: 'What was in the email from Frank?' -> broken_down_question: 'What is in this email?')\n\n\nd) Regarding the divide and conquer strategy:\n\nYou are supposed to decide whether a divide and conquer strategy would be appropriate. That means, do you think \\\nthat in order to answer the question, it would be good to first analyze one object at a time, and then aggregate the \\\nresults? Or should the information rather be analyzed as a whole? This would be 'yes' or 'no'.\n\nPlease answer in json format in this form:\n\n{{\n    \"search_type\": <see search-type instructions above, answer with \"SEARCH\" or \"SQL\">,\n    \"search_strategy\": <see search-strategy instructions above, answer with \"DEEP\" or \"SIMPLE\">,\n    \"relationship_detection\": <see relationship-detection instructions above, answer with \"RELATIONSHIPS\" or \"NO_RELATIONSHIPS\">,\n    \"format\": <see format instructions above, answer with \"LIST\" or \"TEXT\">,\n    \"broken_down_question\": <see broken-down-question instructions above, answer with the question \\\nthat should be used to analyze each object/each source (or 'the object' that fits all criteria).>,\n    \"divide_and_conquer\": <see divide-and-conquer instructions above, answer with \"yes\" or \"no\">\n}}\n\nDo not include any other text or explanations.\n\"\"\"\n\nSOURCE_DETECTION_PROMPT = f\"\"\"\nYou are an expert in generating, understanding and analyzing SQL statements.\n\nYou are given an original SQL statement that returns a list of entities from a table or \\\nan aggregation of entities from a table. Your task will be to \\\nidentify the source documents that are relevant to what the SQL statement is returning.\n\nThe task is actually quite simple. There are two tables involved - relationship_table and entity_table. \\\nrelationship_table was used to generate the original SQL statement. Again, returning entities \\\nor aggregations of entities. The second table, entity_table contains the entities and \\\nthe corresponding source_documents. All you need to do is to appropriately join the \\\nentity_table table on the entities that would be retrieved from the original SQL statement, \\\nand then return the source_documents from the entity_table table.\n\nFor your orientation, the relationship_table table has this structure:\n - Table name: relationship_table\n - Columns:\n   - relationship (str): The name of the RELATIONSHIP, combining the nature of the relationship and the names of the entities. \\\nIt is of the form \\\n<source_entity_type>::<source_entity_name>__<relationship_description>__<target_entity_type>::<target_entity_name> \\\n[example: ACCOUNT::Nike__has__CONCERN::performance]. Note that this is NOT UNIQUE!\n   - source_entity (str): the id of the source ENTITY/NODE in the relationship [example: ACCOUNT::Nike]\n   - source_entity_attributes (json): the attributes of the source entity/node [example: {{\"account_type\": \"customer\"}}]\n   - target_entity (str): the id of the target ENTITY/NODE in the relationship [example: CONCERN::performance]\n   - target_entity_attributes (json): the attributes of the target entity/node [example: {{\"degree\": \"severe\"}}]\n   - source_entity_type (str): the type of the source entity/node [example: ACCOUNT]. Only the entity types provided \\\n   below are valid.\n   - target_entity_type (str): the type of the target entity/node [example: CONCERN]. Only the entity types provided \\\n   below are valid.\n   - relationship_type (str): the type of the relationship, formatted as  \\\n<source_entity_type>__<relationship_description>__<target_entity_type>.   So the explicit entity_names have \\\nbeen removed. [example: ACCOUNT__has__CONCERN]\n   - source_date (str): the 'event' date of the source document [example: 2021-01-01]\n\nThe second table, entity_table, has this structure:\n - Table name: entity_table\n - Columns:\n   - entity (str): The name of the ENTITY, which is unique in this table. source_entity and target_entity \\\nin the relationship_table table are the same as entity in this table.\n   - source_document (str): the id of the document that contains the entity.\n\nAgain, ultimately, your task is to join the entity_table table on the entities that would be retrieved from the \\\noriginal SQL statement, and then return the source_documents from the entity_table table.\n\nThe way to do that is to create a common table expression for the original SQL statement and join the \\\nentity_table table suitably on the entities.\n\nHere is the *original* SQL statement:\n{SEPARATOR_LINE}\n---original_sql_statement---\n{SEPARATOR_LINE}\n\nPlease structure your answer using <reasoning>, </reasoning>,<sql>, </sql> start and end tags as in:\n\n<reasoning>[think very briefly through the problem step by step, not more than 2-3 sentences]</reasoning> \\\n<sql>[the new SQL statement that returns the source documents involved in the original SQL statement]</sql>\n\"\"\".strip()\n\nENTITY_SOURCE_DETECTION_PROMPT = f\"\"\"\nYou are an expert in generating, understanding and analyzing SQL statements.\n\nYou are given a SQL statement that returned an aggregation of entities in a table. \\\nYour task will be to identify the source documents for the entities involved in \\\nthe answer. For example, should the original SQL statement be \\\n'SELECT COUNT(entity) FROM entity_table where entity_type = \"ACCOUNT\"' \\\nthen you should return the source documents that contain the entities of type 'ACCOUNT'.\n\nThe table has this structure:\n - Table name: entity_table\n - Columns:\n   - entity (str): The name of the ENTITY, combining the nature of the entity and the id of the entity. \\\nIt is of the form <entity_type>::<entity_name> [example: ACCOUNT::625482894].\n   - entity_type (str): the type of the entity [example: ACCOUNT].\n   - entity_attributes (json): the attributes of the entity [example: {{\"priority\": \"high\", \"status\": \"active\"}}]\n   - source_document (str): the id of the document that contains the entity. Note that the combination of \\\nid_name and source_document IS UNIQUE!\n   - source_date (timestamp): the 'event' date of the source document [example: 2025-04-25 21:43:31.054741+00]\n\nSpecifically, the table contains the 'source_document' column, which is the id of the source document that \\\ncontains the core information about the entity. Make sure that you do not return more documents, i.e. if there \\\nis a limit on source documents in the original SQL statement, the new SQL statement needs to have \\\nthe same limit.\n\nCRITICAL NOTES:\n - Only return source documents and nothing else!\n\nYour task is then to create a new SQL statement that returns the source documents that are relevant to what the \\\noriginal SQL statement is returning. So the source document of every row used in the original SQL statement should \\\nbe included in the result of the new SQL statement, and then you should apply a 'distinct'.\n\nHere is the *original* SQL statement:\n{SEPARATOR_LINE}\n---original_sql_statement---\n{SEPARATOR_LINE}\n\nPlease structure your answer using <reasoning>, </reasoning>,<sql>, </sql> start and end tags as in:\n\n<reasoning>[think very briefly through the problem step by step, not more than 2-3 sentences]</reasoning> \\\n<sql>[the new SQL statement that returns the source documents involved in the original SQL statement]</sql>\n\"\"\".strip()\n\n\nENTITY_TABLE_DESCRIPTION = f\"\"\"\\\n - Table name: entity_table\n - Columns:\n   - entity (str): The name of the ENTITY, combining the nature of the entity and the id of the entity. \\\nIt is of the form <entity_type>::<entity_name> [example: ACCOUNT::625482894].\n   - entity_type (str): the type of the entity [example: ACCOUNT].\n   - entity_attributes (json): the attributes of the entity [example: {{\"priority\": \"high\", \"status\": \"active\"}}]\n   - source_document (str): the id of the document that contains the entity. Note that the combination of \\\nid_name and source_document IS UNIQUE!\n   - source_date (timestamp): the 'event' date of the source document [example: 2025-04-25 21:43:31.054741+00]\n\n{SEPARATOR_LINE}\n\nImportantly, here are the entity (node) types that you can use, with a short description of what they mean. You may need to \\\nidentify the proper entity type through its description. Also notice the allowed attributes for each entity type and \\\ntheir values, if provided. Of particular importance is the 'subtype' attribute, if provided, as this is how \\\nthe entity type may also often be referred to.\n{SEPARATOR_LINE}\n---entity_types---\n{SEPARATOR_LINE}\n\"\"\"\n\nRELATIONSHIP_TABLE_DESCRIPTION = f\"\"\"\\\n - Table name: relationship_table\n - Columns:\n   - relationship (str): The name of the RELATIONSHIP, combining the nature of the relationship and the names of the entities. \\\nIt is of the form \\\n<source_entity_type>::<source_entity_name>__<relationship_description>__<target_entity_type>::<target_entity_name> \\\n[example: ACCOUNT::Nike__has__CONCERN::performance]. Note that this is NOT UNIQUE!\n   - source_entity (str): the id of the source ENTITY/NODE in the relationship [example: ACCOUNT::Nike]\n   - source_entity_attributes (json): the attributes of the source entity/node [example: {{\"account_type\": \"customer\"}}]\n   - target_entity (str): the id of the target ENTITY/NODE in the relationship [example: CONCERN::performance]\n   - target_entity_attributes (json): the attributes of the target entity/node [example: {{\"degree\": \"severe\"}}]\n   - source_entity_type (str): the type of the source entity/node [example: ACCOUNT]. Only the entity types provided \\\n   below are valid.\n   - target_entity_type (str): the type of the target entity/node [example: CONCERN]. Only the entity types provided \\\n   below are valid.\n   - relationship_type (str): the type of the relationship, formatted as  \\\n<source_entity_type>__<relationship_description>__<target_entity_type>.   So the explicit entity_names have \\\nbeen removed. [example: ACCOUNT__has__CONCERN]\n   - source_document (str): the id of the document that contains the relationship. Note that the combination of \\\nid_name and source_document IS UNIQUE!\n   - source_date (timestamp): the 'event' date of the source document [example: 2025-04-25 21:43:31.054741+00]\n\n{SEPARATOR_LINE}\n\nImportantly, here are the entity (node) types that you can use, with a short description of what they mean. You may need to \\\nidentify the proper entity type through its description. Also notice the allowed attributes for each entity type and \\\ntheir values, if provided. Of particular importance is the 'subtype' attribute, if provided, as this is how \\\nthe entity type may also often be referred to.\n{SEPARATOR_LINE}\n---entity_types---\n{SEPARATOR_LINE}\n\nHere are the relationship types that are in the table, denoted as <source_entity_type>__<relationship_type>__<target_entity_type>.\nIn the table, the actual relationships are not quite of this form, but each <entity_type> is followed by '::<entity_name>' \\\nin the relationship id as shown above.\n{SEPARATOR_LINE}\n---relationship_types---\n{SEPARATOR_LINE}\n\"\"\"\n\n\nSIMPLE_SQL_PROMPT = f\"\"\"\nYou are an expert in generating a SQL statement that only uses ONE TABLE that captures RELATIONSHIPS \\\nbetween TWO ENTITIES. The table has the following structure:\n\n{SEPARATOR_LINE}\n{RELATIONSHIP_TABLE_DESCRIPTION}\n\nHere is the question you are supposed to translate into a SQL statement:\n{SEPARATOR_LINE}\n---question---\n{SEPARATOR_LINE}\n\nTo help you, we already have identified the entities and relationships that the SQL statement likely *should* use (but note the \\\nexception below!). The entities also contain the list of attributes and attribute values that should specify the entity. \\\nThe format is <entity_type>::<entity_name>--[<attribute_name_1>:<attribute_value_1>, \\\n<attribute_name_2>:<attribute_value_2>, ...].\n{SEPARATOR_LINE}\nIdentified entities with attributes in query:\n\n---query_entities_with_attributes---\n\nThese are the entities that should be used in the SQL statement. However, \\\nnote that these are the entities (with potential attributes) that were *matches* of Knowledge Graph identified with the \\\nentities originally identified in the original question. A such, they may have id names that may not mean much by themselves, \\\neg ACCOUNT::a74f332. Here is the mapping of entities originally identified (whose role in the query should be obvious) with \\\nthe entities that were matched to them in the Knowledge Graph:\n\n---entity_explanation_string---\n\n--\n\nHere are relationships that were identified as explicitly or implicitly referred to in the question:\n\n---query_relationships---\n\n(Again, if applicable, the entities contained in the relationships are the same as the entities in the \\\nquery_entities_with_attributes, and those are the correct ones to use in the SQL statement.)\n\n{SEPARATOR_LINE}\n\nCRITICAL SPECIAL CASE:\n  - if an identified entity is of the form <entity_type>::*, or an identified relationship contains an \\\nentity of this form, this refers to *any* entity of that type. Correspondingly, the SQL query should use the *entity type*, \\\nand possibly the relationship type, but not the entity with the * itself. \\\nExample: if you see 'ACCOUNT::*', that means any account matches. So if you are supposed to count the 'ACCOUNT::*', \\\nyou should count the entities of entity_type 'ACCOUNT'.\n\n\nIMPORTANT NOTES:\n- The id_name of each relationship has the format \\\n<source_entity_id_name>__<relationship_type>__<target_entity_id_name>.\n- The relationship id_names are NOT UNIQUE, only the combinations of relationship id_name and source_document_id are unique. \\\nThat is because each relationship is extracted from a document. So make sure you use the proper 'distinct's!\n- If the SQL contains a 'SELECT DISTINCT' clause and an ORDER BY clause, then you MUST include the columns from the ORDER BY \\\nclause ALSO IN THE SELECT DISTINCT CLAUSE! This is very important! (This is a postgres db., so this is a MUST!). \\\nYou MUST NOT have a column in the ORDER BY clause that is not ALSO in the SELECT DISTINCT clause!\n- If you join the relationship table on itself using the source_node or target_node, you need to make sure that you also \\\njoin on the source_document_id.\n- The id_name of each node/entity has the format <entity_type_id_name>::<name>, where 'entity_type_id_name' \\\nand 'name' are columns and \\\n  the values <entity_type_id_name> and <name> can be used for filtering.\n- The table can be joined on itself on source nodes and/or target nodes if needed.\n- the SQL statement MUST ultimately only return NODES/ENTITIES (not relationships!), or aggregations of \\\nentities/nodes(count, avg, max, min, etc.). \\\nAgain, DO NOT compose a SQL statement that returns id_name of relationships.\n- You CAN ONLY return ENTITIES or COUNTS (or other aggregations) of ENTITIES, or you can return \\\nsource_date (but only if the question asks for event dates or times). DO NOT return \\\nsource documents or counts of source documents, or relationships or counts of relationships! \\\nThose can only appear in where clauses, ordering etc., but they cannot be returned or ultimately \\\ncounted here! source_date and date operations can appear in select statements, particularly if \\\nthere is time ordering or grouping involved.\n- ENTITIES can be target_entity or source_entity. Think about the allowed relationships and the \\\nquestion to decide which one you want!\n- It is ok to generate nested SQL as long as it is correct postgres syntax!\n- Attributes are stored in the attributes json field. As this is postgres, querying for those must be done as \\\n\"attributes ->> '<attribute>' = '<attribute value>'\".\n-  The SELECT clause MUST only contain entities or aggregations/counts of entities, or, in cases the \\\nquestion was about dates or times, then it can also include source_date. But source_document MUST NEVER appear \\\nin the SELECT clause!\n- Again, NEVER count or retrieve source documents in SELECT CLAUSE, whether it is in combination with \\\nentities, with a distinct, etc. NO source_document in SELECT CLAUSE! So NEVER produce a \\\n'SELECT COUNT(source_entity, source_document)...'\n- Please think about whether you are interested in source entities or target entities! For that purpose, \\\nconsider the allowed relationship types to make sure you select or count the correct one!\n- Again, ALWAYS make sure that EACH COLUMN in an ORDER-BY clause IS ALSO IN THE SELECT CLAUSE! Remind yourself \\\nof that in the reasoning.\n- Be careful with dates! Often a date will refer to the source data, which is the date when \\\nan underlying piece of information was updated. However, if the attributes of an entity contain \\\ntime information as well (like 'started_at', 'completed_at', etc.), then you should really look at \\\nthe wording to see whether you should use a date in the attributes or the event date.\n- Dates are ALWAYS in string format of the form YYYY-MM-DD, for source date as well as for date-like the attributes! \\\nSo please use that format, particularly if you use data comparisons (>, <, ...)\n- Again, NO 'relationship' or 'source_document' in the SELECT CLAUSE, be it as direct columns are in aggregations!\n- Careful with SORT! Really think in which order you want to sort if you have multiple columns you \\\nwant to sort by. If the sorting is time-based and there is a limit for example, then you do want to have a suitable date \\\nvariable as the first column to sort by.\n- When doing a SORT on an attribute value of an entity, you MUST also apply a WHERE clause to filter \\\nfor entities that have the attribute value set. For example, if you want to sort the target entity \\\nby the attribute 'created_date', you must also have a WHERE clause that checks whether the target \\\nentity attribute contains 'created_date'. This is vital for proper ordering with null values.\n- Usually, you will want to retrieve or count entities, maybe with attributes. But you almost always want to \\\nhave entities involved in the SELECT clause.\n- Questions like 'What did Paul work on last week?' should generally be handled by finding all entities \\\nthat reasonably relate to 'work entities' that are i) related to Paul, and ii) that were created or \\\nupdated (by him) last week. So this would likely be a UNION of multiple queries.\n- If you do joins consider the possibility that the second entity does not exist for all examples. \\\nTherefore joins should generally be LEFT joins (or RIGHT joins) as appropriate. Think about which \\\nentities you are interested in, and which ones provides attributes.\nAnother important note:\n - For questions that really try to explore what a certain entity was involved in like 'what did Paul Smith do \\\nin the last 3 months?', and Paul Smith has been extracted ie as an entity of type 'EMPLOYEE', you will \\\nwant to consider all entities that Paul Smith may be related to that satisfy any potential other conditions.\n- Joins should always be made on entities, not source documents!\n- Try to be as efficient as possible.\n\nAPPROACH:\nPlease think through this step by step. Make sure that you include all columns in the ORDER BY clause \\\nalso in the SELECT DISTINCT clause, \\\nif applicable! And again, joins should generally be LEFT JOINS!\n\nAlso, in case it is important, today is ---today_date--- and the user/employee asking is ---user_name---.\n\nPlease structure your answer using <reasoning>, </reasoning>, <sql>, </sql> start and end tags as in:\n\n<reasoning>[think through the logic but do so extremely briefly! Not more than 3-4 sentences.]</reasoning>\n<sql>[the SQL statement that you generate to satisfy the task]</sql>\n\"\"\".strip()\n\n# TODO: remove following before merging after enough testing\nSIMPLE_SQL_CORRECTION_PROMPT = f\"\"\"\nYou are an expert in reviewing and fixing SQL statements.\n\nHere is a draft SQL statement that you should consider as generally capturing the information intended. \\\nHowever, it may or may not be syntactically 100% for our postgresql database.\n\nGuidance:\n - Think about whether attributes should be numbers or strings. You may need to convert them.\n - If we use SELECT DISTINCT we need to have the ORDER BY columns in the \\\nSELECT statement as well! And it needs to be in the EXACT FORM! So if a \\\nconversion took place, make sure to include the conversion in the SELECT and the ORDER BY clause!\n - never should 'source_document' be in the SELECT clause! Remove if present!\n - if there are joins, they must be on entities, never source documents\n - if there are joins, consider the possibility that the second entity does not exist for all examples.\\\n Therefore consider using LEFT joins (or RIGHT joins) as appropriate.\n\nDraft SQL:\n{SEPARATOR_LINE}\n---draft_sql---\n{SEPARATOR_LINE}\n\nPlease structure your answer using <reasoning>, </reasoning>, <sql>, </sql> start and end tags as in:\n\n<reasoning>[think briefly through the problem step by step]</reasoning>\n<sql>[the corrected (or original one, if correct) SQL statement]</sql>\n\"\"\".strip()\n\nSIMPLE_ENTITY_SQL_PROMPT = f\"\"\"\nYou are an expert in generating a SQL statement that only uses ONE TABLE that captures ENTITIES \\\nand their attributes and other data. The table has the following structure:\n\n{SEPARATOR_LINE}\n{ENTITY_TABLE_DESCRIPTION}\n\nHere is the question you are supposed to translate into a SQL statement:\n{SEPARATOR_LINE}\n---question---\n{SEPARATOR_LINE}\n\nTo help you, we already have identified the entities that the SQL statement likely *should* use (but note the \\\nexception below!). The entities as written below also contain the list of attributes and attribute values \\\nthat should specify the entity. \\\nThe format is <entity_type>::<entity_name>--[<attribute_name_1>:<attribute_value_1>, \\\n<attribute_name_2>:<attribute_value_2>, ...].\n{SEPARATOR_LINE}\nIdentified entities with attributes in query:\n\n---query_entities_with_attributes---\n\nThese are the entities that should be used in the SQL statement. However, \\\nnote that these are the entities (with potential attributes) that were *matches* of Knowledge Graph identified with the \\\nentities originally identified in the original question. As such, they may have id names that may not mean much by themselves, \\\neg ACCOUNT::a74f332. Here is the mapping of entities originally identified (whose role in the query should be obvious) with \\\nthe entities that were matched to them in the Knowledge Graph:\n\n---entity_explanation_string---\n\n--\n\n\n{SEPARATOR_LINE}\n\nCRITICAL SPECIAL CASE:\n  - if an identified entity is of the form <entity_type>::*, or an identified relationship contains an \\\nentity of this form, this refers to *any* entity of that type. Correspondingly, the SQL query should use the *entity type*, \\\nbut not the entity with the * itself. \\\nExample: if you see 'ACCOUNT::*', that means any account matches. So if you are supposed to count the 'ACCOUNT::*', \\\nyou should count the entities of entity_type 'ACCOUNT'.\n\n\nIMPORTANT NOTES:\n- The entities are unique in the table.\n- If the SQL contains a 'SELECT DISTINCT' clause and an ORDER BY clause, then you MUST include the columns from the ORDER BY \\\nclause ALSO IN THE SELECT DISTINCT CLAUSE! This is very important! (This is a postgres db., so this is a MUST!). \\\nYou MUST NOT have a column in the ORDER BY clause that is not ALSO in the SELECT DISTINCT clause!\n- The table cannot be joined on itself.\n- You CAN ONLY return ENTITIES or COUNTS (or other aggregations) of ENTITIES, or you can return \\\nsource_date (but only if the question asks for event dates or times, and then the \\\ncorresponding entity must also be returned).\n- Generally, the query can only return ENTITIES or aggregations of ENTITIES:\n   - if individual entities are returned, then you MUST also return the source_document. \\\nIf the source date was requested, you can return that too.\n   - if aggregations of entities are returned, then you can only aggregate the entities.\n- Attributes are stored in the attributes json field. As this is postgres, querying for those must be done as \\\n\"attributes ->> '<attribute>' = '<attribute value>'\".\n- Again, ALWAYS make sure that EACH COLUMN in an ORDER-BY clause IS ALSO IN THE SELECT CLAUSE! Remind yourself \\\nof that in the reasoning.\n- Be careful with dates! Often a date will refer to the source data, which is the date when \\\nan underlying piece of information was updated. However, if the attributes of an entity may contain \\\ntime information as well (like 'started_at', 'completed_at', etc.), then you should really look at \\\nthe wording to see whether you should use a date in the attributes or the event date.\n- Dates are ALWAYS in string format of the form YYYY-MM-DD, for source date as well as for date-like the attributes! \\\nSo please use that format, particularly if you use data comparisons (>, <, ...)\n- Careful with SORT! Really think in which order you want to sort if you have multiple columns you \\\nwant to sort by. If the sorting is time-based and there is a limit for example, then you do want to have a suitable date \\\nvariable as the first column to sort by.\n- When doing a SORT on an attribute value of an entity, you MUST also apply a WHERE clause to filter \\\nfor entities that have the attribute value set. For example, if you want to sort the target entity \\\nby the attribute 'created_date', you must also have a WHERE clause that checks whether the target \\\nentity attribute contains 'created_date'. This is vital for proper ordering with null values.\n- Usually, you will want to retrieve or count entities, maybe with attributes. But you almost always want to \\\nhave entities involved in the SELECT clause.\n- You MUST ONLY rely on the entity attributes provided! This is essential! Do not assume \\\nother attributes exist...they don't! Note that there will often be a search using the results \\\nof this query. So if there is information in the question that does not fit the provided attributes, \\\nyou should not use it here but rely on the later search!\n- Try to be as efficient as possible.\n\nAPPROACH:\nPlease think through this step by step. Make sure that you include all columns in the ORDER BY clause \\\nalso in the SELECT DISTINCT clause, \\\nif applicable!\n\nAlso, in case it is important, today is ---today_date--- and the user/employee asking is ---user_name---.\n\nPlease structure your answer using <reasoning>, </reasoning>, <sql>, </sql> start and end tags as in:\n\n<reasoning>[think through the logic but do so extremely briefly! Not more than 3-4 sentences.]</reasoning>\n<sql>[the SQL statement that you generate to satisfy the task]</sql>\n\"\"\".strip()\n\nSIMPLE_SQL_ERROR_FIX_PROMPT = f\"\"\"\nYou are an expert at fixing SQL statements. You will be provided with a SQL statement that aims to address \\\na question, but it contains an error. Your task is to fix the SQL statement, based on the error message.\n\nHere is the description of the table that the SQL statement is supposed to use:\n---table_description---\n\nHere is the question you are supposed to translate into a SQL statement:\n{SEPARATOR_LINE}\n---question---\n{SEPARATOR_LINE}\n\nHere is the SQL statement that you should fix:\n{SEPARATOR_LINE}\n---sql_statement---\n{SEPARATOR_LINE}\n\nHere is the error message that was returned:\n{SEPARATOR_LINE}\n---error_message---\n{SEPARATOR_LINE}\n\nNote that in the case the error states the sql statement did not return any results, it is possible that the \\\nsql statement is correct, but the question is not addressable with the information in the knowledge graph. \\\nIf you are absolutely certain that is the case, you may return the original sql statement.\n\nHere are a couple common errors that you may encounter:\n- source_document is in the SELECT clause -> remove it\n- columns used in ORDER BY must also appear in the SELECT DISTINCT clause\n- consider carefully the type of the columns you are using, especially for attributes. You may need to cast them\n- dates are ALWAYS in string format of the form YYYY-MM-DD, for source date as well as for date-like the attributes! \\\nSo please use that format, particularly if you use data comparisons (>, <, ...)\n- attributes are stored in the attributes json field. As this is postgres, querying for those must be done as \\\n\"attributes ->> '<attribute>' = '<attribute value>'\" (or \"attributes ? '<attribute>'\" to check for existence).\n- if you are using joins and the sql returned no joins, make sure you are using the appropriate join type (LEFT, RIGHT, etc.) \\\nit is possible that the second entity does not exist for all examples.\n- (ignore if using entity_table) if using the relationship_table and the sql returned no results, make sure you are \\\nselecting the correct column! Use the available relationship types to determine whether to use the source or target entity.\n\nAPPROACH:\nPlease think through this step by step. Please also bear in mind that the sql statement is written in postgres syntax.\n\nAlso, in case it is important, today is ---today_date--- and the user/employee asking is ---user_name---.\n\nPlease structure your answer using <reasoning>, </reasoning>, <sql>, </sql> start and end tags as in:\n\n<reasoning>[think through the logic but do so extremely briefly! Not more than 3-4 sentences.]</reasoning>\n<sql>[the SQL statement that you generate to satisfy the task]</sql>\n\"\"\"\n\n\nSEARCH_FILTER_CONSTRUCTION_PROMPT = f\"\"\"\nYou need to prepare a search across text segments that contain the information necessary to \\\nanswer a question. The text segments have tags that can be used to filter for the relevant segments. \\\nKey are suitable entities and relationships of a knowledge graph, as well as underlying source documents.\n\nYour overall task is to find the filters and structures that are needed to filtering a database to \\\nproperly address a user question.\n\nYou will be given:\n  - the user question\n  - a description of all of the potential entity types involved\n  - a list of 'global' entities and relationships that should be filtered by, given the question\n  - the structure of a schema that was used to derive additional entity filters\n  - a SQL statement that was generated to derive those filters\n  - the results that were generated using the SQL statement. This can have multiple rows, \\\nand those will be the 'local' filters (which will later mean that each retrieved result will \\\nneed to match at least one of the conditions that you will generate).\n  - the results of another query that asked for the underlying source documents that resulted \\\nin the answers of the SQL statement\n\n\nHere is the information:\n\n1) The overall user question\n{SEPARATOR_LINE}\n---question---\n{SEPARATOR_LINE}\n\n2) Here is a description of all of the entity types:\n{SEPARATOR_LINE}\n---entity_type_descriptions---\n{SEPARATOR_LINE}\n\n3) Here are the lists of entity and relationship filters that were derived from the question:\n{SEPARATOR_LINE}\nEntity filters:\n\n---entity_filters---\n\n--\n\nRelationship filters:\n\n---relationship_filters---\n\n{SEPARATOR_LINE}\n\n4) Here are the columns of a table in a database that has a lot of knowledge about the \\\ndata:\n{SEPARATOR_LINE}\n   - relationship (str): The name of the RELATIONSHIP, combining the nature of the relationship and the names of the entities. \\\nIt is of the form \\\n<source_entity_type>::<source_entity_name>__<relationship_description>__<target_entity_type>::<target_entity_name> \\\n[example: ACCOUNT::Nike__has__CONCERN::performance]. Note that this is NOT UNIQUE!\n   - source_entity (str): the id of the source ENTITY/NODE in the relationship [example: ACCOUNT::Nike]\n   - source_entity_attributes (json): the attributes of the source entity/node [example: {{\"account_type\": \"customer\"}}]\n   - target_entity (str): the id of the target ENTITY/NODE in the relationship [example: CONCERN::performance]\n   - target_entity_attributes (json): the attributes of the target entity/node [example: {{\"degree\": \"severe\"}}]\n   - source_entity_type (str): the type of the source entity/node [example: ACCOUNT]. Only the entity types provided \\\n   below are valid.\n   - target_entity_type (str): the type of the target entity/node [example: CONCERN]. Only the entity types provided \\\n   below are valid.\n   - relationship_type (str): the type of the relationship, formatted as  \\\n<source_entity_type>__<relationship_description>__<target_entity_type>.   So the explicit entity_names have \\\nbeen removed. [example: ACCOUNT__has__CONCERN]\n   - source_document (str): the id of the document that contains the relationship. Note that the combination of \\\nid_name and source_document IS UNIQUE!\n   - source_date (str): the 'event' date of the source document [example: 2021-01-01]\n\n{SEPARATOR_LINE}\n\n5) Here is a query that was generated for that table to provide additional filters:\n{SEPARATOR_LINE}\n---sql_query---\n{SEPARATOR_LINE}\n\n6) Here are the results of that SQL query. (Consider the schema description and the \\\nstructure of the entities to interpret the results)\n{SEPARATOR_LINE}\n---sql_results---\n{SEPARATOR_LINE}\n\n7) Here are the results of the other query that provided the underlying source documents \\\nusing the schema:\n{SEPARATOR_LINE}\n---source_document_results---\n{SEPARATOR_LINE}\n\nHere is the detailed set of tasks that you should perform, including the proper output format for you:\n\nPlease reply as a json dictionary in this form:\n\n{{\n    \"global_entity_filters\": <a list of entity filters>,\n    \"global_relationship_filters\": <a list of relationship filters, derived from the 'global' \\\nrelationship filers above.>,\n    \"local_entity_filters\": <a list of lists of 'local' entity filters, which were obtained from the \\\nSQL results in 6 above. Each inner list can have one or more entities, which will correspond to the \\\nrows in the sql results in point 6 above.>,\n    \"source_document_filters\": <a list of strings, derived from the source document filters above. \\\nYou are essentially only formatting here, so do not change the content of the strings.>,\n    \"structure\": <a list of entity ids (entity_type::uuid) that the user maybe want to know more about. \\\nMore specifically, think about how (and if) the user would naturally want the answer to be divided up in \\\n*equivalent and parallel* sub-investigations. For example, if the question was something like 'what was discussed \\\nin the last 5 calls', the user probably expects to see a bullet point list, one bullet point for each call that \\\nthen shows the summary. In that case for this part of the task, your response for the structure should be the \\\nlist of call entities from the sql results in 6 above. (The actual 'what was discussed' will be addressed later). \\\nIn other words, respond with a list of entity ids that you think the user would like to have independently analyzed\nand the results reported for each of those entities.>\n}}\n\nAgain - DO NOT FORGET - here is the user question that motivates this whole task:\n{SEPARATOR_LINE}\n---question---\n{SEPARATOR_LINE}\n\nYour json dictionary answer:\n\"\"\".strip()\n\nOUTPUT_FORMAT_NO_EXAMPLES_PROMPT = f\"\"\"\nYou need to format an answer to a research question. \\\nYou will see what the desired output is, the original question, and the unformatted answer to the research question. \\\nYour purpose is to generate the answer respecting the desired format.\n\nNotes:\n - Note that you are a language model and that answers may or may not be perfect. To communicate \\\nthis to the user, consider phrases like 'I found [10 accounts]...', or 'Here are a number of [goals] that \\\nI found...]\n- Please DO NOT mention the explicit output format in your answer. Just use it to inform the formatting.\n\nHere is the unformatted answer to the research question:\n{SEPARATOR_LINE}\n---introductory_answer---\n{SEPARATOR_LINE}\n\nHere is the original question:\n{SEPARATOR_LINE}\n---question---\n{SEPARATOR_LINE}\n\nAnd finally, here is the desired output format:\n{SEPARATOR_LINE}\n---output_format---\n{SEPARATOR_LINE}\n\nPlease start generating the answer, without any explanation. There should be no real modifications to \\\nthe text, after all, all you need to do here is formatting. \\\n\nYour Answer:\n\"\"\".strip()\n\n\nOUTPUT_FORMAT_PROMPT = f\"\"\"\nYou need to format the answers to a research question that was generated using one or more objects. \\\nAn overall introductory answer may be provided to you, as well as the research results for each individual object. \\\nYou will also be provided with the original question as background, and the desired format. \\\n\nYour purpose is to generate a consolidated and FORMATTED answer that starts of with the introductory \\\nanswer, and then formats the research results for each individual object in the desired format. \\\nDo not add any other text please!\n\nNotes:\n - Note that you are a language model and that answers may or may not be perfect. To communicate \\\nthis to the user, consider phrases like 'I found [10 accounts]...', or 'Here are a number of [goals] that \\\nI found...]\n- Please DO NOT mention the explicit output format in your answer. Just use it to inform the formatting.\n- DO NOT add any content to the introductory answer!\n\n\nHere is the original question for your background:\n{SEPARATOR_LINE}\n---question---\n{SEPARATOR_LINE}\n\nHere is the desired output format:\n{SEPARATOR_LINE}\n---output_format---\n{SEPARATOR_LINE}\n\nHere is the introductory answer:\n{SEPARATOR_LINE}\n---introductory_answer---\n{SEPARATOR_LINE}\n\nHere are the research results that you should - respecting the target format- return in a formatted way:\n{SEPARATOR_LINE}\n---research_results---\n{SEPARATOR_LINE}\n\nPlease start generating the answer, without any explanation. After all, all you need to do here is formatting. \\\n\n\nYour Answer:\n\"\"\".strip()\n\nOUTPUT_FORMAT_NO_OVERALL_ANSWER_PROMPT = f\"\"\"\nYou need to format the return of research on multiple objects. The research results will be given \\\nto you as a string. You will also see what the desired output is, as well as the original question. \\\nYour purpose is to generate the answer respecting the desired format.\n\nNotes:\n - Note that you are a language model and that answers may or may not be perfect. To communicate \\\nthis to the user, consider phrases like 'I found [10 accounts]...', or 'Here are a number of [goals] that \\\nI found...]\n- Please DO NOT mention the explicit output format in your answer. Just use it to inform the formatting.\n - Often, you are also provided with a list of explicit examples. If  - AND ONLY IF - the list is not \\\nempty, then these should be listed at the end with the text:\n'...\nHere are some examples of what I found:\n<bullet point list of examples>\n...'\n - Again if the list of examples is an empty string then skip this section! Do not use the \\\nresults data for this purpose instead! (They will already be handled in the answer.)\n- Even if the desired output format is 'text', make sure that you keep the individual research results \\\nseparated by bullet points, and mention the object name first, followed by a new line. The object name \\\nis at the beginning of the research result, and should be in the format <object_type>::<object_name>.\n\n\nHere is the original question:\n{SEPARATOR_LINE}\n---question---\n{SEPARATOR_LINE}\n\nAnd finally, here is the desired output format:\n{SEPARATOR_LINE}\n---output_format---\n{SEPARATOR_LINE}\n\nHere are the research results that you should properly format:\n{SEPARATOR_LINE}\n---research_results---\n{SEPARATOR_LINE}\n\nPlease start generating the answer, without any explanation. After all, all you need to do here is formatting. \\\n\n\nYour Answer:\n\"\"\".strip()\n\nKG_OBJECT_SOURCE_RESEARCH_PROMPT = f\"\"\"\nYou are an expert in extracting relevant structured information from a list of documents that \\\nshould relate to one object. You are presented with a list of documents that have been determined to be \\\nrelevant to the task of interest. Your goal is to extract the information asked around these topics:\nYou should look at the documents - in no particular order! - and extract the information that relates \\\nto a question:\n{SEPARATOR_LINE}\n{{question}}\n{SEPARATOR_LINE}\n\nHere are the documents you are supposed to search through:\n--\n{{document_text}}\n{SEPARATOR_LINE}\nNote: in this case, please do NOT cite your sources. This is very important!\n\nPlease now generate the answer to the question given the documents:\n\"\"\".strip()\n\nKG_SEARCH_PROMPT = f\"\"\"\nYou are an expert in extracting relevant structured information from a list of documents that \\\nshould relate to one object. You are presented with a list of documents that have been determined to be \\\nrelevant to the task of interest. Your goal is to extract the information asked around these topics:\nYou should look at the documents and extract the information that relates \\\nto a question:\n{SEPARATOR_LINE}\n{{question}}\n{SEPARATOR_LINE}\n\nHere are the documents you are supposed to search through:\n--\n{{document_text}}\n{SEPARATOR_LINE}\nNote: in this case, please DO cite your sources. This is very important! \\\nUse the format [<document number>]. Ie, use [1], [2], and NOT [1,2] if \\\nthere are two documents to cite, etc. \\\n\n\nPlease now generate the answer to the question given the documents:\n\"\"\".strip()\n\n# KG Beta Assistant System Prompt\nKG_BETA_ASSISTANT_SYSTEM_PROMPT = \"\"\"\"You are a knowledge graph assistant that helps users explore and \\\nunderstand relationships between entities.\"\"\"\n\nKG_BETA_ASSISTANT_TASK_PROMPT = \"\"\"\"Help users explore and understand the knowledge graph by answering \\\nquestions about entities and their relationships.\"\"\"\n\n\n# Just in case, for best practice, send a system message with key rules.\n# (The db user permissions executing the SQL will avoid issues anyway,\n# but it does not hurt to to put multiple checks in place.)\nSQL_INSTRUCTIONS_RELATIONSHIP_PROMPT = \"\"\"\nYou are an expert at generating SQL queries to answer questions about a knowledge graph.\n\nYou will be given a lot of instructions later, but here rules that MUST BE FOLLOWED:\n  - the SQL generated MUST only use the table one table named 'relationship_table'. \\\nThis table is not a table that can be defined or overwritten by the user and the resulting SQL \\\nstatement, it MUST be seen as an existing table in the database.\n  - self-joins of the 'relationship_table' are allowed, as well as common table expressions \\\n  that reference only the 'relationship_table'.\n  - no other table or view can in any way or shape be \\\ninvolved in the generated SQL.\n  - no other database operations can be generated except for those that query the 'relationship_table'. \\\n(WHERE, GROUP BY, etc. are certainly allowed, but no other database table can be used in the generated SQL.)\n\"\"\"\n\nSQL_INSTRUCTIONS_ENTITY_PROMPT = \"\"\"\nYou are an expert at generating SQL queries to answer questions about a knowledge graph.\n\nYou will be given a lot of instructions later, but here rules that MUST BE FOLLOWED:\n  - the SQL generated MUST only use the table one table named 'entity_table'. \\\nThis table is not a table that can be defined or overwritten by the user and the resulting SQL \\\nstatement, it MUST be seen as an existing table in the database.\n  - common table expressions that reference only the 'entity_table' are allowed.\n  - no other table or view of a potential underlying schema can in any way or shape be \\\ninvolved in the generated SQL.\n  - no other database operations can be generated except for those that query the 'entity_table'. \\\n(WHERE, GROUP BY, etc. are certainly allowed, but no other database table can be used in the generated SQL.)\n\"\"\"\n"
  },
  {
    "path": "backend/onyx/prompts/prompt_template.py",
    "content": "import re\n\nfrom onyx.prompts.prompt_utils import replace_current_datetime_tag\n\n\nclass PromptTemplate:\n    \"\"\"\n    A class for building prompt templates with placeholders.\n    Useful when building templates with json schemas, as {} will not work with f-strings.\n    Unlike string.replace, this class will raise an error if the fields are missing.\n    \"\"\"\n\n    DEFAULT_PATTERN = r\"---([a-zA-Z0-9_]+)---\"\n\n    def __init__(self, template: str, pattern: str = DEFAULT_PATTERN):\n        self._pattern_str = pattern\n        self._pattern = re.compile(pattern)\n        self._template = template\n        self._fields: set[str] = set(self._pattern.findall(template))\n\n    def build(self, **kwargs: str) -> str:\n        \"\"\"\n        Build the prompt template with the given fields.\n        Will raise an error if the fields are missing.\n        Will ignore fields that are not in the template.\n        \"\"\"\n        missing = self._fields - set(kwargs.keys())\n        if missing:\n            raise ValueError(f\"Missing required fields: {missing}.\")\n        built = self._replace_fields(kwargs)\n        return self._postprocess(built)\n\n    def partial_build(self, **kwargs: str) -> \"PromptTemplate\":\n        \"\"\"\n        Returns another PromptTemplate with the given fields replaced.\n        Will ignore fields that are not in the template.\n        \"\"\"\n        new_template = self._replace_fields(kwargs)\n        return PromptTemplate(new_template, self._pattern_str)\n\n    def _replace_fields(self, field_vals: dict[str, str]) -> str:\n        def repl(match: re.Match) -> str:\n            key = match.group(1)\n            return field_vals.get(key, match.group(0))\n\n        return self._pattern.sub(repl, self._template)\n\n    def _postprocess(self, text: str) -> str:\n        \"\"\"Apply global replacements such as [[CURRENT_DATETIME]].\"\"\"\n        if not text:\n            return text\n        # Ensure [[CURRENT_DATETIME]] matches shared prompt formatting\n        return replace_current_datetime_tag(\n            text,\n            full_sentence=True,\n            include_day_of_week=True,\n        )\n"
  },
  {
    "path": "backend/onyx/prompts/prompt_utils.py",
    "content": "from datetime import datetime\nfrom typing import cast\n\nfrom langchain_core.messages import BaseMessage\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.prompts.chat_prompts import ADDITIONAL_INFO\nfrom onyx.prompts.chat_prompts import CITATION_GUIDANCE_REPLACEMENT_PAT\nfrom onyx.prompts.chat_prompts import COMPANY_DESCRIPTION_BLOCK\nfrom onyx.prompts.chat_prompts import COMPANY_NAME_BLOCK\nfrom onyx.prompts.chat_prompts import DATETIME_REPLACEMENT_PAT\nfrom onyx.prompts.chat_prompts import REMINDER_TAG_REPLACEMENT_PAT\nfrom onyx.prompts.chat_prompts import REQUIRE_CITATION_GUIDANCE\nfrom onyx.prompts.constants import CODE_BLOCK_PAT\nfrom onyx.prompts.constants import REMINDER_TAG_DESCRIPTION\nfrom onyx.server.settings.store import load_settings\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\n_BASIC_TIME_STR = \"The current date is {datetime_info}.\"\n\n\ndef get_current_llm_day_time(\n    include_day_of_week: bool = True,\n    full_sentence: bool = True,\n    include_hour_min: bool = False,\n) -> str:\n    current_datetime = datetime.now()\n    # Format looks like: \"October 16, 2023 14:30\" if include_hour_min, otherwise \"October 16, 2023\"\n    formatted_datetime = (\n        current_datetime.strftime(\"%B %d, %Y %H:%M\")\n        if include_hour_min\n        else current_datetime.strftime(\"%B %d, %Y\")\n    )\n    day_of_week = current_datetime.strftime(\"%A\")\n    if full_sentence:\n        return f\"The current day and time is {day_of_week} {formatted_datetime}\"\n    if include_day_of_week:\n        return f\"{day_of_week} {formatted_datetime}\"\n    return f\"{formatted_datetime}\"\n\n\ndef replace_current_datetime_tag(\n    prompt_str: str,\n    *,\n    full_sentence: bool = False,\n    include_day_of_week: bool = True,\n) -> str:\n    datetime_str = get_current_llm_day_time(\n        full_sentence=full_sentence,\n        include_day_of_week=include_day_of_week,\n    )\n\n    if DATETIME_REPLACEMENT_PAT in prompt_str:\n        prompt_str = prompt_str.replace(DATETIME_REPLACEMENT_PAT, datetime_str)\n\n    return prompt_str\n\n\ndef replace_citation_guidance_tag(\n    prompt_str: str,\n    *,\n    should_cite_documents: bool = False,\n    include_all_guidance: bool = False,\n) -> tuple[str, bool]:\n    \"\"\"\n    Replace {{CITATION_GUIDANCE}} placeholder with citation guidance if needed.\n\n    Returns:\n        tuple[str, bool]: (prompt_with_replacement, should_append_fallback)\n        - prompt_with_replacement: The prompt with placeholder replaced (or unchanged if not present)\n        - should_append_fallback: True if citation guidance should be appended\n            (placeholder is not present and citations are needed)\n    \"\"\"\n    placeholder_was_present = CITATION_GUIDANCE_REPLACEMENT_PAT in prompt_str\n\n    if not placeholder_was_present:\n        # Placeholder not present - caller should append if citations are needed\n        should_append = (\n            should_cite_documents or include_all_guidance\n        ) and REQUIRE_CITATION_GUIDANCE not in prompt_str\n        return prompt_str, should_append\n\n    citation_guidance = (\n        REQUIRE_CITATION_GUIDANCE\n        if should_cite_documents or include_all_guidance\n        else \"\"\n    )\n\n    prompt_str = prompt_str.replace(\n        CITATION_GUIDANCE_REPLACEMENT_PAT,\n        citation_guidance,\n    )\n\n    return prompt_str, False\n\n\ndef replace_reminder_tag(prompt_str: str) -> str:\n    \"\"\"Replace {{REMINDER_TAG_DESCRIPTION}} with the reminder tag content.\"\"\"\n    if REMINDER_TAG_REPLACEMENT_PAT in prompt_str:\n        prompt_str = prompt_str.replace(\n            REMINDER_TAG_REPLACEMENT_PAT, REMINDER_TAG_DESCRIPTION\n        )\n\n    return prompt_str\n\n\ndef handle_onyx_date_awareness(\n    prompt_str: str,\n    # We always replace the pattern {{CURRENT_DATETIME}} if it shows up\n    # but if it doesn't show up and the prompt is datetime aware, add it to the prompt at the end.\n    datetime_aware: bool = False,\n) -> str:\n    \"\"\"\n    If there is a {{CURRENT_DATETIME}} tag, replace it with the current date and time no matter what.\n    If the prompt is datetime aware, and there are no datetime tags, add it to the prompt.\n    Do nothing otherwise.\n    This can later be expanded to support other tags.\n    \"\"\"\n\n    prompt_with_datetime = replace_current_datetime_tag(\n        prompt_str,\n        full_sentence=False,\n        include_day_of_week=True,\n    )\n    if prompt_with_datetime != prompt_str:\n        return prompt_with_datetime\n\n    if datetime_aware:\n        return prompt_str + ADDITIONAL_INFO.format(\n            datetime_info=_BASIC_TIME_STR.format(\n                datetime_info=get_current_llm_day_time()\n            )\n        )\n\n    return prompt_str\n\n\ndef get_company_context() -> str | None:\n    prompt_str = None\n    try:\n        workspace_settings = load_settings()\n        company_name = workspace_settings.company_name\n        company_description = workspace_settings.company_description\n\n        if not company_name and not company_description:\n            return None\n\n        prompt_str = \"\"\n        if company_name:\n            prompt_str += COMPANY_NAME_BLOCK.format(company_name=company_name)\n        if company_description:\n            prompt_str += COMPANY_DESCRIPTION_BLOCK.format(\n                company_description=company_description\n            )\n        return prompt_str\n    except Exception as e:\n        logger.error(f\"Error handling company awareness: {e}\")\n        return None\n\n\n# Maps connector enum string to a more natural language representation for the LLM\n# If not on the list, uses the original but slightly cleaned up, see below\nCONNECTOR_NAME_MAP = {\n    \"web\": \"Website\",\n    \"requesttracker\": \"Request Tracker\",\n    \"github\": \"GitHub\",\n    \"file\": \"File Upload\",\n}\n\n\ndef clean_up_source(source_str: str) -> str:\n    if source_str in CONNECTOR_NAME_MAP:\n        return CONNECTOR_NAME_MAP[source_str]\n    return source_str.replace(\"_\", \" \").title()\n\n\ndef build_doc_context_str(\n    semantic_identifier: str,\n    source_type: DocumentSource,\n    content: str,\n    metadata_dict: dict[str, str | list[str]],\n    updated_at: datetime | None,\n    ind: int,\n    include_metadata: bool = True,\n) -> str:\n    context_str = \"\"\n    if include_metadata:\n        context_str += f\"DOCUMENT {ind}: {semantic_identifier}\\n\"\n        context_str += f\"Source: {clean_up_source(source_type)}\\n\"\n\n        for k, v in metadata_dict.items():\n            if isinstance(v, list):\n                v_str = \", \".join(v)\n                context_str += f\"{k.capitalize()}: {v_str}\\n\"\n            else:\n                context_str += f\"{k.capitalize()}: {v}\\n\"\n\n        if updated_at:\n            update_str = updated_at.strftime(\"%B %d, %Y %H:%M\")\n            context_str += f\"Updated: {update_str}\\n\"\n    context_str += f\"{CODE_BLOCK_PAT.format(content.strip())}\\n\\n\\n\"\n    return context_str\n\n\n_PER_MESSAGE_TOKEN_BUFFER = 7\n\n\ndef find_last_index(lst: list[int], max_prompt_tokens: int) -> int:\n    \"\"\"From the back, find the index of the last element to include\n    before the list exceeds the maximum\"\"\"\n    running_sum = 0\n\n    if not lst:\n        logger.warning(\"Empty message history passed to find_last_index\")\n        return 0\n\n    last_ind = 0\n    for i in range(len(lst) - 1, -1, -1):\n        running_sum += lst[i] + _PER_MESSAGE_TOKEN_BUFFER\n        if running_sum > max_prompt_tokens:\n            last_ind = i + 1\n            break\n\n    if last_ind >= len(lst):\n        logger.error(\n            f\"Last message alone is too large! max_prompt_tokens: {max_prompt_tokens}, message_token_counts: {lst}\"\n        )\n        raise ValueError(\"Last message alone is too large!\")\n\n    return last_ind\n\n\ndef drop_messages_history_overflow(\n    messages_with_token_cnts: list[tuple[BaseMessage, int]],\n    max_allowed_tokens: int,\n) -> list[BaseMessage]:\n    \"\"\"As message history grows, messages need to be dropped starting from the furthest in the past.\n    The System message should be kept if at all possible and the latest user input which is inserted in the\n    prompt template must be included\"\"\"\n\n    final_messages: list[BaseMessage] = []\n    messages, token_counts = cast(\n        tuple[list[BaseMessage], list[int]], zip(*messages_with_token_cnts)\n    )\n    system_msg = (\n        final_messages[0]\n        if final_messages and final_messages[0].type == \"system\"\n        else None\n    )\n\n    history_msgs = messages[:-1]\n    final_msg = messages[-1]\n    if final_msg.type != \"human\":\n        if final_msg.type != \"tool\":\n            raise ValueError(\"Last message must be user input OR a tool result\")\n        else:\n            final_msgs = messages[-3:]\n            history_msgs = messages[:-3]\n    else:\n        final_msgs = [final_msg]\n\n    # Start dropping from the history if necessary\n    ind_prev_msg_start = find_last_index(\n        token_counts, max_prompt_tokens=max_allowed_tokens\n    )\n\n    if system_msg and ind_prev_msg_start <= len(history_msgs):\n        final_messages.append(system_msg)\n\n    final_messages.extend(history_msgs[ind_prev_msg_start:])\n    final_messages.extend(final_msgs)\n\n    return final_messages\n"
  },
  {
    "path": "backend/onyx/prompts/search_prompts.py",
    "content": "# How it works and rationale:\n# First - this works best empirically across multiple LLMs, some of this is back-explaining reasons based on results.\n#\n# The system prompt is kept simple and as similar to typical system prompts as possible to stay within training distribution.\n# The history is passed through as a list of messages, this should allow the LLM to more easily understand what is going on.\n# The special tokens and separators let the LLM more easily disregard no longer relevant past messages.\n# The last message is dynamically created and has a detailed description of the actual task.\n# This is based on the assumption that users give much more varied requests in their prompts and LLMs are well adjusted to this.\n# The proximity of the instructions and the lack of any breaks should also let the LLM follow the task more clearly.\n#\n# For document verification, the history is not included as the queries should ideally be standalone enough.\n# To keep it simple, it is just a single simple prompt.\n\n\nSEMANTIC_QUERY_REPHRASE_SYSTEM_PROMPT = \"\"\"\nYou are an assistant that reformulates the last user message into a standalone, self-contained query suitable for \\\nsemantic search. Your goal is to output a single natural language query that captures the full meaning of the user's \\\nmost recent message. It should be fully semantic and natural language unless the user query is already a keyword query. \\\nWhen relevant, you bring in context from the history or knowledge about the user.\n\nThe current date is {current_date}.\n\"\"\"\n\nSEMANTIC_QUERY_REPHRASE_USER_PROMPT = \"\"\"\nGiven the chat history above (if any) and the final user query (provided below), provide a standalone query that is as\nrepresentative of the user query as possible. In most cases, it should be exactly the same as the last user query. \\\nIt should be fully semantic and natural language unless the user query is already a keyword query. \\\nFocus on the last user message, in most cases the history and extra context should be ignored.\n\nFor a query like \"What are the use cases for product X\", your output should remain \"What are the use cases for product X\". \\\nIt should remain semantic, and as close to the original query as possible. There is nothing additional needed \\\nfrom the history or that should be removed / replaced from the query.\n\nFor modifications, you can:\n1. Insert relevant context from the chat history. For example:\n\"How do I set it up?\" -> \"How do I set up software Y?\" (assuming the conversation was about software Y)\n\n2. Remove asks or requests not related to the searching. For example:\n\"Can you summarize the calls with example company\" -> \"calls with example company\"\n\"Can you find me the document that goes over all of the software to set up on an engineer's first day?\" -> \\\n\"all of the software to set up on an engineer's first day\"\n\n3. Fill in relevant information about the user. For example:\n\"What document did I write last week?\" -> \"What document did John Doe write last week?\" (assuming the user is John Doe)\n{additional_context}\n=========================\nCRITICAL: ONLY provide the standalone query and nothing else.\n\nFinal user query:\n{user_query}\n\"\"\".strip()\n\n\nKEYWORD_REPHRASE_SYSTEM_PROMPT = \"\"\"\nYou are an assistant that reformulates the last user message into a set of standalone keyword queries suitable for a keyword \\\nsearch engine. Your goal is to output keyword queries that optimize finding relevant documents to answer the user query. \\\nWhen relevant, you bring in context from the history or knowledge about the user.\n\nThe current date is {current_date}.\n\"\"\"\n\n\nKEYWORD_REPHRASE_USER_PROMPT = \"\"\"\nGiven the chat history above (if any) and the final user query (provided below), provide a set of keyword only queries that can\nhelp find relevant documents. Provide a single query per line (where each query consists of one or more keywords). \\\nThe queries must be purely keywords and not contain any natural language. \\\nEach query should have as few keywords as necessary to represent the user's search intent.\n\nGuidelines:\n- Do not provide more than 3 queries.\n- Do not replace or expand niche, proprietary, or obscure terms\n- Focus on the last user message, in most cases the history and any extra context should be ignored.\n{additional_context}\n=========================\nCRITICAL: ONLY provide the keyword queries, one set of keywords per line and nothing else.\n\nFinal user query:\n{user_query}\n\"\"\".strip()\n\n\nREPHRASE_CONTEXT_PROMPT = \"\"\"\nIn most cases the following additional context is not needed. If relevant, here is some information about the user:\n{user_info}\n\nHere are some memories about the user:\n{memories}\n\"\"\"\n\n\n# This prompt is intended to be fairly lenient since there are additional filters downstream.\n# There are now multiple places for misleading docs to get dropped so each one can be a bit more lax.\n# As models get better, it's likely better to include more context than not, some questionably\n# useful stuff may be helpful downstream.\n# Adding the ! option to allow better models to handle questions where all of the documents are\n# necessary to make a good determination.\n# If a document is by far the best and is a very obvious inclusion, add a ! after the section_id to indicate that it should \\\n# be included in full. Example output: [8, 2!, 5].\nDOCUMENT_SELECTION_PROMPT = \"\"\"\nSelect the most relevant document sections for the user's query (maximum {max_sections}).{extra_instructions}\n\n# Document Sections\n```\n{formatted_doc_sections}\n```\n\n# User Query\n```\n{user_query}\n```\n\n# Selection Criteria\n- Choose sections most relevant to answering the query, if at all in doubt, include the section.\n- Even if only a tiny part of the section is relevant, include it.\n- It is ok to select multiple sections from the same document.\n- Consider indirect connections and supporting context to be valuable.\n- If the section is not directly helpful but the document seems relevant, there is an opportunity \\\nlater to expand the section and read more from the document so include the section.\n\n# Output Format\nReturn ONLY section_ids as a comma-separated list, ordered by relevance:\n[most_relevant_section_id, second_most_relevant_section_id, ...]\n\nSection IDs:\n\"\"\".strip()\n\nTRY_TO_FILL_TO_MAX_INSTRUCTIONS = \"\"\"\nTry to fill the list to the maximum number of sections if possible without including non-relevant or misleading sections.\n\"\"\"\n\n\n# Some models are trained heavily to reason in the actual output so we allow some flexibility in the prompt.\n# Downstream of the model, we will attempt to parse the output to extract the number.\n# This inference will not have a system prompt as it's a single message task more like the traditional ones.\n# LLMs should do better with just this type of next word prediction.\n# Opted to not include metadata here as the doc was already selected by the previous step that has it.\n# Also hopefully it leans not throwing out documents as there are not many bad ones that make it to this stage.\n# If anything, it's mostly because of something misleading, otherwise this step should be treated as 95% expansion/filtering.\nDOCUMENT_CONTEXT_SELECTION_PROMPT = \"\"\"\nAnalyze the relevance of document sections to a search query and classify according to the categories \\\ndescribed at the end of the prompt.\n\n# Document Title / Metadata\n```\n{document_title}\n```\n\n# Section Above:\n```\n{section_above}\n```\n\n# Main Section:\n```\n{main_section}\n```\n\n# Section Below:\n```\n{section_below}\n```\n\n# User Query:\n```\n{user_query}\n```\n\n# Classification Categories:\n**0 - NOT_RELEVANT**\n- Main section and surrounding sections do not help answer the query or provide meaningful, relevant information.\n- Appears on topic but refers to a different context or subject (could lead to potential confusion or misdirection). \\\nIt is important to avoid conflating different contexts and subjects - if the document is related to the query but not about \\\nthe correct subject. Example: \"How much did we quote ACME for project X\", \"ACME paid us $100,000 for project Y\".\n\n**1 - MAIN_SECTION_ONLY**\n- Main section contains useful information relevant to the query.\n- Adjacent sections do not provide additional directly relevant information.\n\n**2 - INCLUDE_ADJACENT_SECTIONS**\n- The main section AND adjacent sections are all useful for answering the user query.\n- The surrounding sections provide relevant information that does not exist in the main section.\n- Even if only 1 of the adjacent sections is useful or there is a small piece in either that is useful.\n- Additional unseen sections are unlikely to contain valuable related information.\n\n**3 - INCLUDE_FULL_DOCUMENT**\n- Additional unseen sections are likely to contain valuable related information to the query.\n\n## Additional Decision Notes\n- If only a small piece of the document is useful - use classification 1 or 2, do not use 0.\n- If the document is on topic and provides additional context that might be useful in \\\ncombination with other documents - use classification 1, 2 or 3, do not use 0.\n\nCRITICAL: ONLY output the NUMBER of the situation most applicable to the query and sections provided (0, 1, 2, or 3).\n\nSituation Number:\n\"\"\".strip()\n"
  },
  {
    "path": "backend/onyx/prompts/tool_prompts.py",
    "content": "# ruff: noqa: E501, W605 start\n# If there are any tools, this section is included, the sections below are for the available tools\nTOOL_SECTION_HEADER = \"\\n# Tools\\n\\n\"\n\n\n# This section is included if there are search type tools, currently internal_search and web_search\nTOOL_DESCRIPTION_SEARCH_GUIDANCE = \"\"\"\nFor questions that can be answered from existing knowledge, answer the user directly without using any tools. \\\nIf you suspect your knowledge is outdated or for topics where things are rapidly changing, use search tools to get more context. \\\nFor statements that may be describing or referring to a document, run a search for the document. \\\nIn ambiguous cases, favor searching to get more context.\n\nWhen using any search type tool, do not make any assumptions and stay as faithful to the user's query as possible. \\\nBetween internal and web search (if both are available), think about if the user's query is likely better answered by team internal sources or online web pages. \\\nWhen searching for information, if the initial results cannot fully answer the user's query, try again with different tools or arguments. \\\nDo not repeat the same or very similar queries if it already has been run in the chat history.\n\nIf it is unclear which tool to use, consider using multiple in parallel to be efficient with time.\n\"\"\".lstrip()\n\n\nINTERNAL_SEARCH_GUIDANCE = \"\"\"\n## internal_search\nUse the `internal_search` tool to search connected applications for information. Some examples of when to use `internal_search` include:\n- Internal information: any time where there may be some information stored in internal applications that could help better answer the query.\n- Niche/Specific information: information that is likely not found in public sources, things specific to a project or product, team, process, etc.\n- Keyword Queries: queries that are heavily keyword based are often internal document search queries.\n- Ambiguity: questions about something that is not widely known or understood.\nNever provide more than 3 queries at once to `internal_search`.\n\"\"\".lstrip()\n\n\nWEB_SEARCH_GUIDANCE = \"\"\"\n## web_search\nUse the `web_search` tool to access up-to-date information from the web. Some examples of when to use `web_search` include:\n- Freshness: when the answer might be enhanced by up-to-date information on a topic. Very important for topics that are changing or evolving.\n- Accuracy: if the cost of outdated/inaccurate information is high.\n- Niche Information: when detailed info is not widely known or understood (but is likely found on the internet).{site_colon_disabled}\n\"\"\".lstrip()\n\nWEB_SEARCH_SITE_DISABLED_GUIDANCE = \"\"\"\nDo not use the \"site:\" operator in your web search queries.\n\"\"\".lstrip()\n\n\nOPEN_URLS_GUIDANCE = \"\"\"\n## open_url\nUse the `open_url` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your web searches or user specified URLs. \\\nYou can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources. \\\nDo not open URLs that are image files like .png, .jpg, etc.\nYou should almost always use open_url after a web_search call. Use this tool when a user asks about a specific provided URL.\n\"\"\".lstrip()\n\nPYTHON_TOOL_GUIDANCE = \"\"\"\n## python\nUse the `python` tool to execute Python code in an isolated sandbox. The tool will respond with the output of the execution or time out after 60.0 seconds.\nAny files uploaded to the chat will be automatically be available in the execution environment's current directory. \\\nThe current directory in the file system can be used to save and persist user files. Files written to the current directory will be returned with a `file_link`. \\\nUse this to give the user a way to download the file OR to display generated images.\nInternet access for this session is disabled. Do not make external web requests or API calls as they will fail.\nUse `openpyxl` to read and write Excel files. You have access to libraries like numpy, pandas, scipy, matplotlib, and PIL.\nIMPORTANT: each call to this tool is independent. Variables from previous calls will NOT be available in the current call.\n\"\"\".lstrip()\n\nGENERATE_IMAGE_GUIDANCE = \"\"\"\n## generate_image\nNEVER use generate_image unless the user specifically requests an image.\nFor edits/variations of a previously generated image, pass `reference_image_file_ids` with\nthe `file_id` values returned by earlier `generate_image` tool results.\n\"\"\".lstrip()\n\nMEMORY_GUIDANCE = \"\"\"\n## add_memory\nUse the `add_memory` tool for facts shared by the user that should be remembered for future conversations. \\\nOnly add memories that are specific, likely to remain true, and likely to be useful later. \\\nFocus on enduring preferences, long-term goals, stable constraints, and explicit \"remember this\" type requests.\n\"\"\".lstrip()\n\nTOOL_CALL_FAILURE_PROMPT = \"\"\"\nLLM attempted to call a tool but failed. Most likely the tool name or arguments were misspelled.\n\"\"\".strip()\n# ruff: noqa: E501, W605 end\n"
  },
  {
    "path": "backend/onyx/prompts/user_info.py",
    "content": "# ruff: noqa: E501, W605 start\nUSER_INFORMATION_HEADER = \"\\n# User Information\\n\\n\"\n\nBASIC_INFORMATION_PROMPT = \"\"\"\n## Basic Information\nUser name: {user_name}\nUser email: {user_email}{user_role}\n\"\"\".lstrip()\n\n# This line only shows up if the user has configured their role.\nUSER_ROLE_PROMPT = \"\"\"\nUser role: {user_role}\n\"\"\".lstrip()\n\n# Team information should be a paragraph style description of the user's team.\nTEAM_INFORMATION_PROMPT = \"\"\"\n## Team Information\n{team_information}\n\"\"\".lstrip()\n\n# User preferences should be a paragraph style description of the user's preferences.\nUSER_PREFERENCES_PROMPT = \"\"\"\n## User Preferences\n{user_preferences}\n\"\"\".lstrip()\n\n# User memories should look something like:\n# - Memory 1\n# - Memory 2\n# - Memory 3\nUSER_MEMORIES_PROMPT = \"\"\"\n## User Memories\n{user_memories}\n\"\"\".lstrip()\n\n# ruff: noqa: E501, W605 end\n"
  },
  {
    "path": "backend/onyx/redis/iam_auth.py",
    "content": "\"\"\"\nRedis IAM Authentication Module\nThis module provides Redis IAM authentication functionality for AWS ElastiCache.\nUnlike RDS IAM auth, Redis IAM auth relies on IAM roles and policies rather than\ngenerating authentication tokens.\nKey functions:\n- configure_redis_iam_auth: Configure Redis connection parameters for IAM auth\n- create_redis_ssl_context_if_iam: Create SSL context for secure connections\n\"\"\"\n\nimport ssl\nfrom typing import Any\n\n\ndef configure_redis_iam_auth(connection_kwargs: dict[str, Any]) -> None:\n    \"\"\"\n    Configure Redis connection parameters for IAM authentication.\n    Modifies the connection_kwargs dict in-place to:\n    1. Remove password (not needed with IAM)\n    2. Enable SSL with system CA certificates\n    3. Set proper SSL context for secure connections\n    \"\"\"\n    # Remove password as it's not needed with IAM authentication\n    if \"password\" in connection_kwargs:\n        del connection_kwargs[\"password\"]\n\n    # Ensure SSL is enabled for IAM authentication\n    connection_kwargs[\"ssl\"] = True\n    connection_kwargs[\"ssl_context\"] = create_redis_ssl_context_if_iam()\n\n\ndef create_redis_ssl_context_if_iam() -> ssl.SSLContext:\n    \"\"\"Create an SSL context for Redis IAM authentication using system CA certificates.\"\"\"\n    # Use system CA certificates by default - no need for additional CA files\n    ssl_context = ssl.create_default_context()\n    ssl_context.check_hostname = True\n    ssl_context.verify_mode = ssl.CERT_REQUIRED\n    return ssl_context\n"
  },
  {
    "path": "backend/onyx/redis/redis_connector.py",
    "content": "import redis\n\nfrom onyx.redis.redis_connector_delete import RedisConnectorDelete\nfrom onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync\nfrom onyx.redis.redis_connector_ext_group_sync import RedisConnectorExternalGroupSync\nfrom onyx.redis.redis_connector_prune import RedisConnectorPrune\nfrom onyx.redis.redis_connector_stop import RedisConnectorStop\nfrom onyx.redis.redis_pool import get_redis_client\n\n\n# TODO: reduce dependence on redis\nclass RedisConnector:\n    \"\"\"Composes several classes to simplify interacting with a connector and its\n    associated background tasks / associated redis interactions.\"\"\"\n\n    def __init__(self, tenant_id: str, cc_pair_id: int) -> None:\n        \"\"\"id: a connector credential pair id\"\"\"\n\n        self.tenant_id: str = tenant_id\n        self.cc_pair_id: int = cc_pair_id\n        self.redis: redis.Redis = get_redis_client(tenant_id=tenant_id)\n\n        self.stop = RedisConnectorStop(tenant_id, cc_pair_id, self.redis)\n        self.prune = RedisConnectorPrune(tenant_id, cc_pair_id, self.redis)\n        self.delete = RedisConnectorDelete(tenant_id, cc_pair_id, self.redis)\n        self.permissions = RedisConnectorPermissionSync(\n            tenant_id, cc_pair_id, self.redis\n        )\n        self.external_group_sync = RedisConnectorExternalGroupSync(\n            tenant_id, cc_pair_id, self.redis\n        )\n\n    @staticmethod\n    def get_id_from_fence_key(key: str) -> str | None:\n        \"\"\"\n        Extracts the object ID from a fence key in the format `PREFIX_fence_X`.\n\n        Args:\n            key (str): The fence key string.\n\n        Returns:\n            Optional[int]: The extracted ID if the key is in the correct format, otherwise None.\n        \"\"\"\n        parts = key.split(\"_\")\n        if len(parts) != 3:\n            return None\n\n        object_id = parts[2]\n        return object_id\n\n    @staticmethod\n    def get_id_from_task_id(task_id: str) -> str | None:\n        \"\"\"\n        Extracts the object ID from a task ID string.\n\n        This method assumes the task ID is formatted as `prefix_objectid_suffix`, where:\n        - `prefix` is an arbitrary string (e.g., the name of the task or entity),\n        - `objectid` is the ID you want to extract,\n        - `suffix` is another arbitrary string (e.g., a UUID).\n\n        Example:\n            If the input `task_id` is `documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc`,\n            this method will return the string `\"1\"`.\n\n        Args:\n            task_id (str): The task ID string from which to extract the object ID.\n\n        Returns:\n            str | None: The extracted object ID if the task ID is in the correct format, otherwise None.\n        \"\"\"\n        # example: task_id=documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc\n        parts = task_id.split(\"_\")\n        if len(parts) != 3:\n            return None\n\n        object_id = parts[1]\n        return object_id\n\n    def db_lock_key(self, search_settings_id: int) -> str:\n        \"\"\"\n        Key for the db lock for an indexing attempt.\n        Prevents multiple modifications to the current indexing attempt row\n        from multiple docfetching/docprocessing tasks.\n        \"\"\"\n        return f\"da_lock:indexing:db_{self.cc_pair_id}/{search_settings_id}\"\n"
  },
  {
    "path": "backend/onyx/redis/redis_connector_delete.py",
    "content": "import time\nfrom datetime import datetime\nfrom typing import cast\nfrom uuid import uuid4\n\nimport redis\nfrom celery import Celery\nfrom pydantic import BaseModel\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DB_YIELD_PER_DEFAULT\nfrom onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.document import construct_document_id_select_for_connector_credential_pair\n\n\nclass RedisConnectorDeletePayload(BaseModel):\n    num_tasks: int | None\n    submitted: datetime\n\n\nclass RedisConnectorDelete:\n    \"\"\"Manages interactions with redis for deletion tasks. Should only be accessed\n    through RedisConnector.\"\"\"\n\n    PREFIX = \"connectordeletion\"\n    FENCE_PREFIX = f\"{PREFIX}_fence\"  # \"connectordeletion_fence\"\n    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks\n    TASKSET_PREFIX = f\"{PREFIX}_taskset\"  # \"connectordeletion_taskset\"\n    TASKSET_TTL = FENCE_TTL\n\n    # used to signal the overall workflow is still active\n    # it's impossible to get the exact state of the system at a single point in time\n    # so we need a signal with a TTL to bridge gaps in our checks\n    ACTIVE_PREFIX = PREFIX + \"_active\"\n    ACTIVE_TTL = 3600\n\n    def __init__(self, tenant_id: str, id: int, redis: redis.Redis) -> None:\n        self.tenant_id: str = tenant_id\n        self.id = id\n        self.redis = redis\n\n        self.fence_key: str = f\"{self.FENCE_PREFIX}_{id}\"\n        self.taskset_key = f\"{self.TASKSET_PREFIX}_{id}\"\n\n        self.active_key = f\"{self.ACTIVE_PREFIX}_{id}\"\n\n    def taskset_clear(self) -> None:\n        self.redis.delete(self.taskset_key)\n\n    def get_remaining(self) -> int:\n        # todo: move into fence\n        remaining = cast(int, self.redis.scard(self.taskset_key))\n        return remaining\n\n    @property\n    def fenced(self) -> bool:\n        return bool(self.redis.exists(self.fence_key))\n\n    @property\n    def payload(self) -> RedisConnectorDeletePayload | None:\n        # read related data and evaluate/print task progress\n        fence_bytes = cast(bytes, self.redis.get(self.fence_key))\n        if fence_bytes is None:\n            return None\n\n        fence_str = fence_bytes.decode(\"utf-8\")\n        payload = RedisConnectorDeletePayload.model_validate_json(cast(str, fence_str))\n\n        return payload\n\n    def set_fence(self, payload: RedisConnectorDeletePayload | None) -> None:\n        if not payload:\n            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n            self.redis.delete(self.fence_key)\n            return\n\n        self.redis.set(self.fence_key, payload.model_dump_json(), ex=self.FENCE_TTL)\n        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n\n    def set_active(self) -> None:\n        \"\"\"This sets a signal to keep the permissioning flow from getting cleaned up within\n        the expiration time.\n\n        The slack in timing is needed to avoid race conditions where simply checking\n        the celery queue and task status could result in race conditions.\"\"\"\n        self.redis.set(self.active_key, 0, ex=self.ACTIVE_TTL)\n\n    def active(self) -> bool:\n        return bool(self.redis.exists(self.active_key))\n\n    def _generate_task_id(self) -> str:\n        # celery's default task id format is \"dd32ded3-00aa-4884-8b21-42f8332e7fac\"\n        # we prefix the task id so it's easier to keep track of who created the task\n        # aka \"connectordeletion_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac\"\n\n        return f\"{self.PREFIX}_{self.id}_{uuid4()}\"\n\n    def generate_tasks(\n        self,\n        celery_app: Celery,\n        db_session: Session,\n        lock: RedisLock,\n    ) -> int | None:\n        \"\"\"Returns None if the cc_pair doesn't exist.\n        Otherwise, returns an int with the number of generated tasks.\"\"\"\n        last_lock_time = time.monotonic()\n\n        cc_pair = get_connector_credential_pair_from_id(\n            db_session=db_session,\n            cc_pair_id=int(self.id),\n        )\n        if not cc_pair:\n            return None\n\n        num_tasks_sent = 0\n\n        stmt = construct_document_id_select_for_connector_credential_pair(\n            cc_pair.connector_id, cc_pair.credential_id\n        )\n        for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):\n            doc_id = cast(str, doc_id)\n            current_time = time.monotonic()\n            if current_time - last_lock_time >= (\n                CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4\n            ):\n                lock.reacquire()\n                last_lock_time = current_time\n\n            custom_task_id = self._generate_task_id()\n\n            # add to the tracking taskset in redis BEFORE creating the celery task.\n            # note that for the moment we are using a single taskset key, not differentiated by cc_pair id\n            self.redis.sadd(self.taskset_key, custom_task_id)\n            self.redis.expire(self.taskset_key, self.TASKSET_TTL)\n\n            # Priority on sync's triggered by new indexing should be medium\n            celery_app.send_task(\n                OnyxCeleryTask.DOCUMENT_BY_CC_PAIR_CLEANUP_TASK,\n                kwargs=dict(\n                    document_id=doc_id,\n                    connector_id=cc_pair.connector_id,\n                    credential_id=cc_pair.credential_id,\n                    tenant_id=self.tenant_id,\n                ),\n                queue=OnyxCeleryQueues.CONNECTOR_DELETION,\n                task_id=custom_task_id,\n                priority=OnyxCeleryPriority.MEDIUM,\n                ignore_result=True,\n            )\n\n            num_tasks_sent += 1\n\n        return num_tasks_sent\n\n    def reset(self) -> None:\n        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n        self.redis.delete(self.active_key)\n        self.redis.delete(self.taskset_key)\n        self.redis.delete(self.fence_key)\n\n    @staticmethod\n    def remove_from_taskset(id: int, task_id: str, r: redis.Redis) -> None:\n        taskset_key = f\"{RedisConnectorDelete.TASKSET_PREFIX}_{id}\"\n        r.srem(taskset_key, task_id)\n        return\n\n    @staticmethod\n    def reset_all(r: redis.Redis) -> None:\n        \"\"\"Deletes all redis values for all connectors\"\"\"\n        for key in r.scan_iter(RedisConnectorDelete.ACTIVE_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisConnectorDelete.TASKSET_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisConnectorDelete.FENCE_PREFIX + \"*\"):\n            r.delete(key)\n"
  },
  {
    "path": "backend/onyx/redis/redis_connector_doc_perm_sync.py",
    "content": "import time\nfrom datetime import datetime\nfrom logging import Logger\nfrom typing import Any\nfrom typing import cast\nfrom typing import NamedTuple\n\nimport redis\nfrom pydantic import BaseModel\nfrom redis.lock import Lock as RedisLock\n\nfrom onyx.access.models import DocExternalAccess\nfrom onyx.access.models import ElementExternalAccess\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\n\n\nclass PermissionSyncResult(NamedTuple):\n    \"\"\"Result of a permission sync operation.\n\n    Attributes:\n        num_updated: Number of documents successfully updated\n        num_errors: Number of documents that failed to update\n    \"\"\"\n\n    num_updated: int\n    num_errors: int\n\n\nclass RedisConnectorPermissionSyncPayload(BaseModel):\n    id: str\n    submitted: datetime\n    started: datetime | None\n    celery_task_id: str | None\n\n\nclass RedisConnectorPermissionSync:\n    \"\"\"Manages interactions with redis for doc permission sync tasks. Should only be accessed\n    through RedisConnector.\"\"\"\n\n    PREFIX = \"connectordocpermissionsync\"\n\n    FENCE_PREFIX = f\"{PREFIX}_fence\"\n    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks\n\n    # phase 1 - geneartor task and progress signals\n    GENERATORTASK_PREFIX = f\"{PREFIX}+generator\"  # connectorpermissions+generator\n    GENERATOR_PROGRESS_PREFIX = (\n        PREFIX + \"_generator_progress\"\n    )  # connectorpermissions_generator_progress\n    GENERATOR_COMPLETE_PREFIX = (\n        PREFIX + \"_generator_complete\"\n    )  # connectorpermissions_generator_complete\n\n    TASKSET_PREFIX = f\"{PREFIX}_taskset\"  # connectorpermissions_taskset\n    SUBTASK_PREFIX = f\"{PREFIX}+sub\"  # connectorpermissions+sub\n\n    # used to signal the overall workflow is still active\n    # it's impossible to get the exact state of the system at a single point in time\n    # so we need a signal with a TTL to bridge gaps in our checks\n    ACTIVE_PREFIX = PREFIX + \"_active\"\n    ACTIVE_TTL = CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT * 2\n\n    def __init__(self, tenant_id: str, id: int, redis: redis.Redis) -> None:\n        self.tenant_id: str = tenant_id\n        self.id = id\n        self.redis = redis\n\n        self.fence_key: str = f\"{self.FENCE_PREFIX}_{id}\"\n        self.generator_task_key = f\"{self.GENERATORTASK_PREFIX}_{id}\"\n        self.generator_progress_key = f\"{self.GENERATOR_PROGRESS_PREFIX}_{id}\"\n        self.generator_complete_key = f\"{self.GENERATOR_COMPLETE_PREFIX}_{id}\"\n\n        self.taskset_key = f\"{self.TASKSET_PREFIX}_{id}\"\n\n        self.subtask_prefix: str = f\"{self.SUBTASK_PREFIX}_{id}\"\n        self.active_key = f\"{self.ACTIVE_PREFIX}_{id}\"\n\n    def taskset_clear(self) -> None:\n        self.redis.delete(self.taskset_key)\n\n    def generator_clear(self) -> None:\n        self.redis.delete(self.generator_progress_key)\n        self.redis.delete(self.generator_complete_key)\n\n    def get_remaining(self) -> int:\n        remaining = cast(int, self.redis.scard(self.taskset_key))\n        return remaining\n\n    def get_active_task_count(self) -> int:\n        \"\"\"Count of active permission sync tasks\"\"\"\n        count = 0\n        for _ in self.redis.sscan_iter(\n            OnyxRedisConstants.ACTIVE_FENCES,\n            RedisConnectorPermissionSync.FENCE_PREFIX + \"*\",\n            count=SCAN_ITER_COUNT_DEFAULT,\n        ):\n            count += 1\n        return count\n\n    @property\n    def fenced(self) -> bool:\n        return bool(self.redis.exists(self.fence_key))\n\n    @property\n    def payload(self) -> RedisConnectorPermissionSyncPayload | None:\n        # read related data and evaluate/print task progress\n        fence_bytes = cast(Any, self.redis.get(self.fence_key))\n        if fence_bytes is None:\n            return None\n\n        fence_str = fence_bytes.decode(\"utf-8\")\n        payload = RedisConnectorPermissionSyncPayload.model_validate_json(\n            cast(str, fence_str)\n        )\n\n        return payload\n\n    def set_fence(\n        self,\n        payload: RedisConnectorPermissionSyncPayload | None,\n    ) -> None:\n        if not payload:\n            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n            self.redis.delete(self.fence_key)\n            return\n\n        self.redis.set(self.fence_key, payload.model_dump_json(), ex=self.FENCE_TTL)\n        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n\n    def set_active(self) -> None:\n        \"\"\"This sets a signal to keep the permissioning flow from getting cleaned up within\n        the expiration time.\n\n        The slack in timing is needed to avoid race conditions where simply checking\n        the celery queue and task status could result in race conditions.\"\"\"\n        self.redis.set(self.active_key, 0, ex=self.ACTIVE_TTL)\n\n    def active(self) -> bool:\n        return bool(self.redis.exists(self.active_key))\n\n    @property\n    def generator_complete(self) -> int | None:\n        \"\"\"the fence payload is an int representing the starting number of\n        permission sync tasks to be processed ... just after the generator completes.\"\"\"\n        fence_bytes = self.redis.get(self.generator_complete_key)\n        if fence_bytes is None:\n            return None\n\n        if fence_bytes == b\"None\":\n            return None\n\n        fence_int = int(cast(bytes, fence_bytes).decode())\n        return fence_int\n\n    @generator_complete.setter\n    def generator_complete(self, payload: int | None) -> None:\n        \"\"\"Set the payload to an int to set the fence, otherwise if None it will\n        be deleted\"\"\"\n        if payload is None:\n            self.redis.delete(self.generator_complete_key)\n            return\n\n        self.redis.set(self.generator_complete_key, payload, ex=self.FENCE_TTL)\n\n    def update_db(\n        self,\n        lock: RedisLock | None,\n        new_permissions: list[ElementExternalAccess],\n        source_string: str,\n        connector_id: int,\n        credential_id: int,\n        task_logger: Logger | None = None,\n    ) -> PermissionSyncResult:\n        \"\"\"Update permissions for documents and hierarchy nodes.\n\n        Returns:\n            PermissionSyncResult containing counts of successful updates and errors\n        \"\"\"\n        last_lock_time = time.monotonic()\n\n        element_update_permissions_fn = fetch_versioned_implementation(\n            \"onyx.background.celery.tasks.doc_permission_syncing.tasks\",\n            \"element_update_permissions\",\n        )\n\n        num_permissions = 0\n        num_errors = 0\n        # Create a task for each permission sync\n        for permissions in new_permissions:\n            current_time = time.monotonic()\n            if lock and current_time - last_lock_time >= (\n                CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4\n            ):\n                lock.reacquire()\n                last_lock_time = current_time\n\n            if (\n                permissions.external_access.num_entries\n                > permissions.external_access.MAX_NUM_ENTRIES\n            ):\n                if task_logger:\n                    num_users = len(permissions.external_access.external_user_emails)\n                    num_groups = len(\n                        permissions.external_access.external_user_group_ids\n                    )\n                    element_id = (\n                        permissions.doc_id\n                        if isinstance(permissions, DocExternalAccess)\n                        else permissions.raw_node_id\n                    )\n                    task_logger.warning(\n                        f\"Permissions length exceeded, skipping...: \"\n                        f\"{element_id} \"\n                        f\"{num_users=} {num_groups=} \"\n                        f\"{permissions.external_access.MAX_NUM_ENTRIES=}\"\n                    )\n                continue\n\n            # NOTE(rkuo): this used to fire a task instead of directly writing to the DB,\n            # but the permissions can be excessively large if sent over the wire.\n            # On the other hand, the downside of doing db updates here is that we can\n            # block and fail if we can't make the calls to the DB ... but that's probably\n            # a rare enough case to be acceptable.\n\n            # This can internally exception due to db issues but still continue\n            # Catch exceptions per-element to avoid breaking the entire sync\n            try:\n                element_update_permissions_fn(\n                    self.tenant_id,\n                    permissions,\n                    source_string,\n                    connector_id,\n                    credential_id,\n                )\n\n                num_permissions += 1\n            except Exception:\n                num_errors += 1\n                if task_logger:\n                    element_id = (\n                        permissions.doc_id\n                        if isinstance(permissions, DocExternalAccess)\n                        else permissions.raw_node_id\n                    )\n                    task_logger.exception(\n                        f\"Failed to update permissions for element {element_id}\"\n                    )\n                # Continue processing other elements\n\n        return PermissionSyncResult(num_updated=num_permissions, num_errors=num_errors)\n\n    def reset(self) -> None:\n        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n        self.redis.delete(self.active_key)\n        self.redis.delete(self.generator_progress_key)\n        self.redis.delete(self.generator_complete_key)\n        self.redis.delete(self.taskset_key)\n        self.redis.delete(self.fence_key)\n\n    @staticmethod\n    def remove_from_taskset(id: int, task_id: str, r: redis.Redis) -> None:\n        taskset_key = f\"{RedisConnectorPermissionSync.TASKSET_PREFIX}_{id}\"\n        r.srem(taskset_key, task_id)\n        return\n\n    @staticmethod\n    def reset_all(r: redis.Redis) -> None:\n        \"\"\"Deletes all redis values for all connectors\"\"\"\n        for key in r.scan_iter(RedisConnectorPermissionSync.ACTIVE_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisConnectorPermissionSync.TASKSET_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(\n            RedisConnectorPermissionSync.GENERATOR_COMPLETE_PREFIX + \"*\"\n        ):\n            r.delete(key)\n\n        for key in r.scan_iter(\n            RedisConnectorPermissionSync.GENERATOR_PROGRESS_PREFIX + \"*\"\n        ):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisConnectorPermissionSync.FENCE_PREFIX + \"*\"):\n            r.delete(key)\n"
  },
  {
    "path": "backend/onyx/redis/redis_connector_ext_group_sync.py",
    "content": "from datetime import datetime\nfrom typing import cast\n\nimport redis\nfrom celery import Celery\nfrom pydantic import BaseModel\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT\n\n\nclass RedisConnectorExternalGroupSyncPayload(BaseModel):\n    id: str\n    submitted: datetime\n    started: datetime | None\n    celery_task_id: str | None\n\n\nclass RedisConnectorExternalGroupSync:\n    \"\"\"Manages interactions with redis for external group syncing tasks. Should only be accessed\n    through RedisConnector.\"\"\"\n\n    PREFIX = \"connectorexternalgroupsync\"\n\n    FENCE_PREFIX = f\"{PREFIX}_fence\"\n    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks\n\n    # phase 1 - geneartor task and progress signals\n    GENERATORTASK_PREFIX = f\"{PREFIX}+generator\"  # connectorexternalgroupsync+generator\n    GENERATOR_PROGRESS_PREFIX = (\n        PREFIX + \"_generator_progress\"\n    )  # connectorexternalgroupsync_generator_progress\n    GENERATOR_COMPLETE_PREFIX = (\n        PREFIX + \"_generator_complete\"\n    )  # connectorexternalgroupsync_generator_complete\n\n    TASKSET_PREFIX = f\"{PREFIX}_taskset\"  # connectorexternalgroupsync_taskset\n    SUBTASK_PREFIX = f\"{PREFIX}+sub\"  # connectorexternalgroupsync+sub\n\n    # used to signal the overall workflow is still active\n    # it's impossible to get the exact state of the system at a single point in time\n    # so we need a signal with a TTL to bridge gaps in our checks\n    ACTIVE_PREFIX = PREFIX + \"_active\"\n    ACTIVE_TTL = 3600\n\n    def __init__(self, tenant_id: str, id: int, redis: redis.Redis) -> None:\n        self.tenant_id: str = tenant_id\n        self.id = id\n        self.redis = redis\n\n        self.fence_key: str = f\"{self.FENCE_PREFIX}_{id}\"\n        self.generator_task_key = f\"{self.GENERATORTASK_PREFIX}_{id}\"\n        self.generator_progress_key = f\"{self.GENERATOR_PROGRESS_PREFIX}_{id}\"\n        self.generator_complete_key = f\"{self.GENERATOR_COMPLETE_PREFIX}_{id}\"\n\n        self.taskset_key = f\"{self.TASKSET_PREFIX}_{id}\"\n\n        self.subtask_prefix: str = f\"{self.SUBTASK_PREFIX}_{id}\"\n        self.active_key = f\"{self.ACTIVE_PREFIX}_{id}\"\n\n    def taskset_clear(self) -> None:\n        self.redis.delete(self.taskset_key)\n\n    def generator_clear(self) -> None:\n        self.redis.delete(self.generator_progress_key)\n        self.redis.delete(self.generator_complete_key)\n\n    def get_remaining(self) -> int:\n        # todo: move into fence\n        remaining = cast(int, self.redis.scard(self.taskset_key))\n        return remaining\n\n    def get_active_task_count(self) -> int:\n        \"\"\"Count of active external group syncing tasks\"\"\"\n        count = 0\n        for _ in self.redis.sscan_iter(\n            OnyxRedisConstants.ACTIVE_FENCES,\n            RedisConnectorExternalGroupSync.FENCE_PREFIX + \"*\",\n            count=SCAN_ITER_COUNT_DEFAULT,\n        ):\n            count += 1\n        return count\n\n    @property\n    def fenced(self) -> bool:\n        return bool(self.redis.exists(self.fence_key))\n\n    @property\n    def payload(self) -> RedisConnectorExternalGroupSyncPayload | None:\n        # read related data and evaluate/print task progress\n        fence_raw = self.redis.get(self.fence_key)\n        if fence_raw is None:\n            return None\n\n        fence_bytes = cast(bytes, fence_raw)\n        fence_str = fence_bytes.decode(\"utf-8\")\n        payload = RedisConnectorExternalGroupSyncPayload.model_validate_json(\n            cast(str, fence_str)\n        )\n\n        return payload\n\n    def set_fence(\n        self,\n        payload: RedisConnectorExternalGroupSyncPayload | None,\n    ) -> None:\n        if not payload:\n            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n            self.redis.delete(self.fence_key)\n            return\n\n        self.redis.set(self.fence_key, payload.model_dump_json(), ex=self.FENCE_TTL)\n        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n\n    def set_active(self) -> None:\n        \"\"\"This sets a signal to keep the permissioning flow from getting cleaned up within\n        the expiration time.\n\n        The slack in timing is needed to avoid race conditions where simply checking\n        the celery queue and task status could result in race conditions.\"\"\"\n        self.redis.set(self.active_key, 0, ex=self.ACTIVE_TTL)\n\n    def active(self) -> bool:\n        return bool(self.redis.exists(self.active_key))\n\n    @property\n    def generator_complete(self) -> int | None:\n        \"\"\"the fence payload is an int representing the starting number of\n        external group syncing tasks to be processed ... just after the generator completes.\n        \"\"\"\n        fence_bytes = self.redis.get(self.generator_complete_key)\n        if fence_bytes is None:\n            return None\n\n        if fence_bytes == b\"None\":\n            return None\n\n        fence_int = int(cast(bytes, fence_bytes).decode())\n        return fence_int\n\n    @generator_complete.setter\n    def generator_complete(self, payload: int | None) -> None:\n        \"\"\"Set the payload to an int to set the fence, otherwise if None it will\n        be deleted\"\"\"\n        if payload is None:\n            self.redis.delete(self.generator_complete_key)\n            return\n\n        self.redis.set(self.generator_complete_key, payload, ex=self.FENCE_TTL)\n\n    def generate_tasks(\n        self,\n        celery_app: Celery,\n        db_session: Session,\n        lock: RedisLock | None,\n    ) -> int | None:\n        pass\n\n    def reset(self) -> None:\n        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n        self.redis.delete(self.active_key)\n        self.redis.delete(self.generator_progress_key)\n        self.redis.delete(self.generator_complete_key)\n        self.redis.delete(self.taskset_key)\n        self.redis.delete(self.fence_key)\n\n    @staticmethod\n    def remove_from_taskset(id: int, task_id: str, r: redis.Redis) -> None:\n        taskset_key = f\"{RedisConnectorExternalGroupSync.TASKSET_PREFIX}_{id}\"\n        r.srem(taskset_key, task_id)\n        return\n\n    @staticmethod\n    def reset_all(r: redis.Redis) -> None:\n        \"\"\"Deletes all redis values for all connectors\"\"\"\n        for key in r.scan_iter(RedisConnectorExternalGroupSync.ACTIVE_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisConnectorExternalGroupSync.TASKSET_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(\n            RedisConnectorExternalGroupSync.GENERATOR_COMPLETE_PREFIX + \"*\"\n        ):\n            r.delete(key)\n\n        for key in r.scan_iter(\n            RedisConnectorExternalGroupSync.GENERATOR_PROGRESS_PREFIX + \"*\"\n        ):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisConnectorExternalGroupSync.FENCE_PREFIX + \"*\"):\n            r.delete(key)\n"
  },
  {
    "path": "backend/onyx/redis/redis_connector_index.py",
    "content": "from datetime import datetime\n\nfrom pydantic import BaseModel\n\n\nclass RedisConnectorIndexPayload(BaseModel):\n    index_attempt_id: int | None\n    started: datetime | None\n    submitted: datetime\n    celery_task_id: str | None\n"
  },
  {
    "path": "backend/onyx/redis/redis_connector_prune.py",
    "content": "import time\nfrom datetime import datetime\nfrom typing import cast\nfrom uuid import uuid4\n\nimport redis\nfrom celery import Celery\nfrom pydantic import BaseModel\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import CELERY_PRUNING_LOCK_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT\n\n\nclass RedisConnectorPrunePayload(BaseModel):\n    id: str\n    submitted: datetime\n    started: datetime | None\n    celery_task_id: str | None\n\n\nclass RedisConnectorPrune:\n    \"\"\"Manages interactions with redis for pruning tasks. Should only be accessed\n    through RedisConnector.\"\"\"\n\n    PREFIX = \"connectorpruning\"\n\n    FENCE_PREFIX = f\"{PREFIX}_fence\"\n    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks\n\n    # phase 1 - geneartor task and progress signals\n    GENERATORTASK_PREFIX = f\"{PREFIX}+generator\"  # connectorpruning+generator\n    GENERATOR_PROGRESS_PREFIX = (\n        PREFIX + \"_generator_progress\"\n    )  # connectorpruning_generator_progress\n    GENERATOR_COMPLETE_PREFIX = (\n        PREFIX + \"_generator_complete\"\n    )  # connectorpruning_generator_complete\n\n    TASKSET_PREFIX = f\"{PREFIX}_taskset\"  # connectorpruning_taskset\n    TASKSET_TTL = FENCE_TTL\n    SUBTASK_PREFIX = f\"{PREFIX}+sub\"  # connectorpruning+sub\n\n    # used to signal the overall workflow is still active\n    # it's impossible to get the exact state of the system at a single point in time\n    # so we need a signal with a TTL to bridge gaps in our checks\n    ACTIVE_PREFIX = PREFIX + \"_active\"\n    ACTIVE_TTL = CELERY_PRUNING_LOCK_TIMEOUT * 2\n\n    def __init__(self, tenant_id: str, id: int, redis: redis.Redis) -> None:\n        self.tenant_id: str = tenant_id\n        self.id = id\n        self.redis = redis\n\n        self.fence_key: str = f\"{self.FENCE_PREFIX}_{id}\"\n        self.generator_task_key = f\"{self.GENERATORTASK_PREFIX}_{id}\"\n        self.generator_progress_key = f\"{self.GENERATOR_PROGRESS_PREFIX}_{id}\"\n        self.generator_complete_key = f\"{self.GENERATOR_COMPLETE_PREFIX}_{id}\"\n\n        self.taskset_key = f\"{self.TASKSET_PREFIX}_{id}\"\n\n        self.subtask_prefix: str = f\"{self.SUBTASK_PREFIX}_{id}\"\n        self.active_key = f\"{self.ACTIVE_PREFIX}_{id}\"\n\n    def taskset_clear(self) -> None:\n        self.redis.delete(self.taskset_key)\n\n    def generator_clear(self) -> None:\n        self.redis.delete(self.generator_progress_key)\n        self.redis.delete(self.generator_complete_key)\n\n    def get_remaining(self) -> int:\n        # todo: move into fence\n        remaining = cast(int, self.redis.scard(self.taskset_key))\n        return remaining\n\n    def get_active_task_count(self) -> int:\n        \"\"\"Count of active pruning tasks\"\"\"\n        count = 0\n        for _ in self.redis.sscan_iter(\n            OnyxRedisConstants.ACTIVE_FENCES,\n            RedisConnectorPrune.FENCE_PREFIX + \"*\",\n            count=SCAN_ITER_COUNT_DEFAULT,\n        ):\n            count += 1\n        return count\n\n    @property\n    def fenced(self) -> bool:\n        return bool(self.redis.exists(self.fence_key))\n\n    @property\n    def payload(self) -> RedisConnectorPrunePayload | None:\n        # read related data and evaluate/print task progress\n        fence_bytes = cast(bytes, self.redis.get(self.fence_key))\n        if fence_bytes is None:\n            return None\n\n        fence_str = fence_bytes.decode(\"utf-8\")\n        payload = RedisConnectorPrunePayload.model_validate_json(cast(str, fence_str))\n\n        return payload\n\n    def set_fence(\n        self,\n        payload: RedisConnectorPrunePayload | None,\n    ) -> None:\n        if not payload:\n            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n            self.redis.delete(self.fence_key)\n            return\n\n        self.redis.set(self.fence_key, payload.model_dump_json(), ex=self.FENCE_TTL)\n        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n\n    def set_active(self) -> None:\n        \"\"\"This sets a signal to keep the permissioning flow from getting cleaned up within\n        the expiration time.\n\n        The slack in timing is needed to avoid race conditions where simply checking\n        the celery queue and task status could result in race conditions.\"\"\"\n        self.redis.set(self.active_key, 0, ex=self.ACTIVE_TTL)\n\n    def active(self) -> bool:\n        return bool(self.redis.exists(self.active_key))\n\n    @property\n    def generator_complete(self) -> int | None:\n        \"\"\"the fence payload is an int representing the starting number of\n        pruning tasks to be processed ... just after the generator completes.\"\"\"\n        fence_bytes = self.redis.get(self.generator_complete_key)\n        if fence_bytes is None:\n            return None\n\n        fence_int = int(cast(bytes, fence_bytes))\n        return fence_int\n\n    @generator_complete.setter\n    def generator_complete(self, payload: int | None) -> None:\n        \"\"\"Set the payload to an int to set the fence, otherwise if None it will\n        be deleted\"\"\"\n        if payload is None:\n            self.redis.delete(self.generator_complete_key)\n            return\n\n        self.redis.set(self.generator_complete_key, payload, ex=self.FENCE_TTL)\n\n    def generate_tasks(\n        self,\n        documents_to_prune: set[str],\n        celery_app: Celery,\n        db_session: Session,\n        lock: RedisLock | None,\n    ) -> int | None:\n        last_lock_time = time.monotonic()\n\n        async_results = []\n        cc_pair = get_connector_credential_pair_from_id(\n            db_session=db_session,\n            cc_pair_id=int(self.id),\n        )\n        if not cc_pair:\n            return None\n\n        for doc_id in documents_to_prune:\n            current_time = time.monotonic()\n            if lock and current_time - last_lock_time >= (\n                CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4\n            ):\n                lock.reacquire()\n                last_lock_time = current_time\n\n            # celery's default task id format is \"dd32ded3-00aa-4884-8b21-42f8332e7fac\"\n            # the actual redis key is \"celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac\"\n            # we prefix the task id so it's easier to keep track of who created the task\n            # aka \"documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac\"\n            custom_task_id = f\"{self.subtask_prefix}_{uuid4()}\"\n\n            # add to the tracking taskset in redis BEFORE creating the celery task.\n            self.redis.sadd(self.taskset_key, custom_task_id)\n            self.redis.expire(self.taskset_key, self.TASKSET_TTL)\n\n            # Priority on sync's triggered by new indexing should be medium\n            result = celery_app.send_task(\n                OnyxCeleryTask.DOCUMENT_BY_CC_PAIR_CLEANUP_TASK,\n                kwargs=dict(\n                    document_id=doc_id,\n                    connector_id=cc_pair.connector_id,\n                    credential_id=cc_pair.credential_id,\n                    tenant_id=self.tenant_id,\n                ),\n                queue=OnyxCeleryQueues.CONNECTOR_DELETION,\n                task_id=custom_task_id,\n                priority=OnyxCeleryPriority.MEDIUM,\n                ignore_result=True,\n            )\n\n            async_results.append(result)\n\n        return len(async_results)\n\n    def reset(self) -> None:\n        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n        self.redis.delete(self.active_key)\n        self.redis.delete(self.generator_progress_key)\n        self.redis.delete(self.generator_complete_key)\n        self.redis.delete(self.taskset_key)\n        self.redis.delete(self.fence_key)\n\n    @staticmethod\n    def remove_from_taskset(id: int, task_id: str, r: redis.Redis) -> None:\n        taskset_key = f\"{RedisConnectorPrune.TASKSET_PREFIX}_{id}\"\n        r.srem(taskset_key, task_id)\n        return\n\n    @staticmethod\n    def reset_all(r: redis.Redis) -> None:\n        \"\"\"Deletes all redis values for all connectors\"\"\"\n        for key in r.scan_iter(RedisConnectorPrune.ACTIVE_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisConnectorPrune.TASKSET_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisConnectorPrune.GENERATOR_COMPLETE_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisConnectorPrune.GENERATOR_PROGRESS_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisConnectorPrune.FENCE_PREFIX + \"*\"):\n            r.delete(key)\n"
  },
  {
    "path": "backend/onyx/redis/redis_connector_stop.py",
    "content": "import redis\n\n\nclass RedisConnectorStop:\n    \"\"\"Manages interactions with redis for stop signaling. Should only be accessed\n    through RedisConnector.\"\"\"\n\n    PREFIX = \"connectorstop\"\n    FENCE_PREFIX = f\"{PREFIX}_fence\"\n    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks\n\n    # if this timeout is exceeded, the caller may decide to take more\n    # drastic measures\n    TIMEOUT_PREFIX = f\"{PREFIX}_timeout\"\n    TIMEOUT_TTL = 300\n\n    def __init__(self, tenant_id: str, id: int, redis: redis.Redis) -> None:\n        self.tenant_id: str = tenant_id\n        self.id: int = id\n        self.redis = redis\n\n        self.fence_key: str = f\"{self.FENCE_PREFIX}_{id}\"\n        self.timeout_key: str = f\"{self.TIMEOUT_PREFIX}_{id}\"\n\n    @property\n    def fenced(self) -> bool:\n        return bool(self.redis.exists(self.fence_key))\n\n    def set_fence(self, value: bool) -> None:\n        if not value:\n            self.redis.delete(self.fence_key)\n            return\n\n        self.redis.set(self.fence_key, 0, ex=self.FENCE_TTL)\n\n    @property\n    def timed_out(self) -> bool:\n        return not bool(self.redis.exists(self.timeout_key))\n\n    def set_timeout(self) -> None:\n        \"\"\"After calling this, call timed_out to determine if the timeout has been\n        exceeded.\"\"\"\n        self.redis.set(f\"{self.timeout_key}\", 0, ex=self.TIMEOUT_TTL)\n\n    @staticmethod\n    def reset_all(r: redis.Redis) -> None:\n        for key in r.scan_iter(RedisConnectorStop.FENCE_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisConnectorStop.TIMEOUT_PREFIX + \"*\"):\n            r.delete(key)\n"
  },
  {
    "path": "backend/onyx/redis/redis_connector_utils.py",
    "content": "from sqlalchemy.orm import Session\n\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import TaskStatus\nfrom onyx.db.models import TaskQueueState\nfrom onyx.redis.redis_connector import RedisConnector\nfrom onyx.server.documents.models import DeletionAttemptSnapshot\n\n\ndef _get_deletion_status(\n    connector_id: int,\n    credential_id: int,\n    db_session: Session,\n    tenant_id: str,\n) -> TaskQueueState | None:\n    \"\"\"We no longer store TaskQueueState in the DB for a deletion attempt.\n    This function populates TaskQueueState by just checking redis.\n    \"\"\"\n    cc_pair = get_connector_credential_pair(\n        connector_id=connector_id, credential_id=credential_id, db_session=db_session\n    )\n    if not cc_pair:\n        return None\n\n    redis_connector = RedisConnector(tenant_id, cc_pair.id)\n    if redis_connector.delete.fenced:\n        return TaskQueueState(\n            task_id=\"\",\n            task_name=redis_connector.delete.fence_key,\n            status=TaskStatus.STARTED,\n        )\n\n    if cc_pair.status == ConnectorCredentialPairStatus.DELETING:\n        return TaskQueueState(\n            task_id=\"\",\n            task_name=redis_connector.delete.fence_key,\n            status=TaskStatus.PENDING,\n        )\n\n    return None\n\n\ndef get_deletion_attempt_snapshot(\n    connector_id: int,\n    credential_id: int,\n    db_session: Session,\n    tenant_id: str,\n) -> DeletionAttemptSnapshot | None:\n    deletion_task = _get_deletion_status(\n        connector_id, credential_id, db_session, tenant_id\n    )\n    if not deletion_task:\n        return None\n\n    return DeletionAttemptSnapshot(\n        connector_id=connector_id,\n        credential_id=credential_id,\n        status=deletion_task.status,\n    )\n"
  },
  {
    "path": "backend/onyx/redis/redis_document_set.py",
    "content": "import time\nfrom typing import cast\nfrom uuid import uuid4\n\nimport redis\nfrom celery import Celery\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DB_YIELD_PER_DEFAULT\nfrom onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.db.document_set import construct_document_id_select_by_docset\nfrom onyx.redis.redis_object_helper import RedisObjectHelper\n\n\nclass RedisDocumentSet(RedisObjectHelper):\n    PREFIX = \"documentset\"\n    FENCE_PREFIX = PREFIX + \"_fence\"\n    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks\n    TASKSET_PREFIX = PREFIX + \"_taskset\"\n    TASKSET_TTL = FENCE_TTL\n\n    def __init__(self, tenant_id: str, id: int) -> None:\n        super().__init__(tenant_id, str(id))\n\n    @property\n    def fenced(self) -> bool:\n        return bool(self.redis.exists(self.fence_key))\n\n    def set_fence(self, payload: int | None) -> None:\n        if payload is None:\n            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n            self.redis.delete(self.fence_key)\n            return\n\n        self.redis.set(self.fence_key, payload, ex=self.FENCE_TTL)\n        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n\n    @property\n    def payload(self) -> int | None:\n        bytes = self.redis.get(self.fence_key)\n        if bytes is None:\n            return None\n\n        progress = int(cast(int, bytes))\n        return progress\n\n    def generate_tasks(\n        self,\n        max_tasks: int,  # noqa: ARG002\n        celery_app: Celery,\n        db_session: Session,\n        redis_client: Redis,\n        lock: RedisLock,\n        tenant_id: str,\n    ) -> tuple[int, int] | None:\n        \"\"\"Max tasks is ignored for now until we can build the logic to mark the\n        document set up to date over multiple batches.\n        \"\"\"\n        last_lock_time = time.monotonic()\n\n        num_tasks_sent = 0\n\n        stmt = construct_document_id_select_by_docset(int(self._id), current_only=False)\n        for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):\n            doc_id = cast(str, doc_id)\n            current_time = time.monotonic()\n            if current_time - last_lock_time >= (\n                CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4\n            ):\n                lock.reacquire()\n                last_lock_time = current_time\n\n            # celery's default task id format is \"dd32ded3-00aa-4884-8b21-42f8332e7fac\"\n            # the key for the result is \"celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac\"\n            # we prefix the task id so it's easier to keep track of who created the task\n            # aka \"documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac\"\n            custom_task_id = f\"{self.task_id_prefix}_{uuid4()}\"\n\n            # add to the set BEFORE creating the task.\n            redis_client.sadd(self.taskset_key, custom_task_id)\n            redis_client.expire(self.taskset_key, self.TASKSET_TTL)\n\n            celery_app.send_task(\n                OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,\n                kwargs=dict(document_id=doc_id, tenant_id=tenant_id),\n                queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,\n                task_id=custom_task_id,\n                priority=OnyxCeleryPriority.MEDIUM,\n            )\n\n            num_tasks_sent += 1\n\n        return num_tasks_sent, num_tasks_sent\n\n    def reset(self) -> None:\n        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n        self.redis.delete(self.taskset_key)\n        self.redis.delete(self.fence_key)\n\n    @staticmethod\n    def reset_all(r: redis.Redis) -> None:\n        for key in r.scan_iter(RedisDocumentSet.TASKSET_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + \"*\"):\n            r.delete(key)\n"
  },
  {
    "path": "backend/onyx/redis/redis_hierarchy.py",
    "content": "\"\"\"Redis cache operations for hierarchy node ancestor resolution.\n\nThis module provides a Redis-based cache for hierarchy node parent relationships,\nenabling fast ancestor path resolution without repeated database queries.\n\nThe cache stores node_id -> parent_id mappings for all hierarchy nodes of a given\nsource type. When resolving ancestors for a document, we walk up the tree using\nRedis lookups instead of database queries.\n\nCache Strategy:\n- Nodes are cached per source type with a 6-hour TTL\n- During docfetching, nodes are added to cache as they're upserted to Postgres\n- If the cache is stale (TTL expired during long-running job), one worker does\n  a full refresh from DB while others wait\n- If a node is still not found after refresh, we log an error and fall back to\n  using only the SOURCE-type node as the ancestor\n\"\"\"\n\nfrom typing import cast\nfrom typing import TYPE_CHECKING\n\nfrom pydantic import BaseModel\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.db.hierarchy import ensure_source_node_exists as db_ensure_source_node_exists\nfrom onyx.db.hierarchy import get_all_hierarchy_nodes_for_source\nfrom onyx.utils.logger import setup_logger\n\nif TYPE_CHECKING:\n    from onyx.db.models import HierarchyNode as DBHierarchyNode\n\nlogger = setup_logger()\n\n# Cache TTL: 6 hours in seconds\nHIERARCHY_CACHE_TTL_SECONDS = 6 * 60 * 60\n\n# Lock timeout for cache refresh: 5 minutes\nHIERARCHY_CACHE_LOCK_TIMEOUT_SECONDS = 5 * 60\n\n# Lock acquisition timeout: 60 seconds\nHIERARCHY_CACHE_LOCK_ACQUIRE_TIMEOUT_SECONDS = 60\n\nMAX_DEPTH = 1000\n\n\nclass HierarchyNodeCacheEntry(BaseModel):\n    \"\"\"Represents a hierarchy node for caching purposes.\"\"\"\n\n    node_id: int\n    parent_id: int | None\n    node_type: HierarchyNodeType\n    raw_node_id: str\n\n    @classmethod\n    def from_db_model(cls, node: \"DBHierarchyNode\") -> \"HierarchyNodeCacheEntry\":\n        \"\"\"Create a cache entry from a SQLAlchemy HierarchyNode model.\"\"\"\n        return cls(\n            node_id=node.id,\n            parent_id=node.parent_id,\n            node_type=node.node_type,\n            raw_node_id=node.raw_node_id,\n        )\n\n\ndef _cache_key(source: DocumentSource) -> str:\n    \"\"\"Get the Redis hash key for hierarchy node cache of a given source.\n\n    This hash stores: node_id -> \"parent_id:node_type\"\n    \"\"\"\n    return f\"hierarchy_cache:{source.value}\"\n\n\ndef _raw_id_cache_key(source: DocumentSource) -> str:\n    \"\"\"Get the Redis hash key for raw_node_id -> node_id mapping.\n\n    This hash stores: raw_node_id -> node_id\n    \"\"\"\n    return f\"hierarchy_cache_rawid:{source.value}\"\n\n\ndef _source_node_key(source: DocumentSource) -> str:\n    \"\"\"Get the Redis key for the SOURCE-type node ID of a given source.\n\n    This is a simple string key storing the database ID of the SOURCE node.\n    \"\"\"\n    return f\"hierarchy_source_node:{source.value}\"\n\n\ndef _loading_lock_key(source: DocumentSource) -> str:\n    \"\"\"Get the Redis lock key for cache loading of a given source.\"\"\"\n    return f\"hierarchy_cache_loading:{source.value}\"\n\n\ndef _construct_parent_value(parent_id: int | None, node_type: HierarchyNodeType) -> str:\n    \"\"\"Construct the cached value string from parent_id and node_type.\n\n    Format: \"parent_id:node_type\" where parent_id is empty string if None.\n    \"\"\"\n    parent_str = str(parent_id) if parent_id is not None else \"\"\n    return f\"{parent_str}:{node_type.value}\"\n\n\ndef _unpack_parent_value(value: str) -> tuple[int | None, HierarchyNodeType | None]:\n    \"\"\"Unpack a cached value string back into (parent_id, node_type).\n\n    Returns None for invalid values.\n    \"\"\"\n    parts = value.split(\":\", 1)\n    parent_str = parts[0]\n    node_type_str = parts[1] if len(parts) > 1 else \"\"\n    parent_id = int(parent_str) if parent_str else None\n\n    node_type = HierarchyNodeType(node_type_str) if node_type_str else None\n\n    return parent_id, node_type\n\n\ndef cache_hierarchy_node(\n    redis_client: Redis,\n    source: DocumentSource,\n    entry: HierarchyNodeCacheEntry,\n) -> None:\n    \"\"\"\n    Add or update a single hierarchy node in the Redis cache.\n\n    Called during docfetching when nodes are upserted to Postgres.\n    Stores the parent chain mapping, raw_id -> node_id mapping, and\n    SOURCE node ID (if this is a SOURCE-type node).\n\n    Args:\n        redis_client: Redis client with tenant prefixing\n        source: The document source (e.g., CONFLUENCE, GOOGLE_DRIVE)\n        entry: The hierarchy node cache entry\n    \"\"\"\n    cache_key = _cache_key(source)\n    raw_id_key = _raw_id_cache_key(source)\n\n    # Store parent chain: node_id -> \"parent_id:node_type\"\n    value = _construct_parent_value(entry.parent_id, entry.node_type)\n    redis_client.hset(cache_key, str(entry.node_id), value)\n\n    # Store raw_id -> node_id mapping\n    redis_client.hset(raw_id_key, entry.raw_node_id, str(entry.node_id))\n\n    # If this is the SOURCE node, store its ID in the dedicated key\n    if entry.node_type == HierarchyNodeType.SOURCE:\n        source_node_key = _source_node_key(source)\n        redis_client.set(source_node_key, str(entry.node_id))\n        redis_client.expire(source_node_key, HIERARCHY_CACHE_TTL_SECONDS)\n\n    # Refresh TTL on every write (ensures cache stays alive during long indexing)\n    redis_client.expire(cache_key, HIERARCHY_CACHE_TTL_SECONDS)\n    redis_client.expire(raw_id_key, HIERARCHY_CACHE_TTL_SECONDS)\n\n\ndef cache_hierarchy_nodes_batch(\n    redis_client: Redis,\n    source: DocumentSource,\n    entries: list[HierarchyNodeCacheEntry],\n) -> None:\n    \"\"\"\n    Add or update multiple hierarchy nodes in the Redis cache.\n\n    Args:\n        redis_client: Redis client with tenant prefixing\n        source: The document source\n        entries: List of HierarchyNodeCacheEntry objects\n    \"\"\"\n    if not entries:\n        return\n\n    cache_key = _cache_key(source)\n    raw_id_key = _raw_id_cache_key(source)\n    source_node_key = _source_node_key(source)\n\n    # Build mappings for batch insert\n    parent_mapping: dict[str, str] = {}\n    raw_id_mapping: dict[str, str] = {}\n    source_node_id: int | None = None\n\n    for entry in entries:\n        parent_mapping[str(entry.node_id)] = _construct_parent_value(\n            entry.parent_id, entry.node_type\n        )\n        raw_id_mapping[entry.raw_node_id] = str(entry.node_id)\n\n        # Track the SOURCE node if we encounter it\n        if entry.node_type == HierarchyNodeType.SOURCE:\n            source_node_id = entry.node_id\n\n    # Use hset with mapping for batch insert\n    redis_client.hset(cache_key, mapping=parent_mapping)\n    redis_client.hset(raw_id_key, mapping=raw_id_mapping)\n\n    # Cache the SOURCE node ID if found\n    if source_node_id is not None:\n        redis_client.set(source_node_key, str(source_node_id))\n        redis_client.expire(source_node_key, HIERARCHY_CACHE_TTL_SECONDS)\n\n    redis_client.expire(cache_key, HIERARCHY_CACHE_TTL_SECONDS)\n    redis_client.expire(raw_id_key, HIERARCHY_CACHE_TTL_SECONDS)\n\n\ndef evict_hierarchy_nodes_from_cache(\n    redis_client: Redis,\n    source: DocumentSource,\n    raw_node_ids: list[str],\n) -> None:\n    \"\"\"Remove specific hierarchy nodes from the Redis cache.\n\n    Deletes entries from both the parent-chain hash and the raw_id→node_id hash.\n    \"\"\"\n    if not raw_node_ids:\n        return\n\n    cache_key = _cache_key(source)\n    raw_id_key = _raw_id_cache_key(source)\n\n    # Look up node_ids so we can remove them from the parent-chain hash\n    raw_values = cast(list[str | None], redis_client.hmget(raw_id_key, raw_node_ids))\n    node_id_strs = [v for v in raw_values if v is not None]\n\n    if node_id_strs:\n        redis_client.hdel(cache_key, *node_id_strs)\n    redis_client.hdel(raw_id_key, *raw_node_ids)\n\n\ndef get_node_id_from_raw_id(\n    redis_client: Redis,\n    source: DocumentSource,\n    raw_node_id: str,\n) -> tuple[int | None, bool]:\n    \"\"\"\n    Get the database node_id for a raw_node_id from the cache.\n\n    Returns:\n        Tuple of (node_id or None, found_in_cache)\n        - If found_in_cache is False, the raw_id doesn't exist in cache\n        - If found_in_cache is True, node_id is the database ID\n    \"\"\"\n    raw_id_key = _raw_id_cache_key(source)\n    value = redis_client.hget(raw_id_key, raw_node_id)\n\n    if value is None:\n        return None, False\n\n    # Decode bytes if needed\n    value_str: str\n    if isinstance(value, bytes):\n        value_str = value.decode(\"utf-8\")\n    else:\n        value_str = str(value)\n\n    return int(value_str), True\n\n\ndef get_parent_id_from_cache(\n    redis_client: Redis,\n    source: DocumentSource,\n    node_id: int,\n) -> tuple[int | None, bool]:\n    \"\"\"\n    Get the parent_id for a node from the cache.\n\n    Returns:\n        Tuple of (parent_id or None, found_in_cache)\n        - If found_in_cache is False, the node doesn't exist in cache\n        - If found_in_cache is True, parent_id is the actual parent (or None for root)\n    \"\"\"\n    cache_key = _cache_key(source)\n    value = redis_client.hget(cache_key, str(node_id))\n\n    if value is None:\n        return None, False\n\n    # Decode bytes if needed\n    value_str: str\n    if isinstance(value, bytes):\n        value_str = value.decode(\"utf-8\")\n    else:\n        value_str = str(value)\n\n    parent_id, _ = _unpack_parent_value(value_str)\n    return parent_id, True\n\n\ndef is_cache_populated(redis_client: Redis, source: DocumentSource) -> bool:\n    \"\"\"Check if the cache has any entries for this source.\"\"\"\n    cache_key = _cache_key(source)\n    # redis.exists returns int (number of keys that exist)\n    exists_result: int = redis_client.exists(cache_key)  # type: ignore[assignment]\n    return exists_result > 0\n\n\ndef refresh_hierarchy_cache_from_db(\n    redis_client: Redis,\n    db_session: Session,\n    source: DocumentSource,\n) -> None:\n    \"\"\"\n    Refresh the entire hierarchy cache for a source from the database.\n\n    This function acquires a distributed lock to ensure only one worker\n    performs the refresh. Other workers will wait for the refresh to complete.\n\n    Args:\n        redis_client: Redis client with tenant prefixing\n        db_session: SQLAlchemy session for database access\n        source: The document source to refresh\n    \"\"\"\n\n    lock_key = _loading_lock_key(source)\n\n    # Try to acquire lock - if we can't get it, someone else is refreshing\n    lock: RedisLock = redis_client.lock(\n        lock_key,\n        timeout=HIERARCHY_CACHE_LOCK_TIMEOUT_SECONDS,\n        blocking=True,\n        blocking_timeout=HIERARCHY_CACHE_LOCK_ACQUIRE_TIMEOUT_SECONDS,\n    )\n\n    acquired = lock.acquire(blocking=True)\n    if not acquired:\n        logger.warning(\n            f\"Could not acquire lock for hierarchy cache refresh for source {source.value} - another worker may be refreshing\"\n        )\n        return\n\n    try:\n        # Always refresh from DB when called - new nodes may have been added\n        # since the cache was last populated. The lock ensures only one worker\n        # does the refresh at a time.\n        logger.info(f\"Refreshing hierarchy cache for source {source.value} from DB\")\n\n        # Load all nodes for this source from DB\n        nodes = get_all_hierarchy_nodes_for_source(db_session, source)\n\n        if not nodes:\n            logger.warning(f\"No hierarchy nodes found in DB for source {source.value}\")\n            return\n\n        # Batch insert into cache\n        cache_entries = [HierarchyNodeCacheEntry.from_db_model(node) for node in nodes]\n        cache_hierarchy_nodes_batch(redis_client, source, cache_entries)\n\n        logger.info(\n            f\"Refreshed hierarchy cache for {source.value} with {len(nodes)} nodes\"\n        )\n\n    finally:\n        try:\n            lock.release()\n        except Exception as e:\n            logger.warning(f\"Error releasing hierarchy cache lock: {e}\")\n\n\ndef _walk_ancestor_chain(\n    redis_client: Redis,\n    source: DocumentSource,\n    start_node_id: int,\n    db_session: Session,\n) -> list[int]:\n    \"\"\"\n    Walk up the hierarchy tree from a node, collecting all ancestor IDs.\n\n    Internal helper used by both get_ancestors_from_node_id and\n    get_ancestors_from_raw_id.\n    \"\"\"\n    ancestors: list[int] = []\n    current_id: int | None = start_node_id\n    visited: set[int] = set()\n\n    while current_id is not None and len(ancestors) < MAX_DEPTH:\n        if current_id in visited:\n            logger.error(\n                f\"Cycle detected in hierarchy for source {source.value} at node {current_id}. Ancestors so far: {ancestors}\"\n            )\n            break\n\n        visited.add(current_id)\n        ancestors.append(current_id)\n\n        parent_id, found = get_parent_id_from_cache(redis_client, source, current_id)\n\n        if not found:\n            logger.debug(\n                f\"Cache miss for hierarchy node {current_id} of source {source.value}, attempting refresh\"\n            )\n            refresh_hierarchy_cache_from_db(redis_client, db_session, source)\n            parent_id, found = get_parent_id_from_cache(\n                redis_client, source, current_id\n            )\n\n            if not found:\n                logger.error(\n                    f\"Hierarchy node {current_id} not found in cache for source {source.value} even after refresh.\"\n                )\n                break\n\n        current_id = parent_id\n\n    if len(ancestors) >= MAX_DEPTH:\n        logger.error(\n            f\"Hit max depth {MAX_DEPTH} traversing hierarchy for source \"\n            f\"{source.value}. Possible infinite loop or very deep hierarchy.\"\n        )\n\n    return ancestors\n\n\ndef get_ancestors_from_raw_id(\n    redis_client: Redis,\n    source: DocumentSource,\n    parent_hierarchy_raw_node_id: str | None,\n    db_session: Session,\n) -> list[int]:\n    \"\"\"\n    Get all ancestor hierarchy node IDs from a raw_node_id.\n\n    This is the main entry point for getting ancestors from a document's\n    parent_hierarchy_raw_node_id. It resolves the raw_id to a database ID\n    via Redis cache, then walks up the tree.\n\n    No DB calls are made unless the cache is stale.\n\n    Args:\n        redis_client: Redis client with tenant prefixing\n        source: The document source\n        parent_hierarchy_raw_node_id: The document's parent raw node ID (from connector)\n        db_session: DB session for cache refresh if needed\n\n    Returns:\n        List of ancestor hierarchy node IDs from parent to root (inclusive).\n        Returns list with just SOURCE node ID if parent is None or not found.\n    \"\"\"\n    # If no parent specified, return just the SOURCE node\n    if parent_hierarchy_raw_node_id is None:\n        source_node_id = get_source_node_id_from_cache(redis_client, db_session, source)\n        return [source_node_id] if source_node_id else []\n\n    # Resolve raw_id to node_id via Redis\n    node_id, found = get_node_id_from_raw_id(\n        redis_client, source, parent_hierarchy_raw_node_id\n    )\n\n    if not found:\n        # Cache miss - try refresh\n        logger.debug(\n            f\"Cache miss for raw_node_id '{parent_hierarchy_raw_node_id}' of source {source.value}, attempting refresh\"\n        )\n        refresh_hierarchy_cache_from_db(redis_client, db_session, source)\n        node_id, found = get_node_id_from_raw_id(\n            redis_client, source, parent_hierarchy_raw_node_id\n        )\n\n    if not found or node_id is None:\n        logger.error(\n            f\"Raw node ID '{parent_hierarchy_raw_node_id}' not found in cache \"\n            f\"for source {source.value}. Falling back to SOURCE node only.\"\n        )\n        source_node_id = get_source_node_id_from_cache(redis_client, db_session, source)\n        return [source_node_id] if source_node_id else []\n\n    # Walk up the ancestor chain\n    return _walk_ancestor_chain(redis_client, source, node_id, db_session)\n\n\ndef get_source_node_id_from_cache(\n    redis_client: Redis,\n    db_session: Session,\n    source: DocumentSource,\n) -> int | None:\n    \"\"\"\n    Get the SOURCE-type node ID for a given source from cache.\n\n    If not in cache and db_session is provided, refreshes from DB.\n\n    Returns:\n        The ID of the SOURCE node, or None if not found.\n    \"\"\"\n    source_node_key = _source_node_key(source)\n\n    # Try to get from dedicated SOURCE node key\n    value = redis_client.get(source_node_key)\n    if value is not None:\n        if isinstance(value, bytes):\n            value = value.decode(\"utf-8\")\n        if not isinstance(value, str):\n            raise ValueError(f\"SOURCE node value is not a string: {value}\")\n        return int(value)\n\n    # Not in cache - try refresh from DB\n    refresh_hierarchy_cache_from_db(redis_client, db_session, source)\n\n    # Try again after refresh\n    value = redis_client.get(source_node_key)\n    if value is not None:\n        if isinstance(value, bytes):\n            value = value.decode(\"utf-8\")\n        if not isinstance(value, str):\n            raise ValueError(f\"SOURCE node value is not a string: {value}\")\n        return int(value)\n\n    logger.error(f\"SOURCE node not found for source {source.value}\")\n    return None\n\n\ndef clear_hierarchy_cache(redis_client: Redis, source: DocumentSource) -> None:\n    \"\"\"Clear the hierarchy cache for a source (useful for testing).\"\"\"\n    cache_key = _cache_key(source)\n    raw_id_key = _raw_id_cache_key(source)\n    source_node_key = _source_node_key(source)\n    redis_client.delete(cache_key)\n    redis_client.delete(raw_id_key)\n    redis_client.delete(source_node_key)\n\n\ndef ensure_source_node_exists(\n    redis_client: Redis,\n    db_session: Session,\n    source: DocumentSource,\n) -> int:\n    \"\"\"\n    Ensure that a SOURCE-type hierarchy node exists for the given source and cache it.\n\n    This is the primary entry point for ensuring hierarchy infrastructure is set up\n    for a source before processing documents. It should be called early in the\n    indexing pipeline (e.g., at the start of docfetching or hierarchy fetching).\n\n    The function:\n    1. Checks Redis cache for existing SOURCE node ID\n    2. If not cached, ensures the SOURCE node exists in the database\n    3. Caches the SOURCE node in Redis for fast subsequent lookups\n\n    This is idempotent and safe to call multiple times concurrently.\n\n    Args:\n        redis_client: Redis client with tenant prefixing\n        db_session: SQLAlchemy session for database operations\n        source: The document source type (e.g., GOOGLE_DRIVE, CONFLUENCE)\n\n    Returns:\n        The database ID of the SOURCE-type hierarchy node\n    \"\"\"\n    # First check if we already have it cached\n    source_node_key = _source_node_key(source)\n    cached_value = redis_client.get(source_node_key)\n\n    if cached_value is not None:\n        value_str: str\n        if isinstance(cached_value, bytes):\n            value_str = cached_value.decode(\"utf-8\")\n        else:\n            value_str = str(cached_value)\n        return int(value_str)\n\n    # Not cached - ensure it exists in DB and cache it\n    source_node = db_ensure_source_node_exists(db_session, source, commit=True)\n\n    # Cache the SOURCE node\n    cache_entry = HierarchyNodeCacheEntry.from_db_model(source_node)\n    cache_hierarchy_node(redis_client, source, cache_entry)\n\n    logger.info(\n        f\"Ensured SOURCE node exists and cached for {source.value}: id={source_node.id}\"\n    )\n\n    return source_node.id\n"
  },
  {
    "path": "backend/onyx/redis/redis_object_helper.py",
    "content": "from abc import ABC\nfrom abc import abstractmethod\n\nfrom celery import Celery\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.redis.redis_pool import get_redis_client\n\n\nclass RedisObjectHelper(ABC):\n    PREFIX = \"base\"\n    FENCE_PREFIX = PREFIX + \"_fence\"\n    TASKSET_PREFIX = PREFIX + \"_taskset\"\n\n    def __init__(self, tenant_id: str, id: str):\n        self._tenant_id: str = tenant_id\n        self._id: str = id\n        self.redis = get_redis_client(tenant_id=tenant_id)\n\n    @property\n    def task_id_prefix(self) -> str:\n        return f\"{self.PREFIX}_{self._id}\"\n\n    @property\n    def fence_key(self) -> str:\n        # example: documentset_fence_1\n        return f\"{self.FENCE_PREFIX}_{self._id}\"\n\n    @property\n    def taskset_key(self) -> str:\n        # example: documentset_taskset_1\n        return f\"{self.TASKSET_PREFIX}_{self._id}\"\n\n    @staticmethod\n    def get_id_from_fence_key(key: str) -> str | None:\n        \"\"\"\n        Extracts the object ID from a fence key in the format `PREFIX_fence_X`.\n\n        Args:\n            key (str): The fence key string.\n\n        Returns:\n            Optional[int]: The extracted ID if the key is in the correct format, otherwise None.\n        \"\"\"\n        parts = key.split(\"_\")\n        if len(parts) != 3:\n            return None\n\n        object_id = parts[2]\n        return object_id\n\n    @staticmethod\n    def get_id_from_task_id(task_id: str) -> str | None:\n        \"\"\"\n        Extracts the object ID from a task ID string.\n\n        This method assumes the task ID is formatted as `prefix_objectid_suffix`, where:\n        - `prefix` is an arbitrary string (e.g., the name of the task or entity),\n        - `objectid` is the ID you want to extract,\n        - `suffix` is another arbitrary string (e.g., a UUID).\n\n        Example:\n            If the input `task_id` is `documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc`,\n            this method will return the string `\"1\"`.\n\n        Args:\n            task_id (str): The task ID string from which to extract the object ID.\n\n        Returns:\n            str | None: The extracted object ID if the task ID is in the correct format, otherwise None.\n        \"\"\"\n        # example: task_id=documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc\n        parts = task_id.split(\"_\")\n        if len(parts) != 3:\n            return None\n\n        object_id = parts[1]\n        return object_id\n\n    @abstractmethod\n    def generate_tasks(\n        self,\n        max_tasks: int,\n        celery_app: Celery,\n        db_session: Session,\n        redis_client: Redis,\n        lock: RedisLock,\n        tenant_id: str,\n    ) -> tuple[int, int] | None:\n        \"\"\"First element should be the number of actual tasks generated, second should\n        be the number of docs that were candidates to be synced for the cc pair.\n\n        The need for this is when we are syncing stale docs referenced by multiple\n        connectors. In a single pass across multiple cc pairs, we only want a task\n        for be created for a particular document id the first time we see it.\n        The rest can be skipped.\"\"\"\n"
  },
  {
    "path": "backend/onyx/redis/redis_pool.py",
    "content": "import asyncio\nimport functools\nimport json\nimport ssl\nimport threading\nfrom collections.abc import Callable\nfrom typing import Any\nfrom typing import cast\nfrom typing import Optional\n\nimport redis\nfrom fastapi import Request\nfrom redis import asyncio as aioredis\nfrom redis.client import Redis\nfrom redis.lock import Lock as RedisLock\n\nfrom onyx.configs.app_configs import REDIS_AUTH_KEY_PREFIX\nfrom onyx.configs.app_configs import REDIS_DB_NUMBER\nfrom onyx.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL\nfrom onyx.configs.app_configs import REDIS_HOST\nfrom onyx.configs.app_configs import REDIS_PASSWORD\nfrom onyx.configs.app_configs import REDIS_POOL_MAX_CONNECTIONS\nfrom onyx.configs.app_configs import REDIS_PORT\nfrom onyx.configs.app_configs import REDIS_REPLICA_HOST\nfrom onyx.configs.app_configs import REDIS_SSL\nfrom onyx.configs.app_configs import REDIS_SSL_CA_CERTS\nfrom onyx.configs.app_configs import REDIS_SSL_CERT_REQS\nfrom onyx.configs.app_configs import USE_REDIS_IAM_AUTH\nfrom onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME\nfrom onyx.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS\nfrom onyx.redis.iam_auth import configure_redis_iam_auth\nfrom onyx.redis.iam_auth import create_redis_ssl_context_if_iam\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import DEFAULT_REDIS_PREFIX\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nSCAN_ITER_COUNT_DEFAULT = 4096\n\n\nclass TenantRedis(redis.Redis):\n    def __init__(self, tenant_id: str, *args: Any, **kwargs: Any) -> None:\n        super().__init__(*args, **kwargs)\n        self.tenant_id: str = tenant_id\n\n    def _prefixed(self, key: str | bytes | memoryview) -> str | bytes | memoryview:\n        prefix: str = f\"{self.tenant_id}:\"\n        if isinstance(key, str):\n            if key.startswith(prefix):\n                return key\n            else:\n                return prefix + key\n        elif isinstance(key, bytes):\n            prefix_bytes = prefix.encode()\n            if key.startswith(prefix_bytes):\n                return key\n            else:\n                return prefix_bytes + key\n        elif isinstance(key, memoryview):\n            key_bytes = key.tobytes()\n            prefix_bytes = prefix.encode()\n            if key_bytes.startswith(prefix_bytes):\n                return key\n            else:\n                return memoryview(prefix_bytes + key_bytes)\n        else:\n            raise TypeError(f\"Unsupported key type: {type(key)}\")\n\n    def _prefix_method(self, method: Callable) -> Callable:\n        @functools.wraps(method)\n        def wrapper(*args: Any, **kwargs: Any) -> Any:\n            if \"name\" in kwargs:\n                kwargs[\"name\"] = self._prefixed(kwargs[\"name\"])\n            elif len(args) > 0:\n                args = (self._prefixed(args[0]),) + args[1:]\n            return method(*args, **kwargs)\n\n        return wrapper\n\n    def _prefix_scan_iter(self, method: Callable) -> Callable:\n        @functools.wraps(method)\n        def wrapper(*args: Any, **kwargs: Any) -> Any:\n            # Prefix the match pattern if provided\n            if \"match\" in kwargs:\n                kwargs[\"match\"] = self._prefixed(kwargs[\"match\"])\n            elif len(args) > 0:\n                args = (self._prefixed(args[0]),) + args[1:]\n\n            # Get the iterator\n            iterator = method(*args, **kwargs)\n\n            # Remove prefix from returned keys\n            prefix = f\"{self.tenant_id}:\".encode()\n            prefix_len = len(prefix)\n\n            for key in iterator:\n                if isinstance(key, bytes) and key.startswith(prefix):\n                    yield key[prefix_len:]\n                else:\n                    yield key\n\n        return wrapper\n\n    def __getattribute__(self, item: str) -> Any:\n        original_attr = super().__getattribute__(item)\n        methods_to_wrap = [\n            \"lock\",\n            \"unlock\",\n            \"get\",\n            \"set\",\n            \"setex\",\n            \"delete\",\n            \"exists\",\n            \"incrby\",\n            \"hset\",\n            \"hget\",\n            \"getset\",\n            \"owned\",\n            \"reacquire\",\n            \"create_lock\",\n            \"startswith\",\n            \"smembers\",\n            \"sismember\",\n            \"sadd\",\n            \"srem\",\n            \"scard\",\n            \"hexists\",\n            \"hset\",\n            \"hdel\",\n            \"ttl\",\n            \"pttl\",\n        ]  # Regular methods that need simple prefixing\n\n        if item == \"scan_iter\" or item == \"sscan_iter\":\n            return self._prefix_scan_iter(original_attr)\n        elif item in methods_to_wrap and callable(original_attr):\n            return self._prefix_method(original_attr)\n        return original_attr\n\n\nclass RedisPool:\n    _instance: Optional[\"RedisPool\"] = None\n    _lock: threading.Lock = threading.Lock()\n    _pool: redis.BlockingConnectionPool\n    _replica_pool: redis.BlockingConnectionPool\n\n    def __new__(cls) -> \"RedisPool\":\n        if not cls._instance:\n            with cls._lock:\n                if not cls._instance:\n                    cls._instance = super(RedisPool, cls).__new__(cls)\n                    cls._instance._init_pools()\n        return cls._instance\n\n    def _init_pools(self) -> None:\n        self._pool = RedisPool.create_pool(ssl=REDIS_SSL)\n        self._replica_pool = RedisPool.create_pool(\n            host=REDIS_REPLICA_HOST, ssl=REDIS_SSL\n        )\n\n    def get_client(self, tenant_id: str) -> Redis:\n        return TenantRedis(tenant_id, connection_pool=self._pool)\n\n    def get_replica_client(self, tenant_id: str) -> Redis:\n        return TenantRedis(tenant_id, connection_pool=self._replica_pool)\n\n    def get_raw_client(self) -> Redis:\n        \"\"\"\n        Returns a Redis client with direct access to the primary connection pool,\n        without tenant prefixing.\n        \"\"\"\n        return redis.Redis(connection_pool=self._pool)\n\n    def get_raw_replica_client(self) -> Redis:\n        \"\"\"\n        Returns a Redis client with direct access to the replica connection pool,\n        without tenant prefixing.\n        \"\"\"\n        return redis.Redis(connection_pool=self._replica_pool)\n\n    @staticmethod\n    def create_pool(\n        host: str = REDIS_HOST,\n        port: int = REDIS_PORT,\n        db: int = REDIS_DB_NUMBER,\n        password: str = REDIS_PASSWORD,\n        max_connections: int = REDIS_POOL_MAX_CONNECTIONS,\n        ssl_ca_certs: str | None = REDIS_SSL_CA_CERTS,\n        ssl_cert_reqs: str = REDIS_SSL_CERT_REQS,\n        ssl: bool = False,\n    ) -> redis.BlockingConnectionPool:\n        \"\"\"\n        Create a Redis connection pool with appropriate SSL configuration.\n        SSL Configuration Priority:\n        1. IAM Authentication (USE_REDIS_IAM_AUTH=true): Uses system CA certificates\n        2. Regular SSL (REDIS_SSL=true): Uses custom SSL configuration\n        3. No SSL: Standard connection without encryption\n        Note: IAM authentication automatically enables SSL and takes precedence\n        over regular SSL configuration to ensure proper security.\n\n        We use BlockingConnectionPool because it will block and wait for a connection\n        rather than error if max_connections is reached. This is far more deterministic\n        behavior and aligned with how we want to use Redis.\"\"\"\n\n        # Using ConnectionPool is not well documented.\n        # Useful examples: https://github.com/redis/redis-py/issues/780\n\n        # Handle IAM authentication\n        if USE_REDIS_IAM_AUTH:\n            # For IAM authentication, we don't use password\n            # and ensure SSL is enabled with proper context\n            ssl_context = create_redis_ssl_context_if_iam()\n            return redis.BlockingConnectionPool(\n                host=host,\n                port=port,\n                db=db,\n                password=None,  # No password with IAM auth\n                max_connections=max_connections,\n                timeout=None,\n                health_check_interval=REDIS_HEALTH_CHECK_INTERVAL,\n                socket_keepalive=True,\n                socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,\n                connection_class=redis.SSLConnection,\n                ssl_context=ssl_context,  # Use IAM auth SSL context\n            )\n\n        if ssl:\n            return redis.BlockingConnectionPool(\n                host=host,\n                port=port,\n                db=db,\n                password=password,\n                max_connections=max_connections,\n                timeout=None,\n                health_check_interval=REDIS_HEALTH_CHECK_INTERVAL,\n                socket_keepalive=True,\n                socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,\n                connection_class=redis.SSLConnection,\n                ssl_ca_certs=ssl_ca_certs,\n                ssl_cert_reqs=ssl_cert_reqs,\n            )\n\n        return redis.BlockingConnectionPool(\n            host=host,\n            port=port,\n            db=db,\n            password=password,\n            max_connections=max_connections,\n            timeout=None,\n            health_check_interval=REDIS_HEALTH_CHECK_INTERVAL,\n            socket_keepalive=True,\n            socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,\n        )\n\n\nredis_pool = RedisPool()\n\n\n# # Usage example\n# redis_pool = RedisPool()\n# redis_client = redis_pool.get_client()\n\n# # Example of setting and getting a value\n# redis_client.set('key', 'value')\n# value = redis_client.get('key')\n# print(value.decode())  # Output: 'value'\n\n\ndef get_redis_client(\n    *,\n    #  This argument will be deprecated in the future\n    tenant_id: str | None = None,\n) -> Redis:\n    \"\"\"\n    Returns a Redis client with tenant-specific key prefixing.\n\n    This ensures proper data isolation between tenants by automatically\n    prefixing all Redis keys with the tenant ID.\n\n    Use this when working with tenant-specific data that should be\n    isolated from other tenants.\n    \"\"\"\n    if tenant_id is None:\n        tenant_id = get_current_tenant_id()\n\n    return redis_pool.get_client(tenant_id)\n\n\ndef get_redis_replica_client(\n    *,\n    # this argument will be deprecated in the future\n    tenant_id: str | None = None,\n) -> Redis:\n    \"\"\"\n    Returns a Redis replica client with tenant-specific key prefixing.\n\n    Similar to get_redis_client(), but connects to a read replica when available.\n    This ensures proper data isolation between tenants by automatically\n    prefixing all Redis keys with the tenant ID.\n\n    Use this for read-heavy operations on tenant-specific data.\n    \"\"\"\n    if tenant_id is None:\n        tenant_id = get_current_tenant_id()\n\n    return redis_pool.get_replica_client(tenant_id)\n\n\ndef get_shared_redis_client() -> Redis:\n    \"\"\"\n    Returns a Redis client with a shared namespace prefix.\n\n    Unlike tenant-specific clients, this uses a common prefix for all keys,\n    creating a shared namespace accessible across all tenants.\n\n    Use this for data that should be shared across the application and\n    isn't specific to any individual tenant.\n    \"\"\"\n    return redis_pool.get_client(DEFAULT_REDIS_PREFIX)\n\n\ndef get_shared_redis_replica_client() -> Redis:\n    \"\"\"\n    Returns a Redis replica client with a shared namespace prefix.\n\n    Similar to get_shared_redis_client(), but connects to a read replica when available.\n    Uses a common prefix for all keys, creating a shared namespace.\n\n    Use this for read-heavy operations on data that should be shared\n    across the application.\n    \"\"\"\n    return redis_pool.get_replica_client(DEFAULT_REDIS_PREFIX)\n\n\ndef get_raw_redis_client() -> Redis:\n    \"\"\"\n    Returns a Redis client that doesn't apply tenant prefixing to keys.\n\n    Use this only when you need to access Redis directly without tenant isolation\n    or any key prefixing. Typically needed for integrating with external systems\n    or libraries that have inflexible key requirements.\n\n    Warning: Be careful with this client as it bypasses tenant isolation.\n    \"\"\"\n    return redis_pool.get_raw_client()\n\n\ndef get_raw_redis_replica_client() -> Redis:\n    \"\"\"\n    Returns a Redis replica client that doesn't apply tenant prefixing to keys.\n\n    Similar to get_raw_redis_client(), but connects to a read replica when available.\n    Use this for read-heavy operations that need direct Redis access without\n    tenant isolation or key prefixing.\n\n    Warning: Be careful with this client as it bypasses tenant isolation.\n    \"\"\"\n    return redis_pool.get_raw_replica_client()\n\n\nSSL_CERT_REQS_MAP = {\n    \"none\": ssl.CERT_NONE,\n    \"optional\": ssl.CERT_OPTIONAL,\n    \"required\": ssl.CERT_REQUIRED,\n}\n\n\n_async_redis_connection: aioredis.Redis | None = None\n_async_lock = asyncio.Lock()\n\n\nasync def get_async_redis_connection() -> aioredis.Redis:\n    \"\"\"\n    Provides a shared async Redis connection, using the same configs (host, port, SSL, etc.).\n    Ensures that the connection is created only once (lazily) and reused for all future calls.\n    \"\"\"\n    global _async_redis_connection\n\n    # If we haven't yet created an async Redis connection, we need to create one\n    if _async_redis_connection is None:\n        # Acquire the lock to ensure that only one coroutine attempts to create the connection\n        async with _async_lock:\n            # Double-check inside the lock to avoid race conditions\n            if _async_redis_connection is None:\n                # Load env vars or your config variables\n\n                connection_kwargs: dict[str, Any] = {\n                    \"host\": REDIS_HOST,\n                    \"port\": REDIS_PORT,\n                    \"db\": REDIS_DB_NUMBER,\n                    \"password\": REDIS_PASSWORD,\n                    \"max_connections\": REDIS_POOL_MAX_CONNECTIONS,\n                    \"health_check_interval\": REDIS_HEALTH_CHECK_INTERVAL,\n                    \"socket_keepalive\": True,\n                    \"socket_keepalive_options\": REDIS_SOCKET_KEEPALIVE_OPTIONS,\n                }\n\n                if USE_REDIS_IAM_AUTH:\n                    configure_redis_iam_auth(connection_kwargs)\n                elif REDIS_SSL:\n                    ssl_context = ssl.create_default_context()\n\n                    if REDIS_SSL_CA_CERTS:\n                        ssl_context.load_verify_locations(REDIS_SSL_CA_CERTS)\n                    ssl_context.check_hostname = False\n\n                    # Map your string to the proper ssl.CERT_* constant\n                    ssl_context.verify_mode = SSL_CERT_REQS_MAP.get(\n                        REDIS_SSL_CERT_REQS, ssl.CERT_NONE\n                    )\n\n                    connection_kwargs[\"ssl\"] = ssl_context\n\n                # Create a new Redis connection (or connection pool) with SSL configuration\n                _async_redis_connection = aioredis.Redis(**connection_kwargs)\n\n    # Return the established connection (or pool) for all future operations\n    return _async_redis_connection\n\n\nasync def retrieve_auth_token_data(token: str) -> dict | None:\n    \"\"\"Validate auth token against Redis and return token data.\n\n    Args:\n        token: The raw authentication token string.\n\n    Returns:\n        Token data dict if valid, None if invalid/expired.\n    \"\"\"\n    try:\n        redis = await get_async_redis_connection()\n        redis_key = REDIS_AUTH_KEY_PREFIX + token\n        token_data_str = await redis.get(redis_key)\n\n        if not token_data_str:\n            logger.debug(f\"Token key {redis_key} not found or expired in Redis\")\n            return None\n\n        return json.loads(token_data_str)\n    except json.JSONDecodeError:\n        logger.error(\"Error decoding token data from Redis\")\n        return None\n    except Exception as e:\n        logger.error(f\"Unexpected error in retrieve_auth_token_data: {str(e)}\")\n        raise ValueError(f\"Unexpected error in retrieve_auth_token_data: {str(e)}\")\n\n\nasync def retrieve_auth_token_data_from_redis(request: Request) -> dict | None:\n    \"\"\"Validate auth token from request cookie. Wrapper for backwards compatibility.\"\"\"\n    token = request.cookies.get(FASTAPI_USERS_AUTH_COOKIE_NAME)\n    if not token:\n        logger.debug(\"No auth token cookie found\")\n        return None\n    return await retrieve_auth_token_data(token)\n\n\n# WebSocket token prefix (separate from regular auth tokens)\nREDIS_WS_TOKEN_PREFIX = \"ws_token:\"\n# WebSocket tokens expire after 60 seconds\nWS_TOKEN_TTL_SECONDS = 60\n# Rate limit: max tokens per user per window\nWS_TOKEN_RATE_LIMIT_MAX = 10\nWS_TOKEN_RATE_LIMIT_WINDOW_SECONDS = 60\nREDIS_WS_TOKEN_RATE_LIMIT_PREFIX = \"ws_token_rate:\"\n\n\nclass WsTokenRateLimitExceeded(Exception):\n    \"\"\"Raised when a user exceeds the WS token generation rate limit.\"\"\"\n\n\nasync def store_ws_token(token: str, user_id: str) -> None:\n    \"\"\"Store a short-lived WebSocket authentication token in Redis.\n\n    Args:\n        token: The generated WS token.\n        user_id: The user ID to associate with this token.\n\n    Raises:\n        WsTokenRateLimitExceeded: If the user has exceeded the rate limit.\n    \"\"\"\n    redis = await get_async_redis_connection()\n\n    # Atomically increment and check rate limit to avoid TOCTOU races\n    rate_limit_key = REDIS_WS_TOKEN_RATE_LIMIT_PREFIX + user_id\n    pipe = redis.pipeline()\n    pipe.incr(rate_limit_key)\n    pipe.expire(rate_limit_key, WS_TOKEN_RATE_LIMIT_WINDOW_SECONDS)\n    results = await pipe.execute()\n    new_count = results[0]\n\n    if new_count > WS_TOKEN_RATE_LIMIT_MAX:\n        # Over limit — decrement back since we won't use this slot\n        await redis.decr(rate_limit_key)\n        logger.warning(f\"WS token rate limit exceeded for user {user_id}\")\n        raise WsTokenRateLimitExceeded(\n            f\"Rate limit exceeded. Maximum {WS_TOKEN_RATE_LIMIT_MAX} tokens per minute.\"\n        )\n\n    # Store the actual token\n    redis_key = REDIS_WS_TOKEN_PREFIX + token\n    token_data = json.dumps({\"sub\": user_id})\n    await redis.set(redis_key, token_data, ex=WS_TOKEN_TTL_SECONDS)\n\n\nasync def retrieve_ws_token_data(token: str) -> dict | None:\n    \"\"\"Validate a WebSocket token and return the token data.\n\n    This uses GETDEL for atomic get-and-delete to prevent race conditions\n    where the same token could be used twice.\n\n    Args:\n        token: The WS token to validate.\n\n    Returns:\n        Token data dict with 'sub' (user ID) if valid, None if invalid/expired.\n    \"\"\"\n    try:\n        redis = await get_async_redis_connection()\n        redis_key = REDIS_WS_TOKEN_PREFIX + token\n\n        # Atomic get-and-delete to prevent race conditions (Redis 6.2+)\n        token_data_str = await redis.getdel(redis_key)\n\n        if not token_data_str:\n            return None\n\n        return json.loads(token_data_str)\n    except json.JSONDecodeError:\n        logger.error(\"Error decoding WS token data from Redis\")\n        return None\n    except Exception as e:\n        logger.error(f\"Unexpected error in retrieve_ws_token_data: {str(e)}\")\n        return None\n\n\ndef redis_lock_dump(lock: RedisLock, r: Redis) -> None:\n    # diagnostic logging for lock errors\n    name = lock.name\n    ttl = r.ttl(name)\n    locked = lock.locked()\n    owned = lock.owned()\n    local_token: str | None = lock.local.token\n\n    remote_token_raw = r.get(lock.name)\n    if remote_token_raw:\n        remote_token_bytes = cast(bytes, remote_token_raw)\n        remote_token = remote_token_bytes.decode(\"utf-8\")\n    else:\n        remote_token = None\n\n    logger.warning(\n        f\"RedisLock diagnostic: \"\n        f\"name={name} \"\n        f\"locked={locked} \"\n        f\"owned={owned} \"\n        f\"local_token={local_token} \"\n        f\"remote_token={remote_token} \"\n        f\"ttl={ttl}\"\n    )\n"
  },
  {
    "path": "backend/onyx/redis/redis_usergroup.py",
    "content": "import time\nfrom typing import cast\nfrom uuid import uuid4\n\nimport redis\nfrom celery import Celery\nfrom redis import Redis\nfrom redis.lock import Lock as RedisLock\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DB_YIELD_PER_DEFAULT\nfrom onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisConstants\nfrom onyx.redis.redis_object_helper import RedisObjectHelper\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import global_version\n\n\nclass RedisUserGroup(RedisObjectHelper):\n    PREFIX = \"usergroup\"\n    FENCE_PREFIX = PREFIX + \"_fence\"\n    FENCE_TTL = 7 * 24 * 60 * 60  # 7 days - defensive TTL to prevent memory leaks\n    TASKSET_PREFIX = PREFIX + \"_taskset\"\n    TASKSET_TTL = FENCE_TTL\n\n    def __init__(self, tenant_id: str, id: int) -> None:\n        super().__init__(tenant_id, str(id))\n\n    @property\n    def fenced(self) -> bool:\n        if self.redis.exists(self.fence_key):\n            return True\n\n        return False\n\n    def set_fence(self, payload: int | None) -> None:\n        if payload is None:\n            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n            self.redis.delete(self.fence_key)\n            return\n\n        self.redis.set(self.fence_key, payload, ex=self.FENCE_TTL)\n        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n\n    @property\n    def payload(self) -> int | None:\n        bytes = self.redis.get(self.fence_key)\n        if bytes is None:\n            return None\n\n        progress = int(cast(int, bytes))\n        return progress\n\n    def generate_tasks(\n        self,\n        max_tasks: int,  # noqa: ARG002\n        celery_app: Celery,\n        db_session: Session,\n        redis_client: Redis,\n        lock: RedisLock,\n        tenant_id: str,\n    ) -> tuple[int, int] | None:\n        \"\"\"Max tasks is ignored for now until we can build the logic to mark the\n        user group up to date over multiple batches.\n        \"\"\"\n        last_lock_time = time.monotonic()\n        num_tasks_sent = 0\n\n        if not global_version.is_ee_version():\n            return 0, 0\n\n        try:\n            construct_document_id_select_by_usergroup = fetch_versioned_implementation(\n                \"onyx.db.user_group\",\n                \"construct_document_id_select_by_usergroup\",\n            )\n        except ModuleNotFoundError:\n            return 0, 0\n\n        stmt = construct_document_id_select_by_usergroup(int(self._id))\n        for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):\n            doc_id = cast(str, doc_id)\n            current_time = time.monotonic()\n            if current_time - last_lock_time >= (\n                CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4\n            ):\n                lock.reacquire()\n                last_lock_time = current_time\n\n            # celery's default task id format is \"dd32ded3-00aa-4884-8b21-42f8332e7fac\"\n            # the key for the result is \"celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac\"\n            # we prefix the task id so it's easier to keep track of who created the task\n            # aka \"documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac\"\n            custom_task_id = f\"{self.task_id_prefix}_{uuid4()}\"\n\n            # add to the set BEFORE creating the task.\n            redis_client.sadd(self.taskset_key, custom_task_id)\n            redis_client.expire(self.taskset_key, self.TASKSET_TTL)\n\n            celery_app.send_task(\n                OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,\n                kwargs=dict(document_id=doc_id, tenant_id=tenant_id),\n                queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,\n                task_id=custom_task_id,\n                priority=OnyxCeleryPriority.MEDIUM,\n            )\n\n            num_tasks_sent += 1\n\n        return num_tasks_sent, num_tasks_sent\n\n    def reset(self) -> None:\n        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)\n        self.redis.delete(self.taskset_key)\n        self.redis.delete(self.fence_key)\n\n    @staticmethod\n    def reset_all(r: redis.Redis) -> None:\n        for key in r.scan_iter(RedisUserGroup.TASKSET_PREFIX + \"*\"):\n            r.delete(key)\n\n        for key in r.scan_iter(RedisUserGroup.FENCE_PREFIX + \"*\"):\n            r.delete(key)\n"
  },
  {
    "path": "backend/onyx/redis/redis_utils.py",
    "content": "from onyx.redis.redis_connector_delete import RedisConnectorDelete\nfrom onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync\nfrom onyx.redis.redis_connector_prune import RedisConnectorPrune\nfrom onyx.redis.redis_document_set import RedisDocumentSet\nfrom onyx.redis.redis_usergroup import RedisUserGroup\n\n\ndef is_fence(key_bytes: bytes) -> bool:\n    key_str = key_bytes.decode(\"utf-8\")\n    if key_str.startswith(RedisDocumentSet.FENCE_PREFIX):\n        return True\n    if key_str.startswith(RedisUserGroup.FENCE_PREFIX):\n        return True\n    if key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):\n        return True\n    if key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):\n        return True\n    if key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):\n        return True\n\n    return False\n"
  },
  {
    "path": "backend/onyx/secondary_llm_flows/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/secondary_llm_flows/chat_session_naming.py",
    "content": "from onyx.chat.llm_step import translate_history_to_llm_format\nfrom onyx.chat.models import ChatMessageSimple\nfrom onyx.configs.constants import MessageType\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.models import ReasoningEffort\nfrom onyx.llm.utils import llm_response_to_string\nfrom onyx.prompts.chat_prompts import CHAT_NAMING_REMINDER\nfrom onyx.prompts.chat_prompts import CHAT_NAMING_SYSTEM_PROMPT\nfrom onyx.tracing.llm_utils import llm_generation_span\nfrom onyx.tracing.llm_utils import record_llm_response\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef generate_chat_session_name(\n    chat_history: list[ChatMessageSimple],\n    llm: LLM,\n) -> str:\n    system_prompt = ChatMessageSimple(\n        message=CHAT_NAMING_SYSTEM_PROMPT,\n        token_count=100,\n        message_type=MessageType.SYSTEM,\n    )\n\n    reminder_prompt = ChatMessageSimple(\n        message=CHAT_NAMING_REMINDER,\n        token_count=100,\n        message_type=MessageType.USER_REMINDER,\n    )\n\n    complete_message_history = [system_prompt] + chat_history + [reminder_prompt]\n\n    llm_facing_history = translate_history_to_llm_format(\n        complete_message_history, llm.config\n    )\n\n    # Call LLM with Braintrust tracing\n    with llm_generation_span(\n        llm=llm, flow=\"chat_session_naming\", input_messages=llm_facing_history\n    ) as span_generation:\n        response = llm.invoke(llm_facing_history, reasoning_effort=ReasoningEffort.OFF)\n        record_llm_response(span_generation, response)\n        new_name_raw = llm_response_to_string(response)\n\n    return new_name_raw.strip().strip('\"')\n"
  },
  {
    "path": "backend/onyx/secondary_llm_flows/document_filter.py",
    "content": "import json\nimport re\n\nfrom onyx.context.search.models import ContextExpansionType\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import InferenceSection\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.models import ReasoningEffort\nfrom onyx.llm.models import UserMessage\nfrom onyx.prompts.search_prompts import DOCUMENT_CONTEXT_SELECTION_PROMPT\nfrom onyx.prompts.search_prompts import DOCUMENT_SELECTION_PROMPT\nfrom onyx.prompts.search_prompts import TRY_TO_FILL_TO_MAX_INSTRUCTIONS\nfrom onyx.tools.tool_implementations.search.constants import (\n    MAX_CHUNKS_FOR_RELEVANCE,\n)\nfrom onyx.tracing.llm_utils import llm_generation_span\nfrom onyx.tracing.llm_utils import record_llm_response\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef select_chunks_for_relevance(\n    section: InferenceSection,\n    max_chunks: int = MAX_CHUNKS_FOR_RELEVANCE,\n) -> list[InferenceChunk]:\n    \"\"\"Select a subset of chunks from a section based on center chunk position.\n\n    Logic:\n    - Always include the center chunk\n    - If there are chunks directly next to it by index, grab the preceding and following\n    - Otherwise grab 2 in the direction that does exist (2 before or 2 after)\n    - If there are not enough in either direction, just grab what's available\n    - If there are no other chunks, just use the central chunk\n\n    Args:\n        section: InferenceSection with center_chunk and chunks\n        max_chunks: Maximum number of chunks to select (default: MAX_CHUNKS_FOR_RELEVANCE)\n\n    Returns:\n        List of selected InferenceChunks ordered by position\n    \"\"\"\n    if max_chunks <= 0:\n        return []\n\n    center_chunk = section.center_chunk\n    all_chunks = section.chunks\n\n    # Find the index of the center chunk in the chunks list\n    try:\n        center_index = next(\n            i\n            for i, chunk in enumerate(all_chunks)\n            if chunk.chunk_id == center_chunk.chunk_id\n        )\n    except StopIteration:\n        # If center chunk not found in chunks list, just return center chunk\n        return [center_chunk]\n\n    if max_chunks == 1:\n        return [center_chunk]\n\n    # Calculate how many chunks to take before and after\n    chunks_needed = max_chunks - 1  # minus 1 for center chunk\n\n    # Determine available chunks before and after center\n    chunks_before_available = center_index\n    chunks_after_available = len(all_chunks) - center_index - 1\n\n    # Start with balanced distribution (1 before, 1 after for max_chunks=3)\n    chunks_before = min(chunks_needed // 2, chunks_before_available)\n    chunks_after = min(chunks_needed // 2, chunks_after_available)\n\n    # Allocate remaining chunks to whichever direction has availability\n    remaining = chunks_needed - chunks_before - chunks_after\n    if remaining > 0:\n        # Try to add more chunks before center if available\n        if chunks_before_available > chunks_before:\n            additional_before = min(remaining, chunks_before_available - chunks_before)\n            chunks_before += additional_before\n            remaining -= additional_before\n        # Try to add more chunks after center if available\n        if remaining > 0 and chunks_after_available > chunks_after:\n            additional_after = min(remaining, chunks_after_available - chunks_after)\n            chunks_after += additional_after\n\n    # Select the chunks\n    start_index = center_index - chunks_before\n    end_index = center_index + chunks_after + 1  # +1 to include center and chunks after\n\n    return all_chunks[start_index:end_index]\n\n\ndef classify_section_relevance(\n    document_title: str,\n    section_text: str,\n    user_query: str,\n    llm: LLM,\n    section_above_text: str | None,\n    section_below_text: str | None,\n) -> ContextExpansionType:\n    \"\"\"Use LLM to classify section relevance and determine context expansion type.\n\n    Args:\n        section_text: The text content of the section to classify\n        user_query: The user's search query\n        llm: LLM instance to use for classification\n        section_above_text: Text content from chunks above the section\n        section_below_text: Text content from chunks below the section\n\n    Returns:\n        ContextExpansionType indicating how the section should be expanded\n    \"\"\"\n    # Build the prompt\n    prompt_text = DOCUMENT_CONTEXT_SELECTION_PROMPT.format(\n        document_title=document_title,\n        main_section=section_text,\n        section_above=section_above_text if section_above_text else \"N/A\",\n        section_below=section_below_text if section_below_text else \"N/A\",\n        user_query=user_query,\n    )\n\n    # Default to MAIN_SECTION_ONLY\n    default_classification = ContextExpansionType.MAIN_SECTION_ONLY\n\n    # Call LLM for classification with Braintrust tracing\n    try:\n        prompt_msg = UserMessage(content=prompt_text)\n        with llm_generation_span(\n            llm=llm, flow=\"classify_section_relevance\", input_messages=[prompt_msg]\n        ) as span_generation:\n            response = llm.invoke(\n                prompt=prompt_msg,\n                reasoning_effort=ReasoningEffort.OFF,\n            )\n            record_llm_response(span_generation, response)\n            llm_response = response.choice.message.content\n\n        if not llm_response:\n            logger.warning(\n                \"LLM returned empty response for context selection, defaulting to MAIN_SECTION_ONLY\"\n            )\n            classification = default_classification\n        else:\n            # Parse the response to extract the situation number (0-3)\n            numbers = re.findall(r\"\\b[0-3]\\b\", llm_response)\n            if numbers:\n                situation = int(numbers[-1])\n                # Map situation number to ContextExpansionType\n                situation_to_type = {\n                    0: ContextExpansionType.NOT_RELEVANT,\n                    1: ContextExpansionType.MAIN_SECTION_ONLY,\n                    2: ContextExpansionType.INCLUDE_ADJACENT_SECTIONS,\n                    3: ContextExpansionType.FULL_DOCUMENT,\n                }\n                classification = situation_to_type.get(\n                    situation, default_classification\n                )\n            else:\n                logger.warning(\n                    f\"Could not parse situation number from LLM response: {llm_response}\"\n                )\n                classification = default_classification\n\n    except Exception as e:\n        logger.error(f\"Error calling LLM for context selection: {e}\")\n        classification = default_classification\n\n    # To save some effort down the line, if there is nothing surrounding, don't allow a classification of adjacent or whole doc\n    if (\n        not section_above_text\n        and not section_below_text\n        and classification != ContextExpansionType.NOT_RELEVANT\n    ):\n        classification = ContextExpansionType.MAIN_SECTION_ONLY\n\n    return classification\n\n\ndef select_sections_for_expansion(\n    sections: list[InferenceSection],\n    user_query: str,\n    llm: LLM,\n    max_sections: int = 10,\n    max_chunks_per_section: int | None = MAX_CHUNKS_FOR_RELEVANCE,\n    try_to_fill_to_max: bool = False,\n) -> tuple[list[InferenceSection], list[str] | None]:\n    \"\"\"Use LLM to select the most relevant document sections for expansion.\n\n    Args:\n        sections: List of InferenceSection objects to select from\n        user_query: The user's search query\n        llm: LLM instance to use for selection\n        max_sections: Maximum number of sections to select (default: 10)\n        max_chunks_per_section: Maximum chunks to consider per section (default: MAX_CHUNKS_FOR_RELEVANCE)\n\n    Returns:\n        A tuple of:\n        - Filtered list of InferenceSection objects selected by the LLM\n        - List of document IDs for sections marked with \"!\" by the LLM, or None if none.\n          Note: The \"!\" marker support exists in parsing but is not currently used because\n          the prompt does not instruct the LLM to use it.\n    \"\"\"\n    if not sections:\n        return [], None\n\n    # Create a mapping of section ID to section\n    section_map: dict[str, InferenceSection] = {}\n    sections_dict: list[dict[str, str | int | list[str]]] = []\n\n    for idx, section in enumerate(sections):\n        # Create a unique ID for each section\n        section_id = f\"{idx}\"\n        section_map[section_id] = section\n\n        # Format the section for the LLM\n        chunk = section.center_chunk\n\n        # Combine primary and secondary owners for authors\n        authors = None\n        if chunk.primary_owners or chunk.secondary_owners:\n            authors = []\n            if chunk.primary_owners:\n                authors.extend(chunk.primary_owners)\n            if chunk.secondary_owners:\n                authors.extend(chunk.secondary_owners)\n\n        # Format updated_at as ISO string if available\n        updated_at_str = None\n        if chunk.updated_at:\n            updated_at_str = chunk.updated_at.isoformat()\n\n        # Convert metadata to JSON string\n        metadata_str = json.dumps(chunk.metadata)\n\n        # Select only the most relevant chunks from the section to avoid flooding\n        # the LLM with too much content from documents with many matching sections\n        if max_chunks_per_section is not None:\n            selected_chunks = select_chunks_for_relevance(\n                section, max_chunks_per_section\n            )\n            selected_content = \" \".join(chunk.content for chunk in selected_chunks)\n        else:\n            selected_content = section.combined_content\n\n        section_dict: dict[str, str | int | list[str]] = {\n            \"section_id\": idx,\n            \"title\": chunk.semantic_identifier,\n        }\n\n        # Only include updated_at if not None\n        if updated_at_str is not None:\n            section_dict[\"updated_at\"] = updated_at_str\n\n        # Only include authors if not None\n        if authors is not None:\n            section_dict[\"authors\"] = authors\n\n        section_dict[\"source_type\"] = str(chunk.source_type)\n        section_dict[\"metadata\"] = metadata_str\n        section_dict[\"content\"] = selected_content\n\n        sections_dict.append(section_dict)\n\n    # Build the prompt\n    extra_instructions = TRY_TO_FILL_TO_MAX_INSTRUCTIONS if try_to_fill_to_max else \"\"\n    prompt_text = UserMessage(\n        content=DOCUMENT_SELECTION_PROMPT.format(\n            max_sections=max_sections,\n            extra_instructions=extra_instructions,\n            formatted_doc_sections=json.dumps(sections_dict, indent=2),\n            user_query=user_query,\n        )\n    )\n\n    # Call LLM for selection with Braintrust tracing\n    try:\n        with llm_generation_span(\n            llm=llm, flow=\"select_sections_for_expansion\", input_messages=[prompt_text]\n        ) as span_generation:\n            response = llm.invoke(\n                prompt=[prompt_text], reasoning_effort=ReasoningEffort.OFF\n            )\n            record_llm_response(span_generation, response)\n            llm_response = response.choice.message.content\n\n        if not llm_response:\n            logger.warning(\n                \"LLM returned empty response for document selection, returning first max_sections\"\n            )\n            return sections[:max_sections], None\n\n        # Parse the response to extract section IDs\n        # Look for patterns like [1, 2, 3] or [1,2,3] with flexible whitespace/newlines\n        # Also handle unbracketed comma-separated lists like \"1, 2, 3\"\n        # Track which sections have \"!\" marker (e.g., \"1, 2!, 3\" or \"[1, 2!, 3]\")\n        section_ids = []\n        sections_with_exclamation = set()  # Track section IDs that have \"!\" marker\n\n        # First try to find a bracketed list\n        bracket_pattern = r\"\\[([^\\]]+)\\]\"\n        bracket_match = re.search(bracket_pattern, llm_response)\n\n        if bracket_match:\n            # Extract the content between brackets\n            list_content = bracket_match.group(1)\n            # Split by comma, preserving the parts\n            parts = [part.strip() for part in list_content.split(\",\")]\n            for part in parts:\n                # Check if this part has an exclamation mark\n                has_exclamation = \"!\" in part\n                # Extract the number (digits only)\n                numbers = re.findall(r\"\\d+\", part)\n                if numbers:\n                    section_id = numbers[0]\n                    section_ids.append(section_id)\n                    if has_exclamation:\n                        sections_with_exclamation.add(section_id)\n        else:\n            # Try to find an unbracketed comma-separated list\n            # Look for patterns like \"1, 2, 3\" or \"1, 2!, 3\"\n            # This regex finds sequences of digits optionally followed by \"!\" and separated by commas\n            comma_list_pattern = r\"\\b\\d+!?\\b(?:\\s*,\\s*\\b\\d+!?\\b)*\"\n            comma_match = re.search(comma_list_pattern, llm_response)\n\n            if comma_match:\n                # Extract the matched comma-separated list\n                list_content = comma_match.group(0)\n                parts = [part.strip() for part in list_content.split(\",\")]\n                for part in parts:\n                    # Check if this part has an exclamation mark\n                    has_exclamation = \"!\" in part\n                    # Extract the number (digits only)\n                    numbers = re.findall(r\"\\d+\", part)\n                    if numbers:\n                        section_id = numbers[0]\n                        section_ids.append(section_id)\n                        if has_exclamation:\n                            sections_with_exclamation.add(section_id)\n            else:\n                # Fallback: try to extract all numbers from the response\n                # Also check for \"!\" after numbers\n                number_pattern = r\"\\b(\\d+)(!)?\\b\"\n                matches = re.finditer(number_pattern, llm_response)\n                for match in matches:\n                    section_id = match.group(1)\n                    has_exclamation = match.group(2) == \"!\"\n                    section_ids.append(section_id)\n                    if has_exclamation:\n                        sections_with_exclamation.add(section_id)\n\n        if not section_ids:\n            logger.warning(\n                f\"Could not parse section IDs from LLM response: {llm_response}\"\n            )\n            return sections[:max_sections], None\n\n        # Filter sections based on LLM selection\n        # Skip out-of-range IDs and don't count them toward max_sections\n        selected_sections = []\n        document_ids_with_exclamation = []  # Collect document_ids for sections with \"!\"\n        num_sections = len(sections)\n\n        for section_id_str in section_ids:\n            # Convert to int\n            try:\n                section_id_int = int(section_id_str)\n            except ValueError:\n                logger.warning(f\"Could not convert section ID to int: {section_id_str}\")\n                continue\n\n            # Check if in valid range\n            if section_id_int < 0 or section_id_int >= num_sections:\n                logger.warning(\n                    f\"Section ID {section_id_int} is out of range [0, {num_sections - 1}], skipping\"\n                )\n                continue\n\n            # Convert back to string for section_map lookup\n            section_id = str(section_id_int)\n            if section_id in section_map:\n                section = section_map[section_id]\n                selected_sections.append(section)\n\n                # If this section has an exclamation mark, collect its document_id\n                if section_id_str in sections_with_exclamation:\n                    document_id = section.center_chunk.document_id\n                    if document_id not in document_ids_with_exclamation:\n                        document_ids_with_exclamation.append(document_id)\n\n            # Stop if we've reached max_sections valid selections\n            if len(selected_sections) >= max_sections:\n                break\n\n        if not selected_sections:\n            logger.warning(\n                \"No valid sections selected from LLM response, returning first max_sections\"\n            )\n            return sections[:max_sections], None\n\n        # Collect all selected document IDs\n        selected_document_ids = [\n            section.center_chunk.document_id for section in selected_sections\n        ]\n\n        logger.debug(\n            f\"LLM selected {len(selected_sections)} valid sections from {len(sections)} total candidates. \"\n            f\"Selected document IDs: {selected_document_ids}. \"\n            f\"Document IDs with exclamation: {document_ids_with_exclamation if document_ids_with_exclamation else []}\"\n        )\n\n        # Return document_ids if any sections had exclamation marks, otherwise None\n        return selected_sections, (\n            document_ids_with_exclamation if document_ids_with_exclamation else None\n        )\n\n    except Exception as e:\n        logger.error(f\"Error calling LLM for document selection: {e}\")\n        return sections[:max_sections], None\n"
  },
  {
    "path": "backend/onyx/secondary_llm_flows/memory_update.py",
    "content": "from onyx.configs.constants import MessageType\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.models import ReasoningEffort\nfrom onyx.llm.models import UserMessage\nfrom onyx.prompts.basic_memory import FULL_MEMORY_UPDATE_PROMPT\nfrom onyx.tools.models import ChatMinimalTextMessage\nfrom onyx.tracing.llm_utils import llm_generation_span\nfrom onyx.tracing.llm_utils import record_llm_response\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.text_processing import parse_llm_json_response\n\nlogger = setup_logger()\n\n# Maximum number of user messages to include\nMAX_USER_MESSAGES = 3\nMAX_CHARS_PER_MESSAGE = 500\n\n\ndef _format_chat_history(chat_history: list[ChatMinimalTextMessage]) -> str:\n    user_messages = [\n        msg for msg in chat_history if msg.message_type == MessageType.USER\n    ]\n\n    if not user_messages:\n        return \"No chat history available.\"\n\n    # Take the last N user messages\n    recent_user_messages = user_messages[-MAX_USER_MESSAGES:]\n\n    formatted_parts = []\n    for i, msg in enumerate(recent_user_messages, start=1):\n        if len(msg.message) > MAX_CHARS_PER_MESSAGE:\n            truncated_message = msg.message[:MAX_CHARS_PER_MESSAGE] + \"[...truncated]\"\n        else:\n            truncated_message = msg.message\n        formatted_parts.append(f\"\\nUser message:\\n{truncated_message}\\n\")\n\n    return \"\".join(formatted_parts).strip()\n\n\ndef _format_existing_memories(existing_memories: list[str]) -> str:\n    \"\"\"Format existing memories as a numbered list (1-indexed for readability).\"\"\"\n    if not existing_memories:\n        return \"No existing memories.\"\n\n    formatted_lines = []\n    for i, memory in enumerate(existing_memories, start=1):\n        formatted_lines.append(f\"{i}. {memory}\")\n\n    return \"\\n\".join(formatted_lines)\n\n\ndef _format_user_basic_information(\n    user_name: str | None,\n    user_email: str | None,\n    user_role: str | None,\n) -> str:\n    \"\"\"Format user basic information, only including fields that have values.\"\"\"\n    lines = []\n    if user_name:\n        lines.append(f\"User name: {user_name}\")\n    if user_email:\n        lines.append(f\"User email: {user_email}\")\n    if user_role:\n        lines.append(f\"User role: {user_role}\")\n\n    if not lines:\n        return \"\"\n\n    return \"\\n\\n# User Basic Information\\n\" + \"\\n\".join(lines)\n\n\ndef process_memory_update(\n    new_memory: str,\n    existing_memories: list[str],\n    chat_history: list[ChatMinimalTextMessage],\n    llm: LLM,\n    user_name: str | None = None,\n    user_email: str | None = None,\n    user_role: str | None = None,\n) -> tuple[str, int | None]:\n    \"\"\"\n    Determine if a memory should be added or updated.\n\n    Uses the LLM to analyze the new memory against existing memories and\n    determine whether to add it as new or update an existing memory.\n\n    Args:\n        new_memory: The new memory text from the memory tool\n        existing_memories: List of existing memory strings\n        chat_history: Recent chat history for context\n        llm: LLM instance to use for the decision\n        user_name: Optional user name for context\n        user_email: Optional user email for context\n        user_role: Optional user role for context\n\n    Returns:\n        Tuple of (memory_text, index_to_replace)\n        - memory_text: The final memory text to store\n        - index_to_replace: Index in existing_memories to replace, or None if adding new\n    \"\"\"\n    # Format inputs for the prompt\n    formatted_chat_history = _format_chat_history(chat_history)\n    formatted_memories = _format_existing_memories(existing_memories)\n    formatted_user_info = _format_user_basic_information(\n        user_name, user_email, user_role\n    )\n\n    # Build the prompt\n    prompt = FULL_MEMORY_UPDATE_PROMPT.format(\n        chat_history=formatted_chat_history,\n        user_basic_information=formatted_user_info,\n        existing_memories=formatted_memories,\n        new_memory=new_memory,\n    )\n\n    # Call LLM with Braintrust tracing\n    try:\n        prompt_msg = UserMessage(content=prompt)\n        with llm_generation_span(\n            llm=llm, flow=\"memory_update\", input_messages=[prompt_msg]\n        ) as span_generation:\n            response = llm.invoke(\n                prompt=prompt_msg, reasoning_effort=ReasoningEffort.OFF\n            )\n            record_llm_response(span_generation, response)\n            content = response.choice.message.content\n    except Exception as e:\n        logger.warning(f\"LLM invocation failed for memory update: {e}\")\n        return (new_memory, None)\n\n    # Handle empty response\n    if not content:\n        logger.warning(\n            \"LLM returned empty response for memory update, defaulting to add\"\n        )\n        return (new_memory, None)\n\n    # Parse JSON response\n    parsed_response = parse_llm_json_response(content)\n\n    if not parsed_response:\n        logger.warning(\n            f\"Failed to parse JSON from LLM response: {content[:200]}..., defaulting to add\"\n        )\n        return (new_memory, None)\n\n    # Extract fields from response\n    operation = parsed_response.get(\"operation\", \"add\").lower()\n    memory_id = parsed_response.get(\"memory_id\")\n    memory_text = parsed_response.get(\"memory_text\", new_memory)\n\n    # Ensure memory_text is valid\n    if not memory_text or not isinstance(memory_text, str):\n        memory_text = new_memory\n\n    # Handle add operation\n    if operation == \"add\":\n        logger.debug(\"Memory update operation: add\")\n        return (memory_text, None)\n\n    # Handle update operation\n    if operation == \"update\":\n        # Validate memory_id\n        if memory_id is None:\n            logger.warning(\"Update operation specified but no memory_id provided\")\n            return (memory_text, None)\n\n        # Convert memory_id to integer if it's a string\n        try:\n            memory_id_int = int(memory_id)\n        except (ValueError, TypeError):\n            logger.warning(f\"Invalid memory_id format: {memory_id}\")\n            return (memory_text, None)\n\n        # Convert from 1-indexed (LLM response) to 0-indexed (internal)\n        index_to_replace = memory_id_int - 1\n\n        # Validate index is in range\n        if index_to_replace < 0 or index_to_replace >= len(existing_memories):\n            logger.warning(\n                f\"memory_id {memory_id_int} out of range (1-{len(existing_memories)}), defaulting to add\"\n            )\n            return (memory_text, None)\n\n        logger.debug(f\"Memory update operation: update at index {index_to_replace}\")\n        return (memory_text, index_to_replace)\n\n    # Unknown operation, default to add\n    logger.warning(f\"Unknown operation '{operation}', defaulting to add\")\n    return (memory_text, None)\n"
  },
  {
    "path": "backend/onyx/secondary_llm_flows/query_expansion.py",
    "content": "from onyx.configs.constants import MessageType\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.models import AssistantMessage\nfrom onyx.llm.models import ChatCompletionMessage\nfrom onyx.llm.models import ReasoningEffort\nfrom onyx.llm.models import SystemMessage\nfrom onyx.llm.models import UserMessage\nfrom onyx.prompts.prompt_utils import get_current_llm_day_time\nfrom onyx.prompts.search_prompts import KEYWORD_REPHRASE_SYSTEM_PROMPT\nfrom onyx.prompts.search_prompts import KEYWORD_REPHRASE_USER_PROMPT\nfrom onyx.prompts.search_prompts import REPHRASE_CONTEXT_PROMPT\nfrom onyx.prompts.search_prompts import SEMANTIC_QUERY_REPHRASE_SYSTEM_PROMPT\nfrom onyx.prompts.search_prompts import SEMANTIC_QUERY_REPHRASE_USER_PROMPT\nfrom onyx.tools.models import ChatMinimalTextMessage\nfrom onyx.tracing.llm_utils import llm_generation_span\nfrom onyx.tracing.llm_utils import record_llm_response\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _build_additional_context(\n    user_info: str | None = None,\n    memories: list[str] | None = None,\n) -> str:\n    \"\"\"Build additional context section for query rephrasing/expansion.\n\n    Returns empty string if both user_info and memories are None/empty.\n    Otherwise returns formatted context with \"N/A\" for missing fields.\n    \"\"\"\n    has_user_info = user_info and user_info.strip()\n    has_memories = memories and any(m.strip() for m in memories)\n\n    if not has_user_info and not has_memories:\n        return \"\"\n\n    formatted_user_info = user_info if has_user_info else \"N/A\"\n    formatted_memories = (\n        \"\\n\".join(f\"- {memory}\" for memory in memories)\n        if has_memories and memories\n        else \"N/A\"\n    )\n\n    return REPHRASE_CONTEXT_PROMPT.format(\n        user_info=formatted_user_info,\n        memories=formatted_memories,\n    )\n\n\ndef _build_message_history(\n    history: list[ChatMinimalTextMessage],\n) -> list[ChatCompletionMessage]:\n    \"\"\"Convert ChatMinimalTextMessage list to ChatCompletionMessage list.\"\"\"\n    messages: list[ChatCompletionMessage] = []\n\n    for msg in history:\n        if msg.message_type == MessageType.USER:\n            user_msg = UserMessage(content=msg.message)\n            messages.append(user_msg)\n        elif msg.message_type == MessageType.ASSISTANT:\n            assistant_msg = AssistantMessage(content=msg.message)\n            messages.append(assistant_msg)\n\n    return messages\n\n\ndef semantic_query_rephrase(\n    history: list[ChatMinimalTextMessage],\n    llm: LLM,\n    user_info: str | None = None,\n    memories: list[str] | None = None,\n) -> str:\n    \"\"\"Rephrase a query into a standalone query using chat history context.\n\n    Converts the user's query into a self-contained search query that incorporates\n    relevant context from the chat history and optional user information/memories.\n\n    Args:\n        history: Chat message history. Must contain at least one user message.\n        llm: Language model to use for rephrasing\n        user_info: Optional user information for personalization\n        memories: Optional user memories for personalization\n\n    Returns:\n        Rephrased standalone query string\n\n    Raises:\n        ValueError: If history is empty or contains no user messages\n        RuntimeError: If LLM fails to generate a rephrased query\n    \"\"\"\n    if not history:\n        raise ValueError(\"History cannot be empty for query rephrasing\")\n\n    # Find the last user message in the history\n    last_user_message_idx = None\n    for i in range(len(history) - 1, -1, -1):\n        if history[i].message_type == MessageType.USER:\n            last_user_message_idx = i\n            break\n\n    if last_user_message_idx is None:\n        raise ValueError(\"History must contain at least one user message\")\n\n    # Extract the last user query\n    user_query = history[last_user_message_idx].message\n\n    # Build additional context section\n    additional_context = _build_additional_context(user_info, memories)\n\n    current_datetime_str = get_current_llm_day_time(\n        include_day_of_week=True, full_sentence=False\n    )\n\n    # Build system message with current date\n    system_msg = SystemMessage(\n        content=SEMANTIC_QUERY_REPHRASE_SYSTEM_PROMPT.format(\n            current_date=current_datetime_str\n        )\n    )\n\n    # Convert chat history to message format (excluding the last user message and everything after it)\n    messages: list[ChatCompletionMessage] = [system_msg]\n    messages.extend(_build_message_history(history[:last_user_message_idx]))\n\n    # Add the last message as the user prompt with instructions\n    final_user_msg = UserMessage(\n        content=SEMANTIC_QUERY_REPHRASE_USER_PROMPT.format(\n            additional_context=additional_context, user_query=user_query\n        )\n    )\n    messages.append(final_user_msg)\n\n    # Call LLM and return result with Braintrust tracing\n    with llm_generation_span(\n        llm=llm, flow=\"semantic_query_rephrase\", input_messages=messages\n    ) as span_generation:\n        response = llm.invoke(prompt=messages, reasoning_effort=ReasoningEffort.OFF)\n        record_llm_response(span_generation, response)\n        final_query = response.choice.message.content\n\n    if not final_query:\n        # It's ok if some other queries fail, this one is likely the best one\n        # It also can't fail in parsing so we should be able to guarantee a valid query here.\n        raise RuntimeError(\"LLM failed to generate a rephrased query\")\n\n    return final_query\n\n\ndef keyword_query_expansion(\n    history: list[ChatMinimalTextMessage],\n    llm: LLM,\n    user_info: str | None = None,\n    memories: list[str] | None = None,\n) -> list[str] | None:\n    \"\"\"Expand a query into multiple keyword-only queries using chat history context.\n\n    Converts the user's query into a set of keyword-based search queries (max 3)\n    that incorporate relevant context from the chat history and optional user\n    information/memories. Returns a list of keyword queries.\n\n    Args:\n        history: Chat message history. Must contain at least one user message.\n        llm: Language model to use for keyword expansion\n        user_info: Optional user information for personalization\n        memories: Optional user memories for personalization\n\n    Returns:\n        List of keyword-only query strings (max 3), or empty list if generation fails\n\n    Raises:\n        ValueError: If history is empty or contains no user messages\n    \"\"\"\n    if not history:\n        raise ValueError(\"History cannot be empty for keyword query expansion\")\n\n    # Find the last user message in the history\n    last_user_message_idx = None\n    for i in range(len(history) - 1, -1, -1):\n        if history[i].message_type == MessageType.USER:\n            last_user_message_idx = i\n            break\n\n    if last_user_message_idx is None:\n        raise ValueError(\"History must contain at least one user message\")\n\n    # Extract the last user query\n    user_query = history[last_user_message_idx].message\n\n    # Build additional context section\n    additional_context = _build_additional_context(user_info, memories)\n\n    current_datetime_str = get_current_llm_day_time(\n        include_day_of_week=True, full_sentence=False\n    )\n\n    # Build system message with current date\n    system_msg = SystemMessage(\n        content=KEYWORD_REPHRASE_SYSTEM_PROMPT.format(current_date=current_datetime_str)\n    )\n\n    # Convert chat history to message format (excluding the last user message and everything after it)\n    messages: list[ChatCompletionMessage] = [system_msg]\n    messages.extend(_build_message_history(history[:last_user_message_idx]))\n\n    # Add the last message as the user prompt with instructions\n    final_user_msg = UserMessage(\n        content=KEYWORD_REPHRASE_USER_PROMPT.format(\n            additional_context=additional_context, user_query=user_query\n        )\n    )\n    messages.append(final_user_msg)\n\n    # Call LLM and return result with Braintrust tracing\n    with llm_generation_span(\n        llm=llm, flow=\"keyword_query_expansion\", input_messages=messages\n    ) as span_generation:\n        response = llm.invoke(prompt=messages, reasoning_effort=ReasoningEffort.OFF)\n        record_llm_response(span_generation, response)\n        content = response.choice.message.content\n\n    # Parse the response - each line is a separate keyword query\n    if not content:\n        return []\n\n    queries = [line.strip() for line in content.strip().split(\"\\n\") if line.strip()]\n    return queries\n"
  },
  {
    "path": "backend/onyx/secondary_llm_flows/source_filter.py",
    "content": "from sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.llm.interfaces import LLM\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef strings_to_document_sources(source_strs: list[str]) -> list[DocumentSource]:\n    sources = []\n    for s in source_strs:\n        try:\n            sources.append(DocumentSource(s))\n        except ValueError:\n            logger.warning(f\"Failed to translate {s} to a DocumentSource\")\n    return sources\n\n\ndef extract_source_filter(\n    query: str, llm: LLM, db_session: Session\n) -> list[DocumentSource] | None:\n    # Can reference onyx/prompts/filter_extration.py for previous implementation prompts\n    raise NotImplementedError(\"This function should not be getting called right now\")\n"
  },
  {
    "path": "backend/onyx/secondary_llm_flows/time_filter.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\n\nfrom dateutil.parser import parse\n\nfrom onyx.llm.interfaces import LLM\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef best_match_time(time_str: str) -> datetime | None:\n    preferred_formats = [\"%m/%d/%Y\", \"%m-%d-%Y\"]\n\n    for fmt in preferred_formats:\n        try:\n            # As we don't know if the user is interacting with the API server from\n            # the same timezone as the API server, just assume the queries are UTC time\n            # the few hours offset (if any) shouldn't make any significant difference\n            dt = datetime.strptime(time_str, fmt)\n            return dt.replace(tzinfo=timezone.utc)\n        except ValueError:\n            continue\n\n    # If the above formats don't match, try using dateutil's parser\n    try:\n        dt = parse(time_str)\n        return (\n            dt.astimezone(timezone.utc)\n            if dt.tzinfo\n            else dt.replace(tzinfo=timezone.utc)\n        )\n    except ValueError:\n        return None\n\n\ndef extract_time_filter(query: str, llm: LLM) -> tuple[datetime | None, bool]:\n    \"\"\"Returns a datetime if a hard time filter should be applied for the given query\n    Additionally returns a bool, True if more recently updated Documents should be\n    heavily favored\"\"\"\n    raise NotImplementedError(\"This function should not be getting called right now\")\n\n\n#     def _get_time_filter_messages(query: str) -> list[dict[str, str]]:\n#         messages = [\n#             {\n#                 \"role\": \"system\",\n#                 \"content\": TIME_FILTER_PROMPT.format(\n#                     current_day_time_str=get_current_llm_day_time()\n#                 ),\n#             },\n#             {\n#                 \"role\": \"user\",\n#                 \"content\": \"What documents in Confluence were written in the last two quarters\",\n#             },\n#             {\n#                 \"role\": \"assistant\",\n#                 \"content\": json.dumps(\n#                     {\n#                         \"filter_type\": \"hard cutoff\",\n#                         \"filter_value\": \"quarter\",\n#                         \"value_multiple\": 2,\n#                     }\n#                 ),\n#             },\n#             {\"role\": \"user\", \"content\": \"What's the latest on project Corgies?\"},\n#             {\n#                 \"role\": \"assistant\",\n#                 \"content\": json.dumps({\"filter_type\": \"favor recent\"}),\n#             },\n#             {\n#                 \"role\": \"user\",\n#                 \"content\": \"Which customer asked about security features in February of 2022?\",\n#             },\n#             {\n#                 \"role\": \"assistant\",\n#                 \"content\": json.dumps(\n#                     {\"filter_type\": \"hard cutoff\", \"date\": \"02/01/2022\"}\n#                 ),\n#             },\n#             {\"role\": \"user\", \"content\": query},\n#         ]\n#         return messages\n\n#     def _extract_time_filter_from_llm_out(\n#         model_out: str,\n#     ) -> tuple[datetime | None, bool]:\n#         \"\"\"Returns a datetime for a hard cutoff and a bool for if the\"\"\"\n#         try:\n#             model_json = json.loads(model_out, strict=False)\n#         except json.JSONDecodeError:\n#             return None, False\n\n#         # If filter type is not present, just assume something has gone wrong\n#         # Potentially model has identified a date and just returned that but\n#         # better to be conservative and not identify the wrong filter.\n#         if \"filter_type\" not in model_json:\n#             return None, False\n\n#         if \"hard\" in model_json[\"filter_type\"] or \"recent\" in model_json[\"filter_type\"]:\n#             favor_recent = \"recent\" in model_json[\"filter_type\"]\n\n#             if \"date\" in model_json:\n#                 extracted_time = best_match_time(model_json[\"date\"])\n#                 if extracted_time is not None:\n#                     # LLM struggles to understand the concept of not sensitive within a time range\n#                     # So if a time is extracted, just go with that alone\n#                     return extracted_time, False\n\n#             time_diff = None\n#             multiplier = 1.0\n\n#             if \"value_multiple\" in model_json:\n#                 try:\n#                     multiplier = float(model_json[\"value_multiple\"])\n#                 except ValueError:\n#                     pass\n\n#             if \"filter_value\" in model_json:\n#                 filter_value = model_json[\"filter_value\"]\n#                 if \"day\" in filter_value:\n#                     time_diff = timedelta(days=multiplier)\n#                 elif \"week\" in filter_value:\n#                     time_diff = timedelta(weeks=multiplier)\n#                 elif \"month\" in filter_value:\n#                     # Have to just use the average here, too complicated to calculate exact day\n#                     # based on current day etc.\n#                     time_diff = timedelta(days=multiplier * 30.437)\n#                 elif \"quarter\" in filter_value:\n#                     time_diff = timedelta(days=multiplier * 91.25)\n#                 elif \"year\" in filter_value:\n#                     time_diff = timedelta(days=multiplier * 365)\n\n#             if time_diff is not None:\n#                 current = datetime.now(timezone.utc)\n#                 # LLM struggles to understand the concept of not sensitive within a time range\n#                 # So if a time is extracted, just go with that alone\n#                 return current - time_diff, False\n\n#             # If we failed to extract a hard filter, just pass back the value of favor recent\n#             return None, favor_recent\n\n#         return None, False\n\n#     messages = _get_time_filter_messages(query)\n#     filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)\n#     model_output = message_to_string(llm.invoke_langchain(filled_llm_prompt))\n#     logger.debug(model_output)\n\n#     return _extract_time_filter_from_llm_out(model_output)\n"
  },
  {
    "path": "backend/onyx/seeding/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/api_key/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.db.api_key import ApiKeyDescriptor\nfrom onyx.db.api_key import fetch_api_keys\nfrom onyx.db.api_key import insert_api_key\nfrom onyx.db.api_key import regenerate_api_key\nfrom onyx.db.api_key import remove_api_key\nfrom onyx.db.api_key import update_api_key\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.server.api_key.models import APIKeyArgs\n\n\nrouter = APIRouter(prefix=\"/admin/api-key\")\n\n\n@router.get(\"\")\ndef list_api_keys(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[ApiKeyDescriptor]:\n    return fetch_api_keys(db_session)\n\n\n@router.post(\"\")\ndef create_api_key(\n    api_key_args: APIKeyArgs,\n    user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ApiKeyDescriptor:\n    return insert_api_key(db_session, api_key_args, user.id)\n\n\n@router.post(\"/{api_key_id}/regenerate\")\ndef regenerate_existing_api_key(\n    api_key_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ApiKeyDescriptor:\n    return regenerate_api_key(db_session, api_key_id)\n\n\n@router.patch(\"/{api_key_id}\")\ndef update_existing_api_key(\n    api_key_id: int,\n    api_key_args: APIKeyArgs,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ApiKeyDescriptor:\n    return update_api_key(db_session, api_key_id, api_key_args)\n\n\n@router.delete(\"/{api_key_id}\")\ndef delete_api_key(\n    api_key_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    remove_api_key(db_session, api_key_id)\n"
  },
  {
    "path": "backend/onyx/server/api_key/models.py",
    "content": "from pydantic import BaseModel\n\nfrom onyx.auth.schemas import UserRole\n\n\nclass APIKeyArgs(BaseModel):\n    name: str | None = None\n    role: UserRole = UserRole.BASIC\n"
  },
  {
    "path": "backend/onyx/server/api_key_usage.py",
    "content": "\"\"\"API key and PAT usage tracking for cloud usage limits.\"\"\"\n\nfrom fastapi import Depends\nfrom fastapi import Request\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.api_key import get_hashed_api_key_from_request\nfrom onyx.auth.pat import get_hashed_pat_from_request\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.usage import increment_usage\nfrom onyx.db.usage import UsageType\nfrom onyx.server.usage_limits import check_usage_and_raise\nfrom onyx.server.usage_limits import is_usage_limits_enabled\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\ndef check_api_key_usage(\n    request: Request,\n    db_session: Session = Depends(get_session),\n) -> None:\n    \"\"\"\n    FastAPI dependency that checks and tracks API key/PAT usage limits.\n\n    This should be added as a dependency to endpoints that accept API key\n    or PAT authentication and should be usage-limited.\n    \"\"\"\n    if not is_usage_limits_enabled():\n        return\n\n    # Check if request is authenticated via API key or PAT\n    is_api_key_request = get_hashed_api_key_from_request(request) is not None\n    is_pat_request = get_hashed_pat_from_request(request) is not None\n\n    if not is_api_key_request and not is_pat_request:\n        return\n\n    tenant_id = get_current_tenant_id()\n\n    # Check usage limit\n    check_usage_and_raise(\n        db_session=db_session,\n        usage_type=UsageType.API_CALLS,\n        tenant_id=tenant_id,\n        pending_amount=1,\n    )\n\n    # Increment usage counter\n    increment_usage(\n        db_session=db_session,\n        usage_type=UsageType.API_CALLS,\n        amount=1,\n    )\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/server/auth_check.py",
    "content": "from typing import cast\n\nfrom fastapi import FastAPI\nfrom fastapi.dependencies.models import Dependant\nfrom starlette.routing import BaseRoute\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_chat_accessible_user\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_limited_user\nfrom onyx.auth.users import current_user\nfrom onyx.auth.users import current_user_from_websocket\nfrom onyx.auth.users import current_user_with_expired_token\nfrom onyx.configs.app_configs import APP_API_PREFIX\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\n\n\nPUBLIC_ENDPOINT_SPECS = [\n    # built-in documentation functions\n    (\"/openapi.json\", {\"GET\", \"HEAD\"}),\n    (\"/docs\", {\"GET\", \"HEAD\"}),\n    (\"/docs/oauth2-redirect\", {\"GET\", \"HEAD\"}),\n    (\"/redoc\", {\"GET\", \"HEAD\"}),\n    # should always be callable, will just return 401 if not authenticated\n    (\"/me\", {\"GET\"}),\n    # just returns 200 to validate that the server is up\n    (\"/health\", {\"GET\"}),\n    # just returns auth type, needs to be accessible before the user is logged\n    # in to determine what flow to give the user\n    (\"/auth/type\", {\"GET\"}),\n    # just gets the version of Onyx (e.g. 0.3.11)\n    (\"/version\", {\"GET\"}),\n    # Gets stable and beta versions for Onyx docker images\n    (\"/versions\", {\"GET\"}),\n    # stuff related to basic auth\n    (\"/auth/refresh\", {\"POST\"}),\n    (\"/auth/register\", {\"POST\"}),\n    (\"/auth/login\", {\"POST\"}),\n    (\"/auth/logout\", {\"POST\"}),\n    (\"/auth/forgot-password\", {\"POST\"}),\n    (\"/auth/reset-password\", {\"POST\"}),\n    (\"/auth/request-verify-token\", {\"POST\"}),\n    (\"/auth/verify\", {\"POST\"}),\n    (\"/users/me\", {\"GET\"}),\n    (\"/users/me\", {\"PATCH\"}),\n    (\"/users/{id}\", {\"GET\"}),\n    (\"/users/{id}\", {\"PATCH\"}),\n    (\"/users/{id}\", {\"DELETE\"}),\n    # oauth\n    (\"/auth/oauth/authorize\", {\"GET\"}),\n    (\"/auth/oauth/callback\", {\"GET\"}),\n    # oidc\n    (\"/auth/oidc/authorize\", {\"GET\"}),\n    (\"/auth/oidc/callback\", {\"GET\"}),\n    # saml\n    (\"/auth/saml/authorize\", {\"GET\"}),\n    (\"/auth/saml/callback\", {\"POST\"}),\n    (\"/auth/saml/callback\", {\"GET\"}),\n    (\"/auth/saml/logout\", {\"POST\"}),\n    # anonymous user on cloud\n    (\"/tenants/anonymous-user\", {\"POST\"}),\n    (\"/metrics\", {\"GET\"}),  # added by prometheus_fastapi_instrumentator\n    # craft webapp proxy — access enforced per-session via sharing_scope in handler\n    (\"/build/sessions/{session_id}/webapp\", {\"GET\"}),\n    (\"/build/sessions/{session_id}/webapp/{path:path}\", {\"GET\"}),\n]\n\n\ndef is_route_in_spec_list(\n    route: BaseRoute, public_endpoint_specs: list[tuple[str, set[str]]]\n) -> bool:\n    if not hasattr(route, \"path\") or not hasattr(route, \"methods\"):\n        return False\n\n    # try adding the prefix AND not adding the prefix, since some endpoints\n    # are not prefixed (e.g. /openapi.json)\n    if (route.path, route.methods) in public_endpoint_specs:\n        return True\n\n    processed_global_prefix = f\"/{APP_API_PREFIX.strip('/')}\" if APP_API_PREFIX else \"\"\n    if not processed_global_prefix:\n        return False\n\n    for endpoint_spec in public_endpoint_specs:\n        base_path, methods = endpoint_spec\n        prefixed_path = f\"{processed_global_prefix}/{base_path.strip('/')}\"\n\n        if prefixed_path == route.path and route.methods == methods:\n            return True\n\n    return False\n\n\ndef check_router_auth(\n    application: FastAPI,\n    public_endpoint_specs: list[tuple[str, set[str]]] = PUBLIC_ENDPOINT_SPECS,\n) -> None:\n    \"\"\"Ensures that all endpoints on the passed in application either\n    (1) have auth enabled OR\n    (2) are explicitly marked as a public endpoint\n    \"\"\"\n\n    control_plane_dep = fetch_ee_implementation_or_noop(\n        \"onyx.server.tenants.access\", \"control_plane_dep\"\n    )\n    current_cloud_superuser = fetch_ee_implementation_or_noop(\n        \"onyx.auth.users\", \"current_cloud_superuser\"\n    )\n    verify_scim_token = fetch_ee_implementation_or_noop(\n        \"onyx.server.scim.auth\", \"verify_scim_token\"\n    )\n\n    for route in application.routes:\n        # explicitly marked as public\n        if is_route_in_spec_list(route, public_endpoint_specs):\n            continue\n\n        # check for auth\n        found_auth = False\n        route_dependant_obj = cast(\n            Dependant | None, route.dependant if hasattr(route, \"dependant\") else None\n        )\n        if route_dependant_obj:\n            for dependency in route_dependant_obj.dependencies:\n                depends_fn = dependency.cache_key[0]\n                if (\n                    depends_fn == current_limited_user\n                    or depends_fn == current_user\n                    or depends_fn == current_admin_user\n                    or depends_fn == current_curator_or_admin_user\n                    or depends_fn == current_user_with_expired_token\n                    or depends_fn == current_chat_accessible_user\n                    or depends_fn == current_user_from_websocket\n                    or depends_fn == control_plane_dep\n                    or depends_fn == current_cloud_superuser\n                    or depends_fn == verify_scim_token\n                ):\n                    found_auth = True\n                    break\n\n        if not found_auth:\n            # uncomment to print out all route(s) that are missing auth\n            # print(f\"(\\\"{route.path}\\\", {set(route.methods)}),\")\n\n            raise RuntimeError(\n                f\"Did not find user dependency in private route - {route}\"\n            )\n"
  },
  {
    "path": "backend/onyx/server/documents/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/documents/cc_pair.py",
    "content": "from datetime import datetime\nfrom http import HTTPStatus\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Query\nfrom fastapi.responses import JSONResponse\nfrom sqlalchemy import select\nfrom sqlalchemy.exc import IntegrityError\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.background.celery.tasks.pruning.tasks import (\n    try_creating_prune_generator_task,\n)\nfrom onyx.background.celery.versioned_apps.client import app as client_app\nfrom onyx.background.indexing.models import IndexAttemptErrorPydantic\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.connectors.exceptions import ValidationError\nfrom onyx.connectors.factory import validate_ccpair_for_user\nfrom onyx.db.connector import delete_connector\nfrom onyx.db.connector_credential_pair import add_credential_to_connector\nfrom onyx.db.connector_credential_pair import (\n    get_connector_credential_pair_from_id_for_user,\n)\nfrom onyx.db.connector_credential_pair import remove_credential_from_connector\nfrom onyx.db.connector_credential_pair import (\n    update_connector_credential_pair_from_id,\n)\nfrom onyx.db.connector_credential_pair import verify_user_has_access_to_cc_pair\nfrom onyx.db.document import get_document_counts_for_cc_pairs\nfrom onyx.db.document import get_documents_for_cc_pair\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.enums import PermissionSyncStatus\nfrom onyx.db.index_attempt import count_index_attempt_errors_for_cc_pair\nfrom onyx.db.index_attempt import count_index_attempts_for_cc_pair\nfrom onyx.db.index_attempt import get_index_attempt_errors_for_cc_pair\nfrom onyx.db.index_attempt import get_latest_index_attempt_for_cc_pair_id\nfrom onyx.db.index_attempt import (\n    get_latest_successful_index_attempt_for_cc_pair_id,\n)\nfrom onyx.db.index_attempt import get_paginated_index_attempts_for_cc_pair_id\nfrom onyx.db.indexing_coordination import IndexingCoordination\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.models import User\nfrom onyx.db.permission_sync_attempt import (\n    get_latest_doc_permission_sync_attempt_for_cc_pair,\n)\nfrom onyx.db.permission_sync_attempt import (\n    get_recent_doc_permission_sync_attempts_for_cc_pair,\n)\nfrom onyx.redis.redis_connector import RedisConnector\nfrom onyx.redis.redis_connector_utils import get_deletion_attempt_snapshot\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.documents.models import CCPairFullInfo\nfrom onyx.server.documents.models import CCPropertyUpdateRequest\nfrom onyx.server.documents.models import CCStatusUpdateRequest\nfrom onyx.server.documents.models import ConnectorCredentialPairIdentifier\nfrom onyx.server.documents.models import ConnectorCredentialPairMetadata\nfrom onyx.server.documents.models import DocumentSyncStatus\nfrom onyx.server.documents.models import IndexAttemptSnapshot\nfrom onyx.server.documents.models import PaginatedReturn\nfrom onyx.server.documents.models import PermissionSyncAttemptSnapshot\nfrom onyx.server.models import StatusResponse\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\nrouter = APIRouter(prefix=\"/manage\")\n\n\n@router.get(\"/admin/cc-pair/{cc_pair_id}/index-attempts\", tags=PUBLIC_API_TAGS)\ndef get_cc_pair_index_attempts(\n    cc_pair_id: int,\n    page_num: int = Query(0, ge=0),\n    page_size: int = Query(10, ge=1, le=1000),\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> PaginatedReturn[IndexAttemptSnapshot]:\n    if user:\n        user_has_access = verify_user_has_access_to_cc_pair(\n            cc_pair_id, db_session, user, get_editable=False\n        )\n        if not user_has_access:\n            raise HTTPException(\n                status_code=400, detail=\"CC Pair not found for current user permissions\"\n            )\n\n    total_count = count_index_attempts_for_cc_pair(\n        db_session=db_session,\n        cc_pair_id=cc_pair_id,\n    )\n    index_attempts = get_paginated_index_attempts_for_cc_pair_id(\n        db_session=db_session,\n        cc_pair_id=cc_pair_id,\n        page=page_num,\n        page_size=page_size,\n    )\n    return PaginatedReturn(\n        items=[\n            IndexAttemptSnapshot.from_index_attempt_db_model(index_attempt)\n            for index_attempt in index_attempts\n        ],\n        total_items=total_count,\n    )\n\n\n@router.get(\"/admin/cc-pair/{cc_pair_id}/permission-sync-attempts\")\ndef get_cc_pair_permission_sync_attempts(\n    cc_pair_id: int,\n    page_num: int = Query(0, ge=0),\n    page_size: int = Query(10, ge=1, le=1000),\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> PaginatedReturn[PermissionSyncAttemptSnapshot]:\n    if user:\n        user_has_access = verify_user_has_access_to_cc_pair(\n            cc_pair_id, db_session, user, get_editable=False\n        )\n        if not user_has_access:\n            raise HTTPException(\n                status_code=400, detail=\"CC Pair not found for current user permissions\"\n            )\n\n    # Get all permission sync attempts for this cc pair\n    all_attempts = get_recent_doc_permission_sync_attempts_for_cc_pair(\n        cc_pair_id=cc_pair_id,\n        limit=1000,\n        db_session=db_session,\n    )\n\n    start_idx = page_num * page_size\n    end_idx = start_idx + page_size\n    paginated_attempts = all_attempts[start_idx:end_idx]\n    items = [\n        PermissionSyncAttemptSnapshot.from_permission_sync_attempt_db_model(attempt)\n        for attempt in paginated_attempts\n    ]\n\n    return PaginatedReturn(\n        items=items,\n        total_items=len(all_attempts),\n    )\n\n\n@router.get(\"/admin/cc-pair/{cc_pair_id}\", tags=PUBLIC_API_TAGS)\ndef get_cc_pair_full_info(\n    cc_pair_id: int,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> CCPairFullInfo:\n    tenant_id = get_current_tenant_id()\n\n    cc_pair = get_connector_credential_pair_from_id_for_user(\n        cc_pair_id, db_session, user, get_editable=False\n    )\n    if not cc_pair:\n        raise HTTPException(\n            status_code=404, detail=\"CC Pair not found for current user permissions\"\n        )\n    editable_cc_pair = get_connector_credential_pair_from_id_for_user(\n        cc_pair_id, db_session, user, get_editable=True\n    )\n    is_editable_for_current_user = editable_cc_pair is not None\n\n    document_count_info_list = list(\n        get_document_counts_for_cc_pairs(\n            db_session=db_session,\n            cc_pairs=[\n                ConnectorCredentialPairIdentifier(\n                    connector_id=cc_pair.connector_id,\n                    credential_id=cc_pair.credential_id,\n                )\n            ],\n        )\n    )\n    documents_indexed = (\n        document_count_info_list[0][-1] if document_count_info_list else 0\n    )\n\n    latest_attempt = get_latest_index_attempt_for_cc_pair_id(\n        db_session=db_session,\n        connector_credential_pair_id=cc_pair_id,\n        secondary_index=False,\n        only_finished=False,\n    )\n\n    latest_successful_attempt = get_latest_successful_index_attempt_for_cc_pair_id(\n        db_session=db_session,\n        connector_credential_pair_id=cc_pair_id,\n    )\n\n    # Get latest permission sync attempt for status\n    latest_permission_sync_attempt = None\n    if cc_pair.access_type == AccessType.SYNC:\n        latest_permission_sync_attempt = (\n            get_latest_doc_permission_sync_attempt_for_cc_pair(\n                db_session=db_session,\n                connector_credential_pair_id=cc_pair_id,\n            )\n        )\n\n    return CCPairFullInfo.from_models(\n        cc_pair_model=cc_pair,\n        number_of_index_attempts=count_index_attempts_for_cc_pair(\n            db_session=db_session,\n            cc_pair_id=cc_pair_id,\n        ),\n        last_index_attempt=latest_attempt,\n        last_successful_index_time=(\n            latest_successful_attempt.time_started\n            if latest_successful_attempt\n            else None\n        ),\n        latest_deletion_attempt=get_deletion_attempt_snapshot(\n            connector_id=cc_pair.connector_id,\n            credential_id=cc_pair.credential_id,\n            db_session=db_session,\n            tenant_id=tenant_id,\n        ),\n        num_docs_indexed=documents_indexed,\n        is_editable_for_current_user=is_editable_for_current_user,\n        indexing=bool(\n            latest_attempt and latest_attempt.status == IndexingStatus.IN_PROGRESS\n        ),\n        last_permission_sync_attempt_status=(\n            latest_permission_sync_attempt.status\n            if latest_permission_sync_attempt\n            else None\n        ),\n        permission_syncing=bool(\n            latest_permission_sync_attempt\n            and latest_permission_sync_attempt.status\n            == PermissionSyncStatus.IN_PROGRESS\n        ),\n        last_permission_sync_attempt_finished=(\n            latest_permission_sync_attempt.time_finished\n            if latest_permission_sync_attempt\n            else None\n        ),\n        last_permission_sync_attempt_error_message=(\n            latest_permission_sync_attempt.error_message\n            if latest_permission_sync_attempt\n            else None\n        ),\n    )\n\n\n@router.put(\"/admin/cc-pair/{cc_pair_id}/status\", tags=PUBLIC_API_TAGS)\ndef update_cc_pair_status(\n    cc_pair_id: int,\n    status_update_request: CCStatusUpdateRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> JSONResponse:\n    \"\"\"This method returns nearly immediately. It simply sets some signals and\n    optimistically assumes any running background processes will clean themselves up.\n    This is done to improve the perceived end user experience.\n\n    Returns HTTPStatus.OK if everything finished.\n    \"\"\"\n    tenant_id = get_current_tenant_id()\n\n    cc_pair = get_connector_credential_pair_from_id_for_user(\n        cc_pair_id=cc_pair_id,\n        db_session=db_session,\n        user=user,\n        get_editable=True,\n    )\n\n    if not cc_pair:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Connection not found for current user's permissions\",\n        )\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n    if status_update_request.status == ConnectorCredentialPairStatus.PAUSED:\n        redis_connector.stop.set_fence(True)\n\n        # Request cancellation for any active indexing attempts for this cc_pair\n        active_attempts = (\n            db_session.execute(\n                select(IndexAttempt).where(\n                    IndexAttempt.connector_credential_pair_id == cc_pair_id,\n                    IndexAttempt.status.in_(\n                        [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]\n                    ),\n                )\n            )\n            .scalars()\n            .all()\n        )\n\n        for attempt in active_attempts:\n            try:\n                IndexingCoordination.request_cancellation(db_session, attempt.id)\n                # Revoke the task to prevent it from running\n                if attempt.celery_task_id:\n                    client_app.control.revoke(attempt.celery_task_id)\n                logger.info(\n                    f\"Requested cancellation for active indexing attempt {attempt.id} \"\n                    f\"due to connector pause: cc_pair={cc_pair_id}\"\n                )\n            except Exception:\n                logger.exception(\n                    f\"Failed to request cancellation for indexing attempt {attempt.id}\"\n                )\n\n    else:\n        redis_connector.stop.set_fence(False)\n\n    update_connector_credential_pair_from_id(\n        db_session=db_session,\n        cc_pair_id=cc_pair_id,\n        status=status_update_request.status,\n    )\n\n    db_session.commit()\n\n    # this speeds up the start of indexing by firing the check immediately\n    client_app.send_task(\n        OnyxCeleryTask.CHECK_FOR_INDEXING,\n        kwargs=dict(tenant_id=tenant_id),\n        priority=OnyxCeleryPriority.HIGH,\n    )\n\n    return JSONResponse(\n        status_code=HTTPStatus.OK, content={\"message\": str(HTTPStatus.OK)}\n    )\n\n\n@router.put(\"/admin/cc-pair/{cc_pair_id}/name\")\ndef update_cc_pair_name(\n    cc_pair_id: int,\n    new_name: str,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse[int]:\n    cc_pair = get_connector_credential_pair_from_id_for_user(\n        cc_pair_id=cc_pair_id,\n        db_session=db_session,\n        user=user,\n        get_editable=True,\n    )\n    if not cc_pair:\n        raise HTTPException(\n            status_code=400, detail=\"CC Pair not found for current user's permissions\"\n        )\n\n    try:\n        cc_pair.name = new_name\n        db_session.commit()\n        return StatusResponse(\n            success=True, message=\"Name updated successfully\", data=cc_pair_id\n        )\n    except IntegrityError:\n        db_session.rollback()\n        raise HTTPException(status_code=400, detail=\"Name must be unique\")\n\n\n@router.put(\"/admin/cc-pair/{cc_pair_id}/property\")\ndef update_cc_pair_property(\n    cc_pair_id: int,\n    update_request: CCPropertyUpdateRequest,  # in seconds\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse[int]:\n    cc_pair = get_connector_credential_pair_from_id_for_user(\n        cc_pair_id=cc_pair_id,\n        db_session=db_session,\n        user=user,\n        get_editable=True,\n    )\n    if not cc_pair:\n        raise HTTPException(\n            status_code=400, detail=\"CC Pair not found for current user's permissions\"\n        )\n\n    # Can we centralize logic for updating connector properties\n    # so that we don't need to manually validate everywhere?\n    if update_request.name == \"refresh_frequency\":\n        cc_pair.connector.refresh_freq = int(update_request.value)\n        cc_pair.connector.validate_refresh_freq()\n        db_session.commit()\n\n        msg = \"Refresh frequency updated successfully\"\n    elif update_request.name == \"pruning_frequency\":\n        cc_pair.connector.prune_freq = int(update_request.value)\n        cc_pair.connector.validate_prune_freq()\n        db_session.commit()\n\n        msg = \"Pruning frequency updated successfully\"\n    else:\n        raise HTTPException(\n            status_code=400, detail=f\"Property name {update_request.name} is not valid.\"\n        )\n\n    return StatusResponse(success=True, message=msg, data=cc_pair_id)\n\n\n@router.get(\"/admin/cc-pair/{cc_pair_id}/last_pruned\")\ndef get_cc_pair_last_pruned(\n    cc_pair_id: int,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> datetime | None:\n    cc_pair = get_connector_credential_pair_from_id_for_user(\n        cc_pair_id=cc_pair_id,\n        db_session=db_session,\n        user=user,\n        get_editable=False,\n    )\n    if not cc_pair:\n        raise HTTPException(\n            status_code=400,\n            detail=\"cc_pair not found for current user's permissions\",\n        )\n\n    return cc_pair.last_pruned\n\n\n@router.post(\"/admin/cc-pair/{cc_pair_id}/prune\", tags=PUBLIC_API_TAGS)\ndef prune_cc_pair(\n    cc_pair_id: int,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse[list[int]]:\n    \"\"\"Triggers pruning on a particular cc_pair immediately\"\"\"\n    tenant_id = get_current_tenant_id()\n\n    cc_pair = get_connector_credential_pair_from_id_for_user(\n        cc_pair_id=cc_pair_id,\n        db_session=db_session,\n        user=user,\n        get_editable=False,\n    )\n    if not cc_pair:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Connection not found for current user's permissions\",\n        )\n\n    r = get_redis_client()\n\n    redis_connector = RedisConnector(tenant_id, cc_pair_id)\n    if redis_connector.prune.fenced:\n        raise HTTPException(\n            status_code=HTTPStatus.CONFLICT,\n            detail=\"Pruning task already in progress.\",\n        )\n\n    logger.info(\n        f\"Pruning cc_pair: cc_pair={cc_pair_id} \"\n        f\"connector={cc_pair.connector_id} \"\n        f\"credential={cc_pair.credential_id} \"\n        f\"{cc_pair.connector.name} connector.\"\n    )\n    payload_id = try_creating_prune_generator_task(\n        client_app, cc_pair, db_session, r, tenant_id\n    )\n    if not payload_id:\n        raise HTTPException(\n            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,\n            detail=\"Pruning task creation failed.\",\n        )\n\n    logger.info(f\"Pruning queued: cc_pair={cc_pair.id} id={payload_id}\")\n\n    return StatusResponse(\n        success=True,\n        message=\"Successfully created the pruning task.\",\n    )\n\n\n@router.get(\"/admin/cc-pair/{cc_pair_id}/get-docs-sync-status\")\ndef get_docs_sync_status(\n    cc_pair_id: int,\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[DocumentSyncStatus]:\n    all_docs_for_cc_pair = get_documents_for_cc_pair(\n        db_session=db_session,\n        cc_pair_id=cc_pair_id,\n    )\n    return [DocumentSyncStatus.from_model(doc) for doc in all_docs_for_cc_pair]\n\n\n@router.get(\"/admin/cc-pair/{cc_pair_id}/errors\", tags=PUBLIC_API_TAGS)\ndef get_cc_pair_indexing_errors(\n    cc_pair_id: int,\n    include_resolved: bool = Query(False),\n    page_num: int = Query(0, ge=0),\n    page_size: int = Query(10, ge=1, le=100),\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> PaginatedReturn[IndexAttemptErrorPydantic]:\n    \"\"\"Gives back all errors for a given CC Pair. Allows pagination based on page and page_size params.\n\n    Args:\n        cc_pair_id: ID of the connector-credential pair to get errors for\n        include_resolved: Whether to include resolved errors in the results\n        page_num: Page number for pagination, starting at 0\n        page_size: Number of errors to return per page\n        _: Current user, must be curator or admin\n        db_session: Database session\n\n    Returns:\n        Paginated list of indexing errors for the CC pair.\n    \"\"\"\n    total_count = count_index_attempt_errors_for_cc_pair(\n        db_session=db_session,\n        cc_pair_id=cc_pair_id,\n        unresolved_only=not include_resolved,\n    )\n\n    index_attempt_errors = get_index_attempt_errors_for_cc_pair(\n        db_session=db_session,\n        cc_pair_id=cc_pair_id,\n        unresolved_only=not include_resolved,\n        page=page_num,\n        page_size=page_size,\n    )\n    return PaginatedReturn(\n        items=[IndexAttemptErrorPydantic.from_model(e) for e in index_attempt_errors],\n        total_items=total_count,\n    )\n\n\n@router.put(\n    \"/connector/{connector_id}/credential/{credential_id}\", tags=PUBLIC_API_TAGS\n)\ndef associate_credential_to_connector(\n    connector_id: int,\n    credential_id: int,\n    metadata: ConnectorCredentialPairMetadata,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n    tenant_id: str = Depends(get_current_tenant_id),\n) -> StatusResponse[int]:\n    \"\"\"NOTE(rkuo): internally discussed and the consensus is this endpoint\n    and create_connector_with_mock_credential should be combined.\n\n    The intent of this endpoint is to handle connectors that actually need credentials.\n    \"\"\"\n\n    fetch_ee_implementation_or_noop(\n        \"onyx.db.user_group\", \"validate_object_creation_for_user\", None\n    )(\n        db_session=db_session,\n        user=user,\n        target_group_ids=metadata.groups,\n        object_is_public=metadata.access_type == AccessType.PUBLIC,\n        object_is_perm_sync=metadata.access_type == AccessType.SYNC,\n        object_is_new=True,\n    )\n\n    try:\n        validate_ccpair_for_user(\n            connector_id, credential_id, metadata.access_type, db_session\n        )\n\n        response = add_credential_to_connector(\n            db_session=db_session,\n            user=user,\n            connector_id=connector_id,\n            credential_id=credential_id,\n            cc_pair_name=metadata.name,\n            access_type=metadata.access_type,\n            auto_sync_options=metadata.auto_sync_options,\n            groups=metadata.groups,\n            processing_mode=metadata.processing_mode,\n        )\n\n        # trigger indexing immediately\n        client_app.send_task(\n            OnyxCeleryTask.CHECK_FOR_INDEXING,\n            priority=OnyxCeleryPriority.HIGH,\n            kwargs={\"tenant_id\": tenant_id},\n        )\n\n        logger.info(\n            f\"associate_credential_to_connector - running check_for_indexing: cc_pair={response.data}\"\n        )\n\n        return response\n    except ValidationError as e:\n        # If validation fails, delete the connector and commit the changes\n        # Ensures we don't leave invalid connectors in the database\n        # NOTE: consensus is that it makes sense to unify connector and ccpair creation flows\n        # which would rid us of needing to handle cases like these\n        delete_connector(db_session, connector_id)\n        db_session.commit()\n\n        raise HTTPException(\n            status_code=400, detail=\"Connector validation error: \" + str(e)\n        )\n    except IntegrityError as e:\n        logger.error(f\"IntegrityError: {e}\")\n        delete_connector(db_session, connector_id)\n        db_session.commit()\n\n        raise HTTPException(status_code=400, detail=\"Name must be unique\")\n\n    except Exception as e:\n        logger.exception(f\"Unexpected error: {e}\")\n\n        raise HTTPException(status_code=500, detail=\"Unexpected error\")\n\n\n@router.delete(\n    \"/connector/{connector_id}/credential/{credential_id}\", tags=PUBLIC_API_TAGS\n)\ndef dissociate_credential_from_connector(\n    connector_id: int,\n    credential_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse[int]:\n    return remove_credential_from_connector(\n        connector_id, credential_id, user, db_session\n    )\n"
  },
  {
    "path": "backend/onyx/server/documents/connector.py",
    "content": "import json\nimport math\nimport mimetypes\nimport os\nimport zipfile\nfrom datetime import datetime\nfrom io import BytesIO\nfrom typing import Any\nfrom typing import cast\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import File\nfrom fastapi import Form\nfrom fastapi import HTTPException\nfrom fastapi import Query\nfrom fastapi import Request\nfrom fastapi import Response\nfrom fastapi import UploadFile\nfrom google.oauth2.credentials import Credentials\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.email_utils import send_email\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_chat_accessible_user\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.background.celery.tasks.pruning.tasks import (\n    try_creating_prune_generator_task,\n)\nfrom onyx.background.celery.versioned_apps.client import app as client_app\nfrom onyx.configs.app_configs import EMAIL_CONFIGURED\nfrom onyx.configs.app_configs import ENABLED_CONNECTOR_TYPES\nfrom onyx.configs.app_configs import MOCK_CONNECTOR_FILE_PATH\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.configs.constants import MilestoneRecordType\nfrom onyx.configs.constants import ONYX_METADATA_FILENAME\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.factory import validate_ccpair_for_user\nfrom onyx.connectors.google_utils.google_auth import (\n    get_google_oauth_creds,\n)\nfrom onyx.connectors.google_utils.google_kv import (\n    build_service_account_creds,\n)\nfrom onyx.connectors.google_utils.google_kv import (\n    delete_google_app_cred,\n)\nfrom onyx.connectors.google_utils.google_kv import (\n    delete_service_account_key,\n)\nfrom onyx.connectors.google_utils.google_kv import get_auth_url\nfrom onyx.connectors.google_utils.google_kv import (\n    get_google_app_cred,\n)\nfrom onyx.connectors.google_utils.google_kv import (\n    get_service_account_key,\n)\nfrom onyx.connectors.google_utils.google_kv import (\n    update_credential_access_tokens,\n)\nfrom onyx.connectors.google_utils.google_kv import (\n    upsert_google_app_cred,\n)\nfrom onyx.connectors.google_utils.google_kv import (\n    upsert_service_account_key,\n)\nfrom onyx.connectors.google_utils.google_kv import verify_csrf\nfrom onyx.connectors.google_utils.shared_constants import DB_CREDENTIALS_DICT_TOKEN_KEY\nfrom onyx.connectors.google_utils.shared_constants import (\n    GoogleOAuthAuthenticationMethod,\n)\nfrom onyx.db.connector import create_connector\nfrom onyx.db.connector import delete_connector\nfrom onyx.db.connector import fetch_connector_by_id\nfrom onyx.db.connector import fetch_connectors\nfrom onyx.db.connector import fetch_unique_document_sources\nfrom onyx.db.connector import get_connector_credential_ids\nfrom onyx.db.connector import mark_ccpair_with_indexing_trigger\nfrom onyx.db.connector import update_connector\nfrom onyx.db.connector_credential_pair import add_credential_to_connector\nfrom onyx.db.connector_credential_pair import (\n    fetch_connector_credential_pair_for_connector,\n)\nfrom onyx.db.connector_credential_pair import get_cc_pair_groups_for_ids\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair\nfrom onyx.db.connector_credential_pair import get_connector_credential_pairs_for_user\nfrom onyx.db.connector_credential_pair import (\n    get_connector_credential_pairs_for_user_parallel,\n)\nfrom onyx.db.connector_credential_pair import verify_user_has_access_to_cc_pair\nfrom onyx.db.credentials import cleanup_gmail_credentials\nfrom onyx.db.credentials import cleanup_google_drive_credentials\nfrom onyx.db.credentials import create_credential\nfrom onyx.db.credentials import delete_service_account_credentials\nfrom onyx.db.credentials import fetch_credential_by_id_for_user\nfrom onyx.db.deletion_attempt import check_deletion_attempt_is_allowed\nfrom onyx.db.document import get_document_counts_for_all_cc_pairs\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import IndexingMode\nfrom onyx.db.enums import ProcessingMode\nfrom onyx.db.federated import fetch_all_federated_connectors_parallel\nfrom onyx.db.index_attempt import get_index_attempts_for_cc_pair\nfrom onyx.db.index_attempt import get_latest_index_attempts_by_status\nfrom onyx.db.index_attempt import get_latest_index_attempts_parallel\nfrom onyx.db.index_attempt import (\n    get_latest_successful_index_attempts_parallel,\n)\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import FederatedConnector\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.models import IndexingStatus\nfrom onyx.db.models import User\nfrom onyx.db.models import UserRole\nfrom onyx.file_processing.file_types import PLAIN_TEXT_MIME_TYPE\nfrom onyx.file_processing.file_types import WORD_PROCESSING_MIME_TYPE\nfrom onyx.file_store.file_store import FileStore\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.documents.models import AuthStatus\nfrom onyx.server.documents.models import AuthUrl\nfrom onyx.server.documents.models import ConnectorBase\nfrom onyx.server.documents.models import ConnectorCredentialPairIdentifier\nfrom onyx.server.documents.models import ConnectorFileInfo\nfrom onyx.server.documents.models import ConnectorFilesResponse\nfrom onyx.server.documents.models import ConnectorIndexingStatusLite\nfrom onyx.server.documents.models import ConnectorIndexingStatusLiteResponse\nfrom onyx.server.documents.models import ConnectorRequestSubmission\nfrom onyx.server.documents.models import ConnectorSnapshot\nfrom onyx.server.documents.models import ConnectorStatus\nfrom onyx.server.documents.models import ConnectorUpdateRequest\nfrom onyx.server.documents.models import CredentialBase\nfrom onyx.server.documents.models import CredentialSnapshot\nfrom onyx.server.documents.models import DocsCountOperator\nfrom onyx.server.documents.models import FailedConnectorIndexingStatus\nfrom onyx.server.documents.models import FileUploadResponse\nfrom onyx.server.documents.models import GDriveCallback\nfrom onyx.server.documents.models import GmailCallback\nfrom onyx.server.documents.models import GoogleAppCredentials\nfrom onyx.server.documents.models import GoogleServiceAccountCredentialRequest\nfrom onyx.server.documents.models import GoogleServiceAccountKey\nfrom onyx.server.documents.models import IndexedSourcesResponse\nfrom onyx.server.documents.models import IndexingStatusRequest\nfrom onyx.server.documents.models import ObjectCreationIdResponse\nfrom onyx.server.documents.models import RunConnectorRequest\nfrom onyx.server.documents.models import SourceSummary\nfrom onyx.server.federated.models import FederatedConnectorStatus\nfrom onyx.server.models import StatusResponse\nfrom onyx.server.utils_vector_db import require_vector_db\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.telemetry import mt_cloud_telemetry\nfrom onyx.utils.threadpool_concurrency import CallableProtocol\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n_GMAIL_CREDENTIAL_ID_COOKIE_NAME = \"gmail_credential_id\"\n_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME = \"google_drive_credential_id\"\n_INDEXING_STATUS_PAGE_SIZE = 10\n\nSEEN_ZIP_DETAIL = \"Only one zip file is allowed per file connector, \\\nuse the ingestion APIs for multiple files\"\n\nrouter = APIRouter(prefix=\"/manage\", dependencies=[Depends(require_vector_db)])\n\n\n\"\"\"Admin only API endpoints\"\"\"\n\n\n@router.get(\"/admin/connector/gmail/app-credential\")\ndef check_google_app_gmail_credentials_exist(\n    _: User = Depends(current_curator_or_admin_user),\n) -> dict[str, str]:\n    try:\n        return {\"client_id\": get_google_app_cred(DocumentSource.GMAIL).web.client_id}\n    except KvKeyNotFoundError:\n        raise HTTPException(status_code=404, detail=\"Google App Credentials not found\")\n\n\n@router.put(\"/admin/connector/gmail/app-credential\")\ndef upsert_google_app_gmail_credentials(\n    app_credentials: GoogleAppCredentials, _: User = Depends(current_admin_user)\n) -> StatusResponse:\n    try:\n        upsert_google_app_cred(app_credentials, DocumentSource.GMAIL)\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    return StatusResponse(\n        success=True, message=\"Successfully saved Google App Credentials\"\n    )\n\n\n@router.delete(\"/admin/connector/gmail/app-credential\")\ndef delete_google_app_gmail_credentials(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    try:\n        delete_google_app_cred(DocumentSource.GMAIL)\n        cleanup_gmail_credentials(db_session=db_session)\n    except KvKeyNotFoundError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    return StatusResponse(\n        success=True, message=\"Successfully deleted Google App Credentials\"\n    )\n\n\n@router.get(\"/admin/connector/google-drive/app-credential\")\ndef check_google_app_credentials_exist(\n    _: User = Depends(current_curator_or_admin_user),\n) -> dict[str, str]:\n    try:\n        return {\n            \"client_id\": get_google_app_cred(DocumentSource.GOOGLE_DRIVE).web.client_id\n        }\n    except KvKeyNotFoundError:\n        raise HTTPException(status_code=404, detail=\"Google App Credentials not found\")\n\n\n@router.put(\"/admin/connector/google-drive/app-credential\")\ndef upsert_google_app_credentials(\n    app_credentials: GoogleAppCredentials, _: User = Depends(current_admin_user)\n) -> StatusResponse:\n    try:\n        upsert_google_app_cred(app_credentials, DocumentSource.GOOGLE_DRIVE)\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    return StatusResponse(\n        success=True, message=\"Successfully saved Google App Credentials\"\n    )\n\n\n@router.delete(\"/admin/connector/google-drive/app-credential\")\ndef delete_google_app_credentials(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    try:\n        delete_google_app_cred(DocumentSource.GOOGLE_DRIVE)\n        cleanup_google_drive_credentials(db_session=db_session)\n    except KvKeyNotFoundError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    return StatusResponse(\n        success=True, message=\"Successfully deleted Google App Credentials\"\n    )\n\n\n@router.get(\"/admin/connector/gmail/service-account-key\")\ndef check_google_service_gmail_account_key_exist(\n    _: User = Depends(current_curator_or_admin_user),\n) -> dict[str, str]:\n    try:\n        return {\n            \"service_account_email\": get_service_account_key(\n                DocumentSource.GMAIL\n            ).client_email\n        }\n    except KvKeyNotFoundError:\n        raise HTTPException(\n            status_code=404, detail=\"Google Service Account Key not found\"\n        )\n\n\n@router.put(\"/admin/connector/gmail/service-account-key\")\ndef upsert_google_service_gmail_account_key(\n    service_account_key: GoogleServiceAccountKey, _: User = Depends(current_admin_user)\n) -> StatusResponse:\n    try:\n        upsert_service_account_key(service_account_key, DocumentSource.GMAIL)\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    return StatusResponse(\n        success=True, message=\"Successfully saved Google Service Account Key\"\n    )\n\n\n@router.delete(\"/admin/connector/gmail/service-account-key\")\ndef delete_google_service_gmail_account_key(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    try:\n        delete_service_account_key(DocumentSource.GMAIL)\n        cleanup_gmail_credentials(db_session=db_session)\n    except KvKeyNotFoundError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    return StatusResponse(\n        success=True, message=\"Successfully deleted Google Service Account Key\"\n    )\n\n\n@router.get(\"/admin/connector/google-drive/service-account-key\")\ndef check_google_service_account_key_exist(\n    _: User = Depends(current_curator_or_admin_user),\n) -> dict[str, str]:\n    try:\n        return {\n            \"service_account_email\": get_service_account_key(\n                DocumentSource.GOOGLE_DRIVE\n            ).client_email\n        }\n    except KvKeyNotFoundError:\n        raise HTTPException(\n            status_code=404, detail=\"Google Service Account Key not found\"\n        )\n\n\n@router.put(\"/admin/connector/google-drive/service-account-key\")\ndef upsert_google_service_account_key(\n    service_account_key: GoogleServiceAccountKey, _: User = Depends(current_admin_user)\n) -> StatusResponse:\n    try:\n        upsert_service_account_key(service_account_key, DocumentSource.GOOGLE_DRIVE)\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    return StatusResponse(\n        success=True, message=\"Successfully saved Google Service Account Key\"\n    )\n\n\n@router.delete(\"/admin/connector/google-drive/service-account-key\")\ndef delete_google_service_account_key(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    try:\n        delete_service_account_key(DocumentSource.GOOGLE_DRIVE)\n        cleanup_google_drive_credentials(db_session=db_session)\n    except KvKeyNotFoundError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    return StatusResponse(\n        success=True, message=\"Successfully deleted Google Service Account Key\"\n    )\n\n\n@router.put(\"/admin/connector/google-drive/service-account-credential\")\ndef upsert_service_account_credential(\n    service_account_credential_request: GoogleServiceAccountCredentialRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ObjectCreationIdResponse:\n    \"\"\"Special API which allows the creation of a credential for a service account.\n    Combines the input with the saved service account key to create an entry in the\n    `Credential` table.\"\"\"\n    try:\n        credential_base = build_service_account_creds(\n            DocumentSource.GOOGLE_DRIVE,\n            primary_admin_email=service_account_credential_request.google_primary_admin,\n            name=\"Service Account (uploaded)\",\n        )\n    except KvKeyNotFoundError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    # first delete all existing service account credentials\n    delete_service_account_credentials(user, db_session, DocumentSource.GOOGLE_DRIVE)\n    # `user=None` since this credential is not a personal credential\n    credential = create_credential(\n        credential_data=credential_base, user=user, db_session=db_session\n    )\n    return ObjectCreationIdResponse(id=credential.id)\n\n\n@router.put(\"/admin/connector/gmail/service-account-credential\")\ndef upsert_gmail_service_account_credential(\n    service_account_credential_request: GoogleServiceAccountCredentialRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ObjectCreationIdResponse:\n    \"\"\"Special API which allows the creation of a credential for a service account.\n    Combines the input with the saved service account key to create an entry in the\n    `Credential` table.\"\"\"\n    try:\n        credential_base = build_service_account_creds(\n            DocumentSource.GMAIL,\n            primary_admin_email=service_account_credential_request.google_primary_admin,\n        )\n    except KvKeyNotFoundError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    # first delete all existing service account credentials\n    delete_service_account_credentials(user, db_session, DocumentSource.GMAIL)\n    # `user=None` since this credential is not a personal credential\n    credential = create_credential(\n        credential_data=credential_base, user=user, db_session=db_session\n    )\n    return ObjectCreationIdResponse(id=credential.id)\n\n\n@router.get(\"/admin/connector/google-drive/check-auth/{credential_id}\")\ndef check_drive_tokens(\n    credential_id: int,\n    user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> AuthStatus:\n    db_credentials = fetch_credential_by_id_for_user(credential_id, user, db_session)\n    if not db_credentials or not db_credentials.credential_json:\n        return AuthStatus(authenticated=False)\n\n    credential_json = db_credentials.credential_json.get_value(apply_mask=False)\n    if DB_CREDENTIALS_DICT_TOKEN_KEY not in credential_json:\n        return AuthStatus(authenticated=False)\n    token_json_str = str(credential_json[DB_CREDENTIALS_DICT_TOKEN_KEY])\n    google_drive_creds = get_google_oauth_creds(\n        token_json_str=token_json_str,\n        source=DocumentSource.GOOGLE_DRIVE,\n    )\n    if google_drive_creds is None:\n        return AuthStatus(authenticated=False)\n    return AuthStatus(authenticated=True)\n\n\ndef save_zip_metadata_to_file_store(\n    zf: zipfile.ZipFile, file_store: FileStore\n) -> str | None:\n    \"\"\"\n    Extract .onyx_metadata.json from zip and save to file store.\n    Returns the file_id or None if no metadata file exists.\n    \"\"\"\n    try:\n        metadata_file_info = zf.getinfo(ONYX_METADATA_FILENAME)\n        with zf.open(metadata_file_info, \"r\") as metadata_file:\n            metadata_bytes = metadata_file.read()\n\n            # Validate that it's valid JSON before saving\n            try:\n                json.loads(metadata_bytes)\n            except json.JSONDecodeError as e:\n                logger.warning(f\"Unable to load {ONYX_METADATA_FILENAME}: {e}\")\n                raise HTTPException(\n                    status_code=400,\n                    detail=f\"Unable to load {ONYX_METADATA_FILENAME}: {e}\",\n                )\n\n            # Save to file store\n            file_id = file_store.save_file(\n                content=BytesIO(metadata_bytes),\n                display_name=ONYX_METADATA_FILENAME,\n                file_origin=FileOrigin.CONNECTOR_METADATA,\n                file_type=\"application/json\",\n            )\n            return file_id\n    except KeyError:\n        logger.info(f\"No {ONYX_METADATA_FILENAME} file\")\n        return None\n\n\ndef is_zip_file(file: UploadFile) -> bool:\n    \"\"\"\n    Check if the file is a zip file by content type or filename.\n    \"\"\"\n    return bool(\n        (\n            file.content_type\n            and file.content_type.startswith(\n                (\n                    \"application/zip\",\n                    \"application/x-zip-compressed\",  # May be this in Windows\n                    \"application/x-zip\",\n                    \"multipart/x-zip\",\n                )\n            )\n        )\n        or (file.filename and file.filename.lower().endswith(\".zip\"))\n    )\n\n\ndef upload_files(\n    files: list[UploadFile],\n    file_origin: FileOrigin = FileOrigin.CONNECTOR,\n    unzip: bool = True,\n) -> FileUploadResponse:\n\n    # Skip directories and known macOS metadata entries\n    def should_process_file(file_path: str) -> bool:\n        normalized_path = os.path.normpath(file_path)\n        return not any(part.startswith(\".\") for part in normalized_path.split(os.sep))\n\n    deduped_file_paths = []\n    deduped_file_names = []\n    zip_metadata_file_id: str | None = None\n    try:\n        file_store = get_default_file_store()\n        seen_zip = False\n        for file in files:\n            if not file.filename:\n                logger.warning(\"File has no filename, skipping\")\n                continue\n\n            if is_zip_file(file):\n                if seen_zip:\n                    raise HTTPException(status_code=400, detail=SEEN_ZIP_DETAIL)\n                seen_zip = True\n\n                # Validate the zip by opening it (catches corrupt/non-zip files)\n                with zipfile.ZipFile(file.file, \"r\") as zf:\n                    if unzip:\n                        zip_metadata_file_id = save_zip_metadata_to_file_store(\n                            zf, file_store\n                        )\n                        for file_info in zf.namelist():\n                            if zf.getinfo(file_info).is_dir():\n                                continue\n\n                            if not should_process_file(file_info):\n                                continue\n\n                            sub_file_bytes = zf.read(file_info)\n\n                            mime_type, __ = mimetypes.guess_type(file_info)\n                            if mime_type is None:\n                                mime_type = \"application/octet-stream\"\n\n                            file_id = file_store.save_file(\n                                content=BytesIO(sub_file_bytes),\n                                display_name=os.path.basename(file_info),\n                                file_origin=file_origin,\n                                file_type=mime_type,\n                            )\n                            deduped_file_paths.append(file_id)\n                            deduped_file_names.append(os.path.basename(file_info))\n                        continue\n\n                # Store the zip as-is (unzip=False)\n                file.file.seek(0)\n                file_id = file_store.save_file(\n                    content=file.file,\n                    display_name=file.filename,\n                    file_origin=file_origin,\n                    file_type=file.content_type or \"application/zip\",\n                )\n                deduped_file_paths.append(file_id)\n                deduped_file_names.append(file.filename)\n                continue\n\n            # Since we can't render docx files in the UI,\n            # we store them in the file store as plain text\n            if file.content_type == WORD_PROCESSING_MIME_TYPE:\n                # Lazy load to avoid importing markitdown when not needed\n                from onyx.file_processing.extract_file_text import read_docx_file\n\n                text, _ = read_docx_file(file.file, file.filename)\n                file_id = file_store.save_file(\n                    content=BytesIO(text.encode(\"utf-8\")),\n                    display_name=file.filename,\n                    file_origin=file_origin,\n                    file_type=PLAIN_TEXT_MIME_TYPE,\n                )\n\n            else:\n                file_id = file_store.save_file(\n                    content=file.file,\n                    display_name=file.filename,\n                    file_origin=file_origin,\n                    file_type=file.content_type or \"text/plain\",\n                )\n            deduped_file_paths.append(file_id)\n            deduped_file_names.append(file.filename)\n\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n    return FileUploadResponse(\n        file_paths=deduped_file_paths,\n        file_names=deduped_file_names,\n        zip_metadata_file_id=zip_metadata_file_id,\n    )\n\n\ndef _normalize_file_names_for_backwards_compatibility(\n    file_locations: list[str], file_names: list[str]\n) -> list[str]:\n    \"\"\"\n    Ensures file_names list is the same length as file_locations for backwards compatibility.\n    In legacy data, file_names might not exist or be shorter than file_locations.\n    If file_names is shorter, pads it with corresponding file_locations values.\n    \"\"\"\n    return file_names + file_locations[len(file_names) :]\n\n\ndef _fetch_and_check_file_connector_cc_pair_permissions(\n    connector_id: int,\n    user: User,\n    db_session: Session,\n    require_editable: bool,\n) -> ConnectorCredentialPair:\n    cc_pair = fetch_connector_credential_pair_for_connector(db_session, connector_id)\n    if cc_pair is None:\n        raise HTTPException(\n            status_code=404,\n            detail=\"No Connector-Credential Pair found for this connector\",\n        )\n\n    has_requested_access = verify_user_has_access_to_cc_pair(\n        cc_pair_id=cc_pair.id,\n        db_session=db_session,\n        user=user,\n        get_editable=require_editable,\n    )\n    if has_requested_access:\n        return cc_pair\n\n    # Special case: global curators should be able to manage files\n    # for public file connectors even when they are not the creator.\n    if (\n        require_editable\n        and user.role == UserRole.GLOBAL_CURATOR\n        and cc_pair.access_type == AccessType.PUBLIC\n    ):\n        return cc_pair\n\n    raise HTTPException(\n        status_code=403,\n        detail=\"Access denied. User cannot manage files for this connector.\",\n    )\n\n\n@router.post(\"/admin/connector/file/upload\", tags=PUBLIC_API_TAGS)\ndef upload_files_api(\n    files: list[UploadFile],\n    unzip: bool = True,\n    _: User = Depends(current_curator_or_admin_user),\n) -> FileUploadResponse:\n    return upload_files(files, FileOrigin.OTHER, unzip=unzip)\n\n\n@router.get(\"/admin/connector/{connector_id}/files\", tags=PUBLIC_API_TAGS)\ndef list_connector_files(\n    connector_id: int,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ConnectorFilesResponse:\n    \"\"\"List all files in a file connector.\"\"\"\n    connector = fetch_connector_by_id(connector_id, db_session)\n    if connector is None:\n        raise HTTPException(status_code=404, detail=\"Connector not found\")\n\n    if connector.source != DocumentSource.FILE:\n        raise HTTPException(\n            status_code=400, detail=\"This endpoint only works with file connectors\"\n        )\n\n    _ = _fetch_and_check_file_connector_cc_pair_permissions(\n        connector_id=connector_id,\n        user=user,\n        db_session=db_session,\n        require_editable=False,\n    )\n\n    file_locations = connector.connector_specific_config.get(\"file_locations\", [])\n    file_names = connector.connector_specific_config.get(\"file_names\", [])\n\n    # Normalize file_names for backwards compatibility with legacy data\n    file_names = _normalize_file_names_for_backwards_compatibility(\n        file_locations, file_names\n    )\n\n    file_store = get_default_file_store()\n    files = []\n\n    for file_id, file_name in zip(file_locations, file_names):\n        try:\n            file_record = file_store.read_file_record(file_id)\n            file_size = None\n            upload_date = None\n            if file_record:\n                file_size = file_store.get_file_size(file_id)\n                upload_date = (\n                    file_record.created_at.isoformat()\n                    if file_record.created_at\n                    else None\n                )\n            files.append(\n                ConnectorFileInfo(\n                    file_id=file_id,\n                    file_name=file_name,\n                    file_size=file_size,\n                    upload_date=upload_date,\n                )\n            )\n        except Exception as e:\n            logger.warning(f\"Error reading file record for {file_id}: {e}\")\n            # Include file with basic info even if record fetch fails\n            files.append(\n                ConnectorFileInfo(\n                    file_id=file_id,\n                    file_name=file_name,\n                )\n            )\n\n    return ConnectorFilesResponse(files=files)\n\n\n@router.post(\"/admin/connector/{connector_id}/files/update\", tags=PUBLIC_API_TAGS)\ndef update_connector_files(\n    connector_id: int,\n    files: list[UploadFile] | None = File(None),\n    file_ids_to_remove: str = Form(\"[]\"),\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> FileUploadResponse:\n    \"\"\"\n    Update files in a connector by adding new files and/or removing existing ones.\n    This is an atomic operation that validates, updates the connector config, and triggers indexing.\n    \"\"\"\n    files = files or []\n    connector = fetch_connector_by_id(connector_id, db_session)\n    if connector is None:\n        raise HTTPException(status_code=404, detail=\"Connector not found\")\n\n    if connector.source != DocumentSource.FILE:\n        raise HTTPException(\n            status_code=400, detail=\"This endpoint only works with file connectors\"\n        )\n\n    # Get the connector-credential pair for indexing/pruning triggers\n    # and validate user permissions for file management.\n    cc_pair = _fetch_and_check_file_connector_cc_pair_permissions(\n        connector_id=connector_id,\n        user=user,\n        db_session=db_session,\n        require_editable=True,\n    )\n\n    # Parse file IDs to remove\n    try:\n        file_ids_list = json.loads(file_ids_to_remove)\n    except json.JSONDecodeError:\n        raise HTTPException(status_code=400, detail=\"Invalid file_ids_to_remove format\")\n\n    if not isinstance(file_ids_list, list):\n        raise HTTPException(\n            status_code=400,\n            detail=\"file_ids_to_remove must be a JSON-encoded list\",\n        )\n\n    # Get current connector config\n    current_config = connector.connector_specific_config\n    current_file_locations = current_config.get(\"file_locations\", [])\n    current_file_names = current_config.get(\"file_names\", [])\n    current_zip_metadata_file_id = current_config.get(\"zip_metadata_file_id\")\n\n    # Load existing metadata from file store if available\n    file_store = get_default_file_store()\n    current_zip_metadata: dict[str, Any] = {}\n    if current_zip_metadata_file_id:\n        try:\n            metadata_io = file_store.read_file(\n                file_id=current_zip_metadata_file_id, mode=\"b\"\n            )\n            metadata_bytes = metadata_io.read()\n            loaded_metadata = json.loads(metadata_bytes)\n            if isinstance(loaded_metadata, list):\n                current_zip_metadata = {d[\"filename\"]: d for d in loaded_metadata}\n            else:\n                current_zip_metadata = loaded_metadata\n        except Exception as e:\n            logger.warning(f\"Failed to load existing metadata file: {e}\")\n            raise HTTPException(\n                status_code=500,\n                detail=\"Failed to load existing connector metadata file\",\n            )\n\n    # Upload new files if any\n    new_file_paths = []\n    new_file_names_list = []\n    new_zip_metadata_file_id: str | None = None\n    new_zip_metadata: dict[str, Any] = {}\n\n    if files and len(files) > 0:\n        upload_response = upload_files(files, FileOrigin.CONNECTOR)\n        new_file_paths = upload_response.file_paths\n        new_file_names_list = upload_response.file_names\n        new_zip_metadata_file_id = upload_response.zip_metadata_file_id\n\n        # Load new metadata from file store if available\n        if new_zip_metadata_file_id:\n            try:\n                metadata_io = file_store.read_file(\n                    file_id=new_zip_metadata_file_id, mode=\"b\"\n                )\n                metadata_bytes = metadata_io.read()\n                loaded_metadata = json.loads(metadata_bytes)\n                if isinstance(loaded_metadata, list):\n                    new_zip_metadata = {d[\"filename\"]: d for d in loaded_metadata}\n                else:\n                    new_zip_metadata = loaded_metadata\n            except Exception as e:\n                logger.warning(f\"Failed to load new metadata file: {e}\")\n\n    # Remove specified files\n    files_to_remove_set = set(file_ids_list)\n\n    # Normalize file_names for backwards compatibility with legacy data\n    current_file_names = _normalize_file_names_for_backwards_compatibility(\n        current_file_locations, current_file_names\n    )\n\n    remaining_file_locations = []\n    remaining_file_names = []\n    removed_file_names = set()\n\n    for file_id, file_name in zip(current_file_locations, current_file_names):\n        if file_id not in files_to_remove_set:\n            remaining_file_locations.append(file_id)\n            remaining_file_names.append(file_name)\n        else:\n            removed_file_names.add(file_name)\n\n    # Combine remaining files with new files\n    final_file_locations = remaining_file_locations + new_file_paths\n    final_file_names = remaining_file_names + new_file_names_list\n\n    # Validate that at least one file remains\n    if not final_file_locations:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Cannot remove all files from connector. At least one file must remain.\",\n        )\n\n    # Merge and filter metadata (remove metadata for deleted files)\n    final_zip_metadata = {\n        key: value\n        for key, value in current_zip_metadata.items()\n        if key not in removed_file_names\n    }\n    final_zip_metadata.update(new_zip_metadata)\n\n    # Save merged metadata to file store if we have any metadata\n    final_zip_metadata_file_id: str | None = None\n    if final_zip_metadata:\n        final_zip_metadata_file_id = file_store.save_file(\n            content=BytesIO(json.dumps(final_zip_metadata).encode(\"utf-8\")),\n            display_name=ONYX_METADATA_FILENAME,\n            file_origin=FileOrigin.CONNECTOR_METADATA,\n            file_type=\"application/json\",\n        )\n\n    # Update connector config\n    updated_config = {\n        **current_config,\n        \"file_locations\": final_file_locations,\n        \"file_names\": final_file_names,\n        \"zip_metadata_file_id\": final_zip_metadata_file_id,\n    }\n    # Remove old zip_metadata dict if present (backwards compatibility cleanup)\n    updated_config.pop(\"zip_metadata\", None)\n\n    connector_base = ConnectorBase(\n        name=connector.name,\n        source=connector.source,\n        input_type=connector.input_type,\n        connector_specific_config=updated_config,\n        refresh_freq=connector.refresh_freq,\n        prune_freq=connector.prune_freq,\n        indexing_start=connector.indexing_start,\n    )\n\n    updated_connector = update_connector(connector_id, connector_base, db_session)\n    if updated_connector is None:\n        raise HTTPException(\n            status_code=500, detail=\"Failed to update connector configuration\"\n        )\n\n    # Trigger re-indexing for new files and pruning for removed files\n    try:\n        tenant_id = get_current_tenant_id()\n\n        # If files were added, mark for UPDATE indexing (only new docs)\n        if new_file_paths:\n            mark_ccpair_with_indexing_trigger(\n                cc_pair.id, IndexingMode.UPDATE, db_session\n            )\n\n            # Send task to check for indexing immediately\n            client_app.send_task(\n                OnyxCeleryTask.CHECK_FOR_INDEXING,\n                kwargs={\"tenant_id\": tenant_id},\n                priority=OnyxCeleryPriority.HIGH,\n            )\n            logger.info(\n                f\"Marked cc_pair {cc_pair.id} for UPDATE indexing (new files) for connector {connector_id}\"\n            )\n\n        # If files were removed, trigger pruning immediately\n        if file_ids_list:\n            r = get_redis_client()\n            payload_id = try_creating_prune_generator_task(\n                client_app, cc_pair, db_session, r, tenant_id\n            )\n            if payload_id:\n                logger.info(\n                    f\"Triggered pruning for cc_pair {cc_pair.id} (removed files) for connector \"\n                    f\"{connector_id}, payload_id={payload_id}\"\n                )\n            else:\n                logger.warning(\n                    f\"Failed to trigger pruning for cc_pair {cc_pair.id} (removed files) for connector {connector_id}\"\n                )\n    except Exception as e:\n        logger.error(f\"Failed to trigger re-indexing after file update: {e}\")\n\n    return FileUploadResponse(\n        file_paths=final_file_locations,\n        file_names=final_file_names,\n        zip_metadata_file_id=final_zip_metadata_file_id,\n    )\n\n\n@router.get(\"/admin/connector\", tags=PUBLIC_API_TAGS)\ndef get_connectors_by_credential(\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n    credential: int | None = None,\n) -> list[ConnectorSnapshot]:\n    \"\"\"Get a list of connectors. Allow filtering by a specific credential id.\"\"\"\n\n    connectors = fetch_connectors(db_session)\n\n    filtered_connectors = []\n    for connector in connectors:\n        if connector.source == DocumentSource.INGESTION_API:\n            # don't include INGESTION_API, as it's a system level\n            # connector not manageable by the user\n            continue\n\n        if credential is not None:\n            found = False\n            for cc_pair in connector.credentials:\n                if credential == cc_pair.credential_id:\n                    found = True\n                    break\n\n            if not found:\n                continue\n\n        filtered_connectors.append(ConnectorSnapshot.from_connector_db_model(connector))\n\n    return filtered_connectors\n\n\n# Retrieves most recent failure cases for connectors that are currently failing\n@router.get(\"/admin/connector/failed-indexing-status\", tags=PUBLIC_API_TAGS)\ndef get_currently_failed_indexing_status(\n    secondary_index: bool = False,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n    get_editable: bool = Query(\n        False, description=\"If true, return editable document sets\"\n    ),\n) -> list[FailedConnectorIndexingStatus]:\n    # Get the latest failed indexing attempts\n    latest_failed_indexing_attempts = get_latest_index_attempts_by_status(\n        secondary_index=secondary_index,\n        db_session=db_session,\n        status=IndexingStatus.FAILED,\n    )\n\n    # Get the latest successful indexing attempts\n    latest_successful_indexing_attempts = get_latest_index_attempts_by_status(\n        secondary_index=secondary_index,\n        db_session=db_session,\n        status=IndexingStatus.SUCCESS,\n    )\n\n    # Get all connector credential pairs\n    cc_pairs = get_connector_credential_pairs_for_user(\n        db_session=db_session,\n        user=user,\n        get_editable=get_editable,\n    )\n\n    # Filter out failed attempts that have a more recent successful attempt\n    filtered_failed_attempts = [\n        failed_attempt\n        for failed_attempt in latest_failed_indexing_attempts\n        if not any(\n            success_attempt.connector_credential_pair_id\n            == failed_attempt.connector_credential_pair_id\n            and success_attempt.time_updated > failed_attempt.time_updated\n            for success_attempt in latest_successful_indexing_attempts\n        )\n    ]\n\n    # Filter cc_pairs to include only those with failed attempts\n    cc_pairs = [\n        cc_pair\n        for cc_pair in cc_pairs\n        if any(\n            attempt.connector_credential_pair == cc_pair\n            for attempt in filtered_failed_attempts\n        )\n    ]\n\n    # Create a mapping of cc_pair_id to its latest failed index attempt\n    cc_pair_to_latest_index_attempt = {\n        attempt.connector_credential_pair_id: attempt\n        for attempt in filtered_failed_attempts\n    }\n\n    indexing_statuses = []\n\n    for cc_pair in cc_pairs:\n        # Skip DefaultCCPair\n        if cc_pair.name == \"DefaultCCPair\":\n            continue\n\n        latest_index_attempt = cc_pair_to_latest_index_attempt.get(cc_pair.id)\n\n        indexing_statuses.append(\n            FailedConnectorIndexingStatus(\n                cc_pair_id=cc_pair.id,\n                name=cc_pair.name,\n                error_msg=(\n                    latest_index_attempt.error_msg if latest_index_attempt else None\n                ),\n                connector_id=cc_pair.connector_id,\n                credential_id=cc_pair.credential_id,\n                is_deletable=check_deletion_attempt_is_allowed(\n                    connector_credential_pair=cc_pair,\n                    db_session=db_session,\n                    allow_scheduled=True,\n                )\n                is None,\n            )\n        )\n\n    return indexing_statuses\n\n\n@router.get(\"/admin/connector/status\", tags=PUBLIC_API_TAGS)\ndef get_connector_status(\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[ConnectorStatus]:\n    # This method is only used document set and group creation/editing\n    # Therefore, it is okay to get non-editable, but public cc_pairs\n    cc_pairs = get_connector_credential_pairs_for_user(\n        db_session=db_session,\n        user=user,\n        eager_load_connector=True,\n        eager_load_credential=True,\n        eager_load_user=True,\n        get_editable=False,\n    )\n\n    group_cc_pair_relationships = get_cc_pair_groups_for_ids(\n        db_session=db_session,\n        cc_pair_ids=[cc_pair.id for cc_pair in cc_pairs],\n    )\n    group_cc_pair_relationships_dict: dict[int, list[int]] = {}\n    for relationship in group_cc_pair_relationships:\n        group_cc_pair_relationships_dict.setdefault(relationship.cc_pair_id, []).append(\n            relationship.user_group_id\n        )\n\n    # Pre-compute credential_ids per connector to avoid N+1 lazy loads\n    connector_to_credential_ids: dict[int, list[int]] = {}\n    for cc_pair in cc_pairs:\n        connector_to_credential_ids.setdefault(cc_pair.connector_id, []).append(\n            cc_pair.credential_id\n        )\n\n    return [\n        ConnectorStatus(\n            cc_pair_id=cc_pair.id,\n            name=cc_pair.name,\n            connector=ConnectorSnapshot.from_connector_db_model(\n                cc_pair.connector,\n                credential_ids=connector_to_credential_ids.get(\n                    cc_pair.connector_id, []\n                ),\n            ),\n            credential=CredentialSnapshot.from_credential_db_model(cc_pair.credential),\n            access_type=cc_pair.access_type,\n            groups=group_cc_pair_relationships_dict.get(cc_pair.id, []),\n        )\n        for cc_pair in cc_pairs\n        if cc_pair.name != \"DefaultCCPair\" and cc_pair.connector and cc_pair.credential\n    ]\n\n\n@router.post(\"/admin/connector/indexing-status\", tags=PUBLIC_API_TAGS)\ndef get_connector_indexing_status(\n    request: IndexingStatusRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[ConnectorIndexingStatusLiteResponse]:\n    tenant_id = get_current_tenant_id()\n\n    # NOTE: If the connector is deleting behind the scenes,\n    # accessing cc_pairs can be inconsistent and members like\n    # connector or credential may be None.\n    # Additional checks are done to make sure the connector and credential still exist.\n    # TODO: make this one query ... possibly eager load or wrap in a read transaction\n    # to avoid the complexity of trying to error check throughout the function\n\n    # see https://stackoverflow.com/questions/75758327/\n    # sqlalchemy-method-connection-for-bind-is-already-in-progress\n    # for why we can't pass in the current db_session to these functions\n\n    if MOCK_CONNECTOR_FILE_PATH:\n        import json\n\n        with open(MOCK_CONNECTOR_FILE_PATH, \"r\") as f:\n            raw_data = json.load(f)\n            connector_indexing_statuses = [\n                ConnectorIndexingStatusLite(**status) for status in raw_data\n            ]\n        return [\n            ConnectorIndexingStatusLiteResponse(\n                source=DocumentSource.FILE,\n                summary=SourceSummary(\n                    total_connectors=100,\n                    active_connectors=100,\n                    public_connectors=100,\n                    total_docs_indexed=100000,\n                ),\n                current_page=1,\n                total_pages=1,\n                indexing_statuses=connector_indexing_statuses,\n            )\n        ]\n\n    parallel_functions: list[tuple[CallableProtocol, tuple[Any, ...]]] = [\n        # Get editable connector/credential pairs\n        (\n            lambda: get_connector_credential_pairs_for_user_parallel(\n                user, True, None, True, True, False, True, request.source\n            ),\n            (),\n        ),\n        # Get federated connectors\n        (fetch_all_federated_connectors_parallel, ()),\n        # Get most recent index attempts\n        (\n            lambda: get_latest_index_attempts_parallel(\n                request.secondary_index, True, False\n            ),\n            (),\n        ),\n        # Get most recent finished index attempts\n        (\n            lambda: get_latest_index_attempts_parallel(\n                request.secondary_index, True, True\n            ),\n            (),\n        ),\n        # Get most recent successful index attempts\n        (\n            lambda: get_latest_successful_index_attempts_parallel(\n                request.secondary_index,\n            ),\n            (),\n        ),\n    ]\n\n    if user and user.role == UserRole.ADMIN:\n        (\n            editable_cc_pairs,\n            federated_connectors,\n            latest_index_attempts,\n            latest_finished_index_attempts,\n            latest_successful_index_attempts,\n        ) = run_functions_tuples_in_parallel(parallel_functions)\n        non_editable_cc_pairs = []\n    else:\n        parallel_functions.append(\n            (\n                lambda: get_connector_credential_pairs_for_user_parallel(\n                    user, False, None, True, True, False, True, request.source\n                ),\n                (),\n            ),\n        )\n\n        (\n            editable_cc_pairs,\n            federated_connectors,\n            latest_index_attempts,\n            latest_finished_index_attempts,\n            latest_successful_index_attempts,\n            non_editable_cc_pairs,\n        ) = run_functions_tuples_in_parallel(parallel_functions)\n\n    # Cast results to proper types\n    non_editable_cc_pairs = cast(list[ConnectorCredentialPair], non_editable_cc_pairs)\n    editable_cc_pairs = cast(list[ConnectorCredentialPair], editable_cc_pairs)\n    federated_connectors = cast(list[FederatedConnector], federated_connectors)\n    latest_index_attempts = cast(list[IndexAttempt], latest_index_attempts)\n    latest_finished_index_attempts = cast(\n        list[IndexAttempt], latest_finished_index_attempts\n    )\n    latest_successful_index_attempts = cast(\n        list[IndexAttempt], latest_successful_index_attempts\n    )\n\n    document_count_info = get_document_counts_for_all_cc_pairs(db_session)\n\n    # Create lookup dictionaries for efficient access\n    cc_pair_to_document_cnt: dict[tuple[int, int], int] = {\n        (connector_id, credential_id): cnt\n        for connector_id, credential_id, cnt in document_count_info\n    }\n\n    def _attempt_lookup(\n        attempts: list[IndexAttempt],\n    ) -> dict[int, IndexAttempt]:\n        return {attempt.connector_credential_pair_id: attempt for attempt in attempts}\n\n    cc_pair_to_latest_index_attempt = _attempt_lookup(latest_index_attempts)\n    cc_pair_to_latest_finished_index_attempt = _attempt_lookup(\n        latest_finished_index_attempts\n    )\n    cc_pair_to_latest_successful_index_attempt = _attempt_lookup(\n        latest_successful_index_attempts\n    )\n\n    def build_connector_indexing_status(\n        cc_pair: ConnectorCredentialPair,\n        is_editable: bool,\n    ) -> ConnectorIndexingStatusLite | None:\n        if cc_pair.name == \"DefaultCCPair\":\n            return None\n\n        latest_attempt = cc_pair_to_latest_index_attempt.get(cc_pair.id)\n        latest_finished_attempt = cc_pair_to_latest_finished_index_attempt.get(\n            cc_pair.id\n        )\n        latest_successful_attempt = cc_pair_to_latest_successful_index_attempt.get(\n            cc_pair.id\n        )\n        doc_count = cc_pair_to_document_cnt.get(\n            (cc_pair.connector_id, cc_pair.credential_id), 0\n        )\n\n        return _get_connector_indexing_status_lite(\n            cc_pair,\n            latest_attempt,\n            latest_finished_attempt,\n            (\n                latest_successful_attempt.time_started\n                if latest_successful_attempt\n                else None\n            ),\n            is_editable,\n            doc_count,\n        )\n\n    # Process editable cc_pairs\n    editable_statuses: list[ConnectorIndexingStatusLite] = []\n    for cc_pair in editable_cc_pairs:\n        status = build_connector_indexing_status(cc_pair, True)\n        if status:\n            editable_statuses.append(status)\n\n    # Process non-editable cc_pairs\n    non_editable_statuses: list[ConnectorIndexingStatusLite] = []\n    for cc_pair in non_editable_cc_pairs:\n        status = build_connector_indexing_status(cc_pair, False)\n        if status:\n            non_editable_statuses.append(status)\n\n    # Process federated connectors\n    federated_statuses: list[FederatedConnectorStatus] = []\n    for federated_connector in federated_connectors:\n        federated_status = FederatedConnectorStatus(\n            id=federated_connector.id,\n            source=federated_connector.source,\n            name=f\"{federated_connector.source.replace('_', ' ').title()}\",\n        )\n\n        federated_statuses.append(federated_status)\n\n    source_to_summary: dict[DocumentSource, SourceSummary] = {}\n\n    # Apply filters only if any are provided\n    has_filters = bool(\n        request.access_type_filters\n        or request.last_status_filters\n        or (\n            request.docs_count_operator is not None\n            and request.docs_count_value is not None\n        )\n        or request.name_filter\n    )\n\n    if has_filters:\n        editable_statuses = _apply_connector_status_filters(\n            editable_statuses,\n            request.access_type_filters,\n            request.last_status_filters,\n            request.docs_count_operator,\n            request.docs_count_value,\n            request.name_filter,\n        )\n        non_editable_statuses = _apply_connector_status_filters(\n            non_editable_statuses,\n            request.access_type_filters,\n            request.last_status_filters,\n            request.docs_count_operator,\n            request.docs_count_value,\n            request.name_filter,\n        )\n        federated_statuses = _apply_federated_connector_status_filters(\n            federated_statuses,\n            request.name_filter,\n        )\n\n    # Calculate source summary\n    for connector_status in (\n        editable_statuses + non_editable_statuses + federated_statuses\n    ):\n        if isinstance(connector_status, FederatedConnectorStatus):\n            source = connector_status.source.to_non_federated_source()\n        else:\n            source = connector_status.source\n\n        # Skip if source is None (federated connectors without mapping)\n        if source is None:\n            continue\n\n        if source not in source_to_summary:\n            source_to_summary[source] = SourceSummary(\n                total_connectors=0,\n                active_connectors=0,\n                public_connectors=0,\n                total_docs_indexed=0,\n            )\n        source_to_summary[source].total_connectors += 1\n        if isinstance(connector_status, ConnectorIndexingStatusLite):\n            if connector_status.cc_pair_status == ConnectorCredentialPairStatus.ACTIVE:\n                source_to_summary[source].active_connectors += 1\n            if connector_status.access_type == AccessType.PUBLIC:\n                source_to_summary[source].public_connectors += 1\n            source_to_summary[\n                source\n            ].total_docs_indexed += connector_status.docs_indexed\n\n    # Track admin page visit for analytics\n    mt_cloud_telemetry(\n        tenant_id=tenant_id,\n        distinct_id=str(user.id),\n        event=MilestoneRecordType.VISITED_ADMIN_PAGE,\n    )\n\n    # Group statuses by source for pagination\n    source_to_all_statuses: dict[\n        DocumentSource, list[ConnectorIndexingStatusLite | FederatedConnectorStatus]\n    ] = {}\n    # Group by source\n    for connector_status in (\n        editable_statuses + non_editable_statuses + federated_statuses\n    ):\n        if isinstance(connector_status, FederatedConnectorStatus):\n            source = connector_status.source.to_non_federated_source()\n        else:\n            source = connector_status.source\n\n        # Skip if source is None (federated connectors without mapping)\n        if source is None:\n            continue\n\n        if source not in source_to_all_statuses:\n            source_to_all_statuses[source] = []\n        source_to_all_statuses[source].append(connector_status)\n\n    # Create paginated response objects by source\n    response_list: list[ConnectorIndexingStatusLiteResponse] = []\n\n    source_list = list(source_to_all_statuses.keys())\n    source_list.sort()\n\n    for source in source_list:\n        statuses = source_to_all_statuses[source]\n        # Get current page for this source (default to page 1, 1-indexed)\n        current_page = request.source_to_page.get(source, 1)\n\n        # Calculate start and end indices for pagination (convert to 0-indexed)\n        start_idx = (current_page - 1) * _INDEXING_STATUS_PAGE_SIZE\n        end_idx = start_idx + _INDEXING_STATUS_PAGE_SIZE\n\n        if request.get_all_connectors:\n            page_statuses = statuses\n        else:\n            # Get the page slice for this source\n            page_statuses = statuses[start_idx:end_idx]\n\n        # Create response object for this source\n        if page_statuses:  # Only include sources that have data on this page\n            response_list.append(\n                ConnectorIndexingStatusLiteResponse(\n                    source=source,\n                    summary=source_to_summary[source],\n                    current_page=current_page,\n                    total_pages=math.ceil(len(statuses) / _INDEXING_STATUS_PAGE_SIZE),\n                    indexing_statuses=page_statuses,\n                )\n            )\n\n    return response_list\n\n\ndef _get_connector_indexing_status_lite(\n    cc_pair: ConnectorCredentialPair,\n    latest_index_attempt: IndexAttempt | None,\n    latest_finished_index_attempt: IndexAttempt | None,\n    last_successful_index_time: datetime | None,\n    is_editable: bool,\n    document_cnt: int,\n) -> ConnectorIndexingStatusLite | None:\n    # TODO remove this to enable ingestion API\n    if cc_pair.name == \"DefaultCCPair\":\n        return None\n\n    connector = cc_pair.connector\n    credential = cc_pair.credential\n    if not connector or not credential:\n        # This may happen if background deletion is happening\n        return None\n\n    in_progress = bool(\n        latest_index_attempt\n        and latest_index_attempt.status == IndexingStatus.IN_PROGRESS\n    )\n\n    return ConnectorIndexingStatusLite(\n        cc_pair_id=cc_pair.id,\n        name=cc_pair.name,\n        source=cc_pair.connector.source,\n        access_type=cc_pair.access_type,\n        cc_pair_status=cc_pair.status,\n        is_editable=is_editable,\n        in_progress=in_progress,\n        in_repeated_error_state=cc_pair.in_repeated_error_state,\n        last_finished_status=(\n            latest_finished_index_attempt.status\n            if latest_finished_index_attempt\n            else None\n        ),\n        last_status=latest_index_attempt.status if latest_index_attempt else None,\n        last_success=last_successful_index_time,\n        docs_indexed=document_cnt,\n        latest_index_attempt_docs_indexed=(\n            latest_index_attempt.total_docs_indexed if latest_index_attempt else None\n        ),\n    )\n\n\ndef _apply_connector_status_filters(\n    statuses: list[ConnectorIndexingStatusLite],\n    access_type_filters: list[AccessType],\n    last_status_filters: list[IndexingStatus],\n    docs_count_operator: DocsCountOperator | None,\n    docs_count_value: int | None,\n    name_filter: str | None,\n) -> list[ConnectorIndexingStatusLite]:\n    \"\"\"Apply filters to a list of ConnectorIndexingStatusLite objects\"\"\"\n    filtered_statuses: list[ConnectorIndexingStatusLite] = []\n\n    for status in statuses:\n        # Filter by access type\n        if access_type_filters and status.access_type not in access_type_filters:\n            continue\n\n        # Filter by last status\n        if last_status_filters and status.last_status not in last_status_filters:\n            continue\n\n        # Filter by document count\n        if docs_count_operator and docs_count_value is not None:\n            if docs_count_operator == DocsCountOperator.GREATER_THAN and not (\n                status.docs_indexed > docs_count_value\n            ):\n                continue\n            elif docs_count_operator == DocsCountOperator.LESS_THAN and not (\n                status.docs_indexed < docs_count_value\n            ):\n                continue\n            elif (\n                docs_count_operator == DocsCountOperator.EQUAL_TO\n                and status.docs_indexed != docs_count_value\n            ):\n                continue\n\n        # Filter by name\n        if status.name:\n            if name_filter and name_filter.lower() not in status.name.lower():\n                continue\n        else:\n            if name_filter:\n                continue\n\n        filtered_statuses.append(status)\n\n    return filtered_statuses\n\n\ndef _apply_federated_connector_status_filters(\n    statuses: list[FederatedConnectorStatus],\n    name_filter: str | None,\n) -> list[FederatedConnectorStatus]:\n    filtered_statuses: list[FederatedConnectorStatus] = []\n\n    for status in statuses:\n        if name_filter and name_filter.lower() not in status.name.lower():\n            continue\n\n        filtered_statuses.append(status)\n\n    return filtered_statuses\n\n\ndef _validate_connector_allowed(source: DocumentSource) -> None:\n    valid_connectors = [\n        x for x in ENABLED_CONNECTOR_TYPES.replace(\"_\", \"\").split(\",\") if x\n    ]\n    if not valid_connectors:\n        return\n    for connector_type in valid_connectors:\n        if source.value.lower().replace(\"_\", \"\") == connector_type:\n            return\n\n    raise ValueError(\n        \"This connector type has been disabled by your system admin. Please contact them to get it enabled if you wish to use it.\"\n    )\n\n\n@router.post(\"/admin/connector\", tags=PUBLIC_API_TAGS)\ndef create_connector_from_model(\n    connector_data: ConnectorUpdateRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ObjectCreationIdResponse:\n    tenant_id = get_current_tenant_id()\n\n    try:\n        _validate_connector_allowed(connector_data.source)\n\n        fetch_ee_implementation_or_noop(\n            \"onyx.db.user_group\", \"validate_object_creation_for_user\", None\n        )(\n            db_session=db_session,\n            user=user,\n            target_group_ids=connector_data.groups,\n            object_is_public=connector_data.access_type == AccessType.PUBLIC,\n            object_is_perm_sync=connector_data.access_type == AccessType.SYNC,\n            object_is_new=True,\n        )\n        connector_base = connector_data.to_connector_base()\n        connector_response = create_connector(\n            db_session=db_session,\n            connector_data=connector_base,\n        )\n\n        mt_cloud_telemetry(\n            tenant_id=tenant_id,\n            distinct_id=str(user.id),\n            event=MilestoneRecordType.CREATED_CONNECTOR,\n        )\n\n        return connector_response\n    except ValueError as e:\n        logger.error(f\"Error creating connector: {e}\")\n        raise HTTPException(status_code=400, detail=str(e))\n\n\n@router.post(\"/admin/connector-with-mock-credential\")\ndef create_connector_with_mock_credential(\n    connector_data: ConnectorUpdateRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    tenant_id = get_current_tenant_id()\n\n    fetch_ee_implementation_or_noop(\n        \"onyx.db.user_group\", \"validate_object_creation_for_user\", None\n    )(\n        db_session=db_session,\n        user=user,\n        target_group_ids=connector_data.groups,\n        object_is_public=connector_data.access_type == AccessType.PUBLIC,\n        object_is_perm_sync=connector_data.access_type == AccessType.SYNC,\n    )\n    try:\n        _validate_connector_allowed(connector_data.source)\n        connector_response = create_connector(\n            db_session=db_session,\n            connector_data=connector_data,\n        )\n\n        mock_credential = CredentialBase(\n            credential_json={},\n            admin_public=True,\n            source=connector_data.source,\n        )\n        credential = create_credential(\n            credential_data=mock_credential,\n            user=user,\n            db_session=db_session,\n        )\n\n        # Store the created connector and credential IDs\n        connector_id = cast(int, connector_response.id)\n        credential_id = credential.id\n\n        validate_ccpair_for_user(\n            connector_id=connector_id,\n            credential_id=credential_id,\n            access_type=connector_data.access_type,\n            db_session=db_session,\n        )\n        response = add_credential_to_connector(\n            db_session=db_session,\n            user=user,\n            connector_id=connector_id,\n            credential_id=credential_id,\n            access_type=connector_data.access_type,\n            cc_pair_name=connector_data.name,\n            groups=connector_data.groups,\n        )\n\n        # trigger indexing immediately\n        client_app.send_task(\n            OnyxCeleryTask.CHECK_FOR_INDEXING,\n            priority=OnyxCeleryPriority.HIGH,\n            kwargs={\"tenant_id\": tenant_id},\n        )\n\n        logger.info(\n            f\"create_connector_with_mock_credential - running check_for_indexing: cc_pair={response.data}\"\n        )\n\n        mt_cloud_telemetry(\n            tenant_id=tenant_id,\n            distinct_id=str(user.id),\n            event=MilestoneRecordType.CREATED_CONNECTOR,\n        )\n        return response\n\n    except ConnectorValidationError as e:\n        raise HTTPException(\n            status_code=400, detail=\"Connector validation error: \" + str(e)\n        )\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n\n@router.patch(\"/admin/connector/{connector_id}\", tags=PUBLIC_API_TAGS)\ndef update_connector_from_model(\n    connector_id: int,\n    connector_data: ConnectorUpdateRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ConnectorSnapshot | StatusResponse[int]:\n    cc_pair = fetch_connector_credential_pair_for_connector(db_session, connector_id)\n    try:\n        _validate_connector_allowed(connector_data.source)\n        fetch_ee_implementation_or_noop(\n            \"onyx.db.user_group\", \"validate_object_creation_for_user\", None\n        )(\n            db_session=db_session,\n            user=user,\n            target_group_ids=connector_data.groups,\n            object_is_public=connector_data.access_type == AccessType.PUBLIC,\n            object_is_perm_sync=connector_data.access_type == AccessType.SYNC,\n            object_is_owned_by_user=cc_pair and user and cc_pair.creator_id == user.id,\n        )\n        connector_base = connector_data.to_connector_base()\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    updated_connector = update_connector(connector_id, connector_base, db_session)\n    if updated_connector is None:\n        raise HTTPException(\n            status_code=404, detail=f\"Connector {connector_id} does not exist\"\n        )\n\n    return ConnectorSnapshot(\n        id=updated_connector.id,\n        name=updated_connector.name,\n        source=updated_connector.source,\n        input_type=updated_connector.input_type,\n        connector_specific_config=updated_connector.connector_specific_config,\n        refresh_freq=updated_connector.refresh_freq,\n        prune_freq=updated_connector.prune_freq,\n        credential_ids=[\n            association.credential.id for association in updated_connector.credentials\n        ],\n        indexing_start=updated_connector.indexing_start,\n        time_created=updated_connector.time_created,\n        time_updated=updated_connector.time_updated,\n    )\n\n\n@router.delete(\n    \"/admin/connector/{connector_id}\",\n    response_model=StatusResponse[int],\n    tags=PUBLIC_API_TAGS,\n)\ndef delete_connector_by_id(\n    connector_id: int,\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse[int]:\n    try:\n        with db_session.begin():\n            return delete_connector(\n                db_session=db_session,\n                connector_id=connector_id,\n            )\n    except AssertionError:\n        raise HTTPException(status_code=400, detail=\"Connector is not deletable\")\n\n\n@router.post(\"/admin/connector/run-once\", tags=PUBLIC_API_TAGS)\ndef connector_run_once(\n    run_info: RunConnectorRequest,\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse[int]:\n    \"\"\"Used to trigger indexing on a set of cc_pairs associated with a\n    single connector.\"\"\"\n    tenant_id = get_current_tenant_id()\n\n    connector_id = run_info.connector_id\n    specified_credential_ids = run_info.credential_ids\n\n    try:\n        possible_credential_ids = get_connector_credential_ids(\n            run_info.connector_id, db_session\n        )\n    except ValueError:\n        raise HTTPException(\n            status_code=404,\n            detail=f\"Connector by id {connector_id} does not exist.\",\n        )\n\n    if not specified_credential_ids:\n        credential_ids = possible_credential_ids\n    else:\n        if set(specified_credential_ids).issubset(set(possible_credential_ids)):\n            credential_ids = specified_credential_ids\n        else:\n            raise HTTPException(\n                status_code=400,\n                detail=\"Not all specified credentials are associated with connector\",\n            )\n\n    if not credential_ids:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Connector has no valid credentials, cannot create index attempts.\",\n        )\n    try:\n        num_triggers = trigger_indexing_for_cc_pair(\n            credential_ids,\n            connector_id,\n            run_info.from_beginning,\n            tenant_id,\n            db_session,\n        )\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    logger.info(\"connector_run_once - running check_for_indexing\")\n\n    msg = f\"Marked {num_triggers} index attempts with indexing triggers.\"\n    return StatusResponse(\n        success=True,\n        message=msg,\n        data=num_triggers,\n    )\n\n\n\"\"\"Endpoints for basic users\"\"\"\n\n\n@router.get(\"/connector/gmail/authorize/{credential_id}\")\ndef gmail_auth(\n    response: Response, credential_id: str, _: User = Depends(current_user)\n) -> AuthUrl:\n    # set a cookie that we can read in the callback (used for `verify_csrf`)\n    response.set_cookie(\n        key=_GMAIL_CREDENTIAL_ID_COOKIE_NAME,\n        value=credential_id,\n        httponly=True,\n        max_age=600,\n    )\n    return AuthUrl(auth_url=get_auth_url(int(credential_id), DocumentSource.GMAIL))\n\n\n@router.get(\"/connector/google-drive/authorize/{credential_id}\")\ndef google_drive_auth(\n    response: Response, credential_id: str, _: User = Depends(current_user)\n) -> AuthUrl:\n    # set a cookie that we can read in the callback (used for `verify_csrf`)\n    response.set_cookie(\n        key=_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME,\n        value=credential_id,\n        httponly=True,\n        max_age=600,\n    )\n    return AuthUrl(\n        auth_url=get_auth_url(int(credential_id), DocumentSource.GOOGLE_DRIVE)\n    )\n\n\n@router.get(\"/connector/gmail/callback\")\ndef gmail_callback(\n    request: Request,\n    callback: GmailCallback = Depends(),\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    credential_id_cookie = request.cookies.get(_GMAIL_CREDENTIAL_ID_COOKIE_NAME)\n    if credential_id_cookie is None or not credential_id_cookie.isdigit():\n        raise HTTPException(\n            status_code=401, detail=\"Request did not pass CSRF verification.\"\n        )\n    credential_id = int(credential_id_cookie)\n    verify_csrf(credential_id, callback.state)\n    credentials: Credentials | None = update_credential_access_tokens(\n        callback.code,\n        credential_id,\n        user,\n        db_session,\n        DocumentSource.GMAIL,\n        GoogleOAuthAuthenticationMethod.UPLOADED,\n    )\n    if credentials is None:\n        raise HTTPException(\n            status_code=500, detail=\"Unable to fetch Gmail access tokens\"\n        )\n\n    return StatusResponse(success=True, message=\"Updated Gmail access tokens\")\n\n\n@router.get(\"/connector/google-drive/callback\")\ndef google_drive_callback(\n    request: Request,\n    callback: GDriveCallback = Depends(),\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    credential_id_cookie = request.cookies.get(_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME)\n    if credential_id_cookie is None or not credential_id_cookie.isdigit():\n        raise HTTPException(\n            status_code=401, detail=\"Request did not pass CSRF verification.\"\n        )\n    credential_id = int(credential_id_cookie)\n    verify_csrf(credential_id, callback.state)\n\n    credentials: Credentials | None = update_credential_access_tokens(\n        callback.code,\n        credential_id,\n        user,\n        db_session,\n        DocumentSource.GOOGLE_DRIVE,\n        GoogleOAuthAuthenticationMethod.UPLOADED,\n    )\n    if credentials is None:\n        raise HTTPException(\n            status_code=500, detail=\"Unable to fetch Google Drive access tokens\"\n        )\n\n    return StatusResponse(success=True, message=\"Updated Google Drive access tokens\")\n\n\n@router.get(\"/connector\", tags=PUBLIC_API_TAGS)\ndef get_connectors(\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[ConnectorSnapshot]:\n    connectors = fetch_connectors(db_session)\n    return [\n        ConnectorSnapshot.from_connector_db_model(connector)\n        for connector in connectors\n        # don't include INGESTION_API, as it's not a \"real\"\n        # connector like those created by the user\n        if connector.source != DocumentSource.INGESTION_API\n    ]\n\n\n@router.get(\"/indexed-sources\", tags=PUBLIC_API_TAGS)\ndef get_indexed_sources(\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> IndexedSourcesResponse:\n    sources = sorted(\n        fetch_unique_document_sources(db_session), key=lambda source: source.value\n    )\n    return IndexedSourcesResponse(sources=sources)\n\n\n@router.get(\"/connector/{connector_id}\", tags=PUBLIC_API_TAGS)\ndef get_connector_by_id(\n    connector_id: int,\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> ConnectorSnapshot | StatusResponse[int]:\n    connector = fetch_connector_by_id(connector_id, db_session)\n    if connector is None:\n        raise HTTPException(\n            status_code=404, detail=f\"Connector {connector_id} does not exist\"\n        )\n\n    return ConnectorSnapshot(\n        id=connector.id,\n        name=connector.name,\n        source=connector.source,\n        indexing_start=connector.indexing_start,\n        input_type=connector.input_type,\n        connector_specific_config=connector.connector_specific_config,\n        refresh_freq=connector.refresh_freq,\n        prune_freq=connector.prune_freq,\n        credential_ids=[\n            association.credential.id for association in connector.credentials\n        ],\n        time_created=connector.time_created,\n        time_updated=connector.time_updated,\n    )\n\n\n@router.post(\"/connector-request\")\ndef submit_connector_request(\n    request_data: ConnectorRequestSubmission,\n    user: User = Depends(current_user),\n) -> StatusResponse:\n    \"\"\"\n    Submit a connector request for Cloud deployments.\n    Tracks via PostHog telemetry and sends email to hello@onyx.app.\n    \"\"\"\n    tenant_id = get_current_tenant_id()\n    connector_name = request_data.connector_name.strip()\n\n    if not connector_name:\n        raise HTTPException(status_code=400, detail=\"Connector name cannot be empty\")\n\n    user_email = user.email\n\n    # Track connector request via PostHog telemetry (Cloud only)\n    from shared_configs.configs import MULTI_TENANT\n\n    if MULTI_TENANT:\n        mt_cloud_telemetry(\n            tenant_id=tenant_id,\n            distinct_id=str(user.id),\n            event=MilestoneRecordType.REQUESTED_CONNECTOR,\n            properties={\n                \"connector_name\": connector_name,\n                \"user_email\": user.email,\n            },\n        )\n\n    # Send email notification (if email is configured)\n    if EMAIL_CONFIGURED:\n        try:\n            subject = \"Onyx Craft Connector Request\"\n            email_body_text = f\"\"\"A new connector request has been submitted:\n\nConnector Name: {connector_name}\nUser Email: {user_email or \"Not provided (anonymous user)\"}\nTenant ID: {tenant_id}\n\"\"\"\n            email_body_html = f\"\"\"<html>\n<body>\n<p>A new connector request has been submitted:</p>\n<ul>\n<li><strong>Connector Name:</strong> {connector_name}</li>\n<li><strong>User Email:</strong> {user_email or \"Not provided (anonymous user)\"}</li>\n<li><strong>Tenant ID:</strong> {tenant_id}</li>\n</ul>\n</body>\n</html>\"\"\"\n\n            send_email(\n                user_email=\"hello@onyx.app\",\n                subject=subject,\n                html_body=email_body_html,\n                text_body=email_body_text,\n            )\n            logger.info(\n                f\"Connector request email sent to hello@onyx.app for connector: {connector_name}\"\n            )\n        except Exception as e:\n            # Log error but don't fail the request if email fails\n            logger.error(\n                f\"Failed to send connector request email for {connector_name}: {e}\"\n            )\n\n    logger.info(\n        f\"Connector request submitted: {connector_name} by user {user_email or 'anonymous'} (tenant: {tenant_id})\"\n    )\n\n    return StatusResponse(\n        success=True,\n        message=\"Connector request submitted successfully. We'll prioritize popular requests!\",\n    )\n\n\nclass BasicCCPairInfo(BaseModel):\n    has_successful_run: bool\n    source: DocumentSource\n    status: ConnectorCredentialPairStatus\n\n\n@router.get(\"/connector-status\", tags=PUBLIC_API_TAGS)\ndef get_basic_connector_indexing_status(\n    user: User = Depends(current_chat_accessible_user),\n    db_session: Session = Depends(get_session),\n) -> list[BasicCCPairInfo]:\n    cc_pairs = get_connector_credential_pairs_for_user(\n        db_session=db_session,\n        eager_load_connector=True,\n        get_editable=False,\n        user=user,\n    )\n\n    # NOTE: This endpoint excludes Craft connectors\n    return [\n        BasicCCPairInfo(\n            has_successful_run=cc_pair.last_successful_index_time is not None,\n            source=cc_pair.connector.source,\n            status=cc_pair.status,\n        )\n        for cc_pair in cc_pairs\n        if cc_pair.connector.source != DocumentSource.INGESTION_API\n        and cc_pair.processing_mode == ProcessingMode.REGULAR\n    ]\n\n\ndef trigger_indexing_for_cc_pair(\n    specified_credential_ids: list[int],\n    connector_id: int,\n    from_beginning: bool,\n    tenant_id: str,\n    db_session: Session,\n) -> int:\n    try:\n        possible_credential_ids = get_connector_credential_ids(connector_id, db_session)\n    except ValueError as e:\n        raise ValueError(f\"Connector by id {connector_id} does not exist: {str(e)}\")\n\n    if not specified_credential_ids:\n        credential_ids = possible_credential_ids\n    else:\n        if set(specified_credential_ids).issubset(set(possible_credential_ids)):\n            credential_ids = specified_credential_ids\n        else:\n            raise ValueError(\n                \"Not all specified credentials are associated with connector\"\n            )\n\n    if not credential_ids:\n        raise ValueError(\n            \"Connector has no valid credentials, cannot create index attempts.\"\n        )\n\n    # Prevents index attempts for cc pairs that already have an index attempt currently running\n    skipped_credentials = [\n        credential_id\n        for credential_id in credential_ids\n        if get_index_attempts_for_cc_pair(\n            cc_pair_identifier=ConnectorCredentialPairIdentifier(\n                connector_id=connector_id,\n                credential_id=credential_id,\n            ),\n            only_current=True,\n            db_session=db_session,\n            disinclude_finished=True,\n        )\n    ]\n\n    connector_credential_pairs = [\n        get_connector_credential_pair(\n            db_session=db_session,\n            connector_id=connector_id,\n            credential_id=credential_id,\n        )\n        for credential_id in credential_ids\n        if credential_id not in skipped_credentials\n    ]\n\n    num_triggers = 0\n    for cc_pair in connector_credential_pairs:\n        if cc_pair is not None:\n            indexing_mode = IndexingMode.UPDATE\n            if from_beginning:\n                indexing_mode = IndexingMode.REINDEX\n\n            mark_ccpair_with_indexing_trigger(cc_pair.id, indexing_mode, db_session)\n            num_triggers += 1\n\n            logger.info(\n                f\"connector_run_once - marking cc_pair with indexing trigger: \"\n                f\"connector={connector_id} \"\n                f\"cc_pair={cc_pair.id} \"\n                f\"indexing_trigger={indexing_mode}\"\n            )\n\n    priority = OnyxCeleryPriority.HIGH\n\n    # run the beat task to pick up the triggers immediately\n    logger.info(f\"Sending indexing check task with priority {priority}\")\n    client_app.send_task(\n        OnyxCeleryTask.CHECK_FOR_INDEXING,\n        priority=priority,\n        kwargs={\"tenant_id\": tenant_id},\n    )\n\n    return num_triggers\n"
  },
  {
    "path": "backend/onyx/server/documents/credential.py",
    "content": "import json\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import File\nfrom fastapi import Form\nfrom fastapi import HTTPException\nfrom fastapi import Query\nfrom fastapi import UploadFile\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.connectors.factory import validate_ccpair_for_user\nfrom onyx.db.credentials import alter_credential\nfrom onyx.db.credentials import cleanup_gmail_credentials\nfrom onyx.db.credentials import create_credential\nfrom onyx.db.credentials import CREDENTIAL_PERMISSIONS_TO_IGNORE\nfrom onyx.db.credentials import delete_credential\nfrom onyx.db.credentials import delete_credential_for_user\nfrom onyx.db.credentials import fetch_credential_by_id_for_user\nfrom onyx.db.credentials import fetch_credentials_by_source_for_user\nfrom onyx.db.credentials import fetch_credentials_for_user\nfrom onyx.db.credentials import swap_credentials_connector\nfrom onyx.db.credentials import update_credential\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import DocumentSource\nfrom onyx.db.models import User\nfrom onyx.server.documents.models import CredentialBase\nfrom onyx.server.documents.models import CredentialDataUpdateRequest\nfrom onyx.server.documents.models import CredentialSnapshot\nfrom onyx.server.documents.models import CredentialSwapRequest\nfrom onyx.server.documents.models import ObjectCreationIdResponse\nfrom onyx.server.documents.private_key_types import FILE_TYPE_TO_FILE_PROCESSOR\nfrom onyx.server.documents.private_key_types import PrivateKeyFileTypes\nfrom onyx.server.documents.private_key_types import ProcessPrivateKeyFileProtocol\nfrom onyx.server.models import StatusResponse\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\n\nlogger = setup_logger()\n\n\nrouter = APIRouter(prefix=\"/manage\", tags=PUBLIC_API_TAGS)\n\n\ndef _ignore_credential_permissions(source: DocumentSource) -> bool:\n    return source in CREDENTIAL_PERMISSIONS_TO_IGNORE\n\n\n\"\"\"Admin-only endpoints\"\"\"\n\n\n@router.get(\"/admin/credential\")\ndef list_credentials_admin(\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[CredentialSnapshot]:\n    \"\"\"Lists all public credentials\"\"\"\n    credentials = fetch_credentials_for_user(\n        db_session=db_session,\n        user=user,\n        get_editable=False,\n    )\n    return [\n        CredentialSnapshot.from_credential_db_model(credential)\n        for credential in credentials\n    ]\n\n\n@router.get(\"/admin/similar-credentials/{source_type}\")\ndef get_cc_source_full_info(\n    source_type: DocumentSource,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n    get_editable: bool = Query(\n        False, description=\"If true, return editable credentials\"\n    ),\n) -> list[CredentialSnapshot]:\n    credentials = fetch_credentials_by_source_for_user(\n        db_session=db_session,\n        user=user,\n        document_source=source_type,\n        get_editable=get_editable,\n    )\n\n    return [\n        CredentialSnapshot.from_credential_db_model(credential)\n        for credential in credentials\n    ]\n\n\n@router.delete(\"/admin/credential/{credential_id}\")\ndef delete_credential_by_id_admin(\n    credential_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    \"\"\"Same as the user endpoint, but can delete any credential (not just the user's own)\"\"\"\n    delete_credential(db_session=db_session, credential_id=credential_id)\n    return StatusResponse(\n        success=True, message=\"Credential deleted successfully\", data=credential_id\n    )\n\n\n@router.put(\"/admin/credential/swap\")\ndef swap_credentials_for_connector(\n    credential_swap_req: CredentialSwapRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    validate_ccpair_for_user(\n        credential_swap_req.connector_id,\n        credential_swap_req.new_credential_id,\n        credential_swap_req.access_type,\n        db_session,\n    )\n\n    connector_credential_pair = swap_credentials_connector(\n        new_credential_id=credential_swap_req.new_credential_id,\n        connector_id=credential_swap_req.connector_id,\n        db_session=db_session,\n        user=user,\n    )\n\n    return StatusResponse(\n        success=True,\n        message=\"Credential swapped successfully\",\n        data=connector_credential_pair.id,\n    )\n\n\n@router.post(\"/credential\")\ndef create_credential_from_model(\n    credential_info: CredentialBase,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ObjectCreationIdResponse:\n    if not _ignore_credential_permissions(credential_info.source):\n        fetch_ee_implementation_or_noop(\n            \"onyx.db.user_group\", \"validate_object_creation_for_user\", None\n        )(\n            db_session=db_session,\n            user=user,\n            target_group_ids=credential_info.groups,\n            object_is_public=credential_info.curator_public,\n        )\n\n    # Temporary fix for empty Google App credentials\n    if credential_info.source == DocumentSource.GMAIL:\n        cleanup_gmail_credentials(db_session=db_session)\n\n    credential = create_credential(credential_info, user, db_session)\n    return ObjectCreationIdResponse(\n        id=credential.id,\n        credential=CredentialSnapshot.from_credential_db_model(credential),\n    )\n\n\n@router.post(\"/credential/private-key\")\ndef create_credential_with_private_key(\n    credential_json: str = Form(...),\n    admin_public: bool = Form(False),\n    curator_public: bool = Form(False),\n    groups: list[int] = Form([]),\n    name: str | None = Form(None),\n    source: str = Form(...),\n    user: User = Depends(current_curator_or_admin_user),\n    uploaded_file: UploadFile = File(...),\n    field_key: str = Form(...),\n    type_definition_key: str = Form(...),\n    db_session: Session = Depends(get_session),\n) -> ObjectCreationIdResponse:\n    try:\n        credential_data = json.loads(credential_json)\n    except json.JSONDecodeError as e:\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Invalid JSON in credential_json: {str(e)}\",\n        )\n\n    private_key_processor: ProcessPrivateKeyFileProtocol | None = (\n        FILE_TYPE_TO_FILE_PROCESSOR.get(PrivateKeyFileTypes(type_definition_key))\n    )\n    if private_key_processor is None:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Invalid type definition key for private key file\",\n        )\n    private_key_content: str = private_key_processor(uploaded_file)\n\n    credential_data[field_key] = private_key_content\n\n    credential_info = CredentialBase(\n        credential_json=credential_data,\n        admin_public=admin_public,\n        curator_public=curator_public,\n        groups=groups,\n        name=name,\n        source=DocumentSource(source),\n    )\n\n    if not _ignore_credential_permissions(DocumentSource(source)):\n        fetch_ee_implementation_or_noop(\n            \"onyx.db.user_group\", \"validate_object_creation_for_user\", None\n        )(\n            db_session=db_session,\n            user=user,\n            target_group_ids=groups,\n            object_is_public=curator_public,\n        )\n\n    # Temporary fix for empty Google App credentials\n    if DocumentSource(source) == DocumentSource.GMAIL:\n        cleanup_gmail_credentials(db_session=db_session)\n\n    credential = create_credential(credential_info, user, db_session)\n    return ObjectCreationIdResponse(\n        id=credential.id,\n        credential=CredentialSnapshot.from_credential_db_model(credential),\n    )\n\n\n\"\"\"Endpoints for all\"\"\"\n\n\n@router.get(\"/credential\")\ndef list_credentials(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[CredentialSnapshot]:\n    credentials = fetch_credentials_for_user(db_session=db_session, user=user)\n    return [\n        CredentialSnapshot.from_credential_db_model(credential)\n        for credential in credentials\n    ]\n\n\n@router.get(\"/credential/{credential_id}\")\ndef get_credential_by_id(\n    credential_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> CredentialSnapshot | StatusResponse[int]:\n    credential = fetch_credential_by_id_for_user(\n        credential_id,\n        user,\n        db_session,\n        get_editable=False,\n    )\n    if credential is None:\n        raise HTTPException(\n            status_code=401,\n            detail=f\"Credential {credential_id} does not exist or does not belong to user\",\n        )\n\n    return CredentialSnapshot.from_credential_db_model(credential)\n\n\n@router.put(\"/admin/credential/{credential_id}\")\ndef update_credential_data(\n    credential_id: int,\n    credential_update: CredentialDataUpdateRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> CredentialBase:\n    credential = alter_credential(\n        credential_id,\n        credential_update.name,\n        credential_update.credential_json,\n        user,\n        db_session,\n    )\n\n    if credential is None:\n        raise HTTPException(\n            status_code=401,\n            detail=f\"Credential {credential_id} does not exist or does not belong to user\",\n        )\n\n    return CredentialSnapshot.from_credential_db_model(credential)\n\n\n@router.put(\"/admin/credential/private-key/{credential_id}\")\ndef update_credential_private_key(\n    credential_id: int,\n    name: str = Form(...),\n    credential_json: str = Form(...),\n    uploaded_file: UploadFile = File(...),\n    field_key: str = Form(...),\n    type_definition_key: str = Form(...),\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> CredentialBase:\n    try:\n        credential_data = json.loads(credential_json)\n    except json.JSONDecodeError as e:\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Invalid JSON in credential_json: {str(e)}\",\n        )\n\n    private_key_processor: ProcessPrivateKeyFileProtocol | None = (\n        FILE_TYPE_TO_FILE_PROCESSOR.get(PrivateKeyFileTypes(type_definition_key))\n    )\n    if private_key_processor is None:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Invalid type definition key for private key file\",\n        )\n    private_key_content: str = private_key_processor(uploaded_file)\n    credential_data[field_key] = private_key_content\n\n    credential = alter_credential(\n        credential_id,\n        name,\n        credential_data,\n        user,\n        db_session,\n    )\n\n    if credential is None:\n        raise HTTPException(\n            status_code=401,\n            detail=f\"Credential {credential_id} does not exist or does not belong to user\",\n        )\n\n    return CredentialSnapshot.from_credential_db_model(credential)\n\n\n@router.patch(\"/credential/{credential_id}\")\ndef update_credential_from_model(\n    credential_id: int,\n    credential_data: CredentialBase,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> CredentialSnapshot | StatusResponse[int]:\n    updated_credential = update_credential(\n        credential_id, credential_data, user, db_session\n    )\n    if updated_credential is None:\n        raise HTTPException(\n            status_code=401,\n            detail=f\"Credential {credential_id} does not exist or does not belong to user\",\n        )\n\n    # Get credential_json value - use masking for API responses\n    credential_json_value = (\n        updated_credential.credential_json.get_value(apply_mask=True)\n        if updated_credential.credential_json\n        else {}\n    )\n\n    return CredentialSnapshot(\n        source=updated_credential.source,\n        id=updated_credential.id,\n        credential_json=credential_json_value,\n        user_id=updated_credential.user_id,\n        name=updated_credential.name,\n        admin_public=updated_credential.admin_public,\n        time_created=updated_credential.time_created,\n        time_updated=updated_credential.time_updated,\n        curator_public=updated_credential.curator_public,\n    )\n\n\n@router.delete(\"/credential/{credential_id}\")\ndef delete_credential_by_id(\n    credential_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    delete_credential_for_user(\n        credential_id,\n        user,\n        db_session,\n    )\n\n    return StatusResponse(\n        success=True, message=\"Credential deleted successfully\", data=credential_id\n    )\n\n\n@router.delete(\"/credential/force/{credential_id}\")\ndef force_delete_credential_by_id(\n    credential_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    delete_credential_for_user(credential_id, user, db_session, True)\n\n    return StatusResponse(\n        success=True, message=\"Credential deleted successfully\", data=credential_id\n    )\n"
  },
  {
    "path": "backend/onyx/server/documents/document.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Query\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_user\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.preprocessing.access_filters import (\n    build_access_filters_for_user,\n)\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.document_index.factory import get_default_document_index\nfrom onyx.document_index.interfaces import VespaChunkRequest\nfrom onyx.natural_language_processing.utils import get_tokenizer\nfrom onyx.prompts.prompt_utils import build_doc_context_str\nfrom onyx.server.documents.models import ChunkInfo\nfrom onyx.server.documents.models import DocumentInfo\nfrom onyx.server.utils_vector_db import require_vector_db\n\n\nrouter = APIRouter(prefix=\"/document\")\n\n\n# Have to use a query parameter as FastAPI is interpreting the URL type document_ids\n# as a different path\n@router.get(\"/document-size-info\", dependencies=[Depends(require_vector_db)])\ndef get_document_info(\n    document_id: str = Query(...),\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> DocumentInfo:\n    search_settings = get_current_search_settings(db_session)\n    # This flow is for search so we do not get all indices.\n    document_index = get_default_document_index(search_settings, None, db_session)\n\n    user_acl_filters = build_access_filters_for_user(user, db_session)\n    inference_chunks = document_index.id_based_retrieval(\n        chunk_requests=[VespaChunkRequest(document_id=document_id)],\n        filters=IndexFilters(access_control_list=user_acl_filters),\n    )\n\n    if not inference_chunks:\n        raise HTTPException(status_code=404, detail=\"Document not found\")\n\n    contents = [chunk.content for chunk in inference_chunks]\n\n    combined_contents = \"\\n\".join(contents)\n\n    # get actual document context used for LLM\n    first_chunk = inference_chunks[0]\n    tokenizer_encode = get_tokenizer(\n        provider_type=search_settings.provider_type,\n        model_name=search_settings.model_name,\n    ).encode\n    full_context_str = build_doc_context_str(\n        semantic_identifier=first_chunk.semantic_identifier,\n        source_type=first_chunk.source_type,\n        content=combined_contents,\n        metadata_dict=first_chunk.metadata,\n        updated_at=first_chunk.updated_at,\n        ind=0,\n    )\n\n    return DocumentInfo(\n        num_chunks=len(inference_chunks),\n        num_tokens=len(tokenizer_encode(full_context_str)),\n    )\n\n\n@router.get(\"/chunk-info\", dependencies=[Depends(require_vector_db)])\ndef get_chunk_info(\n    document_id: str = Query(...),\n    chunk_id: int = Query(...),\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> ChunkInfo:\n    search_settings = get_current_search_settings(db_session)\n    # This flow is for search so we do not get all indices.\n    document_index = get_default_document_index(search_settings, None, db_session)\n\n    user_acl_filters = build_access_filters_for_user(user, db_session)\n    chunk_request = VespaChunkRequest(\n        document_id=document_id,\n        min_chunk_ind=chunk_id,\n        max_chunk_ind=chunk_id,\n    )\n\n    inference_chunks = document_index.id_based_retrieval(\n        chunk_requests=[chunk_request],\n        filters=IndexFilters(access_control_list=user_acl_filters),\n        batch_retrieval=True,\n    )\n\n    if not inference_chunks:\n        raise HTTPException(status_code=404, detail=\"Chunk not found\")\n\n    chunk_content = inference_chunks[0].content\n\n    tokenizer_encode = get_tokenizer(\n        provider_type=search_settings.provider_type,\n        model_name=search_settings.model_name,\n    ).encode\n\n    return ChunkInfo(\n        content=chunk_content, num_tokens=len(tokenizer_encode(chunk_content))\n    )\n"
  },
  {
    "path": "backend/onyx/server/documents/document_utils.py",
    "content": "from cryptography.hazmat.primitives.serialization import pkcs12\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _is_password_related_error(error: Exception) -> bool:\n    \"\"\"\n    Check if the exception indicates a password-related issue rather than a format issue.\n    \"\"\"\n    error_msg = str(error).lower()\n    password_keywords = [\"mac\", \"integrity\", \"password\", \"authentication\", \"verify\"]\n    return any(keyword in error_msg for keyword in password_keywords)\n\n\ndef validate_pkcs12_content(file_bytes: bytes) -> bool:\n    \"\"\"\n    Validate that the file content is actually a PKCS#12 file.\n    This performs basic format validation without requiring passwords.\n    \"\"\"\n    try:\n        # Basic file size check\n        if len(file_bytes) < 10:\n            logger.debug(\"File too small to be a valid PKCS#12 file\")\n            return False\n\n        # Check for PKCS#12 magic bytes/ASN.1 structure\n        # PKCS#12 files start with ASN.1 SEQUENCE tag (0x30)\n        if file_bytes[0] != 0x30:\n            logger.debug(\"File does not start with ASN.1 SEQUENCE tag\")\n            return False\n\n        # Try to parse the outer ASN.1 structure without password validation\n        # This checks if the file has the basic PKCS#12 structure\n        try:\n            # Attempt to load just to validate the basic format\n            # We expect this to fail due to password, but it should fail with a specific error\n            pkcs12.load_key_and_certificates(file_bytes, password=None)\n            return True\n        except ValueError as e:\n            # Check if the error is related to password (expected) vs format issues\n            if _is_password_related_error(e):\n                # These errors indicate the file format is correct but password is wrong/missing\n                logger.debug(\n                    f\"PKCS#12 format appears valid, password-related error: {e}\"\n                )\n                return True\n            else:\n                # Other ValueError likely indicates format issues\n                logger.debug(f\"PKCS#12 format validation failed: {e}\")\n                return False\n        except Exception as e:\n            # Try with empty password as fallback\n            try:\n                pkcs12.load_key_and_certificates(file_bytes, password=b\"\")\n                return True\n            except ValueError as e2:\n                if _is_password_related_error(e2):\n                    logger.debug(\n                        f\"PKCS#12 format appears valid with empty password attempt: {e2}\"\n                    )\n                    return True\n                else:\n                    logger.debug(\n                        f\"PKCS#12 validation failed on both attempts: {e}, {e2}\"\n                    )\n                    return False\n            except Exception:\n                logger.debug(f\"PKCS#12 validation failed: {e}\")\n                return False\n\n    except Exception as e:\n        logger.debug(f\"Unexpected error during PKCS#12 validation: {e}\")\n        return False\n"
  },
  {
    "path": "backend/onyx/server/documents/models.py",
    "content": "from collections.abc import Sequence\nfrom datetime import datetime\nfrom datetime import timezone\nfrom datetime import UTC\nfrom enum import Enum\nfrom typing import Any\nfrom typing import Generic\nfrom typing import TypeVar\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom pydantic import Field\n\nfrom onyx.configs.app_configs import MASK_CREDENTIAL_PREFIX\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import PermissionSyncStatus\nfrom onyx.db.enums import ProcessingMode\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom onyx.db.models import DocPermissionSyncAttempt\nfrom onyx.db.models import Document as DbDocument\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.models import IndexingStatus\nfrom onyx.db.models import TaskStatus\nfrom onyx.server.federated.models import FederatedConnectorStatus\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\n\n\nclass DocumentSyncStatus(BaseModel):\n    doc_id: str\n    last_synced: datetime | None\n    last_modified: datetime | None\n\n    @classmethod\n    def from_model(cls, doc: DbDocument) -> \"DocumentSyncStatus\":\n        return DocumentSyncStatus(\n            doc_id=doc.id,\n            last_synced=doc.last_synced,\n            last_modified=doc.last_modified,\n        )\n\n\nclass DocumentInfo(BaseModel):\n    num_chunks: int\n    num_tokens: int\n\n\nclass ChunkInfo(BaseModel):\n    content: str\n    num_tokens: int\n\n\nclass IndexedSourcesResponse(BaseModel):\n    model_config = ConfigDict(use_enum_values=True)\n    sources: list[DocumentSource]\n\n\nclass DeletionAttemptSnapshot(BaseModel):\n    connector_id: int\n    credential_id: int\n    status: TaskStatus\n\n\nclass ConnectorBase(BaseModel):\n    name: str\n    source: DocumentSource\n    input_type: InputType\n    connector_specific_config: dict[str, Any]\n    # In seconds, None for one time index with no refresh\n    refresh_freq: int | None = None\n    prune_freq: int | None = None\n    indexing_start: datetime | None = None\n\n\nclass ConnectorUpdateRequest(ConnectorBase):\n    access_type: AccessType\n    groups: list[int] = Field(default_factory=list)\n\n    def to_connector_base(self) -> ConnectorBase:\n        return ConnectorBase(**self.model_dump(exclude={\"access_type\", \"groups\"}))\n\n\nclass ConnectorSnapshot(ConnectorBase):\n    id: int\n    credential_ids: list[int]\n    time_created: datetime\n    time_updated: datetime\n    source: DocumentSource\n\n    @classmethod\n    def from_connector_db_model(\n        cls, connector: Connector, credential_ids: list[int] | None = None\n    ) -> \"ConnectorSnapshot\":\n        return ConnectorSnapshot(\n            id=connector.id,\n            name=connector.name,\n            source=connector.source,\n            input_type=connector.input_type,\n            connector_specific_config=connector.connector_specific_config,\n            refresh_freq=connector.refresh_freq,\n            prune_freq=connector.prune_freq,\n            credential_ids=(\n                credential_ids\n                or [association.credential.id for association in connector.credentials]\n            ),\n            indexing_start=connector.indexing_start,\n            time_created=connector.time_created,\n            time_updated=connector.time_updated,\n        )\n\n\nclass CredentialSwapRequest(BaseModel):\n    new_credential_id: int\n    connector_id: int\n    access_type: AccessType\n\n\nclass CredentialDataUpdateRequest(BaseModel):\n    name: str\n    credential_json: dict[str, Any]\n\n\nclass CredentialBase(BaseModel):\n    credential_json: dict[str, Any]\n    # if `true`, then all Admins will have access to the credential\n    admin_public: bool\n    source: DocumentSource\n    name: str | None = None\n    curator_public: bool = False\n    groups: list[int] = Field(default_factory=list)\n\n\nclass CredentialSnapshot(CredentialBase):\n    id: int\n    user_id: UUID | None\n    user_email: str | None = None\n    time_created: datetime\n    time_updated: datetime\n\n    @classmethod\n    def from_credential_db_model(cls, credential: Credential) -> \"CredentialSnapshot\":\n        # Get the credential_json value with appropriate masking\n        if credential.credential_json is None:\n            credential_json_value: dict[str, Any] = {}\n        elif MASK_CREDENTIAL_PREFIX:\n            credential_json_value = credential.credential_json.get_value(\n                apply_mask=True\n            )\n        else:\n            credential_json_value = credential.credential_json.get_value(\n                apply_mask=False\n            )\n\n        return CredentialSnapshot(\n            id=credential.id,\n            credential_json=credential_json_value,\n            user_id=credential.user_id,\n            user_email=credential.user.email if credential.user else None,\n            admin_public=credential.admin_public,\n            time_created=credential.time_created,\n            time_updated=credential.time_updated,\n            source=credential.source or DocumentSource.NOT_APPLICABLE,\n            name=credential.name,\n            curator_public=credential.curator_public,\n        )\n\n\nclass IndexAttemptSnapshot(BaseModel):\n    id: int\n    status: IndexingStatus | None\n    from_beginning: bool\n    new_docs_indexed: int  # only includes completely new docs\n    total_docs_indexed: int  # includes docs that are updated\n    docs_removed_from_index: int\n    error_msg: str | None\n    error_count: int\n    full_exception_trace: str | None\n    time_started: str | None\n    time_updated: str\n    poll_range_start: datetime | None = None\n    poll_range_end: datetime | None = None\n\n    @classmethod\n    def from_index_attempt_db_model(\n        cls, index_attempt: IndexAttempt\n    ) -> \"IndexAttemptSnapshot\":\n        return IndexAttemptSnapshot(\n            id=index_attempt.id,\n            status=index_attempt.status,\n            from_beginning=index_attempt.from_beginning,\n            new_docs_indexed=index_attempt.new_docs_indexed or 0,\n            total_docs_indexed=index_attempt.total_docs_indexed or 0,\n            docs_removed_from_index=index_attempt.docs_removed_from_index or 0,\n            error_msg=index_attempt.error_msg,\n            error_count=len(index_attempt.error_rows),\n            full_exception_trace=index_attempt.full_exception_trace,\n            time_started=(\n                index_attempt.time_started.isoformat()\n                if index_attempt.time_started\n                else None\n            ),\n            time_updated=index_attempt.time_updated.isoformat(),\n            poll_range_start=index_attempt.poll_range_start,\n            poll_range_end=index_attempt.poll_range_end,\n        )\n\n\n# These are the types currently supported by the pagination hook\n# More api endpoints can be refactored and be added here for use with the pagination hook\nPaginatedType = TypeVar(\"PaginatedType\", bound=BaseModel)\n\n\nclass PermissionSyncAttemptSnapshot(BaseModel):\n    id: int\n    status: PermissionSyncStatus\n    error_message: str | None\n    total_docs_synced: int\n    docs_with_permission_errors: int\n    time_created: str\n    time_started: str | None\n    time_finished: str | None\n\n    @classmethod\n    def from_permission_sync_attempt_db_model(\n        cls, attempt: DocPermissionSyncAttempt\n    ) -> \"PermissionSyncAttemptSnapshot\":\n        return PermissionSyncAttemptSnapshot(\n            id=attempt.id,\n            status=attempt.status,\n            error_message=attempt.error_message,\n            total_docs_synced=attempt.total_docs_synced or 0,\n            docs_with_permission_errors=attempt.docs_with_permission_errors or 0,\n            time_created=attempt.time_created.isoformat(),\n            time_started=(\n                attempt.time_started.isoformat() if attempt.time_started else None\n            ),\n            time_finished=(\n                attempt.time_finished.isoformat() if attempt.time_finished else None\n            ),\n        )\n\n\nclass PaginatedReturn(BaseModel, Generic[PaginatedType]):\n    items: list[PaginatedType]\n    total_items: int\n\n\nclass CCPairFullInfo(BaseModel):\n    id: int\n    name: str\n    status: ConnectorCredentialPairStatus\n    in_repeated_error_state: bool\n    num_docs_indexed: int\n    connector: ConnectorSnapshot\n    credential: CredentialSnapshot\n    number_of_index_attempts: int\n    last_index_attempt_status: IndexingStatus | None\n    latest_deletion_attempt: DeletionAttemptSnapshot | None\n    access_type: AccessType\n    is_editable_for_current_user: bool\n    deletion_failure_message: str | None\n    indexing: bool\n    creator: UUID | None\n    creator_email: str | None\n\n    # information on syncing/indexing\n    last_indexed: datetime | None\n    last_pruned: datetime | None\n    # accounts for both doc sync and group sync\n    last_full_permission_sync: datetime | None\n    overall_indexing_speed: float | None\n    latest_checkpoint_description: str | None\n\n    # permission sync attempt status\n    last_permission_sync_attempt_status: PermissionSyncStatus | None\n    permission_syncing: bool\n    last_permission_sync_attempt_finished: datetime | None\n    last_permission_sync_attempt_error_message: str | None\n\n    @classmethod\n    def _get_last_full_permission_sync(\n        cls, cc_pair_model: ConnectorCredentialPair\n    ) -> datetime | None:\n        check_if_source_requires_external_group_sync = fetch_ee_implementation_or_noop(\n            \"onyx.external_permissions.sync_params\",\n            \"source_requires_external_group_sync\",\n            noop_return_value=False,\n        )\n        check_if_source_requires_doc_sync = fetch_ee_implementation_or_noop(\n            \"onyx.external_permissions.sync_params\",\n            \"source_requires_doc_sync\",\n            noop_return_value=False,\n        )\n\n        needs_group_sync = check_if_source_requires_external_group_sync(\n            cc_pair_model.connector.source\n        )\n        needs_doc_sync = check_if_source_requires_doc_sync(\n            cc_pair_model.connector.source\n        )\n\n        last_group_sync = (\n            cc_pair_model.last_time_external_group_sync\n            if needs_group_sync\n            else datetime.now(UTC)\n        )\n        last_doc_sync = (\n            cc_pair_model.last_time_perm_sync if needs_doc_sync else datetime.now(UTC)\n        )\n\n        # if either is still None at this point, it means sync is necessary but\n        # has never completed.\n        if last_group_sync is None or last_doc_sync is None:\n            return None\n\n        return min(last_group_sync, last_doc_sync)\n\n    @classmethod\n    def from_models(\n        cls,\n        cc_pair_model: ConnectorCredentialPair,\n        latest_deletion_attempt: DeletionAttemptSnapshot | None,\n        number_of_index_attempts: int,\n        last_index_attempt: IndexAttempt | None,\n        num_docs_indexed: int,  # not ideal, but this must be computed separately\n        is_editable_for_current_user: bool,\n        indexing: bool,\n        last_successful_index_time: datetime | None = None,\n        last_permission_sync_attempt_status: PermissionSyncStatus | None = None,\n        permission_syncing: bool = False,\n        last_permission_sync_attempt_finished: datetime | None = None,\n        last_permission_sync_attempt_error_message: str | None = None,\n    ) -> \"CCPairFullInfo\":\n        # figure out if we need to artificially deflate the number of docs indexed.\n        # This is required since the total number of docs indexed by a CC Pair is\n        # updated before the new docs for an indexing attempt. If we don't do this,\n        # there is a mismatch between these two numbers which may confuse users.\n        last_indexing_status = last_index_attempt.status if last_index_attempt else None\n        if (\n            # only need to do this if the last indexing attempt is still in progress\n            last_indexing_status == IndexingStatus.IN_PROGRESS\n            and number_of_index_attempts == 1\n            and last_index_attempt\n            and last_index_attempt.new_docs_indexed\n        ):\n            num_docs_indexed = (\n                last_index_attempt.new_docs_indexed if last_index_attempt else 0\n            )\n\n        overall_indexing_speed = num_docs_indexed / (\n            (\n                datetime.now(tz=timezone.utc) - cc_pair_model.connector.time_created\n            ).total_seconds()\n            / 60\n        )\n\n        return cls(\n            id=cc_pair_model.id,\n            name=cc_pair_model.name,\n            status=cc_pair_model.status,\n            in_repeated_error_state=cc_pair_model.in_repeated_error_state,\n            num_docs_indexed=num_docs_indexed,\n            connector=ConnectorSnapshot.from_connector_db_model(\n                cc_pair_model.connector,\n                credential_ids=[cc_pair_model.credential_id],\n            ),\n            credential=CredentialSnapshot.from_credential_db_model(\n                cc_pair_model.credential\n            ),\n            number_of_index_attempts=number_of_index_attempts,\n            last_index_attempt_status=last_indexing_status,\n            latest_deletion_attempt=latest_deletion_attempt,\n            access_type=cc_pair_model.access_type,\n            is_editable_for_current_user=is_editable_for_current_user,\n            deletion_failure_message=cc_pair_model.deletion_failure_message,\n            indexing=indexing,\n            creator=cc_pair_model.creator_id,\n            creator_email=(\n                cc_pair_model.creator.email if cc_pair_model.creator else None\n            ),\n            last_indexed=last_successful_index_time,\n            last_pruned=cc_pair_model.last_pruned,\n            last_full_permission_sync=cls._get_last_full_permission_sync(cc_pair_model),\n            overall_indexing_speed=overall_indexing_speed,\n            latest_checkpoint_description=None,\n            last_permission_sync_attempt_status=last_permission_sync_attempt_status,\n            permission_syncing=permission_syncing,\n            last_permission_sync_attempt_finished=last_permission_sync_attempt_finished,\n            last_permission_sync_attempt_error_message=last_permission_sync_attempt_error_message,\n        )\n\n\nclass CeleryTaskStatus(BaseModel):\n    id: str\n    name: str\n    status: TaskStatus\n    start_time: datetime | None\n    register_time: datetime | None\n\n\nclass FailedConnectorIndexingStatus(BaseModel):\n    \"\"\"Simplified version of ConnectorIndexingStatus for failed indexing attempts\"\"\"\n\n    cc_pair_id: int\n    name: str\n    error_msg: str | None\n    is_deletable: bool\n    connector_id: int\n    credential_id: int\n\n\nclass ConnectorStatus(BaseModel):\n    \"\"\"\n    Represents the status of a connector,\n    including indexing status elated information\n    \"\"\"\n\n    cc_pair_id: int\n    name: str\n    connector: ConnectorSnapshot\n    credential: CredentialSnapshot\n    access_type: AccessType\n    groups: list[int]\n\n\nclass ConnectorIndexingStatus(ConnectorStatus):\n    \"\"\"Represents the full indexing status of a connector\"\"\"\n\n    cc_pair_status: ConnectorCredentialPairStatus\n    # this is separate from the `status` above, since a connector can be `INITIAL_INDEXING`, `ACTIVE`,\n    # or `PAUSED` and still be in a repeated error state.\n    in_repeated_error_state: bool\n    owner: str\n    last_finished_status: IndexingStatus | None\n    last_status: IndexingStatus | None\n    last_success: datetime | None\n    latest_index_attempt: IndexAttemptSnapshot | None\n    docs_indexed: int\n    in_progress: bool\n\n\nclass DocsCountOperator(str, Enum):\n    GREATER_THAN = \">\"\n    LESS_THAN = \"<\"\n    EQUAL_TO = \"=\"\n\n\nclass ConnectorIndexingStatusLite(BaseModel):\n    cc_pair_id: int\n    name: str\n    source: DocumentSource\n    access_type: AccessType\n    cc_pair_status: ConnectorCredentialPairStatus\n    in_progress: bool\n    in_repeated_error_state: bool\n    last_finished_status: IndexingStatus | None\n    last_status: IndexingStatus | None\n    last_success: datetime | None\n    is_editable: bool\n    docs_indexed: int\n    latest_index_attempt_docs_indexed: int | None\n\n\nclass SourceSummary(BaseModel):\n    total_connectors: int\n    active_connectors: int\n    public_connectors: int\n    total_docs_indexed: int\n\n\nclass ConnectorIndexingStatusLiteResponse(BaseModel):\n    source: DocumentSource\n    summary: SourceSummary\n    current_page: int\n    total_pages: int\n    indexing_statuses: Sequence[ConnectorIndexingStatusLite | FederatedConnectorStatus]\n\n\nclass ConnectorCredentialPairIdentifier(BaseModel):\n    connector_id: int\n    credential_id: int\n\n\nclass ConnectorCredentialPairMetadata(BaseModel):\n    name: str\n    access_type: AccessType\n    auto_sync_options: dict[str, Any] | None = None\n    groups: list[int] = Field(default_factory=list)\n    processing_mode: ProcessingMode = ProcessingMode.REGULAR\n\n\nclass CCStatusUpdateRequest(BaseModel):\n    status: ConnectorCredentialPairStatus\n\n\nclass ConnectorCredentialPairDescriptor(BaseModel):\n    id: int\n    name: str\n    connector: ConnectorSnapshot\n    credential: CredentialSnapshot\n    access_type: AccessType\n\n\nclass CCPairSummary(BaseModel):\n    \"\"\"Simplified connector-credential pair information with just essential data\"\"\"\n\n    id: int\n    name: str\n    source: DocumentSource\n    access_type: AccessType\n\n    @classmethod\n    def from_cc_pair_descriptor(\n        cls, descriptor: ConnectorCredentialPairDescriptor\n    ) -> \"CCPairSummary\":\n        return cls(\n            id=descriptor.id,\n            name=descriptor.name,\n            source=descriptor.connector.source,\n            access_type=descriptor.access_type,\n        )\n\n\nclass RunConnectorRequest(BaseModel):\n    connector_id: int\n    credential_ids: list[int] | None = None\n    from_beginning: bool = False\n\n\nclass ConnectorRequestSubmission(BaseModel):\n    connector_name: str\n\n\nclass CCPropertyUpdateRequest(BaseModel):\n    name: str\n    value: str\n\n\n\"\"\"Connectors Models\"\"\"\n\n\nclass GoogleAppWebCredentials(BaseModel):\n    client_id: str\n    project_id: str\n    auth_uri: str\n    token_uri: str\n    auth_provider_x509_cert_url: str\n    client_secret: str\n    redirect_uris: list[str]\n    javascript_origins: list[str]\n\n\nclass GoogleAppCredentials(BaseModel):\n    web: GoogleAppWebCredentials\n\n\nclass GoogleServiceAccountKey(BaseModel):\n    type: str\n    project_id: str\n    private_key_id: str\n    private_key: str\n    client_email: str\n    client_id: str\n    auth_uri: str\n    token_uri: str\n    auth_provider_x509_cert_url: str\n    client_x509_cert_url: str\n    universe_domain: str\n\n\nclass GoogleServiceAccountCredentialRequest(BaseModel):\n    google_primary_admin: str | None = None  # email of user to impersonate\n\n\nclass FileUploadResponse(BaseModel):\n    file_paths: list[str]\n    file_names: list[str]\n    zip_metadata_file_id: str | None  # File ID pointing to metadata in file store\n\n\nclass ConnectorFileInfo(BaseModel):\n    file_id: str\n    file_name: str\n    file_size: int | None = None\n    upload_date: str | None = None\n\n\nclass ConnectorFilesResponse(BaseModel):\n    files: list[ConnectorFileInfo]\n\n\nclass ObjectCreationIdResponse(BaseModel):\n    id: int\n    credential: CredentialSnapshot | None = None\n\n\nclass AuthStatus(BaseModel):\n    authenticated: bool\n\n\nclass AuthUrl(BaseModel):\n    auth_url: str\n\n\nclass GmailCallback(BaseModel):\n    state: str\n    code: str\n\n\nclass GDriveCallback(BaseModel):\n    state: str\n    code: str\n\n\nclass IndexingStatusRequest(BaseModel):\n    secondary_index: bool = False\n    source: DocumentSource | None = None\n    access_type_filters: list[AccessType] = Field(default_factory=list)\n    last_status_filters: list[IndexingStatus] = Field(default_factory=list)\n    docs_count_operator: DocsCountOperator | None = None\n    docs_count_value: int | None = None\n    name_filter: str | None = None\n    source_to_page: dict[DocumentSource, int] = Field(default_factory=dict)\n    get_all_connectors: bool = False\n"
  },
  {
    "path": "backend/onyx/server/documents/private_key_types.py",
    "content": "import base64\nfrom enum import Enum\nfrom typing import Protocol\n\nfrom fastapi import HTTPException\nfrom fastapi import UploadFile\n\nfrom onyx.server.documents.document_utils import validate_pkcs12_content\n\n\nclass ProcessPrivateKeyFileProtocol(Protocol):\n    def __call__(self, file: UploadFile) -> str:\n        \"\"\"\n        Accepts a file-like object, validates the file (e.g., checks extension and content),\n        and returns its contents as a base64-encoded string if valid.\n        Raises an exception if validation fails.\n        \"\"\"\n        ...\n\n\nclass PrivateKeyFileTypes(Enum):\n    SHAREPOINT_PFX_FILE = \"sharepoint_pfx_file\"\n\n\ndef process_sharepoint_private_key_file(file: UploadFile) -> str:\n    \"\"\"\n    Process and validate a private key file upload.\n\n    Validates both the file extension and file content to ensure it's a valid PKCS#12 file.\n    Content validation prevents attacks that rely on file extension spoofing.\n    \"\"\"\n    # First check file extension (basic filter)\n    if not (file.filename and file.filename.lower().endswith(\".pfx\")):\n        raise HTTPException(\n            status_code=400, detail=\"Invalid file type. Only .pfx files are supported.\"\n        )\n\n    # Read file content for validation and processing\n    private_key_bytes = file.file.read()\n\n    # Validate file content to prevent extension spoofing attacks\n    if not validate_pkcs12_content(private_key_bytes):\n        raise HTTPException(\n            status_code=400,\n            detail=\"Invalid file content. The uploaded file does not appear to be a valid PKCS#12 (.pfx) file.\",\n        )\n\n    # Convert to base64 if validation passes\n    pfx_64 = base64.b64encode(private_key_bytes).decode(\"ascii\")\n    return pfx_64\n\n\nFILE_TYPE_TO_FILE_PROCESSOR: dict[\n    PrivateKeyFileTypes, ProcessPrivateKeyFileProtocol\n] = {\n    PrivateKeyFileTypes.SHAREPOINT_PFX_FILE: process_sharepoint_private_key_file,\n}\n"
  },
  {
    "path": "backend/onyx/server/documents/standard_oauth.py",
    "content": "import json\nimport uuid\nfrom typing import Annotated\nfrom typing import cast\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Query\nfrom fastapi import Request\nfrom pydantic import BaseModel\nfrom pydantic import ValidationError\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_user\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.interfaces import OAuthConnector\nfrom onyx.db.credentials import create_credential\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.documents.models import CredentialBase\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.subclasses import find_all_subclasses_in_package\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/connector/oauth\")\n\n_OAUTH_STATE_KEY_FMT = \"oauth_state:{state}\"\n_OAUTH_STATE_EXPIRATION_SECONDS = 10 * 60  # 10 minutes\n_DESIRED_RETURN_URL_KEY = \"desired_return_url\"\n_ADDITIONAL_KWARGS_KEY = \"additional_kwargs\"\n\n# Cache for OAuth connectors, populated at module load time\n_OAUTH_CONNECTORS: dict[DocumentSource, type[OAuthConnector]] = {}\n\n\ndef _discover_oauth_connectors() -> dict[DocumentSource, type[OAuthConnector]]:\n    \"\"\"Walk through the connectors package to find all OAuthConnector implementations\"\"\"\n    global _OAUTH_CONNECTORS\n    if _OAUTH_CONNECTORS:  # Return cached connectors if already discovered\n        return _OAUTH_CONNECTORS\n\n    # Import submodules using package-based discovery to avoid sys.path mutations\n    oauth_connectors = find_all_subclasses_in_package(\n        cast(type[OAuthConnector], OAuthConnector), \"onyx.connectors\"\n    )\n\n    _OAUTH_CONNECTORS = {cls.oauth_id(): cls for cls in oauth_connectors}\n    return _OAUTH_CONNECTORS\n\n\n# Discover OAuth connectors at module load time\n_discover_oauth_connectors()\n\n\ndef _get_additional_kwargs(\n    request: Request, connector_cls: type[OAuthConnector], args_to_ignore: list[str]\n) -> dict[str, str]:\n    # get additional kwargs from request\n    # e.g. anything except for desired_return_url\n    additional_kwargs_dict = {\n        k: v for k, v in request.query_params.items() if k not in args_to_ignore\n    }\n    try:\n        # validate\n        connector_cls.AdditionalOauthKwargs(**additional_kwargs_dict)\n    except ValidationError:\n        raise HTTPException(\n            status_code=400,\n            detail=(\n                f\"Invalid additional kwargs. Got {additional_kwargs_dict}, expected \"\n                f\"{connector_cls.AdditionalOauthKwargs.model_json_schema()}\"\n            ),\n        )\n\n    return additional_kwargs_dict\n\n\nclass AuthorizeResponse(BaseModel):\n    redirect_url: str\n\n\n@router.get(\"/authorize/{source}\")\ndef oauth_authorize(\n    request: Request,\n    source: DocumentSource,\n    desired_return_url: Annotated[str | None, Query()] = None,\n    _: User = Depends(current_user),\n) -> AuthorizeResponse:\n    \"\"\"Initiates the OAuth flow by redirecting to the provider's auth page\"\"\"\n\n    tenant_id = get_current_tenant_id()\n    oauth_connectors = _discover_oauth_connectors()\n\n    if source not in oauth_connectors:\n        raise HTTPException(status_code=400, detail=f\"Unknown OAuth source: {source}\")\n\n    connector_cls = oauth_connectors[source]\n    base_url = WEB_DOMAIN\n\n    # get additional kwargs from request\n    # e.g. anything except for desired_return_url\n    additional_kwargs = _get_additional_kwargs(\n        request, connector_cls, [\"desired_return_url\"]\n    )\n\n    # store state in redis\n    if not desired_return_url:\n        desired_return_url = f\"{base_url}/admin/connectors/{source}?step=0\"\n    redis_client = get_redis_client(tenant_id=tenant_id)\n    state = str(uuid.uuid4())\n    redis_client.set(\n        _OAUTH_STATE_KEY_FMT.format(state=state),\n        json.dumps(\n            {\n                _DESIRED_RETURN_URL_KEY: desired_return_url,\n                _ADDITIONAL_KWARGS_KEY: additional_kwargs,\n            }\n        ),\n        ex=_OAUTH_STATE_EXPIRATION_SECONDS,\n    )\n\n    return AuthorizeResponse(\n        redirect_url=connector_cls.oauth_authorization_url(\n            base_url, state, additional_kwargs\n        )\n    )\n\n\nclass CallbackResponse(BaseModel):\n    redirect_url: str\n\n\n@router.get(\"/callback/{source}\")\ndef oauth_callback(\n    source: DocumentSource,\n    code: Annotated[str, Query()],\n    state: Annotated[str, Query()],\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_user),\n) -> CallbackResponse:\n    \"\"\"Handles the OAuth callback and exchanges the code for tokens\"\"\"\n    oauth_connectors = _discover_oauth_connectors()\n\n    if source not in oauth_connectors:\n        raise HTTPException(status_code=400, detail=f\"Unknown OAuth source: {source}\")\n\n    connector_cls = oauth_connectors[source]\n\n    # get state from redis\n    redis_client = get_redis_client()\n    oauth_state_bytes = cast(\n        bytes, redis_client.get(_OAUTH_STATE_KEY_FMT.format(state=state))\n    )\n    if not oauth_state_bytes:\n        raise HTTPException(status_code=400, detail=\"Invalid OAuth state\")\n    oauth_state = json.loads(oauth_state_bytes.decode(\"utf-8\"))\n\n    desired_return_url = cast(str, oauth_state[_DESIRED_RETURN_URL_KEY])\n    additional_kwargs = cast(dict[str, str], oauth_state[_ADDITIONAL_KWARGS_KEY])\n\n    base_url = WEB_DOMAIN\n    token_info = connector_cls.oauth_code_to_token(base_url, code, additional_kwargs)\n\n    # Create a new credential with the token info\n    credential_data = CredentialBase(\n        credential_json=token_info,\n        admin_public=True,  # Or based on some logic/parameter\n        source=source,\n        name=f\"{source.title()} OAuth Credential\",\n    )\n\n    credential = create_credential(\n        credential_data=credential_data,\n        user=user,\n        db_session=db_session,\n    )\n\n    # TODO: use a library for url handling\n    sep = \"&\" if \"?\" in desired_return_url else \"?\"\n    return CallbackResponse(\n        redirect_url=f\"{desired_return_url}{sep}credentialId={credential.id}\"\n    )\n\n\nclass OAuthAdditionalKwargDescription(BaseModel):\n    name: str\n    display_name: str\n    description: str\n\n\nclass OAuthDetails(BaseModel):\n    oauth_enabled: bool\n    additional_kwargs: list[OAuthAdditionalKwargDescription]\n\n\n@router.get(\"/details/{source}\")\ndef oauth_details(\n    source: DocumentSource,\n    _: User = Depends(current_user),\n) -> OAuthDetails:\n    oauth_connectors = _discover_oauth_connectors()\n\n    if source not in oauth_connectors:\n        return OAuthDetails(\n            oauth_enabled=False,\n            additional_kwargs=[],\n        )\n\n    connector_cls = oauth_connectors[source]\n\n    additional_kwarg_descriptions = []\n    for key, value in connector_cls.AdditionalOauthKwargs.model_json_schema()[\n        \"properties\"\n    ].items():\n        additional_kwarg_descriptions.append(\n            OAuthAdditionalKwargDescription(\n                name=key,\n                display_name=value.get(\"title\", key),\n                description=value.get(\"description\", \"\"),\n            )\n        )\n\n    return OAuthDetails(\n        oauth_enabled=True,\n        additional_kwargs=additional_kwarg_descriptions,\n    )\n"
  },
  {
    "path": "backend/onyx/server/evals/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/evals/models.py",
    "content": "from pydantic import BaseModel\n\n\nclass EvalRunAck(BaseModel):\n    \"\"\"Response model for evaluation runs\"\"\"\n\n    success: bool\n"
  },
  {
    "path": "backend/onyx/server/features/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/features/build/.gitignore",
    "content": "sandbox/kubernetes/docker/templates/venv/**\nsandbox/kubernetes/docker/demo_data/**\n"
  },
  {
    "path": "backend/onyx/server/features/build/AGENTS.template.md",
    "content": "# AGENTS.md\n\nYou are an AI agent powering **Onyx Craft**. You create interactive web applications, dashboards, and documents from company knowledge. You run in a secure sandbox with access to the user's knowledge sources. The knowledge sources you have are organization context like meeting notes, emails, slack messages, and other organizational data that you must use to answer your question.\n\n{{USER_CONTEXT}}\n\n## Configuration\n\n- **LLM**: {{LLM_PROVIDER_NAME}} / {{LLM_MODEL_NAME}}\n- **Next.js**: Running on port {{NEXTJS_PORT}} (already started — do NOT run `npm run dev`)\n  {{DISABLED_TOOLS_SECTION}}\n\n## Environment\n\nEphemeral VM with Python 3.11 and Node v22. Virtual environment at `.venv/` includes numpy, pandas, matplotlib, scipy.\n\nInstall packages: `pip install <pkg>` or `npm install <pkg>` (from `outputs/web`).\n\n{{ORG_INFO_SECTION}}\n\n## Skills\n\n{{AVAILABLE_SKILLS_SECTION}}\n\nRead the relevant SKILL.md before starting work that the skill covers.\n\n## Recommended Task Approach Methodology\n\nWhen presented with a task, you typically:\n\n1. Analyze the request to understand what's being asked\n2. Break down complex problems into manageable steps and sub-questions\n3. Use appropriate tools and methods to address each step\n4. Provide clear communication throughout the process\n5. Deliver results in a helpful and organized manner\n\nFollow this two-step pattern for most tasks:\n\n### Step 1: Information Retrieval\n\n1. **Search** knowledge sources using `find`, `grep`, or direct file reads. Start your search at the root of the `files/` directory\nto get a general grasp of what subdirectories to further explore, especially when looking for a person. their name may be a proper noun\nor strictly lowercase.\n2. **Extract** relevant data from JSON documents\n3. **Summarize** key findings before proceeding\n\n**Tip**: Use `find`, `grep`, or `glob` to search files directly rather than navigating directories one at a time.\n\n### Step 2: Output Generation\n\n1. **Choose format**: Web app for interactive/visual, Markdown for reports, or direct response for quick answers\n2. **Build** the output using retrieved information\n3. **Verify** the output renders correctly and includes accurate data\n\n## Behavior Guidelines\n\n- **Accuracy**: Do not make any assumptions about the user. Any conclusions you reach must be supported by the provided data.\n\n- **Completeness**: For any tasks requiring data from the knowledge sources, you should make sure to look at ALL sources that may be relevant to the user's questions and use that in your final response. Make sure you check Google Drive if applicable\n  - **Explicitly state** which sources were checked and which had no relevant data\n  - **Search ALL knowledge sources** for the person's name/email, not just the obvious ones when answering questions about a person's activites.\n\n- **Task Management**: For any non-trivial task involving multiple steps, you should organize your work and track progress. This helps users understand what you're doing and ensures nothing is missed.\n\n- **Verification**: For important work, include a verification step to double-check your output. This could involve testing functionality, reviewing for accuracy, or validating against requirements.\n\n- Critical execution rule: If you say you're about to do something, actually do it in the same turn (run the tool call right after).\n\n- Check off completed TODOs before reporting progress.\n\n- Your main goal is to follow the USER's instructions at each message\n\n- Don't mention tool names to the user; describe actions naturally.\n\n## Knowledge Sources\n\nThe `files/` directory contains JSON documents from various knowledge sources. Here's what's available:\n\n{{KNOWLEDGE_SOURCES_SECTION}}\n\n### Document Format\n\nFiles are JSON with: `title`, `source`, `metadata`, `sections[{text, link}]`.\n\n**Important**: The `files/` directory is read-only. Do NOT attempt to write to it.\n\n## Outputs\n\nAll outputs go in the `outputs/` directory.\n\n| Format       | Use For                                  |\n| ------------ | ---------------------------------------- |\n| **Web App**  | Interactive dashboards, data exploration |\n| **Markdown** | Reports, analyses, documentation         |\n| **Response** | Quick answers, lookups                   |\n\nYou can also generate other output formats if you think they more directly answer the user's question\n\n### Web Apps\n\nUse `outputs/web` with Next.js 16.1.1, React v19, Tailwind, Recharts, shadcn/ui.\n\n<!-- **⚠️ Read `outputs/web/AGENTS.md` for webapp technical specs and styling rules. For all other output types, this is unneccessary. ** -->\n\n### Markdown\n\nSave to `outputs/markdown/*.md`. Use clear headings and tables.\n\n## Questions to Ask\n\n- Did you check all relevant sources that could be useful in addressing the user's question?\n- Did you generate the correct output format that the user requested?\n- Did you answer the user's question thoroughly?\n"
  },
  {
    "path": "backend/onyx/server/features/build/__init__.py",
    "content": "# Build feature module\n"
  },
  {
    "path": "backend/onyx/server/features/build/api/api.py",
    "content": "import re\nfrom collections.abc import Iterator\nfrom pathlib import Path\nfrom uuid import UUID\n\nimport httpx\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Request\nfrom fastapi import Response\nfrom fastapi.responses import RedirectResponse\nfrom fastapi.responses import StreamingResponse\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_user\nfrom onyx.auth.users import optional_user\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.connector_credential_pair import get_connector_credential_pairs_for_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.enums import ProcessingMode\nfrom onyx.db.enums import SharingScope\nfrom onyx.db.index_attempt import get_latest_index_attempt_for_cc_pair_id\nfrom onyx.db.models import BuildSession\nfrom onyx.db.models import User\nfrom onyx.server.features.build.api.messages_api import router as messages_router\nfrom onyx.server.features.build.api.models import BuildConnectorInfo\nfrom onyx.server.features.build.api.models import BuildConnectorListResponse\nfrom onyx.server.features.build.api.models import BuildConnectorStatus\nfrom onyx.server.features.build.api.models import RateLimitResponse\nfrom onyx.server.features.build.api.rate_limit import get_user_rate_limit_status\nfrom onyx.server.features.build.api.sessions_api import router as sessions_router\nfrom onyx.server.features.build.api.user_library import router as user_library_router\nfrom onyx.server.features.build.db.sandbox import get_sandbox_by_user_id\nfrom onyx.server.features.build.sandbox import get_sandbox_manager\nfrom onyx.server.features.build.session.manager import SessionManager\nfrom onyx.server.features.build.utils import is_onyx_craft_enabled\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_TEMPLATES_DIR = Path(__file__).parent / \"templates\"\n_WEBAPP_HMR_FIXER_TEMPLATE = (_TEMPLATES_DIR / \"webapp_hmr_fixer.js\").read_text()\n\n\ndef require_onyx_craft_enabled(user: User = Depends(current_user)) -> User:\n    \"\"\"\n    Dependency that checks if Onyx Craft is enabled for the user.\n    Raises HTTP 403 if Onyx Craft is disabled via feature flag.\n    \"\"\"\n    if not is_onyx_craft_enabled(user):\n        raise HTTPException(\n            status_code=403,\n            detail=\"Onyx Craft is not available\",\n        )\n    return user\n\n\nrouter = APIRouter(prefix=\"/build\", dependencies=[Depends(require_onyx_craft_enabled)])\n\n# Include sub-routers for sessions, messages, and user library\nrouter.include_router(sessions_router, tags=[\"build\"])\nrouter.include_router(messages_router, tags=[\"build\"])\nrouter.include_router(user_library_router, tags=[\"build\"])\n\n\n# -----------------------------------------------------------------------------\n# Rate Limiting\n# -----------------------------------------------------------------------------\n\n\n@router.get(\"/limit\", response_model=RateLimitResponse)\ndef get_rate_limit(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> RateLimitResponse:\n    \"\"\"Get rate limit information for the current user.\"\"\"\n    return get_user_rate_limit_status(user, db_session)\n\n\n# -----------------------------------------------------------------------------\n# Build Connectors\n# -----------------------------------------------------------------------------\n\n\n@router.get(\"/connectors\", response_model=BuildConnectorListResponse)\ndef get_build_connectors(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> BuildConnectorListResponse:\n    \"\"\"Get all connectors for the build admin panel.\n\n    Returns connector-credential pairs with simplified status information.\n    On the build configure page, all users (including admins) only see connectors\n    they own/created. Users can create new connectors if they don't have one of a type.\n    \"\"\"\n    # Fetch both FILE_SYSTEM (standard connectors) and RAW_BINARY (User Library) connectors\n    file_system_cc_pairs = get_connector_credential_pairs_for_user(\n        db_session=db_session,\n        user=user,\n        get_editable=False,\n        eager_load_connector=True,\n        eager_load_credential=True,\n        processing_mode=ProcessingMode.FILE_SYSTEM,\n    )\n    raw_binary_cc_pairs = get_connector_credential_pairs_for_user(\n        db_session=db_session,\n        user=user,\n        get_editable=False,\n        eager_load_connector=True,\n        eager_load_credential=True,\n        processing_mode=ProcessingMode.RAW_BINARY,\n    )\n    cc_pairs = file_system_cc_pairs + raw_binary_cc_pairs\n\n    # Filter to only show connectors created by the current user\n    # All users (including admins) must create their own connectors on the build configure page\n    if user:\n        cc_pairs = [cc_pair for cc_pair in cc_pairs if cc_pair.creator_id == user.id]\n\n    connectors: list[BuildConnectorInfo] = []\n    for cc_pair in cc_pairs:\n        # Skip ingestion API connectors and default pairs\n        if cc_pair.connector.source == DocumentSource.INGESTION_API:\n            continue\n        if cc_pair.name == \"DefaultCCPair\":\n            continue\n\n        # Determine status\n        error_message: str | None = None\n        has_ever_succeeded = cc_pair.last_successful_index_time is not None\n\n        if cc_pair.status == ConnectorCredentialPairStatus.DELETING:\n            status = BuildConnectorStatus.DELETING\n        elif cc_pair.status == ConnectorCredentialPairStatus.INVALID:\n            # If connector has succeeded before but credentials are now invalid,\n            # show as connected_with_errors so user can still disable demo data\n            if has_ever_succeeded:\n                status = BuildConnectorStatus.CONNECTED_WITH_ERRORS\n                error_message = \"Connector credentials are invalid\"\n            else:\n                status = BuildConnectorStatus.ERROR\n                error_message = \"Connector credentials are invalid\"\n        else:\n            # Check latest index attempt for errors\n            latest_attempt = get_latest_index_attempt_for_cc_pair_id(\n                db_session=db_session,\n                connector_credential_pair_id=cc_pair.id,\n                secondary_index=False,\n                only_finished=True,\n            )\n\n            if latest_attempt and latest_attempt.status == IndexingStatus.FAILED:\n                # If connector has succeeded before but latest attempt failed,\n                # show as connected_with_errors\n                if has_ever_succeeded:\n                    status = BuildConnectorStatus.CONNECTED_WITH_ERRORS\n                else:\n                    status = BuildConnectorStatus.ERROR\n                error_message = latest_attempt.error_msg\n            elif (\n                latest_attempt\n                and latest_attempt.status == IndexingStatus.COMPLETED_WITH_ERRORS\n            ):\n                # Completed with errors - if it has succeeded before, show as connected_with_errors\n                if has_ever_succeeded:\n                    status = BuildConnectorStatus.CONNECTED_WITH_ERRORS\n                else:\n                    status = BuildConnectorStatus.ERROR\n                error_message = \"Indexing completed with errors\"\n            elif cc_pair.status == ConnectorCredentialPairStatus.PAUSED:\n                status = BuildConnectorStatus.CONNECTED\n            elif cc_pair.last_successful_index_time is None:\n                # Never successfully indexed - check if currently indexing\n                # First check cc_pair status for scheduled/initial indexing\n                if cc_pair.status in (\n                    ConnectorCredentialPairStatus.SCHEDULED,\n                    ConnectorCredentialPairStatus.INITIAL_INDEXING,\n                ):\n                    status = BuildConnectorStatus.INDEXING\n                else:\n                    in_progress_attempt = get_latest_index_attempt_for_cc_pair_id(\n                        db_session=db_session,\n                        connector_credential_pair_id=cc_pair.id,\n                        secondary_index=False,\n                        only_finished=False,\n                    )\n                    if (\n                        in_progress_attempt\n                        and in_progress_attempt.status == IndexingStatus.IN_PROGRESS\n                    ):\n                        status = BuildConnectorStatus.INDEXING\n                    elif (\n                        in_progress_attempt\n                        and in_progress_attempt.status == IndexingStatus.NOT_STARTED\n                    ):\n                        status = BuildConnectorStatus.INDEXING\n                    else:\n                        # Has a finished attempt but never succeeded - likely error\n                        status = BuildConnectorStatus.ERROR\n                        error_message = (\n                            latest_attempt.error_msg\n                            if latest_attempt\n                            else \"Initial indexing failed\"\n                        )\n            else:\n                status = BuildConnectorStatus.CONNECTED\n\n        connectors.append(\n            BuildConnectorInfo(\n                cc_pair_id=cc_pair.id,\n                connector_id=cc_pair.connector.id,\n                credential_id=cc_pair.credential.id,\n                source=cc_pair.connector.source.value,\n                name=cc_pair.name or cc_pair.connector.name or \"Unnamed\",\n                status=status,\n                docs_indexed=0,  # Would need to query for this\n                last_indexed=cc_pair.last_successful_index_time,\n                error_message=error_message,\n            )\n        )\n\n    return BuildConnectorListResponse(connectors=connectors)\n\n\n# Headers to skip when proxying.\n# Hop-by-hop headers must not be forwarded, and set-cookie is stripped to\n# prevent LLM-generated apps from setting cookies on the parent Onyx domain.\nEXCLUDED_HEADERS = {\n    \"content-encoding\",\n    \"content-length\",\n    \"transfer-encoding\",\n    \"connection\",\n    \"set-cookie\",\n}\n\n\ndef _stream_response(response: httpx.Response) -> Iterator[bytes]:\n    \"\"\"Stream the response content in chunks.\"\"\"\n    for chunk in response.iter_bytes(chunk_size=8192):\n        yield chunk\n\n\ndef _inject_hmr_fixer(content: bytes, session_id: str) -> bytes:\n    \"\"\"Inject a script that stubs root-scoped Next HMR websocket connections.\"\"\"\n    base = f\"/api/build/sessions/{session_id}/webapp\"\n    script = f\"<script>{_WEBAPP_HMR_FIXER_TEMPLATE.replace('__WEBAPP_BASE__', base)}</script>\"\n    text = content.decode(\"utf-8\")\n    text = re.sub(\n        r\"(<head\\b[^>]*>)\",\n        lambda m: m.group(0) + script,\n        text,\n        count=1,\n        flags=re.IGNORECASE,\n    )\n    return text.encode(\"utf-8\")\n\n\ndef _rewrite_asset_paths(content: bytes, session_id: str) -> bytes:\n    \"\"\"Rewrite Next.js asset paths to go through the proxy.\"\"\"\n    webapp_base_path = f\"/api/build/sessions/{session_id}/webapp\"\n    escaped_webapp_base_path = webapp_base_path.replace(\"/\", r\"\\/\")\n    hmr_paths = (\"/_next/webpack-hmr\", \"/_next/hmr\")\n\n    text = content.decode(\"utf-8\")\n    # Anchor on delimiter so already-prefixed URLs (from assetPrefix) aren't double-rewritten.\n    for delim in ('\"', \"'\", \"(\"):\n        text = text.replace(f\"{delim}/_next/\", f\"{delim}{webapp_base_path}/_next/\")\n        text = re.sub(\n            rf\"{re.escape(delim)}https?://[^/\\\"')]+/_next/\",\n            f\"{delim}{webapp_base_path}/_next/\",\n            text,\n        )\n        text = re.sub(\n            rf\"{re.escape(delim)}wss?://[^/\\\"')]+/_next/\",\n            f\"{delim}{webapp_base_path}/_next/\",\n            text,\n        )\n    text = text.replace(r\"\\/_next\\/\", rf\"{escaped_webapp_base_path}\\/_next\\/\")\n    text = re.sub(\n        r\"https?:\\\\\\/\\\\\\/[^\\\"']+?\\\\\\/_next\\\\\\/\",\n        rf\"{escaped_webapp_base_path}\\/_next\\/\",\n        text,\n    )\n    text = re.sub(\n        r\"wss?:\\\\\\/\\\\\\/[^\\\"']+?\\\\\\/_next\\\\\\/\",\n        rf\"{escaped_webapp_base_path}\\/_next\\/\",\n        text,\n    )\n    for hmr_path in hmr_paths:\n        escaped_hmr_path = hmr_path.replace(\"/\", r\"\\/\")\n        text = text.replace(\n            f\"{webapp_base_path}{hmr_path}\",\n            hmr_path,\n        )\n        text = text.replace(\n            f\"{escaped_webapp_base_path}{escaped_hmr_path}\",\n            escaped_hmr_path,\n        )\n    text = re.sub(\n        r'\"(/(?:[a-zA-Z0-9_-]+/)*[a-zA-Z0-9_-]+\\.json)\"',\n        f'\"{webapp_base_path}\\\\1\"',\n        text,\n    )\n    text = re.sub(\n        r\"'(/(?:[a-zA-Z0-9_-]+/)*[a-zA-Z0-9_-]+\\.json)'\",\n        f\"'{webapp_base_path}\\\\1'\",\n        text,\n    )\n    text = text.replace('\"/favicon.ico', f'\"{webapp_base_path}/favicon.ico')\n    return text.encode(\"utf-8\")\n\n\ndef _rewrite_proxy_response_headers(\n    headers: dict[str, str], session_id: str\n) -> dict[str, str]:\n    \"\"\"Rewrite response headers that can leak root-scoped asset URLs.\"\"\"\n    link = headers.get(\"link\")\n    if link:\n        webapp_base_path = f\"/api/build/sessions/{session_id}/webapp\"\n        rewritten_link = re.sub(\n            r\"<https?://[^>]+/_next/\",\n            f\"<{webapp_base_path}/_next/\",\n            link,\n        )\n        rewritten_link = rewritten_link.replace(\n            \"</_next/\", f\"<{webapp_base_path}/_next/\"\n        )\n        headers[\"link\"] = rewritten_link\n    return headers\n\n\n# Content types that may contain asset path references that need rewriting\nREWRITABLE_CONTENT_TYPES = {\n    \"text/html\",\n    \"text/css\",\n    \"application/javascript\",\n    \"text/javascript\",\n    \"application/x-javascript\",\n}\n\n\ndef _get_sandbox_url(session_id: UUID, db_session: Session) -> str:\n    \"\"\"Get the internal URL for a session's Next.js server.\n\n    Uses the sandbox manager to get the correct URL for both local and\n    Kubernetes environments.\n\n    Args:\n        session_id: The build session ID\n        db_session: Database session\n\n    Returns:\n        Internal URL to proxy requests to\n\n    Raises:\n        HTTPException: If session not found, port not allocated, or sandbox not found\n    \"\"\"\n\n    session = db_session.get(BuildSession, session_id)\n    if not session:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n    if session.nextjs_port is None:\n        raise HTTPException(status_code=503, detail=\"Session port not allocated\")\n    if session.user_id is None:\n        raise HTTPException(status_code=404, detail=\"User not found\")\n\n    sandbox = get_sandbox_by_user_id(db_session, session.user_id)\n    if sandbox is None:\n        raise HTTPException(status_code=404, detail=\"Sandbox not found\")\n\n    sandbox_manager = get_sandbox_manager()\n    return sandbox_manager.get_webapp_url(sandbox.id, session.nextjs_port)\n\n\ndef _proxy_request(\n    path: str, request: Request, session_id: UUID, db_session: Session\n) -> StreamingResponse | Response:\n    \"\"\"Proxy a request to the sandbox's Next.js server.\"\"\"\n    base_url = _get_sandbox_url(session_id, db_session)\n\n    # Build the target URL\n    target_url = f\"{base_url}/{path.lstrip('/')}\"\n\n    # Include query params if present\n    if request.query_params:\n        target_url = f\"{target_url}?{request.query_params}\"\n\n    logger.debug(f\"Proxying request to: {target_url}\")\n\n    try:\n        # Make the request to the target URL\n        with httpx.Client(timeout=30.0, follow_redirects=True) as client:\n            response = client.get(\n                target_url,\n                headers={\n                    key: value\n                    for key, value in request.headers.items()\n                    if key.lower() not in (\"host\", \"content-length\")\n                },\n            )\n\n            # Build response headers, excluding hop-by-hop headers\n            response_headers = {\n                key: value\n                for key, value in response.headers.items()\n                if key.lower() not in EXCLUDED_HEADERS\n            }\n            response_headers = _rewrite_proxy_response_headers(\n                response_headers, str(session_id)\n            )\n\n            content_type = response.headers.get(\"content-type\", \"\")\n\n            # For HTML/CSS/JS responses, rewrite asset paths\n            if any(ct in content_type for ct in REWRITABLE_CONTENT_TYPES):\n                content = _rewrite_asset_paths(response.content, str(session_id))\n                if \"text/html\" in content_type:\n                    content = _inject_hmr_fixer(content, str(session_id))\n                return Response(\n                    content=content,\n                    status_code=response.status_code,\n                    headers=response_headers,\n                    media_type=content_type,\n                )\n\n            return StreamingResponse(\n                content=_stream_response(response),\n                status_code=response.status_code,\n                headers=response_headers,\n                media_type=content_type or None,\n            )\n\n    except httpx.TimeoutException:\n        logger.error(f\"Timeout while proxying request to {target_url}\")\n        raise HTTPException(status_code=504, detail=\"Gateway timeout\")\n    except httpx.RequestError as e:\n        logger.error(f\"Error proxying request to {target_url}: {e}\")\n        raise HTTPException(status_code=502, detail=\"Bad gateway\")\n\n\ndef _check_webapp_access(\n    session_id: UUID, user: User | None, db_session: Session\n) -> BuildSession:\n    \"\"\"Check if user can access a session's webapp.\n\n    - public_global: accessible by anyone (no auth required)\n    - public_org: accessible by any authenticated user\n    - private: only accessible by the session owner\n    \"\"\"\n    session = db_session.get(BuildSession, session_id)\n    if not session:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n    if session.sharing_scope == SharingScope.PUBLIC_GLOBAL:\n        return session\n    if user is None:\n        raise HTTPException(status_code=401, detail=\"Authentication required\")\n    if session.sharing_scope == SharingScope.PRIVATE and session.user_id != user.id:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n    return session\n\n\n_OFFLINE_HTML_PATH = _TEMPLATES_DIR / \"webapp_offline.html\"\n\n\ndef _offline_html_response() -> Response:\n    \"\"\"Return a branded Craft HTML page when the sandbox is not reachable.\n\n    Design mirrors the default Craft web template (outputs/web/app/page.tsx):\n    terminal window aesthetic with Minecraft-themed typing animation.\n\n    \"\"\"\n    html = _OFFLINE_HTML_PATH.read_text()\n    return Response(content=html, status_code=503, media_type=\"text/html\")\n\n\n# Public router for webapp proxy — no authentication required\n# (access controlled per-session via sharing_scope)\npublic_build_router = APIRouter(prefix=\"/build\")\n\n\n@public_build_router.get(\"/sessions/{session_id}/webapp\", response_model=None)\n@public_build_router.get(\n    \"/sessions/{session_id}/webapp/{path:path}\", response_model=None\n)\ndef get_webapp(\n    session_id: UUID,\n    request: Request,\n    path: str = \"\",\n    user: User | None = Depends(optional_user),\n    db_session: Session = Depends(get_session),\n) -> StreamingResponse | Response:\n    \"\"\"Proxy the webapp for a specific session (root and subpaths).\n\n    Accessible without authentication when sharing_scope is public_global.\n    Returns a friendly offline page when the sandbox is not running.\n    \"\"\"\n    try:\n        _check_webapp_access(session_id, user, db_session)\n    except HTTPException as e:\n        if e.status_code == 401:\n            return RedirectResponse(url=\"/auth/login\", status_code=302)\n        raise\n    try:\n        return _proxy_request(path, request, session_id, db_session)\n    except HTTPException as e:\n        if e.status_code in (502, 503, 504):\n            return _offline_html_response()\n        raise\n\n\n# =============================================================================\n# Sandbox Management Endpoints\n# =============================================================================\n\n\n@router.post(\"/sandbox/reset\", response_model=None)\ndef reset_sandbox(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    \"\"\"Reset the user's sandbox by terminating it and cleaning up all sessions.\n\n    This endpoint terminates the user's shared sandbox container/pod and\n    cleans up all session workspaces. Useful for \"start fresh\" functionality.\n\n    After calling this endpoint, the next session creation will provision a\n    new sandbox.\n    \"\"\"\n    session_manager = SessionManager(db_session)\n\n    try:\n        success = session_manager.terminate_user_sandbox(user.id)\n        if not success:\n            raise HTTPException(\n                status_code=404,\n                detail=\"No sandbox found for user\",\n            )\n        db_session.commit()\n    except HTTPException:\n        raise\n    except Exception as e:\n        db_session.rollback()\n        logger.error(f\"Failed to reset sandbox for user {user.id}: {e}\")\n        raise HTTPException(\n            status_code=500,\n            detail=f\"Failed to reset sandbox: {e}\",\n        )\n\n    return Response(status_code=204)\n"
  },
  {
    "path": "backend/onyx/server/features/build/api/messages_api.py",
    "content": "\"\"\"API endpoints for Build Mode message management.\"\"\"\n\nfrom collections.abc import Generator\nfrom uuid import UUID\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi.responses import StreamingResponse\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_user\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import User\nfrom onyx.server.features.build.api.models import MessageListResponse\nfrom onyx.server.features.build.api.models import MessageRequest\nfrom onyx.server.features.build.api.models import MessageResponse\nfrom onyx.server.features.build.db.sandbox import get_sandbox_by_user_id\nfrom onyx.server.features.build.db.sandbox import update_sandbox_heartbeat\nfrom onyx.server.features.build.session.manager import RateLimitError\nfrom onyx.server.features.build.session.manager import SessionManager\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nrouter = APIRouter()\n\n\ndef check_build_rate_limits(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    \"\"\"\n    Dependency to check build mode rate limits before processing the request.\n\n    Raises HTTPException(429) if rate limit is exceeded.\n    Follows the same pattern as chat's check_token_rate_limits.\n    \"\"\"\n    session_manager = SessionManager(db_session)\n\n    try:\n        session_manager.check_rate_limit(user)\n    except RateLimitError as e:\n        raise HTTPException(\n            status_code=429,\n            detail=str(e),\n        )\n\n\n@router.get(\"/sessions/{session_id}/messages\", tags=PUBLIC_API_TAGS)\ndef list_messages(\n    session_id: UUID,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> MessageListResponse:\n    \"\"\"Get all messages for a build session.\"\"\"\n    session_manager = SessionManager(db_session)\n\n    messages = session_manager.list_messages(session_id, user.id)\n\n    if messages is None:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n\n    return MessageListResponse(\n        messages=[MessageResponse.from_model(msg) for msg in messages]\n    )\n\n\n@router.post(\"/sessions/{session_id}/send-message\", tags=PUBLIC_API_TAGS)\ndef send_message(\n    session_id: UUID,\n    request: MessageRequest,\n    user: User = Depends(current_user),\n    _rate_limit_check: None = Depends(check_build_rate_limits),\n) -> StreamingResponse:\n    \"\"\"\n    Send a message to the CLI agent and stream the response.\n\n    Enforces rate limiting before executing the agent (via dependency).\n    Returns a Server-Sent Events (SSE) stream with the agent's response.\n\n    Follows the same pattern as /chat/send-chat-message for consistency.\n    \"\"\"\n\n    def stream_generator() -> Generator[str, None, None]:\n        \"\"\"Stream generator that manages its own database session.\n\n        This is necessary because StreamingResponse consumes the generator\n        AFTER the endpoint returns, at which point FastAPI's dependency-injected\n        db_session has already been closed. By creating a new session inside\n        the generator, we ensure the session remains open for the entire\n        streaming duration.\n        \"\"\"\n        # Capture user info needed for streaming (user object may not be available\n        # after the endpoint returns due to dependency cleanup)\n        user_id = user.id\n        message_content = request.content\n\n        with get_session_with_current_tenant() as db_session:\n            # Update sandbox heartbeat - this is the only place we track activity\n            # for determining when a sandbox should be put to sleep\n            sandbox = get_sandbox_by_user_id(db_session, user.id)\n            if sandbox and sandbox.status.is_active():\n                update_sandbox_heartbeat(db_session, sandbox.id)\n\n            session_manager = SessionManager(db_session)\n            yield from session_manager.send_message(\n                session_id, user_id, message_content\n            )\n\n    # Stream the CLI agent's response\n    return StreamingResponse(\n        stream_generator(),\n        media_type=\"text/event-stream\",\n        headers={\n            \"Cache-Control\": \"no-cache\",\n            \"Connection\": \"keep-alive\",\n            \"X-Accel-Buffering\": \"no\",  # Disable nginx buffering\n        },\n    )\n"
  },
  {
    "path": "backend/onyx/server/features/build/api/models.py",
    "content": "from datetime import datetime\nfrom enum import Enum\nfrom typing import Any\nfrom typing import TYPE_CHECKING\nfrom typing import Union\n\nfrom pydantic import BaseModel\n\nfrom onyx.configs.constants import MessageType\nfrom onyx.db.enums import ArtifactType\nfrom onyx.db.enums import BuildSessionStatus\nfrom onyx.db.enums import SandboxStatus\nfrom onyx.db.enums import SharingScope\nfrom onyx.server.features.build.sandbox.models import (\n    FilesystemEntry as FileSystemEntry,\n)\n\nif TYPE_CHECKING:\n    from onyx.db.models import Sandbox\n    from onyx.db.models import BuildSession\n\n\n# ===== Session Models =====\nclass SessionCreateRequest(BaseModel):\n    \"\"\"Request to create a new build session.\"\"\"\n\n    name: str | None = None  # Optional session name\n    demo_data_enabled: bool = True  # Whether to enable demo org_info data in sandbox\n    user_work_area: str | None = None  # User's work area (e.g., \"engineering\")\n    user_level: str | None = None  # User's level (e.g., \"ic\", \"manager\")\n    # LLM selection from user's cookie\n    llm_provider_type: str | None = None  # Provider type (e.g., \"anthropic\", \"openai\")\n    llm_model_name: str | None = None  # Model name (e.g., \"claude-opus-4-5\")\n\n\nclass SessionUpdateRequest(BaseModel):\n    \"\"\"Request to update a build session.\n\n    If name is None, the session name will be auto-generated using LLM.\n    \"\"\"\n\n    name: str | None = None\n\n\nclass SessionNameGenerateResponse(BaseModel):\n    \"\"\"Response containing a generated session name.\"\"\"\n\n    name: str\n\n\nclass SandboxResponse(BaseModel):\n    \"\"\"Sandbox metadata in session response.\"\"\"\n\n    id: str\n    status: SandboxStatus\n    container_id: str | None\n    created_at: datetime\n    last_heartbeat: datetime | None\n\n    @classmethod\n    def from_model(cls, sandbox: Any) -> \"SandboxResponse\":\n        \"\"\"Convert Sandbox ORM model to response.\"\"\"\n        return cls(\n            id=str(sandbox.id),\n            status=sandbox.status,\n            container_id=sandbox.container_id,\n            created_at=sandbox.created_at,\n            last_heartbeat=sandbox.last_heartbeat,\n        )\n\n\nclass ArtifactResponse(BaseModel):\n    \"\"\"Artifact metadata in session response.\"\"\"\n\n    id: str\n    session_id: str\n    type: ArtifactType\n    name: str\n    path: str\n    preview_url: str | None\n    created_at: datetime\n    updated_at: datetime\n\n    @classmethod\n    def from_model(cls, artifact: Any) -> \"ArtifactResponse\":\n        \"\"\"Convert Artifact ORM model to response.\"\"\"\n        return cls(\n            id=str(artifact.id),\n            session_id=str(artifact.session_id),\n            type=artifact.type,\n            name=artifact.name,\n            path=artifact.path,\n            preview_url=getattr(artifact, \"preview_url\", None),\n            created_at=artifact.created_at,\n            updated_at=artifact.updated_at,\n        )\n\n\nclass SessionResponse(BaseModel):\n    \"\"\"Response containing session details.\"\"\"\n\n    id: str\n    user_id: str | None\n    name: str | None\n    status: BuildSessionStatus\n    created_at: datetime\n    last_activity_at: datetime\n    nextjs_port: int | None\n    sandbox: SandboxResponse | None\n    artifacts: list[ArtifactResponse]\n    sharing_scope: SharingScope\n\n    @classmethod\n    def from_model(\n        cls, session: \"BuildSession\", sandbox: Union[\"Sandbox\", None] = None\n    ) -> \"SessionResponse\":\n        \"\"\"Convert BuildSession ORM model to response.\n\n        Args:\n            session: BuildSession ORM model\n            sandbox: Optional Sandbox ORM model. Since sandboxes are now user-owned\n                     (not session-owned), the sandbox must be passed separately.\n        \"\"\"\n        return cls(\n            id=str(session.id),\n            user_id=str(session.user_id) if session.user_id else None,\n            name=session.name,\n            status=session.status,\n            created_at=session.created_at,\n            last_activity_at=session.last_activity_at,\n            nextjs_port=session.nextjs_port,\n            sandbox=(SandboxResponse.from_model(sandbox) if sandbox else None),\n            artifacts=[ArtifactResponse.from_model(a) for a in session.artifacts],\n            sharing_scope=session.sharing_scope,\n        )\n\n\nclass DetailedSessionResponse(SessionResponse):\n    \"\"\"Extended session response with sandbox state details.\n\n    Used for single-session endpoints where we compute expensive fields\n    like session_loaded_in_sandbox.\n    \"\"\"\n\n    session_loaded_in_sandbox: bool\n\n    @classmethod\n    def from_session_response(\n        cls,\n        base: SessionResponse,\n        session_loaded_in_sandbox: bool,\n    ) -> \"DetailedSessionResponse\":\n        return cls(\n            **base.model_dump(),\n            session_loaded_in_sandbox=session_loaded_in_sandbox,\n        )\n\n\nclass SessionListResponse(BaseModel):\n    \"\"\"Response containing list of sessions.\"\"\"\n\n    sessions: list[SessionResponse]\n\n\nclass SetSessionSharingRequest(BaseModel):\n    \"\"\"Request to set the sharing scope of a session.\"\"\"\n\n    sharing_scope: SharingScope\n\n\nclass SetSessionSharingResponse(BaseModel):\n    \"\"\"Response after setting session sharing scope.\"\"\"\n\n    session_id: str\n    sharing_scope: SharingScope\n\n\n# ===== Message Models =====\nclass MessageRequest(BaseModel):\n    \"\"\"Request to send a message to the CLI agent.\"\"\"\n\n    content: str\n\n\nclass MessageResponse(BaseModel):\n    \"\"\"Response containing message details.\n\n    All message data is stored in message_metadata as JSON (the raw ACP packet).\n    The turn_index groups all assistant responses under the user prompt they respond to.\n\n    Packet types in message_metadata:\n    - user_message: {type: \"user_message\", content: {...}}\n    - agent_message: {type: \"agent_message\", content: {...}}\n    - agent_thought: {type: \"agent_thought\", content: {...}}\n    - tool_call_progress: {type: \"tool_call_progress\", status: \"completed\", ...}\n    - agent_plan_update: {type: \"agent_plan_update\", entries: [...]}\n    \"\"\"\n\n    id: str\n    session_id: str\n    turn_index: int\n    type: MessageType\n    message_metadata: dict[str, Any]\n    created_at: datetime\n\n    @classmethod\n    def from_model(cls, message: Any) -> \"MessageResponse\":\n        \"\"\"Convert BuildMessage ORM model to response.\"\"\"\n        return cls(\n            id=str(message.id),\n            session_id=str(message.session_id),\n            turn_index=message.turn_index,\n            type=message.type,\n            message_metadata=message.message_metadata,\n            created_at=message.created_at,\n        )\n\n\nclass MessageListResponse(BaseModel):\n    \"\"\"Response containing list of messages.\"\"\"\n\n    messages: list[MessageResponse]\n\n\n# ===== Legacy Models (for compatibility with other code) =====\nclass CreateSessionRequest(BaseModel):\n    task: str\n    available_sources: list[str] | None = None\n\n\nclass CreateSessionResponse(BaseModel):\n    session_id: str\n\n\nclass ExecuteRequest(BaseModel):\n    task: str\n    context: str | None = None\n\n\nclass ArtifactInfo(BaseModel):\n    artifact_type: str  # \"webapp\", \"file\", \"markdown\", \"image\"\n    path: str\n    filename: str\n    mime_type: str | None = None\n\n\nclass SessionStatus(BaseModel):\n    session_id: str\n    status: str  # \"idle\", \"running\", \"completed\", \"failed\"\n    webapp_url: str | None = None\n\n\nclass DirectoryListing(BaseModel):\n    path: str  # Current directory path\n    entries: list[FileSystemEntry]  # Contents\n\n\nclass WebappInfo(BaseModel):\n    has_webapp: bool  # Whether a webapp exists in outputs/web\n    webapp_url: str | None  # URL to access the webapp (e.g., http://localhost:3015)\n    status: str  # Sandbox status (running, terminated, etc.)\n    ready: bool  # Whether the NextJS dev server is actually responding\n    sharing_scope: SharingScope\n\n\n# ===== File Upload Models =====\nclass UploadResponse(BaseModel):\n    \"\"\"Response after successful file upload.\"\"\"\n\n    filename: str  # Sanitized filename\n    path: str  # Relative path in sandbox (e.g., \"attachments/doc.pdf\")\n    size_bytes: int  # File size in bytes\n\n\n# ===== Rate Limit Models =====\nclass RateLimitResponse(BaseModel):\n    \"\"\"Rate limit information.\"\"\"\n\n    is_limited: bool\n    limit_type: str  # \"weekly\" or \"total\"\n    messages_used: int\n    limit: int\n    reset_timestamp: str | None = None\n\n\n# ===== Pre-Provisioned Session Check Models =====\nclass PreProvisionedCheckResponse(BaseModel):\n    \"\"\"Response for checking if a pre-provisioned session is still valid (empty).\"\"\"\n\n    valid: bool  # True if session exists and has no messages\n    session_id: str | None = None  # Session ID if valid, None otherwise\n\n\n# ===== Build Connector Models =====\nclass BuildConnectorStatus(str, Enum):\n    \"\"\"Status of a build connector.\"\"\"\n\n    NOT_CONNECTED = \"not_connected\"\n    CONNECTED = \"connected\"\n    CONNECTED_WITH_ERRORS = \"connected_with_errors\"\n    INDEXING = \"indexing\"\n    ERROR = \"error\"\n    DELETING = \"deleting\"\n\n\nclass BuildConnectorInfo(BaseModel):\n    \"\"\"Simplified connector info for build admin panel.\"\"\"\n\n    cc_pair_id: int\n    connector_id: int\n    credential_id: int\n    source: str\n    name: str\n    status: BuildConnectorStatus\n    docs_indexed: int\n    last_indexed: datetime | None\n    error_message: str | None = None\n\n\nclass BuildConnectorListResponse(BaseModel):\n    \"\"\"List of build connectors.\"\"\"\n\n    connectors: list[BuildConnectorInfo]\n\n\n# ===== Suggestion Bubble Models =====\nclass SuggestionTheme(str, Enum):\n    \"\"\"Theme/category of a follow-up suggestion.\"\"\"\n\n    ADD = \"add\"\n    QUESTION = \"question\"\n\n\nclass SuggestionBubble(BaseModel):\n    \"\"\"A single follow-up suggestion bubble.\"\"\"\n\n    theme: SuggestionTheme\n    text: str\n\n\nclass GenerateSuggestionsRequest(BaseModel):\n    \"\"\"Request to generate follow-up suggestions.\"\"\"\n\n    user_message: str  # First user message\n    assistant_message: str  # First assistant text response (accumulated)\n\n\nclass GenerateSuggestionsResponse(BaseModel):\n    \"\"\"Response containing generated suggestions.\"\"\"\n\n    suggestions: list[SuggestionBubble]\n\n\nclass PptxPreviewResponse(BaseModel):\n    \"\"\"Response with PPTX slide preview metadata.\"\"\"\n\n    slide_count: int\n    slide_paths: list[str]  # Relative paths to slide JPEGs within session workspace\n    cached: bool  # Whether result was served from cache\n"
  },
  {
    "path": "backend/onyx/server/features/build/api/packet_logger.py",
    "content": "\"\"\"Comprehensive packet and ACP event logger for build mode debugging.\n\nLogs all packets, JSON-RPC messages, and ACP events during build mode streaming.\nProvides detailed tracing for the entire agent loop and communication flow.\n\nLog output locations (in priority order):\n1. /var/log/onyx/packets.log (for Docker - mounted to host via docker-compose volumes)\n2. backend/log/packets.log (for local dev without Docker)\n3. backend/onyx/server/features/build/packets.log (fallback)\n\nEnable logging by setting LOG_LEVEL=DEBUG or BUILD_PACKET_LOGGING=true.\n\nFeatures:\n- Rotating log with max 5000 lines (configurable via BUILD_PACKET_LOG_MAX_LINES)\n- Automatically trims oldest entries when limit is exceeded\n- Visual separators between message streams for easy reading\n\"\"\"\n\nimport json\nimport logging\nimport os\nimport threading\nimport time\nfrom pathlib import Path\nfrom typing import Any\nfrom uuid import UUID\n\n# Default max lines to keep in the log file (acts like a deque)\nDEFAULT_MAX_LOG_LINES = 5000\n\n\nclass PacketLogger:\n    \"\"\"Comprehensive logger for ACP/OpenCode communication and packet streaming.\n\n    Logs:\n    - All JSON-RPC requests sent to the agent\n    - All JSON-RPC responses/notifications received from the agent\n    - All ACP events emitted during streaming\n    - Session and sandbox lifecycle events\n    - Timing information for debugging performance\n\n    The log file is kept to a maximum number of lines (default 5000) to prevent\n    unbounded growth. When the limit is exceeded, the oldest lines are trimmed.\n    \"\"\"\n\n    _instance: \"PacketLogger | None\" = None\n    _initialized: bool\n\n    def __new__(cls) -> \"PacketLogger\":\n        if cls._instance is None:\n            cls._instance = super().__new__(cls)\n            cls._instance._initialized = False\n        return cls._instance\n\n    def __init__(self) -> None:\n        if self._initialized:\n            return\n\n        self._initialized = True\n        # Enable via LOG_LEVEL=DEBUG or BUILD_PACKET_LOGGING=true\n        log_level = os.getenv(\"LOG_LEVEL\", \"\").upper()\n        packet_logging = os.getenv(\"BUILD_PACKET_LOGGING\", \"\").lower()\n        self._enabled = log_level == \"DEBUG\" or packet_logging in (\"true\", \"1\", \"yes\")\n        self._logger: logging.Logger | None = None\n        self._log_file_path: Path | None = None\n        self._session_start_times: dict[str, float] = {}\n\n        # Max lines to keep in log file\n        try:\n            self._max_lines = int(\n                os.getenv(\"BUILD_PACKET_LOG_MAX_LINES\", str(DEFAULT_MAX_LOG_LINES))\n            )\n        except ValueError:\n            self._max_lines = DEFAULT_MAX_LOG_LINES\n\n        # Lock for thread-safe file operations\n        self._file_lock = threading.Lock()\n\n        # Track approximate line count to avoid reading file too often\n        self._approx_line_count = 0\n        self._lines_since_last_trim = 0\n        # Trim every N lines written to avoid constant file reads\n        self._trim_interval = 500\n\n        if self._enabled:\n            self._setup_logger()\n\n    def _get_log_file_path(self) -> Path:\n        \"\"\"Determine the best log file path based on environment.\n\n        Priority:\n        1. /var/log/onyx/packets.log - Docker environment (mounted to host)\n        2. backend/log/packets.log - Local dev (same dir as other logs)\n        3. backend/onyx/server/features/build/packets.log - Fallback\n        \"\"\"\n        # Option 1: Docker environment - use /var/log/onyx which is mounted\n        docker_log_dir = Path(\"/var/log/onyx\")\n        if docker_log_dir.exists() and docker_log_dir.is_dir():\n            return docker_log_dir / \"packets.log\"\n\n        # Option 2: Local dev - use backend/log directory (same as other debug logs)\n        # Navigate from this file to backend/log\n        backend_dir = Path(__file__).parents[4]  # up to backend/\n        local_log_dir = backend_dir / \"log\"\n        if local_log_dir.exists() and local_log_dir.is_dir():\n            return local_log_dir / \"packets.log\"\n\n        # Option 3: Fallback to build directory\n        build_dir = Path(__file__).parents[1]\n        return build_dir / \"packets.log\"\n\n    def _setup_logger(self) -> None:\n        \"\"\"Set up the file handler for packet logging.\"\"\"\n        self._log_file_path = self._get_log_file_path()\n\n        # Ensure parent directory exists\n        self._log_file_path.parent.mkdir(parents=True, exist_ok=True)\n\n        self._logger = logging.getLogger(\"build.packets\")\n        self._logger.setLevel(logging.DEBUG)\n        self._logger.propagate = False\n\n        self._logger.handlers.clear()\n\n        # Use append mode\n        handler = logging.FileHandler(self._log_file_path, mode=\"a\", encoding=\"utf-8\")\n        handler.setLevel(logging.DEBUG)\n        # Include timestamp in each log entry\n        handler.setFormatter(\n            logging.Formatter(\n                \"%(asctime)s.%(msecs)03d | %(message)s\", \"%Y-%m-%d %H:%M:%S\"\n            )\n        )\n\n        self._logger.addHandler(handler)\n\n        # Initialize line count from existing file\n        self._init_line_count()\n\n    def _init_line_count(self) -> None:\n        \"\"\"Initialize the approximate line count from the existing log file.\"\"\"\n        if not self._log_file_path or not self._log_file_path.exists():\n            self._approx_line_count = 0\n            return\n\n        try:\n            with open(self._log_file_path, \"r\", encoding=\"utf-8\", errors=\"ignore\") as f:\n                self._approx_line_count = sum(1 for _ in f)\n        except Exception:\n            self._approx_line_count = 0\n\n    def _maybe_trim_log(self) -> None:\n        \"\"\"Trim the log file if it exceeds the max line limit.\n\n        This is called periodically (every _trim_interval lines) to avoid\n        reading the file on every write.\n        \"\"\"\n        self._lines_since_last_trim += 1\n\n        if self._lines_since_last_trim < self._trim_interval:\n            return\n\n        self._lines_since_last_trim = 0\n        self._trim_log_file()\n\n    def _trim_log_file(self) -> None:\n        \"\"\"Trim the log file to keep only the last max_lines.\"\"\"\n        if not self._log_file_path or not self._log_file_path.exists():\n            return\n\n        with self._file_lock:\n            try:\n                # Read all lines\n                with open(\n                    self._log_file_path, \"r\", encoding=\"utf-8\", errors=\"ignore\"\n                ) as f:\n                    lines = f.readlines()\n\n                current_count = len(lines)\n                self._approx_line_count = current_count\n\n                # If under limit, nothing to do\n                if current_count <= self._max_lines:\n                    return\n\n                # Keep only the last max_lines\n                lines_to_keep = lines[-self._max_lines :]\n\n                # Close the logger's file handler temporarily\n                if self._logger:\n                    for handler in self._logger.handlers:\n                        handler.close()\n\n                # Rewrite the file with trimmed content\n                with open(self._log_file_path, \"w\", encoding=\"utf-8\") as f:\n                    f.writelines(lines_to_keep)\n\n                # Reopen the handler\n                if self._logger:\n                    self._logger.handlers.clear()\n                    handler = logging.FileHandler(\n                        self._log_file_path, mode=\"a\", encoding=\"utf-8\"\n                    )\n                    handler.setLevel(logging.DEBUG)\n                    handler.setFormatter(\n                        logging.Formatter(\n                            \"%(asctime)s.%(msecs)03d | %(message)s\", \"%Y-%m-%d %H:%M:%S\"\n                        )\n                    )\n                    self._logger.addHandler(handler)\n\n                self._approx_line_count = len(lines_to_keep)\n\n            except Exception:\n                pass  # Silently ignore errors during trim\n\n    def clear_log_file(self) -> None:\n        \"\"\"Clear the log file contents.\n\n        Note: With the rotating log approach, this is optional. The log will\n        automatically trim itself. But this can still be useful to start fresh.\n        \"\"\"\n        if not self._enabled or not self._log_file_path:\n            return\n\n        with self._file_lock:\n            try:\n                # Close the logger's file handler temporarily\n                if self._logger:\n                    for handler in self._logger.handlers:\n                        handler.close()\n\n                # Truncate the file\n                with open(self._log_file_path, \"w\", encoding=\"utf-8\") as f:\n                    f.write(\"\")  # Empty the file\n\n                # Reopen the handler\n                if self._logger:\n                    self._logger.handlers.clear()\n                    handler = logging.FileHandler(\n                        self._log_file_path, mode=\"a\", encoding=\"utf-8\"\n                    )\n                    handler.setLevel(logging.DEBUG)\n                    handler.setFormatter(\n                        logging.Formatter(\n                            \"%(asctime)s.%(msecs)03d | %(message)s\", \"%Y-%m-%d %H:%M:%S\"\n                        )\n                    )\n                    self._logger.addHandler(handler)\n\n                self._approx_line_count = 0\n                self._lines_since_last_trim = 0\n\n            except Exception:\n                pass  # Silently ignore errors\n\n    @property\n    def is_enabled(self) -> bool:\n        \"\"\"Check if logging is enabled.\"\"\"\n        return self._enabled and self._logger is not None\n\n    def _format_uuid(self, value: Any) -> str:\n        \"\"\"Format UUID for logging (shortened for readability).\"\"\"\n        if isinstance(value, UUID):\n            return str(value)[:8]\n        if isinstance(value, str) and len(value) >= 8:\n            return value[:8]\n        return str(value)\n\n    def _write_log(self, message: str) -> None:\n        \"\"\"Internal method to write a log message and trigger trim check.\n\n        Args:\n            message: The formatted log message\n        \"\"\"\n        if not self._logger:\n            return\n\n        self._logger.debug(message)\n        self._maybe_trim_log()\n\n    def log(self, packet_type: str, payload: dict[str, Any] | None = None) -> None:\n        \"\"\"Log a packet as JSON.\n\n        Args:\n            packet_type: The type of packet\n            payload: The packet payload\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        try:\n            output = json.dumps(payload, indent=2, default=str) if payload else \"{}\"\n            self._write_log(f\"[PACKET] {packet_type}\\n{output}\")\n        except Exception:\n            self._write_log(f\"[PACKET] {packet_type}\\n{payload}\")\n\n    def log_raw(self, label: str, data: Any) -> None:\n        \"\"\"Log raw data with a label.\n\n        Args:\n            label: A label for this log entry\n            data: Any data to log\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        try:\n            if isinstance(data, (dict, list)):\n                output = json.dumps(data, indent=2, default=str)\n            else:\n                output = str(data)\n            self._write_log(f\"[RAW] {label}\\n{output}\")\n        except Exception:\n            self._write_log(f\"[RAW] {label}\\n{data}\")\n\n    # =========================================================================\n    # JSON-RPC Communication Logging\n    # =========================================================================\n\n    def log_jsonrpc_request(\n        self,\n        method: str,\n        request_id: int | None,\n        params: dict[str, Any] | None = None,\n        context: str = \"\",\n    ) -> None:\n        \"\"\"Log a JSON-RPC request being sent to the agent.\n\n        Args:\n            method: The JSON-RPC method name\n            request_id: The request ID (None for notifications)\n            params: The request parameters\n            context: Additional context (e.g., \"local\", \"k8s\")\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        try:\n            req_type = \"REQUEST\" if request_id is not None else \"NOTIFICATION\"\n            ctx_prefix = f\"[{context}] \" if context else \"\"\n            params_str = json.dumps(params, indent=2, default=str) if params else \"{}\"\n            id_str = f\" id={request_id}\" if request_id is not None else \"\"\n            self._write_log(\n                f\"{ctx_prefix}[JSONRPC-OUT] {req_type} {method}{id_str}\\n{params_str}\"\n            )\n        except Exception as e:\n            self._write_log(f\"[JSONRPC-OUT] {method} (logging error: {e})\")\n\n    def log_jsonrpc_response(\n        self,\n        request_id: int | None,\n        result: dict[str, Any] | None = None,\n        error: dict[str, Any] | None = None,\n        context: str = \"\",\n    ) -> None:\n        \"\"\"Log a JSON-RPC response received from the agent.\n\n        Args:\n            request_id: The request ID this is responding to\n            result: The result payload (if success)\n            error: The error payload (if error)\n            context: Additional context (e.g., \"local\", \"k8s\")\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        try:\n            ctx_prefix = f\"[{context}] \" if context else \"\"\n            id_str = f\" id={request_id}\" if request_id is not None else \"\"\n            if error:\n                error_str = json.dumps(error, indent=2, default=str)\n                self._write_log(\n                    f\"{ctx_prefix}[JSONRPC-IN] RESPONSE{id_str} ERROR\\n{error_str}\"\n                )\n            else:\n                result_str = (\n                    json.dumps(result, indent=2, default=str) if result else \"{}\"\n                )\n                self._write_log(\n                    f\"{ctx_prefix}[JSONRPC-IN] RESPONSE{id_str}\\n{result_str}\"\n                )\n        except Exception as e:\n            self._write_log(f\"[JSONRPC-IN] RESPONSE (logging error: {e})\")\n\n    def log_jsonrpc_notification(\n        self,\n        method: str,\n        params: dict[str, Any] | None = None,\n        context: str = \"\",\n    ) -> None:\n        \"\"\"Log a JSON-RPC notification received from the agent.\n\n        Args:\n            method: The notification method name\n            params: The notification parameters\n            context: Additional context (e.g., \"local\", \"k8s\")\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        try:\n            ctx_prefix = f\"[{context}] \" if context else \"\"\n            params_str = json.dumps(params, indent=2, default=str) if params else \"{}\"\n            self._write_log(\n                f\"{ctx_prefix}[JSONRPC-IN] NOTIFICATION {method}\\n{params_str}\"\n            )\n        except Exception as e:\n            self._write_log(f\"[JSONRPC-IN] NOTIFICATION {method} (logging error: {e})\")\n\n    def log_jsonrpc_raw_message(\n        self,\n        direction: str,\n        message: dict[str, Any] | str,\n        context: str = \"\",\n    ) -> None:\n        \"\"\"Log a raw JSON-RPC message (for debugging parsing issues).\n\n        Args:\n            direction: \"IN\" or \"OUT\"\n            message: The raw message (dict or string)\n            context: Additional context\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        try:\n            ctx_prefix = f\"[{context}] \" if context else \"\"\n            if isinstance(message, dict):\n                msg_str = json.dumps(message, indent=2, default=str)\n            else:\n                msg_str = str(message)\n            self._write_log(f\"{ctx_prefix}[JSONRPC-RAW-{direction}]\\n{msg_str}\")\n        except Exception as e:\n            self._write_log(f\"[JSONRPC-RAW-{direction}] (logging error: {e})\")\n\n    # =========================================================================\n    # ACP Event Logging\n    # =========================================================================\n\n    def log_acp_event(\n        self,\n        event_type: str,\n        event_data: dict[str, Any],\n        sandbox_id: UUID | str | None = None,\n        session_id: UUID | str | None = None,\n    ) -> None:\n        \"\"\"Log an ACP event being emitted.\n\n        Args:\n            event_type: The ACP event type (e.g., \"agent_message_chunk\")\n            event_data: The full event data\n            sandbox_id: The sandbox ID (optional, for context)\n            session_id: The session ID (optional, for context)\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        try:\n            ctx_parts = []\n            if sandbox_id:\n                ctx_parts.append(f\"sandbox={self._format_uuid(sandbox_id)}\")\n            if session_id:\n                ctx_parts.append(f\"session={self._format_uuid(session_id)}\")\n            ctx = f\" ({', '.join(ctx_parts)})\" if ctx_parts else \"\"\n\n            # For message chunks, show truncated content for readability\n            display_data = event_data.copy()\n            if event_type in (\"agent_message_chunk\", \"agent_thought_chunk\"):\n                content = display_data.get(\"content\", {})\n                if isinstance(content, dict) and \"text\" in content:\n                    text = content.get(\"text\", \"\")\n                    if len(text) > 200:\n                        display_data[\"content\"] = {\n                            **content,\n                            \"text\": text[:200] + f\"... ({len(text)} chars total)\",\n                        }\n\n            event_str = json.dumps(display_data, indent=2, default=str)\n            self._write_log(f\"[ACP-EVENT] {event_type}{ctx}\\n{event_str}\")\n        except Exception as e:\n            self._write_log(f\"[ACP-EVENT] {event_type} (logging error: {e})\")\n\n    def log_acp_event_yielded(\n        self,\n        event_type: str,\n        event_obj: Any,\n        sandbox_id: UUID | str | None = None,\n        session_id: UUID | str | None = None,\n    ) -> None:\n        \"\"\"Log an ACP event object being yielded from the generator.\n\n        Args:\n            event_type: The ACP event type\n            event_obj: The Pydantic event object\n            sandbox_id: The sandbox ID (optional)\n            session_id: The session ID (optional)\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        try:\n            if hasattr(event_obj, \"model_dump\"):\n                event_data = event_obj.model_dump(mode=\"json\", by_alias=True)\n            else:\n                event_data = {\"raw\": str(event_obj)}\n            self.log_acp_event(event_type, event_data, sandbox_id, session_id)\n        except Exception as e:\n            self._write_log(f\"[ACP-EVENT] {event_type} (logging error: {e})\")\n\n    # =========================================================================\n    # Session and Sandbox Lifecycle Logging\n    # =========================================================================\n\n    def log_session_start(\n        self,\n        session_id: UUID | str,\n        sandbox_id: UUID | str,\n        message_preview: str = \"\",\n    ) -> None:\n        \"\"\"Log the start of a message streaming session.\n\n        Args:\n            session_id: The session ID\n            sandbox_id: The sandbox ID\n            message_preview: First 100 chars of the user message\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        session_key = str(session_id)\n        self._session_start_times[session_key] = time.time()\n\n        preview = (\n            message_preview[:100] + \"...\"\n            if len(message_preview) > 100\n            else message_preview\n        )\n        self._write_log(\n            f\"[SESSION-START] session={self._format_uuid(session_id)} \"\n            f\"sandbox={self._format_uuid(sandbox_id)}\\n\"\n            f\"  message: {preview}\"\n        )\n\n    def log_session_end(\n        self,\n        session_id: UUID | str,\n        success: bool = True,\n        error: str | None = None,\n        events_count: int = 0,\n    ) -> None:\n        \"\"\"Log the end of a message streaming session.\n\n        Args:\n            session_id: The session ID\n            success: Whether the session completed successfully\n            error: Error message if failed\n            events_count: Number of events emitted\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        session_key = str(session_id)\n        start_time = self._session_start_times.pop(session_key, None)\n        duration_ms = (time.time() - start_time) * 1000 if start_time else 0\n\n        status = \"SUCCESS\" if success else \"FAILED\"\n        error_str = f\"\\n  error: {error}\" if error else \"\"\n        self._write_log(\n            f\"[SESSION-END] session={self._format_uuid(session_id)} \"\n            f\"status={status} duration={duration_ms:.0f}ms events={events_count}\"\n            f\"{error_str}\"\n        )\n\n    def log_acp_client_start(\n        self,\n        sandbox_id: UUID | str,\n        session_id: UUID | str,\n        cwd: str,\n        context: str = \"\",\n    ) -> None:\n        \"\"\"Log ACP client initialization.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            cwd: Working directory\n            context: \"local\" or \"k8s\"\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        ctx_prefix = f\"[{context}] \" if context else \"\"\n        self._write_log(\n            f\"{ctx_prefix}[ACP-CLIENT-START] \"\n            f\"sandbox={self._format_uuid(sandbox_id)} \"\n            f\"session={self._format_uuid(session_id)}\\n\"\n            f\"  cwd: {cwd}\"\n        )\n\n    def log_acp_client_stop(\n        self,\n        sandbox_id: UUID | str,\n        session_id: UUID | str,\n        context: str = \"\",\n    ) -> None:\n        \"\"\"Log ACP client shutdown.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            context: \"local\" or \"k8s\"\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        ctx_prefix = f\"[{context}] \" if context else \"\"\n        self._write_log(\n            f\"{ctx_prefix}[ACP-CLIENT-STOP] sandbox={self._format_uuid(sandbox_id)} session={self._format_uuid(session_id)}\"\n        )\n\n    # =========================================================================\n    # Streaming State Logging\n    # =========================================================================\n\n    def log_streaming_state_update(\n        self,\n        session_id: UUID | str,\n        state_type: str,\n        details: dict[str, Any] | None = None,\n    ) -> None:\n        \"\"\"Log streaming state changes.\n\n        Args:\n            session_id: The session ID\n            state_type: Type of state change (e.g., \"chunk_accumulated\", \"saved_to_db\")\n            details: Additional details\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        try:\n            details_str = \"\"\n            if details:\n                details_str = \"\\n\" + json.dumps(details, indent=2, default=str)\n            self._write_log(\n                f\"[STREAMING-STATE] session={self._format_uuid(session_id)} type={state_type}{details_str}\"\n            )\n        except Exception as e:\n            self._write_log(f\"[STREAMING-STATE] {state_type} (logging error: {e})\")\n\n    def log_sse_emit(\n        self,\n        event_type: str,\n        session_id: UUID | str | None = None,\n    ) -> None:\n        \"\"\"Log SSE event being emitted to frontend.\n\n        Args:\n            event_type: The event type being emitted\n            session_id: The session ID\n        \"\"\"\n        if not self._enabled or not self._logger:\n            return\n\n        session_str = f\" session={self._format_uuid(session_id)}\" if session_id else \"\"\n        self._write_log(f\"[SSE-EMIT] {event_type}{session_str}\")\n\n\n# Singleton instance\n_packet_logger: PacketLogger | None = None\n\n\ndef get_packet_logger() -> PacketLogger:\n    \"\"\"Get the singleton packet logger instance.\"\"\"\n    global _packet_logger\n    if _packet_logger is None:\n        _packet_logger = PacketLogger()\n    return _packet_logger\n\n\ndef log_separator(label: str = \"\") -> None:\n    \"\"\"Log a visual separator for readability in the log file.\n\n    Args:\n        label: Optional label for the separator\n    \"\"\"\n    logger = get_packet_logger()\n    if not logger.is_enabled or not logger._logger:\n        return\n\n    separator = \"=\" * 80\n    if label:\n        logger._write_log(f\"\\n{separator}\\n{label}\\n{separator}\")\n    else:\n        logger._write_log(f\"\\n{separator}\")\n"
  },
  {
    "path": "backend/onyx/server/features/build/api/packets.py",
    "content": "\"\"\"Build Mode packet types for streaming agent responses.\n\nThis module defines CUSTOM Onyx packet types that extend ACP (Agent Client Protocol).\nACP events are passed through directly from the agent - this module only contains\nOnyx-specific extensions like artifacts and file operations.\n\nAll packets use SSE (Server-Sent Events) format with `event: message` and include\na `type` field to distinguish packet types.\n\nACP events (passed through directly from acp.schema):\n- agent_message_chunk: Text/image content from agent\n- agent_thought_chunk: Agent's internal reasoning\n- tool_call_start: Tool invocation started\n- tool_call_progress: Tool execution progress/result\n- agent_plan_update: Agent's execution plan\n- current_mode_update: Agent mode change\n- prompt_response: Agent finished processing\n- error: An error occurred\n\nCustom Onyx packets (defined here):\n- error: Onyx-specific errors (e.g., session not found)\n\nBased on:\n- Agent Client Protocol (ACP): https://agentclientprotocol.com\n\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import Literal\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\n\n# =============================================================================\n# Base Packet Type\n# =============================================================================\n\n\nclass BasePacket(BaseModel):\n    \"\"\"Base packet with common fields for all custom Onyx packet types.\"\"\"\n\n    type: str\n    timestamp: str = Field(\n        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()\n    )\n\n\n# =============================================================================\n# Custom Onyx Packets\n# =============================================================================\n\n\nclass ErrorPacket(BasePacket):\n    \"\"\"An Onyx-specific error occurred (e.g., session not found, sandbox not running).\"\"\"\n\n    type: Literal[\"error\"] = \"error\"\n    message: str\n    code: int | None = None\n    details: dict[str, Any] | None = None\n\n\n# =============================================================================\n# Union Type for Custom Onyx Packets\n# =============================================================================\n\nBuildPacket = ErrorPacket\n"
  },
  {
    "path": "backend/onyx/server/features/build/api/rate_limit.py",
    "content": "\"\"\"Rate limiting logic for Build Mode.\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Literal\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import User\nfrom onyx.feature_flags.factory import get_default_feature_flag_provider\nfrom onyx.server.features.build.api.models import RateLimitResponse\nfrom onyx.server.features.build.api.subscription_check import is_user_subscribed\nfrom onyx.server.features.build.configs import CRAFT_PAID_USER_RATE_LIMIT\nfrom onyx.server.features.build.db.rate_limit import count_user_messages_in_window\nfrom onyx.server.features.build.db.rate_limit import count_user_messages_total\nfrom onyx.server.features.build.db.rate_limit import get_oldest_message_timestamp\nfrom onyx.server.features.build.utils import CRAFT_HAS_USAGE_LIMITS\nfrom shared_configs.configs import MULTI_TENANT\n\n# Default limit for free/non-subscribed users (not configurable)\nFREE_USER_RATE_LIMIT = 5\n\n\ndef _should_skip_rate_limiting(user: User) -> bool:\n    \"\"\"\n    Check if rate limiting should be skipped for this user.\n\n    Currently grants unlimited usage to dev tenant users (tenant_dev).\n    Controlled via PostHog feature flag.\n\n    Returns:\n        True to skip rate limiting (unlimited), False to apply normal limits\n    \"\"\"\n    # NOTE: We can modify the posthog flag to return more detail about a limit\n    # i.e. can set variable limits per user and tenant via PostHog instead of env vars\n    # to avoid re-deploying on every limit change\n\n    feature_flag_provider = get_default_feature_flag_provider()\n    # Flag returns True for users who SHOULD be rate limited\n    # We negate to get: True = skip rate limiting\n    has_rate_limit = feature_flag_provider.feature_enabled(\n        CRAFT_HAS_USAGE_LIMITS,\n        user.id,\n    )\n    return not has_rate_limit\n\n\ndef get_user_rate_limit_status(\n    user: User,\n    db_session: Session,\n) -> RateLimitResponse:\n    \"\"\"\n    Get the rate limit status for a user.\n\n    Rate limits:\n        - Cloud (MULTI_TENANT=true):\n            - Subscribed users: CRAFT_PAID_USER_RATE_LIMIT messages per week\n              (configurable, default 25)\n            - Non-subscribed users: 5 messages (lifetime total)\n            - Per-user overrides via PostHog feature flag\n        - Self-hosted (MULTI_TENANT=false):\n            - Unlimited (no rate limiting)\n\n    Args:\n        user: The authenticated user\n        db_session: Database session\n\n    Returns:\n        RateLimitResponse with current limit status\n    \"\"\"\n    # Self-hosted deployments have no rate limits\n    if not MULTI_TENANT:\n        return RateLimitResponse(\n            is_limited=False,\n            limit_type=\"weekly\",\n            messages_used=0,\n            limit=0,  # 0 indicates unlimited\n            reset_timestamp=None,\n        )\n\n    # Check if user should skip rate limiting (e.g., dev tenant users)\n    if _should_skip_rate_limiting(user):\n        return RateLimitResponse(\n            is_limited=False,\n            limit_type=\"weekly\",\n            messages_used=-1,\n            limit=0,  # 0 indicates unlimited\n            reset_timestamp=None,\n        )\n\n    # Determine subscription status\n    is_subscribed = is_user_subscribed(user, db_session)\n\n    # Get limit based on subscription status\n    limit = CRAFT_PAID_USER_RATE_LIMIT if is_subscribed else FREE_USER_RATE_LIMIT\n\n    # Limit type: weekly for subscribed users, total for free\n    limit_type: Literal[\"weekly\", \"total\"] = \"weekly\" if is_subscribed else \"total\"\n\n    # Count messages\n    if limit_type == \"weekly\":\n        # Subscribed: rolling 7-day window\n        cutoff_time = datetime.now(tz=timezone.utc) - timedelta(days=7)\n        messages_used = count_user_messages_in_window(user.id, cutoff_time, db_session)\n\n        # Calculate reset timestamp (when oldest message ages out)\n        # Only show reset time if user is at or over the limit\n        if messages_used >= limit:\n            oldest_msg = get_oldest_message_timestamp(user.id, cutoff_time, db_session)\n            if oldest_msg:\n                reset_time = oldest_msg + timedelta(days=7)\n                reset_timestamp = reset_time.isoformat()\n            else:\n                reset_timestamp = None\n        else:\n            reset_timestamp = None\n    else:\n        # Non-subscribed: lifetime total\n        messages_used = count_user_messages_total(user.id, db_session)\n        reset_timestamp = None\n\n    return RateLimitResponse(\n        is_limited=messages_used >= limit,\n        limit_type=limit_type,\n        messages_used=messages_used,\n        limit=limit,\n        reset_timestamp=reset_timestamp,\n    )\n"
  },
  {
    "path": "backend/onyx/server/features/build/api/sessions_api.py",
    "content": "\"\"\"API endpoints for Build Mode session management.\"\"\"\n\nfrom uuid import UUID\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import File\nfrom fastapi import HTTPException\nfrom fastapi import Response\nfrom fastapi import UploadFile\nfrom sqlalchemy import exists\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.enums import BuildSessionStatus\nfrom onyx.db.enums import SandboxStatus\nfrom onyx.db.models import BuildMessage\nfrom onyx.db.models import User\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.features.build.api.models import ArtifactResponse\nfrom onyx.server.features.build.api.models import DetailedSessionResponse\nfrom onyx.server.features.build.api.models import DirectoryListing\nfrom onyx.server.features.build.api.models import GenerateSuggestionsRequest\nfrom onyx.server.features.build.api.models import GenerateSuggestionsResponse\nfrom onyx.server.features.build.api.models import PptxPreviewResponse\nfrom onyx.server.features.build.api.models import PreProvisionedCheckResponse\nfrom onyx.server.features.build.api.models import SessionCreateRequest\nfrom onyx.server.features.build.api.models import SessionListResponse\nfrom onyx.server.features.build.api.models import SessionNameGenerateResponse\nfrom onyx.server.features.build.api.models import SessionResponse\nfrom onyx.server.features.build.api.models import SessionUpdateRequest\nfrom onyx.server.features.build.api.models import SetSessionSharingRequest\nfrom onyx.server.features.build.api.models import SetSessionSharingResponse\nfrom onyx.server.features.build.api.models import SuggestionBubble\nfrom onyx.server.features.build.api.models import SuggestionTheme\nfrom onyx.server.features.build.api.models import UploadResponse\nfrom onyx.server.features.build.api.models import WebappInfo\nfrom onyx.server.features.build.configs import SANDBOX_BACKEND\nfrom onyx.server.features.build.configs import SandboxBackend\nfrom onyx.server.features.build.db.build_session import allocate_nextjs_port\nfrom onyx.server.features.build.db.build_session import get_build_session\nfrom onyx.server.features.build.db.build_session import set_build_session_sharing_scope\nfrom onyx.server.features.build.db.sandbox import get_latest_snapshot_for_session\nfrom onyx.server.features.build.db.sandbox import get_sandbox_by_user_id\nfrom onyx.server.features.build.db.sandbox import update_sandbox_heartbeat\nfrom onyx.server.features.build.db.sandbox import update_sandbox_status__no_commit\nfrom onyx.server.features.build.sandbox import get_sandbox_manager\nfrom onyx.server.features.build.session.manager import SessionManager\nfrom onyx.server.features.build.session.manager import UploadLimitExceededError\nfrom onyx.server.features.build.utils import sanitize_filename\nfrom onyx.server.features.build.utils import validate_file\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/sessions\")\n\n\n# =============================================================================\n# Session Management Endpoints\n# =============================================================================\n\n\n@router.get(\"\", response_model=SessionListResponse)\ndef list_sessions(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> SessionListResponse:\n    \"\"\"List all build sessions for the current user.\"\"\"\n    session_manager = SessionManager(db_session)\n\n    sessions = session_manager.list_sessions(user.id)\n\n    # Get the user's sandbox (shared across all sessions)\n    sandbox = get_sandbox_by_user_id(db_session, user.id)\n\n    return SessionListResponse(\n        sessions=[SessionResponse.from_model(session, sandbox) for session in sessions]\n    )\n\n\n# Lock timeout for session creation (should be longer than max provision time)\nSESSION_CREATE_LOCK_TIMEOUT_SECONDS = 300\n\n\n@router.post(\"\", response_model=DetailedSessionResponse)\ndef create_session(\n    request: SessionCreateRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> DetailedSessionResponse:\n    \"\"\"\n    Create or get an existing empty build session.\n\n    Creates a sandbox with the necessary file structure and returns a session ID.\n    Uses SessionManager for session and sandbox provisioning.\n\n    This endpoint is atomic - if sandbox provisioning fails, no database\n    records are created (transaction is rolled back).\n\n    Uses Redis lock to prevent race conditions when multiple requests try to\n    create/provision a session for the same user concurrently.\n    \"\"\"\n    tenant_id = get_current_tenant_id()\n    redis_client = get_redis_client(tenant_id=tenant_id)\n\n    # Lock on user_id to prevent concurrent session creation for the same user\n    # This prevents race conditions where two requests both see sandbox as SLEEPING\n    # and both try to provision, with one deleting the other's work\n    lock_key = f\"session_create:{user.id}\"\n    lock = redis_client.lock(lock_key, timeout=SESSION_CREATE_LOCK_TIMEOUT_SECONDS)\n\n    # blocking=True means wait if another create is in progress\n    acquired = lock.acquire(\n        blocking=True, blocking_timeout=SESSION_CREATE_LOCK_TIMEOUT_SECONDS\n    )\n    if not acquired:\n        raise HTTPException(\n            status_code=503,\n            detail=\"Session creation timed out waiting for lock\",\n        )\n\n    try:\n        session_manager = SessionManager(db_session)\n        build_session = session_manager.get_or_create_empty_session(\n            user.id,\n            user_work_area=(\n                request.user_work_area if request.demo_data_enabled else None\n            ),\n            user_level=request.user_level if request.demo_data_enabled else None,\n            llm_provider_type=request.llm_provider_type,\n            llm_model_name=request.llm_model_name,\n            demo_data_enabled=request.demo_data_enabled,\n        )\n        db_session.commit()\n\n        sandbox = get_sandbox_by_user_id(db_session, user.id)\n        base_response = SessionResponse.from_model(build_session, sandbox)\n        return DetailedSessionResponse.from_session_response(\n            base_response, session_loaded_in_sandbox=True\n        )\n    except ValueError as e:\n        logger.exception(\"Session creation failed\")\n        db_session.rollback()\n        raise HTTPException(status_code=429, detail=str(e))\n    except Exception as e:\n        db_session.rollback()\n        logger.error(f\"Session creation failed: {e}\")\n        raise HTTPException(status_code=500, detail=f\"Session creation failed: {e}\")\n    finally:\n        if lock.owned():\n            lock.release()\n\n\n@router.get(\"/{session_id}\", response_model=DetailedSessionResponse)\ndef get_session_details(\n    session_id: UUID,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> DetailedSessionResponse:\n    \"\"\"\n    Get details of a specific build session.\n\n    Returns session_loaded_in_sandbox to indicate if the session workspace\n    exists in the running sandbox.\n    \"\"\"\n    session_manager = SessionManager(db_session)\n\n    session = session_manager.get_session(session_id, user.id)\n\n    if session is None:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n\n    # Get the user's sandbox to include in response\n    sandbox = get_sandbox_by_user_id(db_session, user.id)\n\n    # Check if session workspace exists in the sandbox\n    session_loaded = False\n    if sandbox and sandbox.status == SandboxStatus.RUNNING:\n        sandbox_manager = get_sandbox_manager()\n        session_loaded = sandbox_manager.session_workspace_exists(\n            sandbox.id, session_id\n        )\n\n    base_response = SessionResponse.from_model(session, sandbox)\n    return DetailedSessionResponse.from_session_response(\n        base_response, session_loaded_in_sandbox=session_loaded\n    )\n\n\n@router.get(\n    \"/{session_id}/pre-provisioned-check\", response_model=PreProvisionedCheckResponse\n)\ndef check_pre_provisioned_session(\n    session_id: UUID,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> PreProvisionedCheckResponse:\n    \"\"\"\n    Check if a pre-provisioned session is still valid (empty).\n\n    Used by the frontend to poll and detect when another tab has used\n    the session. A session is considered valid if it has no messages yet.\n\n    Returns:\n        - valid=True, session_id=<id> if the session is still empty\n        - valid=False, session_id=None if the session has messages or doesn't exist\n    \"\"\"\n    session = get_build_session(session_id, user.id, db_session)\n\n    if session is None:\n        return PreProvisionedCheckResponse(valid=False, session_id=None)\n\n    # Check if session is still empty (no messages = pre-provisioned)\n    has_messages = db_session.query(\n        exists().where(BuildMessage.session_id == session_id)\n    ).scalar()\n\n    if not has_messages:\n        return PreProvisionedCheckResponse(valid=True, session_id=str(session_id))\n\n    # Session has messages - it's no longer a valid pre-provisioned session\n    return PreProvisionedCheckResponse(valid=False, session_id=None)\n\n\n@router.post(\"/{session_id}/generate-name\", response_model=SessionNameGenerateResponse)\ndef generate_session_name(\n    session_id: UUID,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> SessionNameGenerateResponse:\n    \"\"\"Generate a session name using LLM based on the first user message.\"\"\"\n    session_manager = SessionManager(db_session)\n\n    generated_name = session_manager.generate_session_name(session_id, user.id)\n\n    if generated_name is None:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n\n    return SessionNameGenerateResponse(name=generated_name)\n\n\n@router.post(\n    \"/{session_id}/generate-suggestions\", response_model=GenerateSuggestionsResponse\n)\ndef generate_suggestions(\n    session_id: UUID,\n    request: GenerateSuggestionsRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> GenerateSuggestionsResponse:\n    \"\"\"Generate follow-up suggestions based on the first exchange in a session.\"\"\"\n    session_manager = SessionManager(db_session)\n\n    # Verify session exists and belongs to user\n    session = session_manager.get_session(session_id, user.id)\n    if session is None:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n\n    # Generate suggestions\n    suggestions_data = session_manager.generate_followup_suggestions(\n        user_message=request.user_message,\n        assistant_message=request.assistant_message,\n    )\n\n    # Convert to response model\n    suggestions = [\n        SuggestionBubble(\n            theme=SuggestionTheme(item[\"theme\"]),\n            text=item[\"text\"],\n        )\n        for item in suggestions_data\n    ]\n\n    return GenerateSuggestionsResponse(suggestions=suggestions)\n\n\n@router.put(\"/{session_id}/name\", response_model=SessionResponse)\ndef update_session_name(\n    session_id: UUID,\n    request: SessionUpdateRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> SessionResponse:\n    \"\"\"Update the name of a build session.\"\"\"\n    session_manager = SessionManager(db_session)\n\n    session = session_manager.update_session_name(session_id, user.id, request.name)\n\n    if session is None:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n\n    # Get the user's sandbox to include in response\n    sandbox = get_sandbox_by_user_id(db_session, user.id)\n    return SessionResponse.from_model(session, sandbox)\n\n\n@router.patch(\"/{session_id}/public\")\ndef set_session_public(\n    session_id: UUID,\n    request: SetSessionSharingRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> SetSessionSharingResponse:\n    \"\"\"Set the sharing scope of a build session's webapp.\"\"\"\n    updated = set_build_session_sharing_scope(\n        session_id, user.id, request.sharing_scope, db_session\n    )\n    if not updated:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n    return SetSessionSharingResponse(\n        session_id=str(session_id),\n        sharing_scope=updated.sharing_scope,\n    )\n\n\n@router.delete(\"/{session_id}\", response_model=None)\ndef delete_session(\n    session_id: UUID,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    \"\"\"Delete a build session and all associated data.\n\n    This endpoint is atomic - if sandbox termination fails, the session\n    is NOT deleted (transaction is rolled back).\n    \"\"\"\n    session_manager = SessionManager(db_session)\n\n    try:\n        success = session_manager.delete_session(session_id, user.id)\n        if not success:\n            raise HTTPException(status_code=404, detail=\"Session not found\")\n        db_session.commit()\n    except HTTPException:\n        # Re-raise HTTP exceptions (like 404) without rollback\n        raise\n    except Exception as e:\n        # Sandbox termination failed - rollback to preserve session\n        db_session.rollback()\n        logger.error(f\"Failed to delete session {session_id}: {e}\")\n        raise HTTPException(\n            status_code=500,\n            detail=f\"Failed to delete session: {e}\",\n        )\n\n    return Response(status_code=204)\n\n\n# Lock timeout should be longer than max restore time (5 minutes)\nRESTORE_LOCK_TIMEOUT_SECONDS = 300\n\n\n@router.post(\"/{session_id}/restore\", response_model=DetailedSessionResponse)\ndef restore_session(\n    session_id: UUID,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> DetailedSessionResponse:\n    \"\"\"Restore sandbox and load session snapshot. Blocks until complete.\n\n    Uses Redis lock to ensure only one restore runs per sandbox at a time.\n    If another restore is in progress, waits for it to complete.\n\n    Handles two cases:\n    1. Sandbox is SLEEPING: Re-provision pod, then load session snapshot\n    2. Sandbox is RUNNING but session not loaded: Just load session snapshot\n\n    Returns immediately if session workspace already exists in pod.\n    Always returns session_loaded_in_sandbox=True on success.\n    \"\"\"\n    session = get_build_session(session_id, user.id, db_session)\n    if not session:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n\n    sandbox = get_sandbox_by_user_id(db_session, user.id)\n    if not sandbox:\n        raise HTTPException(status_code=404, detail=\"Sandbox not found\")\n\n    # If sandbox is already running, check if session workspace exists\n    sandbox_manager = get_sandbox_manager()\n    tenant_id = get_current_tenant_id()\n\n    # Need to do some work - acquire Redis lock\n    redis_client = get_redis_client(tenant_id=tenant_id)\n    lock_key = f\"sandbox_restore:{sandbox.id}\"\n    lock = redis_client.lock(lock_key, timeout=RESTORE_LOCK_TIMEOUT_SECONDS)\n\n    # Non-blocking: if another restore is already running, return 409 immediately\n    # instead of making the user wait. The frontend will retry.\n    acquired = lock.acquire(blocking=False)\n    if not acquired:\n        raise HTTPException(\n            status_code=409,\n            detail=\"Restore already in progress\",\n        )\n\n    try:\n        # Re-fetch sandbox status (may have changed while waiting for lock)\n        db_session.refresh(sandbox)\n\n        # Also re-check if session workspace exists (another request may have\n        # restored it while we were waiting)\n        if sandbox.status == SandboxStatus.RUNNING:\n            is_healthy = sandbox_manager.health_check(sandbox.id, timeout=10.0)\n            if is_healthy and sandbox_manager.session_workspace_exists(\n                sandbox.id, session_id\n            ):\n                session.status = BuildSessionStatus.ACTIVE\n                update_sandbox_heartbeat(db_session, sandbox.id)\n                base_response = SessionResponse.from_model(session, sandbox)\n                return DetailedSessionResponse.from_session_response(\n                    base_response, session_loaded_in_sandbox=True\n                )\n\n            if not is_healthy:\n                logger.warning(\n                    f\"Sandbox {sandbox.id} marked as RUNNING but pod is unhealthy/missing. Entering recovery mode.\"\n                )\n                # Terminate to clean up any lingering K8s resources\n                sandbox_manager.terminate(sandbox.id)\n\n                update_sandbox_status__no_commit(\n                    db_session, sandbox.id, SandboxStatus.TERMINATED\n                )\n                db_session.commit()\n                db_session.refresh(sandbox)\n                # Fall through to TERMINATED handling below\n\n        session_manager = SessionManager(db_session)\n        llm_config = session_manager._get_llm_config(None, None)\n\n        if sandbox.status in (SandboxStatus.SLEEPING, SandboxStatus.TERMINATED):\n            # Mark as PROVISIONING before the long-running provision() call\n            # so other requests know work is in progress\n            update_sandbox_status__no_commit(\n                db_session, sandbox.id, SandboxStatus.PROVISIONING\n            )\n            db_session.commit()\n\n            sandbox_manager.provision(\n                sandbox_id=sandbox.id,\n                user_id=user.id,\n                tenant_id=tenant_id,\n                llm_config=llm_config,\n            )\n\n            # Mark as RUNNING after successful provision\n            update_sandbox_status__no_commit(\n                db_session, sandbox.id, SandboxStatus.RUNNING\n            )\n            db_session.commit()\n\n        # 2. Check if session workspace needs to be loaded\n        if sandbox.status == SandboxStatus.RUNNING:\n            workspace_exists = sandbox_manager.session_workspace_exists(\n                sandbox.id, session_id\n            )\n\n            if not workspace_exists:\n                # Allocate port if not already set (needed for both snapshot restore and fresh setup)\n                if not session.nextjs_port:\n                    session.nextjs_port = allocate_nextjs_port(db_session)\n                    # Commit port allocation before long-running operations\n                    db_session.commit()\n\n                # Only Kubernetes backend supports snapshot restoration\n                snapshot = None\n                if SANDBOX_BACKEND == SandboxBackend.KUBERNETES:\n                    snapshot = get_latest_snapshot_for_session(db_session, session_id)\n\n                if snapshot:\n                    try:\n                        sandbox_manager.restore_snapshot(\n                            sandbox_id=sandbox.id,\n                            session_id=session_id,\n                            snapshot_storage_path=snapshot.storage_path,\n                            tenant_id=tenant_id,\n                            nextjs_port=session.nextjs_port,\n                            llm_config=llm_config,\n                            use_demo_data=session.demo_data_enabled,\n                        )\n                        session.status = BuildSessionStatus.ACTIVE\n                        db_session.commit()\n                    except Exception as e:\n                        logger.error(\n                            f\"Snapshot restore failed for session {session_id}: {e}\"\n                        )\n                        session.nextjs_port = None\n                        db_session.commit()\n                        raise\n                else:\n                    # No snapshot - set up fresh workspace\n                    sandbox_manager.setup_session_workspace(\n                        sandbox_id=sandbox.id,\n                        session_id=session_id,\n                        llm_config=llm_config,\n                        nextjs_port=session.nextjs_port,\n                    )\n                    session.status = BuildSessionStatus.ACTIVE\n                    db_session.commit()\n        else:\n            logger.warning(\n                f\"Sandbox {sandbox.id} status is {sandbox.status} after re-provision, expected RUNNING\"\n            )\n\n    except Exception as e:\n        logger.error(f\"Failed to restore session {session_id}: {e}\", exc_info=True)\n        raise HTTPException(\n            status_code=500,\n            detail=f\"Failed to restore session: {e}\",\n        )\n    finally:\n        if lock.owned():\n            lock.release()\n\n    # Update heartbeat to mark sandbox as active after successful restore\n    update_sandbox_heartbeat(db_session, sandbox.id)\n\n    base_response = SessionResponse.from_model(session, sandbox)\n    return DetailedSessionResponse.from_session_response(\n        base_response, session_loaded_in_sandbox=True\n    )\n\n\n# =============================================================================\n# Artifact Endpoints\n# =============================================================================\n\n\n@router.get(\n    \"/{session_id}/artifacts\",\n    response_model=list[ArtifactResponse],\n)\ndef list_artifacts(\n    session_id: UUID,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[dict]:\n    \"\"\"List artifacts generated in the session.\"\"\"\n    user_id: UUID = user.id\n    session_manager = SessionManager(db_session)\n\n    artifacts = session_manager.list_artifacts(session_id, user_id)\n    if artifacts is None:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n\n    return artifacts\n\n\n@router.get(\"/{session_id}/files\", response_model=DirectoryListing)\ndef list_directory(\n    session_id: UUID,\n    path: str = \"\",\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> DirectoryListing:\n    \"\"\"\n    List files and directories in the sandbox.\n\n    Args:\n        session_id: The session ID\n        path: Relative path from sandbox root (empty string for root)\n\n    Returns:\n        DirectoryListing with sorted entries (directories first, then files)\n    \"\"\"\n    user_id: UUID = user.id\n    session_manager = SessionManager(db_session)\n\n    try:\n        listing = session_manager.list_directory(session_id, user_id, path)\n    except ValueError as e:\n        error_message = str(e)\n        if \"path traversal\" in error_message.lower():\n            raise HTTPException(status_code=403, detail=\"Access denied\")\n        elif \"not found\" in error_message.lower():\n            raise HTTPException(status_code=404, detail=\"Directory not found\")\n        elif \"not a directory\" in error_message.lower():\n            raise HTTPException(status_code=400, detail=\"Path is not a directory\")\n        raise HTTPException(status_code=400, detail=error_message)\n\n    if listing is None:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n\n    return listing\n\n\n@router.get(\"/{session_id}/artifacts/{path:path}\")\ndef download_artifact(\n    session_id: UUID,\n    path: str,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    \"\"\"Download a specific artifact file.\"\"\"\n    user_id: UUID = user.id\n    session_manager = SessionManager(db_session)\n\n    try:\n        result = session_manager.download_artifact(session_id, user_id, path)\n    except ValueError as e:\n        error_message = str(e)\n        if (\n            \"path traversal\" in error_message.lower()\n            or \"access denied\" in error_message.lower()\n        ):\n            raise HTTPException(status_code=403, detail=\"Access denied\")\n        elif \"directory\" in error_message.lower():\n            raise HTTPException(status_code=400, detail=\"Cannot download directory\")\n        raise HTTPException(status_code=400, detail=error_message)\n\n    if result is None:\n        raise HTTPException(status_code=404, detail=\"Artifact not found\")\n\n    content, mime_type, filename = result\n\n    # Handle Unicode filenames in Content-Disposition header\n    # HTTP headers require Latin-1 encoding, so we use RFC 5987 for Unicode\n    try:\n        # Try Latin-1 encoding first (ASCII-compatible filenames)\n        filename.encode(\"latin-1\")\n        content_disposition = f'attachment; filename=\"{filename}\"'\n    except UnicodeEncodeError:\n        # Use RFC 5987 encoding for Unicode filenames\n        from urllib.parse import quote\n\n        encoded_filename = quote(filename, safe=\"\")\n        content_disposition = f\"attachment; filename*=UTF-8''{encoded_filename}\"\n\n    return Response(\n        content=content,\n        media_type=mime_type,\n        headers={\n            \"Content-Disposition\": content_disposition,\n        },\n    )\n\n\n@router.get(\"/{session_id}/export-docx/{path:path}\")\ndef export_docx(\n    session_id: UUID,\n    path: str,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    \"\"\"Export a markdown file as DOCX.\"\"\"\n    session_manager = SessionManager(db_session)\n\n    try:\n        result = session_manager.export_docx(session_id, user.id, path)\n    except ValueError as e:\n        error_message = str(e)\n        if (\n            \"path traversal\" in error_message.lower()\n            or \"access denied\" in error_message.lower()\n        ):\n            raise HTTPException(status_code=403, detail=\"Access denied\")\n        raise HTTPException(status_code=400, detail=error_message)\n\n    if result is None:\n        raise HTTPException(status_code=404, detail=\"File not found\")\n\n    docx_bytes, filename = result\n\n    try:\n        filename.encode(\"latin-1\")\n        content_disposition = f'attachment; filename=\"{filename}\"'\n    except UnicodeEncodeError:\n        from urllib.parse import quote\n\n        encoded_filename = quote(filename, safe=\"\")\n        content_disposition = f\"attachment; filename*=UTF-8''{encoded_filename}\"\n\n    return Response(\n        content=docx_bytes,\n        media_type=\"application/vnd.openxmlformats-officedocument.wordprocessingml.document\",\n        headers={\"Content-Disposition\": content_disposition},\n    )\n\n\n@router.get(\"/{session_id}/pptx-preview/{path:path}\")\ndef get_pptx_preview(\n    session_id: UUID,\n    path: str,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> PptxPreviewResponse:\n    \"\"\"Generate slide image previews for a PPTX file.\"\"\"\n    session_manager = SessionManager(db_session)\n\n    try:\n        result = session_manager.get_pptx_preview(session_id, user.id, path)\n    except ValueError as e:\n        error_message = str(e)\n        if (\n            \"path traversal\" in error_message.lower()\n            or \"access denied\" in error_message.lower()\n        ):\n            raise HTTPException(status_code=403, detail=\"Access denied\")\n        raise HTTPException(status_code=400, detail=error_message)\n\n    if result is None:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n\n    return PptxPreviewResponse(**result)\n\n\n@router.get(\"/{session_id}/webapp-info\", response_model=WebappInfo)\ndef get_webapp_info(\n    session_id: UUID,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> WebappInfo:\n    \"\"\"\n    Get webapp information for a session.\n\n    Returns whether a webapp exists, its URL, and the sandbox status.\n    \"\"\"\n    user_id: UUID = user.id\n    session_manager = SessionManager(db_session)\n\n    webapp_info = session_manager.get_webapp_info(session_id, user_id)\n\n    if webapp_info is None:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n\n    return WebappInfo(**webapp_info)\n\n\n@router.get(\"/{session_id}/webapp-download\")\ndef download_webapp(\n    session_id: UUID,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    \"\"\"\n    Download the webapp directory as a zip file.\n\n    Returns the entire outputs/web directory as a zip archive.\n    \"\"\"\n    user_id: UUID = user.id\n    session_manager = SessionManager(db_session)\n\n    result = session_manager.download_webapp_zip(session_id, user_id)\n\n    if result is None:\n        raise HTTPException(status_code=404, detail=\"Webapp not found\")\n\n    zip_bytes, filename = result\n\n    return Response(\n        content=zip_bytes,\n        media_type=\"application/zip\",\n        headers={\n            \"Content-Disposition\": f'attachment; filename=\"{filename}\"',\n        },\n    )\n\n\n@router.get(\"/{session_id}/download-directory/{path:path}\")\ndef download_directory(\n    session_id: UUID,\n    path: str,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    \"\"\"\n    Download a directory as a zip file.\n\n    Returns the specified directory as a zip archive.\n    \"\"\"\n    user_id: UUID = user.id\n    session_manager = SessionManager(db_session)\n\n    try:\n        result = session_manager.download_directory(session_id, user_id, path)\n    except ValueError as e:\n        error_message = str(e)\n        if \"path traversal\" in error_message.lower():\n            raise HTTPException(status_code=403, detail=\"Access denied\")\n        raise HTTPException(status_code=400, detail=error_message)\n\n    if result is None:\n        raise HTTPException(status_code=404, detail=\"Directory not found\")\n\n    zip_bytes, filename = result\n\n    return Response(\n        content=zip_bytes,\n        media_type=\"application/zip\",\n        headers={\n            \"Content-Disposition\": f'attachment; filename=\"{filename}\"',\n        },\n    )\n\n\n@router.post(\"/{session_id}/upload\", response_model=UploadResponse)\ndef upload_file_endpoint(\n    session_id: UUID,\n    file: UploadFile = File(...),\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> UploadResponse:\n    \"\"\"Upload a file to the session's sandbox.\n\n    The file will be placed in the sandbox's attachments directory.\n    \"\"\"\n    user_id: UUID = user.id\n    session_manager = SessionManager(db_session)\n\n    if not file.filename:\n        raise HTTPException(status_code=400, detail=\"File has no filename\")\n\n    # Read file content (use sync file interface)\n    content = file.file.read()\n\n    # Validate file (extension, mime type, size)\n    is_valid, error = validate_file(file.filename, file.content_type, len(content))\n    if not is_valid:\n        raise HTTPException(status_code=400, detail=error)\n\n    # Sanitize filename\n    safe_filename = sanitize_filename(file.filename)\n\n    try:\n        relative_path, _ = session_manager.upload_file(\n            session_id=session_id,\n            user_id=user_id,\n            filename=safe_filename,\n            content=content,\n        )\n    except UploadLimitExceededError as e:\n        # Return 429 for limit exceeded errors\n        raise HTTPException(status_code=429, detail=str(e))\n    except ValueError as e:\n        error_message = str(e)\n        if \"not found\" in error_message.lower():\n            raise HTTPException(status_code=404, detail=error_message)\n        raise HTTPException(status_code=400, detail=error_message)\n\n    return UploadResponse(\n        filename=safe_filename,\n        path=relative_path,\n        size_bytes=len(content),\n    )\n\n\n@router.delete(\"/{session_id}/files/{path:path}\", response_model=None)\ndef delete_file_endpoint(\n    session_id: UUID,\n    path: str,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    \"\"\"Delete a file from the session's sandbox.\n\n    Args:\n        session_id: The session ID\n        path: Relative path to the file (e.g., \"attachments/doc.pdf\")\n    \"\"\"\n    user_id: UUID = user.id\n    session_manager = SessionManager(db_session)\n\n    try:\n        deleted = session_manager.delete_file(session_id, user_id, path)\n    except ValueError as e:\n        error_message = str(e)\n        if \"path traversal\" in error_message.lower():\n            raise HTTPException(status_code=403, detail=\"Access denied\")\n        elif \"not found\" in error_message.lower():\n            raise HTTPException(status_code=404, detail=error_message)\n        elif \"directory\" in error_message.lower():\n            raise HTTPException(status_code=400, detail=\"Cannot delete directory\")\n        raise HTTPException(status_code=400, detail=error_message)\n\n    if not deleted:\n        raise HTTPException(status_code=404, detail=\"File not found\")\n\n    return Response(status_code=204)\n"
  },
  {
    "path": "backend/onyx/server/features/build/api/subscription_check.py",
    "content": "\"\"\"Subscription detection for Build Mode rate limiting.\"\"\"\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DEV_MODE\nfrom onyx.db.models import User\nfrom onyx.server.usage_limits import is_tenant_on_trial_fn\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\ndef is_user_subscribed(user: User, db_session: Session) -> bool:  # noqa: ARG001\n    \"\"\"\n    Check if a user has an active subscription.\n\n    For cloud (MULTI_TENANT=true):\n        - Checks Stripe billing via control plane\n        - Returns True if tenant is NOT on trial (subscribed = NOT on trial)\n\n    For self-hosted (MULTI_TENANT=false):\n        - Checks license metadata\n        - Returns True if license status is ACTIVE\n\n    Args:\n        user: The user object (None for unauthenticated users)\n        db_session: Database session\n\n    Returns:\n        True if user has active subscription, False otherwise\n    \"\"\"\n    if DEV_MODE:\n        return True\n\n    if user is None:\n        return False\n\n    if MULTI_TENANT:\n        # Cloud: check Stripe billing via control plane\n        tenant_id = get_current_tenant_id()\n        try:\n            on_trial = is_tenant_on_trial_fn(tenant_id)\n            # Subscribed = NOT on trial\n            return not on_trial\n        except Exception as e:\n            logger.warning(f\"Subscription check failed for tenant {tenant_id}: {e}\")\n            # Default to non-subscribed (safer/more restrictive)\n            return False\n\n    return True\n"
  },
  {
    "path": "backend/onyx/server/features/build/api/templates/webapp_hmr_fixer.js",
    "content": "(function () {\n  var WEBAPP_BASE = \"__WEBAPP_BASE__\";\n  var PROXIED_NEXT_PREFIX = WEBAPP_BASE + \"/_next/\";\n  var PROXIED_HMR_PREFIX = WEBAPP_BASE + \"/_next/webpack-hmr\";\n  var PROXIED_ALT_HMR_PREFIX = WEBAPP_BASE + \"/_next/hmr\";\n\n  function isHmrWebSocketUrl(url) {\n    if (!url) return false;\n    try {\n      var parsedUrl = new URL(String(url), window.location.href);\n      return (\n        parsedUrl.pathname.indexOf(\"/_next/webpack-hmr\") === 0 ||\n        parsedUrl.pathname.indexOf(\"/_next/hmr\") === 0 ||\n        parsedUrl.pathname.indexOf(PROXIED_HMR_PREFIX) === 0 ||\n        parsedUrl.pathname.indexOf(PROXIED_ALT_HMR_PREFIX) === 0\n      );\n    } catch (e) {}\n    if (typeof url === \"string\") {\n      return (\n        url.indexOf(\"/_next/webpack-hmr\") === 0 ||\n        url.indexOf(\"/_next/hmr\") === 0 ||\n        url.indexOf(PROXIED_HMR_PREFIX) === 0 ||\n        url.indexOf(PROXIED_ALT_HMR_PREFIX) === 0\n      );\n    }\n    return false;\n  }\n\n  function rewriteNextAssetUrl(url) {\n    if (!url) return url;\n    try {\n      var parsedUrl = new URL(String(url), window.location.href);\n      if (parsedUrl.pathname.indexOf(PROXIED_NEXT_PREFIX) === 0) {\n        return parsedUrl.pathname + parsedUrl.search + parsedUrl.hash;\n      }\n      if (parsedUrl.pathname.indexOf(\"/_next/\") === 0) {\n        return (\n          WEBAPP_BASE + parsedUrl.pathname + parsedUrl.search + parsedUrl.hash\n        );\n      }\n    } catch (e) {}\n    if (typeof url === \"string\") {\n      if (url.indexOf(PROXIED_NEXT_PREFIX) === 0) {\n        return url;\n      }\n      if (url.indexOf(\"/_next/\") === 0) {\n        return WEBAPP_BASE + url;\n      }\n    }\n    return url;\n  }\n\n  function createEvent(eventType) {\n    return typeof Event === \"function\"\n      ? new Event(eventType)\n      : { type: eventType };\n  }\n\n  function MockHmrWebSocket(url) {\n    this.url = String(url);\n    this.readyState = 1;\n    this.bufferedAmount = 0;\n    this.extensions = \"\";\n    this.protocol = \"\";\n    this.binaryType = \"blob\";\n    this.onopen = null;\n    this.onmessage = null;\n    this.onerror = null;\n    this.onclose = null;\n    this._l = {};\n    var socket = this;\n    setTimeout(function () {\n      socket._d(\"open\", createEvent(\"open\"));\n    }, 0);\n  }\n\n  MockHmrWebSocket.CONNECTING = 0;\n  MockHmrWebSocket.OPEN = 1;\n  MockHmrWebSocket.CLOSING = 2;\n  MockHmrWebSocket.CLOSED = 3;\n\n  MockHmrWebSocket.prototype.addEventListener = function (eventType, callback) {\n    (this._l[eventType] || (this._l[eventType] = [])).push(callback);\n  };\n\n  MockHmrWebSocket.prototype.removeEventListener = function (\n    eventType,\n    callback,\n  ) {\n    var listeners = this._l[eventType] || [];\n    this._l[eventType] = listeners.filter(function (listener) {\n      return listener !== callback;\n    });\n  };\n\n  MockHmrWebSocket.prototype._d = function (eventType, eventValue) {\n    var listeners = this._l[eventType] || [];\n    for (var i = 0; i < listeners.length; i++) {\n      listeners[i].call(this, eventValue);\n    }\n    var handler = this[\"on\" + eventType];\n    if (typeof handler === \"function\") {\n      handler.call(this, eventValue);\n    }\n  };\n\n  MockHmrWebSocket.prototype.send = function () {};\n\n  MockHmrWebSocket.prototype.close = function (code, reason) {\n    if (this.readyState >= 2) return;\n    this.readyState = 3;\n    var closeEvent = createEvent(\"close\");\n    closeEvent.code = code === undefined ? 1000 : code;\n    closeEvent.reason = reason || \"\";\n    closeEvent.wasClean = true;\n    this._d(\"close\", closeEvent);\n  };\n\n  if (window.WebSocket) {\n    var OriginalWebSocket = window.WebSocket;\n    window.WebSocket = function (url, protocols) {\n      if (isHmrWebSocketUrl(url)) {\n        return new MockHmrWebSocket(rewriteNextAssetUrl(url));\n      }\n      return protocols === undefined\n        ? new OriginalWebSocket(url)\n        : new OriginalWebSocket(url, protocols);\n    };\n    window.WebSocket.prototype = OriginalWebSocket.prototype;\n    Object.setPrototypeOf(window.WebSocket, OriginalWebSocket);\n    [\"CONNECTING\", \"OPEN\", \"CLOSING\", \"CLOSED\"].forEach(function (stateKey) {\n      window.WebSocket[stateKey] = OriginalWebSocket[stateKey];\n    });\n  }\n})();\n"
  },
  {
    "path": "backend/onyx/server/features/build/api/templates/webapp_offline.html",
    "content": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"UTF-8\" />\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <meta http-equiv=\"refresh\" content=\"15\" />\n    <title>Craft — Starting up</title>\n    <style>\n      *,\n      *::before,\n      *::after {\n        box-sizing: border-box;\n        margin: 0;\n        padding: 0;\n      }\n\n      body {\n        font-family: ui-monospace, SFMono-Regular, \"SF Mono\", Menlo, Consolas,\n          monospace;\n        background: linear-gradient(to bottom right, #030712, #111827, #030712);\n        min-height: 100vh;\n        display: flex;\n        flex-direction: column;\n        align-items: center;\n        justify-content: center;\n        gap: 1.5rem;\n        padding: 2rem;\n      }\n\n      .terminal {\n        width: 100%;\n        max-width: 580px;\n        border: 2px solid #374151;\n        border-radius: 2px;\n      }\n\n      .titlebar {\n        background: #1f2937;\n        padding: 0.5rem 0.75rem;\n        display: flex;\n        align-items: center;\n        gap: 0.5rem;\n        border-bottom: 1px solid #374151;\n      }\n\n      .btn {\n        width: 12px;\n        height: 12px;\n        border-radius: 2px;\n        flex-shrink: 0;\n      }\n      .btn-red {\n        background: #ef4444;\n      }\n      .btn-yellow {\n        background: #eab308;\n      }\n      .btn-green {\n        background: #22c55e;\n      }\n\n      .title-label {\n        flex: 1;\n        text-align: center;\n        font-size: 0.75rem;\n        color: #6b7280;\n        margin-right: 36px;\n      }\n\n      .body {\n        background: #111827;\n        padding: 1.5rem;\n        min-height: 200px;\n        font-size: 0.875rem;\n        color: #d1d5db;\n        display: flex;\n        align-items: flex-start;\n        gap: 0.375rem;\n      }\n\n      .prompt {\n        color: #10b981;\n        user-select: none;\n      }\n\n      .tagline {\n        font-size: 0.8125rem;\n        color: #4b5563;\n        text-align: center;\n      }\n    </style>\n  </head>\n  <body>\n    <div class=\"terminal\">\n      <div class=\"titlebar\">\n        <div class=\"btn btn-red\"></div>\n        <div class=\"btn btn-yellow\"></div>\n        <div class=\"btn btn-green\"></div>\n        <span class=\"title-label\">crafting_table</span>\n      </div>\n      <div class=\"body\">\n        <span class=\"prompt\">/&gt;</span>\n        <span>Sandbox is asleep...</span>\n      </div>\n    </div>\n    <p class=\"tagline\">\n      Ask the owner to open their Craft session to wake it up.\n    </p>\n  </body>\n</html>\n"
  },
  {
    "path": "backend/onyx/server/features/build/api/user_library.py",
    "content": "\"\"\"API endpoints for User Library file management in Craft.\n\nThis module provides endpoints for uploading and managing raw binary files\n(xlsx, pptx, docx, csv, etc.) that are stored directly in S3 for sandbox access.\n\nFiles are stored at:\n    s3://{bucket}/{tenant_id}/knowledge/{user_id}/user_library/{path}\n\nAnd synced to sandbox at:\n    /workspace/files/user_library/{path}\n\nKnown Issues / TODOs:\n    - Memory: Upload endpoints read entire file content into memory (up to 500MB).\n      Should be refactored to stream uploads directly to S3 via multipart upload\n      for better memory efficiency under concurrent load.\n    - Transaction safety: Multi-file uploads are not atomic. If the endpoint fails\n      mid-batch (e.g., file 3 of 5 exceeds storage quota), files 1-2 are already\n      persisted to S3 and DB. A partial upload is not catastrophic but the response\n      implies atomicity that doesn't exist.\n\"\"\"\n\nimport hashlib\nimport mimetypes\nimport re\nimport zipfile\nfrom datetime import datetime\nfrom datetime import timezone\nfrom io import BytesIO\nfrom typing import Any\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import File\nfrom fastapi import Form\nfrom fastapi import HTTPException\nfrom fastapi import Query\nfrom fastapi import UploadFile\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_user\nfrom onyx.background.celery.versioned_apps.client import app as celery_app\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.connector_credential_pair import update_connector_credential_pair\nfrom onyx.db.document import upsert_document_by_connector_credential_pair\nfrom onyx.db.document import upsert_documents\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.models import User\nfrom onyx.document_index.interfaces import DocumentMetadata\nfrom onyx.server.features.build.configs import USER_LIBRARY_MAX_FILE_SIZE_BYTES\nfrom onyx.server.features.build.configs import USER_LIBRARY_MAX_FILES_PER_UPLOAD\nfrom onyx.server.features.build.configs import USER_LIBRARY_MAX_TOTAL_SIZE_BYTES\nfrom onyx.server.features.build.configs import USER_LIBRARY_SOURCE_DIR\nfrom onyx.server.features.build.db.user_library import get_or_create_craft_connector\nfrom onyx.server.features.build.db.user_library import get_user_storage_bytes\nfrom onyx.server.features.build.indexing.persistent_document_writer import (\n    get_persistent_document_writer,\n)\nfrom onyx.server.features.build.indexing.persistent_document_writer import (\n    PersistentDocumentWriter,\n)\nfrom onyx.server.features.build.indexing.persistent_document_writer import (\n    S3PersistentDocumentWriter,\n)\nfrom onyx.server.features.build.utils import sanitize_filename as api_sanitize_filename\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/user-library\")\n\n\n# =============================================================================\n# Pydantic Models\n# =============================================================================\n\n\nclass LibraryEntryResponse(BaseModel):\n    \"\"\"Response for a single library entry (file or directory).\"\"\"\n\n    id: str  # document_id\n    name: str\n    path: str\n    is_directory: bool\n    file_size: int | None\n    mime_type: str | None\n    sync_enabled: bool\n    created_at: datetime\n    children: list[\"LibraryEntryResponse\"] | None = None\n\n\nclass CreateDirectoryRequest(BaseModel):\n    \"\"\"Request to create a virtual directory.\"\"\"\n\n    name: str\n    parent_path: str = \"/\"\n\n\nclass UploadResponse(BaseModel):\n    \"\"\"Response after successful file upload.\"\"\"\n\n    entries: list[LibraryEntryResponse]\n    total_uploaded: int\n    total_size_bytes: int\n\n\nclass ToggleSyncResponse(BaseModel):\n    \"\"\"Response after toggling file sync.\"\"\"\n\n    success: bool\n    sync_enabled: bool\n\n\nclass DeleteFileResponse(BaseModel):\n    \"\"\"Response after deleting a file.\"\"\"\n\n    success: bool\n    deleted: str\n\n\n# =============================================================================\n# Helper Functions\n# =============================================================================\n\n\ndef _sanitize_path(path: str) -> str:\n    \"\"\"Sanitize a file path, removing traversal attempts and normalizing.\n\n    Removes '..' and '.' segments and ensures the path starts with '/'.\n    Only allows alphanumeric characters, hyphens, underscores, dots, spaces,\n    and forward slashes. All other characters are stripped.\n    \"\"\"\n    parts = path.split(\"/\")\n    sanitized_parts: list[str] = []\n    for p in parts:\n        if not p or p == \"..\" or p == \".\":\n            continue\n        # Strip any character not in the whitelist\n        cleaned = re.sub(r\"[^a-zA-Z0-9\\-_. ]\", \"\", p)\n        if cleaned:\n            sanitized_parts.append(cleaned)\n    return \"/\" + \"/\".join(sanitized_parts)\n\n\ndef _build_document_id(user_id: str, path: str) -> str:\n    \"\"\"Build a document ID for a craft file.\n\n    Deterministic: re-uploading the same file to the same path will produce the\n    same document ID, allowing upsert to overwrite the previous record.\n\n    Uses a hash of the path to avoid collisions from separator replacement\n    (e.g., \"/a/b_c\" vs \"/a_b/c\" would collide with naive slash-to-underscore).\n    \"\"\"\n    path_hash = hashlib.sha256(path.encode()).hexdigest()[:16]\n    return f\"CRAFT_FILE__{user_id}__{path_hash}\"\n\n\ndef _trigger_sandbox_sync(\n    user_id: str, tenant_id: str, source: str | None = None\n) -> None:\n    \"\"\"Trigger sandbox file sync task.\n\n    Args:\n        user_id: The user ID whose sandbox should be synced\n        tenant_id: The tenant ID for S3 path construction\n        source: Optional source type (e.g., \"user_library\"). If specified,\n                only syncs that source's directory with --delete flag.\n    \"\"\"\n    celery_app.send_task(\n        OnyxCeleryTask.SANDBOX_FILE_SYNC,\n        kwargs={\"user_id\": user_id, \"tenant_id\": tenant_id, \"source\": source},\n        queue=OnyxCeleryQueues.SANDBOX,\n    )\n\n\ndef _validate_zip_contents(\n    zip_file: zipfile.ZipFile,\n    existing_usage: int,\n) -> None:\n    \"\"\"Validate zip file contents before extraction.\n\n    Checks file count limit and total decompressed size against storage quota.\n    Raises HTTPException on validation failure.\n    \"\"\"\n    if len(zip_file.namelist()) > USER_LIBRARY_MAX_FILES_PER_UPLOAD:\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Zip contains too many files. Maximum is {USER_LIBRARY_MAX_FILES_PER_UPLOAD}.\",\n        )\n\n    # Zip bomb protection: check total decompressed size before extracting\n    declared_total = sum(\n        info.file_size for info in zip_file.infolist() if not info.is_dir()\n    )\n    if existing_usage + declared_total > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:\n        raise HTTPException(\n            status_code=400,\n            detail=(\n                f\"Zip decompressed size ({declared_total // (1024 * 1024)}MB) would exceed storage limit.\"\n            ),\n        )\n\n\ndef _verify_ownership_and_get_document(\n    document_id: str,\n    user: User,\n    db_session: Session,\n) -> Any:\n    \"\"\"Verify the user owns the document and return it.\n\n    Raises HTTPException on authorization failure or if document not found.\n    \"\"\"\n    from onyx.db.document import get_document\n\n    user_prefix = f\"CRAFT_FILE__{user.id}__\"\n    if not document_id.startswith(user_prefix):\n        raise HTTPException(\n            status_code=403, detail=\"Not authorized to modify this file\"\n        )\n\n    doc = get_document(document_id, db_session)\n    if doc is None:\n        raise HTTPException(status_code=404, detail=\"File not found\")\n\n    return doc\n\n\ndef _store_and_track_file(\n    *,\n    writer: \"PersistentDocumentWriter | S3PersistentDocumentWriter\",\n    file_path: str,\n    content: bytes,\n    content_type: str | None,\n    user_id: str,\n    connector_id: int,\n    credential_id: int,\n    db_session: Session,\n) -> tuple[str, str]:\n    \"\"\"Write a file to storage and upsert its document record.\n\n    Returns:\n        Tuple of (document_id, storage_key)\n    \"\"\"\n    storage_key = writer.write_raw_file(\n        path=file_path,\n        content=content,\n        content_type=content_type,\n    )\n\n    doc_id = _build_document_id(user_id, file_path)\n    doc_metadata = DocumentMetadata(\n        connector_id=connector_id,\n        credential_id=credential_id,\n        document_id=doc_id,\n        semantic_identifier=f\"{USER_LIBRARY_SOURCE_DIR}{file_path}\",\n        first_link=storage_key,\n        doc_metadata={\n            \"storage_key\": storage_key,\n            \"file_path\": file_path,\n            \"file_size\": len(content),\n            \"mime_type\": content_type,\n            \"is_directory\": False,\n        },\n    )\n    upsert_documents(db_session, [doc_metadata])\n    upsert_document_by_connector_credential_pair(\n        db_session, connector_id, credential_id, [doc_id]\n    )\n\n    return doc_id, storage_key\n\n\n# =============================================================================\n# API Endpoints\n# =============================================================================\n\n\n@router.get(\"/tree\")\ndef get_library_tree(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[LibraryEntryResponse]:\n    \"\"\"Get user's uploaded files as a tree structure.\n\n    Returns all CRAFT_FILE documents for the user, organized hierarchically.\n    \"\"\"\n    from onyx.db.document import get_documents_by_source\n\n    # Get CRAFT_FILE documents for this user (filtered at SQL level)\n    user_docs = get_documents_by_source(\n        db_session=db_session,\n        source=DocumentSource.CRAFT_FILE,\n        creator_id=user.id,\n    )\n\n    # Build tree structure\n    entries: list[LibraryEntryResponse] = []\n    now = datetime.now(timezone.utc)\n    for doc in user_docs:\n        doc_metadata = doc.doc_metadata or {}\n        entries.append(\n            LibraryEntryResponse(\n                id=doc.id,\n                name=doc.semantic_id.split(\"/\")[-1] if doc.semantic_id else \"unknown\",\n                path=doc.semantic_id or \"\",\n                is_directory=doc_metadata.get(\"is_directory\", False),\n                file_size=doc_metadata.get(\"file_size\"),\n                mime_type=doc_metadata.get(\"mime_type\"),\n                sync_enabled=not doc_metadata.get(\"sync_disabled\", False),\n                created_at=doc.last_modified or now,\n            )\n        )\n\n    return entries\n\n\n@router.post(\"/upload\")\nasync def upload_files(\n    files: list[UploadFile] = File(...),\n    path: str = Form(\"/\"),\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> UploadResponse:\n    \"\"\"Upload files directly to S3 and track in PostgreSQL.\n\n    Files are stored as raw binary (no text extraction) for access by\n    the sandbox agent using Python libraries like openpyxl, python-pptx, etc.\n    \"\"\"\n    tenant_id = get_current_tenant_id()\n    if tenant_id is None:\n        raise HTTPException(status_code=500, detail=\"Tenant ID not found\")\n\n    # Validate file count\n    if len(files) > USER_LIBRARY_MAX_FILES_PER_UPLOAD:\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Too many files. Maximum is {USER_LIBRARY_MAX_FILES_PER_UPLOAD} per upload.\",\n        )\n\n    # Check cumulative storage usage\n    existing_usage = get_user_storage_bytes(db_session, user.id)\n\n    # Get or create connector\n    connector_id, credential_id = get_or_create_craft_connector(db_session, user)\n\n    # Get the persistent document writer\n    writer = get_persistent_document_writer(\n        user_id=str(user.id),\n        tenant_id=tenant_id,\n    )\n\n    uploaded_entries: list[LibraryEntryResponse] = []\n    total_size = 0\n    now = datetime.now(timezone.utc)\n\n    # Sanitize the base path\n    base_path = _sanitize_path(path)\n\n    for file in files:\n        # TODO: Stream directly to S3 via multipart upload instead of reading\n        # entire file into memory. With 500MB max file size, this can OOM under\n        # concurrent uploads.\n        content = await file.read()\n        file_size = len(content)\n\n        # Validate individual file size\n        if file_size > USER_LIBRARY_MAX_FILE_SIZE_BYTES:\n            raise HTTPException(\n                status_code=400,\n                detail=f\"File '{file.filename}' exceeds maximum size of {USER_LIBRARY_MAX_FILE_SIZE_BYTES // (1024 * 1024)}MB\",\n            )\n\n        # Validate cumulative storage (existing + this upload batch)\n        total_size += file_size\n        if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:\n            raise HTTPException(\n                status_code=400,\n                detail=f\"Total storage would exceed maximum of {USER_LIBRARY_MAX_TOTAL_SIZE_BYTES // (1024 * 1024 * 1024)}GB\",\n            )\n\n        # Sanitize filename\n        safe_filename = api_sanitize_filename(file.filename or \"unnamed\")\n        file_path = f\"{base_path}/{safe_filename}\".replace(\"//\", \"/\")\n\n        doc_id, _ = _store_and_track_file(\n            writer=writer,\n            file_path=file_path,\n            content=content,\n            content_type=file.content_type,\n            user_id=str(user.id),\n            connector_id=connector_id,\n            credential_id=credential_id,\n            db_session=db_session,\n        )\n\n        uploaded_entries.append(\n            LibraryEntryResponse(\n                id=doc_id,\n                name=safe_filename,\n                path=file_path,\n                is_directory=False,\n                file_size=file_size,\n                mime_type=file.content_type,\n                sync_enabled=True,\n                created_at=now,\n            )\n        )\n\n    # Mark connector as having succeeded (sets last_successful_index_time)\n    # This allows the demo data toggle to be disabled\n    update_connector_credential_pair(\n        db_session=db_session,\n        connector_id=connector_id,\n        credential_id=credential_id,\n        status=ConnectorCredentialPairStatus.ACTIVE,\n        net_docs=len(uploaded_entries),\n        run_dt=now,\n    )\n\n    # Trigger sandbox sync for user_library source only\n    _trigger_sandbox_sync(str(user.id), tenant_id, source=USER_LIBRARY_SOURCE_DIR)\n\n    logger.info(\n        f\"Uploaded {len(uploaded_entries)} files ({total_size} bytes) for user {user.id}\"\n    )\n\n    return UploadResponse(\n        entries=uploaded_entries,\n        total_uploaded=len(uploaded_entries),\n        total_size_bytes=total_size,\n    )\n\n\n@router.post(\"/upload-zip\")\nasync def upload_zip(\n    file: UploadFile = File(...),\n    path: str = Form(\"/\"),\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> UploadResponse:\n    \"\"\"Upload and extract a zip file, storing each extracted file to S3.\n\n    Preserves the directory structure from the zip file.\n    \"\"\"\n    tenant_id = get_current_tenant_id()\n    if tenant_id is None:\n        raise HTTPException(status_code=500, detail=\"Tenant ID not found\")\n\n    # Read zip content\n    content = await file.read()\n    if len(content) > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Zip file exceeds maximum size of {USER_LIBRARY_MAX_TOTAL_SIZE_BYTES // (1024 * 1024 * 1024)}GB\",\n        )\n\n    # Check cumulative storage usage\n    existing_usage = get_user_storage_bytes(db_session, user.id)\n\n    # Get or create connector\n    connector_id, credential_id = get_or_create_craft_connector(db_session, user)\n\n    # Get the persistent document writer\n    writer = get_persistent_document_writer(\n        user_id=str(user.id),\n        tenant_id=tenant_id,\n    )\n\n    uploaded_entries: list[LibraryEntryResponse] = []\n    total_size = 0\n\n    # Extract zip contents into a subfolder named after the zip file\n    zip_name = api_sanitize_filename(file.filename or \"upload\")\n    if zip_name.lower().endswith(\".zip\"):\n        zip_name = zip_name[:-4]\n    folder_path = f\"{_sanitize_path(path)}/{zip_name}\".replace(\"//\", \"/\")\n    base_path = folder_path\n\n    now = datetime.now(timezone.utc)\n\n    # Track all directory paths we need to create records for\n    directory_paths: set[str] = set()\n\n    try:\n        with zipfile.ZipFile(BytesIO(content), \"r\") as zip_file:\n            _validate_zip_contents(zip_file, existing_usage)\n\n            for zip_info in zip_file.infolist():\n                # Skip hidden files and __MACOSX\n                if (\n                    zip_info.filename.startswith(\"__MACOSX\")\n                    or \"/.\" in zip_info.filename\n                ):\n                    continue\n\n                # Skip directories - we'll create records from file paths below\n                if zip_info.is_dir():\n                    continue\n\n                # Read file content\n                file_content = zip_file.read(zip_info.filename)\n                file_size = len(file_content)\n\n                # Validate individual file size\n                if file_size > USER_LIBRARY_MAX_FILE_SIZE_BYTES:\n                    logger.warning(f\"Skipping '{zip_info.filename}' - exceeds max size\")\n                    continue\n\n                total_size += file_size\n\n                # Validate cumulative storage\n                if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:\n                    raise HTTPException(\n                        status_code=400,\n                        detail=f\"Total storage would exceed maximum of {USER_LIBRARY_MAX_TOTAL_SIZE_BYTES // (1024 * 1024 * 1024)}GB\",\n                    )\n\n                # Build path preserving zip structure\n                sanitized_zip_path = _sanitize_path(zip_info.filename)\n                file_path = f\"{base_path}{sanitized_zip_path}\".replace(\"//\", \"/\")\n                file_name = file_path.split(\"/\")[-1]\n\n                # Collect all intermediate directories for this file\n                parts = file_path.split(\"/\")\n                for i in range(\n                    2, len(parts)\n                ):  # start at 2 to skip empty + first segment\n                    directory_paths.add(\"/\".join(parts[:i]))\n\n                # Guess content type\n                content_type, _ = mimetypes.guess_type(file_name)\n\n                doc_id, _ = _store_and_track_file(\n                    writer=writer,\n                    file_path=file_path,\n                    content=file_content,\n                    content_type=content_type,\n                    user_id=str(user.id),\n                    connector_id=connector_id,\n                    credential_id=credential_id,\n                    db_session=db_session,\n                )\n\n                uploaded_entries.append(\n                    LibraryEntryResponse(\n                        id=doc_id,\n                        name=file_name,\n                        path=file_path,\n                        is_directory=False,\n                        file_size=file_size,\n                        mime_type=content_type,\n                        sync_enabled=True,\n                        created_at=now,\n                    )\n                )\n\n    except zipfile.BadZipFile:\n        raise HTTPException(status_code=400, detail=\"Invalid zip file\")\n\n    # Create directory document records so they appear in the tree view\n    if directory_paths:\n        dir_doc_ids: list[str] = []\n        for dir_path in sorted(directory_paths):\n            dir_doc_id = _build_document_id(str(user.id), dir_path)\n            dir_doc_ids.append(dir_doc_id)\n            dir_metadata = DocumentMetadata(\n                connector_id=connector_id,\n                credential_id=credential_id,\n                document_id=dir_doc_id,\n                semantic_identifier=f\"{USER_LIBRARY_SOURCE_DIR}{dir_path}\",\n                first_link=\"\",\n                doc_metadata={\"is_directory\": True},\n            )\n            upsert_documents(db_session, [dir_metadata])\n        upsert_document_by_connector_credential_pair(\n            db_session, connector_id, credential_id, dir_doc_ids\n        )\n\n    # Mark connector as having succeeded (sets last_successful_index_time)\n    # This allows the demo data toggle to be disabled\n    update_connector_credential_pair(\n        db_session=db_session,\n        connector_id=connector_id,\n        credential_id=credential_id,\n        status=ConnectorCredentialPairStatus.ACTIVE,\n        net_docs=len(uploaded_entries),\n        run_dt=now,\n    )\n\n    # Trigger sandbox sync for user_library source only\n    _trigger_sandbox_sync(str(user.id), tenant_id, source=USER_LIBRARY_SOURCE_DIR)\n\n    logger.info(\n        f\"Extracted {len(uploaded_entries)} files ({total_size} bytes) from zip for user {user.id}\"\n    )\n\n    return UploadResponse(\n        entries=uploaded_entries,\n        total_uploaded=len(uploaded_entries),\n        total_size_bytes=total_size,\n    )\n\n\n@router.post(\"/directories\")\ndef create_directory(\n    request: CreateDirectoryRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> LibraryEntryResponse:\n    \"\"\"Create a virtual directory.\n\n    Directories are tracked as documents with is_directory=True.\n    No S3 object is created (S3 doesn't have real directories).\n    \"\"\"\n    # Get or create connector\n    connector_id, credential_id = get_or_create_craft_connector(db_session, user)\n\n    # Build path\n    parent_path = _sanitize_path(request.parent_path)\n    safe_name = api_sanitize_filename(request.name)\n    dir_path = f\"{parent_path}/{safe_name}\".replace(\"//\", \"/\")\n\n    # Track in document table\n    doc_id = _build_document_id(str(user.id), dir_path)\n    doc_metadata = DocumentMetadata(\n        connector_id=connector_id,\n        credential_id=credential_id,\n        document_id=doc_id,\n        semantic_identifier=f\"{USER_LIBRARY_SOURCE_DIR}{dir_path}\",\n        first_link=\"\",\n        doc_metadata={\n            \"is_directory\": True,\n        },\n    )\n    upsert_documents(db_session, [doc_metadata])\n    upsert_document_by_connector_credential_pair(\n        db_session, connector_id, credential_id, [doc_id]\n    )\n    db_session.commit()\n\n    return LibraryEntryResponse(\n        id=doc_id,\n        name=safe_name,\n        path=dir_path,\n        is_directory=True,\n        file_size=None,\n        mime_type=None,\n        sync_enabled=True,\n        created_at=datetime.now(timezone.utc),\n    )\n\n\n@router.patch(\"/files/{document_id}/toggle\")\ndef toggle_file_sync(\n    document_id: str,\n    enabled: bool = Query(...),\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> ToggleSyncResponse:\n    \"\"\"Enable/disable syncing a file to sandboxes.\n\n    When sync is disabled, the file's metadata is updated with sync_disabled=True.\n    The sandbox sync task will exclude these files when syncing to the sandbox.\n\n    If the item is a directory, all children are also toggled.\n    \"\"\"\n    from onyx.db.document import get_documents_by_source\n    from onyx.db.document import update_document_metadata__no_commit\n\n    tenant_id = get_current_tenant_id()\n    if tenant_id is None:\n        raise HTTPException(status_code=500, detail=\"Tenant ID not found\")\n\n    doc = _verify_ownership_and_get_document(document_id, user, db_session)\n\n    # Update metadata for this document\n    new_metadata = dict(doc.doc_metadata or {})\n    new_metadata[\"sync_disabled\"] = not enabled\n    update_document_metadata__no_commit(db_session, document_id, new_metadata)\n\n    # If this is a directory, also toggle all children\n    doc_metadata = doc.doc_metadata or {}\n    if doc_metadata.get(\"is_directory\"):\n        folder_path = doc.semantic_id\n        if folder_path:\n            all_docs = get_documents_by_source(\n                db_session=db_session,\n                source=DocumentSource.CRAFT_FILE,\n                creator_id=user.id,\n            )\n            for child_doc in all_docs:\n                if child_doc.semantic_id and child_doc.semantic_id.startswith(\n                    folder_path + \"/\"\n                ):\n                    child_metadata = dict(child_doc.doc_metadata or {})\n                    child_metadata[\"sync_disabled\"] = not enabled\n                    update_document_metadata__no_commit(\n                        db_session, child_doc.id, child_metadata\n                    )\n\n    db_session.commit()\n\n    return ToggleSyncResponse(success=True, sync_enabled=enabled)\n\n\n@router.delete(\"/files/{document_id}\")\ndef delete_file(\n    document_id: str,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> DeleteFileResponse:\n    \"\"\"Delete a file from both S3 and the document table.\"\"\"\n    from onyx.db.document import delete_document_by_id__no_commit\n\n    tenant_id = get_current_tenant_id()\n    if tenant_id is None:\n        raise HTTPException(status_code=500, detail=\"Tenant ID not found\")\n\n    doc = _verify_ownership_and_get_document(document_id, user, db_session)\n\n    # Delete from storage if it's a file (not directory)\n    doc_metadata = doc.doc_metadata or {}\n    if not doc_metadata.get(\"is_directory\"):\n        file_path = doc_metadata.get(\"file_path\")\n        if file_path:\n            writer = get_persistent_document_writer(\n                user_id=str(user.id),\n                tenant_id=tenant_id,\n            )\n            try:\n                if isinstance(writer, S3PersistentDocumentWriter):\n                    writer.delete_raw_file_by_path(file_path)\n                else:\n                    writer.delete_raw_file(file_path)\n            except Exception as e:\n                logger.warning(f\"Failed to delete file at path {file_path}: {e}\")\n        else:\n            # Fallback for documents created before file_path was stored\n            storage_key = doc_metadata.get(\"storage_key\") or doc_metadata.get(\"s3_key\")\n            if storage_key:\n                writer = get_persistent_document_writer(\n                    user_id=str(user.id),\n                    tenant_id=tenant_id,\n                )\n                try:\n                    if isinstance(writer, S3PersistentDocumentWriter):\n                        writer.delete_raw_file(storage_key)\n                    else:\n                        logger.warning(\n                            f\"Cannot delete file in local mode without file_path: {document_id}\"\n                        )\n                except Exception as e:\n                    logger.warning(\n                        f\"Failed to delete storage object {storage_key}: {e}\"\n                    )\n\n    # Delete from document table\n    delete_document_by_id__no_commit(db_session, document_id)\n    db_session.commit()\n\n    # Trigger sync to apply changes\n    _trigger_sandbox_sync(str(user.id), tenant_id, source=USER_LIBRARY_SOURCE_DIR)\n\n    return DeleteFileResponse(success=True, deleted=document_id)\n"
  },
  {
    "path": "backend/onyx/server/features/build/configs.py",
    "content": "import os\nfrom enum import Enum\nfrom pathlib import Path\n\n\nclass SandboxBackend(str, Enum):\n    \"\"\"Backend mode for sandbox operations.\n\n    LOCAL: Development mode - no snapshots, no automatic cleanup\n    KUBERNETES: Production mode - full snapshots and cleanup\n    \"\"\"\n\n    LOCAL = \"local\"\n    KUBERNETES = \"kubernetes\"\n\n\n# Sandbox backend mode (controls snapshot and cleanup behavior)\n# \"local\" = no snapshots, no cleanup (for development)\n# \"kubernetes\" = full snapshots and cleanup (for production)\nSANDBOX_BACKEND = SandboxBackend(os.environ.get(\"SANDBOX_BACKEND\", \"local\"))\n\n# Base directory path for persistent document storage (local filesystem)\n# Example: /var/onyx/file-system or /app/file-system\nPERSISTENT_DOCUMENT_STORAGE_PATH = os.environ.get(\n    \"PERSISTENT_DOCUMENT_STORAGE_PATH\", \"/app/file-system\"\n)\n\n# Demo Data Path\n# Local: Source tree path (relative to this file)\n# Kubernetes: Baked into container image at /workspace/demo_data\n_THIS_FILE = Path(__file__)\nDEMO_DATA_PATH = str(\n    _THIS_FILE.parent / \"sandbox\" / \"kubernetes\" / \"docker\" / \"demo_data\"\n)\n\n# Sandbox filesystem paths\nSANDBOX_BASE_PATH = os.environ.get(\"SANDBOX_BASE_PATH\", \"/tmp/onyx-sandboxes\")\nOUTPUTS_TEMPLATE_PATH = os.environ.get(\"OUTPUTS_TEMPLATE_PATH\", \"/templates/outputs\")\nVENV_TEMPLATE_PATH = os.environ.get(\"VENV_TEMPLATE_PATH\", \"/templates/venv\")\n\n# Sandbox agent configuration\nSANDBOX_AGENT_COMMAND = os.environ.get(\"SANDBOX_AGENT_COMMAND\", \"opencode\").split()\n\n# OpenCode disabled tools (comma-separated list)\n# Available tools: bash, edit, write, read, grep, glob, list, lsp, patch,\n#                  skill, todowrite, todoread, webfetch, question\n# Example: \"question,webfetch\" to disable user questions and web fetching\n_disabled_tools_str = os.environ.get(\"OPENCODE_DISABLED_TOOLS\", \"question\")\nOPENCODE_DISABLED_TOOLS: list[str] = [\n    t.strip() for t in _disabled_tools_str.split(\",\") if t.strip()\n]\n\n# Sandbox lifecycle configuration\nSANDBOX_IDLE_TIMEOUT_SECONDS = int(\n    os.environ.get(\"SANDBOX_IDLE_TIMEOUT_SECONDS\", \"3600\")\n)\nSANDBOX_MAX_CONCURRENT_PER_ORG = int(\n    os.environ.get(\"SANDBOX_MAX_CONCURRENT_PER_ORG\", \"10\")\n)\n\n# Sandbox snapshot storage\nSANDBOX_SNAPSHOTS_BUCKET = os.environ.get(\n    \"SANDBOX_SNAPSHOTS_BUCKET\", \"sandbox-snapshots\"\n)\n\n# Next.js preview server port range\nSANDBOX_NEXTJS_PORT_START = int(os.environ.get(\"SANDBOX_NEXTJS_PORT_START\", \"3010\"))\nSANDBOX_NEXTJS_PORT_END = int(os.environ.get(\"SANDBOX_NEXTJS_PORT_END\", \"3100\"))\n\n# File upload configuration\nMAX_UPLOAD_FILE_SIZE_MB = int(os.environ.get(\"BUILD_MAX_UPLOAD_FILE_SIZE_MB\", \"50\"))\nMAX_UPLOAD_FILE_SIZE_BYTES = MAX_UPLOAD_FILE_SIZE_MB * 1024 * 1024\nMAX_UPLOAD_FILES_PER_SESSION = int(\n    os.environ.get(\"BUILD_MAX_UPLOAD_FILES_PER_SESSION\", \"20\")\n)\nMAX_TOTAL_UPLOAD_SIZE_MB = int(os.environ.get(\"BUILD_MAX_TOTAL_UPLOAD_SIZE_MB\", \"200\"))\nMAX_TOTAL_UPLOAD_SIZE_BYTES = MAX_TOTAL_UPLOAD_SIZE_MB * 1024 * 1024\nATTACHMENTS_DIRECTORY = \"attachments\"\n\n# ============================================================================\n# Kubernetes Sandbox Configuration\n# Only used when SANDBOX_BACKEND = \"kubernetes\"\n# ============================================================================\n\n# Namespace where sandbox pods are created\nSANDBOX_NAMESPACE = os.environ.get(\"SANDBOX_NAMESPACE\", \"onyx-sandboxes\")\n\n# Container image for sandbox pods\n# Should include Next.js template, opencode CLI, and demo_data zip\nSANDBOX_CONTAINER_IMAGE = os.environ.get(\n    \"SANDBOX_CONTAINER_IMAGE\", \"onyxdotapp/sandbox:v0.1.5\"\n)\n\n# S3 bucket for sandbox file storage (snapshots, knowledge files, uploads)\n# Path structure: s3://{bucket}/{tenant_id}/snapshots/{session_id}/{snapshot_id}.tar.gz\n#                 s3://{bucket}/{tenant_id}/knowledge/{user_id}/\n#                 s3://{bucket}/{tenant_id}/uploads/{session_id}/\nSANDBOX_S3_BUCKET = os.environ.get(\"SANDBOX_S3_BUCKET\", \"onyx-sandbox-files\")\n\n# Service account for sandbox pods (NO IRSA - no AWS API access)\nSANDBOX_SERVICE_ACCOUNT_NAME = os.environ.get(\n    \"SANDBOX_SERVICE_ACCOUNT_NAME\", \"sandbox-runner\"\n)\n\n# Service account for init container (has IRSA for S3 access)\nSANDBOX_FILE_SYNC_SERVICE_ACCOUNT = os.environ.get(\n    \"SANDBOX_FILE_SYNC_SERVICE_ACCOUNT\", \"sandbox-file-sync\"\n)\n\nENABLE_CRAFT = os.environ.get(\"ENABLE_CRAFT\", \"false\").lower() == \"true\"\n\n# ============================================================================\n# SSE Streaming Configuration\n# ============================================================================\n\n# SSE keepalive interval in seconds - send keepalive comment if no events\nSSE_KEEPALIVE_INTERVAL = float(os.environ.get(\"SSE_KEEPALIVE_INTERVAL\", \"15.0\"))\n\n# ============================================================================\n# ACP (Agent Communication Protocol) Configuration\n# ============================================================================\n\n# Timeout for ACP message processing in seconds\n# This is the maximum time to wait for a complete response from the agent\nACP_MESSAGE_TIMEOUT = float(os.environ.get(\"ACP_MESSAGE_TIMEOUT\", \"900.0\"))\n\n# ============================================================================\n# Rate Limiting Configuration\n# ============================================================================\n\n# Base rate limit for paid/subscribed users (messages per week)\n# Free users always get 5 messages total (not configurable)\n# Per-user overrides are managed via PostHog feature flag \"craft-has-usage-limits\"\nCRAFT_PAID_USER_RATE_LIMIT = int(os.environ.get(\"CRAFT_PAID_USER_RATE_LIMIT\", \"25\"))\n\n# ============================================================================\n# User Library Configuration\n# For user-uploaded raw files (xlsx, pptx, docx, etc.) in Craft\n# ============================================================================\n\n# Maximum size per file in MB (default 500MB)\nUSER_LIBRARY_MAX_FILE_SIZE_MB = int(\n    os.environ.get(\"USER_LIBRARY_MAX_FILE_SIZE_MB\", \"500\")\n)\nUSER_LIBRARY_MAX_FILE_SIZE_BYTES = USER_LIBRARY_MAX_FILE_SIZE_MB * 1024 * 1024\n\n# Maximum total storage per user in GB (default 10GB)\nUSER_LIBRARY_MAX_TOTAL_SIZE_GB = int(\n    os.environ.get(\"USER_LIBRARY_MAX_TOTAL_SIZE_GB\", \"10\")\n)\nUSER_LIBRARY_MAX_TOTAL_SIZE_BYTES = USER_LIBRARY_MAX_TOTAL_SIZE_GB * 1024 * 1024 * 1024\n\n# Maximum files per single upload request (default 100)\nUSER_LIBRARY_MAX_FILES_PER_UPLOAD = int(\n    os.environ.get(\"USER_LIBRARY_MAX_FILES_PER_UPLOAD\", \"100\")\n)\n\n# String constants for User Library entities\nUSER_LIBRARY_CONNECTOR_NAME = \"User Library\"\nUSER_LIBRARY_CREDENTIAL_NAME = \"User Library Credential\"\nUSER_LIBRARY_SOURCE_DIR = \"user_library\"\n"
  },
  {
    "path": "backend/onyx/server/features/build/db/__init__.py",
    "content": "# Database operations for the build feature\n"
  },
  {
    "path": "backend/onyx/server/features/build/db/build_session.py",
    "content": "\"\"\"Database operations for Build Mode sessions.\"\"\"\n\nfrom datetime import datetime\nfrom typing import Any\nfrom uuid import UUID\n\nfrom sqlalchemy import desc\nfrom sqlalchemy import exists\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import selectinload\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import MessageType\nfrom onyx.db.enums import BuildSessionStatus\nfrom onyx.db.enums import SandboxStatus\nfrom onyx.db.enums import SharingScope\nfrom onyx.db.models import Artifact\nfrom onyx.db.models import BuildMessage\nfrom onyx.db.models import BuildSession\nfrom onyx.db.models import LLMProvider as LLMProviderModel\nfrom onyx.db.models import Sandbox\nfrom onyx.server.features.build.configs import SANDBOX_NEXTJS_PORT_END\nfrom onyx.server.features.build.configs import SANDBOX_NEXTJS_PORT_START\nfrom onyx.server.manage.llm.models import LLMProviderView\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef create_build_session__no_commit(\n    user_id: UUID,\n    db_session: Session,\n    name: str | None = None,\n    demo_data_enabled: bool = True,\n) -> BuildSession:\n    \"\"\"Create a new build session for the given user.\n\n    NOTE: This function uses flush() instead of commit(). The caller is\n    responsible for committing the transaction when ready.\n\n    Args:\n        user_id: The user ID\n        db_session: Database session\n        name: Optional session name\n        demo_data_enabled: Whether this session uses demo data (default True)\n    \"\"\"\n    session = BuildSession(\n        user_id=user_id,\n        name=name,\n        status=BuildSessionStatus.ACTIVE,\n        demo_data_enabled=demo_data_enabled,\n    )\n    db_session.add(session)\n    db_session.flush()\n\n    logger.info(\n        f\"Created build session {session.id} for user {user_id} (demo_data={demo_data_enabled})\"\n    )\n    return session\n\n\ndef get_build_session(\n    session_id: UUID,\n    user_id: UUID,\n    db_session: Session,\n) -> BuildSession | None:\n    \"\"\"Get a build session by ID, ensuring it belongs to the user.\"\"\"\n    return (\n        db_session.query(BuildSession)\n        .filter(\n            BuildSession.id == session_id,\n            BuildSession.user_id == user_id,\n        )\n        .one_or_none()\n    )\n\n\ndef get_user_build_sessions(\n    user_id: UUID,\n    db_session: Session,\n    limit: int = 100,\n) -> list[BuildSession]:\n    \"\"\"Get all build sessions for a user that have at least one message.\n\n    Excludes empty (pre-provisioned) sessions from the listing.\n    \"\"\"\n    # Subquery to check if session has any messages\n    has_messages = exists().where(BuildMessage.session_id == BuildSession.id)\n\n    return (\n        db_session.query(BuildSession)\n        .filter(\n            BuildSession.user_id == user_id,\n            has_messages,  # Only sessions with messages\n        )\n        .order_by(desc(BuildSession.created_at))\n        .limit(limit)\n        .all()\n    )\n\n\ndef get_empty_session_for_user(\n    user_id: UUID,\n    db_session: Session,\n    demo_data_enabled: bool | None = None,\n) -> BuildSession | None:\n    \"\"\"Get an empty (pre-provisioned) session for the user if one exists.\n\n    Returns a session with no messages, or None if all sessions have messages.\n\n    Args:\n        user_id: The user ID\n        db_session: Database session\n        demo_data_enabled: Match sessions with this demo_data setting.\n                          If None, matches any session regardless of setting.\n    \"\"\"\n    # Subquery to check if session has any messages\n    has_messages = exists().where(BuildMessage.session_id == BuildSession.id)\n\n    query = db_session.query(BuildSession).filter(\n        BuildSession.user_id == user_id,\n        ~has_messages,  # Sessions with no messages only\n    )\n\n    if demo_data_enabled is not None:\n        query = query.filter(BuildSession.demo_data_enabled == demo_data_enabled)\n\n    return query.first()\n\n\ndef update_session_activity(\n    session_id: UUID,\n    db_session: Session,\n) -> None:\n    \"\"\"Update the last activity timestamp for a session.\"\"\"\n    session = (\n        db_session.query(BuildSession)\n        .filter(BuildSession.id == session_id)\n        .one_or_none()\n    )\n    if session:\n        session.last_activity_at = datetime.utcnow()\n        db_session.commit()\n\n\ndef update_session_status(\n    session_id: UUID,\n    status: BuildSessionStatus,\n    db_session: Session,\n) -> None:\n    \"\"\"Update the status of a build session.\"\"\"\n    session = (\n        db_session.query(BuildSession)\n        .filter(BuildSession.id == session_id)\n        .one_or_none()\n    )\n    if session:\n        session.status = status\n        db_session.commit()\n        logger.info(f\"Updated build session {session_id} status to {status}\")\n\n\ndef set_build_session_sharing_scope(\n    session_id: UUID,\n    user_id: UUID,\n    sharing_scope: SharingScope,\n    db_session: Session,\n) -> BuildSession | None:\n    \"\"\"Set the sharing scope of a build session.\n\n    Only the session owner can change this setting.\n    Returns the updated session, or None if not found/unauthorized.\n    \"\"\"\n    session = get_build_session(session_id, user_id, db_session)\n    if not session:\n        return None\n    session.sharing_scope = sharing_scope\n    db_session.commit()\n    logger.info(f\"Set build session {session_id} sharing_scope={sharing_scope}\")\n    return session\n\n\ndef delete_build_session__no_commit(\n    session_id: UUID,\n    user_id: UUID,\n    db_session: Session,\n) -> bool:\n    \"\"\"Delete a build session and all related data.\n\n    NOTE: This function uses flush() instead of commit(). The caller is\n    responsible for committing the transaction when ready.\n    \"\"\"\n    session = get_build_session(session_id, user_id, db_session)\n    if not session:\n        return False\n\n    db_session.delete(session)\n    db_session.flush()\n    logger.info(f\"Deleted build session {session_id}\")\n    return True\n\n\n# Sandbox operations\n# NOTE: Most sandbox operations have moved to sandbox.py\n# These remain here for convenience in session-related workflows\n\n\ndef update_sandbox_status(\n    sandbox_id: UUID,\n    status: SandboxStatus,\n    db_session: Session,\n    container_id: str | None = None,\n) -> None:\n    \"\"\"Update the status of a sandbox.\"\"\"\n    sandbox = db_session.query(Sandbox).filter(Sandbox.id == sandbox_id).one_or_none()\n    if sandbox:\n        sandbox.status = status\n        if container_id is not None:\n            sandbox.container_id = container_id\n        sandbox.last_heartbeat = datetime.utcnow()\n        db_session.commit()\n        logger.info(f\"Updated sandbox {sandbox_id} status to {status}\")\n\n\ndef update_sandbox_heartbeat(\n    sandbox_id: UUID,\n    db_session: Session,\n) -> None:\n    \"\"\"Update the heartbeat timestamp for a sandbox.\"\"\"\n    sandbox = db_session.query(Sandbox).filter(Sandbox.id == sandbox_id).one_or_none()\n    if sandbox:\n        sandbox.last_heartbeat = datetime.utcnow()\n        db_session.commit()\n\n\n# Artifact operations\ndef create_artifact(\n    session_id: UUID,\n    artifact_type: str,\n    path: str,\n    name: str,\n    db_session: Session,\n) -> Artifact:\n    \"\"\"Create a new artifact record.\"\"\"\n    artifact = Artifact(\n        session_id=session_id,\n        type=artifact_type,\n        path=path,\n        name=name,\n    )\n    db_session.add(artifact)\n    db_session.commit()\n    db_session.refresh(artifact)\n\n    logger.info(f\"Created artifact {artifact.id} for session {session_id}\")\n    return artifact\n\n\ndef get_session_artifacts(\n    session_id: UUID,\n    db_session: Session,\n) -> list[Artifact]:\n    \"\"\"Get all artifacts for a session.\"\"\"\n    return (\n        db_session.query(Artifact)\n        .filter(Artifact.session_id == session_id)\n        .order_by(desc(Artifact.created_at))\n        .all()\n    )\n\n\ndef update_artifact(\n    artifact_id: UUID,\n    db_session: Session,\n    path: str | None = None,\n    name: str | None = None,\n) -> None:\n    \"\"\"Update artifact metadata.\"\"\"\n    artifact = (\n        db_session.query(Artifact).filter(Artifact.id == artifact_id).one_or_none()\n    )\n    if artifact:\n        if path is not None:\n            artifact.path = path\n        if name is not None:\n            artifact.name = name\n        artifact.updated_at = datetime.utcnow()\n        db_session.commit()\n        logger.info(f\"Updated artifact {artifact_id}\")\n\n\n# Message operations\ndef create_message(\n    session_id: UUID,\n    message_type: MessageType,\n    turn_index: int,\n    message_metadata: dict[str, Any],\n    db_session: Session,\n) -> BuildMessage:\n    \"\"\"Create a new message in a build session.\n\n    All message data is stored in message_metadata as JSON.\n\n    Args:\n        session_id: Session UUID\n        message_type: Type of message (USER, ASSISTANT, SYSTEM)\n        turn_index: 0-indexed user message number this message belongs to\n        message_metadata: Required structured data (the raw ACP packet JSON)\n        db_session: Database session\n    \"\"\"\n    message = BuildMessage(\n        session_id=session_id,\n        turn_index=turn_index,\n        type=message_type,\n        message_metadata=message_metadata,\n    )\n    db_session.add(message)\n    db_session.commit()\n    db_session.refresh(message)\n\n    logger.info(\n        f\"Created {message_type.value} message {message.id} for session {session_id} \"\n        f\"turn={turn_index} type={message_metadata.get('type')}\"\n    )\n    return message\n\n\ndef update_message(\n    message_id: UUID,\n    message_metadata: dict[str, Any],\n    db_session: Session,\n) -> BuildMessage | None:\n    \"\"\"Update an existing message's metadata.\n\n    Used for upserting agent_plan_update messages.\n\n    Args:\n        message_id: The message UUID to update\n        message_metadata: New metadata to set\n        db_session: Database session\n\n    Returns:\n        Updated BuildMessage or None if not found\n    \"\"\"\n    message = (\n        db_session.query(BuildMessage).filter(BuildMessage.id == message_id).first()\n    )\n    if message is None:\n        return None\n\n    message.message_metadata = message_metadata\n    db_session.commit()\n    db_session.refresh(message)\n\n    logger.info(\n        f\"Updated message {message_id} metadata type={message_metadata.get('type')}\"\n    )\n    return message\n\n\ndef upsert_agent_plan(\n    session_id: UUID,\n    turn_index: int,\n    plan_metadata: dict[str, Any],\n    db_session: Session,\n    existing_plan_id: UUID | None = None,\n) -> BuildMessage:\n    \"\"\"Upsert an agent plan - update if exists, create if not.\n\n    Each session/turn should only have one agent_plan_update message.\n    This function updates the existing plan message or creates a new one.\n\n    Args:\n        session_id: Session UUID\n        turn_index: Current turn index\n        plan_metadata: The agent_plan_update packet data\n        db_session: Database session\n        existing_plan_id: ID of existing plan message to update (if known)\n\n    Returns:\n        The created or updated BuildMessage\n    \"\"\"\n    if existing_plan_id:\n        # Fast path: we know the plan ID\n        updated = update_message(existing_plan_id, plan_metadata, db_session)\n        if updated:\n            return updated\n\n    # Check if a plan already exists for this session/turn\n    existing_plan = (\n        db_session.query(BuildMessage)\n        .filter(\n            BuildMessage.session_id == session_id,\n            BuildMessage.turn_index == turn_index,\n            BuildMessage.message_metadata[\"type\"].astext == \"agent_plan_update\",\n        )\n        .first()\n    )\n\n    if existing_plan:\n        existing_plan.message_metadata = plan_metadata\n        db_session.commit()\n        db_session.refresh(existing_plan)\n        logger.info(\n            f\"Updated agent_plan_update message {existing_plan.id} for session {session_id}\"\n        )\n        return existing_plan\n\n    # Create new plan message\n    return create_message(\n        session_id=session_id,\n        message_type=MessageType.ASSISTANT,\n        turn_index=turn_index,\n        message_metadata=plan_metadata,\n        db_session=db_session,\n    )\n\n\ndef get_session_messages(\n    session_id: UUID,\n    db_session: Session,\n) -> list[BuildMessage]:\n    \"\"\"Get all messages for a session, ordered by turn index and creation time.\"\"\"\n    return (\n        db_session.query(BuildMessage)\n        .filter(BuildMessage.session_id == session_id)\n        .order_by(BuildMessage.turn_index, BuildMessage.created_at)\n        .all()\n    )\n\n\ndef _is_port_available(port: int) -> bool:\n    \"\"\"Check if a port is available by attempting to bind to it.\n\n    Checks both IPv4 and IPv6 wildcard addresses to properly detect\n    if anything is listening on the port, regardless of address family.\n    \"\"\"\n    import socket\n\n    logger.debug(f\"Checking if port {port} is available\")\n\n    # Check IPv4 wildcard (0.0.0.0) - this will detect any IPv4 listener\n    try:\n        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:\n            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)\n            sock.bind((\"0.0.0.0\", port))\n            logger.debug(f\"Port {port} IPv4 wildcard bind successful\")\n    except OSError as e:\n        logger.debug(f\"Port {port} IPv4 wildcard not available: {e}\")\n        return False\n\n    # Check IPv6 wildcard (::) - this will detect any IPv6 listener\n    try:\n        with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as sock:\n            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)\n            # IPV6_V6ONLY must be False to allow dual-stack behavior\n            sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)\n            sock.bind((\"::\", port))\n            logger.debug(f\"Port {port} IPv6 wildcard bind successful\")\n    except OSError as e:\n        logger.debug(f\"Port {port} IPv6 wildcard not available: {e}\")\n        return False\n\n    logger.debug(f\"Port {port} is available\")\n    return True\n\n\ndef allocate_nextjs_port(db_session: Session) -> int:\n    \"\"\"Allocate an available port for a new session.\n\n    Finds the first available port in the configured range by checking\n    both database allocations and system-level port availability.\n\n    Args:\n        db_session: Database session for querying allocated ports\n\n    Returns:\n        An available port number\n\n    Raises:\n        RuntimeError: If no ports are available in the configured range\n    \"\"\"\n    from onyx.db.models import BuildSession\n\n    # Get all currently allocated ports from active sessions\n    allocated_ports = set(\n        db_session.query(BuildSession.nextjs_port)\n        .filter(BuildSession.nextjs_port.isnot(None))\n        .all()\n    )\n    allocated_ports = {port[0] for port in allocated_ports if port[0] is not None}\n\n    # Find first port that's not in DB and not currently bound\n    for port in range(SANDBOX_NEXTJS_PORT_START, SANDBOX_NEXTJS_PORT_END):\n        if port not in allocated_ports and _is_port_available(port):\n            return port\n\n    raise RuntimeError(\n        f\"No available ports in range [{SANDBOX_NEXTJS_PORT_START}, {SANDBOX_NEXTJS_PORT_END})\"\n    )\n\n\ndef mark_user_sessions_idle__no_commit(db_session: Session, user_id: UUID) -> int:\n    \"\"\"Mark all ACTIVE sessions for a user as IDLE.\n\n    Called when a sandbox goes to sleep so the frontend knows these sessions\n    need restoration before they can be used again.\n\n    Args:\n        db_session: Database session\n        user_id: The user whose sessions should be marked idle\n\n    Returns:\n        Number of sessions updated\n    \"\"\"\n    result = (\n        db_session.query(BuildSession)\n        .filter(\n            BuildSession.user_id == user_id,\n            BuildSession.status == BuildSessionStatus.ACTIVE,\n        )\n        .update({BuildSession.status: BuildSessionStatus.IDLE})\n    )\n    db_session.flush()\n    logger.info(f\"Marked {result} sessions as IDLE for user {user_id}\")\n    return result\n\n\ndef clear_nextjs_ports_for_user(db_session: Session, user_id: UUID) -> int:\n    \"\"\"Clear nextjs_port for all sessions belonging to a user.\n\n    Called when sandbox goes to sleep to release port allocations.\n\n    Args:\n        db_session: Database session\n        user_id: The user whose sessions should have ports cleared\n\n    Returns:\n        Number of sessions updated\n    \"\"\"\n    result = (\n        db_session.query(BuildSession)\n        .filter(\n            BuildSession.user_id == user_id,\n            BuildSession.nextjs_port.isnot(None),\n        )\n        .update({BuildSession.nextjs_port: None})\n    )\n    db_session.flush()\n    logger.info(f\"Cleared {result} nextjs_port allocations for user {user_id}\")\n    return result\n\n\ndef fetch_llm_provider_by_type_for_build_mode(\n    db_session: Session, provider_type: str\n) -> LLMProviderView | None:\n    \"\"\"Fetch an LLM provider by its provider type (e.g., \"anthropic\", \"openai\").\n\n    Resolution priority:\n    1. First try to find a provider named \"build-mode-{type}\" (e.g., \"build-mode-anthropic\")\n    2. If not found, fall back to any provider that matches the type\n\n    Args:\n        db_session: Database session\n        provider_type: The provider type (e.g., \"anthropic\", \"openai\", \"openrouter\")\n\n    Returns:\n        LLMProviderView if found, None otherwise\n    \"\"\"\n    from onyx.db.llm import fetch_existing_llm_provider\n\n    # First try to find a \"build-mode-{type}\" provider\n    build_mode_name = f\"build-mode-{provider_type}\"\n    provider_model = fetch_existing_llm_provider(\n        name=build_mode_name, db_session=db_session\n    )\n\n    # If not found, fall back to any provider that matches the type\n    if not provider_model:\n        provider_model = db_session.scalar(\n            select(LLMProviderModel)\n            .where(LLMProviderModel.provider == provider_type)\n            .options(\n                selectinload(LLMProviderModel.model_configurations),\n                selectinload(LLMProviderModel.groups),\n                selectinload(LLMProviderModel.personas),\n            )\n        )\n\n    if not provider_model:\n        return None\n    return LLMProviderView.from_model(provider_model)\n"
  },
  {
    "path": "backend/onyx/server/features/build/db/rate_limit.py",
    "content": "\"\"\"Database queries for Build Mode rate limiting.\"\"\"\n\nfrom datetime import datetime\nfrom uuid import UUID\n\nfrom sqlalchemy import func\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import MessageType\nfrom onyx.db.models import BuildMessage\nfrom onyx.db.models import BuildSession\n\n\ndef count_user_messages_in_window(\n    user_id: UUID,\n    cutoff_time: datetime,\n    db_session: Session,\n) -> int:\n    \"\"\"\n    Count USER messages for a user since cutoff_time.\n\n    Args:\n        user_id: The user's UUID\n        cutoff_time: Only count messages created at or after this time\n        db_session: Database session\n\n    Returns:\n        Number of USER messages in the time window\n    \"\"\"\n    return (\n        db_session.query(func.count(BuildMessage.id))\n        .join(BuildSession, BuildMessage.session_id == BuildSession.id)\n        .filter(\n            BuildSession.user_id == user_id,\n            BuildMessage.type == MessageType.USER,\n            BuildMessage.created_at >= cutoff_time,\n        )\n        .scalar()\n        or 0\n    )\n\n\ndef count_user_messages_total(user_id: UUID, db_session: Session) -> int:\n    \"\"\"\n    Count all USER messages for a user (lifetime total).\n\n    Args:\n        user_id: The user's UUID\n        db_session: Database session\n\n    Returns:\n        Total number of USER messages\n    \"\"\"\n    return (\n        db_session.query(func.count(BuildMessage.id))\n        .join(BuildSession, BuildMessage.session_id == BuildSession.id)\n        .filter(\n            BuildSession.user_id == user_id,\n            BuildMessage.type == MessageType.USER,\n        )\n        .scalar()\n        or 0\n    )\n\n\ndef get_oldest_message_timestamp(\n    user_id: UUID,\n    cutoff_time: datetime,\n    db_session: Session,\n) -> datetime | None:\n    \"\"\"\n    Get the timestamp of the oldest USER message in the time window.\n\n    Used to calculate when the rate limit will reset (when the oldest\n    message ages out of the rolling window).\n\n    Args:\n        user_id: The user's UUID\n        cutoff_time: Only consider messages created at or after this time\n        db_session: Database session\n\n    Returns:\n        Timestamp of oldest message in window, or None if no messages\n    \"\"\"\n    return (\n        db_session.query(BuildMessage.created_at)\n        .join(BuildSession, BuildMessage.session_id == BuildSession.id)\n        .filter(\n            BuildSession.user_id == user_id,\n            BuildMessage.type == MessageType.USER,\n            BuildMessage.created_at >= cutoff_time,\n        )\n        .order_by(BuildMessage.created_at.asc())\n        .limit(1)\n        .scalar()\n    )\n"
  },
  {
    "path": "backend/onyx/server/features/build/db/sandbox.py",
    "content": "\"\"\"Database operations for CLI agent sandbox management.\"\"\"\n\nimport datetime\nfrom uuid import UUID\n\nfrom sqlalchemy import and_\nfrom sqlalchemy import func\nfrom sqlalchemy import or_\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.enums import SandboxStatus\nfrom onyx.db.models import Sandbox\nfrom onyx.db.models import Snapshot\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef create_sandbox__no_commit(\n    db_session: Session,\n    user_id: UUID,\n) -> Sandbox:\n    \"\"\"Create a new sandbox record for a user.\n\n    Sets last_heartbeat to now so that:\n    1. The sandbox has a proper idle timeout baseline from creation\n    2. Long-running provisioning doesn't cause the sandbox to appear \"old\"\n       when it transitions to RUNNING\n\n    NOTE: This function uses flush() instead of commit(). The caller is\n    responsible for committing the transaction when ready.\n    \"\"\"\n    sandbox = Sandbox(\n        user_id=user_id,\n        status=SandboxStatus.PROVISIONING,\n        last_heartbeat=datetime.datetime.now(datetime.timezone.utc),\n    )\n    db_session.add(sandbox)\n    db_session.flush()\n    return sandbox\n\n\ndef get_sandbox_by_user_id(db_session: Session, user_id: UUID) -> Sandbox | None:\n    \"\"\"Get sandbox by user ID (primary lookup method).\"\"\"\n    stmt = select(Sandbox).where(Sandbox.user_id == user_id)\n    return db_session.execute(stmt).scalar_one_or_none()\n\n\ndef get_sandbox_by_session_id(db_session: Session, session_id: UUID) -> Sandbox | None:\n    \"\"\"Get sandbox by session ID (compatibility function).\n\n    This function provides backwards compatibility during the transition to\n    user-owned sandboxes. It looks up the session's user_id, then finds the\n    user's sandbox.\n\n    NOTE: This will be removed in a future phase when all callers are updated\n    to use get_sandbox_by_user_id() directly.\n    \"\"\"\n    from onyx.db.models import BuildSession\n\n    stmt = select(BuildSession.user_id).where(BuildSession.id == session_id)\n    result = db_session.execute(stmt).scalar_one_or_none()\n    if result is None:\n        return None\n\n    return get_sandbox_by_user_id(db_session, result)\n\n\ndef get_sandbox_by_id(db_session: Session, sandbox_id: UUID) -> Sandbox | None:\n    \"\"\"Get sandbox by its ID.\"\"\"\n    stmt = select(Sandbox).where(Sandbox.id == sandbox_id)\n    return db_session.execute(stmt).scalar_one_or_none()\n\n\ndef update_sandbox_status__no_commit(\n    db_session: Session,\n    sandbox_id: UUID,\n    status: SandboxStatus,\n) -> Sandbox:\n    \"\"\"Update sandbox status.\n\n    When transitioning to RUNNING, also sets last_heartbeat to now. This ensures\n    newly provisioned sandboxes have a proper idle timeout baseline (rather than\n    being immediately considered idle due to NULL heartbeat).\n\n    NOTE: This function uses flush() instead of commit(). The caller is\n    responsible for committing the transaction when ready.\n    \"\"\"\n    sandbox = get_sandbox_by_id(db_session, sandbox_id)\n    if not sandbox:\n        raise ValueError(f\"Sandbox {sandbox_id} not found\")\n\n    sandbox.status = status\n\n    # Set heartbeat when sandbox becomes active to establish idle timeout baseline\n    if status == SandboxStatus.RUNNING:\n        sandbox.last_heartbeat = datetime.datetime.now(datetime.timezone.utc)\n\n    db_session.flush()\n    return sandbox\n\n\ndef update_sandbox_heartbeat(db_session: Session, sandbox_id: UUID) -> Sandbox:\n    \"\"\"Update sandbox last_heartbeat to now.\"\"\"\n    sandbox = get_sandbox_by_id(db_session, sandbox_id)\n    if not sandbox:\n        raise ValueError(f\"Sandbox {sandbox_id} not found\")\n\n    sandbox.last_heartbeat = datetime.datetime.now(datetime.timezone.utc)\n    db_session.commit()\n    return sandbox\n\n\ndef get_idle_sandboxes(\n    db_session: Session, idle_threshold_seconds: int\n) -> list[Sandbox]:\n    \"\"\"Get sandboxes that have been idle longer than threshold.\n\n    Also includes sandboxes with NULL heartbeat, but only if they were created\n    before the threshold (to avoid sweeping up brand-new sandboxes that may have\n    NULL heartbeat due to edge cases like older rows or manual inserts).\n    \"\"\"\n    threshold_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(\n        seconds=idle_threshold_seconds\n    )\n\n    stmt = select(Sandbox).where(\n        Sandbox.status == SandboxStatus.RUNNING,\n        or_(\n            Sandbox.last_heartbeat < threshold_time,\n            and_(\n                Sandbox.last_heartbeat.is_(None),\n                Sandbox.created_at < threshold_time,\n            ),\n        ),\n    )\n    return list(db_session.execute(stmt).scalars().all())\n\n\ndef get_running_sandbox_count_by_tenant(\n    db_session: Session,\n    tenant_id: str,  # noqa: ARG001\n) -> int:\n    \"\"\"Get count of running sandboxes for a tenant (for limit enforcement).\n\n    Note: tenant_id parameter is kept for API compatibility but is not used\n    since Sandbox model no longer has tenant_id. This function returns\n    the count of all running sandboxes.\n    \"\"\"\n    stmt = select(func.count(Sandbox.id)).where(Sandbox.status == SandboxStatus.RUNNING)\n    result = db_session.execute(stmt).scalar()\n    return result or 0\n\n\ndef create_snapshot__no_commit(\n    db_session: Session,\n    session_id: UUID,\n    storage_path: str,\n    size_bytes: int,\n) -> Snapshot:\n    \"\"\"Create a snapshot record for a session.\n\n    NOTE: Uses flush() instead of commit(). The caller (cleanup task) is\n    responsible for committing after all snapshots + status updates are done,\n    so the entire operation is atomic.\n    \"\"\"\n    snapshot = Snapshot(\n        session_id=session_id,\n        storage_path=storage_path,\n        size_bytes=size_bytes,\n    )\n    db_session.add(snapshot)\n    db_session.flush()\n    return snapshot\n\n\ndef get_latest_snapshot_for_session(\n    db_session: Session, session_id: UUID\n) -> Snapshot | None:\n    \"\"\"Get most recent snapshot for a session.\"\"\"\n    stmt = (\n        select(Snapshot)\n        .where(Snapshot.session_id == session_id)\n        .order_by(Snapshot.created_at.desc())\n        .limit(1)\n    )\n    return db_session.execute(stmt).scalar_one_or_none()\n\n\ndef get_snapshots_for_session(db_session: Session, session_id: UUID) -> list[Snapshot]:\n    \"\"\"Get all snapshots for a session, ordered by creation time descending.\"\"\"\n    stmt = (\n        select(Snapshot)\n        .where(Snapshot.session_id == session_id)\n        .order_by(Snapshot.created_at.desc())\n    )\n    return list(db_session.execute(stmt).scalars().all())\n\n\ndef delete_old_snapshots(\n    db_session: Session,\n    tenant_id: str,  # noqa: ARG001\n    retention_days: int,\n) -> int:\n    \"\"\"Delete snapshots older than retention period, return count deleted.\n\n    Note: tenant_id parameter is kept for API compatibility but is not used\n    since Snapshot model no longer has tenant_id. This function deletes\n    all snapshots older than the retention period.\n    \"\"\"\n    cutoff_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(\n        days=retention_days\n    )\n\n    stmt = select(Snapshot).where(\n        Snapshot.created_at < cutoff_time,\n    )\n    old_snapshots = db_session.execute(stmt).scalars().all()\n\n    count = 0\n    for snapshot in old_snapshots:\n        db_session.delete(snapshot)\n        count += 1\n\n    if count > 0:\n        db_session.commit()\n\n    return count\n\n\ndef delete_snapshot(db_session: Session, snapshot_id: UUID) -> bool:\n    \"\"\"Delete a specific snapshot by ID. Returns True if deleted, False if not found.\"\"\"\n    stmt = select(Snapshot).where(Snapshot.id == snapshot_id)\n    snapshot = db_session.execute(stmt).scalar_one_or_none()\n\n    if not snapshot:\n        return False\n\n    db_session.delete(snapshot)\n    db_session.commit()\n    return True\n"
  },
  {
    "path": "backend/onyx/server/features/build/db/user_library.py",
    "content": "\"\"\"Database operations for User Library (CRAFT_FILE connector).\n\nHandles storage quota queries and connector/credential setup for the\nUser Library feature in Craft.\n\"\"\"\n\nfrom uuid import UUID\n\nfrom sqlalchemy import and_\nfrom sqlalchemy import cast\nfrom sqlalchemy import func\nfrom sqlalchemy import Integer\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.connector import create_connector\nfrom onyx.db.connector import fetch_connectors\nfrom onyx.db.connector_credential_pair import add_credential_to_connector\nfrom onyx.db.connector_credential_pair import (\n    get_connector_credential_pairs_for_user,\n)\nfrom onyx.db.credentials import create_credential\nfrom onyx.db.credentials import fetch_credentials_for_user\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ProcessingMode\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Document as DbDocument\nfrom onyx.db.models import DocumentByConnectorCredentialPair\nfrom onyx.db.models import User\nfrom onyx.server.documents.models import ConnectorBase\nfrom onyx.server.documents.models import CredentialBase\nfrom onyx.server.features.build.configs import USER_LIBRARY_CONNECTOR_NAME\nfrom onyx.server.features.build.configs import USER_LIBRARY_CREDENTIAL_NAME\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef get_user_storage_bytes(db_session: Session, user_id: UUID) -> int:\n    \"\"\"Get total storage usage for a user's library files.\n\n    Uses SQL aggregation to sum file_size from doc_metadata JSONB for all\n    CRAFT_FILE documents owned by this user, avoiding loading all documents\n    into Python memory.\n    \"\"\"\n    stmt = (\n        select(\n            func.coalesce(\n                func.sum(\n                    cast(\n                        DbDocument.doc_metadata[\"file_size\"].as_string(),\n                        Integer,\n                    )\n                ),\n                0,\n            )\n        )\n        .join(\n            DocumentByConnectorCredentialPair,\n            DbDocument.id == DocumentByConnectorCredentialPair.id,\n        )\n        .join(\n            ConnectorCredentialPair,\n            and_(\n                DocumentByConnectorCredentialPair.connector_id\n                == ConnectorCredentialPair.connector_id,\n                DocumentByConnectorCredentialPair.credential_id\n                == ConnectorCredentialPair.credential_id,\n            ),\n        )\n        .join(\n            Connector,\n            ConnectorCredentialPair.connector_id == Connector.id,\n        )\n        .where(Connector.source == DocumentSource.CRAFT_FILE)\n        .where(ConnectorCredentialPair.creator_id == user_id)\n        .where(DbDocument.doc_metadata[\"is_directory\"].as_boolean().is_not(True))\n    )\n    result = db_session.execute(stmt).scalar()\n    return int(result or 0)\n\n\ndef get_or_create_craft_connector(db_session: Session, user: User) -> tuple[int, int]:\n    \"\"\"Get or create the CRAFT_FILE connector for a user.\n\n    Returns:\n        Tuple of (connector_id, credential_id)\n\n    Note: We need to create a credential even though CRAFT_FILE doesn't require\n    authentication. This is because Onyx's connector-credential pair system\n    requires a credential for all connectors. The credential is empty ({}).\n\n    This function handles recovery from partial creation failures by detecting\n    orphaned connectors (connectors without cc_pairs) and completing their setup.\n    \"\"\"\n    # Check if user already has a complete CRAFT_FILE cc_pair\n    cc_pairs = get_connector_credential_pairs_for_user(\n        db_session=db_session,\n        user=user,\n        get_editable=False,\n        eager_load_connector=True,\n        eager_load_credential=True,\n        processing_mode=ProcessingMode.RAW_BINARY,\n    )\n\n    for cc_pair in cc_pairs:\n        if (\n            cc_pair.connector.source == DocumentSource.CRAFT_FILE\n            and cc_pair.creator_id == user.id\n        ):\n            return cc_pair.connector.id, cc_pair.credential.id\n\n    # No cc_pair for this user — find or create the shared CRAFT_FILE connector\n    existing_connectors = fetch_connectors(\n        db_session, sources=[DocumentSource.CRAFT_FILE]\n    )\n    connector_id: int | None = None\n    for conn in existing_connectors:\n        if conn.name == USER_LIBRARY_CONNECTOR_NAME:\n            connector_id = conn.id\n            break\n\n    if connector_id is None:\n        connector_data = ConnectorBase(\n            name=USER_LIBRARY_CONNECTOR_NAME,\n            source=DocumentSource.CRAFT_FILE,\n            input_type=InputType.LOAD_STATE,\n            connector_specific_config={\"disabled_paths\": []},\n            refresh_freq=None,\n            prune_freq=None,\n        )\n        connector_response = create_connector(\n            db_session=db_session,\n            connector_data=connector_data,\n        )\n        connector_id = connector_response.id\n\n    # Try to reuse an existing User Library credential for this user\n    existing_credentials = fetch_credentials_for_user(\n        db_session=db_session,\n        user=user,\n    )\n    credential = None\n    for cred in existing_credentials:\n        if (\n            cred.source == DocumentSource.CRAFT_FILE\n            and cred.name == USER_LIBRARY_CREDENTIAL_NAME\n        ):\n            credential = cred\n            break\n\n    if credential is None:\n        credential_data = CredentialBase(\n            credential_json={},\n            admin_public=False,\n            source=DocumentSource.CRAFT_FILE,\n            name=USER_LIBRARY_CREDENTIAL_NAME,\n        )\n        credential = create_credential(\n            credential_data=credential_data,\n            user=user,\n            db_session=db_session,\n        )\n\n    # Link them with RAW_BINARY processing mode\n    add_credential_to_connector(\n        db_session=db_session,\n        connector_id=connector_id,\n        credential_id=credential.id,\n        user=user,\n        cc_pair_name=USER_LIBRARY_CONNECTOR_NAME,\n        access_type=AccessType.PRIVATE,\n        groups=None,\n        processing_mode=ProcessingMode.RAW_BINARY,\n    )\n\n    db_session.commit()\n    return connector_id, credential.id\n"
  },
  {
    "path": "backend/onyx/server/features/build/indexing/persistent_document_writer.py",
    "content": "\"\"\"\nPersistent Document Writer for writing indexed documents to local filesystem or S3 with\nhierarchical directory structure that mirrors the source organization.\n\nLocal mode (SandboxBackend.LOCAL):\n    Writes to local filesystem at {PERSISTENT_DOCUMENT_STORAGE_PATH}/{tenant_id}/knowledge/{user_id}/...\n\nKubernetes mode (SandboxBackend.KUBERNETES):\n    Writes to S3 at s3://{SANDBOX_S3_BUCKET}/{tenant_id}/knowledge/{user_id}/...\n    This is the same location that kubernetes_sandbox_manager.py reads from when\n    provisioning sandboxes.\n\nBoth modes use consistent tenant/user-segregated paths for multi-tenant isolation.\n\"\"\"\n\nimport hashlib\nimport json\nimport unicodedata\nfrom pathlib import Path\nfrom typing import Any\n\nfrom botocore.exceptions import ClientError\nfrom mypy_boto3_s3.client import S3Client\n\nfrom onyx.connectors.models import Document\nfrom onyx.server.features.build.configs import PERSISTENT_DOCUMENT_STORAGE_PATH\nfrom onyx.server.features.build.configs import SANDBOX_BACKEND\nfrom onyx.server.features.build.configs import SANDBOX_S3_BUCKET\nfrom onyx.server.features.build.configs import SandboxBackend\nfrom onyx.server.features.build.s3.s3_client import build_s3_client\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n# =============================================================================\n# Shared Utilities for Path Building\n# =============================================================================\n\n\ndef sanitize_path_component(component: str, replace_slash: bool = True) -> str:\n    \"\"\"Sanitize a path component for file system / S3 key safety.\n\n    Args:\n        component: The path component to sanitize\n        replace_slash: If True, replaces forward slashes (needed for local filesystem).\n                      Set to False for S3 where `/` is a valid delimiter.\n\n    Returns:\n        Sanitized path component safe for use in file paths or S3 keys\n    \"\"\"\n    # First, normalize Unicode to decomposed form and remove combining characters\n    # This handles cases like accented characters, while also filtering format chars\n    normalized = unicodedata.normalize(\"NFKD\", component)\n\n    # Filter out Unicode format/control characters (categories Cf, Cc)\n    # This removes invisible chars like U+2060 (WORD JOINER), zero-width spaces, etc.\n    sanitized = \"\".join(\n        c for c in normalized if unicodedata.category(c) not in (\"Cf\", \"Cc\")\n    )\n\n    # Replace spaces with underscores\n    sanitized = sanitized.replace(\" \", \"_\")\n    # Replace problematic characters\n    if replace_slash:\n        sanitized = sanitized.replace(\"/\", \"_\")\n    sanitized = sanitized.replace(\"\\\\\", \"_\").replace(\":\", \"_\")\n    sanitized = sanitized.replace(\"<\", \"_\").replace(\">\", \"_\").replace(\"|\", \"_\")\n    sanitized = sanitized.replace('\"', \"_\").replace(\"?\", \"_\").replace(\"*\", \"_\")\n    return sanitized.strip() or \"unnamed\"\n\n\ndef sanitize_filename(name: str, replace_slash: bool = True) -> str:\n    \"\"\"Sanitize name for use as filename.\n\n    Args:\n        name: The filename to sanitize\n        replace_slash: Passed through to sanitize_path_component\n\n    Returns:\n        Sanitized filename, truncated with hash suffix if too long\n    \"\"\"\n    sanitized = sanitize_path_component(name, replace_slash=replace_slash)\n    if len(sanitized) > 200:\n        # Keep first 150 chars + hash suffix for uniqueness\n        hash_suffix = hashlib.sha256(name.encode()).hexdigest()[:16]\n        return f\"{sanitized[:150]}_{hash_suffix}\"\n    return sanitized\n\n\ndef normalize_leading_slash(path: str) -> str:\n    \"\"\"Ensure a path starts with exactly one leading slash.\"\"\"\n    return \"/\" + path.lstrip(\"/\")\n\n\ndef get_base_filename(doc: Document, replace_slash: bool = True) -> str:\n    \"\"\"Get base filename from document, preferring semantic identifier.\n\n    Args:\n        doc: The document to get filename for\n        replace_slash: Passed through to sanitize_filename\n\n    Returns:\n        Sanitized base filename (without extension)\n    \"\"\"\n    name = doc.semantic_identifier or doc.title or doc.id\n    return sanitize_filename(name, replace_slash=replace_slash)\n\n\ndef build_document_subpath(doc: Document, replace_slash: bool = True) -> list[str]:\n    \"\"\"Build the source/hierarchy path components from a document.\n\n    Returns path components like: [source, hierarchy_part1, hierarchy_part2, ...]\n\n    This is the common part of the path that comes after user/tenant segregation.\n\n    Args:\n        doc: The document to build path for\n        replace_slash: Passed through to sanitize_path_component\n\n    Returns:\n        List of sanitized path components\n    \"\"\"\n    parts: list[str] = []\n\n    # Source type (e.g., \"google_drive\", \"confluence\")\n    parts.append(doc.source.value)\n\n    # Get hierarchy from doc_metadata\n    hierarchy: dict[str, Any] = (\n        doc.doc_metadata.get(\"hierarchy\", {}) if doc.doc_metadata else {}\n    )\n    source_path: list[str] = hierarchy.get(\"source_path\", [])\n\n    if source_path:\n        parts.extend(\n            [\n                sanitize_path_component(p, replace_slash=replace_slash)\n                for p in source_path\n            ]\n        )\n\n    return parts\n\n\ndef resolve_duplicate_filename(\n    doc: Document,\n    base_filename: str,\n    has_duplicates: bool,\n    replace_slash: bool = True,\n) -> str:\n    \"\"\"Resolve filename, appending ID suffix if there are duplicates.\n\n    Args:\n        doc: The document (for ID extraction)\n        base_filename: The base filename without extension\n        has_duplicates: Whether there are other docs with the same base filename\n        replace_slash: Passed through to sanitize_path_component\n\n    Returns:\n        Final filename with .json extension\n    \"\"\"\n    if has_duplicates:\n        id_suffix = sanitize_path_component(doc.id, replace_slash=replace_slash)\n        if len(id_suffix) > 50:\n            id_suffix = hashlib.sha256(doc.id.encode()).hexdigest()[:16]\n        return f\"{base_filename}_{id_suffix}.json\"\n    return f\"{base_filename}.json\"\n\n\ndef serialize_document(doc: Document) -> dict[str, Any]:\n    \"\"\"Serialize a document to a dictionary for JSON storage.\n\n    Args:\n        doc: The document to serialize\n\n    Returns:\n        Dictionary representation of the document\n    \"\"\"\n    return {\n        \"id\": doc.id,\n        \"semantic_identifier\": doc.semantic_identifier,\n        \"title\": doc.title,\n        \"source\": doc.source.value,\n        \"doc_updated_at\": (\n            doc.doc_updated_at.isoformat() if doc.doc_updated_at else None\n        ),\n        \"metadata\": doc.metadata,\n        \"doc_metadata\": doc.doc_metadata,\n        \"sections\": [\n            {\"text\": s.text if hasattr(s, \"text\") else None, \"link\": s.link}\n            for s in doc.sections\n        ],\n        \"primary_owners\": [o.model_dump() for o in (doc.primary_owners or [])],\n        \"secondary_owners\": [o.model_dump() for o in (doc.secondary_owners or [])],\n    }\n\n\n# =============================================================================\n# Classes\n# =============================================================================\n\n\nclass PersistentDocumentWriter:\n    \"\"\"Writes indexed documents to local filesystem with hierarchical structure.\n\n    Documents are stored in tenant/user-segregated paths:\n    {base_path}/{tenant_id}/knowledge/{user_id}/{source}/{hierarchy}/document.json\n\n    This enables per-tenant and per-user isolation for sandbox access control.\n    \"\"\"\n\n    def __init__(\n        self,\n        base_path: str,\n        tenant_id: str,\n        user_id: str,\n    ):\n        self.base_path = Path(base_path)\n        self.tenant_id = tenant_id\n        self.user_id = user_id\n\n    def write_documents(self, documents: list[Document]) -> list[str]:\n        \"\"\"Write documents to local filesystem, returns written file paths.\"\"\"\n        written_paths: list[str] = []\n\n        # Build a map of base filenames to detect duplicates\n        # Key: (directory_path, base_filename) -> list of docs with that name\n        filename_map: dict[tuple[Path, str], list[Document]] = {}\n\n        for doc in documents:\n            dir_path = self._build_directory_path(doc)\n            base_filename = get_base_filename(doc, replace_slash=True)\n            key = (dir_path, base_filename)\n            if key not in filename_map:\n                filename_map[key] = []\n            filename_map[key].append(doc)\n\n        # Now write documents, appending ID if there are duplicates\n        for (dir_path, base_filename), docs in filename_map.items():\n            has_duplicates = len(docs) > 1\n            for doc in docs:\n                filename = resolve_duplicate_filename(\n                    doc, base_filename, has_duplicates, replace_slash=True\n                )\n                path = dir_path / filename\n                self._write_document(doc, path)\n                written_paths.append(str(path))\n\n        return written_paths\n\n    def _build_directory_path(self, doc: Document) -> Path:\n        \"\"\"Build directory path from document metadata.\n\n        Documents are stored under tenant/user-segregated paths:\n        {base_path}/{tenant_id}/knowledge/{user_id}/{source}/{hierarchy}/\n\n        This enables per-tenant and per-user isolation for sandbox access control.\n        \"\"\"\n        # Tenant and user segregation prefix (matches S3 path structure)\n        parts = [self.tenant_id, \"knowledge\", self.user_id]\n        # Add source and hierarchy from document\n        parts.extend(build_document_subpath(doc, replace_slash=True))\n\n        return self.base_path / \"/\".join(parts)\n\n    def _write_document(self, doc: Document, path: Path) -> None:\n        \"\"\"Serialize and write document to filesystem.\"\"\"\n        content = serialize_document(doc)\n\n        # Create parent directories if they don't exist\n        path.parent.mkdir(parents=True, exist_ok=True)\n\n        # Write the JSON file\n        with open(path, \"w\", encoding=\"utf-8\") as f:\n            json.dump(content, f, indent=2, default=str)\n\n        logger.debug(f\"Wrote document to {path}\")\n\n    def write_raw_file(\n        self,\n        path: str,\n        content: bytes,\n        content_type: str | None = None,  # noqa: ARG002\n    ) -> str:\n        \"\"\"Write a raw binary file to local filesystem (for User Library).\n\n        Unlike write_documents which serializes Document objects to JSON, this method\n        writes raw binary content directly. Used for user-uploaded files like xlsx, pptx.\n\n        Args:\n            path: Relative path within user's library (e.g., \"/project-data/financials.xlsx\")\n            content: Raw binary content to write\n            content_type: MIME type of the file (stored as metadata, unused locally)\n\n        Returns:\n            Full filesystem path where file was written\n        \"\"\"\n        # Build full path: {base_path}/{tenant}/knowledge/{user}/user_library/{path}\n        normalized_path = normalize_leading_slash(path)\n        full_path = (\n            self.base_path\n            / self.tenant_id\n            / \"knowledge\"\n            / self.user_id\n            / \"user_library\"\n            / normalized_path.lstrip(\"/\")\n        )\n\n        # Create parent directories if they don't exist\n        full_path.parent.mkdir(parents=True, exist_ok=True)\n\n        # Write the raw binary content\n        with open(full_path, \"wb\") as f:\n            f.write(content)\n\n        logger.debug(f\"Wrote raw file to {full_path}\")\n        return str(full_path)\n\n    def delete_raw_file(self, path: str) -> None:\n        \"\"\"Delete a raw file from local filesystem.\n\n        Args:\n            path: Relative path within user's library (e.g., \"/project-data/financials.xlsx\")\n        \"\"\"\n        # Build full path\n        normalized_path = normalize_leading_slash(path)\n        full_path = (\n            self.base_path\n            / self.tenant_id\n            / \"knowledge\"\n            / self.user_id\n            / \"user_library\"\n            / normalized_path.lstrip(\"/\")\n        )\n\n        if full_path.exists():\n            full_path.unlink()\n            logger.debug(f\"Deleted raw file at {full_path}\")\n        else:\n            logger.warning(f\"File not found for deletion: {full_path}\")\n\n\nclass S3PersistentDocumentWriter:\n    \"\"\"Writes indexed documents to S3 with hierarchical structure.\n\n    Documents are stored in tenant/user-segregated paths:\n    s3://{bucket}/{tenant_id}/knowledge/{user_id}/{source}/{hierarchy}/document.json\n\n    This matches the location that KubernetesSandboxManager reads from when\n    provisioning sandboxes (via the sidecar container's s5cmd sync command).\n    \"\"\"\n\n    def __init__(self, tenant_id: str, user_id: str):\n        \"\"\"Initialize S3PersistentDocumentWriter.\n\n        Args:\n            tenant_id: Tenant identifier for multi-tenant isolation\n            user_id: User ID for user-segregated storage paths\n        \"\"\"\n        self.tenant_id = tenant_id\n        self.user_id = user_id\n        self.bucket = SANDBOX_S3_BUCKET\n        self._s3_client: S3Client | None = None\n\n    def _get_s3_client(self) -> S3Client:\n        \"\"\"Lazily initialize S3 client.\n\n        Uses the craft-specific boto3 client which only supports IAM roles (IRSA).\n        \"\"\"\n        if self._s3_client is None:\n            self._s3_client = build_s3_client()\n        return self._s3_client\n\n    def write_documents(self, documents: list[Document]) -> list[str]:\n        \"\"\"Write documents to S3, returns written S3 keys.\n\n        Args:\n            documents: List of documents to write\n\n        Returns:\n            List of S3 keys that were written\n        \"\"\"\n        written_keys: list[str] = []\n\n        # Build a map of base keys to detect duplicates\n        # Key: (directory_prefix, base_filename) -> list of docs with that name\n        key_map: dict[tuple[str, str], list[Document]] = {}\n\n        for doc in documents:\n            dir_prefix = self._build_directory_path(doc)\n            base_filename = get_base_filename(doc, replace_slash=False)\n            key = (dir_prefix, base_filename)\n            if key not in key_map:\n                key_map[key] = []\n            key_map[key].append(doc)\n\n        # Now write documents, appending ID if there are duplicates\n        s3_client = self._get_s3_client()\n\n        for (dir_prefix, base_filename), docs in key_map.items():\n            has_duplicates = len(docs) > 1\n            for doc in docs:\n                filename = resolve_duplicate_filename(\n                    doc, base_filename, has_duplicates, replace_slash=False\n                )\n                s3_key = f\"{dir_prefix}/{filename}\"\n                self._write_document(s3_client, doc, s3_key)\n                written_keys.append(s3_key)\n\n        return written_keys\n\n    def _build_directory_path(self, doc: Document) -> str:\n        \"\"\"Build S3 key prefix from document metadata.\n\n        Documents are stored under tenant/user-segregated paths:\n        {tenant_id}/knowledge/{user_id}/{source}/{hierarchy}/\n\n        This matches the path that KubernetesSandboxManager syncs from:\n        s5cmd sync \"s3://{bucket}/{tenant_id}/knowledge/{user_id}/*\" /workspace/files/\n        \"\"\"\n        # Tenant and user segregation (matches K8s sandbox init container path)\n        parts = [self.tenant_id, \"knowledge\", self.user_id]\n        # Add source and hierarchy from document\n        parts.extend(build_document_subpath(doc, replace_slash=False))\n\n        return \"/\".join(parts)\n\n    def _write_document(self, s3_client: S3Client, doc: Document, s3_key: str) -> None:\n        \"\"\"Serialize and write document to S3.\"\"\"\n        content = serialize_document(doc)\n        json_content = json.dumps(content, indent=2, default=str)\n\n        try:\n            s3_client.put_object(\n                Bucket=self.bucket,\n                Key=s3_key,\n                Body=json_content.encode(\"utf-8\"),\n                ContentType=\"application/json\",\n            )\n            logger.debug(f\"Wrote document to s3://{self.bucket}/{s3_key}\")\n        except ClientError as e:\n            logger.error(f\"Failed to write to S3: {e}\")\n            raise\n\n    def write_raw_file(\n        self,\n        path: str,\n        content: bytes,\n        content_type: str | None = None,\n    ) -> str:\n        \"\"\"Write a raw binary file to S3 (for User Library).\n\n        Unlike write_documents which serializes Document objects to JSON, this method\n        writes raw binary content directly. Used for user-uploaded files like xlsx, pptx.\n\n        Args:\n            path: Relative path within user's library (e.g., \"/project-data/financials.xlsx\")\n            content: Raw binary content to write\n            content_type: MIME type of the file\n\n        Returns:\n            S3 key where file was written\n        \"\"\"\n        # Build S3 key: {tenant}/knowledge/{user}/user_library/{path}\n        normalized_path = path.lstrip(\"/\")\n        s3_key = (\n            f\"{self.tenant_id}/knowledge/{self.user_id}/user_library/{normalized_path}\"\n        )\n\n        s3_client = self._get_s3_client()\n\n        try:\n            s3_client.put_object(\n                Bucket=self.bucket,\n                Key=s3_key,\n                Body=content,\n                ContentType=content_type or \"application/octet-stream\",\n            )\n            logger.debug(f\"Wrote raw file to s3://{self.bucket}/{s3_key}\")\n            return s3_key\n        except ClientError as e:\n            logger.error(f\"Failed to write raw file to S3: {e}\")\n            raise\n\n    def delete_raw_file(self, s3_key: str) -> None:\n        \"\"\"Delete a raw file from S3.\n\n        Args:\n            s3_key: Full S3 key of the file to delete\n        \"\"\"\n        s3_client = self._get_s3_client()\n\n        try:\n            s3_client.delete_object(Bucket=self.bucket, Key=s3_key)\n            logger.debug(f\"Deleted raw file at s3://{self.bucket}/{s3_key}\")\n        except ClientError as e:\n            logger.error(f\"Failed to delete raw file from S3: {e}\")\n            raise\n\n    def delete_raw_file_by_path(self, path: str) -> None:\n        \"\"\"Delete a raw file from S3 by its relative path.\n\n        Args:\n            path: Relative path within user's library (e.g., \"/project-data/financials.xlsx\")\n        \"\"\"\n        normalized_path = path.lstrip(\"/\")\n        s3_key = (\n            f\"{self.tenant_id}/knowledge/{self.user_id}/user_library/{normalized_path}\"\n        )\n        self.delete_raw_file(s3_key)\n\n\ndef get_persistent_document_writer(\n    user_id: str,\n    tenant_id: str,\n) -> PersistentDocumentWriter | S3PersistentDocumentWriter:\n    \"\"\"Factory function to create a PersistentDocumentWriter with default configuration.\n\n    Args:\n        user_id: User ID for user-segregated storage paths.\n        tenant_id: Tenant ID for multi-tenant isolation.\n\n    Both local and S3 modes use consistent tenant/user-segregated paths:\n        - Local: {base_path}/{tenant_id}/knowledge/{user_id}/...\n        - S3: s3://{bucket}/{tenant_id}/knowledge/{user_id}/...\n\n    Returns:\n        PersistentDocumentWriter for local mode, S3PersistentDocumentWriter for K8s mode\n    \"\"\"\n    if SANDBOX_BACKEND == SandboxBackend.LOCAL:\n        return PersistentDocumentWriter(\n            base_path=PERSISTENT_DOCUMENT_STORAGE_PATH,\n            tenant_id=tenant_id,\n            user_id=user_id,\n        )\n    elif SANDBOX_BACKEND == SandboxBackend.KUBERNETES:\n        return S3PersistentDocumentWriter(\n            tenant_id=tenant_id,\n            user_id=user_id,\n        )\n    else:\n        raise ValueError(f\"Unknown sandbox backend: {SANDBOX_BACKEND}\")\n"
  },
  {
    "path": "backend/onyx/server/features/build/s3/s3_client.py",
    "content": "import boto3\nfrom mypy_boto3_s3.client import S3Client\n\nfrom onyx.configs.app_configs import AWS_REGION_NAME\n\n\ndef build_s3_client() -> S3Client:\n    \"\"\"Build an S3 client using IAM roles (IRSA)\"\"\"\n    return boto3.client(\"s3\", region_name=AWS_REGION_NAME)\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/README.md",
    "content": "# Onyx Sandbox System\n\nThis directory contains the implementation of Onyx's sandbox system for running OpenCode agents in isolated environments.\n\n## Overview\n\nThe sandbox system provides isolated execution environments where OpenCode agents can build web applications, run code, and interact with knowledge files. Each sandbox includes:\n\n- **Next.js development environment** - Lightweight Next.js scaffold with shadcn/ui and Recharts for building UIs\n- **Python virtual environment** - Pre-installed packages for data processing\n- **OpenCode agent** - AI coding agent with access to tools and MCP servers\n- **Knowledge files** - Access to indexed documents and user uploads\n\n## Architecture\n\n### Deployment Modes\n\n1. **Local Mode** (`SANDBOX_BACKEND=local`)\n   - Sandboxes run as directories on the local filesystem\n   - No automatic cleanup or snapshots\n   - Suitable for development and testing\n\n2. **Kubernetes Mode** (`SANDBOX_BACKEND=kubernetes`)\n   - Sandboxes run as Kubernetes pods\n   - Automatic snapshots to S3\n   - Auto-cleanup of idle sandboxes\n   - Production-ready with resource isolation\n\n### Directory Structure\n\n```\n/workspace/                          # Sandbox root (in container)\n├── outputs/                         # Working directory\n│   ├── web/                        # Lightweight Next.js app (shadcn/ui, Recharts)\n│   ├── slides/                     # Generated presentations\n│   ├── markdown/                   # Generated documents\n│   └── graphs/                     # Generated visualizations\n├── .venv/                          # Python virtual environment\n├── files/                          # Symlink to knowledge files\n├── attachments/                    # User uploads\n├── AGENTS.md                       # Agent instructions\n└── .opencode/\n    └── skills/                     # Agent skills\n```\n\n## Setup\n\n### Running via Docker/Kubernetes (Zero Setup!) 🎉\n\n**No setup required!** Just build and deploy:\n\n```bash\n# Build backend image (includes both templates)\ncd backend\ndocker build -f Dockerfile.sandbox-templates -t onyxdotapp/backend:latest .\n\n# Build sandbox container (lightweight runner)\ncd onyx/server/features/build/sandbox/kubernetes/docker\ndocker build -t onyxdotapp/sandbox:latest .\n\n# Deploy with docker-compose or kubectl - sandboxes work immediately!\n```\n\n**How it works:**\n\n- **Backend image**: Contains both templates at build time:\n  - Web template at `/templates/outputs/web` (lightweight Next.js scaffold, ~2MB)\n  - Python venv template at `/templates/venv` (pre-installed packages, ~50MB)\n- **Init container** (Kubernetes only): Syncs knowledge files from S3\n- **Sandbox startup**: Runs `npm install` (for fresh dependency locks) + `next dev`\n\n### Running Backend Directly (Without Docker)\n\n**Only needed if you're running the Onyx backend outside of Docker.** Most developers use Docker and can skip this section.\n\nIf you're running the backend Python process directly on your machine, you need templates at `/templates/`:\n\n#### Web Template\n\nThe web template is a lightweight Next.js app (Next.js 16, React 19, shadcn/ui, Recharts) checked into the codebase at `backend/onyx/server/features/build/templates/outputs/web/`.\n\nFor local development, create a symlink to this template:\n\n```bash\nsudo mkdir -p /templates/outputs\nsudo ln -s $(pwd)/backend/onyx/server/features/build/templates/outputs/web /templates/outputs/web\n```\n\n#### Python Venv Template\n\nIf you don't have a venv template, create it:\n\n```bash\n# Use the utility script\ncd backend\npython -m onyx.server.features.build.sandbox.util.build_venv_template\n\n# Or manually\npython3 -m venv /templates/venv\n/templates/venv/bin/pip install -r backend/onyx/server/features/build/sandbox/kubernetes/docker/initial-requirements.txt\n```\n\n#### System Dependencies (for PPTX skill)\n\nThe PPTX skill requires LibreOffice and Poppler for PDF conversion and thumbnail generation:\n\n**macOS:**\n\n```bash\nbrew install poppler\nbrew install --cask libreoffice\n```\n\nEnsure `soffice` is on your PATH:\n\n```bash\nexport PATH=\"/Applications/LibreOffice.app/Contents/MacOS:$PATH\"\n```\n\n**Linux (Debian/Ubuntu):**\n\n```bash\nsudo apt-get install libreoffice-impress poppler-utils\n```\n\n**That's it!** When sandboxes are created:\n\n1. Web template is copied from `/templates/outputs/web`\n2. Python venv is copied from `/templates/venv`\n3. `npm install` runs automatically to install fresh Next.js dependencies\n\n## OpenCode Configuration\n\nEach sandbox includes an OpenCode agent configured with:\n\n- **LLM Provider**: Anthropic, OpenAI, Google, Bedrock, or Azure\n- **Extended thinking**: High reasoning effort / thinking budgets for complex tasks\n- **Tool permissions**: File operations, bash commands, web access\n- **Disabled tools**: Configurable via `OPENCODE_DISABLED_TOOLS` env var\n\nConfiguration is generated dynamically in `templates/opencode_config.py`.\n\n## Key Components\n\n### Managers\n\n- **`base.py`** - Abstract base class defining the sandbox interface\n- **`local/manager.py`** - Filesystem-based sandbox manager for local development\n- **`kubernetes/manager.py`** - Kubernetes-based sandbox manager for production\n\n### Managers (Shared)\n\n- **`manager/directory_manager.py`** - Creates sandbox directory structure and copies templates\n- **`manager/snapshot_manager.py`** - Handles snapshot creation and restoration\n\n### Utilities\n\n- **`util/opencode_config.py`** - Generates OpenCode configuration with MCP support\n- **`util/agent_instructions.py`** - Generates agent instructions (AGENTS.md)\n- **`util/build_venv_template.py`** - Utility to build Python venv template for local development\n\n### Templates\n\n- **`../templates/outputs/web/`** - Lightweight Next.js scaffold (shadcn/ui, Recharts) versioned with the backend code\n\n### Kubernetes Specific\n\n- **`kubernetes/docker/Dockerfile`** - Sandbox container image (runs Next.js + OpenCode)\n- **`kubernetes/docker/entrypoint.sh`** - Container startup script\n\n## Environment Variables\n\n### Core Settings\n\n```bash\n# Sandbox backend mode\nSANDBOX_BACKEND=local|kubernetes           # Default: local\n\n# Template paths (local mode)\nOUTPUTS_TEMPLATE_PATH=/templates/outputs   # Default: /templates/outputs\nVENV_TEMPLATE_PATH=/templates/venv        # Default: /templates/venv\n\n# Sandbox base path (local mode)\nSANDBOX_BASE_PATH=/tmp/onyx-sandboxes     # Default: /tmp/onyx-sandboxes\n\n# OpenCode configuration\nOPENCODE_DISABLED_TOOLS=question          # Comma-separated list, default: question\n```\n\n### Kubernetes Settings\n\n```bash\n# Kubernetes namespace\nSANDBOX_NAMESPACE=onyx-sandboxes          # Default: onyx-sandboxes\n\n# Container image\nSANDBOX_CONTAINER_IMAGE=onyxdotapp/sandbox:latest\n\n# S3 bucket for snapshots and files\nSANDBOX_S3_BUCKET=onyx-sandbox-files      # Default: onyx-sandbox-files\n\n# Service accounts\nSANDBOX_SERVICE_ACCOUNT_NAME=sandbox-runner          # No AWS access\nSANDBOX_FILE_SYNC_SERVICE_ACCOUNT=sandbox-file-sync  # Has S3 access via IRSA\n```\n\n### Lifecycle Settings\n\n```bash\n# Idle timeout before cleanup (seconds)\nSANDBOX_IDLE_TIMEOUT_SECONDS=900          # Default: 900 (15 minutes)\n\n# Max concurrent sandboxes per organization\nSANDBOX_MAX_CONCURRENT_PER_ORG=10         # Default: 10\n\n# Next.js port range (local mode)\nSANDBOX_NEXTJS_PORT_START=3010            # Default: 3010\nSANDBOX_NEXTJS_PORT_END=3100              # Default: 3100\n```\n\n## Testing\n\n### Integration Tests\n\n```bash\n# Test local sandbox provisioning\nuv run pytest backend/tests/integration/sandbox/test_local_sandbox.py\n\n# Test Kubernetes sandbox provisioning (requires k8s cluster)\nuv run pytest backend/tests/integration/sandbox/test_kubernetes_sandbox.py\n```\n\n### Manual Testing\n\n```bash\n# Start a local sandbox session\ncurl -X POST http://localhost:3000/api/build/session \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"user_id\": \"user-123\",\n    \"file_system_path\": \"/path/to/files\"\n  }'\n\n# Send a message to the agent\ncurl -X POST http://localhost:3000/api/build/session/{session_id}/message \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"message\": \"Create a simple web page\"\n  }'\n```\n\n## Troubleshooting\n\n### Sandbox Stuck in PROVISIONING (Kubernetes)\n\n**Symptoms**: Sandbox status never changes from `PROVISIONING`\n\n**Solutions**:\n\n- Check pod logs: `kubectl logs -n onyx-sandboxes sandbox-{sandbox-id}`\n- Check init container: `kubectl logs -n onyx-sandboxes sandbox-{sandbox-id} -c file-sync`\n- Verify init container completed: `kubectl describe pod -n onyx-sandboxes sandbox-{sandbox-id}`\n- Check S3 bucket access: Ensure init container service account has IRSA configured\n\n### Next.js Server Won't Start\n\n**Symptoms**: Sandbox provisioned but web preview doesn't load\n\n**Solutions**:\n\n- **Local mode**: Check if port is already in use\n- **Docker/K8s**: Check container logs: `kubectl logs -n onyx-sandboxes sandbox-{sandbox-id}`\n- Verify npm install succeeded (check entrypoint.sh logs)\n- Check that web template was copied: `kubectl exec -n onyx-sandboxes sandbox-{sandbox-id} -- ls /workspace/outputs/web`\n\n### Templates Not Found (Local Mode)\n\n**Symptoms**: `RuntimeError: Sandbox templates are missing`\n\n**Solution**: Set up templates as described in the \"Local Development\" section above:\n\n```bash\n# Symlink web template\nsudo ln -s $(pwd)/backend/onyx/server/features/build/templates/outputs/web /templates/outputs/web\n\n# Create Python venv\npython3 -m venv /templates/venv\n/templates/venv/bin/pip install -r backend/onyx/server/features/build/sandbox/kubernetes/docker/initial-requirements.txt\n```\n\n### Permission Denied\n\n**Symptoms**: `Permission denied` error accessing `/templates/`\n\n**Solution**: Either use sudo when creating symlinks, or use custom paths:\n\n```bash\nexport OUTPUTS_TEMPLATE_PATH=$HOME/.onyx/templates/outputs\nexport VENV_TEMPLATE_PATH=$HOME/.onyx/templates/venv\n\n# Then symlink to your home directory\nmkdir -p $HOME/.onyx/templates/outputs\nln -s $(pwd)/backend/onyx/server/features/build/templates/outputs/web $HOME/.onyx/templates/outputs/web\n```\n\n## Security Considerations\n\n### Sandbox Isolation\n\n- **Kubernetes pods** run with restricted security context (non-root, no privilege escalation)\n- **Init containers** have S3 access for file sync, but main sandbox container does NOT\n- **Network policies** can restrict sandbox egress traffic\n- **Resource limits** prevent resource exhaustion\n\n### Credentials Management\n\n- LLM API keys are passed as environment variables (not stored in sandbox)\n- User file access is read-only via symlinks\n- Snapshots are isolated per tenant in S3\n\n## Development\n\n### Adding New MCP Servers\n\n1. Add MCP configuration to `templates/opencode_config.py`:\n\n   ```python\n   config[\"mcp\"] = {\n       \"my-mcp\": {\n           \"type\": \"local\",\n           \"command\": [\"npx\", \"@my/mcp@latest\"],\n           \"enabled\": True,\n       }\n   }\n   ```\n\n2. Install required npm packages in web template (if needed)\n\n3. Rebuild Docker image and templates\n\n### Modifying Agent Instructions\n\nEdit `AGENTS.template.md` in the build directory. This is populated with dynamic content by `templates/agent_instructions.py`.\n\n### Adding New Tools/Permissions\n\nUpdate `templates/opencode_config.py` to add/remove tool permissions in the `permission` section.\n\n## Template Details\n\n### Web Template\n\nThe lightweight Next.js template (`backend/onyx/server/features/build/templates/outputs/web/`) includes:\n\n- **Framework**: Next.js 16.1.4 with React 19.2.3\n- **UI Library**: shadcn/ui components with Radix UI primitives\n- **Styling**: Tailwind CSS v4 with custom theming support\n- **Charts**: Recharts for data visualization\n- **Size**: ~2MB (excluding node_modules, which are installed fresh per sandbox)\n\nThis template provides a modern development environment without the complexity of the full Onyx application, allowing agents to build custom UIs quickly.\n\n### Python Venv Template\n\nThe Python venv (`/templates/venv/`) includes packages from `initial-requirements.txt`:\n\n- Data processing: pandas, numpy, polars\n- HTTP clients: requests, httpx\n- Utilities: python-dotenv, pydantic\n\n## References\n\n- [OpenCode Documentation](https://docs.opencode.ai)\n- [Next.js Documentation](https://nextjs.org/docs)\n- [shadcn/ui Components](https://ui.shadcn.com)\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/__init__.py",
    "content": "\"\"\"\nSandbox module for CLI agent filesystem-based isolation.\n\nThis module provides lightweight sandbox management for CLI-based AI agent sessions.\nEach sandbox is a directory on the local filesystem or a Kubernetes pod.\n\nUsage:\n    from onyx.server.features.build.sandbox import get_sandbox_manager\n\n    # Get the appropriate sandbox manager based on SANDBOX_BACKEND config\n    sandbox_manager = get_sandbox_manager()\n\n    # Use the sandbox manager\n    sandbox_info = sandbox_manager.provision(...)\n\nModule structure:\n    - base.py: SandboxManager ABC and get_sandbox_manager() factory\n    - models.py: Shared Pydantic models\n    - local/: Local filesystem-based implementation for development\n    - kubernetes/: Kubernetes pod-based implementation for production\n    - internal/: Shared internal utilities (snapshot manager)\n\"\"\"\n\nfrom onyx.server.features.build.sandbox.base import get_sandbox_manager\nfrom onyx.server.features.build.sandbox.base import SandboxManager\nfrom onyx.server.features.build.sandbox.local.local_sandbox_manager import (\n    LocalSandboxManager,\n)\nfrom onyx.server.features.build.sandbox.models import FilesystemEntry\nfrom onyx.server.features.build.sandbox.models import SandboxInfo\nfrom onyx.server.features.build.sandbox.models import SnapshotInfo\n\n__all__ = [\n    # Factory function (preferred)\n    \"get_sandbox_manager\",\n    # Interface\n    \"SandboxManager\",\n    # Implementations\n    \"LocalSandboxManager\",\n    # Models\n    \"SandboxInfo\",\n    \"SnapshotInfo\",\n    \"FilesystemEntry\",\n]\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/base.py",
    "content": "\"\"\"Abstract base class and factory for sandbox operations.\n\nSandboxManager is the abstract interface for sandbox lifecycle management.\nUse get_sandbox_manager() to get the appropriate implementation based on SANDBOX_BACKEND.\n\nIMPORTANT: SandboxManager implementations must NOT interface with the database directly.\nAll database operations should be handled by the caller (SessionManager, Celery tasks, etc.).\n\nArchitecture Note (User-Shared Sandbox Model):\n- One sandbox (container/pod) is shared across all of a user's sessions\n- provision() creates the user's sandbox with shared files/ directory\n- setup_session_workspace() creates per-session workspace within the sandbox\n- cleanup_session_workspace() removes session workspace on session delete\n- terminate() destroys the entire sandbox (all sessions)\n\"\"\"\n\nimport threading\nfrom abc import ABC\nfrom abc import abstractmethod\nfrom collections.abc import Generator\nfrom typing import Any\nfrom uuid import UUID\n\nfrom onyx.server.features.build.configs import SANDBOX_BACKEND\nfrom onyx.server.features.build.configs import SandboxBackend\nfrom onyx.server.features.build.sandbox.models import FilesystemEntry\nfrom onyx.server.features.build.sandbox.models import LLMProviderConfig\nfrom onyx.server.features.build.sandbox.models import SandboxInfo\nfrom onyx.server.features.build.sandbox.models import SnapshotResult\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# ACPEvent is a union type defined in both local and kubernetes modules\n# Using Any here to avoid circular imports - the actual type checking\n# happens in the implementation modules\nACPEvent = Any\n\n\nclass SandboxManager(ABC):\n    \"\"\"Abstract interface for sandbox operations.\n\n    Defines the contract for sandbox lifecycle management including:\n    - Provisioning and termination (user-level)\n    - Session workspace setup and cleanup (session-level)\n    - Snapshot creation (session-level)\n    - Health checks\n    - Agent communication (session-level)\n    - Filesystem operations (session-level)\n\n    Directory Structure:\n        $SANDBOX_ROOT/\n        ├── files/                     # SHARED - symlink to user's persistent documents\n        └── sessions/\n            ├── $session_id_1/         # Per-session workspace\n            │   ├── outputs/           # Agent output for this session\n            │   │   └── web/           # Next.js app\n            │   ├── venv/              # Python virtual environment\n            │   ├── skills/            # Opencode skills\n            │   ├── AGENTS.md          # Agent instructions\n            │   ├── opencode.json      # LLM config\n            │   └── attachments/\n            └── $session_id_2/\n                └── ...\n\n    IMPORTANT: Implementations must NOT interface with the database directly.\n    All database operations should be handled by the caller.\n\n    Use get_sandbox_manager() to get the appropriate implementation.\n    \"\"\"\n\n    @abstractmethod\n    def provision(\n        self,\n        sandbox_id: UUID,\n        user_id: UUID,\n        tenant_id: str,\n        llm_config: LLMProviderConfig,\n    ) -> SandboxInfo:\n        \"\"\"Provision a new sandbox for a user.\n\n        Creates the sandbox container/directory with:\n        - sessions/ directory for per-session workspaces\n\n        NOTE: This does NOT set up session-specific workspaces.\n        Call setup_session_workspace() after provisioning to create a session workspace.\n\n        Args:\n            sandbox_id: Unique identifier for the sandbox\n            user_id: User identifier who owns this sandbox\n            tenant_id: Tenant identifier for multi-tenant isolation\n            llm_config: LLM provider configuration (for default config)\n\n        Returns:\n            SandboxInfo with the provisioned sandbox details\n\n        Raises:\n            RuntimeError: If provisioning fails\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def terminate(self, sandbox_id: UUID) -> None:\n        \"\"\"Terminate a sandbox and clean up all resources.\n\n        Destroys the entire sandbox including all session workspaces.\n        Use cleanup_session_workspace() to remove individual sessions.\n\n        Args:\n            sandbox_id: The sandbox ID to terminate\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def setup_session_workspace(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        llm_config: LLMProviderConfig,\n        nextjs_port: int,\n        file_system_path: str | None = None,\n        snapshot_path: str | None = None,\n        user_name: str | None = None,\n        user_role: str | None = None,\n        user_work_area: str | None = None,\n        user_level: str | None = None,\n        use_demo_data: bool = False,\n        excluded_user_library_paths: list[str] | None = None,\n    ) -> None:\n        \"\"\"Set up a session workspace within an existing sandbox.\n\n        Creates the per-session directory structure:\n        - sessions/$session_id/outputs/ (from snapshot or template)\n        - sessions/$session_id/venv/\n        - sessions/$session_id/skills/\n        - sessions/$session_id/files/ (symlink to demo data or user files)\n        - sessions/$session_id/AGENTS.md\n        - sessions/$session_id/opencode.json\n        - sessions/$session_id/attachments/\n        - sessions/$session_id/org_info/ (if demo data enabled)\n\n        Args:\n            sandbox_id: The sandbox ID (must be provisioned)\n            session_id: The session ID for this workspace\n            llm_config: LLM provider configuration for opencode.json\n            file_system_path: Path to user's knowledge/source files\n            snapshot_path: Optional storage path to restore outputs from\n            user_name: User's name for personalization in AGENTS.md\n            user_role: User's role/title for personalization in AGENTS.md\n            user_work_area: User's work area for demo persona (e.g., \"engineering\")\n            user_level: User's level for demo persona (e.g., \"ic\", \"manager\")\n            use_demo_data: If True, symlink files/ to demo data; else to user files\n            excluded_user_library_paths: List of paths within user_library to exclude\n                from the sandbox (e.g., [\"/data/file.xlsx\"]). Only applies when\n                use_demo_data=False. Files at these paths won't be accessible.\n\n        Raises:\n            RuntimeError: If workspace setup fails\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def cleanup_session_workspace(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        nextjs_port: int | None = None,\n    ) -> None:\n        \"\"\"Clean up a session workspace (on session delete).\n\n        1. Stop the Next.js dev server if running on nextjs_port\n        2. Remove the session directory: sessions/$session_id/\n\n        Does NOT terminate the sandbox - other sessions may still be using it.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID to clean up\n            nextjs_port: Optional port where Next.js server is running\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def create_snapshot(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        tenant_id: str,\n    ) -> SnapshotResult | None:\n        \"\"\"Create a snapshot of a session's outputs and attachments directories.\n\n        Captures session-specific user data:\n        - sessions/$session_id/outputs/ (generated artifacts, web apps)\n        - sessions/$session_id/attachments/ (user uploaded files)\n\n        Does NOT include: venv, skills, AGENTS.md, opencode.json, files symlink\n        (these are regenerated during restore)\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID to snapshot\n            tenant_id: Tenant identifier for storage path\n\n        Returns:\n            SnapshotResult with storage path and size, or None if:\n            - Snapshots are disabled for this backend\n            - No outputs directory exists (nothing to snapshot)\n\n        Raises:\n            RuntimeError: If snapshot creation fails\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def restore_snapshot(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        snapshot_storage_path: str,\n        tenant_id: str,\n        nextjs_port: int,\n        llm_config: LLMProviderConfig,\n        use_demo_data: bool = False,\n    ) -> None:\n        \"\"\"Restore a session workspace from a snapshot.\n\n        For Kubernetes: Downloads and extracts the snapshot, regenerates config files.\n        For Local: No-op since workspaces persist on disk (no snapshots).\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID to restore\n            snapshot_storage_path: Path to the snapshot in storage\n            tenant_id: Tenant identifier for storage access\n            nextjs_port: Port number for the NextJS dev server\n            llm_config: LLM provider configuration for opencode.json\n            use_demo_data: If True, symlink files/ to demo data\n\n        Raises:\n            RuntimeError: If snapshot restoration fails\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def session_workspace_exists(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n    ) -> bool:\n        \"\"\"Check if a session's workspace directory exists in the sandbox.\n\n        Used to determine if we need to restore from snapshot.\n        Checks for sessions/$session_id/outputs/ directory.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID to check\n\n        Returns:\n            True if the session workspace exists, False otherwise\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def health_check(self, sandbox_id: UUID, timeout: float = 60.0) -> bool:\n        \"\"\"Check if the sandbox is healthy.\n\n        Args:\n            sandbox_id: The sandbox ID to check\n\n        Returns:\n            True if sandbox is healthy, False otherwise\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def send_message(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        message: str,\n    ) -> Generator[ACPEvent, None, None]:\n        \"\"\"Send a message to the CLI agent and stream typed ACP events.\n\n        The agent runs in the session-specific workspace:\n        sessions/$session_id/\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID (determines workspace directory)\n            message: The message content to send\n\n        Yields:\n            Typed ACP schema event objects\n\n        Raises:\n            RuntimeError: If agent communication fails\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def list_directory(\n        self, sandbox_id: UUID, session_id: UUID, path: str\n    ) -> list[FilesystemEntry]:\n        \"\"\"List contents of a directory in the session's outputs directory.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            path: Relative path within sessions/$session_id/outputs/\n\n        Returns:\n            List of FilesystemEntry objects sorted by directory first, then name\n\n        Raises:\n            ValueError: If path traversal attempted or path is not a directory\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def read_file(self, sandbox_id: UUID, session_id: UUID, path: str) -> bytes:\n        \"\"\"Read a file from the session's workspace.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            path: Relative path within sessions/$session_id/\n\n        Returns:\n            File contents as bytes\n\n        Raises:\n            ValueError: If path traversal attempted or path is not a file\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def upload_file(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        filename: str,\n        content: bytes,\n    ) -> str:\n        \"\"\"Upload a file to the session's attachments directory.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            filename: Sanitized filename\n            content: File content as bytes\n\n        Returns:\n            Relative path where file was saved (e.g., \"attachments/doc.pdf\")\n\n        Raises:\n            RuntimeError: If upload fails\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def delete_file(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        path: str,\n    ) -> bool:\n        \"\"\"Delete a file from the session's workspace.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            path: Relative path to the file (e.g., \"attachments/doc.pdf\")\n\n        Returns:\n            True if file was deleted, False if not found\n\n        Raises:\n            ValueError: If path traversal attempted\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def get_upload_stats(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n    ) -> tuple[int, int]:\n        \"\"\"Get current file count and total size for a session's attachments.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n\n        Returns:\n            Tuple of (file_count, total_size_bytes)\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def get_webapp_url(self, sandbox_id: UUID, port: int) -> str:\n        \"\"\"Get the webapp URL for a session's Next.js server.\n\n        Returns the appropriate URL based on the backend:\n        - Local: Returns localhost URL with port\n        - Kubernetes: Returns internal cluster service URL\n\n        Args:\n            sandbox_id: The sandbox ID\n            port: The session's allocated Next.js port\n\n        Returns:\n            URL to access the webapp\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def generate_pptx_preview(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        pptx_path: str,\n        cache_dir: str,\n    ) -> tuple[list[str], bool]:\n        \"\"\"Convert PPTX to slide JPEG images for preview, with caching.\n\n        Checks if cache_dir already has slides. If the PPTX is newer than the\n        cached images (or no cache exists), runs soffice -> pdftoppm pipeline.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            pptx_path: Relative path to the PPTX file within the session workspace\n            cache_dir: Relative path for the cache directory\n                       (e.g., \"outputs/.pptx-preview/abc123\")\n\n        Returns:\n            Tuple of (slide_paths, cached) where slide_paths is a list of\n            relative paths to slide JPEG images (within session workspace)\n            and cached indicates whether the result was served from cache.\n\n        Raises:\n            ValueError: If file not found or conversion fails\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def sync_files(\n        self,\n        sandbox_id: UUID,\n        user_id: UUID,\n        tenant_id: str,\n        source: str | None = None,\n    ) -> bool:\n        \"\"\"Sync files from S3 to the sandbox's /workspace/files directory.\n\n        For Kubernetes backend: Executes `s5cmd sync` in the file-sync sidecar container.\n        For Local backend: No-op since files are directly accessible via symlink.\n\n        This is idempotent - only downloads changed files. File visibility in\n        sessions is controlled via filtered symlinks in setup_session_workspace(),\n        not at the sync level.\n\n        Args:\n            sandbox_id: The sandbox UUID\n            user_id: The user ID (for S3 path construction)\n            tenant_id: The tenant ID (for S3 path construction)\n            source: Optional source type (e.g., \"gmail\", \"google_drive\").\n                    If None, syncs all sources. If specified, only syncs\n                    that source's directory.\n\n        Returns:\n            True if sync was successful, False otherwise.\n        \"\"\"\n        ...\n\n    def ensure_nextjs_running(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        nextjs_port: int,\n    ) -> None:\n        \"\"\"Ensure the Next.js server is running for a session.\n\n        Default is a no-op — only meaningful for local backends that manage\n        process lifecycles directly (e.g., LocalSandboxManager).\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            nextjs_port: The port the Next.js server should be listening on\n        \"\"\"\n\n\n# Singleton instance cache for the factory\n_sandbox_manager_instance: SandboxManager | None = None\n_sandbox_manager_lock = threading.Lock()\n\n\ndef get_sandbox_manager() -> SandboxManager:\n    \"\"\"Get the appropriate SandboxManager implementation based on SANDBOX_BACKEND.\n\n    Returns:\n        SandboxManager instance:\n        - LocalSandboxManager for local backend (development)\n        - KubernetesSandboxManager for kubernetes backend (production)\n    \"\"\"\n    global _sandbox_manager_instance\n\n    if _sandbox_manager_instance is None:\n        with _sandbox_manager_lock:\n            if _sandbox_manager_instance is None:\n                if SANDBOX_BACKEND == SandboxBackend.LOCAL:\n                    from onyx.server.features.build.sandbox.local.local_sandbox_manager import (\n                        LocalSandboxManager,\n                    )\n\n                    _sandbox_manager_instance = LocalSandboxManager()\n                elif SANDBOX_BACKEND == SandboxBackend.KUBERNETES:\n                    from onyx.server.features.build.sandbox.kubernetes.kubernetes_sandbox_manager import (\n                        KubernetesSandboxManager,\n                    )\n\n                    _sandbox_manager_instance = KubernetesSandboxManager()\n                    logger.info(\"Using KubernetesSandboxManager for sandbox operations\")\n                else:\n                    raise ValueError(f\"Unknown sandbox backend: {SANDBOX_BACKEND}\")\n\n    return _sandbox_manager_instance\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/__init__.py",
    "content": "\"\"\"Kubernetes-based sandbox implementation.\n\nThis module provides the KubernetesSandboxManager for production deployments\nthat run sandboxes as isolated Kubernetes pods.\n\nInternal implementation details (acp_http_client) are in the internal/\nsubdirectory and should not be used directly.\n\"\"\"\n\nfrom onyx.server.features.build.sandbox.kubernetes.kubernetes_sandbox_manager import (\n    KubernetesSandboxManager,\n)\n\n__all__ = [\n    \"KubernetesSandboxManager\",\n]\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/Dockerfile",
    "content": "# Sandbox Container Image\n#\n# User-shared sandbox model:\n# - One pod per user, shared across all user's sessions\n# - Session workspaces created via kubectl exec (setup_session_workspace)\n# - OpenCode agent runs via kubectl exec when needed\n#\n# Directory structure (created by init container + session setup):\n#   /workspace/\n#   ├── demo_data/       # Demo data (baked into image, for demo sessions)\n#   ├── files/           # User's knowledge files (synced from S3)\n#   ├── skills/          # Agent skills (baked into image, copied per-session)\n#   ├── templates/       # Output templates (baked into image)\n#   └── sessions/        # Per-session workspaces (created via exec)\n#       └── $session_id/\n#           ├── files/   # Symlink to /workspace/demo_data or /workspace/files\n#           ├── outputs/\n#           ├── AGENTS.md\n#           └── opencode.json\n\nFROM node:20-slim\n\n# Install system dependencies\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n    python3 \\\n    python3-pip \\\n    python3-venv \\\n    curl \\\n    git \\\n    procps \\\n    unzip \\\n    \\\n    libreoffice-core \\\n    libreoffice-common \\\n    libreoffice-impress \\\n    libreoffice-draw \\\n    poppler-utils \\\n    gcc \\\n    libc6-dev \\\n    fontconfig \\\n    fonts-dejavu-core \\\n    fonts-liberation \\\n    && rm -rf /var/lib/apt/lists/*\n\n# Create non-root user (matches pod securityContext)\n# Handle existing user/group with UID/GID 1000 in base image\nRUN EXISTING_USER=$(id -nu 1000 2>/dev/null || echo \"\"); \\\n    EXISTING_GROUP=$(getent group 1000 | cut -d: -f1 2>/dev/null || echo \"\"); \\\n    if [ -n \"$EXISTING_GROUP\" ] && [ \"$EXISTING_GROUP\" != \"sandbox\" ]; then \\\n    groupmod -n sandbox $EXISTING_GROUP; \\\n    elif [ -z \"$EXISTING_GROUP\" ]; then \\\n    groupadd -g 1000 sandbox; \\\n    fi; \\\n    if [ -n \"$EXISTING_USER\" ] && [ \"$EXISTING_USER\" != \"sandbox\" ]; then \\\n    usermod -l sandbox -g sandbox $EXISTING_USER; \\\n    usermod -d /home/sandbox -m sandbox; \\\n    usermod -s /bin/bash sandbox; \\\n    elif [ -z \"$EXISTING_USER\" ]; then \\\n    useradd -u 1000 -g sandbox -m -s /bin/bash sandbox; \\\n    fi\n\n# Create workspace directories\nRUN mkdir -p workspace/sessions /workspace/files /workspace/templates /workspace/demo_data && \\\n    chown -R sandbox:sandbox /workspace\n\n# Copy outputs template (web app scaffold, without node_modules)\nCOPY --exclude=.next --exclude=node_modules templates/outputs /workspace/templates/outputs\nRUN chown -R sandbox:sandbox /workspace/templates\n\n# Copy and extract demo data from zip file\n# Zip contains demo_data/ as root folder\nCOPY demo_data.zip /tmp/demo_data.zip\nRUN unzip -q /tmp/demo_data.zip -d /workspace && \\\n    rm /tmp/demo_data.zip && \\\n    chown -R sandbox:sandbox /workspace/demo_data\n\n# Copy and install Python requirements into a venv\nCOPY initial-requirements.txt /tmp/initial-requirements.txt\nRUN python3 -m venv /workspace/.venv && \\\n    /workspace/.venv/bin/pip install --upgrade pip && \\\n    /workspace/.venv/bin/pip install -r /tmp/initial-requirements.txt && \\\n    rm /tmp/initial-requirements.txt && \\\n    chown -R sandbox:sandbox /workspace/.venv\n\n# Add venv to PATH so python/pip use it by default\nENV PATH=\"/workspace/.venv/bin:${PATH}\"\n\n# Install pptxgenjs globally for creating presentations from scratch\nRUN npm install -g pptxgenjs\n\n# Install opencode CLI as sandbox user so it goes to their home directory\nUSER sandbox\nRUN curl -fsSL https://opencode.ai/install | bash\nUSER root\n\n# Add opencode to PATH (installs to ~/.opencode/bin)\nENV PATH=\"/home/sandbox/.opencode/bin:${PATH}\"\n\n# Copy agent skills (symlinked into each session's .opencode/skills/ at setup time)\nCOPY --exclude=__pycache__ skills/ /workspace/skills/\n\n# Set ownership\nRUN chown -R sandbox:sandbox /workspace\n\n# Copy scripts\nCOPY generate_agents_md.py /usr/local/bin/generate_agents_md.py\nRUN chmod +x /usr/local/bin/generate_agents_md.py\n\n# Switch to non-root user\nUSER sandbox\nWORKDIR /workspace\n\n# Expose ports\n# - 3000: Next.js dev server (started per-session if needed)\n# - 8081: OpenCode ACP HTTP server (started via exec)\nEXPOSE 3000 8081\n\n# Keep container alive - all work done via kubectl exec\nCMD [\"sleep\", \"infinity\"]\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/README.md",
    "content": "# Sandbox Container Image\n\nThis directory contains the Dockerfile and resources for building the Onyx Craft sandbox container image.\n\n## Directory Structure\n\n```\ndocker/\n├── Dockerfile              # Main container image definition\n├── demo_data.zip           # Demo data (extracted to /workspace/demo_data)\n├── skills/                 # Agent skills (image-generation, pptx, etc.)\n├── templates/\n│   └── outputs/            # Web app scaffold template (Next.js)\n├── initial-requirements.txt # Python packages pre-installed in sandbox\n├── generate_agents_md.py   # Script to generate AGENTS.md for sessions\n└── README.md               # This file\n```\n\n## Building the Image\n\nThe sandbox image must be built for **amd64** architecture since our Kubernetes cluster runs on x86_64 nodes.\n\n### Build for amd64 only (fastest)\n\n```bash\ncd backend/onyx/server/features/build/sandbox/kubernetes/docker\ndocker build --platform linux/amd64 -t onyxdotapp/sandbox:v0.1.x .\ndocker push onyxdotapp/sandbox:v0.1.x\n```\n\n### Build multi-arch (recommended for flexibility)\n\n```bash\ndocker buildx build --platform linux/amd64,linux/arm64 \\\n  -t onyxdotapp/sandbox:v0.1.x \\\n  --push .\n```\n\n### Update the `latest` tag\n\nAfter pushing a versioned tag, update `latest`:\n\n```bash\ndocker tag onyxdotapp/sandbox:v0.1.x onyxdotapp/sandbox:latest\ndocker push onyxdotapp/sandbox:latest\n```\n\nOr with buildx:\n\n```bash\ndocker buildx build --platform linux/amd64,linux/arm64 \\\n  -t onyxdotapp/sandbox:v0.1.x \\\n  -t onyxdotapp/sandbox:latest \\\n  --push .\n```\n\n## Deploying a New Version\n\n1. **Build and push** the new image (see above)\n\n2. **Update the ConfigMap** in `cloud-deployment-yamls/danswer/configmap/env-configmap.yaml`:\n   ```yaml\n   SANDBOX_CONTAINER_IMAGE: \"onyxdotapp/sandbox:v0.1.x\"\n   ```\n\n3. **Apply the ConfigMap**:\n   ```bash\n   kubectl apply -f configmap/env-configmap.yaml\n   ```\n\n4. **Restart the API server** to pick up the new config:\n   ```bash\n   kubectl rollout restart deployment/api-server -n danswer\n   ```\n\n5. **Delete existing sandbox pods** (they will be recreated with the new image):\n   ```bash\n   kubectl delete pods -n onyx-sandboxes -l app.kubernetes.io/component=sandbox\n   ```\n\n## What's Baked Into the Image\n\n- **Base**: `node:20-slim` (Debian-based)\n- **Demo data**: `/workspace/demo_data/` - sample files for demo sessions\n- **Skills**: `/workspace/skills/` - agent skills (image-generation, pptx, etc.)\n- **Templates**: `/workspace/templates/outputs/` - Next.js web app scaffold\n- **Python venv**: `/workspace/.venv/` with packages from `initial-requirements.txt`\n- **OpenCode CLI**: Installed in `/home/sandbox/.opencode/bin/`\n\n## Runtime Directory Structure\n\nWhen a session is created, the following structure is set up in the pod:\n\n```\n/workspace/\n├── demo_data/              # Baked into image\n├── files/                  # Mounted volume, synced from S3\n├── skills/                 # Baked into image (agent skills)\n├── templates/              # Baked into image\n└── sessions/\n    └── $session_id/\n        ├── .opencode/\n        │   └── skills/     # Symlink to /workspace/skills\n        ├── files/          # Symlink to /workspace/demo_data or /workspace/files\n        ├── outputs/        # Copied from templates, contains web app\n        ├── attachments/    # User-uploaded files\n        ├── org_info/       # Demo persona info (if demo mode)\n        ├── AGENTS.md       # Instructions for the AI agent\n        └── opencode.json   # OpenCode configuration\n```\n\n## Troubleshooting\n\n### Verify image exists on Docker Hub\n\n```bash\ncurl -s \"https://hub.docker.com/v2/repositories/onyxdotapp/sandbox/tags\" | jq '.results[].name'\n```\n\n### Check what image a pod is using\n\n```bash\nkubectl get pod <pod-name> -n onyx-sandboxes -o jsonpath='{.spec.containers[?(@.name==\"sandbox\")].image}'\n```\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/generate_agents_md.py",
    "content": "#!/usr/bin/env python3\n\"\"\"Generate AGENTS.md by scanning the files directory and populating the template.\n\nThis script runs during session setup, AFTER files have been synced from S3\nand the files symlink has been created. It reads an existing AGENTS.md (which\ncontains the {{KNOWLEDGE_SOURCES_SECTION}} placeholder), replaces the\nplaceholder by scanning the knowledge source directory, and writes it back.\n\nUsage:\n    python3 generate_agents_md.py <agents_md_path> <files_path>\n\nArguments:\n    agents_md_path: Path to the AGENTS.md file to update in place\n    files_path: Path to the files directory to scan for knowledge sources\n\"\"\"\n\nimport sys\nfrom pathlib import Path\n\n# Type alias for connector info entries\nConnectorInfoEntry = dict[str, str | int]\n\n# Connector information for generating knowledge sources section\n# Keys are normalized (lowercase, underscores) directory names\n# Each entry has: summary (with optional {subdirs}), file_pattern, scan_depth\n# NOTE: This is duplicated from agent_instructions.py to avoid circular imports\nCONNECTOR_INFO: dict[str, ConnectorInfoEntry] = {\n    \"google_drive\": {\n        \"summary\": \"Documents and files from Google Drive. This may contain information about a user and work they have done.\",\n        \"file_pattern\": \"`FILE_NAME.json`\",\n        \"scan_depth\": 0,\n    },\n    \"gmail\": {\n        \"summary\": \"Email conversations and threads\",\n        \"file_pattern\": \"`FILE_NAME.json`\",\n        \"scan_depth\": 0,\n    },\n    \"linear\": {\n        \"summary\": \"Engineering tickets from teams: {subdirs}\",\n        \"file_pattern\": \"`[TEAM]/[TICKET_ID]_TICKET_TITLE.json`\",\n        \"scan_depth\": 2,\n    },\n    \"slack\": {\n        \"summary\": \"Team messages from channels: {subdirs}\",\n        \"file_pattern\": \"`[CHANNEL]/[AUTHOR]_in_[CHANNEL]__[MSG].json`\",\n        \"scan_depth\": 1,\n    },\n    \"github\": {\n        \"summary\": \"Pull requests and code from: {subdirs}\",\n        \"file_pattern\": \"`[ORG]/[REPO]/pull_requests/[PR_NUMBER]__[PR_TITLE].json`\",\n        \"scan_depth\": 2,\n    },\n    \"fireflies\": {\n        \"summary\": \"Meeting transcripts from: {subdirs}\",\n        \"file_pattern\": \"`[YYYY-MM]/CALL_TITLE.json`\",\n        \"scan_depth\": 1,\n    },\n    \"hubspot\": {\n        \"summary\": \"CRM data including: {subdirs}\",\n        \"file_pattern\": \"`[TYPE]/[RECORD_NAME].json`\",\n        \"scan_depth\": 1,\n    },\n    \"notion\": {\n        \"summary\": \"Documentation and notes: {subdirs}\",\n        \"file_pattern\": \"`PAGE_TITLE.json`\",\n        \"scan_depth\": 1,\n    },\n    \"user_library\": {\n        \"summary\": \"User-uploaded files (spreadsheets, documents, presentations, etc.)\",\n        \"file_pattern\": \"Any file format\",\n        \"scan_depth\": 1,\n    },\n}\nDEFAULT_SCAN_DEPTH = 1\n\n\ndef _normalize_connector_name(name: str) -> str:\n    \"\"\"Normalize a connector directory name for lookup.\"\"\"\n    return name.lower().replace(\" \", \"_\").replace(\"-\", \"_\")\n\n\ndef _scan_directory_to_depth(\n    directory: Path, current_depth: int, max_depth: int, indent: str = \"  \"\n) -> list[str]:\n    \"\"\"Recursively scan directory up to max_depth levels.\"\"\"\n    if current_depth >= max_depth:\n        return []\n\n    lines: list[str] = []\n    try:\n        subdirs = sorted(\n            d for d in directory.iterdir() if d.is_dir() and not d.name.startswith(\".\")\n        )\n\n        for subdir in subdirs[:10]:  # Limit to 10 per level\n            lines.append(f\"{indent}- {subdir.name}/\")\n\n            # Recurse if we haven't hit max depth\n            if current_depth + 1 < max_depth:\n                nested = _scan_directory_to_depth(\n                    subdir, current_depth + 1, max_depth, indent + \"  \"\n                )\n                lines.extend(nested)\n\n        if len(subdirs) > 10:\n            lines.append(f\"{indent}- ... and {len(subdirs) - 10} more\")\n    except Exception:\n        pass\n\n    return lines\n\n\ndef build_knowledge_sources_section(files_path: Path) -> str:\n    \"\"\"Build combined knowledge sources section with summary, structure, and file patterns.\n\n    This creates a single section per connector that includes:\n    - What kind of data it contains (with actual subdirectory names)\n    - The directory structure\n    - The file naming pattern\n\n    Args:\n        files_path: Path to the files directory\n\n    Returns:\n        Formatted knowledge sources section\n    \"\"\"\n    if not files_path.exists():\n        return \"No knowledge sources available.\"\n\n    sections: list[str] = []\n    try:\n        for item in sorted(files_path.iterdir()):\n            if not item.is_dir() or item.name.startswith(\".\"):\n                continue\n\n            normalized = _normalize_connector_name(item.name)\n            info = CONNECTOR_INFO.get(normalized, {})\n\n            # Get subdirectory names\n            subdirs: list[str] = []\n            try:\n                subdirs = sorted(\n                    d.name\n                    for d in item.iterdir()\n                    if d.is_dir() and not d.name.startswith(\".\")\n                )[:5]\n            except Exception:\n                pass\n\n            # Build summary with subdirs\n            summary_template = str(info.get(\"summary\", f\"Data from {item.name}\"))\n            if \"{subdirs}\" in summary_template and subdirs:\n                subdir_str = \", \".join(subdirs)\n                if len(subdirs) == 5:\n                    subdir_str += \", ...\"\n                summary = summary_template.format(subdirs=subdir_str)\n            elif \"{subdirs}\" in summary_template:\n                summary = summary_template.replace(\": {subdirs}\", \"\").replace(\n                    \" {subdirs}\", \"\"\n                )\n            else:\n                summary = summary_template\n\n            # Build connector section\n            file_pattern = str(info.get(\"file_pattern\", \"\"))\n            scan_depth = int(info.get(\"scan_depth\", DEFAULT_SCAN_DEPTH))\n\n            lines = [f\"### {item.name}/\"]\n            lines.append(f\"{summary}.\\n\")\n            # Add directory structure if depth > 0\n            if scan_depth > 0:\n                lines.append(\"Directory structure:\\n\")\n                nested = _scan_directory_to_depth(item, 0, scan_depth, \"\")\n                if nested:\n                    lines.append(\"\")\n                    lines.extend(nested)\n\n            lines.append(f\"\\nFile format: {file_pattern}\")\n\n            sections.append(\"\\n\".join(lines))\n    except Exception as e:\n        print(\n            f\"Warning: Error building knowledge sources section: {e}\", file=sys.stderr\n        )\n        return \"Error scanning knowledge sources.\"\n\n    if not sections:\n        return \"No knowledge sources available.\"\n\n    return \"\\n\\n\".join(sections)\n\n\ndef main() -> None:\n    \"\"\"Main entry point for container startup script.\n\n    Reads an existing AGENTS.md, replaces the {{KNOWLEDGE_SOURCES_SECTION}}\n    placeholder by scanning the files directory, and writes it back.\n\n    Usage:\n        python3 generate_agents_md.py <agents_md_path> <files_path>\n    \"\"\"\n    if len(sys.argv) != 3:\n        print(\n            f\"Usage: {sys.argv[0]} <agents_md_path> <files_path>\",\n            file=sys.stderr,\n        )\n        sys.exit(1)\n\n    agents_md_path = Path(sys.argv[1])\n    files_path = Path(sys.argv[2])\n\n    if not agents_md_path.exists():\n        print(f\"Error: {agents_md_path} not found\", file=sys.stderr)\n        sys.exit(1)\n\n    template = agents_md_path.read_text()\n\n    # Resolve symlinks (handles both direct symlinks and dirs containing symlinks)\n    resolved_files_path = files_path.resolve()\n\n    knowledge_sources_section = build_knowledge_sources_section(resolved_files_path)\n\n    # Replace placeholder and write back\n    content = template.replace(\n        \"{{KNOWLEDGE_SOURCES_SECTION}}\", knowledge_sources_section\n    )\n    agents_md_path.write_text(content)\n    print(f\"Populated knowledge sources in {agents_md_path}\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/initial-requirements.txt",
    "content": "defusedxml>=0.7.1\ngoogle-genai>=1.0.0\nlxml>=5.0.0\nmarkitdown>=0.1.2\nmatplotlib==3.9.1\nmatplotlib-inline>=0.1.7\nmatplotlib-venn>=1.1.2\nnumpy==1.26.4\nopencv-python>=4.11.0.86\nopenpyxl>=3.1.5\npandas==2.2.2\npdfplumber>=0.11.7\nPillow>=10.0.0\npydantic>=2.11.9\npython-pptx>=1.0.2\nscikit-image>=0.25.2\nscikit-learn>=1.7.2\nscipy>=1.16.2\nseaborn>=0.13.2\nxgboost>=3.0.5"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/run-test.sh",
    "content": "#!/bin/bash\n# Run Kubernetes sandbox integration tests\n#\n# This script:\n# 1. Builds the onyx-backend Docker image\n# 2. Loads it into the kind cluster\n# 3. Deletes/recreates the test pod\n# 4. Waits for the pod to be ready\n# 5. Runs the pytest command inside the pod\n#\n# Usage:\n#   ./run-test.sh [test_name]\n#\n# Examples:\n#   ./run-test.sh                                    # Run all tests\n#   ./run-test.sh test_kubernetes_sandbox_provision  # Run specific test\n\nset -e\n\nSCRIPT_DIR=\"$(cd \"$(dirname \"${BASH_SOURCE[0]}\")\" && pwd)\"\nPROJECT_ROOT=\"$(cd \"$SCRIPT_DIR/../../../../../../../..\" && pwd)\"\nNAMESPACE=\"onyx-sandboxes\"\nPOD_NAME=\"sandbox-test\"\nIMAGE_NAME=\"onyxdotapp/onyx-backend:latest\"\nTEST_FILE=\"onyx/server/features/build/sandbox/kubernetes/test_kubernetes_sandbox.py\"\nENV_FILE=\"$PROJECT_ROOT/.vscode/.env\"\n\nORIGINAL_TEST_FILE=\"$PROJECT_ROOT/backend/tests/external_dependency_unit/craft/test_kubernetes_sandbox.py\"\ncp \"$ORIGINAL_TEST_FILE\" \"$PROJECT_ROOT/backend/$TEST_FILE\"\n\n# Optional: specific test to run\nTEST_NAME=\"${1:-}\"\n\n# Build env var arguments from .vscode/.env file for passing to the container\nENV_VARS=()\nif [ -f \"$ENV_FILE\" ]; then\n    echo \"=== Loading environment variables from .vscode/.env ===\"\n    while IFS= read -r line || [ -n \"$line\" ]; do\n        # Skip empty lines and comments\n        [[ -z \"$line\" || \"$line\" =~ ^[[:space:]]*# ]] && continue\n        # Skip lines without =\n        [[ \"$line\" != *\"=\"* ]] && continue\n        # Add to env vars array\n        ENV_VARS+=(\"$line\")\n    done < \"$ENV_FILE\"\n    echo \"Loaded ${#ENV_VARS[@]} environment variables\"\nelse\n    echo \"Warning: .vscode/.env not found, running without additional env vars\"\nfi\n\necho \"=== Building onyx-backend Docker image ===\"\ncd \"$PROJECT_ROOT/backend\"\ndocker build -t \"$IMAGE_NAME\" -f Dockerfile .\n\nrm \"$PROJECT_ROOT/backend/$TEST_FILE\"\n\necho \"=== Loading image into kind cluster ===\"\nkind load docker-image \"$IMAGE_NAME\" --name onyx 2>/dev/null || \\\n    kind load docker-image \"$IMAGE_NAME\" 2>/dev/null || \\\n    echo \"Warning: Could not load into kind. If using minikube, run: minikube image load $IMAGE_NAME\"\n\necho \"=== Deleting existing test pod (if any) ===\"\nkubectl delete pod \"$POD_NAME\" -n \"$NAMESPACE\" --ignore-not-found=true\n\necho \"=== Creating test pod ===\"\nkubectl apply -f \"$SCRIPT_DIR/test-job.yaml\"\n\necho \"=== Waiting for pod to be ready ===\"\nkubectl wait --for=condition=Ready pod/\"$POD_NAME\" -n \"$NAMESPACE\" --timeout=120s\n\necho \"=== Running tests ===\"\nif [ -n \"$TEST_NAME\" ]; then\n    kubectl exec -it \"$POD_NAME\" -n \"$NAMESPACE\" -- \\\n        env \"${ENV_VARS[@]}\" pytest \"$TEST_FILE::$TEST_NAME\" -v -s\nelse\n    kubectl exec -it \"$POD_NAME\" -n \"$NAMESPACE\" -- \\\n        env \"${ENV_VARS[@]}\" pytest \"$TEST_FILE\" -v -s\nfi\n\necho \"=== Tests complete ===\"\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/image-generation/SKILL.md",
    "content": "---\nname: image-generation\ndescription: Generate images using nano banana.\n---\n\n# Image Generation Skill\n\nGenerate images using Nano Banana (Google Gemini Image API). Supports text-to-image and image-to-image generation with configurable options.\n\n## Setup\n\n### Dependencies\n\n```bash\npip install google-genai Pillow\n```\n\n### Environment Variable\n\nSet your API key:\n\n```bash\nexport GEMINI_API_KEY=\"your_api_key_here\"\n```\n\n## Usage\n\n### Basic Text-to-Image\n\n```bash\npython scripts/generate.py --prompt \"A futuristic city at sunset with neon lights\" --output city.png\n```\n\n### With Aspect Ratio\n\n```bash\npython scripts/generate.py \\\n  --prompt \"Mountain landscape with a lake\" \\\n  --output landscape.png \\\n  --aspect-ratio 16:9\n```\n\n### Image-to-Image Mode\n\nUse a reference image to guide generation:\n\n```bash\npython scripts/generate.py \\\n  --prompt \"Make it look like a watercolor painting\" \\\n  --input-image original.png \\\n  --output watercolor.png\n```\n\n### Generate Multiple Images\n\n```bash\npython scripts/generate.py \\\n  --prompt \"Abstract colorful art\" \\\n  --output art.png \\\n  --num-images 3\n```\n\n## Arguments\n\n| Argument | Short | Required | Default | Description |\n|----------|-------|----------|---------|-------------|\n| `--prompt` | `-p` | Yes | — | Text prompt describing the desired image |\n| `--output` | `-o` | No | `output.png` | Output path for the generated image |\n| `--model` | `-m` | No | `gemini-2.0-flash-preview-image-generation` | Model to use for generation |\n| `--input-image` | `-i` | No | — | Reference image for image-to-image mode |\n| `--aspect-ratio` | `-a` | No | — | Aspect ratio: `1:1`, `16:9`, `9:16`, `4:3`, `3:4` |\n| `--num-images` | `-n` | No | `1` | Number of images to generate |\n\n## Available Models\n\n- `gemini-2.0-flash-preview-image-generation` - Fast, optimized for speed and lower latency\n- `imagen-3.0-generate-002` - High quality image generation\n\n## Programmatic Usage\n\nImport the function directly in Python:\n\n```python\nfrom scripts.generate import generate_image\n\npaths = generate_image(\n    prompt=\"A serene mountain lake under moonlight\",\n    output_path=\"./outputs/lake.png\",\n    aspect_ratio=\"16:9\",\n    num_images=2,\n)\n```\n\n## Tips\n\n- **Detailed prompts work better**: Instead of \"a cat\", try \"a fluffy orange tabby cat sitting on a windowsill, soft morning light, photorealistic\"\n- **Specify style**: Include style keywords like \"digital art\", \"oil painting\", \"photorealistic\", \"anime style\"\n- **Use aspect ratios**: Match the aspect ratio to your intended use (16:9 for landscapes, 9:16 for portraits/mobile)\n- **Image-to-image**: Great for style transfer, variations, or guided modifications of existing images\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/image-generation/scripts/generate.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nImage generation script using Nano Banana (Google Gemini Image API).\n\nSupports text-to-image and image-to-image generation with configurable options.\n\"\"\"\n\nimport argparse\nimport base64\nimport os\nimport sys\nfrom io import BytesIO\nfrom pathlib import Path\n\nfrom PIL import Image\n\n\ndef load_image_as_base64(image_path: str) -> tuple[str, str]:\n    \"\"\"Load an image file and return base64 data and mime type.\"\"\"\n    path = Path(image_path)\n    if not path.exists():\n        raise FileNotFoundError(f\"Image not found: {image_path}\")\n\n    # Determine mime type from extension\n    ext = path.suffix.lower()\n    mime_types = {\n        \".png\": \"image/png\",\n        \".jpg\": \"image/jpeg\",\n        \".jpeg\": \"image/jpeg\",\n        \".gif\": \"image/gif\",\n        \".webp\": \"image/webp\",\n    }\n    mime_type = mime_types.get(ext, \"image/png\")\n\n    with open(image_path, \"rb\") as f:\n        data = base64.b64encode(f.read()).decode(\"utf-8\")\n\n    return data, mime_type\n\n\ndef generate_image(\n    prompt: str,\n    output_path: str,\n    model: str = \"gemini-3-pro-image-preview\",\n    input_image: str | None = None,\n    aspect_ratio: str | None = None,  # noqa: ARG001\n    num_images: int = 1,\n) -> list[str]:\n    \"\"\"\n    Generate image(s) using Google Gemini / Nano Banana API.\n\n    Args:\n        prompt: Text description for image generation.\n        output_path: Path to save the generated image(s).\n        model: Model ID to use for generation.\n        input_image: Optional path to reference image for image-to-image mode.\n        aspect_ratio: Aspect ratio (e.g., \"1:1\", \"16:9\", \"9:16\", \"4:3\", \"3:4\").\n        num_images: Number of images to generate.\n\n    Returns:\n        List of paths to saved images.\n    \"\"\"\n    api_key = os.environ.get(\"GEMINI_API_KEY\") or os.environ.get(\"GENAI_API_KEY\")\n    if not api_key:\n        raise ValueError(\n            \"API key not found. Set GEMINI_API_KEY or GENAI_API_KEY environment variable.\"\n        )\n\n    # lazy importing since very heavy libs\n    from google import genai\n    from google.genai import types\n\n    client = genai.Client(api_key=api_key)\n\n    # Build content parts\n    parts: list[types.Part] = []\n\n    # Add reference image if provided (image-to-image mode)\n    if input_image:\n        img_data, mime_type = load_image_as_base64(input_image)\n        parts.append(\n            types.Part.from_bytes(\n                data=base64.b64decode(img_data),\n                mime_type=mime_type,\n            )\n        )\n\n    # Add text prompt\n    parts.append(types.Part.from_text(text=prompt))\n\n    # Build generation config\n    generate_config = types.GenerateContentConfig(\n        response_modalities=[\"TEXT\", \"IMAGE\"],\n    )\n\n    saved_paths: list[str] = []\n    output_dir = Path(output_path).parent\n    output_dir.mkdir(parents=True, exist_ok=True)\n\n    base_name = Path(output_path).stem\n    extension = Path(output_path).suffix or \".png\"\n\n    for i in range(num_images):\n        response = client.models.generate_content(\n            model=model,\n            contents=types.Content(parts=parts),\n            config=generate_config,\n        )\n\n        # Validate response\n        if not response.candidates:\n            raise ValueError(\"No candidates returned from the API\")\n\n        candidate = response.candidates[0]\n        if not candidate.content or not candidate.content.parts:\n            raise ValueError(\"No content parts returned from the API\")\n\n        # Process response parts\n        image_count = 0\n        for part in candidate.content.parts:\n            if part.inline_data is not None and part.inline_data.data is not None:\n                # Extract and save the image\n                image_data = part.inline_data.data\n                image = Image.open(BytesIO(image_data))\n\n                # Generate output filename\n                if num_images == 1 and image_count == 0:\n                    save_path = output_path\n                else:\n                    save_path = str(\n                        output_dir / f\"{base_name}_{i + 1}_{image_count + 1}{extension}\"\n                    )\n\n                image.save(save_path)\n                saved_paths.append(save_path)\n                print(f\"Saved: {save_path}\")\n                image_count += 1\n            elif part.text:\n                # Print any text response from the model\n                print(f\"Model response: {part.text}\")\n\n    return saved_paths\n\n\ndef main() -> None:\n    \"\"\"Main entry point for CLI usage.\"\"\"\n    parser = argparse.ArgumentParser(\n        description=\"Generate images using Nano Banana (Google Gemini Image API).\",\n        formatter_class=argparse.RawDescriptionHelpFormatter,\n        epilog=\"\"\"\nExamples:\n  # Basic text-to-image generation\n  python generate.py --prompt \"A futuristic city at sunset\" --output city.png\n\n  # Generate with specific aspect ratio\n  python generate.py --prompt \"Mountain landscape\" --output landscape.png --aspect-ratio 16:9\n\n  # Image-to-image mode (use reference image)\n  python generate.py --prompt \"Make it more colorful\" --input-image ref.png --output colorful.png\n\n  # Generate multiple images\n  python generate.py --prompt \"Abstract art\" --output art.png --num-images 3\n\"\"\",\n    )\n\n    parser.add_argument(\n        \"--prompt\",\n        \"-p\",\n        type=str,\n        required=True,\n        help=\"Text prompt describing the desired image.\",\n    )\n    parser.add_argument(\n        \"--output\",\n        \"-o\",\n        type=str,\n        default=\"output.png\",\n        help=\"Output path for the generated image (default: output.png).\",\n    )\n    parser.add_argument(\n        \"--model\",\n        \"-m\",\n        type=str,\n        default=\"gemini-3-pro-image-preview\",\n        help=\"Model to use (default: gemini-3-pro-image-preview).\",\n    )\n    parser.add_argument(\n        \"--input-image\",\n        \"-i\",\n        type=str,\n        help=\"Path to reference image for image-to-image generation.\",\n    )\n    parser.add_argument(\n        \"--aspect-ratio\",\n        \"-a\",\n        type=str,\n        choices=[\"1:1\", \"16:9\", \"9:16\", \"4:3\", \"3:4\"],\n        help=\"Aspect ratio for the generated image.\",\n    )\n    parser.add_argument(\n        \"--num-images\",\n        \"-n\",\n        type=int,\n        default=1,\n        help=\"Number of images to generate (default: 1).\",\n    )\n\n    args = parser.parse_args()\n\n    try:\n        saved_paths = generate_image(\n            prompt=args.prompt,\n            output_path=args.output,\n            model=args.model,\n            input_image=args.input_image,\n            aspect_ratio=args.aspect_ratio,\n            num_images=args.num_images,\n        )\n\n        print(f\"\\nSuccessfully generated {len(saved_paths)} image(s):\")\n        for path in saved_paths:\n            print(f\"  - {path}\")\n\n    except Exception as e:\n        print(f\"Error: {e}\", file=sys.stderr)\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/SKILL.md",
    "content": "---\nname: pptx\ndescription: \"Use this skill any time a .pptx file is involved in any way — as input, output, or both. This includes: creating slide decks, pitch decks, or presentations; reading, parsing, or extracting text from any .pptx file (even if the extracted content will be used elsewhere, like in an email or summary); editing, modifying, or updating existing presentations; combining or splitting slide files; working with templates, layouts, speaker notes, or comments. Trigger whenever the user mentions \\\"deck,\\\" \\\"slides,\\\" \\\"presentation,\\\" or references a .pptx filename, regardless of what they plan to do with the content afterward. If a .pptx file needs to be opened, created, or touched, use this skill.\"\nlicense: Proprietary. LICENSE.txt has complete terms\n---\n\n# PPTX Skill\n\n> **Path convention**: All commands run from the **session workspace** (your working directory). Never `cd` into the skill directory. Prefix all skill scripts with `.opencode/skills/pptx/`. All generated files (unpacked dirs, output presentations, thumbnails, PDFs, images) go in `outputs/`.\n\n## Quick Reference\n\n| Task | Guide |\n|------|-------|\n| Read/analyze content | `python -m markitdown presentation.pptx` |\n| Edit or create from template | Read [editing.md](editing.md) |\n| Create from scratch | Read [pptxgenjs.md](pptxgenjs.md) |\n\n---\n\n## Reading Content\n\n```bash\n# Text extraction\npython -m markitdown presentation.pptx\n\n# Visual overview\npython .opencode/skills/pptx/scripts/thumbnail.py presentation.pptx\n\n# Raw XML\npython .opencode/skills/pptx/scripts/office/unpack.py presentation.pptx outputs/unpacked/\n```\n\n---\n\n## Editing Workflow\n\n**Read [editing.md](editing.md) for full details.**\n\n1. Analyze template with `thumbnail.py`\n2. Unpack → manipulate slides → edit content → clean → pack\n\n---\n\n## Creating from Scratch\n\n**Read [pptxgenjs.md](pptxgenjs.md) for full details.**\n\nUse when no template or reference presentation is available.\n\n---\n\n## Design Ideas\n\n**Don't create boring slides.** Plain bullets on a white background won't impress anyone. Consider ideas from this list for each slide.\n\n### Before Starting\n\n- **Pick a bold, content-informed color palette**: The palette should feel designed for THIS topic. If swapping your colors into a completely different presentation would still \"work,\" you haven't made specific enough choices.\n- **Dominance over equality**: One color should dominate (60-70% visual weight), with 1-2 supporting tones and one sharp accent. Never give all colors equal weight.\n- **Dark/light contrast**: Dark backgrounds for title + conclusion slides, light for content (\"sandwich\" structure). Or commit to dark throughout for a premium feel.\n- **Commit to a visual motif**: Pick ONE distinctive element and repeat it — rounded image frames, icons in colored circles, thick single-side borders. Carry it across every slide.\n\n### Color Palettes\n\nChoose colors that match your topic — don't default to generic blue. Use these palettes as inspiration:\n\n| Theme | Primary | Secondary | Accent |\n|-------|---------|-----------|--------|\n| **Midnight Executive** | `1E2761` (navy) | `CADCFC` (ice blue) | `FFFFFF` (white) |\n| **Forest & Moss** | `2C5F2D` (forest) | `97BC62` (moss) | `F5F5F5` (cream) |\n| **Coral Energy** | `F96167` (coral) | `F9E795` (gold) | `2F3C7E` (navy) |\n| **Warm Terracotta** | `B85042` (terracotta) | `E7E8D1` (sand) | `A7BEAE` (sage) |\n| **Ocean Gradient** | `065A82` (deep blue) | `1C7293` (teal) | `21295C` (midnight) |\n| **Charcoal Minimal** | `36454F` (charcoal) | `F2F2F2` (off-white) | `212121` (black) |\n| **Teal Trust** | `028090` (teal) | `00A896` (seafoam) | `02C39A` (mint) |\n| **Berry & Cream** | `6D2E46` (berry) | `A26769` (dusty rose) | `ECE2D0` (cream) |\n| **Sage Calm** | `84B59F` (sage) | `69A297` (eucalyptus) | `50808E` (slate) |\n| **Cherry Bold** | `990011` (cherry) | `FCF6F5` (off-white) | `2F3C7E` (navy) |\n\n### For Each Slide\n\n**Every slide needs a visual element** — image, chart, icon, or shape. Text-only slides are forgettable.\n\n**Layout options:**\n- Two-column (text left, illustration on right)\n- Icon + text rows (icon in colored circle, bold header, description below)\n- 2x2 or 2x3 grid (image on one side, grid of content blocks on other)\n- Half-bleed image (full left or right side) with content overlay\n\n**Data display:**\n- Large stat callouts (big numbers 60-72pt with small labels below)\n- Comparison columns (before/after, pros/cons, side-by-side options)\n- Timeline or process flow (numbered steps, arrows)\n\n**Visual polish:**\n- Icons in small colored circles next to section headers\n- Italic accent text for key stats or taglines\n\n### Typography\n\n**Choose an interesting font pairing** — don't default to Arial. Pick a header font with personality and pair it with a clean body font.\n\n| Header Font | Body Font |\n|-------------|-----------|\n| Georgia | Calibri |\n| Arial Black | Arial |\n| Calibri | Calibri Light |\n| Cambria | Calibri |\n| Trebuchet MS | Calibri |\n| Impact | Arial |\n| Palatino | Garamond |\n| Consolas | Calibri |\n\n| Element | Size |\n|---------|------|\n| Slide title | 36-44pt bold |\n| Section header | 20-24pt bold |\n| Body text | 14-16pt |\n| Captions | 10-12pt muted |\n\n### Spacing\n\n- 0.5\" minimum margins\n- 0.3-0.5\" between content blocks\n- Leave breathing room—don't fill every inch\n\n### Avoid (Common Mistakes)\n\n- **Don't repeat the same layout** — vary columns, cards, and callouts across slides\n- **Don't center body text** — left-align paragraphs and lists; center only titles\n- **Don't skimp on size contrast** — titles need 36pt+ to stand out from 14-16pt body\n- **Don't default to blue** — pick colors that reflect the specific topic\n- **Don't mix spacing randomly** — choose 0.3\" or 0.5\" gaps and use consistently\n- **Don't style one slide and leave the rest plain** — commit fully or keep it simple throughout\n- **Don't create text-only slides** — add images, icons, charts, or visual elements; avoid plain title + bullets\n- **Don't forget text box padding** — when aligning lines or shapes with text edges, set `margin: 0` on the text box or offset the shape to account for padding\n- **Don't use low-contrast elements** — icons AND text need strong contrast against the background; avoid light text on light backgrounds or dark text on dark backgrounds\n- **NEVER use accent lines under titles** — these are a hallmark of AI-generated slides; use whitespace or background color instead\n\n---\n\n## QA (Required)\n\n**Assume there are problems. Your job is to find them.**\n\nYour first render is almost never correct. Approach QA as a bug hunt, not a confirmation step. If you found zero issues on first inspection, you weren't looking hard enough.\n\n### Content QA\n\n```bash\npython -m markitdown output.pptx\n```\n\nCheck for missing content, typos, wrong order.\n\n**When using templates, check for leftover placeholder text:**\n\n```bash\npython -m markitdown output.pptx | grep -iE \"xxxx|lorem|ipsum|this.*(page|slide).*layout\"\n```\n\nIf grep returns results, fix them before declaring success.\n\n### Visual QA\n\n**⚠️ USE SUBAGENTS** — even for 2-3 slides. You've been staring at the code and will see what you expect, not what's there. Subagents have fresh eyes.\n\nConvert slides to images (see [Converting to Images](#converting-to-images)), then use this prompt:\n\n```\nVisually inspect these slides. Assume there are issues — find them.\n\nLook for:\n- Overlapping elements (text through shapes, lines through words, stacked elements)\n- Text overflow or cut off at edges/box boundaries\n- Decorative lines positioned for single-line text but title wrapped to two lines\n- Source citations or footers colliding with content above\n- Elements too close (< 0.3\" gaps) or cards/sections nearly touching\n- Uneven gaps (large empty area in one place, cramped in another)\n- Insufficient margin from slide edges (< 0.5\")\n- Columns or similar elements not aligned consistently\n- Low-contrast text (e.g., light gray text on cream-colored background)\n- Low-contrast icons (e.g., dark icons on dark backgrounds without a contrasting circle)\n- Text boxes too narrow causing excessive wrapping\n- Leftover placeholder content\n\nFor each slide, list issues or areas of concern, even if minor.\n\nRead and analyze these images:\n1. /path/to/slide-01.jpg (Expected: [brief description])\n2. /path/to/slide-02.jpg (Expected: [brief description])\n\nReport ALL issues found, including minor ones.\n```\n\n### Verification Loop\n\n1. Generate slides → Convert to images → Inspect\n2. **List issues found** (if none found, look again more critically)\n3. Fix issues\n4. **Re-verify affected slides** — one fix often creates another problem\n5. Repeat until a full pass reveals no new issues\n\n**Do not declare success until you've completed at least one fix-and-verify cycle.**\n\n---\n\n## Converting to Images\n\nConvert presentations to individual slide images for visual inspection:\n\n```bash\npython .opencode/skills/pptx/scripts/office/soffice.py --headless --convert-to pdf outputs/output.pptx\npdftoppm -jpeg -r 150 outputs/output.pdf outputs/slide\n```\n\nThis creates `slide-01.jpg`, `slide-02.jpg`, etc.\n\nTo re-render specific slides after fixes:\n\n```bash\npdftoppm -jpeg -r 150 -f N -l N outputs/output.pdf outputs/slide-fixed\n```\n\n---\n\n## Dependencies\n\n- `pip install \"markitdown[pptx]\"` - text extraction\n- `pip install Pillow` - thumbnail grids\n- `npm install -g pptxgenjs` - creating from scratch\n- LibreOffice (`soffice`) - PDF conversion (auto-configured for sandboxed environments via `.opencode/skills/pptx/scripts/office/soffice.py`)\n- Poppler (`pdftoppm`) - PDF to images\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/editing.md",
    "content": "# Editing Presentations\n\n> **Path convention**: All commands run from the **session workspace**. Never `cd` into the skill directory. Prefix all skill scripts with `.opencode/skills/pptx/`. All generated files go in `outputs/`.\n\n## Template-Based Workflow\n\nWhen using an existing presentation as a template:\n\n1. **Analyze existing slides**:\n   ```bash\n   python .opencode/skills/pptx/scripts/thumbnail.py template.pptx outputs/thumbnails\n   python -m markitdown template.pptx\n   ```\n   Review `outputs/thumbnails.jpg` to see layouts, and markitdown output to see placeholder text.\n\n2. **Plan slide mapping**: For each content section, choose a template slide.\n\n   ⚠️ **USE VARIED LAYOUTS** — monotonous presentations are a common failure mode. Don't default to basic title + bullet slides. Actively seek out:\n   - Multi-column layouts (2-column, 3-column)\n   - Image + text combinations\n   - Full-bleed images with text overlay\n   - Quote or callout slides\n   - Section dividers\n   - Stat/number callouts\n   - Icon grids or icon + text rows\n\n   **Avoid:** Repeating the same text-heavy layout for every slide.\n\n   Match content type to layout style (e.g., key points → bullet slide, team info → multi-column, testimonials → quote slide).\n\n3. **Unpack**: `python .opencode/skills/pptx/scripts/office/unpack.py template.pptx outputs/unpacked/`\n\n4. **Build presentation** (do this yourself, not with subagents):\n   - Delete unwanted slides (remove from `<p:sldIdLst>`)\n   - Duplicate slides you want to reuse (`add_slide.py`)\n   - Reorder slides in `<p:sldIdLst>`\n   - **Complete all structural changes before step 5**\n\n5. **Edit content**: Update text in each `slide{N}.xml`.\n   **Use subagents here if available** — slides are separate XML files, so subagents can edit in parallel.\n\n6. **Clean**: `python .opencode/skills/pptx/scripts/clean.py outputs/unpacked/`\n\n7. **Pack**: `python .opencode/skills/pptx/scripts/office/pack.py outputs/unpacked/ outputs/output.pptx --original template.pptx`\n\n---\n\n## Scripts\n\n| Script | Purpose |\n|--------|---------|\n| `unpack.py` | Extract and pretty-print PPTX |\n| `add_slide.py` | Duplicate slide or create from layout |\n| `clean.py` | Remove orphaned files |\n| `pack.py` | Repack with validation |\n| `thumbnail.py` | Create visual grid of slides |\n\n### unpack.py\n\n```bash\npython .opencode/skills/pptx/scripts/office/unpack.py input.pptx outputs/unpacked/\n```\n\nExtracts PPTX, pretty-prints XML, escapes smart quotes.\n\n### add_slide.py\n\n```bash\npython .opencode/skills/pptx/scripts/add_slide.py outputs/unpacked/ slide2.xml      # Duplicate slide\npython .opencode/skills/pptx/scripts/add_slide.py outputs/unpacked/ slideLayout2.xml # From layout\n```\n\nPrints `<p:sldId>` to add to `<p:sldIdLst>` at desired position.\n\n### clean.py\n\n```bash\npython .opencode/skills/pptx/scripts/clean.py outputs/unpacked/\n```\n\nRemoves slides not in `<p:sldIdLst>`, unreferenced media, orphaned rels.\n\n### pack.py\n\n```bash\npython .opencode/skills/pptx/scripts/office/pack.py outputs/unpacked/ outputs/output.pptx --original input.pptx\n```\n\nValidates, repairs, condenses XML, re-encodes smart quotes.\n\n### thumbnail.py\n\n```bash\npython .opencode/skills/pptx/scripts/thumbnail.py input.pptx outputs/thumbnails [--cols N]\n```\n\nCreates `outputs/thumbnails.jpg` with slide filenames as labels. Default 3 columns, max 12 per grid.\n\n**Use for template analysis only** (choosing layouts). For visual QA, use `soffice` + `pdftoppm` to create full-resolution individual slide images—see SKILL.md.\n\n---\n\n## Slide Operations\n\nSlide order is in `outputs/unpacked/ppt/presentation.xml` → `<p:sldIdLst>`.\n\n**Reorder**: Rearrange `<p:sldId>` elements.\n\n**Delete**: Remove `<p:sldId>`, then run `clean.py`.\n\n**See available layouts**: `ls outputs/unpacked/ppt/slideLayouts/`\n\n**Add**: Use `add_slide.py`. Never manually copy slide files—the script handles notes references, Content_Types.xml, and relationship IDs that manual copying misses.\n\n---\n\n## Editing Content\n\n**Subagents:** If available, use them here (after completing step 4). Each slide is a separate XML file, so subagents can edit in parallel. In your prompt to subagents, include:\n- The slide file path(s) to edit\n- **\"Use the Edit tool for all changes\"**\n- The formatting rules and common pitfalls below\n\nFor each slide:\n1. Read the slide's XML\n2. Identify ALL placeholder content—text, images, charts, icons, captions\n3. Replace each placeholder with final content\n\n**Use the Edit tool, not sed or Python scripts.** The Edit tool forces specificity about what to replace and where, yielding better reliability.\n\n### Formatting Rules\n\n- **Bold all headers, subheadings, and inline labels**: Use `b=\"1\"` on `<a:rPr>`. This includes:\n  - Slide titles\n  - Section headers within a slide\n  - Inline labels like (e.g.: \"Status:\", \"Description:\") at the start of a line\n- **Never use unicode bullets (•)**: Use proper list formatting with `<a:buChar>` or `<a:buAutoNum>`\n- **Bullet consistency**: Let bullets inherit from the layout. Only specify `<a:buChar>` or `<a:buNone>`.\n\n---\n\n## Common Pitfalls\n\n### Template Adaptation\n\nWhen source content has fewer items than the template:\n- **Remove excess elements entirely** (images, shapes, text boxes), don't just clear text\n- Check for orphaned visuals after clearing text content\n- Run visual QA to catch mismatched counts\n\nWhen replacing text with different length content:\n- **Shorter replacements**: Usually safe\n- **Longer replacements**: May overflow or wrap unexpectedly\n- Test with visual QA after text changes\n- Consider truncating or splitting content to fit the template's design constraints\n\n**Template slots ≠ Source items**: If template has 4 team members but source has 3 users, delete the 4th member's entire group (image + text boxes), not just the text.\n\n### Multi-Item Content\n\nIf source has multiple items (numbered lists, multiple sections), create separate `<a:p>` elements for each — **never concatenate into one string**.\n\n**❌ WRONG** — all items in one paragraph:\n```xml\n<a:p>\n  <a:r><a:rPr .../><a:t>Step 1: Do the first thing. Step 2: Do the second thing.</a:t></a:r>\n</a:p>\n```\n\n**✅ CORRECT** — separate paragraphs with bold headers:\n```xml\n<a:p>\n  <a:pPr algn=\"l\"><a:lnSpc><a:spcPts val=\"3919\"/></a:lnSpc></a:pPr>\n  <a:r><a:rPr lang=\"en-US\" sz=\"2799\" b=\"1\" .../><a:t>Step 1</a:t></a:r>\n</a:p>\n<a:p>\n  <a:pPr algn=\"l\"><a:lnSpc><a:spcPts val=\"3919\"/></a:lnSpc></a:pPr>\n  <a:r><a:rPr lang=\"en-US\" sz=\"2799\" .../><a:t>Do the first thing.</a:t></a:r>\n</a:p>\n<a:p>\n  <a:pPr algn=\"l\"><a:lnSpc><a:spcPts val=\"3919\"/></a:lnSpc></a:pPr>\n  <a:r><a:rPr lang=\"en-US\" sz=\"2799\" b=\"1\" .../><a:t>Step 2</a:t></a:r>\n</a:p>\n<!-- continue pattern -->\n```\n\nCopy `<a:pPr>` from the original paragraph to preserve line spacing. Use `b=\"1\"` on headers.\n\n### Smart Quotes\n\nHandled automatically by unpack/pack. But the Edit tool converts smart quotes to ASCII.\n\n**When adding new text with quotes, use XML entities:**\n\n```xml\n<a:t>the &#x201C;Agreement&#x201D;</a:t>\n```\n\n| Character | Name | Unicode | XML Entity |\n|-----------|------|---------|------------|\n| `“` | Left double quote | U+201C | `&#x201C;` |\n| `”` | Right double quote | U+201D | `&#x201D;` |\n| `‘` | Left single quote | U+2018 | `&#x2018;` |\n| `’` | Right single quote | U+2019 | `&#x2019;` |\n\n### Other\n\n- **Whitespace**: Use `xml:space=\"preserve\"` on `<a:t>` with leading/trailing spaces\n- **XML parsing**: Use `defusedxml.minidom`, not `xml.etree.ElementTree` (corrupts namespaces)\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/pptxgenjs.md",
    "content": "# PptxGenJS Tutorial\n\n## Setup & Basic Structure\n\n```javascript\nconst pptxgen = require(\"pptxgenjs\");\n\nlet pres = new pptxgen();\npres.layout = 'LAYOUT_16x9';  // or 'LAYOUT_16x10', 'LAYOUT_4x3', 'LAYOUT_WIDE'\npres.author = 'Your Name';\npres.title = 'Presentation Title';\n\nlet slide = pres.addSlide();\nslide.addText(\"Hello World!\", { x: 0.5, y: 0.5, fontSize: 36, color: \"363636\" });\n\npres.writeFile({ fileName: \"Presentation.pptx\" });\n```\n\n## Layout Dimensions\n\nSlide dimensions (coordinates in inches):\n- `LAYOUT_16x9`: 10\" × 5.625\" (default)\n- `LAYOUT_16x10`: 10\" × 6.25\"\n- `LAYOUT_4x3`: 10\" × 7.5\"\n- `LAYOUT_WIDE`: 13.3\" × 7.5\"\n\n---\n\n## Text & Formatting\n\n```javascript\n// Basic text\nslide.addText(\"Simple Text\", {\n  x: 1, y: 1, w: 8, h: 2, fontSize: 24, fontFace: \"Arial\",\n  color: \"363636\", bold: true, align: \"center\", valign: \"middle\"\n});\n\n// Character spacing (use charSpacing, not letterSpacing which is silently ignored)\nslide.addText(\"SPACED TEXT\", { x: 1, y: 1, w: 8, h: 1, charSpacing: 6 });\n\n// Rich text arrays\nslide.addText([\n  { text: \"Bold \", options: { bold: true } },\n  { text: \"Italic \", options: { italic: true } }\n], { x: 1, y: 3, w: 8, h: 1 });\n\n// Multi-line text (requires breakLine: true)\nslide.addText([\n  { text: \"Line 1\", options: { breakLine: true } },\n  { text: \"Line 2\", options: { breakLine: true } },\n  { text: \"Line 3\" }  // Last item doesn't need breakLine\n], { x: 0.5, y: 0.5, w: 8, h: 2 });\n\n// Text box margin (internal padding)\nslide.addText(\"Title\", {\n  x: 0.5, y: 0.3, w: 9, h: 0.6,\n  margin: 0  // Use 0 when aligning text with other elements like shapes or icons\n});\n```\n\n**Tip:** Text boxes have internal margin by default. Set `margin: 0` when you need text to align precisely with shapes, lines, or icons at the same x-position.\n\n---\n\n## Lists & Bullets\n\n```javascript\n// ✅ CORRECT: Multiple bullets\nslide.addText([\n  { text: \"First item\", options: { bullet: true, breakLine: true } },\n  { text: \"Second item\", options: { bullet: true, breakLine: true } },\n  { text: \"Third item\", options: { bullet: true } }\n], { x: 0.5, y: 0.5, w: 8, h: 3 });\n\n// ❌ WRONG: Never use unicode bullets\nslide.addText(\"• First item\", { ... });  // Creates double bullets\n\n// Sub-items and numbered lists\n{ text: \"Sub-item\", options: { bullet: true, indentLevel: 1 } }\n{ text: \"First\", options: { bullet: { type: \"number\" }, breakLine: true } }\n```\n\n---\n\n## Shapes\n\n```javascript\nslide.addShape(pres.shapes.RECTANGLE, {\n  x: 0.5, y: 0.8, w: 1.5, h: 3.0,\n  fill: { color: \"FF0000\" }, line: { color: \"000000\", width: 2 }\n});\n\nslide.addShape(pres.shapes.OVAL, { x: 4, y: 1, w: 2, h: 2, fill: { color: \"0000FF\" } });\n\nslide.addShape(pres.shapes.LINE, {\n  x: 1, y: 3, w: 5, h: 0, line: { color: \"FF0000\", width: 3, dashType: \"dash\" }\n});\n\n// With transparency\nslide.addShape(pres.shapes.RECTANGLE, {\n  x: 1, y: 1, w: 3, h: 2,\n  fill: { color: \"0088CC\", transparency: 50 }\n});\n\n// Rounded rectangle (rectRadius only works with ROUNDED_RECTANGLE, not RECTANGLE)\n// ⚠️ Don't pair with rectangular accent overlays — they won't cover rounded corners. Use RECTANGLE instead.\nslide.addShape(pres.shapes.ROUNDED_RECTANGLE, {\n  x: 1, y: 1, w: 3, h: 2,\n  fill: { color: \"FFFFFF\" }, rectRadius: 0.1\n});\n\n// With shadow\nslide.addShape(pres.shapes.RECTANGLE, {\n  x: 1, y: 1, w: 3, h: 2,\n  fill: { color: \"FFFFFF\" },\n  shadow: { type: \"outer\", color: \"000000\", blur: 6, offset: 2, angle: 135, opacity: 0.15 }\n});\n```\n\nShadow options:\n\n| Property | Type | Range | Notes |\n|----------|------|-------|-------|\n| `type` | string | `\"outer\"`, `\"inner\"` | |\n| `color` | string | 6-char hex (e.g. `\"000000\"`) | No `#` prefix, no 8-char hex — see Common Pitfalls |\n| `blur` | number | 0-100 pt | |\n| `offset` | number | 0-200 pt | **Must be non-negative** — negative values corrupt the file |\n| `angle` | number | 0-359 degrees | Direction the shadow falls (135 = bottom-right, 270 = upward) |\n| `opacity` | number | 0.0-1.0 | Use this for transparency, never encode in color string |\n\nTo cast a shadow upward (e.g. on a footer bar), use `angle: 270` with a positive offset — do **not** use a negative offset.\n\n**Note**: Gradient fills are not natively supported. Use a gradient image as a background instead.\n\n---\n\n## Images\n\n### Image Sources\n\n```javascript\n// From file path\nslide.addImage({ path: \"images/chart.png\", x: 1, y: 1, w: 5, h: 3 });\n\n// From URL\nslide.addImage({ path: \"https://example.com/image.jpg\", x: 1, y: 1, w: 5, h: 3 });\n\n// From base64 (faster, no file I/O)\nslide.addImage({ data: \"image/png;base64,iVBORw0KGgo...\", x: 1, y: 1, w: 5, h: 3 });\n```\n\n### Image Options\n\n```javascript\nslide.addImage({\n  path: \"image.png\",\n  x: 1, y: 1, w: 5, h: 3,\n  rotate: 45,              // 0-359 degrees\n  rounding: true,          // Circular crop\n  transparency: 50,        // 0-100\n  flipH: true,             // Horizontal flip\n  flipV: false,            // Vertical flip\n  altText: \"Description\",  // Accessibility\n  hyperlink: { url: \"https://example.com\" }\n});\n```\n\n### Image Sizing Modes\n\n```javascript\n// Contain - fit inside, preserve ratio\n{ sizing: { type: 'contain', w: 4, h: 3 } }\n\n// Cover - fill area, preserve ratio (may crop)\n{ sizing: { type: 'cover', w: 4, h: 3 } }\n\n// Crop - cut specific portion\n{ sizing: { type: 'crop', x: 0.5, y: 0.5, w: 2, h: 2 } }\n```\n\n### Calculate Dimensions (preserve aspect ratio)\n\n```javascript\nconst origWidth = 1978, origHeight = 923, maxHeight = 3.0;\nconst calcWidth = maxHeight * (origWidth / origHeight);\nconst centerX = (10 - calcWidth) / 2;\n\nslide.addImage({ path: \"image.png\", x: centerX, y: 1.2, w: calcWidth, h: maxHeight });\n```\n\n### Supported Formats\n\n- **Standard**: PNG, JPG, GIF (animated GIFs work in Microsoft 365)\n- **SVG**: Works in modern PowerPoint/Microsoft 365\n\n---\n\n## Icons\n\nUse react-icons to generate SVG icons, then rasterize to PNG for universal compatibility.\n\n### Setup\n\n```javascript\nconst React = require(\"react\");\nconst ReactDOMServer = require(\"react-dom/server\");\nconst sharp = require(\"sharp\");\nconst { FaCheckCircle, FaChartLine } = require(\"react-icons/fa\");\n\nfunction renderIconSvg(IconComponent, color = \"#000000\", size = 256) {\n  return ReactDOMServer.renderToStaticMarkup(\n    React.createElement(IconComponent, { color, size: String(size) })\n  );\n}\n\nasync function iconToBase64Png(IconComponent, color, size = 256) {\n  const svg = renderIconSvg(IconComponent, color, size);\n  const pngBuffer = await sharp(Buffer.from(svg)).png().toBuffer();\n  return \"image/png;base64,\" + pngBuffer.toString(\"base64\");\n}\n```\n\n### Add Icon to Slide\n\n```javascript\nconst iconData = await iconToBase64Png(FaCheckCircle, \"#4472C4\", 256);\n\nslide.addImage({\n  data: iconData,\n  x: 1, y: 1, w: 0.5, h: 0.5  // Size in inches\n});\n```\n\n**Note**: Use size 256 or higher for crisp icons. The size parameter controls the rasterization resolution, not the display size on the slide (which is set by `w` and `h` in inches).\n\n### Icon Libraries\n\nInstall: `npm install -g react-icons react react-dom sharp`\n\nPopular icon sets in react-icons:\n- `react-icons/fa` - Font Awesome\n- `react-icons/md` - Material Design\n- `react-icons/hi` - Heroicons\n- `react-icons/bi` - Bootstrap Icons\n\n---\n\n## Slide Backgrounds\n\n```javascript\n// Solid color\nslide.background = { color: \"F1F1F1\" };\n\n// Color with transparency\nslide.background = { color: \"FF3399\", transparency: 50 };\n\n// Image from URL\nslide.background = { path: \"https://example.com/bg.jpg\" };\n\n// Image from base64\nslide.background = { data: \"image/png;base64,iVBORw0KGgo...\" };\n```\n\n---\n\n## Tables\n\n```javascript\nslide.addTable([\n  [\"Header 1\", \"Header 2\"],\n  [\"Cell 1\", \"Cell 2\"]\n], {\n  x: 1, y: 1, w: 8, h: 2,\n  border: { pt: 1, color: \"999999\" }, fill: { color: \"F1F1F1\" }\n});\n\n// Advanced with merged cells\nlet tableData = [\n  [{ text: \"Header\", options: { fill: { color: \"6699CC\" }, color: \"FFFFFF\", bold: true } }, \"Cell\"],\n  [{ text: \"Merged\", options: { colspan: 2 } }]\n];\nslide.addTable(tableData, { x: 1, y: 3.5, w: 8, colW: [4, 4] });\n```\n\n---\n\n## Charts\n\n```javascript\n// Bar chart\nslide.addChart(pres.charts.BAR, [{\n  name: \"Sales\", labels: [\"Q1\", \"Q2\", \"Q3\", \"Q4\"], values: [4500, 5500, 6200, 7100]\n}], {\n  x: 0.5, y: 0.6, w: 6, h: 3, barDir: 'col',\n  showTitle: true, title: 'Quarterly Sales'\n});\n\n// Line chart\nslide.addChart(pres.charts.LINE, [{\n  name: \"Temp\", labels: [\"Jan\", \"Feb\", \"Mar\"], values: [32, 35, 42]\n}], { x: 0.5, y: 4, w: 6, h: 3, lineSize: 3, lineSmooth: true });\n\n// Pie chart\nslide.addChart(pres.charts.PIE, [{\n  name: \"Share\", labels: [\"A\", \"B\", \"Other\"], values: [35, 45, 20]\n}], { x: 7, y: 1, w: 5, h: 4, showPercent: true });\n```\n\n### Better-Looking Charts\n\nDefault charts look dated. Apply these options for a modern, clean appearance:\n\n```javascript\nslide.addChart(pres.charts.BAR, chartData, {\n  x: 0.5, y: 1, w: 9, h: 4, barDir: \"col\",\n\n  // Custom colors (match your presentation palette)\n  chartColors: [\"0D9488\", \"14B8A6\", \"5EEAD4\"],\n\n  // Clean background\n  chartArea: { fill: { color: \"FFFFFF\" }, roundedCorners: true },\n\n  // Muted axis labels\n  catAxisLabelColor: \"64748B\",\n  valAxisLabelColor: \"64748B\",\n\n  // Subtle grid (value axis only)\n  valGridLine: { color: \"E2E8F0\", size: 0.5 },\n  catGridLine: { style: \"none\" },\n\n  // Data labels on bars\n  showValue: true,\n  dataLabelPosition: \"outEnd\",\n  dataLabelColor: \"1E293B\",\n\n  // Hide legend for single series\n  showLegend: false,\n});\n```\n\n**Key styling options:**\n- `chartColors: [...]` - hex colors for series/segments\n- `chartArea: { fill, border, roundedCorners }` - chart background\n- `catGridLine/valGridLine: { color, style, size }` - grid lines (`style: \"none\"` to hide)\n- `lineSmooth: true` - curved lines (line charts)\n- `legendPos: \"r\"` - legend position: \"b\", \"t\", \"l\", \"r\", \"tr\"\n\n---\n\n## Slide Masters\n\n```javascript\npres.defineSlideMaster({\n  title: 'TITLE_SLIDE', background: { color: '283A5E' },\n  objects: [{\n    placeholder: { options: { name: 'title', type: 'title', x: 1, y: 2, w: 8, h: 2 } }\n  }]\n});\n\nlet titleSlide = pres.addSlide({ masterName: \"TITLE_SLIDE\" });\ntitleSlide.addText(\"My Title\", { placeholder: \"title\" });\n```\n\n---\n\n## Common Pitfalls\n\n⚠️ These issues cause file corruption, visual bugs, or broken output. Avoid them.\n\n1. **NEVER use \"#\" with hex colors** - causes file corruption\n   ```javascript\n   color: \"FF0000\"      // ✅ CORRECT\n   color: \"#FF0000\"     // ❌ WRONG\n   ```\n\n2. **NEVER encode opacity in hex color strings** - 8-char colors (e.g., `\"00000020\"`) corrupt the file. Use the `opacity` property instead.\n   ```javascript\n   shadow: { type: \"outer\", blur: 6, offset: 2, color: \"00000020\" }          // ❌ CORRUPTS FILE\n   shadow: { type: \"outer\", blur: 6, offset: 2, color: \"000000\", opacity: 0.12 }  // ✅ CORRECT\n   ```\n\n3. **Use `bullet: true`** - NEVER unicode symbols like \"•\" (creates double bullets)\n\n4. **Use `breakLine: true`** between array items or text runs together\n\n5. **Avoid `lineSpacing` with bullets** - causes excessive gaps; use `paraSpaceAfter` instead\n\n6. **Each presentation needs fresh instance** - don't reuse `pptxgen()` objects\n\n7. **NEVER reuse option objects across calls** - PptxGenJS mutates objects in-place (e.g. converting shadow values to EMU). Sharing one object between multiple calls corrupts the second shape.\n   ```javascript\n   const shadow = { type: \"outer\", blur: 6, offset: 2, color: \"000000\", opacity: 0.15 };\n   slide.addShape(pres.shapes.RECTANGLE, { shadow, ... });  // ❌ second call gets already-converted values\n   slide.addShape(pres.shapes.RECTANGLE, { shadow, ... });\n\n   const makeShadow = () => ({ type: \"outer\", blur: 6, offset: 2, color: \"000000\", opacity: 0.15 });\n   slide.addShape(pres.shapes.RECTANGLE, { shadow: makeShadow(), ... });  // ✅ fresh object each time\n   slide.addShape(pres.shapes.RECTANGLE, { shadow: makeShadow(), ... });\n   ```\n\n8. **Don't use `ROUNDED_RECTANGLE` with accent borders** - rectangular overlay bars won't cover rounded corners. Use `RECTANGLE` instead.\n   ```javascript\n   // ❌ WRONG: Accent bar doesn't cover rounded corners\n   slide.addShape(pres.shapes.ROUNDED_RECTANGLE, { x: 1, y: 1, w: 3, h: 1.5, fill: { color: \"FFFFFF\" } });\n   slide.addShape(pres.shapes.RECTANGLE, { x: 1, y: 1, w: 0.08, h: 1.5, fill: { color: \"0891B2\" } });\n\n   // ✅ CORRECT: Use RECTANGLE for clean alignment\n   slide.addShape(pres.shapes.RECTANGLE, { x: 1, y: 1, w: 3, h: 1.5, fill: { color: \"FFFFFF\" } });\n   slide.addShape(pres.shapes.RECTANGLE, { x: 1, y: 1, w: 0.08, h: 1.5, fill: { color: \"0891B2\" } });\n   ```\n\n---\n\n## Quick Reference\n\n- **Shapes**: RECTANGLE, OVAL, LINE, ROUNDED_RECTANGLE\n- **Charts**: BAR, LINE, PIE, DOUGHNUT, SCATTER, BUBBLE, RADAR\n- **Layouts**: LAYOUT_16x9 (10\"×5.625\"), LAYOUT_16x10, LAYOUT_4x3, LAYOUT_WIDE\n- **Alignment**: \"left\", \"center\", \"right\"\n- **Chart data labels**: \"outEnd\", \"inEnd\", \"center\"\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/add_slide.py",
    "content": "\"\"\"Add a new slide to an unpacked PPTX directory.\n\nUsage: python add_slide.py <unpacked_dir> <source>\n\nThe source can be:\n  - A slide file (e.g., slide2.xml) - duplicates the slide\n  - A layout file (e.g., slideLayout2.xml) - creates from layout\n\nExamples:\n    python add_slide.py unpacked/ slide2.xml\n    # Duplicates slide2, creates slide5.xml\n\n    python add_slide.py unpacked/ slideLayout2.xml\n    # Creates slide5.xml from slideLayout2.xml\n\nTo see available layouts: ls unpacked/ppt/slideLayouts/\n\nPrints the <p:sldId> element to add to presentation.xml.\n\"\"\"\n\nimport re\nimport shutil\nimport sys\nfrom pathlib import Path\n\n\ndef get_next_slide_number(slides_dir: Path) -> int:\n    existing = [\n        int(m.group(1))\n        for f in slides_dir.glob(\"slide*.xml\")\n        if (m := re.match(r\"slide(\\d+)\\.xml\", f.name))\n    ]\n    return max(existing) + 1 if existing else 1\n\n\ndef create_slide_from_layout(unpacked_dir: Path, layout_file: str) -> None:\n    slides_dir = unpacked_dir / \"ppt\" / \"slides\"\n    rels_dir = slides_dir / \"_rels\"\n    layouts_dir = unpacked_dir / \"ppt\" / \"slideLayouts\"\n\n    layout_path = layouts_dir / layout_file\n    if not layout_path.exists():\n        print(f\"Error: {layout_path} not found\", file=sys.stderr)\n        sys.exit(1)\n\n    next_num = get_next_slide_number(slides_dir)\n    dest = f\"slide{next_num}.xml\"\n    dest_slide = slides_dir / dest\n    dest_rels = rels_dir / f\"{dest}.rels\"\n\n    slide_xml = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<p:sld xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  xmlns:p=\"http://schemas.openxmlformats.org/presentationml/2006/main\">\n  <p:cSld>\n    <p:spTree>\n      <p:nvGrpSpPr>\n        <p:cNvPr id=\"1\" name=\"\"/>\n        <p:cNvGrpSpPr/>\n        <p:nvPr/>\n      </p:nvGrpSpPr>\n      <p:grpSpPr>\n        <a:xfrm>\n          <a:off x=\"0\" y=\"0\"/>\n          <a:ext cx=\"0\" cy=\"0\"/>\n          <a:chOff x=\"0\" y=\"0\"/>\n          <a:chExt cx=\"0\" cy=\"0\"/>\n        </a:xfrm>\n      </p:grpSpPr>\n    </p:spTree>\n  </p:cSld>\n  <p:clrMapOvr>\n    <a:masterClrMapping/>\n  </p:clrMapOvr>\n</p:sld>\"\"\"\n    dest_slide.write_text(slide_xml, encoding=\"utf-8\")\n\n    rels_dir.mkdir(exist_ok=True)\n    rels_xml = f\"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<Relationships xmlns=\"http://schemas.openxmlformats.org/package/2006/relationships\">\n  <Relationship Id=\"rId1\"\n    Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout\"\n    Target=\"../slideLayouts/{layout_file}\"/>\n</Relationships>\"\"\"\n    dest_rels.write_text(rels_xml, encoding=\"utf-8\")\n\n    _add_to_content_types(unpacked_dir, dest)\n\n    rid = _add_to_presentation_rels(unpacked_dir, dest)\n\n    next_slide_id = _get_next_slide_id(unpacked_dir)\n\n    print(f\"Created {dest} from {layout_file}\")\n    print(\n        f'Add to presentation.xml <p:sldIdLst>: <p:sldId id=\"{next_slide_id}\" r:id=\"{rid}\"/>'\n    )\n\n\ndef duplicate_slide(unpacked_dir: Path, source: str) -> None:\n    slides_dir = unpacked_dir / \"ppt\" / \"slides\"\n    rels_dir = slides_dir / \"_rels\"\n\n    source_slide = slides_dir / source\n\n    if not source_slide.exists():\n        print(f\"Error: {source_slide} not found\", file=sys.stderr)\n        sys.exit(1)\n\n    next_num = get_next_slide_number(slides_dir)\n    dest = f\"slide{next_num}.xml\"\n    dest_slide = slides_dir / dest\n\n    source_rels = rels_dir / f\"{source}.rels\"\n    dest_rels = rels_dir / f\"{dest}.rels\"\n\n    shutil.copy2(source_slide, dest_slide)\n\n    if source_rels.exists():\n        shutil.copy2(source_rels, dest_rels)\n\n        rels_content = dest_rels.read_text(encoding=\"utf-8\")\n        rels_content = re.sub(\n            r'\\s*<Relationship[^>]*Type=\"[^\"]*notesSlide\"[^>]*/>\\s*',\n            \"\\n\",\n            rels_content,\n        )\n        dest_rels.write_text(rels_content, encoding=\"utf-8\")\n\n    _add_to_content_types(unpacked_dir, dest)\n\n    rid = _add_to_presentation_rels(unpacked_dir, dest)\n\n    next_slide_id = _get_next_slide_id(unpacked_dir)\n\n    print(f\"Created {dest} from {source}\")\n    print(\n        f'Add to presentation.xml <p:sldIdLst>: <p:sldId id=\"{next_slide_id}\" r:id=\"{rid}\"/>'\n    )\n\n\ndef _add_to_content_types(unpacked_dir: Path, dest: str) -> None:\n    content_types_path = unpacked_dir / \"[Content_Types].xml\"\n    content_types = content_types_path.read_text(encoding=\"utf-8\")\n\n    content_type = (\n        \"application/vnd.openxmlformats-officedocument.presentationml.slide+xml\"\n    )\n    new_override = (\n        f'<Override PartName=\"/ppt/slides/{dest}\" ContentType=\"{content_type}\"/>'\n    )\n\n    if f\"/ppt/slides/{dest}\" not in content_types:\n        content_types = content_types.replace(\"</Types>\", f\"  {new_override}\\n</Types>\")\n        content_types_path.write_text(content_types, encoding=\"utf-8\")\n\n\ndef _add_to_presentation_rels(unpacked_dir: Path, dest: str) -> str:\n    pres_rels_path = unpacked_dir / \"ppt\" / \"_rels\" / \"presentation.xml.rels\"\n    pres_rels = pres_rels_path.read_text(encoding=\"utf-8\")\n\n    rids = [int(m) for m in re.findall(r'Id=\"rId(\\d+)\"', pres_rels)]\n    next_rid = max(rids) + 1 if rids else 1\n    rid = f\"rId{next_rid}\"\n\n    slide_type = (\n        \"http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide\"\n    )\n    new_rel = f'<Relationship Id=\"{rid}\" Type=\"{slide_type}\" Target=\"slides/{dest}\"/>'\n\n    if f\"slides/{dest}\" not in pres_rels:\n        pres_rels = pres_rels.replace(\n            \"</Relationships>\", f\"  {new_rel}\\n</Relationships>\"\n        )\n        pres_rels_path.write_text(pres_rels, encoding=\"utf-8\")\n\n    return rid\n\n\ndef _get_next_slide_id(unpacked_dir: Path) -> int:\n    pres_path = unpacked_dir / \"ppt\" / \"presentation.xml\"\n    pres_content = pres_path.read_text(encoding=\"utf-8\")\n    slide_ids = [int(m) for m in re.findall(r'<p:sldId[^>]*id=\"(\\d+)\"', pres_content)]\n    return max(slide_ids) + 1 if slide_ids else 256\n\n\ndef parse_source(source: str) -> tuple[str, str | None]:\n    if source.startswith(\"slideLayout\") and source.endswith(\".xml\"):\n        return (\"layout\", source)\n\n    return (\"slide\", None)\n\n\nif __name__ == \"__main__\":\n    if len(sys.argv) != 3:\n        print(\"Usage: python add_slide.py <unpacked_dir> <source>\", file=sys.stderr)\n        print(\"\", file=sys.stderr)\n        print(\"Source can be:\", file=sys.stderr)\n        print(\"  slide2.xml        - duplicate an existing slide\", file=sys.stderr)\n        print(\"  slideLayout2.xml  - create from a layout template\", file=sys.stderr)\n        print(\"\", file=sys.stderr)\n        print(\n            \"To see available layouts: ls <unpacked_dir>/ppt/slideLayouts/\",\n            file=sys.stderr,\n        )\n        sys.exit(1)\n\n    unpacked_dir = Path(sys.argv[1])\n    source = sys.argv[2]\n\n    if not unpacked_dir.exists():\n        print(f\"Error: {unpacked_dir} not found\", file=sys.stderr)\n        sys.exit(1)\n\n    source_type, layout_file = parse_source(source)\n\n    if source_type == \"layout\" and layout_file is not None:\n        create_slide_from_layout(unpacked_dir, layout_file)\n    else:\n        duplicate_slide(unpacked_dir, source)\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/clean.py",
    "content": "\"\"\"Remove unreferenced files from an unpacked PPTX directory.\n\nUsage: python clean.py <unpacked_dir>\n\nExample:\n    python clean.py unpacked/\n\nThis script removes:\n- Orphaned slides (not in sldIdLst) and their relationships\n- [trash] directory (unreferenced files)\n- Orphaned .rels files for deleted resources\n- Unreferenced media, embeddings, charts, diagrams, drawings, ink files\n- Unreferenced theme files\n- Unreferenced notes slides\n- Content-Type overrides for deleted files\n\"\"\"\n\nimport re\nimport sys\nfrom pathlib import Path\n\nimport defusedxml.minidom\n\n\ndef get_slides_in_sldidlst(unpacked_dir: Path) -> set[str]:\n    pres_path = unpacked_dir / \"ppt\" / \"presentation.xml\"\n    pres_rels_path = unpacked_dir / \"ppt\" / \"_rels\" / \"presentation.xml.rels\"\n\n    if not pres_path.exists() or not pres_rels_path.exists():\n        return set()\n\n    rels_dom = defusedxml.minidom.parse(str(pres_rels_path))\n    rid_to_slide = {}\n    for rel in rels_dom.getElementsByTagName(\"Relationship\"):\n        rid = rel.getAttribute(\"Id\")\n        target = rel.getAttribute(\"Target\")\n        rel_type = rel.getAttribute(\"Type\")\n        if \"slide\" in rel_type and target.startswith(\"slides/\"):\n            rid_to_slide[rid] = target.replace(\"slides/\", \"\")\n\n    pres_content = pres_path.read_text(encoding=\"utf-8\")\n    referenced_rids = set(re.findall(r'<p:sldId[^>]*r:id=\"([^\"]+)\"', pres_content))\n\n    return {rid_to_slide[rid] for rid in referenced_rids if rid in rid_to_slide}\n\n\ndef remove_orphaned_slides(unpacked_dir: Path) -> list[str]:\n    slides_dir = unpacked_dir / \"ppt\" / \"slides\"\n    slides_rels_dir = slides_dir / \"_rels\"\n    pres_rels_path = unpacked_dir / \"ppt\" / \"_rels\" / \"presentation.xml.rels\"\n\n    if not slides_dir.exists():\n        return []\n\n    referenced_slides = get_slides_in_sldidlst(unpacked_dir)\n    removed = []\n\n    for slide_file in slides_dir.glob(\"slide*.xml\"):\n        if slide_file.name not in referenced_slides:\n            rel_path = slide_file.relative_to(unpacked_dir)\n            slide_file.unlink()\n            removed.append(str(rel_path))\n\n            rels_file = slides_rels_dir / f\"{slide_file.name}.rels\"\n            if rels_file.exists():\n                rels_file.unlink()\n                removed.append(str(rels_file.relative_to(unpacked_dir)))\n\n    if removed and pres_rels_path.exists():\n        rels_dom = defusedxml.minidom.parse(str(pres_rels_path))\n        changed = False\n\n        for rel in list(rels_dom.getElementsByTagName(\"Relationship\")):\n            target = rel.getAttribute(\"Target\")\n            if target.startswith(\"slides/\"):\n                slide_name = target.replace(\"slides/\", \"\")\n                if slide_name not in referenced_slides:\n                    if rel.parentNode:\n                        rel.parentNode.removeChild(rel)\n                        changed = True\n\n        if changed:\n            with open(pres_rels_path, \"wb\") as f:\n                f.write(rels_dom.toxml(encoding=\"utf-8\"))\n\n    return removed\n\n\ndef remove_trash_directory(unpacked_dir: Path) -> list[str]:\n    trash_dir = unpacked_dir / \"[trash]\"\n    removed = []\n\n    if trash_dir.exists() and trash_dir.is_dir():\n        for file_path in trash_dir.iterdir():\n            if file_path.is_file():\n                rel_path = file_path.relative_to(unpacked_dir)\n                removed.append(str(rel_path))\n                file_path.unlink()\n        trash_dir.rmdir()\n\n    return removed\n\n\ndef get_slide_referenced_files(unpacked_dir: Path) -> set:\n    referenced = set()\n    slides_rels_dir = unpacked_dir / \"ppt\" / \"slides\" / \"_rels\"\n\n    if not slides_rels_dir.exists():\n        return referenced\n\n    for rels_file in slides_rels_dir.glob(\"*.rels\"):\n        dom = defusedxml.minidom.parse(str(rels_file))\n        for rel in dom.getElementsByTagName(\"Relationship\"):\n            target = rel.getAttribute(\"Target\")\n            if not target:\n                continue\n            target_path = (rels_file.parent.parent / target).resolve()\n            try:\n                referenced.add(target_path.relative_to(unpacked_dir.resolve()))\n            except ValueError:\n                pass\n\n    return referenced\n\n\ndef remove_orphaned_rels_files(unpacked_dir: Path) -> list[str]:\n    resource_dirs = [\"charts\", \"diagrams\", \"drawings\"]\n    removed = []\n    slide_referenced = get_slide_referenced_files(unpacked_dir)\n\n    for dir_name in resource_dirs:\n        rels_dir = unpacked_dir / \"ppt\" / dir_name / \"_rels\"\n        if not rels_dir.exists():\n            continue\n\n        for rels_file in rels_dir.glob(\"*.rels\"):\n            resource_file = rels_dir.parent / rels_file.name.replace(\".rels\", \"\")\n            try:\n                resource_rel_path = resource_file.resolve().relative_to(\n                    unpacked_dir.resolve()\n                )\n            except ValueError:\n                continue\n\n            if not resource_file.exists() or resource_rel_path not in slide_referenced:\n                rels_file.unlink()\n                rel_path = rels_file.relative_to(unpacked_dir)\n                removed.append(str(rel_path))\n\n    return removed\n\n\ndef get_referenced_files(unpacked_dir: Path) -> set:\n    referenced = set()\n\n    for rels_file in unpacked_dir.rglob(\"*.rels\"):\n        dom = defusedxml.minidom.parse(str(rels_file))\n        for rel in dom.getElementsByTagName(\"Relationship\"):\n            target = rel.getAttribute(\"Target\")\n            if not target:\n                continue\n            target_path = (rels_file.parent.parent / target).resolve()\n            try:\n                referenced.add(target_path.relative_to(unpacked_dir.resolve()))\n            except ValueError:\n                pass\n\n    return referenced\n\n\ndef remove_orphaned_files(unpacked_dir: Path, referenced: set) -> list[str]:\n    resource_dirs = [\n        \"media\",\n        \"embeddings\",\n        \"charts\",\n        \"diagrams\",\n        \"tags\",\n        \"drawings\",\n        \"ink\",\n    ]\n    removed = []\n\n    for dir_name in resource_dirs:\n        dir_path = unpacked_dir / \"ppt\" / dir_name\n        if not dir_path.exists():\n            continue\n\n        for file_path in dir_path.glob(\"*\"):\n            if not file_path.is_file():\n                continue\n            rel_path = file_path.relative_to(unpacked_dir)\n            if rel_path not in referenced:\n                file_path.unlink()\n                removed.append(str(rel_path))\n\n    theme_dir = unpacked_dir / \"ppt\" / \"theme\"\n    if theme_dir.exists():\n        for file_path in theme_dir.glob(\"theme*.xml\"):\n            rel_path = file_path.relative_to(unpacked_dir)\n            if rel_path not in referenced:\n                file_path.unlink()\n                removed.append(str(rel_path))\n                theme_rels = theme_dir / \"_rels\" / f\"{file_path.name}.rels\"\n                if theme_rels.exists():\n                    theme_rels.unlink()\n                    removed.append(str(theme_rels.relative_to(unpacked_dir)))\n\n    notes_dir = unpacked_dir / \"ppt\" / \"notesSlides\"\n    if notes_dir.exists():\n        for file_path in notes_dir.glob(\"*.xml\"):\n            if not file_path.is_file():\n                continue\n            rel_path = file_path.relative_to(unpacked_dir)\n            if rel_path not in referenced:\n                file_path.unlink()\n                removed.append(str(rel_path))\n\n        notes_rels_dir = notes_dir / \"_rels\"\n        if notes_rels_dir.exists():\n            for file_path in notes_rels_dir.glob(\"*.rels\"):\n                notes_file = notes_dir / file_path.name.replace(\".rels\", \"\")\n                if not notes_file.exists():\n                    file_path.unlink()\n                    removed.append(str(file_path.relative_to(unpacked_dir)))\n\n    return removed\n\n\ndef update_content_types(unpacked_dir: Path, removed_files: list[str]) -> None:\n    ct_path = unpacked_dir / \"[Content_Types].xml\"\n    if not ct_path.exists():\n        return\n\n    dom = defusedxml.minidom.parse(str(ct_path))\n    changed = False\n\n    for override in list(dom.getElementsByTagName(\"Override\")):\n        part_name = override.getAttribute(\"PartName\").lstrip(\"/\")\n        if part_name in removed_files:\n            if override.parentNode:\n                override.parentNode.removeChild(override)\n                changed = True\n\n    if changed:\n        with open(ct_path, \"wb\") as f:\n            f.write(dom.toxml(encoding=\"utf-8\"))\n\n\ndef clean_unused_files(unpacked_dir: Path) -> list[str]:\n    all_removed = []\n\n    slides_removed = remove_orphaned_slides(unpacked_dir)\n    all_removed.extend(slides_removed)\n\n    trash_removed = remove_trash_directory(unpacked_dir)\n    all_removed.extend(trash_removed)\n\n    while True:\n        removed_rels = remove_orphaned_rels_files(unpacked_dir)\n        referenced = get_referenced_files(unpacked_dir)\n        removed_files = remove_orphaned_files(unpacked_dir, referenced)\n\n        total_removed = removed_rels + removed_files\n        if not total_removed:\n            break\n\n        all_removed.extend(total_removed)\n\n    if all_removed:\n        update_content_types(unpacked_dir, all_removed)\n\n    return all_removed\n\n\nif __name__ == \"__main__\":\n    if len(sys.argv) != 2:\n        print(\"Usage: python clean.py <unpacked_dir>\", file=sys.stderr)\n        print(\"Example: python clean.py unpacked/\", file=sys.stderr)\n        sys.exit(1)\n\n    unpacked_dir = Path(sys.argv[1])\n\n    if not unpacked_dir.exists():\n        print(f\"Error: {unpacked_dir} not found\", file=sys.stderr)\n        sys.exit(1)\n\n    removed = clean_unused_files(unpacked_dir)\n\n    if removed:\n        print(f\"Removed {len(removed)} unreferenced files:\")\n        for f in removed:\n            print(f\"  {f}\")\n    else:\n        print(\"No unreferenced files found\")\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/helpers/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/helpers/merge_runs.py",
    "content": "\"\"\"Merge adjacent runs with identical formatting in DOCX.\n\nMerges adjacent <w:r> elements that have identical <w:rPr> properties.\nWorks on runs in paragraphs and inside tracked changes (<w:ins>, <w:del>).\n\nAlso:\n- Removes rsid attributes from runs (revision metadata that doesn't affect rendering)\n- Removes proofErr elements (spell/grammar markers that block merging)\n\"\"\"\n\nfrom pathlib import Path\n\nimport defusedxml.minidom\n\n\ndef merge_runs(input_dir: str) -> tuple[int, str]:\n    doc_xml = Path(input_dir) / \"word\" / \"document.xml\"\n\n    if not doc_xml.exists():\n        return 0, f\"Error: {doc_xml} not found\"\n\n    try:\n        dom = defusedxml.minidom.parseString(doc_xml.read_text(encoding=\"utf-8\"))\n        root = dom.documentElement\n\n        _remove_elements(root, \"proofErr\")\n        _strip_run_rsid_attrs(root)\n\n        containers = {run.parentNode for run in _find_elements(root, \"r\")}\n\n        merge_count = 0\n        for container in containers:\n            merge_count += _merge_runs_in(container)\n\n        doc_xml.write_bytes(dom.toxml(encoding=\"UTF-8\"))\n        return merge_count, f\"Merged {merge_count} runs\"\n\n    except Exception as e:\n        return 0, f\"Error: {e}\"\n\n\ndef _find_elements(root, tag: str) -> list:\n    results = []\n\n    def traverse(node):\n        if node.nodeType == node.ELEMENT_NODE:\n            name = node.localName or node.tagName\n            if name == tag or name.endswith(f\":{tag}\"):\n                results.append(node)\n            for child in node.childNodes:\n                traverse(child)\n\n    traverse(root)\n    return results\n\n\ndef _get_child(parent, tag: str):\n    for child in parent.childNodes:\n        if child.nodeType == child.ELEMENT_NODE:\n            name = child.localName or child.tagName\n            if name == tag or name.endswith(f\":{tag}\"):\n                return child\n    return None\n\n\ndef _get_children(parent, tag: str) -> list:\n    results = []\n    for child in parent.childNodes:\n        if child.nodeType == child.ELEMENT_NODE:\n            name = child.localName or child.tagName\n            if name == tag or name.endswith(f\":{tag}\"):\n                results.append(child)\n    return results\n\n\ndef _is_adjacent(elem1, elem2) -> bool:\n    node = elem1.nextSibling\n    while node:\n        if node == elem2:\n            return True\n        if node.nodeType == node.ELEMENT_NODE:\n            return False\n        if node.nodeType == node.TEXT_NODE and node.data.strip():\n            return False\n        node = node.nextSibling\n    return False\n\n\ndef _remove_elements(root, tag: str):\n    for elem in _find_elements(root, tag):\n        if elem.parentNode:\n            elem.parentNode.removeChild(elem)\n\n\ndef _strip_run_rsid_attrs(root):\n    for run in _find_elements(root, \"r\"):\n        for attr in list(run.attributes.values()):\n            if \"rsid\" in attr.name.lower():\n                run.removeAttribute(attr.name)\n\n\ndef _merge_runs_in(container) -> int:\n    merge_count = 0\n    run = _first_child_run(container)\n\n    while run:\n        while True:\n            next_elem = _next_element_sibling(run)\n            if next_elem and _is_run(next_elem) and _can_merge(run, next_elem):\n                _merge_run_content(run, next_elem)\n                container.removeChild(next_elem)\n                merge_count += 1\n            else:\n                break\n\n        _consolidate_text(run)\n        run = _next_sibling_run(run)\n\n    return merge_count\n\n\ndef _first_child_run(container):\n    for child in container.childNodes:\n        if child.nodeType == child.ELEMENT_NODE and _is_run(child):\n            return child\n    return None\n\n\ndef _next_element_sibling(node):\n    sibling = node.nextSibling\n    while sibling:\n        if sibling.nodeType == sibling.ELEMENT_NODE:\n            return sibling\n        sibling = sibling.nextSibling\n    return None\n\n\ndef _next_sibling_run(node):\n    sibling = node.nextSibling\n    while sibling:\n        if sibling.nodeType == sibling.ELEMENT_NODE:\n            if _is_run(sibling):\n                return sibling\n        sibling = sibling.nextSibling\n    return None\n\n\ndef _is_run(node) -> bool:\n    name = node.localName or node.tagName\n    return name == \"r\" or name.endswith(\":r\")\n\n\ndef _can_merge(run1, run2) -> bool:\n    rpr1 = _get_child(run1, \"rPr\")\n    rpr2 = _get_child(run2, \"rPr\")\n\n    if (rpr1 is None) != (rpr2 is None):\n        return False\n    if rpr1 is None:\n        return True\n    return rpr1.toxml() == rpr2.toxml()\n\n\ndef _merge_run_content(target, source):\n    for child in list(source.childNodes):\n        if child.nodeType == child.ELEMENT_NODE:\n            name = child.localName or child.tagName\n            if name != \"rPr\" and not name.endswith(\":rPr\"):\n                target.appendChild(child)\n\n\ndef _consolidate_text(run):\n    t_elements = _get_children(run, \"t\")\n\n    for i in range(len(t_elements) - 1, 0, -1):\n        curr, prev = t_elements[i], t_elements[i - 1]\n\n        if _is_adjacent(prev, curr):\n            prev_text = prev.firstChild.data if prev.firstChild else \"\"\n            curr_text = curr.firstChild.data if curr.firstChild else \"\"\n            merged = prev_text + curr_text\n\n            if prev.firstChild:\n                prev.firstChild.data = merged\n            else:\n                prev.appendChild(run.ownerDocument.createTextNode(merged))\n\n            if merged.startswith(\" \") or merged.endswith(\" \"):\n                prev.setAttribute(\"xml:space\", \"preserve\")\n            elif prev.hasAttribute(\"xml:space\"):\n                prev.removeAttribute(\"xml:space\")\n\n            run.removeChild(curr)\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/helpers/simplify_redlines.py",
    "content": "\"\"\"Simplify tracked changes by merging adjacent w:ins or w:del elements.\n\nMerges adjacent <w:ins> elements from the same author into a single element.\nSame for <w:del> elements. This makes heavily-redlined documents easier to\nwork with by reducing the number of tracked change wrappers.\n\nRules:\n- Only merges w:ins with w:ins, w:del with w:del (same element type)\n- Only merges if same author (ignores timestamp differences)\n- Only merges if truly adjacent (only whitespace between them)\n\"\"\"\n\nimport xml.etree.ElementTree as ET\nimport zipfile\nfrom pathlib import Path\n\nimport defusedxml.minidom\n\nWORD_NS = \"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"\n\n\ndef simplify_redlines(input_dir: str) -> tuple[int, str]:\n    doc_xml = Path(input_dir) / \"word\" / \"document.xml\"\n\n    if not doc_xml.exists():\n        return 0, f\"Error: {doc_xml} not found\"\n\n    try:\n        dom = defusedxml.minidom.parseString(doc_xml.read_text(encoding=\"utf-8\"))\n        root = dom.documentElement\n\n        merge_count = 0\n\n        containers = _find_elements(root, \"p\") + _find_elements(root, \"tc\")\n\n        for container in containers:\n            merge_count += _merge_tracked_changes_in(container, \"ins\")\n            merge_count += _merge_tracked_changes_in(container, \"del\")\n\n        doc_xml.write_bytes(dom.toxml(encoding=\"UTF-8\"))\n        return merge_count, f\"Simplified {merge_count} tracked changes\"\n\n    except Exception as e:\n        return 0, f\"Error: {e}\"\n\n\ndef _merge_tracked_changes_in(container, tag: str) -> int:\n    merge_count = 0\n\n    tracked = [\n        child\n        for child in container.childNodes\n        if child.nodeType == child.ELEMENT_NODE and _is_element(child, tag)\n    ]\n\n    if len(tracked) < 2:\n        return 0\n\n    i = 0\n    while i < len(tracked) - 1:\n        curr = tracked[i]\n        next_elem = tracked[i + 1]\n\n        if _can_merge_tracked(curr, next_elem):\n            _merge_tracked_content(curr, next_elem)\n            container.removeChild(next_elem)\n            tracked.pop(i + 1)\n            merge_count += 1\n        else:\n            i += 1\n\n    return merge_count\n\n\ndef _is_element(node, tag: str) -> bool:\n    name = node.localName or node.tagName\n    return name == tag or name.endswith(f\":{tag}\")\n\n\ndef _get_author(elem) -> str:\n    author = elem.getAttribute(\"w:author\")\n    if not author:\n        for attr in elem.attributes.values():\n            if attr.localName == \"author\" or attr.name.endswith(\":author\"):\n                return attr.value\n    return author\n\n\ndef _can_merge_tracked(elem1, elem2) -> bool:\n    if _get_author(elem1) != _get_author(elem2):\n        return False\n\n    node = elem1.nextSibling\n    while node and node != elem2:\n        if node.nodeType == node.ELEMENT_NODE:\n            return False\n        if node.nodeType == node.TEXT_NODE and node.data.strip():\n            return False\n        node = node.nextSibling\n\n    return True\n\n\ndef _merge_tracked_content(target, source):\n    while source.firstChild:\n        child = source.firstChild\n        source.removeChild(child)\n        target.appendChild(child)\n\n\ndef _find_elements(root, tag: str) -> list:\n    results = []\n\n    def traverse(node):\n        if node.nodeType == node.ELEMENT_NODE:\n            name = node.localName or node.tagName\n            if name == tag or name.endswith(f\":{tag}\"):\n                results.append(node)\n            for child in node.childNodes:\n                traverse(child)\n\n    traverse(root)\n    return results\n\n\ndef get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]:\n    if not doc_xml_path.exists():\n        return {}\n\n    try:\n        tree = ET.parse(doc_xml_path)\n        root = tree.getroot()\n    except ET.ParseError:\n        return {}\n\n    namespaces = {\"w\": WORD_NS}\n    author_attr = f\"{{{WORD_NS}}}author\"\n\n    authors: dict[str, int] = {}\n    for tag in [\"ins\", \"del\"]:\n        for elem in root.findall(f\".//w:{tag}\", namespaces):\n            author = elem.get(author_attr)\n            if author:\n                authors[author] = authors.get(author, 0) + 1\n\n    return authors\n\n\ndef _get_authors_from_docx(docx_path: Path) -> dict[str, int]:\n    try:\n        with zipfile.ZipFile(docx_path, \"r\") as zf:\n            if \"word/document.xml\" not in zf.namelist():\n                return {}\n            with zf.open(\"word/document.xml\") as f:\n                tree = ET.parse(f)\n                root = tree.getroot()\n\n                namespaces = {\"w\": WORD_NS}\n                author_attr = f\"{{{WORD_NS}}}author\"\n\n                authors: dict[str, int] = {}\n                for tag in [\"ins\", \"del\"]:\n                    for elem in root.findall(f\".//w:{tag}\", namespaces):\n                        author = elem.get(author_attr)\n                        if author:\n                            authors[author] = authors.get(author, 0) + 1\n                return authors\n    except (zipfile.BadZipFile, ET.ParseError):\n        return {}\n\n\ndef infer_author(\n    modified_dir: Path, original_docx: Path, default: str = \"Claude\"\n) -> str:\n    modified_xml = modified_dir / \"word\" / \"document.xml\"\n    modified_authors = get_tracked_change_authors(modified_xml)\n\n    if not modified_authors:\n        return default\n\n    original_authors = _get_authors_from_docx(original_docx)\n\n    new_changes: dict[str, int] = {}\n    for author, count in modified_authors.items():\n        original_count = original_authors.get(author, 0)\n        diff = count - original_count\n        if diff > 0:\n            new_changes[author] = diff\n\n    if not new_changes:\n        return default\n\n    if len(new_changes) == 1:\n        return next(iter(new_changes))\n\n    raise ValueError(\n        f\"Multiple authors added new changes: {new_changes}. Cannot infer which author to validate.\"\n    )\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/pack.py",
    "content": "\"\"\"Pack a directory into a DOCX, PPTX, or XLSX file.\n\nValidates with auto-repair, condenses XML formatting, and creates the Office file.\n\nUsage:\n    python pack.py <input_directory> <output_file> [--original <file>] [--validate true|false]\n\nExamples:\n    python pack.py unpacked/ output.docx --original input.docx\n    python pack.py unpacked/ output.pptx --validate false\n\"\"\"\n\nimport argparse\nimport shutil\nimport sys\nimport tempfile\nimport zipfile\nfrom pathlib import Path\n\nimport defusedxml.minidom\nfrom validators import DOCXSchemaValidator\nfrom validators import PPTXSchemaValidator\nfrom validators import RedliningValidator\n\n\ndef pack(\n    input_directory: str,\n    output_file: str,\n    original_file: str | None = None,\n    validate: bool = True,\n    infer_author_func=None,\n) -> tuple[None, str]:\n    input_dir = Path(input_directory)\n    output_path = Path(output_file)\n    suffix = output_path.suffix.lower()\n\n    if not input_dir.is_dir():\n        return None, f\"Error: {input_dir} is not a directory\"\n\n    if suffix not in {\".docx\", \".pptx\", \".xlsx\"}:\n        return None, f\"Error: {output_file} must be a .docx, .pptx, or .xlsx file\"\n\n    if validate and original_file:\n        original_path = Path(original_file)\n        if original_path.exists():\n            success, output = _run_validation(\n                input_dir, original_path, suffix, infer_author_func\n            )\n            if output:\n                print(output)\n            if not success:\n                return None, f\"Error: Validation failed for {input_dir}\"\n\n    with tempfile.TemporaryDirectory() as temp_dir:\n        temp_content_dir = Path(temp_dir) / \"content\"\n        shutil.copytree(input_dir, temp_content_dir)\n\n        for pattern in [\"*.xml\", \"*.rels\"]:\n            for xml_file in temp_content_dir.rglob(pattern):\n                _condense_xml(xml_file)\n\n        output_path.parent.mkdir(parents=True, exist_ok=True)\n        with zipfile.ZipFile(output_path, \"w\", zipfile.ZIP_DEFLATED) as zf:\n            for f in temp_content_dir.rglob(\"*\"):\n                if f.is_file():\n                    zf.write(f, f.relative_to(temp_content_dir))\n\n    return None, f\"Successfully packed {input_dir} to {output_file}\"\n\n\ndef _run_validation(\n    unpacked_dir: Path,\n    original_file: Path,\n    suffix: str,\n    infer_author_func=None,\n) -> tuple[bool, str | None]:\n    output_lines = []\n    validators = []\n\n    if suffix == \".docx\":\n        author = \"Claude\"\n        if infer_author_func:\n            try:\n                author = infer_author_func(unpacked_dir, original_file)\n            except ValueError as e:\n                print(f\"Warning: {e} Using default author 'Claude'.\", file=sys.stderr)\n\n        validators = [\n            DOCXSchemaValidator(unpacked_dir, original_file),\n            RedliningValidator(unpacked_dir, original_file, author=author),\n        ]\n    elif suffix == \".pptx\":\n        validators = [PPTXSchemaValidator(unpacked_dir, original_file)]\n\n    if not validators:\n        return True, None\n\n    total_repairs = sum(v.repair() for v in validators)\n    if total_repairs:\n        output_lines.append(f\"Auto-repaired {total_repairs} issue(s)\")\n\n    success = all(v.validate() for v in validators)\n\n    if success:\n        output_lines.append(\"All validations PASSED!\")\n\n    return success, \"\\n\".join(output_lines) if output_lines else None\n\n\ndef _condense_xml(xml_file: Path) -> None:\n    try:\n        with open(xml_file, encoding=\"utf-8\") as f:\n            dom = defusedxml.minidom.parse(f)\n\n        for element in dom.getElementsByTagName(\"*\"):\n            if element.tagName.endswith(\":t\"):\n                continue\n\n            for child in list(element.childNodes):\n                if (\n                    child.nodeType == child.TEXT_NODE\n                    and child.nodeValue\n                    and child.nodeValue.strip() == \"\"\n                ) or child.nodeType == child.COMMENT_NODE:\n                    element.removeChild(child)\n\n        xml_file.write_bytes(dom.toxml(encoding=\"UTF-8\"))\n    except Exception as e:\n        print(f\"ERROR: Failed to parse {xml_file.name}: {e}\", file=sys.stderr)\n        raise\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Pack a directory into a DOCX, PPTX, or XLSX file\"\n    )\n    parser.add_argument(\"input_directory\", help=\"Unpacked Office document directory\")\n    parser.add_argument(\"output_file\", help=\"Output Office file (.docx/.pptx/.xlsx)\")\n    parser.add_argument(\n        \"--original\",\n        help=\"Original file for validation comparison\",\n    )\n    parser.add_argument(\n        \"--validate\",\n        type=lambda x: x.lower() == \"true\",\n        default=True,\n        metavar=\"true|false\",\n        help=\"Run validation with auto-repair (default: true)\",\n    )\n    args = parser.parse_args()\n\n    _, message = pack(\n        args.input_directory,\n        args.output_file,\n        original_file=args.original,\n        validate=args.validate,\n    )\n    print(message)\n\n    if \"Error\" in message:\n        sys.exit(1)\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  xmlns=\"http://schemas.openxmlformats.org/drawingml/2006/chart\"\n  xmlns:cdr=\"http://schemas.openxmlformats.org/drawingml/2006/chartDrawing\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  targetNamespace=\"http://schemas.openxmlformats.org/drawingml/2006/chart\"\n  elementFormDefault=\"qualified\" attributeFormDefault=\"unqualified\" blockDefault=\"#all\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n    schemaLocation=\"shared-relationshipReference.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n    schemaLocation=\"dml-main.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/chartDrawing\"\n    schemaLocation=\"dml-chartDrawing.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:complexType name=\"CT_Boolean\">\n    <xsd:attribute name=\"val\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Double\">\n    <xsd:attribute name=\"val\" type=\"xsd:double\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_UnsignedInt\">\n    <xsd:attribute name=\"val\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RelId\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Extension\">\n    <xsd:sequence>\n      <xsd:any processContents=\"lax\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uri\" type=\"xsd:token\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExtensionList\">\n    <xsd:sequence>\n      <xsd:element name=\"ext\" type=\"CT_Extension\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NumVal\">\n    <xsd:sequence>\n      <xsd:element name=\"v\" type=\"s:ST_Xstring\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"idx\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"formatCode\" type=\"s:ST_Xstring\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NumData\">\n    <xsd:sequence>\n      <xsd:element name=\"formatCode\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ptCount\" type=\"CT_UnsignedInt\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pt\" type=\"CT_NumVal\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NumRef\">\n    <xsd:sequence>\n      <xsd:element name=\"f\" type=\"xsd:string\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"numCache\" type=\"CT_NumData\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NumDataSource\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n        <xsd:element name=\"numRef\" type=\"CT_NumRef\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"numLit\" type=\"CT_NumData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_StrVal\">\n    <xsd:sequence>\n      <xsd:element name=\"v\" type=\"s:ST_Xstring\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"idx\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_StrData\">\n    <xsd:sequence>\n      <xsd:element name=\"ptCount\" type=\"CT_UnsignedInt\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pt\" type=\"CT_StrVal\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_StrRef\">\n    <xsd:sequence>\n      <xsd:element name=\"f\" type=\"xsd:string\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"strCache\" type=\"CT_StrData\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Tx\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n        <xsd:element name=\"strRef\" type=\"CT_StrRef\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"rich\" type=\"a:CT_TextBody\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextLanguageID\">\n    <xsd:attribute name=\"val\" type=\"s:ST_Lang\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Lvl\">\n    <xsd:sequence>\n      <xsd:element name=\"pt\" type=\"CT_StrVal\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MultiLvlStrData\">\n    <xsd:sequence>\n      <xsd:element name=\"ptCount\" type=\"CT_UnsignedInt\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lvl\" type=\"CT_Lvl\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MultiLvlStrRef\">\n    <xsd:sequence>\n      <xsd:element name=\"f\" type=\"xsd:string\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"multiLvlStrCache\" type=\"CT_MultiLvlStrData\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AxDataSource\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n        <xsd:element name=\"multiLvlStrRef\" type=\"CT_MultiLvlStrRef\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"numRef\" type=\"CT_NumRef\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"numLit\" type=\"CT_NumData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"strRef\" type=\"CT_StrRef\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"strLit\" type=\"CT_StrData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SerTx\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n        <xsd:element name=\"strRef\" type=\"CT_StrRef\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"v\" type=\"s:ST_Xstring\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_LayoutTarget\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"inner\"/>\n      <xsd:enumeration value=\"outer\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LayoutTarget\">\n    <xsd:attribute name=\"val\" type=\"ST_LayoutTarget\" default=\"outer\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_LayoutMode\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"edge\"/>\n      <xsd:enumeration value=\"factor\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LayoutMode\">\n    <xsd:attribute name=\"val\" type=\"ST_LayoutMode\" default=\"factor\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ManualLayout\">\n    <xsd:sequence>\n      <xsd:element name=\"layoutTarget\" type=\"CT_LayoutTarget\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"xMode\" type=\"CT_LayoutMode\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"yMode\" type=\"CT_LayoutMode\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"wMode\" type=\"CT_LayoutMode\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hMode\" type=\"CT_LayoutMode\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"x\" type=\"CT_Double\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"y\" type=\"CT_Double\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"w\" type=\"CT_Double\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"h\" type=\"CT_Double\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Layout\">\n    <xsd:sequence>\n      <xsd:element name=\"manualLayout\" type=\"CT_ManualLayout\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Title\">\n    <xsd:sequence>\n      <xsd:element name=\"tx\" type=\"CT_Tx\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"layout\" type=\"CT_Layout\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"overlay\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txPr\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_RotX\">\n    <xsd:restriction base=\"xsd:byte\">\n      <xsd:minInclusive value=\"-90\"/>\n      <xsd:maxInclusive value=\"90\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_RotX\">\n    <xsd:attribute name=\"val\" type=\"ST_RotX\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_HPercent\">\n    <xsd:union memberTypes=\"ST_HPercentWithSymbol ST_HPercentUShort\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HPercentWithSymbol\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"0*(([5-9])|([1-9][0-9])|([1-4][0-9][0-9])|500)%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HPercentUShort\">\n    <xsd:restriction base=\"xsd:unsignedShort\">\n      <xsd:minInclusive value=\"5\"/>\n      <xsd:maxInclusive value=\"500\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_HPercent\">\n    <xsd:attribute name=\"val\" type=\"ST_HPercent\" default=\"100%\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_RotY\">\n    <xsd:restriction base=\"xsd:unsignedShort\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"360\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_RotY\">\n    <xsd:attribute name=\"val\" type=\"ST_RotY\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DepthPercent\">\n    <xsd:union memberTypes=\"ST_DepthPercentWithSymbol ST_DepthPercentUShort\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DepthPercentWithSymbol\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"0*(([2-9][0-9])|([1-9][0-9][0-9])|(1[0-9][0-9][0-9])|2000)%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DepthPercentUShort\">\n    <xsd:restriction base=\"xsd:unsignedShort\">\n      <xsd:minInclusive value=\"20\"/>\n      <xsd:maxInclusive value=\"2000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DepthPercent\">\n    <xsd:attribute name=\"val\" type=\"ST_DepthPercent\" default=\"100%\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Perspective\">\n    <xsd:restriction base=\"xsd:unsignedByte\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"240\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Perspective\">\n    <xsd:attribute name=\"val\" type=\"ST_Perspective\" default=\"30\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_View3D\">\n    <xsd:sequence>\n      <xsd:element name=\"rotX\" type=\"CT_RotX\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hPercent\" type=\"CT_HPercent\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"rotY\" type=\"CT_RotY\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"depthPercent\" type=\"CT_DepthPercent\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"rAngAx\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"perspective\" type=\"CT_Perspective\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Surface\">\n    <xsd:sequence>\n      <xsd:element name=\"thickness\" type=\"CT_Thickness\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pictureOptions\" type=\"CT_PictureOptions\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Thickness\">\n    <xsd:union memberTypes=\"ST_ThicknessPercent xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ThicknessPercent\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"([0-9]+)%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Thickness\">\n    <xsd:attribute name=\"val\" type=\"ST_Thickness\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DTable\">\n    <xsd:sequence>\n      <xsd:element name=\"showHorzBorder\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showVertBorder\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showOutline\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showKeys\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txPr\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_GapAmount\">\n    <xsd:union memberTypes=\"ST_GapAmountPercent ST_GapAmountUShort\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_GapAmountPercent\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"0*(([0-9])|([1-9][0-9])|([1-4][0-9][0-9])|500)%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_GapAmountUShort\">\n    <xsd:restriction base=\"xsd:unsignedShort\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"500\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_GapAmount\">\n    <xsd:attribute name=\"val\" type=\"ST_GapAmount\" default=\"150%\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Overlap\">\n    <xsd:union memberTypes=\"ST_OverlapPercent ST_OverlapByte\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_OverlapPercent\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"(-?0*(([0-9])|([1-9][0-9])|100))%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_OverlapByte\">\n    <xsd:restriction base=\"xsd:byte\">\n      <xsd:minInclusive value=\"-100\"/>\n      <xsd:maxInclusive value=\"100\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Overlap\">\n    <xsd:attribute name=\"val\" type=\"ST_Overlap\" default=\"0%\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_BubbleScale\">\n    <xsd:union memberTypes=\"ST_BubbleScalePercent ST_BubbleScaleUInt\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_BubbleScalePercent\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"0*(([0-9])|([1-9][0-9])|([1-2][0-9][0-9])|300)%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_BubbleScaleUInt\">\n    <xsd:restriction base=\"xsd:unsignedInt\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"300\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_BubbleScale\">\n    <xsd:attribute name=\"val\" type=\"ST_BubbleScale\" default=\"100%\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SizeRepresents\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"area\"/>\n      <xsd:enumeration value=\"w\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SizeRepresents\">\n    <xsd:attribute name=\"val\" type=\"ST_SizeRepresents\" default=\"area\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FirstSliceAng\">\n    <xsd:restriction base=\"xsd:unsignedShort\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"360\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_FirstSliceAng\">\n    <xsd:attribute name=\"val\" type=\"ST_FirstSliceAng\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_HoleSize\">\n    <xsd:union memberTypes=\"ST_HoleSizePercent ST_HoleSizeUByte\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HoleSizePercent\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"0*([1-9]|([1-8][0-9])|90)%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HoleSizeUByte\">\n    <xsd:restriction base=\"xsd:unsignedByte\">\n      <xsd:minInclusive value=\"1\"/>\n      <xsd:maxInclusive value=\"90\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_HoleSize\">\n    <xsd:attribute name=\"val\" type=\"ST_HoleSize\" default=\"10%\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SplitType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"auto\"/>\n      <xsd:enumeration value=\"cust\"/>\n      <xsd:enumeration value=\"percent\"/>\n      <xsd:enumeration value=\"pos\"/>\n      <xsd:enumeration value=\"val\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SplitType\">\n    <xsd:attribute name=\"val\" type=\"ST_SplitType\" default=\"auto\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustSplit\">\n    <xsd:sequence>\n      <xsd:element name=\"secondPiePt\" type=\"CT_UnsignedInt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SecondPieSize\">\n    <xsd:union memberTypes=\"ST_SecondPieSizePercent ST_SecondPieSizeUShort\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_SecondPieSizePercent\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"0*(([5-9])|([1-9][0-9])|(1[0-9][0-9])|200)%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_SecondPieSizeUShort\">\n    <xsd:restriction base=\"xsd:unsignedShort\">\n      <xsd:minInclusive value=\"5\"/>\n      <xsd:maxInclusive value=\"200\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SecondPieSize\">\n    <xsd:attribute name=\"val\" type=\"ST_SecondPieSize\" default=\"75%\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NumFmt\">\n    <xsd:attribute name=\"formatCode\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"sourceLinked\" type=\"xsd:boolean\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_LblAlgn\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"r\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LblAlgn\">\n    <xsd:attribute name=\"val\" type=\"ST_LblAlgn\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DLblPos\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"bestFit\"/>\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"inBase\"/>\n      <xsd:enumeration value=\"inEnd\"/>\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"outEnd\"/>\n      <xsd:enumeration value=\"r\"/>\n      <xsd:enumeration value=\"t\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DLblPos\">\n    <xsd:attribute name=\"val\" type=\"ST_DLblPos\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_DLblShared\">\n    <xsd:sequence>\n      <xsd:element name=\"numFmt\" type=\"CT_NumFmt\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txPr\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dLblPos\" type=\"CT_DLblPos\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showLegendKey\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showVal\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showCatName\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showSerName\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showPercent\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showBubbleSize\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"separator\" type=\"xsd:string\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:group name=\"Group_DLbl\">\n    <xsd:sequence>\n      <xsd:element name=\"layout\" type=\"CT_Layout\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tx\" type=\"CT_Tx\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_DLblShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_DLbl\">\n    <xsd:sequence>\n      <xsd:element name=\"idx\" type=\"CT_UnsignedInt\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:choice>\n        <xsd:element name=\"delete\" type=\"CT_Boolean\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:group ref=\"Group_DLbl\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"Group_DLbls\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_DLblShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showLeaderLines\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"leaderLines\" type=\"CT_ChartLines\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_DLbls\">\n    <xsd:sequence>\n      <xsd:element name=\"dLbl\" type=\"CT_DLbl\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:choice>\n        <xsd:element name=\"delete\" type=\"CT_Boolean\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:group ref=\"Group_DLbls\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_MarkerStyle\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"circle\"/>\n      <xsd:enumeration value=\"dash\"/>\n      <xsd:enumeration value=\"diamond\"/>\n      <xsd:enumeration value=\"dot\"/>\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"picture\"/>\n      <xsd:enumeration value=\"plus\"/>\n      <xsd:enumeration value=\"square\"/>\n      <xsd:enumeration value=\"star\"/>\n      <xsd:enumeration value=\"triangle\"/>\n      <xsd:enumeration value=\"x\"/>\n      <xsd:enumeration value=\"auto\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_MarkerStyle\">\n    <xsd:attribute name=\"val\" type=\"ST_MarkerStyle\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_MarkerSize\">\n    <xsd:restriction base=\"xsd:unsignedByte\">\n      <xsd:minInclusive value=\"2\"/>\n      <xsd:maxInclusive value=\"72\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_MarkerSize\">\n    <xsd:attribute name=\"val\" type=\"ST_MarkerSize\" default=\"5\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Marker\">\n    <xsd:sequence>\n      <xsd:element name=\"symbol\" type=\"CT_MarkerStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"size\" type=\"CT_MarkerSize\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DPt\">\n    <xsd:sequence>\n      <xsd:element name=\"idx\" type=\"CT_UnsignedInt\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"invertIfNegative\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"marker\" type=\"CT_Marker\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bubble3D\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"explosion\" type=\"CT_UnsignedInt\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pictureOptions\" type=\"CT_PictureOptions\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TrendlineType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"exp\"/>\n      <xsd:enumeration value=\"linear\"/>\n      <xsd:enumeration value=\"log\"/>\n      <xsd:enumeration value=\"movingAvg\"/>\n      <xsd:enumeration value=\"poly\"/>\n      <xsd:enumeration value=\"power\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TrendlineType\">\n    <xsd:attribute name=\"val\" type=\"ST_TrendlineType\" default=\"linear\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Order\">\n    <xsd:restriction base=\"xsd:unsignedByte\">\n      <xsd:minInclusive value=\"2\"/>\n      <xsd:maxInclusive value=\"6\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Order\">\n    <xsd:attribute name=\"val\" type=\"ST_Order\" default=\"2\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Period\">\n    <xsd:restriction base=\"xsd:unsignedInt\">\n      <xsd:minInclusive value=\"2\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Period\">\n    <xsd:attribute name=\"val\" type=\"ST_Period\" default=\"2\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TrendlineLbl\">\n    <xsd:sequence>\n      <xsd:element name=\"layout\" type=\"CT_Layout\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tx\" type=\"CT_Tx\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"numFmt\" type=\"CT_NumFmt\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txPr\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Trendline\">\n    <xsd:sequence>\n      <xsd:element name=\"name\" type=\"xsd:string\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"trendlineType\" type=\"CT_TrendlineType\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"order\" type=\"CT_Order\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"period\" type=\"CT_Period\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"forward\" type=\"CT_Double\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"backward\" type=\"CT_Double\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"intercept\" type=\"CT_Double\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dispRSqr\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dispEq\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"trendlineLbl\" type=\"CT_TrendlineLbl\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ErrDir\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"x\"/>\n      <xsd:enumeration value=\"y\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ErrDir\">\n    <xsd:attribute name=\"val\" type=\"ST_ErrDir\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ErrBarType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"both\"/>\n      <xsd:enumeration value=\"minus\"/>\n      <xsd:enumeration value=\"plus\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ErrBarType\">\n    <xsd:attribute name=\"val\" type=\"ST_ErrBarType\" default=\"both\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ErrValType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"cust\"/>\n      <xsd:enumeration value=\"fixedVal\"/>\n      <xsd:enumeration value=\"percentage\"/>\n      <xsd:enumeration value=\"stdDev\"/>\n      <xsd:enumeration value=\"stdErr\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ErrValType\">\n    <xsd:attribute name=\"val\" type=\"ST_ErrValType\" default=\"fixedVal\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ErrBars\">\n    <xsd:sequence>\n      <xsd:element name=\"errDir\" type=\"CT_ErrDir\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"errBarType\" type=\"CT_ErrBarType\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"errValType\" type=\"CT_ErrValType\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"noEndCap\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"plus\" type=\"CT_NumDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"minus\" type=\"CT_NumDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"val\" type=\"CT_Double\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_UpDownBar\">\n    <xsd:sequence>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_UpDownBars\">\n    <xsd:sequence>\n      <xsd:element name=\"gapWidth\" type=\"CT_GapAmount\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"upBars\" type=\"CT_UpDownBar\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"downBars\" type=\"CT_UpDownBar\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_SerShared\">\n    <xsd:sequence>\n      <xsd:element name=\"idx\" type=\"CT_UnsignedInt\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"order\" type=\"CT_UnsignedInt\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tx\" type=\"CT_SerTx\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_LineSer\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_SerShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"marker\" type=\"CT_Marker\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dPt\" type=\"CT_DPt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"trendline\" type=\"CT_Trendline\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"errBars\" type=\"CT_ErrBars\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cat\" type=\"CT_AxDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"val\" type=\"CT_NumDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"smooth\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ScatterSer\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_SerShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"marker\" type=\"CT_Marker\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dPt\" type=\"CT_DPt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"trendline\" type=\"CT_Trendline\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"errBars\" type=\"CT_ErrBars\" minOccurs=\"0\" maxOccurs=\"2\"/>\n      <xsd:element name=\"xVal\" type=\"CT_AxDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"yVal\" type=\"CT_NumDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"smooth\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RadarSer\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_SerShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"marker\" type=\"CT_Marker\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dPt\" type=\"CT_DPt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cat\" type=\"CT_AxDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"val\" type=\"CT_NumDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BarSer\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_SerShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"invertIfNegative\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pictureOptions\" type=\"CT_PictureOptions\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dPt\" type=\"CT_DPt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"trendline\" type=\"CT_Trendline\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"errBars\" type=\"CT_ErrBars\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cat\" type=\"CT_AxDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"val\" type=\"CT_NumDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shape\" type=\"CT_Shape\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AreaSer\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_SerShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pictureOptions\" type=\"CT_PictureOptions\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dPt\" type=\"CT_DPt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"trendline\" type=\"CT_Trendline\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"errBars\" type=\"CT_ErrBars\" minOccurs=\"0\" maxOccurs=\"2\"/>\n      <xsd:element name=\"cat\" type=\"CT_AxDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"val\" type=\"CT_NumDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PieSer\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_SerShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"explosion\" type=\"CT_UnsignedInt\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dPt\" type=\"CT_DPt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cat\" type=\"CT_AxDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"val\" type=\"CT_NumDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BubbleSer\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_SerShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"invertIfNegative\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dPt\" type=\"CT_DPt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"trendline\" type=\"CT_Trendline\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"errBars\" type=\"CT_ErrBars\" minOccurs=\"0\" maxOccurs=\"2\"/>\n      <xsd:element name=\"xVal\" type=\"CT_AxDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"yVal\" type=\"CT_NumDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bubbleSize\" type=\"CT_NumDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bubble3D\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SurfaceSer\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_SerShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cat\" type=\"CT_AxDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"val\" type=\"CT_NumDataSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Grouping\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"percentStacked\"/>\n      <xsd:enumeration value=\"standard\"/>\n      <xsd:enumeration value=\"stacked\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Grouping\">\n    <xsd:attribute name=\"val\" type=\"ST_Grouping\" default=\"standard\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ChartLines\">\n    <xsd:sequence>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_LineChartShared\">\n    <xsd:sequence>\n      <xsd:element name=\"grouping\" type=\"CT_Grouping\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"varyColors\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ser\" type=\"CT_LineSer\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dropLines\" type=\"CT_ChartLines\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_LineChart\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_LineChartShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hiLowLines\" type=\"CT_ChartLines\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"upDownBars\" type=\"CT_UpDownBars\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"marker\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"smooth\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"2\" maxOccurs=\"2\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Line3DChart\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_LineChartShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gapDepth\" type=\"CT_GapAmount\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"3\" maxOccurs=\"3\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_StockChart\">\n    <xsd:sequence>\n      <xsd:element name=\"ser\" type=\"CT_LineSer\" minOccurs=\"3\" maxOccurs=\"4\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dropLines\" type=\"CT_ChartLines\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hiLowLines\" type=\"CT_ChartLines\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"upDownBars\" type=\"CT_UpDownBars\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"2\" maxOccurs=\"2\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ScatterStyle\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"line\"/>\n      <xsd:enumeration value=\"lineMarker\"/>\n      <xsd:enumeration value=\"marker\"/>\n      <xsd:enumeration value=\"smooth\"/>\n      <xsd:enumeration value=\"smoothMarker\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ScatterStyle\">\n    <xsd:attribute name=\"val\" type=\"ST_ScatterStyle\" default=\"marker\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ScatterChart\">\n    <xsd:sequence>\n      <xsd:element name=\"scatterStyle\" type=\"CT_ScatterStyle\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"varyColors\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ser\" type=\"CT_ScatterSer\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"2\" maxOccurs=\"2\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_RadarStyle\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"standard\"/>\n      <xsd:enumeration value=\"marker\"/>\n      <xsd:enumeration value=\"filled\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_RadarStyle\">\n    <xsd:attribute name=\"val\" type=\"ST_RadarStyle\" default=\"standard\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RadarChart\">\n    <xsd:sequence>\n      <xsd:element name=\"radarStyle\" type=\"CT_RadarStyle\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"varyColors\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ser\" type=\"CT_RadarSer\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"2\" maxOccurs=\"2\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_BarGrouping\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"percentStacked\"/>\n      <xsd:enumeration value=\"clustered\"/>\n      <xsd:enumeration value=\"standard\"/>\n      <xsd:enumeration value=\"stacked\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_BarGrouping\">\n    <xsd:attribute name=\"val\" type=\"ST_BarGrouping\" default=\"clustered\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_BarDir\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"bar\"/>\n      <xsd:enumeration value=\"col\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_BarDir\">\n    <xsd:attribute name=\"val\" type=\"ST_BarDir\" default=\"col\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Shape\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"cone\"/>\n      <xsd:enumeration value=\"coneToMax\"/>\n      <xsd:enumeration value=\"box\"/>\n      <xsd:enumeration value=\"cylinder\"/>\n      <xsd:enumeration value=\"pyramid\"/>\n      <xsd:enumeration value=\"pyramidToMax\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Shape\">\n    <xsd:attribute name=\"val\" type=\"ST_Shape\" default=\"box\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_BarChartShared\">\n    <xsd:sequence>\n      <xsd:element name=\"barDir\" type=\"CT_BarDir\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"grouping\" type=\"CT_BarGrouping\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"varyColors\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ser\" type=\"CT_BarSer\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_BarChart\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_BarChartShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gapWidth\" type=\"CT_GapAmount\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"overlap\" type=\"CT_Overlap\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"serLines\" type=\"CT_ChartLines\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"2\" maxOccurs=\"2\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Bar3DChart\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_BarChartShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gapWidth\" type=\"CT_GapAmount\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gapDepth\" type=\"CT_GapAmount\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shape\" type=\"CT_Shape\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"2\" maxOccurs=\"3\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_AreaChartShared\">\n    <xsd:sequence>\n      <xsd:element name=\"grouping\" type=\"CT_Grouping\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"varyColors\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ser\" type=\"CT_AreaSer\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dropLines\" type=\"CT_ChartLines\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_AreaChart\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_AreaChartShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"2\" maxOccurs=\"2\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Area3DChart\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_AreaChartShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gapDepth\" type=\"CT_GapAmount\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"2\" maxOccurs=\"3\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_PieChartShared\">\n    <xsd:sequence>\n      <xsd:element name=\"varyColors\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ser\" type=\"CT_PieSer\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_PieChart\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_PieChartShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"firstSliceAng\" type=\"CT_FirstSliceAng\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Pie3DChart\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_PieChartShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DoughnutChart\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_PieChartShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"firstSliceAng\" type=\"CT_FirstSliceAng\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"holeSize\" type=\"CT_HoleSize\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_OfPieType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"pie\"/>\n      <xsd:enumeration value=\"bar\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_OfPieType\">\n    <xsd:attribute name=\"val\" type=\"ST_OfPieType\" default=\"pie\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OfPieChart\">\n    <xsd:sequence>\n      <xsd:element name=\"ofPieType\" type=\"CT_OfPieType\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_PieChartShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gapWidth\" type=\"CT_GapAmount\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"splitType\" type=\"CT_SplitType\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"splitPos\" type=\"CT_Double\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"custSplit\" type=\"CT_CustSplit\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"secondPieSize\" type=\"CT_SecondPieSize\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"serLines\" type=\"CT_ChartLines\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BubbleChart\">\n    <xsd:sequence>\n      <xsd:element name=\"varyColors\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ser\" type=\"CT_BubbleSer\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dLbls\" type=\"CT_DLbls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bubble3D\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bubbleScale\" type=\"CT_BubbleScale\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showNegBubbles\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sizeRepresents\" type=\"CT_SizeRepresents\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"2\" maxOccurs=\"2\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BandFmt\">\n    <xsd:sequence>\n      <xsd:element name=\"idx\" type=\"CT_UnsignedInt\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BandFmts\">\n    <xsd:sequence>\n      <xsd:element name=\"bandFmt\" type=\"CT_BandFmt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_SurfaceChartShared\">\n    <xsd:sequence>\n      <xsd:element name=\"wireframe\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ser\" type=\"CT_SurfaceSer\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"bandFmts\" type=\"CT_BandFmts\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_SurfaceChart\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_SurfaceChartShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"2\" maxOccurs=\"3\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Surface3DChart\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_SurfaceChartShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"3\" maxOccurs=\"3\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_AxPos\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"r\"/>\n      <xsd:enumeration value=\"t\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_AxPos\">\n    <xsd:attribute name=\"val\" type=\"ST_AxPos\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Crosses\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"autoZero\"/>\n      <xsd:enumeration value=\"max\"/>\n      <xsd:enumeration value=\"min\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Crosses\">\n    <xsd:attribute name=\"val\" type=\"ST_Crosses\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_CrossBetween\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"between\"/>\n      <xsd:enumeration value=\"midCat\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_CrossBetween\">\n    <xsd:attribute name=\"val\" type=\"ST_CrossBetween\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TickMark\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"cross\"/>\n      <xsd:enumeration value=\"in\"/>\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"out\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TickMark\">\n    <xsd:attribute name=\"val\" type=\"ST_TickMark\" default=\"cross\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TickLblPos\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"high\"/>\n      <xsd:enumeration value=\"low\"/>\n      <xsd:enumeration value=\"nextTo\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TickLblPos\">\n    <xsd:attribute name=\"val\" type=\"ST_TickLblPos\" default=\"nextTo\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Skip\">\n    <xsd:restriction base=\"xsd:unsignedInt\">\n      <xsd:minInclusive value=\"1\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Skip\">\n    <xsd:attribute name=\"val\" type=\"ST_Skip\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TimeUnit\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"days\"/>\n      <xsd:enumeration value=\"months\"/>\n      <xsd:enumeration value=\"years\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TimeUnit\">\n    <xsd:attribute name=\"val\" type=\"ST_TimeUnit\" default=\"days\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_AxisUnit\">\n    <xsd:restriction base=\"xsd:double\">\n      <xsd:minExclusive value=\"0\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_AxisUnit\">\n    <xsd:attribute name=\"val\" type=\"ST_AxisUnit\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_BuiltInUnit\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"hundreds\"/>\n      <xsd:enumeration value=\"thousands\"/>\n      <xsd:enumeration value=\"tenThousands\"/>\n      <xsd:enumeration value=\"hundredThousands\"/>\n      <xsd:enumeration value=\"millions\"/>\n      <xsd:enumeration value=\"tenMillions\"/>\n      <xsd:enumeration value=\"hundredMillions\"/>\n      <xsd:enumeration value=\"billions\"/>\n      <xsd:enumeration value=\"trillions\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_BuiltInUnit\">\n    <xsd:attribute name=\"val\" type=\"ST_BuiltInUnit\" default=\"thousands\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PictureFormat\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"stretch\"/>\n      <xsd:enumeration value=\"stack\"/>\n      <xsd:enumeration value=\"stackScale\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PictureFormat\">\n    <xsd:attribute name=\"val\" type=\"ST_PictureFormat\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PictureStackUnit\">\n    <xsd:restriction base=\"xsd:double\">\n      <xsd:minExclusive value=\"0\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PictureStackUnit\">\n    <xsd:attribute name=\"val\" type=\"ST_PictureStackUnit\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PictureOptions\">\n    <xsd:sequence>\n      <xsd:element name=\"applyToFront\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"applyToSides\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"applyToEnd\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pictureFormat\" type=\"CT_PictureFormat\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pictureStackUnit\" type=\"CT_PictureStackUnit\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DispUnitsLbl\">\n    <xsd:sequence>\n      <xsd:element name=\"layout\" type=\"CT_Layout\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tx\" type=\"CT_Tx\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txPr\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DispUnits\">\n    <xsd:sequence>\n      <xsd:choice>\n        <xsd:element name=\"custUnit\" type=\"CT_Double\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"builtInUnit\" type=\"CT_BuiltInUnit\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n      <xsd:element name=\"dispUnitsLbl\" type=\"CT_DispUnitsLbl\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Orientation\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"maxMin\"/>\n      <xsd:enumeration value=\"minMax\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Orientation\">\n    <xsd:attribute name=\"val\" type=\"ST_Orientation\" default=\"minMax\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_LogBase\">\n    <xsd:restriction base=\"xsd:double\">\n      <xsd:minInclusive value=\"2\"/>\n      <xsd:maxInclusive value=\"1000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LogBase\">\n    <xsd:attribute name=\"val\" type=\"ST_LogBase\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Scaling\">\n    <xsd:sequence>\n      <xsd:element name=\"logBase\" type=\"CT_LogBase\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"orientation\" type=\"CT_Orientation\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"max\" type=\"CT_Double\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"min\" type=\"CT_Double\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_LblOffset\">\n    <xsd:union memberTypes=\"ST_LblOffsetPercent ST_LblOffsetUShort\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_LblOffsetPercent\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"0*(([0-9])|([1-9][0-9])|([1-9][0-9][0-9])|1000)%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_LblOffsetUShort\">\n    <xsd:restriction base=\"xsd:unsignedShort\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"1000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LblOffset\">\n    <xsd:attribute name=\"val\" type=\"ST_LblOffset\" default=\"100%\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_AxShared\">\n    <xsd:sequence>\n      <xsd:element name=\"axId\" type=\"CT_UnsignedInt\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"scaling\" type=\"CT_Scaling\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"delete\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"axPos\" type=\"CT_AxPos\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"majorGridlines\" type=\"CT_ChartLines\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"minorGridlines\" type=\"CT_ChartLines\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"title\" type=\"CT_Title\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"numFmt\" type=\"CT_NumFmt\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"majorTickMark\" type=\"CT_TickMark\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"minorTickMark\" type=\"CT_TickMark\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tickLblPos\" type=\"CT_TickLblPos\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txPr\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"crossAx\" type=\"CT_UnsignedInt\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"1\">\n        <xsd:element name=\"crosses\" type=\"CT_Crosses\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"crossesAt\" type=\"CT_Double\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_CatAx\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_AxShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"auto\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lblAlgn\" type=\"CT_LblAlgn\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lblOffset\" type=\"CT_LblOffset\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tickLblSkip\" type=\"CT_Skip\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tickMarkSkip\" type=\"CT_Skip\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"noMultiLvlLbl\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DateAx\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_AxShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"auto\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lblOffset\" type=\"CT_LblOffset\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"baseTimeUnit\" type=\"CT_TimeUnit\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"majorUnit\" type=\"CT_AxisUnit\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"majorTimeUnit\" type=\"CT_TimeUnit\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"minorUnit\" type=\"CT_AxisUnit\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"minorTimeUnit\" type=\"CT_TimeUnit\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SerAx\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_AxShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tickLblSkip\" type=\"CT_Skip\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tickMarkSkip\" type=\"CT_Skip\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ValAx\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_AxShared\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"crossBetween\" type=\"CT_CrossBetween\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"majorUnit\" type=\"CT_AxisUnit\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"minorUnit\" type=\"CT_AxisUnit\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dispUnits\" type=\"CT_DispUnits\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PlotArea\">\n    <xsd:sequence>\n      <xsd:element name=\"layout\" type=\"CT_Layout\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"unbounded\">\n        <xsd:element name=\"areaChart\" type=\"CT_AreaChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"area3DChart\" type=\"CT_Area3DChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"lineChart\" type=\"CT_LineChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"line3DChart\" type=\"CT_Line3DChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"stockChart\" type=\"CT_StockChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"radarChart\" type=\"CT_RadarChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"scatterChart\" type=\"CT_ScatterChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"pieChart\" type=\"CT_PieChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"pie3DChart\" type=\"CT_Pie3DChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"doughnutChart\" type=\"CT_DoughnutChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"barChart\" type=\"CT_BarChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"bar3DChart\" type=\"CT_Bar3DChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"ofPieChart\" type=\"CT_OfPieChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"surfaceChart\" type=\"CT_SurfaceChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"surface3DChart\" type=\"CT_Surface3DChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"bubbleChart\" type=\"CT_BubbleChart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n        <xsd:element name=\"valAx\" type=\"CT_ValAx\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"catAx\" type=\"CT_CatAx\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"dateAx\" type=\"CT_DateAx\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"serAx\" type=\"CT_SerAx\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n      <xsd:element name=\"dTable\" type=\"CT_DTable\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotFmt\">\n    <xsd:sequence>\n      <xsd:element name=\"idx\" type=\"CT_UnsignedInt\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txPr\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"marker\" type=\"CT_Marker\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dLbl\" type=\"CT_DLbl\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotFmts\">\n    <xsd:sequence>\n      <xsd:element name=\"pivotFmt\" type=\"CT_PivotFmt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_LegendPos\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"tr\"/>\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"r\"/>\n      <xsd:enumeration value=\"t\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LegendPos\">\n    <xsd:attribute name=\"val\" type=\"ST_LegendPos\" default=\"r\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_LegendEntryData\">\n    <xsd:sequence>\n      <xsd:element name=\"txPr\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_LegendEntry\">\n    <xsd:sequence>\n      <xsd:element name=\"idx\" type=\"CT_UnsignedInt\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:choice>\n        <xsd:element name=\"delete\" type=\"CT_Boolean\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:group ref=\"EG_LegendEntryData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Legend\">\n    <xsd:sequence>\n      <xsd:element name=\"legendPos\" type=\"CT_LegendPos\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"legendEntry\" type=\"CT_LegendEntry\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"layout\" type=\"CT_Layout\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"overlay\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txPr\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DispBlanksAs\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"span\"/>\n      <xsd:enumeration value=\"gap\"/>\n      <xsd:enumeration value=\"zero\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DispBlanksAs\">\n    <xsd:attribute name=\"val\" type=\"ST_DispBlanksAs\" default=\"zero\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Chart\">\n    <xsd:sequence>\n      <xsd:element name=\"title\" type=\"CT_Title\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"autoTitleDeleted\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pivotFmts\" type=\"CT_PivotFmts\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"view3D\" type=\"CT_View3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"floor\" type=\"CT_Surface\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sideWall\" type=\"CT_Surface\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"backWall\" type=\"CT_Surface\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"plotArea\" type=\"CT_PlotArea\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"legend\" type=\"CT_Legend\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"plotVisOnly\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dispBlanksAs\" type=\"CT_DispBlanksAs\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showDLblsOverMax\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Style\">\n    <xsd:restriction base=\"xsd:unsignedByte\">\n      <xsd:minInclusive value=\"1\"/>\n      <xsd:maxInclusive value=\"48\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Style\">\n    <xsd:attribute name=\"val\" type=\"ST_Style\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotSource\">\n    <xsd:sequence>\n      <xsd:element name=\"name\" type=\"s:ST_Xstring\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fmtId\" type=\"CT_UnsignedInt\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Protection\">\n    <xsd:sequence>\n      <xsd:element name=\"chartObject\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"data\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"formatting\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"selection\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"userInterface\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_HeaderFooter\">\n    <xsd:sequence>\n      <xsd:element name=\"oddHeader\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"oddFooter\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"evenHeader\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"evenFooter\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"firstHeader\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"firstFooter\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"alignWithMargins\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"differentOddEven\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"differentFirst\" type=\"xsd:boolean\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PageMargins\">\n    <xsd:attribute name=\"l\" type=\"xsd:double\" use=\"required\"/>\n    <xsd:attribute name=\"r\" type=\"xsd:double\" use=\"required\"/>\n    <xsd:attribute name=\"t\" type=\"xsd:double\" use=\"required\"/>\n    <xsd:attribute name=\"b\" type=\"xsd:double\" use=\"required\"/>\n    <xsd:attribute name=\"header\" type=\"xsd:double\" use=\"required\"/>\n    <xsd:attribute name=\"footer\" type=\"xsd:double\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PageSetupOrientation\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"default\"/>\n      <xsd:enumeration value=\"portrait\"/>\n      <xsd:enumeration value=\"landscape\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ExternalData\">\n    <xsd:sequence>\n      <xsd:element name=\"autoUpdate\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PageSetup\">\n    <xsd:attribute name=\"paperSize\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"paperHeight\" type=\"s:ST_PositiveUniversalMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"paperWidth\" type=\"s:ST_PositiveUniversalMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"firstPageNumber\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"orientation\" type=\"ST_PageSetupOrientation\" use=\"optional\"\n      default=\"default\"/>\n    <xsd:attribute name=\"blackAndWhite\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"draft\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"useFirstPageNumber\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"horizontalDpi\" type=\"xsd:int\" use=\"optional\" default=\"600\"/>\n    <xsd:attribute name=\"verticalDpi\" type=\"xsd:int\" use=\"optional\" default=\"600\"/>\n    <xsd:attribute name=\"copies\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PrintSettings\">\n    <xsd:sequence>\n      <xsd:element name=\"headerFooter\" type=\"CT_HeaderFooter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pageMargins\" type=\"CT_PageMargins\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pageSetup\" type=\"CT_PageSetup\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"legacyDrawingHF\" type=\"CT_RelId\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ChartSpace\">\n    <xsd:sequence>\n      <xsd:element name=\"date1904\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lang\" type=\"CT_TextLanguageID\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"roundedCorners\" type=\"CT_Boolean\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"CT_Style\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"clrMapOvr\" type=\"a:CT_ColorMapping\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pivotSource\" type=\"CT_PivotSource\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"protection\" type=\"CT_Protection\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"chart\" type=\"CT_Chart\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txPr\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"externalData\" type=\"CT_ExternalData\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"printSettings\" type=\"CT_PrintSettings\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"userShapes\" type=\"CT_RelId\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"chartSpace\" type=\"CT_ChartSpace\"/>\n  <xsd:element name=\"userShapes\" type=\"cdr:CT_Drawing\"/>\n  <xsd:element name=\"chart\" type=\"CT_RelId\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n  xmlns=\"http://schemas.openxmlformats.org/drawingml/2006/chartDrawing\"\n  targetNamespace=\"http://schemas.openxmlformats.org/drawingml/2006/chartDrawing\"\n  elementFormDefault=\"qualified\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n    schemaLocation=\"dml-main.xsd\"/>\n  <xsd:complexType name=\"CT_ShapeNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvSpPr\" type=\"a:CT_NonVisualDrawingShapeProps\" minOccurs=\"1\" maxOccurs=\"1\"\n      />\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Shape\">\n    <xsd:sequence>\n      <xsd:element name=\"nvSpPr\" type=\"CT_ShapeNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"a:CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txBody\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"macro\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"textlink\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"fLocksText\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"fPublished\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ConnectorNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvCxnSpPr\" type=\"a:CT_NonVisualConnectorProperties\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Connector\">\n    <xsd:sequence>\n      <xsd:element name=\"nvCxnSpPr\" type=\"CT_ConnectorNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"a:CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"macro\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"fPublished\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PictureNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvPicPr\" type=\"a:CT_NonVisualPictureProperties\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Picture\">\n    <xsd:sequence>\n      <xsd:element name=\"nvPicPr\" type=\"CT_PictureNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"blipFill\" type=\"a:CT_BlipFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"a:CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"macro\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"fPublished\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GraphicFrameNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvGraphicFramePr\" type=\"a:CT_NonVisualGraphicFrameProperties\"\n        minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GraphicFrame\">\n    <xsd:sequence>\n      <xsd:element name=\"nvGraphicFramePr\" type=\"CT_GraphicFrameNonVisual\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"xfrm\" type=\"a:CT_Transform2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element ref=\"a:graphic\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"macro\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"fPublished\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupShapeNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvGrpSpPr\" type=\"a:CT_NonVisualGroupDrawingShapeProps\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupShape\">\n    <xsd:sequence>\n      <xsd:element name=\"nvGrpSpPr\" type=\"CT_GroupShapeNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"grpSpPr\" type=\"a:CT_GroupShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n        <xsd:element name=\"sp\" type=\"CT_Shape\"/>\n        <xsd:element name=\"grpSp\" type=\"CT_GroupShape\"/>\n        <xsd:element name=\"graphicFrame\" type=\"CT_GraphicFrame\"/>\n        <xsd:element name=\"cxnSp\" type=\"CT_Connector\"/>\n        <xsd:element name=\"pic\" type=\"CT_Picture\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ObjectChoices\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n        <xsd:element name=\"sp\" type=\"CT_Shape\"/>\n        <xsd:element name=\"grpSp\" type=\"CT_GroupShape\"/>\n        <xsd:element name=\"graphicFrame\" type=\"CT_GraphicFrame\"/>\n        <xsd:element name=\"cxnSp\" type=\"CT_Connector\"/>\n        <xsd:element name=\"pic\" type=\"CT_Picture\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:simpleType name=\"ST_MarkerCoordinate\">\n    <xsd:restriction base=\"xsd:double\">\n      <xsd:minInclusive value=\"0.0\"/>\n      <xsd:maxInclusive value=\"1.0\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Marker\">\n    <xsd:sequence>\n      <xsd:element name=\"x\" type=\"ST_MarkerCoordinate\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"y\" type=\"ST_MarkerCoordinate\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RelSizeAnchor\">\n    <xsd:sequence>\n      <xsd:element name=\"from\" type=\"CT_Marker\"/>\n      <xsd:element name=\"to\" type=\"CT_Marker\"/>\n      <xsd:group ref=\"EG_ObjectChoices\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AbsSizeAnchor\">\n    <xsd:sequence>\n      <xsd:element name=\"from\" type=\"CT_Marker\"/>\n      <xsd:element name=\"ext\" type=\"a:CT_PositiveSize2D\"/>\n      <xsd:group ref=\"EG_ObjectChoices\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_Anchor\">\n    <xsd:choice>\n      <xsd:element name=\"relSizeAnchor\" type=\"CT_RelSizeAnchor\"/>\n      <xsd:element name=\"absSizeAnchor\" type=\"CT_AbsSizeAnchor\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_Drawing\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_Anchor\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/drawingml/2006/diagram\"\n  xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  targetNamespace=\"http://schemas.openxmlformats.org/drawingml/2006/diagram\"\n  elementFormDefault=\"qualified\" attributeFormDefault=\"unqualified\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n    schemaLocation=\"shared-relationshipReference.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n    schemaLocation=\"dml-main.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:complexType name=\"CT_CTName\">\n    <xsd:attribute name=\"lang\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CTDescription\">\n    <xsd:attribute name=\"lang\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CTCategory\">\n    <xsd:attribute name=\"type\" type=\"xsd:anyURI\" use=\"required\"/>\n    <xsd:attribute name=\"pri\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CTCategories\">\n    <xsd:sequence minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"cat\" type=\"CT_CTCategory\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ClrAppMethod\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"span\"/>\n      <xsd:enumeration value=\"cycle\"/>\n      <xsd:enumeration value=\"repeat\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HueDir\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"cw\"/>\n      <xsd:enumeration value=\"ccw\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Colors\">\n    <xsd:sequence>\n      <xsd:group ref=\"a:EG_ColorChoice\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"meth\" type=\"ST_ClrAppMethod\" use=\"optional\" default=\"span\"/>\n    <xsd:attribute name=\"hueDir\" type=\"ST_HueDir\" use=\"optional\" default=\"cw\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CTStyleLabel\">\n    <xsd:sequence>\n      <xsd:element name=\"fillClrLst\" type=\"CT_Colors\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"linClrLst\" type=\"CT_Colors\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"effectClrLst\" type=\"CT_Colors\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txLinClrLst\" type=\"CT_Colors\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txFillClrLst\" type=\"CT_Colors\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txEffectClrLst\" type=\"CT_Colors\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColorTransform\">\n    <xsd:sequence>\n      <xsd:element name=\"title\" type=\"CT_CTName\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"desc\" type=\"CT_CTDescription\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"catLst\" type=\"CT_CTCategories\" minOccurs=\"0\"/>\n      <xsd:element name=\"styleLbl\" type=\"CT_CTStyleLabel\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uniqueId\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"minVer\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:element name=\"colorsDef\" type=\"CT_ColorTransform\"/>\n  <xsd:complexType name=\"CT_ColorTransformHeader\">\n    <xsd:sequence>\n      <xsd:element name=\"title\" type=\"CT_CTName\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"desc\" type=\"CT_CTDescription\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"catLst\" type=\"CT_CTCategories\" minOccurs=\"0\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uniqueId\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"minVer\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"resId\" type=\"xsd:int\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:element name=\"colorsDefHdr\" type=\"CT_ColorTransformHeader\"/>\n  <xsd:complexType name=\"CT_ColorTransformHeaderLst\">\n    <xsd:sequence>\n      <xsd:element name=\"colorsDefHdr\" type=\"CT_ColorTransformHeader\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"colorsDefHdrLst\" type=\"CT_ColorTransformHeaderLst\"/>\n  <xsd:simpleType name=\"ST_PtType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"node\"/>\n      <xsd:enumeration value=\"asst\"/>\n      <xsd:enumeration value=\"doc\"/>\n      <xsd:enumeration value=\"pres\"/>\n      <xsd:enumeration value=\"parTrans\"/>\n      <xsd:enumeration value=\"sibTrans\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Pt\">\n    <xsd:sequence>\n      <xsd:element name=\"prSet\" type=\"CT_ElemPropSet\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"t\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"modelId\" type=\"ST_ModelId\" use=\"required\"/>\n    <xsd:attribute name=\"type\" type=\"ST_PtType\" use=\"optional\" default=\"node\"/>\n    <xsd:attribute name=\"cxnId\" type=\"ST_ModelId\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PtList\">\n    <xsd:sequence>\n      <xsd:element name=\"pt\" type=\"CT_Pt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_CxnType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"parOf\"/>\n      <xsd:enumeration value=\"presOf\"/>\n      <xsd:enumeration value=\"presParOf\"/>\n      <xsd:enumeration value=\"unknownRelationship\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Cxn\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"modelId\" type=\"ST_ModelId\" use=\"required\"/>\n    <xsd:attribute name=\"type\" type=\"ST_CxnType\" use=\"optional\" default=\"parOf\"/>\n    <xsd:attribute name=\"srcId\" type=\"ST_ModelId\" use=\"required\"/>\n    <xsd:attribute name=\"destId\" type=\"ST_ModelId\" use=\"required\"/>\n    <xsd:attribute name=\"srcOrd\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"destOrd\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"parTransId\" type=\"ST_ModelId\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"sibTransId\" type=\"ST_ModelId\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"presId\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CxnList\">\n    <xsd:sequence>\n      <xsd:element name=\"cxn\" type=\"CT_Cxn\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DataModel\">\n    <xsd:sequence>\n      <xsd:element name=\"ptLst\" type=\"CT_PtList\"/>\n      <xsd:element name=\"cxnLst\" type=\"CT_CxnList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bg\" type=\"a:CT_BackgroundFormatting\" minOccurs=\"0\"/>\n      <xsd:element name=\"whole\" type=\"a:CT_WholeE2oFormatting\" minOccurs=\"0\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"dataModel\" type=\"CT_DataModel\"/>\n  <xsd:attributeGroup name=\"AG_IteratorAttributes\">\n    <xsd:attribute name=\"axis\" type=\"ST_AxisTypes\" use=\"optional\" default=\"none\"/>\n    <xsd:attribute name=\"ptType\" type=\"ST_ElementTypes\" use=\"optional\" default=\"all\"/>\n    <xsd:attribute name=\"hideLastTrans\" type=\"ST_Booleans\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"st\" type=\"ST_Ints\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"cnt\" type=\"ST_UnsignedInts\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"step\" type=\"ST_Ints\" use=\"optional\" default=\"1\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_ConstraintAttributes\">\n    <xsd:attribute name=\"type\" type=\"ST_ConstraintType\" use=\"required\"/>\n    <xsd:attribute name=\"for\" type=\"ST_ConstraintRelationship\" use=\"optional\" default=\"self\"/>\n    <xsd:attribute name=\"forName\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"ptType\" type=\"ST_ElementType\" use=\"optional\" default=\"all\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_ConstraintRefAttributes\">\n    <xsd:attribute name=\"refType\" type=\"ST_ConstraintType\" use=\"optional\" default=\"none\"/>\n    <xsd:attribute name=\"refFor\" type=\"ST_ConstraintRelationship\" use=\"optional\" default=\"self\"/>\n    <xsd:attribute name=\"refForName\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"refPtType\" type=\"ST_ElementType\" use=\"optional\" default=\"all\"/>\n  </xsd:attributeGroup>\n  <xsd:complexType name=\"CT_Constraint\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_ConstraintAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_ConstraintRefAttributes\"/>\n    <xsd:attribute name=\"op\" type=\"ST_BoolOperator\" use=\"optional\" default=\"none\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:double\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"fact\" type=\"xsd:double\" use=\"optional\" default=\"1\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Constraints\">\n    <xsd:sequence>\n      <xsd:element name=\"constr\" type=\"CT_Constraint\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NumericRule\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_ConstraintAttributes\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:double\" use=\"optional\" default=\"NaN\"/>\n    <xsd:attribute name=\"fact\" type=\"xsd:double\" use=\"optional\" default=\"NaN\"/>\n    <xsd:attribute name=\"max\" type=\"xsd:double\" use=\"optional\" default=\"NaN\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Rules\">\n    <xsd:sequence>\n      <xsd:element name=\"rule\" type=\"CT_NumericRule\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PresentationOf\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_IteratorAttributes\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_LayoutShapeType\" final=\"restriction\">\n    <xsd:union memberTypes=\"a:ST_ShapeType ST_OutputShapeType\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Index1\">\n    <xsd:restriction base=\"xsd:unsignedInt\">\n      <xsd:minInclusive value=\"1\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Adj\">\n    <xsd:attribute name=\"idx\" type=\"ST_Index1\" use=\"required\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:double\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AdjLst\">\n    <xsd:sequence>\n      <xsd:element name=\"adj\" type=\"CT_Adj\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Shape\">\n    <xsd:sequence>\n      <xsd:element name=\"adjLst\" type=\"CT_AdjLst\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"rot\" type=\"xsd:double\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"type\" type=\"ST_LayoutShapeType\" use=\"optional\" default=\"none\"/>\n    <xsd:attribute ref=\"r:blip\" use=\"optional\"/>\n    <xsd:attribute name=\"zOrderOff\" type=\"xsd:int\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"hideGeom\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"lkTxEntry\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"blipPhldr\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Parameter\">\n    <xsd:attribute name=\"type\" type=\"ST_ParameterId\" use=\"required\"/>\n    <xsd:attribute name=\"val\" type=\"ST_ParameterVal\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Algorithm\">\n    <xsd:sequence>\n      <xsd:element name=\"param\" type=\"CT_Parameter\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"type\" type=\"ST_AlgorithmType\" use=\"required\"/>\n    <xsd:attribute name=\"rev\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LayoutNode\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"alg\" type=\"CT_Algorithm\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shape\" type=\"CT_Shape\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"presOf\" type=\"CT_PresentationOf\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"constrLst\" type=\"CT_Constraints\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ruleLst\" type=\"CT_Rules\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"varLst\" type=\"CT_LayoutVariablePropertySet\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"forEach\" type=\"CT_ForEach\"/>\n      <xsd:element name=\"layoutNode\" type=\"CT_LayoutNode\"/>\n      <xsd:element name=\"choose\" type=\"CT_Choose\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"styleLbl\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"chOrder\" type=\"ST_ChildOrderType\" use=\"optional\" default=\"b\"/>\n    <xsd:attribute name=\"moveWith\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ForEach\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"alg\" type=\"CT_Algorithm\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shape\" type=\"CT_Shape\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"presOf\" type=\"CT_PresentationOf\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"constrLst\" type=\"CT_Constraints\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ruleLst\" type=\"CT_Rules\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"forEach\" type=\"CT_ForEach\"/>\n      <xsd:element name=\"layoutNode\" type=\"CT_LayoutNode\"/>\n      <xsd:element name=\"choose\" type=\"CT_Choose\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"ref\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attributeGroup ref=\"AG_IteratorAttributes\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_When\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"alg\" type=\"CT_Algorithm\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shape\" type=\"CT_Shape\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"presOf\" type=\"CT_PresentationOf\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"constrLst\" type=\"CT_Constraints\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ruleLst\" type=\"CT_Rules\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"forEach\" type=\"CT_ForEach\"/>\n      <xsd:element name=\"layoutNode\" type=\"CT_LayoutNode\"/>\n      <xsd:element name=\"choose\" type=\"CT_Choose\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attributeGroup ref=\"AG_IteratorAttributes\"/>\n    <xsd:attribute name=\"func\" type=\"ST_FunctionType\" use=\"required\"/>\n    <xsd:attribute name=\"arg\" type=\"ST_FunctionArgument\" use=\"optional\" default=\"none\"/>\n    <xsd:attribute name=\"op\" type=\"ST_FunctionOperator\" use=\"required\"/>\n    <xsd:attribute name=\"val\" type=\"ST_FunctionValue\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Otherwise\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"alg\" type=\"CT_Algorithm\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shape\" type=\"CT_Shape\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"presOf\" type=\"CT_PresentationOf\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"constrLst\" type=\"CT_Constraints\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ruleLst\" type=\"CT_Rules\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"forEach\" type=\"CT_ForEach\"/>\n      <xsd:element name=\"layoutNode\" type=\"CT_LayoutNode\"/>\n      <xsd:element name=\"choose\" type=\"CT_Choose\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Choose\">\n    <xsd:sequence>\n      <xsd:element name=\"if\" type=\"CT_When\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"else\" type=\"CT_Otherwise\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SampleData\">\n    <xsd:sequence>\n      <xsd:element name=\"dataModel\" type=\"CT_DataModel\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"useDef\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Category\">\n    <xsd:attribute name=\"type\" type=\"xsd:anyURI\" use=\"required\"/>\n    <xsd:attribute name=\"pri\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Categories\">\n    <xsd:sequence>\n      <xsd:element name=\"cat\" type=\"CT_Category\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Name\">\n    <xsd:attribute name=\"lang\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Description\">\n    <xsd:attribute name=\"lang\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DiagramDefinition\">\n    <xsd:sequence>\n      <xsd:element name=\"title\" type=\"CT_Name\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"desc\" type=\"CT_Description\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"catLst\" type=\"CT_Categories\" minOccurs=\"0\"/>\n      <xsd:element name=\"sampData\" type=\"CT_SampleData\" minOccurs=\"0\"/>\n      <xsd:element name=\"styleData\" type=\"CT_SampleData\" minOccurs=\"0\"/>\n      <xsd:element name=\"clrData\" type=\"CT_SampleData\" minOccurs=\"0\"/>\n      <xsd:element name=\"layoutNode\" type=\"CT_LayoutNode\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uniqueId\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"minVer\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"defStyle\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:element name=\"layoutDef\" type=\"CT_DiagramDefinition\"/>\n  <xsd:complexType name=\"CT_DiagramDefinitionHeader\">\n    <xsd:sequence>\n      <xsd:element name=\"title\" type=\"CT_Name\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"desc\" type=\"CT_Description\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"catLst\" type=\"CT_Categories\" minOccurs=\"0\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uniqueId\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"minVer\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"defStyle\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"resId\" type=\"xsd:int\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:element name=\"layoutDefHdr\" type=\"CT_DiagramDefinitionHeader\"/>\n  <xsd:complexType name=\"CT_DiagramDefinitionHeaderLst\">\n    <xsd:sequence>\n      <xsd:element name=\"layoutDefHdr\" type=\"CT_DiagramDefinitionHeader\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"layoutDefHdrLst\" type=\"CT_DiagramDefinitionHeaderLst\"/>\n  <xsd:complexType name=\"CT_RelIds\">\n    <xsd:attribute ref=\"r:dm\" use=\"required\"/>\n    <xsd:attribute ref=\"r:lo\" use=\"required\"/>\n    <xsd:attribute ref=\"r:qs\" use=\"required\"/>\n    <xsd:attribute ref=\"r:cs\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:element name=\"relIds\" type=\"CT_RelIds\"/>\n  <xsd:simpleType name=\"ST_ParameterVal\">\n    <xsd:union\n      memberTypes=\"ST_DiagramHorizontalAlignment ST_VerticalAlignment ST_ChildDirection ST_ChildAlignment ST_SecondaryChildAlignment ST_LinearDirection ST_SecondaryLinearDirection ST_StartingElement ST_BendPoint ST_ConnectorRouting ST_ArrowheadStyle ST_ConnectorDimension ST_RotationPath ST_CenterShapeMapping ST_NodeHorizontalAlignment ST_NodeVerticalAlignment ST_FallbackDimension ST_TextDirection ST_PyramidAccentPosition ST_PyramidAccentTextMargin ST_TextBlockDirection ST_TextAnchorHorizontal ST_TextAnchorVertical ST_DiagramTextAlignment ST_AutoTextRotation ST_GrowDirection ST_FlowDirection ST_ContinueDirection ST_Breakpoint ST_Offset ST_HierarchyAlignment xsd:int xsd:double xsd:boolean xsd:string ST_ConnectorPoint\"\n    />\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ModelId\">\n    <xsd:union memberTypes=\"xsd:int s:ST_Guid\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PrSetCustVal\">\n    <xsd:union memberTypes=\"s:ST_Percentage xsd:int\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ElemPropSet\">\n    <xsd:sequence>\n      <xsd:element name=\"presLayoutVars\" type=\"CT_LayoutVariablePropertySet\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"a:CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"presAssocID\" type=\"ST_ModelId\" use=\"optional\"/>\n    <xsd:attribute name=\"presName\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"presStyleLbl\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"presStyleIdx\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"presStyleCnt\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"loTypeId\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"loCatId\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"qsTypeId\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"qsCatId\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"csTypeId\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"csCatId\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"coherent3DOff\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"phldrT\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"phldr\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"custAng\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"custFlipVert\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"custFlipHor\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"custSzX\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"custSzY\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"custScaleX\" type=\"ST_PrSetCustVal\" use=\"optional\"/>\n    <xsd:attribute name=\"custScaleY\" type=\"ST_PrSetCustVal\" use=\"optional\"/>\n    <xsd:attribute name=\"custT\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"custLinFactX\" type=\"ST_PrSetCustVal\" use=\"optional\"/>\n    <xsd:attribute name=\"custLinFactY\" type=\"ST_PrSetCustVal\" use=\"optional\"/>\n    <xsd:attribute name=\"custLinFactNeighborX\" type=\"ST_PrSetCustVal\" use=\"optional\"/>\n    <xsd:attribute name=\"custLinFactNeighborY\" type=\"ST_PrSetCustVal\" use=\"optional\"/>\n    <xsd:attribute name=\"custRadScaleRad\" type=\"ST_PrSetCustVal\" use=\"optional\"/>\n    <xsd:attribute name=\"custRadScaleInc\" type=\"ST_PrSetCustVal\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Direction\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"norm\"/>\n      <xsd:enumeration value=\"rev\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HierBranchStyle\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"r\"/>\n      <xsd:enumeration value=\"hang\"/>\n      <xsd:enumeration value=\"std\"/>\n      <xsd:enumeration value=\"init\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AnimOneStr\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"one\"/>\n      <xsd:enumeration value=\"branch\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AnimLvlStr\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"lvl\"/>\n      <xsd:enumeration value=\"ctr\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_OrgChart\">\n    <xsd:attribute name=\"val\" type=\"xsd:boolean\" default=\"false\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_NodeCount\">\n    <xsd:restriction base=\"xsd:int\">\n      <xsd:minInclusive value=\"-1\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ChildMax\">\n    <xsd:attribute name=\"val\" type=\"ST_NodeCount\" default=\"-1\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ChildPref\">\n    <xsd:attribute name=\"val\" type=\"ST_NodeCount\" default=\"-1\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BulletEnabled\">\n    <xsd:attribute name=\"val\" type=\"xsd:boolean\" default=\"false\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Direction\">\n    <xsd:attribute name=\"val\" type=\"ST_Direction\" default=\"norm\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_HierBranchStyle\">\n    <xsd:attribute name=\"val\" type=\"ST_HierBranchStyle\" default=\"std\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AnimOne\">\n    <xsd:attribute name=\"val\" type=\"ST_AnimOneStr\" default=\"one\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AnimLvl\">\n    <xsd:attribute name=\"val\" type=\"ST_AnimLvlStr\" default=\"none\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ResizeHandlesStr\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"exact\"/>\n      <xsd:enumeration value=\"rel\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ResizeHandles\">\n    <xsd:attribute name=\"val\" type=\"ST_ResizeHandlesStr\" default=\"rel\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LayoutVariablePropertySet\">\n    <xsd:sequence>\n      <xsd:element name=\"orgChart\" type=\"CT_OrgChart\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"chMax\" type=\"CT_ChildMax\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"chPref\" type=\"CT_ChildPref\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bulletEnabled\" type=\"CT_BulletEnabled\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dir\" type=\"CT_Direction\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hierBranch\" type=\"CT_HierBranchStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"animOne\" type=\"CT_AnimOne\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"animLvl\" type=\"CT_AnimLvl\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"resizeHandles\" type=\"CT_ResizeHandles\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SDName\">\n    <xsd:attribute name=\"lang\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SDDescription\">\n    <xsd:attribute name=\"lang\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SDCategory\">\n    <xsd:attribute name=\"type\" type=\"xsd:anyURI\" use=\"required\"/>\n    <xsd:attribute name=\"pri\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SDCategories\">\n    <xsd:sequence minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"cat\" type=\"CT_SDCategory\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextProps\">\n    <xsd:sequence>\n      <xsd:group ref=\"a:EG_Text3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_StyleLabel\">\n    <xsd:sequence>\n      <xsd:element name=\"scene3d\" type=\"a:CT_Scene3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sp3d\" type=\"a:CT_Shape3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txPr\" type=\"CT_TextProps\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"a:CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_StyleDefinition\">\n    <xsd:sequence>\n      <xsd:element name=\"title\" type=\"CT_SDName\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"desc\" type=\"CT_SDDescription\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"catLst\" type=\"CT_SDCategories\" minOccurs=\"0\"/>\n      <xsd:element name=\"scene3d\" type=\"a:CT_Scene3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"styleLbl\" type=\"CT_StyleLabel\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uniqueId\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"minVer\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:element name=\"styleDef\" type=\"CT_StyleDefinition\"/>\n  <xsd:complexType name=\"CT_StyleDefinitionHeader\">\n    <xsd:sequence>\n      <xsd:element name=\"title\" type=\"CT_SDName\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"desc\" type=\"CT_SDDescription\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"catLst\" type=\"CT_SDCategories\" minOccurs=\"0\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uniqueId\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"minVer\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"resId\" type=\"xsd:int\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:element name=\"styleDefHdr\" type=\"CT_StyleDefinitionHeader\"/>\n  <xsd:complexType name=\"CT_StyleDefinitionHeaderLst\">\n    <xsd:sequence>\n      <xsd:element name=\"styleDefHdr\" type=\"CT_StyleDefinitionHeader\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"styleDefHdrLst\" type=\"CT_StyleDefinitionHeaderLst\"/>\n  <xsd:simpleType name=\"ST_AlgorithmType\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"composite\"/>\n      <xsd:enumeration value=\"conn\"/>\n      <xsd:enumeration value=\"cycle\"/>\n      <xsd:enumeration value=\"hierChild\"/>\n      <xsd:enumeration value=\"hierRoot\"/>\n      <xsd:enumeration value=\"pyra\"/>\n      <xsd:enumeration value=\"lin\"/>\n      <xsd:enumeration value=\"sp\"/>\n      <xsd:enumeration value=\"tx\"/>\n      <xsd:enumeration value=\"snake\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AxisType\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"self\"/>\n      <xsd:enumeration value=\"ch\"/>\n      <xsd:enumeration value=\"des\"/>\n      <xsd:enumeration value=\"desOrSelf\"/>\n      <xsd:enumeration value=\"par\"/>\n      <xsd:enumeration value=\"ancst\"/>\n      <xsd:enumeration value=\"ancstOrSelf\"/>\n      <xsd:enumeration value=\"followSib\"/>\n      <xsd:enumeration value=\"precedSib\"/>\n      <xsd:enumeration value=\"follow\"/>\n      <xsd:enumeration value=\"preced\"/>\n      <xsd:enumeration value=\"root\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AxisTypes\">\n    <xsd:list itemType=\"ST_AxisType\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_BoolOperator\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"equ\"/>\n      <xsd:enumeration value=\"gte\"/>\n      <xsd:enumeration value=\"lte\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ChildOrderType\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"t\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ConstraintType\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"alignOff\"/>\n      <xsd:enumeration value=\"begMarg\"/>\n      <xsd:enumeration value=\"bendDist\"/>\n      <xsd:enumeration value=\"begPad\"/>\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"bMarg\"/>\n      <xsd:enumeration value=\"bOff\"/>\n      <xsd:enumeration value=\"ctrX\"/>\n      <xsd:enumeration value=\"ctrXOff\"/>\n      <xsd:enumeration value=\"ctrY\"/>\n      <xsd:enumeration value=\"ctrYOff\"/>\n      <xsd:enumeration value=\"connDist\"/>\n      <xsd:enumeration value=\"diam\"/>\n      <xsd:enumeration value=\"endMarg\"/>\n      <xsd:enumeration value=\"endPad\"/>\n      <xsd:enumeration value=\"h\"/>\n      <xsd:enumeration value=\"hArH\"/>\n      <xsd:enumeration value=\"hOff\"/>\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"lMarg\"/>\n      <xsd:enumeration value=\"lOff\"/>\n      <xsd:enumeration value=\"r\"/>\n      <xsd:enumeration value=\"rMarg\"/>\n      <xsd:enumeration value=\"rOff\"/>\n      <xsd:enumeration value=\"primFontSz\"/>\n      <xsd:enumeration value=\"pyraAcctRatio\"/>\n      <xsd:enumeration value=\"secFontSz\"/>\n      <xsd:enumeration value=\"sibSp\"/>\n      <xsd:enumeration value=\"secSibSp\"/>\n      <xsd:enumeration value=\"sp\"/>\n      <xsd:enumeration value=\"stemThick\"/>\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"tMarg\"/>\n      <xsd:enumeration value=\"tOff\"/>\n      <xsd:enumeration value=\"userA\"/>\n      <xsd:enumeration value=\"userB\"/>\n      <xsd:enumeration value=\"userC\"/>\n      <xsd:enumeration value=\"userD\"/>\n      <xsd:enumeration value=\"userE\"/>\n      <xsd:enumeration value=\"userF\"/>\n      <xsd:enumeration value=\"userG\"/>\n      <xsd:enumeration value=\"userH\"/>\n      <xsd:enumeration value=\"userI\"/>\n      <xsd:enumeration value=\"userJ\"/>\n      <xsd:enumeration value=\"userK\"/>\n      <xsd:enumeration value=\"userL\"/>\n      <xsd:enumeration value=\"userM\"/>\n      <xsd:enumeration value=\"userN\"/>\n      <xsd:enumeration value=\"userO\"/>\n      <xsd:enumeration value=\"userP\"/>\n      <xsd:enumeration value=\"userQ\"/>\n      <xsd:enumeration value=\"userR\"/>\n      <xsd:enumeration value=\"userS\"/>\n      <xsd:enumeration value=\"userT\"/>\n      <xsd:enumeration value=\"userU\"/>\n      <xsd:enumeration value=\"userV\"/>\n      <xsd:enumeration value=\"userW\"/>\n      <xsd:enumeration value=\"userX\"/>\n      <xsd:enumeration value=\"userY\"/>\n      <xsd:enumeration value=\"userZ\"/>\n      <xsd:enumeration value=\"w\"/>\n      <xsd:enumeration value=\"wArH\"/>\n      <xsd:enumeration value=\"wOff\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ConstraintRelationship\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"self\"/>\n      <xsd:enumeration value=\"ch\"/>\n      <xsd:enumeration value=\"des\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ElementType\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"all\"/>\n      <xsd:enumeration value=\"doc\"/>\n      <xsd:enumeration value=\"node\"/>\n      <xsd:enumeration value=\"norm\"/>\n      <xsd:enumeration value=\"nonNorm\"/>\n      <xsd:enumeration value=\"asst\"/>\n      <xsd:enumeration value=\"nonAsst\"/>\n      <xsd:enumeration value=\"parTrans\"/>\n      <xsd:enumeration value=\"pres\"/>\n      <xsd:enumeration value=\"sibTrans\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ElementTypes\">\n    <xsd:list itemType=\"ST_ElementType\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ParameterId\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"horzAlign\"/>\n      <xsd:enumeration value=\"vertAlign\"/>\n      <xsd:enumeration value=\"chDir\"/>\n      <xsd:enumeration value=\"chAlign\"/>\n      <xsd:enumeration value=\"secChAlign\"/>\n      <xsd:enumeration value=\"linDir\"/>\n      <xsd:enumeration value=\"secLinDir\"/>\n      <xsd:enumeration value=\"stElem\"/>\n      <xsd:enumeration value=\"bendPt\"/>\n      <xsd:enumeration value=\"connRout\"/>\n      <xsd:enumeration value=\"begSty\"/>\n      <xsd:enumeration value=\"endSty\"/>\n      <xsd:enumeration value=\"dim\"/>\n      <xsd:enumeration value=\"rotPath\"/>\n      <xsd:enumeration value=\"ctrShpMap\"/>\n      <xsd:enumeration value=\"nodeHorzAlign\"/>\n      <xsd:enumeration value=\"nodeVertAlign\"/>\n      <xsd:enumeration value=\"fallback\"/>\n      <xsd:enumeration value=\"txDir\"/>\n      <xsd:enumeration value=\"pyraAcctPos\"/>\n      <xsd:enumeration value=\"pyraAcctTxMar\"/>\n      <xsd:enumeration value=\"txBlDir\"/>\n      <xsd:enumeration value=\"txAnchorHorz\"/>\n      <xsd:enumeration value=\"txAnchorVert\"/>\n      <xsd:enumeration value=\"txAnchorHorzCh\"/>\n      <xsd:enumeration value=\"txAnchorVertCh\"/>\n      <xsd:enumeration value=\"parTxLTRAlign\"/>\n      <xsd:enumeration value=\"parTxRTLAlign\"/>\n      <xsd:enumeration value=\"shpTxLTRAlignCh\"/>\n      <xsd:enumeration value=\"shpTxRTLAlignCh\"/>\n      <xsd:enumeration value=\"autoTxRot\"/>\n      <xsd:enumeration value=\"grDir\"/>\n      <xsd:enumeration value=\"flowDir\"/>\n      <xsd:enumeration value=\"contDir\"/>\n      <xsd:enumeration value=\"bkpt\"/>\n      <xsd:enumeration value=\"off\"/>\n      <xsd:enumeration value=\"hierAlign\"/>\n      <xsd:enumeration value=\"bkPtFixedVal\"/>\n      <xsd:enumeration value=\"stBulletLvl\"/>\n      <xsd:enumeration value=\"stAng\"/>\n      <xsd:enumeration value=\"spanAng\"/>\n      <xsd:enumeration value=\"ar\"/>\n      <xsd:enumeration value=\"lnSpPar\"/>\n      <xsd:enumeration value=\"lnSpAfParP\"/>\n      <xsd:enumeration value=\"lnSpCh\"/>\n      <xsd:enumeration value=\"lnSpAfChP\"/>\n      <xsd:enumeration value=\"rtShortDist\"/>\n      <xsd:enumeration value=\"alignTx\"/>\n      <xsd:enumeration value=\"pyraLvlNode\"/>\n      <xsd:enumeration value=\"pyraAcctBkgdNode\"/>\n      <xsd:enumeration value=\"pyraAcctTxNode\"/>\n      <xsd:enumeration value=\"srcNode\"/>\n      <xsd:enumeration value=\"dstNode\"/>\n      <xsd:enumeration value=\"begPts\"/>\n      <xsd:enumeration value=\"endPts\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Ints\">\n    <xsd:list itemType=\"xsd:int\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_UnsignedInts\">\n    <xsd:list itemType=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Booleans\">\n    <xsd:list itemType=\"xsd:boolean\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FunctionType\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"cnt\"/>\n      <xsd:enumeration value=\"pos\"/>\n      <xsd:enumeration value=\"revPos\"/>\n      <xsd:enumeration value=\"posEven\"/>\n      <xsd:enumeration value=\"posOdd\"/>\n      <xsd:enumeration value=\"var\"/>\n      <xsd:enumeration value=\"depth\"/>\n      <xsd:enumeration value=\"maxDepth\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FunctionOperator\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"equ\"/>\n      <xsd:enumeration value=\"neq\"/>\n      <xsd:enumeration value=\"gt\"/>\n      <xsd:enumeration value=\"lt\"/>\n      <xsd:enumeration value=\"gte\"/>\n      <xsd:enumeration value=\"lte\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DiagramHorizontalAlignment\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"r\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_VerticalAlignment\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"mid\"/>\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ChildDirection\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"horz\"/>\n      <xsd:enumeration value=\"vert\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ChildAlignment\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"r\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_SecondaryChildAlignment\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"r\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_LinearDirection\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"fromL\"/>\n      <xsd:enumeration value=\"fromR\"/>\n      <xsd:enumeration value=\"fromT\"/>\n      <xsd:enumeration value=\"fromB\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_SecondaryLinearDirection\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"fromL\"/>\n      <xsd:enumeration value=\"fromR\"/>\n      <xsd:enumeration value=\"fromT\"/>\n      <xsd:enumeration value=\"fromB\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_StartingElement\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"node\"/>\n      <xsd:enumeration value=\"trans\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_RotationPath\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"alongPath\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CenterShapeMapping\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"fNode\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_BendPoint\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"beg\"/>\n      <xsd:enumeration value=\"def\"/>\n      <xsd:enumeration value=\"end\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ConnectorRouting\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"stra\"/>\n      <xsd:enumeration value=\"bend\"/>\n      <xsd:enumeration value=\"curve\"/>\n      <xsd:enumeration value=\"longCurve\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ArrowheadStyle\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"auto\"/>\n      <xsd:enumeration value=\"arr\"/>\n      <xsd:enumeration value=\"noArr\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ConnectorDimension\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"1D\"/>\n      <xsd:enumeration value=\"2D\"/>\n      <xsd:enumeration value=\"cust\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ConnectorPoint\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"auto\"/>\n      <xsd:enumeration value=\"bCtr\"/>\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"midL\"/>\n      <xsd:enumeration value=\"midR\"/>\n      <xsd:enumeration value=\"tCtr\"/>\n      <xsd:enumeration value=\"bL\"/>\n      <xsd:enumeration value=\"bR\"/>\n      <xsd:enumeration value=\"tL\"/>\n      <xsd:enumeration value=\"tR\"/>\n      <xsd:enumeration value=\"radial\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_NodeHorizontalAlignment\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"r\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_NodeVerticalAlignment\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"mid\"/>\n      <xsd:enumeration value=\"b\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FallbackDimension\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"1D\"/>\n      <xsd:enumeration value=\"2D\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextDirection\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"fromT\"/>\n      <xsd:enumeration value=\"fromB\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PyramidAccentPosition\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"bef\"/>\n      <xsd:enumeration value=\"aft\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PyramidAccentTextMargin\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"step\"/>\n      <xsd:enumeration value=\"stack\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextBlockDirection\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"horz\"/>\n      <xsd:enumeration value=\"vert\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextAnchorHorizontal\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"ctr\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextAnchorVertical\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"mid\"/>\n      <xsd:enumeration value=\"b\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DiagramTextAlignment\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"r\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AutoTextRotation\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"upr\"/>\n      <xsd:enumeration value=\"grav\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_GrowDirection\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"tL\"/>\n      <xsd:enumeration value=\"tR\"/>\n      <xsd:enumeration value=\"bL\"/>\n      <xsd:enumeration value=\"bR\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FlowDirection\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"row\"/>\n      <xsd:enumeration value=\"col\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ContinueDirection\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"revDir\"/>\n      <xsd:enumeration value=\"sameDir\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Breakpoint\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"endCnv\"/>\n      <xsd:enumeration value=\"bal\"/>\n      <xsd:enumeration value=\"fixed\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Offset\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"off\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HierarchyAlignment\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"tL\"/>\n      <xsd:enumeration value=\"tR\"/>\n      <xsd:enumeration value=\"tCtrCh\"/>\n      <xsd:enumeration value=\"tCtrDes\"/>\n      <xsd:enumeration value=\"bL\"/>\n      <xsd:enumeration value=\"bR\"/>\n      <xsd:enumeration value=\"bCtrCh\"/>\n      <xsd:enumeration value=\"bCtrDes\"/>\n      <xsd:enumeration value=\"lT\"/>\n      <xsd:enumeration value=\"lB\"/>\n      <xsd:enumeration value=\"lCtrCh\"/>\n      <xsd:enumeration value=\"lCtrDes\"/>\n      <xsd:enumeration value=\"rT\"/>\n      <xsd:enumeration value=\"rB\"/>\n      <xsd:enumeration value=\"rCtrCh\"/>\n      <xsd:enumeration value=\"rCtrDes\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FunctionValue\" final=\"restriction\">\n    <xsd:union\n      memberTypes=\"xsd:int xsd:boolean ST_Direction ST_HierBranchStyle ST_AnimOneStr ST_AnimLvlStr ST_ResizeHandlesStr\"\n    />\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_VariableType\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"orgChart\"/>\n      <xsd:enumeration value=\"chMax\"/>\n      <xsd:enumeration value=\"chPref\"/>\n      <xsd:enumeration value=\"bulEnabled\"/>\n      <xsd:enumeration value=\"dir\"/>\n      <xsd:enumeration value=\"hierBranch\"/>\n      <xsd:enumeration value=\"animOne\"/>\n      <xsd:enumeration value=\"animLvl\"/>\n      <xsd:enumeration value=\"resizeHandles\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FunctionArgument\" final=\"restriction\">\n    <xsd:union memberTypes=\"ST_VariableType\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_OutputShapeType\" final=\"restriction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"conn\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/drawingml/2006/lockedCanvas\"\n  xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  elementFormDefault=\"qualified\"\n  targetNamespace=\"http://schemas.openxmlformats.org/drawingml/2006/lockedCanvas\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n    schemaLocation=\"dml-main.xsd\"/>\n  <xsd:element name=\"lockedCanvas\" type=\"a:CT_GvmlGroupShape\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  xmlns=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n  targetNamespace=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n  elementFormDefault=\"qualified\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n    schemaLocation=\"shared-relationshipReference.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/diagram\"\n    schemaLocation=\"dml-diagram.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/chart\"\n    schemaLocation=\"dml-chart.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/picture\"\n    schemaLocation=\"dml-picture.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/lockedCanvas\"\n    schemaLocation=\"dml-lockedCanvas.xsd\"/>\n  <xsd:complexType name=\"CT_AudioFile\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute ref=\"r:link\" use=\"required\"/>\n    <xsd:attribute name=\"contentType\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_VideoFile\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute ref=\"r:link\" use=\"required\"/>\n    <xsd:attribute name=\"contentType\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_QuickTimeFile\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute ref=\"r:link\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AudioCDTime\">\n    <xsd:attribute name=\"track\" type=\"xsd:unsignedByte\" use=\"required\"/>\n    <xsd:attribute name=\"time\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AudioCD\">\n    <xsd:sequence>\n      <xsd:element name=\"st\" type=\"CT_AudioCDTime\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"end\" type=\"CT_AudioCDTime\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_Media\">\n    <xsd:choice>\n      <xsd:element name=\"audioCd\" type=\"CT_AudioCD\"/>\n      <xsd:element name=\"wavAudioFile\" type=\"CT_EmbeddedWAVAudioFile\"/>\n      <xsd:element name=\"audioFile\" type=\"CT_AudioFile\"/>\n      <xsd:element name=\"videoFile\" type=\"CT_VideoFile\"/>\n      <xsd:element name=\"quickTimeFile\" type=\"CT_QuickTimeFile\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:element name=\"videoFile\" type=\"CT_VideoFile\"/>\n  <xsd:simpleType name=\"ST_StyleMatrixColumnIndex\">\n    <xsd:restriction base=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FontCollectionIndex\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"major\"/>\n      <xsd:enumeration value=\"minor\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ColorSchemeIndex\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"dk1\"/>\n      <xsd:enumeration value=\"lt1\"/>\n      <xsd:enumeration value=\"dk2\"/>\n      <xsd:enumeration value=\"lt2\"/>\n      <xsd:enumeration value=\"accent1\"/>\n      <xsd:enumeration value=\"accent2\"/>\n      <xsd:enumeration value=\"accent3\"/>\n      <xsd:enumeration value=\"accent4\"/>\n      <xsd:enumeration value=\"accent5\"/>\n      <xsd:enumeration value=\"accent6\"/>\n      <xsd:enumeration value=\"hlink\"/>\n      <xsd:enumeration value=\"folHlink\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ColorScheme\">\n    <xsd:sequence>\n      <xsd:element name=\"dk1\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lt1\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dk2\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lt2\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"accent1\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"accent2\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"accent3\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"accent4\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"accent5\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"accent6\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hlink\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"folHlink\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomColor\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SupplementalFont\">\n    <xsd:attribute name=\"script\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"typeface\" type=\"ST_TextTypeface\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomColorList\">\n    <xsd:sequence>\n      <xsd:element name=\"custClr\" type=\"CT_CustomColor\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FontCollection\">\n    <xsd:sequence>\n      <xsd:element name=\"latin\" type=\"CT_TextFont\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ea\" type=\"CT_TextFont\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cs\" type=\"CT_TextFont\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"font\" type=\"CT_SupplementalFont\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EffectStyleItem\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_EffectProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"scene3d\" type=\"CT_Scene3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sp3d\" type=\"CT_Shape3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FontScheme\">\n    <xsd:sequence>\n      <xsd:element name=\"majorFont\" type=\"CT_FontCollection\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"minorFont\" type=\"CT_FontCollection\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FillStyleList\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_FillProperties\" minOccurs=\"3\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LineStyleList\">\n    <xsd:sequence>\n      <xsd:element name=\"ln\" type=\"CT_LineProperties\" minOccurs=\"3\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EffectStyleList\">\n    <xsd:sequence>\n      <xsd:element name=\"effectStyle\" type=\"CT_EffectStyleItem\" minOccurs=\"3\" maxOccurs=\"unbounded\"\n      />\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BackgroundFillStyleList\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_FillProperties\" minOccurs=\"3\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_StyleMatrix\">\n    <xsd:sequence>\n      <xsd:element name=\"fillStyleLst\" type=\"CT_FillStyleList\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lnStyleLst\" type=\"CT_LineStyleList\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"effectStyleLst\" type=\"CT_EffectStyleList\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bgFillStyleLst\" type=\"CT_BackgroundFillStyleList\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BaseStyles\">\n    <xsd:sequence>\n      <xsd:element name=\"clrScheme\" type=\"CT_ColorScheme\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fontScheme\" type=\"CT_FontScheme\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fmtScheme\" type=\"CT_StyleMatrix\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OfficeArtExtension\">\n    <xsd:sequence>\n      <xsd:any processContents=\"lax\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uri\" type=\"xsd:token\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Coordinate\">\n    <xsd:union memberTypes=\"ST_CoordinateUnqualified s:ST_UniversalMeasure\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CoordinateUnqualified\">\n    <xsd:restriction base=\"xsd:long\">\n      <xsd:minInclusive value=\"-27273042329600\"/>\n      <xsd:maxInclusive value=\"27273042316900\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Coordinate32\">\n    <xsd:union memberTypes=\"ST_Coordinate32Unqualified s:ST_UniversalMeasure\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Coordinate32Unqualified\">\n    <xsd:restriction base=\"xsd:int\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PositiveCoordinate\">\n    <xsd:restriction base=\"xsd:long\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"27273042316900\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PositiveCoordinate32\">\n    <xsd:restriction base=\"ST_Coordinate32Unqualified\">\n      <xsd:minInclusive value=\"0\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Angle\">\n    <xsd:restriction base=\"xsd:int\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Angle\">\n    <xsd:attribute name=\"val\" type=\"ST_Angle\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FixedAngle\">\n    <xsd:restriction base=\"ST_Angle\">\n      <xsd:minExclusive value=\"-5400000\"/>\n      <xsd:maxExclusive value=\"5400000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PositiveFixedAngle\">\n    <xsd:restriction base=\"ST_Angle\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxExclusive value=\"21600000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PositiveFixedAngle\">\n    <xsd:attribute name=\"val\" type=\"ST_PositiveFixedAngle\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Percentage\">\n    <xsd:union memberTypes=\"ST_PercentageDecimal s:ST_Percentage\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PercentageDecimal\">\n    <xsd:restriction base=\"xsd:int\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Percentage\">\n    <xsd:attribute name=\"val\" type=\"ST_Percentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PositivePercentage\">\n    <xsd:union memberTypes=\"ST_PositivePercentageDecimal s:ST_PositivePercentage\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PositivePercentageDecimal\">\n    <xsd:restriction base=\"ST_PercentageDecimal\">\n      <xsd:minInclusive value=\"0\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PositivePercentage\">\n    <xsd:attribute name=\"val\" type=\"ST_PositivePercentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FixedPercentage\">\n    <xsd:union memberTypes=\"ST_FixedPercentageDecimal s:ST_FixedPercentage\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FixedPercentageDecimal\">\n    <xsd:restriction base=\"ST_PercentageDecimal\">\n      <xsd:minInclusive value=\"-100000\"/>\n      <xsd:maxInclusive value=\"100000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_FixedPercentage\">\n    <xsd:attribute name=\"val\" type=\"ST_FixedPercentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PositiveFixedPercentage\">\n    <xsd:union memberTypes=\"ST_PositiveFixedPercentageDecimal s:ST_PositiveFixedPercentage\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PositiveFixedPercentageDecimal\">\n    <xsd:restriction base=\"ST_PercentageDecimal\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"100000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PositiveFixedPercentage\">\n    <xsd:attribute name=\"val\" type=\"ST_PositiveFixedPercentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Ratio\">\n    <xsd:attribute name=\"n\" type=\"xsd:long\" use=\"required\"/>\n    <xsd:attribute name=\"d\" type=\"xsd:long\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Point2D\">\n    <xsd:attribute name=\"x\" type=\"ST_Coordinate\" use=\"required\"/>\n    <xsd:attribute name=\"y\" type=\"ST_Coordinate\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PositiveSize2D\">\n    <xsd:attribute name=\"cx\" type=\"ST_PositiveCoordinate\" use=\"required\"/>\n    <xsd:attribute name=\"cy\" type=\"ST_PositiveCoordinate\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ComplementTransform\"/>\n  <xsd:complexType name=\"CT_InverseTransform\"/>\n  <xsd:complexType name=\"CT_GrayscaleTransform\"/>\n  <xsd:complexType name=\"CT_GammaTransform\"/>\n  <xsd:complexType name=\"CT_InverseGammaTransform\"/>\n  <xsd:group name=\"EG_ColorTransform\">\n    <xsd:choice>\n      <xsd:element name=\"tint\" type=\"CT_PositiveFixedPercentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shade\" type=\"CT_PositiveFixedPercentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"comp\" type=\"CT_ComplementTransform\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"inv\" type=\"CT_InverseTransform\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gray\" type=\"CT_GrayscaleTransform\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"alpha\" type=\"CT_PositiveFixedPercentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"alphaOff\" type=\"CT_FixedPercentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"alphaMod\" type=\"CT_PositivePercentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hue\" type=\"CT_PositiveFixedAngle\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hueOff\" type=\"CT_Angle\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hueMod\" type=\"CT_PositivePercentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sat\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"satOff\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"satMod\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lum\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lumOff\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lumMod\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"red\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"redOff\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"redMod\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"green\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"greenOff\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"greenMod\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"blue\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"blueOff\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"blueMod\" type=\"CT_Percentage\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gamma\" type=\"CT_GammaTransform\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"invGamma\" type=\"CT_InverseGammaTransform\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_ScRgbColor\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorTransform\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"r\" type=\"ST_Percentage\" use=\"required\"/>\n    <xsd:attribute name=\"g\" type=\"ST_Percentage\" use=\"required\"/>\n    <xsd:attribute name=\"b\" type=\"ST_Percentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SRgbColor\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorTransform\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"val\" type=\"s:ST_HexColorRGB\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_HslColor\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorTransform\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"hue\" type=\"ST_PositiveFixedAngle\" use=\"required\"/>\n    <xsd:attribute name=\"sat\" type=\"ST_Percentage\" use=\"required\"/>\n    <xsd:attribute name=\"lum\" type=\"ST_Percentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SystemColorVal\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"scrollBar\"/>\n      <xsd:enumeration value=\"background\"/>\n      <xsd:enumeration value=\"activeCaption\"/>\n      <xsd:enumeration value=\"inactiveCaption\"/>\n      <xsd:enumeration value=\"menu\"/>\n      <xsd:enumeration value=\"window\"/>\n      <xsd:enumeration value=\"windowFrame\"/>\n      <xsd:enumeration value=\"menuText\"/>\n      <xsd:enumeration value=\"windowText\"/>\n      <xsd:enumeration value=\"captionText\"/>\n      <xsd:enumeration value=\"activeBorder\"/>\n      <xsd:enumeration value=\"inactiveBorder\"/>\n      <xsd:enumeration value=\"appWorkspace\"/>\n      <xsd:enumeration value=\"highlight\"/>\n      <xsd:enumeration value=\"highlightText\"/>\n      <xsd:enumeration value=\"btnFace\"/>\n      <xsd:enumeration value=\"btnShadow\"/>\n      <xsd:enumeration value=\"grayText\"/>\n      <xsd:enumeration value=\"btnText\"/>\n      <xsd:enumeration value=\"inactiveCaptionText\"/>\n      <xsd:enumeration value=\"btnHighlight\"/>\n      <xsd:enumeration value=\"3dDkShadow\"/>\n      <xsd:enumeration value=\"3dLight\"/>\n      <xsd:enumeration value=\"infoText\"/>\n      <xsd:enumeration value=\"infoBk\"/>\n      <xsd:enumeration value=\"hotLight\"/>\n      <xsd:enumeration value=\"gradientActiveCaption\"/>\n      <xsd:enumeration value=\"gradientInactiveCaption\"/>\n      <xsd:enumeration value=\"menuHighlight\"/>\n      <xsd:enumeration value=\"menuBar\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SystemColor\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorTransform\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"val\" type=\"ST_SystemColorVal\" use=\"required\"/>\n    <xsd:attribute name=\"lastClr\" type=\"s:ST_HexColorRGB\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SchemeColorVal\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"bg1\"/>\n      <xsd:enumeration value=\"tx1\"/>\n      <xsd:enumeration value=\"bg2\"/>\n      <xsd:enumeration value=\"tx2\"/>\n      <xsd:enumeration value=\"accent1\"/>\n      <xsd:enumeration value=\"accent2\"/>\n      <xsd:enumeration value=\"accent3\"/>\n      <xsd:enumeration value=\"accent4\"/>\n      <xsd:enumeration value=\"accent5\"/>\n      <xsd:enumeration value=\"accent6\"/>\n      <xsd:enumeration value=\"hlink\"/>\n      <xsd:enumeration value=\"folHlink\"/>\n      <xsd:enumeration value=\"phClr\"/>\n      <xsd:enumeration value=\"dk1\"/>\n      <xsd:enumeration value=\"lt1\"/>\n      <xsd:enumeration value=\"dk2\"/>\n      <xsd:enumeration value=\"lt2\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SchemeColor\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorTransform\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"val\" type=\"ST_SchemeColorVal\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PresetColorVal\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"aliceBlue\"/>\n      <xsd:enumeration value=\"antiqueWhite\"/>\n      <xsd:enumeration value=\"aqua\"/>\n      <xsd:enumeration value=\"aquamarine\"/>\n      <xsd:enumeration value=\"azure\"/>\n      <xsd:enumeration value=\"beige\"/>\n      <xsd:enumeration value=\"bisque\"/>\n      <xsd:enumeration value=\"black\"/>\n      <xsd:enumeration value=\"blanchedAlmond\"/>\n      <xsd:enumeration value=\"blue\"/>\n      <xsd:enumeration value=\"blueViolet\"/>\n      <xsd:enumeration value=\"brown\"/>\n      <xsd:enumeration value=\"burlyWood\"/>\n      <xsd:enumeration value=\"cadetBlue\"/>\n      <xsd:enumeration value=\"chartreuse\"/>\n      <xsd:enumeration value=\"chocolate\"/>\n      <xsd:enumeration value=\"coral\"/>\n      <xsd:enumeration value=\"cornflowerBlue\"/>\n      <xsd:enumeration value=\"cornsilk\"/>\n      <xsd:enumeration value=\"crimson\"/>\n      <xsd:enumeration value=\"cyan\"/>\n      <xsd:enumeration value=\"darkBlue\"/>\n      <xsd:enumeration value=\"darkCyan\"/>\n      <xsd:enumeration value=\"darkGoldenrod\"/>\n      <xsd:enumeration value=\"darkGray\"/>\n      <xsd:enumeration value=\"darkGrey\"/>\n      <xsd:enumeration value=\"darkGreen\"/>\n      <xsd:enumeration value=\"darkKhaki\"/>\n      <xsd:enumeration value=\"darkMagenta\"/>\n      <xsd:enumeration value=\"darkOliveGreen\"/>\n      <xsd:enumeration value=\"darkOrange\"/>\n      <xsd:enumeration value=\"darkOrchid\"/>\n      <xsd:enumeration value=\"darkRed\"/>\n      <xsd:enumeration value=\"darkSalmon\"/>\n      <xsd:enumeration value=\"darkSeaGreen\"/>\n      <xsd:enumeration value=\"darkSlateBlue\"/>\n      <xsd:enumeration value=\"darkSlateGray\"/>\n      <xsd:enumeration value=\"darkSlateGrey\"/>\n      <xsd:enumeration value=\"darkTurquoise\"/>\n      <xsd:enumeration value=\"darkViolet\"/>\n      <xsd:enumeration value=\"dkBlue\"/>\n      <xsd:enumeration value=\"dkCyan\"/>\n      <xsd:enumeration value=\"dkGoldenrod\"/>\n      <xsd:enumeration value=\"dkGray\"/>\n      <xsd:enumeration value=\"dkGrey\"/>\n      <xsd:enumeration value=\"dkGreen\"/>\n      <xsd:enumeration value=\"dkKhaki\"/>\n      <xsd:enumeration value=\"dkMagenta\"/>\n      <xsd:enumeration value=\"dkOliveGreen\"/>\n      <xsd:enumeration value=\"dkOrange\"/>\n      <xsd:enumeration value=\"dkOrchid\"/>\n      <xsd:enumeration value=\"dkRed\"/>\n      <xsd:enumeration value=\"dkSalmon\"/>\n      <xsd:enumeration value=\"dkSeaGreen\"/>\n      <xsd:enumeration value=\"dkSlateBlue\"/>\n      <xsd:enumeration value=\"dkSlateGray\"/>\n      <xsd:enumeration value=\"dkSlateGrey\"/>\n      <xsd:enumeration value=\"dkTurquoise\"/>\n      <xsd:enumeration value=\"dkViolet\"/>\n      <xsd:enumeration value=\"deepPink\"/>\n      <xsd:enumeration value=\"deepSkyBlue\"/>\n      <xsd:enumeration value=\"dimGray\"/>\n      <xsd:enumeration value=\"dimGrey\"/>\n      <xsd:enumeration value=\"dodgerBlue\"/>\n      <xsd:enumeration value=\"firebrick\"/>\n      <xsd:enumeration value=\"floralWhite\"/>\n      <xsd:enumeration value=\"forestGreen\"/>\n      <xsd:enumeration value=\"fuchsia\"/>\n      <xsd:enumeration value=\"gainsboro\"/>\n      <xsd:enumeration value=\"ghostWhite\"/>\n      <xsd:enumeration value=\"gold\"/>\n      <xsd:enumeration value=\"goldenrod\"/>\n      <xsd:enumeration value=\"gray\"/>\n      <xsd:enumeration value=\"grey\"/>\n      <xsd:enumeration value=\"green\"/>\n      <xsd:enumeration value=\"greenYellow\"/>\n      <xsd:enumeration value=\"honeydew\"/>\n      <xsd:enumeration value=\"hotPink\"/>\n      <xsd:enumeration value=\"indianRed\"/>\n      <xsd:enumeration value=\"indigo\"/>\n      <xsd:enumeration value=\"ivory\"/>\n      <xsd:enumeration value=\"khaki\"/>\n      <xsd:enumeration value=\"lavender\"/>\n      <xsd:enumeration value=\"lavenderBlush\"/>\n      <xsd:enumeration value=\"lawnGreen\"/>\n      <xsd:enumeration value=\"lemonChiffon\"/>\n      <xsd:enumeration value=\"lightBlue\"/>\n      <xsd:enumeration value=\"lightCoral\"/>\n      <xsd:enumeration value=\"lightCyan\"/>\n      <xsd:enumeration value=\"lightGoldenrodYellow\"/>\n      <xsd:enumeration value=\"lightGray\"/>\n      <xsd:enumeration value=\"lightGrey\"/>\n      <xsd:enumeration value=\"lightGreen\"/>\n      <xsd:enumeration value=\"lightPink\"/>\n      <xsd:enumeration value=\"lightSalmon\"/>\n      <xsd:enumeration value=\"lightSeaGreen\"/>\n      <xsd:enumeration value=\"lightSkyBlue\"/>\n      <xsd:enumeration value=\"lightSlateGray\"/>\n      <xsd:enumeration value=\"lightSlateGrey\"/>\n      <xsd:enumeration value=\"lightSteelBlue\"/>\n      <xsd:enumeration value=\"lightYellow\"/>\n      <xsd:enumeration value=\"ltBlue\"/>\n      <xsd:enumeration value=\"ltCoral\"/>\n      <xsd:enumeration value=\"ltCyan\"/>\n      <xsd:enumeration value=\"ltGoldenrodYellow\"/>\n      <xsd:enumeration value=\"ltGray\"/>\n      <xsd:enumeration value=\"ltGrey\"/>\n      <xsd:enumeration value=\"ltGreen\"/>\n      <xsd:enumeration value=\"ltPink\"/>\n      <xsd:enumeration value=\"ltSalmon\"/>\n      <xsd:enumeration value=\"ltSeaGreen\"/>\n      <xsd:enumeration value=\"ltSkyBlue\"/>\n      <xsd:enumeration value=\"ltSlateGray\"/>\n      <xsd:enumeration value=\"ltSlateGrey\"/>\n      <xsd:enumeration value=\"ltSteelBlue\"/>\n      <xsd:enumeration value=\"ltYellow\"/>\n      <xsd:enumeration value=\"lime\"/>\n      <xsd:enumeration value=\"limeGreen\"/>\n      <xsd:enumeration value=\"linen\"/>\n      <xsd:enumeration value=\"magenta\"/>\n      <xsd:enumeration value=\"maroon\"/>\n      <xsd:enumeration value=\"medAquamarine\"/>\n      <xsd:enumeration value=\"medBlue\"/>\n      <xsd:enumeration value=\"medOrchid\"/>\n      <xsd:enumeration value=\"medPurple\"/>\n      <xsd:enumeration value=\"medSeaGreen\"/>\n      <xsd:enumeration value=\"medSlateBlue\"/>\n      <xsd:enumeration value=\"medSpringGreen\"/>\n      <xsd:enumeration value=\"medTurquoise\"/>\n      <xsd:enumeration value=\"medVioletRed\"/>\n      <xsd:enumeration value=\"mediumAquamarine\"/>\n      <xsd:enumeration value=\"mediumBlue\"/>\n      <xsd:enumeration value=\"mediumOrchid\"/>\n      <xsd:enumeration value=\"mediumPurple\"/>\n      <xsd:enumeration value=\"mediumSeaGreen\"/>\n      <xsd:enumeration value=\"mediumSlateBlue\"/>\n      <xsd:enumeration value=\"mediumSpringGreen\"/>\n      <xsd:enumeration value=\"mediumTurquoise\"/>\n      <xsd:enumeration value=\"mediumVioletRed\"/>\n      <xsd:enumeration value=\"midnightBlue\"/>\n      <xsd:enumeration value=\"mintCream\"/>\n      <xsd:enumeration value=\"mistyRose\"/>\n      <xsd:enumeration value=\"moccasin\"/>\n      <xsd:enumeration value=\"navajoWhite\"/>\n      <xsd:enumeration value=\"navy\"/>\n      <xsd:enumeration value=\"oldLace\"/>\n      <xsd:enumeration value=\"olive\"/>\n      <xsd:enumeration value=\"oliveDrab\"/>\n      <xsd:enumeration value=\"orange\"/>\n      <xsd:enumeration value=\"orangeRed\"/>\n      <xsd:enumeration value=\"orchid\"/>\n      <xsd:enumeration value=\"paleGoldenrod\"/>\n      <xsd:enumeration value=\"paleGreen\"/>\n      <xsd:enumeration value=\"paleTurquoise\"/>\n      <xsd:enumeration value=\"paleVioletRed\"/>\n      <xsd:enumeration value=\"papayaWhip\"/>\n      <xsd:enumeration value=\"peachPuff\"/>\n      <xsd:enumeration value=\"peru\"/>\n      <xsd:enumeration value=\"pink\"/>\n      <xsd:enumeration value=\"plum\"/>\n      <xsd:enumeration value=\"powderBlue\"/>\n      <xsd:enumeration value=\"purple\"/>\n      <xsd:enumeration value=\"red\"/>\n      <xsd:enumeration value=\"rosyBrown\"/>\n      <xsd:enumeration value=\"royalBlue\"/>\n      <xsd:enumeration value=\"saddleBrown\"/>\n      <xsd:enumeration value=\"salmon\"/>\n      <xsd:enumeration value=\"sandyBrown\"/>\n      <xsd:enumeration value=\"seaGreen\"/>\n      <xsd:enumeration value=\"seaShell\"/>\n      <xsd:enumeration value=\"sienna\"/>\n      <xsd:enumeration value=\"silver\"/>\n      <xsd:enumeration value=\"skyBlue\"/>\n      <xsd:enumeration value=\"slateBlue\"/>\n      <xsd:enumeration value=\"slateGray\"/>\n      <xsd:enumeration value=\"slateGrey\"/>\n      <xsd:enumeration value=\"snow\"/>\n      <xsd:enumeration value=\"springGreen\"/>\n      <xsd:enumeration value=\"steelBlue\"/>\n      <xsd:enumeration value=\"tan\"/>\n      <xsd:enumeration value=\"teal\"/>\n      <xsd:enumeration value=\"thistle\"/>\n      <xsd:enumeration value=\"tomato\"/>\n      <xsd:enumeration value=\"turquoise\"/>\n      <xsd:enumeration value=\"violet\"/>\n      <xsd:enumeration value=\"wheat\"/>\n      <xsd:enumeration value=\"white\"/>\n      <xsd:enumeration value=\"whiteSmoke\"/>\n      <xsd:enumeration value=\"yellow\"/>\n      <xsd:enumeration value=\"yellowGreen\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PresetColor\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorTransform\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"val\" type=\"ST_PresetColorVal\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_OfficeArtExtensionList\">\n    <xsd:sequence>\n      <xsd:element name=\"ext\" type=\"CT_OfficeArtExtension\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_OfficeArtExtensionList\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_OfficeArtExtensionList\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Scale2D\">\n    <xsd:sequence>\n      <xsd:element name=\"sx\" type=\"CT_Ratio\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sy\" type=\"CT_Ratio\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Transform2D\">\n    <xsd:sequence>\n      <xsd:element name=\"off\" type=\"CT_Point2D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ext\" type=\"CT_PositiveSize2D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"rot\" type=\"ST_Angle\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"flipH\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"flipV\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupTransform2D\">\n    <xsd:sequence>\n      <xsd:element name=\"off\" type=\"CT_Point2D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ext\" type=\"CT_PositiveSize2D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"chOff\" type=\"CT_Point2D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"chExt\" type=\"CT_PositiveSize2D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"rot\" type=\"ST_Angle\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"flipH\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"flipV\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Point3D\">\n    <xsd:attribute name=\"x\" type=\"ST_Coordinate\" use=\"required\"/>\n    <xsd:attribute name=\"y\" type=\"ST_Coordinate\" use=\"required\"/>\n    <xsd:attribute name=\"z\" type=\"ST_Coordinate\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Vector3D\">\n    <xsd:attribute name=\"dx\" type=\"ST_Coordinate\" use=\"required\"/>\n    <xsd:attribute name=\"dy\" type=\"ST_Coordinate\" use=\"required\"/>\n    <xsd:attribute name=\"dz\" type=\"ST_Coordinate\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SphereCoords\">\n    <xsd:attribute name=\"lat\" type=\"ST_PositiveFixedAngle\" use=\"required\"/>\n    <xsd:attribute name=\"lon\" type=\"ST_PositiveFixedAngle\" use=\"required\"/>\n    <xsd:attribute name=\"rev\" type=\"ST_PositiveFixedAngle\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RelativeRect\">\n    <xsd:attribute name=\"l\" type=\"ST_Percentage\" use=\"optional\" default=\"0%\"/>\n    <xsd:attribute name=\"t\" type=\"ST_Percentage\" use=\"optional\" default=\"0%\"/>\n    <xsd:attribute name=\"r\" type=\"ST_Percentage\" use=\"optional\" default=\"0%\"/>\n    <xsd:attribute name=\"b\" type=\"ST_Percentage\" use=\"optional\" default=\"0%\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_RectAlignment\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"tl\"/>\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"tr\"/>\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"r\"/>\n      <xsd:enumeration value=\"bl\"/>\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"br\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:group name=\"EG_ColorChoice\">\n    <xsd:choice>\n      <xsd:element name=\"scrgbClr\" type=\"CT_ScRgbColor\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"srgbClr\" type=\"CT_SRgbColor\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hslClr\" type=\"CT_HslColor\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sysClr\" type=\"CT_SystemColor\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"schemeClr\" type=\"CT_SchemeColor\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"prstClr\" type=\"CT_PresetColor\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_Color\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColorMRU\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_BlackWhiteMode\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"clr\"/>\n      <xsd:enumeration value=\"auto\"/>\n      <xsd:enumeration value=\"gray\"/>\n      <xsd:enumeration value=\"ltGray\"/>\n      <xsd:enumeration value=\"invGray\"/>\n      <xsd:enumeration value=\"grayWhite\"/>\n      <xsd:enumeration value=\"blackGray\"/>\n      <xsd:enumeration value=\"blackWhite\"/>\n      <xsd:enumeration value=\"black\"/>\n      <xsd:enumeration value=\"white\"/>\n      <xsd:enumeration value=\"hidden\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:attributeGroup name=\"AG_Blob\">\n    <xsd:attribute ref=\"r:embed\" use=\"optional\" default=\"\"/>\n    <xsd:attribute ref=\"r:link\" use=\"optional\" default=\"\"/>\n  </xsd:attributeGroup>\n  <xsd:complexType name=\"CT_EmbeddedWAVAudioFile\">\n    <xsd:attribute ref=\"r:embed\" use=\"required\"/>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Hyperlink\">\n    <xsd:sequence>\n      <xsd:element name=\"snd\" type=\"CT_EmbeddedWAVAudioFile\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n    <xsd:attribute name=\"invalidUrl\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"action\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"tgtFrame\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"tooltip\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"history\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"highlightClick\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"endSnd\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DrawingElementId\">\n    <xsd:restriction base=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:attributeGroup name=\"AG_Locking\">\n    <xsd:attribute name=\"noGrp\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noSelect\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noRot\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noChangeAspect\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noMove\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noResize\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noEditPoints\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noAdjustHandles\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noChangeArrowheads\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noChangeShapeType\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:attributeGroup>\n  <xsd:complexType name=\"CT_ConnectorLocking\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_Locking\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ShapeLocking\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_Locking\"/>\n    <xsd:attribute name=\"noTextEdit\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PictureLocking\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_Locking\"/>\n    <xsd:attribute name=\"noCrop\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupLocking\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"noGrp\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noUngrp\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noSelect\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noRot\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noChangeAspect\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noMove\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noResize\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GraphicalObjectFrameLocking\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"noGrp\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noDrilldown\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noSelect\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noChangeAspect\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noMove\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"noResize\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ContentPartLocking\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_Locking\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NonVisualDrawingProps\">\n    <xsd:sequence>\n      <xsd:element name=\"hlinkClick\" type=\"CT_Hyperlink\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hlinkHover\" type=\"CT_Hyperlink\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"ST_DrawingElementId\" use=\"required\"/>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"descr\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"hidden\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"title\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NonVisualDrawingShapeProps\">\n    <xsd:sequence>\n      <xsd:element name=\"spLocks\" type=\"CT_ShapeLocking\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"txBox\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NonVisualConnectorProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"cxnSpLocks\" type=\"CT_ConnectorLocking\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"stCxn\" type=\"CT_Connection\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"endCxn\" type=\"CT_Connection\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NonVisualPictureProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"picLocks\" type=\"CT_PictureLocking\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"preferRelativeResize\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NonVisualGroupDrawingShapeProps\">\n    <xsd:sequence>\n      <xsd:element name=\"grpSpLocks\" type=\"CT_GroupLocking\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NonVisualGraphicFrameProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"graphicFrameLocks\" type=\"CT_GraphicalObjectFrameLocking\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NonVisualContentPartProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"cpLocks\" type=\"CT_ContentPartLocking\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"isComment\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GraphicalObjectData\">\n    <xsd:sequence>\n      <xsd:any minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"strict\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uri\" type=\"xsd:token\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GraphicalObject\">\n    <xsd:sequence>\n      <xsd:element name=\"graphicData\" type=\"CT_GraphicalObjectData\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"graphic\" type=\"CT_GraphicalObject\"/>\n  <xsd:simpleType name=\"ST_ChartBuildStep\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"category\"/>\n      <xsd:enumeration value=\"ptInCategory\"/>\n      <xsd:enumeration value=\"series\"/>\n      <xsd:enumeration value=\"ptInSeries\"/>\n      <xsd:enumeration value=\"allPts\"/>\n      <xsd:enumeration value=\"gridLegend\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DgmBuildStep\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"sp\"/>\n      <xsd:enumeration value=\"bg\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_AnimationDgmElement\">\n    <xsd:attribute name=\"id\" type=\"s:ST_Guid\" use=\"optional\"\n      default=\"{00000000-0000-0000-0000-000000000000}\"/>\n    <xsd:attribute name=\"bldStep\" type=\"ST_DgmBuildStep\" use=\"optional\" default=\"sp\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AnimationChartElement\">\n    <xsd:attribute name=\"seriesIdx\" type=\"xsd:int\" use=\"optional\" default=\"-1\"/>\n    <xsd:attribute name=\"categoryIdx\" type=\"xsd:int\" use=\"optional\" default=\"-1\"/>\n    <xsd:attribute name=\"bldStep\" type=\"ST_ChartBuildStep\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AnimationElementChoice\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"dgm\" type=\"CT_AnimationDgmElement\"/>\n      <xsd:element name=\"chart\" type=\"CT_AnimationChartElement\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_AnimationBuildType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"allAtOnce\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AnimationDgmOnlyBuildType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"one\"/>\n      <xsd:enumeration value=\"lvlOne\"/>\n      <xsd:enumeration value=\"lvlAtOnce\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AnimationDgmBuildType\">\n    <xsd:union memberTypes=\"ST_AnimationBuildType ST_AnimationDgmOnlyBuildType\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_AnimationDgmBuildProperties\">\n    <xsd:attribute name=\"bld\" type=\"ST_AnimationDgmBuildType\" use=\"optional\" default=\"allAtOnce\"/>\n    <xsd:attribute name=\"rev\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_AnimationChartOnlyBuildType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"series\"/>\n      <xsd:enumeration value=\"category\"/>\n      <xsd:enumeration value=\"seriesEl\"/>\n      <xsd:enumeration value=\"categoryEl\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AnimationChartBuildType\">\n    <xsd:union memberTypes=\"ST_AnimationBuildType ST_AnimationChartOnlyBuildType\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_AnimationChartBuildProperties\">\n    <xsd:attribute name=\"bld\" type=\"ST_AnimationChartBuildType\" use=\"optional\" default=\"allAtOnce\"/>\n    <xsd:attribute name=\"animBg\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AnimationGraphicalObjectBuildProperties\">\n    <xsd:choice>\n      <xsd:element name=\"bldDgm\" type=\"CT_AnimationDgmBuildProperties\"/>\n      <xsd:element name=\"bldChart\" type=\"CT_AnimationChartBuildProperties\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BackgroundFormatting\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_FillProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_EffectProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WholeE2oFormatting\">\n    <xsd:sequence>\n      <xsd:element name=\"ln\" type=\"CT_LineProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_EffectProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GvmlUseShapeRectangle\"/>\n  <xsd:complexType name=\"CT_GvmlTextShape\">\n    <xsd:sequence>\n      <xsd:element name=\"txBody\" type=\"CT_TextBody\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:choice>\n        <xsd:element name=\"useSpRect\" type=\"CT_GvmlUseShapeRectangle\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"xfrm\" type=\"CT_Transform2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GvmlShapeNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvSpPr\" type=\"CT_NonVisualDrawingShapeProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GvmlShape\">\n    <xsd:sequence>\n      <xsd:element name=\"nvSpPr\" type=\"CT_GvmlShapeNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txSp\" type=\"CT_GvmlTextShape\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GvmlConnectorNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvCxnSpPr\" type=\"CT_NonVisualConnectorProperties\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GvmlConnector\">\n    <xsd:sequence>\n      <xsd:element name=\"nvCxnSpPr\" type=\"CT_GvmlConnectorNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GvmlPictureNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvPicPr\" type=\"CT_NonVisualPictureProperties\" minOccurs=\"1\" maxOccurs=\"1\"\n      />\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GvmlPicture\">\n    <xsd:sequence>\n      <xsd:element name=\"nvPicPr\" type=\"CT_GvmlPictureNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"blipFill\" type=\"CT_BlipFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GvmlGraphicFrameNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvGraphicFramePr\" type=\"CT_NonVisualGraphicFrameProperties\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GvmlGraphicalObjectFrame\">\n    <xsd:sequence>\n      <xsd:element name=\"nvGraphicFramePr\" type=\"CT_GvmlGraphicFrameNonVisual\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n      <xsd:element ref=\"graphic\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"xfrm\" type=\"CT_Transform2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GvmlGroupShapeNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvGrpSpPr\" type=\"CT_NonVisualGroupDrawingShapeProps\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GvmlGroupShape\">\n    <xsd:sequence>\n      <xsd:element name=\"nvGrpSpPr\" type=\"CT_GvmlGroupShapeNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"grpSpPr\" type=\"CT_GroupShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n        <xsd:element name=\"txSp\" type=\"CT_GvmlTextShape\"/>\n        <xsd:element name=\"sp\" type=\"CT_GvmlShape\"/>\n        <xsd:element name=\"cxnSp\" type=\"CT_GvmlConnector\"/>\n        <xsd:element name=\"pic\" type=\"CT_GvmlPicture\"/>\n        <xsd:element name=\"graphicFrame\" type=\"CT_GvmlGraphicalObjectFrame\"/>\n        <xsd:element name=\"grpSp\" type=\"CT_GvmlGroupShape\"/>\n      </xsd:choice>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PresetCameraType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"legacyObliqueTopLeft\"/>\n      <xsd:enumeration value=\"legacyObliqueTop\"/>\n      <xsd:enumeration value=\"legacyObliqueTopRight\"/>\n      <xsd:enumeration value=\"legacyObliqueLeft\"/>\n      <xsd:enumeration value=\"legacyObliqueFront\"/>\n      <xsd:enumeration value=\"legacyObliqueRight\"/>\n      <xsd:enumeration value=\"legacyObliqueBottomLeft\"/>\n      <xsd:enumeration value=\"legacyObliqueBottom\"/>\n      <xsd:enumeration value=\"legacyObliqueBottomRight\"/>\n      <xsd:enumeration value=\"legacyPerspectiveTopLeft\"/>\n      <xsd:enumeration value=\"legacyPerspectiveTop\"/>\n      <xsd:enumeration value=\"legacyPerspectiveTopRight\"/>\n      <xsd:enumeration value=\"legacyPerspectiveLeft\"/>\n      <xsd:enumeration value=\"legacyPerspectiveFront\"/>\n      <xsd:enumeration value=\"legacyPerspectiveRight\"/>\n      <xsd:enumeration value=\"legacyPerspectiveBottomLeft\"/>\n      <xsd:enumeration value=\"legacyPerspectiveBottom\"/>\n      <xsd:enumeration value=\"legacyPerspectiveBottomRight\"/>\n      <xsd:enumeration value=\"orthographicFront\"/>\n      <xsd:enumeration value=\"isometricTopUp\"/>\n      <xsd:enumeration value=\"isometricTopDown\"/>\n      <xsd:enumeration value=\"isometricBottomUp\"/>\n      <xsd:enumeration value=\"isometricBottomDown\"/>\n      <xsd:enumeration value=\"isometricLeftUp\"/>\n      <xsd:enumeration value=\"isometricLeftDown\"/>\n      <xsd:enumeration value=\"isometricRightUp\"/>\n      <xsd:enumeration value=\"isometricRightDown\"/>\n      <xsd:enumeration value=\"isometricOffAxis1Left\"/>\n      <xsd:enumeration value=\"isometricOffAxis1Right\"/>\n      <xsd:enumeration value=\"isometricOffAxis1Top\"/>\n      <xsd:enumeration value=\"isometricOffAxis2Left\"/>\n      <xsd:enumeration value=\"isometricOffAxis2Right\"/>\n      <xsd:enumeration value=\"isometricOffAxis2Top\"/>\n      <xsd:enumeration value=\"isometricOffAxis3Left\"/>\n      <xsd:enumeration value=\"isometricOffAxis3Right\"/>\n      <xsd:enumeration value=\"isometricOffAxis3Bottom\"/>\n      <xsd:enumeration value=\"isometricOffAxis4Left\"/>\n      <xsd:enumeration value=\"isometricOffAxis4Right\"/>\n      <xsd:enumeration value=\"isometricOffAxis4Bottom\"/>\n      <xsd:enumeration value=\"obliqueTopLeft\"/>\n      <xsd:enumeration value=\"obliqueTop\"/>\n      <xsd:enumeration value=\"obliqueTopRight\"/>\n      <xsd:enumeration value=\"obliqueLeft\"/>\n      <xsd:enumeration value=\"obliqueRight\"/>\n      <xsd:enumeration value=\"obliqueBottomLeft\"/>\n      <xsd:enumeration value=\"obliqueBottom\"/>\n      <xsd:enumeration value=\"obliqueBottomRight\"/>\n      <xsd:enumeration value=\"perspectiveFront\"/>\n      <xsd:enumeration value=\"perspectiveLeft\"/>\n      <xsd:enumeration value=\"perspectiveRight\"/>\n      <xsd:enumeration value=\"perspectiveAbove\"/>\n      <xsd:enumeration value=\"perspectiveBelow\"/>\n      <xsd:enumeration value=\"perspectiveAboveLeftFacing\"/>\n      <xsd:enumeration value=\"perspectiveAboveRightFacing\"/>\n      <xsd:enumeration value=\"perspectiveContrastingLeftFacing\"/>\n      <xsd:enumeration value=\"perspectiveContrastingRightFacing\"/>\n      <xsd:enumeration value=\"perspectiveHeroicLeftFacing\"/>\n      <xsd:enumeration value=\"perspectiveHeroicRightFacing\"/>\n      <xsd:enumeration value=\"perspectiveHeroicExtremeLeftFacing\"/>\n      <xsd:enumeration value=\"perspectiveHeroicExtremeRightFacing\"/>\n      <xsd:enumeration value=\"perspectiveRelaxed\"/>\n      <xsd:enumeration value=\"perspectiveRelaxedModerately\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FOVAngle\">\n    <xsd:restriction base=\"ST_Angle\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"10800000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Camera\">\n    <xsd:sequence>\n      <xsd:element name=\"rot\" type=\"CT_SphereCoords\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"prst\" type=\"ST_PresetCameraType\" use=\"required\"/>\n    <xsd:attribute name=\"fov\" type=\"ST_FOVAngle\" use=\"optional\"/>\n    <xsd:attribute name=\"zoom\" type=\"ST_PositivePercentage\" use=\"optional\" default=\"100%\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_LightRigDirection\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"tl\"/>\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"tr\"/>\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"r\"/>\n      <xsd:enumeration value=\"bl\"/>\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"br\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_LightRigType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"legacyFlat1\"/>\n      <xsd:enumeration value=\"legacyFlat2\"/>\n      <xsd:enumeration value=\"legacyFlat3\"/>\n      <xsd:enumeration value=\"legacyFlat4\"/>\n      <xsd:enumeration value=\"legacyNormal1\"/>\n      <xsd:enumeration value=\"legacyNormal2\"/>\n      <xsd:enumeration value=\"legacyNormal3\"/>\n      <xsd:enumeration value=\"legacyNormal4\"/>\n      <xsd:enumeration value=\"legacyHarsh1\"/>\n      <xsd:enumeration value=\"legacyHarsh2\"/>\n      <xsd:enumeration value=\"legacyHarsh3\"/>\n      <xsd:enumeration value=\"legacyHarsh4\"/>\n      <xsd:enumeration value=\"threePt\"/>\n      <xsd:enumeration value=\"balanced\"/>\n      <xsd:enumeration value=\"soft\"/>\n      <xsd:enumeration value=\"harsh\"/>\n      <xsd:enumeration value=\"flood\"/>\n      <xsd:enumeration value=\"contrasting\"/>\n      <xsd:enumeration value=\"morning\"/>\n      <xsd:enumeration value=\"sunrise\"/>\n      <xsd:enumeration value=\"sunset\"/>\n      <xsd:enumeration value=\"chilly\"/>\n      <xsd:enumeration value=\"freezing\"/>\n      <xsd:enumeration value=\"flat\"/>\n      <xsd:enumeration value=\"twoPt\"/>\n      <xsd:enumeration value=\"glow\"/>\n      <xsd:enumeration value=\"brightRoom\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LightRig\">\n    <xsd:sequence>\n      <xsd:element name=\"rot\" type=\"CT_SphereCoords\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"rig\" type=\"ST_LightRigType\" use=\"required\"/>\n    <xsd:attribute name=\"dir\" type=\"ST_LightRigDirection\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Scene3D\">\n    <xsd:sequence>\n      <xsd:element name=\"camera\" type=\"CT_Camera\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lightRig\" type=\"CT_LightRig\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"backdrop\" type=\"CT_Backdrop\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Backdrop\">\n    <xsd:sequence>\n      <xsd:element name=\"anchor\" type=\"CT_Point3D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"norm\" type=\"CT_Vector3D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"up\" type=\"CT_Vector3D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_BevelPresetType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"relaxedInset\"/>\n      <xsd:enumeration value=\"circle\"/>\n      <xsd:enumeration value=\"slope\"/>\n      <xsd:enumeration value=\"cross\"/>\n      <xsd:enumeration value=\"angle\"/>\n      <xsd:enumeration value=\"softRound\"/>\n      <xsd:enumeration value=\"convex\"/>\n      <xsd:enumeration value=\"coolSlant\"/>\n      <xsd:enumeration value=\"divot\"/>\n      <xsd:enumeration value=\"riblet\"/>\n      <xsd:enumeration value=\"hardEdge\"/>\n      <xsd:enumeration value=\"artDeco\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Bevel\">\n    <xsd:attribute name=\"w\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"76200\"/>\n    <xsd:attribute name=\"h\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"76200\"/>\n    <xsd:attribute name=\"prst\" type=\"ST_BevelPresetType\" use=\"optional\" default=\"circle\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PresetMaterialType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"legacyMatte\"/>\n      <xsd:enumeration value=\"legacyPlastic\"/>\n      <xsd:enumeration value=\"legacyMetal\"/>\n      <xsd:enumeration value=\"legacyWireframe\"/>\n      <xsd:enumeration value=\"matte\"/>\n      <xsd:enumeration value=\"plastic\"/>\n      <xsd:enumeration value=\"metal\"/>\n      <xsd:enumeration value=\"warmMatte\"/>\n      <xsd:enumeration value=\"translucentPowder\"/>\n      <xsd:enumeration value=\"powder\"/>\n      <xsd:enumeration value=\"dkEdge\"/>\n      <xsd:enumeration value=\"softEdge\"/>\n      <xsd:enumeration value=\"clear\"/>\n      <xsd:enumeration value=\"flat\"/>\n      <xsd:enumeration value=\"softmetal\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Shape3D\">\n    <xsd:sequence>\n      <xsd:element name=\"bevelT\" type=\"CT_Bevel\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bevelB\" type=\"CT_Bevel\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extrusionClr\" type=\"CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"contourClr\" type=\"CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"z\" type=\"ST_Coordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"extrusionH\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"contourW\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"prstMaterial\" type=\"ST_PresetMaterialType\" use=\"optional\"\n      default=\"warmMatte\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FlatText\">\n    <xsd:attribute name=\"z\" type=\"ST_Coordinate\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_Text3D\">\n    <xsd:choice>\n      <xsd:element name=\"sp3d\" type=\"CT_Shape3D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"flatTx\" type=\"CT_FlatText\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_AlphaBiLevelEffect\">\n    <xsd:attribute name=\"thresh\" type=\"ST_PositiveFixedPercentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AlphaCeilingEffect\"/>\n  <xsd:complexType name=\"CT_AlphaFloorEffect\"/>\n  <xsd:complexType name=\"CT_AlphaInverseEffect\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AlphaModulateFixedEffect\">\n    <xsd:attribute name=\"amt\" type=\"ST_PositivePercentage\" use=\"optional\" default=\"100%\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AlphaOutsetEffect\">\n    <xsd:attribute name=\"rad\" type=\"ST_Coordinate\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AlphaReplaceEffect\">\n    <xsd:attribute name=\"a\" type=\"ST_PositiveFixedPercentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BiLevelEffect\">\n    <xsd:attribute name=\"thresh\" type=\"ST_PositiveFixedPercentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BlurEffect\">\n    <xsd:attribute name=\"rad\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"grow\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColorChangeEffect\">\n    <xsd:sequence>\n      <xsd:element name=\"clrFrom\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"clrTo\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"useA\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColorReplaceEffect\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DuotoneEffect\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"2\" maxOccurs=\"2\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GlowEffect\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"rad\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GrayscaleEffect\"/>\n  <xsd:complexType name=\"CT_HSLEffect\">\n    <xsd:attribute name=\"hue\" type=\"ST_PositiveFixedAngle\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"sat\" type=\"ST_FixedPercentage\" use=\"optional\" default=\"0%\"/>\n    <xsd:attribute name=\"lum\" type=\"ST_FixedPercentage\" use=\"optional\" default=\"0%\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_InnerShadowEffect\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"blurRad\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"dist\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"dir\" type=\"ST_PositiveFixedAngle\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LuminanceEffect\">\n    <xsd:attribute name=\"bright\" type=\"ST_FixedPercentage\" use=\"optional\" default=\"0%\"/>\n    <xsd:attribute name=\"contrast\" type=\"ST_FixedPercentage\" use=\"optional\" default=\"0%\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OuterShadowEffect\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"blurRad\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"dist\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"dir\" type=\"ST_PositiveFixedAngle\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"sx\" type=\"ST_Percentage\" use=\"optional\" default=\"100%\"/>\n    <xsd:attribute name=\"sy\" type=\"ST_Percentage\" use=\"optional\" default=\"100%\"/>\n    <xsd:attribute name=\"kx\" type=\"ST_FixedAngle\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"ky\" type=\"ST_FixedAngle\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"algn\" type=\"ST_RectAlignment\" use=\"optional\" default=\"b\"/>\n    <xsd:attribute name=\"rotWithShape\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PresetShadowVal\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"shdw1\"/>\n      <xsd:enumeration value=\"shdw2\"/>\n      <xsd:enumeration value=\"shdw3\"/>\n      <xsd:enumeration value=\"shdw4\"/>\n      <xsd:enumeration value=\"shdw5\"/>\n      <xsd:enumeration value=\"shdw6\"/>\n      <xsd:enumeration value=\"shdw7\"/>\n      <xsd:enumeration value=\"shdw8\"/>\n      <xsd:enumeration value=\"shdw9\"/>\n      <xsd:enumeration value=\"shdw10\"/>\n      <xsd:enumeration value=\"shdw11\"/>\n      <xsd:enumeration value=\"shdw12\"/>\n      <xsd:enumeration value=\"shdw13\"/>\n      <xsd:enumeration value=\"shdw14\"/>\n      <xsd:enumeration value=\"shdw15\"/>\n      <xsd:enumeration value=\"shdw16\"/>\n      <xsd:enumeration value=\"shdw17\"/>\n      <xsd:enumeration value=\"shdw18\"/>\n      <xsd:enumeration value=\"shdw19\"/>\n      <xsd:enumeration value=\"shdw20\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PresetShadowEffect\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"prst\" type=\"ST_PresetShadowVal\" use=\"required\"/>\n    <xsd:attribute name=\"dist\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"dir\" type=\"ST_PositiveFixedAngle\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ReflectionEffect\">\n    <xsd:attribute name=\"blurRad\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"stA\" type=\"ST_PositiveFixedPercentage\" use=\"optional\" default=\"100%\"/>\n    <xsd:attribute name=\"stPos\" type=\"ST_PositiveFixedPercentage\" use=\"optional\" default=\"0%\"/>\n    <xsd:attribute name=\"endA\" type=\"ST_PositiveFixedPercentage\" use=\"optional\" default=\"0%\"/>\n    <xsd:attribute name=\"endPos\" type=\"ST_PositiveFixedPercentage\" use=\"optional\" default=\"100%\"/>\n    <xsd:attribute name=\"dist\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"dir\" type=\"ST_PositiveFixedAngle\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"fadeDir\" type=\"ST_PositiveFixedAngle\" use=\"optional\" default=\"5400000\"/>\n    <xsd:attribute name=\"sx\" type=\"ST_Percentage\" use=\"optional\" default=\"100%\"/>\n    <xsd:attribute name=\"sy\" type=\"ST_Percentage\" use=\"optional\" default=\"100%\"/>\n    <xsd:attribute name=\"kx\" type=\"ST_FixedAngle\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"ky\" type=\"ST_FixedAngle\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"algn\" type=\"ST_RectAlignment\" use=\"optional\" default=\"b\"/>\n    <xsd:attribute name=\"rotWithShape\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RelativeOffsetEffect\">\n    <xsd:attribute name=\"tx\" type=\"ST_Percentage\" use=\"optional\" default=\"0%\"/>\n    <xsd:attribute name=\"ty\" type=\"ST_Percentage\" use=\"optional\" default=\"0%\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SoftEdgesEffect\">\n    <xsd:attribute name=\"rad\" type=\"ST_PositiveCoordinate\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TintEffect\">\n    <xsd:attribute name=\"hue\" type=\"ST_PositiveFixedAngle\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"amt\" type=\"ST_FixedPercentage\" use=\"optional\" default=\"0%\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TransformEffect\">\n    <xsd:attribute name=\"sx\" type=\"ST_Percentage\" use=\"optional\" default=\"100%\"/>\n    <xsd:attribute name=\"sy\" type=\"ST_Percentage\" use=\"optional\" default=\"100%\"/>\n    <xsd:attribute name=\"kx\" type=\"ST_FixedAngle\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"ky\" type=\"ST_FixedAngle\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"tx\" type=\"ST_Coordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"ty\" type=\"ST_Coordinate\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NoFillProperties\"/>\n  <xsd:complexType name=\"CT_SolidColorFillProperties\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LinearShadeProperties\">\n    <xsd:attribute name=\"ang\" type=\"ST_PositiveFixedAngle\" use=\"optional\"/>\n    <xsd:attribute name=\"scaled\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PathShadeType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"shape\"/>\n      <xsd:enumeration value=\"circle\"/>\n      <xsd:enumeration value=\"rect\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PathShadeProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"fillToRect\" type=\"CT_RelativeRect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"path\" type=\"ST_PathShadeType\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ShadeProperties\">\n    <xsd:choice>\n      <xsd:element name=\"lin\" type=\"CT_LinearShadeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"path\" type=\"CT_PathShadeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:simpleType name=\"ST_TileFlipMode\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"x\"/>\n      <xsd:enumeration value=\"y\"/>\n      <xsd:enumeration value=\"xy\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_GradientStop\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"pos\" type=\"ST_PositiveFixedPercentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GradientStopList\">\n    <xsd:sequence>\n      <xsd:element name=\"gs\" type=\"CT_GradientStop\" minOccurs=\"2\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GradientFillProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"gsLst\" type=\"CT_GradientStopList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_ShadeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tileRect\" type=\"CT_RelativeRect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"flip\" type=\"ST_TileFlipMode\" use=\"optional\" default=\"none\"/>\n    <xsd:attribute name=\"rotWithShape\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TileInfoProperties\">\n    <xsd:attribute name=\"tx\" type=\"ST_Coordinate\" use=\"optional\"/>\n    <xsd:attribute name=\"ty\" type=\"ST_Coordinate\" use=\"optional\"/>\n    <xsd:attribute name=\"sx\" type=\"ST_Percentage\" use=\"optional\"/>\n    <xsd:attribute name=\"sy\" type=\"ST_Percentage\" use=\"optional\"/>\n    <xsd:attribute name=\"flip\" type=\"ST_TileFlipMode\" use=\"optional\" default=\"none\"/>\n    <xsd:attribute name=\"algn\" type=\"ST_RectAlignment\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_StretchInfoProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"fillRect\" type=\"CT_RelativeRect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_FillModeProperties\">\n    <xsd:choice>\n      <xsd:element name=\"tile\" type=\"CT_TileInfoProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"stretch\" type=\"CT_StretchInfoProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:simpleType name=\"ST_BlipCompression\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"email\"/>\n      <xsd:enumeration value=\"screen\"/>\n      <xsd:enumeration value=\"print\"/>\n      <xsd:enumeration value=\"hqprint\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Blip\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n        <xsd:element name=\"alphaBiLevel\" type=\"CT_AlphaBiLevelEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"alphaCeiling\" type=\"CT_AlphaCeilingEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"alphaFloor\" type=\"CT_AlphaFloorEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"alphaInv\" type=\"CT_AlphaInverseEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"alphaMod\" type=\"CT_AlphaModulateEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"alphaModFix\" type=\"CT_AlphaModulateFixedEffect\" minOccurs=\"1\"\n          maxOccurs=\"1\"/>\n        <xsd:element name=\"alphaRepl\" type=\"CT_AlphaReplaceEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"biLevel\" type=\"CT_BiLevelEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"blur\" type=\"CT_BlurEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"clrChange\" type=\"CT_ColorChangeEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"clrRepl\" type=\"CT_ColorReplaceEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"duotone\" type=\"CT_DuotoneEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"fillOverlay\" type=\"CT_FillOverlayEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"grayscl\" type=\"CT_GrayscaleEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"hsl\" type=\"CT_HSLEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"lum\" type=\"CT_LuminanceEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"tint\" type=\"CT_TintEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_Blob\"/>\n    <xsd:attribute name=\"cstate\" type=\"ST_BlipCompression\" use=\"optional\" default=\"none\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BlipFillProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"blip\" type=\"CT_Blip\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"srcRect\" type=\"CT_RelativeRect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_FillModeProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"dpi\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"rotWithShape\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PresetPatternVal\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"pct5\"/>\n      <xsd:enumeration value=\"pct10\"/>\n      <xsd:enumeration value=\"pct20\"/>\n      <xsd:enumeration value=\"pct25\"/>\n      <xsd:enumeration value=\"pct30\"/>\n      <xsd:enumeration value=\"pct40\"/>\n      <xsd:enumeration value=\"pct50\"/>\n      <xsd:enumeration value=\"pct60\"/>\n      <xsd:enumeration value=\"pct70\"/>\n      <xsd:enumeration value=\"pct75\"/>\n      <xsd:enumeration value=\"pct80\"/>\n      <xsd:enumeration value=\"pct90\"/>\n      <xsd:enumeration value=\"horz\"/>\n      <xsd:enumeration value=\"vert\"/>\n      <xsd:enumeration value=\"ltHorz\"/>\n      <xsd:enumeration value=\"ltVert\"/>\n      <xsd:enumeration value=\"dkHorz\"/>\n      <xsd:enumeration value=\"dkVert\"/>\n      <xsd:enumeration value=\"narHorz\"/>\n      <xsd:enumeration value=\"narVert\"/>\n      <xsd:enumeration value=\"dashHorz\"/>\n      <xsd:enumeration value=\"dashVert\"/>\n      <xsd:enumeration value=\"cross\"/>\n      <xsd:enumeration value=\"dnDiag\"/>\n      <xsd:enumeration value=\"upDiag\"/>\n      <xsd:enumeration value=\"ltDnDiag\"/>\n      <xsd:enumeration value=\"ltUpDiag\"/>\n      <xsd:enumeration value=\"dkDnDiag\"/>\n      <xsd:enumeration value=\"dkUpDiag\"/>\n      <xsd:enumeration value=\"wdDnDiag\"/>\n      <xsd:enumeration value=\"wdUpDiag\"/>\n      <xsd:enumeration value=\"dashDnDiag\"/>\n      <xsd:enumeration value=\"dashUpDiag\"/>\n      <xsd:enumeration value=\"diagCross\"/>\n      <xsd:enumeration value=\"smCheck\"/>\n      <xsd:enumeration value=\"lgCheck\"/>\n      <xsd:enumeration value=\"smGrid\"/>\n      <xsd:enumeration value=\"lgGrid\"/>\n      <xsd:enumeration value=\"dotGrid\"/>\n      <xsd:enumeration value=\"smConfetti\"/>\n      <xsd:enumeration value=\"lgConfetti\"/>\n      <xsd:enumeration value=\"horzBrick\"/>\n      <xsd:enumeration value=\"diagBrick\"/>\n      <xsd:enumeration value=\"solidDmnd\"/>\n      <xsd:enumeration value=\"openDmnd\"/>\n      <xsd:enumeration value=\"dotDmnd\"/>\n      <xsd:enumeration value=\"plaid\"/>\n      <xsd:enumeration value=\"sphere\"/>\n      <xsd:enumeration value=\"weave\"/>\n      <xsd:enumeration value=\"divot\"/>\n      <xsd:enumeration value=\"shingle\"/>\n      <xsd:enumeration value=\"wave\"/>\n      <xsd:enumeration value=\"trellis\"/>\n      <xsd:enumeration value=\"zigZag\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PatternFillProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"fgClr\" type=\"CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bgClr\" type=\"CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"prst\" type=\"ST_PresetPatternVal\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupFillProperties\"/>\n  <xsd:group name=\"EG_FillProperties\">\n    <xsd:choice>\n      <xsd:element name=\"noFill\" type=\"CT_NoFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"solidFill\" type=\"CT_SolidColorFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gradFill\" type=\"CT_GradientFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"blipFill\" type=\"CT_BlipFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pattFill\" type=\"CT_PatternFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"grpFill\" type=\"CT_GroupFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_FillProperties\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_FillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FillEffect\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_FillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_BlendMode\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"over\"/>\n      <xsd:enumeration value=\"mult\"/>\n      <xsd:enumeration value=\"screen\"/>\n      <xsd:enumeration value=\"darken\"/>\n      <xsd:enumeration value=\"lighten\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_FillOverlayEffect\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_FillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"blend\" type=\"ST_BlendMode\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EffectReference\">\n    <xsd:attribute name=\"ref\" type=\"xsd:token\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_Effect\">\n    <xsd:choice>\n      <xsd:element name=\"cont\" type=\"CT_EffectContainer\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"effect\" type=\"CT_EffectReference\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"alphaBiLevel\" type=\"CT_AlphaBiLevelEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"alphaCeiling\" type=\"CT_AlphaCeilingEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"alphaFloor\" type=\"CT_AlphaFloorEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"alphaInv\" type=\"CT_AlphaInverseEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"alphaMod\" type=\"CT_AlphaModulateEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"alphaModFix\" type=\"CT_AlphaModulateFixedEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"alphaOutset\" type=\"CT_AlphaOutsetEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"alphaRepl\" type=\"CT_AlphaReplaceEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"biLevel\" type=\"CT_BiLevelEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"blend\" type=\"CT_BlendEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"blur\" type=\"CT_BlurEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"clrChange\" type=\"CT_ColorChangeEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"clrRepl\" type=\"CT_ColorReplaceEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"duotone\" type=\"CT_DuotoneEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fill\" type=\"CT_FillEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fillOverlay\" type=\"CT_FillOverlayEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"glow\" type=\"CT_GlowEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"grayscl\" type=\"CT_GrayscaleEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hsl\" type=\"CT_HSLEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"innerShdw\" type=\"CT_InnerShadowEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lum\" type=\"CT_LuminanceEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"outerShdw\" type=\"CT_OuterShadowEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"prstShdw\" type=\"CT_PresetShadowEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"reflection\" type=\"CT_ReflectionEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"relOff\" type=\"CT_RelativeOffsetEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"softEdge\" type=\"CT_SoftEdgesEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tint\" type=\"CT_TintEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"xfrm\" type=\"CT_TransformEffect\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:simpleType name=\"ST_EffectContainerType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"sib\"/>\n      <xsd:enumeration value=\"tree\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_EffectContainer\">\n    <xsd:group ref=\"EG_Effect\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    <xsd:attribute name=\"type\" type=\"ST_EffectContainerType\" use=\"optional\" default=\"sib\"/>\n    <xsd:attribute name=\"name\" type=\"xsd:token\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AlphaModulateEffect\">\n    <xsd:sequence>\n      <xsd:element name=\"cont\" type=\"CT_EffectContainer\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BlendEffect\">\n    <xsd:sequence>\n      <xsd:element name=\"cont\" type=\"CT_EffectContainer\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"blend\" type=\"ST_BlendMode\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EffectList\">\n    <xsd:sequence>\n      <xsd:element name=\"blur\" type=\"CT_BlurEffect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fillOverlay\" type=\"CT_FillOverlayEffect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"glow\" type=\"CT_GlowEffect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"innerShdw\" type=\"CT_InnerShadowEffect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"outerShdw\" type=\"CT_OuterShadowEffect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"prstShdw\" type=\"CT_PresetShadowEffect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"reflection\" type=\"CT_ReflectionEffect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"softEdge\" type=\"CT_SoftEdgesEffect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_EffectProperties\">\n    <xsd:choice>\n      <xsd:element name=\"effectLst\" type=\"CT_EffectList\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"effectDag\" type=\"CT_EffectContainer\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_EffectProperties\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_EffectProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"blip\" type=\"CT_Blip\"/>\n  <xsd:simpleType name=\"ST_ShapeType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"line\"/>\n      <xsd:enumeration value=\"lineInv\"/>\n      <xsd:enumeration value=\"triangle\"/>\n      <xsd:enumeration value=\"rtTriangle\"/>\n      <xsd:enumeration value=\"rect\"/>\n      <xsd:enumeration value=\"diamond\"/>\n      <xsd:enumeration value=\"parallelogram\"/>\n      <xsd:enumeration value=\"trapezoid\"/>\n      <xsd:enumeration value=\"nonIsoscelesTrapezoid\"/>\n      <xsd:enumeration value=\"pentagon\"/>\n      <xsd:enumeration value=\"hexagon\"/>\n      <xsd:enumeration value=\"heptagon\"/>\n      <xsd:enumeration value=\"octagon\"/>\n      <xsd:enumeration value=\"decagon\"/>\n      <xsd:enumeration value=\"dodecagon\"/>\n      <xsd:enumeration value=\"star4\"/>\n      <xsd:enumeration value=\"star5\"/>\n      <xsd:enumeration value=\"star6\"/>\n      <xsd:enumeration value=\"star7\"/>\n      <xsd:enumeration value=\"star8\"/>\n      <xsd:enumeration value=\"star10\"/>\n      <xsd:enumeration value=\"star12\"/>\n      <xsd:enumeration value=\"star16\"/>\n      <xsd:enumeration value=\"star24\"/>\n      <xsd:enumeration value=\"star32\"/>\n      <xsd:enumeration value=\"roundRect\"/>\n      <xsd:enumeration value=\"round1Rect\"/>\n      <xsd:enumeration value=\"round2SameRect\"/>\n      <xsd:enumeration value=\"round2DiagRect\"/>\n      <xsd:enumeration value=\"snipRoundRect\"/>\n      <xsd:enumeration value=\"snip1Rect\"/>\n      <xsd:enumeration value=\"snip2SameRect\"/>\n      <xsd:enumeration value=\"snip2DiagRect\"/>\n      <xsd:enumeration value=\"plaque\"/>\n      <xsd:enumeration value=\"ellipse\"/>\n      <xsd:enumeration value=\"teardrop\"/>\n      <xsd:enumeration value=\"homePlate\"/>\n      <xsd:enumeration value=\"chevron\"/>\n      <xsd:enumeration value=\"pieWedge\"/>\n      <xsd:enumeration value=\"pie\"/>\n      <xsd:enumeration value=\"blockArc\"/>\n      <xsd:enumeration value=\"donut\"/>\n      <xsd:enumeration value=\"noSmoking\"/>\n      <xsd:enumeration value=\"rightArrow\"/>\n      <xsd:enumeration value=\"leftArrow\"/>\n      <xsd:enumeration value=\"upArrow\"/>\n      <xsd:enumeration value=\"downArrow\"/>\n      <xsd:enumeration value=\"stripedRightArrow\"/>\n      <xsd:enumeration value=\"notchedRightArrow\"/>\n      <xsd:enumeration value=\"bentUpArrow\"/>\n      <xsd:enumeration value=\"leftRightArrow\"/>\n      <xsd:enumeration value=\"upDownArrow\"/>\n      <xsd:enumeration value=\"leftUpArrow\"/>\n      <xsd:enumeration value=\"leftRightUpArrow\"/>\n      <xsd:enumeration value=\"quadArrow\"/>\n      <xsd:enumeration value=\"leftArrowCallout\"/>\n      <xsd:enumeration value=\"rightArrowCallout\"/>\n      <xsd:enumeration value=\"upArrowCallout\"/>\n      <xsd:enumeration value=\"downArrowCallout\"/>\n      <xsd:enumeration value=\"leftRightArrowCallout\"/>\n      <xsd:enumeration value=\"upDownArrowCallout\"/>\n      <xsd:enumeration value=\"quadArrowCallout\"/>\n      <xsd:enumeration value=\"bentArrow\"/>\n      <xsd:enumeration value=\"uturnArrow\"/>\n      <xsd:enumeration value=\"circularArrow\"/>\n      <xsd:enumeration value=\"leftCircularArrow\"/>\n      <xsd:enumeration value=\"leftRightCircularArrow\"/>\n      <xsd:enumeration value=\"curvedRightArrow\"/>\n      <xsd:enumeration value=\"curvedLeftArrow\"/>\n      <xsd:enumeration value=\"curvedUpArrow\"/>\n      <xsd:enumeration value=\"curvedDownArrow\"/>\n      <xsd:enumeration value=\"swooshArrow\"/>\n      <xsd:enumeration value=\"cube\"/>\n      <xsd:enumeration value=\"can\"/>\n      <xsd:enumeration value=\"lightningBolt\"/>\n      <xsd:enumeration value=\"heart\"/>\n      <xsd:enumeration value=\"sun\"/>\n      <xsd:enumeration value=\"moon\"/>\n      <xsd:enumeration value=\"smileyFace\"/>\n      <xsd:enumeration value=\"irregularSeal1\"/>\n      <xsd:enumeration value=\"irregularSeal2\"/>\n      <xsd:enumeration value=\"foldedCorner\"/>\n      <xsd:enumeration value=\"bevel\"/>\n      <xsd:enumeration value=\"frame\"/>\n      <xsd:enumeration value=\"halfFrame\"/>\n      <xsd:enumeration value=\"corner\"/>\n      <xsd:enumeration value=\"diagStripe\"/>\n      <xsd:enumeration value=\"chord\"/>\n      <xsd:enumeration value=\"arc\"/>\n      <xsd:enumeration value=\"leftBracket\"/>\n      <xsd:enumeration value=\"rightBracket\"/>\n      <xsd:enumeration value=\"leftBrace\"/>\n      <xsd:enumeration value=\"rightBrace\"/>\n      <xsd:enumeration value=\"bracketPair\"/>\n      <xsd:enumeration value=\"bracePair\"/>\n      <xsd:enumeration value=\"straightConnector1\"/>\n      <xsd:enumeration value=\"bentConnector2\"/>\n      <xsd:enumeration value=\"bentConnector3\"/>\n      <xsd:enumeration value=\"bentConnector4\"/>\n      <xsd:enumeration value=\"bentConnector5\"/>\n      <xsd:enumeration value=\"curvedConnector2\"/>\n      <xsd:enumeration value=\"curvedConnector3\"/>\n      <xsd:enumeration value=\"curvedConnector4\"/>\n      <xsd:enumeration value=\"curvedConnector5\"/>\n      <xsd:enumeration value=\"callout1\"/>\n      <xsd:enumeration value=\"callout2\"/>\n      <xsd:enumeration value=\"callout3\"/>\n      <xsd:enumeration value=\"accentCallout1\"/>\n      <xsd:enumeration value=\"accentCallout2\"/>\n      <xsd:enumeration value=\"accentCallout3\"/>\n      <xsd:enumeration value=\"borderCallout1\"/>\n      <xsd:enumeration value=\"borderCallout2\"/>\n      <xsd:enumeration value=\"borderCallout3\"/>\n      <xsd:enumeration value=\"accentBorderCallout1\"/>\n      <xsd:enumeration value=\"accentBorderCallout2\"/>\n      <xsd:enumeration value=\"accentBorderCallout3\"/>\n      <xsd:enumeration value=\"wedgeRectCallout\"/>\n      <xsd:enumeration value=\"wedgeRoundRectCallout\"/>\n      <xsd:enumeration value=\"wedgeEllipseCallout\"/>\n      <xsd:enumeration value=\"cloudCallout\"/>\n      <xsd:enumeration value=\"cloud\"/>\n      <xsd:enumeration value=\"ribbon\"/>\n      <xsd:enumeration value=\"ribbon2\"/>\n      <xsd:enumeration value=\"ellipseRibbon\"/>\n      <xsd:enumeration value=\"ellipseRibbon2\"/>\n      <xsd:enumeration value=\"leftRightRibbon\"/>\n      <xsd:enumeration value=\"verticalScroll\"/>\n      <xsd:enumeration value=\"horizontalScroll\"/>\n      <xsd:enumeration value=\"wave\"/>\n      <xsd:enumeration value=\"doubleWave\"/>\n      <xsd:enumeration value=\"plus\"/>\n      <xsd:enumeration value=\"flowChartProcess\"/>\n      <xsd:enumeration value=\"flowChartDecision\"/>\n      <xsd:enumeration value=\"flowChartInputOutput\"/>\n      <xsd:enumeration value=\"flowChartPredefinedProcess\"/>\n      <xsd:enumeration value=\"flowChartInternalStorage\"/>\n      <xsd:enumeration value=\"flowChartDocument\"/>\n      <xsd:enumeration value=\"flowChartMultidocument\"/>\n      <xsd:enumeration value=\"flowChartTerminator\"/>\n      <xsd:enumeration value=\"flowChartPreparation\"/>\n      <xsd:enumeration value=\"flowChartManualInput\"/>\n      <xsd:enumeration value=\"flowChartManualOperation\"/>\n      <xsd:enumeration value=\"flowChartConnector\"/>\n      <xsd:enumeration value=\"flowChartPunchedCard\"/>\n      <xsd:enumeration value=\"flowChartPunchedTape\"/>\n      <xsd:enumeration value=\"flowChartSummingJunction\"/>\n      <xsd:enumeration value=\"flowChartOr\"/>\n      <xsd:enumeration value=\"flowChartCollate\"/>\n      <xsd:enumeration value=\"flowChartSort\"/>\n      <xsd:enumeration value=\"flowChartExtract\"/>\n      <xsd:enumeration value=\"flowChartMerge\"/>\n      <xsd:enumeration value=\"flowChartOfflineStorage\"/>\n      <xsd:enumeration value=\"flowChartOnlineStorage\"/>\n      <xsd:enumeration value=\"flowChartMagneticTape\"/>\n      <xsd:enumeration value=\"flowChartMagneticDisk\"/>\n      <xsd:enumeration value=\"flowChartMagneticDrum\"/>\n      <xsd:enumeration value=\"flowChartDisplay\"/>\n      <xsd:enumeration value=\"flowChartDelay\"/>\n      <xsd:enumeration value=\"flowChartAlternateProcess\"/>\n      <xsd:enumeration value=\"flowChartOffpageConnector\"/>\n      <xsd:enumeration value=\"actionButtonBlank\"/>\n      <xsd:enumeration value=\"actionButtonHome\"/>\n      <xsd:enumeration value=\"actionButtonHelp\"/>\n      <xsd:enumeration value=\"actionButtonInformation\"/>\n      <xsd:enumeration value=\"actionButtonForwardNext\"/>\n      <xsd:enumeration value=\"actionButtonBackPrevious\"/>\n      <xsd:enumeration value=\"actionButtonEnd\"/>\n      <xsd:enumeration value=\"actionButtonBeginning\"/>\n      <xsd:enumeration value=\"actionButtonReturn\"/>\n      <xsd:enumeration value=\"actionButtonDocument\"/>\n      <xsd:enumeration value=\"actionButtonSound\"/>\n      <xsd:enumeration value=\"actionButtonMovie\"/>\n      <xsd:enumeration value=\"gear6\"/>\n      <xsd:enumeration value=\"gear9\"/>\n      <xsd:enumeration value=\"funnel\"/>\n      <xsd:enumeration value=\"mathPlus\"/>\n      <xsd:enumeration value=\"mathMinus\"/>\n      <xsd:enumeration value=\"mathMultiply\"/>\n      <xsd:enumeration value=\"mathDivide\"/>\n      <xsd:enumeration value=\"mathEqual\"/>\n      <xsd:enumeration value=\"mathNotEqual\"/>\n      <xsd:enumeration value=\"cornerTabs\"/>\n      <xsd:enumeration value=\"squareTabs\"/>\n      <xsd:enumeration value=\"plaqueTabs\"/>\n      <xsd:enumeration value=\"chartX\"/>\n      <xsd:enumeration value=\"chartStar\"/>\n      <xsd:enumeration value=\"chartPlus\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextShapeType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"textNoShape\"/>\n      <xsd:enumeration value=\"textPlain\"/>\n      <xsd:enumeration value=\"textStop\"/>\n      <xsd:enumeration value=\"textTriangle\"/>\n      <xsd:enumeration value=\"textTriangleInverted\"/>\n      <xsd:enumeration value=\"textChevron\"/>\n      <xsd:enumeration value=\"textChevronInverted\"/>\n      <xsd:enumeration value=\"textRingInside\"/>\n      <xsd:enumeration value=\"textRingOutside\"/>\n      <xsd:enumeration value=\"textArchUp\"/>\n      <xsd:enumeration value=\"textArchDown\"/>\n      <xsd:enumeration value=\"textCircle\"/>\n      <xsd:enumeration value=\"textButton\"/>\n      <xsd:enumeration value=\"textArchUpPour\"/>\n      <xsd:enumeration value=\"textArchDownPour\"/>\n      <xsd:enumeration value=\"textCirclePour\"/>\n      <xsd:enumeration value=\"textButtonPour\"/>\n      <xsd:enumeration value=\"textCurveUp\"/>\n      <xsd:enumeration value=\"textCurveDown\"/>\n      <xsd:enumeration value=\"textCanUp\"/>\n      <xsd:enumeration value=\"textCanDown\"/>\n      <xsd:enumeration value=\"textWave1\"/>\n      <xsd:enumeration value=\"textWave2\"/>\n      <xsd:enumeration value=\"textDoubleWave1\"/>\n      <xsd:enumeration value=\"textWave4\"/>\n      <xsd:enumeration value=\"textInflate\"/>\n      <xsd:enumeration value=\"textDeflate\"/>\n      <xsd:enumeration value=\"textInflateBottom\"/>\n      <xsd:enumeration value=\"textDeflateBottom\"/>\n      <xsd:enumeration value=\"textInflateTop\"/>\n      <xsd:enumeration value=\"textDeflateTop\"/>\n      <xsd:enumeration value=\"textDeflateInflate\"/>\n      <xsd:enumeration value=\"textDeflateInflateDeflate\"/>\n      <xsd:enumeration value=\"textFadeRight\"/>\n      <xsd:enumeration value=\"textFadeLeft\"/>\n      <xsd:enumeration value=\"textFadeUp\"/>\n      <xsd:enumeration value=\"textFadeDown\"/>\n      <xsd:enumeration value=\"textSlantUp\"/>\n      <xsd:enumeration value=\"textSlantDown\"/>\n      <xsd:enumeration value=\"textCascadeUp\"/>\n      <xsd:enumeration value=\"textCascadeDown\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_GeomGuideName\">\n    <xsd:restriction base=\"xsd:token\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_GeomGuideFormula\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_GeomGuide\">\n    <xsd:attribute name=\"name\" type=\"ST_GeomGuideName\" use=\"required\"/>\n    <xsd:attribute name=\"fmla\" type=\"ST_GeomGuideFormula\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GeomGuideList\">\n    <xsd:sequence>\n      <xsd:element name=\"gd\" type=\"CT_GeomGuide\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_AdjCoordinate\">\n    <xsd:union memberTypes=\"ST_Coordinate ST_GeomGuideName\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AdjAngle\">\n    <xsd:union memberTypes=\"ST_Angle ST_GeomGuideName\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_AdjPoint2D\">\n    <xsd:attribute name=\"x\" type=\"ST_AdjCoordinate\" use=\"required\"/>\n    <xsd:attribute name=\"y\" type=\"ST_AdjCoordinate\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GeomRect\">\n    <xsd:attribute name=\"l\" type=\"ST_AdjCoordinate\" use=\"required\"/>\n    <xsd:attribute name=\"t\" type=\"ST_AdjCoordinate\" use=\"required\"/>\n    <xsd:attribute name=\"r\" type=\"ST_AdjCoordinate\" use=\"required\"/>\n    <xsd:attribute name=\"b\" type=\"ST_AdjCoordinate\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_XYAdjustHandle\">\n    <xsd:sequence>\n      <xsd:element name=\"pos\" type=\"CT_AdjPoint2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"gdRefX\" type=\"ST_GeomGuideName\" use=\"optional\"/>\n    <xsd:attribute name=\"minX\" type=\"ST_AdjCoordinate\" use=\"optional\"/>\n    <xsd:attribute name=\"maxX\" type=\"ST_AdjCoordinate\" use=\"optional\"/>\n    <xsd:attribute name=\"gdRefY\" type=\"ST_GeomGuideName\" use=\"optional\"/>\n    <xsd:attribute name=\"minY\" type=\"ST_AdjCoordinate\" use=\"optional\"/>\n    <xsd:attribute name=\"maxY\" type=\"ST_AdjCoordinate\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PolarAdjustHandle\">\n    <xsd:sequence>\n      <xsd:element name=\"pos\" type=\"CT_AdjPoint2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"gdRefR\" type=\"ST_GeomGuideName\" use=\"optional\"/>\n    <xsd:attribute name=\"minR\" type=\"ST_AdjCoordinate\" use=\"optional\"/>\n    <xsd:attribute name=\"maxR\" type=\"ST_AdjCoordinate\" use=\"optional\"/>\n    <xsd:attribute name=\"gdRefAng\" type=\"ST_GeomGuideName\" use=\"optional\"/>\n    <xsd:attribute name=\"minAng\" type=\"ST_AdjAngle\" use=\"optional\"/>\n    <xsd:attribute name=\"maxAng\" type=\"ST_AdjAngle\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ConnectionSite\">\n    <xsd:sequence>\n      <xsd:element name=\"pos\" type=\"CT_AdjPoint2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"ang\" type=\"ST_AdjAngle\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AdjustHandleList\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"ahXY\" type=\"CT_XYAdjustHandle\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ahPolar\" type=\"CT_PolarAdjustHandle\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ConnectionSiteList\">\n    <xsd:sequence>\n      <xsd:element name=\"cxn\" type=\"CT_ConnectionSite\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Connection\">\n    <xsd:attribute name=\"id\" type=\"ST_DrawingElementId\" use=\"required\"/>\n    <xsd:attribute name=\"idx\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Path2DMoveTo\">\n    <xsd:sequence>\n      <xsd:element name=\"pt\" type=\"CT_AdjPoint2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Path2DLineTo\">\n    <xsd:sequence>\n      <xsd:element name=\"pt\" type=\"CT_AdjPoint2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Path2DArcTo\">\n    <xsd:attribute name=\"wR\" type=\"ST_AdjCoordinate\" use=\"required\"/>\n    <xsd:attribute name=\"hR\" type=\"ST_AdjCoordinate\" use=\"required\"/>\n    <xsd:attribute name=\"stAng\" type=\"ST_AdjAngle\" use=\"required\"/>\n    <xsd:attribute name=\"swAng\" type=\"ST_AdjAngle\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Path2DQuadBezierTo\">\n    <xsd:sequence>\n      <xsd:element name=\"pt\" type=\"CT_AdjPoint2D\" minOccurs=\"2\" maxOccurs=\"2\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Path2DCubicBezierTo\">\n    <xsd:sequence>\n      <xsd:element name=\"pt\" type=\"CT_AdjPoint2D\" minOccurs=\"3\" maxOccurs=\"3\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Path2DClose\"/>\n  <xsd:simpleType name=\"ST_PathFillMode\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"norm\"/>\n      <xsd:enumeration value=\"lighten\"/>\n      <xsd:enumeration value=\"lightenLess\"/>\n      <xsd:enumeration value=\"darken\"/>\n      <xsd:enumeration value=\"darkenLess\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Path2D\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"close\" type=\"CT_Path2DClose\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"moveTo\" type=\"CT_Path2DMoveTo\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lnTo\" type=\"CT_Path2DLineTo\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"arcTo\" type=\"CT_Path2DArcTo\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"quadBezTo\" type=\"CT_Path2DQuadBezierTo\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cubicBezTo\" type=\"CT_Path2DCubicBezierTo\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"w\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"h\" type=\"ST_PositiveCoordinate\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"fill\" type=\"ST_PathFillMode\" use=\"optional\" default=\"norm\"/>\n    <xsd:attribute name=\"stroke\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"extrusionOk\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Path2DList\">\n    <xsd:sequence>\n      <xsd:element name=\"path\" type=\"CT_Path2D\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PresetGeometry2D\">\n    <xsd:sequence>\n      <xsd:element name=\"avLst\" type=\"CT_GeomGuideList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"prst\" type=\"ST_ShapeType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PresetTextShape\">\n    <xsd:sequence>\n      <xsd:element name=\"avLst\" type=\"CT_GeomGuideList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"prst\" type=\"ST_TextShapeType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomGeometry2D\">\n    <xsd:sequence>\n      <xsd:element name=\"avLst\" type=\"CT_GeomGuideList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gdLst\" type=\"CT_GeomGuideList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ahLst\" type=\"CT_AdjustHandleList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cxnLst\" type=\"CT_ConnectionSiteList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"rect\" type=\"CT_GeomRect\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pathLst\" type=\"CT_Path2DList\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_Geometry\">\n    <xsd:choice>\n      <xsd:element name=\"custGeom\" type=\"CT_CustomGeometry2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"prstGeom\" type=\"CT_PresetGeometry2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:group name=\"EG_TextGeometry\">\n    <xsd:choice>\n      <xsd:element name=\"custGeom\" type=\"CT_CustomGeometry2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"prstTxWarp\" type=\"CT_PresetTextShape\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:simpleType name=\"ST_LineEndType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"triangle\"/>\n      <xsd:enumeration value=\"stealth\"/>\n      <xsd:enumeration value=\"diamond\"/>\n      <xsd:enumeration value=\"oval\"/>\n      <xsd:enumeration value=\"arrow\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_LineEndWidth\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"sm\"/>\n      <xsd:enumeration value=\"med\"/>\n      <xsd:enumeration value=\"lg\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_LineEndLength\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"sm\"/>\n      <xsd:enumeration value=\"med\"/>\n      <xsd:enumeration value=\"lg\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LineEndProperties\">\n    <xsd:attribute name=\"type\" type=\"ST_LineEndType\" use=\"optional\" default=\"none\"/>\n    <xsd:attribute name=\"w\" type=\"ST_LineEndWidth\" use=\"optional\"/>\n    <xsd:attribute name=\"len\" type=\"ST_LineEndLength\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_LineFillProperties\">\n    <xsd:choice>\n      <xsd:element name=\"noFill\" type=\"CT_NoFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"solidFill\" type=\"CT_SolidColorFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gradFill\" type=\"CT_GradientFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pattFill\" type=\"CT_PatternFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_LineJoinBevel\"/>\n  <xsd:complexType name=\"CT_LineJoinRound\"/>\n  <xsd:complexType name=\"CT_LineJoinMiterProperties\">\n    <xsd:attribute name=\"lim\" type=\"ST_PositivePercentage\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_LineJoinProperties\">\n    <xsd:choice>\n      <xsd:element name=\"round\" type=\"CT_LineJoinRound\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bevel\" type=\"CT_LineJoinBevel\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"miter\" type=\"CT_LineJoinMiterProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:simpleType name=\"ST_PresetLineDashVal\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"solid\"/>\n      <xsd:enumeration value=\"dot\"/>\n      <xsd:enumeration value=\"dash\"/>\n      <xsd:enumeration value=\"lgDash\"/>\n      <xsd:enumeration value=\"dashDot\"/>\n      <xsd:enumeration value=\"lgDashDot\"/>\n      <xsd:enumeration value=\"lgDashDotDot\"/>\n      <xsd:enumeration value=\"sysDash\"/>\n      <xsd:enumeration value=\"sysDot\"/>\n      <xsd:enumeration value=\"sysDashDot\"/>\n      <xsd:enumeration value=\"sysDashDotDot\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PresetLineDashProperties\">\n    <xsd:attribute name=\"val\" type=\"ST_PresetLineDashVal\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DashStop\">\n    <xsd:attribute name=\"d\" type=\"ST_PositivePercentage\" use=\"required\"/>\n    <xsd:attribute name=\"sp\" type=\"ST_PositivePercentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DashStopList\">\n    <xsd:sequence>\n      <xsd:element name=\"ds\" type=\"CT_DashStop\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_LineDashProperties\">\n    <xsd:choice>\n      <xsd:element name=\"prstDash\" type=\"CT_PresetLineDashProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"custDash\" type=\"CT_DashStopList\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:simpleType name=\"ST_LineCap\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"rnd\"/>\n      <xsd:enumeration value=\"sq\"/>\n      <xsd:enumeration value=\"flat\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_LineWidth\">\n    <xsd:restriction base=\"ST_Coordinate32Unqualified\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"20116800\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PenAlignment\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"in\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CompoundLine\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"sng\"/>\n      <xsd:enumeration value=\"dbl\"/>\n      <xsd:enumeration value=\"thickThin\"/>\n      <xsd:enumeration value=\"thinThick\"/>\n      <xsd:enumeration value=\"tri\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LineProperties\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_LineFillProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_LineDashProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_LineJoinProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"headEnd\" type=\"CT_LineEndProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tailEnd\" type=\"CT_LineEndProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"w\" type=\"ST_LineWidth\" use=\"optional\"/>\n    <xsd:attribute name=\"cap\" type=\"ST_LineCap\" use=\"optional\"/>\n    <xsd:attribute name=\"cmpd\" type=\"ST_CompoundLine\" use=\"optional\"/>\n    <xsd:attribute name=\"algn\" type=\"ST_PenAlignment\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ShapeID\">\n    <xsd:restriction base=\"xsd:token\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ShapeProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"xfrm\" type=\"CT_Transform2D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_Geometry\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_FillProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ln\" type=\"CT_LineProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_EffectProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"scene3d\" type=\"CT_Scene3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sp3d\" type=\"CT_Shape3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"bwMode\" type=\"ST_BlackWhiteMode\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupShapeProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"xfrm\" type=\"CT_GroupTransform2D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_FillProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_EffectProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"scene3d\" type=\"CT_Scene3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"bwMode\" type=\"ST_BlackWhiteMode\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_StyleMatrixReference\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"idx\" type=\"ST_StyleMatrixColumnIndex\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FontReference\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"idx\" type=\"ST_FontCollectionIndex\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ShapeStyle\">\n    <xsd:sequence>\n      <xsd:element name=\"lnRef\" type=\"CT_StyleMatrixReference\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fillRef\" type=\"CT_StyleMatrixReference\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"effectRef\" type=\"CT_StyleMatrixReference\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fontRef\" type=\"CT_FontReference\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DefaultShapeDefinition\">\n    <xsd:sequence>\n      <xsd:element name=\"spPr\" type=\"CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bodyPr\" type=\"CT_TextBodyProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lstStyle\" type=\"CT_TextListStyle\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ObjectStyleDefaults\">\n    <xsd:sequence>\n      <xsd:element name=\"spDef\" type=\"CT_DefaultShapeDefinition\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lnDef\" type=\"CT_DefaultShapeDefinition\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txDef\" type=\"CT_DefaultShapeDefinition\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EmptyElement\"/>\n  <xsd:complexType name=\"CT_ColorMapping\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"bg1\" type=\"ST_ColorSchemeIndex\" use=\"required\"/>\n    <xsd:attribute name=\"tx1\" type=\"ST_ColorSchemeIndex\" use=\"required\"/>\n    <xsd:attribute name=\"bg2\" type=\"ST_ColorSchemeIndex\" use=\"required\"/>\n    <xsd:attribute name=\"tx2\" type=\"ST_ColorSchemeIndex\" use=\"required\"/>\n    <xsd:attribute name=\"accent1\" type=\"ST_ColorSchemeIndex\" use=\"required\"/>\n    <xsd:attribute name=\"accent2\" type=\"ST_ColorSchemeIndex\" use=\"required\"/>\n    <xsd:attribute name=\"accent3\" type=\"ST_ColorSchemeIndex\" use=\"required\"/>\n    <xsd:attribute name=\"accent4\" type=\"ST_ColorSchemeIndex\" use=\"required\"/>\n    <xsd:attribute name=\"accent5\" type=\"ST_ColorSchemeIndex\" use=\"required\"/>\n    <xsd:attribute name=\"accent6\" type=\"ST_ColorSchemeIndex\" use=\"required\"/>\n    <xsd:attribute name=\"hlink\" type=\"ST_ColorSchemeIndex\" use=\"required\"/>\n    <xsd:attribute name=\"folHlink\" type=\"ST_ColorSchemeIndex\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColorMappingOverride\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n        <xsd:element name=\"masterClrMapping\" type=\"CT_EmptyElement\"/>\n        <xsd:element name=\"overrideClrMapping\" type=\"CT_ColorMapping\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColorSchemeAndMapping\">\n    <xsd:sequence>\n      <xsd:element name=\"clrScheme\" type=\"CT_ColorScheme\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"clrMap\" type=\"CT_ColorMapping\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColorSchemeList\">\n    <xsd:sequence>\n      <xsd:element name=\"extraClrScheme\" type=\"CT_ColorSchemeAndMapping\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OfficeStyleSheet\">\n    <xsd:sequence>\n      <xsd:element name=\"themeElements\" type=\"CT_BaseStyles\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"objectDefaults\" type=\"CT_ObjectStyleDefaults\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extraClrSchemeLst\" type=\"CT_ColorSchemeList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"custClrLst\" type=\"CT_CustomColorList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BaseStylesOverride\">\n    <xsd:sequence>\n      <xsd:element name=\"clrScheme\" type=\"CT_ColorScheme\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fontScheme\" type=\"CT_FontScheme\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fmtScheme\" type=\"CT_StyleMatrix\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ClipboardStyleSheet\">\n    <xsd:sequence>\n      <xsd:element name=\"themeElements\" type=\"CT_BaseStyles\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"clrMap\" type=\"CT_ColorMapping\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"theme\" type=\"CT_OfficeStyleSheet\"/>\n  <xsd:element name=\"themeOverride\" type=\"CT_BaseStylesOverride\"/>\n  <xsd:element name=\"themeManager\" type=\"CT_EmptyElement\"/>\n  <xsd:complexType name=\"CT_TableCellProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"lnL\" type=\"CT_LineProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lnR\" type=\"CT_LineProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lnT\" type=\"CT_LineProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lnB\" type=\"CT_LineProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lnTlToBr\" type=\"CT_LineProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lnBlToTr\" type=\"CT_LineProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cell3D\" type=\"CT_Cell3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_FillProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"headers\" type=\"CT_Headers\" minOccurs=\"0\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"marL\" type=\"ST_Coordinate32\" use=\"optional\" default=\"91440\"/>\n    <xsd:attribute name=\"marR\" type=\"ST_Coordinate32\" use=\"optional\" default=\"91440\"/>\n    <xsd:attribute name=\"marT\" type=\"ST_Coordinate32\" use=\"optional\" default=\"45720\"/>\n    <xsd:attribute name=\"marB\" type=\"ST_Coordinate32\" use=\"optional\" default=\"45720\"/>\n    <xsd:attribute name=\"vert\" type=\"ST_TextVerticalType\" use=\"optional\" default=\"horz\"/>\n    <xsd:attribute name=\"anchor\" type=\"ST_TextAnchoringType\" use=\"optional\" default=\"t\"/>\n    <xsd:attribute name=\"anchorCtr\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"horzOverflow\" type=\"ST_TextHorzOverflowType\" use=\"optional\" default=\"clip\"\n    />\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Headers\">\n    <xsd:sequence minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"header\" type=\"xsd:string\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableCol\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"w\" type=\"ST_Coordinate\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableGrid\">\n    <xsd:sequence>\n      <xsd:element name=\"gridCol\" type=\"CT_TableCol\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableCell\">\n    <xsd:sequence>\n      <xsd:element name=\"txBody\" type=\"CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tcPr\" type=\"CT_TableCellProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"rowSpan\" type=\"xsd:int\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"gridSpan\" type=\"xsd:int\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"hMerge\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"vMerge\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"id\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableRow\">\n    <xsd:sequence>\n      <xsd:element name=\"tc\" type=\"CT_TableCell\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"h\" type=\"ST_Coordinate\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableProperties\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_FillProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_EffectProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"1\">\n        <xsd:element name=\"tableStyle\" type=\"CT_TableStyle\"/>\n        <xsd:element name=\"tableStyleId\" type=\"s:ST_Guid\"/>\n      </xsd:choice>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"rtl\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"firstRow\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"firstCol\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"lastRow\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"lastCol\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"bandRow\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"bandCol\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Table\">\n    <xsd:sequence>\n      <xsd:element name=\"tblPr\" type=\"CT_TableProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblGrid\" type=\"CT_TableGrid\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tr\" type=\"CT_TableRow\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"tbl\" type=\"CT_Table\"/>\n  <xsd:complexType name=\"CT_Cell3D\">\n    <xsd:sequence>\n      <xsd:element name=\"bevel\" type=\"CT_Bevel\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lightRig\" type=\"CT_LightRig\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"prstMaterial\" type=\"ST_PresetMaterialType\" use=\"optional\" default=\"plastic\"\n    />\n  </xsd:complexType>\n  <xsd:group name=\"EG_ThemeableFillStyle\">\n    <xsd:choice>\n      <xsd:element name=\"fill\" type=\"CT_FillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fillRef\" type=\"CT_StyleMatrixReference\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_ThemeableLineStyle\">\n    <xsd:choice>\n      <xsd:element name=\"ln\" type=\"CT_LineProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lnRef\" type=\"CT_StyleMatrixReference\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ThemeableEffectStyle\">\n    <xsd:choice>\n      <xsd:element name=\"effect\" type=\"CT_EffectProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"effectRef\" type=\"CT_StyleMatrixReference\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:group name=\"EG_ThemeableFontStyles\">\n    <xsd:choice>\n      <xsd:element name=\"font\" type=\"CT_FontCollection\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fontRef\" type=\"CT_FontReference\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:simpleType name=\"ST_OnOffStyleType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"on\"/>\n      <xsd:enumeration value=\"off\"/>\n      <xsd:enumeration value=\"def\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TableStyleTextStyle\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ThemeableFontStyles\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"b\" type=\"ST_OnOffStyleType\" use=\"optional\" default=\"def\"/>\n    <xsd:attribute name=\"i\" type=\"ST_OnOffStyleType\" use=\"optional\" default=\"def\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableCellBorderStyle\">\n    <xsd:sequence>\n      <xsd:element name=\"left\" type=\"CT_ThemeableLineStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"right\" type=\"CT_ThemeableLineStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"top\" type=\"CT_ThemeableLineStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bottom\" type=\"CT_ThemeableLineStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"insideH\" type=\"CT_ThemeableLineStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"insideV\" type=\"CT_ThemeableLineStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tl2br\" type=\"CT_ThemeableLineStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tr2bl\" type=\"CT_ThemeableLineStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableBackgroundStyle\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ThemeableFillStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_ThemeableEffectStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableStyleCellStyle\">\n    <xsd:sequence>\n      <xsd:element name=\"tcBdr\" type=\"CT_TableCellBorderStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_ThemeableFillStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cell3D\" type=\"CT_Cell3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TablePartStyle\">\n    <xsd:sequence>\n      <xsd:element name=\"tcTxStyle\" type=\"CT_TableStyleTextStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tcStyle\" type=\"CT_TableStyleCellStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableStyle\">\n    <xsd:sequence>\n      <xsd:element name=\"tblBg\" type=\"CT_TableBackgroundStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"wholeTbl\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"band1H\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"band2H\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"band1V\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"band2V\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lastCol\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"firstCol\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lastRow\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"seCell\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"swCell\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"firstRow\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"neCell\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"nwCell\" type=\"CT_TablePartStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"styleId\" type=\"s:ST_Guid\" use=\"required\"/>\n    <xsd:attribute name=\"styleName\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableStyleList\">\n    <xsd:sequence>\n      <xsd:element name=\"tblStyle\" type=\"CT_TableStyle\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"def\" type=\"s:ST_Guid\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:element name=\"tblStyleLst\" type=\"CT_TableStyleList\"/>\n  <xsd:complexType name=\"CT_TextParagraph\">\n    <xsd:sequence>\n      <xsd:element name=\"pPr\" type=\"CT_TextParagraphProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_TextRun\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"endParaRPr\" type=\"CT_TextCharacterProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextAnchoringType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"just\"/>\n      <xsd:enumeration value=\"dist\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextVertOverflowType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"overflow\"/>\n      <xsd:enumeration value=\"ellipsis\"/>\n      <xsd:enumeration value=\"clip\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextHorzOverflowType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"overflow\"/>\n      <xsd:enumeration value=\"clip\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextVerticalType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"horz\"/>\n      <xsd:enumeration value=\"vert\"/>\n      <xsd:enumeration value=\"vert270\"/>\n      <xsd:enumeration value=\"wordArtVert\"/>\n      <xsd:enumeration value=\"eaVert\"/>\n      <xsd:enumeration value=\"mongolianVert\"/>\n      <xsd:enumeration value=\"wordArtVertRtl\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextWrappingType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"square\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextColumnCount\">\n    <xsd:restriction base=\"xsd:int\">\n      <xsd:minInclusive value=\"1\"/>\n      <xsd:maxInclusive value=\"16\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextListStyle\">\n    <xsd:sequence>\n      <xsd:element name=\"defPPr\" type=\"CT_TextParagraphProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lvl1pPr\" type=\"CT_TextParagraphProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lvl2pPr\" type=\"CT_TextParagraphProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lvl3pPr\" type=\"CT_TextParagraphProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lvl4pPr\" type=\"CT_TextParagraphProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lvl5pPr\" type=\"CT_TextParagraphProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lvl6pPr\" type=\"CT_TextParagraphProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lvl7pPr\" type=\"CT_TextParagraphProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lvl8pPr\" type=\"CT_TextParagraphProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lvl9pPr\" type=\"CT_TextParagraphProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextFontScalePercentOrPercentString\">\n    <xsd:union memberTypes=\"ST_TextFontScalePercent s:ST_Percentage\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextFontScalePercent\">\n    <xsd:restriction base=\"ST_PercentageDecimal\">\n      <xsd:minInclusive value=\"1000\"/>\n      <xsd:maxInclusive value=\"100000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextNormalAutofit\">\n    <xsd:attribute name=\"fontScale\" type=\"ST_TextFontScalePercentOrPercentString\" use=\"optional\"\n      default=\"100%\"/>\n    <xsd:attribute name=\"lnSpcReduction\" type=\"ST_TextSpacingPercentOrPercentString\" use=\"optional\"\n      default=\"0%\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextShapeAutofit\"/>\n  <xsd:complexType name=\"CT_TextNoAutofit\"/>\n  <xsd:group name=\"EG_TextAutofit\">\n    <xsd:choice>\n      <xsd:element name=\"noAutofit\" type=\"CT_TextNoAutofit\"/>\n      <xsd:element name=\"normAutofit\" type=\"CT_TextNormalAutofit\"/>\n      <xsd:element name=\"spAutoFit\" type=\"CT_TextShapeAutofit\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_TextBodyProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"prstTxWarp\" type=\"CT_PresetTextShape\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_TextAutofit\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"scene3d\" type=\"CT_Scene3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_Text3D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"rot\" type=\"ST_Angle\" use=\"optional\"/>\n    <xsd:attribute name=\"spcFirstLastPara\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"vertOverflow\" type=\"ST_TextVertOverflowType\" use=\"optional\"/>\n    <xsd:attribute name=\"horzOverflow\" type=\"ST_TextHorzOverflowType\" use=\"optional\"/>\n    <xsd:attribute name=\"vert\" type=\"ST_TextVerticalType\" use=\"optional\"/>\n    <xsd:attribute name=\"wrap\" type=\"ST_TextWrappingType\" use=\"optional\"/>\n    <xsd:attribute name=\"lIns\" type=\"ST_Coordinate32\" use=\"optional\"/>\n    <xsd:attribute name=\"tIns\" type=\"ST_Coordinate32\" use=\"optional\"/>\n    <xsd:attribute name=\"rIns\" type=\"ST_Coordinate32\" use=\"optional\"/>\n    <xsd:attribute name=\"bIns\" type=\"ST_Coordinate32\" use=\"optional\"/>\n    <xsd:attribute name=\"numCol\" type=\"ST_TextColumnCount\" use=\"optional\"/>\n    <xsd:attribute name=\"spcCol\" type=\"ST_PositiveCoordinate32\" use=\"optional\"/>\n    <xsd:attribute name=\"rtlCol\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"fromWordArt\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"anchor\" type=\"ST_TextAnchoringType\" use=\"optional\"/>\n    <xsd:attribute name=\"anchorCtr\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"forceAA\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"upright\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"compatLnSpc\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextBody\">\n    <xsd:sequence>\n      <xsd:element name=\"bodyPr\" type=\"CT_TextBodyProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lstStyle\" type=\"CT_TextListStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"p\" type=\"CT_TextParagraph\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextBulletStartAtNum\">\n    <xsd:restriction base=\"xsd:int\">\n      <xsd:minInclusive value=\"1\"/>\n      <xsd:maxInclusive value=\"32767\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextAutonumberScheme\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"alphaLcParenBoth\"/>\n      <xsd:enumeration value=\"alphaUcParenBoth\"/>\n      <xsd:enumeration value=\"alphaLcParenR\"/>\n      <xsd:enumeration value=\"alphaUcParenR\"/>\n      <xsd:enumeration value=\"alphaLcPeriod\"/>\n      <xsd:enumeration value=\"alphaUcPeriod\"/>\n      <xsd:enumeration value=\"arabicParenBoth\"/>\n      <xsd:enumeration value=\"arabicParenR\"/>\n      <xsd:enumeration value=\"arabicPeriod\"/>\n      <xsd:enumeration value=\"arabicPlain\"/>\n      <xsd:enumeration value=\"romanLcParenBoth\"/>\n      <xsd:enumeration value=\"romanUcParenBoth\"/>\n      <xsd:enumeration value=\"romanLcParenR\"/>\n      <xsd:enumeration value=\"romanUcParenR\"/>\n      <xsd:enumeration value=\"romanLcPeriod\"/>\n      <xsd:enumeration value=\"romanUcPeriod\"/>\n      <xsd:enumeration value=\"circleNumDbPlain\"/>\n      <xsd:enumeration value=\"circleNumWdBlackPlain\"/>\n      <xsd:enumeration value=\"circleNumWdWhitePlain\"/>\n      <xsd:enumeration value=\"arabicDbPeriod\"/>\n      <xsd:enumeration value=\"arabicDbPlain\"/>\n      <xsd:enumeration value=\"ea1ChsPeriod\"/>\n      <xsd:enumeration value=\"ea1ChsPlain\"/>\n      <xsd:enumeration value=\"ea1ChtPeriod\"/>\n      <xsd:enumeration value=\"ea1ChtPlain\"/>\n      <xsd:enumeration value=\"ea1JpnChsDbPeriod\"/>\n      <xsd:enumeration value=\"ea1JpnKorPlain\"/>\n      <xsd:enumeration value=\"ea1JpnKorPeriod\"/>\n      <xsd:enumeration value=\"arabic1Minus\"/>\n      <xsd:enumeration value=\"arabic2Minus\"/>\n      <xsd:enumeration value=\"hebrew2Minus\"/>\n      <xsd:enumeration value=\"thaiAlphaPeriod\"/>\n      <xsd:enumeration value=\"thaiAlphaParenR\"/>\n      <xsd:enumeration value=\"thaiAlphaParenBoth\"/>\n      <xsd:enumeration value=\"thaiNumPeriod\"/>\n      <xsd:enumeration value=\"thaiNumParenR\"/>\n      <xsd:enumeration value=\"thaiNumParenBoth\"/>\n      <xsd:enumeration value=\"hindiAlphaPeriod\"/>\n      <xsd:enumeration value=\"hindiNumPeriod\"/>\n      <xsd:enumeration value=\"hindiNumParenR\"/>\n      <xsd:enumeration value=\"hindiAlpha1Period\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextBulletColorFollowText\"/>\n  <xsd:group name=\"EG_TextBulletColor\">\n    <xsd:choice>\n      <xsd:element name=\"buClrTx\" type=\"CT_TextBulletColorFollowText\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"buClr\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:simpleType name=\"ST_TextBulletSize\">\n    <xsd:union memberTypes=\"ST_TextBulletSizePercent ST_TextBulletSizeDecimal\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextBulletSizePercent\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"0*((2[5-9])|([3-9][0-9])|([1-3][0-9][0-9])|400)%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextBulletSizeDecimal\">\n    <xsd:restriction base=\"ST_PercentageDecimal\">\n      <xsd:minInclusive value=\"25000\"/>\n      <xsd:maxInclusive value=\"400000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextBulletSizeFollowText\"/>\n  <xsd:complexType name=\"CT_TextBulletSizePercent\">\n    <xsd:attribute name=\"val\" type=\"ST_TextBulletSizePercent\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextBulletSizePoint\">\n    <xsd:attribute name=\"val\" type=\"ST_TextFontSize\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_TextBulletSize\">\n    <xsd:choice>\n      <xsd:element name=\"buSzTx\" type=\"CT_TextBulletSizeFollowText\"/>\n      <xsd:element name=\"buSzPct\" type=\"CT_TextBulletSizePercent\"/>\n      <xsd:element name=\"buSzPts\" type=\"CT_TextBulletSizePoint\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_TextBulletTypefaceFollowText\"/>\n  <xsd:group name=\"EG_TextBulletTypeface\">\n    <xsd:choice>\n      <xsd:element name=\"buFontTx\" type=\"CT_TextBulletTypefaceFollowText\"/>\n      <xsd:element name=\"buFont\" type=\"CT_TextFont\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_TextAutonumberBullet\">\n    <xsd:attribute name=\"type\" type=\"ST_TextAutonumberScheme\" use=\"required\"/>\n    <xsd:attribute name=\"startAt\" type=\"ST_TextBulletStartAtNum\" use=\"optional\" default=\"1\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextCharBullet\">\n    <xsd:attribute name=\"char\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextBlipBullet\">\n    <xsd:sequence>\n      <xsd:element name=\"blip\" type=\"CT_Blip\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextNoBullet\"/>\n  <xsd:group name=\"EG_TextBullet\">\n    <xsd:choice>\n      <xsd:element name=\"buNone\" type=\"CT_TextNoBullet\"/>\n      <xsd:element name=\"buAutoNum\" type=\"CT_TextAutonumberBullet\"/>\n      <xsd:element name=\"buChar\" type=\"CT_TextCharBullet\"/>\n      <xsd:element name=\"buBlip\" type=\"CT_TextBlipBullet\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:simpleType name=\"ST_TextPoint\">\n    <xsd:union memberTypes=\"ST_TextPointUnqualified s:ST_UniversalMeasure\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextPointUnqualified\">\n    <xsd:restriction base=\"xsd:int\">\n      <xsd:minInclusive value=\"-400000\"/>\n      <xsd:maxInclusive value=\"400000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextNonNegativePoint\">\n    <xsd:restriction base=\"xsd:int\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"400000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextFontSize\">\n    <xsd:restriction base=\"xsd:int\">\n      <xsd:minInclusive value=\"100\"/>\n      <xsd:maxInclusive value=\"400000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextTypeface\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PitchFamily\">\n   <xsd:restriction base=\"xsd:byte\">\n     <xsd:enumeration value=\"00\"/>\n     <xsd:enumeration value=\"01\"/>\n     <xsd:enumeration value=\"02\"/>\n     <xsd:enumeration value=\"16\"/>\n     <xsd:enumeration value=\"17\"/>\n     <xsd:enumeration value=\"18\"/>\n     <xsd:enumeration value=\"32\"/>\n     <xsd:enumeration value=\"33\"/>\n     <xsd:enumeration value=\"34\"/>\n     <xsd:enumeration value=\"48\"/>\n     <xsd:enumeration value=\"49\"/>\n     <xsd:enumeration value=\"50\"/>\n     <xsd:enumeration value=\"64\"/>\n     <xsd:enumeration value=\"65\"/>\n     <xsd:enumeration value=\"66\"/>\n     <xsd:enumeration value=\"80\"/>\n     <xsd:enumeration value=\"81\"/>\n     <xsd:enumeration value=\"82\"/>\n   </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:complexType name=\"CT_TextFont\">\n    <xsd:attribute name=\"typeface\" type=\"ST_TextTypeface\" use=\"required\"/>\n    <xsd:attribute name=\"panose\" type=\"s:ST_Panose\" use=\"optional\"/>\n    <xsd:attribute name=\"pitchFamily\" type=\"ST_PitchFamily\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"charset\" type=\"xsd:byte\" use=\"optional\" default=\"1\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextUnderlineType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"words\"/>\n      <xsd:enumeration value=\"sng\"/>\n      <xsd:enumeration value=\"dbl\"/>\n      <xsd:enumeration value=\"heavy\"/>\n      <xsd:enumeration value=\"dotted\"/>\n      <xsd:enumeration value=\"dottedHeavy\"/>\n      <xsd:enumeration value=\"dash\"/>\n      <xsd:enumeration value=\"dashHeavy\"/>\n      <xsd:enumeration value=\"dashLong\"/>\n      <xsd:enumeration value=\"dashLongHeavy\"/>\n      <xsd:enumeration value=\"dotDash\"/>\n      <xsd:enumeration value=\"dotDashHeavy\"/>\n      <xsd:enumeration value=\"dotDotDash\"/>\n      <xsd:enumeration value=\"dotDotDashHeavy\"/>\n      <xsd:enumeration value=\"wavy\"/>\n      <xsd:enumeration value=\"wavyHeavy\"/>\n      <xsd:enumeration value=\"wavyDbl\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextUnderlineLineFollowText\"/>\n  <xsd:complexType name=\"CT_TextUnderlineFillFollowText\"/>\n  <xsd:complexType name=\"CT_TextUnderlineFillGroupWrapper\">\n    <xsd:group ref=\"EG_FillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_TextUnderlineLine\">\n    <xsd:choice>\n      <xsd:element name=\"uLnTx\" type=\"CT_TextUnderlineLineFollowText\"/>\n      <xsd:element name=\"uLn\" type=\"CT_LineProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:group name=\"EG_TextUnderlineFill\">\n    <xsd:choice>\n      <xsd:element name=\"uFillTx\" type=\"CT_TextUnderlineFillFollowText\"/>\n      <xsd:element name=\"uFill\" type=\"CT_TextUnderlineFillGroupWrapper\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:simpleType name=\"ST_TextStrikeType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"noStrike\"/>\n      <xsd:enumeration value=\"sngStrike\"/>\n      <xsd:enumeration value=\"dblStrike\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextCapsType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"small\"/>\n      <xsd:enumeration value=\"all\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextCharacterProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"ln\" type=\"CT_LineProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_FillProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_EffectProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"highlight\" type=\"CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_TextUnderlineLine\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_TextUnderlineFill\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"latin\" type=\"CT_TextFont\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ea\" type=\"CT_TextFont\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cs\" type=\"CT_TextFont\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sym\" type=\"CT_TextFont\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hlinkClick\" type=\"CT_Hyperlink\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hlinkMouseOver\" type=\"CT_Hyperlink\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"rtl\" type=\"CT_Boolean\" minOccurs=\"0\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"kumimoji\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"lang\" type=\"s:ST_Lang\" use=\"optional\"/>\n    <xsd:attribute name=\"altLang\" type=\"s:ST_Lang\" use=\"optional\"/>\n    <xsd:attribute name=\"sz\" type=\"ST_TextFontSize\" use=\"optional\"/>\n    <xsd:attribute name=\"b\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"i\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"u\" type=\"ST_TextUnderlineType\" use=\"optional\"/>\n    <xsd:attribute name=\"strike\" type=\"ST_TextStrikeType\" use=\"optional\"/>\n    <xsd:attribute name=\"kern\" type=\"ST_TextNonNegativePoint\" use=\"optional\"/>\n    <xsd:attribute name=\"cap\" type=\"ST_TextCapsType\" use=\"optional\" default=\"none\"/>\n    <xsd:attribute name=\"spc\" type=\"ST_TextPoint\" use=\"optional\"/>\n    <xsd:attribute name=\"normalizeH\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"baseline\" type=\"ST_Percentage\" use=\"optional\"/>\n    <xsd:attribute name=\"noProof\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"dirty\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"err\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"smtClean\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"smtId\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"bmk\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Boolean\">\n    <xsd:attribute name=\"val\" type=\"s:ST_OnOff\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextSpacingPoint\">\n    <xsd:restriction base=\"xsd:int\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"158400\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextSpacingPercentOrPercentString\">\n    <xsd:union memberTypes=\"ST_TextSpacingPercent s:ST_Percentage\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextSpacingPercent\">\n    <xsd:restriction base=\"ST_PercentageDecimal\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"13200000\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextSpacingPercent\">\n    <xsd:attribute name=\"val\" type=\"ST_TextSpacingPercentOrPercentString\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextSpacingPoint\">\n    <xsd:attribute name=\"val\" type=\"ST_TextSpacingPoint\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextMargin\">\n    <xsd:restriction base=\"ST_Coordinate32Unqualified\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"51206400\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextIndent\">\n    <xsd:restriction base=\"ST_Coordinate32Unqualified\">\n      <xsd:minInclusive value=\"-51206400\"/>\n      <xsd:maxInclusive value=\"51206400\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextTabAlignType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"r\"/>\n      <xsd:enumeration value=\"dec\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextTabStop\">\n    <xsd:attribute name=\"pos\" type=\"ST_Coordinate32\" use=\"optional\"/>\n    <xsd:attribute name=\"algn\" type=\"ST_TextTabAlignType\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextTabStopList\">\n    <xsd:sequence>\n      <xsd:element name=\"tab\" type=\"CT_TextTabStop\" minOccurs=\"0\" maxOccurs=\"32\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextLineBreak\">\n    <xsd:sequence>\n      <xsd:element name=\"rPr\" type=\"CT_TextCharacterProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextSpacing\">\n    <xsd:choice>\n      <xsd:element name=\"spcPct\" type=\"CT_TextSpacingPercent\"/>\n      <xsd:element name=\"spcPts\" type=\"CT_TextSpacingPoint\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextAlignType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"r\"/>\n      <xsd:enumeration value=\"just\"/>\n      <xsd:enumeration value=\"justLow\"/>\n      <xsd:enumeration value=\"dist\"/>\n      <xsd:enumeration value=\"thaiDist\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextFontAlignType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"auto\"/>\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"ctr\"/>\n      <xsd:enumeration value=\"base\"/>\n      <xsd:enumeration value=\"b\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextIndentLevelType\">\n    <xsd:restriction base=\"xsd:int\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"8\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextParagraphProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"lnSpc\" type=\"CT_TextSpacing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spcBef\" type=\"CT_TextSpacing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spcAft\" type=\"CT_TextSpacing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_TextBulletColor\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_TextBulletSize\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_TextBulletTypeface\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_TextBullet\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tabLst\" type=\"CT_TextTabStopList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"defRPr\" type=\"CT_TextCharacterProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"marL\" type=\"ST_TextMargin\" use=\"optional\"/>\n    <xsd:attribute name=\"marR\" type=\"ST_TextMargin\" use=\"optional\"/>\n    <xsd:attribute name=\"lvl\" type=\"ST_TextIndentLevelType\" use=\"optional\"/>\n    <xsd:attribute name=\"indent\" type=\"ST_TextIndent\" use=\"optional\"/>\n    <xsd:attribute name=\"algn\" type=\"ST_TextAlignType\" use=\"optional\"/>\n    <xsd:attribute name=\"defTabSz\" type=\"ST_Coordinate32\" use=\"optional\"/>\n    <xsd:attribute name=\"rtl\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"eaLnBrk\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"fontAlgn\" type=\"ST_TextFontAlignType\" use=\"optional\"/>\n    <xsd:attribute name=\"latinLnBrk\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"hangingPunct\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextField\">\n    <xsd:sequence>\n      <xsd:element name=\"rPr\" type=\"CT_TextCharacterProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pPr\" type=\"CT_TextParagraphProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"t\" type=\"xsd:string\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"s:ST_Guid\" use=\"required\"/>\n    <xsd:attribute name=\"type\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_TextRun\">\n    <xsd:choice>\n      <xsd:element name=\"r\" type=\"CT_RegularTextRun\"/>\n      <xsd:element name=\"br\" type=\"CT_TextLineBreak\"/>\n      <xsd:element name=\"fld\" type=\"CT_TextField\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_RegularTextRun\">\n    <xsd:sequence>\n      <xsd:element name=\"rPr\" type=\"CT_TextCharacterProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"t\" type=\"xsd:string\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/drawingml/2006/picture\"\n  xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" elementFormDefault=\"qualified\"\n  targetNamespace=\"http://schemas.openxmlformats.org/drawingml/2006/picture\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n    schemaLocation=\"dml-main.xsd\"/>\n  <xsd:complexType name=\"CT_PictureNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvPicPr\" type=\"a:CT_NonVisualPictureProperties\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Picture\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"nvPicPr\" type=\"CT_PictureNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"blipFill\" type=\"a:CT_BlipFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"pic\" type=\"CT_Picture\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n  xmlns=\"http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  targetNamespace=\"http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing\"\n  elementFormDefault=\"qualified\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n    schemaLocation=\"dml-main.xsd\"/>\n  <xsd:import schemaLocation=\"shared-relationshipReference.xsd\"\n    namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"/>\n  <xsd:element name=\"from\" type=\"CT_Marker\"/>\n  <xsd:element name=\"to\" type=\"CT_Marker\"/>\n  <xsd:complexType name=\"CT_AnchorClientData\">\n    <xsd:attribute name=\"fLocksWithSheet\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"fPrintsWithSheet\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ShapeNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvSpPr\" type=\"a:CT_NonVisualDrawingShapeProps\" minOccurs=\"1\" maxOccurs=\"1\"\n      />\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Shape\">\n    <xsd:sequence>\n      <xsd:element name=\"nvSpPr\" type=\"CT_ShapeNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"a:CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txBody\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"macro\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"textlink\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"fLocksText\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"fPublished\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ConnectorNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvCxnSpPr\" type=\"a:CT_NonVisualConnectorProperties\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Connector\">\n    <xsd:sequence>\n      <xsd:element name=\"nvCxnSpPr\" type=\"CT_ConnectorNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"a:CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"macro\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"fPublished\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PictureNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvPicPr\" type=\"a:CT_NonVisualPictureProperties\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Picture\">\n    <xsd:sequence>\n      <xsd:element name=\"nvPicPr\" type=\"CT_PictureNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"blipFill\" type=\"a:CT_BlipFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"a:CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"macro\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"fPublished\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GraphicalObjectFrameNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvGraphicFramePr\" type=\"a:CT_NonVisualGraphicFrameProperties\"\n        minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GraphicalObjectFrame\">\n    <xsd:sequence>\n      <xsd:element name=\"nvGraphicFramePr\" type=\"CT_GraphicalObjectFrameNonVisual\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"xfrm\" type=\"a:CT_Transform2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element ref=\"a:graphic\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"macro\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"fPublished\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupShapeNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvGrpSpPr\" type=\"a:CT_NonVisualGroupDrawingShapeProps\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupShape\">\n    <xsd:sequence>\n      <xsd:element name=\"nvGrpSpPr\" type=\"CT_GroupShapeNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"grpSpPr\" type=\"a:CT_GroupShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n        <xsd:element name=\"sp\" type=\"CT_Shape\"/>\n        <xsd:element name=\"grpSp\" type=\"CT_GroupShape\"/>\n        <xsd:element name=\"graphicFrame\" type=\"CT_GraphicalObjectFrame\"/>\n        <xsd:element name=\"cxnSp\" type=\"CT_Connector\"/>\n        <xsd:element name=\"pic\" type=\"CT_Picture\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ObjectChoices\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n        <xsd:element name=\"sp\" type=\"CT_Shape\"/>\n        <xsd:element name=\"grpSp\" type=\"CT_GroupShape\"/>\n        <xsd:element name=\"graphicFrame\" type=\"CT_GraphicalObjectFrame\"/>\n        <xsd:element name=\"cxnSp\" type=\"CT_Connector\"/>\n        <xsd:element name=\"pic\" type=\"CT_Picture\"/>\n        <xsd:element name=\"contentPart\" type=\"CT_Rel\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_Rel\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ColID\">\n    <xsd:restriction base=\"xsd:int\">\n      <xsd:minInclusive value=\"0\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_RowID\">\n    <xsd:restriction base=\"xsd:int\">\n      <xsd:minInclusive value=\"0\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Marker\">\n    <xsd:sequence>\n      <xsd:element name=\"col\" type=\"ST_ColID\"/>\n      <xsd:element name=\"colOff\" type=\"a:ST_Coordinate\"/>\n      <xsd:element name=\"row\" type=\"ST_RowID\"/>\n      <xsd:element name=\"rowOff\" type=\"a:ST_Coordinate\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_EditAs\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"twoCell\"/>\n      <xsd:enumeration value=\"oneCell\"/>\n      <xsd:enumeration value=\"absolute\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TwoCellAnchor\">\n    <xsd:sequence>\n      <xsd:element name=\"from\" type=\"CT_Marker\"/>\n      <xsd:element name=\"to\" type=\"CT_Marker\"/>\n      <xsd:group ref=\"EG_ObjectChoices\"/>\n      <xsd:element name=\"clientData\" type=\"CT_AnchorClientData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"editAs\" type=\"ST_EditAs\" use=\"optional\" default=\"twoCell\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OneCellAnchor\">\n    <xsd:sequence>\n      <xsd:element name=\"from\" type=\"CT_Marker\"/>\n      <xsd:element name=\"ext\" type=\"a:CT_PositiveSize2D\"/>\n      <xsd:group ref=\"EG_ObjectChoices\"/>\n      <xsd:element name=\"clientData\" type=\"CT_AnchorClientData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AbsoluteAnchor\">\n    <xsd:sequence>\n      <xsd:element name=\"pos\" type=\"a:CT_Point2D\"/>\n      <xsd:element name=\"ext\" type=\"a:CT_PositiveSize2D\"/>\n      <xsd:group ref=\"EG_ObjectChoices\"/>\n      <xsd:element name=\"clientData\" type=\"CT_AnchorClientData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_Anchor\">\n    <xsd:choice>\n      <xsd:element name=\"twoCellAnchor\" type=\"CT_TwoCellAnchor\"/>\n      <xsd:element name=\"oneCellAnchor\" type=\"CT_OneCellAnchor\"/>\n      <xsd:element name=\"absoluteAnchor\" type=\"CT_AbsoluteAnchor\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_Drawing\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_Anchor\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"wsDr\" type=\"CT_Drawing\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n  xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"\n  xmlns:dpct=\"http://schemas.openxmlformats.org/drawingml/2006/picture\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  xmlns=\"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing\"\n  targetNamespace=\"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing\"\n  elementFormDefault=\"qualified\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n    schemaLocation=\"dml-main.xsd\"/>\n  <xsd:import schemaLocation=\"wml.xsd\"\n    namespace=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/picture\"\n    schemaLocation=\"dml-picture.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n    schemaLocation=\"shared-relationshipReference.xsd\"/>\n  <xsd:complexType name=\"CT_EffectExtent\">\n    <xsd:attribute name=\"l\" type=\"a:ST_Coordinate\" use=\"required\"/>\n    <xsd:attribute name=\"t\" type=\"a:ST_Coordinate\" use=\"required\"/>\n    <xsd:attribute name=\"r\" type=\"a:ST_Coordinate\" use=\"required\"/>\n    <xsd:attribute name=\"b\" type=\"a:ST_Coordinate\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_WrapDistance\">\n    <xsd:restriction base=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Inline\">\n    <xsd:sequence>\n      <xsd:element name=\"extent\" type=\"a:CT_PositiveSize2D\"/>\n      <xsd:element name=\"effectExtent\" type=\"CT_EffectExtent\" minOccurs=\"0\"/>\n      <xsd:element name=\"docPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvGraphicFramePr\" type=\"a:CT_NonVisualGraphicFrameProperties\"\n        minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element ref=\"a:graphic\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"distT\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"distB\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"distL\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"distR\" type=\"ST_WrapDistance\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_WrapText\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"bothSides\"/>\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"right\"/>\n      <xsd:enumeration value=\"largest\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_WrapPath\">\n    <xsd:sequence>\n      <xsd:element name=\"start\" type=\"a:CT_Point2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"lineTo\" type=\"a:CT_Point2D\" minOccurs=\"2\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"edited\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WrapNone\"/>\n  <xsd:complexType name=\"CT_WrapSquare\">\n    <xsd:sequence>\n      <xsd:element name=\"effectExtent\" type=\"CT_EffectExtent\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"wrapText\" type=\"ST_WrapText\" use=\"required\"/>\n    <xsd:attribute name=\"distT\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"distB\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"distL\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"distR\" type=\"ST_WrapDistance\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WrapTight\">\n    <xsd:sequence>\n      <xsd:element name=\"wrapPolygon\" type=\"CT_WrapPath\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"wrapText\" type=\"ST_WrapText\" use=\"required\"/>\n    <xsd:attribute name=\"distL\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"distR\" type=\"ST_WrapDistance\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WrapThrough\">\n    <xsd:sequence>\n      <xsd:element name=\"wrapPolygon\" type=\"CT_WrapPath\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"wrapText\" type=\"ST_WrapText\" use=\"required\"/>\n    <xsd:attribute name=\"distL\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"distR\" type=\"ST_WrapDistance\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WrapTopBottom\">\n    <xsd:sequence>\n      <xsd:element name=\"effectExtent\" type=\"CT_EffectExtent\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"distT\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"distB\" type=\"ST_WrapDistance\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_WrapType\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n        <xsd:element name=\"wrapNone\" type=\"CT_WrapNone\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"wrapSquare\" type=\"CT_WrapSquare\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"wrapTight\" type=\"CT_WrapTight\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"wrapThrough\" type=\"CT_WrapThrough\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"wrapTopAndBottom\" type=\"CT_WrapTopBottom\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:simpleType name=\"ST_PositionOffset\">\n    <xsd:restriction base=\"xsd:int\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AlignH\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"right\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"inside\"/>\n      <xsd:enumeration value=\"outside\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_RelFromH\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"margin\"/>\n      <xsd:enumeration value=\"page\"/>\n      <xsd:enumeration value=\"column\"/>\n      <xsd:enumeration value=\"character\"/>\n      <xsd:enumeration value=\"leftMargin\"/>\n      <xsd:enumeration value=\"rightMargin\"/>\n      <xsd:enumeration value=\"insideMargin\"/>\n      <xsd:enumeration value=\"outsideMargin\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PosH\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n        <xsd:element name=\"align\" type=\"ST_AlignH\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"posOffset\" type=\"ST_PositionOffset\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n    </xsd:sequence>\n    <xsd:attribute name=\"relativeFrom\" type=\"ST_RelFromH\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_AlignV\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"top\"/>\n      <xsd:enumeration value=\"bottom\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"inside\"/>\n      <xsd:enumeration value=\"outside\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_RelFromV\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"margin\"/>\n      <xsd:enumeration value=\"page\"/>\n      <xsd:enumeration value=\"paragraph\"/>\n      <xsd:enumeration value=\"line\"/>\n      <xsd:enumeration value=\"topMargin\"/>\n      <xsd:enumeration value=\"bottomMargin\"/>\n      <xsd:enumeration value=\"insideMargin\"/>\n      <xsd:enumeration value=\"outsideMargin\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PosV\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n        <xsd:element name=\"align\" type=\"ST_AlignV\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"posOffset\" type=\"ST_PositionOffset\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      </xsd:choice>\n    </xsd:sequence>\n    <xsd:attribute name=\"relativeFrom\" type=\"ST_RelFromV\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Anchor\">\n    <xsd:sequence>\n      <xsd:element name=\"simplePos\" type=\"a:CT_Point2D\"/>\n      <xsd:element name=\"positionH\" type=\"CT_PosH\"/>\n      <xsd:element name=\"positionV\" type=\"CT_PosV\"/>\n      <xsd:element name=\"extent\" type=\"a:CT_PositiveSize2D\"/>\n      <xsd:element name=\"effectExtent\" type=\"CT_EffectExtent\" minOccurs=\"0\"/>\n      <xsd:group ref=\"EG_WrapType\"/>\n      <xsd:element name=\"docPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvGraphicFramePr\" type=\"a:CT_NonVisualGraphicFrameProperties\"\n        minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element ref=\"a:graphic\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"distT\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"distB\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"distL\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"distR\" type=\"ST_WrapDistance\" use=\"optional\"/>\n    <xsd:attribute name=\"simplePos\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"relativeHeight\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"behindDoc\" type=\"xsd:boolean\" use=\"required\"/>\n    <xsd:attribute name=\"locked\" type=\"xsd:boolean\" use=\"required\"/>\n    <xsd:attribute name=\"layoutInCell\" type=\"xsd:boolean\" use=\"required\"/>\n    <xsd:attribute name=\"hidden\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"allowOverlap\" type=\"xsd:boolean\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TxbxContent\">\n    <xsd:group ref=\"w:EG_BlockLevelElts\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextboxInfo\">\n    <xsd:sequence>\n      <xsd:element name=\"txbxContent\" type=\"CT_TxbxContent\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedShort\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LinkedTextboxInformation\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedShort\" use=\"required\"/>\n    <xsd:attribute name=\"seq\" type=\"xsd:unsignedShort\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WordprocessingShape\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n        <xsd:element name=\"cNvSpPr\" type=\"a:CT_NonVisualDrawingShapeProps\" minOccurs=\"1\"\n          maxOccurs=\"1\"/>\n        <xsd:element name=\"cNvCnPr\" type=\"a:CT_NonVisualConnectorProperties\" minOccurs=\"1\"\n          maxOccurs=\"1\"/>\n      </xsd:choice>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"a:CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"1\">\n        <xsd:element name=\"txbx\" type=\"CT_TextboxInfo\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"linkedTxbx\" type=\"CT_LinkedTextboxInformation\" minOccurs=\"1\"\n          maxOccurs=\"1\"/>\n      </xsd:choice>\n      <xsd:element name=\"bodyPr\" type=\"a:CT_TextBodyProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"normalEastAsianFlow\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GraphicFrame\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvFrPr\" type=\"a:CT_NonVisualGraphicFrameProperties\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"xfrm\" type=\"a:CT_Transform2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element ref=\"a:graphic\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WordprocessingContentPartNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvContentPartPr\" type=\"a:CT_NonVisualContentPartProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WordprocessingContentPart\">\n    <xsd:sequence>\n      <xsd:element name=\"nvContentPartPr\" type=\"CT_WordprocessingContentPartNonVisual\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"xfrm\" type=\"a:CT_Transform2D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"bwMode\" type=\"a:ST_BlackWhiteMode\" use=\"optional\"/>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WordprocessingGroup\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvGrpSpPr\" type=\"a:CT_NonVisualGroupDrawingShapeProps\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"grpSpPr\" type=\"a:CT_GroupShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n        <xsd:element ref=\"wsp\"/>\n        <xsd:element name=\"grpSp\" type=\"CT_WordprocessingGroup\"/>\n        <xsd:element name=\"graphicFrame\" type=\"CT_GraphicFrame\"/>\n        <xsd:element ref=\"dpct:pic\"/>\n        <xsd:element name=\"contentPart\" type=\"CT_WordprocessingContentPart\"/>\n      </xsd:choice>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WordprocessingCanvas\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"bg\" type=\"a:CT_BackgroundFormatting\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"whole\" type=\"a:CT_WholeE2oFormatting\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n        <xsd:element ref=\"wsp\"/>\n        <xsd:element ref=\"dpct:pic\"/>\n        <xsd:element name=\"contentPart\" type=\"CT_WordprocessingContentPart\"/>\n        <xsd:element ref=\"wgp\"/>\n        <xsd:element name=\"graphicFrame\" type=\"CT_GraphicFrame\"/>\n      </xsd:choice>\n      <xsd:element name=\"extLst\" type=\"a:CT_OfficeArtExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"wpc\" type=\"CT_WordprocessingCanvas\"/>\n  <xsd:element name=\"wgp\" type=\"CT_WordprocessingGroup\"/>\n  <xsd:element name=\"wsp\" type=\"CT_WordprocessingShape\"/>\n  <xsd:element name=\"inline\" type=\"CT_Inline\"/>\n  <xsd:element name=\"anchor\" type=\"CT_Anchor\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/presentationml/2006/main\"\n  xmlns:p=\"http://schemas.openxmlformats.org/presentationml/2006/main\"\n  xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  elementFormDefault=\"qualified\"\n  targetNamespace=\"http://schemas.openxmlformats.org/presentationml/2006/main\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n    schemaLocation=\"shared-relationshipReference.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/main\"\n    schemaLocation=\"dml-main.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:simpleType name=\"ST_TransitionSideDirectionType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"l\"/>\n      <xsd:enumeration value=\"u\"/>\n      <xsd:enumeration value=\"r\"/>\n      <xsd:enumeration value=\"d\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TransitionCornerDirectionType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"lu\"/>\n      <xsd:enumeration value=\"ru\"/>\n      <xsd:enumeration value=\"ld\"/>\n      <xsd:enumeration value=\"rd\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TransitionInOutDirectionType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"out\"/>\n      <xsd:enumeration value=\"in\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SideDirectionTransition\">\n    <xsd:attribute name=\"dir\" type=\"ST_TransitionSideDirectionType\" use=\"optional\" default=\"l\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CornerDirectionTransition\">\n    <xsd:attribute name=\"dir\" type=\"ST_TransitionCornerDirectionType\" use=\"optional\" default=\"lu\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TransitionEightDirectionType\">\n    <xsd:union memberTypes=\"ST_TransitionSideDirectionType ST_TransitionCornerDirectionType\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_EightDirectionTransition\">\n    <xsd:attribute name=\"dir\" type=\"ST_TransitionEightDirectionType\" use=\"optional\" default=\"l\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OrientationTransition\">\n    <xsd:attribute name=\"dir\" type=\"ST_Direction\" use=\"optional\" default=\"horz\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_InOutTransition\">\n    <xsd:attribute name=\"dir\" type=\"ST_TransitionInOutDirectionType\" use=\"optional\" default=\"out\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OptionalBlackTransition\">\n    <xsd:attribute name=\"thruBlk\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SplitTransition\">\n    <xsd:attribute name=\"orient\" type=\"ST_Direction\" use=\"optional\" default=\"horz\"/>\n    <xsd:attribute name=\"dir\" type=\"ST_TransitionInOutDirectionType\" use=\"optional\" default=\"out\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WheelTransition\">\n    <xsd:attribute name=\"spokes\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"4\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TransitionStartSoundAction\">\n    <xsd:sequence>\n      <xsd:element minOccurs=\"1\" maxOccurs=\"1\" name=\"snd\" type=\"a:CT_EmbeddedWAVAudioFile\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"loop\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TransitionSoundAction\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"stSnd\" type=\"CT_TransitionStartSoundAction\"/>\n      <xsd:element name=\"endSnd\" type=\"CT_Empty\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TransitionSpeed\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"slow\"/>\n      <xsd:enumeration value=\"med\"/>\n      <xsd:enumeration value=\"fast\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SlideTransition\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"1\">\n        <xsd:element name=\"blinds\" type=\"CT_OrientationTransition\"/>\n        <xsd:element name=\"checker\" type=\"CT_OrientationTransition\"/>\n        <xsd:element name=\"circle\" type=\"CT_Empty\"/>\n        <xsd:element name=\"dissolve\" type=\"CT_Empty\"/>\n        <xsd:element name=\"comb\" type=\"CT_OrientationTransition\"/>\n        <xsd:element name=\"cover\" type=\"CT_EightDirectionTransition\"/>\n        <xsd:element name=\"cut\" type=\"CT_OptionalBlackTransition\"/>\n        <xsd:element name=\"diamond\" type=\"CT_Empty\"/>\n        <xsd:element name=\"fade\" type=\"CT_OptionalBlackTransition\"/>\n        <xsd:element name=\"newsflash\" type=\"CT_Empty\"/>\n        <xsd:element name=\"plus\" type=\"CT_Empty\"/>\n        <xsd:element name=\"pull\" type=\"CT_EightDirectionTransition\"/>\n        <xsd:element name=\"push\" type=\"CT_SideDirectionTransition\"/>\n        <xsd:element name=\"random\" type=\"CT_Empty\"/>\n        <xsd:element name=\"randomBar\" type=\"CT_OrientationTransition\"/>\n        <xsd:element name=\"split\" type=\"CT_SplitTransition\"/>\n        <xsd:element name=\"strips\" type=\"CT_CornerDirectionTransition\"/>\n        <xsd:element name=\"wedge\" type=\"CT_Empty\"/>\n        <xsd:element name=\"wheel\" type=\"CT_WheelTransition\"/>\n        <xsd:element name=\"wipe\" type=\"CT_SideDirectionTransition\"/>\n        <xsd:element name=\"zoom\" type=\"CT_InOutTransition\"/>\n      </xsd:choice>\n      <xsd:element name=\"sndAc\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_TransitionSoundAction\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"spd\" type=\"ST_TransitionSpeed\" use=\"optional\" default=\"fast\"/>\n    <xsd:attribute name=\"advClick\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"advTm\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLTimeIndefinite\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"indefinite\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLTime\">\n    <xsd:union memberTypes=\"xsd:unsignedInt ST_TLTimeIndefinite\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLTimeNodeID\">\n    <xsd:restriction base=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLIterateIntervalTime\">\n    <xsd:attribute name=\"val\" type=\"ST_TLTime\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLIterateIntervalPercentage\">\n    <xsd:attribute name=\"val\" type=\"a:ST_PositivePercentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_IterateType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"el\"/>\n      <xsd:enumeration value=\"wd\"/>\n      <xsd:enumeration value=\"lt\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLIterateData\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"tmAbs\" type=\"CT_TLIterateIntervalTime\"/>\n      <xsd:element name=\"tmPct\" type=\"CT_TLIterateIntervalPercentage\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"type\" type=\"ST_IterateType\" use=\"optional\" default=\"el\"/>\n    <xsd:attribute name=\"backwards\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLSubShapeId\">\n    <xsd:attribute name=\"spid\" type=\"a:ST_ShapeID\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLTextTargetElement\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"1\">\n      <xsd:element name=\"charRg\" type=\"CT_IndexRange\"/>\n      <xsd:element name=\"pRg\" type=\"CT_IndexRange\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLChartSubelementType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"gridLegend\"/>\n      <xsd:enumeration value=\"series\"/>\n      <xsd:enumeration value=\"category\"/>\n      <xsd:enumeration value=\"ptInSeries\"/>\n      <xsd:enumeration value=\"ptInCategory\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLOleChartTargetElement\">\n    <xsd:attribute name=\"type\" type=\"ST_TLChartSubelementType\" use=\"required\"/>\n    <xsd:attribute name=\"lvl\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLShapeTargetElement\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"1\">\n      <xsd:element name=\"bg\" type=\"CT_Empty\"/>\n      <xsd:element name=\"subSp\" type=\"CT_TLSubShapeId\"/>\n      <xsd:element name=\"oleChartEl\" type=\"CT_TLOleChartTargetElement\"/>\n      <xsd:element name=\"txEl\" type=\"CT_TLTextTargetElement\"/>\n      <xsd:element name=\"graphicEl\" type=\"a:CT_AnimationElementChoice\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"spid\" type=\"a:ST_DrawingElementId\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLTimeTargetElement\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"sldTgt\" type=\"CT_Empty\"/>\n      <xsd:element name=\"sndTgt\" type=\"a:CT_EmbeddedWAVAudioFile\"/>\n      <xsd:element name=\"spTgt\" type=\"CT_TLShapeTargetElement\"/>\n      <xsd:element name=\"inkTgt\" type=\"CT_TLSubShapeId\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLTriggerTimeNodeID\">\n    <xsd:attribute name=\"val\" type=\"ST_TLTimeNodeID\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLTriggerRuntimeNode\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"first\"/>\n      <xsd:enumeration value=\"last\"/>\n      <xsd:enumeration value=\"all\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLTriggerRuntimeNode\">\n    <xsd:attribute name=\"val\" type=\"ST_TLTriggerRuntimeNode\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLTriggerEvent\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"onBegin\"/>\n      <xsd:enumeration value=\"onEnd\"/>\n      <xsd:enumeration value=\"begin\"/>\n      <xsd:enumeration value=\"end\"/>\n      <xsd:enumeration value=\"onClick\"/>\n      <xsd:enumeration value=\"onDblClick\"/>\n      <xsd:enumeration value=\"onMouseOver\"/>\n      <xsd:enumeration value=\"onMouseOut\"/>\n      <xsd:enumeration value=\"onNext\"/>\n      <xsd:enumeration value=\"onPrev\"/>\n      <xsd:enumeration value=\"onStopAudio\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLTimeCondition\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"1\">\n      <xsd:element name=\"tgtEl\" type=\"CT_TLTimeTargetElement\"/>\n      <xsd:element name=\"tn\" type=\"CT_TLTriggerTimeNodeID\"/>\n      <xsd:element name=\"rtn\" type=\"CT_TLTriggerRuntimeNode\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"evt\" use=\"optional\" type=\"ST_TLTriggerEvent\"/>\n    <xsd:attribute name=\"delay\" type=\"ST_TLTime\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLTimeConditionList\">\n    <xsd:sequence>\n      <xsd:element name=\"cond\" type=\"CT_TLTimeCondition\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TimeNodeList\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"par\" type=\"CT_TLTimeNodeParallel\"/>\n      <xsd:element name=\"seq\" type=\"CT_TLTimeNodeSequence\"/>\n      <xsd:element name=\"excl\" type=\"CT_TLTimeNodeExclusive\"/>\n      <xsd:element name=\"anim\" type=\"CT_TLAnimateBehavior\"/>\n      <xsd:element name=\"animClr\" type=\"CT_TLAnimateColorBehavior\"/>\n      <xsd:element name=\"animEffect\" type=\"CT_TLAnimateEffectBehavior\"/>\n      <xsd:element name=\"animMotion\" type=\"CT_TLAnimateMotionBehavior\"/>\n      <xsd:element name=\"animRot\" type=\"CT_TLAnimateRotationBehavior\"/>\n      <xsd:element name=\"animScale\" type=\"CT_TLAnimateScaleBehavior\"/>\n      <xsd:element name=\"cmd\" type=\"CT_TLCommandBehavior\"/>\n      <xsd:element name=\"set\" type=\"CT_TLSetBehavior\"/>\n      <xsd:element name=\"audio\" type=\"CT_TLMediaNodeAudio\"/>\n      <xsd:element name=\"video\" type=\"CT_TLMediaNodeVideo\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLTimeNodePresetClassType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"entr\"/>\n      <xsd:enumeration value=\"exit\"/>\n      <xsd:enumeration value=\"emph\"/>\n      <xsd:enumeration value=\"path\"/>\n      <xsd:enumeration value=\"verb\"/>\n      <xsd:enumeration value=\"mediacall\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLTimeNodeRestartType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"always\"/>\n      <xsd:enumeration value=\"whenNotActive\"/>\n      <xsd:enumeration value=\"never\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLTimeNodeFillType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"remove\"/>\n      <xsd:enumeration value=\"freeze\"/>\n      <xsd:enumeration value=\"hold\"/>\n      <xsd:enumeration value=\"transition\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLTimeNodeSyncType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"canSlip\"/>\n      <xsd:enumeration value=\"locked\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLTimeNodeMasterRelation\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"sameClick\"/>\n      <xsd:enumeration value=\"lastClick\"/>\n      <xsd:enumeration value=\"nextClick\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLTimeNodeType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"clickEffect\"/>\n      <xsd:enumeration value=\"withEffect\"/>\n      <xsd:enumeration value=\"afterEffect\"/>\n      <xsd:enumeration value=\"mainSeq\"/>\n      <xsd:enumeration value=\"interactiveSeq\"/>\n      <xsd:enumeration value=\"clickPar\"/>\n      <xsd:enumeration value=\"withGroup\"/>\n      <xsd:enumeration value=\"afterGroup\"/>\n      <xsd:enumeration value=\"tmRoot\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLCommonTimeNodeData\">\n    <xsd:sequence>\n      <xsd:element name=\"stCondLst\" type=\"CT_TLTimeConditionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"endCondLst\" type=\"CT_TLTimeConditionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"endSync\" type=\"CT_TLTimeCondition\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"iterate\" type=\"CT_TLIterateData\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"childTnLst\" type=\"CT_TimeNodeList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"subTnLst\" type=\"CT_TimeNodeList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"ST_TLTimeNodeID\" use=\"optional\"/>\n    <xsd:attribute name=\"presetID\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"presetClass\" type=\"ST_TLTimeNodePresetClassType\" use=\"optional\"/>\n    <xsd:attribute name=\"presetSubtype\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"dur\" type=\"ST_TLTime\" use=\"optional\"/>\n    <xsd:attribute name=\"repeatCount\" type=\"ST_TLTime\" use=\"optional\" default=\"1000\"/>\n    <xsd:attribute name=\"repeatDur\" type=\"ST_TLTime\" use=\"optional\"/>\n    <xsd:attribute name=\"spd\" type=\"a:ST_Percentage\" use=\"optional\" default=\"100%\"/>\n    <xsd:attribute name=\"accel\" type=\"a:ST_PositiveFixedPercentage\" use=\"optional\" default=\"0%\"/>\n    <xsd:attribute name=\"decel\" type=\"a:ST_PositiveFixedPercentage\" use=\"optional\" default=\"0%\"/>\n    <xsd:attribute name=\"autoRev\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"restart\" type=\"ST_TLTimeNodeRestartType\" use=\"optional\"/>\n    <xsd:attribute name=\"fill\" type=\"ST_TLTimeNodeFillType\" use=\"optional\"/>\n    <xsd:attribute name=\"syncBehavior\" type=\"ST_TLTimeNodeSyncType\" use=\"optional\"/>\n    <xsd:attribute name=\"tmFilter\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"evtFilter\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"display\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"masterRel\" type=\"ST_TLTimeNodeMasterRelation\" use=\"optional\"/>\n    <xsd:attribute name=\"bldLvl\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"grpId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"afterEffect\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"nodeType\" type=\"ST_TLTimeNodeType\" use=\"optional\"/>\n    <xsd:attribute name=\"nodePh\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLTimeNodeParallel\">\n    <xsd:sequence>\n      <xsd:element name=\"cTn\" type=\"CT_TLCommonTimeNodeData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLNextActionType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"seek\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLPreviousActionType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"skipTimed\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLTimeNodeSequence\">\n    <xsd:sequence>\n      <xsd:element name=\"cTn\" type=\"CT_TLCommonTimeNodeData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"prevCondLst\" type=\"CT_TLTimeConditionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"nextCondLst\" type=\"CT_TLTimeConditionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"concurrent\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"prevAc\" type=\"ST_TLPreviousActionType\" use=\"optional\"/>\n    <xsd:attribute name=\"nextAc\" type=\"ST_TLNextActionType\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLTimeNodeExclusive\">\n    <xsd:sequence>\n      <xsd:element name=\"cTn\" type=\"CT_TLCommonTimeNodeData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLBehaviorAttributeNameList\">\n    <xsd:sequence>\n      <xsd:element name=\"attrName\" type=\"xsd:string\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLBehaviorAdditiveType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"base\"/>\n      <xsd:enumeration value=\"sum\"/>\n      <xsd:enumeration value=\"repl\"/>\n      <xsd:enumeration value=\"mult\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLBehaviorAccumulateType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"always\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLBehaviorTransformType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"pt\"/>\n      <xsd:enumeration value=\"img\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLBehaviorOverrideType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"normal\"/>\n      <xsd:enumeration value=\"childStyle\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLCommonBehaviorData\">\n    <xsd:sequence>\n      <xsd:element name=\"cTn\" type=\"CT_TLCommonTimeNodeData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tgtEl\" type=\"CT_TLTimeTargetElement\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"attrNameLst\" type=\"CT_TLBehaviorAttributeNameList\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"additive\" type=\"ST_TLBehaviorAdditiveType\" use=\"optional\"/>\n    <xsd:attribute name=\"accumulate\" type=\"ST_TLBehaviorAccumulateType\" use=\"optional\"/>\n    <xsd:attribute name=\"xfrmType\" type=\"ST_TLBehaviorTransformType\" use=\"optional\"/>\n    <xsd:attribute name=\"from\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"to\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"by\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"rctx\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"override\" type=\"ST_TLBehaviorOverrideType\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLAnimVariantBooleanVal\">\n    <xsd:attribute name=\"val\" type=\"xsd:boolean\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLAnimVariantIntegerVal\">\n    <xsd:attribute name=\"val\" type=\"xsd:int\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLAnimVariantFloatVal\">\n    <xsd:attribute name=\"val\" type=\"xsd:float\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLAnimVariantStringVal\">\n    <xsd:attribute name=\"val\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLAnimVariant\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"boolVal\" type=\"CT_TLAnimVariantBooleanVal\"/>\n      <xsd:element name=\"intVal\" type=\"CT_TLAnimVariantIntegerVal\"/>\n      <xsd:element name=\"fltVal\" type=\"CT_TLAnimVariantFloatVal\"/>\n      <xsd:element name=\"strVal\" type=\"CT_TLAnimVariantStringVal\"/>\n      <xsd:element name=\"clrVal\" type=\"a:CT_Color\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLTimeAnimateValueTime\">\n    <xsd:union memberTypes=\"a:ST_PositiveFixedPercentage ST_TLTimeIndefinite\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLTimeAnimateValue\">\n    <xsd:sequence>\n      <xsd:element name=\"val\" type=\"CT_TLAnimVariant\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"tm\" type=\"ST_TLTimeAnimateValueTime\" use=\"optional\" default=\"indefinite\"/>\n    <xsd:attribute name=\"fmla\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLTimeAnimateValueList\">\n    <xsd:sequence>\n      <xsd:element name=\"tav\" type=\"CT_TLTimeAnimateValue\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLAnimateBehaviorCalcMode\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"discrete\"/>\n      <xsd:enumeration value=\"lin\"/>\n      <xsd:enumeration value=\"fmla\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLAnimateBehaviorValueType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"str\"/>\n      <xsd:enumeration value=\"num\"/>\n      <xsd:enumeration value=\"clr\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLAnimateBehavior\">\n    <xsd:sequence>\n      <xsd:element name=\"cBhvr\" type=\"CT_TLCommonBehaviorData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tavLst\" type=\"CT_TLTimeAnimateValueList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"by\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"from\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"to\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"calcmode\" type=\"ST_TLAnimateBehaviorCalcMode\" use=\"optional\"/>\n    <xsd:attribute name=\"valueType\" type=\"ST_TLAnimateBehaviorValueType\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLByRgbColorTransform\">\n    <xsd:attribute name=\"r\" type=\"a:ST_FixedPercentage\" use=\"required\"/>\n    <xsd:attribute name=\"g\" type=\"a:ST_FixedPercentage\" use=\"required\"/>\n    <xsd:attribute name=\"b\" type=\"a:ST_FixedPercentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLByHslColorTransform\">\n    <xsd:attribute name=\"h\" type=\"a:ST_Angle\" use=\"required\"/>\n    <xsd:attribute name=\"s\" type=\"a:ST_FixedPercentage\" use=\"required\"/>\n    <xsd:attribute name=\"l\" type=\"a:ST_FixedPercentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLByAnimateColorTransform\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"rgb\" type=\"CT_TLByRgbColorTransform\"/>\n      <xsd:element name=\"hsl\" type=\"CT_TLByHslColorTransform\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLAnimateColorSpace\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"rgb\"/>\n      <xsd:enumeration value=\"hsl\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLAnimateColorDirection\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"cw\"/>\n      <xsd:enumeration value=\"ccw\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLAnimateColorBehavior\">\n    <xsd:sequence>\n      <xsd:element name=\"cBhvr\" type=\"CT_TLCommonBehaviorData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"by\" type=\"CT_TLByAnimateColorTransform\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"from\" type=\"a:CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"to\" type=\"a:CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"clrSpc\" type=\"ST_TLAnimateColorSpace\" use=\"optional\"/>\n    <xsd:attribute name=\"dir\" type=\"ST_TLAnimateColorDirection\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLAnimateEffectTransition\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"in\"/>\n      <xsd:enumeration value=\"out\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLAnimateEffectBehavior\">\n    <xsd:sequence>\n      <xsd:element name=\"cBhvr\" type=\"CT_TLCommonBehaviorData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"progress\" type=\"CT_TLAnimVariant\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"transition\" type=\"ST_TLAnimateEffectTransition\" default=\"in\" use=\"optional\"/>\n    <xsd:attribute name=\"filter\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"prLst\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLAnimateMotionBehaviorOrigin\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"parent\"/>\n      <xsd:enumeration value=\"layout\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TLAnimateMotionPathEditMode\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"relative\"/>\n      <xsd:enumeration value=\"fixed\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLPoint\">\n    <xsd:attribute name=\"x\" type=\"a:ST_Percentage\" use=\"required\"/>\n    <xsd:attribute name=\"y\" type=\"a:ST_Percentage\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLAnimateMotionBehavior\">\n    <xsd:sequence>\n      <xsd:element name=\"cBhvr\" type=\"CT_TLCommonBehaviorData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"by\" type=\"CT_TLPoint\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"from\" type=\"CT_TLPoint\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"to\" type=\"CT_TLPoint\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"rCtr\" type=\"CT_TLPoint\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"origin\" type=\"ST_TLAnimateMotionBehaviorOrigin\" use=\"optional\"/>\n    <xsd:attribute name=\"path\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"pathEditMode\" type=\"ST_TLAnimateMotionPathEditMode\" use=\"optional\"/>\n    <xsd:attribute name=\"rAng\" type=\"a:ST_Angle\" use=\"optional\"/>\n    <xsd:attribute name=\"ptsTypes\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLAnimateRotationBehavior\">\n    <xsd:sequence>\n      <xsd:element name=\"cBhvr\" type=\"CT_TLCommonBehaviorData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"by\" type=\"a:ST_Angle\" use=\"optional\"/>\n    <xsd:attribute name=\"from\" type=\"a:ST_Angle\" use=\"optional\"/>\n    <xsd:attribute name=\"to\" type=\"a:ST_Angle\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLAnimateScaleBehavior\">\n    <xsd:sequence>\n      <xsd:element name=\"cBhvr\" type=\"CT_TLCommonBehaviorData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"by\" type=\"CT_TLPoint\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"from\" type=\"CT_TLPoint\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"to\" type=\"CT_TLPoint\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"zoomContents\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLCommandType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"evt\"/>\n      <xsd:enumeration value=\"call\"/>\n      <xsd:enumeration value=\"verb\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLCommandBehavior\">\n    <xsd:sequence>\n      <xsd:element name=\"cBhvr\" type=\"CT_TLCommonBehaviorData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute type=\"ST_TLCommandType\" name=\"type\" use=\"optional\"/>\n    <xsd:attribute name=\"cmd\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLSetBehavior\">\n    <xsd:sequence>\n      <xsd:element name=\"cBhvr\" type=\"CT_TLCommonBehaviorData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"to\" type=\"CT_TLAnimVariant\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLCommonMediaNodeData\">\n    <xsd:sequence>\n      <xsd:element name=\"cTn\" type=\"CT_TLCommonTimeNodeData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tgtEl\" type=\"CT_TLTimeTargetElement\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"vol\" type=\"a:ST_PositiveFixedPercentage\" default=\"50%\" use=\"optional\"/>\n    <xsd:attribute name=\"mute\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"numSld\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"showWhenStopped\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLMediaNodeAudio\">\n    <xsd:sequence>\n      <xsd:element name=\"cMediaNode\" type=\"CT_TLCommonMediaNodeData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"isNarration\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLMediaNodeVideo\">\n    <xsd:sequence>\n      <xsd:element name=\"cMediaNode\" type=\"CT_TLCommonMediaNodeData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"fullScrn\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:attributeGroup name=\"AG_TLBuild\">\n    <xsd:attribute name=\"spid\" type=\"a:ST_DrawingElementId\" use=\"required\"/>\n    <xsd:attribute name=\"grpId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"uiExpand\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:attributeGroup>\n  <xsd:complexType name=\"CT_TLTemplate\">\n    <xsd:sequence>\n      <xsd:element name=\"tnLst\" type=\"CT_TimeNodeList\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"lvl\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLTemplateList\">\n    <xsd:sequence>\n      <xsd:element name=\"tmpl\" type=\"CT_TLTemplate\" minOccurs=\"0\" maxOccurs=\"9\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLParaBuildType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"allAtOnce\"/>\n      <xsd:enumeration value=\"p\"/>\n      <xsd:enumeration value=\"cust\"/>\n      <xsd:enumeration value=\"whole\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLBuildParagraph\">\n    <xsd:sequence>\n      <xsd:element name=\"tmplLst\" type=\"CT_TLTemplateList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_TLBuild\"/>\n    <xsd:attribute name=\"build\" type=\"ST_TLParaBuildType\" use=\"optional\" default=\"whole\"/>\n    <xsd:attribute name=\"bldLvl\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"animBg\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"autoUpdateAnimBg\" type=\"xsd:boolean\" default=\"true\" use=\"optional\"/>\n    <xsd:attribute name=\"rev\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"advAuto\" type=\"ST_TLTime\" use=\"optional\" default=\"indefinite\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLDiagramBuildType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"whole\"/>\n      <xsd:enumeration value=\"depthByNode\"/>\n      <xsd:enumeration value=\"depthByBranch\"/>\n      <xsd:enumeration value=\"breadthByNode\"/>\n      <xsd:enumeration value=\"breadthByLvl\"/>\n      <xsd:enumeration value=\"cw\"/>\n      <xsd:enumeration value=\"cwIn\"/>\n      <xsd:enumeration value=\"cwOut\"/>\n      <xsd:enumeration value=\"ccw\"/>\n      <xsd:enumeration value=\"ccwIn\"/>\n      <xsd:enumeration value=\"ccwOut\"/>\n      <xsd:enumeration value=\"inByRing\"/>\n      <xsd:enumeration value=\"outByRing\"/>\n      <xsd:enumeration value=\"up\"/>\n      <xsd:enumeration value=\"down\"/>\n      <xsd:enumeration value=\"allAtOnce\"/>\n      <xsd:enumeration value=\"cust\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLBuildDiagram\">\n    <xsd:attributeGroup ref=\"AG_TLBuild\"/>\n    <xsd:attribute name=\"bld\" type=\"ST_TLDiagramBuildType\" use=\"optional\" default=\"whole\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TLOleChartBuildType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"allAtOnce\"/>\n      <xsd:enumeration value=\"series\"/>\n      <xsd:enumeration value=\"category\"/>\n      <xsd:enumeration value=\"seriesEl\"/>\n      <xsd:enumeration value=\"categoryEl\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TLOleBuildChart\">\n    <xsd:attributeGroup ref=\"AG_TLBuild\"/>\n    <xsd:attribute name=\"bld\" type=\"ST_TLOleChartBuildType\" use=\"optional\" default=\"allAtOnce\"/>\n    <xsd:attribute name=\"animBg\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TLGraphicalObjectBuild\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"bldAsOne\" type=\"CT_Empty\"/>\n      <xsd:element name=\"bldSub\" type=\"a:CT_AnimationGraphicalObjectBuildProperties\"/>\n    </xsd:choice>\n    <xsd:attributeGroup ref=\"AG_TLBuild\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BuildList\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"bldP\" type=\"CT_TLBuildParagraph\"/>\n      <xsd:element name=\"bldDgm\" type=\"CT_TLBuildDiagram\"/>\n      <xsd:element name=\"bldOleChart\" type=\"CT_TLOleBuildChart\"/>\n      <xsd:element name=\"bldGraphic\" type=\"CT_TLGraphicalObjectBuild\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SlideTiming\">\n    <xsd:sequence>\n      <xsd:element name=\"tnLst\" type=\"CT_TimeNodeList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bldLst\" type=\"CT_BuildList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Empty\"/>\n  <xsd:simpleType name=\"ST_Name\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Direction\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"horz\"/>\n      <xsd:enumeration value=\"vert\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Index\">\n    <xsd:restriction base=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_IndexRange\">\n    <xsd:attribute name=\"st\" type=\"ST_Index\" use=\"required\"/>\n    <xsd:attribute name=\"end\" type=\"ST_Index\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SlideRelationshipListEntry\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SlideRelationshipList\">\n    <xsd:sequence>\n      <xsd:element name=\"sld\" type=\"CT_SlideRelationshipListEntry\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomShowId\">\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_SlideListChoice\">\n    <xsd:choice>\n      <xsd:element name=\"sldAll\" type=\"CT_Empty\"/>\n      <xsd:element name=\"sldRg\" type=\"CT_IndexRange\"/>\n      <xsd:element name=\"custShow\" type=\"CT_CustomShowId\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_CustomerData\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TagsData\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomerDataList\">\n    <xsd:sequence minOccurs=\"0\" maxOccurs=\"1\">\n      <xsd:element name=\"custData\" type=\"CT_CustomerData\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"tags\" type=\"CT_TagsData\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Extension\">\n    <xsd:sequence>\n      <xsd:any processContents=\"lax\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uri\" type=\"xsd:token\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ExtensionList\">\n    <xsd:sequence>\n      <xsd:element name=\"ext\" type=\"CT_Extension\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_ExtensionList\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExtensionListModify\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"mod\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CommentAuthor\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"name\" type=\"ST_Name\" use=\"required\"/>\n    <xsd:attribute name=\"initials\" type=\"ST_Name\" use=\"required\"/>\n    <xsd:attribute name=\"lastIdx\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"clrIdx\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CommentAuthorList\">\n    <xsd:sequence>\n      <xsd:element name=\"cmAuthor\" type=\"CT_CommentAuthor\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"cmAuthorLst\" type=\"CT_CommentAuthorList\"/>\n  <xsd:complexType name=\"CT_Comment\">\n    <xsd:sequence>\n      <xsd:element name=\"pos\" type=\"a:CT_Point2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"text\" type=\"xsd:string\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"authorId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"dt\" type=\"xsd:dateTime\" use=\"optional\"/>\n    <xsd:attribute name=\"idx\" type=\"ST_Index\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CommentList\">\n    <xsd:sequence>\n      <xsd:element name=\"cm\" type=\"CT_Comment\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"cmLst\" type=\"CT_CommentList\"/>\n  <xsd:attributeGroup name=\"AG_Ole\">\n    <xsd:attribute name=\"spid\" type=\"a:ST_ShapeID\" use=\"optional\"/>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"showAsIcon\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n    <xsd:attribute name=\"imgW\" type=\"a:ST_PositiveCoordinate32\" use=\"optional\"/>\n    <xsd:attribute name=\"imgH\" type=\"a:ST_PositiveCoordinate32\" use=\"optional\"/>\n  </xsd:attributeGroup>\n  <xsd:simpleType name=\"ST_OleObjectFollowColorScheme\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"full\"/>\n      <xsd:enumeration value=\"textAndBackground\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_OleObjectEmbed\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"followColorScheme\" type=\"ST_OleObjectFollowColorScheme\" use=\"optional\"\n      default=\"none\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OleObjectLink\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"updateAutomatic\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OleObject\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n        <xsd:element name=\"embed\" type=\"CT_OleObjectEmbed\"/>\n        <xsd:element name=\"link\" type=\"CT_OleObjectLink\"/>\n      </xsd:choice>\n      <xsd:element name=\"pic\" type=\"CT_Picture\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_Ole\"/>\n    <xsd:attribute name=\"progId\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:element name=\"oleObj\" type=\"CT_OleObject\"/>\n  <xsd:complexType name=\"CT_Control\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pic\" type=\"CT_Picture\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_Ole\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ControlList\">\n    <xsd:sequence>\n      <xsd:element name=\"control\" type=\"CT_Control\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SlideId\">\n    <xsd:restriction base=\"xsd:unsignedInt\">\n      <xsd:minInclusive value=\"256\"/>\n      <xsd:maxExclusive value=\"2147483648\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SlideIdListEntry\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"ST_SlideId\" use=\"required\"/>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SlideIdList\">\n    <xsd:sequence>\n      <xsd:element name=\"sldId\" type=\"CT_SlideIdListEntry\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SlideMasterId\">\n    <xsd:restriction base=\"xsd:unsignedInt\">\n      <xsd:minInclusive value=\"2147483648\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SlideMasterIdListEntry\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"ST_SlideMasterId\" use=\"optional\"/>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SlideMasterIdList\">\n    <xsd:sequence>\n      <xsd:element name=\"sldMasterId\" type=\"CT_SlideMasterIdListEntry\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NotesMasterIdListEntry\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NotesMasterIdList\">\n    <xsd:sequence>\n      <xsd:element name=\"notesMasterId\" type=\"CT_NotesMasterIdListEntry\" minOccurs=\"0\" maxOccurs=\"1\"\n      />\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_HandoutMasterIdListEntry\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_HandoutMasterIdList\">\n    <xsd:sequence>\n      <xsd:element name=\"handoutMasterId\" type=\"CT_HandoutMasterIdListEntry\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EmbeddedFontDataId\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EmbeddedFontListEntry\">\n    <xsd:sequence>\n      <xsd:element name=\"font\" type=\"a:CT_TextFont\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"regular\" type=\"CT_EmbeddedFontDataId\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bold\" type=\"CT_EmbeddedFontDataId\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"italic\" type=\"CT_EmbeddedFontDataId\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"boldItalic\" type=\"CT_EmbeddedFontDataId\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EmbeddedFontList\">\n    <xsd:sequence>\n      <xsd:element name=\"embeddedFont\" type=\"CT_EmbeddedFontListEntry\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SmartTags\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomShow\">\n    <xsd:sequence>\n      <xsd:element name=\"sldLst\" type=\"CT_SlideRelationshipList\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"ST_Name\" use=\"required\"/>\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomShowList\">\n    <xsd:sequence>\n      <xsd:element name=\"custShow\" type=\"CT_CustomShow\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PhotoAlbumLayout\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"fitToSlide\"/>\n      <xsd:enumeration value=\"1pic\"/>\n      <xsd:enumeration value=\"2pic\"/>\n      <xsd:enumeration value=\"4pic\"/>\n      <xsd:enumeration value=\"1picTitle\"/>\n      <xsd:enumeration value=\"2picTitle\"/>\n      <xsd:enumeration value=\"4picTitle\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PhotoAlbumFrameShape\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"frameStyle1\"/>\n      <xsd:enumeration value=\"frameStyle2\"/>\n      <xsd:enumeration value=\"frameStyle3\"/>\n      <xsd:enumeration value=\"frameStyle4\"/>\n      <xsd:enumeration value=\"frameStyle5\"/>\n      <xsd:enumeration value=\"frameStyle6\"/>\n      <xsd:enumeration value=\"frameStyle7\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PhotoAlbum\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"bw\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showCaptions\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"layout\" type=\"ST_PhotoAlbumLayout\" use=\"optional\" default=\"fitToSlide\"/>\n    <xsd:attribute name=\"frame\" type=\"ST_PhotoAlbumFrameShape\" use=\"optional\" default=\"frameStyle1\"\n    />\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SlideSizeCoordinate\">\n    <xsd:restriction base=\"a:ST_PositiveCoordinate32\">\n      <xsd:minInclusive value=\"914400\"/>\n      <xsd:maxInclusive value=\"51206400\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_SlideSizeType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"screen4x3\"/>\n      <xsd:enumeration value=\"letter\"/>\n      <xsd:enumeration value=\"A4\"/>\n      <xsd:enumeration value=\"35mm\"/>\n      <xsd:enumeration value=\"overhead\"/>\n      <xsd:enumeration value=\"banner\"/>\n      <xsd:enumeration value=\"custom\"/>\n      <xsd:enumeration value=\"ledger\"/>\n      <xsd:enumeration value=\"A3\"/>\n      <xsd:enumeration value=\"B4ISO\"/>\n      <xsd:enumeration value=\"B5ISO\"/>\n      <xsd:enumeration value=\"B4JIS\"/>\n      <xsd:enumeration value=\"B5JIS\"/>\n      <xsd:enumeration value=\"hagakiCard\"/>\n      <xsd:enumeration value=\"screen16x9\"/>\n      <xsd:enumeration value=\"screen16x10\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SlideSize\">\n    <xsd:attribute name=\"cx\" type=\"ST_SlideSizeCoordinate\" use=\"required\"/>\n    <xsd:attribute name=\"cy\" type=\"ST_SlideSizeCoordinate\" use=\"required\"/>\n    <xsd:attribute name=\"type\" type=\"ST_SlideSizeType\" use=\"optional\" default=\"custom\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Kinsoku\">\n    <xsd:attribute name=\"lang\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"invalStChars\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"invalEndChars\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_BookmarkIdSeed\">\n    <xsd:restriction base=\"xsd:unsignedInt\">\n      <xsd:minInclusive value=\"1\"/>\n      <xsd:maxExclusive value=\"2147483648\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ModifyVerifier\">\n    <xsd:attribute name=\"algorithmName\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"hashValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"saltValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"spinValue\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"cryptProviderType\" type=\"s:ST_CryptProv\" use=\"optional\"/>\n    <xsd:attribute name=\"cryptAlgorithmClass\" type=\"s:ST_AlgClass\" use=\"optional\"/>\n    <xsd:attribute name=\"cryptAlgorithmType\" type=\"s:ST_AlgType\" use=\"optional\"/>\n    <xsd:attribute name=\"cryptAlgorithmSid\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"spinCount\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"saltData\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"hashData\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"cryptProvider\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"algIdExt\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"algIdExtSource\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"cryptProviderTypeExt\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"cryptProviderTypeExtSource\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Presentation\">\n    <xsd:sequence>\n      <xsd:element name=\"sldMasterIdLst\" type=\"CT_SlideMasterIdList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"notesMasterIdLst\" type=\"CT_NotesMasterIdList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"handoutMasterIdLst\" type=\"CT_HandoutMasterIdList\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"sldIdLst\" type=\"CT_SlideIdList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sldSz\" type=\"CT_SlideSize\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"notesSz\" type=\"a:CT_PositiveSize2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"smartTags\" type=\"CT_SmartTags\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"embeddedFontLst\" type=\"CT_EmbeddedFontList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"custShowLst\" type=\"CT_CustomShowList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"photoAlbum\" type=\"CT_PhotoAlbum\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"custDataLst\" type=\"CT_CustomerDataList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"kinsoku\" type=\"CT_Kinsoku\" minOccurs=\"0\"/>\n      <xsd:element name=\"defaultTextStyle\" type=\"a:CT_TextListStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"modifyVerifier\" type=\"CT_ModifyVerifier\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"serverZoom\" type=\"a:ST_Percentage\" use=\"optional\" default=\"50%\"/>\n    <xsd:attribute name=\"firstSlideNum\" type=\"xsd:int\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"showSpecialPlsOnTitleSld\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"rtl\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"removePersonalInfoOnSave\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"compatMode\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"strictFirstAndLastChars\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"embedTrueTypeFonts\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"saveSubsetFonts\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"autoCompressPictures\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"bookmarkIdSeed\" type=\"ST_BookmarkIdSeed\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"conformance\" type=\"s:ST_ConformanceClass\"/>\n  </xsd:complexType>\n  <xsd:element name=\"presentation\" type=\"CT_Presentation\"/>\n  <xsd:complexType name=\"CT_HtmlPublishProperties\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_SlideListChoice\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"showSpeakerNotes\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"target\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"title\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_WebColorType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"browser\"/>\n      <xsd:enumeration value=\"presentationText\"/>\n      <xsd:enumeration value=\"presentationAccent\"/>\n      <xsd:enumeration value=\"whiteTextOnBlack\"/>\n      <xsd:enumeration value=\"blackTextOnWhite\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_WebScreenSize\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"544x376\"/>\n      <xsd:enumeration value=\"640x480\"/>\n      <xsd:enumeration value=\"720x512\"/>\n      <xsd:enumeration value=\"800x600\"/>\n      <xsd:enumeration value=\"1024x768\"/>\n      <xsd:enumeration value=\"1152x882\"/>\n      <xsd:enumeration value=\"1152x900\"/>\n      <xsd:enumeration value=\"1280x1024\"/>\n      <xsd:enumeration value=\"1600x1200\"/>\n      <xsd:enumeration value=\"1800x1400\"/>\n      <xsd:enumeration value=\"1920x1200\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_WebEncoding\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_WebProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"showAnimation\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"resizeGraphics\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"allowPng\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"relyOnVml\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"organizeInFolders\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"useLongFilenames\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"imgSz\" type=\"ST_WebScreenSize\" use=\"optional\" default=\"800x600\"/>\n    <xsd:attribute name=\"encoding\" type=\"ST_WebEncoding\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"clr\" type=\"ST_WebColorType\" use=\"optional\" default=\"whiteTextOnBlack\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PrintWhat\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"slides\"/>\n      <xsd:enumeration value=\"handouts1\"/>\n      <xsd:enumeration value=\"handouts2\"/>\n      <xsd:enumeration value=\"handouts3\"/>\n      <xsd:enumeration value=\"handouts4\"/>\n      <xsd:enumeration value=\"handouts6\"/>\n      <xsd:enumeration value=\"handouts9\"/>\n      <xsd:enumeration value=\"notes\"/>\n      <xsd:enumeration value=\"outline\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PrintColorMode\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"bw\"/>\n      <xsd:enumeration value=\"gray\"/>\n      <xsd:enumeration value=\"clr\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PrintProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"prnWhat\" type=\"ST_PrintWhat\" use=\"optional\" default=\"slides\"/>\n    <xsd:attribute name=\"clrMode\" type=\"ST_PrintColorMode\" use=\"optional\" default=\"clr\"/>\n    <xsd:attribute name=\"hiddenSlides\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"scaleToFitPaper\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"frameSlides\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ShowInfoBrowse\">\n    <xsd:attribute name=\"showScrollbar\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ShowInfoKiosk\">\n    <xsd:attribute name=\"restart\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"300000\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ShowType\">\n    <xsd:choice>\n      <xsd:element name=\"present\" type=\"CT_Empty\"/>\n      <xsd:element name=\"browse\" type=\"CT_ShowInfoBrowse\"/>\n      <xsd:element name=\"kiosk\" type=\"CT_ShowInfoKiosk\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_ShowProperties\">\n    <xsd:sequence minOccurs=\"0\" maxOccurs=\"1\">\n      <xsd:group ref=\"EG_ShowType\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_SlideListChoice\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"penClr\" type=\"a:CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"loop\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showNarration\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showAnimation\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"useTimings\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PresentationProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"htmlPubPr\" type=\"CT_HtmlPublishProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"webPr\" type=\"CT_WebProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"prnPr\" type=\"CT_PrintProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"showPr\" type=\"CT_ShowProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"clrMru\" type=\"a:CT_ColorMRU\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"presentationPr\" type=\"CT_PresentationProperties\"/>\n  <xsd:complexType name=\"CT_HeaderFooter\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"sldNum\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"hdr\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"ftr\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"dt\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PlaceholderType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"title\"/>\n      <xsd:enumeration value=\"body\"/>\n      <xsd:enumeration value=\"ctrTitle\"/>\n      <xsd:enumeration value=\"subTitle\"/>\n      <xsd:enumeration value=\"dt\"/>\n      <xsd:enumeration value=\"sldNum\"/>\n      <xsd:enumeration value=\"ftr\"/>\n      <xsd:enumeration value=\"hdr\"/>\n      <xsd:enumeration value=\"obj\"/>\n      <xsd:enumeration value=\"chart\"/>\n      <xsd:enumeration value=\"tbl\"/>\n      <xsd:enumeration value=\"clipArt\"/>\n      <xsd:enumeration value=\"dgm\"/>\n      <xsd:enumeration value=\"media\"/>\n      <xsd:enumeration value=\"sldImg\"/>\n      <xsd:enumeration value=\"pic\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PlaceholderSize\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"full\"/>\n      <xsd:enumeration value=\"half\"/>\n      <xsd:enumeration value=\"quarter\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Placeholder\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"type\" type=\"ST_PlaceholderType\" use=\"optional\" default=\"obj\"/>\n    <xsd:attribute name=\"orient\" type=\"ST_Direction\" use=\"optional\" default=\"horz\"/>\n    <xsd:attribute name=\"sz\" type=\"ST_PlaceholderSize\" use=\"optional\" default=\"full\"/>\n    <xsd:attribute name=\"idx\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"hasCustomPrompt\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ApplicationNonVisualDrawingProps\">\n    <xsd:sequence>\n      <xsd:element name=\"ph\" type=\"CT_Placeholder\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"a:EG_Media\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"custDataLst\" type=\"CT_CustomerDataList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"isPhoto\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"userDrawn\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ShapeNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvSpPr\" type=\"a:CT_NonVisualDrawingShapeProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"nvPr\" type=\"CT_ApplicationNonVisualDrawingProps\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Shape\">\n    <xsd:sequence>\n      <xsd:element name=\"nvSpPr\" type=\"CT_ShapeNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"a:CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txBody\" type=\"a:CT_TextBody\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"useBgFill\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ConnectorNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvCxnSpPr\" type=\"a:CT_NonVisualConnectorProperties\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"nvPr\" type=\"CT_ApplicationNonVisualDrawingProps\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Connector\">\n    <xsd:sequence>\n      <xsd:element name=\"nvCxnSpPr\" type=\"CT_ConnectorNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"a:CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PictureNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvPicPr\" type=\"a:CT_NonVisualPictureProperties\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"nvPr\" type=\"CT_ApplicationNonVisualDrawingProps\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Picture\">\n    <xsd:sequence>\n      <xsd:element name=\"nvPicPr\" type=\"CT_PictureNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"blipFill\" type=\"a:CT_BlipFillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spPr\" type=\"a:CT_ShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"a:CT_ShapeStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GraphicalObjectFrameNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvGraphicFramePr\" type=\"a:CT_NonVisualGraphicFrameProperties\"\n        minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"nvPr\" type=\"CT_ApplicationNonVisualDrawingProps\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GraphicalObjectFrame\">\n    <xsd:sequence>\n      <xsd:element name=\"nvGraphicFramePr\" type=\"CT_GraphicalObjectFrameNonVisual\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"xfrm\" type=\"a:CT_Transform2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element ref=\"a:graphic\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"bwMode\" type=\"a:ST_BlackWhiteMode\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupShapeNonVisual\">\n    <xsd:sequence>\n      <xsd:element name=\"cNvPr\" type=\"a:CT_NonVisualDrawingProps\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cNvGrpSpPr\" type=\"a:CT_NonVisualGroupDrawingShapeProps\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"nvPr\" type=\"CT_ApplicationNonVisualDrawingProps\" minOccurs=\"1\"\n        maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupShape\">\n    <xsd:sequence>\n      <xsd:element name=\"nvGrpSpPr\" type=\"CT_GroupShapeNonVisual\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"grpSpPr\" type=\"a:CT_GroupShapeProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n        <xsd:element name=\"sp\" type=\"CT_Shape\"/>\n        <xsd:element name=\"grpSp\" type=\"CT_GroupShape\"/>\n        <xsd:element name=\"graphicFrame\" type=\"CT_GraphicalObjectFrame\"/>\n        <xsd:element name=\"cxnSp\" type=\"CT_Connector\"/>\n        <xsd:element name=\"pic\" type=\"CT_Picture\"/>\n        <xsd:element name=\"contentPart\" type=\"CT_Rel\"/>\n      </xsd:choice>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Rel\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_TopLevelSlide\">\n    <xsd:sequence>\n      <xsd:element name=\"clrMap\" type=\"a:CT_ColorMapping\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:group name=\"EG_ChildSlide\">\n    <xsd:sequence>\n      <xsd:element name=\"clrMapOvr\" type=\"a:CT_ColorMappingOverride\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:attributeGroup name=\"AG_ChildSlide\">\n    <xsd:attribute name=\"showMasterSp\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showMasterPhAnim\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:attributeGroup>\n  <xsd:complexType name=\"CT_BackgroundProperties\">\n    <xsd:sequence>\n      <xsd:group ref=\"a:EG_FillProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"a:EG_EffectProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"shadeToTitle\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_Background\">\n    <xsd:choice>\n      <xsd:element name=\"bgPr\" type=\"CT_BackgroundProperties\"/>\n      <xsd:element name=\"bgRef\" type=\"a:CT_StyleMatrixReference\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_Background\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_Background\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"bwMode\" type=\"a:ST_BlackWhiteMode\" use=\"optional\" default=\"white\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CommonSlideData\">\n    <xsd:sequence>\n      <xsd:element name=\"bg\" type=\"CT_Background\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"spTree\" type=\"CT_GroupShape\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"custDataLst\" type=\"CT_CustomerDataList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"controls\" type=\"CT_ControlList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Slide\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"cSld\" type=\"CT_CommonSlideData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_ChildSlide\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"transition\" type=\"CT_SlideTransition\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"timing\" type=\"CT_SlideTiming\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_ChildSlide\"/>\n    <xsd:attribute name=\"show\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:element name=\"sld\" type=\"CT_Slide\"/>\n  <xsd:simpleType name=\"ST_SlideLayoutType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"title\"/>\n      <xsd:enumeration value=\"tx\"/>\n      <xsd:enumeration value=\"twoColTx\"/>\n      <xsd:enumeration value=\"tbl\"/>\n      <xsd:enumeration value=\"txAndChart\"/>\n      <xsd:enumeration value=\"chartAndTx\"/>\n      <xsd:enumeration value=\"dgm\"/>\n      <xsd:enumeration value=\"chart\"/>\n      <xsd:enumeration value=\"txAndClipArt\"/>\n      <xsd:enumeration value=\"clipArtAndTx\"/>\n      <xsd:enumeration value=\"titleOnly\"/>\n      <xsd:enumeration value=\"blank\"/>\n      <xsd:enumeration value=\"txAndObj\"/>\n      <xsd:enumeration value=\"objAndTx\"/>\n      <xsd:enumeration value=\"objOnly\"/>\n      <xsd:enumeration value=\"obj\"/>\n      <xsd:enumeration value=\"txAndMedia\"/>\n      <xsd:enumeration value=\"mediaAndTx\"/>\n      <xsd:enumeration value=\"objOverTx\"/>\n      <xsd:enumeration value=\"txOverObj\"/>\n      <xsd:enumeration value=\"txAndTwoObj\"/>\n      <xsd:enumeration value=\"twoObjAndTx\"/>\n      <xsd:enumeration value=\"twoObjOverTx\"/>\n      <xsd:enumeration value=\"fourObj\"/>\n      <xsd:enumeration value=\"vertTx\"/>\n      <xsd:enumeration value=\"clipArtAndVertTx\"/>\n      <xsd:enumeration value=\"vertTitleAndTx\"/>\n      <xsd:enumeration value=\"vertTitleAndTxOverChart\"/>\n      <xsd:enumeration value=\"twoObj\"/>\n      <xsd:enumeration value=\"objAndTwoObj\"/>\n      <xsd:enumeration value=\"twoObjAndObj\"/>\n      <xsd:enumeration value=\"cust\"/>\n      <xsd:enumeration value=\"secHead\"/>\n      <xsd:enumeration value=\"twoTxTwoObj\"/>\n      <xsd:enumeration value=\"objTx\"/>\n      <xsd:enumeration value=\"picTx\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SlideLayout\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"cSld\" type=\"CT_CommonSlideData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_ChildSlide\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"transition\" type=\"CT_SlideTransition\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"timing\" type=\"CT_SlideTiming\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hf\" type=\"CT_HeaderFooter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_ChildSlide\"/>\n    <xsd:attribute name=\"matchingName\" type=\"xsd:string\" use=\"optional\" default=\"\"/>\n    <xsd:attribute name=\"type\" type=\"ST_SlideLayoutType\" use=\"optional\" default=\"cust\"/>\n    <xsd:attribute name=\"preserve\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"userDrawn\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:element name=\"sldLayout\" type=\"CT_SlideLayout\"/>\n  <xsd:complexType name=\"CT_SlideMasterTextStyles\">\n    <xsd:sequence>\n      <xsd:element name=\"titleStyle\" type=\"a:CT_TextListStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bodyStyle\" type=\"a:CT_TextListStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"otherStyle\" type=\"a:CT_TextListStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SlideLayoutId\">\n    <xsd:restriction base=\"xsd:unsignedInt\">\n      <xsd:minInclusive value=\"2147483648\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SlideLayoutIdListEntry\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"ST_SlideLayoutId\" use=\"optional\"/>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SlideLayoutIdList\">\n    <xsd:sequence>\n      <xsd:element name=\"sldLayoutId\" type=\"CT_SlideLayoutIdListEntry\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SlideMaster\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"cSld\" type=\"CT_CommonSlideData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_TopLevelSlide\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sldLayoutIdLst\" type=\"CT_SlideLayoutIdList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"transition\" type=\"CT_SlideTransition\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"timing\" type=\"CT_SlideTiming\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hf\" type=\"CT_HeaderFooter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"txStyles\" type=\"CT_SlideMasterTextStyles\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"preserve\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:element name=\"sldMaster\" type=\"CT_SlideMaster\"/>\n  <xsd:complexType name=\"CT_HandoutMaster\">\n    <xsd:sequence>\n      <xsd:element name=\"cSld\" type=\"CT_CommonSlideData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_TopLevelSlide\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hf\" type=\"CT_HeaderFooter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"handoutMaster\" type=\"CT_HandoutMaster\"/>\n  <xsd:complexType name=\"CT_NotesMaster\">\n    <xsd:sequence>\n      <xsd:element name=\"cSld\" type=\"CT_CommonSlideData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_TopLevelSlide\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hf\" type=\"CT_HeaderFooter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"notesStyle\" type=\"a:CT_TextListStyle\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"notesMaster\" type=\"CT_NotesMaster\"/>\n  <xsd:complexType name=\"CT_NotesSlide\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"cSld\" type=\"CT_CommonSlideData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_ChildSlide\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionListModify\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_ChildSlide\"/>\n  </xsd:complexType>\n  <xsd:element name=\"notes\" type=\"CT_NotesSlide\"/>\n  <xsd:complexType name=\"CT_SlideSyncProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"serverSldId\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"serverSldModifiedTime\" type=\"xsd:dateTime\" use=\"required\"/>\n    <xsd:attribute name=\"clientInsertedTime\" type=\"xsd:dateTime\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:element name=\"sldSyncPr\" type=\"CT_SlideSyncProperties\"/>\n  <xsd:complexType name=\"CT_StringTag\">\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TagList\">\n    <xsd:sequence>\n      <xsd:element name=\"tag\" type=\"CT_StringTag\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"tagLst\" type=\"CT_TagList\"/>\n  <xsd:simpleType name=\"ST_SplitterBarState\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"minimized\"/>\n      <xsd:enumeration value=\"restored\"/>\n      <xsd:enumeration value=\"maximized\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ViewType\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:enumeration value=\"sldView\"/>\n      <xsd:enumeration value=\"sldMasterView\"/>\n      <xsd:enumeration value=\"notesView\"/>\n      <xsd:enumeration value=\"handoutView\"/>\n      <xsd:enumeration value=\"notesMasterView\"/>\n      <xsd:enumeration value=\"outlineView\"/>\n      <xsd:enumeration value=\"sldSorterView\"/>\n      <xsd:enumeration value=\"sldThumbnailView\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_NormalViewPortion\">\n    <xsd:attribute name=\"sz\" type=\"a:ST_PositiveFixedPercentage\" use=\"required\"/>\n    <xsd:attribute name=\"autoAdjust\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NormalViewProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"restoredLeft\" type=\"CT_NormalViewPortion\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"restoredTop\" type=\"CT_NormalViewPortion\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"showOutlineIcons\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"snapVertSplitter\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"vertBarState\" type=\"ST_SplitterBarState\" use=\"optional\" default=\"restored\"/>\n    <xsd:attribute name=\"horzBarState\" type=\"ST_SplitterBarState\" use=\"optional\" default=\"restored\"/>\n    <xsd:attribute name=\"preferSingleView\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CommonViewProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"scale\" type=\"a:CT_Scale2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"origin\" type=\"a:CT_Point2D\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"varScale\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NotesTextViewProperties\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"cViewPr\" type=\"CT_CommonViewProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OutlineViewSlideEntry\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n    <xsd:attribute name=\"collapse\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OutlineViewSlideList\">\n    <xsd:sequence>\n      <xsd:element name=\"sld\" type=\"CT_OutlineViewSlideEntry\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OutlineViewProperties\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"cViewPr\" type=\"CT_CommonViewProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sldLst\" type=\"CT_OutlineViewSlideList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SlideSorterViewProperties\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"cViewPr\" type=\"CT_CommonViewProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"showFormatting\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Guide\">\n    <xsd:attribute name=\"orient\" type=\"ST_Direction\" use=\"optional\" default=\"vert\"/>\n    <xsd:attribute name=\"pos\" type=\"a:ST_Coordinate32\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GuideList\">\n    <xsd:sequence minOccurs=\"0\" maxOccurs=\"1\">\n      <xsd:element name=\"guide\" type=\"CT_Guide\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CommonSlideViewProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"cViewPr\" type=\"CT_CommonViewProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"guideLst\" type=\"CT_GuideList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"snapToGrid\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"snapToObjects\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showGuides\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SlideViewProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"cSldViewPr\" type=\"CT_CommonSlideViewProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NotesViewProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"cSldViewPr\" type=\"CT_CommonSlideViewProperties\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ViewProperties\">\n    <xsd:sequence minOccurs=\"0\" maxOccurs=\"1\">\n      <xsd:element name=\"normalViewPr\" type=\"CT_NormalViewProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"slideViewPr\" type=\"CT_SlideViewProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"outlineViewPr\" type=\"CT_OutlineViewProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"notesTextViewPr\" type=\"CT_NotesTextViewProperties\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"sorterViewPr\" type=\"CT_SlideSorterViewProperties\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"notesViewPr\" type=\"CT_NotesViewProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gridSpacing\" type=\"a:CT_PositiveSize2D\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"lastView\" type=\"ST_ViewType\" use=\"optional\" default=\"sldView\"/>\n    <xsd:attribute name=\"showComments\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:element name=\"viewPr\" type=\"CT_ViewProperties\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/characteristics\"\n  targetNamespace=\"http://schemas.openxmlformats.org/officeDocument/2006/characteristics\"\n  elementFormDefault=\"qualified\">\n  <xsd:complexType name=\"CT_AdditionalCharacteristics\">\n    <xsd:sequence>\n      <xsd:element name=\"characteristic\" type=\"CT_Characteristic\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Characteristic\">\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"relation\" type=\"ST_Relation\" use=\"required\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"vocabulary\" type=\"xsd:anyURI\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Relation\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"ge\"/>\n      <xsd:enumeration value=\"le\"/>\n      <xsd:enumeration value=\"gt\"/>\n      <xsd:enumeration value=\"lt\"/>\n      <xsd:enumeration value=\"eq\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:element name=\"additionalCharacteristics\" type=\"CT_AdditionalCharacteristics\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  targetNamespace=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\"\n  elementFormDefault=\"qualified\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:simpleType name=\"ST_SourceType\">\n    <xsd:restriction base=\"s:ST_String\">\n      <xsd:enumeration value=\"ArticleInAPeriodical\"/>\n      <xsd:enumeration value=\"Book\"/>\n      <xsd:enumeration value=\"BookSection\"/>\n      <xsd:enumeration value=\"JournalArticle\"/>\n      <xsd:enumeration value=\"ConferenceProceedings\"/>\n      <xsd:enumeration value=\"Report\"/>\n      <xsd:enumeration value=\"SoundRecording\"/>\n      <xsd:enumeration value=\"Performance\"/>\n      <xsd:enumeration value=\"Art\"/>\n      <xsd:enumeration value=\"DocumentFromInternetSite\"/>\n      <xsd:enumeration value=\"InternetSite\"/>\n      <xsd:enumeration value=\"Film\"/>\n      <xsd:enumeration value=\"Interview\"/>\n      <xsd:enumeration value=\"Patent\"/>\n      <xsd:enumeration value=\"ElectronicSource\"/>\n      <xsd:enumeration value=\"Case\"/>\n      <xsd:enumeration value=\"Misc\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_NameListType\">\n    <xsd:sequence>\n      <xsd:element name=\"Person\" type=\"CT_PersonType\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PersonType\">\n    <xsd:sequence>\n      <xsd:element name=\"Last\" type=\"s:ST_String\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"First\" type=\"s:ST_String\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"Middle\" type=\"s:ST_String\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NameType\">\n    <xsd:sequence>\n      <xsd:element name=\"NameList\" type=\"CT_NameListType\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NameOrCorporateType\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"1\">\n        <xsd:element name=\"NameList\" type=\"CT_NameListType\" minOccurs=\"1\" maxOccurs=\"1\"/>\n        <xsd:element name=\"Corporate\" minOccurs=\"1\" maxOccurs=\"1\" type=\"s:ST_String\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AuthorType\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n        <xsd:element name=\"Artist\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Author\" type=\"CT_NameOrCorporateType\"/>\n        <xsd:element name=\"BookAuthor\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Compiler\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Composer\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Conductor\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Counsel\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Director\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Editor\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Interviewee\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Interviewer\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Inventor\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Performer\" type=\"CT_NameOrCorporateType\"/>\n        <xsd:element name=\"ProducerName\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Translator\" type=\"CT_NameType\"/>\n        <xsd:element name=\"Writer\" type=\"CT_NameType\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SourceType\">\n    <xsd:sequence>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n        <xsd:element name=\"AbbreviatedCaseNumber\" type=\"s:ST_String\"/>\n        <xsd:element name=\"AlbumTitle\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Author\" type=\"CT_AuthorType\"/>\n        <xsd:element name=\"BookTitle\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Broadcaster\" type=\"s:ST_String\"/>\n        <xsd:element name=\"BroadcastTitle\" type=\"s:ST_String\"/>\n        <xsd:element name=\"CaseNumber\" type=\"s:ST_String\"/>\n        <xsd:element name=\"ChapterNumber\" type=\"s:ST_String\"/>\n        <xsd:element name=\"City\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Comments\" type=\"s:ST_String\"/>\n        <xsd:element name=\"ConferenceName\" type=\"s:ST_String\"/>\n        <xsd:element name=\"CountryRegion\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Court\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Day\" type=\"s:ST_String\"/>\n        <xsd:element name=\"DayAccessed\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Department\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Distributor\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Edition\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Guid\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Institution\" type=\"s:ST_String\"/>\n        <xsd:element name=\"InternetSiteTitle\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Issue\" type=\"s:ST_String\"/>\n        <xsd:element name=\"JournalName\" type=\"s:ST_String\"/>\n        <xsd:element name=\"LCID\" type=\"s:ST_Lang\"/>\n        <xsd:element name=\"Medium\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Month\" type=\"s:ST_String\"/>\n        <xsd:element name=\"MonthAccessed\" type=\"s:ST_String\"/>\n        <xsd:element name=\"NumberVolumes\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Pages\" type=\"s:ST_String\"/>\n        <xsd:element name=\"PatentNumber\" type=\"s:ST_String\"/>\n        <xsd:element name=\"PeriodicalTitle\" type=\"s:ST_String\"/>\n        <xsd:element name=\"ProductionCompany\" type=\"s:ST_String\"/>\n        <xsd:element name=\"PublicationTitle\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Publisher\" type=\"s:ST_String\"/>\n        <xsd:element name=\"RecordingNumber\" type=\"s:ST_String\"/>\n        <xsd:element name=\"RefOrder\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Reporter\" type=\"s:ST_String\"/>\n        <xsd:element name=\"SourceType\" type=\"ST_SourceType\"/>\n        <xsd:element name=\"ShortTitle\" type=\"s:ST_String\"/>\n        <xsd:element name=\"StandardNumber\" type=\"s:ST_String\"/>\n        <xsd:element name=\"StateProvince\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Station\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Tag\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Theater\" type=\"s:ST_String\"/>\n        <xsd:element name=\"ThesisType\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Title\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Type\" type=\"s:ST_String\"/>\n        <xsd:element name=\"URL\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Version\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Volume\" type=\"s:ST_String\"/>\n        <xsd:element name=\"Year\" type=\"s:ST_String\"/>\n        <xsd:element name=\"YearAccessed\" type=\"s:ST_String\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"Sources\" type=\"CT_Sources\"/>\n  <xsd:complexType name=\"CT_Sources\">\n    <xsd:sequence>\n      <xsd:element name=\"Source\" type=\"CT_SourceType\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"SelectedStyle\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"StyleName\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"URI\" type=\"s:ST_String\"/>\n  </xsd:complexType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  targetNamespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  elementFormDefault=\"qualified\">\n  <xsd:simpleType name=\"ST_Lang\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HexColorRGB\">\n    <xsd:restriction base=\"xsd:hexBinary\">\n      <xsd:length value=\"3\" fixed=\"true\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Panose\">\n    <xsd:restriction base=\"xsd:hexBinary\">\n      <xsd:length value=\"10\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CalendarType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"gregorian\"/>\n      <xsd:enumeration value=\"gregorianUs\"/>\n      <xsd:enumeration value=\"gregorianMeFrench\"/>\n      <xsd:enumeration value=\"gregorianArabic\"/>\n      <xsd:enumeration value=\"hijri\"/>\n      <xsd:enumeration value=\"hebrew\"/>\n      <xsd:enumeration value=\"taiwan\"/>\n      <xsd:enumeration value=\"japan\"/>\n      <xsd:enumeration value=\"thai\"/>\n      <xsd:enumeration value=\"korea\"/>\n      <xsd:enumeration value=\"saka\"/>\n      <xsd:enumeration value=\"gregorianXlitEnglish\"/>\n      <xsd:enumeration value=\"gregorianXlitFrench\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AlgClass\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"hash\"/>\n      <xsd:enumeration value=\"custom\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CryptProv\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"rsaAES\"/>\n      <xsd:enumeration value=\"rsaFull\"/>\n      <xsd:enumeration value=\"custom\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AlgType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"typeAny\"/>\n      <xsd:enumeration value=\"custom\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ColorType\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Guid\">\n    <xsd:restriction base=\"xsd:token\">\n      <xsd:pattern value=\"\\{[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\\}\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_OnOff\">\n    <xsd:union memberTypes=\"xsd:boolean ST_OnOff1\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_OnOff1\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"on\"/>\n      <xsd:enumeration value=\"off\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_String\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_XmlName\">\n    <xsd:restriction base=\"xsd:NCName\">\n      <xsd:minLength value=\"1\"/>\n      <xsd:maxLength value=\"255\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TrueFalse\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"f\"/>\n      <xsd:enumeration value=\"true\"/>\n      <xsd:enumeration value=\"false\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TrueFalseBlank\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"f\"/>\n      <xsd:enumeration value=\"true\"/>\n      <xsd:enumeration value=\"false\"/>\n      <xsd:enumeration value=\"\"/>\n      <xsd:enumeration value=\"True\"/>\n      <xsd:enumeration value=\"False\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_UnsignedDecimalNumber\">\n    <xsd:restriction base=\"xsd:decimal\">\n      <xsd:minInclusive value=\"0\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TwipsMeasure\">\n    <xsd:union memberTypes=\"ST_UnsignedDecimalNumber ST_PositiveUniversalMeasure\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_VerticalAlignRun\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"baseline\"/>\n      <xsd:enumeration value=\"superscript\"/>\n      <xsd:enumeration value=\"subscript\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Xstring\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_XAlign\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"right\"/>\n      <xsd:enumeration value=\"inside\"/>\n      <xsd:enumeration value=\"outside\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_YAlign\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"inline\"/>\n      <xsd:enumeration value=\"top\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"bottom\"/>\n      <xsd:enumeration value=\"inside\"/>\n      <xsd:enumeration value=\"outside\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ConformanceClass\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"strict\"/>\n      <xsd:enumeration value=\"transitional\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_UniversalMeasure\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"-?[0-9]+(\\.[0-9]+)?(mm|cm|in|pt|pc|pi)\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PositiveUniversalMeasure\">\n    <xsd:restriction base=\"ST_UniversalMeasure\">\n      <xsd:pattern value=\"[0-9]+(\\.[0-9]+)?(mm|cm|in|pt|pc|pi)\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Percentage\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"-?[0-9]+(\\.[0-9]+)?%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FixedPercentage\">\n    <xsd:restriction base=\"ST_Percentage\">\n      <xsd:pattern value=\"-?((100)|([0-9][0-9]?))(\\.[0-9][0-9]?)?%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PositivePercentage\">\n    <xsd:restriction base=\"ST_Percentage\">\n      <xsd:pattern value=\"[0-9]+(\\.[0-9]+)?%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PositiveFixedPercentage\">\n    <xsd:restriction base=\"ST_Percentage\">\n      <xsd:pattern value=\"((100)|([0-9][0-9]?))(\\.[0-9][0-9]?)?%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/customXml\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  targetNamespace=\"http://schemas.openxmlformats.org/officeDocument/2006/customXml\"\n  elementFormDefault=\"qualified\" attributeFormDefault=\"qualified\" blockDefault=\"#all\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:complexType name=\"CT_DatastoreSchemaRef\">\n    <xsd:attribute name=\"uri\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DatastoreSchemaRefs\">\n    <xsd:sequence>\n      <xsd:element name=\"schemaRef\" type=\"CT_DatastoreSchemaRef\" minOccurs=\"0\" maxOccurs=\"unbounded\"\n      />\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DatastoreItem\">\n    <xsd:sequence>\n      <xsd:element name=\"schemaRefs\" type=\"CT_DatastoreSchemaRefs\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"itemID\" type=\"s:ST_Guid\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:element name=\"datastoreItem\" type=\"CT_DatastoreItem\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/schemaLibrary/2006/main\"\n  targetNamespace=\"http://schemas.openxmlformats.org/schemaLibrary/2006/main\"\n  attributeFormDefault=\"qualified\" elementFormDefault=\"qualified\">\n  <xsd:complexType name=\"CT_Schema\">\n    <xsd:attribute name=\"uri\" type=\"xsd:string\" default=\"\"/>\n    <xsd:attribute name=\"manifestLocation\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"schemaLocation\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"schemaLanguage\" type=\"xsd:token\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SchemaLibrary\">\n    <xsd:sequence>\n      <xsd:element name=\"schema\" type=\"CT_Schema\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"schemaLibrary\" type=\"CT_SchemaLibrary\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/custom-properties\"\n  xmlns:vt=\"http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  targetNamespace=\"http://schemas.openxmlformats.org/officeDocument/2006/custom-properties\"\n  blockDefault=\"#all\" elementFormDefault=\"qualified\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes\"\n    schemaLocation=\"shared-documentPropertiesVariantTypes.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:element name=\"Properties\" type=\"CT_Properties\"/>\n  <xsd:complexType name=\"CT_Properties\">\n    <xsd:sequence>\n      <xsd:element name=\"property\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_Property\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Property\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element ref=\"vt:vector\"/>\n      <xsd:element ref=\"vt:array\"/>\n      <xsd:element ref=\"vt:blob\"/>\n      <xsd:element ref=\"vt:oblob\"/>\n      <xsd:element ref=\"vt:empty\"/>\n      <xsd:element ref=\"vt:null\"/>\n      <xsd:element ref=\"vt:i1\"/>\n      <xsd:element ref=\"vt:i2\"/>\n      <xsd:element ref=\"vt:i4\"/>\n      <xsd:element ref=\"vt:i8\"/>\n      <xsd:element ref=\"vt:int\"/>\n      <xsd:element ref=\"vt:ui1\"/>\n      <xsd:element ref=\"vt:ui2\"/>\n      <xsd:element ref=\"vt:ui4\"/>\n      <xsd:element ref=\"vt:ui8\"/>\n      <xsd:element ref=\"vt:uint\"/>\n      <xsd:element ref=\"vt:r4\"/>\n      <xsd:element ref=\"vt:r8\"/>\n      <xsd:element ref=\"vt:decimal\"/>\n      <xsd:element ref=\"vt:lpstr\"/>\n      <xsd:element ref=\"vt:lpwstr\"/>\n      <xsd:element ref=\"vt:bstr\"/>\n      <xsd:element ref=\"vt:date\"/>\n      <xsd:element ref=\"vt:filetime\"/>\n      <xsd:element ref=\"vt:bool\"/>\n      <xsd:element ref=\"vt:cy\"/>\n      <xsd:element ref=\"vt:error\"/>\n      <xsd:element ref=\"vt:stream\"/>\n      <xsd:element ref=\"vt:ostream\"/>\n      <xsd:element ref=\"vt:storage\"/>\n      <xsd:element ref=\"vt:ostorage\"/>\n      <xsd:element ref=\"vt:vstream\"/>\n      <xsd:element ref=\"vt:clsid\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"fmtid\" use=\"required\" type=\"s:ST_Guid\"/>\n    <xsd:attribute name=\"pid\" use=\"required\" type=\"xsd:int\"/>\n    <xsd:attribute name=\"name\" use=\"optional\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"linkTarget\" use=\"optional\" type=\"xsd:string\"/>\n  </xsd:complexType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/extended-properties\"\n  xmlns:vt=\"http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes\"\n  targetNamespace=\"http://schemas.openxmlformats.org/officeDocument/2006/extended-properties\"\n  elementFormDefault=\"qualified\" blockDefault=\"#all\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes\"\n    schemaLocation=\"shared-documentPropertiesVariantTypes.xsd\"/>\n  <xsd:element name=\"Properties\" type=\"CT_Properties\"/>\n  <xsd:complexType name=\"CT_Properties\">\n    <xsd:all>\n      <xsd:element name=\"Template\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:string\"/>\n      <xsd:element name=\"Manager\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:string\"/>\n      <xsd:element name=\"Company\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:string\"/>\n      <xsd:element name=\"Pages\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:int\"/>\n      <xsd:element name=\"Words\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:int\"/>\n      <xsd:element name=\"Characters\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:int\"/>\n      <xsd:element name=\"PresentationFormat\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:string\"/>\n      <xsd:element name=\"Lines\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:int\"/>\n      <xsd:element name=\"Paragraphs\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:int\"/>\n      <xsd:element name=\"Slides\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:int\"/>\n      <xsd:element name=\"Notes\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:int\"/>\n      <xsd:element name=\"TotalTime\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:int\"/>\n      <xsd:element name=\"HiddenSlides\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:int\"/>\n      <xsd:element name=\"MMClips\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:int\"/>\n      <xsd:element name=\"ScaleCrop\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:boolean\"/>\n      <xsd:element name=\"HeadingPairs\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_VectorVariant\"/>\n      <xsd:element name=\"TitlesOfParts\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_VectorLpstr\"/>\n      <xsd:element name=\"LinksUpToDate\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:boolean\"/>\n      <xsd:element name=\"CharactersWithSpaces\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:int\"/>\n      <xsd:element name=\"SharedDoc\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:boolean\"/>\n      <xsd:element name=\"HyperlinkBase\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:string\"/>\n      <xsd:element name=\"HLinks\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_VectorVariant\"/>\n      <xsd:element name=\"HyperlinksChanged\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:boolean\"/>\n      <xsd:element name=\"DigSig\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_DigSigBlob\"/>\n      <xsd:element name=\"Application\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:string\"/>\n      <xsd:element name=\"AppVersion\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:string\"/>\n      <xsd:element name=\"DocSecurity\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xsd:int\"/>\n    </xsd:all>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_VectorVariant\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element ref=\"vt:vector\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_VectorLpstr\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element ref=\"vt:vector\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DigSigBlob\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element ref=\"vt:blob\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  targetNamespace=\"http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes\"\n  blockDefault=\"#all\" elementFormDefault=\"qualified\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:simpleType name=\"ST_VectorBaseType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"variant\"/>\n      <xsd:enumeration value=\"i1\"/>\n      <xsd:enumeration value=\"i2\"/>\n      <xsd:enumeration value=\"i4\"/>\n      <xsd:enumeration value=\"i8\"/>\n      <xsd:enumeration value=\"ui1\"/>\n      <xsd:enumeration value=\"ui2\"/>\n      <xsd:enumeration value=\"ui4\"/>\n      <xsd:enumeration value=\"ui8\"/>\n      <xsd:enumeration value=\"r4\"/>\n      <xsd:enumeration value=\"r8\"/>\n      <xsd:enumeration value=\"lpstr\"/>\n      <xsd:enumeration value=\"lpwstr\"/>\n      <xsd:enumeration value=\"bstr\"/>\n      <xsd:enumeration value=\"date\"/>\n      <xsd:enumeration value=\"filetime\"/>\n      <xsd:enumeration value=\"bool\"/>\n      <xsd:enumeration value=\"cy\"/>\n      <xsd:enumeration value=\"error\"/>\n      <xsd:enumeration value=\"clsid\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ArrayBaseType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"variant\"/>\n      <xsd:enumeration value=\"i1\"/>\n      <xsd:enumeration value=\"i2\"/>\n      <xsd:enumeration value=\"i4\"/>\n      <xsd:enumeration value=\"int\"/>\n      <xsd:enumeration value=\"ui1\"/>\n      <xsd:enumeration value=\"ui2\"/>\n      <xsd:enumeration value=\"ui4\"/>\n      <xsd:enumeration value=\"uint\"/>\n      <xsd:enumeration value=\"r4\"/>\n      <xsd:enumeration value=\"r8\"/>\n      <xsd:enumeration value=\"decimal\"/>\n      <xsd:enumeration value=\"bstr\"/>\n      <xsd:enumeration value=\"date\"/>\n      <xsd:enumeration value=\"bool\"/>\n      <xsd:enumeration value=\"cy\"/>\n      <xsd:enumeration value=\"error\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Cy\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"\\s*[0-9]*\\.[0-9]{4}\\s*\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Error\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"\\s*0x[0-9A-Za-z]{8}\\s*\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Empty\"/>\n  <xsd:complexType name=\"CT_Null\"/>\n  <xsd:complexType name=\"CT_Vector\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"unbounded\">\n      <xsd:element ref=\"variant\"/>\n      <xsd:element ref=\"i1\"/>\n      <xsd:element ref=\"i2\"/>\n      <xsd:element ref=\"i4\"/>\n      <xsd:element ref=\"i8\"/>\n      <xsd:element ref=\"ui1\"/>\n      <xsd:element ref=\"ui2\"/>\n      <xsd:element ref=\"ui4\"/>\n      <xsd:element ref=\"ui8\"/>\n      <xsd:element ref=\"r4\"/>\n      <xsd:element ref=\"r8\"/>\n      <xsd:element ref=\"lpstr\"/>\n      <xsd:element ref=\"lpwstr\"/>\n      <xsd:element ref=\"bstr\"/>\n      <xsd:element ref=\"date\"/>\n      <xsd:element ref=\"filetime\"/>\n      <xsd:element ref=\"bool\"/>\n      <xsd:element ref=\"cy\"/>\n      <xsd:element ref=\"error\"/>\n      <xsd:element ref=\"clsid\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"baseType\" type=\"ST_VectorBaseType\" use=\"required\"/>\n    <xsd:attribute name=\"size\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Array\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"unbounded\">\n      <xsd:element ref=\"variant\"/>\n      <xsd:element ref=\"i1\"/>\n      <xsd:element ref=\"i2\"/>\n      <xsd:element ref=\"i4\"/>\n      <xsd:element ref=\"int\"/>\n      <xsd:element ref=\"ui1\"/>\n      <xsd:element ref=\"ui2\"/>\n      <xsd:element ref=\"ui4\"/>\n      <xsd:element ref=\"uint\"/>\n      <xsd:element ref=\"r4\"/>\n      <xsd:element ref=\"r8\"/>\n      <xsd:element ref=\"decimal\"/>\n      <xsd:element ref=\"bstr\"/>\n      <xsd:element ref=\"date\"/>\n      <xsd:element ref=\"bool\"/>\n      <xsd:element ref=\"error\"/>\n      <xsd:element ref=\"cy\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"lBounds\" type=\"xsd:int\" use=\"required\"/>\n    <xsd:attribute name=\"uBounds\" type=\"xsd:int\" use=\"required\"/>\n    <xsd:attribute name=\"baseType\" type=\"ST_ArrayBaseType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Variant\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element ref=\"variant\"/>\n      <xsd:element ref=\"vector\"/>\n      <xsd:element ref=\"array\"/>\n      <xsd:element ref=\"blob\"/>\n      <xsd:element ref=\"oblob\"/>\n      <xsd:element ref=\"empty\"/>\n      <xsd:element ref=\"null\"/>\n      <xsd:element ref=\"i1\"/>\n      <xsd:element ref=\"i2\"/>\n      <xsd:element ref=\"i4\"/>\n      <xsd:element ref=\"i8\"/>\n      <xsd:element ref=\"int\"/>\n      <xsd:element ref=\"ui1\"/>\n      <xsd:element ref=\"ui2\"/>\n      <xsd:element ref=\"ui4\"/>\n      <xsd:element ref=\"ui8\"/>\n      <xsd:element ref=\"uint\"/>\n      <xsd:element ref=\"r4\"/>\n      <xsd:element ref=\"r8\"/>\n      <xsd:element ref=\"decimal\"/>\n      <xsd:element ref=\"lpstr\"/>\n      <xsd:element ref=\"lpwstr\"/>\n      <xsd:element ref=\"bstr\"/>\n      <xsd:element ref=\"date\"/>\n      <xsd:element ref=\"filetime\"/>\n      <xsd:element ref=\"bool\"/>\n      <xsd:element ref=\"cy\"/>\n      <xsd:element ref=\"error\"/>\n      <xsd:element ref=\"stream\"/>\n      <xsd:element ref=\"ostream\"/>\n      <xsd:element ref=\"storage\"/>\n      <xsd:element ref=\"ostorage\"/>\n      <xsd:element ref=\"vstream\"/>\n      <xsd:element ref=\"clsid\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Vstream\">\n    <xsd:simpleContent>\n      <xsd:extension base=\"xsd:base64Binary\">\n        <xsd:attribute name=\"version\" type=\"s:ST_Guid\"/>\n      </xsd:extension>\n    </xsd:simpleContent>\n  </xsd:complexType>\n  <xsd:element name=\"variant\" type=\"CT_Variant\"/>\n  <xsd:element name=\"vector\" type=\"CT_Vector\"/>\n  <xsd:element name=\"array\" type=\"CT_Array\"/>\n  <xsd:element name=\"blob\" type=\"xsd:base64Binary\"/>\n  <xsd:element name=\"oblob\" type=\"xsd:base64Binary\"/>\n  <xsd:element name=\"empty\" type=\"CT_Empty\"/>\n  <xsd:element name=\"null\" type=\"CT_Null\"/>\n  <xsd:element name=\"i1\" type=\"xsd:byte\"/>\n  <xsd:element name=\"i2\" type=\"xsd:short\"/>\n  <xsd:element name=\"i4\" type=\"xsd:int\"/>\n  <xsd:element name=\"i8\" type=\"xsd:long\"/>\n  <xsd:element name=\"int\" type=\"xsd:int\"/>\n  <xsd:element name=\"ui1\" type=\"xsd:unsignedByte\"/>\n  <xsd:element name=\"ui2\" type=\"xsd:unsignedShort\"/>\n  <xsd:element name=\"ui4\" type=\"xsd:unsignedInt\"/>\n  <xsd:element name=\"ui8\" type=\"xsd:unsignedLong\"/>\n  <xsd:element name=\"uint\" type=\"xsd:unsignedInt\"/>\n  <xsd:element name=\"r4\" type=\"xsd:float\"/>\n  <xsd:element name=\"r8\" type=\"xsd:double\"/>\n  <xsd:element name=\"decimal\" type=\"xsd:decimal\"/>\n  <xsd:element name=\"lpstr\" type=\"xsd:string\"/>\n  <xsd:element name=\"lpwstr\" type=\"xsd:string\"/>\n  <xsd:element name=\"bstr\" type=\"xsd:string\"/>\n  <xsd:element name=\"date\" type=\"xsd:dateTime\"/>\n  <xsd:element name=\"filetime\" type=\"xsd:dateTime\"/>\n  <xsd:element name=\"bool\" type=\"xsd:boolean\"/>\n  <xsd:element name=\"cy\" type=\"ST_Cy\"/>\n  <xsd:element name=\"error\" type=\"ST_Error\"/>\n  <xsd:element name=\"stream\" type=\"xsd:base64Binary\"/>\n  <xsd:element name=\"ostream\" type=\"xsd:base64Binary\"/>\n  <xsd:element name=\"storage\" type=\"xsd:base64Binary\"/>\n  <xsd:element name=\"ostorage\" type=\"xsd:base64Binary\"/>\n  <xsd:element name=\"vstream\" type=\"CT_Vstream\"/>\n  <xsd:element name=\"clsid\" type=\"s:ST_Guid\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/math\"\n  xmlns:m=\"http://schemas.openxmlformats.org/officeDocument/2006/math\"\n  xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  elementFormDefault=\"qualified\" attributeFormDefault=\"qualified\" blockDefault=\"#all\"\n  targetNamespace=\"http://schemas.openxmlformats.org/officeDocument/2006/math\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"\n    schemaLocation=\"wml.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:import namespace=\"http://www.w3.org/XML/1998/namespace\" schemaLocation=\"xml.xsd\"/>\n  <xsd:simpleType name=\"ST_Integer255\">\n    <xsd:restriction base=\"xsd:integer\">\n      <xsd:minInclusive value=\"1\"/>\n      <xsd:maxInclusive value=\"255\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Integer255\">\n    <xsd:attribute name=\"val\" type=\"ST_Integer255\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Integer2\">\n    <xsd:restriction base=\"xsd:integer\">\n      <xsd:minInclusive value=\"-2\"/>\n      <xsd:maxInclusive value=\"2\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Integer2\">\n    <xsd:attribute name=\"val\" type=\"ST_Integer2\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SpacingRule\">\n    <xsd:restriction base=\"xsd:integer\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"4\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SpacingRule\">\n    <xsd:attribute name=\"val\" type=\"ST_SpacingRule\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_UnSignedInteger\">\n    <xsd:restriction base=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_UnSignedInteger\">\n    <xsd:attribute name=\"val\" type=\"ST_UnSignedInteger\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Char\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:maxLength value=\"1\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Char\">\n    <xsd:attribute name=\"val\" type=\"ST_Char\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OnOff\">\n    <xsd:attribute name=\"val\" type=\"s:ST_OnOff\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_String\">\n    <xsd:attribute name=\"val\" type=\"s:ST_String\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_XAlign\">\n    <xsd:attribute name=\"val\" type=\"s:ST_XAlign\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_YAlign\">\n    <xsd:attribute name=\"val\" type=\"s:ST_YAlign\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Shp\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"centered\"/>\n      <xsd:enumeration value=\"match\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Shp\">\n    <xsd:attribute name=\"val\" type=\"ST_Shp\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"bar\"/>\n      <xsd:enumeration value=\"skw\"/>\n      <xsd:enumeration value=\"lin\"/>\n      <xsd:enumeration value=\"noBar\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_FType\">\n    <xsd:attribute name=\"val\" type=\"ST_FType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_LimLoc\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"undOvr\"/>\n      <xsd:enumeration value=\"subSup\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LimLoc\">\n    <xsd:attribute name=\"val\" type=\"ST_LimLoc\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TopBot\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"top\"/>\n      <xsd:enumeration value=\"bot\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TopBot\">\n    <xsd:attribute name=\"val\" type=\"ST_TopBot\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Script\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"roman\"/>\n      <xsd:enumeration value=\"script\"/>\n      <xsd:enumeration value=\"fraktur\"/>\n      <xsd:enumeration value=\"double-struck\"/>\n      <xsd:enumeration value=\"sans-serif\"/>\n      <xsd:enumeration value=\"monospace\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Script\">\n    <xsd:attribute name=\"val\" type=\"ST_Script\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Style\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"p\"/>\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"i\"/>\n      <xsd:enumeration value=\"bi\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Style\">\n    <xsd:attribute name=\"val\" type=\"ST_Style\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ManualBreak\">\n    <xsd:attribute name=\"alnAt\" type=\"ST_Integer255\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ScriptStyle\">\n    <xsd:sequence>\n      <xsd:element name=\"scr\" minOccurs=\"0\" type=\"CT_Script\"/>\n      <xsd:element name=\"sty\" minOccurs=\"0\" type=\"CT_Style\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_RPR\">\n    <xsd:sequence>\n      <xsd:element name=\"lit\" minOccurs=\"0\" type=\"CT_OnOff\"/>\n      <xsd:choice>\n        <xsd:element name=\"nor\" minOccurs=\"0\" type=\"CT_OnOff\"/>\n        <xsd:sequence>\n          <xsd:group ref=\"EG_ScriptStyle\"/>\n        </xsd:sequence>\n      </xsd:choice>\n      <xsd:element name=\"brk\" minOccurs=\"0\" type=\"CT_ManualBreak\"/>\n      <xsd:element name=\"aln\" minOccurs=\"0\" type=\"CT_OnOff\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Text\">\n    <xsd:simpleContent>\n      <xsd:extension base=\"s:ST_String\">\n        <xsd:attribute ref=\"xml:space\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:simpleContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_R\">\n    <xsd:sequence>\n      <xsd:element name=\"rPr\" type=\"CT_RPR\" minOccurs=\"0\"/>\n      <xsd:group ref=\"w:EG_RPr\" minOccurs=\"0\"/>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n        <xsd:group ref=\"w:EG_RunInnerContent\"/>\n        <xsd:element name=\"t\" type=\"CT_Text\" minOccurs=\"0\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CtrlPr\">\n    <xsd:sequence>\n      <xsd:group ref=\"w:EG_RPrMath\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AccPr\">\n    <xsd:sequence>\n      <xsd:element name=\"chr\" type=\"CT_Char\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Acc\">\n    <xsd:sequence>\n      <xsd:element name=\"accPr\" type=\"CT_AccPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BarPr\">\n    <xsd:sequence>\n      <xsd:element name=\"pos\" type=\"CT_TopBot\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Bar\">\n    <xsd:sequence>\n      <xsd:element name=\"barPr\" type=\"CT_BarPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BoxPr\">\n    <xsd:sequence>\n      <xsd:element name=\"opEmu\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"noBreak\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"diff\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"brk\" type=\"CT_ManualBreak\" minOccurs=\"0\"/>\n      <xsd:element name=\"aln\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Box\">\n    <xsd:sequence>\n      <xsd:element name=\"boxPr\" type=\"CT_BoxPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BorderBoxPr\">\n    <xsd:sequence>\n      <xsd:element name=\"hideTop\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"hideBot\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"hideLeft\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"hideRight\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"strikeH\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"strikeV\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"strikeBLTR\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"strikeTLBR\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BorderBox\">\n    <xsd:sequence>\n      <xsd:element name=\"borderBoxPr\" type=\"CT_BorderBoxPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DPr\">\n    <xsd:sequence>\n      <xsd:element name=\"begChr\" type=\"CT_Char\" minOccurs=\"0\"/>\n      <xsd:element name=\"sepChr\" type=\"CT_Char\" minOccurs=\"0\"/>\n      <xsd:element name=\"endChr\" type=\"CT_Char\" minOccurs=\"0\"/>\n      <xsd:element name=\"grow\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"shp\" type=\"CT_Shp\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_D\">\n    <xsd:sequence>\n      <xsd:element name=\"dPr\" type=\"CT_DPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EqArrPr\">\n    <xsd:sequence>\n      <xsd:element name=\"baseJc\" type=\"CT_YAlign\" minOccurs=\"0\"/>\n      <xsd:element name=\"maxDist\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"objDist\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"rSpRule\" type=\"CT_SpacingRule\" minOccurs=\"0\"/>\n      <xsd:element name=\"rSp\" type=\"CT_UnSignedInteger\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EqArr\">\n    <xsd:sequence>\n      <xsd:element name=\"eqArrPr\" type=\"CT_EqArrPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FPr\">\n    <xsd:sequence>\n      <xsd:element name=\"type\" type=\"CT_FType\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_F\">\n    <xsd:sequence>\n      <xsd:element name=\"fPr\" type=\"CT_FPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"num\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"den\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FuncPr\">\n    <xsd:sequence>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Func\">\n    <xsd:sequence>\n      <xsd:element name=\"funcPr\" type=\"CT_FuncPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"fName\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupChrPr\">\n    <xsd:sequence>\n      <xsd:element name=\"chr\" type=\"CT_Char\" minOccurs=\"0\"/>\n      <xsd:element name=\"pos\" type=\"CT_TopBot\" minOccurs=\"0\"/>\n      <xsd:element name=\"vertJc\" type=\"CT_TopBot\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupChr\">\n    <xsd:sequence>\n      <xsd:element name=\"groupChrPr\" type=\"CT_GroupChrPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LimLowPr\">\n    <xsd:sequence>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LimLow\">\n    <xsd:sequence>\n      <xsd:element name=\"limLowPr\" type=\"CT_LimLowPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"lim\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LimUppPr\">\n    <xsd:sequence>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LimUpp\">\n    <xsd:sequence>\n      <xsd:element name=\"limUppPr\" type=\"CT_LimUppPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"lim\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MCPr\">\n    <xsd:sequence>\n      <xsd:element name=\"count\" type=\"CT_Integer255\" minOccurs=\"0\"/>\n      <xsd:element name=\"mcJc\" type=\"CT_XAlign\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MC\">\n    <xsd:sequence>\n      <xsd:element name=\"mcPr\" type=\"CT_MCPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MCS\">\n    <xsd:sequence>\n      <xsd:element name=\"mc\" type=\"CT_MC\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MPr\">\n    <xsd:sequence>\n      <xsd:element name=\"baseJc\" type=\"CT_YAlign\" minOccurs=\"0\"/>\n      <xsd:element name=\"plcHide\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"rSpRule\" type=\"CT_SpacingRule\" minOccurs=\"0\"/>\n      <xsd:element name=\"cGpRule\" type=\"CT_SpacingRule\" minOccurs=\"0\"/>\n      <xsd:element name=\"rSp\" type=\"CT_UnSignedInteger\" minOccurs=\"0\"/>\n      <xsd:element name=\"cSp\" type=\"CT_UnSignedInteger\" minOccurs=\"0\"/>\n      <xsd:element name=\"cGp\" type=\"CT_UnSignedInteger\" minOccurs=\"0\"/>\n      <xsd:element name=\"mcs\" type=\"CT_MCS\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MR\">\n    <xsd:sequence>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_M\">\n    <xsd:sequence>\n      <xsd:element name=\"mPr\" type=\"CT_MPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"mr\" type=\"CT_MR\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NaryPr\">\n    <xsd:sequence>\n      <xsd:element name=\"chr\" type=\"CT_Char\" minOccurs=\"0\"/>\n      <xsd:element name=\"limLoc\" type=\"CT_LimLoc\" minOccurs=\"0\"/>\n      <xsd:element name=\"grow\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"subHide\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"supHide\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Nary\">\n    <xsd:sequence>\n      <xsd:element name=\"naryPr\" type=\"CT_NaryPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"sub\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"sup\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PhantPr\">\n    <xsd:sequence>\n      <xsd:element name=\"show\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"zeroWid\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"zeroAsc\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"zeroDesc\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"transp\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Phant\">\n    <xsd:sequence>\n      <xsd:element name=\"phantPr\" type=\"CT_PhantPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RadPr\">\n    <xsd:sequence>\n      <xsd:element name=\"degHide\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Rad\">\n    <xsd:sequence>\n      <xsd:element name=\"radPr\" type=\"CT_RadPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"deg\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SPrePr\">\n    <xsd:sequence>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SPre\">\n    <xsd:sequence>\n      <xsd:element name=\"sPrePr\" type=\"CT_SPrePr\" minOccurs=\"0\"/>\n      <xsd:element name=\"sub\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"sup\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SSubPr\">\n    <xsd:sequence>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SSub\">\n    <xsd:sequence>\n      <xsd:element name=\"sSubPr\" type=\"CT_SSubPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"sub\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SSubSupPr\">\n    <xsd:sequence>\n      <xsd:element name=\"alnScr\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SSubSup\">\n    <xsd:sequence>\n      <xsd:element name=\"sSubSupPr\" type=\"CT_SSubSupPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"sub\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"sup\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SSupPr\">\n    <xsd:sequence>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SSup\">\n    <xsd:sequence>\n      <xsd:element name=\"sSupPr\" type=\"CT_SSupPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"e\" type=\"CT_OMathArg\"/>\n      <xsd:element name=\"sup\" type=\"CT_OMathArg\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_OMathMathElements\">\n    <xsd:choice>\n      <xsd:element name=\"acc\" type=\"CT_Acc\"/>\n      <xsd:element name=\"bar\" type=\"CT_Bar\"/>\n      <xsd:element name=\"box\" type=\"CT_Box\"/>\n      <xsd:element name=\"borderBox\" type=\"CT_BorderBox\"/>\n      <xsd:element name=\"d\" type=\"CT_D\"/>\n      <xsd:element name=\"eqArr\" type=\"CT_EqArr\"/>\n      <xsd:element name=\"f\" type=\"CT_F\"/>\n      <xsd:element name=\"func\" type=\"CT_Func\"/>\n      <xsd:element name=\"groupChr\" type=\"CT_GroupChr\"/>\n      <xsd:element name=\"limLow\" type=\"CT_LimLow\"/>\n      <xsd:element name=\"limUpp\" type=\"CT_LimUpp\"/>\n      <xsd:element name=\"m\" type=\"CT_M\"/>\n      <xsd:element name=\"nary\" type=\"CT_Nary\"/>\n      <xsd:element name=\"phant\" type=\"CT_Phant\"/>\n      <xsd:element name=\"rad\" type=\"CT_Rad\"/>\n      <xsd:element name=\"sPre\" type=\"CT_SPre\"/>\n      <xsd:element name=\"sSub\" type=\"CT_SSub\"/>\n      <xsd:element name=\"sSubSup\" type=\"CT_SSubSup\"/>\n      <xsd:element name=\"sSup\" type=\"CT_SSup\"/>\n      <xsd:element name=\"r\" type=\"CT_R\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:group name=\"EG_OMathElements\">\n    <xsd:choice>\n      <xsd:group ref=\"EG_OMathMathElements\"/>\n      <xsd:group ref=\"w:EG_PContentMath\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_OMathArgPr\">\n    <xsd:sequence>\n      <xsd:element name=\"argSz\" type=\"CT_Integer2\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OMathArg\">\n    <xsd:sequence>\n      <xsd:element name=\"argPr\" type=\"CT_OMathArgPr\" minOccurs=\"0\"/>\n      <xsd:group ref=\"EG_OMathElements\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"ctrlPr\" type=\"CT_CtrlPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Jc\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"right\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"centerGroup\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_OMathJc\">\n    <xsd:attribute name=\"val\" type=\"ST_Jc\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OMathParaPr\">\n    <xsd:sequence>\n      <xsd:element name=\"jc\" type=\"CT_OMathJc\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TwipsMeasure\">\n    <xsd:attribute name=\"val\" type=\"s:ST_TwipsMeasure\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_BreakBin\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"before\"/>\n      <xsd:enumeration value=\"after\"/>\n      <xsd:enumeration value=\"repeat\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_BreakBin\">\n    <xsd:attribute name=\"val\" type=\"ST_BreakBin\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_BreakBinSub\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"--\"/>\n      <xsd:enumeration value=\"-+\"/>\n      <xsd:enumeration value=\"+-\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_BreakBinSub\">\n    <xsd:attribute name=\"val\" type=\"ST_BreakBinSub\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MathPr\">\n    <xsd:sequence>\n      <xsd:element name=\"mathFont\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"brkBin\" type=\"CT_BreakBin\" minOccurs=\"0\"/>\n      <xsd:element name=\"brkBinSub\" type=\"CT_BreakBinSub\" minOccurs=\"0\"/>\n      <xsd:element name=\"smallFrac\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"dispDef\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"lMargin\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"rMargin\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"defJc\" type=\"CT_OMathJc\" minOccurs=\"0\"/>\n      <xsd:element name=\"preSp\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"postSp\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"interSp\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"intraSp\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:choice minOccurs=\"0\">\n        <xsd:element name=\"wrapIndent\" type=\"CT_TwipsMeasure\"/>\n        <xsd:element name=\"wrapRight\" type=\"CT_OnOff\"/>\n      </xsd:choice>\n      <xsd:element name=\"intLim\" type=\"CT_LimLoc\" minOccurs=\"0\"/>\n      <xsd:element name=\"naryLim\" type=\"CT_LimLoc\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"mathPr\" type=\"CT_MathPr\"/>\n  <xsd:complexType name=\"CT_OMathPara\">\n    <xsd:sequence>\n      <xsd:element name=\"oMathParaPr\" type=\"CT_OMathParaPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"oMath\" type=\"CT_OMath\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OMath\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_OMathElements\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"oMathPara\" type=\"CT_OMathPara\"/>\n  <xsd:element name=\"oMath\" type=\"CT_OMath\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  elementFormDefault=\"qualified\"\n  targetNamespace=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  blockDefault=\"#all\">\n  <xsd:simpleType name=\"ST_RelationshipId\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:attribute name=\"id\" type=\"ST_RelationshipId\"/>\n  <xsd:attribute name=\"embed\" type=\"ST_RelationshipId\"/>\n  <xsd:attribute name=\"link\" type=\"ST_RelationshipId\"/>\n  <xsd:attribute name=\"dm\" type=\"ST_RelationshipId\" default=\"\"/>\n  <xsd:attribute name=\"lo\" type=\"ST_RelationshipId\" default=\"\"/>\n  <xsd:attribute name=\"qs\" type=\"ST_RelationshipId\" default=\"\"/>\n  <xsd:attribute name=\"cs\" type=\"ST_RelationshipId\" default=\"\"/>\n  <xsd:attribute name=\"blip\" type=\"ST_RelationshipId\" default=\"\"/>\n  <xsd:attribute name=\"pict\" type=\"ST_RelationshipId\"/>\n  <xsd:attribute name=\"href\" type=\"ST_RelationshipId\"/>\n  <xsd:attribute name=\"topLeft\" type=\"ST_RelationshipId\"/>\n  <xsd:attribute name=\"topRight\" type=\"ST_RelationshipId\"/>\n  <xsd:attribute name=\"bottomLeft\" type=\"ST_RelationshipId\"/>\n  <xsd:attribute name=\"bottomRight\" type=\"ST_RelationshipId\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  xmlns:xdr=\"http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  targetNamespace=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"\n  elementFormDefault=\"qualified\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n    schemaLocation=\"shared-relationshipReference.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:import \n    namespace=\"http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing\"\n    schemaLocation=\"dml-spreadsheetDrawing.xsd\"/>\n  <xsd:complexType name=\"CT_AutoFilter\">\n    <xsd:sequence>\n      <xsd:element name=\"filterColumn\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_FilterColumn\"/>\n      <xsd:element name=\"sortState\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_SortState\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FilterColumn\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"1\">\n      <xsd:element name=\"filters\" type=\"CT_Filters\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"top10\" type=\"CT_Top10\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"customFilters\" type=\"CT_CustomFilters\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dynamicFilter\" type=\"CT_DynamicFilter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"colorFilter\" type=\"CT_ColorFilter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"iconFilter\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_IconFilter\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"colId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"hiddenButton\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showButton\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Filters\">\n    <xsd:sequence>\n      <xsd:element name=\"filter\" type=\"CT_Filter\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dateGroupItem\" type=\"CT_DateGroupItem\" minOccurs=\"0\" maxOccurs=\"unbounded\"\n      />\n    </xsd:sequence>\n    <xsd:attribute name=\"blank\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"calendarType\" type=\"s:ST_CalendarType\" use=\"optional\" default=\"none\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Filter\">\n    <xsd:attribute name=\"val\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomFilters\">\n    <xsd:sequence>\n      <xsd:element name=\"customFilter\" type=\"CT_CustomFilter\" minOccurs=\"1\" maxOccurs=\"2\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"and\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomFilter\">\n    <xsd:attribute name=\"operator\" type=\"ST_FilterOperator\" default=\"equal\" use=\"optional\"/>\n    <xsd:attribute name=\"val\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Top10\">\n    <xsd:attribute name=\"top\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"percent\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:double\" use=\"required\"/>\n    <xsd:attribute name=\"filterVal\" type=\"xsd:double\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColorFilter\">\n    <xsd:attribute name=\"dxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n    <xsd:attribute name=\"cellColor\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_IconFilter\">\n    <xsd:attribute name=\"iconSet\" type=\"ST_IconSetType\" use=\"required\"/>\n    <xsd:attribute name=\"iconId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FilterOperator\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"equal\"/>\n      <xsd:enumeration value=\"lessThan\"/>\n      <xsd:enumeration value=\"lessThanOrEqual\"/>\n      <xsd:enumeration value=\"notEqual\"/>\n      <xsd:enumeration value=\"greaterThanOrEqual\"/>\n      <xsd:enumeration value=\"greaterThan\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DynamicFilter\">\n    <xsd:attribute name=\"type\" type=\"ST_DynamicFilterType\" use=\"required\"/>\n    <xsd:attribute name=\"val\" type=\"xsd:double\" use=\"optional\"/>\n    <xsd:attribute name=\"valIso\" type=\"xsd:dateTime\" use=\"optional\"/>\n    <xsd:attribute name=\"maxVal\" type=\"xsd:double\" use=\"optional\"/>\n    <xsd:attribute name=\"maxValIso\" type=\"xsd:dateTime\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DynamicFilterType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"null\"/>\n      <xsd:enumeration value=\"aboveAverage\"/>\n      <xsd:enumeration value=\"belowAverage\"/>\n      <xsd:enumeration value=\"tomorrow\"/>\n      <xsd:enumeration value=\"today\"/>\n      <xsd:enumeration value=\"yesterday\"/>\n      <xsd:enumeration value=\"nextWeek\"/>\n      <xsd:enumeration value=\"thisWeek\"/>\n      <xsd:enumeration value=\"lastWeek\"/>\n      <xsd:enumeration value=\"nextMonth\"/>\n      <xsd:enumeration value=\"thisMonth\"/>\n      <xsd:enumeration value=\"lastMonth\"/>\n      <xsd:enumeration value=\"nextQuarter\"/>\n      <xsd:enumeration value=\"thisQuarter\"/>\n      <xsd:enumeration value=\"lastQuarter\"/>\n      <xsd:enumeration value=\"nextYear\"/>\n      <xsd:enumeration value=\"thisYear\"/>\n      <xsd:enumeration value=\"lastYear\"/>\n      <xsd:enumeration value=\"yearToDate\"/>\n      <xsd:enumeration value=\"Q1\"/>\n      <xsd:enumeration value=\"Q2\"/>\n      <xsd:enumeration value=\"Q3\"/>\n      <xsd:enumeration value=\"Q4\"/>\n      <xsd:enumeration value=\"M1\"/>\n      <xsd:enumeration value=\"M2\"/>\n      <xsd:enumeration value=\"M3\"/>\n      <xsd:enumeration value=\"M4\"/>\n      <xsd:enumeration value=\"M5\"/>\n      <xsd:enumeration value=\"M6\"/>\n      <xsd:enumeration value=\"M7\"/>\n      <xsd:enumeration value=\"M8\"/>\n      <xsd:enumeration value=\"M9\"/>\n      <xsd:enumeration value=\"M10\"/>\n      <xsd:enumeration value=\"M11\"/>\n      <xsd:enumeration value=\"M12\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_IconSetType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"3Arrows\"/>\n      <xsd:enumeration value=\"3ArrowsGray\"/>\n      <xsd:enumeration value=\"3Flags\"/>\n      <xsd:enumeration value=\"3TrafficLights1\"/>\n      <xsd:enumeration value=\"3TrafficLights2\"/>\n      <xsd:enumeration value=\"3Signs\"/>\n      <xsd:enumeration value=\"3Symbols\"/>\n      <xsd:enumeration value=\"3Symbols2\"/>\n      <xsd:enumeration value=\"4Arrows\"/>\n      <xsd:enumeration value=\"4ArrowsGray\"/>\n      <xsd:enumeration value=\"4RedToBlack\"/>\n      <xsd:enumeration value=\"4Rating\"/>\n      <xsd:enumeration value=\"4TrafficLights\"/>\n      <xsd:enumeration value=\"5Arrows\"/>\n      <xsd:enumeration value=\"5ArrowsGray\"/>\n      <xsd:enumeration value=\"5Rating\"/>\n      <xsd:enumeration value=\"5Quarters\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SortState\">\n    <xsd:sequence>\n      <xsd:element name=\"sortCondition\" minOccurs=\"0\" maxOccurs=\"64\" type=\"CT_SortCondition\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"columnSort\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"caseSensitive\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"sortMethod\" type=\"ST_SortMethod\" use=\"optional\" default=\"none\"/>\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SortCondition\">\n    <xsd:attribute name=\"descending\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"sortBy\" type=\"ST_SortBy\" use=\"optional\" default=\"value\"/>\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"required\"/>\n    <xsd:attribute name=\"customList\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"dxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n    <xsd:attribute name=\"iconSet\" type=\"ST_IconSetType\" use=\"optional\" default=\"3Arrows\"/>\n    <xsd:attribute name=\"iconId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SortBy\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"value\"/>\n      <xsd:enumeration value=\"cellColor\"/>\n      <xsd:enumeration value=\"fontColor\"/>\n      <xsd:enumeration value=\"icon\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_SortMethod\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"stroke\"/>\n      <xsd:enumeration value=\"pinYin\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DateGroupItem\">\n    <xsd:attribute name=\"year\" type=\"xsd:unsignedShort\" use=\"required\"/>\n    <xsd:attribute name=\"month\" type=\"xsd:unsignedShort\" use=\"optional\"/>\n    <xsd:attribute name=\"day\" type=\"xsd:unsignedShort\" use=\"optional\"/>\n    <xsd:attribute name=\"hour\" type=\"xsd:unsignedShort\" use=\"optional\"/>\n    <xsd:attribute name=\"minute\" type=\"xsd:unsignedShort\" use=\"optional\"/>\n    <xsd:attribute name=\"second\" type=\"xsd:unsignedShort\" use=\"optional\"/>\n    <xsd:attribute name=\"dateTimeGrouping\" type=\"ST_DateTimeGrouping\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DateTimeGrouping\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"year\"/>\n      <xsd:enumeration value=\"month\"/>\n      <xsd:enumeration value=\"day\"/>\n      <xsd:enumeration value=\"hour\"/>\n      <xsd:enumeration value=\"minute\"/>\n      <xsd:enumeration value=\"second\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CellRef\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Ref\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_RefA\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Sqref\">\n    <xsd:list itemType=\"ST_Ref\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Formula\">\n    <xsd:restriction base=\"s:ST_Xstring\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_UnsignedIntHex\">\n    <xsd:restriction base=\"xsd:hexBinary\">\n      <xsd:length value=\"4\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_UnsignedShortHex\">\n    <xsd:restriction base=\"xsd:hexBinary\">\n      <xsd:length value=\"2\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_XStringElement\">\n    <xsd:attribute name=\"v\" type=\"s:ST_Xstring\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Extension\">\n    <xsd:sequence>\n      <xsd:any processContents=\"lax\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uri\" type=\"xsd:token\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ObjectAnchor\">\n    <xsd:sequence>\n      <xsd:element ref=\"xdr:from\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element ref=\"xdr:to\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"moveWithCells\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"sizeWithCells\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ExtensionList\">\n    <xsd:sequence>\n      <xsd:element name=\"ext\" type=\"CT_Extension\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_ExtensionList\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ExtensionList\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"calcChain\" type=\"CT_CalcChain\"/>\n  <xsd:complexType name=\"CT_CalcChain\">\n    <xsd:sequence>\n      <xsd:element name=\"c\" type=\"CT_CalcCell\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CalcCell\">\n    <xsd:attribute name=\"r\" type=\"ST_CellRef\" use=\"optional\"/>\n    <xsd:attribute name=\"ref\" type=\"ST_CellRef\" use=\"optional\"/>\n    <xsd:attribute name=\"i\" type=\"xsd:int\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"s\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"l\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"t\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"a\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:element name=\"comments\" type=\"CT_Comments\"/>\n  <xsd:complexType name=\"CT_Comments\">\n    <xsd:sequence>\n      <xsd:element name=\"authors\" type=\"CT_Authors\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"commentList\" type=\"CT_CommentList\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Authors\">\n    <xsd:sequence>\n      <xsd:element name=\"author\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CommentList\">\n    <xsd:sequence>\n      <xsd:element name=\"comment\" type=\"CT_Comment\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Comment\">\n    <xsd:sequence>\n      <xsd:element name=\"text\" type=\"CT_Rst\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"commentPr\" type=\"CT_CommentPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"required\"/>\n    <xsd:attribute name=\"authorId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"guid\" type=\"s:ST_Guid\" use=\"optional\"/>\n    <xsd:attribute name=\"shapeId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CommentPr\">\n    <xsd:sequence>\n      <xsd:element name=\"anchor\" type=\"CT_ObjectAnchor\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"locked\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"defaultSize\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"print\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"disabled\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"autoFill\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"autoLine\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"altText\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"textHAlign\" type=\"ST_TextHAlign\" use=\"optional\" default=\"left\"/>\n    <xsd:attribute name=\"textVAlign\" type=\"ST_TextVAlign\" use=\"optional\" default=\"top\"/>\n    <xsd:attribute name=\"lockText\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"justLastX\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"autoScale\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextHAlign\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"right\"/>\n      <xsd:enumeration value=\"justify\"/>\n      <xsd:enumeration value=\"distributed\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextVAlign\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"top\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"bottom\"/>\n      <xsd:enumeration value=\"justify\"/>\n      <xsd:enumeration value=\"distributed\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:element name=\"MapInfo\" type=\"CT_MapInfo\"/>\n  <xsd:complexType name=\"CT_MapInfo\">\n    <xsd:sequence>\n      <xsd:element name=\"Schema\" type=\"CT_Schema\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"Map\" type=\"CT_Map\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"SelectionNamespaces\" type=\"xsd:string\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Schema\" mixed=\"true\">\n    <xsd:sequence>\n      <xsd:any/>\n    </xsd:sequence>\n    <xsd:attribute name=\"ID\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"SchemaRef\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"Namespace\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"SchemaLanguage\" type=\"xsd:token\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Map\">\n    <xsd:sequence>\n      <xsd:element name=\"DataBinding\" type=\"CT_DataBinding\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"ID\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"Name\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"RootElement\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"SchemaID\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"ShowImportExportValidationErrors\" type=\"xsd:boolean\" use=\"required\"/>\n    <xsd:attribute name=\"AutoFit\" type=\"xsd:boolean\" use=\"required\"/>\n    <xsd:attribute name=\"Append\" type=\"xsd:boolean\" use=\"required\"/>\n    <xsd:attribute name=\"PreserveSortAFLayout\" type=\"xsd:boolean\" use=\"required\"/>\n    <xsd:attribute name=\"PreserveFormat\" type=\"xsd:boolean\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DataBinding\">\n    <xsd:sequence>\n      <xsd:any/>\n    </xsd:sequence>\n    <xsd:attribute name=\"DataBindingName\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"FileBinding\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"ConnectionID\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"FileBindingName\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"DataBindingLoadMode\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:element name=\"connections\" type=\"CT_Connections\"/>\n  <xsd:complexType name=\"CT_Connections\">\n    <xsd:sequence>\n      <xsd:element name=\"connection\" minOccurs=\"1\" maxOccurs=\"unbounded\" type=\"CT_Connection\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Connection\">\n    <xsd:sequence>\n      <xsd:element name=\"dbPr\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_DbPr\"/>\n      <xsd:element name=\"olapPr\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_OlapPr\"/>\n      <xsd:element name=\"webPr\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_WebPr\"/>\n      <xsd:element name=\"textPr\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_TextPr\"/>\n      <xsd:element name=\"parameters\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_Parameters\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" use=\"required\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"sourceFile\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"odcFile\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"keepAlive\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"interval\" use=\"optional\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"name\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"description\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"type\" use=\"optional\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"reconnectionMethod\" use=\"optional\" type=\"xsd:unsignedInt\" default=\"1\"/>\n    <xsd:attribute name=\"refreshedVersion\" use=\"required\" type=\"xsd:unsignedByte\"/>\n    <xsd:attribute name=\"minRefreshableVersion\" use=\"optional\" type=\"xsd:unsignedByte\" default=\"0\"/>\n    <xsd:attribute name=\"savePassword\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"new\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"deleted\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"onlyUseConnectionFile\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"background\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"refreshOnLoad\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"saveData\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"credentials\" use=\"optional\" type=\"ST_CredMethod\" default=\"integrated\"/>\n    <xsd:attribute name=\"singleSignOnId\" use=\"optional\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_CredMethod\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"integrated\"/>\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"stored\"/>\n      <xsd:enumeration value=\"prompt\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DbPr\">\n    <xsd:attribute name=\"connection\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"command\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"serverCommand\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"commandType\" use=\"optional\" type=\"xsd:unsignedInt\" default=\"2\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OlapPr\">\n    <xsd:attribute name=\"local\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"localConnection\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"localRefresh\" use=\"optional\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"sendLocale\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"rowDrillCount\" use=\"optional\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"serverFill\" use=\"optional\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"serverNumberFormat\" use=\"optional\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"serverFont\" use=\"optional\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"serverFontColor\" use=\"optional\" type=\"xsd:boolean\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WebPr\">\n    <xsd:sequence>\n      <xsd:element name=\"tables\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_Tables\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"xml\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"sourceData\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"parsePre\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"consecutive\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"firstRow\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"xl97\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"textDates\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"xl2000\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"url\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"post\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"htmlTables\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"htmlFormat\" use=\"optional\" type=\"ST_HtmlFmt\" default=\"none\"/>\n    <xsd:attribute name=\"editPage\" use=\"optional\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_HtmlFmt\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"rtf\"/>\n      <xsd:enumeration value=\"all\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Parameters\">\n    <xsd:sequence>\n      <xsd:element name=\"parameter\" minOccurs=\"1\" maxOccurs=\"unbounded\" type=\"CT_Parameter\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" use=\"optional\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Parameter\">\n    <xsd:attribute name=\"name\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"sqlType\" use=\"optional\" type=\"xsd:int\" default=\"0\"/>\n    <xsd:attribute name=\"parameterType\" use=\"optional\" type=\"ST_ParameterType\" default=\"prompt\"/>\n    <xsd:attribute name=\"refreshOnChange\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"prompt\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"boolean\" use=\"optional\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"double\" use=\"optional\" type=\"xsd:double\"/>\n    <xsd:attribute name=\"integer\" use=\"optional\" type=\"xsd:int\"/>\n    <xsd:attribute name=\"string\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"cell\" use=\"optional\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ParameterType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"prompt\"/>\n      <xsd:enumeration value=\"value\"/>\n      <xsd:enumeration value=\"cell\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Tables\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"m\" type=\"CT_TableMissing\"/>\n      <xsd:element name=\"s\" type=\"CT_XStringElement\"/>\n      <xsd:element name=\"x\" type=\"CT_Index\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"count\" use=\"optional\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableMissing\"/>\n  <xsd:complexType name=\"CT_TextPr\">\n    <xsd:sequence>\n      <xsd:element name=\"textFields\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_TextFields\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"prompt\" use=\"optional\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"fileType\" use=\"optional\" type=\"ST_FileType\" default=\"win\"/>\n    <xsd:attribute name=\"codePage\" use=\"optional\" type=\"xsd:unsignedInt\" default=\"1252\"/>\n    <xsd:attribute name=\"characterSet\" use=\"optional\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"firstRow\" use=\"optional\" type=\"xsd:unsignedInt\" default=\"1\"/>\n    <xsd:attribute name=\"sourceFile\" use=\"optional\" type=\"s:ST_Xstring\" default=\"\"/>\n    <xsd:attribute name=\"delimited\" use=\"optional\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"decimal\" use=\"optional\" type=\"s:ST_Xstring\" default=\".\"/>\n    <xsd:attribute name=\"thousands\" use=\"optional\" type=\"s:ST_Xstring\" default=\",\"/>\n    <xsd:attribute name=\"tab\" use=\"optional\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"space\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"comma\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"semicolon\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"consecutive\" use=\"optional\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"qualifier\" use=\"optional\" type=\"ST_Qualifier\" default=\"doubleQuote\"/>\n    <xsd:attribute name=\"delimiter\" use=\"optional\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FileType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"mac\"/>\n      <xsd:enumeration value=\"win\"/>\n      <xsd:enumeration value=\"dos\"/>\n      <xsd:enumeration value=\"lin\"/>\n      <xsd:enumeration value=\"other\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Qualifier\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"doubleQuote\"/>\n      <xsd:enumeration value=\"singleQuote\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextFields\">\n    <xsd:sequence>\n      <xsd:element name=\"textField\" minOccurs=\"1\" maxOccurs=\"unbounded\" type=\"CT_TextField\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" use=\"optional\" type=\"xsd:unsignedInt\" default=\"1\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextField\">\n    <xsd:attribute name=\"type\" use=\"optional\" type=\"ST_ExternalConnectionType\" default=\"general\"/>\n    <xsd:attribute name=\"position\" use=\"optional\" type=\"xsd:unsignedInt\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ExternalConnectionType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"general\"/>\n      <xsd:enumeration value=\"text\"/>\n      <xsd:enumeration value=\"MDY\"/>\n      <xsd:enumeration value=\"DMY\"/>\n      <xsd:enumeration value=\"YMD\"/>\n      <xsd:enumeration value=\"MYD\"/>\n      <xsd:enumeration value=\"DYM\"/>\n      <xsd:enumeration value=\"YDM\"/>\n      <xsd:enumeration value=\"skip\"/>\n      <xsd:enumeration value=\"EMD\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:element name=\"pivotCacheDefinition\" type=\"CT_PivotCacheDefinition\"/>\n  <xsd:element name=\"pivotCacheRecords\" type=\"CT_PivotCacheRecords\"/>\n  <xsd:element name=\"pivotTableDefinition\" type=\"CT_pivotTableDefinition\"/>\n  <xsd:complexType name=\"CT_PivotCacheDefinition\">\n    <xsd:sequence>\n      <xsd:element name=\"cacheSource\" type=\"CT_CacheSource\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cacheFields\" type=\"CT_CacheFields\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cacheHierarchies\" minOccurs=\"0\" type=\"CT_CacheHierarchies\"/>\n      <xsd:element name=\"kpis\" minOccurs=\"0\" type=\"CT_PCDKPIs\"/>\n      <xsd:element name=\"tupleCache\" minOccurs=\"0\" type=\"CT_TupleCache\"/>\n      <xsd:element name=\"calculatedItems\" minOccurs=\"0\" type=\"CT_CalculatedItems\"/>\n      <xsd:element name=\"calculatedMembers\" type=\"CT_CalculatedMembers\" minOccurs=\"0\"/>\n      <xsd:element name=\"dimensions\" type=\"CT_Dimensions\" minOccurs=\"0\"/>\n      <xsd:element name=\"measureGroups\" type=\"CT_MeasureGroups\" minOccurs=\"0\"/>\n      <xsd:element name=\"maps\" type=\"CT_MeasureDimensionMaps\" minOccurs=\"0\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n    <xsd:attribute name=\"invalid\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"saveData\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"refreshOnLoad\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"optimizeMemory\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"enableRefresh\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"refreshedBy\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"refreshedDate\" type=\"xsd:double\" use=\"optional\"/>\n    <xsd:attribute name=\"refreshedDateIso\" type=\"xsd:dateTime\" use=\"optional\"/>\n    <xsd:attribute name=\"backgroundQuery\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"missingItemsLimit\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"createdVersion\" type=\"xsd:unsignedByte\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"refreshedVersion\" type=\"xsd:unsignedByte\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"minRefreshableVersion\" type=\"xsd:unsignedByte\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"recordCount\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"upgradeOnRefresh\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"tupleCache\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"supportSubquery\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"supportAdvancedDrill\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CacheFields\">\n    <xsd:sequence>\n      <xsd:element name=\"cacheField\" type=\"CT_CacheField\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CacheField\">\n    <xsd:sequence>\n      <xsd:element name=\"sharedItems\" type=\"CT_SharedItems\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fieldGroup\" minOccurs=\"0\" type=\"CT_FieldGroup\"/>\n      <xsd:element name=\"mpMap\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_X\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"caption\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"propertyName\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"serverField\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"uniqueList\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"numFmtId\" type=\"ST_NumFmtId\" use=\"optional\"/>\n    <xsd:attribute name=\"formula\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"sqlType\" type=\"xsd:int\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"hierarchy\" type=\"xsd:int\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"level\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"databaseField\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"mappingCount\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"memberPropertyField\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CacheSource\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"1\">\n      <xsd:element name=\"worksheetSource\" type=\"CT_WorksheetSource\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"consolidation\" type=\"CT_Consolidation\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"type\" type=\"ST_SourceType\" use=\"required\"/>\n    <xsd:attribute name=\"connectionId\" type=\"xsd:unsignedInt\" default=\"0\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SourceType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"worksheet\"/>\n      <xsd:enumeration value=\"external\"/>\n      <xsd:enumeration value=\"consolidation\"/>\n      <xsd:enumeration value=\"scenario\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_WorksheetSource\">\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"optional\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"sheet\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Consolidation\">\n    <xsd:sequence>\n      <xsd:element name=\"pages\" type=\"CT_Pages\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"rangeSets\" type=\"CT_RangeSets\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"autoPage\" type=\"xsd:boolean\" default=\"true\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Pages\">\n    <xsd:sequence>\n      <xsd:element name=\"page\" type=\"CT_PCDSCPage\" minOccurs=\"1\" maxOccurs=\"4\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PCDSCPage\">\n    <xsd:sequence>\n      <xsd:element name=\"pageItem\" type=\"CT_PageItem\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PageItem\">\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RangeSets\">\n    <xsd:sequence>\n      <xsd:element name=\"rangeSet\" type=\"CT_RangeSet\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RangeSet\">\n    <xsd:attribute name=\"i1\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"i2\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"i3\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"i4\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"optional\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"sheet\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SharedItems\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"m\" type=\"CT_Missing\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"n\" type=\"CT_Number\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"b\" type=\"CT_Boolean\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"e\" type=\"CT_Error\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"s\" type=\"CT_String\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"d\" type=\"CT_DateTime\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"containsSemiMixedTypes\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"containsNonDate\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"containsDate\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"containsString\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"containsBlank\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"containsMixedTypes\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"containsNumber\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"containsInteger\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"minValue\" type=\"xsd:double\" use=\"optional\"/>\n    <xsd:attribute name=\"maxValue\" type=\"xsd:double\" use=\"optional\"/>\n    <xsd:attribute name=\"minDate\" type=\"xsd:dateTime\" use=\"optional\"/>\n    <xsd:attribute name=\"maxDate\" type=\"xsd:dateTime\" use=\"optional\"/>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"longText\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Missing\">\n    <xsd:sequence>\n      <xsd:element name=\"tpls\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_Tuples\"/>\n      <xsd:element name=\"x\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_X\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"u\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"f\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"c\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"cp\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"in\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"bc\" type=\"ST_UnsignedIntHex\" use=\"optional\"/>\n    <xsd:attribute name=\"fc\" type=\"ST_UnsignedIntHex\" use=\"optional\"/>\n    <xsd:attribute name=\"i\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"un\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"st\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"b\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Number\">\n    <xsd:sequence>\n      <xsd:element name=\"tpls\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_Tuples\"/>\n      <xsd:element name=\"x\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_X\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"v\" use=\"required\" type=\"xsd:double\"/>\n    <xsd:attribute name=\"u\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"f\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"c\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"cp\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"in\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"bc\" type=\"ST_UnsignedIntHex\" use=\"optional\"/>\n    <xsd:attribute name=\"fc\" type=\"ST_UnsignedIntHex\" use=\"optional\"/>\n    <xsd:attribute name=\"i\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"un\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"st\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"b\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Boolean\">\n    <xsd:sequence>\n      <xsd:element name=\"x\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_X\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"v\" use=\"required\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"u\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"f\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"c\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"cp\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Error\">\n    <xsd:sequence>\n      <xsd:element name=\"tpls\" minOccurs=\"0\" type=\"CT_Tuples\"/>\n      <xsd:element name=\"x\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_X\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"v\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"u\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"f\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"c\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"cp\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"in\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"bc\" type=\"ST_UnsignedIntHex\" use=\"optional\"/>\n    <xsd:attribute name=\"fc\" type=\"ST_UnsignedIntHex\" use=\"optional\"/>\n    <xsd:attribute name=\"i\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"un\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"st\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"b\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_String\">\n    <xsd:sequence>\n      <xsd:element name=\"tpls\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_Tuples\"/>\n      <xsd:element name=\"x\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_X\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"v\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"u\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"f\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"c\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"cp\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"in\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"bc\" type=\"ST_UnsignedIntHex\" use=\"optional\"/>\n    <xsd:attribute name=\"fc\" type=\"ST_UnsignedIntHex\" use=\"optional\"/>\n    <xsd:attribute name=\"i\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"un\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"st\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"b\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DateTime\">\n    <xsd:sequence>\n      <xsd:element name=\"x\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_X\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"v\" use=\"required\" type=\"xsd:dateTime\"/>\n    <xsd:attribute name=\"u\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"f\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"c\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"cp\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FieldGroup\">\n    <xsd:sequence>\n      <xsd:element name=\"rangePr\" minOccurs=\"0\" type=\"CT_RangePr\"/>\n      <xsd:element name=\"discretePr\" minOccurs=\"0\" type=\"CT_DiscretePr\"/>\n      <xsd:element name=\"groupItems\" minOccurs=\"0\" type=\"CT_GroupItems\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"par\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"base\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RangePr\">\n    <xsd:attribute name=\"autoStart\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"autoEnd\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"groupBy\" type=\"ST_GroupBy\" default=\"range\"/>\n    <xsd:attribute name=\"startNum\" type=\"xsd:double\"/>\n    <xsd:attribute name=\"endNum\" type=\"xsd:double\"/>\n    <xsd:attribute name=\"startDate\" type=\"xsd:dateTime\"/>\n    <xsd:attribute name=\"endDate\" type=\"xsd:dateTime\"/>\n    <xsd:attribute name=\"groupInterval\" type=\"xsd:double\" default=\"1\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_GroupBy\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"range\"/>\n      <xsd:enumeration value=\"seconds\"/>\n      <xsd:enumeration value=\"minutes\"/>\n      <xsd:enumeration value=\"hours\"/>\n      <xsd:enumeration value=\"days\"/>\n      <xsd:enumeration value=\"months\"/>\n      <xsd:enumeration value=\"quarters\"/>\n      <xsd:enumeration value=\"years\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DiscretePr\">\n    <xsd:sequence>\n      <xsd:element name=\"x\" maxOccurs=\"unbounded\" type=\"CT_Index\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupItems\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:element name=\"m\" type=\"CT_Missing\"/>\n      <xsd:element name=\"n\" type=\"CT_Number\"/>\n      <xsd:element name=\"b\" type=\"CT_Boolean\"/>\n      <xsd:element name=\"e\" type=\"CT_Error\"/>\n      <xsd:element name=\"s\" type=\"CT_String\"/>\n      <xsd:element name=\"d\" type=\"CT_DateTime\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotCacheRecords\">\n    <xsd:sequence>\n      <xsd:element name=\"r\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_Record\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Record\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:element name=\"m\" type=\"CT_Missing\"/>\n      <xsd:element name=\"n\" type=\"CT_Number\"/>\n      <xsd:element name=\"b\" type=\"CT_Boolean\"/>\n      <xsd:element name=\"e\" type=\"CT_Error\"/>\n      <xsd:element name=\"s\" type=\"CT_String\"/>\n      <xsd:element name=\"d\" type=\"CT_DateTime\"/>\n      <xsd:element name=\"x\" type=\"CT_Index\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PCDKPIs\">\n    <xsd:sequence>\n      <xsd:element name=\"kpi\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_PCDKPI\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PCDKPI\">\n    <xsd:attribute name=\"uniqueName\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"caption\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"displayFolder\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"measureGroup\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"parent\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"value\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"goal\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"status\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"trend\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"weight\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"time\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CacheHierarchies\">\n    <xsd:sequence>\n      <xsd:element name=\"cacheHierarchy\" minOccurs=\"0\" maxOccurs=\"unbounded\"\n        type=\"CT_CacheHierarchy\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CacheHierarchy\">\n    <xsd:sequence>\n      <xsd:element name=\"fieldsUsage\" minOccurs=\"0\" type=\"CT_FieldsUsage\"/>\n      <xsd:element name=\"groupLevels\" minOccurs=\"0\" type=\"CT_GroupLevels\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uniqueName\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"caption\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"measure\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"set\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"parentSet\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"iconSet\" type=\"xsd:int\" default=\"0\"/>\n    <xsd:attribute name=\"attribute\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"time\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"keyAttribute\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"defaultMemberUniqueName\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"allUniqueName\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"allCaption\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"dimensionUniqueName\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"displayFolder\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"measureGroup\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"measures\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"count\" use=\"required\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"oneField\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"memberValueDatatype\" use=\"optional\" type=\"xsd:unsignedShort\"/>\n    <xsd:attribute name=\"unbalanced\" use=\"optional\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"unbalancedGroup\" use=\"optional\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"hidden\" type=\"xsd:boolean\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FieldsUsage\">\n    <xsd:sequence>\n      <xsd:element name=\"fieldUsage\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_FieldUsage\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FieldUsage\">\n    <xsd:attribute name=\"x\" use=\"required\" type=\"xsd:int\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupLevels\">\n    <xsd:sequence>\n      <xsd:element name=\"groupLevel\" maxOccurs=\"unbounded\" type=\"CT_GroupLevel\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupLevel\">\n    <xsd:sequence>\n      <xsd:element name=\"groups\" minOccurs=\"0\" type=\"CT_Groups\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uniqueName\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"caption\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"user\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"customRollUp\" type=\"xsd:boolean\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Groups\">\n    <xsd:sequence>\n      <xsd:element name=\"group\" maxOccurs=\"unbounded\" type=\"CT_LevelGroup\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LevelGroup\">\n    <xsd:sequence>\n      <xsd:element name=\"groupMembers\" type=\"CT_GroupMembers\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"uniqueName\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"caption\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"uniqueParent\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"id\" type=\"xsd:int\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupMembers\">\n    <xsd:sequence>\n      <xsd:element name=\"groupMember\" maxOccurs=\"unbounded\" type=\"CT_GroupMember\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GroupMember\">\n    <xsd:attribute name=\"uniqueName\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"group\" type=\"xsd:boolean\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TupleCache\">\n    <xsd:sequence>\n      <xsd:element name=\"entries\" minOccurs=\"0\" type=\"CT_PCDSDTCEntries\"/>\n      <xsd:element name=\"sets\" minOccurs=\"0\" type=\"CT_Sets\"/>\n      <xsd:element name=\"queryCache\" minOccurs=\"0\" type=\"CT_QueryCache\"/>\n      <xsd:element name=\"serverFormats\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_ServerFormats\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ServerFormat\">\n    <xsd:attribute name=\"culture\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"format\" use=\"optional\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ServerFormats\">\n    <xsd:sequence>\n      <xsd:element name=\"serverFormat\" type=\"CT_ServerFormat\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PCDSDTCEntries\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:element name=\"m\" type=\"CT_Missing\"/>\n      <xsd:element name=\"n\" type=\"CT_Number\"/>\n      <xsd:element name=\"e\" type=\"CT_Error\"/>\n      <xsd:element name=\"s\" type=\"CT_String\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Tuples\">\n    <xsd:sequence>\n      <xsd:element name=\"tpl\" type=\"CT_Tuple\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"c\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Tuple\">\n    <xsd:attribute name=\"fld\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"hier\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"item\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Sets\">\n    <xsd:sequence>\n      <xsd:element name=\"set\" maxOccurs=\"unbounded\" type=\"CT_Set\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Set\">\n    <xsd:sequence>\n      <xsd:element name=\"tpls\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_Tuples\"/>\n      <xsd:element name=\"sortByTuple\" minOccurs=\"0\" type=\"CT_Tuples\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"maxRank\" use=\"required\" type=\"xsd:int\"/>\n    <xsd:attribute name=\"setDefinition\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"sortType\" type=\"ST_SortType\" default=\"none\"/>\n    <xsd:attribute name=\"queryFailed\" type=\"xsd:boolean\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SortType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"ascending\"/>\n      <xsd:enumeration value=\"descending\"/>\n      <xsd:enumeration value=\"ascendingAlpha\"/>\n      <xsd:enumeration value=\"descendingAlpha\"/>\n      <xsd:enumeration value=\"ascendingNatural\"/>\n      <xsd:enumeration value=\"descendingNatural\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_QueryCache\">\n    <xsd:sequence>\n      <xsd:element name=\"query\" maxOccurs=\"unbounded\" type=\"CT_Query\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Query\">\n    <xsd:sequence>\n      <xsd:element name=\"tpls\" minOccurs=\"0\" type=\"CT_Tuples\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"mdx\" use=\"required\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CalculatedItems\">\n    <xsd:sequence>\n      <xsd:element name=\"calculatedItem\" maxOccurs=\"unbounded\" type=\"CT_CalculatedItem\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CalculatedItem\">\n    <xsd:sequence>\n      <xsd:element name=\"pivotArea\" type=\"CT_PivotArea\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"field\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"formula\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CalculatedMembers\">\n    <xsd:sequence>\n      <xsd:element name=\"calculatedMember\" maxOccurs=\"unbounded\" type=\"CT_CalculatedMember\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CalculatedMember\">\n    <xsd:sequence minOccurs=\"0\">\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"mdx\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"memberName\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"hierarchy\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"parent\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"solveOrder\" type=\"xsd:int\" default=\"0\"/>\n    <xsd:attribute name=\"set\" type=\"xsd:boolean\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_pivotTableDefinition\">\n    <xsd:sequence>\n      <xsd:element name=\"location\" type=\"CT_Location\"/>\n      <xsd:element name=\"pivotFields\" type=\"CT_PivotFields\" minOccurs=\"0\"/>\n      <xsd:element name=\"rowFields\" type=\"CT_RowFields\" minOccurs=\"0\"/>\n      <xsd:element name=\"rowItems\" type=\"CT_rowItems\" minOccurs=\"0\"/>\n      <xsd:element name=\"colFields\" type=\"CT_ColFields\" minOccurs=\"0\"/>\n      <xsd:element name=\"colItems\" type=\"CT_colItems\" minOccurs=\"0\"/>\n      <xsd:element name=\"pageFields\" type=\"CT_PageFields\" minOccurs=\"0\"/>\n      <xsd:element name=\"dataFields\" type=\"CT_DataFields\" minOccurs=\"0\"/>\n      <xsd:element name=\"formats\" type=\"CT_Formats\" minOccurs=\"0\"/>\n      <xsd:element name=\"conditionalFormats\" type=\"CT_ConditionalFormats\" minOccurs=\"0\"/>\n      <xsd:element name=\"chartFormats\" type=\"CT_ChartFormats\" minOccurs=\"0\"/>\n      <xsd:element name=\"pivotHierarchies\" type=\"CT_PivotHierarchies\" minOccurs=\"0\"/>\n      <xsd:element name=\"pivotTableStyleInfo\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_PivotTableStyle\"/>\n      <xsd:element name=\"filters\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_PivotFilters\"/>\n      <xsd:element name=\"rowHierarchiesUsage\" type=\"CT_RowHierarchiesUsage\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"colHierarchiesUsage\" type=\"CT_ColHierarchiesUsage\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"cacheId\" use=\"required\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"dataOnRows\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"dataPosition\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attributeGroup ref=\"AG_AutoFormat\"/>\n    <xsd:attribute name=\"dataCaption\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"grandTotalCaption\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"errorCaption\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"showError\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"missingCaption\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"showMissing\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"pageStyle\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"pivotTableStyle\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"vacatedStyle\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"tag\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"updatedVersion\" type=\"xsd:unsignedByte\" default=\"0\"/>\n    <xsd:attribute name=\"minRefreshableVersion\" type=\"xsd:unsignedByte\" default=\"0\"/>\n    <xsd:attribute name=\"asteriskTotals\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"showItems\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"editData\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"disableFieldList\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"showCalcMbrs\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"visualTotals\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"showMultipleLabel\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"showDataDropDown\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"showDrill\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"printDrill\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"showMemberPropertyTips\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"showDataTips\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"enableWizard\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"enableDrill\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"enableFieldProperties\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"preserveFormatting\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"useAutoFormatting\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"pageWrap\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"pageOverThenDown\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"subtotalHiddenItems\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"rowGrandTotals\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"colGrandTotals\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"fieldPrintTitles\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"itemPrintTitles\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"mergeItem\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"showDropZones\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"createdVersion\" type=\"xsd:unsignedByte\" default=\"0\"/>\n    <xsd:attribute name=\"indent\" type=\"xsd:unsignedInt\" default=\"1\"/>\n    <xsd:attribute name=\"showEmptyRow\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"showEmptyCol\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"showHeaders\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"compact\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"outline\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"outlineData\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"compactData\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"published\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"gridDropZones\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"immersive\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"multipleFieldFilters\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"chartFormat\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"rowHeaderCaption\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"colHeaderCaption\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"fieldListSortAscending\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"mdxSubqueries\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"customListSort\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Location\">\n    <xsd:attribute name=\"ref\" use=\"required\" type=\"ST_Ref\"/>\n    <xsd:attribute name=\"firstHeaderRow\" use=\"required\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"firstDataRow\" use=\"required\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"firstDataCol\" use=\"required\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"rowPageCount\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"colPageCount\" type=\"xsd:unsignedInt\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotFields\">\n    <xsd:sequence>\n      <xsd:element name=\"pivotField\" maxOccurs=\"unbounded\" type=\"CT_PivotField\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotField\">\n    <xsd:sequence>\n      <xsd:element name=\"items\" minOccurs=\"0\" type=\"CT_Items\"/>\n      <xsd:element name=\"autoSortScope\" minOccurs=\"0\" type=\"CT_AutoSortScope\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"axis\" use=\"optional\" type=\"ST_Axis\"/>\n    <xsd:attribute name=\"dataField\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"subtotalCaption\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"showDropDowns\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"hiddenLevel\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"uniqueMemberProperty\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"compact\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"allDrilled\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"numFmtId\" type=\"ST_NumFmtId\" use=\"optional\"/>\n    <xsd:attribute name=\"outline\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"subtotalTop\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"dragToRow\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"dragToCol\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"multipleItemSelectionAllowed\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"dragToPage\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"dragToData\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"dragOff\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"showAll\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"insertBlankRow\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"serverField\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"insertPageBreak\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"autoShow\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"topAutoShow\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"hideNewItems\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"measureFilter\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"includeNewItemsInFilter\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"itemPageCount\" type=\"xsd:unsignedInt\" default=\"10\"/>\n    <xsd:attribute name=\"sortType\" type=\"ST_FieldSortType\" default=\"manual\"/>\n    <xsd:attribute name=\"dataSourceSort\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"nonAutoSortDefault\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"rankBy\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"defaultSubtotal\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"sumSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"countASubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"avgSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"maxSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"minSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"productSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"countSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"stdDevSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"stdDevPSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"varSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"varPSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"showPropCell\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showPropTip\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showPropAsCaption\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"defaultAttributeDrillState\" type=\"xsd:boolean\" use=\"optional\"\n      default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AutoSortScope\">\n    <xsd:sequence>\n      <xsd:element name=\"pivotArea\" type=\"CT_PivotArea\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Items\">\n    <xsd:sequence>\n      <xsd:element name=\"item\" maxOccurs=\"unbounded\" type=\"CT_Item\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Item\">\n    <xsd:attribute name=\"n\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"t\" type=\"ST_ItemType\" default=\"data\"/>\n    <xsd:attribute name=\"h\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"s\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"sd\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"f\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"m\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"c\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"x\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"d\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"e\" type=\"xsd:boolean\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PageFields\">\n    <xsd:sequence>\n      <xsd:element name=\"pageField\" maxOccurs=\"unbounded\" type=\"CT_PageField\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PageField\">\n    <xsd:sequence minOccurs=\"0\">\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"fld\" use=\"required\" type=\"xsd:int\"/>\n    <xsd:attribute name=\"item\" use=\"optional\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"hier\" type=\"xsd:int\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"cap\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DataFields\">\n    <xsd:sequence>\n      <xsd:element name=\"dataField\" maxOccurs=\"unbounded\" type=\"CT_DataField\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DataField\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" use=\"optional\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"fld\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"subtotal\" type=\"ST_DataConsolidateFunction\" default=\"sum\"/>\n    <xsd:attribute name=\"showDataAs\" type=\"ST_ShowDataAs\" default=\"normal\"/>\n    <xsd:attribute name=\"baseField\" type=\"xsd:int\" default=\"-1\"/>\n    <xsd:attribute name=\"baseItem\" type=\"xsd:unsignedInt\" default=\"1048832\"/>\n    <xsd:attribute name=\"numFmtId\" type=\"ST_NumFmtId\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_rowItems\">\n    <xsd:sequence>\n      <xsd:element name=\"i\" maxOccurs=\"unbounded\" type=\"CT_I\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_colItems\">\n    <xsd:sequence>\n      <xsd:element name=\"i\" maxOccurs=\"unbounded\" type=\"CT_I\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_I\">\n    <xsd:sequence>\n      <xsd:element name=\"x\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_X\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"t\" type=\"ST_ItemType\" default=\"data\"/>\n    <xsd:attribute name=\"r\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"i\" type=\"xsd:unsignedInt\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_X\">\n    <xsd:attribute name=\"v\" type=\"xsd:int\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RowFields\">\n    <xsd:sequence>\n      <xsd:element name=\"field\" maxOccurs=\"unbounded\" type=\"CT_Field\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColFields\">\n    <xsd:sequence>\n      <xsd:element name=\"field\" maxOccurs=\"unbounded\" type=\"CT_Field\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Field\">\n    <xsd:attribute name=\"x\" type=\"xsd:int\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Formats\">\n    <xsd:sequence>\n      <xsd:element name=\"format\" maxOccurs=\"unbounded\" type=\"CT_Format\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Format\">\n    <xsd:sequence>\n      <xsd:element name=\"pivotArea\" type=\"CT_PivotArea\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"action\" type=\"ST_FormatAction\" default=\"formatting\"/>\n    <xsd:attribute name=\"dxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ConditionalFormats\">\n    <xsd:sequence>\n      <xsd:element name=\"conditionalFormat\" maxOccurs=\"unbounded\" type=\"CT_ConditionalFormat\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ConditionalFormat\">\n    <xsd:sequence>\n      <xsd:element name=\"pivotAreas\" type=\"CT_PivotAreas\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"scope\" type=\"ST_Scope\" default=\"selection\"/>\n    <xsd:attribute name=\"type\" type=\"ST_Type\" default=\"none\"/>\n    <xsd:attribute name=\"priority\" use=\"required\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotAreas\">\n    <xsd:sequence>\n      <xsd:element name=\"pivotArea\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_PivotArea\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Scope\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"selection\"/>\n      <xsd:enumeration value=\"data\"/>\n      <xsd:enumeration value=\"field\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Type\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"all\"/>\n      <xsd:enumeration value=\"row\"/>\n      <xsd:enumeration value=\"column\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ChartFormats\">\n    <xsd:sequence>\n      <xsd:element name=\"chartFormat\" maxOccurs=\"unbounded\" type=\"CT_ChartFormat\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ChartFormat\">\n    <xsd:sequence>\n      <xsd:element name=\"pivotArea\" type=\"CT_PivotArea\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"chart\" use=\"required\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"format\" use=\"required\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"series\" type=\"xsd:boolean\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotHierarchies\">\n    <xsd:sequence>\n      <xsd:element name=\"pivotHierarchy\" maxOccurs=\"unbounded\" type=\"CT_PivotHierarchy\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotHierarchy\">\n    <xsd:sequence>\n      <xsd:element name=\"mps\" minOccurs=\"0\" type=\"CT_MemberProperties\"/>\n      <xsd:element name=\"members\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_Members\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"outline\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"multipleItemSelectionAllowed\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"subtotalTop\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"showInFieldList\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"dragToRow\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"dragToCol\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"dragToPage\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"dragToData\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"dragOff\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"includeNewItemsInFilter\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"caption\" type=\"s:ST_Xstring\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RowHierarchiesUsage\">\n    <xsd:sequence>\n      <xsd:element name=\"rowHierarchyUsage\" minOccurs=\"1\" maxOccurs=\"unbounded\"\n        type=\"CT_HierarchyUsage\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColHierarchiesUsage\">\n    <xsd:sequence>\n      <xsd:element name=\"colHierarchyUsage\" minOccurs=\"1\" maxOccurs=\"unbounded\"\n        type=\"CT_HierarchyUsage\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_HierarchyUsage\">\n    <xsd:attribute name=\"hierarchyUsage\" type=\"xsd:int\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MemberProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"mp\" maxOccurs=\"unbounded\" type=\"CT_MemberProperty\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MemberProperty\">\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"showCell\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showTip\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showAsCaption\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"nameLen\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"pPos\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"pLen\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"level\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"field\" use=\"required\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Members\">\n    <xsd:sequence>\n      <xsd:element name=\"member\" maxOccurs=\"unbounded\" type=\"CT_Member\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"level\" use=\"optional\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Member\">\n    <xsd:attribute name=\"name\" use=\"required\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Dimensions\">\n    <xsd:sequence>\n      <xsd:element name=\"dimension\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_PivotDimension\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotDimension\">\n    <xsd:attribute name=\"measure\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"name\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"uniqueName\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"caption\" use=\"required\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MeasureGroups\">\n    <xsd:sequence>\n      <xsd:element name=\"measureGroup\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_MeasureGroup\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MeasureDimensionMaps\">\n    <xsd:sequence>\n      <xsd:element name=\"map\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_MeasureDimensionMap\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MeasureGroup\">\n    <xsd:attribute name=\"name\" use=\"required\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"caption\" use=\"required\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MeasureDimensionMap\">\n    <xsd:attribute name=\"measureGroup\" use=\"optional\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"dimension\" use=\"optional\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotTableStyle\">\n    <xsd:attribute name=\"name\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"showRowHeaders\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"showColHeaders\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"showRowStripes\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"showColStripes\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"showLastColumn\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotFilters\">\n    <xsd:sequence>\n      <xsd:element name=\"filter\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_PivotFilter\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotFilter\">\n    <xsd:sequence>\n      <xsd:element name=\"autoFilter\" minOccurs=\"1\" maxOccurs=\"1\" type=\"CT_AutoFilter\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"fld\" use=\"required\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"mpFld\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"type\" use=\"required\" type=\"ST_PivotFilterType\"/>\n    <xsd:attribute name=\"evalOrder\" use=\"optional\" type=\"xsd:int\" default=\"0\"/>\n    <xsd:attribute name=\"id\" use=\"required\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"iMeasureHier\" use=\"optional\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"iMeasureFld\" use=\"optional\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"description\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"stringValue1\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"stringValue2\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ShowDataAs\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"normal\"/>\n      <xsd:enumeration value=\"difference\"/>\n      <xsd:enumeration value=\"percent\"/>\n      <xsd:enumeration value=\"percentDiff\"/>\n      <xsd:enumeration value=\"runTotal\"/>\n      <xsd:enumeration value=\"percentOfRow\"/>\n      <xsd:enumeration value=\"percentOfCol\"/>\n      <xsd:enumeration value=\"percentOfTotal\"/>\n      <xsd:enumeration value=\"index\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ItemType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"data\"/>\n      <xsd:enumeration value=\"default\"/>\n      <xsd:enumeration value=\"sum\"/>\n      <xsd:enumeration value=\"countA\"/>\n      <xsd:enumeration value=\"avg\"/>\n      <xsd:enumeration value=\"max\"/>\n      <xsd:enumeration value=\"min\"/>\n      <xsd:enumeration value=\"product\"/>\n      <xsd:enumeration value=\"count\"/>\n      <xsd:enumeration value=\"stdDev\"/>\n      <xsd:enumeration value=\"stdDevP\"/>\n      <xsd:enumeration value=\"var\"/>\n      <xsd:enumeration value=\"varP\"/>\n      <xsd:enumeration value=\"grand\"/>\n      <xsd:enumeration value=\"blank\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FormatAction\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"blank\"/>\n      <xsd:enumeration value=\"formatting\"/>\n      <xsd:enumeration value=\"drill\"/>\n      <xsd:enumeration value=\"formula\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FieldSortType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"manual\"/>\n      <xsd:enumeration value=\"ascending\"/>\n      <xsd:enumeration value=\"descending\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PivotFilterType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"unknown\"/>\n      <xsd:enumeration value=\"count\"/>\n      <xsd:enumeration value=\"percent\"/>\n      <xsd:enumeration value=\"sum\"/>\n      <xsd:enumeration value=\"captionEqual\"/>\n      <xsd:enumeration value=\"captionNotEqual\"/>\n      <xsd:enumeration value=\"captionBeginsWith\"/>\n      <xsd:enumeration value=\"captionNotBeginsWith\"/>\n      <xsd:enumeration value=\"captionEndsWith\"/>\n      <xsd:enumeration value=\"captionNotEndsWith\"/>\n      <xsd:enumeration value=\"captionContains\"/>\n      <xsd:enumeration value=\"captionNotContains\"/>\n      <xsd:enumeration value=\"captionGreaterThan\"/>\n      <xsd:enumeration value=\"captionGreaterThanOrEqual\"/>\n      <xsd:enumeration value=\"captionLessThan\"/>\n      <xsd:enumeration value=\"captionLessThanOrEqual\"/>\n      <xsd:enumeration value=\"captionBetween\"/>\n      <xsd:enumeration value=\"captionNotBetween\"/>\n      <xsd:enumeration value=\"valueEqual\"/>\n      <xsd:enumeration value=\"valueNotEqual\"/>\n      <xsd:enumeration value=\"valueGreaterThan\"/>\n      <xsd:enumeration value=\"valueGreaterThanOrEqual\"/>\n      <xsd:enumeration value=\"valueLessThan\"/>\n      <xsd:enumeration value=\"valueLessThanOrEqual\"/>\n      <xsd:enumeration value=\"valueBetween\"/>\n      <xsd:enumeration value=\"valueNotBetween\"/>\n      <xsd:enumeration value=\"dateEqual\"/>\n      <xsd:enumeration value=\"dateNotEqual\"/>\n      <xsd:enumeration value=\"dateOlderThan\"/>\n      <xsd:enumeration value=\"dateOlderThanOrEqual\"/>\n      <xsd:enumeration value=\"dateNewerThan\"/>\n      <xsd:enumeration value=\"dateNewerThanOrEqual\"/>\n      <xsd:enumeration value=\"dateBetween\"/>\n      <xsd:enumeration value=\"dateNotBetween\"/>\n      <xsd:enumeration value=\"tomorrow\"/>\n      <xsd:enumeration value=\"today\"/>\n      <xsd:enumeration value=\"yesterday\"/>\n      <xsd:enumeration value=\"nextWeek\"/>\n      <xsd:enumeration value=\"thisWeek\"/>\n      <xsd:enumeration value=\"lastWeek\"/>\n      <xsd:enumeration value=\"nextMonth\"/>\n      <xsd:enumeration value=\"thisMonth\"/>\n      <xsd:enumeration value=\"lastMonth\"/>\n      <xsd:enumeration value=\"nextQuarter\"/>\n      <xsd:enumeration value=\"thisQuarter\"/>\n      <xsd:enumeration value=\"lastQuarter\"/>\n      <xsd:enumeration value=\"nextYear\"/>\n      <xsd:enumeration value=\"thisYear\"/>\n      <xsd:enumeration value=\"lastYear\"/>\n      <xsd:enumeration value=\"yearToDate\"/>\n      <xsd:enumeration value=\"Q1\"/>\n      <xsd:enumeration value=\"Q2\"/>\n      <xsd:enumeration value=\"Q3\"/>\n      <xsd:enumeration value=\"Q4\"/>\n      <xsd:enumeration value=\"M1\"/>\n      <xsd:enumeration value=\"M2\"/>\n      <xsd:enumeration value=\"M3\"/>\n      <xsd:enumeration value=\"M4\"/>\n      <xsd:enumeration value=\"M5\"/>\n      <xsd:enumeration value=\"M6\"/>\n      <xsd:enumeration value=\"M7\"/>\n      <xsd:enumeration value=\"M8\"/>\n      <xsd:enumeration value=\"M9\"/>\n      <xsd:enumeration value=\"M10\"/>\n      <xsd:enumeration value=\"M11\"/>\n      <xsd:enumeration value=\"M12\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PivotArea\">\n    <xsd:sequence>\n      <xsd:element name=\"references\" minOccurs=\"0\" type=\"CT_PivotAreaReferences\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"field\" use=\"optional\" type=\"xsd:int\"/>\n    <xsd:attribute name=\"type\" type=\"ST_PivotAreaType\" default=\"normal\"/>\n    <xsd:attribute name=\"dataOnly\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"labelOnly\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"grandRow\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"grandCol\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"cacheIndex\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"outline\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"offset\" type=\"ST_Ref\"/>\n    <xsd:attribute name=\"collapsedLevelsAreSubtotals\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"axis\" type=\"ST_Axis\" use=\"optional\"/>\n    <xsd:attribute name=\"fieldPosition\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PivotAreaType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"normal\"/>\n      <xsd:enumeration value=\"data\"/>\n      <xsd:enumeration value=\"all\"/>\n      <xsd:enumeration value=\"origin\"/>\n      <xsd:enumeration value=\"button\"/>\n      <xsd:enumeration value=\"topEnd\"/>\n      <xsd:enumeration value=\"topRight\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PivotAreaReferences\">\n    <xsd:sequence>\n      <xsd:element name=\"reference\" maxOccurs=\"unbounded\" type=\"CT_PivotAreaReference\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotAreaReference\">\n    <xsd:sequence>\n      <xsd:element name=\"x\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_Index\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"field\" use=\"optional\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"selected\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"byPosition\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"relative\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"defaultSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"sumSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"countASubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"avgSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"maxSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"minSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"productSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"countSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"stdDevSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"stdDevPSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"varSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"varPSubtotal\" type=\"xsd:boolean\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Index\">\n    <xsd:attribute name=\"v\" use=\"required\" type=\"xsd:unsignedInt\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Axis\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"axisRow\"/>\n      <xsd:enumeration value=\"axisCol\"/>\n      <xsd:enumeration value=\"axisPage\"/>\n      <xsd:enumeration value=\"axisValues\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:element name=\"queryTable\" type=\"CT_QueryTable\"/>\n  <xsd:complexType name=\"CT_QueryTable\">\n    <xsd:sequence>\n      <xsd:element name=\"queryTableRefresh\" type=\"CT_QueryTableRefresh\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"headers\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"rowNumbers\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"disableRefresh\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"backgroundRefresh\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"firstBackgroundRefresh\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"refreshOnLoad\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"growShrinkType\" type=\"ST_GrowShrinkType\" use=\"optional\"\n      default=\"insertDelete\"/>\n    <xsd:attribute name=\"fillFormulas\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"removeDataOnSave\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"disableEdit\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"preserveFormatting\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"adjustColumnWidth\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"intermediate\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"connectionId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attributeGroup ref=\"AG_AutoFormat\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_QueryTableRefresh\">\n    <xsd:sequence>\n      <xsd:element name=\"queryTableFields\" type=\"CT_QueryTableFields\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"queryTableDeletedFields\" type=\"CT_QueryTableDeletedFields\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"sortState\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_SortState\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"preserveSortFilterLayout\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"fieldIdWrapped\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"headersInLastRefresh\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"minimumVersion\" type=\"xsd:unsignedByte\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"nextId\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"unboundColumnsLeft\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"unboundColumnsRight\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_QueryTableDeletedFields\">\n    <xsd:sequence>\n      <xsd:element name=\"deletedField\" type=\"CT_DeletedField\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DeletedField\">\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_QueryTableFields\">\n    <xsd:sequence>\n      <xsd:element name=\"queryTableField\" type=\"CT_QueryTableField\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_QueryTableField\">\n    <xsd:sequence minOccurs=\"0\">\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"dataBound\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"rowNumbers\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"fillFormulas\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"clipped\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"tableColumnId\" type=\"xsd:unsignedInt\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_GrowShrinkType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"insertDelete\"/>\n      <xsd:enumeration value=\"insertClear\"/>\n      <xsd:enumeration value=\"overwriteClear\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:element name=\"sst\" type=\"CT_Sst\"/>\n  <xsd:complexType name=\"CT_Sst\">\n    <xsd:sequence>\n      <xsd:element name=\"si\" type=\"CT_Rst\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"uniqueCount\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PhoneticType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"halfwidthKatakana\"/>\n      <xsd:enumeration value=\"fullwidthKatakana\"/>\n      <xsd:enumeration value=\"Hiragana\"/>\n      <xsd:enumeration value=\"noConversion\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PhoneticAlignment\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"noControl\"/>\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"distributed\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PhoneticRun\">\n    <xsd:sequence>\n      <xsd:element name=\"t\" type=\"s:ST_Xstring\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"sb\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"eb\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RElt\">\n    <xsd:sequence>\n      <xsd:element name=\"rPr\" type=\"CT_RPrElt\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"t\" type=\"s:ST_Xstring\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RPrElt\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:element name=\"rFont\" type=\"CT_FontName\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"charset\" type=\"CT_IntProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"family\" type=\"CT_IntProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"b\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"i\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"strike\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"outline\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shadow\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"condense\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extend\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"color\" type=\"CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sz\" type=\"CT_FontSize\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"u\" type=\"CT_UnderlineProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"vertAlign\" type=\"CT_VerticalAlignFontProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"scheme\" type=\"CT_FontScheme\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Rst\">\n    <xsd:sequence>\n      <xsd:element name=\"t\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"r\" type=\"CT_RElt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rPh\" type=\"CT_PhoneticRun\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"phoneticPr\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_PhoneticPr\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PhoneticPr\">\n    <xsd:attribute name=\"fontId\" type=\"ST_FontId\" use=\"required\"/>\n    <xsd:attribute name=\"type\" type=\"ST_PhoneticType\" use=\"optional\" default=\"fullwidthKatakana\"/>\n    <xsd:attribute name=\"alignment\" type=\"ST_PhoneticAlignment\" use=\"optional\" default=\"left\"/>\n  </xsd:complexType>\n  <xsd:element name=\"headers\" type=\"CT_RevisionHeaders\"/>\n  <xsd:element name=\"revisions\" type=\"CT_Revisions\"/>\n  <xsd:complexType name=\"CT_RevisionHeaders\">\n    <xsd:sequence>\n      <xsd:element name=\"header\" type=\"CT_RevisionHeader\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"guid\" type=\"s:ST_Guid\" use=\"required\"/>\n    <xsd:attribute name=\"lastGuid\" type=\"s:ST_Guid\" use=\"optional\"/>\n    <xsd:attribute name=\"shared\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"diskRevisions\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"history\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"trackRevisions\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"exclusive\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"revisionId\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"version\" type=\"xsd:int\" default=\"1\"/>\n    <xsd:attribute name=\"keepChangeHistory\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"protected\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"preserveHistory\" type=\"xsd:unsignedInt\" default=\"30\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Revisions\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:element name=\"rrc\" type=\"CT_RevisionRowColumn\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rm\" type=\"CT_RevisionMove\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rcv\" type=\"CT_RevisionCustomView\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rsnm\" type=\"CT_RevisionSheetRename\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"ris\" type=\"CT_RevisionInsertSheet\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rcc\" type=\"CT_RevisionCellChange\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rfmt\" type=\"CT_RevisionFormatting\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"raf\" type=\"CT_RevisionAutoFormatting\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rdn\" type=\"CT_RevisionDefinedName\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rcmt\" type=\"CT_RevisionComment\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rqt\" type=\"CT_RevisionQueryTableField\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rcft\" type=\"CT_RevisionConflict\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:attributeGroup name=\"AG_RevData\">\n    <xsd:attribute name=\"rId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"ua\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"ra\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:attributeGroup>\n  <xsd:complexType name=\"CT_RevisionHeader\">\n    <xsd:sequence>\n      <xsd:element name=\"sheetIdMap\" minOccurs=\"1\" maxOccurs=\"1\" type=\"CT_SheetIdMap\"/>\n      <xsd:element name=\"reviewedList\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_ReviewedRevisions\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"guid\" type=\"s:ST_Guid\" use=\"required\"/>\n    <xsd:attribute name=\"dateTime\" type=\"xsd:dateTime\" use=\"required\"/>\n    <xsd:attribute name=\"maxSheetId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"userName\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n    <xsd:attribute name=\"minRId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"maxRId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SheetIdMap\">\n    <xsd:sequence>\n      <xsd:element name=\"sheetId\" type=\"CT_SheetId\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SheetId\">\n    <xsd:attribute name=\"val\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ReviewedRevisions\">\n    <xsd:sequence>\n      <xsd:element name=\"reviewed\" type=\"CT_Reviewed\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Reviewed\">\n    <xsd:attribute name=\"rId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_UndoInfo\">\n    <xsd:attribute name=\"index\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"exp\" type=\"ST_FormulaExpression\" use=\"required\"/>\n    <xsd:attribute name=\"ref3D\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"array\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"v\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"nf\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"cs\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"dr\" type=\"ST_RefA\" use=\"required\"/>\n    <xsd:attribute name=\"dn\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"r\" type=\"ST_CellRef\" use=\"optional\"/>\n    <xsd:attribute name=\"sId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RevisionRowColumn\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"undo\" type=\"CT_UndoInfo\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rcc\" type=\"CT_RevisionCellChange\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rfmt\" type=\"CT_RevisionFormatting\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n    <xsd:attributeGroup ref=\"AG_RevData\"/>\n    <xsd:attribute name=\"sId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"eol\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"required\"/>\n    <xsd:attribute name=\"action\" type=\"ST_rwColActionType\" use=\"required\"/>\n    <xsd:attribute name=\"edge\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RevisionMove\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"undo\" type=\"CT_UndoInfo\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rcc\" type=\"CT_RevisionCellChange\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rfmt\" type=\"CT_RevisionFormatting\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n    <xsd:attributeGroup ref=\"AG_RevData\"/>\n    <xsd:attribute name=\"sheetId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"source\" type=\"ST_Ref\" use=\"required\"/>\n    <xsd:attribute name=\"destination\" type=\"ST_Ref\" use=\"required\"/>\n    <xsd:attribute name=\"sourceSheetId\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RevisionCustomView\">\n    <xsd:attribute name=\"guid\" type=\"s:ST_Guid\" use=\"required\"/>\n    <xsd:attribute name=\"action\" type=\"ST_RevisionAction\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RevisionSheetRename\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_RevData\"/>\n    <xsd:attribute name=\"sheetId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"oldName\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"newName\" type=\"s:ST_Xstring\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RevisionInsertSheet\">\n    <xsd:attributeGroup ref=\"AG_RevData\"/>\n    <xsd:attribute name=\"sheetId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"sheetPosition\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RevisionCellChange\">\n    <xsd:sequence>\n      <xsd:element name=\"oc\" type=\"CT_Cell\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"nc\" type=\"CT_Cell\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"odxf\" type=\"CT_Dxf\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ndxf\" type=\"CT_Dxf\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_RevData\"/>\n    <xsd:attribute name=\"sId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"odxf\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"xfDxf\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"s\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"dxf\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"numFmtId\" type=\"ST_NumFmtId\" use=\"optional\"/>\n    <xsd:attribute name=\"quotePrefix\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"oldQuotePrefix\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"ph\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"oldPh\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"endOfListFormulaUpdate\" type=\"xsd:boolean\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RevisionFormatting\">\n    <xsd:sequence>\n      <xsd:element name=\"dxf\" type=\"CT_Dxf\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"sheetId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"xfDxf\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"s\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"sqref\" type=\"ST_Sqref\" use=\"required\"/>\n    <xsd:attribute name=\"start\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"length\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RevisionAutoFormatting\">\n    <xsd:attribute name=\"sheetId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attributeGroup ref=\"AG_AutoFormat\"/>\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RevisionComment\">\n    <xsd:attribute name=\"sheetId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"cell\" type=\"ST_CellRef\" use=\"required\"/>\n    <xsd:attribute name=\"guid\" type=\"s:ST_Guid\" use=\"required\"/>\n    <xsd:attribute name=\"action\" type=\"ST_RevisionAction\" default=\"add\"/>\n    <xsd:attribute name=\"alwaysShow\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"old\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"hiddenRow\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"hiddenColumn\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"author\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"oldLength\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"newLength\" type=\"xsd:unsignedInt\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RevisionDefinedName\">\n    <xsd:sequence>\n      <xsd:element name=\"formula\" type=\"ST_Formula\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"oldFormula\" type=\"ST_Formula\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_RevData\"/>\n    <xsd:attribute name=\"localSheetId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"customView\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"function\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"oldFunction\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"functionGroupId\" type=\"xsd:unsignedByte\" use=\"optional\"/>\n    <xsd:attribute name=\"oldFunctionGroupId\" type=\"xsd:unsignedByte\" use=\"optional\"/>\n    <xsd:attribute name=\"shortcutKey\" type=\"xsd:unsignedByte\" use=\"optional\"/>\n    <xsd:attribute name=\"oldShortcutKey\" type=\"xsd:unsignedByte\" use=\"optional\"/>\n    <xsd:attribute name=\"hidden\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"oldHidden\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"customMenu\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"oldCustomMenu\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"description\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"oldDescription\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"help\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"oldHelp\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"statusBar\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"oldStatusBar\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"comment\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"oldComment\" type=\"s:ST_Xstring\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RevisionConflict\">\n    <xsd:attributeGroup ref=\"AG_RevData\"/>\n    <xsd:attribute name=\"sheetId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RevisionQueryTableField\">\n    <xsd:attribute name=\"sheetId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"required\"/>\n    <xsd:attribute name=\"fieldId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_rwColActionType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"insertRow\"/>\n      <xsd:enumeration value=\"deleteRow\"/>\n      <xsd:enumeration value=\"insertCol\"/>\n      <xsd:enumeration value=\"deleteCol\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_RevisionAction\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"add\"/>\n      <xsd:enumeration value=\"delete\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FormulaExpression\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"ref\"/>\n      <xsd:enumeration value=\"refError\"/>\n      <xsd:enumeration value=\"area\"/>\n      <xsd:enumeration value=\"areaError\"/>\n      <xsd:enumeration value=\"computedArea\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:element name=\"users\" type=\"CT_Users\"/>\n  <xsd:complexType name=\"CT_Users\">\n    <xsd:sequence>\n      <xsd:element name=\"userInfo\" minOccurs=\"0\" maxOccurs=\"256\" type=\"CT_SharedUser\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SharedUser\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"guid\" type=\"s:ST_Guid\" use=\"required\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"id\" type=\"xsd:int\" use=\"required\"/>\n    <xsd:attribute name=\"dateTime\" type=\"xsd:dateTime\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:element name=\"worksheet\" type=\"CT_Worksheet\"/>\n  <xsd:element name=\"chartsheet\" type=\"CT_Chartsheet\"/>\n  <xsd:element name=\"dialogsheet\" type=\"CT_Dialogsheet\"/>\n  <xsd:complexType name=\"CT_Macrosheet\">\n    <xsd:sequence>\n      <xsd:element name=\"sheetPr\" type=\"CT_SheetPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dimension\" type=\"CT_SheetDimension\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sheetViews\" type=\"CT_SheetViews\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sheetFormatPr\" type=\"CT_SheetFormatPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cols\" type=\"CT_Cols\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"sheetData\" type=\"CT_SheetData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sheetProtection\" type=\"CT_SheetProtection\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"autoFilter\" type=\"CT_AutoFilter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sortState\" type=\"CT_SortState\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dataConsolidate\" type=\"CT_DataConsolidate\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"customSheetViews\" type=\"CT_CustomSheetViews\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"phoneticPr\" type=\"CT_PhoneticPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"conditionalFormatting\" type=\"CT_ConditionalFormatting\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"printOptions\" type=\"CT_PrintOptions\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pageMargins\" type=\"CT_PageMargins\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pageSetup\" type=\"CT_PageSetup\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"headerFooter\" type=\"CT_HeaderFooter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"rowBreaks\" type=\"CT_PageBreak\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"colBreaks\" type=\"CT_PageBreak\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"customProperties\" type=\"CT_CustomProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"drawing\" type=\"CT_Drawing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"legacyDrawing\" type=\"CT_LegacyDrawing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"legacyDrawingHF\" type=\"CT_LegacyDrawing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"drawingHF\" type=\"CT_DrawingHF\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"picture\" type=\"CT_SheetBackgroundPicture\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"oleObjects\" type=\"CT_OleObjects\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Dialogsheet\">\n    <xsd:sequence>\n      <xsd:element name=\"sheetPr\" minOccurs=\"0\" type=\"CT_SheetPr\"/>\n      <xsd:element name=\"sheetViews\" minOccurs=\"0\" type=\"CT_SheetViews\"/>\n      <xsd:element name=\"sheetFormatPr\" minOccurs=\"0\" type=\"CT_SheetFormatPr\"/>\n      <xsd:element name=\"sheetProtection\" type=\"CT_SheetProtection\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"customSheetViews\" minOccurs=\"0\" type=\"CT_CustomSheetViews\"/>\n      <xsd:element name=\"printOptions\" minOccurs=\"0\" type=\"CT_PrintOptions\"/>\n      <xsd:element name=\"pageMargins\" minOccurs=\"0\" type=\"CT_PageMargins\"/>\n      <xsd:element name=\"pageSetup\" minOccurs=\"0\" type=\"CT_PageSetup\"/>\n      <xsd:element name=\"headerFooter\" minOccurs=\"0\" type=\"CT_HeaderFooter\"/>\n      <xsd:element name=\"drawing\" minOccurs=\"0\" type=\"CT_Drawing\"/>\n      <xsd:element name=\"legacyDrawing\" minOccurs=\"0\" type=\"CT_LegacyDrawing\"/>\n      <xsd:element name=\"legacyDrawingHF\" type=\"CT_LegacyDrawing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"drawingHF\" type=\"CT_DrawingHF\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"oleObjects\" type=\"CT_OleObjects\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"controls\" type=\"CT_Controls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Worksheet\">\n    <xsd:sequence>\n      <xsd:element name=\"sheetPr\" type=\"CT_SheetPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dimension\" type=\"CT_SheetDimension\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sheetViews\" type=\"CT_SheetViews\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sheetFormatPr\" type=\"CT_SheetFormatPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cols\" type=\"CT_Cols\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"sheetData\" type=\"CT_SheetData\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sheetCalcPr\" type=\"CT_SheetCalcPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sheetProtection\" type=\"CT_SheetProtection\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"protectedRanges\" type=\"CT_ProtectedRanges\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"scenarios\" type=\"CT_Scenarios\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"autoFilter\" type=\"CT_AutoFilter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sortState\" type=\"CT_SortState\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dataConsolidate\" type=\"CT_DataConsolidate\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"customSheetViews\" type=\"CT_CustomSheetViews\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"mergeCells\" type=\"CT_MergeCells\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"phoneticPr\" type=\"CT_PhoneticPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"conditionalFormatting\" type=\"CT_ConditionalFormatting\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"dataValidations\" type=\"CT_DataValidations\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hyperlinks\" type=\"CT_Hyperlinks\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"printOptions\" type=\"CT_PrintOptions\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pageMargins\" type=\"CT_PageMargins\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pageSetup\" type=\"CT_PageSetup\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"headerFooter\" type=\"CT_HeaderFooter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"rowBreaks\" type=\"CT_PageBreak\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"colBreaks\" type=\"CT_PageBreak\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"customProperties\" type=\"CT_CustomProperties\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cellWatches\" type=\"CT_CellWatches\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ignoredErrors\" type=\"CT_IgnoredErrors\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"smartTags\" type=\"CT_SmartTags\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"drawing\" type=\"CT_Drawing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"legacyDrawing\" type=\"CT_LegacyDrawing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"legacyDrawingHF\" type=\"CT_LegacyDrawing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"drawingHF\" type=\"CT_DrawingHF\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"picture\" type=\"CT_SheetBackgroundPicture\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"oleObjects\" type=\"CT_OleObjects\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"controls\" type=\"CT_Controls\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"webPublishItems\" type=\"CT_WebPublishItems\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tableParts\" type=\"CT_TableParts\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SheetData\">\n    <xsd:sequence>\n      <xsd:element name=\"row\" type=\"CT_Row\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SheetCalcPr\">\n    <xsd:attribute name=\"fullCalcOnLoad\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SheetFormatPr\">\n    <xsd:attribute name=\"baseColWidth\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"8\"/>\n    <xsd:attribute name=\"defaultColWidth\" type=\"xsd:double\" use=\"optional\"/>\n    <xsd:attribute name=\"defaultRowHeight\" type=\"xsd:double\" use=\"required\"/>\n    <xsd:attribute name=\"customHeight\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"zeroHeight\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"thickTop\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"thickBottom\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"outlineLevelRow\" type=\"xsd:unsignedByte\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"outlineLevelCol\" type=\"xsd:unsignedByte\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Cols\">\n    <xsd:sequence>\n      <xsd:element name=\"col\" type=\"CT_Col\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Col\">\n    <xsd:attribute name=\"min\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"max\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"width\" type=\"xsd:double\" use=\"optional\"/>\n    <xsd:attribute name=\"style\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"hidden\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"bestFit\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"customWidth\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"phonetic\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"outlineLevel\" type=\"xsd:unsignedByte\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"collapsed\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_CellSpan\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CellSpans\">\n    <xsd:list itemType=\"ST_CellSpan\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Row\">\n    <xsd:sequence>\n      <xsd:element name=\"c\" type=\"CT_Cell\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"r\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"spans\" type=\"ST_CellSpans\" use=\"optional\"/>\n    <xsd:attribute name=\"s\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"customFormat\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"ht\" type=\"xsd:double\" use=\"optional\"/>\n    <xsd:attribute name=\"hidden\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"customHeight\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"outlineLevel\" type=\"xsd:unsignedByte\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"collapsed\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"thickTop\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"thickBot\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"ph\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Cell\">\n    <xsd:sequence>\n      <xsd:element name=\"f\" type=\"CT_CellFormula\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"v\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"is\" type=\"CT_Rst\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"r\" type=\"ST_CellRef\" use=\"optional\"/>\n    <xsd:attribute name=\"s\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"t\" type=\"ST_CellType\" use=\"optional\" default=\"n\"/>\n    <xsd:attribute name=\"cm\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"vm\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"ph\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_CellType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"n\"/>\n      <xsd:enumeration value=\"e\"/>\n      <xsd:enumeration value=\"s\"/>\n      <xsd:enumeration value=\"str\"/>\n      <xsd:enumeration value=\"inlineStr\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CellFormulaType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"normal\"/>\n      <xsd:enumeration value=\"array\"/>\n      <xsd:enumeration value=\"dataTable\"/>\n      <xsd:enumeration value=\"shared\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SheetPr\">\n    <xsd:sequence>\n      <xsd:element name=\"tabColor\" type=\"CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"outlinePr\" type=\"CT_OutlinePr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pageSetUpPr\" type=\"CT_PageSetUpPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"syncHorizontal\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"syncVertical\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"syncRef\" type=\"ST_Ref\" use=\"optional\"/>\n    <xsd:attribute name=\"transitionEvaluation\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"transitionEntry\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"published\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"codeName\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"filterMode\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"enableFormatConditionsCalculation\" type=\"xsd:boolean\" use=\"optional\"\n      default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SheetDimension\">\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SheetViews\">\n    <xsd:sequence>\n      <xsd:element name=\"sheetView\" type=\"CT_SheetView\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SheetView\">\n    <xsd:sequence>\n      <xsd:element name=\"pane\" type=\"CT_Pane\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"selection\" type=\"CT_Selection\" minOccurs=\"0\" maxOccurs=\"4\"/>\n      <xsd:element name=\"pivotSelection\" type=\"CT_PivotSelection\" minOccurs=\"0\" maxOccurs=\"4\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"windowProtection\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showFormulas\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showGridLines\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showRowColHeaders\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showZeros\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"rightToLeft\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"tabSelected\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showRuler\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showOutlineSymbols\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"defaultGridColor\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showWhiteSpace\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"view\" type=\"ST_SheetViewType\" use=\"optional\" default=\"normal\"/>\n    <xsd:attribute name=\"topLeftCell\" type=\"ST_CellRef\" use=\"optional\"/>\n    <xsd:attribute name=\"colorId\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"64\"/>\n    <xsd:attribute name=\"zoomScale\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"100\"/>\n    <xsd:attribute name=\"zoomScaleNormal\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"zoomScaleSheetLayoutView\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"zoomScalePageLayoutView\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"workbookViewId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Pane\">\n    <xsd:attribute name=\"xSplit\" type=\"xsd:double\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"ySplit\" type=\"xsd:double\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"topLeftCell\" type=\"ST_CellRef\" use=\"optional\"/>\n    <xsd:attribute name=\"activePane\" type=\"ST_Pane\" use=\"optional\" default=\"topLeft\"/>\n    <xsd:attribute name=\"state\" type=\"ST_PaneState\" use=\"optional\" default=\"split\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotSelection\">\n    <xsd:sequence>\n      <xsd:element name=\"pivotArea\" type=\"CT_PivotArea\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"pane\" type=\"ST_Pane\" use=\"optional\" default=\"topLeft\"/>\n    <xsd:attribute name=\"showHeader\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"label\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"data\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"extendable\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"axis\" type=\"ST_Axis\" use=\"optional\"/>\n    <xsd:attribute name=\"dimension\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"start\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"min\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"max\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"activeRow\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"activeCol\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"previousRow\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"previousCol\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute name=\"click\" type=\"xsd:unsignedInt\" default=\"0\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Selection\">\n    <xsd:attribute name=\"pane\" type=\"ST_Pane\" use=\"optional\" default=\"topLeft\"/>\n    <xsd:attribute name=\"activeCell\" type=\"ST_CellRef\" use=\"optional\"/>\n    <xsd:attribute name=\"activeCellId\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"sqref\" type=\"ST_Sqref\" use=\"optional\" default=\"A1\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Pane\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"bottomRight\"/>\n      <xsd:enumeration value=\"topRight\"/>\n      <xsd:enumeration value=\"bottomLeft\"/>\n      <xsd:enumeration value=\"topLeft\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PageBreak\">\n    <xsd:sequence>\n      <xsd:element name=\"brk\" type=\"CT_Break\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"manualBreakCount\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Break\">\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"min\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"max\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"man\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"pt\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SheetViewType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"normal\"/>\n      <xsd:enumeration value=\"pageBreakPreview\"/>\n      <xsd:enumeration value=\"pageLayout\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_OutlinePr\">\n    <xsd:attribute name=\"applyStyles\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"summaryBelow\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"summaryRight\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showOutlineSymbols\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PageSetUpPr\">\n    <xsd:attribute name=\"autoPageBreaks\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"fitToPage\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DataConsolidate\">\n    <xsd:sequence>\n      <xsd:element name=\"dataRefs\" type=\"CT_DataRefs\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"function\" type=\"ST_DataConsolidateFunction\" use=\"optional\" default=\"sum\"/>\n    <xsd:attribute name=\"startLabels\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"leftLabels\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"topLabels\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"link\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DataConsolidateFunction\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"average\"/>\n      <xsd:enumeration value=\"count\"/>\n      <xsd:enumeration value=\"countNums\"/>\n      <xsd:enumeration value=\"max\"/>\n      <xsd:enumeration value=\"min\"/>\n      <xsd:enumeration value=\"product\"/>\n      <xsd:enumeration value=\"stdDev\"/>\n      <xsd:enumeration value=\"stdDevp\"/>\n      <xsd:enumeration value=\"sum\"/>\n      <xsd:enumeration value=\"var\"/>\n      <xsd:enumeration value=\"varp\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DataRefs\">\n    <xsd:sequence>\n      <xsd:element name=\"dataRef\" type=\"CT_DataRef\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DataRef\">\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"optional\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"sheet\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MergeCells\">\n    <xsd:sequence>\n      <xsd:element name=\"mergeCell\" type=\"CT_MergeCell\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MergeCell\">\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SmartTags\">\n    <xsd:sequence>\n      <xsd:element name=\"cellSmartTags\" type=\"CT_CellSmartTags\" minOccurs=\"1\" maxOccurs=\"unbounded\"\n      />\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellSmartTags\">\n    <xsd:sequence>\n      <xsd:element name=\"cellSmartTag\" type=\"CT_CellSmartTag\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"r\" type=\"ST_CellRef\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellSmartTag\">\n    <xsd:sequence>\n      <xsd:element name=\"cellSmartTagPr\" minOccurs=\"0\" maxOccurs=\"unbounded\"\n        type=\"CT_CellSmartTagPr\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"type\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"deleted\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"xmlBased\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellSmartTagPr\">\n    <xsd:attribute name=\"key\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"val\" type=\"s:ST_Xstring\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Drawing\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LegacyDrawing\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DrawingHF\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n    <xsd:attribute name=\"lho\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"lhe\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"lhf\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"cho\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"che\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"chf\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"rho\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"rhe\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"rhf\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"lfo\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"lfe\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"lff\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"cfo\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"cfe\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"cff\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"rfo\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"rfe\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"rff\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomSheetViews\">\n    <xsd:sequence>\n      <xsd:element name=\"customSheetView\" minOccurs=\"1\" maxOccurs=\"unbounded\"\n        type=\"CT_CustomSheetView\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomSheetView\">\n    <xsd:sequence>\n      <xsd:element name=\"pane\" type=\"CT_Pane\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"selection\" type=\"CT_Selection\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"rowBreaks\" type=\"CT_PageBreak\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"colBreaks\" type=\"CT_PageBreak\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pageMargins\" type=\"CT_PageMargins\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"printOptions\" type=\"CT_PrintOptions\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pageSetup\" type=\"CT_PageSetup\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"headerFooter\" type=\"CT_HeaderFooter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"autoFilter\" type=\"CT_AutoFilter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"guid\" type=\"s:ST_Guid\" use=\"required\"/>\n    <xsd:attribute name=\"scale\" type=\"xsd:unsignedInt\" default=\"100\"/>\n    <xsd:attribute name=\"colorId\" type=\"xsd:unsignedInt\" default=\"64\"/>\n    <xsd:attribute name=\"showPageBreaks\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showFormulas\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showGridLines\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showRowCol\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"outlineSymbols\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"zeroValues\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"fitToPage\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"printArea\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"filter\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showAutoFilter\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"hiddenRows\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"hiddenColumns\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"state\" type=\"ST_SheetState\" default=\"visible\"/>\n    <xsd:attribute name=\"filterUnique\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"view\" type=\"ST_SheetViewType\" default=\"normal\"/>\n    <xsd:attribute name=\"showRuler\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"topLeftCell\" type=\"ST_CellRef\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DataValidations\">\n    <xsd:sequence>\n      <xsd:element name=\"dataValidation\" type=\"CT_DataValidation\" minOccurs=\"1\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"disablePrompts\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"xWindow\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"yWindow\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DataValidation\">\n    <xsd:sequence>\n      <xsd:element name=\"formula1\" type=\"ST_Formula\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"formula2\" type=\"ST_Formula\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"type\" type=\"ST_DataValidationType\" use=\"optional\" default=\"none\"/>\n    <xsd:attribute name=\"errorStyle\" type=\"ST_DataValidationErrorStyle\" use=\"optional\"\n      default=\"stop\"/>\n    <xsd:attribute name=\"imeMode\" type=\"ST_DataValidationImeMode\" use=\"optional\" default=\"noControl\"/>\n    <xsd:attribute name=\"operator\" type=\"ST_DataValidationOperator\" use=\"optional\" default=\"between\"/>\n    <xsd:attribute name=\"allowBlank\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showDropDown\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showInputMessage\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showErrorMessage\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"errorTitle\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"error\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"promptTitle\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"prompt\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"sqref\" type=\"ST_Sqref\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DataValidationType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"whole\"/>\n      <xsd:enumeration value=\"decimal\"/>\n      <xsd:enumeration value=\"list\"/>\n      <xsd:enumeration value=\"date\"/>\n      <xsd:enumeration value=\"time\"/>\n      <xsd:enumeration value=\"textLength\"/>\n      <xsd:enumeration value=\"custom\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DataValidationOperator\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"between\"/>\n      <xsd:enumeration value=\"notBetween\"/>\n      <xsd:enumeration value=\"equal\"/>\n      <xsd:enumeration value=\"notEqual\"/>\n      <xsd:enumeration value=\"lessThan\"/>\n      <xsd:enumeration value=\"lessThanOrEqual\"/>\n      <xsd:enumeration value=\"greaterThan\"/>\n      <xsd:enumeration value=\"greaterThanOrEqual\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DataValidationErrorStyle\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"stop\"/>\n      <xsd:enumeration value=\"warning\"/>\n      <xsd:enumeration value=\"information\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DataValidationImeMode\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"noControl\"/>\n      <xsd:enumeration value=\"off\"/>\n      <xsd:enumeration value=\"on\"/>\n      <xsd:enumeration value=\"disabled\"/>\n      <xsd:enumeration value=\"hiragana\"/>\n      <xsd:enumeration value=\"fullKatakana\"/>\n      <xsd:enumeration value=\"halfKatakana\"/>\n      <xsd:enumeration value=\"fullAlpha\"/>\n      <xsd:enumeration value=\"halfAlpha\"/>\n      <xsd:enumeration value=\"fullHangul\"/>\n      <xsd:enumeration value=\"halfHangul\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CfType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"expression\"/>\n      <xsd:enumeration value=\"cellIs\"/>\n      <xsd:enumeration value=\"colorScale\"/>\n      <xsd:enumeration value=\"dataBar\"/>\n      <xsd:enumeration value=\"iconSet\"/>\n      <xsd:enumeration value=\"top10\"/>\n      <xsd:enumeration value=\"uniqueValues\"/>\n      <xsd:enumeration value=\"duplicateValues\"/>\n      <xsd:enumeration value=\"containsText\"/>\n      <xsd:enumeration value=\"notContainsText\"/>\n      <xsd:enumeration value=\"beginsWith\"/>\n      <xsd:enumeration value=\"endsWith\"/>\n      <xsd:enumeration value=\"containsBlanks\"/>\n      <xsd:enumeration value=\"notContainsBlanks\"/>\n      <xsd:enumeration value=\"containsErrors\"/>\n      <xsd:enumeration value=\"notContainsErrors\"/>\n      <xsd:enumeration value=\"timePeriod\"/>\n      <xsd:enumeration value=\"aboveAverage\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TimePeriod\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"today\"/>\n      <xsd:enumeration value=\"yesterday\"/>\n      <xsd:enumeration value=\"tomorrow\"/>\n      <xsd:enumeration value=\"last7Days\"/>\n      <xsd:enumeration value=\"thisMonth\"/>\n      <xsd:enumeration value=\"lastMonth\"/>\n      <xsd:enumeration value=\"nextMonth\"/>\n      <xsd:enumeration value=\"thisWeek\"/>\n      <xsd:enumeration value=\"lastWeek\"/>\n      <xsd:enumeration value=\"nextWeek\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ConditionalFormattingOperator\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"lessThan\"/>\n      <xsd:enumeration value=\"lessThanOrEqual\"/>\n      <xsd:enumeration value=\"equal\"/>\n      <xsd:enumeration value=\"notEqual\"/>\n      <xsd:enumeration value=\"greaterThanOrEqual\"/>\n      <xsd:enumeration value=\"greaterThan\"/>\n      <xsd:enumeration value=\"between\"/>\n      <xsd:enumeration value=\"notBetween\"/>\n      <xsd:enumeration value=\"containsText\"/>\n      <xsd:enumeration value=\"notContains\"/>\n      <xsd:enumeration value=\"beginsWith\"/>\n      <xsd:enumeration value=\"endsWith\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CfvoType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"num\"/>\n      <xsd:enumeration value=\"percent\"/>\n      <xsd:enumeration value=\"max\"/>\n      <xsd:enumeration value=\"min\"/>\n      <xsd:enumeration value=\"formula\"/>\n      <xsd:enumeration value=\"percentile\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ConditionalFormatting\">\n    <xsd:sequence>\n      <xsd:element name=\"cfRule\" type=\"CT_CfRule\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"pivot\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"sqref\" type=\"ST_Sqref\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CfRule\">\n    <xsd:sequence>\n      <xsd:element name=\"formula\" type=\"ST_Formula\" minOccurs=\"0\" maxOccurs=\"3\"/>\n      <xsd:element name=\"colorScale\" type=\"CT_ColorScale\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dataBar\" type=\"CT_DataBar\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"iconSet\" type=\"CT_IconSet\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"type\" type=\"ST_CfType\"/>\n    <xsd:attribute name=\"dxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n    <xsd:attribute name=\"priority\" type=\"xsd:int\" use=\"required\"/>\n    <xsd:attribute name=\"stopIfTrue\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"aboveAverage\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"percent\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"bottom\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"operator\" type=\"ST_ConditionalFormattingOperator\" use=\"optional\"/>\n    <xsd:attribute name=\"text\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"timePeriod\" type=\"ST_TimePeriod\" use=\"optional\"/>\n    <xsd:attribute name=\"rank\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"stdDev\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"equalAverage\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Hyperlinks\">\n    <xsd:sequence>\n      <xsd:element name=\"hyperlink\" type=\"CT_Hyperlink\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Hyperlink\">\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"required\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n    <xsd:attribute name=\"location\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"tooltip\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"display\" type=\"s:ST_Xstring\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellFormula\">\n    <xsd:simpleContent>\n      <xsd:extension base=\"ST_Formula\">\n        <xsd:attribute name=\"t\" type=\"ST_CellFormulaType\" use=\"optional\" default=\"normal\"/>\n        <xsd:attribute name=\"aca\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n        <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"optional\"/>\n        <xsd:attribute name=\"dt2D\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n        <xsd:attribute name=\"dtr\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n        <xsd:attribute name=\"del1\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n        <xsd:attribute name=\"del2\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n        <xsd:attribute name=\"r1\" type=\"ST_CellRef\" use=\"optional\"/>\n        <xsd:attribute name=\"r2\" type=\"ST_CellRef\" use=\"optional\"/>\n        <xsd:attribute name=\"ca\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n        <xsd:attribute name=\"si\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n        <xsd:attribute name=\"bx\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n      </xsd:extension>\n    </xsd:simpleContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColorScale\">\n    <xsd:sequence>\n      <xsd:element name=\"cfvo\" type=\"CT_Cfvo\" minOccurs=\"2\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"color\" type=\"CT_Color\" minOccurs=\"2\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DataBar\">\n    <xsd:sequence>\n      <xsd:element name=\"cfvo\" type=\"CT_Cfvo\" minOccurs=\"2\" maxOccurs=\"2\"/>\n      <xsd:element name=\"color\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"minLength\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"10\"/>\n    <xsd:attribute name=\"maxLength\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"90\"/>\n    <xsd:attribute name=\"showValue\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_IconSet\">\n    <xsd:sequence>\n      <xsd:element name=\"cfvo\" type=\"CT_Cfvo\" minOccurs=\"2\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"iconSet\" type=\"ST_IconSetType\" use=\"optional\" default=\"3TrafficLights1\"/>\n    <xsd:attribute name=\"showValue\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"percent\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"reverse\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Cfvo\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"type\" type=\"ST_CfvoType\" use=\"required\"/>\n    <xsd:attribute name=\"val\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"gte\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PageMargins\">\n    <xsd:attribute name=\"left\" type=\"xsd:double\" use=\"required\"/>\n    <xsd:attribute name=\"right\" type=\"xsd:double\" use=\"required\"/>\n    <xsd:attribute name=\"top\" type=\"xsd:double\" use=\"required\"/>\n    <xsd:attribute name=\"bottom\" type=\"xsd:double\" use=\"required\"/>\n    <xsd:attribute name=\"header\" type=\"xsd:double\" use=\"required\"/>\n    <xsd:attribute name=\"footer\" type=\"xsd:double\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PrintOptions\">\n    <xsd:attribute name=\"horizontalCentered\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"verticalCentered\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"headings\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"gridLines\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"gridLinesSet\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PageSetup\">\n    <xsd:attribute name=\"paperSize\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"paperHeight\" type=\"s:ST_PositiveUniversalMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"paperWidth\" type=\"s:ST_PositiveUniversalMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"scale\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"100\"/>\n    <xsd:attribute name=\"firstPageNumber\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"fitToWidth\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"fitToHeight\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"pageOrder\" type=\"ST_PageOrder\" use=\"optional\" default=\"downThenOver\"/>\n    <xsd:attribute name=\"orientation\" type=\"ST_Orientation\" use=\"optional\" default=\"default\"/>\n    <xsd:attribute name=\"usePrinterDefaults\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"blackAndWhite\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"draft\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"cellComments\" type=\"ST_CellComments\" use=\"optional\" default=\"none\"/>\n    <xsd:attribute name=\"useFirstPageNumber\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"errors\" type=\"ST_PrintError\" use=\"optional\" default=\"displayed\"/>\n    <xsd:attribute name=\"horizontalDpi\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"600\"/>\n    <xsd:attribute name=\"verticalDpi\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"600\"/>\n    <xsd:attribute name=\"copies\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PageOrder\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"downThenOver\"/>\n      <xsd:enumeration value=\"overThenDown\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Orientation\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"default\"/>\n      <xsd:enumeration value=\"portrait\"/>\n      <xsd:enumeration value=\"landscape\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CellComments\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"asDisplayed\"/>\n      <xsd:enumeration value=\"atEnd\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_HeaderFooter\">\n    <xsd:sequence>\n      <xsd:element name=\"oddHeader\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"oddFooter\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"evenHeader\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"evenFooter\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"firstHeader\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"firstFooter\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"differentOddEven\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"differentFirst\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"scaleWithDoc\" type=\"xsd:boolean\" default=\"true\"/>\n    <xsd:attribute name=\"alignWithMargins\" type=\"xsd:boolean\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PrintError\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"displayed\"/>\n      <xsd:enumeration value=\"blank\"/>\n      <xsd:enumeration value=\"dash\"/>\n      <xsd:enumeration value=\"NA\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Scenarios\">\n    <xsd:sequence>\n      <xsd:element name=\"scenario\" type=\"CT_Scenario\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"current\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"show\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"sqref\" type=\"ST_Sqref\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SheetProtection\">\n    <xsd:attribute name=\"password\" type=\"ST_UnsignedShortHex\" use=\"optional\"/>\n    <xsd:attribute name=\"algorithmName\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"hashValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"saltValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"spinCount\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"sheet\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"objects\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"scenarios\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"formatCells\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"formatColumns\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"formatRows\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"insertColumns\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"insertRows\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"insertHyperlinks\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"deleteColumns\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"deleteRows\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"selectLockedCells\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"sort\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"autoFilter\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"pivotTables\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"selectUnlockedCells\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ProtectedRanges\">\n    <xsd:sequence>\n      <xsd:element name=\"protectedRange\" type=\"CT_ProtectedRange\" minOccurs=\"1\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ProtectedRange\">\n    <xsd:sequence>\n      <xsd:element name=\"securityDescriptor\" type=\"xsd:string\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"password\" type=\"ST_UnsignedShortHex\" use=\"optional\"/>\n    <xsd:attribute name=\"sqref\" type=\"ST_Sqref\" use=\"required\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"securityDescriptor\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"algorithmName\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"hashValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"saltValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"spinCount\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Scenario\">\n    <xsd:sequence>\n      <xsd:element name=\"inputCells\" type=\"CT_InputCells\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"locked\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"hidden\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"user\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"comment\" type=\"s:ST_Xstring\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_InputCells\">\n    <xsd:attribute name=\"r\" type=\"ST_CellRef\" use=\"required\"/>\n    <xsd:attribute name=\"deleted\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"undone\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"val\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"numFmtId\" type=\"ST_NumFmtId\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellWatches\">\n    <xsd:sequence>\n      <xsd:element name=\"cellWatch\" type=\"CT_CellWatch\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellWatch\">\n    <xsd:attribute name=\"r\" type=\"ST_CellRef\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Chartsheet\">\n    <xsd:sequence>\n      <xsd:element name=\"sheetPr\" type=\"CT_ChartsheetPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sheetViews\" type=\"CT_ChartsheetViews\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sheetProtection\" type=\"CT_ChartsheetProtection\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"customSheetViews\" type=\"CT_CustomChartsheetViews\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"pageMargins\" minOccurs=\"0\" type=\"CT_PageMargins\"/>\n      <xsd:element name=\"pageSetup\" type=\"CT_CsPageSetup\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"headerFooter\" minOccurs=\"0\" type=\"CT_HeaderFooter\"/>\n      <xsd:element name=\"drawing\" type=\"CT_Drawing\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"legacyDrawing\" type=\"CT_LegacyDrawing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"legacyDrawingHF\" type=\"CT_LegacyDrawing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"drawingHF\" type=\"CT_DrawingHF\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"picture\" type=\"CT_SheetBackgroundPicture\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"webPublishItems\" type=\"CT_WebPublishItems\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ChartsheetPr\">\n    <xsd:sequence>\n      <xsd:element name=\"tabColor\" type=\"CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"published\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"codeName\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ChartsheetViews\">\n    <xsd:sequence>\n      <xsd:element name=\"sheetView\" type=\"CT_ChartsheetView\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ChartsheetView\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"tabSelected\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"zoomScale\" type=\"xsd:unsignedInt\" default=\"100\" use=\"optional\"/>\n    <xsd:attribute name=\"workbookViewId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"zoomToFit\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ChartsheetProtection\">\n    <xsd:attribute name=\"password\" type=\"ST_UnsignedShortHex\" use=\"optional\"/>\n    <xsd:attribute name=\"algorithmName\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"hashValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"saltValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"spinCount\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"content\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"objects\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CsPageSetup\">\n    <xsd:attribute name=\"paperSize\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"paperHeight\" type=\"s:ST_PositiveUniversalMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"paperWidth\" type=\"s:ST_PositiveUniversalMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"firstPageNumber\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"orientation\" type=\"ST_Orientation\" use=\"optional\" default=\"default\"/>\n    <xsd:attribute name=\"usePrinterDefaults\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"blackAndWhite\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"draft\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"useFirstPageNumber\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"horizontalDpi\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"600\"/>\n    <xsd:attribute name=\"verticalDpi\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"600\"/>\n    <xsd:attribute name=\"copies\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomChartsheetViews\">\n    <xsd:sequence>\n      <xsd:element name=\"customSheetView\" minOccurs=\"0\" maxOccurs=\"unbounded\"\n        type=\"CT_CustomChartsheetView\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomChartsheetView\">\n    <xsd:sequence>\n      <xsd:element name=\"pageMargins\" type=\"CT_PageMargins\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pageSetup\" type=\"CT_CsPageSetup\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"headerFooter\" type=\"CT_HeaderFooter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"guid\" type=\"s:ST_Guid\" use=\"required\"/>\n    <xsd:attribute name=\"scale\" type=\"xsd:unsignedInt\" default=\"100\"/>\n    <xsd:attribute name=\"state\" type=\"ST_SheetState\" default=\"visible\"/>\n    <xsd:attribute name=\"zoomToFit\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomProperties\">\n    <xsd:sequence>\n      <xsd:element name=\"customPr\" type=\"CT_CustomProperty\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomProperty\">\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OleObjects\">\n    <xsd:sequence>\n      <xsd:element name=\"oleObject\" type=\"CT_OleObject\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OleObject\">\n    <xsd:sequence>\n      <xsd:element name=\"objectPr\" type=\"CT_ObjectPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"progId\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"dvAspect\" type=\"ST_DvAspect\" use=\"optional\" default=\"DVASPECT_CONTENT\"/>\n    <xsd:attribute name=\"link\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"oleUpdate\" type=\"ST_OleUpdate\" use=\"optional\"/>\n    <xsd:attribute name=\"autoLoad\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"shapeId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ObjectPr\">\n    <xsd:sequence>\n      <xsd:element name=\"anchor\" type=\"CT_ObjectAnchor\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"locked\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"defaultSize\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"print\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"disabled\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"uiObject\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"autoFill\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"autoLine\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"autoPict\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"macro\" type=\"ST_Formula\" use=\"optional\"/>\n    <xsd:attribute name=\"altText\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"dde\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DvAspect\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"DVASPECT_CONTENT\"/>\n      <xsd:enumeration value=\"DVASPECT_ICON\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_OleUpdate\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"OLEUPDATE_ALWAYS\"/>\n      <xsd:enumeration value=\"OLEUPDATE_ONCALL\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_WebPublishItems\">\n    <xsd:sequence>\n      <xsd:element name=\"webPublishItem\" type=\"CT_WebPublishItem\" minOccurs=\"1\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WebPublishItem\">\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"divId\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"sourceType\" type=\"ST_WebSourceType\" use=\"required\"/>\n    <xsd:attribute name=\"sourceRef\" type=\"ST_Ref\" use=\"optional\"/>\n    <xsd:attribute name=\"sourceObject\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"destinationFile\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"title\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"autoRepublish\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Controls\">\n    <xsd:sequence>\n      <xsd:element name=\"control\" type=\"CT_Control\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Control\">\n    <xsd:sequence>\n      <xsd:element name=\"controlPr\" type=\"CT_ControlPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"shapeId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ControlPr\">\n    <xsd:sequence>\n      <xsd:element name=\"anchor\" type=\"CT_ObjectAnchor\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"locked\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"defaultSize\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"print\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"disabled\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"recalcAlways\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"uiObject\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"autoFill\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"autoLine\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"autoPict\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"macro\" type=\"ST_Formula\" use=\"optional\"/>\n    <xsd:attribute name=\"altText\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"linkedCell\" type=\"ST_Formula\" use=\"optional\"/>\n    <xsd:attribute name=\"listFillRange\" type=\"ST_Formula\" use=\"optional\"/>\n    <xsd:attribute name=\"cf\" type=\"s:ST_Xstring\" use=\"optional\" default=\"pict\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_WebSourceType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"sheet\"/>\n      <xsd:enumeration value=\"printArea\"/>\n      <xsd:enumeration value=\"autoFilter\"/>\n      <xsd:enumeration value=\"range\"/>\n      <xsd:enumeration value=\"chart\"/>\n      <xsd:enumeration value=\"pivotTable\"/>\n      <xsd:enumeration value=\"query\"/>\n      <xsd:enumeration value=\"label\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_IgnoredErrors\">\n    <xsd:sequence>\n      <xsd:element name=\"ignoredError\" type=\"CT_IgnoredError\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_IgnoredError\">\n    <xsd:attribute name=\"sqref\" type=\"ST_Sqref\" use=\"required\"/>\n    <xsd:attribute name=\"evalError\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"twoDigitTextYear\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"numberStoredAsText\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"formula\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"formulaRange\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"unlockedFormula\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"emptyCellReference\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"listDataValidation\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"calculatedColumn\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PaneState\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"split\"/>\n      <xsd:enumeration value=\"frozen\"/>\n      <xsd:enumeration value=\"frozenSplit\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TableParts\">\n    <xsd:sequence>\n      <xsd:element name=\"tablePart\" type=\"CT_TablePart\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TablePart\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:element name=\"metadata\" type=\"CT_Metadata\"/>\n  <xsd:complexType name=\"CT_Metadata\">\n    <xsd:sequence>\n      <xsd:element name=\"metadataTypes\" type=\"CT_MetadataTypes\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"metadataStrings\" type=\"CT_MetadataStrings\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"mdxMetadata\" type=\"CT_MdxMetadata\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"futureMetadata\" type=\"CT_FutureMetadata\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"cellMetadata\" type=\"CT_MetadataBlocks\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"valueMetadata\" type=\"CT_MetadataBlocks\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MetadataTypes\">\n    <xsd:sequence>\n      <xsd:element name=\"metadataType\" type=\"CT_MetadataType\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MetadataType\">\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"minSupportedVersion\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"ghostRow\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"ghostCol\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"edit\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"delete\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"copy\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"pasteAll\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"pasteFormulas\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"pasteValues\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"pasteFormats\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"pasteComments\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"pasteDataValidation\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"pasteBorders\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"pasteColWidths\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"pasteNumberFormats\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"merge\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"splitFirst\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"splitAll\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"rowColShift\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"clearAll\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"clearFormats\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"clearContents\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"clearComments\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"assign\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"coerce\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"adjust\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"cellMeta\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MetadataBlocks\">\n    <xsd:sequence>\n      <xsd:element name=\"bk\" type=\"CT_MetadataBlock\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MetadataBlock\">\n    <xsd:sequence>\n      <xsd:element name=\"rc\" type=\"CT_MetadataRecord\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MetadataRecord\">\n    <xsd:attribute name=\"t\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"v\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FutureMetadata\">\n    <xsd:sequence>\n      <xsd:element name=\"bk\" type=\"CT_FutureMetadataBlock\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FutureMetadataBlock\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MdxMetadata\">\n    <xsd:sequence>\n      <xsd:element name=\"mdx\" type=\"CT_Mdx\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Mdx\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"t\" type=\"CT_MdxTuple\"/>\n      <xsd:element name=\"ms\" type=\"CT_MdxSet\"/>\n      <xsd:element name=\"p\" type=\"CT_MdxMemeberProp\"/>\n      <xsd:element name=\"k\" type=\"CT_MdxKPI\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"n\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"f\" type=\"ST_MdxFunctionType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_MdxFunctionType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"m\"/>\n      <xsd:enumeration value=\"v\"/>\n      <xsd:enumeration value=\"s\"/>\n      <xsd:enumeration value=\"c\"/>\n      <xsd:enumeration value=\"r\"/>\n      <xsd:enumeration value=\"p\"/>\n      <xsd:enumeration value=\"k\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_MdxTuple\">\n    <xsd:sequence>\n      <xsd:element name=\"n\" type=\"CT_MetadataStringIndex\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"c\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"ct\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"si\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"fi\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"bc\" type=\"ST_UnsignedIntHex\" use=\"optional\"/>\n    <xsd:attribute name=\"fc\" type=\"ST_UnsignedIntHex\" use=\"optional\"/>\n    <xsd:attribute name=\"i\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"u\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"st\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"b\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MdxSet\">\n    <xsd:sequence>\n      <xsd:element name=\"n\" type=\"CT_MetadataStringIndex\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"ns\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"c\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"o\" type=\"ST_MdxSetOrder\" use=\"optional\" default=\"u\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_MdxSetOrder\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"u\"/>\n      <xsd:enumeration value=\"a\"/>\n      <xsd:enumeration value=\"d\"/>\n      <xsd:enumeration value=\"aa\"/>\n      <xsd:enumeration value=\"ad\"/>\n      <xsd:enumeration value=\"na\"/>\n      <xsd:enumeration value=\"nd\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_MdxMemeberProp\">\n    <xsd:attribute name=\"n\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"np\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MdxKPI\">\n    <xsd:attribute name=\"n\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"np\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"p\" type=\"ST_MdxKPIProperty\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_MdxKPIProperty\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"v\"/>\n      <xsd:enumeration value=\"g\"/>\n      <xsd:enumeration value=\"s\"/>\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"w\"/>\n      <xsd:enumeration value=\"m\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_MetadataStringIndex\">\n    <xsd:attribute name=\"x\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"s\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MetadataStrings\">\n    <xsd:sequence>\n      <xsd:element name=\"s\" type=\"CT_XStringElement\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:element name=\"singleXmlCells\" type=\"CT_SingleXmlCells\"/>\n  <xsd:complexType name=\"CT_SingleXmlCells\">\n    <xsd:sequence>\n      <xsd:element name=\"singleXmlCell\" type=\"CT_SingleXmlCell\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SingleXmlCell\">\n    <xsd:sequence>\n      <xsd:element name=\"xmlCellPr\" type=\"CT_XmlCellPr\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"r\" type=\"ST_CellRef\" use=\"required\"/>\n    <xsd:attribute name=\"connectionId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_XmlCellPr\">\n    <xsd:sequence>\n      <xsd:element name=\"xmlPr\" type=\"CT_XmlPr\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"uniqueName\" type=\"s:ST_Xstring\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_XmlPr\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"mapId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"xpath\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"xmlDataType\" type=\"ST_XmlDataType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:element name=\"styleSheet\" type=\"CT_Stylesheet\"/>\n  <xsd:complexType name=\"CT_Stylesheet\">\n    <xsd:sequence>\n      <xsd:element name=\"numFmts\" type=\"CT_NumFmts\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fonts\" type=\"CT_Fonts\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fills\" type=\"CT_Fills\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"borders\" type=\"CT_Borders\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cellStyleXfs\" type=\"CT_CellStyleXfs\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cellXfs\" type=\"CT_CellXfs\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cellStyles\" type=\"CT_CellStyles\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"dxfs\" type=\"CT_Dxfs\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tableStyles\" type=\"CT_TableStyles\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"colors\" type=\"CT_Colors\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellAlignment\">\n    <xsd:attribute name=\"horizontal\" type=\"ST_HorizontalAlignment\" use=\"optional\"/>\n    <xsd:attribute name=\"vertical\" type=\"ST_VerticalAlignment\" default=\"bottom\" use=\"optional\"/>\n    <xsd:attribute name=\"textRotation\" type=\"ST_TextRotation\" use=\"optional\"/>\n    <xsd:attribute name=\"wrapText\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"indent\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"relativeIndent\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"justifyLastLine\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"shrinkToFit\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"readingOrder\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextRotation\">\n    <xsd:union>\n      <xsd:simpleType>\n        <xsd:restriction base=\"xsd:nonNegativeInteger\">\n          <xsd:maxInclusive value=\"180\"/>\n        </xsd:restriction>\n      </xsd:simpleType>\n      <xsd:simpleType>\n        <xsd:restriction base=\"xsd:nonNegativeInteger\">\n          <xsd:enumeration value=\"255\"/>\n        </xsd:restriction>\n      </xsd:simpleType>\n    </xsd:union>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_BorderStyle\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"thin\"/>\n      <xsd:enumeration value=\"medium\"/>\n      <xsd:enumeration value=\"dashed\"/>\n      <xsd:enumeration value=\"dotted\"/>\n      <xsd:enumeration value=\"thick\"/>\n      <xsd:enumeration value=\"double\"/>\n      <xsd:enumeration value=\"hair\"/>\n      <xsd:enumeration value=\"mediumDashed\"/>\n      <xsd:enumeration value=\"dashDot\"/>\n      <xsd:enumeration value=\"mediumDashDot\"/>\n      <xsd:enumeration value=\"dashDotDot\"/>\n      <xsd:enumeration value=\"mediumDashDotDot\"/>\n      <xsd:enumeration value=\"slantDashDot\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Borders\">\n    <xsd:sequence>\n      <xsd:element name=\"border\" type=\"CT_Border\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Border\">\n    <xsd:sequence>\n      <xsd:element name=\"start\" type=\"CT_BorderPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"end\" type=\"CT_BorderPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"left\" type=\"CT_BorderPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"right\" type=\"CT_BorderPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"top\" type=\"CT_BorderPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bottom\" type=\"CT_BorderPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"diagonal\" type=\"CT_BorderPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"vertical\" type=\"CT_BorderPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"horizontal\" type=\"CT_BorderPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"diagonalUp\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"diagonalDown\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"outline\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BorderPr\">\n    <xsd:sequence>\n      <xsd:element name=\"color\" type=\"CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"style\" type=\"ST_BorderStyle\" use=\"optional\" default=\"none\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellProtection\">\n    <xsd:attribute name=\"locked\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"hidden\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Fonts\">\n    <xsd:sequence>\n      <xsd:element name=\"font\" type=\"CT_Font\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Fills\">\n    <xsd:sequence>\n      <xsd:element name=\"fill\" type=\"CT_Fill\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Fill\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"1\">\n      <xsd:element name=\"patternFill\" type=\"CT_PatternFill\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gradientFill\" type=\"CT_GradientFill\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PatternFill\">\n    <xsd:sequence>\n      <xsd:element name=\"fgColor\" type=\"CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bgColor\" type=\"CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"patternType\" type=\"ST_PatternType\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Color\">\n    <xsd:attribute name=\"auto\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"indexed\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"rgb\" type=\"ST_UnsignedIntHex\" use=\"optional\"/>\n    <xsd:attribute name=\"theme\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"tint\" type=\"xsd:double\" use=\"optional\" default=\"0.0\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PatternType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"solid\"/>\n      <xsd:enumeration value=\"mediumGray\"/>\n      <xsd:enumeration value=\"darkGray\"/>\n      <xsd:enumeration value=\"lightGray\"/>\n      <xsd:enumeration value=\"darkHorizontal\"/>\n      <xsd:enumeration value=\"darkVertical\"/>\n      <xsd:enumeration value=\"darkDown\"/>\n      <xsd:enumeration value=\"darkUp\"/>\n      <xsd:enumeration value=\"darkGrid\"/>\n      <xsd:enumeration value=\"darkTrellis\"/>\n      <xsd:enumeration value=\"lightHorizontal\"/>\n      <xsd:enumeration value=\"lightVertical\"/>\n      <xsd:enumeration value=\"lightDown\"/>\n      <xsd:enumeration value=\"lightUp\"/>\n      <xsd:enumeration value=\"lightGrid\"/>\n      <xsd:enumeration value=\"lightTrellis\"/>\n      <xsd:enumeration value=\"gray125\"/>\n      <xsd:enumeration value=\"gray0625\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_GradientFill\">\n    <xsd:sequence>\n      <xsd:element name=\"stop\" type=\"CT_GradientStop\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"type\" type=\"ST_GradientType\" use=\"optional\" default=\"linear\"/>\n    <xsd:attribute name=\"degree\" type=\"xsd:double\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"left\" type=\"xsd:double\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"right\" type=\"xsd:double\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"top\" type=\"xsd:double\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"bottom\" type=\"xsd:double\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GradientStop\">\n    <xsd:sequence>\n      <xsd:element name=\"color\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"position\" type=\"xsd:double\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_GradientType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"linear\"/>\n      <xsd:enumeration value=\"path\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HorizontalAlignment\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"general\"/>\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"right\"/>\n      <xsd:enumeration value=\"fill\"/>\n      <xsd:enumeration value=\"justify\"/>\n      <xsd:enumeration value=\"centerContinuous\"/>\n      <xsd:enumeration value=\"distributed\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_VerticalAlignment\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"top\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"bottom\"/>\n      <xsd:enumeration value=\"justify\"/>\n      <xsd:enumeration value=\"distributed\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_NumFmts\">\n    <xsd:sequence>\n      <xsd:element name=\"numFmt\" type=\"CT_NumFmt\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NumFmt\">\n    <xsd:attribute name=\"numFmtId\" type=\"ST_NumFmtId\" use=\"required\"/>\n    <xsd:attribute name=\"formatCode\" type=\"s:ST_Xstring\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellStyleXfs\">\n    <xsd:sequence>\n      <xsd:element name=\"xf\" type=\"CT_Xf\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellXfs\">\n    <xsd:sequence>\n      <xsd:element name=\"xf\" type=\"CT_Xf\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Xf\">\n    <xsd:sequence>\n      <xsd:element name=\"alignment\" type=\"CT_CellAlignment\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"protection\" type=\"CT_CellProtection\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"numFmtId\" type=\"ST_NumFmtId\" use=\"optional\"/>\n    <xsd:attribute name=\"fontId\" type=\"ST_FontId\" use=\"optional\"/>\n    <xsd:attribute name=\"fillId\" type=\"ST_FillId\" use=\"optional\"/>\n    <xsd:attribute name=\"borderId\" type=\"ST_BorderId\" use=\"optional\"/>\n    <xsd:attribute name=\"xfId\" type=\"ST_CellStyleXfId\" use=\"optional\"/>\n    <xsd:attribute name=\"quotePrefix\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"pivotButton\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"applyNumberFormat\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"applyFont\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"applyFill\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"applyBorder\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"applyAlignment\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"applyProtection\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellStyles\">\n    <xsd:sequence>\n      <xsd:element name=\"cellStyle\" type=\"CT_CellStyle\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellStyle\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"xfId\" type=\"ST_CellStyleXfId\" use=\"required\"/>\n    <xsd:attribute name=\"builtinId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"iLevel\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"hidden\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"customBuiltin\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Dxfs\">\n    <xsd:sequence>\n      <xsd:element name=\"dxf\" type=\"CT_Dxf\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Dxf\">\n    <xsd:sequence>\n      <xsd:element name=\"font\" type=\"CT_Font\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"numFmt\" type=\"CT_NumFmt\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fill\" type=\"CT_Fill\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"alignment\" type=\"CT_CellAlignment\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"border\" type=\"CT_Border\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"protection\" type=\"CT_CellProtection\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_NumFmtId\">\n    <xsd:restriction base=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FontId\">\n    <xsd:restriction base=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FillId\">\n    <xsd:restriction base=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_BorderId\">\n    <xsd:restriction base=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CellStyleXfId\">\n    <xsd:restriction base=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DxfId\">\n    <xsd:restriction base=\"xsd:unsignedInt\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Colors\">\n    <xsd:sequence>\n      <xsd:element name=\"indexedColors\" type=\"CT_IndexedColors\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"mruColors\" type=\"CT_MRUColors\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_IndexedColors\">\n    <xsd:sequence>\n      <xsd:element name=\"rgbColor\" type=\"CT_RgbColor\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MRUColors\">\n    <xsd:sequence>\n      <xsd:element name=\"color\" type=\"CT_Color\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RgbColor\">\n    <xsd:attribute name=\"rgb\" type=\"ST_UnsignedIntHex\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableStyles\">\n    <xsd:sequence>\n      <xsd:element name=\"tableStyle\" type=\"CT_TableStyle\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"defaultTableStyle\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"defaultPivotStyle\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableStyle\">\n    <xsd:sequence>\n      <xsd:element name=\"tableStyleElement\" type=\"CT_TableStyleElement\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"pivot\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"table\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableStyleElement\">\n    <xsd:attribute name=\"type\" type=\"ST_TableStyleType\" use=\"required\"/>\n    <xsd:attribute name=\"size\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"dxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TableStyleType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"wholeTable\"/>\n      <xsd:enumeration value=\"headerRow\"/>\n      <xsd:enumeration value=\"totalRow\"/>\n      <xsd:enumeration value=\"firstColumn\"/>\n      <xsd:enumeration value=\"lastColumn\"/>\n      <xsd:enumeration value=\"firstRowStripe\"/>\n      <xsd:enumeration value=\"secondRowStripe\"/>\n      <xsd:enumeration value=\"firstColumnStripe\"/>\n      <xsd:enumeration value=\"secondColumnStripe\"/>\n      <xsd:enumeration value=\"firstHeaderCell\"/>\n      <xsd:enumeration value=\"lastHeaderCell\"/>\n      <xsd:enumeration value=\"firstTotalCell\"/>\n      <xsd:enumeration value=\"lastTotalCell\"/>\n      <xsd:enumeration value=\"firstSubtotalColumn\"/>\n      <xsd:enumeration value=\"secondSubtotalColumn\"/>\n      <xsd:enumeration value=\"thirdSubtotalColumn\"/>\n      <xsd:enumeration value=\"firstSubtotalRow\"/>\n      <xsd:enumeration value=\"secondSubtotalRow\"/>\n      <xsd:enumeration value=\"thirdSubtotalRow\"/>\n      <xsd:enumeration value=\"blankRow\"/>\n      <xsd:enumeration value=\"firstColumnSubheading\"/>\n      <xsd:enumeration value=\"secondColumnSubheading\"/>\n      <xsd:enumeration value=\"thirdColumnSubheading\"/>\n      <xsd:enumeration value=\"firstRowSubheading\"/>\n      <xsd:enumeration value=\"secondRowSubheading\"/>\n      <xsd:enumeration value=\"thirdRowSubheading\"/>\n      <xsd:enumeration value=\"pageFieldLabels\"/>\n      <xsd:enumeration value=\"pageFieldValues\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_BooleanProperty\">\n    <xsd:attribute name=\"val\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FontSize\">\n    <xsd:attribute name=\"val\" type=\"xsd:double\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_IntProperty\">\n    <xsd:attribute name=\"val\" type=\"xsd:int\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FontName\">\n    <xsd:attribute name=\"val\" type=\"s:ST_Xstring\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_VerticalAlignFontProperty\">\n    <xsd:attribute name=\"val\" type=\"s:ST_VerticalAlignRun\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FontScheme\">\n    <xsd:attribute name=\"val\" type=\"ST_FontScheme\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FontScheme\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"major\"/>\n      <xsd:enumeration value=\"minor\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_UnderlineProperty\">\n    <xsd:attribute name=\"val\" type=\"ST_UnderlineValues\" use=\"optional\" default=\"single\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_UnderlineValues\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"single\"/>\n      <xsd:enumeration value=\"double\"/>\n      <xsd:enumeration value=\"singleAccounting\"/>\n      <xsd:enumeration value=\"doubleAccounting\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Font\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:element name=\"name\" type=\"CT_FontName\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"charset\" type=\"CT_IntProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"family\" type=\"CT_FontFamily\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"b\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"i\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"strike\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"outline\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shadow\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"condense\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extend\" type=\"CT_BooleanProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"color\" type=\"CT_Color\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sz\" type=\"CT_FontSize\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"u\" type=\"CT_UnderlineProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"vertAlign\" type=\"CT_VerticalAlignFontProperty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"scheme\" type=\"CT_FontScheme\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FontFamily\">\n    <xsd:attribute name=\"val\" type=\"ST_FontFamily\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FontFamily\">\n    <xsd:restriction base=\"xsd:integer\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"14\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:attributeGroup name=\"AG_AutoFormat\">\n    <xsd:attribute name=\"autoFormatId\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"applyNumberFormats\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"applyBorderFormats\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"applyFontFormats\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"applyPatternFormats\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"applyAlignmentFormats\" type=\"xsd:boolean\"/>\n    <xsd:attribute name=\"applyWidthHeightFormats\" type=\"xsd:boolean\"/>\n  </xsd:attributeGroup>\n  <xsd:element name=\"externalLink\" type=\"CT_ExternalLink\"/>\n  <xsd:complexType name=\"CT_ExternalLink\">\n    <xsd:sequence>\n      <xsd:choice>\n        <xsd:element name=\"externalBook\" type=\"CT_ExternalBook\" minOccurs=\"0\" maxOccurs=\"1\"/>\n        <xsd:element name=\"ddeLink\" type=\"CT_DdeLink\" minOccurs=\"0\" maxOccurs=\"1\"/>\n        <xsd:element name=\"oleLink\" type=\"CT_OleLink\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      </xsd:choice>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExternalBook\">\n    <xsd:sequence>\n      <xsd:element name=\"sheetNames\" type=\"CT_ExternalSheetNames\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"definedNames\" type=\"CT_ExternalDefinedNames\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sheetDataSet\" type=\"CT_ExternalSheetDataSet\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExternalSheetNames\">\n    <xsd:sequence>\n      <xsd:element name=\"sheetName\" minOccurs=\"1\" maxOccurs=\"unbounded\" type=\"CT_ExternalSheetName\"\n      />\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExternalSheetName\">\n    <xsd:attribute name=\"val\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExternalDefinedNames\">\n    <xsd:sequence>\n      <xsd:element name=\"definedName\" type=\"CT_ExternalDefinedName\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExternalDefinedName\">\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"refersTo\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"sheetId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExternalSheetDataSet\">\n    <xsd:sequence>\n      <xsd:element name=\"sheetData\" type=\"CT_ExternalSheetData\" minOccurs=\"1\" maxOccurs=\"unbounded\"\n      />\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExternalSheetData\">\n    <xsd:sequence>\n      <xsd:element name=\"row\" type=\"CT_ExternalRow\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"sheetId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"refreshError\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExternalRow\">\n    <xsd:sequence>\n      <xsd:element name=\"cell\" type=\"CT_ExternalCell\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"r\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExternalCell\">\n    <xsd:sequence>\n      <xsd:element name=\"v\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"r\" type=\"ST_CellRef\" use=\"optional\"/>\n    <xsd:attribute name=\"t\" type=\"ST_CellType\" use=\"optional\" default=\"n\"/>\n    <xsd:attribute name=\"vm\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DdeLink\">\n    <xsd:sequence>\n      <xsd:element name=\"ddeItems\" type=\"CT_DdeItems\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"ddeService\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"ddeTopic\" type=\"s:ST_Xstring\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DdeItems\">\n    <xsd:sequence>\n      <xsd:element name=\"ddeItem\" type=\"CT_DdeItem\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DdeItem\">\n    <xsd:sequence>\n      <xsd:element name=\"values\" type=\"CT_DdeValues\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" default=\"0\"/>\n    <xsd:attribute name=\"ole\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"advise\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"preferPic\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DdeValues\">\n    <xsd:sequence>\n      <xsd:element name=\"value\" minOccurs=\"1\" maxOccurs=\"unbounded\" type=\"CT_DdeValue\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"rows\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"cols\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DdeValue\">\n    <xsd:sequence>\n      <xsd:element name=\"val\" type=\"s:ST_Xstring\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"t\" type=\"ST_DdeValueType\" use=\"optional\" default=\"n\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DdeValueType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"nil\"/>\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"n\"/>\n      <xsd:enumeration value=\"e\"/>\n      <xsd:enumeration value=\"str\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_OleLink\">\n    <xsd:sequence>\n      <xsd:element name=\"oleItems\" type=\"CT_OleItems\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n    <xsd:attribute name=\"progId\" type=\"s:ST_Xstring\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OleItems\">\n    <xsd:sequence>\n      <xsd:element name=\"oleItem\" type=\"CT_OleItem\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OleItem\">\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"icon\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"advise\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"preferPic\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:element name=\"table\" type=\"CT_Table\"/>\n  <xsd:complexType name=\"CT_Table\">\n    <xsd:sequence>\n      <xsd:element name=\"autoFilter\" type=\"CT_AutoFilter\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sortState\" type=\"CT_SortState\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tableColumns\" type=\"CT_TableColumns\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tableStyleInfo\" type=\"CT_TableStyleInfo\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"displayName\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"comment\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"required\"/>\n    <xsd:attribute name=\"tableType\" type=\"ST_TableType\" use=\"optional\" default=\"worksheet\"/>\n    <xsd:attribute name=\"headerRowCount\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"insertRow\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"insertRowShift\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"totalsRowCount\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"totalsRowShown\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"published\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"headerRowDxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n    <xsd:attribute name=\"dataDxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n    <xsd:attribute name=\"totalsRowDxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n    <xsd:attribute name=\"headerRowBorderDxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n    <xsd:attribute name=\"tableBorderDxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n    <xsd:attribute name=\"totalsRowBorderDxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n    <xsd:attribute name=\"headerRowCellStyle\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"dataCellStyle\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"totalsRowCellStyle\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"connectionId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TableType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"worksheet\"/>\n      <xsd:enumeration value=\"xml\"/>\n      <xsd:enumeration value=\"queryTable\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TableStyleInfo\">\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"showFirstColumn\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"showLastColumn\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"showRowStripes\" type=\"xsd:boolean\" use=\"optional\"/>\n    <xsd:attribute name=\"showColumnStripes\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableColumns\">\n    <xsd:sequence>\n      <xsd:element name=\"tableColumn\" type=\"CT_TableColumn\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableColumn\">\n    <xsd:sequence>\n      <xsd:element name=\"calculatedColumnFormula\" type=\"CT_TableFormula\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"totalsRowFormula\" type=\"CT_TableFormula\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"xmlColumnPr\" type=\"CT_XmlColumnPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"uniqueName\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"totalsRowFunction\" type=\"ST_TotalsRowFunction\" use=\"optional\"\n      default=\"none\"/>\n    <xsd:attribute name=\"totalsRowLabel\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"queryTableFieldId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"headerRowDxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n    <xsd:attribute name=\"dataDxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n    <xsd:attribute name=\"totalsRowDxfId\" type=\"ST_DxfId\" use=\"optional\"/>\n    <xsd:attribute name=\"headerRowCellStyle\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"dataCellStyle\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"totalsRowCellStyle\" type=\"s:ST_Xstring\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TableFormula\">\n    <xsd:simpleContent>\n      <xsd:extension base=\"ST_Formula\">\n        <xsd:attribute name=\"array\" type=\"xsd:boolean\" default=\"false\"/>\n      </xsd:extension>\n    </xsd:simpleContent>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TotalsRowFunction\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"sum\"/>\n      <xsd:enumeration value=\"min\"/>\n      <xsd:enumeration value=\"max\"/>\n      <xsd:enumeration value=\"average\"/>\n      <xsd:enumeration value=\"count\"/>\n      <xsd:enumeration value=\"countNums\"/>\n      <xsd:enumeration value=\"stdDev\"/>\n      <xsd:enumeration value=\"var\"/>\n      <xsd:enumeration value=\"custom\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_XmlColumnPr\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"mapId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"xpath\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"denormalized\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"xmlDataType\" type=\"ST_XmlDataType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_XmlDataType\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:element name=\"volTypes\" type=\"CT_VolTypes\"/>\n  <xsd:complexType name=\"CT_VolTypes\">\n    <xsd:sequence>\n      <xsd:element name=\"volType\" type=\"CT_VolType\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_VolType\">\n    <xsd:sequence>\n      <xsd:element name=\"main\" type=\"CT_VolMain\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"type\" type=\"ST_VolDepType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_VolMain\">\n    <xsd:sequence>\n      <xsd:element name=\"tp\" type=\"CT_VolTopic\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"first\" type=\"s:ST_Xstring\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_VolTopic\">\n    <xsd:sequence>\n      <xsd:element name=\"v\" type=\"s:ST_Xstring\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"stp\" type=\"s:ST_Xstring\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"tr\" type=\"CT_VolTopicRef\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"t\" type=\"ST_VolValueType\" use=\"optional\" default=\"n\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_VolTopicRef\">\n    <xsd:attribute name=\"r\" type=\"ST_CellRef\" use=\"required\"/>\n    <xsd:attribute name=\"s\" type=\"xsd:unsignedInt\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_VolDepType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"realTimeData\"/>\n      <xsd:enumeration value=\"olapFunctions\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_VolValueType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"b\"/>\n      <xsd:enumeration value=\"n\"/>\n      <xsd:enumeration value=\"e\"/>\n      <xsd:enumeration value=\"s\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:element name=\"workbook\" type=\"CT_Workbook\"/>\n  <xsd:complexType name=\"CT_Workbook\">\n    <xsd:sequence>\n      <xsd:element name=\"fileVersion\" type=\"CT_FileVersion\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fileSharing\" type=\"CT_FileSharing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"workbookPr\" type=\"CT_WorkbookPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"workbookProtection\" type=\"CT_WorkbookProtection\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"bookViews\" type=\"CT_BookViews\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sheets\" type=\"CT_Sheets\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"functionGroups\" type=\"CT_FunctionGroups\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"externalReferences\" type=\"CT_ExternalReferences\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"definedNames\" type=\"CT_DefinedNames\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"calcPr\" type=\"CT_CalcPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"oleSize\" type=\"CT_OleSize\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"customWorkbookViews\" type=\"CT_CustomWorkbookViews\" minOccurs=\"0\"\n        maxOccurs=\"1\"/>\n      <xsd:element name=\"pivotCaches\" type=\"CT_PivotCaches\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"smartTagPr\" type=\"CT_SmartTagPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"smartTagTypes\" type=\"CT_SmartTagTypes\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"webPublishing\" type=\"CT_WebPublishing\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"fileRecoveryPr\" type=\"CT_FileRecoveryPr\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"webPublishObjects\" type=\"CT_WebPublishObjects\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"conformance\" type=\"s:ST_ConformanceClass\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FileVersion\">\n    <xsd:attribute name=\"appName\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"lastEdited\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"lowestEdited\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"rupBuild\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"codeName\" type=\"s:ST_Guid\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BookViews\">\n    <xsd:sequence>\n      <xsd:element name=\"workbookView\" type=\"CT_BookView\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BookView\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" type=\"CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"visibility\" type=\"ST_Visibility\" use=\"optional\" default=\"visible\"/>\n    <xsd:attribute name=\"minimized\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showHorizontalScroll\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showVerticalScroll\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showSheetTabs\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"xWindow\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"yWindow\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"windowWidth\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"windowHeight\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"tabRatio\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"600\"/>\n    <xsd:attribute name=\"firstSheet\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"activeTab\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"autoFilterDateGrouping\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Visibility\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"visible\"/>\n      <xsd:enumeration value=\"hidden\"/>\n      <xsd:enumeration value=\"veryHidden\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_CustomWorkbookViews\">\n    <xsd:sequence>\n      <xsd:element name=\"customWorkbookView\" minOccurs=\"1\" maxOccurs=\"unbounded\"\n        type=\"CT_CustomWorkbookView\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomWorkbookView\">\n    <xsd:sequence>\n      <xsd:element name=\"extLst\" minOccurs=\"0\" type=\"CT_ExtensionList\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"guid\" type=\"s:ST_Guid\" use=\"required\"/>\n    <xsd:attribute name=\"autoUpdate\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"mergeInterval\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"changesSavedWin\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"onlySync\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"personalView\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"includePrintSettings\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"includeHiddenRowCol\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"maximized\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"minimized\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showHorizontalScroll\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showVerticalScroll\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showSheetTabs\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"xWindow\" type=\"xsd:int\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"yWindow\" type=\"xsd:int\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"windowWidth\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"windowHeight\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"tabRatio\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"600\"/>\n    <xsd:attribute name=\"activeSheetId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"showFormulaBar\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showStatusbar\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"showComments\" type=\"ST_Comments\" use=\"optional\" default=\"commIndicator\"/>\n    <xsd:attribute name=\"showObjects\" type=\"ST_Objects\" use=\"optional\" default=\"all\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Comments\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"commNone\"/>\n      <xsd:enumeration value=\"commIndicator\"/>\n      <xsd:enumeration value=\"commIndAndComment\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Objects\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"all\"/>\n      <xsd:enumeration value=\"placeholders\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Sheets\">\n    <xsd:sequence>\n      <xsd:element name=\"sheet\" type=\"CT_Sheet\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Sheet\">\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"sheetId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"state\" type=\"ST_SheetState\" use=\"optional\" default=\"visible\"/>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SheetState\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"visible\"/>\n      <xsd:enumeration value=\"hidden\"/>\n      <xsd:enumeration value=\"veryHidden\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_WorkbookPr\">\n    <xsd:attribute name=\"date1904\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showObjects\" type=\"ST_Objects\" use=\"optional\" default=\"all\"/>\n    <xsd:attribute name=\"showBorderUnselectedTables\" type=\"xsd:boolean\" use=\"optional\"\n      default=\"true\"/>\n    <xsd:attribute name=\"filterPrivacy\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"promptedSolutions\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showInkAnnotation\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"backupFile\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"saveExternalLinkValues\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"updateLinks\" type=\"ST_UpdateLinks\" use=\"optional\" default=\"userSet\"/>\n    <xsd:attribute name=\"codeName\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"hidePivotFieldList\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"showPivotChartFilter\" type=\"xsd:boolean\" default=\"false\"/>\n    <xsd:attribute name=\"allowRefreshQuery\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"publishItems\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"checkCompatibility\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"autoCompressPictures\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"refreshAllConnections\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"defaultThemeVersion\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_UpdateLinks\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"userSet\"/>\n      <xsd:enumeration value=\"never\"/>\n      <xsd:enumeration value=\"always\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SmartTagPr\">\n    <xsd:attribute name=\"embed\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"show\" type=\"ST_SmartTagShow\" use=\"optional\" default=\"all\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SmartTagShow\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"all\"/>\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"noIndicator\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SmartTagTypes\">\n    <xsd:sequence>\n      <xsd:element name=\"smartTagType\" type=\"CT_SmartTagType\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SmartTagType\">\n    <xsd:attribute name=\"namespaceUri\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"url\" type=\"s:ST_Xstring\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FileRecoveryPr\">\n    <xsd:attribute name=\"autoRecover\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"crashSave\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"dataExtractLoad\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"repairLoad\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CalcPr\">\n    <xsd:attribute name=\"calcId\" type=\"xsd:unsignedInt\"/>\n    <xsd:attribute name=\"calcMode\" type=\"ST_CalcMode\" use=\"optional\" default=\"auto\"/>\n    <xsd:attribute name=\"fullCalcOnLoad\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"refMode\" type=\"ST_RefMode\" use=\"optional\" default=\"A1\"/>\n    <xsd:attribute name=\"iterate\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"iterateCount\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"100\"/>\n    <xsd:attribute name=\"iterateDelta\" type=\"xsd:double\" use=\"optional\" default=\"0.001\"/>\n    <xsd:attribute name=\"fullPrecision\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"calcCompleted\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"calcOnSave\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"concurrentCalc\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"concurrentManualCount\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"forceFullCalc\" type=\"xsd:boolean\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_CalcMode\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"manual\"/>\n      <xsd:enumeration value=\"auto\"/>\n      <xsd:enumeration value=\"autoNoTable\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_RefMode\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"A1\"/>\n      <xsd:enumeration value=\"R1C1\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DefinedNames\">\n    <xsd:sequence>\n      <xsd:element name=\"definedName\" type=\"CT_DefinedName\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DefinedName\">\n    <xsd:simpleContent>\n      <xsd:extension base=\"ST_Formula\">\n        <xsd:attribute name=\"name\" type=\"s:ST_Xstring\" use=\"required\"/>\n        <xsd:attribute name=\"comment\" type=\"s:ST_Xstring\" use=\"optional\"/>\n        <xsd:attribute name=\"customMenu\" type=\"s:ST_Xstring\" use=\"optional\"/>\n        <xsd:attribute name=\"description\" type=\"s:ST_Xstring\" use=\"optional\"/>\n        <xsd:attribute name=\"help\" type=\"s:ST_Xstring\" use=\"optional\"/>\n        <xsd:attribute name=\"statusBar\" type=\"s:ST_Xstring\" use=\"optional\"/>\n        <xsd:attribute name=\"localSheetId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n        <xsd:attribute name=\"hidden\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n        <xsd:attribute name=\"function\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n        <xsd:attribute name=\"vbProcedure\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n        <xsd:attribute name=\"xlm\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n        <xsd:attribute name=\"functionGroupId\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n        <xsd:attribute name=\"shortcutKey\" type=\"s:ST_Xstring\" use=\"optional\"/>\n        <xsd:attribute name=\"publishToServer\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n        <xsd:attribute name=\"workbookParameter\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n      </xsd:extension>\n    </xsd:simpleContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExternalReferences\">\n    <xsd:sequence>\n      <xsd:element name=\"externalReference\" type=\"CT_ExternalReference\" minOccurs=\"1\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ExternalReference\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SheetBackgroundPicture\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotCaches\">\n    <xsd:sequence>\n      <xsd:element name=\"pivotCache\" type=\"CT_PivotCache\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PivotCache\">\n    <xsd:attribute name=\"cacheId\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FileSharing\">\n    <xsd:attribute name=\"readOnlyRecommended\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"userName\" type=\"s:ST_Xstring\"/>\n    <xsd:attribute name=\"reservationPassword\" type=\"ST_UnsignedShortHex\"/>\n    <xsd:attribute name=\"algorithmName\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"hashValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"saltValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"spinCount\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OleSize\">\n    <xsd:attribute name=\"ref\" type=\"ST_Ref\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WorkbookProtection\">\n    <xsd:attribute name=\"workbookPassword\" type=\"ST_UnsignedShortHex\" use=\"optional\"/>\n    <xsd:attribute name=\"workbookPasswordCharacterSet\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"revisionsPassword\" type=\"ST_UnsignedShortHex\" use=\"optional\"/>\n    <xsd:attribute name=\"revisionsPasswordCharacterSet\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"lockStructure\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"lockWindows\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"lockRevision\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"revisionsAlgorithmName\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"revisionsHashValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"revisionsSaltValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"revisionsSpinCount\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"workbookAlgorithmName\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"workbookHashValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"workbookSaltValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"workbookSpinCount\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WebPublishing\">\n    <xsd:attribute name=\"css\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"thicket\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"longFileNames\" type=\"xsd:boolean\" use=\"optional\" default=\"true\"/>\n    <xsd:attribute name=\"vml\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"allowPng\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"targetScreenSize\" type=\"ST_TargetScreenSize\" use=\"optional\"\n      default=\"800x600\"/>\n    <xsd:attribute name=\"dpi\" type=\"xsd:unsignedInt\" use=\"optional\" default=\"96\"/>\n    <xsd:attribute name=\"codePage\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n    <xsd:attribute name=\"characterSet\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TargetScreenSize\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"544x376\"/>\n      <xsd:enumeration value=\"640x480\"/>\n      <xsd:enumeration value=\"720x512\"/>\n      <xsd:enumeration value=\"800x600\"/>\n      <xsd:enumeration value=\"1024x768\"/>\n      <xsd:enumeration value=\"1152x882\"/>\n      <xsd:enumeration value=\"1152x900\"/>\n      <xsd:enumeration value=\"1280x1024\"/>\n      <xsd:enumeration value=\"1600x1200\"/>\n      <xsd:enumeration value=\"1800x1440\"/>\n      <xsd:enumeration value=\"1920x1200\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_FunctionGroups\">\n    <xsd:sequence maxOccurs=\"unbounded\">\n      <xsd:element name=\"functionGroup\" type=\"CT_FunctionGroup\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"builtInGroupCount\" type=\"xsd:unsignedInt\" default=\"16\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FunctionGroup\">\n    <xsd:attribute name=\"name\" type=\"s:ST_Xstring\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WebPublishObjects\">\n    <xsd:sequence>\n      <xsd:element name=\"webPublishObject\" type=\"CT_WebPublishObject\" minOccurs=\"1\"\n        maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"count\" type=\"xsd:unsignedInt\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WebPublishObject\">\n    <xsd:attribute name=\"id\" type=\"xsd:unsignedInt\" use=\"required\"/>\n    <xsd:attribute name=\"divId\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"sourceObject\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"destinationFile\" type=\"s:ST_Xstring\" use=\"required\"/>\n    <xsd:attribute name=\"title\" type=\"s:ST_Xstring\" use=\"optional\"/>\n    <xsd:attribute name=\"autoRepublish\" type=\"xsd:boolean\" use=\"optional\" default=\"false\"/>\n  </xsd:complexType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\" xmlns=\"urn:schemas-microsoft-com:vml\"\n  xmlns:pvml=\"urn:schemas-microsoft-com:office:powerpoint\"\n  xmlns:o=\"urn:schemas-microsoft-com:office:office\"\n  xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"\n  xmlns:w10=\"urn:schemas-microsoft-com:office:word\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  xmlns:x=\"urn:schemas-microsoft-com:office:excel\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  targetNamespace=\"urn:schemas-microsoft-com:vml\" elementFormDefault=\"qualified\"\n  attributeFormDefault=\"unqualified\">\n  <xsd:import namespace=\"urn:schemas-microsoft-com:office:office\"\n    schemaLocation=\"vml-officeDrawing.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"\n    schemaLocation=\"wml.xsd\"/>\n  <xsd:import namespace=\"urn:schemas-microsoft-com:office:word\"\n    schemaLocation=\"vml-wordprocessingDrawing.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n    schemaLocation=\"shared-relationshipReference.xsd\"/>\n  <xsd:import namespace=\"urn:schemas-microsoft-com:office:excel\"\n    schemaLocation=\"vml-spreadsheetDrawing.xsd\"/>\n  <xsd:import namespace=\"urn:schemas-microsoft-com:office:powerpoint\"\n    schemaLocation=\"vml-presentationDrawing.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:attributeGroup name=\"AG_Id\">\n    <xsd:attribute name=\"id\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_Style\">\n    <xsd:attribute name=\"style\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_Type\">\n    <xsd:attribute name=\"type\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_Adj\">\n    <xsd:attribute name=\"adj\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_Path\">\n    <xsd:attribute name=\"path\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_Fill\">\n    <xsd:attribute name=\"filled\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"fillcolor\" type=\"s:ST_ColorType\" use=\"optional\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_Chromakey\">\n    <xsd:attribute name=\"chromakey\" type=\"s:ST_ColorType\" use=\"optional\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_Ext\">\n    <xsd:attribute name=\"ext\" form=\"qualified\" type=\"ST_Ext\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_CoreAttributes\">\n    <xsd:attributeGroup ref=\"AG_Id\"/>\n    <xsd:attributeGroup ref=\"AG_Style\"/>\n    <xsd:attribute name=\"href\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"target\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"class\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"title\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"alt\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"coordsize\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"coordorigin\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"wrapcoords\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"print\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_ShapeAttributes\">\n    <xsd:attributeGroup ref=\"AG_Chromakey\"/>\n    <xsd:attributeGroup ref=\"AG_Fill\"/>\n    <xsd:attribute name=\"opacity\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"stroked\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"strokecolor\" type=\"s:ST_ColorType\" use=\"optional\"/>\n    <xsd:attribute name=\"strokeweight\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"insetpen\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_OfficeCoreAttributes\">\n    <xsd:attribute ref=\"o:spid\"/>\n    <xsd:attribute ref=\"o:oned\"/>\n    <xsd:attribute ref=\"o:regroupid\"/>\n    <xsd:attribute ref=\"o:doubleclicknotify\"/>\n    <xsd:attribute ref=\"o:button\"/>\n    <xsd:attribute ref=\"o:userhidden\"/>\n    <xsd:attribute ref=\"o:bullet\"/>\n    <xsd:attribute ref=\"o:hr\"/>\n    <xsd:attribute ref=\"o:hrstd\"/>\n    <xsd:attribute ref=\"o:hrnoshade\"/>\n    <xsd:attribute ref=\"o:hrpct\"/>\n    <xsd:attribute ref=\"o:hralign\"/>\n    <xsd:attribute ref=\"o:allowincell\"/>\n    <xsd:attribute ref=\"o:allowoverlap\"/>\n    <xsd:attribute ref=\"o:userdrawn\"/>\n    <xsd:attribute ref=\"o:bordertopcolor\"/>\n    <xsd:attribute ref=\"o:borderleftcolor\"/>\n    <xsd:attribute ref=\"o:borderbottomcolor\"/>\n    <xsd:attribute ref=\"o:borderrightcolor\"/>\n    <xsd:attribute ref=\"o:dgmlayout\"/>\n    <xsd:attribute ref=\"o:dgmnodekind\"/>\n    <xsd:attribute ref=\"o:dgmlayoutmru\"/>\n    <xsd:attribute ref=\"o:insetmode\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_OfficeShapeAttributes\">\n    <xsd:attribute ref=\"o:spt\"/>\n    <xsd:attribute ref=\"o:connectortype\"/>\n    <xsd:attribute ref=\"o:bwmode\"/>\n    <xsd:attribute ref=\"o:bwpure\"/>\n    <xsd:attribute ref=\"o:bwnormal\"/>\n    <xsd:attribute ref=\"o:forcedash\"/>\n    <xsd:attribute ref=\"o:oleicon\"/>\n    <xsd:attribute ref=\"o:ole\"/>\n    <xsd:attribute ref=\"o:preferrelative\"/>\n    <xsd:attribute ref=\"o:cliptowrap\"/>\n    <xsd:attribute ref=\"o:clip\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_AllCoreAttributes\">\n    <xsd:attributeGroup ref=\"AG_CoreAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_OfficeCoreAttributes\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_AllShapeAttributes\">\n    <xsd:attributeGroup ref=\"AG_ShapeAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_OfficeShapeAttributes\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_ImageAttributes\">\n    <xsd:attribute name=\"src\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"cropleft\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"croptop\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"cropright\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"cropbottom\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"gain\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"blacklevel\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"gamma\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"grayscale\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"bilevel\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_StrokeAttributes\">\n    <xsd:attribute name=\"on\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"weight\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"color\" type=\"s:ST_ColorType\" use=\"optional\"/>\n    <xsd:attribute name=\"opacity\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"linestyle\" type=\"ST_StrokeLineStyle\" use=\"optional\"/>\n    <xsd:attribute name=\"miterlimit\" type=\"xsd:decimal\" use=\"optional\"/>\n    <xsd:attribute name=\"joinstyle\" type=\"ST_StrokeJoinStyle\" use=\"optional\"/>\n    <xsd:attribute name=\"endcap\" type=\"ST_StrokeEndCap\" use=\"optional\"/>\n    <xsd:attribute name=\"dashstyle\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"filltype\" type=\"ST_FillType\" use=\"optional\"/>\n    <xsd:attribute name=\"src\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"imageaspect\" type=\"ST_ImageAspect\" use=\"optional\"/>\n    <xsd:attribute name=\"imagesize\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"imagealignshape\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"color2\" type=\"s:ST_ColorType\" use=\"optional\"/>\n    <xsd:attribute name=\"startarrow\" type=\"ST_StrokeArrowType\" use=\"optional\"/>\n    <xsd:attribute name=\"startarrowwidth\" type=\"ST_StrokeArrowWidth\" use=\"optional\"/>\n    <xsd:attribute name=\"startarrowlength\" type=\"ST_StrokeArrowLength\" use=\"optional\"/>\n    <xsd:attribute name=\"endarrow\" type=\"ST_StrokeArrowType\" use=\"optional\"/>\n    <xsd:attribute name=\"endarrowwidth\" type=\"ST_StrokeArrowWidth\" use=\"optional\"/>\n    <xsd:attribute name=\"endarrowlength\" type=\"ST_StrokeArrowLength\" use=\"optional\"/>\n    <xsd:attribute ref=\"o:href\"/>\n    <xsd:attribute ref=\"o:althref\"/>\n    <xsd:attribute ref=\"o:title\"/>\n    <xsd:attribute ref=\"o:forcedash\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n    <xsd:attribute name=\"insetpen\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute ref=\"o:relid\"/>\n  </xsd:attributeGroup>\n  <xsd:group name=\"EG_ShapeElements\">\n    <xsd:choice>\n      <xsd:element ref=\"path\"/>\n      <xsd:element ref=\"formulas\"/>\n      <xsd:element ref=\"handles\"/>\n      <xsd:element ref=\"fill\"/>\n      <xsd:element ref=\"stroke\"/>\n      <xsd:element ref=\"shadow\"/>\n      <xsd:element ref=\"textbox\"/>\n      <xsd:element ref=\"textpath\"/>\n      <xsd:element ref=\"imagedata\"/>\n      <xsd:element ref=\"o:skew\"/>\n      <xsd:element ref=\"o:extrusion\"/>\n      <xsd:element ref=\"o:callout\"/>\n      <xsd:element ref=\"o:lock\"/>\n      <xsd:element ref=\"o:clippath\"/>\n      <xsd:element ref=\"o:signatureline\"/>\n      <xsd:element ref=\"w10:wrap\"/>\n      <xsd:element ref=\"w10:anchorlock\"/>\n      <xsd:element ref=\"w10:bordertop\"/>\n      <xsd:element ref=\"w10:borderbottom\"/>\n      <xsd:element ref=\"w10:borderleft\"/>\n      <xsd:element ref=\"w10:borderright\"/>\n      <xsd:element ref=\"x:ClientData\" minOccurs=\"0\"/>\n      <xsd:element ref=\"pvml:textdata\" minOccurs=\"0\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:element name=\"shape\" type=\"CT_Shape\"/>\n  <xsd:element name=\"shapetype\" type=\"CT_Shapetype\"/>\n  <xsd:element name=\"group\" type=\"CT_Group\"/>\n  <xsd:element name=\"background\" type=\"CT_Background\"/>\n  <xsd:complexType name=\"CT_Shape\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:group ref=\"EG_ShapeElements\"/>\n      <xsd:element ref=\"o:ink\"/>\n      <xsd:element ref=\"pvml:iscomment\"/>\n      <xsd:element ref=\"o:equationxml\"/>\n    </xsd:choice>\n    <xsd:attributeGroup ref=\"AG_AllCoreAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_AllShapeAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_Type\"/>\n    <xsd:attributeGroup ref=\"AG_Adj\"/>\n    <xsd:attributeGroup ref=\"AG_Path\"/>\n    <xsd:attribute ref=\"o:gfxdata\"/>\n    <xsd:attribute name=\"equationxml\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Shapetype\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ShapeElements\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element ref=\"o:complex\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_AllCoreAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_AllShapeAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_Adj\"/>\n    <xsd:attributeGroup ref=\"AG_Path\"/>\n    <xsd:attribute ref=\"o:master\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Group\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:group ref=\"EG_ShapeElements\"/>\n      <xsd:element ref=\"group\"/>\n      <xsd:element ref=\"shape\"/>\n      <xsd:element ref=\"shapetype\"/>\n      <xsd:element ref=\"arc\"/>\n      <xsd:element ref=\"curve\"/>\n      <xsd:element ref=\"image\"/>\n      <xsd:element ref=\"line\"/>\n      <xsd:element ref=\"oval\"/>\n      <xsd:element ref=\"polyline\"/>\n      <xsd:element ref=\"rect\"/>\n      <xsd:element ref=\"roundrect\"/>\n      <xsd:element ref=\"o:diagram\"/>\n    </xsd:choice>\n    <xsd:attributeGroup ref=\"AG_AllCoreAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_Fill\"/>\n    <xsd:attribute name=\"editas\" type=\"ST_EditAs\" use=\"optional\"/>\n    <xsd:attribute ref=\"o:tableproperties\"/>\n    <xsd:attribute ref=\"o:tablelimits\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Background\">\n    <xsd:sequence>\n      <xsd:element ref=\"fill\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_Id\"/>\n    <xsd:attributeGroup ref=\"AG_Fill\"/>\n    <xsd:attribute ref=\"o:bwmode\"/>\n    <xsd:attribute ref=\"o:bwpure\"/>\n    <xsd:attribute ref=\"o:bwnormal\"/>\n    <xsd:attribute ref=\"o:targetscreensize\"/>\n  </xsd:complexType>\n  <xsd:element name=\"fill\" type=\"CT_Fill\"/>\n  <xsd:element name=\"formulas\" type=\"CT_Formulas\"/>\n  <xsd:element name=\"handles\" type=\"CT_Handles\"/>\n  <xsd:element name=\"imagedata\" type=\"CT_ImageData\"/>\n  <xsd:element name=\"path\" type=\"CT_Path\"/>\n  <xsd:element name=\"textbox\" type=\"CT_Textbox\"/>\n  <xsd:element name=\"shadow\" type=\"CT_Shadow\"/>\n  <xsd:element name=\"stroke\" type=\"CT_Stroke\"/>\n  <xsd:element name=\"textpath\" type=\"CT_TextPath\"/>\n  <xsd:complexType name=\"CT_Fill\">\n    <xsd:sequence>\n      <xsd:element ref=\"o:fill\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_Id\"/>\n    <xsd:attribute name=\"type\" type=\"ST_FillType\" use=\"optional\"/>\n    <xsd:attribute name=\"on\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"color\" type=\"s:ST_ColorType\" use=\"optional\"/>\n    <xsd:attribute name=\"opacity\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"color2\" type=\"s:ST_ColorType\" use=\"optional\"/>\n    <xsd:attribute name=\"src\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute ref=\"o:href\"/>\n    <xsd:attribute ref=\"o:althref\"/>\n    <xsd:attribute name=\"size\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"origin\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"position\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"aspect\" type=\"ST_ImageAspect\" use=\"optional\"/>\n    <xsd:attribute name=\"colors\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"angle\" type=\"xsd:decimal\" use=\"optional\"/>\n    <xsd:attribute name=\"alignshape\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"focus\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"focussize\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"focusposition\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"method\" type=\"ST_FillMethod\" use=\"optional\"/>\n    <xsd:attribute ref=\"o:detectmouseclick\"/>\n    <xsd:attribute ref=\"o:title\"/>\n    <xsd:attribute ref=\"o:opacity2\"/>\n    <xsd:attribute name=\"recolor\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"rotate\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n    <xsd:attribute ref=\"o:relid\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Formulas\">\n    <xsd:sequence>\n      <xsd:element name=\"f\" type=\"CT_F\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_F\">\n    <xsd:attribute name=\"eqn\" type=\"xsd:string\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Handles\">\n    <xsd:sequence>\n      <xsd:element name=\"h\" type=\"CT_H\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_H\">\n    <xsd:attribute name=\"position\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"polar\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"map\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"invx\" type=\"s:ST_TrueFalse\"/>\n    <xsd:attribute name=\"invy\" type=\"s:ST_TrueFalse\"/>\n    <xsd:attribute name=\"switch\" type=\"s:ST_TrueFalseBlank\"/>\n    <xsd:attribute name=\"xrange\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"yrange\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"radiusrange\" type=\"xsd:string\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ImageData\">\n    <xsd:attributeGroup ref=\"AG_Id\"/>\n    <xsd:attributeGroup ref=\"AG_ImageAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_Chromakey\"/>\n    <xsd:attribute name=\"embosscolor\" type=\"s:ST_ColorType\" use=\"optional\"/>\n    <xsd:attribute name=\"recolortarget\" type=\"s:ST_ColorType\"/>\n    <xsd:attribute ref=\"o:href\"/>\n    <xsd:attribute ref=\"o:althref\"/>\n    <xsd:attribute ref=\"o:title\"/>\n    <xsd:attribute ref=\"o:oleid\"/>\n    <xsd:attribute ref=\"o:detectmouseclick\"/>\n    <xsd:attribute ref=\"o:movie\"/>\n    <xsd:attribute ref=\"o:relid\"/>\n    <xsd:attribute ref=\"r:id\"/>\n    <xsd:attribute ref=\"r:pict\"/>\n    <xsd:attribute ref=\"r:href\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Path\">\n    <xsd:attributeGroup ref=\"AG_Id\"/>\n    <xsd:attribute name=\"v\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"limo\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"textboxrect\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"fillok\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"strokeok\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"shadowok\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"arrowok\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"gradientshapeok\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"textpathok\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"insetpenok\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute ref=\"o:connecttype\"/>\n    <xsd:attribute ref=\"o:connectlocs\"/>\n    <xsd:attribute ref=\"o:connectangles\"/>\n    <xsd:attribute ref=\"o:extrusionok\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Shadow\">\n    <xsd:attributeGroup ref=\"AG_Id\"/>\n    <xsd:attribute name=\"on\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"type\" type=\"ST_ShadowType\" use=\"optional\"/>\n    <xsd:attribute name=\"obscured\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"color\" type=\"s:ST_ColorType\" use=\"optional\"/>\n    <xsd:attribute name=\"opacity\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"offset\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"color2\" type=\"s:ST_ColorType\" use=\"optional\"/>\n    <xsd:attribute name=\"offset2\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"origin\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"matrix\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Stroke\">\n    <xsd:sequence>\n      <xsd:element ref=\"o:left\" minOccurs=\"0\"/>\n      <xsd:element ref=\"o:top\" minOccurs=\"0\"/>\n      <xsd:element ref=\"o:right\" minOccurs=\"0\"/>\n      <xsd:element ref=\"o:bottom\" minOccurs=\"0\"/>\n      <xsd:element ref=\"o:column\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_Id\"/>\n    <xsd:attributeGroup ref=\"AG_StrokeAttributes\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Textbox\">\n    <xsd:choice>\n      <xsd:element ref=\"w:txbxContent\" minOccurs=\"0\"/>\n      <xsd:any namespace=\"##local\" processContents=\"skip\"/>\n    </xsd:choice>\n    <xsd:attributeGroup ref=\"AG_Id\"/>\n    <xsd:attributeGroup ref=\"AG_Style\"/>\n    <xsd:attribute name=\"inset\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute ref=\"o:singleclick\"/>\n    <xsd:attribute ref=\"o:insetmode\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TextPath\">\n    <xsd:attributeGroup ref=\"AG_Id\"/>\n    <xsd:attributeGroup ref=\"AG_Style\"/>\n    <xsd:attribute name=\"on\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"fitshape\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"fitpath\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"trim\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"xscale\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"string\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:element name=\"arc\" type=\"CT_Arc\"/>\n  <xsd:element name=\"curve\" type=\"CT_Curve\"/>\n  <xsd:element name=\"image\" type=\"CT_Image\"/>\n  <xsd:element name=\"line\" type=\"CT_Line\"/>\n  <xsd:element name=\"oval\" type=\"CT_Oval\"/>\n  <xsd:element name=\"polyline\" type=\"CT_PolyLine\"/>\n  <xsd:element name=\"rect\" type=\"CT_Rect\"/>\n  <xsd:element name=\"roundrect\" type=\"CT_RoundRect\"/>\n  <xsd:complexType name=\"CT_Arc\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ShapeElements\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_AllCoreAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_AllShapeAttributes\"/>\n    <xsd:attribute name=\"startAngle\" type=\"xsd:decimal\" use=\"optional\"/>\n    <xsd:attribute name=\"endAngle\" type=\"xsd:decimal\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Curve\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ShapeElements\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_AllCoreAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_AllShapeAttributes\"/>\n    <xsd:attribute name=\"from\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"control1\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"control2\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"to\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Image\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ShapeElements\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_AllCoreAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_AllShapeAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_ImageAttributes\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Line\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ShapeElements\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_AllCoreAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_AllShapeAttributes\"/>\n    <xsd:attribute name=\"from\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"to\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Oval\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:group ref=\"EG_ShapeElements\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n    <xsd:attributeGroup ref=\"AG_AllCoreAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_AllShapeAttributes\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PolyLine\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:group ref=\"EG_ShapeElements\"/>\n      <xsd:element ref=\"o:ink\"/>\n    </xsd:choice>\n    <xsd:attributeGroup ref=\"AG_AllCoreAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_AllShapeAttributes\"/>\n    <xsd:attribute name=\"points\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Rect\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:group ref=\"EG_ShapeElements\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n    <xsd:attributeGroup ref=\"AG_AllCoreAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_AllShapeAttributes\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RoundRect\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:group ref=\"EG_ShapeElements\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n    <xsd:attributeGroup ref=\"AG_AllCoreAttributes\"/>\n    <xsd:attributeGroup ref=\"AG_AllShapeAttributes\"/>\n    <xsd:attribute name=\"arcsize\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Ext\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"view\"/>\n      <xsd:enumeration value=\"edit\"/>\n      <xsd:enumeration value=\"backwardCompatible\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FillType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"solid\"/>\n      <xsd:enumeration value=\"gradient\"/>\n      <xsd:enumeration value=\"gradientRadial\"/>\n      <xsd:enumeration value=\"tile\"/>\n      <xsd:enumeration value=\"pattern\"/>\n      <xsd:enumeration value=\"frame\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FillMethod\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"linear\"/>\n      <xsd:enumeration value=\"sigma\"/>\n      <xsd:enumeration value=\"any\"/>\n      <xsd:enumeration value=\"linear sigma\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ShadowType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"single\"/>\n      <xsd:enumeration value=\"double\"/>\n      <xsd:enumeration value=\"emboss\"/>\n      <xsd:enumeration value=\"perspective\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_StrokeLineStyle\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"single\"/>\n      <xsd:enumeration value=\"thinThin\"/>\n      <xsd:enumeration value=\"thinThick\"/>\n      <xsd:enumeration value=\"thickThin\"/>\n      <xsd:enumeration value=\"thickBetweenThin\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_StrokeJoinStyle\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"round\"/>\n      <xsd:enumeration value=\"bevel\"/>\n      <xsd:enumeration value=\"miter\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_StrokeEndCap\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"flat\"/>\n      <xsd:enumeration value=\"square\"/>\n      <xsd:enumeration value=\"round\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_StrokeArrowLength\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"short\"/>\n      <xsd:enumeration value=\"medium\"/>\n      <xsd:enumeration value=\"long\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_StrokeArrowWidth\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"narrow\"/>\n      <xsd:enumeration value=\"medium\"/>\n      <xsd:enumeration value=\"wide\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_StrokeArrowType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"block\"/>\n      <xsd:enumeration value=\"classic\"/>\n      <xsd:enumeration value=\"oval\"/>\n      <xsd:enumeration value=\"diamond\"/>\n      <xsd:enumeration value=\"open\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ImageAspect\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"ignore\"/>\n      <xsd:enumeration value=\"atMost\"/>\n      <xsd:enumeration value=\"atLeast\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_EditAs\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"canvas\"/>\n      <xsd:enumeration value=\"orgchart\"/>\n      <xsd:enumeration value=\"radial\"/>\n      <xsd:enumeration value=\"cycle\"/>\n      <xsd:enumeration value=\"stacked\"/>\n      <xsd:enumeration value=\"venn\"/>\n      <xsd:enumeration value=\"bullseye\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"urn:schemas-microsoft-com:office:office\" xmlns:v=\"urn:schemas-microsoft-com:vml\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  targetNamespace=\"urn:schemas-microsoft-com:office:office\" elementFormDefault=\"qualified\"\n  attributeFormDefault=\"unqualified\">\n  <xsd:import namespace=\"urn:schemas-microsoft-com:vml\" schemaLocation=\"vml-main.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n    schemaLocation=\"shared-relationshipReference.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:attribute name=\"bwmode\" type=\"ST_BWMode\"/>\n  <xsd:attribute name=\"bwpure\" type=\"ST_BWMode\"/>\n  <xsd:attribute name=\"bwnormal\" type=\"ST_BWMode\"/>\n  <xsd:attribute name=\"targetscreensize\" type=\"ST_ScreenSize\"/>\n  <xsd:attribute name=\"insetmode\" type=\"ST_InsetMode\" default=\"custom\"/>\n  <xsd:attribute name=\"spt\" type=\"xsd:float\"/>\n  <xsd:attribute name=\"wrapcoords\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"oned\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"regroupid\" type=\"xsd:integer\"/>\n  <xsd:attribute name=\"doubleclicknotify\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"connectortype\" type=\"ST_ConnectorType\" default=\"straight\"/>\n  <xsd:attribute name=\"button\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"userhidden\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"forcedash\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"oleicon\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"ole\" type=\"s:ST_TrueFalseBlank\"/>\n  <xsd:attribute name=\"preferrelative\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"cliptowrap\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"clip\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"bullet\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"hr\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"hrstd\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"hrnoshade\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"hrpct\" type=\"xsd:float\"/>\n  <xsd:attribute name=\"hralign\" type=\"ST_HrAlign\" default=\"left\"/>\n  <xsd:attribute name=\"allowincell\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"allowoverlap\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"userdrawn\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"bordertopcolor\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"borderleftcolor\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"borderbottomcolor\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"borderrightcolor\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"connecttype\" type=\"ST_ConnectType\"/>\n  <xsd:attribute name=\"connectlocs\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"connectangles\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"master\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"extrusionok\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"href\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"althref\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"title\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"singleclick\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"oleid\" type=\"xsd:float\"/>\n  <xsd:attribute name=\"detectmouseclick\" type=\"s:ST_TrueFalse\"/>\n  <xsd:attribute name=\"movie\" type=\"xsd:float\"/>\n  <xsd:attribute name=\"spid\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"opacity2\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"relid\" type=\"r:ST_RelationshipId\"/>\n  <xsd:attribute name=\"dgmlayout\" type=\"ST_DiagramLayout\"/>\n  <xsd:attribute name=\"dgmnodekind\" type=\"xsd:integer\"/>\n  <xsd:attribute name=\"dgmlayoutmru\" type=\"ST_DiagramLayout\"/>\n  <xsd:attribute name=\"gfxdata\" type=\"xsd:base64Binary\"/>\n  <xsd:attribute name=\"tableproperties\" type=\"xsd:string\"/>\n  <xsd:attribute name=\"tablelimits\" type=\"xsd:string\"/>\n  <xsd:element name=\"shapedefaults\" type=\"CT_ShapeDefaults\"/>\n  <xsd:element name=\"shapelayout\" type=\"CT_ShapeLayout\"/>\n  <xsd:element name=\"signatureline\" type=\"CT_SignatureLine\"/>\n  <xsd:element name=\"ink\" type=\"CT_Ink\"/>\n  <xsd:element name=\"diagram\" type=\"CT_Diagram\"/>\n  <xsd:element name=\"equationxml\" type=\"CT_EquationXml\"/>\n  <xsd:complexType name=\"CT_ShapeDefaults\">\n    <xsd:all minOccurs=\"0\">\n      <xsd:element ref=\"v:fill\" minOccurs=\"0\"/>\n      <xsd:element ref=\"v:stroke\" minOccurs=\"0\"/>\n      <xsd:element ref=\"v:textbox\" minOccurs=\"0\"/>\n      <xsd:element ref=\"v:shadow\" minOccurs=\"0\"/>\n      <xsd:element ref=\"skew\" minOccurs=\"0\"/>\n      <xsd:element ref=\"extrusion\" minOccurs=\"0\"/>\n      <xsd:element ref=\"callout\" minOccurs=\"0\"/>\n      <xsd:element ref=\"lock\" minOccurs=\"0\"/>\n      <xsd:element name=\"colormru\" minOccurs=\"0\" type=\"CT_ColorMru\"/>\n      <xsd:element name=\"colormenu\" minOccurs=\"0\" type=\"CT_ColorMenu\"/>\n    </xsd:all>\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"spidmax\" type=\"xsd:integer\" use=\"optional\"/>\n    <xsd:attribute name=\"style\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"fill\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"fillcolor\" type=\"s:ST_ColorType\" use=\"optional\"/>\n    <xsd:attribute name=\"stroke\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"strokecolor\" type=\"s:ST_ColorType\"/>\n    <xsd:attribute name=\"allowincell\" form=\"qualified\" type=\"s:ST_TrueFalse\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Ink\">\n    <xsd:sequence/>\n    <xsd:attribute name=\"i\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"annotation\" type=\"s:ST_TrueFalse\"/>\n    <xsd:attribute name=\"contentType\" type=\"ST_ContentType\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SignatureLine\">\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"issignatureline\" type=\"s:ST_TrueFalse\"/>\n    <xsd:attribute name=\"id\" type=\"s:ST_Guid\"/>\n    <xsd:attribute name=\"provid\" type=\"s:ST_Guid\"/>\n    <xsd:attribute name=\"signinginstructionsset\" type=\"s:ST_TrueFalse\"/>\n    <xsd:attribute name=\"allowcomments\" type=\"s:ST_TrueFalse\"/>\n    <xsd:attribute name=\"showsigndate\" type=\"s:ST_TrueFalse\"/>\n    <xsd:attribute name=\"suggestedsigner\" type=\"xsd:string\" form=\"qualified\"/>\n    <xsd:attribute name=\"suggestedsigner2\" type=\"xsd:string\" form=\"qualified\"/>\n    <xsd:attribute name=\"suggestedsigneremail\" type=\"xsd:string\" form=\"qualified\"/>\n    <xsd:attribute name=\"signinginstructions\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"addlxml\" type=\"xsd:string\"/>\n    <xsd:attribute name=\"sigprovurl\" type=\"xsd:string\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ShapeLayout\">\n    <xsd:all>\n      <xsd:element name=\"idmap\" type=\"CT_IdMap\" minOccurs=\"0\"/>\n      <xsd:element name=\"regrouptable\" type=\"CT_RegroupTable\" minOccurs=\"0\"/>\n      <xsd:element name=\"rules\" type=\"CT_Rules\" minOccurs=\"0\"/>\n    </xsd:all>\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_IdMap\">\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"data\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RegroupTable\">\n    <xsd:sequence>\n      <xsd:element name=\"entry\" type=\"CT_Entry\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Entry\">\n    <xsd:attribute name=\"new\" type=\"xsd:int\" use=\"optional\"/>\n    <xsd:attribute name=\"old\" type=\"xsd:int\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Rules\">\n    <xsd:sequence>\n      <xsd:element name=\"r\" type=\"CT_R\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_R\">\n    <xsd:sequence>\n      <xsd:element name=\"proxy\" type=\"CT_Proxy\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"xsd:string\" use=\"required\"/>\n    <xsd:attribute name=\"type\" type=\"ST_RType\" use=\"optional\"/>\n    <xsd:attribute name=\"how\" type=\"ST_How\" use=\"optional\"/>\n    <xsd:attribute name=\"idref\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Proxy\">\n    <xsd:attribute name=\"start\" type=\"s:ST_TrueFalseBlank\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"end\" type=\"s:ST_TrueFalseBlank\" use=\"optional\" default=\"false\"/>\n    <xsd:attribute name=\"idref\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"connectloc\" type=\"xsd:int\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Diagram\">\n    <xsd:sequence>\n      <xsd:element name=\"relationtable\" type=\"CT_RelationTable\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"dgmstyle\" type=\"xsd:integer\" use=\"optional\"/>\n    <xsd:attribute name=\"autoformat\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"reverse\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"autolayout\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"dgmscalex\" type=\"xsd:integer\" use=\"optional\"/>\n    <xsd:attribute name=\"dgmscaley\" type=\"xsd:integer\" use=\"optional\"/>\n    <xsd:attribute name=\"dgmfontsize\" type=\"xsd:integer\" use=\"optional\"/>\n    <xsd:attribute name=\"constrainbounds\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"dgmbasetextscale\" type=\"xsd:integer\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EquationXml\">\n    <xsd:sequence>\n      <xsd:any namespace=\"##any\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"contentType\" type=\"ST_AlternateMathContentType\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_AlternateMathContentType\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_RelationTable\">\n    <xsd:sequence>\n      <xsd:element name=\"rel\" type=\"CT_Relation\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Relation\">\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"idsrc\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"iddest\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"idcntr\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColorMru\">\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"colors\" type=\"xsd:string\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ColorMenu\">\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"strokecolor\" type=\"s:ST_ColorType\"/>\n    <xsd:attribute name=\"fillcolor\" type=\"s:ST_ColorType\"/>\n    <xsd:attribute name=\"shadowcolor\" type=\"s:ST_ColorType\"/>\n    <xsd:attribute name=\"extrusioncolor\" type=\"s:ST_ColorType\"/>\n  </xsd:complexType>\n  <xsd:element name=\"skew\" type=\"CT_Skew\"/>\n  <xsd:element name=\"extrusion\" type=\"CT_Extrusion\"/>\n  <xsd:element name=\"callout\" type=\"CT_Callout\"/>\n  <xsd:element name=\"lock\" type=\"CT_Lock\"/>\n  <xsd:element name=\"OLEObject\" type=\"CT_OLEObject\"/>\n  <xsd:element name=\"complex\" type=\"CT_Complex\"/>\n  <xsd:element name=\"left\" type=\"CT_StrokeChild\"/>\n  <xsd:element name=\"top\" type=\"CT_StrokeChild\"/>\n  <xsd:element name=\"right\" type=\"CT_StrokeChild\"/>\n  <xsd:element name=\"bottom\" type=\"CT_StrokeChild\"/>\n  <xsd:element name=\"column\" type=\"CT_StrokeChild\"/>\n  <xsd:element name=\"clippath\" type=\"CT_ClipPath\"/>\n  <xsd:element name=\"fill\" type=\"CT_Fill\"/>\n  <xsd:complexType name=\"CT_Skew\">\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"id\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"on\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"offset\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"origin\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"matrix\" type=\"xsd:string\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Extrusion\">\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"on\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"type\" type=\"ST_ExtrusionType\" default=\"parallel\" use=\"optional\"/>\n    <xsd:attribute name=\"render\" type=\"ST_ExtrusionRender\" default=\"solid\" use=\"optional\"/>\n    <xsd:attribute name=\"viewpointorigin\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"viewpoint\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"plane\" type=\"ST_ExtrusionPlane\" default=\"XY\" use=\"optional\"/>\n    <xsd:attribute name=\"skewangle\" type=\"xsd:float\" use=\"optional\"/>\n    <xsd:attribute name=\"skewamt\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"foredepth\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"backdepth\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"orientation\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"orientationangle\" type=\"xsd:float\" use=\"optional\"/>\n    <xsd:attribute name=\"lockrotationcenter\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"autorotationcenter\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"rotationcenter\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"rotationangle\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"colormode\" type=\"ST_ColorMode\" use=\"optional\"/>\n    <xsd:attribute name=\"color\" type=\"s:ST_ColorType\" use=\"optional\"/>\n    <xsd:attribute name=\"shininess\" type=\"xsd:float\" use=\"optional\"/>\n    <xsd:attribute name=\"specularity\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"diffusity\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"metal\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"edge\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"facet\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"lightface\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"brightness\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"lightposition\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"lightlevel\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"lightharsh\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"lightposition2\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"lightlevel2\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"lightharsh2\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Callout\">\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"on\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"type\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"gap\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"angle\" type=\"ST_Angle\" use=\"optional\"/>\n    <xsd:attribute name=\"dropauto\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"drop\" type=\"ST_CalloutDrop\" use=\"optional\"/>\n    <xsd:attribute name=\"distance\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"lengthspecified\" type=\"s:ST_TrueFalse\" default=\"f\" use=\"optional\"/>\n    <xsd:attribute name=\"length\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"accentbar\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"textborder\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"minusx\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"minusy\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Lock\">\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"position\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"selection\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"grouping\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"ungrouping\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"rotation\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"cropping\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"verticies\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"adjusthandles\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"text\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"aspectratio\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"shapetype\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OLEObject\">\n    <xsd:sequence>\n      <xsd:element name=\"LinkType\" type=\"ST_OLELinkType\" minOccurs=\"0\"/>\n      <xsd:element name=\"LockedField\" type=\"s:ST_TrueFalseBlank\" minOccurs=\"0\"/>\n      <xsd:element name=\"FieldCodes\" type=\"xsd:string\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"Type\" type=\"ST_OLEType\" use=\"optional\"/>\n    <xsd:attribute name=\"ProgID\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"ShapeID\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"DrawAspect\" type=\"ST_OLEDrawAspect\" use=\"optional\"/>\n    <xsd:attribute name=\"ObjectID\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n    <xsd:attribute name=\"UpdateMode\" type=\"ST_OLEUpdateMode\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Complex\">\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_StrokeChild\">\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"on\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"weight\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"color\" type=\"s:ST_ColorType\" use=\"optional\"/>\n    <xsd:attribute name=\"color2\" type=\"s:ST_ColorType\" use=\"optional\"/>\n    <xsd:attribute name=\"opacity\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"linestyle\" type=\"v:ST_StrokeLineStyle\" use=\"optional\"/>\n    <xsd:attribute name=\"miterlimit\" type=\"xsd:decimal\" use=\"optional\"/>\n    <xsd:attribute name=\"joinstyle\" type=\"v:ST_StrokeJoinStyle\" use=\"optional\"/>\n    <xsd:attribute name=\"endcap\" type=\"v:ST_StrokeEndCap\" use=\"optional\"/>\n    <xsd:attribute name=\"dashstyle\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"insetpen\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"filltype\" type=\"v:ST_FillType\" use=\"optional\"/>\n    <xsd:attribute name=\"src\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"imageaspect\" type=\"v:ST_ImageAspect\" use=\"optional\"/>\n    <xsd:attribute name=\"imagesize\" type=\"xsd:string\" use=\"optional\"/>\n    <xsd:attribute name=\"imagealignshape\" type=\"s:ST_TrueFalse\" use=\"optional\"/>\n    <xsd:attribute name=\"startarrow\" type=\"v:ST_StrokeArrowType\" use=\"optional\"/>\n    <xsd:attribute name=\"startarrowwidth\" type=\"v:ST_StrokeArrowWidth\" use=\"optional\"/>\n    <xsd:attribute name=\"startarrowlength\" type=\"v:ST_StrokeArrowLength\" use=\"optional\"/>\n    <xsd:attribute name=\"endarrow\" type=\"v:ST_StrokeArrowType\" use=\"optional\"/>\n    <xsd:attribute name=\"endarrowwidth\" type=\"v:ST_StrokeArrowWidth\" use=\"optional\"/>\n    <xsd:attribute name=\"endarrowlength\" type=\"v:ST_StrokeArrowLength\" use=\"optional\"/>\n    <xsd:attribute ref=\"href\"/>\n    <xsd:attribute ref=\"althref\"/>\n    <xsd:attribute ref=\"title\"/>\n    <xsd:attribute ref=\"forcedash\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ClipPath\">\n    <xsd:attribute name=\"v\" type=\"xsd:string\" use=\"required\" form=\"qualified\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Fill\">\n    <xsd:attributeGroup ref=\"v:AG_Ext\"/>\n    <xsd:attribute name=\"type\" type=\"ST_FillType\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_RType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"arc\"/>\n      <xsd:enumeration value=\"callout\"/>\n      <xsd:enumeration value=\"connector\"/>\n      <xsd:enumeration value=\"align\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_How\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"top\"/>\n      <xsd:enumeration value=\"middle\"/>\n      <xsd:enumeration value=\"bottom\"/>\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"right\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_BWMode\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"color\"/>\n      <xsd:enumeration value=\"auto\"/>\n      <xsd:enumeration value=\"grayScale\"/>\n      <xsd:enumeration value=\"lightGrayscale\"/>\n      <xsd:enumeration value=\"inverseGray\"/>\n      <xsd:enumeration value=\"grayOutline\"/>\n      <xsd:enumeration value=\"highContrast\"/>\n      <xsd:enumeration value=\"black\"/>\n      <xsd:enumeration value=\"white\"/>\n      <xsd:enumeration value=\"hide\"/>\n      <xsd:enumeration value=\"undrawn\"/>\n      <xsd:enumeration value=\"blackTextAndLines\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ScreenSize\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"544,376\"/>\n      <xsd:enumeration value=\"640,480\"/>\n      <xsd:enumeration value=\"720,512\"/>\n      <xsd:enumeration value=\"800,600\"/>\n      <xsd:enumeration value=\"1024,768\"/>\n      <xsd:enumeration value=\"1152,862\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_InsetMode\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"auto\"/>\n      <xsd:enumeration value=\"custom\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ColorMode\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"auto\"/>\n      <xsd:enumeration value=\"custom\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ContentType\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DiagramLayout\">\n    <xsd:restriction base=\"xsd:integer\">\n      <xsd:enumeration value=\"0\"/>\n      <xsd:enumeration value=\"1\"/>\n      <xsd:enumeration value=\"2\"/>\n      <xsd:enumeration value=\"3\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ExtrusionType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"perspective\"/>\n      <xsd:enumeration value=\"parallel\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ExtrusionRender\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"solid\"/>\n      <xsd:enumeration value=\"wireFrame\"/>\n      <xsd:enumeration value=\"boundingCube\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ExtrusionPlane\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"XY\"/>\n      <xsd:enumeration value=\"ZX\"/>\n      <xsd:enumeration value=\"YZ\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Angle\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"any\"/>\n      <xsd:enumeration value=\"30\"/>\n      <xsd:enumeration value=\"45\"/>\n      <xsd:enumeration value=\"60\"/>\n      <xsd:enumeration value=\"90\"/>\n      <xsd:enumeration value=\"auto\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CalloutDrop\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_CalloutPlacement\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"top\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"bottom\"/>\n      <xsd:enumeration value=\"user\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ConnectorType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"straight\"/>\n      <xsd:enumeration value=\"elbow\"/>\n      <xsd:enumeration value=\"curved\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HrAlign\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"right\"/>\n      <xsd:enumeration value=\"center\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ConnectType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"rect\"/>\n      <xsd:enumeration value=\"segments\"/>\n      <xsd:enumeration value=\"custom\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_OLELinkType\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_OLEType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"Embed\"/>\n      <xsd:enumeration value=\"Link\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_OLEDrawAspect\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"Content\"/>\n      <xsd:enumeration value=\"Icon\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_OLEUpdateMode\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"Always\"/>\n      <xsd:enumeration value=\"OnCall\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FillType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"gradientCenter\"/>\n      <xsd:enumeration value=\"solid\"/>\n      <xsd:enumeration value=\"pattern\"/>\n      <xsd:enumeration value=\"tile\"/>\n      <xsd:enumeration value=\"frame\"/>\n      <xsd:enumeration value=\"gradientUnscaled\"/>\n      <xsd:enumeration value=\"gradientRadial\"/>\n      <xsd:enumeration value=\"gradient\"/>\n      <xsd:enumeration value=\"background\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"urn:schemas-microsoft-com:office:powerpoint\"\n  targetNamespace=\"urn:schemas-microsoft-com:office:powerpoint\" elementFormDefault=\"qualified\"\n  attributeFormDefault=\"unqualified\">\n  <xsd:element name=\"iscomment\" type=\"CT_Empty\"/>\n  <xsd:element name=\"textdata\" type=\"CT_Rel\"/>\n  <xsd:complexType name=\"CT_Empty\"/>\n  <xsd:complexType name=\"CT_Rel\">\n    <xsd:attribute name=\"id\" type=\"xsd:string\"/>\n  </xsd:complexType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"urn:schemas-microsoft-com:office:excel\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  targetNamespace=\"urn:schemas-microsoft-com:office:excel\" elementFormDefault=\"qualified\"\n  attributeFormDefault=\"unqualified\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:element name=\"ClientData\" type=\"CT_ClientData\"/>\n  <xsd:complexType name=\"CT_ClientData\">\n    <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"MoveWithCells\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"SizeWithCells\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"Anchor\" type=\"xsd:string\"/>\n      <xsd:element name=\"Locked\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"DefaultSize\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"PrintObject\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"Disabled\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"AutoFill\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"AutoLine\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"AutoPict\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"FmlaMacro\" type=\"xsd:string\"/>\n      <xsd:element name=\"TextHAlign\" type=\"xsd:string\"/>\n      <xsd:element name=\"TextVAlign\" type=\"xsd:string\"/>\n      <xsd:element name=\"LockText\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"JustLastX\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"SecretEdit\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"Default\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"Help\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"Cancel\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"Dismiss\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"Accel\" type=\"xsd:integer\"/>\n      <xsd:element name=\"Accel2\" type=\"xsd:integer\"/>\n      <xsd:element name=\"Row\" type=\"xsd:integer\"/>\n      <xsd:element name=\"Column\" type=\"xsd:integer\"/>\n      <xsd:element name=\"Visible\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"RowHidden\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"ColHidden\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"VTEdit\" type=\"xsd:integer\"/>\n      <xsd:element name=\"MultiLine\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"VScroll\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"ValidIds\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"FmlaRange\" type=\"xsd:string\"/>\n      <xsd:element name=\"WidthMin\" type=\"xsd:integer\"/>\n      <xsd:element name=\"Sel\" type=\"xsd:integer\"/>\n      <xsd:element name=\"NoThreeD2\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"SelType\" type=\"xsd:string\"/>\n      <xsd:element name=\"MultiSel\" type=\"xsd:string\"/>\n      <xsd:element name=\"LCT\" type=\"xsd:string\"/>\n      <xsd:element name=\"ListItem\" type=\"xsd:string\"/>\n      <xsd:element name=\"DropStyle\" type=\"xsd:string\"/>\n      <xsd:element name=\"Colored\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"DropLines\" type=\"xsd:integer\"/>\n      <xsd:element name=\"Checked\" type=\"xsd:integer\"/>\n      <xsd:element name=\"FmlaLink\" type=\"xsd:string\"/>\n      <xsd:element name=\"FmlaPict\" type=\"xsd:string\"/>\n      <xsd:element name=\"NoThreeD\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"FirstButton\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"FmlaGroup\" type=\"xsd:string\"/>\n      <xsd:element name=\"Val\" type=\"xsd:integer\"/>\n      <xsd:element name=\"Min\" type=\"xsd:integer\"/>\n      <xsd:element name=\"Max\" type=\"xsd:integer\"/>\n      <xsd:element name=\"Inc\" type=\"xsd:integer\"/>\n      <xsd:element name=\"Page\" type=\"xsd:integer\"/>\n      <xsd:element name=\"Horiz\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"Dx\" type=\"xsd:integer\"/>\n      <xsd:element name=\"MapOCX\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"CF\" type=\"ST_CF\"/>\n      <xsd:element name=\"Camera\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"RecalcAlways\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"AutoScale\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"DDE\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"UIObj\" type=\"s:ST_TrueFalseBlank\"/>\n      <xsd:element name=\"ScriptText\" type=\"xsd:string\"/>\n      <xsd:element name=\"ScriptExtended\" type=\"xsd:string\"/>\n      <xsd:element name=\"ScriptLanguage\" type=\"xsd:nonNegativeInteger\"/>\n      <xsd:element name=\"ScriptLocation\" type=\"xsd:nonNegativeInteger\"/>\n      <xsd:element name=\"FmlaTxbx\" type=\"xsd:string\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"ObjectType\" type=\"ST_ObjectType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_CF\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_ObjectType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"Button\"/>\n      <xsd:enumeration value=\"Checkbox\"/>\n      <xsd:enumeration value=\"Dialog\"/>\n      <xsd:enumeration value=\"Drop\"/>\n      <xsd:enumeration value=\"Edit\"/>\n      <xsd:enumeration value=\"GBox\"/>\n      <xsd:enumeration value=\"Label\"/>\n      <xsd:enumeration value=\"LineA\"/>\n      <xsd:enumeration value=\"List\"/>\n      <xsd:enumeration value=\"Movie\"/>\n      <xsd:enumeration value=\"Note\"/>\n      <xsd:enumeration value=\"Pict\"/>\n      <xsd:enumeration value=\"Radio\"/>\n      <xsd:enumeration value=\"RectA\"/>\n      <xsd:enumeration value=\"Scroll\"/>\n      <xsd:enumeration value=\"Spin\"/>\n      <xsd:enumeration value=\"Shape\"/>\n      <xsd:enumeration value=\"Group\"/>\n      <xsd:enumeration value=\"Rect\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns=\"urn:schemas-microsoft-com:office:word\"\n  targetNamespace=\"urn:schemas-microsoft-com:office:word\" elementFormDefault=\"qualified\"\n  attributeFormDefault=\"unqualified\">\n  <xsd:element name=\"bordertop\" type=\"CT_Border\"/>\n  <xsd:element name=\"borderleft\" type=\"CT_Border\"/>\n  <xsd:element name=\"borderright\" type=\"CT_Border\"/>\n  <xsd:element name=\"borderbottom\" type=\"CT_Border\"/>\n  <xsd:complexType name=\"CT_Border\">\n    <xsd:attribute name=\"type\" type=\"ST_BorderType\" use=\"optional\"/>\n    <xsd:attribute name=\"width\" type=\"xsd:positiveInteger\" use=\"optional\"/>\n    <xsd:attribute name=\"shadow\" type=\"ST_BorderShadow\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:element name=\"wrap\" type=\"CT_Wrap\"/>\n  <xsd:complexType name=\"CT_Wrap\">\n    <xsd:attribute name=\"type\" type=\"ST_WrapType\" use=\"optional\"/>\n    <xsd:attribute name=\"side\" type=\"ST_WrapSide\" use=\"optional\"/>\n    <xsd:attribute name=\"anchorx\" type=\"ST_HorizontalAnchor\" use=\"optional\"/>\n    <xsd:attribute name=\"anchory\" type=\"ST_VerticalAnchor\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:element name=\"anchorlock\" type=\"CT_AnchorLock\"/>\n  <xsd:complexType name=\"CT_AnchorLock\"/>\n  <xsd:simpleType name=\"ST_BorderType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"single\"/>\n      <xsd:enumeration value=\"thick\"/>\n      <xsd:enumeration value=\"double\"/>\n      <xsd:enumeration value=\"hairline\"/>\n      <xsd:enumeration value=\"dot\"/>\n      <xsd:enumeration value=\"dash\"/>\n      <xsd:enumeration value=\"dotDash\"/>\n      <xsd:enumeration value=\"dashDotDot\"/>\n      <xsd:enumeration value=\"triple\"/>\n      <xsd:enumeration value=\"thinThickSmall\"/>\n      <xsd:enumeration value=\"thickThinSmall\"/>\n      <xsd:enumeration value=\"thickBetweenThinSmall\"/>\n      <xsd:enumeration value=\"thinThick\"/>\n      <xsd:enumeration value=\"thickThin\"/>\n      <xsd:enumeration value=\"thickBetweenThin\"/>\n      <xsd:enumeration value=\"thinThickLarge\"/>\n      <xsd:enumeration value=\"thickThinLarge\"/>\n      <xsd:enumeration value=\"thickBetweenThinLarge\"/>\n      <xsd:enumeration value=\"wave\"/>\n      <xsd:enumeration value=\"doubleWave\"/>\n      <xsd:enumeration value=\"dashedSmall\"/>\n      <xsd:enumeration value=\"dashDotStroked\"/>\n      <xsd:enumeration value=\"threeDEmboss\"/>\n      <xsd:enumeration value=\"threeDEngrave\"/>\n      <xsd:enumeration value=\"HTMLOutset\"/>\n      <xsd:enumeration value=\"HTMLInset\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_BorderShadow\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"t\"/>\n      <xsd:enumeration value=\"true\"/>\n      <xsd:enumeration value=\"f\"/>\n      <xsd:enumeration value=\"false\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_WrapType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"topAndBottom\"/>\n      <xsd:enumeration value=\"square\"/>\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"tight\"/>\n      <xsd:enumeration value=\"through\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_WrapSide\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"both\"/>\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"right\"/>\n      <xsd:enumeration value=\"largest\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HorizontalAnchor\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"margin\"/>\n      <xsd:enumeration value=\"page\"/>\n      <xsd:enumeration value=\"text\"/>\n      <xsd:enumeration value=\"char\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_VerticalAnchor\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"margin\"/>\n      <xsd:enumeration value=\"page\"/>\n      <xsd:enumeration value=\"text\"/>\n      <xsd:enumeration value=\"line\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  xmlns:m=\"http://schemas.openxmlformats.org/officeDocument/2006/math\"\n  xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n  xmlns:sl=\"http://schemas.openxmlformats.org/schemaLibrary/2006/main\"\n  xmlns:wp=\"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing\"\n  xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"\n  xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n  xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\"\n  elementFormDefault=\"qualified\" attributeFormDefault=\"qualified\" blockDefault=\"#all\"\n  targetNamespace=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\">\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" schemaLocation=\"../mce/mc.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing\"\n    schemaLocation=\"dml-wordprocessingDrawing.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/math\"\n    schemaLocation=\"shared-math.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n    schemaLocation=\"shared-relationshipReference.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\"\n    schemaLocation=\"shared-commonSimpleTypes.xsd\"/>\n  <xsd:import namespace=\"http://schemas.openxmlformats.org/schemaLibrary/2006/main\"\n    schemaLocation=\"shared-customXmlSchemaProperties.xsd\"/>\n  <xsd:import namespace=\"http://www.w3.org/XML/1998/namespace\"/>\n  <xsd:complexType name=\"CT_Empty\"/>\n  <xsd:complexType name=\"CT_OnOff\">\n    <xsd:attribute name=\"val\" type=\"s:ST_OnOff\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_LongHexNumber\">\n    <xsd:restriction base=\"xsd:hexBinary\">\n      <xsd:length value=\"4\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LongHexNumber\">\n    <xsd:attribute name=\"val\" type=\"ST_LongHexNumber\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ShortHexNumber\">\n    <xsd:restriction base=\"xsd:hexBinary\">\n      <xsd:length value=\"2\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_UcharHexNumber\">\n    <xsd:restriction base=\"xsd:hexBinary\">\n      <xsd:length value=\"1\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Charset\">\n    <xsd:attribute name=\"val\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"characterSet\" type=\"s:ST_String\" use=\"optional\" default=\"ISO-8859-1\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DecimalNumberOrPercent\">\n    <xsd:union memberTypes=\"ST_UnqualifiedPercentage s:ST_Percentage\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_UnqualifiedPercentage\">\n    <xsd:restriction base=\"xsd:decimal\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DecimalNumber\">\n    <xsd:restriction base=\"xsd:integer\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DecimalNumber\">\n    <xsd:attribute name=\"val\" type=\"ST_DecimalNumber\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_UnsignedDecimalNumber\">\n    <xsd:attribute name=\"val\" type=\"s:ST_UnsignedDecimalNumber\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DecimalNumberOrPrecent\">\n    <xsd:attribute name=\"val\" type=\"ST_DecimalNumberOrPercent\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TwipsMeasure\">\n    <xsd:attribute name=\"val\" type=\"s:ST_TwipsMeasure\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SignedTwipsMeasure\">\n    <xsd:union memberTypes=\"xsd:integer s:ST_UniversalMeasure\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SignedTwipsMeasure\">\n    <xsd:attribute name=\"val\" type=\"ST_SignedTwipsMeasure\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PixelsMeasure\">\n    <xsd:restriction base=\"s:ST_UnsignedDecimalNumber\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PixelsMeasure\">\n    <xsd:attribute name=\"val\" type=\"ST_PixelsMeasure\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_HpsMeasure\">\n    <xsd:union memberTypes=\"s:ST_UnsignedDecimalNumber s:ST_PositiveUniversalMeasure\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_HpsMeasure\">\n    <xsd:attribute name=\"val\" type=\"ST_HpsMeasure\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SignedHpsMeasure\">\n    <xsd:union memberTypes=\"xsd:integer s:ST_UniversalMeasure\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SignedHpsMeasure\">\n    <xsd:attribute name=\"val\" type=\"ST_SignedHpsMeasure\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DateTime\">\n    <xsd:restriction base=\"xsd:dateTime\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_MacroName\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:maxLength value=\"33\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_MacroName\">\n    <xsd:attribute name=\"val\" use=\"required\" type=\"ST_MacroName\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_EighthPointMeasure\">\n    <xsd:restriction base=\"s:ST_UnsignedDecimalNumber\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PointMeasure\">\n    <xsd:restriction base=\"s:ST_UnsignedDecimalNumber\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_String\">\n    <xsd:attribute name=\"val\" type=\"s:ST_String\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextScale\">\n    <xsd:union memberTypes=\"ST_TextScalePercent ST_TextScaleDecimal\"/>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextScalePercent\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern value=\"0*(600|([0-5]?[0-9]?[0-9]))%\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TextScaleDecimal\">\n    <xsd:restriction base=\"xsd:integer\">\n      <xsd:minInclusive value=\"0\"/>\n      <xsd:maxInclusive value=\"600\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextScale\">\n    <xsd:attribute name=\"val\" type=\"ST_TextScale\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_HighlightColor\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"black\"/>\n      <xsd:enumeration value=\"blue\"/>\n      <xsd:enumeration value=\"cyan\"/>\n      <xsd:enumeration value=\"green\"/>\n      <xsd:enumeration value=\"magenta\"/>\n      <xsd:enumeration value=\"red\"/>\n      <xsd:enumeration value=\"yellow\"/>\n      <xsd:enumeration value=\"white\"/>\n      <xsd:enumeration value=\"darkBlue\"/>\n      <xsd:enumeration value=\"darkCyan\"/>\n      <xsd:enumeration value=\"darkGreen\"/>\n      <xsd:enumeration value=\"darkMagenta\"/>\n      <xsd:enumeration value=\"darkRed\"/>\n      <xsd:enumeration value=\"darkYellow\"/>\n      <xsd:enumeration value=\"darkGray\"/>\n      <xsd:enumeration value=\"lightGray\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Highlight\">\n    <xsd:attribute name=\"val\" type=\"ST_HighlightColor\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_HexColorAuto\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"auto\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HexColor\">\n    <xsd:union memberTypes=\"ST_HexColorAuto s:ST_HexColorRGB\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Color\">\n    <xsd:attribute name=\"val\" type=\"ST_HexColor\" use=\"required\"/>\n    <xsd:attribute name=\"themeColor\" type=\"ST_ThemeColor\" use=\"optional\"/>\n    <xsd:attribute name=\"themeTint\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"themeShade\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Lang\">\n    <xsd:attribute name=\"val\" type=\"s:ST_Lang\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Guid\">\n    <xsd:attribute name=\"val\" type=\"s:ST_Guid\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Underline\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"single\"/>\n      <xsd:enumeration value=\"words\"/>\n      <xsd:enumeration value=\"double\"/>\n      <xsd:enumeration value=\"thick\"/>\n      <xsd:enumeration value=\"dotted\"/>\n      <xsd:enumeration value=\"dottedHeavy\"/>\n      <xsd:enumeration value=\"dash\"/>\n      <xsd:enumeration value=\"dashedHeavy\"/>\n      <xsd:enumeration value=\"dashLong\"/>\n      <xsd:enumeration value=\"dashLongHeavy\"/>\n      <xsd:enumeration value=\"dotDash\"/>\n      <xsd:enumeration value=\"dashDotHeavy\"/>\n      <xsd:enumeration value=\"dotDotDash\"/>\n      <xsd:enumeration value=\"dashDotDotHeavy\"/>\n      <xsd:enumeration value=\"wave\"/>\n      <xsd:enumeration value=\"wavyHeavy\"/>\n      <xsd:enumeration value=\"wavyDouble\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Underline\">\n    <xsd:attribute name=\"val\" type=\"ST_Underline\" use=\"optional\"/>\n    <xsd:attribute name=\"color\" type=\"ST_HexColor\" use=\"optional\" default=\"auto\"/>\n    <xsd:attribute name=\"themeColor\" type=\"ST_ThemeColor\" use=\"optional\"/>\n    <xsd:attribute name=\"themeTint\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"themeShade\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextEffect\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"blinkBackground\"/>\n      <xsd:enumeration value=\"lights\"/>\n      <xsd:enumeration value=\"antsBlack\"/>\n      <xsd:enumeration value=\"antsRed\"/>\n      <xsd:enumeration value=\"shimmer\"/>\n      <xsd:enumeration value=\"sparkle\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextEffect\">\n    <xsd:attribute name=\"val\" type=\"ST_TextEffect\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Border\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"nil\"/>\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"single\"/>\n      <xsd:enumeration value=\"thick\"/>\n      <xsd:enumeration value=\"double\"/>\n      <xsd:enumeration value=\"dotted\"/>\n      <xsd:enumeration value=\"dashed\"/>\n      <xsd:enumeration value=\"dotDash\"/>\n      <xsd:enumeration value=\"dotDotDash\"/>\n      <xsd:enumeration value=\"triple\"/>\n      <xsd:enumeration value=\"thinThickSmallGap\"/>\n      <xsd:enumeration value=\"thickThinSmallGap\"/>\n      <xsd:enumeration value=\"thinThickThinSmallGap\"/>\n      <xsd:enumeration value=\"thinThickMediumGap\"/>\n      <xsd:enumeration value=\"thickThinMediumGap\"/>\n      <xsd:enumeration value=\"thinThickThinMediumGap\"/>\n      <xsd:enumeration value=\"thinThickLargeGap\"/>\n      <xsd:enumeration value=\"thickThinLargeGap\"/>\n      <xsd:enumeration value=\"thinThickThinLargeGap\"/>\n      <xsd:enumeration value=\"wave\"/>\n      <xsd:enumeration value=\"doubleWave\"/>\n      <xsd:enumeration value=\"dashSmallGap\"/>\n      <xsd:enumeration value=\"dashDotStroked\"/>\n      <xsd:enumeration value=\"threeDEmboss\"/>\n      <xsd:enumeration value=\"threeDEngrave\"/>\n      <xsd:enumeration value=\"outset\"/>\n      <xsd:enumeration value=\"inset\"/>\n      <xsd:enumeration value=\"apples\"/>\n      <xsd:enumeration value=\"archedScallops\"/>\n      <xsd:enumeration value=\"babyPacifier\"/>\n      <xsd:enumeration value=\"babyRattle\"/>\n      <xsd:enumeration value=\"balloons3Colors\"/>\n      <xsd:enumeration value=\"balloonsHotAir\"/>\n      <xsd:enumeration value=\"basicBlackDashes\"/>\n      <xsd:enumeration value=\"basicBlackDots\"/>\n      <xsd:enumeration value=\"basicBlackSquares\"/>\n      <xsd:enumeration value=\"basicThinLines\"/>\n      <xsd:enumeration value=\"basicWhiteDashes\"/>\n      <xsd:enumeration value=\"basicWhiteDots\"/>\n      <xsd:enumeration value=\"basicWhiteSquares\"/>\n      <xsd:enumeration value=\"basicWideInline\"/>\n      <xsd:enumeration value=\"basicWideMidline\"/>\n      <xsd:enumeration value=\"basicWideOutline\"/>\n      <xsd:enumeration value=\"bats\"/>\n      <xsd:enumeration value=\"birds\"/>\n      <xsd:enumeration value=\"birdsFlight\"/>\n      <xsd:enumeration value=\"cabins\"/>\n      <xsd:enumeration value=\"cakeSlice\"/>\n      <xsd:enumeration value=\"candyCorn\"/>\n      <xsd:enumeration value=\"celticKnotwork\"/>\n      <xsd:enumeration value=\"certificateBanner\"/>\n      <xsd:enumeration value=\"chainLink\"/>\n      <xsd:enumeration value=\"champagneBottle\"/>\n      <xsd:enumeration value=\"checkedBarBlack\"/>\n      <xsd:enumeration value=\"checkedBarColor\"/>\n      <xsd:enumeration value=\"checkered\"/>\n      <xsd:enumeration value=\"christmasTree\"/>\n      <xsd:enumeration value=\"circlesLines\"/>\n      <xsd:enumeration value=\"circlesRectangles\"/>\n      <xsd:enumeration value=\"classicalWave\"/>\n      <xsd:enumeration value=\"clocks\"/>\n      <xsd:enumeration value=\"compass\"/>\n      <xsd:enumeration value=\"confetti\"/>\n      <xsd:enumeration value=\"confettiGrays\"/>\n      <xsd:enumeration value=\"confettiOutline\"/>\n      <xsd:enumeration value=\"confettiStreamers\"/>\n      <xsd:enumeration value=\"confettiWhite\"/>\n      <xsd:enumeration value=\"cornerTriangles\"/>\n      <xsd:enumeration value=\"couponCutoutDashes\"/>\n      <xsd:enumeration value=\"couponCutoutDots\"/>\n      <xsd:enumeration value=\"crazyMaze\"/>\n      <xsd:enumeration value=\"creaturesButterfly\"/>\n      <xsd:enumeration value=\"creaturesFish\"/>\n      <xsd:enumeration value=\"creaturesInsects\"/>\n      <xsd:enumeration value=\"creaturesLadyBug\"/>\n      <xsd:enumeration value=\"crossStitch\"/>\n      <xsd:enumeration value=\"cup\"/>\n      <xsd:enumeration value=\"decoArch\"/>\n      <xsd:enumeration value=\"decoArchColor\"/>\n      <xsd:enumeration value=\"decoBlocks\"/>\n      <xsd:enumeration value=\"diamondsGray\"/>\n      <xsd:enumeration value=\"doubleD\"/>\n      <xsd:enumeration value=\"doubleDiamonds\"/>\n      <xsd:enumeration value=\"earth1\"/>\n      <xsd:enumeration value=\"earth2\"/>\n      <xsd:enumeration value=\"earth3\"/>\n      <xsd:enumeration value=\"eclipsingSquares1\"/>\n      <xsd:enumeration value=\"eclipsingSquares2\"/>\n      <xsd:enumeration value=\"eggsBlack\"/>\n      <xsd:enumeration value=\"fans\"/>\n      <xsd:enumeration value=\"film\"/>\n      <xsd:enumeration value=\"firecrackers\"/>\n      <xsd:enumeration value=\"flowersBlockPrint\"/>\n      <xsd:enumeration value=\"flowersDaisies\"/>\n      <xsd:enumeration value=\"flowersModern1\"/>\n      <xsd:enumeration value=\"flowersModern2\"/>\n      <xsd:enumeration value=\"flowersPansy\"/>\n      <xsd:enumeration value=\"flowersRedRose\"/>\n      <xsd:enumeration value=\"flowersRoses\"/>\n      <xsd:enumeration value=\"flowersTeacup\"/>\n      <xsd:enumeration value=\"flowersTiny\"/>\n      <xsd:enumeration value=\"gems\"/>\n      <xsd:enumeration value=\"gingerbreadMan\"/>\n      <xsd:enumeration value=\"gradient\"/>\n      <xsd:enumeration value=\"handmade1\"/>\n      <xsd:enumeration value=\"handmade2\"/>\n      <xsd:enumeration value=\"heartBalloon\"/>\n      <xsd:enumeration value=\"heartGray\"/>\n      <xsd:enumeration value=\"hearts\"/>\n      <xsd:enumeration value=\"heebieJeebies\"/>\n      <xsd:enumeration value=\"holly\"/>\n      <xsd:enumeration value=\"houseFunky\"/>\n      <xsd:enumeration value=\"hypnotic\"/>\n      <xsd:enumeration value=\"iceCreamCones\"/>\n      <xsd:enumeration value=\"lightBulb\"/>\n      <xsd:enumeration value=\"lightning1\"/>\n      <xsd:enumeration value=\"lightning2\"/>\n      <xsd:enumeration value=\"mapPins\"/>\n      <xsd:enumeration value=\"mapleLeaf\"/>\n      <xsd:enumeration value=\"mapleMuffins\"/>\n      <xsd:enumeration value=\"marquee\"/>\n      <xsd:enumeration value=\"marqueeToothed\"/>\n      <xsd:enumeration value=\"moons\"/>\n      <xsd:enumeration value=\"mosaic\"/>\n      <xsd:enumeration value=\"musicNotes\"/>\n      <xsd:enumeration value=\"northwest\"/>\n      <xsd:enumeration value=\"ovals\"/>\n      <xsd:enumeration value=\"packages\"/>\n      <xsd:enumeration value=\"palmsBlack\"/>\n      <xsd:enumeration value=\"palmsColor\"/>\n      <xsd:enumeration value=\"paperClips\"/>\n      <xsd:enumeration value=\"papyrus\"/>\n      <xsd:enumeration value=\"partyFavor\"/>\n      <xsd:enumeration value=\"partyGlass\"/>\n      <xsd:enumeration value=\"pencils\"/>\n      <xsd:enumeration value=\"people\"/>\n      <xsd:enumeration value=\"peopleWaving\"/>\n      <xsd:enumeration value=\"peopleHats\"/>\n      <xsd:enumeration value=\"poinsettias\"/>\n      <xsd:enumeration value=\"postageStamp\"/>\n      <xsd:enumeration value=\"pumpkin1\"/>\n      <xsd:enumeration value=\"pushPinNote2\"/>\n      <xsd:enumeration value=\"pushPinNote1\"/>\n      <xsd:enumeration value=\"pyramids\"/>\n      <xsd:enumeration value=\"pyramidsAbove\"/>\n      <xsd:enumeration value=\"quadrants\"/>\n      <xsd:enumeration value=\"rings\"/>\n      <xsd:enumeration value=\"safari\"/>\n      <xsd:enumeration value=\"sawtooth\"/>\n      <xsd:enumeration value=\"sawtoothGray\"/>\n      <xsd:enumeration value=\"scaredCat\"/>\n      <xsd:enumeration value=\"seattle\"/>\n      <xsd:enumeration value=\"shadowedSquares\"/>\n      <xsd:enumeration value=\"sharksTeeth\"/>\n      <xsd:enumeration value=\"shorebirdTracks\"/>\n      <xsd:enumeration value=\"skyrocket\"/>\n      <xsd:enumeration value=\"snowflakeFancy\"/>\n      <xsd:enumeration value=\"snowflakes\"/>\n      <xsd:enumeration value=\"sombrero\"/>\n      <xsd:enumeration value=\"southwest\"/>\n      <xsd:enumeration value=\"stars\"/>\n      <xsd:enumeration value=\"starsTop\"/>\n      <xsd:enumeration value=\"stars3d\"/>\n      <xsd:enumeration value=\"starsBlack\"/>\n      <xsd:enumeration value=\"starsShadowed\"/>\n      <xsd:enumeration value=\"sun\"/>\n      <xsd:enumeration value=\"swirligig\"/>\n      <xsd:enumeration value=\"tornPaper\"/>\n      <xsd:enumeration value=\"tornPaperBlack\"/>\n      <xsd:enumeration value=\"trees\"/>\n      <xsd:enumeration value=\"triangleParty\"/>\n      <xsd:enumeration value=\"triangles\"/>\n      <xsd:enumeration value=\"triangle1\"/>\n      <xsd:enumeration value=\"triangle2\"/>\n      <xsd:enumeration value=\"triangleCircle1\"/>\n      <xsd:enumeration value=\"triangleCircle2\"/>\n      <xsd:enumeration value=\"shapes1\"/>\n      <xsd:enumeration value=\"shapes2\"/>\n      <xsd:enumeration value=\"twistedLines1\"/>\n      <xsd:enumeration value=\"twistedLines2\"/>\n      <xsd:enumeration value=\"vine\"/>\n      <xsd:enumeration value=\"waveline\"/>\n      <xsd:enumeration value=\"weavingAngles\"/>\n      <xsd:enumeration value=\"weavingBraid\"/>\n      <xsd:enumeration value=\"weavingRibbon\"/>\n      <xsd:enumeration value=\"weavingStrips\"/>\n      <xsd:enumeration value=\"whiteFlowers\"/>\n      <xsd:enumeration value=\"woodwork\"/>\n      <xsd:enumeration value=\"xIllusions\"/>\n      <xsd:enumeration value=\"zanyTriangles\"/>\n      <xsd:enumeration value=\"zigZag\"/>\n      <xsd:enumeration value=\"zigZagStitch\"/>\n      <xsd:enumeration value=\"custom\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Border\">\n    <xsd:attribute name=\"val\" type=\"ST_Border\" use=\"required\"/>\n    <xsd:attribute name=\"color\" type=\"ST_HexColor\" use=\"optional\" default=\"auto\"/>\n    <xsd:attribute name=\"themeColor\" type=\"ST_ThemeColor\" use=\"optional\"/>\n    <xsd:attribute name=\"themeTint\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"themeShade\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"sz\" type=\"ST_EighthPointMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"space\" type=\"ST_PointMeasure\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"shadow\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"frame\" type=\"s:ST_OnOff\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Shd\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"nil\"/>\n      <xsd:enumeration value=\"clear\"/>\n      <xsd:enumeration value=\"solid\"/>\n      <xsd:enumeration value=\"horzStripe\"/>\n      <xsd:enumeration value=\"vertStripe\"/>\n      <xsd:enumeration value=\"reverseDiagStripe\"/>\n      <xsd:enumeration value=\"diagStripe\"/>\n      <xsd:enumeration value=\"horzCross\"/>\n      <xsd:enumeration value=\"diagCross\"/>\n      <xsd:enumeration value=\"thinHorzStripe\"/>\n      <xsd:enumeration value=\"thinVertStripe\"/>\n      <xsd:enumeration value=\"thinReverseDiagStripe\"/>\n      <xsd:enumeration value=\"thinDiagStripe\"/>\n      <xsd:enumeration value=\"thinHorzCross\"/>\n      <xsd:enumeration value=\"thinDiagCross\"/>\n      <xsd:enumeration value=\"pct5\"/>\n      <xsd:enumeration value=\"pct10\"/>\n      <xsd:enumeration value=\"pct12\"/>\n      <xsd:enumeration value=\"pct15\"/>\n      <xsd:enumeration value=\"pct20\"/>\n      <xsd:enumeration value=\"pct25\"/>\n      <xsd:enumeration value=\"pct30\"/>\n      <xsd:enumeration value=\"pct35\"/>\n      <xsd:enumeration value=\"pct37\"/>\n      <xsd:enumeration value=\"pct40\"/>\n      <xsd:enumeration value=\"pct45\"/>\n      <xsd:enumeration value=\"pct50\"/>\n      <xsd:enumeration value=\"pct55\"/>\n      <xsd:enumeration value=\"pct60\"/>\n      <xsd:enumeration value=\"pct62\"/>\n      <xsd:enumeration value=\"pct65\"/>\n      <xsd:enumeration value=\"pct70\"/>\n      <xsd:enumeration value=\"pct75\"/>\n      <xsd:enumeration value=\"pct80\"/>\n      <xsd:enumeration value=\"pct85\"/>\n      <xsd:enumeration value=\"pct87\"/>\n      <xsd:enumeration value=\"pct90\"/>\n      <xsd:enumeration value=\"pct95\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Shd\">\n    <xsd:attribute name=\"val\" type=\"ST_Shd\" use=\"required\"/>\n    <xsd:attribute name=\"color\" type=\"ST_HexColor\" use=\"optional\"/>\n    <xsd:attribute name=\"themeColor\" type=\"ST_ThemeColor\" use=\"optional\"/>\n    <xsd:attribute name=\"themeTint\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"themeShade\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"fill\" type=\"ST_HexColor\" use=\"optional\"/>\n    <xsd:attribute name=\"themeFill\" type=\"ST_ThemeColor\" use=\"optional\"/>\n    <xsd:attribute name=\"themeFillTint\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"themeFillShade\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_VerticalAlignRun\">\n    <xsd:attribute name=\"val\" type=\"s:ST_VerticalAlignRun\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FitText\">\n    <xsd:attribute name=\"val\" type=\"s:ST_TwipsMeasure\" use=\"required\"/>\n    <xsd:attribute name=\"id\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Em\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"dot\"/>\n      <xsd:enumeration value=\"comma\"/>\n      <xsd:enumeration value=\"circle\"/>\n      <xsd:enumeration value=\"underDot\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Em\">\n    <xsd:attribute name=\"val\" type=\"ST_Em\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Language\">\n    <xsd:attribute name=\"val\" type=\"s:ST_Lang\" use=\"optional\"/>\n    <xsd:attribute name=\"eastAsia\" type=\"s:ST_Lang\" use=\"optional\"/>\n    <xsd:attribute name=\"bidi\" type=\"s:ST_Lang\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_CombineBrackets\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"round\"/>\n      <xsd:enumeration value=\"square\"/>\n      <xsd:enumeration value=\"angle\"/>\n      <xsd:enumeration value=\"curly\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_EastAsianLayout\">\n    <xsd:attribute name=\"id\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"combine\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"combineBrackets\" type=\"ST_CombineBrackets\" use=\"optional\"/>\n    <xsd:attribute name=\"vert\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"vertCompress\" type=\"s:ST_OnOff\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_HeightRule\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"auto\"/>\n      <xsd:enumeration value=\"exact\"/>\n      <xsd:enumeration value=\"atLeast\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Wrap\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"auto\"/>\n      <xsd:enumeration value=\"notBeside\"/>\n      <xsd:enumeration value=\"around\"/>\n      <xsd:enumeration value=\"tight\"/>\n      <xsd:enumeration value=\"through\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_VAnchor\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"text\"/>\n      <xsd:enumeration value=\"margin\"/>\n      <xsd:enumeration value=\"page\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_HAnchor\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"text\"/>\n      <xsd:enumeration value=\"margin\"/>\n      <xsd:enumeration value=\"page\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DropCap\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"drop\"/>\n      <xsd:enumeration value=\"margin\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_FramePr\">\n    <xsd:attribute name=\"dropCap\" type=\"ST_DropCap\" use=\"optional\"/>\n    <xsd:attribute name=\"lines\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"w\" type=\"s:ST_TwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"h\" type=\"s:ST_TwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"vSpace\" type=\"s:ST_TwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"hSpace\" type=\"s:ST_TwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"wrap\" type=\"ST_Wrap\" use=\"optional\"/>\n    <xsd:attribute name=\"hAnchor\" type=\"ST_HAnchor\" use=\"optional\"/>\n    <xsd:attribute name=\"vAnchor\" type=\"ST_VAnchor\" use=\"optional\"/>\n    <xsd:attribute name=\"x\" type=\"ST_SignedTwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"xAlign\" type=\"s:ST_XAlign\" use=\"optional\"/>\n    <xsd:attribute name=\"y\" type=\"ST_SignedTwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"yAlign\" type=\"s:ST_YAlign\" use=\"optional\"/>\n    <xsd:attribute name=\"hRule\" type=\"ST_HeightRule\" use=\"optional\"/>\n    <xsd:attribute name=\"anchorLock\" type=\"s:ST_OnOff\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TabJc\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"clear\"/>\n      <xsd:enumeration value=\"start\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"end\"/>\n      <xsd:enumeration value=\"decimal\"/>\n      <xsd:enumeration value=\"bar\"/>\n      <xsd:enumeration value=\"num\"/>\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"right\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_TabTlc\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"dot\"/>\n      <xsd:enumeration value=\"hyphen\"/>\n      <xsd:enumeration value=\"underscore\"/>\n      <xsd:enumeration value=\"heavy\"/>\n      <xsd:enumeration value=\"middleDot\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TabStop\">\n    <xsd:attribute name=\"val\" type=\"ST_TabJc\" use=\"required\"/>\n    <xsd:attribute name=\"leader\" type=\"ST_TabTlc\" use=\"optional\"/>\n    <xsd:attribute name=\"pos\" type=\"ST_SignedTwipsMeasure\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_LineSpacingRule\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"auto\"/>\n      <xsd:enumeration value=\"exact\"/>\n      <xsd:enumeration value=\"atLeast\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Spacing\">\n    <xsd:attribute name=\"before\" type=\"s:ST_TwipsMeasure\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"beforeLines\" type=\"ST_DecimalNumber\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"beforeAutospacing\" type=\"s:ST_OnOff\" use=\"optional\" default=\"off\"/>\n    <xsd:attribute name=\"after\" type=\"s:ST_TwipsMeasure\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"afterLines\" type=\"ST_DecimalNumber\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"afterAutospacing\" type=\"s:ST_OnOff\" use=\"optional\" default=\"off\"/>\n    <xsd:attribute name=\"line\" type=\"ST_SignedTwipsMeasure\" use=\"optional\" default=\"0\"/>\n    <xsd:attribute name=\"lineRule\" type=\"ST_LineSpacingRule\" use=\"optional\" default=\"auto\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Ind\">\n    <xsd:attribute name=\"start\" type=\"ST_SignedTwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"startChars\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"end\" type=\"ST_SignedTwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"endChars\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"left\" type=\"ST_SignedTwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"leftChars\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"right\" type=\"ST_SignedTwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"rightChars\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"hanging\" type=\"s:ST_TwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"hangingChars\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"firstLine\" type=\"s:ST_TwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"firstLineChars\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Jc\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"start\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"end\"/>\n      <xsd:enumeration value=\"both\"/>\n      <xsd:enumeration value=\"mediumKashida\"/>\n      <xsd:enumeration value=\"distribute\"/>\n      <xsd:enumeration value=\"numTab\"/>\n      <xsd:enumeration value=\"highKashida\"/>\n      <xsd:enumeration value=\"lowKashida\"/>\n      <xsd:enumeration value=\"thaiDistribute\"/>\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"right\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_JcTable\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"end\"/>\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"right\"/>\n      <xsd:enumeration value=\"start\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Jc\">\n    <xsd:attribute name=\"val\" type=\"ST_Jc\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_JcTable\">\n    <xsd:attribute name=\"val\" type=\"ST_JcTable\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_View\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"print\"/>\n      <xsd:enumeration value=\"outline\"/>\n      <xsd:enumeration value=\"masterPages\"/>\n      <xsd:enumeration value=\"normal\"/>\n      <xsd:enumeration value=\"web\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_View\">\n    <xsd:attribute name=\"val\" type=\"ST_View\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Zoom\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"fullPage\"/>\n      <xsd:enumeration value=\"bestFit\"/>\n      <xsd:enumeration value=\"textFit\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Zoom\">\n    <xsd:attribute name=\"val\" type=\"ST_Zoom\" use=\"optional\"/>\n    <xsd:attribute name=\"percent\" type=\"ST_DecimalNumberOrPercent\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WritingStyle\">\n    <xsd:attribute name=\"lang\" type=\"s:ST_Lang\" use=\"required\"/>\n    <xsd:attribute name=\"vendorID\" type=\"s:ST_String\" use=\"required\"/>\n    <xsd:attribute name=\"dllVersion\" type=\"s:ST_String\" use=\"required\"/>\n    <xsd:attribute name=\"nlCheck\" type=\"s:ST_OnOff\" use=\"optional\" default=\"off\"/>\n    <xsd:attribute name=\"checkStyle\" type=\"s:ST_OnOff\" use=\"required\"/>\n    <xsd:attribute name=\"appName\" type=\"s:ST_String\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Proof\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"clean\"/>\n      <xsd:enumeration value=\"dirty\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Proof\">\n    <xsd:attribute name=\"spelling\" type=\"ST_Proof\" use=\"optional\"/>\n    <xsd:attribute name=\"grammar\" type=\"ST_Proof\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DocType\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DocType\">\n    <xsd:attribute name=\"val\" type=\"ST_DocType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DocProtect\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"readOnly\"/>\n      <xsd:enumeration value=\"comments\"/>\n      <xsd:enumeration value=\"trackedChanges\"/>\n      <xsd:enumeration value=\"forms\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:attributeGroup name=\"AG_Password\">\n    <xsd:attribute name=\"algorithmName\" type=\"s:ST_String\" use=\"optional\"/>\n    <xsd:attribute name=\"hashValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"saltValue\" type=\"xsd:base64Binary\" use=\"optional\"/>\n    <xsd:attribute name=\"spinCount\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n  </xsd:attributeGroup>\n  <xsd:attributeGroup name=\"AG_TransitionalPassword\">\n    <xsd:attribute name=\"cryptProviderType\" type=\"s:ST_CryptProv\"/>\n    <xsd:attribute name=\"cryptAlgorithmClass\" type=\"s:ST_AlgClass\"/>\n    <xsd:attribute name=\"cryptAlgorithmType\" type=\"s:ST_AlgType\"/>\n    <xsd:attribute name=\"cryptAlgorithmSid\" type=\"ST_DecimalNumber\"/>\n    <xsd:attribute name=\"cryptSpinCount\" type=\"ST_DecimalNumber\"/>\n    <xsd:attribute name=\"cryptProvider\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"algIdExt\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"algIdExtSource\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"cryptProviderTypeExt\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"cryptProviderTypeExtSource\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"hash\" type=\"xsd:base64Binary\"/>\n    <xsd:attribute name=\"salt\" type=\"xsd:base64Binary\"/>\n  </xsd:attributeGroup>\n  <xsd:complexType name=\"CT_DocProtect\">\n    <xsd:attribute name=\"edit\" type=\"ST_DocProtect\" use=\"optional\"/>\n    <xsd:attribute name=\"formatting\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"enforcement\" type=\"s:ST_OnOff\"/>\n    <xsd:attributeGroup ref=\"AG_Password\"/>\n    <xsd:attributeGroup ref=\"AG_TransitionalPassword\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_MailMergeDocType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"catalog\"/>\n      <xsd:enumeration value=\"envelopes\"/>\n      <xsd:enumeration value=\"mailingLabels\"/>\n      <xsd:enumeration value=\"formLetters\"/>\n      <xsd:enumeration value=\"email\"/>\n      <xsd:enumeration value=\"fax\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_MailMergeDocType\">\n    <xsd:attribute name=\"val\" type=\"ST_MailMergeDocType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_MailMergeDataType\">\n    <xsd:restriction base=\"xsd:string\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_MailMergeDataType\">\n    <xsd:attribute name=\"val\" type=\"ST_MailMergeDataType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_MailMergeDest\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"newDocument\"/>\n      <xsd:enumeration value=\"printer\"/>\n      <xsd:enumeration value=\"email\"/>\n      <xsd:enumeration value=\"fax\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_MailMergeDest\">\n    <xsd:attribute name=\"val\" type=\"ST_MailMergeDest\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_MailMergeOdsoFMDFieldType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"null\"/>\n      <xsd:enumeration value=\"dbColumn\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_MailMergeOdsoFMDFieldType\">\n    <xsd:attribute name=\"val\" type=\"ST_MailMergeOdsoFMDFieldType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TrackChangesView\">\n    <xsd:attribute name=\"markup\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"comments\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"insDel\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"formatting\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"inkAnnotations\" type=\"s:ST_OnOff\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Kinsoku\">\n    <xsd:attribute name=\"lang\" type=\"s:ST_Lang\" use=\"required\"/>\n    <xsd:attribute name=\"val\" type=\"s:ST_String\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextDirection\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"tb\"/>\n      <xsd:enumeration value=\"rl\"/>\n      <xsd:enumeration value=\"lr\"/>\n      <xsd:enumeration value=\"tbV\"/>\n      <xsd:enumeration value=\"rlV\"/>\n      <xsd:enumeration value=\"lrV\"/>\n      <xsd:enumeration value=\"btLr\"/>\n      <xsd:enumeration value=\"lrTb\"/>\n      <xsd:enumeration value=\"lrTbV\"/>\n      <xsd:enumeration value=\"tbLrV\"/>\n      <xsd:enumeration value=\"tbRl\"/>\n      <xsd:enumeration value=\"tbRlV\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextDirection\">\n    <xsd:attribute name=\"val\" type=\"ST_TextDirection\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextAlignment\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"top\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"baseline\"/>\n      <xsd:enumeration value=\"bottom\"/>\n      <xsd:enumeration value=\"auto\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextAlignment\">\n    <xsd:attribute name=\"val\" type=\"ST_TextAlignment\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DisplacedByCustomXml\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"next\"/>\n      <xsd:enumeration value=\"prev\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_AnnotationVMerge\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"cont\"/>\n      <xsd:enumeration value=\"rest\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Markup\">\n    <xsd:attribute name=\"id\" type=\"ST_DecimalNumber\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TrackChange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_Markup\">\n        <xsd:attribute name=\"author\" type=\"s:ST_String\" use=\"required\"/>\n        <xsd:attribute name=\"date\" type=\"ST_DateTime\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CellMergeTrackChange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:attribute name=\"vMerge\" type=\"ST_AnnotationVMerge\" use=\"optional\"/>\n        <xsd:attribute name=\"vMergeOrig\" type=\"ST_AnnotationVMerge\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TrackChangeRange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:attribute name=\"displacedByCustomXml\" type=\"ST_DisplacedByCustomXml\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MarkupRange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_Markup\">\n        <xsd:attribute name=\"displacedByCustomXml\" type=\"ST_DisplacedByCustomXml\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BookmarkRange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_MarkupRange\">\n        <xsd:attribute name=\"colFirst\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n        <xsd:attribute name=\"colLast\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Bookmark\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_BookmarkRange\">\n        <xsd:attribute name=\"name\" type=\"s:ST_String\" use=\"required\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MoveBookmark\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_Bookmark\">\n        <xsd:attribute name=\"author\" type=\"s:ST_String\" use=\"required\"/>\n        <xsd:attribute name=\"date\" type=\"ST_DateTime\" use=\"required\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Comment\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:sequence>\n          <xsd:group ref=\"EG_BlockLevelElts\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n        </xsd:sequence>\n        <xsd:attribute name=\"initials\" type=\"s:ST_String\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TrackChangeNumbering\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:attribute name=\"original\" type=\"s:ST_String\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblPrExChange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:sequence>\n          <xsd:element name=\"tblPrEx\" type=\"CT_TblPrExBase\" minOccurs=\"1\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TcPrChange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:sequence>\n          <xsd:element name=\"tcPr\" type=\"CT_TcPrInner\" minOccurs=\"1\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TrPrChange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:sequence>\n          <xsd:element name=\"trPr\" type=\"CT_TrPrBase\" minOccurs=\"1\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblGridChange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_Markup\">\n        <xsd:sequence>\n          <xsd:element name=\"tblGrid\" type=\"CT_TblGridBase\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblPrChange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:sequence>\n          <xsd:element name=\"tblPr\" type=\"CT_TblPrBase\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SectPrChange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:sequence>\n          <xsd:element name=\"sectPr\" type=\"CT_SectPrBase\" minOccurs=\"0\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PPrChange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:sequence>\n          <xsd:element name=\"pPr\" type=\"CT_PPrBase\" minOccurs=\"1\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RPrChange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:sequence>\n          <xsd:element name=\"rPr\" type=\"CT_RPrOriginal\" minOccurs=\"1\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ParaRPrChange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:sequence>\n          <xsd:element name=\"rPr\" type=\"CT_ParaRPrOriginal\" minOccurs=\"1\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RunTrackChange\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n          <xsd:group ref=\"EG_ContentRunContent\"/>\n          <xsd:group ref=\"m:EG_OMathMathElements\"/>\n        </xsd:choice>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:group name=\"EG_PContentMath\">\n    <xsd:choice>\n      <xsd:group ref=\"EG_PContentBase\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:group ref=\"EG_ContentRunContentBase\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:group name=\"EG_PContentBase\">\n    <xsd:choice>\n      <xsd:element name=\"customXml\" type=\"CT_CustomXmlRun\"/>\n      <xsd:element name=\"fldSimple\" type=\"CT_SimpleField\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"hyperlink\" type=\"CT_Hyperlink\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:group name=\"EG_ContentRunContentBase\">\n    <xsd:choice>\n      <xsd:element name=\"smartTag\" type=\"CT_SmartTagRun\"/>\n      <xsd:element name=\"sdt\" type=\"CT_SdtRun\"/>\n      <xsd:group ref=\"EG_RunLevelElts\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:group name=\"EG_CellMarkupElements\">\n    <xsd:choice>\n      <xsd:element name=\"cellIns\" type=\"CT_TrackChange\" minOccurs=\"0\"/>\n      <xsd:element name=\"cellDel\" type=\"CT_TrackChange\" minOccurs=\"0\"/>\n      <xsd:element name=\"cellMerge\" type=\"CT_CellMergeTrackChange\" minOccurs=\"0\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:group name=\"EG_RangeMarkupElements\">\n    <xsd:choice>\n      <xsd:element name=\"bookmarkStart\" type=\"CT_Bookmark\"/>\n      <xsd:element name=\"bookmarkEnd\" type=\"CT_MarkupRange\"/>\n      <xsd:element name=\"moveFromRangeStart\" type=\"CT_MoveBookmark\"/>\n      <xsd:element name=\"moveFromRangeEnd\" type=\"CT_MarkupRange\"/>\n      <xsd:element name=\"moveToRangeStart\" type=\"CT_MoveBookmark\"/>\n      <xsd:element name=\"moveToRangeEnd\" type=\"CT_MarkupRange\"/>\n      <xsd:element name=\"commentRangeStart\" type=\"CT_MarkupRange\"/>\n      <xsd:element name=\"commentRangeEnd\" type=\"CT_MarkupRange\"/>\n      <xsd:element name=\"customXmlInsRangeStart\" type=\"CT_TrackChange\"/>\n      <xsd:element name=\"customXmlInsRangeEnd\" type=\"CT_Markup\"/>\n      <xsd:element name=\"customXmlDelRangeStart\" type=\"CT_TrackChange\"/>\n      <xsd:element name=\"customXmlDelRangeEnd\" type=\"CT_Markup\"/>\n      <xsd:element name=\"customXmlMoveFromRangeStart\" type=\"CT_TrackChange\"/>\n      <xsd:element name=\"customXmlMoveFromRangeEnd\" type=\"CT_Markup\"/>\n      <xsd:element name=\"customXmlMoveToRangeStart\" type=\"CT_TrackChange\"/>\n      <xsd:element name=\"customXmlMoveToRangeEnd\" type=\"CT_Markup\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_NumPr\">\n    <xsd:sequence>\n      <xsd:element name=\"ilvl\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"numId\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"numberingChange\" type=\"CT_TrackChangeNumbering\" minOccurs=\"0\"/>\n      <xsd:element name=\"ins\" type=\"CT_TrackChange\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PBdr\">\n    <xsd:sequence>\n      <xsd:element name=\"top\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"left\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"bottom\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"right\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"between\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"bar\" type=\"CT_Border\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Tabs\">\n    <xsd:sequence>\n      <xsd:element name=\"tab\" type=\"CT_TabStop\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TextboxTightWrap\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"allLines\"/>\n      <xsd:enumeration value=\"firstAndLastLine\"/>\n      <xsd:enumeration value=\"firstLineOnly\"/>\n      <xsd:enumeration value=\"lastLineOnly\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TextboxTightWrap\">\n    <xsd:attribute name=\"val\" type=\"ST_TextboxTightWrap\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PPrBase\">\n    <xsd:sequence>\n      <xsd:element name=\"pStyle\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"keepNext\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"keepLines\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"pageBreakBefore\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"framePr\" type=\"CT_FramePr\" minOccurs=\"0\"/>\n      <xsd:element name=\"widowControl\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"numPr\" type=\"CT_NumPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"suppressLineNumbers\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"pBdr\" type=\"CT_PBdr\" minOccurs=\"0\"/>\n      <xsd:element name=\"shd\" type=\"CT_Shd\" minOccurs=\"0\"/>\n      <xsd:element name=\"tabs\" type=\"CT_Tabs\" minOccurs=\"0\"/>\n      <xsd:element name=\"suppressAutoHyphens\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"kinsoku\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"wordWrap\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"overflowPunct\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"topLinePunct\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"autoSpaceDE\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"autoSpaceDN\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"bidi\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"adjustRightInd\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"snapToGrid\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"spacing\" type=\"CT_Spacing\" minOccurs=\"0\"/>\n      <xsd:element name=\"ind\" type=\"CT_Ind\" minOccurs=\"0\"/>\n      <xsd:element name=\"contextualSpacing\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"mirrorIndents\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"suppressOverlap\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"jc\" type=\"CT_Jc\" minOccurs=\"0\"/>\n      <xsd:element name=\"textDirection\" type=\"CT_TextDirection\" minOccurs=\"0\"/>\n      <xsd:element name=\"textAlignment\" type=\"CT_TextAlignment\" minOccurs=\"0\"/>\n      <xsd:element name=\"textboxTightWrap\" type=\"CT_TextboxTightWrap\" minOccurs=\"0\"/>\n      <xsd:element name=\"outlineLvl\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"divId\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"cnfStyle\" type=\"CT_Cnf\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PPr\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_PPrBase\">\n        <xsd:sequence>\n          <xsd:element name=\"rPr\" type=\"CT_ParaRPr\" minOccurs=\"0\"/>\n          <xsd:element name=\"sectPr\" type=\"CT_SectPr\" minOccurs=\"0\"/>\n          <xsd:element name=\"pPrChange\" type=\"CT_PPrChange\" minOccurs=\"0\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PPrGeneral\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_PPrBase\">\n        <xsd:sequence>\n          <xsd:element name=\"pPrChange\" type=\"CT_PPrChange\" minOccurs=\"0\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Control\">\n    <xsd:attribute name=\"name\" type=\"s:ST_String\" use=\"optional\"/>\n    <xsd:attribute name=\"shapeid\" type=\"s:ST_String\" use=\"optional\"/>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Background\">\n    <xsd:sequence>\n      <xsd:sequence maxOccurs=\"unbounded\">\n        <xsd:any processContents=\"lax\" namespace=\"urn:schemas-microsoft-com:vml\" minOccurs=\"0\"\n          maxOccurs=\"unbounded\"/>\n        <xsd:any processContents=\"lax\" namespace=\"urn:schemas-microsoft-com:office:office\"\n          minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      </xsd:sequence>\n      <xsd:element name=\"drawing\" type=\"CT_Drawing\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"color\" type=\"ST_HexColor\" use=\"optional\" default=\"auto\"/>\n    <xsd:attribute name=\"themeColor\" type=\"ST_ThemeColor\" use=\"optional\"/>\n    <xsd:attribute name=\"themeTint\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"themeShade\" type=\"ST_UcharHexNumber\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Rel\">\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Object\">\n    <xsd:sequence>\n      <xsd:sequence maxOccurs=\"unbounded\">\n        <xsd:any processContents=\"lax\" namespace=\"urn:schemas-microsoft-com:vml\" minOccurs=\"0\"\n          maxOccurs=\"unbounded\"/>\n        <xsd:any processContents=\"lax\" namespace=\"urn:schemas-microsoft-com:office:office\"\n          minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      </xsd:sequence>\n      <xsd:element name=\"drawing\" type=\"CT_Drawing\" minOccurs=\"0\"/>\n      <xsd:choice minOccurs=\"0\">\n        <xsd:element name=\"control\" type=\"CT_Control\"/>\n        <xsd:element name=\"objectLink\" type=\"CT_ObjectLink\"/>\n        <xsd:element name=\"objectEmbed\" type=\"CT_ObjectEmbed\"/>\n        <xsd:element name=\"movie\" type=\"CT_Rel\"/>\n      </xsd:choice>\n    </xsd:sequence>\n    <xsd:attribute name=\"dxaOrig\" type=\"s:ST_TwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"dyaOrig\" type=\"s:ST_TwipsMeasure\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Picture\">\n    <xsd:sequence>\n      <xsd:sequence maxOccurs=\"unbounded\">\n        <xsd:any processContents=\"lax\" namespace=\"urn:schemas-microsoft-com:vml\" minOccurs=\"0\"\n          maxOccurs=\"unbounded\"/>\n        <xsd:any processContents=\"lax\" namespace=\"urn:schemas-microsoft-com:office:office\"\n          minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      </xsd:sequence>\n      <xsd:element name=\"movie\" type=\"CT_Rel\" minOccurs=\"0\"/>\n      <xsd:element name=\"control\" type=\"CT_Control\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ObjectEmbed\">\n    <xsd:attribute name=\"drawAspect\" type=\"ST_ObjectDrawAspect\" use=\"optional\"/>\n    <xsd:attribute ref=\"r:id\" use=\"required\"/>\n    <xsd:attribute name=\"progId\" type=\"s:ST_String\" use=\"optional\"/>\n    <xsd:attribute name=\"shapeId\" type=\"s:ST_String\" use=\"optional\"/>\n    <xsd:attribute name=\"fieldCodes\" type=\"s:ST_String\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ObjectDrawAspect\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"content\"/>\n      <xsd:enumeration value=\"icon\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ObjectLink\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_ObjectEmbed\">\n        <xsd:attribute name=\"updateMode\" type=\"ST_ObjectUpdateMode\" use=\"required\"/>\n        <xsd:attribute name=\"lockedField\" type=\"s:ST_OnOff\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ObjectUpdateMode\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"always\"/>\n      <xsd:enumeration value=\"onCall\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Drawing\">\n    <xsd:choice minOccurs=\"1\" maxOccurs=\"unbounded\">\n      <xsd:element ref=\"wp:anchor\" minOccurs=\"0\"/>\n      <xsd:element ref=\"wp:inline\" minOccurs=\"0\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SimpleField\">\n    <xsd:sequence>\n      <xsd:element name=\"fldData\" type=\"CT_Text\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_PContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"instr\" type=\"s:ST_String\" use=\"required\"/>\n    <xsd:attribute name=\"fldLock\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"dirty\" type=\"s:ST_OnOff\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FldCharType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"begin\"/>\n      <xsd:enumeration value=\"separate\"/>\n      <xsd:enumeration value=\"end\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_InfoTextType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"text\"/>\n      <xsd:enumeration value=\"autoText\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FFHelpTextVal\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:maxLength value=\"256\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FFStatusTextVal\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:maxLength value=\"140\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FFName\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:maxLength value=\"65\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FFTextType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"regular\"/>\n      <xsd:enumeration value=\"number\"/>\n      <xsd:enumeration value=\"date\"/>\n      <xsd:enumeration value=\"currentTime\"/>\n      <xsd:enumeration value=\"currentDate\"/>\n      <xsd:enumeration value=\"calculated\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_FFTextType\">\n    <xsd:attribute name=\"val\" type=\"ST_FFTextType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FFName\">\n    <xsd:attribute name=\"val\" type=\"ST_FFName\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FldChar\">\n    <xsd:choice>\n      <xsd:element name=\"fldData\" type=\"CT_Text\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"ffData\" type=\"CT_FFData\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"numberingChange\" type=\"CT_TrackChangeNumbering\" minOccurs=\"0\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"fldCharType\" type=\"ST_FldCharType\" use=\"required\"/>\n    <xsd:attribute name=\"fldLock\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"dirty\" type=\"s:ST_OnOff\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Hyperlink\">\n    <xsd:group ref=\"EG_PContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    <xsd:attribute name=\"tgtFrame\" type=\"s:ST_String\" use=\"optional\"/>\n    <xsd:attribute name=\"tooltip\" type=\"s:ST_String\" use=\"optional\"/>\n    <xsd:attribute name=\"docLocation\" type=\"s:ST_String\" use=\"optional\"/>\n    <xsd:attribute name=\"history\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"anchor\" type=\"s:ST_String\" use=\"optional\"/>\n    <xsd:attribute ref=\"r:id\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FFData\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:element name=\"name\" type=\"CT_FFName\"/>\n      <xsd:element name=\"label\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"tabIndex\" type=\"CT_UnsignedDecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"enabled\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"calcOnExit\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"entryMacro\" type=\"CT_MacroName\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"exitMacro\" type=\"CT_MacroName\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"helpText\" type=\"CT_FFHelpText\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"statusText\" type=\"CT_FFStatusText\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:choice>\n        <xsd:element name=\"checkBox\" type=\"CT_FFCheckBox\"/>\n        <xsd:element name=\"ddList\" type=\"CT_FFDDList\"/>\n        <xsd:element name=\"textInput\" type=\"CT_FFTextInput\"/>\n      </xsd:choice>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FFHelpText\">\n    <xsd:attribute name=\"type\" type=\"ST_InfoTextType\"/>\n    <xsd:attribute name=\"val\" type=\"ST_FFHelpTextVal\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FFStatusText\">\n    <xsd:attribute name=\"type\" type=\"ST_InfoTextType\"/>\n    <xsd:attribute name=\"val\" type=\"ST_FFStatusTextVal\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FFCheckBox\">\n    <xsd:sequence>\n      <xsd:choice>\n        <xsd:element name=\"size\" type=\"CT_HpsMeasure\"/>\n        <xsd:element name=\"sizeAuto\" type=\"CT_OnOff\"/>\n      </xsd:choice>\n      <xsd:element name=\"default\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"checked\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FFDDList\">\n    <xsd:sequence>\n      <xsd:element name=\"result\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"default\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"listEntry\" type=\"CT_String\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FFTextInput\">\n    <xsd:sequence>\n      <xsd:element name=\"type\" type=\"CT_FFTextType\" minOccurs=\"0\"/>\n      <xsd:element name=\"default\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"maxLength\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"format\" type=\"CT_String\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SectionMark\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"nextPage\"/>\n      <xsd:enumeration value=\"nextColumn\"/>\n      <xsd:enumeration value=\"continuous\"/>\n      <xsd:enumeration value=\"evenPage\"/>\n      <xsd:enumeration value=\"oddPage\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SectType\">\n    <xsd:attribute name=\"val\" type=\"ST_SectionMark\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PaperSource\">\n    <xsd:attribute name=\"first\" type=\"ST_DecimalNumber\"/>\n    <xsd:attribute name=\"other\" type=\"ST_DecimalNumber\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_NumberFormat\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"decimal\"/>\n      <xsd:enumeration value=\"upperRoman\"/>\n      <xsd:enumeration value=\"lowerRoman\"/>\n      <xsd:enumeration value=\"upperLetter\"/>\n      <xsd:enumeration value=\"lowerLetter\"/>\n      <xsd:enumeration value=\"ordinal\"/>\n      <xsd:enumeration value=\"cardinalText\"/>\n      <xsd:enumeration value=\"ordinalText\"/>\n      <xsd:enumeration value=\"hex\"/>\n      <xsd:enumeration value=\"chicago\"/>\n      <xsd:enumeration value=\"ideographDigital\"/>\n      <xsd:enumeration value=\"japaneseCounting\"/>\n      <xsd:enumeration value=\"aiueo\"/>\n      <xsd:enumeration value=\"iroha\"/>\n      <xsd:enumeration value=\"decimalFullWidth\"/>\n      <xsd:enumeration value=\"decimalHalfWidth\"/>\n      <xsd:enumeration value=\"japaneseLegal\"/>\n      <xsd:enumeration value=\"japaneseDigitalTenThousand\"/>\n      <xsd:enumeration value=\"decimalEnclosedCircle\"/>\n      <xsd:enumeration value=\"decimalFullWidth2\"/>\n      <xsd:enumeration value=\"aiueoFullWidth\"/>\n      <xsd:enumeration value=\"irohaFullWidth\"/>\n      <xsd:enumeration value=\"decimalZero\"/>\n      <xsd:enumeration value=\"bullet\"/>\n      <xsd:enumeration value=\"ganada\"/>\n      <xsd:enumeration value=\"chosung\"/>\n      <xsd:enumeration value=\"decimalEnclosedFullstop\"/>\n      <xsd:enumeration value=\"decimalEnclosedParen\"/>\n      <xsd:enumeration value=\"decimalEnclosedCircleChinese\"/>\n      <xsd:enumeration value=\"ideographEnclosedCircle\"/>\n      <xsd:enumeration value=\"ideographTraditional\"/>\n      <xsd:enumeration value=\"ideographZodiac\"/>\n      <xsd:enumeration value=\"ideographZodiacTraditional\"/>\n      <xsd:enumeration value=\"taiwaneseCounting\"/>\n      <xsd:enumeration value=\"ideographLegalTraditional\"/>\n      <xsd:enumeration value=\"taiwaneseCountingThousand\"/>\n      <xsd:enumeration value=\"taiwaneseDigital\"/>\n      <xsd:enumeration value=\"chineseCounting\"/>\n      <xsd:enumeration value=\"chineseLegalSimplified\"/>\n      <xsd:enumeration value=\"chineseCountingThousand\"/>\n      <xsd:enumeration value=\"koreanDigital\"/>\n      <xsd:enumeration value=\"koreanCounting\"/>\n      <xsd:enumeration value=\"koreanLegal\"/>\n      <xsd:enumeration value=\"koreanDigital2\"/>\n      <xsd:enumeration value=\"vietnameseCounting\"/>\n      <xsd:enumeration value=\"russianLower\"/>\n      <xsd:enumeration value=\"russianUpper\"/>\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"numberInDash\"/>\n      <xsd:enumeration value=\"hebrew1\"/>\n      <xsd:enumeration value=\"hebrew2\"/>\n      <xsd:enumeration value=\"arabicAlpha\"/>\n      <xsd:enumeration value=\"arabicAbjad\"/>\n      <xsd:enumeration value=\"hindiVowels\"/>\n      <xsd:enumeration value=\"hindiConsonants\"/>\n      <xsd:enumeration value=\"hindiNumbers\"/>\n      <xsd:enumeration value=\"hindiCounting\"/>\n      <xsd:enumeration value=\"thaiLetters\"/>\n      <xsd:enumeration value=\"thaiNumbers\"/>\n      <xsd:enumeration value=\"thaiCounting\"/>\n      <xsd:enumeration value=\"bahtText\"/>\n      <xsd:enumeration value=\"dollarText\"/>\n      <xsd:enumeration value=\"custom\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PageOrientation\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"portrait\"/>\n      <xsd:enumeration value=\"landscape\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PageSz\">\n    <xsd:attribute name=\"w\" type=\"s:ST_TwipsMeasure\"/>\n    <xsd:attribute name=\"h\" type=\"s:ST_TwipsMeasure\"/>\n    <xsd:attribute name=\"orient\" type=\"ST_PageOrientation\" use=\"optional\"/>\n    <xsd:attribute name=\"code\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PageMar\">\n    <xsd:attribute name=\"top\" type=\"ST_SignedTwipsMeasure\" use=\"required\"/>\n    <xsd:attribute name=\"right\" type=\"s:ST_TwipsMeasure\" use=\"required\"/>\n    <xsd:attribute name=\"bottom\" type=\"ST_SignedTwipsMeasure\" use=\"required\"/>\n    <xsd:attribute name=\"left\" type=\"s:ST_TwipsMeasure\" use=\"required\"/>\n    <xsd:attribute name=\"header\" type=\"s:ST_TwipsMeasure\" use=\"required\"/>\n    <xsd:attribute name=\"footer\" type=\"s:ST_TwipsMeasure\" use=\"required\"/>\n    <xsd:attribute name=\"gutter\" type=\"s:ST_TwipsMeasure\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PageBorderZOrder\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"front\"/>\n      <xsd:enumeration value=\"back\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PageBorderDisplay\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"allPages\"/>\n      <xsd:enumeration value=\"firstPage\"/>\n      <xsd:enumeration value=\"notFirstPage\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PageBorderOffset\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"page\"/>\n      <xsd:enumeration value=\"text\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PageBorders\">\n    <xsd:sequence>\n      <xsd:element name=\"top\" type=\"CT_TopPageBorder\" minOccurs=\"0\"/>\n      <xsd:element name=\"left\" type=\"CT_PageBorder\" minOccurs=\"0\"/>\n      <xsd:element name=\"bottom\" type=\"CT_BottomPageBorder\" minOccurs=\"0\"/>\n      <xsd:element name=\"right\" type=\"CT_PageBorder\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"zOrder\" type=\"ST_PageBorderZOrder\" use=\"optional\" default=\"front\"/>\n    <xsd:attribute name=\"display\" type=\"ST_PageBorderDisplay\" use=\"optional\"/>\n    <xsd:attribute name=\"offsetFrom\" type=\"ST_PageBorderOffset\" use=\"optional\" default=\"text\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PageBorder\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_Border\">\n        <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BottomPageBorder\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_PageBorder\">\n        <xsd:attribute ref=\"r:bottomLeft\" use=\"optional\"/>\n        <xsd:attribute ref=\"r:bottomRight\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TopPageBorder\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_PageBorder\">\n        <xsd:attribute ref=\"r:topLeft\" use=\"optional\"/>\n        <xsd:attribute ref=\"r:topRight\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ChapterSep\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"hyphen\"/>\n      <xsd:enumeration value=\"period\"/>\n      <xsd:enumeration value=\"colon\"/>\n      <xsd:enumeration value=\"emDash\"/>\n      <xsd:enumeration value=\"enDash\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_LineNumberRestart\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"newPage\"/>\n      <xsd:enumeration value=\"newSection\"/>\n      <xsd:enumeration value=\"continuous\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LineNumber\">\n    <xsd:attribute name=\"countBy\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"start\" type=\"ST_DecimalNumber\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"distance\" type=\"s:ST_TwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"restart\" type=\"ST_LineNumberRestart\" use=\"optional\" default=\"newPage\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PageNumber\">\n    <xsd:attribute name=\"fmt\" type=\"ST_NumberFormat\" use=\"optional\" default=\"decimal\"/>\n    <xsd:attribute name=\"start\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"chapStyle\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"chapSep\" type=\"ST_ChapterSep\" use=\"optional\" default=\"hyphen\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Column\">\n    <xsd:attribute name=\"w\" type=\"s:ST_TwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"space\" type=\"s:ST_TwipsMeasure\" use=\"optional\" default=\"0\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Columns\">\n    <xsd:sequence minOccurs=\"0\">\n      <xsd:element name=\"col\" type=\"CT_Column\" maxOccurs=\"45\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"equalWidth\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"space\" type=\"s:ST_TwipsMeasure\" use=\"optional\" default=\"720\"/>\n    <xsd:attribute name=\"num\" type=\"ST_DecimalNumber\" use=\"optional\" default=\"1\"/>\n    <xsd:attribute name=\"sep\" type=\"s:ST_OnOff\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_VerticalJc\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"top\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"both\"/>\n      <xsd:enumeration value=\"bottom\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_VerticalJc\">\n    <xsd:attribute name=\"val\" type=\"ST_VerticalJc\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DocGrid\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"default\"/>\n      <xsd:enumeration value=\"lines\"/>\n      <xsd:enumeration value=\"linesAndChars\"/>\n      <xsd:enumeration value=\"snapToChars\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DocGrid\">\n    <xsd:attribute name=\"type\" type=\"ST_DocGrid\"/>\n    <xsd:attribute name=\"linePitch\" type=\"ST_DecimalNumber\"/>\n    <xsd:attribute name=\"charSpace\" type=\"ST_DecimalNumber\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_HdrFtr\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"even\"/>\n      <xsd:enumeration value=\"default\"/>\n      <xsd:enumeration value=\"first\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_FtnEdn\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"normal\"/>\n      <xsd:enumeration value=\"separator\"/>\n      <xsd:enumeration value=\"continuationSeparator\"/>\n      <xsd:enumeration value=\"continuationNotice\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_HdrFtrRef\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_Rel\">\n        <xsd:attribute name=\"type\" type=\"ST_HdrFtr\" use=\"required\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:group name=\"EG_HdrFtrReferences\">\n    <xsd:choice>\n      <xsd:element name=\"headerReference\" type=\"CT_HdrFtrRef\" minOccurs=\"0\"/>\n      <xsd:element name=\"footerReference\" type=\"CT_HdrFtrRef\" minOccurs=\"0\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_HdrFtr\">\n    <xsd:group ref=\"EG_BlockLevelElts\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_SectPrContents\">\n    <xsd:sequence>\n      <xsd:element name=\"footnotePr\" type=\"CT_FtnProps\" minOccurs=\"0\"/>\n      <xsd:element name=\"endnotePr\" type=\"CT_EdnProps\" minOccurs=\"0\"/>\n      <xsd:element name=\"type\" type=\"CT_SectType\" minOccurs=\"0\"/>\n      <xsd:element name=\"pgSz\" type=\"CT_PageSz\" minOccurs=\"0\"/>\n      <xsd:element name=\"pgMar\" type=\"CT_PageMar\" minOccurs=\"0\"/>\n      <xsd:element name=\"paperSrc\" type=\"CT_PaperSource\" minOccurs=\"0\"/>\n      <xsd:element name=\"pgBorders\" type=\"CT_PageBorders\" minOccurs=\"0\"/>\n      <xsd:element name=\"lnNumType\" type=\"CT_LineNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"pgNumType\" type=\"CT_PageNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"cols\" type=\"CT_Columns\" minOccurs=\"0\"/>\n      <xsd:element name=\"formProt\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"vAlign\" type=\"CT_VerticalJc\" minOccurs=\"0\"/>\n      <xsd:element name=\"noEndnote\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"titlePg\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"textDirection\" type=\"CT_TextDirection\" minOccurs=\"0\"/>\n      <xsd:element name=\"bidi\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"rtlGutter\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"docGrid\" type=\"CT_DocGrid\" minOccurs=\"0\"/>\n      <xsd:element name=\"printerSettings\" type=\"CT_Rel\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:attributeGroup name=\"AG_SectPrAttributes\">\n    <xsd:attribute name=\"rsidRPr\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"rsidDel\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"rsidR\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"rsidSect\" type=\"ST_LongHexNumber\"/>\n  </xsd:attributeGroup>\n  <xsd:complexType name=\"CT_SectPrBase\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_SectPrContents\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_SectPrAttributes\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SectPr\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_HdrFtrReferences\" minOccurs=\"0\" maxOccurs=\"6\"/>\n      <xsd:group ref=\"EG_SectPrContents\" minOccurs=\"0\"/>\n      <xsd:element name=\"sectPrChange\" type=\"CT_SectPrChange\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attributeGroup ref=\"AG_SectPrAttributes\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_BrType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"page\"/>\n      <xsd:enumeration value=\"column\"/>\n      <xsd:enumeration value=\"textWrapping\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_BrClear\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"right\"/>\n      <xsd:enumeration value=\"all\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Br\">\n    <xsd:attribute name=\"type\" type=\"ST_BrType\" use=\"optional\"/>\n    <xsd:attribute name=\"clear\" type=\"ST_BrClear\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_PTabAlignment\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"right\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PTabRelativeTo\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"margin\"/>\n      <xsd:enumeration value=\"indent\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_PTabLeader\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"dot\"/>\n      <xsd:enumeration value=\"hyphen\"/>\n      <xsd:enumeration value=\"underscore\"/>\n      <xsd:enumeration value=\"middleDot\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_PTab\">\n    <xsd:attribute name=\"alignment\" type=\"ST_PTabAlignment\" use=\"required\"/>\n    <xsd:attribute name=\"relativeTo\" type=\"ST_PTabRelativeTo\" use=\"required\"/>\n    <xsd:attribute name=\"leader\" type=\"ST_PTabLeader\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Sym\">\n    <xsd:attribute name=\"font\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"char\" type=\"ST_ShortHexNumber\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ProofErr\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"spellStart\"/>\n      <xsd:enumeration value=\"spellEnd\"/>\n      <xsd:enumeration value=\"gramStart\"/>\n      <xsd:enumeration value=\"gramEnd\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ProofErr\">\n    <xsd:attribute name=\"type\" type=\"ST_ProofErr\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_EdGrp\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"everyone\"/>\n      <xsd:enumeration value=\"administrators\"/>\n      <xsd:enumeration value=\"contributors\"/>\n      <xsd:enumeration value=\"editors\"/>\n      <xsd:enumeration value=\"owners\"/>\n      <xsd:enumeration value=\"current\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Perm\">\n    <xsd:attribute name=\"id\" type=\"s:ST_String\" use=\"required\"/>\n    <xsd:attribute name=\"displacedByCustomXml\" type=\"ST_DisplacedByCustomXml\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PermStart\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_Perm\">\n        <xsd:attribute name=\"edGrp\" type=\"ST_EdGrp\" use=\"optional\"/>\n        <xsd:attribute name=\"ed\" type=\"s:ST_String\" use=\"optional\"/>\n        <xsd:attribute name=\"colFirst\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n        <xsd:attribute name=\"colLast\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Text\">\n    <xsd:simpleContent>\n      <xsd:extension base=\"s:ST_String\">\n        <xsd:attribute ref=\"xml:space\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:simpleContent>\n  </xsd:complexType>\n  <xsd:group name=\"EG_RunInnerContent\">\n    <xsd:choice>\n      <xsd:element name=\"br\" type=\"CT_Br\"/>\n      <xsd:element name=\"t\" type=\"CT_Text\"/>\n      <xsd:element name=\"contentPart\" type=\"CT_Rel\"/>\n      <xsd:element name=\"delText\" type=\"CT_Text\"/>\n      <xsd:element name=\"instrText\" type=\"CT_Text\"/>\n      <xsd:element name=\"delInstrText\" type=\"CT_Text\"/>\n      <xsd:element name=\"noBreakHyphen\" type=\"CT_Empty\"/>\n      <xsd:element name=\"softHyphen\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"dayShort\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"monthShort\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"yearShort\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"dayLong\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"monthLong\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"yearLong\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"annotationRef\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"footnoteRef\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"endnoteRef\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"separator\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"continuationSeparator\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"sym\" type=\"CT_Sym\" minOccurs=\"0\"/>\n      <xsd:element name=\"pgNum\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"cr\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"tab\" type=\"CT_Empty\" minOccurs=\"0\"/>\n      <xsd:element name=\"object\" type=\"CT_Object\"/>\n      <xsd:element name=\"pict\" type=\"CT_Picture\"/>\n      <xsd:element name=\"fldChar\" type=\"CT_FldChar\"/>\n      <xsd:element name=\"ruby\" type=\"CT_Ruby\"/>\n      <xsd:element name=\"footnoteReference\" type=\"CT_FtnEdnRef\"/>\n      <xsd:element name=\"endnoteReference\" type=\"CT_FtnEdnRef\"/>\n      <xsd:element name=\"commentReference\" type=\"CT_Markup\"/>\n      <xsd:element name=\"drawing\" type=\"CT_Drawing\"/>\n      <xsd:element name=\"ptab\" type=\"CT_PTab\" minOccurs=\"0\"/>\n      <xsd:element name=\"lastRenderedPageBreak\" type=\"CT_Empty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_R\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_RPr\" minOccurs=\"0\"/>\n      <xsd:group ref=\"EG_RunInnerContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"rsidRPr\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"rsidDel\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"rsidR\" type=\"ST_LongHexNumber\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Hint\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"default\"/>\n      <xsd:enumeration value=\"eastAsia\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_Theme\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"majorEastAsia\"/>\n      <xsd:enumeration value=\"majorBidi\"/>\n      <xsd:enumeration value=\"majorAscii\"/>\n      <xsd:enumeration value=\"majorHAnsi\"/>\n      <xsd:enumeration value=\"minorEastAsia\"/>\n      <xsd:enumeration value=\"minorBidi\"/>\n      <xsd:enumeration value=\"minorAscii\"/>\n      <xsd:enumeration value=\"minorHAnsi\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Fonts\">\n    <xsd:attribute name=\"hint\" type=\"ST_Hint\"/>\n    <xsd:attribute name=\"ascii\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"hAnsi\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"eastAsia\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"cs\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"asciiTheme\" type=\"ST_Theme\"/>\n    <xsd:attribute name=\"hAnsiTheme\" type=\"ST_Theme\"/>\n    <xsd:attribute name=\"eastAsiaTheme\" type=\"ST_Theme\"/>\n    <xsd:attribute name=\"cstheme\" type=\"ST_Theme\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_RPrBase\">\n    <xsd:choice>\n      <xsd:element name=\"rStyle\" type=\"CT_String\"/>\n      <xsd:element name=\"rFonts\" type=\"CT_Fonts\"/>\n      <xsd:element name=\"b\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"bCs\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"i\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"iCs\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"caps\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"smallCaps\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"strike\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"dstrike\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"outline\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"shadow\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"emboss\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"imprint\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"noProof\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"snapToGrid\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"vanish\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"webHidden\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"color\" type=\"CT_Color\"/>\n      <xsd:element name=\"spacing\" type=\"CT_SignedTwipsMeasure\"/>\n      <xsd:element name=\"w\" type=\"CT_TextScale\"/>\n      <xsd:element name=\"kern\" type=\"CT_HpsMeasure\"/>\n      <xsd:element name=\"position\" type=\"CT_SignedHpsMeasure\"/>\n      <xsd:element name=\"sz\" type=\"CT_HpsMeasure\"/>\n      <xsd:element name=\"szCs\" type=\"CT_HpsMeasure\"/>\n      <xsd:element name=\"highlight\" type=\"CT_Highlight\"/>\n      <xsd:element name=\"u\" type=\"CT_Underline\"/>\n      <xsd:element name=\"effect\" type=\"CT_TextEffect\"/>\n      <xsd:element name=\"bdr\" type=\"CT_Border\"/>\n      <xsd:element name=\"shd\" type=\"CT_Shd\"/>\n      <xsd:element name=\"fitText\" type=\"CT_FitText\"/>\n      <xsd:element name=\"vertAlign\" type=\"CT_VerticalAlignRun\"/>\n      <xsd:element name=\"rtl\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"cs\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"em\" type=\"CT_Em\"/>\n      <xsd:element name=\"lang\" type=\"CT_Language\"/>\n      <xsd:element name=\"eastAsianLayout\" type=\"CT_EastAsianLayout\"/>\n      <xsd:element name=\"specVanish\" type=\"CT_OnOff\"/>\n      <xsd:element name=\"oMath\" type=\"CT_OnOff\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:group name=\"EG_RPrContent\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_RPrBase\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rPrChange\" type=\"CT_RPrChange\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_RPr\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_RPrContent\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_RPr\">\n    <xsd:sequence>\n      <xsd:element name=\"rPr\" type=\"CT_RPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:group name=\"EG_RPrMath\">\n    <xsd:choice>\n      <xsd:group ref=\"EG_RPr\"/>\n      <xsd:element name=\"ins\" type=\"CT_MathCtrlIns\"/>\n      <xsd:element name=\"del\" type=\"CT_MathCtrlDel\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_MathCtrlIns\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:choice minOccurs=\"0\">\n          <xsd:element name=\"del\" type=\"CT_RPrChange\" minOccurs=\"1\"/>\n          <xsd:element name=\"rPr\" type=\"CT_RPr\" minOccurs=\"1\"/>\n        </xsd:choice>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MathCtrlDel\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrackChange\">\n        <xsd:choice minOccurs=\"0\">\n          <xsd:element name=\"rPr\" type=\"CT_RPr\" minOccurs=\"1\"/>\n        </xsd:choice>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RPrOriginal\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_RPrBase\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ParaRPrOriginal\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ParaRPrTrackChanges\" minOccurs=\"0\"/>\n      <xsd:group ref=\"EG_RPrBase\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ParaRPr\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_ParaRPrTrackChanges\" minOccurs=\"0\"/>\n      <xsd:group ref=\"EG_RPrBase\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"rPrChange\" type=\"CT_ParaRPrChange\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ParaRPrTrackChanges\">\n    <xsd:sequence>\n      <xsd:element name=\"ins\" type=\"CT_TrackChange\" minOccurs=\"0\"/>\n      <xsd:element name=\"del\" type=\"CT_TrackChange\" minOccurs=\"0\"/>\n      <xsd:element name=\"moveFrom\" type=\"CT_TrackChange\" minOccurs=\"0\"/>\n      <xsd:element name=\"moveTo\" type=\"CT_TrackChange\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_AltChunk\">\n    <xsd:sequence>\n      <xsd:element name=\"altChunkPr\" type=\"CT_AltChunkPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AltChunkPr\">\n    <xsd:sequence>\n      <xsd:element name=\"matchSrc\" type=\"CT_OnOff\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_RubyAlign\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"center\"/>\n      <xsd:enumeration value=\"distributeLetter\"/>\n      <xsd:enumeration value=\"distributeSpace\"/>\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"right\"/>\n      <xsd:enumeration value=\"rightVertical\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_RubyAlign\">\n    <xsd:attribute name=\"val\" type=\"ST_RubyAlign\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RubyPr\">\n    <xsd:sequence>\n      <xsd:element name=\"rubyAlign\" type=\"CT_RubyAlign\"/>\n      <xsd:element name=\"hps\" type=\"CT_HpsMeasure\"/>\n      <xsd:element name=\"hpsRaise\" type=\"CT_HpsMeasure\"/>\n      <xsd:element name=\"hpsBaseText\" type=\"CT_HpsMeasure\"/>\n      <xsd:element name=\"lid\" type=\"CT_Lang\"/>\n      <xsd:element name=\"dirty\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_RubyContent\">\n    <xsd:choice>\n      <xsd:element name=\"r\" type=\"CT_R\"/>\n      <xsd:group ref=\"EG_RunLevelElts\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_RubyContent\">\n    <xsd:group ref=\"EG_RubyContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Ruby\">\n    <xsd:sequence>\n      <xsd:element name=\"rubyPr\" type=\"CT_RubyPr\"/>\n      <xsd:element name=\"rt\" type=\"CT_RubyContent\"/>\n      <xsd:element name=\"rubyBase\" type=\"CT_RubyContent\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Lock\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"sdtLocked\"/>\n      <xsd:enumeration value=\"contentLocked\"/>\n      <xsd:enumeration value=\"unlocked\"/>\n      <xsd:enumeration value=\"sdtContentLocked\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Lock\">\n    <xsd:attribute name=\"val\" type=\"ST_Lock\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SdtListItem\">\n    <xsd:attribute name=\"displayText\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"value\" type=\"s:ST_String\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_SdtDateMappingType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"text\"/>\n      <xsd:enumeration value=\"date\"/>\n      <xsd:enumeration value=\"dateTime\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SdtDateMappingType\">\n    <xsd:attribute name=\"val\" type=\"ST_SdtDateMappingType\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CalendarType\">\n    <xsd:attribute name=\"val\" type=\"s:ST_CalendarType\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SdtDate\">\n    <xsd:sequence>\n      <xsd:element name=\"dateFormat\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"lid\" type=\"CT_Lang\" minOccurs=\"0\"/>\n      <xsd:element name=\"storeMappedDataAs\" type=\"CT_SdtDateMappingType\" minOccurs=\"0\"/>\n      <xsd:element name=\"calendar\" type=\"CT_CalendarType\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"fullDate\" type=\"ST_DateTime\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SdtComboBox\">\n    <xsd:sequence>\n      <xsd:element name=\"listItem\" type=\"CT_SdtListItem\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"lastValue\" type=\"s:ST_String\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SdtDocPart\">\n    <xsd:sequence>\n      <xsd:element name=\"docPartGallery\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"docPartCategory\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"docPartUnique\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SdtDropDownList\">\n    <xsd:sequence>\n      <xsd:element name=\"listItem\" type=\"CT_SdtListItem\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"lastValue\" type=\"s:ST_String\" use=\"optional\" default=\"\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Placeholder\">\n    <xsd:sequence>\n      <xsd:element name=\"docPart\" type=\"CT_String\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SdtText\">\n    <xsd:attribute name=\"multiLine\" type=\"s:ST_OnOff\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DataBinding\">\n    <xsd:attribute name=\"prefixMappings\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"xpath\" type=\"s:ST_String\" use=\"required\"/>\n    <xsd:attribute name=\"storeItemID\" type=\"s:ST_String\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SdtPr\">\n    <xsd:sequence>\n      <xsd:element name=\"rPr\" type=\"CT_RPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"alias\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"tag\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"id\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"lock\" type=\"CT_Lock\" minOccurs=\"0\"/>\n      <xsd:element name=\"placeholder\" type=\"CT_Placeholder\" minOccurs=\"0\"/>\n      <xsd:element name=\"temporary\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"showingPlcHdr\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"dataBinding\" type=\"CT_DataBinding\" minOccurs=\"0\"/>\n      <xsd:element name=\"label\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"tabIndex\" type=\"CT_UnsignedDecimalNumber\" minOccurs=\"0\"/>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"1\">\n        <xsd:element name=\"equation\" type=\"CT_Empty\"/>\n        <xsd:element name=\"comboBox\" type=\"CT_SdtComboBox\"/>\n        <xsd:element name=\"date\" type=\"CT_SdtDate\"/>\n        <xsd:element name=\"docPartObj\" type=\"CT_SdtDocPart\"/>\n        <xsd:element name=\"docPartList\" type=\"CT_SdtDocPart\"/>\n        <xsd:element name=\"dropDownList\" type=\"CT_SdtDropDownList\"/>\n        <xsd:element name=\"picture\" type=\"CT_Empty\"/>\n        <xsd:element name=\"richText\" type=\"CT_Empty\"/>\n        <xsd:element name=\"text\" type=\"CT_SdtText\"/>\n        <xsd:element name=\"citation\" type=\"CT_Empty\"/>\n        <xsd:element name=\"group\" type=\"CT_Empty\"/>\n        <xsd:element name=\"bibliography\" type=\"CT_Empty\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SdtEndPr\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:element name=\"rPr\" type=\"CT_RPr\" minOccurs=\"0\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ContentRunContent\">\n    <xsd:choice>\n      <xsd:element name=\"customXml\" type=\"CT_CustomXmlRun\"/>\n      <xsd:element name=\"smartTag\" type=\"CT_SmartTagRun\"/>\n      <xsd:element name=\"sdt\" type=\"CT_SdtRun\"/>\n      <xsd:element name=\"dir\" type=\"CT_DirContentRun\"/>\n      <xsd:element name=\"bdo\" type=\"CT_BdoContentRun\"/>\n      <xsd:element name=\"r\" type=\"CT_R\"/>\n      <xsd:group ref=\"EG_RunLevelElts\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_DirContentRun\">\n    <xsd:group ref=\"EG_PContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    <xsd:attribute name=\"val\" type=\"ST_Direction\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_BdoContentRun\">\n    <xsd:group ref=\"EG_PContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    <xsd:attribute name=\"val\" type=\"ST_Direction\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Direction\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"ltr\"/>\n      <xsd:enumeration value=\"rtl\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_SdtContentRun\">\n    <xsd:group ref=\"EG_PContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ContentBlockContent\">\n    <xsd:choice>\n      <xsd:element name=\"customXml\" type=\"CT_CustomXmlBlock\"/>\n      <xsd:element name=\"sdt\" type=\"CT_SdtBlock\"/>\n      <xsd:element name=\"p\" type=\"CT_P\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"tbl\" type=\"CT_Tbl\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:group ref=\"EG_RunLevelElts\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_SdtContentBlock\">\n    <xsd:group ref=\"EG_ContentBlockContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ContentRowContent\">\n    <xsd:choice>\n      <xsd:element name=\"tr\" type=\"CT_Row\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"customXml\" type=\"CT_CustomXmlRow\"/>\n      <xsd:element name=\"sdt\" type=\"CT_SdtRow\"/>\n      <xsd:group ref=\"EG_RunLevelElts\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_SdtContentRow\">\n    <xsd:group ref=\"EG_ContentRowContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_ContentCellContent\">\n    <xsd:choice>\n      <xsd:element name=\"tc\" type=\"CT_Tc\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"customXml\" type=\"CT_CustomXmlCell\"/>\n      <xsd:element name=\"sdt\" type=\"CT_SdtCell\"/>\n      <xsd:group ref=\"EG_RunLevelElts\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_SdtContentCell\">\n    <xsd:group ref=\"EG_ContentCellContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SdtBlock\">\n    <xsd:sequence>\n      <xsd:element name=\"sdtPr\" type=\"CT_SdtPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sdtEndPr\" type=\"CT_SdtEndPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sdtContent\" type=\"CT_SdtContentBlock\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SdtRun\">\n    <xsd:sequence>\n      <xsd:element name=\"sdtPr\" type=\"CT_SdtPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sdtEndPr\" type=\"CT_SdtEndPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sdtContent\" type=\"CT_SdtContentRun\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SdtCell\">\n    <xsd:sequence>\n      <xsd:element name=\"sdtPr\" type=\"CT_SdtPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sdtEndPr\" type=\"CT_SdtEndPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sdtContent\" type=\"CT_SdtContentCell\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SdtRow\">\n    <xsd:sequence>\n      <xsd:element name=\"sdtPr\" type=\"CT_SdtPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sdtEndPr\" type=\"CT_SdtEndPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sdtContent\" type=\"CT_SdtContentRow\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Attr\">\n    <xsd:attribute name=\"uri\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_String\" use=\"required\"/>\n    <xsd:attribute name=\"val\" type=\"s:ST_String\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomXmlRun\">\n    <xsd:sequence>\n      <xsd:element name=\"customXmlPr\" type=\"CT_CustomXmlPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_PContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uri\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"element\" type=\"s:ST_XmlName\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SmartTagRun\">\n    <xsd:sequence>\n      <xsd:element name=\"smartTagPr\" type=\"CT_SmartTagPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_PContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uri\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"element\" type=\"s:ST_XmlName\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomXmlBlock\">\n    <xsd:sequence>\n      <xsd:element name=\"customXmlPr\" type=\"CT_CustomXmlPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_ContentBlockContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uri\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"element\" type=\"s:ST_XmlName\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomXmlPr\">\n    <xsd:sequence>\n      <xsd:element name=\"placeholder\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"attr\" type=\"CT_Attr\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomXmlRow\">\n    <xsd:sequence>\n      <xsd:element name=\"customXmlPr\" type=\"CT_CustomXmlPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_ContentRowContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uri\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"element\" type=\"s:ST_XmlName\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CustomXmlCell\">\n    <xsd:sequence>\n      <xsd:element name=\"customXmlPr\" type=\"CT_CustomXmlPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_ContentCellContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"uri\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"element\" type=\"s:ST_XmlName\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SmartTagPr\">\n    <xsd:sequence>\n      <xsd:element name=\"attr\" type=\"CT_Attr\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:group name=\"EG_PContent\">\n    <xsd:choice>\n      <xsd:group ref=\"EG_ContentRunContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"fldSimple\" type=\"CT_SimpleField\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"hyperlink\" type=\"CT_Hyperlink\"/>\n      <xsd:element name=\"subDoc\" type=\"CT_Rel\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_P\">\n    <xsd:sequence>\n      <xsd:element name=\"pPr\" type=\"CT_PPr\" minOccurs=\"0\"/>\n      <xsd:group ref=\"EG_PContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"rsidRPr\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"rsidR\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"rsidDel\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"rsidP\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"rsidRDefault\" type=\"ST_LongHexNumber\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TblWidth\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"nil\"/>\n      <xsd:enumeration value=\"pct\"/>\n      <xsd:enumeration value=\"dxa\"/>\n      <xsd:enumeration value=\"auto\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Height\">\n    <xsd:attribute name=\"val\" type=\"s:ST_TwipsMeasure\"/>\n    <xsd:attribute name=\"hRule\" type=\"ST_HeightRule\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_MeasurementOrPercent\">\n    <xsd:union memberTypes=\"ST_DecimalNumberOrPercent s:ST_UniversalMeasure\"/>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TblWidth\">\n    <xsd:attribute name=\"w\" type=\"ST_MeasurementOrPercent\"/>\n    <xsd:attribute name=\"type\" type=\"ST_TblWidth\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblGridCol\">\n    <xsd:attribute name=\"w\" type=\"s:ST_TwipsMeasure\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblGridBase\">\n    <xsd:sequence>\n      <xsd:element name=\"gridCol\" type=\"CT_TblGridCol\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblGrid\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TblGridBase\">\n        <xsd:sequence>\n          <xsd:element name=\"tblGridChange\" type=\"CT_TblGridChange\" minOccurs=\"0\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TcBorders\">\n    <xsd:sequence>\n      <xsd:element name=\"top\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"start\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"left\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"bottom\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"end\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"right\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"insideH\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"insideV\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"tl2br\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"tr2bl\" type=\"CT_Border\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TcMar\">\n    <xsd:sequence>\n      <xsd:element name=\"top\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"start\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"left\" type=\"CT_TblWidth\" minOccurs=\"0\"/>\n      <xsd:element name=\"bottom\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"end\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"right\" type=\"CT_TblWidth\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Merge\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"continue\"/>\n      <xsd:enumeration value=\"restart\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_VMerge\">\n    <xsd:attribute name=\"val\" type=\"ST_Merge\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_HMerge\">\n    <xsd:attribute name=\"val\" type=\"ST_Merge\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TcPrBase\">\n    <xsd:sequence>\n      <xsd:element name=\"cnfStyle\" type=\"CT_Cnf\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tcW\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gridSpan\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"hMerge\" type=\"CT_HMerge\" minOccurs=\"0\"/>\n      <xsd:element name=\"vMerge\" type=\"CT_VMerge\" minOccurs=\"0\"/>\n      <xsd:element name=\"tcBorders\" type=\"CT_TcBorders\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shd\" type=\"CT_Shd\" minOccurs=\"0\"/>\n      <xsd:element name=\"noWrap\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"tcMar\" type=\"CT_TcMar\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"textDirection\" type=\"CT_TextDirection\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tcFitText\" type=\"CT_OnOff\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"vAlign\" type=\"CT_VerticalJc\" minOccurs=\"0\"/>\n      <xsd:element name=\"hideMark\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"headers\" type=\"CT_Headers\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TcPr\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TcPrInner\">\n        <xsd:sequence>\n          <xsd:element name=\"tcPrChange\" type=\"CT_TcPrChange\" minOccurs=\"0\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TcPrInner\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TcPrBase\">\n        <xsd:sequence>\n          <xsd:group ref=\"EG_CellMarkupElements\" minOccurs=\"0\" maxOccurs=\"1\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Tc\">\n    <xsd:sequence>\n      <xsd:element name=\"tcPr\" type=\"CT_TcPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_BlockLevelElts\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"s:ST_String\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Cnf\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:length value=\"12\"/>\n      <xsd:pattern value=\"[01]*\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Cnf\">\n    <xsd:attribute name=\"val\" type=\"ST_Cnf\"/>\n    <xsd:attribute name=\"firstRow\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"lastRow\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"firstColumn\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"lastColumn\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"oddVBand\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"evenVBand\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"oddHBand\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"evenHBand\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"firstRowFirstColumn\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"firstRowLastColumn\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"lastRowFirstColumn\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"lastRowLastColumn\" type=\"s:ST_OnOff\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Headers\">\n    <xsd:sequence minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"header\" type=\"CT_String\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TrPrBase\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:element name=\"cnfStyle\" type=\"CT_Cnf\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"divId\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"gridBefore\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"gridAfter\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"wBefore\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"wAfter\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"cantSplit\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"trHeight\" type=\"CT_Height\" minOccurs=\"0\"/>\n      <xsd:element name=\"tblHeader\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"tblCellSpacing\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"jc\" type=\"CT_JcTable\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"hidden\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TrPr\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TrPrBase\">\n        <xsd:sequence>\n          <xsd:element name=\"ins\" type=\"CT_TrackChange\" minOccurs=\"0\"/>\n          <xsd:element name=\"del\" type=\"CT_TrackChange\" minOccurs=\"0\"/>\n          <xsd:element name=\"trPrChange\" type=\"CT_TrPrChange\" minOccurs=\"0\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Row\">\n    <xsd:sequence>\n      <xsd:element name=\"tblPrEx\" type=\"CT_TblPrEx\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"trPr\" type=\"CT_TrPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:group ref=\"EG_ContentCellContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"rsidRPr\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"rsidR\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"rsidDel\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"rsidTr\" type=\"ST_LongHexNumber\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TblLayoutType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"fixed\"/>\n      <xsd:enumeration value=\"autofit\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TblLayoutType\">\n    <xsd:attribute name=\"type\" type=\"ST_TblLayoutType\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TblOverlap\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"never\"/>\n      <xsd:enumeration value=\"overlap\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TblOverlap\">\n    <xsd:attribute name=\"val\" type=\"ST_TblOverlap\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblPPr\">\n    <xsd:attribute name=\"leftFromText\" type=\"s:ST_TwipsMeasure\"/>\n    <xsd:attribute name=\"rightFromText\" type=\"s:ST_TwipsMeasure\"/>\n    <xsd:attribute name=\"topFromText\" type=\"s:ST_TwipsMeasure\"/>\n    <xsd:attribute name=\"bottomFromText\" type=\"s:ST_TwipsMeasure\"/>\n    <xsd:attribute name=\"vertAnchor\" type=\"ST_VAnchor\"/>\n    <xsd:attribute name=\"horzAnchor\" type=\"ST_HAnchor\"/>\n    <xsd:attribute name=\"tblpXSpec\" type=\"s:ST_XAlign\"/>\n    <xsd:attribute name=\"tblpX\" type=\"ST_SignedTwipsMeasure\"/>\n    <xsd:attribute name=\"tblpYSpec\" type=\"s:ST_YAlign\"/>\n    <xsd:attribute name=\"tblpY\" type=\"ST_SignedTwipsMeasure\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblCellMar\">\n    <xsd:sequence>\n      <xsd:element name=\"top\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"start\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"left\" type=\"CT_TblWidth\" minOccurs=\"0\"/>\n      <xsd:element name=\"bottom\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"end\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"right\" type=\"CT_TblWidth\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblBorders\">\n    <xsd:sequence>\n      <xsd:element name=\"top\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"start\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"left\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"bottom\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"end\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"right\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"insideH\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"insideV\" type=\"CT_Border\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblPrBase\">\n    <xsd:sequence>\n      <xsd:element name=\"tblStyle\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"tblpPr\" type=\"CT_TblPPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblOverlap\" type=\"CT_TblOverlap\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"bidiVisual\" type=\"CT_OnOff\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblStyleRowBandSize\" type=\"CT_DecimalNumber\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblStyleColBandSize\" type=\"CT_DecimalNumber\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblW\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"jc\" type=\"CT_JcTable\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblCellSpacing\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblInd\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblBorders\" type=\"CT_TblBorders\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shd\" type=\"CT_Shd\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblLayout\" type=\"CT_TblLayoutType\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblCellMar\" type=\"CT_TblCellMar\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblLook\" type=\"CT_TblLook\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblCaption\" type=\"CT_String\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblDescription\" type=\"CT_String\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblPr\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TblPrBase\">\n        <xsd:sequence>\n          <xsd:element name=\"tblPrChange\" type=\"CT_TblPrChange\" minOccurs=\"0\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblPrExBase\">\n    <xsd:sequence>\n      <xsd:element name=\"tblW\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"jc\" type=\"CT_JcTable\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblCellSpacing\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblInd\" type=\"CT_TblWidth\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblBorders\" type=\"CT_TblBorders\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shd\" type=\"CT_Shd\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblLayout\" type=\"CT_TblLayoutType\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblCellMar\" type=\"CT_TblCellMar\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblLook\" type=\"CT_TblLook\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblPrEx\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_TblPrExBase\">\n        <xsd:sequence>\n          <xsd:element name=\"tblPrExChange\" type=\"CT_TblPrExChange\" minOccurs=\"0\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Tbl\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_RangeMarkupElements\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"tblPr\" type=\"CT_TblPr\"/>\n      <xsd:element name=\"tblGrid\" type=\"CT_TblGrid\"/>\n      <xsd:group ref=\"EG_ContentRowContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TblLook\">\n    <xsd:attribute name=\"firstRow\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"lastRow\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"firstColumn\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"lastColumn\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"noHBand\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"noVBand\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"val\" type=\"ST_ShortHexNumber\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FtnPos\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"pageBottom\"/>\n      <xsd:enumeration value=\"beneathText\"/>\n      <xsd:enumeration value=\"sectEnd\"/>\n      <xsd:enumeration value=\"docEnd\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_FtnPos\">\n    <xsd:attribute name=\"val\" type=\"ST_FtnPos\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_EdnPos\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"sectEnd\"/>\n      <xsd:enumeration value=\"docEnd\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_EdnPos\">\n    <xsd:attribute name=\"val\" type=\"ST_EdnPos\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NumFmt\">\n    <xsd:attribute name=\"val\" type=\"ST_NumberFormat\" use=\"required\"/>\n    <xsd:attribute name=\"format\" type=\"s:ST_String\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_RestartNumber\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"continuous\"/>\n      <xsd:enumeration value=\"eachSect\"/>\n      <xsd:enumeration value=\"eachPage\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_NumRestart\">\n    <xsd:attribute name=\"val\" type=\"ST_RestartNumber\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FtnEdnRef\">\n    <xsd:attribute name=\"customMarkFollows\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"id\" use=\"required\" type=\"ST_DecimalNumber\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FtnEdnSepRef\">\n    <xsd:attribute name=\"id\" type=\"ST_DecimalNumber\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FtnEdn\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_BlockLevelElts\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"type\" type=\"ST_FtnEdn\" use=\"optional\"/>\n    <xsd:attribute name=\"id\" type=\"ST_DecimalNumber\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:group name=\"EG_FtnEdnNumProps\">\n    <xsd:sequence>\n      <xsd:element name=\"numStart\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"numRestart\" type=\"CT_NumRestart\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:group>\n  <xsd:complexType name=\"CT_FtnProps\">\n    <xsd:sequence>\n      <xsd:element name=\"pos\" type=\"CT_FtnPos\" minOccurs=\"0\"/>\n      <xsd:element name=\"numFmt\" type=\"CT_NumFmt\" minOccurs=\"0\"/>\n      <xsd:group ref=\"EG_FtnEdnNumProps\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EdnProps\">\n    <xsd:sequence>\n      <xsd:element name=\"pos\" type=\"CT_EdnPos\" minOccurs=\"0\"/>\n      <xsd:element name=\"numFmt\" type=\"CT_NumFmt\" minOccurs=\"0\"/>\n      <xsd:group ref=\"EG_FtnEdnNumProps\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FtnDocProps\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_FtnProps\">\n        <xsd:sequence>\n          <xsd:element name=\"footnote\" type=\"CT_FtnEdnSepRef\" minOccurs=\"0\" maxOccurs=\"3\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_EdnDocProps\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_EdnProps\">\n        <xsd:sequence>\n          <xsd:element name=\"endnote\" type=\"CT_FtnEdnSepRef\" minOccurs=\"0\" maxOccurs=\"3\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RecipientData\">\n    <xsd:sequence>\n      <xsd:element name=\"active\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"column\" type=\"CT_DecimalNumber\" minOccurs=\"1\"/>\n      <xsd:element name=\"uniqueTag\" type=\"CT_Base64Binary\" minOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Base64Binary\">\n    <xsd:attribute name=\"val\" type=\"xsd:base64Binary\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Recipients\">\n    <xsd:sequence>\n      <xsd:element name=\"recipientData\" type=\"CT_RecipientData\" minOccurs=\"1\" maxOccurs=\"unbounded\"\n      />\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"recipients\" type=\"CT_Recipients\"/>\n  <xsd:complexType name=\"CT_OdsoFieldMapData\">\n    <xsd:sequence>\n      <xsd:element name=\"type\" type=\"CT_MailMergeOdsoFMDFieldType\" minOccurs=\"0\"/>\n      <xsd:element name=\"name\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"mappedName\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"column\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"lid\" type=\"CT_Lang\" minOccurs=\"0\"/>\n      <xsd:element name=\"dynamicAddress\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_MailMergeSourceType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"database\"/>\n      <xsd:enumeration value=\"addressBook\"/>\n      <xsd:enumeration value=\"document1\"/>\n      <xsd:enumeration value=\"document2\"/>\n      <xsd:enumeration value=\"text\"/>\n      <xsd:enumeration value=\"email\"/>\n      <xsd:enumeration value=\"native\"/>\n      <xsd:enumeration value=\"legacy\"/>\n      <xsd:enumeration value=\"master\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_MailMergeSourceType\">\n    <xsd:attribute name=\"val\" use=\"required\" type=\"ST_MailMergeSourceType\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Odso\">\n    <xsd:sequence>\n      <xsd:element name=\"udl\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"table\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"src\" type=\"CT_Rel\" minOccurs=\"0\"/>\n      <xsd:element name=\"colDelim\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"type\" type=\"CT_MailMergeSourceType\" minOccurs=\"0\"/>\n      <xsd:element name=\"fHdr\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"fieldMapData\" type=\"CT_OdsoFieldMapData\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"recipientData\" type=\"CT_Rel\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_MailMerge\">\n    <xsd:sequence>\n      <xsd:element name=\"mainDocumentType\" type=\"CT_MailMergeDocType\" minOccurs=\"1\"/>\n      <xsd:element name=\"linkToQuery\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"dataType\" type=\"CT_MailMergeDataType\" minOccurs=\"1\"/>\n      <xsd:element name=\"connectString\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"query\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"dataSource\" type=\"CT_Rel\" minOccurs=\"0\"/>\n      <xsd:element name=\"headerSource\" type=\"CT_Rel\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotSuppressBlankLines\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"destination\" type=\"CT_MailMergeDest\" minOccurs=\"0\"/>\n      <xsd:element name=\"addressFieldName\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"mailSubject\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"mailAsAttachment\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"viewMergedData\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"activeRecord\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"checkErrors\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"odso\" type=\"CT_Odso\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TargetScreenSz\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"544x376\"/>\n      <xsd:enumeration value=\"640x480\"/>\n      <xsd:enumeration value=\"720x512\"/>\n      <xsd:enumeration value=\"800x600\"/>\n      <xsd:enumeration value=\"1024x768\"/>\n      <xsd:enumeration value=\"1152x882\"/>\n      <xsd:enumeration value=\"1152x900\"/>\n      <xsd:enumeration value=\"1280x1024\"/>\n      <xsd:enumeration value=\"1600x1200\"/>\n      <xsd:enumeration value=\"1800x1440\"/>\n      <xsd:enumeration value=\"1920x1200\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TargetScreenSz\">\n    <xsd:attribute name=\"val\" type=\"ST_TargetScreenSz\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Compat\">\n    <xsd:sequence>\n      <xsd:element name=\"useSingleBorderforContiguousCells\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"wpJustification\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"noTabHangInd\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"noLeading\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"spaceForUL\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"noColumnBalance\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"balanceSingleByteDoubleByteWidth\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"noExtraLineSpacing\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotLeaveBackslashAlone\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"ulTrailSpace\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotExpandShiftReturn\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"spacingInWholePoints\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"lineWrapLikeWord6\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"printBodyTextBeforeHeader\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"printColBlack\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"wpSpaceWidth\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"showBreaksInFrames\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"subFontBySize\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"suppressBottomSpacing\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"suppressTopSpacing\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"suppressSpacingAtTopOfPage\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"suppressTopSpacingWP\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"suppressSpBfAfterPgBrk\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"swapBordersFacingPages\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"convMailMergeEsc\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"truncateFontHeightsLikeWP6\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"mwSmallCaps\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"usePrinterMetrics\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotSuppressParagraphBorders\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"wrapTrailSpaces\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"footnoteLayoutLikeWW8\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"shapeLayoutLikeWW8\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"alignTablesRowByRow\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"forgetLastTabAlignment\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"adjustLineHeightInTable\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"autoSpaceLikeWord95\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"noSpaceRaiseLower\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotUseHTMLParagraphAutoSpacing\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"layoutRawTableWidth\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"layoutTableRowsApart\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"useWord97LineBreakRules\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotBreakWrappedTables\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotSnapToGridInCell\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"selectFldWithFirstOrLastChar\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"applyBreakingRules\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotWrapTextWithPunct\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotUseEastAsianBreakRules\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"useWord2002TableStyleRules\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"growAutofit\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"useFELayout\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"useNormalStyleForList\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotUseIndentAsNumberingTabStop\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"useAltKinsokuLineBreakRules\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"allowSpaceOfSameStyleInTable\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotSuppressIndentation\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotAutofitConstrainedTables\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"autofitToFirstFixedWidthCell\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"underlineTabInNumList\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"displayHangulFixedWidth\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"splitPgBreakAndParaMark\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotVertAlignCellWithSp\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotBreakConstrainedForcedTable\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotVertAlignInTxbx\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"useAnsiKerningPairs\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"cachedColBalance\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"compatSetting\" type=\"CT_CompatSetting\" minOccurs=\"0\" maxOccurs=\"unbounded\"\n      />\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_CompatSetting\">\n    <xsd:attribute name=\"name\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"uri\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"val\" type=\"s:ST_String\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DocVar\">\n    <xsd:attribute name=\"name\" type=\"s:ST_String\" use=\"required\"/>\n    <xsd:attribute name=\"val\" type=\"s:ST_String\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DocVars\">\n    <xsd:sequence>\n      <xsd:element name=\"docVar\" type=\"CT_DocVar\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DocRsids\">\n    <xsd:sequence>\n      <xsd:element name=\"rsidRoot\" type=\"CT_LongHexNumber\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"rsid\" type=\"CT_LongHexNumber\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_CharacterSpacing\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"doNotCompress\"/>\n      <xsd:enumeration value=\"compressPunctuation\"/>\n      <xsd:enumeration value=\"compressPunctuationAndJapaneseKana\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_CharacterSpacing\">\n    <xsd:attribute name=\"val\" type=\"ST_CharacterSpacing\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_SaveThroughXslt\">\n    <xsd:attribute ref=\"r:id\" use=\"optional\"/>\n    <xsd:attribute name=\"solutionID\" type=\"s:ST_String\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_RPrDefault\">\n    <xsd:sequence>\n      <xsd:element name=\"rPr\" type=\"CT_RPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_PPrDefault\">\n    <xsd:sequence>\n      <xsd:element name=\"pPr\" type=\"CT_PPrGeneral\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DocDefaults\">\n    <xsd:sequence>\n      <xsd:element name=\"rPrDefault\" type=\"CT_RPrDefault\" minOccurs=\"0\"/>\n      <xsd:element name=\"pPrDefault\" type=\"CT_PPrDefault\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_WmlColorSchemeIndex\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"dark1\"/>\n      <xsd:enumeration value=\"light1\"/>\n      <xsd:enumeration value=\"dark2\"/>\n      <xsd:enumeration value=\"light2\"/>\n      <xsd:enumeration value=\"accent1\"/>\n      <xsd:enumeration value=\"accent2\"/>\n      <xsd:enumeration value=\"accent3\"/>\n      <xsd:enumeration value=\"accent4\"/>\n      <xsd:enumeration value=\"accent5\"/>\n      <xsd:enumeration value=\"accent6\"/>\n      <xsd:enumeration value=\"hyperlink\"/>\n      <xsd:enumeration value=\"followedHyperlink\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_ColorSchemeMapping\">\n    <xsd:attribute name=\"bg1\" type=\"ST_WmlColorSchemeIndex\"/>\n    <xsd:attribute name=\"t1\" type=\"ST_WmlColorSchemeIndex\"/>\n    <xsd:attribute name=\"bg2\" type=\"ST_WmlColorSchemeIndex\"/>\n    <xsd:attribute name=\"t2\" type=\"ST_WmlColorSchemeIndex\"/>\n    <xsd:attribute name=\"accent1\" type=\"ST_WmlColorSchemeIndex\"/>\n    <xsd:attribute name=\"accent2\" type=\"ST_WmlColorSchemeIndex\"/>\n    <xsd:attribute name=\"accent3\" type=\"ST_WmlColorSchemeIndex\"/>\n    <xsd:attribute name=\"accent4\" type=\"ST_WmlColorSchemeIndex\"/>\n    <xsd:attribute name=\"accent5\" type=\"ST_WmlColorSchemeIndex\"/>\n    <xsd:attribute name=\"accent6\" type=\"ST_WmlColorSchemeIndex\"/>\n    <xsd:attribute name=\"hyperlink\" type=\"ST_WmlColorSchemeIndex\"/>\n    <xsd:attribute name=\"followedHyperlink\" type=\"ST_WmlColorSchemeIndex\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ReadingModeInkLockDown\">\n    <xsd:attribute name=\"actualPg\" type=\"s:ST_OnOff\" use=\"required\"/>\n    <xsd:attribute name=\"w\" type=\"ST_PixelsMeasure\" use=\"required\"/>\n    <xsd:attribute name=\"h\" type=\"ST_PixelsMeasure\" use=\"required\"/>\n    <xsd:attribute name=\"fontSz\" type=\"ST_DecimalNumberOrPercent\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_WriteProtection\">\n    <xsd:attribute name=\"recommended\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attributeGroup ref=\"AG_Password\"/>\n    <xsd:attributeGroup ref=\"AG_TransitionalPassword\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Settings\">\n    <xsd:sequence>\n      <xsd:element name=\"writeProtection\" type=\"CT_WriteProtection\" minOccurs=\"0\"/>\n      <xsd:element name=\"view\" type=\"CT_View\" minOccurs=\"0\"/>\n      <xsd:element name=\"zoom\" type=\"CT_Zoom\" minOccurs=\"0\"/>\n      <xsd:element name=\"removePersonalInformation\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"removeDateAndTime\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotDisplayPageBoundaries\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"displayBackgroundShape\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"printPostScriptOverText\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"printFractionalCharacterWidth\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"printFormsData\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"embedTrueTypeFonts\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"embedSystemFonts\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"saveSubsetFonts\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"saveFormsData\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"mirrorMargins\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"alignBordersAndEdges\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"bordersDoNotSurroundHeader\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"bordersDoNotSurroundFooter\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"gutterAtTop\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"hideSpellingErrors\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"hideGrammaticalErrors\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"activeWritingStyle\" type=\"CT_WritingStyle\" minOccurs=\"0\"\n        maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"proofState\" type=\"CT_Proof\" minOccurs=\"0\"/>\n      <xsd:element name=\"formsDesign\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"attachedTemplate\" type=\"CT_Rel\" minOccurs=\"0\"/>\n      <xsd:element name=\"linkStyles\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"stylePaneFormatFilter\" type=\"CT_StylePaneFilter\" minOccurs=\"0\"/>\n      <xsd:element name=\"stylePaneSortMethod\" type=\"CT_StyleSort\" minOccurs=\"0\"/>\n      <xsd:element name=\"documentType\" type=\"CT_DocType\" minOccurs=\"0\"/>\n      <xsd:element name=\"mailMerge\" type=\"CT_MailMerge\" minOccurs=\"0\"/>\n      <xsd:element name=\"revisionView\" type=\"CT_TrackChangesView\" minOccurs=\"0\"/>\n      <xsd:element name=\"trackRevisions\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotTrackMoves\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotTrackFormatting\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"documentProtection\" type=\"CT_DocProtect\" minOccurs=\"0\"/>\n      <xsd:element name=\"autoFormatOverride\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"styleLockTheme\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"styleLockQFSet\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"defaultTabStop\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"autoHyphenation\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"consecutiveHyphenLimit\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"hyphenationZone\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotHyphenateCaps\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"showEnvelope\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"summaryLength\" type=\"CT_DecimalNumberOrPrecent\" minOccurs=\"0\"/>\n      <xsd:element name=\"clickAndTypeStyle\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"defaultTableStyle\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"evenAndOddHeaders\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"bookFoldRevPrinting\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"bookFoldPrinting\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"bookFoldPrintingSheets\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"drawingGridHorizontalSpacing\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"drawingGridVerticalSpacing\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"displayHorizontalDrawingGridEvery\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"displayVerticalDrawingGridEvery\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotUseMarginsForDrawingGridOrigin\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"drawingGridHorizontalOrigin\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"drawingGridVerticalOrigin\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotShadeFormData\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"noPunctuationKerning\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"characterSpacingControl\" type=\"CT_CharacterSpacing\" minOccurs=\"0\"/>\n      <xsd:element name=\"printTwoOnOne\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"strictFirstAndLastChars\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"noLineBreaksAfter\" type=\"CT_Kinsoku\" minOccurs=\"0\"/>\n      <xsd:element name=\"noLineBreaksBefore\" type=\"CT_Kinsoku\" minOccurs=\"0\"/>\n      <xsd:element name=\"savePreviewPicture\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotValidateAgainstSchema\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"saveInvalidXml\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"ignoreMixedContent\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"alwaysShowPlaceholderText\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotDemarcateInvalidXml\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"saveXmlDataOnly\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"useXSLTWhenSaving\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"saveThroughXslt\" type=\"CT_SaveThroughXslt\" minOccurs=\"0\"/>\n      <xsd:element name=\"showXMLTags\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"alwaysMergeEmptyNamespace\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"updateFields\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"hdrShapeDefaults\" type=\"CT_ShapeDefaults\" minOccurs=\"0\"/>\n      <xsd:element name=\"footnotePr\" type=\"CT_FtnDocProps\" minOccurs=\"0\"/>\n      <xsd:element name=\"endnotePr\" type=\"CT_EdnDocProps\" minOccurs=\"0\"/>\n      <xsd:element name=\"compat\" type=\"CT_Compat\" minOccurs=\"0\"/>\n      <xsd:element name=\"docVars\" type=\"CT_DocVars\" minOccurs=\"0\"/>\n      <xsd:element name=\"rsids\" type=\"CT_DocRsids\" minOccurs=\"0\"/>\n      <xsd:element ref=\"m:mathPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"attachedSchema\" type=\"CT_String\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"themeFontLang\" type=\"CT_Language\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"clrSchemeMapping\" type=\"CT_ColorSchemeMapping\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotIncludeSubdocsInStats\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotAutoCompressPictures\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"forceUpgrade\" type=\"CT_Empty\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"captions\" type=\"CT_Captions\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"readModeInkLockDown\" type=\"CT_ReadingModeInkLockDown\" minOccurs=\"0\"/>\n      <xsd:element name=\"smartTagType\" type=\"CT_SmartTagType\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element ref=\"sl:schemaLibrary\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"shapeDefaults\" type=\"CT_ShapeDefaults\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotEmbedSmartTags\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"decimalSymbol\" type=\"CT_String\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"listSeparator\" type=\"CT_String\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_StyleSort\">\n    <xsd:attribute name=\"val\" type=\"ST_StyleSort\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_StylePaneFilter\">\n    <xsd:attribute name=\"allStyles\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"customStyles\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"latentStyles\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"stylesInUse\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"headingStyles\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"numberingStyles\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"tableStyles\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"directFormattingOnRuns\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"directFormattingOnParagraphs\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"directFormattingOnNumbering\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"directFormattingOnTables\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"clearFormatting\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"top3HeadingStyles\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"visibleStyles\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"alternateStyleNames\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"val\" type=\"ST_ShortHexNumber\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_StyleSort\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"name\"/>\n      <xsd:enumeration value=\"priority\"/>\n      <xsd:enumeration value=\"default\"/>\n      <xsd:enumeration value=\"font\"/>\n      <xsd:enumeration value=\"basedOn\"/>\n      <xsd:enumeration value=\"type\"/>\n      <xsd:enumeration value=\"0000\"/>\n      <xsd:enumeration value=\"0001\"/>\n      <xsd:enumeration value=\"0002\"/>\n      <xsd:enumeration value=\"0003\"/>\n      <xsd:enumeration value=\"0004\"/>\n      <xsd:enumeration value=\"0005\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_WebSettings\">\n    <xsd:sequence>\n      <xsd:element name=\"frameset\" type=\"CT_Frameset\" minOccurs=\"0\"/>\n      <xsd:element name=\"divs\" type=\"CT_Divs\" minOccurs=\"0\"/>\n      <xsd:element name=\"encoding\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"optimizeForBrowser\" type=\"CT_OptimizeForBrowser\" minOccurs=\"0\"/>\n      <xsd:element name=\"relyOnVML\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"allowPNG\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotRelyOnCSS\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotSaveAsSingleFile\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotOrganizeInFolder\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"doNotUseLongFileNames\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"pixelsPerInch\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"targetScreenSz\" type=\"CT_TargetScreenSz\" minOccurs=\"0\"/>\n      <xsd:element name=\"saveSmartTagsAsXml\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FrameScrollbar\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"on\"/>\n      <xsd:enumeration value=\"off\"/>\n      <xsd:enumeration value=\"auto\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_FrameScrollbar\">\n    <xsd:attribute name=\"val\" type=\"ST_FrameScrollbar\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_OptimizeForBrowser\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_OnOff\">\n        <xsd:attribute name=\"target\" type=\"s:ST_String\" use=\"optional\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Frame\">\n    <xsd:sequence>\n      <xsd:element name=\"sz\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"name\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"title\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"longDesc\" type=\"CT_Rel\" minOccurs=\"0\"/>\n      <xsd:element name=\"sourceFileName\" type=\"CT_Rel\" minOccurs=\"0\"/>\n      <xsd:element name=\"marW\" type=\"CT_PixelsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"marH\" type=\"CT_PixelsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"scrollbar\" type=\"CT_FrameScrollbar\" minOccurs=\"0\"/>\n      <xsd:element name=\"noResizeAllowed\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"linkedToFile\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FrameLayout\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"rows\"/>\n      <xsd:enumeration value=\"cols\"/>\n      <xsd:enumeration value=\"none\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_FrameLayout\">\n    <xsd:attribute name=\"val\" type=\"ST_FrameLayout\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FramesetSplitbar\">\n    <xsd:sequence>\n      <xsd:element name=\"w\" type=\"CT_TwipsMeasure\" minOccurs=\"0\"/>\n      <xsd:element name=\"color\" type=\"CT_Color\" minOccurs=\"0\"/>\n      <xsd:element name=\"noBorder\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"flatBorders\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Frameset\">\n    <xsd:sequence>\n      <xsd:element name=\"sz\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"framesetSplitbar\" type=\"CT_FramesetSplitbar\" minOccurs=\"0\"/>\n      <xsd:element name=\"frameLayout\" type=\"CT_FrameLayout\" minOccurs=\"0\"/>\n      <xsd:element name=\"title\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n        <xsd:element name=\"frameset\" type=\"CT_Frameset\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n        <xsd:element name=\"frame\" type=\"CT_Frame\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      </xsd:choice>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NumPicBullet\">\n    <xsd:choice>\n      <xsd:element name=\"pict\" type=\"CT_Picture\"/>\n      <xsd:element name=\"drawing\" type=\"CT_Drawing\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"numPicBulletId\" type=\"ST_DecimalNumber\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_LevelSuffix\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"tab\"/>\n      <xsd:enumeration value=\"space\"/>\n      <xsd:enumeration value=\"nothing\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_LevelSuffix\">\n    <xsd:attribute name=\"val\" type=\"ST_LevelSuffix\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LevelText\">\n    <xsd:attribute name=\"val\" type=\"s:ST_String\" use=\"optional\"/>\n    <xsd:attribute name=\"null\" type=\"s:ST_OnOff\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LvlLegacy\">\n    <xsd:attribute name=\"legacy\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"legacySpace\" type=\"s:ST_TwipsMeasure\" use=\"optional\"/>\n    <xsd:attribute name=\"legacyIndent\" type=\"ST_SignedTwipsMeasure\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Lvl\">\n    <xsd:sequence>\n      <xsd:element name=\"start\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"numFmt\" type=\"CT_NumFmt\" minOccurs=\"0\"/>\n      <xsd:element name=\"lvlRestart\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"pStyle\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"isLgl\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"suff\" type=\"CT_LevelSuffix\" minOccurs=\"0\"/>\n      <xsd:element name=\"lvlText\" type=\"CT_LevelText\" minOccurs=\"0\"/>\n      <xsd:element name=\"lvlPicBulletId\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"legacy\" type=\"CT_LvlLegacy\" minOccurs=\"0\"/>\n      <xsd:element name=\"lvlJc\" type=\"CT_Jc\" minOccurs=\"0\"/>\n      <xsd:element name=\"pPr\" type=\"CT_PPrGeneral\" minOccurs=\"0\"/>\n      <xsd:element name=\"rPr\" type=\"CT_RPr\" minOccurs=\"0\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"ilvl\" type=\"ST_DecimalNumber\" use=\"required\"/>\n    <xsd:attribute name=\"tplc\" type=\"ST_LongHexNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"tentative\" type=\"s:ST_OnOff\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_MultiLevelType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"singleLevel\"/>\n      <xsd:enumeration value=\"multilevel\"/>\n      <xsd:enumeration value=\"hybridMultilevel\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_MultiLevelType\">\n    <xsd:attribute name=\"val\" type=\"ST_MultiLevelType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AbstractNum\">\n    <xsd:sequence>\n      <xsd:element name=\"nsid\" type=\"CT_LongHexNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"multiLevelType\" type=\"CT_MultiLevelType\" minOccurs=\"0\"/>\n      <xsd:element name=\"tmpl\" type=\"CT_LongHexNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"name\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"styleLink\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"numStyleLink\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"lvl\" type=\"CT_Lvl\" minOccurs=\"0\" maxOccurs=\"9\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"abstractNumId\" type=\"ST_DecimalNumber\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_NumLvl\">\n    <xsd:sequence>\n      <xsd:element name=\"startOverride\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"lvl\" type=\"CT_Lvl\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"ilvl\" type=\"ST_DecimalNumber\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Num\">\n    <xsd:sequence>\n      <xsd:element name=\"abstractNumId\" type=\"CT_DecimalNumber\" minOccurs=\"1\"/>\n      <xsd:element name=\"lvlOverride\" type=\"CT_NumLvl\" minOccurs=\"0\" maxOccurs=\"9\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"numId\" type=\"ST_DecimalNumber\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Numbering\">\n    <xsd:sequence>\n      <xsd:element name=\"numPicBullet\" type=\"CT_NumPicBullet\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"abstractNum\" type=\"CT_AbstractNum\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"num\" type=\"CT_Num\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"numIdMacAtCleanup\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_TblStyleOverrideType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"wholeTable\"/>\n      <xsd:enumeration value=\"firstRow\"/>\n      <xsd:enumeration value=\"lastRow\"/>\n      <xsd:enumeration value=\"firstCol\"/>\n      <xsd:enumeration value=\"lastCol\"/>\n      <xsd:enumeration value=\"band1Vert\"/>\n      <xsd:enumeration value=\"band2Vert\"/>\n      <xsd:enumeration value=\"band1Horz\"/>\n      <xsd:enumeration value=\"band2Horz\"/>\n      <xsd:enumeration value=\"neCell\"/>\n      <xsd:enumeration value=\"nwCell\"/>\n      <xsd:enumeration value=\"seCell\"/>\n      <xsd:enumeration value=\"swCell\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_TblStylePr\">\n    <xsd:sequence>\n      <xsd:element name=\"pPr\" type=\"CT_PPrGeneral\" minOccurs=\"0\"/>\n      <xsd:element name=\"rPr\" type=\"CT_RPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"tblPr\" type=\"CT_TblPrBase\" minOccurs=\"0\"/>\n      <xsd:element name=\"trPr\" type=\"CT_TrPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tcPr\" type=\"CT_TcPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"type\" type=\"ST_TblStyleOverrideType\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_StyleType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"paragraph\"/>\n      <xsd:enumeration value=\"character\"/>\n      <xsd:enumeration value=\"table\"/>\n      <xsd:enumeration value=\"numbering\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Style\">\n    <xsd:sequence>\n      <xsd:element name=\"name\" type=\"CT_String\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"aliases\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"basedOn\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"next\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"link\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"autoRedefine\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"hidden\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"uiPriority\" type=\"CT_DecimalNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"semiHidden\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"unhideWhenUsed\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"qFormat\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"locked\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"personal\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"personalCompose\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"personalReply\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"rsid\" type=\"CT_LongHexNumber\" minOccurs=\"0\"/>\n      <xsd:element name=\"pPr\" type=\"CT_PPrGeneral\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"rPr\" type=\"CT_RPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblPr\" type=\"CT_TblPrBase\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"trPr\" type=\"CT_TrPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tcPr\" type=\"CT_TcPr\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"tblStylePr\" type=\"CT_TblStylePr\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"type\" type=\"ST_StyleType\" use=\"optional\"/>\n    <xsd:attribute name=\"styleId\" type=\"s:ST_String\" use=\"optional\"/>\n    <xsd:attribute name=\"default\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"customStyle\" type=\"s:ST_OnOff\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LsdException\">\n    <xsd:attribute name=\"name\" type=\"s:ST_String\" use=\"required\"/>\n    <xsd:attribute name=\"locked\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"uiPriority\" type=\"ST_DecimalNumber\"/>\n    <xsd:attribute name=\"semiHidden\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"unhideWhenUsed\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"qFormat\" type=\"s:ST_OnOff\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_LatentStyles\">\n    <xsd:sequence>\n      <xsd:element name=\"lsdException\" type=\"CT_LsdException\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"defLockedState\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"defUIPriority\" type=\"ST_DecimalNumber\"/>\n    <xsd:attribute name=\"defSemiHidden\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"defUnhideWhenUsed\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"defQFormat\" type=\"s:ST_OnOff\"/>\n    <xsd:attribute name=\"count\" type=\"ST_DecimalNumber\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Styles\">\n    <xsd:sequence>\n      <xsd:element name=\"docDefaults\" type=\"CT_DocDefaults\" minOccurs=\"0\"/>\n      <xsd:element name=\"latentStyles\" type=\"CT_LatentStyles\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"CT_Style\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Panose\">\n    <xsd:attribute name=\"val\" type=\"s:ST_Panose\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_FontFamily\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"decorative\"/>\n      <xsd:enumeration value=\"modern\"/>\n      <xsd:enumeration value=\"roman\"/>\n      <xsd:enumeration value=\"script\"/>\n      <xsd:enumeration value=\"swiss\"/>\n      <xsd:enumeration value=\"auto\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_FontFamily\">\n    <xsd:attribute name=\"val\" type=\"ST_FontFamily\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_Pitch\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"fixed\"/>\n      <xsd:enumeration value=\"variable\"/>\n      <xsd:enumeration value=\"default\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Pitch\">\n    <xsd:attribute name=\"val\" type=\"ST_Pitch\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FontSig\">\n    <xsd:attribute name=\"usb0\" use=\"required\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"usb1\" use=\"required\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"usb2\" use=\"required\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"usb3\" use=\"required\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"csb0\" use=\"required\" type=\"ST_LongHexNumber\"/>\n    <xsd:attribute name=\"csb1\" use=\"required\" type=\"ST_LongHexNumber\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FontRel\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_Rel\">\n        <xsd:attribute name=\"fontKey\" type=\"s:ST_Guid\"/>\n        <xsd:attribute name=\"subsetted\" type=\"s:ST_OnOff\"/>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Font\">\n    <xsd:sequence>\n      <xsd:element name=\"altName\" type=\"CT_String\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"panose1\" type=\"CT_Panose\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"charset\" type=\"CT_Charset\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"family\" type=\"CT_FontFamily\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"notTrueType\" type=\"CT_OnOff\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"pitch\" type=\"CT_Pitch\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"sig\" type=\"CT_FontSig\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"embedRegular\" type=\"CT_FontRel\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"embedBold\" type=\"CT_FontRel\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"embedItalic\" type=\"CT_FontRel\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xsd:element name=\"embedBoldItalic\" type=\"CT_FontRel\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"name\" type=\"s:ST_String\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_FontsList\">\n    <xsd:sequence>\n      <xsd:element name=\"font\" type=\"CT_Font\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DivBdr\">\n    <xsd:sequence>\n      <xsd:element name=\"top\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"left\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"bottom\" type=\"CT_Border\" minOccurs=\"0\"/>\n      <xsd:element name=\"right\" type=\"CT_Border\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Div\">\n    <xsd:sequence>\n      <xsd:element name=\"blockQuote\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"bodyDiv\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n      <xsd:element name=\"marLeft\" type=\"CT_SignedTwipsMeasure\"/>\n      <xsd:element name=\"marRight\" type=\"CT_SignedTwipsMeasure\"/>\n      <xsd:element name=\"marTop\" type=\"CT_SignedTwipsMeasure\"/>\n      <xsd:element name=\"marBottom\" type=\"CT_SignedTwipsMeasure\"/>\n      <xsd:element name=\"divBdr\" type=\"CT_DivBdr\" minOccurs=\"0\"/>\n      <xsd:element name=\"divsChild\" type=\"CT_Divs\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n    <xsd:attribute name=\"id\" type=\"ST_DecimalNumber\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Divs\">\n    <xsd:sequence minOccurs=\"1\" maxOccurs=\"unbounded\">\n      <xsd:element name=\"div\" type=\"CT_Div\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_TxbxContent\">\n    <xsd:group ref=\"EG_BlockLevelElts\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n  </xsd:complexType>\n  <xsd:element name=\"txbxContent\" type=\"CT_TxbxContent\"/>\n  <xsd:group name=\"EG_MathContent\">\n    <xsd:choice>\n      <xsd:element ref=\"m:oMathPara\"/>\n      <xsd:element ref=\"m:oMath\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:group name=\"EG_BlockLevelChunkElts\">\n    <xsd:choice>\n      <xsd:group ref=\"EG_ContentBlockContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:group name=\"EG_BlockLevelElts\">\n    <xsd:choice>\n      <xsd:group ref=\"EG_BlockLevelChunkElts\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"altChunk\" type=\"CT_AltChunk\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:group name=\"EG_RunLevelElts\">\n    <xsd:choice>\n      <xsd:element name=\"proofErr\" minOccurs=\"0\" type=\"CT_ProofErr\"/>\n      <xsd:element name=\"permStart\" minOccurs=\"0\" type=\"CT_PermStart\"/>\n      <xsd:element name=\"permEnd\" minOccurs=\"0\" type=\"CT_Perm\"/>\n      <xsd:group ref=\"EG_RangeMarkupElements\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"ins\" type=\"CT_RunTrackChange\" minOccurs=\"0\"/>\n      <xsd:element name=\"del\" type=\"CT_RunTrackChange\" minOccurs=\"0\"/>\n      <xsd:element name=\"moveFrom\" type=\"CT_RunTrackChange\"/>\n      <xsd:element name=\"moveTo\" type=\"CT_RunTrackChange\"/>\n      <xsd:group ref=\"EG_MathContent\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:group>\n  <xsd:complexType name=\"CT_Body\">\n    <xsd:sequence>\n      <xsd:group ref=\"EG_BlockLevelElts\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"sectPr\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_SectPr\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_ShapeDefaults\">\n    <xsd:choice maxOccurs=\"unbounded\">\n      <xsd:any processContents=\"lax\" namespace=\"urn:schemas-microsoft-com:office:office\"\n        minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Comments\">\n    <xsd:sequence>\n      <xsd:element name=\"comment\" type=\"CT_Comment\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"comments\" type=\"CT_Comments\"/>\n  <xsd:complexType name=\"CT_Footnotes\">\n    <xsd:sequence maxOccurs=\"unbounded\">\n      <xsd:element name=\"footnote\" type=\"CT_FtnEdn\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"footnotes\" type=\"CT_Footnotes\"/>\n  <xsd:complexType name=\"CT_Endnotes\">\n    <xsd:sequence maxOccurs=\"unbounded\">\n      <xsd:element name=\"endnote\" type=\"CT_FtnEdn\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:element name=\"endnotes\" type=\"CT_Endnotes\"/>\n  <xsd:element name=\"hdr\" type=\"CT_HdrFtr\"/>\n  <xsd:element name=\"ftr\" type=\"CT_HdrFtr\"/>\n  <xsd:complexType name=\"CT_SmartTagType\">\n    <xsd:attribute name=\"namespaceuri\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"name\" type=\"s:ST_String\"/>\n    <xsd:attribute name=\"url\" type=\"s:ST_String\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_ThemeColor\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"dark1\"/>\n      <xsd:enumeration value=\"light1\"/>\n      <xsd:enumeration value=\"dark2\"/>\n      <xsd:enumeration value=\"light2\"/>\n      <xsd:enumeration value=\"accent1\"/>\n      <xsd:enumeration value=\"accent2\"/>\n      <xsd:enumeration value=\"accent3\"/>\n      <xsd:enumeration value=\"accent4\"/>\n      <xsd:enumeration value=\"accent5\"/>\n      <xsd:enumeration value=\"accent6\"/>\n      <xsd:enumeration value=\"hyperlink\"/>\n      <xsd:enumeration value=\"followedHyperlink\"/>\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"background1\"/>\n      <xsd:enumeration value=\"text1\"/>\n      <xsd:enumeration value=\"background2\"/>\n      <xsd:enumeration value=\"text2\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:simpleType name=\"ST_DocPartBehavior\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"content\"/>\n      <xsd:enumeration value=\"p\"/>\n      <xsd:enumeration value=\"pg\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DocPartBehavior\">\n    <xsd:attribute name=\"val\" use=\"required\" type=\"ST_DocPartBehavior\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DocPartBehaviors\">\n    <xsd:choice>\n      <xsd:element name=\"behavior\" type=\"CT_DocPartBehavior\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DocPartType\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"none\"/>\n      <xsd:enumeration value=\"normal\"/>\n      <xsd:enumeration value=\"autoExp\"/>\n      <xsd:enumeration value=\"toolbar\"/>\n      <xsd:enumeration value=\"speller\"/>\n      <xsd:enumeration value=\"formFld\"/>\n      <xsd:enumeration value=\"bbPlcHdr\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DocPartType\">\n    <xsd:attribute name=\"val\" use=\"required\" type=\"ST_DocPartType\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DocPartTypes\">\n    <xsd:choice>\n      <xsd:element name=\"type\" type=\"CT_DocPartType\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n    <xsd:attribute name=\"all\" type=\"s:ST_OnOff\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:simpleType name=\"ST_DocPartGallery\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"placeholder\"/>\n      <xsd:enumeration value=\"any\"/>\n      <xsd:enumeration value=\"default\"/>\n      <xsd:enumeration value=\"docParts\"/>\n      <xsd:enumeration value=\"coverPg\"/>\n      <xsd:enumeration value=\"eq\"/>\n      <xsd:enumeration value=\"ftrs\"/>\n      <xsd:enumeration value=\"hdrs\"/>\n      <xsd:enumeration value=\"pgNum\"/>\n      <xsd:enumeration value=\"tbls\"/>\n      <xsd:enumeration value=\"watermarks\"/>\n      <xsd:enumeration value=\"autoTxt\"/>\n      <xsd:enumeration value=\"txtBox\"/>\n      <xsd:enumeration value=\"pgNumT\"/>\n      <xsd:enumeration value=\"pgNumB\"/>\n      <xsd:enumeration value=\"pgNumMargins\"/>\n      <xsd:enumeration value=\"tblOfContents\"/>\n      <xsd:enumeration value=\"bib\"/>\n      <xsd:enumeration value=\"custQuickParts\"/>\n      <xsd:enumeration value=\"custCoverPg\"/>\n      <xsd:enumeration value=\"custEq\"/>\n      <xsd:enumeration value=\"custFtrs\"/>\n      <xsd:enumeration value=\"custHdrs\"/>\n      <xsd:enumeration value=\"custPgNum\"/>\n      <xsd:enumeration value=\"custTbls\"/>\n      <xsd:enumeration value=\"custWatermarks\"/>\n      <xsd:enumeration value=\"custAutoTxt\"/>\n      <xsd:enumeration value=\"custTxtBox\"/>\n      <xsd:enumeration value=\"custPgNumT\"/>\n      <xsd:enumeration value=\"custPgNumB\"/>\n      <xsd:enumeration value=\"custPgNumMargins\"/>\n      <xsd:enumeration value=\"custTblOfContents\"/>\n      <xsd:enumeration value=\"custBib\"/>\n      <xsd:enumeration value=\"custom1\"/>\n      <xsd:enumeration value=\"custom2\"/>\n      <xsd:enumeration value=\"custom3\"/>\n      <xsd:enumeration value=\"custom4\"/>\n      <xsd:enumeration value=\"custom5\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_DocPartGallery\">\n    <xsd:attribute name=\"val\" type=\"ST_DocPartGallery\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DocPartCategory\">\n    <xsd:sequence>\n      <xsd:element name=\"name\" type=\"CT_String\" minOccurs=\"1\" maxOccurs=\"1\"/>\n      <xsd:element name=\"gallery\" type=\"CT_DocPartGallery\" minOccurs=\"1\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DocPartName\">\n    <xsd:attribute name=\"val\" type=\"s:ST_String\" use=\"required\"/>\n    <xsd:attribute name=\"decorated\" type=\"s:ST_OnOff\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DocPartPr\">\n    <xsd:all>\n      <xsd:element name=\"name\" type=\"CT_DocPartName\" minOccurs=\"1\"/>\n      <xsd:element name=\"style\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"category\" type=\"CT_DocPartCategory\" minOccurs=\"0\"/>\n      <xsd:element name=\"types\" type=\"CT_DocPartTypes\" minOccurs=\"0\"/>\n      <xsd:element name=\"behaviors\" type=\"CT_DocPartBehaviors\" minOccurs=\"0\"/>\n      <xsd:element name=\"description\" type=\"CT_String\" minOccurs=\"0\"/>\n      <xsd:element name=\"guid\" type=\"CT_Guid\" minOccurs=\"0\"/>\n    </xsd:all>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DocPart\">\n    <xsd:sequence>\n      <xsd:element name=\"docPartPr\" type=\"CT_DocPartPr\" minOccurs=\"0\"/>\n      <xsd:element name=\"docPartBody\" type=\"CT_Body\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DocParts\">\n    <xsd:choice>\n      <xsd:element name=\"docPart\" type=\"CT_DocPart\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:choice>\n  </xsd:complexType>\n  <xsd:element name=\"settings\" type=\"CT_Settings\"/>\n  <xsd:element name=\"webSettings\" type=\"CT_WebSettings\"/>\n  <xsd:element name=\"fonts\" type=\"CT_FontsList\"/>\n  <xsd:element name=\"numbering\" type=\"CT_Numbering\"/>\n  <xsd:element name=\"styles\" type=\"CT_Styles\"/>\n  <xsd:simpleType name=\"ST_CaptionPos\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"above\"/>\n      <xsd:enumeration value=\"below\"/>\n      <xsd:enumeration value=\"left\"/>\n      <xsd:enumeration value=\"right\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n  <xsd:complexType name=\"CT_Caption\">\n    <xsd:attribute name=\"name\" type=\"s:ST_String\" use=\"required\"/>\n    <xsd:attribute name=\"pos\" type=\"ST_CaptionPos\" use=\"optional\"/>\n    <xsd:attribute name=\"chapNum\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"heading\" type=\"ST_DecimalNumber\" use=\"optional\"/>\n    <xsd:attribute name=\"noLabel\" type=\"s:ST_OnOff\" use=\"optional\"/>\n    <xsd:attribute name=\"numFmt\" type=\"ST_NumberFormat\" use=\"optional\"/>\n    <xsd:attribute name=\"sep\" type=\"ST_ChapterSep\" use=\"optional\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AutoCaption\">\n    <xsd:attribute name=\"name\" type=\"s:ST_String\" use=\"required\"/>\n    <xsd:attribute name=\"caption\" type=\"s:ST_String\" use=\"required\"/>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_AutoCaptions\">\n    <xsd:sequence>\n      <xsd:element name=\"autoCaption\" type=\"CT_AutoCaption\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Captions\">\n    <xsd:sequence>\n      <xsd:element name=\"caption\" type=\"CT_Caption\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>\n      <xsd:element name=\"autoCaptions\" type=\"CT_AutoCaptions\" minOccurs=\"0\" maxOccurs=\"1\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_DocumentBase\">\n    <xsd:sequence>\n      <xsd:element name=\"background\" type=\"CT_Background\" minOccurs=\"0\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_Document\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_DocumentBase\">\n        <xsd:sequence>\n          <xsd:element name=\"body\" type=\"CT_Body\" minOccurs=\"0\" maxOccurs=\"1\"/>\n        </xsd:sequence>\n        <xsd:attribute name=\"conformance\" type=\"s:ST_ConformanceClass\"/>\n        <xsd:attribute ref=\"mc:Ignorable\" use=\"optional\" />\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:complexType name=\"CT_GlossaryDocument\">\n    <xsd:complexContent>\n      <xsd:extension base=\"CT_DocumentBase\">\n        <xsd:sequence>\n          <xsd:element name=\"docParts\" type=\"CT_DocParts\" minOccurs=\"0\"/>\n        </xsd:sequence>\n      </xsd:extension>\n    </xsd:complexContent>\n  </xsd:complexType>\n  <xsd:element name=\"document\" type=\"CT_Document\"/>\n  <xsd:element name=\"glossaryDocument\" type=\"CT_GlossaryDocument\"/>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd",
    "content": "<?xml version='1.0'?>\n<xs:schema targetNamespace=\"http://www.w3.org/XML/1998/namespace\" xmlns:xs=\"http://www.w3.org/2001/XMLSchema\" xml:lang=\"en\">\n\n <xs:annotation>\n  <xs:documentation>\n   See http://www.w3.org/XML/1998/namespace.html and\n   http://www.w3.org/TR/REC-xml for information about this namespace.\n\n    This schema document describes the XML namespace, in a form\n    suitable for import by other schema documents.  \n\n    Note that local names in this namespace are intended to be defined\n    only by the World Wide Web Consortium or its subgroups.  The\n    following names are currently defined in this namespace and should\n    not be used with conflicting semantics by any Working Group,\n    specification, or document instance:\n\n    base (as an attribute name): denotes an attribute whose value\n         provides a URI to be used as the base for interpreting any\n         relative URIs in the scope of the element on which it\n         appears; its value is inherited.  This name is reserved\n         by virtue of its definition in the XML Base specification.\n\n    lang (as an attribute name): denotes an attribute whose value\n         is a language code for the natural language of the content of\n         any element; its value is inherited.  This name is reserved\n         by virtue of its definition in the XML specification.\n  \n    space (as an attribute name): denotes an attribute whose\n         value is a keyword indicating what whitespace processing\n         discipline is intended for the content of the element; its\n         value is inherited.  This name is reserved by virtue of its\n         definition in the XML specification.\n\n    Father (in any context at all): denotes Jon Bosak, the chair of \n         the original XML Working Group.  This name is reserved by \n         the following decision of the W3C XML Plenary and \n         XML Coordination groups:\n\n             In appreciation for his vision, leadership and dedication\n             the W3C XML Plenary on this 10th day of February, 2000\n             reserves for Jon Bosak in perpetuity the XML name\n             xml:Father\n  </xs:documentation>\n </xs:annotation>\n\n <xs:annotation>\n  <xs:documentation>This schema defines attributes and an attribute group\n        suitable for use by\n        schemas wishing to allow xml:base, xml:lang or xml:space attributes\n        on elements they define.\n\n        To enable this, such a schema must import this schema\n        for the XML namespace, e.g. as follows:\n        &lt;schema . . .>\n         . . .\n         &lt;import namespace=\"http://www.w3.org/XML/1998/namespace\"\n                    schemaLocation=\"http://www.w3.org/2001/03/xml.xsd\"/>\n\n        Subsequently, qualified reference to any of the attributes\n        or the group defined below will have the desired effect, e.g.\n\n        &lt;type . . .>\n         . . .\n         &lt;attributeGroup ref=\"xml:specialAttrs\"/>\n \n         will define a type which will schema-validate an instance\n         element with any of those attributes</xs:documentation>\n </xs:annotation>\n\n <xs:annotation>\n  <xs:documentation>In keeping with the XML Schema WG's standard versioning\n   policy, this schema document will persist at\n   http://www.w3.org/2001/03/xml.xsd.\n   At the date of issue it can also be found at\n   http://www.w3.org/2001/xml.xsd.\n   The schema document at that URI may however change in the future,\n   in order to remain compatible with the latest version of XML Schema\n   itself.  In other words, if the XML Schema namespace changes, the version\n   of this document at\n   http://www.w3.org/2001/xml.xsd will change\n   accordingly; the version at\n   http://www.w3.org/2001/03/xml.xsd will not change.\n  </xs:documentation>\n </xs:annotation>\n\n <xs:attribute name=\"lang\" type=\"xs:language\">\n  <xs:annotation>\n   <xs:documentation>In due course, we should install the relevant ISO 2- and 3-letter\n         codes as the enumerated possible values . . .</xs:documentation>\n  </xs:annotation>\n </xs:attribute>\n\n <xs:attribute name=\"space\" default=\"preserve\">\n  <xs:simpleType>\n   <xs:restriction base=\"xs:NCName\">\n    <xs:enumeration value=\"default\"/>\n    <xs:enumeration value=\"preserve\"/>\n   </xs:restriction>\n  </xs:simpleType>\n </xs:attribute>\n\n <xs:attribute name=\"base\" type=\"xs:anyURI\">\n  <xs:annotation>\n   <xs:documentation>See http://www.w3.org/TR/xmlbase/ for\n                     information about this attribute.</xs:documentation>\n  </xs:annotation>\n </xs:attribute>\n\n <xs:attributeGroup name=\"specialAttrs\">\n  <xs:attribute ref=\"xml:base\"/>\n  <xs:attribute ref=\"xml:lang\"/>\n  <xs:attribute ref=\"xml:space\"/>\n </xs:attributeGroup>\n\n</xs:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd",
    "content": "﻿<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<xs:schema xmlns=\"http://schemas.openxmlformats.org/package/2006/content-types\"\n  xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"\n  targetNamespace=\"http://schemas.openxmlformats.org/package/2006/content-types\"\n  elementFormDefault=\"qualified\" attributeFormDefault=\"unqualified\" blockDefault=\"#all\">\n\n  <xs:element name=\"Types\" type=\"CT_Types\"/>\n  <xs:element name=\"Default\" type=\"CT_Default\"/>\n  <xs:element name=\"Override\" type=\"CT_Override\"/>\n\n  <xs:complexType name=\"CT_Types\">\n    <xs:choice minOccurs=\"0\" maxOccurs=\"unbounded\">\n      <xs:element ref=\"Default\"/>\n      <xs:element ref=\"Override\"/>\n    </xs:choice>\n  </xs:complexType>\n\n  <xs:complexType name=\"CT_Default\">\n    <xs:attribute name=\"Extension\" type=\"ST_Extension\" use=\"required\"/>\n    <xs:attribute name=\"ContentType\" type=\"ST_ContentType\" use=\"required\"/>\n  </xs:complexType>\n\n  <xs:complexType name=\"CT_Override\">\n    <xs:attribute name=\"ContentType\" type=\"ST_ContentType\" use=\"required\"/>\n    <xs:attribute name=\"PartName\" type=\"xs:anyURI\" use=\"required\"/>\n  </xs:complexType>\n\n  <xs:simpleType name=\"ST_ContentType\">\n    <xs:restriction base=\"xs:string\">\n      <xs:pattern\n        value=\"(((([\\p{IsBasicLatin}-[\\p{Cc}&#127;\\(\\)&lt;&gt;@,;:\\\\&quot;/\\[\\]\\?=\\{\\}\\s\\t]])+))/((([\\p{IsBasicLatin}-[\\p{Cc}&#127;\\(\\)&lt;&gt;@,;:\\\\&quot;/\\[\\]\\?=\\{\\}\\s\\t]])+))((\\s+)*;(\\s+)*(((([\\p{IsBasicLatin}-[\\p{Cc}&#127;\\(\\)&lt;&gt;@,;:\\\\&quot;/\\[\\]\\?=\\{\\}\\s\\t]])+))=((([\\p{IsBasicLatin}-[\\p{Cc}&#127;\\(\\)&lt;&gt;@,;:\\\\&quot;/\\[\\]\\?=\\{\\}\\s\\t]])+)|(&quot;(([\\p{IsLatin-1Supplement}\\p{IsBasicLatin}-[\\p{Cc}&#127;&quot;\\n\\r]]|(\\s+))|(\\\\[\\p{IsBasicLatin}]))*&quot;))))*)\"\n      />\n    </xs:restriction>\n  </xs:simpleType>\n\n  <xs:simpleType name=\"ST_Extension\">\n    <xs:restriction base=\"xs:string\">\n      <xs:pattern\n        value=\"([!$&amp;'\\(\\)\\*\\+,:=]|(%[0-9a-fA-F][0-9a-fA-F])|[:@]|[a-zA-Z0-9\\-_~])+\"/>\n    </xs:restriction>\n  </xs:simpleType>\n</xs:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd",
    "content": "﻿<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<xs:schema targetNamespace=\"http://schemas.openxmlformats.org/package/2006/metadata/core-properties\"\n  xmlns=\"http://schemas.openxmlformats.org/package/2006/metadata/core-properties\"\n  xmlns:xs=\"http://www.w3.org/2001/XMLSchema\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n  xmlns:dcterms=\"http://purl.org/dc/terms/\" elementFormDefault=\"qualified\" blockDefault=\"#all\">\n\n  <xs:import namespace=\"http://purl.org/dc/elements/1.1/\"\n    schemaLocation=\"http://dublincore.org/schemas/xmls/qdc/2003/04/02/dc.xsd\"/>\n  <xs:import namespace=\"http://purl.org/dc/terms/\"\n    schemaLocation=\"http://dublincore.org/schemas/xmls/qdc/2003/04/02/dcterms.xsd\"/>\n  <xs:import id=\"xml\" namespace=\"http://www.w3.org/XML/1998/namespace\"/>\n\n  <xs:element name=\"coreProperties\" type=\"CT_CoreProperties\"/>\n\n  <xs:complexType name=\"CT_CoreProperties\">\n    <xs:all>\n      <xs:element name=\"category\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xs:string\"/>\n      <xs:element name=\"contentStatus\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xs:string\"/>\n      <xs:element ref=\"dcterms:created\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xs:element ref=\"dc:creator\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xs:element ref=\"dc:description\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xs:element ref=\"dc:identifier\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xs:element name=\"keywords\" minOccurs=\"0\" maxOccurs=\"1\" type=\"CT_Keywords\"/>\n      <xs:element ref=\"dc:language\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xs:element name=\"lastModifiedBy\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xs:string\"/>\n      <xs:element name=\"lastPrinted\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xs:dateTime\"/>\n      <xs:element ref=\"dcterms:modified\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xs:element name=\"revision\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xs:string\"/>\n      <xs:element ref=\"dc:subject\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xs:element ref=\"dc:title\" minOccurs=\"0\" maxOccurs=\"1\"/>\n      <xs:element name=\"version\" minOccurs=\"0\" maxOccurs=\"1\" type=\"xs:string\"/>\n    </xs:all>\n  </xs:complexType>\n\n  <xs:complexType name=\"CT_Keywords\" mixed=\"true\">\n    <xs:sequence>\n      <xs:element name=\"value\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_Keyword\"/>\n    </xs:sequence>\n    <xs:attribute ref=\"xml:lang\" use=\"optional\"/>\n  </xs:complexType>\n\n  <xs:complexType name=\"CT_Keyword\">\n    <xs:simpleContent>\n      <xs:extension base=\"xs:string\">\n        <xs:attribute ref=\"xml:lang\" use=\"optional\"/>\n      </xs:extension>\n    </xs:simpleContent>\n  </xs:complexType>\n\n</xs:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<xsd:schema xmlns=\"http://schemas.openxmlformats.org/package/2006/digital-signature\"\n  xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  targetNamespace=\"http://schemas.openxmlformats.org/package/2006/digital-signature\"\n  elementFormDefault=\"qualified\" attributeFormDefault=\"unqualified\" blockDefault=\"#all\">\n\n  <xsd:element name=\"SignatureTime\" type=\"CT_SignatureTime\"/>\n  <xsd:element name=\"RelationshipReference\" type=\"CT_RelationshipReference\"/>\n  <xsd:element name=\"RelationshipsGroupReference\" type=\"CT_RelationshipsGroupReference\"/>\n\n  <xsd:complexType name=\"CT_SignatureTime\">\n    <xsd:sequence>\n      <xsd:element name=\"Format\" type=\"ST_Format\"/>\n      <xsd:element name=\"Value\" type=\"ST_Value\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n\n  <xsd:complexType name=\"CT_RelationshipReference\">\n    <xsd:simpleContent>\n      <xsd:extension base=\"xsd:string\">\n        <xsd:attribute name=\"SourceId\" type=\"xsd:string\" use=\"required\"/>\n      </xsd:extension>\n    </xsd:simpleContent>\n  </xsd:complexType>\n\n  <xsd:complexType name=\"CT_RelationshipsGroupReference\">\n    <xsd:simpleContent>\n      <xsd:extension base=\"xsd:string\">\n        <xsd:attribute name=\"SourceType\" type=\"xsd:anyURI\" use=\"required\"/>\n      </xsd:extension>\n    </xsd:simpleContent>\n  </xsd:complexType>\n\n  <xsd:simpleType name=\"ST_Format\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern\n        value=\"(YYYY)|(YYYY-MM)|(YYYY-MM-DD)|(YYYY-MM-DDThh:mmTZD)|(YYYY-MM-DDThh:mm:ssTZD)|(YYYY-MM-DDThh:mm:ss.sTZD)\"\n      />\n    </xsd:restriction>\n  </xsd:simpleType>\n\n  <xsd:simpleType name=\"ST_Value\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:pattern\n        value=\"(([0-9][0-9][0-9][0-9]))|(([0-9][0-9][0-9][0-9])-((0[1-9])|(1(0|1|2))))|(([0-9][0-9][0-9][0-9])-((0[1-9])|(1(0|1|2)))-((0[1-9])|(1[0-9])|(2[0-9])|(3(0|1))))|(([0-9][0-9][0-9][0-9])-((0[1-9])|(1(0|1|2)))-((0[1-9])|(1[0-9])|(2[0-9])|(3(0|1)))T((0[0-9])|(1[0-9])|(2(0|1|2|3))):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9]))(((\\+|-)((0[0-9])|(1[0-9])|(2(0|1|2|3))):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9])))|Z))|(([0-9][0-9][0-9][0-9])-((0[1-9])|(1(0|1|2)))-((0[1-9])|(1[0-9])|(2[0-9])|(3(0|1)))T((0[0-9])|(1[0-9])|(2(0|1|2|3))):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9])):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9]))(((\\+|-)((0[0-9])|(1[0-9])|(2(0|1|2|3))):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9])))|Z))|(([0-9][0-9][0-9][0-9])-((0[1-9])|(1(0|1|2)))-((0[1-9])|(1[0-9])|(2[0-9])|(3(0|1)))T((0[0-9])|(1[0-9])|(2(0|1|2|3))):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9])):(((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9]))\\.[0-9])(((\\+|-)((0[0-9])|(1[0-9])|(2(0|1|2|3))):((0[0-9])|(1[0-9])|(2[0-9])|(3[0-9])|(4[0-9])|(5[0-9])))|Z))\"\n      />\n    </xsd:restriction>\n  </xsd:simpleType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd",
    "content": "﻿<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<xsd:schema xmlns=\"http://schemas.openxmlformats.org/package/2006/relationships\"\n  xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n  targetNamespace=\"http://schemas.openxmlformats.org/package/2006/relationships\"\n  elementFormDefault=\"qualified\" attributeFormDefault=\"unqualified\" blockDefault=\"#all\">\n\n  <xsd:element name=\"Relationships\" type=\"CT_Relationships\"/>\n  <xsd:element name=\"Relationship\" type=\"CT_Relationship\"/>\n\n  <xsd:complexType name=\"CT_Relationships\">\n    <xsd:sequence>\n      <xsd:element ref=\"Relationship\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n    </xsd:sequence>\n  </xsd:complexType>\n\n  <xsd:complexType name=\"CT_Relationship\">\n    <xsd:simpleContent>\n      <xsd:extension base=\"xsd:string\">\n        <xsd:attribute name=\"TargetMode\" type=\"ST_TargetMode\" use=\"optional\"/>\n        <xsd:attribute name=\"Target\" type=\"xsd:anyURI\" use=\"required\"/>\n        <xsd:attribute name=\"Type\" type=\"xsd:anyURI\" use=\"required\"/>\n        <xsd:attribute name=\"Id\" type=\"xsd:ID\" use=\"required\"/>\n      </xsd:extension>\n    </xsd:simpleContent>\n  </xsd:complexType>\n\n  <xsd:simpleType name=\"ST_TargetMode\">\n    <xsd:restriction base=\"xsd:string\">\n      <xsd:enumeration value=\"External\"/>\n      <xsd:enumeration value=\"Internal\"/>\n    </xsd:restriction>\n  </xsd:simpleType>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/mce/mc.xsd",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<xsd:schema xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\"\n\tattributeFormDefault=\"unqualified\" elementFormDefault=\"qualified\"\n\ttargetNamespace=\"http://schemas.openxmlformats.org/markup-compatibility/2006\"\n\txmlns:xsd=\"http://www.w3.org/2001/XMLSchema\">\n\n  <!--\n    This XSD is a modified version of the one found at:\n    https://github.com/plutext/docx4j/blob/master/xsd/mce/markup-compatibility-2006-MINIMAL.xsd\n\n    This XSD has 2 objectives:\n\n        1. round tripping @mc:Ignorable\n\n\t\t\t<w:document\n\t\t\t            xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\"\n\t\t\t            xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"\n\t\t\t            mc:Ignorable=\"w14 w15 wp14\">\n\n        2. enabling AlternateContent to be manipulated in certain elements\n           (in the unusual case where the content model is xsd:any, it doesn't have to be explicitly added)\n\n\t\tSee further ECMA-376, 4th Edition, Office Open XML File Formats\n\t\tPart 3 : Markup Compatibility and Extensibility\n   -->\n\n  <!--  Objective 1 -->\n  <xsd:attribute name=\"Ignorable\" type=\"xsd:string\" />\n\n  <!--  Objective 2 -->\n\t<xsd:attribute name=\"MustUnderstand\" type=\"xsd:string\"  />\n\t<xsd:attribute name=\"ProcessContent\" type=\"xsd:string\"  />\n\n<!-- An AlternateContent element shall contain one or more Choice child elements, optionally followed by a\nFallback child element. If present, there shall be only one Fallback element, and it shall follow all Choice\nelements. -->\n\t<xsd:element name=\"AlternateContent\">\n\t\t<xsd:complexType>\n\t\t\t<xsd:sequence>\n\t\t\t\t<xsd:element name=\"Choice\" minOccurs=\"0\" maxOccurs=\"unbounded\">\n\t\t\t\t\t<xsd:complexType>\n\t\t\t\t\t\t<xsd:sequence>\n\t\t\t\t\t\t\t<xsd:any minOccurs=\"0\" maxOccurs=\"unbounded\"\n\t\t\t\t\t\t\t\tprocessContents=\"strict\">\n\t\t\t\t\t\t\t</xsd:any>\n\t\t\t\t\t\t</xsd:sequence>\n\t\t\t\t\t\t<xsd:attribute name=\"Requires\" type=\"xsd:string\" use=\"required\" />\n\t\t\t\t\t\t<xsd:attribute ref=\"mc:Ignorable\" use=\"optional\" />\n\t\t\t\t\t\t<xsd:attribute ref=\"mc:MustUnderstand\" use=\"optional\" />\n\t\t\t\t\t\t<xsd:attribute ref=\"mc:ProcessContent\" use=\"optional\" />\n\t\t\t\t\t</xsd:complexType>\n\t\t\t\t</xsd:element>\n\t\t\t\t<xsd:element name=\"Fallback\" minOccurs=\"0\" maxOccurs=\"1\">\n\t\t\t\t\t<xsd:complexType>\n\t\t\t\t\t\t<xsd:sequence>\n\t\t\t\t\t\t\t<xsd:any minOccurs=\"0\" maxOccurs=\"unbounded\"\n\t\t\t\t\t\t\t\tprocessContents=\"strict\">\n\t\t\t\t\t\t\t</xsd:any>\n\t\t\t\t\t\t</xsd:sequence>\n\t\t\t\t\t\t<xsd:attribute ref=\"mc:Ignorable\" use=\"optional\" />\n\t\t\t\t\t\t<xsd:attribute ref=\"mc:MustUnderstand\" use=\"optional\" />\n\t\t\t\t\t\t<xsd:attribute ref=\"mc:ProcessContent\" use=\"optional\" />\n\t\t\t\t\t</xsd:complexType>\n\t\t\t\t</xsd:element>\n\t\t\t</xsd:sequence>\n\t\t\t<!-- AlternateContent elements might include the attributes Ignorable,\n\t\t\t\tMustUnderstand and ProcessContent described in this Part of ECMA-376. These\n\t\t\t\tattributes’ qualified names shall be prefixed when associated with an AlternateContent\n\t\t\t\telement. -->\n\t\t\t<xsd:attribute ref=\"mc:Ignorable\" use=\"optional\" />\n\t\t\t<xsd:attribute ref=\"mc:MustUnderstand\" use=\"optional\" />\n\t\t\t<xsd:attribute ref=\"mc:ProcessContent\" use=\"optional\" />\n\t\t</xsd:complexType>\n\t</xsd:element>\n</xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd",
    "content": " <xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\" xmlns:w12=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" elementFormDefault=\"qualified\" attributeFormDefault=\"qualified\" blockDefault=\"#all\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\" xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" xmlns=\"http://schemas.microsoft.com/office/word/2010/wordml\" targetNamespace=\"http://schemas.microsoft.com/office/word/2010/wordml\">\n   <!-- <xsd:import id=\"rel\" namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" schemaLocation=\"orel.xsd\"/> -->\n   <xsd:import id=\"w\" namespace=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" schemaLocation=\"../ISO-IEC29500-4_2016/wml.xsd\"/>\n   <!-- <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/main\" schemaLocation=\"oartbasetypes.xsd\"/>\n   <xsd:import namespace=\"http://schemas.openxmlformats.org/drawingml/2006/main\" schemaLocation=\"oartsplineproperties.xsd\"/> -->\n   <xsd:complexType name=\"CT_LongHexNumber\">\n     <xsd:attribute name=\"val\" type=\"w:ST_LongHexNumber\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:simpleType name=\"ST_OnOff\">\n     <xsd:restriction base=\"xsd:string\">\n       <xsd:enumeration value=\"true\"/>\n       <xsd:enumeration value=\"false\"/>\n       <xsd:enumeration value=\"0\"/>\n       <xsd:enumeration value=\"1\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:complexType name=\"CT_OnOff\">\n     <xsd:attribute name=\"val\" type=\"ST_OnOff\"/>\n   </xsd:complexType>\n   <xsd:element name=\"docId\" type=\"CT_LongHexNumber\"/>\n   <xsd:element name=\"conflictMode\" type=\"CT_OnOff\"/>\n   <xsd:attributeGroup name=\"AG_Parids\">\n     <xsd:attribute name=\"paraId\" type=\"w:ST_LongHexNumber\"/>\n     <xsd:attribute name=\"textId\" type=\"w:ST_LongHexNumber\"/>\n   </xsd:attributeGroup>\n   <xsd:attribute name=\"anchorId\" type=\"w:ST_LongHexNumber\"/>\n   <xsd:attribute name=\"noSpellErr\" type=\"ST_OnOff\"/>\n   <xsd:element name=\"customXmlConflictInsRangeStart\" type=\"w:CT_TrackChange\"/>\n   <xsd:element name=\"customXmlConflictInsRangeEnd\" type=\"w:CT_Markup\"/>\n   <xsd:element name=\"customXmlConflictDelRangeStart\" type=\"w:CT_TrackChange\"/>\n   <xsd:element name=\"customXmlConflictDelRangeEnd\" type=\"w:CT_Markup\"/>\n   <xsd:group name=\"EG_RunLevelConflicts\">\n     <xsd:sequence>\n       <xsd:element name=\"conflictIns\" type=\"w:CT_RunTrackChange\" minOccurs=\"0\"/>\n       <xsd:element name=\"conflictDel\" type=\"w:CT_RunTrackChange\" minOccurs=\"0\"/>\n     </xsd:sequence>\n   </xsd:group>\n   <xsd:group name=\"EG_Conflicts\">\n     <xsd:choice>\n       <xsd:element name=\"conflictIns\" type=\"w:CT_TrackChange\" minOccurs=\"0\"/>\n       <xsd:element name=\"conflictDel\" type=\"w:CT_TrackChange\" minOccurs=\"0\"/>\n     </xsd:choice>\n   </xsd:group>\n   <xsd:complexType name=\"CT_Percentage\">\n     <xsd:attribute name=\"val\" type=\"a:ST_Percentage\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_PositiveFixedPercentage\">\n     <xsd:attribute name=\"val\" type=\"a:ST_PositiveFixedPercentage\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_PositivePercentage\">\n     <xsd:attribute name=\"val\" type=\"a:ST_PositivePercentage\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:simpleType name=\"ST_SchemeColorVal\">\n     <xsd:restriction base=\"xsd:string\">\n       <xsd:enumeration value=\"bg1\"/>\n       <xsd:enumeration value=\"tx1\"/>\n       <xsd:enumeration value=\"bg2\"/>\n       <xsd:enumeration value=\"tx2\"/>\n       <xsd:enumeration value=\"accent1\"/>\n       <xsd:enumeration value=\"accent2\"/>\n       <xsd:enumeration value=\"accent3\"/>\n       <xsd:enumeration value=\"accent4\"/>\n       <xsd:enumeration value=\"accent5\"/>\n       <xsd:enumeration value=\"accent6\"/>\n       <xsd:enumeration value=\"hlink\"/>\n       <xsd:enumeration value=\"folHlink\"/>\n       <xsd:enumeration value=\"dk1\"/>\n       <xsd:enumeration value=\"lt1\"/>\n       <xsd:enumeration value=\"dk2\"/>\n       <xsd:enumeration value=\"lt2\"/>\n       <xsd:enumeration value=\"phClr\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:simpleType name=\"ST_RectAlignment\">\n     <xsd:restriction base=\"xsd:string\">\n       <xsd:enumeration value=\"none\"/>\n       <xsd:enumeration value=\"tl\"/>\n       <xsd:enumeration value=\"t\"/>\n       <xsd:enumeration value=\"tr\"/>\n       <xsd:enumeration value=\"l\"/>\n       <xsd:enumeration value=\"ctr\"/>\n       <xsd:enumeration value=\"r\"/>\n       <xsd:enumeration value=\"bl\"/>\n       <xsd:enumeration value=\"b\"/>\n       <xsd:enumeration value=\"br\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:simpleType name=\"ST_PathShadeType\">\n     <xsd:restriction base=\"xsd:string\">\n       <xsd:enumeration value=\"shape\"/>\n       <xsd:enumeration value=\"circle\"/>\n       <xsd:enumeration value=\"rect\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:simpleType name=\"ST_LineCap\">\n     <xsd:restriction base=\"xsd:string\">\n       <xsd:enumeration value=\"rnd\"/>\n       <xsd:enumeration value=\"sq\"/>\n       <xsd:enumeration value=\"flat\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:simpleType name=\"ST_PresetLineDashVal\">\n     <xsd:restriction base=\"xsd:string\">\n       <xsd:enumeration value=\"solid\"/>\n       <xsd:enumeration value=\"dot\"/>\n       <xsd:enumeration value=\"sysDot\"/>\n       <xsd:enumeration value=\"dash\"/>\n       <xsd:enumeration value=\"sysDash\"/>\n       <xsd:enumeration value=\"lgDash\"/>\n       <xsd:enumeration value=\"dashDot\"/>\n       <xsd:enumeration value=\"sysDashDot\"/>\n       <xsd:enumeration value=\"lgDashDot\"/>\n       <xsd:enumeration value=\"lgDashDotDot\"/>\n       <xsd:enumeration value=\"sysDashDotDot\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:simpleType name=\"ST_PenAlignment\">\n     <xsd:restriction base=\"xsd:string\">\n       <xsd:enumeration value=\"ctr\"/>\n       <xsd:enumeration value=\"in\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:simpleType name=\"ST_CompoundLine\">\n     <xsd:restriction base=\"xsd:string\">\n       <xsd:enumeration value=\"sng\"/>\n       <xsd:enumeration value=\"dbl\"/>\n       <xsd:enumeration value=\"thickThin\"/>\n       <xsd:enumeration value=\"thinThick\"/>\n       <xsd:enumeration value=\"tri\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:complexType name=\"CT_RelativeRect\">\n     <xsd:attribute name=\"l\" use=\"optional\" type=\"a:ST_Percentage\"/>\n     <xsd:attribute name=\"t\" use=\"optional\" type=\"a:ST_Percentage\"/>\n     <xsd:attribute name=\"r\" use=\"optional\" type=\"a:ST_Percentage\"/>\n     <xsd:attribute name=\"b\" use=\"optional\" type=\"a:ST_Percentage\"/>\n   </xsd:complexType>\n   <xsd:group name=\"EG_ColorTransform\">\n     <xsd:choice>\n       <xsd:element name=\"tint\" type=\"CT_PositiveFixedPercentage\"/>\n       <xsd:element name=\"shade\" type=\"CT_PositiveFixedPercentage\"/>\n       <xsd:element name=\"alpha\" type=\"CT_PositiveFixedPercentage\"/>\n       <xsd:element name=\"hueMod\" type=\"CT_PositivePercentage\"/>\n       <xsd:element name=\"sat\" type=\"CT_Percentage\"/>\n       <xsd:element name=\"satOff\" type=\"CT_Percentage\"/>\n       <xsd:element name=\"satMod\" type=\"CT_Percentage\"/>\n       <xsd:element name=\"lum\" type=\"CT_Percentage\"/>\n       <xsd:element name=\"lumOff\" type=\"CT_Percentage\"/>\n       <xsd:element name=\"lumMod\" type=\"CT_Percentage\"/>\n     </xsd:choice>\n   </xsd:group>\n   <xsd:complexType name=\"CT_SRgbColor\">\n     <xsd:sequence>\n       <xsd:group ref=\"EG_ColorTransform\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n     </xsd:sequence>\n     <xsd:attribute name=\"val\" type=\"s:ST_HexColorRGB\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_SchemeColor\">\n     <xsd:sequence>\n       <xsd:group ref=\"EG_ColorTransform\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n     </xsd:sequence>\n     <xsd:attribute name=\"val\" type=\"ST_SchemeColorVal\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:group name=\"EG_ColorChoice\">\n     <xsd:choice>\n       <xsd:element name=\"srgbClr\" type=\"CT_SRgbColor\"/>\n       <xsd:element name=\"schemeClr\" type=\"CT_SchemeColor\"/>\n     </xsd:choice>\n   </xsd:group>\n   <xsd:complexType name=\"CT_Color\">\n     <xsd:sequence>\n       <xsd:group ref=\"EG_ColorChoice\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_GradientStop\">\n     <xsd:sequence>\n       <xsd:group ref=\"EG_ColorChoice\"/>\n     </xsd:sequence>\n     <xsd:attribute name=\"pos\" type=\"a:ST_PositiveFixedPercentage\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_GradientStopList\">\n     <xsd:sequence>\n       <xsd:element name=\"gs\" type=\"CT_GradientStop\" minOccurs=\"2\" maxOccurs=\"10\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_LinearShadeProperties\">\n     <xsd:attribute name=\"ang\" type=\"a:ST_PositiveFixedAngle\" use=\"optional\"/>\n     <xsd:attribute name=\"scaled\" type=\"ST_OnOff\" use=\"optional\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_PathShadeProperties\">\n     <xsd:sequence>\n       <xsd:element name=\"fillToRect\" type=\"CT_RelativeRect\" minOccurs=\"0\"/>\n     </xsd:sequence>\n     <xsd:attribute name=\"path\" type=\"ST_PathShadeType\" use=\"optional\"/>\n   </xsd:complexType>\n   <xsd:group name=\"EG_ShadeProperties\">\n     <xsd:choice>\n       <xsd:element name=\"lin\" type=\"CT_LinearShadeProperties\"/>\n       <xsd:element name=\"path\" type=\"CT_PathShadeProperties\"/>\n     </xsd:choice>\n   </xsd:group>\n   <xsd:complexType name=\"CT_SolidColorFillProperties\">\n     <xsd:sequence>\n       <xsd:group ref=\"EG_ColorChoice\" minOccurs=\"0\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_GradientFillProperties\">\n     <xsd:sequence>\n       <xsd:element name=\"gsLst\" type=\"CT_GradientStopList\" minOccurs=\"0\"/>\n       <xsd:group ref=\"EG_ShadeProperties\" minOccurs=\"0\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:group name=\"EG_FillProperties\">\n     <xsd:choice>\n       <xsd:element name=\"noFill\" type=\"w:CT_Empty\"/>\n       <xsd:element name=\"solidFill\" type=\"CT_SolidColorFillProperties\"/>\n       <xsd:element name=\"gradFill\" type=\"CT_GradientFillProperties\"/>\n     </xsd:choice>\n   </xsd:group>\n   <xsd:complexType name=\"CT_PresetLineDashProperties\">\n     <xsd:attribute name=\"val\" type=\"ST_PresetLineDashVal\" use=\"optional\"/>\n   </xsd:complexType>\n   <xsd:group name=\"EG_LineDashProperties\">\n     <xsd:choice>\n       <xsd:element name=\"prstDash\" type=\"CT_PresetLineDashProperties\"/>\n     </xsd:choice>\n   </xsd:group>\n   <xsd:complexType name=\"CT_LineJoinMiterProperties\">\n     <xsd:attribute name=\"lim\" type=\"a:ST_PositivePercentage\" use=\"optional\"/>\n   </xsd:complexType>\n   <xsd:group name=\"EG_LineJoinProperties\">\n     <xsd:choice>\n       <xsd:element name=\"round\" type=\"w:CT_Empty\"/>\n       <xsd:element name=\"bevel\" type=\"w:CT_Empty\"/>\n       <xsd:element name=\"miter\" type=\"CT_LineJoinMiterProperties\"/>\n     </xsd:choice>\n   </xsd:group>\n   <xsd:simpleType name=\"ST_PresetCameraType\">\n     <xsd:restriction base=\"xsd:token\">\n       <xsd:enumeration value=\"legacyObliqueTopLeft\"/>\n       <xsd:enumeration value=\"legacyObliqueTop\"/>\n       <xsd:enumeration value=\"legacyObliqueTopRight\"/>\n       <xsd:enumeration value=\"legacyObliqueLeft\"/>\n       <xsd:enumeration value=\"legacyObliqueFront\"/>\n       <xsd:enumeration value=\"legacyObliqueRight\"/>\n       <xsd:enumeration value=\"legacyObliqueBottomLeft\"/>\n       <xsd:enumeration value=\"legacyObliqueBottom\"/>\n       <xsd:enumeration value=\"legacyObliqueBottomRight\"/>\n       <xsd:enumeration value=\"legacyPerspectiveTopLeft\"/>\n       <xsd:enumeration value=\"legacyPerspectiveTop\"/>\n       <xsd:enumeration value=\"legacyPerspectiveTopRight\"/>\n       <xsd:enumeration value=\"legacyPerspectiveLeft\"/>\n       <xsd:enumeration value=\"legacyPerspectiveFront\"/>\n       <xsd:enumeration value=\"legacyPerspectiveRight\"/>\n       <xsd:enumeration value=\"legacyPerspectiveBottomLeft\"/>\n       <xsd:enumeration value=\"legacyPerspectiveBottom\"/>\n       <xsd:enumeration value=\"legacyPerspectiveBottomRight\"/>\n       <xsd:enumeration value=\"orthographicFront\"/>\n       <xsd:enumeration value=\"isometricTopUp\"/>\n       <xsd:enumeration value=\"isometricTopDown\"/>\n       <xsd:enumeration value=\"isometricBottomUp\"/>\n       <xsd:enumeration value=\"isometricBottomDown\"/>\n       <xsd:enumeration value=\"isometricLeftUp\"/>\n       <xsd:enumeration value=\"isometricLeftDown\"/>\n       <xsd:enumeration value=\"isometricRightUp\"/>\n       <xsd:enumeration value=\"isometricRightDown\"/>\n       <xsd:enumeration value=\"isometricOffAxis1Left\"/>\n       <xsd:enumeration value=\"isometricOffAxis1Right\"/>\n       <xsd:enumeration value=\"isometricOffAxis1Top\"/>\n       <xsd:enumeration value=\"isometricOffAxis2Left\"/>\n       <xsd:enumeration value=\"isometricOffAxis2Right\"/>\n       <xsd:enumeration value=\"isometricOffAxis2Top\"/>\n       <xsd:enumeration value=\"isometricOffAxis3Left\"/>\n       <xsd:enumeration value=\"isometricOffAxis3Right\"/>\n       <xsd:enumeration value=\"isometricOffAxis3Bottom\"/>\n       <xsd:enumeration value=\"isometricOffAxis4Left\"/>\n       <xsd:enumeration value=\"isometricOffAxis4Right\"/>\n       <xsd:enumeration value=\"isometricOffAxis4Bottom\"/>\n       <xsd:enumeration value=\"obliqueTopLeft\"/>\n       <xsd:enumeration value=\"obliqueTop\"/>\n       <xsd:enumeration value=\"obliqueTopRight\"/>\n       <xsd:enumeration value=\"obliqueLeft\"/>\n       <xsd:enumeration value=\"obliqueRight\"/>\n       <xsd:enumeration value=\"obliqueBottomLeft\"/>\n       <xsd:enumeration value=\"obliqueBottom\"/>\n       <xsd:enumeration value=\"obliqueBottomRight\"/>\n       <xsd:enumeration value=\"perspectiveFront\"/>\n       <xsd:enumeration value=\"perspectiveLeft\"/>\n       <xsd:enumeration value=\"perspectiveRight\"/>\n       <xsd:enumeration value=\"perspectiveAbove\"/>\n       <xsd:enumeration value=\"perspectiveBelow\"/>\n       <xsd:enumeration value=\"perspectiveAboveLeftFacing\"/>\n       <xsd:enumeration value=\"perspectiveAboveRightFacing\"/>\n       <xsd:enumeration value=\"perspectiveContrastingLeftFacing\"/>\n       <xsd:enumeration value=\"perspectiveContrastingRightFacing\"/>\n       <xsd:enumeration value=\"perspectiveHeroicLeftFacing\"/>\n       <xsd:enumeration value=\"perspectiveHeroicRightFacing\"/>\n       <xsd:enumeration value=\"perspectiveHeroicExtremeLeftFacing\"/>\n       <xsd:enumeration value=\"perspectiveHeroicExtremeRightFacing\"/>\n       <xsd:enumeration value=\"perspectiveRelaxed\"/>\n       <xsd:enumeration value=\"perspectiveRelaxedModerately\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:complexType name=\"CT_Camera\">\n     <xsd:attribute name=\"prst\" use=\"required\" type=\"ST_PresetCameraType\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_SphereCoords\">\n     <xsd:attribute name=\"lat\" type=\"a:ST_PositiveFixedAngle\" use=\"required\"/>\n     <xsd:attribute name=\"lon\" type=\"a:ST_PositiveFixedAngle\" use=\"required\"/>\n     <xsd:attribute name=\"rev\" type=\"a:ST_PositiveFixedAngle\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:simpleType name=\"ST_LightRigType\">\n     <xsd:restriction base=\"xsd:token\">\n       <xsd:enumeration value=\"legacyFlat1\"/>\n       <xsd:enumeration value=\"legacyFlat2\"/>\n       <xsd:enumeration value=\"legacyFlat3\"/>\n       <xsd:enumeration value=\"legacyFlat4\"/>\n       <xsd:enumeration value=\"legacyNormal1\"/>\n       <xsd:enumeration value=\"legacyNormal2\"/>\n       <xsd:enumeration value=\"legacyNormal3\"/>\n       <xsd:enumeration value=\"legacyNormal4\"/>\n       <xsd:enumeration value=\"legacyHarsh1\"/>\n       <xsd:enumeration value=\"legacyHarsh2\"/>\n       <xsd:enumeration value=\"legacyHarsh3\"/>\n       <xsd:enumeration value=\"legacyHarsh4\"/>\n       <xsd:enumeration value=\"threePt\"/>\n       <xsd:enumeration value=\"balanced\"/>\n       <xsd:enumeration value=\"soft\"/>\n       <xsd:enumeration value=\"harsh\"/>\n       <xsd:enumeration value=\"flood\"/>\n       <xsd:enumeration value=\"contrasting\"/>\n       <xsd:enumeration value=\"morning\"/>\n       <xsd:enumeration value=\"sunrise\"/>\n       <xsd:enumeration value=\"sunset\"/>\n       <xsd:enumeration value=\"chilly\"/>\n       <xsd:enumeration value=\"freezing\"/>\n       <xsd:enumeration value=\"flat\"/>\n       <xsd:enumeration value=\"twoPt\"/>\n       <xsd:enumeration value=\"glow\"/>\n       <xsd:enumeration value=\"brightRoom\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:simpleType name=\"ST_LightRigDirection\">\n     <xsd:restriction base=\"xsd:token\">\n       <xsd:enumeration value=\"tl\"/>\n       <xsd:enumeration value=\"t\"/>\n       <xsd:enumeration value=\"tr\"/>\n       <xsd:enumeration value=\"l\"/>\n       <xsd:enumeration value=\"r\"/>\n       <xsd:enumeration value=\"bl\"/>\n       <xsd:enumeration value=\"b\"/>\n       <xsd:enumeration value=\"br\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:complexType name=\"CT_LightRig\">\n     <xsd:sequence>\n       <xsd:element name=\"rot\" type=\"CT_SphereCoords\" minOccurs=\"0\"/>\n     </xsd:sequence>\n     <xsd:attribute name=\"rig\" type=\"ST_LightRigType\" use=\"required\"/>\n     <xsd:attribute name=\"dir\" type=\"ST_LightRigDirection\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:simpleType name=\"ST_BevelPresetType\">\n     <xsd:restriction base=\"xsd:token\">\n       <xsd:enumeration value=\"relaxedInset\"/>\n       <xsd:enumeration value=\"circle\"/>\n       <xsd:enumeration value=\"slope\"/>\n       <xsd:enumeration value=\"cross\"/>\n       <xsd:enumeration value=\"angle\"/>\n       <xsd:enumeration value=\"softRound\"/>\n       <xsd:enumeration value=\"convex\"/>\n       <xsd:enumeration value=\"coolSlant\"/>\n       <xsd:enumeration value=\"divot\"/>\n       <xsd:enumeration value=\"riblet\"/>\n       <xsd:enumeration value=\"hardEdge\"/>\n       <xsd:enumeration value=\"artDeco\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:complexType name=\"CT_Bevel\">\n     <xsd:attribute name=\"w\" type=\"a:ST_PositiveCoordinate\" use=\"optional\"/>\n     <xsd:attribute name=\"h\" type=\"a:ST_PositiveCoordinate\" use=\"optional\"/>\n     <xsd:attribute name=\"prst\" type=\"ST_BevelPresetType\" use=\"optional\"/>\n   </xsd:complexType>\n   <xsd:simpleType name=\"ST_PresetMaterialType\">\n     <xsd:restriction base=\"xsd:token\">\n       <xsd:enumeration value=\"legacyMatte\"/>\n       <xsd:enumeration value=\"legacyPlastic\"/>\n       <xsd:enumeration value=\"legacyMetal\"/>\n       <xsd:enumeration value=\"legacyWireframe\"/>\n       <xsd:enumeration value=\"matte\"/>\n       <xsd:enumeration value=\"plastic\"/>\n       <xsd:enumeration value=\"metal\"/>\n       <xsd:enumeration value=\"warmMatte\"/>\n       <xsd:enumeration value=\"translucentPowder\"/>\n       <xsd:enumeration value=\"powder\"/>\n       <xsd:enumeration value=\"dkEdge\"/>\n       <xsd:enumeration value=\"softEdge\"/>\n       <xsd:enumeration value=\"clear\"/>\n       <xsd:enumeration value=\"flat\"/>\n       <xsd:enumeration value=\"softmetal\"/>\n       <xsd:enumeration value=\"none\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:complexType name=\"CT_Glow\">\n     <xsd:sequence>\n       <xsd:group ref=\"EG_ColorChoice\"/>\n     </xsd:sequence>\n     <xsd:attribute name=\"rad\" use=\"optional\" type=\"a:ST_PositiveCoordinate\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_Shadow\">\n     <xsd:sequence>\n       <xsd:group ref=\"EG_ColorChoice\"/>\n     </xsd:sequence>\n     <xsd:attribute name=\"blurRad\" use=\"optional\" type=\"a:ST_PositiveCoordinate\"/>\n     <xsd:attribute name=\"dist\" use=\"optional\" type=\"a:ST_PositiveCoordinate\"/>\n     <xsd:attribute name=\"dir\" use=\"optional\" type=\"a:ST_PositiveFixedAngle\"/>\n     <xsd:attribute name=\"sx\" use=\"optional\" type=\"a:ST_Percentage\"/>\n     <xsd:attribute name=\"sy\" use=\"optional\" type=\"a:ST_Percentage\"/>\n     <xsd:attribute name=\"kx\" use=\"optional\" type=\"a:ST_FixedAngle\"/>\n     <xsd:attribute name=\"ky\" use=\"optional\" type=\"a:ST_FixedAngle\"/>\n     <xsd:attribute name=\"algn\" use=\"optional\" type=\"ST_RectAlignment\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_Reflection\">\n     <xsd:attribute name=\"blurRad\" use=\"optional\" type=\"a:ST_PositiveCoordinate\"/>\n     <xsd:attribute name=\"stA\" use=\"optional\" type=\"a:ST_PositiveFixedPercentage\"/>\n     <xsd:attribute name=\"stPos\" use=\"optional\" type=\"a:ST_PositiveFixedPercentage\"/>\n     <xsd:attribute name=\"endA\" use=\"optional\" type=\"a:ST_PositiveFixedPercentage\"/>\n     <xsd:attribute name=\"endPos\" use=\"optional\" type=\"a:ST_PositiveFixedPercentage\"/>\n     <xsd:attribute name=\"dist\" use=\"optional\" type=\"a:ST_PositiveCoordinate\"/>\n     <xsd:attribute name=\"dir\" use=\"optional\" type=\"a:ST_PositiveFixedAngle\"/>\n     <xsd:attribute name=\"fadeDir\" use=\"optional\" type=\"a:ST_PositiveFixedAngle\"/>\n     <xsd:attribute name=\"sx\" use=\"optional\" type=\"a:ST_Percentage\"/>\n     <xsd:attribute name=\"sy\" use=\"optional\" type=\"a:ST_Percentage\"/>\n     <xsd:attribute name=\"kx\" use=\"optional\" type=\"a:ST_FixedAngle\"/>\n     <xsd:attribute name=\"ky\" use=\"optional\" type=\"a:ST_FixedAngle\"/>\n     <xsd:attribute name=\"algn\" use=\"optional\" type=\"ST_RectAlignment\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_FillTextEffect\">\n     <xsd:sequence>\n       <xsd:group ref=\"EG_FillProperties\" minOccurs=\"0\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_TextOutlineEffect\">\n     <xsd:sequence>\n       <xsd:group ref=\"EG_FillProperties\" minOccurs=\"0\"/>\n       <xsd:group ref=\"EG_LineDashProperties\" minOccurs=\"0\"/>\n       <xsd:group ref=\"EG_LineJoinProperties\" minOccurs=\"0\"/>\n     </xsd:sequence>\n     <xsd:attribute name=\"w\" use=\"optional\" type=\"a:ST_LineWidth\"/>\n     <xsd:attribute name=\"cap\" use=\"optional\" type=\"ST_LineCap\"/>\n     <xsd:attribute name=\"cmpd\" use=\"optional\" type=\"ST_CompoundLine\"/>\n     <xsd:attribute name=\"algn\" use=\"optional\" type=\"ST_PenAlignment\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_Scene3D\">\n     <xsd:sequence>\n       <xsd:element name=\"camera\" type=\"CT_Camera\"/>\n       <xsd:element name=\"lightRig\" type=\"CT_LightRig\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_Props3D\">\n     <xsd:sequence>\n       <xsd:element name=\"bevelT\" type=\"CT_Bevel\" minOccurs=\"0\"/>\n       <xsd:element name=\"bevelB\" type=\"CT_Bevel\" minOccurs=\"0\"/>\n       <xsd:element name=\"extrusionClr\" type=\"CT_Color\" minOccurs=\"0\"/>\n       <xsd:element name=\"contourClr\" type=\"CT_Color\" minOccurs=\"0\"/>\n     </xsd:sequence>\n     <xsd:attribute name=\"extrusionH\" type=\"a:ST_PositiveCoordinate\" use=\"optional\"/>\n     <xsd:attribute name=\"contourW\" type=\"a:ST_PositiveCoordinate\" use=\"optional\"/>\n     <xsd:attribute name=\"prstMaterial\" type=\"ST_PresetMaterialType\" use=\"optional\"/>\n   </xsd:complexType>\n   <xsd:group name=\"EG_RPrTextEffects\">\n     <xsd:sequence>\n       <xsd:element name=\"glow\" minOccurs=\"0\" type=\"CT_Glow\"/>\n       <xsd:element name=\"shadow\" minOccurs=\"0\" type=\"CT_Shadow\"/>\n       <xsd:element name=\"reflection\" minOccurs=\"0\" type=\"CT_Reflection\"/>\n       <xsd:element name=\"textOutline\" minOccurs=\"0\" type=\"CT_TextOutlineEffect\"/>\n       <xsd:element name=\"textFill\" minOccurs=\"0\" type=\"CT_FillTextEffect\"/>\n       <xsd:element name=\"scene3d\" minOccurs=\"0\" type=\"CT_Scene3D\"/>\n       <xsd:element name=\"props3d\" minOccurs=\"0\" type=\"CT_Props3D\"/>\n     </xsd:sequence>\n   </xsd:group>\n   <xsd:simpleType name=\"ST_Ligatures\">\n     <xsd:restriction base=\"xsd:string\">\n       <xsd:enumeration value=\"none\"/>\n       <xsd:enumeration value=\"standard\"/>\n       <xsd:enumeration value=\"contextual\"/>\n       <xsd:enumeration value=\"historical\"/>\n       <xsd:enumeration value=\"discretional\"/>\n       <xsd:enumeration value=\"standardContextual\"/>\n       <xsd:enumeration value=\"standardHistorical\"/>\n       <xsd:enumeration value=\"contextualHistorical\"/>\n       <xsd:enumeration value=\"standardDiscretional\"/>\n       <xsd:enumeration value=\"contextualDiscretional\"/>\n       <xsd:enumeration value=\"historicalDiscretional\"/>\n       <xsd:enumeration value=\"standardContextualHistorical\"/>\n       <xsd:enumeration value=\"standardContextualDiscretional\"/>\n       <xsd:enumeration value=\"standardHistoricalDiscretional\"/>\n       <xsd:enumeration value=\"contextualHistoricalDiscretional\"/>\n       <xsd:enumeration value=\"all\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:complexType name=\"CT_Ligatures\">\n     <xsd:attribute name=\"val\" type=\"ST_Ligatures\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:simpleType name=\"ST_NumForm\">\n     <xsd:restriction base=\"xsd:string\">\n       <xsd:enumeration value=\"default\"/>\n       <xsd:enumeration value=\"lining\"/>\n       <xsd:enumeration value=\"oldStyle\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:complexType name=\"CT_NumForm\">\n     <xsd:attribute name=\"val\" type=\"ST_NumForm\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:simpleType name=\"ST_NumSpacing\">\n     <xsd:restriction base=\"xsd:string\">\n       <xsd:enumeration value=\"default\"/>\n       <xsd:enumeration value=\"proportional\"/>\n       <xsd:enumeration value=\"tabular\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:complexType name=\"CT_NumSpacing\">\n     <xsd:attribute name=\"val\" type=\"ST_NumSpacing\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_StyleSet\">\n     <xsd:attribute name=\"id\" type=\"s:ST_UnsignedDecimalNumber\" use=\"required\"/>\n     <xsd:attribute name=\"val\" type=\"ST_OnOff\" use=\"optional\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_StylisticSets\">\n     <xsd:sequence minOccurs=\"0\">\n       <xsd:element name=\"styleSet\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"CT_StyleSet\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:group name=\"EG_RPrOpenType\">\n     <xsd:sequence>\n       <xsd:element name=\"ligatures\" minOccurs=\"0\" type=\"CT_Ligatures\"/>\n       <xsd:element name=\"numForm\" minOccurs=\"0\" type=\"CT_NumForm\"/>\n       <xsd:element name=\"numSpacing\" minOccurs=\"0\" type=\"CT_NumSpacing\"/>\n       <xsd:element name=\"stylisticSets\" minOccurs=\"0\" type=\"CT_StylisticSets\"/>\n       <xsd:element name=\"cntxtAlts\" minOccurs=\"0\" type=\"CT_OnOff\"/>\n     </xsd:sequence>\n   </xsd:group>\n   <xsd:element name=\"discardImageEditingData\" type=\"CT_OnOff\"/>\n   <xsd:element name=\"defaultImageDpi\" type=\"CT_DefaultImageDpi\"/>\n   <xsd:complexType name=\"CT_DefaultImageDpi\">\n     <xsd:attribute name=\"val\" type=\"w:ST_DecimalNumber\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:element name=\"entityPicker\" type=\"w:CT_Empty\"/>\n   <xsd:complexType name=\"CT_SdtCheckboxSymbol\">\n     <xsd:attribute name=\"font\" type=\"s:ST_String\"/>\n     <xsd:attribute name=\"val\" type=\"w:ST_ShortHexNumber\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_SdtCheckbox\">\n     <xsd:sequence>\n       <xsd:element name=\"checked\" type=\"CT_OnOff\" minOccurs=\"0\"/>\n       <xsd:element name=\"checkedState\" type=\"CT_SdtCheckboxSymbol\" minOccurs=\"0\"/>\n       <xsd:element name=\"uncheckedState\" type=\"CT_SdtCheckboxSymbol\" minOccurs=\"0\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:element name=\"checkbox\" type=\"CT_SdtCheckbox\"/>\n </xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd",
    "content": " <xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\" xmlns:w12=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\" elementFormDefault=\"qualified\" attributeFormDefault=\"qualified\" blockDefault=\"#all\" xmlns=\"http://schemas.microsoft.com/office/word/2012/wordml\" targetNamespace=\"http://schemas.microsoft.com/office/word/2012/wordml\">\n   <xsd:import id=\"w12\" namespace=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" schemaLocation=\"../ISO-IEC29500-4_2016/wml.xsd\"/>\n   <xsd:import namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\" schemaLocation=\"../ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd\"/>\n   <xsd:element name=\"color\" type=\"w12:CT_Color\"/>\n   <xsd:simpleType name=\"ST_SdtAppearance\">\n     <xsd:restriction base=\"xsd:string\">\n       <xsd:enumeration value=\"boundingBox\"/>\n       <xsd:enumeration value=\"tags\"/>\n       <xsd:enumeration value=\"hidden\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:element name=\"dataBinding\" type=\"w12:CT_DataBinding\"/>\n   <xsd:complexType name=\"CT_SdtAppearance\">\n     <xsd:attribute name=\"val\" type=\"ST_SdtAppearance\"/>\n   </xsd:complexType>\n   <xsd:element name=\"appearance\" type=\"CT_SdtAppearance\"/>\n   <xsd:complexType name=\"CT_CommentsEx\">\n     <xsd:sequence>\n       <xsd:element name=\"commentEx\" type=\"CT_CommentEx\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_CommentEx\">\n     <xsd:attribute name=\"paraId\" type=\"w12:ST_LongHexNumber\" use=\"required\"/>\n     <xsd:attribute name=\"paraIdParent\" type=\"w12:ST_LongHexNumber\" use=\"optional\"/>\n     <xsd:attribute name=\"done\" type=\"s:ST_OnOff\" use=\"optional\"/>\n   </xsd:complexType>\n   <xsd:element name=\"commentsEx\" type=\"CT_CommentsEx\"/>\n   <xsd:complexType name=\"CT_People\">\n     <xsd:sequence>\n       <xsd:element name=\"person\" type=\"CT_Person\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_PresenceInfo\">\n     <xsd:attribute name=\"providerId\" type=\"xsd:string\" use=\"required\"/>\n     <xsd:attribute name=\"userId\" type=\"xsd:string\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_Person\">\n     <xsd:sequence>\n       <xsd:element name=\"presenceInfo\" type=\"CT_PresenceInfo\" minOccurs=\"0\" maxOccurs=\"1\"/>\n     </xsd:sequence>\n     <xsd:attribute name=\"author\" type=\"s:ST_String\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:element name=\"people\" type=\"CT_People\"/>\n   <xsd:complexType name=\"CT_SdtRepeatedSection\">\n     <xsd:sequence>\n       <xsd:element name=\"sectionTitle\" type=\"w12:CT_String\" minOccurs=\"0\"/>\n       <xsd:element name=\"doNotAllowInsertDeleteSection\" type=\"w12:CT_OnOff\" minOccurs=\"0\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:simpleType name=\"ST_Guid\">\n     <xsd:restriction base=\"xsd:token\">\n       <xsd:pattern value=\"\\{[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\\}\"/>\n     </xsd:restriction>\n   </xsd:simpleType>\n   <xsd:complexType name=\"CT_Guid\">\n     <xsd:attribute name=\"val\" type=\"ST_Guid\"/>\n   </xsd:complexType>\n   <xsd:element name=\"repeatingSection\" type=\"CT_SdtRepeatedSection\"/>\n   <xsd:element name=\"repeatingSectionItem\" type=\"w12:CT_Empty\"/>\n   <xsd:element name=\"chartTrackingRefBased\" type=\"w12:CT_OnOff\"/>\n   <xsd:element name=\"collapsed\" type=\"w12:CT_OnOff\"/>\n   <xsd:element name=\"docId\" type=\"CT_Guid\"/>\n   <xsd:element name=\"footnoteColumns\" type=\"w12:CT_DecimalNumber\"/>\n   <xsd:element name=\"webExtensionLinked\" type=\"w12:CT_OnOff\"/>\n   <xsd:element name=\"webExtensionCreated\" type=\"w12:CT_OnOff\"/>\n   <xsd:attribute name=\"restartNumberingAfterBreak\" type=\"s:ST_OnOff\"/>\n </xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd",
    "content": " <xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\" xmlns:w12=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" elementFormDefault=\"qualified\" attributeFormDefault=\"qualified\" blockDefault=\"#all\" xmlns=\"http://schemas.microsoft.com/office/word/2018/wordml\" targetNamespace=\"http://schemas.microsoft.com/office/word/2018/wordml\">\n   <xsd:import id=\"w12\" namespace=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" schemaLocation=\"../ISO-IEC29500-4_2016/wml.xsd\"/>\n   <xsd:complexType name=\"CT_Extension\">\n     <xsd:sequence>\n       <xsd:any processContents=\"lax\"/>\n     </xsd:sequence>\n     <xsd:attribute name=\"uri\" type=\"xsd:token\"/>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_ExtensionList\">\n     <xsd:sequence>\n       <xsd:element name=\"ext\" type=\"CT_Extension\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n </xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd",
    "content": " <xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:s=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\" xmlns:w16=\"http://schemas.microsoft.com/office/word/2018/wordml\" elementFormDefault=\"qualified\" attributeFormDefault=\"qualified\" blockDefault=\"#all\" xmlns=\"http://schemas.microsoft.com/office/word/2018/wordml/cex\" targetNamespace=\"http://schemas.microsoft.com/office/word/2018/wordml/cex\">\n   <xsd:import id=\"w16\" namespace=\"http://schemas.microsoft.com/office/word/2018/wordml\" schemaLocation=\"wml-2018.xsd\"/>\n   <xsd:import id=\"w\" namespace=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" schemaLocation=\"../ISO-IEC29500-4_2016/wml.xsd\"/>\n   <xsd:import id=\"s\" namespace=\"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\" schemaLocation=\"../ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd\"/>\n   <xsd:complexType name=\"CT_CommentsExtensible\">\n     <xsd:sequence>\n       <xsd:element name=\"commentExtensible\" type=\"CT_CommentExtensible\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n       <xsd:element name=\"extLst\" type=\"w16:CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_CommentExtensible\">\n     <xsd:sequence>\n       <xsd:element name=\"extLst\" type=\"w16:CT_ExtensionList\" minOccurs=\"0\" maxOccurs=\"1\"/>\n     </xsd:sequence>\n     <xsd:attribute name=\"durableId\" type=\"w:ST_LongHexNumber\" use=\"required\"/>\n     <xsd:attribute name=\"dateUtc\" type=\"w:ST_DateTime\" use=\"optional\"/>\n     <xsd:attribute name=\"intelligentPlaceholder\" type=\"s:ST_OnOff\" use=\"optional\"/>\n   </xsd:complexType>\n   <xsd:element name=\"commentsExtensible\" type=\"CT_CommentsExtensible\"/>\n </xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd",
    "content": " <xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\" xmlns:w12=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" elementFormDefault=\"qualified\" attributeFormDefault=\"qualified\" blockDefault=\"#all\" xmlns=\"http://schemas.microsoft.com/office/word/2016/wordml/cid\" targetNamespace=\"http://schemas.microsoft.com/office/word/2016/wordml/cid\">\n   <xsd:import id=\"w12\" namespace=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" schemaLocation=\"../ISO-IEC29500-4_2016/wml.xsd\"/>\n   <xsd:complexType name=\"CT_CommentsIds\">\n     <xsd:sequence>\n       <xsd:element name=\"commentId\" type=\"CT_CommentId\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n     </xsd:sequence>\n   </xsd:complexType>\n   <xsd:complexType name=\"CT_CommentId\">\n     <xsd:attribute name=\"paraId\" type=\"w12:ST_LongHexNumber\" use=\"required\"/>\n     <xsd:attribute name=\"durableId\" type=\"w12:ST_LongHexNumber\" use=\"required\"/>\n   </xsd:complexType>\n   <xsd:element name=\"commentsIds\" type=\"CT_CommentsIds\"/>\n </xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd",
    "content": " <xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\" xmlns:w12=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" elementFormDefault=\"qualified\" attributeFormDefault=\"qualified\" blockDefault=\"#all\" xmlns=\"http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash\" targetNamespace=\"http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash\">\n   <xsd:import id=\"w12\" namespace=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" schemaLocation=\"../ISO-IEC29500-4_2016/wml.xsd\"/>\n   <xsd:attribute name=\"storeItemChecksum\" type=\"w12:ST_String\"/>\n </xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd",
    "content": " <xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\" xmlns:w12=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" elementFormDefault=\"qualified\" attributeFormDefault=\"qualified\" blockDefault=\"#all\" xmlns=\"http://schemas.microsoft.com/office/word/2015/wordml/symex\" targetNamespace=\"http://schemas.microsoft.com/office/word/2015/wordml/symex\">\n   <xsd:import id=\"w12\" namespace=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" schemaLocation=\"../ISO-IEC29500-4_2016/wml.xsd\"/>\n   <xsd:complexType name=\"CT_SymEx\">\n     <xsd:attribute name=\"font\" type=\"w12:ST_String\"/>\n     <xsd:attribute name=\"char\" type=\"w12:ST_LongHexNumber\"/>\n   </xsd:complexType>\n   <xsd:element name=\"symEx\" type=\"CT_SymEx\"/>\n </xsd:schema>\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/soffice.py",
    "content": "\"\"\"\nHelper for running LibreOffice (soffice) in environments where AF_UNIX\nsockets may be blocked (e.g., sandboxed VMs).  Detects the restriction\nat runtime and applies an LD_PRELOAD shim if needed.\n\nUsage:\n    from office.soffice import run_soffice, get_soffice_env\n\n    # Option 1 – run soffice directly\n    result = run_soffice([\"--headless\", \"--convert-to\", \"pdf\", \"input.docx\"])\n\n    # Option 2 – get env dict for your own subprocess calls\n    env = get_soffice_env()\n    subprocess.run([\"soffice\", ...], env=env)\n\"\"\"\n\nimport os\nimport socket\nimport subprocess\nimport tempfile\nfrom pathlib import Path\n\n\ndef get_soffice_env() -> dict:\n    env = os.environ.copy()\n    env[\"SAL_USE_VCLPLUGIN\"] = \"svp\"\n\n    if _needs_shim():\n        shim = _ensure_shim()\n        env[\"LD_PRELOAD\"] = str(shim)\n\n    return env\n\n\ndef run_soffice(args: list[str], **kwargs) -> subprocess.CompletedProcess:\n    env = get_soffice_env()\n    return subprocess.run([\"soffice\"] + args, env=env, **kwargs)\n\n\n_SHIM_SO = Path(tempfile.gettempdir()) / \"lo_socket_shim.so\"\n\n\ndef _needs_shim() -> bool:\n    try:\n        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)\n        s.close()\n        return False\n    except OSError:\n        return True\n\n\ndef _ensure_shim() -> Path:\n    if _SHIM_SO.exists():\n        return _SHIM_SO\n\n    src = Path(tempfile.gettempdir()) / \"lo_socket_shim.c\"\n    src.write_text(_SHIM_SOURCE)\n    subprocess.run(\n        [\"gcc\", \"-shared\", \"-fPIC\", \"-o\", str(_SHIM_SO), str(src), \"-ldl\"],\n        check=True,\n        capture_output=True,\n    )\n    src.unlink()\n    return _SHIM_SO\n\n\n_SHIM_SOURCE = r\"\"\"\n#define _GNU_SOURCE\n#include <dlfcn.h>\n#include <errno.h>\n#include <signal.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <sys/socket.h>\n#include <unistd.h>\n\nstatic int (*real_socket)(int, int, int);\nstatic int (*real_socketpair)(int, int, int, int[2]);\nstatic int (*real_listen)(int, int);\nstatic int (*real_accept)(int, struct sockaddr *, socklen_t *);\nstatic int (*real_close)(int);\nstatic int (*real_read)(int, void *, size_t);\n\n/* Per-FD bookkeeping (FDs >= 1024 are passed through unshimmed). */\nstatic int is_shimmed[1024];\nstatic int peer_of[1024];\nstatic int wake_r[1024];            /* accept() blocks reading this */\nstatic int wake_w[1024];            /* close()  writes to this      */\nstatic int listener_fd = -1;        /* FD that received listen()    */\n\n__attribute__((constructor))\nstatic void init(void) {\n    real_socket     = dlsym(RTLD_NEXT, \"socket\");\n    real_socketpair = dlsym(RTLD_NEXT, \"socketpair\");\n    real_listen     = dlsym(RTLD_NEXT, \"listen\");\n    real_accept     = dlsym(RTLD_NEXT, \"accept\");\n    real_close      = dlsym(RTLD_NEXT, \"close\");\n    real_read       = dlsym(RTLD_NEXT, \"read\");\n    for (int i = 0; i < 1024; i++) {\n        peer_of[i] = -1;\n        wake_r[i]  = -1;\n        wake_w[i]  = -1;\n    }\n}\n\n/* ---- socket ---------------------------------------------------------- */\nint socket(int domain, int type, int protocol) {\n    if (domain == AF_UNIX) {\n        int fd = real_socket(domain, type, protocol);\n        if (fd >= 0) return fd;\n        /* socket(AF_UNIX) blocked – fall back to socketpair(). */\n        int sv[2];\n        if (real_socketpair(domain, type, protocol, sv) == 0) {\n            if (sv[0] >= 0 && sv[0] < 1024) {\n                is_shimmed[sv[0]] = 1;\n                peer_of[sv[0]]    = sv[1];\n                int wp[2];\n                if (pipe(wp) == 0) {\n                    wake_r[sv[0]] = wp[0];\n                    wake_w[sv[0]] = wp[1];\n                }\n            }\n            return sv[0];\n        }\n        errno = EPERM;\n        return -1;\n    }\n    return real_socket(domain, type, protocol);\n}\n\n/* ---- listen ---------------------------------------------------------- */\nint listen(int sockfd, int backlog) {\n    if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) {\n        listener_fd = sockfd;\n        return 0;\n    }\n    return real_listen(sockfd, backlog);\n}\n\n/* ---- accept ---------------------------------------------------------- */\nint accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) {\n    if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) {\n        /* Block until close() writes to the wake pipe. */\n        if (wake_r[sockfd] >= 0) {\n            char buf;\n            real_read(wake_r[sockfd], &buf, 1);\n        }\n        errno = ECONNABORTED;\n        return -1;\n    }\n    return real_accept(sockfd, addr, addrlen);\n}\n\n/* ---- close ----------------------------------------------------------- */\nint close(int fd) {\n    if (fd >= 0 && fd < 1024 && is_shimmed[fd]) {\n        int was_listener = (fd == listener_fd);\n        is_shimmed[fd] = 0;\n\n        if (wake_w[fd] >= 0) {              /* unblock accept() */\n            char c = 0;\n            write(wake_w[fd], &c, 1);\n            real_close(wake_w[fd]);\n            wake_w[fd] = -1;\n        }\n        if (wake_r[fd] >= 0) { real_close(wake_r[fd]); wake_r[fd]  = -1; }\n        if (peer_of[fd] >= 0) { real_close(peer_of[fd]); peer_of[fd] = -1; }\n\n        if (was_listener)\n            _exit(0);                        /* conversion done – exit */\n    }\n    return real_close(fd);\n}\n\"\"\"\n\n\nif __name__ == \"__main__\":\n    import sys\n\n    result = run_soffice(sys.argv[1:])\n    sys.exit(result.returncode)\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/unpack.py",
    "content": "\"\"\"Unpack Office files (DOCX, PPTX, XLSX) for editing.\n\nExtracts the ZIP archive, pretty-prints XML files, and optionally:\n- Merges adjacent runs with identical formatting (DOCX only)\n- Simplifies adjacent tracked changes from same author (DOCX only)\n\nUsage:\n    python unpack.py <office_file> <output_dir> [options]\n\nExamples:\n    python unpack.py document.docx unpacked/\n    python unpack.py presentation.pptx unpacked/\n    python unpack.py document.docx unpacked/ --merge-runs false\n\"\"\"\n\nimport argparse\nimport sys\nimport zipfile\nfrom pathlib import Path\n\nimport defusedxml.minidom\nfrom helpers.merge_runs import merge_runs as do_merge_runs\nfrom helpers.simplify_redlines import simplify_redlines as do_simplify_redlines\n\nSMART_QUOTE_REPLACEMENTS = {\n    \"\\u201c\": \"&#x201C;\",\n    \"\\u201d\": \"&#x201D;\",\n    \"\\u2018\": \"&#x2018;\",\n    \"\\u2019\": \"&#x2019;\",\n}\n\n\ndef unpack(\n    input_file: str,\n    output_directory: str,\n    merge_runs: bool = True,\n    simplify_redlines: bool = True,\n) -> tuple[None, str]:\n    input_path = Path(input_file)\n    output_path = Path(output_directory)\n    suffix = input_path.suffix.lower()\n\n    if not input_path.exists():\n        return None, f\"Error: {input_file} does not exist\"\n\n    if suffix not in {\".docx\", \".pptx\", \".xlsx\"}:\n        return None, f\"Error: {input_file} must be a .docx, .pptx, or .xlsx file\"\n\n    try:\n        output_path.mkdir(parents=True, exist_ok=True)\n\n        with zipfile.ZipFile(input_path, \"r\") as zf:\n            zf.extractall(output_path)\n\n        xml_files = list(output_path.rglob(\"*.xml\")) + list(output_path.rglob(\"*.rels\"))\n        for xml_file in xml_files:\n            _pretty_print_xml(xml_file)\n\n        message = f\"Unpacked {input_file} ({len(xml_files)} XML files)\"\n\n        if suffix == \".docx\":\n            if simplify_redlines:\n                simplify_count, _ = do_simplify_redlines(str(output_path))\n                message += f\", simplified {simplify_count} tracked changes\"\n\n            if merge_runs:\n                merge_count, _ = do_merge_runs(str(output_path))\n                message += f\", merged {merge_count} runs\"\n\n        for xml_file in xml_files:\n            _escape_smart_quotes(xml_file)\n\n        return None, message\n\n    except zipfile.BadZipFile:\n        return None, f\"Error: {input_file} is not a valid Office file\"\n    except Exception as e:\n        return None, f\"Error unpacking: {e}\"\n\n\ndef _pretty_print_xml(xml_file: Path) -> None:\n    try:\n        content = xml_file.read_text(encoding=\"utf-8\")\n        dom = defusedxml.minidom.parseString(content)\n        xml_file.write_bytes(dom.toprettyxml(indent=\"  \", encoding=\"utf-8\"))\n    except Exception:\n        pass\n\n\ndef _escape_smart_quotes(xml_file: Path) -> None:\n    try:\n        content = xml_file.read_text(encoding=\"utf-8\")\n        for char, entity in SMART_QUOTE_REPLACEMENTS.items():\n            content = content.replace(char, entity)\n        xml_file.write_text(content, encoding=\"utf-8\")\n    except Exception:\n        pass\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Unpack an Office file (DOCX, PPTX, XLSX) for editing\"\n    )\n    parser.add_argument(\"input_file\", help=\"Office file to unpack\")\n    parser.add_argument(\"output_directory\", help=\"Output directory\")\n    parser.add_argument(\n        \"--merge-runs\",\n        type=lambda x: x.lower() == \"true\",\n        default=True,\n        metavar=\"true|false\",\n        help=\"Merge adjacent runs with identical formatting (DOCX only, default: true)\",\n    )\n    parser.add_argument(\n        \"--simplify-redlines\",\n        type=lambda x: x.lower() == \"true\",\n        default=True,\n        metavar=\"true|false\",\n        help=\"Merge adjacent tracked changes from same author (DOCX only, default: true)\",\n    )\n    args = parser.parse_args()\n\n    _, message = unpack(\n        args.input_file,\n        args.output_directory,\n        merge_runs=args.merge_runs,\n        simplify_redlines=args.simplify_redlines,\n    )\n    print(message)\n\n    if \"Error\" in message:\n        sys.exit(1)\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/validate.py",
    "content": "\"\"\"\nCommand line tool to validate Office document XML files against XSD schemas and tracked changes.\n\nUsage:\n    python validate.py <path> [--original <original_file>] [--auto-repair] [--author NAME]\n\nThe first argument can be either:\n- An unpacked directory containing the Office document XML files\n- A packed Office file (.docx/.pptx/.xlsx) which will be unpacked to a temp directory\n\nAuto-repair fixes:\n- paraId/durableId values that exceed OOXML limits\n- Missing xml:space=\"preserve\" on w:t elements with whitespace\n\"\"\"\n\nimport argparse\nimport sys\nimport tempfile\nimport zipfile\nfrom pathlib import Path\n\nfrom validators import DOCXSchemaValidator\nfrom validators import PPTXSchemaValidator\nfrom validators import RedliningValidator\n\n\ndef main():\n    parser = argparse.ArgumentParser(description=\"Validate Office document XML files\")\n    parser.add_argument(\n        \"path\",\n        help=\"Path to unpacked directory or packed Office file (.docx/.pptx/.xlsx)\",\n    )\n    parser.add_argument(\n        \"--original\",\n        required=False,\n        default=None,\n        help=(\n            \"Path to original file (.docx/.pptx/.xlsx). If omitted, all XSD errors \"\n            \"are reported and redlining validation is skipped.\"\n        ),\n    )\n    parser.add_argument(\n        \"-v\",\n        \"--verbose\",\n        action=\"store_true\",\n        help=\"Enable verbose output\",\n    )\n    parser.add_argument(\n        \"--auto-repair\",\n        action=\"store_true\",\n        help=\"Automatically repair common issues (hex IDs, whitespace preservation)\",\n    )\n    parser.add_argument(\n        \"--author\",\n        default=\"Claude\",\n        help=\"Author name for redlining validation (default: Claude)\",\n    )\n    args = parser.parse_args()\n\n    path = Path(args.path)\n    assert path.exists(), f\"Error: {path} does not exist\"\n\n    original_file = None\n    if args.original:\n        original_file = Path(args.original)\n        assert original_file.is_file(), f\"Error: {original_file} is not a file\"\n        assert original_file.suffix.lower() in [\n            \".docx\",\n            \".pptx\",\n            \".xlsx\",\n        ], f\"Error: {original_file} must be a .docx, .pptx, or .xlsx file\"\n\n    file_extension = (original_file or path).suffix.lower()\n    assert file_extension in [\n        \".docx\",\n        \".pptx\",\n        \".xlsx\",\n    ], f\"Error: Cannot determine file type from {path}. Use --original or provide a .docx/.pptx/.xlsx file.\"\n\n    if path.is_file() and path.suffix.lower() in [\".docx\", \".pptx\", \".xlsx\"]:\n        temp_dir = tempfile.mkdtemp()\n        with zipfile.ZipFile(path, \"r\") as zf:\n            zf.extractall(temp_dir)\n        unpacked_dir = Path(temp_dir)\n    else:\n        assert path.is_dir(), f\"Error: {path} is not a directory or Office file\"\n        unpacked_dir = path\n\n    match file_extension:\n        case \".docx\":\n            validators = [\n                DOCXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose),\n            ]\n            if original_file:\n                validators.append(\n                    RedliningValidator(\n                        unpacked_dir,\n                        original_file,\n                        verbose=args.verbose,\n                        author=args.author,\n                    )\n                )\n        case \".pptx\":\n            validators = [\n                PPTXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose),\n            ]\n        case _:\n            print(f\"Error: Validation not supported for file type {file_extension}\")\n            sys.exit(1)\n\n    if args.auto_repair:\n        total_repairs = sum(v.repair() for v in validators)\n        if total_repairs:\n            print(f\"Auto-repaired {total_repairs} issue(s)\")\n\n    success = all(v.validate() for v in validators)\n\n    if success:\n        print(\"All validations PASSED!\")\n\n    sys.exit(0 if success else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/validators/__init__.py",
    "content": "\"\"\"\nValidation modules for Word document processing.\n\"\"\"\n\nfrom .base import BaseSchemaValidator\nfrom .docx import DOCXSchemaValidator\nfrom .pptx import PPTXSchemaValidator\nfrom .redlining import RedliningValidator\n\n__all__ = [\n    \"BaseSchemaValidator\",\n    \"DOCXSchemaValidator\",\n    \"PPTXSchemaValidator\",\n    \"RedliningValidator\",\n]\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/validators/base.py",
    "content": "\"\"\"\nBase validator with common validation logic for document files.\n\"\"\"\n\nimport re\nfrom pathlib import Path\n\nimport defusedxml.minidom\nimport lxml.etree\n\n\nclass BaseSchemaValidator:\n    IGNORED_VALIDATION_ERRORS = [\n        \"hyphenationZone\",\n        \"purl.org/dc/terms\",\n    ]\n\n    UNIQUE_ID_REQUIREMENTS = {\n        \"comment\": (\"id\", \"file\"),\n        \"commentrangestart\": (\"id\", \"file\"),\n        \"commentrangeend\": (\"id\", \"file\"),\n        \"bookmarkstart\": (\"id\", \"file\"),\n        \"bookmarkend\": (\"id\", \"file\"),\n        \"sldid\": (\"id\", \"file\"),\n        \"sldmasterid\": (\"id\", \"global\"),\n        \"sldlayoutid\": (\"id\", \"global\"),\n        \"cm\": (\"authorid\", \"file\"),\n        \"sheet\": (\"sheetid\", \"file\"),\n        \"definedname\": (\"id\", \"file\"),\n        \"cxnsp\": (\"id\", \"file\"),\n        \"sp\": (\"id\", \"file\"),\n        \"pic\": (\"id\", \"file\"),\n        \"grpsp\": (\"id\", \"file\"),\n    }\n\n    EXCLUDED_ID_CONTAINERS = {\n        \"sectionlst\",\n    }\n\n    ELEMENT_RELATIONSHIP_TYPES = {}\n\n    SCHEMA_MAPPINGS = {\n        \"word\": \"ISO-IEC29500-4_2016/wml.xsd\",\n        \"ppt\": \"ISO-IEC29500-4_2016/pml.xsd\",\n        \"xl\": \"ISO-IEC29500-4_2016/sml.xsd\",\n        \"[Content_Types].xml\": \"ecma/fouth-edition/opc-contentTypes.xsd\",\n        \"app.xml\": \"ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd\",\n        \"core.xml\": \"ecma/fouth-edition/opc-coreProperties.xsd\",\n        \"custom.xml\": \"ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd\",\n        \".rels\": \"ecma/fouth-edition/opc-relationships.xsd\",\n        \"people.xml\": \"microsoft/wml-2012.xsd\",\n        \"commentsIds.xml\": \"microsoft/wml-cid-2016.xsd\",\n        \"commentsExtensible.xml\": \"microsoft/wml-cex-2018.xsd\",\n        \"commentsExtended.xml\": \"microsoft/wml-2012.xsd\",\n        \"chart\": \"ISO-IEC29500-4_2016/dml-chart.xsd\",\n        \"theme\": \"ISO-IEC29500-4_2016/dml-main.xsd\",\n        \"drawing\": \"ISO-IEC29500-4_2016/dml-main.xsd\",\n    }\n\n    MC_NAMESPACE = \"http://schemas.openxmlformats.org/markup-compatibility/2006\"\n    XML_NAMESPACE = \"http://www.w3.org/XML/1998/namespace\"\n\n    PACKAGE_RELATIONSHIPS_NAMESPACE = (\n        \"http://schemas.openxmlformats.org/package/2006/relationships\"\n    )\n    OFFICE_RELATIONSHIPS_NAMESPACE = (\n        \"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"\n    )\n    CONTENT_TYPES_NAMESPACE = (\n        \"http://schemas.openxmlformats.org/package/2006/content-types\"\n    )\n\n    MAIN_CONTENT_FOLDERS = {\"word\", \"ppt\", \"xl\"}\n\n    OOXML_NAMESPACES = {\n        \"http://schemas.openxmlformats.org/officeDocument/2006/math\",\n        \"http://schemas.openxmlformats.org/officeDocument/2006/relationships\",\n        \"http://schemas.openxmlformats.org/schemaLibrary/2006/main\",\n        \"http://schemas.openxmlformats.org/drawingml/2006/main\",\n        \"http://schemas.openxmlformats.org/drawingml/2006/chart\",\n        \"http://schemas.openxmlformats.org/drawingml/2006/chartDrawing\",\n        \"http://schemas.openxmlformats.org/drawingml/2006/diagram\",\n        \"http://schemas.openxmlformats.org/drawingml/2006/picture\",\n        \"http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing\",\n        \"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing\",\n        \"http://schemas.openxmlformats.org/wordprocessingml/2006/main\",\n        \"http://schemas.openxmlformats.org/presentationml/2006/main\",\n        \"http://schemas.openxmlformats.org/spreadsheetml/2006/main\",\n        \"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes\",\n        \"http://www.w3.org/XML/1998/namespace\",\n    }\n\n    def __init__(self, unpacked_dir, original_file=None, verbose=False):\n        self.unpacked_dir = Path(unpacked_dir).resolve()\n        self.original_file = Path(original_file) if original_file else None\n        self.verbose = verbose\n\n        self.schemas_dir = Path(__file__).parent.parent / \"schemas\"\n\n        patterns = [\"*.xml\", \"*.rels\"]\n        self.xml_files = [\n            f for pattern in patterns for f in self.unpacked_dir.rglob(pattern)\n        ]\n\n        if not self.xml_files:\n            print(f\"Warning: No XML files found in {self.unpacked_dir}\")\n\n    def validate(self):\n        raise NotImplementedError(\"Subclasses must implement the validate method\")\n\n    def repair(self) -> int:\n        return self.repair_whitespace_preservation()\n\n    def repair_whitespace_preservation(self) -> int:\n        repairs = 0\n\n        for xml_file in self.xml_files:\n            try:\n                content = xml_file.read_text(encoding=\"utf-8\")\n                dom = defusedxml.minidom.parseString(content)\n                modified = False\n\n                for elem in dom.getElementsByTagName(\"*\"):\n                    if elem.tagName.endswith(\":t\") and elem.firstChild:\n                        text = elem.firstChild.nodeValue\n                        if text and (\n                            text.startswith((\" \", \"\\t\")) or text.endswith((\" \", \"\\t\"))\n                        ):\n                            if elem.getAttribute(\"xml:space\") != \"preserve\":\n                                elem.setAttribute(\"xml:space\", \"preserve\")\n                                text_preview = (\n                                    repr(text[:30]) + \"...\"\n                                    if len(text) > 30\n                                    else repr(text)\n                                )\n                                print(\n                                    f\"  Repaired: {xml_file.name}: Added xml:space='preserve' to {elem.tagName}: {text_preview}\"\n                                )\n                                repairs += 1\n                                modified = True\n\n                if modified:\n                    xml_file.write_bytes(dom.toxml(encoding=\"UTF-8\"))\n\n            except Exception:\n                pass\n\n        return repairs\n\n    def validate_xml(self):\n        errors = []\n\n        for xml_file in self.xml_files:\n            try:\n                lxml.etree.parse(str(xml_file))\n            except lxml.etree.XMLSyntaxError as e:\n                errors.append(\n                    f\"  {xml_file.relative_to(self.unpacked_dir)}: Line {e.lineno}: {e.msg}\"\n                )\n            except Exception as e:\n                errors.append(\n                    f\"  {xml_file.relative_to(self.unpacked_dir)}: Unexpected error: {str(e)}\"\n                )\n\n        if errors:\n            print(f\"FAILED - Found {len(errors)} XML violations:\")\n            for error in errors:\n                print(error)\n            return False\n        else:\n            if self.verbose:\n                print(\"PASSED - All XML files are well-formed\")\n            return True\n\n    def validate_namespaces(self):\n        errors = []\n\n        for xml_file in self.xml_files:\n            try:\n                root = lxml.etree.parse(str(xml_file)).getroot()\n                declared = set(root.nsmap.keys()) - {None}\n\n                for attr_val in [\n                    v for k, v in root.attrib.items() if k.endswith(\"Ignorable\")\n                ]:\n                    undeclared = set(attr_val.split()) - declared\n                    errors.extend(\n                        f\"  {xml_file.relative_to(self.unpacked_dir)}: Namespace '{ns}' in Ignorable but not declared\"\n                        for ns in undeclared\n                    )\n            except lxml.etree.XMLSyntaxError:\n                continue\n\n        if errors:\n            print(f\"FAILED - {len(errors)} namespace issues:\")\n            for error in errors:\n                print(error)\n            return False\n        if self.verbose:\n            print(\"PASSED - All namespace prefixes properly declared\")\n        return True\n\n    def validate_unique_ids(self):\n        errors = []\n        global_ids = {}\n\n        for xml_file in self.xml_files:\n            try:\n                root = lxml.etree.parse(str(xml_file)).getroot()\n                file_ids = {}\n\n                mc_elements = root.xpath(\n                    \".//mc:AlternateContent\", namespaces={\"mc\": self.MC_NAMESPACE}\n                )\n                for elem in mc_elements:\n                    elem.getparent().remove(elem)\n\n                for elem in root.iter():\n                    tag = (\n                        elem.tag.split(\"}\")[-1].lower()\n                        if \"}\" in elem.tag\n                        else elem.tag.lower()\n                    )\n\n                    if tag in self.UNIQUE_ID_REQUIREMENTS:\n                        in_excluded_container = any(\n                            ancestor.tag.split(\"}\")[-1].lower()\n                            in self.EXCLUDED_ID_CONTAINERS\n                            for ancestor in elem.iterancestors()\n                        )\n                        if in_excluded_container:\n                            continue\n\n                        attr_name, scope = self.UNIQUE_ID_REQUIREMENTS[tag]\n\n                        id_value = None\n                        for attr, value in elem.attrib.items():\n                            attr_local = (\n                                attr.split(\"}\")[-1].lower()\n                                if \"}\" in attr\n                                else attr.lower()\n                            )\n                            if attr_local == attr_name:\n                                id_value = value\n                                break\n\n                        if id_value is not None:\n                            if scope == \"global\":\n                                if id_value in global_ids:\n                                    prev_file, prev_line, prev_tag = global_ids[\n                                        id_value\n                                    ]\n                                    errors.append(\n                                        f\"  {xml_file.relative_to(self.unpacked_dir)}: \"\n                                        f\"Line {elem.sourceline}: Global ID '{id_value}' in <{tag}> \"\n                                        f\"already used in {prev_file} at line {prev_line} in <{prev_tag}>\"\n                                    )\n                                else:\n                                    global_ids[id_value] = (\n                                        xml_file.relative_to(self.unpacked_dir),\n                                        elem.sourceline,\n                                        tag,\n                                    )\n                            elif scope == \"file\":\n                                key = (tag, attr_name)\n                                if key not in file_ids:\n                                    file_ids[key] = {}\n\n                                if id_value in file_ids[key]:\n                                    prev_line = file_ids[key][id_value]\n                                    errors.append(\n                                        f\"  {xml_file.relative_to(self.unpacked_dir)}: \"\n                                        f\"Line {elem.sourceline}: Duplicate {attr_name}='{id_value}' in <{tag}> \"\n                                        f\"(first occurrence at line {prev_line})\"\n                                    )\n                                else:\n                                    file_ids[key][id_value] = elem.sourceline\n\n            except (lxml.etree.XMLSyntaxError, Exception) as e:\n                errors.append(\n                    f\"  {xml_file.relative_to(self.unpacked_dir)}: Error: {e}\"\n                )\n\n        if errors:\n            print(f\"FAILED - Found {len(errors)} ID uniqueness violations:\")\n            for error in errors:\n                print(error)\n            return False\n        else:\n            if self.verbose:\n                print(\"PASSED - All required IDs are unique\")\n            return True\n\n    def validate_file_references(self):\n        errors = []\n\n        rels_files = list(self.unpacked_dir.rglob(\"*.rels\"))\n\n        if not rels_files:\n            if self.verbose:\n                print(\"PASSED - No .rels files found\")\n            return True\n\n        all_files = []\n        for file_path in self.unpacked_dir.rglob(\"*\"):\n            if (\n                file_path.is_file()\n                and file_path.name != \"[Content_Types].xml\"\n                and not file_path.name.endswith(\".rels\")\n            ):\n                all_files.append(file_path.resolve())\n\n        all_referenced_files = set()\n\n        if self.verbose:\n            print(\n                f\"Found {len(rels_files)} .rels files and {len(all_files)} target files\"\n            )\n\n        for rels_file in rels_files:\n            try:\n                rels_root = lxml.etree.parse(str(rels_file)).getroot()\n\n                rels_dir = rels_file.parent\n\n                referenced_files = set()\n                broken_refs = []\n\n                for rel in rels_root.findall(\n                    \".//ns:Relationship\",\n                    namespaces={\"ns\": self.PACKAGE_RELATIONSHIPS_NAMESPACE},\n                ):\n                    target = rel.get(\"Target\")\n                    if target and not target.startswith((\"http\", \"mailto:\")):\n                        if target.startswith(\"/\"):\n                            target_path = self.unpacked_dir / target.lstrip(\"/\")\n                        elif rels_file.name == \".rels\":\n                            target_path = self.unpacked_dir / target\n                        else:\n                            base_dir = rels_dir.parent\n                            target_path = base_dir / target\n\n                        try:\n                            target_path = target_path.resolve()\n                            if target_path.exists() and target_path.is_file():\n                                referenced_files.add(target_path)\n                                all_referenced_files.add(target_path)\n                            else:\n                                broken_refs.append((target, rel.sourceline))\n                        except (OSError, ValueError):\n                            broken_refs.append((target, rel.sourceline))\n\n                if broken_refs:\n                    rel_path = rels_file.relative_to(self.unpacked_dir)\n                    for broken_ref, line_num in broken_refs:\n                        errors.append(\n                            f\"  {rel_path}: Line {line_num}: Broken reference to {broken_ref}\"\n                        )\n\n            except Exception as e:\n                rel_path = rels_file.relative_to(self.unpacked_dir)\n                errors.append(f\"  Error parsing {rel_path}: {e}\")\n\n        unreferenced_files = set(all_files) - all_referenced_files\n\n        if unreferenced_files:\n            for unref_file in sorted(unreferenced_files):\n                unref_rel_path = unref_file.relative_to(self.unpacked_dir)\n                errors.append(f\"  Unreferenced file: {unref_rel_path}\")\n\n        if errors:\n            print(f\"FAILED - Found {len(errors)} relationship validation errors:\")\n            for error in errors:\n                print(error)\n            print(\n                \"CRITICAL: These errors will cause the document to appear corrupt. \"\n                + \"Broken references MUST be fixed, \"\n                + \"and unreferenced files MUST be referenced or removed.\"\n            )\n            return False\n        else:\n            if self.verbose:\n                print(\n                    \"PASSED - All references are valid and all files are properly referenced\"\n                )\n            return True\n\n    def validate_all_relationship_ids(self):\n        import lxml.etree\n\n        errors = []\n\n        for xml_file in self.xml_files:\n            if xml_file.suffix == \".rels\":\n                continue\n\n            rels_dir = xml_file.parent / \"_rels\"\n            rels_file = rels_dir / f\"{xml_file.name}.rels\"\n\n            if not rels_file.exists():\n                continue\n\n            try:\n                rels_root = lxml.etree.parse(str(rels_file)).getroot()\n                rid_to_type = {}\n\n                for rel in rels_root.findall(\n                    f\".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship\"\n                ):\n                    rid = rel.get(\"Id\")\n                    rel_type = rel.get(\"Type\", \"\")\n                    if rid:\n                        if rid in rid_to_type:\n                            rels_rel_path = rels_file.relative_to(self.unpacked_dir)\n                            errors.append(\n                                f\"  {rels_rel_path}: Line {rel.sourceline}: \"\n                                f\"Duplicate relationship ID '{rid}' (IDs must be unique)\"\n                            )\n                        type_name = (\n                            rel_type.split(\"/\")[-1] if \"/\" in rel_type else rel_type\n                        )\n                        rid_to_type[rid] = type_name\n\n                xml_root = lxml.etree.parse(str(xml_file)).getroot()\n\n                r_ns = self.OFFICE_RELATIONSHIPS_NAMESPACE\n                rid_attrs_to_check = [\"id\", \"embed\", \"link\"]\n                for elem in xml_root.iter():\n                    for attr_name in rid_attrs_to_check:\n                        rid_attr = elem.get(f\"{{{r_ns}}}{attr_name}\")\n                        if not rid_attr:\n                            continue\n                        xml_rel_path = xml_file.relative_to(self.unpacked_dir)\n                        elem_name = (\n                            elem.tag.split(\"}\")[-1] if \"}\" in elem.tag else elem.tag\n                        )\n\n                        if rid_attr not in rid_to_type:\n                            errors.append(\n                                f\"  {xml_rel_path}: Line {elem.sourceline}: \"\n                                f\"<{elem_name}> r:{attr_name} references non-existent relationship '{rid_attr}' \"\n                                f\"(valid IDs: {', '.join(sorted(rid_to_type.keys())[:5])}{'...' if len(rid_to_type) > 5 else ''})\"\n                            )\n                        elif attr_name == \"id\" and self.ELEMENT_RELATIONSHIP_TYPES:\n                            expected_type = self._get_expected_relationship_type(\n                                elem_name\n                            )\n                            if expected_type:\n                                actual_type = rid_to_type[rid_attr]\n                                if expected_type not in actual_type.lower():\n                                    errors.append(\n                                        f\"  {xml_rel_path}: Line {elem.sourceline}: \"\n                                        f\"<{elem_name}> references '{rid_attr}' which points to '{actual_type}' \"\n                                        f\"but should point to a '{expected_type}' relationship\"\n                                    )\n\n            except Exception as e:\n                xml_rel_path = xml_file.relative_to(self.unpacked_dir)\n                errors.append(f\"  Error processing {xml_rel_path}: {e}\")\n\n        if errors:\n            print(f\"FAILED - Found {len(errors)} relationship ID reference errors:\")\n            for error in errors:\n                print(error)\n            print(\"\\nThese ID mismatches will cause the document to appear corrupt!\")\n            return False\n        else:\n            if self.verbose:\n                print(\"PASSED - All relationship ID references are valid\")\n            return True\n\n    def _get_expected_relationship_type(self, element_name):\n        elem_lower = element_name.lower()\n\n        if elem_lower in self.ELEMENT_RELATIONSHIP_TYPES:\n            return self.ELEMENT_RELATIONSHIP_TYPES[elem_lower]\n\n        if elem_lower.endswith(\"id\") and len(elem_lower) > 2:\n            prefix = elem_lower[:-2]\n            if prefix.endswith(\"master\"):\n                return prefix.lower()\n            elif prefix.endswith(\"layout\"):\n                return prefix.lower()\n            else:\n                if prefix == \"sld\":\n                    return \"slide\"\n                return prefix.lower()\n\n        if elem_lower.endswith(\"reference\") and len(elem_lower) > 9:\n            prefix = elem_lower[:-9]\n            return prefix.lower()\n\n        return None\n\n    def validate_content_types(self):\n        errors = []\n\n        content_types_file = self.unpacked_dir / \"[Content_Types].xml\"\n        if not content_types_file.exists():\n            print(\"FAILED - [Content_Types].xml file not found\")\n            return False\n\n        try:\n            root = lxml.etree.parse(str(content_types_file)).getroot()\n            declared_parts = set()\n            declared_extensions = set()\n\n            for override in root.findall(\n                f\".//{{{self.CONTENT_TYPES_NAMESPACE}}}Override\"\n            ):\n                part_name = override.get(\"PartName\")\n                if part_name is not None:\n                    declared_parts.add(part_name.lstrip(\"/\"))\n\n            for default in root.findall(\n                f\".//{{{self.CONTENT_TYPES_NAMESPACE}}}Default\"\n            ):\n                extension = default.get(\"Extension\")\n                if extension is not None:\n                    declared_extensions.add(extension.lower())\n\n            declarable_roots = {\n                \"sld\",\n                \"sldLayout\",\n                \"sldMaster\",\n                \"presentation\",\n                \"document\",\n                \"workbook\",\n                \"worksheet\",\n                \"theme\",\n            }\n\n            media_extensions = {\n                \"png\": \"image/png\",\n                \"jpg\": \"image/jpeg\",\n                \"jpeg\": \"image/jpeg\",\n                \"gif\": \"image/gif\",\n                \"bmp\": \"image/bmp\",\n                \"tiff\": \"image/tiff\",\n                \"wmf\": \"image/x-wmf\",\n                \"emf\": \"image/x-emf\",\n            }\n\n            all_files = list(self.unpacked_dir.rglob(\"*\"))\n            all_files = [f for f in all_files if f.is_file()]\n\n            for xml_file in self.xml_files:\n                path_str = str(xml_file.relative_to(self.unpacked_dir)).replace(\n                    \"\\\\\", \"/\"\n                )\n\n                if any(\n                    skip in path_str\n                    for skip in [\".rels\", \"[Content_Types]\", \"docProps/\", \"_rels/\"]\n                ):\n                    continue\n\n                try:\n                    root_tag = lxml.etree.parse(str(xml_file)).getroot().tag\n                    root_name = root_tag.split(\"}\")[-1] if \"}\" in root_tag else root_tag\n\n                    if root_name in declarable_roots and path_str not in declared_parts:\n                        errors.append(\n                            f\"  {path_str}: File with <{root_name}> root not declared in [Content_Types].xml\"\n                        )\n\n                except Exception:\n                    continue\n\n            for file_path in all_files:\n                if file_path.suffix.lower() in {\".xml\", \".rels\"}:\n                    continue\n                if file_path.name == \"[Content_Types].xml\":\n                    continue\n                if \"_rels\" in file_path.parts or \"docProps\" in file_path.parts:\n                    continue\n\n                extension = file_path.suffix.lstrip(\".\").lower()\n                if extension and extension not in declared_extensions:\n                    if extension in media_extensions:\n                        relative_path = file_path.relative_to(self.unpacked_dir)\n                        msg = (\n                            f\"  {relative_path}: File with extension '{extension}' \"\n                            f\"not declared in [Content_Types].xml - should add: \"\n                            f'<Default Extension=\"{extension}\" '\n                            f'ContentType=\"{media_extensions[extension]}\"/>'\n                        )\n                        errors.append(msg)\n\n        except Exception as e:\n            errors.append(f\"  Error parsing [Content_Types].xml: {e}\")\n\n        if errors:\n            print(f\"FAILED - Found {len(errors)} content type declaration errors:\")\n            for error in errors:\n                print(error)\n            return False\n        else:\n            if self.verbose:\n                print(\n                    \"PASSED - All content files are properly declared in [Content_Types].xml\"\n                )\n            return True\n\n    def validate_file_against_xsd(self, xml_file, verbose=False):\n        xml_file = Path(xml_file).resolve()\n        unpacked_dir = self.unpacked_dir.resolve()\n\n        is_valid, current_errors = self._validate_single_file_xsd(\n            xml_file, unpacked_dir\n        )\n\n        if is_valid is None:\n            return None, set()\n        elif is_valid:\n            return True, set()\n\n        original_errors = self._get_original_file_errors(xml_file)\n\n        assert current_errors is not None\n        new_errors = current_errors - original_errors\n\n        new_errors = {\n            e\n            for e in new_errors\n            if not any(pattern in e for pattern in self.IGNORED_VALIDATION_ERRORS)\n        }\n\n        if new_errors:\n            if verbose:\n                relative_path = xml_file.relative_to(unpacked_dir)\n                print(f\"FAILED - {relative_path}: {len(new_errors)} new error(s)\")\n                for error in list(new_errors)[:3]:\n                    truncated = error[:250] + \"...\" if len(error) > 250 else error\n                    print(f\"  - {truncated}\")\n            return False, new_errors\n        else:\n            if verbose:\n                print(\n                    f\"PASSED - No new errors (original had {len(current_errors)} errors)\"\n                )\n            return True, set()\n\n    def validate_against_xsd(self):\n        new_errors = []\n        original_error_count = 0\n        valid_count = 0\n        skipped_count = 0\n\n        for xml_file in self.xml_files:\n            relative_path = str(xml_file.relative_to(self.unpacked_dir))\n            is_valid, new_file_errors = self.validate_file_against_xsd(\n                xml_file, verbose=False\n            )\n\n            if is_valid is None:\n                skipped_count += 1\n                continue\n            elif is_valid and not new_file_errors:\n                valid_count += 1\n                continue\n            elif is_valid:\n                original_error_count += 1\n                valid_count += 1\n                continue\n\n            new_errors.append(f\"  {relative_path}: {len(new_file_errors)} new error(s)\")\n            for error in list(new_file_errors)[:3]:\n                new_errors.append(\n                    f\"    - {error[:250]}...\" if len(error) > 250 else f\"    - {error}\"\n                )\n\n        if self.verbose:\n            print(f\"Validated {len(self.xml_files)} files:\")\n            print(f\"  - Valid: {valid_count}\")\n            print(f\"  - Skipped (no schema): {skipped_count}\")\n            if original_error_count:\n                print(f\"  - With original errors (ignored): {original_error_count}\")\n            print(\n                f\"  - With NEW errors: {len(new_errors) > 0 and len([e for e in new_errors if not e.startswith('    ')]) or 0}\"\n            )\n\n        if new_errors:\n            print(\"\\nFAILED - Found NEW validation errors:\")\n            for error in new_errors:\n                print(error)\n            return False\n        else:\n            if self.verbose:\n                print(\"\\nPASSED - No new XSD validation errors introduced\")\n            return True\n\n    def _get_schema_path(self, xml_file):\n        if xml_file.name in self.SCHEMA_MAPPINGS:\n            return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.name]\n\n        if xml_file.suffix == \".rels\":\n            return self.schemas_dir / self.SCHEMA_MAPPINGS[\".rels\"]\n\n        if \"charts/\" in str(xml_file) and xml_file.name.startswith(\"chart\"):\n            return self.schemas_dir / self.SCHEMA_MAPPINGS[\"chart\"]\n\n        if \"theme/\" in str(xml_file) and xml_file.name.startswith(\"theme\"):\n            return self.schemas_dir / self.SCHEMA_MAPPINGS[\"theme\"]\n\n        if xml_file.parent.name in self.MAIN_CONTENT_FOLDERS:\n            return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.parent.name]\n\n        return None\n\n    def _clean_ignorable_namespaces(self, xml_doc):\n        xml_string = lxml.etree.tostring(xml_doc, encoding=\"unicode\")\n        xml_copy = lxml.etree.fromstring(xml_string)\n\n        for elem in xml_copy.iter():\n            attrs_to_remove = []\n\n            for attr in elem.attrib:\n                if \"{\" in attr:\n                    ns = attr.split(\"}\")[0][1:]\n                    if ns not in self.OOXML_NAMESPACES:\n                        attrs_to_remove.append(attr)\n\n            for attr in attrs_to_remove:\n                del elem.attrib[attr]\n\n        self._remove_ignorable_elements(xml_copy)\n\n        return lxml.etree.ElementTree(xml_copy)\n\n    def _remove_ignorable_elements(self, root):\n        elements_to_remove = []\n\n        for elem in list(root):\n            if not hasattr(elem, \"tag\") or callable(elem.tag):\n                continue\n\n            tag_str = str(elem.tag)\n            if tag_str.startswith(\"{\"):\n                ns = tag_str.split(\"}\")[0][1:]\n                if ns not in self.OOXML_NAMESPACES:\n                    elements_to_remove.append(elem)\n                    continue\n\n            self._remove_ignorable_elements(elem)\n\n        for elem in elements_to_remove:\n            root.remove(elem)\n\n    def _preprocess_for_mc_ignorable(self, xml_doc):\n        root = xml_doc.getroot()\n\n        if f\"{{{self.MC_NAMESPACE}}}Ignorable\" in root.attrib:\n            del root.attrib[f\"{{{self.MC_NAMESPACE}}}Ignorable\"]\n\n        return xml_doc\n\n    def _validate_single_file_xsd(self, xml_file, base_path):\n        schema_path = self._get_schema_path(xml_file)\n        if not schema_path:\n            return None, None\n\n        try:\n            with open(schema_path, \"rb\") as xsd_file:\n                parser = lxml.etree.XMLParser()\n                xsd_doc = lxml.etree.parse(\n                    xsd_file, parser=parser, base_url=str(schema_path)\n                )\n                schema = lxml.etree.XMLSchema(xsd_doc)\n\n            with open(xml_file, \"r\") as f:\n                xml_doc = lxml.etree.parse(f)\n\n            xml_doc, _ = self._remove_template_tags_from_text_nodes(xml_doc)\n            xml_doc = self._preprocess_for_mc_ignorable(xml_doc)\n\n            relative_path = xml_file.relative_to(base_path)\n            if (\n                relative_path.parts\n                and relative_path.parts[0] in self.MAIN_CONTENT_FOLDERS\n            ):\n                xml_doc = self._clean_ignorable_namespaces(xml_doc)\n\n            if schema.validate(xml_doc):\n                return True, set()\n            else:\n                errors = set()\n                for error in schema.error_log:\n                    errors.add(error.message)\n                return False, errors\n\n        except Exception as e:\n            return False, {str(e)}\n\n    def _get_original_file_errors(self, xml_file):\n        if self.original_file is None:\n            return set()\n\n        import tempfile\n        import zipfile\n\n        xml_file = Path(xml_file).resolve()\n        unpacked_dir = self.unpacked_dir.resolve()\n        relative_path = xml_file.relative_to(unpacked_dir)\n\n        with tempfile.TemporaryDirectory() as temp_dir:\n            temp_path = Path(temp_dir)\n\n            with zipfile.ZipFile(self.original_file, \"r\") as zip_ref:\n                zip_ref.extractall(temp_path)\n\n            original_xml_file = temp_path / relative_path\n\n            if not original_xml_file.exists():\n                return set()\n\n            is_valid, errors = self._validate_single_file_xsd(\n                original_xml_file, temp_path\n            )\n            return errors if errors else set()\n\n    def _remove_template_tags_from_text_nodes(self, xml_doc):\n        warnings = []\n        template_pattern = re.compile(r\"\\{\\{[^}]*\\}\\}\")\n\n        xml_string = lxml.etree.tostring(xml_doc, encoding=\"unicode\")\n        xml_copy = lxml.etree.fromstring(xml_string)\n\n        def process_text_content(text, content_type):\n            if not text:\n                return text\n            matches = list(template_pattern.finditer(text))\n            if matches:\n                for match in matches:\n                    warnings.append(\n                        f\"Found template tag in {content_type}: {match.group()}\"\n                    )\n                return template_pattern.sub(\"\", text)\n            return text\n\n        for elem in xml_copy.iter():\n            if not hasattr(elem, \"tag\") or callable(elem.tag):\n                continue\n            tag_str = str(elem.tag)\n            if tag_str.endswith(\"}t\") or tag_str == \"t\":\n                continue\n\n            elem.text = process_text_content(elem.text, \"text content\")\n            elem.tail = process_text_content(elem.tail, \"tail content\")\n\n        return lxml.etree.ElementTree(xml_copy), warnings\n\n\nif __name__ == \"__main__\":\n    raise RuntimeError(\"This module should not be run directly.\")\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/validators/docx.py",
    "content": "\"\"\"\nValidator for Word document XML files against XSD schemas.\n\"\"\"\n\nimport random\nimport re\nimport tempfile\nimport zipfile\n\nimport defusedxml.minidom\nimport lxml.etree\n\nfrom .base import BaseSchemaValidator\n\n\nclass DOCXSchemaValidator(BaseSchemaValidator):\n    WORD_2006_NAMESPACE = \"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"\n    W14_NAMESPACE = \"http://schemas.microsoft.com/office/word/2010/wordml\"\n    W16CID_NAMESPACE = \"http://schemas.microsoft.com/office/word/2016/wordml/cid\"\n\n    ELEMENT_RELATIONSHIP_TYPES = {}\n\n    def validate(self):\n        if not self.validate_xml():\n            return False\n\n        all_valid = True\n        if not self.validate_namespaces():\n            all_valid = False\n\n        if not self.validate_unique_ids():\n            all_valid = False\n\n        if not self.validate_file_references():\n            all_valid = False\n\n        if not self.validate_content_types():\n            all_valid = False\n\n        if not self.validate_against_xsd():\n            all_valid = False\n\n        if not self.validate_whitespace_preservation():\n            all_valid = False\n\n        if not self.validate_deletions():\n            all_valid = False\n\n        if not self.validate_insertions():\n            all_valid = False\n\n        if not self.validate_all_relationship_ids():\n            all_valid = False\n\n        if not self.validate_id_constraints():\n            all_valid = False\n\n        if not self.validate_comment_markers():\n            all_valid = False\n\n        self.compare_paragraph_counts()\n\n        return all_valid\n\n    def validate_whitespace_preservation(self):\n        errors = []\n\n        for xml_file in self.xml_files:\n            if xml_file.name != \"document.xml\":\n                continue\n\n            try:\n                root = lxml.etree.parse(str(xml_file)).getroot()\n\n                for elem in root.iter(f\"{{{self.WORD_2006_NAMESPACE}}}t\"):\n                    if elem.text:\n                        text = elem.text\n                        if re.search(r\"^[ \\t\\n\\r]\", text) or re.search(\n                            r\"[ \\t\\n\\r]$\", text\n                        ):\n                            xml_space_attr = f\"{{{self.XML_NAMESPACE}}}space\"\n                            if (\n                                xml_space_attr not in elem.attrib\n                                or elem.attrib[xml_space_attr] != \"preserve\"\n                            ):\n                                text_preview = (\n                                    repr(text)[:50] + \"...\"\n                                    if len(repr(text)) > 50\n                                    else repr(text)\n                                )\n                                errors.append(\n                                    f\"  {xml_file.relative_to(self.unpacked_dir)}: \"\n                                    f\"Line {elem.sourceline}: w:t element with whitespace \"\n                                    f\"missing xml:space='preserve': {text_preview}\"\n                                )\n\n            except (lxml.etree.XMLSyntaxError, Exception) as e:\n                errors.append(\n                    f\"  {xml_file.relative_to(self.unpacked_dir)}: Error: {e}\"\n                )\n\n        if errors:\n            print(f\"FAILED - Found {len(errors)} whitespace preservation violations:\")\n            for error in errors:\n                print(error)\n            return False\n        else:\n            if self.verbose:\n                print(\"PASSED - All whitespace is properly preserved\")\n            return True\n\n    def validate_deletions(self):\n        errors = []\n\n        for xml_file in self.xml_files:\n            if xml_file.name != \"document.xml\":\n                continue\n\n            try:\n                root = lxml.etree.parse(str(xml_file)).getroot()\n                namespaces = {\"w\": self.WORD_2006_NAMESPACE}\n\n                for t_elem in root.xpath(\".//w:del//w:t\", namespaces=namespaces):\n                    if t_elem.text:\n                        text_preview = (\n                            repr(t_elem.text)[:50] + \"...\"\n                            if len(repr(t_elem.text)) > 50\n                            else repr(t_elem.text)\n                        )\n                        errors.append(\n                            f\"  {xml_file.relative_to(self.unpacked_dir)}: \"\n                            f\"Line {t_elem.sourceline}: <w:t> found within <w:del>: {text_preview}\"\n                        )\n\n                for instr_elem in root.xpath(\n                    \".//w:del//w:instrText\", namespaces=namespaces\n                ):\n                    text_preview = (\n                        repr(instr_elem.text or \"\")[:50] + \"...\"\n                        if len(repr(instr_elem.text or \"\")) > 50\n                        else repr(instr_elem.text or \"\")\n                    )\n                    errors.append(\n                        f\"  {xml_file.relative_to(self.unpacked_dir)}: \"\n                        f\"Line {instr_elem.sourceline}: <w:instrText> found within <w:del> (use <w:delInstrText>): {text_preview}\"\n                    )\n\n            except (lxml.etree.XMLSyntaxError, Exception) as e:\n                errors.append(\n                    f\"  {xml_file.relative_to(self.unpacked_dir)}: Error: {e}\"\n                )\n\n        if errors:\n            print(f\"FAILED - Found {len(errors)} deletion validation violations:\")\n            for error in errors:\n                print(error)\n            return False\n        else:\n            if self.verbose:\n                print(\"PASSED - No w:t elements found within w:del elements\")\n            return True\n\n    def count_paragraphs_in_unpacked(self):\n        count = 0\n\n        for xml_file in self.xml_files:\n            if xml_file.name != \"document.xml\":\n                continue\n\n            try:\n                root = lxml.etree.parse(str(xml_file)).getroot()\n                paragraphs = root.findall(f\".//{{{self.WORD_2006_NAMESPACE}}}p\")\n                count = len(paragraphs)\n            except Exception as e:\n                print(f\"Error counting paragraphs in unpacked document: {e}\")\n\n        return count\n\n    def count_paragraphs_in_original(self):\n        original = self.original_file\n        if original is None:\n            return 0\n\n        count = 0\n\n        try:\n            with tempfile.TemporaryDirectory() as temp_dir:\n                with zipfile.ZipFile(original, \"r\") as zip_ref:\n                    zip_ref.extractall(temp_dir)\n\n                doc_xml_path = temp_dir + \"/word/document.xml\"\n                root = lxml.etree.parse(doc_xml_path).getroot()\n\n                paragraphs = root.findall(f\".//{{{self.WORD_2006_NAMESPACE}}}p\")\n                count = len(paragraphs)\n\n        except Exception as e:\n            print(f\"Error counting paragraphs in original document: {e}\")\n\n        return count\n\n    def validate_insertions(self):\n        errors = []\n\n        for xml_file in self.xml_files:\n            if xml_file.name != \"document.xml\":\n                continue\n\n            try:\n                root = lxml.etree.parse(str(xml_file)).getroot()\n                namespaces = {\"w\": self.WORD_2006_NAMESPACE}\n\n                invalid_elements = root.xpath(\n                    \".//w:ins//w:delText[not(ancestor::w:del)]\", namespaces=namespaces\n                )\n\n                for elem in invalid_elements:\n                    text_preview = (\n                        repr(elem.text or \"\")[:50] + \"...\"\n                        if len(repr(elem.text or \"\")) > 50\n                        else repr(elem.text or \"\")\n                    )\n                    errors.append(\n                        f\"  {xml_file.relative_to(self.unpacked_dir)}: \"\n                        f\"Line {elem.sourceline}: <w:delText> within <w:ins>: {text_preview}\"\n                    )\n\n            except (lxml.etree.XMLSyntaxError, Exception) as e:\n                errors.append(\n                    f\"  {xml_file.relative_to(self.unpacked_dir)}: Error: {e}\"\n                )\n\n        if errors:\n            print(f\"FAILED - Found {len(errors)} insertion validation violations:\")\n            for error in errors:\n                print(error)\n            return False\n        else:\n            if self.verbose:\n                print(\"PASSED - No w:delText elements within w:ins elements\")\n            return True\n\n    def compare_paragraph_counts(self):\n        original_count = self.count_paragraphs_in_original()\n        new_count = self.count_paragraphs_in_unpacked()\n\n        diff = new_count - original_count\n        diff_str = f\"+{diff}\" if diff > 0 else str(diff)\n        print(f\"\\nParagraphs: {original_count} → {new_count} ({diff_str})\")\n\n    def _parse_id_value(self, val: str, base: int = 16) -> int:\n        return int(val, base)\n\n    def validate_id_constraints(self):\n        errors = []\n        para_id_attr = f\"{{{self.W14_NAMESPACE}}}paraId\"\n        durable_id_attr = f\"{{{self.W16CID_NAMESPACE}}}durableId\"\n\n        for xml_file in self.xml_files:\n            try:\n                for elem in lxml.etree.parse(str(xml_file)).iter():\n                    if val := elem.get(para_id_attr):\n                        if self._parse_id_value(val, base=16) >= 0x80000000:\n                            errors.append(\n                                f\"  {xml_file.name}:{elem.sourceline}: paraId={val} >= 0x80000000\"\n                            )\n\n                    if val := elem.get(durable_id_attr):\n                        if xml_file.name == \"numbering.xml\":\n                            try:\n                                if self._parse_id_value(val, base=10) >= 0x7FFFFFFF:\n                                    errors.append(\n                                        f\"  {xml_file.name}:{elem.sourceline}: durableId={val} >= 0x7FFFFFFF\"\n                                    )\n                            except ValueError:\n                                errors.append(\n                                    f\"  {xml_file.name}:{elem.sourceline}: durableId={val} must be decimal in numbering.xml\"\n                                )\n                        else:\n                            if self._parse_id_value(val, base=16) >= 0x7FFFFFFF:\n                                errors.append(\n                                    f\"  {xml_file.name}:{elem.sourceline}: durableId={val} >= 0x7FFFFFFF\"\n                                )\n            except Exception:\n                pass\n\n        if errors:\n            print(f\"FAILED - {len(errors)} ID constraint violations:\")\n            for e in errors:\n                print(e)\n        elif self.verbose:\n            print(\"PASSED - All paraId/durableId values within constraints\")\n        return not errors\n\n    def validate_comment_markers(self):\n        errors = []\n\n        document_xml = None\n        comments_xml = None\n        for xml_file in self.xml_files:\n            if xml_file.name == \"document.xml\" and \"word\" in str(xml_file):\n                document_xml = xml_file\n            elif xml_file.name == \"comments.xml\":\n                comments_xml = xml_file\n\n        if not document_xml:\n            if self.verbose:\n                print(\"PASSED - No document.xml found (skipping comment validation)\")\n            return True\n\n        try:\n            doc_root = lxml.etree.parse(str(document_xml)).getroot()\n            namespaces = {\"w\": self.WORD_2006_NAMESPACE}\n\n            range_starts = {\n                elem.get(f\"{{{self.WORD_2006_NAMESPACE}}}id\")\n                for elem in doc_root.xpath(\n                    \".//w:commentRangeStart\", namespaces=namespaces\n                )\n            }\n            range_ends = {\n                elem.get(f\"{{{self.WORD_2006_NAMESPACE}}}id\")\n                for elem in doc_root.xpath(\n                    \".//w:commentRangeEnd\", namespaces=namespaces\n                )\n            }\n            references = {\n                elem.get(f\"{{{self.WORD_2006_NAMESPACE}}}id\")\n                for elem in doc_root.xpath(\n                    \".//w:commentReference\", namespaces=namespaces\n                )\n            }\n\n            orphaned_ends = range_ends - range_starts\n            for comment_id in sorted(\n                orphaned_ends, key=lambda x: int(x) if x and x.isdigit() else 0\n            ):\n                errors.append(\n                    f'  document.xml: commentRangeEnd id=\"{comment_id}\" has no matching commentRangeStart'\n                )\n\n            orphaned_starts = range_starts - range_ends\n            for comment_id in sorted(\n                orphaned_starts, key=lambda x: int(x) if x and x.isdigit() else 0\n            ):\n                errors.append(\n                    f'  document.xml: commentRangeStart id=\"{comment_id}\" has no matching commentRangeEnd'\n                )\n\n            comment_ids = set()\n            if comments_xml and comments_xml.exists():\n                comments_root = lxml.etree.parse(str(comments_xml)).getroot()\n                comment_ids = {\n                    elem.get(f\"{{{self.WORD_2006_NAMESPACE}}}id\")\n                    for elem in comments_root.xpath(\n                        \".//w:comment\", namespaces=namespaces\n                    )\n                }\n\n                marker_ids = range_starts | range_ends | references\n                invalid_refs = marker_ids - comment_ids\n                for comment_id in sorted(\n                    invalid_refs, key=lambda x: int(x) if x and x.isdigit() else 0\n                ):\n                    if comment_id:\n                        errors.append(\n                            f'  document.xml: marker id=\"{comment_id}\" references non-existent comment'\n                        )\n\n        except (lxml.etree.XMLSyntaxError, Exception) as e:\n            errors.append(f\"  Error parsing XML: {e}\")\n\n        if errors:\n            print(f\"FAILED - {len(errors)} comment marker violations:\")\n            for error in errors:\n                print(error)\n            return False\n        else:\n            if self.verbose:\n                print(\"PASSED - All comment markers properly paired\")\n            return True\n\n    def repair(self) -> int:\n        repairs = super().repair()\n        repairs += self.repair_durableId()\n        return repairs\n\n    def repair_durableId(self) -> int:\n        repairs = 0\n\n        for xml_file in self.xml_files:\n            try:\n                content = xml_file.read_text(encoding=\"utf-8\")\n                dom = defusedxml.minidom.parseString(content)\n                modified = False\n\n                for elem in dom.getElementsByTagName(\"*\"):\n                    if not elem.hasAttribute(\"w16cid:durableId\"):\n                        continue\n\n                    durable_id = elem.getAttribute(\"w16cid:durableId\")\n                    needs_repair = False\n\n                    if xml_file.name == \"numbering.xml\":\n                        try:\n                            needs_repair = (\n                                self._parse_id_value(durable_id, base=10) >= 0x7FFFFFFF\n                            )\n                        except ValueError:\n                            needs_repair = True\n                    else:\n                        try:\n                            needs_repair = (\n                                self._parse_id_value(durable_id, base=16) >= 0x7FFFFFFF\n                            )\n                        except ValueError:\n                            needs_repair = True\n\n                    if needs_repair:\n                        value = random.randint(1, 0x7FFFFFFE)\n                        if xml_file.name == \"numbering.xml\":\n                            new_id = str(value)\n                        else:\n                            new_id = f\"{value:08X}\"\n\n                        elem.setAttribute(\"w16cid:durableId\", new_id)\n                        print(\n                            f\"  Repaired: {xml_file.name}: durableId {durable_id} → {new_id}\"\n                        )\n                        repairs += 1\n                        modified = True\n\n                if modified:\n                    xml_file.write_bytes(dom.toxml(encoding=\"UTF-8\"))\n\n            except Exception:\n                pass\n\n        return repairs\n\n\nif __name__ == \"__main__\":\n    raise RuntimeError(\"This module should not be run directly.\")\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/validators/pptx.py",
    "content": "\"\"\"\nValidator for PowerPoint presentation XML files against XSD schemas.\n\"\"\"\n\nimport re\n\nfrom .base import BaseSchemaValidator\n\n\nclass PPTXSchemaValidator(BaseSchemaValidator):\n    PRESENTATIONML_NAMESPACE = (\n        \"http://schemas.openxmlformats.org/presentationml/2006/main\"\n    )\n\n    ELEMENT_RELATIONSHIP_TYPES = {\n        \"sldid\": \"slide\",\n        \"sldmasterid\": \"slidemaster\",\n        \"notesmasterid\": \"notesmaster\",\n        \"sldlayoutid\": \"slidelayout\",\n        \"themeid\": \"theme\",\n        \"tablestyleid\": \"tablestyles\",\n    }\n\n    def validate(self):\n        if not self.validate_xml():\n            return False\n\n        all_valid = True\n        if not self.validate_namespaces():\n            all_valid = False\n\n        if not self.validate_unique_ids():\n            all_valid = False\n\n        if not self.validate_uuid_ids():\n            all_valid = False\n\n        if not self.validate_file_references():\n            all_valid = False\n\n        if not self.validate_slide_layout_ids():\n            all_valid = False\n\n        if not self.validate_content_types():\n            all_valid = False\n\n        if not self.validate_against_xsd():\n            all_valid = False\n\n        if not self.validate_notes_slide_references():\n            all_valid = False\n\n        if not self.validate_all_relationship_ids():\n            all_valid = False\n\n        if not self.validate_no_duplicate_slide_layouts():\n            all_valid = False\n\n        return all_valid\n\n    def validate_uuid_ids(self):\n        import lxml.etree\n\n        errors = []\n        uuid_pattern = re.compile(\n            r\"^[\\{\\(]?[0-9A-Fa-f]{8}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{12}[\\}\\)]?$\"\n        )\n\n        for xml_file in self.xml_files:\n            try:\n                root = lxml.etree.parse(str(xml_file)).getroot()\n\n                for elem in root.iter():\n                    for attr, value in elem.attrib.items():\n                        attr_name = attr.split(\"}\")[-1].lower()\n                        if attr_name == \"id\" or attr_name.endswith(\"id\"):\n                            if self._looks_like_uuid(value):\n                                if not uuid_pattern.match(value):\n                                    errors.append(\n                                        f\"  {xml_file.relative_to(self.unpacked_dir)}: \"\n                                        f\"Line {elem.sourceline}: ID '{value}' appears to be \"\n                                        \"a UUID but contains invalid hex characters\"\n                                    )\n\n            except (lxml.etree.XMLSyntaxError, Exception) as e:\n                errors.append(\n                    f\"  {xml_file.relative_to(self.unpacked_dir)}: Error: {e}\"\n                )\n\n        if errors:\n            print(f\"FAILED - Found {len(errors)} UUID ID validation errors:\")\n            for error in errors:\n                print(error)\n            return False\n        else:\n            if self.verbose:\n                print(\"PASSED - All UUID-like IDs contain valid hex values\")\n            return True\n\n    def _looks_like_uuid(self, value):\n        clean_value = value.strip(\"{}()\").replace(\"-\", \"\")\n        return len(clean_value) == 32 and all(c.isalnum() for c in clean_value)\n\n    def validate_slide_layout_ids(self):\n        import lxml.etree\n\n        errors = []\n\n        slide_masters = list(self.unpacked_dir.glob(\"ppt/slideMasters/*.xml\"))\n\n        if not slide_masters:\n            if self.verbose:\n                print(\"PASSED - No slide masters found\")\n            return True\n\n        for slide_master in slide_masters:\n            try:\n                root = lxml.etree.parse(str(slide_master)).getroot()\n\n                rels_file = slide_master.parent / \"_rels\" / f\"{slide_master.name}.rels\"\n\n                if not rels_file.exists():\n                    errors.append(\n                        f\"  {slide_master.relative_to(self.unpacked_dir)}: \"\n                        f\"Missing relationships file: {rels_file.relative_to(self.unpacked_dir)}\"\n                    )\n                    continue\n\n                rels_root = lxml.etree.parse(str(rels_file)).getroot()\n\n                valid_layout_rids = set()\n                for rel in rels_root.findall(\n                    f\".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship\"\n                ):\n                    rel_type = rel.get(\"Type\", \"\")\n                    if \"slideLayout\" in rel_type:\n                        valid_layout_rids.add(rel.get(\"Id\"))\n\n                for sld_layout_id in root.findall(\n                    f\".//{{{self.PRESENTATIONML_NAMESPACE}}}sldLayoutId\"\n                ):\n                    r_id = sld_layout_id.get(\n                        f\"{{{self.OFFICE_RELATIONSHIPS_NAMESPACE}}}id\"\n                    )\n                    layout_id = sld_layout_id.get(\"id\")\n\n                    if r_id and r_id not in valid_layout_rids:\n                        errors.append(\n                            f\"  {slide_master.relative_to(self.unpacked_dir)}: \"\n                            f\"Line {sld_layout_id.sourceline}: sldLayoutId with id='{layout_id}' \"\n                            f\"references r:id='{r_id}' which is not found in slide layout relationships\"\n                        )\n\n            except (lxml.etree.XMLSyntaxError, Exception) as e:\n                errors.append(\n                    f\"  {slide_master.relative_to(self.unpacked_dir)}: Error: {e}\"\n                )\n\n        if errors:\n            print(f\"FAILED - Found {len(errors)} slide layout ID validation errors:\")\n            for error in errors:\n                print(error)\n            print(\n                \"Remove invalid references or add missing slide layouts to the relationships file.\"\n            )\n            return False\n        else:\n            if self.verbose:\n                print(\"PASSED - All slide layout IDs reference valid slide layouts\")\n            return True\n\n    def validate_no_duplicate_slide_layouts(self):\n        import lxml.etree\n\n        errors = []\n        slide_rels_files = list(self.unpacked_dir.glob(\"ppt/slides/_rels/*.xml.rels\"))\n\n        for rels_file in slide_rels_files:\n            try:\n                root = lxml.etree.parse(str(rels_file)).getroot()\n\n                layout_rels = [\n                    rel\n                    for rel in root.findall(\n                        f\".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship\"\n                    )\n                    if \"slideLayout\" in rel.get(\"Type\", \"\")\n                ]\n\n                if len(layout_rels) > 1:\n                    errors.append(\n                        f\"  {rels_file.relative_to(self.unpacked_dir)}: has {len(layout_rels)} slideLayout references\"\n                    )\n\n            except Exception as e:\n                errors.append(\n                    f\"  {rels_file.relative_to(self.unpacked_dir)}: Error: {e}\"\n                )\n\n        if errors:\n            print(\"FAILED - Found slides with duplicate slideLayout references:\")\n            for error in errors:\n                print(error)\n            return False\n        else:\n            if self.verbose:\n                print(\"PASSED - All slides have exactly one slideLayout reference\")\n            return True\n\n    def validate_notes_slide_references(self):\n        import lxml.etree\n\n        errors = []\n        notes_slide_references = {}\n\n        slide_rels_files = list(self.unpacked_dir.glob(\"ppt/slides/_rels/*.xml.rels\"))\n\n        if not slide_rels_files:\n            if self.verbose:\n                print(\"PASSED - No slide relationship files found\")\n            return True\n\n        for rels_file in slide_rels_files:\n            try:\n                root = lxml.etree.parse(str(rels_file)).getroot()\n\n                for rel in root.findall(\n                    f\".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship\"\n                ):\n                    rel_type = rel.get(\"Type\", \"\")\n                    if \"notesSlide\" in rel_type:\n                        target = rel.get(\"Target\", \"\")\n                        if target:\n                            normalized_target = target.replace(\"../\", \"\")\n\n                            slide_name = rels_file.stem.replace(\".xml\", \"\")\n\n                            if normalized_target not in notes_slide_references:\n                                notes_slide_references[normalized_target] = []\n                            notes_slide_references[normalized_target].append(\n                                (slide_name, rels_file)\n                            )\n\n            except (lxml.etree.XMLSyntaxError, Exception) as e:\n                errors.append(\n                    f\"  {rels_file.relative_to(self.unpacked_dir)}: Error: {e}\"\n                )\n\n        for target, references in notes_slide_references.items():\n            if len(references) > 1:\n                slide_names = [ref[0] for ref in references]\n                errors.append(\n                    f\"  Notes slide '{target}' is referenced by multiple slides: {', '.join(slide_names)}\"\n                )\n                for slide_name, rels_file in references:\n                    errors.append(f\"    - {rels_file.relative_to(self.unpacked_dir)}\")\n\n        if errors:\n            print(\n                f\"FAILED - Found {len([e for e in errors if not e.startswith('    ')])} notes slide reference validation errors:\"\n            )\n            for error in errors:\n                print(error)\n            print(\"Each slide may optionally have its own slide file.\")\n            return False\n        else:\n            if self.verbose:\n                print(\"PASSED - All notes slide references are unique\")\n            return True\n\n\nif __name__ == \"__main__\":\n    raise RuntimeError(\"This module should not be run directly.\")\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/office/validators/redlining.py",
    "content": "\"\"\"\nValidator for tracked changes in Word documents.\n\"\"\"\n\nimport subprocess\nimport tempfile\nimport zipfile\nfrom pathlib import Path\n\n\nclass RedliningValidator:\n    def __init__(self, unpacked_dir, original_docx, verbose=False, author=\"Claude\"):\n        self.unpacked_dir = Path(unpacked_dir)\n        self.original_docx = Path(original_docx)\n        self.verbose = verbose\n        self.author = author\n        self.namespaces = {\n            \"w\": \"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"\n        }\n\n    def repair(self) -> int:\n        return 0\n\n    def validate(self):\n        modified_file = self.unpacked_dir / \"word\" / \"document.xml\"\n        if not modified_file.exists():\n            print(f\"FAILED - Modified document.xml not found at {modified_file}\")\n            return False\n\n        try:\n            import xml.etree.ElementTree as ET\n\n            tree = ET.parse(modified_file)\n            root = tree.getroot()\n\n            del_elements = root.findall(\".//w:del\", self.namespaces)\n            ins_elements = root.findall(\".//w:ins\", self.namespaces)\n\n            author_del_elements = [\n                elem\n                for elem in del_elements\n                if elem.get(f\"{{{self.namespaces['w']}}}author\") == self.author\n            ]\n            author_ins_elements = [\n                elem\n                for elem in ins_elements\n                if elem.get(f\"{{{self.namespaces['w']}}}author\") == self.author\n            ]\n\n            if not author_del_elements and not author_ins_elements:\n                if self.verbose:\n                    print(f\"PASSED - No tracked changes by {self.author} found.\")\n                return True\n\n        except Exception:\n            pass\n\n        with tempfile.TemporaryDirectory() as temp_dir:\n            temp_path = Path(temp_dir)\n\n            try:\n                with zipfile.ZipFile(self.original_docx, \"r\") as zip_ref:\n                    zip_ref.extractall(temp_path)\n            except Exception as e:\n                print(f\"FAILED - Error unpacking original docx: {e}\")\n                return False\n\n            original_file = temp_path / \"word\" / \"document.xml\"\n            if not original_file.exists():\n                print(\n                    f\"FAILED - Original document.xml not found in {self.original_docx}\"\n                )\n                return False\n\n            try:\n                import xml.etree.ElementTree as ET\n\n                modified_tree = ET.parse(modified_file)\n                modified_root = modified_tree.getroot()\n                original_tree = ET.parse(original_file)\n                original_root = original_tree.getroot()\n            except ET.ParseError as e:\n                print(f\"FAILED - Error parsing XML files: {e}\")\n                return False\n\n            self._remove_author_tracked_changes(original_root)\n            self._remove_author_tracked_changes(modified_root)\n\n            modified_text = self._extract_text_content(modified_root)\n            original_text = self._extract_text_content(original_root)\n\n            if modified_text != original_text:\n                error_message = self._generate_detailed_diff(\n                    original_text, modified_text\n                )\n                print(error_message)\n                return False\n\n            if self.verbose:\n                print(f\"PASSED - All changes by {self.author} are properly tracked\")\n            return True\n\n    def _generate_detailed_diff(self, original_text, modified_text):\n        error_parts = [\n            f\"FAILED - Document text doesn't match after removing {self.author}'s tracked changes\",\n            \"\",\n            \"Likely causes:\",\n            \"  1. Modified text inside another author's <w:ins> or <w:del> tags\",\n            \"  2. Made edits without proper tracked changes\",\n            \"  3. Didn't nest <w:del> inside <w:ins> when deleting another's insertion\",\n            \"\",\n            \"For pre-redlined documents, use correct patterns:\",\n            \"  - To reject another's INSERTION: Nest <w:del> inside their <w:ins>\",\n            \"  - To restore another's DELETION: Add new <w:ins> AFTER their <w:del>\",\n            \"\",\n        ]\n\n        git_diff = self._get_git_word_diff(original_text, modified_text)\n        if git_diff:\n            error_parts.extend([\"Differences:\", \"============\", git_diff])\n        else:\n            error_parts.append(\"Unable to generate word diff (git not available)\")\n\n        return \"\\n\".join(error_parts)\n\n    def _get_git_word_diff(self, original_text, modified_text):\n        try:\n            with tempfile.TemporaryDirectory() as temp_dir:\n                temp_path = Path(temp_dir)\n\n                original_file = temp_path / \"original.txt\"\n                modified_file = temp_path / \"modified.txt\"\n\n                original_file.write_text(original_text, encoding=\"utf-8\")\n                modified_file.write_text(modified_text, encoding=\"utf-8\")\n\n                result = subprocess.run(\n                    [\n                        \"git\",\n                        \"diff\",\n                        \"--word-diff=plain\",\n                        \"--word-diff-regex=.\",\n                        \"-U0\",\n                        \"--no-index\",\n                        str(original_file),\n                        str(modified_file),\n                    ],\n                    capture_output=True,\n                    text=True,\n                )\n\n                if result.stdout.strip():\n                    lines = result.stdout.split(\"\\n\")\n                    content_lines = []\n                    in_content = False\n                    for line in lines:\n                        if line.startswith(\"@@\"):\n                            in_content = True\n                            continue\n                        if in_content and line.strip():\n                            content_lines.append(line)\n\n                    if content_lines:\n                        return \"\\n\".join(content_lines)\n\n                result = subprocess.run(\n                    [\n                        \"git\",\n                        \"diff\",\n                        \"--word-diff=plain\",\n                        \"-U0\",\n                        \"--no-index\",\n                        str(original_file),\n                        str(modified_file),\n                    ],\n                    capture_output=True,\n                    text=True,\n                )\n\n                if result.stdout.strip():\n                    lines = result.stdout.split(\"\\n\")\n                    content_lines = []\n                    in_content = False\n                    for line in lines:\n                        if line.startswith(\"@@\"):\n                            in_content = True\n                            continue\n                        if in_content and line.strip():\n                            content_lines.append(line)\n                    return \"\\n\".join(content_lines)\n\n        except (subprocess.CalledProcessError, FileNotFoundError, Exception):\n            pass\n\n        return None\n\n    def _remove_author_tracked_changes(self, root):\n        ins_tag = f\"{{{self.namespaces['w']}}}ins\"\n        del_tag = f\"{{{self.namespaces['w']}}}del\"\n        author_attr = f\"{{{self.namespaces['w']}}}author\"\n\n        for parent in root.iter():\n            to_remove = []\n            for child in parent:\n                if child.tag == ins_tag and child.get(author_attr) == self.author:\n                    to_remove.append(child)\n            for elem in to_remove:\n                parent.remove(elem)\n\n        deltext_tag = f\"{{{self.namespaces['w']}}}delText\"\n        t_tag = f\"{{{self.namespaces['w']}}}t\"\n\n        for parent in root.iter():\n            to_process = []\n            for child in parent:\n                if child.tag == del_tag and child.get(author_attr) == self.author:\n                    to_process.append((child, list(parent).index(child)))\n\n            for del_elem, del_index in reversed(to_process):\n                for elem in del_elem.iter():\n                    if elem.tag == deltext_tag:\n                        elem.tag = t_tag\n\n                for child in reversed(list(del_elem)):\n                    parent.insert(del_index, child)\n                parent.remove(del_elem)\n\n    def _extract_text_content(self, root):\n        p_tag = f\"{{{self.namespaces['w']}}}p\"\n        t_tag = f\"{{{self.namespaces['w']}}}t\"\n\n        paragraphs = []\n        for p_elem in root.findall(f\".//{p_tag}\"):\n            text_parts = []\n            for t_elem in p_elem.findall(f\".//{t_tag}\"):\n                if t_elem.text:\n                    text_parts.append(t_elem.text)\n            paragraph_text = \"\".join(text_parts)\n            if paragraph_text:\n                paragraphs.append(paragraph_text)\n\n        return \"\\n\".join(paragraphs)\n\n\nif __name__ == \"__main__\":\n    raise RuntimeError(\"This module should not be run directly.\")\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/preview.py",
    "content": "\"\"\"Generate slide preview images from a PowerPoint file.\n\nConverts PPTX -> PDF -> JPEG slides with caching. If cached slides\nalready exist and are up-to-date, returns them without reconverting.\n\nOutput protocol (stdout):\n    Line 1: status — one of CACHED, GENERATED, ERROR_NOT_FOUND, ERROR_NO_PDF\n    Lines 2+: sorted absolute paths to slide-*.jpg files\n\nUsage:\n    python preview.py /path/to/file.pptx /path/to/cache_dir\n\"\"\"\n\nimport os\nimport subprocess\nimport sys\nfrom pathlib import Path\n\n# Allow importing office.soffice from the scripts directory\nsys.path.insert(0, str(Path(__file__).resolve().parent))\n\nfrom office.soffice import run_soffice\n\nCONVERSION_DPI = 150\n\n\ndef _find_slides(directory: Path) -> list[str]:\n    \"\"\"Find slide-*.jpg files in directory, sorted by page number.\"\"\"\n    slides = list(directory.glob(\"slide-*.jpg\"))\n    slides.sort(key=lambda p: int(p.stem.split(\"-\")[-1]))\n    return [str(s) for s in slides]\n\n\ndef main() -> None:\n    if len(sys.argv) != 3:\n        print(f\"Usage: {sys.argv[0]} <pptx_path> <cache_dir>\", file=sys.stderr)\n        sys.exit(1)\n\n    pptx_path = Path(sys.argv[1])\n    cache_dir = Path(sys.argv[2])\n\n    if not pptx_path.is_file():\n        print(\"ERROR_NOT_FOUND\")\n        return\n\n    # Check cache: if slides exist and are at least as new as the PPTX, reuse them\n    cached_slides = _find_slides(cache_dir)\n    if cached_slides:\n        pptx_mtime = os.path.getmtime(pptx_path)\n        oldest_slide_mtime = min(os.path.getmtime(s) for s in cached_slides)\n        if oldest_slide_mtime >= pptx_mtime:\n            print(\"CACHED\")\n            for slide in cached_slides:\n                print(slide)\n            return\n        # Stale cache — remove old slides\n        for slide in cached_slides:\n            os.remove(slide)\n\n    cache_dir.mkdir(parents=True, exist_ok=True)\n\n    # Convert PPTX -> PDF via LibreOffice\n    result = run_soffice(\n        [\n            \"--headless\",\n            \"--convert-to\",\n            \"pdf\",\n            \"--outdir\",\n            str(cache_dir),\n            str(pptx_path),\n        ],\n        capture_output=True,\n        text=True,\n    )\n    if result.returncode != 0:\n        print(\"CONVERSION_ERROR\", file=sys.stderr)\n        sys.exit(1)\n\n    # Find the generated PDF\n    pdfs = sorted(cache_dir.glob(\"*.pdf\"))\n    if not pdfs:\n        print(\"ERROR_NO_PDF\")\n        return\n\n    pdf_file = pdfs[0]\n\n    # Convert PDF -> JPEG slides\n    result = subprocess.run(\n        [\n            \"pdftoppm\",\n            \"-jpeg\",\n            \"-r\",\n            str(CONVERSION_DPI),\n            str(pdf_file),\n            str(cache_dir / \"slide\"),\n        ],\n        capture_output=True,\n        text=True,\n    )\n    if result.returncode != 0:\n        print(\"CONVERSION_ERROR\", file=sys.stderr)\n        sys.exit(1)\n\n    # Clean up PDF\n    pdf_file.unlink(missing_ok=True)\n\n    slides = _find_slides(cache_dir)\n    print(\"GENERATED\")\n    for slide in slides:\n        print(slide)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/scripts/thumbnail.py",
    "content": "\"\"\"Create thumbnail grids from PowerPoint presentation slides.\n\nCreates a grid layout of slide thumbnails for quick visual analysis.\nLabels each thumbnail with its XML filename (e.g., slide1.xml).\nHidden slides are shown with a placeholder pattern.\n\nUsage:\n    python thumbnail.py input.pptx [output_prefix] [--cols N]\n\nExamples:\n    python thumbnail.py presentation.pptx\n    # Creates: thumbnails.jpg\n\n    python thumbnail.py template.pptx grid --cols 4\n    # Creates: grid.jpg (or grid-1.jpg, grid-2.jpg for large decks)\n\"\"\"\n\nimport argparse\nimport subprocess\nimport sys\nimport tempfile\nimport zipfile\nfrom pathlib import Path\n\nimport defusedxml.minidom\nfrom office.soffice import get_soffice_env\nfrom PIL import Image\nfrom PIL import ImageDraw\nfrom PIL import ImageFont\n\nTHUMBNAIL_WIDTH = 300\nCONVERSION_DPI = 100\nMAX_COLS = 6\nDEFAULT_COLS = 3\nJPEG_QUALITY = 95\nGRID_PADDING = 20\nBORDER_WIDTH = 2\nFONT_SIZE_RATIO = 0.10\nLABEL_PADDING_RATIO = 0.4\n\n\ndef main():\n    parser = argparse.ArgumentParser(\n        description=\"Create thumbnail grids from PowerPoint slides.\"\n    )\n    parser.add_argument(\"input\", help=\"Input PowerPoint file (.pptx)\")\n    parser.add_argument(\n        \"output_prefix\",\n        nargs=\"?\",\n        default=\"thumbnails\",\n        help=\"Output prefix for image files (default: thumbnails)\",\n    )\n    parser.add_argument(\n        \"--cols\",\n        type=int,\n        default=DEFAULT_COLS,\n        help=f\"Number of columns (default: {DEFAULT_COLS}, max: {MAX_COLS})\",\n    )\n\n    args = parser.parse_args()\n\n    cols = min(args.cols, MAX_COLS)\n    if args.cols > MAX_COLS:\n        print(f\"Warning: Columns limited to {MAX_COLS}\")\n\n    input_path = Path(args.input)\n    if not input_path.exists() or input_path.suffix.lower() != \".pptx\":\n        print(f\"Error: Invalid PowerPoint file: {args.input}\", file=sys.stderr)\n        sys.exit(1)\n\n    output_path = Path(f\"{args.output_prefix}.jpg\")\n\n    try:\n        slide_info = get_slide_info(input_path)\n\n        with tempfile.TemporaryDirectory() as temp_dir:\n            temp_path = Path(temp_dir)\n            visible_images = convert_to_images(input_path, temp_path)\n\n            if not visible_images and not any(s[\"hidden\"] for s in slide_info):\n                print(\"Error: No slides found\", file=sys.stderr)\n                sys.exit(1)\n\n            slides = build_slide_list(slide_info, visible_images, temp_path)\n\n            grid_files = create_grids(slides, cols, THUMBNAIL_WIDTH, output_path)\n\n            print(f\"Created {len(grid_files)} grid(s):\")\n            for grid_file in grid_files:\n                print(f\"  {grid_file}\")\n\n    except Exception as e:\n        print(f\"Error: {e}\", file=sys.stderr)\n        sys.exit(1)\n\n\ndef get_slide_info(pptx_path: Path) -> list[dict]:\n    with zipfile.ZipFile(pptx_path, \"r\") as zf:\n        rels_content = zf.read(\"ppt/_rels/presentation.xml.rels\").decode(\"utf-8\")\n        rels_dom = defusedxml.minidom.parseString(rels_content)\n\n        rid_to_slide = {}\n        for rel in rels_dom.getElementsByTagName(\"Relationship\"):\n            rid = rel.getAttribute(\"Id\")\n            target = rel.getAttribute(\"Target\")\n            rel_type = rel.getAttribute(\"Type\")\n            if \"slide\" in rel_type and target.startswith(\"slides/\"):\n                rid_to_slide[rid] = target.replace(\"slides/\", \"\")\n\n        pres_content = zf.read(\"ppt/presentation.xml\").decode(\"utf-8\")\n        pres_dom = defusedxml.minidom.parseString(pres_content)\n\n        slides = []\n        for sld_id in pres_dom.getElementsByTagName(\"p:sldId\"):\n            rid = sld_id.getAttribute(\"r:id\")\n            if rid in rid_to_slide:\n                hidden = sld_id.getAttribute(\"show\") == \"0\"\n                slides.append({\"name\": rid_to_slide[rid], \"hidden\": hidden})\n\n        return slides\n\n\ndef build_slide_list(\n    slide_info: list[dict],\n    visible_images: list[Path],\n    temp_dir: Path,\n) -> list[tuple[Path, str]]:\n    if visible_images:\n        with Image.open(visible_images[0]) as img:\n            placeholder_size = img.size\n    else:\n        placeholder_size = (1920, 1080)\n\n    slides = []\n    visible_idx = 0\n\n    for info in slide_info:\n        if info[\"hidden\"]:\n            placeholder_path = temp_dir / f\"hidden-{info['name']}.jpg\"\n            placeholder_img = create_hidden_placeholder(placeholder_size)\n            placeholder_img.save(placeholder_path, \"JPEG\")\n            slides.append((placeholder_path, f\"{info['name']} (hidden)\"))\n        else:\n            if visible_idx < len(visible_images):\n                slides.append((visible_images[visible_idx], info[\"name\"]))\n                visible_idx += 1\n\n    return slides\n\n\ndef create_hidden_placeholder(size: tuple[int, int]) -> Image.Image:\n    img = Image.new(\"RGB\", size, color=\"#F0F0F0\")\n    draw = ImageDraw.Draw(img)\n    line_width = max(5, min(size) // 100)\n    draw.line([(0, 0), size], fill=\"#CCCCCC\", width=line_width)\n    draw.line([(size[0], 0), (0, size[1])], fill=\"#CCCCCC\", width=line_width)\n    return img\n\n\ndef convert_to_images(pptx_path: Path, temp_dir: Path) -> list[Path]:\n    pdf_path = temp_dir / f\"{pptx_path.stem}.pdf\"\n\n    result = subprocess.run(\n        [\n            \"soffice\",\n            \"--headless\",\n            \"--convert-to\",\n            \"pdf\",\n            \"--outdir\",\n            str(temp_dir),\n            str(pptx_path),\n        ],\n        capture_output=True,\n        text=True,\n        env=get_soffice_env(),\n    )\n    if result.returncode != 0 or not pdf_path.exists():\n        raise RuntimeError(\"PDF conversion failed\")\n\n    result = subprocess.run(\n        [\n            \"pdftoppm\",\n            \"-jpeg\",\n            \"-r\",\n            str(CONVERSION_DPI),\n            str(pdf_path),\n            str(temp_dir / \"slide\"),\n        ],\n        capture_output=True,\n        text=True,\n    )\n    if result.returncode != 0:\n        raise RuntimeError(\"Image conversion failed\")\n\n    return sorted(temp_dir.glob(\"slide-*.jpg\"))\n\n\ndef create_grids(\n    slides: list[tuple[Path, str]],\n    cols: int,\n    width: int,\n    output_path: Path,\n) -> list[str]:\n    max_per_grid = cols * (cols + 1)\n    grid_files = []\n\n    for chunk_idx, start_idx in enumerate(range(0, len(slides), max_per_grid)):\n        end_idx = min(start_idx + max_per_grid, len(slides))\n        chunk_slides = slides[start_idx:end_idx]\n\n        grid = create_grid(chunk_slides, cols, width)\n\n        if len(slides) <= max_per_grid:\n            grid_filename = output_path\n        else:\n            stem = output_path.stem\n            suffix = output_path.suffix\n            grid_filename = output_path.parent / f\"{stem}-{chunk_idx + 1}{suffix}\"\n\n        grid_filename.parent.mkdir(parents=True, exist_ok=True)\n        grid.save(str(grid_filename), quality=JPEG_QUALITY)\n        grid_files.append(str(grid_filename))\n\n    return grid_files\n\n\ndef create_grid(\n    slides: list[tuple[Path, str]],\n    cols: int,\n    width: int,\n) -> Image.Image:\n    font_size = int(width * FONT_SIZE_RATIO)\n    label_padding = int(font_size * LABEL_PADDING_RATIO)\n\n    with Image.open(slides[0][0]) as img:\n        aspect = img.height / img.width\n    height = int(width * aspect)\n\n    rows = (len(slides) + cols - 1) // cols\n    grid_w = cols * width + (cols + 1) * GRID_PADDING\n    grid_h = rows * (height + font_size + label_padding * 2) + (rows + 1) * GRID_PADDING\n\n    grid = Image.new(\"RGB\", (grid_w, grid_h), \"white\")\n    draw = ImageDraw.Draw(grid)\n\n    try:\n        font = ImageFont.load_default(size=font_size)\n    except Exception:\n        font = ImageFont.load_default()\n\n    for i, (img_path, slide_name) in enumerate(slides):\n        row, col = i // cols, i % cols\n        x = col * width + (col + 1) * GRID_PADDING\n        y_base = (\n            row * (height + font_size + label_padding * 2) + (row + 1) * GRID_PADDING\n        )\n\n        label = slide_name\n        bbox = draw.textbbox((0, 0), label, font=font)\n        text_w = bbox[2] - bbox[0]\n        draw.text(\n            (x + (width - text_w) // 2, y_base + label_padding),\n            label,\n            fill=\"black\",\n            font=font,\n        )\n\n        y_thumbnail = y_base + label_padding + font_size + label_padding\n\n        with Image.open(img_path) as img:\n            img.thumbnail((width, height), Image.Resampling.LANCZOS)\n            w, h = img.size\n            tx = x + (width - w) // 2\n            ty = y_thumbnail + (height - h) // 2\n            grid.paste(img, (tx, ty))\n\n            if BORDER_WIDTH > 0:\n                draw.rectangle(\n                    [\n                        (tx - BORDER_WIDTH, ty - BORDER_WIDTH),\n                        (tx + w + BORDER_WIDTH - 1, ty + h + BORDER_WIDTH - 1),\n                    ],\n                    outline=\"gray\",\n                    width=BORDER_WIDTH,\n                )\n\n    return grid\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/.gitignore",
    "content": "# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.\n\n# dependencies\n/node_modules\n/.pnp\n.pnp.*\n.yarn/*\n!.yarn/patches\n!.yarn/plugins\n!.yarn/releases\n!.yarn/versions\n\n# testing\n/coverage\n\n# next.js\n/.next/\n/out/\n\n# production\n/build\n\n# misc\n.DS_Store\n*.pem\n\n# debug\nnpm-debug.log*\nyarn-debug.log*\nyarn-error.log*\n.pnpm-debug.log*\n\n# env files (can opt-in for committing if needed)\n.env*\n\n# vercel\n.vercel\n\n# typescript\n*.tsbuildinfo\nnext-env.d.ts\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/AGENTS.md",
    "content": "# AGENTS.md\n\nThis file provides guidance to AI agents when working on the web application within this directory.\n\n## Important Notes\n\n- **The development server is already running** at a dynamically allocated port. Do NOT run `npm run dev` yourself.\n- **We do NOT use a `src` directory** - all code lives directly in the root folders (`app/`, `components/`, `lib/`, etc.)\n- If the app needs pre-computation (data processing, API calls, etc.), create a bash or python script called `prepare.sh`/`prepare.py` at the root of this directory\n- **CRITICAL: Create small, modular components** - Do NOT write everything in `page.tsx`. Break your UI into small, reusable components in the `components/` directory. Each component should have a single responsibility and be in its own file.\n\n## Data Preparation Scripts\n\n**CRITICAL: Always re-run data scripts after modifying them.**\n\nIf a `prepare.sh` or `prepare.py` script exists at the root of this directory, it is responsible for generating/loading data that the frontend consumes. \n\n### When to Run the Script\n\nYou MUST run the data preparation script:\n1. **After creating** the script for the first time\n2. **After modifying** the script logic (new data sources, changed processing, etc.)\n3. **After updating** any data files the script reads from\n4. **Before testing** the frontend if you're unsure if data is fresh\n\n### How to Run\n\n```bash\n# For bash scripts\nbash prepare.sh\n\n# For python scripts\npython prepare.py\n```\n\n### Common Mistake\n\n❌ **Updating the script but forgetting to run it** - This leaves stale data in place and the frontend won't reflect your changes. Always run the script immediately after modifying it.\n\n## Commands\n\n```bash\nnpm run dev      # Start development server (DO NOT RUN - already running)\nnpm run lint     # Run ESLint\n```\n\n## Architecture\n\nThis is a **Next.js 16.1.1** application using the **App Router** with **React 19** and **TypeScript**. It serves as a component showcase/template built on shadcn/ui.\n\n### File Organization Philosophy\n\n**Prioritize small, incremental file writes.** Break your application into many small components rather than monolithic page files.\n\n#### Component Organization\n\n```\ncomponents/\n├── dashboard/           # Feature-specific components\n│   ├── stats-card.tsx\n│   ├── activity-feed.tsx\n│   └── recent-items.tsx\n├── charts/             # Chart components\n│   ├── line-chart.tsx\n│   ├── bar-chart.tsx\n│   └── pie-chart.tsx\n├── data/               # Data display components\n│   ├── data-table.tsx\n│   ├── filter-bar.tsx\n│   └── sort-controls.tsx\n└── layout/             # Layout components\n    ├── header.tsx\n    ├── sidebar.tsx\n    └── footer.tsx\n```\n\n#### Page Structure\n\nPages (`app/page.tsx`) should be **thin orchestration layers** that compose components:\n\n```typescript\n// ✅ GOOD - page.tsx is just composition\nimport { StatsCard } from \"@/components/dashboard/stats-card\";\nimport { ActivityFeed } from \"@/components/dashboard/activity-feed\";\nimport { RecentItems } from \"@/components/dashboard/recent-items\";\n\nexport default function DashboardPage() {\n  return (\n    <div className=\"container py-6 space-y-6\">\n      <h1 className=\"text-3xl font-bold\">Dashboard</h1>\n      <div className=\"grid grid-cols-1 md:grid-cols-3 gap-4\">\n        <StatsCard title=\"Total Users\" value={1234} />\n        <StatsCard title=\"Active Sessions\" value={56} />\n        <StatsCard title=\"Revenue\" value=\"$12,345\" />\n      </div>\n      <div className=\"grid grid-cols-1 lg:grid-cols-2 gap-6\">\n        <ActivityFeed />\n        <RecentItems />\n      </div>\n    </div>\n  );\n}\n\n// ❌ BAD - Everything in page.tsx (500+ lines of mixed logic)\nexport default function DashboardPage() {\n  // ... 500 lines of component logic, state, handlers, JSX ...\n}\n```\n\n#### Component Granularity\n\nCreate a new component file when:\n- A UI section has distinct functionality (e.g., `user-profile-card.tsx`)\n- Logic exceeds ~50-100 lines\n- A pattern is reused 2+ times\n- Testing/maintenance would benefit from isolation\n\n**Example: Dashboard Feature**\n\nInstead of writing everything in `app/page.tsx`:\n\n```typescript\n// components/dashboard/stats-card.tsx\nexport function StatsCard({ title, value, trend }: StatsCardProps) {\n  return (\n    <Card>\n      <CardHeader>\n        <CardTitle className=\"text-sm font-medium\">{title}</CardTitle>\n      </CardHeader>\n      <CardContent>\n        <div className=\"text-2xl font-bold\">{value}</div>\n        {trend && <p className=\"text-xs text-muted-foreground\">{trend}</p>}\n      </CardContent>\n    </Card>\n  );\n}\n\n// components/dashboard/activity-feed.tsx\nexport function ActivityFeed() {\n  // Activity feed logic here\n}\n\n// components/dashboard/recent-items.tsx\nexport function RecentItems() {\n  // Recent items logic here\n}\n```\n\n#### Benefits of Small Components\n\n1. **Incremental Development**: Write one component at a time, test, iterate\n2. **Better Diffs**: Smaller files = clearer git diffs and easier reviews\n3. **Reusability**: Components can be imported across pages\n4. **Maintainability**: Easier to locate and fix issues\n5. **Hot Reload Efficiency**: Changes to small files reload faster\n6. **Parallel Development**: Multiple features can be worked on independently\n\n### Tech Stack\n\n- **Framework**: Next.js 16.1.1 with App Router\n- **React**: React 19\n- **Language**: TypeScript\n- **Styling**: Tailwind CSS v4 with CSS variables in OKLCH color space\n- **Charts**: recharts for data visualization\n- **UI Components**: shadcn/ui (53 components) built on Radix UI primitives\n- **Variants**: class-variance-authority (CVA) for component variants\n- **Class Merging**: `cn()` utility in `lib/utils.ts` (clsx + tailwind-merge)\n- **Theme**: Dark mode enforced (via `dark` class on `<html>`)\n\n### Key Directories\n\n- `app/` - Next.js App Router pages and layouts\n- `components/ui/` - shadcn/ui component library (Button, Card, Dialog, etc.)\n- `components/` - App-specific components\n- `hooks/` - Custom React hooks (e.g., `use-mobile.ts`)\n- `lib/` - Utilities (`cn()` function)\n\n### Component Patterns\n\n- **Compound Components**: Components like `DropdownMenu`, `Dialog`, `Select` export multiple sub-components (Trigger, Content, Item)\n- **Variants via CVA**: Use `variants` prop for size/style variations (e.g., `buttonVariants`)\n- **Radix UI Primitives**: UI components wrap Radix for accessibility\n\n### Path Aliases\n\nAll imports use `@/` alias (e.g., `@/components/ui/button`, `@/lib/utils`)\n\n### shadcn/ui Configuration\n\nLocated in `components.json`:\n\n- Style: `radix-nova`\n- RSC enabled\n- Icons: lucide-react\n\n### Theme Variables\n\nGlobal CSS variables defined in `app/globals.css` control colors, radius, and spacing. **Dark mode is enforced site-wide** via the `dark` class on the `<html>` element in `app/layout.tsx`. All styling should assume dark mode is active.\n\n### Dark Mode Priority\n\n- **Dark mode is the default and only theme** - do not design for light mode\n- The `dark` class is permanently set on `<html>` in `layout.tsx`\n- Use dark-appropriate colors: `bg-background`, `text-foreground`, etc.\n- Ensure sufficient contrast for dark backgrounds\n- Test all components in dark mode only\n\n## Styling Guidelines\n\n### CRITICAL: Use Only shadcn/ui Components\n\n**MINIMIZE freestyling and creating custom components.** This application uses a complete, professionally designed component library (shadcn/ui). You MUST use the existing components from `components/ui/` for most UI needs.\n\n#### Available shadcn/ui Components\n\nAll components are in `components/ui/`. Import using `@/components/ui/component-name`.\n\n**Layout & Structure:**\n\n- `Card` (`card.tsx`) - Content containers with CardHeader, CardTitle, CardDescription, CardContent, CardFooter\n- `Separator` (`separator.tsx`) - Horizontal/vertical dividers\n- `Tabs` (`tabs.tsx`) - Tabbed interfaces with Tabs, TabsList, TabsTrigger, TabsContent\n- `ScrollArea` (`scroll-area.tsx`) - Styled scrollable regions\n- `Resizable` (`resizable.tsx`) - Resizable panel layouts\n- `Drawer` (`drawer.tsx`) - Bottom/side drawer overlays\n- `Sidebar` (`sidebar.tsx`) - Application sidebar layout\n- `AspectRatio` (`aspect-ratio.tsx`) - Maintain aspect ratios\n\n**Forms & Inputs:**\n\n- `Button` (`button.tsx`) - Primary, secondary, destructive, outline, ghost, link variants\n- `ButtonGroup` (`button-group.tsx`) - Group of related buttons\n- `Input` (`input.tsx`) - Text inputs with various states\n- `InputGroup` (`input-group.tsx`) - Input with addons/icons\n- `Textarea` (`textarea.tsx`) - Multi-line text input\n- `Checkbox` (`checkbox.tsx`) - Checkboxes with indeterminate state\n- `RadioGroup` (`radio-group.tsx`) - Radio button groups\n- `Switch` (`switch.tsx`) - Toggle switches\n- `Select` (`select.tsx`) - Dropdown select menus\n- `NativeSelect` (`native-select.tsx`) - Native HTML select\n- `Combobox` (`combobox.tsx`) - Autocomplete select with search\n- `Command` (`command.tsx`) - Command palette/search interface\n- `Field` (`field.tsx`) - Form field wrapper with label and error\n- `Label` (`label.tsx`) - Form labels with proper accessibility\n- `Slider` (`slider.tsx`) - Range sliders\n- `Calendar` (`calendar.tsx`) - Date picker calendar\n- `Toggle` (`toggle.tsx`) - Toggle button\n- `ToggleGroup` (`toggle-group.tsx`) - Group of toggle buttons\n\n**Navigation:**\n\n- `NavigationMenu` (`navigation-menu.tsx`) - Complex navigation menus\n- `Menubar` (`menubar.tsx`) - Application menu bar\n- `Breadcrumb` (`breadcrumb.tsx`) - Breadcrumb navigation\n- `Pagination` (`pagination.tsx`) - Page navigation controls\n\n**Feedback & Overlays:**\n\n- `Dialog` (`dialog.tsx`) - Modal dialogs\n- `AlertDialog` (`alert-dialog.tsx`) - Confirmation dialogs\n- `Sheet` (`sheet.tsx`) - Side sheets/panels\n- `Popover` (`popover.tsx`) - Floating popovers\n- `HoverCard` (`hover-card.tsx`) - Hover-triggered cards\n- `Tooltip` (`tooltip.tsx`) - Tooltips on hover\n- `Sonner` (`sonner.tsx`) - Toast notifications\n- `Alert` (`alert.tsx`) - Static alert messages\n- `Progress` (`progress.tsx`) - Progress bars\n- `Skeleton` (`skeleton.tsx`) - Loading skeletons\n- `Spinner` (`spinner.tsx`) - Loading spinners\n- `Empty` (`empty.tsx`) - Empty state placeholder\n\n**Menus & Dropdowns:**\n\n- `DropdownMenu` (`dropdown-menu.tsx`) - Dropdown menus with submenus\n- `ContextMenu` (`context-menu.tsx`) - Right-click context menus\n\n**Data Display:**\n\n- `Table` (`table.tsx`) - Data tables with Table, TableHeader, TableBody, TableRow, TableCell, etc.\n- `Badge` (`badge.tsx`) - Status badges and tags\n- `Avatar` (`avatar.tsx`) - User avatars with fallbacks\n- `Accordion` (`accordion.tsx`) - Collapsible content sections\n- `Collapsible` (`collapsible.tsx`) - Simple collapse/expand\n- `Carousel` (`carousel.tsx`) - Image/content carousels\n- `Item` (`item.tsx`) - List item component\n- `Kbd` (`kbd.tsx`) - Keyboard shortcut display\n\n**Data Visualization:**\n\n- `Chart` (`chart.tsx`) - Chart wrapper with ChartContainer, ChartTooltip, ChartTooltipContent, ChartLegend, ChartLegendContent\n\n### Component Usage Principles\n\n#### 1. **Never Create Custom Components**\n\n```typescript\n// ❌ WRONG - Do not create freestyle components\nfunction CustomCard({ title, children }) {\n  return (\n    <div className=\"rounded-lg border p-4\">\n      <h3 className=\"font-bold\">{title}</h3>\n      {children}\n    </div>\n  );\n}\n\n// ✅ CORRECT - Use shadcn Card\nimport { Card, CardHeader, CardTitle, CardContent } from \"@/components/ui/card\";\n\nfunction MyComponent() {\n  return (\n    <Card>\n      <CardHeader>\n        <CardTitle>Title</CardTitle>\n      </CardHeader>\n      <CardContent>Content here</CardContent>\n    </Card>\n  );\n}\n```\n\n#### 2. **Use Component Variants, Don't Style Directly**\n\n```typescript\n// ❌ WRONG - Applying custom Tailwind classes\n<button className=\"bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded\">\n  Click me\n</button>\n\n// ✅ CORRECT - Use Button variants\nimport { Button } from \"@/components/ui/button\";\n\n<Button variant=\"default\">Click me</Button>\n<Button variant=\"destructive\">Delete</Button>\n<Button variant=\"outline\">Cancel</Button>\n<Button variant=\"ghost\">Subtle Action</Button>\n<Button size=\"sm\">Small</Button>\n<Button size=\"lg\">Large</Button>\n```\n\n#### 3. **Compose Compound Components**\n\nMany shadcn components export multiple sub-components. Use them as designed:\n\n```typescript\n// ✅ Dropdown Menu Composition\nimport {\n  DropdownMenu,\n  DropdownMenuTrigger,\n  DropdownMenuContent,\n  DropdownMenuItem,\n  DropdownMenuSeparator,\n  DropdownMenuLabel,\n} from \"@/components/ui/dropdown-menu\";\n\n<DropdownMenu>\n  <DropdownMenuTrigger asChild>\n    <Button variant=\"outline\">Options</Button>\n  </DropdownMenuTrigger>\n  <DropdownMenuContent>\n    <DropdownMenuLabel>Actions</DropdownMenuLabel>\n    <DropdownMenuSeparator />\n    <DropdownMenuItem>Edit</DropdownMenuItem>\n    <DropdownMenuItem>Delete</DropdownMenuItem>\n  </DropdownMenuContent>\n</DropdownMenu>\n```\n\n#### 4. **Use Layout Components for Structure**\n\n```typescript\n// ✅ Use Card for content sections\nimport { Card, CardHeader, CardTitle, CardDescription, CardContent, CardFooter } from \"@/components/ui/card\";\n\n<Card>\n  <CardHeader>\n    <CardTitle>Dashboard</CardTitle>\n    <CardDescription>Overview of your data</CardDescription>\n  </CardHeader>\n  <CardContent>\n    {/* Your content */}\n  </CardContent>\n  <CardFooter>\n    <Button>Action</Button>\n  </CardFooter>\n</Card>\n```\n\n### Styling Rules\n\n#### 1. **Spacing & Layout**\n\nUse Tailwind's utility classes for spacing, but stick to the design system:\n\n- Gap: `gap-2`, `gap-4`, `gap-6`, `gap-8`\n- Padding: `p-2`, `p-4`, `p-6`, `p-8`\n- Margins: Prefer `gap` and `space-y-*` over margins\n\n#### 2. **Colors**\n\nAll colors come from CSS variables in `app/globals.css`. Use semantic color classes:\n\n- `bg-background`, `bg-foreground`\n- `bg-card`, `text-card-foreground`\n- `bg-primary`, `text-primary-foreground`\n- `bg-secondary`, `text-secondary-foreground`\n- `bg-muted`, `text-muted-foreground`\n- `bg-accent`, `text-accent-foreground`\n- `bg-destructive`, `text-destructive-foreground`\n- `border-border`, `border-input`\n- `ring-ring`\n\n**DO NOT use arbitrary color values** like `bg-blue-500` or `text-red-600`.\n\n#### **CRITICAL: Color Contrast Pairing Rules**\n\n**Always pair background colors with their matching foreground colors.** The color system uses paired variables where each background has a corresponding text color designed for proper contrast.\n\n| Background Class | Text Class to Use | Description |\n|-----------------|-------------------|-------------|\n| `bg-background` | `text-foreground` | Main page background |\n| `bg-card` | `text-card-foreground` | Card containers |\n| `bg-primary` | `text-primary-foreground` | Primary buttons/accents |\n| `bg-secondary` | `text-secondary-foreground` | Secondary elements |\n| `bg-muted` | `text-muted-foreground` | Muted/subtle areas |\n| `bg-accent` | `text-accent-foreground` | Accent highlights |\n| `bg-destructive` | `text-destructive-foreground` | Error/delete actions |\n\n**Examples:**\n\n```typescript\n// ✅ CORRECT - Matching background and foreground pairs\n<div className=\"bg-card text-card-foreground\">Content</div>\n<Button className=\"bg-primary text-primary-foreground\">Click</Button>\n<div className=\"bg-muted text-muted-foreground\">Subtle text</div>\n\n// ❌ WRONG - Mismatched colors causing contrast issues\n<div className=\"bg-background text-background\">Invisible text!</div>\n<div className=\"bg-card text-foreground\">May have poor contrast</div>\n<Button className=\"bg-primary text-primary\">White on white!</Button>\n```\n\n**Key Rules:**\n\n1. **Never use the same color for background and text** (e.g., `bg-foreground text-foreground`)\n2. **Always use the `-foreground` variant for text** when using a colored background\n3. **For text on `bg-background`**, use `text-foreground` (primary) or `text-muted-foreground` (secondary)\n4. **Test visually** - if text is hard to read, you have a contrast problem\n\n#### 3. **Typography**\n\nUse Tailwind text utilities (no separate Typography component):\n\n- Headings: `text-xl font-semibold`, `text-2xl font-bold`, etc.\n- Body: `text-sm`, `text-base`\n- Secondary text: `text-muted-foreground`\n- Use semantic HTML: `<h1>`, `<h2>`, `<p>`, etc.\n- **Always wrap text** - Use `max-w-prose` or `max-w-xl` for readable line lengths\n- **Prevent overflow** - Use `break-words` or `truncate` for long text that might overflow containers\n\n#### 4. **Responsive Design**\n\nUse Tailwind's responsive prefixes:\n\n```typescript\n<div className=\"grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4\">\n  {/* Responsive grid */}\n</div>\n```\n\n#### 5. **Icons**\n\nUse Lucide React icons (already configured):\n\n```typescript\nimport { Check, X, ChevronDown, User } from \"lucide-react\";\n\n<Button>\n  <Check className=\"mr-2 h-4 w-4\" />\n  Confirm\n</Button>\n```\n\n### Data Visualization\n\nFor charts and data visualization, use the **shadcn/ui Chart components** (`@/components/ui/chart`) which wrap recharts with consistent theming. Charts should be **elegant, informative, and digestible at a glance**.\n\n#### Chart Design Principles\n\n1. **Clarity over complexity** - A chart should communicate ONE key insight immediately\n2. **Minimal visual noise** - Remove anything that doesn't add information\n3. **Consistent styling** - Use `ChartConfig` for colors, not arbitrary values\n4. **Responsive** - Always use `ChartContainer` (includes ResponsiveContainer)\n5. **Accessible** - Use `ChartTooltip` with `ChartTooltipContent` for proper styling\n\n#### Chart Type Selection\n\n| Data Type | Recommended Chart | Use Case |\n|-----------|-------------------|----------|\n| Trend over time | `LineChart` or `AreaChart` | Stock prices, user growth, metrics over days/months |\n| Comparing categories | `BarChart` | Revenue by product, users by region |\n| Part of whole | `PieChart` or `RadialBarChart` | Market share, budget allocation |\n| Distribution | `BarChart` (horizontal) | Survey responses, rating distribution |\n| Correlation | `ScatterChart` | Price vs. quality, age vs. income |\n\n#### shadcn/ui Chart Components\n\nAlways import from the shadcn chart component:\n\n```typescript\nimport {\n  ChartContainer,\n  ChartTooltip,\n  ChartTooltipContent,\n  ChartLegend,\n  ChartLegendContent,\n  type ChartConfig,\n} from \"@/components/ui/chart\";\nimport { LineChart, Line, XAxis, YAxis, CartesianGrid } from \"recharts\";\n```\n\n#### ChartConfig - Define Colors and Labels\n\nThe `ChartConfig` object defines colors and labels for your data series. This ensures consistent theming:\n\n```typescript\nconst chartConfig = {\n  revenue: {\n    label: \"Revenue\",\n    color: \"var(--chart-1)\",\n  },\n  expenses: {\n    label: \"Expenses\", \n    color: \"var(--chart-2)\",\n  },\n} satisfies ChartConfig;\n```\n\n#### Basic Line Chart Template\n\n```typescript\nimport {\n  ChartContainer,\n  ChartTooltip,\n  ChartTooltipContent,\n  type ChartConfig,\n} from \"@/components/ui/chart\";\nimport { LineChart, Line, XAxis, YAxis, CartesianGrid } from \"recharts\";\n\nconst chartConfig = {\n  value: {\n    label: \"Value\",\n    color: \"var(--chart-1)\",\n  },\n} satisfies ChartConfig;\n\n<ChartContainer config={chartConfig} className=\"h-[300px] w-full\">\n  <LineChart data={data} accessibilityLayer>\n    <CartesianGrid vertical={false} />\n    <XAxis\n      dataKey=\"month\"\n      tickLine={false}\n      axisLine={false}\n      tickMargin={8}\n    />\n    <YAxis tickLine={false} axisLine={false} tickMargin={8} />\n    <ChartTooltip content={<ChartTooltipContent />} />\n    <Line\n      type=\"monotone\"\n      dataKey=\"value\"\n      stroke=\"var(--color-value)\"\n      strokeWidth={2}\n      dot={false}\n    />\n  </LineChart>\n</ChartContainer>\n```\n\n#### Bar Chart with Multiple Series\n\n```typescript\nconst chartConfig = {\n  revenue: {\n    label: \"Revenue\",\n    color: \"var(--chart-1)\",\n  },\n  expenses: {\n    label: \"Expenses\",\n    color: \"var(--chart-2)\",\n  },\n} satisfies ChartConfig;\n\n<ChartContainer config={chartConfig} className=\"h-[300px] w-full\">\n  <BarChart data={data} accessibilityLayer>\n    <CartesianGrid vertical={false} />\n    <XAxis dataKey=\"month\" tickLine={false} axisLine={false} tickMargin={8} />\n    <YAxis tickLine={false} axisLine={false} tickMargin={8} />\n    <ChartTooltip content={<ChartTooltipContent />} />\n    <ChartLegend content={<ChartLegendContent />} />\n    <Bar dataKey=\"revenue\" fill=\"var(--color-revenue)\" radius={4} />\n    <Bar dataKey=\"expenses\" fill=\"var(--color-expenses)\" radius={4} />\n  </BarChart>\n</ChartContainer>\n```\n\n#### Pie/Donut Chart\n\n```typescript\nconst chartConfig = {\n  desktop: { label: \"Desktop\", color: \"var(--chart-1)\" },\n  mobile: { label: \"Mobile\", color: \"var(--chart-2)\" },\n  tablet: { label: \"Tablet\", color: \"var(--chart-3)\" },\n} satisfies ChartConfig;\n\n<ChartContainer config={chartConfig} className=\"h-[300px] w-full\">\n  <PieChart>\n    <ChartTooltip content={<ChartTooltipContent hideLabel />} />\n    <Pie\n      data={data}\n      dataKey=\"value\"\n      nameKey=\"name\"\n      innerRadius={60}  // Remove for solid pie, keep for donut\n      strokeWidth={5}\n    />\n    <ChartLegend content={<ChartLegendContent nameKey=\"name\" />} />\n  </PieChart>\n</ChartContainer>\n```\n\n#### Chart Styling Rules\n\n**Colors (use CSS variables from globals.css):**\n- `var(--chart-1)` through `var(--chart-5)` - Primary chart colors\n- `var(--primary)` - For single-series emphasis\n- `var(--muted)` - For de-emphasized data\n\n**Color References in Charts:**\n- In `ChartConfig`: Use `color: \"var(--chart-1)\"`\n- In chart elements: Use `fill=\"var(--color-keyname)\"` or `stroke=\"var(--color-keyname)\"`\n- The `keyname` matches the key in your `ChartConfig`\n\n**Visual Cleanup:**\n- Set `tickLine={false}` and `axisLine={false}` on axes for cleaner look\n- Use `vertical={false}` on `CartesianGrid` for horizontal-only grid lines\n- Use `dot={false}` on line charts unless individual points matter\n- Add `radius={4}` to bars for rounded corners\n- Limit to 3-5 data series maximum per chart\n\n**Avoid:**\n- ❌ 3D effects\n- ❌ More than 5-6 colors in one chart\n- ❌ Legends with more than 5 items (simplify the data instead)\n- ❌ Dual Y-axes (confusing - use two separate charts)\n- ❌ Pie charts with more than 5-6 slices\n- ❌ Custom tooltip styling - use `ChartTooltipContent`\n\n#### Fallback to Raw Recharts\n\nIf shadcn/ui Chart components don't support a specific chart type (e.g., ScatterChart, ComposedChart, RadarChart), you can use recharts directly:\n\n```typescript\nimport { ScatterChart, Scatter, XAxis, YAxis, CartesianGrid, Tooltip, ResponsiveContainer } from \"recharts\";\n\n<ResponsiveContainer width=\"100%\" height={300}>\n  <ScatterChart>\n    <CartesianGrid strokeDasharray=\"3 3\" stroke=\"var(--border)\" />\n    <XAxis dataKey=\"x\" stroke=\"var(--muted-foreground)\" fontSize={12} tickLine={false} axisLine={false} />\n    <YAxis dataKey=\"y\" stroke=\"var(--muted-foreground)\" fontSize={12} tickLine={false} axisLine={false} />\n    <Tooltip \n      contentStyle={{ \n        backgroundColor: \"var(--card)\", \n        border: \"1px solid var(--border)\", \n        borderRadius: \"6px\" \n      }} \n    />\n    <Scatter data={data} fill=\"var(--chart-1)\" />\n  </ScatterChart>\n</ResponsiveContainer>\n```\n\n**When using raw recharts:**\n- Still use CSS variables for colors (`var(--chart-1)`, etc.)\n- Match styling to shadcn conventions (tickLine={false}, axisLine={false})\n- Style tooltips to match the design system\n\n#### Data Accuracy Checklist\n\nBefore displaying a chart, verify:\n- [ ] `ChartConfig` keys match your data's `dataKey` values\n- [ ] Data values are correctly mapped to the right axes\n- [ ] Axis labels match the data units (%, $, count, etc.)\n- [ ] Time series data is sorted chronologically\n- [ ] No missing data points that would break the visualization\n- [ ] `ChartTooltip` with `ChartTooltipContent` is included\n- [ ] Chart title/context makes the insight clear\n\n### Common Patterns\n\n#### Loading States\n\n```typescript\nimport { Skeleton } from \"@/components/ui/skeleton\";\n\n{isLoading ? (\n  <Skeleton className=\"h-12 w-full\" />\n) : (\n  <Content />\n)}\n```\n\n#### Empty States\n\n```typescript\nimport { Empty, EmptyHeader, EmptyTitle, EmptyDescription, EmptyMedia } from \"@/components/ui/empty\";\nimport { Inbox } from \"lucide-react\";\n\n<Empty>\n  <EmptyHeader>\n    <EmptyMedia variant=\"icon\">\n      <Inbox />\n    </EmptyMedia>\n    <EmptyTitle>No data available</EmptyTitle>\n    <EmptyDescription>\n      There's nothing to display yet. Add some items to get started.\n    </EmptyDescription>\n  </EmptyHeader>\n</Empty>\n```\n\n#### Interactive Lists\n\n```typescript\nimport { ScrollArea } from \"@/components/ui/scroll-area\";\nimport { ItemGroup, Item, ItemContent, ItemTitle, ItemDescription, ItemMedia } from \"@/components/ui/item\";\nimport { FileText } from \"lucide-react\";\n\n<ScrollArea className=\"h-[400px]\">\n  <ItemGroup>\n    {items.map((item) => (\n      <Item key={item.id} variant=\"outline\">\n        <ItemMedia variant=\"icon\">\n          <FileText />\n        </ItemMedia>\n        <ItemContent>\n          <ItemTitle>{item.name}</ItemTitle>\n          <ItemDescription>{item.description}</ItemDescription>\n        </ItemContent>\n      </Item>\n    ))}\n  </ItemGroup>\n</ScrollArea>\n```\n\n#### Form Fields\n\n```typescript\nimport { Field, FieldLabel, FieldDescription, FieldError, FieldGroup } from \"@/components/ui/field\";\nimport { Input } from \"@/components/ui/input\";\nimport { Button } from \"@/components/ui/button\";\n\n<FieldGroup>\n  <Field>\n    <FieldLabel>Email</FieldLabel>\n    <Input type=\"email\" placeholder=\"you@example.com\" />\n    <FieldDescription>We'll never share your email.</FieldDescription>\n  </Field>\n  <Field>\n    <FieldLabel>Password</FieldLabel>\n    <Input type=\"password\" />\n    <FieldError>Password must be at least 8 characters.</FieldError>\n  </Field>\n  <Button type=\"submit\">Sign up</Button>\n</FieldGroup>\n```\n\n### What NOT To Do\n\n❌ **Don't create custom styled divs when a component exists**\n❌ **Don't use arbitrary Tailwind colors** (use CSS variables)\n❌ **Don't import UI libraries** like Material-UI, Ant Design, etc.\n❌ **Don't use inline styles** except for dynamic values\n❌ **Don't create custom form inputs** (use Field, Input, Select, etc. from components/ui)\n❌ **Don't add new dependencies** without checking if shadcn covers it\n❌ **Don't write everything in page.tsx** - break into separate component files\n❌ **Don't design for light mode** - this site is dark mode only\n❌ **Don't use `dark:` variants** - dark mode is always active, use base classes\n\n### Development Workflow\n\n1. **Plan the component structure** - Identify logical UI sections before writing code\n2. **Create components incrementally** - Write one small component file at a time\n3. **Test each component** - Verify it works before moving to the next\n4. **Compose in page.tsx** - Import and arrange your components in the page\n5. **Iterate** - Refine individual components without touching others\n\n### Summary\n\nThis application has a **complete, production-ready component library**. Your job is to:\n1. **Compose** shadcn/ui components (from `components/ui/`)\n2. **Create small, focused component files** (in `components/`)\n3. **Keep pages thin** - pages should orchestrate components, not contain implementation\n\nThink of yourself as assembling LEGO blocks—all the UI pieces you need already exist in `components/ui/`, and you create small, organized structures by composing them into feature-specific components.\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/app/globals.css",
    "content": "@import \"tailwindcss\";\n@import \"tw-animate-css\";\n@import \"shadcn/tailwind.css\";\n\n@custom-variant dark (&:is(.dark *));\n\n@theme inline {\n  --color-background: var(--background);\n  --color-foreground: var(--foreground);\n  --font-sans: var(--font-sans);\n  --font-mono: var(--font-geist-mono);\n  --color-sidebar-ring: var(--sidebar-ring);\n  --color-sidebar-border: var(--sidebar-border);\n  --color-sidebar-accent-foreground: var(--sidebar-accent-foreground);\n  --color-sidebar-accent: var(--sidebar-accent);\n  --color-sidebar-primary-foreground: var(--sidebar-primary-foreground);\n  --color-sidebar-primary: var(--sidebar-primary);\n  --color-sidebar-foreground: var(--sidebar-foreground);\n  --color-sidebar: var(--sidebar);\n  --color-chart-5: var(--chart-5);\n  --color-chart-4: var(--chart-4);\n  --color-chart-3: var(--chart-3);\n  --color-chart-2: var(--chart-2);\n  --color-chart-1: var(--chart-1);\n  --color-ring: var(--ring);\n  --color-input: var(--input);\n  --color-border: var(--border);\n  --color-destructive: var(--destructive);\n  --color-accent-foreground: var(--accent-foreground);\n  --color-accent: var(--accent);\n  --color-muted-foreground: var(--muted-foreground);\n  --color-muted: var(--muted);\n  --color-secondary-foreground: var(--secondary-foreground);\n  --color-secondary: var(--secondary);\n  --color-primary-foreground: var(--primary-foreground);\n  --color-primary: var(--primary);\n  --color-popover-foreground: var(--popover-foreground);\n  --color-popover: var(--popover);\n  --color-card-foreground: var(--card-foreground);\n  --color-card: var(--card);\n  --radius-sm: calc(var(--radius) - 4px);\n  --radius-md: calc(var(--radius) - 2px);\n  --radius-lg: var(--radius);\n  --radius-xl: calc(var(--radius) + 4px);\n  --radius-2xl: calc(var(--radius) + 8px);\n  --radius-3xl: calc(var(--radius) + 12px);\n  --radius-4xl: calc(var(--radius) + 16px);\n}\n\n:root {\n  --background: oklch(1 0 0);\n  --foreground: oklch(0.145 0 0);\n  --card: oklch(1 0 0);\n  --card-foreground: oklch(0.145 0 0);\n  --popover: oklch(1 0 0);\n  --popover-foreground: oklch(0.145 0 0);\n  --primary: oklch(0.67 0.16 58);\n  --primary-foreground: oklch(0.99 0.02 95);\n  --secondary: oklch(0.967 0.001 286.375);\n  --secondary-foreground: oklch(0.21 0.006 285.885);\n  --muted: oklch(0.97 0 0);\n  --muted-foreground: oklch(0.556 0 0);\n  --accent: oklch(0.97 0 0);\n  --accent-foreground: oklch(0.205 0 0);\n  --destructive: oklch(0.58 0.22 27);\n  --border: oklch(0.922 0 0);\n  --input: oklch(0.922 0 0);\n  --ring: oklch(0.708 0 0);\n  --chart-1: oklch(0.88 0.15 92);\n  --chart-2: oklch(0.77 0.16 70);\n  --chart-3: oklch(0.67 0.16 58);\n  --chart-4: oklch(0.56 0.15 49);\n  --chart-5: oklch(0.47 0.12 46);\n  --radius: 0.625rem;\n  --sidebar: oklch(0.985 0 0);\n  --sidebar-foreground: oklch(0.145 0 0);\n  --sidebar-primary: oklch(0.67 0.16 58);\n  --sidebar-primary-foreground: oklch(0.99 0.02 95);\n  --sidebar-accent: oklch(0.97 0 0);\n  --sidebar-accent-foreground: oklch(0.205 0 0);\n  --sidebar-border: oklch(0.922 0 0);\n  --sidebar-ring: oklch(0.708 0 0);\n}\n\n.dark {\n  --background: oklch(0.145 0 0);\n  --foreground: oklch(0.985 0 0);\n  --card: oklch(0.205 0 0);\n  --card-foreground: oklch(0.985 0 0);\n  --popover: oklch(0.205 0 0);\n  --popover-foreground: oklch(0.985 0 0);\n  --primary: oklch(0.77 0.16 70);\n  --primary-foreground: oklch(0.28 0.07 46);\n  --secondary: oklch(0.274 0.006 286.033);\n  --secondary-foreground: oklch(0.985 0 0);\n  --muted: oklch(0.269 0 0);\n  --muted-foreground: oklch(0.708 0 0);\n  --accent: oklch(0.371 0 0);\n  --accent-foreground: oklch(0.985 0 0);\n  --destructive: oklch(0.704 0.191 22.216);\n  --border: oklch(1 0 0 / 10%);\n  --input: oklch(1 0 0 / 15%);\n  --ring: oklch(0.556 0 0);\n  /* Chart colors optimized for dark backgrounds - brighter and more vibrant */\n  --chart-1: oklch(0.82 0.18 140);\n  --chart-2: oklch(0.75 0.2 200);\n  --chart-3: oklch(0.7 0.22 280);\n  --chart-4: oklch(0.78 0.18 50);\n  --chart-5: oklch(0.72 0.2 330);\n  --sidebar: oklch(0.205 0 0);\n  --sidebar-foreground: oklch(0.985 0 0);\n  --sidebar-primary: oklch(0.77 0.16 70);\n  --sidebar-primary-foreground: oklch(0.28 0.07 46);\n  --sidebar-accent: oklch(0.269 0 0);\n  --sidebar-accent-foreground: oklch(0.985 0 0);\n  --sidebar-border: oklch(1 0 0 / 10%);\n  --sidebar-ring: oklch(0.556 0 0);\n}\n\n@layer base {\n  * {\n    @apply border-border outline-ring/50;\n  }\n  body {\n    @apply bg-background text-foreground;\n  }\n}\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/app/layout.tsx",
    "content": "import type { Metadata } from \"next\";\nimport { Geist, Geist_Mono, Inter } from \"next/font/google\";\nimport \"./globals.css\";\n\nconst inter = Inter({ subsets: [\"latin\"], variable: \"--font-sans\" });\n\nconst geistSans = Geist({\n  variable: \"--font-geist-sans\",\n  subsets: [\"latin\"],\n});\n\nconst geistMono = Geist_Mono({\n  variable: \"--font-geist-mono\",\n  subsets: [\"latin\"],\n});\n\nexport const metadata: Metadata = {\n  title: \"Onyx Craft\",\n  description: \"Crafting your next great idea.\",\n};\n\nexport default function RootLayout({\n  children,\n}: Readonly<{\n  children: React.ReactNode;\n}>) {\n  return (\n    <html lang=\"en\" className={`${inter.variable} dark`}>\n      <body\n        className={`${geistSans.variable} ${geistMono.variable} antialiased`}\n      >\n        {children}\n      </body>\n    </html>\n  );\n}\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/app/page.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useRef } from \"react\";\n\nconst messages = [\n  \"Punching wood...\",\n  \"Gathering resources...\",\n  \"Placing blocks...\",\n  \"Crafting your workspace...\",\n  \"Mining for dependencies...\",\n  \"Smelting the code...\",\n  \"Enchanting with magic...\",\n  \"World generation complete...\",\n  \"/gamemode 1\",\n];\n\nconst MESSAGE_COUNT = messages.length;\nconst TYPE_DELAY = 40;\nconst LINE_PAUSE = 800;\nconst RESET_DELAY = 2000;\n\nexport default function CraftingLoader() {\n  const [display, setDisplay] = useState({\n    lines: [] as string[],\n    currentText: \"\",\n  });\n\n  const lineIndexRef = useRef(0);\n  const charIndexRef = useRef(0);\n  const lastUpdateRef = useRef(0);\n  const timeoutRef = useRef<NodeJS.Timeout | undefined>(undefined);\n  const rafRef = useRef<number | undefined>(undefined);\n\n  useEffect(() => {\n    let isActive = true;\n\n    const update = (now: number) => {\n      if (!isActive) return;\n\n      const lineIdx = lineIndexRef.current;\n      const charIdx = charIndexRef.current;\n\n      if (lineIdx >= MESSAGE_COUNT) {\n        timeoutRef.current = setTimeout(() => {\n          if (!isActive) return;\n          lineIndexRef.current = 0;\n          charIndexRef.current = 0;\n          setDisplay({ lines: [], currentText: \"\" });\n          lastUpdateRef.current = performance.now();\n          rafRef.current = requestAnimationFrame(update);\n        }, RESET_DELAY);\n        return;\n      }\n\n      const msg = messages[lineIdx];\n      if (!msg) return;\n\n      const elapsed = now - lastUpdateRef.current;\n\n      if (charIdx < msg.length) {\n        if (elapsed >= TYPE_DELAY) {\n          charIndexRef.current = charIdx + 1;\n          setDisplay((prev) => ({\n            lines: prev.lines,\n            currentText: msg.substring(0, charIdx + 1),\n          }));\n          lastUpdateRef.current = now;\n        }\n      } else if (elapsed >= LINE_PAUSE) {\n        setDisplay((prev) => ({\n          lines: [...prev.lines, msg],\n          currentText: \"\",\n        }));\n        lineIndexRef.current = lineIdx + 1;\n        charIndexRef.current = 0;\n        lastUpdateRef.current = now;\n      }\n\n      rafRef.current = requestAnimationFrame(update);\n    };\n\n    lastUpdateRef.current = performance.now();\n    rafRef.current = requestAnimationFrame(update);\n\n    return () => {\n      isActive = false;\n      if (rafRef.current !== undefined) cancelAnimationFrame(rafRef.current);\n      if (timeoutRef.current !== undefined) clearTimeout(timeoutRef.current);\n    };\n  }, []);\n\n  const { lines, currentText } = display;\n  const hasCurrentText = currentText.length > 0;\n\n  return (\n    <div className=\"min-h-screen bg-gradient-to-br from-neutral-950 via-neutral-900 to-neutral-950 flex flex-col items-center justify-center p-4\">\n      <div className=\"w-full max-w-md rounded-sm overflow-hidden shadow-2xl border-2 border-neutral-700\">\n        <div className=\"bg-neutral-800 px-4 py-3 flex items-center gap-2 border-b-2 border-neutral-700\">\n          <div className=\"w-3 h-3 rounded-none bg-red-500\" />\n          <div className=\"w-3 h-3 rounded-none bg-yellow-500\" />\n          <div className=\"w-3 h-3 rounded-none bg-green-500\" />\n          <span className=\"ml-4 text-neutral-500 text-sm font-mono\">\n            crafting_table\n          </span>\n        </div>\n\n        <div className=\"bg-neutral-900 p-6 min-h-[250px] font-mono text-sm\">\n          {lines.map((line, i) => (\n            <div key={i} className=\"flex items-center text-neutral-300\">\n              <span className=\"text-emerald-500 mr-2\">/&gt;</span>\n              <span>{line}</span>\n            </div>\n          ))}\n          {hasCurrentText && (\n            <div className=\"flex items-center text-neutral-300\">\n              <span className=\"text-emerald-500 mr-2\">/&gt;</span>\n              <span>{currentText}</span>\n              <span className=\"w-2 h-5 bg-emerald-500 animate-pulse ml-0.5\" />\n            </div>\n          )}\n          {!hasCurrentText && (\n            <div className=\"flex items-center text-neutral-300\">\n              <span className=\"text-emerald-500 mr-2\">/&gt;</span>\n              <span className=\"w-2 h-5 bg-emerald-500 animate-pulse\" />\n            </div>\n          )}\n        </div>\n      </div>\n\n      <p className=\"mt-6 text-neutral-500 text-sm font-mono\">\n        Crafting your next great idea...\n      </p>\n    </div>\n  );\n}\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/app/site.webmanifest",
    "content": "{\"name\":\"\",\"short_name\":\"\",\"icons\":[{\"src\":\"/android-chrome-192x192.png\",\"sizes\":\"192x192\",\"type\":\"image/png\"},{\"src\":\"/android-chrome-512x512.png\",\"sizes\":\"512x512\",\"type\":\"image/png\"}],\"theme_color\":\"#ffffff\",\"background_color\":\"#ffffff\",\"display\":\"standalone\"}"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/component-example.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\n\nimport { Example, ExampleWrapper } from \"@/components/example\";\nimport {\n  AlertDialog,\n  AlertDialogAction,\n  AlertDialogCancel,\n  AlertDialogContent,\n  AlertDialogDescription,\n  AlertDialogFooter,\n  AlertDialogHeader,\n  AlertDialogMedia,\n  AlertDialogTitle,\n  AlertDialogTrigger,\n} from \"@/components/ui/alert-dialog\";\nimport { Badge } from \"@/components/ui/badge\";\nimport { Button } from \"@/components/ui/button\";\nimport {\n  Card,\n  CardAction,\n  CardContent,\n  CardDescription,\n  CardFooter,\n  CardHeader,\n  CardTitle,\n} from \"@/components/ui/card\";\nimport {\n  Combobox,\n  ComboboxContent,\n  ComboboxEmpty,\n  ComboboxInput,\n  ComboboxItem,\n  ComboboxList,\n} from \"@/components/ui/combobox\";\nimport {\n  DropdownMenu,\n  DropdownMenuCheckboxItem,\n  DropdownMenuContent,\n  DropdownMenuGroup,\n  DropdownMenuItem,\n  DropdownMenuLabel,\n  DropdownMenuPortal,\n  DropdownMenuRadioGroup,\n  DropdownMenuRadioItem,\n  DropdownMenuSeparator,\n  DropdownMenuShortcut,\n  DropdownMenuSub,\n  DropdownMenuSubContent,\n  DropdownMenuSubTrigger,\n  DropdownMenuTrigger,\n} from \"@/components/ui/dropdown-menu\";\nimport { Field, FieldGroup, FieldLabel } from \"@/components/ui/field\";\nimport { Input } from \"@/components/ui/input\";\nimport {\n  Select,\n  SelectContent,\n  SelectGroup,\n  SelectItem,\n  SelectTrigger,\n  SelectValue,\n} from \"@/components/ui/select\";\nimport { Textarea } from \"@/components/ui/textarea\";\nimport {\n  PlusIcon,\n  BluetoothIcon,\n  MoreVerticalIcon,\n  FileIcon,\n  FolderIcon,\n  FolderOpenIcon,\n  FileCodeIcon,\n  MoreHorizontalIcon,\n  FolderSearchIcon,\n  SaveIcon,\n  DownloadIcon,\n  EyeIcon,\n  LayoutIcon,\n  PaletteIcon,\n  SunIcon,\n  MoonIcon,\n  MonitorIcon,\n  UserIcon,\n  CreditCardIcon,\n  SettingsIcon,\n  KeyboardIcon,\n  LanguagesIcon,\n  BellIcon,\n  MailIcon,\n  ShieldIcon,\n  HelpCircleIcon,\n  FileTextIcon,\n  LogOutIcon,\n} from \"lucide-react\";\n\nexport function ComponentExample() {\n  return (\n    <ExampleWrapper>\n      <CardExample />\n      <FormExample />\n    </ExampleWrapper>\n  );\n}\n\nfunction CardExample() {\n  return (\n    <Example title=\"Card\" className=\"items-center justify-center\">\n      <Card className=\"relative w-full max-w-sm overflow-hidden pt-0\">\n        <div className=\"bg-primary absolute inset-0 z-30 aspect-video opacity-50 mix-blend-color\" />\n        <img\n          src=\"https://images.unsplash.com/photo-1604076850742-4c7221f3101b?q=80&w=1887&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D\"\n          alt=\"Photo by mymind on Unsplash\"\n          title=\"Photo by mymind on Unsplash\"\n          className=\"relative z-20 aspect-video w-full object-cover brightness-60 grayscale\"\n        />\n        <CardHeader>\n          <CardTitle>Observability Plus is replacing Monitoring</CardTitle>\n          <CardDescription>\n            Switch to the improved way to explore your data, with natural\n            language. Monitoring will no longer be available on the Pro plan in\n            November, 2025\n          </CardDescription>\n        </CardHeader>\n        <CardFooter>\n          <AlertDialog>\n            <AlertDialogTrigger asChild>\n              <Button>\n                <PlusIcon data-icon=\"inline-start\" />\n                Show Dialog\n              </Button>\n            </AlertDialogTrigger>\n            <AlertDialogContent size=\"sm\">\n              <AlertDialogHeader>\n                <AlertDialogMedia>\n                  <BluetoothIcon />\n                </AlertDialogMedia>\n                <AlertDialogTitle>Allow accessory to connect?</AlertDialogTitle>\n                <AlertDialogDescription>\n                  Do you want to allow the USB accessory to connect to this\n                  device?\n                </AlertDialogDescription>\n              </AlertDialogHeader>\n              <AlertDialogFooter>\n                <AlertDialogCancel>Don&apos;t allow</AlertDialogCancel>\n                <AlertDialogAction>Allow</AlertDialogAction>\n              </AlertDialogFooter>\n            </AlertDialogContent>\n          </AlertDialog>\n          <Badge variant=\"secondary\" className=\"ml-auto\">\n            Warning\n          </Badge>\n        </CardFooter>\n      </Card>\n    </Example>\n  );\n}\n\nconst frameworks = [\n  \"Next.js\",\n  \"SvelteKit\",\n  \"Nuxt.js\",\n  \"Remix\",\n  \"Astro\",\n] as const;\n\nfunction FormExample() {\n  const [notifications, setNotifications] = React.useState({\n    email: true,\n    sms: false,\n    push: true,\n  });\n  const [theme, setTheme] = React.useState(\"light\");\n\n  return (\n    <Example title=\"Form\">\n      <Card className=\"w-full max-w-md\">\n        <CardHeader>\n          <CardTitle>User Information</CardTitle>\n          <CardDescription>Please fill in your details below</CardDescription>\n          <CardAction>\n            <DropdownMenu>\n              <DropdownMenuTrigger asChild>\n                <Button variant=\"ghost\" size=\"icon\">\n                  <MoreVerticalIcon />\n                  <span className=\"sr-only\">More options</span>\n                </Button>\n              </DropdownMenuTrigger>\n              <DropdownMenuContent align=\"end\" className=\"w-56\">\n                <DropdownMenuGroup>\n                  <DropdownMenuLabel>File</DropdownMenuLabel>\n                  <DropdownMenuItem>\n                    <FileIcon />\n                    New File\n                    <DropdownMenuShortcut>⌘N</DropdownMenuShortcut>\n                  </DropdownMenuItem>\n                  <DropdownMenuItem>\n                    <FolderIcon />\n                    New Folder\n                    <DropdownMenuShortcut>⇧⌘N</DropdownMenuShortcut>\n                  </DropdownMenuItem>\n                  <DropdownMenuSub>\n                    <DropdownMenuSubTrigger>\n                      <FolderOpenIcon />\n                      Open Recent\n                    </DropdownMenuSubTrigger>\n                    <DropdownMenuPortal>\n                      <DropdownMenuSubContent>\n                        <DropdownMenuGroup>\n                          <DropdownMenuLabel>Recent Projects</DropdownMenuLabel>\n                          <DropdownMenuItem>\n                            <FileCodeIcon />\n                            Project Alpha\n                          </DropdownMenuItem>\n                          <DropdownMenuItem>\n                            <FileCodeIcon />\n                            Project Beta\n                          </DropdownMenuItem>\n                          <DropdownMenuSub>\n                            <DropdownMenuSubTrigger>\n                              <MoreHorizontalIcon />\n                              More Projects\n                            </DropdownMenuSubTrigger>\n                            <DropdownMenuPortal>\n                              <DropdownMenuSubContent>\n                                <DropdownMenuItem>\n                                  <FileCodeIcon />\n                                  Project Gamma\n                                </DropdownMenuItem>\n                                <DropdownMenuItem>\n                                  <FileCodeIcon />\n                                  Project Delta\n                                </DropdownMenuItem>\n                              </DropdownMenuSubContent>\n                            </DropdownMenuPortal>\n                          </DropdownMenuSub>\n                        </DropdownMenuGroup>\n                        <DropdownMenuSeparator />\n                        <DropdownMenuGroup>\n                          <DropdownMenuItem>\n                            <FolderSearchIcon />\n                            Browse...\n                          </DropdownMenuItem>\n                        </DropdownMenuGroup>\n                      </DropdownMenuSubContent>\n                    </DropdownMenuPortal>\n                  </DropdownMenuSub>\n                  <DropdownMenuSeparator />\n                  <DropdownMenuItem>\n                    <SaveIcon />\n                    Save\n                    <DropdownMenuShortcut>⌘S</DropdownMenuShortcut>\n                  </DropdownMenuItem>\n                  <DropdownMenuItem>\n                    <DownloadIcon />\n                    Export\n                    <DropdownMenuShortcut>⇧⌘E</DropdownMenuShortcut>\n                  </DropdownMenuItem>\n                </DropdownMenuGroup>\n                <DropdownMenuSeparator />\n                <DropdownMenuGroup>\n                  <DropdownMenuLabel>View</DropdownMenuLabel>\n                  <DropdownMenuCheckboxItem\n                    checked={notifications.email}\n                    onCheckedChange={(checked) =>\n                      setNotifications({\n                        ...notifications,\n                        email: checked === true,\n                      })\n                    }\n                  >\n                    <EyeIcon />\n                    Show Sidebar\n                  </DropdownMenuCheckboxItem>\n                  <DropdownMenuCheckboxItem\n                    checked={notifications.sms}\n                    onCheckedChange={(checked) =>\n                      setNotifications({\n                        ...notifications,\n                        sms: checked === true,\n                      })\n                    }\n                  >\n                    <LayoutIcon />\n                    Show Status Bar\n                  </DropdownMenuCheckboxItem>\n                  <DropdownMenuSub>\n                    <DropdownMenuSubTrigger>\n                      <PaletteIcon />\n                      Theme\n                    </DropdownMenuSubTrigger>\n                    <DropdownMenuPortal>\n                      <DropdownMenuSubContent>\n                        <DropdownMenuGroup>\n                          <DropdownMenuLabel>Appearance</DropdownMenuLabel>\n                          <DropdownMenuRadioGroup\n                            value={theme}\n                            onValueChange={setTheme}\n                          >\n                            <DropdownMenuRadioItem value=\"light\">\n                              <SunIcon />\n                              Light\n                            </DropdownMenuRadioItem>\n                            <DropdownMenuRadioItem value=\"dark\">\n                              <MoonIcon />\n                              Dark\n                            </DropdownMenuRadioItem>\n                            <DropdownMenuRadioItem value=\"system\">\n                              <MonitorIcon />\n                              System\n                            </DropdownMenuRadioItem>\n                          </DropdownMenuRadioGroup>\n                        </DropdownMenuGroup>\n                      </DropdownMenuSubContent>\n                    </DropdownMenuPortal>\n                  </DropdownMenuSub>\n                </DropdownMenuGroup>\n                <DropdownMenuSeparator />\n                <DropdownMenuGroup>\n                  <DropdownMenuLabel>Account</DropdownMenuLabel>\n                  <DropdownMenuItem>\n                    <UserIcon />\n                    Profile\n                    <DropdownMenuShortcut>⇧⌘P</DropdownMenuShortcut>\n                  </DropdownMenuItem>\n                  <DropdownMenuItem>\n                    <CreditCardIcon />\n                    Billing\n                  </DropdownMenuItem>\n                  <DropdownMenuSub>\n                    <DropdownMenuSubTrigger>\n                      <SettingsIcon />\n                      Settings\n                    </DropdownMenuSubTrigger>\n                    <DropdownMenuPortal>\n                      <DropdownMenuSubContent>\n                        <DropdownMenuGroup>\n                          <DropdownMenuLabel>Preferences</DropdownMenuLabel>\n                          <DropdownMenuItem>\n                            <KeyboardIcon />\n                            Keyboard Shortcuts\n                          </DropdownMenuItem>\n                          <DropdownMenuItem>\n                            <LanguagesIcon />\n                            Language\n                          </DropdownMenuItem>\n                          <DropdownMenuSub>\n                            <DropdownMenuSubTrigger>\n                              <BellIcon />\n                              Notifications\n                            </DropdownMenuSubTrigger>\n                            <DropdownMenuPortal>\n                              <DropdownMenuSubContent>\n                                <DropdownMenuGroup>\n                                  <DropdownMenuLabel>\n                                    Notification Types\n                                  </DropdownMenuLabel>\n                                  <DropdownMenuCheckboxItem\n                                    checked={notifications.push}\n                                    onCheckedChange={(checked) =>\n                                      setNotifications({\n                                        ...notifications,\n                                        push: checked === true,\n                                      })\n                                    }\n                                  >\n                                    <BellIcon />\n                                    Push Notifications\n                                  </DropdownMenuCheckboxItem>\n                                  <DropdownMenuCheckboxItem\n                                    checked={notifications.email}\n                                    onCheckedChange={(checked) =>\n                                      setNotifications({\n                                        ...notifications,\n                                        email: checked === true,\n                                      })\n                                    }\n                                  >\n                                    <MailIcon />\n                                    Email Notifications\n                                  </DropdownMenuCheckboxItem>\n                                </DropdownMenuGroup>\n                              </DropdownMenuSubContent>\n                            </DropdownMenuPortal>\n                          </DropdownMenuSub>\n                        </DropdownMenuGroup>\n                        <DropdownMenuSeparator />\n                        <DropdownMenuGroup>\n                          <DropdownMenuItem>\n                            <ShieldIcon />\n                            Privacy & Security\n                          </DropdownMenuItem>\n                        </DropdownMenuGroup>\n                      </DropdownMenuSubContent>\n                    </DropdownMenuPortal>\n                  </DropdownMenuSub>\n                </DropdownMenuGroup>\n                <DropdownMenuSeparator />\n                <DropdownMenuGroup>\n                  <DropdownMenuItem>\n                    <HelpCircleIcon />\n                    Help & Support\n                  </DropdownMenuItem>\n                  <DropdownMenuItem>\n                    <FileTextIcon />\n                    Documentation\n                  </DropdownMenuItem>\n                </DropdownMenuGroup>\n                <DropdownMenuSeparator />\n                <DropdownMenuGroup>\n                  <DropdownMenuItem variant=\"destructive\">\n                    <LogOutIcon />\n                    Sign Out\n                    <DropdownMenuShortcut>⇧⌘Q</DropdownMenuShortcut>\n                  </DropdownMenuItem>\n                </DropdownMenuGroup>\n              </DropdownMenuContent>\n            </DropdownMenu>\n          </CardAction>\n        </CardHeader>\n        <CardContent>\n          <form>\n            <FieldGroup>\n              <div className=\"grid grid-cols-2 gap-4\">\n                <Field>\n                  <FieldLabel htmlFor=\"small-form-name\">Name</FieldLabel>\n                  <Input\n                    id=\"small-form-name\"\n                    placeholder=\"Enter your name\"\n                    required\n                  />\n                </Field>\n                <Field>\n                  <FieldLabel htmlFor=\"small-form-role\">Role</FieldLabel>\n                  <Select defaultValue=\"\">\n                    <SelectTrigger id=\"small-form-role\">\n                      <SelectValue placeholder=\"Select a role\" />\n                    </SelectTrigger>\n                    <SelectContent>\n                      <SelectGroup>\n                        <SelectItem value=\"developer\">Developer</SelectItem>\n                        <SelectItem value=\"designer\">Designer</SelectItem>\n                        <SelectItem value=\"manager\">Manager</SelectItem>\n                        <SelectItem value=\"other\">Other</SelectItem>\n                      </SelectGroup>\n                    </SelectContent>\n                  </Select>\n                </Field>\n              </div>\n              <Field>\n                <FieldLabel htmlFor=\"small-form-framework\">\n                  Framework\n                </FieldLabel>\n                <Combobox items={frameworks}>\n                  <ComboboxInput\n                    id=\"small-form-framework\"\n                    placeholder=\"Select a framework\"\n                    required\n                  />\n                  <ComboboxContent>\n                    <ComboboxEmpty>No frameworks found.</ComboboxEmpty>\n                    <ComboboxList>\n                      {(item) => (\n                        <ComboboxItem key={item} value={item}>\n                          {item}\n                        </ComboboxItem>\n                      )}\n                    </ComboboxList>\n                  </ComboboxContent>\n                </Combobox>\n              </Field>\n              <Field>\n                <FieldLabel htmlFor=\"small-form-comments\">Comments</FieldLabel>\n                <Textarea\n                  id=\"small-form-comments\"\n                  placeholder=\"Add any additional comments\"\n                />\n              </Field>\n              <Field orientation=\"horizontal\">\n                <Button type=\"submit\">Submit</Button>\n                <Button variant=\"outline\" type=\"button\">\n                  Cancel\n                </Button>\n              </Field>\n            </FieldGroup>\n          </form>\n        </CardContent>\n      </Card>\n    </Example>\n  );\n}\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/example.tsx",
    "content": "import { cn } from \"@/lib/utils\";\n\nfunction ExampleWrapper({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div className=\"bg-background w-full\">\n      <div\n        data-slot=\"example-wrapper\"\n        className={cn(\n          \"mx-auto grid min-h-screen w-full max-w-5xl min-w-0 content-center items-start gap-8 p-4 pt-2 sm:gap-12 sm:p-6 md:grid-cols-2 md:gap-8 lg:p-12 2xl:max-w-6xl\",\n          className,\n        )}\n        {...props}\n      />\n    </div>\n  );\n}\n\nfunction Example({\n  title,\n  children,\n  className,\n  containerClassName,\n  ...props\n}: React.ComponentProps<\"div\"> & {\n  title?: string;\n  containerClassName?: string;\n}) {\n  return (\n    <div\n      data-slot=\"example\"\n      className={cn(\n        \"mx-auto flex w-full max-w-lg min-w-0 flex-col gap-1 self-stretch lg:max-w-none\",\n        containerClassName,\n      )}\n      {...props}\n    >\n      {title && (\n        <div className=\"text-muted-foreground px-1.5 py-2 text-xs font-medium\">\n          {title}\n        </div>\n      )}\n      <div\n        data-slot=\"example-content\"\n        className={cn(\n          \"bg-background text-foreground flex min-w-0 flex-1 flex-col items-start gap-6 border border-dashed p-4 sm:p-6 *:[div:not([class*='w-'])]:w-full\",\n          className,\n        )}\n      >\n        {children}\n      </div>\n    </div>\n  );\n}\n\nexport { ExampleWrapper, Example };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/accordion.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Accordion as AccordionPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { ChevronDownIcon, ChevronUpIcon } from \"lucide-react\";\n\nfunction Accordion({\n  className,\n  ...props\n}: React.ComponentProps<typeof AccordionPrimitive.Root>) {\n  return (\n    <AccordionPrimitive.Root\n      data-slot=\"accordion\"\n      className={cn(\"flex w-full flex-col\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction AccordionItem({\n  className,\n  ...props\n}: React.ComponentProps<typeof AccordionPrimitive.Item>) {\n  return (\n    <AccordionPrimitive.Item\n      data-slot=\"accordion-item\"\n      className={cn(\"not-last:border-b\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction AccordionTrigger({\n  className,\n  children,\n  ...props\n}: React.ComponentProps<typeof AccordionPrimitive.Trigger>) {\n  return (\n    <AccordionPrimitive.Header className=\"flex\">\n      <AccordionPrimitive.Trigger\n        data-slot=\"accordion-trigger\"\n        className={cn(\n          \"focus-visible:ring-ring/50 focus-visible:border-ring focus-visible:after:border-ring **:data-[slot=accordion-trigger-icon]:text-muted-foreground rounded-lg py-2.5 text-left text-sm font-medium hover:underline focus-visible:ring-[3px] **:data-[slot=accordion-trigger-icon]:ml-auto **:data-[slot=accordion-trigger-icon]:size-4 group/accordion-trigger relative flex flex-1 items-start justify-between border border-transparent transition-all outline-none disabled:pointer-events-none disabled:opacity-50\",\n          className,\n        )}\n        {...props}\n      >\n        {children}\n        <ChevronDownIcon\n          data-slot=\"accordion-trigger-icon\"\n          className=\"pointer-events-none shrink-0 group-aria-expanded/accordion-trigger:hidden\"\n        />\n        <ChevronUpIcon\n          data-slot=\"accordion-trigger-icon\"\n          className=\"pointer-events-none hidden shrink-0 group-aria-expanded/accordion-trigger:inline\"\n        />\n      </AccordionPrimitive.Trigger>\n    </AccordionPrimitive.Header>\n  );\n}\n\nfunction AccordionContent({\n  className,\n  children,\n  ...props\n}: React.ComponentProps<typeof AccordionPrimitive.Content>) {\n  return (\n    <AccordionPrimitive.Content\n      data-slot=\"accordion-content\"\n      className=\"data-open:animate-accordion-down data-closed:animate-accordion-up text-sm overflow-hidden\"\n      {...props}\n    >\n      <div\n        className={cn(\n          \"pt-0 pb-2.5 [&_a]:hover:text-foreground h-(--radix-accordion-content-height) [&_a]:underline [&_a]:underline-offset-3 [&_p:not(:last-child)]:mb-4\",\n          className,\n        )}\n      >\n        {children}\n      </div>\n    </AccordionPrimitive.Content>\n  );\n}\n\nexport { Accordion, AccordionItem, AccordionTrigger, AccordionContent };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/alert-dialog.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { AlertDialog as AlertDialogPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Button } from \"@/components/ui/button\";\n\nfunction AlertDialog({\n  ...props\n}: React.ComponentProps<typeof AlertDialogPrimitive.Root>) {\n  return <AlertDialogPrimitive.Root data-slot=\"alert-dialog\" {...props} />;\n}\n\nfunction AlertDialogTrigger({\n  ...props\n}: React.ComponentProps<typeof AlertDialogPrimitive.Trigger>) {\n  return (\n    <AlertDialogPrimitive.Trigger data-slot=\"alert-dialog-trigger\" {...props} />\n  );\n}\n\nfunction AlertDialogPortal({\n  ...props\n}: React.ComponentProps<typeof AlertDialogPrimitive.Portal>) {\n  return (\n    <AlertDialogPrimitive.Portal data-slot=\"alert-dialog-portal\" {...props} />\n  );\n}\n\nfunction AlertDialogOverlay({\n  className,\n  ...props\n}: React.ComponentProps<typeof AlertDialogPrimitive.Overlay>) {\n  return (\n    <AlertDialogPrimitive.Overlay\n      data-slot=\"alert-dialog-overlay\"\n      className={cn(\n        \"data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 bg-black/10 duration-100 supports-backdrop-filter:backdrop-blur-xs fixed inset-0 z-50\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AlertDialogContent({\n  className,\n  size = \"default\",\n  ...props\n}: React.ComponentProps<typeof AlertDialogPrimitive.Content> & {\n  size?: \"default\" | \"sm\";\n}) {\n  return (\n    <AlertDialogPortal>\n      <AlertDialogOverlay />\n      <AlertDialogPrimitive.Content\n        data-slot=\"alert-dialog-content\"\n        data-size={size}\n        className={cn(\n          \"data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 bg-background ring-foreground/10 gap-4 rounded-xl p-4 ring-1 duration-100 data-[size=default]:max-w-xs data-[size=sm]:max-w-xs data-[size=default]:sm:max-w-sm group/alert-dialog-content fixed top-1/2 left-1/2 z-50 grid w-full -translate-x-1/2 -translate-y-1/2 outline-none\",\n          className,\n        )}\n        {...props}\n      />\n    </AlertDialogPortal>\n  );\n}\n\nfunction AlertDialogHeader({\n  className,\n  ...props\n}: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"alert-dialog-header\"\n      className={cn(\n        \"grid grid-rows-[auto_1fr] place-items-center gap-1.5 text-center has-data-[slot=alert-dialog-media]:grid-rows-[auto_auto_1fr] has-data-[slot=alert-dialog-media]:gap-x-4 sm:group-data-[size=default]/alert-dialog-content:place-items-start sm:group-data-[size=default]/alert-dialog-content:text-left sm:group-data-[size=default]/alert-dialog-content:has-data-[slot=alert-dialog-media]:grid-rows-[auto_1fr]\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AlertDialogFooter({\n  className,\n  ...props\n}: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"alert-dialog-footer\"\n      className={cn(\n        \"bg-muted/50 -mx-4 -mb-4 rounded-b-xl border-t p-4 flex flex-col-reverse gap-2 group-data-[size=sm]/alert-dialog-content:grid group-data-[size=sm]/alert-dialog-content:grid-cols-2 sm:flex-row sm:justify-end\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AlertDialogMedia({\n  className,\n  ...props\n}: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"alert-dialog-media\"\n      className={cn(\n        \"bg-muted mb-2 inline-flex size-10 items-center justify-center rounded-md sm:group-data-[size=default]/alert-dialog-content:row-span-2 *:[svg:not([class*='size-'])]:size-6\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AlertDialogTitle({\n  className,\n  ...props\n}: React.ComponentProps<typeof AlertDialogPrimitive.Title>) {\n  return (\n    <AlertDialogPrimitive.Title\n      data-slot=\"alert-dialog-title\"\n      className={cn(\n        \"text-base font-medium sm:group-data-[size=default]/alert-dialog-content:group-has-data-[slot=alert-dialog-media]/alert-dialog-content:col-start-2\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AlertDialogDescription({\n  className,\n  ...props\n}: React.ComponentProps<typeof AlertDialogPrimitive.Description>) {\n  return (\n    <AlertDialogPrimitive.Description\n      data-slot=\"alert-dialog-description\"\n      className={cn(\n        \"text-muted-foreground *:[a]:hover:text-foreground text-sm text-balance md:text-pretty *:[a]:underline *:[a]:underline-offset-3\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AlertDialogAction({\n  className,\n  variant = \"default\",\n  size = \"default\",\n  ...props\n}: React.ComponentProps<typeof AlertDialogPrimitive.Action> &\n  Pick<React.ComponentProps<typeof Button>, \"variant\" | \"size\">) {\n  return (\n    <Button variant={variant} size={size} asChild>\n      <AlertDialogPrimitive.Action\n        data-slot=\"alert-dialog-action\"\n        className={cn(className)}\n        {...props}\n      />\n    </Button>\n  );\n}\n\nfunction AlertDialogCancel({\n  className,\n  variant = \"outline\",\n  size = \"default\",\n  ...props\n}: React.ComponentProps<typeof AlertDialogPrimitive.Cancel> &\n  Pick<React.ComponentProps<typeof Button>, \"variant\" | \"size\">) {\n  return (\n    <Button variant={variant} size={size} asChild>\n      <AlertDialogPrimitive.Cancel\n        data-slot=\"alert-dialog-cancel\"\n        className={cn(className)}\n        {...props}\n      />\n    </Button>\n  );\n}\n\nexport {\n  AlertDialog,\n  AlertDialogAction,\n  AlertDialogCancel,\n  AlertDialogContent,\n  AlertDialogDescription,\n  AlertDialogFooter,\n  AlertDialogHeader,\n  AlertDialogMedia,\n  AlertDialogOverlay,\n  AlertDialogPortal,\n  AlertDialogTitle,\n  AlertDialogTrigger,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/alert.tsx",
    "content": "import * as React from \"react\";\nimport { cva, type VariantProps } from \"class-variance-authority\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst alertVariants = cva(\n  \"grid gap-0.5 rounded-lg border px-2.5 py-2 text-left text-sm has-data-[slot=alert-action]:relative has-data-[slot=alert-action]:pr-18 has-[>svg]:grid-cols-[auto_1fr] has-[>svg]:gap-x-2 *:[svg]:row-span-2 *:[svg]:translate-y-0.5 *:[svg]:text-current *:[svg:not([class*='size-'])]:size-4 w-full relative group/alert\",\n  {\n    variants: {\n      variant: {\n        default: \"bg-card text-card-foreground\",\n        destructive:\n          \"text-destructive bg-card *:data-[slot=alert-description]:text-destructive/90 *:[svg]:text-current\",\n      },\n    },\n    defaultVariants: {\n      variant: \"default\",\n    },\n  },\n);\n\nfunction Alert({\n  className,\n  variant,\n  ...props\n}: React.ComponentProps<\"div\"> & VariantProps<typeof alertVariants>) {\n  return (\n    <div\n      data-slot=\"alert\"\n      role=\"alert\"\n      className={cn(alertVariants({ variant }), className)}\n      {...props}\n    />\n  );\n}\n\nfunction AlertTitle({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"alert-title\"\n      className={cn(\n        \"font-medium group-has-[>svg]/alert:col-start-2 [&_a]:hover:text-foreground [&_a]:underline [&_a]:underline-offset-3\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AlertDescription({\n  className,\n  ...props\n}: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"alert-description\"\n      className={cn(\n        \"text-muted-foreground text-sm text-balance md:text-pretty [&_p:not(:last-child)]:mb-4 [&_a]:hover:text-foreground [&_a]:underline [&_a]:underline-offset-3\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AlertAction({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"alert-action\"\n      className={cn(\"absolute top-2 right-2\", className)}\n      {...props}\n    />\n  );\n}\n\nexport { Alert, AlertTitle, AlertDescription, AlertAction };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/aspect-ratio.tsx",
    "content": "\"use client\";\n\nimport { AspectRatio as AspectRatioPrimitive } from \"radix-ui\";\n\nfunction AspectRatio({\n  ...props\n}: React.ComponentProps<typeof AspectRatioPrimitive.Root>) {\n  return <AspectRatioPrimitive.Root data-slot=\"aspect-ratio\" {...props} />;\n}\n\nexport { AspectRatio };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/avatar.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Avatar as AvatarPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Avatar({\n  className,\n  size = \"default\",\n  ...props\n}: React.ComponentProps<typeof AvatarPrimitive.Root> & {\n  size?: \"default\" | \"sm\" | \"lg\";\n}) {\n  return (\n    <AvatarPrimitive.Root\n      data-slot=\"avatar\"\n      data-size={size}\n      className={cn(\n        \"size-8 rounded-full after:rounded-full data-[size=lg]:size-10 data-[size=sm]:size-6 after:border-border group/avatar relative flex shrink-0 select-none after:absolute after:inset-0 after:border after:mix-blend-darken dark:after:mix-blend-lighten\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AvatarImage({\n  className,\n  ...props\n}: React.ComponentProps<typeof AvatarPrimitive.Image>) {\n  return (\n    <AvatarPrimitive.Image\n      data-slot=\"avatar-image\"\n      className={cn(\n        \"rounded-full aspect-square size-full object-cover\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AvatarFallback({\n  className,\n  ...props\n}: React.ComponentProps<typeof AvatarPrimitive.Fallback>) {\n  return (\n    <AvatarPrimitive.Fallback\n      data-slot=\"avatar-fallback\"\n      className={cn(\n        \"bg-muted text-muted-foreground rounded-full flex size-full items-center justify-center text-sm group-data-[size=sm]/avatar:text-xs\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AvatarBadge({ className, ...props }: React.ComponentProps<\"span\">) {\n  return (\n    <span\n      data-slot=\"avatar-badge\"\n      className={cn(\n        \"bg-primary text-primary-foreground ring-background absolute right-0 bottom-0 z-10 inline-flex items-center justify-center rounded-full bg-blend-color ring-2 select-none\",\n        \"group-data-[size=sm]/avatar:size-2 group-data-[size=sm]/avatar:[&>svg]:hidden\",\n        \"group-data-[size=default]/avatar:size-2.5 group-data-[size=default]/avatar:[&>svg]:size-2\",\n        \"group-data-[size=lg]/avatar:size-3 group-data-[size=lg]/avatar:[&>svg]:size-2\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AvatarGroup({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"avatar-group\"\n      className={cn(\n        \"*:data-[slot=avatar]:ring-background group/avatar-group flex -space-x-2 *:data-[slot=avatar]:ring-2\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction AvatarGroupCount({\n  className,\n  ...props\n}: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"avatar-group-count\"\n      className={cn(\n        \"bg-muted text-muted-foreground size-8 rounded-full text-sm group-has-data-[size=lg]/avatar-group:size-10 group-has-data-[size=sm]/avatar-group:size-6 [&>svg]:size-4 group-has-data-[size=lg]/avatar-group:[&>svg]:size-5 group-has-data-[size=sm]/avatar-group:[&>svg]:size-3 ring-background relative flex shrink-0 items-center justify-center ring-2\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport {\n  Avatar,\n  AvatarImage,\n  AvatarFallback,\n  AvatarGroup,\n  AvatarGroupCount,\n  AvatarBadge,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/badge.tsx",
    "content": "import * as React from \"react\";\nimport { cva, type VariantProps } from \"class-variance-authority\";\nimport { Slot } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst badgeVariants = cva(\n  \"h-5 gap-1 rounded-4xl border border-transparent px-2 py-0.5 text-xs font-medium transition-all has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&>svg]:size-3! inline-flex items-center justify-center w-fit whitespace-nowrap shrink-0 [&>svg]:pointer-events-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive overflow-hidden group/badge\",\n  {\n    variants: {\n      variant: {\n        default: \"bg-primary text-primary-foreground [a]:hover:bg-primary/80\",\n        secondary:\n          \"bg-secondary text-secondary-foreground [a]:hover:bg-secondary/80\",\n        destructive:\n          \"bg-destructive/10 [a]:hover:bg-destructive/20 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 text-destructive dark:bg-destructive/20\",\n        outline:\n          \"border-border text-foreground [a]:hover:bg-muted [a]:hover:text-muted-foreground\",\n        ghost:\n          \"hover:bg-muted hover:text-muted-foreground dark:hover:bg-muted/50\",\n        link: \"text-primary underline-offset-4 hover:underline\",\n      },\n    },\n    defaultVariants: {\n      variant: \"default\",\n    },\n  },\n);\n\nfunction Badge({\n  className,\n  variant = \"default\",\n  asChild = false,\n  ...props\n}: React.ComponentProps<\"span\"> &\n  VariantProps<typeof badgeVariants> & { asChild?: boolean }) {\n  const Comp = asChild ? Slot.Root : \"span\";\n\n  return (\n    <Comp\n      data-slot=\"badge\"\n      data-variant={variant}\n      className={cn(badgeVariants({ variant }), className)}\n      {...props}\n    />\n  );\n}\n\nexport { Badge, badgeVariants };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/breadcrumb.tsx",
    "content": "import * as React from \"react\";\nimport { Slot } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { ChevronRightIcon, MoreHorizontalIcon } from \"lucide-react\";\n\nfunction Breadcrumb({ className, ...props }: React.ComponentProps<\"nav\">) {\n  return (\n    <nav\n      aria-label=\"breadcrumb\"\n      data-slot=\"breadcrumb\"\n      className={cn(className)}\n      {...props}\n    />\n  );\n}\n\nfunction BreadcrumbList({ className, ...props }: React.ComponentProps<\"ol\">) {\n  return (\n    <ol\n      data-slot=\"breadcrumb-list\"\n      className={cn(\n        \"text-muted-foreground gap-1.5 text-sm flex flex-wrap items-center break-words\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction BreadcrumbItem({ className, ...props }: React.ComponentProps<\"li\">) {\n  return (\n    <li\n      data-slot=\"breadcrumb-item\"\n      className={cn(\"gap-1 inline-flex items-center\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction BreadcrumbLink({\n  asChild,\n  className,\n  ...props\n}: React.ComponentProps<\"a\"> & {\n  asChild?: boolean;\n}) {\n  const Comp = asChild ? Slot.Root : \"a\";\n\n  return (\n    <Comp\n      data-slot=\"breadcrumb-link\"\n      className={cn(\"hover:text-foreground transition-colors\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction BreadcrumbPage({ className, ...props }: React.ComponentProps<\"span\">) {\n  return (\n    <span\n      data-slot=\"breadcrumb-page\"\n      role=\"link\"\n      aria-disabled=\"true\"\n      aria-current=\"page\"\n      className={cn(\"text-foreground font-normal\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction BreadcrumbSeparator({\n  children,\n  className,\n  ...props\n}: React.ComponentProps<\"li\">) {\n  return (\n    <li\n      data-slot=\"breadcrumb-separator\"\n      role=\"presentation\"\n      aria-hidden=\"true\"\n      className={cn(\"[&>svg]:size-3.5\", className)}\n      {...props}\n    >\n      {children ?? <ChevronRightIcon />}\n    </li>\n  );\n}\n\nfunction BreadcrumbEllipsis({\n  className,\n  ...props\n}: React.ComponentProps<\"span\">) {\n  return (\n    <span\n      data-slot=\"breadcrumb-ellipsis\"\n      role=\"presentation\"\n      aria-hidden=\"true\"\n      className={cn(\n        \"size-5 [&>svg]:size-4 flex items-center justify-center\",\n        className,\n      )}\n      {...props}\n    >\n      <MoreHorizontalIcon />\n      <span className=\"sr-only\">More</span>\n    </span>\n  );\n}\n\nexport {\n  Breadcrumb,\n  BreadcrumbList,\n  BreadcrumbItem,\n  BreadcrumbLink,\n  BreadcrumbPage,\n  BreadcrumbSeparator,\n  BreadcrumbEllipsis,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/button-group.tsx",
    "content": "import { cva, type VariantProps } from \"class-variance-authority\";\nimport { Slot } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Separator } from \"@/components/ui/separator\";\n\nconst buttonGroupVariants = cva(\n  \"has-[>[data-slot=button-group]]:gap-2 has-[select[aria-hidden=true]:last-child]:[&>[data-slot=select-trigger]:last-of-type]:rounded-r-lg flex w-fit items-stretch [&>*]:focus-visible:z-10 [&>*]:focus-visible:relative [&>[data-slot=select-trigger]:not([class*='w-'])]:w-fit [&>input]:flex-1\",\n  {\n    variants: {\n      orientation: {\n        horizontal:\n          \"[&>[data-slot]:not(:has(~[data-slot]))]:rounded-r-lg! [&>*:not(:first-child)]:rounded-l-none [&>*:not(:first-child)]:border-l-0 [&>*:not(:last-child)]:rounded-r-none\",\n        vertical:\n          \"[&>[data-slot]:not(:has(~[data-slot]))]:rounded-b-lg! flex-col [&>*:not(:first-child)]:rounded-t-none [&>*:not(:first-child)]:border-t-0 [&>*:not(:last-child)]:rounded-b-none\",\n      },\n    },\n    defaultVariants: {\n      orientation: \"horizontal\",\n    },\n  },\n);\n\nfunction ButtonGroup({\n  className,\n  orientation,\n  ...props\n}: React.ComponentProps<\"div\"> & VariantProps<typeof buttonGroupVariants>) {\n  return (\n    <div\n      role=\"group\"\n      data-slot=\"button-group\"\n      data-orientation={orientation}\n      className={cn(buttonGroupVariants({ orientation }), className)}\n      {...props}\n    />\n  );\n}\n\nfunction ButtonGroupText({\n  className,\n  asChild = false,\n  ...props\n}: React.ComponentProps<\"div\"> & {\n  asChild?: boolean;\n}) {\n  const Comp = asChild ? Slot.Root : \"div\";\n\n  return (\n    <Comp\n      className={cn(\n        \"bg-muted gap-2 rounded-lg border px-2.5 text-sm font-medium [&_svg:not([class*='size-'])]:size-4 flex items-center [&_svg]:pointer-events-none\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ButtonGroupSeparator({\n  className,\n  orientation = \"vertical\",\n  ...props\n}: React.ComponentProps<typeof Separator>) {\n  return (\n    <Separator\n      data-slot=\"button-group-separator\"\n      orientation={orientation}\n      className={cn(\n        \"bg-input relative self-stretch data-[orientation=horizontal]:mx-px data-[orientation=horizontal]:w-auto data-[orientation=vertical]:my-px data-[orientation=vertical]:h-auto\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport {\n  ButtonGroup,\n  ButtonGroupSeparator,\n  ButtonGroupText,\n  buttonGroupVariants,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/button.tsx",
    "content": "import * as React from \"react\";\nimport { cva, type VariantProps } from \"class-variance-authority\";\nimport { Slot } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst buttonVariants = cva(\n  \"focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 rounded-lg border border-transparent bg-clip-padding text-sm font-medium focus-visible:ring-[3px] aria-invalid:ring-[3px] [&_svg:not([class*='size-'])]:size-4 inline-flex items-center justify-center whitespace-nowrap transition-all disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none shrink-0 [&_svg]:shrink-0 outline-none group/button select-none\",\n  {\n    variants: {\n      variant: {\n        default: \"bg-primary text-primary-foreground [a]:hover:bg-primary/80\",\n        outline:\n          \"border-border bg-background hover:bg-muted hover:text-foreground dark:bg-input/30 dark:border-input dark:hover:bg-input/50 aria-expanded:bg-muted aria-expanded:text-foreground\",\n        secondary:\n          \"bg-secondary text-secondary-foreground hover:bg-secondary/80 aria-expanded:bg-secondary aria-expanded:text-secondary-foreground\",\n        ghost:\n          \"hover:bg-muted hover:text-foreground dark:hover:bg-muted/50 aria-expanded:bg-muted aria-expanded:text-foreground\",\n        destructive:\n          \"bg-destructive/10 hover:bg-destructive/20 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/20 text-destructive focus-visible:border-destructive/40 dark:hover:bg-destructive/30\",\n        link: \"text-primary underline-offset-4 hover:underline\",\n      },\n      size: {\n        default:\n          \"h-8 gap-1.5 px-2.5 has-data-[icon=inline-end]:pr-2 has-data-[icon=inline-start]:pl-2\",\n        xs: \"h-6 gap-1 rounded-[min(var(--radius-md),10px)] px-2 text-xs in-data-[slot=button-group]:rounded-lg has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&_svg:not([class*='size-'])]:size-3\",\n        sm: \"h-7 gap-1 rounded-[min(var(--radius-md),12px)] px-2.5 text-[0.8rem] in-data-[slot=button-group]:rounded-lg has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&_svg:not([class*='size-'])]:size-3.5\",\n        lg: \"h-9 gap-1.5 px-2.5 has-data-[icon=inline-end]:pr-3 has-data-[icon=inline-start]:pl-3\",\n        icon: \"size-8\",\n        \"icon-xs\":\n          \"size-6 rounded-[min(var(--radius-md),10px)] in-data-[slot=button-group]:rounded-lg [&_svg:not([class*='size-'])]:size-3\",\n        \"icon-sm\":\n          \"size-7 rounded-[min(var(--radius-md),12px)] in-data-[slot=button-group]:rounded-lg\",\n        \"icon-lg\": \"size-9\",\n      },\n    },\n    defaultVariants: {\n      variant: \"default\",\n      size: \"default\",\n    },\n  },\n);\n\nfunction Button({\n  className,\n  variant = \"default\",\n  size = \"default\",\n  asChild = false,\n  ...props\n}: React.ComponentProps<\"button\"> &\n  VariantProps<typeof buttonVariants> & {\n    asChild?: boolean;\n  }) {\n  const Comp = asChild ? Slot.Root : \"button\";\n\n  return (\n    <Comp\n      data-slot=\"button\"\n      data-variant={variant}\n      data-size={size}\n      className={cn(buttonVariants({ variant, size, className }))}\n      {...props}\n    />\n  );\n}\n\nexport { Button, buttonVariants };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/calendar.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport {\n  DayPicker,\n  getDefaultClassNames,\n  type DayButton,\n} from \"react-day-picker\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Button, buttonVariants } from \"@/components/ui/button\";\nimport {\n  ChevronLeftIcon,\n  ChevronRightIcon,\n  ChevronDownIcon,\n} from \"lucide-react\";\n\nfunction Calendar({\n  className,\n  classNames,\n  showOutsideDays = true,\n  captionLayout = \"label\",\n  buttonVariant = \"ghost\",\n  formatters,\n  components,\n  ...props\n}: React.ComponentProps<typeof DayPicker> & {\n  buttonVariant?: React.ComponentProps<typeof Button>[\"variant\"];\n}) {\n  const defaultClassNames = getDefaultClassNames();\n\n  return (\n    <DayPicker\n      showOutsideDays={showOutsideDays}\n      className={cn(\n        \"p-2 [--cell-radius:var(--radius-md)] [--cell-size:--spacing(7)] bg-background group/calendar [[data-slot=card-content]_&]:bg-transparent [[data-slot=popover-content]_&]:bg-transparent\",\n        String.raw`rtl:**:[.rdp-button\\_next>svg]:rotate-180`,\n        String.raw`rtl:**:[.rdp-button\\_previous>svg]:rotate-180`,\n        className,\n      )}\n      captionLayout={captionLayout}\n      formatters={{\n        formatMonthDropdown: (date) =>\n          date.toLocaleString(\"default\", { month: \"short\" }),\n        ...formatters,\n      }}\n      classNames={{\n        root: cn(\"w-fit\", defaultClassNames.root),\n        months: cn(\n          \"flex gap-4 flex-col md:flex-row relative\",\n          defaultClassNames.months,\n        ),\n        month: cn(\"flex flex-col w-full gap-4\", defaultClassNames.month),\n        nav: cn(\n          \"flex items-center gap-1 w-full absolute top-0 inset-x-0 justify-between\",\n          defaultClassNames.nav,\n        ),\n        button_previous: cn(\n          buttonVariants({ variant: buttonVariant }),\n          \"size-(--cell-size) aria-disabled:opacity-50 p-0 select-none\",\n          defaultClassNames.button_previous,\n        ),\n        button_next: cn(\n          buttonVariants({ variant: buttonVariant }),\n          \"size-(--cell-size) aria-disabled:opacity-50 p-0 select-none\",\n          defaultClassNames.button_next,\n        ),\n        month_caption: cn(\n          \"flex items-center justify-center h-(--cell-size) w-full px-(--cell-size)\",\n          defaultClassNames.month_caption,\n        ),\n        dropdowns: cn(\n          \"w-full flex items-center text-sm font-medium justify-center h-(--cell-size) gap-1.5\",\n          defaultClassNames.dropdowns,\n        ),\n        dropdown_root: cn(\n          \"relative cn-calendar-dropdown-root rounded-(--cell-radius)\",\n          defaultClassNames.dropdown_root,\n        ),\n        dropdown: cn(\n          \"absolute bg-popover inset-0 opacity-0\",\n          defaultClassNames.dropdown,\n        ),\n        caption_label: cn(\n          \"select-none font-medium\",\n          captionLayout === \"label\"\n            ? \"text-sm\"\n            : \"cn-calendar-caption-label rounded-(--cell-radius) flex items-center gap-1 text-sm  [&>svg]:text-muted-foreground [&>svg]:size-3.5\",\n          defaultClassNames.caption_label,\n        ),\n        table: \"w-full border-collapse\",\n        weekdays: cn(\"flex\", defaultClassNames.weekdays),\n        weekday: cn(\n          \"text-muted-foreground rounded-(--cell-radius) flex-1 font-normal text-[0.8rem] select-none\",\n          defaultClassNames.weekday,\n        ),\n        week: cn(\"flex w-full mt-2\", defaultClassNames.week),\n        week_number_header: cn(\n          \"select-none w-(--cell-size)\",\n          defaultClassNames.week_number_header,\n        ),\n        week_number: cn(\n          \"text-[0.8rem] select-none text-muted-foreground\",\n          defaultClassNames.week_number,\n        ),\n        day: cn(\n          \"relative w-full rounded-(--cell-radius) h-full p-0 text-center [&:last-child[data-selected=true]_button]:rounded-r-(--cell-radius) group/day aspect-square select-none\",\n          props.showWeekNumber\n            ? \"[&:nth-child(2)[data-selected=true]_button]:rounded-l-(--cell-radius)\"\n            : \"[&:first-child[data-selected=true]_button]:rounded-l-(--cell-radius)\",\n          defaultClassNames.day,\n        ),\n        range_start: cn(\n          \"rounded-l-(--cell-radius) bg-muted relative after:bg-muted after:absolute after:inset-y-0 after:w-4 after:right-0 -z-0 isolate\",\n          defaultClassNames.range_start,\n        ),\n        range_middle: cn(\"rounded-none\", defaultClassNames.range_middle),\n        range_end: cn(\n          \"rounded-r-(--cell-radius) bg-muted relative after:bg-muted-200 after:absolute after:inset-y-0 after:w-4 after:left-0 -z-0 isolate\",\n          defaultClassNames.range_end,\n        ),\n        today: cn(\n          \"bg-muted text-foreground rounded-(--cell-radius) data-[selected=true]:rounded-none\",\n          defaultClassNames.today,\n        ),\n        outside: cn(\n          \"text-muted-foreground aria-selected:text-muted-foreground\",\n          defaultClassNames.outside,\n        ),\n        disabled: cn(\n          \"text-muted-foreground opacity-50\",\n          defaultClassNames.disabled,\n        ),\n        hidden: cn(\"invisible\", defaultClassNames.hidden),\n        ...classNames,\n      }}\n      components={{\n        Root: ({ className, rootRef, ...props }) => {\n          return (\n            <div\n              data-slot=\"calendar\"\n              ref={rootRef}\n              className={cn(className)}\n              {...props}\n            />\n          );\n        },\n        Chevron: ({ className, orientation, ...props }) => {\n          if (orientation === \"left\") {\n            return (\n              <ChevronLeftIcon className={cn(\"size-4\", className)} {...props} />\n            );\n          }\n\n          if (orientation === \"right\") {\n            return (\n              <ChevronRightIcon\n                className={cn(\"size-4\", className)}\n                {...props}\n              />\n            );\n          }\n\n          return (\n            <ChevronDownIcon className={cn(\"size-4\", className)} {...props} />\n          );\n        },\n        DayButton: CalendarDayButton,\n        WeekNumber: ({ children, ...props }) => {\n          return (\n            <td {...props}>\n              <div className=\"flex size-(--cell-size) items-center justify-center text-center\">\n                {children}\n              </div>\n            </td>\n          );\n        },\n        ...components,\n      }}\n      {...props}\n    />\n  );\n}\n\nfunction CalendarDayButton({\n  className,\n  day,\n  modifiers,\n  ...props\n}: React.ComponentProps<typeof DayButton>) {\n  const defaultClassNames = getDefaultClassNames();\n\n  const ref = React.useRef<HTMLButtonElement>(null);\n  React.useEffect(() => {\n    if (modifiers.focused) ref.current?.focus();\n  }, [modifiers.focused]);\n\n  return (\n    <Button\n      ref={ref}\n      variant=\"ghost\"\n      size=\"icon\"\n      data-day={day.date.toLocaleDateString()}\n      data-selected-single={\n        modifiers.selected &&\n        !modifiers.range_start &&\n        !modifiers.range_end &&\n        !modifiers.range_middle\n      }\n      data-range-start={modifiers.range_start}\n      data-range-end={modifiers.range_end}\n      data-range-middle={modifiers.range_middle}\n      className={cn(\n        \"data-[selected-single=true]:bg-primary data-[selected-single=true]:text-primary-foreground data-[range-middle=true]:bg-muted data-[range-middle=true]:text-foreground data-[range-start=true]:bg-primary data-[range-start=true]:text-primary-foreground data-[range-end=true]:bg-primary data-[range-end=true]:text-primary-foreground group-data-[focused=true]/day:border-ring group-data-[focused=true]/day:ring-ring/50 dark:hover:text-foreground relative isolate z-10 flex aspect-square size-auto w-full min-w-(--cell-size) flex-col gap-1 border-0 leading-none font-normal group-data-[focused=true]/day:relative group-data-[focused=true]/day:z-10 group-data-[focused=true]/day:ring-[3px] data-[range-end=true]:rounded-(--cell-radius) data-[range-end=true]:rounded-r-(--cell-radius) data-[range-middle=true]:rounded-none data-[range-start=true]:rounded-(--cell-radius) data-[range-start=true]:rounded-l-(--cell-radius) [&>span]:text-xs [&>span]:opacity-70\",\n        defaultClassNames.day,\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport { Calendar, CalendarDayButton };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/card.tsx",
    "content": "import * as React from \"react\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Card({\n  className,\n  size = \"default\",\n  ...props\n}: React.ComponentProps<\"div\"> & { size?: \"default\" | \"sm\" }) {\n  return (\n    <div\n      data-slot=\"card\"\n      data-size={size}\n      className={cn(\n        \"ring-foreground/10 bg-card text-card-foreground gap-4 overflow-hidden rounded-xl py-4 text-sm ring-1 has-data-[slot=card-footer]:pb-0 has-[>img:first-child]:pt-0 data-[size=sm]:gap-3 data-[size=sm]:py-3 data-[size=sm]:has-data-[slot=card-footer]:pb-0 *:[img:first-child]:rounded-t-xl *:[img:last-child]:rounded-b-xl group/card flex flex-col\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction CardHeader({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"card-header\"\n      className={cn(\n        \"gap-1 rounded-t-xl px-4 group-data-[size=sm]/card:px-3 [.border-b]:pb-4 group-data-[size=sm]/card:[.border-b]:pb-3 group/card-header @container/card-header grid auto-rows-min items-start has-data-[slot=card-action]:grid-cols-[1fr_auto] has-data-[slot=card-description]:grid-rows-[auto_auto]\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction CardTitle({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"card-title\"\n      className={cn(\n        \"text-base leading-snug font-medium group-data-[size=sm]/card:text-sm\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction CardDescription({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"card-description\"\n      className={cn(\"text-muted-foreground text-sm\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction CardAction({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"card-action\"\n      className={cn(\n        \"col-start-2 row-span-2 row-start-1 self-start justify-self-end\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction CardContent({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"card-content\"\n      className={cn(\"px-4 group-data-[size=sm]/card:px-3\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction CardFooter({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"card-footer\"\n      className={cn(\n        \"bg-muted/50 rounded-b-xl border-t p-4 group-data-[size=sm]/card:p-3 flex items-center\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport {\n  Card,\n  CardHeader,\n  CardFooter,\n  CardTitle,\n  CardAction,\n  CardDescription,\n  CardContent,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/carousel.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport useEmblaCarousel, {\n  type UseEmblaCarouselType,\n} from \"embla-carousel-react\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Button } from \"@/components/ui/button\";\nimport { ChevronLeftIcon, ChevronRightIcon } from \"lucide-react\";\n\ntype CarouselApi = UseEmblaCarouselType[1];\ntype UseCarouselParameters = Parameters<typeof useEmblaCarousel>;\ntype CarouselOptions = UseCarouselParameters[0];\ntype CarouselPlugin = UseCarouselParameters[1];\n\ntype CarouselProps = {\n  opts?: CarouselOptions;\n  plugins?: CarouselPlugin;\n  orientation?: \"horizontal\" | \"vertical\";\n  setApi?: (api: CarouselApi) => void;\n};\n\ntype CarouselContextProps = {\n  carouselRef: ReturnType<typeof useEmblaCarousel>[0];\n  api: ReturnType<typeof useEmblaCarousel>[1];\n  scrollPrev: () => void;\n  scrollNext: () => void;\n  canScrollPrev: boolean;\n  canScrollNext: boolean;\n} & CarouselProps;\n\nconst CarouselContext = React.createContext<CarouselContextProps | null>(null);\n\nfunction useCarousel() {\n  const context = React.useContext(CarouselContext);\n\n  if (!context) {\n    throw new Error(\"useCarousel must be used within a <Carousel />\");\n  }\n\n  return context;\n}\n\nfunction Carousel({\n  orientation = \"horizontal\",\n  opts,\n  setApi,\n  plugins,\n  className,\n  children,\n  ...props\n}: React.ComponentProps<\"div\"> & CarouselProps) {\n  const [carouselRef, api] = useEmblaCarousel(\n    {\n      ...opts,\n      axis: orientation === \"horizontal\" ? \"x\" : \"y\",\n    },\n    plugins,\n  );\n  const [canScrollPrev, setCanScrollPrev] = React.useState(false);\n  const [canScrollNext, setCanScrollNext] = React.useState(false);\n\n  const onSelect = React.useCallback((api: CarouselApi) => {\n    if (!api) return;\n    setCanScrollPrev(api.canScrollPrev());\n    setCanScrollNext(api.canScrollNext());\n  }, []);\n\n  const scrollPrev = React.useCallback(() => {\n    api?.scrollPrev();\n  }, [api]);\n\n  const scrollNext = React.useCallback(() => {\n    api?.scrollNext();\n  }, [api]);\n\n  const handleKeyDown = React.useCallback(\n    (event: React.KeyboardEvent<HTMLDivElement>) => {\n      if (event.key === \"ArrowLeft\") {\n        event.preventDefault();\n        scrollPrev();\n      } else if (event.key === \"ArrowRight\") {\n        event.preventDefault();\n        scrollNext();\n      }\n    },\n    [scrollPrev, scrollNext],\n  );\n\n  React.useEffect(() => {\n    if (!api || !setApi) return;\n    setApi(api);\n  }, [api, setApi]);\n\n  React.useEffect(() => {\n    if (!api) return;\n    onSelect(api);\n    api.on(\"reInit\", onSelect);\n    api.on(\"select\", onSelect);\n\n    return () => {\n      api?.off(\"select\", onSelect);\n    };\n  }, [api, onSelect]);\n\n  return (\n    <CarouselContext.Provider\n      value={{\n        carouselRef,\n        api: api,\n        opts,\n        orientation:\n          orientation || (opts?.axis === \"y\" ? \"vertical\" : \"horizontal\"),\n        scrollPrev,\n        scrollNext,\n        canScrollPrev,\n        canScrollNext,\n      }}\n    >\n      <div\n        onKeyDownCapture={handleKeyDown}\n        className={cn(\"relative\", className)}\n        role=\"region\"\n        aria-roledescription=\"carousel\"\n        data-slot=\"carousel\"\n        {...props}\n      >\n        {children}\n      </div>\n    </CarouselContext.Provider>\n  );\n}\n\nfunction CarouselContent({ className, ...props }: React.ComponentProps<\"div\">) {\n  const { carouselRef, orientation } = useCarousel();\n\n  return (\n    <div\n      ref={carouselRef}\n      className=\"overflow-hidden\"\n      data-slot=\"carousel-content\"\n    >\n      <div\n        className={cn(\n          \"flex\",\n          orientation === \"horizontal\" ? \"-ml-4\" : \"-mt-4 flex-col\",\n          className,\n        )}\n        {...props}\n      />\n    </div>\n  );\n}\n\nfunction CarouselItem({ className, ...props }: React.ComponentProps<\"div\">) {\n  const { orientation } = useCarousel();\n\n  return (\n    <div\n      role=\"group\"\n      aria-roledescription=\"slide\"\n      data-slot=\"carousel-item\"\n      className={cn(\n        \"min-w-0 shrink-0 grow-0 basis-full\",\n        orientation === \"horizontal\" ? \"pl-4\" : \"pt-4\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction CarouselPrevious({\n  className,\n  variant = \"outline\",\n  size = \"icon-sm\",\n  ...props\n}: React.ComponentProps<typeof Button>) {\n  const { orientation, scrollPrev, canScrollPrev } = useCarousel();\n\n  return (\n    <Button\n      data-slot=\"carousel-previous\"\n      variant={variant}\n      size={size}\n      className={cn(\n        \"rounded-full absolute touch-manipulation\",\n        orientation === \"horizontal\"\n          ? \"top-1/2 -left-12 -translate-y-1/2\"\n          : \"-top-12 left-1/2 -translate-x-1/2 rotate-90\",\n        className,\n      )}\n      disabled={!canScrollPrev}\n      onClick={scrollPrev}\n      {...props}\n    >\n      <ChevronLeftIcon />\n      <span className=\"sr-only\">Previous slide</span>\n    </Button>\n  );\n}\n\nfunction CarouselNext({\n  className,\n  variant = \"outline\",\n  size = \"icon-sm\",\n  ...props\n}: React.ComponentProps<typeof Button>) {\n  const { orientation, scrollNext, canScrollNext } = useCarousel();\n\n  return (\n    <Button\n      data-slot=\"carousel-next\"\n      variant={variant}\n      size={size}\n      className={cn(\n        \"rounded-full absolute touch-manipulation\",\n        orientation === \"horizontal\"\n          ? \"top-1/2 -right-12 -translate-y-1/2\"\n          : \"-bottom-12 left-1/2 -translate-x-1/2 rotate-90\",\n        className,\n      )}\n      disabled={!canScrollNext}\n      onClick={scrollNext}\n      {...props}\n    >\n      <ChevronRightIcon />\n      <span className=\"sr-only\">Next slide</span>\n    </Button>\n  );\n}\n\nexport {\n  type CarouselApi,\n  Carousel,\n  CarouselContent,\n  CarouselItem,\n  CarouselPrevious,\n  CarouselNext,\n  useCarousel,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/chart.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport * as RechartsPrimitive from \"recharts\";\n\nimport { cn } from \"@/lib/utils\";\n\n// Format: { THEME_NAME: CSS_SELECTOR }\nconst THEMES = { light: \"\", dark: \".dark\" } as const;\n\nexport type ChartConfig = {\n  [k in string]: {\n    label?: React.ReactNode;\n    icon?: React.ComponentType;\n  } & (\n    | { color?: string; theme?: never }\n    | { color?: never; theme: Record<keyof typeof THEMES, string> }\n  );\n};\n\ntype ChartContextProps = {\n  config: ChartConfig;\n};\n\nconst ChartContext = React.createContext<ChartContextProps | null>(null);\n\nfunction useChart() {\n  const context = React.useContext(ChartContext);\n\n  if (!context) {\n    throw new Error(\"useChart must be used within a <ChartContainer />\");\n  }\n\n  return context;\n}\n\nfunction ChartContainer({\n  id,\n  className,\n  children,\n  config,\n  ...props\n}: React.ComponentProps<\"div\"> & {\n  config: ChartConfig;\n  children: React.ComponentProps<\n    typeof RechartsPrimitive.ResponsiveContainer\n  >[\"children\"];\n}) {\n  const uniqueId = React.useId();\n  const chartId = `chart-${id || uniqueId.replace(/:/g, \"\")}`;\n\n  return (\n    <ChartContext.Provider value={{ config }}>\n      <div\n        data-slot=\"chart\"\n        data-chart={chartId}\n        className={cn(\n          \"[&_.recharts-cartesian-axis-tick_text]:fill-muted-foreground [&_.recharts-cartesian-grid_line[stroke='#ccc']]:stroke-border/50 [&_.recharts-curve.recharts-tooltip-cursor]:stroke-border [&_.recharts-polar-grid_[stroke='#ccc']]:stroke-border [&_.recharts-radial-bar-background-sector]:fill-muted [&_.recharts-rectangle.recharts-tooltip-cursor]:fill-muted [&_.recharts-reference-line_[stroke='#ccc']]:stroke-border flex aspect-video justify-center text-xs [&_.recharts-dot[stroke='#fff']]:stroke-transparent [&_.recharts-layer]:outline-hidden [&_.recharts-sector]:outline-hidden [&_.recharts-sector[stroke='#fff']]:stroke-transparent [&_.recharts-surface]:outline-hidden\",\n          className,\n        )}\n        {...props}\n      >\n        <ChartStyle id={chartId} config={config} />\n        <RechartsPrimitive.ResponsiveContainer>\n          {children}\n        </RechartsPrimitive.ResponsiveContainer>\n      </div>\n    </ChartContext.Provider>\n  );\n}\n\nconst ChartStyle = ({ id, config }: { id: string; config: ChartConfig }) => {\n  const colorConfig = Object.entries(config).filter(\n    ([, config]) => config.theme || config.color,\n  );\n\n  if (!colorConfig.length) {\n    return null;\n  }\n\n  return (\n    <style\n      dangerouslySetInnerHTML={{\n        __html: Object.entries(THEMES)\n          .map(\n            ([theme, prefix]) => `\n${prefix} [data-chart=${id}] {\n${colorConfig\n  .map(([key, itemConfig]) => {\n    const color =\n      itemConfig.theme?.[theme as keyof typeof itemConfig.theme] ||\n      itemConfig.color;\n    return color ? `  --color-${key}: ${color};` : null;\n  })\n  .join(\"\\n\")}\n}\n`,\n          )\n          .join(\"\\n\"),\n      }}\n    />\n  );\n};\n\nconst ChartTooltip = RechartsPrimitive.Tooltip;\n\nfunction ChartTooltipContent({\n  active,\n  payload,\n  className,\n  indicator = \"dot\",\n  hideLabel = false,\n  hideIndicator = false,\n  label,\n  labelFormatter,\n  labelClassName,\n  formatter,\n  color,\n  nameKey,\n  labelKey,\n}: React.ComponentProps<typeof RechartsPrimitive.Tooltip> &\n  React.ComponentProps<\"div\"> & {\n    hideLabel?: boolean;\n    hideIndicator?: boolean;\n    indicator?: \"line\" | \"dot\" | \"dashed\";\n    nameKey?: string;\n    labelKey?: string;\n  }) {\n  const { config } = useChart();\n\n  const tooltipLabel = React.useMemo(() => {\n    if (hideLabel || !payload?.length) {\n      return null;\n    }\n\n    const [item] = payload;\n    const key = `${labelKey || item?.dataKey || item?.name || \"value\"}`;\n    const itemConfig = getPayloadConfigFromPayload(config, item, key);\n    const value =\n      !labelKey && typeof label === \"string\"\n        ? config[label as keyof typeof config]?.label || label\n        : itemConfig?.label;\n\n    if (labelFormatter) {\n      return (\n        <div className={cn(\"font-medium\", labelClassName)}>\n          {labelFormatter(value, payload)}\n        </div>\n      );\n    }\n\n    if (!value) {\n      return null;\n    }\n\n    return <div className={cn(\"font-medium\", labelClassName)}>{value}</div>;\n  }, [\n    label,\n    labelFormatter,\n    payload,\n    hideLabel,\n    labelClassName,\n    config,\n    labelKey,\n  ]);\n\n  if (!active || !payload?.length) {\n    return null;\n  }\n\n  const nestLabel = payload.length === 1 && indicator !== \"dot\";\n\n  return (\n    <div\n      className={cn(\n        \"border-border/50 bg-background gap-1.5 rounded-lg border px-2.5 py-1.5 text-xs shadow-xl grid min-w-[8rem] items-start\",\n        className,\n      )}\n    >\n      {!nestLabel ? tooltipLabel : null}\n      <div className=\"grid gap-1.5\">\n        {payload\n          .filter((item) => item.type !== \"none\")\n          .map((item, index) => {\n            const key = `${nameKey || item.name || item.dataKey || \"value\"}`;\n            const itemConfig = getPayloadConfigFromPayload(config, item, key);\n            const indicatorColor = color || item.payload.fill || item.color;\n\n            return (\n              <div\n                key={item.dataKey}\n                className={cn(\n                  \"[&>svg]:text-muted-foreground flex w-full flex-wrap items-stretch gap-2 [&>svg]:h-2.5 [&>svg]:w-2.5\",\n                  indicator === \"dot\" && \"items-center\",\n                )}\n              >\n                {formatter && item?.value !== undefined && item.name ? (\n                  formatter(item.value, item.name, item, index, item.payload)\n                ) : (\n                  <>\n                    {itemConfig?.icon ? (\n                      <itemConfig.icon />\n                    ) : (\n                      !hideIndicator && (\n                        <div\n                          className={cn(\n                            \"shrink-0 rounded-[2px] border-(--color-border) bg-(--color-bg)\",\n                            {\n                              \"h-2.5 w-2.5\": indicator === \"dot\",\n                              \"w-1\": indicator === \"line\",\n                              \"w-0 border-[1.5px] border-dashed bg-transparent\":\n                                indicator === \"dashed\",\n                              \"my-0.5\": nestLabel && indicator === \"dashed\",\n                            },\n                          )}\n                          style={\n                            {\n                              \"--color-bg\": indicatorColor,\n                              \"--color-border\": indicatorColor,\n                            } as React.CSSProperties\n                          }\n                        />\n                      )\n                    )}\n                    <div\n                      className={cn(\n                        \"flex flex-1 justify-between leading-none\",\n                        nestLabel ? \"items-end\" : \"items-center\",\n                      )}\n                    >\n                      <div className=\"grid gap-1.5\">\n                        {nestLabel ? tooltipLabel : null}\n                        <span className=\"text-muted-foreground\">\n                          {itemConfig?.label || item.name}\n                        </span>\n                      </div>\n                      {item.value && (\n                        <span className=\"text-foreground font-mono font-medium tabular-nums\">\n                          {item.value.toLocaleString()}\n                        </span>\n                      )}\n                    </div>\n                  </>\n                )}\n              </div>\n            );\n          })}\n      </div>\n    </div>\n  );\n}\n\nconst ChartLegend = RechartsPrimitive.Legend;\n\nfunction ChartLegendContent({\n  className,\n  hideIcon = false,\n  payload,\n  verticalAlign = \"bottom\",\n  nameKey,\n}: React.ComponentProps<\"div\"> &\n  Pick<RechartsPrimitive.LegendProps, \"payload\" | \"verticalAlign\"> & {\n    hideIcon?: boolean;\n    nameKey?: string;\n  }) {\n  const { config } = useChart();\n\n  if (!payload?.length) {\n    return null;\n  }\n\n  return (\n    <div\n      className={cn(\n        \"flex items-center justify-center gap-4\",\n        verticalAlign === \"top\" ? \"pb-3\" : \"pt-3\",\n        className,\n      )}\n    >\n      {payload\n        .filter((item) => item.type !== \"none\")\n        .map((item) => {\n          const key = `${nameKey || item.dataKey || \"value\"}`;\n          const itemConfig = getPayloadConfigFromPayload(config, item, key);\n\n          return (\n            <div\n              key={item.value}\n              className={cn(\n                \"[&>svg]:text-muted-foreground flex items-center gap-1.5 [&>svg]:h-3 [&>svg]:w-3\",\n              )}\n            >\n              {itemConfig?.icon && !hideIcon ? (\n                <itemConfig.icon />\n              ) : (\n                <div\n                  className=\"h-2 w-2 shrink-0 rounded-[2px]\"\n                  style={{\n                    backgroundColor: item.color,\n                  }}\n                />\n              )}\n              {itemConfig?.label}\n            </div>\n          );\n        })}\n    </div>\n  );\n}\n\nfunction getPayloadConfigFromPayload(\n  config: ChartConfig,\n  payload: unknown,\n  key: string,\n) {\n  if (typeof payload !== \"object\" || payload === null) {\n    return undefined;\n  }\n\n  const payloadPayload =\n    \"payload\" in payload &&\n    typeof payload.payload === \"object\" &&\n    payload.payload !== null\n      ? payload.payload\n      : undefined;\n\n  let configLabelKey: string = key;\n\n  if (\n    key in payload &&\n    typeof payload[key as keyof typeof payload] === \"string\"\n  ) {\n    configLabelKey = payload[key as keyof typeof payload] as string;\n  } else if (\n    payloadPayload &&\n    key in payloadPayload &&\n    typeof payloadPayload[key as keyof typeof payloadPayload] === \"string\"\n  ) {\n    configLabelKey = payloadPayload[\n      key as keyof typeof payloadPayload\n    ] as string;\n  }\n\n  return configLabelKey in config\n    ? config[configLabelKey]\n    : config[key as keyof typeof config];\n}\n\nexport {\n  ChartContainer,\n  ChartTooltip,\n  ChartTooltipContent,\n  ChartLegend,\n  ChartLegendContent,\n  ChartStyle,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/checkbox.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Checkbox as CheckboxPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { CheckIcon } from \"lucide-react\";\n\nfunction Checkbox({\n  className,\n  ...props\n}: React.ComponentProps<typeof CheckboxPrimitive.Root>) {\n  return (\n    <CheckboxPrimitive.Root\n      data-slot=\"checkbox\"\n      className={cn(\n        \"border-input dark:bg-input/30 data-checked:bg-primary data-checked:text-primary-foreground dark:data-checked:bg-primary data-checked:border-primary aria-invalid:aria-checked:border-primary aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 flex size-4 items-center justify-center rounded-[4px] border transition-colors group-has-disabled/field:opacity-50 focus-visible:ring-[3px] aria-invalid:ring-[3px] peer relative shrink-0 outline-none after:absolute after:-inset-x-3 after:-inset-y-2 disabled:cursor-not-allowed disabled:opacity-50\",\n        className,\n      )}\n      {...props}\n    >\n      <CheckboxPrimitive.Indicator\n        data-slot=\"checkbox-indicator\"\n        className=\"[&>svg]:size-3.5 grid place-content-center text-current transition-none\"\n      >\n        <CheckIcon />\n      </CheckboxPrimitive.Indicator>\n    </CheckboxPrimitive.Root>\n  );\n}\n\nexport { Checkbox };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/collapsible.tsx",
    "content": "\"use client\";\n\nimport { Collapsible as CollapsiblePrimitive } from \"radix-ui\";\n\nfunction Collapsible({\n  ...props\n}: React.ComponentProps<typeof CollapsiblePrimitive.Root>) {\n  return <CollapsiblePrimitive.Root data-slot=\"collapsible\" {...props} />;\n}\n\nfunction CollapsibleTrigger({\n  ...props\n}: React.ComponentProps<typeof CollapsiblePrimitive.CollapsibleTrigger>) {\n  return (\n    <CollapsiblePrimitive.CollapsibleTrigger\n      data-slot=\"collapsible-trigger\"\n      {...props}\n    />\n  );\n}\n\nfunction CollapsibleContent({\n  ...props\n}: React.ComponentProps<typeof CollapsiblePrimitive.CollapsibleContent>) {\n  return (\n    <CollapsiblePrimitive.CollapsibleContent\n      data-slot=\"collapsible-content\"\n      {...props}\n    />\n  );\n}\n\nexport { Collapsible, CollapsibleTrigger, CollapsibleContent };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/combobox.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Combobox as ComboboxPrimitive } from \"@base-ui/react\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Button } from \"@/components/ui/button\";\nimport {\n  InputGroup,\n  InputGroupAddon,\n  InputGroupButton,\n  InputGroupInput,\n} from \"@/components/ui/input-group\";\nimport { ChevronDownIcon, XIcon, CheckIcon } from \"lucide-react\";\n\nconst Combobox = ComboboxPrimitive.Root;\n\nfunction ComboboxValue({ ...props }: ComboboxPrimitive.Value.Props) {\n  return <ComboboxPrimitive.Value data-slot=\"combobox-value\" {...props} />;\n}\n\nfunction ComboboxTrigger({\n  className,\n  children,\n  ...props\n}: ComboboxPrimitive.Trigger.Props) {\n  return (\n    <ComboboxPrimitive.Trigger\n      data-slot=\"combobox-trigger\"\n      className={cn(\"[&_svg:not([class*='size-'])]:size-4\", className)}\n      {...props}\n    >\n      {children}\n      <ChevronDownIcon className=\"text-muted-foreground size-4 pointer-events-none\" />\n    </ComboboxPrimitive.Trigger>\n  );\n}\n\nfunction ComboboxClear({ className, ...props }: ComboboxPrimitive.Clear.Props) {\n  return (\n    <ComboboxPrimitive.Clear\n      data-slot=\"combobox-clear\"\n      render={<InputGroupButton variant=\"ghost\" size=\"icon-xs\" />}\n      className={cn(className)}\n      {...props}\n    >\n      <XIcon className=\"pointer-events-none\" />\n    </ComboboxPrimitive.Clear>\n  );\n}\n\nfunction ComboboxInput({\n  className,\n  children,\n  disabled = false,\n  showTrigger = true,\n  showClear = false,\n  ...props\n}: ComboboxPrimitive.Input.Props & {\n  showTrigger?: boolean;\n  showClear?: boolean;\n}) {\n  return (\n    <InputGroup className={cn(\"w-auto\", className)}>\n      <ComboboxPrimitive.Input\n        render={<InputGroupInput disabled={disabled} />}\n        {...props}\n      />\n      <InputGroupAddon align=\"inline-end\">\n        {showTrigger && (\n          <InputGroupButton\n            size=\"icon-xs\"\n            variant=\"ghost\"\n            asChild\n            data-slot=\"input-group-button\"\n            className=\"group-has-data-[slot=combobox-clear]/input-group:hidden data-pressed:bg-transparent\"\n            disabled={disabled}\n          >\n            <ComboboxTrigger />\n          </InputGroupButton>\n        )}\n        {showClear && <ComboboxClear disabled={disabled} />}\n      </InputGroupAddon>\n      {children}\n    </InputGroup>\n  );\n}\n\nfunction ComboboxContent({\n  className,\n  side = \"bottom\",\n  sideOffset = 6,\n  align = \"start\",\n  alignOffset = 0,\n  anchor,\n  ...props\n}: ComboboxPrimitive.Popup.Props &\n  Pick<\n    ComboboxPrimitive.Positioner.Props,\n    \"side\" | \"align\" | \"sideOffset\" | \"alignOffset\" | \"anchor\"\n  >) {\n  return (\n    <ComboboxPrimitive.Portal>\n      <ComboboxPrimitive.Positioner\n        side={side}\n        sideOffset={sideOffset}\n        align={align}\n        alignOffset={alignOffset}\n        anchor={anchor}\n        className=\"isolate z-50\"\n      >\n        <ComboboxPrimitive.Popup\n          data-slot=\"combobox-content\"\n          data-chips={!!anchor}\n          className={cn(\n            \"bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 *:data-[slot=input-group]:bg-input/30 *:data-[slot=input-group]:border-input/30 max-h-72 min-w-36 overflow-hidden rounded-lg shadow-md ring-1 duration-100 *:data-[slot=input-group]:m-1 *:data-[slot=input-group]:mb-0 *:data-[slot=input-group]:h-8 *:data-[slot=input-group]:shadow-none group/combobox-content relative max-h-(--available-height) w-(--anchor-width) max-w-(--available-width) min-w-[calc(var(--anchor-width)+--spacing(7))] origin-(--transform-origin) data-[chips=true]:min-w-(--anchor-width)\",\n            className,\n          )}\n          {...props}\n        />\n      </ComboboxPrimitive.Positioner>\n    </ComboboxPrimitive.Portal>\n  );\n}\n\nfunction ComboboxList({ className, ...props }: ComboboxPrimitive.List.Props) {\n  return (\n    <ComboboxPrimitive.List\n      data-slot=\"combobox-list\"\n      className={cn(\n        \"no-scrollbar max-h-[min(calc(--spacing(72)---spacing(9)),calc(var(--available-height)---spacing(9)))] scroll-py-1 overflow-y-auto p-1 data-empty:p-0 overflow-y-auto overscroll-contain\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ComboboxItem({\n  className,\n  children,\n  ...props\n}: ComboboxPrimitive.Item.Props) {\n  return (\n    <ComboboxPrimitive.Item\n      data-slot=\"combobox-item\"\n      className={cn(\n        \"data-highlighted:bg-accent data-highlighted:text-accent-foreground not-data-[variant=destructive]:data-highlighted:**:text-accent-foreground gap-2 rounded-md py-1 pr-8 pl-1.5 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex w-full cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    >\n      {children}\n      <ComboboxPrimitive.ItemIndicator\n        render={\n          <span className=\"pointer-events-none absolute right-2 flex size-4 items-center justify-center\" />\n        }\n      >\n        <CheckIcon className=\"pointer-events-none\" />\n      </ComboboxPrimitive.ItemIndicator>\n    </ComboboxPrimitive.Item>\n  );\n}\n\nfunction ComboboxGroup({ className, ...props }: ComboboxPrimitive.Group.Props) {\n  return (\n    <ComboboxPrimitive.Group\n      data-slot=\"combobox-group\"\n      className={cn(className)}\n      {...props}\n    />\n  );\n}\n\nfunction ComboboxLabel({\n  className,\n  ...props\n}: ComboboxPrimitive.GroupLabel.Props) {\n  return (\n    <ComboboxPrimitive.GroupLabel\n      data-slot=\"combobox-label\"\n      className={cn(\"text-muted-foreground px-2 py-1.5 text-xs\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction ComboboxCollection({ ...props }: ComboboxPrimitive.Collection.Props) {\n  return (\n    <ComboboxPrimitive.Collection data-slot=\"combobox-collection\" {...props} />\n  );\n}\n\nfunction ComboboxEmpty({ className, ...props }: ComboboxPrimitive.Empty.Props) {\n  return (\n    <ComboboxPrimitive.Empty\n      data-slot=\"combobox-empty\"\n      className={cn(\n        \"text-muted-foreground hidden w-full justify-center py-2 text-center text-sm group-data-empty/combobox-content:flex\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ComboboxSeparator({\n  className,\n  ...props\n}: ComboboxPrimitive.Separator.Props) {\n  return (\n    <ComboboxPrimitive.Separator\n      data-slot=\"combobox-separator\"\n      className={cn(\"bg-border -mx-1 my-1 h-px\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction ComboboxChips({\n  className,\n  ...props\n}: React.ComponentPropsWithRef<typeof ComboboxPrimitive.Chips> &\n  ComboboxPrimitive.Chips.Props) {\n  return (\n    <ComboboxPrimitive.Chips\n      data-slot=\"combobox-chips\"\n      className={cn(\n        \"dark:bg-input/30 border-input focus-within:border-ring focus-within:ring-ring/50 has-aria-invalid:ring-destructive/20 dark:has-aria-invalid:ring-destructive/40 has-aria-invalid:border-destructive dark:has-aria-invalid:border-destructive/50 flex min-h-8 flex-wrap items-center gap-1 rounded-lg border bg-transparent bg-clip-padding px-2.5 py-1 text-sm transition-colors focus-within:ring-[3px] has-aria-invalid:ring-[3px] has-data-[slot=combobox-chip]:px-1\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ComboboxChip({\n  className,\n  children,\n  showRemove = true,\n  ...props\n}: ComboboxPrimitive.Chip.Props & {\n  showRemove?: boolean;\n}) {\n  return (\n    <ComboboxPrimitive.Chip\n      data-slot=\"combobox-chip\"\n      className={cn(\n        \"bg-muted text-foreground flex h-[calc(--spacing(5.25))] w-fit items-center justify-center gap-1 rounded-sm px-1.5 text-xs font-medium whitespace-nowrap has-data-[slot=combobox-chip-remove]:pr-0 has-disabled:pointer-events-none has-disabled:cursor-not-allowed has-disabled:opacity-50\",\n        className,\n      )}\n      {...props}\n    >\n      {children}\n      {showRemove && (\n        <ComboboxPrimitive.ChipRemove\n          render={<Button variant=\"ghost\" size=\"icon-xs\" />}\n          className=\"-ml-1 opacity-50 hover:opacity-100\"\n          data-slot=\"combobox-chip-remove\"\n        >\n          <XIcon className=\"pointer-events-none\" />\n        </ComboboxPrimitive.ChipRemove>\n      )}\n    </ComboboxPrimitive.Chip>\n  );\n}\n\nfunction ComboboxChipsInput({\n  className,\n  ...props\n}: ComboboxPrimitive.Input.Props) {\n  return (\n    <ComboboxPrimitive.Input\n      data-slot=\"combobox-chip-input\"\n      className={cn(\"min-w-16 flex-1 outline-none\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction useComboboxAnchor() {\n  return React.useRef<HTMLDivElement | null>(null);\n}\n\nexport {\n  Combobox,\n  ComboboxInput,\n  ComboboxContent,\n  ComboboxList,\n  ComboboxItem,\n  ComboboxGroup,\n  ComboboxLabel,\n  ComboboxCollection,\n  ComboboxEmpty,\n  ComboboxSeparator,\n  ComboboxChips,\n  ComboboxChip,\n  ComboboxChipsInput,\n  ComboboxTrigger,\n  ComboboxValue,\n  useComboboxAnchor,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/command.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Command as CommandPrimitive } from \"cmdk\";\n\nimport { cn } from \"@/lib/utils\";\nimport {\n  Dialog,\n  DialogContent,\n  DialogDescription,\n  DialogHeader,\n  DialogTitle,\n} from \"@/components/ui/dialog\";\nimport { InputGroup, InputGroupAddon } from \"@/components/ui/input-group\";\nimport { SearchIcon, CheckIcon } from \"lucide-react\";\n\nfunction Command({\n  className,\n  ...props\n}: React.ComponentProps<typeof CommandPrimitive>) {\n  return (\n    <CommandPrimitive\n      data-slot=\"command\"\n      className={cn(\n        \"bg-popover text-popover-foreground rounded-xl! p-1 flex size-full flex-col overflow-hidden\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction CommandDialog({\n  title = \"Command Palette\",\n  description = \"Search for a command to run...\",\n  children,\n  className,\n  showCloseButton = false,\n  ...props\n}: React.ComponentProps<typeof Dialog> & {\n  title?: string;\n  description?: string;\n  className?: string;\n  showCloseButton?: boolean;\n}) {\n  return (\n    <Dialog {...props}>\n      <DialogHeader className=\"sr-only\">\n        <DialogTitle>{title}</DialogTitle>\n        <DialogDescription>{description}</DialogDescription>\n      </DialogHeader>\n      <DialogContent\n        className={cn(\n          \"rounded-xl! top-1/3 translate-y-0 overflow-hidden p-0\",\n          className,\n        )}\n        showCloseButton={showCloseButton}\n      >\n        {children}\n      </DialogContent>\n    </Dialog>\n  );\n}\n\nfunction CommandInput({\n  className,\n  ...props\n}: React.ComponentProps<typeof CommandPrimitive.Input>) {\n  return (\n    <div data-slot=\"command-input-wrapper\" className=\"p-1 pb-0\">\n      <InputGroup className=\"bg-input/30 border-input/30 h-8! rounded-lg! shadow-none! *:data-[slot=input-group-addon]:pl-2!\">\n        <CommandPrimitive.Input\n          data-slot=\"command-input\"\n          className={cn(\n            \"w-full text-sm outline-hidden disabled:cursor-not-allowed disabled:opacity-50\",\n            className,\n          )}\n          {...props}\n        />\n        <InputGroupAddon>\n          <SearchIcon className=\"size-4 shrink-0 opacity-50\" />\n        </InputGroupAddon>\n      </InputGroup>\n    </div>\n  );\n}\n\nfunction CommandList({\n  className,\n  ...props\n}: React.ComponentProps<typeof CommandPrimitive.List>) {\n  return (\n    <CommandPrimitive.List\n      data-slot=\"command-list\"\n      className={cn(\n        \"no-scrollbar max-h-72 scroll-py-1 outline-none overflow-x-hidden overflow-y-auto\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction CommandEmpty({\n  className,\n  ...props\n}: React.ComponentProps<typeof CommandPrimitive.Empty>) {\n  return (\n    <CommandPrimitive.Empty\n      data-slot=\"command-empty\"\n      className={cn(\"py-6 text-center text-sm\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction CommandGroup({\n  className,\n  ...props\n}: React.ComponentProps<typeof CommandPrimitive.Group>) {\n  return (\n    <CommandPrimitive.Group\n      data-slot=\"command-group\"\n      className={cn(\n        \"text-foreground [&_[cmdk-group-heading]]:text-muted-foreground overflow-hidden p-1 [&_[cmdk-group-heading]]:px-2 [&_[cmdk-group-heading]]:py-1.5 [&_[cmdk-group-heading]]:text-xs [&_[cmdk-group-heading]]:font-medium\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction CommandSeparator({\n  className,\n  ...props\n}: React.ComponentProps<typeof CommandPrimitive.Separator>) {\n  return (\n    <CommandPrimitive.Separator\n      data-slot=\"command-separator\"\n      className={cn(\"bg-border -mx-1 h-px\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction CommandItem({\n  className,\n  children,\n  ...props\n}: React.ComponentProps<typeof CommandPrimitive.Item>) {\n  return (\n    <CommandPrimitive.Item\n      data-slot=\"command-item\"\n      className={cn(\n        \"data-selected:bg-muted data-selected:text-foreground data-selected:*:[svg]:text-foreground relative flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none [&_svg:not([class*='size-'])]:size-4 [[data-slot=dialog-content]_&]:rounded-lg! group/command-item data-[disabled=true]:pointer-events-none data-[disabled=true]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    >\n      {children}\n      <CheckIcon className=\"ml-auto opacity-0 group-has-[[data-slot=command-shortcut]]/command-item:hidden group-data-[checked=true]/command-item:opacity-100\" />\n    </CommandPrimitive.Item>\n  );\n}\n\nfunction CommandShortcut({\n  className,\n  ...props\n}: React.ComponentProps<\"span\">) {\n  return (\n    <span\n      data-slot=\"command-shortcut\"\n      className={cn(\n        \"text-muted-foreground group-data-selected/command-item:text-foreground ml-auto text-xs tracking-widest\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport {\n  Command,\n  CommandDialog,\n  CommandInput,\n  CommandList,\n  CommandEmpty,\n  CommandGroup,\n  CommandItem,\n  CommandShortcut,\n  CommandSeparator,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/context-menu.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { ContextMenu as ContextMenuPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { ChevronRightIcon, CheckIcon } from \"lucide-react\";\n\nfunction ContextMenu({\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.Root>) {\n  return <ContextMenuPrimitive.Root data-slot=\"context-menu\" {...props} />;\n}\n\nfunction ContextMenuTrigger({\n  className,\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.Trigger>) {\n  return (\n    <ContextMenuPrimitive.Trigger\n      data-slot=\"context-menu-trigger\"\n      className={cn(\"select-none\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction ContextMenuGroup({\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.Group>) {\n  return (\n    <ContextMenuPrimitive.Group data-slot=\"context-menu-group\" {...props} />\n  );\n}\n\nfunction ContextMenuPortal({\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.Portal>) {\n  return (\n    <ContextMenuPrimitive.Portal data-slot=\"context-menu-portal\" {...props} />\n  );\n}\n\nfunction ContextMenuSub({\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.Sub>) {\n  return <ContextMenuPrimitive.Sub data-slot=\"context-menu-sub\" {...props} />;\n}\n\nfunction ContextMenuRadioGroup({\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.RadioGroup>) {\n  return (\n    <ContextMenuPrimitive.RadioGroup\n      data-slot=\"context-menu-radio-group\"\n      {...props}\n    />\n  );\n}\n\nfunction ContextMenuContent({\n  className,\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.Content> & {\n  side?: \"top\" | \"right\" | \"bottom\" | \"left\";\n}) {\n  return (\n    <ContextMenuPrimitive.Portal>\n      <ContextMenuPrimitive.Content\n        data-slot=\"context-menu-content\"\n        className={cn(\n          \"data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 bg-popover text-popover-foreground min-w-36 rounded-lg p-1 shadow-md ring-1 duration-100 z-50 max-h-(--radix-context-menu-content-available-height) origin-(--radix-context-menu-content-transform-origin) overflow-x-hidden overflow-y-auto\",\n          className,\n        )}\n        {...props}\n      />\n    </ContextMenuPrimitive.Portal>\n  );\n}\n\nfunction ContextMenuItem({\n  className,\n  inset,\n  variant = \"default\",\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.Item> & {\n  inset?: boolean;\n  variant?: \"default\" | \"destructive\";\n}) {\n  return (\n    <ContextMenuPrimitive.Item\n      data-slot=\"context-menu-item\"\n      data-inset={inset}\n      data-variant={variant}\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:text-destructive focus:*:[svg]:text-accent-foreground gap-1.5 rounded-md px-1.5 py-1 text-sm [&_svg:not([class*='size-'])]:size-4 group/context-menu-item relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ContextMenuSubTrigger({\n  className,\n  inset,\n  children,\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.SubTrigger> & {\n  inset?: boolean;\n}) {\n  return (\n    <ContextMenuPrimitive.SubTrigger\n      data-slot=\"context-menu-sub-trigger\"\n      data-inset={inset}\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground data-open:bg-accent data-open:text-accent-foreground gap-1.5 rounded-md px-1.5 py-1 text-sm [&_svg:not([class*='size-'])]:size-4 flex cursor-default items-center outline-hidden select-none data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    >\n      {children}\n      <ChevronRightIcon className=\"ml-auto\" />\n    </ContextMenuPrimitive.SubTrigger>\n  );\n}\n\nfunction ContextMenuSubContent({\n  className,\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.SubContent>) {\n  return (\n    <ContextMenuPrimitive.SubContent\n      data-slot=\"context-menu-sub-content\"\n      className={cn(\n        \"data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 bg-popover text-popover-foreground min-w-32 rounded-lg border p-1 shadow-lg duration-100 z-50 origin-(--radix-context-menu-content-transform-origin) overflow-hidden\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ContextMenuCheckboxItem({\n  className,\n  children,\n  checked,\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.CheckboxItem>) {\n  return (\n    <ContextMenuPrimitive.CheckboxItem\n      data-slot=\"context-menu-checkbox-item\"\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground gap-1.5 rounded-md py-1 pr-8 pl-1.5 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      checked={checked}\n      {...props}\n    >\n      <span className=\"absolute right-2 pointer-events-none\">\n        <ContextMenuPrimitive.ItemIndicator>\n          <CheckIcon />\n        </ContextMenuPrimitive.ItemIndicator>\n      </span>\n      {children}\n    </ContextMenuPrimitive.CheckboxItem>\n  );\n}\n\nfunction ContextMenuRadioItem({\n  className,\n  children,\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.RadioItem>) {\n  return (\n    <ContextMenuPrimitive.RadioItem\n      data-slot=\"context-menu-radio-item\"\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground gap-1.5 rounded-md py-1 pr-8 pl-1.5 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    >\n      <span className=\"absolute right-2 pointer-events-none\">\n        <ContextMenuPrimitive.ItemIndicator>\n          <CheckIcon />\n        </ContextMenuPrimitive.ItemIndicator>\n      </span>\n      {children}\n    </ContextMenuPrimitive.RadioItem>\n  );\n}\n\nfunction ContextMenuLabel({\n  className,\n  inset,\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.Label> & {\n  inset?: boolean;\n}) {\n  return (\n    <ContextMenuPrimitive.Label\n      data-slot=\"context-menu-label\"\n      data-inset={inset}\n      className={cn(\n        \"text-muted-foreground px-1.5 py-1 text-xs font-medium data-[inset]:pl-8\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ContextMenuSeparator({\n  className,\n  ...props\n}: React.ComponentProps<typeof ContextMenuPrimitive.Separator>) {\n  return (\n    <ContextMenuPrimitive.Separator\n      data-slot=\"context-menu-separator\"\n      className={cn(\"bg-border -mx-1 my-1 h-px\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction ContextMenuShortcut({\n  className,\n  ...props\n}: React.ComponentProps<\"span\">) {\n  return (\n    <span\n      data-slot=\"context-menu-shortcut\"\n      className={cn(\n        \"text-muted-foreground group-focus/context-menu-item:text-accent-foreground ml-auto text-xs tracking-widest\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport {\n  ContextMenu,\n  ContextMenuTrigger,\n  ContextMenuContent,\n  ContextMenuItem,\n  ContextMenuCheckboxItem,\n  ContextMenuRadioItem,\n  ContextMenuLabel,\n  ContextMenuSeparator,\n  ContextMenuShortcut,\n  ContextMenuGroup,\n  ContextMenuPortal,\n  ContextMenuSub,\n  ContextMenuSubContent,\n  ContextMenuSubTrigger,\n  ContextMenuRadioGroup,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/dialog.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Dialog as DialogPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Button } from \"@/components/ui/button\";\nimport { XIcon } from \"lucide-react\";\n\nfunction Dialog({\n  ...props\n}: React.ComponentProps<typeof DialogPrimitive.Root>) {\n  return <DialogPrimitive.Root data-slot=\"dialog\" {...props} />;\n}\n\nfunction DialogTrigger({\n  ...props\n}: React.ComponentProps<typeof DialogPrimitive.Trigger>) {\n  return <DialogPrimitive.Trigger data-slot=\"dialog-trigger\" {...props} />;\n}\n\nfunction DialogPortal({\n  ...props\n}: React.ComponentProps<typeof DialogPrimitive.Portal>) {\n  return <DialogPrimitive.Portal data-slot=\"dialog-portal\" {...props} />;\n}\n\nfunction DialogClose({\n  ...props\n}: React.ComponentProps<typeof DialogPrimitive.Close>) {\n  return <DialogPrimitive.Close data-slot=\"dialog-close\" {...props} />;\n}\n\nfunction DialogOverlay({\n  className,\n  ...props\n}: React.ComponentProps<typeof DialogPrimitive.Overlay>) {\n  return (\n    <DialogPrimitive.Overlay\n      data-slot=\"dialog-overlay\"\n      className={cn(\n        \"data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 bg-black/10 duration-100 supports-backdrop-filter:backdrop-blur-xs fixed inset-0 isolate z-50\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction DialogContent({\n  className,\n  children,\n  showCloseButton = true,\n  ...props\n}: React.ComponentProps<typeof DialogPrimitive.Content> & {\n  showCloseButton?: boolean;\n}) {\n  return (\n    <DialogPortal>\n      <DialogOverlay />\n      <DialogPrimitive.Content\n        data-slot=\"dialog-content\"\n        className={cn(\n          \"bg-background data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 ring-foreground/10 grid max-w-[calc(100%-2rem)] gap-4 rounded-xl p-4 text-sm ring-1 duration-100 sm:max-w-sm fixed top-1/2 left-1/2 z-50 w-full -translate-x-1/2 -translate-y-1/2\",\n          className,\n        )}\n        {...props}\n      >\n        {children}\n        {showCloseButton && (\n          <DialogPrimitive.Close data-slot=\"dialog-close\" asChild>\n            <Button\n              variant=\"ghost\"\n              className=\"absolute top-2 right-2\"\n              size=\"icon-sm\"\n            >\n              <XIcon />\n              <span className=\"sr-only\">Close</span>\n            </Button>\n          </DialogPrimitive.Close>\n        )}\n      </DialogPrimitive.Content>\n    </DialogPortal>\n  );\n}\n\nfunction DialogHeader({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"dialog-header\"\n      className={cn(\"gap-2 flex flex-col\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction DialogFooter({\n  className,\n  showCloseButton = false,\n  children,\n  ...props\n}: React.ComponentProps<\"div\"> & {\n  showCloseButton?: boolean;\n}) {\n  return (\n    <div\n      data-slot=\"dialog-footer\"\n      className={cn(\n        \"bg-muted/50 -mx-4 -mb-4 rounded-b-xl border-t p-4 flex flex-col-reverse gap-2 sm:flex-row sm:justify-end\",\n        className,\n      )}\n      {...props}\n    >\n      {children}\n      {showCloseButton && (\n        <DialogPrimitive.Close asChild>\n          <Button variant=\"outline\">Close</Button>\n        </DialogPrimitive.Close>\n      )}\n    </div>\n  );\n}\n\nfunction DialogTitle({\n  className,\n  ...props\n}: React.ComponentProps<typeof DialogPrimitive.Title>) {\n  return (\n    <DialogPrimitive.Title\n      data-slot=\"dialog-title\"\n      className={cn(\"text-base leading-none font-medium\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction DialogDescription({\n  className,\n  ...props\n}: React.ComponentProps<typeof DialogPrimitive.Description>) {\n  return (\n    <DialogPrimitive.Description\n      data-slot=\"dialog-description\"\n      className={cn(\n        \"text-muted-foreground *:[a]:hover:text-foreground text-sm *:[a]:underline *:[a]:underline-offset-3\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport {\n  Dialog,\n  DialogClose,\n  DialogContent,\n  DialogDescription,\n  DialogFooter,\n  DialogHeader,\n  DialogOverlay,\n  DialogPortal,\n  DialogTitle,\n  DialogTrigger,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/drawer.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Drawer as DrawerPrimitive } from \"vaul\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Drawer({\n  ...props\n}: React.ComponentProps<typeof DrawerPrimitive.Root>) {\n  return <DrawerPrimitive.Root data-slot=\"drawer\" {...props} />;\n}\n\nfunction DrawerTrigger({\n  ...props\n}: React.ComponentProps<typeof DrawerPrimitive.Trigger>) {\n  return <DrawerPrimitive.Trigger data-slot=\"drawer-trigger\" {...props} />;\n}\n\nfunction DrawerPortal({\n  ...props\n}: React.ComponentProps<typeof DrawerPrimitive.Portal>) {\n  return <DrawerPrimitive.Portal data-slot=\"drawer-portal\" {...props} />;\n}\n\nfunction DrawerClose({\n  ...props\n}: React.ComponentProps<typeof DrawerPrimitive.Close>) {\n  return <DrawerPrimitive.Close data-slot=\"drawer-close\" {...props} />;\n}\n\nfunction DrawerOverlay({\n  className,\n  ...props\n}: React.ComponentProps<typeof DrawerPrimitive.Overlay>) {\n  return (\n    <DrawerPrimitive.Overlay\n      data-slot=\"drawer-overlay\"\n      className={cn(\n        \"data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 bg-black/10 supports-backdrop-filter:backdrop-blur-xs fixed inset-0 z-50\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction DrawerContent({\n  className,\n  children,\n  ...props\n}: React.ComponentProps<typeof DrawerPrimitive.Content>) {\n  return (\n    <DrawerPortal data-slot=\"drawer-portal\">\n      <DrawerOverlay />\n      <DrawerPrimitive.Content\n        data-slot=\"drawer-content\"\n        className={cn(\n          \"bg-background flex h-auto flex-col text-sm data-[vaul-drawer-direction=bottom]:inset-x-0 data-[vaul-drawer-direction=bottom]:bottom-0 data-[vaul-drawer-direction=bottom]:mt-24 data-[vaul-drawer-direction=bottom]:max-h-[80vh] data-[vaul-drawer-direction=bottom]:rounded-t-xl data-[vaul-drawer-direction=bottom]:border-t data-[vaul-drawer-direction=left]:inset-y-0 data-[vaul-drawer-direction=left]:left-0 data-[vaul-drawer-direction=left]:w-3/4 data-[vaul-drawer-direction=left]:rounded-r-xl data-[vaul-drawer-direction=left]:border-r data-[vaul-drawer-direction=right]:inset-y-0 data-[vaul-drawer-direction=right]:right-0 data-[vaul-drawer-direction=right]:w-3/4 data-[vaul-drawer-direction=right]:rounded-l-xl data-[vaul-drawer-direction=right]:border-l data-[vaul-drawer-direction=top]:inset-x-0 data-[vaul-drawer-direction=top]:top-0 data-[vaul-drawer-direction=top]:mb-24 data-[vaul-drawer-direction=top]:max-h-[80vh] data-[vaul-drawer-direction=top]:rounded-b-xl data-[vaul-drawer-direction=top]:border-b data-[vaul-drawer-direction=left]:sm:max-w-sm data-[vaul-drawer-direction=right]:sm:max-w-sm group/drawer-content fixed z-50\",\n          className,\n        )}\n        {...props}\n      >\n        <div className=\"bg-muted mx-auto mt-4 hidden h-1 w-[100px] shrink-0 rounded-full group-data-[vaul-drawer-direction=bottom]/drawer-content:block bg-muted mx-auto hidden shrink-0 group-data-[vaul-drawer-direction=bottom]/drawer-content:block\" />\n        {children}\n      </DrawerPrimitive.Content>\n    </DrawerPortal>\n  );\n}\n\nfunction DrawerHeader({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"drawer-header\"\n      className={cn(\n        \"gap-0.5 p-4 group-data-[vaul-drawer-direction=bottom]/drawer-content:text-center group-data-[vaul-drawer-direction=top]/drawer-content:text-center md:gap-0.5 md:text-left flex flex-col\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction DrawerFooter({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"drawer-footer\"\n      className={cn(\"gap-2 p-4 mt-auto flex flex-col\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction DrawerTitle({\n  className,\n  ...props\n}: React.ComponentProps<typeof DrawerPrimitive.Title>) {\n  return (\n    <DrawerPrimitive.Title\n      data-slot=\"drawer-title\"\n      className={cn(\"text-foreground text-base font-medium\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction DrawerDescription({\n  className,\n  ...props\n}: React.ComponentProps<typeof DrawerPrimitive.Description>) {\n  return (\n    <DrawerPrimitive.Description\n      data-slot=\"drawer-description\"\n      className={cn(\"text-muted-foreground text-sm\", className)}\n      {...props}\n    />\n  );\n}\n\nexport {\n  Drawer,\n  DrawerPortal,\n  DrawerOverlay,\n  DrawerTrigger,\n  DrawerClose,\n  DrawerContent,\n  DrawerHeader,\n  DrawerFooter,\n  DrawerTitle,\n  DrawerDescription,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/dropdown-menu.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { DropdownMenu as DropdownMenuPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { CheckIcon, ChevronRightIcon } from \"lucide-react\";\n\nfunction DropdownMenu({\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.Root>) {\n  return <DropdownMenuPrimitive.Root data-slot=\"dropdown-menu\" {...props} />;\n}\n\nfunction DropdownMenuPortal({\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.Portal>) {\n  return (\n    <DropdownMenuPrimitive.Portal data-slot=\"dropdown-menu-portal\" {...props} />\n  );\n}\n\nfunction DropdownMenuTrigger({\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.Trigger>) {\n  return (\n    <DropdownMenuPrimitive.Trigger\n      data-slot=\"dropdown-menu-trigger\"\n      {...props}\n    />\n  );\n}\n\nfunction DropdownMenuContent({\n  className,\n  align = \"start\",\n  sideOffset = 4,\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.Content>) {\n  return (\n    <DropdownMenuPrimitive.Portal>\n      <DropdownMenuPrimitive.Content\n        data-slot=\"dropdown-menu-content\"\n        sideOffset={sideOffset}\n        align={align}\n        className={cn(\n          \"data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 bg-popover text-popover-foreground min-w-32 rounded-lg p-1 shadow-md ring-1 duration-100 z-50 max-h-(--radix-dropdown-menu-content-available-height) w-(--radix-dropdown-menu-trigger-width) origin-(--radix-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto data-[state=closed]:overflow-hidden\",\n          className,\n        )}\n        {...props}\n      />\n    </DropdownMenuPrimitive.Portal>\n  );\n}\n\nfunction DropdownMenuGroup({\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.Group>) {\n  return (\n    <DropdownMenuPrimitive.Group data-slot=\"dropdown-menu-group\" {...props} />\n  );\n}\n\nfunction DropdownMenuItem({\n  className,\n  inset,\n  variant = \"default\",\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.Item> & {\n  inset?: boolean;\n  variant?: \"default\" | \"destructive\";\n}) {\n  return (\n    <DropdownMenuPrimitive.Item\n      data-slot=\"dropdown-menu-item\"\n      data-inset={inset}\n      data-variant={variant}\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:text-destructive not-data-[variant=destructive]:focus:**:text-accent-foreground gap-1.5 rounded-md px-1.5 py-1 text-sm [&_svg:not([class*='size-'])]:size-4 group/dropdown-menu-item relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction DropdownMenuCheckboxItem({\n  className,\n  children,\n  checked,\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.CheckboxItem>) {\n  return (\n    <DropdownMenuPrimitive.CheckboxItem\n      data-slot=\"dropdown-menu-checkbox-item\"\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground focus:**:text-accent-foreground gap-1.5 rounded-md py-1 pr-8 pl-1.5 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      checked={checked}\n      {...props}\n    >\n      <span\n        className=\"pointer-events-none absolute right-2 flex items-center justify-center pointer-events-none\"\n        data-slot=\"dropdown-menu-checkbox-item-indicator\"\n      >\n        <DropdownMenuPrimitive.ItemIndicator>\n          <CheckIcon />\n        </DropdownMenuPrimitive.ItemIndicator>\n      </span>\n      {children}\n    </DropdownMenuPrimitive.CheckboxItem>\n  );\n}\n\nfunction DropdownMenuRadioGroup({\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.RadioGroup>) {\n  return (\n    <DropdownMenuPrimitive.RadioGroup\n      data-slot=\"dropdown-menu-radio-group\"\n      {...props}\n    />\n  );\n}\n\nfunction DropdownMenuRadioItem({\n  className,\n  children,\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.RadioItem>) {\n  return (\n    <DropdownMenuPrimitive.RadioItem\n      data-slot=\"dropdown-menu-radio-item\"\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground focus:**:text-accent-foreground gap-1.5 rounded-md py-1 pr-8 pl-1.5 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    >\n      <span\n        className=\"pointer-events-none absolute right-2 flex items-center justify-center pointer-events-none\"\n        data-slot=\"dropdown-menu-radio-item-indicator\"\n      >\n        <DropdownMenuPrimitive.ItemIndicator>\n          <CheckIcon />\n        </DropdownMenuPrimitive.ItemIndicator>\n      </span>\n      {children}\n    </DropdownMenuPrimitive.RadioItem>\n  );\n}\n\nfunction DropdownMenuLabel({\n  className,\n  inset,\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.Label> & {\n  inset?: boolean;\n}) {\n  return (\n    <DropdownMenuPrimitive.Label\n      data-slot=\"dropdown-menu-label\"\n      data-inset={inset}\n      className={cn(\n        \"text-muted-foreground px-1.5 py-1 text-xs font-medium data-[inset]:pl-8\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction DropdownMenuSeparator({\n  className,\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.Separator>) {\n  return (\n    <DropdownMenuPrimitive.Separator\n      data-slot=\"dropdown-menu-separator\"\n      className={cn(\"bg-border -mx-1 my-1 h-px\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction DropdownMenuShortcut({\n  className,\n  ...props\n}: React.ComponentProps<\"span\">) {\n  return (\n    <span\n      data-slot=\"dropdown-menu-shortcut\"\n      className={cn(\n        \"text-muted-foreground group-focus/dropdown-menu-item:text-accent-foreground ml-auto text-xs tracking-widest\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction DropdownMenuSub({\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.Sub>) {\n  return <DropdownMenuPrimitive.Sub data-slot=\"dropdown-menu-sub\" {...props} />;\n}\n\nfunction DropdownMenuSubTrigger({\n  className,\n  inset,\n  children,\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.SubTrigger> & {\n  inset?: boolean;\n}) {\n  return (\n    <DropdownMenuPrimitive.SubTrigger\n      data-slot=\"dropdown-menu-sub-trigger\"\n      data-inset={inset}\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground data-open:bg-accent data-open:text-accent-foreground not-data-[variant=destructive]:focus:**:text-accent-foreground gap-1.5 rounded-md px-1.5 py-1 text-sm [&_svg:not([class*='size-'])]:size-4 flex cursor-default items-center outline-hidden select-none data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    >\n      {children}\n      <ChevronRightIcon className=\"ml-auto\" />\n    </DropdownMenuPrimitive.SubTrigger>\n  );\n}\n\nfunction DropdownMenuSubContent({\n  className,\n  ...props\n}: React.ComponentProps<typeof DropdownMenuPrimitive.SubContent>) {\n  return (\n    <DropdownMenuPrimitive.SubContent\n      data-slot=\"dropdown-menu-sub-content\"\n      className={cn(\n        \"data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 bg-popover text-popover-foreground min-w-[96px] rounded-md p-1 shadow-lg ring-1 duration-100 z-50 origin-(--radix-dropdown-menu-content-transform-origin) overflow-hidden\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport {\n  DropdownMenu,\n  DropdownMenuPortal,\n  DropdownMenuTrigger,\n  DropdownMenuContent,\n  DropdownMenuGroup,\n  DropdownMenuLabel,\n  DropdownMenuItem,\n  DropdownMenuCheckboxItem,\n  DropdownMenuRadioGroup,\n  DropdownMenuRadioItem,\n  DropdownMenuSeparator,\n  DropdownMenuShortcut,\n  DropdownMenuSub,\n  DropdownMenuSubTrigger,\n  DropdownMenuSubContent,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/empty.tsx",
    "content": "import { cva, type VariantProps } from \"class-variance-authority\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Empty({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"empty\"\n      className={cn(\n        \"gap-4 rounded-lg border-dashed p-6 flex w-full min-w-0 flex-1 flex-col items-center justify-center text-center text-balance\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction EmptyHeader({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"empty-header\"\n      className={cn(\"gap-2 flex max-w-sm flex-col items-center\", className)}\n      {...props}\n    />\n  );\n}\n\nconst emptyMediaVariants = cva(\n  \"mb-2 flex shrink-0 items-center justify-center [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n  {\n    variants: {\n      variant: {\n        default: \"bg-transparent\",\n        icon: \"bg-muted text-foreground flex size-8 shrink-0 items-center justify-center rounded-lg [&_svg:not([class*='size-'])]:size-4\",\n      },\n    },\n    defaultVariants: {\n      variant: \"default\",\n    },\n  },\n);\n\nfunction EmptyMedia({\n  className,\n  variant = \"default\",\n  ...props\n}: React.ComponentProps<\"div\"> & VariantProps<typeof emptyMediaVariants>) {\n  return (\n    <div\n      data-slot=\"empty-icon\"\n      data-variant={variant}\n      className={cn(emptyMediaVariants({ variant, className }))}\n      {...props}\n    />\n  );\n}\n\nfunction EmptyTitle({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"empty-title\"\n      className={cn(\"text-sm font-medium tracking-tight\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction EmptyDescription({ className, ...props }: React.ComponentProps<\"p\">) {\n  return (\n    <div\n      data-slot=\"empty-description\"\n      className={cn(\n        \"text-sm/relaxed text-muted-foreground [&>a:hover]:text-primary [&>a]:underline [&>a]:underline-offset-4\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction EmptyContent({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"empty-content\"\n      className={cn(\n        \"gap-2.5 text-sm flex w-full max-w-sm min-w-0 flex-col items-center text-balance\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport {\n  Empty,\n  EmptyHeader,\n  EmptyTitle,\n  EmptyDescription,\n  EmptyContent,\n  EmptyMedia,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/field.tsx",
    "content": "\"use client\";\n\nimport { useMemo } from \"react\";\nimport { cva, type VariantProps } from \"class-variance-authority\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Label } from \"@/components/ui/label\";\nimport { Separator } from \"@/components/ui/separator\";\n\nfunction FieldSet({ className, ...props }: React.ComponentProps<\"fieldset\">) {\n  return (\n    <fieldset\n      data-slot=\"field-set\"\n      className={cn(\n        \"gap-4 has-[>[data-slot=checkbox-group]]:gap-3 has-[>[data-slot=radio-group]]:gap-3 flex flex-col\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction FieldLegend({\n  className,\n  variant = \"legend\",\n  ...props\n}: React.ComponentProps<\"legend\"> & { variant?: \"legend\" | \"label\" }) {\n  return (\n    <legend\n      data-slot=\"field-legend\"\n      data-variant={variant}\n      className={cn(\n        \"mb-1.5 font-medium data-[variant=label]:text-sm data-[variant=legend]:text-base\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction FieldGroup({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"field-group\"\n      className={cn(\n        \"gap-5 data-[slot=checkbox-group]:gap-3 [&>[data-slot=field-group]]:gap-4 group/field-group @container/field-group flex w-full flex-col\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nconst fieldVariants = cva(\n  \"data-[invalid=true]:text-destructive gap-2 group/field flex w-full\",\n  {\n    variants: {\n      orientation: {\n        vertical: \"flex-col [&>*]:w-full [&>.sr-only]:w-auto\",\n        horizontal:\n          \"flex-row items-center [&>[data-slot=field-label]]:flex-auto has-[>[data-slot=field-content]]:items-start has-[>[data-slot=field-content]]:[&>[role=checkbox],[role=radio]]:mt-px\",\n        responsive:\n          \"flex-col [&>*]:w-full [&>.sr-only]:w-auto @md/field-group:flex-row @md/field-group:items-center @md/field-group:[&>*]:w-auto @md/field-group:[&>[data-slot=field-label]]:flex-auto @md/field-group:has-[>[data-slot=field-content]]:items-start @md/field-group:has-[>[data-slot=field-content]]:[&>[role=checkbox],[role=radio]]:mt-px\",\n      },\n    },\n    defaultVariants: {\n      orientation: \"vertical\",\n    },\n  },\n);\n\nfunction Field({\n  className,\n  orientation = \"vertical\",\n  ...props\n}: React.ComponentProps<\"div\"> & VariantProps<typeof fieldVariants>) {\n  return (\n    <div\n      role=\"group\"\n      data-slot=\"field\"\n      data-orientation={orientation}\n      className={cn(fieldVariants({ orientation }), className)}\n      {...props}\n    />\n  );\n}\n\nfunction FieldContent({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"field-content\"\n      className={cn(\n        \"gap-0.5 group/field-content flex flex-1 flex-col leading-snug\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction FieldLabel({\n  className,\n  ...props\n}: React.ComponentProps<typeof Label>) {\n  return (\n    <Label\n      data-slot=\"field-label\"\n      className={cn(\n        \"has-data-checked:bg-primary/5 has-data-checked:border-primary dark:has-data-checked:bg-primary/10 gap-2 group-data-[disabled=true]/field:opacity-50 has-[>[data-slot=field]]:rounded-lg has-[>[data-slot=field]]:border [&>*]:data-[slot=field]:p-2.5 group/field-label peer/field-label flex w-fit leading-snug\",\n        \"has-[>[data-slot=field]]:w-full has-[>[data-slot=field]]:flex-col\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction FieldTitle({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"field-label\"\n      className={cn(\n        \"gap-2 text-sm font-medium group-data-[disabled=true]/field:opacity-50 flex w-fit items-center leading-snug\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction FieldDescription({ className, ...props }: React.ComponentProps<\"p\">) {\n  return (\n    <p\n      data-slot=\"field-description\"\n      className={cn(\n        \"text-muted-foreground text-left text-sm [[data-variant=legend]+&]:-mt-1.5 leading-normal font-normal group-has-[[data-orientation=horizontal]]/field:text-balance\",\n        \"last:mt-0 nth-last-2:-mt-1\",\n        \"[&>a:hover]:text-primary [&>a]:underline [&>a]:underline-offset-4\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction FieldSeparator({\n  children,\n  className,\n  ...props\n}: React.ComponentProps<\"div\"> & {\n  children?: React.ReactNode;\n}) {\n  return (\n    <div\n      data-slot=\"field-separator\"\n      data-content={!!children}\n      className={cn(\n        \"-my-2 h-5 text-sm group-data-[variant=outline]/field-group:-mb-2 relative\",\n        className,\n      )}\n      {...props}\n    >\n      <Separator className=\"absolute inset-0 top-1/2\" />\n      {children && (\n        <span\n          className=\"text-muted-foreground px-2 bg-background relative mx-auto block w-fit\"\n          data-slot=\"field-separator-content\"\n        >\n          {children}\n        </span>\n      )}\n    </div>\n  );\n}\n\nfunction FieldError({\n  className,\n  children,\n  errors,\n  ...props\n}: React.ComponentProps<\"div\"> & {\n  errors?: Array<{ message?: string } | undefined>;\n}) {\n  const content = useMemo(() => {\n    if (children) {\n      return children;\n    }\n\n    if (!errors?.length) {\n      return null;\n    }\n\n    const uniqueErrors = [\n      ...new Map(errors.map((error) => [error?.message, error])).values(),\n    ];\n\n    if (uniqueErrors?.length == 1) {\n      return uniqueErrors[0]?.message;\n    }\n\n    return (\n      <ul className=\"ml-4 flex list-disc flex-col gap-1\">\n        {uniqueErrors.map(\n          (error, index) =>\n            error?.message && <li key={index}>{error.message}</li>,\n        )}\n      </ul>\n    );\n  }, [children, errors]);\n\n  if (!content) {\n    return null;\n  }\n\n  return (\n    <div\n      role=\"alert\"\n      data-slot=\"field-error\"\n      className={cn(\"text-destructive text-sm font-normal\", className)}\n      {...props}\n    >\n      {content}\n    </div>\n  );\n}\n\nexport {\n  Field,\n  FieldLabel,\n  FieldDescription,\n  FieldError,\n  FieldGroup,\n  FieldLegend,\n  FieldSeparator,\n  FieldSet,\n  FieldContent,\n  FieldTitle,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/hover-card.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { HoverCard as HoverCardPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction HoverCard({\n  ...props\n}: React.ComponentProps<typeof HoverCardPrimitive.Root>) {\n  return <HoverCardPrimitive.Root data-slot=\"hover-card\" {...props} />;\n}\n\nfunction HoverCardTrigger({\n  ...props\n}: React.ComponentProps<typeof HoverCardPrimitive.Trigger>) {\n  return (\n    <HoverCardPrimitive.Trigger data-slot=\"hover-card-trigger\" {...props} />\n  );\n}\n\nfunction HoverCardContent({\n  className,\n  align = \"center\",\n  sideOffset = 4,\n  ...props\n}: React.ComponentProps<typeof HoverCardPrimitive.Content>) {\n  return (\n    <HoverCardPrimitive.Portal data-slot=\"hover-card-portal\">\n      <HoverCardPrimitive.Content\n        data-slot=\"hover-card-content\"\n        align={align}\n        sideOffset={sideOffset}\n        className={cn(\n          \"data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 bg-popover text-popover-foreground w-64 rounded-lg p-2.5 text-sm shadow-md ring-1 duration-100 z-50 origin-(--radix-hover-card-content-transform-origin) outline-hidden\",\n          className,\n        )}\n        {...props}\n      />\n    </HoverCardPrimitive.Portal>\n  );\n}\n\nexport { HoverCard, HoverCardTrigger, HoverCardContent };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/input-group.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { cva, type VariantProps } from \"class-variance-authority\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Button } from \"@/components/ui/button\";\nimport { Input } from \"@/components/ui/input\";\nimport { Textarea } from \"@/components/ui/textarea\";\n\nfunction InputGroup({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"input-group\"\n      role=\"group\"\n      className={cn(\n        \"border-input dark:bg-input/30 has-[[data-slot=input-group-control]:focus-visible]:border-ring has-[[data-slot=input-group-control]:focus-visible]:ring-ring/50 has-[[data-slot][aria-invalid=true]]:ring-destructive/20 has-[[data-slot][aria-invalid=true]]:border-destructive dark:has-[[data-slot][aria-invalid=true]]:ring-destructive/40 has-disabled:bg-input/50 dark:has-disabled:bg-input/80 h-8 rounded-lg border transition-colors has-disabled:opacity-50 has-[[data-slot=input-group-control]:focus-visible]:ring-[3px] has-[[data-slot][aria-invalid=true]]:ring-[3px] has-[>[data-align=block-end]]:h-auto has-[>[data-align=block-end]]:flex-col has-[>[data-align=block-start]]:h-auto has-[>[data-align=block-start]]:flex-col has-[>[data-align=block-end]]:[&>input]:pt-3 has-[>[data-align=block-start]]:[&>input]:pb-3 has-[>[data-align=inline-end]]:[&>input]:pr-1.5 has-[>[data-align=inline-start]]:[&>input]:pl-1.5 [[data-slot=combobox-content]_&]:focus-within:border-inherit [[data-slot=combobox-content]_&]:focus-within:ring-0 group/input-group relative flex w-full min-w-0 items-center outline-none has-[>textarea]:h-auto\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nconst inputGroupAddonVariants = cva(\n  \"text-muted-foreground h-auto gap-2 py-1.5 text-sm font-medium group-data-[disabled=true]/input-group:opacity-50 [&>kbd]:rounded-[calc(var(--radius)-5px)] [&>svg:not([class*='size-'])]:size-4 flex cursor-text items-center justify-center select-none\",\n  {\n    variants: {\n      align: {\n        \"inline-start\":\n          \"pl-2 has-[>button]:ml-[-0.3rem] has-[>kbd]:ml-[-0.15rem] order-first\",\n        \"inline-end\":\n          \"pr-2 has-[>button]:mr-[-0.3rem] has-[>kbd]:mr-[-0.15rem] order-last\",\n        \"block-start\":\n          \"px-2.5 pt-2 group-has-[>input]/input-group:pt-2 [.border-b]:pb-2 order-first w-full justify-start\",\n        \"block-end\":\n          \"px-2.5 pb-2 group-has-[>input]/input-group:pb-2 [.border-t]:pt-2 order-last w-full justify-start\",\n      },\n    },\n    defaultVariants: {\n      align: \"inline-start\",\n    },\n  },\n);\n\nfunction InputGroupAddon({\n  className,\n  align = \"inline-start\",\n  ...props\n}: React.ComponentProps<\"div\"> & VariantProps<typeof inputGroupAddonVariants>) {\n  return (\n    <div\n      role=\"group\"\n      data-slot=\"input-group-addon\"\n      data-align={align}\n      className={cn(inputGroupAddonVariants({ align }), className)}\n      onClick={(e) => {\n        if ((e.target as HTMLElement).closest(\"button\")) {\n          return;\n        }\n        e.currentTarget.parentElement?.querySelector(\"input\")?.focus();\n      }}\n      {...props}\n    />\n  );\n}\n\nconst inputGroupButtonVariants = cva(\n  \"gap-2 text-sm shadow-none flex items-center\",\n  {\n    variants: {\n      size: {\n        xs: \"h-6 gap-1 rounded-[calc(var(--radius)-3px)] px-1.5 [&>svg:not([class*='size-'])]:size-3.5\",\n        sm: \"\",\n        \"icon-xs\":\n          \"size-6 rounded-[calc(var(--radius)-3px)] p-0 has-[>svg]:p-0\",\n        \"icon-sm\": \"size-8 p-0 has-[>svg]:p-0\",\n      },\n    },\n    defaultVariants: {\n      size: \"xs\",\n    },\n  },\n);\n\nfunction InputGroupButton({\n  className,\n  type = \"button\",\n  variant = \"ghost\",\n  size = \"xs\",\n  ...props\n}: Omit<React.ComponentProps<typeof Button>, \"size\"> &\n  VariantProps<typeof inputGroupButtonVariants>) {\n  return (\n    <Button\n      type={type}\n      data-size={size}\n      variant={variant}\n      className={cn(inputGroupButtonVariants({ size }), className)}\n      {...props}\n    />\n  );\n}\n\nfunction InputGroupText({ className, ...props }: React.ComponentProps<\"span\">) {\n  return (\n    <span\n      className={cn(\n        \"text-muted-foreground gap-2 text-sm [&_svg:not([class*='size-'])]:size-4 flex items-center [&_svg]:pointer-events-none\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction InputGroupInput({\n  className,\n  ...props\n}: React.ComponentProps<\"input\">) {\n  return (\n    <Input\n      data-slot=\"input-group-control\"\n      className={cn(\n        \"rounded-none border-0 bg-transparent shadow-none ring-0 focus-visible:ring-0 disabled:bg-transparent aria-invalid:ring-0 dark:bg-transparent dark:disabled:bg-transparent flex-1\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction InputGroupTextarea({\n  className,\n  ...props\n}: React.ComponentProps<\"textarea\">) {\n  return (\n    <Textarea\n      data-slot=\"input-group-control\"\n      className={cn(\n        \"rounded-none border-0 bg-transparent py-2 shadow-none ring-0 focus-visible:ring-0 disabled:bg-transparent aria-invalid:ring-0 dark:bg-transparent dark:disabled:bg-transparent flex-1 resize-none\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport {\n  InputGroup,\n  InputGroupAddon,\n  InputGroupButton,\n  InputGroupText,\n  InputGroupInput,\n  InputGroupTextarea,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/input.tsx",
    "content": "import * as React from \"react\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Input({ className, type, ...props }: React.ComponentProps<\"input\">) {\n  return (\n    <input\n      type={type}\n      data-slot=\"input\"\n      className={cn(\n        \"dark:bg-input/30 border-input focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 disabled:bg-input/50 dark:disabled:bg-input/80 h-8 rounded-lg border bg-transparent px-2.5 py-1 text-base transition-colors file:h-6 file:text-sm file:font-medium focus-visible:ring-[3px] aria-invalid:ring-[3px] md:text-sm file:text-foreground placeholder:text-muted-foreground w-full min-w-0 outline-none file:inline-flex file:border-0 file:bg-transparent disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport { Input };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/item.tsx",
    "content": "import * as React from \"react\";\nimport { cva, type VariantProps } from \"class-variance-authority\";\nimport { Slot } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Separator } from \"@/components/ui/separator\";\n\nfunction ItemGroup({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      role=\"list\"\n      data-slot=\"item-group\"\n      className={cn(\n        \"gap-4 has-[[data-size=sm]]:gap-2.5 has-[[data-size=xs]]:gap-2 group/item-group flex w-full flex-col\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ItemSeparator({\n  className,\n  ...props\n}: React.ComponentProps<typeof Separator>) {\n  return (\n    <Separator\n      data-slot=\"item-separator\"\n      orientation=\"horizontal\"\n      className={cn(\"my-2\", className)}\n      {...props}\n    />\n  );\n}\n\nconst itemVariants = cva(\n  \"[a]:hover:bg-muted rounded-lg border text-sm w-full group/item focus-visible:border-ring focus-visible:ring-ring/50 flex items-center flex-wrap outline-none transition-colors duration-100 focus-visible:ring-[3px] [a]:transition-colors\",\n  {\n    variants: {\n      variant: {\n        default: \"border-transparent\",\n        outline: \"border-border\",\n        muted: \"bg-muted/50 border-transparent\",\n      },\n      size: {\n        default: \"gap-2.5 px-3 py-2.5\",\n        sm: \"gap-2.5 px-3 py-2.5\",\n        xs: \"gap-2 px-2.5 py-2 [[data-slot=dropdown-menu-content]_&]:p-0\",\n      },\n    },\n    defaultVariants: {\n      variant: \"default\",\n      size: \"default\",\n    },\n  },\n);\n\nfunction Item({\n  className,\n  variant = \"default\",\n  size = \"default\",\n  asChild = false,\n  ...props\n}: React.ComponentProps<\"div\"> &\n  VariantProps<typeof itemVariants> & { asChild?: boolean }) {\n  const Comp = asChild ? Slot.Root : \"div\";\n  return (\n    <Comp\n      data-slot=\"item\"\n      data-variant={variant}\n      data-size={size}\n      className={cn(itemVariants({ variant, size, className }))}\n      {...props}\n    />\n  );\n}\n\nconst itemMediaVariants = cva(\n  \"gap-2 group-has-[[data-slot=item-description]]/item:translate-y-0.5 group-has-[[data-slot=item-description]]/item:self-start flex shrink-0 items-center justify-center [&_svg]:pointer-events-none\",\n  {\n    variants: {\n      variant: {\n        default: \"bg-transparent\",\n        icon: \"[&_svg:not([class*='size-'])]:size-4\",\n        image:\n          \"size-10 overflow-hidden rounded-sm group-data-[size=sm]/item:size-8 group-data-[size=xs]/item:size-6 [&_img]:size-full [&_img]:object-cover\",\n      },\n    },\n    defaultVariants: {\n      variant: \"default\",\n    },\n  },\n);\n\nfunction ItemMedia({\n  className,\n  variant = \"default\",\n  ...props\n}: React.ComponentProps<\"div\"> & VariantProps<typeof itemMediaVariants>) {\n  return (\n    <div\n      data-slot=\"item-media\"\n      data-variant={variant}\n      className={cn(itemMediaVariants({ variant, className }))}\n      {...props}\n    />\n  );\n}\n\nfunction ItemContent({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"item-content\"\n      className={cn(\n        \"gap-1 group-data-[size=xs]/item:gap-0 flex flex-1 flex-col [&+[data-slot=item-content]]:flex-none\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ItemTitle({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"item-title\"\n      className={cn(\n        \"gap-2 text-sm leading-snug font-medium underline-offset-4 line-clamp-1 flex w-fit items-center\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ItemDescription({ className, ...props }: React.ComponentProps<\"p\">) {\n  return (\n    <p\n      data-slot=\"item-description\"\n      className={cn(\n        \"text-muted-foreground text-left text-sm leading-normal group-data-[size=xs]/item:text-xs [&>a:hover]:text-primary line-clamp-2 font-normal [&>a]:underline [&>a]:underline-offset-4\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ItemActions({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"item-actions\"\n      className={cn(\"gap-2 flex items-center\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction ItemHeader({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"item-header\"\n      className={cn(\n        \"gap-2 flex basis-full items-center justify-between\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ItemFooter({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"item-footer\"\n      className={cn(\n        \"gap-2 flex basis-full items-center justify-between\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport {\n  Item,\n  ItemMedia,\n  ItemContent,\n  ItemActions,\n  ItemGroup,\n  ItemSeparator,\n  ItemTitle,\n  ItemDescription,\n  ItemHeader,\n  ItemFooter,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/kbd.tsx",
    "content": "import { cn } from \"@/lib/utils\";\n\nfunction Kbd({ className, ...props }: React.ComponentProps<\"kbd\">) {\n  return (\n    <kbd\n      data-slot=\"kbd\"\n      className={cn(\n        \"bg-muted text-muted-foreground [[data-slot=tooltip-content]_&]:bg-background/20 [[data-slot=tooltip-content]_&]:text-background dark:[[data-slot=tooltip-content]_&]:bg-background/10 h-5 w-fit min-w-5 gap-1 rounded-sm px-1 font-sans text-xs font-medium [&_svg:not([class*='size-'])]:size-3 pointer-events-none inline-flex items-center justify-center select-none\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction KbdGroup({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <kbd\n      data-slot=\"kbd-group\"\n      className={cn(\"gap-1 inline-flex items-center\", className)}\n      {...props}\n    />\n  );\n}\n\nexport { Kbd, KbdGroup };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/label.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Label as LabelPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Label({\n  className,\n  ...props\n}: React.ComponentProps<typeof LabelPrimitive.Root>) {\n  return (\n    <LabelPrimitive.Root\n      data-slot=\"label\"\n      className={cn(\n        \"gap-2 text-sm leading-none font-medium group-data-[disabled=true]:opacity-50 peer-disabled:opacity-50 flex items-center select-none group-data-[disabled=true]:pointer-events-none peer-disabled:cursor-not-allowed\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport { Label };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/menubar.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Menubar as MenubarPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { CheckIcon, ChevronRightIcon } from \"lucide-react\";\n\nfunction Menubar({\n  className,\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.Root>) {\n  return (\n    <MenubarPrimitive.Root\n      data-slot=\"menubar\"\n      className={cn(\n        \"bg-background h-8 gap-0.5 rounded-lg border p-1 flex items-center\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction MenubarMenu({\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.Menu>) {\n  return <MenubarPrimitive.Menu data-slot=\"menubar-menu\" {...props} />;\n}\n\nfunction MenubarGroup({\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.Group>) {\n  return <MenubarPrimitive.Group data-slot=\"menubar-group\" {...props} />;\n}\n\nfunction MenubarPortal({\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.Portal>) {\n  return <MenubarPrimitive.Portal data-slot=\"menubar-portal\" {...props} />;\n}\n\nfunction MenubarRadioGroup({\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.RadioGroup>) {\n  return (\n    <MenubarPrimitive.RadioGroup data-slot=\"menubar-radio-group\" {...props} />\n  );\n}\n\nfunction MenubarTrigger({\n  className,\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.Trigger>) {\n  return (\n    <MenubarPrimitive.Trigger\n      data-slot=\"menubar-trigger\"\n      className={cn(\n        \"hover:bg-muted aria-expanded:bg-muted rounded-sm px-1.5 py-px text-sm font-medium flex items-center outline-hidden select-none\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction MenubarContent({\n  className,\n  align = \"start\",\n  alignOffset = -4,\n  sideOffset = 8,\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.Content>) {\n  return (\n    <MenubarPortal>\n      <MenubarPrimitive.Content\n        data-slot=\"menubar-content\"\n        align={align}\n        alignOffset={alignOffset}\n        sideOffset={sideOffset}\n        className={cn(\n          \"bg-popover text-popover-foreground data-open:animate-in data-open:fade-in-0 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 min-w-36 rounded-lg p-1 shadow-md ring-1 duration-100 z-50 origin-(--radix-menubar-content-transform-origin) overflow-hidden\",\n          className,\n        )}\n        {...props}\n      />\n    </MenubarPortal>\n  );\n}\n\nfunction MenubarItem({\n  className,\n  inset,\n  variant = \"default\",\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.Item> & {\n  inset?: boolean;\n  variant?: \"default\" | \"destructive\";\n}) {\n  return (\n    <MenubarPrimitive.Item\n      data-slot=\"menubar-item\"\n      data-inset={inset}\n      data-variant={variant}\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:!text-destructive not-data-[variant=destructive]:focus:**:text-accent-foreground gap-1.5 rounded-md px-1.5 py-1 text-sm data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg:not([class*='size-'])]:size-4 group/menubar-item relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction MenubarCheckboxItem({\n  className,\n  children,\n  checked,\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.CheckboxItem>) {\n  return (\n    <MenubarPrimitive.CheckboxItem\n      data-slot=\"menubar-checkbox-item\"\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground focus:**:text-accent-foreground gap-1.5 rounded-md py-1 pr-1.5 pl-7 text-sm data-disabled:opacity-50 relative flex cursor-default items-center outline-hidden select-none data-disabled:pointer-events-none [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      checked={checked}\n      {...props}\n    >\n      <span className=\"left-1.5 size-4 [&_svg:not([class*='size-'])]:size-4 pointer-events-none absolute flex items-center justify-center\">\n        <MenubarPrimitive.ItemIndicator>\n          <CheckIcon />\n        </MenubarPrimitive.ItemIndicator>\n      </span>\n      {children}\n    </MenubarPrimitive.CheckboxItem>\n  );\n}\n\nfunction MenubarRadioItem({\n  className,\n  children,\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.RadioItem>) {\n  return (\n    <MenubarPrimitive.RadioItem\n      data-slot=\"menubar-radio-item\"\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground focus:**:text-accent-foreground gap-1.5 rounded-md py-1 pr-1.5 pl-7 text-sm data-disabled:opacity-50 [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-disabled:pointer-events-none [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    >\n      <span className=\"left-1.5 size-4 [&_svg:not([class*='size-'])]:size-4 pointer-events-none absolute flex items-center justify-center\">\n        <MenubarPrimitive.ItemIndicator>\n          <CheckIcon />\n        </MenubarPrimitive.ItemIndicator>\n      </span>\n      {children}\n    </MenubarPrimitive.RadioItem>\n  );\n}\n\nfunction MenubarLabel({\n  className,\n  inset,\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.Label> & {\n  inset?: boolean;\n}) {\n  return (\n    <MenubarPrimitive.Label\n      data-slot=\"menubar-label\"\n      data-inset={inset}\n      className={cn(\n        \"px-1.5 py-1 text-sm font-medium data-[inset]:pl-8\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction MenubarSeparator({\n  className,\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.Separator>) {\n  return (\n    <MenubarPrimitive.Separator\n      data-slot=\"menubar-separator\"\n      className={cn(\"bg-border -mx-1 my-1 h-px\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction MenubarShortcut({\n  className,\n  ...props\n}: React.ComponentProps<\"span\">) {\n  return (\n    <span\n      data-slot=\"menubar-shortcut\"\n      className={cn(\n        \"text-muted-foreground group-focus/menubar-item:text-accent-foreground text-xs tracking-widest ml-auto\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction MenubarSub({\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.Sub>) {\n  return <MenubarPrimitive.Sub data-slot=\"menubar-sub\" {...props} />;\n}\n\nfunction MenubarSubTrigger({\n  className,\n  inset,\n  children,\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.SubTrigger> & {\n  inset?: boolean;\n}) {\n  return (\n    <MenubarPrimitive.SubTrigger\n      data-slot=\"menubar-sub-trigger\"\n      data-inset={inset}\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground data-open:bg-accent data-open:text-accent-foreground gap-1.5 rounded-md px-1.5 py-1 text-sm data-[inset]:pl-8 [&_svg:not([class*='size-'])]:size-4 flex cursor-default items-center outline-none select-none\",\n        className,\n      )}\n      {...props}\n    >\n      {children}\n      <ChevronRightIcon className=\"ml-auto size-4\" />\n    </MenubarPrimitive.SubTrigger>\n  );\n}\n\nfunction MenubarSubContent({\n  className,\n  ...props\n}: React.ComponentProps<typeof MenubarPrimitive.SubContent>) {\n  return (\n    <MenubarPrimitive.SubContent\n      data-slot=\"menubar-sub-content\"\n      className={cn(\n        \"bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 min-w-32 rounded-lg p-1 shadow-lg ring-1 duration-100 z-50 origin-(--radix-menubar-content-transform-origin) overflow-hidden\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport {\n  Menubar,\n  MenubarPortal,\n  MenubarMenu,\n  MenubarTrigger,\n  MenubarContent,\n  MenubarGroup,\n  MenubarSeparator,\n  MenubarLabel,\n  MenubarItem,\n  MenubarShortcut,\n  MenubarCheckboxItem,\n  MenubarRadioGroup,\n  MenubarRadioItem,\n  MenubarSub,\n  MenubarSubTrigger,\n  MenubarSubContent,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/native-select.tsx",
    "content": "import * as React from \"react\";\n\nimport { cn } from \"@/lib/utils\";\nimport { ChevronDownIcon } from \"lucide-react\";\n\ntype NativeSelectProps = Omit<React.ComponentProps<\"select\">, \"size\"> & {\n  size?: \"sm\" | \"default\";\n};\n\nfunction NativeSelect({\n  className,\n  size = \"default\",\n  ...props\n}: NativeSelectProps) {\n  return (\n    <div\n      className={cn(\n        \"group/native-select relative w-fit has-[select:disabled]:opacity-50\",\n        className,\n      )}\n      data-slot=\"native-select-wrapper\"\n      data-size={size}\n    >\n      <select\n        data-slot=\"native-select\"\n        data-size={size}\n        className=\"border-input placeholder:text-muted-foreground selection:bg-primary selection:text-primary-foreground dark:bg-input/30 dark:hover:bg-input/50 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 h-8 w-full min-w-0 appearance-none rounded-lg border bg-transparent py-1 pr-8 pl-2.5 text-sm transition-colors select-none focus-visible:ring-[3px] aria-invalid:ring-[3px] data-[size=sm]:h-7 data-[size=sm]:rounded-[min(var(--radius-md),10px)] data-[size=sm]:py-0.5 outline-none disabled:pointer-events-none disabled:cursor-not-allowed\"\n        {...props}\n      />\n      <ChevronDownIcon\n        className=\"text-muted-foreground top-1/2 right-2.5 size-4 -translate-y-1/2 pointer-events-none absolute select-none\"\n        aria-hidden=\"true\"\n        data-slot=\"native-select-icon\"\n      />\n    </div>\n  );\n}\n\nfunction NativeSelectOption({ ...props }: React.ComponentProps<\"option\">) {\n  return <option data-slot=\"native-select-option\" {...props} />;\n}\n\nfunction NativeSelectOptGroup({\n  className,\n  ...props\n}: React.ComponentProps<\"optgroup\">) {\n  return (\n    <optgroup\n      data-slot=\"native-select-optgroup\"\n      className={cn(className)}\n      {...props}\n    />\n  );\n}\n\nexport { NativeSelect, NativeSelectOptGroup, NativeSelectOption };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/navigation-menu.tsx",
    "content": "import * as React from \"react\";\nimport { cva } from \"class-variance-authority\";\nimport { NavigationMenu as NavigationMenuPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { ChevronDownIcon } from \"lucide-react\";\n\nfunction NavigationMenu({\n  className,\n  children,\n  viewport = true,\n  ...props\n}: React.ComponentProps<typeof NavigationMenuPrimitive.Root> & {\n  viewport?: boolean;\n}) {\n  return (\n    <NavigationMenuPrimitive.Root\n      data-slot=\"navigation-menu\"\n      data-viewport={viewport}\n      className={cn(\n        \"max-w-max group/navigation-menu relative flex max-w-max flex-1 items-center justify-center\",\n        className,\n      )}\n      {...props}\n    >\n      {children}\n      {viewport && <NavigationMenuViewport />}\n    </NavigationMenuPrimitive.Root>\n  );\n}\n\nfunction NavigationMenuList({\n  className,\n  ...props\n}: React.ComponentProps<typeof NavigationMenuPrimitive.List>) {\n  return (\n    <NavigationMenuPrimitive.List\n      data-slot=\"navigation-menu-list\"\n      className={cn(\n        \"gap-0 group flex flex-1 list-none items-center justify-center\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction NavigationMenuItem({\n  className,\n  ...props\n}: React.ComponentProps<typeof NavigationMenuPrimitive.Item>) {\n  return (\n    <NavigationMenuPrimitive.Item\n      data-slot=\"navigation-menu-item\"\n      className={cn(\"relative\", className)}\n      {...props}\n    />\n  );\n}\n\nconst navigationMenuTriggerStyle = cva(\n  \"bg-background hover:bg-muted focus:bg-muted data-open:hover:bg-muted data-open:focus:bg-muted data-open:bg-muted/50 focus-visible:ring-ring/50 data-popup-open:bg-muted/50 data-popup-open:hover:bg-muted rounded-lg px-2.5 py-1.5 text-sm font-medium transition-all focus-visible:ring-[3px] focus-visible:outline-1 disabled:opacity-50 group/navigation-menu-trigger inline-flex h-9 w-max items-center justify-center disabled:pointer-events-none outline-none\",\n);\n\nfunction NavigationMenuTrigger({\n  className,\n  children,\n  ...props\n}: React.ComponentProps<typeof NavigationMenuPrimitive.Trigger>) {\n  return (\n    <NavigationMenuPrimitive.Trigger\n      data-slot=\"navigation-menu-trigger\"\n      className={cn(navigationMenuTriggerStyle(), \"group\", className)}\n      {...props}\n    >\n      {children}{\" \"}\n      <ChevronDownIcon\n        className=\"relative top-[1px] ml-1 size-3 transition duration-300 group-data-open/navigation-menu-trigger:rotate-180 group-data-popup-open/navigation-menu-trigger:rotate-180\"\n        aria-hidden=\"true\"\n      />\n    </NavigationMenuPrimitive.Trigger>\n  );\n}\n\nfunction NavigationMenuContent({\n  className,\n  ...props\n}: React.ComponentProps<typeof NavigationMenuPrimitive.Content>) {\n  return (\n    <NavigationMenuPrimitive.Content\n      data-slot=\"navigation-menu-content\"\n      className={cn(\n        \"data-[motion^=from-]:animate-in data-[motion^=to-]:animate-out data-[motion^=from-]:fade-in data-[motion^=to-]:fade-out data-[motion=from-end]:slide-in-from-right-52 data-[motion=from-start]:slide-in-from-left-52 data-[motion=to-end]:slide-out-to-right-52 data-[motion=to-start]:slide-out-to-left-52 group-data-[viewport=false]/navigation-menu:bg-popover group-data-[viewport=false]/navigation-menu:text-popover-foreground group-data-[viewport=false]/navigation-menu:data-open:animate-in group-data-[viewport=false]/navigation-menu:data-closed:animate-out group-data-[viewport=false]/navigation-menu:data-closed:zoom-out-95 group-data-[viewport=false]/navigation-menu:data-open:zoom-in-95 group-data-[viewport=false]/navigation-menu:data-open:fade-in-0 group-data-[viewport=false]/navigation-menu:data-closed:fade-out-0 group-data-[viewport=false]/navigation-menu:ring-foreground/10 p-1 ease-[cubic-bezier(0.22,1,0.36,1)] group-data-[viewport=false]/navigation-menu:rounded-lg group-data-[viewport=false]/navigation-menu:shadow group-data-[viewport=false]/navigation-menu:ring-1 group-data-[viewport=false]/navigation-menu:duration-300 top-0 left-0 w-full group-data-[viewport=false]/navigation-menu:top-full group-data-[viewport=false]/navigation-menu:mt-1.5 group-data-[viewport=false]/navigation-menu:overflow-hidden **:data-[slot=navigation-menu-link]:focus:ring-0 **:data-[slot=navigation-menu-link]:focus:outline-none md:absolute md:w-auto\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction NavigationMenuViewport({\n  className,\n  ...props\n}: React.ComponentProps<typeof NavigationMenuPrimitive.Viewport>) {\n  return (\n    <div\n      className={cn(\n        \"absolute top-full left-0 isolate z-50 flex justify-center\",\n      )}\n    >\n      <NavigationMenuPrimitive.Viewport\n        data-slot=\"navigation-menu-viewport\"\n        className={cn(\n          \"bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:zoom-out-95 data-open:zoom-in-90 ring-foreground/10 rounded-lg shadow ring-1 duration-100 origin-top-center relative mt-1.5 h-[var(--radix-navigation-menu-viewport-height)] w-full overflow-hidden md:w-[var(--radix-navigation-menu-viewport-width)]\",\n          className,\n        )}\n        {...props}\n      />\n    </div>\n  );\n}\n\nfunction NavigationMenuLink({\n  className,\n  ...props\n}: React.ComponentProps<typeof NavigationMenuPrimitive.Link>) {\n  return (\n    <NavigationMenuPrimitive.Link\n      data-slot=\"navigation-menu-link\"\n      className={cn(\n        \"data-active:focus:bg-muted data-active:hover:bg-muted data-active:bg-muted/50 focus-visible:ring-ring/50 hover:bg-muted focus:bg-muted flex items-center gap-2 rounded-lg p-2 text-sm transition-all outline-none focus-visible:ring-[3px] focus-visible:outline-1 [&_svg:not([class*='size-'])]:size-4 [[data-slot=navigation-menu-content]_&]:rounded-md\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction NavigationMenuIndicator({\n  className,\n  ...props\n}: React.ComponentProps<typeof NavigationMenuPrimitive.Indicator>) {\n  return (\n    <NavigationMenuPrimitive.Indicator\n      data-slot=\"navigation-menu-indicator\"\n      className={cn(\n        \"data-[state=visible]:animate-in data-[state=hidden]:animate-out data-[state=hidden]:fade-out data-[state=visible]:fade-in top-full z-[1] flex h-1.5 items-end justify-center overflow-hidden\",\n        className,\n      )}\n      {...props}\n    >\n      <div className=\"bg-border rounded-tl-sm shadow-md relative top-[60%] h-2 w-2 rotate-45\" />\n    </NavigationMenuPrimitive.Indicator>\n  );\n}\n\nexport {\n  NavigationMenu,\n  NavigationMenuList,\n  NavigationMenuItem,\n  NavigationMenuContent,\n  NavigationMenuTrigger,\n  NavigationMenuLink,\n  NavigationMenuIndicator,\n  NavigationMenuViewport,\n  navigationMenuTriggerStyle,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/pagination.tsx",
    "content": "import * as React from \"react\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Button } from \"@/components/ui/button\";\nimport {\n  ChevronLeftIcon,\n  ChevronRightIcon,\n  MoreHorizontalIcon,\n} from \"lucide-react\";\n\nfunction Pagination({ className, ...props }: React.ComponentProps<\"nav\">) {\n  return (\n    <nav\n      role=\"navigation\"\n      aria-label=\"pagination\"\n      data-slot=\"pagination\"\n      className={cn(\"mx-auto flex w-full justify-center\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction PaginationContent({\n  className,\n  ...props\n}: React.ComponentProps<\"ul\">) {\n  return (\n    <ul\n      data-slot=\"pagination-content\"\n      className={cn(\"gap-0.5 flex items-center\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction PaginationItem({ ...props }: React.ComponentProps<\"li\">) {\n  return <li data-slot=\"pagination-item\" {...props} />;\n}\n\ntype PaginationLinkProps = {\n  isActive?: boolean;\n} & Pick<React.ComponentProps<typeof Button>, \"size\"> &\n  React.ComponentProps<\"a\">;\n\nfunction PaginationLink({\n  className,\n  isActive,\n  size = \"icon\",\n  ...props\n}: PaginationLinkProps) {\n  return (\n    <Button\n      asChild\n      variant={isActive ? \"outline\" : \"ghost\"}\n      size={size}\n      className={cn(className)}\n    >\n      <a\n        aria-current={isActive ? \"page\" : undefined}\n        data-slot=\"pagination-link\"\n        data-active={isActive}\n        {...props}\n      />\n    </Button>\n  );\n}\n\nfunction PaginationPrevious({\n  className,\n  ...props\n}: React.ComponentProps<typeof PaginationLink>) {\n  return (\n    <PaginationLink\n      aria-label=\"Go to previous page\"\n      size=\"default\"\n      className={cn(\"pl-1.5!\", className)}\n      {...props}\n    >\n      <ChevronLeftIcon data-icon=\"inline-start\" />\n      <span className=\"hidden sm:block\">Previous</span>\n    </PaginationLink>\n  );\n}\n\nfunction PaginationNext({\n  className,\n  ...props\n}: React.ComponentProps<typeof PaginationLink>) {\n  return (\n    <PaginationLink\n      aria-label=\"Go to next page\"\n      size=\"default\"\n      className={cn(\"pr-1.5!\", className)}\n      {...props}\n    >\n      <span className=\"hidden sm:block\">Next</span>\n      <ChevronRightIcon data-icon=\"inline-end\" />\n    </PaginationLink>\n  );\n}\n\nfunction PaginationEllipsis({\n  className,\n  ...props\n}: React.ComponentProps<\"span\">) {\n  return (\n    <span\n      aria-hidden\n      data-slot=\"pagination-ellipsis\"\n      className={cn(\n        \"size-8 items-center justify-center [&_svg:not([class*='size-'])]:size-4 flex items-center justify-center\",\n        className,\n      )}\n      {...props}\n    >\n      <MoreHorizontalIcon />\n      <span className=\"sr-only\">More pages</span>\n    </span>\n  );\n}\n\nexport {\n  Pagination,\n  PaginationContent,\n  PaginationEllipsis,\n  PaginationItem,\n  PaginationLink,\n  PaginationNext,\n  PaginationPrevious,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/popover.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Popover as PopoverPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Popover({\n  ...props\n}: React.ComponentProps<typeof PopoverPrimitive.Root>) {\n  return <PopoverPrimitive.Root data-slot=\"popover\" {...props} />;\n}\n\nfunction PopoverTrigger({\n  ...props\n}: React.ComponentProps<typeof PopoverPrimitive.Trigger>) {\n  return <PopoverPrimitive.Trigger data-slot=\"popover-trigger\" {...props} />;\n}\n\nfunction PopoverContent({\n  className,\n  align = \"center\",\n  sideOffset = 4,\n  ...props\n}: React.ComponentProps<typeof PopoverPrimitive.Content>) {\n  return (\n    <PopoverPrimitive.Portal>\n      <PopoverPrimitive.Content\n        data-slot=\"popover-content\"\n        align={align}\n        sideOffset={sideOffset}\n        className={cn(\n          \"bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 flex flex-col gap-2.5 rounded-lg p-2.5 text-sm shadow-md ring-1 duration-100 z-50 w-72 origin-(--radix-popover-content-transform-origin) outline-hidden\",\n          className,\n        )}\n        {...props}\n      />\n    </PopoverPrimitive.Portal>\n  );\n}\n\nfunction PopoverAnchor({\n  ...props\n}: React.ComponentProps<typeof PopoverPrimitive.Anchor>) {\n  return <PopoverPrimitive.Anchor data-slot=\"popover-anchor\" {...props} />;\n}\n\nfunction PopoverHeader({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"popover-header\"\n      className={cn(\"flex flex-col gap-0.5 text-sm\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction PopoverTitle({ className, ...props }: React.ComponentProps<\"h2\">) {\n  return (\n    <div\n      data-slot=\"popover-title\"\n      className={cn(\"font-medium\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction PopoverDescription({\n  className,\n  ...props\n}: React.ComponentProps<\"p\">) {\n  return (\n    <p\n      data-slot=\"popover-description\"\n      className={cn(\"text-muted-foreground\", className)}\n      {...props}\n    />\n  );\n}\n\nexport {\n  Popover,\n  PopoverAnchor,\n  PopoverContent,\n  PopoverDescription,\n  PopoverHeader,\n  PopoverTitle,\n  PopoverTrigger,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/progress.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Progress as ProgressPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Progress({\n  className,\n  value,\n  ...props\n}: React.ComponentProps<typeof ProgressPrimitive.Root>) {\n  return (\n    <ProgressPrimitive.Root\n      data-slot=\"progress\"\n      className={cn(\n        \"bg-muted h-1 rounded-full relative flex w-full items-center overflow-x-hidden\",\n        className,\n      )}\n      {...props}\n    >\n      <ProgressPrimitive.Indicator\n        data-slot=\"progress-indicator\"\n        className=\"bg-primary size-full flex-1 transition-all\"\n        style={{ transform: `translateX(-${100 - (value || 0)}%)` }}\n      />\n    </ProgressPrimitive.Root>\n  );\n}\n\nexport { Progress };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/radio-group.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { RadioGroup as RadioGroupPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { CircleIcon } from \"lucide-react\";\n\nfunction RadioGroup({\n  className,\n  ...props\n}: React.ComponentProps<typeof RadioGroupPrimitive.Root>) {\n  return (\n    <RadioGroupPrimitive.Root\n      data-slot=\"radio-group\"\n      className={cn(\"grid gap-2 w-full\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction RadioGroupItem({\n  className,\n  ...props\n}: React.ComponentProps<typeof RadioGroupPrimitive.Item>) {\n  return (\n    <RadioGroupPrimitive.Item\n      data-slot=\"radio-group-item\"\n      className={cn(\n        \"border-input text-primary dark:bg-input/30 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 flex size-4 rounded-full focus-visible:ring-[3px] aria-invalid:ring-[3px] group/radio-group-item peer relative aspect-square shrink-0 border outline-none after:absolute after:-inset-x-3 after:-inset-y-2 disabled:cursor-not-allowed disabled:opacity-50\",\n        className,\n      )}\n      {...props}\n    >\n      <RadioGroupPrimitive.Indicator\n        data-slot=\"radio-group-indicator\"\n        className=\"group-aria-invalid/radio-group-item:text-destructive text-primary flex size-4 items-center justify-center\"\n      >\n        <CircleIcon className=\"absolute top-1/2 left-1/2 size-2 -translate-x-1/2 -translate-y-1/2 fill-current\" />\n      </RadioGroupPrimitive.Indicator>\n    </RadioGroupPrimitive.Item>\n  );\n}\n\nexport { RadioGroup, RadioGroupItem };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/resizable.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport * as ResizablePrimitive from \"react-resizable-panels\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction ResizablePanelGroup({\n  className,\n  ...props\n}: React.ComponentProps<typeof ResizablePrimitive.PanelGroup>) {\n  return (\n    <ResizablePrimitive.PanelGroup\n      data-slot=\"resizable-panel-group\"\n      className={cn(\n        \"flex h-full w-full data-[panel-group-direction=vertical]:flex-col\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction ResizablePanel({\n  ...props\n}: React.ComponentProps<typeof ResizablePrimitive.Panel>) {\n  return <ResizablePrimitive.Panel data-slot=\"resizable-panel\" {...props} />;\n}\n\nfunction ResizableHandle({\n  withHandle,\n  className,\n  ...props\n}: React.ComponentProps<typeof ResizablePrimitive.PanelResizeHandle> & {\n  withHandle?: boolean;\n}) {\n  return (\n    <ResizablePrimitive.PanelResizeHandle\n      data-slot=\"resizable-handle\"\n      className={cn(\n        \"bg-border focus-visible:ring-ring relative flex w-px items-center justify-center after:absolute after:inset-y-0 after:left-1/2 after:w-1 after:-translate-x-1/2 focus-visible:ring-1 focus-visible:ring-offset-1 focus-visible:outline-hidden data-[panel-group-direction=vertical]:h-px data-[panel-group-direction=vertical]:w-full data-[panel-group-direction=vertical]:after:left-0 data-[panel-group-direction=vertical]:after:h-1 data-[panel-group-direction=vertical]:after:w-full data-[panel-group-direction=vertical]:after:translate-x-0 data-[panel-group-direction=vertical]:after:-translate-y-1/2 [&[data-panel-group-direction=vertical]>div]:rotate-90\",\n        className,\n      )}\n      {...props}\n    >\n      {withHandle && (\n        <div className=\"bg-border h-6 w-1 rounded-lg z-10 flex shrink-0\" />\n      )}\n    </ResizablePrimitive.PanelResizeHandle>\n  );\n}\n\nexport { ResizablePanelGroup, ResizablePanel, ResizableHandle };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/scroll-area.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { ScrollArea as ScrollAreaPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction ScrollArea({\n  className,\n  children,\n  ...props\n}: React.ComponentProps<typeof ScrollAreaPrimitive.Root>) {\n  return (\n    <ScrollAreaPrimitive.Root\n      data-slot=\"scroll-area\"\n      className={cn(\"relative\", className)}\n      {...props}\n    >\n      <ScrollAreaPrimitive.Viewport\n        data-slot=\"scroll-area-viewport\"\n        className=\"focus-visible:ring-ring/50 size-full rounded-[inherit] transition-[color,box-shadow] outline-none focus-visible:ring-[3px] focus-visible:outline-1\"\n      >\n        {children}\n      </ScrollAreaPrimitive.Viewport>\n      <ScrollBar />\n      <ScrollAreaPrimitive.Corner />\n    </ScrollAreaPrimitive.Root>\n  );\n}\n\nfunction ScrollBar({\n  className,\n  orientation = \"vertical\",\n  ...props\n}: React.ComponentProps<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>) {\n  return (\n    <ScrollAreaPrimitive.ScrollAreaScrollbar\n      data-slot=\"scroll-area-scrollbar\"\n      data-orientation={orientation}\n      orientation={orientation}\n      className={cn(\n        \"data-horizontal:h-2.5 data-horizontal:flex-col data-horizontal:border-t data-horizontal:border-t-transparent data-vertical:h-full data-vertical:w-2.5 data-vertical:border-l data-vertical:border-l-transparent flex touch-none p-px transition-colors select-none\",\n        className,\n      )}\n      {...props}\n    >\n      <ScrollAreaPrimitive.ScrollAreaThumb\n        data-slot=\"scroll-area-thumb\"\n        className=\"rounded-full bg-border relative flex-1\"\n      />\n    </ScrollAreaPrimitive.ScrollAreaScrollbar>\n  );\n}\n\nexport { ScrollArea, ScrollBar };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/select.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Select as SelectPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { ChevronDownIcon, CheckIcon, ChevronUpIcon } from \"lucide-react\";\n\nfunction Select({\n  ...props\n}: React.ComponentProps<typeof SelectPrimitive.Root>) {\n  return <SelectPrimitive.Root data-slot=\"select\" {...props} />;\n}\n\nfunction SelectGroup({\n  className,\n  ...props\n}: React.ComponentProps<typeof SelectPrimitive.Group>) {\n  return (\n    <SelectPrimitive.Group\n      data-slot=\"select-group\"\n      className={cn(\"scroll-my-1 p-1\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SelectValue({\n  ...props\n}: React.ComponentProps<typeof SelectPrimitive.Value>) {\n  return <SelectPrimitive.Value data-slot=\"select-value\" {...props} />;\n}\n\nfunction SelectTrigger({\n  className,\n  size = \"default\",\n  children,\n  ...props\n}: React.ComponentProps<typeof SelectPrimitive.Trigger> & {\n  size?: \"sm\" | \"default\";\n}) {\n  return (\n    <SelectPrimitive.Trigger\n      data-slot=\"select-trigger\"\n      data-size={size}\n      className={cn(\n        \"border-input data-[placeholder]:text-muted-foreground dark:bg-input/30 dark:hover:bg-input/50 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 gap-1.5 rounded-lg border bg-transparent py-2 pr-2 pl-2.5 text-sm transition-colors select-none focus-visible:ring-[3px] aria-invalid:ring-[3px] data-[size=default]:h-8 data-[size=sm]:h-7 data-[size=sm]:rounded-[min(var(--radius-md),10px)] *:data-[slot=select-value]:flex *:data-[slot=select-value]:gap-1.5 [&_svg:not([class*='size-'])]:size-4 flex w-fit items-center justify-between whitespace-nowrap outline-none disabled:cursor-not-allowed disabled:opacity-50 *:data-[slot=select-value]:line-clamp-1 *:data-[slot=select-value]:flex *:data-[slot=select-value]:items-center [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    >\n      {children}\n      <SelectPrimitive.Icon asChild>\n        <ChevronDownIcon className=\"text-muted-foreground size-4 pointer-events-none\" />\n      </SelectPrimitive.Icon>\n    </SelectPrimitive.Trigger>\n  );\n}\n\nfunction SelectContent({\n  className,\n  children,\n  position = \"item-aligned\",\n  align = \"center\",\n  ...props\n}: React.ComponentProps<typeof SelectPrimitive.Content>) {\n  return (\n    <SelectPrimitive.Portal>\n      <SelectPrimitive.Content\n        data-slot=\"select-content\"\n        className={cn(\n          \"bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/10 min-w-36 rounded-lg shadow-md ring-1 duration-100 relative z-50 max-h-(--radix-select-content-available-height) origin-(--radix-select-content-transform-origin) overflow-x-hidden overflow-y-auto\",\n          position === \"popper\" &&\n            \"data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1\",\n          className,\n        )}\n        position={position}\n        align={align}\n        {...props}\n      >\n        <SelectScrollUpButton />\n        <SelectPrimitive.Viewport\n          data-position={position}\n          className={cn(\n            \"data-[position=popper]:h-[var(--radix-select-trigger-height)] data-[position=popper]:w-full data-[position=popper]:min-w-[var(--radix-select-trigger-width)]\",\n            position === \"popper\" && \"\",\n          )}\n        >\n          {children}\n        </SelectPrimitive.Viewport>\n        <SelectScrollDownButton />\n      </SelectPrimitive.Content>\n    </SelectPrimitive.Portal>\n  );\n}\n\nfunction SelectLabel({\n  className,\n  ...props\n}: React.ComponentProps<typeof SelectPrimitive.Label>) {\n  return (\n    <SelectPrimitive.Label\n      data-slot=\"select-label\"\n      className={cn(\"text-muted-foreground px-1.5 py-1 text-xs\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SelectItem({\n  className,\n  children,\n  ...props\n}: React.ComponentProps<typeof SelectPrimitive.Item>) {\n  return (\n    <SelectPrimitive.Item\n      data-slot=\"select-item\"\n      className={cn(\n        \"focus:bg-accent focus:text-accent-foreground not-data-[variant=destructive]:focus:**:text-accent-foreground gap-1.5 rounded-md py-1 pr-8 pl-1.5 text-sm [&_svg:not([class*='size-'])]:size-4 *:[span]:last:flex *:[span]:last:items-center *:[span]:last:gap-2 relative flex w-full cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    >\n      <span className=\"pointer-events-none absolute right-2 flex size-4 items-center justify-center\">\n        <SelectPrimitive.ItemIndicator>\n          <CheckIcon className=\"pointer-events-none\" />\n        </SelectPrimitive.ItemIndicator>\n      </span>\n      <SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>\n    </SelectPrimitive.Item>\n  );\n}\n\nfunction SelectSeparator({\n  className,\n  ...props\n}: React.ComponentProps<typeof SelectPrimitive.Separator>) {\n  return (\n    <SelectPrimitive.Separator\n      data-slot=\"select-separator\"\n      className={cn(\"bg-border -mx-1 my-1 h-px pointer-events-none\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SelectScrollUpButton({\n  className,\n  ...props\n}: React.ComponentProps<typeof SelectPrimitive.ScrollUpButton>) {\n  return (\n    <SelectPrimitive.ScrollUpButton\n      data-slot=\"select-scroll-up-button\"\n      className={cn(\n        \"bg-popover z-10 flex cursor-default items-center justify-center py-1 [&_svg:not([class*='size-'])]:size-4\",\n        className,\n      )}\n      {...props}\n    >\n      <ChevronUpIcon />\n    </SelectPrimitive.ScrollUpButton>\n  );\n}\n\nfunction SelectScrollDownButton({\n  className,\n  ...props\n}: React.ComponentProps<typeof SelectPrimitive.ScrollDownButton>) {\n  return (\n    <SelectPrimitive.ScrollDownButton\n      data-slot=\"select-scroll-down-button\"\n      className={cn(\n        \"bg-popover z-10 flex cursor-default items-center justify-center py-1 [&_svg:not([class*='size-'])]:size-4\",\n        className,\n      )}\n      {...props}\n    >\n      <ChevronDownIcon />\n    </SelectPrimitive.ScrollDownButton>\n  );\n}\n\nexport {\n  Select,\n  SelectContent,\n  SelectGroup,\n  SelectItem,\n  SelectLabel,\n  SelectScrollDownButton,\n  SelectScrollUpButton,\n  SelectSeparator,\n  SelectTrigger,\n  SelectValue,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/separator.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Separator as SeparatorPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Separator({\n  className,\n  orientation = \"horizontal\",\n  decorative = true,\n  ...props\n}: React.ComponentProps<typeof SeparatorPrimitive.Root>) {\n  return (\n    <SeparatorPrimitive.Root\n      data-slot=\"separator\"\n      decorative={decorative}\n      orientation={orientation}\n      className={cn(\n        \"bg-border shrink-0 data-[orientation=horizontal]:h-px data-[orientation=horizontal]:w-full data-[orientation=vertical]:w-px data-[orientation=vertical]:self-stretch\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport { Separator };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/sheet.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Dialog as SheetPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Button } from \"@/components/ui/button\";\nimport { XIcon } from \"lucide-react\";\n\nfunction Sheet({ ...props }: React.ComponentProps<typeof SheetPrimitive.Root>) {\n  return <SheetPrimitive.Root data-slot=\"sheet\" {...props} />;\n}\n\nfunction SheetTrigger({\n  ...props\n}: React.ComponentProps<typeof SheetPrimitive.Trigger>) {\n  return <SheetPrimitive.Trigger data-slot=\"sheet-trigger\" {...props} />;\n}\n\nfunction SheetClose({\n  ...props\n}: React.ComponentProps<typeof SheetPrimitive.Close>) {\n  return <SheetPrimitive.Close data-slot=\"sheet-close\" {...props} />;\n}\n\nfunction SheetPortal({\n  ...props\n}: React.ComponentProps<typeof SheetPrimitive.Portal>) {\n  return <SheetPrimitive.Portal data-slot=\"sheet-portal\" {...props} />;\n}\n\nfunction SheetOverlay({\n  className,\n  ...props\n}: React.ComponentProps<typeof SheetPrimitive.Overlay>) {\n  return (\n    <SheetPrimitive.Overlay\n      data-slot=\"sheet-overlay\"\n      className={cn(\n        \"data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 bg-black/10 duration-100 data-ending-style:opacity-0 data-starting-style:opacity-0 supports-backdrop-filter:backdrop-blur-xs fixed inset-0 z-50\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction SheetContent({\n  className,\n  children,\n  side = \"right\",\n  showCloseButton = true,\n  ...props\n}: React.ComponentProps<typeof SheetPrimitive.Content> & {\n  side?: \"top\" | \"right\" | \"bottom\" | \"left\";\n  showCloseButton?: boolean;\n}) {\n  return (\n    <SheetPortal>\n      <SheetOverlay />\n      <SheetPrimitive.Content\n        data-slot=\"sheet-content\"\n        data-side={side}\n        className={cn(\n          \"bg-background data-open:animate-in data-closed:animate-out data-[side=right]:data-closed:slide-out-to-right-10 data-[side=right]:data-open:slide-in-from-right-10 data-[side=left]:data-closed:slide-out-to-left-10 data-[side=left]:data-open:slide-in-from-left-10 data-[side=top]:data-closed:slide-out-to-top-10 data-[side=top]:data-open:slide-in-from-top-10 data-closed:fade-out-0 data-open:fade-in-0 data-[side=bottom]:data-closed:slide-out-to-bottom-10 data-[side=bottom]:data-open:slide-in-from-bottom-10 fixed z-50 flex flex-col gap-4 bg-clip-padding text-sm shadow-lg transition duration-200 ease-in-out data-[side=bottom]:inset-x-0 data-[side=bottom]:bottom-0 data-[side=bottom]:h-auto data-[side=bottom]:border-t data-[side=left]:inset-y-0 data-[side=left]:left-0 data-[side=left]:h-full data-[side=left]:w-3/4 data-[side=left]:border-r data-[side=right]:inset-y-0 data-[side=right]:right-0 data-[side=right]:h-full data-[side=right]:w-3/4 data-[side=right]:border-l data-[side=top]:inset-x-0 data-[side=top]:top-0 data-[side=top]:h-auto data-[side=top]:border-b data-[side=left]:sm:max-w-sm data-[side=right]:sm:max-w-sm\",\n          className,\n        )}\n        {...props}\n      >\n        {children}\n        {showCloseButton && (\n          <SheetPrimitive.Close data-slot=\"sheet-close\" asChild>\n            <Button\n              variant=\"ghost\"\n              className=\"absolute top-3 right-3\"\n              size=\"icon-sm\"\n            >\n              <XIcon />\n              <span className=\"sr-only\">Close</span>\n            </Button>\n          </SheetPrimitive.Close>\n        )}\n      </SheetPrimitive.Content>\n    </SheetPortal>\n  );\n}\n\nfunction SheetHeader({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"sheet-header\"\n      className={cn(\"gap-0.5 p-4 flex flex-col\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SheetFooter({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"sheet-footer\"\n      className={cn(\"gap-2 p-4 mt-auto flex flex-col\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SheetTitle({\n  className,\n  ...props\n}: React.ComponentProps<typeof SheetPrimitive.Title>) {\n  return (\n    <SheetPrimitive.Title\n      data-slot=\"sheet-title\"\n      className={cn(\"text-foreground text-base font-medium\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SheetDescription({\n  className,\n  ...props\n}: React.ComponentProps<typeof SheetPrimitive.Description>) {\n  return (\n    <SheetPrimitive.Description\n      data-slot=\"sheet-description\"\n      className={cn(\"text-muted-foreground text-sm\", className)}\n      {...props}\n    />\n  );\n}\n\nexport {\n  Sheet,\n  SheetTrigger,\n  SheetClose,\n  SheetContent,\n  SheetHeader,\n  SheetFooter,\n  SheetTitle,\n  SheetDescription,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/sidebar.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { cva, type VariantProps } from \"class-variance-authority\";\nimport { Slot } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Button } from \"@/components/ui/button\";\nimport { Input } from \"@/components/ui/input\";\nimport { Separator } from \"@/components/ui/separator\";\nimport {\n  Sheet,\n  SheetContent,\n  SheetDescription,\n  SheetHeader,\n  SheetTitle,\n} from \"@/components/ui/sheet\";\nimport { Skeleton } from \"@/components/ui/skeleton\";\nimport {\n  Tooltip,\n  TooltipContent,\n  TooltipTrigger,\n} from \"@/components/ui/tooltip\";\nimport { useIsMobile } from \"@/hooks/use-mobile\";\nimport { PanelLeftIcon } from \"lucide-react\";\n\nconst SIDEBAR_COOKIE_NAME = \"sidebar_state\";\nconst SIDEBAR_COOKIE_MAX_AGE = 60 * 60 * 24 * 7;\nconst SIDEBAR_WIDTH = \"16rem\";\nconst SIDEBAR_WIDTH_MOBILE = \"18rem\";\nconst SIDEBAR_WIDTH_ICON = \"3rem\";\nconst SIDEBAR_KEYBOARD_SHORTCUT = \"b\";\n\ntype SidebarContextProps = {\n  state: \"expanded\" | \"collapsed\";\n  open: boolean;\n  setOpen: (open: boolean) => void;\n  openMobile: boolean;\n  setOpenMobile: (open: boolean) => void;\n  isMobile: boolean;\n  toggleSidebar: () => void;\n};\n\nconst SidebarContext = React.createContext<SidebarContextProps | null>(null);\n\nfunction useSidebar() {\n  const context = React.useContext(SidebarContext);\n  if (!context) {\n    throw new Error(\"useSidebar must be used within a SidebarProvider.\");\n  }\n\n  return context;\n}\n\nfunction SidebarProvider({\n  defaultOpen = true,\n  open: openProp,\n  onOpenChange: setOpenProp,\n  className,\n  style,\n  children,\n  ...props\n}: React.ComponentProps<\"div\"> & {\n  defaultOpen?: boolean;\n  open?: boolean;\n  onOpenChange?: (open: boolean) => void;\n}) {\n  const isMobile = useIsMobile();\n  const [openMobile, setOpenMobile] = React.useState(false);\n\n  // This is the internal state of the sidebar.\n  // We use openProp and setOpenProp for control from outside the component.\n  const [_open, _setOpen] = React.useState(defaultOpen);\n  const open = openProp ?? _open;\n  const setOpen = React.useCallback(\n    (value: boolean | ((value: boolean) => boolean)) => {\n      const openState = typeof value === \"function\" ? value(open) : value;\n      if (setOpenProp) {\n        setOpenProp(openState);\n      } else {\n        _setOpen(openState);\n      }\n\n      // This sets the cookie to keep the sidebar state.\n      document.cookie = `${SIDEBAR_COOKIE_NAME}=${openState}; path=/; max-age=${SIDEBAR_COOKIE_MAX_AGE}`;\n    },\n    [setOpenProp, open],\n  );\n\n  // Helper to toggle the sidebar.\n  const toggleSidebar = React.useCallback(() => {\n    return isMobile ? setOpenMobile((open) => !open) : setOpen((open) => !open);\n  }, [isMobile, setOpen, setOpenMobile]);\n\n  // Adds a keyboard shortcut to toggle the sidebar.\n  React.useEffect(() => {\n    const handleKeyDown = (event: KeyboardEvent) => {\n      if (\n        event.key === SIDEBAR_KEYBOARD_SHORTCUT &&\n        (event.metaKey || event.ctrlKey)\n      ) {\n        event.preventDefault();\n        toggleSidebar();\n      }\n    };\n\n    window.addEventListener(\"keydown\", handleKeyDown);\n    return () => window.removeEventListener(\"keydown\", handleKeyDown);\n  }, [toggleSidebar]);\n\n  // We add a state so that we can do data-state=\"expanded\" or \"collapsed\".\n  // This makes it easier to style the sidebar with Tailwind classes.\n  const state = open ? \"expanded\" : \"collapsed\";\n\n  const contextValue = React.useMemo<SidebarContextProps>(\n    () => ({\n      state,\n      open,\n      setOpen,\n      isMobile,\n      openMobile,\n      setOpenMobile,\n      toggleSidebar,\n    }),\n    [state, open, setOpen, isMobile, openMobile, setOpenMobile, toggleSidebar],\n  );\n\n  return (\n    <SidebarContext.Provider value={contextValue}>\n      <div\n        data-slot=\"sidebar-wrapper\"\n        style={\n          {\n            \"--sidebar-width\": SIDEBAR_WIDTH,\n            \"--sidebar-width-icon\": SIDEBAR_WIDTH_ICON,\n            ...style,\n          } as React.CSSProperties\n        }\n        className={cn(\n          \"group/sidebar-wrapper has-data-[variant=inset]:bg-sidebar flex min-h-svh w-full\",\n          className,\n        )}\n        {...props}\n      >\n        {children}\n      </div>\n    </SidebarContext.Provider>\n  );\n}\n\nfunction Sidebar({\n  side = \"left\",\n  variant = \"sidebar\",\n  collapsible = \"offExamples\",\n  className,\n  children,\n  ...props\n}: React.ComponentProps<\"div\"> & {\n  side?: \"left\" | \"right\";\n  variant?: \"sidebar\" | \"floating\" | \"inset\";\n  collapsible?: \"offExamples\" | \"icon\" | \"none\";\n}) {\n  const { isMobile, state, openMobile, setOpenMobile } = useSidebar();\n\n  if (collapsible === \"none\") {\n    return (\n      <div\n        data-slot=\"sidebar\"\n        className={cn(\n          \"bg-sidebar text-sidebar-foreground flex h-full w-(--sidebar-width) flex-col\",\n          className,\n        )}\n        {...props}\n      >\n        {children}\n      </div>\n    );\n  }\n\n  if (isMobile) {\n    return (\n      <Sheet open={openMobile} onOpenChange={setOpenMobile} {...props}>\n        <SheetContent\n          data-sidebar=\"sidebar\"\n          data-slot=\"sidebar\"\n          data-mobile=\"true\"\n          className=\"bg-sidebar text-sidebar-foreground w-(--sidebar-width) p-0 [&>button]:hidden\"\n          style={\n            {\n              \"--sidebar-width\": SIDEBAR_WIDTH_MOBILE,\n            } as React.CSSProperties\n          }\n          side={side}\n        >\n          <SheetHeader className=\"sr-only\">\n            <SheetTitle>Sidebar</SheetTitle>\n            <SheetDescription>Displays the mobile sidebar.</SheetDescription>\n          </SheetHeader>\n          <div className=\"flex h-full w-full flex-col\">{children}</div>\n        </SheetContent>\n      </Sheet>\n    );\n  }\n\n  return (\n    <div\n      className=\"group peer text-sidebar-foreground hidden md:block\"\n      data-state={state}\n      data-collapsible={state === \"collapsed\" ? collapsible : \"\"}\n      data-variant={variant}\n      data-side={side}\n      data-slot=\"sidebar\"\n    >\n      {/* This is what handles the sidebar gap on desktop */}\n      <div\n        data-slot=\"sidebar-gap\"\n        className={cn(\n          \"transition-[width] duration-200 ease-linear relative w-(--sidebar-width) bg-transparent\",\n          \"group-data-[collapsible=offExamples]:w-0\",\n          \"group-data-[side=right]:rotate-180\",\n          variant === \"floating\" || variant === \"inset\"\n            ? \"group-data-[collapsible=icon]:w-[calc(var(--sidebar-width-icon)+(--spacing(4)))]\"\n            : \"group-data-[collapsible=icon]:w-(--sidebar-width-icon)\",\n        )}\n      />\n      <div\n        data-slot=\"sidebar-container\"\n        className={cn(\n          \"fixed inset-y-0 z-10 hidden h-svh w-(--sidebar-width) transition-[left,right,width] duration-200 ease-linear md:flex\",\n          side === \"left\"\n            ? \"left-0 group-data-[collapsible=offExamples]:left-[calc(var(--sidebar-width)*-1)]\"\n            : \"right-0 group-data-[collapsible=offExamples]:right-[calc(var(--sidebar-width)*-1)]\",\n          // Adjust the padding for floating and inset variants.\n          variant === \"floating\" || variant === \"inset\"\n            ? \"p-2 group-data-[collapsible=icon]:w-[calc(var(--sidebar-width-icon)+(--spacing(4))+2px)]\"\n            : \"group-data-[collapsible=icon]:w-(--sidebar-width-icon) group-data-[side=left]:border-r group-data-[side=right]:border-l\",\n          className,\n        )}\n        {...props}\n      >\n        <div\n          data-sidebar=\"sidebar\"\n          data-slot=\"sidebar-inner\"\n          className=\"bg-sidebar group-data-[variant=floating]:ring-sidebar-border group-data-[variant=floating]:rounded-lg group-data-[variant=floating]:shadow-sm group-data-[variant=floating]:ring-1 flex size-full flex-col\"\n        >\n          {children}\n        </div>\n      </div>\n    </div>\n  );\n}\n\nfunction SidebarTrigger({\n  className,\n  onClick,\n  ...props\n}: React.ComponentProps<typeof Button>) {\n  const { toggleSidebar } = useSidebar();\n\n  return (\n    <Button\n      data-sidebar=\"trigger\"\n      data-slot=\"sidebar-trigger\"\n      variant=\"ghost\"\n      size=\"icon-sm\"\n      className={cn(className)}\n      onClick={(event) => {\n        onClick?.(event);\n        toggleSidebar();\n      }}\n      {...props}\n    >\n      <PanelLeftIcon />\n      <span className=\"sr-only\">Toggle Sidebar</span>\n    </Button>\n  );\n}\n\nfunction SidebarRail({ className, ...props }: React.ComponentProps<\"button\">) {\n  const { toggleSidebar } = useSidebar();\n\n  return (\n    <button\n      data-sidebar=\"rail\"\n      data-slot=\"sidebar-rail\"\n      aria-label=\"Toggle Sidebar\"\n      tabIndex={-1}\n      onClick={toggleSidebar}\n      title=\"Toggle Sidebar\"\n      className={cn(\n        \"hover:after:bg-sidebar-border absolute inset-y-0 z-20 hidden w-4 -translate-x-1/2 transition-all ease-linear group-data-[side=left]:-right-4 group-data-[side=right]:left-0 after:absolute after:inset-y-0 after:left-1/2 after:w-[2px] sm:flex\",\n        \"in-data-[side=left]:cursor-w-resize in-data-[side=right]:cursor-e-resize\",\n        \"[[data-side=left][data-state=collapsed]_&]:cursor-e-resize [[data-side=right][data-state=collapsed]_&]:cursor-w-resize\",\n        \"hover:group-data-[collapsible=offExamples]:bg-sidebar group-data-[collapsible=offExamples]:translate-x-0 group-data-[collapsible=offExamples]:after:left-full\",\n        \"[[data-side=left][data-collapsible=offExamples]_&]:-right-2\",\n        \"[[data-side=right][data-collapsible=offExamples]_&]:-left-2\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarInset({ className, ...props }: React.ComponentProps<\"main\">) {\n  return (\n    <main\n      data-slot=\"sidebar-inset\"\n      className={cn(\n        \"bg-background md:peer-data-[variant=inset]:m-2 md:peer-data-[variant=inset]:ml-0 md:peer-data-[variant=inset]:rounded-xl md:peer-data-[variant=inset]:shadow-sm md:peer-data-[variant=inset]:peer-data-[state=collapsed]:ml-2 relative flex w-full flex-1 flex-col\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarInput({\n  className,\n  ...props\n}: React.ComponentProps<typeof Input>) {\n  return (\n    <Input\n      data-slot=\"sidebar-input\"\n      data-sidebar=\"input\"\n      className={cn(\"bg-background h-8 w-full shadow-none\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarHeader({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"sidebar-header\"\n      data-sidebar=\"header\"\n      className={cn(\"gap-2 p-2 flex flex-col\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarFooter({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"sidebar-footer\"\n      data-sidebar=\"footer\"\n      className={cn(\"gap-2 p-2 flex flex-col\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarSeparator({\n  className,\n  ...props\n}: React.ComponentProps<typeof Separator>) {\n  return (\n    <Separator\n      data-slot=\"sidebar-separator\"\n      data-sidebar=\"separator\"\n      className={cn(\"bg-sidebar-border mx-2 w-auto\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarContent({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"sidebar-content\"\n      data-sidebar=\"content\"\n      className={cn(\n        \"no-scrollbar gap-0 flex min-h-0 flex-1 flex-col overflow-auto group-data-[collapsible=icon]:overflow-hidden\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarGroup({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"sidebar-group\"\n      data-sidebar=\"group\"\n      className={cn(\"p-2 relative flex w-full min-w-0 flex-col\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarGroupLabel({\n  className,\n  asChild = false,\n  ...props\n}: React.ComponentProps<\"div\"> & { asChild?: boolean }) {\n  const Comp = asChild ? Slot.Root : \"div\";\n\n  return (\n    <Comp\n      data-slot=\"sidebar-group-label\"\n      data-sidebar=\"group-label\"\n      className={cn(\n        \"text-sidebar-foreground/70 ring-sidebar-ring h-8 rounded-md px-2 text-xs font-medium transition-[margin,opacity] duration-200 ease-linear group-data-[collapsible=icon]:-mt-8 group-data-[collapsible=icon]:opacity-0 focus-visible:ring-2 [&>svg]:size-4 flex shrink-0 items-center outline-hidden [&>svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarGroupAction({\n  className,\n  asChild = false,\n  ...props\n}: React.ComponentProps<\"button\"> & { asChild?: boolean }) {\n  const Comp = asChild ? Slot.Root : \"button\";\n\n  return (\n    <Comp\n      data-slot=\"sidebar-group-action\"\n      data-sidebar=\"group-action\"\n      className={cn(\n        \"text-sidebar-foreground ring-sidebar-ring hover:bg-sidebar-accent hover:text-sidebar-accent-foreground absolute top-3.5 right-3 w-5 rounded-md p-0 focus-visible:ring-2 [&>svg]:size-4 flex aspect-square items-center justify-center outline-hidden transition-transform group-data-[collapsible=icon]:hidden after:absolute after:-inset-2 md:after:hidden [&>svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarGroupContent({\n  className,\n  ...props\n}: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"sidebar-group-content\"\n      data-sidebar=\"group-content\"\n      className={cn(\"text-sm w-full\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarMenu({ className, ...props }: React.ComponentProps<\"ul\">) {\n  return (\n    <ul\n      data-slot=\"sidebar-menu\"\n      data-sidebar=\"menu\"\n      className={cn(\"gap-0 flex w-full min-w-0 flex-col\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarMenuItem({ className, ...props }: React.ComponentProps<\"li\">) {\n  return (\n    <li\n      data-slot=\"sidebar-menu-item\"\n      data-sidebar=\"menu-item\"\n      className={cn(\"group/menu-item relative\", className)}\n      {...props}\n    />\n  );\n}\n\nconst sidebarMenuButtonVariants = cva(\n  \"ring-sidebar-ring hover:bg-sidebar-accent hover:text-sidebar-accent-foreground active:bg-sidebar-accent active:text-sidebar-accent-foreground data-active:bg-sidebar-accent data-active:text-sidebar-accent-foreground data-open:hover:bg-sidebar-accent data-open:hover:text-sidebar-accent-foreground gap-2 rounded-md p-2 text-left text-sm transition-[width,height,padding] group-has-data-[sidebar=menu-action]/menu-item:pr-8 group-data-[collapsible=icon]:size-8! group-data-[collapsible=icon]:p-2! focus-visible:ring-2 data-active:font-medium peer/menu-button flex w-full items-center overflow-hidden outline-hidden disabled:pointer-events-none disabled:opacity-50 aria-disabled:pointer-events-none aria-disabled:opacity-50 [&>span:last-child]:truncate [&_svg]:size-4 [&_svg]:shrink-0\",\n  {\n    variants: {\n      variant: {\n        default: \"hover:bg-sidebar-accent hover:text-sidebar-accent-foreground\",\n        outline:\n          \"bg-background hover:bg-sidebar-accent hover:text-sidebar-accent-foreground shadow-[0_0_0_1px_hsl(var(--sidebar-border))] hover:shadow-[0_0_0_1px_hsl(var(--sidebar-accent))]\",\n      },\n      size: {\n        default: \"h-8 text-sm\",\n        sm: \"h-7 text-xs\",\n        lg: \"h-12 text-sm group-data-[collapsible=icon]:p-0!\",\n      },\n    },\n    defaultVariants: {\n      variant: \"default\",\n      size: \"default\",\n    },\n  },\n);\n\nfunction SidebarMenuButton({\n  asChild = false,\n  isActive = false,\n  variant = \"default\",\n  size = \"default\",\n  tooltip,\n  className,\n  ...props\n}: React.ComponentProps<\"button\"> & {\n  asChild?: boolean;\n  isActive?: boolean;\n  tooltip?: string | React.ComponentProps<typeof TooltipContent>;\n} & VariantProps<typeof sidebarMenuButtonVariants>) {\n  const Comp = asChild ? Slot.Root : \"button\";\n  const { isMobile, state } = useSidebar();\n\n  const button = (\n    <Comp\n      data-slot=\"sidebar-menu-button\"\n      data-sidebar=\"menu-button\"\n      data-size={size}\n      data-active={isActive}\n      className={cn(sidebarMenuButtonVariants({ variant, size }), className)}\n      {...props}\n    />\n  );\n\n  if (!tooltip) {\n    return button;\n  }\n\n  if (typeof tooltip === \"string\") {\n    tooltip = {\n      children: tooltip,\n    };\n  }\n\n  return (\n    <Tooltip>\n      <TooltipTrigger asChild>{button}</TooltipTrigger>\n      <TooltipContent\n        side=\"right\"\n        align=\"center\"\n        hidden={state !== \"collapsed\" || isMobile}\n        {...tooltip}\n      />\n    </Tooltip>\n  );\n}\n\nfunction SidebarMenuAction({\n  className,\n  asChild = false,\n  showOnHover = false,\n  ...props\n}: React.ComponentProps<\"button\"> & {\n  asChild?: boolean;\n  showOnHover?: boolean;\n}) {\n  const Comp = asChild ? Slot.Root : \"button\";\n\n  return (\n    <Comp\n      data-slot=\"sidebar-menu-action\"\n      data-sidebar=\"menu-action\"\n      className={cn(\n        \"text-sidebar-foreground ring-sidebar-ring hover:bg-sidebar-accent hover:text-sidebar-accent-foreground peer-hover/menu-button:text-sidebar-accent-foreground absolute top-1.5 right-1 aspect-square w-5 rounded-md p-0 peer-data-[size=default]/menu-button:top-1.5 peer-data-[size=lg]/menu-button:top-2.5 peer-data-[size=sm]/menu-button:top-1 focus-visible:ring-2 [&>svg]:size-4 flex items-center justify-center outline-hidden transition-transform group-data-[collapsible=icon]:hidden after:absolute after:-inset-2 md:after:hidden [&>svg]:shrink-0\",\n        showOnHover &&\n          \"peer-data-active/menu-button:text-sidebar-accent-foreground group-focus-within/menu-item:opacity-100 group-hover/menu-item:opacity-100 data-open:opacity-100 md:opacity-0\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarMenuBadge({\n  className,\n  ...props\n}: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"sidebar-menu-badge\"\n      data-sidebar=\"menu-badge\"\n      className={cn(\n        \"text-sidebar-foreground peer-hover/menu-button:text-sidebar-accent-foreground peer-data-active/menu-button:text-sidebar-accent-foreground pointer-events-none absolute right-1 flex h-5 min-w-5 rounded-md px-1 text-xs font-medium peer-data-[size=default]/menu-button:top-1.5 peer-data-[size=lg]/menu-button:top-2.5 peer-data-[size=sm]/menu-button:top-1 flex items-center justify-center tabular-nums select-none group-data-[collapsible=icon]:hidden\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarMenuSkeleton({\n  className,\n  showIcon = false,\n  ...props\n}: React.ComponentProps<\"div\"> & {\n  showIcon?: boolean;\n}) {\n  // Random width between 50 to 90%.\n  const [width] = React.useState(() => {\n    return `${Math.floor(Math.random() * 40) + 50}%`;\n  });\n\n  return (\n    <div\n      data-slot=\"sidebar-menu-skeleton\"\n      data-sidebar=\"menu-skeleton\"\n      className={cn(\"h-8 gap-2 rounded-md px-2 flex items-center\", className)}\n      {...props}\n    >\n      {showIcon && (\n        <Skeleton\n          className=\"size-4 rounded-md\"\n          data-sidebar=\"menu-skeleton-icon\"\n        />\n      )}\n      <Skeleton\n        className=\"h-4 max-w-(--skeleton-width) flex-1\"\n        data-sidebar=\"menu-skeleton-text\"\n        style={\n          {\n            \"--skeleton-width\": width,\n          } as React.CSSProperties\n        }\n      />\n    </div>\n  );\n}\n\nfunction SidebarMenuSub({ className, ...props }: React.ComponentProps<\"ul\">) {\n  return (\n    <ul\n      data-slot=\"sidebar-menu-sub\"\n      data-sidebar=\"menu-sub\"\n      className={cn(\n        \"border-sidebar-border mx-3.5 translate-x-px gap-1 border-l px-2.5 py-0.5 group-data-[collapsible=icon]:hidden flex min-w-0 flex-col\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarMenuSubItem({\n  className,\n  ...props\n}: React.ComponentProps<\"li\">) {\n  return (\n    <li\n      data-slot=\"sidebar-menu-sub-item\"\n      data-sidebar=\"menu-sub-item\"\n      className={cn(\"group/menu-sub-item relative\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction SidebarMenuSubButton({\n  asChild = false,\n  size = \"md\",\n  isActive = false,\n  className,\n  ...props\n}: React.ComponentProps<\"a\"> & {\n  asChild?: boolean;\n  size?: \"sm\" | \"md\";\n  isActive?: boolean;\n}) {\n  const Comp = asChild ? Slot.Root : \"a\";\n\n  return (\n    <Comp\n      data-slot=\"sidebar-menu-sub-button\"\n      data-sidebar=\"menu-sub-button\"\n      data-size={size}\n      data-active={isActive}\n      className={cn(\n        \"text-sidebar-foreground ring-sidebar-ring hover:bg-sidebar-accent hover:text-sidebar-accent-foreground active:bg-sidebar-accent active:text-sidebar-accent-foreground [&>svg]:text-sidebar-accent-foreground data-active:bg-sidebar-accent data-active:text-sidebar-accent-foreground h-7 gap-2 rounded-md px-2 focus-visible:ring-2 data-[size=md]:text-sm data-[size=sm]:text-xs [&>svg]:size-4 flex min-w-0 -translate-x-px items-center overflow-hidden outline-hidden group-data-[collapsible=icon]:hidden disabled:pointer-events-none disabled:opacity-50 aria-disabled:pointer-events-none aria-disabled:opacity-50 [&>span:last-child]:truncate [&>svg]:shrink-0\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport {\n  Sidebar,\n  SidebarContent,\n  SidebarFooter,\n  SidebarGroup,\n  SidebarGroupAction,\n  SidebarGroupContent,\n  SidebarGroupLabel,\n  SidebarHeader,\n  SidebarInput,\n  SidebarInset,\n  SidebarMenu,\n  SidebarMenuAction,\n  SidebarMenuBadge,\n  SidebarMenuButton,\n  SidebarMenuItem,\n  SidebarMenuSkeleton,\n  SidebarMenuSub,\n  SidebarMenuSubButton,\n  SidebarMenuSubItem,\n  SidebarProvider,\n  SidebarRail,\n  SidebarSeparator,\n  SidebarTrigger,\n  useSidebar,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/skeleton.tsx",
    "content": "import { cn } from \"@/lib/utils\";\n\nfunction Skeleton({ className, ...props }: React.ComponentProps<\"div\">) {\n  return (\n    <div\n      data-slot=\"skeleton\"\n      className={cn(\"bg-muted rounded-md animate-pulse\", className)}\n      {...props}\n    />\n  );\n}\n\nexport { Skeleton };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/slider.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Slider as SliderPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Slider({\n  className,\n  defaultValue,\n  value,\n  min = 0,\n  max = 100,\n  ...props\n}: React.ComponentProps<typeof SliderPrimitive.Root>) {\n  const _values = React.useMemo(\n    () =>\n      Array.isArray(value)\n        ? value\n        : Array.isArray(defaultValue)\n          ? defaultValue\n          : [min, max],\n    [value, defaultValue, min, max],\n  );\n\n  return (\n    <SliderPrimitive.Root\n      data-slot=\"slider\"\n      defaultValue={defaultValue}\n      value={value}\n      min={min}\n      max={max}\n      className={cn(\n        \"data-vertical:min-h-40 relative flex w-full touch-none items-center select-none data-disabled:opacity-50 data-vertical:h-full data-vertical:w-auto data-vertical:flex-col\",\n        className,\n      )}\n      {...props}\n    >\n      <SliderPrimitive.Track\n        data-slot=\"slider-track\"\n        className=\"bg-muted rounded-full data-horizontal:h-1 data-horizontal:w-full data-vertical:h-full data-vertical:w-1 bg-muted relative grow overflow-hidden data-horizontal:w-full data-vertical:h-full\"\n      >\n        <SliderPrimitive.Range\n          data-slot=\"slider-range\"\n          className=\"bg-primary absolute select-none data-horizontal:h-full data-vertical:w-full\"\n        />\n      </SliderPrimitive.Track>\n      {Array.from({ length: _values.length }, (_, index) => (\n        <SliderPrimitive.Thumb\n          data-slot=\"slider-thumb\"\n          key={index}\n          className=\"border-ring ring-ring/50 relative size-3 rounded-full border bg-white transition-[color,box-shadow] after:absolute after:-inset-2 hover:ring-[3px] focus-visible:ring-[3px] focus-visible:outline-hidden active:ring-[3px] block shrink-0 select-none disabled:pointer-events-none disabled:opacity-50\"\n        />\n      ))}\n    </SliderPrimitive.Root>\n  );\n}\n\nexport { Slider };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/sonner.tsx",
    "content": "\"use client\";\n\nimport { useTheme } from \"next-themes\";\nimport { Toaster as Sonner, type ToasterProps } from \"sonner\";\nimport {\n  CircleCheckIcon,\n  InfoIcon,\n  TriangleAlertIcon,\n  OctagonXIcon,\n  Loader2Icon,\n} from \"lucide-react\";\n\nconst Toaster = ({ ...props }: ToasterProps) => {\n  const { theme = \"system\" } = useTheme();\n\n  return (\n    <Sonner\n      theme={theme as ToasterProps[\"theme\"]}\n      className=\"toaster group\"\n      icons={{\n        success: <CircleCheckIcon className=\"size-4\" />,\n        info: <InfoIcon className=\"size-4\" />,\n        warning: <TriangleAlertIcon className=\"size-4\" />,\n        error: <OctagonXIcon className=\"size-4\" />,\n        loading: <Loader2Icon className=\"size-4 animate-spin\" />,\n      }}\n      style={\n        {\n          \"--normal-bg\": \"var(--popover)\",\n          \"--normal-text\": \"var(--popover-foreground)\",\n          \"--normal-border\": \"var(--border)\",\n          \"--border-radius\": \"var(--radius)\",\n        } as React.CSSProperties\n      }\n      toastOptions={{\n        classNames: {\n          toast: \"cn-toast\",\n        },\n      }}\n      {...props}\n    />\n  );\n};\n\nexport { Toaster };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/spinner.tsx",
    "content": "import { cn } from \"@/lib/utils\";\nimport { Loader2Icon } from \"lucide-react\";\n\nfunction Spinner({ className, ...props }: React.ComponentProps<\"svg\">) {\n  return (\n    <Loader2Icon\n      role=\"status\"\n      aria-label=\"Loading\"\n      className={cn(\"size-4 animate-spin\", className)}\n      {...props}\n    />\n  );\n}\n\nexport { Spinner };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/switch.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Switch as SwitchPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Switch({\n  className,\n  size = \"default\",\n  ...props\n}: React.ComponentProps<typeof SwitchPrimitive.Root> & {\n  size?: \"sm\" | \"default\";\n}) {\n  return (\n    <SwitchPrimitive.Root\n      data-slot=\"switch\"\n      data-size={size}\n      className={cn(\n        \"data-checked:bg-primary data-unchecked:bg-input focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 dark:data-unchecked:bg-input/80 shrink-0 rounded-full border border-transparent focus-visible:ring-[3px] aria-invalid:ring-[3px] data-[size=default]:h-[18.4px] data-[size=default]:w-[32px] data-[size=sm]:h-[14px] data-[size=sm]:w-[24px] peer group/switch relative inline-flex items-center transition-all outline-none after:absolute after:-inset-x-3 after:-inset-y-2 data-disabled:cursor-not-allowed data-disabled:opacity-50\",\n        className,\n      )}\n      {...props}\n    >\n      <SwitchPrimitive.Thumb\n        data-slot=\"switch-thumb\"\n        className=\"bg-background dark:data-unchecked:bg-foreground dark:data-checked:bg-primary-foreground rounded-full group-data-[size=default]/switch:size-4 group-data-[size=sm]/switch:size-3 group-data-[size=default]/switch:data-checked:translate-x-[calc(100%-2px)] group-data-[size=sm]/switch:data-checked:translate-x-[calc(100%-2px)] group-data-[size=default]/switch:data-unchecked:translate-x-0 group-data-[size=sm]/switch:data-unchecked:translate-x-0 pointer-events-none block ring-0 transition-transform\"\n      />\n    </SwitchPrimitive.Root>\n  );\n}\n\nexport { Switch };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/table.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Table({ className, ...props }: React.ComponentProps<\"table\">) {\n  return (\n    <div\n      data-slot=\"table-container\"\n      className=\"relative w-full overflow-x-auto\"\n    >\n      <table\n        data-slot=\"table\"\n        className={cn(\"w-full caption-bottom text-sm\", className)}\n        {...props}\n      />\n    </div>\n  );\n}\n\nfunction TableHeader({ className, ...props }: React.ComponentProps<\"thead\">) {\n  return (\n    <thead\n      data-slot=\"table-header\"\n      className={cn(\"[&_tr]:border-b\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction TableBody({ className, ...props }: React.ComponentProps<\"tbody\">) {\n  return (\n    <tbody\n      data-slot=\"table-body\"\n      className={cn(\"[&_tr:last-child]:border-0\", className)}\n      {...props}\n    />\n  );\n}\n\nfunction TableFooter({ className, ...props }: React.ComponentProps<\"tfoot\">) {\n  return (\n    <tfoot\n      data-slot=\"table-footer\"\n      className={cn(\n        \"bg-muted/50 border-t font-medium [&>tr]:last:border-b-0\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction TableRow({ className, ...props }: React.ComponentProps<\"tr\">) {\n  return (\n    <tr\n      data-slot=\"table-row\"\n      className={cn(\n        \"hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction TableHead({ className, ...props }: React.ComponentProps<\"th\">) {\n  return (\n    <th\n      data-slot=\"table-head\"\n      className={cn(\n        \"text-foreground h-10 px-2 text-left align-middle font-medium whitespace-nowrap [&:has([role=checkbox])]:pr-0\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction TableCell({ className, ...props }: React.ComponentProps<\"td\">) {\n  return (\n    <td\n      data-slot=\"table-cell\"\n      className={cn(\n        \"p-2 align-middle whitespace-nowrap [&:has([role=checkbox])]:pr-0\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction TableCaption({\n  className,\n  ...props\n}: React.ComponentProps<\"caption\">) {\n  return (\n    <caption\n      data-slot=\"table-caption\"\n      className={cn(\"text-muted-foreground mt-4 text-sm\", className)}\n      {...props}\n    />\n  );\n}\n\nexport {\n  Table,\n  TableHeader,\n  TableBody,\n  TableFooter,\n  TableHead,\n  TableRow,\n  TableCell,\n  TableCaption,\n};\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/tabs.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { cva, type VariantProps } from \"class-variance-authority\";\nimport { Tabs as TabsPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Tabs({\n  className,\n  orientation = \"horizontal\",\n  ...props\n}: React.ComponentProps<typeof TabsPrimitive.Root>) {\n  return (\n    <TabsPrimitive.Root\n      data-slot=\"tabs\"\n      data-orientation={orientation}\n      className={cn(\n        \"gap-2 group/tabs flex data-[orientation=horizontal]:flex-col\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nconst tabsListVariants = cva(\n  \"rounded-lg p-[3px] group-data-horizontal/tabs:h-8 data-[variant=line]:rounded-none group/tabs-list text-muted-foreground inline-flex w-fit items-center justify-center group-data-[orientation=vertical]/tabs:h-fit group-data-[orientation=vertical]/tabs:flex-col\",\n  {\n    variants: {\n      variant: {\n        default: \"bg-muted\",\n        line: \"gap-1 bg-transparent\",\n      },\n    },\n    defaultVariants: {\n      variant: \"default\",\n    },\n  },\n);\n\nfunction TabsList({\n  className,\n  variant = \"default\",\n  ...props\n}: React.ComponentProps<typeof TabsPrimitive.List> &\n  VariantProps<typeof tabsListVariants>) {\n  return (\n    <TabsPrimitive.List\n      data-slot=\"tabs-list\"\n      data-variant={variant}\n      className={cn(tabsListVariants({ variant }), className)}\n      {...props}\n    />\n  );\n}\n\nfunction TabsTrigger({\n  className,\n  ...props\n}: React.ComponentProps<typeof TabsPrimitive.Trigger>) {\n  return (\n    <TabsPrimitive.Trigger\n      data-slot=\"tabs-trigger\"\n      className={cn(\n        \"gap-1.5 rounded-md border border-transparent px-1.5 py-0.5 text-sm font-medium group-data-[variant=default]/tabs-list:data-active:shadow-sm group-data-[variant=line]/tabs-list:data-active:shadow-none [&_svg:not([class*='size-'])]:size-4 focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:outline-ring text-foreground/60 hover:text-foreground dark:text-muted-foreground dark:hover:text-foreground relative inline-flex h-[calc(100%-1px)] flex-1 items-center justify-center whitespace-nowrap transition-all group-data-[orientation=vertical]/tabs:w-full group-data-[orientation=vertical]/tabs:justify-start focus-visible:ring-[3px] focus-visible:outline-1 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n        \"group-data-[variant=line]/tabs-list:bg-transparent group-data-[variant=line]/tabs-list:data-active:bg-transparent dark:group-data-[variant=line]/tabs-list:data-active:border-transparent dark:group-data-[variant=line]/tabs-list:data-active:bg-transparent\",\n        \"data-active:bg-background dark:data-active:text-foreground dark:data-active:border-input dark:data-active:bg-input/30 data-active:text-foreground\",\n        \"after:bg-foreground after:absolute after:opacity-0 after:transition-opacity group-data-[orientation=horizontal]/tabs:after:inset-x-0 group-data-[orientation=horizontal]/tabs:after:bottom-[-5px] group-data-[orientation=horizontal]/tabs:after:h-0.5 group-data-[orientation=vertical]/tabs:after:inset-y-0 group-data-[orientation=vertical]/tabs:after:-right-1 group-data-[orientation=vertical]/tabs:after:w-0.5 group-data-[variant=line]/tabs-list:data-active:after:opacity-100\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nfunction TabsContent({\n  className,\n  ...props\n}: React.ComponentProps<typeof TabsPrimitive.Content>) {\n  return (\n    <TabsPrimitive.Content\n      data-slot=\"tabs-content\"\n      className={cn(\"text-sm flex-1 outline-none\", className)}\n      {...props}\n    />\n  );\n}\n\nexport { Tabs, TabsList, TabsTrigger, TabsContent, tabsListVariants };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/textarea.tsx",
    "content": "import * as React from \"react\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction Textarea({ className, ...props }: React.ComponentProps<\"textarea\">) {\n  return (\n    <textarea\n      data-slot=\"textarea\"\n      className={cn(\n        \"border-input dark:bg-input/30 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 disabled:bg-input/50 dark:disabled:bg-input/80 rounded-lg border bg-transparent px-2.5 py-2 text-base transition-colors focus-visible:ring-[3px] aria-invalid:ring-[3px] md:text-sm placeholder:text-muted-foreground flex field-sizing-content min-h-16 w-full outline-none disabled:cursor-not-allowed disabled:opacity-50\",\n        className,\n      )}\n      {...props}\n    />\n  );\n}\n\nexport { Textarea };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/toggle-group.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { type VariantProps } from \"class-variance-authority\";\nimport { ToggleGroup as ToggleGroupPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\nimport { toggleVariants } from \"@/components/ui/toggle\";\n\nconst ToggleGroupContext = React.createContext<\n  VariantProps<typeof toggleVariants> & {\n    spacing?: number;\n    orientation?: \"horizontal\" | \"vertical\";\n  }\n>({\n  size: \"default\",\n  variant: \"default\",\n  spacing: 0,\n  orientation: \"horizontal\",\n});\n\nfunction ToggleGroup({\n  className,\n  variant,\n  size,\n  spacing = 0,\n  orientation = \"horizontal\",\n  children,\n  ...props\n}: React.ComponentProps<typeof ToggleGroupPrimitive.Root> &\n  VariantProps<typeof toggleVariants> & {\n    spacing?: number;\n    orientation?: \"horizontal\" | \"vertical\";\n  }) {\n  return (\n    <ToggleGroupPrimitive.Root\n      data-slot=\"toggle-group\"\n      data-variant={variant}\n      data-size={size}\n      data-spacing={spacing}\n      data-orientation={orientation}\n      style={{ \"--gap\": spacing } as React.CSSProperties}\n      className={cn(\n        \"rounded-lg data-[size=sm]:rounded-[min(var(--radius-md),10px)] group/toggle-group flex w-fit flex-row items-center gap-[--spacing(var(--gap))] data-[orientation=vertical]:flex-col data-[orientation=vertical]:items-stretch\",\n        className,\n      )}\n      {...props}\n    >\n      <ToggleGroupContext.Provider\n        value={{ variant, size, spacing, orientation }}\n      >\n        {children}\n      </ToggleGroupContext.Provider>\n    </ToggleGroupPrimitive.Root>\n  );\n}\n\nfunction ToggleGroupItem({\n  className,\n  children,\n  variant = \"default\",\n  size = \"default\",\n  ...props\n}: React.ComponentProps<typeof ToggleGroupPrimitive.Item> &\n  VariantProps<typeof toggleVariants>) {\n  const context = React.useContext(ToggleGroupContext);\n\n  return (\n    <ToggleGroupPrimitive.Item\n      data-slot=\"toggle-group-item\"\n      data-variant={context.variant || variant}\n      data-size={context.size || size}\n      data-spacing={context.spacing}\n      className={cn(\n        \"group-data-[spacing=0]/toggle-group:rounded-none group-data-[spacing=0]/toggle-group:px-2 group-data-horizontal/toggle-group:data-[spacing=0]:first:rounded-l-lg group-data-vertical/toggle-group:data-[spacing=0]:first:rounded-t-lg group-data-horizontal/toggle-group:data-[spacing=0]:last:rounded-r-lg group-data-vertical/toggle-group:data-[spacing=0]:last:rounded-b-lg shrink-0 focus:z-10 focus-visible:z-10 group-data-horizontal/toggle-group:data-[spacing=0]:data-[variant=outline]:border-l-0 group-data-vertical/toggle-group:data-[spacing=0]:data-[variant=outline]:border-t-0 group-data-horizontal/toggle-group:data-[spacing=0]:data-[variant=outline]:first:border-l group-data-vertical/toggle-group:data-[spacing=0]:data-[variant=outline]:first:border-t\",\n        toggleVariants({\n          variant: context.variant || variant,\n          size: context.size || size,\n        }),\n        className,\n      )}\n      {...props}\n    >\n      {children}\n    </ToggleGroupPrimitive.Item>\n  );\n}\n\nexport { ToggleGroup, ToggleGroupItem };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/toggle.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { cva, type VariantProps } from \"class-variance-authority\";\nimport { Toggle as TogglePrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst toggleVariants = cva(\n  \"hover:text-foreground aria-pressed:bg-muted focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive data-[state=on]:bg-muted gap-1 rounded-lg text-sm font-medium transition-all [&_svg:not([class*='size-'])]:size-4 group/toggle hover:bg-muted inline-flex items-center justify-center whitespace-nowrap outline-none focus-visible:ring-[3px] disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0\",\n  {\n    variants: {\n      variant: {\n        default: \"bg-transparent\",\n        outline: \"border-input hover:bg-muted border bg-transparent\",\n      },\n      size: {\n        default: \"h-8 min-w-8 px-2\",\n        sm: \"h-7 min-w-7 rounded-[min(var(--radius-md),12px)] px-1.5 text-[0.8rem]\",\n        lg: \"h-9 min-w-9 px-2.5\",\n      },\n    },\n    defaultVariants: {\n      variant: \"default\",\n      size: \"default\",\n    },\n  },\n);\n\nfunction Toggle({\n  className,\n  variant = \"default\",\n  size = \"default\",\n  ...props\n}: React.ComponentProps<typeof TogglePrimitive.Root> &\n  VariantProps<typeof toggleVariants>) {\n  return (\n    <TogglePrimitive.Root\n      data-slot=\"toggle\"\n      className={cn(toggleVariants({ variant, size, className }))}\n      {...props}\n    />\n  );\n}\n\nexport { Toggle, toggleVariants };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components/ui/tooltip.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { Tooltip as TooltipPrimitive } from \"radix-ui\";\n\nimport { cn } from \"@/lib/utils\";\n\nfunction TooltipProvider({\n  delayDuration = 0,\n  ...props\n}: React.ComponentProps<typeof TooltipPrimitive.Provider>) {\n  return (\n    <TooltipPrimitive.Provider\n      data-slot=\"tooltip-provider\"\n      delayDuration={delayDuration}\n      {...props}\n    />\n  );\n}\n\nfunction Tooltip({\n  ...props\n}: React.ComponentProps<typeof TooltipPrimitive.Root>) {\n  return (\n    <TooltipProvider>\n      <TooltipPrimitive.Root data-slot=\"tooltip\" {...props} />\n    </TooltipProvider>\n  );\n}\n\nfunction TooltipTrigger({\n  ...props\n}: React.ComponentProps<typeof TooltipPrimitive.Trigger>) {\n  return <TooltipPrimitive.Trigger data-slot=\"tooltip-trigger\" {...props} />;\n}\n\nfunction TooltipContent({\n  className,\n  sideOffset = 0,\n  children,\n  ...props\n}: React.ComponentProps<typeof TooltipPrimitive.Content>) {\n  return (\n    <TooltipPrimitive.Portal>\n      <TooltipPrimitive.Content\n        data-slot=\"tooltip-content\"\n        sideOffset={sideOffset}\n        className={cn(\n          \"data-open:animate-in data-open:fade-in-0 data-open:zoom-in-95 data-[state=delayed-open]:animate-in data-[state=delayed-open]:fade-in-0 data-[state=delayed-open]:zoom-in-95 data-closed:animate-out data-closed:fade-out-0 data-closed:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 rounded-md px-3 py-1.5 text-xs bg-foreground text-background z-50 w-fit max-w-xs origin-(--radix-tooltip-content-transform-origin)\",\n          className,\n        )}\n        {...props}\n      >\n        {children}\n        <TooltipPrimitive.Arrow className=\"size-2.5 translate-y-[calc(-50%_-_2px)] rotate-45 rounded-[2px] bg-foreground fill-foreground z-50 translate-y-[calc(-50%_-_2px)]\" />\n      </TooltipPrimitive.Content>\n    </TooltipPrimitive.Portal>\n  );\n}\n\nexport { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger };\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/components.json",
    "content": "{\n  \"$schema\": \"https://ui.shadcn.com/schema.json\",\n  \"style\": \"radix-nova\",\n  \"rsc\": true,\n  \"tsx\": true,\n  \"tailwind\": {\n    \"config\": \"\",\n    \"css\": \"app/globals.css\",\n    \"baseColor\": \"neutral\",\n    \"cssVariables\": true,\n    \"prefix\": \"\"\n  },\n  \"iconLibrary\": \"lucide\",\n  \"aliases\": {\n    \"components\": \"@/components\",\n    \"utils\": \"@/lib/utils\",\n    \"ui\": \"@/components/ui\",\n    \"lib\": \"@/lib\",\n    \"hooks\": \"@/hooks\"\n  },\n  \"menuColor\": \"default\",\n  \"menuAccent\": \"subtle\",\n  \"registries\": {}\n}\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/eslint.config.mjs",
    "content": "import { defineConfig, globalIgnores } from \"eslint/config\";\nimport nextVitals from \"eslint-config-next/core-web-vitals\";\nimport nextTs from \"eslint-config-next/typescript\";\n\nconst eslintConfig = defineConfig([\n  ...nextVitals,\n  ...nextTs,\n  // Override default ignores of eslint-config-next.\n  globalIgnores([\n    // Default ignores of eslint-config-next:\n    \".next/**\",\n    \"out/**\",\n    \"build/**\",\n    \"next-env.d.ts\",\n  ]),\n]);\n\nexport default eslintConfig;\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/hooks/use-mobile.ts",
    "content": "import * as React from \"react\";\n\nconst MOBILE_BREAKPOINT = 768;\n\nexport function useIsMobile() {\n  const [isMobile, setIsMobile] = React.useState<boolean | undefined>(\n    undefined,\n  );\n\n  React.useEffect(() => {\n    const mql = window.matchMedia(`(max-width: ${MOBILE_BREAKPOINT - 1}px)`);\n    const onChange = () => {\n      setIsMobile(window.innerWidth < MOBILE_BREAKPOINT);\n    };\n    mql.addEventListener(\"change\", onChange);\n    setIsMobile(window.innerWidth < MOBILE_BREAKPOINT);\n    return () => mql.removeEventListener(\"change\", onChange);\n  }, []);\n\n  return !!isMobile;\n}\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/lib/utils.ts",
    "content": "import { clsx, type ClassValue } from \"clsx\";\nimport { twMerge } from \"tailwind-merge\";\n\nexport function cn(...inputs: ClassValue[]) {\n  return twMerge(clsx(inputs));\n}\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/next.config.ts",
    "content": "import type { NextConfig } from \"next\";\n\nconst nextConfig: NextConfig = {\n  /* config options here */\n};\n\nexport default nextConfig;\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/package.json",
    "content": "{\n  \"name\": \"web\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"dev\": \"next dev\",\n    \"build\": \"next build\",\n    \"start\": \"next start\",\n    \"lint\": \"eslint\"\n  },\n  \"dependencies\": {\n    \"@base-ui/react\": \"^1.1.0\",\n    \"class-variance-authority\": \"^0.7.1\",\n    \"clsx\": \"^2.1.1\",\n    \"cmdk\": \"^1.1.1\",\n    \"date-fns\": \"^4.1.0\",\n    \"embla-carousel-react\": \"^8.6.0\",\n    \"lucide-react\": \"^0.562.0\",\n    \"next\": \"16.1.7\",\n    \"next-themes\": \"^0.4.6\",\n    \"radix-ui\": \"^1.4.3\",\n    \"react\": \"19.2.3\",\n    \"react-day-picker\": \"^9.13.0\",\n    \"react-dom\": \"19.2.3\",\n    \"react-resizable-panels\": \"^4.4.1\",\n    \"recharts\": \"^2.15.4\",\n    \"shadcn\": \"^3.7.0\",\n    \"sonner\": \"^2.0.7\",\n    \"tailwind-merge\": \"^3.4.0\",\n    \"tw-animate-css\": \"^1.4.0\",\n    \"vaul\": \"^1.1.2\"\n  },\n  \"devDependencies\": {\n    \"@tailwindcss/postcss\": \"^4\",\n    \"@types/node\": \"^20\",\n    \"@types/react\": \"^19\",\n    \"@types/react-dom\": \"^19\",\n    \"eslint\": \"^9\",\n    \"eslint-config-next\": \"16.1.4\",\n    \"tailwindcss\": \"^4\",\n    \"typescript\": \"^5\"\n  }\n}\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/postcss.config.mjs",
    "content": "const config = {\n  plugins: {\n    \"@tailwindcss/postcss\": {},\n  },\n};\n\nexport default config;\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/tsconfig.json",
    "content": "{\n  \"compilerOptions\": {\n    \"target\": \"ES2017\",\n    \"lib\": [\"dom\", \"dom.iterable\", \"esnext\"],\n    \"allowJs\": true,\n    \"skipLibCheck\": true,\n    \"strict\": true,\n    \"noEmit\": true,\n    \"esModuleInterop\": true,\n    \"module\": \"esnext\",\n    \"moduleResolution\": \"bundler\",\n    \"resolveJsonModule\": true,\n    \"isolatedModules\": true,\n    \"jsx\": \"react-jsx\",\n    \"incremental\": true,\n    \"plugins\": [\n      {\n        \"name\": \"next\"\n      }\n    ],\n    \"paths\": {\n      \"@/*\": [\"./*\"]\n    }\n  },\n  \"include\": [\n    \"next-env.d.ts\",\n    \"**/*.ts\",\n    \"**/*.tsx\",\n    \".next/types/**/*.ts\",\n    \".next/dev/types/**/*.ts\",\n    \"**/*.mts\"\n  ],\n  \"exclude\": [\"node_modules\"]\n}\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/docker/test-job.yaml",
    "content": "# Kubernetes Job to run sandbox integration tests\n#\n# This runs the test pod inside the cluster so it can access sandbox services.\n#\n# Usage:\n#   kubectl apply -f test-job.yaml\n#   kubectl logs -f job/sandbox-test -n onyx-sandboxes\n#   kubectl delete job sandbox-test -n onyx-sandboxes\n\napiVersion: v1\nkind: Pod\nmetadata:\n  name: sandbox-test\n  namespace: onyx-sandboxes\nspec:\n  serviceAccountName: sandbox-runner  # Needs permissions to create/delete pods\n  containers:\n  - name: test\n    image: onyxdotapp/onyx-backend:latest\n    imagePullPolicy: Never  # Use local image, don't try to pull from registry\n    command: [\"sleep\", \"infinity\"]\n    env:\n    - name: SANDBOX_BACKEND\n      value: \"kubernetes\"\n    - name: SANDBOX_NAMESPACE\n      value: \"onyx-sandboxes\"\n    # Add any other required env vars (API keys, DB connection, etc.)\n    # - name: OPENAI_API_KEY\n    #   valueFrom:\n    #     secretKeyRef:\n    #       name: openai-secrets\n    #       key: api-key\n    resources:\n      requests:\n        cpu: \"500m\"\n        memory: \"512Mi\"\n      limits:\n        cpu: \"1000m\"\n        memory: \"1Gi\"\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/internal/__init__.py",
    "content": "\"\"\"Internal implementation details for Kubernetes sandbox management.\n\nThese modules are implementation details and should only be used by KubernetesSandboxManager.\n\"\"\"\n\nfrom onyx.server.features.build.sandbox.kubernetes.internal.acp_exec_client import (\n    ACPEvent,\n)\n\n__all__ = [\n    \"ACPEvent\",\n]\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/internal/acp_exec_client.py",
    "content": "\"\"\"ACP client that communicates via kubectl exec into the sandbox pod.\n\nThis client runs `opencode acp` directly in the sandbox pod via kubernetes exec,\nusing stdin/stdout for JSON-RPC communication. This bypasses the HTTP server\nand uses the native ACP subprocess protocol.\n\nEach message creates an ephemeral client (start → resume_or_create_session →\nsend_message → stop) to prevent concurrent processes from corrupting\nopencode's flat file session storage.\n\nUsage:\n    client = ACPExecClient(\n        pod_name=\"sandbox-abc123\",\n        namespace=\"onyx-sandboxes\",\n    )\n    client.start(cwd=\"/workspace\")\n    session_id = client.resume_or_create_session(cwd=\"/workspace/sessions/abc\")\n    for event in client.send_message(\"What files are here?\", session_id=session_id):\n        print(event)\n    client.stop()\n\"\"\"\n\nimport json\nimport shlex\nimport threading\nimport time\nfrom collections.abc import Generator\nfrom dataclasses import dataclass\nfrom dataclasses import field\nfrom queue import Empty\nfrom queue import Queue\nfrom typing import Any\nfrom typing import cast\n\nfrom acp.schema import AgentMessageChunk\nfrom acp.schema import AgentPlanUpdate\nfrom acp.schema import AgentThoughtChunk\nfrom acp.schema import CurrentModeUpdate\nfrom acp.schema import Error\nfrom acp.schema import PromptResponse\nfrom acp.schema import ToolCallProgress\nfrom acp.schema import ToolCallStart\nfrom kubernetes import client  # type: ignore\nfrom kubernetes import config\nfrom kubernetes.stream import stream as k8s_stream  # type: ignore\nfrom kubernetes.stream.ws_client import WSClient  # type: ignore\nfrom pydantic import BaseModel\nfrom pydantic import ValidationError\n\nfrom onyx.server.features.build.api.packet_logger import get_packet_logger\nfrom onyx.server.features.build.configs import ACP_MESSAGE_TIMEOUT\nfrom onyx.server.features.build.configs import SSE_KEEPALIVE_INTERVAL\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# ACP Protocol version\nACP_PROTOCOL_VERSION = 1\n\n# Default client info\nDEFAULT_CLIENT_INFO = {\n    \"name\": \"onyx-sandbox-k8s-exec\",\n    \"title\": \"Onyx Sandbox Agent Client (K8s Exec)\",\n    \"version\": \"1.0.0\",\n}\n\n\n@dataclass\nclass SSEKeepalive:\n    \"\"\"Marker event to signal that an SSE keepalive should be sent.\n\n    This is yielded when no ACP events have been received for SSE_KEEPALIVE_INTERVAL\n    seconds, allowing the SSE stream to send a comment to keep the connection alive.\n\n    Note: This is an internal event type - it's consumed by session/manager.py and\n    converted to an SSE comment before leaving that layer. It should not be exposed\n    to external consumers.\n    \"\"\"\n\n\n# Union type for all possible events from send_message\nACPEvent = (\n    AgentMessageChunk\n    | AgentThoughtChunk\n    | ToolCallStart\n    | ToolCallProgress\n    | AgentPlanUpdate\n    | CurrentModeUpdate\n    | PromptResponse\n    | Error\n    | SSEKeepalive\n)\n\n\n@dataclass\nclass ACPSession:\n    \"\"\"Represents an active ACP session.\"\"\"\n\n    session_id: str\n    cwd: str\n\n\n@dataclass\nclass ACPClientState:\n    \"\"\"Internal state for the ACP client.\"\"\"\n\n    initialized: bool = False\n    sessions: dict[str, ACPSession] = field(default_factory=dict)\n    next_request_id: int = 0\n    agent_capabilities: dict[str, Any] = field(default_factory=dict)\n    agent_info: dict[str, Any] = field(default_factory=dict)\n\n\nclass ACPExecClient:\n    \"\"\"ACP client that communicates via kubectl exec.\n\n    Runs `opencode acp` in the sandbox pod and communicates via stdin/stdout\n    through the kubernetes exec stream.\n    \"\"\"\n\n    def __init__(\n        self,\n        pod_name: str,\n        namespace: str,\n        container: str = \"sandbox\",\n        client_info: dict[str, Any] | None = None,\n        client_capabilities: dict[str, Any] | None = None,\n    ) -> None:\n        \"\"\"Initialize the exec-based ACP client.\n\n        Args:\n            pod_name: Name of the sandbox pod\n            namespace: Kubernetes namespace\n            container: Container name within the pod\n            client_info: Client identification info\n            client_capabilities: Client capabilities to advertise\n        \"\"\"\n        self._pod_name = pod_name\n        self._namespace = namespace\n        self._container = container\n        self._client_info = client_info or DEFAULT_CLIENT_INFO\n        self._client_capabilities = client_capabilities or {\n            \"fs\": {\"readTextFile\": True, \"writeTextFile\": True},\n            \"terminal\": True,\n        }\n        self._state = ACPClientState()\n        self._ws_client: WSClient | None = None\n        self._response_queue: Queue[dict[str, Any]] = Queue()\n        self._reader_thread: threading.Thread | None = None\n        self._stop_reader = threading.Event()\n        self._k8s_client: client.CoreV1Api | None = None\n\n    def _get_k8s_client(self) -> client.CoreV1Api:\n        \"\"\"Get or create kubernetes client.\"\"\"\n        if self._k8s_client is None:\n            try:\n                config.load_incluster_config()\n            except config.ConfigException:\n                config.load_kube_config()\n            self._k8s_client = client.CoreV1Api()\n        return self._k8s_client\n\n    def start(self, cwd: str = \"/workspace\", timeout: float = 30.0) -> None:\n        \"\"\"Start the agent process via exec and initialize the ACP connection.\n\n        Only performs the ACP `initialize` handshake. Sessions are created\n        separately via `resume_or_create_session()`.\n\n        Args:\n            cwd: Working directory for the `opencode acp` process\n            timeout: Timeout for initialization\n\n        Raises:\n            RuntimeError: If startup fails\n        \"\"\"\n        if self._ws_client is not None:\n            raise RuntimeError(\"Client already started. Call stop() first.\")\n\n        k8s = self._get_k8s_client()\n\n        # Start opencode acp via exec.\n        # Set XDG_DATA_HOME so opencode stores session data on the shared\n        # workspace volume (accessible from file-sync container for snapshots)\n        # instead of the container-local ~/.local/share/ filesystem.\n        data_dir = shlex.quote(f\"{cwd}/.opencode-data\")\n        safe_cwd = shlex.quote(cwd)\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            f\"XDG_DATA_HOME={data_dir} exec opencode acp --cwd {safe_cwd}\",\n        ]\n\n        logger.info(f\"[ACP] Starting client: pod={self._pod_name} cwd={cwd}\")\n\n        try:\n            self._ws_client = k8s_stream(\n                k8s.connect_get_namespaced_pod_exec,\n                name=self._pod_name,\n                namespace=self._namespace,\n                container=self._container,\n                command=exec_command,\n                stdin=True,\n                stdout=True,\n                stderr=True,\n                tty=False,\n                _preload_content=False,\n                _request_timeout=900,  # 15 minute timeout for long-running sessions\n            )\n\n            # Start reader thread\n            self._stop_reader.clear()\n            self._reader_thread = threading.Thread(\n                target=self._read_responses, daemon=True\n            )\n            self._reader_thread.start()\n\n            # Give process a moment to start\n            time.sleep(0.5)\n\n            # Initialize ACP connection (no session creation)\n            self._initialize(timeout=timeout)\n\n            logger.info(f\"[ACP] Client started: pod={self._pod_name}\")\n        except Exception as e:\n            logger.error(f\"[ACP] Client start failed: pod={self._pod_name} error={e}\")\n            self.stop()\n            raise RuntimeError(f\"Failed to start ACP exec client: {e}\") from e\n\n    def _read_responses(self) -> None:\n        \"\"\"Background thread to read responses from the exec stream.\"\"\"\n        buffer = \"\"\n        packet_logger = get_packet_logger()\n\n        while not self._stop_reader.is_set():\n            if self._ws_client is None:\n                break\n\n            try:\n                if self._ws_client.is_open():\n                    self._ws_client.update(timeout=0.1)\n\n                    # Read stderr - log any agent errors\n                    stderr_data = self._ws_client.read_stderr(timeout=0.01)\n                    if stderr_data:\n                        logger.warning(\n                            f\"[ACP] stderr pod={self._pod_name}: {stderr_data.strip()[:500]}\"\n                        )\n\n                    # Read stdout\n                    data = self._ws_client.read_stdout(timeout=0.1)\n                    if data:\n                        buffer += data\n\n                        while \"\\n\" in buffer:\n                            line, buffer = buffer.split(\"\\n\", 1)\n                            line = line.strip()\n                            if line:\n                                try:\n                                    message = json.loads(line)\n                                    packet_logger.log_jsonrpc_raw_message(\n                                        \"IN\", message, context=\"k8s\"\n                                    )\n                                    self._response_queue.put(message)\n                                except json.JSONDecodeError:\n                                    logger.warning(\n                                        f\"[ACP] Invalid JSON from agent: {line[:100]}\"\n                                    )\n\n                else:\n                    logger.warning(f\"[ACP] WebSocket closed: pod={self._pod_name}\")\n                    break\n\n            except Exception as e:\n                if not self._stop_reader.is_set():\n                    logger.warning(f\"[ACP] Reader error: {e}, pod={self._pod_name}\")\n                break\n\n    def stop(self) -> None:\n        \"\"\"Stop the exec session and clean up.\"\"\"\n        session_ids = list(self._state.sessions.keys())\n        logger.info(\n            f\"[ACP] Stopping client: pod={self._pod_name} sessions={session_ids}\"\n        )\n        self._stop_reader.set()\n\n        if self._ws_client is not None:\n            try:\n                self._ws_client.close()\n            except Exception:\n                pass\n            self._ws_client = None\n\n        if self._reader_thread is not None:\n            self._reader_thread.join(timeout=2.0)\n            self._reader_thread = None\n\n        self._state = ACPClientState()\n\n    def _get_next_id(self) -> int:\n        \"\"\"Get the next request ID.\"\"\"\n        request_id = self._state.next_request_id\n        self._state.next_request_id += 1\n        return request_id\n\n    def _send_request(self, method: str, params: dict[str, Any] | None = None) -> int:\n        \"\"\"Send a JSON-RPC request.\"\"\"\n        if self._ws_client is None or not self._ws_client.is_open():\n            raise RuntimeError(\"Exec session not open\")\n\n        request_id = self._get_next_id()\n        request: dict[str, Any] = {\n            \"jsonrpc\": \"2.0\",\n            \"id\": request_id,\n            \"method\": method,\n        }\n        if params is not None:\n            request[\"params\"] = params\n\n        # Log the outgoing request\n        packet_logger = get_packet_logger()\n        packet_logger.log_jsonrpc_request(method, request_id, params, context=\"k8s\")\n\n        message = json.dumps(request) + \"\\n\"\n        self._ws_client.write_stdin(message)\n\n        return request_id\n\n    def _send_notification(\n        self, method: str, params: dict[str, Any] | None = None\n    ) -> None:\n        \"\"\"Send a JSON-RPC notification (no response expected).\"\"\"\n        if self._ws_client is None or not self._ws_client.is_open():\n            return\n\n        notification: dict[str, Any] = {\n            \"jsonrpc\": \"2.0\",\n            \"method\": method,\n        }\n        if params is not None:\n            notification[\"params\"] = params\n\n        # Log the outgoing notification\n        packet_logger = get_packet_logger()\n        packet_logger.log_jsonrpc_request(method, None, params, context=\"k8s\")\n\n        message = json.dumps(notification) + \"\\n\"\n        self._ws_client.write_stdin(message)\n\n    def _wait_for_response(\n        self, request_id: int, timeout: float = 30.0\n    ) -> dict[str, Any]:\n        \"\"\"Wait for a response to a specific request.\"\"\"\n        start_time = time.time()\n\n        while True:\n            remaining = timeout - (time.time() - start_time)\n            if remaining <= 0:\n                raise RuntimeError(\n                    f\"Timeout waiting for response to request {request_id}\"\n                )\n\n            try:\n                message = self._response_queue.get(timeout=min(remaining, 1.0))\n\n                if message.get(\"id\") == request_id:\n                    if \"error\" in message:\n                        error = message[\"error\"]\n                        raise RuntimeError(\n                            f\"ACP error {error.get('code')}: {error.get('message')}\"\n                        )\n                    return message.get(\"result\", {})\n\n                # Put back messages that aren't our response\n                self._response_queue.put(message)\n\n            except Empty:\n                continue\n\n    def _initialize(self, timeout: float = 30.0) -> dict[str, Any]:\n        \"\"\"Initialize the ACP connection.\"\"\"\n        params = {\n            \"protocolVersion\": ACP_PROTOCOL_VERSION,\n            \"clientCapabilities\": self._client_capabilities,\n            \"clientInfo\": self._client_info,\n        }\n\n        request_id = self._send_request(\"initialize\", params)\n        result = self._wait_for_response(request_id, timeout)\n\n        self._state.initialized = True\n        self._state.agent_capabilities = result.get(\"agentCapabilities\", {})\n        self._state.agent_info = result.get(\"agentInfo\", {})\n\n        return result\n\n    def _create_session(self, cwd: str, timeout: float = 30.0) -> str:\n        \"\"\"Create a new ACP session.\"\"\"\n        params = {\n            \"cwd\": cwd,\n            \"mcpServers\": [],\n        }\n\n        request_id = self._send_request(\"session/new\", params)\n        result = self._wait_for_response(request_id, timeout)\n\n        session_id = result.get(\"sessionId\")\n        if not session_id:\n            raise RuntimeError(\"No session ID returned from session/new\")\n\n        self._state.sessions[session_id] = ACPSession(session_id=session_id, cwd=cwd)\n        logger.info(f\"[ACP] Created session: acp_session={session_id} cwd={cwd}\")\n\n        return session_id\n\n    def _list_sessions(self, cwd: str, timeout: float = 10.0) -> list[dict[str, Any]]:\n        \"\"\"List available ACP sessions, filtered by working directory.\n\n        Returns:\n            List of session info dicts with keys like 'sessionId', 'cwd', 'title'.\n            Empty list if session/list is not supported or fails.\n        \"\"\"\n        try:\n            request_id = self._send_request(\"session/list\", {\"cwd\": cwd})\n            result = self._wait_for_response(request_id, timeout)\n            sessions = result.get(\"sessions\", [])\n            logger.info(f\"[ACP] session/list: {len(sessions)} sessions for cwd={cwd}\")\n            return sessions\n        except Exception as e:\n            logger.info(f\"[ACP] session/list unavailable: {e}\")\n            return []\n\n    def _resume_session(self, session_id: str, cwd: str, timeout: float = 30.0) -> str:\n        \"\"\"Resume an existing ACP session.\n\n        Args:\n            session_id: The ACP session ID to resume\n            cwd: Working directory for the session\n            timeout: Timeout for the resume request\n\n        Returns:\n            The session ID\n\n        Raises:\n            RuntimeError: If resume fails\n        \"\"\"\n        params = {\n            \"sessionId\": session_id,\n            \"cwd\": cwd,\n            \"mcpServers\": [],\n        }\n\n        request_id = self._send_request(\"session/resume\", params)\n        result = self._wait_for_response(request_id, timeout)\n\n        # The response should contain the session ID\n        resumed_id = result.get(\"sessionId\", session_id)\n        self._state.sessions[resumed_id] = ACPSession(session_id=resumed_id, cwd=cwd)\n\n        logger.info(f\"[ACP] Resumed session: acp_session={resumed_id} cwd={cwd}\")\n        return resumed_id\n\n    def _try_resume_existing_session(self, cwd: str, timeout: float) -> str | None:\n        \"\"\"Try to find and resume an existing session for this workspace.\n\n        When multiple API server replicas connect to the same sandbox pod,\n        a previous replica may have already created an ACP session for this\n        workspace. This method discovers and resumes that session so the\n        agent retains conversation context.\n\n        Args:\n            cwd: Working directory to search for sessions\n            timeout: Timeout for ACP requests\n\n        Returns:\n            The resumed session ID, or None if no session could be resumed\n        \"\"\"\n        # List sessions for this workspace directory\n        sessions = self._list_sessions(cwd, timeout=min(timeout, 10.0))\n        if not sessions:\n            return None\n\n        # Pick the most recent session (first in list, assuming sorted)\n        target = sessions[0]\n        target_id = target.get(\"sessionId\")\n        if not target_id:\n            logger.warning(\"[ACP] session/list returned session without sessionId\")\n            return None\n\n        logger.info(\n            f\"[ACP] Resuming existing session: acp_session={target_id} (found {len(sessions)})\"\n        )\n\n        try:\n            return self._resume_session(target_id, cwd, timeout)\n        except Exception as e:\n            logger.warning(\n                f\"[ACP] session/resume failed for {target_id}: {e}, falling back to session/new\"\n            )\n            return None\n\n    def resume_or_create_session(self, cwd: str, timeout: float = 30.0) -> str:\n        \"\"\"Resume a session from opencode's on-disk storage, or create a new one.\n\n        With ephemeral clients (one process per message), this always hits disk.\n        Tries resume first to preserve conversation context, falls back to new.\n\n        Args:\n            cwd: Working directory for the session\n            timeout: Timeout for ACP requests\n\n        Returns:\n            The ACP session ID\n        \"\"\"\n        if not self._state.initialized:\n            raise RuntimeError(\"Client not initialized. Call start() first.\")\n\n        # Try to resume from opencode's persisted storage\n        resumed_id = self._try_resume_existing_session(cwd, timeout)\n        if resumed_id:\n            return resumed_id\n\n        # Create a new session\n        return self._create_session(cwd=cwd, timeout=timeout)\n\n    def send_message(\n        self,\n        message: str,\n        session_id: str,\n        timeout: float = ACP_MESSAGE_TIMEOUT,\n    ) -> Generator[ACPEvent, None, None]:\n        \"\"\"Send a message to a specific session and stream response events.\n\n        Args:\n            message: The message content to send\n            session_id: The ACP session ID to send the message to\n            timeout: Maximum time to wait for complete response (defaults to ACP_MESSAGE_TIMEOUT env var)\n\n        Yields:\n            Typed ACP schema event objects\n        \"\"\"\n        if session_id not in self._state.sessions:\n            raise RuntimeError(\n                f\"Unknown session {session_id}. Known sessions: {list(self._state.sessions.keys())}\"\n            )\n        packet_logger = get_packet_logger()\n\n        logger.info(\n            f\"[ACP] Sending prompt: acp_session={session_id} pod={self._pod_name} queue_backlog={self._response_queue.qsize()}\"\n        )\n\n        prompt_content = [{\"type\": \"text\", \"text\": message}]\n        params = {\n            \"sessionId\": session_id,\n            \"prompt\": prompt_content,\n        }\n\n        request_id = self._send_request(\"session/prompt\", params)\n        start_time = time.time()\n        last_event_time = time.time()\n        events_yielded = 0\n        keepalive_count = 0\n        completion_reason = \"unknown\"\n\n        while True:\n            remaining = timeout - (time.time() - start_time)\n            if remaining <= 0:\n                completion_reason = \"timeout\"\n                logger.warning(\n                    f\"[ACP] Prompt timeout: acp_session={session_id} events={events_yielded}, sending session/cancel\"\n                )\n                try:\n                    self.cancel(session_id=session_id)\n                except Exception as cancel_err:\n                    logger.warning(\n                        f\"[ACP] session/cancel failed on timeout: {cancel_err}\"\n                    )\n                yield Error(code=-1, message=\"Timeout waiting for response\")\n                break\n\n            try:\n                message_data = self._response_queue.get(timeout=min(remaining, 1.0))\n                last_event_time = time.time()\n            except Empty:\n                # Send SSE keepalive if idle\n                idle_time = time.time() - last_event_time\n                if idle_time >= SSE_KEEPALIVE_INTERVAL:\n                    keepalive_count += 1\n                    yield SSEKeepalive()\n                    last_event_time = time.time()\n                continue\n\n            # Check for JSON-RPC response to our prompt request.\n            msg_id = message_data.get(\"id\")\n            is_response = \"method\" not in message_data and (\n                msg_id == request_id\n                or (msg_id is not None and str(msg_id) == str(request_id))\n            )\n            if is_response:\n                completion_reason = \"jsonrpc_response\"\n                if \"error\" in message_data:\n                    error_data = message_data[\"error\"]\n                    completion_reason = \"jsonrpc_error\"\n                    logger.warning(f\"[ACP] Prompt error: {error_data}\")\n                    packet_logger.log_jsonrpc_response(\n                        request_id, error=error_data, context=\"k8s\"\n                    )\n                    yield Error(\n                        code=error_data.get(\"code\", -1),\n                        message=error_data.get(\"message\", \"Unknown error\"),\n                    )\n                else:\n                    result = message_data.get(\"result\", {})\n                    packet_logger.log_jsonrpc_response(\n                        request_id, result=result, context=\"k8s\"\n                    )\n                    try:\n                        prompt_response = PromptResponse.model_validate(result)\n                        events_yielded += 1\n                        yield prompt_response\n                    except ValidationError as e:\n                        logger.error(f\"[ACP] PromptResponse validation failed: {e}\")\n\n                elapsed_ms = (time.time() - start_time) * 1000\n                logger.info(\n                    f\"[ACP] Prompt complete: \"\n                    f\"reason={completion_reason} acp_session={session_id} \"\n                    f\"events={events_yielded} elapsed={elapsed_ms:.0f}ms\"\n                )\n                break\n\n            # Handle notifications (session/update)\n            if message_data.get(\"method\") == \"session/update\":\n                params_data = message_data.get(\"params\", {})\n                update = params_data.get(\"update\", {})\n\n                prompt_complete = False\n                for event in self._process_session_update(update):\n                    events_yielded += 1\n                    yield event\n                    if isinstance(event, PromptResponse):\n                        prompt_complete = True\n                        break\n\n                if prompt_complete:\n                    completion_reason = \"prompt_response_via_notification\"\n                    elapsed_ms = (time.time() - start_time) * 1000\n                    logger.info(\n                        f\"[ACP] Prompt complete: \"\n                        f\"reason={completion_reason} acp_session={session_id} \"\n                        f\"events={events_yielded} elapsed={elapsed_ms:.0f}ms\"\n                    )\n                    break\n\n            # Handle requests from agent - send error response\n            elif \"method\" in message_data and \"id\" in message_data:\n                logger.debug(\n                    f\"[ACP] Unsupported agent request: method={message_data['method']}\"\n                )\n                self._send_error_response(\n                    message_data[\"id\"],\n                    -32601,\n                    f\"Method not supported: {message_data['method']}\",\n                )\n\n            else:\n                logger.warning(\n                    f\"[ACP] Unhandled message: \"\n                    f\"id={message_data.get('id')} \"\n                    f\"method={message_data.get('method')} \"\n                    f\"keys={list(message_data.keys())}\"\n                )\n\n    def _process_session_update(\n        self, update: dict[str, Any]\n    ) -> Generator[ACPEvent, None, None]:\n        \"\"\"Process a session/update notification and yield typed ACP schema objects.\"\"\"\n        update_type = update.get(\"sessionUpdate\")\n        if not isinstance(update_type, str):\n            return\n\n        # Map update types to their ACP schema classes.\n        # Note: prompt_response is included because ACP sometimes sends it as a\n        # notification WITHOUT a corresponding JSON-RPC response. We accept\n        # either signal as turn completion (first one wins).\n        type_map: dict[str, type[BaseModel]] = {\n            \"agent_message_chunk\": AgentMessageChunk,\n            \"agent_thought_chunk\": AgentThoughtChunk,\n            \"tool_call\": ToolCallStart,\n            \"tool_call_update\": ToolCallProgress,\n            \"plan\": AgentPlanUpdate,\n            \"current_mode_update\": CurrentModeUpdate,\n            \"prompt_response\": PromptResponse,\n        }\n\n        model_class = type_map.get(update_type)\n        if model_class is not None:\n            try:\n                yield cast(ACPEvent, model_class.model_validate(update))\n            except ValidationError as e:\n                logger.warning(f\"[ACP] Validation error for {update_type}: {e}\")\n        elif update_type not in (\n            \"user_message_chunk\",\n            \"available_commands_update\",\n            \"session_info_update\",\n            \"usage_update\",\n        ):\n            logger.debug(f\"[ACP] Unknown update type: {update_type}\")\n\n    def _send_error_response(self, request_id: int, code: int, message: str) -> None:\n        \"\"\"Send an error response to an agent request.\"\"\"\n        if self._ws_client is None or not self._ws_client.is_open():\n            return\n\n        response = {\n            \"jsonrpc\": \"2.0\",\n            \"id\": request_id,\n            \"error\": {\"code\": code, \"message\": message},\n        }\n\n        self._ws_client.write_stdin(json.dumps(response) + \"\\n\")\n\n    def cancel(self, session_id: str | None = None) -> None:\n        \"\"\"Cancel the current operation on a session.\n\n        Args:\n            session_id: The ACP session ID to cancel. If None, cancels all sessions.\n        \"\"\"\n        if session_id:\n            if session_id in self._state.sessions:\n                self._send_notification(\n                    \"session/cancel\",\n                    {\"sessionId\": session_id},\n                )\n        else:\n            for sid in self._state.sessions:\n                self._send_notification(\n                    \"session/cancel\",\n                    {\"sessionId\": sid},\n                )\n\n    def health_check(self, timeout: float = 5.0) -> bool:  # noqa: ARG002\n        \"\"\"Check if we can exec into the pod.\"\"\"\n        try:\n            k8s = self._get_k8s_client()\n            result = k8s_stream(\n                k8s.connect_get_namespaced_pod_exec,\n                name=self._pod_name,\n                namespace=self._namespace,\n                container=self._container,\n                command=[\"echo\", \"ok\"],\n                stdin=False,\n                stdout=True,\n                stderr=False,\n                tty=False,\n            )\n            return \"ok\" in result\n        except Exception:\n            return False\n\n    @property\n    def is_running(self) -> bool:\n        \"\"\"Check if the exec session is running.\"\"\"\n        return self._ws_client is not None and self._ws_client.is_open()\n\n    def __enter__(self) -> \"ACPExecClient\":\n        \"\"\"Context manager entry.\"\"\"\n        return self\n\n    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:\n        \"\"\"Context manager exit - ensures cleanup.\"\"\"\n        self.stop()\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/kubernetes/kubernetes_sandbox_manager.py",
    "content": "\"\"\"Kubernetes-based sandbox manager for production deployments.\n\nKubernetesSandboxManager provisions sandboxes as Kubernetes pods with true\ncontainer isolation. Each sandbox runs in its own pod with dedicated resources.\n\nKey features:\n- Pod-based isolation (not process-level)\n- S3-based snapshots via init containers\n- Cluster-native service discovery\n- RBAC-controlled resource management\n- User-shared sandbox model with per-session workspaces\n\nArchitecture Note (User-Shared Sandbox Model):\n- One pod per user (shared across all user's sessions)\n- provision() creates the pod with shared files/ directory\n- setup_session_workspace() creates per-session workspace via kubectl exec\n- cleanup_session_workspace() removes session workspace via kubectl exec\n- terminate() destroys the entire pod (all sessions)\n\nDirectory Structure (inside pod):\n    /workspace/\n    ├── files/                     # SHARED - synced from S3\n    └── sessions/\n        ├── $session_id_1/         # Per-session workspace\n        │   ├── outputs/\n        │   ├── AGENTS.md\n        │   └── ...\n        └── $session_id_2/\n            └── ...\n\nIMPORTANT: This manager does NOT interface with the database directly.\nAll database operations should be handled by the caller (SessionManager, Celery tasks, etc.).\n\nUse get_sandbox_manager() from base.py to get the appropriate implementation.\n\"\"\"\n\nimport base64\nimport binascii\nimport io\nimport json\nimport mimetypes\nimport os\nimport re\nimport shlex\nimport tarfile\nimport threading\nimport time\nfrom collections.abc import Generator\nfrom pathlib import Path\nfrom uuid import UUID\nfrom uuid import uuid4\n\nfrom acp.schema import PromptResponse\nfrom kubernetes import client  # type: ignore\nfrom kubernetes import config\nfrom kubernetes.client.rest import ApiException  # type: ignore\nfrom kubernetes.stream import stream as k8s_stream  # type: ignore\n\nfrom onyx.db.enums import SandboxStatus\nfrom onyx.server.features.build.api.packet_logger import get_packet_logger\nfrom onyx.server.features.build.configs import OPENCODE_DISABLED_TOOLS\nfrom onyx.server.features.build.configs import SANDBOX_CONTAINER_IMAGE\nfrom onyx.server.features.build.configs import SANDBOX_FILE_SYNC_SERVICE_ACCOUNT\nfrom onyx.server.features.build.configs import SANDBOX_NAMESPACE\nfrom onyx.server.features.build.configs import SANDBOX_NEXTJS_PORT_END\nfrom onyx.server.features.build.configs import SANDBOX_NEXTJS_PORT_START\nfrom onyx.server.features.build.configs import SANDBOX_S3_BUCKET\nfrom onyx.server.features.build.configs import SANDBOX_SERVICE_ACCOUNT_NAME\nfrom onyx.server.features.build.sandbox.base import SandboxManager\nfrom onyx.server.features.build.sandbox.kubernetes.internal.acp_exec_client import (\n    ACPEvent,\n)\nfrom onyx.server.features.build.sandbox.kubernetes.internal.acp_exec_client import (\n    ACPExecClient,\n)\nfrom onyx.server.features.build.sandbox.models import FilesystemEntry\nfrom onyx.server.features.build.sandbox.models import LLMProviderConfig\nfrom onyx.server.features.build.sandbox.models import SandboxInfo\nfrom onyx.server.features.build.sandbox.models import SnapshotResult\nfrom onyx.server.features.build.sandbox.util.agent_instructions import (\n    ATTACHMENTS_SECTION_CONTENT,\n)\nfrom onyx.server.features.build.sandbox.util.agent_instructions import (\n    generate_agent_instructions,\n)\nfrom onyx.server.features.build.sandbox.util.opencode_config import (\n    build_opencode_config,\n)\nfrom onyx.server.features.build.sandbox.util.persona_mapping import (\n    generate_user_identity_content,\n)\nfrom onyx.server.features.build.sandbox.util.persona_mapping import get_persona_info\nfrom onyx.server.features.build.sandbox.util.persona_mapping import ORG_INFO_AGENTS_MD\nfrom onyx.server.features.build.sandbox.util.persona_mapping import (\n    ORGANIZATION_STRUCTURE,\n)\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# API server pod hostname — used to identify which replica is handling a request.\n# In K8s, HOSTNAME is set to the pod name (e.g., \"api-server-dpgg7\").\n_API_SERVER_HOSTNAME = os.environ.get(\"HOSTNAME\", \"unknown\")\n\n# Constants for pod configuration\n# Note: Next.js ports are dynamically allocated from SANDBOX_NEXTJS_PORT_START to\n# SANDBOX_NEXTJS_PORT_END range, with one port per session.\nAGENT_PORT = 8081\nPOD_READY_TIMEOUT_SECONDS = 120\nPOD_READY_POLL_INTERVAL_SECONDS = 2\n\n# Resource deletion timeout and polling interval\n# Kubernetes deletes are async - we need to wait for resources to actually be gone\nRESOURCE_DELETION_TIMEOUT_SECONDS = 30\nRESOURCE_DELETION_POLL_INTERVAL_SECONDS = 0.5\n\n\ndef _build_nextjs_start_script(\n    session_path: str,\n    nextjs_port: int,\n    check_node_modules: bool = False,\n) -> str:\n    \"\"\"Build shell script to start the NextJS dev server.\n\n    Args:\n        session_path: Path to the session directory (should be shell-safe)\n        nextjs_port: Port number for the NextJS dev server\n        check_node_modules: If True, check for node_modules and run npm install if missing\n\n    Returns:\n        Shell script string to start the NextJS server\n    \"\"\"\n    npm_install_check = \"\"\n    if check_node_modules:\n        npm_install_check = \"\"\"\n# Check if npm dependencies are installed\nif [ ! -d \"node_modules\" ]; then\n    echo \"Installing npm dependencies...\"\n    npm install\nfi\n\"\"\"\n\n    return f\"\"\"\nset -e\ncd {session_path}/outputs/web\n{npm_install_check}\n# Start npm run dev in background\necho \"Starting Next.js dev server on port {nextjs_port}...\"\nnohup npm run dev -- -p {nextjs_port} > {session_path}/nextjs.log 2>&1 &\nNEXTJS_PID=$!\necho \"Next.js server started with PID $NEXTJS_PID\"\necho $NEXTJS_PID > {session_path}/nextjs.pid\n\"\"\"\n\n\ndef _get_local_aws_credential_env_vars() -> list[client.V1EnvVar]:\n    \"\"\"Get AWS credential environment variables from local environment.\n\n    Checks for AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and optionally\n    AWS_SESSION_TOKEN and AWS_DEFAULT_REGION in the local environment.\n    If credentials are found, returns V1EnvVar objects to pass them to containers.\n\n    This allows using local AWS credentials for development/testing while\n    IRSA (IAM Roles for Service Accounts) handles credentials in production EKS.\n\n    Returns:\n        List of V1EnvVar objects for AWS credentials, empty if not set locally.\n    \"\"\"\n    env_vars: list[client.V1EnvVar] = []\n\n    aws_access_key = os.environ.get(\"AWS_ACCESS_KEY_ID\")\n    aws_secret_key = os.environ.get(\"AWS_SECRET_ACCESS_KEY\")\n\n    # Only add credentials if both required values are present\n    if aws_access_key and aws_secret_key:\n        env_vars.append(client.V1EnvVar(name=\"AWS_ACCESS_KEY_ID\", value=aws_access_key))\n        env_vars.append(\n            client.V1EnvVar(name=\"AWS_SECRET_ACCESS_KEY\", value=aws_secret_key)\n        )\n\n        # Optional: session token for temporary credentials\n        aws_session_token = os.environ.get(\"AWS_SESSION_TOKEN\")\n        if aws_session_token:\n            env_vars.append(\n                client.V1EnvVar(name=\"AWS_SESSION_TOKEN\", value=aws_session_token)\n            )\n\n        # Optional: default region\n        aws_region = os.environ.get(\"AWS_DEFAULT_REGION\") or os.environ.get(\n            \"AWS_REGION\"\n        )\n        if aws_region:\n            env_vars.append(\n                client.V1EnvVar(name=\"AWS_DEFAULT_REGION\", value=aws_region)\n            )\n\n        logger.info(\"Using local AWS credentials for sandbox init container\")\n\n    return env_vars\n\n\ndef _build_filtered_symlink_script(\n    session_path: str,\n    excluded_user_library_paths: list[str],\n) -> str:\n    \"\"\"Build a shell script that creates filtered symlinks for user_library.\n\n    Creates symlinks for all top-level directories in /workspace/files/,\n    then selectively symlinks user_library files, excluding disabled paths.\n\n    TODO: Replace this inline shell script with a standalone Python script\n    that gets copied onto the pod and invoked with arguments. This would\n    be easier to test and maintain.\n\n    Args:\n        session_path: The session directory path in the pod\n        excluded_user_library_paths: Paths to exclude from symlinks\n    \"\"\"\n    excluded_paths_lines = \"\\n\".join(p.lstrip(\"/\") for p in excluded_user_library_paths)\n    heredoc_delim = f\"_EXCL_{uuid4().hex[:12]}_\"\n    return f\"\"\"\n# Create filtered files directory with exclusions\nmkdir -p {session_path}/files\n\n# Symlink all top-level directories except user_library\nfor item in /workspace/files/*; do\n    [ -e \"$item\" ] || continue\n    name=$(basename \"$item\")\n    if [ \"$name\" != \"user_library\" ]; then\n        ln -sf \"$item\" {session_path}/files/\"$name\"\n    fi\ndone\n\n# Write excluded paths to a temp file (one per line, via heredoc for safety)\nEXCL_FILE=$(mktemp)\ncat > \"$EXCL_FILE\" << '{heredoc_delim}'\n{excluded_paths_lines}\n{heredoc_delim}\n\n# Check if a relative path is excluded (exact match or child of excluded dir)\nis_excluded() {{\n    local rel_path=\"$1\"\n    while IFS= read -r excl || [ -n \"$excl\" ]; do\n        [ -z \"$excl\" ] && continue\n        if [ \"$rel_path\" = \"$excl\" ]; then\n            return 0\n        fi\n        case \"$rel_path\" in\n            \"$excl\"/*) return 0 ;;\n        esac\n    done < \"$EXCL_FILE\"\n    return 1\n}}\n\n# Recursively create symlinks for non-excluded files\ncreate_filtered_symlinks() {{\n    src_dir=\"$1\"\n    dst_dir=\"$2\"\n    rel_base=\"$3\"\n\n    for item in \"$src_dir\"/*; do\n        [ -e \"$item\" ] || continue\n        name=$(basename \"$item\")\n        if [ -n \"$rel_base\" ]; then\n            rel_path=\"$rel_base/$name\"\n        else\n            rel_path=\"$name\"\n        fi\n\n        if is_excluded \"$rel_path\"; then\n            continue\n        fi\n\n        if [ -d \"$item\" ]; then\n            mkdir -p \"$dst_dir/$name\"\n            create_filtered_symlinks \"$item\" \"$dst_dir/$name\" \"$rel_path\"\n            rmdir \"$dst_dir/$name\" 2>/dev/null || true\n        else\n            ln -sf \"$item\" \"$dst_dir/$name\"\n        fi\n    done\n}}\n\nif [ -d \"/workspace/files/user_library\" ]; then\n    mkdir -p {session_path}/files/user_library\n    create_filtered_symlinks /workspace/files/user_library {session_path}/files/user_library \"\"\n    rmdir {session_path}/files/user_library 2>/dev/null || true\nfi\n\nrm -f \"$EXCL_FILE\"\n\"\"\"\n\n\nclass KubernetesSandboxManager(SandboxManager):\n    \"\"\"Kubernetes-based sandbox manager for production deployments.\n\n    Manages sandboxes as Kubernetes pods with:\n    - Init containers for S3 file sync (snapshots, knowledge files, uploads)\n    - Main sandbox container running Next.js + opencode agent\n    - ClusterIP services for network access\n\n    IMPORTANT: This manager does NOT interface with the database directly.\n    All database operations should be handled by the caller.\n\n    This is a singleton class - use get_sandbox_manager() to get the instance.\n    \"\"\"\n\n    _instance: \"KubernetesSandboxManager | None\" = None\n    _lock = threading.Lock()\n\n    def __new__(cls) -> \"KubernetesSandboxManager\":\n        if cls._instance is None:\n            with cls._lock:\n                if cls._instance is None:\n                    cls._instance = super().__new__(cls)\n                    cls._instance._initialize()\n        return cls._instance\n\n    def _initialize(self) -> None:\n        \"\"\"Initialize Kubernetes client and configuration.\"\"\"\n        # Load Kubernetes config (in-cluster or kubeconfig)\n        try:\n            config.load_incluster_config()\n            logger.info(\"Loaded in-cluster Kubernetes configuration\")\n        except config.ConfigException:\n            try:\n                config.load_kube_config()\n                logger.info(\"Loaded kubeconfig from default location\")\n            except config.ConfigException as e:\n                raise RuntimeError(\n                    f\"Failed to load Kubernetes configuration: {e}\"\n                ) from e\n\n        # IMPORTANT: We use separate ApiClient instances for REST vs streaming operations.\n        # The kubernetes.stream.stream function monkey-patches the ApiClient's request\n        # method to use WebSocket. If we share the same ApiClient for both REST and\n        # streaming, the patching can leak, causing REST calls to erroneously use\n        # WebSocket (resulting in \"Handshake status 200 OK\" errors).\n        self._rest_api_client = client.ApiClient()\n        self._stream_api_client = client.ApiClient()\n\n        # Use the REST client for standard CRUD operations\n        self._core_api = client.CoreV1Api(api_client=self._rest_api_client)\n        self._batch_api = client.BatchV1Api(api_client=self._rest_api_client)\n        self._networking_api = client.NetworkingV1Api(api_client=self._rest_api_client)\n\n        # Use a separate client for streaming/exec operations\n        self._stream_core_api = client.CoreV1Api(api_client=self._stream_api_client)\n\n        self._namespace = SANDBOX_NAMESPACE\n        self._image = SANDBOX_CONTAINER_IMAGE\n        self._s3_bucket = SANDBOX_S3_BUCKET\n        self._service_account = SANDBOX_SERVICE_ACCOUNT_NAME\n        self._file_sync_service_account = SANDBOX_FILE_SYNC_SERVICE_ACCOUNT\n\n        # Load AGENTS.md template path\n        build_dir = Path(__file__).parent.parent.parent  # /onyx/server/features/build/\n        self._agent_instructions_template_path = build_dir / \"AGENTS.template.md\"\n        self._skills_path = Path(__file__).parent / \"docker\" / \"skills\"\n\n        logger.info(\n            f\"KubernetesSandboxManager initialized: namespace={self._namespace}, image={self._image}\"\n        )\n\n    def _get_pod_name(self, sandbox_id: str) -> str:\n        \"\"\"Generate pod name from sandbox ID.\"\"\"\n        return f\"sandbox-{str(sandbox_id)[:8]}\"\n\n    def _get_service_name(self, sandbox_id: str) -> str:\n        \"\"\"Generate service name from sandbox ID.\"\"\"\n        return self._get_pod_name(sandbox_id)\n\n    def _get_nextjs_url(self, sandbox_id: str, port: int) -> str:\n        \"\"\"Get the internal cluster URL for a session's Next.js server.\n\n        Args:\n            sandbox_id: The sandbox ID (string)\n            port: The session's allocated Next.js port\n\n        Returns:\n            Internal cluster URL for the Next.js server on the specified port\n        \"\"\"\n        service_name = self._get_service_name(sandbox_id)\n        return f\"http://{service_name}.{self._namespace}.svc.cluster.local:{port}\"\n\n    def _load_agent_instructions(\n        self,\n        files_path: Path | None = None,\n        provider: str | None = None,\n        model_name: str | None = None,\n        nextjs_port: int | None = None,\n        disabled_tools: list[str] | None = None,\n        user_name: str | None = None,\n        user_role: str | None = None,\n        use_demo_data: bool = False,\n        include_org_info: bool = False,\n    ) -> str:\n        \"\"\"Load and populate agent instructions from template file.\n\n\n        Args:\n            files_path: Path to the files directory (symlink to knowledge sources)\n            provider: LLM provider type\n            model_name: Model name\n            nextjs_port: Next.js port\n            disabled_tools: List of disabled tools\n            user_name: User's name for personalization\n            user_role: User's role/title for personalization\n            use_demo_data: If True, exclude user context from AGENTS.md\n            include_org_info: Whether to include the org_info section (demo data mode)\n\n        Returns:\n            Populated agent instructions content\n\n        Note:\n            In Kubernetes mode, files_path refers to paths inside the pod.\n            Since the backend cannot access the pod filesystem, these are passed as None\n            to leave placeholders intact for the container script to resolve at runtime.\n        \"\"\"\n        return generate_agent_instructions(\n            template_path=self._agent_instructions_template_path,\n            skills_path=self._skills_path,\n            files_path=files_path,\n            provider=provider,\n            model_name=model_name,\n            nextjs_port=nextjs_port,\n            disabled_tools=disabled_tools,\n            user_name=user_name,\n            user_role=user_role,\n            use_demo_data=use_demo_data,\n            include_org_info=include_org_info,\n        )\n\n    def _create_sandbox_pod(\n        self,\n        sandbox_id: str,\n        user_id: str,\n        tenant_id: str,\n    ) -> client.V1Pod:\n        \"\"\"Create Pod specification for sandbox (user-level).\n\n        Creates pod with:\n        - files/ directory synced from S3 (shared across sessions)\n        - sessions/ directory for per-session workspaces\n\n        NOTE: Session-specific setup is done via setup_session_workspace().\n        \"\"\"\n        pod_name = self._get_pod_name(sandbox_id)\n\n        # File-sync sidecar container for S3 file sync (knowledge files only)\n        # Runs as sidecar (not init container) so we can trigger incremental syncs\n        # via kubectl exec after new documents are indexed\n        file_sync_container = client.V1Container(\n            name=\"file-sync\",\n            image=\"peakcom/s5cmd:v2.3.0\",\n            env=_get_local_aws_credential_env_vars(),\n            command=[\"/bin/sh\", \"-c\"],\n            args=[\n                f\"\"\"\n# Handle signals for graceful container termination\ntrap 'echo \"Shutting down\"; exit 0' TERM INT\n\necho \"Starting initial file sync\"\necho \"S3: s3://{self._s3_bucket}/{tenant_id}/knowledge/{user_id}/*\"\necho \"Local: /workspace/files/\"\n\n# s5cmd sync (default 256 workers)\n# Exit codes: 0=success, 1=success with warnings\nsync_exit_code=0\n/s5cmd --stat sync \\\n    \"s3://{self._s3_bucket}/{tenant_id}/knowledge/{user_id}/*\" \\\n    /workspace/files/ 2>&1 || sync_exit_code=$?\n\necho \"=== Initial sync finished (exit code: $sync_exit_code) ===\"\n\n# Handle result\nif [ $sync_exit_code -eq 0 ] || [ $sync_exit_code -eq 1 ]; then\n    file_count=$(find /workspace/files -type f 2>/dev/null | wc -l)\n    echo \"Files synced: $file_count\"\n    echo \"Sidecar ready for incremental syncs\"\nelse\n    echo \"ERROR: Initial sync failed (exit code: $sync_exit_code)\"\n    exit $sync_exit_code\nfi\n\n# Stay alive for incremental syncs via kubectl exec\nwhile true; do\n    sleep 30 &\n    wait $!\ndone\n\"\"\"\n            ],\n            volume_mounts=[\n                client.V1VolumeMount(name=\"files\", mount_path=\"/workspace/files\"),\n                # Mount sessions directory so file-sync can create snapshots\n                client.V1VolumeMount(\n                    name=\"workspace\", mount_path=\"/workspace/sessions\"\n                ),\n            ],\n            resources=client.V1ResourceRequirements(\n                # Reduced resources since sidecar is mostly idle (sleeping)\n                requests={\"cpu\": \"250m\", \"memory\": \"256Mi\"},\n                limits={\"cpu\": \"4000m\", \"memory\": \"8Gi\"},\n            ),\n        )\n\n        # Main sandbox container\n        # Note: Container ports are informational only in K8s. Each session's Next.js\n        # server binds to its allocated port from the SANDBOX_NEXTJS_PORT_START-END range.\n        # We declare all ports for documentation, tooling, and network policies.\n        container_ports = [\n            client.V1ContainerPort(name=\"agent\", container_port=AGENT_PORT),\n        ]\n        # Add ports for session Next.js servers (one port per potential session)\n        for port in range(SANDBOX_NEXTJS_PORT_START, SANDBOX_NEXTJS_PORT_END):\n            container_ports.append(\n                client.V1ContainerPort(\n                    name=f\"nextjs-{port}\",\n                    container_port=port,\n                )\n            )\n\n        sandbox_container = client.V1Container(\n            name=\"sandbox\",\n            image=self._image,\n            image_pull_policy=\"IfNotPresent\",\n            ports=container_ports,\n            volume_mounts=[\n                client.V1VolumeMount(\n                    name=\"files\", mount_path=\"/workspace/files\", read_only=True\n                ),\n                # Mount sessions directory (shared with file-sync for snapshots)\n                client.V1VolumeMount(\n                    name=\"workspace\", mount_path=\"/workspace/sessions\"\n                ),\n            ],\n            resources=client.V1ResourceRequirements(\n                requests={\"cpu\": \"1000m\", \"memory\": \"2Gi\"},\n                limits={\"cpu\": \"2000m\", \"memory\": \"10Gi\"},\n            ),\n            # TODO: Re-enable probes when sandbox container runs actual services.\n            # Note: Next.js ports are now per-session (dynamic), so container-level\n            # probes would need to check the agent port or use a different approach.\n            # liveness_probe=client.V1Probe(\n            #     http_get=client.V1HTTPGetAction(path=\"/global/health\", port=AGENT_PORT),\n            #     initial_delay_seconds=30,\n            #     period_seconds=30,\n            #     timeout_seconds=5,\n            #     failure_threshold=3,\n            # ),\n            security_context=client.V1SecurityContext(\n                allow_privilege_escalation=False,\n                read_only_root_filesystem=False,\n                privileged=False,\n                capabilities=client.V1Capabilities(drop=[\"ALL\"]),\n            ),\n        )\n\n        # Volumes - workspace holds sessions/, files is shared read-only\n        volumes = [\n            client.V1Volume(\n                name=\"workspace\",\n                # Increased size: holds sessions/ directory with per-session outputs\n                empty_dir=client.V1EmptyDirVolumeSource(size_limit=\"50Gi\"),\n            ),\n            client.V1Volume(\n                name=\"files\",\n                empty_dir=client.V1EmptyDirVolumeSource(size_limit=\"5Gi\"),\n            ),\n        ]\n\n        # Pod spec\n        # Note: file_sync_container runs as sidecar (not init container) so we can\n        # trigger incremental S3 syncs via kubectl exec after new documents are indexed\n        pod_spec = client.V1PodSpec(\n            service_account_name=self._file_sync_service_account,\n            containers=[sandbox_container, file_sync_container],\n            volumes=volumes,\n            restart_policy=\"Never\",\n            termination_grace_period_seconds=10,  # Fast pod termination\n            # CRITICAL: Disable service environment variable injection\n            # Without this, Kubernetes injects env vars for ALL services in the namespace,\n            # which can exceed ARG_MAX (2.6MB) when there are many sandbox pods.\n            # With 40+ sandboxes × 100 ports × 4 env vars each = ~16k env vars (~2.2MB)\n            # This causes \"exec /bin/sh: argument list too long\" errors.\n            enable_service_links=False,\n            # Node selection for sandbox nodes\n            node_selector={\"onyx.app/workload\": \"sandbox\"},\n            tolerations=[\n                client.V1Toleration(\n                    key=\"workload\",\n                    operator=\"Equal\",\n                    value=\"sandbox\",\n                    effect=\"NoSchedule\",\n                ),\n            ],\n            # Security context for pod\n            security_context=client.V1PodSecurityContext(\n                run_as_non_root=True,\n                run_as_user=1000,\n                fs_group=1000,\n                seccomp_profile=client.V1SeccompProfile(type=\"RuntimeDefault\"),\n            ),\n            # Disable host access\n            host_network=False,\n            host_pid=False,\n            host_ipc=False,\n        )\n\n        return client.V1Pod(\n            api_version=\"v1\",\n            kind=\"Pod\",\n            metadata=client.V1ObjectMeta(\n                name=pod_name,\n                namespace=self._namespace,\n                labels={\n                    \"app.kubernetes.io/component\": \"sandbox\",\n                    \"app.kubernetes.io/managed-by\": \"onyx\",\n                    \"onyx.app/sandbox-id\": sandbox_id,\n                    \"onyx.app/tenant-id\": tenant_id,\n                },\n            ),\n            spec=pod_spec,\n        )\n\n    def _create_sandbox_service(\n        self,\n        sandbox_id: UUID,\n        tenant_id: str,\n    ) -> client.V1Service:\n        \"\"\"Create ClusterIP Service for sandbox pod.\n\n        Exposes the agent port and a range of ports for per-session Next.js servers.\n        The port range matches SANDBOX_NEXTJS_PORT_START to SANDBOX_NEXTJS_PORT_END.\n        \"\"\"\n        # Convert UUID objects to strings if needed (Kubernetes client requires strings)\n        sandbox_id_str: str = str(sandbox_id)\n        tenant_id_str: str = str(tenant_id)\n\n        service_name = self._get_service_name(sandbox_id_str)\n\n        # Build port list: agent port + all session Next.js ports\n        ports = [\n            client.V1ServicePort(name=\"agent\", port=AGENT_PORT, target_port=AGENT_PORT),\n        ]\n\n        # Add ports for session Next.js servers (one port per potential session)\n        for port in range(SANDBOX_NEXTJS_PORT_START, SANDBOX_NEXTJS_PORT_END):\n            ports.append(\n                client.V1ServicePort(\n                    name=f\"nextjs-{port}\",\n                    port=port,\n                    target_port=port,\n                )\n            )\n\n        return client.V1Service(\n            api_version=\"v1\",\n            kind=\"Service\",\n            metadata=client.V1ObjectMeta(\n                name=service_name,\n                namespace=self._namespace,\n                labels={\n                    \"app.kubernetes.io/component\": \"sandbox\",\n                    \"app.kubernetes.io/managed-by\": \"onyx\",\n                    \"onyx.app/sandbox-id\": sandbox_id_str,\n                    \"onyx.app/tenant-id\": tenant_id_str,\n                },\n            ),\n            spec=client.V1ServiceSpec(\n                type=\"ClusterIP\",\n                selector={\"onyx.app/sandbox-id\": sandbox_id_str},\n                ports=ports,\n            ),\n        )\n\n    def _ensure_service_exists(\n        self,\n        sandbox_id: UUID,\n        tenant_id: str,\n    ) -> None:\n        \"\"\"Ensure a ClusterIP service exists for the sandbox pod.\n\n        Handles the case where a service is in Terminating state (has a\n        deletion_timestamp) by waiting for deletion and recreating it.\n        This prevents a race condition where provision reuses an existing pod\n        but the old service is still being deleted.\n        \"\"\"\n        service_name = self._get_service_name(str(sandbox_id))\n\n        try:\n            svc = self._core_api.read_namespaced_service(\n                name=service_name,\n                namespace=self._namespace,\n            )\n            # Service exists - check if it's being deleted\n            if svc.metadata.deletion_timestamp:\n                logger.info(\n                    f\"Service {service_name} is terminating, waiting for deletion\"\n                )\n                self._wait_for_resource_deletion(\"service\", service_name)\n                # Now create a fresh service\n                service = self._create_sandbox_service(sandbox_id, tenant_id)\n                self._core_api.create_namespaced_service(\n                    namespace=self._namespace,\n                    body=service,\n                )\n                logger.info(f\"Recreated Service {service_name} after termination\")\n            else:\n                logger.debug(f\"Service {service_name} already exists and is active\")\n\n        except ApiException as e:\n            if e.status == 404:\n                # Service doesn't exist, create it\n                logger.info(f\"Creating missing Service {service_name}\")\n                service = self._create_sandbox_service(sandbox_id, tenant_id)\n                try:\n                    self._core_api.create_namespaced_service(\n                        namespace=self._namespace,\n                        body=service,\n                    )\n                except ApiException as svc_e:\n                    if svc_e.status != 409:  # Ignore AlreadyExists\n                        raise\n                    logger.debug(\n                        f\"Service {service_name} was created by another request\"\n                    )\n            else:\n                raise\n\n    def _get_init_container_logs(self, pod_name: str, container_name: str) -> str:\n        \"\"\"Get logs from an init container.\n\n        Args:\n            pod_name: Name of the pod\n            container_name: Name of the init container\n\n        Returns:\n            Log output from the init container, or error message if logs cannot be retrieved\n        \"\"\"\n        try:\n            logs = self._core_api.read_namespaced_pod_log(\n                name=pod_name,\n                namespace=self._namespace,\n                container=container_name,\n                tail_lines=100,  # Get last 100 lines\n            )\n            return logs if logs else \"(no logs available)\"\n        except ApiException as e:\n            return f\"(failed to retrieve logs: {e})\"\n\n    def _check_init_container_status(self, pod: client.V1Pod) -> str | None:\n        \"\"\"Check if any init containers have failed.\n\n        Args:\n            pod: The pod object\n\n        Returns:\n            Error message if an init container failed, None otherwise\n        \"\"\"\n        if not pod.status.init_container_statuses:\n            return None\n\n        for init_status in pod.status.init_container_statuses:\n            if init_status.state:\n                # Check for terminated state with non-zero exit code\n                if init_status.state.terminated:\n                    if init_status.state.terminated.exit_code != 0:\n                        container_name = init_status.name\n                        logs = self._get_init_container_logs(\n                            pod.metadata.name, container_name\n                        )\n                        return (\n                            f\"Init container '{container_name}' failed with exit code \"\n                            f\"{init_status.state.terminated.exit_code}. \"\n                            f\"Logs:\\n{logs}\"\n                        )\n                # Check for waiting state with error reason\n                elif init_status.state.waiting:\n                    if init_status.state.waiting.reason in [\n                        \"Error\",\n                        \"CrashLoopBackOff\",\n                    ]:\n                        container_name = init_status.name\n                        reason = init_status.state.waiting.reason\n                        message = init_status.state.waiting.message or \"\"\n                        return f\"Init container '{container_name}' is in '{reason}' state. Message: {message}\"\n\n        return None\n\n    def _wait_for_pod_ready(\n        self,\n        pod_name: str,\n        timeout: float = POD_READY_TIMEOUT_SECONDS,\n    ) -> bool:\n        \"\"\"Wait for pod to become ready.\n\n        Args:\n            pod_name: Name of the pod to wait for\n            timeout: Maximum time to wait in seconds\n\n        Returns:\n            True if pod is ready, False if timeout\n\n        Raises:\n            RuntimeError: If pod fails or is deleted\n        \"\"\"\n        start_time = time.time()\n\n        while time.time() - start_time < timeout:\n            try:\n                pod = self._core_api.read_namespaced_pod(\n                    name=pod_name,\n                    namespace=self._namespace,\n                )\n\n                # Check init container status first (they run before main container)\n                init_error = self._check_init_container_status(pod)\n                if init_error:\n                    raise RuntimeError(f\"Pod {pod_name} failed to start: {init_error}\")\n\n                phase = pod.status.phase\n\n                # Check for failure conditions\n                if phase == \"Failed\":\n                    # Try to get more details about the failure\n                    init_error = self._check_init_container_status(pod)\n                    error_msg = f\"Pod {pod_name} failed to start\"\n                    if init_error:\n                        error_msg += f\": {init_error}\"\n                    raise RuntimeError(error_msg)\n\n                if phase == \"Succeeded\":\n                    raise RuntimeError(\n                        f\"Pod {pod_name} completed unexpectedly (sandbox pods should run indefinitely)\"\n                    )\n\n                # Check if running and ready\n                if phase == \"Running\":\n                    conditions = pod.status.conditions or []\n                    for condition in conditions:\n                        if condition.type == \"Ready\" and condition.status == \"True\":\n                            logger.info(f\"Pod {pod_name} is ready\")\n                            return True\n\n                logger.debug(f\"Pod {pod_name} status: {phase}, waiting...\")\n\n            except ApiException as e:\n                if e.status == 404:\n                    raise RuntimeError(f\"Pod {pod_name} was deleted\")\n                logger.warning(f\"Error checking pod status: {e}\")\n\n            time.sleep(POD_READY_POLL_INTERVAL_SECONDS)\n\n        # On timeout, check one more time for init container failures\n        try:\n            pod = self._core_api.read_namespaced_pod(\n                name=pod_name,\n                namespace=self._namespace,\n            )\n            init_error = self._check_init_container_status(pod)\n            if init_error:\n                raise RuntimeError(f\"Pod {pod_name} failed to start: {init_error}\")\n        except ApiException:\n            pass  # Pod might be deleted, ignore\n\n        logger.warning(f\"Timeout waiting for pod {pod_name} to become ready\")\n        return False\n\n    def _pod_exists_and_healthy(self, pod_name: str) -> bool:\n        \"\"\"Check if a pod exists and is in a healthy/running state.\n\n        Args:\n            pod_name: Name of the pod to check\n\n        Returns:\n            True if pod exists and is running/ready, False otherwise\n        \"\"\"\n        try:\n            pod = self._core_api.read_namespaced_pod(\n                name=pod_name,\n                namespace=self._namespace,\n            )\n            phase = pod.status.phase\n\n            # Check if running and ready\n            if phase == \"Running\":\n                conditions = pod.status.conditions or []\n                for condition in conditions:\n                    if condition.type == \"Ready\" and condition.status == \"True\":\n                        return True\n\n            # Pending is OK too - pod is being created by another request\n            if phase == \"Pending\":\n                return True\n\n            return False\n        except ApiException as e:\n            if e.status == 404:\n                return False\n            raise\n\n    def provision(\n        self,\n        sandbox_id: UUID,\n        user_id: UUID,\n        tenant_id: str,\n        llm_config: LLMProviderConfig,  # noqa: ARG002\n    ) -> SandboxInfo:\n        \"\"\"Provision a new sandbox as a Kubernetes pod (user-level).\n\n        This method is idempotent - if a pod already exists and is healthy,\n        it will be reused. This prevents race conditions when multiple requests\n        try to provision the same sandbox concurrently.\n\n        Creates pod with:\n        1. Init container syncs files/ from S3\n        2. Creates sessions/ directory for per-session workspaces\n        3. Main container runs the sandbox environment\n\n        NOTE: This does NOT set up session-specific workspaces.\n        Call setup_session_workspace() to create session workspaces.\n\n        Args:\n            sandbox_id: Unique identifier for the sandbox\n            user_id: User identifier who owns this sandbox\n            tenant_id: Tenant identifier for multi-tenant isolation\n            llm_config: LLM provider configuration\n\n        Returns:\n            SandboxInfo with the provisioned sandbox details\n\n        Raises:\n            RuntimeError: If provisioning fails\n        \"\"\"\n        logger.info(\n            f\"Starting Kubernetes sandbox provisioning for sandbox {sandbox_id}, user {user_id}, tenant {tenant_id}\"\n        )\n\n        pod_name = self._get_pod_name(str(sandbox_id))\n\n        # Check if pod already exists and is healthy (idempotency check)\n        if self._pod_exists_and_healthy(pod_name):\n            logger.info(\n                f\"Pod {pod_name} already exists and is healthy, reusing existing pod\"\n            )\n            # Ensure service exists and is not terminating\n            self._ensure_service_exists(sandbox_id, tenant_id)\n\n            # Wait for pod to be ready if it's still pending\n            logger.info(f\"Waiting for existing pod {pod_name} to become ready...\")\n            if not self._wait_for_pod_ready(pod_name):\n                raise RuntimeError(\n                    f\"Timeout waiting for existing sandbox pod {pod_name} to become ready\"\n                )\n\n            logger.info(\n                f\"Reusing existing Kubernetes sandbox {sandbox_id}, pod: {pod_name}\"\n            )\n            return SandboxInfo(\n                sandbox_id=sandbox_id,\n                directory_path=f\"k8s://{self._namespace}/{pod_name}\",\n                status=SandboxStatus.RUNNING,\n                last_heartbeat=None,\n            )\n\n        try:\n            # 1. Create Pod (user-level only, no session setup)\n            logger.debug(f\"Creating Pod {pod_name}\")\n            pod = self._create_sandbox_pod(\n                sandbox_id=str(sandbox_id),\n                user_id=str(user_id),\n                tenant_id=tenant_id,\n            )\n            try:\n                self._core_api.create_namespaced_pod(\n                    namespace=self._namespace,\n                    body=pod,\n                )\n            except ApiException as e:\n                if e.status == 409:\n                    # Pod was created by another concurrent request\n                    # Check if it's healthy and reuse it\n                    logger.warning(\n                        f\"Pod {pod_name} already exists (409 conflict, this shouldn't normally happen), \"\n                        \"checking if it's healthy...\"\n                    )\n                    if self._pod_exists_and_healthy(pod_name):\n                        logger.warning(\n                            f\"During provisioning, discovered that pod {pod_name} already exists. Reusing\"\n                        )\n                        # Continue to ensure service exists and wait for ready\n                    else:\n                        # Pod exists but is not healthy - this shouldn't happen often\n                        # but could occur if a previous provision failed mid-way\n                        logger.warning(\n                            f\"Pod {pod_name} exists but is not healthy, waiting for it to become ready or fail\"\n                        )\n                else:\n                    raise\n\n            # 2. Create Service (handles terminating services)\n            self._ensure_service_exists(sandbox_id, tenant_id)\n\n            # 3. Wait for pod to be ready\n            logger.info(f\"Waiting for pod {pod_name} to become ready...\")\n            if not self._wait_for_pod_ready(pod_name):\n                raise RuntimeError(\n                    f\"Timeout waiting for sandbox pod {pod_name} to become ready\"\n                )\n\n            logger.info(\n                f\"Provisioned Kubernetes sandbox {sandbox_id}, pod: {pod_name} (no sessions yet)\"\n            )\n\n            return SandboxInfo(\n                sandbox_id=sandbox_id,\n                directory_path=f\"k8s://{self._namespace}/{pod_name}\",\n                status=SandboxStatus.RUNNING,\n                last_heartbeat=None,\n            )\n\n        except Exception as e:\n            # Only cleanup if we're sure the pod is not being used by another request\n            # Check if pod is healthy - if so, don't clean up (another request may own it)\n            if self._pod_exists_and_healthy(pod_name):\n                logger.warning(\n                    f\"Kubernetes sandbox provisioning failed for sandbox {sandbox_id}: {e}, \"\n                    \"but pod is healthy (likely owned by concurrent request), not cleaning up\"\n                )\n            else:\n                logger.error(\n                    f\"Kubernetes sandbox provisioning failed for sandbox {sandbox_id}: {e}\",\n                    exc_info=True,\n                )\n                self._cleanup_kubernetes_resources(str(sandbox_id))\n            raise\n\n    def _wait_for_resource_deletion(\n        self,\n        resource_type: str,\n        name: str,\n        timeout: float = RESOURCE_DELETION_TIMEOUT_SECONDS,\n    ) -> bool:\n        \"\"\"Wait for a Kubernetes resource to be fully deleted.\n\n        Kubernetes delete calls are asynchronous - the API returns immediately\n        but the resource may still exist in a 'Terminating' state. This method\n        polls until the resource returns 404 (not found).\n\n        Args:\n            resource_type: Type of resource (\"pod\" or \"service\")\n            name: Name of the resource\n            timeout: Maximum time to wait in seconds\n\n        Returns:\n            True if resource was deleted, False if timeout\n        \"\"\"\n        start_time = time.time()\n\n        while time.time() - start_time < timeout:\n            try:\n                if resource_type == \"pod\":\n                    self._core_api.read_namespaced_pod(\n                        name=name,\n                        namespace=self._namespace,\n                    )\n                elif resource_type == \"service\":\n                    self._core_api.read_namespaced_service(\n                        name=name,\n                        namespace=self._namespace,\n                    )\n                else:\n                    raise ValueError(f\"Unknown resource type: {resource_type}\")\n\n                # Resource still exists, wait and retry\n                logger.debug(f\"Waiting for {resource_type} {name} to be deleted...\")\n                time.sleep(RESOURCE_DELETION_POLL_INTERVAL_SECONDS)\n\n            except ApiException as e:\n                if e.status == 404:\n                    # Resource is gone\n                    logger.debug(f\"{resource_type.capitalize()} {name} fully deleted\")\n                    return True\n                # Other error, log and continue waiting\n                logger.warning(f\"Error checking {resource_type} {name} status: {e}\")\n                time.sleep(RESOURCE_DELETION_POLL_INTERVAL_SECONDS)\n\n        logger.warning(\n            f\"Timeout waiting for {resource_type} {name} to be deleted after {timeout}s\"\n        )\n        return False\n\n    def _cleanup_kubernetes_resources(\n        self,\n        sandbox_id: str,\n        wait_for_deletion: bool = True,\n    ) -> None:\n        \"\"\"Clean up Kubernetes resources for a sandbox.\n\n        Args:\n            sandbox_id: The sandbox ID to clean up\n            wait_for_deletion: If True, wait for resources to be fully deleted\n                before returning. This prevents 409 conflicts when immediately\n                re-provisioning with the same sandbox ID.\n        \"\"\"\n        # Convert UUID objects to strings if needed (Kubernetes client requires strings)\n        sandbox_id = str(sandbox_id)\n\n        pod_name = self._get_pod_name(sandbox_id)\n        service_name = self._get_service_name(sandbox_id)\n\n        # Delete in reverse order of creation\n        service_deleted = False\n        try:\n            self._core_api.delete_namespaced_service(\n                name=service_name,\n                namespace=self._namespace,\n            )\n            logger.debug(f\"Deleted Service {service_name}\")\n            service_deleted = True\n        except ApiException as e:\n            if e.status == 404:\n                # Already deleted\n                service_deleted = True\n            else:\n                logger.error(f\"Error deleting Service {service_name}: {e}\")\n                raise\n\n        pod_deleted = False\n        try:\n            self._core_api.delete_namespaced_pod(\n                name=pod_name,\n                namespace=self._namespace,\n            )\n            logger.debug(f\"Deleted Pod {pod_name}\")\n            pod_deleted = True\n        except ApiException as e:\n            if e.status == 404:\n                # Already deleted\n                pod_deleted = True\n            else:\n                logger.error(f\"Error deleting Pod {pod_name}: {e}\")\n                raise\n\n        # Wait for resources to be fully deleted to prevent 409 conflicts\n        # on immediate re-provisioning\n        if wait_for_deletion:\n            if service_deleted:\n                self._wait_for_resource_deletion(\"service\", service_name)\n            if pod_deleted:\n                self._wait_for_resource_deletion(\"pod\", pod_name)\n\n    def terminate(self, sandbox_id: UUID) -> None:\n        \"\"\"Terminate a sandbox and clean up Kubernetes resources.\n\n        Removes session mappings for this sandbox, then deletes the\n        Service and Pod. ACP clients are ephemeral (created per message),\n        so there's nothing to stop here.\n\n        Args:\n            sandbox_id: The sandbox ID to terminate\n        \"\"\"\n        # Clean up Kubernetes resources (needs string for pod/service names)\n        self._cleanup_kubernetes_resources(str(sandbox_id))\n\n        logger.info(f\"Terminated Kubernetes sandbox {sandbox_id}\")\n\n    def setup_session_workspace(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        llm_config: LLMProviderConfig,\n        nextjs_port: int,\n        file_system_path: str | None = None,  # noqa: ARG002\n        snapshot_path: str | None = None,\n        user_name: str | None = None,\n        user_role: str | None = None,\n        user_work_area: str | None = None,\n        user_level: str | None = None,\n        use_demo_data: bool = False,\n        excluded_user_library_paths: list[str] | None = None,\n    ) -> None:\n        \"\"\"Set up a session workspace within an existing sandbox pod.\n\n        Executes kubectl exec to:\n        1. Create sessions/$session_id/ directory\n        2. Create files/ symlink (to demo data or S3-synced user files)\n        3. Copy outputs template from local templates (downloaded during init)\n        4. Write AGENTS.md\n        5. Write opencode.json with LLM config\n        6. Create org_info/ directory with user identity file (if demo data enabled)\n        7. Start Next.js dev server\n\n        Note: Snapshot restoration is not supported in Kubernetes mode since the\n        main container doesn't have S3 access. Snapshots would need to be\n        pre-downloaded during pod provisioning if needed.\n\n        Args:\n            sandbox_id: The sandbox ID (must be provisioned)\n            session_id: The session ID for this workspace\n            llm_config: LLM provider configuration for opencode.json\n            file_system_path: Path to user's S3-synced knowledge files (/workspace/files)\n            snapshot_path: Optional S3 path - logged but ignored (no S3 access)\n            user_name: User's name for personalization in AGENTS.md\n            user_role: User's role/title for personalization in AGENTS.md\n            user_work_area: User's work area for demo persona (e.g., \"engineering\")\n            user_level: User's level for demo persona (e.g., \"ic\", \"manager\")\n            use_demo_data: If True, symlink files/ to /workspace/demo_data;\n                          else to /workspace/files (S3-synced user files)\n            excluded_user_library_paths: List of paths within user_library/ to exclude\n                (e.g., [\"/data/file.xlsx\"]). These files won't be accessible in the session.\n\n        Raises:\n            RuntimeError: If workspace setup fails\n        \"\"\"\n        if snapshot_path:\n            logger.warning(\n                f\"Snapshot restoration requested but not supported in Kubernetes mode. \"\n                f\"Snapshot path {snapshot_path} will be ignored. \"\n                f\"Session {session_id} will start with fresh outputs template.\"\n            )\n\n        pod_name = self._get_pod_name(str(sandbox_id))\n        session_path = f\"/workspace/sessions/{session_id}\"\n\n        # Paths inside the pod (created during workspace setup below):\n        # - {session_path}/files: symlink to knowledge sources\n        # - {session_path}/attachments: user-uploaded files\n        #\n        # Note: files_path=None leaves {{KNOWLEDGE_SOURCES_SECTION}} placeholder intact\n        # for generate_agents_md.py to resolve at container runtime by scanning /workspace/files.\n        # Attachments section is injected dynamically when first file is uploaded.\n        agent_instructions = self._load_agent_instructions(\n            files_path=None,  # Container script handles this at runtime\n            provider=llm_config.provider,\n            model_name=llm_config.model_name,\n            nextjs_port=nextjs_port,\n            disabled_tools=OPENCODE_DISABLED_TOOLS,\n            user_name=user_name,\n            user_role=user_role,\n            use_demo_data=use_demo_data,\n            include_org_info=use_demo_data,\n        )\n\n        # Build opencode config JSON using shared config builder\n        opencode_config = build_opencode_config(\n            provider=llm_config.provider,\n            model_name=llm_config.model_name,\n            api_key=llm_config.api_key if llm_config.api_key else None,\n            api_base=llm_config.api_base,\n            disabled_tools=OPENCODE_DISABLED_TOOLS,\n        )\n\n        opencode_json = json.dumps(opencode_config)\n        # Escape for shell\n        opencode_json_escaped = opencode_json.replace(\"'\", \"'\\\\''\")\n        agent_instructions_escaped = agent_instructions.replace(\"'\", \"'\\\\''\")\n\n        # Build org_info setup script if persona is set\n        # Uses shared constants from persona_mapping module as single source of truth\n        org_info_setup = \"\"\n        if user_work_area:\n            persona = get_persona_info(user_work_area, user_level)\n            if persona:\n                # Escape content for shell (single quotes)\n                agents_md_escaped = ORG_INFO_AGENTS_MD.replace(\"'\", \"'\\\\''\")\n                identity_escaped = generate_user_identity_content(persona).replace(\n                    \"'\", \"'\\\\''\"\n                )\n                org_structure_escaped = json.dumps(\n                    ORGANIZATION_STRUCTURE, indent=2\n                ).replace(\"'\", \"'\\\\''\")\n\n                org_info_setup = f\"\"\"\n# Create org_info directory with all files\nmkdir -p {session_path}/org_info\nprintf '%s' '{agents_md_escaped}' > {session_path}/org_info/AGENTS.md\nprintf '%s' '{identity_escaped}' > {session_path}/org_info/user_identity_profile.txt\nprintf '%s' '{org_structure_escaped}' > {session_path}/org_info/organization_structure.json\n\"\"\"\n\n        # Build files symlink setup\n        # Choose between demo data (baked in image) or user's S3-synced files\n        if use_demo_data:\n            # Demo mode: symlink to demo data baked into the container image\n            symlink_target = \"/workspace/demo_data\"\n            files_symlink_setup = f\"\"\"\n# Create files symlink to demo data (baked into image)\necho \"Creating files symlink to demo data: {symlink_target}\"\nln -sf {symlink_target} {session_path}/files\n\"\"\"\n        elif excluded_user_library_paths:\n            files_symlink_setup = _build_filtered_symlink_script(\n                session_path, excluded_user_library_paths\n            )\n        else:\n            # Normal mode: symlink to user's S3-synced knowledge files\n            symlink_target = \"/workspace/files\"\n            files_symlink_setup = f\"\"\"\n# Create files symlink to user's knowledge files (synced from S3)\necho \"Creating files symlink to user files: {symlink_target}\"\nln -sf {symlink_target} {session_path}/files\n\"\"\"\n\n        # Copy outputs template from baked-in location and install npm dependencies\n        outputs_setup = f\"\"\"\n# Copy outputs template (baked into image at build time)\necho \"Copying outputs template\"\nif [ -d /workspace/templates/outputs ]; then\n    cp -r /workspace/templates/outputs/* {session_path}/outputs/\n    # Install npm dependencies\n    echo \"Installing npm dependencies...\"\n    cd {session_path}/outputs/web && npm install\nelse\n    echo \"Warning: outputs template not found at /workspace/templates/outputs\"\n    mkdir -p {session_path}/outputs/web\nfi\n\"\"\"\n\n        # Build NextJS startup script (npm install already done in outputs_setup)\n        nextjs_start_script = _build_nextjs_start_script(\n            session_path, nextjs_port, check_node_modules=False\n        )\n\n        setup_script = f\"\"\"\nset -e\n\n# Create session directory structure\necho \"Creating session directory: {session_path}\"\nmkdir -p {session_path}/outputs\nmkdir -p {session_path}/attachments\n{files_symlink_setup}\n# Setup outputs\n{outputs_setup}\n\n# Symlink skills (baked into image at /workspace/skills/)\nif [ -d /workspace/skills ]; then\n    mkdir -p {session_path}/.opencode\n    ln -sf /workspace/skills {session_path}/.opencode/skills\n    echo \"Linked skills to /workspace/skills\"\nfi\n\n# Write agent instructions\necho \"Writing AGENTS.md\"\nprintf '%s' '{agent_instructions_escaped}' > {session_path}/AGENTS.md\n\n# Populate knowledge sources by scanning the files directory\npython3 /usr/local/bin/generate_agents_md.py {session_path}/AGENTS.md {session_path}/files || true\n\n# Write opencode config\necho \"Writing opencode.json\"\nprintf '%s' '{opencode_json_escaped}' > {session_path}/opencode.json\n{org_info_setup}\n# Start Next.js dev server\n{nextjs_start_script}\n\necho \"Session workspace setup complete\"\n\"\"\"\n\n        logger.info(\n            f\"Setting up session workspace {session_id} in sandbox {sandbox_id}\"\n        )\n\n        try:\n            # Execute setup script in the pod\n            exec_response = k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                command=[\"/bin/sh\", \"-c\", setup_script],\n                container=\"sandbox\",\n                stderr=True,\n                stdin=False,\n                stdout=True,\n                tty=False,\n            )\n\n            logger.debug(f\"Session setup output: {exec_response}\")\n            logger.info(\n                f\"Set up session workspace {session_id} in sandbox {sandbox_id}\"\n            )\n\n        except Exception as e:\n            logger.error(\n                f\"Failed to setup session workspace {session_id} in sandbox {sandbox_id}: {e}\",\n                exc_info=True,\n            )\n            raise RuntimeError(\n                f\"Failed to setup session workspace {session_id}: {e}\"\n            ) from e\n\n    def cleanup_session_workspace(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        nextjs_port: int | None = None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Clean up a session workspace (on session delete).\n\n        Removes the ACP session mapping and executes kubectl exec to remove\n        the session directory. The shared ACP client persists for other sessions.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID to clean up\n            nextjs_port: Optional port where Next.js server is running (unused in K8s,\n                        we use PID file instead)\n        \"\"\"\n        pod_name = self._get_pod_name(str(sandbox_id))\n        session_path = f\"/workspace/sessions/{session_id}\"\n\n        cleanup_script = f\"\"\"\nset -e\n\n# Kill Next.js server if running\nif [ -f {session_path}/nextjs.pid ]; then\n    NEXTJS_PID=$(cat {session_path}/nextjs.pid)\n    echo \"Stopping Next.js server (PID: $NEXTJS_PID)\"\n    kill $NEXTJS_PID 2>/dev/null || true\nfi\n\necho \"Removing session directory: {session_path}\"\nrm -rf {session_path}\necho \"Session cleanup complete\"\n\"\"\"\n\n        logger.info(\n            f\"Cleaning up session workspace {session_id} in sandbox {sandbox_id}\"\n        )\n\n        try:\n            exec_response = k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                command=[\"/bin/sh\", \"-c\", cleanup_script],\n                container=\"sandbox\",\n                stderr=True,\n                stdin=False,\n                stdout=True,\n                tty=False,\n            )\n\n            logger.debug(f\"Session cleanup output: {exec_response}\")\n            logger.info(\n                f\"Cleaned up session workspace {session_id} in sandbox {sandbox_id}\"\n            )\n\n        except ApiException as e:\n            if e.status == 404:\n                # Pod not found, nothing to clean up\n                logger.debug(f\"Pod {pod_name} not found, skipping cleanup\")\n            else:\n                logger.warning(f\"Error cleaning up session workspace {session_id}: {e}\")\n        except Exception as e:\n            logger.warning(f\"Error cleaning up session workspace {session_id}: {e}\")\n\n    def create_snapshot(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        tenant_id: str,\n    ) -> SnapshotResult | None:\n        \"\"\"Create a snapshot of a session's outputs and attachments directories.\n\n        For Kubernetes backend, we exec into the file-sync container to create\n        the snapshot and upload to S3. Captures:\n        - sessions/$session_id/outputs/ (generated artifacts, web apps)\n        - sessions/$session_id/attachments/ (user uploaded files)\n        - sessions/$session_id/.opencode-data/ (opencode session data for resumption)\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID to snapshot\n            tenant_id: Tenant identifier for storage path\n\n        Returns:\n            SnapshotResult with storage path and size, or None if nothing to snapshot\n\n        Raises:\n            RuntimeError: If snapshot creation fails\n        \"\"\"\n        sandbox_id_str = str(sandbox_id)\n        session_id_str = str(session_id)\n        pod_name = self._get_pod_name(sandbox_id_str)\n        snapshot_id = str(uuid4())\n\n        # Use shlex.quote for safety (UUIDs are safe but good practice)\n        safe_session_path = shlex.quote(f\"/workspace/sessions/{session_id_str}\")\n        s3_path = f\"s3://{self._s3_bucket}/{tenant_id}/snapshots/{session_id_str}/{snapshot_id}.tar.gz\"\n\n        # Create tar and upload to S3 via file-sync container.\n        # .opencode-data/ is already on the shared workspace volume because we set\n        # XDG_DATA_HOME to the session directory when starting opencode (see\n        # ACPExecClient.start()). No cross-container copy needed.\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            f\"\"\"\nset -eo pipefail\ncd {safe_session_path}\nif [ ! -d outputs ]; then\n    echo \"EMPTY_SNAPSHOT\"\n    exit 0\nfi\ndirs=\"outputs\"\n[ -d attachments ] && [ \"$(ls -A attachments 2>/dev/null)\" ] && dirs=\"$dirs attachments\"\n[ -d .opencode-data ] && [ \"$(ls -A .opencode-data 2>/dev/null)\" ] && dirs=\"$dirs .opencode-data\"\ntar -czf - $dirs | /s5cmd pipe {s3_path}\necho \"SNAPSHOT_CREATED\"\n\"\"\",\n        ]\n\n        try:\n            # Use exec to run snapshot command in file-sync container (has s5cmd)\n            resp = k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                container=\"file-sync\",\n                command=exec_command,\n                stderr=True,\n                stdin=False,\n                stdout=True,\n                tty=False,\n            )\n\n            logger.debug(f\"Snapshot exec output: {resp}\")\n\n            # Check if nothing was snapshotted\n            if \"EMPTY_SNAPSHOT\" in resp:\n                logger.info(\n                    f\"No outputs or attachments to snapshot for session {session_id}\"\n                )\n                return None\n\n            # Verify upload succeeded\n            if \"SNAPSHOT_CREATED\" not in resp:\n                raise RuntimeError(f\"Snapshot upload may have failed. Output: {resp}\")\n\n        except ApiException as e:\n            raise RuntimeError(f\"Failed to create snapshot: {e}\") from e\n\n        # Estimate size (we can't easily get exact size from streamed tar)\n        # In production, you might want to query S3 for the actual size\n        size_bytes = 0\n\n        # Storage path must match the S3 upload path (without s3://bucket/ prefix)\n        storage_path = f\"{tenant_id}/snapshots/{session_id_str}/{snapshot_id}.tar.gz\"\n\n        logger.info(f\"Created snapshot for session {session_id}\")\n\n        return SnapshotResult(\n            storage_path=storage_path,\n            size_bytes=size_bytes,\n        )\n\n    def session_workspace_exists(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n    ) -> bool:\n        \"\"\"Check if a session's workspace directory exists in the pod.\n\n        Execs into pod to check for /workspace/sessions/{session_id}/outputs/.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID to check\n\n        Returns:\n            True if the session workspace exists, False otherwise\n        \"\"\"\n        pod_name = self._get_pod_name(str(sandbox_id))\n        session_path = f\"/workspace/sessions/{session_id}/outputs\"\n\n        # Use exec to check if directory exists\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            f'[ -d \"{session_path}\" ] && echo \"WORKSPACE_FOUND\" || echo \"WORKSPACE_MISSING\"',\n        ]\n\n        try:\n            resp = k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                container=\"sandbox\",\n                command=exec_command,\n                stderr=True,\n                stdin=False,\n                stdout=True,\n                tty=False,\n            )\n\n            result = \"WORKSPACE_FOUND\" in resp\n            logger.info(\n                f\"[WORKSPACE_CHECK] session={session_id}, path={session_path}, raw_resp={resp!r}, result={result}\"\n            )\n            return result\n\n        except ApiException as e:\n            logger.warning(\n                f\"Failed to check session workspace exists for {session_id}: {e}\"\n            )\n            return False\n\n    def restore_snapshot(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        snapshot_storage_path: str,\n        tenant_id: str,  # noqa: ARG002\n        nextjs_port: int,\n        llm_config: LLMProviderConfig,\n        use_demo_data: bool = False,\n    ) -> None:\n        \"\"\"Download snapshot from S3 via s5cmd, extract, regenerate config, and start NextJS.\n\n        Uses the file-sync sidecar container (which has s5cmd + S3 credentials\n        via IRSA) to stream the snapshot directly from S3 into the session\n        directory. This avoids downloading to the backend server and the\n        base64 encoding overhead of piping through kubectl exec.\n\n        Steps:\n        1. Exec s5cmd cat in file-sync container to stream snapshot from S3\n        2. Pipe directly to tar for extraction in the shared workspace volume\n           (.opencode-data/ is restored automatically since XDG_DATA_HOME points here)\n        3. Regenerate configuration files (AGENTS.md, opencode.json, files symlink)\n        4. Start the NextJS dev server\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID to restore\n            snapshot_storage_path: Path to the snapshot in S3 (relative path)\n            tenant_id: Tenant identifier for storage access\n            nextjs_port: Port number for the NextJS dev server\n            llm_config: LLM provider configuration for opencode.json\n            use_demo_data: If True, symlink files/ to demo data; else to user files\n\n        Raises:\n            RuntimeError: If snapshot restoration fails\n        \"\"\"\n        pod_name = self._get_pod_name(str(sandbox_id))\n        session_path = f\"/workspace/sessions/{session_id}\"\n        safe_session_path = shlex.quote(session_path)\n\n        s3_path = f\"s3://{self._s3_bucket}/{snapshot_storage_path}\"\n\n        # Stream snapshot directly from S3 via s5cmd in file-sync container.\n        # Mirrors the upload pattern: upload uses `tar | s5cmd pipe`,\n        # restore uses `s5cmd cat | tar`. Both run in file-sync container\n        # which has s5cmd and S3 credentials (IRSA). The shared workspace\n        # volume makes extracted files immediately visible to the sandbox\n        # container.\n        restore_script = f\"\"\"\nset -eo pipefail\nmkdir -p {safe_session_path}\n/s5cmd cat {s3_path} | tar -xzf - -C {safe_session_path}\necho \"SNAPSHOT_RESTORED\"\n\"\"\"\n\n        try:\n            resp = k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                container=\"file-sync\",\n                command=[\"/bin/sh\", \"-c\", restore_script],\n                stderr=True,\n                stdin=False,\n                stdout=True,\n                tty=False,\n            )\n\n            if \"SNAPSHOT_RESTORED\" not in resp:\n                raise RuntimeError(f\"Snapshot restore may have failed. Output: {resp}\")\n\n            # Regenerate configuration files that aren't in the snapshot\n            # These are regenerated to ensure they match the current system state\n            self._regenerate_session_config(\n                pod_name=pod_name,\n                session_path=safe_session_path,\n                llm_config=llm_config,\n                nextjs_port=nextjs_port,\n                use_demo_data=use_demo_data,\n            )\n\n            # Start NextJS dev server (check node_modules since restoring from snapshot)\n            start_script = _build_nextjs_start_script(\n                safe_session_path, nextjs_port, check_node_modules=True\n            )\n            k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                container=\"sandbox\",\n                command=[\"/bin/sh\", \"-c\", start_script],\n                stderr=True,\n                stdin=False,\n                stdout=True,\n                tty=False,\n            )\n        except ApiException as e:\n            raise RuntimeError(f\"Failed to restore snapshot: {e}\") from e\n\n    def _regenerate_session_config(\n        self,\n        pod_name: str,\n        session_path: str,\n        llm_config: LLMProviderConfig,\n        nextjs_port: int,\n        use_demo_data: bool,\n    ) -> None:\n        \"\"\"Regenerate session configuration files after snapshot restore.\n\n        Creates:\n        - AGENTS.md (agent instructions)\n        - opencode.json (LLM configuration)\n        - files symlink (to demo data or user files)\n\n        Args:\n            pod_name: The pod name to exec into\n            session_path: Path to the session directory (already shlex.quoted)\n            llm_config: LLM provider configuration\n            nextjs_port: Port for NextJS (used in AGENTS.md)\n            use_demo_data: Whether to use demo data or user files\n        \"\"\"\n        # Generate AGENTS.md content\n        agent_instructions = self._load_agent_instructions(\n            files_path=None,  # Container script handles this at runtime\n            provider=llm_config.provider,\n            model_name=llm_config.model_name,\n            nextjs_port=nextjs_port,\n            disabled_tools=OPENCODE_DISABLED_TOOLS,\n            user_name=None,  # Not stored, regenerate without personalization\n            user_role=None,\n            use_demo_data=use_demo_data,\n            include_org_info=False,  # Don't include org_info for restored sessions\n        )\n\n        # Generate opencode.json\n        opencode_config = build_opencode_config(\n            provider=llm_config.provider,\n            model_name=llm_config.model_name,\n            api_key=llm_config.api_key if llm_config.api_key else None,\n            api_base=llm_config.api_base,\n            disabled_tools=OPENCODE_DISABLED_TOOLS,\n        )\n        opencode_json = json.dumps(opencode_config)\n\n        # Escape for shell (single quotes)\n        opencode_json_escaped = opencode_json.replace(\"'\", \"'\\\\''\")\n        agent_instructions_escaped = agent_instructions.replace(\"'\", \"'\\\\''\")\n\n        # Build files symlink setup\n        if use_demo_data:\n            symlink_target = \"/workspace/demo_data\"\n        else:\n            symlink_target = \"/workspace/files\"\n\n        config_script = f\"\"\"\nset -e\n\n# Create files symlink\necho \"Creating files symlink to {symlink_target}\"\nln -sf {symlink_target} {session_path}/files\n\n# Write agent instructions\necho \"Writing AGENTS.md\"\nprintf '%s' '{agent_instructions_escaped}' > {session_path}/AGENTS.md\n\n# Populate knowledge sources by scanning the files directory\npython3 /usr/local/bin/generate_agents_md.py {session_path}/AGENTS.md {session_path}/files || true\n\n# Write opencode config\necho \"Writing opencode.json\"\nprintf '%s' '{opencode_json_escaped}' > {session_path}/opencode.json\n\necho \"Session config regeneration complete\"\n\"\"\"\n\n        logger.info(\"Regenerating session configuration files\")\n        k8s_stream(\n            self._stream_core_api.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=self._namespace,\n            container=\"sandbox\",\n            command=[\"/bin/sh\", \"-c\", config_script],\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        logger.info(\"Session configuration files regenerated\")\n\n    def health_check(self, sandbox_id: UUID, timeout: float = 60.0) -> bool:\n        \"\"\"Check if the sandbox pod is healthy (can exec into it).\n\n        Args:\n            sandbox_id: The sandbox ID to check\n            timeout: Health check timeout in seconds\n\n        Returns:\n            True if sandbox is healthy, False otherwise\n        \"\"\"\n        pod_name = self._get_pod_name(str(sandbox_id))\n        exec_client = ACPExecClient(\n            pod_name=pod_name,\n            namespace=self._namespace,\n            container=\"sandbox\",\n        )\n        return exec_client.health_check(timeout=timeout)\n\n    def _create_ephemeral_acp_client(\n        self, sandbox_id: UUID, session_path: str\n    ) -> ACPExecClient:\n        \"\"\"Create a new ephemeral ACP client for a single message exchange.\n\n        Each call starts a fresh `opencode acp` process in the sandbox pod.\n        The process is short-lived — stopped after the message completes.\n        This prevents the bug where multiple long-lived processes (one per\n        API replica) operate on the same session's flat file storage\n        concurrently, causing the JSON-RPC response to be silently lost.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_path: Working directory for the session (e.g. /workspace/sessions/{id}).\n                XDG_DATA_HOME is set relative to this so opencode's session data\n                lives inside the snapshot directory.\n\n        Returns:\n            A running ACPExecClient (caller must stop it when done)\n        \"\"\"\n        pod_name = self._get_pod_name(str(sandbox_id))\n        acp_client = ACPExecClient(\n            pod_name=pod_name,\n            namespace=self._namespace,\n            container=\"sandbox\",\n        )\n        acp_client.start(cwd=session_path)\n\n        logger.info(\n            f\"[SANDBOX-ACP] Created ephemeral ACP client: sandbox={sandbox_id} pod={pod_name} api_pod={_API_SERVER_HOSTNAME}\"\n        )\n        return acp_client\n\n    def send_message(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        message: str,\n    ) -> Generator[ACPEvent, None, None]:\n        \"\"\"Send a message to the CLI agent and stream ACP events.\n\n        Creates an ephemeral `opencode acp` process for each message.\n        The process resumes the session from opencode's on-disk storage,\n        handles the prompt, then is stopped. This ensures only one process\n        operates on a session's flat files at a time, preventing the bug\n        where multiple long-lived processes (one per API replica) corrupt\n        each other's in-memory state.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID (determines workspace directory)\n            message: The message content to send\n\n        Yields:\n            Typed ACP schema event objects\n        \"\"\"\n        packet_logger = get_packet_logger()\n        session_path = f\"/workspace/sessions/{session_id}\"\n\n        # Create an ephemeral ACP client for this message\n        acp_client = self._create_ephemeral_acp_client(sandbox_id, session_path)\n\n        try:\n            # Resume (or create) the ACP session from opencode's on-disk storage\n            acp_session_id = acp_client.resume_or_create_session(cwd=session_path)\n\n            logger.info(\n                f\"[SANDBOX-ACP] Sending message: session={session_id} acp_session={acp_session_id} api_pod={_API_SERVER_HOSTNAME}\"\n            )\n\n            # Log the send_message call at sandbox manager level\n            packet_logger.log_session_start(session_id, sandbox_id, message)\n\n            events_count = 0\n            got_prompt_response = False\n            try:\n                for event in acp_client.send_message(\n                    message, session_id=acp_session_id\n                ):\n                    events_count += 1\n                    if isinstance(event, PromptResponse):\n                        got_prompt_response = True\n                    yield event\n\n                logger.info(\n                    f\"[SANDBOX-ACP] send_message completed: \"\n                    f\"session={session_id} events={events_count} \"\n                    f\"got_prompt_response={got_prompt_response}\"\n                )\n                packet_logger.log_session_end(\n                    session_id, success=True, events_count=events_count\n                )\n            except GeneratorExit:\n                logger.warning(\n                    f\"[SANDBOX-ACP] GeneratorExit: session={session_id} events={events_count}, sending session/cancel\"\n                )\n                try:\n                    acp_client.cancel(session_id=acp_session_id)\n                except Exception as cancel_err:\n                    logger.warning(\n                        f\"[SANDBOX-ACP] session/cancel failed on GeneratorExit: {cancel_err}\"\n                    )\n                packet_logger.log_session_end(\n                    session_id,\n                    success=False,\n                    error=\"GeneratorExit: Client disconnected or stream closed by consumer\",\n                    events_count=events_count,\n                )\n                raise\n            except Exception as e:\n                logger.error(\n                    f\"[SANDBOX-ACP] Exception: session={session_id} events={events_count} error={e}, sending session/cancel\"\n                )\n                try:\n                    acp_client.cancel(session_id=acp_session_id)\n                except Exception as cancel_err:\n                    logger.warning(\n                        f\"[SANDBOX-ACP] session/cancel failed on Exception: {cancel_err}\"\n                    )\n                packet_logger.log_session_end(\n                    session_id,\n                    success=False,\n                    error=f\"Exception: {str(e)}\",\n                    events_count=events_count,\n                )\n                raise\n            except BaseException as e:\n                logger.error(\n                    f\"[SANDBOX-ACP] {type(e).__name__}: session={session_id} error={e}\"\n                )\n                packet_logger.log_session_end(\n                    session_id,\n                    success=False,\n                    error=f\"{type(e).__name__}: {str(e) if str(e) else 'System-level interruption'}\",\n                    events_count=events_count,\n                )\n                raise\n        finally:\n            # Always stop the ephemeral ACP client to kill the opencode process.\n            # This ensures no stale processes linger in the sandbox container.\n            try:\n                acp_client.stop()\n            except Exception as e:\n                logger.warning(\n                    f\"[SANDBOX-ACP] Failed to stop ephemeral ACP client: session={session_id} error={e}\"\n                )\n\n    def list_directory(\n        self, sandbox_id: UUID, session_id: UUID, path: str\n    ) -> list[FilesystemEntry]:\n        \"\"\"List contents of a directory in the session's outputs directory.\n\n        For Kubernetes backend, we exec into the pod to list files.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            path: Relative path within sessions/$session_id/outputs/\n\n        Returns:\n            List of FilesystemEntry objects sorted by directory first, then name\n\n        Raises:\n            ValueError: If path traversal attempted or path is not a directory\n        \"\"\"\n        # _get_pod_name needs string\n        pod_name = self._get_pod_name(str(sandbox_id))\n\n        # Security: sanitize path by removing '..' components individually\n        path_obj = Path(path.lstrip(\"/\"))\n        clean_parts = [p for p in path_obj.parts if p != \"..\"]\n        clean_path = str(Path(*clean_parts)) if clean_parts else \".\"\n        target_path = f\"/workspace/sessions/{session_id}/{clean_path}\"\n        # Use shlex.quote to prevent command injection\n        quoted_path = shlex.quote(target_path)\n\n        logger.info(f\"Listing directory {target_path} in pod {pod_name}\")\n\n        # Use exec to list directory\n        # -L follows symlinks (important for files/ -> /workspace/demo_data)\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            f\"ls -laL --time-style=+%s {quoted_path} 2>/dev/null || echo 'ERROR_NOT_FOUND'\",\n        ]\n\n        try:\n            resp = k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                container=\"sandbox\",\n                command=exec_command,\n                stderr=True,\n                stdin=False,\n                stdout=True,\n                tty=False,\n            )\n\n            if \"ERROR_NOT_FOUND\" in resp:\n                raise ValueError(f\"Path not found or not a directory: {path}\")\n\n            entries = self._parse_ls_output(resp, clean_path)\n            return sorted(entries, key=lambda e: (not e.is_directory, e.name.lower()))\n\n        except ApiException as e:\n            raise RuntimeError(f\"Failed to list directory: {e}\") from e\n\n    def _parse_ls_output(self, ls_output: str, base_path: str) -> list[FilesystemEntry]:\n        \"\"\"Parse ls -la output into FilesystemEntry objects.\n\n        Handles regular files, directories, and symlinks. Symlinks to directories\n        are treated as directories for navigation purposes.\n        \"\"\"\n        entries = []\n        lines = ls_output.strip().split(\"\\n\")\n\n        logger.debug(f\"Parsing {len(lines)} lines of ls output for {base_path}\")\n\n        for line in lines:\n            logger.debug(f\"Parsing line: {line}\")\n\n            # Skip header line and . / .. entries\n            if line.startswith(\"total\") or not line:\n                continue\n\n            parts = line.split()\n            # ls -la --time-style=+%s format: perms links owner group size timestamp name\n            # Minimum 7 parts for a simple filename\n            if len(parts) < 7:\n                continue\n\n            # Handle symlinks: format is \"name -> target\"\n            # For symlinks, parts[-1] is the target, not the name\n            is_symlink = line.startswith(\"l\")\n            if is_symlink and \" -> \" in line:\n                # Extract name from the \"name -> target\" portion\n                # Filename starts at index 6 (after perms, links, owner, group, size, timestamp)\n                try:\n                    # Rejoin from index 6 onwards to handle names with spaces\n                    name_and_target = \" \".join(parts[6:])\n                    if \" -> \" in name_and_target:\n                        name = name_and_target.split(\" -> \")[0]\n                    else:\n                        name = parts[-1]\n                except (IndexError, ValueError):\n                    name = parts[-1]\n            else:\n                # For regular files/directories, name is at index 6 or later (with spaces)\n                name = \" \".join(parts[6:])\n\n            if name in (\".\", \"..\"):\n                continue\n\n            # Directories start with 'd', symlinks start with 'l'\n            # Treat symlinks as directories (they typically point to directories\n            # in our sandbox setup, like files/ -> /workspace/demo_data)\n            is_directory = line.startswith(\"d\") or is_symlink\n            size_str = parts[4]\n\n            try:\n                size = int(size_str) if not is_directory else None\n            except ValueError:\n                size = None\n\n            # Guess MIME type for files based on extension\n            mime_type = mimetypes.guess_type(name)[0] if not is_directory else None\n\n            entry_path = f\"{base_path}/{name}\".lstrip(\"/\")\n            entries.append(\n                FilesystemEntry(\n                    name=name,\n                    path=entry_path,\n                    is_directory=is_directory,\n                    size=size,\n                    mime_type=mime_type,\n                )\n            )\n\n        return entries\n\n    def read_file(self, sandbox_id: UUID, session_id: UUID, path: str) -> bytes:\n        \"\"\"Read a file from the session's workspace.\n\n        For Kubernetes backend, we exec into the pod to read the file.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            path: Relative path within sessions/$session_id/\n\n        Returns:\n            File contents as bytes\n\n        Raises:\n            ValueError: If path traversal attempted or path is not a file\n        \"\"\"\n        # _get_pod_name needs string\n        pod_name = self._get_pod_name(str(sandbox_id))\n\n        # Security: sanitize path by removing '..' components individually\n        path_obj = Path(path.lstrip(\"/\"))\n        clean_parts = [p for p in path_obj.parts if p != \"..\"]\n        clean_path = str(Path(*clean_parts)) if clean_parts else \".\"\n        target_path = f\"/workspace/sessions/{session_id}/{clean_path}\"\n        # Use shlex.quote to prevent command injection\n        quoted_path = shlex.quote(target_path)\n\n        # Use exec to read file with base64 encoding to handle binary data\n        # Base64 encode the output to safely transport binary content\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            f\"if [ -f {quoted_path} ]; then base64 {quoted_path}; else echo 'ERROR_NOT_FOUND'; fi\",\n        ]\n\n        try:\n            resp = k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                container=\"sandbox\",\n                command=exec_command,\n                stderr=True,\n                stdin=False,\n                stdout=True,\n                tty=False,\n            )\n\n            if \"ERROR_NOT_FOUND\" in resp:\n                raise ValueError(f\"File not found: {path}\")\n\n            # Decode base64 content\n            try:\n                content = base64.b64decode(resp.strip())\n            except binascii.Error as e:\n                logger.error(f\"Failed to decode base64 content: {e}\")\n                raise RuntimeError(f\"Failed to decode file content: {e}\") from e\n\n            return content\n\n        except ApiException as e:\n            raise RuntimeError(f\"Failed to read file: {e}\") from e\n\n    def get_webapp_url(self, sandbox_id: UUID, port: int) -> str:\n        \"\"\"Get the webapp URL for a session's Next.js server.\n\n        For Kubernetes backend, returns internal cluster service URL.\n\n        Args:\n            sandbox_id: The sandbox ID\n            port: The session's allocated Next.js port\n\n        Returns:\n            Internal cluster URL for the Next.js server on the specified port\n        \"\"\"\n        return self._get_nextjs_url(str(sandbox_id), port)\n\n    def generate_pptx_preview(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        pptx_path: str,\n        cache_dir: str,\n    ) -> tuple[list[str], bool]:\n        \"\"\"Convert PPTX to slide images using soffice + pdftoppm in the pod.\n\n        Runs preview.py in the sandbox container which:\n        1. Checks if cached slides exist and are newer than the PPTX\n        2. If not, converts PPTX -> PDF -> JPEG slides\n        3. Returns list of slide image paths\n        \"\"\"\n        pod_name = self._get_pod_name(str(sandbox_id))\n\n        # Security: sanitize paths\n        pptx_path_obj = Path(pptx_path.lstrip(\"/\"))\n        pptx_clean_parts = [p for p in pptx_path_obj.parts if p != \"..\"]\n        clean_pptx = str(Path(*pptx_clean_parts)) if pptx_clean_parts else \".\"\n\n        cache_path_obj = Path(cache_dir.lstrip(\"/\"))\n        cache_clean_parts = [p for p in cache_path_obj.parts if p != \"..\"]\n        clean_cache = str(Path(*cache_clean_parts)) if cache_clean_parts else \".\"\n\n        session_root = f\"/workspace/sessions/{session_id}\"\n        pptx_abs = f\"{session_root}/{clean_pptx}\"\n        cache_abs = f\"{session_root}/{clean_cache}\"\n\n        exec_command = [\n            \"python\",\n            \"/workspace/skills/pptx/scripts/preview.py\",\n            pptx_abs,\n            cache_abs,\n        ]\n\n        try:\n            resp = k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                container=\"sandbox\",\n                command=exec_command,\n                stderr=True,\n                stdin=False,\n                stdout=True,\n                tty=False,\n            )\n\n            lines = [line.strip() for line in resp.strip().split(\"\\n\") if line.strip()]\n\n            if not lines:\n                raise ValueError(\"Empty response from PPTX conversion\")\n\n            if lines[0] == \"ERROR_NOT_FOUND\":\n                raise ValueError(f\"File not found: {pptx_path}\")\n\n            if lines[0] == \"ERROR_NO_PDF\":\n                raise ValueError(\"soffice did not produce a PDF file\")\n\n            cached = lines[0] == \"CACHED\"\n            # Skip the status line, rest are file paths\n            abs_paths = lines[1:] if lines[0] in (\"CACHED\", \"GENERATED\") else lines\n\n            # Convert absolute paths to session-relative paths\n            prefix = f\"{session_root}/\"\n            rel_paths = []\n            for p in abs_paths:\n                if p.startswith(prefix):\n                    rel_paths.append(p[len(prefix) :])\n                elif p.endswith(\".jpg\"):\n                    rel_paths.append(p)\n\n            return (rel_paths, cached)\n\n        except ApiException as e:\n            raise RuntimeError(f\"Failed to generate PPTX preview: {e}\") from e\n\n    def sync_files(\n        self,\n        sandbox_id: UUID,\n        user_id: UUID,\n        tenant_id: str,\n        source: str | None = None,\n    ) -> bool:\n        \"\"\"Sync files from S3 to the running pod via the file-sync sidecar.\n\n        Executes `s5cmd sync` in the file-sync sidecar container to download\n        any new or changed files from S3 to /workspace/files/.\n\n        This is safe to call multiple times - s5cmd sync is idempotent.\n\n        Note: For user_library source, --delete is NOT used since deletions\n        are handled explicitly by the delete_file API endpoint. File visibility\n        in sessions is controlled via filtered symlinks in setup_session_workspace().\n\n        Args:\n            sandbox_id: The sandbox UUID\n            user_id: The user ID (for S3 path construction)\n            tenant_id: The tenant ID (for S3 path construction)\n            source: Optional source type (e.g., \"gmail\", \"google_drive\").\n                    If None, syncs all sources. If specified, only syncs\n                    that source's directory.\n\n        Returns:\n            True if sync was successful, False otherwise.\n        \"\"\"\n        pod_name = self._get_pod_name(str(sandbox_id))\n\n        # Build S3 path based on whether source is specified\n        if source:\n            # Sync only the specific source directory\n            s3_path = f\"s3://{self._s3_bucket}/{tenant_id}/knowledge/{str(user_id)}/{source}/*\"\n            local_path = f\"/workspace/files/{source}/\"\n        else:\n            # Sync all sources (original behavior)\n            s3_path = f\"s3://{self._s3_bucket}/{tenant_id}/knowledge/{str(user_id)}/*\"\n            local_path = \"/workspace/files/\"\n\n        # s5cmd sync with --delete for external connectors only.\n        # timeout: prevent zombie processes from kubectl exec disconnections\n        # trap: kill child processes on exit/disconnect\n        source_info = f\" (source={source})\" if source else \"\"\n\n        # Sources where --delete is explicitly forbidden (deletions handled via API)\n        NO_DELETE_SOURCES = {\"user_library\"}\n        use_delete = source is not None and source not in NO_DELETE_SOURCES\n        delete_flag = \" --delete\" if use_delete else \"\"\n\n        sync_script = f\"\"\"\n# Kill child processes on exit/disconnect to prevent zombie s5cmd workers\ncleanup() {{ pkill -P $$ 2>/dev/null || true; }}\ntrap cleanup EXIT INT TERM\n\necho \"Starting incremental file sync{source_info}\"\necho \"S3: {s3_path}\"\necho \"Local: {local_path}\"\n\n# Ensure destination exists (needed for source-specific syncs)\nmkdir -p \"{local_path}\"\n\n# Run s5cmd with 5-minute timeout (SIGKILL after 10s if SIGTERM ignored)\n# Exit codes: 0=success, 1=success with warnings, 124=timeout\nsync_exit_code=0\ntimeout --signal=TERM --kill-after=10s 5m \\\n    /s5cmd --stat sync{delete_flag} \"{s3_path}\" \"{local_path}\" 2>&1 || sync_exit_code=$?\n\necho \"=== Sync finished (exit code: $sync_exit_code) ===\"\n\n# Handle result\nif [ $sync_exit_code -eq 0 ] || [ $sync_exit_code -eq 1 ]; then\n    file_count=$(find \"{local_path}\" -type f 2>/dev/null | wc -l)\n    echo \"Files in {local_path}: $file_count\"\n    echo \"SYNC_SUCCESS\"\nelif [ $sync_exit_code -eq 124 ]; then\n    echo \"ERROR: Sync timed out after 5 minutes\"\n    echo \"SYNC_FAILED\"\n    exit 1\nelse\n    echo \"ERROR: Sync failed (exit code: $sync_exit_code)\"\n    echo \"SYNC_FAILED\"\n    exit $sync_exit_code\nfi\n\"\"\"\n        sync_command = [\"/bin/sh\", \"-c\", sync_script]\n        resp = k8s_stream(\n            self._stream_core_api.connect_get_namespaced_pod_exec,\n            pod_name,\n            self._namespace,\n            container=\"file-sync\",  # Execute in sidecar, not sandbox container\n            command=sync_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        logger.debug(f\"File sync response: {resp}\")\n\n        # Check if sync succeeded based on output markers\n        if \"SYNC_FAILED\" in resp:\n            logger.warning(f\"File sync failed for sandbox {sandbox_id}\")\n            return False\n        return True\n\n    def _ensure_agents_md_attachments_section(\n        self, sandbox_id: UUID, session_id: UUID\n    ) -> None:\n        \"\"\"Ensure AGENTS.md has the attachments section.\n\n        Called after uploading a file. Only adds the section if it doesn't exist.\n        Inserts the section above ## Skills for better document flow.\n        This is a fire-and-forget operation - failures are logged but not raised.\n        \"\"\"\n        pod_name = self._get_pod_name(str(sandbox_id))\n        session_path = f\"/workspace/sessions/{session_id}\"\n        agents_md_path = f\"{session_path}/AGENTS.md\"\n\n        # Base64 encode the content for safe shell handling\n        attachments_content_b64 = base64.b64encode(\n            ATTACHMENTS_SECTION_CONTENT.encode()\n        ).decode()\n\n        # Script: add section before ## Skills if not present\n        # Uses a temp file approach for safe insertion\n        script = f\"\"\"\nif [ -f \"{agents_md_path}\" ]; then\n    if ! grep -q \"## Attachments (PRIORITY)\" \"{agents_md_path}\" 2>/dev/null; then\n        # Check if ## Skills exists\n        if grep -q \"## Skills\" \"{agents_md_path}\" 2>/dev/null; then\n            # Insert before ## Skills using awk\n            awk -v content=\"$(echo \"{attachments_content_b64}\" | base64 -d)\" '\n                /^## Skills/ {{ print content; print \"\"; }}\n                {{ print }}\n            ' \"{agents_md_path}\" > \"{agents_md_path}.tmp\" && mv \"{agents_md_path}.tmp\" \"{agents_md_path}\"\n            echo \"ADDED_BEFORE_SKILLS\"\n        else\n            # Fallback: append to end\n            echo \"\" >> \"{agents_md_path}\"\n            echo \"\" >> \"{agents_md_path}\"\n            echo \"{attachments_content_b64}\" | base64 -d >> \"{agents_md_path}\"\n            echo \"ADDED_AT_END\"\n        fi\n    else\n        echo \"EXISTS\"\n    fi\nelse\n    echo \"NO_AGENTS_MD\"\nfi\n\"\"\"\n\n        try:\n            resp = k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                container=\"sandbox\",\n                command=[\"/bin/sh\", \"-c\", script],\n                stderr=True,\n                stdin=False,\n                stdout=True,\n                tty=False,\n            )\n            logger.debug(\n                f\"Ensure AGENTS.md attachments section for session {session_id}: {resp.strip()}\"\n            )\n        except ApiException as e:\n            logger.warning(f\"Failed to ensure AGENTS.md attachments section: {e}\")\n\n    def upload_file(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        filename: str,\n        content: bytes,\n    ) -> str:\n        \"\"\"Upload a file to the session's attachments directory.\n\n        Uses tar streaming via stdin with explicit byte count to avoid EOF issues.\n        The K8s Python client cannot close stdin without closing the entire WebSocket\n        connection, so we use `head -c <size>` to read exactly the expected bytes\n        instead of waiting for EOF.\n\n        Handles filename collisions atomically within the shell script.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            filename: Sanitized filename\n            content: File content as bytes\n\n        Returns:\n            Relative path where file was saved (e.g., \"attachments/doc.pdf\")\n\n        Raises:\n            RuntimeError: If upload fails\n        \"\"\"\n        pod_name = self._get_pod_name(str(sandbox_id))\n        target_dir = f\"/workspace/sessions/{session_id}/attachments\"\n\n        # Create tar archive in memory\n        tar_buffer = io.BytesIO()\n        with tarfile.open(fileobj=tar_buffer, mode=\"w\") as tar:\n            tarinfo = tarfile.TarInfo(name=filename)\n            tarinfo.size = len(content)\n            tar.addfile(tarinfo, io.BytesIO(content))\n        tar_data = tar_buffer.getvalue()\n        tar_size = len(tar_data)\n\n        # Shell script that:\n        # 1. Creates target directory and temp extraction directory\n        # 2. Reads exactly tar_size bytes from stdin (avoids needing EOF signal)\n        # 3. Extracts tar to temp directory\n        # 4. Moves file to target with collision handling\n        # 5. Cleans up temp directory\n        # 6. Outputs final filename\n        script = f\"\"\"\nset -e\ntarget_dir=\"{target_dir}\"\ntmpdir=$(mktemp -d)\ntrap 'rm -rf \"$tmpdir\"' EXIT\n\nmkdir -p \"$target_dir\"\n\n# Read exactly {tar_size} bytes and extract (avoids waiting for EOF)\nhead -c {tar_size} | tar xf - -C \"$tmpdir\"\n\n# Find the extracted file (first file in tmpdir)\noriginal=$(ls -1 \"$tmpdir\" | head -1)\nbase=\"$original\"\n\ncd \"$target_dir\"\nif [ -f \"$base\" ]; then\n    stem=\"${{base%.*}}\"\n    ext=\"${{base##*.}}\"\n    [ \"$stem\" = \"$base\" ] && ext=\"\" || ext=\".$ext\"\n    i=1\n    while [ -f \"${{stem}}_${{i}}${{ext}}\" ]; do i=$((i+1)); done\n    base=\"${{stem}}_${{i}}${{ext}}\"\nfi\n\nmv \"$tmpdir/$original\" \"$target_dir/$base\"\nchmod 644 \"$target_dir/$base\"\necho \"$base\"\n\"\"\"\n\n        try:\n            # Open WebSocket connection with stdin enabled\n            ws_client = k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                container=\"sandbox\",\n                command=[\"/bin/sh\", \"-c\", script],\n                stdin=True,\n                stdout=True,\n                stderr=True,\n                tty=False,\n                _preload_content=False,  # Return WSClient instead of string\n            )\n\n            # Write tar data to stdin\n            ws_client.write_stdin(tar_data)\n\n            # Read response - head -c will read exactly tar_size bytes and proceed,\n            # so we don't need to close stdin to signal EOF\n            stdout_data = \"\"\n            stderr_data = \"\"\n            while ws_client.is_open():\n                ws_client.update(timeout=30)\n                if ws_client.peek_stdout():\n                    stdout_data += ws_client.read_stdout()\n                if ws_client.peek_stderr():\n                    stderr_data += ws_client.read_stderr()\n\n            # Get any remaining data\n            stdout_data += ws_client.read_stdout() or \"\"\n            stderr_data += ws_client.read_stderr() or \"\"\n\n            if stderr_data.strip():\n                logger.warning(f\"Upload stderr: {stderr_data.strip()}\")\n\n            # Last line of output is the final filename\n            final_filename = stdout_data.strip().split(\"\\n\")[-1]\n\n            if not final_filename:\n                raise RuntimeError(\n                    f\"Upload failed - no filename returned. stderr: {stderr_data}\"\n                )\n\n            logger.info(\n                f\"Uploaded file to session {session_id}: attachments/{final_filename} ({len(content)} bytes)\"\n            )\n\n            # Ensure AGENTS.md has the attachments section\n            self._ensure_agents_md_attachments_section(sandbox_id, session_id)\n\n            return f\"attachments/{final_filename}\"\n\n        except ApiException as e:\n            raise RuntimeError(f\"Failed to upload file: {e}\") from e\n\n    def delete_file(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        path: str,\n    ) -> bool:\n        \"\"\"Delete a file from the session's workspace.\n\n        Uses kubectl exec to delete the file from the pod.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            path: Relative path to the file (e.g., \"attachments/doc.pdf\")\n\n        Returns:\n            True if file was deleted, False if not found\n\n        Raises:\n            ValueError: If path traversal attempted or invalid characters\n        \"\"\"\n        pod_name = self._get_pod_name(str(sandbox_id))\n\n        # Security: robust path sanitization\n        # Reject paths with traversal patterns, URL-encoded characters, or null bytes\n        if re.search(r\"\\.\\.\", path) or \"%\" in path or \"\\x00\" in path:\n            raise ValueError(\"Invalid path: potential path traversal detected\")\n\n        # Reject paths with shell metacharacters that could be exploited\n        if re.search(r'[;&|`$(){}[\\]<>\\'\"\\n\\r\\\\]', path):\n            raise ValueError(\"Invalid path: contains disallowed characters\")\n\n        clean_path = path.lstrip(\"/\")\n\n        # Verify path only contains safe characters (alphanumeric, dash, underscore, dot, forward slash)\n        if not re.match(r\"^[a-zA-Z0-9_\\-./]+$\", clean_path):\n            raise ValueError(\"Invalid path: contains disallowed characters\")\n\n        target_path = f\"/workspace/sessions/{session_id}/{clean_path}\"\n\n        # Use exec to delete file\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            f'[ -f \"{target_path}\" ] && rm \"{target_path}\" && echo \"DELETED\" || echo \"NOT_FOUND\"',\n        ]\n\n        try:\n            resp = k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                container=\"sandbox\",\n                command=exec_command,\n                stdin=False,\n                stdout=True,\n                stderr=True,\n                tty=False,\n            )\n\n            deleted = \"DELETED\" in resp\n            if deleted:\n                logger.info(f\"Deleted file from session {session_id}: {path}\")\n            else:\n                logger.debug(\n                    f\"File not found for deletion in session {session_id}: {path}\"\n                )\n\n            return deleted\n\n        except ApiException as e:\n            raise RuntimeError(f\"Failed to delete file: {e}\") from e\n\n    def get_upload_stats(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n    ) -> tuple[int, int]:\n        \"\"\"Get current file count and total size for a session's attachments.\n\n        Uses kubectl exec to query the pod's attachments directory.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n\n        Returns:\n            Tuple of (file_count, total_size_bytes)\n        \"\"\"\n        pod_name = self._get_pod_name(str(sandbox_id))\n        target_dir = f\"/workspace/sessions/{session_id}/attachments\"\n\n        # Get file count and total size in one command\n        # Uses find to list files, wc -l for count, and du for size\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            f\"\"\"\nif [ -d \"{target_dir}\" ]; then\n    count=$(find \"{target_dir}\" -maxdepth 1 -type f 2>/dev/null | wc -l)\n    size=$(du -sb \"{target_dir}\" 2>/dev/null | cut -f1)\n    echo \"$count $size\"\nelse\n    echo \"0 0\"\nfi\n\"\"\",\n        ]\n\n        try:\n            resp = k8s_stream(\n                self._stream_core_api.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=self._namespace,\n                container=\"sandbox\",\n                command=exec_command,\n                stdin=False,\n                stdout=True,\n                stderr=True,\n                tty=False,\n            )\n\n            # Parse response: \"count size\"\n            parts = resp.strip().split()\n            if len(parts) >= 2:\n                try:\n                    file_count = int(parts[0])\n                    # du includes directory overhead, but for limits this is fine\n                    total_size = int(parts[1])\n                    return file_count, total_size\n                except ValueError:\n                    logger.warning(f\"Failed to parse upload stats: {resp}\")\n                    return 0, 0\n\n            return 0, 0\n\n        except ApiException as e:\n            logger.warning(f\"Failed to get upload stats: {e}\")\n            return 0, 0\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/local/__init__.py",
    "content": "\"\"\"Local filesystem-based sandbox implementation.\n\nThis module provides the LocalSandboxManager for development and single-node\ndeployments that run sandboxes as directories on the local filesystem.\n\"\"\"\n\nfrom onyx.server.features.build.sandbox.local.agent_client import ACPAgentClient\nfrom onyx.server.features.build.sandbox.local.agent_client import ACPEvent\nfrom onyx.server.features.build.sandbox.local.local_sandbox_manager import (\n    LocalSandboxManager,\n)\nfrom onyx.server.features.build.sandbox.local.process_manager import ProcessManager\n\n__all__ = [\n    \"ACPAgentClient\",\n    \"ACPEvent\",\n    \"LocalSandboxManager\",\n    \"ProcessManager\",\n]\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/local/agent_client.py",
    "content": "\"\"\"Communication with CLI agent subprocess using ACP (Agent Client Protocol).\n\nACP is a JSON-RPC 2.0 based protocol for communicating with coding agents.\nSee: https://agentclientprotocol.com\n\nThis module includes comprehensive logging for debugging ACP communication.\nEnable logging by setting LOG_LEVEL=DEBUG or BUILD_PACKET_LOGGING=true.\n\nUsage:\n    # Simple usage with context manager\n    with ACPAgentClient(cwd=\"/path/to/project\") as client:\n        for packet in client.send_message(\"What files are here?\"):\n            print(packet)\n\n    # Manual lifecycle management\n    client = ACPAgentClient()\n    client.start(cwd=\"/path/to/project\")\n    for packet in client.send_message(\"Hello\"):\n        print(packet)\n    client.stop()\n\"\"\"\n\nimport json\nimport os\nimport select\nimport shutil\nimport subprocess\nimport threading\nimport time\nfrom collections.abc import Generator\nfrom dataclasses import dataclass\nfrom dataclasses import field\nfrom pathlib import Path\nfrom typing import Any\n\nfrom acp.schema import AgentMessageChunk\nfrom acp.schema import AgentPlanUpdate\nfrom acp.schema import AgentThoughtChunk\nfrom acp.schema import CurrentModeUpdate\nfrom acp.schema import Error\nfrom acp.schema import PromptResponse\nfrom acp.schema import ToolCallProgress\nfrom acp.schema import ToolCallStart\nfrom pydantic import ValidationError\n\nfrom onyx.server.features.build.api.packet_logger import get_packet_logger\n\n\n# ACP Protocol version\nACP_PROTOCOL_VERSION = 1\n\n# Default client info\nDEFAULT_CLIENT_INFO = {\n    \"name\": \"onyx-sandbox\",\n    \"title\": \"Onyx Sandbox Agent Client\",\n    \"version\": \"1.0.0\",\n}\n\nSESSION_CREATION_TIMEOUT = 30.0  # 30 seconds\nTIMEOUT = 900.0  # 15 minutes\nSINGLE_READ_TIMEOUT = 10.0  # 10 seconds\n\n\n# =============================================================================\n# Response Event Types (from acp.schema + custom completion/error types)\n# =============================================================================\n\n# Union type for all possible events from send_message\n# Uses ACP schema types for session updates, plus our completion type\nACPEvent = (\n    AgentMessageChunk  # Text/image content from agent\n    | AgentThoughtChunk  # Agent's internal reasoning\n    | ToolCallStart  # Tool invocation started\n    | ToolCallProgress  # Tool execution progress/result\n    | AgentPlanUpdate  # Agent's execution plan\n    | CurrentModeUpdate  # Agent mode change\n    | PromptResponse  # Agent finished (contains stop_reason)\n    | Error  # An error occurred\n)\n\n\n# =============================================================================\n# Internal State Types\n# =============================================================================\n\n\n@dataclass\nclass ACPSession:\n    \"\"\"Represents an active ACP session.\"\"\"\n\n    session_id: str\n    cwd: str\n\n\n@dataclass\nclass ACPClientState:\n    \"\"\"Internal state for the ACP client.\"\"\"\n\n    initialized: bool = False\n    current_session: ACPSession | None = None\n    next_request_id: int = 0\n    agent_capabilities: dict[str, Any] = field(default_factory=dict)\n    agent_info: dict[str, Any] = field(default_factory=dict)\n\n\ndef _find_opencode_binary() -> str | None:\n    \"\"\"Find the opencode binary path.\n\n    Returns:\n        Path to opencode binary, or None if not found\n    \"\"\"\n    # Check PATH first\n    opencode_path = shutil.which(\"opencode\")\n    if opencode_path:\n        return opencode_path\n\n    # Try common installation paths\n    common_paths = [\n        Path.home() / \".opencode\" / \"bin\" / \"opencode\",\n        Path(\"/usr/local/bin/opencode\"),\n    ]\n    for path in common_paths:\n        if path.exists():\n            return str(path)\n\n    return None\n\n\nclass ACPAgentClient:\n    \"\"\"ACP (Agent Client Protocol) client for communication with CLI agents.\n\n    Implements JSON-RPC 2.0 over stdin/stdout as specified by ACP.\n    Manages the agent subprocess lifecycle internally.\n\n    Usage:\n        # With context manager (recommended)\n        with ACPAgentClient(cwd=\"/path/to/project\") as client:\n            for packet in client.send_message(\"Hello\"):\n                print(packet)\n\n        # Manual lifecycle\n        client = ACPAgentClient()\n        client.start(cwd=\"/path/to/project\")\n        try:\n            for packet in client.send_message(\"Hello\"):\n                print(packet)\n        finally:\n            client.stop()\n    \"\"\"\n\n    def __init__(\n        self,\n        cwd: str | None = None,\n        opencode_path: str | None = None,\n        client_info: dict[str, Any] | None = None,\n        client_capabilities: dict[str, Any] | None = None,\n        auto_start: bool = True,\n    ) -> None:\n        \"\"\"Initialize the ACP client.\n\n        Args:\n            cwd: Working directory for the agent. If provided and auto_start=True,\n                 the agent will be started immediately.\n            opencode_path: Path to opencode binary. Auto-detected if not provided.\n            client_info: Client identification info (name, title, version)\n            client_capabilities: Client capabilities to advertise\n            auto_start: If True and cwd is provided, start the agent immediately\n        \"\"\"\n        self._opencode_path = opencode_path or _find_opencode_binary()\n        self._client_info = client_info or DEFAULT_CLIENT_INFO\n        self._client_capabilities = client_capabilities or {\n            \"fs\": {\n                \"readTextFile\": True,\n                \"writeTextFile\": True,\n            },\n            \"terminal\": True,\n        }\n        self._state = ACPClientState()\n        self._process: subprocess.Popen[str] | None = None\n        self._read_lock = threading.Lock()\n        self._cwd: str | None = None\n\n        # Auto-start if cwd provided\n        if cwd and auto_start:\n            self.start(cwd=cwd)\n\n    def __enter__(self) -> \"ACPAgentClient\":\n        \"\"\"Context manager entry.\"\"\"\n        return self\n\n    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:\n        \"\"\"Context manager exit - ensures cleanup.\"\"\"\n        self.stop()\n\n    def start(\n        self,\n        cwd: str | None = None,\n        mcp_servers: list[dict[str, Any]] | None = None,\n        timeout: float = 30.0,\n    ) -> str:\n        \"\"\"Start the agent process and initialize a session.\n\n        This method:\n        1. Starts the opencode acp subprocess\n        2. Sends the initialize handshake\n        3. Creates a new session\n\n        Args:\n            cwd: Working directory for the agent (defaults to current directory)\n            mcp_servers: Optional MCP server configurations\n            timeout: Timeout for initialization and session creation\n\n        Returns:\n            The session ID\n\n        Raises:\n            RuntimeError: If opencode is not found or startup fails\n        \"\"\"\n        if self._process is not None:\n            raise RuntimeError(\"Agent already started. Call stop() first.\")\n\n        if not self._opencode_path:\n            raise RuntimeError(\n                \"opencode binary not found. Install opencode or provide opencode_path.\"\n            )\n\n        self._cwd = cwd or os.getcwd()\n\n        # Start the opencode acp process\n        self._process = subprocess.Popen(\n            [self._opencode_path, \"acp\", \"--cwd\", self._cwd],\n            stdin=subprocess.PIPE,\n            stdout=subprocess.PIPE,\n            stderr=subprocess.PIPE,\n            text=True,\n        )\n\n        try:\n            # Initialize the ACP connection\n            self._initialize(timeout=timeout)\n\n            # Create a session\n            session_id = self._create_session(\n                cwd=self._cwd,\n                mcp_servers=mcp_servers,\n                timeout=timeout,\n            )\n\n            return session_id\n\n        except Exception:\n            # Clean up on failure\n            self.stop()\n            raise\n\n    def stop(self) -> None:\n        \"\"\"Stop the agent process and clean up resources.\"\"\"\n        if self._process is not None:\n            if self._process.poll() is None:\n                self._process.terminate()\n                try:\n                    self._process.wait(timeout=5)\n                except subprocess.TimeoutExpired:\n                    self._process.kill()\n\n            self._process = None\n\n        # Reset state\n        self._state = ACPClientState()\n\n    def _get_next_id(self) -> int:\n        \"\"\"Get the next request ID.\"\"\"\n        request_id = self._state.next_request_id\n        self._state.next_request_id += 1\n        return request_id\n\n    def _ensure_running(self) -> subprocess.Popen[str]:\n        \"\"\"Ensure the process is running and return it.\n\n        Raises:\n            RuntimeError: If process is not running\n        \"\"\"\n        if self._process is None:\n            raise RuntimeError(\"Agent not started. Call start() first.\")\n\n        if self._process.poll() is not None:\n            raise RuntimeError(\n                f\"Agent process has terminated with code {self._process.returncode}\"\n            )\n\n        return self._process\n\n    def _send_request(\n        self,\n        method: str,\n        params: dict[str, Any] | None = None,\n    ) -> int:\n        \"\"\"Send a JSON-RPC request to the agent.\n\n        Args:\n            method: The RPC method name\n            params: Optional parameters for the method\n\n        Returns:\n            The request ID\n\n        Raises:\n            RuntimeError: If the process has terminated or pipe is broken\n        \"\"\"\n        process = self._ensure_running()\n\n        if process.stdin is None:\n            raise RuntimeError(\"Process stdin is not available\")\n\n        request_id = self._get_next_id()\n        request: dict[str, Any] = {\n            \"jsonrpc\": \"2.0\",\n            \"id\": request_id,\n            \"method\": method,\n        }\n        if params is not None:\n            request[\"params\"] = params\n\n        # Log the outgoing request\n        packet_logger = get_packet_logger()\n        packet_logger.log_jsonrpc_request(method, request_id, params, context=\"local\")\n\n        try:\n            process.stdin.write(json.dumps(request) + \"\\n\")\n            process.stdin.flush()\n        except BrokenPipeError:\n            raise RuntimeError(\"Agent process stdin pipe is broken\")\n\n        return request_id\n\n    def _send_notification(\n        self,\n        method: str,\n        params: dict[str, Any] | None = None,\n    ) -> None:\n        \"\"\"Send a JSON-RPC notification (no response expected).\n\n        Args:\n            method: The notification method name\n            params: Optional parameters\n\n        Raises:\n            RuntimeError: If the process has terminated or pipe is broken\n        \"\"\"\n        process = self._ensure_running()\n\n        if process.stdin is None:\n            raise RuntimeError(\"Process stdin is not available\")\n\n        notification: dict[str, Any] = {\n            \"jsonrpc\": \"2.0\",\n            \"method\": method,\n        }\n        if params is not None:\n            notification[\"params\"] = params\n\n        # Log the outgoing notification\n        packet_logger = get_packet_logger()\n        packet_logger.log_jsonrpc_request(method, None, params, context=\"local\")\n\n        try:\n            process.stdin.write(json.dumps(notification) + \"\\n\")\n            process.stdin.flush()\n        except BrokenPipeError:\n            raise RuntimeError(\"Agent process stdin pipe is broken\")\n\n    def _read_message(\n        self,\n        timeout: float | None = None,\n    ) -> dict[str, Any] | None:\n        \"\"\"Read a single JSON-RPC message from the agent.\n\n        Args:\n            timeout: Optional timeout in seconds\n\n        Returns:\n            The parsed JSON message, or None if timeout/EOF\n\n        Raises:\n            RuntimeError: If process stdout is not available\n        \"\"\"\n        process = self._ensure_running()\n\n        if process.stdout is None:\n            raise RuntimeError(\"Process stdout is not available\")\n\n        packet_logger = get_packet_logger()\n\n        with self._read_lock:\n            if timeout is not None:\n                stdout_fd = process.stdout.fileno()\n                readable, _, _ = select.select([stdout_fd], [], [], timeout)\n                if not readable:\n                    return None\n\n            line = process.stdout.readline()\n            if not line:\n                return None\n\n            line = line.strip()\n            if not line:\n                return None\n\n            try:\n                message = json.loads(line)\n                # Log the raw incoming message\n                packet_logger.log_jsonrpc_raw_message(\"IN\", message, context=\"local\")\n                return message\n            except json.JSONDecodeError:\n                packet_logger.log_raw(\n                    \"JSONRPC-PARSE-ERROR\",\n                    {\"raw_line\": line[:500], \"error\": \"JSON decode failed\"},\n                )\n                return {\n                    \"jsonrpc\": \"2.0\",\n                    \"error\": {\n                        \"code\": -32700,\n                        \"message\": f\"Parse error: {line[:100]}\",\n                    },\n                }\n\n    def _wait_for_response(\n        self,\n        request_id: int,\n        timeout: float = 30.0,\n    ) -> dict[str, Any]:\n        \"\"\"Wait for a response to a specific request.\n\n        Args:\n            request_id: The request ID to wait for\n            timeout: Maximum time to wait\n\n        Returns:\n            The response result\n\n        Raises:\n            RuntimeError: If timeout, error response, or process dies\n        \"\"\"\n        import time\n\n        start_time = time.time()\n\n        while True:\n            remaining = timeout - (time.time() - start_time)\n            if remaining <= 0:\n                raise RuntimeError(\n                    f\"Timeout waiting for response to request {request_id}\"\n                )\n\n            message = self._read_message(timeout=min(remaining, 1.0))\n\n            if message is None:\n                process = self._ensure_running()\n                if process.poll() is not None:\n                    raise RuntimeError(\n                        f\"Agent process terminated with code {process.returncode}\"\n                    )\n                continue\n\n            # Check if this is the response we're waiting for\n            if message.get(\"id\") == request_id:\n                if \"error\" in message:\n                    error = message[\"error\"]\n                    raise RuntimeError(\n                        f\"ACP error {error.get('code')}: {error.get('message')}\"\n                    )\n                return message.get(\"result\", {})\n\n    def _initialize(self, timeout: float = SESSION_CREATION_TIMEOUT) -> dict[str, Any]:\n        \"\"\"Initialize the ACP connection (internal).\n\n        Args:\n            timeout: Maximum time to wait for response\n\n        Returns:\n            The agent's capabilities and info\n        \"\"\"\n        params = {\n            \"protocolVersion\": ACP_PROTOCOL_VERSION,\n            \"clientCapabilities\": self._client_capabilities,\n            \"clientInfo\": self._client_info,\n        }\n\n        request_id = self._send_request(\"initialize\", params)\n        result = self._wait_for_response(request_id, timeout)\n\n        self._state.initialized = True\n        self._state.agent_capabilities = result.get(\"agentCapabilities\", {})\n        self._state.agent_info = result.get(\"agentInfo\", {})\n\n        return result\n\n    def _create_session(\n        self,\n        cwd: str,\n        mcp_servers: list[dict[str, Any]] | None = None,\n        timeout: float = SESSION_CREATION_TIMEOUT,\n    ) -> str:\n        \"\"\"Create a new ACP session (internal).\n\n        Args:\n            cwd: Working directory for the session\n            mcp_servers: Optional MCP server configurations\n            timeout: Maximum time to wait for response\n\n        Returns:\n            The session ID\n        \"\"\"\n        # Note: opencode requires cwd and mcpServers\n        params: dict[str, Any] = {\n            \"cwd\": cwd,\n            \"mcpServers\": mcp_servers or [],\n        }\n\n        request_id = self._send_request(\"session/new\", params)\n        result = self._wait_for_response(request_id, timeout)\n\n        session_id = result.get(\"sessionId\")\n        if not session_id:\n            raise RuntimeError(\"No session ID returned from session/new\")\n\n        self._state.current_session = ACPSession(\n            session_id=session_id,\n            cwd=cwd,\n        )\n\n        return session_id\n\n    def send_message(\n        self,\n        message: str,\n        timeout: float = TIMEOUT,\n    ) -> Generator[ACPEvent, None, None]:\n        \"\"\"Send a message and stream response events.\n\n        Args:\n            message: The message content to send\n            timeout: Maximum time to wait for complete response\n\n        Yields:\n            Typed ACP schema event objects (ACPEvent union):\n            - AgentMessageChunk: Text/image content from the agent\n            - AgentThoughtChunk: Agent's internal reasoning\n            - ToolCallStart: Tool invocation started\n            - ToolCallProgress: Tool execution progress/result\n            - AgentPlanUpdate: Agent's execution plan\n            - CurrentModeUpdate: Agent mode change\n            - PromptResponse: Agent finished (has stop_reason)\n            - Error: An error occurred\n\n        Raises:\n            RuntimeError: If no session or prompt fails\n        \"\"\"\n        if self._state.current_session is None:\n            raise RuntimeError(\"No active session. Call start() first.\")\n\n        session_id = self._state.current_session.session_id\n        process = self._ensure_running()\n        packet_logger = get_packet_logger()\n\n        # Log the start of message processing\n        packet_logger.log_raw(\n            \"ACP-SEND-MESSAGE-START\",\n            {\n                \"session_id\": session_id,\n                \"message_preview\": (\n                    message[:200] + \"...\" if len(message) > 200 else message\n                ),\n                \"timeout\": timeout,\n            },\n        )\n\n        # Build prompt content blocks\n        prompt_content = [{\"type\": \"text\", \"text\": message}]\n\n        params = {\n            \"sessionId\": session_id,\n            \"prompt\": prompt_content,\n        }\n\n        request_id = self._send_request(\"session/prompt\", params)\n        start_time = time.time()\n        events_yielded = 0\n\n        while True:\n            remaining = timeout - (time.time() - start_time)\n            if remaining <= 0:\n                packet_logger.log_raw(\n                    \"ACP-TIMEOUT\",\n                    {\n                        \"session_id\": session_id,\n                        \"elapsed_ms\": (time.time() - start_time) * 1000,\n                    },\n                )\n                yield Error(code=-1, message=\"Timeout waiting for response\")\n                break\n\n            message_data = self._read_message(\n                timeout=min(remaining, SINGLE_READ_TIMEOUT)\n            )\n\n            if message_data is None:\n                if process.poll() is not None:\n                    packet_logger.log_raw(\n                        \"ACP-PROCESS-TERMINATED\",\n                        {\"session_id\": session_id, \"exit_code\": process.returncode},\n                    )\n                    yield Error(\n                        code=-1,\n                        message=f\"Agent process terminated with code {process.returncode}\",\n                    )\n                    break\n                continue\n\n            # Check for response to our prompt request\n            if message_data.get(\"id\") == request_id:\n                if \"error\" in message_data:\n                    error_data = message_data[\"error\"]\n                    packet_logger.log_jsonrpc_response(\n                        request_id, error=error_data, context=\"local\"\n                    )\n                    yield Error(\n                        code=error_data.get(\"code\", -1),\n                        message=error_data.get(\"message\", \"Unknown error\"),\n                    )\n                else:\n                    result = message_data.get(\"result\", {})\n                    packet_logger.log_jsonrpc_response(\n                        request_id, result=result, context=\"local\"\n                    )\n                    prompt_response = PromptResponse.model_validate(result)\n                    packet_logger.log_acp_event_yielded(\n                        \"prompt_response\", prompt_response\n                    )\n                    events_yielded += 1\n                    yield prompt_response\n\n                # Log completion summary\n                elapsed_ms = (time.time() - start_time) * 1000\n                packet_logger.log_raw(\n                    \"ACP-SEND-MESSAGE-COMPLETE\",\n                    {\n                        \"session_id\": session_id,\n                        \"events_yielded\": events_yielded,\n                        \"elapsed_ms\": elapsed_ms,\n                    },\n                )\n                break\n\n            # Handle notifications (session/update)\n            if message_data.get(\"method\") == \"session/update\":\n                params_data = message_data.get(\"params\", {})\n                update = params_data.get(\"update\", {})\n\n                # Log the notification\n                packet_logger.log_jsonrpc_notification(\n                    \"session/update\",\n                    {\"update_type\": update.get(\"sessionUpdate\")},\n                    context=\"local\",\n                )\n\n                for event in self._process_session_update(update):\n                    events_yielded += 1\n                    # Log each yielded event\n                    event_type = self._get_event_type_name(event)\n                    packet_logger.log_acp_event_yielded(event_type, event)\n                    yield event\n\n            # Handle requests from agent (e.g., fs/readTextFile)\n            elif \"method\" in message_data and \"id\" in message_data:\n                packet_logger.log_raw(\n                    \"ACP-UNSUPPORTED-REQUEST\",\n                    {\"method\": message_data[\"method\"], \"id\": message_data[\"id\"]},\n                )\n                self._send_error_response(\n                    message_data[\"id\"],\n                    -32601,\n                    f\"Method not supported: {message_data['method']}\",\n                )\n\n    def _get_event_type_name(self, event: ACPEvent) -> str:\n        \"\"\"Get the type name for an ACP event.\"\"\"\n        if isinstance(event, AgentMessageChunk):\n            return \"agent_message_chunk\"\n        elif isinstance(event, AgentThoughtChunk):\n            return \"agent_thought_chunk\"\n        elif isinstance(event, ToolCallStart):\n            return \"tool_call_start\"\n        elif isinstance(event, ToolCallProgress):\n            return \"tool_call_progress\"\n        elif isinstance(event, AgentPlanUpdate):\n            return \"agent_plan_update\"\n        elif isinstance(event, CurrentModeUpdate):\n            return \"current_mode_update\"\n        elif isinstance(event, PromptResponse):\n            return \"prompt_response\"\n        elif isinstance(event, Error):\n            return \"error\"\n        return \"unknown\"\n\n    def _process_session_update(\n        self, update: dict[str, Any]\n    ) -> Generator[ACPEvent, None, None]:\n        \"\"\"Process a session/update notification and yield typed ACP schema objects.\n\n        Validates and returns the actual ACP schema types directly.\n        Invalid updates are logged and skipped.\n        \"\"\"\n        update_type = update.get(\"sessionUpdate\")\n        packet_logger = get_packet_logger()\n\n        if update_type == \"agent_message_chunk\":\n            try:\n                yield AgentMessageChunk.model_validate(update)\n            except ValidationError as e:\n                packet_logger.log_raw(\n                    \"ACP-VALIDATION-ERROR\",\n                    {\"update_type\": update_type, \"error\": str(e), \"update\": update},\n                )\n\n        elif update_type == \"agent_thought_chunk\":\n            try:\n                yield AgentThoughtChunk.model_validate(update)\n            except ValidationError as e:\n                packet_logger.log_raw(\n                    \"ACP-VALIDATION-ERROR\",\n                    {\"update_type\": update_type, \"error\": str(e), \"update\": update},\n                )\n\n        elif update_type == \"user_message_chunk\":\n            # Echo of user message - skip but log\n            packet_logger.log_raw(\"ACP-SKIPPED-UPDATE\", {\"type\": \"user_message_chunk\"})\n\n        elif update_type == \"tool_call\":\n            try:\n                yield ToolCallStart.model_validate(update)\n            except ValidationError as e:\n                packet_logger.log_raw(\n                    \"ACP-VALIDATION-ERROR\",\n                    {\"update_type\": update_type, \"error\": str(e), \"update\": update},\n                )\n\n        elif update_type == \"tool_call_update\":\n            try:\n                yield ToolCallProgress.model_validate(update)\n            except ValidationError as e:\n                packet_logger.log_raw(\n                    \"ACP-VALIDATION-ERROR\",\n                    {\"update_type\": update_type, \"error\": str(e), \"update\": update},\n                )\n\n        elif update_type == \"plan\":\n            try:\n                yield AgentPlanUpdate.model_validate(update)\n            except ValidationError as e:\n                packet_logger.log_raw(\n                    \"ACP-VALIDATION-ERROR\",\n                    {\"update_type\": update_type, \"error\": str(e), \"update\": update},\n                )\n\n        elif update_type == \"available_commands_update\":\n            # Skip command updates - not relevant for consumers\n            packet_logger.log_raw(\n                \"ACP-SKIPPED-UPDATE\", {\"type\": \"available_commands_update\"}\n            )\n\n        elif update_type == \"current_mode_update\":\n            try:\n                yield CurrentModeUpdate.model_validate(update)\n            except ValidationError as e:\n                packet_logger.log_raw(\n                    \"ACP-VALIDATION-ERROR\",\n                    {\"update_type\": update_type, \"error\": str(e), \"update\": update},\n                )\n\n        elif update_type == \"session_info_update\":\n            # Skip session info updates - internal bookkeeping\n            packet_logger.log_raw(\"ACP-SKIPPED-UPDATE\", {\"type\": \"session_info_update\"})\n\n        else:\n            # Unknown update types are logged\n            packet_logger.log_raw(\n                \"ACP-UNKNOWN-UPDATE-TYPE\",\n                {\"update_type\": update_type, \"update\": update},\n            )\n\n    def _send_error_response(\n        self,\n        request_id: int,\n        code: int,\n        message: str,\n    ) -> None:\n        \"\"\"Send an error response to an agent request.\"\"\"\n        process = self._process\n        if process is None or process.stdin is None:\n            return\n\n        response = {\n            \"jsonrpc\": \"2.0\",\n            \"id\": request_id,\n            \"error\": {\n                \"code\": code,\n                \"message\": message,\n            },\n        }\n\n        try:\n            process.stdin.write(json.dumps(response) + \"\\n\")\n            process.stdin.flush()\n        except BrokenPipeError:\n            pass\n\n    def cancel(self) -> None:\n        \"\"\"Cancel the current operation.\"\"\"\n        if self._state.current_session is None:\n            return\n\n        self._send_notification(\n            \"session/cancel\",\n            {\"sessionId\": self._state.current_session.session_id},\n        )\n\n    @property\n    def is_running(self) -> bool:\n        \"\"\"Check if the agent process is running.\"\"\"\n        return self._process is not None and self._process.poll() is None\n\n    @property\n    def session_id(self) -> str | None:\n        \"\"\"Get the current session ID, if any.\"\"\"\n        if self._state.current_session:\n            return self._state.current_session.session_id\n        return None\n\n    @property\n    def agent_info(self) -> dict[str, Any]:\n        \"\"\"Get the agent's info from initialization.\"\"\"\n        return self._state.agent_info\n\n    @property\n    def agent_capabilities(self) -> dict[str, Any]:\n        \"\"\"Get the agent's capabilities from initialization.\"\"\"\n        return self._state.agent_capabilities\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/local/local_sandbox_manager.py",
    "content": "\"\"\"Filesystem-based sandbox manager for local/dev environments.\n\nLocalSandboxManager manages sandboxes as directories on the local filesystem.\nSuitable for development, testing, and single-node deployments.\n\nIMPORTANT: This manager does NOT interface with the database directly.\nAll database operations should be handled by the caller (SessionManager, Celery tasks, etc.).\n\"\"\"\n\nimport mimetypes\nimport re\nimport subprocess\nimport threading\nfrom collections.abc import Generator\nfrom pathlib import Path\nfrom uuid import UUID\n\nimport httpx\n\nfrom onyx.db.enums import SandboxStatus\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.server.features.build.configs import DEMO_DATA_PATH\nfrom onyx.server.features.build.configs import OPENCODE_DISABLED_TOOLS\nfrom onyx.server.features.build.configs import OUTPUTS_TEMPLATE_PATH\nfrom onyx.server.features.build.configs import SANDBOX_BASE_PATH\nfrom onyx.server.features.build.configs import VENV_TEMPLATE_PATH\nfrom onyx.server.features.build.sandbox.base import SandboxManager\nfrom onyx.server.features.build.sandbox.local.agent_client import ACPAgentClient\nfrom onyx.server.features.build.sandbox.local.agent_client import ACPEvent\nfrom onyx.server.features.build.sandbox.local.process_manager import ProcessManager\nfrom onyx.server.features.build.sandbox.manager.directory_manager import (\n    DirectoryManager,\n)\nfrom onyx.server.features.build.sandbox.manager.snapshot_manager import SnapshotManager\nfrom onyx.server.features.build.sandbox.models import FilesystemEntry\nfrom onyx.server.features.build.sandbox.models import LLMProviderConfig\nfrom onyx.server.features.build.sandbox.models import SandboxInfo\nfrom onyx.server.features.build.sandbox.models import SnapshotResult\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import ThreadSafeSet\n\nlogger = setup_logger()\n\n\nclass LocalSandboxManager(SandboxManager):\n    \"\"\"Filesystem-based sandbox manager for local/dev environments.\n\n    Manages sandboxes as directories on the local filesystem.\n    Suitable for development, testing, and single-node deployments.\n\n    Key characteristics:\n    - Sandboxes are directories under SANDBOX_BASE_PATH\n    - No container isolation (process-level only)\n    - No automatic cleanup of idle sandboxes\n\n    IMPORTANT: This manager does NOT interface with the database directly.\n    All database operations should be handled by the caller.\n\n    This is a singleton class - use get_sandbox_manager() to get the instance.\n    \"\"\"\n\n    _instance: \"LocalSandboxManager | None\" = None\n    _lock = threading.Lock()\n\n    def __new__(cls) -> \"LocalSandboxManager\":\n        if cls._instance is None:\n            with cls._lock:\n                if cls._instance is None:\n                    cls._instance = super().__new__(cls)\n                    cls._instance._initialize()\n        return cls._instance\n\n    def _initialize(self) -> None:\n        \"\"\"Initialize managers.\"\"\"\n        # Paths for templates\n        build_dir = Path(__file__).parent.parent.parent  # /onyx/server/features/build/\n        skills_path = build_dir / \"sandbox\" / \"kubernetes\" / \"docker\" / \"skills\"\n        agent_instructions_template_path = build_dir / \"AGENTS.template.md\"\n\n        self._directory_manager = DirectoryManager(\n            base_path=Path(SANDBOX_BASE_PATH),\n            outputs_template_path=Path(OUTPUTS_TEMPLATE_PATH),\n            venv_template_path=Path(VENV_TEMPLATE_PATH),\n            skills_path=skills_path,\n            agent_instructions_template_path=agent_instructions_template_path,\n        )\n        self._process_manager = ProcessManager()\n        self._snapshot_manager = SnapshotManager(get_default_file_store())\n\n        # Track ACP clients in memory - keyed by (sandbox_id, session_id) tuple\n        # Each session within a sandbox has its own ACP client\n        self._acp_clients: dict[tuple[UUID, UUID], ACPAgentClient] = {}\n\n        # Track Next.js processes - keyed by (sandbox_id, session_id) tuple\n        # Used for clean shutdown when sessions are deleted.\n        # Mutated from background threads; all access must hold _nextjs_lock.\n        self._nextjs_processes: dict[tuple[UUID, UUID], subprocess.Popen[bytes]] = {}\n\n        # Track sessions currently being (re)started - prevents concurrent restarts.\n        # ThreadSafeSet allows atomic check-and-add without holding _nextjs_lock.\n        self._nextjs_starting: ThreadSafeSet[tuple[UUID, UUID]] = ThreadSafeSet()\n\n        # Lock guarding _nextjs_processes (shared across sessions; hold briefly only)\n        self._nextjs_lock = threading.Lock()\n\n        # Validate templates exist (raises RuntimeError if missing)\n        self._validate_templates()\n\n    def _validate_templates(self) -> None:\n        \"\"\"Validate that sandbox templates exist.\n\n        Raises RuntimeError if templates are missing.\n        Templates are required for sandbox functionality.\n\n        Raises:\n            RuntimeError: If outputs or venv templates are missing\n        \"\"\"\n        outputs_path = Path(OUTPUTS_TEMPLATE_PATH)\n        venv_path = Path(VENV_TEMPLATE_PATH)\n\n        missing_templates: list[str] = []\n\n        if not outputs_path.exists():\n            missing_templates.append(f\"Outputs template not found at {outputs_path}\")\n\n        if not venv_path.exists():\n            missing_templates.append(f\"Venv template not found at {venv_path}\")\n\n        if missing_templates:\n            error_msg = (\n                \"Sandbox templates are missing. \"\n                \"Please build templates using:\\n\"\n                \"  python -m onyx.server.features.build.sandbox.util.build_venv_template\\n\"\n                \"Or use Docker image built with Dockerfile.sandbox-templates.\\n\\n\"\n                \"Missing templates:\\n\"\n            )\n            error_msg += \"\\n\".join(f\"  - {template}\" for template in missing_templates)\n            raise RuntimeError(error_msg)\n\n        logger.debug(f\"Outputs template found at {outputs_path}\")\n        logger.debug(f\"Venv template found at {venv_path}\")\n\n    def _get_sandbox_path(self, sandbox_id: str | UUID) -> Path:\n        \"\"\"Get the filesystem path for a sandbox based on sandbox_id.\n\n        Args:\n            sandbox_id: The sandbox ID (can be string or UUID)\n\n        Returns:\n            Path to the sandbox directory\n        \"\"\"\n        return Path(SANDBOX_BASE_PATH) / str(sandbox_id)\n\n    def _get_session_path(self, sandbox_id: str | UUID, session_id: str | UUID) -> Path:\n        \"\"\"Get the filesystem path for a session workspace.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n\n        Returns:\n            Path to the session workspace directory (sessions/$session_id/)\n        \"\"\"\n        return self._get_sandbox_path(sandbox_id) / \"sessions\" / str(session_id)\n\n    def _setup_filtered_files(\n        self,\n        session_path: Path,\n        source_path: Path,\n        excluded_paths: list[str],\n    ) -> None:\n        \"\"\"Set up files directory with filtered symlinks based on exclusions.\n\n        Instead of symlinking the entire source directory, this creates a files/\n        directory structure where:\n        - Top-level items (except user_library) are symlinked directly\n        - user_library/ is created as a real directory with filtered symlinks\n\n        Args:\n            session_path: Path to the session directory\n            source_path: Path to the user's knowledge files (e.g., /storage/tenant/knowledge/user/)\n            excluded_paths: List of paths within user_library to exclude\n                (e.g., [\"/data/file.xlsx\", \"/reports/old.pdf\"])\n        \"\"\"\n        files_dir = session_path / \"files\"\n        files_dir.mkdir(parents=True, exist_ok=True)\n\n        # Normalize excluded paths for comparison (remove leading slash)\n        excluded_set = {p.lstrip(\"/\") for p in excluded_paths}\n\n        if not source_path.exists():\n            logger.warning(f\"Source path does not exist: {source_path}\")\n            return\n\n        # Iterate through top-level items in source\n        for item in source_path.iterdir():\n            target_link = files_dir / item.name\n\n            if item.name == \"user_library\":\n                # user_library needs filtered handling\n                self._setup_filtered_user_library(\n                    target_dir=target_link,\n                    source_dir=item,\n                    excluded_set=excluded_set,\n                    base_path=\"\",\n                )\n            else:\n                # Other directories/files: symlink directly\n                if not target_link.exists():\n                    target_link.symlink_to(item, target_is_directory=item.is_dir())\n\n    def _setup_filtered_user_library(\n        self,\n        target_dir: Path,\n        source_dir: Path,\n        excluded_set: set[str],\n        base_path: str,\n    ) -> bool:\n        \"\"\"Recursively set up user_library with filtered symlinks.\n\n        Creates directory structure and symlinks only non-excluded files.\n        Only creates directories if they will contain at least one enabled file.\n\n        Args:\n            target_dir: Where to create the filtered structure\n            source_dir: Source user_library directory\n            excluded_set: Set of excluded relative paths (e.g., {\"data/file.xlsx\"})\n            base_path: Current path relative to user_library root (for recursion)\n\n        Returns:\n            True if any content was created (files or non-empty subdirectories)\n        \"\"\"\n        if not source_dir.exists():\n            return False\n\n        has_content = False\n\n        for item in source_dir.iterdir():\n            # Build relative path for exclusion check\n            rel_path = (\n                f\"{base_path}/{item.name}\".lstrip(\"/\") if base_path else item.name\n            )\n            target_link = target_dir / item.name\n\n            if item.is_dir():\n                # Check if entire directory is excluded\n                if rel_path in excluded_set:\n                    logger.debug(f\"Excluding directory: user_library/{rel_path}\")\n                    continue\n\n                # Recurse into directory - only create if it has content\n                subdir_has_content = self._setup_filtered_user_library(\n                    target_dir=target_link,\n                    source_dir=item,\n                    excluded_set=excluded_set,\n                    base_path=rel_path,\n                )\n                if subdir_has_content:\n                    has_content = True\n            else:\n                # Check if file is excluded\n                if rel_path in excluded_set:\n                    logger.debug(f\"Excluding file: user_library/{rel_path}\")\n                    continue\n\n                # Create parent directory if needed (lazy creation)\n                if not target_dir.exists():\n                    target_dir.mkdir(parents=True, exist_ok=True)\n\n                # Create symlink to file\n                if not target_link.exists():\n                    target_link.symlink_to(item)\n                has_content = True\n\n        return has_content\n\n    def provision(\n        self,\n        sandbox_id: UUID,\n        user_id: UUID,\n        tenant_id: str,\n        llm_config: LLMProviderConfig,  # noqa: ARG002\n    ) -> SandboxInfo:\n        \"\"\"Provision a new sandbox for a user.\n\n        Creates user-level sandbox structure:\n        1. Create sandbox directory with sessions/ subdirectory\n\n        NOTE: This does NOT set up session-specific workspaces or start Next.js.\n        Call setup_session_workspace() to create session workspaces.\n        Next.js server is started per-session in setup_session_workspace().\n\n        Args:\n            sandbox_id: Unique identifier for the sandbox\n            user_id: User identifier who owns this sandbox\n            tenant_id: Tenant identifier for multi-tenant isolation\n            llm_config: LLM provider configuration (stored for default config)\n\n        Returns:\n            SandboxInfo with the provisioned sandbox details\n\n        Raises:\n            RuntimeError: If provisioning fails\n        \"\"\"\n        logger.info(\n            f\"Starting sandbox provisioning for sandbox {sandbox_id}, user {user_id}, tenant {tenant_id}\"\n        )\n\n        # Create sandbox directory structure (user-level only)\n        logger.info(f\"Creating sandbox directory structure for sandbox {sandbox_id}\")\n        sandbox_path = self._directory_manager.create_sandbox_directory(str(sandbox_id))\n        logger.debug(f\"Sandbox directory created at {sandbox_path}\")\n\n        logger.info(\n            f\"Provisioned sandbox {sandbox_id} at {sandbox_path} (no sessions yet)\"\n        )\n\n        return SandboxInfo(\n            sandbox_id=sandbox_id,\n            directory_path=str(self._get_sandbox_path(sandbox_id)),\n            status=SandboxStatus.RUNNING,\n            last_heartbeat=None,\n        )\n\n    def terminate(self, sandbox_id: UUID) -> None:\n        \"\"\"Terminate a sandbox and clean up all resources.\n\n        1. Stop all Next.js processes for this sandbox\n        2. Stop all ACP clients for this sandbox (terminates agent subprocesses)\n        3. Cleanup sandbox directory\n\n        Args:\n            sandbox_id: The sandbox ID to terminate\n\n        Raises:\n            RuntimeError: If termination fails\n        \"\"\"\n        # Stop all Next.js processes for this sandbox (keyed by (sandbox_id, session_id))\n        with self._nextjs_lock:\n            processes_to_stop = [\n                (key, process)\n                for key, process in self._nextjs_processes.items()\n                if key[0] == sandbox_id\n            ]\n        for key, process in processes_to_stop:\n            session_id = key[1]\n            try:\n                self._stop_nextjs_process(process, session_id)\n                with self._nextjs_lock:\n                    self._nextjs_processes.pop(key, None)\n            except Exception as e:\n                logger.warning(\n                    f\"Failed to stop Next.js for sandbox {sandbox_id}, session {session_id}: {e}\"\n                )\n\n        # Stop all ACP clients for this sandbox (keyed by (sandbox_id, session_id))\n        clients_to_stop = [\n            (key, client)\n            for key, client in self._acp_clients.items()\n            if key[0] == sandbox_id\n        ]\n        for key, client in clients_to_stop:\n            try:\n                client.stop()\n                del self._acp_clients[key]\n            except Exception as e:\n                logger.warning(\n                    f\"Failed to stop ACP client for sandbox {sandbox_id}, session {key[1]}: {e}\"\n                )\n\n        # Cleanup directory\n        sandbox_path = self._get_sandbox_path(sandbox_id)\n        try:\n            self._directory_manager.cleanup_sandbox_directory(sandbox_path)\n        except Exception as e:\n            raise RuntimeError(\n                f\"Failed to cleanup sandbox directory {sandbox_path}: {e}\"\n            ) from e\n\n        logger.info(f\"Terminated sandbox {sandbox_id}\")\n\n    def setup_session_workspace(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        llm_config: LLMProviderConfig,\n        nextjs_port: int,\n        file_system_path: str | None = None,\n        snapshot_path: str | None = None,  # noqa: ARG002\n        user_name: str | None = None,\n        user_role: str | None = None,\n        user_work_area: str | None = None,\n        user_level: str | None = None,\n        use_demo_data: bool = False,\n        excluded_user_library_paths: list[str] | None = None,\n    ) -> None:\n        \"\"\"Set up a session workspace within an existing sandbox.\n\n        Creates per-session directory structure with:\n        1. sessions/$session_id/ directory\n        2. outputs/ (from snapshot or template)\n        3. .venv/ (from template)\n        4. AGENTS.md\n        5. .agent/skills/\n        6. files/ (symlink to demo data OR filtered user files)\n        7. opencode.json\n        8. org_info/ (if demo_data is enabled, the org structure and user identity for the user's demo persona)\n        9. attachments/\n        10. Start Next.js dev server for this session\n\n        Args:\n            sandbox_id: The sandbox ID (must be provisioned)\n            session_id: The session ID for this workspace\n            llm_config: LLM provider configuration for opencode.json\n            file_system_path: Path to user's knowledge/source files\n            snapshot_path: Optional storage path to restore outputs from\n            user_name: User's name for personalization in AGENTS.md\n            user_role: User's role/title for personalization in AGENTS.md\n            user_work_area: User's work area for demo persona (e.g., \"engineering\")\n            user_level: User's level for demo persona (e.g., \"ic\", \"manager\")\n            use_demo_data: If True, symlink files/ to demo data; else to user files\n            excluded_user_library_paths: List of paths within user_library/ to exclude\n                (e.g., [\"/data/file.xlsx\"]). These files won't be linked in the sandbox.\n\n        Raises:\n            RuntimeError: If workspace setup fails\n        \"\"\"\n        sandbox_path = self._get_sandbox_path(sandbox_id)\n\n        if not self._directory_manager.directory_exists(sandbox_path):\n            raise RuntimeError(\n                f\"Sandbox {sandbox_id} not provisioned - provision() first\"\n            )\n\n        logger.info(\n            f\"Setting up session workspace for session {session_id} in sandbox {sandbox_id}\"\n        )\n\n        # Create session directory\n        session_path = self._directory_manager.create_session_directory(\n            sandbox_path, str(session_id)\n        )\n        logger.debug(f\"Session directory created at {session_path}\")\n\n        try:\n            # Setup files access - choose between demo data or user files\n            if use_demo_data:\n                # Demo mode: symlink to demo data directory\n                symlink_target = Path(DEMO_DATA_PATH)\n                if not symlink_target.exists():\n                    logger.warning(\n                        f\"Demo data directory does not exist: {symlink_target}\"\n                    )\n                logger.info(f\"Setting up files symlink to demo data: {symlink_target}\")\n                self._directory_manager.setup_files_symlink(\n                    session_path, symlink_target\n                )\n            elif file_system_path:\n                source_path = Path(file_system_path)\n                # Check if we have exclusions for user_library\n                if excluded_user_library_paths:\n                    # Create filtered file structure with symlinks to enabled files only\n                    logger.debug(\n                        f\"Setting up filtered files with {len(excluded_user_library_paths)} exclusions\"\n                    )\n                    self._setup_filtered_files(\n                        session_path=session_path,\n                        source_path=source_path,\n                        excluded_paths=excluded_user_library_paths,\n                    )\n                else:\n                    # No exclusions: simple symlink to entire directory\n                    logger.debug(\n                        f\"Setting up files symlink to user files: {source_path}\"\n                    )\n                    self._directory_manager.setup_files_symlink(\n                        session_path, source_path\n                    )\n            else:\n                raise ValueError(\"No files symlink target provided\")\n            logger.debug(\"Files ready\")\n\n            # Setup org_info directory with user identity (at session root)\n            if user_work_area:\n                logger.debug(f\"Setting up org_info for {user_work_area}/{user_level}\")\n                self._directory_manager.setup_org_info(\n                    session_path, user_work_area, user_level\n                )\n\n            logger.debug(\"Setting up outputs directory from template\")\n            self._directory_manager.setup_outputs_directory(session_path)\n            logger.debug(\"Outputs directory ready\")\n\n            logger.debug(\"Setting up skills\")\n            self._directory_manager.setup_skills(session_path)\n            logger.debug(\"Skills ready\")\n\n            # Setup attachments directory\n            logger.debug(\"Setting up attachments directory\")\n            self._directory_manager.setup_attachments_directory(session_path)\n            logger.debug(\"Attachments directory ready\")\n\n            # Setup opencode.json with LLM provider configuration\n            logger.debug(\n                f\"Setting up opencode config with provider: {llm_config.provider}, model: {llm_config.model_name}\"\n            )\n            self._directory_manager.setup_opencode_config(\n                sandbox_path=session_path,\n                provider=llm_config.provider,\n                model_name=llm_config.model_name,\n                api_key=llm_config.api_key,\n                api_base=llm_config.api_base,\n                disabled_tools=OPENCODE_DISABLED_TOOLS,\n            )\n            logger.debug(\"Opencode config ready\")\n\n            # Start Next.js server on pre-allocated port\n            web_dir = self._directory_manager.get_web_path(\n                sandbox_path, str(session_id)\n            )\n            logger.info(f\"Starting Next.js server at {web_dir} on port {nextjs_port}\")\n\n            nextjs_process = self._process_manager.start_nextjs_server(\n                web_dir, nextjs_port\n            )\n            # Store process for clean shutdown on session delete\n            with self._nextjs_lock:\n                self._nextjs_processes[(sandbox_id, session_id)] = nextjs_process\n            logger.info(\"Next.js server started successfully\")\n\n            # Setup venv and AGENTS.md\n            logger.debug(\"Setting up virtual environment\")\n            self._directory_manager.setup_venv(session_path)\n            logger.debug(\"Virtual environment ready\")\n\n            logger.debug(\"Setting up agent instructions (AGENTS.md)\")\n            self._directory_manager.setup_agent_instructions(\n                sandbox_path=session_path,\n                provider=llm_config.provider,\n                model_name=llm_config.model_name,\n                nextjs_port=nextjs_port,\n                disabled_tools=OPENCODE_DISABLED_TOOLS,\n                user_name=user_name,\n                user_role=user_role,\n                use_demo_data=use_demo_data,\n                include_org_info=use_demo_data,\n            )\n            logger.debug(\"Agent instructions ready\")\n\n            logger.info(f\"Set up session workspace {session_id} at {session_path}\")\n\n        except Exception as e:\n            # Cleanup on failure\n            logger.error(\n                f\"Session workspace setup failed for session {session_id}: {e}\",\n                exc_info=True,\n            )\n            logger.info(f\"Cleaning up session directory at {session_path}\")\n            self._directory_manager.cleanup_session_directory(\n                sandbox_path, str(session_id)\n            )\n            raise RuntimeError(\n                f\"Failed to set up session workspace {session_id}: {e}\"\n            ) from e\n\n    def cleanup_session_workspace(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        nextjs_port: int | None = None,\n    ) -> None:\n        \"\"\"Clean up a session workspace (on session delete).\n\n        1. Stop Next.js dev server if running\n        2. Stop ACP client for this session\n        3. Remove session directory\n\n        Does NOT terminate the sandbox - other sessions may still be using it.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID to clean up\n            nextjs_port: Optional port where Next.js server is running (fallback only)\n        \"\"\"\n        # Stop Next.js dev server - try stored process first, then fallback to port lookup\n        process_key = (sandbox_id, session_id)\n        with self._nextjs_lock:\n            nextjs_process = self._nextjs_processes.pop(process_key, None)\n        if nextjs_process is not None:\n            self._stop_nextjs_process(nextjs_process, session_id)\n        elif nextjs_port is not None:\n            # Fallback: find by port (e.g., if server was restarted)\n            self._stop_nextjs_server_on_port(nextjs_port, session_id)\n\n        # Stop ACP client for this session\n        client_key = (sandbox_id, session_id)\n        client = self._acp_clients.pop(client_key, None)\n        if client:\n            try:\n                client.stop()\n                logger.debug(f\"Stopped ACP client for session {session_id}\")\n            except Exception as e:\n                logger.warning(\n                    f\"Failed to stop ACP client for session {session_id}: {e}\"\n                )\n\n        # Cleanup session directory\n        sandbox_path = self._get_sandbox_path(sandbox_id)\n        self._directory_manager.cleanup_session_directory(sandbox_path, str(session_id))\n        logger.info(f\"Cleaned up session workspace {session_id}\")\n\n    def _stop_nextjs_process(\n        self, process: subprocess.Popen[bytes], session_id: UUID\n    ) -> None:\n        \"\"\"Stop a Next.js dev server process gracefully.\n\n        Args:\n            process: The subprocess.Popen object for the Next.js server\n            session_id: The session ID (for logging)\n        \"\"\"\n        if process.poll() is not None:\n            # Process already terminated\n            logger.debug(\n                f\"Next.js server for session {session_id} already terminated (exit code: {process.returncode})\"\n            )\n            return\n\n        try:\n            logger.info(\n                f\"Stopping Next.js server (PID {process.pid}) for session {session_id}\"\n            )\n            self._process_manager.terminate_process(process.pid)\n            logger.debug(f\"Next.js server stopped for session {session_id}\")\n        except Exception as e:\n            logger.warning(\n                f\"Failed to stop Next.js server for session {session_id}: {e}\"\n            )\n\n    def _stop_nextjs_server_on_port(self, port: int, session_id: UUID) -> None:\n        \"\"\"Stop Next.js dev server running on a specific port (fallback method).\n\n        Finds the process listening on the port and terminates it gracefully.\n        Used when the process object is not available (e.g., after backend restart).\n\n        Args:\n            port: The port number where Next.js is running\n            session_id: The session ID (for logging)\n        \"\"\"\n        # Try lsof first - it's the most reliable cross-platform way\n        # Timeout to prevent hanging if system is slow or unresponsive\n        LSOF_TIMEOUT_SECONDS = 5.0\n        try:\n            result = subprocess.run(\n                [\"lsof\", \"-ti\", f\":{port}\"],\n                capture_output=True,\n                text=True,\n                timeout=LSOF_TIMEOUT_SECONDS,\n            )\n            if result.returncode == 0 and result.stdout.strip():\n                # lsof can return multiple PIDs - stop all processes on this port\n                pids = [\n                    int(pid.strip())\n                    for pid in result.stdout.strip().split(\"\\n\")\n                    if pid.strip()\n                ]\n                if pids:\n                    logger.info(\n                        f\"Found {len(pids)} process(es) on port {port} for session {session_id}, stopping all\"\n                    )\n                    for pid in pids:\n                        try:\n                            logger.debug(\n                                f\"Stopping Next.js server (PID {pid}) on port {port} for session {session_id}\"\n                            )\n                            self._process_manager.terminate_process(pid)\n                        except Exception as e:\n                            logger.warning(\n                                f\"Failed to stop process {pid} on port {port}: {e}\"\n                            )\n                    return\n            else:\n                logger.debug(\n                    f\"No process found on port {port} for session {session_id}\"\n                )\n        except subprocess.TimeoutExpired:\n            logger.warning(\n                f\"lsof timed out after {LSOF_TIMEOUT_SECONDS}s while looking for process on port {port} for session {session_id}\"\n            )\n        except FileNotFoundError:\n            # lsof not available, try psutil\n            try:\n                import psutil\n\n                # Use net_connections to find process by port\n                # Collect all PIDs on this port (handle multiple processes)\n                pids_to_stop = set()\n                for conn in psutil.net_connections(kind=\"inet\"):\n                    # laddr can be empty tuple for some connection states\n                    # Check if it's a tuple with at least 2 elements (host, port)\n                    if (\n                        conn.laddr\n                        and isinstance(conn.laddr, tuple)\n                        and len(conn.laddr) >= 2\n                        and conn.pid\n                    ):\n                        if conn.laddr[1] == port:\n                            pids_to_stop.add(conn.pid)\n\n                if pids_to_stop:\n                    logger.info(\n                        f\"Found {len(pids_to_stop)} process(es) on port {port} for session {session_id}, stopping all\"\n                    )\n                    for pid in pids_to_stop:\n                        try:\n                            logger.debug(\n                                f\"Stopping Next.js server (PID {pid}) on port {port} for session {session_id}\"\n                            )\n                            self._process_manager.terminate_process(pid)\n                        except Exception as e:\n                            logger.warning(\n                                f\"Failed to stop process {pid} on port {port}: {e}\"\n                            )\n                    return\n\n                logger.debug(\n                    f\"No process found on port {port} for session {session_id}\"\n                )\n            except ImportError:\n                logger.warning(\n                    f\"Neither lsof nor psutil available to find process on port {port}\"\n                )\n            except Exception as e:\n                logger.warning(f\"Failed to find process on port {port}: {e}\")\n        except Exception as e:\n            logger.warning(\n                f\"Failed to stop Next.js server on port {port} for session {session_id}: {e}\"\n            )\n\n    def create_snapshot(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        tenant_id: str,\n    ) -> SnapshotResult | None:\n        \"\"\"Not implemented for local backend - workspaces persist on disk.\n\n        Local sandboxes don't use snapshots since the filesystem persists.\n        This should never be called for local backend.\n        \"\"\"\n        raise NotImplementedError(\n            \"create_snapshot is not supported for local backend. Local sandboxes persist on disk and don't use snapshots.\"\n        )\n\n    def session_workspace_exists(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n    ) -> bool:\n        \"\"\"Check if a session's workspace directory exists.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID to check\n\n        Returns:\n            True if the session workspace exists, False otherwise\n        \"\"\"\n        session_path = self._get_session_path(sandbox_id, session_id)\n        outputs_path = session_path / \"outputs\"\n        return outputs_path.exists()\n\n    def ensure_nextjs_running(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        nextjs_port: int,\n    ) -> None:\n        \"\"\"Start Next.js server for a session if not already running.\n\n        Called when the server is detected as unreachable (e.g., after API server restart).\n        Returns immediately — the actual startup runs in a background daemon thread.\n        A per-session guard prevents concurrent restarts from racing.\n\n        Lock design: _nextjs_lock is shared across ALL sessions. Holding it during\n        httpx (1s) or start_nextjs_server (several seconds) would block every other\n        session's status checks and restarts. We only hold the lock for fast\n        in-memory ops (dict get, check_and_add). The slow I/O runs in the background\n        thread without holding any lock.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            nextjs_port: The port number for the Next.js server\n        \"\"\"\n        process_key = (sandbox_id, session_id)\n\n        with self._nextjs_lock:\n            existing = self._nextjs_processes.get(process_key)\n            if existing is not None and existing.poll() is None:\n                return\n\n        # Atomic check-and-add: returns True if already in set (another thread is starting)\n        if self._nextjs_starting.check_and_add(process_key):\n            return\n\n        def _start_in_background() -> None:\n            try:\n                # Port check in background to avoid blocking the main thread\n                try:\n                    with httpx.Client(timeout=1.0) as client:\n                        client.get(f\"http://localhost:{nextjs_port}\")\n                    logger.info(\n                        f\"Port {nextjs_port} already alive for session {session_id} (orphan process) — skipping restart\"\n                    )\n                    return\n                except Exception:\n                    pass  # Port is dead; proceed with restart\n\n                logger.info(\n                    f\"Starting Next.js for session {session_id} on port {nextjs_port}\"\n                )\n                sandbox_path = self._get_sandbox_path(sandbox_id)\n                web_dir = self._directory_manager.get_web_path(\n                    sandbox_path, str(session_id)\n                )\n                if not web_dir.exists():\n                    logger.warning(\n                        f\"Web dir missing for session {session_id}: {web_dir} — cannot restart Next.js\"\n                    )\n                    return\n                process = self._process_manager.start_nextjs_server(\n                    web_dir, nextjs_port\n                )\n                with self._nextjs_lock:\n                    self._nextjs_processes[process_key] = process\n                logger.info(\n                    f\"Auto-restarted Next.js for session {session_id} on port {nextjs_port}\"\n                )\n            except Exception as e:\n                logger.error(\n                    f\"Failed to auto-restart Next.js for session {session_id}: {e}\"\n                )\n            finally:\n                self._nextjs_starting.discard(process_key)\n\n        threading.Thread(target=_start_in_background, daemon=True).start()\n\n    def restore_snapshot(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        snapshot_storage_path: str,\n        tenant_id: str,  # noqa: ARG002\n        nextjs_port: int,\n        llm_config: LLMProviderConfig,\n        use_demo_data: bool = False,\n    ) -> None:\n        \"\"\"Not implemented for local backend - workspaces persist on disk.\n\n        Local sandboxes don't use snapshots since the filesystem persists.\n        This should never be called for local backend.\n        \"\"\"\n        raise NotImplementedError(\n            \"restore_snapshot is not supported for local backend. Local sandboxes persist on disk and don't use snapshots.\"\n        )\n\n    def health_check(\n        self,\n        sandbox_id: UUID,\n        timeout: float = 60.0,  # noqa: ARG002\n    ) -> bool:\n        \"\"\"Check if the sandbox is healthy (folder exists).\n\n        Args:\n            sandbox_id: The sandbox ID to check\n            timeout: Health check timeout in seconds\n\n        Returns:\n            True if sandbox is healthy, False otherwise\n        \"\"\"\n        # assume healthy if no port is specified\n        sandbox_path = self._get_sandbox_path(sandbox_id)\n        if not sandbox_path.exists():\n            return False\n        return True\n\n    def send_message(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        message: str,\n    ) -> Generator[ACPEvent, None, None]:\n        \"\"\"Send a message to the CLI agent and stream typed ACP events.\n\n        The agent runs in the session-specific workspace:\n        sessions/$session_id/\n\n        Yields ACPEvent objects:\n        - AgentMessageChunk: Text/image content from agent\n        - AgentThoughtChunk: Agent's internal reasoning\n        - ToolCallStart: Tool invocation started\n        - ToolCallProgress: Tool execution progress/result\n        - AgentPlanUpdate: Agent's execution plan\n        - CurrentModeUpdate: Agent mode change\n        - PromptResponse: Agent finished (has stop_reason)\n        - Error: An error occurred\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID (determines workspace directory)\n            message: The message content to send\n\n        Yields:\n            Typed ACP schema event objects\n        \"\"\"\n        from onyx.server.features.build.api.packet_logger import get_packet_logger\n\n        packet_logger = get_packet_logger()\n\n        # Get or create ACP client for this session\n        client_key = (sandbox_id, session_id)\n        client = self._acp_clients.get(client_key)\n\n        if client is None or not client.is_running:\n            session_path = self._get_session_path(sandbox_id, session_id)\n\n            # Log client creation\n            packet_logger.log_acp_client_start(\n                sandbox_id, session_id, str(session_path), context=\"local\"\n            )\n            logger.info(\n                f\"Creating new ACP client for sandbox {sandbox_id}, session {session_id}\"\n            )\n\n            # Create and start ACP client for this session\n            client = ACPAgentClient(cwd=str(session_path))\n            self._acp_clients[client_key] = client\n\n        # Log the send_message call at sandbox manager level\n        packet_logger.log_session_start(session_id, sandbox_id, message)\n\n        events_count = 0\n        try:\n            for event in client.send_message(message):\n                events_count += 1\n                yield event\n\n            # Log successful completion\n            packet_logger.log_session_end(\n                session_id, success=True, events_count=events_count\n            )\n        except Exception as e:\n            # Log failure\n            packet_logger.log_session_end(\n                session_id, success=False, error=str(e), events_count=events_count\n            )\n            raise\n\n    def _sanitize_path(self, path: str) -> str:\n        \"\"\"Sanitize a user-provided path to prevent path traversal attacks.\n\n        Removes '..' components and normalizes the path to prevent attacks like\n        'files/../../../../etc/passwd'.\n\n        Args:\n            path: User-provided relative path\n\n        Returns:\n            Sanitized path string with '..' components removed\n        \"\"\"\n        # Parse the path and filter out '..' components\n        path_obj = Path(path.lstrip(\"/\"))\n        clean_parts = [p for p in path_obj.parts if p != \"..\"]\n        return str(Path(*clean_parts)) if clean_parts else \".\"\n\n    def _is_path_allowed(self, session_path: Path, target_path: Path) -> bool:\n        \"\"\"Check if target_path is allowed for access.\n\n        Allows paths within session_path OR within the files/ symlink.\n        The files/ symlink intentionally points outside session_path to\n        provide access to knowledge files.\n\n        Args:\n            session_path: The session's root directory\n            target_path: The path being accessed\n\n        Returns:\n            True if access is allowed, False otherwise\n        \"\"\"\n        files_symlink = session_path / \"files\"\n\n        # Check if path is within the files/ symlink (or is the symlink itself)\n        if files_symlink.is_symlink():\n            try:\n                # Use lexical check (without resolving symlinks)\n                # This handles both the symlink itself (returns '.') and paths within it\n                target_path.relative_to(files_symlink)\n                return True\n            except ValueError:\n                pass\n\n        # Standard check: path must be within session directory\n        try:\n            target_path.resolve().relative_to(session_path.resolve())\n            return True\n        except ValueError:\n            return False\n\n    def list_directory(\n        self, sandbox_id: UUID, session_id: UUID, path: str\n    ) -> list[FilesystemEntry]:\n        \"\"\"List contents of a directory in the session's outputs directory.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            path: Relative path within sessions/$session_id/outputs/\n\n        Returns:\n            List of FilesystemEntry objects sorted by directory first, then name\n\n        Raises:\n            ValueError: If path traversal attempted or path is not a directory\n        \"\"\"\n        session_path = self._get_session_path(sandbox_id, session_id)\n        # Security: sanitize path to remove path traversal attempts\n        clean_path = self._sanitize_path(path)\n        target_path = session_path / clean_path\n\n        # Security check\n        if not self._is_path_allowed(session_path, target_path):\n            raise ValueError(\"Path traversal not allowed\")\n\n        if not target_path.is_dir():\n            raise ValueError(f\"Not a directory: {path}\")\n\n        entries = []\n        for item in target_path.iterdir():\n            stat = item.stat()\n            is_file = item.is_file()\n            mime_type = mimetypes.guess_type(str(item))[0] if is_file else None\n            entries.append(\n                FilesystemEntry(\n                    name=item.name,\n                    path=str(item.relative_to(session_path)),\n                    is_directory=item.is_dir(),\n                    size=stat.st_size if is_file else None,\n                    mime_type=mime_type,\n                )\n            )\n\n        return sorted(entries, key=lambda e: (not e.is_directory, e.name.lower()))\n\n    def read_file(self, sandbox_id: UUID, session_id: UUID, path: str) -> bytes:\n        \"\"\"Read a file from the session's outputs directory.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            path: Relative path within sessions/$session_id/outputs/\n\n        Returns:\n            File contents as bytes\n\n        Raises:\n            ValueError: If path traversal attempted or path is not a file\n        \"\"\"\n        session_path = self._get_session_path(sandbox_id, session_id)\n        # Security: sanitize path to remove path traversal attempts\n        clean_path = self._sanitize_path(path)\n        target_path = session_path / clean_path\n\n        # Security check\n        if not self._is_path_allowed(session_path, target_path):\n            raise ValueError(\"Path traversal not allowed\")\n\n        if not target_path.is_file():\n            raise ValueError(f\"Not a file: {path}\")\n\n        return target_path.read_bytes()\n\n    def upload_file(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        filename: str,\n        content: bytes,\n    ) -> str:\n        \"\"\"Upload a file to the session's attachments directory.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            filename: Sanitized filename\n            content: File content as bytes\n\n        Returns:\n            Relative path where file was saved (e.g., \"attachments/doc.pdf\")\n\n        Raises:\n            RuntimeError: If upload fails\n        \"\"\"\n        session_path = self._get_session_path(sandbox_id, session_id)\n        attachments_dir = session_path / \"attachments\"\n        attachments_dir.mkdir(parents=True, exist_ok=True)\n\n        # Handle filename collisions by appending a number\n        target_path = attachments_dir / filename\n        if target_path.exists():\n            stem = target_path.stem\n            suffix = target_path.suffix\n            counter = 1\n            while target_path.exists():\n                target_path = attachments_dir / f\"{stem}_{counter}{suffix}\"\n                counter += 1\n            filename = target_path.name\n\n        target_path.write_bytes(content)\n        target_path.chmod(0o644)\n\n        logger.info(\n            f\"Uploaded file to session {session_id}: attachments/{filename} ({len(content)} bytes)\"\n        )\n\n        # Inject attachments section into AGENTS.md if not already present\n        self._ensure_agents_md_attachments_section(session_path)\n\n        return f\"attachments/{filename}\"\n\n    def _ensure_agents_md_attachments_section(self, session_path: Path) -> None:\n        \"\"\"Ensure AGENTS.md has the attachments section.\n\n        Called after uploading a file. Only adds the section if it doesn't exist.\n        Inserts the section above ## Skills for better document flow.\n        \"\"\"\n        from onyx.server.features.build.sandbox.util.agent_instructions import (\n            ATTACHMENTS_SECTION_CONTENT,\n        )\n\n        agents_md_path = session_path / \"AGENTS.md\"\n        if not agents_md_path.exists():\n            return\n\n        current_content = agents_md_path.read_text()\n        section_marker = \"## Attachments (PRIORITY)\"\n\n        if section_marker not in current_content:\n            # Insert before ## Skills if it exists, otherwise append\n            skills_marker = \"## Skills\"\n            if skills_marker in current_content:\n                updated_content = current_content.replace(\n                    skills_marker,\n                    ATTACHMENTS_SECTION_CONTENT + \"\\n\\n\" + skills_marker,\n                )\n            else:\n                updated_content = (\n                    current_content.rstrip() + \"\\n\\n\" + ATTACHMENTS_SECTION_CONTENT\n                )\n            agents_md_path.write_text(updated_content)\n            logger.debug(\"Added attachments section to AGENTS.md\")\n\n    def delete_file(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        path: str,\n    ) -> bool:\n        \"\"\"Delete a file from the session's workspace.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n            path: Relative path to the file (e.g., \"attachments/doc.pdf\")\n\n        Returns:\n            True if file was deleted, False if not found\n\n        Raises:\n            ValueError: If path traversal attempted or trying to delete a directory\n        \"\"\"\n        session_path = self._get_session_path(sandbox_id, session_id)\n\n        # Security: robust path sanitization (consistent with K8s implementation)\n        # Reject paths with traversal patterns, URL-encoded characters, or null bytes\n        if re.search(r\"\\.\\.\", path) or \"%\" in path or \"\\x00\" in path:\n            raise ValueError(\"Invalid path: potential path traversal detected\")\n\n        # Reject paths with shell metacharacters (consistency with K8s implementation)\n        if re.search(r'[;&|`$(){}[\\]<>\\'\"\\n\\r\\\\]', path):\n            raise ValueError(\"Invalid path: contains disallowed characters\")\n\n        clean_path = path.lstrip(\"/\")\n\n        # Verify path only contains safe characters\n        if not re.match(r\"^[a-zA-Z0-9_\\-./]+$\", clean_path):\n            raise ValueError(\"Invalid path: contains disallowed characters\")\n\n        file_path = session_path / clean_path\n\n        # Verify path stays within session (defense in depth)\n        try:\n            file_path.resolve().relative_to(session_path.resolve())\n        except ValueError:\n            raise ValueError(\"Path traversal not allowed\")\n\n        if not file_path.exists():\n            logger.debug(f\"File not found for deletion in session {session_id}: {path}\")\n            return False\n\n        if file_path.is_dir():\n            raise ValueError(\"Cannot delete directory\")\n\n        file_path.unlink()\n        logger.info(f\"Deleted file from session {session_id}: {path}\")\n\n        return True\n\n    def get_upload_stats(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n    ) -> tuple[int, int]:\n        \"\"\"Get current file count and total size for a session's attachments.\n\n        Args:\n            sandbox_id: The sandbox ID\n            session_id: The session ID\n\n        Returns:\n            Tuple of (file_count, total_size_bytes)\n        \"\"\"\n        session_path = self._get_session_path(sandbox_id, session_id)\n        attachments_path = session_path / \"attachments\"\n\n        if not attachments_path.exists():\n            return 0, 0\n\n        file_count = 0\n        total_size = 0\n        for item in attachments_path.iterdir():\n            if item.is_file():\n                file_count += 1\n                total_size += item.stat().st_size\n\n        return file_count, total_size\n\n    def get_webapp_url(self, sandbox_id: UUID, port: int) -> str:  # noqa: ARG002\n        \"\"\"Get the webapp URL for a session's Next.js server.\n\n        For local backend, returns localhost URL with port.\n\n        Args:\n            sandbox_id: The sandbox ID (not used in local backend)\n            port: The session's allocated Next.js port\n\n        Returns:\n            URL to access the webapp (e.g., http://localhost:3015)\n        \"\"\"\n        return f\"http://localhost:{port}\"\n\n    def generate_pptx_preview(\n        self,\n        sandbox_id: UUID,\n        session_id: UUID,\n        pptx_path: str,\n        cache_dir: str,\n    ) -> tuple[list[str], bool]:\n        \"\"\"Convert PPTX to slide images using soffice + pdftoppm.\n\n        Uses local filesystem and subprocess for conversion.\n        \"\"\"\n        session_path = self._get_session_path(sandbox_id, session_id)\n        clean_pptx = self._sanitize_path(pptx_path)\n        clean_cache = self._sanitize_path(cache_dir)\n        pptx_abs = session_path / clean_pptx\n        cache_abs = session_path / clean_cache\n\n        if not pptx_abs.is_file():\n            raise ValueError(f\"File not found: {pptx_path}\")\n\n        # Check cache - if slides exist and are newer than the PPTX, use them\n        cached = False\n        if cache_abs.is_dir():\n            existing = sorted(cache_abs.glob(\"slide-*.jpg\"))\n            if existing:\n                pptx_mtime = pptx_abs.stat().st_mtime\n                cache_mtime = existing[0].stat().st_mtime\n                if cache_mtime >= pptx_mtime:\n                    cached = True\n                    return (\n                        [str(f.relative_to(session_path)) for f in existing],\n                        cached,\n                    )\n                # Stale cache - remove old slides\n                for f in existing:\n                    f.unlink()\n\n        cache_abs.mkdir(parents=True, exist_ok=True)\n\n        # Convert PPTX -> PDF using soffice\n        try:\n            import os\n\n            env = os.environ.copy()\n            env[\"SAL_USE_VCLPLUGIN\"] = \"svp\"\n            subprocess.run(\n                [\n                    \"soffice\",\n                    \"--headless\",\n                    \"--convert-to\",\n                    \"pdf\",\n                    \"--outdir\",\n                    str(cache_abs),\n                    str(pptx_abs),\n                ],\n                env=env,\n                check=True,\n                capture_output=True,\n                timeout=120,\n            )\n        except FileNotFoundError:\n            raise ValueError(\n                \"LibreOffice (soffice) is not installed. PPTX preview requires LibreOffice.\"\n            )\n        except subprocess.TimeoutExpired:\n            raise ValueError(\"PPTX conversion timed out\")\n        except subprocess.CalledProcessError as e:\n            raise ValueError(f\"PPTX conversion failed: {e.stderr.decode()}\")\n\n        # Find the generated PDF\n        pdf_files = list(cache_abs.glob(\"*.pdf\"))\n        if not pdf_files:\n            raise ValueError(\"soffice did not produce a PDF file\")\n        pdf_path = pdf_files[0]\n\n        # Convert PDF -> JPEG slides using pdftoppm\n        try:\n            subprocess.run(\n                [\n                    \"pdftoppm\",\n                    \"-jpeg\",\n                    \"-r\",\n                    \"150\",\n                    str(pdf_path),\n                    str(cache_abs / \"slide\"),\n                ],\n                check=True,\n                capture_output=True,\n                timeout=120,\n            )\n        except FileNotFoundError:\n            raise ValueError(\n                \"pdftoppm (poppler-utils) is not installed. PPTX preview requires poppler.\"\n            )\n        except subprocess.CalledProcessError as e:\n            raise ValueError(f\"PDF to image conversion failed: {e.stderr.decode()}\")\n\n        # Clean up PDF\n        pdf_path.unlink(missing_ok=True)\n\n        # Collect slide images\n        slides = sorted(cache_abs.glob(\"slide-*.jpg\"))\n        return (\n            [str(f.relative_to(session_path)) for f in slides],\n            False,\n        )\n\n    def sync_files(\n        self,\n        sandbox_id: UUID,\n        user_id: UUID,  # noqa: ARG002\n        tenant_id: str,  # noqa: ARG002\n        source: str | None = None,  # noqa: ARG002\n    ) -> bool:\n        \"\"\"No-op for local mode - files are directly accessible via symlink.\n\n        In local mode, the sandbox's files/ directory is a symlink to the\n        local persistent document storage, so no sync is needed. File visibility\n        in sessions is controlled via filtered symlinks in setup_session_workspace().\n\n        Args:\n            sandbox_id: The sandbox UUID (unused)\n            user_id: The user ID (unused)\n            tenant_id: The tenant ID (unused)\n            source: The source type (unused in local mode)\n\n        Returns:\n            True (always succeeds since no sync is needed)\n        \"\"\"\n        source_info = f\" source={source}\" if source else \"\"\n        logger.debug(\n            f\"sync_files called for local sandbox {sandbox_id}{source_info} - no-op\"\n        )\n        return True\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/local/process_manager.py",
    "content": "\"\"\"Process management for Next.js server subprocesses.\"\"\"\n\nimport os\nimport shutil\nimport signal\nimport subprocess\nimport time\nimport urllib.error\nimport urllib.request\nfrom pathlib import Path\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass ProcessManager:\n    \"\"\"Manages Next.js server subprocess lifecycle.\n\n    Responsible for:\n    - Starting Next.js dev servers\n    - Checking process status\n    - Gracefully terminating processes\n    \"\"\"\n\n    def start_nextjs_server(\n        self,\n        web_dir: Path,\n        port: int,\n        timeout: float = 180.0,\n    ) -> subprocess.Popen[bytes]:\n        \"\"\"Start Next.js dev server.\n\n        1. Clear .next cache to avoid stale paths from template\n        2. Start npm run dev on specified port\n        3. Wait for server to be ready\n\n        Args:\n            web_dir: Path to the Next.js web directory\n            port: Port number to run the server on\n            timeout: Maximum time to wait for server to start\n\n        Returns:\n            The subprocess.Popen object for the Next.js server\n\n        Raises:\n            RuntimeError: If server fails to start within timeout\n        \"\"\"\n        logger.info(f\"Starting Next.js server in {web_dir} on port {port}\")\n\n        # Clear Next.js cache to avoid stale paths from template\n        next_cache = web_dir / \".next\"\n        if next_cache.exists():\n            logger.debug(f\"Clearing Next.js cache at {next_cache}\")\n            shutil.rmtree(next_cache)\n\n        # Verify web_dir exists and has package.json\n        if not web_dir.exists():\n            logger.error(f\"Web directory does not exist: {web_dir}\")\n            raise RuntimeError(f\"Web directory does not exist: {web_dir}\")\n\n        package_json = web_dir / \"package.json\"\n        if not package_json.exists():\n            logger.error(f\"package.json not found in {web_dir}\")\n            raise RuntimeError(f\"package.json not found in {web_dir}\")\n\n        logger.debug(f\"Starting npm run dev command in {web_dir}\")\n        # CRITICAL: Inherit stdout/stderr (None) to prevent pipe buffer overflow.\n        # When PIPE is used but never drained, the buffer fills up (64KB on most systems)\n        # and the subprocess blocks indefinitely on write, causing the server to freeze.\n        # This was the root cause of Next.js servers dying after a few minutes.\n        # Using None inherits from parent, so logs appear in the backend terminal.\n        # FIXME: ideally we should drain the pipe to avoid the buffer overflow, but not for v1\n        process = subprocess.Popen(\n            [\"npm\", \"run\", \"dev\", \"--\", \"-p\", str(port)],\n            cwd=web_dir,\n            stdout=None,\n            stderr=None,\n        )\n        logger.info(f\"Next.js process started with PID {process.pid}\")\n\n        # Wait for server to be ready\n        server_url = f\"http://localhost:{port}\"\n        logger.info(f\"Waiting for Next.js server at {server_url} (timeout: {timeout}s)\")\n\n        if not self._wait_for_server(server_url, timeout=timeout, process=process):\n            # Check if process died\n            if process.poll() is not None:\n                logger.error(\n                    f\"Next.js server process died with code {process.returncode}. \"\n                    f\"Check the terminal or logs in {web_dir} for details.\"\n                )\n                raise RuntimeError(\n                    f\"Next.js server process died with code {process.returncode}. Check server logs for details.\"\n                )\n\n            # Process still running but server not responding\n            logger.error(\n                f\"Next.js server failed to respond within {timeout} seconds (process still running with PID {process.pid})\"\n            )\n\n            raise RuntimeError(\n                f\"Next.js server failed to start within {timeout} seconds\"\n            )\n\n        logger.info(f\"Next.js server is ready at {server_url}\")\n        return process\n\n    def _wait_for_server(\n        self,\n        url: str,\n        timeout: float = 30.0,\n        poll_interval: float = 0.5,\n        process: subprocess.Popen[bytes] | None = None,\n    ) -> bool:\n        \"\"\"Wait for a server to become available by polling.\n\n        Args:\n            url: URL to poll\n            timeout: Maximum time to wait in seconds\n            poll_interval: Time between poll attempts in seconds\n            process: Optional process to check if it's still running\n\n        Returns:\n            True if server became available, False if timeout reached\n        \"\"\"\n        start_time = time.time()\n        attempt_count = 0\n        last_log_time = start_time\n\n        while time.time() - start_time < timeout:\n            attempt_count += 1\n            elapsed = time.time() - start_time\n\n            # Check if process died early\n            if process is not None and process.poll() is not None:\n                logger.warning(\n                    f\"Process died during wait (exit code: {process.returncode}) \"\n                    f\"after {elapsed:.1f}s and {attempt_count} attempts\"\n                )\n                return False\n\n            try:\n                with urllib.request.urlopen(url, timeout=2) as response:\n                    if response.status == 200:\n                        logger.debug(\n                            f\"Server ready after {elapsed:.1f}s and {attempt_count} attempts\"\n                        )\n                        return True\n            except urllib.error.HTTPError as e:\n                # Log HTTP errors (server responding but with error)\n                if time.time() - last_log_time >= 10:\n                    logger.debug(\n                        f\"HTTP error {e.code} from {url} after {elapsed:.1f}s ({attempt_count} attempts)\"\n                    )\n                    last_log_time = time.time()\n            except (urllib.error.URLError, TimeoutError) as e:\n                # Log connection errors periodically (every 10 seconds)\n                if time.time() - last_log_time >= 10:\n                    logger.debug(\n                        f\"Still waiting for {url} after {elapsed:.1f}s ({attempt_count} attempts): {type(e).__name__}\"\n                    )\n                    last_log_time = time.time()\n\n            time.sleep(poll_interval)\n\n        logger.warning(\n            f\"Server at {url} did not become available within {timeout}s ({attempt_count} attempts)\"\n        )\n        return False\n\n    def is_process_running(self, pid: int) -> bool:\n        \"\"\"Check if process with given PID is still running.\n\n        Args:\n            pid: Process ID to check\n\n        Returns:\n            True if process is running, False otherwise\n        \"\"\"\n        try:\n            os.kill(pid, 0)  # Signal 0 just checks if process exists\n            return True\n        except ProcessLookupError:\n            return False\n        except PermissionError:\n            return True  # Process exists but we can't signal it\n\n    def terminate_process(self, pid: int, timeout: float = 5.0) -> bool:\n        \"\"\"Gracefully terminate process.\n\n        1. Send SIGTERM\n        2. Wait up to timeout seconds\n        3. If still running, send SIGKILL\n\n        Args:\n            pid: Process ID to terminate\n            timeout: Maximum time to wait for graceful shutdown\n\n        Returns:\n            True if process was terminated, False if it wasn't running\n        \"\"\"\n        if not self.is_process_running(pid):\n            return False\n\n        try:\n            os.kill(pid, signal.SIGTERM)\n        except ProcessLookupError:\n            return False\n\n        # Wait for graceful shutdown\n        deadline = time.time() + timeout\n        while time.time() < deadline:\n            if not self.is_process_running(pid):\n                return True\n            time.sleep(0.1)\n\n        # Force kill if still running\n        try:\n            os.kill(pid, signal.SIGKILL)\n        except ProcessLookupError:\n            pass\n\n        return True\n\n    def get_process_info(self, pid: int) -> dict[str, str | int | float] | None:\n        \"\"\"Get information about a running process.\n\n        Uses psutil if available, otherwise returns basic info.\n\n        Args:\n            pid: Process ID to get info for\n\n        Returns:\n            Dictionary with process info, or None if process not running\n        \"\"\"\n        if not self.is_process_running(pid):\n            return None\n\n        try:\n            import psutil\n\n            proc = psutil.Process(pid)\n            return {\n                \"pid\": pid,\n                \"status\": proc.status(),\n                \"cpu_percent\": proc.cpu_percent(),\n                \"memory_mb\": proc.memory_info().rss / 1024 / 1024,\n                \"create_time\": proc.create_time(),\n            }\n        except ImportError:\n            # psutil not available, return basic info\n            return {\"pid\": pid, \"status\": \"unknown\"}\n        except Exception:\n            return {\"pid\": pid, \"status\": \"unknown\"}\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/local/test_agent_client.py",
    "content": "#!/usr/bin/env python3\n\"\"\"Test script for ACPAgentClient with opencode CLI.\n\nUsage:\n  # From backend directory:\n  PYTHONPATH=. python onyx/server/features/build/sandbox/local/test_agent_client.py\n\n  # Or with specific message:\n  PYTHONPATH=. python onyx/server/features/build/sandbox/local/test_agent_client.py \"What files are in this directory?\"\n\n  # With specific working directory:\n  PYTHONPATH=. python onyx/server/features/build/sandbox/local/test_agent_client.py --dir /path/to/project \"List files\"\n\"\"\"\n\nimport argparse\nimport shutil\nimport tempfile\nfrom pathlib import Path\n\nfrom acp.schema import AgentMessageChunk\nfrom acp.schema import AgentPlanUpdate\nfrom acp.schema import AgentThoughtChunk\nfrom acp.schema import CurrentModeUpdate\nfrom acp.schema import Error\nfrom acp.schema import PromptResponse\nfrom acp.schema import ToolCallProgress\nfrom acp.schema import ToolCallStart\n\ntry:\n    from onyx.server.features.build.sandbox.local.agent_client import ACPAgentClient\nexcept ImportError:\n    from agent_client import ACPAgentClient  # type: ignore\n\n\ndef test_with_opencode_acp(message: str, working_dir: str | None = None) -> None:\n    \"\"\"Test ACPAgentClient with the opencode CLI using ACP protocol.\"\"\"\n    print(\"=\" * 60)\n    print(\"Testing ACPAgentClient with opencode acp\")\n    print(\"=\" * 60)\n\n    # Use provided working dir or create temp dir\n    if working_dir:\n        work_dir = Path(working_dir)\n        if not work_dir.exists():\n            print(f\"Working directory does not exist: {working_dir}\")\n            return\n        cleanup_dir = False\n    else:\n        work_dir = Path(tempfile.mkdtemp(prefix=\"opencode-test-\"))\n        cleanup_dir = True\n        print(f\"Created temp working directory: {work_dir}\")\n\n    try:\n        print(f\"\\nStarting ACPAgentClient in: {work_dir}\")\n\n        # Use context manager - handles start/stop automatically\n        with ACPAgentClient(cwd=str(work_dir)) as client:\n            print(\n                f\"Agent: {client.agent_info.get('name', 'unknown')} v{client.agent_info.get('version', '?')}\"\n            )\n            print(f\"Session ID: {client.session_id}\")\n\n            print(f\"\\nSending message: {message}\")\n            print(\"-\" * 60)\n\n            text_buffer = \"\"\n            event_count = 0\n\n            for event in client.send_message(message, timeout=120.0):\n                event_count += 1\n\n                if isinstance(event, AgentMessageChunk):\n                    content = event.content\n                    if content.type == \"text\":\n                        text_buffer += content.text\n                        print(content.text, end=\"\", flush=True)\n\n                elif isinstance(event, AgentThoughtChunk):\n                    content = event.content\n                    if content.type == \"text\":\n                        print(f\"\\n[Thought: {content.text[:100]}...]\", flush=True)\n\n                elif isinstance(event, ToolCallStart):\n                    print(\n                        f\"\\n[Tool Call: {event.title} ({event.kind}) - {event.tool_call_id}]\",\n                        flush=True,\n                    )\n\n                elif isinstance(event, ToolCallProgress):\n                    title_str = f\"{event.title} \" if event.title else \"\"\n                    print(\n                        f\"\\n[Tool Result: {title_str}{event.status} - {event.tool_call_id}]\",\n                        flush=True,\n                    )\n\n                elif isinstance(event, AgentPlanUpdate):\n                    steps = event.plan.entries if event.plan else []\n                    print(f\"\\n[Plan: {len(steps)} steps]\", flush=True)\n\n                elif isinstance(event, CurrentModeUpdate):\n                    print(f\"\\n[Mode: {event.current_mode_id}]\", flush=True)\n\n                elif isinstance(event, PromptResponse):\n                    print(f\"\\n\\n[Done - stop_reason: {event.stop_reason}]\")\n\n                elif isinstance(event, Error):\n                    print(f\"\\n[Error: {event.message}]\")\n\n                else:\n                    print(f\"\\n[Unknown event]: {event}\", flush=True)\n\n            print(\"-\" * 60)\n            print(f\"\\nReceived {event_count} events total\")\n            if text_buffer:\n                print(f\"Total text length: {len(text_buffer)} chars\")\n\n    except RuntimeError as e:\n        print(f\"\\nError: {e}\")\n\n    except Exception as e:\n        print(f\"\\nUnexpected error: {e}\")\n        import traceback\n\n        traceback.print_exc()\n\n    finally:\n        if cleanup_dir:\n            shutil.rmtree(work_dir, ignore_errors=True)\n            print(f\"\\nCleaned up temp directory: {work_dir}\")\n\n\ndef main() -> None:\n    \"\"\"Main entry point.\"\"\"\n    parser = argparse.ArgumentParser(\n        description=\"Test ACPAgentClient with opencode CLI\",\n        formatter_class=argparse.RawDescriptionHelpFormatter,\n        epilog=\"\"\"\nExamples:\n  # Test with opencode CLI (default message)\n  python test_agent_client.py\n\n  # Test with specific message\n  python test_agent_client.py \"What is 2+2?\"\n\n  # Test with specific working directory\n  python test_agent_client.py \"List files\" --dir /path/to/project\n        \"\"\",\n    )\n    parser.add_argument(\n        \"message\",\n        type=str,\n        nargs=\"?\",\n        default=\"What is 2+2? Reply briefly with just the number.\",\n        help=\"Message to send to opencode\",\n    )\n    parser.add_argument(\n        \"--dir\",\n        type=str,\n        metavar=\"PATH\",\n        help=\"Working directory for opencode (default: temp dir)\",\n    )\n\n    args = parser.parse_args()\n\n    print(\"\\nACP Agent Client Test Suite\")\n    print(\"===========================\\n\")\n\n    test_with_opencode_acp(args.message, args.dir)\n\n    print(\"\\n\\nDone!\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/local/test_manager.py",
    "content": "\"\"\"Tests for SandboxManager public interface.\n\nThese are external dependency unit tests that use real DB sessions and filesystem.\nEach test covers a single happy path case for the corresponding public function.\n\nTests for provision are not included as they require the full sandbox environment\nwith Next.js servers.\n\"\"\"\n\nimport shutil\nimport tempfile\nfrom collections.abc import Generator\nfrom pathlib import Path\nfrom uuid import UUID\nfrom uuid import uuid4\n\nimport pytest\nfrom acp.schema import PromptResponse\nfrom acp.schema import ToolCallStart\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.enums import BuildSessionStatus\nfrom onyx.db.enums import SandboxStatus\nfrom onyx.db.models import BuildSession\nfrom onyx.db.models import Sandbox\nfrom onyx.db.models import User\nfrom onyx.db.models import UserRole\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.server.features.build.configs import SANDBOX_BASE_PATH\nfrom onyx.server.features.build.db.build_session import allocate_nextjs_port\nfrom onyx.server.features.build.sandbox import get_sandbox_manager\nfrom onyx.server.features.build.sandbox.local import LocalSandboxManager\nfrom onyx.server.features.build.sandbox.local.agent_client import ACPEvent\nfrom onyx.server.features.build.sandbox.models import FilesystemEntry\nfrom onyx.server.features.build.sandbox.models import LLMProviderConfig\nfrom onyx.server.features.build.sandbox.models import SnapshotResult\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n\nTEST_TENANT_ID = \"public\"\nTEST_USER_EMAIL = \"test_sandbox_user@example.com\"\n\n\n@pytest.fixture(scope=\"function\")\ndef db_session() -> Generator[Session, None, None]:\n    \"\"\"Create a database session for testing.\"\"\"\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n    with get_session_with_current_tenant() as session:\n        yield session\n\n\n@pytest.fixture(scope=\"function\")\ndef tenant_context() -> Generator[None, None, None]:\n    \"\"\"Set up tenant context for testing.\"\"\"\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n    try:\n        yield\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\n@pytest.fixture\ndef sandbox_manager() -> LocalSandboxManager:\n    \"\"\"Get the SandboxManager instance via factory function.\"\"\"\n    manager = get_sandbox_manager()\n    assert isinstance(manager, LocalSandboxManager)\n    return manager\n\n\n@pytest.fixture\ndef temp_sandbox_dir() -> Generator[Path, None, None]:\n    \"\"\"Create a temporary directory structure for sandbox testing.\"\"\"\n    temp_dir = Path(tempfile.mkdtemp(prefix=\"sandbox_test_\"))\n    outputs_dir = temp_dir / \"outputs\"\n    outputs_dir.mkdir()\n\n    yield temp_dir\n\n    shutil.rmtree(temp_dir, ignore_errors=True)\n\n\n@pytest.fixture\ndef actual_sandbox_path(sandbox_record: Sandbox) -> Path:\n    \"\"\"Get the actual sandbox path where the manager expects it.\"\"\"\n    return Path(SANDBOX_BASE_PATH) / str(sandbox_record.id)\n\n\n@pytest.fixture\ndef test_user(\n    db_session: Session,\n    tenant_context: None,  # noqa: ARG001\n) -> Generator[User, None, None]:\n    \"\"\"Create or get a test user for sandbox tests.\"\"\"\n    from sqlalchemy import select\n\n    # Check if user already exists\n    stmt = select(User).where(User.email == TEST_USER_EMAIL)  # type: ignore[arg-type]\n    existing_user = db_session.execute(stmt).unique().scalar_one_or_none()\n\n    if existing_user:\n        yield existing_user\n        return\n\n    # Create new test user with required fields\n    user = User(\n        id=uuid4(),\n        email=TEST_USER_EMAIL,\n        hashed_password=\"test_hashed_password\",  # Required NOT NULL field\n        role=UserRole.BASIC,  # Required NOT NULL field\n    )\n    db_session.add(user)\n    db_session.commit()\n    db_session.refresh(user)\n\n    yield user\n\n    # Cleanup\n    existing = db_session.get(User, user.id)\n    if existing:\n        db_session.delete(existing)\n        db_session.commit()\n\n\n@pytest.fixture\ndef sandbox_record(\n    db_session: Session,\n    tenant_context: None,  # noqa: ARG001\n    test_user: User,\n) -> Generator[Sandbox, None, None]:\n    \"\"\"Create a real Sandbox record in the database and set up sandbox directory.\"\"\"\n    from sqlalchemy import select\n\n    # Check if sandbox already exists for this user (one sandbox per user)\n    stmt = select(Sandbox).where(Sandbox.user_id == test_user.id)\n    existing_sandbox = db_session.execute(stmt).unique().scalar_one_or_none()\n\n    if existing_sandbox:\n        # Clean up existing sandbox directory if it exists\n        existing_sandbox_path = Path(SANDBOX_BASE_PATH) / str(existing_sandbox.id)\n        if existing_sandbox_path.exists():\n            shutil.rmtree(existing_sandbox_path, ignore_errors=True)\n        # Delete existing sandbox record\n        db_session.delete(existing_sandbox)\n        db_session.commit()\n\n    # Create Sandbox with reference to User (new model: one sandbox per user)\n    sandbox = Sandbox(\n        id=uuid4(),\n        user_id=test_user.id,\n        status=SandboxStatus.RUNNING,\n    )\n    db_session.add(sandbox)\n    db_session.commit()\n    db_session.refresh(sandbox)\n\n    yield sandbox\n\n    # Cleanup - re-fetch in case it was deleted\n    existing = db_session.get(Sandbox, sandbox.id)\n    if existing:\n        db_session.delete(existing)\n        db_session.commit()\n\n\n@pytest.fixture\ndef build_session_record(\n    db_session: Session,\n    tenant_context: None,  # noqa: ARG001\n    test_user: User,\n) -> Generator[BuildSession, None, None]:\n    \"\"\"Create a BuildSession record for testing session-specific operations.\"\"\"\n    build_session = BuildSession(\n        id=uuid4(),\n        user_id=test_user.id,\n        status=BuildSessionStatus.ACTIVE,\n    )\n    db_session.add(build_session)\n    db_session.commit()\n    db_session.refresh(build_session)\n\n    yield build_session\n\n    # Cleanup\n    existing = db_session.get(BuildSession, build_session.id)\n    if existing:\n        db_session.delete(existing)\n        db_session.commit()\n\n\n@pytest.fixture\ndef session_workspace(\n    sandbox_manager: LocalSandboxManager,\n    sandbox_record: Sandbox,\n    build_session_record: BuildSession,\n    db_session: Session,\n) -> Generator[tuple[Sandbox, UUID], None, None]:\n    \"\"\"Set up a session workspace within the sandbox and return (sandbox, session_id).\"\"\"\n    session_id = build_session_record.id\n\n    # Use setup_session_workspace to create the session directory structure\n    llm_config = LLMProviderConfig(\n        provider=\"openai\",\n        model_name=\"gpt-4\",\n        api_key=\"test-api-key\",\n        api_base=None,\n    )\n    # Allocate port for this test session\n    nextjs_port = allocate_nextjs_port(db_session)\n\n    sandbox_manager.provision(\n        sandbox_id=sandbox_record.id,\n        user_id=sandbox_record.user_id,\n        tenant_id=TEST_TENANT_ID,\n        llm_config=llm_config,\n    )\n    sandbox_manager.setup_session_workspace(\n        sandbox_id=sandbox_record.id,\n        session_id=session_id,\n        llm_config=llm_config,\n        nextjs_port=nextjs_port,\n        file_system_path=SANDBOX_BASE_PATH,\n    )\n\n    yield sandbox_record, session_id\n\n    # Cleanup session workspace\n    sandbox_manager.cleanup_session_workspace(\n        sandbox_id=sandbox_record.id,\n        session_id=session_id,\n    )\n\n    sandbox_manager.terminate(sandbox_record.id)\n\n\n@pytest.fixture\ndef file_store_initialized() -> Generator[None, None, None]:\n    \"\"\"Initialize file store for snapshot tests.\"\"\"\n    get_default_file_store().initialize()\n    yield\n\n\nclass TestTerminate:\n    \"\"\"Tests for SandboxManager.terminate().\"\"\"\n\n    def test_terminate_cleans_up_resources(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        sandbox_record: Sandbox,\n        temp_sandbox_dir: Path,  # noqa: ARG002\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that terminate cleans up sandbox resources.\n\n        Note: Status update is now handled by the caller (SessionManager/tasks),\n        not by the SandboxManager itself.\n        \"\"\"\n        sandbox_manager.terminate(sandbox_record.id)\n        # No exception means success - resources cleaned up\n\n\nclass TestCreateSnapshot:\n    \"\"\"Tests for SandboxManager.create_snapshot().\"\"\"\n\n    def test_create_snapshot_archives_outputs(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n        file_store_initialized: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that create_snapshot archives the session's outputs directory.\n\n        Note: Caller is responsible for creating DB record from the SnapshotResult.\n        \"\"\"\n        sandbox, session_id = session_workspace\n        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)\n        outputs_dir = sandbox_path / \"sessions\" / str(session_id) / \"outputs\"\n        (outputs_dir / \"app.py\").write_text(\"print('hello')\")\n\n        result = sandbox_manager.create_snapshot(sandbox.id, session_id, TEST_TENANT_ID)\n\n        assert isinstance(result, SnapshotResult)\n        assert result.size_bytes > 0\n        assert result.storage_path is not None\n\n\nclass TestHealthCheck:\n    \"\"\"Tests for SandboxManager.health_check().\"\"\"\n\n    def test_health_check_returns_false_when_no_processes(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        sandbox_record: Sandbox,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that health_check returns False when no processes are running.\n\n        Note: nextjs_port is now passed by the caller instead of being fetched from DB.\n        \"\"\"\n        result = sandbox_manager.health_check(sandbox_record.id)\n\n        assert result is False\n\n\nclass TestListDirectory:\n    \"\"\"Tests for SandboxManager.list_directory().\"\"\"\n\n    def test_list_directory_returns_entries(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that list_directory returns filesystem entries.\"\"\"\n        sandbox, session_id = session_workspace\n        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)\n        outputs_dir = sandbox_path / \"sessions\" / str(session_id)\n        (outputs_dir / \"file.txt\").write_text(\"content\")\n        (outputs_dir / \"subdir\").mkdir()\n\n        result = sandbox_manager.list_directory(sandbox.id, session_id, \"/\")\n        print(result)\n\n        # .agent, .venv, AGENTS.md, opencode.json, files, outputs, attachments + 2 created files\n        assert len(result) == 9\n        assert all(isinstance(e, FilesystemEntry) for e in result)\n\n\nclass TestReadFile:\n    \"\"\"Tests for SandboxManager.read_file().\"\"\"\n\n    def test_read_file_returns_contents(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that read_file returns file contents as bytes.\"\"\"\n        sandbox, session_id = session_workspace\n        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)\n        outputs_dir = sandbox_path / \"sessions\" / str(session_id) / \"outputs\"\n        (outputs_dir / \"test.txt\").write_bytes(b\"Hello, World!\")\n\n        result = sandbox_manager.read_file(sandbox.id, session_id, \"test.txt\")\n\n        assert result == b\"Hello, World!\"\n\n\nclass TestSendMessage:\n    \"\"\"Tests for SandboxManager.send_message().\"\"\"\n\n    def test_send_message_streams_events(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that send_message streams ACPEvent objects and ends with PromptResponse.\n\n        Note: Heartbeat update is now handled by the caller (SessionManager),\n        not by the SandboxManager itself.\n        \"\"\"\n        sandbox, session_id = session_workspace\n\n        events: list[ACPEvent] = []\n        for event in sandbox_manager.send_message(\n            sandbox.id, session_id, \"What is 2 + 2?\"\n        ):\n            events.append(event)\n\n        # Should have received at least one event\n        assert len(events) > 0\n\n        # Last event should be PromptResponse (success) or contain results\n        last_event = events[-1]\n        assert isinstance(last_event, PromptResponse)\n\n    def test_send_message_write_file(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that send_message can write files and emits edit tool calls.\"\"\"\n        sandbox, session_id = session_workspace\n        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)\n        session_path = sandbox_path / \"sessions\" / str(session_id)\n\n        events: list[ACPEvent] = []\n        for event in sandbox_manager.send_message(\n            sandbox.id,\n            session_id,\n            \"Create a file called hello.txt with the content 'Hello, World!'\",\n        ):\n            events.append(event)\n\n        # Should have at least one ToolCallStart with kind='edit'\n        tool_calls = [e for e in events if isinstance(e, ToolCallStart)]\n        edit_tool_calls = [tc for tc in tool_calls if tc.kind == \"edit\"]\n        assert len(edit_tool_calls) >= 1, (\n            f\"Expected at least one edit tool call, got {len(edit_tool_calls)}. \"\n            f\"Tool calls: {[(tc.title, tc.kind) for tc in tool_calls]}\"\n        )\n\n        # Last event should be PromptResponse\n        last_event = events[-1]\n        assert isinstance(last_event, PromptResponse)\n\n        # Verify the file was actually created (agent writes relative to session root)\n        created_file = session_path / \"hello.txt\"\n        assert created_file.exists(), f\"Expected file {created_file} to be created\"\n        assert \"Hello\" in created_file.read_text()\n\n    def test_send_message_read_file(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that send_message can read files and emits read tool calls.\"\"\"\n        sandbox, session_id = session_workspace\n        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)\n        session_path = sandbox_path / \"sessions\" / str(session_id)\n\n        # Create a file for the agent to read (at session root, where agent has access)\n        test_file = session_path / \"secret.txt\"\n        test_file.write_text(\"The secret code is 12345\")\n\n        events: list[ACPEvent] = []\n        for event in sandbox_manager.send_message(\n            sandbox.id,\n            session_id,\n            \"Read the file secret.txt and tell me what the secret code is\",\n        ):\n            events.append(event)\n\n        # Should have at least one ToolCallStart with kind='read'\n        tool_calls = [e for e in events if isinstance(e, ToolCallStart)]\n        read_tool_calls = [tc for tc in tool_calls if tc.kind == \"read\"]\n        assert len(read_tool_calls) >= 1, (\n            f\"Expected at least one read tool call, got {len(read_tool_calls)}. \"\n            f\"Tool calls: {[(tc.title, tc.kind) for tc in tool_calls]}\"\n        )\n\n        # Last event should be PromptResponse\n        last_event = events[-1]\n        assert isinstance(last_event, PromptResponse)\n\n\nclass TestUploadFile:\n    \"\"\"Tests for SandboxManager.upload_file().\"\"\"\n\n    def test_upload_file_creates_file(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that upload_file creates a file in the attachments directory.\"\"\"\n        sandbox, session_id = session_workspace\n        content = b\"Hello, World!\"\n\n        result = sandbox_manager.upload_file(\n            sandbox.id, session_id, \"test.txt\", content\n        )\n\n        assert result == \"attachments/test.txt\"\n\n        # Verify file exists\n        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)\n        file_path = (\n            sandbox_path / \"sessions\" / str(session_id) / \"attachments\" / \"test.txt\"\n        )\n        assert file_path.exists()\n        assert file_path.read_bytes() == content\n\n    def test_upload_file_handles_collision(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that upload_file renames files on collision.\"\"\"\n        sandbox, session_id = session_workspace\n\n        # Upload first file\n        sandbox_manager.upload_file(sandbox.id, session_id, \"test.txt\", b\"first\")\n\n        # Upload second file with same name\n        result = sandbox_manager.upload_file(\n            sandbox.id, session_id, \"test.txt\", b\"second\"\n        )\n\n        assert result == \"attachments/test_1.txt\"\n\n\nclass TestDeleteFile:\n    \"\"\"Tests for SandboxManager.delete_file().\"\"\"\n\n    def test_delete_file_removes_file(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that delete_file removes a file.\"\"\"\n        sandbox, session_id = session_workspace\n\n        # Upload a file first\n        sandbox_manager.upload_file(sandbox.id, session_id, \"test.txt\", b\"content\")\n\n        # Delete it\n        result = sandbox_manager.delete_file(\n            sandbox.id, session_id, \"attachments/test.txt\"\n        )\n\n        assert result is True\n\n        # Verify file is gone\n        sandbox_path = Path(SANDBOX_BASE_PATH) / str(sandbox.id)\n        file_path = (\n            sandbox_path / \"sessions\" / str(session_id) / \"attachments\" / \"test.txt\"\n        )\n        assert not file_path.exists()\n\n    def test_delete_file_returns_false_for_missing(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that delete_file returns False for non-existent file.\"\"\"\n        sandbox, session_id = session_workspace\n\n        result = sandbox_manager.delete_file(\n            sandbox.id, session_id, \"attachments/nonexistent.txt\"\n        )\n\n        assert result is False\n\n    def test_delete_file_rejects_path_traversal(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that delete_file rejects path traversal attempts.\"\"\"\n        sandbox, session_id = session_workspace\n\n        with pytest.raises(ValueError, match=\"path traversal\"):\n            sandbox_manager.delete_file(sandbox.id, session_id, \"../../../etc/passwd\")\n\n\nclass TestGetUploadStats:\n    \"\"\"Tests for SandboxManager.get_upload_stats().\"\"\"\n\n    def test_get_upload_stats_empty(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test get_upload_stats returns zeros for empty directory.\"\"\"\n        sandbox, session_id = session_workspace\n\n        file_count, total_size = sandbox_manager.get_upload_stats(\n            sandbox.id, session_id\n        )\n\n        assert file_count == 0\n        assert total_size == 0\n\n    def test_get_upload_stats_with_files(\n        self,\n        sandbox_manager: LocalSandboxManager,\n        db_session: Session,  # noqa: ARG002\n        session_workspace: tuple[Sandbox, UUID],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test get_upload_stats returns correct count and size.\"\"\"\n        sandbox, session_id = session_workspace\n\n        # Upload some files\n        sandbox_manager.upload_file(\n            sandbox.id, session_id, \"file1.txt\", b\"hello\"\n        )  # 5 bytes\n        sandbox_manager.upload_file(\n            sandbox.id, session_id, \"file2.txt\", b\"world!\"\n        )  # 6 bytes\n\n        file_count, total_size = sandbox_manager.get_upload_stats(\n            sandbox.id, session_id\n        )\n\n        assert file_count == 2\n        assert total_size == 11  # 5 + 6\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/manager/__init__.py",
    "content": "\"\"\"Sandbox manager utilities.\n\nContains:\n- DirectoryManager: Sandbox directory structure management\n- SnapshotManager: Snapshot creation and restoration\n\"\"\"\n\nfrom onyx.server.features.build.sandbox.manager.directory_manager import (\n    DirectoryManager,\n)\nfrom onyx.server.features.build.sandbox.manager.snapshot_manager import SnapshotManager\n\n__all__ = [\n    \"DirectoryManager\",\n    \"SnapshotManager\",\n]\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/manager/directory_manager.py",
    "content": "\"\"\"Directory management for sandbox lifecycle.\n\nSupports user-shared sandbox model where:\n- One sandbox per user with shared files/ directory\n- Per-session workspaces under sessions/$session_id/\n\"\"\"\n\nimport json\nimport shutil\nfrom pathlib import Path\n\nfrom onyx.server.features.build.sandbox.util.agent_instructions import (\n    generate_agent_instructions,\n)\nfrom onyx.server.features.build.sandbox.util.opencode_config import (\n    build_opencode_config,\n)\nfrom onyx.server.features.build.sandbox.util.persona_mapping import (\n    generate_user_identity_content,\n)\nfrom onyx.server.features.build.sandbox.util.persona_mapping import get_persona_info\nfrom onyx.server.features.build.sandbox.util.persona_mapping import ORG_INFO_AGENTS_MD\nfrom onyx.server.features.build.sandbox.util.persona_mapping import (\n    ORGANIZATION_STRUCTURE,\n)\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass DirectoryManager:\n    \"\"\"Manages sandbox directory creation and cleanup.\n\n    Responsible for:\n    - Creating sandbox directory structure (user-level)\n    - Creating session workspace directories (session-level)\n    - Setting up symlinks to knowledge files\n    - Copying templates (outputs, venv, skills, AGENTS.md)\n    - Cleaning up sandbox/session directories on termination\n\n    Directory Structure:\n        $base_path/$sandbox_id/\n        ├── files/                     # Symlink to knowledge/source files (SHARED)\n        └── sessions/\n            ├── $session_id_1/         # Per-session workspace\n            │   ├── outputs/           # Agent output (from template or snapshot)\n            │   │   └── web/           # Next.js app\n            │   ├── .venv/             # Python virtual environment\n            │   ├── .agent/skills/     # Opencode skills\n            │   ├── files/             # Symlink to sandbox-level files/ (SHARED)\n            │   ├── AGENTS.md          # Agent instructions\n            │   ├── opencode.json      # LLM config\n            │   └── attachments/\n            └── $session_id_2/\n                └── ...\n    \"\"\"\n\n    def __init__(\n        self,\n        base_path: Path,\n        outputs_template_path: Path,\n        venv_template_path: Path,\n        skills_path: Path,\n        agent_instructions_template_path: Path,\n    ) -> None:\n        \"\"\"Initialize DirectoryManager with template paths.\n\n        Args:\n            base_path: Root directory for all sandboxes\n            outputs_template_path: Path to outputs template directory\n            venv_template_path: Path to Python virtual environment template\n            skills_path: Path to agent skills directory\n            agent_instructions_template_path: Path to AGENTS.md template file\n        \"\"\"\n        self._base_path = base_path\n        self._outputs_template_path = outputs_template_path\n        self._venv_template_path = venv_template_path\n        self._skills_path = skills_path\n        self._agent_instructions_template_path = agent_instructions_template_path\n\n    def create_sandbox_directory(self, sandbox_id: str) -> Path:\n        \"\"\"Create sandbox directory structure (user-level).\n\n        Creates the base directory for a user's sandbox:\n        {base_path}/{sandbox_id}/\n        ├── files/                      # Symlink to knowledge/source files (set up separately)\n        └── sessions/                   # Container for per-session workspaces\n\n        NOTE: This only creates the sandbox-level structure.\n        Call create_session_directory() to create per-session workspaces.\n\n        Args:\n            sandbox_id: Unique identifier for the sandbox\n\n        Returns:\n            Path to the created sandbox directory\n        \"\"\"\n        sandbox_path = self._base_path / sandbox_id\n        sandbox_path.mkdir(parents=True, exist_ok=True)\n        # Create sessions directory for per-session workspaces\n        (sandbox_path / \"sessions\").mkdir(exist_ok=True)\n        return sandbox_path\n\n    def create_session_directory(self, sandbox_path: Path, session_id: str) -> Path:\n        \"\"\"Create session workspace directory structure.\n\n        Creates a per-session workspace within the sandbox:\n        {sandbox_path}/sessions/{session_id}/\n        ├── outputs/                    # Working directory from template\n        │   ├── web/                    # Next.js app\n        │   ├── slides/\n        │   ├── markdown/\n        │   └── graphs/\n        ├── .venv/                      # Python virtual environment\n        ├── AGENTS.md                   # Agent instructions\n        ├── opencode.json               # LLM config (set up separately)\n        ├── attachments/                # User-uploaded files\n        └── .opencode/\n            └── skills/                 # Agent skills\n\n        NOTE: This creates the directory structure but doesn't copy templates.\n        Call setup_outputs_directory(), setup_venv(), etc. to set up contents.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            session_id: Unique identifier for the session\n\n        Returns:\n            Path to the created session workspace directory\n        \"\"\"\n        session_path = sandbox_path / \"sessions\" / session_id\n        session_path.mkdir(parents=True, exist_ok=True)\n        return session_path\n\n    def cleanup_session_directory(self, sandbox_path: Path, session_id: str) -> None:\n        \"\"\"Remove session workspace directory and all contents.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            session_id: Session ID to clean up\n        \"\"\"\n        session_path = sandbox_path / \"sessions\" / session_id\n        if session_path.exists():\n            shutil.rmtree(session_path)\n            logger.info(f\"Cleaned up session directory: {session_path}\")\n\n    def get_session_path(self, sandbox_path: Path, session_id: str) -> Path:\n        \"\"\"Get path to session workspace.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            session_id: Session ID\n\n        Returns:\n            Path to sessions/$session_id/\n        \"\"\"\n        return sandbox_path / \"sessions\" / session_id\n\n    def setup_files_symlink(\n        self,\n        sandbox_path: Path,\n        file_system_path: Path,\n    ) -> None:\n        \"\"\"Create symlink to knowledge/source files.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            file_system_path: Path to the source files to link\n        \"\"\"\n        files_link = sandbox_path / \"files\"\n        if not files_link.exists():\n            files_link.symlink_to(file_system_path, target_is_directory=True)\n\n    def setup_org_info(\n        self,\n        session_path: Path,\n        user_work_area: str | None,\n        user_level: str | None,\n    ) -> None:\n        \"\"\"Create org_info directory with organizational context files.\n\n        Creates an org_info/ directory at the session root level with:\n        - AGENTS.md: Description of available org info files\n        - user_identity_profile.txt: User's persona information\n        - organization_structure.json: Org hierarchy with managers and reports\n\n        Uses shared constants from persona_mapping module as single source of truth.\n\n        Args:\n            session_path: Path to the session directory\n            user_work_area: User's work area (e.g., \"engineering\", \"product\")\n            user_level: User's level (e.g., \"ic\", \"manager\")\n        \"\"\"\n        # Get persona info from mapping\n        persona = get_persona_info(user_work_area, user_level)\n        if not persona:\n            logger.debug(\n                f\"No persona found for work_area={user_work_area}, level={user_level}, skipping org_info setup\"\n            )\n            return\n\n        # Create org_info directory at session root\n        org_info_dir = session_path / \"org_info\"\n        org_info_dir.mkdir(parents=True, exist_ok=True)\n\n        try:\n            # 1. AGENTS.md - Description of org info contents\n            (org_info_dir / \"AGENTS.md\").write_text(ORG_INFO_AGENTS_MD)\n\n            # 2. user_identity_profile.txt - User's persona\n            (org_info_dir / \"user_identity_profile.txt\").write_text(\n                generate_user_identity_content(persona)\n            )\n\n            # 3. organization_structure.json - Org hierarchy\n            (org_info_dir / \"organization_structure.json\").write_text(\n                json.dumps(ORGANIZATION_STRUCTURE, indent=2)\n            )\n\n            logger.info(\n                f\"Created org_info with identity: {persona['name']} <{persona['email']}>\"\n            )\n        except Exception as e:\n            # Don't fail provisioning if org_info setup fails\n            logger.warning(f\"Failed to setup org_info: {e}\")\n\n    def setup_outputs_directory(self, sandbox_path: Path) -> None:\n        \"\"\"Copy outputs template and create additional directories.\n\n        Copies the Next.js template and creates additional output\n        directories for generated content (slides, markdown, graphs).\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n        \"\"\"\n        output_dir = sandbox_path / \"outputs\"\n        if not output_dir.exists():\n            if self._outputs_template_path.exists():\n                shutil.copytree(self._outputs_template_path, output_dir, symlinks=True)\n            else:\n                raise RuntimeError(\n                    f\"Outputs template path does not exist: {self._outputs_template_path}\"\n                )\n\n        # Create additional output directories for generated content\n        (output_dir / \"markdown\").mkdir(parents=True, exist_ok=True)\n        # TODO: no images for now\n        # (output_dir / \"slides\").mkdir(parents=True, exist_ok=True)\n        # TODO: No graphs for now\n        # (output_dir / \"graphs\").mkdir(parents=True, exist_ok=True)\n\n    def setup_venv(self, sandbox_path: Path) -> Path:\n        \"\"\"Copy virtual environment template.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n\n        Returns:\n            Path to the virtual environment directory\n        \"\"\"\n        venv_path = sandbox_path / \".venv\"\n        if not venv_path.exists() and self._venv_template_path.exists():\n            shutil.copytree(self._venv_template_path, venv_path, symlinks=True)\n        return venv_path\n\n    def setup_agent_instructions(\n        self,\n        sandbox_path: Path,\n        provider: str | None = None,\n        model_name: str | None = None,\n        nextjs_port: int | None = None,\n        disabled_tools: list[str] | None = None,\n        user_name: str | None = None,\n        user_role: str | None = None,\n        use_demo_data: bool = False,\n        include_org_info: bool = False,\n    ) -> None:\n        \"\"\"Generate AGENTS.md with dynamic configuration.\n\n        Reads the template file and replaces placeholders with actual values\n        including user personalization, LLM configuration, runtime settings,\n        and dynamically discovered knowledge sources.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            provider: LLM provider type (e.g., \"openai\", \"anthropic\")\n            model_name: Model name (e.g., \"claude-sonnet-4-5\", \"gpt-4o\")\n            nextjs_port: Port for Next.js development server\n            disabled_tools: List of disabled tools\n            user_name: User's name for personalization\n            user_role: User's role/title for personalization\n            use_demo_data: If True, exclude user context from AGENTS.md\n            include_org_info: Whether to include the org_info section (demo data mode)\n        \"\"\"\n        agent_md_path = sandbox_path / \"AGENTS.md\"\n        if agent_md_path.exists():\n            return\n\n        # Get the files path (symlink to knowledge sources)\n        files_path = sandbox_path / \"files\"\n\n        # Use shared utility to generate content\n        content = generate_agent_instructions(\n            template_path=self._agent_instructions_template_path,\n            skills_path=self._skills_path,\n            files_path=files_path if files_path.exists() else None,\n            provider=provider,\n            model_name=model_name,\n            nextjs_port=nextjs_port,\n            disabled_tools=disabled_tools,\n            user_name=user_name,\n            user_role=user_role,\n            use_demo_data=use_demo_data,\n            include_org_info=include_org_info,\n        )\n\n        # Write the generated content\n        agent_md_path.write_text(content)\n        logger.debug(f\"Generated AGENTS.md at {agent_md_path}\")\n\n    def setup_skills(self, sandbox_path: Path, overwrite: bool = True) -> None:\n        \"\"\"Copy skills directory to .opencode/skills.\n\n        Copies all skills from the source skills directory to the sandbox's\n        .opencode/skills directory. If the destination already exists, it will\n        be removed and recreated to ensure skills are up-to-date.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            overwrite: If True, overwrite existing skills. If False, preserve existing skills.\n        \"\"\"\n        skills_dest = sandbox_path / \".opencode\" / \"skills\"\n\n        if not self._skills_path.exists():\n            logger.warning(\n                f\"Skills path {self._skills_path} does not exist, skipping skills setup\"\n            )\n            return\n\n        if not overwrite and skills_dest.exists():\n            logger.debug(\n                f\"Skills directory already exists at {skills_dest}, skipping skills setup\"\n            )\n            return\n\n        try:\n            # Remove existing skills directory if it exists to ensure fresh copy\n            if skills_dest.exists():\n                shutil.rmtree(skills_dest)\n\n            # Create parent directory and copy skills\n            skills_dest.parent.mkdir(parents=True, exist_ok=True)\n            shutil.copytree(self._skills_path, skills_dest)\n\n            # Verify the copy succeeded\n            if not skills_dest.exists():\n                logger.error(\n                    f\"Skills copy failed: destination {skills_dest} does not exist after copy\"\n                )\n        except Exception as e:\n            logger.error(\n                f\"Failed to copy skills from {self._skills_path} to {skills_dest}: {e}\",\n                exc_info=True,\n            )\n            raise\n\n    def setup_opencode_config(\n        self,\n        sandbox_path: Path,\n        provider: str,\n        model_name: str,\n        api_key: str | None = None,\n        api_base: str | None = None,\n        disabled_tools: list[str] | None = None,\n        overwrite: bool = True,\n        dev_mode: bool = False,\n    ) -> None:\n        \"\"\"Create opencode.json configuration file for the agent.\n\n        Configures the opencode CLI agent with the LLM provider settings\n        from Onyx's configured LLM provider.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            provider: LLM provider type (e.g., \"openai\", \"anthropic\")\n            model_name: Model name (e.g., \"claude-sonnet-4-5\", \"gpt-4o\")\n            api_key: Optional API key for the provider\n            api_base: Optional custom API base URL\n            disabled_tools: Optional list of tools to disable (e.g., [\"question\", \"webfetch\"])\n            overwrite: If True, overwrite existing config. If False, preserve existing config.\n            dev_mode: If True, allow all external directories (local dev).\n                      If False (default), only whitelist /workspace/files and /workspace/demo_data.\n        \"\"\"\n        config_path = sandbox_path / \"opencode.json\"\n        if not overwrite and config_path.exists():\n            logger.debug(\n                f\"opencode.json already exists at {config_path}, skipping config setup\"\n            )\n            return\n\n        # Use shared config builder\n        config = build_opencode_config(\n            provider=provider,\n            model_name=model_name,\n            api_key=api_key,\n            api_base=api_base,\n            disabled_tools=disabled_tools,\n            dev_mode=dev_mode,\n        )\n\n        config_json = json.dumps(config, indent=2)\n        config_path.write_text(config_json)\n\n    def cleanup_sandbox_directory(self, sandbox_path: Path) -> None:\n        \"\"\"Remove sandbox directory and all contents.\n\n        Args:\n            sandbox_path: Path to the sandbox directory to remove\n        \"\"\"\n        if sandbox_path.exists():\n            shutil.rmtree(sandbox_path)\n\n    def get_outputs_path(\n        self, sandbox_path: Path, session_id: str | None = None\n    ) -> Path:\n        \"\"\"Return path to outputs directory.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            session_id: Optional session ID for session-specific outputs\n\n        Returns:\n            Path to the outputs directory\n        \"\"\"\n        if session_id:\n            return sandbox_path / \"sessions\" / session_id / \"outputs\"\n        return sandbox_path / \"outputs\"\n\n    def get_web_path(self, sandbox_path: Path, session_id: str) -> Path:\n        \"\"\"Return path to Next.js web directory.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            session_id: Optional session ID for session-specific web directory\n\n        Returns:\n            Path to the web directory\n        \"\"\"\n        if session_id:\n            return sandbox_path / \"sessions\" / session_id / \"outputs\" / \"web\"\n        return sandbox_path / \"outputs\" / \"web\"\n\n    def get_venv_path(self, sandbox_path: Path, session_id: str | None = None) -> Path:\n        \"\"\"Return path to virtual environment.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            session_id: Optional session ID for session-specific venv\n\n        Returns:\n            Path to the .venv directory\n        \"\"\"\n        if session_id:\n            return sandbox_path / \"sessions\" / session_id / \".venv\"\n        return sandbox_path / \".venv\"\n\n    def directory_exists(self, sandbox_path: Path) -> bool:\n        \"\"\"Check if sandbox directory exists.\n\n        Args:\n            sandbox_path: Path to check\n\n        Returns:\n            True if directory exists and is a directory\n        \"\"\"\n        return sandbox_path.exists() and sandbox_path.is_dir()\n\n    def session_exists(self, sandbox_path: Path, session_id: str) -> bool:\n        \"\"\"Check if session workspace exists.\n\n        Args:\n            sandbox_path: Path to sandbox directory\n            session_id: Session ID to check\n\n        Returns:\n            True if session directory exists\n        \"\"\"\n        session_path = sandbox_path / \"sessions\" / session_id\n        return session_path.exists() and session_path.is_dir()\n\n    def setup_attachments_directory(\n        self, sandbox_path: Path, session_id: str | None = None\n    ) -> Path:\n        \"\"\"Create attachments directory for user-uploaded files.\n\n        This directory is used to store files uploaded by the user\n        through the chat interface.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            session_id: Optional session ID for session-specific uploads\n\n        Returns:\n            Path to the attachments directory\n        \"\"\"\n        if session_id:\n            attachments_path = sandbox_path / \"sessions\" / session_id / \"attachments\"\n        else:\n            attachments_path = sandbox_path / \"attachments\"\n        attachments_path.mkdir(parents=True, exist_ok=True)\n        return attachments_path\n\n    def get_attachments_path(\n        self, sandbox_path: Path, session_id: str | None = None\n    ) -> Path:\n        \"\"\"Return path to attachments directory.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            session_id: Optional session ID for session-specific uploads\n\n        Returns:\n            Path to the attachments directory\n        \"\"\"\n        if session_id:\n            return sandbox_path / \"sessions\" / session_id / \"attachments\"\n        return sandbox_path / \"attachments\"\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/manager/snapshot_manager.py",
    "content": "\"\"\"Snapshot management for sandbox state persistence.\"\"\"\n\nimport tarfile\nimport tempfile\nfrom pathlib import Path\nfrom uuid import uuid4\n\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.file_store.file_store import FileStore\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# File type for snapshot archives\nSNAPSHOT_FILE_TYPE = \"application/gzip\"\n\n\nclass SnapshotManager:\n    \"\"\"Manages sandbox snapshot creation and restoration.\n\n    Snapshots are tar.gz archives of the sandbox's outputs directory,\n    stored using the file store abstraction (S3-compatible storage).\n\n    Responsible for:\n    - Creating snapshots of outputs directories\n    - Restoring snapshots to target directories\n    - Deleting snapshots from storage\n    \"\"\"\n\n    def __init__(self, file_store: FileStore) -> None:\n        \"\"\"Initialize SnapshotManager with a file store.\n\n        Args:\n            file_store: The file store to use for snapshot storage\n        \"\"\"\n        self._file_store = file_store\n\n    def create_snapshot(\n        self,\n        sandbox_path: Path,\n        sandbox_id: str,\n        tenant_id: str,\n    ) -> tuple[str, str, int]:\n        \"\"\"Create a snapshot of the outputs directory.\n\n        Creates a tar.gz archive of the sandbox's outputs directory\n        and uploads it to the file store.\n\n        Args:\n            sandbox_path: Path to the sandbox directory\n            sandbox_id: Sandbox identifier\n            tenant_id: Tenant identifier for multi-tenant isolation\n\n        Returns:\n            Tuple of (snapshot_id, storage_path, size_bytes)\n\n        Raises:\n            FileNotFoundError: If outputs directory doesn't exist\n            RuntimeError: If snapshot creation fails\n        \"\"\"\n        snapshot_id = str(uuid4())\n        outputs_path = sandbox_path / \"outputs\"\n\n        if not outputs_path.exists():\n            raise FileNotFoundError(f\"Outputs directory not found: {outputs_path}\")\n\n        # Create tar.gz in temp location\n        tmp_path: str | None = None\n        try:\n            with tempfile.NamedTemporaryFile(\n                suffix=\".tar.gz\", delete=False\n            ) as tmp_file:\n                tmp_path = tmp_file.name\n\n            # Create the tar archive\n            with tarfile.open(tmp_path, \"w:gz\") as tar:\n                tar.add(outputs_path, arcname=\"outputs\")\n\n            # Get size\n            size_bytes = Path(tmp_path).stat().st_size\n\n            # Generate storage path for file store\n            # Format: sandbox-snapshots/{tenant_id}/{sandbox_id}/{snapshot_id}.tar.gz\n            storage_path = (\n                f\"sandbox-snapshots/{tenant_id}/{sandbox_id}/{snapshot_id}.tar.gz\"\n            )\n            display_name = f\"sandbox-snapshot-{sandbox_id}-{snapshot_id}.tar.gz\"\n\n            # Upload to file store\n            with open(tmp_path, \"rb\") as f:\n                self._file_store.save_file(\n                    content=f,\n                    display_name=display_name,\n                    file_origin=FileOrigin.SANDBOX_SNAPSHOT,\n                    file_type=SNAPSHOT_FILE_TYPE,\n                    file_id=storage_path,\n                    file_metadata={\n                        \"sandbox_id\": sandbox_id,\n                        \"tenant_id\": tenant_id,\n                        \"snapshot_id\": snapshot_id,\n                    },\n                )\n\n            logger.info(\n                f\"Created snapshot {snapshot_id} for sandbox {sandbox_id}, size: {size_bytes} bytes\"\n            )\n\n            return snapshot_id, storage_path, size_bytes\n\n        except Exception as e:\n            logger.error(f\"Failed to create snapshot for sandbox {sandbox_id}: {e}\")\n            raise RuntimeError(f\"Failed to create snapshot: {e}\") from e\n        finally:\n            # Cleanup temp file\n            if tmp_path:\n                try:\n                    Path(tmp_path).unlink(missing_ok=True)\n                except Exception as cleanup_error:\n                    logger.warning(\n                        f\"Failed to cleanup temp file {tmp_path}: {cleanup_error}\"\n                    )\n\n    def restore_snapshot(\n        self,\n        storage_path: str,\n        target_path: Path,\n    ) -> None:\n        \"\"\"Restore a snapshot to target directory.\n\n        Downloads the snapshot from file store and extracts the outputs/\n        directory to the target path.\n\n        Args:\n            storage_path: The file store path of the snapshot\n            target_path: Directory to extract the snapshot into\n\n        Raises:\n            FileNotFoundError: If snapshot doesn't exist in file store\n            RuntimeError: If restoration fails\n        \"\"\"\n        tmp_path: str | None = None\n        file_io = None\n        try:\n            # Download from file store\n            file_io = self._file_store.read_file(storage_path, use_tempfile=True)\n\n            # Write to temp file for tarfile extraction\n            with tempfile.NamedTemporaryFile(\n                suffix=\".tar.gz\", delete=False\n            ) as tmp_file:\n                tmp_path = tmp_file.name\n                # Read from the IO object and write to temp file\n                content = file_io.read()\n                tmp_file.write(content)\n\n            # Ensure target path exists\n            target_path.mkdir(parents=True, exist_ok=True)\n\n            # Extract with security filter\n            with tarfile.open(tmp_path, \"r:gz\") as tar:\n                # Use data filter for safe extraction (prevents path traversal)\n                # Available in Python 3.11.4+\n                try:\n                    tar.extractall(target_path, filter=\"data\")\n                except TypeError:\n                    # Fallback for older Python versions without filter support\n                    # Manually validate paths for security\n                    for member in tar.getmembers():\n                        # Check for path traversal attempts\n                        member_path = Path(target_path) / member.name\n                        try:\n                            member_path.resolve().relative_to(target_path.resolve())\n                        except ValueError:\n                            raise RuntimeError(\n                                f\"Path traversal attempt detected: {member.name}\"\n                            )\n                    tar.extractall(target_path)\n\n            logger.info(f\"Restored snapshot from {storage_path} to {target_path}\")\n\n        except Exception as e:\n            logger.error(f\"Failed to restore snapshot {storage_path}: {e}\")\n            raise RuntimeError(f\"Failed to restore snapshot: {e}\") from e\n        finally:\n            # Cleanup temp file\n            if tmp_path:\n                try:\n                    Path(tmp_path).unlink(missing_ok=True)\n                except Exception as cleanup_error:\n                    logger.warning(\n                        f\"Failed to cleanup temp file {tmp_path}: {cleanup_error}\"\n                    )\n            # Close the file IO if it's still open\n            try:\n                if file_io:\n                    file_io.close()\n            except Exception:\n                pass\n\n    def delete_snapshot(self, storage_path: str) -> None:\n        \"\"\"Delete snapshot from file store.\n\n        Args:\n            storage_path: The file store path of the snapshot to delete\n\n        Raises:\n            RuntimeError: If deletion fails (other than file not found)\n        \"\"\"\n        try:\n            self._file_store.delete_file(storage_path)\n            logger.info(f\"Deleted snapshot: {storage_path}\")\n        except Exception as e:\n            # Log but don't fail if snapshot doesn't exist\n            logger.warning(f\"Failed to delete snapshot {storage_path}: {e}\")\n            raise RuntimeError(f\"Failed to delete snapshot: {e}\") from e\n\n    def get_snapshot_size(self, storage_path: str) -> int | None:\n        \"\"\"Get the size of a snapshot in bytes.\n\n        Args:\n            storage_path: The file store path of the snapshot\n\n        Returns:\n            Size in bytes, or None if not available\n        \"\"\"\n        return self._file_store.get_file_size(storage_path)\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/manager/test_directory_manager.py",
    "content": "\"\"\"Tests for DirectoryManager.\n\nThese are unit tests that test DirectoryManager's behavior in isolation,\nfocusing on the setup_opencode_config method with different provider configurations.\n\"\"\"\n\nimport json\nimport shutil\nimport tempfile\nfrom collections.abc import Generator\nfrom pathlib import Path\nfrom typing import Any\n\nimport pytest\n\nfrom onyx.server.features.build.sandbox.manager.directory_manager import (\n    DirectoryManager,\n)\n\n\n@pytest.fixture\ndef temp_base_path() -> Generator[Path, None, None]:\n    \"\"\"Create a temporary base path for testing.\"\"\"\n    temp_dir = Path(tempfile.mkdtemp(prefix=\"test_dir_manager_\"))\n    yield temp_dir\n    shutil.rmtree(temp_dir, ignore_errors=True)\n\n\n@pytest.fixture\ndef temp_templates(temp_base_path: Path) -> dict[str, Path]:\n    \"\"\"Create temporary template directories and files.\"\"\"\n    templates_dir = temp_base_path / \"templates\"\n    templates_dir.mkdir()\n\n    outputs_template = templates_dir / \"outputs\"\n    outputs_template.mkdir()\n\n    venv_template = templates_dir / \"venv\"\n    venv_template.mkdir()\n\n    skills_path = templates_dir / \"skills\"\n    skills_path.mkdir()\n\n    agent_instructions = templates_dir / \"AGENTS.md\"\n    agent_instructions.write_text(\"# Agent Instructions\\n\")\n\n    return {\n        \"outputs\": outputs_template,\n        \"venv\": venv_template,\n        \"skills\": skills_path,\n        \"agent_instructions\": agent_instructions,\n    }\n\n\n@pytest.fixture\ndef directory_manager(\n    temp_base_path: Path, temp_templates: dict[str, Path]\n) -> DirectoryManager:\n    \"\"\"Create a DirectoryManager instance with temporary paths.\"\"\"\n    return DirectoryManager(\n        base_path=temp_base_path,\n        outputs_template_path=temp_templates[\"outputs\"],\n        venv_template_path=temp_templates[\"venv\"],\n        skills_path=temp_templates[\"skills\"],\n        agent_instructions_template_path=temp_templates[\"agent_instructions\"],\n    )\n\n\nclass TestSetupOpencodeConfig:\n    \"\"\"Tests for DirectoryManager.setup_opencode_config().\"\"\"\n\n    def test_openai_config_with_thinking(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that OpenAI provider includes reasoning configuration.\"\"\"\n        session_id = \"test_openai_session\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"openai\",\n            model_name=\"gpt-4o\",\n            api_key=\"test-api-key\",\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        assert config_path.exists()\n\n        config = json.loads(config_path.read_text())\n\n        # Verify basic structure\n        assert config[\"model\"] == \"openai/gpt-4o\"\n        assert \"$schema\" in config\n        assert \"provider\" in config\n        assert \"openai\" in config[\"provider\"]\n        assert config[\"provider\"][\"openai\"][\"options\"][\"apiKey\"] == \"test-api-key\"\n\n        # Verify OpenAI reasoning configuration in model config\n        assert \"models\" in config[\"provider\"][\"openai\"]\n        assert \"gpt-4o\" in config[\"provider\"][\"openai\"][\"models\"]\n        model_options = config[\"provider\"][\"openai\"][\"models\"][\"gpt-4o\"][\"options\"]\n        assert model_options[\"reasoningEffort\"] == \"high\"\n\n    def test_anthropic_config_with_thinking(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that Anthropic provider includes thinking configuration.\"\"\"\n        session_id = \"test_anthropic_session\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"anthropic\",\n            model_name=\"claude-sonnet-4-5\",\n            api_key=\"test-api-key\",\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        assert config_path.exists()\n\n        config = json.loads(config_path.read_text())\n\n        # Verify basic structure\n        assert config[\"model\"] == \"anthropic/claude-sonnet-4-5\"\n        assert \"$schema\" in config\n        assert \"provider\" in config\n        assert \"anthropic\" in config[\"provider\"]\n        assert config[\"provider\"][\"anthropic\"][\"options\"][\"apiKey\"] == \"test-api-key\"\n\n        # Verify Anthropic thinking configuration in model config\n        assert \"models\" in config[\"provider\"][\"anthropic\"]\n        assert \"claude-sonnet-4-5\" in config[\"provider\"][\"anthropic\"][\"models\"]\n        model_options = config[\"provider\"][\"anthropic\"][\"models\"][\"claude-sonnet-4-5\"][\n            \"options\"\n        ]\n        assert \"thinking\" in model_options\n        assert model_options[\"thinking\"][\"type\"] == \"enabled\"\n        assert model_options[\"thinking\"][\"budgetTokens\"] == 16000\n\n    def test_google_config_with_thinking(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that Google provider includes thinking configuration.\"\"\"\n        session_id = \"test_google_session\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"google\",\n            model_name=\"gemini-3-pro\",\n            api_key=\"test-api-key\",\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        assert config_path.exists()\n\n        config = json.loads(config_path.read_text())\n\n        # Verify basic structure\n        assert config[\"model\"] == \"google/gemini-3-pro\"\n        assert \"$schema\" in config\n        assert \"provider\" in config\n        assert \"google\" in config[\"provider\"]\n        assert config[\"provider\"][\"google\"][\"options\"][\"apiKey\"] == \"test-api-key\"\n\n        # Verify Google thinking configuration in model config\n        assert \"models\" in config[\"provider\"][\"google\"]\n        assert \"gemini-3-pro\" in config[\"provider\"][\"google\"][\"models\"]\n        model_options = config[\"provider\"][\"google\"][\"models\"][\"gemini-3-pro\"][\n            \"options\"\n        ]\n        assert model_options[\"thinking_budget\"] == 16000\n        assert model_options[\"thinking_level\"] == \"high\"\n\n    def test_bedrock_config_with_thinking(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that Bedrock provider includes thinking configuration.\"\"\"\n        session_id = \"test_bedrock_session\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"bedrock\",\n            model_name=\"anthropic.claude-v3-5-sonnet-20250219-v1:0\",\n            api_key=\"test-api-key\",\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        assert config_path.exists()\n\n        config = json.loads(config_path.read_text())\n\n        # Verify basic structure\n        assert config[\"model\"] == \"bedrock/anthropic.claude-v3-5-sonnet-20250219-v1:0\"\n        assert \"$schema\" in config\n        assert \"provider\" in config\n        assert \"bedrock\" in config[\"provider\"]\n        assert config[\"provider\"][\"bedrock\"][\"options\"][\"apiKey\"] == \"test-api-key\"\n\n        # Verify Bedrock thinking configuration in model config (same as Anthropic)\n        assert \"models\" in config[\"provider\"][\"bedrock\"]\n        model_name = \"anthropic.claude-v3-5-sonnet-20250219-v1:0\"\n        assert model_name in config[\"provider\"][\"bedrock\"][\"models\"]\n        model_options = config[\"provider\"][\"bedrock\"][\"models\"][model_name][\"options\"]\n        assert \"thinking\" in model_options\n        assert model_options[\"thinking\"][\"type\"] == \"enabled\"\n        assert model_options[\"thinking\"][\"budgetTokens\"] == 16000\n\n    def test_azure_config_with_thinking(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that Azure provider includes thinking configuration.\"\"\"\n        session_id = \"test_azure_session\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"azure\",\n            model_name=\"gpt-4o\",\n            api_key=\"test-api-key\",\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        assert config_path.exists()\n\n        config = json.loads(config_path.read_text())\n\n        # Verify basic structure\n        assert config[\"model\"] == \"azure/gpt-4o\"\n        assert \"$schema\" in config\n        assert \"provider\" in config\n        assert \"azure\" in config[\"provider\"]\n        assert config[\"provider\"][\"azure\"][\"options\"][\"apiKey\"] == \"test-api-key\"\n\n        # Verify Azure reasoning configuration in model config (same as OpenAI)\n        assert \"models\" in config[\"provider\"][\"azure\"]\n        assert \"gpt-4o\" in config[\"provider\"][\"azure\"][\"models\"]\n        model_options = config[\"provider\"][\"azure\"][\"models\"][\"gpt-4o\"][\"options\"]\n        assert model_options[\"reasoningEffort\"] == \"high\"\n\n    def test_openai_config_with_api_base(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test OpenAI config with custom API base URL.\"\"\"\n        session_id = \"test_openai_api_base\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"openai\",\n            model_name=\"gpt-4o\",\n            api_key=\"test-api-key\",\n            api_base=\"https://custom.api.endpoint\",\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        config = json.loads(config_path.read_text())\n\n        # Verify API base is included\n        assert config[\"provider\"][\"openai\"][\"api\"] == \"https://custom.api.endpoint\"\n\n        # Verify thinking config is still present in model options\n        assert \"models\" in config[\"provider\"][\"openai\"]\n        model_options = config[\"provider\"][\"openai\"][\"models\"][\"gpt-4o\"][\"options\"]\n        assert model_options[\"reasoningEffort\"] == \"high\"\n\n    def test_anthropic_config_with_api_base(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test Anthropic config with custom API base URL.\"\"\"\n        session_id = \"test_anthropic_api_base\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"anthropic\",\n            model_name=\"claude-sonnet-4-5\",\n            api_key=\"test-api-key\",\n            api_base=\"https://custom.anthropic.endpoint\",\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        config = json.loads(config_path.read_text())\n\n        # Verify API base is included\n        assert (\n            config[\"provider\"][\"anthropic\"][\"api\"]\n            == \"https://custom.anthropic.endpoint\"\n        )\n\n        # Verify thinking config is still present in model options\n        assert \"models\" in config[\"provider\"][\"anthropic\"]\n        model_options = config[\"provider\"][\"anthropic\"][\"models\"][\"claude-sonnet-4-5\"][\n            \"options\"\n        ]\n        assert model_options[\"thinking\"][\"type\"] == \"enabled\"\n\n    def test_config_with_disabled_tools(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test config with disabled tools permissions.\"\"\"\n        session_id = \"test_disabled_tools\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"openai\",\n            model_name=\"gpt-4o\",\n            api_key=\"test-api-key\",\n            disabled_tools=[\"question\", \"webfetch\"],\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        config = json.loads(config_path.read_text())\n\n        # Verify disabled tools\n        assert \"permission\" in config\n        assert config[\"permission\"][\"question\"] == \"deny\"\n        assert config[\"permission\"][\"webfetch\"] == \"deny\"\n\n        # Verify default permissions are still present\n        assert config[\"permission\"][\"read\"] == \"allow\"\n        assert config[\"permission\"][\"write\"] == \"allow\"\n        assert config[\"permission\"][\"edit\"] == \"allow\"\n        assert config[\"permission\"][\"grep\"] == \"allow\"\n        assert \"bash\" in config[\"permission\"]\n        assert config[\"permission\"][\"bash\"][\"rm\"] == \"deny\"\n\n        # Verify thinking config is still present in model options\n        assert \"models\" in config[\"provider\"][\"openai\"]\n        model_options = config[\"provider\"][\"openai\"][\"models\"][\"gpt-4o\"][\"options\"]\n        assert model_options[\"reasoningEffort\"] == \"high\"\n\n    def test_config_without_api_key(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test config without API key still includes thinking settings.\"\"\"\n        session_id = \"test_no_api_key\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"openai\",\n            model_name=\"gpt-4o\",\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        config = json.loads(config_path.read_text())\n\n        # Should still have provider config structure even without API key\n        assert \"provider\" in config\n        assert \"openai\" in config[\"provider\"]\n        # Should not have options (API key) without API key\n        assert \"options\" not in config[\"provider\"][\"openai\"]\n\n        # But should still have thinking config in model options\n        assert \"models\" in config[\"provider\"][\"openai\"]\n        assert \"gpt-4o\" in config[\"provider\"][\"openai\"][\"models\"]\n        model_options = config[\"provider\"][\"openai\"][\"models\"][\"gpt-4o\"][\"options\"]\n        assert model_options[\"reasoningEffort\"] == \"high\"\n\n    def test_other_provider_no_thinking(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that other providers (non OpenAI/Anthropic/Google/Bedrock/Azure) don't get thinking configuration.\"\"\"\n        session_id = \"test_other_provider\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"cohere\",\n            model_name=\"command-r-plus\",\n            api_key=\"test-api-key\",\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        config = json.loads(config_path.read_text())\n\n        # Verify basic structure\n        assert config[\"model\"] == \"cohere/command-r-plus\"\n        assert \"$schema\" in config\n        assert \"provider\" in config\n        assert \"cohere\" in config[\"provider\"]\n\n        # Should not have model config (thinking) for other providers\n        assert \"models\" not in config[\"provider\"][\"cohere\"]\n\n    def test_config_overwritten_if_exists(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that existing opencode.json is overwritten with new config.\"\"\"\n        session_id = \"test_existing_config\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        # Create existing config\n        existing_config = {\"model\": \"existing/model\", \"custom\": \"value\"}\n        config_path = sandbox_path / \"opencode.json\"\n        config_path.write_text(json.dumps(existing_config, indent=2))\n\n        # Try to setup new config\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"openai\",\n            model_name=\"gpt-4o\",\n            api_key=\"test-api-key\",\n        )\n\n        # Verify config is overwritten with new config\n        config = json.loads(config_path.read_text())\n        assert config[\"model\"] == \"openai/gpt-4o\"\n        assert \"custom\" not in config  # Old config is replaced\n        assert config[\"provider\"][\"openai\"][\"options\"][\"apiKey\"] == \"test-api-key\"\n\n    def test_full_config_structure_openai(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test full OpenAI config structure matches expected format.\"\"\"\n        session_id = \"test_full_openai\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"openai\",\n            model_name=\"gpt-4o\",\n            api_key=\"test-openai-key\",\n            api_base=\"https://api.openai.com/v1\",\n            disabled_tools=[\"webfetch\"],\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        config: dict[str, Any] = json.loads(config_path.read_text())\n\n        # Verify key parts of structure (permission has defaults now)\n        assert config[\"model\"] == \"openai/gpt-4o\"\n        assert config[\"$schema\"] == \"https://opencode.ai/config.json\"\n        assert config[\"provider\"][\"openai\"][\"options\"][\"apiKey\"] == \"test-openai-key\"\n        assert config[\"provider\"][\"openai\"][\"api\"] == \"https://api.openai.com/v1\"\n        assert \"models\" in config[\"provider\"][\"openai\"]\n        model_options = config[\"provider\"][\"openai\"][\"models\"][\"gpt-4o\"][\"options\"]\n        assert model_options[\"reasoningEffort\"] == \"high\"\n        assert config[\"permission\"][\"webfetch\"] == \"deny\"\n\n    def test_full_config_structure_anthropic(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test full Anthropic config structure matches expected format.\"\"\"\n        session_id = \"test_full_anthropic\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"anthropic\",\n            model_name=\"claude-sonnet-4-5\",\n            api_key=\"test-anthropic-key\",\n            api_base=\"https://api.anthropic.com\",\n            disabled_tools=[\"question\"],\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        config: dict[str, Any] = json.loads(config_path.read_text())\n\n        # Verify structure (permission has defaults now, so we check for overrides)\n        assert config[\"model\"] == \"anthropic/claude-sonnet-4-5\"\n        assert config[\"$schema\"] == \"https://opencode.ai/config.json\"\n        assert (\n            config[\"provider\"][\"anthropic\"][\"options\"][\"apiKey\"] == \"test-anthropic-key\"\n        )\n        assert config[\"provider\"][\"anthropic\"][\"api\"] == \"https://api.anthropic.com\"\n        assert \"models\" in config[\"provider\"][\"anthropic\"]\n        model_options = config[\"provider\"][\"anthropic\"][\"models\"][\"claude-sonnet-4-5\"][\n            \"options\"\n        ]\n        assert model_options[\"thinking\"][\"type\"] == \"enabled\"\n        assert model_options[\"thinking\"][\"budgetTokens\"] == 16000\n        assert config[\"permission\"][\"question\"] == \"deny\"\n\n    def test_full_config_structure_google(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test full Google config structure matches expected format.\"\"\"\n        session_id = \"test_full_google\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"google\",\n            model_name=\"gemini-3-pro\",\n            api_key=\"test-google-key\",\n            api_base=\"https://generativelanguage.googleapis.com\",\n            disabled_tools=[\"webfetch\"],\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        config: dict[str, Any] = json.loads(config_path.read_text())\n\n        # Verify structure\n        assert config[\"model\"] == \"google/gemini-3-pro\"\n        assert config[\"$schema\"] == \"https://opencode.ai/config.json\"\n        assert config[\"provider\"][\"google\"][\"options\"][\"apiKey\"] == \"test-google-key\"\n        assert (\n            config[\"provider\"][\"google\"][\"api\"]\n            == \"https://generativelanguage.googleapis.com\"\n        )\n        assert \"models\" in config[\"provider\"][\"google\"]\n        model_options = config[\"provider\"][\"google\"][\"models\"][\"gemini-3-pro\"][\n            \"options\"\n        ]\n        assert model_options[\"thinking_budget\"] == 16000\n        assert model_options[\"thinking_level\"] == \"high\"\n        assert config[\"permission\"][\"webfetch\"] == \"deny\"\n\n    def test_full_config_structure_bedrock(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test full Bedrock config structure matches expected format.\"\"\"\n        session_id = \"test_full_bedrock\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"bedrock\",\n            model_name=\"anthropic.claude-v3-5-sonnet-20250219-v1:0\",\n            api_key=\"test-bedrock-key\",\n            disabled_tools=[\"question\"],\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        config: dict[str, Any] = json.loads(config_path.read_text())\n\n        # Verify structure\n        assert config[\"model\"] == \"bedrock/anthropic.claude-v3-5-sonnet-20250219-v1:0\"\n        assert config[\"$schema\"] == \"https://opencode.ai/config.json\"\n        assert config[\"provider\"][\"bedrock\"][\"options\"][\"apiKey\"] == \"test-bedrock-key\"\n        model_name = \"anthropic.claude-v3-5-sonnet-20250219-v1:0\"\n        assert \"models\" in config[\"provider\"][\"bedrock\"]\n        model_options = config[\"provider\"][\"bedrock\"][\"models\"][model_name][\"options\"]\n        assert model_options[\"thinking\"][\"type\"] == \"enabled\"\n        assert model_options[\"thinking\"][\"budgetTokens\"] == 16000\n        assert config[\"permission\"][\"question\"] == \"deny\"\n\n    def test_full_config_structure_azure(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test full Azure config structure matches expected format.\"\"\"\n        session_id = \"test_full_azure\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"azure\",\n            model_name=\"gpt-4o\",\n            api_key=\"test-azure-key\",\n            api_base=\"https://myresource.openai.azure.com\",\n            disabled_tools=[\"bash\"],\n        )\n\n        config_path = sandbox_path / \"opencode.json\"\n        config: dict[str, Any] = json.loads(config_path.read_text())\n\n        # Verify structure\n        assert config[\"model\"] == \"azure/gpt-4o\"\n        assert config[\"$schema\"] == \"https://opencode.ai/config.json\"\n        assert config[\"provider\"][\"azure\"][\"options\"][\"apiKey\"] == \"test-azure-key\"\n        assert (\n            config[\"provider\"][\"azure\"][\"api\"] == \"https://myresource.openai.azure.com\"\n        )\n        assert \"models\" in config[\"provider\"][\"azure\"]\n        model_options = config[\"provider\"][\"azure\"][\"models\"][\"gpt-4o\"][\"options\"]\n        assert model_options[\"reasoningEffort\"] == \"high\"\n        assert config[\"permission\"][\"bash\"] == \"deny\"\n\n\nclass TestSandboxDirectoryStructure:\n    \"\"\"Tests for complete sandbox directory setup.\"\"\"\n\n    def test_create_complete_sandbox(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test creating a complete sandbox with all components including opencode.json.\"\"\"\n        session_id = \"test_complete_sandbox\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n\n        # Setup all components\n        directory_manager.setup_outputs_directory(sandbox_path)\n        directory_manager.setup_venv(sandbox_path)\n        directory_manager.setup_agent_instructions(sandbox_path)\n        directory_manager.setup_skills(sandbox_path)\n        directory_manager.setup_attachments_directory(sandbox_path)\n        directory_manager.setup_opencode_config(\n            sandbox_path=sandbox_path,\n            provider=\"anthropic\",\n            model_name=\"claude-sonnet-4-5\",\n            api_key=\"test-key\",\n        )\n\n        # Verify all components exist\n        assert (sandbox_path / \"outputs\").exists()\n        assert (sandbox_path / \".venv\").exists()\n        assert (sandbox_path / \"AGENTS.md\").exists()\n        assert (sandbox_path / \".opencode\" / \"skills\").exists()\n        assert (sandbox_path / \"attachments\").exists()\n        assert (sandbox_path / \"opencode.json\").exists()\n\n        # Verify opencode.json has thinking config\n        config = json.loads((sandbox_path / \"opencode.json\").read_text())\n        model_options = config[\"provider\"][\"anthropic\"][\"models\"][\"claude-sonnet-4-5\"][\n            \"options\"\n        ]\n        assert model_options[\"thinking\"][\"type\"] == \"enabled\"\n\n    def test_setup_skills_copies_and_overwrites(\n        self,\n        directory_manager: DirectoryManager,\n        temp_base_path: Path,  # noqa: ARG002\n        temp_templates: dict[str, Path],\n    ) -> None:\n        \"\"\"Test that setup_skills copies skills and overwrites existing ones.\"\"\"\n        session_id = \"test_skills_setup\"\n        sandbox_path = directory_manager.create_sandbox_directory(session_id)\n        skills_dest = sandbox_path / \".opencode\" / \"skills\"\n\n        # Create a test skill in the source directory\n        test_skill_dir = temp_templates[\"skills\"] / \"test-skill\"\n        test_skill_dir.mkdir()\n        test_skill_file = test_skill_dir / \"SKILL.md\"\n        test_skill_file.write_text(\"# Test Skill\\nOriginal content\")\n\n        # First call - should copy skills\n        directory_manager.setup_skills(sandbox_path)\n        assert skills_dest.exists()\n        assert (skills_dest / \"test-skill\" / \"SKILL.md\").exists()\n        assert (\n            skills_dest / \"test-skill\" / \"SKILL.md\"\n        ).read_text() == \"# Test Skill\\nOriginal content\"\n\n        # Update the source skill\n        test_skill_file.write_text(\"# Test Skill\\nUpdated content\")\n\n        # Second call - should overwrite existing skills\n        directory_manager.setup_skills(sandbox_path)\n        assert skills_dest.exists()\n        assert (skills_dest / \"test-skill\" / \"SKILL.md\").exists()\n        assert (\n            skills_dest / \"test-skill\" / \"SKILL.md\"\n        ).read_text() == \"# Test Skill\\nUpdated content\"\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/models.py",
    "content": "\"\"\"Pydantic models for sandbox module communication.\"\"\"\n\nfrom datetime import datetime\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\n\nfrom onyx.db.enums import SandboxStatus\n\n\nclass LLMProviderConfig(BaseModel):\n    \"\"\"LLM provider configuration for sandbox provisioning.\n\n    Passed to SandboxManager.provision() to configure the LLM.\n    \"\"\"\n\n    provider: str\n    model_name: str\n    api_key: str | None\n    api_base: str | None\n\n\nclass SandboxInfo(BaseModel):\n    \"\"\"Information about a sandbox instance.\n\n    Returned by SandboxManager.provision() and other methods.\n    \"\"\"\n\n    sandbox_id: UUID\n    directory_path: str\n    status: SandboxStatus\n    last_heartbeat: datetime | None\n\n\nclass SnapshotResult(BaseModel):\n    \"\"\"Result of creating a snapshot (without DB record).\n\n    Returned by SandboxManager.create_snapshot().\n    The caller is responsible for creating the DB record.\n    \"\"\"\n\n    storage_path: str\n    size_bytes: int\n\n\nclass SnapshotInfo(BaseModel):\n    \"\"\"Full information about a sandbox snapshot (including DB info).\n\n    Used when returning snapshot information to API callers.\n    \"\"\"\n\n    id: str\n    sandbox_id: str\n    storage_path: str\n    created_at: datetime\n    size_bytes: int\n\n\nclass FilesystemEntry(BaseModel):\n    \"\"\"Represents a file or directory entry in the sandbox filesystem.\n\n    Used for directory listing operations. This is the canonical model used\n    by both sandbox managers and the API layer.\n    \"\"\"\n\n    name: str\n    path: str\n    is_directory: bool\n    size: int | None = None  # File size in bytes (None for directories)\n    mime_type: str | None = None  # MIME type (None for directories)\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/tasks/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/tasks/tasks.py",
    "content": "\"\"\"Celery tasks for sandbox operations (cleanup, file sync, etc.).\"\"\"\n\nfrom collections.abc import Iterator\nfrom contextlib import contextmanager\nfrom typing import TYPE_CHECKING\nfrom uuid import UUID\n\nfrom celery import shared_task\nfrom celery import Task\nfrom redis.lock import Lock as RedisLock\n\nif TYPE_CHECKING:\n    from sqlalchemy.orm import Session\n\nfrom onyx.background.celery.apps.app_base import task_logger\nfrom onyx.configs.constants import CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import SandboxStatus\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.features.build.configs import SANDBOX_BACKEND\nfrom onyx.server.features.build.configs import SANDBOX_IDLE_TIMEOUT_SECONDS\nfrom onyx.server.features.build.configs import SandboxBackend\nfrom onyx.server.features.build.configs import USER_LIBRARY_SOURCE_DIR\nfrom onyx.server.features.build.db.build_session import clear_nextjs_ports_for_user\nfrom onyx.server.features.build.db.build_session import (\n    mark_user_sessions_idle__no_commit,\n)\nfrom onyx.server.features.build.db.sandbox import get_sandbox_by_user_id\nfrom onyx.server.features.build.sandbox.base import get_sandbox_manager\nfrom onyx.server.features.build.sandbox.kubernetes.kubernetes_sandbox_manager import (\n    KubernetesSandboxManager,\n)\n\n\n# Snapshot retention period in days\nSNAPSHOT_RETENTION_DAYS = 30\n\n# 100 minutes - snapshotting can take time\nTIMEOUT_SECONDS = 6000\n\n\n@shared_task(\n    name=OnyxCeleryTask.CLEANUP_IDLE_SANDBOXES,\n    soft_time_limit=TIMEOUT_SECONDS,\n    bind=True,\n    ignore_result=True,\n)\ndef cleanup_idle_sandboxes_task(self: Task, *, tenant_id: str) -> None:  # noqa: ARG001\n    \"\"\"Put idle sandboxes to sleep after snapshotting all sessions.\n\n    This task:\n    1. Finds sandboxes that have been idle longer than SANDBOX_IDLE_TIMEOUT_SECONDS\n    2. Lists all session directories in the pod's /workspace/sessions/\n    3. Creates a snapshot of each session's outputs to S3\n    4. Terminates the pod (but keeps the sandbox record)\n    5. Marks the sandbox as SLEEPING (can be restored later)\n\n    NOTE: This task is a no-op for local backend - sandboxes persist until\n    manually terminated or server restart.\n\n    Args:\n        tenant_id: The tenant ID for multi-tenant isolation\n    \"\"\"\n    # Skip cleanup for local backend - sandboxes persist until manual termination\n    if SANDBOX_BACKEND == SandboxBackend.LOCAL:\n        task_logger.debug(\n            \"cleanup_idle_sandboxes_task skipped (local backend - cleanup disabled)\"\n        )\n        return\n\n    task_logger.info(f\"cleanup_idle_sandboxes_task starting for tenant {tenant_id}\")\n\n    redis_client = get_redis_client(tenant_id=tenant_id)\n    lock: RedisLock = redis_client.lock(\n        OnyxRedisLocks.CLEANUP_IDLE_SANDBOXES_BEAT_LOCK,\n        timeout=TIMEOUT_SECONDS,\n    )\n\n    # Prevent overlapping runs of this task\n    if not lock.acquire(blocking=False):\n        task_logger.info(\"cleanup_idle_sandboxes_task - lock not acquired, skipping\")\n        return\n\n    try:\n        # Import here to avoid circular imports\n        from onyx.db.enums import SandboxStatus\n        from onyx.server.features.build.db.sandbox import create_snapshot__no_commit\n        from onyx.server.features.build.db.sandbox import get_idle_sandboxes\n        from onyx.server.features.build.db.sandbox import (\n            update_sandbox_status__no_commit,\n        )\n\n        sandbox_manager = get_sandbox_manager()\n\n        # Type guard for kubernetes-specific methods\n        if not isinstance(sandbox_manager, KubernetesSandboxManager):\n            task_logger.debug(\n                \"cleanup_idle_sandboxes_task skipped (not kubernetes backend)\"\n            )\n            return\n\n        with get_session_with_current_tenant() as db_session:\n            idle_sandboxes = get_idle_sandboxes(\n                db_session, SANDBOX_IDLE_TIMEOUT_SECONDS\n            )\n\n            if not idle_sandboxes:\n                task_logger.debug(\"No idle sandboxes found\")\n                return\n\n            task_logger.info(\n                f\"Found {len(idle_sandboxes)} idle sandboxes to put to sleep\"\n            )\n\n            for sandbox in idle_sandboxes:\n                sandbox_id = sandbox.id\n                sandbox_id_str = str(sandbox_id)\n                task_logger.info(f\"Putting sandbox {sandbox_id_str} to sleep\")\n\n                try:\n                    # List session directories in the pod\n                    session_ids = _list_session_directories(sandbox_manager, sandbox_id)\n                    task_logger.info(\n                        f\"Found {len(session_ids)} sessions in sandbox {sandbox_id_str}\"\n                    )\n\n                    # Snapshot each session\n                    for session_id_str in session_ids:\n                        try:\n                            session_id = UUID(session_id_str)\n                            task_logger.debug(\n                                f\"Creating snapshot for session {session_id_str}\"\n                            )\n                            snapshot_result = sandbox_manager.create_snapshot(\n                                sandbox_id, session_id, tenant_id\n                            )\n                            if snapshot_result:\n                                # Create DB record for the snapshot\n                                create_snapshot__no_commit(\n                                    db_session,\n                                    session_id,\n                                    snapshot_result.storage_path,\n                                    snapshot_result.size_bytes,\n                                )\n                                task_logger.debug(\n                                    f\"Snapshot created for session {session_id_str}\"\n                                )\n                        except Exception as e:\n                            task_logger.warning(\n                                f\"Failed to create snapshot for session {session_id_str}: {e}\"\n                            )\n                            # Continue with other sessions even if one fails\n\n                    # Terminate the pod (but keep sandbox record)\n                    sandbox_manager.terminate(sandbox_id)\n\n                    # Zero out nextjs ports for all sessions (ports are no longer in use)\n                    cleared = clear_nextjs_ports_for_user(db_session, sandbox.user_id)\n                    task_logger.debug(\n                        f\"Cleared {cleared} nextjs_port allocations for user {sandbox.user_id}\"\n                    )\n\n                    # Mark all active sessions as IDLE\n                    idled = mark_user_sessions_idle__no_commit(\n                        db_session, sandbox.user_id\n                    )\n                    task_logger.debug(\n                        f\"Marked {idled} sessions as IDLE for user {sandbox.user_id}\"\n                    )\n\n                    update_sandbox_status__no_commit(\n                        db_session, sandbox_id, SandboxStatus.SLEEPING\n                    )\n                    db_session.commit()\n                    task_logger.info(f\"Sandbox {sandbox_id_str} is now sleeping\")\n\n                except Exception as e:\n                    task_logger.error(\n                        f\"Failed to put sandbox {sandbox_id_str} to sleep: {e}\",\n                        exc_info=True,\n                    )\n                    db_session.rollback()\n\n    except Exception:\n        task_logger.exception(\"Error in cleanup_idle_sandboxes_task\")\n        raise\n\n    finally:\n        if lock.owned():\n            lock.release()\n\n    task_logger.info(\"cleanup_idle_sandboxes_task completed\")\n\n\ndef _list_session_directories(\n    sandbox_manager: KubernetesSandboxManager,\n    sandbox_id: UUID,\n) -> list[str]:\n    \"\"\"List session directory names in the pod's /workspace/sessions/.\n\n    Args:\n        sandbox_manager: The kubernetes sandbox manager\n        sandbox_id: The sandbox ID\n\n    Returns:\n        List of session ID strings (directory names)\n    \"\"\"\n    from kubernetes.client.rest import ApiException  # type: ignore\n    from kubernetes.stream import stream as k8s_stream  # type: ignore\n\n    pod_name = sandbox_manager._get_pod_name(str(sandbox_id))\n\n    # List directories in /workspace/sessions/\n    exec_command = [\n        \"/bin/sh\",\n        \"-c\",\n        'ls -1 /workspace/sessions/ 2>/dev/null || echo \"\"',\n    ]\n\n    try:\n        resp = k8s_stream(\n            sandbox_manager._core_api.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=sandbox_manager._namespace,\n            container=\"sandbox\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n\n        # Parse output - one directory name per line\n        session_ids = []\n        for line in resp.strip().split(\"\\n\"):\n            line = line.strip()\n            if line:\n                # Validate it looks like a UUID\n                try:\n                    UUID(line)\n                    session_ids.append(line)\n                except ValueError:\n                    # Not a valid UUID, skip\n                    pass\n\n        return session_ids\n\n    except ApiException as e:\n        task_logger.warning(f\"Failed to list session directories: {e}\")\n        return []\n\n\n@contextmanager\ndef _acquire_sandbox_file_sync_lock(lock: RedisLock) -> Iterator[bool]:\n    \"\"\"Acquire the sandbox file-sync lock with blocking timeout; release on exit.\"\"\"\n    acquired = lock.acquire(\n        blocking_timeout=CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT,\n    )\n    try:\n        yield acquired\n    finally:\n        if lock.owned():\n            lock.release()\n\n\ndef _get_disabled_user_library_paths(db_session: \"Session\", user_id: str) -> list[str]:\n    \"\"\"Get list of disabled user library file paths for exclusion during sync.\n\n    Queries the document table for CRAFT_FILE documents with sync_disabled=True\n    and returns their relative paths within user_library/.\n\n    Args:\n        db_session: Database session\n        user_id: The user ID to filter documents\n\n    Returns:\n        List of relative file paths to exclude (e.g., [\"/data/file.xlsx\", \"/old/report.pdf\"])\n    \"\"\"\n    from uuid import UUID\n\n    from onyx.configs.constants import DocumentSource\n    from onyx.db.document import get_documents_by_source\n\n    disabled_paths: list[str] = []\n\n    # Get CRAFT_FILE documents for this user (filtered at SQL level)\n    documents = get_documents_by_source(\n        db_session=db_session,\n        source=DocumentSource.CRAFT_FILE,\n        creator_id=UUID(user_id),\n    )\n\n    for doc in documents:\n        doc_metadata = doc.doc_metadata or {}\n        if not doc_metadata.get(\"sync_disabled\"):\n            continue\n\n        # Extract file path from semantic_id\n        # semantic_id format: \"user_library/path/to/file.xlsx\"\n        # Include both files AND directories - the shell script in\n        # setup_session_workspace() handles directory exclusion by\n        # checking if paths are children of an excluded directory.\n        semantic_id = doc.semantic_id or \"\"\n        if semantic_id.startswith(USER_LIBRARY_SOURCE_DIR):\n            file_path = semantic_id[len(USER_LIBRARY_SOURCE_DIR) :]\n            if file_path:\n                disabled_paths.append(file_path)\n\n    return disabled_paths\n\n\n@shared_task(\n    name=OnyxCeleryTask.SANDBOX_FILE_SYNC,\n    soft_time_limit=TIMEOUT_SECONDS,\n    bind=True,\n    ignore_result=True,\n)\ndef sync_sandbox_files(\n    self: Task,  # noqa: ARG001\n    *,\n    user_id: str,\n    tenant_id: str,\n    source: str | None = None,\n) -> bool:\n    \"\"\"Sync files from S3 to a user's running sandbox.\n\n    This task is triggered after documents are written to S3 during indexing.\n    It executes `s5cmd sync` in the file-sync sidecar container to download\n    any new or changed files.\n\n    Per-user locking ensures only one sync runs at a time for a given user.\n    If a sync is already in progress, this task will wait until it completes.\n\n    Note: File visibility in sessions is controlled via filtered symlinks in\n    setup_session_workspace(), not at the sync level. The sync mirrors S3\n    faithfully; disabled files are excluded only when creating new sessions.\n\n    Args:\n        user_id: The user ID whose sandbox should be synced\n        tenant_id: The tenant ID for S3 path construction\n        source: Optional source type (e.g., \"gmail\", \"google_drive\", \"user_library\").\n                If None, syncs all sources.\n\n    Returns:\n        True if sync was successful, False if skipped or failed\n    \"\"\"\n    source_info = f\" source={source}\" if source else \" (all sources)\"\n    task_logger.info(\n        f\"sync_sandbox_files starting for user {user_id} in tenant {tenant_id}{source_info}\"\n    )\n\n    lock_timeout = CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT\n    redis_client = get_redis_client(tenant_id=tenant_id)\n    lock = redis_client.lock(\n        f\"{OnyxRedisLocks.SANDBOX_FILE_SYNC_LOCK_PREFIX}:{user_id}\",\n        timeout=lock_timeout,\n    )\n\n    with _acquire_sandbox_file_sync_lock(lock) as acquired:\n        if not acquired:\n            task_logger.warning(\n                f\"sync_sandbox_files - failed to acquire lock for user {user_id} after {lock_timeout}s, skipping\"\n            )\n            return False\n\n        with get_session_with_current_tenant() as db_session:\n            sandbox = get_sandbox_by_user_id(db_session, UUID(user_id))\n            if sandbox is None:\n                task_logger.debug(f\"No sandbox found for user {user_id}, skipping sync\")\n                return False\n            if sandbox.status != SandboxStatus.RUNNING:\n                task_logger.debug(\n                    f\"Sandbox {sandbox.id} not running (status={sandbox.status}), skipping sync\"\n                )\n                return False\n\n            sandbox_manager = get_sandbox_manager()\n            result = sandbox_manager.sync_files(\n                sandbox_id=sandbox.id,\n                user_id=UUID(user_id),\n                tenant_id=tenant_id,\n                source=source,\n            )\n            if result:\n                task_logger.info(f\"File sync completed for user {user_id}{source_info}\")\n            else:\n                task_logger.warning(f\"File sync failed for user {user_id}{source_info}\")\n            return result\n\n\n# NOTE: in the future, may need to add this. For now, will do manual cleanup.\n# @shared_task(\n#     name=OnyxCeleryTask.CLEANUP_OLD_SNAPSHOTS,\n#     soft_time_limit=300,\n#     bind=True,\n#     ignore_result=True,\n# )\n# def cleanup_old_snapshots_task(self: Task, *, tenant_id: str) -> None:\n#     \"\"\"Delete snapshots older than the retention period.\n\n#     This task cleans up old snapshots to manage storage usage.\n#     Snapshots older than SNAPSHOT_RETENTION_DAYS are deleted.\n\n#     NOTE: This task is a no-op for local backend since snapshots are disabled.\n\n#     Args:\n#         tenant_id: The tenant ID for multi-tenant isolation\n#     \"\"\"\n#     # Skip for local backend - no snapshots to clean up\n#     if SANDBOX_BACKEND == SandboxBackend.LOCAL:\n#         task_logger.debug(\n#             \"cleanup_old_snapshots_task skipped (local backend - snapshots disabled)\"\n#         )\n#         return\n\n#     task_logger.info(f\"cleanup_old_snapshots_task starting for tenant {tenant_id}\")\n\n#     redis_client = get_redis_client(tenant_id=tenant_id)\n#     lock: RedisLock = redis_client.lock(\n#         OnyxRedisLocks.CLEANUP_OLD_SNAPSHOTS_BEAT_LOCK,\n#         timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,\n#     )\n\n#     # Prevent overlapping runs of this task\n#     if not lock.acquire(blocking=False):\n#         task_logger.debug(\"cleanup_old_snapshots_task - lock not acquired, skipping\")\n#         return\n\n#     try:\n#         from onyx.server.features.build.db.sandbox import delete_old_snapshots\n\n#         with get_session_with_current_tenant() as db_session:\n#             deleted_count = delete_old_snapshots(\n#                 db_session, tenant_id, SNAPSHOT_RETENTION_DAYS\n#             )\n\n#             if deleted_count > 0:\n#                 task_logger.info(\n#                     f\"Deleted {deleted_count} old snapshots for tenant {tenant_id}\"\n#                 )\n#             else:\n#                 task_logger.debug(\"No old snapshots to delete\")\n\n#     except Exception:\n#         task_logger.exception(\"Error in cleanup_old_snapshots_task\")\n#         raise\n\n#     finally:\n#         if lock.owned():\n#             lock.release()\n\n#     task_logger.info(\"cleanup_old_snapshots_task completed\")\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/util/__init__.py",
    "content": "\"\"\"Template and configuration utilities for sandbox environments.\n\nContains utilities for:\n- Building sandbox templates (Next.js, venv)\n- Generating agent instructions (AGENTS.md)\n- Generating opencode configuration\n\"\"\"\n\nfrom onyx.server.features.build.sandbox.util.agent_instructions import (\n    build_knowledge_sources_section,\n)\nfrom onyx.server.features.build.sandbox.util.agent_instructions import (\n    build_skills_section,\n)\nfrom onyx.server.features.build.sandbox.util.agent_instructions import (\n    build_user_context,\n)\nfrom onyx.server.features.build.sandbox.util.agent_instructions import (\n    extract_skill_description,\n)\nfrom onyx.server.features.build.sandbox.util.agent_instructions import (\n    generate_agent_instructions,\n)\nfrom onyx.server.features.build.sandbox.util.agent_instructions import (\n    get_provider_display_name,\n)\nfrom onyx.server.features.build.sandbox.util.opencode_config import (\n    build_opencode_config,\n)\n\n__all__ = [\n    \"build_knowledge_sources_section\",\n    \"build_opencode_config\",\n    \"build_skills_section\",\n    \"build_user_context\",\n    \"extract_skill_description\",\n    \"generate_agent_instructions\",\n    \"get_provider_display_name\",\n]\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/util/agent_instructions.py",
    "content": "\"\"\"Shared utilities for generating AGENTS.md content.\n\nThis module provides functions for building dynamic agent instructions\nthat are shared between local and kubernetes sandbox managers.\n\"\"\"\n\nimport threading\nfrom pathlib import Path\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Cache for skills section (skills are static, cached indefinitely)\n_skills_cache: dict[str, str] = {}\n_skills_cache_lock = threading.Lock()\n\n# Provider display name mapping\nPROVIDER_DISPLAY_NAMES = {\n    \"openai\": \"OpenAI\",\n    \"anthropic\": \"Anthropic\",\n    \"azure\": \"Azure OpenAI\",\n    \"google\": \"Google AI\",\n    \"bedrock\": \"AWS Bedrock\",\n    \"vertex\": \"Google Vertex AI\",\n}\n\n# Type alias for connector info entries\nConnectorInfoEntry = dict[str, str | int]\n\n# Connector information for generating knowledge sources section\n# Keys are normalized (lowercase, underscores) directory names\n# Each entry has: summary (with optional {subdirs}), file_pattern, scan_depth\n# NOTE: This is duplicated in kubernetes/docker/generate_agents_md.py to avoid circular imports\nCONNECTOR_INFO: dict[str, ConnectorInfoEntry] = {\n    \"google_drive\": {\n        \"summary\": \"Documents and files from Google Drive. This may contain information about a user and work they have done\",\n        \"file_pattern\": \"`FILE_NAME.json`\",\n        \"scan_depth\": 0,\n    },\n    \"gmail\": {\n        \"summary\": \"Email conversations and threads\",\n        \"file_pattern\": \"`FILE_NAME.json`\",\n        \"scan_depth\": 0,\n    },\n    \"linear\": {\n        \"summary\": \"Engineering tickets from teams: {subdirs}\",\n        \"file_pattern\": \"`[TEAM]/[TICKET_ID]_TICKET_TITLE.json`\",\n        \"scan_depth\": 2,\n    },\n    \"slack\": {\n        \"summary\": \"Team messages from channels: {subdirs}\",\n        \"file_pattern\": \"`[CHANNEL]/[AUTHOR]_in_[CHANNEL]__[MSG].json`\",\n        \"scan_depth\": 1,\n    },\n    \"github\": {\n        \"summary\": \"Pull requests and code from: {subdirs}\",\n        \"file_pattern\": \"`[ORG]/[REPO]/pull_requests/[PR_NUMBER]__[PR_TITLE].json`\",\n        \"scan_depth\": 2,\n    },\n    \"fireflies\": {\n        \"summary\": \"Meeting transcripts from: {subdirs}\",\n        \"file_pattern\": \"`[YYYY-MM]/CALL_TITLE.json`\",\n        \"scan_depth\": 1,\n    },\n    \"hubspot\": {\n        \"summary\": \"CRM data including: {subdirs}\",\n        \"file_pattern\": \"`[TYPE]/[RECORD_NAME].json`\",\n        \"scan_depth\": 1,\n    },\n    \"notion\": {\n        \"summary\": \"Documentation and notes: {subdirs}\",\n        \"file_pattern\": \"`PAGE_TITLE.json`\",\n        \"scan_depth\": 1,\n    },\n    \"user_library\": {\n        \"summary\": \"User-uploaded files (spreadsheets, documents, presentations, etc.)\",\n        \"file_pattern\": \"Any file format\",\n        \"scan_depth\": 1,\n    },\n}\nDEFAULT_SCAN_DEPTH = 1\n\n\ndef get_provider_display_name(provider: str | None) -> str | None:\n    \"\"\"Get user-friendly display name for LLM provider.\n\n    Args:\n        provider: Internal provider name\n\n    Returns:\n        User-friendly display name, or None if provider is None\n    \"\"\"\n    if not provider:\n        return None\n\n    return PROVIDER_DISPLAY_NAMES.get(provider, provider.title())\n\n\ndef build_user_context(user_name: str | None, user_role: str | None) -> str:\n    \"\"\"Build the user context section for AGENTS.md.\n\n    Args:\n        user_name: User's name\n        user_role: User's role/title\n\n    Returns:\n        Formatted user context string\n    \"\"\"\n    if not user_name:\n        return \"\"\n\n    if user_role:\n        return f\"You are assisting **{user_name}**, {user_role}, with their work.\"\n    return f\"You are assisting **{user_name}** with their work.\"\n\n\n# Content for the org_info section when demo data is enabled\nORG_INFO_SECTION_CONTENT = \"\"\"## Organization Info\n\nThe `org_info/` directory contains information about the organization and user context:\n\n- `AGENTS.md`: Description of available organizational information files\n- `user_identity_profile.txt`: Contains the current user's name, email, and organization\n  they work for. Use this information when personalizing outputs or when the user asks\n  about their identity.\n- `organization_structure.json`: Contains a JSON representation of the organization's\n  groups, managers, and their direct reports. Use this to understand reporting\n  relationships and team structures.\"\"\"\n\n\n# Content for the attachments section when user has uploaded files\nATTACHMENTS_SECTION_CONTENT = \"\"\"## Attachments (PRIORITY)\n\nThe `attachments/` directory contains files that the user has explicitly\nuploaded during this session. **These files are critically important** and\nshould be treated as high-priority context.\n\n### Why Attachments Matter\n\n- The user deliberately chose to upload these files, signaling they are directly relevant to the task\n- These files often contain the specific data, requirements, or examples the user wants you to work with\n- They may include spreadsheets, documents, images, or code that should inform your work\n\n### Required Actions\n\n**At the start of every task, you MUST:**\n\n1. **Check for attachments**: List the contents of `attachments/` to see what the user has provided\n2. **Read and analyze each file**: Thoroughly examine every attachment to understand its contents and relevance\n3. **Reference attachment content**: Use the information from attachments to inform your responses and outputs\n\n### File Handling\n\n- Uploaded files may be in various formats: CSV, JSON, PDF, images, text files, etc.\n- For spreadsheets and data files, examine the structure, columns, and sample data\n- For documents, extract key information and requirements\n- For images, analyze and describe their content\n- For code files, understand the logic and patterns\n\n**Do NOT ignore user uploaded files.** They are there for a reason and likely\ncontain exactly what you need to complete the task successfully.\"\"\"\n\n\ndef build_org_info_section(include_org_info: bool) -> str:\n    \"\"\"Build the organization info section for AGENTS.md.\n\n    Only includes the org_info section when demo data is enabled,\n    since the org_info/ directory is only set up in that case.\n\n    Args:\n        include_org_info: Whether to include the org_info section\n\n    Returns:\n        Formatted org info section string, or empty string if not included\n    \"\"\"\n    if include_org_info:\n        return ORG_INFO_SECTION_CONTENT\n    return \"\"\n\n\ndef extract_skill_description(skill_md_path: Path) -> str:\n    \"\"\"Extract a brief description from a SKILL.md file.\n\n    If the file has YAML frontmatter (delimited by ---), uses the\n    ``description`` field. Otherwise falls back to the first paragraph.\n\n    Args:\n        skill_md_path: Path to the SKILL.md file\n\n    Returns:\n        Brief description (truncated to ~120 chars)\n    \"\"\"\n    try:\n        content = skill_md_path.read_text()\n        lines = content.strip().split(\"\\n\")\n\n        # Try YAML frontmatter first\n        if lines and lines[0].strip() == \"---\":\n            for line in lines[1:]:\n                if line.strip() == \"---\":\n                    break\n                if line.startswith(\"description:\"):\n                    desc = line.split(\":\", 1)[1].strip().strip('\"').strip(\"'\")\n                    if desc:\n                        if len(desc) > 120:\n                            desc = desc[:117] + \"...\"\n                        return desc\n\n        # Fallback: first non-heading paragraph after frontmatter\n        in_frontmatter = lines[0].strip() == \"---\" if lines else False\n        description_lines: list[str] = []\n        for line in lines[1:] if in_frontmatter else lines:\n            stripped = line.strip()\n            # Skip until end of frontmatter\n            if in_frontmatter:\n                if stripped == \"---\":\n                    in_frontmatter = False\n                continue\n            if not stripped:\n                if description_lines:\n                    break\n                continue\n            if stripped.startswith(\"#\"):\n                continue\n            description_lines.append(stripped)\n            if len(\" \".join(description_lines)) > 100:\n                break\n\n        description = \" \".join(description_lines)\n        if len(description) > 120:\n            description = description[:117] + \"...\"\n        return description or \"No description available.\"\n    except Exception:\n        return \"No description available.\"\n\n\ndef _scan_skills_directory(skills_path: Path) -> str:\n    \"\"\"Internal function to scan skills directory (not cached).\n\n    Args:\n        skills_path: Path to the skills directory\n\n    Returns:\n        Formatted skills section string\n    \"\"\"\n    skills_list: list[str] = []\n    try:\n        for skill_dir in sorted(skills_path.iterdir()):\n            if not skill_dir.is_dir():\n                continue\n\n            skill_md = skill_dir / \"SKILL.md\"\n            if skill_md.exists():\n                description = extract_skill_description(skill_md)\n                skills_list.append(f\"- **{skill_dir.name}**: {description}\")\n    except Exception as e:\n        logger.warning(f\"Error scanning skills directory: {e}\")\n        return \"Error loading skills.\"\n\n    if not skills_list:\n        return \"No skills available.\"\n\n    return \"\\n\".join(skills_list)\n\n\ndef build_skills_section(skills_path: Path) -> str:\n    \"\"\"Build the available skills section by scanning the skills directory.\n\n    Skills are static, so results are cached indefinitely for performance.\n\n    Args:\n        skills_path: Path to the skills directory\n\n    Returns:\n        Formatted skills section string\n    \"\"\"\n    if not skills_path.exists():\n        return \"No skills available.\"\n\n    cache_key = str(skills_path)\n\n    # Check cache first (skills are static, no TTL needed)\n    with _skills_cache_lock:\n        cached = _skills_cache.get(cache_key)\n        if cached is not None:\n            return cached\n\n    # Cache miss - scan the directory\n    result = _scan_skills_directory(skills_path)\n\n    # Update cache\n    with _skills_cache_lock:\n        _skills_cache[cache_key] = result\n\n    return result\n\n\ndef _normalize_connector_name(name: str) -> str:\n    \"\"\"Normalize a connector directory name for lookup.\"\"\"\n    return name.lower().replace(\" \", \"_\").replace(\"-\", \"_\")\n\n\ndef _scan_directory_to_depth(\n    directory: Path, current_depth: int, max_depth: int, indent: str = \"  \"\n) -> list[str]:\n    \"\"\"Recursively scan directory up to max_depth levels.\n\n    Args:\n        directory: Directory to scan\n        current_depth: Current depth level (0 = connector root)\n        max_depth: Maximum depth to scan\n        indent: Indentation string for current level\n\n    Returns:\n        List of formatted directory lines\n    \"\"\"\n    if current_depth >= max_depth:\n        return []\n\n    lines: list[str] = []\n    try:\n        subdirs = sorted(\n            d for d in directory.iterdir() if d.is_dir() and not d.name.startswith(\".\")\n        )\n\n        for subdir in subdirs[:10]:  # Limit to 10 per level\n            lines.append(f\"{indent}- {subdir.name}/\")\n\n            # Recurse if we haven't hit max depth\n            if current_depth + 1 < max_depth:\n                nested = _scan_directory_to_depth(\n                    subdir, current_depth + 1, max_depth, indent + \"  \"\n                )\n                lines.extend(nested)\n\n        if len(subdirs) > 10:\n            lines.append(f\"{indent}- ... and {len(subdirs) - 10} more\")\n    except Exception:\n        pass\n\n    return lines\n\n\ndef build_knowledge_sources_section(files_path: Path) -> str:\n    \"\"\"Build combined knowledge sources section with summary, structure, and file patterns.\n\n    This creates a single section per connector that includes:\n    - What kind of data it contains (with actual subdirectory names)\n    - The directory structure\n    - The file naming pattern\n\n    Args:\n        files_path: Path to the files directory (symlink to knowledge sources)\n\n    Returns:\n        Formatted knowledge sources section\n    \"\"\"\n    if not files_path.exists():\n        return \"No knowledge sources available.\"\n\n    # Resolve the symlink to get the actual path\n    try:\n        actual_path = files_path.resolve()\n        if not actual_path.exists():\n            return \"No knowledge sources available.\"\n    except Exception:\n        actual_path = files_path\n\n    sections: list[str] = []\n    try:\n        for item in sorted(files_path.iterdir()):\n            if not item.is_dir() or item.name.startswith(\".\"):\n                continue\n\n            normalized = _normalize_connector_name(item.name)\n            info = CONNECTOR_INFO.get(normalized, {})\n\n            # Get subdirectory names\n            subdirs: list[str] = []\n            try:\n                subdirs = sorted(\n                    d.name\n                    for d in item.iterdir()\n                    if d.is_dir() and not d.name.startswith(\".\")\n                )[:5]\n            except Exception:\n                pass\n\n            # Build summary with subdirs\n            summary_template = str(info.get(\"summary\", f\"Data from {item.name}\"))\n            if \"{subdirs}\" in summary_template and subdirs:\n                subdir_str = \", \".join(subdirs)\n                if len(subdirs) == 5:\n                    subdir_str += \", ...\"\n                summary = summary_template.format(subdirs=subdir_str)\n            elif \"{subdirs}\" in summary_template:\n                summary = summary_template.replace(\": {subdirs}\", \"\").replace(\n                    \" {subdirs}\", \"\"\n                )\n            else:\n                summary = summary_template\n\n            # Build connector section\n            file_pattern = str(info.get(\"file_pattern\", \"\"))\n            scan_depth = int(info.get(\"scan_depth\", DEFAULT_SCAN_DEPTH))\n\n            lines = [f\"### {item.name}/\"]\n            lines.append(f\"{summary}.\\n\")\n            # Add directory structure if depth > 0\n            if scan_depth > 0:\n                lines.append(\"Directory structure:\\n\")\n                nested = _scan_directory_to_depth(item, 0, scan_depth, \"\")\n                if nested:\n                    lines.append(\"\")\n                    lines.extend(nested)\n\n            lines.append(f\"\\nFile format: {file_pattern}\")\n\n            sections.append(\"\\n\".join(lines))\n    except Exception as e:\n        logger.warning(f\"Error building knowledge sources section: {e}\")\n        return \"Error scanning knowledge sources.\"\n\n    if not sections:\n        return \"No knowledge sources available.\"\n\n    return \"\\n\\n\".join(sections)\n\n\ndef generate_agent_instructions(\n    template_path: Path,\n    skills_path: Path,\n    files_path: Path | None = None,\n    provider: str | None = None,\n    model_name: str | None = None,\n    nextjs_port: int | None = None,\n    disabled_tools: list[str] | None = None,\n    user_name: str | None = None,\n    user_role: str | None = None,\n    use_demo_data: bool = False,\n    include_org_info: bool = False,\n) -> str:\n    \"\"\"Generate AGENTS.md content by populating the template with dynamic values.\n\n    Args:\n        template_path: Path to the AGENTS.template.md file\n        skills_path: Path to the skills directory\n        files_path: Path to the files directory (symlink to knowledge sources)\n        provider: LLM provider type (e.g., \"openai\", \"anthropic\")\n        model_name: Model name (e.g., \"claude-sonnet-4-5\", \"gpt-4o\")\n        nextjs_port: Port for Next.js development server\n        disabled_tools: List of disabled tools\n        user_name: User's name for personalization\n        user_role: User's role/title for personalization\n        use_demo_data: If True, exclude user context from AGENTS.md\n        include_org_info: Whether to include the org_info section (demo data mode)\n\n    Returns:\n        Generated AGENTS.md content with placeholders replaced\n    \"\"\"\n    if not template_path.exists():\n        logger.warning(f\"AGENTS.template.md not found at {template_path}\")\n        return \"# Agent Instructions\\n\\nNo custom instructions provided.\"\n\n    # Read template content\n    template_content = template_path.read_text()\n\n    # Build user context section - only include when NOT using demo data\n    user_context = \"\" if use_demo_data else build_user_context(user_name, user_role)\n\n    # Build LLM configuration section\n    provider_display = get_provider_display_name(provider)\n\n    # Build disabled tools section\n    disabled_tools_section = \"\"\n    if disabled_tools:\n        disabled_tools_section = f\"\\n**Disabled Tools**: {', '.join(disabled_tools)}\\n\"\n\n    # Build available skills section\n    available_skills_section = build_skills_section(skills_path)\n\n    # Build org info section (only included when demo data is enabled)\n    org_info_section = build_org_info_section(include_org_info)\n\n    # Replace placeholders\n    content = template_content\n    content = content.replace(\"{{USER_CONTEXT}}\", user_context)\n    content = content.replace(\"{{LLM_PROVIDER_NAME}}\", provider_display or \"Unknown\")\n    content = content.replace(\"{{LLM_MODEL_NAME}}\", model_name or \"Unknown\")\n    content = content.replace(\n        \"{{NEXTJS_PORT}}\", str(nextjs_port) if nextjs_port else \"Unknown\"\n    )\n    content = content.replace(\"{{DISABLED_TOOLS_SECTION}}\", disabled_tools_section)\n    content = content.replace(\"{{AVAILABLE_SKILLS_SECTION}}\", available_skills_section)\n    content = content.replace(\"{{ORG_INFO_SECTION}}\", org_info_section)\n\n    # Only replace file-related placeholders if files_path is provided.\n    # When files_path is None (e.g., Kubernetes), leave placeholders intact\n    # so the container can replace them after files are synced.\n    if files_path:\n        knowledge_sources_section = build_knowledge_sources_section(files_path)\n        content = content.replace(\n            \"{{KNOWLEDGE_SOURCES_SECTION}}\", knowledge_sources_section\n        )\n\n    return content\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/util/build_venv_template.py",
    "content": "#!/usr/bin/env python3\n\"\"\"Build sandbox template for Python venv.\"\"\"\n\nimport argparse\nimport subprocess\nimport sys\nfrom pathlib import Path\n\ntry:\n    from onyx.server.features.build.configs import (\n        OUTPUTS_TEMPLATE_PATH,\n        VENV_TEMPLATE_PATH,\n    )\nexcept ImportError:\n    # Fallback if running as standalone script\n    import os\n\n    OUTPUTS_TEMPLATE_PATH = os.environ.get(\n        \"OUTPUTS_TEMPLATE_PATH\", \"/templates/outputs\"\n    )\n    VENV_TEMPLATE_PATH = os.environ.get(\"VENV_TEMPLATE_PATH\", \"/templates/venv\")\n\n\ndef build_python_venv_template(target_path: Path, requirements_path: Path) -> None:\n    \"\"\"Build Python venv template with required packages.\n\n    Creates a Python virtual environment and installs packages from requirements file.\n\n    Args:\n        target_path: Path where the venv should be created\n        requirements_path: Path to requirements.txt file\n\n    Raises:\n        RuntimeError: If venv creation or package installation fails\n    \"\"\"\n    if not requirements_path.exists():\n        raise FileNotFoundError(f\"Requirements file not found: {requirements_path}\")\n\n    # Create venv\n    print(\"  Creating virtual environment...\")\n    result = subprocess.run(\n        [sys.executable, \"-m\", \"venv\", str(target_path)],\n        capture_output=True,\n        text=True,\n    )\n    if result.returncode != 0:\n        raise RuntimeError(f\"Failed to create virtual environment: {result.stderr}\")\n\n    # Determine pip path based on OS\n    if sys.platform == \"win32\":\n        pip_path = target_path / \"Scripts\" / \"pip\"\n    else:\n        pip_path = target_path / \"bin\" / \"pip\"\n\n    # Install requirements\n    print(f\"  Installing packages from {requirements_path.name}...\")\n    install_result = subprocess.run(\n        [str(pip_path), \"install\", \"-r\", str(requirements_path)],\n        capture_output=True,\n        text=True,\n    )\n    if install_result.returncode != 0:\n        raise RuntimeError(f\"Failed to install packages: {install_result.stderr}\")\n\n\ndef main() -> None:\n    \"\"\"Build Python venv template.\n\n    Web template is already provided at backend/onyx/server/features/build/sandbox/templates/web\n    \"\"\"\n    parser = argparse.ArgumentParser(\n        description=\"Build Python venv template for sandbox (web template already provided)\"\n    )\n    parser.add_argument(\n        \"--venv-dir\",\n        type=str,\n        default=VENV_TEMPLATE_PATH,\n        help=f\"Output directory for Python venv template (default: {VENV_TEMPLATE_PATH})\",\n    )\n    parser.add_argument(\n        \"--requirements\",\n        type=str,\n        default=None,\n        help=\"Path to requirements.txt (default: auto-detect)\",\n    )\n\n    args = parser.parse_args()\n\n    venv_dir = Path(args.venv_dir)\n\n    # Find requirements file\n    if args.requirements:\n        requirements_file = Path(args.requirements)\n    else:\n        # Try to find requirements file relative to script location\n        script_dir = Path(__file__).parent\n        requirements_file = (\n            script_dir.parent.parent\n            / \"sandbox\"\n            / \"kubernetes\"\n            / \"docker\"\n            / \"initial-requirements.txt\"\n        )\n        if not requirements_file.exists():\n            raise FileNotFoundError(\n                f\"Could not find requirements file. Expected at {requirements_file} or specify with --requirements\"\n            )\n\n    # Show web template location\n    print(f\"\\nOutputs template path: {OUTPUTS_TEMPLATE_PATH}\")\n    print(f\"Venv template path: {VENV_TEMPLATE_PATH}\")\n\n    # Build Python venv template\n    print(f\"\\nBuilding Python venv template to {venv_dir}...\")\n    print(\"  (This may take 30-60 seconds)\")\n    build_python_venv_template(venv_dir, requirements_file)\n    print(\"✅ Python venv template built successfully\")\n\n    print(\"\\nTemplate ready! You can now create sandboxes.\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/util/opencode_config.py",
    "content": "\"\"\"Shared opencode configuration generation.\n\nThis module provides a centralized way to generate opencode.json configuration\nthat is consistent across local and Kubernetes sandbox environments.\n\"\"\"\n\nfrom typing import Any\n\n\ndef build_opencode_config(\n    provider: str,\n    model_name: str,\n    api_key: str | None = None,\n    api_base: str | None = None,\n    disabled_tools: list[str] | None = None,\n    dev_mode: bool = False,\n) -> dict[str, Any]:\n    \"\"\"Build opencode.json configuration dict.\n\n    Creates the configuration structure for the opencode CLI agent with\n    provider-specific settings for thinking/reasoning and tool permissions.\n\n    Args:\n        provider: LLM provider type (e.g., \"openai\", \"anthropic\")\n        model_name: Model name (e.g., \"claude-sonnet-4-5\", \"gpt-4o\")\n        api_key: Optional API key for the provider\n        api_base: Optional custom API base URL\n        disabled_tools: Optional list of tools to disable (e.g., [\"question\", \"webfetch\"])\n        dev_mode: If True, allow all external directories. If False (Docker/Kubernetes),\n                  only whitelist /workspace/files and /workspace/demo_data.\n\n    Returns:\n        Configuration dict ready to be serialized to JSON\n    \"\"\"\n    # Build opencode model string: provider/model-name\n    opencode_model = f\"{provider}/{model_name}\"\n\n    # Build configuration with schema\n    config: dict[str, Any] = {\n        \"$schema\": \"https://opencode.ai/config.json\",\n        \"model\": opencode_model,\n        \"provider\": {},\n    }\n\n    # Build provider configuration\n    provider_config: dict[str, Any] = {}\n\n    # Add API key if provided\n    if api_key:\n        provider_config[\"options\"] = {\"apiKey\": api_key}\n\n    # Add API base if provided\n    if api_base:\n        provider_config[\"api\"] = api_base\n\n    # Build model configuration with thinking/reasoning options\n    options: dict[str, Any] = {}\n\n    if provider == \"openai\":\n        options[\"reasoningEffort\"] = \"high\"\n    elif provider == \"anthropic\":\n        options[\"thinking\"] = {\n            \"type\": \"enabled\",\n            \"budgetTokens\": 16000,\n        }\n    elif provider == \"google\":\n        options[\"thinking_budget\"] = 16000\n        options[\"thinking_level\"] = \"high\"\n    elif provider == \"bedrock\":\n        options[\"thinking\"] = {\n            \"type\": \"enabled\",\n            \"budgetTokens\": 16000,\n        }\n    elif provider == \"azure\":\n        options[\"reasoningEffort\"] = \"high\"\n\n    # Add model configuration to provider\n    if options:\n        provider_config[\"models\"] = {\n            model_name: {\n                \"options\": options,\n            }\n        }\n\n    # Add provider to config\n    config[\"provider\"][provider] = provider_config\n\n    # Set default tool permissions\n    # Order matters: last matching rule wins\n    # Allow all files first, then deny specific files\n    config[\"permission\"] = {\n        \"bash\": {\n            # Dangerous commands\n            \"rm\": \"deny\",\n            \"ssh\": \"deny\",\n            \"scp\": \"deny\",\n            \"sftp\": \"deny\",\n            \"ftp\": \"deny\",\n            \"telnet\": \"deny\",\n            \"nc\": \"deny\",\n            \"netcat\": \"deny\",\n            # Block file reading commands to force use of read tool with permissions\n            \"tac\": \"deny\",\n            \"nl\": \"deny\",\n            \"od\": \"deny\",\n            \"xxd\": \"deny\",\n            \"hexdump\": \"deny\",\n            \"strings\": \"deny\",\n            \"base64\": \"deny\",\n            \"*\": \"allow\",  # Allow other bash commands\n        },\n        \"edit\": {\n            \"opencode.json\": \"deny\",\n            \"**/opencode.json\": \"deny\",\n            \"*\": \"allow\",\n        },\n        \"write\": {\n            \"opencode.json\": \"deny\",\n            \"**/opencode.json\": \"deny\",\n            \"*\": \"allow\",\n        },\n        \"read\": {\n            \"*\": \"allow\",\n            \"opencode.json\": \"deny\",\n            \"**/opencode.json\": \"deny\",\n        },\n        \"grep\": {\n            \"*\": \"allow\",\n            \"opencode.json\": \"deny\",\n            \"**/opencode.json\": \"deny\",\n        },\n        \"glob\": {\n            \"*\": \"allow\",\n            \"opencode.json\": \"deny\",\n            \"**/opencode.json\": \"deny\",\n        },\n        \"list\": \"allow\",\n        \"lsp\": \"allow\",\n        \"patch\": \"allow\",\n        \"skill\": \"allow\",\n        \"question\": \"allow\",\n        \"webfetch\": \"allow\",\n        # External directory permissions:\n        # - dev_mode: Allow all external directories for local development\n        # - Docker/Kubernetes: Whitelist only specific directories\n        \"external_directory\": (\n            \"allow\"\n            if dev_mode\n            else {\n                \"*\": \"deny\",  # Deny all external directories by default\n                \"/workspace/files\": \"allow\",  # Allow files directory\n                \"/workspace/files/**\": \"allow\",  # Allow files directory contents\n                \"/workspace/demo_data\": \"allow\",  # Allow demo data directory\n                \"/workspace/demo_data/**\": \"allow\",  # Allow demo data directory contents\n            }\n        ),\n    }\n\n    # Disable specified tools via permissions\n    if disabled_tools:\n        for tool in disabled_tools:\n            config[\"permission\"][tool] = \"deny\"\n\n    return config\n"
  },
  {
    "path": "backend/onyx/server/features/build/sandbox/util/persona_mapping.py",
    "content": "\"\"\"Persona mapping utility for demo user identities and org structure.\n\nMaps frontend persona selections (work_area + level) to demo user profiles\nwith name and email for sandbox provisioning.\n\nAlso provides organizational structure data and content generators for org_info files.\nSingle source of truth for both local and Kubernetes sandbox provisioning.\n\"\"\"\n\nfrom typing import TypedDict\n\n\nclass PersonaInfo(TypedDict):\n    \"\"\"Type for persona information.\"\"\"\n\n    name: str\n    email: str\n\n\n# Persona mapping: work_area -> level -> PersonaInfo\nPERSONA_MAPPING: dict[str, dict[str, PersonaInfo]] = {\n    \"engineering\": {\n        \"ic\": {\n            \"name\": \"Jiwon Kang\",\n            \"email\": \"jiwon_kang@netherite-extraction.onyx.app\",\n        },\n        \"manager\": {\n            \"name\": \"Javier Morales\",\n            \"email\": \"javier_morales@netherite-extraction.onyx.app\",\n        },\n    },\n    \"sales\": {\n        \"ic\": {\n            \"name\": \"Megan Foster\",\n            \"email\": \"megan_foster@netherite-extraction.onyx.app\",\n        },\n        \"manager\": {\n            \"name\": \"Valeria Cruz\",\n            \"email\": \"valeria_cruz@netherite-extraction.onyx.app\",\n        },\n    },\n    \"product\": {\n        \"ic\": {\n            \"name\": \"Michael Anderson\",\n            \"email\": \"michael_anderson@netherite-extraction.onyx.app\",\n        },\n        \"manager\": {\n            \"name\": \"David Liu\",\n            \"email\": \"david_liu@netherite-extraction.onyx.app\",\n        },\n    },\n    \"marketing\": {\n        \"ic\": {\n            \"name\": \"Rahul Patel\",\n            \"email\": \"rahul_patel@netherite-extraction.onyx.app\",\n        },\n        \"manager\": {\n            \"name\": \"Olivia Reed\",\n            \"email\": \"olivia_reed@netherite-extraction.onyx.app\",\n        },\n    },\n    \"executives\": {\n        \"ic\": {\n            \"name\": \"Sarah Mitchell\",\n            \"email\": \"sarah_mitchell@netherite-extraction.onyx.app\",\n        },\n        \"manager\": {\n            \"name\": \"Sarah Mitchell\",\n            \"email\": \"sarah_mitchell@netherite-extraction.onyx.app\",\n        },\n    },\n    \"other\": {\n        \"manager\": {\n            \"name\": \"Ralf Schroeder\",\n            \"email\": \"ralf_schroeder@netherite-extraction.onyx.app\",\n        },\n        \"ic\": {\n            \"name\": \"John Carpenter\",\n            \"email\": \"john_carpenter@netherite-extraction.onyx.app\",\n        },\n    },\n}\n\n# Organization structure - maps managers to their direct reports\nORGANIZATION_STRUCTURE: dict[str, dict[str, list[str]]] = {\n    \"engineering\": {\n        \"javier_morales@netherite-extraction.onyx.app\": [\n            \"tyler_jenkins@netherite-extraction.onyx.app\",\n            \"jiwon_kang@netherite-extraction.onyx.app\",\n            \"brooke_spencer@netherite-extraction.onyx.app\",\n            \"andre_robinson@netherite-extraction.onyx.app\",\n        ],\n        \"isabella_torres@netherite-extraction.onyx.app\": [\n            \"ryan_murphy@netherite-extraction.onyx.app\",\n            \"jason_morris@netherite-extraction.onyx.app\",\n            \"kevin_sullivan@netherite-extraction.onyx.app\",\n        ],\n    },\n    \"sales\": {\n        \"valeria_cruz@netherite-extraction.onyx.app\": [\n            \"megan_foster@netherite-extraction.onyx.app\",\n            \"mina_park@netherite-extraction.onyx.app\",\n            \"james_choi@netherite-extraction.onyx.app\",\n            \"camila_vega@netherite-extraction.onyx.app\",\n        ],\n        \"layla_farah@netherite-extraction.onyx.app\": [\n            \"arjun_mehta@netherite-extraction.onyx.app\",\n            \"sneha_reddy@netherite-extraction.onyx.app\",\n            \"irene_shen@netherite-extraction.onyx.app\",\n        ],\n    },\n    \"product\": {\n        \"david_liu@netherite-extraction.onyx.app\": [\n            \"michael_anderson@netherite-extraction.onyx.app\",\n            \"kenji_watanabe@netherite-extraction.onyx.app\",\n            \"sofia_ramirez@netherite-extraction.onyx.app\",\n        ],\n    },\n    \"marketing\": {\n        \"olivia_reed@netherite-extraction.onyx.app\": [\n            \"rahul_patel@netherite-extraction.onyx.app\",\n            \"yuna_lee@netherite-extraction.onyx.app\",\n            \"peter_yamamoto@netherite-extraction.onyx.app\",\n        ],\n    },\n    \"executives\": {\n        \"sarah_mitchell@netherite-extraction.onyx.app\": [\n            \"daniel_hughes@netherite-extraction.onyx.app\",\n            \"amanda_brooks@netherite-extraction.onyx.app\",\n            \"ananya_gupta@netherite-extraction.onyx.app\",\n        ],\n    },\n    \"other\": {\n        \"ralf_schroeder@netherite-extraction.onyx.app\": [\n            \"john_carpenter@netherite-extraction.onyx.app\",\n        ],\n    },\n}\n\n# AGENTS.md content for org_info directory\nORG_INFO_AGENTS_MD = \"\"\"# AGENTS.md\n\nThis file provides information about which organizational information sources are available:\n\nThere are two files available that provide important information about the user's company and the user themselves.\n\n\n## User Identity\n\nThe file `user_identity_profile.txt` contains the user's profile.\n\n## Organizational Structure\n\nThe file `organization_structure.json` contains a json with the organization's groups, managers, and their reports.\n\"\"\"\n\n\ndef get_persona_info(work_area: str | None, level: str | None) -> PersonaInfo | None:\n    \"\"\"Get persona info from work area and level.\n\n    Args:\n        work_area: User's work area (e.g., \"engineering\", \"product\", \"sales\")\n        level: User's level (e.g., \"ic\", \"manager\")\n\n    Returns:\n        PersonaInfo with name and email, or None if no matching persona\n    \"\"\"\n    if not work_area:\n        return None\n\n    work_area_lower = work_area.lower().strip()\n    level_lower = (level or \"manager\").lower().strip()\n\n    work_area_mapping = PERSONA_MAPPING.get(work_area_lower)\n    if not work_area_mapping:\n        return None\n\n    return work_area_mapping.get(level_lower)\n\n\ndef generate_user_identity_content(persona: PersonaInfo) -> str:\n    \"\"\"Generate user identity profile content.\n\n    Args:\n        persona: PersonaInfo with name and email\n\n    Returns:\n        Content for user_identity_profile.txt\n    \"\"\"\n    return f\"Your name is {persona['name']}. Your email is {persona['email']}. You are working at Netherite Extraction Corp.\\n\"\n"
  },
  {
    "path": "backend/onyx/server/features/build/session/__init__.py",
    "content": "\"\"\"Session management for Build Mode.\"\"\"\n\nfrom onyx.server.features.build.session.manager import RateLimitError\nfrom onyx.server.features.build.session.manager import SessionManager\n\n__all__ = [\"SessionManager\", \"RateLimitError\"]\n"
  },
  {
    "path": "backend/onyx/server/features/build/session/manager.py",
    "content": "\"\"\"Public interface for session operations.\n\nSessionManager is the main entry point for build session lifecycle management.\nIt orchestrates session CRUD, message handling, artifact management, and file system access.\n\"\"\"\n\nimport io\nimport json\nimport mimetypes\nimport zipfile\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom pathlib import Path\nfrom typing import Any\nfrom uuid import UUID\n\nfrom acp.schema import AgentMessageChunk\nfrom acp.schema import AgentPlanUpdate\nfrom acp.schema import AgentThoughtChunk\nfrom acp.schema import CurrentModeUpdate\nfrom acp.schema import Error as ACPError\nfrom acp.schema import PromptResponse\nfrom acp.schema import ToolCallProgress\nfrom acp.schema import ToolCallStart\nfrom sqlalchemy.orm import Session as DBSession\n\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.constants import MessageType\nfrom onyx.db.enums import SandboxStatus\nfrom onyx.db.llm import fetch_default_llm_model\nfrom onyx.db.models import BuildMessage\nfrom onyx.db.models import BuildSession\nfrom onyx.db.models import User\nfrom onyx.db.users import fetch_user_by_id\nfrom onyx.llm.factory import get_default_llm\nfrom onyx.llm.models import LanguageModelInput\nfrom onyx.llm.models import ReasoningEffort\nfrom onyx.llm.models import SystemMessage\nfrom onyx.llm.models import UserMessage\nfrom onyx.llm.utils import llm_response_to_string\nfrom onyx.server.features.build.api.models import DirectoryListing\nfrom onyx.server.features.build.api.models import FileSystemEntry\nfrom onyx.server.features.build.api.packet_logger import get_packet_logger\nfrom onyx.server.features.build.api.packet_logger import log_separator\nfrom onyx.server.features.build.api.packets import BuildPacket\nfrom onyx.server.features.build.api.packets import ErrorPacket\nfrom onyx.server.features.build.api.rate_limit import get_user_rate_limit_status\nfrom onyx.server.features.build.configs import MAX_TOTAL_UPLOAD_SIZE_BYTES\nfrom onyx.server.features.build.configs import MAX_UPLOAD_FILES_PER_SESSION\nfrom onyx.server.features.build.configs import PERSISTENT_DOCUMENT_STORAGE_PATH\nfrom onyx.server.features.build.configs import SANDBOX_BACKEND\nfrom onyx.server.features.build.configs import SandboxBackend\nfrom onyx.server.features.build.db.build_session import allocate_nextjs_port\nfrom onyx.server.features.build.db.build_session import create_build_session__no_commit\nfrom onyx.server.features.build.db.build_session import create_message\nfrom onyx.server.features.build.db.build_session import delete_build_session__no_commit\nfrom onyx.server.features.build.db.build_session import (\n    fetch_llm_provider_by_type_for_build_mode,\n)\nfrom onyx.server.features.build.db.build_session import get_build_session\nfrom onyx.server.features.build.db.build_session import get_empty_session_for_user\nfrom onyx.server.features.build.db.build_session import get_session_messages\nfrom onyx.server.features.build.db.build_session import get_user_build_sessions\nfrom onyx.server.features.build.db.build_session import update_session_activity\nfrom onyx.server.features.build.db.build_session import upsert_agent_plan\nfrom onyx.server.features.build.db.sandbox import create_sandbox__no_commit\nfrom onyx.server.features.build.db.sandbox import get_running_sandbox_count_by_tenant\nfrom onyx.server.features.build.db.sandbox import get_sandbox_by_session_id\nfrom onyx.server.features.build.db.sandbox import get_sandbox_by_user_id\nfrom onyx.server.features.build.db.sandbox import get_snapshots_for_session\nfrom onyx.server.features.build.db.sandbox import update_sandbox_heartbeat\nfrom onyx.server.features.build.db.sandbox import update_sandbox_status__no_commit\nfrom onyx.server.features.build.sandbox import get_sandbox_manager\nfrom onyx.server.features.build.sandbox.kubernetes.internal.acp_exec_client import (\n    SSEKeepalive,\n)\nfrom onyx.server.features.build.sandbox.models import LLMProviderConfig\nfrom onyx.server.features.build.sandbox.tasks.tasks import (\n    _get_disabled_user_library_paths,\n)\nfrom onyx.server.features.build.session.prompts import BUILD_NAMING_SYSTEM_PROMPT\nfrom onyx.server.features.build.session.prompts import BUILD_NAMING_USER_PROMPT\nfrom onyx.server.features.build.session.prompts import (\n    FOLLOWUP_SUGGESTIONS_SYSTEM_PROMPT,\n)\nfrom onyx.server.features.build.session.prompts import FOLLOWUP_SUGGESTIONS_USER_PROMPT\nfrom onyx.tracing.framework.create import ensure_trace\nfrom onyx.tracing.llm_utils import llm_generation_span\nfrom onyx.tracing.llm_utils import record_llm_response\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\nclass UploadLimitExceededError(ValueError):\n    \"\"\"Raised when file upload limits are exceeded.\"\"\"\n\n\nclass BuildStreamingState:\n    \"\"\"Container for accumulating state during ACP streaming.\n\n    Similar to ChatStateContainer but adapted for ACP packet types.\n    Accumulates chunks and tracks pending tool calls until completion.\n\n    Usage:\n        state = BuildStreamingState(turn_index=0)\n\n        # During streaming:\n        for packet in stream:\n            if packet.type == \"agent_message_chunk\":\n                state.add_message_chunk(packet.content.text)\n            elif packet.type == \"tool_call_progress\" and packet.status == \"completed\":\n                state.add_completed_tool_call(packet_data)\n            # etc.\n\n        # At end of streaming, call finalize methods and save\n    \"\"\"\n\n    def __init__(self, turn_index: int) -> None:\n        \"\"\"Initialize streaming state for a turn.\n\n        Args:\n            turn_index: The 0-indexed user message number this turn belongs to\n        \"\"\"\n        self.turn_index = turn_index\n\n        # Accumulated text chunks (similar to answer_tokens in ChatStateContainer)\n        self.message_chunks: list[str] = []\n        self.thought_chunks: list[str] = []\n\n        # For upserting agent_plan_update - track ID so we can update in place\n        self.plan_message_id: UUID | None = None\n\n        # Track what type of chunk we were last receiving\n        self._last_chunk_type: str | None = None\n\n    def add_message_chunk(self, text: str) -> None:\n        \"\"\"Accumulate message text.\"\"\"\n        self.message_chunks.append(text)\n        self._last_chunk_type = \"message\"\n\n    def add_thought_chunk(self, text: str) -> None:\n        \"\"\"Accumulate thought text.\"\"\"\n        self.thought_chunks.append(text)\n        self._last_chunk_type = \"thought\"\n\n    def finalize_message_chunks(self) -> dict[str, Any] | None:\n        \"\"\"Build a synthetic packet with accumulated message text.\n\n        Returns:\n            A synthetic agent_message packet or None if no chunks accumulated\n        \"\"\"\n        if not self.message_chunks:\n            return None\n\n        full_text = \"\".join(self.message_chunks)\n        result = {\n            \"type\": \"agent_message\",\n            \"content\": {\"type\": \"text\", \"text\": full_text},\n            \"sessionUpdate\": \"agent_message\",\n        }\n        self.message_chunks.clear()\n        return result\n\n    def finalize_thought_chunks(self) -> dict[str, Any] | None:\n        \"\"\"Build a synthetic packet with accumulated thought text.\n\n        Returns:\n            A synthetic agent_thought packet or None if no chunks accumulated\n        \"\"\"\n        if not self.thought_chunks:\n            return None\n\n        full_text = \"\".join(self.thought_chunks)\n        result = {\n            \"type\": \"agent_thought\",\n            \"content\": {\"type\": \"text\", \"text\": full_text},\n            \"sessionUpdate\": \"agent_thought\",\n        }\n        self.thought_chunks.clear()\n        return result\n\n    def should_finalize_chunks(self, new_packet_type: str) -> bool:\n        \"\"\"Check if we should finalize pending chunks before processing new packet.\n\n        We finalize when the packet type changes from message/thought chunks\n        to something else (or to a different chunk type).\n        \"\"\"\n        if self._last_chunk_type is None:\n            return False\n\n        # If we were receiving message chunks and now get something else\n        if (\n            self._last_chunk_type == \"message\"\n            and new_packet_type != \"agent_message_chunk\"\n        ):\n            return True\n\n        # If we were receiving thought chunks and now get something else\n        if (\n            self._last_chunk_type == \"thought\"\n            and new_packet_type != \"agent_thought_chunk\"\n        ):\n            return True\n\n        return False\n\n    def clear_last_chunk_type(self) -> None:\n        \"\"\"Clear the last chunk type tracking after finalization.\"\"\"\n        self._last_chunk_type = None\n\n\n# Hidden directories/files to filter from listings\nHIDDEN_PATTERNS = {\n    \".venv\",\n    \".git\",\n    \".next\",\n    \"__pycache__\",\n    \"node_modules\",\n    \".DS_Store\",\n    \"opencode.json\",\n    \".env\",\n    \".gitignore\",\n}\n\n\nclass RateLimitError(Exception):\n    \"\"\"Exception raised when rate limit is exceeded.\"\"\"\n\n    def __init__(\n        self,\n        message: str,\n        messages_used: int,\n        limit: int,\n        reset_timestamp: str | None = None,\n    ):\n        super().__init__(message)\n        self.messages_used = messages_used\n        self.limit = limit\n        self.reset_timestamp = reset_timestamp\n\n\nclass SessionManager:\n    \"\"\"Public interface for session operations.\n\n    Orchestrates session lifecycle, messaging, artifacts, and file access.\n    Uses SandboxManager internally for sandbox-related operations.\n\n    Unlike SandboxManager, this is NOT a singleton - each instance is bound\n    to a specific database session for the duration of a request.\n\n    Usage:\n        session_manager = SessionManager(db_session)\n        sessions = session_manager.list_sessions(user_id)\n    \"\"\"\n\n    def __init__(self, db_session: DBSession) -> None:\n        \"\"\"Initialize the SessionManager with a database session.\n\n        Args:\n            db_session: The SQLAlchemy database session to use for all operations\n        \"\"\"\n        self._db_session = db_session\n        self._sandbox_manager = get_sandbox_manager()\n\n    # =========================================================================\n    # Rate Limiting\n    # =========================================================================\n\n    def check_rate_limit(self, user: User) -> None:\n        \"\"\"\n        Check build mode rate limits for a user.\n\n        Args:\n            user: The user to check rate limits for\n\n        Raises:\n            RateLimitError: If rate limit is exceeded\n        \"\"\"\n        # Skip rate limiting for self-hosted deployments\n        if not MULTI_TENANT:\n            return\n\n        rate_limit_status = get_user_rate_limit_status(user, self._db_session)\n        if rate_limit_status.is_limited:\n            raise RateLimitError(\n                message=(\n                    f\"Rate limit exceeded. You have used \"\n                    f\"{rate_limit_status.messages_used}/{rate_limit_status.limit} messages. \"\n                    f\"Limit resets at {rate_limit_status.reset_timestamp}.\"\n                    if rate_limit_status.reset_timestamp\n                    else \"This is a lifetime limit.\"\n                ),\n                messages_used=rate_limit_status.messages_used,\n                limit=rate_limit_status.limit,\n                reset_timestamp=rate_limit_status.reset_timestamp,\n            )\n\n    # =========================================================================\n    # LLM Configuration\n    # =========================================================================\n\n    def _get_llm_config(\n        self,\n        requested_provider_type: str | None,\n        requested_model_name: str | None,\n    ) -> LLMProviderConfig:\n        \"\"\"Get LLM config for sandbox provisioning.\n\n        Resolution priority:\n        1. User's requested provider/model (from cookie)\n        2. System default provider\n\n        Args:\n            requested_provider_type: Provider type from user's cookie (e.g., \"anthropic\", \"openai\")\n            requested_model_name: Model name from user's cookie (e.g., \"claude-opus-4-5\")\n\n        Returns:\n            LLMProviderConfig for sandbox provisioning\n\n        Raises:\n            ValueError: If no LLM provider is configured\n        \"\"\"\n        if requested_provider_type and requested_model_name:\n            # Look up provider by type (e.g., \"anthropic\", \"openai\", \"openrouter\")\n            provider = fetch_llm_provider_by_type_for_build_mode(\n                self._db_session, requested_provider_type\n            )\n            if provider:\n                # Use the requested model directly - the provider's API will\n                # reject invalid models. This allows users to use models that\n                # aren't explicitly configured as \"visible\" in the admin UI.\n                return LLMProviderConfig(\n                    provider=provider.provider,\n                    model_name=requested_model_name,\n                    api_key=provider.api_key,\n                    api_base=provider.api_base,\n                )\n            else:\n                logger.warning(\n                    f\"Requested provider type {requested_provider_type} not found, falling back to default\"\n                )\n\n        # Fallback to system default\n        default_model = fetch_default_llm_model(self._db_session)\n        if not default_model:\n            raise ValueError(\"No default LLM model found\")\n\n        return LLMProviderConfig(\n            provider=default_model.llm_provider.provider,\n            model_name=default_model.name,\n            api_key=(\n                default_model.llm_provider.api_key.get_value(apply_mask=False)\n                if default_model.llm_provider.api_key\n                else None\n            ),\n            api_base=default_model.llm_provider.api_base,\n        )\n\n    # =========================================================================\n    # Session CRUD Operations\n    # =========================================================================\n\n    def list_sessions(\n        self,\n        user_id: UUID,\n    ) -> list[BuildSession]:\n        \"\"\"Get all build sessions for a user.\n\n        Args:\n            user_id: The user ID\n\n        Returns:\n            List of BuildSession models ordered by most recent first\n        \"\"\"\n        return get_user_build_sessions(user_id, self._db_session)\n\n    def create_session__no_commit(\n        self,\n        user_id: UUID,\n        name: str | None = None,\n        user_work_area: str | None = None,\n        user_level: str | None = None,\n        llm_provider_type: str | None = None,\n        llm_model_name: str | None = None,\n        demo_data_enabled: bool = True,\n    ) -> BuildSession:\n        \"\"\"\n        Create a new build session with a sandbox.\n\n        NOTE: This method does NOT commit the transaction. The caller is\n        responsible for committing after this method returns successfully.\n        This allows the entire operation to be atomic at the endpoint level.\n\n        Args:\n            user_id: The user ID\n            name: Optional session name\n            user_work_area: User's work area for demo persona (e.g., \"engineering\")\n            user_level: User's level for demo persona (e.g., \"ic\", \"manager\")\n            llm_provider_type: Provider type from user's cookie (e.g., \"anthropic\", \"openai\")\n            llm_model_name: Model name from user's cookie (e.g., \"claude-opus-4-5\")\n            demo_data_enabled: Explicit flag for demo data mode. Defaults to True if not provided.\n\n        Returns:\n            The created BuildSession model\n\n        Raises:\n            ValueError: If max concurrent sandboxes reached or no LLM provider\n            RuntimeError: If sandbox provisioning fails\n        \"\"\"\n        tenant_id = get_current_tenant_id()\n\n        # Check sandbox limits for multi-tenant deployments\n        if MULTI_TENANT:\n            from onyx.server.features.build.configs import (\n                SANDBOX_MAX_CONCURRENT_PER_ORG,\n            )\n\n            running_count = get_running_sandbox_count_by_tenant(\n                self._db_session, tenant_id\n            )\n            if running_count >= SANDBOX_MAX_CONCURRENT_PER_ORG:\n                raise ValueError(\n                    f\"Maximum concurrent sandboxes ({SANDBOX_MAX_CONCURRENT_PER_ORG}) reached\"\n                )\n\n        # Get LLM config (uses user's selection or falls back to default)\n        llm_config = self._get_llm_config(llm_provider_type, llm_model_name)\n\n        # Build tenant/user-specific path for FILE_SYSTEM documents (sandbox isolation)\n        # Each user's sandbox can only access documents they created\n        # Path structure: {base_path}/{tenant_id}/knowledge/{user_id}/\n        # This matches the path structure used by PersistentDocumentWriter\n        if PERSISTENT_DOCUMENT_STORAGE_PATH:\n            user_file_system_path = str(\n                Path(PERSISTENT_DOCUMENT_STORAGE_PATH)\n                / tenant_id\n                / \"knowledge\"\n                / str(user_id)\n            )\n        else:\n            # Fallback for local development without persistent storage\n            user_file_system_path = \"/tmp/onyx-files\"\n\n        # Ensure the user's document directory exists (if local)\n        if SANDBOX_BACKEND == SandboxBackend.LOCAL:\n            Path(user_file_system_path).mkdir(parents=True, exist_ok=True)\n\n        # Allocate port for this session (per-session port allocation)\n        # Both LOCAL and KUBERNETES backends use the same port allocation strategy\n        nextjs_port = allocate_nextjs_port(self._db_session)\n\n        # Create BuildSession record with allocated port (uses flush, caller commits)\n        build_session = create_build_session__no_commit(\n            user_id, self._db_session, name=name, demo_data_enabled=demo_data_enabled\n        )\n        build_session.nextjs_port = nextjs_port\n        self._db_session.flush()\n        session_id = str(build_session.id)\n        logger.info(\n            f\"Created build session {session_id} for user {user_id} (port: {nextjs_port})\"\n        )\n\n        # Check if user already has a sandbox (one sandbox per user model)\n        existing_sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n\n        if existing_sandbox:\n            # User already has a sandbox - check if it needs re-provisioning\n            sandbox = existing_sandbox\n            sandbox_id = sandbox.id\n\n            if sandbox.status in (\n                SandboxStatus.TERMINATED,\n                SandboxStatus.SLEEPING,\n                SandboxStatus.FAILED,\n            ):\n                # Re-provision sandbox (pod doesn't exist or failed)\n                logger.info(\n                    f\"Re-provisioning {sandbox.status.value} sandbox {sandbox_id} for user {user_id}\"\n                )\n                sandbox_info = self._sandbox_manager.provision(\n                    sandbox_id=sandbox_id,\n                    user_id=user_id,\n                    tenant_id=tenant_id,\n                    llm_config=llm_config,\n                )\n                # Use update function to also set heartbeat when transitioning to RUNNING\n                update_sandbox_status__no_commit(\n                    self._db_session, sandbox_id, sandbox_info.status\n                )\n            elif sandbox.status.is_active():\n                # Verify pod is healthy before reusing (use short timeout for quick check)\n                if not self._sandbox_manager.health_check(sandbox_id, timeout=5.0):\n                    logger.warning(\n                        f\"Sandbox {sandbox_id} marked as {sandbox.status} but pod is unhealthy/missing. Entering recovery mode.\"\n                    )\n                    # Terminate to clean up any lingering K8s resources\n                    self._sandbox_manager.terminate(sandbox_id)\n\n                    # Mark as terminated and re-provision\n                    update_sandbox_status__no_commit(\n                        self._db_session, sandbox_id, SandboxStatus.TERMINATED\n                    )\n\n                    logger.info(\n                        f\"Re-provisioning sandbox {sandbox_id} for user {user_id}\"\n                    )\n                    sandbox_info = self._sandbox_manager.provision(\n                        sandbox_id=sandbox_id,\n                        user_id=user_id,\n                        tenant_id=tenant_id,\n                        llm_config=llm_config,\n                    )\n                    # Use update function to also set heartbeat when transitioning to RUNNING\n                    update_sandbox_status__no_commit(\n                        self._db_session, sandbox_id, sandbox_info.status\n                    )\n                else:\n                    logger.info(\n                        f\"Reusing existing sandbox {sandbox_id} (status: {sandbox.status}) for new session {session_id}\"\n                    )\n            else:\n                # PROVISIONING status - sandbox is being created by another request\n                # Just fail this request\n                msg = (\n                    f\"Sandbox {sandbox_id} has status {sandbox.status.value} and is being \"\n                    f\"created by another request for new session {session_id}\"\n                )\n                logger.error(msg)\n                raise RuntimeError(msg)\n        else:\n            # Create new Sandbox record for the user (uses flush, caller commits)\n            sandbox = create_sandbox__no_commit(\n                db_session=self._db_session,\n                user_id=user_id,\n            )\n            sandbox_id = sandbox.id\n            logger.info(f\"Created sandbox record {sandbox_id} for session {session_id}\")\n\n            # Provision sandbox (no DB operations inside)\n            sandbox_info = self._sandbox_manager.provision(\n                sandbox_id=sandbox_id,\n                user_id=user_id,\n                tenant_id=tenant_id,\n                llm_config=llm_config,\n            )\n\n            # Update sandbox status (also refreshes heartbeat when transitioning to RUNNING)\n            update_sandbox_status__no_commit(\n                self._db_session, sandbox_id, sandbox_info.status\n            )\n\n        # Set up session workspace within the sandbox\n        logger.info(\n            f\"Setting up session workspace {session_id} in sandbox {sandbox.id}\"\n        )\n        # Fetch user data for personalization in AGENTS.md\n        user = fetch_user_by_id(self._db_session, user_id)\n        user_name = user.personal_name if user else None\n        user_role = user.personal_role if user else None\n\n        # Get excluded user library paths (files with sync_disabled=True)\n        # Only query if not using demo data (user library only applies to user files)\n        excluded_user_library_paths: list[str] | None = None\n        if not demo_data_enabled:\n            excluded_user_library_paths = _get_disabled_user_library_paths(\n                self._db_session, str(user_id)\n            )\n            if excluded_user_library_paths:\n                logger.debug(\n                    f\"Excluding {len(excluded_user_library_paths)} disabled user library paths\"\n                )\n\n        self._sandbox_manager.setup_session_workspace(\n            sandbox_id=sandbox.id,\n            session_id=build_session.id,\n            llm_config=llm_config,\n            nextjs_port=nextjs_port,\n            file_system_path=user_file_system_path,\n            snapshot_path=None,  # TODO: Support restoring from snapshot\n            user_name=user_name,\n            user_role=user_role,\n            user_work_area=user_work_area,\n            user_level=user_level,\n            use_demo_data=demo_data_enabled,\n            excluded_user_library_paths=excluded_user_library_paths,\n        )\n\n        sandbox_id = sandbox.id\n        logger.info(\n            f\"Successfully created session {session_id} with workspace in sandbox {sandbox.id}\"\n        )\n\n        return build_session\n\n    def get_or_create_empty_session(\n        self,\n        user_id: UUID,\n        user_work_area: str | None = None,\n        user_level: str | None = None,\n        llm_provider_type: str | None = None,\n        llm_model_name: str | None = None,\n        demo_data_enabled: bool = True,\n    ) -> BuildSession:\n        \"\"\"Get existing empty session or create a new one with provisioned sandbox.\n\n        Used for pre-provisioning sandboxes when user lands on /build/v1.\n        Returns existing recent empty session if one exists, has a healthy sandbox,\n        AND has matching demo_data_enabled setting. Otherwise creates new.\n        If an empty session exists but its sandbox is unhealthy/terminated/missing,\n        the stale session is deleted and a fresh one is created (which will handle\n        sandbox recovery/re-provisioning).\n\n        Args:\n            user_id: The user ID\n            user_work_area: User's work area for demo persona (e.g., \"engineering\")\n            user_level: User's level for demo persona (e.g., \"ic\", \"manager\")\n            llm_provider_type: Provider type from user's cookie (e.g., \"anthropic\", \"openai\")\n            llm_model_name: Model name from user's cookie (e.g., \"claude-opus-4-5\")\n            demo_data_enabled: Explicit flag for demo data mode. Defaults to True if not provided.\n\n        Returns:\n            BuildSession (existing empty or newly created)\n\n        Raises:\n            ValueError: If max concurrent sandboxes reached\n            RuntimeError: If sandbox provisioning fails\n        \"\"\"\n        # Look for existing empty session with matching demo_data setting\n        existing = get_empty_session_for_user(\n            user_id, self._db_session, demo_data_enabled=demo_data_enabled\n        )\n        if existing:\n            logger.info(\n                f\"Existing empty session {existing.id} found for user {user_id}\"\n            )\n            # Verify sandbox is healthy before returning existing session\n            sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n\n            if sandbox and sandbox.status.is_active():\n                # Quick health check to verify sandbox is actually responsive\n                # AND verify the session workspace still exists on disk\n                # (it may have been wiped if the sandbox was re-provisioned)\n                is_healthy = self._sandbox_manager.health_check(sandbox.id, timeout=5.0)\n                workspace_exists = (\n                    is_healthy\n                    and self._sandbox_manager.session_workspace_exists(\n                        sandbox.id, existing.id\n                    )\n                )\n                if is_healthy and workspace_exists:\n                    logger.info(\n                        f\"Returning existing empty session {existing.id} for user {user_id}\"\n                    )\n                    return existing\n                elif not is_healthy:\n                    logger.warning(\n                        f\"Empty session {existing.id} has unhealthy sandbox {sandbox.id}. Deleting and creating fresh session.\"\n                    )\n                else:\n                    logger.warning(\n                        f\"Empty session {existing.id} workspace missing in sandbox \"\n                        f\"{sandbox.id}. Deleting and creating fresh session.\"\n                    )\n            else:\n                logger.warning(\n                    f\"Empty session {existing.id} has no active sandbox \"\n                    f\"(sandbox={'missing' if not sandbox else sandbox.status}). \"\n                    f\"Deleting and creating fresh session.\"\n                )\n\n            # Delete the stale empty session - create_session__no_commit will\n            # handle sandbox recovery/re-provisioning\n            delete_build_session__no_commit(existing.id, user_id, self._db_session)\n\n        return self.create_session__no_commit(\n            user_id=user_id,\n            user_work_area=user_work_area,\n            user_level=user_level,\n            llm_provider_type=llm_provider_type,\n            llm_model_name=llm_model_name,\n            demo_data_enabled=demo_data_enabled,\n        )\n\n    def delete_empty_session(self, user_id: UUID) -> bool:\n        \"\"\"Delete user's pre-provisioned (empty) session if one exists.\n\n        A session is considered \"empty\" if it has no messages.\n        This is called when user changes LLM selection or toggles demo data\n        so the session can be re-created with the new LLM configuration.\n\n        Args:\n            user_id: The user ID\n\n        Returns:\n            True if a session was deleted, False if none found\n        \"\"\"\n        empty_session = get_empty_session_for_user(user_id, self._db_session)\n\n        if not empty_session:\n            logger.info(f\"No empty session found for user {user_id}\")\n            return False\n\n        session_id = empty_session.id\n\n        # Get user's sandbox to clean up session workspace\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox and sandbox.status.is_active():\n            try:\n                self._sandbox_manager.cleanup_session_workspace(\n                    sandbox_id=sandbox.id,\n                    session_id=session_id,\n                    nextjs_port=empty_session.nextjs_port,\n                )\n                logger.info(\n                    f\"Cleaned up session workspace {session_id} in sandbox {sandbox.id}\"\n                )\n            except Exception as e:\n                # Log but don't fail - session can still be deleted\n                logger.warning(f\"Failed to cleanup session workspace {session_id}: {e}\")\n\n        # Delete session (cascade deletes artifacts)\n        delete_build_session__no_commit(session_id, user_id, self._db_session)\n        logger.info(f\"Deleted empty session {session_id} for user {user_id}\")\n\n        return True\n\n    def get_session(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n    ) -> BuildSession | None:\n        \"\"\"\n        Get a specific build session.\n\n        Also updates the last activity timestamp.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID\n\n        Returns:\n            BuildSession model or None if not found\n        \"\"\"\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session:\n            update_session_activity(session_id, self._db_session)\n            self._db_session.refresh(session)\n        return session\n\n    def generate_session_name(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n    ) -> str | None:\n        \"\"\"\n        Generate a session name using LLM based on the first user message.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID (for ownership verification)\n\n        Returns:\n            Generated session name or None if session not found\n        \"\"\"\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            return None\n\n        return self._generate_session_name(session_id)\n\n    def update_session_name(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n        name: str | None = None,\n    ) -> BuildSession | None:\n        \"\"\"\n        Update the name of a build session.\n\n        If name is None, auto-generates a name using LLM based on the first\n        user message in the session.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID\n            name: The new session name (if None, auto-generates using LLM)\n\n        Returns:\n            Updated BuildSession model or None if not found\n        \"\"\"\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            return None\n\n        if name is not None:\n            # Manual rename\n            session.name = name\n        else:\n            # Auto-generate name from first user message using LLM\n            session.name = self._generate_session_name(session_id)\n\n        update_session_activity(session_id, self._db_session)\n        self._db_session.commit()\n        self._db_session.refresh(session)\n        return session\n\n    def _generate_session_name(self, session_id: UUID) -> str:\n        \"\"\"\n        Generate a session name using LLM based on the first user message.\n\n        Args:\n            session_id: The session UUID\n\n        Returns:\n            Generated session name or fallback name\n        \"\"\"\n        # Get messages to find first user message\n        messages = get_session_messages(session_id, self._db_session)\n        first_user_msg = next((m for m in messages if m.type == MessageType.USER), None)\n\n        if not first_user_msg:\n            return f\"Build Session {str(session_id)[:8]}\"\n\n        # Extract text from message_metadata\n        metadata = first_user_msg.message_metadata\n        if not metadata:\n            return f\"Build Session {str(session_id)[:8]}\"\n\n        # Handle user_message packet structure: {type: \"user_message\", content: {type: \"text\", text: \"...\"}}\n        content = metadata.get(\"content\", {})\n        if isinstance(content, dict):\n            user_message = content.get(\"text\", \"\")\n        else:\n            user_message = str(content) if content else \"\"\n\n        if not user_message:\n            return f\"Build Session {str(session_id)[:8]}\"\n\n        # Use LLM to generate a concise session name with Braintrust tracing\n        try:\n            llm = get_default_llm()\n            prompt_messages: LanguageModelInput = [\n                SystemMessage(content=BUILD_NAMING_SYSTEM_PROMPT),\n                UserMessage(\n                    content=BUILD_NAMING_USER_PROMPT.format(\n                        user_message=user_message[:500]  # Limit input size\n                    )\n                ),\n            ]\n            with ensure_trace(\n                \"build_session_naming\",\n                group_id=str(session_id),\n                metadata={\"session_id\": str(session_id)},\n            ):\n                with llm_generation_span(\n                    llm=llm,\n                    flow=\"build_session_naming\",\n                    input_messages=prompt_messages,\n                ) as span_generation:\n                    response = llm.invoke(\n                        prompt_messages, reasoning_effort=ReasoningEffort.OFF\n                    )\n                    record_llm_response(span_generation, response)\n                    generated_name = llm_response_to_string(response).strip().strip('\"')\n\n            # Ensure the name isn't too long (max 50 chars)\n            if len(generated_name) > 50:\n                generated_name = generated_name[:47] + \"...\"\n\n            return (\n                generated_name\n                if generated_name\n                else f\"Build Session {str(session_id)[:8]}\"\n            )\n        except Exception as e:\n            logger.warning(f\"Failed to generate session name with LLM: {e}\")\n            # Fallback to simple truncation\n            return user_message[:40].strip() + (\"...\" if len(user_message) > 40 else \"\")\n\n    def generate_followup_suggestions(\n        self,\n        user_message: str,\n        assistant_message: str,\n    ) -> list[dict[str, str]]:\n        \"\"\"\n        Generate follow-up suggestions based on the first exchange.\n\n        Args:\n            user_message: The first user message content\n            assistant_message: The first assistant response (text only, no tool calls)\n\n        Returns:\n            List of suggestion dicts with \"theme\" and \"text\" keys, or empty list on failure\n        \"\"\"\n        if not user_message or not assistant_message:\n            return []\n\n        try:\n            llm = get_default_llm()\n            prompt_messages: LanguageModelInput = [\n                SystemMessage(content=FOLLOWUP_SUGGESTIONS_SYSTEM_PROMPT),\n                UserMessage(\n                    content=FOLLOWUP_SUGGESTIONS_USER_PROMPT.format(\n                        user_message=user_message[:1000],  # Limit input size\n                        assistant_message=assistant_message[:2000],\n                    )\n                ),\n            ]\n            # Call LLM with Braintrust tracing\n            with ensure_trace(\"build_followup_suggestions\"):\n                with llm_generation_span(\n                    llm=llm,\n                    flow=\"build_followup_suggestions\",\n                    input_messages=prompt_messages,\n                ) as span_generation:\n                    response = llm.invoke(\n                        prompt_messages,\n                        reasoning_effort=ReasoningEffort.OFF,\n                        max_tokens=500,\n                    )\n                    record_llm_response(span_generation, response)\n                    raw_output = llm_response_to_string(response).strip()\n\n            return self._parse_suggestions(raw_output)\n        except Exception as e:\n            logger.warning(f\"Failed to generate follow-up suggestions with LLM: {e}\")\n            return []\n\n    def _parse_suggestions(self, raw_output: str) -> list[dict[str, str]]:\n        \"\"\"\n        Parse suggestions from LLM output with multiple fallback strategies.\n\n        Args:\n            raw_output: Raw LLM response string\n\n        Returns:\n            List of suggestion dicts or empty list on parse failure\n        \"\"\"\n        import re\n\n        # Strategy 1: Try direct JSON parse\n        try:\n            # Strip common LLM artifacts (code fences, etc.)\n            cleaned = raw_output.strip()\n            if cleaned.startswith(\"```\"):\n                # Extract content between code fences\n                parts = cleaned.split(\"```\")\n                if len(parts) >= 2:\n                    cleaned = parts[1]\n                    if cleaned.startswith(\"json\"):\n                        cleaned = cleaned[4:]\n                    cleaned = cleaned.strip()\n\n            data = json.loads(cleaned)\n            if isinstance(data, list) and len(data) >= 2:\n                suggestions = []\n                for item in data[:2]:\n                    if isinstance(item, dict) and \"theme\" in item and \"text\" in item:\n                        theme = item[\"theme\"].lower()\n                        if theme in (\"add\", \"question\"):\n                            text = str(item[\"text\"])[:150]  # Truncate to max length\n                            suggestions.append({\"theme\": theme, \"text\": text})\n                if len(suggestions) == 2:\n                    return suggestions\n        except (json.JSONDecodeError, KeyError, TypeError):\n            pass\n\n        # Strategy 2: Regex extraction for common patterns\n        # Handles: \"theme\": \"add\", \"text\": \"...\" patterns\n        suggestions = []\n        for theme in [\"add\", \"question\"]:\n            # Match \"theme\": \"add\" followed by \"text\": \"...\"\n            pattern = rf'\"theme\"\\s*:\\s*\"{theme}\"[^}}]*\"text\"\\s*:\\s*\"([^\"]+)\"'\n            match = re.search(pattern, raw_output, re.IGNORECASE | re.DOTALL)\n            if match:\n                text = match.group(1)[:150]\n                suggestions.append({\"theme\": theme, \"text\": text})\n\n        if len(suggestions) == 2:\n            return suggestions\n\n        # Strategy 3: Alternative pattern - theme and text in any order\n        suggestions = []\n        for theme in [\"add\", \"question\"]:\n            pattern = rf'\"text\"\\s*:\\s*\"([^\"]+)\"[^}}]*\"theme\"\\s*:\\s*\"{theme}\"'\n            match = re.search(pattern, raw_output, re.IGNORECASE | re.DOTALL)\n            if match:\n                text = match.group(1)[:150]\n                suggestions.append({\"theme\": theme, \"text\": text})\n\n        if len(suggestions) == 2:\n            return suggestions\n\n        # Silent fail - return empty list\n        logger.warning(\n            f\"Failed to parse suggestions from LLM output: {raw_output[:200]}\"\n        )\n        return []\n\n    def delete_session(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n    ) -> bool:\n        \"\"\"\n        Delete a build session and all associated data.\n\n        Cleans up session workspace but does NOT terminate the sandbox\n        (sandbox is user-owned and shared across sessions).\n\n        NOTE: This method does NOT commit the transaction. The caller is\n        responsible for committing after this method returns successfully.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID\n\n        Returns:\n            True if deleted, False if not found\n        \"\"\"\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            return False\n\n        # Get user's sandbox to clean up session workspace\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox and sandbox.status.is_active():\n            # Clean up session workspace (but don't terminate sandbox)\n            try:\n                self._sandbox_manager.cleanup_session_workspace(\n                    sandbox_id=sandbox.id,\n                    session_id=session_id,\n                    nextjs_port=session.nextjs_port,\n                )\n                logger.info(\n                    f\"Cleaned up session workspace {session_id} in sandbox {sandbox.id}\"\n                )\n            except Exception as e:\n                # Log but don't fail - session can still be deleted even if\n                # workspace cleanup fails (e.g., if pod is already terminated)\n                logger.warning(f\"Failed to cleanup session workspace {session_id}: {e}\")\n\n        # Delete snapshot files from S3 before removing DB records\n        snapshots = get_snapshots_for_session(self._db_session, session_id)\n        if snapshots:\n            from onyx.file_store.file_store import get_default_file_store\n            from onyx.server.features.build.sandbox.manager.snapshot_manager import (\n                SnapshotManager,\n            )\n\n            snapshot_manager = SnapshotManager(get_default_file_store())\n            for snapshot in snapshots:\n                try:\n                    snapshot_manager.delete_snapshot(snapshot.storage_path)\n                except Exception as e:\n                    logger.warning(\n                        f\"Failed to delete snapshot file {snapshot.storage_path}: {e}\"\n                    )\n\n        # Delete session (uses flush, caller commits)\n        return delete_build_session__no_commit(session_id, user_id, self._db_session)\n\n    # =========================================================================\n    # Message Operations\n    # =========================================================================\n\n    def list_messages(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n    ) -> list[BuildMessage] | None:\n        \"\"\"\n        Get all messages for a session.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID\n\n        Returns:\n            List of BuildMessage models or None if session not found\n        \"\"\"\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            return None\n        return get_session_messages(session_id, self._db_session)\n\n    def send_message(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n        content: str,\n    ) -> Generator[str, None, None]:\n        \"\"\"\n        Send a message to the CLI agent and stream the response as SSE events.\n\n        Validates session, saves user message, streams agent response,\n        and saves assistant response to database.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID\n            content: The message content\n\n        Yields:\n            SSE formatted event strings\n        \"\"\"\n        yield from self._stream_cli_agent_response(session_id, content, user_id)\n\n    def _stream_cli_agent_response(\n        self,\n        session_id: UUID,\n        user_message_content: str,\n        user_id: UUID,\n    ) -> Generator[str, None, None]:\n        \"\"\"\n        Stream the CLI agent's response using SSE format.\n\n        Executes the agent via SandboxManager and streams events back to the client.\n        Uses BuildStreamingState to accumulate chunks and track tool calls.\n        At the end of streaming, saves accumulated state to the database.\n\n        Storage behavior:\n        - User message: Saved immediately at start\n        - agent_message_chunk: Accumulated, saved as one synthetic packet at end/type change\n        - agent_thought_chunk: Accumulated, saved as one synthetic packet at end/type change\n        - tool_call_start: Streamed to frontend only, not saved\n        - tool_call_progress: Only saved when status=\"completed\"\n        - agent_plan_update: Upserted (only latest plan kept per turn)\n        \"\"\"\n\n        def _serialize_acp_event(event: Any, event_type: str) -> str:\n            \"\"\"Serialize an ACP event to SSE format, preserving ALL ACP data.\"\"\"\n            if hasattr(event, \"model_dump\"):\n                data = event.model_dump(mode=\"json\", by_alias=True, exclude_none=False)\n            else:\n                data = {\"raw\": str(event)}\n\n            data[\"type\"] = event_type\n            data[\"timestamp\"] = datetime.now(tz=timezone.utc).isoformat()\n\n            return f\"event: message\\ndata: {json.dumps(data)}\\n\\n\"\n\n        def _format_packet_event(packet: BuildPacket) -> str:\n            \"\"\"Format a BuildPacket as SSE.\"\"\"\n            return f\"event: message\\ndata: {packet.model_dump_json(by_alias=True)}\\n\\n\"\n\n        def _extract_text_from_content(content: Any) -> str:\n            \"\"\"Extract text from ACP content structure.\"\"\"\n            if content is None:\n                return \"\"\n            if hasattr(content, \"type\") and content.type == \"text\":\n                return getattr(content, \"text\", \"\") or \"\"\n            if isinstance(content, list):\n                texts = []\n                for block in content:\n                    if hasattr(block, \"type\") and block.type == \"text\":\n                        texts.append(getattr(block, \"text\", \"\") or \"\")\n                return \"\".join(texts)\n            return \"\"\n\n        def _save_pending_chunks(state: BuildStreamingState) -> None:\n            \"\"\"Save any pending accumulated chunks to the database.\"\"\"\n            # Finalize message chunks\n            message_packet = state.finalize_message_chunks()\n            if message_packet:\n                create_message(\n                    session_id=session_id,\n                    message_type=MessageType.ASSISTANT,\n                    turn_index=state.turn_index,\n                    message_metadata=message_packet,\n                    db_session=self._db_session,\n                )\n\n            # Finalize thought chunks\n            thought_packet = state.finalize_thought_chunks()\n            if thought_packet:\n                create_message(\n                    session_id=session_id,\n                    message_type=MessageType.ASSISTANT,\n                    turn_index=state.turn_index,\n                    message_metadata=thought_packet,\n                    db_session=self._db_session,\n                )\n\n            state.clear_last_chunk_type()\n\n        def _save_build_turn(state: BuildStreamingState) -> None:\n            \"\"\"Save all accumulated state at the end of streaming.\n\n            Similar to save_chat_turn() in the main chat flow.\n            \"\"\"\n            # 1. Save any remaining accumulated chunks\n            _save_pending_chunks(state)\n\n        # Initialize packet logging\n        packet_logger = get_packet_logger()\n\n        # The log file auto-rotates to keep only the last N lines (default 5000).\n        # Add a prominent separator for visual identification of new message streams.\n        log_separator(\n            f\"NEW MESSAGE STREAM - Session: {str(session_id)[:8]} - User: {str(user_id)[:8]}\"\n        )\n        packet_logger.log_raw(\n            \"STREAM-START\",\n            {\n                \"session_id\": str(session_id),\n                \"user_id\": str(user_id),\n                \"message_preview\": user_message_content[:200]\n                + (\"...\" if len(user_message_content) > 200 else \"\"),\n            },\n        )\n\n        try:\n            # Verify session exists and belongs to user\n            session = get_build_session(session_id, user_id, self._db_session)\n            if session is None:\n                error_packet = ErrorPacket(message=\"Session not found\")\n                packet_logger.log(\"error\", error_packet.model_dump())\n                yield _format_packet_event(error_packet)\n                return\n\n            # Get the user's sandbox (now user-owned, not session-owned)\n            sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n\n            # Check if sandbox is running\n            if not sandbox or sandbox.status != SandboxStatus.RUNNING:\n                error_packet = ErrorPacket(\n                    message=\"Sandbox is not running. Please wait for it to start.\"\n                )\n                packet_logger.log(\"error\", error_packet.model_dump())\n                yield _format_packet_event(error_packet)\n                return\n\n            # Update last activity timestamp\n            update_session_activity(session_id, self._db_session)\n\n            # Calculate turn_index BEFORE saving user message\n            # turn_index = count of existing USER messages (this will be the Nth user message)\n\n            # Get count of user messages to determine turn index\n            existing_user_count = (\n                self._db_session.query(BuildMessage)\n                .filter(\n                    BuildMessage.session_id == session_id,\n                    BuildMessage.type == MessageType.USER,\n                )\n                .count()\n            )\n            turn_index = existing_user_count  # This user message is the Nth (0-indexed)\n\n            # Save user message to database\n            user_message_metadata = {\n                \"type\": \"user_message\",\n                \"content\": {\"type\": \"text\", \"text\": user_message_content},\n            }\n            create_message(\n                session_id=session_id,\n                message_type=MessageType.USER,\n                turn_index=turn_index,\n                message_metadata=user_message_metadata,\n                db_session=self._db_session,\n            )\n\n            # Initialize streaming state for this turn\n            state = BuildStreamingState(turn_index=turn_index)\n\n            # Get sandbox\n            sandbox = get_sandbox_by_session_id(self._db_session, session_id)\n            if sandbox is None:\n                error_packet = ErrorPacket(message=\"Sandbox not found\")\n                packet_logger.log(\"error\", error_packet.model_dump())\n                yield _format_packet_event(error_packet)\n                return\n\n            sandbox_id = sandbox.id\n            events_emitted = 0\n\n            packet_logger.log_raw(\n                \"STREAM-BEGIN-AGENT-LOOP\",\n                {\n                    \"session_id\": str(session_id),\n                    \"sandbox_id\": str(sandbox_id),\n                    \"turn_index\": turn_index,\n                },\n            )\n\n            # Stream ACP events directly to frontend\n            for acp_event in self._sandbox_manager.send_message(\n                sandbox_id, session_id, user_message_content\n            ):\n                # Handle SSE keepalive - send comment to keep connection alive\n                if isinstance(acp_event, SSEKeepalive):\n                    # SSE comments start with : and are ignored by EventSource\n                    # but keep the HTTP connection alive\n                    packet_logger.log_sse_emit(\"keepalive\", session_id)\n                    yield \": keepalive\\n\\n\"\n                    continue\n\n                # Check if we need to finalize pending chunks before processing\n                event_type = self._get_event_type(acp_event)\n                if state.should_finalize_chunks(event_type):\n                    _save_pending_chunks(state)\n\n                events_emitted += 1\n\n                # Pass through ACP events with snake_case type names\n                if isinstance(acp_event, AgentMessageChunk):\n                    text = _extract_text_from_content(acp_event.content)\n                    if text:\n                        state.add_message_chunk(text)\n                    event_data = acp_event.model_dump(\n                        mode=\"json\", by_alias=True, exclude_none=False\n                    )\n                    event_data[\"type\"] = \"agent_message_chunk\"\n                    packet_logger.log(\"agent_message_chunk\", event_data)\n                    packet_logger.log_sse_emit(\"agent_message_chunk\", session_id)\n                    yield _serialize_acp_event(acp_event, \"agent_message_chunk\")\n\n                elif isinstance(acp_event, AgentThoughtChunk):\n                    text = _extract_text_from_content(acp_event.content)\n                    if text:\n                        state.add_thought_chunk(text)\n                    packet_logger.log(\n                        \"agent_thought_chunk\",\n                        acp_event.model_dump(mode=\"json\", by_alias=True),\n                    )\n                    packet_logger.log_sse_emit(\"agent_thought_chunk\", session_id)\n                    yield _serialize_acp_event(acp_event, \"agent_thought_chunk\")\n\n                elif isinstance(acp_event, ToolCallStart):\n                    # Stream to frontend but don't save - wait for completion\n                    packet_logger.log(\n                        \"tool_call_start\",\n                        acp_event.model_dump(mode=\"json\", by_alias=True),\n                    )\n                    packet_logger.log_sse_emit(\"tool_call_start\", session_id)\n                    yield _serialize_acp_event(acp_event, \"tool_call_start\")\n\n                elif isinstance(acp_event, ToolCallProgress):\n                    event_data = acp_event.model_dump(\n                        mode=\"json\", by_alias=True, exclude_none=False\n                    )\n                    event_data[\"type\"] = \"tool_call_progress\"\n                    event_data[\"timestamp\"] = datetime.now(tz=timezone.utc).isoformat()\n\n                    # Check if this is a TodoWrite tool call\n                    tool_name = (event_data.get(\"title\") or \"\").lower()\n                    is_todo_write = tool_name in (\"todowrite\", \"todo_write\")\n\n                    # Check if this is a Task (subagent) tool call\n                    raw_input = event_data.get(\"rawInput\") or {}\n                    is_task_tool = (\n                        tool_name == \"task\"\n                        or raw_input.get(\"subagent_type\") is not None\n                        or raw_input.get(\"subagentType\") is not None\n                    )\n\n                    # Save to DB:\n                    # - For TodoWrite: Save every progress update (todos change frequently)\n                    # - For other tools: Only save when status=\"completed\"\n                    if is_todo_write or acp_event.status == \"completed\":\n                        create_message(\n                            session_id=session_id,\n                            message_type=MessageType.ASSISTANT,\n                            turn_index=state.turn_index,\n                            message_metadata=event_data,\n                            db_session=self._db_session,\n                        )\n\n                    # For completed Task tools, also save the output as an agent_message\n                    # This allows the task output to be rendered as assistant text on reload\n                    if is_task_tool and acp_event.status == \"completed\":\n                        raw_output = event_data.get(\"rawOutput\") or {}\n                        task_output = raw_output.get(\"output\")\n                        if task_output and isinstance(task_output, str):\n                            # Strip task_metadata from the output\n                            metadata_idx = task_output.find(\"<task_metadata>\")\n                            if metadata_idx >= 0:\n                                task_output = task_output[:metadata_idx].strip()\n\n                            if task_output:\n                                # Create agent_message packet for the task output\n                                task_output_packet = {\n                                    \"type\": \"agent_message\",\n                                    \"content\": {\"type\": \"text\", \"text\": task_output},\n                                    \"source\": \"task_output\",\n                                    \"timestamp\": datetime.now(\n                                        tz=timezone.utc\n                                    ).isoformat(),\n                                }\n                                create_message(\n                                    session_id=session_id,\n                                    message_type=MessageType.ASSISTANT,\n                                    turn_index=state.turn_index,\n                                    message_metadata=task_output_packet,\n                                    db_session=self._db_session,\n                                )\n\n                    # Log full event to packet logger (can handle large payloads)\n                    packet_logger.log(\"tool_call_progress\", event_data)\n                    packet_logger.log_sse_emit(\"tool_call_progress\", session_id)\n                    yield _serialize_acp_event(acp_event, \"tool_call_progress\")\n\n                elif isinstance(acp_event, AgentPlanUpdate):\n                    event_data = acp_event.model_dump(\n                        mode=\"json\", by_alias=True, exclude_none=False\n                    )\n                    event_data[\"type\"] = \"agent_plan_update\"\n                    event_data[\"timestamp\"] = datetime.now(tz=timezone.utc).isoformat()\n\n                    # Upsert plan immediately\n                    plan_msg = upsert_agent_plan(\n                        session_id=session_id,\n                        turn_index=state.turn_index,\n                        plan_metadata=event_data,\n                        db_session=self._db_session,\n                        existing_plan_id=state.plan_message_id,\n                    )\n                    state.plan_message_id = plan_msg.id\n\n                    packet_logger.log(\"agent_plan_update\", event_data)\n                    packet_logger.log_sse_emit(\"agent_plan_update\", session_id)\n                    yield _serialize_acp_event(acp_event, \"agent_plan_update\")\n\n                elif isinstance(acp_event, CurrentModeUpdate):\n                    event_data = acp_event.model_dump(\n                        mode=\"json\", by_alias=True, exclude_none=False\n                    )\n                    event_data[\"type\"] = \"current_mode_update\"\n                    packet_logger.log(\"current_mode_update\", event_data)\n                    packet_logger.log_sse_emit(\"current_mode_update\", session_id)\n                    yield _serialize_acp_event(acp_event, \"current_mode_update\")\n\n                elif isinstance(acp_event, PromptResponse):\n                    event_data = acp_event.model_dump(\n                        mode=\"json\", by_alias=True, exclude_none=False\n                    )\n                    event_data[\"type\"] = \"prompt_response\"\n                    packet_logger.log(\"prompt_response\", event_data)\n                    packet_logger.log_sse_emit(\"prompt_response\", session_id)\n                    yield _serialize_acp_event(acp_event, \"prompt_response\")\n\n                elif isinstance(acp_event, ACPError):\n                    event_data = acp_event.model_dump(\n                        mode=\"json\", by_alias=True, exclude_none=False\n                    )\n                    event_data[\"type\"] = \"error\"\n                    packet_logger.log(\"error\", event_data)\n                    packet_logger.log_sse_emit(\"error\", session_id)\n                    yield _serialize_acp_event(acp_event, \"error\")\n\n                else:\n                    # Unrecognized packet type - log it but don't stream to frontend\n                    event_type_name = type(acp_event).__name__\n                    event_data = acp_event.model_dump(\n                        mode=\"json\", by_alias=True, exclude_none=False\n                    )\n                    event_data[\"type\"] = f\"unrecognized_{event_type_name.lower()}\"\n                    packet_logger.log(\n                        f\"unrecognized_{event_type_name.lower()}\", event_data\n                    )\n\n            # Save all accumulated state at end of streaming\n            _save_build_turn(state)\n\n            # Log streaming completion\n            packet_logger.log_raw(\n                \"STREAM-COMPLETE\",\n                {\n                    \"session_id\": str(session_id),\n                    \"sandbox_id\": str(sandbox_id),\n                    \"turn_index\": turn_index,\n                    \"events_emitted\": events_emitted,\n                    \"message_chunks_accumulated\": len(state.message_chunks),\n                    \"thought_chunks_accumulated\": len(state.thought_chunks),\n                },\n            )\n\n            # Update heartbeat after successful message exchange\n            update_sandbox_heartbeat(self._db_session, sandbox_id)\n\n        except ValueError as e:\n            error_packet = ErrorPacket(message=str(e))\n            packet_logger.log(\"error\", error_packet.model_dump())\n            packet_logger.log_raw(\n                \"STREAM-ERROR\",\n                {\n                    \"session_id\": str(session_id),\n                    \"error_type\": \"ValueError\",\n                    \"error\": str(e),\n                },\n            )\n            logger.exception(\"ValueError in build message streaming\")\n            yield _format_packet_event(error_packet)\n        except RuntimeError as e:\n            error_packet = ErrorPacket(message=str(e))\n            packet_logger.log(\"error\", error_packet.model_dump())\n            packet_logger.log_raw(\n                \"STREAM-ERROR\",\n                {\n                    \"session_id\": str(session_id),\n                    \"error_type\": \"RuntimeError\",\n                    \"error\": str(e),\n                },\n            )\n            logger.exception(f\"RuntimeError in build message streaming: {e}\")\n            yield _format_packet_event(error_packet)\n        except Exception as e:\n            error_packet = ErrorPacket(message=str(e))\n            packet_logger.log(\"error\", error_packet.model_dump())\n            packet_logger.log_raw(\n                \"STREAM-ERROR\",\n                {\n                    \"session_id\": str(session_id),\n                    \"error_type\": type(e).__name__,\n                    \"error\": str(e),\n                },\n            )\n            logger.exception(\"Unexpected error in build message streaming\")\n            yield _format_packet_event(error_packet)\n\n    def _get_event_type(self, acp_event: Any) -> str:\n        \"\"\"Get the event type string for an ACP event.\"\"\"\n        if isinstance(acp_event, AgentMessageChunk):\n            return \"agent_message_chunk\"\n        elif isinstance(acp_event, AgentThoughtChunk):\n            return \"agent_thought_chunk\"\n        elif isinstance(acp_event, ToolCallStart):\n            return \"tool_call_start\"\n        elif isinstance(acp_event, ToolCallProgress):\n            return \"tool_call_progress\"\n        elif isinstance(acp_event, AgentPlanUpdate):\n            return \"agent_plan_update\"\n        elif isinstance(acp_event, CurrentModeUpdate):\n            return \"current_mode_update\"\n        elif isinstance(acp_event, PromptResponse):\n            return \"prompt_response\"\n        elif isinstance(acp_event, ACPError):\n            return \"error\"\n        return \"unknown\"\n\n    # =========================================================================\n    # Artifact Operations\n    # =========================================================================\n\n    def list_artifacts(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n    ) -> list[dict[str, Any]] | None:\n        \"\"\"\n        List artifacts generated in a session.\n\n        Returns artifacts in the format expected by the frontend (matching ArtifactResponse).\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID to verify ownership\n\n        Returns:\n            List of artifact dicts or None if session not found or user doesn't own session\n        \"\"\"\n        import uuid\n\n        # Verify session ownership\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            return None\n\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox is None:\n            return None\n\n        artifacts: list[dict[str, Any]] = []\n        now = datetime.now(timezone.utc)\n\n        # Check for outputs directory using sandbox manager\n        try:\n            output_entries = self._sandbox_manager.list_directory(\n                sandbox_id=sandbox.id,\n                session_id=session_id,\n                path=\"outputs\",\n            )\n        except ValueError:\n            # Directory doesn't exist\n            return artifacts\n\n        # Check for webapp (web directory in outputs)\n        has_webapp = any(\n            entry.is_directory and entry.name == \"web\" for entry in output_entries\n        )\n\n        if has_webapp:\n            artifacts.append(\n                {\n                    \"id\": str(uuid.uuid4()),\n                    \"session_id\": str(session_id),\n                    \"type\": \"web_app\",  # Use web_app to match streaming packet type\n                    \"name\": \"Web Application\",\n                    \"path\": \"outputs/web\",\n                    \"preview_url\": None,  # Preview is via webapp URL, not artifact preview\n                    \"created_at\": now.isoformat(),\n                    \"updated_at\": now.isoformat(),\n                }\n            )\n\n        return artifacts\n\n    def download_artifact(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n        path: str,\n    ) -> tuple[bytes, str, str] | None:\n        \"\"\"\n        Download a specific artifact file.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID to verify ownership\n            path: Relative path to the artifact (within session workspace)\n\n        Returns:\n            Tuple of (content, mime_type, filename) or None if not found\n\n        Raises:\n            ValueError: If path traversal attempted or path is a directory\n        \"\"\"\n        # Verify session ownership\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            return None\n\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox is None:\n            return None\n\n        # Extract filename from path\n        filename = Path(path).name\n\n        # Filter out opencode.json files\n        if filename == \"opencode.json\":\n            return None\n\n        # Use sandbox manager to read file (works for both local and K8s)\n        try:\n            content = self._sandbox_manager.read_file(\n                sandbox_id=sandbox.id,\n                session_id=session_id,\n                path=path,\n            )\n        except ValueError as e:\n            # read_file raises ValueError for not found or directory\n            if \"Not a file\" in str(e):\n                raise ValueError(\"Cannot download directory\")\n            return None\n\n        mime_type, _ = mimetypes.guess_type(filename)\n\n        return (content, mime_type or \"application/octet-stream\", filename)\n\n    def export_docx(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n        path: str,\n    ) -> tuple[bytes, str] | None:\n        \"\"\"\n        Export a markdown file as DOCX.\n\n        Reads the markdown file and converts it to DOCX using pypandoc.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID to verify ownership\n            path: Relative path to the markdown file\n\n        Returns:\n            Tuple of (docx_bytes, filename) or None if not found\n\n        Raises:\n            ValueError: If path traversal attempted, file is not markdown, etc.\n        \"\"\"\n        result = self.download_artifact(session_id, user_id, path)\n        if result is None:\n            return None\n\n        content_bytes, _mime_type, filename = result\n\n        if not filename.lower().endswith(\".md\"):\n            raise ValueError(\"Only markdown (.md) files can be exported as DOCX\")\n\n        import tempfile\n        import pypandoc  # type: ignore\n\n        md_text = content_bytes.decode(\"utf-8\")\n\n        with tempfile.NamedTemporaryFile(suffix=\".docx\", delete=True) as tmp:\n            pypandoc.convert_text(md_text, \"docx\", format=\"md\", outputfile=tmp.name)\n            docx_bytes = tmp.read()\n\n        docx_filename = filename.rsplit(\".\", 1)[0] + \".docx\"\n        return (docx_bytes, docx_filename)\n\n    def get_pptx_preview(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n        path: str,\n    ) -> dict[str, Any] | None:\n        \"\"\"\n        Generate slide image previews for a PPTX file.\n\n        Converts the PPTX to individual JPEG slide images using\n        soffice + pdftoppm, with caching to avoid re-conversion.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID to verify ownership\n            path: Relative path to the PPTX file within session workspace\n\n        Returns:\n            Dict with slide_count, slide_paths, and cached flag,\n            or None if session not found.\n\n        Raises:\n            ValueError: If path is invalid or conversion fails\n        \"\"\"\n        import hashlib\n\n        # Verify session ownership\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            return None\n\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox is None:\n            return None\n\n        # Validate file extension\n        if not path.lower().endswith(\".pptx\"):\n            raise ValueError(\"Only .pptx files are supported for preview\")\n\n        # Compute cache directory from path hash\n        path_hash = hashlib.sha256(path.encode()).hexdigest()[:12]\n        cache_dir = f\"outputs/.pptx-preview/{path_hash}\"\n\n        slide_paths, cached = self._sandbox_manager.generate_pptx_preview(\n            sandbox_id=sandbox.id,\n            session_id=session_id,\n            pptx_path=path,\n            cache_dir=cache_dir,\n        )\n\n        return {\n            \"slide_count\": len(slide_paths),\n            \"slide_paths\": slide_paths,\n            \"cached\": cached,\n        }\n\n    def get_webapp_info(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n    ) -> dict[str, Any] | None:\n        \"\"\"\n        Get webapp information for a session.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID to verify ownership\n\n        Returns:\n            Dict with has_webapp, webapp_url, status, and ready,\n            or None if session not found\n        \"\"\"\n        # Verify session ownership\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            return None\n\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox is None:\n            return {\n                \"has_webapp\": False,\n                \"webapp_url\": None,\n                \"status\": \"no_sandbox\",\n                \"ready\": False,\n                \"sharing_scope\": session.sharing_scope,\n            }\n\n        # Return the proxy URL - the proxy handles routing to the correct sandbox\n        # for both local and Kubernetes environments\n        webapp_url = None\n        ready = False\n        if session.nextjs_port:\n            webapp_url = f\"{WEB_DOMAIN}/api/build/sessions/{session_id}/webapp\"\n\n            # Quick health check: can the API server reach the NextJS dev server?\n            ready = self._check_nextjs_ready(sandbox.id, session.nextjs_port)\n\n            # If not ready, ask the sandbox manager to ensure Next.js is running.\n            # For the local backend this triggers a background restart so that the\n            # frontend poll loop eventually sees ready=True without the user having\n            # to manually recreate the session.\n            if not ready:\n                self._sandbox_manager.ensure_nextjs_running(\n                    sandbox.id, session_id, session.nextjs_port\n                )\n\n        return {\n            \"has_webapp\": session.nextjs_port is not None,\n            \"webapp_url\": webapp_url,\n            \"status\": sandbox.status.value,\n            \"ready\": ready,\n            \"sharing_scope\": session.sharing_scope,\n        }\n\n    def _check_nextjs_ready(self, sandbox_id: UUID, port: int) -> bool:\n        \"\"\"Check if the NextJS dev server is responding.\n\n        Does a quick HTTP GET to the sandbox's internal URL with a short timeout.\n        Returns True if the server responds with any status code, False on timeout\n        or connection error.\n        \"\"\"\n        import httpx\n\n        from onyx.server.features.build.sandbox.base import get_sandbox_manager\n\n        try:\n            sandbox_manager = get_sandbox_manager()\n            internal_url = sandbox_manager.get_webapp_url(sandbox_id, port)\n            with httpx.Client(timeout=2.0) as client:\n                resp = client.get(internal_url)\n                # Any response (even 500) means the server is up\n                return resp.status_code < 500\n        except (httpx.TimeoutException, httpx.ConnectError, Exception):\n            return False\n\n    def download_webapp_zip(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n    ) -> tuple[bytes, str] | None:\n        \"\"\"\n        Create a zip file of the webapp directory.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID to verify ownership\n\n        Returns:\n            Tuple of (zip_bytes, filename) or None if session/webapp not found\n        \"\"\"\n        # Verify session ownership\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            return None\n\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox is None:\n            return None\n\n        # Check if web directory exists using sandbox manager\n        try:\n            self._sandbox_manager.list_directory(\n                sandbox_id=sandbox.id,\n                session_id=session_id,\n                path=\"outputs/web\",\n            )\n        except ValueError:\n            # Directory doesn't exist\n            return None\n\n        # Recursively collect all files in the web directory\n        def collect_files(dir_path: str) -> list[tuple[str, str]]:\n            \"\"\"Collect all files recursively, returning (full_path, relative_path) tuples.\"\"\"\n            files: list[tuple[str, str]] = []\n            try:\n                entries = self._sandbox_manager.list_directory(\n                    sandbox_id=sandbox.id,\n                    session_id=session_id,\n                    path=dir_path,\n                )\n                for entry in entries:\n                    if entry.is_directory:\n                        # Recursively collect files from subdirectory\n                        files.extend(collect_files(entry.path))\n                    else:\n                        # entry.path is relative to session root (e.g., \"outputs/web/file.txt\")\n                        # arcname should be relative to web dir (e.g., \"file.txt\")\n                        arcname = entry.path.replace(\"outputs/web/\", \"\", 1)\n                        files.append((entry.path, arcname))\n            except ValueError:\n                pass  # Directory doesn't exist, skip\n            return files\n\n        file_list = collect_files(\"outputs/web\")\n\n        # Create zip file in memory\n        zip_buffer = io.BytesIO()\n        with zipfile.ZipFile(zip_buffer, \"w\", zipfile.ZIP_DEFLATED) as zip_file:\n            for full_path, arcname in file_list:\n                try:\n                    content = self._sandbox_manager.read_file(\n                        sandbox_id=sandbox.id,\n                        session_id=session_id,\n                        path=full_path,\n                    )\n                    zip_file.writestr(arcname, content)\n                except ValueError:\n                    # Skip files that can't be read\n                    pass\n\n        zip_buffer.seek(0)\n\n        # Create filename with session name or ID\n        session_name = session.name or f\"session-{str(session_id)[:8]}\"\n        # Sanitize filename\n        safe_name = \"\".join(\n            c if c.isalnum() or c in (\"-\", \"_\") else \"_\" for c in session_name\n        )\n        filename = f\"{safe_name}-webapp.zip\"\n\n        return zip_buffer.getvalue(), filename\n\n    def download_directory(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n        path: str,\n    ) -> tuple[bytes, str] | None:\n        \"\"\"\n        Create a zip file of an arbitrary directory in the session workspace.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID to verify ownership\n            path: Relative path to the directory (within session workspace)\n\n        Returns:\n            Tuple of (zip_bytes, filename) or None if session not found\n\n        Raises:\n            ValueError: If path traversal attempted or path is not a directory\n        \"\"\"\n        # Verify session ownership\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            return None\n\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox is None:\n            return None\n\n        # Check if directory exists\n        try:\n            self._sandbox_manager.list_directory(\n                sandbox_id=sandbox.id,\n                session_id=session_id,\n                path=path,\n            )\n        except ValueError:\n            return None\n\n        # Recursively collect all files\n        def collect_files(dir_path: str) -> list[tuple[str, str]]:\n            \"\"\"Collect all files recursively, returning (full_path, arcname) tuples.\"\"\"\n            files: list[tuple[str, str]] = []\n            try:\n                entries = self._sandbox_manager.list_directory(\n                    sandbox_id=sandbox.id,\n                    session_id=session_id,\n                    path=dir_path,\n                )\n                for entry in entries:\n                    if entry.is_directory:\n                        files.extend(collect_files(entry.path))\n                    else:\n                        # arcname is relative to the target directory\n                        prefix_len = len(path) + 1  # +1 for trailing slash\n                        arcname = entry.path[prefix_len:]\n                        files.append((entry.path, arcname))\n            except ValueError:\n                pass\n            return files\n\n        file_list = collect_files(path)\n\n        # Create zip file in memory\n        zip_buffer = io.BytesIO()\n        with zipfile.ZipFile(zip_buffer, \"w\", zipfile.ZIP_DEFLATED) as zip_file:\n            for full_path, arcname in file_list:\n                try:\n                    content = self._sandbox_manager.read_file(\n                        sandbox_id=sandbox.id,\n                        session_id=session_id,\n                        path=full_path,\n                    )\n                    zip_file.writestr(arcname, content)\n                except ValueError:\n                    pass\n\n        zip_buffer.seek(0)\n\n        # Use the directory name for the zip filename\n        dir_name = Path(path).name\n        safe_name = \"\".join(\n            c if c.isalnum() or c in (\"-\", \"_\", \".\") else \"_\" for c in dir_name\n        )\n        filename = f\"{safe_name}.zip\"\n\n        return zip_buffer.getvalue(), filename\n\n    # =========================================================================\n    # File System Operations\n    # =========================================================================\n\n    def list_directory(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n        path: str,\n    ) -> DirectoryListing | None:\n        \"\"\"\n        List files and directories in the session workspace.\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID to verify ownership\n            path: Relative path from session workspace root (empty string for root)\n\n        Returns:\n            DirectoryListing with sorted entries (directories first) or None if not found\n\n        Raises:\n            ValueError: If path traversal attempted or path is not a directory\n        \"\"\"\n        # Verify session ownership\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            return None\n\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox is None:\n            return None\n\n        # Use sandbox manager to list directory (works for both local and K8s)\n        # If the directory doesn't exist (e.g., session workspace not yet loaded),\n        # return an empty listing rather than erroring out.\n        try:\n            raw_entries = self._sandbox_manager.list_directory(\n                sandbox_id=sandbox.id,\n                session_id=session_id,\n                path=path,\n            )\n        except ValueError as e:\n            if \"path traversal\" in str(e).lower():\n                raise\n            return DirectoryListing(path=path, entries=[])\n\n        # Filter hidden files and directories\n        entries: list[FileSystemEntry] = [\n            entry\n            for entry in raw_entries\n            if entry.name not in HIDDEN_PATTERNS and not entry.name.startswith(\".\")\n        ]\n\n        # Sort: directories first, then files, both alphabetically\n        entries.sort(key=lambda e: (not e.is_directory, e.name.lower()))\n\n        return DirectoryListing(path=path, entries=entries)\n\n    def get_upload_stats(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n    ) -> tuple[int, int]:\n        \"\"\"Get current file count and total size for a session's uploads.\n\n        Delegates to SandboxManager for the actual filesystem query (supports both\n        local filesystem and Kubernetes pods).\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID to verify ownership\n\n        Returns:\n            Tuple of (file_count, total_size_bytes)\n\n        Raises:\n            ValueError: If session not found\n        \"\"\"\n        # Verify session ownership\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            raise ValueError(\"Session not found\")\n\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox is None:\n            raise ValueError(\"Sandbox not found\")\n\n        # Delegate to sandbox manager (handles both local and K8s)\n        return self._sandbox_manager.get_upload_stats(\n            sandbox_id=sandbox.id,\n            session_id=session_id,\n        )\n\n    def upload_file(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n        filename: str,\n        content: bytes,\n    ) -> tuple[str, int]:\n        \"\"\"Upload a file to the session's workspace.\n\n        Delegates to SandboxManager for the actual file write (supports both\n        local filesystem and Kubernetes pods).\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID to verify ownership\n            filename: Sanitized filename (validation done at API layer)\n            content: File content as bytes\n\n        Returns:\n            Tuple of (relative_path, size_bytes) where the file was saved\n\n        Raises:\n            ValueError: If session not found or upload limits exceeded\n        \"\"\"\n        # Verify session ownership\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            raise ValueError(\"Session not found\")\n\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox is None:\n            raise ValueError(\"Sandbox not found\")\n\n        # Check upload limits\n        file_count, total_size = self.get_upload_stats(session_id, user_id)\n\n        if file_count >= MAX_UPLOAD_FILES_PER_SESSION:\n            raise UploadLimitExceededError(\n                f\"Maximum number of files ({MAX_UPLOAD_FILES_PER_SESSION}) reached\"\n            )\n\n        if total_size + len(content) > MAX_TOTAL_UPLOAD_SIZE_BYTES:\n            max_mb = MAX_TOTAL_UPLOAD_SIZE_BYTES // (1024 * 1024)\n            raise UploadLimitExceededError(\n                f\"Total upload size limit ({max_mb}MB) exceeded\"\n            )\n\n        # Delegate to sandbox manager (handles both local and K8s)\n        relative_path = self._sandbox_manager.upload_file(\n            sandbox_id=sandbox.id,\n            session_id=session_id,\n            filename=filename,\n            content=content,\n        )\n\n        # Update heartbeat - file upload is user activity that keeps sandbox alive\n        update_sandbox_heartbeat(self._db_session, sandbox.id)\n\n        return relative_path, len(content)\n\n    def delete_file(\n        self,\n        session_id: UUID,\n        user_id: UUID,\n        path: str,\n    ) -> bool:\n        \"\"\"Delete a file from the session's workspace.\n\n        Delegates to SandboxManager for the actual file delete (supports both\n        local filesystem and Kubernetes pods).\n\n        Args:\n            session_id: The session UUID\n            user_id: The user ID to verify ownership\n            path: Relative path to the file (e.g., \"attachments/doc.pdf\")\n\n        Returns:\n            True if file was deleted, False if not found\n\n        Raises:\n            ValueError: If session not found or path traversal attempted\n        \"\"\"\n        # Verify session ownership\n        session = get_build_session(session_id, user_id, self._db_session)\n        if session is None:\n            raise ValueError(\"Session not found\")\n\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox is None:\n            raise ValueError(\"Sandbox not found\")\n\n        # Delegate to sandbox manager (handles both local and K8s)\n        deleted = self._sandbox_manager.delete_file(\n            sandbox_id=sandbox.id,\n            session_id=session_id,\n            path=path,\n        )\n\n        if deleted:\n            # SandboxManager already logs the deletion details\n            # Update heartbeat - file deletion is user activity that keeps sandbox alive\n            update_sandbox_heartbeat(self._db_session, sandbox.id)\n\n        return deleted\n\n    # =========================================================================\n    # Sandbox Management Operations\n    # =========================================================================\n\n    def terminate_user_sandbox(self, user_id: UUID) -> bool:\n        \"\"\"Terminate the user's sandbox and clean up all session workspaces.\n\n        Used for explicit \"start fresh\" functionality.\n\n        Args:\n            user_id: The user ID\n\n        Returns:\n            True if sandbox was terminated, False if user had no sandbox\n        \"\"\"\n        from onyx.server.features.build.db.sandbox import (\n            update_sandbox_status__no_commit,\n        )\n\n        sandbox = get_sandbox_by_user_id(self._db_session, user_id)\n        if sandbox is None:\n            return False\n\n        if sandbox.status == SandboxStatus.TERMINATED:\n            logger.info(f\"Sandbox {sandbox.id} already terminated\")\n            return True\n\n        try:\n            # Terminate the sandbox (this cleans up all resources)\n            self._sandbox_manager.terminate(sandbox.id)\n            logger.info(f\"Terminated sandbox {sandbox.id} for user {user_id}\")\n\n            # Update status in database\n            update_sandbox_status__no_commit(\n                self._db_session, sandbox.id, SandboxStatus.TERMINATED\n            )\n            self._db_session.flush()\n\n            return True\n\n        except Exception as e:\n            logger.error(f\"Failed to terminate sandbox {sandbox.id}: {e}\")\n            raise RuntimeError(f\"Failed to terminate sandbox: {e}\") from e\n"
  },
  {
    "path": "backend/onyx/server/features/build/session/prompts.py",
    "content": "\"\"\"Prompts used for build session operations.\"\"\"\n\n# Build session naming prompts (similar to chat naming)\nBUILD_NAMING_SYSTEM_PROMPT = \"\"\"\nGiven the user's build request, provide a SHORT name for the build session. \\\nFocus on the main task or goal the user wants to accomplish.\n\nIMPORTANT: DO NOT OUTPUT ANYTHING ASIDE FROM THE NAME. MAKE IT AS CONCISE AS POSSIBLE. \\\nNEVER USE MORE THAN 5 WORDS, LESS IS FINE.\n\"\"\".strip()\n\nBUILD_NAMING_USER_PROMPT = \"\"\"\nUser's request: {user_message}\n\nProvide a short name for this build session.\n\"\"\".strip()\n\n\n# Follow-up suggestion prompts\nFOLLOWUP_SUGGESTIONS_SYSTEM_PROMPT = \"\"\"You generate follow-up suggestions for an AI workplace assistant conversation.\n\nGiven the user's initial request and the assistant's response, generate exactly 2 suggestions:\n\n1. ADD: A suggestion to extend or enhance what was built.\nStart with \"Great! Now add...\" or similar positive acknowledgment + extension.\n\n2. QUESTION: A follow-up question the user might want to ask about the implementation or to explore further.\nStart with something like \"Can you explain...\" or \"How does...\".\n\nIMPORTANT:\n- Keep each suggestion SHORT (under 100 characters preferred, max 150)\n- Make them specific to the actual request and response\n- They should feel natural, like what a user might actually type\n- Output ONLY a JSON array with objects containing \"theme\" and \"text\" fields\n- Do NOT wrap in code fences or add any other text\n\nExample output:\n[{\"theme\": \"add\", \"text\": \"Great! Now add form validation for the email field\"},\n{\"theme\": \"question\", \"text\": \"Can you explain how the authentication flow works?\"}]\"\"\".strip()\n\nFOLLOWUP_SUGGESTIONS_USER_PROMPT = \"\"\"User's request:\n{user_message}\n\nAssistant's response:\n{assistant_message}\n\nGenerate 2 follow-up suggestions (add, question) as a JSON array:\"\"\".strip()\n"
  },
  {
    "path": "backend/onyx/server/features/build/utils.py",
    "content": "\"\"\"Utility functions for Build Mode feature announcements and file validation.\"\"\"\n\nimport re\nfrom pathlib import Path\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import NotificationType\nfrom onyx.db.models import User\nfrom onyx.db.notification import create_notification\nfrom onyx.feature_flags.factory import get_default_feature_flag_provider\nfrom onyx.feature_flags.interface import NoOpFeatureFlagProvider\nfrom onyx.file_processing.file_types import OnyxFileExtensions\nfrom onyx.file_processing.file_types import OnyxMimeTypes\nfrom onyx.server.features.build.configs import ENABLE_CRAFT\nfrom onyx.server.features.build.configs import MAX_UPLOAD_FILE_SIZE_BYTES\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# =============================================================================\n# File Upload Validation\n# =============================================================================\n\n# Additional extensions for code files (safe to read, not execute)\nCODE_FILE_EXTENSIONS: set[str] = {\n    \".py\",\n    \".js\",\n    \".ts\",\n    \".tsx\",\n    \".jsx\",\n    \".css\",\n    \".scss\",\n    \".less\",\n    \".java\",\n    \".go\",\n    \".rs\",\n    \".cpp\",\n    \".c\",\n    \".h\",\n    \".hpp\",\n    \".cs\",\n    \".rb\",\n    \".php\",\n    \".swift\",\n    \".kt\",\n    \".scala\",\n    \".sh\",\n    \".bash\",\n    \".zsh\",\n    \".env\",\n    \".ini\",\n    \".toml\",\n    \".cfg\",\n    \".properties\",\n}\n\n# Additional MIME types for code files\nCODE_MIME_TYPES: set[str] = {\n    \"text/x-python\",\n    \"text/x-java\",\n    \"text/x-c\",\n    \"text/x-c++\",\n    \"text/x-go\",\n    \"text/x-rust\",\n    \"text/x-shellscript\",\n    \"text/css\",\n    \"text/javascript\",\n    \"application/javascript\",\n    \"application/typescript\",\n    \"application/octet-stream\",  # Generic (for code files with unknown type)\n}\n\n# Combine base Onyx extensions with code file extensions\nALLOWED_EXTENSIONS: set[str] = (\n    OnyxFileExtensions.ALL_ALLOWED_EXTENSIONS | CODE_FILE_EXTENSIONS\n)\n\n# Combine base Onyx MIME types with code MIME types\nALLOWED_MIME_TYPES: set[str] = OnyxMimeTypes.ALLOWED_MIME_TYPES | CODE_MIME_TYPES\n\n# Blocked extensions (executable/dangerous files)\nBLOCKED_EXTENSIONS: set[str] = {\n    # Windows executables\n    \".exe\",\n    \".dll\",\n    \".msi\",\n    \".scr\",\n    \".com\",\n    \".bat\",\n    \".cmd\",\n    \".ps1\",\n    # macOS\n    \".app\",\n    \".dmg\",\n    \".pkg\",\n    # Linux\n    \".deb\",\n    \".rpm\",\n    \".so\",\n    # Cross-platform\n    \".jar\",\n    \".war\",\n    \".ear\",\n    # Other potentially dangerous\n    \".vbs\",\n    \".vbe\",\n    \".wsf\",\n    \".wsh\",\n    \".hta\",\n    \".cpl\",\n    \".reg\",\n    \".lnk\",\n    \".pif\",\n}\n\n# Regex for sanitizing filenames (allow alphanumeric, dash, underscore, period)\nSAFE_FILENAME_PATTERN = re.compile(r\"[^a-zA-Z0-9._-]\")\n\n\ndef validate_file_extension(filename: str) -> tuple[bool, str | None]:\n    \"\"\"Validate file extension against allowlist.\n\n    Args:\n        filename: The filename to validate\n\n    Returns:\n        Tuple of (is_valid, error_message)\n    \"\"\"\n    ext = Path(filename).suffix.lower()\n\n    if not ext:\n        return False, \"File must have an extension\"\n\n    if ext in BLOCKED_EXTENSIONS:\n        return False, f\"File type '{ext}' is not allowed for security reasons\"\n\n    if ext not in ALLOWED_EXTENSIONS:\n        return False, f\"File type '{ext}' is not supported\"\n\n    return True, None\n\n\ndef validate_mime_type(content_type: str | None) -> bool:\n    \"\"\"Validate MIME type against allowlist.\n\n    Args:\n        content_type: The Content-Type header value\n\n    Returns:\n        True if the MIME type is allowed, False otherwise\n    \"\"\"\n    if not content_type:\n        # Allow missing content type - we'll validate by extension\n        return True\n\n    # Extract base MIME type (ignore charset etc.)\n    mime_type = content_type.split(\";\")[0].strip().lower()\n\n    if mime_type not in ALLOWED_MIME_TYPES:\n        return False\n\n    return True\n\n\ndef validate_file_size(size: int) -> bool:\n    \"\"\"Validate file size against limit.\n\n    Args:\n        size: File size in bytes\n\n    Returns:\n        True if the file size is allowed, False otherwise\n    \"\"\"\n    if size <= 0:\n        return False\n\n    if size > MAX_UPLOAD_FILE_SIZE_BYTES:\n        return False\n\n    return True\n\n\ndef sanitize_filename(filename: str) -> str:\n    \"\"\"Sanitize filename to prevent path traversal and other issues.\n\n    Args:\n        filename: The original filename\n\n    Returns:\n        Sanitized filename safe for filesystem use\n    \"\"\"\n    # Remove any path components (prevent path traversal)\n    filename = Path(filename).name\n\n    # Remove null bytes\n    filename = filename.replace(\"\\x00\", \"\")\n\n    # Replace unsafe characters with underscore\n    filename = SAFE_FILENAME_PATTERN.sub(\"_\", filename)\n\n    # Remove leading/trailing dots and spaces\n    filename = filename.strip(\". \")\n\n    # Ensure filename is not empty\n    if not filename:\n        filename = \"unnamed_file\"\n\n    # Ensure filename doesn't start with a dot (hidden file)\n    if filename.startswith(\".\"):\n        filename = \"_\" + filename[1:]\n\n    # Limit length (preserve extension)\n    max_length = 255\n    if len(filename) > max_length:\n        stem = Path(filename).stem\n        ext = Path(filename).suffix\n        max_stem_length = max_length - len(ext)\n        filename = stem[:max_stem_length] + ext\n\n    return filename\n\n\ndef validate_file(\n    filename: str,\n    content_type: str | None,\n    size: int,\n) -> tuple[bool, str | None]:\n    \"\"\"Validate a file for upload.\n\n    Performs all validation checks:\n    - Extension validation\n    - MIME type validation\n    - Size validation\n\n    Args:\n        filename: The filename to validate\n        content_type: The Content-Type header value\n        size: File size in bytes\n\n    Returns:\n        Tuple of (is_valid, error_message). error_message is None if valid.\n    \"\"\"\n    # Validate extension\n    ext_valid, ext_error = validate_file_extension(filename)\n    if not ext_valid:\n        return False, ext_error\n\n    # Validate MIME type\n    if not validate_mime_type(content_type):\n        return False, f\"MIME type '{content_type}' is not supported\"\n\n    # Validate file size\n    if not validate_file_size(size):\n        return (\n            False,\n            f\"File size exceeds maximum allowed size of {MAX_UPLOAD_FILE_SIZE_BYTES} bytes\",\n        )\n\n    return True, None\n\n\n# =============================================================================\n# Build Mode Feature Announcements\n# =============================================================================\n\n# PostHog feature flag key for enabling Onyx Craft (cloud rollout control)\n# Flag logic: True = enabled, False/null/not found = disabled\nONYX_CRAFT_ENABLED_FLAG = \"onyx-craft-enabled\"\n\n# PostHog feature flag key for controlling whether a user has usage limits\n# Flag logic: True = user has usage limits (rate limits apply), False/null/not found = no limits (unlimited usage)\nCRAFT_HAS_USAGE_LIMITS = \"craft-has-usage-limits\"\n\n# Feature identifier in additional_data\nBUILD_MODE_FEATURE_ID = \"build_mode\"\n\n\ndef is_onyx_craft_enabled(user: User) -> bool:\n    \"\"\"\n    Check if Onyx Craft (Build Mode) is enabled for the user.\n\n    Flag logic for \"onyx-craft-enabled\":\n    - Flag = True → enabled (Onyx Craft is available)\n    - Flag = False → disabled (Onyx Craft is not available)\n    - Flag = null/not found → disabled (Onyx Craft is not available)\n\n    Only explicit True enables the feature.\n    \"\"\"\n    feature_flag_provider = get_default_feature_flag_provider()\n\n    # If no PostHog configured (NoOp provider), use ENABLE_CRAFT env var\n    if isinstance(feature_flag_provider, NoOpFeatureFlagProvider):\n        return ENABLE_CRAFT\n\n    # Use the feature flag provider\n    is_enabled = feature_flag_provider.feature_enabled(\n        ONYX_CRAFT_ENABLED_FLAG,\n        user.id,\n    )\n\n    if is_enabled:\n        logger.debug(\"Onyx Craft enabled via PostHog feature flag\")\n        return True\n    else:\n        logger.debug(\"Onyx Craft disabled via PostHog feature flag\")\n        return False\n\n\ndef ensure_build_mode_intro_notification(user: User, db_session: Session) -> None:\n    \"\"\"\n    Create Build Mode intro notification for user if enabled and not already exists.\n\n    Called from /api/notifications endpoint. Uses notification deduplication\n    to ensure each user only gets one notification.\n    \"\"\"\n    # PostHog feature flag check - only show notification if Onyx Craft is enabled\n    if not is_onyx_craft_enabled(user):\n        return\n\n    # Create notification (will be skipped if already exists due to deduplication)\n    create_notification(\n        user_id=user.id,\n        notif_type=NotificationType.FEATURE_ANNOUNCEMENT,\n        db_session=db_session,\n        title=\"Introducing Onyx Craft\",\n        description=\"Unleash Onyx to create dashboards, slides, documents, and more with your connected data.\",\n        additional_data={\"feature\": BUILD_MODE_FEATURE_ID},\n    )\n"
  },
  {
    "path": "backend/onyx/server/features/default_assistant/api.py",
    "content": "\"\"\"API endpoints for default assistant configuration.\"\"\"\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.persona import get_default_assistant\nfrom onyx.db.persona import update_default_assistant_configuration\nfrom onyx.prompts.chat_prompts import DEFAULT_SYSTEM_PROMPT\nfrom onyx.server.features.default_assistant.models import DefaultAssistantConfiguration\nfrom onyx.server.features.default_assistant.models import DefaultAssistantUpdateRequest\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/admin/default-assistant\")\n\n\n@router.get(\"/configuration\")\ndef get_default_assistant_configuration(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> DefaultAssistantConfiguration:\n    \"\"\"Get the current default assistant configuration.\n\n    Returns:\n        DefaultAssistantConfiguration with current tool IDs and system prompt\n    \"\"\"\n    persona = get_default_assistant(db_session)\n    if not persona:\n        raise HTTPException(status_code=404, detail=\"Default assistant not found\")\n\n    # Extract DB tool IDs from the persona's tools\n    tool_ids = [tool.id for tool in persona.tools]\n\n    return DefaultAssistantConfiguration(\n        tool_ids=tool_ids,\n        system_prompt=persona.system_prompt,\n        default_system_prompt=DEFAULT_SYSTEM_PROMPT,\n    )\n\n\n@router.patch(\"\")\ndef update_default_assistant(\n    update_request: DefaultAssistantUpdateRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> DefaultAssistantConfiguration:\n    \"\"\"Update the default assistant configuration.\n\n    Args:\n        update_request: Request with optional tool_ids and system_prompt\n\n    Returns:\n        Updated DefaultAssistantConfiguration\n\n    Raises:\n        400: If invalid tool IDs are provided\n        404: If default assistant not found\n    \"\"\"\n    # Validate tool IDs if provided\n    try:\n        # Check if system_prompt was explicitly provided in the request\n        # This allows distinguishing \"not provided\" from \"explicitly set to null\"\n        update_system_prompt = \"system_prompt\" in update_request.model_fields_set\n\n        # Update the default assistant\n        updated_persona = update_default_assistant_configuration(\n            db_session=db_session,\n            tool_ids=update_request.tool_ids,\n            system_prompt=update_request.system_prompt,\n            update_system_prompt=update_system_prompt,\n        )\n\n        # Return the updated configuration\n        tool_ids = [tool.id for tool in updated_persona.tools]\n        return DefaultAssistantConfiguration(\n            tool_ids=tool_ids,\n            system_prompt=updated_persona.system_prompt,\n            default_system_prompt=DEFAULT_SYSTEM_PROMPT,\n        )\n\n    except ValueError as e:\n        if \"Default assistant not found\" in str(e):\n            raise HTTPException(status_code=404, detail=str(e))\n        raise HTTPException(status_code=400, detail=str(e))\n"
  },
  {
    "path": "backend/onyx/server/features/default_assistant/models.py",
    "content": "\"\"\"Models for default assistant configuration API.\"\"\"\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\n\nclass DefaultAssistantConfiguration(BaseModel):\n    \"\"\"Simplified view of default assistant configuration for admin UI.\"\"\"\n\n    tool_ids: list[int] = Field(\n        default_factory=list, description=\"List of enabled tool IDs\"\n    )\n    system_prompt: str | None = Field(\n        ...,\n        description=\"System prompt (instructions) for the assistant. None means use default.\",\n    )\n    default_system_prompt: str = Field(\n        ..., description=\"The default system prompt used when system_prompt is null.\"\n    )\n\n\nclass DefaultAssistantUpdateRequest(BaseModel):\n    \"\"\"Request model for updating default assistant configuration.\"\"\"\n\n    tool_ids: list[int] | None = Field(\n        default=None,\n        description=\"List of tool IDs to enable for the default assistant\",\n    )\n    system_prompt: str | None = Field(\n        default=None,\n        description=\"New system prompt (instructions). None resets to default, empty string is allowed.\",\n    )\n\n\n3\n"
  },
  {
    "path": "backend/onyx/server/features/document_set/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/features/document_set/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Query\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.background.celery.versioned_apps.client import app as client_app\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.document_set import check_document_sets_are_public\nfrom onyx.db.document_set import delete_document_set as db_delete_document_set\nfrom onyx.db.document_set import fetch_all_document_sets_for_user\nfrom onyx.db.document_set import get_document_set_by_id\nfrom onyx.db.document_set import insert_document_set\nfrom onyx.db.document_set import mark_document_set_as_to_be_deleted\nfrom onyx.db.document_set import update_document_set\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.server.features.document_set.models import CheckDocSetPublicRequest\nfrom onyx.server.features.document_set.models import CheckDocSetPublicResponse\nfrom onyx.server.features.document_set.models import DocumentSetCreationRequest\nfrom onyx.server.features.document_set.models import DocumentSetSummary\nfrom onyx.server.features.document_set.models import DocumentSetUpdateRequest\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\nfrom shared_configs.contextvars import get_current_tenant_id\n\n\nrouter = APIRouter(prefix=\"/manage\")\n\n\n@router.post(\"/admin/document-set\")\ndef create_document_set(\n    document_set_creation_request: DocumentSetCreationRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n    tenant_id: str = Depends(get_current_tenant_id),\n) -> int:\n    fetch_ee_implementation_or_noop(\n        \"onyx.db.user_group\", \"validate_object_creation_for_user\", None\n    )(\n        db_session=db_session,\n        user=user,\n        target_group_ids=document_set_creation_request.groups,\n        object_is_public=document_set_creation_request.is_public,\n        object_is_new=True,\n    )\n    try:\n        document_set_db_model, _ = insert_document_set(\n            document_set_creation_request=document_set_creation_request,\n            user_id=user.id,\n            db_session=db_session,\n        )\n    except Exception as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    if not DISABLE_VECTOR_DB:\n        client_app.send_task(\n            OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,\n            kwargs={\"tenant_id\": tenant_id},\n            priority=OnyxCeleryPriority.HIGH,\n        )\n\n    return document_set_db_model.id\n\n\n@router.patch(\"/admin/document-set\")\ndef patch_document_set(\n    document_set_update_request: DocumentSetUpdateRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n    tenant_id: str = Depends(get_current_tenant_id),\n) -> None:\n    document_set = get_document_set_by_id(db_session, document_set_update_request.id)\n    if document_set is None:\n        raise HTTPException(\n            status_code=404,\n            detail=f\"Document set {document_set_update_request.id} does not exist\",\n        )\n\n    fetch_ee_implementation_or_noop(\n        \"onyx.db.user_group\", \"validate_object_creation_for_user\", None\n    )(\n        db_session=db_session,\n        user=user,\n        target_group_ids=document_set_update_request.groups,\n        object_is_public=document_set_update_request.is_public,\n        object_is_owned_by_user=user\n        and (document_set.user_id is None or document_set.user_id == user.id),\n    )\n    try:\n        update_document_set(\n            document_set_update_request=document_set_update_request,\n            db_session=db_session,\n            user=user,\n        )\n    except Exception as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    if not DISABLE_VECTOR_DB:\n        client_app.send_task(\n            OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,\n            kwargs={\"tenant_id\": tenant_id},\n            priority=OnyxCeleryPriority.HIGH,\n        )\n\n\n@router.delete(\"/admin/document-set/{document_set_id}\")\ndef delete_document_set(\n    document_set_id: int,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n    tenant_id: str = Depends(get_current_tenant_id),\n) -> None:\n    document_set = get_document_set_by_id(db_session, document_set_id)\n    if document_set is None:\n        raise HTTPException(\n            status_code=404,\n            detail=f\"Document set {document_set_id} does not exist\",\n        )\n\n    # check if the user has \"edit\" access to the document set.\n    # `validate_object_creation_for_user` is poorly named, but this\n    # is the right function to use here\n    fetch_ee_implementation_or_noop(\n        \"onyx.db.user_group\", \"validate_object_creation_for_user\", None\n    )(\n        db_session=db_session,\n        user=user,\n        object_is_public=document_set.is_public,\n        object_is_owned_by_user=user\n        and (document_set.user_id is None or document_set.user_id == user.id),\n    )\n\n    try:\n        mark_document_set_as_to_be_deleted(\n            db_session=db_session,\n            document_set_id=document_set_id,\n            user=user,\n        )\n    except Exception as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    if DISABLE_VECTOR_DB:\n        db_session.refresh(document_set)\n        db_delete_document_set(document_set, db_session)\n    else:\n        client_app.send_task(\n            OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,\n            kwargs={\"tenant_id\": tenant_id},\n            priority=OnyxCeleryPriority.HIGH,\n        )\n\n\n\"\"\"Endpoints for non-admins\"\"\"\n\n\n@router.get(\"/document-set\")\ndef list_document_sets_for_user(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n    get_editable: bool = Query(\n        False, description=\"If true, return editable document sets\"\n    ),\n) -> list[DocumentSetSummary]:\n    document_sets = fetch_all_document_sets_for_user(\n        db_session=db_session, user=user, get_editable=get_editable\n    )\n    return [DocumentSetSummary.from_model(ds) for ds in document_sets]\n\n\n@router.get(\"/document-set-public\")\ndef document_set_public(\n    check_public_request: CheckDocSetPublicRequest,\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> CheckDocSetPublicResponse:\n    is_public = check_document_sets_are_public(\n        document_set_ids=check_public_request.document_set_ids, db_session=db_session\n    )\n    return CheckDocSetPublicResponse(is_public=is_public)\n"
  },
  {
    "path": "backend/onyx/server/features/document_set/models.py",
    "content": "from typing import Any\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.db.models import DocumentSet as DocumentSetDBModel\nfrom onyx.db.models import FederatedConnector__DocumentSet\nfrom onyx.server.documents.models import CCPairSummary\nfrom onyx.server.documents.models import ConnectorCredentialPairDescriptor\nfrom onyx.server.documents.models import ConnectorSnapshot\nfrom onyx.server.documents.models import CredentialSnapshot\nfrom onyx.server.federated.models import FederatedConnectorSummary\n\n\nclass FederatedConnectorConfig(BaseModel):\n    \"\"\"Configuration for adding a federated connector to a document set\"\"\"\n\n    federated_connector_id: int\n    entities: dict[str, Any]\n\n\nclass FederatedConnectorDescriptor(BaseModel):\n    \"\"\"Descriptor for a federated connector in a document set\"\"\"\n\n    id: int\n    name: str\n    source: str\n    entities: dict[str, Any]\n\n    @classmethod\n    def from_federated_connector_mapping(\n        cls, fc_mapping: \"FederatedConnector__DocumentSet\"\n    ) -> \"FederatedConnectorDescriptor\":\n        \"\"\"Create a descriptor from a federated connector mapping\"\"\"\n        return cls(\n            id=fc_mapping.federated_connector_id,\n            name=(\n                f\"{fc_mapping.federated_connector.source.replace('_', ' ').title()}\"\n                if fc_mapping.federated_connector\n                else \"Unknown\"\n            ),\n            source=(\n                fc_mapping.federated_connector.source\n                if fc_mapping.federated_connector\n                else \"unknown\"\n            ),\n            entities=fc_mapping.entities,\n        )\n\n\nclass DocumentSetCreationRequest(BaseModel):\n    name: str\n    description: str\n    cc_pair_ids: list[int]\n    is_public: bool\n    # For Private Document Sets, who should be able to access these\n    users: list[UUID] = Field(default_factory=list)\n    groups: list[int] = Field(default_factory=list)\n    # Federated connectors to include in the document set\n    federated_connectors: list[FederatedConnectorConfig] = Field(default_factory=list)\n\n\nclass DocumentSetUpdateRequest(BaseModel):\n    id: int\n    description: str\n    cc_pair_ids: list[int]\n    is_public: bool\n    # For Private Document Sets, who should be able to access these\n    users: list[UUID]\n    groups: list[int]\n    # Federated connectors to include in the document set\n    federated_connectors: list[FederatedConnectorConfig] = Field(default_factory=list)\n\n\nclass CheckDocSetPublicRequest(BaseModel):\n    \"\"\"Note that this does not mean that the Document Set itself is to be viewable by everyone\n    Rather, this refers to the CC-Pairs in the Document Set, and if every CC-Pair is public\n    \"\"\"\n\n    document_set_ids: list[int]\n\n\nclass CheckDocSetPublicResponse(BaseModel):\n    is_public: bool\n\n\nclass DocumentSet(BaseModel):\n    id: int\n    name: str\n    description: str | None\n    cc_pair_descriptors: list[ConnectorCredentialPairDescriptor]\n    is_up_to_date: bool\n    is_public: bool\n    # For Private Document Sets, who should be able to access these\n    users: list[UUID]\n    groups: list[int]\n    # Federated connectors in the document set\n    federated_connectors: list[FederatedConnectorDescriptor] = Field(\n        default_factory=list\n    )\n\n    @classmethod\n    def from_model(cls, document_set_model: DocumentSetDBModel) -> \"DocumentSet\":\n        return cls(\n            id=document_set_model.id,\n            name=document_set_model.name,\n            description=document_set_model.description,\n            cc_pair_descriptors=[\n                ConnectorCredentialPairDescriptor(\n                    id=cc_pair.id,\n                    name=cc_pair.name,\n                    connector=ConnectorSnapshot.from_connector_db_model(\n                        cc_pair.connector,\n                        credential_ids=[cc_pair.credential_id],\n                    ),\n                    credential=CredentialSnapshot.from_credential_db_model(\n                        cc_pair.credential\n                    ),\n                    access_type=cc_pair.access_type,\n                )\n                for cc_pair in document_set_model.connector_credential_pairs\n            ],\n            is_up_to_date=document_set_model.is_up_to_date,\n            is_public=document_set_model.is_public,\n            users=[user.id for user in document_set_model.users],\n            groups=[group.id for group in document_set_model.groups],\n            federated_connectors=[\n                FederatedConnectorDescriptor.from_federated_connector_mapping(\n                    fc_mapping\n                )\n                for fc_mapping in document_set_model.federated_connectors\n            ],\n        )\n\n\nclass DocumentSetSummary(BaseModel):\n    \"\"\"Simplified document set model with minimal data for list views\"\"\"\n\n    id: int\n    name: str\n    description: str | None\n    cc_pair_summaries: list[CCPairSummary]\n    is_up_to_date: bool\n    is_public: bool\n    users: list[UUID]\n    groups: list[int]\n    federated_connector_summaries: list[FederatedConnectorSummary] = Field(\n        default_factory=list\n    )\n\n    @classmethod\n    def from_model(cls, document_set: DocumentSetDBModel) -> \"DocumentSetSummary\":\n        \"\"\"Create a summary from a DocumentSet database model\"\"\"\n        return cls(\n            id=document_set.id,\n            name=document_set.name,\n            description=document_set.description,\n            cc_pair_summaries=[\n                CCPairSummary(\n                    id=cc_pair.id,\n                    name=cc_pair.name,\n                    source=cc_pair.connector.source,\n                    access_type=cc_pair.access_type,\n                )\n                for cc_pair in document_set.connector_credential_pairs\n            ],\n            is_up_to_date=document_set.is_up_to_date,\n            is_public=document_set.is_public,\n            users=[user.id for user in document_set.users],\n            groups=[group.id for group in document_set.groups],\n            federated_connector_summaries=[\n                FederatedConnectorSummary(\n                    id=fc_mapping.federated_connector_id,\n                    name=f\"{fc_mapping.federated_connector.source.replace('_', ' ').title()}\",\n                    source=fc_mapping.federated_connector.source,\n                    entities=fc_mapping.entities,\n                )\n                for fc_mapping in document_set.federated_connectors\n                if fc_mapping.federated_connector is not None\n            ],\n        )\n"
  },
  {
    "path": "backend/onyx/server/features/hierarchy/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.access.hierarchy_access import get_user_external_group_ids\nfrom onyx.auth.users import current_user\nfrom onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.document import get_accessible_documents_for_hierarchy_node_paginated\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.hierarchy import get_accessible_hierarchy_nodes_for_source\nfrom onyx.db.models import User\nfrom onyx.db.opensearch_migration import get_opensearch_retrieval_state\nfrom onyx.server.features.hierarchy.constants import DOCUMENT_PAGE_SIZE\nfrom onyx.server.features.hierarchy.constants import HIERARCHY_NODE_DOCUMENTS_PATH\nfrom onyx.server.features.hierarchy.constants import HIERARCHY_NODES_LIST_PATH\nfrom onyx.server.features.hierarchy.constants import HIERARCHY_NODES_PREFIX\nfrom onyx.server.features.hierarchy.models import DocumentPageCursor\nfrom onyx.server.features.hierarchy.models import DocumentSortDirection\nfrom onyx.server.features.hierarchy.models import DocumentSortField\nfrom onyx.server.features.hierarchy.models import DocumentSummary\nfrom onyx.server.features.hierarchy.models import HierarchyNodeDocumentsRequest\nfrom onyx.server.features.hierarchy.models import HierarchyNodeDocumentsResponse\nfrom onyx.server.features.hierarchy.models import HierarchyNodesResponse\nfrom onyx.server.features.hierarchy.models import HierarchyNodeSummary\n\nOPENSEARCH_NOT_ENABLED_MESSAGE = \"Per-source knowledge selection is coming soon in v3.0! OpenSearch indexing must be enabled to use this feature.\"\n\nMIGRATION_STATUS_MESSAGE = (\n    \"Our records indicate that the transition to OpenSearch is still in progress. \"\n    \"OpenSearch retrieval is necessary to use this feature. \"\n    \"You can still use Document Sets, though! \"\n    \"If you would like to manually switch to OpenSearch, \"\n    'Go to the \"Document Index Migration\" section in the Admin panel.'\n)\n\nrouter = APIRouter(prefix=HIERARCHY_NODES_PREFIX)\n\n\ndef _require_opensearch(db_session: Session) -> None:\n    if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:\n        raise HTTPException(\n            status_code=403,\n            detail=OPENSEARCH_NOT_ENABLED_MESSAGE,\n        )\n    if not get_opensearch_retrieval_state(db_session):\n        raise HTTPException(\n            status_code=403,\n            detail=MIGRATION_STATUS_MESSAGE,\n        )\n\n\ndef _get_user_access_info(user: User, db_session: Session) -> tuple[str, list[str]]:\n    return user.email, get_user_external_group_ids(db_session, user)\n\n\n@router.get(HIERARCHY_NODES_LIST_PATH)\ndef list_accessible_hierarchy_nodes(\n    source: DocumentSource,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> HierarchyNodesResponse:\n    _require_opensearch(db_session)\n    user_email, external_group_ids = _get_user_access_info(user, db_session)\n    nodes = get_accessible_hierarchy_nodes_for_source(\n        db_session=db_session,\n        source=source,\n        user_email=user_email,\n        external_group_ids=external_group_ids,\n    )\n    return HierarchyNodesResponse(\n        nodes=[\n            HierarchyNodeSummary(\n                id=node.id,\n                title=node.display_name,\n                link=node.link,\n                parent_id=node.parent_id,\n            )\n            for node in nodes\n        ]\n    )\n\n\n@router.post(HIERARCHY_NODE_DOCUMENTS_PATH)\ndef list_accessible_hierarchy_node_documents(\n    documents_request: HierarchyNodeDocumentsRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> HierarchyNodeDocumentsResponse:\n    _require_opensearch(db_session)\n    user_email, external_group_ids = _get_user_access_info(user, db_session)\n    cursor = documents_request.cursor\n    sort_field = documents_request.sort_field\n    sort_direction = documents_request.sort_direction\n\n    sort_by_name = sort_field == DocumentSortField.NAME\n    sort_ascending = sort_direction == DocumentSortDirection.ASC\n\n    documents = get_accessible_documents_for_hierarchy_node_paginated(\n        db_session=db_session,\n        parent_hierarchy_node_id=documents_request.parent_hierarchy_node_id,\n        user_email=user_email,\n        external_group_ids=external_group_ids,\n        limit=DOCUMENT_PAGE_SIZE + 1,\n        sort_by_name=sort_by_name,\n        sort_ascending=sort_ascending,\n        cursor_last_modified=cursor.last_modified if cursor else None,\n        cursor_last_synced=cursor.last_synced if cursor else None,\n        cursor_name=cursor.name if cursor else None,\n        cursor_document_id=cursor.document_id if cursor else None,\n    )\n    document_summaries = [\n        DocumentSummary(\n            id=document.id,\n            title=document.semantic_id,\n            link=document.link,\n            parent_id=document.parent_hierarchy_node_id,\n            last_modified=document.last_modified,\n            last_synced=document.last_synced,\n        )\n        for document in documents[:DOCUMENT_PAGE_SIZE]\n    ]\n    next_cursor = None\n    if len(documents) > DOCUMENT_PAGE_SIZE and document_summaries:\n        last_document = document_summaries[-1]\n        # For name sorting, we always have a title; for last_updated, we need last_modified\n        can_create_cursor = sort_by_name or last_document.last_modified is not None\n        if can_create_cursor:\n            next_cursor = DocumentPageCursor.from_document(last_document, sort_field)\n    return HierarchyNodeDocumentsResponse(\n        documents=document_summaries,\n        next_cursor=next_cursor,\n        sort_field=sort_field,\n        sort_direction=sort_direction,\n        folder_position=documents_request.folder_position,\n    )\n"
  },
  {
    "path": "backend/onyx/server/features/hierarchy/constants.py",
    "content": "HIERARCHY_NODES_PREFIX = \"/hierarchy-nodes\"\nHIERARCHY_NODES_LIST_PATH = \"\"\nHIERARCHY_NODE_DOCUMENTS_PATH = \"/documents\"\n\nDOCUMENT_PAGE_SIZE = 50\n"
  },
  {
    "path": "backend/onyx/server/features/hierarchy/models.py",
    "content": "from datetime import datetime\nfrom enum import Enum\n\nfrom pydantic import BaseModel\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.server.features.hierarchy.constants import DOCUMENT_PAGE_SIZE\n\n\nclass DocumentSortField(str, Enum):\n    NAME = \"name\"\n    LAST_UPDATED = \"last_updated\"\n\n\nclass DocumentSortDirection(str, Enum):\n    ASC = \"asc\"\n    DESC = \"desc\"\n\n\nclass FolderPosition(str, Enum):\n    ON_TOP = \"on_top\"\n    MIXED = \"mixed\"\n\n\nclass HierarchyNodesRequest(BaseModel):\n    source: DocumentSource\n\n\nclass HierarchyNodeSummary(BaseModel):\n    id: int\n    title: str\n    link: str | None\n    parent_id: int | None\n\n\nclass HierarchyNodesResponse(BaseModel):\n    nodes: list[HierarchyNodeSummary]\n\n\nclass DocumentPageCursor(BaseModel):\n    # Fields for last_updated sorting\n    last_modified: datetime | None = None\n    last_synced: datetime | None = None\n    # Field for name sorting\n    name: str | None = None\n    # Document ID for tie-breaking (always required when cursor is set)\n    document_id: str\n\n    @classmethod\n    def from_document(\n        cls,\n        document: \"DocumentSummary\",\n        sort_field: DocumentSortField,\n    ) -> \"DocumentPageCursor\":\n        if sort_field == DocumentSortField.NAME:\n            return cls(\n                name=document.title,\n                document_id=document.id,\n            )\n        # Default: LAST_UPDATED\n        return cls(\n            last_modified=document.last_modified,\n            last_synced=document.last_synced,\n            document_id=document.id,\n        )\n\n\nclass HierarchyNodeDocumentsRequest(BaseModel):\n    parent_hierarchy_node_id: int\n    cursor: DocumentPageCursor | None = None\n    sort_field: DocumentSortField = DocumentSortField.LAST_UPDATED\n    sort_direction: DocumentSortDirection = DocumentSortDirection.DESC\n    folder_position: FolderPosition = FolderPosition.ON_TOP\n\n\nclass DocumentSummary(BaseModel):\n    id: str\n    title: str\n    link: str | None\n    parent_id: int | None\n    last_modified: datetime | None\n    last_synced: datetime | None\n\n\nclass HierarchyNodeDocumentsResponse(BaseModel):\n    documents: list[DocumentSummary]\n    next_cursor: DocumentPageCursor | None\n    page_size: int = DOCUMENT_PAGE_SIZE\n    sort_field: DocumentSortField = DocumentSortField.LAST_UPDATED\n    sort_direction: DocumentSortDirection = DocumentSortDirection.DESC\n    folder_position: FolderPosition = FolderPosition.ON_TOP\n"
  },
  {
    "path": "backend/onyx/server/features/hooks/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/features/input_prompt/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.input_prompt import disable_input_prompt_for_user\nfrom onyx.db.input_prompt import fetch_input_prompt_by_id\nfrom onyx.db.input_prompt import fetch_input_prompts_by_user\nfrom onyx.db.input_prompt import insert_input_prompt\nfrom onyx.db.input_prompt import remove_input_prompt\nfrom onyx.db.input_prompt import remove_public_input_prompt\nfrom onyx.db.input_prompt import update_input_prompt\nfrom onyx.db.models import InputPrompt__User\nfrom onyx.db.models import User\nfrom onyx.server.features.input_prompt.models import CreateInputPromptRequest\nfrom onyx.server.features.input_prompt.models import InputPromptSnapshot\nfrom onyx.server.features.input_prompt.models import UpdateInputPromptRequest\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nbasic_router = APIRouter(prefix=\"/input_prompt\")\nadmin_router = APIRouter(prefix=\"/admin/input_prompt\")\n\n\n@basic_router.get(\"\")\ndef list_input_prompts(\n    user: User = Depends(current_user),\n    include_public: bool = True,\n    db_session: Session = Depends(get_session),\n) -> list[InputPromptSnapshot]:\n    user_prompts = fetch_input_prompts_by_user(\n        user_id=user.id,\n        db_session=db_session,\n        include_public=include_public,\n    )\n    return [InputPromptSnapshot.from_model(prompt) for prompt in user_prompts]\n\n\n@basic_router.get(\"/{input_prompt_id}\")\ndef get_input_prompt(\n    input_prompt_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> InputPromptSnapshot:\n    input_prompt = fetch_input_prompt_by_id(\n        id=input_prompt_id,\n        user_id=user.id,\n        db_session=db_session,\n    )\n\n    return InputPromptSnapshot.from_model(input_prompt=input_prompt)\n\n\n@basic_router.post(\"\")\ndef create_input_prompt(\n    create_input_prompt_request: CreateInputPromptRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> InputPromptSnapshot:\n    input_prompt = insert_input_prompt(\n        prompt=create_input_prompt_request.prompt,\n        content=create_input_prompt_request.content,\n        is_public=False,\n        user=user,\n        db_session=db_session,\n    )\n\n    input_prompt_user = InputPrompt__User(\n        input_prompt_id=input_prompt.id, user_id=user.id\n    )\n    db_session.add(input_prompt_user)\n    db_session.commit()\n\n    return InputPromptSnapshot.from_model(input_prompt)\n\n\n@basic_router.patch(\"/{input_prompt_id}\")\ndef patch_input_prompt(\n    input_prompt_id: int,\n    update_input_prompt_request: UpdateInputPromptRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> InputPromptSnapshot:\n    try:\n        updated_input_prompt = update_input_prompt(\n            user=user,\n            input_prompt_id=input_prompt_id,\n            prompt=update_input_prompt_request.prompt,\n            content=update_input_prompt_request.content,\n            active=update_input_prompt_request.active,\n            db_session=db_session,\n        )\n    except ValueError as e:\n        error_msg = \"Error occurred while updated input prompt\"\n        logger.warn(f\"{error_msg}. Stack trace: {e}\")\n        raise HTTPException(status_code=404, detail=error_msg)\n\n    return InputPromptSnapshot.from_model(updated_input_prompt)\n\n\n@basic_router.delete(\"/{input_prompt_id}\")\ndef delete_input_prompt(\n    input_prompt_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n    delete_public: bool = False,\n) -> None:\n    try:\n        remove_input_prompt(\n            user, input_prompt_id, db_session, delete_public=delete_public\n        )\n\n    except ValueError as e:\n        error_msg = \"Error occurred while deleting input prompt\"\n        logger.warn(f\"{error_msg}. Stack trace: {e}\")\n        raise HTTPException(status_code=404, detail=error_msg)\n\n\n@admin_router.delete(\"/{input_prompt_id}\")\ndef delete_public_input_prompt(\n    input_prompt_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    try:\n        remove_public_input_prompt(input_prompt_id, db_session)\n\n    except ValueError as e:\n        error_msg = \"Error occurred while deleting input prompt\"\n        logger.warn(f\"{error_msg}. Stack trace: {e}\")\n        raise HTTPException(status_code=404, detail=error_msg)\n\n\n@basic_router.post(\"/{input_prompt_id}/hide\")\ndef hide_input_prompt_for_user(\n    input_prompt_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    \"\"\"\n    Endpoint that marks a seed (or any) prompt as disabled for the current user,\n    so it won't show up in their subsequent queries.\n    \"\"\"\n    disable_input_prompt_for_user(input_prompt_id, user.id, db_session)\n"
  },
  {
    "path": "backend/onyx/server/features/input_prompt/models.py",
    "content": "from uuid import UUID\n\nfrom pydantic import BaseModel\n\nfrom onyx.db.models import InputPrompt\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass CreateInputPromptRequest(BaseModel):\n    prompt: str\n    content: str\n    is_public: bool\n\n\nclass UpdateInputPromptRequest(BaseModel):\n    prompt: str\n    content: str\n    active: bool\n\n\nclass InputPromptResponse(BaseModel):\n    id: int\n    prompt: str\n    content: str\n    active: bool\n\n\nclass InputPromptSnapshot(BaseModel):\n    id: int\n    prompt: str\n    content: str\n    active: bool\n    user_id: UUID | None\n    is_public: bool\n\n    @classmethod\n    def from_model(cls, input_prompt: InputPrompt) -> \"InputPromptSnapshot\":\n        return InputPromptSnapshot(\n            id=input_prompt.id,\n            prompt=input_prompt.prompt,\n            content=input_prompt.content,\n            active=input_prompt.active,\n            user_id=input_prompt.user_id,\n            is_public=input_prompt.is_public,\n        )\n"
  },
  {
    "path": "backend/onyx/server/features/mcp/api.py",
    "content": "import asyncio\nimport base64\nimport datetime\nimport hashlib\nimport json\nfrom collections.abc import Awaitable\nfrom enum import Enum\nfrom secrets import token_urlsafe\nfrom typing import cast\nfrom typing import Literal\nfrom urllib.parse import urlparse\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Request\nfrom mcp.client.auth import OAuthClientProvider\nfrom mcp.client.auth import TokenStorage\nfrom mcp.shared.auth import OAuthClientInformationFull\nfrom mcp.shared.auth import OAuthClientMetadata\nfrom mcp.shared.auth import OAuthToken\nfrom mcp.types import InitializeResult\nfrom mcp.types import Tool as MCPLibTool\nfrom pydantic import AnyUrl\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import MCPAuthenticationPerformer\nfrom onyx.db.enums import MCPAuthenticationType\nfrom onyx.db.enums import MCPServerStatus\nfrom onyx.db.enums import MCPTransport\nfrom onyx.db.mcp import create_connection_config\nfrom onyx.db.mcp import create_mcp_server__no_commit\nfrom onyx.db.mcp import delete_all_user_connection_configs_for_server_no_commit\nfrom onyx.db.mcp import delete_connection_config\nfrom onyx.db.mcp import delete_mcp_server\nfrom onyx.db.mcp import delete_user_connection_configs_for_server\nfrom onyx.db.mcp import extract_connection_data\nfrom onyx.db.mcp import get_all_mcp_servers\nfrom onyx.db.mcp import get_connection_config_by_id\nfrom onyx.db.mcp import get_mcp_server_by_id\nfrom onyx.db.mcp import get_mcp_servers_for_persona\nfrom onyx.db.mcp import get_server_auth_template\nfrom onyx.db.mcp import get_user_connection_config\nfrom onyx.db.mcp import update_connection_config\nfrom onyx.db.mcp import update_mcp_server__no_commit\nfrom onyx.db.mcp import upsert_user_connection_config\nfrom onyx.db.models import MCPConnectionConfig\nfrom onyx.db.models import MCPServer as DbMCPServer\nfrom onyx.db.models import Tool\nfrom onyx.db.models import User\nfrom onyx.db.tools import create_tool__no_commit\nfrom onyx.db.tools import delete_tool__no_commit\nfrom onyx.db.tools import get_tools_by_mcp_server_id\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.server.features.mcp.models import MCPApiKeyResponse\nfrom onyx.server.features.mcp.models import MCPAuthTemplate\nfrom onyx.server.features.mcp.models import MCPConnectionData\nfrom onyx.server.features.mcp.models import MCPOAuthCallbackResponse\nfrom onyx.server.features.mcp.models import MCPOAuthKeys\nfrom onyx.server.features.mcp.models import MCPServer\nfrom onyx.server.features.mcp.models import MCPServerCreateResponse\nfrom onyx.server.features.mcp.models import MCPServerSimpleCreateRequest\nfrom onyx.server.features.mcp.models import MCPServerSimpleUpdateRequest\nfrom onyx.server.features.mcp.models import MCPServersResponse\nfrom onyx.server.features.mcp.models import MCPServerUpdateResponse\nfrom onyx.server.features.mcp.models import MCPToolCreateRequest\nfrom onyx.server.features.mcp.models import MCPToolListResponse\nfrom onyx.server.features.mcp.models import MCPToolUpdateRequest\nfrom onyx.server.features.mcp.models import MCPUserCredentialsRequest\nfrom onyx.server.features.mcp.models import MCPUserOAuthConnectRequest\nfrom onyx.server.features.mcp.models import MCPUserOAuthConnectResponse\nfrom onyx.server.features.tool.models import ToolSnapshot\nfrom onyx.tools.tool_implementations.mcp.mcp_client import discover_mcp_tools\nfrom onyx.tools.tool_implementations.mcp.mcp_client import initialize_mcp_client\nfrom onyx.tools.tool_implementations.mcp.mcp_client import log_exception_group\nfrom onyx.utils.encryption import mask_string\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _truncate_description(description: str | None, max_length: int = 500) -> str:\n    \"\"\"Truncate description to max_length characters, adding ellipsis if truncated.\"\"\"\n    if not description:\n        return \"\"\n    if len(description) <= max_length:\n        return description\n    return description[: max_length - 3] + \"...\"\n\n\nrouter = APIRouter(prefix=\"/mcp\")\nadmin_router = APIRouter(prefix=\"/admin/mcp\")\nSTATE_TTL_SECONDS = 60 * 5  # 5 minutes\nOAUTH_WAIT_SECONDS = 30  # Give the user 30 seconds to complete the OAuth flow\nUNUSED_RETURN_PATH = \"unused_path\"\n\nHEADER_SUBSTITUTIONS: Literal[\"header_substitutions\"] = \"header_substitutions\"\n\n\ndef key_auth_url(user_id: str) -> str:\n    return f\"mcp:oauth:{user_id}:auth_url\"\n\n\ndef key_state(user_id: str) -> str:\n    return f\"mcp:oauth:{user_id}:state\"\n\n\ndef key_code(user_id: str, state: str) -> str:\n    return f\"mcp:oauth:{user_id}:{state}:codes\"\n\n\ndef key_tokens(user_id: str) -> str:\n    return f\"mcp:oauth:{user_id}:tokens\"\n\n\ndef key_client_info(user_id: str) -> str:\n    return f\"mcp:oauth:{user_id}:client_info\"\n\n\nREQUESTED_SCOPE: str | None = None\n\n\nclass OnyxTokenStorage(TokenStorage):\n    \"\"\"\n    store auth info in a particular user's connection config in postgres\n    \"\"\"\n\n    def __init__(self, connection_config_id: int, alt_config_id: int | None = None):\n        self.alt_config_id = alt_config_id\n        self.connection_config_id = connection_config_id\n\n    def _ensure_connection_config(self, db_session: Session) -> MCPConnectionConfig:\n        config = get_connection_config_by_id(self.connection_config_id, db_session)\n        if config is None:\n            raise HTTPException(status_code=404, detail=\"Connection config not found\")\n        return config\n\n    async def get_tokens(self) -> OAuthToken | None:\n        with get_session_with_current_tenant() as db_session:\n            config = self._ensure_connection_config(db_session)\n            config_data = extract_connection_data(config)\n            tokens_raw = config_data.get(MCPOAuthKeys.TOKENS.value)\n            if tokens_raw:\n                return OAuthToken.model_validate(tokens_raw)\n            return None\n\n    async def set_tokens(self, tokens: OAuthToken) -> None:\n        with get_session_with_current_tenant() as db_session:\n            config = self._ensure_connection_config(db_session)\n            config_data = extract_connection_data(config)\n            config_data[MCPOAuthKeys.TOKENS.value] = tokens.model_dump(mode=\"json\")\n            config_data[\"headers\"] = {\n                \"Authorization\": f\"{tokens.token_type} {tokens.access_token}\"\n            }\n            update_connection_config(config.id, db_session, config_data)\n            if self.alt_config_id:\n                update_connection_config(self.alt_config_id, db_session, config_data)\n\n                # signal the oauth callback that token exchange is complete\n                r = get_redis_client()\n                r.rpush(key_tokens(str(self.alt_config_id)), tokens.model_dump_json())\n                r.expire(key_tokens(str(self.alt_config_id)), OAUTH_WAIT_SECONDS)\n\n    async def get_client_info(self) -> OAuthClientInformationFull | None:\n        with get_session_with_current_tenant() as db_session:\n            config = self._ensure_connection_config(db_session)\n            config_data = extract_connection_data(config)\n            client_info_raw = config_data.get(MCPOAuthKeys.CLIENT_INFO.value)\n            if client_info_raw:\n                return OAuthClientInformationFull.model_validate(client_info_raw)\n            if self.alt_config_id:\n                alt_config = get_connection_config_by_id(self.alt_config_id, db_session)\n                if alt_config:\n                    alt_config_data = extract_connection_data(alt_config)\n                    alt_client_info = alt_config_data.get(\n                        MCPOAuthKeys.CLIENT_INFO.value\n                    )\n                    if alt_client_info:\n                        # Cache the admin client info on the user config for future calls\n                        config_data[MCPOAuthKeys.CLIENT_INFO.value] = alt_client_info\n                        update_connection_config(config.id, db_session, config_data)\n                        return OAuthClientInformationFull.model_validate(\n                            alt_client_info\n                        )\n            return None\n\n    async def set_client_info(self, info: OAuthClientInformationFull) -> None:\n        with get_session_with_current_tenant() as db_session:\n            config = self._ensure_connection_config(db_session)\n            config_data = extract_connection_data(config)\n            config_data[MCPOAuthKeys.CLIENT_INFO.value] = info.model_dump(mode=\"json\")\n            update_connection_config(config.id, db_session, config_data)\n            if self.alt_config_id:\n                update_connection_config(self.alt_config_id, db_session, config_data)\n\n\ndef make_oauth_provider(\n    mcp_server: DbMCPServer,\n    user_id: str,\n    return_path: str,\n    connection_config_id: int,\n    admin_config_id: int | None,\n) -> OAuthClientProvider:\n    async def redirect_handler(auth_url: str) -> None:\n        if return_path == UNUSED_RETURN_PATH:\n            raise ValueError(\"Please Reconnect to the server\")\n        r = get_redis_client()\n        # The SDK generated & embedded 'state' in the auth_url; extract & store it.\n        parsed = urlparse(auth_url)\n        qs = dict([p.split(\"=\", 1) for p in parsed.query.split(\"&\") if \"=\" in p])\n        state = qs.get(\"state\")\n        if not state:\n            # Defensive: some providers encode state differently; adapt if needed.\n            raise RuntimeError(\"Missing state in authorization_url\")\n\n        # Save for the frontend & for callback validation\n        state_obj = MCPOauthState(\n            server_id=mcp_server.id,\n            return_path=return_path,\n            is_admin=admin_config_id is not None,\n            state=state,\n        )\n        r.rpush(key_auth_url(user_id), auth_url)\n        r.expire(key_auth_url(user_id), OAUTH_WAIT_SECONDS)\n        r.set(key_state(user_id), state_obj.model_dump_json(), ex=STATE_TTL_SECONDS)\n\n        # Return immediately; the HTTP layer will read the stored URL and send it to the browser.\n\n    async def callback_handler() -> tuple[str, str | None]:\n        r = get_redis_client()\n        # Wait up to TTL for the code published by the /oauth/callback route\n        state = r.get(key_state(user_id))\n        if isinstance(state, Awaitable):\n            state = await state\n        if not state:\n            raise RuntimeError(\"No pending OAuth state for user\")\n        state_obj = MCPOauthState.model_validate_json(state)\n\n        # Block on Redis for (code, state). BLPOP returns (key, value).\n        key = key_code(user_id, state_obj.state)\n\n        # requests CAN block here for up to a minute if the user doesn't resolve the OAuth flow\n        # Run the blocking blpop operation in a thread pool to avoid blocking the event loop\n        loop = asyncio.get_running_loop()\n        pop = await loop.run_in_executor(\n            None, lambda: r.blpop([key], timeout=OAUTH_WAIT_SECONDS)\n        )\n        # TODO: gracefully handle \"user says no\"\n        if not pop:\n            raise RuntimeError(\"Timed out waiting for OAuth callback\")\n\n        code_state_bytes = cast(tuple[bytes, bytes], pop)\n\n        code_state_dict = json.loads(code_state_bytes[1].decode())\n\n        code = code_state_dict[\"code\"]\n\n        if code_state_dict[\"state\"] != state_obj.state:\n            raise RuntimeError(\"Invalid state in OAuth callback\")\n\n        # Optional: cleanup\n        r.delete(key_auth_url(user_id), key_state(user_id))\n        return code, state_obj.state\n\n    return OAuthClientProvider(\n        server_url=mcp_server.server_url,\n        client_metadata=OAuthClientMetadata(\n            client_name=f\"Onyx - {mcp_server.name}\",\n            redirect_uris=[AnyUrl(f\"{WEB_DOMAIN}/mcp/oauth/callback\")],\n            grant_types=[\"authorization_code\", \"refresh_token\"],\n            response_types=[\"code\"],\n            scope=REQUESTED_SCOPE,  # TODO: do we need to pass this in? maybe make configurable\n        ),\n        storage=OnyxTokenStorage(connection_config_id, admin_config_id),\n        redirect_handler=redirect_handler,\n        callback_handler=callback_handler,\n    )\n\n\ndef _build_headers_from_template(\n    template_data: MCPAuthTemplate, credentials: dict[str, str], user_email: str\n) -> dict[str, str]:\n    \"\"\"Build headers dict from template and credentials\"\"\"\n    headers = {}\n    template_headers = template_data.headers\n\n    for name, value_template in template_headers.items():\n        # Replace placeholders\n        value = value_template\n        for key, cred_value in credentials.items():\n            value = value.replace(f\"{{{key}}}\", cred_value)\n        value = value.replace(\"{user_email}\", user_email)\n\n        if name:\n            headers[name] = value\n\n    return headers\n\n\ndef test_mcp_server_credentials(\n    server_url: str,\n    connection_headers: dict[str, str] | None,\n    auth: OAuthClientProvider | None,\n    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,\n) -> tuple[bool, str]:\n    \"\"\"Test if credentials work by calling the MCP server's tools/list endpoint\"\"\"\n    try:\n        # Attempt to discover tools using the provided credentials\n        tools = discover_mcp_tools(\n            server_url, connection_headers, transport=transport, auth=auth\n        )\n\n        if (\n            tools is not None and len(tools) >= 0\n        ):  # Even 0 tools is a successful connection\n            return True, f\"Successfully connected. Found {len(tools)} tools.\"\n        else:\n            return False, \"Failed to retrieve tools list from server.\"\n\n    except Exception as e:\n        logger.error(f\"Failed to test MCP server credentials: {e}\")\n        return False, f\"Connection failed: {str(e)}\"\n\n\ndef b64url(b: bytes) -> str:\n    return base64.urlsafe_b64encode(b).rstrip(b\"=\").decode(\"ascii\")\n\n\ndef make_pkce_pair() -> tuple[str, str]:\n    verifier = b64url(token_urlsafe(64).encode())\n    challenge = b64url(hashlib.sha256(verifier.encode(\"ascii\")).digest())\n    return verifier, challenge\n\n\nclass MCPOauthState(BaseModel):\n    server_id: int\n    return_path: str\n    is_admin: bool\n    state: str\n\n\n@admin_router.post(\"/oauth/connect\", response_model=MCPUserOAuthConnectResponse)\nasync def connect_admin_oauth(\n    request: MCPUserOAuthConnectRequest,\n    db: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> MCPUserOAuthConnectResponse:\n    \"\"\"Connect OAuth flow for admin MCP server authentication\"\"\"\n    return await _connect_oauth(request, db, is_admin=True, user=user)\n\n\n@router.post(\"/oauth/connect\", response_model=MCPUserOAuthConnectResponse)\nasync def connect_user_oauth(\n    request: MCPUserOAuthConnectRequest,\n    db: Session = Depends(get_session),\n    user: User = Depends(current_user),\n) -> MCPUserOAuthConnectResponse:\n    return await _connect_oauth(request, db, is_admin=False, user=user)\n\n\nasync def _connect_oauth(\n    request: MCPUserOAuthConnectRequest,\n    db: Session,\n    is_admin: bool,\n    user: User,\n) -> MCPUserOAuthConnectResponse:\n    \"\"\"Connect OAuth flow for per-user MCP server authentication\"\"\"\n\n    logger.info(f\"Initiating per-user OAuth for server: {request.server_id}\")\n\n    try:\n        server_id = int(request.server_id)\n        mcp_server = get_mcp_server_by_id(server_id, db)\n    except Exception:\n        raise HTTPException(status_code=404, detail=\"MCP server not found\")\n\n    if is_admin:\n        _ensure_mcp_server_owner_or_admin(mcp_server, user)\n\n    if mcp_server.auth_type != MCPAuthenticationType.OAUTH:\n        auth_type_str = mcp_server.auth_type.value if mcp_server.auth_type else \"None\"\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Server was configured with authentication type {auth_type_str}\",\n        )\n\n    # Create admin config with client info if provided\n    config_data = MCPConnectionData(headers={})\n    if request.oauth_client_id and request.oauth_client_secret:\n        client_info = OAuthClientInformationFull(\n            client_id=request.oauth_client_id,\n            client_secret=request.oauth_client_secret,\n            redirect_uris=[AnyUrl(f\"{WEB_DOMAIN}/mcp/oauth/callback\")],\n            grant_types=[\"authorization_code\", \"refresh_token\"],\n            response_types=[\"code\"],\n            scope=REQUESTED_SCOPE,  # TODO: allow specifying scopes?\n            # Must specify auth method so client_secret is actually sent during token exchange\n            token_endpoint_auth_method=\"client_secret_post\",\n        )\n        config_data[MCPOAuthKeys.CLIENT_INFO.value] = client_info.model_dump(\n            mode=\"json\"\n        )\n\n    if mcp_server.admin_connection_config_id is None:\n        if not is_admin:\n            raise HTTPException(\n                status_code=400,\n                detail=\"Admin connection config not found for this server\",\n            )\n\n        admin_config = create_connection_config(\n            config_data=config_data,\n            mcp_server_id=mcp_server.id,\n            user_email=\"\",\n            db_session=db,\n        )\n        mcp_server.admin_connection_config = admin_config\n        mcp_server.admin_connection_config_id = (\n            admin_config.id\n        )  # might not have to do this\n    elif is_admin:  # only update admin config if we're an admin\n        update_connection_config(mcp_server.admin_connection_config_id, db, config_data)\n\n    connection_config = get_user_connection_config(mcp_server.id, user.email, db)\n\n    if connection_config is None:\n        connection_config = create_connection_config(\n            config_data=config_data,\n            mcp_server_id=mcp_server.id,\n            user_email=user.email,\n            db_session=db,\n        )\n    else:\n        update_connection_config(connection_config.id, db, config_data)\n\n    db.commit()\n\n    connection_config_dict = extract_connection_data(\n        connection_config, apply_mask=False\n    )\n    is_connected = (\n        MCPOAuthKeys.CLIENT_INFO.value in connection_config_dict\n        and connection_config_dict.get(\"headers\")\n    )\n    # Step 1: make unauthenticated request and parse returned www authenticate header\n    # Ensure we have a trailing slash for the MCP endpoint\n\n    if mcp_server.transport is None:\n        raise HTTPException(\n            status_code=400,\n            detail=\"MCP server transport is not configured\",\n        )\n\n    # always make a http request for the initial probe\n    transport = mcp_server.transport if is_connected else MCPTransport.STREAMABLE_HTTP\n    probe_url = mcp_server.server_url\n    logger.info(f\"Probing OAuth server at: {probe_url}\")\n\n    oauth_auth = make_oauth_provider(\n        mcp_server,\n        str(user.id),\n        request.return_path,\n        connection_config.id,\n        mcp_server.admin_connection_config_id,\n    )\n\n    # start the oauth handshake in the background\n    # the background task will block on the callback handler after setting\n    # the auth_url for us to send to the frontend. The callback handler waits for\n    # the auth code to be available in redis; this code gets set by our callback endpoint\n    # which is called by the frontend after the user goes through the login flow.\n    async def tmp_func() -> InitializeResult:\n        try:\n            x = await initialize_mcp_client(\n                probe_url,\n                connection_headers=connection_config_dict.get(\"headers\", {}),\n                transport=transport,\n                auth=oauth_auth,\n            )\n            logger.info(f\"OAuth initialization completed successfully: {x}\")\n            return x\n        except Exception:\n            logger.exception(\"OAuth initialization failed\")\n            raise\n\n    init_task = asyncio.create_task(tmp_func())\n\n    # Wait for whichever happens first:\n    # 1) The OAuth redirect URL becomes available in Redis (we should return it)\n    # 2) The initialize task completes (tokens already valid) — return to the provided return_path\n    r = get_redis_client()\n    loop = asyncio.get_running_loop()\n\n    async def wait_auth_url() -> str | None:\n        raw = await loop.run_in_executor(\n            None,\n            lambda: r.blpop([key_auth_url(str(user.id))], timeout=OAUTH_WAIT_SECONDS),\n        )\n        if raw is None:\n            return None\n        tup = cast(tuple[bytes, bytes], raw)\n        return tup[1].decode()\n\n    auth_task = None if is_connected else asyncio.create_task(wait_auth_url())\n\n    done, pending = await asyncio.wait(\n        [init_task] + ([auth_task] if auth_task else []),\n        return_when=asyncio.FIRST_COMPLETED,\n    )\n\n    # If we got an auth URL first, return it\n    if auth_task in done:\n        oauth_url = await auth_task\n        # If no URL was retrieved within the timeout, treat as error\n        if not oauth_url:\n            # If initialization also finished, treat as already authenticated\n            if init_task.done() and not init_task.cancelled():\n                try:\n                    init_result = init_task.result()\n                    logger.info(\n                        f\"OAuth initialization completed during timeout: {init_result}\"\n                    )\n                    return MCPUserOAuthConnectResponse(\n                        server_id=int(request.server_id),\n                        oauth_url=request.return_path,\n                    )\n                except Exception as e:\n                    logger.error(f\"OAuth initialization failed during timeout: {e}\")\n                    raise HTTPException(\n                        status_code=400, detail=f\"OAuth initialization failed: {str(e)}\"\n                    )\n            raise HTTPException(status_code=400, detail=\"Auth URL retrieval timed out\")\n\n        logger.info(\n            f\"Connected to auth url: {oauth_url} for mcp server: {mcp_server.name}\"\n        )\n        return MCPUserOAuthConnectResponse(\n            server_id=int(request.server_id), oauth_url=oauth_url\n        )\n\n    # Otherwise, initialization finished first — no redirect needed; go back to return_path\n    for t in pending:\n        t.cancel()\n    try:\n        init_result = init_task.result()\n        logger.info(f\"OAuth initialization completed without redirect: {init_result}\")\n    except Exception as e:\n        if isinstance(e, ExceptionGroup):\n            saved_e = log_exception_group(e)\n        else:\n            saved_e = e\n        logger.error(f\"OAuth initialization failed: {saved_e}\")\n        # If initialize failed and we also didn't get an auth URL, surface an error\n        raise HTTPException(\n            status_code=400, detail=f\"Failed to initialize OAuth client: {str(saved_e)}\"\n        )\n\n    return MCPUserOAuthConnectResponse(\n        server_id=int(request.server_id),\n        oauth_url=request.return_path,\n    )\n\n\n@router.post(\"/oauth/callback\", response_model=MCPOAuthCallbackResponse)\nasync def process_oauth_callback(\n    request: Request,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_user),\n) -> MCPOAuthCallbackResponse:\n    \"\"\"Complete OAuth flow by exchanging code for tokens and storing them.\n\n    Notes:\n    - For demo/test servers (like run_mcp_server_oauth.py), the token endpoint\n      and parameters may be fixed. In production, use the server's metadata\n      (e.g., well-known endpoints) to discover token URL and scopes.\n    \"\"\"\n\n    # Get callback data from query parameters (like federated OAuth does)\n    callback_data = dict(request.query_params)\n\n    redis_client = get_redis_client()\n    state = callback_data.get(\"state\")\n    code = callback_data.get(\"code\")\n    user_id = str(user.id)\n    if not state:\n        raise HTTPException(status_code=400, detail=\"Missing state parameter\")\n    if not code:\n        raise HTTPException(status_code=400, detail=\"Missing code parameter\")\n    stored_data = cast(bytes, redis_client.get(key_state(user_id)))\n    if not stored_data:\n        raise HTTPException(\n            status_code=400, detail=\"Invalid or expired state parameter\"\n        )\n    state_data = MCPOauthState.model_validate_json(stored_data)\n    try:\n        server_id = state_data.server_id\n        mcp_server = get_mcp_server_by_id(server_id, db_session)\n    except Exception:\n        raise HTTPException(status_code=404, detail=\"MCP server not found\")\n\n    user_id = str(user.id)\n\n    r = get_redis_client()\n\n    # Unblock the callback_handler in the asyncio background task\n    r.rpush(key_code(user_id, state), json.dumps({\"code\": code, \"state\": state}))\n    r.expire(key_code(user_id, state), OAUTH_WAIT_SECONDS)\n\n    admin_config = mcp_server.admin_connection_config\n    if admin_config is None:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Server referenced by callback is not configured, try recreating\",\n        )\n\n    # Run the blocking blpop operation in a thread pool to avoid blocking the event loop\n    # Wait until set_tokens is called\n    admin_config_id = admin_config.id\n    loop = asyncio.get_running_loop()\n    tokens_raw = await loop.run_in_executor(\n        None,\n        lambda: r.blpop([key_tokens(str(admin_config_id))], timeout=OAUTH_WAIT_SECONDS),\n    )\n    if tokens_raw is None:\n        raise HTTPException(status_code=400, detail=\"No tokens found\")\n    tokens_bytes = cast(tuple[bytes, bytes], tokens_raw)\n    tokens = OAuthToken.model_validate_json(tokens_bytes[1].decode())\n\n    if not tokens.access_token:\n        raise HTTPException(status_code=400, detail=\"No access_token in OAuth response\")\n\n    db_session.commit()\n\n    logger.info(\n        f\"server_id={str(mcp_server.id)} server_name={mcp_server.name} return_path={state_data.return_path}\"\n    )\n\n    return MCPOAuthCallbackResponse(\n        success=True,\n        server_id=mcp_server.id,\n        server_name=mcp_server.name,\n        message=f\"OAuth authorization completed successfully for {mcp_server.name}\",\n        redirect_url=state_data.return_path,\n    )\n\n\n@router.post(\"/user-credentials\", response_model=MCPApiKeyResponse)\ndef save_user_credentials(\n    request: MCPUserCredentialsRequest,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_user),\n) -> MCPApiKeyResponse:\n    \"\"\"Save user credentials for template-based MCP server authentication\"\"\"\n\n    logger.info(f\"Saving user credentials for server: {request.server_id}\")\n\n    try:\n        server_id = request.server_id\n        mcp_server = get_mcp_server_by_id(server_id, db_session)\n    except Exception:\n        raise HTTPException(status_code=404, detail=\"MCP server not found\")\n\n    if mcp_server.auth_type == \"none\":\n        raise HTTPException(\n            status_code=400,\n            detail=\"Server does not require authentication\",\n        )\n\n    email = user.email\n\n    # Get the authentication template for this server\n    auth_template = get_server_auth_template(server_id, db_session)\n    if not auth_template:\n        # Fallback to simple API key storage for servers without templates\n        if \"api_key\" not in request.credentials:\n            raise HTTPException(\n                status_code=400,\n                detail=\"No authentication template found and no api_key provided\",\n            )\n        config_data = MCPConnectionData(\n            headers={\"Authorization\": f\"Bearer {request.credentials['api_key']}\"},\n        )\n    else:\n        # Use template to create the full connection config\n        try:\n            # TODO: fix and/or type correctly w/base model\n            auth_template_dict = extract_connection_data(\n                auth_template, apply_mask=False\n            )\n            config_data = MCPConnectionData(\n                headers=auth_template_dict.get(\"headers\", {}),\n                header_substitutions=request.credentials,\n            )\n            for oauth_field_key in MCPOAuthKeys:\n                field_key: Literal[\"client_info\", \"tokens\", \"metadata\"] = (\n                    oauth_field_key.value\n                )\n                if field_val := auth_template_dict.get(field_key):\n                    config_data[field_key] = field_val\n\n        except Exception as e:\n            logger.error(f\"Failed to process authentication template: {e}\")\n            raise HTTPException(\n                status_code=400,\n                detail=f\"Failed to process authentication template: {str(e)}\",\n            )\n\n    # Test the credentials before saving\n    validation_tested = False\n    validation_message = \"Credentials saved successfully\"\n\n    try:\n        auth = None\n        if mcp_server.auth_type == MCPAuthenticationType.OAUTH:\n            # should only be saving user creds if an admin config exists\n            assert mcp_server.admin_connection_config_id is not None\n            auth = make_oauth_provider(\n                mcp_server,\n                email,\n                UNUSED_RETURN_PATH,\n                mcp_server.admin_connection_config_id,\n                None,\n            )\n\n        if HEADER_SUBSTITUTIONS in config_data:\n            for key, value in config_data[HEADER_SUBSTITUTIONS].items():\n                for k, v in config_data[\"headers\"].items():\n                    config_data[\"headers\"][k] = v.replace(f\"{{{key}}}\", value)\n\n        server_url = mcp_server.server_url\n        is_valid, test_message = test_mcp_server_credentials(\n            server_url,\n            config_data[\"headers\"],\n            transport=MCPTransport(request.transport.replace(\"-\", \"_\").upper()),\n            auth=auth,\n        )\n        validation_tested = True\n\n        if not is_valid:\n            raise HTTPException(\n                status_code=400,\n                detail=f\"Credentials validation failed: {test_message}\",\n            )\n        else:\n            validation_message = (\n                f\"Credentials saved and validated successfully. {test_message}\"\n            )\n\n    except HTTPException:\n        raise  # Re-raise HTTP exceptions\n    except Exception as e:\n        logger.warning(\n            f\"Could not validate credentials for server {mcp_server.name}: {e}\"\n        )\n        validation_message = \"Credentials saved but could not be validated\"\n\n    try:\n        # Save the processed credentials\n        upsert_user_connection_config(\n            server_id=server_id,\n            user_email=email,\n            config_data=config_data,\n            db_session=db_session,\n        )\n\n        logger.info(\n            f\"User credentials saved for server {mcp_server.name} and user {email}\"\n        )\n        db_session.commit()\n\n        return MCPApiKeyResponse(\n            success=True,\n            message=validation_message,\n            server_id=request.server_id,\n            server_name=mcp_server.name,\n            authenticated=True,\n            validation_tested=validation_tested,\n        )\n\n    except Exception as e:\n        logger.error(f\"Failed to save user credentials: {e}\")\n        raise HTTPException(status_code=500, detail=\"Failed to save user credentials\")\n\n\nclass MCPToolDescription(BaseModel):\n    id: int\n    name: str\n    display_name: str\n    description: str\n\n\nclass ServerToolsResponse(BaseModel):\n    server_id: int\n    server_name: str\n    server_url: str\n    tools: list[MCPToolDescription]\n\n\ndef _ensure_mcp_server_owner_or_admin(server: DbMCPServer, user: User) -> None:\n    logger.info(\n        f\"Ensuring MCP server owner or admin: {server.name} {user} {user.role} server.owner={server.owner}\"\n    )\n    if user.role == UserRole.ADMIN:\n        return\n\n    logger.info(f\"User email: {user.email} server.owner={server.owner}\")\n    if server.owner != user.email:\n        raise HTTPException(\n            status_code=403,\n            detail=\"Curators can only modify MCP servers that they have created.\",\n        )\n\n\ndef _db_mcp_server_to_api_mcp_server(\n    db_server: DbMCPServer,\n    db: Session,\n    request_user: User | None,\n    include_auth_config: bool = False,\n) -> MCPServer:\n    \"\"\"Convert database MCP server to API model\"\"\"\n\n    email = request_user.email if request_user else \"\"\n\n    # Check if user has authentication configured and extract credentials\n    auth_performer = db_server.auth_performer\n    user_authenticated: bool | None = None\n    user_credentials = None\n    admin_credentials = None\n    can_view_admin_credentials = bool(include_auth_config) and (\n        request_user is not None\n        and (\n            request_user.role == UserRole.ADMIN\n            or (request_user.email and request_user.email == db_server.owner)\n        )\n    )\n    if db_server.auth_type == MCPAuthenticationType.NONE:\n        user_authenticated = True  # No auth required\n    elif auth_performer == MCPAuthenticationPerformer.ADMIN:\n        user_authenticated = db_server.admin_connection_config is not None\n        if (\n            can_view_admin_credentials\n            and db_server.admin_connection_config is not None\n            and include_auth_config\n        ):\n            admin_config_dict = extract_connection_data(\n                db_server.admin_connection_config, apply_mask=False\n            )\n            if db_server.auth_type == MCPAuthenticationType.API_TOKEN:\n                raw_api_key = admin_config_dict[\"headers\"][\"Authorization\"].split(\" \")[\n                    -1\n                ]\n                admin_credentials = {\n                    \"api_key\": mask_string(raw_api_key),\n                }\n            elif db_server.auth_type == MCPAuthenticationType.OAUTH:\n                user_authenticated = False\n                client_info = None\n                client_info_raw = admin_config_dict.get(MCPOAuthKeys.CLIENT_INFO.value)\n                if client_info_raw:\n                    client_info = OAuthClientInformationFull.model_validate(\n                        client_info_raw\n                    )\n                if client_info:\n                    if not client_info.client_id:\n                        raise ValueError(\"Stored client info had empty client ID\")\n                    admin_credentials = {\n                        \"client_id\": mask_string(client_info.client_id),\n                    }\n                    if client_info.client_secret:\n                        admin_credentials[\"client_secret\"] = mask_string(\n                            client_info.client_secret\n                        )\n                else:\n                    admin_credentials = {}\n                    logger.warning(\n                        f\"No admin client info found for server {db_server.name}\"\n                    )\n    else:  # currently: per user auth using api key OR oauth\n        user_config = get_user_connection_config(db_server.id, email, db)\n        user_authenticated = user_config is not None\n\n        if user_authenticated and user_config:\n            # Avoid hitting the MCP server when assembling response data.\n            if (\n                include_auth_config\n                and db_server.auth_type != MCPAuthenticationType.OAUTH\n            ):\n                user_config_dict = extract_connection_data(user_config, apply_mask=True)\n                user_credentials = user_config_dict.get(HEADER_SUBSTITUTIONS, {})\n\n        if (\n            db_server.auth_type == MCPAuthenticationType.OAUTH\n            and db_server.admin_connection_config\n        ):\n            client_info = None\n            oauth_admin_config_dict = extract_connection_data(\n                db_server.admin_connection_config, apply_mask=False\n            )\n            client_info_raw = oauth_admin_config_dict.get(\n                MCPOAuthKeys.CLIENT_INFO.value\n            )\n            if client_info_raw:\n                client_info = OAuthClientInformationFull.model_validate(client_info_raw)\n            if client_info:\n                if not client_info.client_id:\n                    raise ValueError(\"Stored client info had empty client ID\")\n                if can_view_admin_credentials:\n                    admin_credentials = {\n                        \"client_id\": mask_string(client_info.client_id),\n                    }\n                    if client_info.client_secret:\n                        admin_credentials[\"client_secret\"] = mask_string(\n                            client_info.client_secret\n                        )\n            elif can_view_admin_credentials:\n                admin_credentials = {}\n                logger.warning(f\"No client info found for server {db_server.name}\")\n\n    # Get auth template if this is a per-user auth server\n    auth_template = None\n    if auth_performer == MCPAuthenticationPerformer.PER_USER:\n        try:\n            template_config = db_server.admin_connection_config\n            if template_config:\n                template_config_dict = extract_connection_data(\n                    template_config, apply_mask=False\n                )\n                headers = template_config_dict.get(\"headers\", {})\n                auth_template = MCPAuthTemplate(\n                    headers=headers,\n                    required_fields=[],  # would need to regex, not worth it\n                )\n        except Exception as e:\n            logger.warning(\n                f\"Failed to parse auth template for server {db_server.name}: {e}\"\n            )\n\n    is_authenticated: bool = (\n        db_server.auth_type == MCPAuthenticationType.NONE.value\n        # Pass-through OAuth: user is authenticated via their login OAuth token\n        or db_server.auth_type == MCPAuthenticationType.PT_OAUTH\n        or (\n            auth_performer == MCPAuthenticationPerformer.ADMIN\n            and db_server.auth_type != MCPAuthenticationType.OAUTH\n            and db_server.admin_connection_config_id is not None\n        )\n        or (\n            auth_performer == MCPAuthenticationPerformer.PER_USER and user_authenticated\n        )\n    )\n\n    # Calculate tool count from the relationship\n    tool_count = len(db_server.current_actions) if db_server.current_actions else 0\n\n    return MCPServer(\n        id=db_server.id,\n        name=db_server.name,\n        description=db_server.description,\n        server_url=db_server.server_url,\n        owner=db_server.owner,\n        transport=db_server.transport,\n        auth_type=db_server.auth_type,\n        auth_performer=auth_performer,\n        is_authenticated=is_authenticated,\n        user_authenticated=user_authenticated,\n        status=db_server.status,\n        last_refreshed_at=db_server.last_refreshed_at,\n        tool_count=tool_count,\n        auth_template=auth_template,\n        user_credentials=user_credentials,\n        admin_credentials=admin_credentials,\n    )\n\n\n@router.get(\"/servers/persona/{assistant_id}\", response_model=MCPServersResponse)\ndef get_mcp_servers_for_assistant(\n    assistant_id: str,\n    db: Session = Depends(get_session),\n    user: User = Depends(current_user),\n) -> MCPServersResponse:\n    \"\"\"Get MCP servers for an assistant\"\"\"\n\n    logger.info(f\"Fetching MCP servers for assistant: {assistant_id}\")\n\n    try:\n        persona_id = int(assistant_id)\n        db_mcp_servers = get_mcp_servers_for_persona(persona_id, db, user)\n\n        # Convert to API model format with opportunistic token refresh for OAuth\n        mcp_servers = [\n            _db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)\n            for db_server in db_mcp_servers\n        ]\n\n        return MCPServersResponse(assistant_id=assistant_id, mcp_servers=mcp_servers)\n\n    except ValueError:\n        raise HTTPException(status_code=400, detail=\"Invalid assistant ID\")\n    except Exception as e:\n        logger.error(f\"Failed to fetch MCP servers: {e}\")\n        raise HTTPException(status_code=500, detail=\"Failed to fetch MCP servers\")\n\n\n@router.get(\"/servers\", response_model=MCPServersResponse)\ndef get_mcp_servers_for_user(\n    db: Session = Depends(get_session),\n    user: User = Depends(current_user),\n) -> MCPServersResponse:\n    \"\"\"List all MCP servers for use in agent configuration and chat UI.\n\n    This endpoint is intentionally available to all authenticated users so they\n    can attach MCP actions to assistants. Sensitive admin credentials are never\n    returned.\n    \"\"\"\n    db_mcp_servers = get_all_mcp_servers(db)\n    mcp_servers = [\n        _db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)\n        for db_server in db_mcp_servers\n    ]\n    return MCPServersResponse(mcp_servers=mcp_servers)\n\n\ndef _get_connection_config(\n    mcp_server: DbMCPServer,\n    is_admin: bool,  # noqa: ARG001\n    user: User,\n    db_session: Session,\n) -> MCPConnectionConfig | None:\n    \"\"\"\n    Get the connection config for an MCP server.\n    is_admin is true when we want the config used for the admin panel\n\n    \"\"\"\n    if mcp_server.auth_type == MCPAuthenticationType.NONE:\n        return None\n\n    # Pass-through OAuth uses the user's login OAuth token, not a stored config\n    if mcp_server.auth_type == MCPAuthenticationType.PT_OAUTH:\n        return None\n\n    if (\n        mcp_server.auth_type == MCPAuthenticationType.API_TOKEN\n        and mcp_server.auth_performer == MCPAuthenticationPerformer.ADMIN\n    ):\n        connection_config = mcp_server.admin_connection_config\n    else:\n        connection_config = get_user_connection_config(\n            server_id=mcp_server.id, user_email=user.email, db_session=db_session\n        )\n\n    if not connection_config:\n        raise HTTPException(\n            status_code=401,\n            detail=\"Authentication required for this MCP server\",\n        )\n\n    return connection_config\n\n\n@admin_router.get(\"/server/{server_id}/tools\")\ndef admin_list_mcp_tools_by_id(\n    server_id: int,\n    db: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> MCPToolListResponse:\n    return _list_mcp_tools_by_id(server_id, db, True, user)\n\n\nclass ToolSnapshotSource(str, Enum):\n    DB = \"db\"\n    MCP = \"mcp\"\n\n\n@admin_router.get(\"/server/{server_id}/tools/snapshots\")\ndef get_mcp_server_tools_snapshots(\n    server_id: int,\n    source: ToolSnapshotSource = ToolSnapshotSource.DB,\n    db: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> list[ToolSnapshot]:\n    \"\"\"\n    Get tools for an MCP server as ToolSnapshot objects.\n\n    Query Parameters:\n    - source: \"db\" (default) - fetch from database only, \"mcp\" - discover from MCP server and sync to DB\n\n    Returns: List of ToolSnapshot objects\n    \"\"\"\n    from onyx.db.tools import get_tools_by_mcp_server_id\n\n    try:\n        # Verify the server exists\n        mcp_server = get_mcp_server_by_id(server_id, db)\n    except ValueError:\n        raise HTTPException(status_code=404, detail=\"MCP server not found\")\n\n    _ensure_mcp_server_owner_or_admin(mcp_server, user)\n\n    if source == ToolSnapshotSource.MCP:\n        try:\n            # Discover tools from MCP server and sync to DB\n            _list_mcp_tools_by_id(server_id, db, True, user)\n\n            # Successfully discovered tools, update status to CONNECTED\n            update_mcp_server__no_commit(\n                server_id=server_id,\n                db_session=db,\n                status=MCPServerStatus.CONNECTED,\n                last_refreshed_at=datetime.datetime.now(datetime.timezone.utc),\n            )\n            db.commit()\n        except Exception as e:\n            update_mcp_server__no_commit(\n                server_id=server_id,\n                db_session=db,\n                status=MCPServerStatus.AWAITING_AUTH,\n            )\n            db.commit()\n\n            if isinstance(e, HTTPException):\n                # Re-raise HTTP exceptions (e.g. 401, 400) so they are returned to client\n                raise e\n\n            logger.error(f\"Failed to discover tools for MCP server: {e}\")\n            raise HTTPException(status_code=500, detail=\"Failed to discover tools\")\n\n    # Fetch and return tools from database\n    mcp_tools = get_tools_by_mcp_server_id(server_id, db, order_by_id=True)\n    return [ToolSnapshot.from_model(tool) for tool in mcp_tools]\n\n\n@router.get(\"/server/{server_id}/tools\")\ndef user_list_mcp_tools_by_id(\n    server_id: int,\n    db: Session = Depends(get_session),\n    user: User = Depends(current_user),\n) -> MCPToolListResponse:\n    return _list_mcp_tools_by_id(server_id, db, False, user)\n\n\ndef _upsert_db_tools(\n    discovered_tools: list[MCPLibTool],\n    existing_by_name: dict[str, Tool],\n    processed_names: set[str],\n    mcp_server_id: int,\n    db: Session,\n) -> bool:\n    db_dirty = False\n\n    for tool in discovered_tools:\n        tool_name = tool.name\n        if not tool_name:\n            continue\n\n        processed_names.add(tool_name)\n        description = tool.description or \"\"\n        annotations_title = tool.annotations.title if tool.annotations else None\n        display_name = tool.title or annotations_title or tool_name\n        input_schema = tool.inputSchema\n\n        if existing_tool := existing_by_name.get(tool_name):\n            if existing_tool.description != description:\n                existing_tool.description = description\n                db_dirty = True\n            if existing_tool.display_name != display_name:\n                existing_tool.display_name = display_name\n                db_dirty = True\n            if existing_tool.mcp_input_schema != input_schema:\n                existing_tool.mcp_input_schema = input_schema\n                db_dirty = True\n            continue\n\n        new_tool = create_tool__no_commit(\n            name=tool_name,\n            description=description,\n            openapi_schema=None,\n            custom_headers=None,\n            user_id=None,\n            db_session=db,\n            passthrough_auth=False,\n            mcp_server_id=mcp_server_id,\n            enabled=True,\n        )\n        new_tool.display_name = display_name\n        new_tool.mcp_input_schema = input_schema\n        db_dirty = True\n    return db_dirty\n\n\ndef _list_mcp_tools_by_id(\n    server_id: int,\n    db: Session,\n    is_admin: bool,\n    user: User,\n) -> MCPToolListResponse:\n    \"\"\"List available tools from an existing MCP server\"\"\"\n    logger.info(f\"Listing tools for MCP server: {server_id}\")\n\n    try:\n        # Get the MCP server\n        mcp_server = get_mcp_server_by_id(server_id, db)\n    except ValueError:\n        raise HTTPException(status_code=404, detail=\"MCP server not found\")\n\n    if is_admin:\n        _ensure_mcp_server_owner_or_admin(mcp_server, user)\n\n    # Get connection config based on auth type\n    # TODO: for now, only the admin that set up a per-user api key server can\n    # see their configuration. This is probably not ideal. Other admins\n    # can of course put their own credentials in and list the tools.\n    connection_config = _get_connection_config(mcp_server, is_admin, user, db)\n\n    # Allow access for NONE and PT_OAUTH (which use user's login token at runtime)\n    if not connection_config and mcp_server.auth_type not in (\n        MCPAuthenticationType.NONE,\n        MCPAuthenticationType.PT_OAUTH,\n    ):\n        raise HTTPException(\n            status_code=401,\n            detail=\"This MCP server is not configured yet\",\n        )\n\n    user_id = str(user.id)\n    # Discover tools from the MCP server\n    auth = None\n    headers: dict[str, str] = {}\n\n    if mcp_server.auth_type == MCPAuthenticationType.OAUTH:\n        # TODO: just pass this in, but should work when auth is set already\n        assert connection_config  # for mypy\n        auth = make_oauth_provider(\n            mcp_server,\n            user_id,\n            UNUSED_RETURN_PATH,\n            connection_config.id,\n            None,\n        )\n    elif mcp_server.auth_type == MCPAuthenticationType.PT_OAUTH:\n        # Pass-through OAuth: use the user's login OAuth token\n        if user.oauth_accounts:\n            user_oauth_token = user.oauth_accounts[0].access_token\n            headers[\"Authorization\"] = f\"Bearer {user_oauth_token}\"\n        else:\n            raise HTTPException(\n                status_code=401,\n                detail=\"Pass-through OAuth requires a user logged in with OAuth\",\n            )\n\n    if connection_config:\n        connection_config_dict = extract_connection_data(\n            connection_config, apply_mask=False\n        )\n        headers.update(connection_config_dict.get(\"headers\", {}))\n\n    import time\n\n    t1 = time.time()\n    logger.info(f\"Discovering tools for MCP server: {mcp_server.name}: {t1}\")\n    server_url = mcp_server.server_url\n\n    if mcp_server.transport is None:\n        raise HTTPException(\n            status_code=400,\n            detail=\"MCP server transport is not configured\",\n        )\n\n    discovered_tools = discover_mcp_tools(\n        server_url,\n        headers,\n        transport=mcp_server.transport,\n        auth=auth,\n    )\n    logger.info(\n        f\"Discovered {len(discovered_tools)} tools for MCP server: {mcp_server.name}: {time.time() - t1}\"\n    )\n    update_mcp_server__no_commit(\n        server_id=server_id,\n        db_session=db,\n        status=MCPServerStatus.CONNECTED,\n    )\n    db.commit()\n\n    if is_admin:\n        existing_tools = get_tools_by_mcp_server_id(mcp_server.id, db)\n        existing_by_name = {db_tool.name: db_tool for db_tool in existing_tools}\n        processed_names: set[str] = set()\n\n        db_dirty = _upsert_db_tools(\n            discovered_tools, existing_by_name, processed_names, mcp_server.id, db\n        )\n\n        for name, db_tool in existing_by_name.items():\n            if name not in processed_names:\n                delete_tool__no_commit(db_tool.id, db)\n                db_dirty = True\n\n        if db_dirty:\n            db.commit()\n\n    # Truncate tool descriptions to prevent overly long responses\n    for tool in discovered_tools:\n        if tool.description:\n            tool.description = _truncate_description(tool.description)\n\n    # TODO: Also list resources from the MCP server\n    # resources = discover_mcp_resources(mcp_server, connection_config)\n\n    return MCPToolListResponse(\n        server_id=server_id,\n        server_name=mcp_server.name,\n        server_url=mcp_server.server_url,\n        tools=discovered_tools,\n    )\n\n\ndef _upsert_mcp_server(\n    request: MCPToolCreateRequest,\n    db_session: Session,\n    user: User,\n) -> DbMCPServer:\n    \"\"\"\n    Creates a new or edits an existing MCP server. Returns the DB model\n    \"\"\"\n    mcp_server = None\n    admin_config = None\n\n    changing_connection_config = True\n\n    # Handle existing server update\n    if request.existing_server_id:\n        try:\n            mcp_server = get_mcp_server_by_id(request.existing_server_id, db_session)\n        except ValueError:\n            raise HTTPException(\n                status_code=404,\n                detail=f\"MCP server with ID {request.existing_server_id} not found\",\n            )\n        _ensure_mcp_server_owner_or_admin(mcp_server, user)\n        client_info = None\n        if mcp_server.admin_connection_config:\n            existing_admin_config_dict = extract_connection_data(\n                mcp_server.admin_connection_config, apply_mask=False\n            )\n            client_info_raw = existing_admin_config_dict.get(\n                MCPOAuthKeys.CLIENT_INFO.value\n            )\n            if client_info_raw:\n                client_info = OAuthClientInformationFull.model_validate(client_info_raw)\n\n        changing_connection_config = (\n            not mcp_server.admin_connection_config\n            or (\n                request.auth_type == MCPAuthenticationType.OAUTH\n                and (\n                    client_info is None\n                    or request.oauth_client_id != client_info.client_id\n                    or request.oauth_client_secret != (client_info.client_secret or \"\")\n                )\n            )\n            or (request.auth_type == MCPAuthenticationType.API_TOKEN)\n            or (request.transport != mcp_server.transport)\n        )\n\n        # Cleanup: Delete existing connection configs\n        # If the auth type is OAUTH, delete all user connection configs\n        # If the auth type is API_TOKEN, delete the admin connection config and the admin user connection configs\n        if (\n            changing_connection_config\n            and mcp_server.admin_connection_config_id\n            and request.auth_type == MCPAuthenticationType.OAUTH\n        ):\n            delete_all_user_connection_configs_for_server_no_commit(\n                mcp_server.id, db_session\n            )\n        elif (\n            changing_connection_config\n            and mcp_server.admin_connection_config_id\n            and request.auth_type == MCPAuthenticationType.API_TOKEN\n        ):\n            delete_connection_config(mcp_server.admin_connection_config_id, db_session)\n            if user.email:\n                delete_user_connection_configs_for_server(\n                    mcp_server.id, user.email, db_session\n                )\n\n        # Update the server with new values\n        mcp_server = update_mcp_server__no_commit(\n            server_id=request.existing_server_id,\n            db_session=db_session,\n            name=request.name,\n            description=request.description,\n            server_url=request.server_url,\n            auth_type=request.auth_type,\n            auth_performer=request.auth_performer,\n            transport=request.transport,\n        )\n\n        logger.info(\n            f\"Updated existing MCP server '{request.name}' with ID {mcp_server.id}\"\n        )\n\n    else:\n        # Handle new server creation\n        # Prevent duplicate server creation with same URL\n        normalized_url = (request.server_url or \"\").strip()\n        if not normalized_url:\n            raise HTTPException(status_code=400, detail=\"server_url is required\")\n\n        if not user.email:\n            raise HTTPException(\n                status_code=400,\n                detail=\"Authenticated user email required to create MCP servers\",\n            )\n\n        mcp_server = create_mcp_server__no_commit(\n            owner_email=user.email,\n            name=request.name,\n            description=request.description,\n            server_url=request.server_url,\n            auth_type=request.auth_type,\n            auth_performer=request.auth_performer,\n            transport=request.transport or MCPTransport.STREAMABLE_HTTP,\n            db_session=db_session,\n        )\n\n        logger.info(f\"Created new MCP server '{request.name}' with ID {mcp_server.id}\")\n\n    # PT_OAUTH doesn't need stored connection config (uses user's login token)\n    if (\n        not changing_connection_config\n        or request.auth_type == MCPAuthenticationType.NONE\n        or request.auth_type == MCPAuthenticationType.PT_OAUTH\n    ):\n        return mcp_server\n\n    # Create connection configs\n    admin_connection_config_id = None\n    if request.auth_performer == MCPAuthenticationPerformer.ADMIN and request.api_token:\n        # Admin-managed server: create admin config with API token\n        admin_config = create_connection_config(\n            config_data=MCPConnectionData(\n                headers={\"Authorization\": f\"Bearer {request.api_token}\"},\n            ),\n            mcp_server_id=mcp_server.id,\n            db_session=db_session,\n        )\n        admin_connection_config_id = admin_config.id\n\n    elif request.auth_performer == MCPAuthenticationPerformer.PER_USER:\n        if request.auth_type == MCPAuthenticationType.API_TOKEN:\n            # handled by model validation, this is just for mypy\n            assert request.auth_template and request.admin_credentials\n\n            # Per-user server: create template and save creator's per-user config\n            template_data = request.auth_template\n\n            # Create template config: faithful representation of what's in the admin panel\n            template_config = create_connection_config(\n                config_data=MCPConnectionData(\n                    headers=template_data.headers,\n                    header_substitutions=request.admin_credentials,\n                ),\n                mcp_server_id=mcp_server.id,\n                user_email=\"\",\n                db_session=db_session,\n            )\n\n            # seed the user config for this admin user\n            user_config = create_connection_config(\n                config_data=MCPConnectionData(\n                    headers=_build_headers_from_template(\n                        template_data, request.admin_credentials, user.email\n                    ),\n                    header_substitutions=request.admin_credentials,\n                ),\n                mcp_server_id=mcp_server.id,\n                user_email=user.email,\n                db_session=db_session,\n            )\n            user_config.mcp_server_id = mcp_server.id\n            admin_connection_config_id = template_config.id\n        elif request.auth_type == MCPAuthenticationType.OAUTH:\n            # Create initial admin config. If client credentials were provided,\n            # seed client_info so the OAuth provider can skip dynamic\n            # registration; otherwise, the provider will attempt it.\n            cfg: MCPConnectionData = MCPConnectionData(headers={})\n            if request.oauth_client_id:\n                client_info = OAuthClientInformationFull(\n                    client_id=request.oauth_client_id,\n                    client_secret=request.oauth_client_secret,\n                    redirect_uris=[AnyUrl(f\"{WEB_DOMAIN}/mcp/oauth/callback\")],\n                    grant_types=[\"authorization_code\", \"refresh_token\"],\n                    response_types=[\"code\"],\n                    scope=REQUESTED_SCOPE,  # TODO: allow specifying scopes?\n                    # default token_endpoint_auth_method is client_secret_post\n                )\n                cfg[MCPOAuthKeys.CLIENT_INFO.value] = client_info.model_dump(\n                    mode=\"json\"\n                )\n\n            admin_config = create_connection_config(\n                config_data=cfg,\n                mcp_server_id=mcp_server.id,\n                user_email=\"\",\n                db_session=db_session,\n            )\n            admin_connection_config_id = admin_config.id\n\n            # create user connection config\n            create_connection_config(\n                config_data=cfg,\n                mcp_server_id=mcp_server.id,\n                user_email=user.email,\n                db_session=db_session,\n            )\n    elif request.auth_performer == MCPAuthenticationPerformer.ADMIN:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Admin authentication is not yet supported for MCP servers: user per-user\",\n        )\n\n    # Update server with config IDs\n    if admin_connection_config_id is not None:\n        mcp_server = update_mcp_server__no_commit(\n            server_id=mcp_server.id,\n            db_session=db_session,\n            admin_connection_config_id=admin_connection_config_id,\n        )\n\n    db_session.commit()\n    return mcp_server\n\n\ndef _sync_tools_for_server(\n    mcp_server: DbMCPServer,\n    selected_tools: set[str],\n    db_session: Session,\n) -> int:\n    \"\"\"Toggle enabled state for MCP tools that exist for the server.\n    Updates to the db model of a tool all happen when the user Lists Tools.\n    This ensures that the the tools added to the db match what the user sees in the UI,\n    even if the underlying tool has changed on the server after list tools is called.\n    That's a corner case anyways; the admin should go back and update the server by re-listing tools.\n    \"\"\"\n\n    updated_tools = 0\n\n    existing_tools = get_tools_by_mcp_server_id(mcp_server.id, db_session)\n    existing_by_name = {tool.name: tool for tool in existing_tools}\n\n    # Disable any existing tools that were not processed above\n    for tool_name, db_tool in existing_by_name.items():\n        should_enable = tool_name in selected_tools\n        if db_tool.enabled != should_enable:\n            db_tool.enabled = should_enable\n            updated_tools += 1\n\n    return updated_tools\n\n\n@admin_router.get(\"/servers/{server_id}\", response_model=MCPServer)\ndef get_mcp_server_detail(\n    server_id: int,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> MCPServer:\n    \"\"\"Return details for one MCP server if user has access\"\"\"\n    try:\n        server = get_mcp_server_by_id(server_id, db_session)\n    except ValueError:\n        raise HTTPException(status_code=404, detail=\"MCP server not found\")\n\n    _ensure_mcp_server_owner_or_admin(server, user)\n\n    # TODO: user permissions per mcp server not yet implemented, for now\n    # permissions are based on access to assistants\n    # # Quick permission check – admin or user has access\n    # if user and server not in user.accessible_mcp_servers and not user.is_superuser:\n    #     raise HTTPException(status_code=403, detail=\"Forbidden\")\n\n    return _db_mcp_server_to_api_mcp_server(\n        server,\n        db_session,\n        include_auth_config=True,\n        request_user=user,\n    )\n\n\n@admin_router.get(\"/tools\")\ndef get_all_mcp_tools(\n    db: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),  # noqa: ARG001\n) -> list:\n    \"\"\"Get all tools associated with MCP servers, including both enabled and disabled tools\"\"\"\n    from sqlalchemy import select\n    from onyx.db.models import Tool\n\n    # Query MCP tools ordered by ID to maintain consistent ordering\n    stmt = select(Tool).where(Tool.mcp_server_id.is_not(None)).order_by(Tool.id)\n\n    mcp_tools = db.scalars(stmt).all()\n\n    # Convert to ToolSnapshot format\n    return [ToolSnapshot.from_model(tool) for tool in mcp_tools]\n\n\n@admin_router.patch(\"/server/{server_id}/status\")\ndef update_mcp_server_status(\n    server_id: int,\n    status: MCPServerStatus,\n    db: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> dict[str, str]:\n    \"\"\"Update the status of an MCP server\"\"\"\n    logger.info(f\"Updating MCP server {server_id} status to {status}\")\n\n    try:\n        mcp_server = get_mcp_server_by_id(server_id, db)\n    except ValueError:\n        raise HTTPException(status_code=404, detail=\"MCP server not found\")\n\n    _ensure_mcp_server_owner_or_admin(mcp_server, user)\n\n    update_mcp_server__no_commit(\n        server_id=server_id,\n        db_session=db,\n        status=status,\n    )\n    db.commit()\n\n    logger.info(f\"Successfully updated MCP server {server_id} status to {status}\")\n    return {\"message\": f\"Server status updated to {status.value}\"}\n\n\n@admin_router.get(\"/servers\", response_model=MCPServersResponse)\ndef get_mcp_servers_for_admin(\n    db: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> MCPServersResponse:\n    \"\"\"Get all MCP servers for admin display\"\"\"\n\n    logger.info(\"Fetching all MCP servers for admin display\")\n\n    try:\n        db_mcp_servers = get_all_mcp_servers(db)\n\n        # Convert to API model format\n        mcp_servers = [\n            _db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)\n            for db_server in db_mcp_servers\n        ]\n\n        return MCPServersResponse(mcp_servers=mcp_servers)\n\n    except Exception as e:\n        logger.error(f\"Failed to fetch MCP servers for admin: {type(e)}:{e}\")\n        raise HTTPException(status_code=500, detail=\"Failed to fetch MCP servers\")\n\n\n@admin_router.get(\"/server/{server_id}/db-tools\")\ndef get_mcp_server_db_tools(\n    server_id: int,\n    db: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> ServerToolsResponse:\n    \"\"\"Get existing database tools created for an MCP server\"\"\"\n    logger.info(f\"Getting database tools for MCP server: {server_id}\")\n\n    try:\n        # Verify the server exists\n        mcp_server = get_mcp_server_by_id(server_id, db)\n    except ValueError:\n        raise HTTPException(status_code=404, detail=\"MCP server not found\")\n\n    _ensure_mcp_server_owner_or_admin(mcp_server, user)\n\n    # Get all tools associated with this MCP server\n    mcp_tools = get_tools_by_mcp_server_id(server_id, db)\n\n    # Convert to response format\n    tools_data = []\n    for tool in mcp_tools:\n        # Extract the tool name from the full name (remove server prefix)\n        tool_name = tool.name\n        if tool.mcp_server and tool_name.startswith(f\"{tool.mcp_server.name}_\"):\n            tool_name = tool_name[len(f\"{tool.mcp_server.name}_\") :]\n\n        tools_data.append(\n            MCPToolDescription(\n                id=tool.id,\n                name=tool_name,\n                display_name=tool.display_name or tool_name,\n                description=_truncate_description(tool.description),\n            )\n        )\n\n    return ServerToolsResponse(\n        server_id=server_id,\n        server_name=mcp_server.name,\n        server_url=mcp_server.server_url,\n        tools=tools_data,\n    )\n\n\n@admin_router.post(\"/servers/create\", response_model=MCPServerCreateResponse)\ndef upsert_mcp_server(\n    request: MCPToolCreateRequest,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> MCPServerCreateResponse:\n    \"\"\"Create or update an MCP server (no tools yet)\"\"\"\n\n    # Validate auth_performer for non-none auth types\n    if request.auth_type != MCPAuthenticationType.NONE and not request.auth_performer:\n        raise HTTPException(\n            status_code=400, detail=\"auth_performer is required for non-none auth types\"\n        )\n\n    try:\n        mcp_server = _upsert_mcp_server(request, db_session, user)\n\n        if (\n            request.auth_type\n            not in (MCPAuthenticationType.NONE, MCPAuthenticationType.PT_OAUTH)\n            and mcp_server.admin_connection_config_id is None\n        ):\n            raise HTTPException(\n                status_code=500, detail=\"Failed to set admin connection config\"\n            )\n        db_session.commit()\n\n        action_verb = \"Updated\" if request.existing_server_id else \"Created\"\n        logger.info(\n            f\"{action_verb} MCP server '{request.name}' with ID {mcp_server.id}\"\n        )\n\n        if mcp_server.auth_type is None:\n            raise HTTPException(\n                status_code=500, detail=\"MCP server auth_type not configured\"\n            )\n        auth_type_str = mcp_server.auth_type.value\n\n        return MCPServerCreateResponse(\n            server_id=mcp_server.id,\n            server_name=mcp_server.name,\n            server_url=mcp_server.server_url,\n            auth_type=auth_type_str,\n            auth_performer=(\n                request.auth_performer.value if request.auth_performer else None\n            ),\n            is_authenticated=(\n                mcp_server.auth_type == MCPAuthenticationType.NONE.value\n                or request.auth_performer == MCPAuthenticationPerformer.ADMIN\n            ),\n        )\n\n    except HTTPException:\n        # Re-raise HTTP exceptions as-is\n        raise\n    except Exception as e:\n        logger.exception(\"Failed to create/update MCP tool\")\n        raise HTTPException(\n            status_code=500, detail=f\"Failed to create/update MCP tool: {str(e)}\"\n        )\n\n\n@admin_router.post(\"/servers/update\", response_model=MCPServerUpdateResponse)\ndef update_mcp_server_with_tools(\n    request: MCPToolUpdateRequest,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> MCPServerUpdateResponse:\n    \"\"\"Update an MCP server and associated tools\"\"\"\n\n    try:\n        mcp_server = get_mcp_server_by_id(request.server_id, db_session)\n    except ValueError:\n        raise HTTPException(status_code=404, detail=\"MCP server not found\")\n\n    _ensure_mcp_server_owner_or_admin(mcp_server, user)\n\n    if mcp_server.admin_connection_config_id is None and mcp_server.auth_type not in (\n        MCPAuthenticationType.NONE,\n        MCPAuthenticationType.PT_OAUTH,\n    ):\n        raise HTTPException(\n            status_code=400, detail=\"MCP server has no admin connection config\"\n        )\n\n    name_changed = request.name is not None and request.name != mcp_server.name\n    description_changed = (\n        request.description is not None\n        and request.description != mcp_server.description\n    )\n    if name_changed or description_changed:\n        mcp_server = update_mcp_server__no_commit(\n            server_id=mcp_server.id,\n            db_session=db_session,\n            name=request.name if name_changed else None,\n            description=request.description if description_changed else None,\n        )\n\n    selected_names = set(request.selected_tools or [])\n    updated_tools = _sync_tools_for_server(\n        mcp_server,\n        selected_names,\n        db_session,\n    )\n\n    db_session.commit()\n\n    return MCPServerUpdateResponse(\n        server_id=mcp_server.id,\n        server_name=mcp_server.name,\n        updated_tools=updated_tools,\n    )\n\n\n@admin_router.post(\"/server\", response_model=MCPServer)\ndef create_mcp_server_simple(\n    request: MCPServerSimpleCreateRequest,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> MCPServer:\n    \"\"\"Create MCP server with minimal information - auth to be configured later\"\"\"\n\n    mcp_server = create_mcp_server__no_commit(\n        owner_email=user.email,\n        name=request.name,\n        description=request.description,\n        server_url=request.server_url,\n        auth_type=None,  # To be configured later\n        transport=None,  # To be configured later\n        auth_performer=None,  # To be configured later\n        db_session=db_session,\n    )\n\n    db_session.commit()\n\n    return MCPServer(\n        id=mcp_server.id,\n        name=mcp_server.name,\n        description=mcp_server.description,\n        server_url=mcp_server.server_url,\n        owner=mcp_server.owner,\n        transport=mcp_server.transport,\n        auth_type=mcp_server.auth_type,\n        auth_performer=mcp_server.auth_performer,\n        is_authenticated=False,  # Not authenticated yet\n        status=mcp_server.status,\n        tool_count=0,  # New server, no tools yet\n        auth_template=None,\n        user_credentials=None,\n        admin_credentials=None,\n    )\n\n\n@admin_router.patch(\"/server/{server_id}\", response_model=MCPServer)\ndef update_mcp_server_simple(\n    server_id: int,\n    request: MCPServerSimpleUpdateRequest,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> MCPServer:\n    \"\"\"Update MCP server basic information (name, description, URL)\"\"\"\n    try:\n        mcp_server = get_mcp_server_by_id(server_id, db_session)\n    except ValueError:\n        raise HTTPException(status_code=404, detail=\"MCP server not found\")\n\n    _ensure_mcp_server_owner_or_admin(mcp_server, user)\n\n    # Update only provided fields\n    updated_server = update_mcp_server__no_commit(\n        server_id=server_id,\n        db_session=db_session,\n        name=request.name,\n        description=request.description,\n        server_url=request.server_url,\n    )\n\n    db_session.commit()\n\n    # Return the updated server in API format\n    return _db_mcp_server_to_api_mcp_server(\n        updated_server, db_session, request_user=user\n    )\n\n\n@admin_router.delete(\"/server/{server_id}\")\ndef delete_mcp_server_admin(\n    server_id: int,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> dict:\n    \"\"\"Delete an MCP server and cascading related objects (tools, configs).\"\"\"\n    try:\n        # Ensure it exists\n        server = get_mcp_server_by_id(server_id, db_session)\n\n        _ensure_mcp_server_owner_or_admin(server, user)\n\n        # Log tools that will be deleted for debugging\n        tools_to_delete = get_tools_by_mcp_server_id(server_id, db_session)\n        logger.info(\n            f\"Deleting MCP server {server_id} ({server.name}) with {len(tools_to_delete)} tools\"\n        )\n        for tool in tools_to_delete:\n            logger.debug(f\"  - Tool to delete: {tool.name} (ID: {tool.id})\")\n\n        # Cascade behavior handled by FK ondelete in DB\n        delete_mcp_server(server_id, db_session)\n\n        # Verify tools were deleted\n        remaining_tools = get_tools_by_mcp_server_id(server_id, db_session)\n        if remaining_tools:\n            logger.error(\n                f\"WARNING: {len(remaining_tools)} tools still exist after deleting MCP server {server_id}\"\n            )\n            # Manually delete them as a fallback\n            for tool in remaining_tools:\n                logger.info(\n                    f\"Manually deleting orphaned tool: {tool.name} (ID: {tool.id})\"\n                )\n                delete_tool__no_commit(tool.id, db_session)\n        db_session.commit()\n\n        return {\"success\": True}\n    except ValueError:\n        raise HTTPException(status_code=404, detail=\"MCP server not found\")\n    except Exception as e:\n        logger.error(f\"Failed to delete MCP server {server_id}: {e}\")\n        raise HTTPException(status_code=500, detail=\"Failed to delete MCP server\")\n"
  },
  {
    "path": "backend/onyx/server/features/mcp/models.py",
    "content": "import datetime\nfrom enum import Enum\nfrom typing import Any\nfrom typing import List\nfrom typing import NotRequired\nfrom typing import Optional\nfrom typing import TypedDict\n\nfrom mcp.types import Tool as MCPLibTool\nfrom pydantic import BaseModel\nfrom pydantic import Field\nfrom pydantic import model_validator\n\nfrom onyx.db.enums import MCPAuthenticationPerformer\nfrom onyx.db.enums import MCPAuthenticationType\nfrom onyx.db.enums import MCPServerStatus\nfrom onyx.db.enums import MCPTransport\n\n\n# This should be updated along with MCPConnectionData\nclass MCPOAuthKeys(str, Enum):\n    \"\"\"MCP OAuth keys types\"\"\"\n\n    CLIENT_INFO = \"client_info\"\n    TOKENS = \"tokens\"\n    METADATA = \"metadata\"\n\n\nclass MCPConnectionData(TypedDict):\n    \"\"\"TypedDict to allow use as a type hint for a JSONB column\n    in Postgres\"\"\"\n\n    headers: dict[str, str]\n    header_substitutions: NotRequired[dict[str, str]]\n\n    # For OAuth only\n    # Note: Update MCPOAuthKeys if necessary when modifying these\n    # Unfortunately we can't use the actual models here because basemodels aren't compatible\n    # with SQLAlchemy\n    client_info: NotRequired[dict[str, Any]]  # OAuthClientInformationFull\n    tokens: NotRequired[dict[str, Any]]  # OAuthToken\n    metadata: NotRequired[dict[str, Any]]  # OAuthClientMetadata\n\n    # the actual models are defined in mcp.shared.auth\n    # from mcp.shared.auth import OAuthClientInformationFull, OAuthClientMetadata, OAuthToken\n\n\nclass MCPAuthTemplate(BaseModel):\n    \"\"\"Template for per-user authentication configuration\"\"\"\n\n    headers: dict[str, str] = Field(\n        default_factory=dict,\n        description=\"Map of header names to templates with placeholders\",\n    )\n    # request_body_params: List[dict[str, str]] = Field(\n    #     default_factory=list,\n    #     description=\"List of request body parameter templates with path/value pairs\",\n    # ) # not used yet\n    required_fields: List[str] = Field(\n        default_factory=list,\n        description=\"List of required field names that users must provide\",\n    )\n\n\nclass MCPToolCreateRequest(BaseModel):\n    name: str = Field(..., description=\"Name of the MCP tool\")\n    description: Optional[str] = Field(None, description=\"Description of the MCP tool\")\n    server_url: str = Field(..., description=\"URL of the MCP server\")\n    auth_type: MCPAuthenticationType = Field(..., description=\"Authentication type\")\n    auth_performer: MCPAuthenticationPerformer = Field(\n        ..., description=\"Who performs authentication\"\n    )\n    api_token: Optional[str] = Field(\n        None, description=\"API token for api_token auth type\"\n    )\n    oauth_client_id: Optional[str] = Field(None, description=\"OAuth client ID\")\n    oauth_client_secret: Optional[str] = Field(None, description=\"OAuth client secret\")\n    transport: MCPTransport | None = Field(\n        None, description=\"MCP transport type (STREAMABLE_HTTP or SSE)\"\n    )\n    auth_template: Optional[MCPAuthTemplate] = Field(\n        None, description=\"Template configuration for per-user authentication\"\n    )\n    admin_credentials: Optional[dict[str, str]] = Field(\n        None,\n        description=\"Admin's credential key-value pairs for template substitution and storage\",\n    )\n    existing_server_id: Optional[int] = Field(\n        None, description=\"ID of existing server to update (for editing)\"\n    )\n\n    @model_validator(mode=\"after\")\n    def validate_auth_configuration(self) -> \"MCPToolCreateRequest\":\n        # Validate API token requirements for admin auth\n        if (\n            self.auth_type == MCPAuthenticationType.API_TOKEN\n            and self.auth_performer == MCPAuthenticationPerformer.ADMIN\n            and not self.api_token\n        ):\n            raise ValueError(\n                \"api_token is required when auth_type is 'api_token' and auth_performer is 'admin'\"\n            )\n\n        # Validate that API token is not provided for per-user auth\n        if (\n            self.auth_type == MCPAuthenticationType.API_TOKEN\n            and self.auth_performer == MCPAuthenticationPerformer.PER_USER\n            and self.api_token\n            and self.api_token.strip()\n        ):\n            raise ValueError(\n                \"api_token should not be provided when auth_performer is 'per_user'. Users will provide their own credentials.\"\n            )\n\n        # Validate that auth_template is provided for per-user auth\n        if (\n            self.auth_type == MCPAuthenticationType.API_TOKEN\n            and self.auth_performer == MCPAuthenticationPerformer.PER_USER\n        ):\n            if not self.auth_template:\n                raise ValueError(\n                    \"auth_template is required when auth_performer is 'per_user'\"\n                )\n            if not self.admin_credentials:\n                raise ValueError(\n                    \"admin_credentials is required when auth_performer is 'per_user'\"\n                )\n\n        # OAuth client ID/secret are optional. If provided, they will seed the\n        # OAuth client info; otherwise, the MCP client will attempt dynamic\n        # client registration.\n\n        return self\n\n\nclass MCPToolUpdateRequest(BaseModel):\n    server_id: int = Field(..., description=\"ID of the MCP server\")\n    name: Optional[str] = Field(None, description=\"Updated name of the MCP server\")\n    description: Optional[str] = Field(\n        None, description=\"Updated description of the MCP server\"\n    )\n    selected_tools: Optional[List[str]] = Field(\n        None, description=\"List of selected tool names to create\"\n    )\n\n\nclass MCPServerSimpleCreateRequest(BaseModel):\n    name: str = Field(..., description=\"Name of the MCP server\")\n    description: Optional[str] = Field(\n        None, description=\"Description of the MCP server\"\n    )\n    server_url: str = Field(..., description=\"URL of the MCP server\")\n\n\nclass MCPServerSimpleUpdateRequest(BaseModel):\n    name: Optional[str] = Field(None, description=\"Name of the MCP server\")\n    description: Optional[str] = Field(\n        None, description=\"Description of the MCP server\"\n    )\n    server_url: Optional[str] = Field(None, description=\"URL of the MCP server\")\n\n\nclass MCPToolResponse(BaseModel):\n    id: int\n    name: str\n    display_name: str\n    description: str\n    definition: Optional[dict] = None  # MCP tools don't use OpenAPI definitions\n    custom_headers: List[dict] = []\n    in_code_tool_id: Optional[str] = None\n    passthrough_auth: bool = False\n    # MCP-specific fields\n    server_url: str\n    auth_type: str\n    auth_performer: Optional[str] = None\n    is_authenticated: bool\n\n\nclass MCPOAuthConnectRequest(BaseModel):\n    name: str = Field(..., description=\"Name of the MCP tool\")\n    description: Optional[str] = Field(None, description=\"Description of the MCP tool\")\n    server_url: str = Field(..., description=\"URL of the MCP server\")\n    selected_tools: Optional[List[str]] = Field(\n        None, description=\"List of selected tool names to create\"\n    )\n    existing_server_id: Optional[int] = Field(\n        None, description=\"ID of existing server to update (for editing)\"\n    )\n\n\nclass MCPOAuthConnectResponse(BaseModel):\n    oauth_url: str = Field(..., description=\"OAuth URL to redirect user to\")\n    state: str = Field(..., description=\"OAuth state parameter\")\n    pending_tool: dict = Field(..., description=\"Pending tool configuration\")\n\n\nclass MCPUserOAuthConnectRequest(BaseModel):\n    server_id: int = Field(..., description=\"ID of the MCP server\")\n    return_path: str = Field(..., description=\"Path to redirect to after callback\")\n    include_resource_param: bool = Field(..., description=\"Include resource parameter\")\n    oauth_client_id: str | None = Field(\n        None, description=\"OAuth client ID (optional for DCR)\"\n    )\n    oauth_client_secret: str | None = Field(\n        None, description=\"OAuth client secret (optional for DCR)\"\n    )\n\n    @model_validator(mode=\"after\")\n    def validate_return_path(self) -> \"MCPUserOAuthConnectRequest\":\n        if not self.return_path.startswith(\"/\"):\n            raise ValueError(\"return_path must start with a slash\")\n        return self\n\n\nclass MCPUserOAuthConnectResponse(BaseModel):\n    server_id: int\n    oauth_url: str = Field(..., description=\"OAuth URL to redirect user to\")\n\n\nclass MCPOAuthCallbackRequest(BaseModel):\n    \"\"\"Request payload for completing OAuth flow (authorization code exchange).\"\"\"\n\n    code: str = Field(..., description=\"Authorization code returned by the IdP\")\n    state: Optional[str] = Field(\n        None, description=\"State parameter for CSRF protection\"\n    )\n\n\nclass MCPOAuthCallbackResponse(BaseModel):\n    success: bool\n    message: str\n    server_id: int\n    server_name: str\n    redirect_url: str\n\n\nclass MCPDynamicClientRegistrationRequest(BaseModel):\n    \"\"\"Request for dynamic client registration per RFC 7591\"\"\"\n\n    server_id: int = Field(..., description=\"MCP server ID\")\n    authorization_server_url: str = Field(\n        ...,\n        description=\"Authorization server URL discovered from WWW-Authenticate or metadata\",\n    )\n\n\nclass MCPDynamicClientRegistrationResponse(BaseModel):\n    \"\"\"Response from dynamic client registration\"\"\"\n\n    client_id: str = Field(..., description=\"Registered client ID\")\n    client_secret: Optional[str] = Field(\n        None, description=\"Client secret if confidential client\"\n    )\n    registration_access_token: Optional[str] = Field(\n        None, description=\"Token for managing this client registration\"\n    )\n    registration_client_uri: Optional[str] = Field(\n        None, description=\"URI for managing this client registration\"\n    )\n\n\nclass MCPApiKeyRequest(BaseModel):\n    server_id: int = Field(..., description=\"ID of the MCP server\")\n    api_key: str = Field(..., description=\"API key to store\")\n    transport: str = Field(..., description=\"Transport type\")\n\n\nclass MCPUserCredentialsRequest(BaseModel):\n    \"\"\"Enhanced request for template-based user credentials\"\"\"\n\n    server_id: int = Field(..., description=\"ID of the MCP server\")\n    credentials: dict[str, str] = Field(\n        ..., description=\"User-provided credentials (api_key, custom_token, etc.)\"\n    )\n    transport: str = Field(..., description=\"Transport type\")\n\n\nclass MCPApiKeyResponse(BaseModel):\n    success: bool\n    message: str\n    server_id: int\n    server_name: str\n    authenticated: bool\n    validation_tested: bool = Field(\n        default=False, description=\"Whether credentials were tested against MCP server\"\n    )\n\n\nclass MCPServer(BaseModel):\n    id: int\n    name: str\n    description: Optional[str] = None\n    server_url: str\n    owner: str\n    transport: Optional[MCPTransport] = None\n    auth_type: Optional[MCPAuthenticationType] = None\n    auth_performer: Optional[MCPAuthenticationPerformer] = None\n    is_authenticated: bool\n    user_authenticated: Optional[bool] = None\n    status: MCPServerStatus\n    last_refreshed_at: Optional[datetime.datetime] = None\n    tool_count: int = Field(\n        default=0, description=\"Number of tools associated with this server\"\n    )\n    auth_template: Optional[MCPAuthTemplate] = Field(\n        None, description=\"Authentication template for per-user auth\"\n    )\n    user_credentials: Optional[dict[str, str]] = Field(\n        None, description=\"User's existing credentials for pre-filling forms\"\n    )\n    admin_credentials: Optional[dict[str, str]] = Field(\n        None,\n        description=\"Admin's credential key-value pairs for template substitution and storage\",\n    )\n\n\nclass MCPServersResponse(BaseModel):\n    assistant_id: str | None = None\n    mcp_servers: List[MCPServer]\n\n\nclass MCPServerCreateResponse(BaseModel):\n    \"\"\"Response for creating multiple MCP tools\"\"\"\n\n    server_id: int\n    server_name: str\n    server_url: str\n    auth_type: str\n    auth_performer: Optional[str]\n    is_authenticated: bool\n\n\nclass MCPServerUpdateResponse(BaseModel):\n    \"\"\"Response for updating multiple MCP tools\"\"\"\n\n    server_id: int\n    server_name: str\n    updated_tools: int\n\n\nclass MCPToolListResponse(BaseModel):\n    server_id: int\n    server_name: str\n    server_url: str\n    tools: list[MCPLibTool]\n"
  },
  {
    "path": "backend/onyx/server/features/notifications/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.notification import dismiss_notification\nfrom onyx.db.notification import get_notification_by_id\nfrom onyx.db.notification import get_notifications\nfrom onyx.server.features.build.utils import ensure_build_mode_intro_notification\nfrom onyx.server.features.release_notes.utils import (\n    ensure_release_notes_fresh_and_notify,\n)\nfrom onyx.server.settings.models import Notification as NotificationModel\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\nrouter = APIRouter(prefix=\"/notifications\")\n\n\n@router.get(\"\")\ndef get_notifications_api(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[NotificationModel]:\n    \"\"\"\n    Get all undismissed notifications for the current user.\n\n    Note: also executes background checks that should create notifications.\n\n    Examples of checks that create new notifications:\n    - Checking for new release notes the user hasn't seen\n    - Checking for misconfigurations due to version changes\n    - Explicitly announcing breaking changes\n    \"\"\"\n    # Background checks that create notifications\n    try:\n        ensure_build_mode_intro_notification(user, db_session)\n    except Exception:\n        logger.exception(\n            \"Failed to check for build mode intro in notifications endpoint\"\n        )\n\n    try:\n        ensure_release_notes_fresh_and_notify(db_session)\n    except Exception:\n        logger.exception(\"Failed to check for release notes in notifications endpoint\")\n\n    notifications = [\n        NotificationModel.from_model(notif)\n        for notif in get_notifications(user, db_session, include_dismissed=True)\n    ]\n    return notifications\n\n\n@router.post(\"/{notification_id}/dismiss\")\ndef dismiss_notification_endpoint(\n    notification_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    try:\n        notification = get_notification_by_id(notification_id, user, db_session)\n    except PermissionError:\n        raise HTTPException(\n            status_code=403, detail=\"Not authorized to dismiss this notification\"\n        )\n    except ValueError:\n        raise HTTPException(status_code=404, detail=\"Notification not found\")\n\n    dismiss_notification(notification, db_session)\n"
  },
  {
    "path": "backend/onyx/server/features/oauth_config/__init__.py",
    "content": "\"\"\"OAuth configuration feature module.\"\"\"\n\nfrom onyx.server.features.oauth_config.api import admin_router\nfrom onyx.server.features.oauth_config.api import router\n\n__all__ = [\"admin_router\", \"router\"]\n"
  },
  {
    "path": "backend/onyx/server/features/oauth_config/api.py",
    "content": "\"\"\"API endpoints for OAuth configuration management.\"\"\"\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.oauth_token_manager import OAuthTokenManager\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import OAuthConfig\nfrom onyx.db.models import User\nfrom onyx.db.oauth_config import create_oauth_config\nfrom onyx.db.oauth_config import delete_oauth_config\nfrom onyx.db.oauth_config import delete_user_oauth_token\nfrom onyx.db.oauth_config import get_oauth_config\nfrom onyx.db.oauth_config import get_oauth_configs\nfrom onyx.db.oauth_config import get_tools_by_oauth_config\nfrom onyx.db.oauth_config import update_oauth_config\nfrom onyx.db.oauth_config import upsert_user_oauth_token\nfrom onyx.federated_connectors.oauth_utils import generate_oauth_state\nfrom onyx.federated_connectors.oauth_utils import verify_oauth_state\nfrom onyx.server.features.oauth_config.models import OAuthCallbackResponse\nfrom onyx.server.features.oauth_config.models import OAuthConfigCreate\nfrom onyx.server.features.oauth_config.models import OAuthConfigSnapshot\nfrom onyx.server.features.oauth_config.models import OAuthConfigUpdate\nfrom onyx.server.features.oauth_config.models import OAuthInitiateRequest\nfrom onyx.server.features.oauth_config.models import OAuthInitiateResponse\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nadmin_router = APIRouter(prefix=\"/admin/oauth-config\")\nrouter = APIRouter(prefix=\"/oauth-config\")\n\n\ndef _oauth_config_to_snapshot(\n    oauth_config: OAuthConfig, db_session: Session\n) -> OAuthConfigSnapshot:\n    \"\"\"Convert OAuthConfig model to API snapshot.\"\"\"\n    tools = get_tools_by_oauth_config(oauth_config.id, db_session)\n    return OAuthConfigSnapshot(\n        id=oauth_config.id,\n        name=oauth_config.name,\n        authorization_url=oauth_config.authorization_url,\n        token_url=oauth_config.token_url,\n        scopes=oauth_config.scopes,\n        has_client_credentials=bool(\n            oauth_config.client_id and oauth_config.client_secret\n        ),\n        tool_count=len(tools),\n        created_at=oauth_config.created_at,\n        updated_at=oauth_config.updated_at,\n    )\n\n\n\"\"\"Admin endpoints for OAuth configuration management\"\"\"\n\n\n@admin_router.post(\"/create\")\ndef create_oauth_config_endpoint(\n    oauth_data: OAuthConfigCreate,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_curator_or_admin_user),\n) -> OAuthConfigSnapshot:\n    \"\"\"Create a new OAuth configuration (admin only).\"\"\"\n    try:\n        oauth_config = create_oauth_config(\n            name=oauth_data.name,\n            authorization_url=oauth_data.authorization_url,\n            token_url=oauth_data.token_url,\n            client_id=oauth_data.client_id,\n            client_secret=oauth_data.client_secret,\n            scopes=oauth_data.scopes,\n            additional_params=oauth_data.additional_params,\n            db_session=db_session,\n        )\n        return _oauth_config_to_snapshot(oauth_config, db_session)\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n\n@admin_router.get(\"\")\ndef list_oauth_configs(\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_curator_or_admin_user),\n) -> list[OAuthConfigSnapshot]:\n    \"\"\"List all OAuth configurations (admin only).\"\"\"\n    oauth_configs = get_oauth_configs(db_session)\n    return [_oauth_config_to_snapshot(config, db_session) for config in oauth_configs]\n\n\n@admin_router.get(\"/{oauth_config_id}\")\ndef get_oauth_config_endpoint(\n    oauth_config_id: int,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_curator_or_admin_user),\n) -> OAuthConfigSnapshot:\n    \"\"\"Retrieve a single OAuth configuration (admin only).\"\"\"\n    oauth_config = get_oauth_config(oauth_config_id, db_session)\n    if not oauth_config:\n        raise HTTPException(\n            status_code=404, detail=f\"OAuth config with id {oauth_config_id} not found\"\n        )\n    return _oauth_config_to_snapshot(oauth_config, db_session)\n\n\n@admin_router.put(\"/{oauth_config_id}\")\ndef update_oauth_config_endpoint(\n    oauth_config_id: int,\n    oauth_data: OAuthConfigUpdate,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_curator_or_admin_user),\n) -> OAuthConfigSnapshot:\n    \"\"\"Update an OAuth configuration (admin only).\"\"\"\n    try:\n        updated_config = update_oauth_config(\n            oauth_config_id=oauth_config_id,\n            db_session=db_session,\n            name=oauth_data.name,\n            authorization_url=oauth_data.authorization_url,\n            token_url=oauth_data.token_url,\n            client_id=oauth_data.client_id,\n            client_secret=oauth_data.client_secret,\n            scopes=oauth_data.scopes,\n            additional_params=oauth_data.additional_params,\n            clear_client_id=oauth_data.clear_client_id,\n            clear_client_secret=oauth_data.clear_client_secret,\n        )\n        return _oauth_config_to_snapshot(updated_config, db_session)\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n\n\n@admin_router.delete(\"/{oauth_config_id}\")\ndef delete_oauth_config_endpoint(\n    oauth_config_id: int,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_curator_or_admin_user),\n) -> dict[str, str]:\n    \"\"\"Delete an OAuth configuration (admin only).\"\"\"\n    try:\n        delete_oauth_config(oauth_config_id, db_session)\n        return {\"message\": \"OAuth configuration deleted successfully\"}\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n\n\n\"\"\"User endpoints for OAuth flow\"\"\"\n\n\n@router.post(\"/initiate\")\ndef initiate_oauth_flow(\n    request: OAuthInitiateRequest,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_user),\n) -> OAuthInitiateResponse:\n    \"\"\"\n    Initiate OAuth flow for the current user.\n\n    Returns an authorization URL that the frontend should redirect the user to.\n    \"\"\"\n    # Get OAuth config\n    oauth_config = get_oauth_config(request.oauth_config_id, db_session)\n    if not oauth_config:\n        raise HTTPException(\n            status_code=404,\n            detail=f\"OAuth config with id {request.oauth_config_id} not found\",\n        )\n\n    # Generate state parameter and store in Redis\n    state = generate_oauth_state(\n        federated_connector_id=request.oauth_config_id,\n        user_id=str(user.id),\n        redirect_uri=request.return_path,\n        additional_data={\"oauth_config_id\": request.oauth_config_id},\n    )\n\n    # Build authorization URL\n    redirect_uri = f\"{WEB_DOMAIN}/oauth-config/callback\"\n    authorization_url = OAuthTokenManager.build_authorization_url(\n        oauth_config, redirect_uri, state\n    )\n\n    return OAuthInitiateResponse(authorization_url=authorization_url, state=state)\n\n\n@router.post(\"/callback\")\ndef handle_oauth_callback(\n    code: str,\n    state: str,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_user),\n) -> OAuthCallbackResponse:\n    \"\"\"\n    Handle OAuth callback after user authorizes the application.\n\n    Exchanges the authorization code for an access token and stores it.\n    Accepts code and state as query parameters (standard OAuth flow).\n    \"\"\"\n    try:\n        # Verify state and retrieve session data\n        session = verify_oauth_state(state)\n\n        # Verify the user_id matches\n        if str(user.id) != session.user_id:\n            raise HTTPException(\n                status_code=403, detail=\"User mismatch in OAuth callback\"\n            )\n\n        # Extract oauth_config_id from session (stored during initiate)\n        oauth_config_id = session.federated_connector_id\n\n        # Get OAuth config\n        oauth_config = get_oauth_config(oauth_config_id, db_session)\n        if not oauth_config:\n            raise HTTPException(\n                status_code=404,\n                detail=f\"OAuth config with id {oauth_config_id} not found\",\n            )\n\n        # Exchange code for token\n        redirect_uri = f\"{WEB_DOMAIN}/oauth-config/callback\"\n        token_manager = OAuthTokenManager(oauth_config, user.id, db_session)\n        token_data = token_manager.exchange_code_for_token(code, redirect_uri)\n\n        # Store token\n        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)\n\n        # Return success with redirect\n        return_path = session.redirect_uri or \"/chat\"\n        return OAuthCallbackResponse(\n            redirect_url=return_path,\n        )\n\n    except ValueError as e:\n        logger.error(f\"OAuth callback error: {e}\")\n        return OAuthCallbackResponse(\n            redirect_url=\"/chat\",\n            error=str(e),\n        )\n    except Exception as e:\n        logger.error(f\"Unexpected OAuth callback error: {e}\")\n        return OAuthCallbackResponse(\n            redirect_url=\"/chat\",\n            error=\"An unexpected error occurred during OAuth callback\",\n        )\n\n\n@router.delete(\"/{oauth_config_id}/token\")\ndef revoke_oauth_token(\n    oauth_config_id: int,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_user),\n) -> dict[str, str]:\n    \"\"\"\n    Revoke (delete) the current user's OAuth token for a specific OAuth config.\n    \"\"\"\n    try:\n        delete_user_oauth_token(oauth_config_id, user.id, db_session)\n        return {\"message\": \"OAuth token revoked successfully\"}\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n"
  },
  {
    "path": "backend/onyx/server/features/oauth_config/models.py",
    "content": "from datetime import datetime\nfrom typing import Any\n\nfrom pydantic import BaseModel\n\n\nclass OAuthConfigCreate(BaseModel):\n    name: str\n    authorization_url: str\n    token_url: str\n    client_id: str\n    client_secret: str\n    scopes: list[str] | None = None\n    additional_params: dict[str, Any] | None = None\n\n\nclass OAuthConfigUpdate(BaseModel):\n    name: str | None = None\n    authorization_url: str | None = None\n    token_url: str | None = None\n    client_id: str | None = None\n    client_secret: str | None = None\n    scopes: list[str] | None = None\n    additional_params: dict[str, Any] | None = None\n    clear_client_id: bool = False\n    clear_client_secret: bool = False\n\n\nclass OAuthConfigSnapshot(BaseModel):\n    id: int\n    name: str\n    authorization_url: str\n    token_url: str\n    scopes: list[str] | None\n    has_client_credentials: bool  # NEVER expose actual client_id or client_secret\n    tool_count: int  # Number of tools using this config\n    created_at: datetime\n    updated_at: datetime\n\n\nclass OAuthInitiateRequest(BaseModel):\n    oauth_config_id: int\n    return_path: str = \"/chat\"  # Where to redirect after OAuth flow\n\n\nclass OAuthInitiateResponse(BaseModel):\n    authorization_url: str  # URL to redirect user to\n    state: str  # OAuth state parameter for CSRF protection\n\n\nclass OAuthCallbackResponse(BaseModel):\n    redirect_url: str\n    error: str | None = None\n\n\nclass OAuthTokenStatus(BaseModel):\n    oauth_config_id: int\n    oauth_config_name: str\n    has_token: bool\n    expires_at: int | None  # Unix timestamp\n    is_expired: bool\n"
  },
  {
    "path": "backend/onyx/server/features/password/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi_users.exceptions import InvalidPasswordException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.auth.users import get_user_manager\nfrom onyx.auth.users import User\nfrom onyx.auth.users import UserManager\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.users import get_user_by_email\nfrom onyx.server.features.password.models import ChangePasswordRequest\nfrom onyx.server.features.password.models import UserResetRequest\nfrom onyx.server.features.password.models import UserResetResponse\n\nrouter = APIRouter(prefix=\"/password\")\n\n\n@router.post(\"/change-password\")\nasync def change_my_password(\n    form_data: ChangePasswordRequest,\n    user_manager: UserManager = Depends(get_user_manager),\n    current_user: User = Depends(current_user),\n) -> None:\n    \"\"\"\n    Change the password for the current user.\n    \"\"\"\n    try:\n        await user_manager.change_password_if_old_matches(\n            user=current_user,\n            old_password=form_data.old_password,\n            new_password=form_data.new_password,\n        )\n    except InvalidPasswordException as e:\n        raise HTTPException(status_code=400, detail=str(e.reason))\n    except Exception as e:\n        raise HTTPException(\n            status_code=500, detail=f\"An unexpected error occurred: {str(e)}\"\n        )\n\n\n@router.post(\"/reset_password\")\nasync def admin_reset_user_password(\n    user_reset_request: UserResetRequest,\n    user_manager: UserManager = Depends(get_user_manager),\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> UserResetResponse:\n    \"\"\"\n    Reset the password for a user (admin only).\n    \"\"\"\n    user = get_user_by_email(user_reset_request.user_email, db_session)\n    if not user:\n        raise HTTPException(status_code=404, detail=\"User not found\")\n    new_password = await user_manager.reset_password_as_admin(user.id)\n    return UserResetResponse(\n        user_id=str(user.id),\n        new_password=new_password,\n    )\n"
  },
  {
    "path": "backend/onyx/server/features/password/models.py",
    "content": "from pydantic import BaseModel\n\n\nclass UserResetRequest(BaseModel):\n    user_email: str\n\n\nclass UserResetResponse(BaseModel):\n    user_id: str\n    new_password: str\n\n\nclass ChangePasswordRequest(BaseModel):\n    old_password: str\n    new_password: str\n"
  },
  {
    "path": "backend/onyx/server/features/persona/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/features/persona/api.py",
    "content": "from uuid import UUID\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Query\nfrom fastapi import UploadFile\nfrom pydantic import BaseModel\nfrom sqlalchemy.exc import IntegrityError\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_chat_accessible_user\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_limited_user\nfrom onyx.auth.users import current_user\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.configs.constants import MilestoneRecordType\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.persona import create_assistant_label\nfrom onyx.db.persona import create_update_persona\nfrom onyx.db.persona import delete_persona_label\nfrom onyx.db.persona import get_assistant_labels\nfrom onyx.db.persona import get_minimal_persona_snapshots_for_user\nfrom onyx.db.persona import get_minimal_persona_snapshots_paginated\nfrom onyx.db.persona import get_persona_by_id\nfrom onyx.db.persona import get_persona_count_for_user\nfrom onyx.db.persona import get_persona_snapshots_for_user\nfrom onyx.db.persona import get_persona_snapshots_paginated\nfrom onyx.db.persona import mark_persona_as_deleted\nfrom onyx.db.persona import mark_persona_as_not_deleted\nfrom onyx.db.persona import update_persona_featured\nfrom onyx.db.persona import update_persona_label\nfrom onyx.db.persona import update_persona_public_status\nfrom onyx.db.persona import update_persona_shared\nfrom onyx.db.persona import update_persona_visibility\nfrom onyx.db.persona import update_personas_display_priority\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.server.documents.models import PaginatedReturn\nfrom onyx.server.features.persona.constants import ADMIN_AGENTS_RESOURCE\nfrom onyx.server.features.persona.constants import AGENTS_RESOURCE\nfrom onyx.server.features.persona.models import FullPersonaSnapshot\nfrom onyx.server.features.persona.models import MinimalPersonaSnapshot\nfrom onyx.server.features.persona.models import PersonaLabelCreate\nfrom onyx.server.features.persona.models import PersonaLabelResponse\nfrom onyx.server.features.persona.models import PersonaSnapshot\nfrom onyx.server.features.persona.models import PersonaUpsertRequest\nfrom onyx.server.manage.llm.api import get_valid_model_names_for_persona\nfrom onyx.server.models import DisplayPriorityRequest\nfrom onyx.server.settings.store import load_settings\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.telemetry import mt_cloud_telemetry\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\ndef _validate_user_knowledge_enabled(\n    persona_upsert_request: PersonaUpsertRequest, action: str\n) -> None:\n    \"\"\"Check if user knowledge is enabled when user files/projects are provided.\"\"\"\n    settings = load_settings()\n    if not settings.user_knowledge_enabled:\n        # Only user files are supported going forward; keep getattr for backward compat\n        if persona_upsert_request.user_file_ids or getattr(\n            persona_upsert_request, \"user_project_ids\", None\n        ):\n            raise HTTPException(\n                status_code=400,\n                detail=f\"User Knowledge is disabled. Cannot {action} assistant with user files or projects.\",\n            )\n\n\ndef _validate_vector_db_knowledge(\n    persona_upsert_request: PersonaUpsertRequest,\n) -> None:\n    \"\"\"Reject connector-sourced knowledge types when vector DB is disabled.\n\n    document_sets, hierarchy_nodes, and attached_documents all depend on\n    the vector DB for search filtering. user_files are still allowed because\n    they use the FileReaderTool path instead.\n    \"\"\"\n    if not DISABLE_VECTOR_DB:\n        return\n\n    if persona_upsert_request.document_set_ids:\n        raise HTTPException(\n            status_code=400,\n            detail=(\n                \"Cannot attach document sets to an assistant when the vector database is disabled (DISABLE_VECTOR_DB is set).\"\n            ),\n        )\n    if persona_upsert_request.hierarchy_node_ids:\n        raise HTTPException(\n            status_code=400,\n            detail=(\n                \"Cannot attach hierarchy nodes to an assistant when the vector database is disabled (DISABLE_VECTOR_DB is set).\"\n            ),\n        )\n    if persona_upsert_request.document_ids:\n        raise HTTPException(\n            status_code=400,\n            detail=(\n                \"Cannot attach documents to an assistant when the vector database is disabled (DISABLE_VECTOR_DB is set).\"\n            ),\n        )\n\n\nadmin_router = APIRouter(prefix=\"/admin/persona\")\nbasic_router = APIRouter(prefix=\"/persona\")\n\n# NOTE: Users know this functionality as \"agents\", so we want to start moving\n# nomenclature of these REST resources to match that.\nadmin_agents_router = APIRouter(prefix=ADMIN_AGENTS_RESOURCE)\nagents_router = APIRouter(prefix=AGENTS_RESOURCE)\n\n\nclass IsListedRequest(BaseModel):\n    is_listed: bool\n\n\nclass IsPublicRequest(BaseModel):\n    is_public: bool\n\n\nclass IsFeaturedRequest(BaseModel):\n    is_featured: bool\n\n\n@admin_router.patch(\"/{persona_id}/listed\")\ndef patch_persona_visibility(\n    persona_id: int,\n    is_listed_request: IsListedRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_persona_visibility(\n        persona_id=persona_id,\n        is_listed=is_listed_request.is_listed,\n        db_session=db_session,\n        user=user,\n    )\n\n\n@basic_router.patch(\"/{persona_id}/public\")\ndef patch_user_persona_public_status(\n    persona_id: int,\n    is_public_request: IsPublicRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    try:\n        update_persona_public_status(\n            persona_id=persona_id,\n            is_public=is_public_request.is_public,\n            db_session=db_session,\n            user=user,\n        )\n    except ValueError as e:\n        logger.exception(\"Failed to update persona public status\")\n        raise HTTPException(status_code=403, detail=str(e))\n\n\n@admin_router.patch(\"/{persona_id}/featured\")\ndef patch_persona_featured_status(\n    persona_id: int,\n    is_featured_request: IsFeaturedRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    try:\n        update_persona_featured(\n            persona_id=persona_id,\n            is_featured=is_featured_request.is_featured,\n            db_session=db_session,\n            user=user,\n        )\n    except ValueError as e:\n        logger.exception(\"Failed to update persona featured status\")\n        raise HTTPException(status_code=403, detail=str(e))\n\n\n@admin_agents_router.patch(\"/display-priorities\")\ndef patch_agents_display_priorities(\n    display_priority_request: DisplayPriorityRequest,\n    user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    try:\n        update_personas_display_priority(\n            display_priority_map=display_priority_request.display_priority_map,\n            db_session=db_session,\n            user=user,\n            commit_db_txn=True,\n        )\n    except ValueError as e:\n        logger.exception(\"Failed to update agent display priorities.\")\n        raise HTTPException(status_code=403, detail=str(e))\n\n\n@admin_router.get(\"\", tags=PUBLIC_API_TAGS)\ndef list_personas_admin(\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n    include_deleted: bool = False,\n    get_editable: bool = Query(False, description=\"If true, return editable personas\"),\n) -> list[PersonaSnapshot]:\n    return get_persona_snapshots_for_user(\n        user=user,\n        db_session=db_session,\n        get_editable=get_editable,\n        include_deleted=include_deleted,\n    )\n\n\n@admin_agents_router.get(\"\", tags=PUBLIC_API_TAGS)\ndef get_agents_admin_paginated(\n    page_num: int = Query(0, ge=0, description=\"Page number (0-indexed).\"),\n    page_size: int = Query(10, ge=1, le=1000, description=\"Items per page.\"),\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n    include_deleted: bool = Query(\n        False, description=\"If true, includes deleted personas.\"\n    ),\n    get_editable: bool = Query(\n        False, description=\"If true, only returns editable personas.\"\n    ),\n    include_default: bool = Query(\n        True, description=\"If true, includes builtin/default personas.\"\n    ),\n) -> PaginatedReturn[PersonaSnapshot]:\n    \"\"\"Paginated endpoint for listing agents (formerly personas) (admin view).\n\n    Returns items for the requested page plus total count.\n    Agents are ordered by display_priority (ASC, nulls last) then by ID (ASC).\n    \"\"\"\n    agents = get_persona_snapshots_paginated(\n        user=user,\n        db_session=db_session,\n        page_num=page_num,\n        page_size=page_size,\n        get_editable=get_editable,\n        include_default=include_default,\n        include_deleted=include_deleted,\n    )\n\n    total_count = get_persona_count_for_user(\n        user=user,\n        db_session=db_session,\n        get_editable=get_editable,\n        include_default=include_default,\n        include_deleted=include_deleted,\n    )\n\n    return PaginatedReturn(\n        items=agents,\n        total_items=total_count,\n    )\n\n\n@admin_router.patch(\"/{persona_id}/undelete\", tags=PUBLIC_API_TAGS)\ndef undelete_persona(\n    persona_id: int,\n    user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    mark_persona_as_not_deleted(\n        persona_id=persona_id,\n        user=user,\n        db_session=db_session,\n    )\n\n\n# used for assistant profile pictures\n@admin_router.post(\"/upload-image\")\ndef upload_file(\n    file: UploadFile,\n    _: User = Depends(current_user),\n) -> dict[str, str]:\n    file_store = get_default_file_store()\n    file_type = ChatFileType.IMAGE\n    file_id = file_store.save_file(\n        content=file.file,\n        display_name=file.filename,\n        file_origin=FileOrigin.CHAT_UPLOAD,\n        file_type=file.content_type or file_type.value,\n    )\n    return {\"file_id\": file_id}\n\n\n\"\"\"Endpoints for all\"\"\"\n\n\n@basic_router.post(\"\", tags=PUBLIC_API_TAGS)\ndef create_persona(\n    persona_upsert_request: PersonaUpsertRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> PersonaSnapshot:\n    tenant_id = get_current_tenant_id()\n\n    _validate_user_knowledge_enabled(persona_upsert_request, \"create\")\n    _validate_vector_db_knowledge(persona_upsert_request)\n\n    persona_snapshot = create_update_persona(\n        persona_id=None,\n        create_persona_request=persona_upsert_request,\n        user=user,\n        db_session=db_session,\n    )\n    mt_cloud_telemetry(\n        tenant_id=tenant_id,\n        distinct_id=str(user.id),\n        event=MilestoneRecordType.CREATED_ASSISTANT,\n    )\n\n    return persona_snapshot\n\n\n# NOTE: This endpoint cannot update persona configuration options that\n# are core to the persona, such as its display priority and\n# whether or not the assistant is a built-in / default assistant\n@basic_router.patch(\"/{persona_id}\", tags=PUBLIC_API_TAGS)\ndef update_persona(\n    persona_id: int,\n    persona_upsert_request: PersonaUpsertRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> PersonaSnapshot:\n    _validate_user_knowledge_enabled(persona_upsert_request, \"update\")\n    _validate_vector_db_knowledge(persona_upsert_request)\n\n    persona_snapshot = create_update_persona(\n        persona_id=persona_id,\n        create_persona_request=persona_upsert_request,\n        user=user,\n        db_session=db_session,\n    )\n    return persona_snapshot\n\n\nclass PersonaLabelPatchRequest(BaseModel):\n    label_name: str\n\n\n@basic_router.get(\"/labels\")\ndef get_labels(\n    db: Session = Depends(get_session),\n    _: User = Depends(current_user),\n) -> list[PersonaLabelResponse]:\n    return [\n        PersonaLabelResponse.from_model(label)\n        for label in get_assistant_labels(db_session=db)\n    ]\n\n\n@basic_router.post(\"/labels\")\ndef create_label(\n    label: PersonaLabelCreate,\n    db: Session = Depends(get_session),\n    _: User = Depends(current_user),\n) -> PersonaLabelResponse:\n    \"\"\"Create a new assistant label\"\"\"\n    try:\n        label_model = create_assistant_label(name=label.name, db_session=db)\n        return PersonaLabelResponse.from_model(label_model)\n    except IntegrityError:\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Label with name '{label.name}' already exists. Please choose a different name.\",\n        )\n\n\n@admin_router.patch(\"/label/{label_id}\")\ndef patch_persona_label(\n    label_id: int,\n    persona_label_patch_request: PersonaLabelPatchRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_persona_label(\n        label_id=label_id,\n        label_name=persona_label_patch_request.label_name,\n        db_session=db_session,\n    )\n\n\n@admin_router.delete(\"/label/{label_id}\")\ndef delete_label(\n    label_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    delete_persona_label(label_id=label_id, db_session=db_session)\n\n\nclass PersonaShareRequest(BaseModel):\n    user_ids: list[UUID] | None = None\n    group_ids: list[int] | None = None\n    is_public: bool | None = None\n    label_ids: list[int] | None = None\n\n\n# We notify each user when a user is shared with them\n@basic_router.patch(\"/{persona_id}/share\")\ndef share_persona(\n    persona_id: int,\n    persona_share_request: PersonaShareRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    try:\n        update_persona_shared(\n            persona_id=persona_id,\n            user=user,\n            db_session=db_session,\n            user_ids=persona_share_request.user_ids,\n            group_ids=persona_share_request.group_ids,\n            is_public=persona_share_request.is_public,\n            label_ids=persona_share_request.label_ids,\n        )\n    except PermissionError as e:\n        logger.exception(\"Failed to share persona\")\n        raise HTTPException(status_code=403, detail=str(e))\n    except ValueError as e:\n        logger.exception(\"Failed to share persona\")\n        raise HTTPException(status_code=400, detail=str(e))\n\n\n@basic_router.delete(\"/{persona_id}\", tags=PUBLIC_API_TAGS)\ndef delete_persona(\n    persona_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    mark_persona_as_deleted(\n        persona_id=persona_id,\n        user=user,\n        db_session=db_session,\n    )\n\n\n@basic_router.get(\"\")\ndef list_personas(\n    user: User = Depends(current_chat_accessible_user),\n    db_session: Session = Depends(get_session),\n    include_deleted: bool = False,\n    persona_ids: list[int] = Query(None),\n) -> list[MinimalPersonaSnapshot]:\n    personas = get_minimal_persona_snapshots_for_user(\n        user=user,\n        include_deleted=include_deleted,\n        db_session=db_session,\n        get_editable=False,\n    )\n\n    if persona_ids:\n        personas = [p for p in personas if p.id in persona_ids]\n\n    return personas\n\n\n@agents_router.get(\"\", tags=PUBLIC_API_TAGS)\ndef get_agents_paginated(\n    page_num: int = Query(0, ge=0, description=\"Page number (0-indexed).\"),\n    page_size: int = Query(10, ge=1, le=1000, description=\"Items per page.\"),\n    user: User = Depends(current_chat_accessible_user),\n    db_session: Session = Depends(get_session),\n    include_deleted: bool = Query(\n        False, description=\"If true, includes deleted personas.\"\n    ),\n    get_editable: bool = Query(\n        False, description=\"If true, only returns editable personas.\"\n    ),\n    include_default: bool = Query(\n        True, description=\"If true, includes builtin/default personas.\"\n    ),\n) -> PaginatedReturn[MinimalPersonaSnapshot]:\n    \"\"\"Paginated endpoint for listing agents available to the user.\n\n    Returns items for the requested page plus total count.\n    Personas are ordered by display_priority (ASC, nulls last) then by ID (ASC).\n\n    NOTE: persona_ids filter is not supported with pagination. Use the\n    non-paginated endpoint if filtering by specific IDs is needed.\n    \"\"\"\n    agents = get_minimal_persona_snapshots_paginated(\n        user=user,\n        db_session=db_session,\n        page_num=page_num,\n        page_size=page_size,\n        get_editable=get_editable,\n        include_default=include_default,\n        include_deleted=include_deleted,\n    )\n\n    total_count = get_persona_count_for_user(\n        user=user,\n        db_session=db_session,\n        get_editable=get_editable,\n        include_default=include_default,\n        include_deleted=include_deleted,\n    )\n\n    return PaginatedReturn(\n        items=agents,\n        total_items=total_count,\n    )\n\n\n@basic_router.get(\"/{persona_id}\", tags=PUBLIC_API_TAGS)\ndef get_persona(\n    persona_id: int,\n    user: User = Depends(current_limited_user),\n    db_session: Session = Depends(get_session),\n) -> FullPersonaSnapshot:\n    persona = get_persona_by_id(\n        persona_id=persona_id,\n        user=user,\n        db_session=db_session,\n        is_for_edit=False,\n    )\n\n    # Validate and fix default model if it's no longer valid for this persona's restrictions\n    if persona.llm_model_version_override:\n        valid_models = get_valid_model_names_for_persona(persona_id, user, db_session)\n\n        # If current default model is not in the valid list, update to first valid or None\n        if persona.llm_model_version_override not in valid_models:\n            persona.llm_model_version_override = (\n                valid_models[0] if valid_models else None\n            )\n            db_session.commit()\n\n    return FullPersonaSnapshot.from_model(persona)\n"
  },
  {
    "path": "backend/onyx/server/features/persona/constants.py",
    "content": "# NOTE: Users know this functionality as \"agents\", so we want to start moving\n# nomenclature of these REST resources to match that.\nADMIN_AGENTS_RESOURCE = \"/admin/agents\"\nAGENTS_RESOURCE = \"/agents\"\n"
  },
  {
    "path": "backend/onyx/server/features/persona/models.py",
    "content": "from datetime import datetime\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.db.models import Document\nfrom onyx.db.models import HierarchyNode\nfrom onyx.db.models import Persona\nfrom onyx.db.models import PersonaLabel\nfrom onyx.db.models import StarterMessage\nfrom onyx.server.features.document_set.models import DocumentSetSummary\nfrom onyx.server.features.tool.models import ToolSnapshot\nfrom onyx.server.features.tool.tool_visibility import should_expose_tool_to_fe\nfrom onyx.server.models import MinimalUserSnapshot\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\nclass HierarchyNodeSnapshot(BaseModel):\n    \"\"\"Minimal representation of a hierarchy node for persona responses.\"\"\"\n\n    id: int\n    raw_node_id: str\n    display_name: str\n    link: str | None\n    source: DocumentSource\n    node_type: HierarchyNodeType\n\n    @classmethod\n    def from_model(cls, node: HierarchyNode) -> \"HierarchyNodeSnapshot\":\n        return HierarchyNodeSnapshot(\n            id=node.id,\n            raw_node_id=node.raw_node_id,\n            display_name=node.display_name,\n            link=node.link,\n            source=node.source,\n            node_type=node.node_type,\n        )\n\n\nclass AttachedDocumentSnapshot(BaseModel):\n    \"\"\"Minimal representation of an attached document for persona responses.\"\"\"\n\n    id: str\n    title: str\n    link: str | None\n    parent_id: int | None\n    last_modified: datetime | None\n    last_synced: datetime | None\n    source: DocumentSource | None\n\n    @classmethod\n    def from_model(cls, doc: Document) -> \"AttachedDocumentSnapshot\":\n        return AttachedDocumentSnapshot(\n            id=doc.id,\n            title=doc.semantic_id,\n            link=doc.link,\n            parent_id=doc.parent_hierarchy_node_id,\n            last_modified=doc.doc_updated_at,\n            last_synced=doc.last_synced,\n            source=(\n                doc.parent_hierarchy_node.source if doc.parent_hierarchy_node else None\n            ),  # TODO(evan) we really should just store this in the document table directly\n        )\n\n\nclass PromptSnapshot(BaseModel):\n    id: int\n    name: str\n    description: str\n    system_prompt: str\n    task_prompt: str\n    datetime_aware: bool\n    # Not including persona info, not needed\n\n    @classmethod\n    def from_model(cls, persona: Persona) -> \"PromptSnapshot\":\n        \"\"\"Create PromptSnapshot from persona's embedded prompt fields\"\"\"\n        if persona.deleted:\n            raise ValueError(\"Persona has been deleted\")\n\n        return PromptSnapshot(\n            id=persona.id,\n            name=persona.name,\n            description=persona.description,\n            system_prompt=persona.system_prompt or \"\",\n            task_prompt=persona.task_prompt or \"\",\n            datetime_aware=persona.datetime_aware,\n        )\n\n\n# More minimal request for generating a persona prompt\nclass GenerateStarterMessageRequest(BaseModel):\n    name: str\n    description: str\n    instructions: str\n    document_set_ids: list[int]\n    generation_count: int\n\n\nclass PersonaUpsertRequest(BaseModel):\n    name: str\n    description: str\n    document_set_ids: list[int]\n    is_public: bool\n    llm_model_provider_override: str | None = None\n    llm_model_version_override: str | None = None\n    starter_messages: list[StarterMessage] | None = None\n    # For Private Personas, who should be able to access these\n    users: list[UUID] = Field(default_factory=list)\n    groups: list[int] = Field(default_factory=list)\n    # e.g. ID of SearchTool or ImageGenerationTool or <USER_DEFINED_TOOL>\n    tool_ids: list[int]\n    remove_image: bool | None = None\n    uploaded_image_id: str | None = None  # New field for uploaded image\n    icon_name: str | None = (\n        None  # New field that is custom chosen during agent creation/editing\n    )\n    search_start_date: datetime | None = None\n    label_ids: list[int] | None = None\n    is_featured: bool = False\n    display_priority: int | None = None\n    # Accept string UUIDs from frontend\n    user_file_ids: list[str] | None = None\n    # Hierarchy nodes (folders, spaces, channels) attached for scoped search\n    hierarchy_node_ids: list[int] = Field(default_factory=list)\n    # Individual documents attached for scoped search\n    document_ids: list[str] = Field(default_factory=list)\n\n    # prompt fields\n    system_prompt: str\n    replace_base_system_prompt: bool = False\n    task_prompt: str\n    datetime_aware: bool\n\n\nclass MinimalPersonaSnapshot(BaseModel):\n    \"\"\"Minimal persona model optimized for ChatPage.tsx - only includes fields actually used\"\"\"\n\n    # Core fields used by ChatPage\n    id: int\n    name: str\n    description: str\n    # Used for retrieval capability checking\n    tools: list[ToolSnapshot]\n    starter_messages: list[StarterMessage] | None\n\n    # only show document sets in the UI that the assistant has access to\n    document_sets: list[DocumentSetSummary]\n    # Counts for knowledge sources (used to determine if search tool should be enabled)\n    hierarchy_node_count: int\n    attached_document_count: int\n    # Unique sources from all knowledge (document sets + hierarchy nodes)\n    # Used to populate source filters in chat\n    knowledge_sources: list[DocumentSource]\n    llm_model_version_override: str | None\n    llm_model_provider_override: str | None\n\n    uploaded_image_id: str | None\n    icon_name: str | None\n\n    is_public: bool\n    is_listed: bool\n    display_priority: int | None\n    is_featured: bool\n    builtin_persona: bool\n\n    # Used for filtering\n    labels: list[\"PersonaLabelSnapshot\"]\n\n    # Used to display ownership\n    owner: MinimalUserSnapshot | None\n\n    @classmethod\n    def from_model(cls, persona: Persona) -> \"MinimalPersonaSnapshot\":\n        # Collect unique sources from document sets, hierarchy nodes, and attached documents\n        sources: set[DocumentSource] = set()\n\n        # Sources from document sets\n        for doc_set in persona.document_sets:\n            for cc_pair in doc_set.connector_credential_pairs:\n                sources.add(cc_pair.connector.source)\n\n        # Sources from hierarchy nodes\n        for node in persona.hierarchy_nodes:\n            sources.add(node.source)\n\n        # Sources from attached documents (via their parent hierarchy node)\n        for doc in persona.attached_documents:\n            if doc.parent_hierarchy_node:\n                sources.add(doc.parent_hierarchy_node.source)\n\n        return MinimalPersonaSnapshot(\n            # Core fields actually used by ChatPage\n            id=persona.id,\n            name=persona.name,\n            description=persona.description,\n            tools=[\n                ToolSnapshot.from_model(tool)\n                for tool in persona.tools\n                if should_expose_tool_to_fe(tool)\n            ],\n            starter_messages=persona.starter_messages,\n            document_sets=[\n                DocumentSetSummary.from_model(document_set)\n                for document_set in persona.document_sets\n            ],\n            hierarchy_node_count=len(persona.hierarchy_nodes),\n            attached_document_count=len(persona.attached_documents),\n            knowledge_sources=list(sources),\n            llm_model_version_override=persona.llm_model_version_override,\n            llm_model_provider_override=persona.llm_model_provider_override,\n            uploaded_image_id=persona.uploaded_image_id,\n            icon_name=persona.icon_name,\n            is_public=persona.is_public,\n            is_listed=persona.is_listed,\n            display_priority=persona.display_priority,\n            is_featured=persona.is_featured,\n            builtin_persona=persona.builtin_persona,\n            labels=[PersonaLabelSnapshot.from_model(label) for label in persona.labels],\n            owner=(\n                MinimalUserSnapshot(id=persona.user.id, email=persona.user.email)\n                if persona.user\n                else None\n            ),\n        )\n\n\nclass PersonaSnapshot(BaseModel):\n    id: int\n    name: str\n    description: str\n    is_public: bool\n    is_listed: bool\n    uploaded_image_id: str | None\n    icon_name: str | None\n    # Return string UUIDs to frontend for consistency\n    user_file_ids: list[str]\n    display_priority: int | None\n    is_featured: bool\n    builtin_persona: bool\n    starter_messages: list[StarterMessage] | None\n    tools: list[ToolSnapshot]\n    labels: list[\"PersonaLabelSnapshot\"]\n    owner: MinimalUserSnapshot | None\n    users: list[MinimalUserSnapshot]\n    groups: list[int]\n    document_sets: list[DocumentSetSummary]\n    llm_model_provider_override: str | None\n    llm_model_version_override: str | None\n    # Hierarchy nodes attached for scoped search\n    hierarchy_nodes: list[HierarchyNodeSnapshot] = Field(default_factory=list)\n    # Individual documents attached for scoped search\n    attached_documents: list[AttachedDocumentSnapshot] = Field(default_factory=list)\n\n    # Embedded prompt fields (no longer separate prompt_ids)\n    system_prompt: str | None = None\n    replace_base_system_prompt: bool = False\n    task_prompt: str | None = None\n    datetime_aware: bool = True\n\n    @classmethod\n    def from_model(cls, persona: Persona) -> \"PersonaSnapshot\":\n        return PersonaSnapshot(\n            id=persona.id,\n            name=persona.name,\n            description=persona.description,\n            is_public=persona.is_public,\n            is_listed=persona.is_listed,\n            uploaded_image_id=persona.uploaded_image_id,\n            icon_name=persona.icon_name,\n            user_file_ids=[str(file.id) for file in persona.user_files],\n            display_priority=persona.display_priority,\n            is_featured=persona.is_featured,\n            builtin_persona=persona.builtin_persona,\n            starter_messages=persona.starter_messages,\n            tools=[\n                ToolSnapshot.from_model(tool)\n                for tool in persona.tools\n                if should_expose_tool_to_fe(tool)\n            ],\n            labels=[PersonaLabelSnapshot.from_model(label) for label in persona.labels],\n            hierarchy_nodes=[\n                HierarchyNodeSnapshot.from_model(node)\n                for node in persona.hierarchy_nodes\n            ],\n            attached_documents=[\n                AttachedDocumentSnapshot.from_model(doc)\n                for doc in persona.attached_documents\n            ],\n            owner=(\n                MinimalUserSnapshot(id=persona.user.id, email=persona.user.email)\n                if persona.user\n                else None\n            ),\n            users=[\n                MinimalUserSnapshot(id=user.id, email=user.email)\n                for user in persona.users\n            ],\n            groups=[user_group.id for user_group in persona.groups],\n            document_sets=[\n                DocumentSetSummary.from_model(document_set_model)\n                for document_set_model in persona.document_sets\n            ],\n            llm_model_provider_override=persona.llm_model_provider_override,\n            llm_model_version_override=persona.llm_model_version_override,\n            system_prompt=persona.system_prompt,\n            replace_base_system_prompt=persona.replace_base_system_prompt,\n            task_prompt=persona.task_prompt,\n            datetime_aware=persona.datetime_aware,\n        )\n\n\n# Model with full context on persona's internal settings\n# This is used for flows which need to know all settings\nclass FullPersonaSnapshot(PersonaSnapshot):\n    search_start_date: datetime | None = None\n\n    @classmethod\n    def from_model(\n        cls, persona: Persona, allow_deleted: bool = False\n    ) -> \"FullPersonaSnapshot\":\n        if persona.deleted:\n            error_msg = f\"Persona with ID {persona.id} has been deleted\"\n            if not allow_deleted:\n                raise ValueError(error_msg)\n            else:\n                logger.warning(error_msg)\n\n        return FullPersonaSnapshot(\n            id=persona.id,\n            name=persona.name,\n            description=persona.description,\n            is_public=persona.is_public,\n            is_listed=persona.is_listed,\n            uploaded_image_id=persona.uploaded_image_id,\n            icon_name=persona.icon_name,\n            user_file_ids=[str(file.id) for file in persona.user_files],\n            display_priority=persona.display_priority,\n            is_featured=persona.is_featured,\n            builtin_persona=persona.builtin_persona,\n            starter_messages=persona.starter_messages,\n            users=[\n                MinimalUserSnapshot(id=user.id, email=user.email)\n                for user in persona.users\n            ],\n            groups=[user_group.id for user_group in persona.groups],\n            tools=[\n                ToolSnapshot.from_model(tool)\n                for tool in persona.tools\n                if should_expose_tool_to_fe(tool)\n            ],\n            labels=[PersonaLabelSnapshot.from_model(label) for label in persona.labels],\n            hierarchy_nodes=[\n                HierarchyNodeSnapshot.from_model(node)\n                for node in persona.hierarchy_nodes\n            ],\n            attached_documents=[\n                AttachedDocumentSnapshot.from_model(doc)\n                for doc in persona.attached_documents\n            ],\n            owner=(\n                MinimalUserSnapshot(id=persona.user.id, email=persona.user.email)\n                if persona.user\n                else None\n            ),\n            document_sets=[\n                DocumentSetSummary.from_model(document_set_model)\n                for document_set_model in persona.document_sets\n            ],\n            search_start_date=persona.search_start_date,\n            llm_model_provider_override=persona.llm_model_provider_override,\n            llm_model_version_override=persona.llm_model_version_override,\n            system_prompt=persona.system_prompt,\n            replace_base_system_prompt=persona.replace_base_system_prompt,\n            task_prompt=persona.task_prompt,\n            datetime_aware=persona.datetime_aware,\n        )\n\n\nclass PromptTemplateResponse(BaseModel):\n    final_prompt_template: str\n\n\nclass PersonaSharedNotificationData(BaseModel):\n    persona_id: int\n\n\nclass ImageGenerationToolStatus(BaseModel):\n    is_available: bool\n\n\nclass PersonaLabelCreate(BaseModel):\n    name: str\n\n\nclass PersonaLabelResponse(BaseModel):\n    id: int\n    name: str\n\n    @classmethod\n    def from_model(cls, category: PersonaLabel) -> \"PersonaLabelResponse\":\n        return PersonaLabelResponse(\n            id=category.id,\n            name=category.name,\n        )\n\n\nclass PersonaLabelSnapshot(BaseModel):\n    id: int\n    name: str\n\n    @classmethod\n    def from_model(cls, label: PersonaLabel) -> \"PersonaLabelSnapshot\":\n        return PersonaLabelSnapshot(\n            id=label.id,\n            name=label.name,\n        )\n"
  },
  {
    "path": "backend/onyx/server/features/projects/api.py",
    "content": "import json\nfrom uuid import UUID\n\nfrom fastapi import APIRouter\nfrom fastapi import BackgroundTasks\nfrom fastapi import Depends\nfrom fastapi import File\nfrom fastapi import Form\nfrom fastapi import HTTPException\nfrom fastapi import Response\nfrom fastapi import UploadFile\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_user\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.enums import UserFileStatus\nfrom onyx.db.models import ChatSession\nfrom onyx.db.models import Project__UserFile\nfrom onyx.db.models import User\nfrom onyx.db.models import UserFile\nfrom onyx.db.models import UserProject\nfrom onyx.db.persona import get_personas_by_ids\nfrom onyx.db.projects import get_project_token_count\nfrom onyx.db.projects import upload_files_to_user_files_with_indexing\nfrom onyx.server.features.projects.models import CategorizedFilesSnapshot\nfrom onyx.server.features.projects.models import ChatSessionRequest\nfrom onyx.server.features.projects.models import TokenCountResponse\nfrom onyx.server.features.projects.models import UserFileSnapshot\nfrom onyx.server.features.projects.models import UserProjectSnapshot\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\nrouter = APIRouter(prefix=\"/user/projects\")\n\n\nclass UserFileDeleteResult(BaseModel):\n    has_associations: bool\n    project_names: list[str] = []\n    assistant_names: list[str] = []\n\n\ndef _trigger_user_file_project_sync(\n    user_file_id: UUID,\n    tenant_id: str,\n    background_tasks: BackgroundTasks | None = None,\n) -> None:\n    if DISABLE_VECTOR_DB and background_tasks is not None:\n        from onyx.background.task_utils import drain_project_sync_loop\n\n        background_tasks.add_task(drain_project_sync_loop, tenant_id)\n        logger.info(f\"Queued in-process project sync for user_file_id={user_file_id}\")\n        return\n\n    from onyx.background.celery.tasks.user_file_processing.tasks import (\n        enqueue_user_file_project_sync_task,\n    )\n    from onyx.background.celery.tasks.user_file_processing.tasks import (\n        get_user_file_project_sync_queue_depth,\n    )\n    from onyx.background.celery.versioned_apps.client import app as client_app\n    from onyx.redis.redis_pool import get_redis_client\n\n    queue_depth = get_user_file_project_sync_queue_depth(client_app)\n    if queue_depth > USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH:\n        logger.warning(\n            f\"Skipping immediate project sync for user_file_id={user_file_id} due to \"\n            f\"queue depth {queue_depth}>{USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH}. \"\n            \"It will be picked up by beat later.\"\n        )\n        return\n\n    redis_client = get_redis_client(tenant_id=tenant_id)\n    enqueued = enqueue_user_file_project_sync_task(\n        celery_app=client_app,\n        redis_client=redis_client,\n        user_file_id=user_file_id,\n        tenant_id=tenant_id,\n        priority=OnyxCeleryPriority.HIGHEST,\n    )\n    if not enqueued:\n        logger.info(\n            f\"Skipped duplicate project sync enqueue for user_file_id={user_file_id}\"\n        )\n        return\n\n    logger.info(f\"Triggered project sync for user_file_id={user_file_id}\")\n\n\n@router.get(\"\", tags=PUBLIC_API_TAGS)\ndef get_projects(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[UserProjectSnapshot]:\n    user_id = user.id\n    projects = (\n        db_session.query(UserProject).filter(UserProject.user_id == user_id).all()\n    )\n    return [UserProjectSnapshot.from_model(project) for project in projects]\n\n\n@router.post(\"/create\", tags=PUBLIC_API_TAGS)\ndef create_project(\n    name: str,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> UserProjectSnapshot:\n    if name == \"\":\n        raise HTTPException(status_code=400, detail=\"Project name cannot be empty\")\n    user_id = user.id\n    project = UserProject(name=name, user_id=user_id)\n    db_session.add(project)\n    db_session.commit()\n    return UserProjectSnapshot.from_model(project)\n\n\n@router.post(\"/file/upload\", tags=PUBLIC_API_TAGS)\ndef upload_user_files(\n    bg_tasks: BackgroundTasks,\n    files: list[UploadFile] = File(...),\n    project_id: int | None = Form(None),\n    temp_id_map: str | None = Form(None),  # JSON string mapping hashed key -> temp_id\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> CategorizedFilesSnapshot:\n    try:\n        parsed_temp_id_map: dict[str, str] | None = None\n        if temp_id_map:\n            try:\n                parsed = json.loads(temp_id_map)\n                if isinstance(parsed, dict):\n                    # Ensure all keys/values are strings\n                    parsed_temp_id_map = {str(k): str(v) for k, v in parsed.items()}\n                else:\n                    parsed_temp_id_map = None\n            except json.JSONDecodeError:\n                parsed_temp_id_map = None\n\n        # Use our consolidated function that handles indexing properly\n        categorized_files_result = upload_files_to_user_files_with_indexing(\n            files=files,\n            project_id=project_id,\n            user=user,\n            temp_id_map=parsed_temp_id_map,\n            db_session=db_session,\n            background_tasks=bg_tasks if DISABLE_VECTOR_DB else None,\n        )\n\n        return CategorizedFilesSnapshot.from_result(categorized_files_result)\n\n    except Exception as e:\n        logger.exception(f\"Error uploading files - {type(e).__name__}: {str(e)}\")\n        raise HTTPException(\n            status_code=500,\n            detail=\"Failed to upload files. Please try again or contact support if the issue persists.\",\n        )\n\n\n@router.get(\"/{project_id}\", tags=PUBLIC_API_TAGS)\ndef get_project(\n    project_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> UserProjectSnapshot:\n    user_id = user.id\n    project = (\n        db_session.query(UserProject)\n        .filter(UserProject.id == project_id, UserProject.user_id == user_id)\n        .one_or_none()\n    )\n    if project is None:\n        raise HTTPException(status_code=404, detail=\"Project not found\")\n    return UserProjectSnapshot.from_model(project)\n\n\n@router.get(\"/files/{project_id}\", tags=PUBLIC_API_TAGS)\ndef get_files_in_project(\n    project_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[UserFileSnapshot]:\n    user_id = user.id\n    user_files = (\n        db_session.query(UserFile)\n        .join(Project__UserFile, UserFile.id == Project__UserFile.user_file_id)\n        .filter(\n            Project__UserFile.project_id == project_id,\n            UserFile.user_id == user_id,\n            UserFile.status != UserFileStatus.FAILED,\n        )\n        .order_by(Project__UserFile.created_at.desc())\n        .all()\n    )\n    return [UserFileSnapshot.from_model(user_file) for user_file in user_files]\n\n\n@router.delete(\"/{project_id}/files/{file_id}\", tags=PUBLIC_API_TAGS)\ndef unlink_user_file_from_project(\n    project_id: int,\n    file_id: UUID,\n    bg_tasks: BackgroundTasks,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    \"\"\"Unlink an existing user file from a specific project for the current user.\n\n    Does not delete the underlying file; only removes the association.\n    \"\"\"\n    user_id = user.id\n    project = (\n        db_session.query(UserProject)\n        .filter(UserProject.id == project_id, UserProject.user_id == user_id)\n        .one_or_none()\n    )\n    if project is None:\n        raise HTTPException(status_code=404, detail=\"Project not found\")\n\n    user_file = (\n        db_session.query(UserFile)\n        .filter(UserFile.id == file_id, UserFile.user_id == user_id)\n        .one_or_none()\n    )\n    if user_file is None:\n        raise HTTPException(status_code=404, detail=\"File not found\")\n\n    # Remove the association if it exists\n    if user_file in project.user_files:\n        project.user_files.remove(user_file)\n        user_file.needs_project_sync = True\n        db_session.commit()\n\n    tenant_id = get_current_tenant_id()\n    _trigger_user_file_project_sync(user_file.id, tenant_id, bg_tasks)\n\n    return Response(status_code=204)\n\n\n@router.post(\n    \"/{project_id}/files/{file_id}\",\n    response_model=UserFileSnapshot,\n    tags=PUBLIC_API_TAGS,\n)\ndef link_user_file_to_project(\n    project_id: int,\n    file_id: UUID,\n    bg_tasks: BackgroundTasks,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> UserFileSnapshot:\n    \"\"\"Link an existing user file to a specific project for the current user.\n\n    Creates the association in the Project__UserFile join table if it does not exist.\n    Returns the linked user file snapshot.\n    \"\"\"\n    user_id = user.id\n    project = (\n        db_session.query(UserProject)\n        .filter(UserProject.id == project_id, UserProject.user_id == user_id)\n        .one_or_none()\n    )\n    if project is None:\n        raise HTTPException(status_code=404, detail=\"Project not found\")\n\n    user_file = (\n        db_session.query(UserFile)\n        .filter(UserFile.id == file_id, UserFile.user_id == user_id)\n        .one_or_none()\n    )\n    if user_file is None:\n        raise HTTPException(status_code=404, detail=\"File not found\")\n\n    if user_file not in project.user_files:\n        user_file.needs_project_sync = True\n        project.user_files.append(user_file)\n        db_session.commit()\n\n    tenant_id = get_current_tenant_id()\n    _trigger_user_file_project_sync(user_file.id, tenant_id, bg_tasks)\n\n    return UserFileSnapshot.from_model(user_file)\n\n\nclass ProjectInstructionsResponse(BaseModel):\n    instructions: str | None\n\n\n@router.get(\n    \"/{project_id}/instructions\",\n    response_model=ProjectInstructionsResponse,\n    tags=PUBLIC_API_TAGS,\n)\ndef get_project_instructions(\n    project_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> ProjectInstructionsResponse:\n    user_id = user.id\n    project = (\n        db_session.query(UserProject)\n        .filter(UserProject.id == project_id, UserProject.user_id == user_id)\n        .one_or_none()\n    )\n\n    if project is None:\n        raise HTTPException(status_code=404, detail=\"Project not found\")\n\n    return ProjectInstructionsResponse(instructions=project.instructions)\n\n\nclass UpsertProjectInstructionsRequest(BaseModel):\n    instructions: str\n\n\n@router.post(\n    \"/{project_id}/instructions\",\n    response_model=ProjectInstructionsResponse,\n    tags=PUBLIC_API_TAGS,\n)\ndef upsert_project_instructions(\n    project_id: int,\n    body: UpsertProjectInstructionsRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> ProjectInstructionsResponse:\n    \"\"\"Create or update this project's instructions stored on the project itself.\"\"\"\n    # Ensure the project exists and belongs to the user\n    user_id = user.id\n    project = (\n        db_session.query(UserProject)\n        .filter(UserProject.id == project_id, UserProject.user_id == user_id)\n        .one_or_none()\n    )\n    if project is None:\n        raise HTTPException(status_code=404, detail=\"Project not found\")\n    project.instructions = body.instructions\n\n    db_session.commit()\n    db_session.refresh(project)\n    return ProjectInstructionsResponse(instructions=project.instructions)\n\n\nclass ProjectPayload(BaseModel):\n    project: UserProjectSnapshot\n    files: list[UserFileSnapshot] | None = None\n    persona_id_to_is_featured: dict[int, bool] | None = None\n\n\n@router.get(\n    \"/{project_id}/details\", response_model=ProjectPayload, tags=PUBLIC_API_TAGS\n)\ndef get_project_details(\n    project_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> ProjectPayload:\n    project = get_project(project_id, user, db_session)\n    files = get_files_in_project(project_id, user, db_session)\n    persona_ids = [\n        session.persona_id\n        for session in project.chat_sessions\n        if session.persona_id is not None\n    ]\n    personas = get_personas_by_ids(persona_ids, db_session)\n    persona_id_to_is_featured = {\n        persona.id: persona.is_featured for persona in personas\n    }\n    return ProjectPayload(\n        project=project,\n        files=files,\n        persona_id_to_is_featured=persona_id_to_is_featured,\n    )\n\n\nclass UpdateProjectRequest(BaseModel):\n    name: str | None = None\n    description: str | None = None\n\n\n@router.patch(\"/{project_id}\", response_model=UserProjectSnapshot, tags=PUBLIC_API_TAGS)\ndef update_project(\n    project_id: int,\n    body: UpdateProjectRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> UserProjectSnapshot:\n    user_id = user.id\n    project = (\n        db_session.query(UserProject)\n        .filter(UserProject.id == project_id, UserProject.user_id == user_id)\n        .one_or_none()\n    )\n    if project is None:\n        raise HTTPException(status_code=404, detail=\"Project not found\")\n\n    if body.name is not None:\n        project.name = body.name\n    if body.description is not None:\n        project.description = body.description\n\n    db_session.commit()\n    db_session.refresh(project)\n    return UserProjectSnapshot.from_model(project)\n\n\n@router.delete(\"/{project_id}\", tags=PUBLIC_API_TAGS)\ndef delete_project(\n    project_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    user_id = user.id\n    project = (\n        db_session.query(UserProject)\n        .filter(UserProject.id == project_id, UserProject.user_id == user_id)\n        .one_or_none()\n    )\n    if project is None:\n        raise HTTPException(status_code=404, detail=\"Project not found\")\n\n    # Unlink chat sessions from this project\n    for chat in project.chat_sessions:\n        chat.project_id = None\n\n    # Unlink many-to-many user files association (Project__UserFile)\n    for uf in list(project.user_files):\n        project.user_files.remove(uf)\n\n    db_session.delete(project)\n    db_session.commit()\n    return Response(status_code=204)\n\n\n@router.delete(\"/file/{file_id}\", tags=PUBLIC_API_TAGS)\ndef delete_user_file(\n    file_id: UUID,\n    bg_tasks: BackgroundTasks,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> UserFileDeleteResult:\n    \"\"\"Delete a user file belonging to the current user.\n\n    This will also remove any project associations for the file.\n    \"\"\"\n    user_id = user.id\n    user_file = (\n        db_session.query(UserFile)\n        .filter(UserFile.id == file_id, UserFile.user_id == user_id)\n        .one_or_none()\n    )\n    if user_file is None:\n        raise HTTPException(status_code=404, detail=\"File not found\")\n\n    # Check associations with projects and assistants (personas)\n    project_names = [project.name for project in user_file.projects]\n    assistant_names = [assistant.name for assistant in user_file.assistants]\n\n    if len(project_names) > 0 or len(assistant_names) > 0:\n        return UserFileDeleteResult(\n            has_associations=True,\n            project_names=project_names,\n            assistant_names=assistant_names,\n        )\n\n    # No associations found; mark as DELETING and enqueue delete task\n    user_file.status = UserFileStatus.DELETING\n    db_session.commit()\n\n    tenant_id = get_current_tenant_id()\n    if DISABLE_VECTOR_DB:\n        from onyx.background.task_utils import drain_delete_loop\n\n        bg_tasks.add_task(drain_delete_loop, tenant_id)\n        logger.info(f\"Queued in-process delete for user_file_id={user_file.id}\")\n    else:\n        from onyx.background.celery.versioned_apps.client import app as client_app\n\n        task = client_app.send_task(\n            OnyxCeleryTask.DELETE_SINGLE_USER_FILE,\n            kwargs={\"user_file_id\": str(user_file.id), \"tenant_id\": tenant_id},\n            queue=OnyxCeleryQueues.USER_FILE_DELETE,\n            priority=OnyxCeleryPriority.HIGH,\n        )\n        logger.info(\n            f\"Triggered delete for user_file_id={user_file.id} with task_id={task.id}\"\n        )\n\n    return UserFileDeleteResult(\n        has_associations=False, project_names=[], assistant_names=[]\n    )\n\n\n@router.get(\"/file/{file_id}\", response_model=UserFileSnapshot, tags=PUBLIC_API_TAGS)\ndef get_user_file(\n    file_id: UUID,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> UserFileSnapshot:\n    \"\"\"Fetch a single user file by ID for the current user.\n\n    Includes files in any status (including FAILED) to allow status polling.\n    \"\"\"\n    user_id = user.id\n    user_file = (\n        db_session.query(UserFile)\n        .filter(UserFile.id == file_id, UserFile.user_id == user_id)\n        .filter(UserFile.status != UserFileStatus.DELETING)\n        .one_or_none()\n    )\n    if user_file is None:\n        raise HTTPException(status_code=404, detail=\"File not found\")\n    return UserFileSnapshot.from_model(user_file)\n\n\nclass UserFileIdsRequest(BaseModel):\n    file_ids: list[UUID]\n\n\n@router.post(\n    \"/file/statuses\", response_model=list[UserFileSnapshot], tags=PUBLIC_API_TAGS\n)\ndef get_user_file_statuses(\n    body: UserFileIdsRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[UserFileSnapshot]:\n    \"\"\"Fetch statuses for a set of user file IDs owned by the current user.\n\n    Includes files in any status so the client can detect transitions to FAILED.\n    \"\"\"\n    if not body.file_ids:\n        return []\n\n    user_id = user.id\n    user_files = (\n        db_session.query(UserFile)\n        .filter(UserFile.user_id == user_id)\n        .filter(UserFile.id.in_(body.file_ids))\n        .filter(UserFile.status != UserFileStatus.DELETING)\n        .all()\n    )\n\n    return [UserFileSnapshot.from_model(user_file) for user_file in user_files]\n\n\n@router.post(\"/{project_id}/move_chat_session\")\ndef move_chat_session(\n    project_id: int,\n    body: ChatSessionRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    user_id = user.id\n    chat_session = (\n        db_session.query(ChatSession)\n        .filter(ChatSession.id == body.chat_session_id, ChatSession.user_id == user_id)\n        .one_or_none()\n    )\n    if chat_session is None:\n        raise HTTPException(status_code=404, detail=\"Chat session not found\")\n    chat_session.project_id = project_id\n    db_session.commit()\n    return Response(status_code=204)\n\n\n@router.post(\"/remove_chat_session\")\ndef remove_chat_session(\n    body: ChatSessionRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    user_id = user.id\n    chat_session = (\n        db_session.query(ChatSession)\n        .filter(ChatSession.id == body.chat_session_id, ChatSession.user_id == user_id)\n        .one_or_none()\n    )\n    if chat_session is None:\n        raise HTTPException(status_code=404, detail=\"Chat session not found\")\n    chat_session.project_id = None\n    db_session.commit()\n    return Response(status_code=204)\n\n\n@router.get(\"/session/{chat_session_id}/token-count\", response_model=TokenCountResponse)\ndef get_chat_session_project_token_count(\n    chat_session_id: str,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> TokenCountResponse:\n    \"\"\"Return sum of token_count for all user files in the project linked to the given chat session.\n\n    If the chat session has no project, returns 0.\n    \"\"\"\n    user_id = user.id\n    chat_session = (\n        db_session.query(ChatSession)\n        .filter(ChatSession.id == chat_session_id, ChatSession.user_id == user_id)\n        .one_or_none()\n    )\n    if chat_session is None:\n        raise HTTPException(status_code=404, detail=\"Chat session not found\")\n\n    total_tokens = get_project_token_count(\n        project_id=chat_session.project_id,\n        user_id=user_id,\n        db_session=db_session,\n    )\n\n    return TokenCountResponse(total_tokens=total_tokens)\n\n\n@router.get(\"/session/{chat_session_id}/files\", tags=PUBLIC_API_TAGS)\ndef get_chat_session_project_files(\n    chat_session_id: str,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[UserFileSnapshot]:\n    \"\"\"Return user files for the project linked to the given chat session.\n\n    If the chat session has no project, returns an empty list.\n    Only returns files owned by the current user and not FAILED.\n    \"\"\"\n    user_id = user.id\n\n    chat_session = (\n        db_session.query(ChatSession)\n        .filter(ChatSession.id == chat_session_id, ChatSession.user_id == user_id)\n        .one_or_none()\n    )\n    if chat_session is None:\n        raise HTTPException(status_code=404, detail=\"Chat session not found\")\n\n    if chat_session.project_id is None:\n        return []\n\n    user_files = (\n        db_session.query(UserFile)\n        .filter(\n            UserFile.projects.any(id=chat_session.project_id),\n            UserFile.user_id == user_id,\n            UserFile.status != UserFileStatus.FAILED,\n        )\n        .order_by(UserFile.created_at.desc())\n        .all()\n    )\n\n    return [UserFileSnapshot.from_model(user_file) for user_file in user_files]\n\n\n@router.get(\"/{project_id}/token-count\", response_model=TokenCountResponse)\ndef get_project_total_token_count(\n    project_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> TokenCountResponse:\n    \"\"\"Return sum of token_count for all user files in the given project for the current user.\"\"\"\n\n    # Verify the project belongs to the current user\n    user_id = user.id\n    project = (\n        db_session.query(UserProject)\n        .filter(UserProject.id == project_id, UserProject.user_id == user_id)\n        .one_or_none()\n    )\n    if project is None:\n        raise HTTPException(status_code=404, detail=\"Project not found\")\n\n    total_tokens = get_project_token_count(\n        project_id=project_id,\n        user_id=user_id,\n        db_session=db_session,\n    )\n\n    return TokenCountResponse(total_tokens=total_tokens)\n"
  },
  {
    "path": "backend/onyx/server/features/projects/models.py",
    "content": "from datetime import datetime\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\n\nfrom onyx.db.enums import UserFileStatus\nfrom onyx.db.models import UserFile\nfrom onyx.db.models import UserProject\nfrom onyx.db.projects import CategorizedFilesResult\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type\nfrom onyx.server.query_and_chat.models import ChatSessionDetails\n\n\nclass UserFileSnapshot(BaseModel):\n    id: UUID\n    temp_id: str | None = None  # Client-side temporary ID for optimistic updates\n    name: str\n    project_id: int | None = None\n    user_id: UUID | None\n    file_id: str\n    created_at: datetime\n    status: UserFileStatus\n    last_accessed_at: datetime | None\n    file_type: str | None\n    chat_file_type: ChatFileType\n    token_count: int | None\n    chunk_count: int | None\n\n    @classmethod\n    def from_model(\n        cls, model: UserFile, temp_id_map: dict[str, str] = {}\n    ) -> \"UserFileSnapshot\":\n        return cls(\n            id=model.id,\n            temp_id=temp_id_map.get(str(model.id)),\n            name=model.name,\n            project_id=None,\n            user_id=model.user_id,\n            file_id=model.file_id,\n            created_at=model.created_at,\n            status=model.status,\n            last_accessed_at=model.last_accessed_at,\n            file_type=model.content_type,\n            chat_file_type=mime_type_to_chat_file_type(model.content_type),\n            token_count=model.token_count,\n            chunk_count=model.chunk_count,\n        )\n\n\nclass TokenCountResponse(BaseModel):\n    total_tokens: int\n\n\nclass RejectedFile(BaseModel):\n    file_name: str\n    reason: str\n\n\nclass CategorizedFilesSnapshot(BaseModel):\n    user_files: list[UserFileSnapshot]\n    rejected_files: list[RejectedFile]\n\n    @classmethod\n    def from_result(cls, result: CategorizedFilesResult) -> \"CategorizedFilesSnapshot\":\n        return cls(\n            user_files=[\n                UserFileSnapshot.from_model(user_file, temp_id_map=result.id_to_temp_id)\n                for user_file in result.user_files\n            ],\n            rejected_files=[\n                RejectedFile(\n                    file_name=rejected_file.filename,\n                    reason=rejected_file.reason,\n                )\n                for rejected_file in result.rejected_files\n            ],\n        )\n\n\nclass UserProjectSnapshot(BaseModel):\n    id: int\n    name: str\n    description: str | None\n    created_at: datetime\n    user_id: UUID | None\n    instructions: str | None = None\n    chat_sessions: list[ChatSessionDetails]\n\n    @classmethod\n    def from_model(cls, model: UserProject) -> \"UserProjectSnapshot\":\n        return cls(\n            id=model.id,\n            name=model.name,\n            description=model.description,\n            created_at=model.created_at,\n            user_id=model.user_id,\n            instructions=model.instructions,\n            chat_sessions=[\n                ChatSessionDetails.from_model(chat)\n                for chat in model.chat_sessions\n                if not chat.deleted\n            ],\n        )\n\n\nclass ChatSessionRequest(BaseModel):\n    chat_session_id: str\n"
  },
  {
    "path": "backend/onyx/server/features/projects/projects_file_utils.py",
    "content": "from math import ceil\n\nfrom fastapi import UploadFile\nfrom PIL import Image\nfrom PIL import ImageOps\nfrom PIL import UnidentifiedImageError\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom pydantic import Field\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.llm import fetch_default_llm_model\nfrom onyx.file_processing.extract_file_text import extract_file_text\nfrom onyx.file_processing.extract_file_text import get_file_ext\nfrom onyx.file_processing.file_types import OnyxFileExtensions\nfrom onyx.file_processing.password_validation import is_file_password_protected\nfrom onyx.natural_language_processing.utils import count_tokens\nfrom onyx.natural_language_processing.utils import get_tokenizer\nfrom onyx.server.settings.store import load_settings\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\nUNKNOWN_FILENAME = \"[unknown_file]\"  # More descriptive than empty string\n\n\ndef get_safe_filename(upload: UploadFile) -> str:\n    \"\"\"Get filename from upload, with fallback to UNKNOWN_FILENAME if None.\"\"\"\n    if not upload.filename:\n        logger.warning(\"Received upload with no filename\")\n        return UNKNOWN_FILENAME\n    return upload.filename\n\n\ndef get_upload_size_bytes(upload: UploadFile) -> int | None:\n    \"\"\"Best-effort file size in bytes without consuming the stream.\"\"\"\n    if upload.size is not None:\n        return upload.size\n\n    try:\n        current_pos = upload.file.tell()\n        upload.file.seek(0, 2)\n        size = upload.file.tell()\n        upload.file.seek(current_pos)\n        return size\n    except Exception as e:\n        logger.warning(\n            \"Could not determine upload size via stream seek \"\n            f\"(filename='{get_safe_filename(upload)}', \"\n            f\"error_type={type(e).__name__}, error={e})\"\n        )\n        return None\n\n\ndef is_upload_too_large(upload: UploadFile, max_bytes: int) -> bool:\n    \"\"\"Return True when upload size is known and exceeds max_bytes.\"\"\"\n    size_bytes = get_upload_size_bytes(upload)\n    if size_bytes is None:\n        logger.warning(\n            f\"Could not determine upload size; skipping size-limit check for '{get_safe_filename(upload)}'\"\n        )\n        return False\n    return size_bytes > max_bytes\n\n\n# Guard against extremely large images\nImage.MAX_IMAGE_PIXELS = 12000 * 12000\n\n\nclass RejectedFile(BaseModel):\n    filename: str = Field(default=\"\")\n    reason: str = Field(default=\"\")\n\n\nclass CategorizedFiles(BaseModel):\n    acceptable: list[UploadFile] = Field(default_factory=list)\n    rejected: list[RejectedFile] = Field(default_factory=list)\n    acceptable_file_to_token_count: dict[str, int] = Field(default_factory=dict)\n    # Filenames within `acceptable` that should be stored but not indexed.\n    skip_indexing: set[str] = Field(default_factory=set)\n\n    # Allow FastAPI UploadFile instances\n    model_config = ConfigDict(arbitrary_types_allowed=True)\n\n\ndef _skip_token_threshold(extension: str) -> bool:\n    \"\"\"Return True if this file extension should bypass the token limit.\"\"\"\n    return extension.lower() in OnyxFileExtensions.TABULAR_EXTENSIONS\n\n\ndef _apply_long_side_cap(width: int, height: int, cap: int) -> tuple[int, int]:\n    if max(width, height) <= cap:\n        return width, height\n    scale = cap / max(width, height)\n    new_w = max(1, int(round(width * scale)))\n    new_h = max(1, int(round(height * scale)))\n    return new_w, new_h\n\n\ndef _estimate_image_tokens(\n    width: int, height: int, patch_size: int, overhead: int\n) -> int:\n    patches_w = ceil(width / patch_size)\n    patches_h = ceil(height / patch_size)\n    patches = patches_w * patches_h\n    return patches + overhead\n\n\ndef estimate_image_tokens_for_upload(\n    upload: UploadFile,\n    cap_long_side: int = 2048,\n    patch_size: int = 16,\n    overhead_tokens: int = 32,\n) -> int:\n    \"\"\"Open the uploaded image, normalize orientation, cap long side, and estimate tokens.\n\n    Parameters\n    - cap_long_side: Maximum pixels allowed on the image's longer side before estimating.\n      Rationale: Many vision-language encoders downsample images so the longer side is\n      bounded (commonly around 1024–2048px). Capping avoids unbounded patch counts and\n      keeps costs predictable while preserving most semantic content for typical UI/docs.\n      Default 2048 is a balanced choice between fidelity and token cost.\n\n    - patch_size: The pixel size of square patches used in a rough ViT-style estimate.\n      Rationale: Modern vision backbones (e.g., ViT variants) commonly operate on 14–16px\n      patches. Using 16 simplifies the estimate and aligns with widely used configurations.\n      Each patch approximately maps to one visual token in this heuristic.\n\n    - overhead_tokens: Fixed per-image overhead to account for special tokens, metadata,\n      and prompt framing added by providers. Rationale: Real models add tens of tokens per\n      image beyond pure patch count. 32 is a conservative, stable default that avoids\n      undercounting.\n\n    Notes\n    - This is a heuristic estimation for budgeting and gating. Actual tokenization varies\n      by model/provider and may differ slightly.\n\n    Always resets the file pointer before returning.\n    \"\"\"\n    try:\n        img = Image.open(upload.file)\n        img = ImageOps.exif_transpose(img)\n        width, height = img.size\n        capped_w, capped_h = _apply_long_side_cap(width, height, cap=cap_long_side)\n        return _estimate_image_tokens(\n            capped_w, capped_h, patch_size=patch_size, overhead=overhead_tokens\n        )\n    finally:\n        try:\n            upload.file.seek(0)\n        except Exception:\n            pass\n\n\ndef categorize_uploaded_files(\n    files: list[UploadFile], db_session: Session\n) -> CategorizedFiles:\n    \"\"\"\n    Categorize uploaded files based on text extractability and tokenized length.\n\n    - Images are estimated for token cost via a patch-based heuristic.\n    - All other files are run through extract_file_text, which handles known\n      document formats (.pdf, .docx, …) and falls back to a text-detection\n      heuristic for unknown extensions (.py, .js, .rs, …).\n    - Uses default tokenizer to compute token length.\n    - If token length exceeds the admin-configured threshold, reject file.\n    - If extension unsupported or text cannot be extracted, reject file.\n    - Otherwise marked as acceptable.\n    \"\"\"\n\n    results = CategorizedFiles()\n    default_model = fetch_default_llm_model(db_session)\n\n    model_name = default_model.name if default_model else None\n    provider_type = default_model.llm_provider.provider if default_model else None\n    tokenizer = get_tokenizer(model_name=model_name, provider_type=provider_type)\n\n    # Derive limits from admin-configurable settings.\n    # For upload size: load_settings() resolves 0/None to a positive default.\n    # For token threshold: 0 means \"no limit\" (converted to None below).\n    settings = load_settings()\n    max_upload_size_mb = (\n        settings.user_file_max_upload_size_mb\n    )  # always positive after load_settings()\n    max_upload_size_bytes = (\n        max_upload_size_mb * 1024 * 1024 if max_upload_size_mb else None\n    )\n    token_threshold_k = settings.file_token_count_threshold_k\n    token_threshold = (\n        token_threshold_k * 1000 if token_threshold_k else None\n    )  # 0 → None = no limit\n\n    for upload in files:\n        try:\n            filename = get_safe_filename(upload)\n\n            # Size limit is a hard safety cap.\n            if max_upload_size_bytes is not None and is_upload_too_large(\n                upload, max_upload_size_bytes\n            ):\n                results.rejected.append(\n                    RejectedFile(\n                        filename=filename,\n                        reason=f\"Exceeds {max_upload_size_mb} MB file size limit\",\n                    )\n                )\n                continue\n\n            extension = get_file_ext(filename)\n\n            # If image, estimate tokens via dedicated method first\n            if extension in OnyxFileExtensions.IMAGE_EXTENSIONS:\n                try:\n                    token_count = estimate_image_tokens_for_upload(upload)\n                except (UnidentifiedImageError, OSError) as e:\n                    logger.warning(\n                        f\"Failed to process image file '{filename}': {str(e)}\"\n                    )\n                    results.rejected.append(\n                        RejectedFile(\n                            filename=filename, reason=\"Unsupported file contents\"\n                        )\n                    )\n                    continue\n\n                if token_threshold is not None and token_count > token_threshold:\n                    results.rejected.append(\n                        RejectedFile(\n                            filename=filename,\n                            reason=f\"Exceeds {token_threshold_k}K token limit\",\n                        )\n                    )\n                else:\n                    results.acceptable.append(upload)\n                    results.acceptable_file_to_token_count[filename] = token_count\n                continue\n\n            # Handle as text/document: attempt text extraction and count tokens.\n            # This accepts any file that extract_file_text can handle, including\n            # code files (.py, .js, .rs, etc.) via its is_text_file() fallback.\n            else:\n                if is_file_password_protected(\n                    file=upload.file,\n                    file_name=filename,\n                    extension=extension,\n                ):\n                    logger.warning(f\"{filename} is password protected\")\n                    results.rejected.append(\n                        RejectedFile(\n                            filename=filename, reason=\"Document is password protected\"\n                        )\n                    )\n                    continue\n\n                text_content = extract_file_text(\n                    file=upload.file,\n                    file_name=filename,\n                    break_on_unprocessable=False,\n                    extension=extension,\n                )\n                if not text_content:\n                    logger.warning(f\"No text content extracted from '{filename}'\")\n                    results.rejected.append(\n                        RejectedFile(\n                            filename=filename,\n                            reason=f\"Unsupported file type: {extension}\",\n                        )\n                    )\n                    continue\n\n                token_count = count_tokens(\n                    text_content, tokenizer, token_limit=token_threshold\n                )\n                exceeds_threshold = (\n                    token_threshold is not None and token_count > token_threshold\n                )\n                if exceeds_threshold and _skip_token_threshold(extension):\n                    # Exempt extensions (e.g. spreadsheets) are accepted\n                    # but flagged to skip indexing — only metadata is\n                    # injected into the LLM context.\n                    results.acceptable.append(upload)\n                    results.acceptable_file_to_token_count[filename] = token_count\n                    results.skip_indexing.add(filename)\n                elif exceeds_threshold:\n                    results.rejected.append(\n                        RejectedFile(\n                            filename=filename,\n                            reason=f\"Exceeds {token_threshold_k}K token limit\",\n                        )\n                    )\n                else:\n                    results.acceptable.append(upload)\n                    results.acceptable_file_to_token_count[filename] = token_count\n\n                # Reset file pointer for subsequent upload handling\n                try:\n                    upload.file.seek(0)\n                except Exception as e:\n                    logger.warning(\n                        f\"Failed to reset file pointer for '{filename}': {str(e)}\"\n                    )\n        except Exception as e:\n            logger.warning(\n                f\"Failed to process uploaded file '{get_safe_filename(upload)}' (error_type={type(e).__name__}, error={str(e)})\"\n            )\n            results.rejected.append(\n                RejectedFile(\n                    filename=get_safe_filename(upload),\n                    reason=\"Failed to process upload\",\n                )\n            )\n\n    return results\n"
  },
  {
    "path": "backend/onyx/server/features/release_notes/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/features/release_notes/constants.py",
    "content": "\"\"\"Constants for release notes functionality.\"\"\"\n\n# GitHub source\nGITHUB_RAW_BASE_URL = (\n    \"https://raw.githubusercontent.com/onyx-dot-app/documentation/main\"\n)\nGITHUB_CHANGELOG_RAW_URL = f\"{GITHUB_RAW_BASE_URL}/changelog.mdx\"\n\n# Base URL for changelog documentation (used for notification links)\nDOCS_CHANGELOG_BASE_URL = \"https://docs.onyx.app/changelog\"\n\nFETCH_TIMEOUT = 60.0\n\n# Redis keys (in shared namespace)\nREDIS_KEY_PREFIX = \"release_notes:\"\nREDIS_KEY_FETCHED_AT = f\"{REDIS_KEY_PREFIX}fetched_at\"\nREDIS_KEY_ETAG = f\"{REDIS_KEY_PREFIX}etag\"\n\n# Cache TTL: 24 hours\nREDIS_CACHE_TTL = 60 * 60 * 24\n\n# Auto-refresh threshold: 1 hour\nAUTO_REFRESH_THRESHOLD_SECONDS = 60 * 60\n"
  },
  {
    "path": "backend/onyx/server/features/release_notes/models.py",
    "content": "\"\"\"Pydantic models for release notes.\"\"\"\n\nfrom pydantic import BaseModel\n\n\nclass ReleaseNoteEntry(BaseModel):\n    \"\"\"A single version's release note entry.\"\"\"\n\n    version: str  # e.g., \"v2.7.0\"\n    date: str  # e.g., \"January 7th, 2026\"\n    title: str  # Display title for notifications: \"Onyx v2.7.0 is available!\"\n"
  },
  {
    "path": "backend/onyx/server/features/release_notes/utils.py",
    "content": "\"\"\"Utility functions for release notes parsing and caching.\"\"\"\n\nimport re\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport httpx\nfrom sqlalchemy.orm import Session\n\nfrom onyx import __version__\nfrom onyx.cache.factory import get_shared_cache_backend\nfrom onyx.configs.app_configs import INSTANCE_TYPE\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.db.release_notes import create_release_notifications_for_versions\nfrom onyx.server.features.release_notes.constants import AUTO_REFRESH_THRESHOLD_SECONDS\nfrom onyx.server.features.release_notes.constants import FETCH_TIMEOUT\nfrom onyx.server.features.release_notes.constants import GITHUB_CHANGELOG_RAW_URL\nfrom onyx.server.features.release_notes.constants import REDIS_CACHE_TTL\nfrom onyx.server.features.release_notes.constants import REDIS_KEY_ETAG\nfrom onyx.server.features.release_notes.constants import REDIS_KEY_FETCHED_AT\nfrom onyx.server.features.release_notes.models import ReleaseNoteEntry\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n# ============================================================================\n# Version Utilities\n# ============================================================================\n\n\ndef is_valid_version(version: str) -> bool:\n    \"\"\"Check if version matches vX.Y.Z or vX.Y.Z-suffix.N pattern exactly.\"\"\"\n    return bool(re.match(r\"^v\\d+\\.\\d+\\.\\d+(-[a-zA-Z]+\\.\\d+)?$\", version))\n\n\ndef parse_version_tuple(version: str) -> tuple[int, int, int]:\n    \"\"\"Parse version string to tuple for semantic sorting.\"\"\"\n    clean = re.sub(r\"^v\", \"\", version)\n    clean = re.sub(r\"-.*$\", \"\", clean)\n    parts = clean.split(\".\")\n    return (\n        int(parts[0]) if len(parts) > 0 else 0,\n        int(parts[1]) if len(parts) > 1 else 0,\n        int(parts[2]) if len(parts) > 2 else 0,\n    )\n\n\ndef is_version_gte(v1: str, v2: str) -> bool:\n    \"\"\"Check if v1 >= v2. Strips suffixes like -cloud.X or -beta.X.\"\"\"\n    return parse_version_tuple(v1) >= parse_version_tuple(v2)\n\n\n# ============================================================================\n# MDX Parsing\n# ============================================================================\n\n\ndef parse_mdx_to_release_note_entries(mdx_content: str) -> list[ReleaseNoteEntry]:\n    \"\"\"Parse MDX content into ReleaseNoteEntry objects.\"\"\"\n    all_entries = []\n\n    update_pattern = (\n        r'<Update\\s+label=\"([^\"]+)\"\\s+description=\"([^\"]+)\"'\n        r\"(?:\\s+tags=\\{([^}]+)\\})?[^>]*>\"\n        r\".*?\"\n        r\"</Update>\"\n    )\n\n    for match in re.finditer(update_pattern, mdx_content, re.DOTALL):\n        version = match.group(1)\n        date = match.group(2)\n\n        if is_valid_version(version):\n            all_entries.append(\n                ReleaseNoteEntry(\n                    version=version,\n                    date=date,\n                    title=f\"Onyx {version} is available!\",\n                )\n            )\n\n    if not all_entries:\n        raise ValueError(\"Could not parse any release note entries from MDX.\")\n\n    if INSTANCE_TYPE == \"cloud\":\n        # Cloud often runs ahead of docs release tags; always notify on latest release.\n        return sorted(\n            all_entries, key=lambda x: parse_version_tuple(x.version), reverse=True\n        )[:1]\n\n    # Filter to valid versions >= __version__\n    if __version__ and is_valid_version(__version__):\n        entries = [\n            entry for entry in all_entries if is_version_gte(entry.version, __version__)\n        ]\n    elif \"nightly\" in __version__:\n        # Just show the latest entry for nightly versions\n        entries = sorted(\n            all_entries, key=lambda x: parse_version_tuple(x.version), reverse=True\n        )[:1]\n    else:\n        # If not recognized version\n        # likely `development` and we should show all entries\n        entries = all_entries\n\n    return entries\n\n\n# ============================================================================\n# Cache Helpers (ETag + timestamp only)\n# ============================================================================\n\n\ndef get_cached_etag() -> str | None:\n    cache = get_shared_cache_backend()\n    try:\n        etag = cache.get(REDIS_KEY_ETAG)\n        if etag:\n            return etag.decode(\"utf-8\")\n        return None\n    except Exception as e:\n        logger.error(f\"Failed to get cached etag: {e}\")\n        return None\n\n\ndef get_last_fetch_time() -> datetime | None:\n    cache = get_shared_cache_backend()\n    try:\n        raw = cache.get(REDIS_KEY_FETCHED_AT)\n        if not raw:\n            return None\n\n        last_fetch = datetime.fromisoformat(raw.decode(\"utf-8\"))\n        if last_fetch.tzinfo is None:\n            last_fetch = last_fetch.replace(tzinfo=timezone.utc)\n        else:\n            last_fetch = last_fetch.astimezone(timezone.utc)\n\n        return last_fetch\n    except Exception as e:\n        logger.error(f\"Failed to get last fetch time from cache: {e}\")\n        return None\n\n\ndef save_fetch_metadata(etag: str | None) -> None:\n    cache = get_shared_cache_backend()\n    now = datetime.now(timezone.utc)\n\n    try:\n        cache.set(REDIS_KEY_FETCHED_AT, now.isoformat(), ex=REDIS_CACHE_TTL)\n        if etag:\n            cache.set(REDIS_KEY_ETAG, etag, ex=REDIS_CACHE_TTL)\n    except Exception as e:\n        logger.error(f\"Failed to save fetch metadata to cache: {e}\")\n\n\ndef is_cache_stale() -> bool:\n    \"\"\"Check if we should fetch from GitHub.\"\"\"\n    last_fetch = get_last_fetch_time()\n    if last_fetch is None:\n        return True\n    age = datetime.now(timezone.utc) - last_fetch\n    return age.total_seconds() > AUTO_REFRESH_THRESHOLD_SECONDS\n\n\n# ============================================================================\n# Main Function\n# ============================================================================\n\n\ndef ensure_release_notes_fresh_and_notify(db_session: Session) -> None:\n    \"\"\"\n    Check for new release notes and create notifications if needed.\n\n    Called from /api/notifications endpoint. Uses ETag for efficient\n    GitHub requests. Database handles notification deduplication.\n\n    Since all users will trigger this via notification fetch,\n    uses Redis lock to prevent concurrent GitHub requests when cache is stale.\n    \"\"\"\n    if not is_cache_stale():\n        return\n\n    cache = get_shared_cache_backend()\n    lock = cache.lock(\n        OnyxRedisLocks.RELEASE_NOTES_FETCH_LOCK,\n        timeout=90,\n    )\n\n    # Non-blocking acquire - if we can't get the lock, another request is handling it\n    acquired = lock.acquire(blocking=False)\n    if not acquired:\n        logger.debug(\"Another request is already fetching release notes, skipping.\")\n        return\n\n    try:\n        logger.debug(\"Checking GitHub for release notes updates.\")\n\n        # Use ETag for conditional request\n        headers: dict[str, str] = {}\n        etag = get_cached_etag()\n        if etag:\n            headers[\"If-None-Match\"] = etag\n\n        try:\n            response = httpx.get(\n                GITHUB_CHANGELOG_RAW_URL,\n                headers=headers,\n                timeout=FETCH_TIMEOUT,\n                follow_redirects=True,\n            )\n\n            if response.status_code == 304:\n                # Content unchanged, just update timestamp\n                logger.debug(\"Release notes unchanged (304).\")\n                save_fetch_metadata(etag)\n                return\n\n            response.raise_for_status()\n\n            # Parse and create notifications\n            entries = parse_mdx_to_release_note_entries(response.text)\n            new_etag = response.headers.get(\"ETag\")\n            save_fetch_metadata(new_etag)\n\n            # Create notifications, sorted semantically to create them in chronological order\n            entries = sorted(entries, key=lambda x: parse_version_tuple(x.version))\n            create_release_notifications_for_versions(db_session, entries)\n\n        except Exception as e:\n            logger.error(f\"Failed to check release notes: {e}\")\n            # Update timestamp even on failure to prevent retry storms\n            # We don't save etag on failure to allow retry with conditional request\n            save_fetch_metadata(None)\n    finally:\n        # Always release the lock\n        if lock.owned():\n            lock.release()\n"
  },
  {
    "path": "backend/onyx/server/features/tool/api.py",
    "content": "from typing import Any\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import Tool\nfrom onyx.db.models import User\nfrom onyx.db.tools import create_tool__no_commit\nfrom onyx.db.tools import delete_tool__no_commit\nfrom onyx.db.tools import get_tool_by_id\nfrom onyx.db.tools import get_tools\nfrom onyx.db.tools import get_tools_by_ids\nfrom onyx.db.tools import update_tool\nfrom onyx.server.features.tool.models import CustomToolCreate\nfrom onyx.server.features.tool.models import CustomToolUpdate\nfrom onyx.server.features.tool.models import ToolSnapshot\nfrom onyx.server.features.tool.tool_visibility import should_expose_tool_to_fe\nfrom onyx.tools.built_in_tools import get_built_in_tool_by_id\nfrom onyx.tools.tool_implementations.custom.openapi_parsing import MethodSpec\nfrom onyx.tools.tool_implementations.custom.openapi_parsing import (\n    openapi_to_method_specs,\n)\nfrom onyx.tools.tool_implementations.custom.openapi_parsing import (\n    validate_openapi_schema,\n)\n\nrouter = APIRouter(prefix=\"/tool\")\nadmin_router = APIRouter(prefix=\"/admin/tool\")\n\n\ndef _validate_tool_definition(definition: dict[str, Any]) -> None:\n    try:\n        validate_openapi_schema(definition)\n    except Exception as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n\ndef _validate_auth_settings(tool_data: CustomToolCreate | CustomToolUpdate) -> None:\n    if tool_data.passthrough_auth and tool_data.custom_headers:\n        for header in tool_data.custom_headers:\n            if header.key.lower() == \"authorization\":\n                raise HTTPException(\n                    status_code=400,\n                    detail=\"Cannot use passthrough auth with custom authorization headers\",\n                )\n\n\ndef _get_editable_custom_tool(tool_id: int, db_session: Session, user: User) -> Tool:\n    \"\"\"Fetch a custom tool and ensure the caller has permission to edit it.\"\"\"\n    try:\n        tool = get_tool_by_id(tool_id, db_session)\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n\n    if tool.in_code_tool_id is not None:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Built-in tools cannot be modified through this endpoint.\",\n        )\n\n    # Admins can always make changes; non-admins must own the tool.\n    if user.role == UserRole.ADMIN:\n        return tool\n\n    if tool.user_id is None or tool.user_id != user.id:\n        raise HTTPException(\n            status_code=403,\n            detail=\"You can only modify actions that you created.\",\n        )\n\n    return tool\n\n\n@admin_router.post(\"/custom\", tags=PUBLIC_API_TAGS)\ndef create_custom_tool(\n    tool_data: CustomToolCreate,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> ToolSnapshot:\n    _validate_tool_definition(tool_data.definition)\n    _validate_auth_settings(tool_data)\n    tool = create_tool__no_commit(\n        name=tool_data.name,\n        description=tool_data.description,\n        openapi_schema=tool_data.definition,\n        custom_headers=tool_data.custom_headers,\n        user_id=user.id,\n        db_session=db_session,\n        passthrough_auth=tool_data.passthrough_auth,\n        oauth_config_id=tool_data.oauth_config_id,\n        enabled=True,\n    )\n    db_session.commit()\n    return ToolSnapshot.from_model(tool)\n\n\n@admin_router.put(\"/custom/{tool_id}\", tags=PUBLIC_API_TAGS)\ndef update_custom_tool(\n    tool_id: int,\n    tool_data: CustomToolUpdate,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> ToolSnapshot:\n    existing_tool = _get_editable_custom_tool(tool_id, db_session, user)\n    if tool_data.definition:\n        _validate_tool_definition(tool_data.definition)\n    _validate_auth_settings(tool_data)\n    updated_tool = update_tool(\n        tool_id=tool_id,\n        name=tool_data.name,\n        description=tool_data.description,\n        openapi_schema=tool_data.definition,\n        custom_headers=tool_data.custom_headers,\n        user_id=existing_tool.user_id,\n        db_session=db_session,\n        passthrough_auth=tool_data.passthrough_auth,\n        oauth_config_id=tool_data.oauth_config_id,\n    )\n    return ToolSnapshot.from_model(updated_tool)\n\n\n@admin_router.delete(\"/custom/{tool_id}\", tags=PUBLIC_API_TAGS)\ndef delete_custom_tool(\n    tool_id: int,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),\n) -> None:\n    _ = _get_editable_custom_tool(tool_id, db_session, user)\n    try:\n        delete_tool__no_commit(tool_id, db_session)\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n    except Exception as e:\n        # handles case where tool is still used by an Assistant\n        raise HTTPException(status_code=400, detail=str(e))\n    db_session.commit()\n\n\nclass ToolStatusUpdateRequest(BaseModel):\n    tool_ids: list[int]\n    enabled: bool\n\n\nclass ToolStatusUpdateResponse(BaseModel):\n    updated_count: int\n    tool_ids: list[int]\n\n\n@admin_router.patch(\"/status\")\ndef update_tools_status(\n    update_data: ToolStatusUpdateRequest,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_curator_or_admin_user),  # noqa: ARG001\n) -> ToolStatusUpdateResponse:\n    \"\"\"Enable or disable one or more tools.\n\n    Pass a single tool ID in the list to update one tool, or multiple IDs for\n    bulk updates.\n    \"\"\"\n    if not update_data.tool_ids:\n        raise HTTPException(status_code=400, detail=\"No tool IDs provided\")\n\n    tools = get_tools_by_ids(update_data.tool_ids, db_session)\n    tools_by_id = {tool.id: tool for tool in tools}\n\n    updated_tools = []\n    missing_tools = []\n\n    for tool_id in update_data.tool_ids:\n        tool = tools_by_id.get(tool_id)\n        if tool:\n            tool.enabled = update_data.enabled\n            updated_tools.append(tool_id)\n        else:\n            missing_tools.append(tool_id)\n\n    if missing_tools:\n        raise HTTPException(\n            status_code=404, detail=f\"Tools with IDs {missing_tools} not found\"\n        )\n\n    db_session.commit()\n\n    return ToolStatusUpdateResponse(\n        updated_count=len(updated_tools),\n        tool_ids=updated_tools,\n    )\n\n\nclass ValidateToolRequest(BaseModel):\n    definition: dict[str, Any]\n\n\nclass ValidateToolResponse(BaseModel):\n    methods: list[MethodSpec]\n\n\n@admin_router.post(\"/custom/validate\", tags=PUBLIC_API_TAGS)\ndef validate_tool(\n    tool_data: ValidateToolRequest,\n    _: User = Depends(current_curator_or_admin_user),\n) -> ValidateToolResponse:\n    _validate_tool_definition(tool_data.definition)\n    method_specs = openapi_to_method_specs(tool_data.definition)\n    return ValidateToolResponse(methods=method_specs)\n\n\n\"\"\"Endpoints for all\"\"\"\n\n\n@router.get(\"/openapi\", tags=PUBLIC_API_TAGS)\ndef list_openapi_tools(\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_user),\n) -> list[ToolSnapshot]:\n    tools = get_tools(db_session, only_openapi=True)\n\n    openapi_tools: list[ToolSnapshot] = []\n    for tool in tools:\n        if not should_expose_tool_to_fe(tool):\n            continue\n\n        openapi_tools.append(ToolSnapshot.from_model(tool))\n\n    return openapi_tools\n\n\n@router.get(\"/{tool_id}\", tags=PUBLIC_API_TAGS)\ndef get_custom_tool(\n    tool_id: int,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_user),\n) -> ToolSnapshot:\n    try:\n        tool = get_tool_by_id(tool_id, db_session)\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n    return ToolSnapshot.from_model(tool)\n\n\n@router.get(\"\", tags=PUBLIC_API_TAGS)\ndef list_tools(\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_user),\n) -> list[ToolSnapshot]:\n    tools = get_tools(db_session, only_enabled=True, only_connected_mcp=True)\n\n    filtered_tools: list[ToolSnapshot] = []\n    for tool in tools:\n        if not should_expose_tool_to_fe(tool):\n            continue\n\n        # Check if it's a built-in tool and if it's available\n        if tool.in_code_tool_id:\n            try:\n                tool_cls = get_built_in_tool_by_id(tool.in_code_tool_id)\n                if not tool_cls.is_available(db_session):\n                    continue\n            except KeyError:\n                # If tool ID not found in registry, include it by default\n                pass\n\n        # All custom tools and available built-in tools are included\n        filtered_tools.append(ToolSnapshot.from_model(tool))\n\n    return filtered_tools\n"
  },
  {
    "path": "backend/onyx/server/features/tool/models.py",
    "content": "from typing import Any\n\nfrom pydantic import BaseModel\n\nfrom onyx.db.models import Tool\nfrom onyx.server.features.tool.tool_visibility import get_tool_visibility_config\n\n\nclass ToolSnapshot(BaseModel):\n    id: int\n    name: str\n    description: str\n    definition: dict[str, Any] | None\n    display_name: str\n    in_code_tool_id: str | None\n    custom_headers: list[Any] | None\n    passthrough_auth: bool\n    mcp_server_id: int | None = None\n    user_id: str | None = None\n    oauth_config_id: int | None = None\n    oauth_config_name: str | None = None\n    enabled: bool = True\n\n    # Visibility settings computed from TOOL_VISIBILITY_CONFIG\n    chat_selectable: bool = True\n    agent_creation_selectable: bool = True\n    default_enabled: bool = False\n\n    @classmethod\n    def from_model(cls, tool: Tool) -> \"ToolSnapshot\":\n        # Get visibility config for this tool\n        config = get_tool_visibility_config(tool)\n\n        return cls(\n            id=tool.id,\n            name=tool.name,\n            description=tool.description or \"\",\n            definition=tool.openapi_schema,\n            display_name=tool.display_name or tool.name,\n            in_code_tool_id=tool.in_code_tool_id,\n            custom_headers=tool.custom_headers,\n            passthrough_auth=tool.passthrough_auth,\n            mcp_server_id=tool.mcp_server_id,\n            user_id=str(tool.user_id) if tool.user_id else None,\n            oauth_config_id=tool.oauth_config_id,\n            oauth_config_name=tool.oauth_config.name if tool.oauth_config else None,\n            enabled=tool.enabled,\n            # Populate visibility settings from config or use defaults\n            chat_selectable=config.chat_selectable if config else True,\n            agent_creation_selectable=(\n                config.agent_creation_selectable if config else True\n            ),\n            default_enabled=config.default_enabled if config else False,\n        )\n\n\nclass Header(BaseModel):\n    key: str\n    value: str\n\n\nclass CustomToolCreate(BaseModel):\n    name: str\n    description: str | None = None\n    definition: dict[str, Any]\n    custom_headers: list[Header] | None = None\n    passthrough_auth: bool\n    oauth_config_id: int | None = None\n\n\nclass CustomToolUpdate(BaseModel):\n    name: str | None = None\n    description: str | None = None\n    definition: dict[str, Any] | None = None\n    custom_headers: list[Header] | None = None\n    passthrough_auth: bool | None = None\n    oauth_config_id: int | None = None\n"
  },
  {
    "path": "backend/onyx/server/features/tool/tool_visibility.py",
    "content": "\"\"\"Tool visibility configuration and utility functions.\"\"\"\n\nfrom pydantic import BaseModel\n\nfrom onyx.db.models import Tool\nfrom onyx.tools.constants import MEMORY_TOOL_ID\nfrom onyx.tools.constants import OPEN_URL_TOOL_ID\n\n# Tool class name constant for OktaProfileTool (not in main constants.py as it's hidden)\nOKTA_PROFILE_TOOL_ID = \"OktaProfileTool\"\n\n\nclass ToolVisibilitySettings(BaseModel):\n    \"\"\"Configuration for tool visibility across different UI contexts.\"\"\"\n\n    chat_selectable: bool = True  # Whether tool appears in chat input bar dropdown\n    agent_creation_selectable: bool = (\n        True  # Whether tool appears in agent creation/default behavior pages\n    )\n    default_enabled: bool = False  # Whether tool is enabled by default\n    expose_to_frontend: bool = True  # Whether tool should be sent to frontend at all\n\n\n# Centralized configuration for tool visibility across different contexts\n# This allows for easy extension with new tools that need custom visibility rules\nTOOL_VISIBILITY_CONFIG: dict[str, ToolVisibilitySettings] = {\n    OPEN_URL_TOOL_ID: ToolVisibilitySettings(\n        chat_selectable=False,\n        agent_creation_selectable=True,\n        default_enabled=True,\n        expose_to_frontend=True,\n    ),\n    OKTA_PROFILE_TOOL_ID: ToolVisibilitySettings(\n        chat_selectable=False,\n        agent_creation_selectable=False,\n        default_enabled=False,\n        expose_to_frontend=False,  # Completely hidden from frontend\n    ),\n    MEMORY_TOOL_ID: ToolVisibilitySettings(\n        chat_selectable=False,\n        agent_creation_selectable=False,\n        default_enabled=False,\n        expose_to_frontend=False,\n    ),\n    # Future tools can be added here with custom visibility rules\n}\n\n\ndef should_expose_tool_to_fe(tool: Tool) -> bool:\n    \"\"\"Return True when the given tool should be sent to the frontend.\"\"\"\n    if tool.in_code_tool_id is None:\n        # Custom tools are always exposed to frontend\n        return True\n\n    config = TOOL_VISIBILITY_CONFIG.get(tool.in_code_tool_id)\n    return config.expose_to_frontend if config else True\n\n\ndef is_chat_selectable(tool: Tool) -> bool:\n    \"\"\"Return True if the tool should appear in the chat input bar dropdown.\n\n    Tools can be excluded from the chat dropdown while remaining available\n    in agent creation and configuration pages.\n    \"\"\"\n    if tool.in_code_tool_id is None:\n        # Custom tools are always chat selectable\n        return True\n\n    config = TOOL_VISIBILITY_CONFIG.get(tool.in_code_tool_id)\n\n    return config.chat_selectable if config else True\n\n\ndef is_agent_creation_selectable(tool: Tool) -> bool:\n    \"\"\"Return True if the tool should appear in agent creation/default behavior pages.\n\n    Most tools should be visible in these admin contexts.\n    \"\"\"\n    if tool.in_code_tool_id is None:\n        # Custom tools are always agent creation selectable\n        return True\n\n    config = TOOL_VISIBILITY_CONFIG.get(tool.in_code_tool_id)\n    return config.agent_creation_selectable if config else True\n\n\ndef get_tool_visibility_config(tool: Tool) -> ToolVisibilitySettings | None:\n    \"\"\"Get visibility configuration for a tool, or None if not configured.\"\"\"\n    if tool.in_code_tool_id is None:\n        return None\n    return TOOL_VISIBILITY_CONFIG.get(tool.in_code_tool_id)\n"
  },
  {
    "path": "backend/onyx/server/features/user_oauth_token/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/features/user_oauth_token/api.py",
    "content": "\"\"\"API endpoints for user OAuth token management.\"\"\"\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.oauth_token_manager import OAuthTokenManager\nfrom onyx.auth.users import current_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.oauth_config import get_all_user_oauth_tokens\n\nrouter = APIRouter(prefix=\"/user-oauth-token\")\n\n\nclass OAuthTokenStatus(BaseModel):\n    oauth_config_id: int\n    expires_at: int | None  # Unix timestamp\n    is_expired: bool\n\n\n@router.get(\"/status\")\ndef get_user_oauth_token_status(\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_user),\n) -> list[OAuthTokenStatus]:\n    \"\"\"\n    Get the OAuth token status for the current user across all OAuth configs.\n\n    Returns information about which OAuth configs the user has authenticated with\n    and whether their tokens are expired.\n    \"\"\"\n    user_tokens = get_all_user_oauth_tokens(user.id, db_session)\n    result = []\n    for token in user_tokens:\n        token_data = (\n            token.token_data.get_value(apply_mask=False) if token.token_data else {}\n        )\n        result.append(\n            OAuthTokenStatus(\n                oauth_config_id=token.oauth_config_id,\n                expires_at=OAuthTokenManager.token_expiration_time(token_data),\n                is_expired=OAuthTokenManager.is_token_expired(token_data),\n            )\n        )\n    return result\n"
  },
  {
    "path": "backend/onyx/server/features/web_search/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_user\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.web_search import fetch_active_web_content_provider\nfrom onyx.db.web_search import fetch_active_web_search_provider\nfrom onyx.server.features.web_search.models import OpenUrlsToolRequest\nfrom onyx.server.features.web_search.models import OpenUrlsToolResponse\nfrom onyx.server.features.web_search.models import WebSearchToolRequest\nfrom onyx.server.features.web_search.models import WebSearchToolResponse\nfrom onyx.server.features.web_search.models import WebSearchWithContentResponse\nfrom onyx.server.manage.web_search.models import WebContentProviderView\nfrom onyx.server.manage.web_search.models import WebSearchProviderView\nfrom onyx.tools.models import LlmOpenUrlResult\nfrom onyx.tools.models import LlmWebSearchResult\nfrom onyx.tools.tool_implementations.open_url.models import WebContentProvider\nfrom onyx.tools.tool_implementations.open_url.onyx_web_crawler import (\n    DEFAULT_MAX_HTML_SIZE_BYTES,\n)\nfrom onyx.tools.tool_implementations.open_url.onyx_web_crawler import (\n    DEFAULT_MAX_PDF_SIZE_BYTES,\n)\nfrom onyx.tools.tool_implementations.open_url.onyx_web_crawler import (\n    OnyxWebCrawler,\n)\nfrom onyx.tools.tool_implementations.open_url.utils import (\n    filter_web_contents_with_no_title_or_content,\n)\nfrom onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig\nfrom onyx.tools.tool_implementations.web_search.models import WebSearchProvider\nfrom onyx.tools.tool_implementations.web_search.providers import (\n    build_content_provider_from_config,\n)\nfrom onyx.tools.tool_implementations.web_search.providers import (\n    build_search_provider_from_config,\n)\nfrom onyx.tools.tool_implementations.web_search.utils import (\n    filter_web_search_results_with_no_title_or_snippet,\n)\nfrom onyx.tools.tool_implementations.web_search.utils import (\n    truncate_search_result_content,\n)\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.enums import WebContentProviderType\nfrom shared_configs.enums import WebSearchProviderType\n\nrouter = APIRouter(prefix=\"/web-search\", tags=PUBLIC_API_TAGS)\nlogger = setup_logger()\n\n\nDOCUMENT_CITATION_NUMBER_EMPTY_VALUE = -1\n\n\ndef _get_active_search_provider(\n    db_session: Session,\n) -> tuple[WebSearchProviderView, WebSearchProvider]:\n    provider_model = fetch_active_web_search_provider(db_session)\n    if provider_model is None:\n        raise HTTPException(\n            status_code=400,\n            detail=\"No web search provider configured.\",\n        )\n\n    provider_view = WebSearchProviderView(\n        id=provider_model.id,\n        name=provider_model.name,\n        provider_type=WebSearchProviderType(provider_model.provider_type),\n        is_active=provider_model.is_active,\n        config=provider_model.config or {},\n        has_api_key=bool(provider_model.api_key),\n    )\n\n    if provider_model.api_key is None:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Web search provider requires an API key.\",\n        )\n\n    try:\n        provider: WebSearchProvider = build_search_provider_from_config(\n            provider_type=provider_view.provider_type,\n            api_key=provider_model.api_key.get_value(apply_mask=False),\n            config=provider_model.config or {},\n        )\n    except ValueError as exc:\n        raise HTTPException(status_code=400, detail=str(exc)) from exc\n\n    return provider_view, provider\n\n\ndef _get_active_content_provider(\n    db_session: Session,\n) -> tuple[WebContentProviderView | None, WebContentProvider]:\n    provider_model = fetch_active_web_content_provider(db_session)\n\n    if provider_model is None:\n        # Default to the built-in crawler if nothing is configured. Always available.\n        # NOTE: the OnyxWebCrawler is not stored in the content provider table,\n        # so we need to return it directly.\n\n        return None, OnyxWebCrawler(\n            max_pdf_size_bytes=DEFAULT_MAX_PDF_SIZE_BYTES,\n            max_html_size_bytes=DEFAULT_MAX_HTML_SIZE_BYTES,\n        )\n\n    if provider_model.api_key is None:\n        # TODO - this is not a great error, in fact, this key should not be nullable.\n        raise HTTPException(\n            status_code=400,\n            detail=\"Web content provider requires an API key.\",\n        )\n\n    try:\n        provider_type = WebContentProviderType(provider_model.provider_type)\n        config = provider_model.config or WebContentProviderConfig()\n\n        provider: WebContentProvider | None = build_content_provider_from_config(\n            provider_type=provider_type,\n            api_key=provider_model.api_key.get_value(apply_mask=False),\n            config=config,\n        )\n    except ValueError as exc:\n        raise HTTPException(status_code=400, detail=str(exc)) from exc\n\n    if provider is None:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Unable to initialize the configured web content provider.\",\n        )\n\n    provider_view = WebContentProviderView(\n        id=provider_model.id,\n        name=provider_model.name,\n        provider_type=provider_type,\n        is_active=provider_model.is_active,\n        config=provider_model.config or WebContentProviderConfig(),\n        has_api_key=bool(provider_model.api_key),\n    )\n\n    return provider_view, provider\n\n\ndef _run_web_search(\n    request: WebSearchToolRequest, db_session: Session\n) -> tuple[WebSearchProviderType, list[LlmWebSearchResult]]:\n    provider_view, provider = _get_active_search_provider(db_session)\n\n    results: list[LlmWebSearchResult] = []\n    for query in request.queries:\n        try:\n            search_results = provider.search(query)\n        except HTTPException:\n            raise\n        except Exception as exc:\n            logger.exception(\"Web search provider failed for query '%s'\", query)\n            raise HTTPException(\n                status_code=502, detail=\"Web search provider failed to execute query.\"\n            ) from exc\n\n        filtered_results = filter_web_search_results_with_no_title_or_snippet(\n            list(search_results)\n        )\n        trimmed_results = list(filtered_results)[: request.max_results]\n        for search_result in trimmed_results:\n            results.append(\n                LlmWebSearchResult(\n                    document_citation_number=DOCUMENT_CITATION_NUMBER_EMPTY_VALUE,\n                    url=search_result.link,\n                    title=search_result.title,\n                    snippet=search_result.snippet or \"\",\n                    unique_identifier_to_strip_away=search_result.link,\n                )\n            )\n    return provider_view.provider_type, results\n\n\ndef _open_urls(\n    urls: list[str],\n    db_session: Session,\n) -> tuple[WebContentProviderType | None, list[LlmOpenUrlResult]]:\n    # SSRF protection is handled inside the content provider (OnyxWebCrawler)\n    # which uses ssrf_safe_get() to validate and fetch atomically,\n    # preventing DNS rebinding attacks\n    provider_view, provider = _get_active_content_provider(db_session)\n\n    try:\n        docs = filter_web_contents_with_no_title_or_content(\n            list(provider.contents(urls))\n        )\n    except HTTPException:\n        raise\n    except Exception as exc:\n        logger.exception(\"Web content provider failed to fetch URLs\")\n        raise HTTPException(\n            status_code=502, detail=\"Web content provider failed to fetch URLs.\"\n        ) from exc\n\n    results: list[LlmOpenUrlResult] = []\n    for doc in docs:\n        results.append(\n            LlmOpenUrlResult(\n                document_citation_number=DOCUMENT_CITATION_NUMBER_EMPTY_VALUE,\n                content=truncate_search_result_content(doc.full_content),\n                unique_identifier_to_strip_away=doc.link,\n            )\n        )\n    provider_type = (\n        provider_view.provider_type\n        if provider_view\n        else WebContentProviderType.ONYX_WEB_CRAWLER\n    )\n    return provider_type, results\n\n\n@router.post(\"/search\", response_model=WebSearchWithContentResponse)\ndef execute_web_search(\n    request: WebSearchToolRequest,\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> WebSearchWithContentResponse:\n    \"\"\"\n    Perform a web search and immediately fetch content for the returned URLs.\n\n    Use this when you want both snippets and page contents from one call.\n\n    If you want to selectively fetch content (i.e. let the LLM decide which URLs to read),\n    use `/search-lite` and then call `/open-urls` separately.\n    \"\"\"\n    search_provider_type, search_results = _run_web_search(request, db_session)\n\n    if not search_results:\n        return WebSearchWithContentResponse(\n            search_provider_type=search_provider_type,\n            content_provider_type=None,\n            search_results=[],\n            full_content_results=[],\n        )\n\n    # Fetch contents for unique URLs in the order they appear\n    seen: set[str] = set()\n    urls_to_fetch: list[str] = []\n    for result in search_results:\n        url = result.url\n        if url not in seen:\n            seen.add(url)\n            urls_to_fetch.append(url)\n\n    content_provider_type, full_content_results = _open_urls(urls_to_fetch, db_session)\n\n    return WebSearchWithContentResponse(\n        search_provider_type=search_provider_type,\n        content_provider_type=content_provider_type,\n        search_results=search_results,\n        full_content_results=full_content_results,\n    )\n\n\n@router.post(\"/search-lite\", response_model=WebSearchToolResponse)\ndef execute_web_search_lite(\n    request: WebSearchToolRequest,\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> WebSearchToolResponse:\n    \"\"\"\n    Lightweight search-only endpoint. Returns search snippets and URLs without\n    fetching page contents. Pair with `/open-urls` if you need to fetch content\n    later.\n    \"\"\"\n    provider_type, search_results = _run_web_search(request, db_session)\n\n    return WebSearchToolResponse(results=search_results, provider_type=provider_type)\n\n\n@router.post(\"/open-urls\", response_model=OpenUrlsToolResponse)\ndef execute_open_urls(\n    request: OpenUrlsToolRequest,\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> OpenUrlsToolResponse:\n    \"\"\"\n    Fetch content for specific URLs using the configured content provider.\n    Intended to complement `/search-lite` when you need content for a subset of URLs.\n    \"\"\"\n    provider_type, results = _open_urls(request.urls, db_session)\n    return OpenUrlsToolResponse(results=results, provider_type=provider_type)\n"
  },
  {
    "path": "backend/onyx/server/features/web_search/models.py",
    "content": "from pydantic import BaseModel\nfrom pydantic import Field\nfrom pydantic import field_validator\n\nfrom onyx.tools.models import LlmOpenUrlResult\nfrom onyx.tools.models import LlmWebSearchResult\nfrom shared_configs.enums import WebContentProviderType\nfrom shared_configs.enums import WebSearchProviderType\n\n\nclass WebSearchToolRequest(BaseModel):\n    queries: list[str] = Field(\n        ...,\n        min_length=1,\n        description=\"List of search queries to send to the configured provider.\",\n    )\n    max_results: int | None = Field(\n        default=10,\n        description=(\n            \"Optional cap on number of results to return per query. Defaults to 10.\"\n        ),\n    )\n\n    @field_validator(\"queries\")\n    @classmethod\n    def _strip_and_validate_queries(cls, queries: list[str]) -> list[str]:\n        cleaned_queries = [q.strip() for q in queries if q and q.strip()]\n        if not cleaned_queries:\n            raise ValueError(\"queries must include at least one non-empty value\")\n        return cleaned_queries\n\n    @field_validator(\"max_results\")\n    @classmethod\n    def _default_and_validate_max_results(cls, max_results: int | None) -> int:\n        # Default to 10 when not provided\n        max_results = 10 if max_results is None else max_results\n        if max_results < 1:\n            raise ValueError(\"max_results must be at least 1\")\n        return max_results\n\n\nclass WebSearchToolResponse(BaseModel):\n    results: list[LlmWebSearchResult]\n    provider_type: WebSearchProviderType\n\n\nclass WebSearchWithContentResponse(BaseModel):\n    search_provider_type: WebSearchProviderType\n    content_provider_type: WebContentProviderType | None = None\n    search_results: list[LlmWebSearchResult]\n    full_content_results: list[LlmOpenUrlResult]\n\n\nclass OpenUrlsToolRequest(BaseModel):\n    urls: list[str] = Field(\n        ...,\n        min_length=1,\n        description=\"URLs to fetch using the configured content provider.\",\n    )\n\n    @field_validator(\"urls\")\n    @classmethod\n    def _strip_and_validate_urls(cls, urls: list[str]) -> list[str]:\n        cleaned_urls = [url.strip() for url in urls if url and url.strip()]\n        if not cleaned_urls:\n            raise ValueError(\"urls must include at least one non-empty value\")\n        return cleaned_urls\n\n\nclass OpenUrlsToolResponse(BaseModel):\n    results: list[LlmOpenUrlResult]\n    provider_type: WebContentProviderType | None = None\n"
  },
  {
    "path": "backend/onyx/server/federated/api.py",
    "content": "import json\nfrom typing import Any\nfrom uuid import UUID\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Request\nfrom fastapi import Response\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.configs.constants import FederatedConnectorSource\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.federated import (\n    create_federated_connector as db_create_federated_connector,\n)\nfrom onyx.db.federated import delete_federated_connector\nfrom onyx.db.federated import fetch_all_federated_connectors\nfrom onyx.db.federated import fetch_federated_connector_by_id\nfrom onyx.db.federated import update_federated_connector\nfrom onyx.db.federated import update_federated_connector_oauth_token\nfrom onyx.db.federated import validate_federated_connector_credentials\nfrom onyx.db.models import User\nfrom onyx.federated_connectors.factory import get_federated_connector\nfrom onyx.federated_connectors.factory import get_federated_connector_cls\nfrom onyx.federated_connectors.interfaces import FederatedConnector\nfrom onyx.federated_connectors.oauth_utils import add_state_to_oauth_url\nfrom onyx.federated_connectors.oauth_utils import generate_oauth_state\nfrom onyx.federated_connectors.oauth_utils import get_oauth_callback_uri\nfrom onyx.federated_connectors.oauth_utils import verify_oauth_state\nfrom onyx.server.federated.models import AuthorizeUrlResponse\nfrom onyx.server.federated.models import ConfigurationSchemaResponse\nfrom onyx.server.federated.models import CredentialSchemaResponse\nfrom onyx.server.federated.models import EntitySpecResponse\nfrom onyx.server.federated.models import FederatedConnectorCredentials\nfrom onyx.server.federated.models import FederatedConnectorDetail\nfrom onyx.server.federated.models import FederatedConnectorRequest\nfrom onyx.server.federated.models import FederatedConnectorResponse\nfrom onyx.server.federated.models import FederatedConnectorStatus\nfrom onyx.server.federated.models import FederatedConnectorUpdateRequest\nfrom onyx.server.federated.models import OAuthCallbackResult\nfrom onyx.server.federated.models import UserOAuthStatus\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/federated\")\n\n\ndef _get_federated_connector_instance(\n    source: FederatedConnectorSource,\n    credentials: dict[str, Any],\n) -> FederatedConnector:\n    \"\"\"Factory function to get the appropriate federated connector instance.\"\"\"\n    try:\n        return get_federated_connector(source, credentials)\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n\n@router.post(\"\")\ndef create_federated_connector(\n    federated_connector_data: FederatedConnectorRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> FederatedConnectorResponse:\n    \"\"\"Create a new federated connector\"\"\"\n    tenant_id = get_current_tenant_id()\n\n    logger.info(\n        f\"Creating federated connector: source={federated_connector_data.source}, user={user.email}, tenant_id={tenant_id}\"\n    )\n\n    try:\n        # Create the federated connector with validation\n        federated_connector = db_create_federated_connector(\n            db_session=db_session,\n            source=federated_connector_data.source,\n            credentials=federated_connector_data.credentials.model_dump(),\n            config=federated_connector_data.config,\n        )\n\n        logger.info(\n            f\"Successfully created federated connector with id={federated_connector.id}\"\n        )\n\n        return FederatedConnectorResponse(\n            id=federated_connector.id,\n            source=federated_connector.source,\n        )\n\n    except ValueError as e:\n        logger.warning(f\"Validation error creating federated connector: {e}\")\n        db_session.rollback()\n        raise HTTPException(status_code=400, detail=str(e))\n    except Exception as e:\n        logger.error(f\"Error creating federated connector: {e}\")\n        db_session.rollback()\n        raise HTTPException(status_code=500, detail=str(e))\n\n\n@router.get(\"/{id}/entities\")\ndef get_entities(\n    id: int,\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> EntitySpecResponse:\n    \"\"\"Fetch allowed entities for the source type\"\"\"\n    try:\n        federated_connector = fetch_federated_connector_by_id(id, db_session)\n        if not federated_connector:\n            raise HTTPException(status_code=404, detail=\"Federated connector not found\")\n        if federated_connector.credentials is None:\n            raise HTTPException(\n                status_code=400, detail=\"Federated connector has no credentials\"\n            )\n\n        connector_instance = _get_federated_connector_instance(\n            federated_connector.source,\n            federated_connector.credentials.get_value(apply_mask=False),\n        )\n        entities_spec = connector_instance.configuration_schema()\n\n        # Convert EntityField objects to a dictionary format for the API response\n        entities_dict = {}\n        for key, field in entities_spec.items():\n            entities_dict[key] = {\n                \"type\": field.type,\n                \"description\": field.description,\n                \"required\": field.required,\n                \"default\": field.default,\n                \"example\": field.example,\n            }\n\n        return EntitySpecResponse(entities=entities_dict)\n\n    except HTTPException:\n        raise\n    except Exception as e:\n        logger.error(f\"Error fetching entities for federated connector {id}: {e}\")\n        raise HTTPException(status_code=500, detail=str(e))\n\n\n@router.get(\"/{id}/credentials/schema\")\ndef get_credentials_schema(\n    id: int,\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> CredentialSchemaResponse:\n    \"\"\"Fetch credential schema for the source type\"\"\"\n    try:\n        federated_connector = fetch_federated_connector_by_id(id, db_session)\n        if not federated_connector:\n            raise HTTPException(status_code=404, detail=\"Federated connector not found\")\n        if federated_connector.credentials is None:\n            raise HTTPException(\n                status_code=400, detail=\"Federated connector has no credentials\"\n            )\n\n        connector_instance = _get_federated_connector_instance(\n            federated_connector.source,\n            federated_connector.credentials.get_value(apply_mask=False),\n        )\n        credentials_spec = connector_instance.credentials_schema()\n\n        # Convert CredentialField objects to a dictionary format for the API response\n        credentials_dict = {}\n        for key, field in credentials_spec.items():\n            credentials_dict[key] = {\n                \"type\": field.type,\n                \"description\": field.description,\n                \"required\": field.required,\n                \"default\": field.default,\n                \"example\": field.example,\n                \"secret\": field.secret,\n            }\n\n        return CredentialSchemaResponse(credentials=credentials_dict)\n\n    except HTTPException:\n        raise\n    except Exception as e:\n        logger.error(\n            f\"Error fetching credentials schema for federated connector {id}: {e}\"\n        )\n        raise HTTPException(status_code=500, detail=str(e))\n\n\n@router.get(\"/sources/{source}/configuration/schema\")\ndef get_configuration_schema_by_source(\n    source: FederatedConnectorSource,\n    _: User = Depends(current_curator_or_admin_user),\n) -> ConfigurationSchemaResponse:\n    \"\"\"Fetch configuration schema for a specific source type (for setup/edit forms)\"\"\"\n    try:\n        connector_cls = get_federated_connector_cls(source)\n        entities_spec = connector_cls.configuration_schema()\n\n        # Convert EntityField objects to a dictionary format for the API response\n        configuration_dict = {}\n        for key, field in entities_spec.items():\n            configuration_dict[key] = {\n                \"type\": field.type,\n                \"description\": field.description,\n                \"required\": field.required,\n                \"default\": field.default,\n                \"example\": field.example,\n            }\n\n        return ConfigurationSchemaResponse(configuration=configuration_dict)\n\n    except Exception as e:\n        logger.error(f\"Error fetching configuration schema for source {source}: {e}\")\n        raise HTTPException(status_code=500, detail=str(e))\n\n\n@router.get(\"/sources/{source}/credentials/schema\")\ndef get_credentials_schema_by_source(\n    source: FederatedConnectorSource,\n    _: User = Depends(current_curator_or_admin_user),\n) -> CredentialSchemaResponse:\n    \"\"\"Fetch credential schema for a specific source type (for setup forms)\"\"\"\n    try:\n        connector_cls = get_federated_connector_cls(source)\n        credentials_spec = connector_cls.credentials_schema()\n\n        # Convert CredentialField objects to a dictionary format for the API response\n        credentials_dict = {}\n        for key, field in credentials_spec.items():\n            credentials_dict[key] = {\n                \"type\": field.type,\n                \"description\": field.description,\n                \"required\": field.required,\n                \"default\": field.default,\n                \"example\": field.example,\n                \"secret\": field.secret,\n            }\n\n        return CredentialSchemaResponse(credentials=credentials_dict)\n\n    except HTTPException:\n        raise\n    except Exception as e:\n        logger.error(f\"Error fetching credentials schema for source {source}: {e}\")\n        raise HTTPException(status_code=500, detail=str(e))\n\n\n@router.post(\"/sources/{source}/credentials/validate\")\ndef validate_credentials(\n    source: FederatedConnectorSource,\n    credentials: FederatedConnectorCredentials,\n    _: User = Depends(current_curator_or_admin_user),\n) -> bool:\n    \"\"\"Validate credentials for a specific source type\"\"\"\n    try:\n        is_valid = validate_federated_connector_credentials(\n            source, credentials.model_dump()\n        )\n\n        if not is_valid:\n            raise HTTPException(status_code=400, detail=\"Credentials are invalid\")\n\n        return is_valid\n\n    except HTTPException:\n        raise\n    except Exception as e:\n        logger.error(f\"Error validating credentials for source {source}: {e}\")\n        raise HTTPException(status_code=500, detail=str(e))\n\n\n@router.head(\"/{id}/entities/validate\")\ndef validate_entities(\n    id: int,\n    request: Request,\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    \"\"\"Validate specified entities for source type\"\"\"\n    try:\n        federated_connector = fetch_federated_connector_by_id(id, db_session)\n        if not federated_connector:\n            raise HTTPException(status_code=404, detail=\"Federated connector not found\")\n        if federated_connector.credentials is None:\n            return Response(status_code=400)\n\n        # For HEAD requests, we'll expect entities as query parameters\n        # since HEAD requests shouldn't have request bodies\n        entities_dict = {}\n        query_params = dict(request.query_params)\n        if \"entities\" in query_params:\n            try:\n                entities_dict = json.loads(query_params[\"entities\"])\n            except json.JSONDecodeError:\n                logger.warning(\"Could not parse entities from query parameters\")\n                return Response(status_code=400)\n\n        connector_instance = _get_federated_connector_instance(\n            federated_connector.source,\n            federated_connector.credentials.get_value(apply_mask=False),\n        )\n        is_valid = connector_instance.validate_entities(entities_dict)\n\n        if is_valid:\n            return Response(status_code=200)\n        else:\n            return Response(status_code=400)\n\n    except HTTPException:\n        raise\n    except Exception as e:\n        logger.error(f\"Error validating entities for federated connector {id}: {e}\")\n        return Response(status_code=500)\n\n\n@router.get(\"/{id}/authorize\")\ndef get_authorize_url(\n    id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> AuthorizeUrlResponse:\n    \"\"\"Get URL to send the user for OAuth\"\"\"\n    # Validate that the ID is not None or invalid\n    if id is None or id <= 0:\n        raise HTTPException(status_code=400, detail=\"Invalid federated connector ID\")\n\n    federated_connector = fetch_federated_connector_by_id(id, db_session)\n    if not federated_connector:\n        raise HTTPException(status_code=404, detail=\"Federated connector not found\")\n    if federated_connector.credentials is None:\n        raise HTTPException(\n            status_code=400, detail=\"Federated connector has no credentials\"\n        )\n\n    # Update credentials to include the correct redirect URI with the connector ID\n    updated_credentials = federated_connector.credentials.get_value(\n        apply_mask=False\n    ).copy()\n    if \"redirect_uri\" in updated_credentials and updated_credentials[\"redirect_uri\"]:\n        # Replace the {id} placeholder with the actual federated connector ID\n        updated_credentials[\"redirect_uri\"] = updated_credentials[\n            \"redirect_uri\"\n        ].replace(\"{id}\", str(id))\n\n    connector_instance = _get_federated_connector_instance(\n        federated_connector.source, updated_credentials\n    )\n    base_authorize_url = connector_instance.authorize(get_oauth_callback_uri())\n\n    # Generate state parameter and store session info\n    logger.info(\n        f\"Generating OAuth state for federated_connector_id={id}, user_id={user.id}\"\n    )\n    state = generate_oauth_state(\n        federated_connector_id=id,\n        user_id=str(user.id),\n    )\n\n    # Add state to the OAuth URL\n    authorize_url = add_state_to_oauth_url(base_authorize_url, state)\n    logger.info(f\"Generated OAuth authorize URL with state for connector {id}\")\n    return AuthorizeUrlResponse(authorize_url=authorize_url)\n\n\n@router.post(\"/callback\")\ndef handle_oauth_callback_generic(\n    request: Request,\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> OAuthCallbackResult:\n    \"\"\"Handle callback for any federated connector using state parameter\"\"\"\n    # Get callback data from request (query parameters)\n    callback_data = dict(request.query_params)\n\n    # Verify state parameter and get session info\n    state = callback_data.get(\"state\")\n    if not state:\n        raise HTTPException(status_code=400, detail=\"Missing state parameter\")\n\n    try:\n        oauth_session = verify_oauth_state(state)\n    except ValueError:\n        logger.exception(\"Error verifying OAuth state\")\n        raise HTTPException(\n            status_code=400, detail=\"Invalid or expired state parameter\"\n        )\n\n    if not oauth_session:\n        raise HTTPException(\n            status_code=400, detail=\"Invalid or expired state parameter\"\n        )\n\n    # Get federated connector ID from the state\n    federated_connector_id = oauth_session.federated_connector_id\n\n    # Validate federated_connector_id is not None\n    if federated_connector_id is None:\n        logger.error(\"OAuth session has null federated_connector_id\")\n        raise HTTPException(\n            status_code=400,\n            detail=\"Invalid OAuth session: missing federated connector ID\",\n        )\n\n    federated_connector = fetch_federated_connector_by_id(\n        federated_connector_id, db_session\n    )\n    if not federated_connector:\n        raise HTTPException(status_code=404, detail=\"Federated connector not found\")\n    if federated_connector.credentials is None:\n        raise HTTPException(\n            status_code=400, detail=\"Federated connector has no credentials\"\n        )\n\n    connector_instance = _get_federated_connector_instance(\n        federated_connector.source,\n        federated_connector.credentials.get_value(apply_mask=False),\n    )\n    oauth_result = connector_instance.callback(callback_data, get_oauth_callback_uri())\n\n    # Convert OAuthResult to OAuthCallbackResult for API response\n    oauth_result_dict = oauth_result.model_dump()\n    oauth_callback_result = OAuthCallbackResult(**oauth_result_dict)\n\n    # Add source information to the response\n    oauth_callback_result.source = federated_connector.source\n\n    # Store OAuth token in database if we have an access token\n    if oauth_result.access_token:\n        logger.info(\n            f\"Storing OAuth token for federated_connector_id={federated_connector_id}, user_id={oauth_session.user_id}\"\n        )\n        update_federated_connector_oauth_token(\n            db_session=db_session,\n            federated_connector_id=federated_connector_id,\n            user_id=UUID(oauth_session.user_id),\n            token=oauth_result.access_token,\n            expires_at=oauth_result.expires_at,\n        )\n\n    return oauth_callback_result\n\n\n@router.get(\"\")\ndef get_federated_connectors(\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[FederatedConnectorStatus]:\n    \"\"\"Get all federated connectors for display in the status table\"\"\"\n    federated_connectors = fetch_all_federated_connectors(db_session)\n\n    result = []\n    for fc in federated_connectors:\n        status_data = FederatedConnectorStatus(\n            id=fc.id,\n            source=fc.source,\n            name=f\"{fc.source.replace('_', ' ').title()}\",\n        )\n        result.append(status_data)\n\n    return result\n\n\n@router.get(\"/oauth-status\")\ndef get_user_oauth_status(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[UserOAuthStatus]:\n    \"\"\"Get OAuth status for all federated connectors for the current user\"\"\"\n    federated_connectors = fetch_all_federated_connectors(db_session)\n\n    result = []\n    for fc in federated_connectors:\n        # Check if user has OAuth token for this connector\n        oauth_token = None\n        for token in fc.oauth_tokens:\n            if token.user_id == user.id:\n                oauth_token = token\n                break\n\n        # Generate authorize URL if needed\n        authorize_url = None\n        if not oauth_token and fc.credentials is not None:\n            connector_instance = _get_federated_connector_instance(\n                fc.source, fc.credentials.get_value(apply_mask=False)\n            )\n            base_authorize_url = connector_instance.authorize(get_oauth_callback_uri())\n\n            # Generate state parameter and add to URL\n            state = generate_oauth_state(\n                federated_connector_id=fc.id,\n                user_id=str(user.id),\n            )\n            authorize_url = add_state_to_oauth_url(base_authorize_url, state)\n\n        status_data = UserOAuthStatus(\n            federated_connector_id=fc.id,\n            source=fc.source,\n            name=f\"{fc.source.replace('_', ' ').title()}\",\n            has_oauth_token=oauth_token is not None,\n            oauth_token_expires_at=oauth_token.expires_at if oauth_token else None,\n            authorize_url=authorize_url,\n        )\n        result.append(status_data)\n\n    return result\n\n\n@router.get(\"/{id}\")\ndef get_federated_connector_detail(\n    id: int,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> FederatedConnectorDetail:\n    \"\"\"Get detailed information about a specific federated connector\"\"\"\n    federated_connector = fetch_federated_connector_by_id(id, db_session)\n    if not federated_connector:\n        raise HTTPException(status_code=404, detail=\"Federated connector not found\")\n    if federated_connector.credentials is None:\n        raise HTTPException(\n            status_code=400, detail=\"Federated connector has no credentials\"\n        )\n\n    # Get OAuth token information for the current user\n    oauth_token = None\n    for token in federated_connector.oauth_tokens:\n        if token.user_id == user.id:\n            oauth_token = token\n            break\n\n    # Get document set mappings\n    document_sets = []\n    for mapping in federated_connector.document_sets:\n        document_sets.append(\n            {\n                \"id\": mapping.document_set_id,\n                \"name\": (\n                    mapping.document_set.name if mapping.document_set else \"Unknown\"\n                ),\n                \"entities\": mapping.entities,\n            }\n        )\n\n    return FederatedConnectorDetail(\n        id=federated_connector.id,\n        source=federated_connector.source,\n        name=f\"{federated_connector.source.replace('_', ' ').title()}\",\n        credentials=FederatedConnectorCredentials(\n            **federated_connector.credentials.get_value(apply_mask=True)\n        ),\n        config=federated_connector.config,\n        oauth_token_exists=oauth_token is not None,\n        oauth_token_expires_at=oauth_token.expires_at if oauth_token else None,\n        document_sets=document_sets,\n    )\n\n\n@router.put(\"/{id}\")\ndef update_federated_connector_endpoint(\n    id: int,\n    update_request: FederatedConnectorUpdateRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> FederatedConnectorDetail:\n    \"\"\"Update a federated connector's configuration\"\"\"\n    try:\n        # Update the federated connector\n        updated_connector = update_federated_connector(\n            db_session=db_session,\n            federated_connector_id=id,\n            credentials=(\n                update_request.credentials.model_dump()\n                if update_request.credentials\n                else None\n            ),\n            config=update_request.config,\n        )\n\n        if not updated_connector:\n            raise HTTPException(status_code=404, detail=\"Federated connector not found\")\n\n        # Return updated connector details\n        return get_federated_connector_detail(id, user, db_session)\n\n    except ValueError as e:\n        logger.warning(f\"Validation error updating federated connector {id}: {e}\")\n        raise HTTPException(status_code=400, detail=str(e))\n\n\n@router.delete(\"/{id}\")\ndef delete_federated_connector_endpoint(\n    id: int,\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> bool:\n    \"\"\"Delete a federated connector\"\"\"\n    success = delete_federated_connector(\n        db_session=db_session,\n        federated_connector_id=id,\n    )\n\n    if not success:\n        raise HTTPException(status_code=404, detail=\"Federated connector not found\")\n\n    return True\n\n\n@router.delete(\"/{id}/oauth\")\ndef disconnect_oauth_token(\n    id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> bool:\n    \"\"\"Disconnect OAuth token for the current user from a federated connector\"\"\"\n    # Check if the federated connector exists\n    federated_connector = fetch_federated_connector_by_id(id, db_session)\n    if not federated_connector:\n        raise HTTPException(status_code=404, detail=\"Federated connector not found\")\n\n    # Find and delete the user's OAuth token\n    oauth_token = None\n    for token in federated_connector.oauth_tokens:\n        if token.user_id == user.id:\n            oauth_token = token\n            break\n\n    if oauth_token:\n        db_session.delete(oauth_token)\n        db_session.commit()\n        return True\n    else:\n        raise HTTPException(\n            status_code=404, detail=\"No OAuth token found for this user\"\n        )\n"
  },
  {
    "path": "backend/onyx/server/federated/models.py",
    "content": "from datetime import datetime\nfrom typing import Any\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.configs.constants import FederatedConnectorSource\n\n\nclass FederatedConnectorCredentials(BaseModel):\n    \"\"\"Credentials for federated connector\"\"\"\n\n    client_id: str | None = None\n    client_secret: str | None = None\n    redirect_uri: str | None = None\n\n\nclass FederatedConnectorRequest(BaseModel):\n    source: FederatedConnectorSource\n    credentials: FederatedConnectorCredentials\n    config: dict[str, Any] = Field(default_factory=dict)\n\n\nclass FederatedConnectorResponse(BaseModel):\n    id: int\n    source: FederatedConnectorSource\n\n\nclass AuthorizeUrlResponse(BaseModel):\n    authorize_url: str\n\n\nclass OAuthCallbackResult(BaseModel):\n    access_token: str | None = None\n    expires_at: datetime | None = None\n    refresh_token: str | None = None\n    token_type: str | None = None\n    scope: str | None = None\n    source: FederatedConnectorSource | None = None\n\n\nclass FederatedConnectorStatus(BaseModel):\n    id: int\n    source: FederatedConnectorSource\n    name: str\n\n\nclass UserOAuthStatus(BaseModel):\n    \"\"\"OAuth status for a specific user and federated connector\"\"\"\n\n    federated_connector_id: int\n    source: FederatedConnectorSource\n    name: str\n    has_oauth_token: bool\n    oauth_token_expires_at: datetime | None = None\n    authorize_url: str | None = None\n\n\nclass FederatedConnectorDetail(BaseModel):\n    id: int\n    source: FederatedConnectorSource\n    name: str\n    credentials: FederatedConnectorCredentials\n    config: dict[str, Any] = Field(default_factory=dict)\n    oauth_token_exists: bool\n    oauth_token_expires_at: datetime | None = None\n    document_sets: list[dict[str, Any]] = Field(default_factory=list)\n\n\nclass FederatedConnectorSummary(BaseModel):\n    \"\"\"Simplified federated connector information with just essential data\"\"\"\n\n    id: int\n    name: str\n    source: FederatedConnectorSource\n    entities: dict[str, Any]\n\n    @classmethod\n    def from_federated_connector_detail(\n        cls, detail: FederatedConnectorDetail, entities: dict[str, Any]\n    ) -> \"FederatedConnectorSummary\":\n        return cls(\n            id=detail.id,\n            name=detail.name,\n            source=detail.source,\n            entities=entities,\n        )\n\n\nclass FederatedConnectorUpdateRequest(BaseModel):\n    credentials: FederatedConnectorCredentials | None = None\n    config: dict[str, Any] | None = None\n\n\nclass EntitySpecResponse(BaseModel):\n    \"\"\"Response for entity specification\"\"\"\n\n    entities: dict[str, Any]\n\n\nclass ConfigurationSchemaResponse(BaseModel):\n    \"\"\"Response for configuration schema specification\"\"\"\n\n    configuration: dict[str, Any]\n\n\nclass CredentialSchemaResponse(BaseModel):\n    \"\"\"Response for credential schema specification\"\"\"\n\n    credentials: dict[str, Any]\n"
  },
  {
    "path": "backend/onyx/server/kg/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.configs.constants import TMP_DRALPHA_PERSONA_NAME\nfrom onyx.configs.kg_configs import KG_BETA_ASSISTANT_DESCRIPTION\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.entities import get_entity_stats_by_grounded_source_name\nfrom onyx.db.entity_type import get_configured_entity_types\nfrom onyx.db.entity_type import update_entity_types_and_related_connectors__commit\nfrom onyx.db.kg_config import disable_kg\nfrom onyx.db.kg_config import enable_kg\nfrom onyx.db.kg_config import get_kg_config_settings\nfrom onyx.db.kg_config import set_kg_config_settings\nfrom onyx.db.models import User\nfrom onyx.db.persona import create_update_persona\nfrom onyx.db.persona import get_persona_by_id\nfrom onyx.db.persona import mark_persona_as_deleted\nfrom onyx.db.persona import mark_persona_as_not_deleted\nfrom onyx.db.tools import get_builtin_tool\nfrom onyx.kg.resets.reset_index import reset_full_kg_index__commit\nfrom onyx.kg.setup.kg_default_entity_definitions import (\n    populate_missing_default_entity_types__commit,\n)\nfrom onyx.prompts.kg_prompts import KG_BETA_ASSISTANT_SYSTEM_PROMPT\nfrom onyx.prompts.kg_prompts import KG_BETA_ASSISTANT_TASK_PROMPT\nfrom onyx.server.features.persona.models import PersonaUpsertRequest\nfrom onyx.server.kg.models import DisableKGConfigRequest\nfrom onyx.server.kg.models import EnableKGConfigRequest\nfrom onyx.server.kg.models import EntityType\nfrom onyx.server.kg.models import KGConfig\nfrom onyx.server.kg.models import KGConfig as KGConfigAPIModel\nfrom onyx.server.kg.models import SourceAndEntityTypeView\nfrom onyx.server.kg.models import SourceStatistics\nfrom onyx.tools.tool_implementations.knowledge_graph.knowledge_graph_tool import (\n    KnowledgeGraphTool,\n)\nfrom onyx.tools.tool_implementations.search.search_tool import SearchTool\n\n\nadmin_router = APIRouter(prefix=\"/admin/kg\")\n\n\n# exposed\n# Controls whether or not kg is viewable in the first place.\n\n\n@admin_router.get(\"/exposed\")\ndef get_kg_exposed(_: User = Depends(current_admin_user)) -> bool:\n    kg_config_settings = get_kg_config_settings()\n    return kg_config_settings.KG_EXPOSED\n\n\n# global resets\n\n\n@admin_router.put(\"/reset\")\ndef reset_kg(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> SourceAndEntityTypeView:\n    reset_full_kg_index__commit(db_session)\n    populate_missing_default_entity_types__commit(db_session=db_session)\n    return get_kg_entity_types(db_session=db_session)\n\n\n# configurations\n\n\n@admin_router.get(\"/config\")\ndef get_kg_config(_: User = Depends(current_admin_user)) -> KGConfig:\n    config = get_kg_config_settings()\n    return KGConfigAPIModel.from_kg_config_settings(config)\n\n\n@admin_router.put(\"/config\")\ndef enable_or_disable_kg(\n    req: EnableKGConfigRequest | DisableKGConfigRequest,\n    user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    if isinstance(req, DisableKGConfigRequest):\n        # Get the KG Beta persona ID and delete it\n        kg_config_settings = get_kg_config_settings()\n        persona_id = kg_config_settings.KG_BETA_PERSONA_ID\n        if persona_id is not None:\n            mark_persona_as_deleted(\n                persona_id=persona_id,\n                user=user,\n                db_session=db_session,\n            )\n        disable_kg()\n        return\n\n    # Enable KG\n    enable_kg(enable_req=req)\n    populate_missing_default_entity_types__commit(db_session=db_session)\n\n    # Get the search and knowledge graph tools\n    search_tool = get_builtin_tool(db_session=db_session, tool_type=SearchTool)\n    kg_tool = get_builtin_tool(db_session=db_session, tool_type=KnowledgeGraphTool)\n\n    # Check if we have a previously created persona\n    kg_config_settings = get_kg_config_settings()\n    persona_id = kg_config_settings.KG_BETA_PERSONA_ID\n\n    if persona_id is not None:\n        # Try to restore the existing persona\n        try:\n            persona = get_persona_by_id(\n                persona_id=persona_id,\n                user=user,\n                db_session=db_session,\n                include_deleted=True,\n            )\n            if persona.deleted:\n                mark_persona_as_not_deleted(\n                    persona_id=persona_id,\n                    user=user,\n                    db_session=db_session,\n                )\n            return\n\n        except ValueError:\n            # If persona doesn't exist or can't be restored, create a new one below\n            pass\n\n    # Create KG Beta persona (private to the admin who enabled KG)\n    persona_request = PersonaUpsertRequest(\n        name=TMP_DRALPHA_PERSONA_NAME,\n        description=KG_BETA_ASSISTANT_DESCRIPTION,\n        system_prompt=KG_BETA_ASSISTANT_SYSTEM_PROMPT,\n        task_prompt=KG_BETA_ASSISTANT_TASK_PROMPT,\n        datetime_aware=False,\n        is_public=False,\n        document_set_ids=[],\n        tool_ids=[search_tool.id, kg_tool.id],\n        llm_model_provider_override=None,\n        llm_model_version_override=None,\n        starter_messages=None,\n        users=[user.id],\n        groups=[],\n        label_ids=[],\n        is_featured=False,\n        display_priority=0,\n        user_file_ids=[],\n    )\n\n    persona_snapshot = create_update_persona(\n        persona_id=None,\n        create_persona_request=persona_request,\n        user=user,\n        db_session=db_session,\n    )\n    # Store the persona ID in the KG config\n    kg_config_settings.KG_BETA_PERSONA_ID = persona_snapshot.id\n    set_kg_config_settings(kg_config_settings)\n\n\n# entity-types\n\n\n@admin_router.get(\"/entity-types\")\ndef get_kg_entity_types(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> SourceAndEntityTypeView:\n    # when using for the first time, populate with default entity types\n    entity_types = {\n        source_name: [EntityType.from_model(et) for et in ets]\n        for source_name, ets in get_configured_entity_types(\n            db_session=db_session\n        ).items()\n    }\n\n    source_statistics = {\n        source_name: SourceStatistics(\n            source_name=source_name,\n            last_updated=last_updated,\n            entities_count=entities_count,\n        )\n        for source_name, (\n            last_updated,\n            entities_count,\n        ) in get_entity_stats_by_grounded_source_name(db_session=db_session).items()\n    }\n\n    return SourceAndEntityTypeView(\n        source_statistics=source_statistics, entity_types=entity_types\n    )\n\n\n@admin_router.put(\"/entity-types\")\ndef update_kg_entity_types(\n    updates: list[EntityType],\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_entity_types_and_related_connectors__commit(\n        db_session=db_session, updates=updates\n    )\n"
  },
  {
    "path": "backend/onyx/server/kg/models.py",
    "content": "from datetime import datetime\n\nfrom pydantic import ConfigDict\nfrom pydantic.main import BaseModel\n\nfrom onyx.db.models import KGEntityType\nfrom onyx.kg.models import KGConfigSettings\n\n\nclass KGConfig(BaseModel):\n    enabled: bool\n    vendor: str | None\n    vendor_domains: list[str] | None\n    ignore_domains: list[str] | None\n    coverage_start: datetime | None\n\n    @classmethod\n    def from_kg_config_settings(\n        cls,\n        kg_config_settings: KGConfigSettings,\n    ) -> \"KGConfig\":\n        return cls(\n            enabled=kg_config_settings.KG_ENABLED,\n            vendor=kg_config_settings.KG_VENDOR,\n            vendor_domains=kg_config_settings.KG_VENDOR_DOMAINS,\n            ignore_domains=kg_config_settings.KG_IGNORE_EMAIL_DOMAINS,\n            coverage_start=kg_config_settings.KG_COVERAGE_START_DATE,\n        )\n\n\nclass EnableKGConfigRequest(BaseModel):\n    vendor: str\n    vendor_domains: list[str]\n    ignore_domains: list[str] = []\n    coverage_start: datetime\n\n    model_config = ConfigDict(\n        extra=\"forbid\",\n    )\n\n\nclass DisableKGConfigRequest(BaseModel):\n    model_config = ConfigDict(\n        extra=\"forbid\",\n    )\n\n\nclass EntityType(BaseModel):\n    name: str\n    description: str\n    active: bool\n    grounded_source_name: str | None = None\n\n    @classmethod\n    def from_model(\n        cls,\n        model: KGEntityType,\n    ) -> \"EntityType\":\n        return cls(\n            name=model.id_name,\n            description=model.description or \"\",\n            active=model.active,\n            grounded_source_name=model.grounded_source_name,\n        )\n\n\nclass SourceStatistics(BaseModel):\n    source_name: str\n    last_updated: datetime\n    entities_count: int\n\n\nclass SourceAndEntityTypeView(BaseModel):\n    source_statistics: dict[str, SourceStatistics]\n    entity_types: dict[str, list[EntityType]]\n"
  },
  {
    "path": "backend/onyx/server/manage/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/manage/administrative.py",
    "content": "from datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import cast\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.background.celery.versioned_apps.client import app as client_app\nfrom onyx.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import KV_GEN_AI_KEY_CHECK_TIME\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_for_user\nfrom onyx.db.connector_credential_pair import (\n    update_connector_credential_pair_from_id,\n)\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.feedback import fetch_docs_ranked_by_boost_for_user\nfrom onyx.db.feedback import update_document_boost_for_user\nfrom onyx.db.feedback import update_document_hidden_for_user\nfrom onyx.db.index_attempt import cancel_indexing_attempts_for_ccpair\nfrom onyx.db.models import User\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.llm.factory import get_default_llm\nfrom onyx.llm.utils import test_llm\nfrom onyx.server.documents.models import ConnectorCredentialPairIdentifier\nfrom onyx.server.manage.models import BoostDoc\nfrom onyx.server.manage.models import BoostUpdateRequest\nfrom onyx.server.manage.models import HiddenUpdateRequest\nfrom onyx.server.models import StatusResponse\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nrouter = APIRouter(prefix=\"/manage\")\nlogger = setup_logger()\n\n\"\"\"Admin only API endpoints\"\"\"\n\n\n@router.get(\"/admin/doc-boosts\")\ndef get_most_boosted_docs(\n    ascending: bool,\n    limit: int,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[BoostDoc]:\n    boost_docs = fetch_docs_ranked_by_boost_for_user(\n        ascending=ascending,\n        limit=limit,\n        db_session=db_session,\n        user=user,\n    )\n    return [\n        BoostDoc(\n            document_id=doc.id,\n            semantic_id=doc.semantic_id,\n            # source=doc.source,\n            link=doc.link or \"\",\n            boost=doc.boost,\n            hidden=doc.hidden,\n        )\n        for doc in boost_docs\n    ]\n\n\n@router.post(\"/admin/doc-boosts\")\ndef document_boost_update(\n    boost_update: BoostUpdateRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    update_document_boost_for_user(\n        db_session=db_session,\n        document_id=boost_update.document_id,\n        boost=boost_update.boost,\n        user=user,\n    )\n    return StatusResponse(success=True, message=\"Updated document boost\")\n\n\n@router.post(\"/admin/doc-hidden\")\ndef document_hidden_update(\n    hidden_update: HiddenUpdateRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StatusResponse:\n    update_document_hidden_for_user(\n        db_session=db_session,\n        document_id=hidden_update.document_id,\n        hidden=hidden_update.hidden,\n        user=user,\n    )\n    return StatusResponse(success=True, message=\"Updated document boost\")\n\n\n@router.get(\"/admin/genai-api-key/validate\")\ndef validate_existing_genai_api_key(\n    _: User = Depends(current_admin_user),\n) -> None:\n    # Only validate every so often\n    kv_store = get_kv_store()\n    curr_time = datetime.now(tz=timezone.utc)\n    try:\n        last_check = datetime.fromtimestamp(\n            cast(float, kv_store.load(KV_GEN_AI_KEY_CHECK_TIME)), tz=timezone.utc\n        )\n        check_freq_sec = timedelta(seconds=GENERATIVE_MODEL_ACCESS_CHECK_FREQ)\n        if curr_time - last_check < check_freq_sec:\n            return\n    except KvKeyNotFoundError:\n        # First time checking the key, nothing unusual\n        pass\n\n    try:\n        llm = get_default_llm(timeout=10)\n    except ValueError:\n        raise HTTPException(status_code=404, detail=\"LLM not setup\")\n\n    error = test_llm(llm)\n    if error:\n        raise HTTPException(status_code=400, detail=error)\n\n    # Mark check as successful\n    curr_time = datetime.now(tz=timezone.utc)\n    kv_store.store(KV_GEN_AI_KEY_CHECK_TIME, curr_time.timestamp())\n\n\n@router.post(\"/admin/deletion-attempt\", tags=PUBLIC_API_TAGS)\ndef create_deletion_attempt_for_connector_id(\n    connector_credential_pair_identifier: ConnectorCredentialPairIdentifier,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    tenant_id = get_current_tenant_id()\n\n    connector_id = connector_credential_pair_identifier.connector_id\n    credential_id = connector_credential_pair_identifier.credential_id\n\n    cc_pair = get_connector_credential_pair_for_user(\n        db_session=db_session,\n        connector_id=connector_id,\n        credential_id=credential_id,\n        user=user,\n        get_editable=True,\n    )\n    if cc_pair is None:\n        error = f\"Connector with ID '{connector_id}' and credential ID '{credential_id}' does not exist. Has it already been deleted?\"\n        logger.error(error)\n        raise HTTPException(\n            status_code=404,\n            detail=error,\n        )\n\n    # Cancel any scheduled indexing attempts\n    cancel_indexing_attempts_for_ccpair(\n        cc_pair_id=cc_pair.id, db_session=db_session, include_secondary_index=True\n    )\n\n    # TODO(rkuo): 2024-10-24 - check_deletion_attempt_is_allowed shouldn't be necessary\n    # any more due to background locking improvements.\n    # Remove the below permanently if everything is behaving for 30 days.\n\n    # Check if the deletion attempt should be allowed\n    # deletion_attempt_disallowed_reason = check_deletion_attempt_is_allowed(\n    #     connector_credential_pair=cc_pair, db_session=db_session\n    # )\n    # if deletion_attempt_disallowed_reason:\n    #     raise HTTPException(\n    #         status_code=400,\n    #         detail=deletion_attempt_disallowed_reason,\n    #     )\n\n    # mark as deleting\n    update_connector_credential_pair_from_id(\n        db_session=db_session,\n        cc_pair_id=cc_pair.id,\n        status=ConnectorCredentialPairStatus.DELETING,\n    )\n\n    db_session.commit()\n\n    # run the beat task to pick up this deletion from the db immediately\n    client_app.send_task(\n        OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,\n        priority=OnyxCeleryPriority.HIGH,\n        kwargs={\"tenant_id\": tenant_id},\n    )\n\n    logger.info(\n        f\"create_deletion_attempt_for_connector_id - running check_for_connector_deletion: cc_pair={cc_pair.id}\"\n    )\n\n    if cc_pair.connector.source == DocumentSource.FILE:\n        connector = cc_pair.connector\n        file_store = get_default_file_store()\n        for file_id in connector.connector_specific_config.get(\"file_locations\", []):\n            file_store.delete_file(file_id)\n"
  },
  {
    "path": "backend/onyx/server/manage/code_interpreter/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/manage/code_interpreter/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.db.code_interpreter import fetch_code_interpreter_server\nfrom onyx.db.code_interpreter import update_code_interpreter_server_enabled\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.server.manage.code_interpreter.models import CodeInterpreterServer\nfrom onyx.server.manage.code_interpreter.models import CodeInterpreterServerHealth\nfrom onyx.tools.tool_implementations.python.code_interpreter_client import (\n    CodeInterpreterClient,\n)\n\nadmin_router = APIRouter(prefix=\"/admin/code-interpreter\")\n\n\n@admin_router.get(\"/health\")\ndef get_code_interpreter_health(\n    _: User = Depends(current_admin_user),\n) -> CodeInterpreterServerHealth:\n    try:\n        client = CodeInterpreterClient()\n        return CodeInterpreterServerHealth(healthy=client.health())\n    except ValueError:\n        return CodeInterpreterServerHealth(healthy=False)\n\n\n@admin_router.get(\"\")\ndef get_code_interpreter(\n    _: User = Depends(current_admin_user), db_session: Session = Depends(get_session)\n) -> CodeInterpreterServer:\n    ci_server = fetch_code_interpreter_server(db_session)\n    return CodeInterpreterServer(enabled=ci_server.server_enabled)\n\n\n@admin_router.put(\"\")\ndef update_code_interpreter(\n    update: CodeInterpreterServer,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_code_interpreter_server_enabled(\n        db_session=db_session,\n        enabled=update.enabled,\n    )\n"
  },
  {
    "path": "backend/onyx/server/manage/code_interpreter/models.py",
    "content": "from pydantic import BaseModel\n\n\nclass CodeInterpreterServer(BaseModel):\n    enabled: bool\n\n\nclass CodeInterpreterServerHealth(BaseModel):\n    healthy: bool\n"
  },
  {
    "path": "backend/onyx/server/manage/discord_bot/api.py",
    "content": "\"\"\"Discord bot admin API endpoints.\"\"\"\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import status\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.configs.app_configs import AUTH_TYPE\nfrom onyx.configs.app_configs import DISCORD_BOT_TOKEN\nfrom onyx.configs.constants import AuthType\nfrom onyx.db.discord_bot import create_discord_bot_config\nfrom onyx.db.discord_bot import create_guild_config\nfrom onyx.db.discord_bot import delete_discord_bot_config\nfrom onyx.db.discord_bot import delete_discord_service_api_key\nfrom onyx.db.discord_bot import delete_guild_config\nfrom onyx.db.discord_bot import get_channel_config_by_internal_ids\nfrom onyx.db.discord_bot import get_channel_configs\nfrom onyx.db.discord_bot import get_discord_bot_config\nfrom onyx.db.discord_bot import get_guild_config_by_internal_id\nfrom onyx.db.discord_bot import get_guild_configs\nfrom onyx.db.discord_bot import update_discord_channel_config\nfrom onyx.db.discord_bot import update_guild_config\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.server.manage.discord_bot.models import DiscordBotConfigCreateRequest\nfrom onyx.server.manage.discord_bot.models import DiscordBotConfigResponse\nfrom onyx.server.manage.discord_bot.models import DiscordChannelConfigResponse\nfrom onyx.server.manage.discord_bot.models import DiscordChannelConfigUpdateRequest\nfrom onyx.server.manage.discord_bot.models import DiscordGuildConfigCreateResponse\nfrom onyx.server.manage.discord_bot.models import DiscordGuildConfigResponse\nfrom onyx.server.manage.discord_bot.models import DiscordGuildConfigUpdateRequest\nfrom onyx.server.manage.discord_bot.utils import (\n    generate_discord_registration_key,\n)\nfrom shared_configs.contextvars import get_current_tenant_id\n\nrouter = APIRouter(prefix=\"/manage/admin/discord-bot\")\n\n\ndef _check_bot_config_api_access() -> None:\n    \"\"\"Raise 403 if bot config cannot be managed via API.\n\n    Bot config endpoints are disabled:\n    - On Cloud (managed by Onyx)\n    - When DISCORD_BOT_TOKEN env var is set (managed via env)\n    \"\"\"\n    if AUTH_TYPE == AuthType.CLOUD:\n        raise HTTPException(\n            status_code=status.HTTP_403_FORBIDDEN,\n            detail=\"Discord bot configuration is managed by Onyx on Cloud.\",\n        )\n    if DISCORD_BOT_TOKEN:\n        raise HTTPException(\n            status_code=status.HTTP_403_FORBIDDEN,\n            detail=\"Discord bot is configured via environment variables. API access disabled.\",\n        )\n\n\n# === Bot Config ===\n\n\n@router.get(\"/config\", response_model=DiscordBotConfigResponse)\ndef get_bot_config(\n    _: None = Depends(_check_bot_config_api_access),\n    __: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> DiscordBotConfigResponse:\n    \"\"\"Get Discord bot config. Returns 403 on Cloud or if env vars set.\"\"\"\n    config = get_discord_bot_config(db_session)\n    if not config:\n        return DiscordBotConfigResponse(configured=False)\n\n    return DiscordBotConfigResponse(\n        configured=True,\n        created_at=config.created_at,\n    )\n\n\n@router.post(\"/config\", response_model=DiscordBotConfigResponse)\ndef create_bot_request(\n    request: DiscordBotConfigCreateRequest,\n    _: None = Depends(_check_bot_config_api_access),\n    __: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> DiscordBotConfigResponse:\n    \"\"\"Create Discord bot config. Returns 403 on Cloud or if env vars set.\"\"\"\n    try:\n        config = create_discord_bot_config(\n            db_session,\n            bot_token=request.bot_token,\n        )\n    except ValueError:\n        raise HTTPException(\n            status_code=status.HTTP_409_CONFLICT,\n            detail=\"Discord bot config already exists. Delete it first to create a new one.\",\n        )\n\n    db_session.commit()\n\n    return DiscordBotConfigResponse(\n        configured=True,\n        created_at=config.created_at,\n    )\n\n\n@router.delete(\"/config\")\ndef delete_bot_config_endpoint(\n    _: None = Depends(_check_bot_config_api_access),\n    __: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> dict:\n    \"\"\"Delete Discord bot config.\n\n    Also deletes the Discord service API key since the bot is being removed.\n    \"\"\"\n    deleted = delete_discord_bot_config(db_session)\n    if not deleted:\n        raise HTTPException(status_code=404, detail=\"Bot config not found\")\n\n    # Also delete the service API key used by the Discord bot\n    delete_discord_service_api_key(db_session)\n\n    db_session.commit()\n    return {\"deleted\": True}\n\n\n# === Service API Key ===\n\n\n@router.delete(\"/service-api-key\")\ndef delete_service_api_key_endpoint(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> dict:\n    \"\"\"Delete the Discord service API key.\n\n    This endpoint allows manual deletion of the service API key used by the\n    Discord bot to authenticate with the Onyx API. The key is also automatically\n    deleted when:\n    - Bot config is deleted (self-hosted)\n    - All guild configs are deleted (Cloud)\n    \"\"\"\n    deleted = delete_discord_service_api_key(db_session)\n    if not deleted:\n        raise HTTPException(status_code=404, detail=\"Service API key not found\")\n    db_session.commit()\n    return {\"deleted\": True}\n\n\n# === Guild Config ===\n\n\n@router.get(\"/guilds\", response_model=list[DiscordGuildConfigResponse])\ndef list_guild_configs(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[DiscordGuildConfigResponse]:\n    \"\"\"List all guild configs (pending and registered).\"\"\"\n    configs = get_guild_configs(db_session)\n    return [DiscordGuildConfigResponse.model_validate(c) for c in configs]\n\n\n@router.post(\"/guilds\", response_model=DiscordGuildConfigCreateResponse)\ndef create_guild_request(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> DiscordGuildConfigCreateResponse:\n    \"\"\"Create new guild config with registration key. Key shown once.\"\"\"\n    tenant_id = get_current_tenant_id()\n    registration_key = generate_discord_registration_key(tenant_id)\n\n    config = create_guild_config(db_session, registration_key)\n    db_session.commit()\n\n    return DiscordGuildConfigCreateResponse(\n        id=config.id,\n        registration_key=registration_key,  # Shown once!\n    )\n\n\n@router.get(\"/guilds/{config_id}\", response_model=DiscordGuildConfigResponse)\ndef get_guild_config(\n    config_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> DiscordGuildConfigResponse:\n    \"\"\"Get specific guild config.\"\"\"\n    config = get_guild_config_by_internal_id(db_session, internal_id=config_id)\n    if not config:\n        raise HTTPException(status_code=404, detail=\"Guild config not found\")\n    return DiscordGuildConfigResponse.model_validate(config)\n\n\n@router.patch(\"/guilds/{config_id}\", response_model=DiscordGuildConfigResponse)\ndef update_guild_request(\n    config_id: int,\n    request: DiscordGuildConfigUpdateRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> DiscordGuildConfigResponse:\n    \"\"\"Update guild config.\"\"\"\n    config = get_guild_config_by_internal_id(db_session, internal_id=config_id)\n    if not config:\n        raise HTTPException(status_code=404, detail=\"Guild config not found\")\n\n    config = update_guild_config(\n        db_session,\n        config,\n        enabled=request.enabled,\n        default_persona_id=request.default_persona_id,\n    )\n    db_session.commit()\n\n    return DiscordGuildConfigResponse.model_validate(config)\n\n\n@router.delete(\"/guilds/{config_id}\")\ndef delete_guild_request(\n    config_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> dict:\n    \"\"\"Delete guild config (invalidates registration key).\n\n    On Cloud, if this was the last guild config, also deletes the service API key.\n    \"\"\"\n    deleted = delete_guild_config(db_session, config_id)\n    if not deleted:\n        raise HTTPException(status_code=404, detail=\"Guild config not found\")\n\n    # On Cloud, delete service API key when all guilds are removed\n    if AUTH_TYPE == AuthType.CLOUD:\n        remaining_guilds = get_guild_configs(db_session)\n        if not remaining_guilds:\n            delete_discord_service_api_key(db_session)\n\n    db_session.commit()\n    return {\"deleted\": True}\n\n\n# === Channel Config ===\n\n\n@router.get(\n    \"/guilds/{config_id}/channels\", response_model=list[DiscordChannelConfigResponse]\n)\ndef list_channel_configs(\n    config_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[DiscordChannelConfigResponse]:\n    \"\"\"List whitelisted channels for a guild.\"\"\"\n    guild_config = get_guild_config_by_internal_id(db_session, internal_id=config_id)\n    if not guild_config:\n        raise HTTPException(status_code=404, detail=\"Guild config not found\")\n    if not guild_config.guild_id:\n        raise HTTPException(status_code=400, detail=\"Guild not yet registered\")\n\n    configs = get_channel_configs(db_session, config_id)\n    return [DiscordChannelConfigResponse.model_validate(c) for c in configs]\n\n\n@router.patch(\n    \"/guilds/{guild_config_id}/channels/{channel_config_id}\",\n    response_model=DiscordChannelConfigResponse,\n)\ndef update_channel_request(\n    guild_config_id: int,\n    channel_config_id: int,\n    request: DiscordChannelConfigUpdateRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> DiscordChannelConfigResponse:\n    \"\"\"Update channel config.\"\"\"\n    config = get_channel_config_by_internal_ids(\n        db_session, guild_config_id, channel_config_id\n    )\n    if not config:\n        raise HTTPException(status_code=404, detail=\"Channel config not found\")\n\n    config = update_discord_channel_config(\n        db_session,\n        config,\n        channel_name=config.channel_name,  # Keep existing name, only Discord can update\n        thread_only_mode=request.thread_only_mode,\n        require_bot_invocation=request.require_bot_invocation,\n        persona_override_id=request.persona_override_id,\n        enabled=request.enabled,\n    )\n    db_session.commit()\n\n    return DiscordChannelConfigResponse.model_validate(config)\n"
  },
  {
    "path": "backend/onyx/server/manage/discord_bot/models.py",
    "content": "\"\"\"Pydantic models for Discord bot API.\"\"\"\n\nfrom datetime import datetime\n\nfrom pydantic import BaseModel\n\n\n# === Bot Config ===\n\n\nclass DiscordBotConfigResponse(BaseModel):\n    configured: bool\n    created_at: datetime | None = None\n\n    class Config:\n        from_attributes = True\n\n\nclass DiscordBotConfigCreateRequest(BaseModel):\n    bot_token: str\n\n\n# === Guild Config ===\n\n\nclass DiscordGuildConfigResponse(BaseModel):\n    id: int\n    guild_id: int | None\n    guild_name: str | None\n    registered_at: datetime | None\n    default_persona_id: int | None\n    enabled: bool\n\n    class Config:\n        from_attributes = True\n\n\nclass DiscordGuildConfigCreateResponse(BaseModel):\n    id: int\n    registration_key: str  # Shown once!\n\n\nclass DiscordGuildConfigUpdateRequest(BaseModel):\n    enabled: bool\n    default_persona_id: int | None\n\n\n# === Channel Config ===\n\n\nclass DiscordChannelConfigResponse(BaseModel):\n    id: int\n    guild_config_id: int\n    channel_id: int\n    channel_name: str\n    channel_type: str\n    is_private: bool\n    require_bot_invocation: bool\n    thread_only_mode: bool\n    persona_override_id: int | None\n    enabled: bool\n\n    class Config:\n        from_attributes = True\n\n\nclass DiscordChannelConfigUpdateRequest(BaseModel):\n    require_bot_invocation: bool\n    persona_override_id: int | None\n    enabled: bool\n    thread_only_mode: bool\n"
  },
  {
    "path": "backend/onyx/server/manage/discord_bot/utils.py",
    "content": "\"\"\"Discord registration key generation and parsing.\"\"\"\n\nimport secrets\nfrom urllib.parse import quote\nfrom urllib.parse import unquote\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nREGISTRATION_KEY_PREFIX: str = \"discord_\"\n\n\ndef generate_discord_registration_key(tenant_id: str) -> str:\n    \"\"\"Generate a one-time registration key with embedded tenant_id.\n\n    Format: discord_<url_encoded_tenant_id>.<random_token>\n\n    Follows the same pattern as API keys for consistency.\n    \"\"\"\n    encoded_tenant = quote(tenant_id)\n    random_token = secrets.token_urlsafe(16)\n\n    logger.info(f\"Generated Discord registration key for tenant {tenant_id}\")\n    return f\"{REGISTRATION_KEY_PREFIX}{encoded_tenant}.{random_token}\"\n\n\ndef parse_discord_registration_key(key: str) -> str | None:\n    \"\"\"Parse registration key to extract tenant_id.\n\n    Returns tenant_id or None if invalid format.\n    \"\"\"\n    if not key.startswith(REGISTRATION_KEY_PREFIX):\n        return None\n\n    try:\n        key_body = key.removeprefix(REGISTRATION_KEY_PREFIX)\n        parts = key_body.split(\".\", 1)\n        if len(parts) != 2:\n            return None\n\n        encoded_tenant = parts[0]\n        tenant_id = unquote(encoded_tenant)\n        return tenant_id\n    except Exception:\n        return None\n"
  },
  {
    "path": "backend/onyx/server/manage/embedding/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.llm import fetch_existing_embedding_providers\nfrom onyx.db.llm import remove_embedding_provider\nfrom onyx.db.llm import upsert_cloud_embedding_provider\nfrom onyx.db.models import User\nfrom onyx.db.search_settings import get_all_search_settings\nfrom onyx.db.search_settings import get_current_db_embedding_provider\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.indexing.models import EmbeddingModelDetail\nfrom onyx.natural_language_processing.search_nlp_models import EmbeddingModel\nfrom onyx.server.manage.embedding.models import CloudEmbeddingProvider\nfrom onyx.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest\nfrom onyx.server.manage.embedding.models import TestEmbeddingRequest\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MODEL_SERVER_HOST\nfrom shared_configs.configs import MODEL_SERVER_PORT\nfrom shared_configs.enums import EmbeddingProvider\nfrom shared_configs.enums import EmbedTextType\n\n\nlogger = setup_logger()\n\n\nadmin_router = APIRouter(prefix=\"/admin/embedding\")\nbasic_router = APIRouter(prefix=\"/embedding\")\n\n\n@admin_router.post(\"/test-embedding\")\ndef test_embedding_configuration(\n    test_llm_request: TestEmbeddingRequest,\n    _: User = Depends(current_admin_user),\n) -> None:\n    try:\n        test_model = EmbeddingModel(\n            server_host=MODEL_SERVER_HOST,\n            server_port=MODEL_SERVER_PORT,\n            api_key=test_llm_request.api_key,\n            api_url=test_llm_request.api_url,\n            provider_type=test_llm_request.provider_type,\n            model_name=test_llm_request.model_name,\n            api_version=test_llm_request.api_version,\n            deployment_name=test_llm_request.deployment_name,\n            normalize=False,\n            query_prefix=None,\n            passage_prefix=None,\n        )\n        test_model.encode([\"Testing Embedding\"], text_type=EmbedTextType.QUERY)\n\n    except ValueError as e:\n        error_msg = f\"Not a valid embedding model. Exception thrown: {e}\"\n        logger.error(error_msg)\n        raise ValueError(error_msg)\n\n    except Exception as e:\n        error_msg = \"An error occurred while testing your embedding model. Please check your configuration.\"\n        logger.error(f\"{error_msg} Error message: {e}\", exc_info=True)\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, error_msg)\n\n\n@admin_router.get(\"\", response_model=list[EmbeddingModelDetail])\ndef list_embedding_models(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[EmbeddingModelDetail]:\n    search_settings = get_all_search_settings(db_session)\n    return [EmbeddingModelDetail.from_db_model(setting) for setting in search_settings]\n\n\n@admin_router.get(\"/embedding-provider\")\ndef list_embedding_providers(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[CloudEmbeddingProvider]:\n    return [\n        CloudEmbeddingProvider.from_request(embedding_provider_model)\n        for embedding_provider_model in fetch_existing_embedding_providers(db_session)\n    ]\n\n\n@admin_router.delete(\"/embedding-provider/{provider_type}\")\ndef delete_embedding_provider(\n    provider_type: EmbeddingProvider,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    embedding_provider = get_current_db_embedding_provider(db_session=db_session)\n    if (\n        embedding_provider is not None\n        and provider_type == embedding_provider.provider_type\n    ):\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"You can't delete a currently active model\",\n        )\n\n    remove_embedding_provider(db_session, provider_type=provider_type)\n\n\n@admin_router.put(\"/embedding-provider\")\ndef put_cloud_embedding_provider(\n    provider: CloudEmbeddingProviderCreationRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> CloudEmbeddingProvider:\n    return upsert_cloud_embedding_provider(db_session, provider)\n"
  },
  {
    "path": "backend/onyx/server/manage/embedding/models.py",
    "content": "from typing import TYPE_CHECKING\n\nfrom pydantic import BaseModel\n\nfrom shared_configs.enums import EmbeddingProvider\n\nif TYPE_CHECKING:\n    from onyx.db.models import CloudEmbeddingProvider as CloudEmbeddingProviderModel\n\n\nclass SearchSettingsDeleteRequest(BaseModel):\n    search_settings_id: int\n\n\nclass TestEmbeddingRequest(BaseModel):\n    provider_type: EmbeddingProvider\n    api_key: str | None = None\n    api_url: str | None = None\n    model_name: str | None = None\n    api_version: str | None = None\n    deployment_name: str | None = None\n\n    # This disables the \"model_\" protected namespace for pydantic\n    model_config = {\"protected_namespaces\": ()}\n\n\nclass CloudEmbeddingProvider(BaseModel):\n    provider_type: EmbeddingProvider\n    api_key: str | None = None\n    api_url: str | None = None\n    api_version: str | None = None\n    deployment_name: str | None = None\n\n    @classmethod\n    def from_request(\n        cls, cloud_provider_model: \"CloudEmbeddingProviderModel\"\n    ) -> \"CloudEmbeddingProvider\":\n        return cls(\n            provider_type=cloud_provider_model.provider_type,\n            api_key=(\n                cloud_provider_model.api_key.get_value(apply_mask=True)\n                if cloud_provider_model.api_key\n                else None\n            ),\n            api_url=cloud_provider_model.api_url,\n            api_version=cloud_provider_model.api_version,\n            deployment_name=cloud_provider_model.deployment_name,\n        )\n\n\nclass CloudEmbeddingProviderCreationRequest(BaseModel):\n    provider_type: EmbeddingProvider\n    api_key: str | None = None\n    api_url: str | None = None\n    api_version: str | None = None\n    deployment_name: str | None = None\n"
  },
  {
    "path": "backend/onyx/server/manage/get_state.py",
    "content": "import concurrent.futures\nimport re\n\nimport requests\nfrom fastapi import APIRouter\nfrom fastapi import HTTPException\n\nfrom onyx import __version__\nfrom onyx.auth.users import anonymous_user_enabled\nfrom onyx.auth.users import user_needs_to_be_verified\nfrom onyx.configs.app_configs import AUTH_TYPE\nfrom onyx.configs.app_configs import OAUTH_ENABLED\nfrom onyx.configs.app_configs import PASSWORD_MIN_LENGTH\nfrom onyx.configs.constants import AuthType\nfrom onyx.configs.constants import DEV_VERSION_PATTERN\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.configs.constants import STABLE_VERSION_PATTERN\nfrom onyx.db.auth import get_user_count\nfrom onyx.server.manage.models import AllVersions\nfrom onyx.server.manage.models import AuthTypeResponse\nfrom onyx.server.manage.models import ContainerVersions\nfrom onyx.server.manage.models import VersionResponse\nfrom onyx.server.models import StatusResponse\n\nrouter = APIRouter()\n\n\n@router.get(\"/health\", tags=PUBLIC_API_TAGS)\nasync def healthcheck() -> StatusResponse:\n    return StatusResponse(success=True, message=\"ok\")\n\n\n@router.get(\"/auth/type\", tags=PUBLIC_API_TAGS)\nasync def get_auth_type() -> AuthTypeResponse:\n    # NOTE: This endpoint is critical for the multi-tenant flow and is hit before there is a tenant context\n    # The reason is this is used during the login flow, but we don't know which tenant the user is supposed to be\n    # associated with until they auth.\n    has_users = True\n    if AUTH_TYPE != AuthType.CLOUD:\n        user_count = await get_user_count()\n        has_users = user_count > 0\n\n    return AuthTypeResponse(\n        auth_type=AUTH_TYPE,\n        requires_verification=user_needs_to_be_verified(),\n        anonymous_user_enabled=anonymous_user_enabled(),\n        password_min_length=PASSWORD_MIN_LENGTH,\n        has_users=has_users,\n        oauth_enabled=OAUTH_ENABLED,\n    )\n\n\n@router.get(\"/version\", tags=PUBLIC_API_TAGS)\ndef get_version() -> VersionResponse:\n    return VersionResponse(backend_version=__version__)\n\n\n@router.get(\"/versions\", tags=PUBLIC_API_TAGS)\ndef get_versions() -> AllVersions:\n    \"\"\"\n    Fetches the latest stable and beta versions of Onyx Docker images.\n    Since DockerHub does not explicitly flag stable and beta images,\n    this endpoint can be used to programmatically check for new images.\n    \"\"\"\n    # Fetch the latest tags from DockerHub for each Onyx component\n    dockerhub_repos = [\n        \"onyxdotapp/onyx-model-server\",\n        \"onyxdotapp/onyx-backend\",\n        \"onyxdotapp/onyx-web-server\",\n    ]\n\n    # For good measure, we fetch 10 pages of tags\n    def get_dockerhub_tags(repo: str, pages: int = 10) -> list[str]:\n        url = f\"https://hub.docker.com/v2/repositories/{repo}/tags\"\n        tags = []\n        for _ in range(pages):\n            response = requests.get(url, timeout=10)\n            response.raise_for_status()\n            data = response.json()\n            tags.extend(\n                [\n                    tag[\"name\"]\n                    for tag in data[\"results\"]\n                    if re.match(r\"^v\\d\", tag[\"name\"])\n                ]\n            )\n            url = data.get(\"next\")\n            if not url:\n                break\n        return tags\n\n    # Get tags for all repos in parallel\n    with concurrent.futures.ThreadPoolExecutor() as executor:\n        all_tags = list(\n            executor.map(lambda repo: set(get_dockerhub_tags(repo)), dockerhub_repos)\n        )\n\n    # Find common tags across all repos\n    common_tags = set.intersection(*all_tags)\n\n    # Filter tags by strict version patterns\n    dev_tags = [tag for tag in common_tags if DEV_VERSION_PATTERN.match(tag)]\n    stable_tags = [tag for tag in common_tags if STABLE_VERSION_PATTERN.match(tag)]\n\n    # Ensure we have at least one tag of each type\n    if not dev_tags:\n        raise HTTPException(\n            status_code=500,\n            detail=\"No valid dev versions found matching pattern v(number).(number).(number)-beta.(number)\",\n        )\n    if not stable_tags:\n        raise HTTPException(\n            status_code=500,\n            detail=\"No valid stable versions found matching pattern v(number).(number).(number)\",\n        )\n\n    # Sort common tags and get the latest one\n    def version_key(version: str) -> tuple[int, int, int, int]:\n        \"\"\"Extract major, minor, patch, beta as integers for sorting\"\"\"\n        # Remove 'v' prefix\n        clean_version = version[1:]\n\n        # Check if it's a beta version\n        if \"-beta.\" in clean_version:\n            # Split on '-beta.' to separate version and beta number\n            base_version, beta_num = clean_version.split(\"-beta.\")\n            parts = base_version.split(\".\")\n            return (int(parts[0]), int(parts[1]), int(parts[2]), int(beta_num))\n        else:\n            # Stable version - no beta number\n            parts = clean_version.split(\".\")\n            return (int(parts[0]), int(parts[1]), int(parts[2]), 0)\n\n    latest_dev_version = sorted(dev_tags, key=version_key, reverse=True)[0]\n    latest_stable_version = sorted(stable_tags, key=version_key, reverse=True)[0]\n\n    return AllVersions(\n        stable=ContainerVersions(\n            onyx=latest_stable_version,\n            relational_db=\"postgres:15.2-alpine\",\n            index=\"vespaengine/vespa:8.277.17\",\n            nginx=\"nginx:1.25.5-alpine\",\n        ),\n        dev=ContainerVersions(\n            onyx=latest_dev_version,\n            relational_db=\"postgres:15.2-alpine\",\n            index=\"vespaengine/vespa:8.277.17\",\n            nginx=\"nginx:1.25.5-alpine\",\n        ),\n        migration=ContainerVersions(\n            onyx=\"airgapped-intfloat-nomic-migration\",\n            relational_db=\"postgres:15.2-alpine\",\n            index=\"vespaengine/vespa:8.277.17\",\n            nginx=\"nginx:1.25.5-alpine\",\n        ),\n    )\n"
  },
  {
    "path": "backend/onyx/server/manage/image_generation/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.image_generation import create_image_generation_config__no_commit\nfrom onyx.db.image_generation import delete_image_generation_config__no_commit\nfrom onyx.db.image_generation import get_all_image_generation_configs\nfrom onyx.db.image_generation import get_image_generation_config\nfrom onyx.db.image_generation import set_default_image_generation_config\nfrom onyx.db.image_generation import unset_default_image_generation_config\nfrom onyx.db.llm import remove_llm_provider__no_commit\nfrom onyx.db.models import LLMProvider as LLMProviderModel\nfrom onyx.db.models import ModelConfiguration\nfrom onyx.db.models import User\nfrom onyx.image_gen.exceptions import ImageProviderCredentialsError\nfrom onyx.image_gen.factory import get_image_generation_provider\nfrom onyx.image_gen.factory import validate_credentials\nfrom onyx.image_gen.interfaces import ImageGenerationProviderCredentials\nfrom onyx.llm.utils import get_max_input_tokens\nfrom onyx.server.manage.image_generation.models import ImageGenerationConfigCreate\nfrom onyx.server.manage.image_generation.models import ImageGenerationConfigUpdate\nfrom onyx.server.manage.image_generation.models import ImageGenerationConfigView\nfrom onyx.server.manage.image_generation.models import ImageGenerationCredentials\nfrom onyx.server.manage.image_generation.models import TestImageGenerationRequest\nfrom onyx.server.manage.llm.api import _validate_llm_provider_change\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nadmin_router = APIRouter(prefix=\"/admin/image-generation\")\n\n\ndef _get_test_quality_for_model(model_name: str) -> str | None:\n    \"\"\"Returns the fastest quality setting for credential testing.\n\n    - gpt-image-1: 'low' (fastest)\n    - dall-e-3: 'standard' (faster than 'hd')\n    - Other models: None (use API default)\n    \"\"\"\n    model_lower = model_name.lower()\n\n    if \"gpt-image-1\" in model_lower:\n        return \"low\"\n    elif \"dall-e-3\" in model_lower or \"dalle-3\" in model_lower:\n        return \"standard\"\n    return None\n\n\ndef _build_llm_provider_request(\n    db_session: Session,\n    image_provider_id: str,\n    model_name: str,\n    source_llm_provider_id: int | None,\n    provider: str | None,\n    api_key: str | None,\n    api_base: str | None,\n    api_version: str | None,\n    deployment_name: str | None,\n    custom_config: dict[str, str] | None,\n) -> LLMProviderUpsertRequest:\n    \"\"\"Build LLM provider request for image generation config.\n\n    Supports two modes:\n    1. Clone mode: source_llm_provider_id provided - uses API key from source\n    2. New credentials mode: api_key + provider provided\n\n    \"\"\"\n    if source_llm_provider_id is not None:\n        # Clone mode: Only use API key from source provider\n        source_provider = db_session.get(LLMProviderModel, source_llm_provider_id)\n        if not source_provider:\n            raise HTTPException(\n                status_code=404,\n                detail=f\"Source LLM provider with id {source_llm_provider_id} not found\",\n            )\n\n        _validate_llm_provider_change(\n            existing_api_base=source_provider.api_base,\n            existing_custom_config=source_provider.custom_config,\n            new_api_base=api_base,\n            new_custom_config=custom_config,\n            api_key_changed=False,  # Using stored key from source provider\n        )\n\n        return LLMProviderUpsertRequest(\n            name=f\"Image Gen - {image_provider_id}\",\n            provider=source_provider.provider,\n            api_key=(\n                source_provider.api_key.get_value(apply_mask=False)\n                if source_provider.api_key\n                else None\n            ),  # Only this from source\n            api_base=api_base,  # From request\n            api_version=api_version,  # From request\n            deployment_name=deployment_name,  # From request\n            is_public=True,\n            groups=[],\n            model_configurations=[\n                ModelConfigurationUpsertRequest(\n                    name=model_name,\n                    is_visible=True,\n                )\n            ],\n            custom_config=custom_config,\n        )\n\n    if not provider:\n        raise HTTPException(\n            status_code=400,\n            detail=\"No provider or source llm provided\",\n        )\n\n    credentials = ImageGenerationProviderCredentials(\n        api_key=api_key,\n        api_base=api_base,\n        api_version=api_version,\n        deployment_name=deployment_name,\n        custom_config=custom_config,\n    )\n\n    if not validate_credentials(provider, credentials):\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Incorrect credentials for {provider}\",\n        )\n\n    return LLMProviderUpsertRequest(\n        name=f\"Image Gen - {image_provider_id}\",\n        provider=provider,\n        api_key=api_key,\n        api_base=api_base,\n        api_version=api_version,\n        deployment_name=deployment_name,\n        is_public=True,\n        groups=[],\n        model_configurations=[\n            ModelConfigurationUpsertRequest(\n                name=model_name,\n                is_visible=True,\n            )\n        ],\n        custom_config=custom_config,\n    )\n\n\ndef _create_image_gen_llm_provider__no_commit(\n    db_session: Session,\n    provider_request: LLMProviderUpsertRequest,\n    model_name: str,\n) -> int:\n    \"\"\"Create a new LLM provider for image generation. Returns model_config_id.\n\n    Unlike upsert_llm_provider, this always creates a new provider and never\n    deletes existing model configurations (which would cascade-delete ImageGenerationConfig).\n    \"\"\"\n\n    # Always create a new provider (don't look up by name to avoid upsert behavior)\n    new_provider = LLMProviderModel(\n        name=provider_request.name,\n        provider=provider_request.provider,\n        api_key=provider_request.api_key,\n        api_base=provider_request.api_base,\n        api_version=provider_request.api_version,\n        deployment_name=provider_request.deployment_name,\n        is_public=provider_request.is_public,\n        custom_config=provider_request.custom_config,\n    )\n    db_session.add(new_provider)\n    db_session.flush()  # Get the ID\n\n    # Create model configuration\n    max_input_tokens = get_max_input_tokens(\n        model_name=model_name,\n        model_provider=provider_request.provider,\n    )\n\n    model_config = ModelConfiguration(\n        llm_provider_id=new_provider.id,\n        name=model_name,\n        is_visible=True,\n        max_input_tokens=max_input_tokens,\n    )\n    db_session.add(model_config)\n    db_session.flush()\n\n    return model_config.id\n\n\n@admin_router.post(\"/test\")\ndef test_image_generation(\n    test_request: TestImageGenerationRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    \"\"\"Test if an API key is valid for image generation.\n\n    Makes a minimal image generation request to verify credentials using LiteLLM.\n\n    Two modes:\n    1. Direct: api_key + provider provided\n    2. From existing provider: source_llm_provider_id provided (fetches API key from DB)\n    \"\"\"\n    api_key = test_request.api_key\n    provider = test_request.provider\n\n    # Resolve API key and provider\n    if test_request.source_llm_provider_id is not None:\n        # Fetch API key from existing provider\n        source_provider = db_session.get(\n            LLMProviderModel, test_request.source_llm_provider_id\n        )\n        if not source_provider:\n            raise HTTPException(\n                status_code=404,\n                detail=f\"Source LLM provider with id {test_request.source_llm_provider_id} not found\",\n            )\n\n        _validate_llm_provider_change(\n            existing_api_base=source_provider.api_base,\n            existing_custom_config=source_provider.custom_config,\n            new_api_base=test_request.api_base,\n            new_custom_config=test_request.custom_config,\n            api_key_changed=False,  # Using stored key from source provider\n        )\n\n        api_key = (\n            source_provider.api_key.get_value(apply_mask=False)\n            if source_provider.api_key\n            else None\n        )\n        provider = source_provider.provider\n\n    if provider is None:\n        raise HTTPException(\n            status_code=400,\n            detail=\"No provider or source llm provided\",\n        )\n\n    try:\n        # Build image provider from credentials\n        # If incorrect credentials are provided, this will raise an exception\n        image_provider = get_image_generation_provider(\n            provider=provider,\n            credentials=ImageGenerationProviderCredentials(\n                api_key=api_key,\n                api_base=test_request.api_base,\n                api_version=test_request.api_version,\n                deployment_name=(\n                    test_request.deployment_name or test_request.model_name\n                ),\n                custom_config=test_request.custom_config,\n            ),\n        )\n    except ValueError:\n        raise HTTPException(\n            status_code=404,\n            detail=f\"Invalid image generation provider: {provider}\",\n        )\n    except ImageProviderCredentialsError:\n        raise HTTPException(\n            status_code=401,\n            detail=\"Invalid image generation credentials\",\n        )\n\n    quality = _get_test_quality_for_model(test_request.model_name)\n    try:\n        image_provider.generate_image(\n            prompt=\"a simple blue circle on white background\",\n            model=test_request.model_name,\n            size=\"1024x1024\",\n            n=1,\n            quality=quality,\n        )\n    except HTTPException:\n        raise\n    except Exception as e:\n        # Log only exception type to avoid exposing sensitive data\n        # (LiteLLM errors may contain URLs with API keys or auth tokens)\n        logger.warning(f\"Image generation test failed: {type(e).__name__}\")\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Image generation test failed: {type(e).__name__}\",\n        )\n\n\n@admin_router.post(\"/config\")\ndef create_config(\n    config_create: ImageGenerationConfigCreate,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ImageGenerationConfigView:\n    \"\"\"Create a new image generation configuration.\n\n    Both modes create a new LLM provider + model config + image config:\n\n    1. Clone mode: source_llm_provider_id provided\n       → Extract api key from existing provider, create new provider\n\n    2. New credentials mode: api_key + provider provided\n       → Create new provider with given credentials\n    \"\"\"\n    # Check if image_provider_id already exists\n    existing_config = get_image_generation_config(\n        db_session, config_create.image_provider_id\n    )\n    if existing_config:\n        raise HTTPException(\n            status_code=400,\n            detail=f\"ImageGenerationConfig with image_provider_id '{config_create.image_provider_id}' already exists\",\n        )\n\n    try:\n        # Build and create LLM provider\n        provider_request = _build_llm_provider_request(\n            db_session=db_session,\n            image_provider_id=config_create.image_provider_id,\n            model_name=config_create.model_name,\n            source_llm_provider_id=config_create.source_llm_provider_id,\n            provider=config_create.provider,\n            api_key=config_create.api_key,\n            api_base=config_create.api_base,\n            api_version=config_create.api_version,\n            deployment_name=config_create.deployment_name,\n            custom_config=config_create.custom_config,\n        )\n\n        model_configuration_id = _create_image_gen_llm_provider__no_commit(\n            db_session=db_session,\n            provider_request=provider_request,\n            model_name=config_create.model_name,\n        )\n\n        # Create the ImageGenerationConfig\n        config = create_image_generation_config__no_commit(\n            db_session=db_session,\n            image_provider_id=config_create.image_provider_id,\n            model_configuration_id=model_configuration_id,\n            is_default=config_create.is_default,\n        )\n        db_session.commit()\n        db_session.refresh(config)\n        return ImageGenerationConfigView.from_model(config)\n    except HTTPException:\n        raise\n    except Exception as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n\n@admin_router.get(\"/config\")\ndef get_all_configs(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[ImageGenerationConfigView]:\n    \"\"\"Get all image generation configurations.\"\"\"\n    configs = get_all_image_generation_configs(db_session)\n    return [ImageGenerationConfigView.from_model(config) for config in configs]\n\n\n@admin_router.get(\"/config/{image_provider_id}/credentials\")\ndef get_config_credentials(\n    image_provider_id: str,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ImageGenerationCredentials:\n    \"\"\"Get the credentials for an image generation config (for edit mode).\n\n    Returns the unmasked API key and other credential fields.\n    \"\"\"\n    config = get_image_generation_config(db_session, image_provider_id)\n    if not config:\n        raise HTTPException(\n            status_code=404,\n            detail=f\"ImageGenerationConfig with image_provider_id {image_provider_id} not found\",\n        )\n\n    return ImageGenerationCredentials.from_model(config)\n\n\n@admin_router.put(\"/config/{image_provider_id}\")\ndef update_config(\n    image_provider_id: str,\n    config_update: ImageGenerationConfigUpdate,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> ImageGenerationConfigView:\n    \"\"\"Update an image generation configuration.\n\n    Flow:\n    1. Get existing config and its LLM provider\n    2. Rename old LLM provider to free up the name (avoids unique constraint)\n    3. Create new LLM provider + model config (same as create flow)\n    4. Update ImageGenerationConfig to point to new model config\n    5. Delete old LLM provider (safe now - nothing references it)\n    \"\"\"\n    try:\n        # 1. Get existing config\n        existing_config = get_image_generation_config(db_session, image_provider_id)\n        if not existing_config:\n            raise HTTPException(\n                status_code=404,\n                detail=f\"ImageGenerationConfig with image_provider_id {image_provider_id} not found\",\n            )\n\n        old_llm_provider_id = existing_config.model_configuration.llm_provider_id\n\n        # 2. Rename old LLM provider to free up the name\n        # (Can't delete first due to cascade: LLMProvider -> ModelConfig -> ImageGenConfig)\n        old_provider = db_session.get(LLMProviderModel, old_llm_provider_id)\n        if old_provider:\n            old_provider.name = f\"{old_provider.name}-old-{old_llm_provider_id}\"\n            db_session.flush()\n\n        # Determine actual API key to use:\n        # - Clone mode (source_llm_provider_id): API key comes from source provider\n        # - New credentials mode: Use provided api_key, or preserve existing if not changed\n        actual_api_key = config_update.api_key\n        if config_update.source_llm_provider_id is None and old_provider:\n            # Check if we should preserve existing API key:\n            # - api_key_changed=False AND (key is None/empty OR looks masked)\n            provided_key_is_masked = (\n                config_update.api_key and \"****\" in config_update.api_key\n            )\n            if not config_update.api_key_changed and (\n                not config_update.api_key or provided_key_is_masked\n            ):\n                _validate_llm_provider_change(\n                    existing_api_base=old_provider.api_base,\n                    existing_custom_config=old_provider.custom_config,\n                    new_api_base=config_update.api_base,\n                    new_custom_config=config_update.custom_config,\n                    api_key_changed=False,\n                )\n                # Preserve existing API key when user didn't change it\n                actual_api_key = (\n                    old_provider.api_key.get_value(apply_mask=False)\n                    if old_provider.api_key\n                    else None\n                )\n\n        # 3. Build and create new LLM provider\n        provider_request = _build_llm_provider_request(\n            db_session=db_session,\n            image_provider_id=image_provider_id,\n            model_name=config_update.model_name,\n            source_llm_provider_id=config_update.source_llm_provider_id,\n            provider=config_update.provider,\n            api_key=actual_api_key,\n            api_base=config_update.api_base,\n            api_version=config_update.api_version,\n            deployment_name=config_update.deployment_name,\n            custom_config=config_update.custom_config,\n        )\n\n        new_model_config_id = _create_image_gen_llm_provider__no_commit(\n            db_session=db_session,\n            provider_request=provider_request,\n            model_name=config_update.model_name,\n        )\n\n        # 4. Update the ImageGenerationConfig to point to new model config\n        existing_config.model_configuration_id = new_model_config_id\n\n        # 5. Delete old LLM provider (safe now - nothing references it)\n        remove_llm_provider__no_commit(db_session, old_llm_provider_id)\n\n        db_session.commit()\n        db_session.refresh(existing_config)\n        return ImageGenerationConfigView.from_model(existing_config)\n\n    except HTTPException:\n        raise\n    except Exception as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n\n@admin_router.delete(\"/config/{image_provider_id}\")\ndef delete_config(\n    image_provider_id: str,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    \"\"\"Delete an image generation configuration and its associated LLM provider.\"\"\"\n    try:\n        # Get the config first to find the associated LLM provider\n        existing_config = get_image_generation_config(db_session, image_provider_id)\n        if not existing_config:\n            raise HTTPException(\n                status_code=404,\n                detail=f\"ImageGenerationConfig with image_provider_id {image_provider_id} not found\",\n            )\n\n        llm_provider_id = existing_config.model_configuration.llm_provider_id\n\n        # Delete the image generation config first\n        delete_image_generation_config__no_commit(db_session, image_provider_id)\n\n        # Clean up the orphaned LLM provider (it was exclusively for image gen)\n        remove_llm_provider__no_commit(db_session, llm_provider_id)\n\n        db_session.commit()\n    except HTTPException:\n        raise\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n\n\n@admin_router.post(\"/config/{image_provider_id}/default\")\ndef set_config_as_default(\n    image_provider_id: str,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    \"\"\"Set a configuration as the default for image generation.\"\"\"\n    try:\n        set_default_image_generation_config(db_session, image_provider_id)\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n\n\n@admin_router.delete(\"/config/{image_provider_id}/default\")\ndef unset_config_as_default(\n    image_provider_id: str,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    \"\"\"Unset a configuration as the default for image generation.\"\"\"\n    try:\n        unset_default_image_generation_config(db_session, image_provider_id)\n    except ValueError as e:\n        raise HTTPException(status_code=404, detail=str(e))\n"
  },
  {
    "path": "backend/onyx/server/manage/image_generation/models.py",
    "content": "from typing import TYPE_CHECKING\n\nfrom pydantic import BaseModel\n\nif TYPE_CHECKING:\n    from onyx.db.models import ImageGenerationConfig as ImageGenerationConfigModel\n\n\ndef _mask_api_key(api_key: str | None) -> str | None:\n    \"\"\"Mask API key, showing first 4 and last 4 characters.\"\"\"\n    if not api_key:\n        return None\n    if len(api_key) <= 8:\n        return \"****\"\n    return api_key[:4] + \"****\" + api_key[-4:]\n\n\nclass TestImageGenerationRequest(BaseModel):\n    \"\"\"Request model for testing image generation API key.\n\n    Two modes:\n    1. Direct API key: Provide api_key + provider\n    2. From existing provider: Provide source_llm_provider_id (backend fetches API key)\n    \"\"\"\n\n    model_name: str  # e.g., \"gpt-image-1\", \"dall-e-3\"\n\n    # Option 1: Direct API key\n    provider: str | None = None  # e.g., \"openai\", \"azure\"\n    api_key: str | None = None\n\n    # Option 2: Use API key from existing provider\n    source_llm_provider_id: int | None = None\n\n    # Additional fields for custom config\n    custom_config: dict[str, str] | None = None\n\n    # Additional fields for Azure\n    api_base: str | None = None\n    api_version: str | None = None\n    deployment_name: str | None = None\n\n\nclass ImageGenerationConfigCreate(BaseModel):\n    \"\"\"Request model for creating an image generation config.\n\n    Two creation modes (backend always creates new LLM provider + model config):\n\n    1. Clone mode: Provide source_llm_provider_id + model_name\n       → Backend extracts credentials from existing provider and creates new provider\n\n    2. New credentials mode: Provide api_key + provider + model_name (+ optional fields)\n       → Backend creates new provider with provided credentials\n    \"\"\"\n\n    # Required for both modes\n    image_provider_id: str  # Static unique key (e.g., \"openai_gpt_image_1\")\n    model_name: str  # e.g., \"gpt-image-1\", \"dall-e-3\"\n\n    # Option 1: Clone mode - use credentials from existing provider\n    source_llm_provider_id: int | None = None\n\n    # Option 2: New credentials mode\n    provider: str | None = None  # e.g., \"openai\", \"azure\"\n    api_key: str | None = None\n    api_base: str | None = None\n    api_version: str | None = None\n    deployment_name: str | None = None\n    custom_config: dict[str, str] | None = None\n\n    is_default: bool = False\n\n\nclass ImageGenerationConfigUpdate(BaseModel):\n    \"\"\"Request model for updating an image generation config.\n\n    Same modes as create - either clone from existing provider or use new credentials.\n    Backend will delete old LLM provider and create new one.\n    \"\"\"\n\n    # Required\n    model_name: str  # e.g., \"gpt-image-1\", \"dall-e-3\"\n    # Note: image_provider_id cannot be changed during update\n\n    # Option 1: Clone mode - use credentials from existing provider\n    source_llm_provider_id: int | None = None\n\n    # Option 2: New credentials mode\n    provider: str | None = None  # e.g., \"openai\", \"azure\"\n    api_key: str | None = None\n    api_base: str | None = None\n    api_version: str | None = None\n    deployment_name: str | None = None\n    custom_config: dict[str, str] | None = None\n\n    # If False and using new credentials mode, preserve existing API key from DB\n    api_key_changed: bool = False\n\n\nclass ImageGenerationConfigView(BaseModel):\n    \"\"\"Response model for image generation config with related data.\"\"\"\n\n    image_provider_id: str  # Primary key - static unique key for UI-DB mapping\n    model_configuration_id: int\n    model_name: str  # From model_configuration.name\n    llm_provider_id: int  # From model_configuration.llm_provider_id\n    llm_provider_name: str  # From model_configuration.llm_provider.name\n    is_default: bool\n\n    @classmethod\n    def from_model(\n        cls, config: \"ImageGenerationConfigModel\"\n    ) -> \"ImageGenerationConfigView\":\n        \"\"\"Convert database model to view model.\"\"\"\n        return cls(\n            image_provider_id=config.image_provider_id,\n            model_configuration_id=config.model_configuration_id,\n            model_name=config.model_configuration.name,\n            llm_provider_id=config.model_configuration.llm_provider_id,\n            llm_provider_name=config.model_configuration.llm_provider.name,\n            is_default=config.is_default,\n        )\n\n\nclass ImageGenerationCredentials(BaseModel):\n    \"\"\"Response model for image generation config credentials (edit mode).\"\"\"\n\n    api_key: str | None\n    api_base: str | None\n    api_version: str | None\n    deployment_name: str | None\n\n    @classmethod\n    def from_model(\n        cls, config: \"ImageGenerationConfigModel\"\n    ) -> \"ImageGenerationCredentials\":\n        \"\"\"Convert database model to credentials model.\n\n        Note: API key is masked for security - only first 4 and last 4 chars shown.\n        \"\"\"\n        llm_provider = config.model_configuration.llm_provider\n        return cls(\n            api_key=_mask_api_key(\n                llm_provider.api_key.get_value(apply_mask=False)\n                if llm_provider.api_key\n                else None\n            ),\n            api_base=llm_provider.api_base,\n            api_version=llm_provider.api_version,\n            deployment_name=llm_provider.deployment_name,\n        )\n\n\nclass DefaultImageGenerationConfig(BaseModel):\n    \"\"\"Contains all info needed for image generation tool.\"\"\"\n\n    model_configuration_id: int\n    model_name: str  # From model_configuration.name\n    provider: str  # e.g., \"openai\", \"azure\" - from llm_provider.provider\n    api_key: str | None\n    api_base: str | None\n    api_version: str | None\n    deployment_name: str | None\n\n    @classmethod\n    def from_model(\n        cls, config: \"ImageGenerationConfigModel\"\n    ) -> \"DefaultImageGenerationConfig\":\n        \"\"\"Convert database model to default config model.\"\"\"\n        llm_provider = config.model_configuration.llm_provider\n        return cls(\n            model_configuration_id=config.model_configuration_id,\n            model_name=config.model_configuration.name,\n            provider=llm_provider.provider,\n            api_key=(\n                llm_provider.api_key.get_value(apply_mask=False)\n                if llm_provider.api_key\n                else None\n            ),\n            api_base=llm_provider.api_base,\n            api_version=llm_provider.api_version,\n            deployment_name=llm_provider.deployment_name,\n        )\n"
  },
  {
    "path": "backend/onyx/server/manage/llm/api.py",
    "content": "import os\nfrom collections import defaultdict\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nimport boto3\nimport httpx\nfrom botocore.exceptions import BotoCoreError\nfrom botocore.exceptions import ClientError\nfrom botocore.exceptions import NoCredentialsError\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import Query\nfrom pydantic import ValidationError\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_chat_accessible_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.enums import LLMModelFlowType\nfrom onyx.db.llm import can_user_access_llm_provider\nfrom onyx.db.llm import fetch_default_llm_model\nfrom onyx.db.llm import fetch_default_vision_model\nfrom onyx.db.llm import fetch_existing_llm_provider\nfrom onyx.db.llm import fetch_existing_llm_provider_by_id\nfrom onyx.db.llm import fetch_existing_llm_providers\nfrom onyx.db.llm import fetch_existing_models\nfrom onyx.db.llm import fetch_persona_with_groups\nfrom onyx.db.llm import fetch_user_group_ids\nfrom onyx.db.llm import remove_llm_provider\nfrom onyx.db.llm import sync_model_configurations\nfrom onyx.db.llm import update_default_provider\nfrom onyx.db.llm import update_default_vision_provider\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.db.llm import validate_persona_ids_exist\nfrom onyx.db.models import User\nfrom onyx.db.persona import user_can_access_persona\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.llm.factory import get_default_llm\nfrom onyx.llm.factory import get_llm\nfrom onyx.llm.factory import get_max_input_tokens_from_llm_provider\nfrom onyx.llm.utils import get_bedrock_token_limit\nfrom onyx.llm.utils import get_llm_contextual_cost\nfrom onyx.llm.utils import test_llm\nfrom onyx.llm.well_known_providers.auto_update_service import (\n    fetch_llm_recommendations_from_github,\n)\nfrom onyx.llm.well_known_providers.constants import LM_STUDIO_API_KEY_CONFIG_KEY\nfrom onyx.llm.well_known_providers.llm_provider_options import (\n    fetch_available_well_known_llms,\n)\nfrom onyx.llm.well_known_providers.llm_provider_options import (\n    WellKnownLLMProviderDescriptor,\n)\nfrom onyx.server.manage.llm.models import BedrockFinalModelResponse\nfrom onyx.server.manage.llm.models import BedrockModelsRequest\nfrom onyx.server.manage.llm.models import BifrostFinalModelResponse\nfrom onyx.server.manage.llm.models import BifrostModelsRequest\nfrom onyx.server.manage.llm.models import DefaultModel\nfrom onyx.server.manage.llm.models import LitellmFinalModelResponse\nfrom onyx.server.manage.llm.models import LitellmModelDetails\nfrom onyx.server.manage.llm.models import LitellmModelsRequest\nfrom onyx.server.manage.llm.models import LLMCost\nfrom onyx.server.manage.llm.models import LLMProviderDescriptor\nfrom onyx.server.manage.llm.models import LLMProviderResponse\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import LLMProviderView\nfrom onyx.server.manage.llm.models import LMStudioFinalModelResponse\nfrom onyx.server.manage.llm.models import LMStudioModelsRequest\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\nfrom onyx.server.manage.llm.models import OllamaFinalModelResponse\nfrom onyx.server.manage.llm.models import OllamaModelDetails\nfrom onyx.server.manage.llm.models import OllamaModelsRequest\nfrom onyx.server.manage.llm.models import OpenRouterFinalModelResponse\nfrom onyx.server.manage.llm.models import OpenRouterModelDetails\nfrom onyx.server.manage.llm.models import OpenRouterModelsRequest\nfrom onyx.server.manage.llm.models import SyncModelEntry\nfrom onyx.server.manage.llm.models import TestLLMRequest\nfrom onyx.server.manage.llm.models import VisionProviderResponse\nfrom onyx.server.manage.llm.utils import generate_bedrock_display_name\nfrom onyx.server.manage.llm.utils import generate_ollama_display_name\nfrom onyx.server.manage.llm.utils import infer_vision_support\nfrom onyx.server.manage.llm.utils import is_embedding_model\nfrom onyx.server.manage.llm.utils import is_reasoning_model\nfrom onyx.server.manage.llm.utils import is_valid_bedrock_model\nfrom onyx.server.manage.llm.utils import ModelMetadata\nfrom onyx.server.manage.llm.utils import strip_openrouter_vendor_prefix\nfrom onyx.utils.encryption import mask_string as mask_with_ellipsis\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\nadmin_router = APIRouter(prefix=\"/admin/llm\")\nbasic_router = APIRouter(prefix=\"/llm\")\n\n\ndef _mask_string(value: str) -> str:\n    \"\"\"Mask a string, showing first 4 and last 4 characters.\"\"\"\n    if len(value) <= 8:\n        return \"****\"\n    return value[:4] + \"****\" + value[-4:]\n\n\ndef _sync_fetched_models(\n    db_session: Session,\n    provider_name: str,\n    models: list[SyncModelEntry],\n    source_label: str,\n) -> None:\n    \"\"\"Sync fetched models to DB for the given provider.\n\n    Args:\n        db_session: Database session\n        provider_name: Name of the LLM provider\n        models: List of SyncModelEntry objects describing the fetched models\n        source_label: Human-readable label for log messages (e.g. \"Bedrock\", \"LiteLLM\")\n    \"\"\"\n    try:\n        new_count = sync_model_configurations(\n            db_session=db_session,\n            provider_name=provider_name,\n            models=models,\n        )\n        if new_count > 0:\n            logger.info(\n                f\"Added {new_count} new {source_label} models to provider '{provider_name}'\"\n            )\n    except ValueError as e:\n        logger.warning(f\"Failed to sync {source_label} models to DB: {e}\")\n\n\n# Keys in custom_config that contain sensitive credentials\n_SENSITIVE_CONFIG_KEYS = {\n    \"vertex_credentials\",\n    \"aws_secret_access_key\",\n    \"aws_access_key_id\",\n    \"aws_bearer_token_bedrock\",\n    \"private_key\",\n    \"api_key\",\n    \"secret\",\n    \"password\",\n    \"token\",\n    \"credential\",\n}\n\n\ndef _mask_provider_credentials(provider_view: LLMProviderView) -> None:\n    \"\"\"Mask sensitive credentials in provider view including api_key and custom_config.\"\"\"\n    # Mask the API key\n    if provider_view.api_key:\n        provider_view.api_key = _mask_string(provider_view.api_key)\n\n    # Mask sensitive values in custom_config\n    if provider_view.custom_config:\n        masked_config: dict[str, Any] = {}\n        for key, value in provider_view.custom_config.items():\n            # Check if key matches any sensitive pattern (case-insensitive)\n            key_lower = key.lower()\n            is_sensitive = any(\n                sensitive_key in key_lower for sensitive_key in _SENSITIVE_CONFIG_KEYS\n            )\n            if is_sensitive and isinstance(value, str) and value:\n                masked_config[key] = _mask_string(value)\n            else:\n                masked_config[key] = value\n        provider_view.custom_config = masked_config\n\n\ndef _is_sensitive_custom_config_key(key: str) -> bool:\n    key_lower = key.lower()\n    return any(sensitive_key in key_lower for sensitive_key in _SENSITIVE_CONFIG_KEYS)\n\n\ndef _is_masked_value_for_existing(\n    incoming_value: str, existing_value: str, key: str\n) -> bool:\n    \"\"\"Return True when incoming_value is a masked round-trip of existing_value.\"\"\"\n    if not _is_sensitive_custom_config_key(key):\n        return False\n\n    masked_candidates = {\n        _mask_string(existing_value),\n        mask_with_ellipsis(existing_value),\n        \"****\",\n        \"••••••••••••\",\n        \"***REDACTED***\",\n    }\n    return incoming_value in masked_candidates\n\n\ndef _restore_masked_custom_config_values(\n    existing_custom_config: dict[str, str] | None,\n    new_custom_config: dict[str, str] | None,\n) -> dict[str, str] | None:\n    \"\"\"Restore sensitive custom config values when clients send masked placeholders.\"\"\"\n    if not existing_custom_config or not new_custom_config:\n        return new_custom_config\n\n    restored_config = dict(new_custom_config)\n\n    for key, incoming_value in restored_config.items():\n        existing_value = existing_custom_config.get(key)\n        if not isinstance(incoming_value, str) or not isinstance(existing_value, str):\n            continue\n        if _is_masked_value_for_existing(incoming_value, existing_value, key):\n            restored_config[key] = existing_value\n\n    return restored_config\n\n\ndef _validate_llm_provider_change(\n    existing_api_base: str | None,\n    existing_custom_config: dict[str, str] | None,\n    new_api_base: str | None,\n    new_custom_config: dict[str, str] | None,\n    api_key_changed: bool,\n) -> None:\n    \"\"\"Validate that api_base and custom_config changes are safe.\n\n    When using a stored API key (api_key_changed=False), we must ensure api_base and\n    custom_config match the stored values.\n\n    Only enforced in MULTI_TENANT mode.\n\n    Raises:\n        OnyxError: If api_base or custom_config changed without changing API key\n    \"\"\"\n    if not MULTI_TENANT or api_key_changed:\n        return\n\n    normalized_existing_api_base = existing_api_base or None\n    normalized_new_api_base = new_api_base or None\n\n    api_base_changed = normalized_new_api_base != normalized_existing_api_base\n    custom_config_changed = (\n        new_custom_config and new_custom_config != existing_custom_config\n    )\n\n    if api_base_changed or custom_config_changed:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"API base and/or custom config cannot be changed without changing the API key\",\n        )\n\n\n@admin_router.get(\"/built-in/options\")\ndef fetch_llm_options(\n    _: User = Depends(current_admin_user),\n) -> list[WellKnownLLMProviderDescriptor]:\n    return fetch_available_well_known_llms()\n\n\n@admin_router.get(\"/built-in/options/{provider_name}\")\ndef fetch_llm_provider_options(\n    provider_name: str,\n    _: User = Depends(current_admin_user),\n) -> WellKnownLLMProviderDescriptor:\n    well_known_llms = fetch_available_well_known_llms()\n    for well_known_llm in well_known_llms:\n        if well_known_llm.name == provider_name:\n            return well_known_llm\n    raise OnyxError(OnyxErrorCode.NOT_FOUND, f\"Provider {provider_name} not found\")\n\n\n@admin_router.post(\"/test\")\ndef test_llm_configuration(\n    test_llm_request: TestLLMRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    \"\"\"Test LLM configuration settings\"\"\"\n\n    # the api key is sanitized if we are testing a provider already in the system\n\n    test_api_key = test_llm_request.api_key\n    test_custom_config = test_llm_request.custom_config\n    if test_llm_request.id:\n        existing_provider = fetch_existing_llm_provider_by_id(\n            id=test_llm_request.id, db_session=db_session\n        )\n        if existing_provider:\n            test_custom_config = _restore_masked_custom_config_values(\n                existing_custom_config=existing_provider.custom_config,\n                new_custom_config=test_custom_config,\n            )\n        # if an API key is not provided, use the existing provider's API key\n        if existing_provider and not test_llm_request.api_key_changed:\n            _validate_llm_provider_change(\n                existing_api_base=existing_provider.api_base,\n                existing_custom_config=existing_provider.custom_config,\n                new_api_base=test_llm_request.api_base,\n                new_custom_config=test_custom_config,\n                api_key_changed=False,\n            )\n            test_api_key = (\n                existing_provider.api_key.get_value(apply_mask=False)\n                if existing_provider.api_key\n                else None\n            )\n        if existing_provider and not test_llm_request.custom_config_changed:\n            test_custom_config = existing_provider.custom_config\n\n    # For this \"testing\" workflow, we do *not* need the actual `max_input_tokens`.\n    # Therefore, instead of performing additional, more complex logic, we just use a dummy value\n    max_input_tokens = -1\n\n    llm = get_llm(\n        provider=test_llm_request.provider,\n        model=test_llm_request.model,\n        api_key=test_api_key,\n        api_base=test_llm_request.api_base,\n        api_version=test_llm_request.api_version,\n        custom_config=test_custom_config,\n        deployment_name=test_llm_request.deployment_name,\n        max_input_tokens=max_input_tokens,\n    )\n\n    error_msg = test_llm(llm)\n\n    if error_msg:\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, error_msg)\n\n\n@admin_router.post(\"/test/default\")\ndef test_default_provider(\n    _: User = Depends(current_admin_user),\n) -> None:\n    try:\n        llm = get_default_llm()\n    except ValueError:\n        logger.exception(\"Failed to fetch default LLM Provider\")\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, \"No LLM Provider setup\")\n\n    error = test_llm(llm)\n    if error:\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(error))\n\n\n@admin_router.get(\"/provider\")\ndef list_llm_providers(\n    include_image_gen: bool = Query(False),\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> LLMProviderResponse[LLMProviderView]:\n    start_time = datetime.now(timezone.utc)\n    logger.debug(\"Starting to fetch LLM providers\")\n\n    llm_provider_list: list[LLMProviderView] = []\n    for llm_provider_model in fetch_existing_llm_providers(\n        db_session=db_session,\n        flow_type_filter=[],\n        exclude_image_generation_providers=not include_image_gen,\n    ):\n        from_model_start = datetime.now(timezone.utc)\n        full_llm_provider = LLMProviderView.from_model(llm_provider_model)\n        from_model_end = datetime.now(timezone.utc)\n        from_model_duration = (from_model_end - from_model_start).total_seconds()\n        logger.debug(\n            f\"LLMProviderView.from_model took {from_model_duration:.2f} seconds\"\n        )\n\n        _mask_provider_credentials(full_llm_provider)\n        llm_provider_list.append(full_llm_provider)\n\n    end_time = datetime.now(timezone.utc)\n    duration = (end_time - start_time).total_seconds()\n    logger.debug(f\"Completed fetching LLM providers in {duration:.2f} seconds\")\n\n    return LLMProviderResponse[LLMProviderView].from_models(\n        providers=llm_provider_list,\n        default_text=DefaultModel.from_model_config(\n            fetch_default_llm_model(db_session)\n        ),\n        default_vision=DefaultModel.from_model_config(\n            fetch_default_vision_model(db_session)\n        ),\n    )\n\n\n@admin_router.put(\"/provider\")\ndef put_llm_provider(\n    llm_provider_upsert_request: LLMProviderUpsertRequest,\n    is_creation: bool = Query(\n        False,\n        description=\"True if creating a new one, False if updating an existing provider\",\n    ),\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> LLMProviderView:\n    # validate request (e.g. if we're intending to create but the name already exists we should throw an error)\n    # NOTE: may involve duplicate fetching to Postgres, but we're assuming SQLAlchemy is smart enough to cache\n    # the result\n    existing_provider = None\n    if llm_provider_upsert_request.id:\n        existing_provider = fetch_existing_llm_provider_by_id(\n            id=llm_provider_upsert_request.id, db_session=db_session\n        )\n\n    # Check name constraints\n    # TODO: Once port from name to id is complete, unique name will no longer be required\n    if existing_provider and llm_provider_upsert_request.name != existing_provider.name:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"Renaming providers is not currently supported\",\n        )\n\n    found_provider = fetch_existing_llm_provider(\n        name=llm_provider_upsert_request.name, db_session=db_session\n    )\n    if found_provider is not None and found_provider is not existing_provider:\n        raise OnyxError(\n            OnyxErrorCode.DUPLICATE_RESOURCE,\n            f\"Provider with name={llm_provider_upsert_request.name} already exists\",\n        )\n\n    if existing_provider and is_creation:\n        raise OnyxError(\n            OnyxErrorCode.DUPLICATE_RESOURCE,\n            f\"LLM Provider with name {llm_provider_upsert_request.name} and id={llm_provider_upsert_request.id} already exists\",\n        )\n    elif not existing_provider and not is_creation:\n        raise OnyxError(\n            OnyxErrorCode.NOT_FOUND,\n            f\"LLM Provider with name {llm_provider_upsert_request.name} and id={llm_provider_upsert_request.id} does not exist\",\n        )\n\n    # SSRF Protection: Validate api_base and custom_config match stored values\n    if existing_provider:\n        llm_provider_upsert_request.custom_config = (\n            _restore_masked_custom_config_values(\n                existing_custom_config=existing_provider.custom_config,\n                new_custom_config=llm_provider_upsert_request.custom_config,\n            )\n        )\n        _validate_llm_provider_change(\n            existing_api_base=existing_provider.api_base,\n            existing_custom_config=existing_provider.custom_config,\n            new_api_base=llm_provider_upsert_request.api_base,\n            new_custom_config=llm_provider_upsert_request.custom_config,\n            api_key_changed=llm_provider_upsert_request.api_key_changed,\n        )\n\n    persona_ids = llm_provider_upsert_request.personas\n    if persona_ids:\n        _fetched_persona_ids, missing_personas = validate_persona_ids_exist(\n            db_session, persona_ids\n        )\n        if missing_personas:\n            raise OnyxError(\n                OnyxErrorCode.VALIDATION_ERROR,\n                f\"Invalid persona IDs: {', '.join(map(str, missing_personas))}\",\n            )\n        # Remove duplicates while preserving order\n        seen: set[int] = set()\n        deduplicated_personas: list[int] = []\n        for persona_id in persona_ids:\n            if persona_id not in seen:\n                seen.add(persona_id)\n                deduplicated_personas.append(persona_id)\n        llm_provider_upsert_request.personas = deduplicated_personas\n\n    # the llm api key is sanitized when returned to clients, so the only time we\n    # should get a real key is when it is explicitly changed\n    if existing_provider and not llm_provider_upsert_request.api_key_changed:\n        llm_provider_upsert_request.api_key = (\n            existing_provider.api_key.get_value(apply_mask=False)\n            if existing_provider.api_key\n            else None\n        )\n    if existing_provider and not llm_provider_upsert_request.custom_config_changed:\n        llm_provider_upsert_request.custom_config = existing_provider.custom_config\n\n    # Check if we're transitioning to Auto mode\n    transitioning_to_auto_mode = llm_provider_upsert_request.is_auto_mode and (\n        not existing_provider or not existing_provider.is_auto_mode\n    )\n\n    # When transitioning to auto mode, preserve existing model configurations\n    # so the upsert doesn't try to delete them (which would trip the default\n    # model protection guard). sync_auto_mode_models will handle the model\n    # lifecycle afterward — adding new models, hiding removed ones, and\n    # updating the default. This is safe even if sync fails: the provider\n    # keeps its old models and default rather than losing them.\n    if transitioning_to_auto_mode and existing_provider:\n        llm_provider_upsert_request.model_configurations = [\n            ModelConfigurationUpsertRequest.from_model(mc)\n            for mc in existing_provider.model_configurations\n        ]\n\n    try:\n        result = upsert_llm_provider(\n            llm_provider_upsert_request=llm_provider_upsert_request,\n            db_session=db_session,\n        )\n\n        # If newly enabling Auto mode, sync models immediately from GitHub config\n        if transitioning_to_auto_mode:\n            from onyx.db.llm import sync_auto_mode_models\n\n            config = fetch_llm_recommendations_from_github()\n            if config and llm_provider_upsert_request.provider in config.providers:\n                updated_provider = fetch_existing_llm_provider_by_id(\n                    id=result.id, db_session=db_session\n                )\n                if updated_provider:\n                    sync_auto_mode_models(\n                        db_session,\n                        updated_provider,\n                        config,\n                    )\n                    # Refresh result with synced models\n                    result = LLMProviderView.from_model(updated_provider)\n\n        _mask_provider_credentials(result)\n        return result\n    except ValueError as e:\n        logger.exception(\"Failed to upsert LLM Provider\")\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))\n\n\n@admin_router.delete(\"/provider/{provider_id}\")\ndef delete_llm_provider(\n    provider_id: int,\n    force: bool = Query(False),\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    if not force:\n        model = fetch_default_llm_model(db_session)\n\n        if model and model.llm_provider_id == provider_id:\n            raise OnyxError(\n                OnyxErrorCode.VALIDATION_ERROR,\n                \"Cannot delete the default LLM provider\",\n            )\n\n    try:\n        remove_llm_provider(db_session, provider_id)\n    except ValueError as e:\n        raise OnyxError(OnyxErrorCode.NOT_FOUND, str(e))\n\n\n@admin_router.post(\"/default\")\ndef set_provider_as_default(\n    default_model_request: DefaultModel,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_default_provider(\n        provider_id=default_model_request.provider_id,\n        model_name=default_model_request.model_name,\n        db_session=db_session,\n    )\n\n\n@admin_router.post(\"/default-vision\")\ndef set_provider_as_default_vision(\n    default_model: DefaultModel,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_default_vision_provider(\n        provider_id=default_model.provider_id,\n        vision_model=default_model.model_name,\n        db_session=db_session,\n    )\n\n\n@admin_router.get(\"/auto-config\")\ndef get_auto_config(\n    _: User = Depends(current_admin_user),\n) -> dict:\n    \"\"\"Get the current Auto mode configuration from GitHub.\n\n    Returns the available models and default configurations for each\n    supported provider type when using Auto mode.\n    \"\"\"\n    config = fetch_llm_recommendations_from_github()\n    if not config:\n        raise OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            \"Failed to fetch configuration from GitHub\",\n        )\n    return config.model_dump()\n\n\n@admin_router.get(\"/vision-providers\")\ndef get_vision_capable_providers(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> LLMProviderResponse[VisionProviderResponse]:\n    \"\"\"Return a list of LLM providers and their models that support image input\"\"\"\n    vision_models = fetch_existing_models(\n        db_session=db_session, flow_types=[LLMModelFlowType.VISION]\n    )\n\n    # Group vision models by provider ID (using ID as key since it's hashable)\n    provider_models: dict[int, list[str]] = defaultdict(list)\n    providers_by_id: dict[int, LLMProviderView] = {}\n\n    for vision_model in vision_models:\n        provider_id = vision_model.llm_provider.id\n        provider_models[provider_id].append(vision_model.name)\n        # Only create the view once per provider\n        if provider_id not in providers_by_id:\n            provider_view = LLMProviderView.from_model(vision_model.llm_provider)\n            _mask_provider_credentials(provider_view)\n            providers_by_id[provider_id] = provider_view\n\n    # Build response list\n    vision_provider_response = [\n        VisionProviderResponse(\n            **providers_by_id[provider_id].model_dump(),\n            vision_models=model_names,\n        )\n        for provider_id, model_names in provider_models.items()\n    ]\n\n    logger.debug(f\"Found {len(vision_provider_response)} vision-capable providers\")\n\n    return LLMProviderResponse[VisionProviderResponse].from_models(\n        providers=vision_provider_response,\n        default_vision=DefaultModel.from_model_config(\n            fetch_default_vision_model(db_session)\n        ),\n    )\n\n\n\"\"\"Endpoints for all\"\"\"\n\n\n@basic_router.get(\"/provider\")\ndef list_llm_provider_basics(\n    user: User = Depends(current_chat_accessible_user),\n    db_session: Session = Depends(get_session),\n) -> LLMProviderResponse[LLMProviderDescriptor]:\n    \"\"\"Get LLM providers accessible to the current user.\n\n    Returns:\n    - All public providers (is_public=True) - Always included\n    - Restricted providers user can access via their group memberships\n\n    For anonymous users or no_auth mode: returns only public providers\n    This ensures backward compatibility while providing better UX for authenticated users.\n    \"\"\"\n    start_time = datetime.now(timezone.utc)\n    logger.debug(\"Starting to fetch user-accessible LLM providers\")\n\n    all_providers = fetch_existing_llm_providers(db_session, [])\n    user_group_ids = fetch_user_group_ids(db_session, user)\n    is_admin = user.role == UserRole.ADMIN\n\n    accessible_providers = []\n\n    for provider in all_providers:\n        # Use centralized access control logic with persona=None since we're\n        # listing providers without a specific persona context. This correctly:\n        # - Includes public providers WITHOUT persona restrictions\n        # - Includes providers user can access via group membership\n        # - Excludes providers with persona restrictions (requires specific persona)\n        # - Excludes non-public providers with no restrictions (admin-only)\n        if can_user_access_llm_provider(\n            provider, user_group_ids, persona=None, is_admin=is_admin\n        ):\n            accessible_providers.append(LLMProviderDescriptor.from_model(provider))\n\n    end_time = datetime.now(timezone.utc)\n    duration = (end_time - start_time).total_seconds()\n    logger.debug(\n        f\"Completed fetching {len(accessible_providers)} user-accessible providers in {duration:.2f} seconds\"\n    )\n\n    return LLMProviderResponse[LLMProviderDescriptor].from_models(\n        providers=accessible_providers,\n        default_text=DefaultModel.from_model_config(\n            fetch_default_llm_model(db_session)\n        ),\n        default_vision=DefaultModel.from_model_config(\n            fetch_default_vision_model(db_session)\n        ),\n    )\n\n\ndef get_valid_model_names_for_persona(\n    persona_id: int,\n    user: User,\n    db_session: Session,\n) -> list[str]:\n    \"\"\"Get all valid model names that a user can access for this persona.\n\n    Returns a list of model names (e.g., [\"gpt-4o\", \"claude-3-5-sonnet\"]) that are\n    available to the user when using this persona, respecting all RBAC restrictions.\n    Public providers are included unless they have persona restrictions that exclude this persona.\n    \"\"\"\n    persona = fetch_persona_with_groups(db_session, persona_id)\n    if not persona:\n        return []\n\n    is_admin = user.role == UserRole.ADMIN\n    all_providers = fetch_existing_llm_providers(\n        db_session, [LLMModelFlowType.CHAT, LLMModelFlowType.VISION]\n    )\n    user_group_ids = set() if is_admin else fetch_user_group_ids(db_session, user)\n\n    valid_models = []\n    for llm_provider_model in all_providers:\n        # Check access with persona context — respects all RBAC restrictions\n        if can_user_access_llm_provider(\n            llm_provider_model, user_group_ids, persona, is_admin=is_admin\n        ):\n            # Collect all model names from this provider\n            for model_config in llm_provider_model.model_configurations:\n                if model_config.is_visible:\n                    valid_models.append(model_config.name)\n\n    return valid_models\n\n\n@basic_router.get(\"/persona/{persona_id}/providers\")\ndef list_llm_providers_for_persona(\n    persona_id: int,\n    user: User = Depends(current_chat_accessible_user),\n    db_session: Session = Depends(get_session),\n) -> LLMProviderResponse[LLMProviderDescriptor]:\n    \"\"\"Get LLM providers for a specific persona.\n\n    Returns providers that the user can access when using this persona:\n    - Public providers (respecting persona restrictions if set)\n    - Restricted providers user can access via group/persona restrictions\n\n    This endpoint is used for background fetching of restricted providers\n    and should NOT block the UI.\n    \"\"\"\n    start_time = datetime.now(timezone.utc)\n    logger.debug(f\"Starting to fetch LLM providers for persona {persona_id}\")\n\n    persona = fetch_persona_with_groups(db_session, persona_id)\n    if not persona:\n        raise OnyxError(OnyxErrorCode.PERSONA_NOT_FOUND, \"Persona not found\")\n\n    # Verify user has access to this persona\n    if not user_can_access_persona(db_session, persona_id, user, get_editable=False):\n        raise OnyxError(\n            OnyxErrorCode.INSUFFICIENT_PERMISSIONS,\n            \"You don't have access to this assistant\",\n        )\n\n    is_admin = user.role == UserRole.ADMIN\n    all_providers = fetch_existing_llm_providers(\n        db_session, [LLMModelFlowType.CHAT, LLMModelFlowType.VISION]\n    )\n    user_group_ids = set() if is_admin else fetch_user_group_ids(db_session, user)\n\n    llm_provider_list: list[LLMProviderDescriptor] = []\n\n    for llm_provider_model in all_providers:\n        # Check access with persona context — respects persona restrictions\n        if can_user_access_llm_provider(\n            llm_provider_model, user_group_ids, persona, is_admin=is_admin\n        ):\n            llm_provider_list.append(\n                LLMProviderDescriptor.from_model(llm_provider_model)\n            )\n\n    end_time = datetime.now(timezone.utc)\n    duration = (end_time - start_time).total_seconds()\n    logger.debug(\n        f\"Completed fetching {len(llm_provider_list)} LLM providers for persona {persona_id} in {duration:.2f} seconds\"\n    )\n\n    # Get the default model and vision model for the persona\n    # TODO: Port persona's over to use ID\n    persona_default_provider = persona.llm_model_provider_override\n    persona_default_model = persona.llm_model_version_override\n\n    default_text_model = fetch_default_llm_model(db_session)\n    default_vision_model = fetch_default_vision_model(db_session)\n\n    # Build default_text and default_vision using persona overrides when available,\n    # falling back to the global defaults.\n    default_text = DefaultModel.from_model_config(default_text_model)\n    default_vision = DefaultModel.from_model_config(default_vision_model)\n\n    if persona_default_provider:\n        provider = fetch_existing_llm_provider(persona_default_provider, db_session)\n        if provider and can_user_access_llm_provider(\n            provider, user_group_ids, persona, is_admin=is_admin\n        ):\n            if persona_default_model:\n                # Persona specifies both provider and model — use them directly\n                default_text = DefaultModel(\n                    provider_id=provider.id,\n                    model_name=persona_default_model,\n                )\n            else:\n                # Persona specifies only the provider — pick a visible (public) model,\n                # falling back to any model on this provider\n                visible_model = next(\n                    (mc for mc in provider.model_configurations if mc.is_visible),\n                    None,\n                )\n                fallback_model = visible_model or next(\n                    iter(provider.model_configurations), None\n                )\n                if fallback_model:\n                    default_text = DefaultModel(\n                        provider_id=provider.id,\n                        model_name=fallback_model.name,\n                    )\n\n    return LLMProviderResponse[LLMProviderDescriptor].from_models(\n        providers=llm_provider_list,\n        default_text=default_text,\n        default_vision=default_vision,\n    )\n\n\n@admin_router.get(\"/provider-contextual-cost\")\ndef get_provider_contextual_cost(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[LLMCost]:\n    \"\"\"\n    Get the cost of Re-indexing all documents for contextual retrieval.\n\n    See https://docs.litellm.ai/docs/completion/token_usage#5-cost_per_token\n    This includes:\n    - The cost of invoking the LLM on each chunk-document pair to get\n      - the doc_summary\n      - the chunk_context\n    - The per-token cost of the LLM used to generate the doc_summary and chunk_context\n    \"\"\"\n    providers = fetch_existing_llm_providers(db_session, [LLMModelFlowType.CHAT])\n    costs = []\n    for provider in providers:\n        for model_configuration in provider.model_configurations:\n            llm_provider = LLMProviderView.from_model(provider)\n            llm = get_llm(\n                provider=provider.provider,\n                model=model_configuration.name,\n                deployment_name=provider.deployment_name,\n                api_key=(\n                    provider.api_key.get_value(apply_mask=False)\n                    if provider.api_key\n                    else None\n                ),\n                api_base=provider.api_base,\n                api_version=provider.api_version,\n                custom_config=provider.custom_config,\n                max_input_tokens=get_max_input_tokens_from_llm_provider(\n                    llm_provider=llm_provider, model_name=model_configuration.name\n                ),\n            )\n            cost = get_llm_contextual_cost(llm)\n            costs.append(\n                LLMCost(\n                    provider=provider.name,\n                    model_name=model_configuration.name,\n                    cost=cost,\n                )\n            )\n\n    return costs\n\n\n@admin_router.post(\"/bedrock/available-models\")\ndef get_bedrock_available_models(\n    request: BedrockModelsRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[BedrockFinalModelResponse]:\n    \"\"\"Fetch available Bedrock models for a specific region and credentials.\n\n    Returns model IDs with display names from AWS. Prefers inference profiles\n    (for cross-region support) over base models when available.\n    \"\"\"\n    try:\n        # Precedence: bearer → keys → IAM\n        if request.aws_bearer_token_bedrock:\n            try:\n                os.environ[\"AWS_BEARER_TOKEN_BEDROCK\"] = (\n                    request.aws_bearer_token_bedrock\n                )\n                session = boto3.Session(region_name=request.aws_region_name)\n            finally:\n                os.environ.pop(\"AWS_BEARER_TOKEN_BEDROCK\", None)\n        elif request.aws_access_key_id and request.aws_secret_access_key:\n            session = boto3.Session(\n                aws_access_key_id=request.aws_access_key_id,\n                aws_secret_access_key=request.aws_secret_access_key,\n                region_name=request.aws_region_name,\n            )\n        else:\n            session = boto3.Session(region_name=request.aws_region_name)\n\n        try:\n            bedrock = session.client(\"bedrock\")\n        except Exception as e:\n            raise OnyxError(\n                OnyxErrorCode.CREDENTIAL_INVALID,\n                f\"Failed to create Bedrock client: {e}. Check AWS credentials and region.\",\n            )\n\n        # Build model info dict from foundation models (modelId -> metadata)\n        model_summaries = bedrock.list_foundation_models().get(\"modelSummaries\", [])\n        model_info: dict[str, ModelMetadata] = {}\n        available_models: set[str] = set()\n\n        for model in model_summaries:\n            model_id = model.get(\"modelId\", \"\")\n            # Skip invalid or non-LLM models (embeddings, image gen, non-streaming)\n            if not is_valid_bedrock_model(\n                model_id, model.get(\"responseStreamingSupported\", False)\n            ):\n                continue\n\n            available_models.add(model_id)\n            input_modalities = model.get(\"inputModalities\", [])\n            model_info[model_id] = {\n                \"display_name\": model.get(\"modelName\", model_id),\n                \"supports_image_input\": \"IMAGE\" in input_modalities,\n            }\n\n        # Get inference profiles (cross-region) - these are preferred over base models\n        profile_ids: set[str] = set()\n        cross_region_models: set[str] = set()\n        try:\n            inference_profiles = bedrock.list_inference_profiles(\n                typeEquals=\"SYSTEM_DEFINED\"\n            ).get(\"inferenceProfileSummaries\", [])\n            for profile in inference_profiles:\n                if not (profile_id := profile.get(\"inferenceProfileId\")):\n                    continue\n                # Skip non-LLM inference profiles\n                if not is_valid_bedrock_model(profile_id):\n                    continue\n\n                profile_ids.add(profile_id)\n\n                # Extract base model ID (everything after first period)\n                # e.g., \"us.anthropic.claude-3-5-sonnet-...\" -> \"anthropic.claude-3-5-sonnet-...\"\n                if \".\" in profile_id:\n                    base_model_id = profile_id.split(\".\", 1)[1]\n                    cross_region_models.add(base_model_id)\n                    region = profile_id.split(\".\")[0]\n\n                    # Copy model info from base model to profile, with region suffix\n                    if base_model_id in model_info:\n                        base_info = model_info[base_model_id]\n                        model_info[profile_id] = {\n                            \"display_name\": f\"{base_info['display_name']} ({region})\",\n                            \"supports_image_input\": base_info[\"supports_image_input\"],\n                        }\n                    else:\n                        # Base model not in region - infer metadata from profile\n                        profile_name = profile.get(\"inferenceProfileName\", \"\")\n                        model_info[profile_id] = {\n                            \"display_name\": (\n                                f\"{profile_name} ({region})\"\n                                if profile_name\n                                else generate_bedrock_display_name(profile_id)\n                            ),\n                            # Infer vision support from known vision models\n                            \"supports_image_input\": infer_vision_support(profile_id),\n                        }\n        except Exception as e:\n            logger.warning(f\"Couldn't fetch inference profiles for Bedrock: {e}\")\n\n        # Prefer profiles: de-dupe available models, then add profile IDs\n        candidates = (available_models - cross_region_models) | profile_ids\n\n        # Build response with display names\n        results: list[BedrockFinalModelResponse] = []\n        for model_id in sorted(candidates, reverse=True):\n            info: ModelMetadata | None = model_info.get(model_id)\n            display_name = info[\"display_name\"] if info else None\n\n            # Fallback: generate display name from model ID if not available\n            if not display_name or display_name == model_id:\n                display_name = generate_bedrock_display_name(model_id)\n\n            results.append(\n                BedrockFinalModelResponse(\n                    name=model_id,\n                    display_name=display_name,\n                    max_input_tokens=get_bedrock_token_limit(model_id),\n                    supports_image_input=(\n                        info[\"supports_image_input\"] if info else False\n                    ),\n                )\n            )\n\n        # Sync new models to DB if provider_name is specified\n        if request.provider_name:\n            _sync_fetched_models(\n                db_session=db_session,\n                provider_name=request.provider_name,\n                models=[\n                    SyncModelEntry(\n                        name=r.name,\n                        display_name=r.display_name,\n                        max_input_tokens=r.max_input_tokens,\n                        supports_image_input=r.supports_image_input,\n                    )\n                    for r in results\n                ],\n                source_label=\"Bedrock\",\n            )\n\n        return results\n\n    except (ClientError, NoCredentialsError, BotoCoreError) as e:\n        raise OnyxError(\n            OnyxErrorCode.CREDENTIAL_INVALID,\n            f\"Failed to connect to AWS Bedrock: {e}\",\n        )\n    except Exception as e:\n        raise OnyxError(\n            OnyxErrorCode.INTERNAL_ERROR,\n            f\"Unexpected error fetching Bedrock models: {e}\",\n        )\n\n\ndef _get_ollama_available_model_names(api_base: str) -> set[str]:\n    \"\"\"Fetch available model names from Ollama server.\"\"\"\n    tags_url = f\"{api_base}/api/tags\"\n    try:\n        response = httpx.get(tags_url, timeout=5.0)\n        response.raise_for_status()\n        response_json = response.json()\n    except Exception as e:\n        raise OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            f\"Failed to fetch Ollama models: {e}\",\n        )\n\n    models = response_json.get(\"models\", [])\n    return {model.get(\"name\") for model in models if model.get(\"name\")}\n\n\n@admin_router.post(\"/ollama/available-models\")\ndef get_ollama_available_models(\n    request: OllamaModelsRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[OllamaFinalModelResponse]:\n    \"\"\"Fetch the list of available models from an Ollama server.\"\"\"\n\n    cleaned_api_base = request.api_base.strip().rstrip(\"/\")\n    if not cleaned_api_base:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"API base URL is required to fetch Ollama models.\",\n        )\n\n    # NOTE: most people run Ollama locally, so we don't disallow internal URLs\n    # the only way this could be used for SSRF is if there's another endpoint that\n    # is not protected + exposes sensitive information on the `/api/tags` endpoint\n    # with the same response format\n    model_names = _get_ollama_available_model_names(cleaned_api_base)\n    if not model_names:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"No models found from your Ollama server\",\n        )\n\n    all_models_with_context_size_and_vision: list[OllamaFinalModelResponse] = []\n    show_url = f\"{cleaned_api_base}/api/show\"\n\n    for model_name in model_names:\n        context_limit: int | None = None\n        supports_image_input: bool | None = None\n        try:\n            show_response = httpx.post(\n                show_url,\n                json={\"model\": model_name},\n                timeout=5.0,\n            )\n            show_response.raise_for_status()\n            show_response_json = show_response.json()\n\n            # Parse the response into the expected format\n            ollama_model_details = OllamaModelDetails.model_validate(show_response_json)\n\n            # Check if this model supports completion/chat\n            if not ollama_model_details.supports_completion():\n                continue\n\n            # Optimistically access. Context limit is stored as \"model_architecture.context\" = int\n            architecture = ollama_model_details.model_info.get(\n                \"general.architecture\", \"\"\n            )\n            context_limit = ollama_model_details.model_info.get(\n                architecture + \".context_length\", None\n            )\n            supports_image_input = ollama_model_details.supports_image_input()\n        except ValidationError as e:\n            logger.warning(\n                \"Invalid model details from Ollama server\",\n                extra={\"model\": model_name, \"validation_error\": str(e)},\n            )\n        except Exception as e:\n            logger.warning(\n                \"Failed to fetch Ollama model details\",\n                extra={\"model\": model_name, \"error\": str(e)},\n            )\n\n        # Note: context_limit may be None if Ollama API doesn't provide it.\n        # The runtime will use LiteLLM fallback logic to determine max tokens.\n        all_models_with_context_size_and_vision.append(\n            OllamaFinalModelResponse(\n                name=model_name,\n                display_name=generate_ollama_display_name(model_name),\n                max_input_tokens=context_limit,\n                supports_image_input=supports_image_input or False,\n            )\n        )\n\n    sorted_results = sorted(\n        all_models_with_context_size_and_vision,\n        key=lambda m: m.name.lower(),\n    )\n\n    # Sync new models to DB if provider_name is specified\n    if request.provider_name:\n        _sync_fetched_models(\n            db_session=db_session,\n            provider_name=request.provider_name,\n            models=[\n                SyncModelEntry(\n                    name=r.name,\n                    display_name=r.display_name,\n                    max_input_tokens=r.max_input_tokens,\n                    supports_image_input=r.supports_image_input,\n                )\n                for r in sorted_results\n            ],\n            source_label=\"Ollama\",\n        )\n\n    return sorted_results\n\n\ndef _get_openrouter_models_response(api_base: str, api_key: str) -> dict:\n    \"\"\"Perform GET to OpenRouter /models and return parsed JSON.\"\"\"\n    cleaned_api_base = api_base.strip().rstrip(\"/\")\n    url = f\"{cleaned_api_base}/models\"\n    headers = {\n        \"Authorization\": f\"Bearer {api_key}\",\n        # Optional headers recommended by OpenRouter for attribution\n        \"HTTP-Referer\": \"https://onyx.app\",\n        \"X-Title\": \"Onyx\",\n    }\n    try:\n        response = httpx.get(url, headers=headers, timeout=10.0)\n        response.raise_for_status()\n        return response.json()\n    except Exception as e:\n        raise OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            f\"Failed to fetch OpenRouter models: {e}\",\n        )\n\n\n@admin_router.post(\"/openrouter/available-models\")\ndef get_openrouter_available_models(\n    request: OpenRouterModelsRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[OpenRouterFinalModelResponse]:\n    \"\"\"Fetch available models from OpenRouter `/models` endpoint.\n\n    Parses id, name (display), context_length, and architecture.input_modalities.\n    \"\"\"\n\n    response_json = _get_openrouter_models_response(\n        api_base=request.api_base, api_key=request.api_key\n    )\n\n    data = response_json.get(\"data\", [])\n    if not isinstance(data, list) or len(data) == 0:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"No models found from your OpenRouter endpoint\",\n        )\n\n    results: list[OpenRouterFinalModelResponse] = []\n    for item in data:\n        try:\n            model_details = OpenRouterModelDetails.model_validate(item)\n\n            # NOTE: This should be removed if we ever support dynamically fetching embedding models.\n            if model_details.is_embedding_model:\n                continue\n\n            # Strip vendor prefix since we group by vendor (e.g., \"Microsoft: Phi 4\" → \"Phi 4\")\n            display_name = strip_openrouter_vendor_prefix(\n                model_details.display_name, model_details.id\n            )\n\n            # Treat context_length of 0 as unknown (None)\n            context_length = model_details.context_length or None\n\n            results.append(\n                OpenRouterFinalModelResponse(\n                    name=model_details.id,\n                    display_name=display_name,\n                    max_input_tokens=context_length,\n                    supports_image_input=model_details.supports_image_input,\n                )\n            )\n        except Exception as e:\n            logger.warning(\n                \"Failed to parse OpenRouter model entry\",\n                extra={\"error\": str(e), \"item\": str(item)[:1000]},\n            )\n\n    if not results:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"No compatible models found from OpenRouter\",\n        )\n\n    sorted_results = sorted(results, key=lambda m: m.name.lower())\n\n    # Sync new models to DB if provider_name is specified\n    if request.provider_name:\n        _sync_fetched_models(\n            db_session=db_session,\n            provider_name=request.provider_name,\n            models=[\n                SyncModelEntry(\n                    name=r.name,\n                    display_name=r.display_name,\n                    max_input_tokens=r.max_input_tokens,\n                    supports_image_input=r.supports_image_input,\n                )\n                for r in sorted_results\n            ],\n            source_label=\"OpenRouter\",\n        )\n\n    return sorted_results\n\n\n@admin_router.post(\"/lm-studio/available-models\")\ndef get_lm_studio_available_models(\n    request: LMStudioModelsRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[LMStudioFinalModelResponse]:\n    \"\"\"Fetch available models from an LM Studio server.\n\n    Uses the LM Studio-native /api/v1/models endpoint which exposes\n    rich metadata including capabilities (vision, reasoning),\n    display names, and context lengths.\n    \"\"\"\n    cleaned_api_base = request.api_base.strip().rstrip(\"/\")\n    # Strip /v1 suffix that users may copy from OpenAI-compatible tool configs;\n    # the native metadata endpoint lives at /api/v1/models, not /v1/api/v1/models.\n    cleaned_api_base = cleaned_api_base.removesuffix(\"/v1\")\n    if not cleaned_api_base:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"API base URL is required to fetch LM Studio models.\",\n        )\n\n    # If provider_name is given and the api_key hasn't been changed by the user,\n    # fall back to the stored API key from the database (the form value is masked).\n    api_key = request.api_key\n    if request.provider_name and not request.api_key_changed:\n        existing_provider = fetch_existing_llm_provider(\n            name=request.provider_name, db_session=db_session\n        )\n        if existing_provider and existing_provider.custom_config:\n            api_key = existing_provider.custom_config.get(LM_STUDIO_API_KEY_CONFIG_KEY)\n\n    url = f\"{cleaned_api_base}/api/v1/models\"\n    headers: dict[str, str] = {}\n    if api_key:\n        headers[\"Authorization\"] = f\"Bearer {api_key}\"\n\n    try:\n        response = httpx.get(url, headers=headers, timeout=10.0)\n        response.raise_for_status()\n        response_json = response.json()\n    except Exception as e:\n        raise OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            f\"Failed to fetch LM Studio models: {e}\",\n        )\n\n    models = response_json.get(\"models\", [])\n    if not isinstance(models, list) or len(models) == 0:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"No models found from your LM Studio server.\",\n        )\n\n    results: list[LMStudioFinalModelResponse] = []\n    for item in models:\n        # Filter to LLM-type models only (skip embeddings, etc.)\n        if item.get(\"type\") != \"llm\":\n            continue\n\n        model_key = item.get(\"key\")\n        if not model_key:\n            continue\n\n        display_name = item.get(\"display_name\") or model_key\n        max_context_length = item.get(\"max_context_length\")\n        capabilities = item.get(\"capabilities\") or {}\n\n        results.append(\n            LMStudioFinalModelResponse(\n                name=model_key,\n                display_name=display_name,\n                max_input_tokens=max_context_length,\n                supports_image_input=capabilities.get(\"vision\", False),\n                supports_reasoning=capabilities.get(\"reasoning\", False)\n                or is_reasoning_model(model_key, display_name),\n            )\n        )\n\n    if not results:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"No compatible models found from LM Studio server.\",\n        )\n\n    sorted_results = sorted(results, key=lambda m: m.name.lower())\n\n    # Sync new models to DB if provider_name is specified\n    if request.provider_name:\n        _sync_fetched_models(\n            db_session=db_session,\n            provider_name=request.provider_name,\n            models=[\n                SyncModelEntry(\n                    name=r.name,\n                    display_name=r.display_name,\n                    max_input_tokens=r.max_input_tokens,\n                    supports_image_input=r.supports_image_input,\n                )\n                for r in sorted_results\n            ],\n            source_label=\"LM Studio\",\n        )\n\n    return sorted_results\n\n\n@admin_router.post(\"/litellm/available-models\")\ndef get_litellm_available_models(\n    request: LitellmModelsRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[LitellmFinalModelResponse]:\n    \"\"\"Fetch available models from Litellm proxy /v1/models endpoint.\"\"\"\n    response_json = _get_litellm_models_response(\n        api_key=request.api_key, api_base=request.api_base\n    )\n\n    models = response_json.get(\"data\", [])\n    if not isinstance(models, list) or len(models) == 0:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"No models found from your Litellm endpoint\",\n        )\n\n    results: list[LitellmFinalModelResponse] = []\n    for model in models:\n        try:\n            model_details = LitellmModelDetails.model_validate(model)\n\n            # Skip embedding models\n            if is_embedding_model(model_details.id):\n                continue\n\n            results.append(\n                LitellmFinalModelResponse(\n                    provider_name=model_details.owned_by,\n                    model_name=model_details.id,\n                )\n            )\n        except Exception as e:\n            logger.warning(\n                \"Failed to parse Litellm model entry\",\n                extra={\"error\": str(e), \"item\": str(model)[:1000]},\n            )\n\n    if not results:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"No compatible models found from Litellm\",\n        )\n\n    sorted_results = sorted(results, key=lambda m: m.model_name.lower())\n\n    # Sync new models to DB if provider_name is specified\n    if request.provider_name:\n        _sync_fetched_models(\n            db_session=db_session,\n            provider_name=request.provider_name,\n            models=[\n                SyncModelEntry(\n                    name=r.model_name,\n                    display_name=r.model_name,\n                )\n                for r in sorted_results\n            ],\n            source_label=\"LiteLLM\",\n        )\n\n    return sorted_results\n\n\ndef _get_litellm_models_response(api_key: str, api_base: str) -> dict:\n    \"\"\"Perform GET to Litellm proxy /api/v1/models and return parsed JSON.\"\"\"\n    cleaned_api_base = api_base.strip().rstrip(\"/\")\n    url = f\"{cleaned_api_base}/v1/models\"\n\n    return _get_openai_compatible_models_response(\n        url=url,\n        source_name=\"LiteLLM proxy\",\n        api_key=api_key,\n    )\n\n\ndef _get_openai_compatible_models_response(\n    url: str,\n    source_name: str,\n    api_key: str | None = None,\n) -> dict:\n    \"\"\"Fetch model metadata from an OpenAI-compatible `/models` endpoint.\"\"\"\n    headers = {\n        \"Authorization\": f\"Bearer {api_key}\",\n        \"HTTP-Referer\": \"https://onyx.app\",\n        \"X-Title\": \"Onyx\",\n    }\n    if not api_key:\n        headers.pop(\"Authorization\")\n\n    try:\n        response = httpx.get(url, headers=headers, timeout=10.0)\n        response.raise_for_status()\n        return response.json()\n    except httpx.HTTPStatusError as e:\n        if e.response.status_code == 401:\n            raise OnyxError(\n                OnyxErrorCode.VALIDATION_ERROR,\n                f\"Authentication failed: invalid or missing API key for {source_name}.\",\n            )\n        elif e.response.status_code == 404:\n            raise OnyxError(\n                OnyxErrorCode.VALIDATION_ERROR,\n                f\"{source_name} models endpoint not found at {url}. Please verify the API base URL.\",\n            )\n        else:\n            raise OnyxError(\n                OnyxErrorCode.BAD_GATEWAY,\n                f\"Failed to fetch {source_name} models: {e}\",\n            )\n    except httpx.RequestError as e:\n        logger.warning(\n            \"Failed to fetch models from OpenAI-compatible endpoint\",\n            extra={\"source\": source_name, \"url\": url, \"error\": str(e)},\n            exc_info=True,\n        )\n        raise OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            f\"Failed to fetch {source_name} models: {e}\",\n        )\n    except ValueError as e:\n        logger.warning(\n            \"Received invalid model response from OpenAI-compatible endpoint\",\n            extra={\"source\": source_name, \"url\": url, \"error\": str(e)},\n            exc_info=True,\n        )\n        raise OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            f\"Failed to fetch {source_name} models: {e}\",\n        )\n\n\n@admin_router.post(\"/bifrost/available-models\")\ndef get_bifrost_available_models(\n    request: BifrostModelsRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[BifrostFinalModelResponse]:\n    \"\"\"Fetch available models from Bifrost gateway /v1/models endpoint.\"\"\"\n    response_json = _get_bifrost_models_response(\n        api_base=request.api_base, api_key=request.api_key\n    )\n\n    models = response_json.get(\"data\", [])\n    if not isinstance(models, list) or len(models) == 0:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"No models found from your Bifrost endpoint\",\n        )\n\n    results: list[BifrostFinalModelResponse] = []\n    for model in models:\n        try:\n            model_id = model.get(\"id\", \"\")\n            model_name = model.get(\"name\", model_id)\n\n            if not model_id:\n                continue\n\n            # Skip embedding models\n            if is_embedding_model(model_id):\n                continue\n\n            results.append(\n                BifrostFinalModelResponse(\n                    name=model_id,\n                    display_name=model_name,\n                    max_input_tokens=model.get(\"context_length\"),\n                    supports_image_input=infer_vision_support(model_id),\n                    supports_reasoning=is_reasoning_model(model_id, model_name),\n                )\n            )\n        except Exception as e:\n            logger.warning(\n                \"Failed to parse Bifrost model entry\",\n                extra={\"error\": str(e), \"item\": str(model)[:1000]},\n            )\n\n    if not results:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"No compatible models found from Bifrost\",\n        )\n\n    sorted_results = sorted(results, key=lambda m: m.name.lower())\n\n    # Sync new models to DB if provider_name is specified\n    if request.provider_name:\n        _sync_fetched_models(\n            db_session=db_session,\n            provider_name=request.provider_name,\n            models=[\n                SyncModelEntry(\n                    name=r.name,\n                    display_name=r.display_name,\n                    max_input_tokens=r.max_input_tokens,\n                    supports_image_input=r.supports_image_input,\n                )\n                for r in sorted_results\n            ],\n            source_label=\"Bifrost\",\n        )\n\n    return sorted_results\n\n\ndef _get_bifrost_models_response(api_base: str, api_key: str | None = None) -> dict:\n    \"\"\"Perform GET to Bifrost /v1/models and return parsed JSON.\"\"\"\n    cleaned_api_base = api_base.strip().rstrip(\"/\")\n    # Ensure we hit /v1/models\n    if cleaned_api_base.endswith(\"/v1\"):\n        url = f\"{cleaned_api_base}/models\"\n    else:\n        url = f\"{cleaned_api_base}/v1/models\"\n\n    return _get_openai_compatible_models_response(\n        url=url,\n        source_name=\"Bifrost\",\n        api_key=api_key,\n    )\n"
  },
  {
    "path": "backend/onyx/server/manage/llm/models.py",
    "content": "from __future__ import annotations\n\nfrom typing import Any\nfrom typing import Generic\nfrom typing import TYPE_CHECKING\nfrom typing import TypeVar\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\nfrom pydantic import field_validator\n\nfrom onyx.db.enums import LLMModelFlowType\nfrom onyx.llm.utils import get_max_input_tokens\nfrom onyx.llm.utils import litellm_thinks_model_supports_image_input\nfrom onyx.llm.utils import model_is_reasoning_model\nfrom onyx.server.manage.llm.utils import DYNAMIC_LLM_PROVIDERS\nfrom onyx.server.manage.llm.utils import extract_vendor_from_model_name\nfrom onyx.server.manage.llm.utils import filter_model_configurations\nfrom onyx.server.manage.llm.utils import is_reasoning_model\n\n\nif TYPE_CHECKING:\n    from onyx.db.models import (\n        LLMProvider as LLMProviderModel,\n        ModelConfiguration as ModelConfigurationModel,\n    )\n\nT = TypeVar(\"T\", \"LLMProviderDescriptor\", \"LLMProviderView\", \"VisionProviderResponse\")\n\n\nclass TestLLMRequest(BaseModel):\n    # provider level\n    id: int | None = None\n    provider: str\n    model: str\n    api_key: str | None = None\n    api_base: str | None = None\n    api_version: str | None = None\n    custom_config: dict[str, str] | None = None\n\n    # model level\n    deployment_name: str | None = None\n\n    # if try and use the existing API/custom config key\n    api_key_changed: bool\n    custom_config_changed: bool\n\n    @field_validator(\"provider\", mode=\"before\")\n    @classmethod\n    def normalize_provider(cls, value: str) -> str:\n        \"\"\"Normalize provider name by stripping whitespace and lowercasing.\"\"\"\n        return value.strip().lower()\n\n\nclass LLMProviderDescriptor(BaseModel):\n    \"\"\"A descriptor for an LLM provider that can be safely viewed by\n    non-admin users. Used when giving a list of available LLMs.\"\"\"\n\n    id: int\n    name: str\n    provider: str\n    provider_display_name: str  # Human-friendly name like \"Claude (Anthropic)\"\n    model_configurations: list[\"ModelConfigurationView\"]\n\n    @classmethod\n    def from_model(\n        cls,\n        llm_provider_model: \"LLMProviderModel\",\n    ) -> \"LLMProviderDescriptor\":\n        from onyx.llm.well_known_providers.llm_provider_options import (\n            get_provider_display_name,\n        )\n\n        provider = llm_provider_model.provider\n\n        return cls(\n            id=llm_provider_model.id,\n            name=llm_provider_model.name,\n            provider=provider,\n            provider_display_name=get_provider_display_name(provider),\n            model_configurations=filter_model_configurations(\n                llm_provider_model.model_configurations, provider\n            ),\n        )\n\n\nclass LLMProvider(BaseModel):\n    name: str\n    provider: str\n    api_key: str | None = None\n    api_base: str | None = None\n    api_version: str | None = None\n    custom_config: dict[str, str] | None = None\n    is_public: bool = True\n    is_auto_mode: bool = False\n    groups: list[int] = Field(default_factory=list)\n    personas: list[int] = Field(default_factory=list)\n    deployment_name: str | None = None\n\n\nclass LLMProviderUpsertRequest(LLMProvider):\n    # should only be used for a \"custom\" provider\n    # for default providers, the built-in model names are used\n    id: int | None = None\n    api_key_changed: bool = False\n    custom_config_changed: bool = False\n    model_configurations: list[\"ModelConfigurationUpsertRequest\"] = []\n\n    @field_validator(\"provider\", mode=\"before\")\n    @classmethod\n    def normalize_provider(cls, value: str) -> str:\n        \"\"\"Normalize provider name by stripping whitespace and lowercasing.\"\"\"\n        return value.strip().lower()\n\n\nclass LLMProviderView(LLMProvider):\n    \"\"\"Stripped down representation of LLMProvider for display / limited access info only\"\"\"\n\n    id: int\n    model_configurations: list[\"ModelConfigurationView\"]\n\n    @classmethod\n    def from_model(\n        cls,\n        llm_provider_model: \"LLMProviderModel\",\n    ) -> \"LLMProviderView\":\n        # Safely get groups - handle detached instance case\n        try:\n            groups = [group.id for group in llm_provider_model.groups]\n        except Exception:\n            # If groups relationship can't be loaded (detached instance), use empty list\n            groups = []\n        # Safely get personas - similar handling as groups\n        try:\n            personas = [persona.id for persona in llm_provider_model.personas]\n        except Exception:\n            personas = []\n\n        provider = llm_provider_model.provider\n\n        return cls(\n            id=llm_provider_model.id,\n            name=llm_provider_model.name,\n            provider=provider,\n            api_key=(\n                llm_provider_model.api_key.get_value(apply_mask=False)\n                if llm_provider_model.api_key\n                else None\n            ),\n            api_base=llm_provider_model.api_base,\n            api_version=llm_provider_model.api_version,\n            custom_config=llm_provider_model.custom_config,\n            is_public=llm_provider_model.is_public,\n            is_auto_mode=llm_provider_model.is_auto_mode,\n            groups=groups,\n            personas=personas,\n            deployment_name=llm_provider_model.deployment_name,\n            model_configurations=filter_model_configurations(\n                llm_provider_model.model_configurations, provider\n            ),\n        )\n\n\nclass ModelConfigurationUpsertRequest(BaseModel):\n    name: str\n    is_visible: bool\n    max_input_tokens: int | None = None\n    supports_image_input: bool | None = None\n    display_name: str | None = None  # For dynamic providers, from source API\n\n    @classmethod\n    def from_model(\n        cls, model_configuration_model: \"ModelConfigurationModel\"\n    ) -> \"ModelConfigurationUpsertRequest\":\n        return cls(\n            name=model_configuration_model.name,\n            is_visible=model_configuration_model.is_visible,\n            max_input_tokens=model_configuration_model.max_input_tokens,\n            supports_image_input=model_configuration_model.supports_image_input,\n            display_name=model_configuration_model.display_name,\n        )\n\n\nclass ModelConfigurationView(BaseModel):\n    name: str\n    is_visible: bool\n    max_input_tokens: int | None = None\n    supports_image_input: bool\n    supports_reasoning: bool = False\n    display_name: str | None = None\n    provider_display_name: str | None = None\n    vendor: str | None = None\n    version: str | None = None\n    region: str | None = None\n\n    @classmethod\n    def from_model(\n        cls,\n        model_configuration_model: \"ModelConfigurationModel\",\n        provider_name: str,\n    ) -> \"ModelConfigurationView\":\n        # For dynamic providers (OpenRouter, Bedrock, Ollama), use the display_name\n        # stored in DB from the source API. Skip LiteLLM parsing entirely.\n        if (\n            provider_name in DYNAMIC_LLM_PROVIDERS\n            and model_configuration_model.display_name\n        ):\n            # Extract vendor from model name for grouping (e.g., \"Anthropic\", \"OpenAI\")\n            vendor = extract_vendor_from_model_name(\n                model_configuration_model.name, provider_name\n            )\n\n            return cls(\n                name=model_configuration_model.name,\n                is_visible=model_configuration_model.is_visible,\n                max_input_tokens=model_configuration_model.max_input_tokens,\n                supports_image_input=(\n                    LLMModelFlowType.VISION\n                    in model_configuration_model.llm_model_flow_types\n                ),\n                # Infer reasoning support from model name/display name\n                supports_reasoning=is_reasoning_model(\n                    model_configuration_model.name,\n                    model_configuration_model.display_name or \"\",\n                ),\n                display_name=model_configuration_model.display_name,\n                provider_display_name=None,  # Not needed for dynamic providers\n                vendor=vendor,\n                version=None,\n                region=None,\n            )\n\n        # For static providers (OpenAI, Anthropic, etc.), use LiteLLM enrichments\n        from onyx.llm.model_name_parser import parse_litellm_model_name\n\n        # Parse the model name to get display information\n        # Include provider prefix if not already present (enrichments use full keys like \"vertex_ai/...\")\n        model_name = model_configuration_model.name\n        if provider_name and not model_name.startswith(f\"{provider_name}/\"):\n            model_name = f\"{provider_name}/{model_name}\"\n        parsed = parse_litellm_model_name(model_name)\n\n        # Include region in display name for Bedrock cross-region models\n        display_name = (\n            f\"{parsed.display_name} ({parsed.region})\"\n            if parsed.region\n            else parsed.display_name\n        )\n\n        return cls(\n            name=model_configuration_model.name,\n            is_visible=model_configuration_model.is_visible,\n            max_input_tokens=(\n                model_configuration_model.max_input_tokens\n                or get_max_input_tokens(\n                    model_name=model_configuration_model.name,\n                    model_provider=provider_name,\n                )\n            ),\n            supports_image_input=(\n                True\n                if LLMModelFlowType.VISION\n                in model_configuration_model.llm_model_flow_types\n                else litellm_thinks_model_supports_image_input(\n                    model_configuration_model.name, provider_name\n                )\n            ),\n            supports_reasoning=model_is_reasoning_model(\n                model_configuration_model.name, provider_name\n            ),\n            # Populate display fields from parsed model name\n            display_name=display_name,\n            provider_display_name=parsed.provider_display_name,\n            vendor=parsed.vendor,\n            version=parsed.version,\n            region=parsed.region,\n        )\n\n\nclass VisionProviderResponse(LLMProviderView):\n    \"\"\"Response model for vision providers endpoint, including vision-specific fields.\"\"\"\n\n    vision_models: list[str]\n\n\nclass LLMCost(BaseModel):\n    provider: str\n    model_name: str\n    cost: float\n\n\nclass BedrockModelsRequest(BaseModel):\n    aws_region_name: str\n    aws_access_key_id: str | None = None\n    aws_secret_access_key: str | None = None\n    aws_bearer_token_bedrock: str | None = None\n    provider_name: str | None = None  # Optional: to save models to existing provider\n\n\nclass BedrockFinalModelResponse(BaseModel):\n    name: str  # Model ID (e.g., \"anthropic.claude-3-5-sonnet-20241022-v2:0\")\n    display_name: str  # Human-readable name from AWS (e.g., \"Claude 3.5 Sonnet v2\")\n    max_input_tokens: int  # From LiteLLM, our mapping, or default 32000\n    supports_image_input: bool\n\n\nclass OllamaModelsRequest(BaseModel):\n    api_base: str\n    provider_name: str | None = None  # Optional: to save models to existing provider\n\n\nclass OllamaFinalModelResponse(BaseModel):\n    name: str\n    display_name: str  # Generated from model name (e.g., \"llama3:7b\" → \"Llama 3 7B\")\n    max_input_tokens: int | None  # From Ollama API or None if unavailable\n    supports_image_input: bool\n\n\nclass OllamaModelDetails(BaseModel):\n    \"\"\"Response model for Ollama /api/show endpoint\"\"\"\n\n    model_info: dict[str, Any]\n    capabilities: list[str] = []\n\n    def supports_completion(self) -> bool:\n        \"\"\"Check if this model supports completion/chat\"\"\"\n        return \"completion\" in self.capabilities\n\n    def supports_image_input(self) -> bool:\n        \"\"\"Check if this model supports image input\"\"\"\n        return \"vision\" in self.capabilities\n\n\n# OpenRouter dynamic models fetch\nclass OpenRouterModelsRequest(BaseModel):\n    api_base: str\n    api_key: str\n    provider_name: str | None = None  # Optional: to save models to existing provider\n\n\nclass OpenRouterModelDetails(BaseModel):\n    \"\"\"Response model for OpenRouter /api/v1/models endpoint\"\"\"\n\n    # This is used to ignore any extra fields that are returned from the API\n    model_config = {\"extra\": \"ignore\"}\n\n    id: str\n    # OpenRouter API returns \"name\" but we use \"display_name\" for consistency\n    display_name: str = Field(alias=\"name\")\n    # context_length may be missing or 0 for some models\n    context_length: int | None = None\n    architecture: dict[str, Any] = {}  # Contains 'input_modalities' key\n\n    @property\n    def supports_image_input(self) -> bool:\n        input_modalities = self.architecture.get(\"input_modalities\", [])\n        return isinstance(input_modalities, list) and \"image\" in input_modalities\n\n    @property\n    def is_embedding_model(self) -> bool:\n        output_modalities = self.architecture.get(\"output_modalities\", [])\n        return isinstance(output_modalities, list) and \"embeddings\" in output_modalities\n\n\nclass OpenRouterFinalModelResponse(BaseModel):\n    name: str  # Model ID (e.g., \"openai/gpt-5-pro\")\n    display_name: str  # Human-readable name from OpenRouter API\n    max_input_tokens: (\n        int | None\n    )  # From OpenRouter API context_length (may be missing for some models)\n    supports_image_input: bool\n\n\n# LM Studio dynamic models fetch\nclass LMStudioModelsRequest(BaseModel):\n    api_base: str\n    api_key: str | None = None\n    api_key_changed: bool = False\n    provider_name: str | None = None  # Optional: to save models to existing provider\n\n\nclass LMStudioFinalModelResponse(BaseModel):\n    name: str  # Model ID from LM Studio (e.g., \"lmstudio-community/Meta-Llama-3-8B\")\n    display_name: str  # Human-readable name\n    max_input_tokens: int | None  # From LM Studio API or None if unavailable\n    supports_image_input: bool\n    supports_reasoning: bool\n\n\nclass DefaultModel(BaseModel):\n    provider_id: int\n    model_name: str\n\n    @classmethod\n    def from_model_config(\n        cls, model_config: ModelConfigurationModel | None\n    ) -> DefaultModel | None:\n        if not model_config:\n            return None\n        return cls(\n            provider_id=model_config.llm_provider_id,\n            model_name=model_config.name,\n        )\n\n\nclass LLMProviderResponse(BaseModel, Generic[T]):\n    providers: list[T]\n    default_text: DefaultModel | None = None\n    default_vision: DefaultModel | None = None\n\n    @classmethod\n    def from_models(\n        cls,\n        providers: list[T],\n        default_text: DefaultModel | None = None,\n        default_vision: DefaultModel | None = None,\n    ) -> LLMProviderResponse[T]:\n        return cls(\n            providers=providers,\n            default_text=default_text,\n            default_vision=default_vision,\n        )\n\n\nclass SyncModelEntry(BaseModel):\n    \"\"\"Typed model for syncing fetched models to the DB.\"\"\"\n\n    name: str\n    display_name: str\n    max_input_tokens: int | None = None\n    supports_image_input: bool = False\n\n\nclass LitellmModelsRequest(BaseModel):\n    api_key: str\n    api_base: str\n    provider_name: str | None = None  # Optional: to save models to existing provider\n\n\nclass LitellmModelDetails(BaseModel):\n    \"\"\"Response model for Litellm proxy /api/v1/models endpoint\"\"\"\n\n    id: str  # Model ID (e.g. \"gpt-4o\")\n    object: str  # \"model\"\n    created: int  # Unix timestamp in seconds\n    owned_by: str  # Provider name (e.g. \"openai\")\n\n\nclass LitellmFinalModelResponse(BaseModel):\n    provider_name: str  # Provider name (e.g. \"openai\")\n    model_name: str  # Model ID (e.g. \"gpt-4o\")\n\n\n# Bifrost dynamic models fetch\nclass BifrostModelsRequest(BaseModel):\n    api_base: str\n    api_key: str | None = None\n    provider_name: str | None = None  # Optional: to save models to existing provider\n\n\nclass BifrostFinalModelResponse(BaseModel):\n    name: str  # Model ID in provider/model format (e.g. \"anthropic/claude-sonnet-4-6\")\n    display_name: str  # Human-readable name from Bifrost API\n    max_input_tokens: int | None\n    supports_image_input: bool\n    supports_reasoning: bool\n"
  },
  {
    "path": "backend/onyx/server/manage/llm/utils.py",
    "content": "\"\"\"\nLLM Provider Utilities\n\nUtilities for dynamic LLM providers (Bedrock, Ollama, OpenRouter):\n- Display name generation from model identifiers\n- Model validation and filtering\n- Vision/reasoning capability inference\n\"\"\"\n\nimport re\nfrom typing import TypedDict\n\nfrom onyx.llm.constants import BEDROCK_MODEL_NAME_MAPPINGS\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.constants import MODEL_PREFIX_TO_VENDOR\nfrom onyx.llm.constants import OLLAMA_MODEL_NAME_MAPPINGS\nfrom onyx.llm.constants import OLLAMA_MODEL_TO_VENDOR\nfrom onyx.llm.constants import PROVIDER_DISPLAY_NAMES\n\n\n# Dynamic providers fetch models directly from source APIs (not LiteLLM)\nDYNAMIC_LLM_PROVIDERS = frozenset(\n    {\n        LlmProviderNames.OPENROUTER,\n        LlmProviderNames.BEDROCK,\n        LlmProviderNames.OLLAMA_CHAT,\n        LlmProviderNames.LM_STUDIO,\n        LlmProviderNames.BIFROST,\n    }\n)\n\n\nclass ModelMetadata(TypedDict):\n    \"\"\"Metadata about a model from the provider API.\"\"\"\n\n    display_name: str\n    supports_image_input: bool\n\n\n# Non-LLM model patterns to filter out (image gen, embeddings, etc.)\nNON_LLM_PATTERNS = frozenset({\"embed\", \"stable-\", \"titan-image\", \"titan-embed\"})\n\n# Known Bedrock vision-capable models (for fallback when base model not in region)\nBEDROCK_VISION_MODELS = frozenset(\n    {\n        \"anthropic.claude-3\",\n        \"anthropic.claude-4\",\n        \"amazon.nova-pro\",\n        \"amazon.nova-lite\",\n        \"amazon.nova-premier\",\n    }\n)\n\n# Known Bifrost/OpenAI-compatible vision-capable model families where the\n# source API does not expose this metadata directly.\nBIFROST_VISION_MODEL_FAMILIES = frozenset(\n    {\n        \"anthropic/claude-3\",\n        \"anthropic/claude-4\",\n        \"amazon/nova-pro\",\n        \"amazon/nova-lite\",\n        \"amazon/nova-premier\",\n        \"openai/gpt-4o\",\n        \"openai/gpt-4.1\",\n        \"google/gemini\",\n        \"meta-llama/llama-3.2\",\n        \"mistral/pixtral\",\n        \"qwen/qwen2.5-vl\",\n        \"qwen/qwen-vl\",\n    }\n)\n\n\ndef is_valid_bedrock_model(\n    model_id: str,\n    supports_streaming: bool = True,\n) -> bool:\n    \"\"\"Check if a Bedrock model ID is a valid LLM model.\n\n    Args:\n        model_id: The model ID to check\n        supports_streaming: Whether the model supports streaming (required for LLMs)\n\n    Returns:\n        True if the model is a valid LLM, False otherwise\n    \"\"\"\n    if not model_id:\n        return False\n    if any(pattern in model_id.lower() for pattern in NON_LLM_PATTERNS):\n        return False\n    if not supports_streaming:\n        return False\n    return True\n\n\ndef infer_vision_support(model_id: str) -> bool:\n    \"\"\"Infer vision support from model ID when base model metadata unavailable.\n\n    Used for providers like Bedrock and Bifrost where vision support may\n    need to be inferred from vendor/model naming conventions.\n    \"\"\"\n    model_id_lower = model_id.lower()\n    if any(vision_model in model_id_lower for vision_model in BEDROCK_VISION_MODELS):\n        return True\n\n    normalized_model_id = model_id_lower.replace(\".\", \"/\")\n    return any(\n        vision_model in normalized_model_id\n        for vision_model in BIFROST_VISION_MODEL_FAMILIES\n    )\n\n\ndef generate_bedrock_display_name(model_id: str) -> str:\n    \"\"\"Generate a human-friendly display name for a Bedrock model ID.\n\n    Examples:\n        \"anthropic.claude-3-5-sonnet-20241022-v2:0\" → \"Claude 3.5 Sonnet v2\"\n        \"us.anthropic.claude-3-5-sonnet-...\" → \"Claude 3.5 Sonnet (us)\"\n        \"meta.llama3-70b-instruct-v1:0\" → \"Llama 3 70B Instruct\"\n    \"\"\"\n    # Check for region prefix (us., eu., global., etc.)\n    region = None\n    if \".\" in model_id:\n        parts = model_id.split(\".\", 1)\n        if parts[0] in (\"us\", \"eu\", \"global\", \"ap\", \"apac\"):\n            region = parts[0]\n            model_id = parts[1]\n\n    # Remove provider prefix (anthropic., meta., amazon., etc.)\n    if \".\" in model_id:\n        model_id = model_id.split(\".\", 1)[1]\n\n    # Remove version suffix (:0, :1, etc.) and date stamps\n    model_id = re.sub(r\":\\d+$\", \"\", model_id)\n    model_id = re.sub(r\"-\\d{8}-v\\d+\", \"\", model_id)  # -20241022-v2\n    model_id = re.sub(r\"-v\\d+:\\d+$\", \"\", model_id)  # -v1:0\n    model_id = re.sub(r\"-v\\d+$\", \"\", model_id)  # -v1\n\n    # Convert to display name\n    display_name = model_id.replace(\"-\", \" \").replace(\"_\", \" \")\n\n    # Apply proper casing for known models\n    display_lower = display_name.lower()\n    for key, proper_name in BEDROCK_MODEL_NAME_MAPPINGS.items():\n        if key in display_lower:\n            # Find and replace with proper casing\n            pattern = re.compile(re.escape(key), re.IGNORECASE)\n            display_name = pattern.sub(proper_name, display_name)\n            break\n\n    # Clean up version numbers (e.g., \"3 5\" -> \"3.5\")\n    display_name = re.sub(r\"(\\d) (\\d)\", r\"\\1.\\2\", display_name)\n\n    # Title case and clean up\n    words = display_name.split()\n    result_words = []\n    for word in words:\n        if word.lower() in BEDROCK_MODEL_NAME_MAPPINGS:\n            result_words.append(BEDROCK_MODEL_NAME_MAPPINGS[word.lower()])\n        elif word.isdigit() or re.match(r\"^\\d+[bBkKmM]?$\", word):\n            result_words.append(word.upper() if word[-1:].lower() in \"bkm\" else word)\n        elif word.lower() in (\"instruct\", \"chat\", \"pro\", \"lite\", \"mini\", \"premier\"):\n            result_words.append(word.title())\n        else:\n            result_words.append(word.title() if not word[0].isupper() else word)\n\n    display_name = \" \".join(result_words)\n\n    # Add region suffix if present\n    if region:\n        display_name = f\"{display_name} ({region})\"\n\n    return display_name\n\n\ndef generate_ollama_display_name(model_name: str) -> str:\n    \"\"\"Generate a human-friendly display name for an Ollama model.\n\n    Examples:\n        \"llama3:latest\" → \"Llama 3\"\n        \"llama3.3:70b\" → \"Llama 3.3 70B\"\n        \"qwen2.5:7b\" → \"Qwen 2.5 7B\"\n        \"mistral:latest\" → \"Mistral\"\n        \"deepseek-r1:14b\" → \"DeepSeek R1 14B\"\n    \"\"\"\n    # Split into base name and tag\n    if \":\" in model_name:\n        base, tag = model_name.rsplit(\":\", 1)\n    else:\n        base, tag = model_name, \"\"\n\n    # Try to match known model families and apply proper casing\n    display_name = base\n    base_lower = base.lower()\n    for key, proper_name in OLLAMA_MODEL_NAME_MAPPINGS.items():\n        if base_lower.startswith(key):\n            # Replace the matched part with proper casing, keep the rest\n            suffix = base[len(key) :]\n            # Handle version numbers like \"3\", \"3.3\", \"2.5\"\n            if suffix and suffix[0].isdigit():\n                suffix = \" \" + suffix\n            # Handle dashes like \"-r1\", \"-coder\"\n            elif suffix.startswith(\"-\"):\n                suffix = \" \" + suffix[1:].title()\n            display_name = proper_name + suffix\n            break\n    else:\n        # Default: Title case with dashes converted to spaces\n        display_name = base.replace(\"-\", \" \").title()\n\n    # Process tag to extract size info (skip \"latest\")\n    if tag and tag.lower() != \"latest\":\n        # Extract size like \"7b\", \"70b\", \"14b\"\n        size_match = re.match(r\"^(\\d+(?:\\.\\d+)?[bBmM])\", tag)\n        if size_match:\n            size = size_match.group(1).upper()\n            display_name = f\"{display_name} {size}\"\n\n    return display_name\n\n\ndef strip_openrouter_vendor_prefix(display_name: str, model_id: str) -> str:\n    \"\"\"Strip redundant vendor prefix from OpenRouter display names.\n\n    OpenRouter returns names like \"Microsoft: Phi 4\" but we already group\n    by vendor, so strip the prefix to avoid redundancy.\n\n    Examples:\n        (\"Microsoft: Phi 4\", \"microsoft/phi-4\") → \"Phi 4\"\n        (\"Mistral: Mixtral 8x7B Instruct\", \"mistralai/mixtral-8x7b\") → \"Mixtral 8x7B Instruct\"\n        (\"Claude 3.5 Sonnet\", \"anthropic/claude-3.5-sonnet\") → \"Claude 3.5 Sonnet\" (no prefix)\n    \"\"\"\n    # Extract vendor from model ID (first part before \"/\")\n    if \"/\" not in model_id:\n        return display_name\n\n    vendor_from_id = model_id.split(\"/\")[0].lower()\n\n    # Check if display name starts with \"Vendor: \" pattern\n    if \": \" in display_name:\n        prefix, rest = display_name.split(\": \", 1)\n        # Normalize both for comparison (remove spaces, dashes, underscores)\n        prefix_normalized = prefix.lower().replace(\" \", \"\").replace(\"-\", \"\")\n        vendor_normalized = vendor_from_id.replace(\"-\", \"\").replace(\"_\", \"\")\n\n        # Match if prefix matches vendor (handles \"Mistral\" vs \"mistralai\", etc.)\n        if (\n            prefix_normalized == vendor_normalized\n            or prefix_normalized.startswith(vendor_normalized)\n            or vendor_normalized.startswith(prefix_normalized)\n        ):\n            return rest\n\n    return display_name\n\n\n# Reasoning model patterns for OpenRouter\nREASONING_MODEL_PATTERNS = frozenset(\n    {\n        \"o1\",\n        \"o3\",\n        \"o4\",\n        \"gpt-5\",\n        \"thinking\",\n        \"reason\",\n        \"deepseek-r1\",\n        \"qwq\",\n    }\n)\n\n\ndef is_reasoning_model(model_id: str, display_name: str) -> bool:\n    \"\"\"Check if a model is a reasoning/thinking model based on its ID or name.\n\n    Used for OpenRouter and other dynamic providers where we need to infer\n    reasoning capability from model identifiers.\n    \"\"\"\n    combined = f\"{model_id} {display_name}\".lower()\n    return any(pattern in combined for pattern in REASONING_MODEL_PATTERNS)\n\n\ndef extract_base_model_name(model: str) -> str | None:\n    \"\"\"Extract base model name by removing date suffixes.\n\n    Returns None if no date suffix was found.\n    \"\"\"\n    patterns = [\n        r\"-\\d{8}$\",  # -20250929\n        r\"-\\d{4}-\\d{2}-\\d{2}$\",  # -2024-08-06\n        r\"@\\d{8}$\",  # @20250219\n    ]\n    for pattern in patterns:\n        if re.search(pattern, model):\n            return re.sub(pattern, \"\", model)\n    return None\n\n\ndef should_filter_as_dated_duplicate(\n    model_name: str, all_model_names: set[str]\n) -> bool:\n    \"\"\"Check if this model is a dated variant and a non-dated version exists.\"\"\"\n    base = extract_base_model_name(model_name)\n    if base and base in all_model_names:\n        return True\n    return False\n\n\ndef filter_model_configurations(\n    model_configurations: list,\n    provider: str,\n) -> list:\n    \"\"\"Filter out obsolete and dated duplicate models from configurations.\n\n    Args:\n        model_configurations: List of ModelConfiguration DB models\n        provider: The provider name (e.g., \"openai\", \"anthropic\")\n\n    Returns:\n        List of ModelConfigurationView objects with obsolete/duplicate models removed\n    \"\"\"\n    # Import here to avoid circular imports\n    from onyx.llm.well_known_providers.llm_provider_options import is_obsolete_model\n    from onyx.server.manage.llm.models import ModelConfigurationView\n\n    all_model_names = {mc.name for mc in model_configurations}\n\n    filtered_configs = []\n    for model_configuration in model_configurations:\n        # Skip obsolete models\n        if is_obsolete_model(model_configuration.name, provider):\n            continue\n        # Skip dated duplicates when non-dated version exists\n        if should_filter_as_dated_duplicate(model_configuration.name, all_model_names):\n            continue\n        filtered_configs.append(\n            ModelConfigurationView.from_model(model_configuration, provider)\n        )\n\n    return filtered_configs\n\n\ndef extract_vendor_from_model_name(model_name: str, provider: str) -> str | None:\n    \"\"\"Extract vendor from model name for aggregator providers.\n\n    Examples:\n        - OpenRouter: \"anthropic/claude-3-5-sonnet\" → \"Anthropic\"\n        - Bedrock: \"anthropic.claude-3-5-sonnet-...\" → \"Anthropic\"\n        - Bedrock: \"us.anthropic.claude-...\" → \"Anthropic\"\n        - Ollama: \"llama3:70b\" → \"Meta\"\n        - Ollama: \"qwen2.5:7b\" → \"Alibaba\"\n    \"\"\"\n    if provider in (LlmProviderNames.OPENROUTER, LlmProviderNames.BIFROST):\n        # Format: \"vendor/model-name\" e.g., \"anthropic/claude-3-5-sonnet\"\n        if \"/\" in model_name:\n            vendor_key = model_name.split(\"/\")[0].lower()\n            return PROVIDER_DISPLAY_NAMES.get(vendor_key, vendor_key.title())\n\n    elif provider == LlmProviderNames.BEDROCK:\n        # Format: \"vendor.model-name\" or \"region.vendor.model-name\"\n        parts = model_name.split(\".\")\n        if len(parts) >= 2:\n            # Check if first part is a region (us, eu, global, etc.)\n            if parts[0] in (\"us\", \"eu\", \"global\", \"ap\", \"apac\"):\n                vendor_key = parts[1].lower() if len(parts) > 2 else parts[0].lower()\n            else:\n                vendor_key = parts[0].lower()\n            return PROVIDER_DISPLAY_NAMES.get(vendor_key, vendor_key.title())\n\n    elif provider == LlmProviderNames.OLLAMA_CHAT:\n        # Format: \"model-name:tag\" e.g., \"llama3:70b\", \"qwen2.5:7b\"\n        # Extract base name (before colon)\n        base_name = model_name.split(\":\")[0].lower()\n        # Match against known model prefixes\n        for prefix, vendor in OLLAMA_MODEL_TO_VENDOR.items():\n            if base_name.startswith(prefix):\n                return vendor\n        # Fallback: capitalize the base name as vendor\n        return base_name.split(\"-\")[0].title()\n\n    elif provider == LlmProviderNames.LM_STUDIO:\n        # LM Studio model IDs can be paths like \"publisher/model-name\"\n        # or simple names. Use MODEL_PREFIX_TO_VENDOR for matching.\n\n        model_lower = model_name.lower()\n        # Check for slash-separated vendor prefix first\n        if \"/\" in model_lower:\n            vendor_key = model_lower.split(\"/\")[0]\n            return PROVIDER_DISPLAY_NAMES.get(vendor_key, vendor_key.title())\n        # Fallback to model prefix matching\n        for prefix, vendor in MODEL_PREFIX_TO_VENDOR.items():\n            if model_lower.startswith(prefix):\n                return PROVIDER_DISPLAY_NAMES.get(vendor, vendor.title())\n        return None\n\n    return None\n\n\ndef is_embedding_model(model_name: str) -> bool:\n    \"\"\"Checks for if a model is an embedding model\"\"\"\n    from litellm import get_model_info\n\n    try:\n        # get_model_info raises on unknown models\n        # default to False\n        model_info = get_model_info(model_name)\n    except Exception:\n        return False\n    is_embedding_mode = model_info.get(\"mode\") == \"embedding\"\n\n    return is_embedding_mode\n"
  },
  {
    "path": "backend/onyx/server/manage/models.py",
    "content": "import re\nfrom datetime import datetime\nfrom enum import Enum\nfrom typing import Any\nfrom typing import TYPE_CHECKING\n\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom pydantic import Field\nfrom pydantic import field_validator\nfrom pydantic import model_validator\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.app_configs import TRACK_EXTERNAL_IDP_EXPIRY\nfrom onyx.configs.constants import AuthType\nfrom onyx.context.search.models import SavedSearchSettings\nfrom onyx.db.enums import DefaultAppMode\nfrom onyx.db.enums import ThemePreference\nfrom onyx.db.memory import MAX_MEMORIES_PER_USER\nfrom onyx.db.models import AllowedAnswerFilters\nfrom onyx.db.models import ChannelConfig\nfrom onyx.db.models import SlackBot as SlackAppModel\nfrom onyx.db.models import SlackChannelConfig as SlackChannelConfigModel\nfrom onyx.db.models import StandardAnswer as StandardAnswerModel\nfrom onyx.db.models import StandardAnswerCategory as StandardAnswerCategoryModel\nfrom onyx.db.models import User\nfrom onyx.onyxbot.slack.config import VALID_SLACK_FILTERS\nfrom onyx.server.features.persona.models import FullPersonaSnapshot\nfrom onyx.server.features.persona.models import PersonaSnapshot\nfrom onyx.server.models import FullUserSnapshot\nfrom onyx.server.models import InvitedUserSnapshot\n\n\nif TYPE_CHECKING:\n    pass\n\n\nclass EmailInviteStatus(str, Enum):\n    SENT = \"SENT\"\n    NOT_CONFIGURED = \"NOT_CONFIGURED\"\n    SEND_FAILED = \"SEND_FAILED\"\n    DISABLED = \"DISABLED\"\n\n\nclass BulkInviteResponse(BaseModel):\n    invited_count: int\n    email_invite_status: EmailInviteStatus\n\n\nclass VersionResponse(BaseModel):\n    backend_version: str\n\n\nclass AuthTypeResponse(BaseModel):\n    auth_type: AuthType\n    # specifies whether the current auth setup requires\n    # users to have verified emails\n    requires_verification: bool\n    anonymous_user_enabled: bool | None = None\n    password_min_length: int\n    # whether there are any users in the system\n    has_users: bool = True\n    oauth_enabled: bool = False\n\n\nclass UserSpecificAssistantPreference(BaseModel):\n    disabled_tool_ids: list[int]\n\n\nUserSpecificAssistantPreferences = dict[int, UserSpecificAssistantPreference]\n\n\nclass UserPreferences(BaseModel):\n    chosen_assistants: list[int] | None = None\n    hidden_assistants: list[int] = []\n    visible_assistants: list[int] = []\n    default_model: str | None = None\n    pinned_assistants: list[int] | None = None\n    shortcut_enabled: bool | None = None\n\n    # These will default to workspace settings on the frontend if not set\n    auto_scroll: bool | None = None\n    temperature_override_enabled: bool | None = None\n    theme_preference: ThemePreference | None = None\n    chat_background: str | None = None\n    default_app_mode: DefaultAppMode = DefaultAppMode.CHAT\n\n    # Voice preferences\n    voice_auto_send: bool | None = None\n    voice_auto_playback: bool | None = None\n    voice_playback_speed: float | None = None\n\n    # controls which tools are enabled for the user for a specific assistant\n    assistant_specific_configs: UserSpecificAssistantPreferences | None = None\n\n\nclass MemoryItem(BaseModel):\n    id: int | None = None\n    content: str\n\n\nclass UserPersonalization(BaseModel):\n    name: str = \"\"\n    role: str = \"\"\n    use_memories: bool = True\n    enable_memory_tool: bool = True\n    memories: list[MemoryItem] = Field(default_factory=list)\n    user_preferences: str = \"\"\n\n\nclass TenantSnapshot(BaseModel):\n    tenant_id: str\n    number_of_users: int\n\n\nclass TenantInfo(BaseModel):\n    invitation: TenantSnapshot | None = None\n    new_tenant: TenantSnapshot | None = None\n\n\nclass UserInfo(BaseModel):\n    id: str\n    email: str\n    is_active: bool\n    is_superuser: bool\n    is_verified: bool\n    role: UserRole\n    preferences: UserPreferences\n    personalization: UserPersonalization = Field(default_factory=UserPersonalization)\n    oidc_expiry: datetime | None = None\n    current_token_created_at: datetime | None = None\n    current_token_expiry_length: int | None = None\n    is_cloud_superuser: bool = False\n    team_name: str | None = None\n    is_anonymous_user: bool | None = None\n    password_configured: bool | None = None\n    tenant_info: TenantInfo | None = None\n\n    @classmethod\n    def from_model(\n        cls,\n        user: User,\n        current_token_created_at: datetime | None = None,\n        expiry_length: int | None = None,\n        is_cloud_superuser: bool = False,\n        team_name: str | None = None,\n        is_anonymous_user: bool | None = None,\n        tenant_info: TenantInfo | None = None,\n        assistant_specific_configs: UserSpecificAssistantPreferences | None = None,\n        memories: list[MemoryItem] | None = None,\n    ) -> \"UserInfo\":\n        return cls(\n            id=str(user.id),\n            email=user.email,\n            is_active=user.is_active,\n            is_superuser=user.is_superuser,\n            is_verified=user.is_verified,\n            role=user.role,\n            password_configured=user.password_configured,\n            preferences=(\n                UserPreferences(\n                    shortcut_enabled=user.shortcut_enabled,\n                    chosen_assistants=user.chosen_assistants,\n                    default_model=user.default_model,\n                    hidden_assistants=user.hidden_assistants,\n                    pinned_assistants=user.pinned_assistants,\n                    visible_assistants=user.visible_assistants,\n                    auto_scroll=user.auto_scroll,\n                    temperature_override_enabled=user.temperature_override_enabled,\n                    theme_preference=user.theme_preference,\n                    chat_background=user.chat_background,\n                    default_app_mode=user.default_app_mode,\n                    voice_auto_send=user.voice_auto_send,\n                    voice_auto_playback=user.voice_auto_playback,\n                    voice_playback_speed=user.voice_playback_speed,\n                    assistant_specific_configs=assistant_specific_configs,\n                )\n            ),\n            team_name=team_name,\n            # set to None if TRACK_EXTERNAL_IDP_EXPIRY is False so that we avoid cases\n            # where they previously had this set + used OIDC, and now they switched to\n            # basic auth are now constantly getting redirected back to the login page\n            # since their \"oidc_expiry is old\"\n            oidc_expiry=user.oidc_expiry if TRACK_EXTERNAL_IDP_EXPIRY else None,\n            current_token_created_at=current_token_created_at,\n            current_token_expiry_length=expiry_length,\n            is_cloud_superuser=is_cloud_superuser,\n            is_anonymous_user=is_anonymous_user,\n            tenant_info=tenant_info,\n            personalization=UserPersonalization(\n                name=user.personal_name or \"\",\n                role=user.personal_role or \"\",\n                use_memories=user.use_memories,\n                enable_memory_tool=user.enable_memory_tool,\n                memories=memories or [],\n                user_preferences=user.user_preferences or \"\",\n            ),\n        )\n\n\nclass UserByEmail(BaseModel):\n    user_email: str\n\n\nclass UserRoleUpdateRequest(BaseModel):\n    user_email: str\n    new_role: UserRole\n    explicit_override: bool = False\n\n\nclass UserRoleResponse(BaseModel):\n    role: str\n\n\nclass BoostDoc(BaseModel):\n    document_id: str\n    semantic_id: str\n    link: str\n    boost: int\n    hidden: bool\n\n\nclass BoostUpdateRequest(BaseModel):\n    document_id: str\n    boost: int\n\n\nclass HiddenUpdateRequest(BaseModel):\n    document_id: str\n    hidden: bool\n\n\nclass AutoScrollRequest(BaseModel):\n    auto_scroll: bool | None\n\n\nclass ThemePreferenceRequest(BaseModel):\n    theme_preference: ThemePreference\n\n\nclass DefaultAppModeRequest(BaseModel):\n    default_app_mode: DefaultAppMode\n\n\nclass ChatBackgroundRequest(BaseModel):\n    chat_background: str | None\n\n\nclass VoiceSettingsUpdateRequest(BaseModel):\n    auto_send: bool | None = None\n    auto_playback: bool | None = None\n    playback_speed: float | None = Field(default=None, ge=0.5, le=2.0)\n\n\nclass PersonalizationUpdateRequest(BaseModel):\n    name: str | None = None\n    role: str | None = None\n    use_memories: bool | None = None\n    enable_memory_tool: bool | None = None\n    memories: list[MemoryItem] | None = None\n    user_preferences: str | None = Field(default=None, max_length=500)\n\n    @field_validator(\"memories\", mode=\"before\")\n    @classmethod\n    def validate_memory_count(\n        cls, value: list[MemoryItem] | None\n    ) -> list[MemoryItem] | None:\n        if value is not None and len(value) > MAX_MEMORIES_PER_USER:\n            raise ValueError(f\"Maximum of {MAX_MEMORIES_PER_USER} memories allowed\")\n        return value\n\n\nclass SlackBotCreationRequest(BaseModel):\n    name: str\n    enabled: bool\n\n    bot_token: str\n    app_token: str\n    user_token: str | None = None\n\n\nclass SlackBotTokens(BaseModel):\n    bot_token: str\n    app_token: str\n    user_token: str | None = None\n    model_config = ConfigDict(frozen=True)\n\n\n# TODO No longer in use, remove later\nclass SlackBotResponseType(str, Enum):\n    QUOTES = \"quotes\"\n    CITATIONS = \"citations\"\n\n\nclass SlackChannelConfigCreationRequest(BaseModel):\n    slack_bot_id: int\n    # currently, a persona is created for each Slack channel config\n    # in the future, `document_sets` will probably be replaced\n    # by an optional `PersonaSnapshot` object. Keeping it like this\n    # for now for simplicity / speed of development\n    document_sets: list[int] | None = None\n\n    # NOTE: only one of `document_sets` / `persona_id` should be set\n    persona_id: int | None = None\n\n    channel_name: str\n    respond_tag_only: bool = False\n    respond_to_bots: bool = False\n    is_ephemeral: bool = False\n    show_continue_in_web_ui: bool = False\n    enable_auto_filters: bool = False\n    # If no team members, assume respond in the channel to everyone\n    respond_member_group_list: list[str] = Field(default_factory=list)\n    answer_filters: list[AllowedAnswerFilters] = Field(default_factory=list)\n    # list of user emails\n    follow_up_tags: list[str] | None = None\n    response_type: SlackBotResponseType\n    # XXX this is going away soon\n    standard_answer_categories: list[int] = Field(default_factory=list)\n    disabled: bool = False\n\n    @field_validator(\"answer_filters\", mode=\"before\")\n    @classmethod\n    def validate_filters(cls, value: list[str]) -> list[str]:\n        if any(test not in VALID_SLACK_FILTERS for test in value):\n            raise ValueError(\n                f\"Slack Answer filters must be one of {VALID_SLACK_FILTERS}\"\n            )\n        return value\n\n    @model_validator(mode=\"after\")\n    def validate_document_sets_and_persona_id(\n        self,\n    ) -> \"SlackChannelConfigCreationRequest\":\n        if self.document_sets and self.persona_id:\n            raise ValueError(\"Only one of `document_sets` / `persona_id` should be set\")\n\n        return self\n\n\nclass SlackChannelConfig(BaseModel):\n    slack_bot_id: int\n    id: int\n    persona: PersonaSnapshot | None\n    channel_config: ChannelConfig\n    # XXX this is going away soon\n    standard_answer_categories: list[\"StandardAnswerCategory\"]\n    enable_auto_filters: bool\n    is_default: bool\n\n    @classmethod\n    def from_model(\n        cls, slack_channel_config_model: SlackChannelConfigModel\n    ) -> \"SlackChannelConfig\":\n        return cls(\n            id=slack_channel_config_model.id,\n            slack_bot_id=slack_channel_config_model.slack_bot_id,\n            persona=(\n                FullPersonaSnapshot.from_model(\n                    slack_channel_config_model.persona, allow_deleted=True\n                )\n                if slack_channel_config_model.persona\n                else None\n            ),\n            channel_config=slack_channel_config_model.channel_config,\n            # XXX this is going away soon\n            standard_answer_categories=[\n                StandardAnswerCategory.from_model(standard_answer_category_model)\n                for standard_answer_category_model in slack_channel_config_model.standard_answer_categories\n            ],\n            enable_auto_filters=slack_channel_config_model.enable_auto_filters,\n            is_default=slack_channel_config_model.is_default,\n        )\n\n\nclass SlackBot(BaseModel):\n    \"\"\"\n    This model is identical to the SlackAppModel, but it contains\n    a `configs_count` field to make it easier to fetch the number\n    of SlackChannelConfigs associated with a SlackBot.\n    \"\"\"\n\n    id: int\n    name: str\n    enabled: bool\n    configs_count: int\n\n    bot_token: str\n    app_token: str\n    user_token: str | None = None\n\n    @classmethod\n    def from_model(cls, slack_bot_model: SlackAppModel) -> \"SlackBot\":\n        return cls(\n            id=slack_bot_model.id,\n            name=slack_bot_model.name,\n            enabled=slack_bot_model.enabled,\n            configs_count=len(slack_bot_model.slack_channel_configs),\n            bot_token=(\n                slack_bot_model.bot_token.get_value(apply_mask=True)\n                if slack_bot_model.bot_token\n                else \"\"\n            ),\n            app_token=(\n                slack_bot_model.app_token.get_value(apply_mask=True)\n                if slack_bot_model.app_token\n                else \"\"\n            ),\n            user_token=(\n                slack_bot_model.user_token.get_value(apply_mask=True)\n                if slack_bot_model.user_token\n                else None\n            ),\n        )\n\n\nclass FullModelVersionResponse(BaseModel):\n    current_settings: SavedSearchSettings\n    secondary_settings: SavedSearchSettings | None\n\n\nclass AllUsersResponse(BaseModel):\n    accepted: list[FullUserSnapshot]\n    invited: list[InvitedUserSnapshot]\n    slack_users: list[FullUserSnapshot]\n    accepted_pages: int\n    invited_pages: int\n    slack_users_pages: int\n\n\nclass SlackChannel(BaseModel):\n    id: str\n    name: str\n\n\n\"\"\"\nStandard Answer Models\n\nee only, but needs to be here since it's imported by non-ee models.\n\"\"\"\n\n\nclass StandardAnswerCategoryCreationRequest(BaseModel):\n    name: str\n\n\nclass StandardAnswerCategory(BaseModel):\n    id: int\n    name: str\n\n    @classmethod\n    def from_model(\n        cls, standard_answer_category: StandardAnswerCategoryModel\n    ) -> \"StandardAnswerCategory\":\n        return cls(\n            id=standard_answer_category.id,\n            name=standard_answer_category.name,\n        )\n\n\nclass StandardAnswer(BaseModel):\n    id: int\n    keyword: str\n    answer: str\n    categories: list[StandardAnswerCategory]\n    match_regex: bool\n    match_any_keywords: bool\n\n    @classmethod\n    def from_model(cls, standard_answer_model: StandardAnswerModel) -> \"StandardAnswer\":\n        return cls(\n            id=standard_answer_model.id,\n            keyword=standard_answer_model.keyword,\n            answer=standard_answer_model.answer,\n            match_regex=standard_answer_model.match_regex,\n            match_any_keywords=standard_answer_model.match_any_keywords,\n            categories=[\n                StandardAnswerCategory.from_model(standard_answer_category_model)\n                for standard_answer_category_model in standard_answer_model.categories\n            ],\n        )\n\n\nclass StandardAnswerCreationRequest(BaseModel):\n    keyword: str\n    answer: str\n    categories: list[int]\n    match_regex: bool\n    match_any_keywords: bool\n\n    @field_validator(\"categories\", mode=\"before\")\n    @classmethod\n    def validate_categories(cls, value: list[int]) -> list[int]:\n        if len(value) < 1:\n            raise ValueError(\n                \"At least one category must be attached to a standard answer\"\n            )\n        return value\n\n    @model_validator(mode=\"after\")\n    def validate_only_match_any_if_not_regex(self) -> Any:\n        if self.match_regex and self.match_any_keywords:\n            raise ValueError(\n                \"Can only match any keywords in keyword mode, not regex mode\"\n            )\n\n        return self\n\n    @model_validator(mode=\"after\")\n    def validate_keyword_if_regex(self) -> Any:\n        if not self.match_regex:\n            # no validation for keywords\n            return self\n\n        try:\n            re.compile(self.keyword)\n            return self\n        except re.error as err:\n            if isinstance(err.pattern, bytes):\n                raise ValueError(\n                    f'invalid regex pattern r\"{err.pattern.decode()}\" in `keyword`: {err.msg}'\n                )\n            else:\n                pattern = f'r\"{err.pattern}\"' if err.pattern is not None else \"\"\n                raise ValueError(\n                    \" \".join(\n                        [\"invalid regex pattern\", pattern, f\"in `keyword`: {err.msg}\"]\n                    )\n                )\n\n\nclass ContainerVersions(BaseModel):\n    onyx: str\n    relational_db: str\n    index: str\n    nginx: str\n\n\nclass AllVersions(BaseModel):\n    stable: ContainerVersions\n    dev: ContainerVersions\n    migration: ContainerVersions\n"
  },
  {
    "path": "backend/onyx/server/manage/opensearch_migration/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.opensearch_migration import get_opensearch_migration_state\nfrom onyx.db.opensearch_migration import get_opensearch_retrieval_state\nfrom onyx.db.opensearch_migration import set_enable_opensearch_retrieval_with_commit\nfrom onyx.server.manage.opensearch_migration.models import (\n    OpenSearchMigrationStatusResponse,\n)\nfrom onyx.server.manage.opensearch_migration.models import (\n    OpenSearchRetrievalStatusRequest,\n)\nfrom onyx.server.manage.opensearch_migration.models import (\n    OpenSearchRetrievalStatusResponse,\n)\n\nadmin_router = APIRouter(prefix=\"/admin/opensearch-migration\")\n\n\n@admin_router.get(\"/status\")\ndef get_opensearch_migration_status(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> OpenSearchMigrationStatusResponse:\n    (\n        total_chunks_migrated,\n        created_at,\n        migration_completed_at,\n        approx_chunk_count_in_vespa,\n    ) = get_opensearch_migration_state(db_session)\n    return OpenSearchMigrationStatusResponse(\n        total_chunks_migrated=total_chunks_migrated,\n        created_at=created_at,\n        migration_completed_at=migration_completed_at,\n        approx_chunk_count_in_vespa=approx_chunk_count_in_vespa,\n    )\n\n\n@admin_router.get(\"/retrieval\")\ndef get_opensearch_retrieval_status(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> OpenSearchRetrievalStatusResponse:\n    enable_opensearch_retrieval = get_opensearch_retrieval_state(db_session)\n    return OpenSearchRetrievalStatusResponse(\n        enable_opensearch_retrieval=enable_opensearch_retrieval,\n    )\n\n\n@admin_router.put(\"/retrieval\")\ndef set_opensearch_retrieval_status(\n    request: OpenSearchRetrievalStatusRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> OpenSearchRetrievalStatusResponse:\n    set_enable_opensearch_retrieval_with_commit(\n        db_session, request.enable_opensearch_retrieval\n    )\n    return OpenSearchRetrievalStatusResponse(\n        enable_opensearch_retrieval=request.enable_opensearch_retrieval,\n    )\n"
  },
  {
    "path": "backend/onyx/server/manage/opensearch_migration/models.py",
    "content": "from datetime import datetime\n\nfrom pydantic import BaseModel\n\n\nclass OpenSearchMigrationStatusResponse(BaseModel):\n    model_config = {\"frozen\": True}\n    total_chunks_migrated: int\n    created_at: datetime | None\n    migration_completed_at: datetime | None\n    approx_chunk_count_in_vespa: int | None\n\n\nclass OpenSearchRetrievalStatusRequest(BaseModel):\n    model_config = {\"frozen\": True}\n    enable_opensearch_retrieval: bool\n\n\nclass OpenSearchRetrievalStatusResponse(BaseModel):\n    model_config = {\"frozen\": True}\n    enable_opensearch_retrieval: bool\n"
  },
  {
    "path": "backend/onyx/server/manage/search_settings.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import status\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP\nfrom onyx.context.search.models import SavedSearchSettings\nfrom onyx.context.search.models import SearchSettingsCreationRequest\nfrom onyx.db.connector_credential_pair import get_connector_credential_pairs\nfrom onyx.db.connector_credential_pair import resync_cc_pair\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.index_attempt import expire_index_attempts\nfrom onyx.db.llm import fetch_existing_llm_provider\nfrom onyx.db.llm import update_default_contextual_model\nfrom onyx.db.llm import update_no_default_contextual_rag_provider\nfrom onyx.db.models import IndexModelStatus\nfrom onyx.db.models import User\nfrom onyx.db.search_settings import create_search_settings\nfrom onyx.db.search_settings import delete_search_settings\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.db.search_settings import get_embedding_provider_from_provider_type\nfrom onyx.db.search_settings import get_secondary_search_settings\nfrom onyx.db.search_settings import update_current_search_settings\nfrom onyx.db.search_settings import update_search_settings_status\nfrom onyx.document_index.factory import get_all_document_indices\nfrom onyx.document_index.factory import get_default_document_index\nfrom onyx.file_processing.unstructured import delete_unstructured_api_key\nfrom onyx.file_processing.unstructured import get_unstructured_api_key\nfrom onyx.file_processing.unstructured import update_unstructured_api_key\nfrom onyx.natural_language_processing.search_nlp_models import clean_model_name\nfrom onyx.server.manage.embedding.models import SearchSettingsDeleteRequest\nfrom onyx.server.manage.models import FullModelVersionResponse\nfrom onyx.server.models import IdReturn\nfrom onyx.server.utils_vector_db import require_vector_db\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import ALT_INDEX_SUFFIX\nfrom shared_configs.configs import MULTI_TENANT\n\nrouter = APIRouter(prefix=\"/search-settings\")\nlogger = setup_logger()\n\n\n@router.post(\"/set-new-search-settings\", dependencies=[Depends(require_vector_db)])\ndef set_new_search_settings(\n    search_settings_new: SearchSettingsCreationRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> IdReturn:\n    \"\"\"\n    Creates a new SearchSettings row and cancels the previous secondary indexing\n    if any exists.\n    \"\"\"\n    if search_settings_new.index_name:\n        logger.warning(\"Index name was specified by request, this is not suggested\")\n\n    # Disallow contextual RAG for cloud deployments.\n    if MULTI_TENANT and search_settings_new.enable_contextual_rag:\n        raise HTTPException(\n            status_code=status.HTTP_400_BAD_REQUEST,\n            detail=\"Contextual RAG disabled in Onyx Cloud\",\n        )\n\n    # Validate cloud provider exists or create new LiteLLM provider.\n    if search_settings_new.provider_type is not None:\n        cloud_provider = get_embedding_provider_from_provider_type(\n            db_session, provider_type=search_settings_new.provider_type\n        )\n\n        if cloud_provider is None:\n            raise HTTPException(\n                status_code=status.HTTP_400_BAD_REQUEST,\n                detail=f\"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}\",\n            )\n\n    validate_contextual_rag_model(\n        provider_name=search_settings_new.contextual_rag_llm_provider,\n        model_name=search_settings_new.contextual_rag_llm_name,\n        db_session=db_session,\n    )\n\n    search_settings = get_current_search_settings(db_session)\n\n    if search_settings_new.index_name is None:\n        # We define index name here.\n        index_name = f\"danswer_chunk_{clean_model_name(search_settings_new.model_name)}\"\n        if (\n            search_settings_new.model_name == search_settings.model_name\n            and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)\n        ):\n            index_name += ALT_INDEX_SUFFIX\n        search_values = search_settings_new.model_dump()\n        search_values[\"index_name\"] = index_name\n        new_search_settings_request = SavedSearchSettings(**search_values)\n    else:\n        new_search_settings_request = SavedSearchSettings(\n            **search_settings_new.model_dump()\n        )\n\n    secondary_search_settings = get_secondary_search_settings(db_session)\n\n    if secondary_search_settings:\n        # Cancel any background indexing jobs.\n        expire_index_attempts(\n            search_settings_id=secondary_search_settings.id, db_session=db_session\n        )\n\n        # Mark previous model as a past model directly.\n        update_search_settings_status(\n            search_settings=secondary_search_settings,\n            new_status=IndexModelStatus.PAST,\n            db_session=db_session,\n        )\n\n    new_search_settings = create_search_settings(\n        search_settings=new_search_settings_request, db_session=db_session\n    )\n\n    # Ensure the document indices have the new index immediately.\n    document_indices = get_all_document_indices(search_settings, new_search_settings)\n    for document_index in document_indices:\n        document_index.ensure_indices_exist(\n            primary_embedding_dim=search_settings.final_embedding_dim,\n            primary_embedding_precision=search_settings.embedding_precision,\n            secondary_index_embedding_dim=new_search_settings.final_embedding_dim,\n            secondary_index_embedding_precision=new_search_settings.embedding_precision,\n        )\n\n    # Pause index attempts for the currently in-use index to preserve resources.\n    if DISABLE_INDEX_UPDATE_ON_SWAP:\n        expire_index_attempts(\n            search_settings_id=search_settings.id, db_session=db_session\n        )\n        for cc_pair in get_connector_credential_pairs(db_session):\n            resync_cc_pair(\n                cc_pair=cc_pair,\n                search_settings_id=new_search_settings.id,\n                db_session=db_session,\n            )\n\n    db_session.commit()\n    return IdReturn(id=new_search_settings.id)\n\n\n@router.post(\"/cancel-new-embedding\", dependencies=[Depends(require_vector_db)])\ndef cancel_new_embedding(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    secondary_search_settings = get_secondary_search_settings(db_session)\n\n    if secondary_search_settings:\n        expire_index_attempts(\n            search_settings_id=secondary_search_settings.id, db_session=db_session\n        )\n\n        update_search_settings_status(\n            search_settings=secondary_search_settings,\n            new_status=IndexModelStatus.PAST,\n            db_session=db_session,\n        )\n\n        # remove the old index from the vector db\n        primary_search_settings = get_current_search_settings(db_session)\n        document_index = get_default_document_index(\n            primary_search_settings, None, db_session\n        )\n        document_index.ensure_indices_exist(\n            primary_embedding_dim=primary_search_settings.final_embedding_dim,\n            primary_embedding_precision=primary_search_settings.embedding_precision,\n            # just finished swap, no more secondary index\n            secondary_index_embedding_dim=None,\n            secondary_index_embedding_precision=None,\n        )\n\n\n@router.delete(\"/delete-search-settings\")\ndef delete_search_settings_endpoint(\n    deletion_request: SearchSettingsDeleteRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    try:\n        delete_search_settings(\n            db_session=db_session,\n            search_settings_id=deletion_request.search_settings_id,\n        )\n    except ValueError as e:\n        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))\n\n\n@router.get(\"/get-current-search-settings\")\ndef get_current_search_settings_endpoint(\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> SavedSearchSettings:\n    current_search_settings = get_current_search_settings(db_session)\n    return SavedSearchSettings.from_db_model(current_search_settings)\n\n\n@router.get(\"/get-secondary-search-settings\")\ndef get_secondary_search_settings_endpoint(\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> SavedSearchSettings | None:\n    secondary_search_settings = get_secondary_search_settings(db_session)\n    if not secondary_search_settings:\n        return None\n\n    return SavedSearchSettings.from_db_model(secondary_search_settings)\n\n\n@router.get(\"/get-all-search-settings\")\ndef get_all_search_settings(\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> FullModelVersionResponse:\n    current_search_settings = get_current_search_settings(db_session)\n    secondary_search_settings = get_secondary_search_settings(db_session)\n    return FullModelVersionResponse(\n        current_settings=SavedSearchSettings.from_db_model(current_search_settings),\n        secondary_settings=(\n            SavedSearchSettings.from_db_model(secondary_search_settings)\n            if secondary_search_settings\n            else None\n        ),\n    )\n\n\n# Updates current non-reindex search settings\n@router.post(\"/update-inference-settings\")\ndef update_saved_search_settings(\n    search_settings: SavedSearchSettings,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    # Disallow contextual RAG for cloud deployments\n    if MULTI_TENANT and search_settings.enable_contextual_rag:\n        raise HTTPException(\n            status_code=status.HTTP_400_BAD_REQUEST,\n            detail=\"Contextual RAG disabled in Onyx Cloud\",\n        )\n\n    validate_contextual_rag_model(\n        provider_name=search_settings.contextual_rag_llm_provider,\n        model_name=search_settings.contextual_rag_llm_name,\n        db_session=db_session,\n    )\n\n    update_current_search_settings(\n        search_settings=search_settings, db_session=db_session\n    )\n\n    logger.info(\n        f\"Updated current search settings to {search_settings.model_dump_json()}\"\n    )\n\n    # Re-sync default to match PRESENT search settings\n    _sync_default_contextual_model(db_session)\n\n\n@router.get(\"/unstructured-api-key-set\")\ndef unstructured_api_key_set(\n    _: User = Depends(current_admin_user),\n) -> bool:\n    api_key = get_unstructured_api_key()\n    return api_key is not None\n\n\n@router.put(\"/upsert-unstructured-api-key\")\ndef upsert_unstructured_api_key(\n    unstructured_api_key: str,\n    _: User = Depends(current_admin_user),\n) -> None:\n    update_unstructured_api_key(unstructured_api_key)\n\n\n@router.delete(\"/delete-unstructured-api-key\")\ndef delete_unstructured_api_key_endpoint(\n    _: User = Depends(current_admin_user),\n) -> None:\n    delete_unstructured_api_key()\n\n\ndef validate_contextual_rag_model(\n    provider_name: str | None,\n    model_name: str | None,\n    db_session: Session,\n) -> None:\n    if error_msg := _validate_contextual_rag_model(\n        provider_name=provider_name,\n        model_name=model_name,\n        db_session=db_session,\n    ):\n        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=error_msg)\n\n\ndef _validate_contextual_rag_model(\n    provider_name: str | None,\n    model_name: str | None,\n    db_session: Session,\n) -> str | None:\n    if provider_name is None and model_name is None:\n        return None\n    if not provider_name or not model_name:\n        return \"Provider name and model name are required\"\n\n    provider = fetch_existing_llm_provider(name=provider_name, db_session=db_session)\n    if not provider:\n        return f\"Provider {provider_name} not found\"\n    model_config = next(\n        (mc for mc in provider.model_configurations if mc.name == model_name), None\n    )\n    if not model_config:\n        return f\"Model {model_name} not found in provider {provider_name}\"\n\n    return None\n\n\ndef _sync_default_contextual_model(db_session: Session) -> None:\n    \"\"\"Syncs the default CONTEXTUAL_RAG flow to match the PRESENT search settings.\"\"\"\n    primary = get_current_search_settings(db_session)\n\n    try:\n        update_default_contextual_model(\n            db_session=db_session,\n            enable_contextual_rag=primary.enable_contextual_rag,\n            contextual_rag_llm_provider=primary.contextual_rag_llm_provider,\n            contextual_rag_llm_name=primary.contextual_rag_llm_name,\n        )\n    except ValueError as e:\n        logger.error(\n            f\"Error syncing default contextual model, defaulting to no contextual model: {e}\"\n        )\n        update_no_default_contextual_rag_provider(\n            db_session=db_session,\n        )\n"
  },
  {
    "path": "backend/onyx/server/manage/slack_bot.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.configs.constants import MilestoneRecordType\nfrom onyx.db.constants import SLACK_BOT_PERSONA_PREFIX\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import ChannelConfig\nfrom onyx.db.models import User\nfrom onyx.db.persona import get_persona_by_id\nfrom onyx.db.slack_bot import fetch_slack_bot\nfrom onyx.db.slack_bot import fetch_slack_bots\nfrom onyx.db.slack_bot import insert_slack_bot\nfrom onyx.db.slack_bot import remove_slack_bot\nfrom onyx.db.slack_bot import update_slack_bot\nfrom onyx.db.slack_channel_config import create_slack_channel_persona\nfrom onyx.db.slack_channel_config import fetch_slack_channel_config\nfrom onyx.db.slack_channel_config import fetch_slack_channel_configs\nfrom onyx.db.slack_channel_config import insert_slack_channel_config\nfrom onyx.db.slack_channel_config import remove_slack_channel_config\nfrom onyx.db.slack_channel_config import update_slack_channel_config\nfrom onyx.onyxbot.slack.config import validate_channel_name\nfrom onyx.server.manage.models import SlackBot\nfrom onyx.server.manage.models import SlackBotCreationRequest\nfrom onyx.server.manage.models import SlackChannelConfig\nfrom onyx.server.manage.models import SlackChannelConfigCreationRequest\nfrom onyx.server.manage.validate_tokens import validate_app_token\nfrom onyx.server.manage.validate_tokens import validate_bot_token\nfrom onyx.server.manage.validate_tokens import validate_user_token\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.telemetry import mt_cloud_telemetry\nfrom shared_configs.contextvars import get_current_tenant_id\n\nSLACK_API_CHANNELS_PER_PAGE = 100\nSLACK_MAX_RETURNED_CHANNELS = 500\n\nlogger = setup_logger()\n\n\nrouter = APIRouter(prefix=\"/manage\")\n\n\ndef _form_channel_config(\n    db_session: Session,\n    slack_channel_config_creation_request: SlackChannelConfigCreationRequest,\n    current_slack_channel_config_id: int | None,\n) -> ChannelConfig:\n    raw_channel_name = slack_channel_config_creation_request.channel_name\n    respond_tag_only = slack_channel_config_creation_request.respond_tag_only\n    respond_member_group_list = (\n        slack_channel_config_creation_request.respond_member_group_list\n    )\n    answer_filters = slack_channel_config_creation_request.answer_filters\n    follow_up_tags = slack_channel_config_creation_request.follow_up_tags\n\n    try:\n        cleaned_channel_name = validate_channel_name(\n            db_session=db_session,\n            channel_name=raw_channel_name,\n            current_slack_channel_config_id=current_slack_channel_config_id,\n            current_slack_bot_id=slack_channel_config_creation_request.slack_bot_id,\n        )\n    except ValueError as e:\n        raise HTTPException(\n            status_code=400,\n            detail=str(e),\n        )\n\n    if respond_tag_only and respond_member_group_list:\n        raise ValueError(\n            \"Cannot set OnyxBot to only respond to tags only and also respond to a predetermined set of users.\"\n        )\n\n    if (\n        slack_channel_config_creation_request.is_ephemeral\n        and slack_channel_config_creation_request.respond_member_group_list\n    ):\n        raise ValueError(\n            \"Cannot set OnyxBot to respond to users in a private (ephemeral) message \"\n            \"and also respond to a selected list of users.\"\n        )\n\n    channel_config: ChannelConfig = {\n        \"channel_name\": cleaned_channel_name,\n    }\n    if respond_tag_only is not None:\n        channel_config[\"respond_tag_only\"] = respond_tag_only\n    if respond_member_group_list:\n        channel_config[\"respond_member_group_list\"] = respond_member_group_list\n    if answer_filters:\n        channel_config[\"answer_filters\"] = answer_filters\n    if follow_up_tags is not None:\n        channel_config[\"follow_up_tags\"] = follow_up_tags\n\n    channel_config[\"show_continue_in_web_ui\"] = (\n        slack_channel_config_creation_request.show_continue_in_web_ui\n    )\n\n    channel_config[\"respond_to_bots\"] = (\n        slack_channel_config_creation_request.respond_to_bots\n    )\n\n    channel_config[\"is_ephemeral\"] = slack_channel_config_creation_request.is_ephemeral\n\n    channel_config[\"disabled\"] = slack_channel_config_creation_request.disabled\n\n    return channel_config\n\n\n@router.post(\"/admin/slack-app/channel\")\ndef create_slack_channel_config(\n    slack_channel_config_creation_request: SlackChannelConfigCreationRequest,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> SlackChannelConfig:\n    channel_config = _form_channel_config(\n        db_session=db_session,\n        slack_channel_config_creation_request=slack_channel_config_creation_request,\n        current_slack_channel_config_id=None,\n    )\n\n    if channel_config[\"channel_name\"] is None:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Channel name is required\",\n        )\n\n    persona_id = None\n    if slack_channel_config_creation_request.persona_id is not None:\n        persona_id = slack_channel_config_creation_request.persona_id\n    elif slack_channel_config_creation_request.document_sets:\n        persona_id = create_slack_channel_persona(\n            db_session=db_session,\n            channel_name=channel_config[\"channel_name\"],\n            document_set_ids=slack_channel_config_creation_request.document_sets,\n            existing_persona_id=None,\n        ).id\n\n    slack_channel_config_model = insert_slack_channel_config(\n        db_session=db_session,\n        slack_bot_id=slack_channel_config_creation_request.slack_bot_id,\n        persona_id=persona_id,\n        channel_config=channel_config,\n        standard_answer_category_ids=slack_channel_config_creation_request.standard_answer_categories,\n        enable_auto_filters=slack_channel_config_creation_request.enable_auto_filters,\n    )\n    return SlackChannelConfig.from_model(slack_channel_config_model)\n\n\n@router.patch(\"/admin/slack-app/channel/{slack_channel_config_id}\")\ndef patch_slack_channel_config(\n    slack_channel_config_id: int,\n    slack_channel_config_creation_request: SlackChannelConfigCreationRequest,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> SlackChannelConfig:\n    channel_config = _form_channel_config(\n        db_session=db_session,\n        slack_channel_config_creation_request=slack_channel_config_creation_request,\n        current_slack_channel_config_id=slack_channel_config_id,\n    )\n\n    persona_id = None\n    if slack_channel_config_creation_request.persona_id is not None:\n        persona_id = slack_channel_config_creation_request.persona_id\n    elif slack_channel_config_creation_request.document_sets:\n        existing_slack_channel_config = fetch_slack_channel_config(\n            db_session=db_session, slack_channel_config_id=slack_channel_config_id\n        )\n        if existing_slack_channel_config is None:\n            raise HTTPException(\n                status_code=404,\n                detail=\"Slack channel config not found\",\n            )\n\n        existing_persona_id = existing_slack_channel_config.persona_id\n        if existing_persona_id is not None:\n            persona = get_persona_by_id(\n                persona_id=existing_persona_id,\n                user=None,\n                db_session=db_session,\n                is_for_edit=False,\n            )\n\n            if not persona.name.startswith(SLACK_BOT_PERSONA_PREFIX):\n                # Don't update actual non-slackbot specific personas\n                # Since this one specified document sets, we have to create a new persona\n                # for this OnyxBot config\n                existing_persona_id = None\n            else:\n                existing_persona_id = existing_slack_channel_config.persona_id\n\n        persona_id = create_slack_channel_persona(\n            db_session=db_session,\n            channel_name=channel_config[\"channel_name\"],\n            document_set_ids=slack_channel_config_creation_request.document_sets,\n            existing_persona_id=existing_persona_id,\n        ).id\n\n    slack_channel_config_model = update_slack_channel_config(\n        db_session=db_session,\n        slack_channel_config_id=slack_channel_config_id,\n        persona_id=persona_id,\n        channel_config=channel_config,\n        standard_answer_category_ids=slack_channel_config_creation_request.standard_answer_categories,\n        enable_auto_filters=slack_channel_config_creation_request.enable_auto_filters,\n        disabled=slack_channel_config_creation_request.disabled,\n    )\n    return SlackChannelConfig.from_model(slack_channel_config_model)\n\n\n@router.delete(\"/admin/slack-app/channel/{slack_channel_config_id}\")\ndef delete_slack_channel_config(\n    slack_channel_config_id: int,\n    db_session: Session = Depends(get_session),\n    user: User = Depends(current_admin_user),\n) -> None:\n    remove_slack_channel_config(\n        db_session=db_session,\n        slack_channel_config_id=slack_channel_config_id,\n        user=user,\n    )\n\n\n@router.get(\"/admin/slack-app/channel\")\ndef list_slack_channel_configs(\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> list[SlackChannelConfig]:\n    slack_channel_config_models = fetch_slack_channel_configs(db_session=db_session)\n    return [\n        SlackChannelConfig.from_model(slack_channel_config_model)\n        for slack_channel_config_model in slack_channel_config_models\n    ]\n\n\n@router.post(\"/admin/slack-app/bots\")\ndef create_bot(\n    slack_bot_creation_request: SlackBotCreationRequest,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> SlackBot:\n    tenant_id = get_current_tenant_id()\n\n    validate_app_token(slack_bot_creation_request.app_token)\n    validate_bot_token(slack_bot_creation_request.bot_token)\n    validate_user_token(slack_bot_creation_request.user_token)\n\n    slack_bot_model = insert_slack_bot(\n        db_session=db_session,\n        name=slack_bot_creation_request.name,\n        enabled=slack_bot_creation_request.enabled,\n        bot_token=slack_bot_creation_request.bot_token,\n        app_token=slack_bot_creation_request.app_token,\n        user_token=slack_bot_creation_request.user_token,\n    )\n\n    # Create a default Slack channel config\n    default_channel_config = ChannelConfig(\n        channel_name=None,\n        respond_tag_only=True,\n    )\n    insert_slack_channel_config(\n        db_session=db_session,\n        slack_bot_id=slack_bot_model.id,\n        persona_id=None,\n        channel_config=default_channel_config,\n        standard_answer_category_ids=[],\n        enable_auto_filters=False,\n        is_default=True,\n    )\n\n    mt_cloud_telemetry(\n        tenant_id=tenant_id,\n        distinct_id=tenant_id,\n        event=MilestoneRecordType.CREATED_ONYX_BOT,\n    )\n\n    return SlackBot.from_model(slack_bot_model)\n\n\n@router.patch(\"/admin/slack-app/bots/{slack_bot_id}\")\ndef patch_bot(\n    slack_bot_id: int,\n    slack_bot_creation_request: SlackBotCreationRequest,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> SlackBot:\n    validate_bot_token(slack_bot_creation_request.bot_token)\n    validate_app_token(slack_bot_creation_request.app_token)\n    validate_user_token(slack_bot_creation_request.user_token)\n    slack_bot_model = update_slack_bot(\n        db_session=db_session,\n        slack_bot_id=slack_bot_id,\n        name=slack_bot_creation_request.name,\n        enabled=slack_bot_creation_request.enabled,\n        bot_token=slack_bot_creation_request.bot_token,\n        app_token=slack_bot_creation_request.app_token,\n        user_token=slack_bot_creation_request.user_token,\n    )\n    return SlackBot.from_model(slack_bot_model)\n\n\n@router.delete(\"/admin/slack-app/bots/{slack_bot_id}\")\ndef delete_bot(\n    slack_bot_id: int,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> None:\n    remove_slack_bot(\n        db_session=db_session,\n        slack_bot_id=slack_bot_id,\n    )\n\n\n@router.get(\"/admin/slack-app/bots/{slack_bot_id}\")\ndef get_bot_by_id(\n    slack_bot_id: int,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> SlackBot:\n    slack_bot_model = fetch_slack_bot(\n        db_session=db_session,\n        slack_bot_id=slack_bot_id,\n    )\n    return SlackBot.from_model(slack_bot_model)\n\n\n@router.get(\"/admin/slack-app/bots\")\ndef list_bots(\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> list[SlackBot]:\n    slack_bot_models = fetch_slack_bots(db_session=db_session)\n    return [\n        SlackBot.from_model(slack_bot_model) for slack_bot_model in slack_bot_models\n    ]\n\n\n@router.get(\"/admin/slack-app/bots/{bot_id}/config\")\ndef list_bot_configs(\n    bot_id: int,\n    db_session: Session = Depends(get_session),\n    _: User = Depends(current_admin_user),\n) -> list[SlackChannelConfig]:\n    slack_bot_config_models = fetch_slack_channel_configs(\n        db_session=db_session, slack_bot_id=bot_id\n    )\n    return [\n        SlackChannelConfig.from_model(slack_bot_config_model)\n        for slack_bot_config_model in slack_bot_config_models\n    ]\n"
  },
  {
    "path": "backend/onyx/server/manage/users.py",
    "content": "import csv\nimport io\nimport re\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import cast\nfrom uuid import UUID\n\nimport jwt\nfrom email_validator import EmailNotValidError\nfrom email_validator import EmailUndeliverableError\nfrom email_validator import validate_email\nfrom fastapi import APIRouter\nfrom fastapi import Body\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Query\nfrom fastapi import Request\nfrom fastapi.responses import StreamingResponse\nfrom pydantic import BaseModel\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.anonymous_user import fetch_anonymous_user_info\nfrom onyx.auth.email_utils import send_user_email_invite\nfrom onyx.auth.invited_users import get_invited_users\nfrom onyx.auth.invited_users import remove_user_from_invited_users\nfrom onyx.auth.invited_users import write_invited_users\nfrom onyx.auth.permissions import get_effective_permissions\nfrom onyx.auth.schemas import UserRole\nfrom onyx.auth.users import anonymous_user_enabled\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.auth.users import enforce_seat_limit\nfrom onyx.auth.users import optional_user\nfrom onyx.configs.app_configs import AUTH_BACKEND\nfrom onyx.configs.app_configs import AUTH_TYPE\nfrom onyx.configs.app_configs import AuthBackend\nfrom onyx.configs.app_configs import DEV_MODE\nfrom onyx.configs.app_configs import EMAIL_CONFIGURED\nfrom onyx.configs.app_configs import ENABLE_EMAIL_INVITES\nfrom onyx.configs.app_configs import NUM_FREE_TRIAL_USER_INVITES\nfrom onyx.configs.app_configs import REDIS_AUTH_KEY_PREFIX\nfrom onyx.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS\nfrom onyx.configs.app_configs import USER_AUTH_SECRET\nfrom onyx.configs.app_configs import VALID_EMAIL_DOMAINS\nfrom onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.db.api_key import is_api_key_email_address\nfrom onyx.db.auth import get_live_users_count\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.enums import AccountType\nfrom onyx.db.enums import UserFileStatus\nfrom onyx.db.models import User\nfrom onyx.db.models import UserFile\nfrom onyx.db.user_preferences import activate_user\nfrom onyx.db.user_preferences import deactivate_user\nfrom onyx.db.user_preferences import get_all_user_assistant_specific_configs\nfrom onyx.db.user_preferences import get_latest_access_token_for_user\nfrom onyx.db.user_preferences import get_memories_for_user\nfrom onyx.db.user_preferences import update_assistant_preferences\nfrom onyx.db.user_preferences import update_user_assistant_visibility\nfrom onyx.db.user_preferences import update_user_auto_scroll\nfrom onyx.db.user_preferences import update_user_chat_background\nfrom onyx.db.user_preferences import update_user_default_app_mode\nfrom onyx.db.user_preferences import update_user_default_model\nfrom onyx.db.user_preferences import update_user_personalization\nfrom onyx.db.user_preferences import update_user_pinned_assistants\nfrom onyx.db.user_preferences import update_user_role\nfrom onyx.db.user_preferences import update_user_shortcut_enabled\nfrom onyx.db.user_preferences import update_user_temperature_override_enabled\nfrom onyx.db.user_preferences import update_user_theme_preference\nfrom onyx.db.users import batch_get_user_groups\nfrom onyx.db.users import delete_user_from_db\nfrom onyx.db.users import get_all_accepted_users\nfrom onyx.db.users import get_all_users\nfrom onyx.db.users import get_page_of_filtered_users\nfrom onyx.db.users import get_total_filtered_users_count\nfrom onyx.db.users import get_user_by_email\nfrom onyx.db.users import get_user_counts_by_role_and_status\nfrom onyx.db.users import validate_user_role_update\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.redis.redis_pool import get_raw_redis_client\nfrom onyx.server.documents.models import PaginatedReturn\nfrom onyx.server.features.projects.models import UserFileSnapshot\nfrom onyx.server.manage.models import AllUsersResponse\nfrom onyx.server.manage.models import AutoScrollRequest\nfrom onyx.server.manage.models import BulkInviteResponse\nfrom onyx.server.manage.models import ChatBackgroundRequest\nfrom onyx.server.manage.models import DefaultAppModeRequest\nfrom onyx.server.manage.models import EmailInviteStatus\nfrom onyx.server.manage.models import MemoryItem\nfrom onyx.server.manage.models import PersonalizationUpdateRequest\nfrom onyx.server.manage.models import TenantInfo\nfrom onyx.server.manage.models import TenantSnapshot\nfrom onyx.server.manage.models import ThemePreferenceRequest\nfrom onyx.server.manage.models import UserByEmail\nfrom onyx.server.manage.models import UserInfo\nfrom onyx.server.manage.models import UserPreferences\nfrom onyx.server.manage.models import UserRoleResponse\nfrom onyx.server.manage.models import UserRoleUpdateRequest\nfrom onyx.server.manage.models import UserSpecificAssistantPreference\nfrom onyx.server.manage.models import UserSpecificAssistantPreferences\nfrom onyx.server.models import FullUserSnapshot\nfrom onyx.server.models import InvitedUserSnapshot\nfrom onyx.server.models import MinimalUserSnapshot\nfrom onyx.server.models import UserGroupInfo\nfrom onyx.server.usage_limits import is_tenant_on_trial_fn\nfrom onyx.server.utils import BasicAuthenticationError\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\nfrom onyx.utils.variable_functionality import (\n    fetch_versioned_implementation_with_fallback,\n)\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\nrouter = APIRouter()\n\nUSERS_PAGE_SIZE = 10\n\n\n@router.patch(\"/manage/set-user-role\", tags=PUBLIC_API_TAGS)\ndef set_user_role(\n    user_role_update_request: UserRoleUpdateRequest,\n    current_user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    user_to_update = get_user_by_email(\n        email=user_role_update_request.user_email, db_session=db_session\n    )\n    if not user_to_update:\n        raise HTTPException(status_code=404, detail=\"User not found\")\n\n    current_role = user_to_update.role\n    requested_role = user_role_update_request.new_role\n    if requested_role == current_role:\n        return\n\n    # This will raise an exception if the role update is invalid\n    validate_user_role_update(\n        requested_role=requested_role,\n        current_role=current_role,\n        current_account_type=user_to_update.account_type,\n        explicit_override=user_role_update_request.explicit_override,\n    )\n\n    if user_to_update.id == current_user.id:\n        raise HTTPException(\n            status_code=400,\n            detail=\"An admin cannot demote themselves from admin role!\",\n        )\n\n    if requested_role == UserRole.CURATOR:\n        # Remove all curator db relationships before changing role\n        fetch_ee_implementation_or_noop(\n            \"onyx.db.user_group\",\n            \"remove_curator_status__no_commit\",\n        )(db_session, user_to_update)\n\n    update_user_role(user_to_update, requested_role, db_session)\n\n\nclass TestUpsertRequest(BaseModel):\n    email: str\n\n\n@router.post(\"/manage/users/test-upsert-user\")\nasync def test_upsert_user(\n    request: TestUpsertRequest,\n    _: User = Depends(current_admin_user),\n) -> None | FullUserSnapshot:\n    \"\"\"Test endpoint for upsert_saml_user. Only used for integration testing.\"\"\"\n    user = await fetch_ee_implementation_or_noop(\n        \"onyx.server.saml\", \"upsert_saml_user\", None\n    )(email=request.email)\n    return FullUserSnapshot.from_user_model(user) if user else None\n\n\n@router.get(\"/manage/users/accepted\", tags=PUBLIC_API_TAGS)\ndef list_accepted_users(\n    q: str | None = Query(default=None),\n    page_num: int = Query(0, ge=0),\n    page_size: int = Query(10, ge=1, le=1000),\n    roles: list[UserRole] = Query(default=[]),\n    is_active: bool | None = Query(default=None),\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> PaginatedReturn[FullUserSnapshot]:\n    filtered_accepted_users = get_page_of_filtered_users(\n        db_session=db_session,\n        page_size=page_size,\n        page_num=page_num,\n        email_filter_string=q,\n        is_active_filter=is_active,\n        roles_filter=roles,\n    )\n\n    total_accepted_users_count = get_total_filtered_users_count(\n        db_session=db_session,\n        email_filter_string=q,\n        is_active_filter=is_active,\n        roles_filter=roles,\n    )\n\n    if not filtered_accepted_users:\n        logger.info(\"No users found\")\n        return PaginatedReturn(\n            items=[],\n            total_items=0,\n        )\n\n    user_ids = [user.id for user in filtered_accepted_users]\n    groups_by_user = batch_get_user_groups(db_session, user_ids)\n\n    # Batch-fetch SCIM mappings to mark synced users\n    scim_synced_ids: set[UUID] = set()\n    try:\n        from onyx.db.models import ScimUserMapping\n\n        scim_mappings = db_session.scalars(\n            select(ScimUserMapping.user_id).where(ScimUserMapping.user_id.in_(user_ids))\n        ).all()\n        scim_synced_ids = set(scim_mappings)\n    except Exception:\n        logger.warning(\n            \"Failed to fetch SCIM mappings; marking all users as non-synced\",\n            exc_info=True,\n        )\n\n    return PaginatedReturn(\n        items=[\n            FullUserSnapshot.from_user_model(\n                user,\n                groups=[\n                    UserGroupInfo(id=gid, name=gname)\n                    for gid, gname in groups_by_user.get(user.id, [])\n                ],\n                is_scim_synced=user.id in scim_synced_ids,\n            )\n            for user in filtered_accepted_users\n        ],\n        total_items=total_accepted_users_count,\n    )\n\n\n@router.get(\"/manage/users/accepted/all\", tags=PUBLIC_API_TAGS)\ndef list_all_accepted_users(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[FullUserSnapshot]:\n    \"\"\"Returns all accepted users without pagination.\n    Used by the admin Users page for client-side filtering/sorting.\"\"\"\n    users = get_all_accepted_users(db_session=db_session)\n\n    if not users:\n        return []\n\n    user_ids = [user.id for user in users]\n    groups_by_user = batch_get_user_groups(db_session, user_ids)\n\n    # Batch-fetch SCIM mappings to mark synced users\n    scim_synced_ids: set[UUID] = set()\n    try:\n        from onyx.db.models import ScimUserMapping\n\n        scim_mappings = db_session.scalars(\n            select(ScimUserMapping.user_id).where(ScimUserMapping.user_id.in_(user_ids))\n        ).all()\n        scim_synced_ids = set(scim_mappings)\n    except Exception:\n        logger.warning(\n            \"Failed to fetch SCIM mappings; marking all users as non-synced\",\n            exc_info=True,\n        )\n\n    return [\n        FullUserSnapshot.from_user_model(\n            user,\n            groups=[\n                UserGroupInfo(id=gid, name=gname)\n                for gid, gname in groups_by_user.get(user.id, [])\n            ],\n            is_scim_synced=user.id in scim_synced_ids,\n        )\n        for user in users\n    ]\n\n\n@router.get(\"/manage/users/counts\")\ndef get_user_counts(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> dict[str, dict[str, int]]:\n    return get_user_counts_by_role_and_status(db_session)\n\n\n@router.get(\"/manage/users/invited\", tags=PUBLIC_API_TAGS)\ndef list_invited_users(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[InvitedUserSnapshot]:\n    invited_emails = get_invited_users()\n\n    # Filter out users who are already active in the system\n    active_user_emails = {user.email for user in get_all_users(db_session)}\n    filtered_invited_emails = [\n        email for email in invited_emails if email not in active_user_emails\n    ]\n\n    return [InvitedUserSnapshot(email=email) for email in filtered_invited_emails]\n\n\n@router.get(\"/manage/users\", tags=PUBLIC_API_TAGS)\ndef list_all_users(\n    q: str | None = None,\n    accepted_page: int | None = None,\n    slack_users_page: int | None = None,\n    invited_page: int | None = None,\n    include_api_keys: bool = False,\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> AllUsersResponse:\n    users = [\n        user\n        for user in get_all_users(db_session, email_filter_string=q)\n        if (include_api_keys or not is_api_key_email_address(user.email))\n    ]\n\n    slack_users = [user for user in users if user.account_type == AccountType.BOT]\n    accepted_users = [user for user in users if user.account_type != AccountType.BOT]\n\n    accepted_emails = {user.email for user in accepted_users}\n    slack_users_emails = {user.email for user in slack_users}\n    invited_emails = get_invited_users()\n\n    # Filter out users who are already active (either accepted or slack users)\n    all_active_emails = accepted_emails | slack_users_emails\n    invited_emails = [\n        email for email in invited_emails if email not in all_active_emails\n    ]\n\n    if q:\n        invited_emails = [\n            email for email in invited_emails if re.search(r\"{}\".format(q), email, re.I)\n        ]\n\n    accepted_count = len(accepted_emails)\n    slack_users_count = len(slack_users_emails)\n    invited_count = len(invited_emails)\n\n    # If any of q, accepted_page, or invited_page is None, return all users\n    if accepted_page is None or invited_page is None or slack_users_page is None:\n        return AllUsersResponse(\n            accepted=[\n                FullUserSnapshot.from_user_model(user) for user in accepted_users\n            ],\n            slack_users=[\n                FullUserSnapshot.from_user_model(user) for user in slack_users\n            ],\n            invited=[InvitedUserSnapshot(email=email) for email in invited_emails],\n            accepted_pages=1,\n            invited_pages=1,\n            slack_users_pages=1,\n        )\n\n    # Otherwise, return paginated results\n    return AllUsersResponse(\n        accepted=[FullUserSnapshot.from_user_model(user) for user in accepted_users][\n            accepted_page * USERS_PAGE_SIZE : (accepted_page + 1) * USERS_PAGE_SIZE\n        ],\n        slack_users=[FullUserSnapshot.from_user_model(user) for user in slack_users][\n            slack_users_page\n            * USERS_PAGE_SIZE : (slack_users_page + 1)\n            * USERS_PAGE_SIZE\n        ],\n        invited=[InvitedUserSnapshot(email=email) for email in invited_emails][\n            invited_page * USERS_PAGE_SIZE : (invited_page + 1) * USERS_PAGE_SIZE\n        ],\n        accepted_pages=(accepted_count + USERS_PAGE_SIZE - 1) // USERS_PAGE_SIZE,\n        invited_pages=(invited_count + USERS_PAGE_SIZE - 1) // USERS_PAGE_SIZE,\n        slack_users_pages=(slack_users_count + USERS_PAGE_SIZE - 1) // USERS_PAGE_SIZE,\n    )\n\n\n@router.get(\"/manage/users/download\")\ndef download_users_csv(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> StreamingResponse:\n    \"\"\"Download all users as a CSV file.\"\"\"\n    # Get all users from the database\n    users = get_all_users(db_session)\n\n    # Create CSV content in memory\n    output = io.StringIO()\n    writer = csv.writer(output)\n\n    # Write CSV header\n    writer.writerow([\"Email\", \"Role\", \"Status\"])\n\n    # Write user data\n    for user in users:\n        writer.writerow(\n            [\n                user.email,\n                user.role.value if user.role else \"\",\n                \"Active\" if user.is_active else \"Inactive\",\n            ]\n        )\n\n    # Prepare the CSV content for download\n    csv_content = output.getvalue()\n    output.close()\n\n    return StreamingResponse(\n        io.BytesIO(csv_content.encode(\"utf-8\")),\n        media_type=\"text/csv\",\n        headers={\"Content-Disposition\": \"attachment;\"},\n    )\n\n\n@router.put(\"/manage/admin/users\", tags=PUBLIC_API_TAGS)\ndef bulk_invite_users(\n    emails: list[str] = Body(..., embed=True),\n    current_user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> BulkInviteResponse:\n    \"\"\"emails are string validated. If any email fails validation, no emails are\n    invited and an exception is raised.\"\"\"\n    tenant_id = get_current_tenant_id()\n\n    new_invited_emails = []\n    email: str\n\n    try:\n        for email in emails:\n            # Allow syntactically valid emails without DNS deliverability checks; tests use test domains\n            email_info = validate_email(email, check_deliverability=False)\n            new_invited_emails.append(email_info.normalized)\n\n    except (EmailUndeliverableError, EmailNotValidError) as e:\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Invalid email address: {email} - {str(e)}\",\n        )\n\n    # Count only new users (not already invited or existing) that need seats\n    existing_users = {user.email for user in get_all_users(db_session)}\n    already_invited = set(get_invited_users())\n    emails_needing_seats = [\n        e\n        for e in new_invited_emails\n        if e not in existing_users and e not in already_invited\n    ]\n\n    # Limit bulk invites for trial tenants to prevent email spam\n    # Only count new invites, not re-invites of existing users\n    if MULTI_TENANT and is_tenant_on_trial_fn(tenant_id):\n        current_invited = len(already_invited)\n        if current_invited + len(emails_needing_seats) > NUM_FREE_TRIAL_USER_INVITES:\n            raise HTTPException(\n                status_code=403,\n                detail=\"You have hit your invite limit. Please upgrade for unlimited invites.\",\n            )\n\n    # Check seat availability for new users\n    if emails_needing_seats:\n        enforce_seat_limit(db_session, seats_needed=len(emails_needing_seats))\n\n    if MULTI_TENANT:\n        try:\n            fetch_ee_implementation_or_noop(\n                \"onyx.server.tenants.provisioning\", \"add_users_to_tenant\", None\n            )(new_invited_emails, tenant_id)\n\n        except Exception as e:\n            logger.error(f\"Failed to add users to tenant {tenant_id}: {str(e)}\")\n\n    initial_invited_users = get_invited_users()\n\n    all_emails = list(set(new_invited_emails) | set(initial_invited_users))\n    number_of_invited_users = write_invited_users(all_emails)\n\n    # send out email invitations only to new users (not already invited or existing)\n    if not ENABLE_EMAIL_INVITES:\n        email_invite_status = EmailInviteStatus.DISABLED\n    elif not EMAIL_CONFIGURED:\n        email_invite_status = EmailInviteStatus.NOT_CONFIGURED\n    else:\n        try:\n            for email in emails_needing_seats:\n                send_user_email_invite(email, current_user, AUTH_TYPE)\n            email_invite_status = EmailInviteStatus.SENT\n        except Exception as e:\n            logger.error(f\"Error sending email invite to invited users: {e}\")\n            email_invite_status = EmailInviteStatus.SEND_FAILED\n\n    if MULTI_TENANT and not DEV_MODE:\n        # for billing purposes, write to the control plane about the number of new users\n        try:\n            logger.info(\"Registering tenant users\")\n            fetch_ee_implementation_or_noop(\n                \"onyx.server.tenants.billing\", \"register_tenant_users\", None\n            )(tenant_id, get_live_users_count(db_session))\n        except Exception as e:\n            logger.error(f\"Failed to register tenant users: {str(e)}\")\n            logger.info(\n                \"Reverting changes: removing users from tenant and resetting invited users\"\n            )\n            write_invited_users(initial_invited_users)  # Reset to original state\n            fetch_ee_implementation_or_noop(\n                \"onyx.server.tenants.user_mapping\", \"remove_users_from_tenant\", None\n            )(new_invited_emails, tenant_id)\n            raise e\n\n    return BulkInviteResponse(\n        invited_count=number_of_invited_users,\n        email_invite_status=email_invite_status,\n    )\n\n\n@router.patch(\"/manage/admin/remove-invited-user\", tags=PUBLIC_API_TAGS)\ndef remove_invited_user(\n    user_email: UserByEmail,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> int:\n    tenant_id = get_current_tenant_id()\n    if MULTI_TENANT:\n        fetch_ee_implementation_or_noop(\n            \"onyx.server.tenants.user_mapping\", \"remove_users_from_tenant\", None\n        )([user_email.user_email], tenant_id)\n    number_of_invited_users = remove_user_from_invited_users(user_email.user_email)\n\n    try:\n        if MULTI_TENANT and not DEV_MODE:\n            fetch_ee_implementation_or_noop(\n                \"onyx.server.tenants.billing\", \"register_tenant_users\", None\n            )(tenant_id, get_live_users_count(db_session))\n    except Exception:\n        logger.error(\n            \"Request to update number of seats taken in control plane failed. \"\n            \"This may cause synchronization issues/out of date enforcement of seat limits.\"\n        )\n        raise\n\n    return number_of_invited_users\n\n\n@router.patch(\"/manage/admin/deactivate-user\", tags=PUBLIC_API_TAGS)\ndef deactivate_user_api(\n    user_email: UserByEmail,\n    current_user: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    if current_user.email == user_email.user_email:\n        raise HTTPException(status_code=400, detail=\"You cannot deactivate yourself\")\n\n    user_to_deactivate = get_user_by_email(\n        email=user_email.user_email, db_session=db_session\n    )\n\n    if not user_to_deactivate:\n        raise HTTPException(status_code=404, detail=\"User not found\")\n\n    if user_to_deactivate.is_active is False:\n        logger.warning(\"{} is already deactivated\".format(user_to_deactivate.email))\n\n    deactivate_user(user_to_deactivate, db_session)\n\n    # Invalidate license cache so used_seats reflects the new count\n    # Only for self-hosted (non-multi-tenant) deployments\n    if not MULTI_TENANT:\n        fetch_ee_implementation_or_noop(\n            \"onyx.db.license\", \"invalidate_license_cache\", None\n        )()\n\n\n@router.delete(\"/manage/admin/delete-user\", tags=PUBLIC_API_TAGS)\nasync def delete_user(\n    user_email: UserByEmail,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    user_to_delete = get_user_by_email(\n        email=user_email.user_email, db_session=db_session\n    )\n    if not user_to_delete:\n        raise HTTPException(status_code=404, detail=\"User not found\")\n\n    if user_to_delete.is_active is True:\n        logger.warning(\n            \"{} must be deactivated before deleting\".format(user_to_delete.email)\n        )\n        raise HTTPException(\n            status_code=400, detail=\"User must be deactivated before deleting\"\n        )\n\n    # Detach the user from the current session\n    db_session.expunge(user_to_delete)\n\n    try:\n        tenant_id = get_current_tenant_id()\n        fetch_ee_implementation_or_noop(\n            \"onyx.server.tenants.user_mapping\", \"remove_users_from_tenant\", None\n        )([user_email.user_email], tenant_id)\n        delete_user_from_db(user_to_delete, db_session)\n        logger.info(f\"Deleted user {user_to_delete.email}\")\n\n        # Invalidate license cache so used_seats reflects the new count\n        # Only for self-hosted (non-multi-tenant) deployments\n        if not MULTI_TENANT:\n            fetch_ee_implementation_or_noop(\n                \"onyx.db.license\", \"invalidate_license_cache\", None\n            )()\n\n    except Exception as e:\n        db_session.rollback()\n        logger.error(f\"Error deleting user {user_to_delete.email}: {str(e)}\")\n        raise HTTPException(status_code=500, detail=\"Error deleting user\")\n\n\n@router.patch(\"/manage/admin/activate-user\", tags=PUBLIC_API_TAGS)\ndef activate_user_api(\n    user_email: UserByEmail,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    user_to_activate = get_user_by_email(\n        email=user_email.user_email, db_session=db_session\n    )\n    if not user_to_activate:\n        raise HTTPException(status_code=404, detail=\"User not found\")\n\n    if user_to_activate.is_active is True:\n        logger.warning(\"{} is already activated\".format(user_to_activate.email))\n        return\n\n    # Check seat availability before activating\n    # Only for self-hosted (non-multi-tenant) deployments\n    enforce_seat_limit(db_session)\n\n    activate_user(user_to_activate, db_session)\n\n    # Invalidate license cache so used_seats reflects the new count\n    # Only for self-hosted (non-multi-tenant) deployments\n    if not MULTI_TENANT:\n        fetch_ee_implementation_or_noop(\n            \"onyx.db.license\", \"invalidate_license_cache\", None\n        )()\n\n\n@router.get(\"/manage/admin/valid-domains\")\ndef get_valid_domains(\n    _: User = Depends(current_admin_user),\n) -> list[str]:\n    return VALID_EMAIL_DOMAINS\n\n\n\"\"\"Endpoints for all\"\"\"\n\n\n@router.get(\"/users\", tags=PUBLIC_API_TAGS)\ndef list_all_users_basic_info(\n    include_api_keys: bool = False,\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[MinimalUserSnapshot]:\n    users = get_all_users(db_session)\n    return [\n        MinimalUserSnapshot(id=user.id, email=user.email)\n        for user in users\n        if user.account_type != AccountType.BOT\n        and (include_api_keys or not is_api_key_email_address(user.email))\n    ]\n\n\n@router.get(\"/get-user-role\", tags=PUBLIC_API_TAGS)\nasync def get_user_role(user: User = Depends(current_user)) -> UserRoleResponse:\n    return UserRoleResponse(role=user.role)\n\n\ndef get_current_auth_token_creation_redis(\n    user: User, request: Request\n) -> datetime | None:\n    \"\"\"Calculate the token creation time from Redis TTL information.\n\n    This function retrieves the authentication token from cookies,\n    checks its TTL in Redis, and calculates when the token was created.\n    Despite the function name, it returns the token creation time, not the expiration time.\n    \"\"\"\n    # Anonymous users don't have auth tokens\n    if user.is_anonymous:\n        return None\n    try:\n        # Get the token from the request\n        token = request.cookies.get(FASTAPI_USERS_AUTH_COOKIE_NAME)\n        if not token:\n            logger.debug(\"No auth token cookie found\")\n            return None\n\n        # Get the Redis client\n        redis = get_raw_redis_client()\n        redis_key = REDIS_AUTH_KEY_PREFIX + token\n\n        # Get the TTL of the token\n        ttl = cast(int, redis.ttl(redis_key))\n        if ttl <= 0:\n            logger.error(\"Token has expired or doesn't exist in Redis\")\n            return None\n\n        # Calculate the creation time based on TTL and session expiry\n        # Current time minus (total session length minus remaining TTL)\n        current_time = datetime.now(timezone.utc)\n        token_creation_time = current_time - timedelta(\n            seconds=(SESSION_EXPIRE_TIME_SECONDS - ttl)\n        )\n\n        return token_creation_time\n\n    except Exception as e:\n        logger.error(f\"Error retrieving token expiration from Redis: {e}\")\n        return None\n\n\ndef get_current_token_creation_postgres(\n    user: User, db_session: Session\n) -> datetime | None:\n    # Anonymous users don't have auth tokens\n    if user.is_anonymous:\n        return None\n\n    access_token = get_latest_access_token_for_user(user.id, db_session)\n    if access_token:\n        return access_token.created_at\n    else:\n        logger.error(\"No AccessToken found for user\")\n        return None\n\n\ndef get_current_token_creation_jwt(user: User, request: Request) -> datetime | None:\n    \"\"\"Extract token creation time from the ``iat`` claim of a JWT cookie.\"\"\"\n    if user.is_anonymous:\n        return None\n\n    token = request.cookies.get(FASTAPI_USERS_AUTH_COOKIE_NAME)\n    if not token:\n        return None\n\n    try:\n        payload = jwt.decode(\n            token,\n            USER_AUTH_SECRET,\n            algorithms=[\"HS256\"],\n            audience=[\"fastapi-users:auth\"],\n        )\n        iat = payload.get(\"iat\")\n        if iat is None:\n            return None\n        return datetime.fromtimestamp(iat, tz=timezone.utc)\n    except jwt.PyJWTError:\n        logger.error(\"Failed to decode JWT for iat claim\")\n        return None\n\n\ndef _get_token_created_at(\n    user: User, request: Request, db_session: Session\n) -> datetime | None:\n    if AUTH_BACKEND == AuthBackend.REDIS:\n        return get_current_auth_token_creation_redis(user, request)\n    if AUTH_BACKEND == AuthBackend.JWT:\n        return get_current_token_creation_jwt(user, request)\n    return get_current_token_creation_postgres(user, db_session)\n\n\n@router.get(\"/me/permissions\", tags=PUBLIC_API_TAGS)\ndef get_current_user_permissions(\n    user: User = Depends(current_user),\n) -> list[str]:\n    return sorted(p.value for p in get_effective_permissions(user))\n\n\n@router.get(\"/me\", tags=PUBLIC_API_TAGS)\ndef verify_user_logged_in(\n    request: Request,\n    user: User | None = Depends(optional_user),\n    db_session: Session = Depends(get_session),\n) -> UserInfo:\n    tenant_id = get_current_tenant_id()\n\n    # User can be None if not authenticated.\n    # We use optional_user to allow unverified users to access this endpoint.\n    if user is None:\n        # If anonymous access is enabled, return anonymous user info\n        if anonymous_user_enabled(tenant_id=tenant_id):\n            store = get_kv_store()\n            return fetch_anonymous_user_info(store)\n        raise BasicAuthenticationError(detail=\"Unauthorized\")\n\n    if user.oidc_expiry and user.oidc_expiry < datetime.now(timezone.utc):\n        raise BasicAuthenticationError(\n            detail=\"Access denied. User's OIDC token has expired.\",\n        )\n\n    token_created_at = _get_token_created_at(user, request, db_session)\n\n    team_name = fetch_ee_implementation_or_noop(\n        \"onyx.server.tenants.user_mapping\", \"get_tenant_id_for_email\", None\n    )(user.email)\n\n    new_tenant: TenantSnapshot | None = None\n    tenant_invitation: TenantSnapshot | None = None\n\n    if MULTI_TENANT:\n        if team_name != get_current_tenant_id():\n            user_count = fetch_ee_implementation_or_noop(\n                \"onyx.server.tenants.user_mapping\", \"get_tenant_count\", None\n            )(team_name)\n            new_tenant = TenantSnapshot(tenant_id=team_name, number_of_users=user_count)\n\n        tenant_invitation = fetch_ee_implementation_or_noop(\n            \"onyx.server.tenants.user_mapping\", \"get_tenant_invitation\", None\n        )(user.email)\n\n    super_users_list = cast(\n        list[str],\n        fetch_versioned_implementation_with_fallback(\n            \"onyx.configs.app_configs\",\n            \"SUPER_USERS\",\n            [],\n        ),\n    )\n    memories = [\n        MemoryItem(id=memory.id, content=memory.memory_text)\n        for memory in get_memories_for_user(user.id, db_session)\n    ]\n\n    user_info = UserInfo.from_model(\n        user,\n        current_token_created_at=token_created_at,\n        expiry_length=SESSION_EXPIRE_TIME_SECONDS,\n        is_cloud_superuser=user.email in super_users_list,\n        team_name=team_name,\n        tenant_info=TenantInfo(\n            new_tenant=new_tenant,\n            invitation=tenant_invitation,\n        ),\n        memories=memories,\n    )\n\n    return user_info\n\n\n\"\"\"APIs to adjust user preferences\"\"\"\n\n\n@router.patch(\"/temperature-override-enabled\")\ndef update_user_temperature_override_enabled_api(\n    temperature_override_enabled: bool,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_user_temperature_override_enabled(\n        user.id, temperature_override_enabled, db_session\n    )\n\n\nclass ChosenDefaultModelRequest(BaseModel):\n    default_model: str | None = None\n\n\n@router.patch(\"/shortcut-enabled\")\ndef update_user_shortcut_enabled_api(\n    shortcut_enabled: bool,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_user_shortcut_enabled(user.id, shortcut_enabled, db_session)\n\n\n@router.patch(\"/auto-scroll\")\ndef update_user_auto_scroll_api(\n    request: AutoScrollRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_user_auto_scroll(user.id, request.auto_scroll, db_session)\n\n\n@router.patch(\"/user/theme-preference\")\ndef update_user_theme_preference_api(\n    request: ThemePreferenceRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_user_theme_preference(user.id, request.theme_preference, db_session)\n\n\n@router.patch(\"/user/chat-background\")\ndef update_user_chat_background_api(\n    request: ChatBackgroundRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_user_chat_background(user.id, request.chat_background, db_session)\n\n\n@router.patch(\"/user/default-app-mode\")\ndef update_user_default_app_mode_api(\n    request: DefaultAppModeRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_user_default_app_mode(user.id, request.default_app_mode, db_session)\n\n\n@router.patch(\"/user/default-model\")\ndef update_user_default_model_api(\n    request: ChosenDefaultModelRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_user_default_model(user.id, request.default_model, db_session)\n\n\n@router.patch(\"/user/personalization\")\ndef update_user_personalization_api(\n    request: PersonalizationUpdateRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    new_name = request.name if request.name is not None else user.personal_name\n    new_role = request.role if request.role is not None else user.personal_role\n    current_use_memories = user.use_memories\n    new_use_memories = (\n        request.use_memories\n        if request.use_memories is not None\n        else current_use_memories\n    )\n    new_enable_memory_tool = (\n        request.enable_memory_tool\n        if request.enable_memory_tool is not None\n        else user.enable_memory_tool\n    )\n    existing_memories = [\n        MemoryItem(id=memory.id, content=memory.memory_text)\n        for memory in get_memories_for_user(user.id, db_session)\n    ]\n    new_memories = (\n        request.memories if request.memories is not None else existing_memories\n    )\n    new_user_preferences = (\n        request.user_preferences\n        if request.user_preferences is not None\n        else user.user_preferences\n    )\n\n    update_user_personalization(\n        user.id,\n        personal_name=new_name,\n        personal_role=new_role,\n        use_memories=new_use_memories,\n        enable_memory_tool=new_enable_memory_tool,\n        memories=new_memories,\n        user_preferences=new_user_preferences,\n        db_session=db_session,\n    )\n\n\nclass ReorderPinnedAssistantsRequest(BaseModel):\n    ordered_assistant_ids: list[int]\n\n\n@router.patch(\"/user/pinned-assistants\")\ndef update_user_pinned_assistants_api(\n    request: ReorderPinnedAssistantsRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    ordered_assistant_ids = request.ordered_assistant_ids\n    update_user_pinned_assistants(user.id, ordered_assistant_ids, db_session)\n\n\nclass ChosenAssistantsRequest(BaseModel):\n    chosen_assistants: list[int]\n\n\ndef update_assistant_visibility(\n    preferences: UserPreferences, assistant_id: int, show: bool\n) -> UserPreferences:\n    visible_assistants = preferences.visible_assistants or []\n    hidden_assistants = preferences.hidden_assistants or []\n\n    if show:\n        if assistant_id not in visible_assistants:\n            visible_assistants.append(assistant_id)\n        if assistant_id in hidden_assistants:\n            hidden_assistants.remove(assistant_id)\n    else:\n        if assistant_id in visible_assistants:\n            visible_assistants.remove(assistant_id)\n        if assistant_id not in hidden_assistants:\n            hidden_assistants.append(assistant_id)\n\n    preferences.visible_assistants = visible_assistants\n    preferences.hidden_assistants = hidden_assistants\n    return preferences\n\n\n@router.patch(\"/user/assistant-list/update/{assistant_id}\")\ndef update_user_assistant_visibility_api(\n    assistant_id: int,\n    show: bool,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    user_preferences = UserInfo.from_model(user).preferences\n    updated_preferences = update_assistant_visibility(\n        user_preferences, assistant_id, show\n    )\n    if updated_preferences.chosen_assistants is not None:\n        updated_preferences.chosen_assistants.append(assistant_id)\n    update_user_assistant_visibility(\n        user.id,\n        updated_preferences.hidden_assistants,\n        updated_preferences.visible_assistants,\n        updated_preferences.chosen_assistants,\n        db_session,\n    )\n\n\n@router.get(\"/user/assistant/preferences\")\ndef get_user_assistant_preferences(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> UserSpecificAssistantPreferences | None:\n    \"\"\"Fetch all assistant preferences for the user.\"\"\"\n    assistant_specific_configs = get_all_user_assistant_specific_configs(\n        user.id, db_session\n    )\n    return {\n        config.assistant_id: UserSpecificAssistantPreference(\n            disabled_tool_ids=config.disabled_tool_ids\n        )\n        for config in assistant_specific_configs\n    }\n\n\n@router.patch(\"/user/assistant/{assistant_id}/preferences\")\ndef update_assistant_preferences_for_user_api(\n    assistant_id: int,\n    new_assistant_preference: UserSpecificAssistantPreference,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    update_assistant_preferences(\n        assistant_id, user.id, new_assistant_preference, db_session\n    )\n    db_session.commit()\n\n\n@router.get(\"/user/files/recent\")\ndef get_recent_files(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[UserFileSnapshot]:\n    user_id = user.id\n    user_files = (\n        db_session.query(UserFile)\n        .filter(UserFile.user_id == user_id)\n        .filter(UserFile.status != UserFileStatus.FAILED)\n        .filter(UserFile.status != UserFileStatus.DELETING)\n        .order_by(UserFile.last_accessed_at.desc())\n        .all()\n    )\n\n    return [UserFileSnapshot.from_model(user_file) for user_file in user_files]\n"
  },
  {
    "path": "backend/onyx/server/manage/validate_tokens.py",
    "content": "import requests\nfrom fastapi import HTTPException\n\nfrom onyx.configs.constants import SLACK_USER_TOKEN_PREFIX\n\nSLACK_API_URL = \"https://slack.com/api/auth.test\"\nSLACK_CONNECTIONS_OPEN_URL = \"https://slack.com/api/apps.connections.open\"\n\n\ndef validate_bot_token(bot_token: str) -> bool:\n    headers = {\"Authorization\": f\"Bearer {bot_token}\"}\n    response = requests.post(SLACK_API_URL, headers=headers)\n\n    if response.status_code != 200:\n        raise HTTPException(\n            status_code=500, detail=\"Error communicating with Slack API.\"\n        )\n\n    data = response.json()\n    if not data.get(\"ok\", False):\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Invalid bot token: {data.get('error', 'Unknown error')}\",\n        )\n\n    return True\n\n\ndef validate_app_token(app_token: str) -> bool:\n    headers = {\"Authorization\": f\"Bearer {app_token}\"}\n    response = requests.post(SLACK_CONNECTIONS_OPEN_URL, headers=headers)\n\n    if response.status_code != 200:\n        raise HTTPException(\n            status_code=500, detail=\"Error communicating with Slack API.\"\n        )\n\n    data = response.json()\n    if not data.get(\"ok\", False):\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Invalid app token: {data.get('error', 'Unknown error')}\",\n        )\n\n    return True\n\n\ndef validate_user_token(user_token: str | None) -> None:\n    \"\"\"\n    Validate that the user_token is a valid user OAuth token (xoxp-...)\n    and not a bot token (xoxb-...)\n    Args:\n        user_token: The user OAuth token to validate.\n    Returns:\n        None is valid and will return successfully.\n    Raises:\n        HTTPException: If the token is invalid or missing required fields\n    \"\"\"\n    if not user_token:\n        # user_token is optional, so None or empty string is valid\n        return\n\n    if not user_token.startswith(SLACK_USER_TOKEN_PREFIX):\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Invalid user token format. User OAuth tokens must start with '{SLACK_USER_TOKEN_PREFIX}'\",\n        )\n\n    # Test the token with Slack API to ensure it's valid\n    headers = {\"Authorization\": f\"Bearer {user_token}\"}\n    response = requests.post(SLACK_API_URL, headers=headers)\n\n    if response.status_code != 200:\n        raise HTTPException(\n            status_code=500, detail=\"Error communicating with Slack API.\"\n        )\n\n    data = response.json()\n    if not data.get(\"ok\", False):\n        raise HTTPException(\n            status_code=400,\n            detail=f\"Invalid user token: {data.get('error', 'Unknown error')}\",\n        )\n"
  },
  {
    "path": "backend/onyx/server/manage/voice/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/manage/voice/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import Response\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import LLMProvider as LLMProviderModel\nfrom onyx.db.models import User\nfrom onyx.db.models import VoiceProvider\nfrom onyx.db.voice import deactivate_stt_provider\nfrom onyx.db.voice import deactivate_tts_provider\nfrom onyx.db.voice import delete_voice_provider\nfrom onyx.db.voice import fetch_voice_provider_by_id\nfrom onyx.db.voice import fetch_voice_provider_by_type\nfrom onyx.db.voice import fetch_voice_providers\nfrom onyx.db.voice import set_default_stt_provider\nfrom onyx.db.voice import set_default_tts_provider\nfrom onyx.db.voice import upsert_voice_provider\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.server.manage.voice.models import VoiceOption\nfrom onyx.server.manage.voice.models import VoiceProviderTestRequest\nfrom onyx.server.manage.voice.models import VoiceProviderUpdateSuccess\nfrom onyx.server.manage.voice.models import VoiceProviderUpsertRequest\nfrom onyx.server.manage.voice.models import VoiceProviderView\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.url import SSRFException\nfrom onyx.utils.url import validate_outbound_http_url\nfrom onyx.voice.factory import get_voice_provider\n\nlogger = setup_logger()\n\nadmin_router = APIRouter(prefix=\"/admin/voice\")\n\nVOICE_PROVIDER_VALIDATION_FAILURE_MESSAGE = (\n    \"Connection test failed. Please verify your API key and settings.\"\n)\n\n\ndef _validate_voice_api_base(provider_type: str, api_base: str | None) -> str | None:\n    \"\"\"Validate and normalize provider api_base / target URI.\"\"\"\n    if api_base is None:\n        return None\n\n    allow_private_network = provider_type.lower() == \"azure\"\n    try:\n        return validate_outbound_http_url(\n            api_base, allow_private_network=allow_private_network\n        )\n    except (ValueError, SSRFException) as e:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            f\"Invalid target URI: {str(e)}\",\n        ) from e\n\n\ndef _provider_to_view(provider: VoiceProvider) -> VoiceProviderView:\n    \"\"\"Convert a VoiceProvider model to a VoiceProviderView.\"\"\"\n    return VoiceProviderView(\n        id=provider.id,\n        name=provider.name,\n        provider_type=provider.provider_type,\n        is_default_stt=provider.is_default_stt,\n        is_default_tts=provider.is_default_tts,\n        stt_model=provider.stt_model,\n        tts_model=provider.tts_model,\n        default_voice=provider.default_voice,\n        has_api_key=bool(provider.api_key),\n        target_uri=provider.api_base,  # api_base stores the target URI for Azure\n    )\n\n\n@admin_router.get(\"/providers\")\ndef list_voice_providers(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[VoiceProviderView]:\n    \"\"\"List all configured voice providers.\"\"\"\n    providers = fetch_voice_providers(db_session)\n    return [_provider_to_view(provider) for provider in providers]\n\n\n@admin_router.post(\"/providers\")\nasync def upsert_voice_provider_endpoint(\n    request: VoiceProviderUpsertRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> VoiceProviderView:\n    \"\"\"Create or update a voice provider.\"\"\"\n    api_key = request.api_key\n    api_key_changed = request.api_key_changed\n\n    # If llm_provider_id is specified, copy the API key from that LLM provider\n    if request.llm_provider_id is not None:\n        llm_provider = db_session.get(LLMProviderModel, request.llm_provider_id)\n        if llm_provider is None:\n            raise OnyxError(\n                OnyxErrorCode.NOT_FOUND,\n                f\"LLM provider with id {request.llm_provider_id} not found.\",\n            )\n        if llm_provider.api_key is None:\n            raise OnyxError(\n                OnyxErrorCode.VALIDATION_ERROR,\n                \"Selected LLM provider has no API key configured.\",\n            )\n        api_key = llm_provider.api_key.get_value(apply_mask=False)\n        api_key_changed = True\n\n    # Use target_uri if provided, otherwise fall back to api_base\n    api_base = _validate_voice_api_base(\n        request.provider_type, request.target_uri or request.api_base\n    )\n\n    provider = upsert_voice_provider(\n        db_session=db_session,\n        provider_id=request.id,\n        name=request.name,\n        provider_type=request.provider_type,\n        api_key=api_key,\n        api_key_changed=api_key_changed,\n        api_base=api_base,\n        custom_config=request.custom_config,\n        stt_model=request.stt_model,\n        tts_model=request.tts_model,\n        default_voice=request.default_voice,\n        activate_stt=request.activate_stt,\n        activate_tts=request.activate_tts,\n    )\n\n    # Validate credentials before committing - rollback on failure\n    try:\n        voice_provider = get_voice_provider(provider)\n        await voice_provider.validate_credentials()\n    except OnyxError:\n        db_session.rollback()\n        raise\n    except Exception as e:\n        db_session.rollback()\n        logger.error(f\"Voice provider credential validation failed on save: {e}\")\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            VOICE_PROVIDER_VALIDATION_FAILURE_MESSAGE,\n        ) from e\n\n    db_session.commit()\n\n    return _provider_to_view(provider)\n\n\n@admin_router.delete(\n    \"/providers/{provider_id}\", status_code=204, response_class=Response\n)\ndef delete_voice_provider_endpoint(\n    provider_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    \"\"\"Delete a voice provider.\"\"\"\n    delete_voice_provider(db_session, provider_id)\n    db_session.commit()\n    return Response(status_code=204)\n\n\n@admin_router.post(\"/providers/{provider_id}/activate-stt\")\ndef activate_stt_provider_endpoint(\n    provider_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> VoiceProviderView:\n    \"\"\"Set a voice provider as the default STT provider.\"\"\"\n    provider = set_default_stt_provider(db_session=db_session, provider_id=provider_id)\n    db_session.commit()\n    return _provider_to_view(provider)\n\n\n@admin_router.post(\"/providers/{provider_id}/deactivate-stt\")\ndef deactivate_stt_provider_endpoint(\n    provider_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> VoiceProviderUpdateSuccess:\n    \"\"\"Remove the default STT status from a voice provider.\"\"\"\n    deactivate_stt_provider(db_session=db_session, provider_id=provider_id)\n    db_session.commit()\n    return VoiceProviderUpdateSuccess()\n\n\n@admin_router.post(\"/providers/{provider_id}/activate-tts\")\ndef activate_tts_provider_endpoint(\n    provider_id: int,\n    tts_model: str | None = None,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> VoiceProviderView:\n    \"\"\"Set a voice provider as the default TTS provider.\"\"\"\n    provider = set_default_tts_provider(\n        db_session=db_session, provider_id=provider_id, tts_model=tts_model\n    )\n    db_session.commit()\n    return _provider_to_view(provider)\n\n\n@admin_router.post(\"/providers/{provider_id}/deactivate-tts\")\ndef deactivate_tts_provider_endpoint(\n    provider_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> VoiceProviderUpdateSuccess:\n    \"\"\"Remove the default TTS status from a voice provider.\"\"\"\n    deactivate_tts_provider(db_session=db_session, provider_id=provider_id)\n    db_session.commit()\n    return VoiceProviderUpdateSuccess()\n\n\n@admin_router.post(\"/providers/test\")\nasync def test_voice_provider(\n    request: VoiceProviderTestRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> VoiceProviderUpdateSuccess:\n    \"\"\"Test a voice provider connection by making a real API call.\"\"\"\n    api_key = request.api_key\n\n    if request.use_stored_key:\n        existing_provider = fetch_voice_provider_by_type(\n            db_session, request.provider_type\n        )\n        if existing_provider is None or not existing_provider.api_key:\n            raise OnyxError(\n                OnyxErrorCode.VALIDATION_ERROR,\n                \"No stored API key found for this provider type.\",\n            )\n        api_key = existing_provider.api_key.get_value(apply_mask=False)\n\n    if not api_key:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"API key is required. Either provide api_key or set use_stored_key to true.\",\n        )\n\n    # Use target_uri if provided, otherwise fall back to api_base\n    api_base = _validate_voice_api_base(\n        request.provider_type, request.target_uri or request.api_base\n    )\n\n    # Create a temporary VoiceProvider for testing (not saved to DB)\n    temp_provider = VoiceProvider(\n        name=\"__test__\",\n        provider_type=request.provider_type,\n        api_base=api_base,\n        custom_config=request.custom_config or {},\n    )\n    temp_provider.api_key = api_key  # type: ignore[assignment]\n\n    try:\n        provider = get_voice_provider(temp_provider)\n    except ValueError as exc:\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(exc)) from exc\n\n    # Validate credentials with a real API call\n    try:\n        await provider.validate_credentials()\n    except OnyxError:\n        raise\n    except Exception as e:\n        logger.error(f\"Voice provider connection test failed: {e}\")\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            VOICE_PROVIDER_VALIDATION_FAILURE_MESSAGE,\n        ) from e\n\n    logger.info(f\"Voice provider test succeeded for {request.provider_type}.\")\n    return VoiceProviderUpdateSuccess()\n\n\n@admin_router.get(\"/providers/{provider_id}/voices\")\ndef get_provider_voices(\n    provider_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[VoiceOption]:\n    \"\"\"Get available voices for a provider.\"\"\"\n    provider_db = fetch_voice_provider_by_id(db_session, provider_id)\n    if provider_db is None:\n        raise OnyxError(OnyxErrorCode.NOT_FOUND, \"Voice provider not found.\")\n\n    if not provider_db.api_key:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR, \"Provider has no API key configured.\"\n        )\n\n    try:\n        provider = get_voice_provider(provider_db)\n    except ValueError as exc:\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(exc)) from exc\n\n    return [VoiceOption(**voice) for voice in provider.get_available_voices()]\n\n\n@admin_router.get(\"/voices\")\ndef get_voices_by_type(\n    provider_type: str,\n    _: User = Depends(current_admin_user),\n) -> list[VoiceOption]:\n    \"\"\"Get available voices for a provider type.\n\n    For providers like ElevenLabs and OpenAI, this fetches voices\n    without requiring an existing provider configuration.\n    \"\"\"\n    # Create a temporary VoiceProvider to get static voice list\n    temp_provider = VoiceProvider(\n        name=\"__temp__\",\n        provider_type=provider_type,\n    )\n\n    try:\n        provider = get_voice_provider(temp_provider)\n    except ValueError as exc:\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(exc)) from exc\n\n    return [VoiceOption(**voice) for voice in provider.get_available_voices()]\n"
  },
  {
    "path": "backend/onyx/server/manage/voice/models.py",
    "content": "from typing import Any\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\n\nclass VoiceProviderView(BaseModel):\n    \"\"\"Response model for voice provider listing.\"\"\"\n\n    id: int\n    name: str\n    provider_type: str  # \"openai\", \"azure\", \"elevenlabs\"\n    is_default_stt: bool\n    is_default_tts: bool\n    stt_model: str | None\n    tts_model: str | None\n    default_voice: str | None\n    has_api_key: bool = Field(\n        default=False,\n        description=\"Indicates whether an API key is stored for this provider.\",\n    )\n    target_uri: str | None = Field(\n        default=None,\n        description=\"Target URI for Azure Speech Services.\",\n    )\n\n\nclass VoiceProviderUpdateSuccess(BaseModel):\n    \"\"\"Simple status response for voice provider actions.\"\"\"\n\n    status: str = \"ok\"\n\n\nclass VoiceOption(BaseModel):\n    \"\"\"Voice option returned by voice providers.\"\"\"\n\n    id: str\n    name: str\n\n\nclass VoiceProviderUpsertRequest(BaseModel):\n    \"\"\"Request model for creating or updating a voice provider.\"\"\"\n\n    id: int | None = Field(default=None, description=\"Existing provider ID to update.\")\n    name: str\n    provider_type: str  # \"openai\", \"azure\", \"elevenlabs\"\n    api_key: str | None = Field(\n        default=None,\n        description=\"API key for the provider.\",\n    )\n    api_key_changed: bool = Field(\n        default=False,\n        description=\"Set to true when providing a new API key for an existing provider.\",\n    )\n    llm_provider_id: int | None = Field(\n        default=None,\n        description=\"If set, copies the API key from the specified LLM provider.\",\n    )\n    api_base: str | None = None\n    target_uri: str | None = Field(\n        default=None,\n        description=\"Target URI for Azure Speech Services (maps to api_base).\",\n    )\n    custom_config: dict[str, Any] | None = None\n    stt_model: str | None = None\n    tts_model: str | None = None\n    default_voice: str | None = None\n    activate_stt: bool = Field(\n        default=False,\n        description=\"If true, sets this provider as the default STT provider after upsert.\",\n    )\n    activate_tts: bool = Field(\n        default=False,\n        description=\"If true, sets this provider as the default TTS provider after upsert.\",\n    )\n\n\nclass VoiceProviderTestRequest(BaseModel):\n    \"\"\"Request model for testing a voice provider connection.\"\"\"\n\n    provider_type: str\n    api_key: str | None = Field(\n        default=None,\n        description=\"API key for testing. If not provided, use_stored_key must be true.\",\n    )\n    use_stored_key: bool = Field(\n        default=False,\n        description=\"If true, use the stored API key for this provider type.\",\n    )\n    api_base: str | None = None\n    target_uri: str | None = Field(\n        default=None,\n        description=\"Target URI for Azure Speech Services (maps to api_base).\",\n    )\n    custom_config: dict[str, Any] | None = None\n"
  },
  {
    "path": "backend/onyx/server/manage/voice/user_api.py",
    "content": "import secrets\nfrom collections.abc import AsyncIterator\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import File\nfrom fastapi import Query\nfrom fastapi import UploadFile\nfrom fastapi.responses import StreamingResponse\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import User\nfrom onyx.db.voice import fetch_default_stt_provider\nfrom onyx.db.voice import fetch_default_tts_provider\nfrom onyx.db.voice import update_user_voice_settings\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.redis.redis_pool import store_ws_token\nfrom onyx.redis.redis_pool import WsTokenRateLimitExceeded\nfrom onyx.server.manage.models import VoiceSettingsUpdateRequest\nfrom onyx.utils.logger import setup_logger\nfrom onyx.voice.factory import get_voice_provider\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/voice\")\n\n# Max audio file size: 25MB (Whisper limit)\nMAX_AUDIO_SIZE = 25 * 1024 * 1024\n# Chunk size for streaming uploads (8KB)\nUPLOAD_READ_CHUNK_SIZE = 8192\n\n\nclass VoiceStatusResponse(BaseModel):\n    stt_enabled: bool\n    tts_enabled: bool\n\n\n@router.get(\"/status\")\ndef get_voice_status(\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> VoiceStatusResponse:\n    \"\"\"Check whether STT and TTS providers are configured and ready.\"\"\"\n    stt_provider = fetch_default_stt_provider(db_session)\n    tts_provider = fetch_default_tts_provider(db_session)\n    return VoiceStatusResponse(\n        stt_enabled=stt_provider is not None and stt_provider.api_key is not None,\n        tts_enabled=tts_provider is not None and tts_provider.api_key is not None,\n    )\n\n\n@router.post(\"/transcribe\")\nasync def transcribe_audio(\n    audio: UploadFile = File(...),\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> dict[str, str]:\n    \"\"\"Transcribe audio to text using the default STT provider.\"\"\"\n    provider_db = fetch_default_stt_provider(db_session)\n    if provider_db is None:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"No speech-to-text provider configured. Please contact your administrator.\",\n        )\n\n    if not provider_db.api_key:\n        raise OnyxError(\n            OnyxErrorCode.VALIDATION_ERROR,\n            \"Voice provider API key not configured.\",\n        )\n\n    # Read in chunks to enforce size limit during streaming (prevents OOM attacks)\n    chunks: list[bytes] = []\n    total = 0\n    while chunk := await audio.read(UPLOAD_READ_CHUNK_SIZE):\n        total += len(chunk)\n        if total > MAX_AUDIO_SIZE:\n            raise OnyxError(\n                OnyxErrorCode.PAYLOAD_TOO_LARGE,\n                f\"Audio file too large. Maximum size is {MAX_AUDIO_SIZE // (1024 * 1024)}MB.\",\n            )\n        chunks.append(chunk)\n    audio_data = b\"\".join(chunks)\n\n    # Extract format from filename\n    filename = audio.filename or \"audio.webm\"\n    audio_format = filename.rsplit(\".\", 1)[-1] if \".\" in filename else \"webm\"\n\n    try:\n        provider = get_voice_provider(provider_db)\n    except ValueError as exc:\n        raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, str(exc)) from exc\n\n    try:\n        text = await provider.transcribe(audio_data, audio_format)\n        return {\"text\": text}\n    except NotImplementedError as exc:\n        raise OnyxError(\n            OnyxErrorCode.NOT_IMPLEMENTED,\n            f\"Speech-to-text not implemented for {provider_db.provider_type}.\",\n        ) from exc\n    except Exception as exc:\n        logger.error(f\"Transcription failed: {exc}\")\n        raise OnyxError(\n            OnyxErrorCode.INTERNAL_ERROR,\n            \"Transcription failed. Please try again.\",\n        ) from exc\n\n\n@router.post(\"/synthesize\")\nasync def synthesize_speech(\n    text: str | None = Query(\n        default=None, description=\"Text to synthesize\", max_length=4096\n    ),\n    voice: str | None = Query(default=None, description=\"Voice ID to use\"),\n    speed: float | None = Query(\n        default=None, description=\"Playback speed (0.5-2.0)\", ge=0.5, le=2.0\n    ),\n    user: User = Depends(current_user),\n) -> StreamingResponse:\n    \"\"\"\n    Synthesize text to speech using the default TTS provider.\n\n    Accepts parameters via query string for streaming compatibility.\n    \"\"\"\n    logger.info(\n        f\"TTS request: text length={len(text) if text else 0}, voice={voice}, speed={speed}\"\n    )\n\n    if not text:\n        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, \"Text is required\")\n\n    # Use short-lived session to fetch provider config, then release connection\n    # before starting the long-running streaming response\n    with get_session_with_current_tenant() as db_session:\n        provider_db = fetch_default_tts_provider(db_session)\n        if provider_db is None:\n            logger.error(\"No TTS provider configured\")\n            raise OnyxError(\n                OnyxErrorCode.VALIDATION_ERROR,\n                \"No text-to-speech provider configured. Please contact your administrator.\",\n            )\n\n        if not provider_db.api_key:\n            logger.error(\"TTS provider has no API key\")\n            raise OnyxError(\n                OnyxErrorCode.VALIDATION_ERROR,\n                \"Voice provider API key not configured.\",\n            )\n\n        # Use request voice or provider default\n        final_voice = voice or provider_db.default_voice\n        # Use explicit None checks to avoid falsy float issues (0.0 would be skipped with `or`)\n        final_speed = (\n            speed\n            if speed is not None\n            else (\n                user.voice_playback_speed\n                if user.voice_playback_speed is not None\n                else 1.0\n            )\n        )\n\n        logger.info(\n            f\"TTS using provider: {provider_db.provider_type}, voice: {final_voice}, speed: {final_speed}\"\n        )\n\n        try:\n            provider = get_voice_provider(provider_db)\n        except ValueError as exc:\n            logger.error(f\"Failed to get voice provider: {exc}\")\n            raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, str(exc)) from exc\n\n    # Session is now closed - streaming response won't hold DB connection\n    async def audio_stream() -> AsyncIterator[bytes]:\n        try:\n            chunk_count = 0\n            async for chunk in provider.synthesize_stream(\n                text=text, voice=final_voice, speed=final_speed\n            ):\n                chunk_count += 1\n                yield chunk\n            logger.info(f\"TTS streaming complete: {chunk_count} chunks sent\")\n        except NotImplementedError as exc:\n            logger.error(f\"TTS not implemented: {exc}\")\n            raise\n        except Exception as exc:\n            logger.error(f\"Synthesis failed: {exc}\")\n            raise\n\n    return StreamingResponse(\n        audio_stream(),\n        media_type=\"audio/mpeg\",\n        headers={\n            \"Content-Disposition\": \"inline; filename=speech.mp3\",\n            # Allow streaming by not setting content-length\n            \"Cache-Control\": \"no-cache\",\n            \"X-Accel-Buffering\": \"no\",  # Disable nginx buffering\n        },\n    )\n\n\n@router.patch(\"/settings\")\ndef update_voice_settings(\n    request: VoiceSettingsUpdateRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> dict[str, str]:\n    \"\"\"Update user's voice settings.\"\"\"\n    update_user_voice_settings(\n        db_session=db_session,\n        user_id=user.id,\n        auto_send=request.auto_send,\n        auto_playback=request.auto_playback,\n        playback_speed=request.playback_speed,\n    )\n    db_session.commit()\n    return {\"status\": \"ok\"}\n\n\nclass WSTokenResponse(BaseModel):\n    token: str\n\n\n@router.post(\"/ws-token\")\nasync def get_ws_token(\n    user: User = Depends(current_user),\n) -> WSTokenResponse:\n    \"\"\"\n    Generate a short-lived token for WebSocket authentication.\n\n    This token should be passed as a query parameter when connecting\n    to voice WebSocket endpoints (e.g., /voice/transcribe/stream?token=xxx).\n\n    The token expires after 60 seconds and is single-use.\n    Rate limited to 10 tokens per minute per user.\n    \"\"\"\n    token = secrets.token_urlsafe(32)\n    try:\n        await store_ws_token(token, str(user.id))\n    except WsTokenRateLimitExceeded:\n        raise OnyxError(\n            OnyxErrorCode.RATE_LIMITED,\n            \"Too many token requests. Please wait before requesting another.\",\n        )\n    return WSTokenResponse(token=token)\n"
  },
  {
    "path": "backend/onyx/server/manage/voice/websocket_api.py",
    "content": "\"\"\"WebSocket API for streaming speech-to-text and text-to-speech.\"\"\"\n\nimport asyncio\nimport io\nimport json\nimport os\nfrom collections.abc import MutableMapping\nfrom typing import Any\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import WebSocket\nfrom fastapi import WebSocketDisconnect\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_user_from_websocket\nfrom onyx.db.engine.sql_engine import get_sqlalchemy_engine\nfrom onyx.db.models import User\nfrom onyx.db.voice import fetch_default_stt_provider\nfrom onyx.db.voice import fetch_default_tts_provider\nfrom onyx.utils.logger import setup_logger\nfrom onyx.voice.factory import get_voice_provider\nfrom onyx.voice.interface import StreamingSynthesizerProtocol\nfrom onyx.voice.interface import StreamingTranscriberProtocol\nfrom onyx.voice.interface import TranscriptResult\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/voice\")\n\n\n# Transcribe every ~0.5 seconds of audio (webm/opus is ~2-4KB/s, so ~1-2KB per 0.5s)\nMIN_CHUNK_BYTES = 1500\nVOICE_DISABLE_STREAMING_FALLBACK = (\n    os.environ.get(\"VOICE_DISABLE_STREAMING_FALLBACK\", \"\").lower() == \"true\"\n)\n\n# WebSocket size limits to prevent memory exhaustion attacks\nWS_MAX_MESSAGE_SIZE = 64 * 1024  # 64KB per message (OWASP recommendation)\nWS_MAX_TOTAL_BYTES = 25 * 1024 * 1024  # 25MB total per connection (matches REST API)\nWS_MAX_TEXT_MESSAGE_SIZE = 16 * 1024  # 16KB for text/JSON messages\nWS_MAX_TTS_TEXT_LENGTH = 4096  # Max text length per synthesize call (matches REST API)\n\n\nclass ChunkedTranscriber:\n    \"\"\"Fallback transcriber for providers without streaming support.\"\"\"\n\n    def __init__(self, provider: Any, audio_format: str = \"webm\"):\n        self.provider = provider\n        self.audio_format = audio_format\n        self.chunk_buffer = io.BytesIO()\n        self.full_audio = io.BytesIO()\n        self.chunk_bytes = 0\n        self.transcripts: list[str] = []\n\n    async def add_chunk(self, chunk: bytes) -> str | None:\n        \"\"\"Add audio chunk. Returns transcript if enough audio accumulated.\"\"\"\n        self.chunk_buffer.write(chunk)\n        self.full_audio.write(chunk)\n        self.chunk_bytes += len(chunk)\n\n        if self.chunk_bytes >= MIN_CHUNK_BYTES:\n            return await self._transcribe_chunk()\n        return None\n\n    async def _transcribe_chunk(self) -> str | None:\n        \"\"\"Transcribe current chunk and append to running transcript.\"\"\"\n        audio_data = self.chunk_buffer.getvalue()\n        if not audio_data:\n            return None\n\n        try:\n            transcript = await self.provider.transcribe(audio_data, self.audio_format)\n            self.chunk_buffer = io.BytesIO()\n            self.chunk_bytes = 0\n\n            if transcript and transcript.strip():\n                self.transcripts.append(transcript.strip())\n                return \" \".join(self.transcripts)\n            return None\n        except Exception as e:\n            logger.error(f\"Transcription error: {e}\")\n            self.chunk_buffer = io.BytesIO()\n            self.chunk_bytes = 0\n            return None\n\n    async def flush(self) -> str:\n        \"\"\"Get final transcript from full audio for best accuracy.\"\"\"\n        full_audio_data = self.full_audio.getvalue()\n        if full_audio_data:\n            try:\n                transcript = await self.provider.transcribe(\n                    full_audio_data, self.audio_format\n                )\n                if transcript and transcript.strip():\n                    return transcript.strip()\n            except Exception as e:\n                logger.error(f\"Final transcription error: {e}\")\n        return \" \".join(self.transcripts)\n\n\nasync def handle_streaming_transcription(\n    websocket: WebSocket,\n    transcriber: StreamingTranscriberProtocol,\n) -> None:\n    \"\"\"Handle transcription using native streaming API.\"\"\"\n    logger.info(\"Streaming transcription: starting handler\")\n    last_transcript = \"\"\n    chunk_count = 0\n    total_bytes = 0\n\n    async def receive_transcripts() -> None:\n        \"\"\"Background task to receive and send transcripts.\"\"\"\n        nonlocal last_transcript\n        logger.info(\"Streaming transcription: starting transcript receiver\")\n        while True:\n            result: TranscriptResult | None = await transcriber.receive_transcript()\n            if result is None:  # End of stream\n                logger.info(\"Streaming transcription: transcript stream ended\")\n                break\n            # Send if text changed OR if VAD detected end of speech (for auto-send trigger)\n            if result.text and (result.text != last_transcript or result.is_vad_end):\n                last_transcript = result.text\n                logger.debug(\n                    f\"Streaming transcription: got transcript: {result.text[:50]}... (is_vad_end={result.is_vad_end})\"\n                )\n                await websocket.send_json(\n                    {\n                        \"type\": \"transcript\",\n                        \"text\": result.text,\n                        \"is_final\": result.is_vad_end,\n                    }\n                )\n\n    # Start receiving transcripts in background\n    receive_task = asyncio.create_task(receive_transcripts())\n\n    try:\n        while True:\n            message = await websocket.receive()\n            msg_type = message.get(\"type\", \"unknown\")\n\n            if msg_type == \"websocket.disconnect\":\n                logger.info(\n                    f\"Streaming transcription: client disconnected after {chunk_count} chunks ({total_bytes} bytes)\"\n                )\n                break\n\n            if \"bytes\" in message:\n                chunk_size = len(message[\"bytes\"])\n\n                # Enforce per-message size limit\n                if chunk_size > WS_MAX_MESSAGE_SIZE:\n                    logger.warning(\n                        f\"Streaming transcription: message too large ({chunk_size} bytes)\"\n                    )\n                    await websocket.send_json(\n                        {\"type\": \"error\", \"message\": \"Message too large\"}\n                    )\n                    break\n\n                # Enforce total connection size limit\n                if total_bytes + chunk_size > WS_MAX_TOTAL_BYTES:\n                    logger.warning(\n                        f\"Streaming transcription: total size limit exceeded ({total_bytes + chunk_size} bytes)\"\n                    )\n                    await websocket.send_json(\n                        {\"type\": \"error\", \"message\": \"Total size limit exceeded\"}\n                    )\n                    break\n\n                chunk_count += 1\n                total_bytes += chunk_size\n                logger.debug(\n                    f\"Streaming transcription: received chunk {chunk_count} ({chunk_size} bytes, total: {total_bytes})\"\n                )\n                await transcriber.send_audio(message[\"bytes\"])\n\n            elif \"text\" in message:\n                try:\n                    data = json.loads(message[\"text\"])\n                    logger.debug(\n                        f\"Streaming transcription: received text message: {data}\"\n                    )\n                    if data.get(\"type\") == \"end\":\n                        logger.info(\n                            \"Streaming transcription: end signal received, closing transcriber\"\n                        )\n                        final_transcript = await transcriber.close()\n                        receive_task.cancel()\n                        logger.info(\n                            \"Streaming transcription: final transcript: \"\n                            f\"{final_transcript[:100] if final_transcript else '(empty)'}...\"\n                        )\n                        await websocket.send_json(\n                            {\n                                \"type\": \"transcript\",\n                                \"text\": final_transcript,\n                                \"is_final\": True,\n                            }\n                        )\n                        break\n                    elif data.get(\"type\") == \"reset\":\n                        # Reset accumulated transcript after auto-send\n                        logger.info(\n                            \"Streaming transcription: reset signal received, clearing transcript\"\n                        )\n                        transcriber.reset_transcript()\n                except json.JSONDecodeError:\n                    logger.warning(\n                        f\"Streaming transcription: failed to parse JSON: {message.get('text', '')[:100]}\"\n                    )\n    except Exception as e:\n        logger.error(f\"Streaming transcription: error: {e}\", exc_info=True)\n        raise\n    finally:\n        receive_task.cancel()\n        try:\n            await receive_task\n        except asyncio.CancelledError:\n            pass\n        logger.info(\n            f\"Streaming transcription: handler finished. Processed {chunk_count} chunks, {total_bytes} total bytes\"\n        )\n\n\nasync def handle_chunked_transcription(\n    websocket: WebSocket,\n    transcriber: ChunkedTranscriber,\n) -> None:\n    \"\"\"Handle transcription using chunked batch API.\"\"\"\n    logger.info(\"Chunked transcription: starting handler\")\n    chunk_count = 0\n    total_bytes = 0\n\n    while True:\n        message = await websocket.receive()\n        msg_type = message.get(\"type\", \"unknown\")\n\n        if msg_type == \"websocket.disconnect\":\n            logger.info(\n                f\"Chunked transcription: client disconnected after {chunk_count} chunks ({total_bytes} bytes)\"\n            )\n            break\n\n        if \"bytes\" in message:\n            chunk_size = len(message[\"bytes\"])\n\n            # Enforce per-message size limit\n            if chunk_size > WS_MAX_MESSAGE_SIZE:\n                logger.warning(\n                    f\"Chunked transcription: message too large ({chunk_size} bytes)\"\n                )\n                await websocket.send_json(\n                    {\"type\": \"error\", \"message\": \"Message too large\"}\n                )\n                break\n\n            # Enforce total connection size limit\n            if total_bytes + chunk_size > WS_MAX_TOTAL_BYTES:\n                logger.warning(\n                    f\"Chunked transcription: total size limit exceeded ({total_bytes + chunk_size} bytes)\"\n                )\n                await websocket.send_json(\n                    {\"type\": \"error\", \"message\": \"Total size limit exceeded\"}\n                )\n                break\n\n            chunk_count += 1\n            total_bytes += chunk_size\n            logger.debug(\n                f\"Chunked transcription: received chunk {chunk_count} ({chunk_size} bytes, total: {total_bytes})\"\n            )\n\n            transcript = await transcriber.add_chunk(message[\"bytes\"])\n            if transcript:\n                logger.debug(\n                    f\"Chunked transcription: got transcript: {transcript[:50]}...\"\n                )\n                await websocket.send_json(\n                    {\n                        \"type\": \"transcript\",\n                        \"text\": transcript,\n                        \"is_final\": False,\n                    }\n                )\n\n        elif \"text\" in message:\n            try:\n                data = json.loads(message[\"text\"])\n                logger.debug(f\"Chunked transcription: received text message: {data}\")\n                if data.get(\"type\") == \"end\":\n                    logger.info(\"Chunked transcription: end signal received, flushing\")\n                    final_transcript = await transcriber.flush()\n                    logger.info(\n                        f\"Chunked transcription: final transcript: {final_transcript[:100] if final_transcript else '(empty)'}...\"\n                    )\n                    await websocket.send_json(\n                        {\n                            \"type\": \"transcript\",\n                            \"text\": final_transcript,\n                            \"is_final\": True,\n                        }\n                    )\n                    break\n            except json.JSONDecodeError:\n                logger.warning(\n                    f\"Chunked transcription: failed to parse JSON: {message.get('text', '')[:100]}\"\n                )\n\n    logger.info(\n        f\"Chunked transcription: handler finished. Processed {chunk_count} chunks, {total_bytes} total bytes\"\n    )\n\n\n@router.websocket(\"/transcribe/stream\")\nasync def websocket_transcribe(\n    websocket: WebSocket,\n    _user: User = Depends(current_user_from_websocket),\n) -> None:\n    \"\"\"\n    WebSocket endpoint for streaming speech-to-text.\n\n    Protocol:\n    - Client sends binary audio chunks\n    - Server sends JSON: {\"type\": \"transcript\", \"text\": \"...\", \"is_final\": false}\n    - Client sends JSON {\"type\": \"end\"} to signal end\n    - Server responds with final transcript and closes\n\n    Authentication:\n        Requires `token` query parameter (e.g., /voice/transcribe/stream?token=xxx).\n        Applies same auth checks as HTTP endpoints (verification, role checks).\n    \"\"\"\n    logger.info(\"WebSocket transcribe: connection request received (authenticated)\")\n\n    try:\n        await websocket.accept()\n        logger.info(\"WebSocket transcribe: connection accepted\")\n    except Exception as e:\n        logger.error(f\"WebSocket transcribe: failed to accept connection: {e}\")\n        return\n\n    streaming_transcriber = None\n    provider = None\n\n    try:\n        # Get STT provider\n        logger.info(\"WebSocket transcribe: fetching STT provider from database\")\n        engine = get_sqlalchemy_engine()\n        with Session(engine) as db_session:\n            provider_db = fetch_default_stt_provider(db_session)\n            if provider_db is None:\n                logger.warning(\n                    \"WebSocket transcribe: no default STT provider configured\"\n                )\n                await websocket.send_json(\n                    {\n                        \"type\": \"error\",\n                        \"message\": \"No speech-to-text provider configured\",\n                    }\n                )\n                return\n\n            if not provider_db.api_key:\n                logger.warning(\"WebSocket transcribe: STT provider has no API key\")\n                await websocket.send_json(\n                    {\n                        \"type\": \"error\",\n                        \"message\": \"Speech-to-text provider has no API key configured\",\n                    }\n                )\n                return\n\n            logger.info(\n                f\"WebSocket transcribe: creating voice provider: {provider_db.provider_type}\"\n            )\n            try:\n                provider = get_voice_provider(provider_db)\n                logger.info(\n                    f\"WebSocket transcribe: voice provider created, streaming supported: {provider.supports_streaming_stt()}\"\n                )\n            except ValueError as e:\n                logger.error(\n                    f\"WebSocket transcribe: failed to create voice provider: {e}\"\n                )\n                await websocket.send_json({\"type\": \"error\", \"message\": str(e)})\n                return\n\n        # Use native streaming if provider supports it\n        if provider.supports_streaming_stt():\n            logger.info(\"WebSocket transcribe: using native streaming STT\")\n            try:\n                streaming_transcriber = await provider.create_streaming_transcriber()\n                logger.info(\n                    \"WebSocket transcribe: streaming transcriber created successfully\"\n                )\n                await handle_streaming_transcription(websocket, streaming_transcriber)\n            except Exception as e:\n                logger.error(\n                    f\"WebSocket transcribe: failed to create streaming transcriber: {e}\"\n                )\n                if VOICE_DISABLE_STREAMING_FALLBACK:\n                    await websocket.send_json(\n                        {\"type\": \"error\", \"message\": f\"Streaming STT failed: {e}\"}\n                    )\n                    return\n                logger.info(\"WebSocket transcribe: falling back to chunked STT\")\n                # Browser stream provides raw PCM16 chunks over WebSocket.\n                chunked_transcriber = ChunkedTranscriber(provider, audio_format=\"pcm16\")\n                await handle_chunked_transcription(websocket, chunked_transcriber)\n        else:\n            # Fall back to chunked transcription\n            if VOICE_DISABLE_STREAMING_FALLBACK:\n                await websocket.send_json(\n                    {\n                        \"type\": \"error\",\n                        \"message\": \"Provider doesn't support streaming STT\",\n                    }\n                )\n                return\n            logger.info(\n                \"WebSocket transcribe: using chunked STT (provider doesn't support streaming)\"\n            )\n            chunked_transcriber = ChunkedTranscriber(provider, audio_format=\"pcm16\")\n            await handle_chunked_transcription(websocket, chunked_transcriber)\n\n    except WebSocketDisconnect:\n        logger.debug(\"WebSocket transcribe: client disconnected\")\n    except Exception as e:\n        logger.error(f\"WebSocket transcribe: unhandled error: {e}\", exc_info=True)\n        try:\n            # Send generic error to avoid leaking sensitive details\n            await websocket.send_json(\n                {\"type\": \"error\", \"message\": \"An unexpected error occurred\"}\n            )\n        except Exception:\n            pass\n    finally:\n        if streaming_transcriber:\n            try:\n                await streaming_transcriber.close()\n            except Exception:\n                pass\n        try:\n            await websocket.close()\n        except Exception:\n            pass\n        logger.info(\"WebSocket transcribe: connection closed\")\n\n\nasync def handle_streaming_synthesis(\n    websocket: WebSocket,\n    synthesizer: StreamingSynthesizerProtocol,\n) -> None:\n    \"\"\"Handle TTS using native streaming API.\"\"\"\n    logger.info(\"Streaming synthesis: starting handler\")\n\n    async def send_audio() -> None:\n        \"\"\"Background task to send audio chunks to client.\"\"\"\n        chunk_count = 0\n        total_bytes = 0\n        try:\n            while True:\n                audio_chunk = await synthesizer.receive_audio()\n                if audio_chunk is None:\n                    logger.info(\n                        f\"Streaming synthesis: audio stream ended, sent {chunk_count} chunks, {total_bytes} bytes\"\n                    )\n                    try:\n                        await websocket.send_json({\"type\": \"audio_done\"})\n                        logger.info(\"Streaming synthesis: sent audio_done to client\")\n                    except Exception as e:\n                        logger.warning(\n                            f\"Streaming synthesis: failed to send audio_done: {e}\"\n                        )\n                    break\n                if audio_chunk:  # Skip empty chunks\n                    chunk_count += 1\n                    total_bytes += len(audio_chunk)\n                    try:\n                        await websocket.send_bytes(audio_chunk)\n                    except Exception as e:\n                        logger.warning(\n                            f\"Streaming synthesis: failed to send chunk: {e}\"\n                        )\n                        break\n        except asyncio.CancelledError:\n            logger.info(\n                f\"Streaming synthesis: send_audio cancelled after {chunk_count} chunks\"\n            )\n        except Exception as e:\n            logger.error(f\"Streaming synthesis: send_audio error: {e}\")\n\n    send_task: asyncio.Task | None = None\n    disconnected = False\n\n    try:\n        while not disconnected:\n            try:\n                message = await websocket.receive()\n            except WebSocketDisconnect:\n                logger.info(\"Streaming synthesis: client disconnected\")\n                break\n\n            msg_type = message.get(\"type\", \"unknown\")  # type: ignore[possibly-undefined]\n\n            if msg_type == \"websocket.disconnect\":\n                logger.info(\"Streaming synthesis: client disconnected\")\n                disconnected = True\n                break\n\n            if \"text\" in message:\n                # Enforce text message size limit\n                msg_size = len(message[\"text\"])\n                if msg_size > WS_MAX_TEXT_MESSAGE_SIZE:\n                    logger.warning(\n                        f\"Streaming synthesis: text message too large ({msg_size} bytes)\"\n                    )\n                    await websocket.send_json(\n                        {\"type\": \"error\", \"message\": \"Message too large\"}\n                    )\n                    break\n\n                try:\n                    data = json.loads(message[\"text\"])\n\n                    if data.get(\"type\") == \"synthesize\":\n                        text = data.get(\"text\", \"\")\n                        # Enforce per-text size limit\n                        if len(text) > WS_MAX_TTS_TEXT_LENGTH:\n                            logger.warning(\n                                f\"Streaming synthesis: text too long ({len(text)} chars)\"\n                            )\n                            await websocket.send_json(\n                                {\"type\": \"error\", \"message\": \"Text too long\"}\n                            )\n                            continue\n                        if text:\n                            # Start audio receiver on first text chunk so playback\n                            # can begin before the full assistant response completes.\n                            if send_task is None:\n                                send_task = asyncio.create_task(send_audio())\n                            logger.debug(\n                                f\"Streaming synthesis: forwarding text chunk ({len(text)} chars)\"\n                            )\n                            await synthesizer.send_text(text)\n\n                    elif data.get(\"type\") == \"end\":\n                        logger.info(\"Streaming synthesis: end signal received\")\n\n                        # Ensure receiver is active even if no prior text chunks arrived.\n                        if send_task is None:\n                            send_task = asyncio.create_task(send_audio())\n\n                        # Signal end of input\n                        if hasattr(synthesizer, \"flush\"):\n                            await synthesizer.flush()\n\n                        # Wait for all audio to be sent\n                        logger.info(\n                            \"Streaming synthesis: waiting for audio stream to complete\"\n                        )\n                        try:\n                            await asyncio.wait_for(send_task, timeout=60.0)\n                        except asyncio.TimeoutError:\n                            logger.warning(\n                                \"Streaming synthesis: timeout waiting for audio\"\n                            )\n                        break\n\n                except json.JSONDecodeError:\n                    logger.warning(\n                        f\"Streaming synthesis: failed to parse JSON: {message.get('text', '')[:100]}\"\n                    )\n\n    except WebSocketDisconnect:\n        logger.debug(\"Streaming synthesis: client disconnected during synthesis\")\n    except Exception as e:\n        logger.error(f\"Streaming synthesis: error: {e}\", exc_info=True)\n    finally:\n        if send_task and not send_task.done():\n            logger.info(\"Streaming synthesis: waiting for send_task to finish\")\n            try:\n                await asyncio.wait_for(send_task, timeout=30.0)\n            except asyncio.TimeoutError:\n                logger.warning(\"Streaming synthesis: timeout waiting for send_task\")\n                send_task.cancel()\n                try:\n                    await send_task\n                except asyncio.CancelledError:\n                    pass\n            except asyncio.CancelledError:\n                pass\n        logger.info(\"Streaming synthesis: handler finished\")\n\n\nasync def handle_chunked_synthesis(\n    websocket: WebSocket,\n    provider: Any,\n    first_message: MutableMapping[str, Any] | None = None,\n) -> None:\n    \"\"\"Fallback TTS handler using provider.synthesize_stream.\n\n    Args:\n        websocket: The WebSocket connection\n        provider: Voice provider instance\n        first_message: Optional first message already received (used when falling\n            back from streaming mode, where the first message was already consumed)\n    \"\"\"\n    logger.info(\"Chunked synthesis: starting handler\")\n    text_buffer: list[str] = []\n    voice: str | None = None\n    speed = 1.0\n\n    # Process pre-received message if provided\n    pending_message = first_message\n\n    try:\n        while True:\n            if pending_message is not None:\n                message = pending_message\n                pending_message = None\n            else:\n                message = await websocket.receive()\n            msg_type = message.get(\"type\", \"unknown\")\n\n            if msg_type == \"websocket.disconnect\":\n                logger.info(\"Chunked synthesis: client disconnected\")\n                break\n\n            if \"text\" not in message:\n                continue\n\n            # Enforce text message size limit\n            msg_size = len(message[\"text\"])\n            if msg_size > WS_MAX_TEXT_MESSAGE_SIZE:\n                logger.warning(\n                    f\"Chunked synthesis: text message too large ({msg_size} bytes)\"\n                )\n                await websocket.send_json(\n                    {\"type\": \"error\", \"message\": \"Message too large\"}\n                )\n                break\n\n            try:\n                data = json.loads(message[\"text\"])\n            except json.JSONDecodeError:\n                logger.warning(\n                    f\"Chunked synthesis: failed to parse JSON: {message.get('text', '')[:100]}\"\n                )\n                continue\n\n            msg_data_type = data.get(\"type\")  # type: ignore[possibly-undefined]\n            if msg_data_type == \"synthesize\":\n                text = data.get(\"text\", \"\")\n                # Enforce per-text size limit\n                if len(text) > WS_MAX_TTS_TEXT_LENGTH:\n                    logger.warning(\n                        f\"Chunked synthesis: text too long ({len(text)} chars)\"\n                    )\n                    await websocket.send_json(\n                        {\"type\": \"error\", \"message\": \"Text too long\"}\n                    )\n                    continue\n                if text:\n                    text_buffer.append(text)\n                    logger.debug(\n                        f\"Chunked synthesis: buffered text ({len(text)} chars), total buffered: {len(text_buffer)} chunks\"\n                    )\n                if isinstance(data.get(\"voice\"), str) and data[\"voice\"]:\n                    voice = data[\"voice\"]\n                if isinstance(data.get(\"speed\"), (int, float)):\n                    speed = float(data[\"speed\"])\n            elif msg_data_type == \"end\":\n                logger.info(\"Chunked synthesis: end signal received\")\n                full_text = \" \".join(text_buffer).strip()\n                if not full_text:\n                    await websocket.send_json({\"type\": \"audio_done\"})\n                    logger.info(\"Chunked synthesis: no text, sent audio_done\")\n                    break\n\n                chunk_count = 0\n                total_bytes = 0\n                logger.info(\n                    f\"Chunked synthesis: sending full text ({len(full_text)} chars)\"\n                )\n                async for audio_chunk in provider.synthesize_stream(\n                    full_text, voice=voice, speed=speed\n                ):\n                    if not audio_chunk:\n                        continue\n                    chunk_count += 1\n                    total_bytes += len(audio_chunk)\n                    await websocket.send_bytes(audio_chunk)\n                await websocket.send_json({\"type\": \"audio_done\"})\n                logger.info(\n                    f\"Chunked synthesis: sent audio_done after {chunk_count} chunks, {total_bytes} bytes\"\n                )\n                break\n    except WebSocketDisconnect:\n        logger.debug(\"Chunked synthesis: client disconnected\")\n    except Exception as e:\n        logger.error(f\"Chunked synthesis: error: {e}\", exc_info=True)\n        raise\n    finally:\n        logger.info(\"Chunked synthesis: handler finished\")\n\n\n@router.websocket(\"/synthesize/stream\")\nasync def websocket_synthesize(\n    websocket: WebSocket,\n    _user: User = Depends(current_user_from_websocket),\n) -> None:\n    \"\"\"\n    WebSocket endpoint for streaming text-to-speech.\n\n    Protocol:\n    - Client sends JSON: {\"type\": \"synthesize\", \"text\": \"...\", \"voice\": \"...\", \"speed\": 1.0}\n    - Server sends binary audio chunks\n    - Server sends JSON: {\"type\": \"audio_done\"} when synthesis completes\n    - Client sends JSON {\"type\": \"end\"} to close connection\n\n    Authentication:\n        Requires `token` query parameter (e.g., /voice/synthesize/stream?token=xxx).\n        Applies same auth checks as HTTP endpoints (verification, role checks).\n    \"\"\"\n    logger.info(\"WebSocket synthesize: connection request received (authenticated)\")\n\n    try:\n        await websocket.accept()\n        logger.info(\"WebSocket synthesize: connection accepted\")\n    except Exception as e:\n        logger.error(f\"WebSocket synthesize: failed to accept connection: {e}\")\n        return\n\n    streaming_synthesizer: StreamingSynthesizerProtocol | None = None\n    provider = None\n\n    try:\n        # Get TTS provider\n        logger.info(\"WebSocket synthesize: fetching TTS provider from database\")\n        engine = get_sqlalchemy_engine()\n        with Session(engine) as db_session:\n            provider_db = fetch_default_tts_provider(db_session)\n            if provider_db is None:\n                logger.warning(\n                    \"WebSocket synthesize: no default TTS provider configured\"\n                )\n                await websocket.send_json(\n                    {\n                        \"type\": \"error\",\n                        \"message\": \"No text-to-speech provider configured\",\n                    }\n                )\n                return\n\n            if not provider_db.api_key:\n                logger.warning(\"WebSocket synthesize: TTS provider has no API key\")\n                await websocket.send_json(\n                    {\n                        \"type\": \"error\",\n                        \"message\": \"Text-to-speech provider has no API key configured\",\n                    }\n                )\n                return\n\n            logger.info(\n                f\"WebSocket synthesize: creating voice provider: {provider_db.provider_type}\"\n            )\n            try:\n                provider = get_voice_provider(provider_db)\n                logger.info(\n                    f\"WebSocket synthesize: voice provider created, streaming TTS supported: {provider.supports_streaming_tts()}\"\n                )\n            except ValueError as e:\n                logger.error(\n                    f\"WebSocket synthesize: failed to create voice provider: {e}\"\n                )\n                await websocket.send_json({\"type\": \"error\", \"message\": str(e)})\n                return\n\n        # Use native streaming if provider supports it\n        if provider.supports_streaming_tts():\n            logger.info(\"WebSocket synthesize: using native streaming TTS\")\n            message = None  # Initialize to avoid UnboundLocalError in except block\n            try:\n                # Wait for initial config message with voice/speed\n                message = await websocket.receive()\n                voice = None\n                speed = 1.0\n                if \"text\" in message:\n                    try:\n                        data = json.loads(message[\"text\"])\n                        voice = data.get(\"voice\")\n                        speed = data.get(\"speed\", 1.0)\n                    except json.JSONDecodeError:\n                        pass\n\n                streaming_synthesizer = await provider.create_streaming_synthesizer(\n                    voice=voice, speed=speed\n                )\n                logger.info(\n                    \"WebSocket synthesize: streaming synthesizer created successfully\"\n                )\n                await handle_streaming_synthesis(websocket, streaming_synthesizer)\n            except Exception as e:\n                logger.error(\n                    f\"WebSocket synthesize: failed to create streaming synthesizer: {e}\"\n                )\n                if VOICE_DISABLE_STREAMING_FALLBACK:\n                    await websocket.send_json(\n                        {\"type\": \"error\", \"message\": f\"Streaming TTS failed: {e}\"}\n                    )\n                    return\n                logger.info(\n                    \"WebSocket synthesize: falling back to chunked TTS synthesis\"\n                )\n                # Pass the first message so it's not lost in the fallback\n                await handle_chunked_synthesis(\n                    websocket, provider, first_message=message\n                )\n        else:\n            if VOICE_DISABLE_STREAMING_FALLBACK:\n                await websocket.send_json(\n                    {\n                        \"type\": \"error\",\n                        \"message\": \"Provider doesn't support streaming TTS\",\n                    }\n                )\n                return\n            logger.info(\n                \"WebSocket synthesize: using chunked TTS (provider doesn't support streaming)\"\n            )\n            await handle_chunked_synthesis(websocket, provider)\n\n    except WebSocketDisconnect:\n        logger.debug(\"WebSocket synthesize: client disconnected\")\n    except Exception as e:\n        logger.error(f\"WebSocket synthesize: unhandled error: {e}\", exc_info=True)\n        try:\n            # Send generic error to avoid leaking sensitive details\n            await websocket.send_json(\n                {\"type\": \"error\", \"message\": \"An unexpected error occurred\"}\n            )\n        except Exception:\n            pass\n    finally:\n        if streaming_synthesizer:\n            try:\n                await streaming_synthesizer.close()\n            except Exception:\n                pass\n        try:\n            await websocket.close()\n        except Exception:\n            pass\n        logger.info(\"WebSocket synthesize: connection closed\")\n"
  },
  {
    "path": "backend/onyx/server/manage/web_search/api.py",
    "content": "from __future__ import annotations\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Response\nfrom sqlalchemy.dialects.postgresql import insert\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import InternetContentProvider\nfrom onyx.db.models import InternetSearchProvider\nfrom onyx.db.models import User\nfrom onyx.db.web_search import deactivate_web_content_provider\nfrom onyx.db.web_search import deactivate_web_search_provider\nfrom onyx.db.web_search import delete_web_content_provider\nfrom onyx.db.web_search import delete_web_search_provider\nfrom onyx.db.web_search import fetch_web_content_provider_by_name\nfrom onyx.db.web_search import fetch_web_content_provider_by_type\nfrom onyx.db.web_search import fetch_web_content_providers\nfrom onyx.db.web_search import fetch_web_search_provider_by_name\nfrom onyx.db.web_search import fetch_web_search_provider_by_type\nfrom onyx.db.web_search import fetch_web_search_providers\nfrom onyx.db.web_search import set_active_web_content_provider\nfrom onyx.db.web_search import set_active_web_search_provider\nfrom onyx.db.web_search import upsert_web_content_provider\nfrom onyx.db.web_search import upsert_web_search_provider\nfrom onyx.server.manage.web_search.models import WebContentProviderTestRequest\nfrom onyx.server.manage.web_search.models import WebContentProviderUpsertRequest\nfrom onyx.server.manage.web_search.models import WebContentProviderView\nfrom onyx.server.manage.web_search.models import WebSearchProviderTestRequest\nfrom onyx.server.manage.web_search.models import WebSearchProviderUpsertRequest\nfrom onyx.server.manage.web_search.models import WebSearchProviderView\nfrom onyx.tools.tool_implementations.open_url.utils import (\n    filter_web_contents_with_no_title_or_content,\n)\nfrom onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig\nfrom onyx.tools.tool_implementations.web_search.providers import (\n    build_content_provider_from_config,\n)\nfrom onyx.tools.tool_implementations.web_search.providers import (\n    build_search_provider_from_config,\n)\nfrom onyx.tools.tool_implementations.web_search.providers import (\n    provider_requires_api_key,\n)\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.enums import WebContentProviderType\nfrom shared_configs.enums import WebSearchProviderType\n\nlogger = setup_logger()\n\nadmin_router = APIRouter(prefix=\"/admin/web-search\")\n\n\n@admin_router.get(\"/search-providers\", response_model=list[WebSearchProviderView])\ndef list_search_providers(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[WebSearchProviderView]:\n    providers = fetch_web_search_providers(db_session)\n    return [\n        WebSearchProviderView(\n            id=provider.id,\n            name=provider.name,\n            provider_type=WebSearchProviderType(provider.provider_type),\n            is_active=provider.is_active,\n            config=provider.config or {},\n            has_api_key=bool(provider.api_key),\n        )\n        for provider in providers\n    ]\n\n\n@admin_router.post(\"/search-providers\", response_model=WebSearchProviderView)\ndef upsert_search_provider_endpoint(\n    request: WebSearchProviderUpsertRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> WebSearchProviderView:\n    existing_by_name = fetch_web_search_provider_by_name(request.name, db_session)\n    if (\n        existing_by_name\n        and request.id is not None\n        and existing_by_name.id != request.id\n    ):\n        raise HTTPException(\n            status_code=400,\n            detail=f\"A search provider named '{request.name}' already exists.\",\n        )\n\n    provider = upsert_web_search_provider(\n        provider_id=request.id,\n        name=request.name,\n        provider_type=request.provider_type,\n        api_key=request.api_key,\n        api_key_changed=request.api_key_changed,\n        config=request.config,\n        activate=request.activate,\n        db_session=db_session,\n    )\n\n    # Sync Exa key of search engine to content provider\n    if (\n        request.provider_type == WebSearchProviderType.EXA\n        and request.api_key_changed\n        and request.api_key\n    ):\n        stmt = (\n            insert(InternetContentProvider)\n            .values(\n                name=\"Exa\",\n                provider_type=WebContentProviderType.EXA.value,\n                api_key=request.api_key,\n                is_active=False,\n            )\n            .on_conflict_do_update(\n                index_elements=[\"name\"],\n                set_={\"api_key\": request.api_key},\n            )\n        )\n        db_session.execute(stmt)\n        db_session.flush()\n\n    db_session.commit()\n    return WebSearchProviderView(\n        id=provider.id,\n        name=provider.name,\n        provider_type=WebSearchProviderType(provider.provider_type),\n        is_active=provider.is_active,\n        config=provider.config or {},\n        has_api_key=bool(provider.api_key),\n    )\n\n\n@admin_router.delete(\n    \"/search-providers/{provider_id}\", status_code=204, response_class=Response\n)\ndef delete_search_provider(\n    provider_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    delete_web_search_provider(provider_id, db_session)\n    return Response(status_code=204)\n\n\n@admin_router.post(\"/search-providers/{provider_id}/activate\")\ndef activate_search_provider(\n    provider_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> WebSearchProviderView:\n    provider = set_active_web_search_provider(\n        provider_id=provider_id, db_session=db_session\n    )\n    db_session.commit()\n    return WebSearchProviderView(\n        id=provider.id,\n        name=provider.name,\n        provider_type=WebSearchProviderType(provider.provider_type),\n        is_active=provider.is_active,\n        config=provider.config or {},\n        has_api_key=bool(provider.api_key),\n    )\n\n\n@admin_router.post(\"/search-providers/{provider_id}/deactivate\")\ndef deactivate_search_provider(\n    provider_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> dict[str, str]:\n    deactivate_web_search_provider(provider_id=provider_id, db_session=db_session)\n    db_session.commit()\n    return {\"status\": \"ok\"}\n\n\n@admin_router.post(\"/search-providers/test\")\ndef test_search_provider(\n    request: WebSearchProviderTestRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> dict[str, str]:\n    requires_key = provider_requires_api_key(request.provider_type)\n\n    # Determine which API key to use\n    api_key = request.api_key\n    if request.use_stored_key and requires_key:\n        existing_provider = fetch_web_search_provider_by_type(\n            request.provider_type, db_session\n        )\n        if existing_provider is None or not existing_provider.api_key:\n            raise HTTPException(\n                status_code=400,\n                detail=\"No stored API key found for this provider type.\",\n            )\n        api_key = existing_provider.api_key.get_value(apply_mask=False)\n\n    if requires_key and not api_key:\n        raise HTTPException(\n            status_code=400,\n            detail=\"API key is required. Either provide api_key or set use_stored_key to true.\",\n        )\n\n    try:\n        provider = build_search_provider_from_config(\n            provider_type=request.provider_type,\n            api_key=api_key,\n            config=request.config or {},\n        )\n    except ValueError as exc:\n        raise HTTPException(status_code=400, detail=str(exc)) from exc\n\n    if provider is None:\n        raise HTTPException(\n            status_code=400, detail=\"Unable to build provider configuration.\"\n        )\n\n    # Run the API client's test_connection method to ensure the connection is valid.\n    try:\n        return provider.test_connection()\n    except HTTPException:\n        raise\n    except Exception as e:\n        raise HTTPException(status_code=400, detail=str(e)) from e\n\n\n@admin_router.get(\"/content-providers\", response_model=list[WebContentProviderView])\ndef list_content_providers(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[WebContentProviderView]:\n    providers = fetch_web_content_providers(db_session)\n    return [\n        WebContentProviderView(\n            id=provider.id,\n            name=provider.name,\n            provider_type=WebContentProviderType(provider.provider_type),\n            is_active=provider.is_active,\n            config=provider.config or WebContentProviderConfig(),\n            has_api_key=bool(provider.api_key),\n        )\n        for provider in providers\n    ]\n\n\n@admin_router.post(\"/content-providers\", response_model=WebContentProviderView)\ndef upsert_content_provider_endpoint(\n    request: WebContentProviderUpsertRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> WebContentProviderView:\n    existing_by_name = fetch_web_content_provider_by_name(request.name, db_session)\n    if (\n        existing_by_name\n        and request.id is not None\n        and existing_by_name.id != request.id\n    ):\n        raise HTTPException(\n            status_code=400,\n            detail=f\"A content provider named '{request.name}' already exists.\",\n        )\n\n    provider = upsert_web_content_provider(\n        provider_id=request.id,\n        name=request.name,\n        provider_type=request.provider_type,\n        api_key=request.api_key,\n        api_key_changed=request.api_key_changed,\n        config=request.config,\n        activate=request.activate,\n        db_session=db_session,\n    )\n\n    # Sync Exa key of content provider to search provider\n    if (\n        request.provider_type == WebContentProviderType.EXA\n        and request.api_key_changed\n        and request.api_key\n    ):\n        stmt = (\n            insert(InternetSearchProvider)\n            .values(\n                name=\"Exa\",\n                provider_type=WebSearchProviderType.EXA.value,\n                api_key=request.api_key,\n                is_active=False,\n            )\n            .on_conflict_do_update(\n                index_elements=[\"name\"],\n                set_={\"api_key\": request.api_key},\n            )\n        )\n        db_session.execute(stmt)\n        db_session.flush()\n\n    db_session.commit()\n    return WebContentProviderView(\n        id=provider.id,\n        name=provider.name,\n        provider_type=WebContentProviderType(provider.provider_type),\n        is_active=provider.is_active,\n        config=provider.config or WebContentProviderConfig(),\n        has_api_key=bool(provider.api_key),\n    )\n\n\n@admin_router.delete(\n    \"/content-providers/{provider_id}\", status_code=204, response_class=Response\n)\ndef delete_content_provider(\n    provider_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n    delete_web_content_provider(provider_id, db_session)\n    return Response(status_code=204)\n\n\n@admin_router.post(\"/content-providers/{provider_id}/activate\")\ndef activate_content_provider(\n    provider_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> WebContentProviderView:\n    provider = set_active_web_content_provider(\n        provider_id=provider_id, db_session=db_session\n    )\n    db_session.commit()\n    return WebContentProviderView(\n        id=provider.id,\n        name=provider.name,\n        provider_type=WebContentProviderType(provider.provider_type),\n        is_active=provider.is_active,\n        config=provider.config or WebContentProviderConfig(),\n        has_api_key=bool(provider.api_key),\n    )\n\n\n@admin_router.post(\"/content-providers/reset-default\")\ndef reset_content_provider_default(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> dict[str, str]:\n    providers = fetch_web_content_providers(db_session)\n    active_ids = [provider.id for provider in providers if provider.is_active]\n\n    for provider_id in active_ids:\n        deactivate_web_content_provider(provider_id=provider_id, db_session=db_session)\n        db_session.commit()\n\n    return {\"status\": \"ok\"}\n\n\n@admin_router.post(\"/content-providers/{provider_id}/deactivate\")\ndef deactivate_content_provider(\n    provider_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> dict[str, str]:\n    deactivate_web_content_provider(provider_id=provider_id, db_session=db_session)\n    db_session.commit()\n    return {\"status\": \"ok\"}\n\n\n@admin_router.post(\"/content-providers/test\")\ndef test_content_provider(\n    request: WebContentProviderTestRequest,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> dict[str, str]:\n    # Determine which API key to use\n    api_key = request.api_key\n    if request.use_stored_key:\n        existing_provider = fetch_web_content_provider_by_type(\n            request.provider_type, db_session\n        )\n        if existing_provider is None or not existing_provider.api_key:\n            raise HTTPException(\n                status_code=400,\n                detail=\"No stored API key found for this provider type.\",\n            )\n        if MULTI_TENANT:\n            stored_base_url = (\n                existing_provider.config.base_url if existing_provider.config else None\n            )\n            request_base_url = request.config.base_url\n            if request_base_url != stored_base_url:\n                raise HTTPException(\n                    status_code=400,\n                    detail=\"Base URL cannot differ from stored provider when using stored API key\",\n                )\n\n        api_key = existing_provider.api_key.get_value(apply_mask=False)\n\n    if not api_key:\n        raise HTTPException(\n            status_code=400,\n            detail=\"API key is required. Either provide api_key or set use_stored_key to true.\",\n        )\n\n    try:\n        provider = build_content_provider_from_config(\n            provider_type=request.provider_type,\n            api_key=api_key,\n            config=request.config,\n        )\n    except ValueError as exc:\n        raise HTTPException(status_code=400, detail=str(exc)) from exc\n\n    if provider is None:\n        raise HTTPException(\n            status_code=400, detail=\"Unable to build provider configuration.\"\n        )\n\n    # Actually test the API key by making a real content fetch call\n    try:\n        test_url = \"https://example.com\"\n        test_results = filter_web_contents_with_no_title_or_content(\n            list(provider.contents([test_url]))\n        )\n        if not test_results or not any(\n            result.scrape_successful for result in test_results\n        ):\n            raise HTTPException(\n                status_code=400,\n                detail=\"API key validation failed: content fetch returned no results.\",\n            )\n    except HTTPException:\n        raise\n    except Exception as e:\n        error_msg = str(e)\n        if (\n            \"api\" in error_msg.lower()\n            or \"key\" in error_msg.lower()\n            or \"auth\" in error_msg.lower()\n        ):\n            raise HTTPException(\n                status_code=400,\n                detail=f\"Invalid API key: {error_msg}\",\n            ) from e\n        raise HTTPException(\n            status_code=400,\n            detail=f\"API key validation failed: {error_msg}\",\n        ) from e\n\n    logger.info(\n        f\"Web content provider test succeeded for {request.provider_type.value}.\"\n    )\n    return {\"status\": \"ok\"}\n"
  },
  {
    "path": "backend/onyx/server/manage/web_search/models.py",
    "content": "from typing import Any\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig\nfrom shared_configs.enums import WebContentProviderType\nfrom shared_configs.enums import WebSearchProviderType\n\n\nclass WebSearchProviderView(BaseModel):\n    id: int\n    name: str\n    provider_type: WebSearchProviderType\n    is_active: bool\n    config: dict[str, str] | None\n    has_api_key: bool = Field(\n        default=False,\n        description=\"Indicates whether an API key is stored for this provider.\",\n    )\n\n\nclass WebSearchProviderUpsertRequest(BaseModel):\n    id: int | None = Field(default=None, description=\"Existing provider ID to update.\")\n    name: str\n    provider_type: WebSearchProviderType\n    config: dict[str, str] | None = None\n    api_key: str | None = Field(\n        default=None,\n        description=\"API key for the provider. Only required when creating or updating credentials.\",\n    )\n    api_key_changed: bool = Field(\n        default=False,\n        description=\"Set to true when providing a new API key for an existing provider.\",\n    )\n    activate: bool = Field(\n        default=False,\n        description=\"If true, sets this provider as the active one after upsert.\",\n    )\n\n\nclass WebContentProviderView(BaseModel):\n    id: int\n    name: str\n    provider_type: WebContentProviderType\n    is_active: bool\n    config: WebContentProviderConfig | None\n    has_api_key: bool = Field(default=False)\n\n\nclass WebContentProviderUpsertRequest(BaseModel):\n    id: int | None = None\n    name: str\n    provider_type: WebContentProviderType\n    config: WebContentProviderConfig | None = None\n    api_key: str | None = None\n    api_key_changed: bool = False\n    activate: bool = False\n\n\nclass WebSearchProviderTestRequest(BaseModel):\n    provider_type: WebSearchProviderType\n    api_key: str | None = Field(\n        default=None,\n        description=\"API key for testing. If not provided, use_stored_key must be true.\",\n    )\n    use_stored_key: bool = Field(\n        default=False,\n        description=\"If true, use the stored API key for this provider type instead of api_key.\",\n    )\n    config: dict[str, Any] | None = None\n\n\nclass WebContentProviderTestRequest(BaseModel):\n    provider_type: WebContentProviderType\n    api_key: str | None = Field(\n        default=None,\n        description=\"API key for testing. If not provided, use_stored_key must be true.\",\n    )\n    use_stored_key: bool = Field(\n        default=False,\n        description=\"If true, use the stored API key for this provider type instead of api_key.\",\n    )\n    config: WebContentProviderConfig\n"
  },
  {
    "path": "backend/onyx/server/metrics/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/metrics/celery_task_metrics.py",
    "content": "\"\"\"Generic Celery task lifecycle Prometheus metrics.\n\nProvides signal handlers that track task started/completed/failed counts,\nactive task gauge, task duration histograms, and retry/reject/revoke counts.\nThese fire for ALL tasks on the worker — no per-connector enrichment\n(see indexing_task_metrics.py for that).\n\nUsage in a worker app module:\n    from onyx.server.metrics.celery_task_metrics import (\n        on_celery_task_prerun,\n        on_celery_task_postrun,\n        on_celery_task_retry,\n        on_celery_task_revoked,\n        on_celery_task_rejected,\n    )\n    # Call from the worker's existing signal handlers\n\"\"\"\n\nimport threading\nimport time\n\nfrom celery import Task\nfrom prometheus_client import Counter\nfrom prometheus_client import Gauge\nfrom prometheus_client import Histogram\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nTASK_STARTED = Counter(\n    \"onyx_celery_task_started_total\",\n    \"Total Celery tasks started\",\n    [\"task_name\", \"queue\"],\n)\n\nTASK_COMPLETED = Counter(\n    \"onyx_celery_task_completed_total\",\n    \"Total Celery tasks completed\",\n    [\"task_name\", \"queue\", \"outcome\"],\n)\n\nTASK_DURATION = Histogram(\n    \"onyx_celery_task_duration_seconds\",\n    \"Celery task execution duration in seconds\",\n    [\"task_name\", \"queue\"],\n    buckets=[1, 5, 15, 30, 60, 120, 300, 600, 1800, 3600],\n)\n\nTASKS_ACTIVE = Gauge(\n    \"onyx_celery_tasks_active\",\n    \"Currently executing Celery tasks\",\n    [\"task_name\", \"queue\"],\n)\n\nTASK_RETRIED = Counter(\n    \"onyx_celery_task_retried_total\",\n    \"Total Celery tasks retried\",\n    [\"task_name\", \"queue\"],\n)\n\nTASK_REVOKED = Counter(\n    \"onyx_celery_task_revoked_total\",\n    \"Total Celery tasks revoked (cancelled)\",\n    [\"task_name\"],\n)\n\nTASK_REJECTED = Counter(\n    \"onyx_celery_task_rejected_total\",\n    \"Total Celery tasks rejected by worker\",\n    [\"task_name\"],\n)\n\n# task_id → (monotonic start time, metric labels)\n_task_start_times: dict[str, tuple[float, dict[str, str]]] = {}\n\n# Lock protecting _task_start_times — prerun, postrun, and eviction may\n# run concurrently on thread-pool workers.\n_task_start_times_lock = threading.Lock()\n\n# Entries older than this are evicted on each prerun to prevent unbounded\n# growth when tasks are killed (SIGTERM, OOM) and postrun never fires.\n_MAX_START_TIME_AGE_SECONDS = 3600  # 1 hour\n\n\ndef _evict_stale_start_times() -> None:\n    \"\"\"Remove _task_start_times entries older than _MAX_START_TIME_AGE_SECONDS.\n\n    Must be called while holding _task_start_times_lock.\n    \"\"\"\n    now = time.monotonic()\n    stale_ids = [\n        tid\n        for tid, (start, _labels) in _task_start_times.items()\n        if now - start > _MAX_START_TIME_AGE_SECONDS\n    ]\n    for tid in stale_ids:\n        entry = _task_start_times.pop(tid, None)\n        if entry is not None:\n            _labels = entry[1]\n            # Decrement active gauge for evicted tasks — these tasks were\n            # started but never completed (killed, OOM, etc.).\n            active_gauge = TASKS_ACTIVE.labels(**_labels)\n            if active_gauge._value.get() > 0:\n                active_gauge.dec()\n\n\ndef _get_task_labels(task: Task) -> dict[str, str]:\n    \"\"\"Extract task_name and queue labels from a Celery Task instance.\"\"\"\n    task_name = task.name or \"unknown\"\n    queue = \"unknown\"\n    try:\n        delivery_info = task.request.delivery_info\n        if delivery_info:\n            queue = delivery_info.get(\"routing_key\") or \"unknown\"\n    except AttributeError:\n        pass\n    return {\"task_name\": task_name, \"queue\": queue}\n\n\ndef on_celery_task_prerun(\n    task_id: str | None,\n    task: Task | None,\n) -> None:\n    \"\"\"Record task start. Call from the worker's task_prerun signal handler.\"\"\"\n    if task is None or task_id is None:\n        return\n\n    try:\n        labels = _get_task_labels(task)\n        TASK_STARTED.labels(**labels).inc()\n        TASKS_ACTIVE.labels(**labels).inc()\n        with _task_start_times_lock:\n            _evict_stale_start_times()\n            _task_start_times[task_id] = (time.monotonic(), labels)\n    except Exception:\n        logger.debug(\"Failed to record celery task prerun metrics\", exc_info=True)\n\n\ndef on_celery_task_postrun(\n    task_id: str | None,\n    task: Task | None,\n    state: str | None,\n) -> None:\n    \"\"\"Record task completion. Call from the worker's task_postrun signal handler.\"\"\"\n    if task is None or task_id is None:\n        return\n\n    try:\n        labels = _get_task_labels(task)\n        outcome = \"success\" if state == \"SUCCESS\" else \"failure\"\n        TASK_COMPLETED.labels(**labels, outcome=outcome).inc()\n\n        # Guard against going below 0 if postrun fires without a matching\n        # prerun (e.g. after a worker restart or stale entry eviction).\n        active_gauge = TASKS_ACTIVE.labels(**labels)\n        if active_gauge._value.get() > 0:\n            active_gauge.dec()\n\n        with _task_start_times_lock:\n            entry = _task_start_times.pop(task_id, None)\n        if entry is not None:\n            start_time, _stored_labels = entry\n            TASK_DURATION.labels(**labels).observe(time.monotonic() - start_time)\n    except Exception:\n        logger.debug(\"Failed to record celery task postrun metrics\", exc_info=True)\n\n\ndef on_celery_task_retry(\n    _task_id: str | None,\n    task: Task | None,\n) -> None:\n    \"\"\"Record task retry. Call from the worker's task_retry signal handler.\"\"\"\n    if task is None:\n        return\n    try:\n        labels = _get_task_labels(task)\n        TASK_RETRIED.labels(**labels).inc()\n    except Exception:\n        logger.debug(\"Failed to record celery task retry metrics\", exc_info=True)\n\n\ndef on_celery_task_revoked(\n    _task_id: str | None,\n    task_name: str | None = None,\n) -> None:\n    \"\"\"Record task revocation. The revoked signal doesn't provide a Task\n    instance, only the task name via sender.\"\"\"\n    if task_name is None:\n        return\n    try:\n        TASK_REVOKED.labels(task_name=task_name).inc()\n    except Exception:\n        logger.debug(\"Failed to record celery task revoked metrics\", exc_info=True)\n\n\ndef on_celery_task_rejected(\n    _task_id: str | None,\n    task_name: str | None = None,\n) -> None:\n    \"\"\"Record task rejection.\"\"\"\n    if task_name is None:\n        return\n    try:\n        TASK_REJECTED.labels(task_name=task_name).inc()\n    except Exception:\n        logger.debug(\"Failed to record celery task rejected metrics\", exc_info=True)\n"
  },
  {
    "path": "backend/onyx/server/metrics/indexing_pipeline.py",
    "content": "\"\"\"Prometheus collectors for Celery queue depths and indexing pipeline state.\n\nThese collectors query Redis and Postgres at scrape time (the Collector pattern),\nso metrics are always fresh when Prometheus scrapes /metrics. They run inside the\nmonitoring celery worker which already has Redis and DB access.\n\nTo avoid hammering Redis/Postgres on every 15s scrape, results are cached with\na configurable TTL (default 30s). This means metrics may be up to TTL seconds\nstale, which is fine for monitoring dashboards.\n\"\"\"\n\nimport json\nimport threading\nimport time\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nfrom prometheus_client.core import GaugeMetricFamily\nfrom prometheus_client.registry import Collector\nfrom redis import Redis\n\nfrom onyx.background.celery.celery_redis import celery_get_queue_length\nfrom onyx.background.celery.celery_redis import celery_get_unacked_task_ids\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Default cache TTL in seconds. Scrapes hitting within this window return\n# the previous result without re-querying Redis/Postgres.\n_DEFAULT_CACHE_TTL = 30.0\n\n_QUEUE_LABEL_MAP: dict[str, str] = {\n    OnyxCeleryQueues.PRIMARY: \"primary\",\n    OnyxCeleryQueues.DOCPROCESSING: \"docprocessing\",\n    OnyxCeleryQueues.CONNECTOR_DOC_FETCHING: \"docfetching\",\n    OnyxCeleryQueues.VESPA_METADATA_SYNC: \"vespa_metadata_sync\",\n    OnyxCeleryQueues.CONNECTOR_DELETION: \"connector_deletion\",\n    OnyxCeleryQueues.CONNECTOR_PRUNING: \"connector_pruning\",\n    OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC: \"permissions_sync\",\n    OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC: \"external_group_sync\",\n    OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT: \"permissions_upsert\",\n    OnyxCeleryQueues.CONNECTOR_HIERARCHY_FETCHING: \"hierarchy_fetching\",\n    OnyxCeleryQueues.LLM_MODEL_UPDATE: \"llm_model_update\",\n    OnyxCeleryQueues.CHECKPOINT_CLEANUP: \"checkpoint_cleanup\",\n    OnyxCeleryQueues.INDEX_ATTEMPT_CLEANUP: \"index_attempt_cleanup\",\n    OnyxCeleryQueues.CSV_GENERATION: \"csv_generation\",\n    OnyxCeleryQueues.USER_FILE_PROCESSING: \"user_file_processing\",\n    OnyxCeleryQueues.USER_FILE_PROJECT_SYNC: \"user_file_project_sync\",\n    OnyxCeleryQueues.USER_FILE_DELETE: \"user_file_delete\",\n    OnyxCeleryQueues.MONITORING: \"monitoring\",\n    OnyxCeleryQueues.SANDBOX: \"sandbox\",\n    OnyxCeleryQueues.OPENSEARCH_MIGRATION: \"opensearch_migration\",\n}\n\n# Queues where prefetched (unacked) task counts are meaningful\n_UNACKED_QUEUES: list[str] = [\n    OnyxCeleryQueues.CONNECTOR_DOC_FETCHING,\n    OnyxCeleryQueues.DOCPROCESSING,\n]\n\n\nclass _CachedCollector(Collector):\n    \"\"\"Base collector with TTL-based caching.\n\n    Subclasses implement ``_collect_fresh()`` to query the actual data source.\n    The base ``collect()`` returns cached results if the TTL hasn't expired,\n    avoiding repeated queries when Prometheus scrapes frequently.\n    \"\"\"\n\n    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:\n        self._cache_ttl = cache_ttl\n        self._cached_result: list[GaugeMetricFamily] | None = None\n        self._last_collect_time: float = 0.0\n        self._lock = threading.Lock()\n\n    def collect(self) -> list[GaugeMetricFamily]:\n        with self._lock:\n            now = time.monotonic()\n            if (\n                now - self._last_collect_time < self._cache_ttl\n                and self._cached_result is not None\n            ):\n                return self._cached_result\n\n            try:\n                result = self._collect_fresh()\n                self._cached_result = result\n                self._last_collect_time = now\n                return result\n            except Exception:\n                logger.exception(f\"Error in {type(self).__name__}.collect()\")\n                # Return stale cache on error rather than nothing — avoids\n                # metrics disappearing during transient failures.\n                return self._cached_result if self._cached_result is not None else []\n\n    def _collect_fresh(self) -> list[GaugeMetricFamily]:\n        raise NotImplementedError\n\n    def describe(self) -> list[GaugeMetricFamily]:\n        return []\n\n\nclass QueueDepthCollector(_CachedCollector):\n    \"\"\"Reads Celery queue lengths from the broker Redis on each scrape.\"\"\"\n\n    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:\n        super().__init__(cache_ttl)\n        self._celery_app: Any | None = None\n\n    def set_celery_app(self, app: Any) -> None:\n        \"\"\"Set the Celery app for broker Redis access.\"\"\"\n        self._celery_app = app\n\n    def _collect_fresh(self) -> list[GaugeMetricFamily]:\n        if self._celery_app is None:\n            return []\n\n        from onyx.background.celery.celery_redis import celery_get_broker_client\n\n        redis_client = celery_get_broker_client(self._celery_app)\n\n        depth = GaugeMetricFamily(\n            \"onyx_queue_depth\",\n            \"Number of tasks waiting in Celery queue\",\n            labels=[\"queue\"],\n        )\n        unacked = GaugeMetricFamily(\n            \"onyx_queue_unacked\",\n            \"Number of prefetched (unacked) tasks for queue\",\n            labels=[\"queue\"],\n        )\n        queue_age = GaugeMetricFamily(\n            \"onyx_queue_oldest_task_age_seconds\",\n            \"Age of the oldest task in the queue (seconds since enqueue)\",\n            labels=[\"queue\"],\n        )\n\n        now = time.time()\n\n        for queue_name, label in _QUEUE_LABEL_MAP.items():\n            length = celery_get_queue_length(queue_name, redis_client)\n            depth.add_metric([label], length)\n\n            # Peek at the oldest message to get its age\n            if length > 0:\n                age = self._get_oldest_message_age(redis_client, queue_name, now)\n                if age is not None:\n                    queue_age.add_metric([label], age)\n\n        for queue_name in _UNACKED_QUEUES:\n            label = _QUEUE_LABEL_MAP[queue_name]\n            task_ids = celery_get_unacked_task_ids(queue_name, redis_client)\n            unacked.add_metric([label], len(task_ids))\n\n        return [depth, unacked, queue_age]\n\n    @staticmethod\n    def _get_oldest_message_age(\n        redis_client: Redis, queue_name: str, now: float\n    ) -> float | None:\n        \"\"\"Peek at the oldest (tail) message in a Redis list queue\n        and extract its timestamp to compute age.\n\n        Note: If the Celery message contains neither ``properties.timestamp``\n        nor ``headers.timestamp``, no age metric is emitted for this queue.\n        This can happen with custom task producers or non-standard Celery\n        protocol versions. The metric will simply be absent rather than\n        inaccurate, which is the safest behavior for alerting.\n        \"\"\"\n        try:\n            raw: bytes | str | None = redis_client.lindex(queue_name, -1)  # type: ignore[assignment]\n            if raw is None:\n                return None\n            msg = json.loads(raw)\n            # Check for ETA tasks first — they are intentionally delayed,\n            # so reporting their queue age would be misleading.\n            headers = msg.get(\"headers\", {})\n            if headers.get(\"eta\") is not None:\n                return None\n            # Celery v2 protocol: timestamp in properties\n            props = msg.get(\"properties\", {})\n            ts = props.get(\"timestamp\")\n            if ts is not None:\n                return now - float(ts)\n            # Fallback: some Celery configurations place the timestamp in\n            # headers instead of properties.\n            ts = headers.get(\"timestamp\")\n            if ts is not None:\n                return now - float(ts)\n        except Exception:\n            pass\n        return None\n\n\nclass IndexAttemptCollector(_CachedCollector):\n    \"\"\"Queries Postgres for index attempt state on each scrape.\"\"\"\n\n    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:\n        super().__init__(cache_ttl)\n        self._configured: bool = False\n        self._terminal_statuses: list = []\n\n    def configure(self) -> None:\n        \"\"\"Call once DB engine is initialized.\"\"\"\n        from onyx.db.enums import IndexingStatus\n\n        self._terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]\n        self._configured = True\n\n    def _collect_fresh(self) -> list[GaugeMetricFamily]:\n        if not self._configured:\n            return []\n\n        from onyx.db.engine.sql_engine import get_session_with_current_tenant\n        from onyx.db.engine.tenant_utils import get_all_tenant_ids\n        from onyx.db.index_attempt import get_active_index_attempts_for_metrics\n        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n        attempts_gauge = GaugeMetricFamily(\n            \"onyx_index_attempts_active\",\n            \"Number of non-terminal index attempts\",\n            labels=[\n                \"status\",\n                \"source\",\n                \"tenant_id\",\n                \"connector_name\",\n                \"cc_pair_id\",\n            ],\n        )\n\n        tenant_ids = get_all_tenant_ids()\n\n        for tid in tenant_ids:\n            # Defensive guard — get_all_tenant_ids() should never yield None,\n            # but we guard here for API stability in case the contract changes.\n            if tid is None:\n                continue\n            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)\n            try:\n                with get_session_with_current_tenant() as session:\n                    rows = get_active_index_attempts_for_metrics(session)\n\n                    for status, source, cc_id, cc_name, count in rows:\n                        name_val = cc_name or f\"cc_pair_{cc_id}\"\n                        attempts_gauge.add_metric(\n                            [\n                                status.value,\n                                source.value,\n                                tid,\n                                name_val,\n                                str(cc_id),\n                            ],\n                            count,\n                        )\n            finally:\n                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n        return [attempts_gauge]\n\n\nclass ConnectorHealthCollector(_CachedCollector):\n    \"\"\"Queries Postgres for connector health state on each scrape.\"\"\"\n\n    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:\n        super().__init__(cache_ttl)\n        self._configured: bool = False\n\n    def configure(self) -> None:\n        \"\"\"Call once DB engine is initialized.\"\"\"\n        self._configured = True\n\n    def _collect_fresh(self) -> list[GaugeMetricFamily]:\n        if not self._configured:\n            return []\n\n        from onyx.db.connector_credential_pair import (\n            get_connector_health_for_metrics,\n        )\n        from onyx.db.engine.sql_engine import get_session_with_current_tenant\n        from onyx.db.engine.tenant_utils import get_all_tenant_ids\n        from onyx.db.index_attempt import get_docs_indexed_by_cc_pair\n        from onyx.db.index_attempt import get_failed_attempt_counts_by_cc_pair\n        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n        staleness_gauge = GaugeMetricFamily(\n            \"onyx_connector_last_success_age_seconds\",\n            \"Seconds since last successful index for this connector\",\n            labels=[\"tenant_id\", \"source\", \"cc_pair_id\", \"connector_name\"],\n        )\n        error_state_gauge = GaugeMetricFamily(\n            \"onyx_connector_in_error_state\",\n            \"Whether the connector is in a repeated error state (1=yes, 0=no)\",\n            labels=[\"tenant_id\", \"source\", \"cc_pair_id\", \"connector_name\"],\n        )\n        by_status_gauge = GaugeMetricFamily(\n            \"onyx_connectors_by_status\",\n            \"Number of connectors grouped by status\",\n            labels=[\"tenant_id\", \"status\"],\n        )\n        error_total_gauge = GaugeMetricFamily(\n            \"onyx_connectors_in_error_total\",\n            \"Total number of connectors in repeated error state\",\n            labels=[\"tenant_id\"],\n        )\n        per_connector_labels = [\n            \"tenant_id\",\n            \"source\",\n            \"cc_pair_id\",\n            \"connector_name\",\n        ]\n        docs_success_gauge = GaugeMetricFamily(\n            \"onyx_connector_docs_indexed\",\n            \"Total new documents indexed (90-day rolling sum) per connector\",\n            labels=per_connector_labels,\n        )\n        docs_error_gauge = GaugeMetricFamily(\n            \"onyx_connector_error_count\",\n            \"Total number of failed index attempts per connector\",\n            labels=per_connector_labels,\n        )\n\n        now = datetime.now(tz=timezone.utc)\n        tenant_ids = get_all_tenant_ids()\n\n        for tid in tenant_ids:\n            # Defensive guard — get_all_tenant_ids() should never yield None,\n            # but we guard here for API stability in case the contract changes.\n            if tid is None:\n                continue\n            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)\n            try:\n                with get_session_with_current_tenant() as session:\n                    pairs = get_connector_health_for_metrics(session)\n                    error_counts_by_cc = get_failed_attempt_counts_by_cc_pair(session)\n                    docs_by_cc = get_docs_indexed_by_cc_pair(session)\n\n                    status_counts: dict[str, int] = {}\n                    error_count = 0\n\n                    for (\n                        cc_id,\n                        status,\n                        in_error,\n                        last_success,\n                        cc_name,\n                        source,\n                    ) in pairs:\n                        cc_id_str = str(cc_id)\n                        source_val = source.value\n                        name_val = cc_name or f\"cc_pair_{cc_id}\"\n                        label_vals = [tid, source_val, cc_id_str, name_val]\n\n                        if last_success is not None:\n                            # Both `now` and `last_success` are timezone-aware\n                            # (the DB column uses DateTime(timezone=True)),\n                            # so subtraction is safe.\n                            age = (now - last_success).total_seconds()\n                            staleness_gauge.add_metric(label_vals, age)\n\n                        error_state_gauge.add_metric(\n                            label_vals,\n                            1.0 if in_error else 0.0,\n                        )\n                        if in_error:\n                            error_count += 1\n\n                        docs_success_gauge.add_metric(\n                            label_vals,\n                            docs_by_cc.get(cc_id, 0),\n                        )\n\n                        docs_error_gauge.add_metric(\n                            label_vals,\n                            error_counts_by_cc.get(cc_id, 0),\n                        )\n\n                        status_val = status.value\n                        status_counts[status_val] = status_counts.get(status_val, 0) + 1\n\n                    for status_val, count in status_counts.items():\n                        by_status_gauge.add_metric([tid, status_val], count)\n\n                    error_total_gauge.add_metric([tid], error_count)\n            finally:\n                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n        return [\n            staleness_gauge,\n            error_state_gauge,\n            by_status_gauge,\n            error_total_gauge,\n            docs_success_gauge,\n            docs_error_gauge,\n        ]\n\n\nclass RedisHealthCollector(_CachedCollector):\n    \"\"\"Collects Redis server health metrics (memory, clients, etc.).\"\"\"\n\n    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:\n        super().__init__(cache_ttl)\n        self._celery_app: Any | None = None\n\n    def set_celery_app(self, app: Any) -> None:\n        \"\"\"Set the Celery app for broker Redis access.\"\"\"\n        self._celery_app = app\n\n    def _collect_fresh(self) -> list[GaugeMetricFamily]:\n        if self._celery_app is None:\n            return []\n\n        from onyx.background.celery.celery_redis import celery_get_broker_client\n\n        redis_client = celery_get_broker_client(self._celery_app)\n\n        memory_used = GaugeMetricFamily(\n            \"onyx_redis_memory_used_bytes\",\n            \"Redis used memory in bytes\",\n        )\n        memory_peak = GaugeMetricFamily(\n            \"onyx_redis_memory_peak_bytes\",\n            \"Redis peak used memory in bytes\",\n        )\n        memory_frag = GaugeMetricFamily(\n            \"onyx_redis_memory_fragmentation_ratio\",\n            \"Redis memory fragmentation ratio (>1.5 indicates fragmentation)\",\n        )\n        connected_clients = GaugeMetricFamily(\n            \"onyx_redis_connected_clients\",\n            \"Number of connected Redis clients\",\n        )\n\n        try:\n            mem_info: dict = redis_client.info(\"memory\")  # type: ignore[assignment]\n            memory_used.add_metric([], mem_info.get(\"used_memory\", 0))\n            memory_peak.add_metric([], mem_info.get(\"used_memory_peak\", 0))\n            frag = mem_info.get(\"mem_fragmentation_ratio\")\n            if frag is not None:\n                memory_frag.add_metric([], frag)\n\n            client_info: dict = redis_client.info(\"clients\")  # type: ignore[assignment]\n            connected_clients.add_metric([], client_info.get(\"connected_clients\", 0))\n        except Exception:\n            logger.debug(\"Failed to collect Redis health metrics\", exc_info=True)\n\n        return [memory_used, memory_peak, memory_frag, connected_clients]\n\n\nclass WorkerHeartbeatMonitor:\n    \"\"\"Monitors Celery worker health via the event stream.\n\n    Subscribes to ``worker-heartbeat``, ``worker-online``, and\n    ``worker-offline`` events via a single persistent connection.\n    Runs in a daemon thread started once during worker setup.\n    \"\"\"\n\n    # Consider a worker down if no heartbeat received for this long.\n    _HEARTBEAT_TIMEOUT_SECONDS = 120.0\n\n    def __init__(self, celery_app: Any) -> None:\n        self._app = celery_app\n        self._worker_last_seen: dict[str, float] = {}\n        self._lock = threading.Lock()\n        self._running = False\n        self._thread: threading.Thread | None = None\n\n    def start(self) -> None:\n        \"\"\"Start the background event listener thread.\n\n        Safe to call multiple times — only starts one thread.\n        \"\"\"\n        if self._thread is not None and self._thread.is_alive():\n            return\n        self._running = True\n        self._thread = threading.Thread(target=self._listen, daemon=True)\n        self._thread.start()\n        logger.info(\"WorkerHeartbeatMonitor started\")\n\n    def stop(self) -> None:\n        self._running = False\n\n    def _listen(self) -> None:\n        \"\"\"Background loop: connect to event stream and process heartbeats.\"\"\"\n        while self._running:\n            try:\n                with self._app.connection() as conn:\n                    recv = self._app.events.Receiver(\n                        conn,\n                        handlers={\n                            \"worker-heartbeat\": self._on_heartbeat,\n                            \"worker-online\": self._on_heartbeat,\n                            \"worker-offline\": self._on_offline,\n                        },\n                    )\n                    recv.capture(\n                        limit=None, timeout=self._HEARTBEAT_TIMEOUT_SECONDS, wakeup=True\n                    )\n            except Exception:\n                if self._running:\n                    logger.debug(\n                        \"Heartbeat listener disconnected, reconnecting in 5s\",\n                        exc_info=True,\n                    )\n                    time.sleep(5.0)\n            else:\n                # capture() returned normally (timeout with no events); reconnect\n                if self._running:\n                    logger.debug(\"Heartbeat capture timed out, reconnecting\")\n                    time.sleep(5.0)\n\n    def _on_heartbeat(self, event: dict[str, Any]) -> None:\n        hostname = event.get(\"hostname\")\n        if hostname:\n            with self._lock:\n                self._worker_last_seen[hostname] = time.monotonic()\n\n    def _on_offline(self, event: dict[str, Any]) -> None:\n        hostname = event.get(\"hostname\")\n        if hostname:\n            with self._lock:\n                self._worker_last_seen.pop(hostname, None)\n\n    def get_worker_status(self) -> dict[str, bool]:\n        \"\"\"Return {hostname: is_alive} for all known workers.\n\n        Thread-safe. Called by WorkerHealthCollector on each scrape.\n        Also prunes workers that have been dead longer than 2x the\n        heartbeat timeout to prevent unbounded growth.\n        \"\"\"\n        now = time.monotonic()\n        prune_threshold = self._HEARTBEAT_TIMEOUT_SECONDS * 2\n        with self._lock:\n            # Prune workers that have been gone for 2x the timeout\n            stale = [\n                h\n                for h, ts in self._worker_last_seen.items()\n                if (now - ts) > prune_threshold\n            ]\n            for h in stale:\n                del self._worker_last_seen[h]\n\n            result: dict[str, bool] = {}\n            for hostname, last_seen in self._worker_last_seen.items():\n                alive = (now - last_seen) < self._HEARTBEAT_TIMEOUT_SECONDS\n                result[hostname] = alive\n            return result\n\n\nclass WorkerHealthCollector(_CachedCollector):\n    \"\"\"Collects Celery worker health from the heartbeat monitor.\n\n    Reads worker status from ``WorkerHeartbeatMonitor`` which listens\n    to the Celery event stream via a single persistent connection.\n    \"\"\"\n\n    def __init__(self, cache_ttl: float = 30.0) -> None:\n        super().__init__(cache_ttl)\n        self._monitor: WorkerHeartbeatMonitor | None = None\n\n    def set_monitor(self, monitor: WorkerHeartbeatMonitor) -> None:\n        \"\"\"Set the heartbeat monitor instance.\"\"\"\n        self._monitor = monitor\n\n    def _collect_fresh(self) -> list[GaugeMetricFamily]:\n        if self._monitor is None:\n            return []\n\n        active_workers = GaugeMetricFamily(\n            \"onyx_celery_active_worker_count\",\n            \"Number of active Celery workers with recent heartbeats\",\n        )\n        worker_up = GaugeMetricFamily(\n            \"onyx_celery_worker_up\",\n            \"Whether a specific Celery worker is alive (1=up, 0=down)\",\n            labels=[\"worker\"],\n        )\n\n        try:\n            status = self._monitor.get_worker_status()\n            alive_count = sum(1 for alive in status.values() if alive)\n            active_workers.add_metric([], alive_count)\n\n            for hostname in sorted(status):\n                # Use short name (before @) for single-host deployments,\n                # full hostname when multiple hosts share a worker type.\n                label = hostname.split(\"@\")[0]\n                worker_up.add_metric([label], 1 if status[hostname] else 0)\n        except Exception:\n            logger.debug(\"Failed to collect worker health metrics\", exc_info=True)\n\n        return [active_workers, worker_up]\n"
  },
  {
    "path": "backend/onyx/server/metrics/indexing_pipeline_setup.py",
    "content": "\"\"\"Setup function for indexing pipeline Prometheus collectors.\n\nCalled once by the monitoring celery worker after Redis and DB are ready.\n\"\"\"\n\nfrom celery import Celery\nfrom prometheus_client.registry import REGISTRY\n\nfrom onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector\nfrom onyx.server.metrics.indexing_pipeline import IndexAttemptCollector\nfrom onyx.server.metrics.indexing_pipeline import QueueDepthCollector\nfrom onyx.server.metrics.indexing_pipeline import RedisHealthCollector\nfrom onyx.server.metrics.indexing_pipeline import WorkerHealthCollector\nfrom onyx.server.metrics.indexing_pipeline import WorkerHeartbeatMonitor\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Module-level singletons — these are lightweight objects (no connections or DB\n# state) until configure() / set_celery_app() is called. Keeping them at\n# module level ensures they survive the lifetime of the worker process and are\n# only registered with the Prometheus registry once.\n_queue_collector = QueueDepthCollector()\n_attempt_collector = IndexAttemptCollector()\n_connector_collector = ConnectorHealthCollector()\n_redis_health_collector = RedisHealthCollector()\n_worker_health_collector = WorkerHealthCollector()\n_heartbeat_monitor: WorkerHeartbeatMonitor | None = None\n\n\ndef setup_indexing_pipeline_metrics(celery_app: Celery) -> None:\n    \"\"\"Register all indexing pipeline collectors with the default registry.\n\n    Args:\n        celery_app: The Celery application instance. Used to obtain a\n            broker Redis client on each scrape for queue depth metrics.\n    \"\"\"\n    _queue_collector.set_celery_app(celery_app)\n    _redis_health_collector.set_celery_app(celery_app)\n\n    # Start the heartbeat monitor daemon thread — uses a single persistent\n    # connection to receive worker-heartbeat events.\n    # Module-level singleton prevents duplicate threads on re-entry.\n    global _heartbeat_monitor\n    if _heartbeat_monitor is None:\n        _heartbeat_monitor = WorkerHeartbeatMonitor(celery_app)\n        _heartbeat_monitor.start()\n    _worker_health_collector.set_monitor(_heartbeat_monitor)\n\n    _attempt_collector.configure()\n    _connector_collector.configure()\n\n    for collector in (\n        _queue_collector,\n        _attempt_collector,\n        _connector_collector,\n        _redis_health_collector,\n        _worker_health_collector,\n    ):\n        try:\n            REGISTRY.register(collector)\n        except ValueError:\n            logger.debug(\"Collector already registered: %s\", type(collector).__name__)\n"
  },
  {
    "path": "backend/onyx/server/metrics/indexing_task_metrics.py",
    "content": "\"\"\"Per-connector Prometheus metrics for indexing tasks.\n\nEnriches the two primary indexing tasks (docfetching_proxy_task and\ndocprocessing_task) with connector-level labels: source, tenant_id,\nand cc_pair_id.\n\nNote: connector_name is intentionally excluded from push-based per-task\ncounters because it is a user-defined free-form string that can create\nunbounded cardinality. The pull-based collectors on the monitoring worker\n(see indexing_pipeline.py) include connector_name since they have bounded\ncardinality (one series per connector, not per task execution).\n\nUses an in-memory cache for cc_pair_id → (source, name) lookups.\nConnectors never change source type, and names change rarely, so the\ncache is safe to hold for the worker's lifetime.\n\nUsage in a worker app module:\n    from onyx.server.metrics.indexing_task_metrics import (\n        on_indexing_task_prerun,\n        on_indexing_task_postrun,\n    )\n\"\"\"\n\nimport threading\nimport time\nfrom dataclasses import dataclass\n\nfrom celery import Task\nfrom prometheus_client import Counter\nfrom prometheus_client import Histogram\n\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.server.metrics.celery_task_metrics import _MAX_START_TIME_AGE_SECONDS\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\nlogger = setup_logger()\n\n\n@dataclass(frozen=True)\nclass ConnectorInfo:\n    \"\"\"Cached connector metadata for metric labels.\"\"\"\n\n    source: str\n    name: str\n\n\n_UNKNOWN_CONNECTOR = ConnectorInfo(source=\"unknown\", name=\"unknown\")\n\n# (tenant_id, cc_pair_id) → ConnectorInfo (populated on first encounter).\n# Keyed by tenant to avoid cross-tenant cache poisoning in multi-tenant\n# deployments where different tenants can share the same cc_pair_id value.\n_connector_cache: dict[tuple[str, int], ConnectorInfo] = {}\n\n# Lock protecting _connector_cache — multiple thread-pool workers may\n# resolve connectors concurrently.\n_connector_cache_lock = threading.Lock()\n\n# Only enrich these task types with per-connector labels\n_INDEXING_TASK_NAMES: frozenset[str] = frozenset(\n    {\n        OnyxCeleryTask.CONNECTOR_DOC_FETCHING_TASK,\n        OnyxCeleryTask.DOCPROCESSING_TASK,\n    }\n)\n\n# connector_name is intentionally excluded — see module docstring.\nINDEXING_TASK_STARTED = Counter(\n    \"onyx_indexing_task_started_total\",\n    \"Indexing tasks started per connector\",\n    [\"task_name\", \"source\", \"tenant_id\", \"cc_pair_id\"],\n)\n\nINDEXING_TASK_COMPLETED = Counter(\n    \"onyx_indexing_task_completed_total\",\n    \"Indexing tasks completed per connector\",\n    [\n        \"task_name\",\n        \"source\",\n        \"tenant_id\",\n        \"cc_pair_id\",\n        \"outcome\",\n    ],\n)\n\nINDEXING_TASK_DURATION = Histogram(\n    \"onyx_indexing_task_duration_seconds\",\n    \"Indexing task duration by connector type\",\n    [\"task_name\", \"source\", \"tenant_id\"],\n    buckets=[1, 5, 15, 30, 60, 120, 300, 600, 1800, 3600],\n)\n\n# task_id → monotonic start time (for indexing tasks only)\n_indexing_start_times: dict[str, float] = {}\n\n# Lock protecting _indexing_start_times — prerun, postrun, and eviction may\n# run concurrently on thread-pool workers.\n_indexing_start_times_lock = threading.Lock()\n\n\ndef _evict_stale_start_times() -> None:\n    \"\"\"Remove _indexing_start_times entries older than _MAX_START_TIME_AGE_SECONDS.\n\n    Must be called while holding _indexing_start_times_lock.\n    \"\"\"\n    now = time.monotonic()\n    stale_ids = [\n        tid\n        for tid, start in _indexing_start_times.items()\n        if now - start > _MAX_START_TIME_AGE_SECONDS\n    ]\n    for tid in stale_ids:\n        _indexing_start_times.pop(tid, None)\n\n\ndef _resolve_connector(cc_pair_id: int) -> ConnectorInfo:\n    \"\"\"Resolve cc_pair_id to ConnectorInfo, using cache when possible.\n\n    On cache miss, does a single DB query with eager connector load.\n    On any failure, returns _UNKNOWN_CONNECTOR without caching, so that\n    subsequent calls can retry the lookup once the DB is available.\n\n    Note on tenant_id source: we read CURRENT_TENANT_ID_CONTEXTVAR for the\n    cache key. The Celery tenant-aware middleware sets this contextvar before\n    task execution, and it always matches kwargs[\"tenant_id\"] (which is set\n    at task dispatch time). They are guaranteed to agree for a given task\n    execution context.\n    \"\"\"\n    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get(\"\") or \"\"\n    cache_key = (tenant_id, cc_pair_id)\n\n    with _connector_cache_lock:\n        cached = _connector_cache.get(cache_key)\n        if cached is not None:\n            return cached\n\n    try:\n        from onyx.db.connector_credential_pair import (\n            get_connector_credential_pair_from_id,\n        )\n        from onyx.db.engine.sql_engine import get_session_with_current_tenant\n\n        with get_session_with_current_tenant() as db_session:\n            cc_pair = get_connector_credential_pair_from_id(\n                db_session,\n                cc_pair_id,\n                eager_load_connector=True,\n            )\n            if cc_pair is None:\n                # DB lookup succeeded but cc_pair doesn't exist — don't cache,\n                # it may appear later (race with connector creation).\n                return _UNKNOWN_CONNECTOR\n\n            info = ConnectorInfo(\n                source=cc_pair.connector.source.value,\n                name=cc_pair.name,\n            )\n            with _connector_cache_lock:\n                _connector_cache[cache_key] = info\n            return info\n    except Exception:\n        logger.debug(\n            f\"Failed to resolve connector info for cc_pair_id={cc_pair_id}\",\n            exc_info=True,\n        )\n        return _UNKNOWN_CONNECTOR\n\n\ndef on_indexing_task_prerun(\n    task_id: str | None,\n    task: Task | None,\n    kwargs: dict | None,\n) -> None:\n    \"\"\"Record per-connector metrics at task start.\n\n    Only fires for tasks in _INDEXING_TASK_NAMES. Silently returns for\n    all other tasks.\n    \"\"\"\n    if task is None or task_id is None or kwargs is None:\n        return\n\n    task_name = task.name or \"\"\n    if task_name not in _INDEXING_TASK_NAMES:\n        return\n\n    try:\n        cc_pair_id = kwargs.get(\"cc_pair_id\")\n        tenant_id = str(kwargs.get(\"tenant_id\", \"unknown\"))\n\n        if cc_pair_id is None:\n            return\n\n        info = _resolve_connector(cc_pair_id)\n\n        INDEXING_TASK_STARTED.labels(\n            task_name=task_name,\n            source=info.source,\n            tenant_id=tenant_id,\n            cc_pair_id=str(cc_pair_id),\n        ).inc()\n\n        with _indexing_start_times_lock:\n            _evict_stale_start_times()\n            _indexing_start_times[task_id] = time.monotonic()\n    except Exception:\n        logger.debug(\"Failed to record indexing task prerun metrics\", exc_info=True)\n\n\ndef on_indexing_task_postrun(\n    task_id: str | None,\n    task: Task | None,\n    kwargs: dict | None,\n    state: str | None,\n) -> None:\n    \"\"\"Record per-connector completion metrics.\n\n    Only fires for tasks in _INDEXING_TASK_NAMES.\n    \"\"\"\n    if task is None or task_id is None or kwargs is None:\n        return\n\n    task_name = task.name or \"\"\n    if task_name not in _INDEXING_TASK_NAMES:\n        return\n\n    try:\n        cc_pair_id = kwargs.get(\"cc_pair_id\")\n        tenant_id = str(kwargs.get(\"tenant_id\", \"unknown\"))\n\n        if cc_pair_id is None:\n            return\n\n        info = _resolve_connector(cc_pair_id)\n        outcome = \"success\" if state == \"SUCCESS\" else \"failure\"\n\n        INDEXING_TASK_COMPLETED.labels(\n            task_name=task_name,\n            source=info.source,\n            tenant_id=tenant_id,\n            cc_pair_id=str(cc_pair_id),\n            outcome=outcome,\n        ).inc()\n\n        with _indexing_start_times_lock:\n            start = _indexing_start_times.pop(task_id, None)\n        if start is not None:\n            INDEXING_TASK_DURATION.labels(\n                task_name=task_name,\n                source=info.source,\n                tenant_id=tenant_id,\n            ).observe(time.monotonic() - start)\n    except Exception:\n        logger.debug(\"Failed to record indexing task postrun metrics\", exc_info=True)\n"
  },
  {
    "path": "backend/onyx/server/metrics/metrics_server.py",
    "content": "\"\"\"Standalone Prometheus metrics HTTP server for non-API processes.\n\nThe FastAPI API server already exposes /metrics via prometheus-fastapi-instrumentator.\nCelery workers and other background processes use this module to expose their\nown /metrics endpoint on a configurable port.\n\nUsage:\n    from onyx.server.metrics.metrics_server import start_metrics_server\n    start_metrics_server(\"monitoring\")  # reads port from env or uses default\n\"\"\"\n\nimport os\nimport threading\n\nfrom prometheus_client import start_http_server\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Default ports for worker types that serve custom Prometheus metrics.\n# Only add entries here when a worker actually registers collectors.\n# In k8s each worker type runs in its own pod, so PROMETHEUS_METRICS_PORT\n# env var can override.\n_DEFAULT_PORTS: dict[str, int] = {\n    \"monitoring\": 9096,\n    \"docfetching\": 9092,\n    \"docprocessing\": 9093,\n}\n\n_server_started = False\n_server_lock = threading.Lock()\n\n\ndef start_metrics_server(worker_type: str) -> int | None:\n    \"\"\"Start a Prometheus metrics HTTP server in a background thread.\n\n    Returns the port if started, None if disabled or already started.\n\n    Port resolution order:\n    1. PROMETHEUS_METRICS_PORT env var (explicit override)\n    2. Default port for the worker type\n    3. If worker type is unknown and no env var, skip\n\n    Set PROMETHEUS_METRICS_ENABLED=false to disable.\n    \"\"\"\n    global _server_started\n\n    with _server_lock:\n        if _server_started:\n            logger.debug(f\"Metrics server already started for {worker_type}\")\n            return None\n\n        enabled = os.environ.get(\"PROMETHEUS_METRICS_ENABLED\", \"true\").lower()\n        if enabled in (\"false\", \"0\", \"no\"):\n            logger.info(f\"Prometheus metrics server disabled for {worker_type}\")\n            return None\n\n        port_str = os.environ.get(\"PROMETHEUS_METRICS_PORT\")\n        if port_str:\n            try:\n                port = int(port_str)\n            except ValueError:\n                logger.warning(\n                    f\"Invalid PROMETHEUS_METRICS_PORT '{port_str}' for {worker_type}, \"\n                    \"must be a numeric port. Skipping metrics server.\"\n                )\n                return None\n        elif worker_type in _DEFAULT_PORTS:\n            port = _DEFAULT_PORTS[worker_type]\n        else:\n            logger.info(\n                f\"No default metrics port for worker type '{worker_type}' \"\n                \"and PROMETHEUS_METRICS_PORT not set. Skipping metrics server.\"\n            )\n            return None\n\n        try:\n            start_http_server(port)\n            _server_started = True\n            logger.info(\n                f\"Prometheus metrics server started on :{port} for {worker_type}\"\n            )\n            return port\n        except OSError as e:\n            logger.warning(\n                f\"Failed to start metrics server on :{port} for {worker_type}: {e}\"\n            )\n            return None\n"
  },
  {
    "path": "backend/onyx/server/metrics/opensearch_search.py",
    "content": "\"\"\"Prometheus metrics for OpenSearch search latency and throughput.\n\nTracks client-side round-trip latency, server-side execution time (from\nOpenSearch's ``took`` field), total search count, and in-flight concurrency.\n\"\"\"\n\nimport logging\nfrom collections.abc import Generator\nfrom contextlib import contextmanager\n\nfrom prometheus_client import Counter\nfrom prometheus_client import Gauge\nfrom prometheus_client import Histogram\n\nfrom onyx.document_index.opensearch.constants import OpenSearchSearchType\n\nlogger = logging.getLogger(__name__)\n\n_SEARCH_LATENCY_BUCKETS = (\n    0.005,\n    0.01,\n    0.025,\n    0.05,\n    0.1,\n    0.25,\n    0.5,\n    1.0,\n    2.5,\n    5.0,\n    10.0,\n    25.0,\n)\n\n_client_duration = Histogram(\n    \"onyx_opensearch_search_client_duration_seconds\",\n    \"Client-side end-to-end latency of OpenSearch search calls\",\n    [\"search_type\"],\n    buckets=_SEARCH_LATENCY_BUCKETS,\n)\n\n_server_duration = Histogram(\n    \"onyx_opensearch_search_server_duration_seconds\",\n    \"Server-side execution time reported by OpenSearch (took field)\",\n    [\"search_type\"],\n    buckets=_SEARCH_LATENCY_BUCKETS,\n)\n\n_search_total = Counter(\n    \"onyx_opensearch_search_total\",\n    \"Total number of search requests sent to OpenSearch\",\n    [\"search_type\"],\n)\n\n_searches_in_progress = Gauge(\n    \"onyx_opensearch_searches_in_progress\",\n    \"Number of OpenSearch searches currently in-flight\",\n    [\"search_type\"],\n)\n\n\ndef observe_opensearch_search(\n    search_type: OpenSearchSearchType,\n    client_duration_s: float,\n    server_took_ms: int | None,\n) -> None:\n    \"\"\"Records latency and throughput metrics for a completed OpenSearch search.\n\n    Args:\n        search_type: The type of search.\n        client_duration_s: Wall-clock duration measured on the client side, in\n            seconds.\n        server_took_ms: The ``took`` value from the OpenSearch response, in\n            milliseconds. May be ``None`` if the response did not include it.\n    \"\"\"\n    try:\n        label = search_type.value\n        _search_total.labels(search_type=label).inc()\n        _client_duration.labels(search_type=label).observe(client_duration_s)\n        if server_took_ms is not None:\n            _server_duration.labels(search_type=label).observe(server_took_ms / 1000.0)\n    except Exception:\n        logger.warning(\"Failed to record OpenSearch search metrics.\", exc_info=True)\n\n\n@contextmanager\ndef track_opensearch_search_in_progress(\n    search_type: OpenSearchSearchType,\n) -> Generator[None, None, None]:\n    \"\"\"Context manager that tracks in-flight OpenSearch searches via a Gauge.\"\"\"\n    incremented = False\n    label = search_type.value\n    try:\n        _searches_in_progress.labels(search_type=label).inc()\n        incremented = True\n    except Exception:\n        logger.warning(\"Failed to increment in-progress search gauge.\", exc_info=True)\n    try:\n        yield\n    finally:\n        if incremented:\n            try:\n                _searches_in_progress.labels(search_type=label).dec()\n            except Exception:\n                logger.warning(\n                    \"Failed to decrement in-progress search gauge.\", exc_info=True\n                )\n"
  },
  {
    "path": "backend/onyx/server/metrics/per_tenant.py",
    "content": "\"\"\"Per-tenant request counter metric.\n\nIncrements a counter on every request, labelled by tenant, so Grafana can\nanswer \"which tenant is generating the most traffic?\"\n\"\"\"\n\nfrom prometheus_client import Counter\nfrom prometheus_fastapi_instrumentator.metrics import Info\n\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n_requests_by_tenant = Counter(\n    \"onyx_api_requests_by_tenant_total\",\n    \"Total API requests by tenant\",\n    [\"tenant_id\", \"method\", \"handler\", \"status\"],\n)\n\n\ndef per_tenant_request_callback(info: Info) -> None:\n    \"\"\"Increment per-tenant request counter for every request.\"\"\"\n    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() or \"unknown\"\n    _requests_by_tenant.labels(\n        tenant_id=tenant_id,\n        method=info.method,\n        handler=info.modified_handler,\n        status=info.modified_status,\n    ).inc()\n"
  },
  {
    "path": "backend/onyx/server/metrics/postgres_connection_pool.py",
    "content": "\"\"\"SQLAlchemy connection pool Prometheus metrics.\n\nProvides production-grade visibility into database connection pool state:\n\n- Pool state gauges (checked-out, idle, overflow, configured size)\n- Pool lifecycle counters (checkouts, checkins, creates, invalidations, timeouts)\n- Per-endpoint connection attribution (which endpoints hold connections, for how long)\n\nMetrics are collected via two mechanisms:\n1. A custom Prometheus Collector that reads pool snapshots on each /metrics scrape\n2. SQLAlchemy pool event listeners (checkout, checkin, connect, invalidate) for\n   counters, histograms, and attribution\n\"\"\"\n\nimport time\n\nfrom fastapi import Request\nfrom fastapi.responses import JSONResponse\nfrom prometheus_client import Counter\nfrom prometheus_client import Gauge\nfrom prometheus_client import Histogram\nfrom prometheus_client.core import GaugeMetricFamily\nfrom prometheus_client.registry import Collector\nfrom prometheus_client.registry import REGISTRY\nfrom sqlalchemy import event\nfrom sqlalchemy.engine import Engine\nfrom sqlalchemy.engine.interfaces import DBAPIConnection\nfrom sqlalchemy.ext.asyncio import AsyncEngine\nfrom sqlalchemy.pool import ConnectionPoolEntry\nfrom sqlalchemy.pool import PoolProxiedConnection\nfrom sqlalchemy.pool import QueuePool\n\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import CURRENT_ENDPOINT_CONTEXTVAR\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\nlogger = setup_logger()\n\n# --- Pool lifecycle counters (event-driven) ---\n\n_checkout_total = Counter(\n    \"onyx_db_pool_checkout_total\",\n    \"Total connection checkouts from the pool\",\n    [\"engine\"],\n)\n\n_checkin_total = Counter(\n    \"onyx_db_pool_checkin_total\",\n    \"Total connection checkins to the pool\",\n    [\"engine\"],\n)\n\n_connections_created_total = Counter(\n    \"onyx_db_pool_connections_created_total\",\n    \"Total new database connections created\",\n    [\"engine\"],\n)\n\n_invalidations_total = Counter(\n    \"onyx_db_pool_invalidations_total\",\n    \"Total connection invalidations\",\n    [\"engine\"],\n)\n\n_checkout_timeout_total = Counter(\n    \"onyx_db_pool_checkout_timeout_total\",\n    \"Total connection checkout timeouts\",\n    [\"engine\"],\n)\n\n# --- Per-endpoint attribution (event-driven) ---\n\n_connections_held = Gauge(\n    \"onyx_db_connections_held_by_endpoint\",\n    \"Number of DB connections currently held, by endpoint and engine\",\n    [\"handler\", \"engine\", \"tenant_id\"],\n)\n\n_hold_seconds = Histogram(\n    \"onyx_db_connection_hold_seconds\",\n    \"Duration a DB connection is held by an endpoint\",\n    [\"handler\", \"engine\"],\n)\n\n\ndef pool_timeout_handler(\n    request: Request,  # noqa: ARG001\n    exc: Exception,\n) -> JSONResponse:\n    \"\"\"Increment the checkout timeout counter and return 503.\"\"\"\n    _checkout_timeout_total.labels(engine=\"unknown\").inc()\n    return JSONResponse(\n        status_code=503,\n        content={\n            \"detail\": \"Database connection pool timeout\",\n            \"error\": str(exc),\n        },\n    )\n\n\nclass PoolStateCollector(Collector):\n    \"\"\"Custom Prometheus collector that reads QueuePool state on each scrape.\n\n    Uses pool.checkedout(), pool.checkedin(), pool.overflow(), and pool.size()\n    for an atomic snapshot of pool state. Registered engines are stored as\n    (label, pool) tuples to avoid holding references to the full Engine.\n    \"\"\"\n\n    def __init__(self) -> None:\n        self._pools: list[tuple[str, QueuePool]] = []\n\n    def add_pool(self, label: str, pool: QueuePool) -> None:\n        self._pools.append((label, pool))\n\n    def collect(self) -> list[GaugeMetricFamily]:\n        checked_out = GaugeMetricFamily(\n            \"onyx_db_pool_checked_out\",\n            \"Currently checked-out connections\",\n            labels=[\"engine\"],\n        )\n        checked_in = GaugeMetricFamily(\n            \"onyx_db_pool_checked_in\",\n            \"Idle connections available in the pool\",\n            labels=[\"engine\"],\n        )\n        overflow = GaugeMetricFamily(\n            \"onyx_db_pool_overflow\",\n            \"Current overflow connections beyond pool_size\",\n            labels=[\"engine\"],\n        )\n        size = GaugeMetricFamily(\n            \"onyx_db_pool_size\",\n            \"Configured pool size\",\n            labels=[\"engine\"],\n        )\n\n        for label, pool in self._pools:\n            checked_out.add_metric([label], pool.checkedout())\n            checked_in.add_metric([label], pool.checkedin())\n            overflow.add_metric([label], pool.overflow())\n            size.add_metric([label], pool.size())\n\n        return [checked_out, checked_in, overflow, size]\n\n    def describe(self) -> list[GaugeMetricFamily]:\n        # Return empty to mark this as an \"unchecked\" collector. Prometheus\n        # skips upfront descriptor validation and just calls collect() at\n        # scrape time. Required because our metrics are dynamic (engine\n        # labels depend on which engines are registered at runtime).\n        return []\n\n\ndef _register_pool_events(engine: Engine, label: str) -> None:\n    \"\"\"Attach pool event listeners for metrics collection.\n\n    Listens to checkout, checkin, connect, and invalidate events.\n    Stores per-connection metadata on connection_record.info for attribution.\n    \"\"\"\n\n    @event.listens_for(engine, \"checkout\")\n    def on_checkout(\n        dbapi_conn: DBAPIConnection,  # noqa: ARG001\n        conn_record: ConnectionPoolEntry,\n        conn_proxy: PoolProxiedConnection,  # noqa: ARG001\n    ) -> None:\n        handler = CURRENT_ENDPOINT_CONTEXTVAR.get() or \"unknown\"\n        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() or \"unknown\"\n        conn_record.info[\"_metrics_endpoint\"] = handler\n        conn_record.info[\"_metrics_tenant_id\"] = tenant_id\n        conn_record.info[\"_metrics_checkout_time\"] = time.monotonic()\n        _checkout_total.labels(engine=label).inc()\n        _connections_held.labels(\n            handler=handler, engine=label, tenant_id=tenant_id\n        ).inc()\n\n    @event.listens_for(engine, \"checkin\")\n    def on_checkin(\n        dbapi_conn: DBAPIConnection,  # noqa: ARG001\n        conn_record: ConnectionPoolEntry,\n    ) -> None:\n        handler = conn_record.info.pop(\"_metrics_endpoint\", \"unknown\")\n        tenant_id = conn_record.info.pop(\"_metrics_tenant_id\", \"unknown\")\n        start = conn_record.info.pop(\"_metrics_checkout_time\", None)\n        _checkin_total.labels(engine=label).inc()\n        _connections_held.labels(\n            handler=handler, engine=label, tenant_id=tenant_id\n        ).dec()\n        if start is not None:\n            _hold_seconds.labels(handler=handler, engine=label).observe(\n                time.monotonic() - start\n            )\n\n    @event.listens_for(engine, \"connect\")\n    def on_connect(\n        dbapi_conn: DBAPIConnection,  # noqa: ARG001\n        conn_record: ConnectionPoolEntry,  # noqa: ARG001\n    ) -> None:\n        _connections_created_total.labels(engine=label).inc()\n\n    @event.listens_for(engine, \"invalidate\")\n    def on_invalidate(\n        dbapi_conn: DBAPIConnection,  # noqa: ARG001\n        conn_record: ConnectionPoolEntry,\n        exception: BaseException | None,  # noqa: ARG001\n    ) -> None:\n        _invalidations_total.labels(engine=label).inc()\n        # Defensively clean up the held-connections gauge in case checkin\n        # doesn't fire after invalidation (e.g. hard pool shutdown).\n        handler = conn_record.info.pop(\"_metrics_endpoint\", None)\n        tenant_id = conn_record.info.pop(\"_metrics_tenant_id\", \"unknown\")\n        start = conn_record.info.pop(\"_metrics_checkout_time\", None)\n        if handler:\n            _connections_held.labels(\n                handler=handler, engine=label, tenant_id=tenant_id\n            ).dec()\n        if start is not None:\n            _hold_seconds.labels(handler=handler or \"unknown\", engine=label).observe(\n                time.monotonic() - start\n            )\n\n\ndef setup_postgres_connection_pool_metrics(\n    engines: dict[str, Engine | AsyncEngine],\n) -> None:\n    \"\"\"Register pool metrics for all provided engines.\n\n    Args:\n        engines: Mapping of engine label to Engine or AsyncEngine.\n            Example: {\"sync\": sync_engine, \"async\": async_engine, \"readonly\": ro_engine}\n\n    Engines using NullPool are skipped (no pool state to monitor).\n    For AsyncEngine, events are registered on the underlying sync_engine.\n    \"\"\"\n    collector = PoolStateCollector()\n\n    for label, engine in engines.items():\n        # Resolve async engines to their underlying sync engine\n        sync_engine = engine.sync_engine if isinstance(engine, AsyncEngine) else engine\n\n        pool = sync_engine.pool\n        if not isinstance(pool, QueuePool):\n            logger.info(\n                f\"Skipping pool metrics for engine '{label}' ({type(pool).__name__} — no pool state)\"\n            )\n            continue\n\n        collector.add_pool(label, pool)\n        _register_pool_events(sync_engine, label)\n        logger.info(f\"Registered pool metrics for engine '{label}'\")\n\n    REGISTRY.register(collector)\n"
  },
  {
    "path": "backend/onyx/server/metrics/prometheus_setup.py",
    "content": "\"\"\"Prometheus metrics setup for the Onyx API server.\n\nOrchestrates HTTP request instrumentation via ``prometheus-fastapi-instrumentator``:\n- Request count, latency histograms, in-progress gauges\n- Pool checkout timeout exception handler\n- Custom metric callbacks (e.g. slow request counting)\n\nSQLAlchemy connection pool metrics are registered separately via\n``setup_postgres_connection_pool_metrics`` during application lifespan\n(after engines are created).\n\"\"\"\n\nfrom prometheus_fastapi_instrumentator import Instrumentator\nfrom prometheus_fastapi_instrumentator.metrics import default as default_metrics\nfrom sqlalchemy.exc import TimeoutError as SATimeoutError\nfrom starlette.applications import Starlette\n\nfrom onyx.server.metrics.per_tenant import per_tenant_request_callback\nfrom onyx.server.metrics.postgres_connection_pool import pool_timeout_handler\nfrom onyx.server.metrics.slow_requests import slow_request_callback\n\n_EXCLUDED_HANDLERS = [\n    \"/health\",\n    \"/metrics\",\n    \"/openapi.json\",\n]\n\n# Denser buckets for per-handler latency histograms. The instrumentator's\n# default (0.1, 0.5, 1) is too coarse for meaningful P95/P99 computation.\n_LATENCY_BUCKETS = (\n    0.01,\n    0.025,\n    0.05,\n    0.1,\n    0.25,\n    0.5,\n    1.0,\n    2.5,\n    5.0,\n    10.0,\n)\n\n\ndef setup_prometheus_metrics(app: Starlette) -> None:\n    \"\"\"Initialize HTTP request metrics for the Onyx API server.\n\n    Must be called in ``get_application()`` BEFORE the app starts, because\n    the instrumentator adds middleware via ``app.add_middleware()``.\n\n    Args:\n        app: The FastAPI/Starlette application to instrument.\n    \"\"\"\n    app.add_exception_handler(SATimeoutError, pool_timeout_handler)\n\n    instrumentator = Instrumentator(\n        should_group_status_codes=False,\n        should_ignore_untemplated=False,\n        should_group_untemplated=True,\n        should_instrument_requests_inprogress=True,\n        inprogress_labels=True,\n        excluded_handlers=_EXCLUDED_HANDLERS,\n    )\n\n    # Explicitly create the default metrics (http_requests_total,\n    # http_request_duration_seconds, etc.) and add them first.  The library\n    # skips creating defaults when ANY custom instrumentations are registered\n    # via .add(), so we must include them ourselves.\n    default_callback = default_metrics(latency_lowr_buckets=_LATENCY_BUCKETS)\n    if default_callback:\n        instrumentator.add(default_callback)\n\n    instrumentator.add(slow_request_callback)\n    instrumentator.add(per_tenant_request_callback)\n\n    instrumentator.instrument(app, latency_lowr_buckets=_LATENCY_BUCKETS).expose(app)\n"
  },
  {
    "path": "backend/onyx/server/metrics/slow_requests.py",
    "content": "\"\"\"Slow request counter metric.\n\nIncrements a counter whenever a request exceeds a configurable duration\nthreshold. Useful for identifying endpoints that regularly take too long.\n\"\"\"\n\nimport os\n\nfrom prometheus_client import Counter\nfrom prometheus_fastapi_instrumentator.metrics import Info\n\nSLOW_REQUEST_THRESHOLD_SECONDS: float = max(\n    0.0,\n    float(os.environ.get(\"SLOW_REQUEST_THRESHOLD_SECONDS\", \"1.0\")),\n)\n\n_slow_requests = Counter(\n    \"onyx_api_slow_requests_total\",\n    \"Total requests exceeding the slow request threshold\",\n    [\"method\", \"handler\", \"status\"],\n)\n\n\ndef slow_request_callback(info: Info) -> None:\n    \"\"\"Increment slow request counter when duration exceeds threshold.\"\"\"\n    if info.modified_duration > SLOW_REQUEST_THRESHOLD_SECONDS:\n        _slow_requests.labels(\n            method=info.method,\n            handler=info.modified_handler,\n            status=info.modified_status,\n        ).inc()\n"
  },
  {
    "path": "backend/onyx/server/middleware/latency_logging.py",
    "content": "import logging\nimport time\nfrom collections.abc import Awaitable\nfrom collections.abc import Callable\n\nfrom fastapi import FastAPI\nfrom fastapi import Request\nfrom fastapi import Response\n\n\ndef add_latency_logging_middleware(app: FastAPI, logger: logging.LoggerAdapter) -> None:\n    @app.middleware(\"http\")\n    async def log_latency(\n        request: Request, call_next: Callable[[Request], Awaitable[Response]]\n    ) -> Response:\n        start_time = time.monotonic()\n        response = await call_next(request)\n        process_time = time.monotonic() - start_time\n        logger.debug(\n            f\"Path: {request.url.path} - Method: {request.method} - \"\n            f\"Status Code: {response.status_code} - Time: {process_time:.4f} secs\"\n        )\n        return response\n"
  },
  {
    "path": "backend/onyx/server/middleware/rate_limiting.py",
    "content": "from collections.abc import Callable\nfrom typing import List\n\nfrom fastapi import Depends\nfrom fastapi import Request\nfrom fastapi_limiter import FastAPILimiter\nfrom fastapi_limiter.depends import RateLimiter\n\nfrom onyx.configs.app_configs import AUTH_RATE_LIMITING_ENABLED\nfrom onyx.configs.app_configs import RATE_LIMIT_MAX_REQUESTS\nfrom onyx.configs.app_configs import RATE_LIMIT_WINDOW_SECONDS\nfrom onyx.redis.redis_pool import get_async_redis_connection\n\n\nasync def setup_auth_limiter() -> None:\n    # Use the centralized async Redis connection\n    redis = await get_async_redis_connection()\n    await FastAPILimiter.init(redis)\n\n\nasync def close_auth_limiter() -> None:\n    # This closes the FastAPILimiter connection so we don't leave open connections to Redis.\n    await FastAPILimiter.close()\n\n\nasync def rate_limit_key(request: Request) -> str:\n    # Uses both IP and User-Agent to make collisions less likely if IP is behind NAT.\n    # If request.client is None, a fallback is used to avoid completely unknown keys.\n    # This helps ensure we have a unique key for each 'user' in simple scenarios.\n    ip_part = request.client.host if request.client else \"unknown\"\n    ua_part = request.headers.get(\"user-agent\", \"none\").replace(\" \", \"_\")\n    return f\"{ip_part}-{ua_part}\"\n\n\ndef get_auth_rate_limiters() -> List[Callable]:\n    if not AUTH_RATE_LIMITING_ENABLED:\n        return []\n\n    return [\n        Depends(\n            RateLimiter(\n                times=RATE_LIMIT_MAX_REQUESTS or 100,\n                seconds=RATE_LIMIT_WINDOW_SECONDS or 60,\n                # Use the custom key function to distinguish users\n                identifier=rate_limit_key,\n            )\n        )\n    ]\n"
  },
  {
    "path": "backend/onyx/server/models.py",
    "content": "import datetime\nfrom typing import Generic\nfrom typing import Optional\nfrom typing import TypeVar\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.db.enums import AccountType\nfrom onyx.db.models import User\n\n\nDataT = TypeVar(\"DataT\")\n\n\nclass StatusResponse(BaseModel, Generic[DataT]):\n    success: bool\n    message: Optional[str] = None\n    data: Optional[DataT] = None\n\n\nclass ApiKey(BaseModel):\n    api_key: str\n\n\nclass IdReturn(BaseModel):\n    id: int\n\n\nclass MinimalUserSnapshot(BaseModel):\n    id: UUID\n    email: str\n\n\nclass UserGroupInfo(BaseModel):\n    id: int\n    name: str\n\n\nclass FullUserSnapshot(BaseModel):\n    id: UUID\n    email: str\n    role: UserRole\n    account_type: AccountType\n    is_active: bool\n    password_configured: bool\n    personal_name: str | None\n    created_at: datetime.datetime\n    updated_at: datetime.datetime\n    groups: list[UserGroupInfo]\n    is_scim_synced: bool\n\n    @classmethod\n    def from_user_model(\n        cls,\n        user: User,\n        groups: list[UserGroupInfo] | None = None,\n        is_scim_synced: bool = False,\n    ) -> \"FullUserSnapshot\":\n        return cls(\n            id=user.id,\n            email=user.email,\n            role=user.role,\n            account_type=user.account_type,\n            is_active=user.is_active,\n            password_configured=user.password_configured,\n            personal_name=user.personal_name,\n            created_at=user.created_at,\n            updated_at=user.updated_at,\n            groups=groups or [],\n            is_scim_synced=is_scim_synced,\n        )\n\n\nclass DisplayPriorityRequest(BaseModel):\n    display_priority_map: dict[int, int]\n\n\nclass InvitedUserSnapshot(BaseModel):\n    email: str\n"
  },
  {
    "path": "backend/onyx/server/onyx_api/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/onyx_api/ingestion.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.configs.constants import DEFAULT_CC_PAIR_ID\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import IndexAttemptMetadata\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.document import delete_documents_complete__no_commit\nfrom onyx.db.document import get_document\nfrom onyx.db.document import get_documents_by_cc_pair\nfrom onyx.db.document import get_ingestion_documents\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.search_settings import get_active_search_settings\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.db.search_settings import get_secondary_search_settings\nfrom onyx.document_index.factory import get_all_document_indices\nfrom onyx.indexing.adapters.document_indexing_adapter import (\n    DocumentIndexingBatchAdapter,\n)\nfrom onyx.indexing.embedder import DefaultIndexingEmbedder\nfrom onyx.indexing.indexing_pipeline import run_indexing_pipeline\nfrom onyx.server.onyx_api.models import DocMinimalInfo\nfrom onyx.server.onyx_api.models import IngestionDocument\nfrom onyx.server.onyx_api.models import IngestionResult\nfrom onyx.server.utils_vector_db import require_vector_db\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n# not using /api to avoid confusion with nginx api path routing\nrouter = APIRouter(prefix=\"/onyx-api\", tags=PUBLIC_API_TAGS)\n\n\n@router.get(\"/connector-docs/{cc_pair_id}\")\ndef get_docs_by_connector_credential_pair(\n    cc_pair_id: int,\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[DocMinimalInfo]:\n    db_docs = get_documents_by_cc_pair(cc_pair_id=cc_pair_id, db_session=db_session)\n    return [\n        DocMinimalInfo(\n            document_id=doc.id,\n            semantic_id=doc.semantic_id,\n            link=doc.link,\n        )\n        for doc in db_docs\n    ]\n\n\n@router.get(\"/ingestion\")\ndef get_ingestion_docs(\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[DocMinimalInfo]:\n    db_docs = get_ingestion_documents(db_session)\n    return [\n        DocMinimalInfo(\n            document_id=doc.id,\n            semantic_id=doc.semantic_id,\n            link=doc.link,\n        )\n        for doc in db_docs\n    ]\n\n\n@router.post(\"/ingestion\", dependencies=[Depends(require_vector_db)])\ndef upsert_ingestion_doc(\n    doc_info: IngestionDocument,\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> IngestionResult:\n    tenant_id = get_current_tenant_id()\n\n    doc_info.document.from_ingestion_api = True\n\n    if doc_info.document.doc_updated_at is None:\n        doc_info.document.doc_updated_at = datetime.now(tz=timezone.utc)\n\n    document = Document.from_base(doc_info.document)\n\n    # TODO once the frontend is updated with this enum, remove this logic\n    if document.source == DocumentSource.INGESTION_API:\n        document.source = DocumentSource.FILE\n\n    cc_pair = get_connector_credential_pair_from_id(\n        db_session=db_session,\n        cc_pair_id=doc_info.cc_pair_id or DEFAULT_CC_PAIR_ID,\n    )\n    if cc_pair is None:\n        raise HTTPException(\n            status_code=400, detail=\"Connector-Credential Pair specified does not exist\"\n        )\n\n    # Need to index for both the primary and secondary index if possible\n    active_search_settings = get_active_search_settings(db_session)\n    # This flow is for indexing so we get all indices.\n    document_indices = get_all_document_indices(\n        active_search_settings.primary,\n        None,\n        None,\n    )\n\n    search_settings = get_current_search_settings(db_session)\n\n    index_embedding_model = DefaultIndexingEmbedder.from_db_search_settings(\n        search_settings=search_settings\n    )\n\n    # Build adapter for primary indexing\n    adapter = DocumentIndexingBatchAdapter(\n        db_session=db_session,\n        connector_id=cc_pair.connector_id,\n        credential_id=cc_pair.credential_id,\n        tenant_id=tenant_id,\n        index_attempt_metadata=IndexAttemptMetadata(\n            connector_id=cc_pair.connector_id,\n            credential_id=cc_pair.credential_id,\n        ),\n    )\n\n    indexing_pipeline_result = run_indexing_pipeline(\n        embedder=index_embedding_model,\n        document_indices=document_indices,\n        ignore_time_skip=True,\n        db_session=db_session,\n        tenant_id=tenant_id,\n        document_batch=[document],\n        request_id=None,\n        adapter=adapter,\n    )\n\n    # If there's a secondary index being built, index the doc but don't use it for return here\n    if active_search_settings.secondary:\n        sec_search_settings = get_secondary_search_settings(db_session)\n\n        if sec_search_settings is None:\n            # Should not ever happen\n            raise RuntimeError(\n                \"Secondary index exists but no search settings configured\"\n            )\n\n        new_index_embedding_model = DefaultIndexingEmbedder.from_db_search_settings(\n            search_settings=sec_search_settings\n        )\n\n        # This flow is for indexing so we get all indices.\n        sec_document_indices = get_all_document_indices(\n            active_search_settings.secondary, None, None\n        )\n\n        run_indexing_pipeline(\n            embedder=new_index_embedding_model,\n            document_indices=sec_document_indices,\n            ignore_time_skip=True,\n            db_session=db_session,\n            tenant_id=tenant_id,\n            document_batch=[document],\n            request_id=None,\n            adapter=adapter,\n        )\n\n    return IngestionResult(\n        document_id=document.id,\n        already_existed=indexing_pipeline_result.new_docs > 0,\n    )\n\n\n@router.delete(\"/ingestion/{document_id}\", dependencies=[Depends(require_vector_db)])\ndef delete_ingestion_doc(\n    document_id: str,\n    _: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    tenant_id = get_current_tenant_id()\n\n    # Verify the document exists and was created via the ingestion API\n    document = get_document(document_id=document_id, db_session=db_session)\n    if document is None:\n        raise HTTPException(status_code=404, detail=\"Document not found\")\n\n    if not document.from_ingestion_api:\n        raise HTTPException(\n            status_code=400,\n            detail=\"Document was not created via the ingestion API\",\n        )\n\n    active_search_settings = get_active_search_settings(db_session)\n    # This flow is for deletion so we get all indices.\n    document_indices = get_all_document_indices(\n        active_search_settings.primary,\n        active_search_settings.secondary,\n        None,\n    )\n    for document_index in document_indices:\n        document_index.delete_single(\n            doc_id=document_id,\n            tenant_id=tenant_id,\n            chunk_count=document.chunk_count,\n        )\n\n    # Delete from database\n    delete_documents_complete__no_commit(db_session, [document_id])\n    db_session.commit()\n"
  },
  {
    "path": "backend/onyx/server/onyx_api/models.py",
    "content": "from pydantic import BaseModel\n\nfrom onyx.connectors.models import DocumentBase\n\n\nclass IngestionDocument(BaseModel):\n    document: DocumentBase\n    cc_pair_id: int | None = None\n\n\nclass IngestionResult(BaseModel):\n    document_id: str\n    already_existed: bool\n\n\nclass DocMinimalInfo(BaseModel):\n    document_id: str\n    semantic_id: str\n    link: str | None = None\n"
  },
  {
    "path": "backend/onyx/server/pat/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/pat/api.py",
    "content": "\"\"\"API endpoints for Personal Access Tokens.\"\"\"\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.pat import create_pat\nfrom onyx.db.pat import list_user_pats\nfrom onyx.db.pat import revoke_pat\nfrom onyx.server.pat.models import CreatedTokenResponse\nfrom onyx.server.pat.models import CreateTokenRequest\nfrom onyx.server.pat.models import TokenResponse\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/user/pats\")\n\n\n@router.get(\"\")\ndef list_tokens(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> list[TokenResponse]:\n    \"\"\"List all active tokens for current user.\"\"\"\n    pats = list_user_pats(db_session, user.id)\n    return [\n        TokenResponse(\n            id=pat.id,\n            name=pat.name,\n            token_display=pat.token_display,\n            created_at=pat.created_at,\n            expires_at=pat.expires_at,\n            last_used_at=pat.last_used_at,\n        )\n        for pat in pats\n    ]\n\n\n@router.post(\"\")\ndef create_token(\n    request: CreateTokenRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> CreatedTokenResponse:\n    \"\"\"Create new personal access token for current user.\"\"\"\n    try:\n        pat, raw_token = create_pat(\n            db_session=db_session,\n            user_id=user.id,\n            name=request.name,\n            expiration_days=request.expiration_days,\n        )\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n    logger.info(f\"User {user.email} created PAT '{request.name}'\")\n\n    return CreatedTokenResponse(\n        id=pat.id,\n        name=pat.name,\n        token_display=pat.token_display,\n        token=raw_token,  # ONLY time we return the raw token!\n        created_at=pat.created_at,\n        expires_at=pat.expires_at,\n        last_used_at=pat.last_used_at,\n    )\n\n\n@router.delete(\"/{token_id}\")\ndef delete_token(\n    token_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> dict[str, str]:\n    \"\"\"Delete (revoke) personal access token. Only owner can revoke their own tokens.\"\"\"\n    success = revoke_pat(db_session, token_id, user.id)\n    if not success:\n        raise HTTPException(\n            status_code=404, detail=\"Token not found or not owned by user\"\n        )\n\n    logger.info(f\"User {user.email} revoked token {token_id}\")\n    return {\"message\": \"Token deleted successfully\"}\n"
  },
  {
    "path": "backend/onyx/server/pat/models.py",
    "content": "\"\"\"Pydantic models for Personal Access Token API.\"\"\"\n\nfrom datetime import datetime\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\n\nclass CreateTokenRequest(BaseModel):\n    name: str = Field(\n        ..., min_length=1, max_length=100, description=\"Human-readable token name\"\n    )\n    expiration_days: int | None = Field(\n        None,\n        ge=1,\n        description=\"Days until expiration. Common values: 7, 30, 365, or null (no expiration). Must be >= 1 if provided.\",\n    )\n\n\nclass TokenResponse(BaseModel):\n    id: int\n    name: str\n    token_display: str\n    created_at: datetime\n    expires_at: datetime | None\n    last_used_at: datetime | None\n\n\nclass CreatedTokenResponse(TokenResponse):\n    token: str  # Only returned on creation - user must copy it now!\n"
  },
  {
    "path": "backend/onyx/server/query_and_chat/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/server/query_and_chat/chat_backend.py",
    "content": "import datetime\nimport json\nfrom collections.abc import Generator\nfrom datetime import timedelta\nfrom uuid import UUID\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Query\nfrom fastapi import Request\nfrom fastapi import Response\nfrom fastapi.responses import StreamingResponse\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.api_key import get_hashed_api_key_from_request\nfrom onyx.auth.pat import get_hashed_pat_from_request\nfrom onyx.auth.users import current_chat_accessible_user\nfrom onyx.auth.users import current_user\nfrom onyx.cache.factory import get_cache_backend\nfrom onyx.chat.chat_processing_checker import is_chat_session_processing\nfrom onyx.chat.chat_state import ChatStateContainer\nfrom onyx.chat.chat_utils import convert_chat_history_basic\nfrom onyx.chat.chat_utils import create_chat_history_chain\nfrom onyx.chat.chat_utils import create_chat_session_from_request\nfrom onyx.chat.chat_utils import extract_headers\nfrom onyx.chat.models import ChatFullResponse\nfrom onyx.chat.models import CreateChatSessionID\nfrom onyx.chat.process_message import gather_stream_full\nfrom onyx.chat.process_message import handle_multi_model_stream\nfrom onyx.chat.process_message import handle_stream_message_objects\nfrom onyx.chat.prompt_utils import get_default_base_system_prompt\nfrom onyx.chat.stop_signal_checker import set_fence\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.configs.chat_configs import HARD_DELETE_CHATS\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.constants import MilestoneRecordType\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.configs.model_configs import LITELLM_PASS_THROUGH_HEADERS\nfrom onyx.db.chat import add_chats_to_session_from_slack_thread\nfrom onyx.db.chat import delete_all_chat_sessions_for_user\nfrom onyx.db.chat import delete_chat_session\nfrom onyx.db.chat import duplicate_chat_session_for_user_from_slack\nfrom onyx.db.chat import get_chat_message\nfrom onyx.db.chat import get_chat_messages_by_session\nfrom onyx.db.chat import get_chat_session_by_id\nfrom onyx.db.chat import get_chat_sessions_by_user\nfrom onyx.db.chat import set_as_latest_chat_message\nfrom onyx.db.chat import set_preferred_response\nfrom onyx.db.chat import translate_db_message_to_chat_message_detail\nfrom onyx.db.chat import update_chat_session\nfrom onyx.db.chat_search import search_chat_sessions\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.feedback import create_chat_message_feedback\nfrom onyx.db.feedback import remove_chat_message_feedback\nfrom onyx.db.models import ChatSessionSharedStatus\nfrom onyx.db.models import Persona\nfrom onyx.db.models import User\nfrom onyx.db.persona import get_persona_by_id\nfrom onyx.db.usage import increment_usage\nfrom onyx.db.usage import UsageType\nfrom onyx.db.user_file import get_file_id_by_user_file_id\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.factory import get_default_llm\nfrom onyx.llm.factory import get_llm_for_persona\nfrom onyx.llm.factory import get_llm_token_counter\nfrom onyx.secondary_llm_flows.chat_session_naming import generate_chat_session_name\nfrom onyx.server.api_key_usage import check_api_key_usage\nfrom onyx.server.query_and_chat.models import ChatFeedbackRequest\nfrom onyx.server.query_and_chat.models import ChatMessageIdentifier\nfrom onyx.server.query_and_chat.models import ChatRenameRequest\nfrom onyx.server.query_and_chat.models import ChatSearchResponse\nfrom onyx.server.query_and_chat.models import ChatSessionCreationRequest\nfrom onyx.server.query_and_chat.models import ChatSessionDetailResponse\nfrom onyx.server.query_and_chat.models import ChatSessionDetails\nfrom onyx.server.query_and_chat.models import ChatSessionGroup\nfrom onyx.server.query_and_chat.models import ChatSessionsResponse\nfrom onyx.server.query_and_chat.models import ChatSessionSummary\nfrom onyx.server.query_and_chat.models import ChatSessionUpdateRequest\nfrom onyx.server.query_and_chat.models import MessageOrigin\nfrom onyx.server.query_and_chat.models import RenameChatSessionResponse\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.server.query_and_chat.models import SetPreferredResponseRequest\nfrom onyx.server.query_and_chat.models import UpdateChatSessionTemperatureRequest\nfrom onyx.server.query_and_chat.models import UpdateChatSessionThreadRequest\nfrom onyx.server.query_and_chat.session_loading import (\n    translate_assistant_message_to_packets,\n)\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.token_limit import check_token_rate_limits\nfrom onyx.server.usage_limits import check_llm_cost_limit_for_provider\nfrom onyx.server.usage_limits import check_usage_and_raise\nfrom onyx.server.usage_limits import is_usage_limits_enabled\nfrom onyx.server.utils import get_json_line\nfrom onyx.tracing.framework.create import ensure_trace\nfrom onyx.utils.headers import get_custom_tool_additional_request_headers\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.telemetry import mt_cloud_telemetry\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nrouter = APIRouter(prefix=\"/chat\")\n\n\ndef _get_available_tokens_for_persona(\n    persona: Persona,\n    db_session: Session,\n    user: User,\n) -> int:\n    def _get_non_reserved_input_tokens(\n        model_max_input_tokens: int,\n        system_and_agent_prompt_tokens: int,\n        num_tools: int,\n        token_reserved_per_tool: int = 256,\n        # Estimating for a long user input message, hard to know ahead of time\n        default_reserved_tokens: int = 2000,\n    ) -> int:\n        return (\n            model_max_input_tokens\n            - system_and_agent_prompt_tokens\n            - num_tools * token_reserved_per_tool\n            - default_reserved_tokens\n        )\n\n    llm = get_llm_for_persona(persona=persona, user=user)\n    token_counter = get_llm_token_counter(llm)\n\n    if persona.replace_base_system_prompt and persona.system_prompt:\n        # User has opted to replace the base system prompt entirely\n        combined_prompt_tokens = token_counter(persona.system_prompt)\n    else:\n        # Default behavior: prepend custom prompt to base system prompt\n        system_prompt = get_default_base_system_prompt(db_session)\n        agent_prompt = persona.system_prompt + \" \" if persona.system_prompt else \"\"\n        combined_prompt_tokens = token_counter(agent_prompt + system_prompt)\n\n    return _get_non_reserved_input_tokens(\n        model_max_input_tokens=llm.config.max_input_tokens,\n        system_and_agent_prompt_tokens=combined_prompt_tokens,\n        num_tools=len(persona.tools),\n    )\n\n\n@router.get(\"/get-user-chat-sessions\", tags=PUBLIC_API_TAGS)\ndef get_user_chat_sessions(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n    project_id: int | None = None,\n    only_non_project_chats: bool = True,\n    include_failed_chats: bool = False,\n    page_size: int = Query(default=50, ge=1, le=100),\n    before: str | None = Query(default=None),\n) -> ChatSessionsResponse:\n    user_id = user.id\n\n    try:\n        before_dt = (\n            datetime.datetime.fromisoformat(before) if before is not None else None\n        )\n    except ValueError:\n        raise HTTPException(status_code=422, detail=\"Invalid 'before' timestamp format\")\n\n    try:\n        # Fetch one extra to determine if there are more results\n        chat_sessions = get_chat_sessions_by_user(\n            user_id=user_id,\n            deleted=False,\n            db_session=db_session,\n            project_id=project_id,\n            only_non_project_chats=only_non_project_chats,\n            include_failed_chats=include_failed_chats,\n            limit=page_size + 1,\n            before=before_dt,\n        )\n\n    except ValueError:\n        raise ValueError(\"Chat session does not exist or has been deleted\")\n\n    has_more = len(chat_sessions) > page_size\n    chat_sessions = chat_sessions[:page_size]\n\n    return ChatSessionsResponse(\n        sessions=[\n            ChatSessionDetails(\n                id=chat.id,\n                name=chat.description,\n                persona_id=chat.persona_id,\n                time_created=chat.time_created.isoformat(),\n                time_updated=chat.time_updated.isoformat(),\n                shared_status=chat.shared_status,\n                current_alternate_model=chat.current_alternate_model,\n                current_temperature_override=chat.temperature_override,\n            )\n            for chat in chat_sessions\n        ],\n        has_more=has_more,\n    )\n\n\n@router.put(\"/update-chat-session-temperature\")\ndef update_chat_session_temperature(\n    update_thread_req: UpdateChatSessionTemperatureRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    chat_session = get_chat_session_by_id(\n        chat_session_id=update_thread_req.chat_session_id,\n        user_id=user.id,\n        db_session=db_session,\n    )\n\n    # Validate temperature_override\n    if update_thread_req.temperature_override is not None:\n        if (\n            update_thread_req.temperature_override < 0\n            or update_thread_req.temperature_override > 2\n        ):\n            raise HTTPException(\n                status_code=400, detail=\"Temperature must be between 0 and 2\"\n            )\n\n        # Additional check for Anthropic models\n        if (\n            chat_session.current_alternate_model\n            and LlmProviderNames.ANTHROPIC\n            in chat_session.current_alternate_model.lower()\n        ):\n            if update_thread_req.temperature_override > 1:\n                raise HTTPException(\n                    status_code=400,\n                    detail=\"Temperature for Anthropic models must be between 0 and 1\",\n                )\n\n    chat_session.temperature_override = update_thread_req.temperature_override\n\n    db_session.add(chat_session)\n    db_session.commit()\n\n\n@router.put(\"/update-chat-session-model\")\ndef update_chat_session_model(\n    update_thread_req: UpdateChatSessionThreadRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    chat_session = get_chat_session_by_id(\n        chat_session_id=update_thread_req.chat_session_id,\n        user_id=user.id,\n        db_session=db_session,\n    )\n    chat_session.current_alternate_model = update_thread_req.new_alternate_model\n\n    db_session.add(chat_session)\n    db_session.commit()\n\n\n@router.get(\"/get-chat-session/{session_id}\", tags=PUBLIC_API_TAGS)\ndef get_chat_session(\n    session_id: UUID,\n    is_shared: bool = False,\n    include_deleted: bool = False,\n    user: User = Depends(current_chat_accessible_user),\n    db_session: Session = Depends(get_session),\n) -> ChatSessionDetailResponse:\n    user_id = user.id\n    try:\n        chat_session = get_chat_session_by_id(\n            chat_session_id=session_id,\n            user_id=user_id,\n            db_session=db_session,\n            is_shared=is_shared,\n            include_deleted=include_deleted,\n        )\n    except ValueError:\n        try:\n            # If we failed to get a chat session, try to retrieve the session with\n            # less restrictive filters in order to identify what exactly mismatched\n            # so we can bubble up an accurate error code andmessage.\n            existing_chat_session = get_chat_session_by_id(\n                chat_session_id=session_id,\n                user_id=None,\n                db_session=db_session,\n                is_shared=False,\n                include_deleted=True,\n            )\n        except ValueError:\n            raise HTTPException(status_code=404, detail=\"Chat session not found\")\n\n        if not include_deleted and existing_chat_session.deleted:\n            raise HTTPException(status_code=404, detail=\"Chat session has been deleted\")\n\n        if is_shared:\n            if existing_chat_session.shared_status != ChatSessionSharedStatus.PUBLIC:\n                raise HTTPException(\n                    status_code=403, detail=\"Chat session is not shared\"\n                )\n        elif user_id is not None and existing_chat_session.user_id not in (\n            user_id,\n            None,\n        ):\n            raise HTTPException(status_code=403, detail=\"Access denied\")\n\n        raise HTTPException(status_code=404, detail=\"Chat session not found\")\n\n    # for chat-seeding: if the session is unassigned, assign it now. This is done here\n    # to avoid another back and forth between FE -> BE before starting the first\n    # message generation\n    if chat_session.user_id is None and user_id is not None:\n        chat_session.user_id = user_id\n        db_session.commit()\n\n    session_messages = get_chat_messages_by_session(\n        chat_session_id=session_id,\n        user_id=user_id,\n        db_session=db_session,\n        # we already did a permission check above with the call to\n        # `get_chat_session_by_id`, so we can skip it here\n        skip_permission_check=True,\n        # we need the tool call objs anyways, so just fetch them in a single call\n        prefetch_top_two_level_tool_calls=True,\n    )\n\n    # Convert messages to ChatMessageDetail format\n    chat_message_details = [\n        translate_db_message_to_chat_message_detail(msg) for msg in session_messages\n    ]\n\n    try:\n        is_processing = is_chat_session_processing(session_id, get_cache_backend())\n        # Edit the last message to indicate loading (Overriding default message value)\n        if is_processing and chat_message_details:\n            last_msg = chat_message_details[-1]\n            if last_msg.message_type == MessageType.ASSISTANT:\n                last_msg.message = \"Message is loading... Please refresh the page soon.\"\n    except Exception:\n        logger.exception(\n            \"An error occurred while checking if the chat session is processing\"\n        )\n\n    # Every assistant message might have a set of tool calls associated with it, these need to be replayed back for the frontend\n    # Each list is the set of tool calls for the given assistant message.\n    replay_packet_lists: list[list[Packet]] = []\n    for msg in session_messages:\n        if msg.message_type == MessageType.ASSISTANT:\n            replay_packet_lists.append(\n                translate_assistant_message_to_packets(\n                    chat_message=msg, db_session=db_session\n                )\n            )\n            # msg_packet_list.append(Packet(ind=end_step_nr, obj=OverallStop()))\n\n    return ChatSessionDetailResponse(\n        chat_session_id=session_id,\n        description=chat_session.description,\n        persona_id=chat_session.persona_id,\n        persona_name=chat_session.persona.name if chat_session.persona else None,\n        personal_icon_name=chat_session.persona.icon_name,\n        current_alternate_model=chat_session.current_alternate_model,\n        messages=chat_message_details,\n        time_created=chat_session.time_created,\n        shared_status=chat_session.shared_status,\n        current_temperature_override=chat_session.temperature_override,\n        deleted=chat_session.deleted,\n        owner_name=chat_session.user.personal_name if chat_session.user else None,\n        # Packets are now directly serialized as Packet Pydantic models\n        packets=replay_packet_lists,\n    )\n\n\n@router.post(\"/create-chat-session\", tags=PUBLIC_API_TAGS)\ndef create_new_chat_session(\n    chat_session_creation_request: ChatSessionCreationRequest,\n    user: User = Depends(current_chat_accessible_user),\n    db_session: Session = Depends(get_session),\n) -> CreateChatSessionID:\n    user_id = user.id\n\n    try:\n        new_chat_session = create_chat_session_from_request(\n            chat_session_request=chat_session_creation_request,\n            user_id=user_id,\n            db_session=db_session,\n        )\n    except ValueError as e:\n        # Project access denied\n        raise HTTPException(status_code=403, detail=str(e))\n    except Exception as e:\n        logger.exception(e)\n        raise HTTPException(status_code=400, detail=\"Invalid Persona provided.\")\n\n    return CreateChatSessionID(chat_session_id=new_chat_session.id)\n\n\n@router.put(\"/rename-chat-session\")\ndef rename_chat_session(\n    rename_req: ChatRenameRequest,\n    request: Request,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> RenameChatSessionResponse:\n    # 3000 tokens is more than enough for a pair of messages which is enough to provide the required context for generating a\n    # good name for the chat session. It's also small enough to fit on even the worst context window LLMs.\n    max_tokens_for_naming = 3000\n\n    name = rename_req.name\n    chat_session_id = rename_req.chat_session_id\n    user_id = user.id\n\n    if name:\n        update_chat_session(\n            db_session=db_session,\n            user_id=user_id,\n            chat_session_id=chat_session_id,\n            description=name,\n        )\n        return RenameChatSessionResponse(new_name=name)\n\n    llm = get_default_llm(\n        additional_headers=extract_headers(\n            request.headers, LITELLM_PASS_THROUGH_HEADERS\n        )\n    )\n\n    check_llm_cost_limit_for_provider(\n        db_session=db_session,\n        tenant_id=get_current_tenant_id(),\n        llm_provider_api_key=llm.config.api_key,\n    )\n\n    full_history = create_chat_history_chain(\n        chat_session_id=chat_session_id, db_session=db_session\n    )\n\n    token_counter = get_llm_token_counter(llm)\n\n    simple_chat_history = convert_chat_history_basic(\n        chat_history=full_history,\n        token_counter=token_counter,\n        max_individual_message_tokens=max_tokens_for_naming,\n        max_total_tokens=max_tokens_for_naming,\n    )\n\n    with ensure_trace(\n        \"chat_session_naming\",\n        group_id=str(chat_session_id),\n        metadata={\n            \"tenant_id\": get_current_tenant_id(),\n            \"chat_session_id\": str(chat_session_id),\n        },\n    ):\n        new_name = generate_chat_session_name(chat_history=simple_chat_history, llm=llm)\n\n    update_chat_session(\n        db_session=db_session,\n        user_id=user_id,\n        chat_session_id=chat_session_id,\n        description=new_name,\n    )\n\n    return RenameChatSessionResponse(new_name=new_name)\n\n\n@router.patch(\"/chat-session/{session_id}\")\ndef patch_chat_session(\n    session_id: UUID,\n    chat_session_update_req: ChatSessionUpdateRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    user_id = user.id\n    update_chat_session(\n        db_session=db_session,\n        user_id=user_id,\n        chat_session_id=session_id,\n        sharing_status=chat_session_update_req.sharing_status,\n    )\n    return None\n\n\n@router.delete(\"/delete-all-chat-sessions\", tags=PUBLIC_API_TAGS)\ndef delete_all_chat_sessions(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    try:\n        delete_all_chat_sessions_for_user(user=user, db_session=db_session)\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n\n@router.delete(\"/delete-chat-session/{session_id}\", tags=PUBLIC_API_TAGS)\ndef delete_chat_session_by_id(\n    session_id: UUID,\n    hard_delete: bool | None = None,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    user_id = user.id\n    try:\n        # Use the provided hard_delete parameter if specified, otherwise use the default config\n        actual_hard_delete = (\n            hard_delete if hard_delete is not None else HARD_DELETE_CHATS\n        )\n        delete_chat_session(\n            user_id, session_id, db_session, hard_delete=actual_hard_delete\n        )\n    except ValueError as e:\n        raise HTTPException(status_code=400, detail=str(e))\n\n\n# NOTE: This endpoint is extremely central to the application, any changes to it should be reviewed and approved by an experienced\n# team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.\n@router.post(\n    \"/send-chat-message\",\n    response_model=ChatFullResponse,\n    tags=PUBLIC_API_TAGS,\n    responses={\n        200: {\n            \"description\": (\n                \"If `stream=true`, returns `text/event-stream`.\\n\"\n                \"If `stream=false`, returns `application/json` (ChatFullResponse).\"\n            ),\n            \"content\": {\n                \"text/event-stream\": {\n                    \"schema\": {\"type\": \"string\"},\n                    \"examples\": {\n                        \"stream\": {\n                            \"summary\": \"Stream of NDJSON AnswerStreamPart's\",\n                            \"value\": \"string\",\n                        }\n                    },\n                },\n            },\n        }\n    },\n)\ndef handle_send_chat_message(\n    chat_message_req: SendMessageRequest,\n    request: Request,\n    user: User = Depends(current_chat_accessible_user),\n    _rate_limit_check: None = Depends(check_token_rate_limits),\n    _api_key_usage_check: None = Depends(check_api_key_usage),\n) -> StreamingResponse | ChatFullResponse:\n    \"\"\"\n    This endpoint is used to send a new chat message.\n\n    Args:\n        chat_message_req (SendMessageRequest): Details about the new chat message.\n            - When stream=True (default): Returns StreamingResponse with SSE\n            - When stream=False: Returns ChatFullResponse with complete data\n        request (Request): The current HTTP request context.\n        user (User): The current user, obtained via dependency injection.\n        _ (None): Rate limit check is run if user/group/global rate limits are enabled.\n\n    Returns:\n        StreamingResponse | ChatFullResponse: Either streams or returns complete response.\n    \"\"\"\n    logger.debug(f\"Received new chat message: {chat_message_req.message}\")\n\n    tenant_id = get_current_tenant_id()\n    mt_cloud_telemetry(\n        tenant_id=tenant_id,\n        distinct_id=tenant_id if user.is_anonymous else str(user.id),\n        event=MilestoneRecordType.RAN_QUERY,\n    )\n\n    # Override origin to API when authenticated via API key or PAT\n    # to prevent clients from polluting telemetry data\n    if get_hashed_api_key_from_request(request) or get_hashed_pat_from_request(request):\n        chat_message_req.origin = MessageOrigin.API\n\n    # Multi-model streaming path: 2-3 LLMs in parallel (streaming only)\n    is_multi_model = (\n        chat_message_req.llm_overrides is not None\n        and len(chat_message_req.llm_overrides) > 1\n    )\n    if is_multi_model and chat_message_req.stream:\n        # Narrowed here; is_multi_model already checked llm_overrides is not None\n        llm_overrides = chat_message_req.llm_overrides or []\n\n        def multi_model_stream_generator() -> Generator[str, None, None]:\n            try:\n                with get_session_with_current_tenant() as db_session:\n                    for obj in handle_multi_model_stream(\n                        new_msg_req=chat_message_req,\n                        user=user,\n                        db_session=db_session,\n                        llm_overrides=llm_overrides,\n                        litellm_additional_headers=extract_headers(\n                            request.headers, LITELLM_PASS_THROUGH_HEADERS\n                        ),\n                        custom_tool_additional_headers=get_custom_tool_additional_request_headers(\n                            request.headers\n                        ),\n                        mcp_headers=chat_message_req.mcp_headers,\n                    ):\n                        yield get_json_line(obj.model_dump())\n            except Exception as e:\n                logger.exception(\"Error in multi-model streaming\")\n                yield json.dumps({\"error\": str(e)})\n\n        return StreamingResponse(\n            multi_model_stream_generator(), media_type=\"text/event-stream\"\n        )\n\n    if is_multi_model and not chat_message_req.stream:\n        raise OnyxError(\n            OnyxErrorCode.INVALID_INPUT,\n            \"Multi-model mode (llm_overrides with >1 entry) requires stream=True.\",\n        )\n\n    # Non-streaming path: consume all packets and return complete response\n    if not chat_message_req.stream:\n        with get_session_with_current_tenant() as db_session:\n            # Check and track non-streaming API usage limits\n            if is_usage_limits_enabled():\n                check_usage_and_raise(\n                    db_session=db_session,\n                    usage_type=UsageType.NON_STREAMING_API_CALLS,\n                    tenant_id=tenant_id,\n                    pending_amount=1,\n                )\n                increment_usage(\n                    db_session=db_session,\n                    usage_type=UsageType.NON_STREAMING_API_CALLS,\n                    amount=1,\n                )\n                db_session.commit()\n\n            state_container = ChatStateContainer()\n            packets = handle_stream_message_objects(\n                new_msg_req=chat_message_req,\n                user=user,\n                db_session=db_session,\n                litellm_additional_headers=extract_headers(\n                    request.headers, LITELLM_PASS_THROUGH_HEADERS\n                ),\n                custom_tool_additional_headers=get_custom_tool_additional_request_headers(\n                    request.headers\n                ),\n                mcp_headers=chat_message_req.mcp_headers,\n                additional_context=chat_message_req.additional_context,\n                external_state_container=state_container,\n            )\n            result = gather_stream_full(packets, state_container)\n            # Note: LLM cost tracking is now handled in multi_llm.py\n            return result\n\n    # Streaming path, normal Onyx UI behavior\n    def stream_generator() -> Generator[str, None, None]:\n        state_container = ChatStateContainer()\n        try:\n            with get_session_with_current_tenant() as db_session:\n                for obj in handle_stream_message_objects(\n                    new_msg_req=chat_message_req,\n                    user=user,\n                    db_session=db_session,\n                    litellm_additional_headers=extract_headers(\n                        request.headers, LITELLM_PASS_THROUGH_HEADERS\n                    ),\n                    custom_tool_additional_headers=get_custom_tool_additional_request_headers(\n                        request.headers\n                    ),\n                    mcp_headers=chat_message_req.mcp_headers,\n                    additional_context=chat_message_req.additional_context,\n                    external_state_container=state_container,\n                ):\n                    yield get_json_line(obj.model_dump())\n                # Note: LLM cost tracking is now handled in multi_llm.py\n\n        except Exception as e:\n            logger.exception(\"Error in chat message streaming\")\n            yield json.dumps({\"error\": str(e)})\n\n        finally:\n            logger.debug(\"Stream generator finished\")\n\n    return StreamingResponse(stream_generator(), media_type=\"text/event-stream\")\n\n\n@router.put(\"/set-message-as-latest\")\ndef set_message_as_latest(\n    message_identifier: ChatMessageIdentifier,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    user_id = user.id\n\n    chat_message = get_chat_message(\n        chat_message_id=message_identifier.message_id,\n        user_id=user_id,\n        db_session=db_session,\n    )\n\n    set_as_latest_chat_message(\n        chat_message=chat_message,\n        user_id=user_id,\n        db_session=db_session,\n    )\n\n\n@router.put(\"/set-preferred-response\")\ndef set_preferred_response_endpoint(\n    request_body: SetPreferredResponseRequest,\n    user: User | None = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    \"\"\"Set the preferred assistant response for a multi-model turn.\"\"\"\n    try:\n        # Ownership check: get_chat_message raises ValueError if the message\n        # doesn't belong to this user, preventing cross-user mutation.\n        get_chat_message(\n            chat_message_id=request_body.user_message_id,\n            user_id=user.id if user else None,\n            db_session=db_session,\n        )\n        set_preferred_response(\n            db_session=db_session,\n            user_message_id=request_body.user_message_id,\n            preferred_assistant_message_id=request_body.preferred_response_id,\n        )\n    except ValueError as e:\n        raise OnyxError(OnyxErrorCode.INVALID_INPUT, str(e))\n\n\n@router.post(\"/create-chat-message-feedback\")\ndef create_chat_feedback(\n    feedback: ChatFeedbackRequest,\n    user: User = Depends(current_chat_accessible_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    user_id = user.id\n\n    create_chat_message_feedback(\n        is_positive=feedback.is_positive,\n        feedback_text=feedback.feedback_text,\n        predefined_feedback=feedback.predefined_feedback,\n        chat_message_id=feedback.chat_message_id,\n        user_id=user_id,\n        db_session=db_session,\n    )\n\n\n@router.delete(\"/remove-chat-message-feedback\")\ndef remove_chat_feedback(\n    chat_message_id: int,\n    user: User = Depends(current_chat_accessible_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    user_id = user.id\n\n    remove_chat_message_feedback(\n        chat_message_id=chat_message_id,\n        user_id=user_id,\n        db_session=db_session,\n    )\n\n\nclass MaxSelectedDocumentTokens(BaseModel):\n    max_tokens: int\n\n\n@router.get(\"/max-selected-document-tokens\")\ndef get_max_document_tokens(\n    persona_id: int,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> MaxSelectedDocumentTokens:\n    try:\n        persona = get_persona_by_id(\n            persona_id=persona_id,\n            user=user,\n            db_session=db_session,\n            is_for_edit=False,\n        )\n    except ValueError:\n        raise HTTPException(status_code=404, detail=\"Persona not found\")\n\n    return MaxSelectedDocumentTokens(\n        max_tokens=_get_available_tokens_for_persona(\n            persona=persona,\n            user=user,\n            db_session=db_session,\n        ),\n    )\n\n\nclass AvailableContextTokensResponse(BaseModel):\n    available_tokens: int\n\n\n@router.get(\"/available-context-tokens/{session_id}\")\ndef get_available_context_tokens_for_session(\n    session_id: UUID,\n    user: User = Depends(current_chat_accessible_user),\n    db_session: Session = Depends(get_session),\n) -> AvailableContextTokensResponse:\n    \"\"\"Return available context tokens for a chat session based on its persona.\"\"\"\n\n    try:\n        chat_session = get_chat_session_by_id(\n            chat_session_id=session_id,\n            user_id=user.id,\n            db_session=db_session,\n            is_shared=False,\n            include_deleted=False,\n        )\n    except ValueError:\n        raise HTTPException(status_code=404, detail=\"Chat session not found\")\n\n    if not chat_session.persona:\n        raise HTTPException(status_code=400, detail=\"Chat session has no persona\")\n\n    available = _get_available_tokens_for_persona(\n        persona=chat_session.persona,\n        user=user,\n        db_session=db_session,\n    )\n\n    return AvailableContextTokensResponse(available_tokens=available)\n\n\n\"\"\"Endpoints for chat seeding\"\"\"\n\n\nclass SeedChatFromSlackRequest(BaseModel):\n    chat_session_id: UUID\n\n\nclass SeedChatFromSlackResponse(BaseModel):\n    redirect_url: str\n\n\n@router.post(\"/seed-chat-session-from-slack\")\ndef seed_chat_from_slack(\n    chat_seed_request: SeedChatFromSlackRequest,\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> SeedChatFromSlackResponse:\n    slack_chat_session_id = chat_seed_request.chat_session_id\n    new_chat_session = duplicate_chat_session_for_user_from_slack(\n        db_session=db_session,\n        user=user,\n        chat_session_id=slack_chat_session_id,\n    )\n\n    add_chats_to_session_from_slack_thread(\n        db_session=db_session,\n        slack_chat_session_id=slack_chat_session_id,\n        new_chat_session_id=new_chat_session.id,\n    )\n\n    return SeedChatFromSlackResponse(\n        redirect_url=f\"{WEB_DOMAIN}/chat?chatId={new_chat_session.id}\"\n    )\n\n\n@router.get(\"/file/{file_id:path}\", tags=PUBLIC_API_TAGS)\ndef fetch_chat_file(\n    file_id: str,\n    request: Request,\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> Response:\n\n    # For user files, we need to get the file id from the user file id\n    file_id_from_user_file = get_file_id_by_user_file_id(file_id, db_session)\n    if file_id_from_user_file:\n        file_id = file_id_from_user_file\n\n    file_store = get_default_file_store()\n    file_record = file_store.read_file_record(file_id)\n    if not file_record:\n        raise HTTPException(status_code=404, detail=\"File not found\")\n\n    media_type = file_record.file_type\n    file_io = file_store.read_file(file_id, mode=\"b\")\n\n    # Files served here are immutable (content-addressed by file_id), so allow long-lived caching.\n    # Use `private` because this is behind auth / tenant scoping.\n    etag = f'\"{file_id}\"'\n    cache_headers = {\n        \"Cache-Control\": \"private, max-age=31536000, immutable\",\n        \"ETag\": etag,\n        \"Vary\": \"Cookie\",\n    }\n\n    if request.headers.get(\"if-none-match\") == etag:\n        return Response(status_code=304, headers=cache_headers)\n\n    return StreamingResponse(file_io, media_type=media_type, headers=cache_headers)\n\n\n@router.get(\"/search\", tags=PUBLIC_API_TAGS)\nasync def search_chats(\n    query: str | None = Query(None),\n    page: int = Query(1),\n    page_size: int = Query(10),\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> ChatSearchResponse:\n    \"\"\"\n    Search for chat sessions based on the provided query.\n    If no query is provided, returns recent chat sessions.\n    \"\"\"\n\n    # Use the enhanced database function for chat search\n    chat_sessions, has_more = search_chat_sessions(\n        user_id=user.id,\n        db_session=db_session,\n        query=query,\n        page=page,\n        page_size=page_size,\n        include_deleted=False,\n        include_onyxbot_flows=False,\n    )\n\n    # Group chat sessions by time period\n    today = datetime.datetime.now().date()\n    yesterday = today - timedelta(days=1)\n    this_week = today - timedelta(days=7)\n    this_month = today - timedelta(days=30)\n\n    today_chats: list[ChatSessionSummary] = []\n    yesterday_chats: list[ChatSessionSummary] = []\n    this_week_chats: list[ChatSessionSummary] = []\n    this_month_chats: list[ChatSessionSummary] = []\n    older_chats: list[ChatSessionSummary] = []\n\n    for session in chat_sessions:\n        session_date = session.time_created.date()\n\n        chat_summary = ChatSessionSummary(\n            id=session.id,\n            name=session.description,\n            persona_id=session.persona_id,\n            time_created=session.time_created,\n            shared_status=session.shared_status,\n            current_alternate_model=session.current_alternate_model,\n            current_temperature_override=session.temperature_override,\n        )\n\n        if session_date == today:\n            today_chats.append(chat_summary)\n        elif session_date == yesterday:\n            yesterday_chats.append(chat_summary)\n        elif session_date > this_week:\n            this_week_chats.append(chat_summary)\n        elif session_date > this_month:\n            this_month_chats.append(chat_summary)\n        else:\n            older_chats.append(chat_summary)\n\n    # Create groups\n    groups = []\n    if today_chats:\n        groups.append(ChatSessionGroup(title=\"Today\", chats=today_chats))\n    if yesterday_chats:\n        groups.append(ChatSessionGroup(title=\"Yesterday\", chats=yesterday_chats))\n    if this_week_chats:\n        groups.append(ChatSessionGroup(title=\"This Week\", chats=this_week_chats))\n    if this_month_chats:\n        groups.append(ChatSessionGroup(title=\"This Month\", chats=this_month_chats))\n    if older_chats:\n        groups.append(ChatSessionGroup(title=\"Older\", chats=older_chats))\n\n    return ChatSearchResponse(\n        groups=groups,\n        has_more=has_more,\n        next_page=page + 1 if has_more else None,\n    )\n\n\n@router.post(\"/stop-chat-session/{chat_session_id}\", tags=PUBLIC_API_TAGS)\ndef stop_chat_session(\n    chat_session_id: UUID,\n    user: User = Depends(current_user),  # noqa: ARG001\n) -> dict[str, str]:\n    \"\"\"\n    Stop a chat session by setting a stop signal.\n    This endpoint is called by the frontend when the user clicks the stop button.\n    \"\"\"\n    set_fence(chat_session_id, get_cache_backend(), True)\n    return {\"message\": \"Chat session stopped\"}\n"
  },
  {
    "path": "backend/onyx/server/query_and_chat/chat_utils.py",
    "content": "from onyx.file_processing.file_types import OnyxMimeTypes\nfrom onyx.file_store.models import ChatFileType\n\n\ndef mime_type_to_chat_file_type(mime_type: str | None) -> ChatFileType:\n    if mime_type is None:\n        return ChatFileType.PLAIN_TEXT\n\n    if mime_type in OnyxMimeTypes.IMAGE_MIME_TYPES:\n        return ChatFileType.IMAGE\n\n    if mime_type in OnyxMimeTypes.TABULAR_MIME_TYPES:\n        return ChatFileType.TABULAR\n\n    if mime_type in OnyxMimeTypes.DOCUMENT_MIME_TYPES:\n        return ChatFileType.DOC\n\n    return ChatFileType.PLAIN_TEXT\n"
  },
  {
    "path": "backend/onyx/server/query_and_chat/models.py",
    "content": "from datetime import datetime\nfrom enum import Enum\nfrom typing import Any\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\nfrom pydantic import model_validator\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.constants import SessionType\nfrom onyx.context.search.models import BaseFilters\nfrom onyx.context.search.models import SavedSearchDoc\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.context.search.models import Tag\nfrom onyx.db.enums import ChatSessionSharedStatus\nfrom onyx.db.models import ChatSession\nfrom onyx.file_store.models import FileDescriptor\nfrom onyx.llm.override_models import LLMOverride\nfrom onyx.server.query_and_chat.streaming_models import Packet\n\n\nAUTO_PLACE_AFTER_LATEST_MESSAGE = -1\n\n\nclass MessageOrigin(str, Enum):\n    \"\"\"Origin of a chat message for telemetry tracking.\"\"\"\n\n    WEBAPP = \"webapp\"\n    CHROME_EXTENSION = \"chrome_extension\"\n    API = \"api\"\n    SLACKBOT = \"slackbot\"\n    WIDGET = \"widget\"\n    DISCORDBOT = \"discordbot\"\n    UNKNOWN = \"unknown\"\n    UNSET = \"unset\"\n\n\nclass MessageResponseIDInfo(BaseModel):\n    user_message_id: int | None\n    reserved_assistant_message_id: int\n\n\nclass ModelResponseSlot(BaseModel):\n    \"\"\"Pairs a reserved assistant message ID with its model display name.\"\"\"\n\n    message_id: int\n    model_name: str\n\n\nclass MultiModelMessageResponseIDInfo(BaseModel):\n    \"\"\"Sent at the start of a multi-model streaming response.\n    Contains the user message ID and one slot per model being run in parallel.\"\"\"\n\n    user_message_id: int | None\n    responses: list[ModelResponseSlot]\n\n\nclass SourceTag(Tag):\n    source: DocumentSource\n\n\nclass TagResponse(BaseModel):\n    tags: list[SourceTag]\n\n\nclass UpdateChatSessionThreadRequest(BaseModel):\n    # If not specified, use Onyx default persona\n    chat_session_id: UUID\n    new_alternate_model: str\n\n\nclass UpdateChatSessionTemperatureRequest(BaseModel):\n    chat_session_id: UUID\n    temperature_override: float\n\n\nclass ChatSessionCreationRequest(BaseModel):\n    # If not specified, use Onyx default persona\n    persona_id: int = 0\n    description: str | None = None\n    project_id: int | None = None\n\n\nclass ChatFeedbackRequest(BaseModel):\n    chat_message_id: int\n    is_positive: bool | None = None\n    feedback_text: str | None = None\n    predefined_feedback: str | None = None\n\n    @model_validator(mode=\"after\")\n    def check_is_positive_or_feedback_text(self) -> \"ChatFeedbackRequest\":\n        if self.is_positive is None and self.feedback_text is None:\n            raise ValueError(\"Empty feedback received.\")\n        return self\n\n\n# NOTE: This model is used for the core flow of the Onyx application, any changes to it should be reviewed and approved by an\n# experienced team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.\nclass SendMessageRequest(BaseModel):\n    message: str\n\n    llm_override: LLMOverride | None = None\n    # For multi-model mode: up to 3 LLM overrides to run in parallel.\n    # When provided with >1 entry, triggers multi-model streaming.\n    llm_overrides: list[LLMOverride] | None = None\n    # Test-only override for deterministic LiteLLM mock responses.\n    mock_llm_response: str | None = None\n\n    allowed_tool_ids: list[int] | None = None\n    forced_tool_id: int | None = None\n\n    file_descriptors: list[FileDescriptor] = []\n\n    internal_search_filters: BaseFilters | None = None\n\n    deep_research: bool = False\n\n    # Headers to forward to MCP tool calls (e.g., user JWT token, user ID)\n    # Example: {\"Authorization\": \"Bearer <user_jwt>\", \"X-User-ID\": \"user123\"}\n    mcp_headers: dict[str, str] | None = None\n\n    # Origin of the message for telemetry tracking\n    origin: MessageOrigin = MessageOrigin.UNSET\n\n    # Placement information for the message in the conversation tree:\n    # - -1: auto-place after latest message in chain\n    # - null: regeneration from root (first message)\n    # - positive int: place after that specific parent message\n    # NOTE: for regeneration, this is the only case currently where there is branching on the user message.\n    # If the message of parent_message_id is a user message, the message will be ignored and it will use the\n    # original user message for regeneration.\n    parent_message_id: int | None = AUTO_PLACE_AFTER_LATEST_MESSAGE\n    chat_session_id: UUID | None = None\n    chat_session_info: ChatSessionCreationRequest | None = None\n\n    # When True (default), returns StreamingResponse with SSE\n    # When False, returns ChatFullResponse with complete data\n    stream: bool = True\n\n    # When False, disables citation generation:\n    # - Citation markers like [1], [2] are removed from response text\n    # - No CitationInfo packets are emitted during streaming\n    include_citations: bool = True\n\n    # Additional context injected into the LLM call but NOT stored in the DB\n    # (not shown in chat history). Used e.g. by the Chrome extension to pass\n    # the current tab URL when \"Read this tab\" is enabled.\n    additional_context: str | None = None\n\n    @model_validator(mode=\"after\")\n    def check_chat_session_id_or_info(self) -> \"SendMessageRequest\":\n        # If neither is provided, default to creating a new chat session using the\n        # default ChatSessionCreationRequest values.\n        if self.chat_session_id is None and self.chat_session_info is None:\n            return self.model_copy(\n                update={\"chat_session_info\": ChatSessionCreationRequest()}\n            )\n        if self.chat_session_id is not None and self.chat_session_info is not None:\n            raise ValueError(\n                \"Only one of chat_session_id or chat_session_info should be provided, not both.\"\n            )\n        return self\n\n\nclass ChatMessageIdentifier(BaseModel):\n    message_id: int\n\n\nclass ChatRenameRequest(BaseModel):\n    chat_session_id: UUID\n    name: str | None = None\n\n\nclass ChatSessionUpdateRequest(BaseModel):\n    sharing_status: ChatSessionSharedStatus\n\n\nclass DeleteAllSessionsRequest(BaseModel):\n    session_type: SessionType\n\n\nclass RenameChatSessionResponse(BaseModel):\n    new_name: str  # This is only really useful if the name is generated\n\n\nclass ChatSessionDetails(BaseModel):\n    id: UUID\n    name: str | None\n    persona_id: int | None = None\n    time_created: str\n    time_updated: str\n    shared_status: ChatSessionSharedStatus\n    current_alternate_model: str | None = None\n    current_temperature_override: float | None = None\n\n    @classmethod\n    def from_model(cls, model: ChatSession) -> \"ChatSessionDetails\":\n        return cls(\n            id=model.id,\n            name=model.description,\n            persona_id=model.persona_id,\n            time_created=model.time_created.isoformat(),\n            time_updated=model.time_updated.isoformat(),\n            shared_status=model.shared_status,\n            current_alternate_model=model.current_alternate_model,\n            current_temperature_override=model.temperature_override,\n        )\n\n\nclass ChatSessionsResponse(BaseModel):\n    sessions: list[ChatSessionDetails]\n    has_more: bool = False\n\n\nclass ChatMessageDetail(BaseModel):\n    chat_session_id: UUID | None = None\n    message_id: int\n    parent_message: int | None = None\n    latest_child_message: int | None = None\n    message: str\n    reasoning_tokens: str | None = None\n    message_type: MessageType\n    context_docs: list[SavedSearchDoc] | None = None\n    # Dict mapping citation number to document_id\n    citations: dict[int, str] | None = None\n    time_sent: datetime\n    files: list[FileDescriptor]\n    error: str | None = None\n    current_feedback: str | None = None  # \"like\" | \"dislike\" | null\n    processing_duration_seconds: float | None = None\n    preferred_response_id: int | None = None\n    model_display_name: str | None = None\n\n    def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]:  # type: ignore\n        initial_dict = super().model_dump(mode=\"json\", *args, **kwargs)  # type: ignore\n        initial_dict[\"time_sent\"] = self.time_sent.isoformat()\n        return initial_dict\n\n\nclass SetPreferredResponseRequest(BaseModel):\n    user_message_id: int\n    preferred_response_id: int\n\n\nclass ChatSessionDetailResponse(BaseModel):\n    chat_session_id: UUID\n    description: str | None\n    persona_id: int | None = None\n    persona_name: str | None\n    personal_icon_name: str | None\n    messages: list[ChatMessageDetail]\n    time_created: datetime\n    shared_status: ChatSessionSharedStatus\n    current_alternate_model: str | None\n    current_temperature_override: float | None\n    deleted: bool = False\n    owner_name: str | None = None\n    packets: list[list[Packet]]\n\n\nclass AdminSearchRequest(BaseModel):\n    query: str\n    filters: BaseFilters\n\n\nclass AdminSearchResponse(BaseModel):\n    documents: list[SearchDoc]\n\n\nclass ChatSessionSummary(BaseModel):\n    id: UUID\n    name: str | None = None\n    persona_id: int | None = None\n    time_created: datetime\n    shared_status: ChatSessionSharedStatus\n    current_alternate_model: str | None = None\n    current_temperature_override: float | None = None\n\n\nclass ChatSessionGroup(BaseModel):\n    title: str\n    chats: list[ChatSessionSummary]\n\n\nclass ChatSearchResponse(BaseModel):\n    groups: list[ChatSessionGroup]\n    has_more: bool\n    next_page: int | None = None\n"
  },
  {
    "path": "backend/onyx/server/query_and_chat/placement.py",
    "content": "from pydantic import BaseModel\n\n\nclass Placement(BaseModel):\n    \"\"\"Coordinates that identify where a streaming packet belongs in the UI.\n\n    The frontend uses these fields to route each packet to the correct turn,\n    tool tab, agent sub-turn, and (in multi-model mode) response column.\n\n    Attributes:\n        turn_index: Monotonically increasing index of the iterative reasoning block\n            (e.g. tool call round) within this chat message. Lower values happened first.\n        tab_index: Disambiguates parallel tool calls within the same turn so each\n            tool's output can be displayed in its own tab.\n        sub_turn_index: Nesting level for tools that invoke other tools. ``None`` for\n            top-level packets; an integer for tool-within-tool output.\n        model_index: Which model this packet belongs to. ``0`` for single-model\n            responses; ``0``, ``1``, or ``2`` for multi-model comparison. ``None``\n            for pre-LLM setup packets (e.g. message ID info) that are yielded\n            before any Emitter runs.\n    \"\"\"\n\n    turn_index: int\n    tab_index: int = 0\n    sub_turn_index: int | None = None\n    model_index: int | None = None\n"
  },
  {
    "path": "backend/onyx/server/query_and_chat/query_backend.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_curator_or_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.context.search.preprocessing.access_filters import (\n    build_access_filters_for_user,\n)\nfrom onyx.context.search.utils import get_query_embedding\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.db.tag import find_tags\nfrom onyx.document_index.factory import get_default_document_index\nfrom onyx.server.query_and_chat.models import AdminSearchRequest\nfrom onyx.server.query_and_chat.models import AdminSearchResponse\nfrom onyx.server.query_and_chat.models import SourceTag\nfrom onyx.server.query_and_chat.models import TagResponse\nfrom onyx.server.utils_vector_db import require_vector_db\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nadmin_router = APIRouter(prefix=\"/admin\")\nbasic_router = APIRouter(prefix=\"/query\")\n\n\n@admin_router.post(\"/search\", dependencies=[Depends(require_vector_db)])\ndef admin_search(\n    question: AdminSearchRequest,\n    user: User = Depends(current_curator_or_admin_user),\n    db_session: Session = Depends(get_session),\n) -> AdminSearchResponse:\n    tenant_id = get_current_tenant_id()\n\n    query = question.query\n    logger.notice(f\"Received admin search query: {query}\")\n    user_acl_filters = build_access_filters_for_user(user, db_session)\n\n    final_filters = IndexFilters(\n        source_type=question.filters.source_type,\n        document_set=question.filters.document_set,\n        time_cutoff=question.filters.time_cutoff,\n        tags=question.filters.tags,\n        access_control_list=user_acl_filters,\n        tenant_id=tenant_id,\n    )\n    search_settings = get_current_search_settings(db_session)\n    # This flow is for search so we do not get all indices.\n    document_index = get_default_document_index(search_settings, None, db_session)\n\n    if not query or query.strip() == \"\":\n        matching_chunks = document_index.random_retrieval(filters=final_filters)\n    else:\n        query_embedding = get_query_embedding(query, db_session)\n        matching_chunks = document_index.admin_retrieval(\n            query=query, query_embedding=query_embedding, filters=final_filters\n        )\n\n    documents = SearchDoc.from_chunks_or_sections(matching_chunks)\n\n    # Deduplicate documents by id\n    deduplicated_documents: list[SearchDoc] = []\n    seen_documents: set[str] = set()\n    for document in documents:\n        if document.document_id not in seen_documents:\n            deduplicated_documents.append(document)\n            seen_documents.add(document.document_id)\n    return AdminSearchResponse(documents=deduplicated_documents)\n\n\n@basic_router.get(\"/valid-tags\")\ndef get_tags(\n    match_pattern: str | None = None,\n    # If this is empty or None, then tags for all sources are considered\n    sources: list[DocumentSource] | None = None,\n    allow_prefix: bool = True,  # This is currently the only option\n    limit: int = 50,\n    _: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> TagResponse:\n    if not allow_prefix:\n        raise NotImplementedError(\"Cannot disable prefix match for now\")\n\n    key_prefix = match_pattern\n    value_prefix = match_pattern\n    require_both_to_match = False\n\n    # split on = to allow the user to type in \"author=bob\"\n    EQUAL_PAT = \"=\"\n    if match_pattern and EQUAL_PAT in match_pattern:\n        split_pattern = match_pattern.split(EQUAL_PAT)\n        key_prefix = split_pattern[0]\n        value_prefix = EQUAL_PAT.join(split_pattern[1:])\n        require_both_to_match = True\n\n    db_tags = find_tags(\n        tag_key_prefix=key_prefix,\n        tag_value_prefix=value_prefix,\n        sources=sources,\n        limit=limit,\n        db_session=db_session,\n        require_both_to_match=require_both_to_match,\n    )\n    server_tags = [\n        SourceTag(\n            tag_key=db_tag.tag_key, tag_value=db_tag.tag_value, source=db_tag.source\n        )\n        for db_tag in db_tags\n    ]\n    return TagResponse(tags=server_tags)\n"
  },
  {
    "path": "backend/onyx/server/query_and_chat/session_loading.py",
    "content": "from __future__ import annotations\n\nimport json\nfrom typing import Any\nfrom typing import cast\nfrom typing import Literal\n\nfrom pydantic import ValidationError\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.citation_utils import extract_citation_order_from_text\nfrom onyx.configs.constants import MessageType\nfrom onyx.context.search.models import SavedSearchDoc\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.db.chat import get_db_search_doc_by_id\nfrom onyx.db.chat import translate_db_search_doc_to_saved_search_doc\nfrom onyx.db.models import ChatMessage\nfrom onyx.db.tools import get_tool_by_id\nfrom onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_IN_CODE_ID\nfrom onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_TASK_KEY\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseDelta\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseStart\nfrom onyx.server.query_and_chat.streaming_models import CitationInfo\nfrom onyx.server.query_and_chat.streaming_models import CustomToolArgs\nfrom onyx.server.query_and_chat.streaming_models import CustomToolDelta\nfrom onyx.server.query_and_chat.streaming_models import CustomToolErrorInfo\nfrom onyx.server.query_and_chat.streaming_models import CustomToolStart\nfrom onyx.server.query_and_chat.streaming_models import FileReaderResult\nfrom onyx.server.query_and_chat.streaming_models import FileReaderStart\nfrom onyx.server.query_and_chat.streaming_models import GeneratedImage\nfrom onyx.server.query_and_chat.streaming_models import ImageGenerationFinal\nfrom onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart\nfrom onyx.server.query_and_chat.streaming_models import IntermediateReportDelta\nfrom onyx.server.query_and_chat.streaming_models import IntermediateReportStart\nfrom onyx.server.query_and_chat.streaming_models import MemoryToolDelta\nfrom onyx.server.query_and_chat.streaming_models import MemoryToolStart\nfrom onyx.server.query_and_chat.streaming_models import OpenUrlDocuments\nfrom onyx.server.query_and_chat.streaming_models import OpenUrlStart\nfrom onyx.server.query_and_chat.streaming_models import OpenUrlUrls\nfrom onyx.server.query_and_chat.streaming_models import OverallStop\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import PythonToolDelta\nfrom onyx.server.query_and_chat.streaming_models import PythonToolStart\nfrom onyx.server.query_and_chat.streaming_models import ReasoningDelta\nfrom onyx.server.query_and_chat.streaming_models import ReasoningStart\nfrom onyx.server.query_and_chat.streaming_models import ResearchAgentStart\nfrom onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta\nfrom onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta\nfrom onyx.server.query_and_chat.streaming_models import SearchToolStart\nfrom onyx.server.query_and_chat.streaming_models import SectionEnd\nfrom onyx.server.query_and_chat.streaming_models import TopLevelBranching\nfrom onyx.tools.tool_implementations.file_reader.file_reader_tool import FileReaderTool\nfrom onyx.tools.tool_implementations.images.image_generation_tool import (\n    ImageGenerationTool,\n)\nfrom onyx.tools.tool_implementations.memory.memory_tool import MemoryTool\nfrom onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool\nfrom onyx.tools.tool_implementations.python.python_tool import PythonTool\nfrom onyx.tools.tool_implementations.search.search_tool import SearchTool\nfrom onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef create_message_packets(\n    message_text: str,\n    final_documents: list[SearchDoc] | None,\n    turn_index: int,\n) -> list[Packet]:\n    packets: list[Packet] = []\n\n    final_search_docs: list[SearchDoc] | None = None\n    if final_documents:\n        sorted_final_documents = sorted(\n            final_documents, key=lambda x: x.score or 0.0, reverse=True\n        )\n        final_search_docs = [\n            SearchDoc(**doc.model_dump()) for doc in sorted_final_documents\n        ]\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index),\n            obj=AgentResponseStart(\n                final_documents=final_search_docs,\n            ),\n        )\n    )\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index),\n            obj=AgentResponseDelta(\n                content=message_text,\n            ),\n        ),\n    )\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index),\n            obj=SectionEnd(),\n        )\n    )\n\n    return packets\n\n\ndef create_citation_packets(\n    citation_info_list: list[CitationInfo], turn_index: int\n) -> list[Packet]:\n    packets: list[Packet] = []\n\n    # Emit each citation as a separate CitationInfo packet\n    for citation_info in citation_info_list:\n        packets.append(\n            Packet(\n                placement=Placement(turn_index=turn_index),\n                obj=citation_info,\n            )\n        )\n\n    packets.append(Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()))\n\n    return packets\n\n\ndef create_reasoning_packets(reasoning_text: str, turn_index: int) -> list[Packet]:\n    packets: list[Packet] = []\n\n    packets.append(\n        Packet(placement=Placement(turn_index=turn_index), obj=ReasoningStart())\n    )\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index),\n            obj=ReasoningDelta(\n                reasoning=reasoning_text,\n            ),\n        ),\n    )\n\n    packets.append(Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()))\n\n    return packets\n\n\ndef create_image_generation_packets(\n    images: list[GeneratedImage], turn_index: int, tab_index: int = 0\n) -> list[Packet]:\n    packets: list[Packet] = []\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=ImageGenerationToolStart(),\n        )\n    )\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=ImageGenerationFinal(images=images),\n        ),\n    )\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=SectionEnd(),\n        )\n    )\n\n    return packets\n\n\ndef create_custom_tool_packets(\n    tool_name: str,\n    response_type: str,\n    turn_index: int,\n    tab_index: int = 0,\n    data: dict | list | str | int | float | bool | None = None,\n    file_ids: list[str] | None = None,\n    error: CustomToolErrorInfo | None = None,\n    tool_args: dict[str, Any] | None = None,\n    tool_id: int | None = None,\n) -> list[Packet]:\n    packets: list[Packet] = []\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=CustomToolStart(tool_name=tool_name, tool_id=tool_id),\n        )\n    )\n\n    if tool_args:\n        packets.append(\n            Packet(\n                placement=Placement(turn_index=turn_index, tab_index=tab_index),\n                obj=CustomToolArgs(tool_name=tool_name, tool_args=tool_args),\n            )\n        )\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=CustomToolDelta(\n                tool_name=tool_name,\n                tool_id=tool_id,\n                response_type=response_type,\n                data=data,\n                file_ids=file_ids,\n                error=error,\n            ),\n        ),\n    )\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=SectionEnd(),\n        )\n    )\n\n    return packets\n\n\ndef create_file_reader_packets(\n    summary_json: str,\n    turn_index: int,\n    tab_index: int = 0,\n) -> list[Packet]:\n    \"\"\"Recreate FileReaderStart + FileReaderResult + SectionEnd from the stored\n    JSON summary so that the FileReaderToolRenderer can display the result on\n    page reload.\"\"\"\n    import json\n\n    packets: list[Packet] = []\n    placement = Placement(turn_index=turn_index, tab_index=tab_index)\n\n    packets.append(Packet(placement=placement, obj=FileReaderStart()))\n\n    try:\n        data = json.loads(summary_json)\n        packets.append(\n            Packet(\n                placement=placement,\n                obj=FileReaderResult(\n                    file_name=data[\"file_name\"],\n                    file_id=data[\"file_id\"],\n                    start_char=data[\"start_char\"],\n                    end_char=data[\"end_char\"],\n                    total_chars=data[\"total_chars\"],\n                    preview_start=data.get(\"preview_start\", \"\"),\n                    preview_end=data.get(\"preview_end\", \"\"),\n                ),\n            )\n        )\n    except (json.JSONDecodeError, KeyError):\n        # Gracefully degrade for old data that wasn't saved as JSON summary\n        pass\n\n    packets.append(Packet(placement=placement, obj=SectionEnd()))\n    return packets\n\n\ndef create_research_agent_packets(\n    research_task: str,\n    report_content: str | None,\n    turn_index: int,\n    tab_index: int = 0,\n) -> list[Packet]:\n    \"\"\"Create packets for research agent tool calls.\n    This recreates the packet structure that ResearchAgentRenderer expects:\n    - ResearchAgentStart with the research task\n    - IntermediateReportStart to signal report begins\n    - IntermediateReportDelta with the report content (if available)\n    - SectionEnd to mark completion\n    \"\"\"\n    packets: list[Packet] = []\n\n    # Emit research agent start\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=ResearchAgentStart(research_task=research_task),\n        )\n    )\n\n    # Emit report content if available\n    if report_content:\n        # Emit IntermediateReportStart before delta\n        packets.append(\n            Packet(\n                placement=Placement(turn_index=turn_index, tab_index=tab_index),\n                obj=IntermediateReportStart(),\n            )\n        )\n\n        packets.append(\n            Packet(\n                placement=Placement(turn_index=turn_index, tab_index=tab_index),\n                obj=IntermediateReportDelta(content=report_content),\n            )\n        )\n\n    # Emit section end\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=SectionEnd(),\n        )\n    )\n\n    return packets\n\n\ndef create_fetch_packets(\n    fetch_docs: list[SavedSearchDoc],\n    urls: list[str],\n    turn_index: int,\n    tab_index: int = 0,\n) -> list[Packet]:\n    packets: list[Packet] = []\n    # Emit start packet\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=OpenUrlStart(),\n        )\n    )\n    # Emit URLs packet\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=OpenUrlUrls(urls=urls),\n        )\n    )\n    # Emit documents packet\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=OpenUrlDocuments(\n                documents=[SearchDoc(**doc.model_dump()) for doc in fetch_docs]\n            ),\n        )\n    )\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=SectionEnd(),\n        )\n    )\n    return packets\n\n\ndef create_memory_packets(\n    memory_text: str,\n    operation: Literal[\"add\", \"update\"],\n    memory_id: int | None,\n    turn_index: int,\n    tab_index: int = 0,\n    index: int | None = None,\n) -> list[Packet]:\n    packets: list[Packet] = []\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=MemoryToolStart(),\n        )\n    )\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=MemoryToolDelta(\n                memory_text=memory_text,\n                operation=operation,\n                memory_id=memory_id,\n                index=index,\n            ),\n        ),\n    )\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=SectionEnd(),\n        )\n    )\n\n    return packets\n\n\ndef create_python_tool_packets(\n    code: str,\n    stdout: str,\n    stderr: str,\n    file_ids: list[str],\n    turn_index: int,\n    tab_index: int = 0,\n) -> list[Packet]:\n    \"\"\"Recreate PythonToolStart + PythonToolDelta + SectionEnd from the stored\n    tool call data so the frontend can display both the code and its output\n    on page reload.\"\"\"\n    packets: list[Packet] = []\n    placement = Placement(turn_index=turn_index, tab_index=tab_index)\n\n    packets.append(Packet(placement=placement, obj=PythonToolStart(code=code)))\n\n    packets.append(\n        Packet(\n            placement=placement,\n            obj=PythonToolDelta(\n                stdout=stdout,\n                stderr=stderr,\n                file_ids=file_ids,\n            ),\n        )\n    )\n\n    packets.append(Packet(placement=placement, obj=SectionEnd()))\n    return packets\n\n\ndef create_search_packets(\n    search_queries: list[str],\n    search_docs: list[SavedSearchDoc],\n    is_internet_search: bool,\n    turn_index: int,\n    tab_index: int = 0,\n) -> list[Packet]:\n    packets: list[Packet] = []\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=SearchToolStart(\n                is_internet_search=is_internet_search,\n            ),\n        )\n    )\n\n    # Emit queries if present\n    if search_queries:\n        packets.append(\n            Packet(\n                placement=Placement(turn_index=turn_index, tab_index=tab_index),\n                obj=SearchToolQueriesDelta(queries=search_queries),\n            ),\n        )\n\n    # Emit documents if present\n    if search_docs:\n        sorted_search_docs = sorted(\n            search_docs, key=lambda x: x.score or 0.0, reverse=True\n        )\n        packets.append(\n            Packet(\n                placement=Placement(turn_index=turn_index, tab_index=tab_index),\n                obj=SearchToolDocumentsDelta(\n                    documents=[\n                        SearchDoc(**doc.model_dump()) for doc in sorted_search_docs\n                    ]\n                ),\n            ),\n        )\n\n    packets.append(\n        Packet(\n            placement=Placement(turn_index=turn_index, tab_index=tab_index),\n            obj=SectionEnd(),\n        )\n    )\n\n    return packets\n\n\ndef translate_assistant_message_to_packets(\n    chat_message: ChatMessage,\n    db_session: Session,\n) -> list[Packet]:\n    \"\"\"\n    Translates an assistant message and tool calls to packet format.\n    It needs to be a list of list of packets combined into indices for \"steps\".\n    The final answer and citations are also a \"step\".\n    \"\"\"\n    packet_list: list[Packet] = []\n\n    if chat_message.message_type != MessageType.ASSISTANT:\n        raise ValueError(f\"Chat message {chat_message.id} is not an assistant message\")\n\n    if chat_message.tool_calls:\n        # Group tool calls by turn_number\n        tool_calls_by_turn: dict[int, list] = {}\n        for tool_call in chat_message.tool_calls:\n            turn_num = tool_call.turn_number\n            if turn_num not in tool_calls_by_turn:\n                tool_calls_by_turn[turn_num] = []\n            tool_calls_by_turn[turn_num].append(tool_call)\n\n        tool_call_turns = set(tool_calls_by_turn.keys())\n        # Process each turn in order\n        for turn_num in sorted(tool_calls_by_turn.keys()):\n            tool_calls_in_turn = tool_calls_by_turn[turn_num]\n\n            # Insert pre-tool reasoning once per turn (if available)\n            turn_reasoning = next(\n                (\n                    tool_call.reasoning_tokens\n                    for tool_call in tool_calls_in_turn\n                    if tool_call.reasoning_tokens\n                ),\n                None,\n            )\n            if turn_reasoning:\n                # Use the previous turn slot when free to preserve reasoning-before-tool ordering.\n                reasoning_turn_index = turn_num\n                if turn_num > 0 and (turn_num - 1) not in tool_call_turns:\n                    reasoning_turn_index = turn_num - 1\n                packet_list.extend(\n                    create_reasoning_packets(\n                        reasoning_text=turn_reasoning,\n                        turn_index=reasoning_turn_index,\n                    )\n                )\n\n            # Process each tool call in this turn (single pass).\n            # We buffer packets for the turn so we can conditionally prepend a TopLevelBranching\n            # packet (which must appear before any tool output in the turn).\n            research_agent_count = 0\n            turn_tool_packets: list[Packet] = []\n            for tool_call in tool_calls_in_turn:\n                # Here we do a try because some tools may get deleted before the session is reloaded.\n                try:\n                    tool = get_tool_by_id(tool_call.tool_id, db_session)\n                    if tool.in_code_tool_id == RESEARCH_AGENT_IN_CODE_ID:\n                        research_agent_count += 1\n\n                    # Handle different tool types\n                    if tool.in_code_tool_id in [\n                        SearchTool.__name__,\n                        WebSearchTool.__name__,\n                    ]:\n                        queries = cast(\n                            list[str], tool_call.tool_call_arguments.get(\"queries\", [])\n                        )\n                        search_docs: list[SavedSearchDoc] = [\n                            translate_db_search_doc_to_saved_search_doc(doc)\n                            for doc in tool_call.search_docs\n                        ]\n                        turn_tool_packets.extend(\n                            create_search_packets(\n                                search_queries=queries,\n                                search_docs=search_docs,\n                                is_internet_search=tool.in_code_tool_id\n                                == WebSearchTool.__name__,\n                                turn_index=turn_num,\n                                tab_index=tool_call.tab_index,\n                            )\n                        )\n\n                    elif tool.in_code_tool_id == OpenURLTool.__name__:\n                        fetch_docs: list[SavedSearchDoc] = [\n                            translate_db_search_doc_to_saved_search_doc(doc)\n                            for doc in tool_call.search_docs\n                        ]\n                        # Get URLs from tool_call_arguments\n                        urls = cast(\n                            list[str], tool_call.tool_call_arguments.get(\"urls\", [])\n                        )\n                        turn_tool_packets.extend(\n                            create_fetch_packets(\n                                fetch_docs,\n                                urls,\n                                turn_num,\n                                tab_index=tool_call.tab_index,\n                            )\n                        )\n\n                    elif tool.in_code_tool_id == ImageGenerationTool.__name__:\n                        if tool_call.generated_images:\n                            images = [\n                                GeneratedImage(**img)\n                                for img in tool_call.generated_images\n                            ]\n                            turn_tool_packets.extend(\n                                create_image_generation_packets(\n                                    images, turn_num, tab_index=tool_call.tab_index\n                                )\n                            )\n\n                    elif tool.in_code_tool_id == FileReaderTool.__name__:\n                        turn_tool_packets.extend(\n                            create_file_reader_packets(\n                                summary_json=tool_call.tool_call_response or \"\",\n                                turn_index=turn_num,\n                                tab_index=tool_call.tab_index,\n                            )\n                        )\n\n                    elif tool.in_code_tool_id == RESEARCH_AGENT_IN_CODE_ID:\n                        # Not ideal but not a huge issue if the research task is lost.\n                        research_task = cast(\n                            str,\n                            tool_call.tool_call_arguments.get(RESEARCH_AGENT_TASK_KEY)\n                            or \"Could not fetch saved research task.\",\n                        )\n                        turn_tool_packets.extend(\n                            create_research_agent_packets(\n                                research_task=research_task,\n                                report_content=tool_call.tool_call_response,\n                                turn_index=turn_num,\n                                tab_index=tool_call.tab_index,\n                            )\n                        )\n\n                    elif tool.in_code_tool_id == MemoryTool.__name__:\n                        if tool_call.tool_call_response:\n                            memory_data = json.loads(tool_call.tool_call_response)\n                            turn_tool_packets.extend(\n                                create_memory_packets(\n                                    memory_text=memory_data[\"memory_text\"],\n                                    operation=cast(\n                                        Literal[\"add\", \"update\"],\n                                        memory_data[\"operation\"],\n                                    ),\n                                    memory_id=memory_data.get(\"memory_id\"),\n                                    turn_index=turn_num,\n                                    tab_index=tool_call.tab_index,\n                                    index=memory_data.get(\"index\"),\n                                )\n                            )\n\n                    elif tool.in_code_tool_id == PythonTool.__name__:\n                        code = cast(\n                            str,\n                            tool_call.tool_call_arguments.get(\"code\", \"\"),\n                        )\n                        stdout = \"\"\n                        stderr = \"\"\n                        file_ids: list[str] = []\n                        if tool_call.tool_call_response:\n                            try:\n                                response_data = json.loads(tool_call.tool_call_response)\n                                stdout = response_data.get(\"stdout\", \"\")\n                                stderr = response_data.get(\"stderr\", \"\")\n                                generated_files = response_data.get(\n                                    \"generated_files\", []\n                                )\n                                file_ids = [\n                                    f.get(\"file_link\", \"\").split(\"/\")[-1]\n                                    for f in generated_files\n                                    if f.get(\"file_link\")\n                                ]\n                            except (json.JSONDecodeError, KeyError):\n                                # Fall back to raw response as stdout\n                                stdout = tool_call.tool_call_response\n                        turn_tool_packets.extend(\n                            create_python_tool_packets(\n                                code=code,\n                                stdout=stdout,\n                                stderr=stderr,\n                                file_ids=file_ids,\n                                turn_index=turn_num,\n                                tab_index=tool_call.tab_index,\n                            )\n                        )\n\n                    else:\n                        # Custom tool or unknown tool\n                        # Try to parse as structured CustomToolCallSummary JSON\n                        custom_data: dict | list | str | int | float | bool | None = (\n                            tool_call.tool_call_response\n                        )\n                        custom_error: CustomToolErrorInfo | None = None\n                        custom_response_type = \"text\"\n\n                        try:\n                            parsed = json.loads(tool_call.tool_call_response)\n                            if isinstance(parsed, dict) and \"tool_name\" in parsed:\n                                custom_data = parsed.get(\"tool_result\")\n                                custom_response_type = parsed.get(\n                                    \"response_type\", \"text\"\n                                )\n                                if parsed.get(\"error\"):\n                                    custom_error = CustomToolErrorInfo(\n                                        **parsed[\"error\"]\n                                    )\n                        except (\n                            json.JSONDecodeError,\n                            KeyError,\n                            TypeError,\n                            ValidationError,\n                        ):\n                            pass\n\n                        custom_file_ids: list[str] | None = None\n                        if custom_response_type in (\"image\", \"csv\") and isinstance(\n                            custom_data, dict\n                        ):\n                            custom_file_ids = custom_data.get(\"file_ids\")\n                            custom_data = None\n\n                        custom_args = {\n                            k: v\n                            for k, v in (tool_call.tool_call_arguments or {}).items()\n                            if k != \"requestBody\"\n                        }\n                        turn_tool_packets.extend(\n                            create_custom_tool_packets(\n                                tool_name=tool.display_name or tool.name,\n                                response_type=custom_response_type,\n                                turn_index=turn_num,\n                                tab_index=tool_call.tab_index,\n                                data=custom_data,\n                                file_ids=custom_file_ids,\n                                error=custom_error,\n                                tool_args=custom_args if custom_args else None,\n                                tool_id=tool_call.tool_id,\n                            )\n                        )\n\n                except Exception as e:\n                    logger.warning(f\"Error processing tool call {tool_call.id}: {e}\")\n                    continue\n\n            if research_agent_count > 1:\n                # Emit TopLevelBranching before processing any tool output in the turn.\n                packet_list.append(\n                    Packet(\n                        placement=Placement(turn_index=turn_num),\n                        obj=TopLevelBranching(\n                            num_parallel_branches=research_agent_count\n                        ),\n                    )\n                )\n            packet_list.extend(turn_tool_packets)\n\n    # Determine the next turn_index for the final message\n    # It should come after all tool calls\n    max_tool_turn = 0\n    if chat_message.tool_calls:\n        max_tool_turn = max(tc.turn_number for tc in chat_message.tool_calls)\n\n    citations = chat_message.citations\n    citation_info_list: list[CitationInfo] = []\n\n    if citations:\n        for citation_num, search_doc_id in citations.items():\n            search_doc = get_db_search_doc_by_id(search_doc_id, db_session)\n            if search_doc:\n                citation_info_list.append(\n                    CitationInfo(\n                        citation_number=citation_num,\n                        document_id=search_doc.document_id,\n                    )\n                )\n\n        # Sort citations by order of appearance in message text\n        citation_order = extract_citation_order_from_text(chat_message.message or \"\")\n        order_map = {num: idx for idx, num in enumerate(citation_order)}\n        citation_info_list.sort(\n            key=lambda c: order_map.get(c.citation_number, float(\"inf\"))\n        )\n\n    # Message comes after tool calls, with optional reasoning step beforehand\n    message_turn_index = max_tool_turn + 1\n    if chat_message.reasoning_tokens:\n        packet_list.extend(\n            create_reasoning_packets(\n                reasoning_text=chat_message.reasoning_tokens,\n                turn_index=message_turn_index,\n            )\n        )\n        message_turn_index += 1\n\n    if chat_message.message:\n        packet_list.extend(\n            create_message_packets(\n                message_text=chat_message.message,\n                final_documents=[\n                    translate_db_search_doc_to_saved_search_doc(doc)\n                    for doc in chat_message.search_docs\n                ],\n                turn_index=message_turn_index,\n            )\n        )\n\n    # Citations come after the message\n    citation_turn_index = (\n        message_turn_index + 1 if citation_info_list else message_turn_index\n    )\n\n    if len(citation_info_list) > 0:\n        packet_list.extend(\n            create_citation_packets(citation_info_list, citation_turn_index)\n        )\n\n    # Return the highest turn_index used\n    final_turn_index = 0\n    if chat_message.message_type == MessageType.ASSISTANT:\n        max_tool_turn = 0\n        if chat_message.tool_calls:\n            max_tool_turn = max(tc.turn_number for tc in chat_message.tool_calls)\n\n        final_turn_index = max_tool_turn\n        if chat_message.reasoning_tokens:\n            final_turn_index = max(final_turn_index, max_tool_turn + 1)\n        if chat_message.message:\n            final_turn_index = max(final_turn_index, message_turn_index)\n        if citation_info_list:\n            final_turn_index = max(final_turn_index, citation_turn_index)\n\n    # Determine stop reason - check if message indicates user cancelled\n    stop_reason: str | None = None\n    if chat_message.message:\n        if \"generation was stopped\" in chat_message.message.lower():\n            stop_reason = \"user_cancelled\"\n\n    # Add overall stop packet at the end\n    packet_list.append(\n        Packet(\n            placement=Placement(turn_index=final_turn_index),\n            obj=OverallStop(stop_reason=stop_reason),\n        )\n    )\n\n    return packet_list\n"
  },
  {
    "path": "backend/onyx/server/query_and_chat/streaming_models.py",
    "content": "from enum import Enum\nfrom typing import Annotated\nfrom typing import Any\nfrom typing import Literal\nfrom typing import Union\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.server.query_and_chat.placement import Placement\n\n\nclass StreamingType(Enum):\n    \"\"\"Enum defining all streaming packet types. This is the single source of truth for type strings.\"\"\"\n\n    SECTION_END = \"section_end\"\n    STOP = \"stop\"\n    TOP_LEVEL_BRANCHING = \"top_level_branching\"\n    ERROR = \"error\"\n\n    MESSAGE_START = \"message_start\"\n    MESSAGE_DELTA = \"message_delta\"\n    SEARCH_TOOL_START = \"search_tool_start\"\n    SEARCH_TOOL_QUERIES_DELTA = \"search_tool_queries_delta\"\n    SEARCH_TOOL_DOCUMENTS_DELTA = \"search_tool_documents_delta\"\n    OPEN_URL_START = \"open_url_start\"\n    OPEN_URL_URLS = \"open_url_urls\"\n    OPEN_URL_DOCUMENTS = \"open_url_documents\"\n    IMAGE_GENERATION_START = \"image_generation_start\"\n    IMAGE_GENERATION_HEARTBEAT = \"image_generation_heartbeat\"\n    IMAGE_GENERATION_FINAL = \"image_generation_final\"\n    PYTHON_TOOL_START = \"python_tool_start\"\n    PYTHON_TOOL_DELTA = \"python_tool_delta\"\n    CUSTOM_TOOL_START = \"custom_tool_start\"\n    CUSTOM_TOOL_ARGS = \"custom_tool_args\"\n    CUSTOM_TOOL_DELTA = \"custom_tool_delta\"\n    FILE_READER_START = \"file_reader_start\"\n    FILE_READER_RESULT = \"file_reader_result\"\n    REASONING_START = \"reasoning_start\"\n    REASONING_DELTA = \"reasoning_delta\"\n    REASONING_DONE = \"reasoning_done\"\n    CITATION_INFO = \"citation_info\"\n    TOOL_CALL_DEBUG = \"tool_call_debug\"\n    TOOL_CALL_ARGUMENT_DELTA = \"tool_call_argument_delta\"\n\n    MEMORY_TOOL_START = \"memory_tool_start\"\n    MEMORY_TOOL_DELTA = \"memory_tool_delta\"\n    MEMORY_TOOL_NO_ACCESS = \"memory_tool_no_access\"\n\n    DEEP_RESEARCH_PLAN_START = \"deep_research_plan_start\"\n    DEEP_RESEARCH_PLAN_DELTA = \"deep_research_plan_delta\"\n    RESEARCH_AGENT_START = \"research_agent_start\"\n    INTERMEDIATE_REPORT_START = \"intermediate_report_start\"\n    INTERMEDIATE_REPORT_DELTA = \"intermediate_report_delta\"\n    INTERMEDIATE_REPORT_CITED_DOCS = \"intermediate_report_cited_docs\"\n\n\nclass BaseObj(BaseModel):\n    type: str = \"\"\n\n\n################################################\n# Control Packets\n################################################\n# This one isn't strictly necessary, remove in the future\nclass SectionEnd(BaseObj):\n    type: Literal[\"section_end\"] = StreamingType.SECTION_END.value\n\n\nclass OverallStop(BaseObj):\n    type: Literal[\"stop\"] = StreamingType.STOP.value\n    stop_reason: str | None = None\n\n\nclass TopLevelBranching(BaseObj):\n    # This class is used to give advanced heads up to the frontend that the top level flow is branching\n    # This is used to avoid having the frontend render the first call then rerendering the other parallel branches\n    type: Literal[\"top_level_branching\"] = StreamingType.TOP_LEVEL_BRANCHING.value\n\n    num_parallel_branches: int\n\n\nclass PacketException(BaseObj):\n    type: Literal[\"error\"] = StreamingType.ERROR.value\n\n    exception: Exception = Field(exclude=True)\n    model_config = {\"arbitrary_types_allowed\": True}\n\n\n################################################\n# Reasoning Packets\n################################################\n# Tells the frontend to display the reasoning block\nclass ReasoningStart(BaseObj):\n    type: Literal[\"reasoning_start\"] = StreamingType.REASONING_START.value\n\n\n# The stream of tokens for the reasoning\nclass ReasoningDelta(BaseObj):\n    type: Literal[\"reasoning_delta\"] = StreamingType.REASONING_DELTA.value\n\n    reasoning: str\n\n\nclass ReasoningDone(BaseObj):\n    type: Literal[\"reasoning_done\"] = StreamingType.REASONING_DONE.value\n\n\n################################################\n# Final Agent Response Packets\n################################################\n# Start of the final answer\nclass AgentResponseStart(BaseObj):\n    type: Literal[\"message_start\"] = StreamingType.MESSAGE_START.value\n\n    final_documents: list[SearchDoc] | None = None\n    pre_answer_processing_seconds: float | None = None\n\n\n# The stream of tokens for the final response\n# There is no end packet for this as the stream is over and a final OverallStop packet is emitted\nclass AgentResponseDelta(BaseObj):\n    type: Literal[\"message_delta\"] = StreamingType.MESSAGE_DELTA.value\n\n    content: str\n\n\n# Citation info for the sidebar and inline citations\nclass CitationInfo(BaseObj):\n    type: Literal[\"citation_info\"] = StreamingType.CITATION_INFO.value\n\n    # The numerical number of the citation as provided by the LLM\n    citation_number: int\n    # The document id of the SearchDoc (same as the field stored in the DB)\n    # This is the actual document id from the connector, not the int id\n    document_id: str\n\n\nclass ToolCallDebug(BaseObj):\n    type: Literal[\"tool_call_debug\"] = StreamingType.TOOL_CALL_DEBUG.value\n\n    tool_call_id: str\n    tool_name: str\n    tool_args: dict[str, Any]\n\n\n################################################\n# Tool Packets\n################################################\n# Search tool is called and the UI block needs to start\nclass SearchToolStart(BaseObj):\n    type: Literal[\"search_tool_start\"] = StreamingType.SEARCH_TOOL_START.value\n\n    is_internet_search: bool = False\n\n\n# Queries coming through as the LLM determines what to search\n# Mostly for query expansions and advanced search strategies\nclass SearchToolQueriesDelta(BaseObj):\n    type: Literal[\"search_tool_queries_delta\"] = (\n        StreamingType.SEARCH_TOOL_QUERIES_DELTA.value\n    )\n\n    queries: list[str]\n\n\n# Documents coming through as the system knows what to add to the context\nclass SearchToolDocumentsDelta(BaseObj):\n    type: Literal[\"search_tool_documents_delta\"] = (\n        StreamingType.SEARCH_TOOL_DOCUMENTS_DELTA.value\n    )\n\n    # This cannot be the SavedSearchDoc as this is yielded by the SearchTool directly\n    # which does not save documents to the DB.\n    documents: list[SearchDoc]\n\n\n# OpenURL tool packets - 3-stage sequence\nclass OpenUrlStart(BaseObj):\n    \"\"\"Signal that OpenURL tool has started.\"\"\"\n\n    type: Literal[\"open_url_start\"] = StreamingType.OPEN_URL_START.value\n\n\nclass OpenUrlUrls(BaseObj):\n    \"\"\"URLs to be fetched (sent before crawling begins).\"\"\"\n\n    type: Literal[\"open_url_urls\"] = StreamingType.OPEN_URL_URLS.value\n\n    urls: list[str]\n\n\nclass OpenUrlDocuments(BaseObj):\n    \"\"\"Final documents after crawling completes.\"\"\"\n\n    type: Literal[\"open_url_documents\"] = StreamingType.OPEN_URL_DOCUMENTS.value\n\n    documents: list[SearchDoc]\n\n\n# Image generation starting, needs to allocate a placeholder block for it on the UI\nclass ImageGenerationToolStart(BaseObj):\n    type: Literal[\"image_generation_start\"] = StreamingType.IMAGE_GENERATION_START.value\n\n\n# Since image generation can take a while\n# we send a heartbeat to the frontend to keep the UI/connection alive\nclass ImageGenerationToolHeartbeat(BaseObj):\n    type: Literal[\"image_generation_heartbeat\"] = (\n        StreamingType.IMAGE_GENERATION_HEARTBEAT.value\n    )\n\n\n# Represents an image generated by an image generation tool\nclass GeneratedImage(BaseModel):\n    \"\"\"Represents an image generated by an image generation tool.\"\"\"\n\n    file_id: str\n    url: str\n    revised_prompt: str\n    shape: str | None = None\n\n\n# The final generated images all at once at the end of image generation\nclass ImageGenerationFinal(BaseObj):\n    type: Literal[\"image_generation_final\"] = StreamingType.IMAGE_GENERATION_FINAL.value\n\n    images: list[GeneratedImage]\n\n\nclass PythonToolStart(BaseObj):\n    type: Literal[\"python_tool_start\"] = StreamingType.PYTHON_TOOL_START.value\n    code: str\n\n\nclass PythonToolDelta(BaseObj):\n    type: Literal[\"python_tool_delta\"] = StreamingType.PYTHON_TOOL_DELTA.value\n\n    stdout: str = \"\"\n    stderr: str = \"\"\n    file_ids: list[str] = []\n\n\n# Custom tool being called, first allocate a placeholder block for it on the UI\nclass CustomToolStart(BaseObj):\n    type: Literal[\"custom_tool_start\"] = StreamingType.CUSTOM_TOOL_START.value\n\n    tool_name: str\n    tool_id: int | None = None\n\n\nclass CustomToolArgs(BaseObj):\n    type: Literal[\"custom_tool_args\"] = StreamingType.CUSTOM_TOOL_ARGS.value\n\n    tool_name: str\n    tool_args: dict[str, Any]\n\n\nclass CustomToolErrorInfo(BaseModel):\n    is_auth_error: bool = False\n    status_code: int\n    message: str\n\n\n# The allowed streamed packets for a custom tool\nclass CustomToolDelta(BaseObj):\n    type: Literal[\"custom_tool_delta\"] = StreamingType.CUSTOM_TOOL_DELTA.value\n\n    tool_name: str\n    tool_id: int | None = None\n    response_type: str\n    # For non-file responses\n    data: dict | list | str | int | float | bool | None = None\n    # For file-based responses like image/csv\n    file_ids: list[str] | None = None\n    error: CustomToolErrorInfo | None = None\n\n\nclass ToolCallArgumentDelta(BaseObj):\n    type: Literal[\"tool_call_argument_delta\"] = (\n        StreamingType.TOOL_CALL_ARGUMENT_DELTA.value\n    )\n\n    tool_type: str\n    argument_deltas: dict[str, Any]\n\n\n################################################\n# File Reader Packets\n################################################\nclass FileReaderStart(BaseObj):\n    type: Literal[\"file_reader_start\"] = StreamingType.FILE_READER_START.value\n\n\nclass FileReaderResult(BaseObj):\n    type: Literal[\"file_reader_result\"] = StreamingType.FILE_READER_RESULT.value\n\n    file_name: str\n    file_id: str\n    start_char: int\n    end_char: int\n    total_chars: int\n    # Short previews of the retrieved text for the collapsed/expanded UI\n    preview_start: str = \"\"\n    preview_end: str = \"\"\n\n\n# Memory Tool Packets\n################################################\nclass MemoryToolStart(BaseObj):\n    type: Literal[\"memory_tool_start\"] = StreamingType.MEMORY_TOOL_START.value\n\n\nclass MemoryToolDelta(BaseObj):\n    type: Literal[\"memory_tool_delta\"] = StreamingType.MEMORY_TOOL_DELTA.value\n\n    memory_text: str\n    operation: Literal[\"add\", \"update\"]\n    memory_id: int | None = None\n    index: int | None = None\n\n\nclass MemoryToolNoAccess(BaseObj):\n    type: Literal[\"memory_tool_no_access\"] = StreamingType.MEMORY_TOOL_NO_ACCESS.value\n\n\n################################################\n# Deep Research Packets\n################################################\nclass DeepResearchPlanStart(BaseObj):\n    type: Literal[\"deep_research_plan_start\"] = (\n        StreamingType.DEEP_RESEARCH_PLAN_START.value\n    )\n\n\nclass DeepResearchPlanDelta(BaseObj):\n    type: Literal[\"deep_research_plan_delta\"] = (\n        StreamingType.DEEP_RESEARCH_PLAN_DELTA.value\n    )\n\n    content: str\n\n\nclass ResearchAgentStart(BaseObj):\n    type: Literal[\"research_agent_start\"] = StreamingType.RESEARCH_AGENT_START.value\n    research_task: str\n\n\nclass IntermediateReportStart(BaseObj):\n    type: Literal[\"intermediate_report_start\"] = (\n        StreamingType.INTERMEDIATE_REPORT_START.value\n    )\n\n\nclass IntermediateReportDelta(BaseObj):\n    type: Literal[\"intermediate_report_delta\"] = (\n        StreamingType.INTERMEDIATE_REPORT_DELTA.value\n    )\n    content: str\n\n\nclass IntermediateReportCitedDocs(BaseObj):\n    type: Literal[\"intermediate_report_cited_docs\"] = (\n        StreamingType.INTERMEDIATE_REPORT_CITED_DOCS.value\n    )\n    cited_docs: list[SearchDoc] | None = None\n\n\n################################################\n# Packet Object\n################################################\n# Discriminated union of all possible packet object types\nPacketObj = Union[\n    # Control Packets\n    OverallStop,\n    SectionEnd,\n    TopLevelBranching,\n    PacketException,\n    # Agent Response Packets\n    AgentResponseStart,\n    AgentResponseDelta,\n    # Tool Packets\n    SearchToolStart,\n    SearchToolQueriesDelta,\n    SearchToolDocumentsDelta,\n    ImageGenerationToolStart,\n    ImageGenerationToolHeartbeat,\n    ImageGenerationFinal,\n    OpenUrlStart,\n    OpenUrlUrls,\n    OpenUrlDocuments,\n    PythonToolStart,\n    PythonToolDelta,\n    CustomToolStart,\n    CustomToolArgs,\n    CustomToolDelta,\n    FileReaderStart,\n    FileReaderResult,\n    MemoryToolStart,\n    MemoryToolDelta,\n    MemoryToolNoAccess,\n    # Reasoning Packets\n    ReasoningStart,\n    ReasoningDelta,\n    ReasoningDone,\n    # Citation Packets\n    CitationInfo,\n    ToolCallDebug,\n    ToolCallArgumentDelta,\n    # Deep Research Packets\n    DeepResearchPlanStart,\n    DeepResearchPlanDelta,\n    ResearchAgentStart,\n    IntermediateReportStart,\n    IntermediateReportDelta,\n    IntermediateReportCitedDocs,\n]\n\n\nclass Packet(BaseModel):\n    placement: Placement\n\n    obj: Annotated[PacketObj, Field(discriminator=\"type\")]\n"
  },
  {
    "path": "backend/onyx/server/query_and_chat/token_limit.py",
    "content": "from collections.abc import Sequence\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom functools import lru_cache\n\nfrom dateutil import tz\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom sqlalchemy import func\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_chat_accessible_user\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import ChatMessage\nfrom onyx.db.models import ChatSession\nfrom onyx.db.models import TokenRateLimit\nfrom onyx.db.models import User\nfrom onyx.db.token_limit import fetch_all_global_token_rate_limits\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\n\n\nlogger = setup_logger()\n\n\nTOKEN_BUDGET_UNIT = 1_000\n\n\ndef check_token_rate_limits(\n    user: User = Depends(current_chat_accessible_user),\n) -> None:\n    # short circuit if no rate limits are set up\n    # NOTE: result of `any_rate_limit_exists` is cached, so this call is fast 99% of the time\n    if not any_rate_limit_exists():\n        return\n\n    versioned_rate_limit_strategy = fetch_versioned_implementation(\n        \"onyx.server.query_and_chat.token_limit\", _check_token_rate_limits.__name__\n    )\n    return versioned_rate_limit_strategy(user)\n\n\ndef _check_token_rate_limits(_: User) -> None:\n    _user_is_rate_limited_by_global()\n\n\n\"\"\"\nGlobal rate limits\n\"\"\"\n\n\ndef _user_is_rate_limited_by_global() -> None:\n    with get_session_with_current_tenant() as db_session:\n        global_rate_limits = fetch_all_global_token_rate_limits(\n            db_session=db_session, enabled_only=True, ordered=False\n        )\n\n        if global_rate_limits:\n            global_cutoff_time = _get_cutoff_time(global_rate_limits)\n            global_usage = _fetch_global_usage(global_cutoff_time, db_session)\n\n            if _is_rate_limited(global_rate_limits, global_usage):\n                raise HTTPException(\n                    status_code=429,\n                    detail=\"Token budget exceeded for organization. Try again later.\",\n                )\n\n\ndef _fetch_global_usage(\n    cutoff_time: datetime, db_session: Session\n) -> Sequence[tuple[datetime, int]]:\n    \"\"\"\n    Fetch global token usage within the cutoff time, grouped by minute\n    \"\"\"\n    result = db_session.execute(\n        select(\n            func.date_trunc(\"minute\", ChatMessage.time_sent),\n            func.sum(ChatMessage.token_count),\n        )\n        .join(ChatSession, ChatMessage.chat_session_id == ChatSession.id)\n        .filter(\n            ChatMessage.time_sent >= cutoff_time,\n        )\n        .group_by(func.date_trunc(\"minute\", ChatMessage.time_sent))\n    ).all()\n\n    return [(row[0], row[1]) for row in result]\n\n\n\"\"\"\nCommon functions\n\"\"\"\n\n\ndef _get_cutoff_time(rate_limits: Sequence[TokenRateLimit]) -> datetime:\n    max_period_hours = max(rate_limit.period_hours for rate_limit in rate_limits)\n    return datetime.now(tz=timezone.utc) - timedelta(hours=max_period_hours)\n\n\ndef _is_rate_limited(\n    rate_limits: Sequence[TokenRateLimit], usage: Sequence[tuple[datetime, int]]\n) -> bool:\n    \"\"\"\n    If at least one rate limit is exceeded, return True\n    \"\"\"\n    for rate_limit in rate_limits:\n        tokens_used = sum(\n            u_token_count\n            for u_date, u_token_count in usage\n            if u_date\n            >= datetime.now(tz=tz.UTC) - timedelta(hours=rate_limit.period_hours)\n        )\n\n        if tokens_used >= rate_limit.token_budget * TOKEN_BUDGET_UNIT:\n            return True\n\n    return False\n\n\n@lru_cache()\ndef any_rate_limit_exists() -> bool:\n    \"\"\"Checks if any rate limit exists in the database. Is cached, so that if no rate limits\n    are setup, we don't have any effect on average query latency.\"\"\"\n    logger.debug(\"Checking for any rate limits...\")\n    with get_session_with_current_tenant() as db_session:\n        return (\n            db_session.scalar(\n                select(TokenRateLimit.id).where(\n                    TokenRateLimit.enabled == True  # noqa: E712\n                )\n            )\n            is not None\n        )\n"
  },
  {
    "path": "backend/onyx/server/runtime/onyx_runtime.py",
    "content": "import io\nfrom typing import cast\n\nfrom PIL import Image\n\nfrom onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT\nfrom onyx.background.celery.tasks.beat_schedule import (\n    CLOUD_DOC_PERMISSION_SYNC_MULTIPLIER_DEFAULT,\n)\nfrom onyx.configs.constants import CLOUD_BUILD_FENCE_LOOKUP_TABLE_INTERVAL_DEFAULT\nfrom onyx.configs.constants import ONYX_CLOUD_REDIS_RUNTIME\nfrom onyx.configs.constants import ONYX_CLOUD_TENANT_ID\nfrom onyx.configs.constants import ONYX_EMAILABLE_LOGO_MAX_DIM\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.redis.redis_pool import get_redis_replica_client\nfrom onyx.utils.file import FileWithMimeType\nfrom onyx.utils.file import OnyxStaticFileManager\nfrom onyx.utils.variable_functionality import (\n    fetch_ee_implementation_or_noop,\n)\n\n\nclass OnyxRuntime:\n    \"\"\"Used by the application to get the final runtime value of a setting.\n\n    Rationale: Settings and overrides may be persisted in multiple places, including the\n    DB, Redis, env vars, and default constants, etc. The logic to present a final\n    setting to the application should be centralized and in one place.\n\n    Example: To get the logo for the application, one must check the DB for an override,\n    use the override if present, fall back to the filesystem if not present, and worry\n    about enterprise or not enterprise.\n    \"\"\"\n\n    @staticmethod\n    def _get_with_static_fallback(\n        db_filename: str | None, static_filename: str\n    ) -> FileWithMimeType:\n        onyx_file: FileWithMimeType | None = None\n\n        if db_filename:\n            file_store = get_default_file_store()\n            onyx_file = file_store.get_file_with_mime_type(db_filename)\n\n        if not onyx_file:\n            onyx_file = OnyxStaticFileManager.get_static(static_filename)\n\n        if not onyx_file:\n            raise RuntimeError(\n                f\"Resource not found: db={db_filename} static={static_filename}\"\n            )\n\n        return onyx_file\n\n    @staticmethod\n    def get_logo() -> FileWithMimeType:\n        STATIC_FILENAME = \"static/images/logo.png\"\n\n        db_filename: str | None = fetch_ee_implementation_or_noop(\n            \"onyx.server.enterprise_settings.store\", \"get_logo_filename\", None\n        )\n\n        return OnyxRuntime._get_with_static_fallback(db_filename, STATIC_FILENAME)\n\n    @staticmethod\n    def get_emailable_logo() -> FileWithMimeType:\n        onyx_file = OnyxRuntime.get_logo()\n\n        # check dimensions and resize downwards if necessary or if not PNG\n        image = Image.open(io.BytesIO(onyx_file.data))\n        if (\n            image.size[0] > ONYX_EMAILABLE_LOGO_MAX_DIM\n            or image.size[1] > ONYX_EMAILABLE_LOGO_MAX_DIM\n            or image.format != \"PNG\"\n        ):\n            image.thumbnail(\n                (ONYX_EMAILABLE_LOGO_MAX_DIM, ONYX_EMAILABLE_LOGO_MAX_DIM),\n                Image.LANCZOS,\n            )  # maintains aspect ratio\n            output_buffer = io.BytesIO()\n            image.save(output_buffer, format=\"PNG\")\n            onyx_file = FileWithMimeType(\n                data=output_buffer.getvalue(), mime_type=\"image/png\"\n            )\n\n        return onyx_file\n\n    @staticmethod\n    def get_logotype() -> FileWithMimeType:\n        STATIC_FILENAME = \"static/images/logotype.png\"\n\n        db_filename: str | None = fetch_ee_implementation_or_noop(\n            \"onyx.server.enterprise_settings.store\", \"get_logotype_filename\", None\n        )\n\n        return OnyxRuntime._get_with_static_fallback(db_filename, STATIC_FILENAME)\n\n    @staticmethod\n    def get_beat_multiplier() -> float:\n        \"\"\"the beat multiplier is used to scale up or down the frequency of certain beat\n        tasks in the cloud. It has a significant effect on load and is useful to adjust\n        in real time.\"\"\"\n\n        beat_multiplier: float = CLOUD_BEAT_MULTIPLIER_DEFAULT\n\n        r = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)\n\n        beat_multiplier_raw = r.get(f\"{ONYX_CLOUD_REDIS_RUNTIME}:beat_multiplier\")\n        if beat_multiplier_raw is not None:\n            try:\n                beat_multiplier_bytes = cast(bytes, beat_multiplier_raw)\n                beat_multiplier = float(beat_multiplier_bytes.decode())\n            except ValueError:\n                pass\n\n        if beat_multiplier <= 0.0:\n            return 1.0\n\n        return beat_multiplier\n\n    @staticmethod\n    def get_doc_permission_sync_multiplier() -> float:\n        \"\"\"Permission syncs are a significant source of load / queueing in the cloud.\"\"\"\n\n        value: float = CLOUD_DOC_PERMISSION_SYNC_MULTIPLIER_DEFAULT\n\n        r = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)\n\n        value_raw = r.get(f\"{ONYX_CLOUD_REDIS_RUNTIME}:doc_permission_sync_multiplier\")\n        if value_raw is not None:\n            try:\n                value_bytes = cast(bytes, value_raw)\n                value = float(value_bytes.decode())\n            except ValueError:\n                pass\n\n        if value <= 0.0:\n            return 1.0\n\n        return value\n\n    @staticmethod\n    def get_build_fence_lookup_table_interval() -> int:\n        \"\"\"We maintain an active fence table to make lookups of existing fences efficient.\n        However, reconstructing the table is expensive, so adjusting it in realtime is useful.\n        \"\"\"\n\n        interval: int = CLOUD_BUILD_FENCE_LOOKUP_TABLE_INTERVAL_DEFAULT\n\n        r = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)\n\n        interval_raw = r.get(\n            f\"{ONYX_CLOUD_REDIS_RUNTIME}:build_fence_lookup_table_interval\"\n        )\n        if interval_raw is not None:\n            try:\n                interval_bytes = cast(bytes, interval_raw)\n                interval = int(interval_bytes.decode())\n            except ValueError:\n                pass\n\n        if interval <= 0.0:\n            return CLOUD_BUILD_FENCE_LOOKUP_TABLE_INTERVAL_DEFAULT\n\n        return interval\n"
  },
  {
    "path": "backend/onyx/server/saml.py",
    "content": "import contextlib\nimport secrets\nimport string\nimport uuid\nfrom typing import Any\nfrom urllib.parse import urlparse\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom fastapi import HTTPException\nfrom fastapi import Request\nfrom fastapi import Response\nfrom fastapi import status\nfrom fastapi_users import exceptions\nfrom fastapi_users.authentication import Strategy\nfrom onelogin.saml2.auth import OneLogin_Saml2_Auth  # type: ignore\nfrom pydantic import BaseModel\n\nfrom onyx.auth.schemas import UserCreate\nfrom onyx.auth.schemas import UserRole\nfrom onyx.auth.users import auth_backend\nfrom onyx.auth.users import fastapi_users\nfrom onyx.auth.users import get_user_manager\nfrom onyx.auth.users import UserManager\nfrom onyx.configs.app_configs import REQUIRE_EMAIL_VERIFICATION\nfrom onyx.configs.app_configs import SAML_CONF_DIR\nfrom onyx.configs.app_configs import WEB_DOMAIN\nfrom onyx.db.auth import get_user_count\nfrom onyx.db.auth import get_user_db\nfrom onyx.db.engine.async_sql_engine import get_async_session_context_manager\nfrom onyx.db.models import User\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\nrouter = APIRouter(prefix=\"/auth/saml\")\n\n# Azure AD / Entra ID often returns the email attribute under different keys.\n# Keep a list of common variations so we can fall back gracefully if the IdP\n# does not send the plain \"email\" attribute name.\nEMAIL_ATTRIBUTE_KEYS = {\n    \"email\",\n    \"emailaddress\",\n    \"mail\",\n    \"http://schemas.xmlsoap.org/ws/2005/05/identity/claims/emailaddress\",\n    \"http://schemas.xmlsoap.org/ws/2005/05/identity/claims/mail\",\n    \"http://schemas.microsoft.com/identity/claims/emailaddress\",\n}\nEMAIL_ATTRIBUTE_KEYS_LOWER = {key.lower() for key in EMAIL_ATTRIBUTE_KEYS}\n\n\nasync def upsert_saml_user(email: str) -> User:\n    \"\"\"\n    Creates or updates a user account for SAML authentication.\n\n    For new users or users with non-web-login roles:\n    1. Generates a secure random password that meets validation criteria\n    2. Creates the user with appropriate role and verified status\n\n    SAML users never use this password directly as they authenticate via their\n    Identity Provider, but we need a valid password to satisfy system requirements.\n    \"\"\"\n    logger.debug(f\"Attempting to upsert SAML user with email: {email}\")\n    get_user_db_context = contextlib.asynccontextmanager(get_user_db)\n    get_user_manager_context = contextlib.asynccontextmanager(get_user_manager)\n\n    async with get_async_session_context_manager() as session:\n        async with get_user_db_context(session) as user_db:\n            async with get_user_manager_context(user_db) as user_manager:\n                try:\n                    user = await user_manager.get_by_email(email)\n                    # If user has a non-authenticated role, treat as non-existent\n                    if not user.account_type.is_web_login():\n                        raise exceptions.UserNotExists()\n                    return user\n                except exceptions.UserNotExists:\n                    logger.info(\"Creating user from SAML login\")\n\n                user_count = await get_user_count()\n                role = UserRole.ADMIN if user_count == 0 else UserRole.BASIC\n\n                # Generate a secure random password meeting validation requirements\n                # We use a secure random password since we never need to know what it is\n                # (SAML users authenticate via their IdP)\n                secure_random_password = \"\".join(\n                    [\n                        # Ensure minimum requirements are met\n                        secrets.choice(\n                            string.ascii_uppercase\n                        ),  # at least one uppercase\n                        secrets.choice(\n                            string.ascii_lowercase\n                        ),  # at least one lowercase\n                        secrets.choice(string.digits),  # at least one digit\n                        secrets.choice(\n                            \"!@#$%^&*()-_=+[]{}|;:,.<>?\"\n                        ),  # at least one special\n                        # Fill remaining length with random chars (mix of all types)\n                        \"\".join(\n                            secrets.choice(\n                                string.ascii_letters\n                                + string.digits\n                                + \"!@#$%^&*()-_=+[]{}|;:,.<>?\"\n                            )\n                            for _ in range(12)\n                        ),\n                    ]\n                )\n\n                # Create the user with SAML-appropriate settings\n                user = await user_manager.create(\n                    UserCreate(\n                        email=email,\n                        password=secure_random_password,  # Pass raw password, not hash\n                        role=role,\n                        is_verified=True,  # SAML users are pre-verified by their IdP\n                    )\n                )\n\n                return user\n\n\nasync def prepare_from_fastapi_request(request: Request) -> dict[str, Any]:\n    if request.client is None:\n        raise ValueError(\"Invalid request for SAML\")\n\n    # Derive http_host and server_port from WEB_DOMAIN (a trusted env var)\n    # instead of X-Forwarded-* headers, which can be spoofed by an attacker\n    # to poison SAML redirect URLs (host header poisoning).\n    parsed_domain = urlparse(WEB_DOMAIN)\n    http_host = parsed_domain.hostname or request.client.host\n    server_port = parsed_domain.port or (443 if parsed_domain.scheme == \"https\" else 80)\n\n    rv: dict[str, Any] = {\n        \"http_host\": http_host,\n        \"server_port\": server_port,\n        \"script_name\": request.url.path,\n        \"post_data\": {},\n        \"get_data\": {},\n    }\n\n    # Handle query parameters (for GET requests)\n    if request.query_params:\n        rv[\"get_data\"] = dict(request.query_params)\n\n    # Handle form data (for POST requests)\n    if request.method == \"POST\":\n        form_data = await request.form()\n        if \"SAMLResponse\" in form_data:\n            SAMLResponse = form_data[\"SAMLResponse\"]\n            rv[\"post_data\"][\"SAMLResponse\"] = SAMLResponse\n        if \"RelayState\" in form_data:\n            RelayState = form_data[\"RelayState\"]\n            rv[\"post_data\"][\"RelayState\"] = RelayState\n    else:\n        # For GET requests, check if SAMLResponse is in query params\n        if \"SAMLResponse\" in request.query_params:\n            rv[\"get_data\"][\"SAMLResponse\"] = request.query_params[\"SAMLResponse\"]\n        if \"RelayState\" in request.query_params:\n            rv[\"get_data\"][\"RelayState\"] = request.query_params[\"RelayState\"]\n\n    return rv\n\n\nclass SAMLAuthorizeResponse(BaseModel):\n    authorization_url: str\n\n\ndef _sanitize_relay_state(candidate: str | None) -> str | None:\n    \"\"\"Ensure the relay state is an internal path to avoid open redirects.\"\"\"\n    if not candidate:\n        return None\n\n    relay_state = candidate.strip()\n    if not relay_state or not relay_state.startswith(\"/\"):\n        return None\n\n    if \"\\\\\" in relay_state:\n        return None\n\n    # Reject colon before query/fragment to match frontend validation\n    path_portion = relay_state.split(\"?\", 1)[0].split(\"#\", 1)[0]\n    if \":\" in path_portion:\n        return None\n\n    parsed = urlparse(relay_state)\n    if parsed.scheme or parsed.netloc:\n        return None\n\n    return relay_state\n\n\n@router.get(\"/authorize\")\nasync def saml_login(request: Request) -> SAMLAuthorizeResponse:\n    req = await prepare_from_fastapi_request(request)\n    auth = OneLogin_Saml2_Auth(req, custom_base_path=SAML_CONF_DIR)\n    return_to = _sanitize_relay_state(request.query_params.get(\"next\"))\n    callback_url = auth.login(return_to=return_to)\n    return SAMLAuthorizeResponse(authorization_url=callback_url)\n\n\n@router.get(\"/callback\")\nasync def saml_login_callback_get(\n    request: Request,\n    strategy: Strategy[User, uuid.UUID] = Depends(auth_backend.get_strategy),\n    user_manager: UserManager = Depends(get_user_manager),\n) -> Response:\n    \"\"\"Handle SAML callback via HTTP-Redirect binding (GET request)\"\"\"\n    return await _process_saml_callback(request, strategy, user_manager)\n\n\n@router.post(\"/callback\")\nasync def saml_login_callback(\n    request: Request,\n    strategy: Strategy[User, uuid.UUID] = Depends(auth_backend.get_strategy),\n    user_manager: UserManager = Depends(get_user_manager),\n) -> Response:\n    \"\"\"Handle SAML callback via HTTP-POST binding (POST request)\"\"\"\n    return await _process_saml_callback(request, strategy, user_manager)\n\n\nasync def _process_saml_callback(\n    request: Request,\n    strategy: Strategy[User, uuid.UUID],\n    user_manager: UserManager,\n) -> Response:\n    req = await prepare_from_fastapi_request(request)\n    auth = OneLogin_Saml2_Auth(req, custom_base_path=SAML_CONF_DIR)\n    auth.process_response()\n    errors = auth.get_errors()\n    if len(errors) != 0:\n        logger.error(\n            \"Error when processing SAML Response: %s %s\"\n            % (\", \".join(errors), auth.get_last_error_reason())\n        )\n        raise HTTPException(\n            status_code=status.HTTP_403_FORBIDDEN,\n            detail=\"Access denied. Failed to parse SAML Response.\",\n        )\n\n    if not auth.is_authenticated():\n        detail = \"Access denied. User was not authenticated\"\n        logger.error(detail)\n        raise HTTPException(\n            status_code=status.HTTP_403_FORBIDDEN,\n            detail=detail,\n        )\n\n    user_email: str | None = None\n\n    # The OneLogin toolkit normalizes attribute keys, but still performs a\n    # case-sensitive lookup. Try the common keys first and then fall back to a\n    # case-insensitive scan of all returned attributes.\n    for attribute_key in EMAIL_ATTRIBUTE_KEYS:\n        attribute_values = auth.get_attribute(attribute_key)\n        if attribute_values:\n            user_email = attribute_values[0]\n            break\n\n    if not user_email:\n        # Fallback: perform a case-insensitive lookup across all attributes in\n        # case the IdP sent the email claim with a different capitalization.\n        attributes = auth.get_attributes()\n        for key, values in attributes.items():\n            if key.lower() in EMAIL_ATTRIBUTE_KEYS_LOWER:\n                if values:\n                    user_email = values[0]\n                    break\n        if not user_email:\n            detail = \"SAML is not set up correctly, email attribute must be provided.\"\n            logger.error(detail)\n            logger.debug(\n                \"Received SAML attributes without email: %s\",\n                list(attributes.keys()),\n            )\n            raise HTTPException(\n                status_code=status.HTTP_403_FORBIDDEN,\n                detail=detail,\n            )\n\n    user = await upsert_saml_user(email=user_email)\n\n    response = await auth_backend.login(strategy, user)\n    await user_manager.on_after_login(user, request, response)\n    return response\n\n\n@router.post(\"/logout\")\nasync def saml_logout(\n    user_token: tuple[User, str] = Depends(\n        fastapi_users.authenticator.current_user_token(\n            active=True, verified=REQUIRE_EMAIL_VERIFICATION\n        )\n    ),\n    strategy: Strategy[User, uuid.UUID] = Depends(auth_backend.get_strategy),\n) -> Response:\n    user, token = user_token\n    return await auth_backend.logout(strategy, user, token)\n"
  },
  {
    "path": "backend/onyx/server/settings/api.py",
    "content": "from typing import cast\n\nfrom fastapi import APIRouter\nfrom fastapi import Depends\nfrom sqlalchemy.exc import SQLAlchemyError\nfrom sqlalchemy.orm import Session\n\nfrom onyx import __version__ as onyx_version\nfrom onyx.auth.users import current_admin_user\nfrom onyx.auth.users import current_user\nfrom onyx.auth.users import is_user_admin\nfrom onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.app_configs import MAX_ALLOWED_UPLOAD_SIZE_MB\nfrom onyx.configs.constants import KV_REINDEX_KEY\nfrom onyx.configs.constants import NotificationType\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.notification import dismiss_all_notifications\nfrom onyx.db.notification import get_notifications\nfrom onyx.db.notification import update_notification_last_shown\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.server.features.build.utils import is_onyx_craft_enabled\nfrom onyx.server.settings.models import (\n    DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB,\n)\nfrom onyx.server.settings.models import DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB\nfrom onyx.server.settings.models import Notification\nfrom onyx.server.settings.models import Settings\nfrom onyx.server.settings.models import UserSettings\nfrom onyx.server.settings.store import load_settings\nfrom onyx.server.settings.store import store_settings\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import (\n    fetch_versioned_implementation_with_fallback,\n)\nfrom shared_configs.configs import MULTI_TENANT\n\nlogger = setup_logger()\n\nadmin_router = APIRouter(prefix=\"/admin/settings\")\nbasic_router = APIRouter(prefix=\"/settings\")\n\n\n@admin_router.put(\"\")\ndef admin_put_settings(\n    settings: Settings, _: User = Depends(current_admin_user)\n) -> None:\n    if (\n        settings.user_file_max_upload_size_mb is not None\n        and settings.user_file_max_upload_size_mb > 0\n        and settings.user_file_max_upload_size_mb > MAX_ALLOWED_UPLOAD_SIZE_MB\n    ):\n        raise OnyxError(\n            OnyxErrorCode.INVALID_INPUT,\n            f\"File upload size limit cannot exceed {MAX_ALLOWED_UPLOAD_SIZE_MB} MB\",\n        )\n    store_settings(settings)\n\n\ndef apply_license_status_to_settings(settings: Settings) -> Settings:\n    \"\"\"MIT version: no-op, returns settings unchanged.\"\"\"\n    return settings\n\n\n@basic_router.get(\"\")\ndef fetch_settings(\n    user: User = Depends(current_user),\n    db_session: Session = Depends(get_session),\n) -> UserSettings:\n    \"\"\"Settings and notifications are stuffed into this single endpoint to reduce number of\n    Postgres calls\"\"\"\n    general_settings = load_settings()\n    settings_notifications = get_settings_notifications(user, db_session)\n\n    try:\n        kv_store = get_kv_store()\n        needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY))\n    except KvKeyNotFoundError:\n        needs_reindexing = False\n\n    apply_fn = fetch_versioned_implementation_with_fallback(\n        \"onyx.server.settings.api\",\n        \"apply_license_status_to_settings\",\n        apply_license_status_to_settings,\n    )\n    general_settings = apply_fn(general_settings)\n\n    # Check if Onyx Craft is enabled for this user (used for server-side redirects)\n    onyx_craft_enabled_for_user = is_onyx_craft_enabled(user) if user else False\n\n    return UserSettings(\n        **general_settings.model_dump(),\n        notifications=settings_notifications,\n        needs_reindexing=needs_reindexing,\n        onyx_craft_enabled=onyx_craft_enabled_for_user,\n        vector_db_enabled=not DISABLE_VECTOR_DB,\n        hooks_enabled=not MULTI_TENANT,\n        version=onyx_version,\n        max_allowed_upload_size_mb=MAX_ALLOWED_UPLOAD_SIZE_MB,\n        default_user_file_max_upload_size_mb=min(\n            DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB,\n            MAX_ALLOWED_UPLOAD_SIZE_MB,\n        ),\n        default_file_token_count_threshold_k=(\n            DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB\n            if DISABLE_VECTOR_DB\n            else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB\n        ),\n    )\n\n\ndef get_settings_notifications(user: User, db_session: Session) -> list[Notification]:\n    \"\"\"Get notifications for settings page, including product gating and reindex notifications\"\"\"\n    # Check for product gating notification\n    product_notif = get_notifications(\n        user=None,\n        notif_type=NotificationType.TRIAL_ENDS_TWO_DAYS,\n        db_session=db_session,\n    )\n    notifications = [Notification.from_model(product_notif[0])] if product_notif else []\n\n    # Only show reindex notifications to admins\n    if not is_user_admin(user):\n        return notifications\n\n    # Check if reindexing is needed\n    kv_store = get_kv_store()\n    try:\n        needs_index = cast(bool, kv_store.load(KV_REINDEX_KEY))\n        if not needs_index:\n            dismiss_all_notifications(\n                notif_type=NotificationType.REINDEX, db_session=db_session\n            )\n            return notifications\n    except KvKeyNotFoundError:\n        # If something goes wrong and the flag is gone, better to not start a reindexing\n        # it's a heavyweight long running job and maybe this flag is cleaned up later\n        logger.warning(\"Could not find reindex flag\")\n        return notifications\n\n    try:\n        # Need a transaction in order to prevent under-counting current notifications\n        reindex_notifs = get_notifications(\n            user=user, notif_type=NotificationType.REINDEX, db_session=db_session\n        )\n\n        if len(reindex_notifs) > 1:\n            logger.error(\"User has multiple reindex notifications\")\n        elif not reindex_notifs:\n            return notifications\n\n        reindex_notif = reindex_notifs[0]\n        update_notification_last_shown(\n            notification=reindex_notif, db_session=db_session\n        )\n\n        db_session.commit()\n        notifications.append(Notification.from_model(reindex_notif))\n        return notifications\n    except SQLAlchemyError:\n        logger.exception(\"Error while processing notifications\")\n        db_session.rollback()\n        return notifications\n"
  },
  {
    "path": "backend/onyx/server/settings/models.py",
    "content": "from datetime import datetime\nfrom enum import Enum\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.app_configs import MAX_ALLOWED_UPLOAD_SIZE_MB\nfrom onyx.configs.constants import NotificationType\nfrom onyx.configs.constants import QueryHistoryType\nfrom onyx.db.models import Notification as NotificationDBModel\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\nDEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB = 200\nDEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB = 10000\n\n\nclass PageType(str, Enum):\n    CHAT = \"chat\"\n    SEARCH = \"search\"\n\n\nclass ApplicationStatus(str, Enum):\n    ACTIVE = \"active\"\n    PAYMENT_REMINDER = \"payment_reminder\"\n    GRACE_PERIOD = \"grace_period\"\n    GATED_ACCESS = \"gated_access\"\n    SEAT_LIMIT_EXCEEDED = \"seat_limit_exceeded\"\n\n\nclass Notification(BaseModel):\n    id: int\n    notif_type: NotificationType\n    dismissed: bool\n    last_shown: datetime\n    first_shown: datetime\n    title: str\n    description: str | None = None\n    additional_data: dict | None = None\n\n    @classmethod\n    def from_model(cls, notif: NotificationDBModel) -> \"Notification\":\n        return cls(\n            id=notif.id,\n            notif_type=notif.notif_type,\n            dismissed=notif.dismissed,\n            last_shown=notif.last_shown,\n            first_shown=notif.first_shown,\n            title=notif.title,\n            description=notif.description,\n            additional_data=notif.additional_data,\n        )\n\n\nclass Settings(BaseModel):\n    \"\"\"General settings\"\"\"\n\n    # is float to allow for fractional days for easier automated testing\n    maximum_chat_retention_days: float | None = None\n    company_name: str | None = None\n    company_description: str | None = None\n    gpu_enabled: bool | None = None\n    application_status: ApplicationStatus = ApplicationStatus.ACTIVE\n    anonymous_user_enabled: bool | None = None\n    invite_only_enabled: bool = False\n    deep_research_enabled: bool | None = None\n    search_ui_enabled: bool | None = None\n\n    # Whether EE features are unlocked for use.\n    # Depends on license status: True when the user has a valid license\n    # (ACTIVE, GRACE_PERIOD, PAYMENT_REMINDER), False when there's no license\n    # or the license is expired (GATED_ACCESS).\n    # This controls UI visibility of EE features (user groups, analytics, RBAC, etc.).\n    ee_features_enabled: bool = False\n\n    temperature_override_enabled: bool | None = False\n    auto_scroll: bool | None = False\n    query_history_type: QueryHistoryType | None = None\n\n    # Image processing settings\n    image_extraction_and_analysis_enabled: bool | None = False\n    search_time_image_analysis_enabled: bool | None = False\n    image_analysis_max_size_mb: int | None = 20\n\n    # User Knowledge settings\n    user_knowledge_enabled: bool | None = True\n    user_file_max_upload_size_mb: int | None = Field(\n        default=DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB, ge=0\n    )\n    file_token_count_threshold_k: int | None = Field(\n        default=None, ge=0  # thousands of tokens; None = context-aware default\n    )\n\n    # Connector settings\n    show_extra_connectors: bool | None = True\n\n    # Default Assistant settings\n    disable_default_assistant: bool | None = False\n\n    # Seat usage - populated by license enforcement when seat limit is exceeded\n    seat_count: int | None = None\n    used_seats: int | None = None\n\n    # OpenSearch migration\n    opensearch_indexing_enabled: bool = False\n\n\nclass UserSettings(Settings):\n    notifications: list[Notification]\n    needs_reindexing: bool\n    tenant_id: str = POSTGRES_DEFAULT_SCHEMA\n    # Feature flag for Onyx Craft (Build Mode) - used for server-side redirects\n    onyx_craft_enabled: bool = False\n    # True when a vector database (Vespa/OpenSearch) is available.\n    # False when DISABLE_VECTOR_DB is set — connectors, RAG search, and\n    # document sets are unavailable.\n    vector_db_enabled: bool = True\n    # True when hooks are available: single-tenant EE deployments only.\n    hooks_enabled: bool = False\n    # Application version, read from the ONYX_VERSION env var at startup.\n    version: str | None = None\n    # Hard ceiling for user_file_max_upload_size_mb, derived from env var.\n    max_allowed_upload_size_mb: int = MAX_ALLOWED_UPLOAD_SIZE_MB\n    # Factory defaults so the frontend can show a \"restore default\" button.\n    default_user_file_max_upload_size_mb: int = DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB\n    default_file_token_count_threshold_k: int = Field(\n        default_factory=lambda: (\n            DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB\n            if DISABLE_VECTOR_DB\n            else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB\n        )\n    )\n"
  },
  {
    "path": "backend/onyx/server/settings/store.py",
    "content": "from onyx.cache.factory import get_cache_backend\nfrom onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB\nfrom onyx.configs.app_configs import DISABLE_USER_KNOWLEDGE\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\nfrom onyx.configs.app_configs import MAX_ALLOWED_UPLOAD_SIZE_MB\nfrom onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE\nfrom onyx.configs.app_configs import SHOW_EXTRA_CONNECTORS\nfrom onyx.configs.constants import KV_SETTINGS_KEY\nfrom onyx.configs.constants import OnyxRedisLocks\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.server.settings.models import (\n    DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB,\n)\nfrom onyx.server.settings.models import DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB\nfrom onyx.server.settings.models import Settings\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# TTL for settings keys - 30 days\nSETTINGS_TTL = 30 * 24 * 60 * 60\n\n\ndef load_settings() -> Settings:\n    kv_store = get_kv_store()\n    try:\n        stored_settings = kv_store.load(KV_SETTINGS_KEY)\n        settings = (\n            Settings.model_validate(stored_settings) if stored_settings else Settings()\n        )\n    except KvKeyNotFoundError:\n        # Default to empty settings if no settings have been set yet\n        logger.debug(f\"No settings found in KV store for key: {KV_SETTINGS_KEY}\")\n        settings = Settings()\n    except Exception as e:\n        logger.error(f\"Error loading settings from KV store: {str(e)}\")\n        settings = Settings()\n\n    cache = get_cache_backend()\n\n    try:\n        value = cache.get(OnyxRedisLocks.ANONYMOUS_USER_ENABLED)\n        if value is not None:\n            anonymous_user_enabled = int(value.decode(\"utf-8\")) == 1\n        else:\n            anonymous_user_enabled = False\n            cache.set(OnyxRedisLocks.ANONYMOUS_USER_ENABLED, \"0\", ex=SETTINGS_TTL)\n    except Exception as e:\n        logger.error(f\"Error loading anonymous user setting from cache: {str(e)}\")\n        anonymous_user_enabled = False\n\n    settings.anonymous_user_enabled = anonymous_user_enabled\n    settings.query_history_type = ONYX_QUERY_HISTORY_TYPE\n\n    if DISABLE_USER_KNOWLEDGE:\n        settings.user_knowledge_enabled = False\n\n    settings.show_extra_connectors = SHOW_EXTRA_CONNECTORS\n    settings.opensearch_indexing_enabled = ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\n\n    # Resolve context-aware defaults for token threshold.\n    # None = admin hasn't set a value yet → use context-aware default.\n    # 0 = admin explicitly chose \"no limit\" → preserve as-is.\n    if settings.file_token_count_threshold_k is None:\n        settings.file_token_count_threshold_k = (\n            DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB\n            if DISABLE_VECTOR_DB\n            else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB\n        )\n\n    # Upload size: 0 and None are treated as \"unset\" (not \"no limit\") →\n    # fall back to min(configured default, hard ceiling).\n    if not settings.user_file_max_upload_size_mb:\n        settings.user_file_max_upload_size_mb = min(\n            DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB,\n            MAX_ALLOWED_UPLOAD_SIZE_MB,\n        )\n\n    # Clamp to env ceiling so stale KV values are capped even if the\n    # operator lowered MAX_ALLOWED_UPLOAD_SIZE_MB after a higher value\n    # was already saved (api.py only guards new writes).\n    if (\n        settings.user_file_max_upload_size_mb > 0\n        and settings.user_file_max_upload_size_mb > MAX_ALLOWED_UPLOAD_SIZE_MB\n    ):\n        settings.user_file_max_upload_size_mb = MAX_ALLOWED_UPLOAD_SIZE_MB\n\n    return settings\n\n\ndef store_settings(settings: Settings) -> None:\n    cache = get_cache_backend()\n\n    if settings.anonymous_user_enabled is not None:\n        cache.set(\n            OnyxRedisLocks.ANONYMOUS_USER_ENABLED,\n            \"1\" if settings.anonymous_user_enabled else \"0\",\n            ex=SETTINGS_TTL,\n        )\n\n    get_kv_store().store(KV_SETTINGS_KEY, settings.model_dump())\n"
  },
  {
    "path": "backend/onyx/server/tenant_usage_limits.py",
    "content": "\"\"\"\nNon-EE version of tenant usage limit overrides.\n\nIn non-EE deployments, there are no tenant-specific overrides - all tenants\nuse the default limits from environment variables.\n\nThe EE version (ee.onyx.server.tenant_usage_limits) fetches per-tenant\noverrides from the control plane.\n\"\"\"\n\nfrom enum import Enum\n\nfrom pydantic import BaseModel\n\n\n# NOTE: this must be updated along with the BaseModel below\nclass TenantUsageLimitKeys(str, Enum):\n    LLM_COST_CENTS_TRIAL = \"llm_cost_cents_trial\"\n    LLM_COST_CENTS_PAID = \"llm_cost_cents_paid\"\n    CHUNKS_INDEXED_TRIAL = \"chunks_indexed_trial\"\n    CHUNKS_INDEXED_PAID = \"chunks_indexed_paid\"\n    API_CALLS_TRIAL = \"api_calls_trial\"\n    API_CALLS_PAID = \"api_calls_paid\"\n    NON_STREAMING_CALLS_TRIAL = \"non_streaming_calls_trial\"\n    NON_STREAMING_CALLS_PAID = \"non_streaming_calls_paid\"\n\n\nclass TenantUsageLimitOverrides(BaseModel):\n    \"\"\"Usage limit overrides for a specific tenant.\n\n    Field behavior:\n    - Field not present or set to null: Use the default env var value\n    - Field set to -1: No limit (unlimited)\n    - Field set to a positive integer: Use that specific limit\n    \"\"\"\n\n    tenant_id: str | None = None\n\n    llm_cost_cents_trial: int | None = None\n    llm_cost_cents_paid: int | None = None\n    chunks_indexed_trial: int | None = None\n    chunks_indexed_paid: int | None = None\n    api_calls_trial: int | None = None\n    api_calls_paid: int | None = None\n    non_streaming_calls_trial: int | None = None\n    non_streaming_calls_paid: int | None = None\n\n\ndef get_tenant_usage_limit_overrides(\n    tenant_id: str,  # noqa: ARG001\n) -> TenantUsageLimitOverrides | None:\n    \"\"\"\n    Get the usage limit overrides for a specific tenant.\n\n    Non-EE version always returns None (no overrides available).\n    The EE version fetches tenant-specific overrides from the control plane.\n\n    Args:\n        tenant_id: The tenant ID to look up\n\n    Returns:\n        None - no overrides in non-EE deployments\n    \"\"\"\n    return None\n\n\ndef load_usage_limit_overrides() -> None:\n    \"\"\"\n    Load tenant usage limit overrides from the control plane.\n\n    Non-EE version is a no-op since there's no control plane to fetch from.\n    \"\"\"\n    return None\n"
  },
  {
    "path": "backend/onyx/server/token_rate_limits/api.py",
    "content": "from fastapi import APIRouter\nfrom fastapi import Depends\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.configs.constants import PUBLIC_API_TAGS\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import User\nfrom onyx.db.token_limit import delete_token_rate_limit\nfrom onyx.db.token_limit import fetch_all_global_token_rate_limits\nfrom onyx.db.token_limit import insert_global_token_rate_limit\nfrom onyx.db.token_limit import update_token_rate_limit\nfrom onyx.server.query_and_chat.token_limit import any_rate_limit_exists\nfrom onyx.server.token_rate_limits.models import TokenRateLimitArgs\nfrom onyx.server.token_rate_limits.models import TokenRateLimitDisplay\n\nrouter = APIRouter(prefix=\"/admin/token-rate-limits\", tags=PUBLIC_API_TAGS)\n\n\n\"\"\"\nGlobal Token Limit Settings\n\"\"\"\n\n\n@router.get(\"/global\")\ndef get_global_token_limit_settings(\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> list[TokenRateLimitDisplay]:\n    return [\n        TokenRateLimitDisplay.from_db(token_rate_limit)\n        for token_rate_limit in fetch_all_global_token_rate_limits(db_session)\n    ]\n\n\n@router.post(\"/global\")\ndef create_global_token_limit_settings(\n    token_limit_settings: TokenRateLimitArgs,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> TokenRateLimitDisplay:\n    rate_limit_display = TokenRateLimitDisplay.from_db(\n        insert_global_token_rate_limit(db_session, token_limit_settings)\n    )\n    # clear cache in case this was the first rate limit created\n    any_rate_limit_exists.cache_clear()\n    return rate_limit_display\n\n\n\"\"\"\nGeneral Token Limit Settings\n\"\"\"\n\n\n@router.put(\"/rate-limit/{token_rate_limit_id}\")\ndef update_token_limit_settings(\n    token_rate_limit_id: int,\n    token_limit_settings: TokenRateLimitArgs,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> TokenRateLimitDisplay:\n    return TokenRateLimitDisplay.from_db(\n        update_token_rate_limit(\n            db_session=db_session,\n            token_rate_limit_id=token_rate_limit_id,\n            token_rate_limit_settings=token_limit_settings,\n        )\n    )\n\n\n@router.delete(\"/rate-limit/{token_rate_limit_id}\")\ndef delete_token_limit_settings(\n    token_rate_limit_id: int,\n    _: User = Depends(current_admin_user),\n    db_session: Session = Depends(get_session),\n) -> None:\n    return delete_token_rate_limit(\n        db_session=db_session,\n        token_rate_limit_id=token_rate_limit_id,\n    )\n"
  },
  {
    "path": "backend/onyx/server/token_rate_limits/models.py",
    "content": "from pydantic import BaseModel\n\nfrom onyx.db.models import TokenRateLimit\n\n\nclass TokenRateLimitArgs(BaseModel):\n    enabled: bool\n    token_budget: int\n    period_hours: int\n\n\nclass TokenRateLimitDisplay(BaseModel):\n    token_id: int\n    enabled: bool\n    token_budget: int\n    period_hours: int\n\n    @classmethod\n    def from_db(cls, token_rate_limit: TokenRateLimit) -> \"TokenRateLimitDisplay\":\n        return cls(\n            token_id=token_rate_limit.id,\n            enabled=token_rate_limit.enabled,\n            token_budget=token_rate_limit.token_budget,\n            period_hours=token_rate_limit.period_hours,\n        )\n"
  },
  {
    "path": "backend/onyx/server/usage_limits.py",
    "content": "\"\"\"Usage limits enforcement for cloud deployments.\"\"\"\n\nfrom collections.abc import Callable\n\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import ANTHROPIC_DEFAULT_API_KEY\nfrom onyx.configs.app_configs import COHERE_DEFAULT_API_KEY\nfrom onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY\nfrom onyx.configs.app_configs import OPENROUTER_DEFAULT_API_KEY\nfrom onyx.db.usage import check_usage_limit\nfrom onyx.db.usage import UsageLimitExceededError\nfrom onyx.db.usage import UsageType\nfrom onyx.server.tenant_usage_limits import TenantUsageLimitKeys\nfrom onyx.server.tenant_usage_limits import TenantUsageLimitOverrides\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom shared_configs.configs import USAGE_LIMIT_API_CALLS_PAID\nfrom shared_configs.configs import USAGE_LIMIT_API_CALLS_TRIAL\nfrom shared_configs.configs import USAGE_LIMIT_CHUNKS_INDEXED_PAID\nfrom shared_configs.configs import USAGE_LIMIT_CHUNKS_INDEXED_TRIAL\nfrom shared_configs.configs import USAGE_LIMIT_LLM_COST_CENTS_PAID\nfrom shared_configs.configs import USAGE_LIMIT_LLM_COST_CENTS_TRIAL\nfrom shared_configs.configs import USAGE_LIMIT_NON_STREAMING_CALLS_PAID\nfrom shared_configs.configs import USAGE_LIMIT_NON_STREAMING_CALLS_TRIAL\nfrom shared_configs.configs import USAGE_LIMITS_ENABLED\n\nlogger = setup_logger()\n\n# Collect all Onyx-managed default API keys for comparison\n_ONYX_MANAGED_API_KEYS: set[str] = set()\nfor key in [\n    OPENAI_DEFAULT_API_KEY,\n    ANTHROPIC_DEFAULT_API_KEY,\n    COHERE_DEFAULT_API_KEY,\n    OPENROUTER_DEFAULT_API_KEY,\n]:\n    if key:\n        _ONYX_MANAGED_API_KEYS.add(key)\n\n\ndef is_onyx_managed_api_key(api_key: str | None) -> bool:\n    \"\"\"Check if the given API key is one of Onyx's managed default keys.\"\"\"\n    return bool(api_key) and api_key in _ONYX_MANAGED_API_KEYS\n\n\ndef is_usage_limits_enabled() -> bool:\n    \"\"\"Check if usage limits are enabled for this deployment.\"\"\"\n    return USAGE_LIMITS_ENABLED\n\n\ndef is_tenant_on_trial(tenant_id: str) -> bool:  # noqa: ARG001\n    \"\"\"\n    Determine if a tenant is currently on a trial subscription.\n\n    Non-EE version always returns False. EE version fetches billing information\n    from the control plane to determine if the tenant has an active trial.\n    \"\"\"\n    return False\n\n\ndef is_tenant_on_trial_fn(tenant_id: str) -> bool:\n    \"\"\"\n    Get the versioned implementation of is_tenant_on_trial and call it.\n\n    Uses fetch_versioned_implementation to get the EE version if available,\n    otherwise falls back to the non-EE version that returns False.\n    \"\"\"\n    fn: Callable[[str], bool] = fetch_versioned_implementation(\n        \"onyx.server.usage_limits\", \"is_tenant_on_trial\"\n    )\n    return fn(tenant_id)\n\n\ndef _get_tenant_override(tenant_id: str, field_name: str) -> int | None:\n    \"\"\"\n    Get a tenant-specific usage limit override if available.\n\n    Uses fetch_versioned_implementation to get EE version if available.\n\n    Returns:\n        - Positive int: Use this specific limit\n        - -1 (NO_LIMIT): No limit (unlimited)\n        - None: No override specified, use default env var value\n    \"\"\"\n    try:\n        # Try to get EE version that has tenant overrides\n        get_overrides_fn = fetch_versioned_implementation(\n            \"onyx.server.tenant_usage_limits\", \"get_tenant_usage_limit_overrides\"\n        )\n        overrides: TenantUsageLimitOverrides | None = get_overrides_fn(tenant_id)\n\n        if overrides is not None:\n            # Get the field value - None means not set, use default\n            return getattr(overrides, field_name, None)\n    except Exception:\n        logger.exception(\n            \"Error getting tenant override for %s.%s falling back to defaults\",\n            tenant_id,\n            field_name,\n        )\n    return None\n\n\n# Special value meaning \"no limit\" (unlimited)\nNO_LIMIT = -1\n_FIELD_AND_DEFAULT = {\n    UsageType.LLM_COST: {\n        True: (\n            TenantUsageLimitKeys.LLM_COST_CENTS_TRIAL,\n            USAGE_LIMIT_LLM_COST_CENTS_TRIAL,\n        ),\n        False: (\n            TenantUsageLimitKeys.LLM_COST_CENTS_PAID,\n            USAGE_LIMIT_LLM_COST_CENTS_PAID,\n        ),\n    },\n    UsageType.CHUNKS_INDEXED: {\n        True: (\n            TenantUsageLimitKeys.CHUNKS_INDEXED_TRIAL,\n            USAGE_LIMIT_CHUNKS_INDEXED_TRIAL,\n        ),\n        False: (\n            TenantUsageLimitKeys.CHUNKS_INDEXED_PAID,\n            USAGE_LIMIT_CHUNKS_INDEXED_PAID,\n        ),\n    },\n    UsageType.API_CALLS: {\n        True: (TenantUsageLimitKeys.API_CALLS_TRIAL, USAGE_LIMIT_API_CALLS_TRIAL),\n        False: (TenantUsageLimitKeys.API_CALLS_PAID, USAGE_LIMIT_API_CALLS_PAID),\n    },\n    UsageType.NON_STREAMING_API_CALLS: {\n        True: (\n            TenantUsageLimitKeys.NON_STREAMING_CALLS_TRIAL,\n            USAGE_LIMIT_NON_STREAMING_CALLS_TRIAL,\n        ),\n        False: (\n            TenantUsageLimitKeys.NON_STREAMING_CALLS_PAID,\n            USAGE_LIMIT_NON_STREAMING_CALLS_PAID,\n        ),\n    },\n}\n\n\ndef get_limit_for_usage_type(\n    usage_type: UsageType, is_trial: bool, tenant_id: str | None\n) -> int:\n    \"\"\"\n    Get the appropriate limit based on usage type, trial status, and tenant overrides.\n\n    Returns:\n        - Positive int: The usage limit\n        - NO_LIMIT (-1): No limit (unlimited) for this tenant\n    \"\"\"\n\n    field_name, default_value = _FIELD_AND_DEFAULT[usage_type][is_trial]\n    if tenant_id:\n        override = _get_tenant_override(tenant_id, field_name)\n        if override is not None:\n            logger.debug(\n                \"Using tenant override for %s.%s: %s\", tenant_id, field_name, override\n            )\n            return override\n    logger.debug(\n        \"Using default value for %s.%s: %s\", usage_type, is_trial, default_value\n    )\n    return default_value\n\n\ndef check_llm_cost_limit_for_provider(\n    db_session: Session,\n    tenant_id: str,\n    llm_provider_api_key: str | None,\n) -> None:\n    \"\"\"\n    Check if the LLM cost limit would be exceeded for a provider using Onyx-managed keys.\n\n    Only enforces limits when the provider uses Onyx-managed API keys.\n    Users with their own API keys are not subject to LLM cost limits.\n\n    Args:\n        db_session: Database session for the tenant\n        tenant_id: The tenant ID for trial detection\n        llm_provider_api_key: The API key of the LLM provider that will be used\n\n    Raises:\n        HTTPException: 429 Too Many Requests if limit exceeded\n    \"\"\"\n    if not is_usage_limits_enabled():\n        return\n\n    # Only enforce limits for Onyx-managed API keys\n    if not is_onyx_managed_api_key(llm_provider_api_key):\n        return\n\n    check_usage_and_raise(\n        db_session=db_session,\n        usage_type=UsageType.LLM_COST,\n        tenant_id=tenant_id,\n        pending_amount=0,  # We check current usage, not pending\n    )\n\n\ndef check_usage_and_raise(\n    db_session: Session,\n    usage_type: UsageType,\n    tenant_id: str,\n    pending_amount: float | int = 0,\n) -> None:\n    \"\"\"\n    Check if usage limit would be exceeded and raise HTTPException if so.\n\n    Args:\n        db_session: Database session for the tenant\n        usage_type: Type of usage to check\n        tenant_id: The tenant ID for trial detection\n        pending_amount: Amount about to be used\n\n    Raises:\n        HTTPException: 429 Too Many Requests if limit exceeded\n    \"\"\"\n    if not is_usage_limits_enabled():\n        return\n\n    is_trial = is_tenant_on_trial_fn(tenant_id)\n    limit = get_limit_for_usage_type(usage_type, is_trial, tenant_id)\n    logger.debug(\"Checking usage limit for %s.%s: %s\", usage_type, is_trial, limit)\n\n    # NO_LIMIT means this tenant has unlimited usage for this type\n    if limit == NO_LIMIT:\n        return\n\n    try:\n        check_usage_limit(\n            db_session=db_session,\n            usage_type=usage_type,\n            limit=limit,\n            pending_amount=pending_amount,\n        )\n    except UsageLimitExceededError as e:\n        user_type = \"trial\" if is_trial else \"paid\"\n        if usage_type == UsageType.LLM_COST:\n            detail = (\n                f\"LLM usage limit exceeded for {user_type} account. \"\n                f\"Current cost: ${e.current / 100:.2f}, \"\n                f\"Limit: ${e.limit / 100:.2f} per week. \"\n                \"Please use your own LLM API key, upgrade your plan,\"\n                \" or wait for the next billing period (1 week).\"\n            )\n        elif usage_type == UsageType.CHUNKS_INDEXED:\n            detail = (\n                f\"Document indexing limit exceeded for {user_type} account. \"\n                f\"Indexed: {int(e.current)} chunks, Limit: {int(e.limit)} per week. \"\n                \"Please upgrade your plan or wait for the next billing period.\"\n            )\n        elif usage_type == UsageType.API_CALLS:\n            detail = (\n                f\"API call limit exceeded for {user_type} account. \"\n                f\"Calls: {int(e.current)}, Limit: {int(e.limit)} per week. \"\n                \"Please upgrade your plan or wait for the next billing period.\"\n            )\n        else:\n            detail = (\n                f\"Non-streaming API call limit exceeded for {user_type} account. \"\n                f\"Calls: {int(e.current)}, Limit: {int(e.limit)} per week. \"\n                \"Please upgrade your plan or wait for the next billing period.\"\n            )\n\n        raise HTTPException(status_code=429, detail=detail)\n"
  },
  {
    "path": "backend/onyx/server/utils.py",
    "content": "import base64\nimport json\nimport os\nfrom datetime import datetime\nfrom typing import Any\nfrom uuid import UUID\n\nfrom fastapi import HTTPException\nfrom fastapi import status\n\n\nclass BasicAuthenticationError(HTTPException):\n    def __init__(self, detail: str):\n        super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)\n\n\nclass OnyxJSONEncoder(json.JSONEncoder):\n    \"\"\"Custom JSON encoder that converts datetime and UUID objects to strings.\"\"\"\n\n    def default(self, obj: Any) -> Any:\n        if isinstance(obj, datetime):\n            return obj.isoformat()\n        if isinstance(obj, UUID):\n            return str(obj)\n        return super().default(obj)\n\n\ndef get_json_line(\n    json_dict: dict[str, Any], encoder: type[json.JSONEncoder] = OnyxJSONEncoder\n) -> str:\n    \"\"\"\n    Convert a dictionary to a JSON string with custom type handling, and add a newline.\n\n    Args:\n        json_dict: The dictionary to be converted to JSON.\n        encoder: JSON encoder class to use, defaults to OnyxJSONEncoder.\n\n    Returns:\n        A JSON string representation of the input dictionary with a newline character.\n    \"\"\"\n    return json.dumps(json_dict, cls=encoder) + \"\\n\"\n\n\ndef make_short_id() -> str:\n    \"\"\"Fast way to generate a random 8 character id ... useful for tagging data\n    to trace it through a flow. This is definitely not guaranteed to be unique and is\n    targeted at the stated use case.\"\"\"\n    return base64.b32encode(os.urandom(5)).decode(\"utf-8\")[:8]  # 5 bytes → 8 chars\n"
  },
  {
    "path": "backend/onyx/server/utils_vector_db.py",
    "content": "\"\"\"Utilities for gating endpoints that require a vector database.\"\"\"\n\nfrom fastapi import HTTPException\nfrom starlette.status import HTTP_501_NOT_IMPLEMENTED\n\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\n\n\ndef require_vector_db() -> None:\n    \"\"\"FastAPI dependency — raises 501 when the vector DB is disabled.\"\"\"\n    if DISABLE_VECTOR_DB:\n        raise HTTPException(\n            status_code=HTTP_501_NOT_IMPLEMENTED,\n            detail=\"This feature requires a vector database (DISABLE_VECTOR_DB is set).\",\n        )\n"
  },
  {
    "path": "backend/onyx/setup.py",
    "content": "import time\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\nfrom onyx.configs.app_configs import INTEGRATION_TESTS_MODE\nfrom onyx.configs.app_configs import MANAGED_VESPA\nfrom onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP\nfrom onyx.configs.constants import KV_REINDEX_KEY\nfrom onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS\nfrom onyx.configs.embedding_configs import SupportedEmbeddingModel\nfrom onyx.configs.model_configs import GEN_AI_API_KEY\nfrom onyx.configs.model_configs import GEN_AI_MODEL_VERSION\nfrom onyx.context.search.models import SavedSearchSettings\nfrom onyx.db.connector import check_connectors_exist\nfrom onyx.db.connector import create_initial_default_connector\nfrom onyx.db.connector_credential_pair import associate_default_cc_pair\nfrom onyx.db.connector_credential_pair import get_connector_credential_pairs\nfrom onyx.db.connector_credential_pair import resync_cc_pair\nfrom onyx.db.credentials import create_initial_public_credential\nfrom onyx.db.document import check_docs_exist\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.db.index_attempt import cancel_indexing_attempts_past_model\nfrom onyx.db.index_attempt import expire_index_attempts\nfrom onyx.db.llm import fetch_default_llm_model\nfrom onyx.db.llm import fetch_existing_llm_provider\nfrom onyx.db.llm import update_default_provider\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.db.search_settings import get_active_search_settings\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.db.search_settings import update_current_search_settings\nfrom onyx.db.swap_index import check_and_perform_index_swap\nfrom onyx.document_index.factory import get_all_document_indices\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.document_index.opensearch.client import OpenSearchClient\nfrom onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout\nfrom onyx.document_index.opensearch.opensearch_document_index import set_cluster_state\nfrom onyx.document_index.vespa.index import VespaIndex\nfrom onyx.indexing.models import IndexingSetting\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.well_known_providers.llm_provider_options import get_openai_model_names\nfrom onyx.natural_language_processing.search_nlp_models import EmbeddingModel\nfrom onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\nfrom onyx.server.settings.store import load_settings\nfrom onyx.server.settings.store import store_settings\nfrom onyx.utils.gpu_utils import gpu_status_request\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import ALT_INDEX_SUFFIX\nfrom shared_configs.configs import MODEL_SERVER_HOST\nfrom shared_configs.configs import MODEL_SERVER_PORT\nfrom shared_configs.configs import MULTI_TENANT\n\n\nlogger = setup_logger()\n\n\ndef setup_onyx(\n    db_session: Session,\n    tenant_id: str,  # noqa: ARG001\n    cohere_enabled: bool = False,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Setup Onyx for a particular tenant. In the Single Tenant case, it will set it up for the default schema\n    on server startup. In the MT case, it will be called when the tenant is created.\n\n    The Tenant Service calls the tenants/create endpoint which runs this.\n    \"\"\"\n    check_and_perform_index_swap(db_session=db_session)\n\n    active_search_settings = get_active_search_settings(db_session)\n    search_settings = active_search_settings.primary\n    secondary_search_settings = active_search_settings.secondary\n\n    # search_settings = get_current_search_settings(db_session)\n    # multipass_config_1 = get_multipass_config(search_settings)\n\n    # secondary_large_chunks_enabled: bool | None = None\n    # secondary_search_settings = get_secondary_search_settings(db_session)\n    # if secondary_search_settings:\n    #     multipass_config_2 = get_multipass_config(secondary_search_settings)\n    #     secondary_large_chunks_enabled = multipass_config_2.enable_large_chunks\n\n    # Break bad state for thrashing indexes\n    if secondary_search_settings and DISABLE_INDEX_UPDATE_ON_SWAP:\n        expire_index_attempts(\n            search_settings_id=search_settings.id, db_session=db_session\n        )\n\n        for cc_pair in get_connector_credential_pairs(db_session):\n            resync_cc_pair(\n                cc_pair=cc_pair,\n                search_settings_id=search_settings.id,\n                db_session=db_session,\n            )\n\n    # Expire all old embedding models indexing attempts, technically redundant\n    cancel_indexing_attempts_past_model(db_session)\n\n    logger.notice(f'Using Embedding model: \"{search_settings.model_name}\"')\n    if search_settings.query_prefix or search_settings.passage_prefix:\n        logger.notice(f'Query embedding prefix: \"{search_settings.query_prefix}\"')\n        logger.notice(f'Passage embedding prefix: \"{search_settings.passage_prefix}\"')\n\n    if search_settings:\n        if search_settings.multilingual_expansion:\n            logger.notice(\n                f\"Multilingual query expansion is enabled with {search_settings.multilingual_expansion}.\"\n            )\n\n    # setup Postgres with default credential, llm providers, etc.\n    setup_postgres(db_session)\n\n    # Does the user need to trigger a reindexing to bring the document index\n    # into a good state, marked in the kv store\n    if not MULTI_TENANT:\n        mark_reindex_flag(db_session)\n\n    if DISABLE_VECTOR_DB:\n        logger.notice(\n            \"DISABLE_VECTOR_DB is set — skipping document index setup and embedding model warm-up.\"\n        )\n    else:\n        # Ensure Vespa is setup correctly, this step is relatively near the end\n        # because Vespa takes a bit of time to start up\n        logger.notice(\"Verifying Document Index(s) is/are available.\")\n        # This flow is for setting up the document index so we get all indices here.\n        document_indices = get_all_document_indices(\n            search_settings,\n            secondary_search_settings,\n            None,\n        )\n\n        success = setup_document_indices(\n            document_indices,\n            IndexingSetting.from_db_model(search_settings),\n            (\n                IndexingSetting.from_db_model(secondary_search_settings)\n                if secondary_search_settings\n                else None\n            ),\n        )\n        if not success:\n            raise RuntimeError(\n                \"Could not connect to a document index within the specified timeout.\"\n            )\n\n        logger.notice(f\"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}\")\n        if search_settings.provider_type is None:\n            # In integration tests, do not block API startup on warm-up\n            warm_up_bi_encoder(\n                embedding_model=EmbeddingModel.from_db_model(\n                    search_settings=search_settings,\n                    server_host=MODEL_SERVER_HOST,\n                    server_port=MODEL_SERVER_PORT,\n                ),\n                non_blocking=INTEGRATION_TESTS_MODE,\n            )\n\n        # update multipass indexing setting based on GPU availability\n        update_default_multipass_indexing(db_session)\n\n\ndef mark_reindex_flag(db_session: Session) -> None:\n    kv_store = get_kv_store()\n    try:\n        value = kv_store.load(KV_REINDEX_KEY)\n        logger.debug(f\"Re-indexing flag has value {value}\")\n        return\n    except KvKeyNotFoundError:\n        # Only need to update the flag if it hasn't been set\n        pass\n\n    # If their first deployment is after the changes, it will\n    # enable this when the other changes go in, need to avoid\n    # this being set to False, then the user indexes things on the old version\n    docs_exist = check_docs_exist(db_session)\n    connectors_exist = check_connectors_exist(db_session)\n    if docs_exist or connectors_exist:\n        kv_store.store(KV_REINDEX_KEY, True)\n    else:\n        kv_store.store(KV_REINDEX_KEY, False)\n\n\ndef setup_document_indices(\n    document_indices: list[DocumentIndex],\n    index_setting: IndexingSetting,\n    secondary_index_setting: IndexingSetting | None,\n    num_attempts: int = VESPA_NUM_ATTEMPTS_ON_STARTUP,\n) -> bool:\n    \"\"\"Sets up all input document indices.\n\n    If any document index setup fails, the function will return False. Otherwise\n    returns True.\n    \"\"\"\n    for document_index in document_indices:\n        # Document index startup is a bit slow, so give it a few seconds.\n        WAIT_SECONDS = 5\n        document_index_setup_success = False\n        for x in range(num_attempts):\n            try:\n                logger.notice(\n                    f\"Setting up document index {document_index.__class__.__name__} (attempt {x + 1}/{num_attempts})...\"\n                )\n                document_index.ensure_indices_exist(\n                    primary_embedding_dim=index_setting.final_embedding_dim,\n                    primary_embedding_precision=index_setting.embedding_precision,\n                    secondary_index_embedding_dim=(\n                        secondary_index_setting.final_embedding_dim\n                        if secondary_index_setting\n                        else None\n                    ),\n                    secondary_index_embedding_precision=(\n                        secondary_index_setting.embedding_precision\n                        if secondary_index_setting\n                        else None\n                    ),\n                )\n\n                logger.notice(\n                    f\"Document index {document_index.__class__.__name__} setup complete.\"\n                )\n                document_index_setup_success = True\n                break\n            except Exception:\n                logger.exception(\n                    f\"Document index {document_index.__class__.__name__} setup did not succeed. \"\n                    \"The relevant service may not be ready yet. \"\n                    f\"Retrying in {WAIT_SECONDS} seconds.\"\n                )\n                time.sleep(WAIT_SECONDS)\n\n        if not document_index_setup_success:\n            logger.error(\n                f\"Document index {document_index.__class__.__name__} setup did not succeed. \"\n                f\"Attempt limit reached. ({num_attempts})\"\n            )\n            return False\n\n    return True\n\n\ndef setup_postgres(db_session: Session) -> None:\n    logger.notice(\"Verifying default connector/credential exist.\")\n    create_initial_public_credential(db_session)\n    create_initial_default_connector(db_session)\n    associate_default_cc_pair(db_session)\n\n    if GEN_AI_API_KEY and fetch_default_llm_model(db_session) is None:\n        # Only for dev flows\n        logger.notice(\"Setting up default OpenAI LLM for dev.\")\n\n        llm_model = GEN_AI_MODEL_VERSION or \"gpt-4o-mini\"\n        provider_name = \"DevEnvPresetOpenAI\"\n        existing = fetch_existing_llm_provider(\n            name=provider_name, db_session=db_session\n        )\n        model_req = LLMProviderUpsertRequest(\n            id=existing.id if existing else None,\n            name=provider_name,\n            provider=LlmProviderNames.OPENAI,\n            api_key=GEN_AI_API_KEY,\n            api_base=None,\n            api_version=None,\n            custom_config=None,\n            is_public=True,\n            groups=[],\n            model_configurations=[\n                ModelConfigurationUpsertRequest(name=name, is_visible=True)\n                for name in get_openai_model_names()\n            ],\n            api_key_changed=True,\n        )\n        try:\n            new_llm_provider = upsert_llm_provider(\n                llm_provider_upsert_request=model_req, db_session=db_session\n            )\n        except ValueError as e:\n            logger.warning(\"Failed to upsert LLM provider during setup: %s\", e)\n            return\n        update_default_provider(\n            provider_id=new_llm_provider.id, model_name=llm_model, db_session=db_session\n        )\n\n\ndef update_default_multipass_indexing(db_session: Session) -> None:\n    docs_exist = check_docs_exist(db_session)\n    connectors_exist = check_connectors_exist(db_session)\n    logger.debug(f\"Docs exist: {docs_exist}, Connectors exist: {connectors_exist}\")\n\n    if not docs_exist and not connectors_exist:\n        logger.info(\n            \"No existing docs or connectors found. Checking GPU availability for multipass indexing.\"\n        )\n        gpu_available = gpu_status_request(indexing=True)\n        logger.info(f\"GPU available: {gpu_available}\")\n\n        current_settings = get_current_search_settings(db_session)\n\n        logger.notice(f\"Updating multipass indexing setting to: {gpu_available}\")\n        updated_settings = SavedSearchSettings.from_db_model(current_settings)\n        # Enable multipass indexing if GPU is available or if using a cloud provider\n        updated_settings.multipass_indexing = (\n            gpu_available or current_settings.cloud_provider is not None\n        )\n        update_current_search_settings(db_session, updated_settings)\n\n        # Update settings with GPU availability\n        settings = load_settings()\n        settings.gpu_enabled = gpu_available\n        store_settings(settings)\n        logger.notice(f\"Updated settings with GPU availability: {gpu_available}\")\n\n    else:\n        logger.debug(\n            \"Existing docs or connectors found. Skipping multipass indexing update.\"\n        )\n\n\ndef setup_multitenant_onyx() -> None:\n    if DISABLE_VECTOR_DB:\n        logger.notice(\"DISABLE_VECTOR_DB is set — skipping multitenant Vespa setup.\")\n        return\n\n    if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:\n        opensearch_client = OpenSearchClient()\n        if not wait_for_opensearch_with_timeout(client=opensearch_client):\n            raise RuntimeError(\"Failed to connect to OpenSearch.\")\n        set_cluster_state(opensearch_client)\n\n    # For Managed Vespa, the schema is sent over via the Vespa Console manually.\n    # NOTE: Pretty sure this code is never hit in any production environment.\n    if not MANAGED_VESPA:\n        setup_vespa_multitenant(SUPPORTED_EMBEDDING_MODELS)\n\n\ndef setup_vespa_multitenant(supported_indices: list[SupportedEmbeddingModel]) -> bool:\n    # TODO(andrei): We don't yet support OpenSearch for multi-tenant instances\n    # so this function remains unchanged.\n    # This is for local testing\n    WAIT_SECONDS = 5\n    VESPA_ATTEMPTS = 5\n    for x in range(VESPA_ATTEMPTS):\n        try:\n            logger.notice(f\"Setting up Vespa (attempt {x + 1}/{VESPA_ATTEMPTS})...\")\n            VespaIndex.register_multitenant_indices(\n                indices=[index.index_name for index in supported_indices]\n                + [\n                    f\"{index.index_name}{ALT_INDEX_SUFFIX}\"\n                    for index in supported_indices\n                ],\n                embedding_dims=[index.dim for index in supported_indices]\n                + [index.dim for index in supported_indices],\n                # on the cloud, just use float for all indices, the option to change this\n                # is not exposed to the user\n                embedding_precisions=[\n                    EmbeddingPrecision.FLOAT for _ in range(len(supported_indices) * 2)\n                ],\n            )\n\n            logger.notice(\"Vespa setup complete.\")\n            return True\n        except Exception:\n            logger.notice(\n                f\"Vespa setup did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds.\"\n            )\n            time.sleep(WAIT_SECONDS)\n\n    logger.error(\n        f\"Vespa setup did not succeed. Attempt limit reached. ({VESPA_ATTEMPTS})\"\n    )\n    return False\n"
  },
  {
    "path": "backend/onyx/tools/built_in_tools.py",
    "content": "from typing import Type\nfrom typing import Union\n\nfrom onyx.tools.tool_implementations.file_reader.file_reader_tool import FileReaderTool\nfrom onyx.tools.tool_implementations.images.image_generation_tool import (\n    ImageGenerationTool,\n)\nfrom onyx.tools.tool_implementations.knowledge_graph.knowledge_graph_tool import (\n    KnowledgeGraphTool,\n)\nfrom onyx.tools.tool_implementations.memory.memory_tool import MemoryTool\nfrom onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool\nfrom onyx.tools.tool_implementations.python.python_tool import PythonTool\nfrom onyx.tools.tool_implementations.search.search_tool import SearchTool\nfrom onyx.tools.tool_implementations.web_search.web_search_tool import (\n    WebSearchTool,\n)\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nBUILT_IN_TOOL_TYPES = Union[\n    SearchTool,\n    ImageGenerationTool,\n    WebSearchTool,\n    KnowledgeGraphTool,\n    OpenURLTool,\n    PythonTool,\n    FileReaderTool,\n    MemoryTool,\n]\n\nBUILT_IN_TOOL_MAP: dict[str, Type[BUILT_IN_TOOL_TYPES]] = {\n    SearchTool.__name__: SearchTool,\n    ImageGenerationTool.__name__: ImageGenerationTool,\n    WebSearchTool.__name__: WebSearchTool,\n    KnowledgeGraphTool.__name__: KnowledgeGraphTool,\n    OpenURLTool.__name__: OpenURLTool,\n    PythonTool.__name__: PythonTool,\n    FileReaderTool.__name__: FileReaderTool,\n    MemoryTool.__name__: MemoryTool,\n}\n\nSTOPPING_TOOLS_NAMES: list[str] = [ImageGenerationTool.NAME]\nCITEABLE_TOOLS_NAMES: list[str] = [\n    SearchTool.NAME,\n    WebSearchTool.NAME,\n    OpenURLTool.NAME,\n]\n\n\ndef get_built_in_tool_ids() -> list[str]:\n    return list(BUILT_IN_TOOL_MAP.keys())\n\n\ndef get_built_in_tool_by_id(in_code_tool_id: str) -> Type[BUILT_IN_TOOL_TYPES]:\n    return BUILT_IN_TOOL_MAP[in_code_tool_id]\n\n\ndef _build_tool_name_to_class() -> dict[str, Type[BUILT_IN_TOOL_TYPES]]:\n    \"\"\"Build a mapping from LLM-facing tool name to tool class.\"\"\"\n    result: dict[str, Type[BUILT_IN_TOOL_TYPES]] = {}\n    for cls in BUILT_IN_TOOL_MAP.values():\n        name_attr = cls.__dict__.get(\"name\")\n        if isinstance(name_attr, property) and name_attr.fget is not None:\n            tool_name = name_attr.fget(cls)\n        elif isinstance(name_attr, str):\n            tool_name = name_attr\n        else:\n            raise ValueError(\n                f\"Built-in tool {cls.__name__} must define a valid LLM-facing tool name\"\n            )\n        result[tool_name] = cls\n    return result\n\n\nTOOL_NAME_TO_CLASS: dict[str, Type[BUILT_IN_TOOL_TYPES]] = _build_tool_name_to_class()\n"
  },
  {
    "path": "backend/onyx/tools/constants.py",
    "content": "\"\"\"Tool name and ID constants matching frontend definitions.\"\"\"\n\n# Tool names as referenced by tool results / tool calls\nSEARCH_TOOL_NAME = \"run_search\"\nINTERNET_SEARCH_TOOL_NAME = \"run_internet_search\"\nIMAGE_GENERATION_TOOL_NAME = \"run_image_generation\"\nPYTHON_TOOL_NAME = \"run_python\"\nOPEN_URL_TOOL_NAME = \"open_url\"\n\n# In-code tool IDs that also correspond to the tool's name when associated with a persona\nSEARCH_TOOL_ID = \"SearchTool\"\nIMAGE_GENERATION_TOOL_ID = \"ImageGenerationTool\"\nWEB_SEARCH_TOOL_ID = \"WebSearchTool\"\nPYTHON_TOOL_ID = \"PythonTool\"\nOPEN_URL_TOOL_ID = \"OpenURLTool\"\nFILE_READER_TOOL_ID = \"FileReaderTool\"\nMEMORY_TOOL_ID = \"MemoryTool\"\n\n# Tool names as referenced by tool results / tool calls (read_file)\nFILE_READER_TOOL_NAME = \"read_file\"\n"
  },
  {
    "path": "backend/onyx/tools/fake_tools/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/tools/fake_tools/research_agent.py",
    "content": "import queue\nimport time\nfrom collections.abc import Callable\nfrom typing import Any\nfrom typing import cast\n\nfrom onyx.chat.chat_state import ChatStateContainer\nfrom onyx.chat.chat_utils import create_tool_call_failure_messages\nfrom onyx.chat.citation_processor import CitationMapping\nfrom onyx.chat.citation_processor import CitationMode\nfrom onyx.chat.citation_processor import DynamicCitationProcessor\nfrom onyx.chat.citation_utils import collapse_citations\nfrom onyx.chat.citation_utils import update_citation_processor_from_tool_response\nfrom onyx.chat.emitter import Emitter\nfrom onyx.chat.llm_loop import construct_message_history\nfrom onyx.chat.llm_step import run_llm_step\nfrom onyx.chat.llm_step import run_llm_step_pkt_generator\nfrom onyx.chat.models import ChatMessageSimple\nfrom onyx.chat.models import LlmStepResult\nfrom onyx.chat.models import ToolCallSimple\nfrom onyx.configs.constants import MessageType\nfrom onyx.context.search.models import SearchDocsResponse\nfrom onyx.deep_research.dr_mock_tools import (\n    get_research_agent_additional_tool_definitions,\n)\nfrom onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_TASK_KEY\nfrom onyx.deep_research.dr_mock_tools import THINK_TOOL_RESPONSE_MESSAGE\nfrom onyx.deep_research.dr_mock_tools import THINK_TOOL_RESPONSE_TOKEN_COUNT\nfrom onyx.deep_research.models import CombinedResearchAgentCallResult\nfrom onyx.deep_research.models import ResearchAgentCallResult\nfrom onyx.deep_research.utils import check_special_tool_calls\nfrom onyx.deep_research.utils import create_think_tool_token_processor\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.interfaces import LLMUserIdentity\nfrom onyx.llm.models import ReasoningEffort\nfrom onyx.llm.models import ToolChoiceOptions\nfrom onyx.prompts.deep_research.dr_tool_prompts import OPEN_URLS_TOOL_DESCRIPTION\nfrom onyx.prompts.deep_research.dr_tool_prompts import (\n    OPEN_URLS_TOOL_DESCRIPTION_REASONING,\n)\nfrom onyx.prompts.deep_research.dr_tool_prompts import WEB_SEARCH_TOOL_DESCRIPTION\nfrom onyx.prompts.deep_research.research_agent import MAX_RESEARCH_CYCLES\nfrom onyx.prompts.deep_research.research_agent import OPEN_URL_REMINDER_RESEARCH_AGENT\nfrom onyx.prompts.deep_research.research_agent import RESEARCH_AGENT_PROMPT\nfrom onyx.prompts.deep_research.research_agent import RESEARCH_AGENT_PROMPT_REASONING\nfrom onyx.prompts.deep_research.research_agent import RESEARCH_REPORT_PROMPT\nfrom onyx.prompts.deep_research.research_agent import USER_REPORT_QUERY\nfrom onyx.prompts.prompt_utils import get_current_llm_day_time\nfrom onyx.prompts.tool_prompts import INTERNAL_SEARCH_GUIDANCE\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseDelta\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseStart\nfrom onyx.server.query_and_chat.streaming_models import IntermediateReportCitedDocs\nfrom onyx.server.query_and_chat.streaming_models import IntermediateReportDelta\nfrom onyx.server.query_and_chat.streaming_models import IntermediateReportStart\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import PacketException\nfrom onyx.server.query_and_chat.streaming_models import ResearchAgentStart\nfrom onyx.server.query_and_chat.streaming_models import SectionEnd\nfrom onyx.server.query_and_chat.streaming_models import StreamingType\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import ToolCallInfo\nfrom onyx.tools.models import ToolCallKickoff\nfrom onyx.tools.models import ToolResponse\nfrom onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool\nfrom onyx.tools.tool_implementations.search.search_tool import SearchTool\nfrom onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map\nfrom onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool\nfrom onyx.tools.tool_runner import run_tool_calls\nfrom onyx.tools.utils import generate_tools_description\nfrom onyx.tracing.framework.create import function_span\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\n\nlogger = setup_logger()\n\n\n# 30 minute timeout per research agent\nRESEARCH_AGENT_TIMEOUT_SECONDS = 30 * 60\nRESEARCH_AGENT_TIMEOUT_MESSAGE = \"Research Agent timed out after 30 minutes\"\n# 12 minute timeout before forcing intermediate report generation\nRESEARCH_AGENT_FORCE_REPORT_SECONDS = 12 * 60\n# May be good to experiment with this, empirically reports of around 5,000 tokens are pretty good.\nMAX_INTERMEDIATE_REPORT_LENGTH_TOKENS = 10000\n\n\ndef generate_intermediate_report(\n    research_topic: str,\n    history: list[ChatMessageSimple],\n    llm: LLM,\n    token_counter: Callable[[str], int],\n    citation_processor: DynamicCitationProcessor,\n    user_identity: LLMUserIdentity | None,\n    emitter: Emitter,\n    placement: Placement,\n) -> str:\n    # NOTE: This step outputs a lot of tokens and has been observed to run for more than 10 minutes in a nontrivial percentage of\n    # research tasks. This is also model / inference provider dependent.\n    with function_span(\"generate_intermediate_report\") as span:\n        span.span_data.input = (\n            f\"research_topic={research_topic}, history_length={len(history)}\"\n        )\n        # Having the state container here to handle the tokens and not passed through means there is no way to\n        # get partial saves of the report. Arguably this is not useful anyway so not going to implement partial saves.\n        state_container = ChatStateContainer()\n        system_prompt = ChatMessageSimple(\n            message=RESEARCH_REPORT_PROMPT,\n            token_count=token_counter(RESEARCH_REPORT_PROMPT),\n            message_type=MessageType.SYSTEM,\n        )\n\n        reminder_str = USER_REPORT_QUERY.format(research_topic=research_topic)\n        reminder_message = ChatMessageSimple(\n            message=reminder_str,\n            token_count=token_counter(reminder_str),\n            message_type=MessageType.USER,\n        )\n\n        research_history = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=history,\n            reminder_message=reminder_message,\n            context_files=None,\n            available_tokens=llm.config.max_input_tokens,\n        )\n\n        intermediate_report_generator = run_llm_step_pkt_generator(\n            history=research_history,\n            tool_definitions=[],\n            tool_choice=ToolChoiceOptions.NONE,\n            llm=llm,\n            placement=placement,\n            citation_processor=citation_processor,\n            state_container=state_container,\n            reasoning_effort=ReasoningEffort.LOW,\n            final_documents=None,\n            user_identity=user_identity,\n            max_tokens=MAX_INTERMEDIATE_REPORT_LENGTH_TOKENS,\n            use_existing_tab_index=True,\n            is_deep_research=True,\n            timeout_override=300,  # 5 minute read timeout for long report generation\n        )\n\n        while True:\n            try:\n                packet = next(intermediate_report_generator)\n                # Translate AgentResponseStart/Delta packets to IntermediateReportStart/Delta\n                # Use original placement consistently for all packets\n                if isinstance(packet.obj, AgentResponseStart):\n                    emitter.emit(\n                        Packet(\n                            placement=placement,\n                            obj=IntermediateReportStart(),\n                        )\n                    )\n                elif isinstance(packet.obj, AgentResponseDelta):\n                    emitter.emit(\n                        Packet(\n                            placement=placement,\n                            obj=IntermediateReportDelta(content=packet.obj.content),\n                        )\n                    )\n                else:\n                    # Pass through other packet types (e.g., ReasoningStart, ReasoningDelta, etc.)\n                    # Also use original placement to keep everything in the same group\n                    emitter.emit(\n                        Packet(\n                            placement=placement,\n                            obj=packet.obj,\n                        )\n                    )\n            except StopIteration as e:\n                llm_step_result, _ = e.value\n                # Use original placement for completion packets\n                emitter.emit(\n                    Packet(\n                        placement=placement,\n                        obj=IntermediateReportCitedDocs(\n                            cited_docs=list(\n                                citation_processor.get_seen_citations().values()\n                            )\n                        ),\n                    )\n                )\n                emitter.emit(\n                    Packet(\n                        placement=placement,\n                        obj=SectionEnd(),\n                    )\n                )\n                break\n\n        llm_step_result = cast(LlmStepResult, llm_step_result)\n\n        final_report = llm_step_result.answer\n        span.span_data.output = final_report if final_report else None\n        if final_report is None:\n            raise ValueError(\n                f\"LLM failed to generate a report for research task: {research_topic}\"\n            )\n\n        return final_report\n\n\ndef run_research_agent_call(\n    research_agent_call: ToolCallKickoff,\n    parent_tool_call_id: str,\n    tools: list[Tool],\n    emitter: Emitter,\n    state_container: ChatStateContainer,\n    llm: LLM,\n    is_reasoning_model: bool,\n    token_counter: Callable[[str], int],\n    user_identity: LLMUserIdentity | None,\n) -> ResearchAgentCallResult | None:\n    turn_index = research_agent_call.placement.turn_index\n    tab_index = research_agent_call.placement.tab_index\n    with function_span(\"research_agent\") as span:\n        span.span_data.input = str(research_agent_call.tool_args)\n        try:\n            # Track start time for timeout-based forced report generation\n            start_time = time.monotonic()\n\n            # Used to track citations while keeping original citation markers in intermediate reports.\n            # KEEP_MARKERS preserves citation markers like [1], [2] in the text unchanged\n            # while tracking which documents were cited via get_seen_citations().\n            # This allows collapse_citations() to later renumber them in the final report.\n            citation_processor = DynamicCitationProcessor(\n                citation_mode=CitationMode.KEEP_MARKERS\n            )\n\n            research_cycle_count = 0\n            llm_cycle_count = 0\n            current_tools = tools\n            reasoning_cycles = 0\n            just_ran_web_search = False\n\n            # If this fails to parse, we can't run the loop anyway, let this one fail in that case\n            research_topic = research_agent_call.tool_args[RESEARCH_AGENT_TASK_KEY]\n\n            emitter.emit(\n                Packet(\n                    placement=Placement(turn_index=turn_index, tab_index=tab_index),\n                    obj=ResearchAgentStart(research_task=research_topic),\n                )\n            )\n\n            initial_user_message = ChatMessageSimple(\n                message=research_topic,\n                token_count=token_counter(research_topic),\n                message_type=MessageType.USER,\n            )\n            msg_history: list[ChatMessageSimple] = [initial_user_message]\n\n            citation_mapping: dict[int, str] = {}\n            most_recent_reasoning: str | None = None\n            while research_cycle_count <= MAX_RESEARCH_CYCLES:\n                # Check if we've exceeded the time limit - if so, skip LLM and generate report\n                elapsed_seconds = time.monotonic() - start_time\n                if elapsed_seconds > RESEARCH_AGENT_FORCE_REPORT_SECONDS:\n                    logger.info(\n                        f\"Research agent exceeded {RESEARCH_AGENT_FORCE_REPORT_SECONDS}s \"\n                        f\"(elapsed: {elapsed_seconds:.1f}s), forcing intermediate report generation\"\n                    )\n                    break\n\n                if research_cycle_count == MAX_RESEARCH_CYCLES:\n                    # Auto-generate report on last cycle\n                    logger.debug(\"Auto-generating intermediate report on last cycle.\")\n                    break\n\n                tools_by_name = {tool.name: tool for tool in current_tools}\n\n                tools_description = generate_tools_description(current_tools)\n\n                internal_search_tip = (\n                    INTERNAL_SEARCH_GUIDANCE\n                    if any(isinstance(tool, SearchTool) for tool in current_tools)\n                    else \"\"\n                )\n                web_search_tip = (\n                    WEB_SEARCH_TOOL_DESCRIPTION\n                    if any(isinstance(tool, WebSearchTool) for tool in current_tools)\n                    else \"\"\n                )\n                open_urls_tip = (\n                    OPEN_URLS_TOOL_DESCRIPTION\n                    if any(isinstance(tool, OpenURLTool) for tool in current_tools)\n                    else \"\"\n                )\n                if is_reasoning_model and open_urls_tip:\n                    open_urls_tip = OPEN_URLS_TOOL_DESCRIPTION_REASONING\n\n                system_prompt_template = (\n                    RESEARCH_AGENT_PROMPT_REASONING\n                    if is_reasoning_model\n                    else RESEARCH_AGENT_PROMPT\n                )\n                system_prompt_str = system_prompt_template.format(\n                    available_tools=tools_description,\n                    current_datetime=get_current_llm_day_time(full_sentence=False),\n                    current_cycle_count=research_cycle_count,\n                    optional_internal_search_tool_description=internal_search_tip,\n                    optional_web_search_tool_description=web_search_tip,\n                    optional_open_url_tool_description=open_urls_tip,\n                )\n\n                system_prompt = ChatMessageSimple(\n                    message=system_prompt_str,\n                    token_count=token_counter(system_prompt_str),\n                    message_type=MessageType.SYSTEM,\n                )\n\n                if just_ran_web_search:\n                    reminder_message = ChatMessageSimple(\n                        message=OPEN_URL_REMINDER_RESEARCH_AGENT,\n                        token_count=100,\n                        message_type=MessageType.USER,\n                    )\n                else:\n                    reminder_message = None\n\n                constructed_history = construct_message_history(\n                    system_prompt=system_prompt,\n                    custom_agent_prompt=None,\n                    simple_chat_history=msg_history,\n                    reminder_message=reminder_message,\n                    context_files=None,\n                    available_tokens=llm.config.max_input_tokens,\n                )\n\n                research_agent_tools = get_research_agent_additional_tool_definitions(\n                    include_think_tool=not is_reasoning_model\n                )\n                # Use think tool processor for non-reasoning models to convert\n                # think_tool calls to reasoning content (same as dr_loop.py)\n                custom_processor = (\n                    create_think_tool_token_processor()\n                    if not is_reasoning_model\n                    else None\n                )\n\n                llm_step_result, has_reasoned = run_llm_step(\n                    emitter=emitter,\n                    history=constructed_history,\n                    tool_definitions=[tool.tool_definition() for tool in current_tools]\n                    + research_agent_tools,\n                    tool_choice=ToolChoiceOptions.REQUIRED,\n                    llm=llm,\n                    placement=Placement(\n                        turn_index=turn_index,\n                        tab_index=tab_index,\n                        sub_turn_index=llm_cycle_count + reasoning_cycles,\n                    ),\n                    citation_processor=None,\n                    state_container=None,\n                    reasoning_effort=ReasoningEffort.LOW,\n                    final_documents=None,\n                    user_identity=user_identity,\n                    custom_token_processor=custom_processor,\n                    use_existing_tab_index=True,\n                    is_deep_research=True,\n                    # In case the model is tripped up by the long context and gets into an endless loop of\n                    # things like null tokens, we set a max token limit here. The call will likely not be valid\n                    # in these situations but it at least allows a chance of recovery. None of the tool calls should\n                    # be this long.\n                    max_tokens=1000,\n                )\n                if has_reasoned:\n                    reasoning_cycles += 1\n\n                tool_responses: list[ToolResponse] = []\n                tool_calls = llm_step_result.tool_calls or []\n\n                # TODO handle the restriction of only 1 tool call type per turn\n                # This is a problem right now because of the Placement system not allowing for\n                # differentiating sub-tool calls.\n                # Filter tool calls to only include the first tool type used\n                # This prevents mixing different tool types in the same batch\n                if tool_calls:\n                    first_tool_type = tool_calls[0].tool_name\n                    tool_calls = [\n                        tc for tc in tool_calls if tc.tool_name == first_tool_type\n                    ]\n\n                just_ran_web_search = False\n\n                special_tool_calls = check_special_tool_calls(tool_calls=tool_calls)\n                if special_tool_calls.generate_report_tool_call:\n                    final_report = generate_intermediate_report(\n                        research_topic=research_topic,\n                        history=msg_history,\n                        llm=llm,\n                        token_counter=token_counter,\n                        citation_processor=citation_processor,\n                        user_identity=user_identity,\n                        emitter=emitter,\n                        placement=Placement(\n                            turn_index=turn_index,\n                            tab_index=tab_index,\n                        ),\n                    )\n                    span.span_data.output = final_report if final_report else None\n                    return ResearchAgentCallResult(\n                        intermediate_report=final_report,\n                        citation_mapping=citation_processor.get_seen_citations(),\n                    )\n                elif special_tool_calls.think_tool_call:\n                    think_tool_call = special_tool_calls.think_tool_call\n                    tool_call_message = think_tool_call.to_msg_str()\n                    tool_call_token_count = token_counter(tool_call_message)\n\n                    with function_span(\"think_tool\") as think_span:\n                        think_span.span_data.input = str(think_tool_call.tool_args)\n\n                        # Create ASSISTANT message with tool_calls (OpenAI parallel format)\n                        think_tool_simple = ToolCallSimple(\n                            tool_call_id=think_tool_call.tool_call_id,\n                            tool_name=think_tool_call.tool_name,\n                            tool_arguments=think_tool_call.tool_args,\n                            token_count=tool_call_token_count,\n                        )\n                        think_assistant_msg = ChatMessageSimple(\n                            message=\"\",\n                            token_count=tool_call_token_count,\n                            message_type=MessageType.ASSISTANT,\n                            tool_calls=[think_tool_simple],\n                            image_files=None,\n                        )\n                        msg_history.append(think_assistant_msg)\n\n                        think_tool_response_msg = ChatMessageSimple(\n                            message=THINK_TOOL_RESPONSE_MESSAGE,\n                            token_count=THINK_TOOL_RESPONSE_TOKEN_COUNT,\n                            message_type=MessageType.TOOL_CALL_RESPONSE,\n                            tool_call_id=think_tool_call.tool_call_id,\n                            image_files=None,\n                        )\n                        msg_history.append(think_tool_response_msg)\n                        think_span.span_data.output = THINK_TOOL_RESPONSE_MESSAGE\n                    reasoning_cycles += 1\n                    most_recent_reasoning = llm_step_result.reasoning\n                    continue\n                else:\n                    parallel_tool_call_results = run_tool_calls(\n                        tool_calls=tool_calls,\n                        tools=current_tools,\n                        message_history=msg_history,\n                        user_memory_context=None,\n                        user_info=None,\n                        citation_mapping=citation_mapping,\n                        next_citation_num=citation_processor.get_next_citation_number(),\n                        # Packets currently cannot differentiate between parallel calls in a nested level\n                        # so we just cannot show parallel calls in the UI. This should not happen for deep research anyhow.\n                        max_concurrent_tools=1,\n                        # May be better to not do this step, hard to say, needs to be tested\n                        skip_search_query_expansion=False,\n                        url_snippet_map=extract_url_snippet_map(\n                            [\n                                search_doc\n                                for tool_call in state_container.get_tool_calls()\n                                if tool_call.search_docs\n                                for search_doc in tool_call.search_docs\n                            ]\n                        ),\n                    )\n                    tool_responses = parallel_tool_call_results.tool_responses\n                    citation_mapping = (\n                        parallel_tool_call_results.updated_citation_mapping\n                    )\n\n                    if tool_calls and not tool_responses:\n                        failure_messages = create_tool_call_failure_messages(\n                            tool_calls, token_counter\n                        )\n                        msg_history.extend(failure_messages)\n\n                        # If there is a failure like this, we still increment to avoid potential infinite loops\n                        research_cycle_count += 1\n                        llm_cycle_count += 1\n                        continue\n\n                    # Filter to only responses with valid tool_call references\n                    valid_tool_responses = [\n                        tr for tr in tool_responses if tr.tool_call is not None\n                    ]\n\n                    # Build ONE ASSISTANT message with all tool calls (OpenAI parallel format)\n                    if valid_tool_responses:\n                        tool_calls_simple: list[ToolCallSimple] = []\n                        for tool_response in valid_tool_responses:\n                            tc = tool_response.tool_call\n                            assert tc is not None  # Already filtered above\n                            tool_call_message = tc.to_msg_str()\n                            tool_call_token_count = token_counter(tool_call_message)\n                            tool_calls_simple.append(\n                                ToolCallSimple(\n                                    tool_call_id=tc.tool_call_id,\n                                    tool_name=tc.tool_name,\n                                    tool_arguments=tc.tool_args,\n                                    token_count=tool_call_token_count,\n                                )\n                            )\n\n                        total_tool_call_tokens = sum(\n                            tc.token_count for tc in tool_calls_simple\n                        )\n                        assistant_with_tools = ChatMessageSimple(\n                            message=\"\",\n                            token_count=total_tool_call_tokens,\n                            message_type=MessageType.ASSISTANT,\n                            tool_calls=tool_calls_simple,\n                            image_files=None,\n                        )\n                        msg_history.append(assistant_with_tools)\n\n                    # Now add tool call info and TOOL_CALL_RESPONSE messages for each\n                    for tool_response in valid_tool_responses:\n                        tc = tool_response.tool_call\n                        assert tc is not None  # Already filtered above\n                        tool_call_tab_index = tc.placement.tab_index\n\n                        tool = tools_by_name.get(tc.tool_name)\n                        if not tool:\n                            raise ValueError(\n                                f\"Tool '{tc.tool_name}' not found in tools list\"\n                            )\n\n                        search_docs = None\n                        displayed_docs = None\n                        if isinstance(tool_response.rich_response, SearchDocsResponse):\n                            search_docs = tool_response.rich_response.search_docs\n                            displayed_docs = tool_response.rich_response.displayed_docs\n\n                            # Add ALL search docs to state container for DB persistence\n                            if search_docs:\n                                state_container.add_search_docs(search_docs)\n\n                            # This is used for the Open URL reminder in the next cycle\n                            # only do this if the web search tool yielded results\n                            if search_docs and tc.tool_name == WebSearchTool.NAME:\n                                just_ran_web_search = True\n\n                        # Makes sure the citation processor is updated with all the possible docs\n                        # and citation numbers so that it's populated when passed in to report generation.\n                        update_citation_processor_from_tool_response(\n                            tool_response=tool_response,\n                            citation_processor=citation_processor,\n                        )\n\n                        # Research Agent is a top level tool call but the tools called by the research\n                        # agent are sub-tool calls.\n                        tool_call_info = ToolCallInfo(\n                            parent_tool_call_id=parent_tool_call_id,\n                            # At the DB save level, there is only a turn index, no sub-turn etc.\n                            # This is implied by the parent tool call's turn index and the depth\n                            # of the tree traversal.\n                            turn_index=llm_cycle_count + reasoning_cycles,\n                            tab_index=tool_call_tab_index,\n                            tool_name=tc.tool_name,\n                            tool_call_id=tc.tool_call_id,\n                            tool_id=tool.id,\n                            reasoning_tokens=llm_step_result.reasoning\n                            or most_recent_reasoning,\n                            tool_call_arguments=tc.tool_args,\n                            tool_call_response=tool_response.llm_facing_response,\n                            search_docs=displayed_docs or search_docs,\n                            generated_images=None,\n                        )\n                        state_container.add_tool_call(tool_call_info)\n\n                        tool_response_message = tool_response.llm_facing_response\n                        tool_response_token_count = token_counter(tool_response_message)\n\n                        tool_response_msg = ChatMessageSimple(\n                            message=tool_response_message,\n                            token_count=tool_response_token_count,\n                            message_type=MessageType.TOOL_CALL_RESPONSE,\n                            tool_call_id=tc.tool_call_id,\n                            image_files=None,\n                        )\n                        msg_history.append(tool_response_msg)\n\n                # If it reached this point, it did not call reasoning, so here we wipe it to not save it to multiple turns\n                most_recent_reasoning = None\n                llm_cycle_count += 1\n                research_cycle_count += 1\n\n            # If we've run out of cycles, just try to generate a report from everything so far\n            final_report = generate_intermediate_report(\n                research_topic=research_topic,\n                history=msg_history,\n                llm=llm,\n                token_counter=token_counter,\n                citation_processor=citation_processor,\n                user_identity=user_identity,\n                emitter=emitter,\n                placement=Placement(\n                    turn_index=turn_index,\n                    tab_index=tab_index,\n                ),\n            )\n            span.span_data.output = final_report if final_report else None\n            return ResearchAgentCallResult(\n                intermediate_report=final_report,\n                citation_mapping=citation_processor.get_seen_citations(),\n            )\n\n        except Exception as e:\n            logger.error(f\"Error running research agent call: {e}\")\n            emitter.emit(\n                Packet(\n                    placement=Placement(turn_index=turn_index, tab_index=tab_index),\n                    obj=PacketException(type=StreamingType.ERROR.value, exception=e),\n                )\n            )\n            return None\n\n\ndef _on_research_agent_timeout(\n    index: int,  # noqa: ARG001\n    func: Callable[..., Any],  # noqa: ARG001\n    args: tuple[Any, ...],\n) -> ResearchAgentCallResult:\n    \"\"\"Callback for handling research agent timeouts.\n\n    Returns a ResearchAgentCallResult with the timeout message so the research\n    can continue with other agents.\n    \"\"\"\n    research_agent_call: ToolCallKickoff = args[0]  # First arg\n    research_task = research_agent_call.tool_args.get(\n        RESEARCH_AGENT_TASK_KEY, \"unknown\"\n    )\n    logger.warning(\n        f\"Research agent timed out after {RESEARCH_AGENT_TIMEOUT_SECONDS} seconds for task: {research_task}\"\n    )\n    return ResearchAgentCallResult(\n        intermediate_report=RESEARCH_AGENT_TIMEOUT_MESSAGE,\n        citation_mapping={},\n    )\n\n\ndef run_research_agent_calls(\n    research_agent_calls: list[ToolCallKickoff],\n    parent_tool_call_ids: list[str],\n    tools: list[Tool],\n    emitter: Emitter,\n    state_container: ChatStateContainer,\n    llm: LLM,\n    is_reasoning_model: bool,\n    token_counter: Callable[[str], int],\n    citation_mapping: CitationMapping,\n    user_identity: LLMUserIdentity | None = None,\n) -> CombinedResearchAgentCallResult:\n    # Run all research agent calls in parallel with timeout\n    functions_with_args = [\n        (\n            run_research_agent_call,\n            (\n                research_agent_call,\n                parent_tool_call_id,\n                tools,\n                emitter,\n                state_container,\n                llm,\n                is_reasoning_model,\n                token_counter,\n                user_identity,\n            ),\n        )\n        for research_agent_call, parent_tool_call_id in zip(\n            research_agent_calls, parent_tool_call_ids\n        )\n    ]\n\n    research_agent_call_results = run_functions_tuples_in_parallel(\n        functions_with_args,\n        allow_failures=False,\n        # Note: This simply allows the main thread to continue with an error message\n        # It does not kill the background thread which may still write to the state objects passed to it\n        # This is because forcefully killing Python threads is very dangerous\n        timeout=RESEARCH_AGENT_TIMEOUT_SECONDS,\n        timeout_callback=_on_research_agent_timeout,\n    )\n\n    updated_citation_mapping = citation_mapping\n    updated_answers: list[str | None] = []\n\n    for result in research_agent_call_results:\n        if result is None:\n            updated_answers.append(None)\n            continue\n\n        # Use collapse_citations to renumber citations in the text and merge mappings.\n        # Since we use KEEP_MARKERS mode, the intermediate reports have original citation\n        # markers like [1], [2] which need to be renumbered for the combined report.\n        updated_answer, updated_citation_mapping = collapse_citations(\n            answer_text=result.intermediate_report,\n            existing_citation_mapping=updated_citation_mapping,\n            new_citation_mapping=result.citation_mapping,\n        )\n        updated_answers.append(updated_answer)\n\n    return CombinedResearchAgentCallResult(\n        intermediate_reports=updated_answers,\n        citation_mapping=updated_citation_mapping,\n    )\n\n\nif __name__ == \"__main__\":\n    from uuid import uuid4\n\n    from onyx.chat.chat_state import ChatStateContainer\n    from onyx.db.engine.sql_engine import get_session_with_current_tenant\n    from onyx.db.engine.sql_engine import SqlEngine\n    from onyx.db.models import User\n    from onyx.db.persona import get_default_behavior_persona\n    from onyx.llm.factory import get_default_llm\n    from onyx.llm.factory import get_llm_token_counter\n    from onyx.llm.utils import model_is_reasoning_model\n    from onyx.server.query_and_chat.placement import Placement\n    from onyx.tools.models import ToolCallKickoff\n    from onyx.tools.tool_constructor import construct_tools\n\n    # === CONFIGURE YOUR RESEARCH PROMPT HERE ===\n    RESEARCH_PROMPT = \"Your test research task.\"\n\n    SqlEngine.set_app_name(\"research_agent_script\")\n    SqlEngine.init_engine(pool_size=5, max_overflow=5)\n\n    with get_session_with_current_tenant() as db_session:\n        llm = get_default_llm()\n        token_counter = get_llm_token_counter(llm)\n        is_reasoning = model_is_reasoning_model(\n            llm.config.model_name, llm.config.model_provider\n        )\n\n        persona = get_default_behavior_persona(db_session, eager_load_for_tools=True)\n        if persona is None:\n            raise ValueError(\"No default persona found\")\n\n        user = db_session.query(User).first()\n        if user is None:\n            raise ValueError(\"No users found in database. Please create a user first.\")\n\n        emitter_queue: queue.Queue = queue.Queue()\n        emitter = Emitter(merged_queue=emitter_queue)\n        state_container = ChatStateContainer()\n\n        tool_dict = construct_tools(\n            persona=persona,\n            db_session=db_session,\n            emitter=emitter,\n            user=user,\n            llm=llm,\n        )\n        tools = [\n            tool\n            for tool_list in tool_dict.values()\n            for tool in tool_list\n            if tool.name != \"generate_image\"\n        ]\n\n        logger.info(f\"Running research agent with prompt: {RESEARCH_PROMPT}\")\n        logger.info(f\"LLM: {llm.config.model_provider}/{llm.config.model_name}\")\n        logger.info(f\"Tools: {[t.name for t in tools]}\")\n\n        result = run_research_agent_call(\n            research_agent_call=ToolCallKickoff(\n                tool_name=\"research_agent\",\n                tool_args={RESEARCH_AGENT_TASK_KEY: RESEARCH_PROMPT},\n                tool_call_id=str(uuid4()),\n                placement=Placement(turn_index=0, tab_index=0),\n            ),\n            parent_tool_call_id=str(uuid4()),\n            tools=tools,\n            emitter=emitter,\n            state_container=state_container,\n            llm=llm,\n            is_reasoning_model=is_reasoning,\n            token_counter=token_counter,\n            user_identity=None,\n        )\n\n        if result is None:\n            logger.error(\"Research agent returned no result\")\n        else:\n            print(\"\\n\" + \"=\" * 80)\n            print(\"RESEARCH AGENT RESULT\")\n            print(\"=\" * 80)\n            print(result.intermediate_report)\n            print(\"=\" * 80)\n            print(f\"Citations: {result.citation_mapping}\")\n            print(f\"Total packets emitted: {emitter_queue.qsize()}\")\n"
  },
  {
    "path": "backend/onyx/tools/interface.py",
    "content": "from __future__ import annotations\n\nimport abc\nfrom typing import Any\nfrom typing import Generic\nfrom typing import TypeVar\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.tools.models import ToolResponse\n\n\nTOverride = TypeVar(\"TOverride\")\n\n\nclass Tool(abc.ABC, Generic[TOverride]):\n    def __init__(self, emitter: Emitter | None = None):\n        \"\"\"Initialize tool with optional emitter. Emitter can be set later via set_emitter().\"\"\"\n        self._emitter = emitter\n\n    @property\n    def emitter(self) -> Emitter:\n        \"\"\"Get the emitter. Raises if not set.\"\"\"\n        if self._emitter is None:\n            raise ValueError(\n                f\"Emitter not set on tool {self.name}. Call set_emitter() first.\"\n            )\n        return self._emitter\n\n    @property\n    @abc.abstractmethod\n    def id(self) -> int:\n        raise NotImplementedError\n\n    @property\n    @abc.abstractmethod\n    def name(self) -> str:\n        \"\"\"Should be the name of the tool passed to the LLM as the json field\"\"\"\n        raise NotImplementedError\n\n    @property\n    @abc.abstractmethod\n    def description(self) -> str:\n        raise NotImplementedError\n\n    @property\n    @abc.abstractmethod\n    def display_name(self) -> str:\n        \"\"\"Should be the name of the tool displayed to the user\"\"\"\n        raise NotImplementedError\n\n    @classmethod\n    def is_available(cls, db_session: \"Session\") -> bool:  # noqa: ARG003\n        \"\"\"\n        Whether this tool is currently available for use given\n        the state of the system. Default: available.\n        Subclasses may override to perform dynamic checks.\n\n        Args:\n            db_session: Database session for tools that need DB access\n        \"\"\"\n        return True\n\n    @abc.abstractmethod\n    def tool_definition(self) -> dict:\n        \"\"\"\n        This is the full definition of the tool with all of the parameters, settings, etc.\n        \"\"\"\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def emit_start(self, placement: Placement) -> None:\n        \"\"\"\n        Emit the start packet for this tool. Each tool implementation should\n        emit its specific start packet type.\n\n        Args:\n            turn_index: The turn index for this tool execution\n            tab_index: The tab index for parallel tool calls\n        \"\"\"\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def run(\n        self,\n        placement: Placement,\n        # Specific tool override arguments that are not provided by the LLM\n        # For example when calling the internal search tool, the original user query is passed along too (but not by the LLM)\n        override_kwargs: TOverride,\n        **llm_kwargs: Any,\n    ) -> ToolResponse:\n        raise NotImplementedError\n\n    @classmethod\n    def should_emit_argument_deltas(cls) -> bool:\n        return False\n"
  },
  {
    "path": "backend/onyx/tools/models.py",
    "content": "from __future__ import annotations\n\nimport json\nfrom enum import Enum\nfrom typing import Any\nfrom typing import Literal\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\nfrom pydantic import model_validator\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT\nfrom onyx.configs.chat_configs import NUM_RETURNED_HITS\nfrom onyx.configs.constants import MessageType\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.context.search.models import SearchDocsResponse\nfrom onyx.db.memory import UserMemoryContext\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import CustomToolErrorInfo\nfrom onyx.server.query_and_chat.streaming_models import GeneratedImage\nfrom onyx.tools.tool_implementations.images.models import FinalImageGenerationResponse\nfrom onyx.tools.tool_implementations.memory.models import MemoryToolResponse\n\n\nTOOL_CALL_MSG_FUNC_NAME = \"function_name\"\nTOOL_CALL_MSG_ARGUMENTS = \"arguments\"\n\n\nclass ToolCallException(Exception):\n    \"\"\"Exception raised for errors during tool calls.\"\"\"\n\n    def __init__(self, message: str, llm_facing_message: str):\n        # This is the full error message which is used for tracing\n        super().__init__(message)\n        # LLM made tool calls are acceptable and not flow terminating, this is the message\n        # which will populate the tool response.\n        self.llm_facing_message = llm_facing_message\n\n\nclass ToolExecutionException(Exception):\n    \"\"\"Exception raise for errors during tool execution.\"\"\"\n\n    def __init__(self, message: str, emit_error_packet: bool = False):\n        super().__init__(message)\n\n        self.emit_error_packet = emit_error_packet\n\n\nclass SearchToolUsage(str, Enum):\n    DISABLED = \"disabled\"\n    ENABLED = \"enabled\"\n    AUTO = \"auto\"\n\n\nclass CustomToolUserFileSnapshot(BaseModel):\n    file_ids: list[str]  # References to saved images or CSVs\n\n\nclass CustomToolCallSummary(BaseModel):\n    tool_name: str\n    response_type: str  # e.g., 'json', 'image', 'csv', 'graph'\n    tool_result: Any  # The response data\n    error: CustomToolErrorInfo | None = None\n\n\nclass ToolCallKickoff(BaseModel):\n    tool_call_id: str\n    tool_name: str\n    tool_args: dict[str, Any]\n\n    placement: Placement\n\n    def to_msg_str(self) -> str:\n        return json.dumps(\n            {\n                TOOL_CALL_MSG_FUNC_NAME: self.tool_name,\n                TOOL_CALL_MSG_ARGUMENTS: self.tool_args,\n            }\n        )\n\n\nclass ToolResponse(BaseModel):\n    # Rich response is for the objects that are returned but not directly used by the LLM\n    # these typically need to be saved to the database to load things in the UI (usually both)\n    rich_response: (\n        # This comes from image generation, image needs to be saved and the packet about it's location needs to be emitted\n        FinalImageGenerationResponse\n        # This comes from internal search / web search, search docs need to be saved, already emitted by the tool\n        | SearchDocsResponse\n        # This comes from the memory tool, memory needs to be persisted to the database\n        | MemoryToolResponse\n        # This comes from open url, web content needs to be saved, maybe this can be consolidated too\n        # | WebContentResponse\n        # This comes from custom tools, tool result needs to be saved\n        | CustomToolCallSummary\n        # This comes from code interpreter, carries generated files\n        | PythonToolRichResponse\n        # If the rich response is a string, this is what's saved to the tool call in the DB\n        | str\n        | None  # If nothing needs to be persisted outside of the string value passed to the LLM\n    )\n    # This is the final string that needs to be wrapped in a tool call response message and concatenated to the history\n    llm_facing_response: str\n    # The original tool call that triggered this response - set by tool_runner\n    # The response is first created by the tool runner, which does not need to be aware of things like the tool_call_id\n    # So this is set after the response is created by the tool runner\n    tool_call: ToolCallKickoff | None = None\n\n\nclass ParallelToolCallResponse(BaseModel):\n    tool_responses: list[ToolResponse]\n    updated_citation_mapping: dict[int, str]\n\n\nclass ToolRunnerResponse(BaseModel):\n    tool_run_kickoff: ToolCallKickoff | None = None\n    tool_response: ToolResponse | None = None\n    tool_message_content: str | list[str | dict[str, Any]] | None = None\n\n    @model_validator(mode=\"after\")\n    def validate_tool_runner_response(self) -> \"ToolRunnerResponse\":\n        fields = [\"tool_response\", \"tool_message_content\", \"tool_run_kickoff\"]\n        provided = sum(1 for field in fields if getattr(self, field) is not None)\n\n        if provided != 1:\n            raise ValueError(\n                \"Exactly one of 'tool_response', 'tool_message_content', or 'tool_run_kickoff' must be provided\"\n            )\n\n        return self\n\n\nclass ToolCallFinalResult(ToolCallKickoff):\n    tool_result: Any = (\n        None  # we would like to use JSON_ro, but can't due to its recursive nature\n    )\n    # agentic additions; only need to set during agentic tool calls\n    level: int | None = None\n    level_question_num: int | None = None\n\n\nclass ChatMinimalTextMessage(BaseModel):\n    message: str\n    message_type: MessageType\n\n\nclass DynamicSchemaInfo(BaseModel):\n    chat_session_id: UUID | None\n    message_id: int | None\n\n\nclass WebSearchToolOverrideKwargs(BaseModel):\n    # To know what citation number to start at for constructing the string to the LLM\n    starting_citation_num: int\n\n\nclass OpenURLToolOverrideKwargs(BaseModel):\n    # To know what citation number to start at for constructing the string to the LLM\n    starting_citation_num: int\n    citation_mapping: dict[str, int]\n    url_snippet_map: dict[str, str]\n    max_urls: int = 10\n\n\n# None indicates that the default value should be used\nclass SearchToolOverrideKwargs(BaseModel):\n    # To know what citation number to start at for constructing the string to the LLM\n    starting_citation_num: int\n    # This is needed because the LLM won't be able to do a really detailed semantic query well\n    # without help and a specific custom prompt for this\n    original_query: str | None = None\n    message_history: list[ChatMinimalTextMessage] | None = None\n    user_memory_context: UserMemoryContext | None = None\n    user_info: str | None = None\n\n    # Used for tool calls after the first one but in the same chat turn. The reason for this is that if the initial pass through\n    # the custom flow did not yield good results, we don't want to go through it again. In that case, we defer entirely to the LLM\n    skip_query_expansion: bool = False\n\n    # Number of results to return in the richer object format so that it can be rendered in the UI\n    num_hits: int | None = NUM_RETURNED_HITS\n    # Number of chunks (token approx) to include in the string to the LLM\n    max_llm_chunks: int | None = MAX_CHUNKS_FED_TO_CHAT\n\n    model_config = ConfigDict(arbitrary_types_allowed=True)\n\n\nclass ChatFile(BaseModel):\n    \"\"\"File from a chat session that can be passed to tools.\"\"\"\n\n    filename: str\n    content: bytes\n\n    model_config = ConfigDict(arbitrary_types_allowed=True)\n\n\nclass PythonToolRichResponse(BaseModel):\n    \"\"\"Rich response from the Python tool carrying generated files.\"\"\"\n\n    generated_files: list[PythonExecutionFile] = []\n\n\nclass PythonToolOverrideKwargs(BaseModel):\n    \"\"\"Override kwargs for the Python/Code Interpreter tool.\"\"\"\n\n    chat_files: list[ChatFile] = []\n\n\nclass ImageGenerationToolOverrideKwargs(BaseModel):\n    \"\"\"Override kwargs for image generation tool calls.\"\"\"\n\n    recent_generated_image_file_ids: list[str] = []\n\n\nclass SearchToolRunContext(BaseModel):\n    emitter: Emitter\n\n    model_config = {\"arbitrary_types_allowed\": True}\n\n\nclass ImageGenerationToolRunContext(BaseModel):\n    emitter: Emitter\n\n    model_config = {\"arbitrary_types_allowed\": True}\n\n\nclass CustomToolRunContext(BaseModel):\n    emitter: Emitter\n\n    model_config = {\"arbitrary_types_allowed\": True}\n\n\nclass MemoryToolResponseSnapshot(BaseModel):\n    memory_text: str\n    operation: Literal[\"add\", \"update\"]\n    memory_id: int | None = None\n    index: int | None = None\n\n\nclass ToolCallInfo(BaseModel):\n    # The parent_tool_call_id is the actual generated tool call id\n    # It is NOT the DB ID which often does not exist yet when the ToolCallInfo is created\n    # None if attached to the Chat Message directly\n    parent_tool_call_id: str | None\n    turn_index: int\n    tab_index: int\n    tool_name: str\n    tool_call_id: str\n    tool_id: int\n    reasoning_tokens: str | None\n    tool_call_arguments: dict[str, Any]\n    tool_call_response: str\n    search_docs: list[SearchDoc] | None = None\n    generated_images: list[GeneratedImage] | None = None\n    generated_files: list[PythonExecutionFile] | None = None\n\n\nCHAT_SESSION_ID_PLACEHOLDER = \"CHAT_SESSION_ID\"\nMESSAGE_ID_PLACEHOLDER = \"MESSAGE_ID\"\n\n\nclass BaseCiteableToolResult(BaseModel):\n    \"\"\"Base class for tool results that can be cited.\"\"\"\n\n    document_citation_number: int\n    unique_identifier_to_strip_away: str | None = None\n    type: str\n\n\nclass LlmInternalSearchResult(BaseCiteableToolResult):\n    \"\"\"Result from an internal search query\"\"\"\n\n    type: Literal[\"internal_search\"] = \"internal_search\"\n    title: str\n    excerpt: str\n    metadata: dict[str, Any]\n\n\nclass LlmWebSearchResult(BaseCiteableToolResult):\n    \"\"\"Result from a web search query\"\"\"\n\n    type: Literal[\"web_search\"] = \"web_search\"\n    url: str\n    title: str\n    snippet: str\n\n\nclass LlmOpenUrlResult(BaseCiteableToolResult):\n    \"\"\"Result from opening/fetching a URL\"\"\"\n\n    type: Literal[\"open_url\"] = \"open_url\"\n    content: str\n\n\nclass PythonExecutionFile(BaseModel):\n    \"\"\"File generated during Python execution\"\"\"\n\n    filename: str\n    file_link: str\n\n\nclass LlmPythonExecutionResult(BaseModel):\n    \"\"\"Result from Python code execution\"\"\"\n\n    type: Literal[\"python_execution\"] = \"python_execution\"\n\n    stdout: str\n    stderr: str\n    exit_code: int | None\n    timed_out: bool\n    generated_files: list[PythonExecutionFile]\n    error: str | None = None\n"
  },
  {
    "path": "backend/onyx/tools/tool_constructor.py",
    "content": "from typing import cast\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.oauth_token_manager import OAuthTokenManager\nfrom onyx.chat.emitter import Emitter\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.configs.model_configs import GEN_AI_TEMPERATURE\nfrom onyx.context.search.models import BaseFilters\nfrom onyx.context.search.models import PersonaSearchInfo\nfrom onyx.db.enums import MCPAuthenticationPerformer\nfrom onyx.db.enums import MCPAuthenticationType\nfrom onyx.db.mcp import get_all_mcp_tools_for_server\nfrom onyx.db.mcp import get_mcp_server_by_id\nfrom onyx.db.mcp import get_user_connection_config\nfrom onyx.db.models import Persona\nfrom onyx.db.models import User\nfrom onyx.db.oauth_config import get_oauth_config\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.db.tools import get_builtin_tool\nfrom onyx.document_index.factory import get_default_document_index\nfrom onyx.image_gen.interfaces import ImageGenerationProviderCredentials\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.interfaces import LLMConfig\nfrom onyx.onyxbot.slack.models import SlackContext\nfrom onyx.tools.built_in_tools import get_built_in_tool_by_id\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import DynamicSchemaInfo\nfrom onyx.tools.models import SearchToolUsage\nfrom onyx.tools.tool_implementations.custom.custom_tool import (\n    build_custom_tools_from_openapi_schema_and_headers,\n)\nfrom onyx.tools.tool_implementations.file_reader.file_reader_tool import FileReaderTool\nfrom onyx.tools.tool_implementations.images.image_generation_tool import (\n    ImageGenerationTool,\n)\nfrom onyx.tools.tool_implementations.mcp.mcp_tool import MCPTool\nfrom onyx.tools.tool_implementations.memory.memory_tool import MemoryTool\nfrom onyx.tools.tool_implementations.open_url.open_url_tool import (\n    OpenURLTool,\n)\nfrom onyx.tools.tool_implementations.python.python_tool import PythonTool\nfrom onyx.tools.tool_implementations.search.search_tool import SearchTool\nfrom onyx.tools.tool_implementations.web_search.web_search_tool import (\n    WebSearchTool,\n)\nfrom onyx.utils.headers import header_dict_to_header_list\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass SearchToolConfig(BaseModel):\n    user_selected_filters: BaseFilters | None = None\n    # Vespa metadata filters for overflowing user files.  These are NOT the\n    # IDs of the current project/persona — they are only set when the\n    # project's/persona's user files didn't fit in the LLM context window and\n    # must be found via vector DB search instead.\n    project_id_filter: int | None = None\n    persona_id_filter: int | None = None\n    bypass_acl: bool = False\n    additional_context: str | None = None\n    slack_context: SlackContext | None = None\n    enable_slack_search: bool = True\n\n\nclass FileReaderToolConfig(BaseModel):\n    # IDs from the ``user_file`` table (project / persona-attached files).\n    user_file_ids: list[UUID] = []\n    # IDs from the ``file_record`` table (chat-attached files).\n    chat_file_ids: list[UUID] = []\n\n\nclass CustomToolConfig(BaseModel):\n    chat_session_id: UUID | None = None\n    message_id: int | None = None\n    additional_headers: dict[str, str] | None = None\n    mcp_headers: dict[str, str] | None = None\n\n\ndef _get_image_generation_config(llm: LLM, db_session: Session) -> LLMConfig:\n    \"\"\"Get image generation LLM config from the default image generation configuration.\"\"\"\n    from onyx.db.image_generation import get_default_image_generation_config\n\n    default_config = get_default_image_generation_config(db_session)\n    if (\n        not default_config\n        or not default_config.model_configuration\n        or not default_config.model_configuration.llm_provider\n    ):\n        raise ValueError(\"No default image generation configuration found\")\n\n    llm_provider = default_config.model_configuration.llm_provider\n\n    return LLMConfig(\n        model_provider=llm_provider.provider,\n        model_name=default_config.model_configuration.name,\n        temperature=GEN_AI_TEMPERATURE,\n        api_key=(\n            llm_provider.api_key.get_value(apply_mask=False)\n            if llm_provider.api_key\n            else None\n        ),\n        api_base=llm_provider.api_base,\n        api_version=llm_provider.api_version,\n        deployment_name=llm_provider.deployment_name,\n        max_input_tokens=llm.config.max_input_tokens,\n        custom_config=llm_provider.custom_config,\n    )\n\n\ndef construct_tools(\n    persona: Persona,\n    db_session: Session,\n    emitter: Emitter,\n    user: User,\n    llm: LLM,\n    search_tool_config: SearchToolConfig | None = None,\n    custom_tool_config: CustomToolConfig | None = None,\n    file_reader_tool_config: FileReaderToolConfig | None = None,\n    allowed_tool_ids: list[int] | None = None,\n    search_usage_forcing_setting: SearchToolUsage = SearchToolUsage.AUTO,\n) -> dict[int, list[Tool]]:\n    \"\"\"Constructs tools based on persona configuration and available APIs.\n\n    Will simply skip tools that are not allowed/available.\n\n    Callers must supply a persona with ``tools``, ``document_sets``,\n    ``attached_documents``, and ``hierarchy_nodes`` already eager-loaded\n    (e.g. via ``eager_load_persona=True`` or ``eager_load_for_tools=True``)\n    to avoid lazy SQL queries after the session may have been flushed.\"\"\"\n    tool_dict: dict[int, list[Tool]] = {}\n\n    # Log which tools are attached to the persona for debugging\n    persona_tool_names = [t.name for t in persona.tools]\n    logger.debug(\n        f\"Constructing tools for persona '{persona.name}' (id={persona.id}): {persona_tool_names}\"\n    )\n\n    mcp_tool_cache: dict[int, dict[int, MCPTool]] = {}\n    # Get user's OAuth token if available\n    user_oauth_token = None\n    if user.oauth_accounts:\n        user_oauth_token = user.oauth_accounts[0].access_token\n\n    search_settings = get_current_search_settings(db_session)\n    # This flow is for search so we do not get all indices.\n    document_index = get_default_document_index(search_settings, None, db_session)\n\n    def _build_search_tool(tool_id: int, config: SearchToolConfig) -> SearchTool:\n        persona_search_info = PersonaSearchInfo(\n            document_set_names=[ds.name for ds in persona.document_sets],\n            search_start_date=persona.search_start_date,\n            attached_document_ids=[doc.id for doc in persona.attached_documents],\n            hierarchy_node_ids=[node.id for node in persona.hierarchy_nodes],\n        )\n        return SearchTool(\n            tool_id=tool_id,\n            emitter=emitter,\n            user=user,\n            persona_search_info=persona_search_info,\n            llm=llm,\n            document_index=document_index,\n            user_selected_filters=config.user_selected_filters,\n            project_id_filter=config.project_id_filter,\n            persona_id_filter=config.persona_id_filter,\n            bypass_acl=config.bypass_acl,\n            slack_context=config.slack_context,\n            enable_slack_search=config.enable_slack_search,\n        )\n\n    added_search_tool = False\n    for db_tool_model in persona.tools:\n        # If allowed_tool_ids is specified, skip tools not in the allowed list\n        if allowed_tool_ids is not None and db_tool_model.id not in allowed_tool_ids:\n            continue\n\n        if db_tool_model.in_code_tool_id:\n            tool_cls = get_built_in_tool_by_id(db_tool_model.in_code_tool_id)\n\n            try:\n                tool_is_available = tool_cls.is_available(db_session)\n            except Exception:\n                logger.exception(\n                    \"Failed checking availability for tool %s\", tool_cls.__name__\n                )\n                tool_is_available = False\n\n            if not tool_is_available:\n                logger.debug(\n                    \"Skipping tool %s because it is not available\",\n                    tool_cls.__name__,\n                )\n                continue\n\n            # Handle Internal Search Tool\n            if tool_cls.__name__ == SearchTool.__name__:\n                added_search_tool = True\n                if search_usage_forcing_setting == SearchToolUsage.DISABLED:\n                    continue\n\n                if not search_tool_config:\n                    search_tool_config = SearchToolConfig()\n\n                tool_dict[db_tool_model.id] = [\n                    _build_search_tool(db_tool_model.id, search_tool_config)\n                ]\n\n            # Handle Image Generation Tool\n            elif tool_cls.__name__ == ImageGenerationTool.__name__:\n                img_generation_llm_config = _get_image_generation_config(\n                    llm, db_session\n                )\n\n                tool_dict[db_tool_model.id] = [\n                    ImageGenerationTool(\n                        image_generation_credentials=ImageGenerationProviderCredentials(\n                            api_key=cast(str, img_generation_llm_config.api_key),\n                            api_base=img_generation_llm_config.api_base,\n                            api_version=img_generation_llm_config.api_version,\n                            deployment_name=(\n                                img_generation_llm_config.deployment_name\n                                or img_generation_llm_config.model_name\n                            ),\n                            custom_config=img_generation_llm_config.custom_config,\n                        ),\n                        provider=img_generation_llm_config.model_provider,\n                        model=img_generation_llm_config.model_name,\n                        tool_id=db_tool_model.id,\n                        emitter=emitter,\n                    )\n                ]\n\n            # Handle Web Search Tool\n            elif tool_cls.__name__ == WebSearchTool.__name__:\n                try:\n                    tool_dict[db_tool_model.id] = [\n                        WebSearchTool(tool_id=db_tool_model.id, emitter=emitter)\n                    ]\n                except ValueError as e:\n                    logger.error(f\"Failed to initialize Internet Search Tool: {e}\")\n                    raise ValueError(\n                        \"Internet search tool requires a search provider API key, please contact your Onyx admin to get it added!\"\n                    )\n\n            # Handle Open URL Tool\n            elif tool_cls.__name__ == OpenURLTool.__name__:\n                try:\n                    tool_dict[db_tool_model.id] = [\n                        OpenURLTool(\n                            tool_id=db_tool_model.id,\n                            emitter=emitter,\n                            document_index=document_index,\n                            user=user,\n                        )\n                    ]\n                except RuntimeError as e:\n                    logger.error(f\"Failed to initialize Open URL Tool: {e}\")\n                    raise ValueError(\n                        \"Open URL tool requires a web content provider, please contact your Onyx admin to get it configured!\"\n                    )\n\n            # Handle Python/Code Interpreter Tool\n            elif tool_cls.__name__ == PythonTool.__name__:\n                tool_dict[db_tool_model.id] = [\n                    PythonTool(tool_id=db_tool_model.id, emitter=emitter)\n                ]\n\n            # Handle File Reader Tool\n            elif tool_cls.__name__ == FileReaderTool.__name__:\n                cfg = file_reader_tool_config or FileReaderToolConfig()\n                tool_dict[db_tool_model.id] = [\n                    FileReaderTool(\n                        tool_id=db_tool_model.id,\n                        emitter=emitter,\n                        user_file_ids=cfg.user_file_ids,\n                        chat_file_ids=cfg.chat_file_ids,\n                    )\n                ]\n\n            # Handle KG Tool\n            # TODO: disabling for now because it's broken in the refactor\n            # elif tool_cls.__name__ == KnowledgeGraphTool.__name__:\n\n            #     # skip the knowledge graph tool if KG is not enabled/exposed\n            #     kg_config = get_kg_config_settings()\n            #     if not kg_config.KG_ENABLED or not kg_config.KG_EXPOSED:\n            #         logger.debug(\"Knowledge Graph Tool is not enabled/exposed\")\n            #         continue\n\n            #     if persona.name != TMP_DRALPHA_PERSONA_NAME:\n            #         # TODO: remove this after the beta period\n            #         raise ValueError(\n            #             f\"The Knowledge Graph Tool should only be used by the '{TMP_DRALPHA_PERSONA_NAME}' Agent.\"\n            #         )\n            #     tool_dict[db_tool_model.id] = [\n            #         KnowledgeGraphTool(tool_id=db_tool_model.id)\n            #     ]\n\n        # Handle custom tools\n        elif db_tool_model.openapi_schema:\n            if not custom_tool_config:\n                custom_tool_config = CustomToolConfig()\n\n            # Determine which OAuth token to use\n            oauth_token_for_tool = None\n\n            # Priority 1: OAuth config (per-tool OAuth)\n            if db_tool_model.oauth_config_id:\n                if user.is_anonymous:\n                    logger.warning(\n                        f\"Anonymous user cannot use OAuth tool {db_tool_model.id}\"\n                    )\n                    continue\n                oauth_config = get_oauth_config(\n                    db_tool_model.oauth_config_id, db_session\n                )\n                if oauth_config:\n                    token_manager = OAuthTokenManager(oauth_config, user.id, db_session)\n                    oauth_token_for_tool = token_manager.get_valid_access_token()\n                    if not oauth_token_for_tool:\n                        logger.warning(\n                            f\"No valid OAuth token found for tool {db_tool_model.id} \"\n                            f\"with OAuth config {db_tool_model.oauth_config_id}\"\n                        )\n\n            # Priority 2: Passthrough auth (user's login OAuth token)\n            elif db_tool_model.passthrough_auth:\n                if user.is_anonymous:\n                    logger.warning(\n                        f\"Anonymous user cannot use passthrough auth tool {db_tool_model.id}\"\n                    )\n                    continue\n                oauth_token_for_tool = user_oauth_token\n\n            tool_dict[db_tool_model.id] = cast(\n                list[Tool],\n                build_custom_tools_from_openapi_schema_and_headers(\n                    tool_id=db_tool_model.id,\n                    openapi_schema=db_tool_model.openapi_schema,\n                    emitter=emitter,\n                    dynamic_schema_info=DynamicSchemaInfo(\n                        chat_session_id=custom_tool_config.chat_session_id,\n                        message_id=custom_tool_config.message_id,\n                    ),\n                    custom_headers=(db_tool_model.custom_headers or [])\n                    + (\n                        header_dict_to_header_list(\n                            custom_tool_config.additional_headers or {}\n                        )\n                    ),\n                    user_oauth_token=oauth_token_for_tool,\n                ),\n            )\n\n        # Handle MCP tools\n        elif db_tool_model.mcp_server_id:\n            if db_tool_model.mcp_server_id in mcp_tool_cache:\n                tool_dict[db_tool_model.id] = [\n                    mcp_tool_cache[db_tool_model.mcp_server_id][db_tool_model.id]\n                ]\n                continue\n\n            mcp_server = get_mcp_server_by_id(db_tool_model.mcp_server_id, db_session)\n\n            # Get user-specific connection config if needed\n            connection_config = None\n            user_email = user.email\n            mcp_user_oauth_token = None\n\n            if mcp_server.auth_type == MCPAuthenticationType.PT_OAUTH:\n                # Pass-through OAuth: use the user's login OAuth token\n                if user.is_anonymous:\n                    logger.warning(\n                        f\"Anonymous user cannot use PT_OAUTH MCP server {mcp_server.id}\"\n                    )\n                    continue\n                mcp_user_oauth_token = user_oauth_token\n            elif (\n                mcp_server.auth_type == MCPAuthenticationType.API_TOKEN\n                or mcp_server.auth_type == MCPAuthenticationType.OAUTH\n            ):\n                # If server has a per-user template, only use that user's config\n                if mcp_server.auth_performer == MCPAuthenticationPerformer.PER_USER:\n                    connection_config = get_user_connection_config(\n                        mcp_server.id, user_email, db_session\n                    )\n                else:\n                    # No per-user template: use admin config\n                    connection_config = mcp_server.admin_connection_config\n\n            # Get all saved tools for this MCP server\n            saved_tools = get_all_mcp_tools_for_server(mcp_server.id, db_session)\n\n            # Find the specific tool that this database entry represents\n            expected_tool_name = db_tool_model.display_name\n\n            # Extract additional MCP headers from config\n            additional_mcp_headers = None\n            if custom_tool_config and custom_tool_config.mcp_headers:\n                additional_mcp_headers = custom_tool_config.mcp_headers\n\n            mcp_tool_cache[db_tool_model.mcp_server_id] = {}\n            # Find the matching tool definition\n            for saved_tool in saved_tools:\n                # Create MCPTool instance for this specific tool\n                mcp_tool = MCPTool(\n                    tool_id=saved_tool.id,\n                    emitter=emitter,\n                    mcp_server=mcp_server,\n                    tool_name=saved_tool.name,\n                    tool_description=saved_tool.description,\n                    tool_definition=saved_tool.mcp_input_schema or {},\n                    connection_config=connection_config,\n                    user_email=user_email,\n                    user_id=str(user.id),\n                    user_oauth_token=mcp_user_oauth_token,\n                    additional_headers=additional_mcp_headers,\n                )\n                mcp_tool_cache[db_tool_model.mcp_server_id][saved_tool.id] = mcp_tool\n\n                if saved_tool.id == db_tool_model.id:\n                    tool_dict[saved_tool.id] = [cast(Tool, mcp_tool)]\n            if db_tool_model.id not in tool_dict:\n                logger.warning(\n                    f\"Tool '{expected_tool_name}' not found in MCP server '{mcp_server.name}'\"\n                )\n\n    if (\n        not added_search_tool\n        and search_usage_forcing_setting == SearchToolUsage.ENABLED\n        and not DISABLE_VECTOR_DB\n    ):\n        # Get the database tool model for SearchTool\n        search_tool_db_model = get_builtin_tool(db_session, SearchTool)\n\n        if not search_tool_config:\n            search_tool_config = SearchToolConfig()\n\n        tool_dict[search_tool_db_model.id] = [\n            _build_search_tool(search_tool_db_model.id, search_tool_config)\n        ]\n\n    # Always inject MemoryTool when the user has the memory tool enabled,\n    # bypassing persona tool associations and allowed_tool_ids filtering\n    if user.enable_memory_tool:\n        try:\n            memory_tool_db_model = get_builtin_tool(db_session, MemoryTool)\n            memory_tool = MemoryTool(\n                tool_id=memory_tool_db_model.id,\n                emitter=emitter,\n                llm=llm,\n            )\n            tool_dict[memory_tool_db_model.id] = [memory_tool]\n        except RuntimeError:\n            logger.warning(\n                \"MemoryTool not found in the database. Run the latest alembic migration to seed it.\"\n            )\n\n    tools: list[Tool] = []\n    for tool_list in tool_dict.values():\n        tools.extend(tool_list)\n\n    return tool_dict\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/custom/base_tool_types.py",
    "content": "# should really be `JSON_ro`, but this causes issues with pydantic\nToolResultType = dict | list | str | int | float | bool\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/custom/custom_tool.py",
    "content": "import csv\nimport json\nimport queue\nimport uuid\nfrom io import BytesIO\nfrom io import StringIO\nfrom typing import Any\nfrom typing import Dict\nfrom typing import List\n\nimport requests\nfrom requests import JSONDecodeError\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import CustomToolArgs\nfrom onyx.server.query_and_chat.streaming_models import CustomToolDelta\nfrom onyx.server.query_and_chat.streaming_models import CustomToolErrorInfo\nfrom onyx.server.query_and_chat.streaming_models import CustomToolStart\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import CHAT_SESSION_ID_PLACEHOLDER\nfrom onyx.tools.models import CustomToolCallSummary\nfrom onyx.tools.models import CustomToolUserFileSnapshot\nfrom onyx.tools.models import DynamicSchemaInfo\nfrom onyx.tools.models import MESSAGE_ID_PLACEHOLDER\nfrom onyx.tools.models import ToolCallException\nfrom onyx.tools.models import ToolResponse\nfrom onyx.tools.tool_implementations.custom.openapi_parsing import MethodSpec\nfrom onyx.tools.tool_implementations.custom.openapi_parsing import (\n    openapi_to_method_specs,\n)\nfrom onyx.tools.tool_implementations.custom.openapi_parsing import openapi_to_url\nfrom onyx.tools.tool_implementations.custom.openapi_parsing import REQUEST_BODY\nfrom onyx.tools.tool_implementations.custom.openapi_parsing import (\n    validate_openapi_schema,\n)\nfrom onyx.utils.headers import header_list_to_header_dict\nfrom onyx.utils.headers import HeaderItemDict\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nCUSTOM_TOOL_RESPONSE_ID = \"custom_tool_response\"\n\n\n# override_kwargs is not supported for custom tools\nclass CustomTool(Tool[None]):\n    def __init__(\n        self,\n        id: int,\n        method_spec: MethodSpec,\n        base_url: str,\n        emitter: Emitter,\n        custom_headers: list[HeaderItemDict] | None = None,\n        user_oauth_token: str | None = None,\n    ) -> None:\n        super().__init__(emitter=emitter)\n\n        self._base_url = base_url\n        self._method_spec = method_spec\n        self._tool_definition = self._method_spec.to_tool_definition()\n        self._user_oauth_token = user_oauth_token\n        self._id = id\n\n        self._name = self._method_spec.name\n        self._description = self._method_spec.summary\n        self.headers = (\n            header_list_to_header_dict(custom_headers) if custom_headers else {}\n        )\n\n        # Check for both Authorization header and OAuth token\n        has_auth_header = any(\n            key.lower() == \"authorization\" for key in self.headers.keys()\n        )\n        if has_auth_header and self._user_oauth_token:\n            logger.warning(\n                f\"Tool '{self._name}' has both an Authorization \"\n                \"header and OAuth token set. This is likely a configuration \"\n                \"error as the OAuth token will override the custom header.\"\n            )\n\n        if self._user_oauth_token:\n            self.headers[\"Authorization\"] = f\"Bearer {self._user_oauth_token}\"\n\n    @property\n    def id(self) -> int:\n        return self._id\n\n    @property\n    def name(self) -> str:\n        return self._name\n\n    @property\n    def description(self) -> str:\n        return self._description\n\n    @property\n    def display_name(self) -> str:\n        return self._name\n\n    def tool_definition(self) -> dict:\n        return self._tool_definition\n\n    def _save_and_get_file_references(\n        self, file_content: bytes | str, content_type: str\n    ) -> List[str]:\n        file_store = get_default_file_store()\n\n        file_id = str(uuid.uuid4())\n\n        # Handle both binary and text content\n        if isinstance(file_content, str):\n            content = BytesIO(file_content.encode())\n        else:\n            content = BytesIO(file_content)\n\n        file_store.save_file(\n            file_id=file_id,\n            content=content,\n            display_name=file_id,\n            file_origin=FileOrigin.CHAT_UPLOAD,\n            file_type=content_type,\n            file_metadata={\n                \"content_type\": content_type,\n            },\n        )\n\n        return [file_id]\n\n    def _parse_csv(self, csv_text: str) -> List[Dict[str, Any]]:\n        csv_file = StringIO(csv_text)\n        reader = csv.DictReader(csv_file)\n        return [row for row in reader]\n\n    \"\"\"Actual execution of the tool\"\"\"\n\n    def emit_start(self, placement: Placement) -> None:\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=CustomToolStart(tool_name=self._name, tool_id=self._id),\n            )\n        )\n\n    def run(\n        self,\n        placement: Placement,\n        override_kwargs: None = None,  # noqa: ARG002\n        **llm_kwargs: Any,\n    ) -> ToolResponse:\n        # Build path params\n        path_params = {}\n        for path_param_schema in self._method_spec.get_path_param_schemas():\n            param_name = path_param_schema[\"name\"]\n            if param_name not in llm_kwargs:\n                raise ToolCallException(\n                    message=f\"Missing required path parameter '{param_name}' in {self._name} tool call\",\n                    llm_facing_message=(\n                        f\"The {self._name} tool requires the '{param_name}' path parameter. \"\n                        f\"Please provide it in the tool call arguments.\"\n                    ),\n                )\n            path_params[param_name] = llm_kwargs[param_name]\n\n        # Build query params\n        query_params = {}\n        for query_param_schema in self._method_spec.get_query_param_schemas():\n            if query_param_schema[\"name\"] in llm_kwargs:\n                query_params[query_param_schema[\"name\"]] = llm_kwargs[\n                    query_param_schema[\"name\"]\n                ]\n\n        # Emit args packet (path + query params only, no request body)\n        tool_args = {**path_params, **query_params}\n        if tool_args:\n            self.emitter.emit(\n                Packet(\n                    placement=placement,\n                    obj=CustomToolArgs(\n                        tool_name=self._name,\n                        tool_args=tool_args,\n                    ),\n                )\n            )\n\n        request_body = llm_kwargs.get(REQUEST_BODY)\n        url = self._method_spec.build_url(self._base_url, path_params, query_params)\n        method = self._method_spec.method\n\n        response = requests.request(\n            method, url, json=request_body, headers=self.headers\n        )\n        content_type = response.headers.get(\"Content-Type\", \"\")\n\n        # Detect HTTP errors — only 401/403 are flagged as auth errors\n        error_info: CustomToolErrorInfo | None = None\n        if response.status_code in (401, 403):\n            error_info = CustomToolErrorInfo(\n                is_auth_error=True,\n                status_code=response.status_code,\n                message=f\"{self._name} action failed because of authentication error\",\n            )\n            logger.warning(\n                f\"Auth error from custom tool '{self._name}': HTTP {response.status_code}\"\n            )\n\n        tool_result: Any\n        response_type: str\n        file_ids: List[str] | None = None\n        data: dict | list | str | int | float | bool | None = None\n\n        if \"text/csv\" in content_type:\n            file_ids = self._save_and_get_file_references(\n                response.content, content_type\n            )\n            tool_result = CustomToolUserFileSnapshot(file_ids=file_ids)\n            response_type = \"csv\"\n\n        elif \"image/\" in content_type:\n            file_ids = self._save_and_get_file_references(\n                response.content, content_type\n            )\n            tool_result = CustomToolUserFileSnapshot(file_ids=file_ids)\n            response_type = \"image\"\n\n        else:\n            try:\n                tool_result = response.json()\n                response_type = \"json\"\n                data = tool_result\n            except JSONDecodeError:\n                logger.exception(\n                    f\"Failed to parse response as JSON for tool '{self._name}'\"\n                )\n                tool_result = response.text\n                response_type = \"text\"\n                data = tool_result\n\n        logger.info(\n            f\"Returning tool response for {self._name} with type {response_type}\"\n        )\n\n        # Emit CustomToolDelta packet\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=CustomToolDelta(\n                    tool_name=self._name,\n                    tool_id=self._id,\n                    response_type=response_type,\n                    data=data,\n                    file_ids=file_ids,\n                    error=error_info,\n                ),\n            )\n        )\n\n        llm_facing_response = json.dumps(tool_result)\n\n        return ToolResponse(\n            rich_response=CustomToolCallSummary(\n                tool_name=self._name,\n                response_type=response_type,\n                tool_result=tool_result,\n                error=error_info,\n            ),\n            llm_facing_response=llm_facing_response,\n        )\n\n\ndef build_custom_tools_from_openapi_schema_and_headers(\n    tool_id: int,\n    openapi_schema: dict[str, Any],\n    emitter: Emitter | None = None,\n    custom_headers: list[HeaderItemDict] | None = None,\n    dynamic_schema_info: DynamicSchemaInfo | None = None,\n    user_oauth_token: str | None = None,\n) -> list[CustomTool]:\n    if dynamic_schema_info:\n        # Process dynamic schema information\n        schema_str = json.dumps(openapi_schema)\n        placeholders = {\n            CHAT_SESSION_ID_PLACEHOLDER: dynamic_schema_info.chat_session_id,\n            MESSAGE_ID_PLACEHOLDER: dynamic_schema_info.message_id,\n        }\n\n        for placeholder, value in placeholders.items():\n            if value:\n                schema_str = schema_str.replace(placeholder, str(value))\n\n        openapi_schema = json.loads(schema_str)\n\n    url = openapi_to_url(openapi_schema)\n    method_specs = openapi_to_method_specs(openapi_schema)\n\n    # Use a discard emitter if none provided (packets go nowhere)\n    if emitter is None:\n        emitter = Emitter(merged_queue=queue.Queue())\n\n    return [\n        CustomTool(\n            id=tool_id,\n            method_spec=method_spec,\n            base_url=url,\n            emitter=emitter,\n            custom_headers=custom_headers,\n            user_oauth_token=user_oauth_token,\n        )\n        for method_spec in method_specs\n    ]\n\n\nif __name__ == \"__main__\":\n    import openai\n    from openai.types.chat.chat_completion_message_function_tool_call import (\n        ChatCompletionMessageFunctionToolCall,\n    )\n\n    openapi_schema = {\n        \"openapi\": \"3.0.0\",\n        \"info\": {\n            \"version\": \"1.0.0\",\n            \"title\": \"Assistants API\",\n            \"description\": \"An API for managing assistants\",\n        },\n        \"servers\": [\n            {\"url\": \"http://localhost:8080\"},\n        ],\n        \"paths\": {\n            \"/assistant/{assistant_id}\": {\n                \"get\": {\n                    \"summary\": \"Get a specific Assistant\",\n                    \"operationId\": \"getAssistant\",\n                    \"parameters\": [\n                        {\n                            \"name\": \"assistant_id\",\n                            \"in\": \"path\",\n                            \"required\": True,\n                            \"schema\": {\"type\": \"string\"},\n                        }\n                    ],\n                },\n                \"post\": {\n                    \"summary\": \"Create a new Assistant\",\n                    \"operationId\": \"createAssistant\",\n                    \"parameters\": [\n                        {\n                            \"name\": \"assistant_id\",\n                            \"in\": \"path\",\n                            \"required\": True,\n                            \"schema\": {\"type\": \"string\"},\n                        }\n                    ],\n                    \"requestBody\": {\n                        \"required\": True,\n                        \"content\": {\"application/json\": {\"schema\": {\"type\": \"object\"}}},\n                    },\n                },\n            }\n        },\n    }\n    validate_openapi_schema(openapi_schema)\n\n    tools = build_custom_tools_from_openapi_schema_and_headers(\n        tool_id=0,  # dummy tool id\n        openapi_schema=openapi_schema,\n        emitter=Emitter(merged_queue=queue.Queue()),\n        dynamic_schema_info=None,\n    )\n\n    openai_client = openai.OpenAI()\n    response = openai_client.chat.completions.create(\n        model=\"gpt-4o\",\n        messages=[\n            {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n            {\"role\": \"user\", \"content\": \"Can you fetch assistant with ID 10\"},\n        ],\n        tools=[tool.tool_definition() for tool in tools],  # type: ignore\n    )\n    choice = response.choices[0]\n    if choice.message.tool_calls:\n        print(choice.message.tool_calls)\n        tool_call = choice.message.tool_calls[0]\n        if isinstance(tool_call, ChatCompletionMessageFunctionToolCall):\n            # Note: This example code would need a proper run_context with emitter\n            # For testing purposes, this would need to be updated\n            print(\"Tool execution requires run_context with emitter\")\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/custom/openapi_parsing.py",
    "content": "from typing import Any\nfrom typing import cast\n\nfrom pydantic import BaseModel\n\nREQUEST_BODY = \"requestBody\"\n\n\nclass PathSpec(BaseModel):\n    path: str\n    methods: dict[str, Any]\n\n\nclass MethodSpec(BaseModel):\n    name: str\n    summary: str\n    path: str\n    method: str\n    spec: dict[str, Any]\n\n    def get_request_body_schema(self) -> dict[str, Any]:\n        content = self.spec.get(\"requestBody\", {}).get(\"content\", {})\n        if \"application/json\" in content:\n            return content[\"application/json\"].get(\"schema\")\n\n        if content:\n            raise ValueError(\n                f\"Unsupported content type: '{list(content.keys())[0]}'. Only 'application/json' is supported.\"\n            )\n\n        return {}\n\n    def get_query_param_schemas(self) -> list[dict[str, Any]]:\n        return [\n            param\n            for param in self.spec.get(\"parameters\", [])\n            if \"schema\" in param and \"in\" in param and param[\"in\"] == \"query\"\n        ]\n\n    def get_path_param_schemas(self) -> list[dict[str, Any]]:\n        return [\n            param\n            for param in self.spec.get(\"parameters\", [])\n            if \"schema\" in param and \"in\" in param and param[\"in\"] == \"path\"\n        ]\n\n    def build_url(\n        self, base_url: str, path_params: dict[str, str], query_params: dict[str, str]\n    ) -> str:\n        url = f\"{base_url}{self.path}\"\n        try:\n            url = url.format(**path_params)\n        except KeyError as e:\n            raise ValueError(f\"Missing path parameter: {e}\")\n        if query_params:\n            url += \"?\"\n            for param, value in query_params.items():\n                url += f\"{param}={value}&\"\n            url = url[:-1]\n        return url\n\n    def to_tool_definition(self) -> dict[str, Any]:\n        tool_definition: Any = {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": self.name,\n                \"description\": self.summary,\n                \"parameters\": {\"type\": \"object\", \"properties\": {}},\n            },\n        }\n\n        request_body_schema = self.get_request_body_schema()\n        if request_body_schema:\n            tool_definition[\"function\"][\"parameters\"][\"properties\"][\n                REQUEST_BODY\n            ] = request_body_schema\n\n        query_param_schemas = self.get_query_param_schemas()\n        if query_param_schemas:\n            tool_definition[\"function\"][\"parameters\"][\"properties\"].update(\n                {param[\"name\"]: param[\"schema\"] for param in query_param_schemas}\n            )\n\n        path_param_schemas = self.get_path_param_schemas()\n        if path_param_schemas:\n            tool_definition[\"function\"][\"parameters\"][\"properties\"].update(\n                {param[\"name\"]: param[\"schema\"] for param in path_param_schemas}\n            )\n        return tool_definition\n\n    def validate_spec(self) -> None:\n        # Validate url construction\n        path_param_schemas = self.get_path_param_schemas()\n        dummy_path_dict = {param[\"name\"]: \"value\" for param in path_param_schemas}\n        query_param_schemas = self.get_query_param_schemas()\n        dummy_query_dict = {param[\"name\"]: \"value\" for param in query_param_schemas}\n        self.build_url(\"\", dummy_path_dict, dummy_query_dict)\n\n        # Make sure request body doesn't throw an exception\n        self.get_request_body_schema()\n\n        # Ensure the method is valid\n        if not self.method:\n            raise ValueError(\"HTTP method is not specified.\")\n        if self.method.upper() not in [\"GET\", \"POST\", \"PUT\", \"DELETE\", \"PATCH\"]:\n            raise ValueError(f\"HTTP method '{self.method}' is not supported.\")\n\n\n\"\"\"Path-level utils\"\"\"\n\n\ndef openapi_to_path_specs(openapi_spec: dict[str, Any]) -> list[PathSpec]:\n    path_specs = []\n\n    for path, methods in openapi_spec.get(\"paths\", {}).items():\n        path_specs.append(PathSpec(path=path, methods=methods))\n\n    return path_specs\n\n\n\"\"\"Method-level utils\"\"\"\n\n\ndef openapi_to_method_specs(openapi_spec: dict[str, Any]) -> list[MethodSpec]:\n    path_specs = openapi_to_path_specs(openapi_spec)\n\n    method_specs = []\n    for path_spec in path_specs:\n        for method_name, method in path_spec.methods.items():\n            name = method.get(\"operationId\")\n            if not name:\n                raise ValueError(\n                    f\"Operation ID is not specified for {method_name.upper()} {path_spec.path}\"\n                )\n\n            summary = method.get(\"summary\") or method.get(\"description\")\n            if not summary:\n                raise ValueError(\n                    f\"Summary is not specified for {method_name.upper()} {path_spec.path}\"\n                )\n\n            method_specs.append(\n                MethodSpec(\n                    name=name,\n                    summary=summary,\n                    path=path_spec.path,\n                    method=method_name,\n                    spec=method,\n                )\n            )\n\n    if not method_specs:\n        raise ValueError(\"No methods found in OpenAPI schema\")\n\n    return method_specs\n\n\ndef openapi_to_url(openapi_schema: dict[str, dict | str]) -> str:\n    \"\"\"\n    Extract URLs from the servers section of an OpenAPI schema.\n\n    Args:\n        openapi_schema (Dict[str, Union[Dict, str, List]]): The OpenAPI schema in dictionary format.\n\n    Returns:\n        List[str]: A list of base URLs.\n    \"\"\"\n    urls: list[str] = []\n\n    servers = cast(list[dict[str, Any]], openapi_schema.get(\"servers\", []))\n    for server in servers:\n        url = server.get(\"url\")\n        if url:\n            urls.append(url)\n\n    if len(urls) != 1:\n        raise ValueError(\n            f\"Expected exactly one URL in OpenAPI schema, but found {urls}\"\n        )\n\n    return urls[0]\n\n\ndef validate_openapi_schema(schema: dict[str, Any]) -> None:\n    \"\"\"\n    Validate the given JSON schema as an OpenAPI schema.\n\n    Parameters:\n    - schema (dict): The JSON schema to validate.\n\n    Returns:\n    - bool: True if the schema is valid, False otherwise.\n    \"\"\"\n\n    # check basic structure\n    if \"info\" not in schema:\n        raise ValueError(\"`info` section is required in OpenAPI schema\")\n\n    info = schema[\"info\"]\n    if \"title\" not in info:\n        raise ValueError(\"`title` is required in `info` section of OpenAPI schema\")\n    if \"description\" not in info:\n        raise ValueError(\n            \"`description` is required in `info` section of OpenAPI schema\"\n        )\n\n    if \"openapi\" not in schema:\n        raise ValueError(\n            \"`openapi` field which specifies OpenAPI schema version is required\"\n        )\n    openapi_version = schema[\"openapi\"]\n    if not openapi_version.startswith(\"3.\"):\n        raise ValueError(f\"OpenAPI version '{openapi_version}' is not supported\")\n\n    if \"paths\" not in schema:\n        raise ValueError(\"`paths` section is required in OpenAPI schema\")\n\n    url = openapi_to_url(schema)\n    if not url:\n        raise ValueError(\"OpenAPI schema does not contain a valid URL in `servers`\")\n\n    method_specs = openapi_to_method_specs(schema)\n    for method_spec in method_specs:\n        method_spec.validate_spec()\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/file_reader/file_reader_tool.py",
    "content": "import io\nimport json\nfrom typing import Any\nfrom typing import cast\nfrom uuid import UUID\n\nfrom sqlalchemy.orm import Session\nfrom typing_extensions import override\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.configs.app_configs import DISABLE_VECTOR_DB\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.file_processing.extract_file_text import extract_file_text\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.file_store.models import InMemoryChatFile\nfrom onyx.file_store.utils import load_chat_file_by_id\nfrom onyx.file_store.utils import load_user_file\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import FileReaderResult\nfrom onyx.server.query_and_chat.streaming_models import FileReaderStart\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import ToolCallException\nfrom onyx.tools.models import ToolResponse\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nFILE_ID_FIELD = \"file_id\"\nSTART_CHAR_FIELD = \"start_char\"\nNUM_CHARS_FIELD = \"num_chars\"\n\nMAX_NUM_CHARS = 16000\nDEFAULT_NUM_CHARS = MAX_NUM_CHARS\nPREVIEW_CHARS = 500\n\n\nclass FileReaderToolOverrideKwargs:\n    \"\"\"No override kwargs needed for the file reader tool.\"\"\"\n\n\nclass FileReaderTool(Tool[FileReaderToolOverrideKwargs]):\n    NAME = \"read_file\"\n    DISPLAY_NAME = \"File Reader\"\n    DESCRIPTION = (\n        \"Read a section of a user-uploaded file by character offset. \"\n        \"Returns up to 16000 characters starting from the given offset.\"\n    )\n\n    def __init__(\n        self,\n        tool_id: int,\n        emitter: Emitter,\n        user_file_ids: list[UUID],\n        chat_file_ids: list[UUID],\n    ) -> None:\n        super().__init__(emitter=emitter)\n        self._id = tool_id\n        self._user_file_ids = set(user_file_ids)\n        self._chat_file_ids = set(chat_file_ids)\n\n    @property\n    def id(self) -> int:\n        return self._id\n\n    @property\n    def name(self) -> str:\n        return self.NAME\n\n    @property\n    def description(self) -> str:\n        return self.DESCRIPTION\n\n    @property\n    def display_name(self) -> str:\n        return self.DISPLAY_NAME\n\n    @override\n    @classmethod\n    def is_available(cls, db_session: Session) -> bool:  # noqa: ARG003\n        # TODO(evan): temporary – gate behind DISABLE_VECTOR_DB until the tool is\n        # generalised for standard (vector-DB-enabled) deployments.\n        return DISABLE_VECTOR_DB\n\n    def tool_definition(self) -> dict:\n        return {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": self.name,\n                \"description\": self.DESCRIPTION,\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        FILE_ID_FIELD: {\n                            \"type\": \"string\",\n                            \"description\": \"The UUID of the file to read.\",\n                        },\n                        START_CHAR_FIELD: {\n                            \"type\": \"integer\",\n                            \"description\": (\n                                \"Character offset to start reading from. Defaults to 0.\"\n                            ),\n                        },\n                        NUM_CHARS_FIELD: {\n                            \"type\": \"integer\",\n                            \"description\": (\n                                \"Number of characters to return (max 16000). Defaults to 16000.\"\n                            ),\n                        },\n                    },\n                    \"required\": [FILE_ID_FIELD],\n                },\n            },\n        }\n\n    def emit_start(self, placement: Placement) -> None:\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=FileReaderStart(),\n            )\n        )\n\n    def _validate_file_id(self, raw_file_id: str) -> UUID:\n        try:\n            file_id = UUID(raw_file_id)\n        except ValueError:\n            raise ToolCallException(\n                message=f\"Invalid file_id: {raw_file_id}\",\n                llm_facing_message=f\"'{raw_file_id}' is not a valid file UUID.\",\n            )\n\n        if file_id not in self._user_file_ids and file_id not in self._chat_file_ids:\n            raise ToolCallException(\n                message=f\"File {file_id} not in available files\",\n                llm_facing_message=(\n                    f\"File '{file_id}' is not available. Please use one of the file IDs listed in the context.\"\n                ),\n            )\n\n        return file_id\n\n    def _load_file(self, file_id: UUID) -> InMemoryChatFile:\n        if file_id in self._user_file_ids:\n            with get_session_with_current_tenant() as db_session:\n                return load_user_file(file_id, db_session)\n        return load_chat_file_by_id(str(file_id))\n\n    def run(\n        self,\n        placement: Placement,\n        override_kwargs: FileReaderToolOverrideKwargs,  # noqa: ARG002\n        **llm_kwargs: Any,\n    ) -> ToolResponse:\n        if FILE_ID_FIELD not in llm_kwargs:\n            raise ToolCallException(\n                message=f\"Missing required '{FILE_ID_FIELD}' parameter\",\n                llm_facing_message=(\n                    f\"The read_file tool requires a '{FILE_ID_FIELD}' parameter. \"\n                    f'Example: {{\"file_id\": \"abc-123\", \"start_char\": 0, \"num_chars\": 16000}}'\n                ),\n            )\n\n        raw_file_id = cast(str, llm_kwargs[FILE_ID_FIELD])\n        file_id = self._validate_file_id(raw_file_id)\n        start_char = max(0, int(llm_kwargs.get(START_CHAR_FIELD, 0)))\n        num_chars = min(\n            MAX_NUM_CHARS,\n            max(1, int(llm_kwargs.get(NUM_CHARS_FIELD, DEFAULT_NUM_CHARS))),\n        )\n\n        chat_file = self._load_file(file_id)\n\n        # Only PLAIN_TEXT and TABULAR are guaranteed to contain actual text bytes.\n        # DOC type in a loaded file means plaintext extraction failed and the\n        # content is the original binary (e.g. raw PDF/DOCX bytes).\n        if chat_file.file_type not in (\n            ChatFileType.PLAIN_TEXT,\n            ChatFileType.TABULAR,\n        ):\n            raise ToolCallException(\n                message=f\"File {file_id} is not a text file (type={chat_file.file_type})\",\n                llm_facing_message=(\n                    f\"File '{chat_file.filename or file_id}' is a {chat_file.file_type.value} file and cannot be read as text.\"\n                ),\n            )\n\n        try:\n            if chat_file.file_type == ChatFileType.PLAIN_TEXT:\n                full_text = chat_file.content.decode(\"utf-8\", errors=\"replace\")\n            else:\n                full_text = (\n                    extract_file_text(\n                        file=io.BytesIO(chat_file.content),\n                        file_name=chat_file.filename or \"\",\n                        break_on_unprocessable=False,\n                    )\n                    or \"\"\n                )\n        except ToolCallException:\n            raise\n        except Exception:\n            raise ToolCallException(\n                message=f\"Failed to decode file {file_id}\",\n                llm_facing_message=\"The file could not be read as text.\",\n            )\n\n        total_chars = len(full_text)\n        end_char = min(start_char + num_chars, total_chars)\n        section = full_text[start_char:end_char]\n\n        file_name = chat_file.filename or str(file_id)\n\n        preview_start = section[:PREVIEW_CHARS]\n        preview_end = section[-PREVIEW_CHARS:] if len(section) > PREVIEW_CHARS else \"\"\n\n        # Emit result packet so the frontend can display what was read\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=FileReaderResult(\n                    file_name=file_name,\n                    file_id=str(file_id),\n                    start_char=start_char,\n                    end_char=end_char,\n                    total_chars=total_chars,\n                    preview_start=preview_start,\n                    preview_end=preview_end,\n                ),\n            )\n        )\n\n        has_more = end_char < total_chars\n        header = (\n            f\"File: {file_name}\\nCharacters {start_char}-{end_char} of {total_chars}\"\n        )\n        if has_more:\n            header += f\" (use start_char={end_char} to continue reading)\"\n\n        llm_response = f\"{header}\\n\\n{section}\"\n\n        # Build a lightweight summary for DB storage (avoids saving full text).\n        # The LLM-facing response carries the real content; the rich_response\n        # is what gets persisted and re-hydrated on page reload.\n        saved_summary = json.dumps(\n            {\n                \"file_name\": file_name,\n                \"file_id\": str(file_id),\n                \"start_char\": start_char,\n                \"end_char\": end_char,\n                \"total_chars\": total_chars,\n                \"preview_start\": preview_start,\n                \"preview_end\": preview_end,\n            }\n        )\n\n        return ToolResponse(\n            rich_response=saved_summary,\n            llm_facing_response=llm_response,\n        )\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/images/image_generation_tool.py",
    "content": "import json\nimport threading\nfrom typing import Any\nfrom typing import cast\n\nimport requests\nfrom sqlalchemy.orm import Session\nfrom typing_extensions import override\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.configs.app_configs import IMAGE_MODEL_NAME\nfrom onyx.configs.app_configs import IMAGE_MODEL_PROVIDER\nfrom onyx.db.image_generation import get_default_image_generation_config\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.file_store.utils import build_frontend_file_url\nfrom onyx.file_store.utils import load_chat_file_by_id\nfrom onyx.file_store.utils import save_files\nfrom onyx.image_gen.factory import get_image_generation_provider\nfrom onyx.image_gen.factory import validate_credentials\nfrom onyx.image_gen.interfaces import ImageGenerationProviderCredentials\nfrom onyx.image_gen.interfaces import ReferenceImage\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import GeneratedImage\nfrom onyx.server.query_and_chat.streaming_models import ImageGenerationFinal\nfrom onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeartbeat\nfrom onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import ImageGenerationToolOverrideKwargs\nfrom onyx.tools.models import ToolCallException\nfrom onyx.tools.models import ToolExecutionException\nfrom onyx.tools.models import ToolResponse\nfrom onyx.tools.tool_implementations.images.models import (\n    FinalImageGenerationResponse,\n)\nfrom onyx.tools.tool_implementations.images.models import ImageGenerationResponse\nfrom onyx.tools.tool_implementations.images.models import ImageShape\nfrom onyx.utils.b64 import get_image_type_from_bytes\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\n\nlogger = setup_logger()\n\n# Heartbeat interval in seconds to prevent timeouts\nHEARTBEAT_INTERVAL = 5.0\n\nPROMPT_FIELD = \"prompt\"\nREFERENCE_IMAGE_FILE_IDS_FIELD = \"reference_image_file_ids\"\n\n\nclass ImageGenerationTool(Tool[ImageGenerationToolOverrideKwargs | None]):\n    NAME = \"generate_image\"\n    DESCRIPTION = \"Generate an image based on a prompt. Do not use unless the user specifically requests an image.\"\n    DISPLAY_NAME = \"Image Generation\"\n\n    def __init__(\n        self,\n        image_generation_credentials: ImageGenerationProviderCredentials,\n        tool_id: int,\n        emitter: Emitter,\n        model: str = IMAGE_MODEL_NAME,\n        provider: str = IMAGE_MODEL_PROVIDER,\n        num_imgs: int = 1,\n    ) -> None:\n        super().__init__(emitter=emitter)\n        self.model = model\n        self.provider = provider\n        self.num_imgs = num_imgs\n\n        self.img_provider = get_image_generation_provider(\n            provider, image_generation_credentials\n        )\n\n        self._id = tool_id\n\n    @property\n    def id(self) -> int:\n        return self._id\n\n    @property\n    def name(self) -> str:\n        return self.NAME\n\n    @property\n    def description(self) -> str:\n        return self.DESCRIPTION\n\n    @property\n    def display_name(self) -> str:\n        return self.DISPLAY_NAME\n\n    @override\n    @classmethod\n    def is_available(cls, db_session: Session) -> bool:\n        \"\"\"Available if a default image generation config exists with valid credentials.\"\"\"\n        try:\n            config = get_default_image_generation_config(db_session)\n            if not config or not config.model_configuration:\n                return False\n\n            llm_provider = config.model_configuration.llm_provider\n            credentials = ImageGenerationProviderCredentials(\n                api_key=(\n                    llm_provider.api_key.get_value(apply_mask=False)\n                    if llm_provider.api_key\n                    else None\n                ),\n                api_base=llm_provider.api_base,\n                api_version=llm_provider.api_version,\n                deployment_name=llm_provider.deployment_name,\n                custom_config=llm_provider.custom_config,\n            )\n            return validate_credentials(\n                provider=llm_provider.provider,\n                credentials=credentials,\n            )\n        except Exception:\n            logger.exception(\"Error checking if image generation is available\")\n            return False\n\n    def tool_definition(self) -> dict:\n        return {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": self.name,\n                \"description\": self.description,\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        PROMPT_FIELD: {\n                            \"type\": \"string\",\n                            \"description\": \"Prompt used to generate the image\",\n                        },\n                        \"shape\": {\n                            \"type\": \"string\",\n                            \"description\": (\n                                \"Optional - only specify if you want a specific shape.\"\n                                \" Image shape: 'square', 'portrait', or 'landscape'.\"\n                            ),\n                            \"enum\": [shape.value for shape in ImageShape],\n                        },\n                        REFERENCE_IMAGE_FILE_IDS_FIELD: {\n                            \"type\": \"array\",\n                            \"description\": (\n                                \"Optional image file IDs to use as reference context for edits/variations. \"\n                                \"Use the file_id values returned by previous generate_image calls.\"\n                            ),\n                            \"items\": {\n                                \"type\": \"string\",\n                            },\n                        },\n                    },\n                    \"required\": [PROMPT_FIELD],\n                },\n            },\n        }\n\n    def emit_start(self, placement: Placement) -> None:\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=ImageGenerationToolStart(),\n            )\n        )\n\n    def _generate_image(\n        self,\n        prompt: str,\n        shape: ImageShape,\n        reference_images: list[ReferenceImage] | None = None,\n    ) -> tuple[ImageGenerationResponse, Any]:\n        if shape == ImageShape.LANDSCAPE:\n            if \"gpt-image-1\" in self.model:\n                size = \"1536x1024\"\n            else:\n                size = \"1792x1024\"\n        elif shape == ImageShape.PORTRAIT:\n            if \"gpt-image-1\" in self.model:\n                size = \"1024x1536\"\n            else:\n                size = \"1024x1792\"\n        else:\n            size = \"1024x1024\"\n        logger.debug(f\"Generating image with model: {self.model}, size: {size}\")\n        try:\n            response = self.img_provider.generate_image(\n                prompt=prompt,\n                model=self.model,\n                size=size,\n                n=1,\n                reference_images=reference_images,\n                # response_format parameter is not supported for gpt-image-1\n                response_format=None if \"gpt-image-1\" in self.model else \"b64_json\",\n            )\n\n            if not response.data or len(response.data) == 0:\n                raise RuntimeError(\"No image data returned from the API\")\n\n            image_item = response.data[0].model_dump()\n\n            image_data = image_item.get(\"b64_json\")\n            if not image_data:\n                raise RuntimeError(\"No base64 image data returned from the API\")\n\n            revised_prompt = image_item.get(\"revised_prompt\")\n            if revised_prompt is None:\n                revised_prompt = prompt\n\n            return (\n                ImageGenerationResponse(\n                    revised_prompt=revised_prompt,\n                    image_data=image_data,\n                ),\n                response,\n            )\n\n        except requests.RequestException as e:\n            logger.error(f\"Error fetching or converting image: {e}\")\n            raise ToolExecutionException(\n                \"Failed to fetch or convert the generated image\", emit_error_packet=True\n            )\n        except Exception as e:\n            logger.debug(f\"Error occurred during image generation: {e}\")\n\n            error_message = str(e)\n            if \"OpenAIException\" in str(type(e)):\n                if (\n                    \"Your request was rejected as a result of our safety system\"\n                    in error_message\n                ):\n                    raise ToolExecutionException(\n                        (\n                            \"The image generation request was rejected due to OpenAI's content policy. \"\n                            \"Please try a different prompt.\"\n                        ),\n                        emit_error_packet=True,\n                    )\n                elif \"Invalid image URL\" in error_message:\n                    raise ToolExecutionException(\n                        \"Invalid image URL provided for image generation.\",\n                        emit_error_packet=True,\n                    )\n                elif \"invalid_request_error\" in error_message:\n                    raise ToolExecutionException(\n                        \"Invalid request for image generation. Please check your input.\",\n                        emit_error_packet=True,\n                    )\n\n            raise ToolExecutionException(\n                f\"An error occurred during image generation. error={error_message}\",\n                emit_error_packet=True,\n            )\n\n    def _resolve_reference_image_file_ids(\n        self,\n        llm_kwargs: dict[str, Any],\n        override_kwargs: ImageGenerationToolOverrideKwargs | None,\n    ) -> list[str]:\n        raw_reference_ids = llm_kwargs.get(REFERENCE_IMAGE_FILE_IDS_FIELD)\n        if raw_reference_ids is not None:\n            if not isinstance(raw_reference_ids, list) or not all(\n                isinstance(file_id, str) for file_id in raw_reference_ids\n            ):\n                raise ToolCallException(\n                    message=(\n                        f\"Invalid {REFERENCE_IMAGE_FILE_IDS_FIELD}: expected array of strings, got {type(raw_reference_ids)}\"\n                    ),\n                    llm_facing_message=(\n                        f\"The '{REFERENCE_IMAGE_FILE_IDS_FIELD}' field must be an array of file_id strings.\"\n                    ),\n                )\n            reference_image_file_ids = [\n                file_id.strip() for file_id in raw_reference_ids if file_id.strip()\n            ]\n        elif (\n            override_kwargs\n            and override_kwargs.recent_generated_image_file_ids\n            and self.img_provider.supports_reference_images\n        ):\n            # If no explicit reference was provided, default to the most recently generated image.\n            reference_image_file_ids = [\n                override_kwargs.recent_generated_image_file_ids[-1]\n            ]\n        else:\n            reference_image_file_ids = []\n\n        # Deduplicate while preserving order.\n        deduped_reference_image_ids: list[str] = []\n        seen_ids: set[str] = set()\n        for file_id in reference_image_file_ids:\n            if file_id in seen_ids:\n                continue\n            seen_ids.add(file_id)\n            deduped_reference_image_ids.append(file_id)\n\n        if not deduped_reference_image_ids:\n            return []\n\n        if not self.img_provider.supports_reference_images:\n            raise ToolCallException(\n                message=(\n                    f\"Reference images requested but provider '{self.provider}' does not support image-editing context.\"\n                ),\n                llm_facing_message=(\n                    \"This image provider does not support editing from previous image context. \"\n                    \"Try text-only generation, or switch to a provider/model that supports image edits.\"\n                ),\n            )\n\n        max_reference_images = self.img_provider.max_reference_images\n        if max_reference_images > 0:\n            return deduped_reference_image_ids[-max_reference_images:]\n        return deduped_reference_image_ids\n\n    def _load_reference_images(\n        self,\n        reference_image_file_ids: list[str],\n    ) -> list[ReferenceImage]:\n        reference_images: list[ReferenceImage] = []\n\n        for file_id in reference_image_file_ids:\n            try:\n                loaded_file = load_chat_file_by_id(file_id)\n            except Exception as e:\n                raise ToolCallException(\n                    message=f\"Could not load reference image file '{file_id}': {e}\",\n                    llm_facing_message=(\n                        f\"Reference image file '{file_id}' could not be loaded. \"\n                        \"Use file_id values returned by previous generate_image calls.\"\n                    ),\n                )\n\n            if loaded_file.file_type != ChatFileType.IMAGE:\n                raise ToolCallException(\n                    message=f\"Reference file '{file_id}' is not an image\",\n                    llm_facing_message=f\"Reference file '{file_id}' is not an image.\",\n                )\n\n            try:\n                mime_type = get_image_type_from_bytes(loaded_file.content)\n            except Exception as e:\n                raise ToolCallException(\n                    message=f\"Unsupported reference image format for '{file_id}': {e}\",\n                    llm_facing_message=(\n                        f\"Reference image '{file_id}' has an unsupported format. Only PNG, JPEG, GIF, and WEBP are supported.\"\n                    ),\n                )\n\n            reference_images.append(\n                ReferenceImage(\n                    data=loaded_file.content,\n                    mime_type=mime_type,\n                )\n            )\n\n        return reference_images\n\n    def run(\n        self,\n        placement: Placement,\n        override_kwargs: ImageGenerationToolOverrideKwargs | None = None,\n        **llm_kwargs: Any,\n    ) -> ToolResponse:\n        if PROMPT_FIELD not in llm_kwargs:\n            raise ToolCallException(\n                message=f\"Missing required '{PROMPT_FIELD}' parameter in generate_image tool call\",\n                llm_facing_message=(\n                    f\"The generate_image tool requires a '{PROMPT_FIELD}' parameter describing \"\n                    f'the image to generate. Please provide like: {{\"prompt\": \"a sunset over mountains\"}}'\n                ),\n            )\n        prompt = cast(str, llm_kwargs[PROMPT_FIELD])\n        shape = ImageShape(llm_kwargs.get(\"shape\", ImageShape.SQUARE.value))\n        reference_image_file_ids = self._resolve_reference_image_file_ids(\n            llm_kwargs=llm_kwargs,\n            override_kwargs=override_kwargs,\n        )\n        reference_images = self._load_reference_images(reference_image_file_ids)\n\n        # Use threading to generate images in parallel while emitting heartbeats\n        results: list[tuple[ImageGenerationResponse, Any] | None] = [\n            None\n        ] * self.num_imgs\n        completed = threading.Event()\n        error_holder: list[Exception | None] = [None]\n\n        # TODO allow the LLM to determine number of images\n        def generate_all_images() -> None:\n            try:\n                generated_results = cast(\n                    list[tuple[ImageGenerationResponse, Any]],\n                    run_functions_tuples_in_parallel(\n                        [\n                            (\n                                self._generate_image,\n                                (\n                                    prompt,\n                                    shape,\n                                    reference_images or None,\n                                ),\n                            )\n                            for _ in range(self.num_imgs)\n                        ]\n                    ),\n                )\n                for i, result in enumerate(generated_results):\n                    results[i] = result\n            except Exception as e:\n                error_holder[0] = e\n            finally:\n                completed.set()\n\n        # Start image generation in background thread\n        generation_thread = threading.Thread(target=generate_all_images)\n        generation_thread.start()\n\n        # Emit heartbeat packets while waiting for completion\n        heartbeat_count = 0\n        while not completed.is_set():\n            # Emit a heartbeat packet to prevent timeout\n            self.emitter.emit(\n                Packet(\n                    placement=placement,\n                    obj=ImageGenerationToolHeartbeat(),\n                )\n            )\n            heartbeat_count += 1\n\n            # Wait for a short time before next heartbeat\n            if completed.wait(timeout=HEARTBEAT_INTERVAL):\n                break\n\n        # Ensure thread has completed\n        generation_thread.join()\n\n        # Check for errors\n        if error_holder[0] is not None:\n            raise error_holder[0]\n\n        # Filter out None values (shouldn't happen, but safety check)\n        valid_results = [r for r in results if r is not None]\n\n        if not valid_results:\n            raise ValueError(\"No images were generated\")\n\n        # Extract ImageGenerationResponse objects\n        image_generation_responses = [r[0] for r in valid_results]\n\n        # Save files and create GeneratedImage objects\n        file_ids = save_files(\n            urls=[],\n            base64_files=[img.image_data for img in image_generation_responses],\n        )\n        generated_images_metadata = [\n            GeneratedImage(\n                file_id=file_id,\n                url=build_frontend_file_url(file_id),\n                revised_prompt=img.revised_prompt,\n                shape=shape.value,\n            )\n            for img, file_id in zip(image_generation_responses, file_ids)\n        ]\n\n        # Emit final packet with generated images\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=ImageGenerationFinal(images=generated_images_metadata),\n            )\n        )\n\n        final_image_generation_response = FinalImageGenerationResponse(\n            generated_images=generated_images_metadata\n        )\n\n        # Create llm_facing_response\n        llm_facing_response = json.dumps(\n            [\n                {\n                    \"file_id\": img.file_id,\n                    \"revised_prompt\": img.revised_prompt,\n                }\n                for img in generated_images_metadata\n            ]\n        )\n\n        return ToolResponse(\n            rich_response=final_image_generation_response,\n            llm_facing_response=cast(str, llm_facing_response),\n        )\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/images/models.py",
    "content": "from enum import Enum\n\nfrom pydantic import BaseModel\n\nfrom onyx.server.query_and_chat.streaming_models import GeneratedImage\n\n\nclass ImageGenerationResponse(BaseModel):\n    revised_prompt: str\n    image_data: str\n\n\nclass ImageShape(str, Enum):\n    SQUARE = \"square\"\n    PORTRAIT = \"portrait\"\n    LANDSCAPE = \"landscape\"\n\n\nclass FinalImageGenerationResponse(BaseModel):\n    generated_images: list[GeneratedImage]\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/knowledge_graph/knowledge_graph_tool.py",
    "content": "from typing import Any\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.db.kg_config import get_kg_config_settings\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import ToolResponse\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nQUERY_FIELD = \"query\"\n\n\nclass KnowledgeGraphTool(Tool[None]):\n    _NAME = \"run_kg_search\"\n    _DESCRIPTION = \"Search the knowledge graph for information. Never call this tool.\"\n    _DISPLAY_NAME = \"Knowledge Graph Search\"\n\n    def __init__(self, tool_id: int, emitter: Emitter) -> None:\n        super().__init__(emitter=emitter)\n\n        self._id = tool_id\n\n        raise NotImplementedError(\n            \"KnowledgeGraphTool should not be getting used right now.\"\n        )\n\n    @property\n    def id(self) -> int:\n        return self._id\n\n    @property\n    def name(self) -> str:\n        return self._NAME\n\n    @property\n    def description(self) -> str:\n        return self._DESCRIPTION\n\n    @property\n    def display_name(self) -> str:\n        return self._DISPLAY_NAME\n\n    @classmethod\n    def is_available(cls, db_session: Session) -> bool:  # noqa: ARG003\n        \"\"\"Available only if KG is enabled and exposed.\"\"\"\n        kg_configs = get_kg_config_settings()\n        return kg_configs.KG_ENABLED and kg_configs.KG_EXPOSED\n\n    def tool_definition(self) -> dict:\n        return {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": self.name,\n                \"description\": self.description,\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        QUERY_FIELD: {\n                            \"type\": \"string\",\n                            \"description\": \"What to search for\",\n                        },\n                    },\n                    \"required\": [QUERY_FIELD],\n                },\n            },\n        }\n\n    def emit_start(self, placement: Placement) -> None:\n        raise NotImplementedError(\"KnowledgeGraphTool.emit_start is not implemented.\")\n\n    def run(\n        self,\n        placement: Placement,\n        override_kwargs: None = None,\n        **llm_kwargs: Any,\n    ) -> ToolResponse:\n        raise NotImplementedError(\"KnowledgeGraphTool.run is not implemented.\")\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/mcp/mcp_client.py",
    "content": "\"\"\"\nMCP (Model Context Protocol) Client Implementation\n\nThis module provides a proper MCP client that follows the JSON-RPC 2.0 specification\nand handles connection initialization, session management, and protocol communication.\n\"\"\"\n\nfrom collections.abc import Awaitable\nfrom collections.abc import Callable\nfrom enum import Enum\nfrom typing import Any\nfrom typing import Dict\nfrom typing import TypeVar\n\nfrom mcp import ClientSession\nfrom mcp.client.auth import OAuthClientProvider\nfrom mcp.client.sse import sse_client\nfrom mcp.client.streamable_http import streamablehttp_client  # or use stdio_client\nfrom mcp.types import CallToolResult\nfrom mcp.types import InitializeResult\nfrom mcp.types import ListResourcesResult\nfrom mcp.types import TextResourceContents\nfrom mcp.types import Tool as MCPLibTool\nfrom pydantic import BaseModel\n\nfrom onyx.db.enums import MCPTransport\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_async_sync_no_cancel\n\nlogger = setup_logger()\n\nT = TypeVar(\"T\", covariant=True)\n\nMCPClientFunction = Callable[[ClientSession], Awaitable[T]]\n\n\nclass MCPMessageType(str, Enum):\n    \"\"\"MCP message types\"\"\"\n\n    REQUEST = \"request\"\n    RESPONSE = \"response\"\n    NOTIFICATION = \"notification\"\n\n\nclass ContentBlockTypes(str, Enum):\n    \"\"\"MCP content block types\"\"\"  # Unfortunstely these aren't exposed by the mcp library\n\n    TEXT = \"text\"\n    IMAGE = \"image\"\n    AUDIO = \"audio\"\n    RESOURCE = \"resource\"\n    RESOURCE_LINK = \"resource_link\"\n\n\nclass MCPMessage(BaseModel):\n    \"\"\"Base MCP message following JSON-RPC 2.0\"\"\"\n\n    jsonrpc: str = \"2.0\"\n    method: str | None = None\n    params: Dict[str, Any] | None = None\n    id: Any | None = None\n    result: Any | None = None\n    error: Dict[str, Any] | None = None\n\n    def to_dict(self) -> Dict[str, Any]:\n        \"\"\"Convert to JSON-RPC message dict\"\"\"\n        msg: Dict[str, Any] = {\"jsonrpc\": self.jsonrpc}\n\n        if self.id is not None:\n            msg[\"id\"] = self.id\n\n        if self.method is not None:\n            msg[\"method\"] = self.method\n\n        if self.params is not None:\n            msg[\"params\"] = self.params\n\n        if self.result is not None:\n            msg[\"result\"] = self.result\n\n        if self.error is not None:\n            msg[\"error\"] = self.error\n\n        return msg\n\n\n# TODO: in the future we should do things like manage sessions and handle errors better\n# using an abstraction like this. For now things are purely functional and we initialize\n# a new session for each tool call.\n# class MCPClient:\n#     \"\"\"\n#     MCP Client implementation that properly handles the protocol lifecycle\n#     and different transport mechanisms.\n#     \"\"\"\n\n#     def __init__(\n#         self,\n#         server_url: str,\n#         transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,\n#         auth_token: str | None = None,\n#     ):\n#         self.server_url = server_url\n#         self.transport = transport\n#         self.auth_token = auth_token\n\n#         # Session management\n#         self.session: Optional[aiohttp.ClientSession] = None\n#         self.initialized = False\n#         self.capabilities: Dict[str, Any] = {}\n#         self.protocol_version = \"2025-03-26\"  # Current MCP protocol version\n#         self.session_id: str | None = None\n#         # Legacy HTTP+SSE transport support (backwards compatibility)\n#         self.legacy_post_endpoint: str | None = None\n\n#         # Message ID counter\n#         self._message_id_counter = 0\n\n#         # For stdio transport\n#         self.process: Optional[subprocess.Popen] = None\n\n\ndef _create_mcp_client_function_runner(\n    function: Callable[[ClientSession], Awaitable[T]],\n    server_url: str,\n    connection_headers: dict[str, str] | None = None,\n    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,\n    auth: OAuthClientProvider | None = None,  # TODO: maybe used this for all auth types\n    **kwargs: Any,\n) -> Callable[[], Awaitable[T]]:\n    auth_headers = connection_headers or {}\n    # WARNING: httpx.Auth with requires_response_body=True (as in the MCP OAuth\n    # provider) forces httpx to fully read the response body. That is incompatible\n    # with SSE (infinite stream). Avoid passing auth for SSE; rely on headers.\n    auth_for_request = auth if transport == MCPTransport.STREAMABLE_HTTP else None\n\n    # doing this here for mypy\n    client_func = (\n        streamablehttp_client\n        if transport == MCPTransport.STREAMABLE_HTTP\n        else sse_client\n    )\n\n    async def run_client_function() -> T:\n        async with client_func(\n            server_url, headers=auth_headers, auth=auth_for_request\n        ) as client_tuple:\n            if len(client_tuple) == 3:\n                read, write, _ = client_tuple\n            elif len(client_tuple) == 2:\n                assert isinstance(client_tuple, tuple)  # mypy\n                read, write = client_tuple\n            else:\n                raise ValueError(\n                    f\"Unexpected number of client tuple elements: {len(client_tuple)}\"\n                )\n            from datetime import timedelta\n\n            async with ClientSession(\n                read, write, read_timeout_seconds=timedelta(seconds=300)\n            ) as session:\n                return await function(session, **kwargs)\n\n    return run_client_function\n\n\ndef log_exception_group(e: ExceptionGroup) -> Exception | None:\n    logger.error(e)\n    saved_e = None\n    for err in e.exceptions:\n        if isinstance(err, ExceptionGroup):\n            saved_e = log_exception_group(err) or saved_e\n        else:\n            logger.error(err)\n            saved_e = err\n\n    return saved_e\n\n\ndef _call_mcp_client_function_sync(\n    function: Callable[[ClientSession], Awaitable[T]],\n    server_url: str,\n    connection_headers: dict[str, str] | None = None,\n    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,\n    auth: OAuthClientProvider | None = None,\n    **kwargs: Any,\n) -> T:\n    run_client_function = _create_mcp_client_function_runner(\n        function, server_url, connection_headers, transport, auth, **kwargs\n    )\n    try:\n        return run_async_sync_no_cancel(run_client_function())\n    except Exception as e:\n        logger.error(f\"Failed to call MCP client function: {e}\")\n        if isinstance(e, ExceptionGroup):\n            original_exception = e\n            saved_e = log_exception_group(e)\n            if saved_e:\n                raise saved_e\n            raise original_exception\n        raise e\n\n\nasync def _call_mcp_client_function_async(\n    function: Callable[[ClientSession], Awaitable[T]],\n    server_url: str,\n    connection_headers: dict[str, str] | None = None,\n    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,\n    auth: OAuthClientProvider | None = None,\n    **kwargs: Any,\n) -> T:\n    run_client_function = _create_mcp_client_function_runner(\n        function, server_url, connection_headers, transport, auth, **kwargs\n    )\n    return await run_client_function()\n\n\ndef process_mcp_result(call_tool_result: CallToolResult) -> str:\n    \"\"\"Flatten MCP CallToolResult->text (prefers text content blocks).\"\"\"\n    # TODO: use structured_content if available\n    parts = []\n    for content_block in call_tool_result.content:\n        if content_block.type == ContentBlockTypes.TEXT.value:\n            parts.append(content_block.text or \"\")\n        if content_block.type == ContentBlockTypes.RESOURCE.value:\n            if isinstance(content_block.resource, TextResourceContents):\n                parts.append(content_block.resource.text or \"\")\n            # TODO: handle blob resource content\n        if content_block.type == ContentBlockTypes.RESOURCE_LINK.value:\n            parts.append(\n                f\"link: {content_block.uri} title: {content_block.title} description: {content_block.description}\"\n            )\n        # TODO: handle other content block types\n\n    return \"\\n\\n\".join(p for p in parts if p) or str(call_tool_result.structuredContent)\n\n\ndef _call_mcp_tool(tool_name: str, arguments: dict[str, Any]) -> MCPClientFunction[str]:\n    async def call_tool(session: ClientSession) -> str:\n        await session.initialize()\n        result = await session.call_tool(tool_name, arguments)\n        return process_mcp_result(result)\n\n    return call_tool\n\n\ndef call_mcp_tool(\n    server_url: str,\n    tool_name: str,\n    arguments: dict[str, Any],\n    connection_headers: dict[str, str] | None = None,\n    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,\n    auth: OAuthClientProvider | None = None,\n) -> str:\n    \"\"\"Call a specific tool on the MCP server\"\"\"\n    return _call_mcp_client_function_sync(\n        _call_mcp_tool(tool_name, arguments),\n        server_url,\n        connection_headers,\n        transport,\n        auth,\n    )\n\n\nasync def initialize_mcp_client(\n    server_url: str,\n    connection_headers: dict[str, str] | None = None,\n    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,\n    auth: OAuthClientProvider | None = None,\n) -> InitializeResult:\n    return await _call_mcp_client_function_async(\n        lambda session: session.initialize(),\n        server_url,\n        connection_headers,\n        transport,\n        auth,\n    )\n\n\nasync def _discover_mcp_tools(session: ClientSession) -> list[MCPLibTool]:\n    # 1) initialize\n    import time\n\n    t1 = time.time()\n    init_result = await session.initialize()  # sends JSON-RPC \"initialize\"\n    logger.info(f\"Initialized with server: {init_result.serverInfo}\")\n    logger.info(f\"Initialized with server time: {time.time() - t1}\")\n    # 2) tools/list\n    t2 = time.time()\n    tools_response = await session.list_tools()  # sends JSON-RPC \"tools/list\"\n    logger.info(f\"Listed tools with server time: {time.time() - t2}\")\n    return tools_response.tools\n\n\ndef discover_mcp_tools(\n    server_url: str,\n    connection_headers: dict[str, str] | None = None,\n    transport: MCPTransport = MCPTransport.STREAMABLE_HTTP,\n    auth: OAuthClientProvider | None = None,\n) -> list[MCPLibTool]:\n    \"\"\"\n    Synchronous wrapper for discovering MCP tools.\n    \"\"\"\n    return _call_mcp_client_function_sync(\n        _discover_mcp_tools,\n        server_url,\n        connection_headers,\n        transport,\n        auth,\n    )\n\n\nasync def _discover_mcp_resources(session: ClientSession) -> ListResourcesResult:\n    return await session.list_resources()\n\n\ndef discover_mcp_resources_sync(\n    server_url: str,\n    connection_headers: dict[str, str] | None = None,\n    transport: str = \"streamable-http\",\n    auth: OAuthClientProvider | None = None,\n) -> ListResourcesResult:\n    \"\"\"\n    Synchronous wrapper for discovering MCP resources.\n    This is for compatibility with the existing codebase.\n    \"\"\"\n    return _call_mcp_client_function_sync(\n        _discover_mcp_resources,\n        server_url,\n        connection_headers,\n        MCPTransport(transport),\n        auth,\n    )\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/mcp/mcp_tool.py",
    "content": "import json\nfrom typing import Any\n\nfrom mcp.client.auth import OAuthClientProvider\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.db.enums import MCPAuthenticationType\nfrom onyx.db.enums import MCPTransport\nfrom onyx.db.models import MCPConnectionConfig\nfrom onyx.db.models import MCPServer\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import CustomToolDelta\nfrom onyx.server.query_and_chat.streaming_models import CustomToolStart\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import CustomToolCallSummary\nfrom onyx.tools.models import ToolResponse\nfrom onyx.tools.tool_implementations.mcp.mcp_client import call_mcp_tool\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Headers that cannot be overridden by user requests to prevent security issues\n# Host header is particularly critical - it can be used for Host Header Injection attacks\n# to route requests to unintended internal servers\nDENYLISTED_MCP_HEADERS = {\n    \"host\",  # Prevents Host Header Injection attacks\n}\n\n# TODO: for now we're fitting MCP tool responses into the CustomToolCallSummary class\n# In the future we may want custom handling for MCP tool responses\n# class MCPToolCallSummary(BaseModel):\n#     tool_name: str\n#     server_url: str\n#     tool_result: Any\n#     server_name: str\n\n\nclass MCPTool(Tool[None]):\n    \"\"\"Tool implementation for MCP (Model Context Protocol) servers\"\"\"\n\n    def __init__(\n        self,\n        tool_id: int,\n        emitter: Emitter,\n        mcp_server: MCPServer,  # TODO: these should be basemodels instead of db objects\n        tool_name: str,\n        tool_description: str,\n        tool_definition: dict[str, Any],\n        connection_config: MCPConnectionConfig | None = None,\n        user_email: str = \"\",\n        user_id: str = \"\",\n        user_oauth_token: str | None = None,\n        additional_headers: dict[str, str] | None = None,\n    ) -> None:\n        super().__init__(emitter=emitter)\n\n        self._id = tool_id\n        self.mcp_server = mcp_server\n        self.connection_config = connection_config\n        self.user_email = user_email\n        self._user_id = user_id\n        self._user_oauth_token = user_oauth_token\n        self._additional_headers = additional_headers or {}\n\n        self._name = tool_name\n        self._tool_definition = tool_definition\n        self._description = tool_description\n        self._display_name = tool_definition.get(\"displayName\", tool_name)\n        self._llm_name = f\"mcp:{mcp_server.name}:{tool_name}\"\n\n    @property\n    def id(self) -> int:\n        return self._id\n\n    @property\n    def name(self) -> str:\n        return self._name\n\n    @property\n    def description(self) -> str:\n        return self._description\n\n    @property\n    def display_name(self) -> str:\n        return self._display_name\n\n    @property\n    def llm_name(self) -> str:\n        return self._llm_name\n\n    def tool_definition(self) -> dict:\n        \"\"\"Return the tool definition from the MCP server\"\"\"\n        # Convert MCP tool definition to OpenAI function calling format\n        return {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": self._name,\n                \"description\": self._description,\n                \"parameters\": self._tool_definition,\n            },\n        }\n\n    def emit_start(self, placement: Placement) -> None:\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=CustomToolStart(tool_name=self._name),\n            )\n        )\n\n    def run(\n        self,\n        placement: Placement,\n        override_kwargs: None = None,  # noqa: ARG002\n        **llm_kwargs: Any,\n    ) -> ToolResponse:\n        \"\"\"Execute the MCP tool by calling the MCP server\"\"\"\n        try:\n            # Build headers with proper precedence:\n            # 1. Start with additional headers from API request (filled in first, excluding denylisted)\n            # 2. Override with connection config headers (from DB) - these take precedence\n            # 3. Override Authorization header with OAuth token if present\n            headers: dict[str, str] = {}\n\n            # Priority 1: Additional headers from API request (filled in first)\n            # Filter out denylisted headers to prevent security issues (e.g., Host Header Injection)\n            if self._additional_headers:\n                filtered_headers = {\n                    k: v\n                    for k, v in self._additional_headers.items()\n                    if k.lower() not in DENYLISTED_MCP_HEADERS\n                }\n                if filtered_headers:\n                    headers.update(filtered_headers)\n                # Log if any denylisted headers were provided (for security monitoring)\n                denylisted_provided = [\n                    k\n                    for k in self._additional_headers.keys()\n                    if k.lower() in DENYLISTED_MCP_HEADERS\n                ]\n                if denylisted_provided:\n                    logger.warning(\n                        f\"MCP tool '{self._name}' received denylisted headers that were filtered: {denylisted_provided}\"\n                    )\n\n            # Priority 2: Base headers from connection config (DB) - overrides request\n            if self.connection_config and self.connection_config.config:\n                config_dict = self.connection_config.config.get_value(apply_mask=False)\n                headers.update(config_dict.get(\"headers\", {}))\n\n            # Priority 3: For pass-through OAuth, use the user's login OAuth token\n            if self._user_oauth_token:\n                headers[\"Authorization\"] = f\"Bearer {self._user_oauth_token}\"\n\n            # Check if this is an authentication issue before making the call\n            is_passthrough_oauth = (\n                self.mcp_server.auth_type == MCPAuthenticationType.PT_OAUTH\n            )\n            requires_auth = (\n                self.mcp_server.auth_type != MCPAuthenticationType.NONE\n                and self.mcp_server.auth_type is not None\n            )\n            has_auth_config = (\n                (self.connection_config is not None and bool(headers))\n                or bool(self._additional_headers)\n            ) or (is_passthrough_oauth and self._user_oauth_token is not None)\n\n            if requires_auth and not has_auth_config:\n                # Authentication required but not configured\n                auth_error_msg = (\n                    f\"The {self._name} tool from {self.mcp_server.name} requires authentication \"\n                    f\"but no credentials have been provided. Tell the user to use the MCP dropdown in the \"\n                    f\"chat bar to authenticate with the {self.mcp_server.name} server before \"\n                    f\"using this tool.\"\n                )\n                logger.warning(\n                    f\"Authentication required for MCP tool '{self._name}' but no credentials found\"\n                )\n\n                error_result = {\"error\": auth_error_msg}\n                llm_facing_response = json.dumps(error_result)\n\n                # Emit CustomToolDelta packet\n                self.emitter.emit(\n                    Packet(\n                        placement=placement,\n                        obj=CustomToolDelta(\n                            tool_name=self._name,\n                            response_type=\"json\",\n                            data=error_result,\n                        ),\n                    )\n                )\n\n                return ToolResponse(\n                    rich_response=CustomToolCallSummary(\n                        tool_name=self._name,\n                        response_type=\"json\",\n                        tool_result=error_result,\n                    ),\n                    llm_facing_response=llm_facing_response,\n                )\n\n            # For OAuth servers, construct OAuthClientProvider so the MCP SDK\n            # can refresh expired tokens automatically\n            auth: OAuthClientProvider | None = None\n            if (\n                self.mcp_server.auth_type == MCPAuthenticationType.OAUTH\n                and self.connection_config is not None\n                and self._user_id\n            ):\n                if self.mcp_server.transport == MCPTransport.SSE:\n                    logger.warning(\n                        f\"MCP tool '{self._name}': OAuth token refresh is not supported \"\n                        f\"for SSE transport — auth provider will be ignored. \"\n                        f\"Re-authentication may be required after token expiry.\"\n                    )\n                else:\n                    from onyx.server.features.mcp.api import UNUSED_RETURN_PATH\n                    from onyx.server.features.mcp.api import make_oauth_provider\n\n                    # user_id is the requesting user's UUID; safe here because\n                    # UNUSED_RETURN_PATH ensures redirect_handler raises immediately\n                    # and user_id is never consulted for Redis state lookups.\n                    auth = make_oauth_provider(\n                        self.mcp_server,\n                        self._user_id,\n                        UNUSED_RETURN_PATH,\n                        self.connection_config.id,\n                        None,\n                    )\n\n            tool_result = call_mcp_tool(\n                self.mcp_server.server_url,\n                self._name,\n                llm_kwargs,\n                connection_headers=headers,\n                transport=self.mcp_server.transport or MCPTransport.STREAMABLE_HTTP,\n                auth=auth,\n            )\n\n            logger.info(f\"MCP tool '{self._name}' executed successfully\")\n\n            # Format the tool result for response\n            tool_result_dict = {\"tool_result\": tool_result}\n            llm_facing_response = json.dumps(tool_result_dict)\n\n            # Emit CustomToolDelta packet\n            self.emitter.emit(\n                Packet(\n                    placement=placement,\n                    obj=CustomToolDelta(\n                        tool_name=self._name,\n                        response_type=\"json\",\n                        data=tool_result_dict,\n                    ),\n                )\n            )\n\n            return ToolResponse(\n                rich_response=CustomToolCallSummary(\n                    tool_name=self._name,\n                    response_type=\"json\",\n                    tool_result=tool_result_dict,\n                ),\n                llm_facing_response=llm_facing_response,\n            )\n\n        except Exception as e:\n            error_str = str(e).lower()\n            logger.error(f\"Failed to execute MCP tool '{self._name}': {e}\")\n\n            # Check for authentication-related errors\n            auth_error_indicators = [\n                \"401\",\n                \"unauthorized\",\n                \"authentication\",\n                \"auth\",\n                \"forbidden\",\n                \"access denied\",\n                \"invalid token\",\n                \"invalid api key\",\n                \"invalid credentials\",\n                \"please reconnect to the server\",\n            ]\n\n            is_auth_error = any(\n                indicator in error_str for indicator in auth_error_indicators\n            )\n\n            if is_auth_error:\n                auth_error_msg = (\n                    f\"Authentication failed for the {self._name} tool from {self.mcp_server.name}. \"\n                    f\"Please use the MCP dropdown in the chat bar to update your credentials \"\n                    f\"for the {self.mcp_server.name} server. Original error: {str(e)}\"\n                )\n                error_result = {\"error\": auth_error_msg}\n            else:\n                error_result = {\"error\": f\"Tool execution failed: {str(e)}\"}\n\n            llm_facing_response = json.dumps(error_result)\n\n            # Emit CustomToolDelta packet\n            self.emitter.emit(\n                Packet(\n                    placement=placement,\n                    obj=CustomToolDelta(\n                        tool_name=self._name,\n                        response_type=\"json\",\n                        data=error_result,\n                    ),\n                )\n            )\n\n            return ToolResponse(\n                rich_response=CustomToolCallSummary(\n                    tool_name=self._name,\n                    response_type=\"json\",\n                    tool_result=error_result,\n                ),\n                llm_facing_response=llm_facing_response,\n            )\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/memory/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/tools/tool_implementations/memory/memory_tool.py",
    "content": "\"\"\"\nMemory Tool for storing user-specific information.\n\nThis tool allows the LLM to save memories about the user for future conversations.\nThe memories are passed in via override_kwargs which contains the current list of\nmemories that exist for the user.\n\"\"\"\n\nfrom typing import Any\nfrom typing import cast\nfrom typing import Literal\n\nfrom pydantic import BaseModel\nfrom typing_extensions import override\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.llm.interfaces import LLM\nfrom onyx.secondary_llm_flows.memory_update import process_memory_update\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import MemoryToolDelta\nfrom onyx.server.query_and_chat.streaming_models import MemoryToolStart\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import ChatMinimalTextMessage\nfrom onyx.tools.models import ToolCallException\nfrom onyx.tools.models import ToolResponse\nfrom onyx.tools.tool_implementations.memory.models import MemoryToolResponse\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\n\nMEMORY_FIELD = \"memory\"\n\n\nclass MemoryToolOverrideKwargs(BaseModel):\n    # Not including the Team Information or User Preferences because these are less likely to contribute to building the memory\n    # Things like the user's name is important because the LLM may create a memory like \"Dave prefers light mode.\" instead of\n    # User prefers light mode.\n    user_name: str | None\n    user_email: str | None\n    user_role: str | None\n    existing_memories: list[str]\n    chat_history: list[ChatMinimalTextMessage]\n\n\nclass MemoryTool(Tool[MemoryToolOverrideKwargs]):\n    NAME = \"add_memory\"\n    DISPLAY_NAME = \"Add Memory\"\n    DESCRIPTION = \"Save memories about the user for future conversations.\"\n\n    def __init__(\n        self,\n        tool_id: int,\n        emitter: Emitter,\n        llm: LLM,\n    ) -> None:\n        super().__init__(emitter=emitter)\n        self._id = tool_id\n        self.llm = llm\n\n    @property\n    def id(self) -> int:\n        return self._id\n\n    @property\n    def name(self) -> str:\n        return self.NAME\n\n    @property\n    def description(self) -> str:\n        return self.DESCRIPTION\n\n    @property\n    def display_name(self) -> str:\n        return self.DISPLAY_NAME\n\n    @override\n    def tool_definition(self) -> dict:\n        return {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": self.name,\n                \"description\": self.description,\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        MEMORY_FIELD: {\n                            \"type\": \"string\",\n                            \"description\": (\n                                \"The text of the memory to add or update. \"\n                                \"Should be a concise, standalone statement that \"\n                                \"captures the key information. For example: \"\n                                \"'User prefers dark mode' or 'User's favorite frontend framework is React'.\"\n                            ),\n                        },\n                    },\n                    \"required\": [MEMORY_FIELD],\n                },\n            },\n        }\n\n    @override\n    def emit_start(self, placement: Placement) -> None:\n        self.emitter.emit(Packet(placement=placement, obj=MemoryToolStart()))\n\n    @override\n    def run(\n        self,\n        placement: Placement,\n        override_kwargs: MemoryToolOverrideKwargs,\n        **llm_kwargs: Any,\n    ) -> ToolResponse:\n        if MEMORY_FIELD not in llm_kwargs:\n            raise ToolCallException(\n                message=f\"Missing required '{MEMORY_FIELD}' parameter in add_memory tool call\",\n                llm_facing_message=(\n                    f\"The add_memory tool requires a '{MEMORY_FIELD}' parameter containing \"\n                    f\"the memory text to save. Please provide like: \"\n                    f'{{\"memory\": \"User prefers dark mode\"}}'\n                ),\n            )\n        memory = cast(str, llm_kwargs[MEMORY_FIELD])\n\n        existing_memories = override_kwargs.existing_memories\n        chat_history = override_kwargs.chat_history\n\n        # Determine if this should be an add or update operation\n        memory_text, index_to_replace = process_memory_update(\n            new_memory=memory,\n            existing_memories=existing_memories,\n            chat_history=chat_history,\n            llm=self.llm,\n            user_name=override_kwargs.user_name,\n            user_email=override_kwargs.user_email,\n            user_role=override_kwargs.user_role,\n        )\n\n        logger.info(f\"New memory to be added: {memory_text}\")\n\n        operation: Literal[\"add\", \"update\"] = (\n            \"update\" if index_to_replace is not None else \"add\"\n        )\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=MemoryToolDelta(\n                    memory_text=memory_text,\n                    operation=operation,\n                    memory_id=None,\n                    index=index_to_replace,\n                ),\n            )\n        )\n\n        return ToolResponse(\n            rich_response=MemoryToolResponse(\n                memory_text=memory_text,\n                index_to_replace=index_to_replace,\n            ),\n            llm_facing_response=f\"New memory added: {memory_text}\",\n        )\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/memory/models.py",
    "content": "from pydantic import BaseModel\n\n\nclass MemoryToolResponse(BaseModel):\n    memory_text: str\n    index_to_replace: int | None  # None = add new, int = replace at 0-based index\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/open_url/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/tools/tool_implementations/open_url/firecrawl.py",
    "content": "from __future__ import annotations\n\nfrom collections.abc import Sequence\nfrom concurrent.futures import ThreadPoolExecutor\nfrom dataclasses import dataclass\nfrom datetime import datetime\nfrom typing import Any\n\nimport requests\n\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.tools.tool_implementations.open_url.models import WebContent\nfrom onyx.tools.tool_implementations.open_url.models import WebContentProvider\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nFIRECRAWL_SCRAPE_URL = \"https://api.firecrawl.dev/v2/scrape\"\n_DEFAULT_MAX_WORKERS = 5\n\n# Timeout is tuned to stay under the 2-minute outer timeout in\n_DEFAULT_TIMEOUT_SECONDS = 55  # 10 max urls, 2 max batches\n\n\n@dataclass\nclass ExtractedContentFields:\n    text: str\n    title: str\n    published_date: datetime | None\n\n\nclass FirecrawlClient(WebContentProvider):\n    def __init__(\n        self,\n        api_key: str,\n        *,\n        base_url: str = FIRECRAWL_SCRAPE_URL,\n        timeout_seconds: int = _DEFAULT_TIMEOUT_SECONDS,\n    ) -> None:\n\n        self._headers = {\n            \"Authorization\": f\"Bearer {api_key}\",\n            \"Content-Type\": \"application/json\",\n        }\n        self._base_url = base_url\n        self._timeout_seconds = timeout_seconds\n        self._last_error: str | None = None\n\n    @property\n    def last_error(self) -> str | None:\n        return self._last_error\n\n    def contents(self, urls: Sequence[str]) -> list[WebContent]:\n        if not urls:\n            return []\n\n        max_workers = min(_DEFAULT_MAX_WORKERS, len(urls))\n        with ThreadPoolExecutor(max_workers=max_workers) as executor:\n            return list(executor.map(self._get_webpage_content_safe, urls))\n\n    # This allows the contents call to continue even if one URL fails, and return the results for the other URLs.\n    def _get_webpage_content_safe(self, url: str) -> WebContent:\n        try:\n            return self._get_webpage_content(url)\n        except Exception as exc:\n            self._last_error = str(exc)\n            return WebContent(\n                title=\"\",\n                link=url,\n                full_content=\"\",\n                published_date=None,\n                scrape_successful=False,\n            )\n\n    # Note: explicitly deciding not to retry here, Firecrawl does not seem to ever recover on failed site crawls\n    # Retrying causes other issues like timing out and dropping the entire batch when it's not needed.\n    def _get_webpage_content(self, url: str) -> WebContent:\n        payload = {\n            \"url\": url,\n            \"formats\": [\"markdown\"],\n        }\n\n        response = requests.post(\n            self._base_url,\n            headers=self._headers,\n            json=payload,\n            timeout=self._timeout_seconds,\n        )\n\n        if response.status_code != 200:\n            try:\n                error_payload = response.json()\n            except Exception:\n                error_payload = response.text\n            self._last_error = (\n                error_payload if isinstance(error_payload, str) else str(error_payload)\n            )\n\n            if 400 <= response.status_code < 500:\n                return WebContent(\n                    title=\"\",\n                    link=url,\n                    full_content=\"\",\n                    published_date=None,\n                    scrape_successful=False,\n                )\n\n            raise ValueError(\n                f\"Firecrawl fetch failed with status {response.status_code}.\"\n            )\n        else:\n            self._last_error = None\n\n        response_json = response.json()\n        extracted = self._extract_content_fields(response_json, url)\n\n        return WebContent(\n            title=extracted.title,\n            link=url,\n            full_content=extracted.text,\n            published_date=extracted.published_date,\n            scrape_successful=bool(extracted.text),\n        )\n\n    @staticmethod\n    def _extract_content_fields(\n        response_json: dict[str, Any], url: str\n    ) -> ExtractedContentFields:\n        data_section = response_json.get(\"data\") or {}\n        metadata = data_section.get(\"metadata\") or response_json.get(\"metadata\") or {}\n\n        text_candidates = [\n            data_section.get(\"markdown\"),\n            data_section.get(\"content\"),\n            data_section.get(\"text\"),\n            response_json.get(\"markdown\"),\n            response_json.get(\"content\"),\n            response_json.get(\"text\"),\n        ]\n\n        text = next((candidate for candidate in text_candidates if candidate), \"\")\n        title = metadata.get(\"title\") or response_json.get(\"title\") or \"\"\n        published_date = None\n\n        published_date_str = (\n            metadata.get(\"publishedTime\")\n            or metadata.get(\"date\")\n            or response_json.get(\"publishedTime\")\n            or response_json.get(\"date\")\n        )\n\n        if published_date_str:\n            try:\n                published_date = time_str_to_utc(published_date_str)\n            except Exception:\n                published_date = None\n\n        if not text:\n            logger.warning(f\"Firecrawl returned empty content for url={url}\")\n\n        return ExtractedContentFields(\n            text=text or \"\",\n            title=title or \"\",\n            published_date=published_date,\n        )\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/open_url/models.py",
    "content": "from abc import ABC\nfrom abc import abstractmethod\nfrom collections.abc import Sequence\nfrom datetime import datetime\n\nfrom pydantic import BaseModel\nfrom pydantic import field_validator\n\nfrom onyx.utils.url import normalize_url\n\n\nclass WebContent(BaseModel):\n    title: str\n    link: str\n    full_content: str\n    published_date: datetime | None = None\n    scrape_successful: bool = True\n\n    @field_validator(\"link\")\n    @classmethod\n    def normalize_link(cls, v: str) -> str:\n        return normalize_url(v)\n\n\nclass WebContentProvider(ABC):\n    @abstractmethod\n    def contents(self, urls: Sequence[str]) -> list[WebContent]:\n        pass\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/open_url/onyx_web_crawler.py",
    "content": "from __future__ import annotations\n\nfrom collections.abc import Sequence\nfrom concurrent.futures import ThreadPoolExecutor\n\nfrom onyx.file_processing.html_utils import ParsedHTML\nfrom onyx.file_processing.html_utils import web_html_cleanup\nfrom onyx.tools.tool_implementations.open_url.models import (\n    WebContent,\n)\nfrom onyx.tools.tool_implementations.open_url.models import (\n    WebContentProvider,\n)\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.url import ssrf_safe_get\nfrom onyx.utils.url import SSRFException\nfrom onyx.utils.web_content import decode_html_bytes\nfrom onyx.utils.web_content import extract_pdf_text\nfrom onyx.utils.web_content import is_pdf_resource\nfrom onyx.utils.web_content import title_from_pdf_metadata\nfrom onyx.utils.web_content import title_from_url\n\nlogger = setup_logger()\n\nDEFAULT_READ_TIMEOUT_SECONDS = 15\nDEFAULT_CONNECT_TIMEOUT_SECONDS = 5\nDEFAULT_USER_AGENT = \"OnyxWebCrawler/1.0 (+https://www.onyx.app)\"\nDEFAULT_MAX_PDF_SIZE_BYTES = 50 * 1024 * 1024  # 50 MB\nDEFAULT_MAX_HTML_SIZE_BYTES = 20 * 1024 * 1024  # 20 MB\nDEFAULT_MAX_WORKERS = 5\n\n\ndef _failed_result(url: str) -> WebContent:\n    return WebContent(\n        title=\"\",\n        link=url,\n        full_content=\"\",\n        published_date=None,\n        scrape_successful=False,\n    )\n\n\nclass OnyxWebCrawler(WebContentProvider):\n    \"\"\"\n    Lightweight built-in crawler that fetches HTML directly and extracts readable text.\n    Acts as the default content provider when no external crawler (e.g. Firecrawl) is\n    configured.\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        timeout_seconds: int = DEFAULT_READ_TIMEOUT_SECONDS,\n        connect_timeout_seconds: int = DEFAULT_CONNECT_TIMEOUT_SECONDS,\n        user_agent: str = DEFAULT_USER_AGENT,\n        max_pdf_size_bytes: int | None = None,\n        max_html_size_bytes: int | None = None,\n    ) -> None:\n        self._read_timeout_seconds = timeout_seconds\n        self._connect_timeout_seconds = connect_timeout_seconds\n        self._max_pdf_size_bytes = max_pdf_size_bytes\n        self._max_html_size_bytes = max_html_size_bytes\n        self._headers = {\n            \"User-Agent\": user_agent,\n            \"Accept\": \"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\",\n        }\n\n    def contents(self, urls: Sequence[str]) -> list[WebContent]:\n        if not urls:\n            return []\n\n        max_workers = min(DEFAULT_MAX_WORKERS, len(urls))\n        with ThreadPoolExecutor(max_workers=max_workers) as executor:\n            return list(executor.map(self._fetch_url_safe, urls))\n\n    def _fetch_url_safe(self, url: str) -> WebContent:\n        \"\"\"Wrapper that catches all exceptions so one bad URL doesn't kill the batch.\"\"\"\n        try:\n            return self._fetch_url(url)\n        except Exception as exc:\n            logger.warning(\n                \"Onyx crawler unexpected error for %s (%s)\",\n                url,\n                exc.__class__.__name__,\n            )\n            return _failed_result(url)\n\n    def _fetch_url(self, url: str) -> WebContent:\n        try:\n            response = ssrf_safe_get(\n                url,\n                headers=self._headers,\n                timeout=(self._connect_timeout_seconds, self._read_timeout_seconds),\n            )\n        except SSRFException as exc:\n            logger.error(\n                \"SSRF protection blocked request to %s (%s)\",\n                url,\n                exc.__class__.__name__,\n            )\n            return _failed_result(url)\n        except Exception as exc:\n            logger.warning(\n                \"Onyx crawler failed to fetch %s (%s)\",\n                url,\n                exc.__class__.__name__,\n            )\n            return _failed_result(url)\n\n        if response.status_code >= 400:\n            logger.warning(\"Onyx crawler received %s for %s\", response.status_code, url)\n            return _failed_result(url)\n\n        content_type = response.headers.get(\"Content-Type\", \"\")\n        content = response.content\n\n        content_sniff = content[:1024] if content else None\n        if is_pdf_resource(url, content_type, content_sniff):\n            if (\n                self._max_pdf_size_bytes is not None\n                and len(content) > self._max_pdf_size_bytes\n            ):\n                logger.warning(\n                    \"PDF content too large (%d bytes) for %s, max is %d\",\n                    len(content),\n                    url,\n                    self._max_pdf_size_bytes,\n                )\n                return _failed_result(url)\n            text_content, metadata = extract_pdf_text(content)\n            title = title_from_pdf_metadata(metadata) or title_from_url(url)\n            return WebContent(\n                title=title,\n                link=url,\n                full_content=text_content,\n                published_date=None,\n                scrape_successful=bool(text_content.strip()),\n            )\n\n        if (\n            self._max_html_size_bytes is not None\n            and len(content) > self._max_html_size_bytes\n        ):\n            logger.warning(\n                \"HTML content too large (%d bytes) for %s, max is %d\",\n                len(content),\n                url,\n                self._max_html_size_bytes,\n            )\n            return _failed_result(url)\n\n        try:\n            decoded_html = decode_html_bytes(\n                content,\n                content_type=content_type,\n                fallback_encoding=response.apparent_encoding or response.encoding,\n            )\n            parsed: ParsedHTML = web_html_cleanup(decoded_html)\n            text_content = parsed.cleaned_text or \"\"\n            title = parsed.title or \"\"\n        except Exception as exc:\n            logger.warning(\n                \"Onyx crawler failed to parse %s (%s)\", url, exc.__class__.__name__\n            )\n            text_content = \"\"\n            title = \"\"\n\n        return WebContent(\n            title=title,\n            link=url,\n            full_content=text_content,\n            published_date=None,\n            scrape_successful=bool(text_content.strip()),\n        )\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/open_url/open_url_tool.py",
    "content": "import json\nfrom collections import defaultdict\nfrom typing import Any\n\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\nfrom typing_extensions import override\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import InferenceSection\nfrom onyx.context.search.models import SearchDocsResponse\nfrom onyx.context.search.preprocessing.access_filters import (\n    build_access_filters_for_user,\n)\nfrom onyx.context.search.utils import convert_inference_sections_to_search_docs\nfrom onyx.context.search.utils import inference_section_from_chunks\nfrom onyx.db.document import fetch_document_ids_by_links\nfrom onyx.db.document import filter_existing_document_ids\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import User\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.document_index.interfaces import VespaChunkRequest\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import OpenUrlDocuments\nfrom onyx.server.query_and_chat.streaming_models import OpenUrlStart\nfrom onyx.server.query_and_chat.streaming_models import OpenUrlUrls\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import OpenURLToolOverrideKwargs\nfrom onyx.tools.models import ToolCallException\nfrom onyx.tools.models import ToolResponse\nfrom onyx.tools.tool_implementations.open_url.models import WebContentProvider\nfrom onyx.tools.tool_implementations.open_url.url_normalization import (\n    _default_url_normalizer,\n)\nfrom onyx.tools.tool_implementations.open_url.url_normalization import normalize_url\nfrom onyx.tools.tool_implementations.open_url.utils import (\n    filter_web_contents_with_no_title_or_content,\n)\nfrom onyx.tools.tool_implementations.web_search.providers import (\n    get_default_content_provider,\n)\nfrom onyx.tools.tool_implementations.web_search.utils import (\n    inference_section_from_internet_page_scrape,\n)\nfrom onyx.tools.tool_implementations.web_search.utils import MAX_CHARS_PER_URL\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom onyx.utils.url import normalize_url as normalize_web_content_url\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\nURLS_FIELD = \"urls\"\n\n# 2 minute timeout for parallel URL fetching to prevent indefinite hangs\nOPEN_URL_TIMEOUT_SECONDS = 2 * 60\n\n# Sometimes the LLM will ask for a lot of URLs, so we need to limit the total number of characters\n# otherwise this alone will completely flood the context and degrade experience.\n# Note that if a lot of the URLs contain very little content, this results in no truncation.\nMAX_CHARS_ACROSS_URLS = 10 * MAX_CHARS_PER_URL\n\n# Minimum content length to include a document (avoid tiny snippets)\n# This is for truncation purposes, if a document is small (unless it goes into truncation flow),\n# it still gets included normally.\nMIN_CONTENT_CHARS = 200\n\n\nclass IndexedDocumentRequest(BaseModel):\n    document_id: str\n    original_url: str | None = None\n\n\nclass IndexedRetrievalResult(BaseModel):\n    sections: list[InferenceSection]\n    missing_document_ids: list[str]\n\n\ndef _dedupe_preserve_order(values: list[str]) -> list[str]:\n    seen: set[str] = set()\n    ordered: list[str] = []\n    for value in values:\n        if not value:\n            continue\n        if value in seen:\n            continue\n        seen.add(value)\n        ordered.append(value)\n    return ordered\n\n\ndef _normalize_string_list(value: str | list[str] | None) -> list[str]:\n    \"\"\"Normalize a value that may be a string, list of strings, or None into a cleaned list.\n\n    Returns a deduplicated list of non-empty stripped strings.\n    \"\"\"\n    if value is None:\n        return []\n    if isinstance(value, str):\n        value = [value]\n    return _dedupe_preserve_order(\n        [stripped for item in value if (stripped := str(item).strip())]\n    )\n\n\ndef _url_lookup_variants(url: str) -> set[str]:\n    \"\"\"Generate URL variants (with/without trailing slash) for database lookup.\n\n    This is used after normalize_url() to create variants for fuzzy matching\n    in the database, since URLs may be stored with or without trailing slashes.\n    \"\"\"\n    # Use default normalizer to strip query/fragment, then create variants\n    normalized = _default_url_normalizer(url)\n    if not normalized:\n        return set()\n    variants = {normalized}\n    if normalized.endswith(\"/\"):\n        variants.add(normalized.rstrip(\"/\"))\n    else:\n        variants.add(f\"{normalized}/\")\n    return {variant for variant in variants if variant}\n\n\ndef _lookup_document_ids_by_link(\n    urls: list[str], db_session: Session\n) -> list[IndexedDocumentRequest]:\n    \"\"\"Lookup document IDs by matching URLs against the Document.link column.\n\n    This is used as a fallback when document ID resolution fails and URL scraping fails.\n    Useful for connectors like Linear.\n    \"\"\"\n    variant_to_original: dict[str, str] = {}\n    for url in urls:\n        if not url:\n            continue\n        # Generate URL variants (normalized, with/without trailing slash)\n        variants = _url_lookup_variants(url)\n        variants.add(url)\n        # Map each variant back to the original URL\n        for variant in variants:\n            variant_to_original.setdefault(variant, url)\n\n    if not variant_to_original:\n        return []\n\n    # Query database for documents matching any of the URL variants\n    link_to_doc_id = fetch_document_ids_by_links(\n        db_session, list(variant_to_original.keys())\n    )\n\n    requests: list[IndexedDocumentRequest] = []\n    for link_value, doc_id in link_to_doc_id.items():\n        original_url = variant_to_original.get(link_value)\n        if original_url:\n            requests.append(\n                IndexedDocumentRequest(\n                    document_id=doc_id,\n                    original_url=original_url,\n                )\n            )\n    return requests\n\n\ndef _dedupe_document_requests(\n    requests: list[IndexedDocumentRequest],\n) -> list[IndexedDocumentRequest]:\n    \"\"\"Remove duplicate document requests, preserving order.\"\"\"\n    seen: set[str] = set()\n    deduped: list[IndexedDocumentRequest] = []\n    for request in requests:\n        if request.document_id in seen:\n            continue\n        seen.add(request.document_id)\n        deduped.append(request)\n    return deduped\n\n\ndef _resolve_urls_to_document_ids(\n    urls: list[str], db_session: Session\n) -> tuple[list[IndexedDocumentRequest], list[str]]:\n    \"\"\"Resolve URLs to document IDs using connector-owned normalization.\n\n    Uses the url_normalization module which delegates to each connector's\n    own normalization function to ensure URLs match the canonical Document.id\n    format used during ingestion.\n    \"\"\"\n    matches: list[IndexedDocumentRequest] = []\n    unresolved: list[str] = []\n    normalized_map: dict[str, set[str]] = {}\n\n    for url in urls:\n        # Use connector-owned normalization (reuses connector's own logic)\n        normalized = normalize_url(url)\n\n        if normalized:\n            # Some connectors (e.g. Notion) normalize to a non-URL canonical document\n            # identifier (e.g. a UUID) rather than a URL. In those cases, we should\n            # treat the normalized value as a document_id directly.\n            if normalized.startswith((\"http://\", \"https://\")):\n                # Get URL variants (with/without trailing slash) for database lookup\n                variants = _url_lookup_variants(normalized)\n                # Defensive fallback: if variant generation fails, still try the\n                # normalized URL itself.\n                normalized_map[url] = variants or {normalized}\n            else:\n                normalized_map[url] = {normalized}\n        else:\n            # No normalizer found - could be a non-URL document ID (e.g., FILE_CONNECTOR__...)\n            if url and not url.startswith((\"http://\", \"https://\")):\n                # Likely a document ID, use it directly\n                normalized_map[url] = {url}\n            else:\n                # Try generic normalization as fallback\n                variants = _url_lookup_variants(url)\n                if variants:\n                    normalized_map[url] = variants\n                else:\n                    unresolved.append(url)\n\n    if not normalized_map:\n        return matches, unresolved\n\n    # Query database with all normalized variants\n    all_variants = {\n        variant for variants in normalized_map.values() for variant in variants\n    }\n    existing_document_ids = filter_existing_document_ids(db_session, list(all_variants))\n\n    # Match URLs to documents\n    for url, variants in normalized_map.items():\n        matched_doc_id = next(\n            (variant for variant in variants if variant in existing_document_ids),\n            None,\n        )\n        if matched_doc_id:\n            matches.append(\n                IndexedDocumentRequest(\n                    document_id=matched_doc_id,\n                    original_url=url,\n                )\n            )\n        else:\n            unresolved.append(url)\n\n    return matches, unresolved\n\n\ndef _estimate_result_chars(result: dict[str, Any]) -> int:\n    \"\"\"Estimate character count from document fields in a result dict.\"\"\"\n    total = 0\n    for key, value in result.items():\n        if value is not None:\n            total += len(str(value))\n    return total\n\n\ndef _convert_sections_to_llm_string_with_citations(\n    sections: list[InferenceSection],\n    existing_citation_mapping: dict[str, int],\n    citation_start: int,\n    max_document_chars: int = MAX_CHARS_ACROSS_URLS,\n) -> tuple[str, dict[int, str]]:\n    \"\"\"Convert InferenceSections to LLM string, reusing existing citations where available.\n\n    Args:\n        sections: List of InferenceSection objects to convert.\n        existing_citation_mapping: Mapping of document_id -> citation_num for\n            documents that have already been cited.\n        citation_start: Starting citation number for new citations.\n        max_document_chars: Maximum total characters from document fields.\n            Content will be truncated to fit within this budget.\n\n    Returns:\n        Tuple of (JSON string for LLM, citation_mapping dict).\n        The citation_mapping maps citation_id -> document_id.\n    \"\"\"\n    # Build document_id to citation_id mapping, reusing existing citations\n    document_id_to_citation_id: dict[str, int] = {}\n    citation_mapping: dict[int, str] = {}\n    next_citation_id = citation_start\n\n    # First pass: assign citation_ids, reusing existing ones where available\n    for section in sections:\n        document_id = section.center_chunk.document_id\n        if document_id in document_id_to_citation_id:\n            # Already assigned in this batch\n            continue\n\n        if document_id in existing_citation_mapping:\n            # Reuse existing citation number\n            citation_id = existing_citation_mapping[document_id]\n            document_id_to_citation_id[document_id] = citation_id\n            citation_mapping[citation_id] = document_id\n        else:\n            # Assign new citation number\n            document_id_to_citation_id[document_id] = next_citation_id\n            citation_mapping[next_citation_id] = document_id\n            next_citation_id += 1\n\n    # Second pass: build results, respecting max_document_chars budget\n    results = []\n    total_chars = 0\n\n    for section in sections:\n        chunk = section.center_chunk\n        document_id = chunk.document_id\n        citation_id = document_id_to_citation_id[document_id]\n\n        # Format updated_at as ISO string if available\n        updated_at_str = None\n        if chunk.updated_at:\n            updated_at_str = chunk.updated_at.isoformat()\n\n        # Build result dict without content first to calculate metadata overhead\n        result: dict[str, Any] = {\n            \"document\": citation_id,\n            \"title\": chunk.semantic_identifier,\n        }\n        if updated_at_str is not None:\n            result[\"updated_at\"] = updated_at_str\n        if chunk.source_links:\n            link = next(iter(chunk.source_links.values()), None)\n            if link:\n                result[\"url\"] = link\n\n        if chunk.metadata:\n            result[\"metadata\"] = json.dumps(chunk.metadata, ensure_ascii=False)\n\n        # Calculate chars used by metadata fields (everything except content)\n        metadata_chars = _estimate_result_chars(result)\n\n        # Calculate remaining budget for content\n        remaining_budget = max_document_chars - total_chars - metadata_chars\n        content = section.combined_content\n\n        # Check if we have enough budget for meaningful content\n        if remaining_budget < MIN_CONTENT_CHARS:\n            # Not enough room for meaningful content, stop adding documents\n            break\n\n        # Truncate content if it exceeds remaining budget\n        if len(content) > remaining_budget:\n            content = content[:remaining_budget]\n\n        result[\"content\"] = content\n\n        result_chars = _estimate_result_chars(result)\n        results.append(result)\n        total_chars += result_chars\n\n    output = {\"results\": results}\n    return json.dumps(output, indent=2, ensure_ascii=False), citation_mapping\n\n\nclass OpenURLTool(Tool[OpenURLToolOverrideKwargs]):\n    NAME = \"open_url\"\n    DESCRIPTION = \"Open and read the content of one or more URLs.\"\n    DISPLAY_NAME = \"Open URL\"\n\n    def __init__(\n        self,\n        tool_id: int,\n        emitter: Emitter,\n        document_index: DocumentIndex,\n        user: User,\n        content_provider: WebContentProvider | None = None,\n    ) -> None:\n        \"\"\"Initialize the OpenURLTool.\n\n        Args:\n            tool_id: Unique identifier for this tool instance.\n            emitter: Emitter for streaming packets to the client.\n            document_index: Index handle for retrieving stored documents.\n            user: User context for ACL filtering, anonymous users only see public docs.\n            content_provider: Optional content provider. If not provided,\n                will use the default provider from the database or fall back\n                to the built-in Onyx web crawler.\n        \"\"\"\n        super().__init__(emitter=emitter)\n        self._id = tool_id\n        self._document_index = document_index\n        self._user = user\n\n        if content_provider is not None:\n            self._provider = content_provider\n        else:\n            provider = get_default_content_provider()\n            if provider is None:\n                raise RuntimeError(\n                    \"No web content provider available. \"\n                    \"Please configure a content provider or ensure the \"\n                    \"built-in Onyx web crawler can be initialized.\"\n                )\n            self._provider = provider\n\n    @property\n    def id(self) -> int:\n        return self._id\n\n    @property\n    def name(self) -> str:\n        return self.NAME\n\n    @property\n    def description(self) -> str:\n        return self.DESCRIPTION\n\n    @property\n    def display_name(self) -> str:\n        return self.DISPLAY_NAME\n\n    @override\n    @classmethod\n    def is_available(cls, db_session: Session) -> bool:  # noqa: ARG003\n        \"\"\"OpenURLTool is available unless the vector DB is disabled.\n\n        The tool uses id_based_retrieval to match URLs to indexed documents,\n        which requires a vector database. When DISABLE_VECTOR_DB is set, the\n        tool is disabled entirely.\n        \"\"\"\n        from onyx.configs.app_configs import DISABLE_VECTOR_DB\n\n        if DISABLE_VECTOR_DB:\n            return False\n\n        # The tool can use either a configured provider or the built-in crawler,\n        # so it's always available when the vector DB is present\n        return True\n\n    def tool_definition(self) -> dict:\n        return {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": self.name,\n                \"description\": self.description,\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        URLS_FIELD: {\n                            \"type\": \"array\",\n                            \"items\": {\"type\": \"string\"},\n                            \"description\": (\n                                \"List of URLs to open and read, can be a single URL or multiple URLs. \"\n                                \"This will return the text content of the page(s).\"\n                            ),\n                        },\n                    },\n                    \"required\": [URLS_FIELD],\n                },\n            },\n        }\n\n    def emit_start(self, placement: Placement) -> None:\n        \"\"\"Emit start packet to signal tool has started.\"\"\"\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=OpenUrlStart(),\n            )\n        )\n\n    def run(\n        self,\n        placement: Placement,\n        override_kwargs: OpenURLToolOverrideKwargs,\n        **llm_kwargs: Any,\n    ) -> ToolResponse:\n        \"\"\"Execute the open URL tool to fetch content from the specified URLs.\n\n        Args:\n            placement: The placement info (turn_index and tab_index) for this tool call.\n            override_kwargs: Override arguments including starting citation number\n                and existing citation_mapping to reuse citations for already-cited URLs.\n            **llm_kwargs: Arguments provided by the LLM, including the 'urls' field.\n\n        Returns:\n                ToolResponse containing the fetched content and citation mapping.\n        \"\"\"\n        urls = _normalize_string_list(llm_kwargs.get(URLS_FIELD))\n\n        if len(urls) > override_kwargs.max_urls:\n            logger.warning(\n                f\"OpenURL tool received {len(urls)} URLs, but the max is {override_kwargs.max_urls}.\"\n            )\n            urls = urls[: override_kwargs.max_urls]\n\n        if not urls:\n            raise ToolCallException(\n                message=f\"Missing required '{URLS_FIELD}' parameter in open_url tool call\",\n                llm_facing_message=(\n                    f\"The open_url tool requires a '{URLS_FIELD}' parameter \"\n                    f\"containing an array of URLs. Please provide \"\n                    f'like: {{\"urls\": [\"https://example.com\"]}}'\n                ),\n            )\n\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=OpenUrlUrls(urls=urls),\n            )\n        )\n\n        with get_session_with_current_tenant() as db_session:\n            # Resolve URLs to document IDs for indexed retrieval\n            # Handles both raw URLs and already-normalized document IDs\n            url_requests, unresolved_urls = _resolve_urls_to_document_ids(\n                urls, db_session\n            )\n\n            all_requests = _dedupe_document_requests(url_requests)\n\n            # Create mapping from URL to document_id for result merging\n            url_to_doc_id: dict[str, str] = {}\n            for request in url_requests:\n                if request.original_url:\n                    url_to_doc_id[request.original_url] = request.document_id\n\n            # Build filters before parallel execution (session-safe)\n            filters = self._build_index_filters(db_session)\n\n            # Create wrapper function for parallel execution\n            # Filters are already built, so we just need to pass them\n            def _retrieve_indexed_with_filters(\n                requests: list[IndexedDocumentRequest],\n            ) -> IndexedRetrievalResult:\n                \"\"\"Wrapper for parallel execution with pre-built filters.\"\"\"\n                return self._retrieve_indexed_documents_with_filters(requests, filters)\n\n            # Track if timeout occurred for error reporting\n            timeout_occurred = [False]  # Using list for mutability in closure\n\n            def _timeout_handler(\n                index: int,  # noqa: ARG001\n                func: Any,  # noqa: ARG001\n                args: tuple[Any, ...],  # noqa: ARG001\n            ) -> None:\n                timeout_occurred[0] = True\n                return None\n\n            # Run indexed retrieval and crawling in parallel for all URLs\n            # This allows us to compare results and pick the best representation\n            # Note: allow_failures=True ensures we get partial results even if one\n            # task times out or fails - the other task's results will still be used\n            indexed_result, crawled_result = run_functions_tuples_in_parallel(\n                [\n                    (_retrieve_indexed_with_filters, (all_requests,)),\n                    (self._fetch_web_content, (urls, override_kwargs.url_snippet_map)),\n                ],\n                allow_failures=True,\n                timeout=OPEN_URL_TIMEOUT_SECONDS,\n                timeout_callback=_timeout_handler,\n            )\n\n            indexed_result = indexed_result or IndexedRetrievalResult(\n                sections=[], missing_document_ids=[]\n            )\n            crawled_sections, failed_web_urls = crawled_result or ([], [])\n\n            # If timeout occurred and we have no successful results from either path,\n            # return a timeout-specific error message\n            if (\n                timeout_occurred[0]\n                and not indexed_result.sections\n                and not crawled_sections\n            ):\n                return ToolResponse(\n                    rich_response=None,\n                    llm_facing_response=\"The call to open_url timed out\",\n                )\n\n            # Last-resort: attempt link-based lookup for URLs that failed both\n            # document-ID resolution and crawling.\n            failed_web_urls = self._fallback_link_lookup(\n                unresolved_urls=unresolved_urls,\n                failed_web_urls=failed_web_urls,\n                db_session=db_session,\n                indexed_result=indexed_result,\n                url_to_doc_id=url_to_doc_id,\n                filters=filters,\n            )\n\n            # Merge results: prefer indexed when available, fallback to crawled\n            inference_sections = self._merge_indexed_and_crawled_results(\n                indexed_result.sections,\n                crawled_sections,\n                url_to_doc_id,\n                urls,\n                failed_web_urls,\n            )\n\n        if not inference_sections:\n            failure_descriptions = []\n            if indexed_result.missing_document_ids:\n                failure_descriptions.append(\n                    \"documents \"\n                    + \", \".join(sorted(set(indexed_result.missing_document_ids)))\n                )\n            if failed_web_urls:\n                cleaned_failures = sorted({url for url in failed_web_urls if url})\n                if cleaned_failures:\n                    failure_descriptions.append(\"URLs \" + \", \".join(cleaned_failures))\n            failure_msg = (\n                \"Failed to fetch content from \" + \" and \".join(failure_descriptions)\n                if failure_descriptions\n                else \"Failed to fetch content from the requested resources.\"\n            )\n            logger.warning(f\"OpenURL tool failed: {failure_msg}\")\n            return ToolResponse(rich_response=None, llm_facing_response=failure_msg)\n\n        for section in inference_sections:\n            chunk = section.center_chunk\n            if not chunk.semantic_identifier and chunk.source_links:\n                chunk.semantic_identifier = chunk.source_links[0]\n\n        # Convert sections to search docs, preserving source information\n        search_docs = convert_inference_sections_to_search_docs(\n            inference_sections, is_internet=False\n        )\n\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=OpenUrlDocuments(documents=search_docs),\n            )\n        )\n\n        # Note that with this call, some contents may be truncated or dropped so what the LLM sees may not be the entire set\n        # That said, it is still the best experience to show all the docs that were fetched, even if the LLM on rare\n        # occasions only actually sees a subset.\n        docs_str, citation_mapping = _convert_sections_to_llm_string_with_citations(\n            sections=inference_sections,\n            existing_citation_mapping=override_kwargs.citation_mapping,\n            citation_start=override_kwargs.starting_citation_num,\n        )\n\n        return ToolResponse(\n            rich_response=SearchDocsResponse(\n                search_docs=search_docs,\n                citation_mapping=citation_mapping,\n            ),\n            llm_facing_response=docs_str,\n        )\n\n    def _fallback_link_lookup(\n        self,\n        unresolved_urls: list[str],\n        failed_web_urls: list[str],\n        db_session: Session,\n        indexed_result: IndexedRetrievalResult,\n        url_to_doc_id: dict[str, str],\n        filters: IndexFilters,\n    ) -> list[str]:\n        \"\"\"Attempt link-based lookup for URLs that failed both document-ID resolution and crawling.\n\n        Args:\n            unresolved_urls: URLs that couldn't be resolved to document IDs\n            failed_web_urls: URLs that failed crawling\n            db_session: Database session\n            indexed_result: Result object to update with found sections\n            url_to_doc_id: Mapping to update with resolved URLs\n            filters: Pre-built index filters for document retrieval\n\n        Returns:\n            Updated list of failed_web_urls (with resolved URLs removed)\n        \"\"\"\n        if not unresolved_urls or not failed_web_urls:\n            return failed_web_urls\n\n        failed_set = {url for url in failed_web_urls if url}\n        fallback_urls = sorted(set(unresolved_urls).intersection(failed_set))\n\n        if not fallback_urls:\n            return failed_web_urls\n\n        fallback_requests = _lookup_document_ids_by_link(fallback_urls, db_session)\n\n        if not fallback_requests:\n            return failed_web_urls\n\n        deduped_fallback_requests = _dedupe_document_requests(fallback_requests)\n        fallback_result = self._retrieve_indexed_documents_with_filters(\n            deduped_fallback_requests, filters\n        )\n\n        if fallback_result.sections:\n            indexed_result.sections.extend(fallback_result.sections)\n            for request in deduped_fallback_requests:\n                if request.original_url:\n                    url_to_doc_id[request.original_url] = request.document_id\n\n        if fallback_result.missing_document_ids:\n            indexed_result.missing_document_ids.extend(\n                fallback_result.missing_document_ids\n            )\n\n        resolved_links = {request.original_url for request in deduped_fallback_requests}\n        return [url for url in failed_web_urls if url not in resolved_links]\n\n    def _retrieve_indexed_documents_with_filters(\n        self,\n        all_requests: list[IndexedDocumentRequest],\n        filters: IndexFilters,\n    ) -> IndexedRetrievalResult:\n        \"\"\"Retrieve indexed documents using pre-built filters (for parallel execution).\"\"\"\n        if not all_requests:\n            return IndexedRetrievalResult(sections=[], missing_document_ids=[])\n\n        document_ids = [req.document_id for req in all_requests]\n        chunk_requests = [\n            VespaChunkRequest(document_id=request.document_id)\n            for request in all_requests\n        ]\n\n        try:\n            chunks = self._document_index.id_based_retrieval(\n                chunk_requests=chunk_requests,\n                filters=filters,\n                batch_retrieval=True,\n            )\n        except Exception as exc:\n            logger.warning(\n                f\"Indexed retrieval failed for document IDs {document_ids}: {exc}\",\n                exc_info=True,\n            )\n            return IndexedRetrievalResult(\n                sections=[],\n                missing_document_ids=[req.document_id for req in all_requests],\n            )\n\n        chunk_map: dict[str, list] = defaultdict(list)\n        for chunk in chunks:\n            chunk_map[chunk.document_id].append(chunk)\n\n        sections: list[InferenceSection] = []\n        missing: list[str] = []\n\n        for request in all_requests:\n            doc_chunks = chunk_map.get(request.document_id)\n            if not doc_chunks:\n                missing.append(request.document_id)\n                continue\n            doc_chunks.sort(key=lambda chunk: chunk.chunk_id)\n            section = inference_section_from_chunks(\n                center_chunk=doc_chunks[0],\n                chunks=doc_chunks,\n            )\n            if section:\n                sections.append(section)\n            else:\n                missing.append(request.document_id)\n\n        return IndexedRetrievalResult(sections=sections, missing_document_ids=missing)\n\n    def _build_index_filters(self, db_session: Session) -> IndexFilters:\n        access_control_list = build_access_filters_for_user(self._user, db_session)\n        return IndexFilters(\n            source_type=None,\n            document_set=None,\n            time_cutoff=None,\n            tags=None,\n            access_control_list=access_control_list,\n            tenant_id=get_current_tenant_id() if MULTI_TENANT else None,\n            project_id_filter=None,\n        )\n\n    def _merge_indexed_and_crawled_results(\n        self,\n        indexed_sections: list[InferenceSection],\n        crawled_sections: list[InferenceSection],\n        url_to_doc_id: dict[str, str],\n        all_urls: list[str],\n        failed_web_urls: list[str],  # noqa: ARG002\n    ) -> list[InferenceSection]:\n        \"\"\"Merge indexed and crawled results, preferring indexed when available.\n\n        For each URL:\n        - If indexed result exists and has content, use it (better/cleaner representation)\n        - Otherwise, use crawled result if available\n        - If both fail, the URL will be in failed_web_urls for error reporting\n        \"\"\"\n        # Map indexed sections by document_id\n        indexed_by_doc_id: dict[str, InferenceSection] = {}\n        for section in indexed_sections:\n            indexed_by_doc_id[section.center_chunk.document_id] = section\n\n        # Map crawled sections by URL (from source_links)\n        crawled_by_url: dict[str, InferenceSection] = {}\n        for section in crawled_sections:\n            # Extract URL from source_links (crawled sections store URL here)\n            if section.center_chunk.source_links:\n                url = next(iter(section.center_chunk.source_links.values()))\n                if url:\n                    crawled_by_url[url] = section\n\n        merged_sections: list[InferenceSection] = []\n        used_doc_ids: set[str] = set()\n\n        # Process URLs: prefer indexed, fallback to crawled\n        for url in all_urls:\n            doc_id = url_to_doc_id.get(url)\n            indexed_section = indexed_by_doc_id.get(doc_id) if doc_id else None\n            # WebContent.link is normalized (query/fragment stripped). Match on the\n            # same normalized form to avoid dropping successful crawl results.\n            crawled_section = crawled_by_url.get(normalize_web_content_url(url))\n\n            if indexed_section and indexed_section.combined_content:\n                # Prefer indexed\n                merged_sections.append(indexed_section)\n                if doc_id:\n                    used_doc_ids.add(doc_id)\n            elif crawled_section and crawled_section.combined_content:\n                # Fallback to crawled if indexed unavailable or empty\n                # (e.g., auth issues, document not indexed, etc.)\n                merged_sections.append(crawled_section)\n\n        # Add any indexed sections that weren't matched to URLs\n        for doc_id, section in indexed_by_doc_id.items():\n            # Skip if this doc_id was already used for a URL\n            if doc_id not in used_doc_ids:\n                merged_sections.append(section)\n\n        return merged_sections\n\n    def _fetch_web_content(\n        self, urls: list[str], url_snippet_map: dict[str, str]\n    ) -> tuple[list[InferenceSection], list[str]]:\n        if not urls:\n            return [], []\n\n        raw_web_contents = self._provider.contents(urls)\n        # Treat \"no title and no content\" as a failure for that URL, but don't\n        # include the empty entry in downstream prompting/sections.\n        failed_urls: list[str] = [\n            content.link\n            for content in raw_web_contents\n            if not content.title.strip() and not content.full_content.strip()\n        ]\n        web_contents = filter_web_contents_with_no_title_or_content(raw_web_contents)\n        sections: list[InferenceSection] = []\n\n        for content in web_contents:\n            # Check if content is insufficient (e.g., \"Loading...\" or too short)\n            text_stripped = content.full_content.strip()\n            is_insufficient = (\n                not text_stripped\n                # TODO: Likely a behavior of our scraper, understand why this special pattern occurs\n                or text_stripped.lower() == \"loading...\"\n                or len(text_stripped) < 50\n            )\n\n            if (\n                content.scrape_successful\n                and content.full_content\n                and not is_insufficient\n            ):\n                sections.append(\n                    inference_section_from_internet_page_scrape(\n                        content, url_snippet_map.get(content.link, \"\")\n                    )\n                )\n            else:\n                # TODO: Slight improvement - if failed URL reasons are passed back to the LLM\n                # for example, if it tries to crawl Reddit and fails, it should know (probably) that this error would\n                # happen again if it tried to crawl Reddit again.\n                failed_urls.append(content.link or \"\")\n\n        return sections, failed_urls\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/open_url/snippet_matcher.py",
    "content": "import unicodedata\n\nfrom pydantic import BaseModel\nfrom rapidfuzz import fuzz\nfrom rapidfuzz import utils\n\nfrom onyx.utils.text_processing import is_zero_width_char\nfrom onyx.utils.text_processing import normalize_char\n\n\nclass SnippetMatchResult(BaseModel):\n    snippet_located: bool\n\n    start_idx: int = -1\n    end_idx: int = -1\n\n\nNegativeSnippetMatchResult = SnippetMatchResult(snippet_located=False)\n\n\ndef find_snippet_in_content(content: str, snippet: str) -> SnippetMatchResult:\n    \"\"\"\n    Finds where the snippet is located in the content.\n\n    Strategy:\n    1. Normalize the snippet & attempt to find it in the content\n    2. Perform a token based fuzzy search for the snippet in the content\n\n    Notes:\n     - If there are multiple matches of snippet, we choose the first normalised occurrence\n    \"\"\"\n    if not snippet or not content:\n        return NegativeSnippetMatchResult\n\n    result = _normalize_and_match(content, snippet)\n    if result.snippet_located:\n        return result\n\n    result = _token_based_match(content, snippet)\n    if result.snippet_located:\n        return result\n\n    return NegativeSnippetMatchResult\n\n\ndef _normalize_and_match(content: str, snippet: str) -> SnippetMatchResult:\n    \"\"\"\n    Normalizes the snippet & content, then performs a direct string match.\n    \"\"\"\n    normalized_content, content_map = _normalize_text_with_mapping(content)\n    normalized_snippet, url_snippet_map = _normalize_text_with_mapping(snippet)\n\n    if not normalized_content or not normalized_snippet:\n        return NegativeSnippetMatchResult\n\n    pos = normalized_content.find(normalized_snippet)\n    if pos != -1:\n        original_start = content_map[pos]\n\n        # Account for leading characters stripped from snippet during normalization\n        # (e.g., leading punctuation like \"[![]![]]\" that was removed)\n        if url_snippet_map:\n            first_snippet_orig_pos = url_snippet_map[0]\n            if first_snippet_orig_pos > 0:\n                # There were leading characters stripped from snippet\n                # Extend start position backwards to include them from content\n                original_start = max(original_start - first_snippet_orig_pos, 0)\n\n        # Determine end position, including any trailing characters that were\n        # normalized away (e.g., punctuation)\n        match_end_norm = pos + len(normalized_snippet)\n        if match_end_norm >= len(content_map):\n            # Match extends to end of normalized content - include all trailing chars\n            original_end = len(content) - 1\n        else:\n            # Match is in the middle - end at character before next normalized char\n            original_end = content_map[match_end_norm] - 1\n\n        # Account for trailing characters stripped from snippet during normalization\n        # (e.g., trailing punctuation like \"\\n[\" that was removed)\n        if url_snippet_map:\n            last_snippet_orig_pos = url_snippet_map[-1]\n            trailing_stripped = len(snippet) - last_snippet_orig_pos - 1\n            if trailing_stripped > 0:\n                # Extend end position to include trailing characters from content\n                # that correspond to the stripped trailing snippet characters\n                original_end = min(original_end + trailing_stripped, len(content) - 1)\n\n        return SnippetMatchResult(\n            snippet_located=True,\n            start_idx=original_start,\n            end_idx=original_end,\n        )\n\n    return NegativeSnippetMatchResult\n\n\ndef _normalize_text_with_mapping(text: str) -> tuple[str, list[int]]:\n    \"\"\"\n    Text normalization that maintains position mapping.\n\n    Returns:\n        tuple: (normalized_text, position_map)\n        - position_map[i] gives the original position for normalized position i\n    \"\"\"\n    if not text:\n        return \"\", []\n\n    original_text = text\n\n    # Step 1: NFC normalization with position mapping\n    nfc_text = unicodedata.normalize(\"NFC\", text)\n\n    # Map NFD positions → original positions.\n    # NFD only decomposes, so each original char produces 1+ NFD chars.\n    nfd_to_orig: list[int] = []\n    for orig_idx, orig_char in enumerate(original_text):\n        nfd_of_char = unicodedata.normalize(\"NFD\", orig_char)\n        for _ in nfd_of_char:\n            nfd_to_orig.append(orig_idx)\n\n    # Map NFC positions → NFD positions.\n    # Each NFC char, when decomposed, tells us exactly how many NFD\n    # chars it was composed from.\n    nfc_to_orig: list[int] = []\n    nfd_idx = 0\n    for nfc_char in nfc_text:\n        if nfd_idx < len(nfd_to_orig):\n            nfc_to_orig.append(nfd_to_orig[nfd_idx])\n        else:\n            nfc_to_orig.append(len(original_text) - 1)\n        nfd_of_nfc = unicodedata.normalize(\"NFD\", nfc_char)\n        nfd_idx += len(nfd_of_nfc)\n\n    # Work with NFC text from here\n    text = nfc_text\n\n    html_entities = {\n        \"&nbsp;\": \" \",\n        \"&#160;\": \" \",\n        \"&amp;\": \"&\",\n        \"&lt;\": \"<\",\n        \"&gt;\": \">\",\n        \"&quot;\": '\"',\n        \"&apos;\": \"'\",\n        \"&#39;\": \"'\",\n        \"&#x27;\": \"'\",\n        \"&ndash;\": \"-\",\n        \"&mdash;\": \"-\",\n        \"&hellip;\": \"...\",\n        \"&#xB0;\": \"°\",\n        \"&#xBA;\": \"°\",\n        \"&zwj;\": \"\",\n    }\n\n    # Sort entities by length (longest first) for greedy matching\n    sorted_entities = sorted(html_entities.keys(), key=len, reverse=True)\n\n    result_chars = []\n    result_map = []\n    i = 0\n    last_was_space = True  # Track to avoid leading spaces\n\n    while i < len(text):\n        # Convert NFC position to original position\n        orig_pos = nfc_to_orig[i] if i < len(nfc_to_orig) else len(original_text) - 1\n        char = text[i]\n        output = None\n        step = 1\n\n        # Check for HTML entities first (greedy match)\n        for entity in sorted_entities:\n            if text[i : i + len(entity)] == entity:\n                output = html_entities[entity]\n                step = len(entity)\n                break\n\n        # If no entity matched, process single character\n        if output is None:\n            # Skip zero-width characters\n            if is_zero_width_char(char):\n                i += 1\n                continue\n\n            output = normalize_char(char)\n\n        # Add output to result, normalizing each character from entity output\n        if output:\n            for out_char in output:\n                # Normalize entity output the same way as regular chars\n                normalized = normalize_char(out_char)\n\n                # Handle whitespace collapsing\n                if normalized == \" \":\n                    if not last_was_space:\n                        result_chars.append(\" \")\n                        result_map.append(orig_pos)\n                        last_was_space = True\n                else:\n                    result_chars.append(normalized)\n                    result_map.append(orig_pos)\n                    last_was_space = False\n\n        i += step\n\n    # Remove trailing space if present\n    if result_chars and result_chars[-1] == \" \":\n        result_chars.pop()\n        result_map.pop()\n\n    return \"\".join(result_chars), result_map\n\n\ndef _token_based_match(\n    content: str,\n    snippet: str,\n    min_threshold: float = 0.8,\n) -> SnippetMatchResult:\n    \"\"\"\n    Performs a token based fuzzy search for the snippet in the content.\n\n    min_threshold exists in the range [0, 1]\n    \"\"\"\n    if not content or not snippet:\n        return NegativeSnippetMatchResult\n\n    res = fuzz.partial_ratio_alignment(\n        content, snippet, processor=utils.default_process\n    )\n\n    if not res:\n        return NegativeSnippetMatchResult\n\n    score = res.score\n\n    if score >= (min_threshold * 100):\n        start_idx = res.src_start\n        end_idx = res.src_end\n\n        return SnippetMatchResult(\n            snippet_located=True,\n            start_idx=start_idx,\n            end_idx=end_idx,\n        )\n\n    return NegativeSnippetMatchResult\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/open_url/url_normalization.py",
    "content": "\"\"\"URL normalization for OpenURL tool.\n\nEach connector implements normalize_url() as a class method to normalize URLs to match\nthe canonical Document.id format used during ingestion. This ensures OpenURL can find\nindexed documents.\n\nUsage:\n    normalized = normalize_url(\"https://docs.google.com/document/d/123/edit\")\n    # Returns: \"https://docs.google.com/document/d/123\"\n\"\"\"\n\nfrom urllib.parse import urlparse\nfrom urllib.parse import urlunparse\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.factory import identify_connector_class\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _default_url_normalizer(url: str) -> str | None:\n    parsed = urlparse(url)\n    if not parsed.netloc:\n        return None\n\n    # Strip query params and fragment, normalize trailing slash\n    scheme = parsed.scheme or \"https\"\n    netloc = parsed.netloc.lower()\n    path = parsed.path.rstrip(\"/\")\n    params = \"\"  # URL params (rarely used)\n    query = \"\"  # Query string (removed)\n    fragment = \"\"  # Fragment/hash (removed)\n\n    normalized = urlunparse((scheme, netloc, path, params, query, fragment))\n    return normalized or None\n\n\ndef normalize_url(url: str, source_type: DocumentSource | None = None) -> str | None:\n    \"\"\"Normalize a URL to match the canonical Document.id format.\n\n    Dispatches to the connector's normalize_url() method or falls back to default normalizer.\n    \"\"\"\n    # If source_type not provided, try to detect it\n    if source_type is None:\n        source_type = _detect_source_type(url)\n\n    if source_type:\n        try:\n            connector_class = identify_connector_class(source_type)\n            result = connector_class.normalize_url(url)\n\n            if result.use_default:\n                return _default_url_normalizer(url)\n            return result.normalized_url  # Could be None if failed\n        except Exception as exc:\n            logger.debug(\n                \"Failed to normalize URL for source %s: %s. Using default normalizer.\",\n                source_type,\n                exc,\n            )\n\n    # No source_type or connector not found - fall back to default\n    return _default_url_normalizer(url)\n\n\ndef _detect_source_type(url: str) -> DocumentSource | None:\n    \"\"\"Detect DocumentSource from URL patterns (simple heuristic).\"\"\"\n    parsed = urlparse(url)\n    netloc = parsed.netloc.lower()\n    path = parsed.path.lower()\n\n    if \"docs.google.com\" in netloc or \"drive.google.com\" in netloc:\n        return DocumentSource.GOOGLE_DRIVE\n    if \"notion.so\" in netloc or \"notion.site\" in netloc:\n        return DocumentSource.NOTION\n    if \"atlassian.net\" in netloc:\n        # Check path for Jira indicators (more specific than netloc)\n        if \"/jira/\" in path or \"/browse/\" in path or \"jira\" in netloc:\n            return DocumentSource.JIRA\n        return DocumentSource.CONFLUENCE\n    if \"github.com\" in netloc:\n        return DocumentSource.GITHUB\n    if \"gitlab.com\" in netloc:\n        return DocumentSource.GITLAB\n    if \"sharepoint.com\" in netloc:\n        return DocumentSource.SHAREPOINT\n    if \"slack.com\" in netloc:\n        return DocumentSource.SLACK\n    if \"linear.app\" in netloc:\n        return DocumentSource.LINEAR\n\n    return None\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/open_url/utils.py",
    "content": "from onyx.tools.tool_implementations.open_url.models import WebContent\n\n\ndef filter_web_contents_with_no_title_or_content(\n    contents: list[WebContent],\n) -> list[WebContent]:\n    \"\"\"Filter out content entries that have neither a title nor any extracted text.\n\n    Some content providers can return placeholder/partial entries that only include a URL.\n    Downstream uses these fields for display + prompting; drop empty ones centrally\n    rather than duplicating checks across provider clients.\n    \"\"\"\n    filtered: list[WebContent] = []\n    for content in contents:\n        if content.title.strip() or content.full_content.strip():\n            filtered.append(content)\n    return filtered\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/python/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/tools/tool_implementations/python/code_interpreter_client.py",
    "content": "from __future__ import annotations\n\nimport json\nimport time\nfrom collections.abc import Generator\nfrom typing import Literal\nfrom typing import TypedDict\nfrom typing import Union\n\nimport requests\nfrom pydantic import BaseModel\n\nfrom onyx.configs.app_configs import CODE_INTERPRETER_BASE_URL\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_HEALTH_CACHE_TTL_SECONDS = 30\n_health_cache: dict[str, tuple[float, bool]] = {}\n\n\nclass FileInput(TypedDict):\n    \"\"\"Input file to be staged in execution workspace\"\"\"\n\n    path: str\n    file_id: str\n\n\nclass WorkspaceFile(BaseModel):\n    \"\"\"File in execution workspace\"\"\"\n\n    path: str\n    kind: Literal[\"file\", \"directory\"]\n    file_id: str | None = None\n\n\nclass ExecuteResponse(BaseModel):\n    \"\"\"Response from code execution\"\"\"\n\n    stdout: str\n    stderr: str\n    exit_code: int | None\n    timed_out: bool\n    duration_ms: int\n    files: list[WorkspaceFile]\n\n\nclass StreamOutputEvent(BaseModel):\n    \"\"\"SSE 'output' event: a chunk of stdout or stderr\"\"\"\n\n    stream: Literal[\"stdout\", \"stderr\"]\n    data: str\n\n\nclass StreamResultEvent(BaseModel):\n    \"\"\"SSE 'result' event: final execution result\"\"\"\n\n    exit_code: int | None\n    timed_out: bool\n    duration_ms: int\n    files: list[WorkspaceFile]\n\n\nclass StreamErrorEvent(BaseModel):\n    \"\"\"SSE 'error' event: execution-level error\"\"\"\n\n    message: str\n\n\nStreamEvent = Union[StreamOutputEvent, StreamResultEvent, StreamErrorEvent]\n\n_SSE_EVENT_MAP: dict[\n    str, type[StreamOutputEvent | StreamResultEvent | StreamErrorEvent]\n] = {\n    \"output\": StreamOutputEvent,\n    \"result\": StreamResultEvent,\n    \"error\": StreamErrorEvent,\n}\n\n\nclass CodeInterpreterClient:\n    \"\"\"Client for Code Interpreter service\"\"\"\n\n    def __init__(self, base_url: str | None = CODE_INTERPRETER_BASE_URL):\n        if not base_url:\n            raise ValueError(\"CODE_INTERPRETER_BASE_URL not configured\")\n        self.base_url = base_url.rstrip(\"/\")\n        self.session = requests.Session()\n        self._closed = False\n\n    def __enter__(self) -> CodeInterpreterClient:\n        return self\n\n    def __exit__(self, *args: object) -> None:\n        self.close()\n\n    def close(self) -> None:\n        if self._closed:\n            return\n        self.session.close()\n        self._closed = True\n\n    def _build_payload(\n        self,\n        code: str,\n        stdin: str | None,\n        timeout_ms: int,\n        files: list[FileInput] | None,\n    ) -> dict:\n        payload: dict = {\n            \"code\": code,\n            \"timeout_ms\": timeout_ms,\n        }\n        if stdin is not None:\n            payload[\"stdin\"] = stdin\n        if files:\n            payload[\"files\"] = files\n        return payload\n\n    def health(self, use_cache: bool = False) -> bool:\n        \"\"\"Check if the Code Interpreter service is healthy\n\n        Args:\n            use_cache: When True, return a cached result if available and\n                       within the TTL window. The cache is always populated\n                       after a live request regardless of this flag.\n        \"\"\"\n        if use_cache:\n            cached = _health_cache.get(self.base_url)\n            if cached is not None:\n                cached_at, cached_result = cached\n                if time.monotonic() - cached_at < _HEALTH_CACHE_TTL_SECONDS:\n                    return cached_result\n\n        url = f\"{self.base_url}/health\"\n        try:\n            response = self.session.get(url, timeout=5)\n            response.raise_for_status()\n            result = response.json().get(\"status\") == \"ok\"\n        except Exception as e:\n            logger.warning(f\"Exception caught when checking health, e={e}\")\n            result = False\n\n        _health_cache[self.base_url] = (time.monotonic(), result)\n        return result\n\n    def execute(\n        self,\n        code: str,\n        stdin: str | None = None,\n        timeout_ms: int = 30000,\n        files: list[FileInput] | None = None,\n    ) -> ExecuteResponse:\n        \"\"\"Execute Python code (batch)\"\"\"\n        url = f\"{self.base_url}/v1/execute\"\n        payload = self._build_payload(code, stdin, timeout_ms, files)\n\n        response = self.session.post(url, json=payload, timeout=timeout_ms / 1000 + 10)\n        response.raise_for_status()\n\n        return ExecuteResponse(**response.json())\n\n    def execute_streaming(\n        self,\n        code: str,\n        stdin: str | None = None,\n        timeout_ms: int = 30000,\n        files: list[FileInput] | None = None,\n    ) -> Generator[StreamEvent, None, None]:\n        \"\"\"Execute Python code with streaming SSE output.\n\n        Yields StreamEvent objects (StreamOutputEvent, StreamResultEvent,\n        StreamErrorEvent) as execution progresses. Falls back to batch\n        execution if the streaming endpoint is not available (older\n        code-interpreter versions).\n        \"\"\"\n        url = f\"{self.base_url}/v1/execute/stream\"\n        payload = self._build_payload(code, stdin, timeout_ms, files)\n\n        response = self.session.post(\n            url,\n            json=payload,\n            stream=True,\n            timeout=timeout_ms / 1000 + 10,\n        )\n\n        if response.status_code == 404:\n            logger.info(\n                \"Streaming endpoint not available, falling back to batch execution\"\n            )\n            response.close()\n            yield from self._batch_as_stream(code, stdin, timeout_ms, files)\n            return\n\n        try:\n            response.raise_for_status()\n            yield from self._parse_sse(response)\n        finally:\n            response.close()\n\n    def _parse_sse(\n        self, response: requests.Response\n    ) -> Generator[StreamEvent, None, None]:\n        \"\"\"Parse SSE streaming response into StreamEvent objects.\n\n        Expected format per event:\n            event: <type>\n            data: <json>\n            <blank line>\n        \"\"\"\n        event_type: str | None = None\n        data_lines: list[str] = []\n\n        for line in response.iter_lines(decode_unicode=True):\n            if line is None:\n                continue\n\n            if line == \"\":\n                # Blank line marks end of an SSE event\n                if event_type is not None and data_lines:\n                    data = \"\\n\".join(data_lines)\n                    model_cls = _SSE_EVENT_MAP.get(event_type)\n                    if model_cls is not None:\n                        yield model_cls(**json.loads(data))\n                    else:\n                        logger.warning(f\"Unknown SSE event type: {event_type}\")\n                event_type = None\n                data_lines = []\n            elif line.startswith(\"event:\"):\n                event_type = line[len(\"event:\") :].strip()\n            elif line.startswith(\"data:\"):\n                data_lines.append(line[len(\"data:\") :].strip())\n\n        if event_type is not None or data_lines:\n            logger.warning(\n                f\"SSE stream ended with incomplete event: event_type={event_type}, data_lines={data_lines}\"\n            )\n\n    def _batch_as_stream(\n        self,\n        code: str,\n        stdin: str | None,\n        timeout_ms: int,\n        files: list[FileInput] | None,\n    ) -> Generator[StreamEvent, None, None]:\n        \"\"\"Execute via batch endpoint and yield results as stream events.\"\"\"\n        result = self.execute(code, stdin, timeout_ms, files)\n\n        if result.stdout:\n            yield StreamOutputEvent(stream=\"stdout\", data=result.stdout)\n        if result.stderr:\n            yield StreamOutputEvent(stream=\"stderr\", data=result.stderr)\n        yield StreamResultEvent(\n            exit_code=result.exit_code,\n            timed_out=result.timed_out,\n            duration_ms=result.duration_ms,\n            files=result.files,\n        )\n\n    def upload_file(self, file_content: bytes, filename: str) -> str:\n        \"\"\"Upload file to Code Interpreter and return file_id\"\"\"\n        url = f\"{self.base_url}/v1/files\"\n\n        files = {\"file\": (filename, file_content)}\n        response = self.session.post(url, files=files, timeout=30)\n        response.raise_for_status()\n\n        return response.json()[\"file_id\"]\n\n    def download_file(self, file_id: str) -> bytes:\n        \"\"\"Download file from Code Interpreter\"\"\"\n        url = f\"{self.base_url}/v1/files/{file_id}\"\n\n        response = self.session.get(url, timeout=30)\n        response.raise_for_status()\n\n        return response.content\n\n    def delete_file(self, file_id: str) -> None:\n        \"\"\"Delete file from Code Interpreter\"\"\"\n        url = f\"{self.base_url}/v1/files/{file_id}\"\n\n        response = self.session.delete(url, timeout=10)\n        response.raise_for_status()\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/python/python_tool.py",
    "content": "import hashlib\nimport mimetypes\nfrom io import BytesIO\nfrom typing import Any\nfrom typing import cast\n\nfrom pydantic import TypeAdapter\nfrom sqlalchemy.orm import Session\nfrom typing_extensions import override\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.configs.app_configs import CODE_INTERPRETER_BASE_URL\nfrom onyx.configs.app_configs import CODE_INTERPRETER_DEFAULT_TIMEOUT_MS\nfrom onyx.configs.app_configs import CODE_INTERPRETER_MAX_OUTPUT_LENGTH\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.db.code_interpreter import fetch_code_interpreter_server\nfrom onyx.file_store.utils import build_full_frontend_file_url\nfrom onyx.file_store.utils import get_default_file_store\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import PythonToolDelta\nfrom onyx.server.query_and_chat.streaming_models import PythonToolStart\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import LlmPythonExecutionResult\nfrom onyx.tools.models import PythonExecutionFile\nfrom onyx.tools.models import PythonToolOverrideKwargs\nfrom onyx.tools.models import PythonToolRichResponse\nfrom onyx.tools.models import ToolCallException\nfrom onyx.tools.models import ToolResponse\nfrom onyx.tools.tool_implementations.python.code_interpreter_client import (\n    CodeInterpreterClient,\n)\nfrom onyx.tools.tool_implementations.python.code_interpreter_client import FileInput\nfrom onyx.tools.tool_implementations.python.code_interpreter_client import (\n    StreamErrorEvent,\n)\nfrom onyx.tools.tool_implementations.python.code_interpreter_client import (\n    StreamOutputEvent,\n)\nfrom onyx.tools.tool_implementations.python.code_interpreter_client import (\n    StreamResultEvent,\n)\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\nCODE_FIELD = \"code\"\n\n\ndef _truncate_output(output: str, max_length: int, label: str = \"output\") -> str:\n    \"\"\"\n    Truncate output string to max_length and append truncation message if needed.\n\n    Args:\n        output: The original output string to truncate\n        max_length: Maximum length before truncation\n        label: Label for logging (e.g., \"stdout\", \"stderr\")\n\n    Returns:\n        Truncated string with truncation message appended if truncated\n    \"\"\"\n    truncated = output[:max_length]\n    if len(output) > max_length:\n        truncated += (\n            f\"\\n... [output truncated, {len(output) - max_length} characters omitted]\"\n        )\n        logger.debug(f\"Truncated {label}: {truncated}\")\n    return truncated\n\n\nclass PythonTool(Tool[PythonToolOverrideKwargs]):\n    \"\"\"\n    Python code execution tool using an external Code Interpreter service.\n\n    This tool allows executing Python code in a secure, isolated sandbox environment.\n    It supports uploading files from the chat session and downloading generated files.\n    \"\"\"\n\n    NAME = \"python\"\n    DISPLAY_NAME = \"Code Interpreter\"\n    DESCRIPTION = \"Execute Python code in an isolated sandbox environment.\"\n\n    def __init__(self, tool_id: int, emitter: Emitter) -> None:\n        super().__init__(emitter=emitter)\n        self._id = tool_id\n        # Cache of (filename, content_hash) -> ci_file_id to avoid re-uploading\n        # the same file on every tool call iteration within the same agent session.\n        # Filename is included in the key so two files with identical bytes but\n        # different names each get their own upload slot.\n        # TTL assumption: code-interpreter file TTLs (typically hours) greatly\n        # exceed the lifetime of a single agent session (at most MAX_LLM_CYCLES\n        # iterations, typically a few minutes), so stale-ID eviction is not needed.\n        self._uploaded_file_cache: dict[tuple[str, str], str] = {}\n\n    @property\n    def id(self) -> int:\n        return self._id\n\n    @property\n    def name(self) -> str:\n        return self.NAME\n\n    @property\n    def description(self) -> str:\n        return self.DESCRIPTION\n\n    @property\n    def display_name(self) -> str:\n        return self.DISPLAY_NAME\n\n    @override\n    @classmethod\n    def is_available(cls, db_session: Session) -> bool:\n        if not CODE_INTERPRETER_BASE_URL:\n            return False\n        server = fetch_code_interpreter_server(db_session)\n        if not server.server_enabled:\n            return False\n\n        with CodeInterpreterClient() as client:\n            return client.health(use_cache=True)\n\n    def tool_definition(self) -> dict:\n        return {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": self.name,\n                \"description\": self.description,\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        CODE_FIELD: {\n                            \"type\": \"string\",\n                            \"description\": \"Python source code to execute\",\n                        },\n                    },\n                    \"required\": [CODE_FIELD],\n                },\n            },\n        }\n\n    def emit_start(self, placement: Placement) -> None:\n        \"\"\"Emit start packet for this tool. Code will be emitted in run() method.\"\"\"\n        # Note: PythonToolStart requires code, but we don't have it in emit_start\n        # The code is available in run() method via llm_kwargs\n        # We'll emit the start packet in run() instead\n\n    def run(\n        self,\n        placement: Placement,\n        override_kwargs: PythonToolOverrideKwargs,\n        **llm_kwargs: Any,\n    ) -> ToolResponse:\n        \"\"\"\n        Execute Python code in the Code Interpreter service.\n\n        Args:\n            placement: The placement info (turn_index and tab_index) for this tool call.\n            override_kwargs: Contains chat_files to stage for execution\n            **llm_kwargs: Contains 'code' parameter from LLM\n\n        Returns:\n            ToolResponse with execution results\n        \"\"\"\n        if CODE_FIELD not in llm_kwargs:\n            raise ToolCallException(\n                message=f\"Missing required '{CODE_FIELD}' parameter in python tool call\",\n                llm_facing_message=(\n                    f\"The python tool requires a '{CODE_FIELD}' parameter containing \"\n                    f\"the Python code to execute. Please provide like: \"\n                    f'{{\"code\": \"print(\\'Hello, world!\\')\"}}'\n                ),\n            )\n        code = cast(str, llm_kwargs[CODE_FIELD])\n        chat_files = override_kwargs.chat_files if override_kwargs else []\n\n        # Emit start event with the code\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=PythonToolStart(code=code),\n            )\n        )\n\n        # Create Code Interpreter client — context manager ensures\n        # session.close() is called on every exit path.\n        with CodeInterpreterClient() as client:\n            # Stage chat files for execution\n            files_to_stage: list[FileInput] = []\n            for ind, chat_file in enumerate(chat_files):\n                file_name = chat_file.filename or f\"file_{ind}\"\n                try:\n                    content_hash = hashlib.sha256(chat_file.content).hexdigest()\n                    cache_key = (file_name, content_hash)\n                    ci_file_id = self._uploaded_file_cache.get(cache_key)\n                    if ci_file_id is None:\n                        # Upload to Code Interpreter\n                        ci_file_id = client.upload_file(chat_file.content, file_name)\n                        self._uploaded_file_cache[cache_key] = ci_file_id\n\n                    # Stage for execution\n                    files_to_stage.append({\"path\": file_name, \"file_id\": ci_file_id})\n\n                    logger.info(f\"Staged file for Python execution: {file_name}\")\n\n                except Exception as e:\n                    logger.warning(f\"Failed to stage file {file_name}: {e}\")\n\n            try:\n                logger.debug(f\"Executing code: {code}\")\n\n                # Execute code with streaming (falls back to batch if unavailable)\n                stdout_parts: list[str] = []\n                stderr_parts: list[str] = []\n                result_event: StreamResultEvent | None = None\n\n                for event in client.execute_streaming(\n                    code=code,\n                    timeout_ms=CODE_INTERPRETER_DEFAULT_TIMEOUT_MS,\n                    files=files_to_stage or None,\n                ):\n                    if isinstance(event, StreamOutputEvent):\n                        if event.stream == \"stdout\":\n                            stdout_parts.append(event.data)\n                        else:\n                            stderr_parts.append(event.data)\n                        # Emit incremental delta to frontend\n                        self.emitter.emit(\n                            Packet(\n                                placement=placement,\n                                obj=PythonToolDelta(\n                                    stdout=(\n                                        event.data if event.stream == \"stdout\" else \"\"\n                                    ),\n                                    stderr=(\n                                        event.data if event.stream == \"stderr\" else \"\"\n                                    ),\n                                ),\n                            )\n                        )\n                    elif isinstance(event, StreamResultEvent):\n                        result_event = event\n                    elif isinstance(event, StreamErrorEvent):\n                        raise RuntimeError(f\"Code interpreter error: {event.message}\")\n\n                if result_event is None:\n                    raise RuntimeError(\n                        \"Code interpreter stream ended without a result event\"\n                    )\n\n                full_stdout = \"\".join(stdout_parts)\n                full_stderr = \"\".join(stderr_parts)\n\n                # Truncate output for LLM consumption\n                truncated_stdout = _truncate_output(\n                    full_stdout, CODE_INTERPRETER_MAX_OUTPUT_LENGTH, \"stdout\"\n                )\n                truncated_stderr = _truncate_output(\n                    full_stderr, CODE_INTERPRETER_MAX_OUTPUT_LENGTH, \"stderr\"\n                )\n\n                # Handle generated files\n                generated_files: list[PythonExecutionFile] = []\n                generated_file_ids: list[str] = []\n                file_ids_to_cleanup: list[str] = []\n                file_store = get_default_file_store()\n\n                for workspace_file in result_event.files:\n                    if workspace_file.kind != \"file\" or not workspace_file.file_id:\n                        continue\n\n                    try:\n                        # Download file from Code Interpreter\n                        file_content = client.download_file(workspace_file.file_id)\n\n                        # Determine MIME type from file extension\n                        filename = workspace_file.path.split(\"/\")[-1]\n                        mime_type, _ = mimetypes.guess_type(filename)\n                        # Default to binary if we can't determine the type\n                        mime_type = mime_type or \"application/octet-stream\"\n\n                        # Save to Onyx file store\n                        onyx_file_id = file_store.save_file(\n                            content=BytesIO(file_content),\n                            display_name=filename,\n                            file_origin=FileOrigin.CHAT_UPLOAD,\n                            file_type=mime_type,\n                        )\n\n                        generated_files.append(\n                            PythonExecutionFile(\n                                filename=filename,\n                                file_link=build_full_frontend_file_url(onyx_file_id),\n                            )\n                        )\n                        generated_file_ids.append(onyx_file_id)\n\n                        # Mark for cleanup\n                        file_ids_to_cleanup.append(workspace_file.file_id)\n\n                    except Exception as e:\n                        logger.error(\n                            f\"Failed to handle generated file {workspace_file.path}: {e}\"\n                        )\n\n                # Cleanup Code Interpreter files (generated files)\n                for ci_file_id in file_ids_to_cleanup:\n                    try:\n                        client.delete_file(ci_file_id)\n                    except Exception as e:\n                        logger.error(\n                            f\"Failed to delete Code Interpreter generated file {ci_file_id}: {e}\"\n                        )\n\n                # Note: staged input files are intentionally not deleted here because\n                # _uploaded_file_cache reuses their file_ids across iterations. They are\n                # orphaned when the session ends, but the code interpreter cleans up\n                # stale files on its own TTL.\n\n                # Emit file_ids once files are processed\n                if generated_file_ids:\n                    self.emitter.emit(\n                        Packet(\n                            placement=placement,\n                            obj=PythonToolDelta(file_ids=generated_file_ids),\n                        )\n                    )\n\n                # Build result\n                result = LlmPythonExecutionResult(\n                    stdout=truncated_stdout,\n                    stderr=truncated_stderr,\n                    exit_code=result_event.exit_code,\n                    timed_out=result_event.timed_out,\n                    generated_files=generated_files,\n                    error=(None if result_event.exit_code == 0 else truncated_stderr),\n                )\n\n                # Serialize result for LLM\n                adapter = TypeAdapter(LlmPythonExecutionResult)\n                llm_response = adapter.dump_json(result).decode()\n\n                return ToolResponse(\n                    rich_response=PythonToolRichResponse(\n                        generated_files=generated_files,\n                    ),\n                    llm_facing_response=llm_response,\n                )\n\n            except Exception as e:\n                logger.error(f\"Python execution failed: {e}\")\n                error_msg = str(e)\n\n                # Emit error delta\n                self.emitter.emit(\n                    Packet(\n                        placement=placement,\n                        obj=PythonToolDelta(\n                            stdout=\"\",\n                            stderr=error_msg,\n                            file_ids=[],\n                        ),\n                    )\n                )\n\n                # Return error result\n                result = LlmPythonExecutionResult(\n                    stdout=\"\",\n                    stderr=error_msg,\n                    exit_code=-1,\n                    timed_out=False,\n                    generated_files=[],\n                    error=error_msg,\n                )\n\n                adapter = TypeAdapter(LlmPythonExecutionResult)\n                llm_response = adapter.dump_json(result).decode()\n\n                return ToolResponse(\n                    rich_response=None,\n                    llm_facing_response=llm_response,\n                )\n\n    @classmethod\n    @override\n    def should_emit_argument_deltas(cls) -> bool:\n        return True\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/search/constants.py",
    "content": "\"\"\"Constants for search tool implementations.\"\"\"\n\n# Query Expansion and Fusion Weights\n# Taking an opinionated stance on the weights, no chance users can do a good job customizing this.\n# The dedicated rephrased/extracted semantic query is likely the best for hybrid search\nLLM_SEMANTIC_QUERY_WEIGHT = 1.3\n# The keyword expansions provide more breadth through a different search ranking function\n# This one is likely to produce the most different results.\nLLM_KEYWORD_QUERY_WEIGHT = 1.0\n# This is also lower because it is the LLM generated query without the custom instructions specifically for this purpose.\nLLM_NON_CUSTOM_QUERY_WEIGHT = 0.7\n# This is much lower weight because it is likely pretty similar to the LLM semantic query but just worse quality.\nORIGINAL_QUERY_WEIGHT = 0.5\n\n# Hybrid Search Configuration\n# This may in the future just use an entirely keyword search. Currently it is a hybrid search with a keyword first phase.\nKEYWORD_QUERY_HYBRID_ALPHA = 0.2\n\n# Reciprocal Rank Fusion\nRRF_K_VALUE = 50\n\n# Context Expansion\nFULL_DOC_NUM_CHUNKS_AROUND = 5\n\n# If a document is quite relevant and has many returned sections, likely it's enough to use the chunks around\n# the highest scoring section to detect relevance. This allows more other docs to be evaluated in the step.\n# This avoids documents with good titles or generally strong matches to flood out the rest of the search results.\n# If there are multiple indepedent sections from the doc, this won't truncate it, only if they're connected.\nMAX_CHUNKS_FOR_RELEVANCE = 3\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/search/search_tool.py",
    "content": "\"\"\"\nAn explanation of the search tool found below:\n\nStep 1: Queries\n- The LLM will generate some queries based on the chat history for what it thinks are the best things to search for.\nThis has a pretty generic prompt so it's not perfectly tuned for search but provides breadth and also the LLM can often break up\nthe query into multiple searches which the other flows do not do. Exp: Compare the sales process between company X and Y can be\nbroken up into \"sales process company X\" and \"sales process company Y\".\n- A specifial prompt and history is used to generate another query which is best tuned for a semantic/hybrid search pipeline.\n- A small set of keyword emphasized queries are also generated to cover additional breadth. This is important for cases where\nthe query is short, keyword heavy, or has a lot of model unseen terminology.\n\nStep 2: Recombination\nWe use a weighted RRF to combine the search results from the queries above. Each query will have a list of search results with\nsome scores however these are downstream of a normalization step so they cannot easily be compared with one another on an\nabsolute scale. RRF is a good way to combine these and allows us to give some custom weightings. We also merge document chunks\nthat are adjacent to provide more continuous context to the LLM.\n\nStep 3: Selection\nWe pass the recombined results (truncated set) to the LLM to select the most promising ones to read. This is to reduce noise and\nreduce downstream chances of hallucination. The LLM at this point also has the entire set of document chunks so it has\ninformation across documents not just per document. This also reduces the number of tokens required for the next step.\n\nStep 4: Expansion\nFor the selected documents, we pass the main retrieved sections from above (this may be a single chunk or a section comprised of\nseveral consecutive chunks) along with chunks above and below the section to the LLM. The LLM determines how much of the document\nit wants to read. This is done in parallel for all selected documents. Reason being that the LLM would not be able to do a good\njob of this with all of the documents in the prompt at once. Keeping every LLM decision step as simple as possible is key for\nreliable performance.\n\nStep 5: Prompt Building\nWe construct a response string back to the LLM as the result of the tool call. We also pass relevant richer objects back\nso that the rest of the code can persist it, render it in the UI, etc. The response is a json that makes it easy for the LLM to\nrefer to by using matching keywords to other parts of the prompt and reminders.\n\"\"\"\n\nimport time\nfrom collections.abc import Callable\nfrom typing import Any\nfrom typing import cast\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT\nfrom onyx.configs.constants import FederatedConnectorSource\nfrom onyx.context.search.federated.slack_search import slack_retrieval\nfrom onyx.context.search.models import BaseFilters\nfrom onyx.context.search.models import ChunkIndexRequest\nfrom onyx.context.search.models import ChunkSearchRequest\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import InferenceSection\nfrom onyx.context.search.models import PersonaSearchInfo\nfrom onyx.context.search.models import SearchDocsResponse\nfrom onyx.context.search.pipeline import merge_individual_chunks\nfrom onyx.context.search.pipeline import search_pipeline\nfrom onyx.context.search.preprocessing.access_filters import (\n    build_access_filters_for_user,\n)\nfrom onyx.context.search.utils import convert_inference_sections_to_search_docs\nfrom onyx.db.connector import check_connectors_exist\nfrom onyx.db.connector import check_federated_connectors_exist\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.federated import (\n    get_federated_connector_document_set_mappings_by_document_set_names,\n)\nfrom onyx.db.federated import list_federated_connector_oauth_tokens\nfrom onyx.db.models import SearchSettings\nfrom onyx.db.models import User\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.db.slack_bot import fetch_slack_bots\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.federated_connectors.federated_retrieval import FederatedRetrievalInfo\nfrom onyx.federated_connectors.federated_retrieval import (\n    get_federated_retrieval_functions,\n)\nfrom onyx.llm.factory import get_llm_token_counter\nfrom onyx.llm.interfaces import LLM\nfrom onyx.natural_language_processing.search_nlp_models import EmbeddingModel\nfrom onyx.onyxbot.slack.models import SlackContext\nfrom onyx.secondary_llm_flows.document_filter import select_chunks_for_relevance\nfrom onyx.secondary_llm_flows.document_filter import select_sections_for_expansion\nfrom onyx.secondary_llm_flows.query_expansion import keyword_query_expansion\nfrom onyx.secondary_llm_flows.query_expansion import semantic_query_rephrase\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta\nfrom onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta\nfrom onyx.server.query_and_chat.streaming_models import SearchToolStart\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import SearchToolOverrideKwargs\nfrom onyx.tools.models import ToolCallException\nfrom onyx.tools.models import ToolResponse\nfrom onyx.tools.tool_implementations.search.constants import (\n    KEYWORD_QUERY_HYBRID_ALPHA,\n)\nfrom onyx.tools.tool_implementations.search.constants import (\n    LLM_KEYWORD_QUERY_WEIGHT,\n)\nfrom onyx.tools.tool_implementations.search.constants import (\n    LLM_NON_CUSTOM_QUERY_WEIGHT,\n)\nfrom onyx.tools.tool_implementations.search.constants import (\n    LLM_SEMANTIC_QUERY_WEIGHT,\n)\nfrom onyx.tools.tool_implementations.search.constants import (\n    MAX_CHUNKS_FOR_RELEVANCE,\n)\nfrom onyx.tools.tool_implementations.search.constants import ORIGINAL_QUERY_WEIGHT\nfrom onyx.tools.tool_implementations.search.search_utils import (\n    expand_section_with_context,\n)\nfrom onyx.tools.tool_implementations.search.search_utils import (\n    merge_overlapping_sections,\n)\nfrom onyx.tools.tool_implementations.search.search_utils import (\n    weighted_reciprocal_rank_fusion,\n)\nfrom onyx.tools.tool_implementations.utils import (\n    convert_inference_sections_to_llm_string,\n)\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom onyx.utils.timing import log_function_time\nfrom shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE\nfrom shared_configs.configs import MODEL_SERVER_HOST\nfrom shared_configs.configs import MODEL_SERVER_PORT\n\nlogger = setup_logger()\n\nQUERIES_FIELD = \"queries\"\n\n\ndef deduplicate_queries(\n    queries_with_weights: list[tuple[str, float]],\n) -> list[tuple[str, float]]:\n    \"\"\"Deduplicate queries by case-insensitive comparison and sum weights.\n\n    Args:\n        queries_with_weights: List of (query, weight) tuples\n\n    Returns:\n        Deduplicated list of (query, weight) tuples with summed weights\n    \"\"\"\n    query_map: dict[str, tuple[str, float]] = {}\n    for query, weight in queries_with_weights:\n        query_lower = query.lower()\n        if query_lower in query_map:\n            # Sum weights for duplicate queries\n            existing_query, existing_weight = query_map[query_lower]\n            query_map[query_lower] = (existing_query, existing_weight + weight)\n        else:\n            # Keep the first occurrence (preserves original casing)\n            query_map[query_lower] = (query, weight)\n    return list(query_map.values())\n\n\ndef _estimate_section_tokens(\n    section: InferenceSection,\n    token_counter: Callable[[str], int],\n    max_chunks_per_section: int | None = None,\n) -> int:\n    \"\"\"Estimate token count for a section using the LLM tokenizer.\n\n    Args:\n        section: InferenceSection to estimate tokens for\n        token_counter: Function that counts tokens in text\n        max_chunks_per_section: Maximum chunks to consider per section (None for all)\n\n    Returns:\n        Token count for the section\n    \"\"\"\n    # Estimate for metadata (title, source_type, etc.)\n    METADATA_TOKEN_ESTIMATE = 75\n\n    # If max_chunks_per_section is specified, only count tokens for selected chunks\n    if max_chunks_per_section is not None:\n        selected_chunks = select_chunks_for_relevance(section, max_chunks_per_section)\n        # Combine content from selected chunks\n        combined_content = \"\\n\".join(chunk.content for chunk in selected_chunks)\n        content_tokens = token_counter(combined_content)\n    else:\n        content_tokens = token_counter(section.combined_content)\n\n    return content_tokens + METADATA_TOKEN_ESTIMATE\n\n\n@log_function_time(print_only=True)\ndef _trim_sections_by_tokens(\n    sections: list[InferenceSection],\n    max_tokens: int,\n    token_counter: Callable[[str], int],\n    max_chunks_per_section: int | None = None,\n) -> list[InferenceSection]:\n    \"\"\"Trim sections to fit within a token budget using the LLM tokenizer.\n\n    Args:\n        sections: List of InferenceSection objects to trim\n        max_tokens: Maximum token budget\n        token_counter: Function that counts tokens in text\n        max_chunks_per_section: Maximum chunks to consider per section (None for all)\n\n    Returns:\n        Trimmed list of sections that fit within the token budget\n    \"\"\"\n    if not sections or max_tokens <= 0:\n        return sections\n\n    trimmed_sections = []\n    total_tokens = 0\n\n    for section in sections:\n        section_tokens = _estimate_section_tokens(\n            section, token_counter, max_chunks_per_section\n        )\n        if total_tokens + section_tokens <= max_tokens:\n            trimmed_sections.append(section)\n            total_tokens += section_tokens\n        else:\n            break\n\n    logger.debug(\n        f\"Trimmed sections from {len(sections)} to {len(trimmed_sections)} ({total_tokens} tokens, budget: {max_tokens})\"\n    )\n\n    return trimmed_sections\n\n\nclass SearchTool(Tool[SearchToolOverrideKwargs]):\n    NAME = \"internal_search\"\n    DISPLAY_NAME = \"Internal Search\"\n    DESCRIPTION = \"Search connected applications for information.\"\n\n    def __init__(\n        self,\n        tool_id: int,\n        emitter: Emitter,\n        # Used for ACLs and federated search, anonymous users only see public docs\n        user: User,\n        # Pre-extracted persona search configuration\n        persona_search_info: PersonaSearchInfo,\n        llm: LLM,\n        document_index: DocumentIndex,\n        # Respecting user selections\n        user_selected_filters: BaseFilters | None,\n        # Vespa metadata filters for overflowing user files.  NOT the raw IDs\n        # of the current project/persona — only set when user files couldn't\n        # fit in the LLM context and need to be searched via vector DB.\n        project_id_filter: int | None,\n        persona_id_filter: int | None = None,\n        bypass_acl: bool = False,\n        # Slack context for federated Slack search (tokens fetched internally)\n        slack_context: SlackContext | None = None,\n        # Whether to enable Slack federated search\n        enable_slack_search: bool = True,\n    ) -> None:\n        super().__init__(emitter=emitter)\n\n        self.user = user\n        self.persona_search_info = persona_search_info\n        self.llm = llm\n        self.document_index = document_index\n        self.user_selected_filters = user_selected_filters\n        self.project_id_filter = project_id_filter\n        self.persona_id_filter = persona_id_filter\n        self.bypass_acl = bypass_acl\n        self.slack_context = slack_context\n        self.enable_slack_search = enable_slack_search\n\n        self._id = tool_id\n\n    def _prefetch_slack_data(\n        self, db_session: Session\n    ) -> tuple[str | None, str | None, dict[str, Any]]:\n        \"\"\"Pre-fetch Slack access token, bot token, and entity config from DB.\n\n        All DB queries for Slack federated search are performed here in a\n        single session, so the parallel search phase needs no DB access.\n\n        Returns:\n            (access_token, bot_token, entities) — access_token is None when\n            Slack search should be skipped.\n        \"\"\"\n        bot_token: str | None = None\n        access_token: str | None = None\n        entities: dict[str, Any] = {}\n\n        # Case 1: Slack bot context — requires a Slack federated connector\n        # linked via the persona's document sets\n        if self.slack_context:\n            document_set_names = self.persona_search_info.document_set_names\n            if not document_set_names:\n                logger.debug(\n                    \"Skipping Slack federated search: no document sets on persona\"\n                )\n                return None, None, {}\n\n            slack_federated_mappings = (\n                get_federated_connector_document_set_mappings_by_document_set_names(\n                    db_session, document_set_names\n                )\n            )\n            found_slack_connector = False\n            for mapping in slack_federated_mappings:\n                if (\n                    mapping.federated_connector is not None\n                    and mapping.federated_connector.source\n                    == FederatedConnectorSource.FEDERATED_SLACK\n                ):\n                    entities = mapping.federated_connector.config or {}\n                    found_slack_connector = True\n                    logger.debug(f\"Found Slack federated connector config: {entities}\")\n                    break\n\n            if not found_slack_connector:\n                logger.debug(\n                    f\"Skipping Slack federated search: no Slack federated connector linked to document sets {document_set_names}\"\n                )\n                return None, None, {}\n\n            try:\n                slack_bots = fetch_slack_bots(db_session)\n                if not slack_bots:\n                    return None, None, {}\n\n                tenant_slack_bot = next(\n                    (bot for bot in slack_bots if bot.enabled and bot.user_token),\n                    None,\n                )\n                if not tenant_slack_bot:\n                    tenant_slack_bot = next(\n                        (bot for bot in slack_bots if bot.enabled), None\n                    )\n\n                if tenant_slack_bot:\n                    bot_token = (\n                        tenant_slack_bot.bot_token.get_value(apply_mask=False)\n                        if tenant_slack_bot.bot_token\n                        else None\n                    )\n                    user_token = (\n                        tenant_slack_bot.user_token.get_value(apply_mask=False)\n                        if tenant_slack_bot.user_token\n                        else None\n                    )\n                    access_token = user_token or bot_token\n            except Exception as e:\n                logger.warning(f\"Could not fetch Slack bot tokens: {e}\")\n\n        # Case 2: Web user with federated OAuth (if bot context didn't yield a token)\n        if not access_token and self.user:\n            try:\n                federated_oauth_tokens = list_federated_connector_oauth_tokens(\n                    db_session, self.user.id\n                )\n                if not federated_oauth_tokens:\n                    return access_token, bot_token, entities\n\n                slack_oauth_token = next(\n                    (\n                        token\n                        for token in federated_oauth_tokens\n                        if token.federated_connector.source\n                        == FederatedConnectorSource.FEDERATED_SLACK\n                    ),\n                    None,\n                )\n                if slack_oauth_token and slack_oauth_token.token:\n                    access_token = slack_oauth_token.token.get_value(apply_mask=False)\n                    entities = slack_oauth_token.federated_connector.config or {}\n            except Exception as e:\n                logger.warning(f\"Could not fetch Slack OAuth token: {e}\")\n\n        return access_token, bot_token, entities\n\n    def _run_slack_search(\n        self,\n        query: str,\n        access_token: str,\n        bot_token: str | None,\n        entities: dict[str, Any],\n        search_settings: SearchSettings,\n    ) -> list[InferenceChunk]:\n        \"\"\"Run Slack federated search using pre-fetched tokens and config.\n\n        All DB data is pre-fetched in run() so this method needs no DB session.\n\n        Args:\n            query: The user's original search query\n            access_token: Slack access token (user or bot)\n            bot_token: Slack bot token (for enhanced permissions)\n            entities: Federated connector entity config (channel filtering)\n            search_settings: Pre-fetched SearchSettings for chunking config\n\n        Returns:\n            List of InferenceChunk results from Slack\n        \"\"\"\n        try:\n            chunk_request = ChunkIndexRequest(\n                query=query,\n                filters=IndexFilters(access_control_list=None),\n            )\n\n            chunks = slack_retrieval(\n                query=chunk_request,\n                access_token=access_token,\n                connector=None,\n                entities=entities,\n                limit=None,\n                slack_event_context=self.slack_context,\n                bot_token=bot_token,\n                team_id=None,\n                search_settings=search_settings,\n            )\n\n            logger.info(f\"Slack federated search returned {len(chunks)} chunks\")\n            return chunks\n\n        except Exception as e:\n            logger.error(f\"Slack federated search error: {e}\", exc_info=True)\n            return []\n\n    def _run_search_for_query(\n        self,\n        query: str,\n        hybrid_alpha: float | None,\n        num_hits: int,\n        acl_filters: list[str] | None,\n        embedding_model: EmbeddingModel,\n        federated_retrieval_infos: list[FederatedRetrievalInfo],\n    ) -> list[InferenceChunk]:\n        \"\"\"Run search pipeline for a single query using pre-fetched data.\n\n        All DB data (ACL filters, embedding model, federated retrieval info)\n        is pre-fetched in run() so this method needs no DB session.\n\n        Args:\n            query: The search query string\n            hybrid_alpha: Hybrid search alpha parameter (None for default)\n            num_hits: Maximum number of hits to return\n            acl_filters: Pre-fetched ACL filters (None when bypass_acl)\n            embedding_model: Pre-fetched embedding model\n            federated_retrieval_infos: Pre-fetched federated retrieval functions\n\n        Returns:\n            List of InferenceChunk results\n        \"\"\"\n        return search_pipeline(\n            chunk_search_request=ChunkSearchRequest(\n                query=query,\n                hybrid_alpha=hybrid_alpha,\n                # For projects, the search scope is the project and has no other limits\n                user_selected_filters=(\n                    self.user_selected_filters\n                    if self.project_id_filter is None\n                    else None\n                ),\n                bypass_acl=self.bypass_acl,\n                limit=num_hits,\n            ),\n            project_id_filter=self.project_id_filter,\n            persona_id_filter=self.persona_id_filter,\n            document_index=self.document_index,\n            user=self.user,\n            persona_search_info=self.persona_search_info,\n            acl_filters=acl_filters,\n            embedding_model=embedding_model,\n            prefetched_federated_retrieval_infos=federated_retrieval_infos,\n        )\n\n    @classmethod\n    def is_available(cls, db_session: Session) -> bool:\n        \"\"\"Check if search tool is available.\n\n        Returns False when the vector DB is disabled (search cannot function\n        without it). Otherwise, available if ANY of the following exist:\n        - Regular connectors (team knowledge)\n        - Federated connectors (e.g., Slack)\n        - User files (User Knowledge mode)\n        \"\"\"\n        from onyx.configs.app_configs import DISABLE_VECTOR_DB\n        from onyx.db.connector import check_user_files_exist\n\n        if DISABLE_VECTOR_DB:\n            return False\n\n        return (\n            check_connectors_exist(db_session)\n            or check_federated_connectors_exist(db_session)\n            or check_user_files_exist(db_session)\n        )\n\n    @property\n    def id(self) -> int:\n        return self._id\n\n    @property\n    def name(self) -> str:\n        return self.NAME\n\n    @property\n    def description(self) -> str:\n        return self.DESCRIPTION\n\n    @property\n    def display_name(self) -> str:\n        return self.DISPLAY_NAME\n\n    \"\"\"For explicit tool calling\"\"\"\n\n    def tool_definition(self) -> dict:\n        return {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": self.name,\n                \"description\": self.description,\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        QUERIES_FIELD: {\n                            \"type\": \"array\",\n                            \"items\": {\"type\": \"string\"},\n                            \"description\": \"List of search queries to execute, typically a single query.\",\n                        },\n                    },\n                    \"required\": [QUERIES_FIELD],\n                },\n            },\n        }\n\n    def emit_start(self, placement: Placement) -> None:\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=SearchToolStart(),\n            )\n        )\n\n    @log_function_time(print_only=True)\n    def run(\n        self,\n        placement: Placement,\n        override_kwargs: SearchToolOverrideKwargs,\n        **llm_kwargs: Any,\n    ) -> ToolResponse:\n        # Start overall timing\n        overall_start_time = time.time()\n\n        # Initialize timing variables (in case of early exceptions)\n        query_expansion_elapsed = 0.0\n        document_selection_elapsed = 0.0\n        document_expansion_elapsed = 0.0\n\n        # Pre-fetch all DB data in a single short-lived session so that\n        # parallel search workers need zero DB connections.\n        with get_session_with_current_tenant() as db_session:\n            # ACL filters\n            acl_filters: list[str] | None = (\n                None\n                if self.bypass_acl\n                else build_access_filters_for_user(self.user, db_session)\n            )\n\n            # SearchSettings → materialise EmbeddingModel while session is\n            # open (forces lazy-load of cloud_provider properties)\n            search_settings = get_current_search_settings(db_session)\n            if not search_settings:\n                raise RuntimeError(\n                    \"No search settings configured — cannot run internal search\"\n                )\n\n            embedding_model = EmbeddingModel.from_db_model(\n                search_settings=search_settings,\n                server_host=MODEL_SERVER_HOST,\n                server_port=MODEL_SERVER_PORT,\n            )\n\n            # Federated retrieval functions (non-Slack; Slack is separate)\n            if self.project_id_filter is not None:\n                # Project mode ignores user filters → no federated sources\n                prefetch_source_types = None\n            else:\n                prefetch_source_types = (\n                    list(self.user_selected_filters.source_type)\n                    if self.user_selected_filters\n                    and self.user_selected_filters.source_type\n                    else None\n                )\n            federated_retrieval_infos = (\n                get_federated_retrieval_functions(\n                    db_session=db_session,\n                    user_id=self.user.id if self.user else None,\n                    source_types=prefetch_source_types,\n                    document_set_names=self.persona_search_info.document_set_names,\n                )\n                or []\n            )\n\n            # Slack tokens and entity config — only prefetch when Slack\n            # search is enabled or we're in a Slack bot context.\n            if self.enable_slack_search or self.slack_context:\n                slack_access_token, slack_bot_token, slack_entities = (\n                    self._prefetch_slack_data(db_session)\n                )\n            else:\n                slack_access_token, slack_bot_token, slack_entities = (\n                    None,\n                    None,\n                    {},\n                )\n        # Session is closed here — all parallel work uses plain Python objects only\n\n        if QUERIES_FIELD not in llm_kwargs:\n            raise ToolCallException(\n                message=f\"Missing required '{QUERIES_FIELD}' parameter in internal_search tool call\",\n                llm_facing_message=(\n                    f\"The internal_search tool requires a '{QUERIES_FIELD}' parameter \"\n                    f\"containing an array of search queries. Please provide the queries \"\n                    f'like: {{\"queries\": [\"your search query here\"]}}'\n                ),\n            )\n        llm_queries = cast(list[str], llm_kwargs[QUERIES_FIELD])\n\n        # Run semantic and keyword query expansion in parallel (unless skipped)\n        # Use message history, memories, and user info from override_kwargs\n        message_history = (\n            override_kwargs.message_history if override_kwargs.message_history else []\n        )\n        memories = (\n            override_kwargs.user_memory_context.as_formatted_list()\n            if override_kwargs.user_memory_context\n            else []\n        )\n        user_info = override_kwargs.user_info\n\n        # Skip query expansion if this is a repeat search call\n        if override_kwargs.skip_query_expansion:\n            logger.debug(\"Search tool - Skipping query expansion (repeat search call)\")\n            semantic_query = None\n            keyword_queries: list[str] = []\n        else:\n            # Start timing for query expansion/rephrase\n            query_expansion_start_time = time.time()\n\n            functions_with_args: list[tuple[Callable, tuple]] = [\n                (\n                    semantic_query_rephrase,\n                    (message_history, self.llm, user_info, memories),\n                ),\n                (\n                    keyword_query_expansion,\n                    (message_history, self.llm, user_info, memories),\n                ),\n            ]\n\n            expansion_results = run_functions_tuples_in_parallel(functions_with_args)\n\n            # End timing for query expansion/rephrase\n            query_expansion_elapsed = time.time() - query_expansion_start_time\n            logger.debug(\n                f\"Search tool - Query expansion/rephrase took {query_expansion_elapsed:.3f} seconds\"\n            )\n            semantic_query = expansion_results[0]  # str\n            keyword_queries = (\n                expansion_results[1] if expansion_results[1] is not None else []\n            )  # list[str]\n\n        # Prepare queries with their weights and hybrid_alpha settings\n        # Group 1: Keyword queries (use hybrid_alpha=0.2)\n        keyword_queries_with_weights = [\n            (kw_query, LLM_KEYWORD_QUERY_WEIGHT) for kw_query in keyword_queries\n        ]\n        deduplicated_keyword_queries = deduplicate_queries(keyword_queries_with_weights)\n\n        # Group 2: Semantic/LLM/Original queries (use hybrid_alpha=None)\n        # Include all LLM-provided queries with their weight\n        semantic_queries_with_weights = (\n            [\n                (semantic_query, LLM_SEMANTIC_QUERY_WEIGHT),\n            ]\n            if semantic_query\n            else []\n        )\n        for llm_query in llm_queries:\n            # In rare cases, the LLM may fail to provide real queries\n            if llm_query:\n                semantic_queries_with_weights.append(\n                    (llm_query, LLM_NON_CUSTOM_QUERY_WEIGHT)\n                )\n        if override_kwargs.original_query:\n            semantic_queries_with_weights.append(\n                (override_kwargs.original_query, ORIGINAL_QUERY_WEIGHT)\n            )\n        deduplicated_semantic_queries = deduplicate_queries(\n            semantic_queries_with_weights\n        )\n\n        # Build the all_queries list for UI display, sorted by weight (highest first)\n        # Combine all deduplicated queries and sort by weight\n        all_queries_with_weights = (\n            deduplicated_semantic_queries + deduplicated_keyword_queries\n        )\n        all_queries_with_weights.sort(key=lambda x: x[1], reverse=True)\n\n        # Extract queries in weight order, handling cross-duplicates\n        all_queries = []\n        seen_lower = set()\n        for query, _ in all_queries_with_weights:\n            query_lower = query.lower()\n            if query_lower not in seen_lower:\n                all_queries.append(query)\n                seen_lower.add(query_lower)\n\n        logger.debug(\n            f\"All Queries (sorted by weight): {all_queries}, Keyword queries: {[q for q, _ in deduplicated_keyword_queries]}\"\n        )\n\n        # Emit the queries early so the UI can display them immediately\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=SearchToolQueriesDelta(\n                    queries=all_queries,\n                ),\n            )\n        )\n\n        # Run all searches in parallel with appropriate hybrid_alpha values\n        # Keyword queries use hybrid_alpha=0.2 (favor keyword search)\n        # Other queries use default hybrid_alpha (balanced semantic/keyword)\n        search_functions: list[tuple[Callable, tuple]] = []\n        search_weights: list[float] = []\n\n        # Add deduplicated semantic queries (use hybrid_alpha=None)\n        for query, weight in deduplicated_semantic_queries:\n            search_functions.append(\n                (\n                    self._run_search_for_query,\n                    (\n                        query,\n                        None,\n                        override_kwargs.num_hits,\n                        acl_filters,\n                        embedding_model,\n                        federated_retrieval_infos,\n                    ),\n                )\n            )\n            search_weights.append(weight)\n\n        # Add deduplicated keyword queries (use hybrid_alpha=0.2)\n        for query, weight in deduplicated_keyword_queries:\n            search_functions.append(\n                (\n                    self._run_search_for_query,\n                    (\n                        query,\n                        KEYWORD_QUERY_HYBRID_ALPHA,\n                        override_kwargs.num_hits,\n                        acl_filters,\n                        embedding_model,\n                        federated_retrieval_infos,\n                    ),\n                )\n            )\n            search_weights.append(weight)\n\n        # Add Slack federated search (runs once in parallel with all Vespa queries)\n        # This avoids the query multiplication problem where each Vespa query\n        # would trigger a separate Slack search.\n        # Only run if pre-fetch found a valid Slack access token.\n        if slack_access_token and override_kwargs.original_query:\n            search_functions.append(\n                (\n                    self._run_slack_search,\n                    (\n                        override_kwargs.original_query,\n                        slack_access_token,\n                        slack_bot_token,\n                        slack_entities,\n                        search_settings,\n                    ),\n                )\n            )\n            # Use same weight as original query for Slack results\n            search_weights.append(ORIGINAL_QUERY_WEIGHT)\n\n        # Run all searches in parallel (Vespa queries + Slack)\n        all_search_results = run_functions_tuples_in_parallel(search_functions)\n        if not all_search_results:\n            all_search_results = []\n\n        # Merge results using weighted Reciprocal Rank Fusion\n        # This intelligently combines rankings from different queries\n        top_chunks = weighted_reciprocal_rank_fusion(\n            ranked_results=all_search_results,\n            weights=search_weights,\n            id_extractor=lambda chunk: f\"{chunk.document_id}_{chunk.chunk_id}\",\n        )\n\n        # We can disregard all of the chunks that exceed the num_hits parameter since it's not valid to have\n        # documents/contents from things that aren't returned to the user on the frontend\n        top_sections = merge_individual_chunks(top_chunks)[: override_kwargs.num_hits]\n\n        if not top_sections:\n            logger.info(\"Search tool - no results found, returning empty response\")\n            return ToolResponse(\n                rich_response=SearchDocsResponse(\n                    search_docs=[],\n                    citation_mapping={},\n                    displayed_docs=None,\n                ),\n                llm_facing_response=\"\",\n            )\n\n        # Convert InferenceSections to SearchDocs for emission\n        search_docs = convert_inference_sections_to_search_docs(\n            top_sections, is_internet=False\n        )\n\n        secondary_flows_user_query = (\n            override_kwargs.original_query\n            or semantic_query\n            or (llm_queries[0] if llm_queries else \"\")\n        )\n\n        token_counter = get_llm_token_counter(self.llm)\n\n        # Trim sections to fit within token budget before LLM selection\n        # This is to account for very short chunks flooding the search context\n        # Only consider MAX_CHUNKS_FOR_RELEVANCE chunks per section to avoid flooding from\n        # documents with many matching sections\n        max_tokens_for_selection = (\n            override_kwargs.max_llm_chunks or MAX_CHUNKS_FED_TO_CHAT\n        ) * DOC_EMBEDDING_CONTEXT_SIZE\n\n        # This is approximate since it doesn't build the exact string of the call below\n        # Some things are estimated and may be under (like the metadata tokens)\n        sections_for_selection = _trim_sections_by_tokens(\n            sections=top_sections,\n            max_tokens=max_tokens_for_selection,\n            token_counter=token_counter,\n            max_chunks_per_section=MAX_CHUNKS_FOR_RELEVANCE,\n        )\n\n        # Start timing for LLM document selection\n        document_selection_start_time = time.time()\n\n        # Use LLM to select the most relevant sections for expansion\n        selected_sections, best_doc_ids = select_sections_for_expansion(\n            sections=sections_for_selection,\n            user_query=secondary_flows_user_query,\n            llm=self.llm,\n            max_chunks_per_section=MAX_CHUNKS_FOR_RELEVANCE,\n        )\n\n        # End timing for LLM document selection\n        document_selection_elapsed = time.time() - document_selection_start_time\n        logger.debug(\n            f\"Search tool - LLM picking documents took {document_selection_elapsed:.3f} seconds \"\n            f\"(selected {len(selected_sections)} sections)\"\n        )\n\n        # Create a set of best document IDs for quick lookup\n        best_doc_ids_set = set(best_doc_ids) if best_doc_ids else set()\n\n        # To show the users, we only pass in the docs that are determined to be good by the LLM\n        final_ui_docs = convert_inference_sections_to_search_docs(\n            selected_sections, is_internet=False\n        )\n\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=SearchToolDocumentsDelta(\n                    documents=final_ui_docs,\n                ),\n            )\n        )\n\n        # Create wrapper function to handle errors gracefully\n        def expand_section_safe(\n            section: InferenceSection,\n            user_query: str,\n            llm: LLM,\n            document_index: DocumentIndex,\n            expand_override: bool,\n        ) -> InferenceSection:\n            \"\"\"Wrapper that handles exceptions and returns original section on error.\"\"\"\n            try:\n                expanded_section = expand_section_with_context(\n                    section=section,\n                    user_query=user_query,\n                    llm=llm,\n                    document_index=document_index,\n                    expand_override=expand_override,\n                )\n                # Return expanded section if not None, otherwise original\n                return expanded_section if expanded_section is not None else section\n            except Exception as e:\n                logger.warning(\n                    f\"Error processing section context expansion: {e}. Using original section.\"\n                )\n                return section\n\n        # Build parallel function calls for all sections\n        expansion_functions: list[tuple[Callable, tuple]] = [\n            (\n                expand_section_safe,\n                (\n                    section,\n                    secondary_flows_user_query,\n                    self.llm,\n                    self.document_index,\n                    section.center_chunk.document_id in best_doc_ids_set,\n                ),\n            )\n            for section in selected_sections\n        ]\n\n        # Start timing for document expansion\n        document_expansion_start_time = time.time()\n\n        # Run all expansions in parallel\n        expanded_sections = run_functions_tuples_in_parallel(expansion_functions)\n\n        # End timing for document expansion\n        document_expansion_elapsed = time.time() - document_expansion_start_time\n        logger.debug(\n            f\"Search tool - Expansion of selected documents took {document_expansion_elapsed:.3f} seconds \"\n            f\"(expanded {len(expanded_sections)} sections)\"\n        )\n\n        if not expanded_sections:\n            expanded_sections = selected_sections\n\n        # Merge sections from the same document that have adjacent or overlapping chunks\n        # This prevents duplicate content and reduces token usage\n        merged_sections = merge_overlapping_sections(expanded_sections)\n\n        docs_str, citation_mapping = convert_inference_sections_to_llm_string(\n            top_sections=merged_sections,\n            citation_start=override_kwargs.starting_citation_num,\n            limit=override_kwargs.max_llm_chunks,\n            include_document_id=False,\n        )\n\n        # End overall timing\n        overall_elapsed = time.time() - overall_start_time\n        logger.debug(\n            f\"Search tool - Total execution time: {overall_elapsed:.3f} seconds \"\n            f\"(query expansion: {query_expansion_elapsed:.3f}s, \"\n            f\"document selection: {document_selection_elapsed:.3f}s, \"\n            f\"document expansion: {document_expansion_elapsed:.3f}s)\"\n        )\n\n        return ToolResponse(\n            # Typically the rich response will give more docs in case it needs to be displayed in the UI\n            rich_response=SearchDocsResponse(\n                search_docs=search_docs,\n                citation_mapping=citation_mapping,\n                displayed_docs=final_ui_docs or None,\n            ),\n            # The LLM facing response typically includes less docs to cut down on noise and token usage\n            llm_facing_response=docs_str,\n        )\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/search/search_utils.py",
    "content": "from collections import defaultdict\nfrom collections.abc import Callable\nfrom typing import TypeVar\n\nfrom onyx.context.search.models import ContextExpansionType\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import InferenceSection\nfrom onyx.context.search.utils import inference_section_from_chunks\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.document_index.interfaces import VespaChunkRequest\nfrom onyx.document_index.vespa.shared_utils.utils import (\n    replace_invalid_doc_id_characters,\n)\nfrom onyx.llm.interfaces import LLM\nfrom onyx.prompts.prompt_utils import clean_up_source\nfrom onyx.secondary_llm_flows.document_filter import classify_section_relevance\nfrom onyx.tools.tool_implementations.search.constants import (\n    FULL_DOC_NUM_CHUNKS_AROUND,\n)\nfrom onyx.tools.tool_implementations.search.constants import RRF_K_VALUE\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nT = TypeVar(\"T\")\n\n\ndef weighted_reciprocal_rank_fusion(\n    ranked_results: list[list[T]],\n    weights: list[float],\n    id_extractor: Callable[[T], str],\n    k: int = RRF_K_VALUE,\n) -> list[T]:\n    \"\"\"\n    Merge multiple ranked result lists using weighted Reciprocal Rank Fusion (RRF).\n\n    RRF combines rankings from different sources by computing a score for each item\n    based on its rank positions across all lists. The weighted version allows different\n    importance to be assigned to different result sources.\n\n    Formula: RRF_score(item) = sum over all rankers of: weight / (k + rank(item))\n\n    Args:\n        ranked_results: List of ranked result lists, where each inner list contains\n                       items ranked from best to worst (index 0 is rank 1)\n        weights: List of weights corresponding to each result list. Higher weights\n                give more importance to that ranking source.\n        id_extractor: Function to extract a unique identifier from each item.\n                     Items with the same ID across different lists are treated as\n                     the same item and their scores are accumulated.\n        k: Constant to prevent overemphasis on top-ranked items (default: RRF_K_VALUE).\n           Typical values are 50-60. Lower values give more weight to top results.\n\n    Returns:\n        List of items sorted by their weighted RRF score in descending order.\n        Each unique item appears only once, even if it was in multiple input lists.\n\n    Example:\n        >>> results1 = [doc_a, doc_b, doc_c]  # Semantic search results\n        >>> results2 = [doc_c, doc_a, doc_d]  # Keyword search results\n        >>> weights = [1.2, 1.0]  # Semantic query weighted higher\n        >>> merged = weighted_reciprocal_rank_fusion(\n        ...     [results1, results2],\n        ...     weights,\n        ...     lambda doc: doc.document_id\n        ... )\n        # doc_a and doc_c will have higher scores (appeared in both lists)\n    \"\"\"\n    if len(ranked_results) != len(weights):\n        raise ValueError(\n            f\"Number of ranked results ({len(ranked_results)}) must match number of weights ({len(weights)})\"\n        )\n\n    # Track RRF scores for each unique item (identified by ID)\n    rrf_scores: dict[str, float] = defaultdict(float)\n    # Track the actual item object for each ID (use first occurrence)\n    id_to_item: dict[str, T] = {}\n    # Track which result list each item first appeared in (for tiebreaking)\n    id_to_source_index: dict[str, int] = {}\n    # Track the position within the source list (for tiebreaking)\n    id_to_source_rank: dict[str, int] = {}\n\n    # Compute weighted RRF scores\n    for source_idx, (result_list, weight) in enumerate(zip(ranked_results, weights)):\n        for rank, item in enumerate(result_list, start=1):\n            item_id = id_extractor(item)\n\n            # Add weighted RRF score: weight / (k + rank)\n            rrf_scores[item_id] += weight / (k + rank)\n\n            # Store the item object and source info (if not already stored)\n            if item_id not in id_to_item:\n                id_to_item[item_id] = item\n                id_to_source_index[item_id] = source_idx\n                id_to_source_rank[item_id] = rank\n\n    # Sort items by:\n    # 1. RRF score (descending - higher is better)\n    # 2. Source index modulo (for round-robin across queries)\n    # 3. Rank within source (ascending - lower rank is better)\n    sorted_ids = sorted(\n        rrf_scores.keys(),\n        key=lambda id: (\n            -rrf_scores[\n                id\n            ],  # Primary: higher RRF score first (negative for descending)\n            id_to_source_rank[id],  # Secondary: lower rank within source first\n            id_to_source_index[id],  # Tertiary: round-robin by cycling through sources\n        ),\n    )\n    return [id_to_item[item_id] for item_id in sorted_ids]\n\n\ndef section_to_dict(section: InferenceSection, section_num: int) -> dict:\n    doc_dict = {\n        \"document_number\": section_num + 1,\n        \"title\": section.center_chunk.semantic_identifier,\n        \"content\": section.combined_content,\n        \"source\": clean_up_source(section.center_chunk.source_type),\n        \"metadata\": section.center_chunk.metadata,\n    }\n    if section.center_chunk.updated_at:\n        doc_dict[\"updated_at\"] = section.center_chunk.updated_at.strftime(\n            \"%B %d, %Y %H:%M\"\n        )\n    return doc_dict\n\n\ndef _retrieve_adjacent_chunks(\n    section: InferenceSection,\n    document_index: DocumentIndex,\n    num_chunks_above: int,\n    num_chunks_below: int,\n) -> tuple[list[InferenceChunk], list[InferenceChunk]]:\n    \"\"\"Retrieve adjacent chunks above and below a section.\n\n    Args:\n        section: The InferenceSection to get adjacent chunks for\n        document_index: The document index to query\n        num_chunks_above: Number of chunks to retrieve above the section\n        num_chunks_below: Number of chunks to retrieve below the section\n\n    Returns:\n        Tuple of (chunks_above, chunks_below)\n    \"\"\"\n    # Get the document_id and chunk range from the section\n    document_id = section.center_chunk.document_id\n\n    # The document fetching already enforced permissions\n    # the expansion does not need to do this unless it's for performance reasons\n    filters = IndexFilters(access_control_list=None)\n\n    # Find the min and max chunk_id in the section\n    chunk_ids = [chunk.chunk_id for chunk in section.chunks]\n    min_chunk_id = min(chunk_ids)\n    max_chunk_id = max(chunk_ids)\n\n    chunks_above: list[InferenceChunk] = []\n    chunks_below: list[InferenceChunk] = []\n\n    # Retrieve chunks above (if any)\n    if num_chunks_above > 0 and min_chunk_id > 0:\n        above_min = max(0, min_chunk_id - num_chunks_above)\n        above_max = min_chunk_id - 1\n\n        above_request = VespaChunkRequest(\n            document_id=replace_invalid_doc_id_characters(document_id),\n            min_chunk_ind=above_min,\n            max_chunk_ind=above_max,\n        )\n\n        try:\n            chunks_above = document_index.id_based_retrieval(\n                chunk_requests=[above_request],\n                filters=filters,\n                batch_retrieval=True,\n            )\n            # Sort by chunk_id to ensure correct order\n            chunks_above.sort(key=lambda c: c.chunk_id)\n        except Exception as e:\n            logger.warning(f\"Failed to retrieve chunks above section: {e}\")\n\n    # Retrieve chunks below (if any)\n    if num_chunks_below > 0:\n        below_min = max_chunk_id + 1\n        below_max = max_chunk_id + num_chunks_below\n\n        below_request = VespaChunkRequest(\n            document_id=replace_invalid_doc_id_characters(document_id),\n            min_chunk_ind=below_min,\n            max_chunk_ind=below_max,\n        )\n\n        try:\n            chunks_below = document_index.id_based_retrieval(\n                chunk_requests=[below_request],\n                filters=filters,\n                batch_retrieval=True,\n            )\n            # Sort by chunk_id to ensure correct order\n            chunks_below.sort(key=lambda c: c.chunk_id)\n        except Exception as e:\n            logger.warning(f\"Failed to retrieve chunks below section: {e}\")\n\n    return chunks_above, chunks_below\n\n\ndef merge_overlapping_sections(\n    sections: list[InferenceSection],\n) -> list[InferenceSection]:\n    \"\"\"Merge sections from the same document that have adjacent or overlapping chunks.\n\n    Sections are merged if they come from the same document and their chunk ranges\n    are adjacent (chunk_ids differ by 1) or overlapping (share chunk_ids).\n    The merged sections maintain the position of the first section in the original list.\n\n    Args:\n        sections: List of InferenceSection objects to merge\n\n    Returns:\n        List of merged InferenceSection objects\n    \"\"\"\n    if not sections:\n        return []\n\n    # Create a mapping from section to its original index for ordering\n    section_to_original_index: dict[tuple[str, int], int] = {}\n    for idx, section in enumerate(sections):\n        section_id = (section.center_chunk.document_id, section.center_chunk.chunk_id)\n        section_to_original_index[section_id] = idx\n\n    # Group sections by document_id\n    doc_sections: dict[str, list[InferenceSection]] = defaultdict(list)\n    for section in sections:\n        doc_sections[section.center_chunk.document_id].append(section)\n\n    # Track which sections have been merged into a result section\n    merged_sections: dict[tuple[str, int], InferenceSection] = {}\n\n    # Process each document's sections\n    for doc_id, doc_section_list in doc_sections.items():\n        if not doc_section_list:\n            continue\n\n        # Sort sections by their minimum chunk_id\n        doc_section_list.sort(key=lambda s: min(c.chunk_id for c in s.chunks))\n\n        # Track merged groups - start with first section\n        current_merged_chunks = set(doc_section_list[0].chunks)\n        sections_in_current_group = [doc_section_list[0]]\n\n        for i in range(1, len(doc_section_list)):\n            current_section = doc_section_list[i]\n            current_section_chunks = set(current_section.chunks)\n\n            # Get chunk_id ranges\n            merged_chunk_ids = {c.chunk_id for c in current_merged_chunks}\n            current_chunk_ids = {c.chunk_id for c in current_section_chunks}\n\n            # Check if adjacent or overlapping\n            min_merged = min(merged_chunk_ids)\n            max_merged = max(merged_chunk_ids)\n            min_current = min(current_chunk_ids)\n            max_current = max(current_chunk_ids)\n\n            is_adjacent = (min_current == max_merged + 1) or (\n                min_merged == max_current + 1\n            )\n            is_overlapping = bool(merged_chunk_ids & current_chunk_ids)\n\n            if is_adjacent or is_overlapping:\n                # Merge into current group\n                current_merged_chunks.update(current_section_chunks)\n                sections_in_current_group.append(current_section)\n            else:\n                # Finalize current group and start new one\n                # Find the section that appeared first in the original list\n                first_section = min(\n                    sections_in_current_group,\n                    key=lambda s: section_to_original_index.get(\n                        (s.center_chunk.document_id, s.center_chunk.chunk_id),\n                        float(\"inf\"),\n                    ),\n                )\n\n                # Create merged section with all chunks\n                all_chunks = sorted(current_merged_chunks, key=lambda c: c.chunk_id)\n                merged_section = inference_section_from_chunks(\n                    center_chunk=first_section.center_chunk,\n                    chunks=all_chunks,\n                )\n\n                if merged_section:\n                    # Store the merged section for all sections in this group\n                    for section in sections_in_current_group:\n                        section_id = (\n                            section.center_chunk.document_id,\n                            section.center_chunk.chunk_id,\n                        )\n                        merged_sections[section_id] = merged_section\n\n                # Start new group\n                current_merged_chunks = current_section_chunks\n                sections_in_current_group = [current_section]\n\n        # Finalize the last group\n        if sections_in_current_group:\n            first_section = min(\n                sections_in_current_group,\n                key=lambda s: section_to_original_index.get(\n                    (s.center_chunk.document_id, s.center_chunk.chunk_id),\n                    float(\"inf\"),\n                ),\n            )\n\n            all_chunks = sorted(current_merged_chunks, key=lambda c: c.chunk_id)\n            merged_section = inference_section_from_chunks(\n                center_chunk=first_section.center_chunk,\n                chunks=all_chunks,\n            )\n\n            if merged_section:\n                for section in sections_in_current_group:\n                    section_id = (\n                        section.center_chunk.document_id,\n                        section.center_chunk.chunk_id,\n                    )\n                    merged_sections[section_id] = merged_section\n\n    # Build result list maintaining original order\n    seen_section_ids: set[tuple[str, int]] = set()\n    result: list[InferenceSection] = []\n\n    for section in sections:\n        section_id = (section.center_chunk.document_id, section.center_chunk.chunk_id)\n        merged_section = merged_sections.get(section_id, section)\n\n        # Use merged section's center_chunk as identifier\n        merged_section_id = (\n            merged_section.center_chunk.document_id,\n            merged_section.center_chunk.chunk_id,\n        )\n\n        if merged_section_id not in seen_section_ids:\n            seen_section_ids.add(merged_section_id)\n            result.append(merged_section)\n\n    return result\n\n\ndef expand_section_with_context(\n    section: InferenceSection,\n    user_query: str,\n    llm: LLM,\n    document_index: DocumentIndex,\n    expand_override: bool = False,\n) -> InferenceSection | None:\n    \"\"\"Use LLM to classify section relevance and return expanded section with appropriate context.\n\n    This function combines classification and expansion into a single operation:\n    1. Retrieves chunks needed for classification (2 chunks for prompt)\n    2. Uses LLM to classify relevance (situations 1-4) unless expand_override is True\n    3. For FULL_DOCUMENT, fetches additional chunks (5 total above/below)\n    4. Returns the expanded section or None if not relevant\n\n    Args:\n        section: The InferenceSection to classify and expand\n        search_query: The user's search query\n        llm: LLM instance to use for classification\n        document_index: Document index for retrieving adjacent chunks\n        expand_override: If True, skip LLM classification and use FULL_DOCUMENT expansion\n\n    Returns:\n        Expanded InferenceSection with appropriate context, or None if NOT_RELEVANT\n    \"\"\"\n    chunks_above_for_prompt: list[InferenceChunk] = []\n    chunks_below_for_prompt: list[InferenceChunk] = []\n\n    # If expand_override is True, skip LLM classification and use FULL_DOCUMENT\n    if expand_override:\n        classification = ContextExpansionType.FULL_DOCUMENT\n        # These are not used, but need to be defined to avoid type errors\n    else:\n        # Retrieve 2 chunks above and below for the LLM classification prompt\n        chunks_above_for_prompt, chunks_below_for_prompt = _retrieve_adjacent_chunks(\n            section=section,\n            document_index=document_index,\n            num_chunks_above=2,\n            num_chunks_below=2,\n        )\n\n        # Format the section content for the prompt\n        section_above_text = (\n            \" \".join([c.content for c in chunks_above_for_prompt])\n            if chunks_above_for_prompt\n            else None\n        )\n        section_below_text = (\n            \" \".join([c.content for c in chunks_below_for_prompt])\n            if chunks_below_for_prompt\n            else None\n        )\n\n        # Classify section relevance using LLM\n        classification = classify_section_relevance(\n            document_title=section.center_chunk.semantic_identifier,\n            section_text=section.combined_content,\n            user_query=user_query,\n            llm=llm,\n            section_above_text=section_above_text,\n            section_below_text=section_below_text,\n        )\n\n    # Now build the expanded section based on classification\n    if classification == ContextExpansionType.NOT_RELEVANT:\n        # Filter out this section\n        logger.debug(\n            f\"LLM classified section as NOT_RELEVANT: {section.center_chunk.semantic_identifier}\"\n        )\n        return None\n\n    elif classification == ContextExpansionType.MAIN_SECTION_ONLY:\n        # Return original section unchanged\n        logger.debug(\n            f\"LLM classified section as MAIN_SECTION_ONLY: {section.center_chunk.semantic_identifier}\"\n        )\n        return section\n\n    elif classification == ContextExpansionType.INCLUDE_ADJACENT_SECTIONS:\n        # Use the 2 chunks we already retrieved for the prompt\n        logger.debug(\n            f\"LLM classified section as INCLUDE_ADJACENT_SECTIONS: {section.center_chunk.semantic_identifier}\"\n        )\n\n        all_chunks = chunks_above_for_prompt + section.chunks + chunks_below_for_prompt\n        if not all_chunks:\n            return section\n\n        # Create new InferenceSection with expanded chunks\n        expanded_section = inference_section_from_chunks(\n            center_chunk=section.center_chunk,\n            chunks=all_chunks,\n        )\n\n        return expanded_section if expanded_section else section\n\n    elif classification == ContextExpansionType.FULL_DOCUMENT:\n        # Fetch 5 chunks above and below (optimal single retrieval)\n        if expand_override:\n            logger.debug(\n                f\"Section marked for FULL_DOCUMENT expansion (override): {section.center_chunk.semantic_identifier}\"\n            )\n        else:\n            logger.debug(\n                f\"LLM classified section as FULL_DOCUMENT: {section.center_chunk.semantic_identifier}\"\n            )\n\n        chunks_above_full, chunks_below_full = _retrieve_adjacent_chunks(\n            section=section,\n            document_index=document_index,\n            num_chunks_above=FULL_DOC_NUM_CHUNKS_AROUND,\n            num_chunks_below=FULL_DOC_NUM_CHUNKS_AROUND,\n        )\n\n        # Combine all chunks: 5 above + section + 5 below\n        all_chunks = chunks_above_full + section.chunks + chunks_below_full\n\n        if not all_chunks:\n            logger.warning(\n                f\"No chunks found for full document context expansion: {section.center_chunk.semantic_identifier}\"\n            )\n            return section\n\n        # Create new InferenceSection with full context\n        expanded_section = inference_section_from_chunks(\n            center_chunk=section.center_chunk,\n            chunks=all_chunks,\n        )\n\n        return expanded_section if expanded_section else section\n\n    else:\n        # Unknown classification - default to returning original section\n        logger.warning(\n            f\"Unknown context classification {classification}, returning original section\"\n        )\n        return section\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/search_like_tool_utils.py",
    "content": "from onyx.connectors.models import Document\nfrom onyx.connectors.models import IndexingDocument\nfrom onyx.connectors.models import Section\n\n\nFINAL_CONTEXT_DOCUMENTS_ID = \"final_context_documents\"\nFINAL_SEARCH_QUERIES_ID = \"final_search_queries\"\nSEARCH_INFERENCE_SECTIONS_ID = \"search_inference_sections\"\n\n\ndef documents_to_indexing_documents(\n    documents: list[Document],\n) -> list[IndexingDocument]:\n    indexing_documents = []\n\n    for document in documents:\n        processed_sections = []\n        for section in document.sections:\n            processed_section = Section(\n                text=section.text or \"\",\n                link=section.link,\n                image_file_id=None,\n            )\n            processed_sections.append(processed_section)\n\n        indexed_document = IndexingDocument(\n            **document.model_dump(), processed_sections=processed_sections\n        )\n        indexing_documents.append(indexed_document)\n    return indexing_documents\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/utils.py",
    "content": "import json\n\nfrom onyx.context.search.models import InferenceSection\n\n\ndef convert_inference_sections_to_llm_string(\n    top_sections: list[InferenceSection],\n    citation_start: int = 1,\n    limit: int | None = None,\n    include_source_type: bool = True,\n    include_link: bool = False,\n    include_document_id: bool = False,\n) -> tuple[str, dict[int, str]]:\n    \"\"\"Convert InferenceSection objects to a JSON string for LLM.\n\n    Returns a JSON string with document results and a citation mapping.\n    \"\"\"\n    # Apply limit if specified\n    if limit is not None:\n        top_sections = top_sections[:limit]\n\n    # Group sections by document_id to assign same citation_id to sections from same document\n    document_id_to_citation_id: dict[str, int] = {}\n    citation_mapping: dict[int, str] = {}\n    current_citation_id = citation_start\n\n    # First pass: assign citation_ids to unique document_ids\n    for section in top_sections:\n        document_id = section.center_chunk.document_id\n        if document_id not in document_id_to_citation_id:\n            document_id_to_citation_id[document_id] = current_citation_id\n            citation_mapping[current_citation_id] = document_id\n            current_citation_id += 1\n\n    # Second pass: build results with citation_ids assigned per document\n    results = []\n\n    for section in top_sections:\n        chunk = section.center_chunk\n        document_id = chunk.document_id\n        citation_id = document_id_to_citation_id[document_id]\n\n        # Combine primary and secondary owners for authors\n        authors = None\n        if chunk.primary_owners or chunk.secondary_owners:\n            authors = []\n            if chunk.primary_owners:\n                authors.extend(chunk.primary_owners)\n            if chunk.secondary_owners:\n                authors.extend(chunk.secondary_owners)\n\n        # Format updated_at as ISO string if available\n        updated_at_str = None\n        if chunk.updated_at:\n            updated_at_str = chunk.updated_at.isoformat()\n\n        # Build result dictionary in desired order, only including non-None/empty fields\n        result = {\n            \"document\": citation_id,\n            \"title\": chunk.semantic_identifier,\n        }\n        if updated_at_str is not None:\n            result[\"updated_at\"] = updated_at_str\n        if authors is not None:\n            result[\"authors\"] = authors\n        if include_source_type:\n            result[\"source_type\"] = chunk.source_type.value\n        if include_link:\n            # Get the first link from the center chunk's source_links dict\n            link = None\n            if chunk.source_links:\n                # source_links is dict[int, str], get the first value\n                link = next(iter(chunk.source_links.values()), None)\n            if link:\n                result[\"url\"] = link\n        if include_document_id:\n            result[\"document_identifier\"] = chunk.document_id\n        if chunk.metadata:\n            result[\"metadata\"] = json.dumps(chunk.metadata, ensure_ascii=False)\n        result[\"content\"] = section.combined_content\n        results.append(result)\n\n    return (\n        json.dumps({\"results\": results}, indent=2, ensure_ascii=False),\n        citation_mapping,\n    )\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/web_search/clients/brave_client.py",
    "content": "from __future__ import annotations\n\nfrom typing import Any\n\nimport requests\nfrom fastapi import HTTPException\n\nfrom onyx.tools.tool_implementations.web_search.models import (\n    WebSearchProvider,\n)\nfrom onyx.tools.tool_implementations.web_search.models import WebSearchResult\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\nlogger = setup_logger()\n\nBRAVE_WEB_SEARCH_URL = \"https://api.search.brave.com/res/v1/web/search\"\nBRAVE_MAX_RESULTS_PER_REQUEST = 20\nBRAVE_SAFESEARCH_OPTIONS = {\"off\", \"moderate\", \"strict\"}\nBRAVE_FRESHNESS_OPTIONS = {\"pd\", \"pw\", \"pm\", \"py\"}\n\n\nclass RetryableBraveSearchError(Exception):\n    \"\"\"Error type used to trigger retry for transient Brave search failures.\"\"\"\n\n\nclass BraveClient(WebSearchProvider):\n    def __init__(\n        self,\n        api_key: str,\n        *,\n        num_results: int = 10,\n        timeout_seconds: int = 10,\n        country: str | None = None,\n        search_lang: str | None = None,\n        ui_lang: str | None = None,\n        safesearch: str | None = None,\n        freshness: str | None = None,\n    ) -> None:\n        if timeout_seconds <= 0:\n            raise ValueError(\"Brave provider config 'timeout_seconds' must be > 0.\")\n\n        self._headers = {\n            \"Accept\": \"application/json\",\n            \"X-Subscription-Token\": api_key,\n        }\n        logger.debug(f\"Count of results passed to BraveClient: {num_results}\")\n        self._num_results = max(1, min(num_results, BRAVE_MAX_RESULTS_PER_REQUEST))\n        self._timeout_seconds = timeout_seconds\n        self._country = _normalize_country(country)\n        self._search_lang = _normalize_language_code(\n            search_lang, field_name=\"search_lang\"\n        )\n        self._ui_lang = _normalize_language_code(ui_lang, field_name=\"ui_lang\")\n        self._safesearch = _normalize_option(\n            safesearch,\n            field_name=\"safesearch\",\n            allowed_values=BRAVE_SAFESEARCH_OPTIONS,\n        )\n        self._freshness = _normalize_option(\n            freshness,\n            field_name=\"freshness\",\n            allowed_values=BRAVE_FRESHNESS_OPTIONS,\n        )\n\n    def _build_search_params(self, query: str) -> dict[str, str]:\n        params = {\n            \"q\": query,\n            \"count\": str(self._num_results),\n        }\n        if self._country:\n            params[\"country\"] = self._country\n        if self._search_lang:\n            params[\"search_lang\"] = self._search_lang\n        if self._ui_lang:\n            params[\"ui_lang\"] = self._ui_lang\n        if self._safesearch:\n            params[\"safesearch\"] = self._safesearch\n        if self._freshness:\n            params[\"freshness\"] = self._freshness\n        return params\n\n    @retry_builder(\n        tries=3,\n        delay=1,\n        backoff=2,\n        exceptions=(RetryableBraveSearchError,),\n    )\n    def _search_with_retries(self, query: str) -> list[WebSearchResult]:\n        params = self._build_search_params(query)\n\n        try:\n            response = requests.get(\n                BRAVE_WEB_SEARCH_URL,\n                headers=self._headers,\n                params=params,\n                timeout=self._timeout_seconds,\n            )\n        except requests.RequestException as exc:\n            raise RetryableBraveSearchError(\n                f\"Brave search request failed: {exc}\"\n            ) from exc\n\n        try:\n            response.raise_for_status()\n        except requests.HTTPError as exc:\n            error_msg = _build_error_message(response)\n            if _is_retryable_status(response.status_code):\n                raise RetryableBraveSearchError(error_msg) from exc\n            raise ValueError(error_msg) from exc\n\n        data = response.json()\n        web_results = (data.get(\"web\") or {}).get(\"results\") or []\n\n        results: list[WebSearchResult] = []\n        for result in web_results:\n            if not isinstance(result, dict):\n                continue\n\n            link = _clean_string(result.get(\"url\"))\n            if not link:\n                continue\n\n            title = _clean_string(result.get(\"title\"))\n            description = _clean_string(result.get(\"description\"))\n\n            results.append(\n                WebSearchResult(\n                    title=title,\n                    link=link,\n                    snippet=description,\n                    author=None,\n                    published_date=None,\n                )\n            )\n\n        return results\n\n    def search(self, query: str) -> list[WebSearchResult]:\n        try:\n            return self._search_with_retries(query)\n        except RetryableBraveSearchError as exc:\n            raise ValueError(str(exc)) from exc\n\n    def test_connection(self) -> dict[str, str]:\n        try:\n            test_results = self.search(\"test\")\n            if not test_results or not any(result.link for result in test_results):\n                raise HTTPException(\n                    status_code=400,\n                    detail=\"Brave API key validation failed: search returned no results.\",\n                )\n        except HTTPException:\n            raise\n        except (ValueError, requests.RequestException) as e:\n            error_msg = str(e)\n            lower = error_msg.lower()\n            if (\n                \"status 401\" in lower\n                or \"status 403\" in lower\n                or \"api key\" in lower\n                or \"auth\" in lower\n            ):\n                raise HTTPException(\n                    status_code=400,\n                    detail=f\"Invalid Brave API key: {error_msg}\",\n                ) from e\n            if \"status 429\" in lower or \"rate limit\" in lower:\n                raise HTTPException(\n                    status_code=400,\n                    detail=f\"Brave API rate limit exceeded: {error_msg}\",\n                ) from e\n            raise HTTPException(\n                status_code=400,\n                detail=f\"Brave API key validation failed: {error_msg}\",\n            ) from e\n\n        logger.info(\"Web search provider test succeeded for Brave.\")\n        return {\"status\": \"ok\"}\n\n\ndef _build_error_message(response: requests.Response) -> str:\n    return f\"Brave search failed (status {response.status_code}): {_extract_error_detail(response)}\"\n\n\ndef _extract_error_detail(response: requests.Response) -> str:\n    try:\n        payload: Any = response.json()\n    except Exception:\n        text = response.text.strip()\n        return text[:200] if text else \"No error details\"\n\n    if isinstance(payload, dict):\n        error = payload.get(\"error\")\n        if isinstance(error, dict):\n            detail = error.get(\"detail\") or error.get(\"message\")\n            if isinstance(detail, str):\n                return detail\n        if isinstance(error, str):\n            return error\n\n        message = payload.get(\"message\")\n        if isinstance(message, str):\n            return message\n\n    return str(payload)[:200]\n\n\ndef _is_retryable_status(status_code: int) -> bool:\n    return status_code == 429 or status_code >= 500\n\n\ndef _clean_string(value: Any) -> str:\n    return value.strip() if isinstance(value, str) else \"\"\n\n\ndef _normalize_country(country: str | None) -> str | None:\n    if country is None:\n        return None\n    normalized = country.strip().upper()\n    if not normalized:\n        return None\n    if len(normalized) != 2 or not normalized.isalpha():\n        raise ValueError(\n            \"Brave provider config 'country' must be a 2-letter ISO country code.\"\n        )\n    return normalized\n\n\ndef _normalize_language_code(value: str | None, *, field_name: str) -> str | None:\n    if value is None:\n        return None\n    normalized = value.strip()\n    if not normalized:\n        return None\n    if len(normalized) > 20:\n        raise ValueError(f\"Brave provider config '{field_name}' is too long.\")\n    return normalized\n\n\ndef _normalize_option(\n    value: str | None,\n    *,\n    field_name: str,\n    allowed_values: set[str],\n) -> str | None:\n    if value is None:\n        return None\n    normalized = value.strip().lower()\n    if not normalized:\n        return None\n    if normalized not in allowed_values:\n        allowed = \", \".join(sorted(allowed_values))\n        raise ValueError(\n            f\"Brave provider config '{field_name}' must be one of: {allowed}.\"\n        )\n    return normalized\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/web_search/clients/exa_client.py",
    "content": "import re\nfrom collections.abc import Sequence\nfrom typing import Any\n\nimport requests\nfrom exa_py import Exa\nfrom exa_py.api import HighlightsContentsOptions\nfrom fastapi import HTTPException\n\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.tools.tool_implementations.open_url.models import WebContent\nfrom onyx.tools.tool_implementations.open_url.models import WebContentProvider\nfrom onyx.tools.tool_implementations.web_search.models import (\n    WebSearchProvider,\n)\nfrom onyx.tools.tool_implementations.web_search.models import (\n    WebSearchResult,\n)\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\nlogger = setup_logger()\n\n# 1 minute timeout for Exa API requests to prevent indefinite hangs\nEXA_REQUEST_TIMEOUT_SECONDS = 60\n\n\nclass ExaWithTimeout(Exa):\n    \"\"\"Exa client subclass that adds timeout support to HTTP requests.\n\n    The base Exa SDK uses requests without timeout, which can cause indefinite hangs.\n    This subclass overrides the request method to add a configurable timeout.\n    \"\"\"\n\n    def __init__(\n        self,\n        api_key: str,\n        timeout_seconds: int = EXA_REQUEST_TIMEOUT_SECONDS,\n    ) -> None:\n        super().__init__(api_key=api_key)\n        self._timeout_seconds = timeout_seconds\n\n    def request(\n        self,\n        endpoint: str,\n        data: dict[str, Any] | str | None = None,\n        method: str = \"POST\",\n        params: dict[str, Any] | None = None,\n        headers: dict[str, str] | None = None,\n    ) -> dict[str, Any] | requests.Response:\n        \"\"\"Override request method to add timeout support.\"\"\"\n        url = f\"{self.base_url}/{endpoint}\"\n        final_headers = {**self.headers, **(headers or {})}\n\n        if method == \"GET\":\n            response = requests.get(\n                url,\n                headers=final_headers,\n                params=params,\n                timeout=self._timeout_seconds,\n            )\n        elif method == \"POST\":\n            response = requests.post(\n                url,\n                headers=final_headers,\n                json=data,\n                params=params,\n                timeout=self._timeout_seconds,\n            )\n        elif method == \"PATCH\":\n            response = requests.patch(\n                url,\n                headers=final_headers,\n                json=data,\n                params=params,\n                timeout=self._timeout_seconds,\n            )\n        elif method == \"DELETE\":\n            response = requests.delete(\n                url,\n                headers=final_headers,\n                params=params,\n                timeout=self._timeout_seconds,\n            )\n        else:\n            raise ValueError(f\"Unsupported HTTP method: {method}\")\n\n        response.raise_for_status()\n        return response.json()\n\n\ndef _extract_site_operators(query: str) -> tuple[str, list[str]]:\n    \"\"\"Extract site: operators and return cleaned query + full domains.\n\n    Returns (cleaned_query, full_domains) where full_domains contains the full\n    values after site: (e.g., [\"reddit.com/r/leagueoflegends\"]).\n    \"\"\"\n    full_domains = re.findall(r\"site:\\s*([^\\s]+)\", query, re.IGNORECASE)\n    cleaned_query = re.sub(r\"site:\\s*\\S+\\s*\", \"\", query, flags=re.IGNORECASE).strip()\n\n    if not cleaned_query and full_domains:\n        cleaned_query = full_domains[0]\n\n    return cleaned_query, full_domains\n\n\nclass ExaClient(WebSearchProvider, WebContentProvider):\n    def __init__(self, api_key: str, num_results: int = 10) -> None:\n        self.exa = ExaWithTimeout(api_key=api_key)\n        self._num_results = num_results\n\n    @property\n    def supports_site_filter(self) -> bool:\n        return False\n\n    def _search_exa(\n        self, query: str, include_domains: list[str] | None = None\n    ) -> list[WebSearchResult]:\n        response = self.exa.search_and_contents(\n            query,\n            type=\"auto\",\n            highlights=HighlightsContentsOptions(\n                num_sentences=2,\n                highlights_per_url=1,\n            ),\n            num_results=self._num_results,\n            include_domains=include_domains,\n        )\n\n        results: list[WebSearchResult] = []\n        for result in response.results:\n            title = (result.title or \"\").strip()\n            # library type stub issue\n            snippet = (result.highlights[0] if result.highlights else \"\").strip()\n            results.append(\n                WebSearchResult(\n                    title=title,\n                    link=result.url,\n                    snippet=snippet,\n                    author=result.author,\n                    published_date=(\n                        time_str_to_utc(result.published_date)\n                        if result.published_date\n                        else None\n                    ),\n                )\n            )\n\n        return results\n\n    @retry_builder(tries=3, delay=1, backoff=2)\n    def search(self, query: str) -> list[WebSearchResult]:\n        cleaned_query, full_domains = _extract_site_operators(query)\n\n        if full_domains:\n            # Try with include_domains using base domains (e.g., [\"reddit.com\"])\n            base_domains = [d.split(\"/\")[0].removeprefix(\"www.\") for d in full_domains]\n            results = self._search_exa(cleaned_query, include_domains=base_domains)\n            if results:\n                return results\n\n        # Fallback: add full domains as keywords\n        query_with_domains = f\"{cleaned_query} {' '.join(full_domains)}\".strip()\n        return self._search_exa(query_with_domains)\n\n    def test_connection(self) -> dict[str, str]:\n        try:\n            test_results = self.search(\"test\")\n            if not test_results or not any(result.link for result in test_results):\n                raise HTTPException(\n                    status_code=400,\n                    detail=\"API key validation failed: search returned no results.\",\n                )\n        except HTTPException:\n            raise\n        except Exception as e:\n            error_msg = str(e)\n            if (\n                \"api\" in error_msg.lower()\n                or \"key\" in error_msg.lower()\n                or \"auth\" in error_msg.lower()\n            ):\n                raise HTTPException(\n                    status_code=400,\n                    detail=f\"Invalid Exa API key: {error_msg}\",\n                ) from e\n            raise HTTPException(\n                status_code=400,\n                detail=f\"Exa API key validation failed: {error_msg}\",\n            ) from e\n\n        logger.info(\"Web search provider test succeeded for Exa.\")\n        return {\"status\": \"ok\"}\n\n    @retry_builder(tries=3, delay=1, backoff=2)\n    def contents(self, urls: Sequence[str]) -> list[WebContent]:\n        response = self.exa.get_contents(\n            urls=list(urls),\n            text=True,\n            livecrawl=\"preferred\",\n        )\n\n        # Exa can return partial/empty content entries; skip those to avoid\n        # downstream prompt + UI pollution.\n        contents: list[WebContent] = []\n        for result in response.results:\n            title = (result.title or \"\").strip()\n            full_content = (result.text or \"\").strip()\n            contents.append(\n                WebContent(\n                    title=title,\n                    link=result.url,\n                    full_content=full_content,\n                    published_date=(\n                        time_str_to_utc(result.published_date)\n                        if result.published_date\n                        else None\n                    ),\n                    scrape_successful=bool(full_content),\n                )\n            )\n\n        return contents\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/web_search/clients/google_pse_client.py",
    "content": "from __future__ import annotations\n\nfrom datetime import datetime\nfrom typing import Any\n\nimport requests\nfrom fastapi import HTTPException\n\nfrom onyx.tools.tool_implementations.web_search.models import (\n    WebSearchProvider,\n)\nfrom onyx.tools.tool_implementations.web_search.models import WebSearchResult\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\nlogger = setup_logger()\n\nGOOGLE_CUSTOM_SEARCH_URL = \"https://customsearch.googleapis.com/customsearch/v1\"\n\n\nclass GooglePSEClient(WebSearchProvider):\n    def __init__(\n        self,\n        api_key: str,\n        search_engine_id: str,\n        *,\n        num_results: int = 10,\n        timeout_seconds: int = 10,\n    ) -> None:\n        self._api_key = api_key\n        self._search_engine_id = search_engine_id\n        self._num_results = min(num_results, 10)  # Google API max is 10\n        self._timeout_seconds = timeout_seconds\n\n    @retry_builder(tries=3, delay=1, backoff=2)\n    def search(self, query: str) -> list[WebSearchResult]:\n        params: dict[str, str] = {\n            \"key\": self._api_key,\n            \"cx\": self._search_engine_id,\n            \"q\": query,\n            \"num\": str(self._num_results),\n        }\n\n        response = requests.get(\n            GOOGLE_CUSTOM_SEARCH_URL, params=params, timeout=self._timeout_seconds\n        )\n\n        # Check for HTTP errors first\n        try:\n            response.raise_for_status()\n        except requests.HTTPError as exc:\n            status = response.status_code\n            error_detail = \"Unknown error\"\n            try:\n                error_data = response.json()\n                if \"error\" in error_data:\n                    error_info = error_data[\"error\"]\n                    error_detail = error_info.get(\"message\", str(error_info))\n            except Exception:\n                error_detail = (\n                    response.text[:200] if response.text else \"No error details\"\n                )\n\n            raise ValueError(\n                f\"Google PSE search failed (status {status}): {error_detail}\"\n            ) from exc\n\n        data = response.json()\n\n        # Google Custom Search API can return errors in the response body even with 200 status\n        if \"error\" in data:\n            error_info = data[\"error\"]\n            error_message = error_info.get(\"message\", \"Unknown error\")\n            error_code = error_info.get(\"code\", \"Unknown\")\n            raise ValueError(f\"Google PSE API error ({error_code}): {error_message}\")\n\n        items: list[dict[str, Any]] = data.get(\"items\", [])\n        results: list[WebSearchResult] = []\n\n        for item in items:\n            link = item.get(\"link\")\n            if not link:\n                continue\n\n            snippet = item.get(\"snippet\") or \"\"\n\n            # Attempt to extract metadata if available\n            pagemap = item.get(\"pagemap\") or {}\n            metatags = pagemap.get(\"metatags\", [])\n            published_date: datetime | None = None\n            author: str | None = None\n\n            if metatags:\n                meta = metatags[0]\n                author = meta.get(\"og:site_name\") or meta.get(\"author\")\n                published_str = (\n                    meta.get(\"article:published_time\")\n                    or meta.get(\"og:updated_time\")\n                    or meta.get(\"date\")\n                )\n                if published_str:\n                    try:\n                        published_date = datetime.fromisoformat(\n                            published_str.replace(\"Z\", \"+00:00\")\n                        )\n                    except ValueError:\n                        logger.debug(\n                            f\"Failed to parse published_date '{published_str}' for link {link}\"\n                        )\n                        published_date = None\n\n            results.append(\n                WebSearchResult(\n                    title=item.get(\"title\") or \"\",\n                    link=link,\n                    snippet=snippet,\n                    author=author,\n                    published_date=published_date,\n                )\n            )\n\n        return results\n\n    # TODO: I'm not really satisfied with how tailored this is to the particulars of Google PSE.\n    # In particular, I think this might flatten errors that are caused by the API key vs. ones caused\n    # by the search engine ID, or by other factors.\n    # I (David Edelstein) don't feel knowledgeable enough about the return behavior of the Google PSE API\n    # to ensure that we have nicely descriptive and actionable error messages. (Like, what's up with the\n    # thing where 200 status codes can have error messages in the response body?)\n    def test_connection(self) -> dict[str, str]:\n        try:\n            test_results = self.search(\"test\")\n            if not test_results or not any(result.link for result in test_results):\n                raise HTTPException(\n                    status_code=400,\n                    detail=\"Google PSE validation failed: search returned no results.\",\n                )\n        except HTTPException:\n            raise\n        except Exception as e:\n            error_msg = str(e)\n            if (\n                \"api\" in error_msg.lower()\n                or \"key\" in error_msg.lower()\n                or \"auth\" in error_msg.lower()\n            ):\n                raise HTTPException(\n                    status_code=400,\n                    detail=f\"Invalid Google PSE API key: {error_msg}\",\n                ) from e\n            raise HTTPException(\n                status_code=400,\n                detail=f\"Google PSE validation failed: {error_msg}\",\n            ) from e\n\n        logger.info(\"Web search provider test succeeded for Google PSE.\")\n        return {\"status\": \"ok\"}\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/web_search/clients/searxng_client.py",
    "content": "import requests\nfrom fastapi import HTTPException\n\nfrom onyx.tools.tool_implementations.web_search.models import (\n    WebSearchProvider,\n)\nfrom onyx.tools.tool_implementations.web_search.models import (\n    WebSearchResult,\n)\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\nlogger = setup_logger()\n\n\nclass SearXNGClient(WebSearchProvider):\n    def __init__(\n        self,\n        searxng_base_url: str,\n        num_results: int = 10,\n    ) -> None:\n        logger.debug(f\"Initializing SearXNGClient with base URL: {searxng_base_url}\")\n        self._searxng_base_url = searxng_base_url\n        self._num_results = num_results\n\n    @retry_builder(tries=3, delay=1, backoff=2)\n    def search(self, query: str) -> list[WebSearchResult]:\n        payload = {\n            \"q\": query,\n            \"format\": \"json\",\n        }\n        logger.debug(\n            f\"Searching with payload: {payload} to {self._searxng_base_url}/search\"\n        )\n        response = requests.post(\n            f\"{self._searxng_base_url}/search\",\n            data=payload,\n        )\n        response.raise_for_status()\n\n        results = response.json()\n        result_list = results.get(\"results\", [])\n        # SearXNG doesn't support limiting results via API parameters,\n        # so we limit client-side after receiving the response\n        limited_results = result_list[: self._num_results]\n        return [\n            WebSearchResult(\n                title=result[\"title\"],\n                link=result[\"url\"],\n                snippet=result[\"content\"],\n            )\n            for result in limited_results\n        ]\n\n    def test_connection(self) -> dict[str, str]:\n        try:\n            logger.debug(f\"Testing connection to {self._searxng_base_url}/config\")\n            response = requests.get(f\"{self._searxng_base_url}/config\")\n            logger.debug(f\"Response: {response.status_code}, text: {response.text}\")\n            response.raise_for_status()\n        except requests.HTTPError as e:\n            status_code = e.response.status_code\n            logger.debug(\n                f\"HTTPError: status_code={status_code}, e.response={e.response.status_code if e.response else None}, error={e}\"\n            )\n            if status_code == 429:\n                raise HTTPException(\n                    status_code=400,\n                    detail=(\n                        \"This SearXNG instance does not allow API requests. \"\n                        \"Use a private instance and configure it to allow bots.\"\n                    ),\n                ) from e\n            elif status_code == 404:\n                raise HTTPException(\n                    status_code=400,\n                    detail=\"This SearXNG instance was not found. Please check the URL and try again.\",\n                ) from e\n            else:\n                raise HTTPException(\n                    status_code=400,\n                    detail=f\"SearXNG connection failed (status {status_code}): {str(e)}\",\n                ) from e\n\n        # Not a sure way to check if this is a SearXNG instance as opposed to some other website that\n        # happens to have a /config endpoint containing a \"brand\" key with a \"GIT_URL\" key with value\n        # \"https://github.com/searxng/searxng\". I don't think that would happen by coincidence, so I\n        # think this is a good enough check for now. I'm open for suggestions on improvements.\n        config = response.json()\n        if (\n            config.get(\"brand\", {}).get(\"GIT_URL\")\n            != \"https://github.com/searxng/searxng\"\n        ):\n            raise HTTPException(\n                status_code=400,\n                detail=\"This does not appear to be a SearXNG instance. Please check the URL and try again.\",\n            )\n\n        # Test that JSON mode is enabled by performing a simple search\n        self._test_json_mode()\n\n        logger.info(\"Web search provider test succeeded for SearXNG.\")\n        return {\"status\": \"ok\"}\n\n    def _test_json_mode(self) -> None:\n        \"\"\"Test that JSON format is enabled in SearXNG settings.\n\n        SearXNG requires JSON format to be explicitly enabled in settings.yml.\n        If it's not enabled, the search endpoint returns a 403.\n        \"\"\"\n        try:\n            payload = {\n                \"q\": \"test\",\n                \"format\": \"json\",\n            }\n            response = requests.post(\n                f\"{self._searxng_base_url}/search\",\n                data=payload,\n                timeout=5,\n            )\n            response.raise_for_status()\n        except requests.HTTPError as e:\n            status_code = e.response.status_code if e.response is not None else None\n            if status_code == 403:\n                raise HTTPException(\n                    status_code=400,\n                    detail=(\n                        \"Got a 403 response when trying to reach SearXNG. This likely means that \"\n                        \"JSON format is not enabled on this SearXNG instance. \"\n                        \"Please enable JSON format in your SearXNG settings.yml file by adding \"\n                        \"'json' to the 'search.formats' list.\"\n                    ),\n                ) from e\n            raise HTTPException(\n                status_code=400,\n                detail=f\"Failed to test search on SearXNG instance (status {status_code}): {str(e)}\",\n            ) from e\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/web_search/clients/serper_client.py",
    "content": "import json\nfrom collections.abc import Sequence\nfrom concurrent.futures import ThreadPoolExecutor\n\nimport requests\nfrom fastapi import HTTPException\n\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.tools.tool_implementations.open_url.models import WebContent\nfrom onyx.tools.tool_implementations.open_url.models import WebContentProvider\nfrom onyx.tools.tool_implementations.web_search.models import (\n    WebSearchProvider,\n)\nfrom onyx.tools.tool_implementations.web_search.models import (\n    WebSearchResult,\n)\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.retry_wrapper import retry_builder\n\nlogger = setup_logger()\n\nSERPER_SEARCH_URL = \"https://google.serper.dev/search\"\nSERPER_CONTENTS_URL = \"https://scrape.serper.dev\"\n\n# 1 minute timeout for Serper API requests to prevent indefinite hangs\nSERPER_REQUEST_TIMEOUT_SECONDS = 60\n\n\nclass SerperClient(WebSearchProvider, WebContentProvider):\n    def __init__(self, api_key: str, num_results: int = 10) -> None:\n        self.headers = {\n            \"X-API-KEY\": api_key,\n            \"Content-Type\": \"application/json\",\n        }\n        self._num_results = num_results\n\n    @retry_builder(tries=3, delay=1, backoff=2)\n    def search(self, query: str) -> list[WebSearchResult]:\n        payload = {\n            \"q\": query,\n            \"num\": self._num_results,\n        }\n\n        response = requests.post(\n            SERPER_SEARCH_URL,\n            headers=self.headers,\n            data=json.dumps(payload),\n            timeout=SERPER_REQUEST_TIMEOUT_SECONDS,\n        )\n\n        response.raise_for_status()\n\n        results = response.json()\n        organic_results = results.get(\"organic\") or []\n\n        validated_results: list[WebSearchResult] = []\n        for result in organic_results:\n            link = (result.get(\"link\") or \"\").strip()\n            if not link:\n                continue\n\n            title = (result.get(\"title\") or \"\").strip()\n            snippet = (result.get(\"snippet\") or \"\").strip()\n\n            validated_results.append(\n                WebSearchResult(\n                    title=title,\n                    link=link,\n                    snippet=snippet,\n                    author=None,\n                    published_date=None,\n                )\n            )\n\n        return validated_results\n\n    def test_connection(self) -> dict[str, str]:\n        try:\n            test_results = self.search(\"test\")\n            if not test_results or not any(result.link for result in test_results):\n                raise HTTPException(\n                    status_code=400,\n                    detail=\"API key validation failed: search returned no results.\",\n                )\n        except HTTPException:\n            raise\n        except Exception as e:\n            error_msg = str(e)\n            if (\n                \"api\" in error_msg.lower()\n                or \"key\" in error_msg.lower()\n                or \"auth\" in error_msg.lower()\n            ):\n                raise HTTPException(\n                    status_code=400,\n                    detail=f\"Invalid Serper API key: {error_msg}\",\n                ) from e\n            raise HTTPException(\n                status_code=400,\n                detail=f\"Serper API key validation failed: {error_msg}\",\n            ) from e\n\n        logger.info(\"Web search provider test succeeded for Serper.\")\n        return {\"status\": \"ok\"}\n\n    def contents(self, urls: Sequence[str]) -> list[WebContent]:\n        if not urls:\n            return []\n\n        # Serper can responds with 500s regularly. We want to retry,\n        # but in the event of failure, return an unsuccesful scrape.\n        def safe_get_webpage_content(url: str) -> WebContent:\n            try:\n                return self._get_webpage_content(url)\n            except Exception:\n                return WebContent(\n                    title=\"\",\n                    link=url,\n                    full_content=\"\",\n                    published_date=None,\n                    scrape_successful=False,\n                )\n\n        with ThreadPoolExecutor(max_workers=min(8, len(urls))) as e:\n            return list(e.map(safe_get_webpage_content, urls))\n\n    @retry_builder(tries=3, delay=1, backoff=2)\n    def _get_webpage_content(self, url: str) -> WebContent:\n        payload = {\n            \"url\": url,\n        }\n\n        response = requests.post(\n            SERPER_CONTENTS_URL,\n            headers=self.headers,\n            data=json.dumps(payload),\n            timeout=SERPER_REQUEST_TIMEOUT_SECONDS,\n        )\n\n        # 400 returned when serper cannot scrape\n        if response.status_code == 400:\n            return WebContent(\n                title=\"\",\n                link=url,\n                full_content=\"\",\n                published_date=None,\n                scrape_successful=False,\n            )\n\n        response.raise_for_status()\n\n        response_json = response.json()\n\n        # Response only guarantees text\n        text = response_json[\"text\"]\n\n        # metadata & jsonld is not guaranteed to be present\n        metadata = response_json.get(\"metadata\", {})\n        jsonld = response_json.get(\"jsonld\", {})\n\n        title = extract_title_from_metadata(metadata)\n\n        # Serper does not provide a reliable mechanism to extract the url\n        response_url = url\n        published_date_str = extract_published_date_from_jsonld(jsonld)\n        published_date = None\n\n        if published_date_str:\n            try:\n                published_date = time_str_to_utc(published_date_str)\n            except Exception:\n                published_date = None\n\n        return WebContent(\n            title=title or \"\",\n            link=response_url,\n            full_content=text or \"\",\n            published_date=published_date,\n        )\n\n\ndef extract_title_from_metadata(metadata: dict[str, str]) -> str | None:\n    keys = [\"title\", \"og:title\"]\n    return extract_value_from_dict(metadata, keys)\n\n\ndef extract_published_date_from_jsonld(jsonld: dict[str, str]) -> str | None:\n    keys = [\"dateModified\"]\n    return extract_value_from_dict(jsonld, keys)\n\n\ndef extract_value_from_dict(data: dict[str, str], keys: list[str]) -> str | None:\n    for key in keys:\n        if key in data:\n            return data[key]\n    return None\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/web_search/models.py",
    "content": "from abc import abstractmethod\nfrom collections.abc import Sequence\nfrom datetime import datetime\nfrom enum import Enum\n\nfrom pydantic import BaseModel\nfrom pydantic import field_validator\n\nfrom onyx.utils.url import normalize_url\n\n# Fairly loose number but assuming LLMs can easily handle this amount of context\n# Approximately 2 pages of google search results\n# This is the cap for both when the tool is running a single search and when running multiple queries in parallel\nDEFAULT_MAX_RESULTS = 20\n\nWEB_SEARCH_PREFIX = \"WEB_SEARCH_DOC_\"\n\n\nclass ProviderType(Enum):\n    \"\"\"Enum for internet search provider types\"\"\"\n\n    GOOGLE = \"google\"\n    EXA = \"exa\"\n\n\nclass WebSearchResult(BaseModel):\n    title: str\n    link: str\n    snippet: str\n    author: str | None = None\n    published_date: datetime | None = None\n\n    @field_validator(\"link\")\n    @classmethod\n    def normalize_link(cls, v: str) -> str:\n        return normalize_url(v)\n\n\nclass WebSearchProvider:\n    @property\n    def supports_site_filter(self) -> bool:\n        \"\"\"Whether this provider supports the site: operator in queries.\n        Override in subclasses that don't support it.\n        \"\"\"\n        return True\n\n    @abstractmethod\n    def search(self, query: str) -> Sequence[WebSearchResult]:\n        pass\n\n    @abstractmethod\n    def test_connection(self) -> dict[str, str]:\n        pass\n\n\nclass WebContentProviderConfig(BaseModel):\n    timeout_seconds: int | None = None\n    base_url: str | None = None\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/web_search/providers.py",
    "content": "from onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import InternetSearchProvider\nfrom onyx.db.web_search import fetch_active_web_content_provider\nfrom onyx.db.web_search import fetch_active_web_search_provider\nfrom onyx.tools.tool_implementations.open_url.firecrawl import FirecrawlClient\nfrom onyx.tools.tool_implementations.open_url.models import (\n    WebContentProvider,\n)\nfrom onyx.tools.tool_implementations.open_url.onyx_web_crawler import (\n    DEFAULT_MAX_HTML_SIZE_BYTES,\n)\nfrom onyx.tools.tool_implementations.open_url.onyx_web_crawler import (\n    DEFAULT_MAX_PDF_SIZE_BYTES,\n)\nfrom onyx.tools.tool_implementations.open_url.onyx_web_crawler import OnyxWebCrawler\nfrom onyx.tools.tool_implementations.web_search.clients.brave_client import (\n    BraveClient,\n)\nfrom onyx.tools.tool_implementations.web_search.clients.exa_client import (\n    ExaClient,\n)\nfrom onyx.tools.tool_implementations.web_search.clients.google_pse_client import (\n    GooglePSEClient,\n)\nfrom onyx.tools.tool_implementations.web_search.clients.searxng_client import (\n    SearXNGClient,\n)\nfrom onyx.tools.tool_implementations.web_search.clients.serper_client import (\n    SerperClient,\n)\nfrom onyx.tools.tool_implementations.web_search.models import DEFAULT_MAX_RESULTS\nfrom onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig\nfrom onyx.tools.tool_implementations.web_search.models import WebSearchProvider\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.enums import WebContentProviderType\nfrom shared_configs.enums import WebSearchProviderType\n\nlogger = setup_logger()\n\n\ndef _parse_positive_int_config(\n    *,\n    raw_value: str | None,\n    default: int,\n    provider_name: str,\n    config_key: str,\n) -> int:\n    if not raw_value:\n        return default\n    try:\n        value = int(raw_value)\n    except ValueError as exc:\n        raise ValueError(\n            f\"{provider_name} provider config '{config_key}' must be an integer.\"\n        ) from exc\n    if value <= 0:\n        raise ValueError(\n            f\"{provider_name} provider config '{config_key}' must be greater than 0.\"\n        )\n    return value\n\n\ndef provider_requires_api_key(provider_type: WebSearchProviderType) -> bool:\n    \"\"\"Return True if the given provider type requires an API key.\n    This list is most likely just going to contain SEARXNG. The way it works is that it uses public search engines that do not\n    require an API key. You can also set it up in a way which requires a key but SearXNG itself does not require a key.\n    \"\"\"\n    return provider_type != WebSearchProviderType.SEARXNG\n\n\ndef build_search_provider_from_config(\n    provider_type: WebSearchProviderType,\n    api_key: str | None,\n    config: dict[str, str] | None,  # TODO use a typed object\n) -> WebSearchProvider:\n    config = config or {}\n    num_results = int(config.get(\"num_results\") or DEFAULT_MAX_RESULTS)\n\n    # SearXNG does not require an API key\n    if provider_type == WebSearchProviderType.SEARXNG:\n        searxng_base_url = config.get(\"searxng_base_url\")\n        if not searxng_base_url:\n            raise ValueError(\"Please provide a URL for your private SearXNG instance.\")\n        return SearXNGClient(\n            searxng_base_url,\n            num_results=num_results,\n        )\n\n    # All other providers require an API key\n    if not api_key:\n        raise ValueError(f\"API key is required for {provider_type.value} provider.\")\n\n    if provider_type == WebSearchProviderType.EXA:\n        return ExaClient(api_key=api_key, num_results=num_results)\n    if provider_type == WebSearchProviderType.BRAVE:\n        return BraveClient(\n            api_key=api_key,\n            num_results=num_results,\n            timeout_seconds=_parse_positive_int_config(\n                raw_value=config.get(\"timeout_seconds\"),\n                default=10,\n                provider_name=\"Brave\",\n                config_key=\"timeout_seconds\",\n            ),\n            country=config.get(\"country\"),\n            search_lang=config.get(\"search_lang\"),\n            ui_lang=config.get(\"ui_lang\"),\n            safesearch=config.get(\"safesearch\"),\n            freshness=config.get(\"freshness\"),\n        )\n    if provider_type == WebSearchProviderType.SERPER:\n        return SerperClient(api_key=api_key, num_results=num_results)\n    if provider_type == WebSearchProviderType.GOOGLE_PSE:\n        search_engine_id = (\n            config.get(\"search_engine_id\")\n            or config.get(\"cx\")\n            or config.get(\"search_engine\")\n        )\n        if not search_engine_id:\n            raise ValueError(\n                \"Google PSE provider requires a search engine id (cx) in addition to the API key.\"\n            )\n        return GooglePSEClient(\n            api_key=api_key,\n            search_engine_id=search_engine_id,\n            num_results=num_results,\n            timeout_seconds=int(config.get(\"timeout_seconds\") or 10),\n        )\n\n    raise ValueError(f\"Unknown provider type: {provider_type.value}\")\n\n\ndef _build_search_provider(provider_model: InternetSearchProvider) -> WebSearchProvider:\n    return build_search_provider_from_config(\n        provider_type=WebSearchProviderType(provider_model.provider_type),\n        api_key=(\n            provider_model.api_key.get_value(apply_mask=False)\n            if provider_model.api_key\n            else None\n        ),\n        config=provider_model.config or {},\n    )\n\n\ndef build_content_provider_from_config(\n    *,\n    provider_type: WebContentProviderType,\n    api_key: str,\n    config: WebContentProviderConfig,\n) -> WebContentProvider | None:\n    if provider_type == WebContentProviderType.ONYX_WEB_CRAWLER:\n        if config.timeout_seconds is not None:\n            return OnyxWebCrawler(\n                timeout_seconds=config.timeout_seconds,\n                max_pdf_size_bytes=DEFAULT_MAX_PDF_SIZE_BYTES,\n                max_html_size_bytes=DEFAULT_MAX_HTML_SIZE_BYTES,\n            )\n        return OnyxWebCrawler(\n            max_pdf_size_bytes=DEFAULT_MAX_PDF_SIZE_BYTES,\n            max_html_size_bytes=DEFAULT_MAX_HTML_SIZE_BYTES,\n        )\n\n    if provider_type == WebContentProviderType.FIRECRAWL:\n        if config.base_url is None:\n            raise ValueError(\"Firecrawl content provider requires a base URL.\")\n        if config.timeout_seconds is None:\n            return FirecrawlClient(api_key=api_key, base_url=config.base_url)\n        return FirecrawlClient(\n            api_key=api_key,\n            base_url=config.base_url,\n            timeout_seconds=config.timeout_seconds,\n        )\n\n    if provider_type == WebContentProviderType.EXA:\n        return ExaClient(api_key=api_key)\n\n\ndef get_default_provider() -> WebSearchProvider | None:\n    with get_session_with_current_tenant() as db_session:\n        provider_model = fetch_active_web_search_provider(db_session)\n        if provider_model is None:\n            return None\n        return _build_search_provider(provider_model)\n\n\ndef get_default_content_provider() -> WebContentProvider:\n    with get_session_with_current_tenant() as db_session:\n        provider_model = fetch_active_web_content_provider(db_session)\n        if provider_model:\n            provider = build_content_provider_from_config(\n                provider_type=WebContentProviderType(provider_model.provider_type),\n                api_key=(\n                    provider_model.api_key.get_value(apply_mask=False)\n                    if provider_model.api_key\n                    else \"\"\n                ),\n                config=provider_model.config or WebContentProviderConfig(),\n            )\n            if provider:\n                return provider\n\n    return OnyxWebCrawler(\n        max_pdf_size_bytes=DEFAULT_MAX_PDF_SIZE_BYTES,\n        max_html_size_bytes=DEFAULT_MAX_HTML_SIZE_BYTES,\n    )\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/web_search/utils.py",
    "content": "from onyx.configs.constants import DocumentSource\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import InferenceSection\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.tools.tool_implementations.open_url.models import WebContent\nfrom onyx.tools.tool_implementations.open_url.snippet_matcher import (\n    find_snippet_in_content,\n)\nfrom onyx.tools.tool_implementations.web_search.models import WEB_SEARCH_PREFIX\nfrom onyx.tools.tool_implementations.web_search.models import WebSearchResult\n\n\nTRUNCATED_CONTENT_SUFFIX = \" [...truncated]\"\nTRUNCATED_CONTENT_PREFIX = \"[...truncated] \"\n\nMAX_CHARS_PER_URL = 15000\n\n\ndef filter_web_search_results_with_no_title_or_snippet(\n    results: list[WebSearchResult],\n) -> list[WebSearchResult]:\n    \"\"\"Filter out results that have neither a title nor a snippet.\n\n    Some providers can return entries that only include a URL. Downstream uses\n    titles/snippets for display and prompting, so we drop those empty entries\n    centrally (rather than duplicating the check in each client).\n    \"\"\"\n    filtered: list[WebSearchResult] = []\n    for result in results:\n        if result.title.strip() or result.snippet.strip():\n            filtered.append(result)\n    return filtered\n\n\ndef truncate_search_result_content(\n    content: str, max_chars: int = MAX_CHARS_PER_URL\n) -> str:\n    \"\"\"Truncate search result content to a maximum number of characters\"\"\"\n    if len(content) <= max_chars:\n        return content\n    return content[:max_chars] + TRUNCATED_CONTENT_SUFFIX\n\n\ndef _truncate_content_around_snippet(\n    content: str, snippet: str, max_chars: int = MAX_CHARS_PER_URL\n) -> str:\n    \"\"\"\n    Truncates content around snippet with max_chars\n\n    Assumes snippet exists\n    \"\"\"\n    result = find_snippet_in_content(content, snippet)\n\n    if not result.snippet_located:\n        return \"\"\n\n    start_idx = result.start_idx\n    end_idx = result.end_idx\n\n    new_start, new_end = _expand_range_centered(\n        start_idx, end_idx + 1, len(content), max_chars\n    )\n\n    truncated_content = content[new_start:new_end]\n\n    # Add the AFFIX to the start and end of truncated content\n    if new_start > 0:\n        truncated_content = TRUNCATED_CONTENT_PREFIX + truncated_content\n\n    if new_end < len(content):\n        truncated_content = truncated_content + TRUNCATED_CONTENT_SUFFIX\n\n    return truncated_content\n\n\ndef _expand_range_centered(\n    start_idx: int, end_idx: int, N: int, target_size: int\n) -> tuple[int, int]:\n    \"\"\"\n    Expands a range [start_idx, end_idx) to be centered within a list of size N\n\n    Args:\n        start_idx: Starting index (inclusive)\n        end_idx: Ending index (exclusive)\n        N: Size of the list\n        target_size: Target size of the range\n\n    Returns:\n        Tuple of (new start index, new end index)\n    \"\"\"\n    current_size = end_idx - start_idx\n\n    if current_size >= target_size:\n        return start_idx, end_idx\n\n    padding_needed = target_size - current_size\n    padding_top = padding_needed // 2\n    padding_bottom = padding_needed - padding_top\n\n    # Try expand symmetrically\n    new_start = start_idx - padding_top\n    new_end = end_idx + padding_bottom\n\n    # Handle overflow\n    if new_start < 0:\n        overflow = -new_start\n        new_start = 0\n        new_end = min(N, new_end + overflow)\n\n    if new_end > N:\n        overflow = new_end - N\n        new_end = N\n        new_start = max(0, new_start - overflow)\n\n    return new_start, new_end\n\n\ndef inference_section_from_internet_page_scrape(\n    result: WebContent,\n    snippet: str,\n    rank: int = 0,\n) -> InferenceSection:\n    # truncate the content around snippet if snippet exists\n    truncated_content = \"\"\n    if snippet:\n        truncated_content = _truncate_content_around_snippet(\n            result.full_content, snippet\n        )\n\n    # Fallback if no snippet exists or we failed to find it\n    if not truncated_content:\n        truncated_content = truncate_search_result_content(result.full_content)\n\n    # Calculate score using reciprocal rank to preserve ordering\n    score = 1.0 / (rank + 1)\n\n    inference_chunk = InferenceChunk(\n        chunk_id=0,\n        blurb=result.title,\n        content=truncated_content,\n        source_links={0: result.link},\n        section_continuation=False,\n        document_id=WEB_SEARCH_PREFIX + result.link,\n        source_type=DocumentSource.WEB,\n        semantic_identifier=result.title,\n        title=result.title,\n        boost=1,\n        score=score,\n        hidden=False,\n        metadata={},\n        match_highlights=[truncated_content],\n        doc_summary=\"\",\n        chunk_context=\"\",\n        updated_at=result.published_date,\n        image_file_id=None,\n    )\n    return InferenceSection(\n        center_chunk=inference_chunk,\n        chunks=[inference_chunk],\n        combined_content=truncated_content,\n    )\n\n\ndef inference_section_from_internet_search_result(\n    result: WebSearchResult,\n    rank: int = 0,\n) -> InferenceSection:\n    # Calculate score using reciprocal rank to preserve ordering\n    score = 1.0 / (rank + 1)\n\n    chunk = InferenceChunk(\n        chunk_id=0,\n        blurb=result.snippet,\n        content=result.snippet,\n        source_links={0: result.link},\n        section_continuation=False,\n        document_id=WEB_SEARCH_PREFIX + result.link,\n        source_type=DocumentSource.WEB,\n        semantic_identifier=result.title,\n        title=result.title,\n        boost=1,\n        score=score,\n        hidden=False,\n        metadata={},\n        match_highlights=[result.snippet],\n        doc_summary=\"\",\n        chunk_context=\"\",\n        updated_at=result.published_date,\n        image_file_id=None,\n    )\n\n    return InferenceSection(\n        center_chunk=chunk,\n        chunks=[chunk],\n        combined_content=result.snippet,\n    )\n\n\ndef extract_url_snippet_map(documents: list[SearchDoc]) -> dict[str, str]:\n    \"\"\"\n    Given a list of SearchDocs, this will extract the url -> summary map.\n    \"\"\"\n    url_snippet_map: dict[str, str] = {}\n    for document in documents:\n        if document.source_type == DocumentSource.WEB and document.link:\n            url_snippet_map[document.link] = document.blurb\n    return url_snippet_map\n"
  },
  {
    "path": "backend/onyx/tools/tool_implementations/web_search/web_search_tool.py",
    "content": "import json\nfrom typing import Any\n\nfrom sqlalchemy.orm import Session\nfrom typing_extensions import override\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.context.search.models import SearchDocsResponse\nfrom onyx.context.search.utils import convert_inference_sections_to_search_docs\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.web_search import fetch_active_web_search_provider\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta\nfrom onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta\nfrom onyx.server.query_and_chat.streaming_models import SearchToolStart\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import ToolCallException\nfrom onyx.tools.models import ToolResponse\nfrom onyx.tools.models import WebSearchToolOverrideKwargs\nfrom onyx.tools.tool_implementations.utils import (\n    convert_inference_sections_to_llm_string,\n)\nfrom onyx.tools.tool_implementations.web_search.models import DEFAULT_MAX_RESULTS\nfrom onyx.tools.tool_implementations.web_search.models import WebSearchResult\nfrom onyx.tools.tool_implementations.web_search.providers import (\n    build_search_provider_from_config,\n)\nfrom onyx.tools.tool_implementations.web_search.providers import (\n    provider_requires_api_key,\n)\nfrom onyx.tools.tool_implementations.web_search.utils import (\n    filter_web_search_results_with_no_title_or_snippet,\n)\nfrom onyx.tools.tool_implementations.web_search.utils import (\n    inference_section_from_internet_search_result,\n)\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom shared_configs.enums import WebSearchProviderType\n\nlogger = setup_logger()\n\nQUERIES_FIELD = \"queries\"\n\n\ndef _sanitize_query(query: str) -> str:\n    \"\"\"Remove control characters and normalize whitespace in a query.\n\n    LLMs sometimes produce queries with null characters or other control\n    characters that need to be stripped before sending to search providers.\n    \"\"\"\n    # Remove control characters (ASCII 0-31 and 127 DEL)\n    sanitized = \"\".join(c for c in query if ord(c) >= 32 and ord(c) != 127)\n    # Collapse multiple whitespace characters into single space and strip\n    return \" \".join(sanitized.split())\n\n\ndef _normalize_queries_input(raw: Any) -> list[str]:\n    \"\"\"Coerce LLM output to a list of sanitized query strings.\n\n    Accepts a bare string or a list (possibly with non-string elements).\n    Sanitizes each query (strip control chars, normalize whitespace) and\n    drops empty or whitespace-only entries.\n    \"\"\"\n    if isinstance(raw, str):\n        raw = raw.strip()\n        if not raw:\n            return []\n        raw = [raw]\n    elif not isinstance(raw, list):\n        return []\n    result: list[str] = []\n    for q in raw:\n        if q is None:\n            continue\n        sanitized = _sanitize_query(str(q))\n        if sanitized:\n            result.append(sanitized)\n    return result\n\n\nclass WebSearchTool(Tool[WebSearchToolOverrideKwargs]):\n    NAME = \"web_search\"\n    DESCRIPTION = \"Search the web for information.\"\n    DISPLAY_NAME = \"Web Search\"\n\n    def __init__(self, tool_id: int, emitter: Emitter) -> None:\n        super().__init__(emitter=emitter)\n        self._id = tool_id\n\n        # Get web search provider from database\n        with get_session_with_current_tenant() as db_session:\n            provider_model = fetch_active_web_search_provider(db_session)\n            if provider_model is None:\n                raise RuntimeError(\"No web search provider configured.\")\n            provider_type = WebSearchProviderType(provider_model.provider_type)\n            api_key = (\n                provider_model.api_key.get_value(apply_mask=False)\n                if provider_model.api_key\n                else None\n            )\n            config = provider_model.config\n\n        if provider_requires_api_key(provider_type) and api_key is None:\n            raise RuntimeError(\n                f\"No API key configured for {provider_type.value} web search provider.\"\n            )\n\n        self._provider = build_search_provider_from_config(\n            provider_type=provider_type,\n            api_key=api_key,\n            config=config,\n        )\n\n    @property\n    def id(self) -> int:\n        return self._id\n\n    @property\n    def name(self) -> str:\n        return self.NAME\n\n    @property\n    def description(self) -> str:\n        return self.DESCRIPTION\n\n    @property\n    def display_name(self) -> str:\n        return self.DISPLAY_NAME\n\n    @property\n    def supports_site_filter(self) -> bool:\n        \"\"\"Whether the underlying provider supports site: operator.\"\"\"\n        return self._provider.supports_site_filter\n\n    @override\n    @classmethod\n    def is_available(cls, db_session: Session) -> bool:\n        \"\"\"Available only if an active web search provider is configured in the database.\"\"\"\n        with get_session_with_current_tenant() as session:\n            provider = fetch_active_web_search_provider(session)\n            return provider is not None\n\n    def tool_definition(self) -> dict:\n        return {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": self.name,\n                \"description\": (\n                    \"Search the web for information. Returns a list of search results with titles, metadata, and snippets.\"\n                ),\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        QUERIES_FIELD: {\n                            \"type\": \"array\",\n                            \"items\": {\"type\": \"string\"},\n                            \"description\": \"One or more queries to look up on the web. Must contain only printable characters\",\n                        },\n                    },\n                    \"required\": [QUERIES_FIELD],\n                },\n            },\n        }\n\n    def emit_start(self, placement: Placement) -> None:\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=SearchToolStart(is_internet_search=True),\n            )\n        )\n\n    def _safe_execute_single_search(\n        self,\n        query: str,\n        provider: Any,\n    ) -> tuple[list[WebSearchResult] | None, str | None]:\n        \"\"\"Execute a single search query and return results with error capture.\n\n        Returns:\n            A tuple of (results, error_message). If successful, error_message is None.\n            If failed, results is None and error_message contains the error.\n        \"\"\"\n        try:\n            raw_results = list(provider.search(query))\n            filtered_results = filter_web_search_results_with_no_title_or_snippet(\n                raw_results\n            )\n            results = filtered_results[:DEFAULT_MAX_RESULTS]\n            return (results, None)\n        except Exception as e:\n            error_msg = str(e)\n            logger.warning(f\"Web search query '{query}' failed: {error_msg}\")\n            return (None, error_msg)\n\n    def run(\n        self,\n        placement: Placement,\n        override_kwargs: WebSearchToolOverrideKwargs,\n        **llm_kwargs: Any,\n    ) -> ToolResponse:\n        \"\"\"Execute the web search tool with multiple queries in parallel\"\"\"\n        if QUERIES_FIELD not in llm_kwargs:\n            raise ToolCallException(\n                message=f\"Missing required '{QUERIES_FIELD}' parameter in web_search tool call\",\n                llm_facing_message=(\n                    f\"The web_search tool requires a '{QUERIES_FIELD}' parameter \"\n                    f\"containing an array of search queries. Please provide the queries \"\n                    f'like: {{\"queries\": [\"your search query here\"]}}'\n                ),\n            )\n        queries = _normalize_queries_input(llm_kwargs[QUERIES_FIELD])\n        if not queries:\n            raise ToolCallException(\n                message=(\n                    \"No valid web search queries provided; all queries were empty or whitespace-only after trimming.\"\n                ),\n                llm_facing_message=(\n                    \"No valid web search queries were provided (they were empty or \"\n                    \"whitespace-only). Please provide a real search query.\"\n                ),\n            )\n\n        # Emit queries\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=SearchToolQueriesDelta(queries=queries),\n            )\n        )\n\n        # Perform searches in parallel with error capture\n        functions_with_args = [\n            (self._safe_execute_single_search, (query, self._provider))\n            for query in queries\n        ]\n        search_results_with_errors: list[\n            tuple[list[WebSearchResult] | None, str | None]\n        ] = run_functions_tuples_in_parallel(\n            functions_with_args,\n            allow_failures=False,  # Our wrapper handles errors internally\n        )\n\n        # Separate successful results from failures\n        valid_results: list[list[WebSearchResult]] = []\n        failed_queries: dict[str, str] = {}\n\n        for query, (results, error) in zip(queries, search_results_with_errors):\n            if error is not None:\n                failed_queries[query] = error\n            elif results is not None:\n                valid_results.append(results)\n\n        # Log partial failures but continue if we have at least one success\n        if failed_queries and valid_results:\n            logger.warning(\n                f\"Web search partial failure: {len(failed_queries)}/{len(queries)} \"\n                f\"queries failed. Failed queries: {json.dumps(failed_queries)}\"\n            )\n\n        # If all queries failed, raise ToolCallException with details\n        if not valid_results:\n            error_details = json.dumps(failed_queries, indent=2)\n            raise ToolCallException(\n                message=f\"All web search queries failed: {error_details}\",\n                llm_facing_message=(\n                    f\"All web search queries failed. Query failures:\\n{error_details}\"\n                ),\n            )\n\n        # Interweave top results from each query in round-robin fashion\n        all_search_results: list[WebSearchResult] = []\n\n        if valid_results:\n            # Track seen (title, url) pairs to avoid duplicates\n            seen = set()\n            # Track current index for each result set\n            indices = [0] * len(valid_results)\n\n            # Round-robin interweaving: cycle through result sets and increment indices\n            while len(all_search_results) < DEFAULT_MAX_RESULTS:\n                added_any = False\n                for idx, results in enumerate(valid_results):\n                    if len(all_search_results) >= DEFAULT_MAX_RESULTS:\n                        break\n                    if indices[idx] < len(results):\n                        result = results[indices[idx]]\n                        key = (result.title, result.link)\n                        if key not in seen:\n                            seen.add(key)\n                            all_search_results.append(result)\n                            added_any = True\n                        indices[idx] += 1\n                # Stop if no more results to add\n                if not added_any:\n                    break\n\n        # This should be a very rare case and is due to not failing loudly enough in the search provider implementation.\n        if not all_search_results:\n            raise ToolCallException(\n                message=\"Web search queries succeeded but returned no results\",\n                llm_facing_message=(\n                    \"Web search completed but found no results for the given queries. \"\n                    \"Try rephrasing or using different search terms.\"\n                ),\n            )\n\n        # Convert search results to InferenceSections with rank-based scoring\n        inference_sections = [\n            inference_section_from_internet_search_result(result, rank=i)\n            for i, result in enumerate(all_search_results)\n        ]\n\n        # Convert to SearchDocs\n        search_docs = convert_inference_sections_to_search_docs(\n            inference_sections, is_internet=True\n        )\n\n        # Emit documents\n        self.emitter.emit(\n            Packet(\n                placement=placement,\n                obj=SearchToolDocumentsDelta(documents=search_docs),\n            )\n        )\n\n        # Format for LLM\n        if not all_search_results:\n            docs_str = json.dumps(\n                {\n                    \"results\": [],\n                    \"message\": \"The web search completed but returned no results for any of the queries. Do not search again.\",\n                }\n            )\n            citation_mapping: dict[int, str] = {}\n        else:\n            docs_str, citation_mapping = convert_inference_sections_to_llm_string(\n                top_sections=inference_sections,\n                citation_start=override_kwargs.starting_citation_num,\n                limit=None,  # Already truncated\n                include_source_type=False,\n                include_link=True,\n            )\n\n        return ToolResponse(\n            rich_response=SearchDocsResponse(\n                search_docs=search_docs, citation_mapping=citation_mapping\n            ),\n            llm_facing_response=docs_str,\n        )\n"
  },
  {
    "path": "backend/onyx/tools/tool_runner.py",
    "content": "import json\nimport traceback\nfrom collections import defaultdict\nfrom typing import Any\n\nimport onyx.tracing.framework._error_tracing as _error_tracing\nfrom onyx.chat.models import ChatMessageSimple\nfrom onyx.configs.constants import MessageType\nfrom onyx.context.search.models import SearchDocsResponse\nfrom onyx.db.memory import UserMemoryContext\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import PacketException\nfrom onyx.server.query_and_chat.streaming_models import SectionEnd\nfrom onyx.tools.interface import Tool\nfrom onyx.tools.models import ChatFile\nfrom onyx.tools.models import ChatMinimalTextMessage\nfrom onyx.tools.models import ImageGenerationToolOverrideKwargs\nfrom onyx.tools.models import OpenURLToolOverrideKwargs\nfrom onyx.tools.models import ParallelToolCallResponse\nfrom onyx.tools.models import PythonToolOverrideKwargs\nfrom onyx.tools.models import SearchToolOverrideKwargs\nfrom onyx.tools.models import ToolCallException\nfrom onyx.tools.models import ToolCallKickoff\nfrom onyx.tools.models import ToolExecutionException\nfrom onyx.tools.models import ToolResponse\nfrom onyx.tools.models import WebSearchToolOverrideKwargs\nfrom onyx.tools.tool_implementations.images.image_generation_tool import (\n    ImageGenerationTool,\n)\nfrom onyx.tools.tool_implementations.memory.memory_tool import MemoryTool\nfrom onyx.tools.tool_implementations.memory.memory_tool import MemoryToolOverrideKwargs\nfrom onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool\nfrom onyx.tools.tool_implementations.python.python_tool import PythonTool\nfrom onyx.tools.tool_implementations.search.search_tool import SearchTool\nfrom onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool\nfrom onyx.tracing.framework.create import function_span\nfrom onyx.tracing.framework.spans import SpanError\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\n\nlogger = setup_logger()\n\nQUERIES_FIELD = \"queries\"\nURLS_FIELD = \"urls\"\nGENERIC_TOOL_ERROR_MESSAGE = \"Tool failed with error: {error}\"\n\n# 10 minute timeout for tool execution to prevent indefinite hangs\nTOOL_EXECUTION_TIMEOUT_SECONDS = 10 * 60\n\n# Mapping of tool name to the field that should be merged when multiple calls exist\nMERGEABLE_TOOL_FIELDS: dict[str, str] = {\n    SearchTool.NAME: QUERIES_FIELD,\n    WebSearchTool.NAME: QUERIES_FIELD,\n    OpenURLTool.NAME: URLS_FIELD,\n}\n\n\ndef _merge_tool_calls(tool_calls: list[ToolCallKickoff]) -> list[ToolCallKickoff]:\n    \"\"\"Merge multiple tool calls for SearchTool, WebSearchTool, or OpenURLTool into a single call.\n\n    For SearchTool (internal_search) and WebSearchTool (web_search), if there are\n    multiple calls, their queries are merged into a single tool call.\n    For OpenURLTool (open_url), multiple calls have their urls merged.\n    Other tool calls are left unchanged.\n\n    Args:\n        tool_calls: List of tool calls to potentially merge\n\n    Returns:\n        List of merged tool calls\n    \"\"\"\n    # Group tool calls by tool name\n    tool_calls_by_name: dict[str, list[ToolCallKickoff]] = defaultdict(list)\n    merged_calls: list[ToolCallKickoff] = []\n\n    for tool_call in tool_calls:\n        tool_calls_by_name[tool_call.tool_name].append(tool_call)\n\n    # Process each tool name group\n    for tool_name, calls in tool_calls_by_name.items():\n        if tool_name in MERGEABLE_TOOL_FIELDS and len(calls) > 1:\n            merge_field = MERGEABLE_TOOL_FIELDS[tool_name]\n\n            # Merge field values from all calls\n            all_values: list[str] = []\n            for call in calls:\n                values = call.tool_args.get(merge_field, [])\n                if isinstance(values, list):\n                    all_values.extend(values)\n                elif values:\n                    # Handle case where it might be a single string\n                    all_values.append(str(values))\n\n            # Create a merged tool call using the first call's ID and merging the field\n            merged_args = calls[0].tool_args.copy()\n            merged_args[merge_field] = all_values\n\n            merged_call = ToolCallKickoff(\n                tool_call_id=calls[0].tool_call_id,  # Use first call's ID\n                tool_name=tool_name,\n                tool_args=merged_args,\n                # Use first call's placement since merged calls become a single call\n                placement=calls[0].placement,\n            )\n            merged_calls.append(merged_call)\n        else:\n            # No merging needed, add all calls as-is\n            merged_calls.extend(calls)\n\n    return merged_calls\n\n\ndef _extract_image_file_ids_from_tool_response_message(\n    message: str,\n) -> list[str]:\n    try:\n        parsed_message = json.loads(message)\n    except json.JSONDecodeError:\n        return []\n\n    parsed_items: list[Any] = (\n        parsed_message if isinstance(parsed_message, list) else [parsed_message]\n    )\n    file_ids: list[str] = []\n    for item in parsed_items:\n        if not isinstance(item, dict):\n            continue\n\n        file_id = item.get(\"file_id\")\n        if isinstance(file_id, str):\n            file_ids.append(file_id)\n\n    return file_ids\n\n\ndef _extract_recent_generated_image_file_ids(\n    message_history: list[ChatMessageSimple],\n) -> list[str]:\n    tool_name_by_tool_call_id: dict[str, str] = {}\n    recent_image_file_ids: list[str] = []\n    seen_file_ids: set[str] = set()\n\n    for message in message_history:\n        if message.message_type == MessageType.ASSISTANT and message.tool_calls:\n            for tool_call in message.tool_calls:\n                tool_name_by_tool_call_id[tool_call.tool_call_id] = tool_call.tool_name\n            continue\n\n        if (\n            message.message_type != MessageType.TOOL_CALL_RESPONSE\n            or not message.tool_call_id\n        ):\n            continue\n\n        tool_name = tool_name_by_tool_call_id.get(message.tool_call_id)\n        if tool_name != ImageGenerationTool.NAME:\n            continue\n\n        for file_id in _extract_image_file_ids_from_tool_response_message(\n            message.message\n        ):\n            if file_id in seen_file_ids:\n                continue\n            seen_file_ids.add(file_id)\n            recent_image_file_ids.append(file_id)\n\n    return recent_image_file_ids\n\n\ndef _safe_run_single_tool(\n    tool: Tool,\n    tool_call: ToolCallKickoff,\n    override_kwargs: Any,\n) -> ToolResponse:\n    \"\"\"Execute a single tool and return its response.\n\n    This function is designed to be run in parallel via run_functions_tuples_in_parallel.\n\n    Exception handling:\n    - ToolCallException: Expected errors from tool execution (e.g., invalid input,\n      API failures). Uses the exception's llm_facing_message for LLM consumption.\n    - Other exceptions: Unexpected errors. Uses a generic error message.\n\n    In all cases (success or failure):\n    - SectionEnd packet is emitted to signal tool completion\n    - tool_call is set on the response for downstream processing\n    \"\"\"\n    tool_response: ToolResponse | None = None\n\n    with function_span(tool.name) as span_fn:\n        span_fn.span_data.input = str(tool_call.tool_args)\n        try:\n            tool_response = tool.run(\n                placement=tool_call.placement,\n                override_kwargs=override_kwargs,\n                **tool_call.tool_args,\n            )\n            span_fn.span_data.output = tool_response.llm_facing_response\n        except ToolCallException as e:\n            # ToolCallException is an expected error from tool execution\n            # Use llm_facing_message which is specifically designed for LLM consumption\n            logger.error(f\"Tool call error for {tool.name}: {e}\")\n            tool_response = ToolResponse(\n                rich_response=None,\n                llm_facing_response=GENERIC_TOOL_ERROR_MESSAGE.format(\n                    error=e.llm_facing_message\n                ),\n            )\n            _error_tracing.attach_error_to_current_span(\n                SpanError(\n                    message=\"Tool call error (expected)\",\n                    data={\n                        \"tool_name\": tool.name,\n                        \"tool_call_id\": tool_call.tool_call_id,\n                        \"tool_args\": tool_call.tool_args,\n                        \"error\": str(e),\n                        \"llm_facing_message\": e.llm_facing_message,\n                        \"stack_trace\": traceback.format_exc(),\n                        \"error_type\": \"ToolCallException\",\n                    },\n                )\n            )\n        except ToolExecutionException as e:\n            # Unexpected error during tool execution\n            logger.error(f\"Unexpected error running tool {tool.name}: {e}\")\n            tool_response = ToolResponse(\n                rich_response=None,\n                llm_facing_response=GENERIC_TOOL_ERROR_MESSAGE.format(error=str(e)),\n            )\n            _error_tracing.attach_error_to_current_span(\n                SpanError(\n                    message=\"Tool execution error (unexpected)\",\n                    data={\n                        \"tool_name\": tool.name,\n                        \"tool_call_id\": tool_call.tool_call_id,\n                        \"tool_args\": tool_call.tool_args,\n                        \"error\": str(e),\n                        \"stack_trace\": traceback.format_exc(),\n                        \"error_type\": type(e).__name__,\n                    },\n                )\n            )\n            if e.emit_error_packet:\n                tool.emitter.emit(\n                    Packet(\n                        placement=tool_call.placement,\n                        obj=PacketException(exception=e),\n                    )\n                )\n        except Exception as e:\n            # Unexpected error during tool execution\n            logger.error(f\"Unexpected error running tool {tool.name}: {e}\")\n            tool_response = ToolResponse(\n                rich_response=None,\n                llm_facing_response=GENERIC_TOOL_ERROR_MESSAGE.format(error=str(e)),\n            )\n            _error_tracing.attach_error_to_current_span(\n                SpanError(\n                    message=\"Tool execution error (unexpected)\",\n                    data={\n                        \"tool_name\": tool.name,\n                        \"tool_call_id\": tool_call.tool_call_id,\n                        \"tool_args\": tool_call.tool_args,\n                        \"error\": str(e),\n                        \"stack_trace\": traceback.format_exc(),\n                        \"error_type\": type(e).__name__,\n                    },\n                )\n            )\n\n    # Emit SectionEnd after tool completes (success or failure)\n    tool.emitter.emit(\n        Packet(\n            placement=tool_call.placement,\n            obj=SectionEnd(),\n        )\n    )\n\n    # Set tool_call on the response for downstream processing\n    tool_response.tool_call = tool_call\n    return tool_response\n\n\ndef run_tool_calls(\n    tool_calls: list[ToolCallKickoff],\n    tools: list[Tool],\n    # The stuff below is needed for the different individual built-in tools\n    message_history: list[ChatMessageSimple],\n    user_memory_context: UserMemoryContext | None,\n    user_info: str | None,\n    citation_mapping: dict[int, str],\n    next_citation_num: int,\n    # Max number of tools to run concurrently (and overall) in this batch.\n    # If set, tool calls beyond this limit are dropped.\n    max_concurrent_tools: int | None = None,\n    # Skip query expansion for repeat search tool calls\n    skip_search_query_expansion: bool = False,\n    # Files from the chat session to pass to tools like PythonTool\n    chat_files: list[ChatFile] | None = None,\n    # A map of url -> summary for passing web results to open url tool\n    url_snippet_map: dict[str, str] = {},\n    # When False, don't pass memory context to search tools for query expansion\n    # (but still pass it to the memory tool for persistence)\n    inject_memories_in_prompt: bool = True,\n) -> ParallelToolCallResponse:\n    \"\"\"Run (optionally merged) tool calls in parallel and update citation mappings.\n\n    Before execution, tool calls for `SearchTool`, `WebSearchTool`, and `OpenURLTool`\n    are merged so repeated calls are collapsed into a single call per tool:\n    - `SearchTool` / `WebSearchTool`: merge the `queries` list\n    - `OpenURLTool`: merge the `urls` list\n\n    Tools are executed in parallel (threadpool). For tools that generate citations,\n    each tool call is assigned a **distinct** `starting_citation_num` range to avoid\n    citation number collisions when running concurrently (the range is advanced by\n    100 per tool call).\n\n    The provided `citation_mapping` may be mutated in-place: any new\n    `SearchDocsResponse.citation_mapping` entries are merged into it.\n\n    Args:\n        tool_calls: List of tool calls to execute.\n        tools: List of available tool instances.\n        message_history: Chat message history (used to find the most recent user query\n            for `SearchTool` override kwargs).\n        user_memory_context: User memory context, if available (passed through to `SearchTool`).\n        user_info: User information string, if available (passed through to `SearchTool`).\n        citation_mapping: Current citation number to URL mapping. May be updated with\n            new citations produced by search tools.\n        next_citation_num: The next citation number to allocate from.\n        max_concurrent_tools: Max number of tools to run in this batch. If set, any\n            tool calls after this limit are dropped (not queued).\n        skip_search_query_expansion: Whether to skip query expansion for `SearchTool`\n            (intended for repeated search calls within the same chat turn).\n\n    Returns:\n        A `ParallelToolCallResponse` containing:\n        - `tool_responses`: `ToolResponse` objects for successfully dispatched tool calls\n          (each has `tool_call` set). If a tool execution fails at the threadpool layer,\n          its entry will be omitted.\n        - `updated_citation_mapping`: The updated citation mapping dictionary.\n    \"\"\"\n    # Merge tool calls for SearchTool, WebSearchTool, and OpenURLTool\n    merged_tool_calls = _merge_tool_calls(tool_calls)\n\n    if not merged_tool_calls:\n        return ParallelToolCallResponse(\n            tool_responses=[],\n            updated_citation_mapping=citation_mapping,\n        )\n\n    tools_by_name = {tool.name: tool for tool in tools}\n\n    # Drop unknown tools (and don't let them count against the cap)\n    filtered_tool_calls: list[ToolCallKickoff] = []\n    for tool_call in merged_tool_calls:\n        if tool_call.tool_name not in tools_by_name:\n            logger.warning(f\"Tool {tool_call.tool_name} not found in tools list\")\n            continue\n        filtered_tool_calls.append(tool_call)\n\n    # Apply safety cap (drop tool calls beyond the cap)\n    if max_concurrent_tools is not None:\n        if max_concurrent_tools <= 0:\n            return ParallelToolCallResponse(\n                tool_responses=[],\n                updated_citation_mapping=citation_mapping,\n            )\n        filtered_tool_calls = filtered_tool_calls[:max_concurrent_tools]\n\n    # Get starting citation number from citation processor to avoid conflicts with project files\n    starting_citation_num = next_citation_num\n\n    # Prepare minimal history for SearchTool (computed once, shared by all)\n    minimal_history = [\n        ChatMinimalTextMessage(message=msg.message, message_type=msg.message_type)\n        for msg in message_history\n    ]\n    last_user_message = None\n    for i in range(len(minimal_history) - 1, -1, -1):\n        if minimal_history[i].message_type == MessageType.USER:\n            last_user_message = minimal_history[i].message\n            break\n\n    # Convert citation_mapping for OpenURLTool (computed once, shared by all)\n    url_to_citation: dict[str, int] = {\n        url: citation_num for citation_num, url in citation_mapping.items()\n    }\n    recent_generated_image_file_ids = _extract_recent_generated_image_file_ids(\n        message_history\n    )\n\n    # Prepare all tool calls with their override_kwargs\n    # Each tool gets a unique starting citation number to avoid conflicts when running in parallel\n    tool_run_params: list[tuple[Tool, ToolCallKickoff, Any]] = []\n\n    for tool_call in filtered_tool_calls:\n        tool = tools_by_name[tool_call.tool_name]\n\n        # Emit the tool start packet before running the tool\n        tool.emit_start(placement=tool_call.placement)\n\n        override_kwargs: (\n            SearchToolOverrideKwargs\n            | WebSearchToolOverrideKwargs\n            | OpenURLToolOverrideKwargs\n            | PythonToolOverrideKwargs\n            | ImageGenerationToolOverrideKwargs\n            | MemoryToolOverrideKwargs\n            | None\n        ) = None\n\n        if isinstance(tool, SearchTool):\n            if last_user_message is None:\n                raise ValueError(\"No user message found in message history\")\n\n            search_memory_context = (\n                user_memory_context\n                if inject_memories_in_prompt\n                else (\n                    user_memory_context.without_memories()\n                    if user_memory_context\n                    else None\n                )\n            )\n            override_kwargs = SearchToolOverrideKwargs(\n                starting_citation_num=starting_citation_num,\n                original_query=last_user_message,\n                message_history=minimal_history,\n                user_memory_context=search_memory_context,\n                user_info=user_info,\n                skip_query_expansion=skip_search_query_expansion,\n            )\n            # Increment citation number for next search tool to avoid conflicts\n            # Estimate: reserve 100 citation slots per search tool\n            starting_citation_num += 100\n\n        elif isinstance(tool, WebSearchTool):\n            override_kwargs = WebSearchToolOverrideKwargs(\n                starting_citation_num=starting_citation_num,\n            )\n            # Increment citation number for next search tool to avoid conflicts\n            starting_citation_num += 100\n\n        elif isinstance(tool, OpenURLTool):\n            override_kwargs = OpenURLToolOverrideKwargs(\n                starting_citation_num=starting_citation_num,\n                citation_mapping=url_to_citation,\n                url_snippet_map=url_snippet_map,\n            )\n            starting_citation_num += 100\n\n        elif isinstance(tool, PythonTool):\n            override_kwargs = PythonToolOverrideKwargs(\n                chat_files=chat_files or [],\n            )\n        elif isinstance(tool, ImageGenerationTool):\n            override_kwargs = ImageGenerationToolOverrideKwargs(\n                recent_generated_image_file_ids=recent_generated_image_file_ids\n            )\n        elif isinstance(tool, MemoryTool):\n            override_kwargs = MemoryToolOverrideKwargs(\n                user_name=(\n                    user_memory_context.user_info.name if user_memory_context else None\n                ),\n                user_email=(\n                    user_memory_context.user_info.email if user_memory_context else None\n                ),\n                user_role=(\n                    user_memory_context.user_info.role if user_memory_context else None\n                ),\n                existing_memories=(\n                    list(user_memory_context.memories) if user_memory_context else []\n                ),\n                chat_history=minimal_history,\n            )\n\n        tool_run_params.append((tool, tool_call, override_kwargs))\n\n    # Run all tools in parallel\n    functions_with_args = [\n        (_safe_run_single_tool, (tool, tool_call, override_kwargs))\n        for tool, tool_call, override_kwargs in tool_run_params\n    ]\n\n    tool_run_results: list[ToolResponse | None] = run_functions_tuples_in_parallel(\n        functions_with_args,\n        allow_failures=True,  # Continue even if some tools fail\n        max_workers=max_concurrent_tools,\n        timeout=TOOL_EXECUTION_TIMEOUT_SECONDS,\n    )\n\n    # Process results and update citation_mapping\n    for result in tool_run_results:\n        if result is None:\n            continue\n\n        if result and isinstance(result.rich_response, SearchDocsResponse):\n            new_citations = result.rich_response.citation_mapping\n            if new_citations:\n                # Merge new citations into the existing mapping\n                citation_mapping.update(new_citations)\n\n    tool_responses = [result for result in tool_run_results if result is not None]\n    return ParallelToolCallResponse(\n        tool_responses=tool_responses,\n        updated_citation_mapping=citation_mapping,\n    )\n"
  },
  {
    "path": "backend/onyx/tools/utils.py",
    "content": "import json\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.app_configs import AZURE_IMAGE_API_KEY\nfrom onyx.db.connector import check_connectors_exist\nfrom onyx.db.document import check_docs_exist\nfrom onyx.db.models import LLMProvider\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.utils import find_model_obj\nfrom onyx.llm.utils import get_model_map\nfrom onyx.natural_language_processing.utils import BaseTokenizer\nfrom onyx.tools.interface import Tool\n\n\ndef explicit_tool_calling_supported(model_provider: str, model_name: str) -> bool:\n    model_map = get_model_map()\n    model_obj = find_model_obj(\n        model_map=model_map,\n        provider=model_provider,\n        model_name=model_name,\n    )\n\n    model_supports = (\n        model_obj.get(\"supports_function_calling\", False) if model_obj else False\n    )\n    return model_supports\n\n\ndef compute_tool_tokens(tool: Tool, llm_tokenizer: BaseTokenizer) -> int:\n    return len(llm_tokenizer.encode(json.dumps(tool.tool_definition())))\n\n\ndef compute_all_tool_tokens(tools: list[Tool], llm_tokenizer: BaseTokenizer) -> int:\n    return sum(compute_tool_tokens(tool, llm_tokenizer) for tool in tools)\n\n\ndef is_image_generation_available(db_session: Session) -> bool:\n    providers = db_session.query(LLMProvider).all()\n    for provider in providers:\n        if provider.provider == LlmProviderNames.OPENAI:\n            return True\n\n    return bool(AZURE_IMAGE_API_KEY)\n\n\ndef is_document_search_available(db_session: Session) -> bool:\n    docs_exist = check_docs_exist(db_session)\n    connectors_exist = check_connectors_exist(db_session)\n    return docs_exist or connectors_exist\n\n\ndef generate_tools_description(tools: list[Tool]) -> str:\n    if not tools:\n        return \"\"\n    if len(tools) == 1:\n        return tools[0].name\n    if len(tools) == 2:\n        return f\"{tools[0].name} and {tools[1].name}\"\n\n    names = [tool.name for tool in tools[:-1]]\n    return \", \".join(names) + f\", and {tools[-1].name}\"\n"
  },
  {
    "path": "backend/onyx/tracing/braintrust_tracing_processor.py",
    "content": "import datetime\nfrom typing import Any\nfrom typing import Dict\nfrom typing import Optional\n\nimport braintrust\nfrom braintrust import NOOP_SPAN\n\nfrom .framework.processor_interface import TracingProcessor\nfrom .framework.span_data import AgentSpanData\nfrom .framework.span_data import FunctionSpanData\nfrom .framework.span_data import GenerationSpanData\nfrom .framework.span_data import SpanData\nfrom .framework.spans import Span\nfrom .framework.traces import Trace\nfrom onyx.llm.cost import calculate_llm_cost_cents\n\n\ndef _span_type(span: Span[Any]) -> braintrust.SpanTypeAttribute:\n    if span.span_data.type in [\"agent\"]:\n        return braintrust.SpanTypeAttribute.TASK\n    elif span.span_data.type in [\"function\"]:\n        return braintrust.SpanTypeAttribute.TOOL\n    elif span.span_data.type in [\"generation\"]:\n        return braintrust.SpanTypeAttribute.LLM\n    else:\n        return braintrust.SpanTypeAttribute.TASK\n\n\ndef _span_name(span: Span[Any]) -> str:\n    if isinstance(span.span_data, AgentSpanData) or isinstance(\n        span.span_data, FunctionSpanData\n    ):\n        return span.span_data.name\n    elif isinstance(span.span_data, GenerationSpanData):\n        return \"Generation\"\n    else:\n        return \"Unknown\"\n\n\ndef _timestamp_from_maybe_iso(timestamp: Optional[str]) -> Optional[float]:\n    if timestamp is None:\n        return None\n    return datetime.datetime.fromisoformat(timestamp).timestamp()\n\n\ndef _maybe_timestamp_elapsed(\n    end: Optional[str], start: Optional[str]\n) -> Optional[float]:\n    if start is None or end is None:\n        return None\n    return (\n        datetime.datetime.fromisoformat(end) - datetime.datetime.fromisoformat(start)\n    ).total_seconds()\n\n\nclass BraintrustTracingProcessor(TracingProcessor):\n    \"\"\"\n    `BraintrustTracingProcessor` is a `tracing.TracingProcessor` that logs traces to Braintrust.\n\n    Args:\n        logger: A `braintrust.Span` or `braintrust.Experiment` or `braintrust.Logger` to use for logging.\n            If `None`, the current span, experiment, or logger will be selected exactly as in `braintrust.start_span`.\n    \"\"\"\n\n    def __init__(self, logger: Optional[braintrust.Logger] = None):\n        self._logger = logger\n        self._spans: Dict[str, Any] = {}\n        self._first_input: Dict[str, Any] = {}\n        self._last_output: Dict[str, Any] = {}\n        self._trace_metadata: Dict[str, Dict[str, Any]] = {}\n        self._span_names: Dict[str, str] = {}\n\n    def on_trace_start(self, trace: Trace) -> None:\n        trace_meta = trace.export() or {}\n        metadata = trace_meta.get(\"metadata\") or {}\n        if metadata:\n            self._trace_metadata[trace.trace_id] = metadata\n\n        current_context = braintrust.current_span()\n        if current_context != NOOP_SPAN:\n            self._spans[trace.trace_id] = current_context.start_span(\n                name=trace.name,\n                span_attributes={\"type\": \"task\", \"name\": trace.name},\n                metadata=metadata,\n            )\n        elif self._logger is not None:\n            self._spans[trace.trace_id] = self._logger.start_span(\n                span_attributes={\"type\": \"task\", \"name\": trace.name},\n                span_id=trace.trace_id,\n                root_span_id=trace.trace_id,\n                metadata=metadata,\n            )\n        else:\n            self._spans[trace.trace_id] = braintrust.start_span(\n                id=trace.trace_id,\n                span_attributes={\"type\": \"task\", \"name\": trace.name},\n                metadata=metadata,\n            )\n        self._span_names[trace.trace_id] = trace.name\n\n    def on_trace_end(self, trace: Trace) -> None:\n        span: Any = self._spans.pop(trace.trace_id)\n        self._trace_metadata.pop(trace.trace_id, None)\n        self._span_names.pop(trace.trace_id, None)\n        # Get the first input and last output for this specific trace\n        trace_first_input = self._first_input.pop(trace.trace_id, None)\n        trace_last_output = self._last_output.pop(trace.trace_id, None)\n        span.log(input=trace_first_input, output=trace_last_output)\n        span.end()\n\n    def _agent_log_data(self, span: Span[AgentSpanData]) -> Dict[str, Any]:\n        return {\n            \"metadata\": {\n                \"tools\": span.span_data.tools,\n                \"handoffs\": span.span_data.handoffs,\n                \"output_type\": span.span_data.output_type,\n            }\n        }\n\n    def _function_log_data(self, span: Span[FunctionSpanData]) -> Dict[str, Any]:\n        return {\n            \"input\": span.span_data.input,\n            \"output\": span.span_data.output,\n        }\n\n    def _generation_log_data(self, span: Span[GenerationSpanData]) -> Dict[str, Any]:\n        metrics = {}\n        total_latency = _maybe_timestamp_elapsed(span.ended_at, span.started_at)\n\n        if total_latency is not None:\n            metrics[\"total_latency_seconds\"] = total_latency\n\n        if span.span_data.time_to_first_action_seconds is not None:\n            metrics[\"time_to_first_action_seconds\"] = (\n                span.span_data.time_to_first_action_seconds\n            )\n\n        usage = span.span_data.usage or {}\n        prompt_tokens = None\n        completion_tokens = None\n        prompt_tokens = usage.get(\"prompt_tokens\")\n        if prompt_tokens is None:\n            prompt_tokens = usage.get(\"input_tokens\")\n        if prompt_tokens is not None:\n            metrics[\"prompt_tokens\"] = int(prompt_tokens)\n        completion_tokens = usage.get(\"completion_tokens\")\n        if completion_tokens is None:\n            completion_tokens = usage.get(\"output_tokens\")\n        if completion_tokens is not None:\n            metrics[\"completion_tokens\"] = int(completion_tokens)\n\n        if \"total_tokens\" in usage:\n            metrics[\"tokens\"] = usage[\"total_tokens\"]\n        elif prompt_tokens is not None and completion_tokens is not None:\n            metrics[\"tokens\"] = prompt_tokens + completion_tokens\n\n        if \"cache_read_input_tokens\" in usage:\n            metrics[\"prompt_cached_tokens\"] = usage[\"cache_read_input_tokens\"]\n        if \"cache_creation_input_tokens\" in usage:\n            metrics[\"prompt_cache_creation_tokens\"] = usage[\n                \"cache_creation_input_tokens\"\n            ]\n\n        model_name = span.span_data.model\n        if model_name and prompt_tokens is not None and completion_tokens is not None:\n            cost_cents = calculate_llm_cost_cents(\n                model_name=model_name,\n                prompt_tokens=prompt_tokens,\n                completion_tokens=completion_tokens,\n            )\n            if cost_cents > 0:\n                metrics[\"cost_cents\"] = cost_cents\n\n        metadata: Dict[str, Any] = {\n            \"model\": span.span_data.model,\n            \"model_config\": span.span_data.model_config,\n        }\n\n        # Include reasoning in metadata if present\n        if span.span_data.reasoning:\n            metadata[\"reasoning\"] = span.span_data.reasoning\n\n        return {\n            \"input\": span.span_data.input,\n            \"output\": span.span_data.output,\n            \"metadata\": metadata,\n            \"metrics\": metrics,\n        }\n\n    def _log_data(self, span: Span[Any]) -> Dict[str, Any]:\n        if isinstance(span.span_data, AgentSpanData):\n            return self._agent_log_data(span)\n        elif isinstance(span.span_data, FunctionSpanData):\n            return self._function_log_data(span)\n        elif isinstance(span.span_data, GenerationSpanData):\n            return self._generation_log_data(span)\n        else:\n            return {}\n\n    def on_span_start(self, span: Span[SpanData]) -> None:\n        parent: Any = (\n            self._spans[span.parent_id]\n            if span.parent_id is not None\n            else self._spans[span.trace_id]\n        )\n        trace_metadata = self._trace_metadata.get(span.trace_id)\n        if isinstance(span.span_data, GenerationSpanData):\n            span_name = _generation_span_name(span)\n        else:\n            span_name = _span_name(span)\n        span_kwargs: Dict[str, Any] = dict(\n            id=span.span_id,\n            name=span_name,\n            type=_span_type(span),\n            start_time=_timestamp_from_maybe_iso(span.started_at),\n        )\n        if trace_metadata:\n            span_kwargs[\"metadata\"] = trace_metadata\n        created_span: Any = parent.start_span(**span_kwargs)\n        self._spans[span.span_id] = created_span\n        self._span_names[span.span_id] = span_name\n\n        # Set the span as current so current_span() calls will return it\n        created_span.set_current()\n\n    def on_span_end(self, span: Span[SpanData]) -> None:\n        s: Any = self._spans.pop(span.span_id)\n        self._span_names.pop(span.span_id, None)\n        event = dict(error=span.error, **self._log_data(span))\n        s.log(**event)\n        s.unset_current()\n        s.end(_timestamp_from_maybe_iso(span.ended_at))\n\n        input_ = event.get(\"input\")\n        output = event.get(\"output\")\n        # Store first input and last output per trace_id\n        trace_id = span.trace_id\n        if trace_id not in self._first_input and input_ is not None:\n            self._first_input[trace_id] = input_\n\n        if output is not None:\n            self._last_output[trace_id] = output\n\n    def shutdown(self) -> None:\n        if self._logger is not None:\n            self._logger.flush()\n        else:\n            braintrust.flush()\n\n    def force_flush(self) -> None:\n        if self._logger is not None:\n            self._logger.flush()\n        else:\n            braintrust.flush()\n\n\ndef _generation_span_name(span: Span[SpanData]) -> str:\n    data = span.span_data\n    if isinstance(data, GenerationSpanData):\n        model_config = data.model_config\n        if isinstance(model_config, dict):\n            flow = model_config.get(\"flow\")\n            if isinstance(flow, str) and flow.strip():\n                return flow\n    return _span_name(span)\n"
  },
  {
    "path": "backend/onyx/tracing/framework/__init__.py",
    "content": "from .processor_interface import TracingProcessor\nfrom .provider import DefaultTraceProvider\nfrom .setup import get_trace_provider\nfrom .setup import set_trace_provider\n\n\ndef add_trace_processor(span_processor: TracingProcessor) -> None:\n    \"\"\"\n    Adds a new trace processor. This processor will receive all traces/spans.\n    \"\"\"\n    get_trace_provider().register_processor(span_processor)\n\n\ndef set_trace_processors(processors: list[TracingProcessor]) -> None:\n    \"\"\"\n    Set the list of trace processors. This will replace the current list of processors.\n    \"\"\"\n    get_trace_provider().set_processors(processors)\n\n\nset_trace_provider(DefaultTraceProvider())\n"
  },
  {
    "path": "backend/onyx/tracing/framework/_error_tracing.py",
    "content": "from typing import Any\n\nfrom .create import get_current_span\nfrom .spans import Span\nfrom .spans import SpanError\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger(__name__)\n\n\ndef attach_error_to_span(span: Span[Any], error: SpanError) -> None:\n    span.set_error(error)\n\n\ndef attach_error_to_current_span(error: SpanError) -> None:\n    span = get_current_span()\n    if span:\n        attach_error_to_span(span, error)\n    else:\n        logger.warning(f\"No span to add error {error} to\")\n"
  },
  {
    "path": "backend/onyx/tracing/framework/create.py",
    "content": "from __future__ import annotations\n\nfrom collections.abc import Iterator\nfrom collections.abc import Mapping\nfrom collections.abc import Sequence\nfrom contextlib import contextmanager\nfrom typing import Any\nfrom typing import TYPE_CHECKING\n\nfrom .setup import get_trace_provider\nfrom .span_data import AgentSpanData\nfrom .span_data import FunctionSpanData\nfrom .span_data import GenerationSpanData\nfrom .spans import Span\nfrom .traces import Trace\nfrom onyx.utils.logger import setup_logger\n\nif TYPE_CHECKING:\n    pass\n\nlogger = setup_logger(__name__)\n\n\ndef trace(\n    workflow_name: str,\n    trace_id: str | None = None,\n    group_id: str | None = None,\n    metadata: dict[str, Any] | None = None,\n    disabled: bool = False,\n) -> Trace:\n    \"\"\"\n    Create a new trace. The trace will not be started automatically; you should either use\n    it as a context manager (`with trace(...):`) or call `trace.start()` + `trace.finish()`\n    manually.\n\n    In addition to the workflow name and optional grouping identifier, you can provide\n    an arbitrary metadata dictionary to attach additional user-defined information to\n    the trace.\n\n    Args:\n        workflow_name: The name of the logical app or workflow. For example, you might provide\n            \"code_bot\" for a coding agent, or \"customer_support_agent\" for a customer support agent.\n        trace_id: The ID of the trace. Optional. If not provided, we will generate an ID. We\n            recommend using `util.gen_trace_id()` to generate a trace ID, to guarantee that IDs are\n            correctly formatted.\n        group_id: Optional grouping identifier to link multiple traces from the same conversation\n            or process. For instance, you might use a chat thread ID.\n        metadata: Optional dictionary of additional metadata to attach to the trace.\n        disabled: If True, we will return a Trace but the Trace will not be recorded.\n\n    Returns:\n        The newly created trace object.\n    \"\"\"\n    current_trace = get_trace_provider().get_current_trace()\n    if current_trace:\n        logger.warning(\n            \"Trace already exists. Creating a new trace, but this is probably a mistake.\"\n        )\n\n    return get_trace_provider().create_trace(\n        name=workflow_name,\n        trace_id=trace_id,\n        group_id=group_id,\n        metadata=metadata,\n        disabled=disabled,\n    )\n\n\n@contextmanager\ndef ensure_trace(\n    workflow_name: str,\n    trace_id: str | None = None,\n    group_id: str | None = None,\n    metadata: dict[str, Any] | None = None,\n    disabled: bool = False,\n) -> Iterator[Trace | None]:\n    \"\"\"\n    Ensure a trace exists. If a trace is already active, reuse it.\n    Otherwise, create a new trace for the duration of the context.\n    \"\"\"\n    current_trace = get_trace_provider().get_current_trace()\n    if current_trace:\n        yield current_trace\n        return\n\n    with trace(\n        workflow_name=workflow_name,\n        trace_id=trace_id,\n        group_id=group_id,\n        metadata=metadata,\n        disabled=disabled,\n    ) as created_trace:\n        yield created_trace\n\n\ndef get_current_trace() -> Trace | None:\n    \"\"\"Returns the currently active trace, if present.\"\"\"\n    return get_trace_provider().get_current_trace()\n\n\ndef get_current_span() -> Span[Any] | None:\n    \"\"\"Returns the currently active span, if present.\"\"\"\n    return get_trace_provider().get_current_span()\n\n\ndef agent_span(\n    name: str,\n    handoffs: list[str] | None = None,\n    tools: list[str] | None = None,\n    output_type: str | None = None,\n    span_id: str | None = None,\n    parent: Trace | Span[Any] | None = None,\n    disabled: bool = False,\n) -> Span[AgentSpanData]:\n    \"\"\"Create a new agent span. The span will not be started automatically, you should either do\n    `with agent_span() ...` or call `span.start()` + `span.finish()` manually.\n\n    Args:\n        name: The name of the agent.\n        handoffs: Optional list of agent names to which this agent could hand off control.\n        tools: Optional list of tool names available to this agent.\n        output_type: Optional name of the output type produced by the agent.\n        span_id: The ID of the span. Optional. If not provided, we will generate an ID. We\n            recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are\n            correctly formatted.\n        parent: The parent span or trace. If not provided, we will automatically use the current\n            trace/span as the parent.\n        disabled: If True, we will return a Span but the Span will not be recorded.\n\n    Returns:\n        The newly created agent span.\n    \"\"\"\n    return get_trace_provider().create_span(\n        span_data=AgentSpanData(\n            name=name, handoffs=handoffs, tools=tools, output_type=output_type\n        ),\n        span_id=span_id,\n        parent=parent,\n        disabled=disabled,\n    )\n\n\ndef function_span(\n    name: str,\n    input: str | None = None,\n    output: str | None = None,\n    span_id: str | None = None,\n    parent: Trace | Span[Any] | None = None,\n    disabled: bool = False,\n) -> Span[FunctionSpanData]:\n    \"\"\"Create a new function span. The span will not be started automatically, you should either do\n    `with function_span() ...` or call `span.start()` + `span.finish()` manually.\n\n    Args:\n        name: The name of the function.\n        input: The input to the function.\n        output: The output of the function.\n        span_id: The ID of the span. Optional. If not provided, we will generate an ID. We\n            recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are\n            correctly formatted.\n        parent: The parent span or trace. If not provided, we will automatically use the current\n            trace/span as the parent.\n        disabled: If True, we will return a Span but the Span will not be recorded.\n\n    Returns:\n        The newly created function span.\n    \"\"\"\n    return get_trace_provider().create_span(\n        span_data=FunctionSpanData(name=name, input=input, output=output),\n        span_id=span_id,\n        parent=parent,\n        disabled=disabled,\n    )\n\n\ndef generation_span(\n    input: Sequence[Mapping[str, Any]] | None = None,\n    output: Sequence[Mapping[str, Any]] | None = None,\n    reasoning: str | None = None,\n    model: str | None = None,\n    model_config: Mapping[str, Any] | None = None,\n    usage: dict[str, Any] | None = None,\n    time_to_first_action_seconds: float | None = None,\n    span_id: str | None = None,\n    parent: Trace | Span[Any] | None = None,\n    disabled: bool = False,\n) -> Span[GenerationSpanData]:\n    \"\"\"Create a new generation span. The span will not be started automatically, you should either\n    do `with generation_span() ...` or call `span.start()` + `span.finish()` manually.\n\n    This span captures the details of a model generation, including the\n    input message sequence, any generated outputs, the model name and\n    configuration, and usage data. If you only need to capture a model\n    response identifier, use `response_span()` instead.\n\n    Args:\n        input: The sequence of input messages sent to the model.\n        output: The sequence of output messages received from the model.\n        reasoning: The reasoning/thinking content from reasoning models (e.g., Claude extended thinking).\n        model: The model identifier used for the generation.\n        model_config: The model configuration (hyperparameters) used.\n        usage: A dictionary of usage information (input tokens, output tokens, etc.).\n        time_to_first_action_seconds: Time elapsed before the first model action is observed.\n        span_id: The ID of the span. Optional. If not provided, we will generate an ID. We\n            recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are\n            correctly formatted.\n        parent: The parent span or trace. If not provided, we will automatically use the current\n            trace/span as the parent.\n        disabled: If True, we will return a Span but the Span will not be recorded.\n\n    Returns:\n        The newly created generation span.\n    \"\"\"\n    return get_trace_provider().create_span(\n        span_data=GenerationSpanData(\n            input=input,\n            output=output,\n            reasoning=reasoning,\n            model=model,\n            model_config=model_config,\n            usage=usage,\n            time_to_first_action_seconds=time_to_first_action_seconds,\n        ),\n        span_id=span_id,\n        parent=parent,\n        disabled=disabled,\n    )\n"
  },
  {
    "path": "backend/onyx/tracing/framework/processor_interface.py",
    "content": "import abc\nfrom typing import Any\nfrom typing import TYPE_CHECKING\n\nif TYPE_CHECKING:\n    from .spans import Span\n    from .traces import Trace\n\n\nclass TracingProcessor(abc.ABC):\n    \"\"\"Interface for processing and monitoring traces and spans in the OpenAI Agents system.\n\n    This abstract class defines the interface that all tracing processors must implement.\n    Processors receive notifications when traces and spans start and end, allowing them\n    to collect, process, and export tracing data.\n\n    Example:\n        ```python\n        class CustomProcessor(TracingProcessor):\n            def __init__(self):\n                self.active_traces = {}\n                self.active_spans = {}\n\n            def on_trace_start(self, trace):\n                self.active_traces[trace.trace_id] = trace\n\n            def on_trace_end(self, trace):\n                # Process completed trace\n                del self.active_traces[trace.trace_id]\n\n            def on_span_start(self, span):\n                self.active_spans[span.span_id] = span\n\n            def on_span_end(self, span):\n                # Process completed span\n                del self.active_spans[span.span_id]\n\n            def shutdown(self):\n                # Clean up resources\n                self.active_traces.clear()\n                self.active_spans.clear()\n\n            def force_flush(self):\n                # Force processing of any queued items\n                pass\n        ```\n\n    Notes:\n        - All methods should be thread-safe\n        - Methods should not block for long periods\n        - Handle errors gracefully to prevent disrupting agent execution\n    \"\"\"\n\n    @abc.abstractmethod\n    def on_trace_start(self, trace: \"Trace\") -> None:\n        \"\"\"Called when a new trace begins execution.\n\n        Args:\n            trace: The trace that started. Contains workflow name and metadata.\n\n        Notes:\n            - Called synchronously on trace start\n            - Should return quickly to avoid blocking execution\n            - Any errors should be caught and handled internally\n        \"\"\"\n\n    @abc.abstractmethod\n    def on_trace_end(self, trace: \"Trace\") -> None:\n        \"\"\"Called when a trace completes execution.\n\n        Args:\n            trace: The completed trace containing all spans and results.\n\n        Notes:\n            - Called synchronously when trace finishes\n            - Good time to export/process the complete trace\n            - Should handle cleanup of any trace-specific resources\n        \"\"\"\n\n    @abc.abstractmethod\n    def on_span_start(self, span: \"Span[Any]\") -> None:\n        \"\"\"Called when a new span begins execution.\n\n        Args:\n            span: The span that started. Contains operation details and context.\n\n        Notes:\n            - Called synchronously on span start\n            - Should return quickly to avoid blocking execution\n            - Spans are automatically nested under current trace/span\n        \"\"\"\n\n    @abc.abstractmethod\n    def on_span_end(self, span: \"Span[Any]\") -> None:\n        \"\"\"Called when a span completes execution.\n\n        Args:\n            span: The completed span containing execution results.\n\n        Notes:\n            - Called synchronously when span finishes\n            - Should not block or raise exceptions\n            - Good time to export/process the individual span\n        \"\"\"\n\n    @abc.abstractmethod\n    def shutdown(self) -> None:\n        \"\"\"Called when the application stops to clean up resources.\n\n        Should perform any necessary cleanup like:\n        - Flushing queued traces/spans\n        - Closing connections\n        - Releasing resources\n        \"\"\"\n\n    @abc.abstractmethod\n    def force_flush(self) -> None:\n        \"\"\"Forces immediate processing of any queued traces/spans.\n\n        Notes:\n            - Should process all queued items before returning\n            - Useful before shutdown or when immediate processing is needed\n            - May block while processing completes\n        \"\"\"\n\n\nclass TracingExporter(abc.ABC):\n    \"\"\"Exports traces and spans. For example, could log them or send them to a backend.\"\"\"\n\n    @abc.abstractmethod\n    def export(self, items: list[\"Trace | Span[Any]\"]) -> None:\n        \"\"\"Exports a list of traces and spans.\n\n        Args:\n            items: The items to export.\n        \"\"\"\n"
  },
  {
    "path": "backend/onyx/tracing/framework/provider.py",
    "content": "from __future__ import annotations\n\nimport threading\nimport uuid\nfrom abc import ABC\nfrom abc import abstractmethod\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nfrom .processor_interface import TracingProcessor\nfrom .scope import Scope\nfrom .spans import NoOpSpan\nfrom .spans import Span\nfrom .spans import SpanImpl\nfrom .spans import TSpanData\nfrom .traces import NoOpTrace\nfrom .traces import Trace\nfrom .traces import TraceImpl\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger(__name__)\n\n\nclass SynchronousMultiTracingProcessor(TracingProcessor):\n    \"\"\"\n    Forwards all calls to a list of TracingProcessors, in order of registration.\n    \"\"\"\n\n    def __init__(self) -> None:\n        # Using a tuple to avoid race conditions when iterating over processors\n        self._processors: tuple[TracingProcessor, ...] = ()\n        self._lock = threading.Lock()\n\n    def add_tracing_processor(self, tracing_processor: TracingProcessor) -> None:\n        \"\"\"\n        Add a processor to the list of processors. Each processor will receive all traces/spans.\n        \"\"\"\n        with self._lock:\n            self._processors += (tracing_processor,)\n\n    def set_processors(self, processors: list[TracingProcessor]) -> None:\n        \"\"\"\n        Set the list of processors. This will replace the current list of processors.\n        \"\"\"\n        with self._lock:\n            self._processors = tuple(processors)\n\n    def on_trace_start(self, trace: Trace) -> None:\n        \"\"\"\n        Called when a trace is started.\n        \"\"\"\n        for processor in self._processors:\n            try:\n                processor.on_trace_start(trace)\n            except Exception as e:\n                logger.error(\n                    f\"Error in trace processor {processor} during on_trace_start: {e}\"\n                )\n\n    def on_trace_end(self, trace: Trace) -> None:\n        \"\"\"\n        Called when a trace is finished.\n        \"\"\"\n        for processor in self._processors:\n            try:\n                processor.on_trace_end(trace)\n            except Exception as e:\n                logger.error(\n                    f\"Error in trace processor {processor} during on_trace_end: {e}\"\n                )\n\n    def on_span_start(self, span: Span[Any]) -> None:\n        \"\"\"\n        Called when a span is started.\n        \"\"\"\n        for processor in self._processors:\n            try:\n                processor.on_span_start(span)\n            except Exception as e:\n                logger.error(\n                    f\"Error in trace processor {processor} during on_span_start: {e}\"\n                )\n\n    def on_span_end(self, span: Span[Any]) -> None:\n        \"\"\"\n        Called when a span is finished.\n        \"\"\"\n        for processor in self._processors:\n            try:\n                processor.on_span_end(span)\n            except Exception as e:\n                logger.error(\n                    f\"Error in trace processor {processor} during on_span_end: {e}\"\n                )\n\n    def shutdown(self) -> None:\n        \"\"\"\n        Called when the application stops.\n        \"\"\"\n        for processor in self._processors:\n            logger.debug(f\"Shutting down trace processor {processor}\")\n            try:\n                processor.shutdown()\n            except Exception as e:\n                logger.error(f\"Error shutting down trace processor {processor}: {e}\")\n\n    def force_flush(self) -> None:\n        \"\"\"\n        Force the processors to flush their buffers.\n        \"\"\"\n        for processor in self._processors:\n            try:\n                processor.force_flush()\n            except Exception as e:\n                logger.error(f\"Error flushing trace processor {processor}: {e}\")\n\n\nclass TraceProvider(ABC):\n    \"\"\"Interface for creating traces and spans.\"\"\"\n\n    @abstractmethod\n    def register_processor(self, processor: TracingProcessor) -> None:\n        \"\"\"Add a processor that will receive all traces and spans.\"\"\"\n\n    @abstractmethod\n    def set_processors(self, processors: list[TracingProcessor]) -> None:\n        \"\"\"Replace the list of processors with ``processors``.\"\"\"\n\n    @abstractmethod\n    def get_current_trace(self) -> Trace | None:\n        \"\"\"Return the currently active trace, if any.\"\"\"\n\n    @abstractmethod\n    def get_current_span(self) -> Span[Any] | None:\n        \"\"\"Return the currently active span, if any.\"\"\"\n\n    @abstractmethod\n    def time_iso(self) -> str:\n        \"\"\"Return the current time in ISO 8601 format.\"\"\"\n\n    @abstractmethod\n    def gen_trace_id(self) -> str:\n        \"\"\"Generate a new trace identifier.\"\"\"\n\n    @abstractmethod\n    def gen_span_id(self) -> str:\n        \"\"\"Generate a new span identifier.\"\"\"\n\n    @abstractmethod\n    def gen_group_id(self) -> str:\n        \"\"\"Generate a new group identifier.\"\"\"\n\n    @abstractmethod\n    def create_trace(\n        self,\n        name: str,\n        trace_id: str | None = None,\n        group_id: str | None = None,\n        metadata: dict[str, Any] | None = None,\n        disabled: bool = False,\n    ) -> Trace:\n        \"\"\"Create a new trace.\"\"\"\n\n    @abstractmethod\n    def create_span(\n        self,\n        span_data: TSpanData,\n        span_id: str | None = None,\n        parent: Trace | Span[Any] | None = None,\n        disabled: bool = False,\n    ) -> Span[TSpanData]:\n        \"\"\"Create a new span.\"\"\"\n\n    @abstractmethod\n    def shutdown(self) -> None:\n        \"\"\"Clean up any resources used by the provider.\"\"\"\n\n\nclass DefaultTraceProvider(TraceProvider):\n    def __init__(self) -> None:\n        self._multi_processor = SynchronousMultiTracingProcessor()\n\n    def register_processor(self, processor: TracingProcessor) -> None:\n        \"\"\"\n        Add a processor to the list of processors. Each processor will receive all traces/spans.\n        \"\"\"\n        self._multi_processor.add_tracing_processor(processor)\n\n    def set_processors(self, processors: list[TracingProcessor]) -> None:\n        \"\"\"\n        Set the list of processors. This will replace the current list of processors.\n        \"\"\"\n        self._multi_processor.set_processors(processors)\n\n    def get_current_trace(self) -> Trace | None:\n        \"\"\"\n        Returns the currently active trace, if any.\n        \"\"\"\n        return Scope.get_current_trace()\n\n    def get_current_span(self) -> Span[Any] | None:\n        \"\"\"\n        Returns the currently active span, if any.\n        \"\"\"\n        return Scope.get_current_span()\n\n    def time_iso(self) -> str:\n        \"\"\"Return the current time in ISO 8601 format.\"\"\"\n        return datetime.now(timezone.utc).isoformat()\n\n    def gen_trace_id(self) -> str:\n        \"\"\"Generate a new trace ID.\"\"\"\n        return f\"trace_{uuid.uuid4().hex}\"\n\n    def gen_span_id(self) -> str:\n        \"\"\"Generate a new span ID.\"\"\"\n        return f\"span_{uuid.uuid4().hex[:24]}\"\n\n    def gen_group_id(self) -> str:\n        \"\"\"Generate a new group ID.\"\"\"\n        return f\"group_{uuid.uuid4().hex[:24]}\"\n\n    def create_trace(\n        self,\n        name: str,\n        trace_id: str | None = None,\n        group_id: str | None = None,\n        metadata: dict[str, Any] | None = None,\n        disabled: bool = False,\n    ) -> Trace:\n        \"\"\"\n        Create a new trace.\n        \"\"\"\n        if disabled:\n            logger.debug(f\"Tracing is disabled. Not creating trace {name}\")\n            return NoOpTrace()\n\n        trace_id = trace_id or self.gen_trace_id()\n\n        logger.debug(f\"Creating trace {name} with id {trace_id}\")\n\n        return TraceImpl(\n            name=name,\n            trace_id=trace_id,\n            group_id=group_id,\n            metadata=metadata,\n            processor=self._multi_processor,\n        )\n\n    def create_span(\n        self,\n        span_data: TSpanData,\n        span_id: str | None = None,\n        parent: Trace | Span[Any] | None = None,\n        disabled: bool = False,\n    ) -> Span[TSpanData]:\n        \"\"\"\n        Create a new span.\n        \"\"\"\n        if disabled:\n            logger.debug(f\"Tracing is disabled. Not creating span {span_data}\")\n            return NoOpSpan(span_data)\n\n        trace_id: str\n        parent_id: str | None\n\n        if not parent:\n            current_span = Scope.get_current_span()\n            current_trace = Scope.get_current_trace()\n            if current_trace is None:\n                logger.error(\n                    \"No active trace. Make sure to start a trace with `trace()` first Returning NoOpSpan.\"\n                )\n                return NoOpSpan(span_data)\n            elif isinstance(current_trace, NoOpTrace) or isinstance(\n                current_span, NoOpSpan\n            ):\n                logger.debug(\n                    f\"Parent {current_span} or {current_trace} is no-op, returning NoOpSpan\"\n                )\n                return NoOpSpan(span_data)\n\n            parent_id = current_span.span_id if current_span else None\n            trace_id = current_trace.trace_id\n\n        elif isinstance(parent, Trace):\n            if isinstance(parent, NoOpTrace):\n                logger.debug(f\"Parent {parent} is no-op, returning NoOpSpan\")\n                return NoOpSpan(span_data)\n            trace_id = parent.trace_id\n            parent_id = None\n        elif isinstance(parent, Span):\n            if isinstance(parent, NoOpSpan):\n                logger.debug(f\"Parent {parent} is no-op, returning NoOpSpan\")\n                return NoOpSpan(span_data)\n            parent_id = parent.span_id\n            trace_id = parent.trace_id\n        else:\n            # This should never happen, but mypy needs it\n            raise ValueError(f\"Invalid parent type: {type(parent)}\")\n\n        return SpanImpl(\n            trace_id=trace_id,\n            span_id=span_id or self.gen_span_id(),\n            parent_id=parent_id,\n            processor=self._multi_processor,\n            span_data=span_data,\n        )\n\n    def shutdown(self) -> None:\n        try:\n            logger.debug(\"Shutting down trace provider\")\n            self._multi_processor.shutdown()\n        except Exception as e:\n            logger.error(f\"Error shutting down trace provider: {e}\")\n"
  },
  {
    "path": "backend/onyx/tracing/framework/scope.py",
    "content": "import contextvars\nfrom typing import Any\nfrom typing import TYPE_CHECKING\n\nif TYPE_CHECKING:\n    from .spans import Span\n    from .traces import Trace\n\n_current_span: contextvars.ContextVar[\"Span[Any] | None\"] = contextvars.ContextVar(\n    \"current_span\", default=None\n)\n\n_current_trace: contextvars.ContextVar[\"Trace | None\"] = contextvars.ContextVar(\n    \"current_trace\", default=None\n)\n\n\nclass Scope:\n    \"\"\"\n    Manages the current span and trace in the context.\n    \"\"\"\n\n    @classmethod\n    def get_current_span(cls) -> \"Span[Any] | None\":\n        return _current_span.get()\n\n    @classmethod\n    def set_current_span(\n        cls, span: \"Span[Any] | None\"\n    ) -> \"contextvars.Token[Span[Any] | None]\":\n        return _current_span.set(span)\n\n    @classmethod\n    def reset_current_span(cls, token: \"contextvars.Token[Span[Any] | None]\") -> None:\n        _current_span.reset(token)\n\n    @classmethod\n    def get_current_trace(cls) -> \"Trace | None\":\n        return _current_trace.get()\n\n    @classmethod\n    def set_current_trace(\n        cls, trace: \"Trace | None\"\n    ) -> \"contextvars.Token[Trace | None]\":\n        return _current_trace.set(trace)\n\n    @classmethod\n    def reset_current_trace(cls, token: \"contextvars.Token[Trace | None]\") -> None:\n        _current_trace.reset(token)\n"
  },
  {
    "path": "backend/onyx/tracing/framework/setup.py",
    "content": "from __future__ import annotations\n\nfrom typing import TYPE_CHECKING\n\nif TYPE_CHECKING:\n    from .provider import TraceProvider\n\nGLOBAL_TRACE_PROVIDER: TraceProvider | None = None\n\n\ndef set_trace_provider(provider: TraceProvider) -> None:\n    \"\"\"Set the global trace provider used by tracing utilities.\"\"\"\n    global GLOBAL_TRACE_PROVIDER\n    GLOBAL_TRACE_PROVIDER = provider\n\n\ndef get_trace_provider() -> TraceProvider:\n    \"\"\"Get the global trace provider used by tracing utilities.\"\"\"\n    if GLOBAL_TRACE_PROVIDER is None:\n        raise RuntimeError(\"Trace provider not set\")\n    return GLOBAL_TRACE_PROVIDER\n"
  },
  {
    "path": "backend/onyx/tracing/framework/span_data.py",
    "content": "import abc\nfrom collections.abc import Mapping\nfrom collections.abc import Sequence\nfrom typing import Any\n\n\nclass SpanData(abc.ABC):\n    \"\"\"\n    Represents span data in the trace.\n    \"\"\"\n\n    @abc.abstractmethod\n    def export(self) -> dict[str, Any]:\n        \"\"\"Export the span data as a dictionary.\"\"\"\n\n    @property\n    @abc.abstractmethod\n    def type(self) -> str:\n        \"\"\"Return the type of the span.\"\"\"\n\n\nclass AgentSpanData(SpanData):\n    \"\"\"\n    Represents an Agent Span in the trace.\n    Includes name, handoffs, tools, and output type.\n    \"\"\"\n\n    __slots__ = (\"name\", \"handoffs\", \"tools\", \"output_type\")\n\n    def __init__(\n        self,\n        name: str,\n        handoffs: list[str] | None = None,\n        tools: list[str] | None = None,\n        output_type: str | None = None,\n    ):\n        self.name = name\n        self.handoffs: list[str] | None = handoffs\n        self.tools: list[str] | None = tools\n        self.output_type: str | None = output_type\n\n    @property\n    def type(self) -> str:\n        return \"agent\"\n\n    def export(self) -> dict[str, Any]:\n        return {\n            \"type\": self.type,\n            \"name\": self.name,\n            \"handoffs\": self.handoffs,\n            \"tools\": self.tools,\n            \"output_type\": self.output_type,\n        }\n\n\nclass FunctionSpanData(SpanData):\n    \"\"\"\n    Represents a Function Span in the trace.\n    Includes input, output and MCP data (if applicable).\n    \"\"\"\n\n    __slots__ = (\"name\", \"input\", \"output\", \"mcp_data\")\n\n    def __init__(\n        self,\n        name: str,\n        input: str | None,\n        output: Any | None,\n        mcp_data: dict[str, Any] | None = None,\n    ):\n        self.name = name\n        self.input = input\n        self.output = output\n        self.mcp_data = mcp_data\n\n    @property\n    def type(self) -> str:\n        return \"function\"\n\n    def export(self) -> dict[str, Any]:\n        return {\n            \"type\": self.type,\n            \"name\": self.name,\n            \"input\": self.input,\n            \"output\": str(self.output) if self.output else None,\n            \"mcp_data\": self.mcp_data,\n        }\n\n\nclass GenerationSpanData(SpanData):\n    \"\"\"\n    Represents a Generation Span in the trace.\n    Includes input, output, model, model configuration, and usage.\n    \"\"\"\n\n    __slots__ = (\n        \"input\",\n        \"output\",\n        \"reasoning\",\n        \"model\",\n        \"model_config\",\n        \"usage\",\n        \"time_to_first_action_seconds\",\n    )\n\n    def __init__(\n        self,\n        input: Sequence[Mapping[str, Any]] | None = None,\n        output: Sequence[Mapping[str, Any]] | None = None,\n        reasoning: str | None = None,\n        model: str | None = None,\n        model_config: Mapping[str, Any] | None = None,\n        usage: dict[str, Any] | None = None,\n        time_to_first_action_seconds: float | None = None,\n    ):\n        self.input = input\n        self.output = output\n        self.reasoning = reasoning\n        self.model = model\n        self.model_config = model_config\n        self.usage = usage\n        self.time_to_first_action_seconds = time_to_first_action_seconds\n\n    @property\n    def type(self) -> str:\n        return \"generation\"\n\n    def export(self) -> dict[str, Any]:\n        return {\n            \"type\": self.type,\n            \"input\": self.input,\n            \"output\": self.output,\n            \"reasoning\": self.reasoning,\n            \"model\": self.model,\n            \"model_config\": self.model_config,\n            \"usage\": self.usage,\n            \"time_to_first_action_seconds\": self.time_to_first_action_seconds,\n        }\n"
  },
  {
    "path": "backend/onyx/tracing/framework/spans.py",
    "content": "from __future__ import annotations\n\nimport abc\nimport contextvars\nfrom types import TracebackType\nfrom typing import Any\nfrom typing import Generic\nfrom typing import TypeVar\n\nfrom typing_extensions import TypedDict\n\nfrom . import util\nfrom .processor_interface import TracingProcessor\nfrom .scope import Scope\nfrom .span_data import SpanData\n\nTSpanData = TypeVar(\"TSpanData\", bound=SpanData)\n\n\nclass SpanError(TypedDict):\n    \"\"\"Represents an error that occurred during span execution.\n\n    Attributes:\n        message: A human-readable error description\n        data: Optional dictionary containing additional error context\n    \"\"\"\n\n    message: str\n    data: dict[str, Any] | None\n\n\nclass Span(abc.ABC, Generic[TSpanData]):\n    \"\"\"Base class for representing traceable operations with timing and context.\n\n    A span represents a single operation within a trace (e.g., an LLM call, tool execution,\n    or agent run). Spans track timing, relationships between operations, and operation-specific\n    data.\n\n    Type Args:\n        TSpanData: The type of span-specific data this span contains.\n\n    Example:\n        ```python\n        # Creating a custom span\n        with custom_span(\"database_query\", {\n            \"operation\": \"SELECT\",\n            \"table\": \"users\"\n        }) as span:\n            results = await db.query(\"SELECT * FROM users\")\n            span.set_output({\"count\": len(results)})\n\n        # Handling errors in spans\n        with custom_span(\"risky_operation\") as span:\n            try:\n                result = perform_risky_operation()\n            except Exception as e:\n                span.set_error({\n                    \"message\": str(e),\n                    \"data\": {\"operation\": \"risky_operation\"}\n                })\n                raise\n        ```\n\n        Notes:\n        - Spans automatically nest under the current trace\n        - Use context managers for reliable start/finish\n        - Include relevant data but avoid sensitive information\n        - Handle errors properly using set_error()\n    \"\"\"\n\n    @property\n    @abc.abstractmethod\n    def trace_id(self) -> str:\n        \"\"\"The ID of the trace this span belongs to.\n\n        Returns:\n            str: Unique identifier of the parent trace.\n        \"\"\"\n\n    @property\n    @abc.abstractmethod\n    def span_id(self) -> str:\n        \"\"\"Unique identifier for this span.\n\n        Returns:\n            str: The span's unique ID within its trace.\n        \"\"\"\n\n    @property\n    @abc.abstractmethod\n    def span_data(self) -> TSpanData:\n        \"\"\"Operation-specific data for this span.\n\n        Returns:\n            TSpanData: Data specific to this type of span (e.g., LLM generation data).\n        \"\"\"\n\n    @abc.abstractmethod\n    def start(self, mark_as_current: bool = False) -> None:\n        \"\"\"\n        Start the span.\n\n        Args:\n            mark_as_current: If true, the span will be marked as the current span.\n        \"\"\"\n\n    @abc.abstractmethod\n    def finish(self, reset_current: bool = False) -> None:\n        \"\"\"\n        Finish the span.\n\n        Args:\n            reset_current: If true, the span will be reset as the current span.\n        \"\"\"\n\n    @abc.abstractmethod\n    def __enter__(self) -> Span[TSpanData]:\n        pass\n\n    @abc.abstractmethod\n    def __exit__(\n        self,\n        exc_type: type[BaseException] | None,\n        exc_val: BaseException | None,\n        exc_tb: TracebackType | None,\n    ) -> None:\n        pass\n\n    @property\n    @abc.abstractmethod\n    def parent_id(self) -> str | None:\n        \"\"\"ID of the parent span, if any.\n\n        Returns:\n            str | None: The parent span's ID, or None if this is a root span.\n        \"\"\"\n\n    @abc.abstractmethod\n    def set_error(self, error: SpanError) -> None:\n        pass\n\n    @property\n    @abc.abstractmethod\n    def error(self) -> SpanError | None:\n        \"\"\"Any error that occurred during span execution.\n\n        Returns:\n            SpanError | None: Error details if an error occurred, None otherwise.\n        \"\"\"\n\n    @abc.abstractmethod\n    def export(self) -> dict[str, Any] | None:\n        pass\n\n    @property\n    @abc.abstractmethod\n    def started_at(self) -> str | None:\n        \"\"\"When the span started execution.\n\n        Returns:\n            str | None: ISO format timestamp of span start, None if not started.\n        \"\"\"\n\n    @property\n    @abc.abstractmethod\n    def ended_at(self) -> str | None:\n        \"\"\"When the span finished execution.\n\n        Returns:\n            str | None: ISO format timestamp of span end, None if not finished.\n        \"\"\"\n\n\nclass NoOpSpan(Span[TSpanData]):\n    \"\"\"A no-op implementation of Span that doesn't record any data.\n\n    Used when tracing is disabled but span operations still need to work.\n\n    Args:\n        span_data: The operation-specific data for this span.\n    \"\"\"\n\n    __slots__ = (\"_span_data\", \"_prev_span_token\")\n\n    def __init__(self, span_data: TSpanData):\n        self._span_data = span_data\n        self._prev_span_token: contextvars.Token[Span[TSpanData] | None] | None = None\n\n    @property\n    def trace_id(self) -> str:\n        return \"no-op\"\n\n    @property\n    def span_id(self) -> str:\n        return \"no-op\"\n\n    @property\n    def span_data(self) -> TSpanData:\n        return self._span_data\n\n    @property\n    def parent_id(self) -> str | None:\n        return None\n\n    def start(self, mark_as_current: bool = False) -> None:\n        if mark_as_current:\n            self._prev_span_token = Scope.set_current_span(self)\n\n    def finish(self, reset_current: bool = False) -> None:\n        if reset_current and self._prev_span_token is not None:\n            Scope.reset_current_span(self._prev_span_token)\n            self._prev_span_token = None\n\n    def __enter__(self) -> Span[TSpanData]:\n        self.start(mark_as_current=True)\n        return self\n\n    def __exit__(\n        self,\n        exc_type: type[BaseException] | None,\n        exc_val: BaseException | None,\n        exc_tb: TracebackType | None,\n    ) -> None:\n        reset_current = True\n        if exc_type is GeneratorExit:\n            reset_current = False\n\n        self.finish(reset_current=reset_current)\n\n    def set_error(self, error: SpanError) -> None:\n        pass\n\n    @property\n    def error(self) -> SpanError | None:\n        return None\n\n    def export(self) -> dict[str, Any] | None:\n        return None\n\n    @property\n    def started_at(self) -> str | None:\n        return None\n\n    @property\n    def ended_at(self) -> str | None:\n        return None\n\n\nclass SpanImpl(Span[TSpanData]):\n    __slots__ = (\n        \"_trace_id\",\n        \"_span_id\",\n        \"_parent_id\",\n        \"_started_at\",\n        \"_ended_at\",\n        \"_error\",\n        \"_prev_span_token\",\n        \"_processor\",\n        \"_span_data\",\n    )\n\n    def __init__(\n        self,\n        trace_id: str,\n        span_id: str | None,\n        parent_id: str | None,\n        processor: TracingProcessor,\n        span_data: TSpanData,\n    ):\n        self._trace_id = trace_id\n        self._span_id = span_id or util.gen_span_id()\n        self._parent_id = parent_id\n        self._started_at: str | None = None\n        self._ended_at: str | None = None\n        self._processor = processor\n        self._error: SpanError | None = None\n        self._prev_span_token: contextvars.Token[Span[TSpanData] | None] | None = None\n        self._span_data = span_data\n\n    @property\n    def trace_id(self) -> str:\n        return self._trace_id\n\n    @property\n    def span_id(self) -> str:\n        return self._span_id\n\n    @property\n    def span_data(self) -> TSpanData:\n        return self._span_data\n\n    @property\n    def parent_id(self) -> str | None:\n        return self._parent_id\n\n    def start(self, mark_as_current: bool = False) -> None:\n        if self.started_at is not None:\n            return\n\n        self._started_at = util.time_iso()\n        self._processor.on_span_start(self)\n        if mark_as_current:\n            self._prev_span_token = Scope.set_current_span(self)\n\n    def finish(self, reset_current: bool = False) -> None:\n        if self.ended_at is not None:\n            return\n\n        self._ended_at = util.time_iso()\n        self._processor.on_span_end(self)\n        if reset_current and self._prev_span_token is not None:\n            Scope.reset_current_span(self._prev_span_token)\n            self._prev_span_token = None\n\n    def __enter__(self) -> Span[TSpanData]:\n        self.start(mark_as_current=True)\n        return self\n\n    def __exit__(\n        self,\n        exc_type: type[BaseException] | None,\n        exc_val: BaseException | None,\n        exc_tb: TracebackType | None,\n    ) -> None:\n        reset_current = True\n        if exc_type is GeneratorExit:\n            reset_current = False\n\n        self.finish(reset_current=reset_current)\n\n    def set_error(self, error: SpanError) -> None:\n        self._error = error\n\n    @property\n    def error(self) -> SpanError | None:\n        return self._error\n\n    @property\n    def started_at(self) -> str | None:\n        return self._started_at\n\n    @property\n    def ended_at(self) -> str | None:\n        return self._ended_at\n\n    def export(self) -> dict[str, Any] | None:\n        return {\n            \"object\": \"trace.span\",\n            \"id\": self.span_id,\n            \"trace_id\": self.trace_id,\n            \"parent_id\": self._parent_id,\n            \"started_at\": self._started_at,\n            \"ended_at\": self._ended_at,\n            \"span_data\": self.span_data.export(),\n            \"error\": self._error,\n        }\n"
  },
  {
    "path": "backend/onyx/tracing/framework/traces.py",
    "content": "from __future__ import annotations\n\nimport abc\nimport contextvars\nfrom types import TracebackType\nfrom typing import Any\nfrom typing import TYPE_CHECKING\n\nfrom . import util\nfrom .scope import Scope\n\nif TYPE_CHECKING:\n    from .processor_interface import TracingProcessor\n\n\nclass Trace(abc.ABC):\n    \"\"\"A complete end-to-end workflow containing related spans and metadata.\n\n    A trace represents a logical workflow or operation (e.g., \"Customer Service Query\"\n    or \"Code Generation\") and contains all the spans (individual operations) that occur\n    during that workflow.\n\n    Example:\n        ```python\n        # Basic trace usage\n        with trace(\"Order Processing\") as t:\n            validation_result = await Runner.run(validator, order_data)\n            if validation_result.approved:\n                await Runner.run(processor, order_data)\n\n        # Trace with metadata and grouping\n        with trace(\n            \"Customer Service\",\n            group_id=\"chat_123\",\n            metadata={\"customer\": \"user_456\"}\n        ) as t:\n            result = await Runner.run(support_agent, query)\n        ```\n\n    Notes:\n        - Use descriptive workflow names\n        - Group related traces with consistent group_ids\n        - Add relevant metadata for filtering/analysis\n        - Use context managers for reliable cleanup\n        - Consider privacy when adding trace data\n    \"\"\"\n\n    @abc.abstractmethod\n    def __enter__(self) -> Trace:\n        pass\n\n    @abc.abstractmethod\n    def __exit__(\n        self,\n        exc_type: type[BaseException] | None,\n        exc_val: BaseException | None,\n        exc_tb: TracebackType | None,\n    ) -> None:\n        pass\n\n    @abc.abstractmethod\n    def start(self, mark_as_current: bool = False) -> None:\n        \"\"\"Start the trace and optionally mark it as the current trace.\n\n        Args:\n            mark_as_current: If true, marks this trace as the current trace\n                in the execution context.\n\n        Notes:\n            - Must be called before any spans can be added\n            - Only one trace can be current at a time\n            - Thread-safe when using mark_as_current\n        \"\"\"\n\n    @abc.abstractmethod\n    def finish(self, reset_current: bool = False) -> None:\n        \"\"\"Finish the trace and optionally reset the current trace.\n\n        Args:\n            reset_current: If true, resets the current trace to the previous\n                trace in the execution context.\n\n        Notes:\n            - Must be called to complete the trace\n            - Finalizes all open spans\n            - Thread-safe when using reset_current\n        \"\"\"\n\n    @property\n    @abc.abstractmethod\n    def trace_id(self) -> str:\n        \"\"\"Get the unique identifier for this trace.\n\n        Returns:\n            str: The trace's unique ID in the format 'trace_<32_alphanumeric>'\n\n        Notes:\n            - IDs are globally unique\n            - Used to link spans to their parent trace\n            - Can be used to look up traces in the dashboard\n        \"\"\"\n\n    @property\n    @abc.abstractmethod\n    def name(self) -> str:\n        \"\"\"Get the human-readable name of this workflow trace.\n\n        Returns:\n            str: The workflow name (e.g., \"Customer Service\", \"Data Processing\")\n\n        Notes:\n            - Should be descriptive and meaningful\n            - Used for grouping and filtering in the dashboard\n            - Helps identify the purpose of the trace\n        \"\"\"\n\n    @abc.abstractmethod\n    def export(self) -> dict[str, Any] | None:\n        \"\"\"Export the trace data as a serializable dictionary.\n\n        Returns:\n            dict | None: Dictionary containing trace data, or None if tracing is disabled.\n\n        Notes:\n            - Includes all spans and their data\n            - Used for sending traces to backends\n            - May include metadata and group ID\n        \"\"\"\n\n\nclass NoOpTrace(Trace):\n    \"\"\"A no-op implementation of Trace that doesn't record any data.\n\n    Used when tracing is disabled but trace operations still need to work.\n    Maintains proper context management but doesn't store or export any data.\n\n    Example:\n        ```python\n        # When tracing is disabled, traces become NoOpTrace\n        with trace(\"Disabled Workflow\") as t:\n            # Operations still work but nothing is recorded\n            await Runner.run(agent, \"query\")\n        ```\n    \"\"\"\n\n    def __init__(self) -> None:\n        self._started = False\n        self._prev_context_token: contextvars.Token[Trace | None] | None = None\n\n    def __enter__(self) -> Trace:\n        if self._started:\n            return self\n\n        self._started = True\n        self.start(mark_as_current=True)\n\n        return self\n\n    def __exit__(\n        self,\n        exc_type: type[BaseException] | None,\n        exc_val: BaseException | None,\n        exc_tb: TracebackType | None,\n    ) -> None:\n        self.finish(reset_current=True)\n\n    def start(self, mark_as_current: bool = False) -> None:\n        if mark_as_current:\n            self._prev_context_token = Scope.set_current_trace(self)\n\n    def finish(self, reset_current: bool = False) -> None:\n        if reset_current and self._prev_context_token is not None:\n            Scope.reset_current_trace(self._prev_context_token)\n            self._prev_context_token = None\n\n    @property\n    def trace_id(self) -> str:\n        \"\"\"The trace's unique identifier.\n\n        Returns:\n            str: A unique ID for this trace.\n        \"\"\"\n        return \"no-op\"\n\n    @property\n    def name(self) -> str:\n        \"\"\"The workflow name for this trace.\n\n        Returns:\n            str: Human-readable name describing this workflow.\n        \"\"\"\n        return \"no-op\"\n\n    def export(self) -> dict[str, Any] | None:\n        \"\"\"Export the trace data as a dictionary.\n\n        Returns:\n            dict | None: Trace data in exportable format, or None if no data.\n        \"\"\"\n        return None\n\n\nNO_OP_TRACE = NoOpTrace()\n\n\nclass TraceImpl(Trace):\n    \"\"\"\n    A trace that will be recorded by the tracing library.\n    \"\"\"\n\n    __slots__ = (\n        \"_name\",\n        \"_trace_id\",\n        \"group_id\",\n        \"metadata\",\n        \"_prev_context_token\",\n        \"_processor\",\n        \"_started\",\n    )\n\n    def __init__(\n        self,\n        name: str,\n        trace_id: str | None,\n        group_id: str | None,\n        metadata: dict[str, Any] | None,\n        processor: TracingProcessor,\n    ):\n        self._name = name\n        self._trace_id = trace_id or util.gen_trace_id()\n        self.group_id = group_id\n        self.metadata = metadata\n        self._prev_context_token: contextvars.Token[Trace | None] | None = None\n        self._processor = processor\n        self._started = False\n\n    @property\n    def trace_id(self) -> str:\n        return self._trace_id\n\n    @property\n    def name(self) -> str:\n        return self._name\n\n    def start(self, mark_as_current: bool = False) -> None:\n        if self._started:\n            return\n\n        self._started = True\n        self._processor.on_trace_start(self)\n\n        if mark_as_current:\n            self._prev_context_token = Scope.set_current_trace(self)\n\n    def finish(self, reset_current: bool = False) -> None:\n        if not self._started:\n            return\n\n        self._processor.on_trace_end(self)\n\n        if reset_current and self._prev_context_token is not None:\n            Scope.reset_current_trace(self._prev_context_token)\n            self._prev_context_token = None\n\n    def __enter__(self) -> Trace:\n        if self._started:\n            return self\n\n        self.start(mark_as_current=True)\n        return self\n\n    def __exit__(\n        self,\n        exc_type: type[BaseException] | None,\n        exc_val: BaseException | None,\n        exc_tb: TracebackType | None,\n    ) -> None:\n        self.finish(reset_current=exc_type is not GeneratorExit)\n\n    def export(self) -> dict[str, Any] | None:\n        return {\n            \"object\": \"trace\",\n            \"id\": self.trace_id,\n            \"workflow_name\": self.name,\n            \"metadata\": self.metadata,\n        }\n"
  },
  {
    "path": "backend/onyx/tracing/framework/util.py",
    "content": "import uuid\nfrom datetime import datetime\nfrom datetime import timezone\n\n\ndef time_iso() -> str:\n    \"\"\"Return the current time in ISO 8601 format.\"\"\"\n    return datetime.now(timezone.utc).isoformat()\n\n\ndef gen_trace_id() -> str:\n    \"\"\"Generate a new trace ID.\"\"\"\n    return f\"trace_{uuid.uuid4().hex}\"\n\n\ndef gen_span_id() -> str:\n    \"\"\"Generate a new span ID.\"\"\"\n    return f\"span_{uuid.uuid4().hex[:24]}\"\n"
  },
  {
    "path": "backend/onyx/tracing/langfuse_tracing_processor.py",
    "content": "\"\"\"Langfuse tracing processor using the native Langfuse SDK.\"\"\"\n\nfrom __future__ import annotations\n\nimport logging\nimport threading\nfrom datetime import datetime\nfrom typing import Any\nfrom typing import Optional\nfrom typing import Union\n\nfrom langfuse import Langfuse\nfrom langfuse._client.span import LangfuseObservationWrapper\n\nfrom onyx.tracing.framework.processor_interface import TracingProcessor\nfrom onyx.tracing.framework.span_data import AgentSpanData\nfrom onyx.tracing.framework.span_data import FunctionSpanData\nfrom onyx.tracing.framework.span_data import GenerationSpanData\nfrom onyx.tracing.framework.span_data import SpanData\nfrom onyx.tracing.framework.spans import Span\nfrom onyx.tracing.framework.traces import Trace\n\nlogger = logging.getLogger(__name__)\n\n\ndef _timestamp_from_maybe_iso(timestamp: Optional[str]) -> Optional[datetime]:\n    \"\"\"Convert ISO timestamp string to datetime.\"\"\"\n    if timestamp is None:\n        return None\n    try:\n        return datetime.fromisoformat(timestamp)\n    except ValueError:\n        return None\n\n\nclass LangfuseTracingProcessor(TracingProcessor):\n    \"\"\"TracingProcessor that logs traces to Langfuse using the native SDK.\n\n    Args:\n        client: A Langfuse client instance. If None, uses get_client().\n        enable_masking: Whether to mask sensitive data before sending.\n    \"\"\"\n\n    def __init__(\n        self,\n        client: Optional[Langfuse] = None,\n        enable_masking: bool = True,\n    ) -> None:\n        self._client: Optional[Langfuse] = client\n        self._enable_masking = enable_masking\n        self._lock = threading.Lock()  # Protects all dict access\n        self._spans: dict[str, LangfuseObservationWrapper] = {}\n        self._trace_spans: dict[str, LangfuseObservationWrapper] = (\n            {}\n        )  # Root spans for traces\n        self._first_input: dict[str, Any] = {}\n        self._last_output: dict[str, Any] = {}\n        self._trace_metadata: dict[str, dict[str, Any]] = {}\n        # Langfuse IDs for thread-safe parent linking via trace_context\n        self._langfuse_trace_ids: dict[str, str] = (\n            {}\n        )  # framework_trace_id -> langfuse_trace_id\n        self._langfuse_span_ids: dict[str, str] = (\n            {}\n        )  # framework_span_id -> langfuse_span.id\n\n    def _get_client(self) -> Langfuse:\n        \"\"\"Get or create Langfuse client.\"\"\"\n        if self._client is None:\n            from langfuse import get_client\n\n            self._client = get_client()\n        return self._client\n\n    def _mask_if_enabled(self, data: Any) -> Any:\n        \"\"\"Apply masking to data if masking is enabled.\"\"\"\n        if not self._enable_masking:\n            return data\n        try:\n            from onyx.tracing.masking import mask_sensitive_data\n\n            return mask_sensitive_data(data)\n        except Exception as e:\n            logger.warning(f\"Failed to mask data: {e}\")\n            return data\n\n    def _calculate_cost(self, data: GenerationSpanData) -> Optional[float]:\n        \"\"\"Calculate LLM cost for this generation span.\"\"\"\n        try:\n            from onyx.llm.cost import calculate_llm_cost_cents\n\n            usage = data.usage or {}\n            prompt_tokens = usage.get(\"prompt_tokens\") or usage.get(\"input_tokens\") or 0\n            completion_tokens = (\n                usage.get(\"completion_tokens\") or usage.get(\"output_tokens\") or 0\n            )\n\n            if data.model and prompt_tokens and completion_tokens:\n                cost_cents = calculate_llm_cost_cents(\n                    model_name=data.model,\n                    prompt_tokens=int(prompt_tokens),\n                    completion_tokens=int(completion_tokens),\n                )\n                if cost_cents > 0:\n                    # Convert cents to dollars for Langfuse\n                    return cost_cents / 100.0\n        except Exception as e:\n            logger.debug(f\"Failed to calculate cost: {e}\")\n        return None\n\n    def on_trace_start(self, trace: Trace) -> None:\n        \"\"\"Called when a trace is started.\"\"\"\n        try:\n            client = self._get_client()\n            trace_meta = trace.export() or {}\n            metadata = trace_meta.get(\"metadata\") or {}\n\n            # Create a root span which implicitly creates a Langfuse trace\n            # The span name becomes the trace name in Langfuse UI\n            # In Langfuse SDK v3, use start_observation instead of start_span\n            langfuse_span = client.start_observation(\n                name=trace.name,\n            )\n\n            # Always update the trace-level properties to set the trace name\n            # session_id is optional but name should always be set\n            session_id = metadata.get(\"chat_session_id\")\n            langfuse_span.update_trace(\n                name=trace.name,\n                session_id=session_id if session_id else None,\n                metadata=metadata if metadata else None,\n            )\n\n            with self._lock:\n                if metadata:\n                    self._trace_metadata[trace.trace_id] = metadata\n                self._trace_spans[trace.trace_id] = langfuse_span\n                # Store Langfuse IDs for thread-safe parent linking\n                self._langfuse_trace_ids[trace.trace_id] = langfuse_span.trace_id\n                # Use trace_id as key for root span's ID (children with no parent_id will use this)\n                self._langfuse_span_ids[trace.trace_id] = langfuse_span.id\n        except Exception as e:\n            logger.error(f\"Error starting Langfuse trace: {e}\")\n\n    def on_trace_end(self, trace: Trace) -> None:\n        \"\"\"Called when a trace is finished.\"\"\"\n        try:\n            with self._lock:\n                langfuse_span = self._trace_spans.pop(trace.trace_id, None)\n                self._trace_metadata.pop(trace.trace_id, None)\n                self._langfuse_trace_ids.pop(trace.trace_id, None)  # Clean up trace ID\n                self._langfuse_span_ids.pop(\n                    trace.trace_id, None\n                )  # Clean up root span ID\n                trace_first_input = self._first_input.pop(trace.trace_id, None)\n                trace_last_output = self._last_output.pop(trace.trace_id, None)\n\n            if langfuse_span:\n                # Update the root span with input/output and end it\n                langfuse_span.update(\n                    input=self._mask_if_enabled(trace_first_input),\n                    output=self._mask_if_enabled(trace_last_output),\n                )\n                langfuse_span.end()\n        except Exception as e:\n            logger.error(f\"Error ending Langfuse trace: {e}\")\n\n    def on_span_start(self, span: Span[SpanData]) -> None:\n        \"\"\"Called when a span is started.\n\n        Uses trace_context parameter for thread-safe parent linking instead of\n        calling methods on parent span objects. This is necessary because research\n        agents run in parallel threads, and calling methods on span objects created\n        in other threads can cause OpenTelemetry context issues.\n        \"\"\"\n        try:\n            data = span.span_data\n            # Declare as Any since different code paths return different observation types\n            langfuse_span: Any = None\n\n            # Get Langfuse IDs and metadata under lock for thread-safe access\n            with self._lock:\n                trace_metadata = self._trace_metadata.get(span.trace_id)\n                langfuse_trace_id = self._langfuse_trace_ids.get(span.trace_id)\n                # Get parent's Langfuse span ID\n                if span.parent_id is not None:\n                    parent_langfuse_id = self._langfuse_span_ids.get(span.parent_id)\n                else:\n                    # Parent is the root trace span (use trace_id as key)\n                    parent_langfuse_id = self._langfuse_span_ids.get(span.trace_id)\n\n            # If no trace ID found, we can't create a properly linked span\n            if langfuse_trace_id is None:\n                logger.warning(\n                    f\"No Langfuse trace ID found for span {span.span_id}, creating orphan\"\n                )\n                # Fall back to creating an orphan span\n                # In Langfuse SDK v3, use start_observation instead of start_span\n                client = self._get_client()\n                langfuse_span = client.start_observation(\n                    name=data.type if hasattr(data, \"type\") else \"unknown\",\n                )\n                with self._lock:\n                    self._spans[span.span_id] = langfuse_span\n                    self._langfuse_span_ids[span.span_id] = langfuse_span.id\n                return\n\n            client = self._get_client()\n\n            # Build trace_context for thread-safe parent linking\n            # This uses immutable string IDs instead of mutable span objects\n            # Type is Any to satisfy SDK's TraceContext type while passing a dict\n            trace_context: Any = {\"trace_id\": langfuse_trace_id}\n            if parent_langfuse_id:\n                trace_context[\"parent_span_id\"] = parent_langfuse_id\n\n            # Create spans using trace_context (thread-safe ID-based approach)\n            # In Langfuse SDK v3, use start_observation with as_type parameter\n            if isinstance(data, GenerationSpanData):\n                langfuse_span = client.start_observation(  # type: ignore[call-overload]\n                    trace_context=trace_context,\n                    name=self._get_generation_name(data),\n                    as_type=\"generation\",\n                    metadata=trace_metadata,\n                    model=data.model,\n                    model_parameters=self._get_model_parameters(data),\n                )\n            elif isinstance(data, FunctionSpanData):\n                langfuse_span = client.start_observation(\n                    trace_context=trace_context,\n                    name=data.name,\n                    as_type=\"tool\",\n                    metadata=trace_metadata,\n                )\n            elif isinstance(data, AgentSpanData):\n                langfuse_span = client.start_observation(\n                    trace_context=trace_context,\n                    name=data.name,\n                    as_type=\"agent\",\n                    metadata={\n                        **(trace_metadata or {}),\n                        \"tools\": data.tools,\n                        \"handoffs\": data.handoffs,\n                        \"output_type\": data.output_type,\n                    },\n                )\n            else:\n                langfuse_span = client.start_observation(\n                    trace_context=trace_context,\n                    name=data.type if hasattr(data, \"type\") else \"unknown\",\n                    as_type=\"span\",\n                    metadata=trace_metadata,\n                )\n\n            with self._lock:\n                self._spans[span.span_id] = langfuse_span\n                # Store Langfuse span ID for future children to reference\n                self._langfuse_span_ids[span.span_id] = langfuse_span.id\n        except Exception as e:\n            logger.error(f\"Error starting Langfuse span: {e}\")\n\n    def on_span_end(self, span: Span[SpanData]) -> None:\n        \"\"\"Called when a span is finished.\"\"\"\n        try:\n            with self._lock:\n                langfuse_span = self._spans.pop(span.span_id, None)\n                self._langfuse_span_ids.pop(span.span_id, None)  # Clean up ID mapping\n\n            if not langfuse_span:\n                return\n\n            data = span.span_data\n            input_data: Optional[Any] = None\n            output_data: Optional[Any] = None\n\n            if isinstance(data, GenerationSpanData):\n                input_data = data.input\n                output_data = data.output\n                usage = self._get_usage_details(data)\n                cost = self._calculate_cost(data)\n\n                update_kwargs: dict[str, Any] = {\n                    \"input\": self._mask_if_enabled(input_data),\n                    \"output\": self._mask_if_enabled(output_data),\n                }\n                if usage:\n                    update_kwargs[\"usage_details\"] = usage\n                if cost is not None:\n                    update_kwargs[\"cost_details\"] = {\"total\": cost}\n                if data.reasoning:\n                    update_kwargs[\"metadata\"] = {\"reasoning\": data.reasoning}\n                if data.time_to_first_action_seconds is not None:\n                    update_kwargs[\"completion_start_time\"] = _timestamp_from_maybe_iso(\n                        span.started_at\n                    )\n\n                langfuse_span.update(**update_kwargs)\n\n            elif isinstance(data, FunctionSpanData):\n                input_data = data.input\n                output_data = data.output\n                langfuse_span.update(\n                    input=self._mask_if_enabled(input_data),\n                    output=self._mask_if_enabled(output_data),\n                )\n\n            elif isinstance(data, AgentSpanData):\n                # Agent spans don't have direct input/output\n                pass\n\n            # Handle errors\n            if span.error:\n                langfuse_span.update(\n                    level=\"ERROR\",\n                    status_message=f\"{span.error.get('message')}: {span.error.get('data')}\",\n                )\n\n            langfuse_span.end()\n\n            # Store first input and last output per trace_id\n            trace_id = span.trace_id\n            with self._lock:\n                if trace_id not in self._first_input and input_data is not None:\n                    self._first_input[trace_id] = input_data\n\n                if output_data is not None:\n                    self._last_output[trace_id] = output_data\n\n        except Exception as e:\n            logger.error(f\"Error ending Langfuse span: {e}\")\n\n    def _get_generation_name(self, data: GenerationSpanData) -> str:\n        \"\"\"Get a descriptive name for a generation span.\"\"\"\n        if data.model:\n            return f\"Generation with {data.model}\"\n        return \"Generation\"\n\n    def _get_model_parameters(\n        self, data: GenerationSpanData\n    ) -> Optional[dict[str, Union[str, int, bool, None]]]:\n        \"\"\"Extract model parameters from generation span data.\"\"\"\n        if not isinstance(data.model_config, dict):\n            return None\n\n        params: dict[str, Union[str, int, bool, None]] = {}\n        for key in [\n            \"temperature\",\n            \"max_tokens\",\n            \"top_p\",\n            \"frequency_penalty\",\n            \"presence_penalty\",\n        ]:\n            if key in data.model_config:\n                params[key] = data.model_config[key]\n        return params if params else None\n\n    def _get_usage_details(self, data: GenerationSpanData) -> Optional[dict[str, int]]:\n        \"\"\"Extract usage details from generation span data.\"\"\"\n        usage = data.usage or {}\n        details: dict[str, int] = {}\n\n        prompt_tokens = usage.get(\"prompt_tokens\") or usage.get(\"input_tokens\")\n        if prompt_tokens is not None:\n            details[\"input\"] = int(prompt_tokens)\n\n        completion_tokens = usage.get(\"completion_tokens\") or usage.get(\"output_tokens\")\n        if completion_tokens is not None:\n            details[\"output\"] = int(completion_tokens)\n\n        if \"total_tokens\" in usage:\n            details[\"total\"] = int(usage[\"total_tokens\"])\n        elif details.get(\"input\") and details.get(\"output\"):\n            details[\"total\"] = details[\"input\"] + details[\"output\"]\n\n        # Cache-related tokens\n        if \"cache_read_input_tokens\" in usage:\n            details[\"cache_read_input_tokens\"] = int(usage[\"cache_read_input_tokens\"])\n        if \"cache_creation_input_tokens\" in usage:\n            details[\"cache_creation_input_tokens\"] = int(\n                usage[\"cache_creation_input_tokens\"]\n            )\n\n        return details if details else None\n\n    def force_flush(self) -> None:\n        \"\"\"Forces an immediate flush of all queued spans/traces.\"\"\"\n        try:\n            client = self._get_client()\n            if client:\n                client.flush()\n        except Exception as e:\n            logger.warning(f\"Failed to flush Langfuse client: {e}\")\n\n    def shutdown(self) -> None:\n        \"\"\"Called when the application stops.\"\"\"\n        try:\n            self.force_flush()\n            client = self._get_client()\n            if client:\n                client.shutdown()\n        except Exception as e:\n            logger.warning(f\"Failed to shutdown Langfuse client: {e}\")\n"
  },
  {
    "path": "backend/onyx/tracing/llm_utils.py",
    "content": "from __future__ import annotations\n\nfrom collections.abc import Iterator\nfrom collections.abc import Mapping\nfrom collections.abc import Sequence\nfrom contextlib import contextmanager\nfrom typing import Any\nfrom typing import cast\n\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.model_response import ModelResponse\nfrom onyx.llm.models import ToolCall\nfrom onyx.tracing.framework.create import generation_span\nfrom onyx.tracing.framework.span_data import GenerationSpanData\nfrom onyx.tracing.framework.spans import Span\n\n\ndef build_llm_model_config(llm: LLM, flow: str | None = None) -> dict[str, str]:\n    model_config: dict[str, str] = {\n        \"base_url\": str(llm.config.api_base or \"\"),\n        \"model_provider\": llm.config.model_provider,\n    }\n    if flow:\n        model_config[\"flow\"] = flow\n    return model_config\n\n\n@contextmanager\ndef llm_generation_span(\n    llm: LLM,\n    flow: str | None,\n    input_messages: Sequence[Any] | Any | None = None,\n    parent: Any | None = None,\n) -> Iterator[Span[GenerationSpanData]]:\n    with generation_span(\n        model=llm.config.model_name,\n        model_config=build_llm_model_config(llm, flow),\n        parent=parent,\n    ) as span:\n        if input_messages is not None:\n            if isinstance(input_messages, Sequence) and not isinstance(\n                input_messages, (str, bytes)\n            ):\n                normalized_messages = input_messages\n            else:\n                normalized_messages = [input_messages]\n            span.span_data.input = cast(\n                Sequence[Mapping[str, Any]], normalized_messages\n            )\n        yield span\n\n\ndef record_llm_response(\n    span: Span[GenerationSpanData],\n    response: ModelResponse,\n) -> None:\n    \"\"\"Standard way to record a complete LLM response to a generation span.\n\n    Extracts content, reasoning, tool_calls, and usage automatically from the\n    ModelResponse object.\n\n    Args:\n        span: The generation span to record to.\n        response: The ModelResponse from the LLM.\n    \"\"\"\n    message = response.choice.message\n\n    # Build output dict matching AssistantMessage format\n    output_dict: dict[str, Any] = {\"role\": \"assistant\"}\n\n    if message.content is not None:\n        output_dict[\"content\"] = message.content\n\n    if message.tool_calls:\n        output_dict[\"tool_calls\"] = [tc.model_dump() for tc in message.tool_calls]\n\n    span.span_data.output = [output_dict]\n\n    # Record reasoning (extended thinking from reasoning models)\n    if message.reasoning_content:\n        span.span_data.reasoning = message.reasoning_content\n\n    # Record usage\n    if response.usage:\n        usage_dict = _build_usage_dict(response.usage)\n        if usage_dict:\n            span.span_data.usage = usage_dict\n\n\ndef record_llm_span_output(\n    span: Span[GenerationSpanData],\n    output: str | Sequence[Mapping[str, Any]] | None,\n    usage: Any | None = None,\n    reasoning: str | None = None,\n    tool_calls: list[ToolCall] | None = None,\n) -> None:\n    \"\"\"Record LLM output to a generation span for streaming scenarios.\n\n    This function is useful for streaming where content, reasoning, tool_calls,\n    and usage are accumulated separately.\n\n    Args:\n        span: The generation span to record to.\n        output: The text output or list of message dicts.\n        usage: Optional usage information.\n        reasoning: Optional reasoning/extended thinking content.\n        tool_calls: Optional list of tool calls.\n    \"\"\"\n    if output is None:\n        output_dict: dict[str, Any] = {\"role\": \"assistant\", \"content\": None}\n        if tool_calls:\n            output_dict[\"tool_calls\"] = [tc.model_dump() for tc in tool_calls]\n        span.span_data.output = [output_dict]\n    elif isinstance(output, str):\n        output_dict = {\"role\": \"assistant\", \"content\": output}\n        if tool_calls:\n            output_dict[\"tool_calls\"] = [tc.model_dump() for tc in tool_calls]\n        span.span_data.output = [output_dict]\n    else:\n        span.span_data.output = cast(Sequence[Mapping[str, Any]], output)\n\n    usage_dict = _build_usage_dict(usage)\n    if usage_dict:\n        span.span_data.usage = usage_dict\n\n    if reasoning:\n        span.span_data.reasoning = reasoning\n\n\ndef _build_usage_dict(usage: Any | None) -> dict[str, Any] | None:\n    if not usage:\n        return None\n    if isinstance(usage, dict):\n        return usage\n\n    usage_dict: dict[str, Any] = {}\n    prompt_tokens = getattr(usage, \"prompt_tokens\", None)\n    completion_tokens = getattr(usage, \"completion_tokens\", None)\n    input_tokens = getattr(usage, \"input_tokens\", None)\n    output_tokens = getattr(usage, \"output_tokens\", None)\n    total_tokens = getattr(usage, \"total_tokens\", None)\n    cache_read_input_tokens = getattr(usage, \"cache_read_input_tokens\", None)\n    cache_creation_input_tokens = getattr(usage, \"cache_creation_input_tokens\", None)\n\n    if prompt_tokens is not None:\n        usage_dict[\"input_tokens\"] = prompt_tokens\n    elif input_tokens is not None:\n        usage_dict[\"input_tokens\"] = input_tokens\n    if completion_tokens is not None:\n        usage_dict[\"output_tokens\"] = completion_tokens\n    elif output_tokens is not None:\n        usage_dict[\"output_tokens\"] = output_tokens\n    if total_tokens is not None:\n        usage_dict[\"total_tokens\"] = total_tokens\n    if cache_read_input_tokens is not None:\n        usage_dict[\"cache_read_input_tokens\"] = cache_read_input_tokens\n    if cache_creation_input_tokens is not None:\n        usage_dict[\"cache_creation_input_tokens\"] = cache_creation_input_tokens\n\n    return usage_dict or None\n"
  },
  {
    "path": "backend/onyx/tracing/masking.py",
    "content": "\"\"\"Shared data masking utilities for tracing processors.\"\"\"\n\nimport os\nimport re\nfrom typing import Any\n\n# Set loosely because some tool call results may be very long.\n# Ideally we don't pass those to the LLM but it's fine if we want to trace them in full.\nMASKING_LENGTH = int(os.environ.get(\"TRACING_MASKING_LENGTH\", \"500000\"))\n\n\ndef _truncate_str(s: str) -> str:\n    \"\"\"Truncate a string that exceeds MASKING_LENGTH.\"\"\"\n    tail = MASKING_LENGTH // 5\n    head = MASKING_LENGTH - tail\n    # Handle edge case where tail is 0 (when MASKING_LENGTH < 5)\n    # s[-0:] returns the entire string, so we must check explicitly\n    tail_part = s[-tail:] if tail > 0 else \"\"\n    return f\"{s[:head]}...{tail_part}[TRUNCATED {len(s)} chars to {MASKING_LENGTH}]\"\n\n\ndef mask_sensitive_data(data: Any) -> Any:\n    \"\"\"Mask data if it exceeds the maximum length threshold or contains sensitive information.\n\n    Handles:\n    - Dictionaries: recursively masks values, redacts keys containing 'private_key' or 'authorization'\n    - Lists: recursively masks each item\n    - Strings: redacts private_key patterns, Authorization Bearer tokens, truncates long strings\n    - Other types: truncates if string representation exceeds threshold\n    \"\"\"\n    # Handle dictionaries recursively\n    if isinstance(data, dict):\n        masked_dict = {}\n        for key, value in data.items():\n            # Mask private keys and authorization headers\n            if isinstance(key, str) and (\n                \"private_key\" in key.lower() or \"authorization\" in key.lower()\n            ):\n                masked_dict[key] = \"***REDACTED***\"\n            else:\n                masked_dict[key] = mask_sensitive_data(value)\n        return masked_dict\n\n    # Handle lists recursively\n    if isinstance(data, list):\n        return [mask_sensitive_data(item) for item in data]\n\n    # Handle strings\n    if isinstance(data, str):\n        # Mask private_key patterns\n        if \"private_key\" in data.lower():\n            return \"***REDACTED***\"\n\n        # Mask Authorization: Bearer tokens\n        # Pattern matches \"Authorization: Bearer <token>\" or \"authorization: bearer <token>\"\n        if re.search(r\"authorization:\\s*bearer\\s+\\S+\", data, re.IGNORECASE):\n            data = re.sub(\n                r\"(authorization:\\s*bearer\\s+)\\S+\",\n                r\"\\1***REDACTED***\",\n                data,\n                flags=re.IGNORECASE,\n            )\n\n        if len(data) <= MASKING_LENGTH:\n            return data\n        return _truncate_str(data)\n\n    # For other types, check length\n    if len(str(data)) <= MASKING_LENGTH:\n        return data\n    return _truncate_str(str(data))\n"
  },
  {
    "path": "backend/onyx/tracing/setup.py",
    "content": "\"\"\"Unified tracing setup for all providers (Braintrust, Langfuse, etc.).\"\"\"\n\nfrom onyx.configs.app_configs import BRAINTRUST_API_KEY\nfrom onyx.configs.app_configs import BRAINTRUST_PROJECT\nfrom onyx.configs.app_configs import LANGFUSE_HOST\nfrom onyx.configs.app_configs import LANGFUSE_PUBLIC_KEY\nfrom onyx.configs.app_configs import LANGFUSE_SECRET_KEY\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_initialized = False\n\n\ndef setup_tracing() -> list[str]:\n    \"\"\"Initialize all configured tracing providers.\n\n    Returns a list of provider names that were successfully initialized.\n    Uses add_trace_processor() to ADD processors rather than replacing them,\n    allowing multiple providers to receive trace events simultaneously.\n\n    This function is idempotent - calling it multiple times will only\n    initialize providers once.\n    \"\"\"\n    global _initialized\n    if _initialized:\n        logger.debug(\"Tracing already initialized, skipping\")\n        return []\n\n    initialized_providers: list[str] = []\n\n    # Setup Braintrust if configured\n    if BRAINTRUST_API_KEY:\n        try:\n            _setup_braintrust()\n            initialized_providers.append(\"braintrust\")\n        except Exception as e:\n            logger.error(f\"Failed to initialize Braintrust tracing: {e}\")\n    else:\n        logger.info(\"Braintrust API key not provided, skipping Braintrust setup\")\n\n    # Setup Langfuse if configured\n    if LANGFUSE_SECRET_KEY and LANGFUSE_PUBLIC_KEY:\n        try:\n            _setup_langfuse()\n            initialized_providers.append(\"langfuse\")\n        except Exception as e:\n            logger.error(f\"Failed to initialize Langfuse tracing: {e}\")\n    else:\n        logger.info(\"Langfuse credentials not provided, skipping Langfuse setup\")\n\n    _initialized = True\n\n    if initialized_providers:\n        logger.notice(\n            f\"Tracing initialized with providers: {', '.join(initialized_providers)}\"\n        )\n    else:\n        logger.info(\"No tracing providers configured\")\n\n    return initialized_providers\n\n\ndef _setup_braintrust() -> None:\n    \"\"\"Initialize Braintrust tracing.\"\"\"\n    import braintrust\n\n    from onyx.tracing.braintrust_tracing_processor import BraintrustTracingProcessor\n    from onyx.tracing.framework import add_trace_processor\n    from onyx.tracing.masking import mask_sensitive_data\n\n    braintrust_logger = braintrust.init_logger(\n        project=BRAINTRUST_PROJECT,\n        api_key=BRAINTRUST_API_KEY,\n    )\n    braintrust.set_masking_function(mask_sensitive_data)\n    add_trace_processor(BraintrustTracingProcessor(braintrust_logger))\n\n\ndef _setup_langfuse() -> None:\n    \"\"\"Initialize Langfuse tracing using the native Langfuse SDK.\"\"\"\n    import os\n\n    from langfuse import Langfuse\n\n    from onyx.tracing.framework import add_trace_processor\n    from onyx.tracing.langfuse_tracing_processor import LangfuseTracingProcessor\n\n    # Set LANGFUSE_HOST env var if configured (Langfuse SDK reads this automatically)\n    if LANGFUSE_HOST:\n        os.environ[\"LANGFUSE_HOST\"] = LANGFUSE_HOST\n\n    # Initialize Langfuse client with credentials\n    client = Langfuse(\n        public_key=LANGFUSE_PUBLIC_KEY,\n        secret_key=LANGFUSE_SECRET_KEY,\n        host=LANGFUSE_HOST if LANGFUSE_HOST else None,\n    )\n\n    add_trace_processor(LangfuseTracingProcessor(client=client))\n"
  },
  {
    "path": "backend/onyx/utils/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/utils/b64.py",
    "content": "import base64\n\n\ndef get_image_type_from_bytes(raw_b64_bytes: bytes) -> str:\n    magic_number = raw_b64_bytes[:4]\n\n    if magic_number.startswith(b\"\\x89PNG\"):\n        mime_type = \"image/png\"\n    elif magic_number.startswith(b\"\\xff\\xd8\"):\n        mime_type = \"image/jpeg\"\n    elif magic_number.startswith(b\"GIF8\"):\n        mime_type = \"image/gif\"\n    elif magic_number.startswith(b\"RIFF\") and raw_b64_bytes[8:12] == b\"WEBP\":\n        mime_type = \"image/webp\"\n    else:\n        raise ValueError(\n            \"Unsupported image format - only PNG, JPEG, GIF, and WEBP are supported.\"\n        )\n\n    return mime_type\n\n\ndef get_image_type(raw_b64_string: str) -> str:\n    binary_data = base64.b64decode(raw_b64_string)\n    return get_image_type_from_bytes(binary_data)\n"
  },
  {
    "path": "backend/onyx/utils/batching.py",
    "content": "from collections.abc import Callable\nfrom collections.abc import Generator\nfrom collections.abc import Iterable\nfrom itertools import islice\nfrom typing import TypeVar\n\nT = TypeVar(\"T\")\n\n\ndef batch_generator(\n    items: Iterable[T],\n    batch_size: int,\n    pre_batch_yield: Callable[[list[T]], None] | None = None,\n) -> Generator[list[T], None, None]:\n    \"\"\"Yields batches of items from an iterable.\n\n    Optionally invokes a callback before yielding each batch.\n    \"\"\"\n    iterator = iter(items)\n    while True:\n        batch = list(islice(iterator, batch_size))\n        if not batch:\n            return\n\n        if pre_batch_yield:\n            pre_batch_yield(batch)\n        yield batch\n"
  },
  {
    "path": "backend/onyx/utils/callbacks.py",
    "content": "from typing import Generic\nfrom typing import TypeVar\n\nT = TypeVar(\"T\")\n\n\nclass MetricsHander(Generic[T]):\n    def __init__(self) -> None:\n        self.metrics: T | None = None\n\n    def record_metric(self, metrics: T) -> None:\n        self.metrics = metrics\n"
  },
  {
    "path": "backend/onyx/utils/encryption.py",
    "content": "from typing import Any\n\nfrom onyx.configs.app_configs import ENCRYPTION_KEY_SECRET\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_AUTHENTICATION_METHOD,\n)\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\n\nlogger = setup_logger()\n\n\n# IMPORTANT DO NOT DELETE, THIS IS USED BY fetch_versioned_implementation\ndef _encrypt_string(input_str: str, key: str | None = None) -> bytes:\n    if ENCRYPTION_KEY_SECRET:\n        logger.warning(\"MIT version of Onyx does not support encryption of secrets.\")\n    elif key is not None:\n        logger.debug(\"MIT encrypt called with explicit key — key ignored.\")\n    return input_str.encode()\n\n\n# IMPORTANT DO NOT DELETE, THIS IS USED BY fetch_versioned_implementation\ndef _decrypt_bytes(input_bytes: bytes, key: str | None = None) -> str:\n    if ENCRYPTION_KEY_SECRET:\n        logger.warning(\"MIT version of Onyx does not support decryption of secrets.\")\n    elif key is not None:\n        logger.debug(\"MIT decrypt called with explicit key — key ignored.\")\n    return input_bytes.decode()\n\n\ndef mask_string(sensitive_str: str) -> str:\n    \"\"\"Masks a sensitive string, showing first and last few characters.\n    If the string is too short to safely mask, returns a fully masked placeholder.\n    \"\"\"\n    visible_start = 4\n    visible_end = 4\n    min_masked_chars = 6\n\n    if len(sensitive_str) < visible_start + visible_end + min_masked_chars:\n        return \"••••••••••••\"\n\n    return f\"{sensitive_str[:visible_start]}...{sensitive_str[-visible_end:]}\"\n\n\nMASK_CREDENTIALS_WHITELIST = {\n    DB_CREDENTIALS_AUTHENTICATION_METHOD,\n    \"wiki_base\",\n    \"cloud_name\",\n    \"cloud_id\",\n}\n\n\ndef mask_credential_dict(credential_dict: dict[str, Any]) -> dict[str, Any]:\n    masked_creds: dict[str, Any] = {}\n    for key, val in credential_dict.items():\n        if isinstance(val, str):\n            # we want to pass the authentication_method field through so the frontend\n            # can disambiguate credentials created by different methods\n            if key in MASK_CREDENTIALS_WHITELIST:\n                masked_creds[key] = val\n            else:\n                masked_creds[key] = mask_string(val)\n        elif isinstance(val, dict):\n            masked_creds[key] = mask_credential_dict(val)\n        elif isinstance(val, list):\n            masked_creds[key] = _mask_list(val)\n        elif isinstance(val, (bool, type(None))):\n            masked_creds[key] = val\n        elif isinstance(val, (int, float)):\n            masked_creds[key] = \"*****\"\n        else:\n            masked_creds[key] = \"*****\"\n\n    return masked_creds\n\n\ndef _mask_list(items: list[Any]) -> list[Any]:\n    masked: list[Any] = []\n    for item in items:\n        if isinstance(item, dict):\n            masked.append(mask_credential_dict(item))\n        elif isinstance(item, str):\n            masked.append(mask_string(item))\n        elif isinstance(item, list):\n            masked.append(_mask_list(item))\n        elif isinstance(item, (bool, type(None))):\n            masked.append(item)\n        else:\n            masked.append(\"*****\")\n    return masked\n\n\ndef encrypt_string_to_bytes(intput_str: str, key: str | None = None) -> bytes:\n    versioned_encryption_fn = fetch_versioned_implementation(\n        \"onyx.utils.encryption\", \"_encrypt_string\"\n    )\n    return versioned_encryption_fn(intput_str, key=key)\n\n\ndef decrypt_bytes_to_string(intput_bytes: bytes, key: str | None = None) -> str:\n    versioned_decryption_fn = fetch_versioned_implementation(\n        \"onyx.utils.encryption\", \"_decrypt_bytes\"\n    )\n    return versioned_decryption_fn(intput_bytes, key=key)\n"
  },
  {
    "path": "backend/onyx/utils/error_handling.py",
    "content": "\"\"\"\nStandardized error handling utilities.\n\"\"\"\n\nfrom onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef handle_connector_error(e: Exception, context: str) -> None:\n    \"\"\"\n    Standard error handling for connectors.\n\n    Args:\n        e: The exception that was raised\n        context: A description of where the error occurred\n\n    Raises:\n        The original exception if CONTINUE_ON_CONNECTOR_FAILURE is False\n    \"\"\"\n    logger.error(f\"Error in {context}: {e}\", exc_info=e)\n    if not CONTINUE_ON_CONNECTOR_FAILURE:\n        raise\n"
  },
  {
    "path": "backend/onyx/utils/errors.py",
    "content": "class EERequiredError(Exception):\n    \"\"\"This error is thrown if an Enterprise Edition feature or API is\n    requested but the Enterprise Edition flag is not set.\"\"\"\n"
  },
  {
    "path": "backend/onyx/utils/file.py",
    "content": "from typing import cast\n\nimport puremagic\nfrom pydantic import BaseModel\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass FileWithMimeType(BaseModel):\n    data: bytes\n    mime_type: str\n\n\nclass OnyxStaticFileManager:\n    \"\"\"Retrieve static resources with this class. Currently, these should all be located\n    in the static directory ... e.g. static/images/logo.png\"\"\"\n\n    @staticmethod\n    def get_static(filename: str) -> FileWithMimeType | None:\n        try:\n            mime_type: str = \"application/octet-stream\"\n            with open(filename, \"rb\") as f:\n                file_content = f.read()\n                matches = puremagic.magic_string(file_content)\n                if matches:\n                    mime_type = cast(str, matches[0].mime_type)\n        except (OSError, FileNotFoundError, PermissionError) as e:\n            logger.error(f\"Failed to read file {filename}: {e}\")\n            return None\n        except Exception as e:\n            logger.error(f\"Unexpected exception reading file {filename}: {e}\")\n            return None\n\n        return FileWithMimeType(data=file_content, mime_type=mime_type)\n"
  },
  {
    "path": "backend/onyx/utils/gpu_utils.py",
    "content": "import os\nfrom functools import lru_cache\n\nimport requests\nfrom retry import retry\n\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import INDEXING_MODEL_SERVER_HOST\nfrom shared_configs.configs import INDEXING_MODEL_SERVER_PORT\nfrom shared_configs.configs import MODEL_SERVER_HOST\nfrom shared_configs.configs import MODEL_SERVER_PORT\n\nlogger = setup_logger()\n\n\ndef _get_gpu_status_from_model_server(indexing: bool) -> bool:\n    if os.environ.get(\"DISABLE_MODEL_SERVER\", \"\").lower() == \"true\":\n        logger.info(\"DISABLE_MODEL_SERVER is set, assuming no GPU available\")\n        return False\n    if indexing:\n        model_server_url = f\"{INDEXING_MODEL_SERVER_HOST}:{INDEXING_MODEL_SERVER_PORT}\"\n    else:\n        model_server_url = f\"{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}\"\n\n    if \"http\" not in model_server_url:\n        model_server_url = f\"http://{model_server_url}\"\n\n    try:\n        response = requests.get(f\"{model_server_url}/api/gpu-status\", timeout=10)\n        response.raise_for_status()\n        gpu_status = response.json()\n        return gpu_status[\"gpu_available\"]\n    except requests.RequestException as e:\n        logger.error(f\"Error: Unable to fetch GPU status. Error: {str(e)}\")\n        raise  # Re-raise exception to trigger a retry\n\n\n@retry(tries=5, delay=5)\ndef gpu_status_request(indexing: bool) -> bool:\n    return _get_gpu_status_from_model_server(indexing)\n\n\n@lru_cache(maxsize=1)\ndef fast_gpu_status_request(indexing: bool) -> bool:\n    \"\"\"For use in sync flows, where we don't want to retry / we want to cache this.\"\"\"\n    return gpu_status_request(indexing=indexing)\n"
  },
  {
    "path": "backend/onyx/utils/headers.py",
    "content": "from typing import TypedDict\n\nfrom fastapi.datastructures import Headers\n\nfrom onyx.configs.model_configs import LITELLM_EXTRA_HEADERS\nfrom onyx.configs.model_configs import LITELLM_PASS_THROUGH_HEADERS\nfrom onyx.configs.tool_configs import CUSTOM_TOOL_PASS_THROUGH_HEADERS\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass HeaderItemDict(TypedDict):\n    key: str\n    value: str\n\n\ndef clean_header_list(headers_to_clean: list[HeaderItemDict]) -> dict[str, str]:\n    cleaned_headers: dict[str, str] = {}\n    for item in headers_to_clean:\n        key = item[\"key\"]\n        value = item[\"value\"]\n        if key in cleaned_headers:\n            logger.warning(\n                f\"Duplicate header {key} found in custom headers, ignoring...\"\n            )\n            continue\n        cleaned_headers[key] = value\n    return cleaned_headers\n\n\ndef header_dict_to_header_list(header_dict: dict[str, str]) -> list[HeaderItemDict]:\n    return [{\"key\": key, \"value\": value} for key, value in header_dict.items()]\n\n\ndef header_list_to_header_dict(header_list: list[HeaderItemDict]) -> dict[str, str]:\n    return {header[\"key\"]: header[\"value\"] for header in header_list}\n\n\ndef get_relevant_headers(\n    headers: dict[str, str] | Headers, desired_headers: list[str] | None\n) -> dict[str, str]:\n    if not desired_headers:\n        return {}\n\n    pass_through_headers: dict[str, str] = {}\n    for key in desired_headers:\n        if key in headers:\n            pass_through_headers[key] = headers[key]\n        else:\n            # fastapi makes all header keys lowercase, handling that here\n            lowercase_key = key.lower()\n            if lowercase_key in headers:\n                pass_through_headers[lowercase_key] = headers[lowercase_key]\n\n    return pass_through_headers\n\n\ndef get_litellm_additional_request_headers(\n    headers: dict[str, str] | Headers,\n) -> dict[str, str]:\n    return get_relevant_headers(headers, LITELLM_PASS_THROUGH_HEADERS)\n\n\ndef build_llm_extra_headers(\n    additional_headers: dict[str, str] | None = None,\n) -> dict[str, str]:\n    extra_headers: dict[str, str] = {}\n    if additional_headers:\n        extra_headers.update(additional_headers)\n    if LITELLM_EXTRA_HEADERS:\n        extra_headers.update(LITELLM_EXTRA_HEADERS)\n    return extra_headers\n\n\ndef get_custom_tool_additional_request_headers(\n    headers: dict[str, str] | Headers,\n) -> dict[str, str]:\n    return get_relevant_headers(headers, CUSTOM_TOOL_PASS_THROUGH_HEADERS)\n"
  },
  {
    "path": "backend/onyx/utils/jsonriver/__init__.py",
    "content": "\"\"\"\njsonriver - A streaming JSON parser for Python\n\nParse JSON incrementally as it streams in, e.g. from a network request or a language model.\nGives you a sequence of increasingly complete values.\n\nCopyright (c) 2023 Google LLC (original TypeScript implementation)\nCopyright (c) 2024 jsonriver-python contributors (Python port)\nSPDX-License-Identifier: BSD-3-Clause\n\"\"\"\n\nfrom .parse import _Parser as Parser\nfrom .parse import JsonObject\nfrom .parse import JsonValue\n\n__all__ = [\"Parser\", \"JsonValue\", \"JsonObject\"]\n__version__ = \"0.0.1\"\n"
  },
  {
    "path": "backend/onyx/utils/jsonriver/parse.py",
    "content": "\"\"\"\nJSON parser for streaming incremental parsing\n\nCopyright (c) 2023 Google LLC (original TypeScript implementation)\nCopyright (c) 2024 jsonriver-python contributors (Python port)\nSPDX-License-Identifier: BSD-3-Clause\n\"\"\"\n\nfrom __future__ import annotations\n\nimport copy\nfrom enum import IntEnum\nfrom typing import cast\nfrom typing import Union\n\nfrom .tokenize import _Input\nfrom .tokenize import json_token_type_to_string\nfrom .tokenize import JsonTokenType\nfrom .tokenize import Tokenizer\n\n\n# Type definitions for JSON values\nJsonValue = Union[None, bool, float, str, list[\"JsonValue\"], dict[str, \"JsonValue\"]]\nJsonObject = dict[str, JsonValue]\n\n\nclass _StateEnum(IntEnum):\n    \"\"\"Parser state machine states\"\"\"\n\n    Initial = 0\n    InString = 1\n    InArray = 2\n    InObjectExpectingKey = 3\n    InObjectExpectingValue = 4\n\n\nclass _State:\n    \"\"\"Base class for parser states\"\"\"\n\n    type: _StateEnum\n    value: JsonValue | tuple[str, JsonObject] | None\n\n\nclass _InitialState(_State):\n    \"\"\"Initial state before any parsing\"\"\"\n\n    def __init__(self) -> None:\n        self.type = _StateEnum.Initial\n        self.value = None\n\n\nclass _InStringState(_State):\n    \"\"\"State while parsing a string\"\"\"\n\n    def __init__(self) -> None:\n        self.type = _StateEnum.InString\n        self.value = \"\"\n\n\nclass _InArrayState(_State):\n    \"\"\"State while parsing an array\"\"\"\n\n    def __init__(self) -> None:\n        self.type = _StateEnum.InArray\n        self.value: list[JsonValue] = []\n\n\nclass _InObjectExpectingKeyState(_State):\n    \"\"\"State while parsing an object, expecting a key\"\"\"\n\n    def __init__(self) -> None:\n        self.type = _StateEnum.InObjectExpectingKey\n        self.value: JsonObject = {}\n\n\nclass _InObjectExpectingValueState(_State):\n    \"\"\"State while parsing an object, expecting a value\"\"\"\n\n    def __init__(self, key: str, obj: JsonObject) -> None:\n        self.type = _StateEnum.InObjectExpectingValue\n        self.value = (key, obj)\n\n\n# Sentinel value to distinguish \"not set\" from \"set to None/null\"\nclass _Unset:\n    pass\n\n\n_UNSET = _Unset()\n\n\nclass _Parser:\n    \"\"\"\n    Incremental JSON parser\n\n    Feed chunks of JSON text via feed() and get back progressively\n    more complete JSON values.\n    \"\"\"\n\n    def __init__(self) -> None:\n        self._state_stack: list[_State] = [_InitialState()]\n        self._toplevel_value: JsonValue | _Unset = _UNSET\n        self._input = _Input()\n        self.tokenizer = Tokenizer(self._input, self)\n        self._finished = False\n        self._progressed = False\n        self._prev_snapshot: JsonValue | _Unset = _UNSET\n\n    def feed(self, chunk: str) -> list[JsonValue]:\n        \"\"\"\n        Feed a chunk of JSON text and return deltas from the previous state.\n\n        Each element in the returned list represents what changed since the\n        last yielded value. For dicts, only changed/new keys are included,\n        with string values containing only the newly appended characters.\n        \"\"\"\n        if self._finished:\n            return []\n\n        self._input.feed(chunk)\n        return self._collect_deltas()\n\n    @staticmethod\n    def _compute_delta(prev: JsonValue | None, current: JsonValue) -> JsonValue | None:\n        if prev is None:\n            return current\n\n        if isinstance(current, dict) and isinstance(prev, dict):\n            result: JsonObject = {}\n            for key in current:\n                cur_val = current[key]\n                prev_val = prev.get(key)\n                if key not in prev:\n                    result[key] = cur_val\n                elif isinstance(cur_val, str) and isinstance(prev_val, str):\n                    if cur_val != prev_val:\n                        result[key] = cur_val[len(prev_val) :]\n                elif isinstance(cur_val, list) and isinstance(prev_val, list):\n                    if cur_val != prev_val:\n                        new_items = cur_val[len(prev_val) :]\n                        # check if the last existing element was updated\n                        if (\n                            prev_val\n                            and len(cur_val) >= len(prev_val)\n                            and cur_val[len(prev_val) - 1] != prev_val[-1]\n                        ):\n                            result[key] = [cur_val[len(prev_val) - 1]] + new_items\n                        elif new_items:\n                            result[key] = new_items\n                elif cur_val != prev_val:\n                    result[key] = cur_val\n            return result if result else None\n\n        if isinstance(current, str) and isinstance(prev, str):\n            delta = current[len(prev) :]\n            return delta if delta else None\n\n        if isinstance(current, list) and isinstance(prev, list):\n            if current != prev:\n                new_items = current[len(prev) :]\n                if (\n                    prev\n                    and len(current) >= len(prev)\n                    and current[len(prev) - 1] != prev[-1]\n                ):\n                    return [current[len(prev) - 1]] + new_items\n                return new_items if new_items else None\n            return None\n\n        if current != prev:\n            return current\n        return None\n\n    def finish(self) -> list[JsonValue]:\n        \"\"\"Signal that no more chunks will be fed. Validates trailing content.\n\n        Returns any final deltas produced by flushing pending tokens (e.g.\n        numbers, which have no terminator and wait for more input).\n        \"\"\"\n        self._input.mark_complete()\n        # Pump once more so the tokenizer can emit tokens that were waiting\n        # for more input (e.g. numbers need buffer_complete to finalize).\n        results = self._collect_deltas()\n        self._input.expect_end_of_content()\n        return results\n\n    def _collect_deltas(self) -> list[JsonValue]:\n        \"\"\"Run one pump cycle and return any deltas produced.\"\"\"\n        results: list[JsonValue] = []\n        while True:\n            self._progressed = False\n            self.tokenizer.pump()\n\n            if self._progressed:\n                if self._toplevel_value is _UNSET:\n                    raise RuntimeError(\n                        \"Internal error: toplevel_value should not be unset after progressing\"\n                    )\n                current = copy.deepcopy(cast(JsonValue, self._toplevel_value))\n                if isinstance(self._prev_snapshot, _Unset):\n                    results.append(current)\n                else:\n                    delta = self._compute_delta(self._prev_snapshot, current)\n                    if delta is not None:\n                        results.append(delta)\n                self._prev_snapshot = current\n            else:\n                if not self._state_stack:\n                    self._finished = True\n                break\n        return results\n\n    # TokenHandler protocol implementation\n\n    def handle_null(self) -> None:\n        \"\"\"Handle null token\"\"\"\n        self._handle_value_token(JsonTokenType.Null, None)\n\n    def handle_boolean(self, value: bool) -> None:\n        \"\"\"Handle boolean token\"\"\"\n        self._handle_value_token(JsonTokenType.Boolean, value)\n\n    def handle_number(self, value: float) -> None:\n        \"\"\"Handle number token\"\"\"\n        self._handle_value_token(JsonTokenType.Number, value)\n\n    def handle_string_start(self) -> None:\n        \"\"\"Handle string start token\"\"\"\n        state = self._current_state()\n        if not self._progressed and state.type != _StateEnum.InObjectExpectingKey:\n            self._progressed = True\n\n        if state.type == _StateEnum.Initial:\n            self._state_stack.pop()\n            self._toplevel_value = self._progress_value(JsonTokenType.StringStart, None)\n\n        elif state.type == _StateEnum.InArray:\n            v = self._progress_value(JsonTokenType.StringStart, None)\n            arr = cast(list[JsonValue], state.value)\n            arr.append(v)\n\n        elif state.type == _StateEnum.InObjectExpectingKey:\n            self._state_stack.append(_InStringState())\n\n        elif state.type == _StateEnum.InObjectExpectingValue:\n            key, obj = cast(tuple[str, JsonObject], state.value)\n            sv = self._progress_value(JsonTokenType.StringStart, None)\n            obj[key] = sv\n\n        elif state.type == _StateEnum.InString:\n            raise ValueError(\n                f\"Unexpected {json_token_type_to_string(JsonTokenType.StringStart)} token in the middle of string\"\n            )\n\n    def handle_string_middle(self, value: str) -> None:\n        \"\"\"Handle string middle token\"\"\"\n        state = self._current_state()\n\n        if not self._progressed:\n            if len(self._state_stack) >= 2:\n                prev = self._state_stack[-2]\n                if prev.type != _StateEnum.InObjectExpectingKey:\n                    self._progressed = True\n            else:\n                self._progressed = True\n\n        if state.type != _StateEnum.InString:\n            raise ValueError(\n                f\"Unexpected {json_token_type_to_string(JsonTokenType.StringMiddle)} token when not in string\"\n            )\n\n        assert isinstance(state.value, str)\n        state.value += value\n\n        parent_state = self._state_stack[-2] if len(self._state_stack) >= 2 else None\n        self._update_string_parent(state.value, parent_state)\n\n    def handle_string_end(self) -> None:\n        \"\"\"Handle string end token\"\"\"\n        state = self._current_state()\n\n        if state.type != _StateEnum.InString:\n            raise ValueError(\n                f\"Unexpected {json_token_type_to_string(JsonTokenType.StringEnd)} token when not in string\"\n            )\n\n        self._state_stack.pop()\n        parent_state = self._state_stack[-1] if self._state_stack else None\n        assert isinstance(state.value, str)\n        self._update_string_parent(state.value, parent_state)\n\n    def handle_array_start(self) -> None:\n        \"\"\"Handle array start token\"\"\"\n        self._handle_value_token(JsonTokenType.ArrayStart, None)\n\n    def handle_array_end(self) -> None:\n        \"\"\"Handle array end token\"\"\"\n        state = self._current_state()\n        if state.type != _StateEnum.InArray:\n            raise ValueError(\n                f\"Unexpected {json_token_type_to_string(JsonTokenType.ArrayEnd)} token\"\n            )\n        self._state_stack.pop()\n\n    def handle_object_start(self) -> None:\n        \"\"\"Handle object start token\"\"\"\n        self._handle_value_token(JsonTokenType.ObjectStart, None)\n\n    def handle_object_end(self) -> None:\n        \"\"\"Handle object end token\"\"\"\n        state = self._current_state()\n\n        if state.type in (\n            _StateEnum.InObjectExpectingKey,\n            _StateEnum.InObjectExpectingValue,\n        ):\n            self._state_stack.pop()\n        else:\n            raise ValueError(\n                f\"Unexpected {json_token_type_to_string(JsonTokenType.ObjectEnd)} token\"\n            )\n\n    # Private helper methods\n\n    def _current_state(self) -> _State:\n        \"\"\"Get current parser state\"\"\"\n        if not self._state_stack:\n            raise ValueError(\"Unexpected trailing input\")\n        return self._state_stack[-1]\n\n    def _handle_value_token(self, token_type: JsonTokenType, value: JsonValue) -> None:\n        \"\"\"Handle a complete value token\"\"\"\n        state = self._current_state()\n\n        if not self._progressed:\n            self._progressed = True\n\n        if state.type == _StateEnum.Initial:\n            self._state_stack.pop()\n            self._toplevel_value = self._progress_value(token_type, value)\n\n        elif state.type == _StateEnum.InArray:\n            v = self._progress_value(token_type, value)\n            arr = cast(list[JsonValue], state.value)\n            arr.append(v)\n\n        elif state.type == _StateEnum.InObjectExpectingValue:\n            key, obj = cast(tuple[str, JsonObject], state.value)\n            if token_type != JsonTokenType.StringStart:\n                self._state_stack.pop()\n                new_state = _InObjectExpectingKeyState()\n                new_state.value = obj\n                self._state_stack.append(new_state)\n\n            v = self._progress_value(token_type, value)\n            obj[key] = v\n\n        elif state.type == _StateEnum.InString:\n            raise ValueError(\n                f\"Unexpected {json_token_type_to_string(token_type)} token in the middle of string\"\n            )\n\n        elif state.type == _StateEnum.InObjectExpectingKey:\n            raise ValueError(\n                f\"Unexpected {json_token_type_to_string(token_type)} token in the middle of object expecting key\"\n            )\n\n    def _update_string_parent(self, updated: str, parent_state: _State | None) -> None:\n        \"\"\"Update parent container with updated string value\"\"\"\n        if parent_state is None:\n            self._toplevel_value = updated\n\n        elif parent_state.type == _StateEnum.InArray:\n            arr = cast(list[JsonValue], parent_state.value)\n            arr[-1] = updated\n\n        elif parent_state.type == _StateEnum.InObjectExpectingValue:\n            key, obj = cast(tuple[str, JsonObject], parent_state.value)\n            obj[key] = updated\n            if self._state_stack and self._state_stack[-1] == parent_state:\n                self._state_stack.pop()\n                new_state = _InObjectExpectingKeyState()\n                new_state.value = obj\n                self._state_stack.append(new_state)\n\n        elif parent_state.type == _StateEnum.InObjectExpectingKey:\n            if self._state_stack and self._state_stack[-1] == parent_state:\n                self._state_stack.pop()\n                obj = cast(JsonObject, parent_state.value)\n                self._state_stack.append(_InObjectExpectingValueState(updated, obj))\n\n    def _progress_value(self, token_type: JsonTokenType, value: JsonValue) -> JsonValue:\n        \"\"\"Create initial value for a token and push appropriate state\"\"\"\n        if token_type == JsonTokenType.Null:\n            return None\n\n        elif token_type == JsonTokenType.Boolean:\n            return value\n\n        elif token_type == JsonTokenType.Number:\n            return value\n\n        elif token_type == JsonTokenType.StringStart:\n            string_state = _InStringState()\n            self._state_stack.append(string_state)\n            return \"\"\n\n        elif token_type == JsonTokenType.ArrayStart:\n            array_state = _InArrayState()\n            self._state_stack.append(array_state)\n            return array_state.value\n\n        elif token_type == JsonTokenType.ObjectStart:\n            object_state = _InObjectExpectingKeyState()\n            self._state_stack.append(object_state)\n            return object_state.value\n\n        else:\n            raise ValueError(\n                f\"Unexpected token type: {json_token_type_to_string(token_type)}\"\n            )\n"
  },
  {
    "path": "backend/onyx/utils/jsonriver/tokenize.py",
    "content": "\"\"\"\nJSON tokenizer for streaming incremental parsing\n\nCopyright (c) 2023 Google LLC (original TypeScript implementation)\nCopyright (c) 2024 jsonriver-python contributors (Python port)\nSPDX-License-Identifier: BSD-3-Clause\n\"\"\"\n\nfrom __future__ import annotations\n\nimport re\nfrom enum import IntEnum\nfrom typing import Protocol\n\n\nclass TokenHandler(Protocol):\n    \"\"\"Protocol for handling JSON tokens\"\"\"\n\n    def handle_null(self) -> None: ...\n    def handle_boolean(self, value: bool) -> None: ...\n    def handle_number(self, value: float) -> None: ...\n    def handle_string_start(self) -> None: ...\n    def handle_string_middle(self, value: str) -> None: ...\n    def handle_string_end(self) -> None: ...\n    def handle_array_start(self) -> None: ...\n    def handle_array_end(self) -> None: ...\n    def handle_object_start(self) -> None: ...\n    def handle_object_end(self) -> None: ...\n\n\nclass JsonTokenType(IntEnum):\n    \"\"\"Types of JSON tokens\"\"\"\n\n    Null = 0\n    Boolean = 1\n    Number = 2\n    StringStart = 3\n    StringMiddle = 4\n    StringEnd = 5\n    ArrayStart = 6\n    ArrayEnd = 7\n    ObjectStart = 8\n    ObjectEnd = 9\n\n\ndef json_token_type_to_string(token_type: JsonTokenType) -> str:\n    \"\"\"Convert token type to readable string\"\"\"\n    names = {\n        JsonTokenType.Null: \"null\",\n        JsonTokenType.Boolean: \"boolean\",\n        JsonTokenType.Number: \"number\",\n        JsonTokenType.StringStart: \"string start\",\n        JsonTokenType.StringMiddle: \"string middle\",\n        JsonTokenType.StringEnd: \"string end\",\n        JsonTokenType.ArrayStart: \"array start\",\n        JsonTokenType.ArrayEnd: \"array end\",\n        JsonTokenType.ObjectStart: \"object start\",\n        JsonTokenType.ObjectEnd: \"object end\",\n    }\n    return names[token_type]\n\n\nclass _State(IntEnum):\n    \"\"\"Internal tokenizer states\"\"\"\n\n    ExpectingValue = 0\n    InString = 1\n    StartArray = 2\n    AfterArrayValue = 3\n    StartObject = 4\n    AfterObjectKey = 5\n    AfterObjectValue = 6\n    BeforeObjectKey = 7\n\n\n# Regex for validating JSON numbers\n_JSON_NUMBER_PATTERN = re.compile(r\"^-?(0|[1-9]\\d*)(\\.\\d+)?([eE][+-]?\\d+)?$\")\n\n\ndef _parse_json_number(s: str) -> float:\n    \"\"\"Parse a JSON number string, validating format\"\"\"\n    if not _JSON_NUMBER_PATTERN.match(s):\n        raise ValueError(\"Invalid number\")\n    return float(s)\n\n\nclass _Input:\n    \"\"\"\n    Input buffer for chunk-based JSON parsing\n\n    Manages buffering of input chunks and provides methods for\n    consuming and inspecting the buffer.\n    \"\"\"\n\n    def __init__(self) -> None:\n        self._buffer = \"\"\n        self._start_index = 0\n        self.buffer_complete = False\n\n    def feed(self, chunk: str) -> None:\n        \"\"\"Add a chunk of data to the buffer\"\"\"\n        self._buffer += chunk\n\n    def mark_complete(self) -> None:\n        \"\"\"Signal that no more chunks will be fed\"\"\"\n        self.buffer_complete = True\n\n    @property\n    def length(self) -> int:\n        \"\"\"Number of characters remaining in buffer\"\"\"\n        return len(self._buffer) - self._start_index\n\n    def advance(self, length: int) -> None:\n        \"\"\"Advance the start position by length characters\"\"\"\n        self._start_index += length\n\n    def peek(self, offset: int) -> str | None:\n        \"\"\"Peek at character at offset, or None if not available\"\"\"\n        idx = self._start_index + offset\n        if idx < len(self._buffer):\n            return self._buffer[idx]\n        return None\n\n    def peek_char_code(self, offset: int) -> int:\n        \"\"\"Get character code at offset\"\"\"\n        return ord(self._buffer[self._start_index + offset])\n\n    def slice(self, start: int, end: int) -> str:\n        \"\"\"Slice buffer from start to end (relative to current position)\"\"\"\n        return self._buffer[self._start_index + start : self._start_index + end]\n\n    def commit(self) -> None:\n        \"\"\"Commit consumed content, removing it from buffer\"\"\"\n        if self._start_index > 0:\n            self._buffer = self._buffer[self._start_index :]\n            self._start_index = 0\n\n    def remaining(self) -> str:\n        \"\"\"Get all remaining content in buffer\"\"\"\n        return self._buffer[self._start_index :]\n\n    def expect_end_of_content(self) -> None:\n        \"\"\"Verify no non-whitespace content remains\"\"\"\n        self.commit()\n        self.skip_past_whitespace()\n        if self.length != 0:\n            raise ValueError(f\"Unexpected trailing content {self.remaining()!r}\")\n\n    def skip_past_whitespace(self) -> None:\n        \"\"\"Skip whitespace characters\"\"\"\n        i = self._start_index\n        while i < len(self._buffer):\n            c = ord(self._buffer[i])\n            if c in (32, 9, 10, 13):  # space, tab, \\n, \\r\n                i += 1\n            else:\n                break\n        self._start_index = i\n\n    def try_to_take_prefix(self, prefix: str) -> bool:\n        \"\"\"Try to consume prefix from buffer, return True if successful\"\"\"\n        if self._buffer.startswith(prefix, self._start_index):\n            self._start_index += len(prefix)\n            return True\n        return False\n\n    def try_to_take(self, length: int) -> str | None:\n        \"\"\"Try to take length characters, or None if not enough available\"\"\"\n        if self.length < length:\n            return None\n        result = self._buffer[self._start_index : self._start_index + length]\n        self._start_index += length\n        return result\n\n    def try_to_take_char_code(self) -> int | None:\n        \"\"\"Try to take a single character as char code, or None if buffer empty\"\"\"\n        if self.length == 0:\n            return None\n        code = ord(self._buffer[self._start_index])\n        self._start_index += 1\n        return code\n\n    def take_until_quote_or_backslash(self) -> tuple[str, bool]:\n        \"\"\"\n        Consume input up to first quote or backslash\n\n        Returns tuple of (consumed_content, pattern_found)\n        \"\"\"\n        buf = self._buffer\n        i = self._start_index\n        while i < len(buf):\n            c = ord(buf[i])\n            if c <= 0x1F:\n                raise ValueError(\"Unescaped control character in string\")\n            if c == 34 or c == 92:  # \" or \\\n                result = buf[self._start_index : i]\n                self._start_index = i\n                return (result, True)\n            i += 1\n\n        result = buf[self._start_index :]\n        self._start_index = len(buf)\n        return (result, False)\n\n\nclass Tokenizer:\n    \"\"\"\n    Tokenizer for chunk-based JSON parsing\n\n    Processes chunks fed into its input buffer and calls handler methods\n    as JSON tokens are recognized.\n    \"\"\"\n\n    def __init__(self, input: _Input, handler: TokenHandler) -> None:\n        self.input = input\n        self._handler = handler\n        self._stack: list[_State] = [_State.ExpectingValue]\n        self._emitted_tokens = 0\n\n    def is_done(self) -> bool:\n        \"\"\"Check if tokenization is complete\"\"\"\n        return len(self._stack) == 0 and self.input.length == 0\n\n    def pump(self) -> None:\n        \"\"\"Process all available tokens in the buffer\"\"\"\n        while True:\n            before = self._emitted_tokens\n            self._tokenize_more()\n            if self._emitted_tokens == before:\n                self.input.commit()\n                return\n\n    def _tokenize_more(self) -> None:\n        \"\"\"Process one step of tokenization based on current state\"\"\"\n        if not self._stack:\n            return\n\n        state = self._stack[-1]\n\n        if state == _State.ExpectingValue:\n            self._tokenize_value()\n        elif state == _State.InString:\n            self._tokenize_string()\n        elif state == _State.StartArray:\n            self._tokenize_array_start()\n        elif state == _State.AfterArrayValue:\n            self._tokenize_after_array_value()\n        elif state == _State.StartObject:\n            self._tokenize_object_start()\n        elif state == _State.AfterObjectKey:\n            self._tokenize_after_object_key()\n        elif state == _State.AfterObjectValue:\n            self._tokenize_after_object_value()\n        elif state == _State.BeforeObjectKey:\n            self._tokenize_before_object_key()\n\n    def _tokenize_value(self) -> None:\n        \"\"\"Tokenize a JSON value\"\"\"\n        self.input.skip_past_whitespace()\n\n        if self.input.try_to_take_prefix(\"null\"):\n            self._handler.handle_null()\n            self._emitted_tokens += 1\n            self._stack.pop()\n            return\n\n        if self.input.try_to_take_prefix(\"true\"):\n            self._handler.handle_boolean(True)\n            self._emitted_tokens += 1\n            self._stack.pop()\n            return\n\n        if self.input.try_to_take_prefix(\"false\"):\n            self._handler.handle_boolean(False)\n            self._emitted_tokens += 1\n            self._stack.pop()\n            return\n\n        if self.input.length > 0:\n            ch = self.input.peek_char_code(0)\n            if (48 <= ch <= 57) or ch == 45:  # 0-9 or -\n                # Scan for end of number\n                i = 0\n                while i < self.input.length:\n                    c = self.input.peek_char_code(i)\n                    if (48 <= c <= 57) or c in (45, 43, 46, 101, 69):  # 0-9 - + . e E\n                        i += 1\n                    else:\n                        break\n\n                if i == self.input.length and not self.input.buffer_complete:\n                    # Need more input (numbers have no terminator)\n                    return\n\n                number_chars = self.input.slice(0, i)\n                self.input.advance(i)\n                number = _parse_json_number(number_chars)\n                self._handler.handle_number(number)\n                self._emitted_tokens += 1\n                self._stack.pop()\n                return\n\n        if self.input.try_to_take_prefix('\"'):\n            self._stack.pop()\n            self._stack.append(_State.InString)\n            self._handler.handle_string_start()\n            self._emitted_tokens += 1\n            self._tokenize_string()\n            return\n\n        if self.input.try_to_take_prefix(\"[\"):\n            self._stack.pop()\n            self._stack.append(_State.StartArray)\n            self._handler.handle_array_start()\n            self._emitted_tokens += 1\n            self._tokenize_array_start()\n            return\n\n        if self.input.try_to_take_prefix(\"{\"):\n            self._stack.pop()\n            self._stack.append(_State.StartObject)\n            self._handler.handle_object_start()\n            self._emitted_tokens += 1\n            self._tokenize_object_start()\n            return\n\n    def _tokenize_string(self) -> None:\n        \"\"\"Tokenize string content\"\"\"\n        while True:\n            chunk, interrupted = self.input.take_until_quote_or_backslash()\n            if chunk:\n                self._handler.handle_string_middle(chunk)\n                self._emitted_tokens += 1\n            elif not interrupted:\n                return\n\n            if interrupted:\n                if self.input.length == 0:\n                    return\n\n                next_char = self.input.peek(0)\n                if next_char == '\"':\n                    self.input.advance(1)\n                    self._handler.handle_string_end()\n                    self._emitted_tokens += 1\n                    self._stack.pop()\n                    return\n\n                # Handle escape sequences\n                next_char2 = self.input.peek(1)\n                if next_char2 is None:\n                    return\n\n                value: str\n                if next_char2 == \"u\":\n                    # Unicode escape: need 4 hex digits\n                    if self.input.length < 6:\n                        return\n\n                    code = 0\n                    for j in range(2, 6):\n                        c = self.input.peek_char_code(j)\n                        if 48 <= c <= 57:  # 0-9\n                            digit = c - 48\n                        elif 65 <= c <= 70:  # A-F\n                            digit = c - 55\n                        elif 97 <= c <= 102:  # a-f\n                            digit = c - 87\n                        else:\n                            raise ValueError(\"Bad Unicode escape in JSON\")\n                        code = (code << 4) | digit\n\n                    self.input.advance(6)\n                    self._handler.handle_string_middle(chr(code))\n                    self._emitted_tokens += 1\n                    continue\n\n                elif next_char2 == \"n\":\n                    value = \"\\n\"\n                elif next_char2 == \"r\":\n                    value = \"\\r\"\n                elif next_char2 == \"t\":\n                    value = \"\\t\"\n                elif next_char2 == \"b\":\n                    value = \"\\b\"\n                elif next_char2 == \"f\":\n                    value = \"\\f\"\n                elif next_char2 == \"\\\\\":\n                    value = \"\\\\\"\n                elif next_char2 == \"/\":\n                    value = \"/\"\n                elif next_char2 == '\"':\n                    value = '\"'\n                else:\n                    raise ValueError(\"Bad escape in string\")\n\n                self.input.advance(2)\n                self._handler.handle_string_middle(value)\n                self._emitted_tokens += 1\n\n    def _tokenize_array_start(self) -> None:\n        \"\"\"Tokenize start of array (check for empty or first element)\"\"\"\n        self.input.skip_past_whitespace()\n        if self.input.length == 0:\n            return\n\n        if self.input.try_to_take_prefix(\"]\"):\n            self._handler.handle_array_end()\n            self._emitted_tokens += 1\n            self._stack.pop()\n            return\n\n        self._stack.pop()\n        self._stack.append(_State.AfterArrayValue)\n        self._stack.append(_State.ExpectingValue)\n        self._tokenize_value()\n\n    def _tokenize_after_array_value(self) -> None:\n        \"\"\"Tokenize after an array value (expect , or ])\"\"\"\n        self.input.skip_past_whitespace()\n        next_char = self.input.try_to_take_char_code()\n\n        if next_char is None:\n            return\n        elif next_char == 0x5D:  # ]\n            self._handler.handle_array_end()\n            self._emitted_tokens += 1\n            self._stack.pop()\n            return\n        elif next_char == 0x2C:  # ,\n            self._stack.append(_State.ExpectingValue)\n            self._tokenize_value()\n            return\n        else:\n            raise ValueError(f\"Expected , or ], got {chr(next_char)!r}\")\n\n    def _tokenize_object_start(self) -> None:\n        \"\"\"Tokenize start of object (check for empty or first key)\"\"\"\n        self.input.skip_past_whitespace()\n        next_char = self.input.try_to_take_char_code()\n\n        if next_char is None:\n            return\n        elif next_char == 0x7D:  # }\n            self._handler.handle_object_end()\n            self._emitted_tokens += 1\n            self._stack.pop()\n            return\n        elif next_char == 0x22:  # \"\n            self._stack.pop()\n            self._stack.append(_State.AfterObjectKey)\n            self._stack.append(_State.InString)\n            self._handler.handle_string_start()\n            self._emitted_tokens += 1\n            self._tokenize_string()\n            return\n        else:\n            raise ValueError(f\"Expected start of object key, got {chr(next_char)!r}\")\n\n    def _tokenize_after_object_key(self) -> None:\n        \"\"\"Tokenize after object key (expect :)\"\"\"\n        self.input.skip_past_whitespace()\n        next_char = self.input.try_to_take_char_code()\n\n        if next_char is None:\n            return\n        elif next_char == 0x3A:  # :\n            self._stack.pop()\n            self._stack.append(_State.AfterObjectValue)\n            self._stack.append(_State.ExpectingValue)\n            self._tokenize_value()\n            return\n        else:\n            raise ValueError(f\"Expected colon after object key, got {chr(next_char)!r}\")\n\n    def _tokenize_after_object_value(self) -> None:\n        \"\"\"Tokenize after object value (expect , or })\"\"\"\n        self.input.skip_past_whitespace()\n        next_char = self.input.try_to_take_char_code()\n\n        if next_char is None:\n            return\n        elif next_char == 0x7D:  # }\n            self._handler.handle_object_end()\n            self._emitted_tokens += 1\n            self._stack.pop()\n            return\n        elif next_char == 0x2C:  # ,\n            self._stack.pop()\n            self._stack.append(_State.BeforeObjectKey)\n            self._tokenize_before_object_key()\n            return\n        else:\n            raise ValueError(\n                f\"Expected , or }} after object value, got {chr(next_char)!r}\"\n            )\n\n    def _tokenize_before_object_key(self) -> None:\n        \"\"\"Tokenize before object key (after comma)\"\"\"\n        self.input.skip_past_whitespace()\n        next_char = self.input.try_to_take_char_code()\n\n        if next_char is None:\n            return\n        elif next_char == 0x22:  # \"\n            self._stack.pop()\n            self._stack.append(_State.AfterObjectKey)\n            self._stack.append(_State.InString)\n            self._handler.handle_string_start()\n            self._emitted_tokens += 1\n            self._tokenize_string()\n            return\n        else:\n            raise ValueError(f\"Expected start of object key, got {chr(next_char)!r}\")\n"
  },
  {
    "path": "backend/onyx/utils/logger.py",
    "content": "import contextvars\nimport logging\nimport os\nfrom collections.abc import MutableMapping\nfrom logging.handlers import RotatingFileHandler\nfrom typing import Any\n\nfrom onyx.utils.tenant import get_tenant_id_short_string\nfrom shared_configs.configs import DEV_LOGGING_ENABLED\nfrom shared_configs.configs import LOG_FILE_NAME\nfrom shared_configs.configs import LOG_LEVEL\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.configs import SLACK_CHANNEL_ID\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom shared_configs.contextvars import INDEX_ATTEMPT_INFO_CONTEXTVAR\nfrom shared_configs.contextvars import ONYX_REQUEST_ID_CONTEXTVAR\n\n\nlogging.addLevelName(logging.INFO + 5, \"NOTICE\")\n\npruning_ctx: contextvars.ContextVar[dict[str, Any]] = contextvars.ContextVar(\n    \"pruning_ctx\", default=dict()\n)\n\ndoc_permission_sync_ctx: contextvars.ContextVar[dict[str, Any]] = (\n    contextvars.ContextVar(\"doc_permission_sync_ctx\", default=dict())\n)\n\n\nclass LoggerContextVars:\n    @staticmethod\n    def reset() -> None:\n        pruning_ctx.set(dict())\n        doc_permission_sync_ctx.set(dict())\n\n\ndef get_log_level_from_str(log_level_str: str = LOG_LEVEL) -> int:\n    log_level_dict = {\n        \"CRITICAL\": logging.CRITICAL,\n        \"ERROR\": logging.ERROR,\n        \"WARNING\": logging.WARNING,\n        \"NOTICE\": logging.getLevelName(\"NOTICE\"),\n        \"INFO\": logging.INFO,\n        \"DEBUG\": logging.DEBUG,\n        \"NOTSET\": logging.NOTSET,\n    }\n\n    return log_level_dict.get(log_level_str.upper(), logging.INFO)\n\n\nclass OnyxRequestIDFilter(logging.Filter):\n    def filter(self, record: logging.LogRecord) -> bool:\n        from shared_configs.contextvars import ONYX_REQUEST_ID_CONTEXTVAR\n\n        record.request_id = ONYX_REQUEST_ID_CONTEXTVAR.get() or \"-\"\n        return True\n\n\nclass OnyxLoggingAdapter(logging.LoggerAdapter):\n    def process(\n        self, msg: str, kwargs: MutableMapping[str, Any]\n    ) -> tuple[str, MutableMapping[str, Any]]:\n        # If this is an indexing job, add the attempt ID to the log message\n        # This helps filter the logs for this specific indexing\n        while True:\n            pruning_ctx_dict = pruning_ctx.get()\n            if len(pruning_ctx_dict) > 0:\n                if \"request_id\" in pruning_ctx_dict:\n                    msg = f\"[Prune: {pruning_ctx_dict['request_id']}] {msg}\"\n\n                if \"cc_pair_id\" in pruning_ctx_dict:\n                    msg = f\"[CC Pair: {pruning_ctx_dict['cc_pair_id']}] {msg}\"\n                break\n\n            doc_permission_sync_ctx_dict = doc_permission_sync_ctx.get()\n            if len(doc_permission_sync_ctx_dict) > 0:\n                if \"request_id\" in doc_permission_sync_ctx_dict:\n                    msg = f\"[Doc Permissions Sync: {doc_permission_sync_ctx_dict['request_id']}] {msg}\"\n                break\n\n            index_attempt_info = INDEX_ATTEMPT_INFO_CONTEXTVAR.get()\n            if index_attempt_info:\n                cc_pair_id, index_attempt_id = index_attempt_info\n                msg = (\n                    f\"[Index Attempt: {index_attempt_id}] [CC Pair: {cc_pair_id}] {msg}\"\n                )\n\n            break\n\n        # Add tenant information if it differs from default\n        # This will always be the case for authenticated API requests\n        if MULTI_TENANT:\n            tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()\n            if tenant_id != POSTGRES_DEFAULT_SCHEMA and tenant_id is not None:\n                # Get a short string representation of the tenant id for cleaner\n                # logs.\n                short_tenant = get_tenant_id_short_string(tenant_id)\n                msg = f\"[t:{short_tenant}] {msg}\"\n\n        # request id within a fastapi route\n        fastapi_request_id = ONYX_REQUEST_ID_CONTEXTVAR.get()\n        if fastapi_request_id:\n            msg = f\"[{fastapi_request_id}] {msg}\"\n\n        # For Slack Bot, logs the channel relevant to the request\n        channel_id = self.extra.get(SLACK_CHANNEL_ID) if self.extra else None\n        if channel_id:\n            msg = f\"[Channel ID: {channel_id}] {msg}\"\n\n        return msg, kwargs\n\n    def notice(self, msg: Any, *args: Any, **kwargs: Any) -> None:\n        # Stacklevel is set to 2 to point to the actual caller of notice instead of here\n        self.log(\n            logging.getLevelName(\"NOTICE\"), str(msg), *args, **kwargs, stacklevel=2\n        )\n\n\nclass PlainFormatter(logging.Formatter):\n    \"\"\"Adds log levels.\"\"\"\n\n    def format(self, record: logging.LogRecord) -> str:\n        levelname = record.levelname\n        level_display = f\"{levelname}:\"\n        formatted_message = super().format(record)\n        return f\"{level_display.ljust(9)} {formatted_message}\"\n\n\nclass ColoredFormatter(logging.Formatter):\n    \"\"\"Custom formatter to add colors to log levels.\"\"\"\n\n    COLORS = {\n        \"CRITICAL\": \"\\033[91m\",  # Red\n        \"ERROR\": \"\\033[91m\",  # Red\n        \"WARNING\": \"\\033[93m\",  # Yellow\n        \"NOTICE\": \"\\033[94m\",  # Blue\n        \"INFO\": \"\\033[92m\",  # Green\n        \"DEBUG\": \"\\033[96m\",  # Light Green\n        \"NOTSET\": \"\\033[91m\",  # Reset\n    }\n\n    def format(self, record: logging.LogRecord) -> str:\n        levelname = record.levelname\n        if levelname in self.COLORS:\n            prefix = self.COLORS[levelname]\n            suffix = \"\\033[0m\"\n            formatted_message = super().format(record)\n            # Ensure the levelname with colon is 9 characters long\n            # accounts for the extra characters for coloring\n            level_display = f\"{prefix}{levelname}{suffix}:\"\n            return f\"{level_display.ljust(18)} {formatted_message}\"\n        return super().format(record)\n\n\ndef get_uvicorn_standard_formatter() -> ColoredFormatter:\n    \"\"\"Returns a standard colored logging formatter.\"\"\"\n    return ColoredFormatter(\n        \"%(asctime)s %(filename)30s %(lineno)4s: [%(request_id)s] %(message)s\",\n        datefmt=\"%m/%d/%Y %I:%M:%S %p\",\n    )\n\n\ndef get_standard_formatter() -> ColoredFormatter:\n    \"\"\"Returns a standard colored logging formatter.\"\"\"\n    return ColoredFormatter(\n        \"%(asctime)s %(filename)30s %(lineno)4s: %(message)s\",\n        datefmt=\"%m/%d/%Y %I:%M:%S %p\",\n    )\n\n\nDANSWER_DOCKER_ENV_STR = \"DANSWER_RUNNING_IN_DOCKER\"\n\n\ndef is_running_in_container() -> bool:\n    return os.getenv(DANSWER_DOCKER_ENV_STR) == \"true\"\n\n\ndef setup_logger(\n    name: str = __name__,\n    log_level: int = get_log_level_from_str(),\n    extra: MutableMapping[str, Any] | None = None,\n    propagate: bool = True,\n) -> OnyxLoggingAdapter:\n    logger = logging.getLogger(name)\n\n    # If the logger already has handlers, assume it was already configured and return it.\n    if logger.handlers:\n        return OnyxLoggingAdapter(logger, extra=extra)\n\n    logger.setLevel(log_level)\n\n    formatter = get_standard_formatter()\n\n    handler = logging.StreamHandler()\n    handler.setLevel(log_level)\n    handler.setFormatter(formatter)\n\n    logger.addHandler(handler)\n\n    is_containerized = is_running_in_container()\n    if LOG_FILE_NAME and (is_containerized or DEV_LOGGING_ENABLED):\n        log_levels = [\"debug\", \"info\", \"notice\"]\n        for level in log_levels:\n            file_name = (\n                f\"/var/log/onyx/{LOG_FILE_NAME}_{level}.log\"\n                if is_containerized\n                else f\"./log/{LOG_FILE_NAME}_{level}.log\"\n            )\n            # Ensure the log directory exists\n            log_dir = os.path.dirname(file_name)\n            if not os.path.exists(log_dir):\n                os.makedirs(log_dir, exist_ok=True)\n\n            # Truncate log file if DEV_LOGGING_ENABLED (for clean dev experience)\n            if DEV_LOGGING_ENABLED and os.path.exists(file_name):\n                try:\n                    open(file_name, \"w\").close()  # Truncate the file\n                except Exception:\n                    pass  # Ignore errors, just proceed with normal logging\n\n            file_handler = RotatingFileHandler(\n                file_name,\n                maxBytes=25 * 1024 * 1024,  # 25 MB\n                backupCount=5,  # Keep 5 backup files\n            )\n            file_handler.setLevel(get_log_level_from_str(level))\n            file_handler.setFormatter(formatter)\n            logger.addHandler(file_handler)\n\n    logger.notice = lambda msg, *args, **kwargs: logger.log(logging.getLevelName(\"NOTICE\"), msg, *args, **kwargs)  # type: ignore\n\n    # After handler configuration, disable propagation to avoid duplicate logs\n    # Prevent messages from propagating to the root logger which can cause\n    # duplicate log entries when the root logger is also configured with its\n    # own handler (e.g. by Uvicorn / Celery).\n    logger.propagate = propagate\n\n    return OnyxLoggingAdapter(logger, extra=extra)\n\n\ndef setup_uvicorn_logger(\n    log_level: int = get_log_level_from_str(),\n    shared_file_handlers: list[logging.FileHandler] | None = None,\n) -> None:\n    uvicorn_logger = logging.getLogger(\"uvicorn.access\")\n    if not uvicorn_logger:\n        return\n\n    formatter = get_uvicorn_standard_formatter()\n\n    handler = logging.StreamHandler()\n    handler.setLevel(log_level)\n    handler.setFormatter(formatter)\n\n    uvicorn_logger.handlers = []\n    uvicorn_logger.addHandler(handler)\n    uvicorn_logger.setLevel(log_level)\n    uvicorn_logger.addFilter(OnyxRequestIDFilter())\n\n    if shared_file_handlers:\n        for fh in shared_file_handlers:\n            uvicorn_logger.addHandler(fh)\n\n    return\n\n\ndef print_loggers() -> None:\n    \"\"\"Print information about all loggers. Use to debug logging issues.\"\"\"\n    root_logger = logging.getLogger()\n    loggers: list[logging.Logger | logging.PlaceHolder] = [root_logger]\n    loggers.extend(logging.Logger.manager.loggerDict.values())\n\n    for logger in loggers:\n        if isinstance(logger, logging.PlaceHolder):\n            # Skip placeholders that aren't actual loggers\n            continue\n\n        print(f\"Logger: '{logger.name}' (Level: {logging.getLevelName(logger.level)})\")\n        if logger.handlers:\n            for handler in logger.handlers:\n                print(f\"  Handler: {handler}\")\n        else:\n            print(\"  No handlers\")\n\n        print(f\"  Propagate: {logger.propagate}\")\n        print()\n\n\ndef format_error_for_logging(e: Exception) -> str:\n    \"\"\"Clean error message by removing newlines for better logging.\"\"\"\n    return str(e).replace(\"\\n\", \" \")\n"
  },
  {
    "path": "backend/onyx/utils/long_term_log.py",
    "content": "import json\nimport os\nimport threading\nfrom datetime import datetime\nfrom pathlib import Path\nfrom typing import Any\n\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.special_types import JSON_ro\n\nlogger = setup_logger()\n\n_LOG_FILE_NAME_TIMESTAMP_FORMAT = \"%Y-%m-%d_%H-%M-%S-%f\"\n\n\n# NOTE: This is no longer used but keeping it around in case it's reintroduced\nclass LongTermLogger:\n    \"\"\"NOTE: should support a LOT of data AND should be extremely fast,\n    ideally done in a background thread.\"\"\"\n\n    def __init__(\n        self,\n        metadata: dict[str, str] | None = None,\n        log_file_path: str = \"/tmp/long_term_log\",\n        max_files_per_category: int = 1000,\n    ):\n        self.metadata = metadata\n        self.log_file_path = Path(log_file_path)\n        self.max_files_per_category = max_files_per_category\n        try:\n            # Create directory if it doesn't exist\n            os.makedirs(os.path.dirname(log_file_path), exist_ok=True)\n        except Exception:\n            # logger.error(f\"Error creating directory for long-term logs: {e}\")\n            pass\n\n    def _cleanup_old_files(self, category_path: Path) -> None:\n        try:\n            files = sorted(\n                [f for f in category_path.glob(\"*.json\")],\n                key=lambda x: x.stat().st_mtime,  # Sort by modification time\n                reverse=True,\n            )\n\n            # Delete oldest files that exceed the limit\n            for file in files[self.max_files_per_category :]:\n                if not file.is_file():\n                    logger.debug(f\"File already deleted: {file}\")\n                    continue\n                try:\n                    file.unlink()\n                except Exception:\n                    pass\n                    # logger.error(f\"Error deleting old log file {file\n                    # }: {e}\")\n        except Exception:\n            pass\n            # logger.error(f\"Error during log rotation cleanup: {e}\")\n\n    def _record(self, message: Any, category: str) -> None:\n        category_path = self.log_file_path / category\n        try:\n            # Create directory if it doesn't exist\n            os.makedirs(category_path, exist_ok=True)\n\n            # Perform cleanup before writing new file\n            self._cleanup_old_files(category_path)\n\n            final_record = {\n                \"metadata\": self.metadata,\n                \"record\": message,\n            }\n\n            file_path = (\n                category_path\n                / f\"{datetime.now().strftime(_LOG_FILE_NAME_TIMESTAMP_FORMAT)}.json\"\n            )\n            with open(file_path, \"w+\") as f:\n                # default allows us to \"ignore\" unserializable objects\n                json.dump(final_record, f, default=lambda x: str(x))\n        except Exception:\n            # logger.error(f\"Error recording log: {e}\")\n            pass\n\n    def record(self, message: JSON_ro, category: str = \"default\") -> None:\n        try:\n            # Run in separate thread to have minimal overhead in main flows\n            thread = threading.Thread(\n                target=self._record, args=(message, category), daemon=True\n            )\n            thread.start()\n        except Exception:\n            # Should never interfere with normal functions of Onyx\n            pass\n\n    def fetch_category(\n        self,\n        category: str,\n        start_time: datetime | None = None,\n        end_time: datetime | None = None,\n        limit: int = 100,  # noqa: ARG002\n    ) -> list[JSON_ro]:\n        category_path = self.log_file_path / category\n        files = list(category_path.glob(\"*.json\"))\n\n        results: list[JSON_ro] = []\n        for file in files:\n            # Parse timestamp from filename (YYYY-MM-DD_HH-MM-SS.json)\n            try:\n                file_time = datetime.strptime(\n                    file.stem, _LOG_FILE_NAME_TIMESTAMP_FORMAT\n                )\n\n                # Skip if outside time range\n                if start_time and file_time < start_time:\n                    continue\n                if end_time and file_time > end_time:\n                    continue\n\n                results.append(json.loads(file.read_text()))\n            except ValueError:\n                # Skip files that don't match expected format\n                continue\n\n        return results\n"
  },
  {
    "path": "backend/onyx/utils/memory_logger.py",
    "content": "# # leaving this here for future mem debugging efforts\n# import os\n# from typing import Any\n\n# import psutil\n# from pympler import asizeof\n\n# from onyx.utils.logger import setup_logger\n\n# logger = setup_logger()\n\n#\n# def log_memory_usage(\n#     label: str,\n#     specific_object: Any = None,\n#     object_label: str = \"\",\n# ) -> None:\n#     \"\"\"Log current process memory usage and optionally the size of a specific object.\n\n#     Args:\n#         label: A descriptive label for the current location/operation in code\n#         specific_object: Optional object to measure the size of\n#         object_label: Optional label describing the specific object\n#     \"\"\"\n#     try:\n#         # Get current process memory info\n#         process = psutil.Process(os.getpid())\n#         memory_info = process.memory_info()\n\n#         # Convert to MB for readability\n#         rss_mb = memory_info.rss / (1024 * 1024)\n#         vms_mb = memory_info.vms / (1024 * 1024)\n\n#         log_parts = [f\"MEMORY[{label}]\", f\"RSS: {rss_mb:.2f}MB\", f\"VMS: {vms_mb:.2f}MB\"]\n\n#         # Add object size if provided\n#         if specific_object is not None:\n#             try:\n#                 # recursively calculate the size of the object\n#                 obj_size = asizeof.asizeof(specific_object)\n#                 obj_size_mb = obj_size / (1024 * 1024)\n#                 obj_desc = f\"[{object_label}]\" if object_label else \"[object]\"\n#                 log_parts.append(f\"OBJ{obj_desc}: {obj_size_mb:.2f}MB\")\n#             except Exception as e:\n#                 log_parts.append(f\"OBJ_SIZE_ERROR: {str(e)}\")\n\n#         logger.info(\" | \".join(log_parts))\n\n#     except Exception as e:\n#         logger.warning(f\"Failed to log memory usage for {label}: {str(e)}\")\n\n# For example, use this like:\n# log_memory_usage(\"my_operation\", my_large_object, \"my_large_object\")\n"
  },
  {
    "path": "backend/onyx/utils/middleware.py",
    "content": "import base64\nimport hashlib\nimport logging\nimport re\nimport uuid\nfrom collections.abc import Awaitable\nfrom collections.abc import Callable\nfrom datetime import datetime\nfrom datetime import timezone\n\nfrom fastapi import FastAPI\nfrom fastapi import Request\nfrom fastapi import Response\nfrom fastapi.routing import APIRoute\n\nfrom shared_configs.contextvars import CURRENT_ENDPOINT_CONTEXTVAR\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom shared_configs.contextvars import ONYX_REQUEST_ID_CONTEXTVAR\n\n\ndef add_onyx_tenant_id_middleware(\n    app: FastAPI,\n    logger: logging.LoggerAdapter,  # noqa: ARG001\n) -> None:\n    @app.middleware(\"http\")\n    async def set_tenant_id(\n        request: Request, call_next: Callable[[Request], Awaitable[Response]]\n    ) -> Response:\n        \"\"\"Captures and sets the context var for the tenant.\"\"\"\n\n        onyx_tenant_id = request.headers.get(\"X-Onyx-Tenant-ID\")\n        if onyx_tenant_id:\n            CURRENT_TENANT_ID_CONTEXTVAR.set(onyx_tenant_id)\n        return await call_next(request)\n\n\ndef add_onyx_request_id_middleware(\n    app: FastAPI,\n    prefix: str,\n    logger: logging.LoggerAdapter,  # noqa: ARG001\n) -> None:\n    @app.middleware(\"http\")\n    async def set_request_id(\n        request: Request, call_next: Callable[[Request], Awaitable[Response]]\n    ) -> Response:\n        \"\"\"Generate a request hash that can be used to track the lifecycle\n        of a request.  The hash is prefixed to help indicated where the request id\n        originated.\n\n        Format is f\"{PREFIX}:{ID}\" where PREFIX is 3 chars and ID is 8 chars.\n        Total length is 12 chars.\n        \"\"\"\n\n        onyx_request_id = request.headers.get(\"X-Onyx-Request-ID\")\n        if not onyx_request_id:\n            onyx_request_id = make_randomized_onyx_request_id(prefix)\n\n        ONYX_REQUEST_ID_CONTEXTVAR.set(onyx_request_id)\n        return await call_next(request)\n\n\ndef make_randomized_onyx_request_id(prefix: str) -> str:\n    \"\"\"generates a randomized request id\"\"\"\n\n    hash_input = str(uuid.uuid4())\n    return _make_onyx_request_id(prefix, hash_input)\n\n\ndef make_structured_onyx_request_id(prefix: str, request_url: str) -> str:\n    \"\"\"Not used yet, but could be in the future!\"\"\"\n    hash_input = f\"{request_url}:{datetime.now(timezone.utc)}\"\n    return _make_onyx_request_id(prefix, hash_input)\n\n\ndef _make_onyx_request_id(prefix: str, hash_input: str) -> str:\n    \"\"\"helper function to return an id given a string input\"\"\"\n    hash_obj = hashlib.md5(hash_input.encode(\"utf-8\"), usedforsecurity=False)\n    hash_bytes = hash_obj.digest()[:6]  # Truncate to 6 bytes\n\n    # 6 bytes becomes 8 bytes. we shouldn't need to strip but just in case\n    # NOTE: possible we'll want more input bytes if id's aren't unique enough\n    hash_str = base64.urlsafe_b64encode(hash_bytes).decode(\"utf-8\").rstrip(\"=\")\n    onyx_request_id = f\"{prefix}:{hash_str}\"\n    return onyx_request_id\n\n\ndef _build_route_map(app: FastAPI) -> list[tuple[re.Pattern[str], str]]:\n    \"\"\"Build a list of (compiled regex, route template) from the app's routes.\n\n    Used by endpoint context middleware to resolve request paths to route\n    templates, avoiding high-cardinality raw paths in metrics labels.\n    \"\"\"\n    route_map: list[tuple[re.Pattern[str], str]] = []\n    for route in app.routes:\n        if isinstance(route, APIRoute):\n            route_map.append((route.path_regex, route.path))\n    return route_map\n\n\ndef _match_route(route_map: list[tuple[re.Pattern[str], str]], path: str) -> str | None:\n    \"\"\"Match a request path against the route map and return the template.\"\"\"\n    for pattern, template in route_map:\n        if pattern.match(path):\n            return template\n    return None\n\n\ndef add_endpoint_context_middleware(app: FastAPI) -> None:\n    \"\"\"Set CURRENT_ENDPOINT_CONTEXTVAR so Prometheus pool metrics can\n    attribute DB connections to the endpoint that checked them out.\n\n    Used by ``onyx_db_connections_held_by_endpoint`` and\n    ``onyx_db_connection_hold_seconds`` in the pool event listeners.\n\n    Resolves request paths to route templates (e.g. /api/chat/{chat_id}\n    instead of /api/chat/abc-123) to keep metric label cardinality low.\n\n    Must be registered AFTER all routes are added to the app.\n    \"\"\"\n    route_map = _build_route_map(app)\n\n    @app.middleware(\"http\")\n    async def set_endpoint_context(\n        request: Request, call_next: Callable[[Request], Awaitable[Response]]\n    ) -> Response:\n        handler = _match_route(route_map, request.url.path)\n        token = CURRENT_ENDPOINT_CONTEXTVAR.set(handler or \"unmatched\")\n        try:\n            return await call_next(request)\n        finally:\n            CURRENT_ENDPOINT_CONTEXTVAR.reset(token)\n"
  },
  {
    "path": "backend/onyx/utils/object_size_check.py",
    "content": "import sys\nfrom typing import TypeVar\n\nT = TypeVar(\"T\", dict, list, tuple, set, frozenset)\n\n\ndef deep_getsizeof(obj: T, seen: set[int] | None = None) -> int:\n    \"\"\"Recursively sum size of objects, handling circular references.\"\"\"\n    if seen is None:\n        seen = set()\n\n    obj_id = id(obj)\n    if obj_id in seen:\n        return 0  # Prevent infinite recursion for circular references\n\n    seen.add(obj_id)\n    size = sys.getsizeof(obj)\n\n    if isinstance(obj, dict):\n        size += sum(\n            deep_getsizeof(k, seen) + deep_getsizeof(v, seen) for k, v in obj.items()\n        )\n    elif isinstance(obj, (list, tuple, set, frozenset)):\n        size += sum(deep_getsizeof(i, seen) for i in obj)\n\n    return size\n"
  },
  {
    "path": "backend/onyx/utils/postgres_sanitization.py",
    "content": "import re\nfrom typing import Any\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n_SURROGATE_RE = re.compile(r\"[\\ud800-\\udfff]\")\n\n\ndef sanitize_string(value: str) -> str:\n    \"\"\"Strip characters that PostgreSQL text/JSONB columns cannot store.\n\n    Removes:\n    - NUL bytes (\\\\x00)\n    - UTF-16 surrogates (\\\\ud800-\\\\udfff), which are invalid in UTF-8\n    \"\"\"\n    sanitized = value.replace(\"\\x00\", \"\")\n    sanitized = _SURROGATE_RE.sub(\"\", sanitized)\n    if value and not sanitized:\n        logger.warning(\n            \"sanitize_string: all characters were removed from a non-empty string\"\n        )\n    return sanitized\n\n\ndef sanitize_json_like(value: Any) -> Any:\n    \"\"\"Recursively sanitize all strings in a JSON-like structure (dict/list/tuple).\"\"\"\n    if isinstance(value, str):\n        return sanitize_string(value)\n\n    if isinstance(value, list):\n        return [sanitize_json_like(item) for item in value]\n\n    if isinstance(value, tuple):\n        return tuple(sanitize_json_like(item) for item in value)\n\n    if isinstance(value, dict):\n        sanitized: dict[Any, Any] = {}\n        for key, nested_value in value.items():\n            cleaned_key = sanitize_string(key) if isinstance(key, str) else key\n            sanitized[cleaned_key] = sanitize_json_like(nested_value)\n        return sanitized\n\n    return value\n\n\ndef _sanitize_expert_info(expert: BasicExpertInfo) -> BasicExpertInfo:\n    return expert.model_copy(\n        update={\n            \"display_name\": (\n                sanitize_string(expert.display_name)\n                if expert.display_name is not None\n                else None\n            ),\n            \"first_name\": (\n                sanitize_string(expert.first_name)\n                if expert.first_name is not None\n                else None\n            ),\n            \"middle_initial\": (\n                sanitize_string(expert.middle_initial)\n                if expert.middle_initial is not None\n                else None\n            ),\n            \"last_name\": (\n                sanitize_string(expert.last_name)\n                if expert.last_name is not None\n                else None\n            ),\n            \"email\": (\n                sanitize_string(expert.email) if expert.email is not None else None\n            ),\n        }\n    )\n\n\ndef _sanitize_external_access(external_access: ExternalAccess) -> ExternalAccess:\n    return ExternalAccess(\n        external_user_emails={\n            sanitize_string(email) for email in external_access.external_user_emails\n        },\n        external_user_group_ids={\n            sanitize_string(group_id)\n            for group_id in external_access.external_user_group_ids\n        },\n        is_public=external_access.is_public,\n    )\n\n\ndef sanitize_document_for_postgres(document: Document) -> Document:\n    cleaned_doc = document.model_copy(deep=True)\n\n    cleaned_doc.id = sanitize_string(cleaned_doc.id)\n    cleaned_doc.semantic_identifier = sanitize_string(cleaned_doc.semantic_identifier)\n    if cleaned_doc.title is not None:\n        cleaned_doc.title = sanitize_string(cleaned_doc.title)\n    if cleaned_doc.parent_hierarchy_raw_node_id is not None:\n        cleaned_doc.parent_hierarchy_raw_node_id = sanitize_string(\n            cleaned_doc.parent_hierarchy_raw_node_id\n        )\n\n    cleaned_doc.metadata = {\n        sanitize_string(key): (\n            [sanitize_string(item) for item in value]\n            if isinstance(value, list)\n            else sanitize_string(value)\n        )\n        for key, value in cleaned_doc.metadata.items()\n    }\n\n    if cleaned_doc.doc_metadata is not None:\n        cleaned_doc.doc_metadata = sanitize_json_like(cleaned_doc.doc_metadata)\n\n    if cleaned_doc.primary_owners is not None:\n        cleaned_doc.primary_owners = [\n            _sanitize_expert_info(expert) for expert in cleaned_doc.primary_owners\n        ]\n    if cleaned_doc.secondary_owners is not None:\n        cleaned_doc.secondary_owners = [\n            _sanitize_expert_info(expert) for expert in cleaned_doc.secondary_owners\n        ]\n\n    if cleaned_doc.external_access is not None:\n        cleaned_doc.external_access = _sanitize_external_access(\n            cleaned_doc.external_access\n        )\n\n    for section in cleaned_doc.sections:\n        if section.link is not None:\n            section.link = sanitize_string(section.link)\n        if section.text is not None:\n            section.text = sanitize_string(section.text)\n        if section.image_file_id is not None:\n            section.image_file_id = sanitize_string(section.image_file_id)\n\n    return cleaned_doc\n\n\ndef sanitize_documents_for_postgres(documents: list[Document]) -> list[Document]:\n    return [sanitize_document_for_postgres(document) for document in documents]\n\n\ndef sanitize_hierarchy_node_for_postgres(node: HierarchyNode) -> HierarchyNode:\n    cleaned_node = node.model_copy(deep=True)\n\n    cleaned_node.raw_node_id = sanitize_string(cleaned_node.raw_node_id)\n    cleaned_node.display_name = sanitize_string(cleaned_node.display_name)\n    if cleaned_node.raw_parent_id is not None:\n        cleaned_node.raw_parent_id = sanitize_string(cleaned_node.raw_parent_id)\n    if cleaned_node.link is not None:\n        cleaned_node.link = sanitize_string(cleaned_node.link)\n\n    if cleaned_node.external_access is not None:\n        cleaned_node.external_access = _sanitize_external_access(\n            cleaned_node.external_access\n        )\n\n    return cleaned_node\n\n\ndef sanitize_hierarchy_nodes_for_postgres(\n    nodes: list[HierarchyNode],\n) -> list[HierarchyNode]:\n    return [sanitize_hierarchy_node_for_postgres(node) for node in nodes]\n"
  },
  {
    "path": "backend/onyx/utils/pydantic_util.py",
    "content": "from typing import Any\n\nfrom pydantic import BaseModel\n\n\ndef shallow_model_dump(model_instance: BaseModel) -> dict[str, Any]:\n    \"\"\"Like model_dump(), but returns references to field values instead of\n    deep copies. Use with model_construct() to avoid unnecessary memory\n    duplication when building subclass instances.\"\"\"\n    return {\n        field_name: getattr(model_instance, field_name)\n        for field_name in model_instance.__class__.model_fields\n    }\n"
  },
  {
    "path": "backend/onyx/utils/retry_wrapper.py",
    "content": "from collections.abc import Callable\nfrom logging import Logger\nfrom typing import Any\nfrom typing import cast\nfrom typing import TypeVar\n\nimport requests\nfrom retry import retry\n\nfrom onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nF = TypeVar(\"F\", bound=Callable[..., Any])\n\n\ndef retry_builder(\n    tries: int = 20,\n    delay: float = 0.1,\n    max_delay: float | None = 60,\n    backoff: float = 2,\n    jitter: tuple[float, float] | float = 1,\n    exceptions: type[Exception] | tuple[type[Exception], ...] = (Exception,),\n) -> Callable[[F], F]:\n    \"\"\"Builds a generic wrapper/decorator for calls to external APIs that\n    may fail due to rate limiting, flakes, or other reasons. Applies exponential\n    backoff with jitter to retry the call.\"\"\"\n\n    def retry_with_default(func: F) -> F:\n        @retry(\n            tries=tries,\n            delay=delay,\n            max_delay=max_delay,\n            backoff=backoff,\n            jitter=jitter,\n            logger=cast(Logger, logger),\n            exceptions=exceptions,\n        )\n        def wrapped_func(*args: list, **kwargs: dict[str, Any]) -> Any:\n            return func(*args, **kwargs)\n\n        return cast(F, wrapped_func)\n\n    return retry_with_default\n\n\ndef request_with_retries(\n    method: str,\n    url: str,\n    *,\n    data: dict[str, Any] | None = None,\n    headers: dict[str, Any] | None = None,\n    params: dict[str, Any] | None = None,\n    timeout: int = REQUEST_TIMEOUT_SECONDS,\n    stream: bool = False,\n    tries: int = 8,\n    delay: float = 1,\n    backoff: float = 2,\n) -> requests.Response:\n    @retry(tries=tries, delay=delay, backoff=backoff, logger=cast(Logger, logger))\n    def _make_request() -> requests.Response:\n        response = requests.request(\n            method=method,\n            url=url,\n            data=data,\n            headers=headers,\n            params=params,\n            timeout=timeout,\n            stream=stream,\n        )\n        try:\n            response.raise_for_status()\n        except requests.exceptions.HTTPError:\n            logger.exception(\n                \"Request failed:\\n%s\",\n                {\n                    \"method\": method,\n                    \"url\": url,\n                    \"data\": data,\n                    \"headers\": headers,\n                    \"params\": params,\n                    \"timeout\": timeout,\n                    \"stream\": stream,\n                },\n            )\n            raise\n        return response\n\n    return _make_request()\n"
  },
  {
    "path": "backend/onyx/utils/search_nlp_models_utils.py",
    "content": "def pass_aws_key(api_key: str) -> tuple[str, str, str]:\n    \"\"\"Parse AWS API key string into components.\n\n    Args:\n        api_key: String in format 'aws_ACCESSKEY_SECRETKEY_REGION'\n\n    Returns:\n        Tuple of (access_key, secret_key, region)\n\n    Raises:\n        ValueError: If key format is invalid\n    \"\"\"\n    if not api_key.startswith(\"aws\"):\n        raise ValueError(\"API key must start with 'aws' prefix\")\n    parts = api_key.split(\"_\")\n    if len(parts) != 4:\n        raise ValueError(\n            f\"API key must be in format 'aws_ACCESSKEY_SECRETKEY_REGION', got {len(parts) - 1} parts. \"\n            \"This is an onyx specific format for formatting the aws secrets for bedrock\"\n        )\n\n    try:\n        _, aws_access_key_id, aws_secret_access_key, aws_region = parts\n        return aws_access_key_id, aws_secret_access_key, aws_region\n    except Exception as e:\n        raise ValueError(f\"Failed to parse AWS key components: {str(e)}\")\n"
  },
  {
    "path": "backend/onyx/utils/sensitive.py",
    "content": "\"\"\"\nWrapper class for sensitive values that require explicit masking decisions.\n\nThis module provides a wrapper for encrypted values that forces developers to\nmake an explicit decision about whether to mask the value when accessing it.\nThis prevents accidental exposure of sensitive data in API responses.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nfrom collections.abc import Callable\nfrom typing import Any\nfrom typing import Generic\nfrom typing import NoReturn\nfrom typing import TypeVar\nfrom unittest.mock import MagicMock\n\nfrom onyx.utils.encryption import mask_credential_dict\nfrom onyx.utils.encryption import mask_string\n\n\nT = TypeVar(\"T\", str, dict[str, Any])\n\n\ndef make_mock_sensitive_value(value: dict[str, Any] | str | None) -> MagicMock:\n    \"\"\"\n    Create a mock SensitiveValue for use in tests.\n\n    This helper makes it easy to create mock objects that behave like\n    SensitiveValue for testing code that uses credentials.\n\n    Args:\n        value: The value to return from get_value(). Can be a dict, string, or None.\n\n    Returns:\n        A MagicMock configured to behave like a SensitiveValue.\n\n    Example:\n        >>> mock_credential = MagicMock()\n        >>> mock_credential.credential_json = make_mock_sensitive_value({\"api_key\": \"secret\"})\n        >>> # Now mock_credential.credential_json.get_value(apply_mask=False) returns {\"api_key\": \"secret\"}\n    \"\"\"\n    if value is None:\n        return None  # type: ignore[return-value]\n\n    mock = MagicMock(spec=SensitiveValue)\n    mock.get_value.return_value = value\n    mock.__bool__ = lambda self: True  # noqa: ARG005\n    return mock\n\n\nclass SensitiveAccessError(Exception):\n    \"\"\"Raised when attempting to access a SensitiveValue without explicit masking decision.\"\"\"\n\n\nclass SensitiveValue(Generic[T]):\n    \"\"\"\n    Wrapper requiring explicit masking decisions for sensitive data.\n\n    This class wraps encrypted data and forces callers to make an explicit\n    decision about whether to mask the value when accessing it. This prevents\n    accidental exposure of sensitive data.\n\n    Usage:\n        # Get raw value (for internal use like connectors)\n        raw_value = sensitive.get_value(apply_mask=False)\n\n        # Get masked value (for API responses)\n        masked_value = sensitive.get_value(apply_mask=True)\n\n    Raises SensitiveAccessError when:\n        - Attempting to convert to string via str() or repr()\n        - Attempting to iterate over the value\n        - Attempting to subscript the value (e.g., value[\"key\"])\n        - Attempting to serialize to JSON without explicit get_value()\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        encrypted_bytes: bytes,\n        decrypt_fn: Callable[[bytes], str],\n        is_json: bool = False,\n    ) -> None:\n        \"\"\"\n        Initialize a SensitiveValue wrapper.\n\n        Args:\n            encrypted_bytes: The encrypted bytes to wrap\n            decrypt_fn: Function to decrypt bytes to string\n            is_json: If True, the decrypted value is JSON and will be parsed to dict\n        \"\"\"\n        self._encrypted_bytes = encrypted_bytes\n        self._decrypt_fn = decrypt_fn\n        self._is_json = is_json\n        # Cache for decrypted value to avoid repeated decryption\n        self._decrypted_value: T | None = None\n\n    def _decrypt(self) -> T:\n        \"\"\"Lazily decrypt and cache the value.\"\"\"\n        if self._decrypted_value is None:\n            decrypted_str = self._decrypt_fn(self._encrypted_bytes)\n            if self._is_json:\n                self._decrypted_value = json.loads(decrypted_str)\n            else:\n                self._decrypted_value = decrypted_str  # type: ignore[assignment]\n        # The return type should always match T based on is_json flag\n        return self._decrypted_value  # type: ignore[return-value]\n\n    def get_value(\n        self,\n        *,\n        apply_mask: bool,\n        mask_fn: Callable[[T], T] | None = None,\n    ) -> T:\n        \"\"\"\n        Get the value with explicit masking decision.\n\n        Args:\n            apply_mask: Required. True = return masked value, False = return raw value\n            mask_fn: Optional custom masking function. Defaults to mask_string for\n                     strings and mask_credential_dict for dicts.\n\n        Returns:\n            The value, either masked or raw depending on apply_mask.\n        \"\"\"\n        value = self._decrypt()\n\n        if not apply_mask:\n            # Callers must not mutate the returned dict — doing so would\n            # desync the cache from the encrypted bytes and the DB.\n            return value\n\n        # Apply masking\n        if mask_fn is not None:\n            return mask_fn(value)\n\n        # Use default masking based on type\n        # Type narrowing doesn't work well here due to the generic T,\n        # but at runtime the types will match\n        if isinstance(value, dict):\n            return mask_credential_dict(value)\n        elif isinstance(value, str):\n            return mask_string(value)\n        else:\n            raise ValueError(f\"Cannot mask value of type {type(value)}\")\n\n    def __bool__(self) -> bool:\n        \"\"\"Allow truthiness checks without exposing the value.\"\"\"\n        return True\n\n    def __str__(self) -> NoReturn:\n        \"\"\"Prevent accidental string conversion.\"\"\"\n        raise SensitiveAccessError(\n            \"Cannot convert SensitiveValue to string. Use .get_value(apply_mask=True/False) to access the value.\"\n        )\n\n    def __repr__(self) -> str:\n        \"\"\"Prevent accidental repr exposure.\"\"\"\n        return \"<SensitiveValue: use .get_value(apply_mask=True/False) to access>\"\n\n    def __iter__(self) -> NoReturn:\n        \"\"\"Prevent iteration over the value.\"\"\"\n        raise SensitiveAccessError(\n            \"Cannot iterate over SensitiveValue. Use .get_value(apply_mask=True/False) to access the value.\"\n        )\n\n    def __getitem__(self, key: Any) -> NoReturn:\n        \"\"\"Prevent subscript access.\"\"\"\n        raise SensitiveAccessError(\n            \"Cannot subscript SensitiveValue. Use .get_value(apply_mask=True/False) to access the value.\"\n        )\n\n    def __eq__(self, other: Any) -> bool:\n        \"\"\"Compare SensitiveValues by their decrypted content.\"\"\"\n        # NOTE: if you attempt to compare a string/dict to a SensitiveValue,\n        # this comparison will return NotImplemented, which then evaluates to False.\n        # This is the convention and required for SQLAlchemy's attribute tracking.\n        if not isinstance(other, SensitiveValue):\n            return NotImplemented\n        return self._decrypt() == other._decrypt()\n\n    def __hash__(self) -> int:\n        \"\"\"Hash based on decrypted content.\"\"\"\n        value = self._decrypt()\n        if isinstance(value, dict):\n            return hash(json.dumps(value, sort_keys=True))\n        return hash(value)\n\n    # Prevent JSON serialization\n    def __json__(self) -> Any:\n        \"\"\"Prevent JSON serialization.\"\"\"\n        raise SensitiveAccessError(\n            \"Cannot serialize SensitiveValue to JSON. Use .get_value(apply_mask=True/False) to access the value.\"\n        )\n\n    # For Pydantic compatibility\n    @classmethod\n    def __get_pydantic_core_schema__(cls, source_type: Any, handler: Any) -> Any:\n        \"\"\"Prevent Pydantic from serializing without explicit get_value().\"\"\"\n        raise SensitiveAccessError(\n            \"Cannot serialize SensitiveValue in Pydantic model. \"\n            \"Use .get_value(apply_mask=True/False) to access the value before serialization.\"\n        )\n"
  },
  {
    "path": "backend/onyx/utils/sitemap.py",
    "content": "import re\nimport xml.etree.ElementTree as ET\nfrom typing import Set\nfrom urllib.parse import urljoin\n\nimport requests\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef _get_sitemap_locations_from_robots(base_url: str) -> Set[str]:\n    \"\"\"Extract sitemap URLs from robots.txt\"\"\"\n    sitemap_urls: set = set()\n    try:\n        robots_url = urljoin(base_url, \"/robots.txt\")\n        resp = requests.get(robots_url, timeout=10)\n        if resp.status_code == 200:\n            for line in resp.text.splitlines():\n                if line.lower().startswith(\"sitemap:\"):\n                    sitemap_url = line.split(\":\", 1)[1].strip()\n                    sitemap_urls.add(sitemap_url)\n    except Exception as e:\n        logger.warning(f\"Error fetching robots.txt: {e}\")\n    return sitemap_urls\n\n\ndef _extract_urls_from_sitemap(sitemap_url: str) -> Set[str]:\n    \"\"\"Extract URLs from a sitemap XML file\"\"\"\n    urls: set[str] = set()\n    try:\n        resp = requests.get(sitemap_url, timeout=10)\n        if resp.status_code != 200:\n            return urls\n\n        root = ET.fromstring(resp.content)\n\n        # Handle both regular sitemaps and sitemap indexes\n        # Remove namespace for easier parsing\n        namespace = re.match(r\"\\{.*\\}\", root.tag)\n        ns = namespace.group(0) if namespace else \"\"\n\n        if root.tag == f\"{ns}sitemapindex\":\n            # This is a sitemap index\n            for sitemap in root.findall(f\".//{ns}loc\"):\n                if sitemap.text:\n                    sub_urls = _extract_urls_from_sitemap(sitemap.text)\n                    urls.update(sub_urls)\n        else:\n            # This is a regular sitemap\n            for url in root.findall(f\".//{ns}loc\"):\n                if url.text:\n                    urls.add(url.text)\n\n    except Exception as e:\n        logger.warning(f\"Error processing sitemap {sitemap_url}: {e}\")\n\n    return urls\n\n\ndef list_pages_for_site(site: str) -> list[str]:\n    \"\"\"Get list of pages from a site's sitemaps\"\"\"\n    site = site.rstrip(\"/\")\n    all_urls = set()\n\n    # Try both common sitemap locations\n    sitemap_paths = [\"/sitemap.xml\", \"/sitemap_index.xml\"]\n    for path in sitemap_paths:\n        sitemap_url = urljoin(site, path)\n        all_urls.update(_extract_urls_from_sitemap(sitemap_url))\n\n    # Check robots.txt for additional sitemaps\n    sitemap_locations = _get_sitemap_locations_from_robots(site)\n    for sitemap_url in sitemap_locations:\n        all_urls.update(_extract_urls_from_sitemap(sitemap_url))\n\n    return list(all_urls)\n"
  },
  {
    "path": "backend/onyx/utils/special_types.py",
    "content": "from collections.abc import Mapping\nfrom collections.abc import Sequence\nfrom typing import TypeAlias\n\nJSON_ro: TypeAlias = (\n    Mapping[str, \"JSON_ro\"] | Sequence[\"JSON_ro\"] | str | int | float | bool | None\n)\n"
  },
  {
    "path": "backend/onyx/utils/subclasses.py",
    "content": "from __future__ import annotations\n\nimport importlib\nimport os\nimport pkgutil\nimport sys\nfrom types import ModuleType\nfrom typing import List\nfrom typing import Type\nfrom typing import TypeVar\n\nT = TypeVar(\"T\")\n\n\ndef import_all_modules_from_dir(dir_path: str) -> List[ModuleType]:\n    \"\"\"\n    Imports all modules found in the given directory and its subdirectories,\n    returning a list of imported module objects.\n    \"\"\"\n    dir_path = os.path.abspath(dir_path)\n\n    if dir_path not in sys.path:\n        sys.path.insert(0, dir_path)\n\n    imported_modules: List[ModuleType] = []\n\n    for _, package_name, _ in pkgutil.walk_packages([dir_path]):\n        try:\n            module = importlib.import_module(package_name)\n            imported_modules.append(module)\n        except Exception as e:\n            # Handle or log exceptions as needed\n            print(f\"Could not import {package_name}: {e}\")\n\n    return imported_modules\n\n\ndef import_all_submodules_from_package(package_name: str) -> List[ModuleType]:\n    \"\"\"\n    Imports all submodules of a given package WITHOUT mutating sys.path.\n    Uses the package's __path__ and imports with fully-qualified names.\n    \"\"\"\n    imported_modules: List[ModuleType] = []\n\n    try:\n        pkg = importlib.import_module(package_name)\n    except Exception as e:\n        print(f\"Could not import package {package_name}: {e}\")\n        return imported_modules\n\n    pkg_paths = getattr(pkg, \"__path__\", None)\n    if not pkg_paths:\n        return imported_modules\n\n    for _, module_name, _ in pkgutil.walk_packages(\n        pkg_paths, prefix=pkg.__name__ + \".\"\n    ):\n        try:\n            module = importlib.import_module(module_name)\n            imported_modules.append(module)\n        except Exception as e:\n            print(f\"Could not import {module_name}: {e}\")\n\n    return imported_modules\n\n\ndef all_subclasses(cls: Type[T]) -> List[Type[T]]:\n    \"\"\"\n    Recursively find all subclasses of the given class.\n    \"\"\"\n    direct_subs = cls.__subclasses__()\n    result: List[Type[T]] = []\n    for subclass in direct_subs:\n        result.append(subclass)\n        # Extend the result by recursively calling all_subclasses\n        result.extend(all_subclasses(subclass))\n    return result\n\n\ndef find_all_subclasses_in_dir(parent_class: Type[T], directory: str) -> List[Type[T]]:\n    \"\"\"\n    Imports all modules from the given directory (and subdirectories),\n    then returns all classes that are subclasses of parent_class.\n\n    :param parent_class: The class to find subclasses of.\n    :param directory: The directory to search for subclasses.\n    :return: A list of all subclasses of parent_class found in the directory.\n    \"\"\"\n    # First import all modules to ensure classes are loaded into memory\n    import_all_modules_from_dir(directory)\n\n    # Gather all subclasses of the given parent class\n    subclasses = all_subclasses(parent_class)\n    return subclasses\n\n\ndef find_all_subclasses_in_package(\n    parent_class: Type[T], package_name: str\n) -> List[Type[T]]:\n    \"\"\"\n    Imports all submodules from the given package name, then returns all subclasses\n    of parent_class that are loaded in memory.\n    \"\"\"\n    import_all_submodules_from_package(package_name)\n    subclasses = all_subclasses(parent_class)\n    return subclasses\n\n\n# Example usage:\nif __name__ == \"__main__\":\n\n    class Animal:\n        pass\n\n    # Suppose \"mymodules\" contains files that define classes inheriting from Animal\n    found_subclasses = find_all_subclasses_in_dir(Animal, \"mymodules\")\n    for sc in found_subclasses:\n        print(\"Found subclass:\", sc.__name__)\n"
  },
  {
    "path": "backend/onyx/utils/supervisord_watchdog.py",
    "content": "#!/usr/bin/env python3\n\nimport argparse\nimport subprocess\nimport time\n\nfrom onyx.redis.redis_pool import get_redis_client\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger()\n\nMAX_AGE_SECONDS = 900  # how old the heartbeat can be\nCHECK_INTERVAL = 60  # how often to check\nMAX_LOOKUP_FAILURES = 5\n\n\ndef main(key: str, program: str, conf: str) -> None:\n    \"\"\"This script will restart the watchdog'd supervisord process via supervisorctl.\n\n    This process continually looks up a specific redis key. If it is missing for a\n    consecutive number of times and the last successful lookup is more\n    than a threshold time, the specified program will be restarted.\n    \"\"\"\n    logger.info(f\"supervisord_watchdog starting: program={program} conf={conf}\")\n\n    r = get_redis_client()\n\n    last_heartbeat = time.monotonic()\n    num_lookup_failures = 0\n\n    try:\n        while True:\n            time.sleep(CHECK_INTERVAL)\n\n            now = time.monotonic()\n\n            # check for the key ... handle any exception gracefully\n            try:\n                heartbeat = r.exists(key)\n            except Exception:\n                logger.exception(\n                    f\"Exception checking for celery beat heartbeat: key={key}.\"\n                )\n                continue\n\n            # happy path ... just continue\n            if heartbeat:\n                logger.debug(f\"Key lookup succeeded: key={key}\")\n                last_heartbeat = time.monotonic()\n                num_lookup_failures = 0\n                continue\n\n            # if we haven't exceeded the max lookup failures, continue\n            num_lookup_failures += 1\n            if num_lookup_failures <= MAX_LOOKUP_FAILURES:\n                logger.warning(\n                    f\"Key lookup failed: key={key} \"\n                    f\"lookup_failures={num_lookup_failures} \"\n                    f\"max_lookup_failures={MAX_LOOKUP_FAILURES}\"\n                )\n                continue\n\n            # if we haven't exceeded the max missing key timeout threshold, continue\n            elapsed = now - last_heartbeat\n            if elapsed <= MAX_AGE_SECONDS:\n                logger.warning(\n                    f\"Key lookup failed: key={key} \"\n                    f\"lookup_failures={num_lookup_failures} \"\n                    f\"max_lookup_failures={MAX_LOOKUP_FAILURES} \"\n                    f\"elapsed={elapsed:.2f} \"\n                    f\"elapsed_threshold={MAX_AGE_SECONDS}\"\n                )\n                continue\n\n            # all conditions have been exceeded ... restart the process\n            logger.warning(\n                f\"Key lookup failure thresholds exceeded - restarting {program}: \"\n                f\"key={key} \"\n                f\"lookup_failures={num_lookup_failures} \"\n                f\"max_lookup_failures={MAX_LOOKUP_FAILURES} \"\n                f\"elapsed={elapsed:.2f} \"\n                f\"elapsed_threshold={MAX_AGE_SECONDS}\"\n            )\n\n            subprocess.call([\"supervisorctl\", \"-c\", conf, \"restart\", program])\n\n            # reset state so that we properly delay until the next restart\n            # instead of continually restarting\n            num_lookup_failures = 0\n            last_heartbeat = time.monotonic()\n    except KeyboardInterrupt:\n        logger.info(\"Caught interrupt, exiting watchdog.\")\n\n    logger.info(\"supervisord_watchdog exiting.\")\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"Supervisord Watchdog\")\n    parser.add_argument(\"--key\", help=\"The redis key to watch\", required=True)\n    parser.add_argument(\n        \"--program\", help=\"The supervisord program to restart\", required=True\n    )\n    parser.add_argument(\n        \"--conf\", type=str, help=\"Path to supervisord config file\", required=True\n    )\n    args = parser.parse_args()\n\n    main(args.key, args.program, args.conf)\n"
  },
  {
    "path": "backend/onyx/utils/telemetry.py",
    "content": "import contextvars\nimport threading\nimport uuid\nfrom enum import Enum\nfrom typing import Any\n\nimport requests\n\nfrom onyx.configs.app_configs import DISABLE_TELEMETRY\nfrom onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED\nfrom onyx.configs.constants import KV_CUSTOMER_UUID_KEY\nfrom onyx.configs.constants import KV_INSTANCE_DOMAIN_KEY\nfrom onyx.configs.constants import MilestoneRecordType\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import User\nfrom onyx.key_value_store.factory import get_kv_store\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.key_value_store.interface import unwrap_str\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.variable_functionality import (\n    fetch_versioned_implementation_with_fallback,\n)\nfrom onyx.utils.variable_functionality import noop_fallback\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\n\nlogger = setup_logger()\n\n\n_DANSWER_TELEMETRY_ENDPOINT = \"https://telemetry.onyx.app/anonymous_telemetry\"\n_CACHED_UUID: str | None = None\n_CACHED_INSTANCE_DOMAIN: str | None = None\n\n\nclass RecordType(str, Enum):\n    VERSION = \"version\"\n    SIGN_UP = \"sign_up\"\n    USAGE = \"usage\"\n    LATENCY = \"latency\"\n    FAILURE = \"failure\"\n    METRIC = \"metric\"\n    INDEXING_PROGRESS = \"indexing_progress\"\n    INDEXING_COMPLETE = \"indexing_complete\"\n    PERMISSION_SYNC_PROGRESS = \"permission_sync_progress\"\n    PERMISSION_SYNC_COMPLETE = \"permission_sync_complete\"\n    INDEX_ATTEMPT_STATUS = \"index_attempt_status\"\n\n\ndef _get_or_generate_customer_id_mt(tenant_id: str) -> str:\n    return str(uuid.uuid5(uuid.NAMESPACE_X500, tenant_id))\n\n\ndef get_or_generate_uuid() -> str:\n    # TODO: split out the whole \"instance UUID\" generation logic into a separate\n    # utility function. Telemetry should not be aware at all of how the UUID is\n    # generated/stored.\n    # TODO: handle potential race condition for UUID generation. Doesn't matter for\n    # the telemetry case, but if this is used generally it should be handled.\n    global _CACHED_UUID\n\n    if _CACHED_UUID is not None:\n        return _CACHED_UUID\n\n    kv_store = get_kv_store()\n\n    try:\n        _CACHED_UUID = unwrap_str(kv_store.load(KV_CUSTOMER_UUID_KEY))\n    except KvKeyNotFoundError:\n        _CACHED_UUID = str(uuid.uuid4())\n        kv_store.store(KV_CUSTOMER_UUID_KEY, {\"value\": _CACHED_UUID}, encrypt=True)\n\n    return _CACHED_UUID\n\n\ndef _get_or_generate_instance_domain() -> str | None:  #\n    global _CACHED_INSTANCE_DOMAIN\n\n    if _CACHED_INSTANCE_DOMAIN is not None:\n        return _CACHED_INSTANCE_DOMAIN\n\n    kv_store = get_kv_store()\n\n    try:\n        _CACHED_INSTANCE_DOMAIN = unwrap_str(kv_store.load(KV_INSTANCE_DOMAIN_KEY))\n    except KvKeyNotFoundError:\n        with get_session_with_current_tenant() as db_session:\n            first_user = db_session.query(User).first()\n            if first_user:\n                _CACHED_INSTANCE_DOMAIN = first_user.email.split(\"@\")[-1]\n                kv_store.store(\n                    KV_INSTANCE_DOMAIN_KEY,\n                    {\"value\": _CACHED_INSTANCE_DOMAIN},\n                    encrypt=True,\n                )\n\n    return _CACHED_INSTANCE_DOMAIN\n\n\ndef optional_telemetry(\n    record_type: RecordType,\n    data: dict,\n    user_id: str | None = None,\n    tenant_id: str | None = None,  # Allows for override of tenant_id\n) -> None:\n    if DISABLE_TELEMETRY:\n        return\n\n    tenant_id = tenant_id or get_current_tenant_id()\n\n    try:\n\n        def telemetry_logic() -> None:\n            try:\n                customer_uuid = (\n                    _get_or_generate_customer_id_mt(tenant_id)\n                    if MULTI_TENANT\n                    else get_or_generate_uuid()\n                )\n                payload = {\n                    \"data\": data,\n                    \"record\": record_type,\n                    # If None then it's a flow that doesn't include a user\n                    # For cases where the User itself is None, a string is provided instead\n                    \"user_id\": user_id,\n                    \"customer_uuid\": customer_uuid,\n                    \"is_cloud\": MULTI_TENANT,\n                }\n                if ENTERPRISE_EDITION_ENABLED:\n                    payload[\"instance_domain\"] = _get_or_generate_instance_domain()\n                requests.post(\n                    _DANSWER_TELEMETRY_ENDPOINT,\n                    headers={\"Content-Type\": \"application/json\"},\n                    json=payload,\n                )\n\n            except Exception:\n                # This way it silences all thread level logging as well\n                pass\n\n        # Run in separate thread with the same context as the current thread\n        # This is to ensure that the thread gets the current tenant ID\n        current_context = contextvars.copy_context()\n        thread = threading.Thread(\n            target=lambda: current_context.run(telemetry_logic), daemon=True\n        )\n        thread.start()\n    except Exception:\n        # Should never interfere with normal functions of Onyx\n        pass\n\n\ndef mt_cloud_telemetry(\n    tenant_id: str,\n    distinct_id: str,\n    event: MilestoneRecordType,\n    properties: dict[str, Any] | None = None,\n) -> None:\n    if not MULTI_TENANT:\n        return\n\n    # Automatically include tenant_id in properties\n    all_properties = {**properties} if properties else {}\n    if properties and \"tenant_id\" in properties:\n        logger.warning(\n            f\"tenant_id already in properties: {properties}. Overwriting with new value {tenant_id}.\"\n        )\n    all_properties[\"tenant_id\"] = tenant_id\n\n    # MIT version should not need to include any Posthog code\n    # This is only for Onyx MT Cloud, this code should also never be hit, no reason for any orgs to\n    # be running the Multi Tenant version of Onyx.\n    fetch_versioned_implementation_with_fallback(\n        module=\"onyx.utils.telemetry\",\n        attribute=\"event_telemetry\",\n        fallback=noop_fallback,\n    )(distinct_id, event, all_properties)\n\n\ndef mt_cloud_identify(\n    distinct_id: str,\n    properties: dict[str, Any] | None = None,\n) -> None:\n    \"\"\"Create/update a PostHog person profile (Cloud only).\"\"\"\n    if not MULTI_TENANT:\n        return\n\n    fetch_versioned_implementation_with_fallback(\n        module=\"onyx.utils.telemetry\",\n        attribute=\"identify_user\",\n        fallback=noop_fallback,\n    )(distinct_id, properties)\n\n\ndef mt_cloud_alias(\n    distinct_id: str,\n    anonymous_id: str,\n) -> None:\n    \"\"\"Link an anonymous distinct_id to an identified user (Cloud only).\"\"\"\n    if not MULTI_TENANT:\n        return\n\n    fetch_versioned_implementation_with_fallback(\n        module=\"onyx.utils.posthog_client\",\n        attribute=\"alias_user\",\n        fallback=noop_fallback,\n    )(distinct_id, anonymous_id)\n\n\ndef mt_cloud_get_anon_id(request: Any) -> str | None:\n    \"\"\"Extract the anonymous distinct_id from the app PostHog cookie (Cloud only).\"\"\"\n    if not MULTI_TENANT or not request:\n        return None\n\n    return fetch_versioned_implementation_with_fallback(\n        module=\"onyx.utils.posthog_client\",\n        attribute=\"get_anon_id_from_request\",\n        fallback=noop_fallback,\n    )(request)\n"
  },
  {
    "path": "backend/onyx/utils/tenant.py",
    "content": "from shared_configs.configs import TENANT_ID_PREFIX\n\n\ndef get_tenant_id_short_string(tenant_id: str) -> str:\n    \"\"\"Gets a short string representation of a full tenant id.\n\n    Args:\n        tenant_id: The full tenant id.\n\n    Returns:\n        str: The first 8 characters of the tenant id after removing the prefix.\n    \"\"\"\n    tenant_display = tenant_id.removeprefix(TENANT_ID_PREFIX)\n    short_tenant = tenant_display[:8]\n    return short_tenant\n"
  },
  {
    "path": "backend/onyx/utils/text_processing.py",
    "content": "import codecs\nimport json\nimport re\nimport string\nfrom urllib.parse import quote\n\nfrom onyx.utils.logger import setup_logger\n\n\nlogger = setup_logger(__name__)\n\n# Mapping of curly/smart quotes to straight quotes\nCURLY_TO_STRAIGHT_QUOTES: dict[str, str] = {\n    \"\\u2019\": \"'\",  # Right single quotation mark\n    \"\\u2018\": \"'\",  # Left single quotation mark\n    \"\\u201c\": '\"',  # Left double quotation mark\n    \"\\u201d\": '\"',  # Right double quotation mark\n}\n\n# Zero-width characters that should typically be removed during text normalization\nZERO_WIDTH_CHARS: set[str] = {\n    \"\\u200b\",  # Zero-width space\n    \"\\u200c\",  # Zero-width non-joiner\n    \"\\u200d\",  # Zero-width joiner\n    \"\\ufeff\",  # Byte order mark / zero-width no-break space\n    \"\\u2060\",  # Word joiner\n}\n\n\ndef normalize_curly_quotes(text: str) -> str:\n    \"\"\"Convert curly/smart quotes to straight quotes.\"\"\"\n    for curly, straight in CURLY_TO_STRAIGHT_QUOTES.items():\n        text = text.replace(curly, straight)\n    return text\n\n\ndef is_zero_width_char(c: str) -> bool:\n    \"\"\"Check if a character is a zero-width character.\"\"\"\n    return c in ZERO_WIDTH_CHARS\n\n\nESCAPE_SEQUENCE_RE = re.compile(\n    r\"\"\"\n    ( \\\\U........      # 8-digit hex escapes\n    | \\\\u....          # 4-digit hex escapes\n    | \\\\x..            # 2-digit hex escapes\n    | \\\\[0-7]{1,3}     # Octal escapes\n    | \\\\N\\{[^}]+\\}     # Unicode characters by name\n    | \\\\[\\\\'\"abfnrtv]  # Single-character escapes\n    )\"\"\",\n    re.UNICODE | re.VERBOSE,\n)\n\n_INITIAL_FILTER = re.compile(\n    \"[\"\n    \"\\U0000fff0-\\U0000ffff\"  # Specials\n    \"\\U0001f000-\\U0001f9ff\"  # Emoticons\n    \"\\U00002000-\\U0000206f\"  # General Punctuation\n    \"\\U00002190-\\U000021ff\"  # Arrows\n    \"\\U00002700-\\U000027bf\"  # Dingbats\n    \"]+\",\n    flags=re.UNICODE,\n)\n\n# Regex to match invalid Unicode characters that cause UTF-8 encoding errors:\n# - \\x00-\\x08: Control characters (except tab \\x09)\n# - \\x0b-\\x0c: Vertical tab and form feed\n# - \\x0e-\\x1f: More control characters (except newline \\x0a, carriage return \\x0d)\n# - \\ud800-\\udfff: Surrogate pairs (invalid when unpaired, causes \"surrogates not allowed\" errors)\n# - \\ufdd0-\\ufdef: Non-characters\n# - \\ufffe-\\uffff: Non-characters\n_INVALID_UNICODE_CHARS_RE = re.compile(\n    \"[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\ud800-\\udfff\\ufdd0-\\ufdef\\ufffe\\uffff]\"\n)\n\n\ndef decode_escapes(s: str) -> str:\n    def decode_match(match: re.Match) -> str:\n        return codecs.decode(match.group(0), \"unicode-escape\")\n\n    return ESCAPE_SEQUENCE_RE.sub(decode_match, s)\n\n\ndef make_url_compatible(s: str) -> str:\n    s_with_underscores = s.replace(\" \", \"_\")\n    return quote(s_with_underscores, safe=\"\")\n\n\ndef has_unescaped_quote(s: str) -> bool:\n    pattern = r'(?<!\\\\)\"'\n    return bool(re.search(pattern, s))\n\n\ndef escape_newlines(s: str) -> str:\n    return re.sub(r\"(?<!\\\\)\\n\", \"\\\\\\\\n\", s)\n\n\ndef replace_whitespaces_w_space(s: str) -> str:\n    return re.sub(r\"\\s\", \" \", s)\n\n\n# Function to remove punctuation from a string\ndef remove_punctuation(s: str) -> str:\n    return s.translate(str.maketrans(\"\", \"\", string.punctuation))\n\n\ndef escape_quotes(original_json_str: str) -> str:\n    result = []\n    in_string = False\n    for i, char in enumerate(original_json_str):\n        if char == '\"':\n            if not in_string:\n                in_string = True\n                result.append(char)\n            else:\n                next_char = (\n                    original_json_str[i + 1] if i + 1 < len(original_json_str) else None\n                )\n                if result and result[-1] == \"\\\\\":\n                    result.append(char)\n                elif next_char not in [\",\", \":\", \"}\", \"\\n\"]:\n                    result.append(\"\\\\\" + char)\n                else:\n                    result.append(char)\n                    in_string = False\n        else:\n            result.append(char)\n    return \"\".join(result)\n\n\ndef find_all_json_objects(text: str) -> list[dict]:\n    \"\"\"Find all JSON objects in text using balanced brace matching.\n\n    Iterates through the text, and for each '{' found, attempts to find its\n    matching '}' by counting brace depth. Each balanced substring is then\n    validated as JSON. This includes nested JSON objects within other objects.\n\n    Use case: Parsing LLM output that may contain multiple JSON objects, or when\n    the LLM/serving layer outputs function calls in non-standard formats\n    (e.g. OpenAI's function.open_url style).\n\n    Args:\n        text: The text to search for JSON objects.\n\n    Returns:\n        A list of all successfully parsed JSON objects (dicts only).\n    \"\"\"\n    json_objects: list[dict] = []\n    i = 0\n\n    while i < len(text):\n        if text[i] == \"{\":\n            # Try to find a matching closing brace\n            brace_count = 0\n            start = i\n            for j in range(i, len(text)):\n                if text[j] == \"{\":\n                    brace_count += 1\n                elif text[j] == \"}\":\n                    brace_count -= 1\n                    if brace_count == 0:\n                        # Found potential JSON object\n                        candidate = text[start : j + 1]\n                        try:\n                            parsed = json.loads(candidate)\n                            if isinstance(parsed, dict):\n                                json_objects.append(parsed)\n                        except json.JSONDecodeError:\n                            pass\n                        break\n        i += 1\n\n    return json_objects\n\n\ndef parse_llm_json_response(content: str) -> dict | None:\n    \"\"\"Parse a single JSON object from LLM output, handling markdown code blocks.\n\n    Designed for LLM responses that typically contain exactly one JSON object,\n    possibly wrapped in markdown formatting.\n\n    Tries extraction in order:\n    1. JSON inside markdown code block (```json ... ``` or ``` ... ```)\n    2. Entire content as raw JSON\n    3. First '{' to last '}' in content (greedy match)\n\n    Args:\n        content: The LLM response text to parse.\n\n    Returns:\n        The parsed JSON dict if found, None otherwise.\n    \"\"\"\n    # Try to find JSON in markdown code block first\n    # Use greedy .* (not .*?) to match nested objects correctly within code block bounds\n    json_match = re.search(r\"```(?:json)?\\s*(\\{.*\\})\\s*```\", content, re.DOTALL)\n    if json_match:\n        try:\n            result = json.loads(json_match.group(1))\n            if isinstance(result, dict):\n                return result\n        except json.JSONDecodeError:\n            pass\n\n    # Try to parse the entire content as JSON\n    try:\n        result = json.loads(content)\n        if isinstance(result, dict):\n            return result\n    except json.JSONDecodeError:\n        pass\n\n    # Try to find any JSON object in the content\n    json_match = re.search(r\"\\{.*\\}\", content, re.DOTALL)\n    if json_match:\n        try:\n            result = json.loads(json_match.group(0))\n            if isinstance(result, dict):\n                return result\n        except json.JSONDecodeError:\n            pass\n\n    return None\n\n\ndef clean_model_quote(quote: str, trim_length: int) -> str:\n    quote_clean = quote.strip()\n    if quote_clean[0] == '\"':\n        quote_clean = quote_clean[1:]\n    if quote_clean[-1] == '\"':\n        quote_clean = quote_clean[:-1]\n    if trim_length > 0:\n        quote_clean = quote_clean[:trim_length]\n    return quote_clean\n\n\ndef shared_precompare_cleanup(text: str) -> str:\n    \"\"\"LLMs models sometime restructure whitespaces or edits special characters to fit a more likely\n    distribution of characters found in its training data, but this hurts exact quote matching\n    \"\"\"\n    text = text.lower()\n\n    # \\s: matches any whitespace character (spaces, tabs, newlines, etc.)\n    # |: acts as an OR.\n    # \\*: matches the asterisk character.\n    # \\\\\": matches the \\\" sequence.\n    # [.,:`\"#-]: matches any character inside the square brackets.\n    text = re.sub(r'\\s|\\*|\\\\\"|[.,:`\"#-]', \"\", text)\n\n    return text\n\n\ndef clean_text(text: str) -> str:\n    # Remove specific Unicode ranges that might cause issues\n    cleaned = _INITIAL_FILTER.sub(\"\", text)\n\n    # Remove any control characters except for newline and tab\n    cleaned = \"\".join(ch for ch in cleaned if ch >= \" \" or ch in \"\\n\\t\")\n\n    return cleaned\n\n\ndef is_valid_email(text: str) -> bool:\n    \"\"\"Can use a library instead if more detailed checks are needed\"\"\"\n    regex = r\"^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$\"\n\n    if re.match(regex, text):\n        return True\n    else:\n        return False\n\n\ndef count_punctuation(text: str) -> int:\n    return sum(1 for char in text if char in string.punctuation)\n\n\ndef remove_markdown_image_references(text: str) -> str:\n    \"\"\"Remove markdown-style image references like ![alt text](url)\"\"\"\n    return re.sub(r\"!\\[[^\\]]*\\]\\([^\\)]+\\)\", \"\", text)\n\n\ndef remove_invalid_unicode_chars(text: str) -> str:\n    \"\"\"Remove Unicode characters that are invalid in UTF-8 or cause encoding issues.\n\n    This handles:\n    - Control characters (except tab, newline, carriage return)\n    - Unpaired UTF-16 surrogates (e.g. \\udc00) that cause 'surrogates not allowed' errors\n    - Unicode non-characters\n    \"\"\"\n    return _INVALID_UNICODE_CHARS_RE.sub(\"\", text)\n\n\ndef normalize_char(c: str) -> str:\n    \"\"\"Normalize a single character (curly quotes, whitespace, punctuation).\"\"\"\n    if c in CURLY_TO_STRAIGHT_QUOTES:\n        c = CURLY_TO_STRAIGHT_QUOTES[c]\n    if c.isspace():\n        return \" \"\n    elif re.match(r\"[^\\w\\s\\']\", c):\n        return \" \"\n    else:\n        return c.lower()\n"
  },
  {
    "path": "backend/onyx/utils/threadpool_concurrency.py",
    "content": "import asyncio\nimport collections.abc\nimport concurrent\nimport contextvars\nimport copy\nimport threading\nimport uuid\nfrom collections.abc import Awaitable\nfrom collections.abc import Callable\nfrom collections.abc import Iterator\nfrom collections.abc import MutableMapping\nfrom collections.abc import Sequence\nfrom concurrent.futures import as_completed\nfrom concurrent.futures import FIRST_COMPLETED\nfrom concurrent.futures import Future\nfrom concurrent.futures import ThreadPoolExecutor\nfrom concurrent.futures import wait\nfrom typing import Any\nfrom typing import cast\nfrom typing import Generic\nfrom typing import overload\nfrom typing import Protocol\nfrom typing import TypeVar\n\nfrom pydantic import GetCoreSchemaHandler\nfrom pydantic.types import T\nfrom pydantic_core import core_schema\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nR = TypeVar(\"R\")\nKT = TypeVar(\"KT\")  # Key type\nVT = TypeVar(\"VT\")  # Value type\n_T = TypeVar(\"_T\")  # Default type\n\n\nclass ThreadSafeDict(MutableMapping[KT, VT]):\n    \"\"\"\n    A thread-safe dictionary implementation that uses a lock to ensure thread safety.\n    Implements the MutableMapping interface to provide a complete dictionary-like interface.\n\n    Example usage:\n        # Create a thread-safe dictionary\n        safe_dict: ThreadSafeDict[str, int] = ThreadSafeDict()\n\n        # Basic operations (atomic)\n        safe_dict[\"key\"] = 1\n        value = safe_dict[\"key\"]\n        del safe_dict[\"key\"]\n\n        # Bulk operations (atomic)\n        safe_dict.update({\"key1\": 1, \"key2\": 2})\n    \"\"\"\n\n    def __init__(self, input_dict: dict[KT, VT] | None = None) -> None:\n        self._dict: dict[KT, VT] = input_dict or {}\n        self.lock = threading.Lock()\n\n    def __getitem__(self, key: KT) -> VT:\n        with self.lock:\n            return self._dict[key]\n\n    def __setitem__(self, key: KT, value: VT) -> None:\n        with self.lock:\n            self._dict[key] = value\n\n    def __delitem__(self, key: KT) -> None:\n        with self.lock:\n            del self._dict[key]\n\n    def __iter__(self) -> Iterator[KT]:\n        # Return a snapshot of keys to avoid potential modification during iteration\n        with self.lock:\n            return iter(list(self._dict.keys()))\n\n    def __len__(self) -> int:\n        with self.lock:\n            return len(self._dict)\n\n    @classmethod\n    def __get_pydantic_core_schema__(\n        cls, source_type: Any, handler: GetCoreSchemaHandler\n    ) -> core_schema.CoreSchema:\n        return core_schema.no_info_after_validator_function(\n            cls.validate, handler(dict[KT, VT])\n        )\n\n    @classmethod\n    def validate(cls, v: Any) -> \"ThreadSafeDict[KT, VT]\":\n        if isinstance(v, dict):\n            return ThreadSafeDict(v)\n        return v\n\n    def __deepcopy__(self, memo: Any) -> \"ThreadSafeDict[KT, VT]\":\n        return ThreadSafeDict(copy.deepcopy(self._dict))\n\n    def clear(self) -> None:\n        \"\"\"Remove all items from the dictionary atomically.\"\"\"\n        with self.lock:\n            self._dict.clear()\n\n    def copy(self) -> dict[KT, VT]:\n        \"\"\"Return a shallow copy of the dictionary atomically.\"\"\"\n        with self.lock:\n            return self._dict.copy()\n\n    @overload\n    def get(self, key: KT) -> VT | None: ...\n\n    @overload\n    def get(self, key: KT, default: VT | _T) -> VT | _T: ...\n\n    def get(self, key: KT, default: Any = None) -> Any:\n        \"\"\"Get a value with a default, atomically.\"\"\"\n        with self.lock:\n            return self._dict.get(key, default)\n\n    def pop(self, key: KT, default: Any = None) -> Any:\n        \"\"\"Remove and return a value with optional default, atomically.\"\"\"\n        with self.lock:\n            if default is None:\n                return self._dict.pop(key)\n            return self._dict.pop(key, default)\n\n    def setdefault(self, key: KT, default: VT) -> VT:\n        \"\"\"Set a default value if key is missing, atomically.\"\"\"\n        with self.lock:\n            return self._dict.setdefault(key, default)\n\n    def update(self, *args: Any, **kwargs: VT) -> None:\n        \"\"\"Update the dictionary atomically from another mapping or from kwargs.\"\"\"\n        with self.lock:\n            self._dict.update(*args, **kwargs)\n\n    def items(self) -> collections.abc.ItemsView[KT, VT]:\n        \"\"\"Return a view of (key, value) pairs atomically.\"\"\"\n        with self.lock:\n            return collections.abc.ItemsView(self)\n\n    def keys(self) -> collections.abc.KeysView[KT]:\n        \"\"\"Return a view of keys atomically.\"\"\"\n        with self.lock:\n            return collections.abc.KeysView(self)\n\n    def values(self) -> collections.abc.ValuesView[VT]:\n        \"\"\"Return a view of values atomically.\"\"\"\n        with self.lock:\n            return collections.abc.ValuesView(self)\n\n    @overload\n    def atomic_get_set(\n        self, key: KT, value_callback: Callable[[VT], VT], default: VT\n    ) -> tuple[VT, VT]: ...\n\n    @overload\n    def atomic_get_set(\n        self, key: KT, value_callback: Callable[[VT | _T], VT], default: VT | _T\n    ) -> tuple[VT | _T, VT]: ...\n\n    def atomic_get_set(\n        self, key: KT, value_callback: Callable[[Any], VT], default: Any = None\n    ) -> tuple[Any, VT]:\n        \"\"\"Replace a value from the dict with a function applied to the previous value, atomically.\n\n        Returns:\n            A tuple of the previous value and the new value.\n        \"\"\"\n        with self.lock:\n            val = self._dict.get(key, default)\n            new_val = value_callback(val)\n            self._dict[key] = new_val\n            return val, new_val\n\n\nST = TypeVar(\"ST\")  # Set element type\n\n\nclass ThreadSafeSet(Generic[ST]):\n    \"\"\"\n    A thread-safe set implementation that uses a lock to ensure thread safety.\n\n    Example usage:\n        # Create a thread-safe set\n        safe_set: ThreadSafeSet[str] = ThreadSafeSet()\n\n        # Basic operations (atomic)\n        safe_set.add(\"item\")\n        if \"item\" in safe_set:\n            ...\n        safe_set.discard(\"item\")\n\n        # Bulk operations (atomic)\n        safe_set.update({\"item1\", \"item2\"})\n\n        # Atomic check-and-add (returns True if item was already present)\n        was_present = safe_set.check_and_add(\"item\")\n    \"\"\"\n\n    def __init__(self, input_set: set[ST] | None = None) -> None:\n        self._set: set[ST] = input_set.copy() if input_set else set()\n        self.lock = threading.Lock()\n\n    def __contains__(self, item: ST) -> bool:\n        with self.lock:\n            return item in self._set\n\n    def __len__(self) -> int:\n        with self.lock:\n            return len(self._set)\n\n    def __iter__(self) -> Iterator[ST]:\n        # Return a snapshot to avoid modification during iteration\n        with self.lock:\n            return iter(list(self._set))\n\n    @classmethod\n    def __get_pydantic_core_schema__(\n        cls, source_type: Any, handler: GetCoreSchemaHandler\n    ) -> core_schema.CoreSchema:\n        return core_schema.no_info_after_validator_function(\n            cls.validate, handler(set[ST])\n        )\n\n    @classmethod\n    def validate(cls, v: Any) -> \"ThreadSafeSet[ST]\":\n        if isinstance(v, set):\n            return ThreadSafeSet(v)\n        return v\n\n    def __deepcopy__(self, memo: Any) -> \"ThreadSafeSet[ST]\":\n        with self.lock:\n            return ThreadSafeSet(copy.deepcopy(self._set))\n\n    def add(self, item: ST) -> None:\n        \"\"\"Add an item to the set atomically.\"\"\"\n        with self.lock:\n            self._set.add(item)\n\n    def discard(self, item: ST) -> None:\n        \"\"\"Remove an item if present, atomically.\"\"\"\n        with self.lock:\n            self._set.discard(item)\n\n    def remove(self, item: ST) -> None:\n        \"\"\"Remove an item, raise KeyError if not present, atomically.\"\"\"\n        with self.lock:\n            self._set.remove(item)\n\n    def clear(self) -> None:\n        \"\"\"Remove all items from the set atomically.\"\"\"\n        with self.lock:\n            self._set.clear()\n\n    def copy(self) -> set[ST]:\n        \"\"\"Return a shallow copy of the set atomically.\"\"\"\n        with self.lock:\n            return self._set.copy()\n\n    def update(self, *others: set[ST]) -> None:\n        \"\"\"Update the set with items from other sets atomically.\"\"\"\n        with self.lock:\n            for other in others:\n                self._set.update(other)\n\n    def check_and_add(self, item: ST) -> bool:\n        \"\"\"\n        Atomically check if item exists and add it if not.\n        Returns True if the item was already present, False if it was added.\n        This prevents race conditions in check-then-add patterns.\n        \"\"\"\n        with self.lock:\n            if item in self._set:\n                return True\n            self._set.add(item)\n            return False\n\n\nclass CallableProtocol(Protocol):\n    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...\n\n\ndef run_functions_tuples_in_parallel(\n    functions_with_args: Sequence[tuple[CallableProtocol, tuple[Any, ...]]],\n    allow_failures: bool = False,\n    max_workers: int | None = None,\n    timeout: float | None = None,\n    timeout_callback: (\n        Callable[[int, CallableProtocol, tuple[Any, ...]], Any] | None\n    ) = None,\n) -> list[Any]:\n    \"\"\"\n    Executes multiple functions in parallel and returns a list of the results for each function.\n    This function preserves contextvars across threads, which is important for maintaining\n    context like tenant IDs in database sessions.\n\n    Args:\n        functions_with_args: List of tuples each containing the function callable and a tuple of arguments.\n        allow_failures: if set to True, then the function result will just be None\n        max_workers: Max number of worker threads\n        timeout: Optional wall-clock timeout in seconds. If any function hasn't completed\n            within this time, it will be considered timed out. When timeout is set, threads\n            that exceed the timeout will continue running in the background but their results\n            will not be awaited. IMPORTANT: because the thread continues to run in the background,\n            it can continue to consume resources and updated shared state objects even though the caller\n            has moved on.\n        timeout_callback: Optional callback for handling timeouts. Called with (index, func, args)\n            for each timed-out function. If provided, its return value is used as the result.\n            If not provided and allow_failures is False, TimeoutError is raised.\n            If not provided and allow_failures is True, None is returned for timed-out functions.\n\n    Returns:\n        list: A list of results from each function, in the same order as the input functions.\n    \"\"\"\n    workers = (\n        min(max_workers, len(functions_with_args))\n        if max_workers is not None\n        else len(functions_with_args)\n    )\n\n    if workers <= 0:\n        return []\n\n    results: list[tuple[int, Any]] = []\n    executor = ThreadPoolExecutor(max_workers=workers)\n\n    try:\n        # The primary reason for propagating contextvars is to allow acquiring a db session\n        # that respects tenant id. Context.run is expected to be low-overhead, but if we later\n        # find that it is increasing latency we can make using it optional.\n        future_to_index = {\n            executor.submit(contextvars.copy_context().run, func, *args): i\n            for i, (func, args) in enumerate(functions_with_args)\n        }\n\n        if timeout is not None:\n            # Wait for completion or timeout\n            done, not_done = wait(future_to_index.keys(), timeout=timeout)\n\n            # Process completed futures\n            for future in done:\n                index = future_to_index[future]\n                try:\n                    results.append((index, future.result()))\n                except Exception as e:\n                    logger.exception(f\"Function at index {index} failed due to {e}\")\n                    results.append((index, None))\n                    if not allow_failures:\n                        raise\n\n            # Process timed-out futures\n            for future in not_done:\n                index = future_to_index[future]\n                func, args = functions_with_args[index]\n                logger.warning(\n                    f\"Function at index {index} timed out after {timeout} seconds\"\n                )\n\n                if timeout_callback:\n                    timeout_result = timeout_callback(index, func, args)\n                    results.append((index, timeout_result))\n                else:\n                    results.append((index, None))\n                    if not allow_failures:\n                        raise TimeoutError(\n                            f\"Function at index {index} timed out after {timeout} seconds\"\n                        )\n\n                # Attempt to cancel (only effective if not yet started)\n                future.cancel()\n        else:\n            for future in as_completed(future_to_index):\n                index = future_to_index[future]\n                try:\n                    results.append((index, future.result()))\n                except Exception as e:\n                    logger.exception(f\"Function at index {index} failed due to {e}\")\n                    results.append((index, None))\n\n                    if not allow_failures:\n                        raise\n    finally:\n        # When timeout is used, don't wait for timed-out threads to complete\n        # (they will continue running in the background)\n        # When no timeout, wait for all threads to complete (original behavior)\n        executor.shutdown(wait=(timeout is None))\n\n    results.sort(key=lambda x: x[0])\n    return [result for index, result in results]\n\n\nclass FunctionCall(Generic[R]):\n    \"\"\"\n    Container for run_functions_in_parallel, fetch the results from the output of\n    run_functions_in_parallel via the FunctionCall.result_id.\n    \"\"\"\n\n    def __init__(\n        self, func: Callable[..., R], args: tuple = (), kwargs: dict | None = None\n    ):\n        self.func = func\n        self.args = args\n        self.kwargs = kwargs if kwargs is not None else {}\n        self.result_id = str(uuid.uuid4())\n\n    def execute(self) -> R:\n        return self.func(*self.args, **self.kwargs)\n\n\ndef run_functions_in_parallel(\n    function_calls: list[FunctionCall],\n    allow_failures: bool = False,\n) -> dict[str, Any]:\n    \"\"\"\n    Executes a list of FunctionCalls in parallel and stores the results in a dictionary where the keys\n    are the result_id of the FunctionCall and the values are the results of the call.\n    \"\"\"\n    results: dict[str, Any] = {}\n\n    if len(function_calls) == 0:\n        return results\n\n    with ThreadPoolExecutor(max_workers=len(function_calls)) as executor:\n        future_to_id = {\n            executor.submit(\n                contextvars.copy_context().run, func_call.execute\n            ): func_call.result_id\n            for func_call in function_calls\n        }\n\n        for future in as_completed(future_to_id):\n            result_id = future_to_id[future]\n            try:\n                results[result_id] = future.result()\n            except Exception as e:\n                logger.exception(f\"Function with ID {result_id} failed due to {e}\")\n                results[result_id] = None\n\n                if not allow_failures:\n                    raise\n\n    return results\n\n\ndef run_async_sync_no_cancel(coro: Awaitable[T]) -> T:\n    \"\"\"\n    async-to-sync converter. Basically just executes asyncio.run in a separate thread.\n    Which is probably somehow inefficient or not ideal but fine for now.\n    \"\"\"\n    context = contextvars.copy_context()\n    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:\n        future: concurrent.futures.Future[T] = executor.submit(\n            context.run,  # type: ignore[arg-type]\n            asyncio.run,\n            coro,\n        )\n        return future.result()\n\n\ndef run_multiple_in_background(\n    funcs: list[Callable[[], None]],\n    thread_name_prefix: str = \"worker\",\n) -> ThreadPoolExecutor:\n    \"\"\"Submit multiple callables to a ``ThreadPoolExecutor`` with context propagation.\n\n    Copies the current ``contextvars`` context once and runs every callable\n    inside that copy, which is important for preserving tenant IDs and other\n    context-local state across threads.\n\n    Returns the executor so the caller can ``shutdown()`` when done.\n    \"\"\"\n    ctx = contextvars.copy_context()\n    executor = ThreadPoolExecutor(\n        max_workers=len(funcs), thread_name_prefix=thread_name_prefix\n    )\n    for func in funcs:\n        executor.submit(ctx.run, func)\n    return executor\n\n\nclass TimeoutThread(threading.Thread, Generic[R]):\n    def __init__(\n        self, timeout: float, func: Callable[..., R], *args: Any, **kwargs: Any\n    ):\n        super().__init__()\n        self.timeout = timeout\n        self.func = func\n        self.args = args\n        self.kwargs = kwargs\n        self.exception: Exception | None = None\n\n    def run(self) -> None:\n        try:\n            self.result = self.func(*self.args, **self.kwargs)\n        except Exception as e:\n            self.exception = e\n\n    def end(self) -> None:\n        raise TimeoutError(\n            f\"Function {self.func.__name__} timed out after {self.timeout} seconds\"\n        )\n\n\ndef run_with_timeout(\n    timeout: float, func: Callable[..., R], *args: Any, **kwargs: Any\n) -> R:\n    \"\"\"\n    Executes a function with a timeout. If the function doesn't complete within the specified\n    timeout, raises TimeoutError.\n    \"\"\"\n    context = contextvars.copy_context()\n    task = TimeoutThread(timeout, context.run, func, *args, **kwargs)\n    task.start()\n    task.join(timeout)\n\n    if task.exception is not None:\n        raise task.exception\n    if task.is_alive():\n        task.end()\n\n    return task.result\n\n\n# NOTE: this function should really only be used when run_functions_tuples_in_parallel is\n# difficult to use. It's up to the programmer to call wait_on_background on the thread after\n# the code you want to run in parallel is finished. As with all python thread parallelism,\n# this is only useful for I/O bound tasks.\ndef run_in_background(\n    func: Callable[..., R], *args: Any, **kwargs: Any\n) -> TimeoutThread[R]:\n    \"\"\"\n    Runs a function in a background thread. Returns a TimeoutThread object that can be used\n    to wait for the function to finish with wait_on_background.\n    \"\"\"\n    context = contextvars.copy_context()\n    # Timeout not used in the non-blocking case\n    task = TimeoutThread(-1, context.run, func, *args, **kwargs)\n    task.start()\n    return cast(TimeoutThread[R], task)\n\n\ndef wait_on_background(task: TimeoutThread[R]) -> R:\n    \"\"\"\n    Used in conjunction with run_in_background. blocks until the task is finished,\n    then returns the result of the task.\n    \"\"\"\n    task.join()\n\n    if task.exception is not None:\n        raise task.exception\n\n    return task.result\n\n\ndef _next_or_none(ind: int, gen: Iterator[R]) -> tuple[int, R | None]:\n    return ind, next(gen, None)\n\n\ndef parallel_yield(gens: list[Iterator[R]], max_workers: int = 10) -> Iterator[R]:\n    \"\"\"\n    Runs the list of generators with thread-level parallelism, yielding\n    results as available. The asynchronous nature of this yielding means\n    that stopping the returned iterator early DOES NOT GUARANTEE THAT NO\n    FURTHER ITEMS WERE PRODUCED by the input gens. Only use this function\n    if you are consuming all elements from the generators OR it is acceptable\n    for some extra generator code to run and not have the result(s) yielded.\n    \"\"\"\n    with ThreadPoolExecutor(max_workers=max_workers) as executor:\n        future_to_index: dict[Future[tuple[int, R | None]], int] = {\n            executor.submit(_next_or_none, ind, gen): ind\n            for ind, gen in enumerate(gens)\n        }\n\n        next_ind = len(gens)\n        while future_to_index:\n            done, _ = wait(future_to_index, return_when=FIRST_COMPLETED)\n            for future in done:\n                ind, result = future.result()\n                if result is not None:\n                    yield result\n                    future_to_index[executor.submit(_next_or_none, ind, gens[ind])] = (\n                        next_ind\n                    )\n                    next_ind += 1\n                del future_to_index[future]\n\n\ndef parallel_yield_from_funcs(\n    funcs: list[Callable[..., R]],\n    max_workers: int = 10,\n) -> Iterator[R]:\n    \"\"\"\n    Runs the list of functions with thread-level parallelism, yielding\n    results as available. The asynchronous nature of this yielding means\n    that stopping the returned iterator early DOES NOT GUARANTEE THAT NO\n    FURTHER ITEMS WERE PRODUCED by the input funcs. Only use this function\n    if you are consuming all elements from the functions OR it is acceptable\n    for some extra function code to run and not have the result(s) yielded.\n    \"\"\"\n\n    def func_wrapper(func: Callable[[], R]) -> Iterator[R]:\n        yield func()\n\n    yield from parallel_yield(\n        [func_wrapper(func) for func in funcs], max_workers=max_workers\n    )\n"
  },
  {
    "path": "backend/onyx/utils/timing.py",
    "content": "import time\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom collections.abc import Iterator\nfrom functools import wraps\nfrom inspect import signature\nfrom typing import Any\nfrom typing import cast\nfrom typing import TypeVar\n\nfrom onyx.utils.logger import setup_logger\nfrom onyx.utils.telemetry import optional_telemetry\nfrom onyx.utils.telemetry import RecordType\n\nlogger = setup_logger()\n\nF = TypeVar(\"F\", bound=Callable)\nFG = TypeVar(\"FG\", bound=Callable[..., Generator | Iterator])\n\n\ndef log_function_time(\n    func_name: str | None = None,\n    print_only: bool = False,\n    debug_only: bool = False,\n    include_args: bool = False,\n    include_args_subset: dict[str, Callable[[Any], Any]] | None = None,\n) -> Callable[[F], F]:\n    \"\"\"Decorates a function to log the time it takes to execute.\n\n    Args:\n        func_name: The name of the function to log. If None uses func.__name__.\n            Defaults to None.\n        print_only: If False, also sends the log to telemetry. Defaults to\n            False.\n        debug_only: If True, logs at the debug level. If False, logs at the\n            notice level. Defaults to False.\n        include_args: Whether to include the full args and kwargs in the log.\n            Clobbers include_args_subset if True. Defaults to False.\n        include_args_subset: An optional dict mapping arg names to callables to\n            apply the arg value before logging. Only args supplied in the dict\n            will be logged. Clobbered by include_args if True. Defaults to None.\n\n    Returns:\n        The decorated function.\n    \"\"\"\n\n    def decorator(func: F) -> F:\n        @wraps(func)\n        def wrapped_func(*args: Any, **kwargs: Any) -> Any:\n            # Elapsed time should use monotonic.\n            start_time = time.monotonic()\n            result = func(*args, **kwargs)\n            elapsed_time = time.monotonic() - start_time\n            elapsed_time_str = f\"{elapsed_time:.3f}\"\n            log_name = func_name or func.__name__\n            args_str = \"\"\n            if include_args:\n                args_str = f\" args={args} kwargs={kwargs}\"\n            elif include_args_subset:\n                sig = signature(func)\n                bind = sig.bind(*args, **kwargs)\n                bind.apply_defaults()\n                for arg in include_args_subset:\n                    if arg in bind.arguments:\n                        arg_val = include_args_subset[arg](bind.arguments[arg])\n                        args_str += f\" {arg}={arg_val}\"\n            final_log = f\"{log_name}{args_str} took {elapsed_time_str} seconds.\"\n            if debug_only:\n                logger.debug(final_log)\n            else:\n                # These are generally more important logs so the level is a bit\n                # higher.\n                logger.notice(final_log)\n\n            if not print_only:\n                user = kwargs.get(\"user\")\n                optional_telemetry(\n                    record_type=RecordType.LATENCY,\n                    data={\"function\": log_name, \"latency\": str(elapsed_time_str)},\n                    user_id=str(user.id) if user else \"Unknown\",\n                )\n\n            return result\n\n        return cast(F, wrapped_func)\n\n    return decorator\n\n\ndef log_generator_function_time(\n    func_name: str | None = None, print_only: bool = False\n) -> Callable[[FG], FG]:\n    def decorator(func: FG) -> FG:\n        @wraps(func)\n        def wrapped_func(*args: Any, **kwargs: Any) -> Any:\n            start_time = time.monotonic()\n            user = kwargs.get(\"user\")\n            gen = func(*args, **kwargs)\n            try:\n                value = next(gen)\n                while True:\n                    yield value\n                    value = next(gen)\n            except StopIteration:\n                pass\n            finally:\n                elapsed_time_str = f\"{time.monotonic() - start_time:.3f}\"\n                log_name = func_name or func.__name__\n                logger.info(f\"{log_name} took {elapsed_time_str} seconds\")\n                if not print_only:\n                    optional_telemetry(\n                        record_type=RecordType.LATENCY,\n                        data={\"function\": log_name, \"latency\": str(elapsed_time_str)},\n                        user_id=str(user.id) if user else \"Unknown\",\n                    )\n\n        return cast(FG, wrapped_func)\n\n    return decorator\n"
  },
  {
    "path": "backend/onyx/utils/url.py",
    "content": "import ipaddress\nimport socket\nfrom typing import Any\nfrom urllib.parse import parse_qs\nfrom urllib.parse import urlencode\nfrom urllib.parse import urlparse\nfrom urllib.parse import urlunparse\n\nimport requests\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n# Hostnames that should always be blocked\nBLOCKED_HOSTNAMES = {\n    # Localhost variations\n    \"localhost\",\n    # Cloud metadata endpoints (defense-in-depth, IPs also blocked via _is_ip_private_or_reserved)\n    \"169.254.169.254\",  # AWS/Azure/GCP metadata IP\n    \"fd00:ec2::254\",  # AWS IPv6 metadata\n    \"metadata.azure.com\",\n    \"metadata.google.internal\",\n    \"metadata.gke.internal\",\n    # Kubernetes internal\n    \"kubernetes.default\",\n    \"kubernetes.default.svc\",\n    \"kubernetes.default.svc.cluster.local\",\n}\n\n\nclass SSRFException(Exception):\n    \"\"\"Exception raised when an SSRF attempt is detected.\"\"\"\n\n\ndef _is_ip_private_or_reserved(ip_str: str) -> bool:\n    \"\"\"\n    Check if an IP address is private, reserved, or otherwise not suitable\n    for external requests.\n\n    Uses Python's ipaddress module which handles:\n    - Private addresses (10.x.x.x, 172.16-31.x.x, 192.168.x.x)\n    - Loopback addresses (127.x.x.x, ::1)\n    - Link-local addresses (169.254.x.x including cloud metadata IPs, fe80::/10)\n    - Reserved addresses\n    - Multicast addresses\n    - Unspecified addresses (0.0.0.0, ::)\n    \"\"\"\n    try:\n        ip = ipaddress.ip_address(ip_str)\n        # is_global returns True only for globally routable unicast addresses\n        # This excludes private, loopback, link-local, reserved, and unspecified\n        # We also need to explicitly check multicast as it's not covered by is_global\n        return not ip.is_global or ip.is_multicast\n    except ValueError:\n        # If we can't parse the IP, consider it unsafe\n        return True\n\n\ndef _validate_and_resolve_url(url: str) -> tuple[str, str, int]:\n    \"\"\"\n    Validate a URL for SSRF and resolve it to a safe IP address.\n\n    Returns:\n        Tuple of (validated_ip, original_hostname, port)\n\n    Raises:\n        SSRFException: If the URL could be used for SSRF attack\n        ValueError: If the URL is malformed\n    \"\"\"\n    if not url:\n        raise ValueError(\"URL cannot be empty\")\n\n    # Parse the URL\n    try:\n        parsed = urlparse(url)\n    except Exception as e:\n        raise ValueError(f\"Invalid URL format: {e}\")\n\n    # Validate scheme\n    if parsed.scheme not in (\"http\", \"https\"):\n        raise SSRFException(\n            f\"Invalid URL scheme '{parsed.scheme}'. Only http and https are allowed.\"\n        )\n\n    # Get hostname\n    hostname = parsed.hostname\n    if not hostname:\n        raise ValueError(\"URL must contain a hostname\")\n\n    # Check for blocked hostnames\n    hostname_lower = hostname.lower()\n    if hostname_lower in BLOCKED_HOSTNAMES:\n        raise SSRFException(f\"Access to hostname '{hostname}' is not allowed.\")\n\n    # Check for common SSRF bypass attempts\n    # Block URLs with credentials (user:pass@host)\n    if parsed.username or parsed.password:\n        raise SSRFException(\"URLs with embedded credentials are not allowed.\")\n\n    port = parsed.port or (443 if parsed.scheme == \"https\" else 80)\n\n    # Check if the hostname is already an IP address\n    try:\n        ip = ipaddress.ip_address(hostname)\n        if _is_ip_private_or_reserved(str(ip)):\n            raise SSRFException(\n                f\"Access to internal/private IP address '{hostname}' is not allowed.\"\n            )\n        return str(ip), hostname, port\n    except ValueError:\n        # Not an IP address, proceed with DNS resolution\n        pass\n\n    # Resolve hostname to IP addresses\n    try:\n        addr_info = socket.getaddrinfo(hostname, port)\n    except socket.gaierror as e:\n        logger.warning(f\"DNS resolution failed for hostname '{hostname}': {e}\")\n        raise SSRFException(f\"Could not resolve hostname '{hostname}': {e}\")\n\n    if not addr_info:\n        raise SSRFException(f\"Could not resolve hostname '{hostname}'\")\n\n    # Find the first valid (non-private) IP address\n    validated_ip = None\n    for info in addr_info:\n        ip_str = info[4][0]\n        if _is_ip_private_or_reserved(str(ip_str)):\n            raise SSRFException(\n                f\"Hostname '{hostname}' resolves to internal/private IP address \"\n                f\"'{ip_str}'. Access to internal networks is not allowed.\"\n            )\n        if validated_ip is None:\n            validated_ip = ip_str\n\n    if validated_ip is None:\n        raise SSRFException(f\"Could not resolve hostname '{hostname}'\")\n\n    return validated_ip, hostname, port\n\n\ndef validate_outbound_http_url(\n    url: str,\n    *,\n    allow_private_network: bool = False,\n    https_only: bool = False,\n) -> str:\n    \"\"\"\n    Validate a URL that will be used by backend outbound HTTP calls.\n\n    Args:\n        url: The URL to validate.\n        allow_private_network: If True, skip private/reserved IP checks.\n        https_only: If True, reject http:// URLs (only https:// is allowed).\n\n    Returns:\n        A normalized URL string with surrounding whitespace removed.\n\n    Raises:\n        ValueError: If URL is malformed.\n        SSRFException: If URL fails SSRF checks.\n    \"\"\"\n    normalized_url = url.strip()\n    if not normalized_url:\n        raise ValueError(\"URL cannot be empty\")\n\n    parsed = urlparse(normalized_url)\n\n    if https_only:\n        if parsed.scheme != \"https\":\n            raise SSRFException(\n                f\"Invalid URL scheme '{parsed.scheme}'. Only https is allowed.\"\n            )\n    elif parsed.scheme not in (\"http\", \"https\"):\n        raise SSRFException(\n            f\"Invalid URL scheme '{parsed.scheme}'. Only http and https are allowed.\"\n        )\n\n    if not parsed.hostname:\n        raise ValueError(\"URL must contain a hostname\")\n\n    if parsed.username or parsed.password:\n        raise SSRFException(\"URLs with embedded credentials are not allowed.\")\n\n    hostname = parsed.hostname.lower()\n    if hostname in BLOCKED_HOSTNAMES:\n        raise SSRFException(f\"Access to hostname '{parsed.hostname}' is not allowed.\")\n\n    if not allow_private_network:\n        _validate_and_resolve_url(normalized_url)\n\n    return normalized_url\n\n\nMAX_REDIRECTS = 10\n\n\ndef _make_ssrf_safe_request(\n    url: str,\n    headers: dict[str, str] | None = None,\n    timeout: float | tuple[float, float] = 15,\n    **kwargs: Any,\n) -> requests.Response:\n    \"\"\"\n    Make a single GET request with SSRF protection (no redirect following).\n\n    Returns the response which may be a redirect (3xx status).\n    \"\"\"\n    # Validate and resolve the URL to get a safe IP\n    validated_ip, original_hostname, port = _validate_and_resolve_url(url)\n\n    # Parse the URL to rebuild it with the IP\n    parsed = urlparse(url)\n\n    # Build the new URL using the validated IP\n    # For HTTPS, we need to use the original hostname for TLS verification\n    if parsed.scheme == \"https\":\n        # For HTTPS, make request to original URL but we've validated the IP\n        # The TLS handshake needs the hostname for SNI\n        # We rely on the short time window between validation and request\n        # A more robust solution would require custom SSL context\n        request_url = url\n    else:\n        # For HTTP, we can safely request directly to the IP\n        netloc = f\"{validated_ip}:{port}\" if port not in (80, 443) else validated_ip\n        request_url = urlunparse(\n            (\n                parsed.scheme,\n                netloc,\n                parsed.path,\n                parsed.params,\n                parsed.query,\n                parsed.fragment,\n            )\n        )\n\n    # Prepare headers\n    request_headers = headers.copy() if headers else {}\n\n    # Set Host header to original hostname (required for virtual hosting)\n    if parsed.scheme == \"http\":\n        request_headers[\"Host\"] = (\n            f\"{original_hostname}:{port}\" if port != 80 else original_hostname\n        )\n\n    # Disable automatic redirects to prevent SSRF bypass via redirect\n    return requests.get(\n        request_url,\n        headers=request_headers,\n        timeout=timeout,\n        allow_redirects=False,\n        **kwargs,\n    )\n\n\ndef ssrf_safe_get(\n    url: str,\n    headers: dict[str, str] | None = None,\n    timeout: float | tuple[float, float] = 15,\n    follow_redirects: bool = True,\n    **kwargs: Any,\n) -> requests.Response:\n    \"\"\"\n    Make a GET request with SSRF protection.\n\n    This function resolves the hostname, validates the IP is not private/internal,\n    and makes the request directly to the validated IP to prevent DNS rebinding attacks.\n    Redirects are followed safely by validating each redirect URL.\n\n    Args:\n        url: The URL to fetch\n        headers: Optional headers to include in the request\n        timeout: Request timeout in seconds\n        follow_redirects: Whether to follow redirects (each redirect URL is validated)\n        **kwargs: Additional arguments passed to requests.get()\n\n    Returns:\n        requests.Response object\n\n    Raises:\n        SSRFException: If the URL could be used for SSRF attack\n        ValueError: If the URL is malformed\n        requests.RequestException: If the request fails\n    \"\"\"\n    response = _make_ssrf_safe_request(url, headers, timeout, **kwargs)\n\n    if not follow_redirects:\n        return response\n\n    # Manually follow redirects while validating each redirect URL\n    redirect_count = 0\n    current_url = url\n\n    while response.is_redirect and redirect_count < MAX_REDIRECTS:\n        redirect_count += 1\n\n        # Get the redirect location\n        redirect_url = response.headers.get(\"Location\")\n        if not redirect_url:\n            break\n\n        # Handle relative redirects\n        if not redirect_url.startswith((\"http://\", \"https://\")):\n            parsed_current = urlparse(current_url)\n            if redirect_url.startswith(\"/\"):\n                redirect_url = (\n                    f\"{parsed_current.scheme}://{parsed_current.netloc}{redirect_url}\"\n                )\n            else:\n                # Relative path\n                base_path = parsed_current.path.rsplit(\"/\", 1)[0]\n                redirect_url = f\"{parsed_current.scheme}://{parsed_current.netloc}{base_path}/{redirect_url}\"\n\n        # Validate and follow the redirect (this will raise SSRFException if invalid)\n        current_url = redirect_url\n        response = _make_ssrf_safe_request(redirect_url, headers, timeout, **kwargs)\n\n    if response.is_redirect and redirect_count >= MAX_REDIRECTS:\n        raise SSRFException(f\"Too many redirects (max {MAX_REDIRECTS})\")\n\n    return response\n\n\ndef normalize_url(url: str) -> str:\n    \"\"\"\n    Normalize a URL by removing query parameters and fragments.\n    This is used to create consistent cache keys for deduplication.\n\n    Args:\n        url: The original URL\n\n    Returns:\n        Normalized URL (scheme + netloc + path + params only)\n    \"\"\"\n    parsed_url = urlparse(url)\n\n    # Reconstruct the URL without query string and fragment\n    normalized = urlunparse(\n        (\n            parsed_url.scheme,\n            parsed_url.netloc,\n            parsed_url.path,\n            parsed_url.params,\n            \"\",\n            \"\",\n        )\n    )\n\n    return normalized\n\n\ndef add_url_params(url: str, params: dict) -> str:\n    \"\"\"\n    Add parameters to a URL, handling existing parameters properly.\n\n    Args:\n        url: The original URL\n        params: Dictionary of parameters to add\n\n    Returns:\n        URL with added parameters\n    \"\"\"\n    # Parse the URL\n    parsed_url = urlparse(url)\n\n    # Get existing query parameters\n    query_params = parse_qs(parsed_url.query)\n\n    # Update with new parameters\n    for key, value in params.items():\n        query_params[key] = [value]\n\n    # Build the new query string\n    new_query = urlencode(query_params, doseq=True)\n\n    # Reconstruct the URL with the new query string\n    new_url = urlunparse(\n        (\n            parsed_url.scheme,\n            parsed_url.netloc,\n            parsed_url.path,\n            parsed_url.params,\n            new_query,\n            parsed_url.fragment,\n        )\n    )\n\n    return new_url\n"
  },
  {
    "path": "backend/onyx/utils/variable_functionality.py",
    "content": "import functools\nimport importlib\nimport inspect\nimport os\nfrom typing import Any\nfrom typing import TypeVar\n\nfrom onyx.configs.app_configs import API_SERVER_HOST\nfrom onyx.configs.app_configs import API_SERVER_PROTOCOL\nfrom onyx.configs.app_configs import API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS\nfrom onyx.configs.app_configs import APP_API_PREFIX\nfrom onyx.configs.app_configs import APP_PORT\nfrom onyx.configs.app_configs import DEV_MODE\nfrom onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass OnyxVersion:\n    def __init__(self) -> None:\n        self._is_ee = False\n\n    def set_ee(self) -> None:\n        self._is_ee = True\n\n    def unset_ee(self) -> None:\n        self._is_ee = False\n\n    def is_ee_version(self) -> bool:\n        return self._is_ee\n\n\nglobal_version = OnyxVersion()\n\n# Read LICENSE_ENFORCEMENT_ENABLED directly since it's in EE configs\n# This allows EE code to load when license enforcement is enabled,\n# even without ENABLE_PAID_ENTERPRISE_EDITION_FEATURES being set.\n# Eventually, ENABLE_PAID_ENTERPRISE_EDITION_FEATURES will be removed\n# and license enforcement will be the only mechanism for EE features.\n_LICENSE_ENFORCEMENT_ENABLED = (\n    os.environ.get(\"LICENSE_ENFORCEMENT_ENABLED\", \"true\").lower() == \"true\"\n)\n\n\ndef set_is_ee_based_on_env_variable() -> None:\n    \"\"\"Enable Enterprise Edition based on environment configuration.\n\n    EE is enabled if either:\n    - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true (legacy/rollout flag)\n    - LICENSE_ENFORCEMENT_ENABLED=true (license-based gating)\n\n    When LICENSE_ENFORCEMENT_ENABLED is true, EE code is loaded but access\n    to EE-only features is controlled by the license enforcement middleware.\n    \"\"\"\n    if global_version.is_ee_version():\n        return\n\n    if ENTERPRISE_EDITION_ENABLED:\n        logger.notice(\n            \"Enterprise Edition enabled via ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\"\n        )\n        global_version.set_ee()\n    elif _LICENSE_ENFORCEMENT_ENABLED:\n        logger.notice(\"Enterprise Edition enabled via LICENSE_ENFORCEMENT_ENABLED\")\n        global_version.set_ee()\n\n\n@functools.lru_cache(maxsize=128)\ndef fetch_versioned_implementation(module: str, attribute: str) -> Any:\n    \"\"\"\n    Fetches a versioned implementation of a specified attribute from a given module.\n    This function first checks if the application is running in an Enterprise Edition (EE)\n    context. If so, it attempts to import the attribute from the EE-specific module.\n    If the module or attribute is not found, it falls back to the default module or\n    raises the appropriate exception depending on the context.\n\n    Args:\n        module (str): The name of the module from which to fetch the attribute.\n        attribute (str): The name of the attribute to fetch from the module.\n\n    Returns:\n        Any: The fetched implementation of the attribute.\n\n    Raises:\n        ModuleNotFoundError: If the module cannot be found and the error is not related to\n                             the Enterprise Edition fallback logic.\n\n    Logs:\n        Logs debug information about the fetching process and warnings if the versioned\n        implementation cannot be found or loaded.\n    \"\"\"\n    logger.debug(\"Fetching versioned implementation for %s.%s\", module, attribute)\n    is_ee = global_version.is_ee_version()\n\n    module_full = f\"ee.{module}\" if is_ee else module\n    try:\n        return getattr(importlib.import_module(module_full), attribute)\n    except ModuleNotFoundError as e:\n        logger.warning(\n            \"Failed to fetch versioned implementation for %s.%s: %s\",\n            module_full,\n            attribute,\n            e,\n        )\n\n        if is_ee:\n            if \"ee.onyx\" not in str(e):\n                # If it's a non Onyx related import failure, this is likely because\n                # a dependent library has not been installed. Should raise this failure\n                # instead of letting the server start up\n                raise e\n\n            # Use the MIT version as a fallback, this allows us to develop MIT\n            # versions independently and later add additional EE functionality\n            # similar to feature flagging\n            return getattr(importlib.import_module(module), attribute)\n\n        raise\n\n\nT = TypeVar(\"T\")\n\n\ndef fetch_versioned_implementation_with_fallback(\n    module: str, attribute: str, fallback: T\n) -> T:\n    \"\"\"\n    Attempts to fetch a versioned implementation of a specified attribute from a given module.\n    If the attempt fails (e.g., due to an import error or missing attribute), the function logs\n    a warning and returns the provided fallback implementation.\n\n    Args:\n        module (str): The name of the module from which to fetch the attribute.\n        attribute (str): The name of the attribute to fetch from the module.\n        fallback (T): The fallback implementation to return if fetching the attribute fails.\n\n    Returns:\n        T: The fetched implementation if successful, otherwise the provided fallback.\n    \"\"\"\n    try:\n        return fetch_versioned_implementation(module, attribute)\n    except Exception:\n        return fallback\n\n\ndef noop_fallback(*args: Any, **kwargs: Any) -> None:\n    \"\"\"\n    A no-op (no operation) fallback function that accepts any arguments but does nothing.\n    This is often used as a default or placeholder callback function.\n\n    Args:\n        *args (Any): Positional arguments, which are ignored.\n        **kwargs (Any): Keyword arguments, which are ignored.\n\n    Returns:\n        None\n    \"\"\"\n\n\ndef fetch_ee_implementation_or_noop(\n    module: str, attribute: str, noop_return_value: Any = None\n) -> Any:\n    \"\"\"\n    Fetches an EE implementation if EE is enabled, otherwise returns a no-op function.\n    Raises an exception if EE is enabled but the fetch fails.\n\n    Args:\n        module (str): The name of the module from which to fetch the attribute.\n        attribute (str): The name of the attribute to fetch from the module.\n\n    Returns:\n        Any: The fetched EE implementation if successful and EE is enabled, otherwise a no-op function.\n\n    Raises:\n        Exception: If EE is enabled but the fetch fails.\n    \"\"\"\n    if not global_version.is_ee_version():\n        if inspect.iscoroutinefunction(noop_return_value):\n\n            async def async_noop(*args: Any, **kwargs: Any) -> Any:\n                return await noop_return_value(*args, **kwargs)\n\n            return async_noop\n\n        else:\n\n            def sync_noop(*args: Any, **kwargs: Any) -> Any:  # noqa: ARG001\n                return noop_return_value\n\n            return sync_noop\n    try:\n        return fetch_versioned_implementation(module, attribute)\n    except Exception as e:\n        logger.error(f\"Failed to fetch implementation for {module}.{attribute}: {e}\")\n        raise\n\n\ndef build_api_server_url_for_http_requests(\n    respect_env_override_if_set: bool = False,\n) -> str:\n    \"\"\"\n    Builds the API server URL for HTTP requests.\n    \"\"\"\n    if DEV_MODE:\n        url = f\"http://127.0.0.1:{APP_PORT}\"\n    elif respect_env_override_if_set and API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS:\n        url = API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS.rstrip(\"/\")\n    else:\n        url = f\"{API_SERVER_PROTOCOL}://{API_SERVER_HOST}:{APP_PORT}\"\n\n    if APP_API_PREFIX:\n        url += f\"/{APP_API_PREFIX.strip('/')}\"\n\n    return url\n"
  },
  {
    "path": "backend/onyx/utils/web_content.py",
    "content": "from __future__ import annotations\n\nimport io\nfrom urllib.parse import unquote\nfrom urllib.parse import urlparse\n\nfrom bs4.dammit import UnicodeDammit\n\nfrom onyx.file_processing.extract_file_text import read_pdf_file\n\nPDF_MIME_TYPES = (\n    \"application/pdf\",\n    \"application/x-pdf\",\n    \"application/acrobat\",\n    \"application/vnd.pdf\",\n    \"text/pdf\",\n    \"text/x-pdf\",\n)\n\n\ndef _charset_from_content_type(content_type: str | None) -> str | None:\n    if not content_type:\n        return None\n    for part in content_type.split(\";\"):\n        part = part.strip()\n        if part.lower().startswith(\"charset=\"):\n            charset = part.split(\"=\", 1)[-1].strip().strip(\"\\\"'\")\n            return charset or None\n    return None\n\n\ndef decode_html_bytes(\n    content: bytes,\n    content_type: str | None = None,\n    fallback_encoding: str | None = None,\n) -> str:\n    override_encodings: list[str] = []\n    charset = _charset_from_content_type(content_type)\n    if charset:\n        override_encodings.append(charset)\n    if fallback_encoding and fallback_encoding not in override_encodings:\n        override_encodings.append(fallback_encoding)\n\n    unicode_dammit = UnicodeDammit(\n        content, override_encodings=override_encodings or None\n    )\n    if unicode_dammit.unicode_markup is not None:\n        return unicode_dammit.unicode_markup\n\n    encoding = override_encodings[0] if override_encodings else \"utf-8\"\n    return content.decode(encoding, errors=\"replace\")\n\n\ndef is_pdf_mime_type(content_type: str | None) -> bool:\n    if not content_type:\n        return False\n    lowered = content_type.lower()\n    return any(pdf_type in lowered for pdf_type in PDF_MIME_TYPES)\n\n\ndef is_pdf_url(url: str) -> bool:\n    if not url:\n        return False\n    parsed = urlparse(url)\n    return parsed.path.lower().endswith(\".pdf\")\n\n\ndef has_pdf_signature(content_sniff: bytes | None) -> bool:\n    if not content_sniff:\n        return False\n    return content_sniff.lstrip().startswith(b\"%PDF-\")\n\n\ndef is_pdf_resource(\n    url: str,\n    content_type: str | None = None,\n    content_sniff: bytes | None = None,\n) -> bool:\n    return (\n        is_pdf_mime_type(content_type)\n        or is_pdf_url(url)\n        or has_pdf_signature(content_sniff)\n    )\n\n\ndef extract_pdf_text(content: bytes) -> tuple[str, dict[str, str | list[str]]]:\n    text_content, metadata, _ = read_pdf_file(io.BytesIO(content))\n    return text_content or \"\", normalize_metadata(metadata)\n\n\ndef title_from_pdf_metadata(metadata: dict[str, str | list[str]]) -> str:\n    if not metadata:\n        return \"\"\n    for key in (\"Title\", \"title\"):\n        value = metadata.get(key)\n        if isinstance(value, str) and value.strip():\n            return value.strip()\n        if isinstance(value, list):\n            items = [item.strip() for item in value if isinstance(item, str)]\n            if items:\n                return \", \".join(items)\n    return \"\"\n\n\ndef normalize_metadata(metadata: dict[str, object]) -> dict[str, str | list[str]]:\n    sanitized: dict[str, str | list[str]] = {}\n    for key, value in metadata.items():\n        if isinstance(value, str):\n            if value.strip():\n                sanitized[key] = value\n            continue\n        if isinstance(value, list):\n            items = [item.strip() for item in value if isinstance(item, str)]\n            if items:\n                sanitized[key] = items\n            continue\n        if value is not None:\n            sanitized[key] = str(value)\n    return sanitized\n\n\ndef title_from_url(url: str) -> str:\n    parsed = urlparse(url)\n    filename = parsed.path.rsplit(\"/\", 1)[-1]\n    if not filename:\n        return \"\"\n    return unquote(filename)\n"
  },
  {
    "path": "backend/onyx/voice/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/voice/factory.py",
    "content": "from onyx.db.models import VoiceProvider\nfrom onyx.voice.interface import VoiceProviderInterface\n\n\ndef get_voice_provider(provider: VoiceProvider) -> VoiceProviderInterface:\n    \"\"\"\n    Factory function to get the appropriate voice provider implementation.\n\n    Args:\n        provider: VoiceProvider model instance (can be from DB or constructed temporarily)\n\n    Returns:\n        VoiceProviderInterface implementation\n\n    Raises:\n        ValueError: If provider_type is not supported\n    \"\"\"\n    provider_type = provider.provider_type.lower()\n\n    # Handle both SensitiveValue (from DB) and plain string (from temp model)\n    if provider.api_key is None:\n        api_key = None\n    elif hasattr(provider.api_key, \"get_value\"):\n        # SensitiveValue from database\n        api_key = provider.api_key.get_value(apply_mask=False)\n    else:\n        # Plain string from temporary model\n        api_key = provider.api_key  # type: ignore[assignment]\n    api_base = provider.api_base\n    custom_config = provider.custom_config\n    stt_model = provider.stt_model\n    tts_model = provider.tts_model\n    default_voice = provider.default_voice\n\n    if provider_type == \"openai\":\n        from onyx.voice.providers.openai import OpenAIVoiceProvider\n\n        return OpenAIVoiceProvider(\n            api_key=api_key,\n            api_base=api_base,\n            stt_model=stt_model,\n            tts_model=tts_model,\n            default_voice=default_voice,\n        )\n\n    elif provider_type == \"azure\":\n        from onyx.voice.providers.azure import AzureVoiceProvider\n\n        return AzureVoiceProvider(\n            api_key=api_key,\n            api_base=api_base,\n            custom_config=custom_config or {},\n            stt_model=stt_model,\n            tts_model=tts_model,\n            default_voice=default_voice,\n        )\n\n    elif provider_type == \"elevenlabs\":\n        from onyx.voice.providers.elevenlabs import ElevenLabsVoiceProvider\n\n        return ElevenLabsVoiceProvider(\n            api_key=api_key,\n            api_base=api_base,\n            stt_model=stt_model,\n            tts_model=tts_model,\n            default_voice=default_voice,\n        )\n\n    else:\n        raise ValueError(f\"Unsupported voice provider type: {provider_type}\")\n"
  },
  {
    "path": "backend/onyx/voice/interface.py",
    "content": "from abc import ABC\nfrom abc import abstractmethod\nfrom collections.abc import AsyncIterator\nfrom typing import Protocol\n\nfrom pydantic import BaseModel\n\n\nclass TranscriptResult(BaseModel):\n    \"\"\"Result from streaming transcription.\"\"\"\n\n    text: str\n    \"\"\"The accumulated transcript text.\"\"\"\n\n    is_vad_end: bool = False\n    \"\"\"True if VAD detected end of speech (silence). Use for auto-send.\"\"\"\n\n\nclass StreamingTranscriberProtocol(Protocol):\n    \"\"\"Protocol for streaming transcription sessions.\"\"\"\n\n    async def send_audio(self, chunk: bytes) -> None:\n        \"\"\"Send an audio chunk for transcription.\"\"\"\n        ...\n\n    async def receive_transcript(self) -> TranscriptResult | None:\n        \"\"\"\n        Receive next transcript update.\n\n        Returns:\n            TranscriptResult with accumulated text and VAD status, or None when stream ends.\n        \"\"\"\n        ...\n\n    async def close(self) -> str:\n        \"\"\"Close the session and return final transcript.\"\"\"\n        ...\n\n    def reset_transcript(self) -> None:\n        \"\"\"Reset accumulated transcript. Call after auto-send to start fresh.\"\"\"\n        ...\n\n\nclass StreamingSynthesizerProtocol(Protocol):\n    \"\"\"Protocol for streaming TTS sessions (real-time text-to-speech).\"\"\"\n\n    async def connect(self) -> None:\n        \"\"\"Establish connection to TTS provider.\"\"\"\n        ...\n\n    async def send_text(self, text: str) -> None:\n        \"\"\"Send text to be synthesized.\"\"\"\n        ...\n\n    async def receive_audio(self) -> bytes | None:\n        \"\"\"\n        Receive next audio chunk.\n\n        Returns:\n            Audio bytes, or None when stream ends.\n        \"\"\"\n        ...\n\n    async def flush(self) -> None:\n        \"\"\"Signal end of text input and wait for pending audio.\"\"\"\n        ...\n\n    async def close(self) -> None:\n        \"\"\"Close the session.\"\"\"\n        ...\n\n\nclass VoiceProviderInterface(ABC):\n    \"\"\"Abstract base class for voice providers (STT and TTS).\"\"\"\n\n    @abstractmethod\n    async def transcribe(self, audio_data: bytes, audio_format: str) -> str:\n        \"\"\"\n        Convert audio to text (Speech-to-Text).\n\n        Args:\n            audio_data: Raw audio bytes\n            audio_format: Audio format (e.g., \"webm\", \"wav\", \"mp3\")\n\n        Returns:\n            Transcribed text\n        \"\"\"\n\n    @abstractmethod\n    def synthesize_stream(\n        self, text: str, voice: str | None = None, speed: float = 1.0\n    ) -> AsyncIterator[bytes]:\n        \"\"\"\n        Convert text to audio stream (Text-to-Speech).\n\n        Streams audio chunks progressively for lower latency playback.\n\n        Args:\n            text: Text to convert to speech\n            voice: Voice identifier (e.g., \"alloy\", \"echo\"), or None for default\n            speed: Playback speed multiplier (0.25 to 4.0)\n\n        Yields:\n            Audio data chunks\n        \"\"\"\n\n    @abstractmethod\n    async def validate_credentials(self) -> None:\n        \"\"\"\n        Validate that the provider credentials are correct by making a\n        lightweight API call. Raises on failure.\n        \"\"\"\n\n    @abstractmethod\n    def get_available_voices(self) -> list[dict[str, str]]:\n        \"\"\"\n        Get list of available voices for this provider.\n\n        Returns:\n            List of voice dictionaries with 'id' and 'name' keys\n        \"\"\"\n\n    @abstractmethod\n    def get_available_stt_models(self) -> list[dict[str, str]]:\n        \"\"\"\n        Get list of available STT models for this provider.\n\n        Returns:\n            List of model dictionaries with 'id' and 'name' keys\n        \"\"\"\n\n    @abstractmethod\n    def get_available_tts_models(self) -> list[dict[str, str]]:\n        \"\"\"\n        Get list of available TTS models for this provider.\n\n        Returns:\n            List of model dictionaries with 'id' and 'name' keys\n        \"\"\"\n\n    def supports_streaming_stt(self) -> bool:\n        \"\"\"Returns True if this provider supports streaming STT.\"\"\"\n        return False\n\n    def supports_streaming_tts(self) -> bool:\n        \"\"\"Returns True if this provider supports real-time streaming TTS.\"\"\"\n        return False\n\n    async def create_streaming_transcriber(\n        self, audio_format: str = \"webm\"\n    ) -> StreamingTranscriberProtocol:\n        \"\"\"\n        Create a streaming transcription session.\n\n        Args:\n            audio_format: Audio format being sent (e.g., \"webm\", \"pcm16\")\n\n        Returns:\n            A streaming transcriber that can send audio chunks and receive transcripts\n\n        Raises:\n            NotImplementedError: If streaming STT is not supported\n        \"\"\"\n        raise NotImplementedError(\"Streaming STT not supported by this provider\")\n\n    async def create_streaming_synthesizer(\n        self, voice: str | None = None, speed: float = 1.0\n    ) -> \"StreamingSynthesizerProtocol\":\n        \"\"\"\n        Create a streaming TTS session for real-time audio synthesis.\n\n        Args:\n            voice: Voice identifier\n            speed: Playback speed multiplier\n\n        Returns:\n            A streaming synthesizer that can send text and receive audio chunks\n\n        Raises:\n            NotImplementedError: If streaming TTS is not supported\n        \"\"\"\n        raise NotImplementedError(\"Streaming TTS not supported by this provider\")\n"
  },
  {
    "path": "backend/onyx/voice/providers/__init__.py",
    "content": ""
  },
  {
    "path": "backend/onyx/voice/providers/azure.py",
    "content": "\"\"\"Azure Speech Services voice provider for STT and TTS.\n\nAzure supports:\n- **STT**: Batch transcription via REST API (audio/wav POST) and real-time\n  streaming via the Azure Speech SDK (push audio stream with continuous\n  recognition). The SDK handles VAD natively through its recognizing/recognized\n  events.\n- **TTS**: SSML-based synthesis via REST API (streaming response) and real-time\n  synthesis via the Speech SDK. Text is escaped with ``xml.sax.saxutils.escape``\n  and attributes with ``quoteattr`` to prevent SSML injection.\n\nBoth modes support Azure cloud endpoints (region-based URLs) and self-hosted\nSpeech containers (custom endpoint URLs). The ``speech_region`` is validated to\ncontain only ``[a-z0-9-]`` to prevent URL injection.\n\nThe Azure Speech SDK (``azure-cognitiveservices-speech``) is an optional C\nextension dependency — it is imported lazily inside streaming methods so the\nprovider can still be instantiated and used for REST-based operations without it.\n\nSee https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/\nfor API reference.\n\"\"\"\n\nimport asyncio\nimport io\nimport re\nimport struct\nimport wave\nfrom collections.abc import AsyncIterator\nfrom typing import Any\nfrom urllib.parse import urlparse\nfrom xml.sax.saxutils import escape\nfrom xml.sax.saxutils import quoteattr\n\nimport aiohttp\n\nfrom onyx.utils.logger import setup_logger\nfrom onyx.voice.interface import StreamingSynthesizerProtocol\nfrom onyx.voice.interface import StreamingTranscriberProtocol\nfrom onyx.voice.interface import TranscriptResult\nfrom onyx.voice.interface import VoiceProviderInterface\n\n# SSML namespace — W3C standard for Speech Synthesis Markup Language.\n# This is a fixed W3C specification and will not change.\nSSML_NAMESPACE = \"http://www.w3.org/2001/10/synthesis\"\n\n# Common Azure Neural voices\nAZURE_VOICES = [\n    {\"id\": \"en-US-JennyNeural\", \"name\": \"Jenny (en-US, Female)\"},\n    {\"id\": \"en-US-GuyNeural\", \"name\": \"Guy (en-US, Male)\"},\n    {\"id\": \"en-US-AriaNeural\", \"name\": \"Aria (en-US, Female)\"},\n    {\"id\": \"en-US-DavisNeural\", \"name\": \"Davis (en-US, Male)\"},\n    {\"id\": \"en-US-AmberNeural\", \"name\": \"Amber (en-US, Female)\"},\n    {\"id\": \"en-US-AnaNeural\", \"name\": \"Ana (en-US, Female)\"},\n    {\"id\": \"en-US-BrandonNeural\", \"name\": \"Brandon (en-US, Male)\"},\n    {\"id\": \"en-US-ChristopherNeural\", \"name\": \"Christopher (en-US, Male)\"},\n    {\"id\": \"en-US-CoraNeural\", \"name\": \"Cora (en-US, Female)\"},\n    {\"id\": \"en-GB-SoniaNeural\", \"name\": \"Sonia (en-GB, Female)\"},\n    {\"id\": \"en-GB-RyanNeural\", \"name\": \"Ryan (en-GB, Male)\"},\n]\n\n\nclass AzureStreamingTranscriber(StreamingTranscriberProtocol):\n    \"\"\"Streaming transcription using Azure Speech SDK.\"\"\"\n\n    def __init__(\n        self,\n        api_key: str,\n        region: str | None = None,\n        endpoint: str | None = None,\n        input_sample_rate: int = 24000,\n        target_sample_rate: int = 16000,\n    ):\n        self.api_key = api_key\n        self.region = region\n        self.endpoint = endpoint\n        self.input_sample_rate = input_sample_rate\n        self.target_sample_rate = target_sample_rate\n        self._transcript_queue: asyncio.Queue[TranscriptResult | None] = asyncio.Queue()\n        self._accumulated_transcript = \"\"\n        self._recognizer: Any = None\n        self._audio_stream: Any = None\n        self._closed = False\n        self._loop: asyncio.AbstractEventLoop | None = None\n\n    async def connect(self) -> None:\n        \"\"\"Initialize Azure Speech recognizer with push stream.\"\"\"\n        try:\n            import azure.cognitiveservices.speech as speechsdk  # type: ignore\n        except ImportError as e:\n            raise RuntimeError(\n                \"Azure Speech SDK is required for streaming STT. Install `azure-cognitiveservices-speech`.\"\n            ) from e\n\n        self._loop = asyncio.get_running_loop()\n\n        # Use endpoint for self-hosted containers, region for Azure cloud\n        if self.endpoint:\n            speech_config = speechsdk.SpeechConfig(\n                subscription=self.api_key,\n                endpoint=self.endpoint,\n            )\n        else:\n            speech_config = speechsdk.SpeechConfig(\n                subscription=self.api_key,\n                region=self.region,\n            )\n\n        audio_format = speechsdk.audio.AudioStreamFormat(\n            samples_per_second=16000,\n            bits_per_sample=16,\n            channels=1,\n        )\n        self._audio_stream = speechsdk.audio.PushAudioInputStream(audio_format)\n        audio_config = speechsdk.audio.AudioConfig(stream=self._audio_stream)\n\n        self._recognizer = speechsdk.SpeechRecognizer(\n            speech_config=speech_config,\n            audio_config=audio_config,\n        )\n\n        transcriber = self\n\n        def on_recognizing(evt: Any) -> None:\n            if evt.result.text and transcriber._loop and not transcriber._closed:\n                full_text = transcriber._accumulated_transcript\n                if full_text:\n                    full_text += \" \" + evt.result.text\n                else:\n                    full_text = evt.result.text\n                transcriber._loop.call_soon_threadsafe(\n                    transcriber._transcript_queue.put_nowait,\n                    TranscriptResult(text=full_text, is_vad_end=False),\n                )\n\n        def on_recognized(evt: Any) -> None:\n            if evt.result.text and transcriber._loop and not transcriber._closed:\n                if transcriber._accumulated_transcript:\n                    transcriber._accumulated_transcript += \" \" + evt.result.text\n                else:\n                    transcriber._accumulated_transcript = evt.result.text\n                transcriber._loop.call_soon_threadsafe(\n                    transcriber._transcript_queue.put_nowait,\n                    TranscriptResult(\n                        text=transcriber._accumulated_transcript, is_vad_end=True\n                    ),\n                )\n\n        self._recognizer.recognizing.connect(on_recognizing)\n        self._recognizer.recognized.connect(on_recognized)\n        self._recognizer.start_continuous_recognition_async()\n\n    async def send_audio(self, chunk: bytes) -> None:\n        \"\"\"Send audio chunk to Azure.\"\"\"\n        if self._audio_stream and not self._closed:\n            self._audio_stream.write(self._resample_pcm16(chunk))\n\n    def _resample_pcm16(self, data: bytes) -> bytes:\n        \"\"\"Resample PCM16 audio from input_sample_rate to target_sample_rate.\"\"\"\n        if self.input_sample_rate == self.target_sample_rate:\n            return data\n\n        num_samples = len(data) // 2\n        if num_samples == 0:\n            return b\"\"\n\n        samples = list(struct.unpack(f\"<{num_samples}h\", data))\n        ratio = self.input_sample_rate / self.target_sample_rate\n        new_length = int(num_samples / ratio)\n\n        resampled: list[int] = []\n        for i in range(new_length):\n            src_idx = i * ratio\n            idx_floor = int(src_idx)\n            idx_ceil = min(idx_floor + 1, num_samples - 1)\n            frac = src_idx - idx_floor\n            sample = int(samples[idx_floor] * (1 - frac) + samples[idx_ceil] * frac)\n            sample = max(-32768, min(32767, sample))\n            resampled.append(sample)\n\n        return struct.pack(f\"<{len(resampled)}h\", *resampled)\n\n    async def receive_transcript(self) -> TranscriptResult | None:\n        \"\"\"Receive next transcript.\"\"\"\n        try:\n            return await asyncio.wait_for(self._transcript_queue.get(), timeout=0.1)\n        except asyncio.TimeoutError:\n            return TranscriptResult(text=\"\", is_vad_end=False)\n\n    async def close(self) -> str:\n        \"\"\"Stop recognition and return final transcript.\"\"\"\n        self._closed = True\n        if self._recognizer:\n            self._recognizer.stop_continuous_recognition_async()\n        if self._audio_stream:\n            self._audio_stream.close()\n        self._loop = None\n        return self._accumulated_transcript\n\n    def reset_transcript(self) -> None:\n        \"\"\"Reset accumulated transcript.\"\"\"\n        self._accumulated_transcript = \"\"\n\n\nclass AzureStreamingSynthesizer(StreamingSynthesizerProtocol):\n    \"\"\"Real-time streaming TTS using Azure Speech SDK.\"\"\"\n\n    def __init__(\n        self,\n        api_key: str,\n        region: str | None = None,\n        endpoint: str | None = None,\n        voice: str = \"en-US-JennyNeural\",\n        speed: float = 1.0,\n    ):\n        self._logger = setup_logger()\n        self.api_key = api_key\n        self.region = region\n        self.endpoint = endpoint\n        self.voice = voice\n        self.speed = max(0.5, min(2.0, speed))\n        self._audio_queue: asyncio.Queue[bytes | None] = asyncio.Queue()\n        self._synthesizer: Any = None\n        self._closed = False\n        self._loop: asyncio.AbstractEventLoop | None = None\n\n    async def connect(self) -> None:\n        \"\"\"Initialize Azure Speech synthesizer with push stream.\"\"\"\n        try:\n            import azure.cognitiveservices.speech as speechsdk\n        except ImportError as e:\n            raise RuntimeError(\n                \"Azure Speech SDK is required for streaming TTS. Install `azure-cognitiveservices-speech`.\"\n            ) from e\n\n        self._logger.info(\"AzureStreamingSynthesizer: connecting\")\n\n        # Store the event loop for thread-safe queue operations\n        self._loop = asyncio.get_running_loop()\n\n        # Use endpoint for self-hosted containers, region for Azure cloud\n        if self.endpoint:\n            speech_config = speechsdk.SpeechConfig(\n                subscription=self.api_key,\n                endpoint=self.endpoint,\n            )\n        else:\n            speech_config = speechsdk.SpeechConfig(\n                subscription=self.api_key,\n                region=self.region,\n            )\n        speech_config.speech_synthesis_voice_name = self.voice\n        # Use MP3 format for streaming - compatible with MediaSource Extensions\n        speech_config.set_speech_synthesis_output_format(\n            speechsdk.SpeechSynthesisOutputFormat.Audio16Khz64KBitRateMonoMp3\n        )\n\n        # Create synthesizer with pull audio output stream\n        self._synthesizer = speechsdk.SpeechSynthesizer(\n            speech_config=speech_config,\n            audio_config=None,  # We'll manually handle audio\n        )\n\n        # Connect to synthesis events\n        self._synthesizer.synthesizing.connect(self._on_synthesizing)\n        self._synthesizer.synthesis_completed.connect(self._on_completed)\n\n        self._logger.info(\"AzureStreamingSynthesizer: connected\")\n\n    def _on_synthesizing(self, evt: Any) -> None:\n        \"\"\"Called when audio chunk is available (runs in Azure SDK thread).\"\"\"\n        if evt.result.audio_data and self._loop and not self._closed:\n            # Thread-safe way to put item in async queue\n            self._loop.call_soon_threadsafe(\n                self._audio_queue.put_nowait, evt.result.audio_data\n            )\n\n    def _on_completed(self, _evt: Any) -> None:\n        \"\"\"Called when synthesis is complete (runs in Azure SDK thread).\"\"\"\n        if self._loop and not self._closed:\n            self._loop.call_soon_threadsafe(self._audio_queue.put_nowait, None)\n\n    async def send_text(self, text: str) -> None:\n        \"\"\"Send text to be synthesized using SSML for prosody control.\"\"\"\n        if self._synthesizer and not self._closed:\n            # Build SSML with prosody for speed control\n            rate = f\"{int((self.speed - 1) * 100):+d}%\"\n            escaped_text = escape(text)\n            ssml = f\"\"\"<speak version='1.0' xmlns='{SSML_NAMESPACE}' xml:lang='en-US'>\n                <voice name={quoteattr(self.voice)}>\n                    <prosody rate='{rate}'>{escaped_text}</prosody>\n                </voice>\n            </speak>\"\"\"\n            # Use speak_ssml_async for SSML support (includes speed/prosody)\n            self._synthesizer.speak_ssml_async(ssml)\n\n    async def receive_audio(self) -> bytes | None:\n        \"\"\"Receive next audio chunk.\"\"\"\n        try:\n            return await asyncio.wait_for(self._audio_queue.get(), timeout=0.1)\n        except asyncio.TimeoutError:\n            return b\"\"  # No audio yet, but not done\n\n    async def flush(self) -> None:\n        \"\"\"Signal end of text input - wait for pending audio.\"\"\"\n        # Azure SDK handles flushing automatically\n\n    async def close(self) -> None:\n        \"\"\"Close the session.\"\"\"\n        self._closed = True\n        if self._synthesizer:\n            self._synthesizer.synthesis_completed.disconnect_all()\n            self._synthesizer.synthesizing.disconnect_all()\n        self._loop = None\n\n\nclass AzureVoiceProvider(VoiceProviderInterface):\n    \"\"\"Azure Speech Services voice provider.\"\"\"\n\n    def __init__(\n        self,\n        api_key: str | None,\n        api_base: str | None,\n        custom_config: dict[str, Any],\n        stt_model: str | None = None,\n        tts_model: str | None = None,\n        default_voice: str | None = None,\n    ):\n        self.api_key = api_key\n        self.api_base = api_base\n        self.custom_config = custom_config\n        raw_speech_region = (\n            custom_config.get(\"speech_region\")\n            or self._extract_speech_region_from_uri(api_base)\n            or \"\"\n        )\n        self.speech_region = self._validate_speech_region(raw_speech_region)\n        self.stt_model = stt_model\n        self.tts_model = tts_model\n        self.default_voice = default_voice or \"en-US-JennyNeural\"\n\n    @staticmethod\n    def _is_azure_cloud_url(uri: str | None) -> bool:\n        \"\"\"Check if URI is an Azure cloud endpoint (vs custom/self-hosted).\"\"\"\n        if not uri:\n            return False\n        try:\n            hostname = (urlparse(uri).hostname or \"\").lower()\n        except ValueError:\n            return False\n        return hostname.endswith(\n            (\n                \".speech.microsoft.com\",\n                \".api.cognitive.microsoft.com\",\n                \".cognitiveservices.azure.com\",\n            )\n        )\n\n    @staticmethod\n    def _extract_speech_region_from_uri(uri: str | None) -> str | None:\n        \"\"\"Extract Azure speech region from endpoint URI.\n\n        Note: Custom domains (*.cognitiveservices.azure.com) contain the resource\n        name, not the region. For custom domains, the region must be specified\n        explicitly via custom_config[\"speech_region\"].\n        \"\"\"\n        if not uri:\n            return None\n        # Accepted examples:\n        # - https://eastus.tts.speech.microsoft.com/cognitiveservices/v1\n        # - https://eastus.stt.speech.microsoft.com/speech/recognition/...\n        # - https://westus.api.cognitive.microsoft.com/\n        #\n        # NOT supported (requires explicit speech_region config):\n        # - https://<resource>.cognitiveservices.azure.com/ (resource name != region)\n        try:\n            hostname = (urlparse(uri).hostname or \"\").lower()\n        except ValueError:\n            return None\n\n        stt_tts_match = re.match(\n            r\"^([a-z0-9-]+)\\.(?:tts|stt)\\.speech\\.microsoft\\.com$\", hostname\n        )\n        if stt_tts_match:\n            return stt_tts_match.group(1)\n\n        api_match = re.match(\n            r\"^([a-z0-9-]+)\\.api\\.cognitive\\.microsoft\\.com$\", hostname\n        )\n        if api_match:\n            return api_match.group(1)\n\n        return None\n\n    @staticmethod\n    def _validate_speech_region(speech_region: str) -> str:\n        normalized_region = speech_region.strip().lower()\n        if not normalized_region:\n            return \"\"\n        if not re.fullmatch(r\"[a-z0-9-]+\", normalized_region):\n            raise ValueError(\n                \"Invalid Azure speech_region. Use lowercase letters, digits, and hyphens only.\"\n            )\n        return normalized_region\n\n    def _get_stt_url(self) -> str:\n        \"\"\"Get the STT endpoint URL (auto-detects cloud vs self-hosted).\"\"\"\n        if self.api_base and not self._is_azure_cloud_url(self.api_base):\n            # Self-hosted container endpoint\n            return f\"{self.api_base.rstrip('/')}/speech/recognition/conversation/cognitiveservices/v1\"\n        # Azure cloud endpoint\n        return f\"https://{self.speech_region}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1\"\n\n    def _get_tts_url(self) -> str:\n        \"\"\"Get the TTS endpoint URL (auto-detects cloud vs self-hosted).\"\"\"\n        if self.api_base and not self._is_azure_cloud_url(self.api_base):\n            # Self-hosted container endpoint\n            return f\"{self.api_base.rstrip('/')}/cognitiveservices/v1\"\n        # Azure cloud endpoint\n        return f\"https://{self.speech_region}.tts.speech.microsoft.com/cognitiveservices/v1\"\n\n    def _is_self_hosted(self) -> bool:\n        \"\"\"Check if using self-hosted container vs Azure cloud.\"\"\"\n        return bool(self.api_base and not self._is_azure_cloud_url(self.api_base))\n\n    @staticmethod\n    def _pcm16_to_wav(pcm_data: bytes, sample_rate: int = 24000) -> bytes:\n        \"\"\"Wrap raw PCM16 mono bytes into a WAV container.\"\"\"\n        buffer = io.BytesIO()\n        with wave.open(buffer, \"wb\") as wav_file:\n            wav_file.setnchannels(1)\n            wav_file.setsampwidth(2)\n            wav_file.setframerate(sample_rate)\n            wav_file.writeframes(pcm_data)\n        return buffer.getvalue()\n\n    async def transcribe(self, audio_data: bytes, audio_format: str) -> str:\n        if not self.api_key:\n            raise ValueError(\"Azure API key required for STT\")\n        if not self._is_self_hosted() and not self.speech_region:\n            raise ValueError(\"Azure speech region required for STT (cloud mode)\")\n\n        normalized_format = audio_format.lower()\n        payload = audio_data\n        content_type = f\"audio/{normalized_format}\"\n\n        # WebSocket chunked fallback sends raw PCM16 bytes.\n        if normalized_format in {\"pcm\", \"pcm16\", \"raw\"}:\n            payload = self._pcm16_to_wav(audio_data, sample_rate=24000)\n            content_type = \"audio/wav\"\n        elif normalized_format in {\"wav\", \"wave\"}:\n            content_type = \"audio/wav\"\n        elif normalized_format == \"webm\":\n            content_type = \"audio/webm; codecs=opus\"\n\n        url = self._get_stt_url()\n        params = {\"language\": \"en-US\", \"format\": \"detailed\"}\n        headers = {\n            \"Ocp-Apim-Subscription-Key\": self.api_key,\n            \"Content-Type\": content_type,\n            \"Accept\": \"application/json\",\n        }\n\n        async with aiohttp.ClientSession() as session:\n            async with session.post(\n                url, params=params, headers=headers, data=payload\n            ) as response:\n                if response.status != 200:\n                    error_text = await response.text()\n                    raise RuntimeError(f\"Azure STT failed: {error_text}\")\n                result = await response.json()\n\n        if result.get(\"RecognitionStatus\") != \"Success\":\n            return \"\"\n        nbest = result.get(\"NBest\") or []\n        if nbest and isinstance(nbest, list):\n            display = nbest[0].get(\"Display\")\n            if isinstance(display, str):\n                return display\n        display_text = result.get(\"DisplayText\", \"\")\n        return display_text if isinstance(display_text, str) else \"\"\n\n    async def synthesize_stream(\n        self, text: str, voice: str | None = None, speed: float = 1.0\n    ) -> AsyncIterator[bytes]:\n        \"\"\"\n        Convert text to audio using Azure TTS with streaming.\n\n        Args:\n            text: Text to convert to speech\n            voice: Voice name (defaults to provider's default voice)\n            speed: Playback speed multiplier (0.5 to 2.0)\n\n        Yields:\n            Audio data chunks (mp3 format)\n        \"\"\"\n        if not self.api_key:\n            raise ValueError(\"Azure API key required for TTS\")\n\n        if not self._is_self_hosted() and not self.speech_region:\n            raise ValueError(\"Azure speech region required for TTS (cloud mode)\")\n\n        voice_name = voice or self.default_voice\n\n        # Clamp speed to valid range and convert to rate format\n        speed = max(0.5, min(2.0, speed))\n        rate = f\"{int((speed - 1) * 100):+d}%\"  # e.g., 1.0 -> \"+0%\", 1.5 -> \"+50%\"\n\n        # Build SSML with escaped text and quoted attributes to prevent injection\n        escaped_text = escape(text)\n        ssml = f\"\"\"<speak version='1.0' xmlns='{SSML_NAMESPACE}' xml:lang='en-US'>\n            <voice name={quoteattr(voice_name)}>\n                <prosody rate='{rate}'>{escaped_text}</prosody>\n            </voice>\n        </speak>\"\"\"\n\n        url = self._get_tts_url()\n\n        headers = {\n            \"Ocp-Apim-Subscription-Key\": self.api_key,\n            \"Content-Type\": \"application/ssml+xml\",\n            \"X-Microsoft-OutputFormat\": \"audio-16khz-128kbitrate-mono-mp3\",\n            \"User-Agent\": \"Onyx\",\n        }\n\n        async with aiohttp.ClientSession() as session:\n            async with session.post(url, headers=headers, data=ssml) as response:\n                if response.status != 200:\n                    error_text = await response.text()\n                    raise RuntimeError(f\"Azure TTS failed: {error_text}\")\n\n                # Use 8192 byte chunks for smoother streaming\n                async for chunk in response.content.iter_chunked(8192):\n                    if chunk:\n                        yield chunk\n\n    async def validate_credentials(self) -> None:\n        \"\"\"Validate Azure credentials by listing available voices.\"\"\"\n        if not self.api_key:\n            raise ValueError(\"Azure API key required\")\n        if not self._is_self_hosted() and not self.speech_region:\n            raise ValueError(\"Azure speech region required (cloud mode)\")\n\n        url = f\"https://{self.speech_region}.tts.speech.microsoft.com/cognitiveservices/voices/list\"\n        if self._is_self_hosted():\n            url = f\"{(self.api_base or '').rstrip('/')}/cognitiveservices/voices/list\"\n\n        headers = {\"Ocp-Apim-Subscription-Key\": self.api_key}\n        async with aiohttp.ClientSession() as session:\n            async with session.get(url, headers=headers) as response:\n                if response.status in (401, 403):\n                    raise RuntimeError(\"Invalid Azure API key.\")\n                if response.status != 200:\n                    raise RuntimeError(\"Azure credential validation failed.\")\n\n    def get_available_voices(self) -> list[dict[str, str]]:\n        \"\"\"Return common Azure Neural voices.\"\"\"\n        return AZURE_VOICES.copy()\n\n    def get_available_stt_models(self) -> list[dict[str, str]]:\n        return [\n            {\"id\": \"default\", \"name\": \"Azure Speech Recognition\"},\n        ]\n\n    def get_available_tts_models(self) -> list[dict[str, str]]:\n        return [\n            {\"id\": \"neural\", \"name\": \"Neural TTS\"},\n        ]\n\n    def supports_streaming_stt(self) -> bool:\n        \"\"\"Azure supports streaming STT via Speech SDK.\"\"\"\n        return True\n\n    def supports_streaming_tts(self) -> bool:\n        \"\"\"Azure supports real-time streaming TTS via Speech SDK.\"\"\"\n        return True\n\n    async def create_streaming_transcriber(\n        self, _audio_format: str = \"webm\"\n    ) -> AzureStreamingTranscriber:\n        \"\"\"Create a streaming transcription session.\"\"\"\n        if not self.api_key:\n            raise ValueError(\"API key required for streaming transcription\")\n        if not self._is_self_hosted() and not self.speech_region:\n            raise ValueError(\n                \"Speech region required for Azure streaming transcription (cloud mode)\"\n            )\n\n        # Use endpoint for self-hosted, region for cloud\n        transcriber = AzureStreamingTranscriber(\n            api_key=self.api_key,\n            region=self.speech_region if not self._is_self_hosted() else None,\n            endpoint=self.api_base if self._is_self_hosted() else None,\n            input_sample_rate=24000,\n            target_sample_rate=16000,\n        )\n        await transcriber.connect()\n        return transcriber\n\n    async def create_streaming_synthesizer(\n        self, voice: str | None = None, speed: float = 1.0\n    ) -> AzureStreamingSynthesizer:\n        \"\"\"Create a streaming TTS session.\"\"\"\n        if not self.api_key:\n            raise ValueError(\"API key required for streaming TTS\")\n        if not self._is_self_hosted() and not self.speech_region:\n            raise ValueError(\n                \"Speech region required for Azure streaming TTS (cloud mode)\"\n            )\n\n        # Use endpoint for self-hosted, region for cloud\n        synthesizer = AzureStreamingSynthesizer(\n            api_key=self.api_key,\n            region=self.speech_region if not self._is_self_hosted() else None,\n            endpoint=self.api_base if self._is_self_hosted() else None,\n            voice=voice or self.default_voice or \"en-US-JennyNeural\",\n            speed=speed,\n        )\n        await synthesizer.connect()\n        return synthesizer\n"
  },
  {
    "path": "backend/onyx/voice/providers/elevenlabs.py",
    "content": "\"\"\"ElevenLabs voice provider for STT and TTS.\n\nElevenLabs supports:\n- **STT**: Scribe API (batch via REST, streaming via WebSocket with Scribe v2 Realtime).\n  The streaming endpoint sends base64-encoded PCM16 audio chunks and receives JSON\n  transcript messages (partial_transcript, committed_transcript, utterance_end).\n- **TTS**: Text-to-speech via REST streaming and WebSocket stream-input.\n  The WebSocket variant accepts incremental text chunks and returns audio in order,\n  enabling low-latency playback before the full text is available.\n\nSee https://elevenlabs.io/docs for API reference.\n\"\"\"\n\nimport asyncio\nimport base64\nimport json\nfrom collections.abc import AsyncIterator\nfrom enum import StrEnum\nfrom typing import Any\n\nimport aiohttp\n\nfrom onyx.voice.interface import StreamingSynthesizerProtocol\nfrom onyx.voice.interface import StreamingTranscriberProtocol\nfrom onyx.voice.interface import TranscriptResult\nfrom onyx.voice.interface import VoiceProviderInterface\n\n# Default ElevenLabs API base URL\nDEFAULT_ELEVENLABS_API_BASE = \"https://api.elevenlabs.io\"\n\n# Default sample rates for STT streaming\nDEFAULT_INPUT_SAMPLE_RATE = 24000  # What the browser frontend sends\nDEFAULT_TARGET_SAMPLE_RATE = 16000  # What ElevenLabs Scribe expects\n\n# Default streaming TTS output format\nDEFAULT_TTS_OUTPUT_FORMAT = \"mp3_44100_64\"\n\n# Default TTS voice settings\nDEFAULT_VOICE_STABILITY = 0.5\nDEFAULT_VOICE_SIMILARITY_BOOST = 0.75\n\n# Chunk length schedule for streaming TTS (optimized for real-time playback)\nDEFAULT_CHUNK_LENGTH_SCHEDULE = [120, 160, 250, 290]\n\n# Default STT streaming VAD configuration\nDEFAULT_VAD_SILENCE_THRESHOLD_SECS = 1.0\nDEFAULT_VAD_THRESHOLD = 0.4\nDEFAULT_MIN_SPEECH_DURATION_MS = 100\nDEFAULT_MIN_SILENCE_DURATION_MS = 300\n\n\nclass ElevenLabsSTTMessageType(StrEnum):\n    \"\"\"Message types from ElevenLabs Scribe Realtime STT API.\"\"\"\n\n    SESSION_STARTED = \"session_started\"\n    PARTIAL_TRANSCRIPT = \"partial_transcript\"\n    COMMITTED_TRANSCRIPT = \"committed_transcript\"\n    UTTERANCE_END = \"utterance_end\"\n    SESSION_ENDED = \"session_ended\"\n    ERROR = \"error\"\n\n\nclass ElevenLabsTTSMessageType(StrEnum):\n    \"\"\"Message types from ElevenLabs stream-input TTS API.\"\"\"\n\n    AUDIO = \"audio\"\n    ERROR = \"error\"\n\n\ndef _http_to_ws_url(http_url: str) -> str:\n    \"\"\"Convert http(s) URL to ws(s) URL for WebSocket connections.\"\"\"\n    if http_url.startswith(\"https://\"):\n        return \"wss://\" + http_url[8:]\n    elif http_url.startswith(\"http://\"):\n        return \"ws://\" + http_url[7:]\n    return http_url\n\n\n# Common ElevenLabs voices\nELEVENLABS_VOICES = [\n    {\"id\": \"21m00Tcm4TlvDq8ikWAM\", \"name\": \"Rachel\"},\n    {\"id\": \"AZnzlk1XvdvUeBnXmlld\", \"name\": \"Domi\"},\n    {\"id\": \"EXAVITQu4vr4xnSDxMaL\", \"name\": \"Bella\"},\n    {\"id\": \"ErXwobaYiN019PkySvjV\", \"name\": \"Antoni\"},\n    {\"id\": \"MF3mGyEYCl7XYWbV9V6O\", \"name\": \"Elli\"},\n    {\"id\": \"TxGEqnHWrfWFTfGW9XjX\", \"name\": \"Josh\"},\n    {\"id\": \"VR6AewLTigWG4xSOukaG\", \"name\": \"Arnold\"},\n    {\"id\": \"pNInz6obpgDQGcFmaJgB\", \"name\": \"Adam\"},\n    {\"id\": \"yoZ06aMxZJJ28mfd3POQ\", \"name\": \"Sam\"},\n]\n\n\nclass ElevenLabsStreamingTranscriber(StreamingTranscriberProtocol):\n    \"\"\"Streaming transcription session using ElevenLabs Scribe Realtime API.\"\"\"\n\n    def __init__(\n        self,\n        api_key: str,\n        model: str = \"scribe_v2_realtime\",\n        input_sample_rate: int = DEFAULT_INPUT_SAMPLE_RATE,\n        target_sample_rate: int = DEFAULT_TARGET_SAMPLE_RATE,\n        language_code: str = \"en\",\n        api_base: str | None = None,\n    ):\n        # Import logger first\n        from onyx.utils.logger import setup_logger\n\n        self._logger = setup_logger()\n\n        self._logger.info(\n            f\"ElevenLabsStreamingTranscriber: initializing with model {model}\"\n        )\n        self.api_key = api_key\n        self.model = model\n        self.input_sample_rate = input_sample_rate\n        self.target_sample_rate = target_sample_rate\n        self.language_code = language_code\n        self.api_base = api_base or DEFAULT_ELEVENLABS_API_BASE\n        self._ws: aiohttp.ClientWebSocketResponse | None = None\n        self._session: aiohttp.ClientSession | None = None\n        self._transcript_queue: asyncio.Queue[TranscriptResult | None] = asyncio.Queue()\n        self._final_transcript = \"\"\n        self._receive_task: asyncio.Task | None = None\n        self._closed = False\n\n    async def connect(self) -> None:\n        \"\"\"Establish WebSocket connection to ElevenLabs.\"\"\"\n        self._logger.info(\n            \"ElevenLabsStreamingTranscriber: connecting to ElevenLabs API\"\n        )\n        self._session = aiohttp.ClientSession()\n\n        # VAD is configured via query parameters.\n        # commit_strategy=vad enables automatic transcript commit on silence detection.\n        # These params are part of the ElevenLabs Scribe Realtime API contract:\n        # https://elevenlabs.io/docs/api-reference/speech-to-text/realtime\n        ws_base = _http_to_ws_url(self.api_base.rstrip(\"/\"))\n        url = (\n            f\"{ws_base}/v1/speech-to-text/realtime\"\n            f\"?model_id={self.model}\"\n            f\"&sample_rate={self.target_sample_rate}\"\n            f\"&language_code={self.language_code}\"\n            f\"&commit_strategy=vad\"\n            f\"&vad_silence_threshold_secs={DEFAULT_VAD_SILENCE_THRESHOLD_SECS}\"\n            f\"&vad_threshold={DEFAULT_VAD_THRESHOLD}\"\n            f\"&min_speech_duration_ms={DEFAULT_MIN_SPEECH_DURATION_MS}\"\n            f\"&min_silence_duration_ms={DEFAULT_MIN_SILENCE_DURATION_MS}\"\n        )\n        self._logger.info(\n            f\"ElevenLabsStreamingTranscriber: connecting to {url} \"\n            f\"(input={self.input_sample_rate}Hz, target={self.target_sample_rate}Hz)\"\n        )\n\n        try:\n            self._ws = await self._session.ws_connect(\n                url,\n                headers={\"xi-api-key\": self.api_key},\n            )\n            self._logger.info(\n                f\"ElevenLabsStreamingTranscriber: connected successfully, \"\n                f\"ws.closed={self._ws.closed}, close_code={self._ws.close_code}\"\n            )\n        except Exception as e:\n            self._logger.error(\n                f\"ElevenLabsStreamingTranscriber: failed to connect: {e}\"\n            )\n            if self._session:\n                await self._session.close()\n            raise\n\n        # Start receiving transcripts in background\n        self._receive_task = asyncio.create_task(self._receive_loop())\n\n    async def _receive_loop(self) -> None:\n        \"\"\"Background task to receive transcripts from WebSocket.\"\"\"\n        self._logger.info(\"ElevenLabsStreamingTranscriber: receive loop started\")\n        if not self._ws:\n            self._logger.warning(\n                \"ElevenLabsStreamingTranscriber: no WebSocket connection\"\n            )\n            return\n\n        try:\n            async for msg in self._ws:\n                self._logger.debug(\n                    f\"ElevenLabsStreamingTranscriber: raw message type: {msg.type}\"\n                )\n                if msg.type == aiohttp.WSMsgType.TEXT:\n                    parsed_data: Any = None\n                    data: dict[str, Any]\n                    try:\n                        parsed_data = json.loads(msg.data)\n                    except json.JSONDecodeError:\n                        self._logger.error(\n                            f\"ElevenLabsStreamingTranscriber: failed to parse JSON: {msg.data[:200]}\"\n                        )\n                        continue\n                    if not isinstance(parsed_data, dict):\n                        self._logger.error(\n                            \"ElevenLabsStreamingTranscriber: expected object JSON payload\"\n                        )\n                        continue\n                    data = parsed_data\n\n                    # ElevenLabs uses message_type field - fail fast if missing\n                    if \"message_type\" not in data and \"type\" not in data:\n                        self._logger.error(\n                            f\"ElevenLabsStreamingTranscriber: malformed packet missing 'message_type' field: {data}\"\n                        )\n                        continue\n                    msg_type = data.get(\"message_type\", data.get(\"type\", \"\"))\n                    self._logger.info(\n                        f\"ElevenLabsStreamingTranscriber: received message_type: '{msg_type}', data keys: {list(data.keys())}\"\n                    )\n                    # Check for error in various formats\n                    if \"error\" in data or msg_type == ElevenLabsSTTMessageType.ERROR:\n                        error_msg = data.get(\"error\", data.get(\"message\", data))\n                        self._logger.error(\n                            f\"ElevenLabsStreamingTranscriber: API error: {error_msg}\"\n                        )\n                        continue\n\n                    # Handle message types from ElevenLabs Scribe Realtime API.\n                    # See https://elevenlabs.io/docs/api-reference/speech-to-text/realtime\n                    if msg_type == ElevenLabsSTTMessageType.SESSION_STARTED:\n                        self._logger.info(\n                            f\"ElevenLabsStreamingTranscriber: session started, \"\n                            f\"id={data.get('session_id')}, config={data.get('config')}\"\n                        )\n                    elif msg_type == ElevenLabsSTTMessageType.PARTIAL_TRANSCRIPT:\n                        # Interim result — updated as more audio is processed\n                        text = data.get(\"text\", \"\")\n                        if text:\n                            self._logger.info(\n                                f\"ElevenLabsStreamingTranscriber: partial_transcript: {text[:50]}...\"\n                            )\n                            self._final_transcript = text\n                            await self._transcript_queue.put(\n                                TranscriptResult(text=text, is_vad_end=False)\n                            )\n                    elif msg_type == ElevenLabsSTTMessageType.COMMITTED_TRANSCRIPT:\n                        # Final transcript for the current utterance (VAD detected end)\n                        text = data.get(\"text\", \"\")\n                        if text:\n                            self._logger.info(\n                                f\"ElevenLabsStreamingTranscriber: committed_transcript: {text[:50]}...\"\n                            )\n                            self._final_transcript = text\n                            await self._transcript_queue.put(\n                                TranscriptResult(text=text, is_vad_end=True)\n                            )\n                    elif msg_type == ElevenLabsSTTMessageType.UTTERANCE_END:\n                        # VAD detected end of speech (may carry text or be empty)\n                        text = data.get(\"text\", \"\") or self._final_transcript\n                        if text:\n                            self._logger.info(\n                                f\"ElevenLabsStreamingTranscriber: utterance_end: {text[:50]}...\"\n                            )\n                            self._final_transcript = text\n                            await self._transcript_queue.put(\n                                TranscriptResult(text=text, is_vad_end=True)\n                            )\n                    elif msg_type == ElevenLabsSTTMessageType.SESSION_ENDED:\n                        self._logger.info(\n                            \"ElevenLabsStreamingTranscriber: session ended\"\n                        )\n                        break\n                    else:\n                        # Log unhandled message types with full data for debugging\n                        self._logger.warning(\n                            f\"ElevenLabsStreamingTranscriber: unhandled message_type: {msg_type}, full data: {data}\"\n                        )\n                elif msg.type == aiohttp.WSMsgType.BINARY:\n                    self._logger.debug(\n                        f\"ElevenLabsStreamingTranscriber: received binary message: {len(msg.data)} bytes\"\n                    )\n                elif msg.type == aiohttp.WSMsgType.CLOSED:\n                    close_code = self._ws.close_code if self._ws else \"N/A\"\n                    self._logger.info(\n                        f\"ElevenLabsStreamingTranscriber: WebSocket closed by server, close_code={close_code}\"\n                    )\n                    break\n                elif msg.type == aiohttp.WSMsgType.ERROR:\n                    self._logger.error(\n                        f\"ElevenLabsStreamingTranscriber: WebSocket error: {self._ws.exception() if self._ws else 'N/A'}\"\n                    )\n                    break\n                elif msg.type == aiohttp.WSMsgType.CLOSE:\n                    self._logger.info(\n                        f\"ElevenLabsStreamingTranscriber: WebSocket CLOSE frame received, data={msg.data}, extra={msg.extra}\"\n                    )\n                    break\n        except Exception as e:\n            self._logger.error(\n                f\"ElevenLabsStreamingTranscriber: error in receive loop: {e}\",\n                exc_info=True,\n            )\n        finally:\n            close_code = self._ws.close_code if self._ws else \"N/A\"\n            self._logger.info(\n                f\"ElevenLabsStreamingTranscriber: receive loop ended, close_code={close_code}\"\n            )\n            await self._transcript_queue.put(None)  # Signal end\n\n    def _resample_pcm16(self, data: bytes) -> bytes:\n        \"\"\"Resample PCM16 audio from input_sample_rate to target_sample_rate.\"\"\"\n        import struct\n\n        if self.input_sample_rate == self.target_sample_rate:\n            return data\n\n        # Parse int16 samples\n        num_samples = len(data) // 2\n        samples = list(struct.unpack(f\"<{num_samples}h\", data))\n\n        # Calculate resampling ratio\n        ratio = self.input_sample_rate / self.target_sample_rate\n        new_length = int(num_samples / ratio)\n\n        # Linear interpolation resampling\n        resampled = []\n        for i in range(new_length):\n            src_idx = i * ratio\n            idx_floor = int(src_idx)\n            idx_ceil = min(idx_floor + 1, num_samples - 1)\n            frac = src_idx - idx_floor\n            sample = int(samples[idx_floor] * (1 - frac) + samples[idx_ceil] * frac)\n            # Clamp to int16 range\n            sample = max(-32768, min(32767, sample))\n            resampled.append(sample)\n\n        return struct.pack(f\"<{len(resampled)}h\", *resampled)\n\n    async def send_audio(self, chunk: bytes) -> None:\n        \"\"\"Send an audio chunk for transcription.\"\"\"\n        if not self._ws:\n            self._logger.warning(\"send_audio: no WebSocket connection\")\n            return\n        if self._closed:\n            self._logger.warning(\"send_audio: transcriber is closed\")\n            return\n        if self._ws.closed:\n            self._logger.warning(\n                f\"send_audio: WebSocket is closed, close_code={self._ws.close_code}\"\n            )\n            return\n\n        try:\n            # Resample from input rate (24kHz) to target rate (16kHz)\n            resampled = self._resample_pcm16(chunk)\n            # ElevenLabs expects input_audio_chunk message format with audio_base_64\n            audio_b64 = base64.b64encode(resampled).decode(\"utf-8\")\n            message = {\n                \"message_type\": \"input_audio_chunk\",\n                \"audio_base_64\": audio_b64,\n                \"sample_rate\": self.target_sample_rate,\n            }\n            self._logger.info(\n                f\"send_audio: {len(chunk)} bytes -> {len(resampled)} bytes (resampled) -> {len(audio_b64)} chars base64\"\n            )\n            await self._ws.send_str(json.dumps(message))\n            self._logger.info(\"send_audio: message sent successfully\")\n        except Exception as e:\n            self._logger.error(f\"send_audio: failed to send: {e}\", exc_info=True)\n            raise\n\n    async def receive_transcript(self) -> TranscriptResult | None:\n        \"\"\"Receive next transcript. Returns None when done.\"\"\"\n        try:\n            return await asyncio.wait_for(self._transcript_queue.get(), timeout=0.1)\n        except asyncio.TimeoutError:\n            return TranscriptResult(\n                text=\"\", is_vad_end=False\n            )  # No transcript yet, but not done\n\n    async def close(self) -> str:\n        \"\"\"Close the session and return final transcript.\"\"\"\n        self._logger.info(\"ElevenLabsStreamingTranscriber: closing session\")\n        self._closed = True\n        if self._ws and not self._ws.closed:\n            try:\n                # Just close the WebSocket - ElevenLabs Scribe doesn't need a special end message\n                self._logger.info(\n                    \"ElevenLabsStreamingTranscriber: closing WebSocket connection\"\n                )\n                await self._ws.close()\n            except Exception as e:\n                self._logger.debug(f\"Error closing WebSocket: {e}\")\n        if self._receive_task and not self._receive_task.done():\n            self._receive_task.cancel()\n            try:\n                await self._receive_task\n            except asyncio.CancelledError:\n                pass\n        if self._session and not self._session.closed:\n            await self._session.close()\n        return self._final_transcript\n\n    def reset_transcript(self) -> None:\n        \"\"\"Reset accumulated transcript. Call after auto-send to start fresh.\"\"\"\n        self._final_transcript = \"\"\n\n\nclass ElevenLabsStreamingSynthesizer(StreamingSynthesizerProtocol):\n    \"\"\"Real-time streaming TTS using ElevenLabs WebSocket API.\n\n    Uses ElevenLabs' stream-input WebSocket which processes text as one\n    continuous stream and returns audio in order.\n    \"\"\"\n\n    def __init__(\n        self,\n        api_key: str,\n        voice_id: str,\n        model_id: str = \"eleven_multilingual_v2\",\n        output_format: str = \"mp3_44100_64\",\n        api_base: str | None = None,\n        speed: float = 1.0,\n    ):\n        from onyx.utils.logger import setup_logger\n\n        self._logger = setup_logger()\n        self.api_key = api_key\n        self.voice_id = voice_id\n        self.model_id = model_id\n        self.output_format = output_format\n        self.api_base = api_base or DEFAULT_ELEVENLABS_API_BASE\n        self.speed = speed\n        self._ws: aiohttp.ClientWebSocketResponse | None = None\n        self._session: aiohttp.ClientSession | None = None\n        self._audio_queue: asyncio.Queue[bytes | None] = asyncio.Queue()\n        self._receive_task: asyncio.Task | None = None\n        self._closed = False\n\n    async def connect(self) -> None:\n        \"\"\"Establish WebSocket connection to ElevenLabs TTS.\"\"\"\n        self._logger.info(\"ElevenLabsStreamingSynthesizer: connecting\")\n        self._session = aiohttp.ClientSession()\n\n        # WebSocket URL for streaming input TTS with output format for streaming compatibility\n        # Using mp3_44100_64 for good quality with smaller chunks for real-time playback\n        ws_base = _http_to_ws_url(self.api_base.rstrip(\"/\"))\n        url = (\n            f\"{ws_base}/v1/text-to-speech/{self.voice_id}/stream-input\"\n            f\"?model_id={self.model_id}&output_format={self.output_format}\"\n        )\n\n        self._ws = await self._session.ws_connect(\n            url,\n            headers={\"xi-api-key\": self.api_key},\n        )\n\n        # Send initial configuration with generation settings optimized for streaming.\n        # Note: API key is sent via header only (not in body to avoid log exposure).\n        # See https://elevenlabs.io/docs/api-reference/text-to-speech/stream-input\n        await self._ws.send_str(\n            json.dumps(\n                {\n                    \"text\": \" \",  # Initial space to start the stream\n                    \"voice_settings\": {\n                        \"stability\": DEFAULT_VOICE_STABILITY,\n                        \"similarity_boost\": DEFAULT_VOICE_SIMILARITY_BOOST,\n                        \"speed\": self.speed,\n                    },\n                    \"generation_config\": {\n                        \"chunk_length_schedule\": DEFAULT_CHUNK_LENGTH_SCHEDULE,\n                    },\n                }\n            )\n        )\n\n        # Start receiving audio in background\n        self._receive_task = asyncio.create_task(self._receive_loop())\n        self._logger.info(\"ElevenLabsStreamingSynthesizer: connected\")\n\n    async def _receive_loop(self) -> None:\n        \"\"\"Background task to receive audio chunks from WebSocket.\n\n        Audio is returned in order as one continuous stream.\n        \"\"\"\n        if not self._ws:\n            return\n\n        chunk_count = 0\n        total_bytes = 0\n        try:\n            async for msg in self._ws:\n                if self._closed:\n                    self._logger.info(\n                        \"ElevenLabsStreamingSynthesizer: closed flag set, stopping receive loop\"\n                    )\n                    break\n                if msg.type == aiohttp.WSMsgType.TEXT:\n                    data = json.loads(msg.data)\n                    # Process audio if present\n                    if \"audio\" in data and data[\"audio\"]:\n                        audio_bytes = base64.b64decode(data[\"audio\"])\n                        chunk_count += 1\n                        total_bytes += len(audio_bytes)\n                        await self._audio_queue.put(audio_bytes)\n\n                    # Check isFinal separately - a message can have both audio AND isFinal\n                    if \"isFinal\" in data:\n                        self._logger.info(\n                            f\"ElevenLabsStreamingSynthesizer: received isFinal={data['isFinal']}, \"\n                            f\"chunks so far: {chunk_count}, bytes: {total_bytes}\"\n                        )\n                        if data.get(\"isFinal\"):\n                            self._logger.info(\n                                \"ElevenLabsStreamingSynthesizer: isFinal=true, signaling end of audio\"\n                            )\n                            await self._audio_queue.put(None)\n\n                    # Check for errors\n                    if \"error\" in data or data.get(\"type\") == \"error\":\n                        self._logger.error(\n                            f\"ElevenLabsStreamingSynthesizer: received error: {data}\"\n                        )\n                elif msg.type == aiohttp.WSMsgType.BINARY:\n                    chunk_count += 1\n                    total_bytes += len(msg.data)\n                    await self._audio_queue.put(msg.data)\n                elif msg.type in (\n                    aiohttp.WSMsgType.CLOSE,\n                    aiohttp.WSMsgType.ERROR,\n                ):\n                    self._logger.info(\n                        f\"ElevenLabsStreamingSynthesizer: WebSocket closed/error, type={msg.type}\"\n                    )\n                    break\n        except Exception as e:\n            self._logger.error(f\"ElevenLabsStreamingSynthesizer receive error: {e}\")\n        finally:\n            self._logger.info(\n                f\"ElevenLabsStreamingSynthesizer: receive loop ended, {chunk_count} chunks, {total_bytes} bytes\"\n            )\n            await self._audio_queue.put(None)  # Signal end of stream\n\n    async def send_text(self, text: str) -> None:\n        \"\"\"Send text to be synthesized.\n\n        ElevenLabs processes text as a continuous stream and returns\n        audio in order. We let ElevenLabs handle buffering via chunk_length_schedule\n        and only force generation when flush() is called at the end.\n\n        Args:\n            text: Text to synthesize\n        \"\"\"\n        if self._ws and not self._closed and text.strip():\n            self._logger.info(\n                f\"ElevenLabsStreamingSynthesizer: sending text ({len(text)} chars): '{text}'\"\n            )\n            # Let ElevenLabs buffer and auto-generate based on chunk_length_schedule\n            # Don't trigger generation here - wait for flush() at the end\n            await self._ws.send_str(\n                json.dumps(\n                    {\n                        \"text\": text + \" \",  # Space for natural speech flow\n                    }\n                )\n            )\n            self._logger.info(\"ElevenLabsStreamingSynthesizer: text sent successfully\")\n        else:\n            self._logger.warning(\n                f\"ElevenLabsStreamingSynthesizer: skipping send_text - \"\n                f\"ws={self._ws is not None}, closed={self._closed}, text='{text[:30] if text else ''}'\"\n            )\n\n    async def receive_audio(self) -> bytes | None:\n        \"\"\"Receive next audio chunk.\"\"\"\n        try:\n            return await asyncio.wait_for(self._audio_queue.get(), timeout=0.1)\n        except asyncio.TimeoutError:\n            return b\"\"  # No audio yet, but not done\n\n    async def flush(self) -> None:\n        \"\"\"Signal end of text input. ElevenLabs will generate remaining audio and close.\"\"\"\n        if self._ws and not self._closed:\n            # Send empty string to signal end of input\n            # ElevenLabs will generate any remaining buffered text,\n            # send all audio chunks, send isFinal, then close the connection\n            self._logger.info(\n                \"ElevenLabsStreamingSynthesizer: sending end-of-input (empty string)\"\n            )\n            await self._ws.send_str(json.dumps({\"text\": \"\"}))\n            self._logger.info(\"ElevenLabsStreamingSynthesizer: end-of-input sent\")\n        else:\n            self._logger.warning(\n                f\"ElevenLabsStreamingSynthesizer: skipping flush - ws={self._ws is not None}, closed={self._closed}\"\n            )\n\n    async def close(self) -> None:\n        \"\"\"Close the session.\"\"\"\n        self._closed = True\n        if self._ws:\n            await self._ws.close()\n        if self._receive_task:\n            self._receive_task.cancel()\n            try:\n                await self._receive_task\n            except asyncio.CancelledError:\n                pass\n        if self._session:\n            await self._session.close()\n\n\n# Valid ElevenLabs model IDs\nELEVENLABS_STT_MODELS = {\"scribe_v1\", \"scribe_v2_realtime\"}\nELEVENLABS_TTS_MODELS = {\n    \"eleven_multilingual_v2\",\n    \"eleven_turbo_v2_5\",\n    \"eleven_monolingual_v1\",\n    \"eleven_flash_v2_5\",\n    \"eleven_flash_v2\",\n}\n\n\nclass ElevenLabsVoiceProvider(VoiceProviderInterface):\n    \"\"\"ElevenLabs voice provider.\"\"\"\n\n    def __init__(\n        self,\n        api_key: str | None,\n        api_base: str | None = None,\n        stt_model: str | None = None,\n        tts_model: str | None = None,\n        default_voice: str | None = None,\n    ):\n        self.api_key = api_key\n        self.api_base = api_base or DEFAULT_ELEVENLABS_API_BASE\n        # Validate and default models - use valid ElevenLabs model IDs\n        self.stt_model = (\n            stt_model if stt_model in ELEVENLABS_STT_MODELS else \"scribe_v1\"\n        )\n        self.tts_model = (\n            tts_model\n            if tts_model in ELEVENLABS_TTS_MODELS\n            else \"eleven_multilingual_v2\"\n        )\n        self.default_voice = default_voice\n\n    async def transcribe(self, audio_data: bytes, audio_format: str) -> str:\n        \"\"\"\n        Transcribe audio using ElevenLabs Speech-to-Text API.\n\n        Args:\n            audio_data: Raw audio bytes\n            audio_format: Format of the audio (e.g., 'webm', 'mp3', 'wav')\n\n        Returns:\n            Transcribed text\n        \"\"\"\n        if not self.api_key:\n            raise ValueError(\"ElevenLabs API key required for transcription\")\n\n        from onyx.utils.logger import setup_logger\n\n        logger = setup_logger()\n\n        url = f\"{self.api_base}/v1/speech-to-text\"\n\n        # Map common formats to MIME types\n        mime_types = {\n            \"webm\": \"audio/webm\",\n            \"mp3\": \"audio/mpeg\",\n            \"wav\": \"audio/wav\",\n            \"ogg\": \"audio/ogg\",\n            \"flac\": \"audio/flac\",\n            \"m4a\": \"audio/mp4\",\n        }\n        mime_type = mime_types.get(audio_format.lower(), f\"audio/{audio_format}\")\n\n        headers = {\n            \"xi-api-key\": self.api_key,\n        }\n\n        # ElevenLabs expects multipart form data\n        form_data = aiohttp.FormData()\n        form_data.add_field(\n            \"audio\",\n            audio_data,\n            filename=f\"audio.{audio_format}\",\n            content_type=mime_type,\n        )\n        # For batch STT, use scribe_v1 (not the realtime model)\n        batch_model = (\n            self.stt_model if self.stt_model in (\"scribe_v1\",) else \"scribe_v1\"\n        )\n        form_data.add_field(\"model_id\", batch_model)\n\n        logger.info(\n            f\"ElevenLabs transcribe: sending {len(audio_data)} bytes, format={audio_format}\"\n        )\n\n        async with aiohttp.ClientSession() as session:\n            async with session.post(url, headers=headers, data=form_data) as response:\n                if response.status != 200:\n                    error_text = await response.text()\n                    logger.error(f\"ElevenLabs transcribe failed: {error_text}\")\n                    raise RuntimeError(f\"ElevenLabs transcription failed: {error_text}\")\n\n                result = await response.json()\n                text = result.get(\"text\", \"\")\n                logger.info(f\"ElevenLabs transcribe: got result: {text[:50]}...\")\n                return text\n\n    async def synthesize_stream(\n        self, text: str, voice: str | None = None, speed: float = 1.0\n    ) -> AsyncIterator[bytes]:\n        \"\"\"\n        Convert text to audio using ElevenLabs TTS with streaming.\n\n        Args:\n            text: Text to convert to speech\n            voice: Voice ID (defaults to provider's default voice or Rachel)\n            speed: Playback speed multiplier\n\n        Yields:\n            Audio data chunks (mp3 format)\n        \"\"\"\n        from onyx.utils.logger import setup_logger\n\n        logger = setup_logger()\n\n        if not self.api_key:\n            raise ValueError(\"ElevenLabs API key required for TTS\")\n\n        voice_id = voice or self.default_voice or \"21m00Tcm4TlvDq8ikWAM\"  # Rachel\n\n        url = f\"{self.api_base}/v1/text-to-speech/{voice_id}/stream\"\n\n        logger.info(\n            f\"ElevenLabs TTS: starting synthesis, text='{text[:50]}...', voice={voice_id}, model={self.tts_model}, speed={speed}\"\n        )\n\n        headers = {\n            \"xi-api-key\": self.api_key,\n            \"Content-Type\": \"application/json\",\n            \"Accept\": \"audio/mpeg\",\n        }\n\n        payload = {\n            \"text\": text,\n            \"model_id\": self.tts_model,\n            \"voice_settings\": {\n                \"stability\": DEFAULT_VOICE_STABILITY,\n                \"similarity_boost\": DEFAULT_VOICE_SIMILARITY_BOOST,\n                \"speed\": speed,\n            },\n        }\n\n        async with aiohttp.ClientSession() as session:\n            async with session.post(url, headers=headers, json=payload) as response:\n                logger.info(\n                    f\"ElevenLabs TTS: got response status={response.status}, content-type={response.headers.get('content-type')}\"\n                )\n                if response.status != 200:\n                    error_text = await response.text()\n                    logger.error(f\"ElevenLabs TTS failed: {error_text}\")\n                    raise RuntimeError(f\"ElevenLabs TTS failed: {error_text}\")\n\n                # Use 8192 byte chunks for smoother streaming\n                chunk_count = 0\n                total_bytes = 0\n                async for chunk in response.content.iter_chunked(8192):\n                    if chunk:\n                        chunk_count += 1\n                        total_bytes += len(chunk)\n                        yield chunk\n                logger.info(\n                    f\"ElevenLabs TTS: streaming complete, {chunk_count} chunks, {total_bytes} total bytes\"\n                )\n\n    async def validate_credentials(self) -> None:\n        \"\"\"Validate ElevenLabs API key.\n\n        Calls /v1/models as a lightweight check. ElevenLabs returns 401 for\n        both truly invalid keys and valid keys with restricted scopes, so we\n        inspect the response body: a \"missing_permissions\" status means the\n        key authenticated successfully but lacks a specific scope.\n        \"\"\"\n        if not self.api_key:\n            raise ValueError(\"ElevenLabs API key required\")\n\n        headers = {\"xi-api-key\": self.api_key}\n        async with aiohttp.ClientSession() as session:\n            async with session.get(\n                f\"{self.api_base}/v1/models\", headers=headers\n            ) as response:\n                if response.status == 200:\n                    return\n                if response.status in (401, 403):\n                    try:\n                        body = await response.json()\n                        detail = body.get(\"detail\", {})\n                        status = (\n                            detail.get(\"status\", \"\") if isinstance(detail, dict) else \"\"\n                        )\n                    except Exception:\n                        status = \"\"\n                    # \"missing_permissions\" means the key is valid but\n                    # lacks this specific scope — that's fine.\n                    if status == \"missing_permissions\":\n                        return\n                    raise RuntimeError(\"Invalid ElevenLabs API key.\")\n                raise RuntimeError(\"ElevenLabs credential validation failed.\")\n\n    def get_available_voices(self) -> list[dict[str, str]]:\n        \"\"\"Return common ElevenLabs voices.\"\"\"\n        return ELEVENLABS_VOICES.copy()\n\n    def get_available_stt_models(self) -> list[dict[str, str]]:\n        return [\n            {\"id\": \"scribe_v2_realtime\", \"name\": \"Scribe v2 Realtime (Streaming)\"},\n            {\"id\": \"scribe_v1\", \"name\": \"Scribe v1 (Batch)\"},\n        ]\n\n    def get_available_tts_models(self) -> list[dict[str, str]]:\n        return [\n            {\"id\": \"eleven_multilingual_v2\", \"name\": \"Multilingual v2\"},\n            {\"id\": \"eleven_turbo_v2_5\", \"name\": \"Turbo v2.5\"},\n            {\"id\": \"eleven_monolingual_v1\", \"name\": \"Monolingual v1\"},\n        ]\n\n    def supports_streaming_stt(self) -> bool:\n        \"\"\"ElevenLabs supports streaming via Scribe Realtime API.\"\"\"\n        return True\n\n    def supports_streaming_tts(self) -> bool:\n        \"\"\"ElevenLabs supports real-time streaming TTS via WebSocket.\"\"\"\n        return True\n\n    async def create_streaming_transcriber(\n        self, _audio_format: str = \"webm\"\n    ) -> ElevenLabsStreamingTranscriber:\n        \"\"\"Create a streaming transcription session.\"\"\"\n        if not self.api_key:\n            raise ValueError(\"API key required for streaming transcription\")\n        # ElevenLabs realtime STT requires scribe_v2_realtime model.\n        # Frontend sends PCM16 at DEFAULT_INPUT_SAMPLE_RATE (24kHz),\n        # but ElevenLabs expects DEFAULT_TARGET_SAMPLE_RATE (16kHz).\n        # The transcriber resamples automatically.\n        transcriber = ElevenLabsStreamingTranscriber(\n            api_key=self.api_key,\n            model=\"scribe_v2_realtime\",\n            input_sample_rate=DEFAULT_INPUT_SAMPLE_RATE,\n            target_sample_rate=DEFAULT_TARGET_SAMPLE_RATE,\n            language_code=\"en\",\n            api_base=self.api_base,\n        )\n        await transcriber.connect()\n        return transcriber\n\n    async def create_streaming_synthesizer(\n        self, voice: str | None = None, speed: float = 1.0\n    ) -> ElevenLabsStreamingSynthesizer:\n        \"\"\"Create a streaming TTS session.\"\"\"\n        if not self.api_key:\n            raise ValueError(\"API key required for streaming TTS\")\n        voice_id = voice or self.default_voice or \"21m00Tcm4TlvDq8ikWAM\"\n        synthesizer = ElevenLabsStreamingSynthesizer(\n            api_key=self.api_key,\n            voice_id=voice_id,\n            model_id=self.tts_model,\n            output_format=DEFAULT_TTS_OUTPUT_FORMAT,\n            api_base=self.api_base,\n            speed=speed,\n        )\n        await synthesizer.connect()\n        return synthesizer\n"
  },
  {
    "path": "backend/onyx/voice/providers/openai.py",
    "content": "\"\"\"OpenAI voice provider for STT and TTS.\n\nOpenAI supports:\n- **STT**: Whisper (batch transcription via REST) and Realtime API (streaming\n  transcription via WebSocket with server-side VAD). Audio is sent as base64-encoded\n  PCM16 at 24kHz mono. The Realtime API returns transcript deltas and completed\n  transcription events per VAD-detected utterance.\n- **TTS**: HTTP streaming endpoint that returns audio chunks progressively.\n  Supported models: tts-1 (standard) and tts-1-hd (high quality).\n\nSee https://platform.openai.com/docs for API reference.\n\"\"\"\n\nimport asyncio\nimport base64\nimport io\nimport json\nfrom collections.abc import AsyncIterator\nfrom enum import StrEnum\nfrom typing import TYPE_CHECKING\n\nimport aiohttp\n\nfrom onyx.voice.interface import StreamingSynthesizerProtocol\nfrom onyx.voice.interface import StreamingTranscriberProtocol\nfrom onyx.voice.interface import TranscriptResult\nfrom onyx.voice.interface import VoiceProviderInterface\n\nif TYPE_CHECKING:\n    from openai import AsyncOpenAI\n\n# Default OpenAI API base URL\nDEFAULT_OPENAI_API_BASE = \"https://api.openai.com\"\n\n\nclass OpenAIRealtimeMessageType(StrEnum):\n    \"\"\"Message types from OpenAI Realtime transcription API.\"\"\"\n\n    ERROR = \"error\"\n    SPEECH_STARTED = \"input_audio_buffer.speech_started\"\n    SPEECH_STOPPED = \"input_audio_buffer.speech_stopped\"\n    BUFFER_COMMITTED = \"input_audio_buffer.committed\"\n    TRANSCRIPTION_DELTA = \"conversation.item.input_audio_transcription.delta\"\n    TRANSCRIPTION_COMPLETED = \"conversation.item.input_audio_transcription.completed\"\n    SESSION_CREATED = \"transcription_session.created\"\n    SESSION_UPDATED = \"transcription_session.updated\"\n    ITEM_CREATED = \"conversation.item.created\"\n\n\ndef _http_to_ws_url(http_url: str) -> str:\n    \"\"\"Convert http(s) URL to ws(s) URL for WebSocket connections.\"\"\"\n    if http_url.startswith(\"https://\"):\n        return \"wss://\" + http_url[8:]\n    elif http_url.startswith(\"http://\"):\n        return \"ws://\" + http_url[7:]\n    return http_url\n\n\nclass OpenAIStreamingTranscriber(StreamingTranscriberProtocol):\n    \"\"\"Streaming transcription using OpenAI Realtime API.\"\"\"\n\n    def __init__(\n        self,\n        api_key: str,\n        model: str = \"whisper-1\",\n        api_base: str | None = None,\n    ):\n        # Import logger first\n        from onyx.utils.logger import setup_logger\n\n        self._logger = setup_logger()\n\n        self._logger.info(\n            f\"OpenAIStreamingTranscriber: initializing with model {model}\"\n        )\n        self.api_key = api_key\n        self.model = model\n        self.api_base = api_base or DEFAULT_OPENAI_API_BASE\n        self._ws: aiohttp.ClientWebSocketResponse | None = None\n        self._session: aiohttp.ClientSession | None = None\n        self._transcript_queue: asyncio.Queue[TranscriptResult | None] = asyncio.Queue()\n        self._current_turn_transcript = \"\"  # Transcript for current VAD turn\n        self._accumulated_transcript = \"\"  # Accumulated across all turns\n        self._receive_task: asyncio.Task | None = None\n        self._closed = False\n\n    async def connect(self) -> None:\n        \"\"\"Establish WebSocket connection to OpenAI Realtime API.\"\"\"\n        self._session = aiohttp.ClientSession()\n\n        # OpenAI Realtime transcription endpoint\n        ws_base = _http_to_ws_url(self.api_base.rstrip(\"/\"))\n        url = f\"{ws_base}/v1/realtime?intent=transcription\"\n        headers = {\n            \"Authorization\": f\"Bearer {self.api_key}\",\n            \"OpenAI-Beta\": \"realtime=v1\",\n        }\n\n        try:\n            self._ws = await self._session.ws_connect(url, headers=headers)\n            self._logger.info(\"Connected to OpenAI Realtime API\")\n        except Exception as e:\n            self._logger.error(f\"Failed to connect to OpenAI Realtime API: {e}\")\n            raise\n\n        # Configure the session for transcription\n        # Enable server-side VAD (Voice Activity Detection) for automatic speech detection\n        config_message = {\n            \"type\": \"transcription_session.update\",\n            \"session\": {\n                \"input_audio_format\": \"pcm16\",  # 16-bit PCM at 24kHz mono\n                \"input_audio_transcription\": {\n                    \"model\": self.model,\n                },\n                \"turn_detection\": {\n                    \"type\": \"server_vad\",\n                    \"threshold\": 0.5,\n                    \"prefix_padding_ms\": 300,\n                    \"silence_duration_ms\": 500,\n                },\n            },\n        }\n        await self._ws.send_str(json.dumps(config_message))\n        self._logger.info(f\"Sent config for model: {self.model} with server VAD\")\n\n        # Start receiving transcripts\n        self._receive_task = asyncio.create_task(self._receive_loop())\n\n    async def _receive_loop(self) -> None:\n        \"\"\"Background task to receive transcripts.\"\"\"\n        if not self._ws:\n            return\n\n        try:\n            async for msg in self._ws:\n                if msg.type == aiohttp.WSMsgType.TEXT:\n                    data = json.loads(msg.data)\n                    msg_type = data.get(\"type\", \"\")\n                    self._logger.debug(f\"Received message type: {msg_type}\")\n\n                    # Handle errors\n                    if msg_type == OpenAIRealtimeMessageType.ERROR:\n                        error = data.get(\"error\", {})\n                        self._logger.error(f\"OpenAI error: {error}\")\n                        continue\n\n                    # Handle VAD events\n                    if msg_type == OpenAIRealtimeMessageType.SPEECH_STARTED:\n                        self._logger.info(\"OpenAI: Speech started\")\n                        # Reset current turn transcript for new speech\n                        self._current_turn_transcript = \"\"\n                        continue\n                    elif msg_type == OpenAIRealtimeMessageType.SPEECH_STOPPED:\n                        self._logger.info(\n                            \"OpenAI: Speech stopped (VAD detected silence)\"\n                        )\n                        continue\n                    elif msg_type == OpenAIRealtimeMessageType.BUFFER_COMMITTED:\n                        self._logger.info(\"OpenAI: Audio buffer committed\")\n                        continue\n\n                    # Handle transcription events\n                    if msg_type == OpenAIRealtimeMessageType.TRANSCRIPTION_DELTA:\n                        delta = data.get(\"delta\", \"\")\n                        if delta:\n                            self._logger.info(f\"OpenAI: Transcription delta: {delta}\")\n                            self._current_turn_transcript += delta\n                            # Show accumulated + current turn transcript\n                            full_transcript = self._accumulated_transcript\n                            if full_transcript and self._current_turn_transcript:\n                                full_transcript += \" \"\n                            full_transcript += self._current_turn_transcript\n                            await self._transcript_queue.put(\n                                TranscriptResult(text=full_transcript, is_vad_end=False)\n                            )\n                    elif msg_type == OpenAIRealtimeMessageType.TRANSCRIPTION_COMPLETED:\n                        transcript = data.get(\"transcript\", \"\")\n                        if transcript:\n                            self._logger.info(\n                                f\"OpenAI: Transcription completed (VAD turn end): {transcript[:50]}...\"\n                            )\n                            # This is the final transcript for this VAD turn\n                            self._current_turn_transcript = transcript\n                            # Accumulate this turn's transcript\n                            if self._accumulated_transcript:\n                                self._accumulated_transcript += \" \" + transcript\n                            else:\n                                self._accumulated_transcript = transcript\n                            # Send with is_vad_end=True to trigger auto-send\n                            await self._transcript_queue.put(\n                                TranscriptResult(\n                                    text=self._accumulated_transcript,\n                                    is_vad_end=True,\n                                )\n                            )\n                    elif msg_type not in (\n                        OpenAIRealtimeMessageType.SESSION_CREATED,\n                        OpenAIRealtimeMessageType.SESSION_UPDATED,\n                        OpenAIRealtimeMessageType.ITEM_CREATED,\n                    ):\n                        # Log any other message types we might be missing\n                        self._logger.info(\n                            f\"OpenAI: Unhandled message type '{msg_type}': {data}\"\n                        )\n\n                elif msg.type == aiohttp.WSMsgType.ERROR:\n                    self._logger.error(f\"WebSocket error: {self._ws.exception()}\")\n                    break\n                elif msg.type == aiohttp.WSMsgType.CLOSED:\n                    self._logger.info(\"WebSocket closed by server\")\n                    break\n        except Exception as e:\n            self._logger.error(f\"Error in receive loop: {e}\")\n        finally:\n            await self._transcript_queue.put(None)\n\n    async def send_audio(self, chunk: bytes) -> None:\n        \"\"\"Send audio chunk to OpenAI.\"\"\"\n        if self._ws and not self._closed:\n            # OpenAI expects base64-encoded PCM16 audio at 24kHz mono\n            # PCM16 at 24kHz: 24000 samples/sec * 2 bytes/sample = 48000 bytes/sec\n            # So chunk_bytes / 48000 = duration in seconds\n            duration_ms = (len(chunk) / 48000) * 1000\n            self._logger.debug(\n                f\"Sending {len(chunk)} bytes ({duration_ms:.1f}ms) of audio to OpenAI. \"\n                f\"First 10 bytes: {chunk[:10].hex() if len(chunk) >= 10 else chunk.hex()}\"\n            )\n            message = {\n                \"type\": \"input_audio_buffer.append\",\n                \"audio\": base64.b64encode(chunk).decode(\"utf-8\"),\n            }\n            await self._ws.send_str(json.dumps(message))\n\n    def reset_transcript(self) -> None:\n        \"\"\"Reset accumulated transcript. Call after auto-send to start fresh.\"\"\"\n        self._logger.info(\"OpenAI: Resetting accumulated transcript\")\n        self._accumulated_transcript = \"\"\n        self._current_turn_transcript = \"\"\n\n    async def receive_transcript(self) -> TranscriptResult | None:\n        \"\"\"Receive next transcript.\"\"\"\n        try:\n            return await asyncio.wait_for(self._transcript_queue.get(), timeout=0.1)\n        except asyncio.TimeoutError:\n            return TranscriptResult(text=\"\", is_vad_end=False)\n\n    async def close(self) -> str:\n        \"\"\"Close session and return final transcript.\"\"\"\n        self._closed = True\n        if self._ws:\n            # With server VAD, the buffer is auto-committed when speech stops.\n            # But we should still commit any remaining audio and wait for transcription.\n            try:\n                await self._ws.send_str(\n                    json.dumps({\"type\": \"input_audio_buffer.commit\"})\n                )\n            except Exception as e:\n                self._logger.debug(f\"Error sending commit (may be expected): {e}\")\n\n            # Wait for *new* transcription to arrive (up to 5 seconds)\n            self._logger.info(\"Waiting for transcription to complete...\")\n            transcript_before_commit = self._accumulated_transcript\n            for _ in range(50):  # 50 * 100ms = 5 seconds max\n                await asyncio.sleep(0.1)\n                if self._accumulated_transcript != transcript_before_commit:\n                    self._logger.info(\n                        f\"Got final transcript: {self._accumulated_transcript[:50]}...\"\n                    )\n                    break\n            else:\n                self._logger.warning(\"Timed out waiting for transcription\")\n\n            await self._ws.close()\n        if self._receive_task:\n            self._receive_task.cancel()\n            try:\n                await self._receive_task\n            except asyncio.CancelledError:\n                pass\n        if self._session:\n            await self._session.close()\n        return self._accumulated_transcript\n\n\n# OpenAI available voices for TTS\nOPENAI_VOICES = [\n    {\"id\": \"alloy\", \"name\": \"Alloy\"},\n    {\"id\": \"echo\", \"name\": \"Echo\"},\n    {\"id\": \"fable\", \"name\": \"Fable\"},\n    {\"id\": \"onyx\", \"name\": \"Onyx\"},\n    {\"id\": \"nova\", \"name\": \"Nova\"},\n    {\"id\": \"shimmer\", \"name\": \"Shimmer\"},\n]\n\n# OpenAI available STT models (all support streaming via Realtime API)\nOPENAI_STT_MODELS = [\n    {\"id\": \"whisper-1\", \"name\": \"Whisper v1\"},\n    {\"id\": \"gpt-4o-transcribe\", \"name\": \"GPT-4o Transcribe\"},\n    {\"id\": \"gpt-4o-mini-transcribe\", \"name\": \"GPT-4o Mini Transcribe\"},\n]\n\n# OpenAI available TTS models\nOPENAI_TTS_MODELS = [\n    {\"id\": \"tts-1\", \"name\": \"TTS-1 (Standard)\"},\n    {\"id\": \"tts-1-hd\", \"name\": \"TTS-1 HD (High Quality)\"},\n]\n\n\ndef _create_wav_header(\n    data_length: int,\n    sample_rate: int = 24000,\n    channels: int = 1,\n    bits_per_sample: int = 16,\n) -> bytes:\n    \"\"\"Create a WAV file header for PCM audio data.\"\"\"\n    import struct\n\n    byte_rate = sample_rate * channels * bits_per_sample // 8\n    block_align = channels * bits_per_sample // 8\n\n    # WAV header is 44 bytes\n    header = struct.pack(\n        \"<4sI4s4sIHHIIHH4sI\",\n        b\"RIFF\",  # ChunkID\n        36 + data_length,  # ChunkSize\n        b\"WAVE\",  # Format\n        b\"fmt \",  # Subchunk1ID\n        16,  # Subchunk1Size (PCM)\n        1,  # AudioFormat (1 = PCM)\n        channels,  # NumChannels\n        sample_rate,  # SampleRate\n        byte_rate,  # ByteRate\n        block_align,  # BlockAlign\n        bits_per_sample,  # BitsPerSample\n        b\"data\",  # Subchunk2ID\n        data_length,  # Subchunk2Size\n    )\n    return header\n\n\nclass OpenAIStreamingSynthesizer(StreamingSynthesizerProtocol):\n    \"\"\"Streaming TTS using OpenAI HTTP TTS API with streaming responses.\"\"\"\n\n    def __init__(\n        self,\n        api_key: str,\n        voice: str = \"alloy\",\n        model: str = \"tts-1\",\n        speed: float = 1.0,\n        api_base: str | None = None,\n    ):\n        from onyx.utils.logger import setup_logger\n\n        self._logger = setup_logger()\n        self.api_key = api_key\n        self.voice = voice\n        self.model = model\n        self.speed = max(0.25, min(4.0, speed))\n        self.api_base = api_base or DEFAULT_OPENAI_API_BASE\n        self._session: aiohttp.ClientSession | None = None\n        self._audio_queue: asyncio.Queue[bytes | None] = asyncio.Queue()\n        self._text_queue: asyncio.Queue[str | None] = asyncio.Queue()\n        self._synthesis_task: asyncio.Task | None = None\n        self._closed = False\n        self._flushed = False\n\n    async def connect(self) -> None:\n        \"\"\"Initialize HTTP session for TTS requests.\"\"\"\n        self._logger.info(\"OpenAIStreamingSynthesizer: connecting\")\n        self._session = aiohttp.ClientSession()\n        # Start background task to process text queue\n        self._synthesis_task = asyncio.create_task(self._process_text_queue())\n        self._logger.info(\"OpenAIStreamingSynthesizer: connected\")\n\n    async def _process_text_queue(self) -> None:\n        \"\"\"Background task to process queued text for synthesis.\"\"\"\n        while not self._closed:\n            try:\n                text = await asyncio.wait_for(self._text_queue.get(), timeout=0.1)\n                if text is None:\n                    break\n                await self._synthesize_text(text)\n            except asyncio.TimeoutError:\n                continue\n            except asyncio.CancelledError:\n                break\n            except Exception as e:\n                self._logger.error(f\"Error processing text queue: {e}\")\n\n    async def _synthesize_text(self, text: str) -> None:\n        \"\"\"Make HTTP TTS request and stream audio to queue.\"\"\"\n        if not self._session or self._closed:\n            return\n\n        url = f\"{self.api_base.rstrip('/')}/v1/audio/speech\"\n        headers = {\n            \"Authorization\": f\"Bearer {self.api_key}\",\n            \"Content-Type\": \"application/json\",\n        }\n        payload = {\n            \"model\": self.model,\n            \"voice\": self.voice,\n            \"input\": text,\n            \"speed\": self.speed,\n            \"response_format\": \"mp3\",\n        }\n\n        try:\n            async with self._session.post(\n                url, headers=headers, json=payload\n            ) as response:\n                if response.status != 200:\n                    error_text = await response.text()\n                    self._logger.error(f\"OpenAI TTS error: {error_text}\")\n                    return\n\n                # Use 8192 byte chunks for smoother streaming\n                # (larger chunks = more complete MP3 frames, better playback)\n                async for chunk in response.content.iter_chunked(8192):\n                    if self._closed:\n                        break\n                    if chunk:\n                        await self._audio_queue.put(chunk)\n        except Exception as e:\n            self._logger.error(f\"OpenAIStreamingSynthesizer synthesis error: {e}\")\n\n    async def send_text(self, text: str) -> None:\n        \"\"\"Queue text to be synthesized via HTTP streaming.\"\"\"\n        if not text.strip() or self._closed:\n            return\n        await self._text_queue.put(text)\n\n    async def receive_audio(self) -> bytes | None:\n        \"\"\"Receive next audio chunk (MP3 format).\"\"\"\n        try:\n            return await asyncio.wait_for(self._audio_queue.get(), timeout=0.1)\n        except asyncio.TimeoutError:\n            return b\"\"  # No audio yet, but not done\n\n    async def flush(self) -> None:\n        \"\"\"Signal end of text input - wait for synthesis to complete.\"\"\"\n        if self._flushed:\n            return\n        self._flushed = True\n\n        # Signal end of text input\n        await self._text_queue.put(None)\n\n        # Wait for synthesis task to complete processing all text\n        if self._synthesis_task and not self._synthesis_task.done():\n            try:\n                await asyncio.wait_for(self._synthesis_task, timeout=60.0)\n            except asyncio.TimeoutError:\n                self._logger.warning(\"OpenAIStreamingSynthesizer: flush timeout\")\n                self._synthesis_task.cancel()\n                try:\n                    await self._synthesis_task\n                except asyncio.CancelledError:\n                    pass\n            except asyncio.CancelledError:\n                pass\n\n        # Signal end of audio stream\n        await self._audio_queue.put(None)\n\n    async def close(self) -> None:\n        \"\"\"Close the session.\"\"\"\n        if self._closed:\n            return\n        self._closed = True\n\n        # Signal end of queues only if flush wasn't already called\n        if not self._flushed:\n            await self._text_queue.put(None)\n            await self._audio_queue.put(None)\n\n        if self._synthesis_task and not self._synthesis_task.done():\n            self._synthesis_task.cancel()\n            try:\n                await self._synthesis_task\n            except asyncio.CancelledError:\n                pass\n\n        if self._session:\n            await self._session.close()\n\n\nclass OpenAIVoiceProvider(VoiceProviderInterface):\n    \"\"\"OpenAI voice provider using Whisper for STT and TTS API for speech synthesis.\"\"\"\n\n    def __init__(\n        self,\n        api_key: str | None,\n        api_base: str | None = None,\n        stt_model: str | None = None,\n        tts_model: str | None = None,\n        default_voice: str | None = None,\n    ):\n        self.api_key = api_key\n        self.api_base = api_base\n        self.stt_model = stt_model or \"whisper-1\"\n        self.tts_model = tts_model or \"tts-1\"\n        self.default_voice = default_voice or \"alloy\"\n\n        self._client: \"AsyncOpenAI | None\" = None\n\n    def _get_client(self) -> \"AsyncOpenAI\":\n        if self._client is None:\n            from openai import AsyncOpenAI\n\n            self._client = AsyncOpenAI(\n                api_key=self.api_key,\n                base_url=self.api_base,\n            )\n        return self._client\n\n    async def transcribe(self, audio_data: bytes, audio_format: str) -> str:\n        \"\"\"\n        Transcribe audio using OpenAI Whisper.\n\n        Args:\n            audio_data: Raw audio bytes\n            audio_format: Audio format (e.g., \"webm\", \"wav\", \"mp3\")\n\n        Returns:\n            Transcribed text\n        \"\"\"\n        client = self._get_client()\n\n        # Create a file-like object from the audio bytes\n        audio_file = io.BytesIO(audio_data)\n        audio_file.name = f\"audio.{audio_format}\"\n\n        response = await client.audio.transcriptions.create(\n            model=self.stt_model,\n            file=audio_file,\n        )\n\n        return response.text\n\n    async def synthesize_stream(\n        self, text: str, voice: str | None = None, speed: float = 1.0\n    ) -> AsyncIterator[bytes]:\n        \"\"\"\n        Convert text to audio using OpenAI TTS with streaming.\n\n        Args:\n            text: Text to convert to speech\n            voice: Voice identifier (defaults to provider's default voice)\n            speed: Playback speed multiplier (0.25 to 4.0)\n\n        Yields:\n            Audio data chunks (mp3 format)\n        \"\"\"\n        client = self._get_client()\n\n        # Clamp speed to valid range\n        speed = max(0.25, min(4.0, speed))\n\n        # Use with_streaming_response for proper async streaming\n        # Using 8192 byte chunks for better streaming performance\n        # (larger chunks = fewer round-trips, more complete MP3 frames)\n        async with client.audio.speech.with_streaming_response.create(\n            model=self.tts_model,\n            voice=voice or self.default_voice,\n            input=text,\n            speed=speed,\n            response_format=\"mp3\",\n        ) as response:\n            async for chunk in response.iter_bytes(chunk_size=8192):\n                yield chunk\n\n    async def validate_credentials(self) -> None:\n        \"\"\"Validate OpenAI API key by listing models.\"\"\"\n        from openai import AuthenticationError, PermissionDeniedError\n\n        client = self._get_client()\n        try:\n            await client.models.list()\n        except AuthenticationError:\n            raise RuntimeError(\"Invalid OpenAI API key.\")\n        except PermissionDeniedError:\n            raise RuntimeError(\"OpenAI API key does not have sufficient permissions.\")\n\n    def get_available_voices(self) -> list[dict[str, str]]:\n        \"\"\"Get available OpenAI TTS voices.\"\"\"\n        return OPENAI_VOICES.copy()\n\n    def get_available_stt_models(self) -> list[dict[str, str]]:\n        \"\"\"Get available OpenAI STT models.\"\"\"\n        return OPENAI_STT_MODELS.copy()\n\n    def get_available_tts_models(self) -> list[dict[str, str]]:\n        \"\"\"Get available OpenAI TTS models.\"\"\"\n        return OPENAI_TTS_MODELS.copy()\n\n    def supports_streaming_stt(self) -> bool:\n        \"\"\"OpenAI supports streaming via Realtime API for all STT models.\"\"\"\n        return True\n\n    def supports_streaming_tts(self) -> bool:\n        \"\"\"OpenAI supports real-time streaming TTS via Realtime API.\"\"\"\n        return True\n\n    async def create_streaming_transcriber(\n        self, _audio_format: str = \"webm\"\n    ) -> OpenAIStreamingTranscriber:\n        \"\"\"Create a streaming transcription session using Realtime API.\"\"\"\n        if not self.api_key:\n            raise ValueError(\"API key required for streaming transcription\")\n        transcriber = OpenAIStreamingTranscriber(\n            api_key=self.api_key,\n            model=self.stt_model,\n            api_base=self.api_base,\n        )\n        await transcriber.connect()\n        return transcriber\n\n    async def create_streaming_synthesizer(\n        self, voice: str | None = None, speed: float = 1.0\n    ) -> OpenAIStreamingSynthesizer:\n        \"\"\"Create a streaming TTS session using HTTP streaming API.\"\"\"\n        if not self.api_key:\n            raise ValueError(\"API key required for streaming TTS\")\n        synthesizer = OpenAIStreamingSynthesizer(\n            api_key=self.api_key,\n            voice=voice or self.default_voice or \"alloy\",\n            model=self.tts_model or \"tts-1\",\n            speed=speed,\n            api_base=self.api_base,\n        )\n        await synthesizer.connect()\n        return synthesizer\n"
  },
  {
    "path": "backend/pyproject.toml",
    "content": "[project]\nname = \"onyx-backend\"\nversion = \"0.0.0\"\nrequires-python = \">=3.11\"\ndependencies = [\n    \"onyx[backend,dev,ee]\",\n]\n\n[tool.uv.sources]\nonyx = { workspace = true }\n"
  },
  {
    "path": "backend/pytest.ini",
    "content": "[pytest]\npythonpath = \n    .\n    generated/onyx_openapi_client\nasyncio_default_fixture_loop_scope = function\nmarkers =\n    slow: marks tests as slow\n    alembic: marks tests for alembic migration testing\nfilterwarnings =\n    ignore::DeprecationWarning\n    ignore::cryptography.utils.CryptographyDeprecationWarning\n    ignore::PendingDeprecationWarning:ddtrace.internal.module\n# .test.env is gitignored.\n# After installing pytest-dotenv,\n# you can use it to test credentials locally.\nenv_files =\n    .test.env\n"
  },
  {
    "path": "backend/requirements/README.md",
    "content": "# Requirements Management with uv\n\nThis directory is kept for backwards compatibility with existing Docker builds.\n\n## Overview\n\nWe use **`pyproject.toml`** as the single source of truth for all dependencies, with a unified **`uv.lock`** file for resolved versions.\n\n### Why this approach?\n\n- ✅ **Single source of truth**: All dependencies defined in `pyproject.toml`\n- ✅ **No duplication**: Dependencies shared across environments are only listed once\n- ✅ **Unified lock file**: All versions resolved together - guaranteed compatible\n- ✅ **Fast**: `uv` is 10-100x faster than pip-tools\n- ✅ **Reproducible builds**: Lock file pins all transitive dependencies\n- ✅ **Easy updates**: Change `pyproject.toml`, commit, done!\n\n## File Structure\n\n```\npyproject.toml                      # SOURCE OF TRUTH - edit this!\nuv.lock                             # Unified lock file (all versions)\nbackend/\n└── requirements/                   # Legacy .txt files (for Docker compat)\n    ├── default.txt\n    ├── dev.txt\n    ├── ee.txt\n    ├── model_server.txt\n    └── combined.txt\n```\n\n## Workflow\n\n### 1. Installing uv\n\nIf you don't have `uv` installed:\n\n```bash\n# On macOS/Linux\ncurl -LsSf https://astral.py/uv/install.sh | sh\n```\n\n### 2. Adding/Updating Dependencies\n\n**DO NOT** edit the `.txt` files directly! Instead:\n\n1. Edit `pyproject.toml`\n2. Add/update/remove dependencies in the appropriate section:\n   - `[dependency-groups]` for dev tools\n   - `[project.dependencies]` for **shared** dependencies (used by both backend and model_server)\n   - `[project.optional-dependencies.backend]` for backend-only dependencies\n   - `[project.optional-dependencies.model_server]` for model_server-only dependencies (ML packages)\n   - `[project.optional-dependencies.ee]` for EE features\n3. Commit your changes - pre-commit hooks will automatically regenerate the lock file and requirements\n\n### 3. Generating Lock File and Requirements\n\nThe lock file (`uv.lock`) and requirements files are automatically generated by pre-commit hooks when you commit changes to `pyproject.toml`:\n\n- **`uv-lock`**: Runs `uv lock` to resolve dependencies into `uv.lock`\n- **`uv-export`**: Exports requirements to the `.txt` files in this directory\n\nTo manually regenerate:\n\n```bash\nuv lock\nuv export --no-emit-project --no-default-groups --no-hashes --extra backend -o backend/requirements/default.txt\nuv export --no-emit-project --no-default-groups --no-hashes --group dev -o backend/requirements/dev.txt\nuv export --no-emit-project --no-default-groups --no-hashes --extra ee -o backend/requirements/ee.txt\nuv export --no-emit-project --no-default-groups --no-hashes --extra model_server -o backend/requirements/model_server.txt\n```\n\n### 4. Installing Dependencies\n\nIf enabled, all packages are installed automatically by the `uv-sync` pre-commit hook when changing\nbranches or pulling new changes.\n\n```bash\n# For everything (most common)\nuv sync --all-extras\n\n# For backend production (shared + backend dependencies)\nuv sync --extra backend\n\n# For backend development (shared + backend + dev tools)\nuv sync --extra backend --extra dev\n\n# For backend with EE (shared + backend + ee)\nuv sync --extra backend --extra ee\n\n# For model server (shared + model_server, NO backend deps!)\nuv sync --extra model_server\n```\n\n`uv` aggressively [ignores active virtual environments](https://docs.astral.sh/uv/concepts/projects/config/#project-environment-path) and prefers the root virtual environment.\nWhen working in workspace packages, be sure to pass `--active` when syncing the virtual environment:\n\n```bash\ncd backend/\nsource .venv/bin/activate\nuv sync --active\nuv run --active ...\n```\n\n### 5. Upgrading Dependencies\n\nUpgrade specific packages:\n\n1. Edit version in pyproject.toml, then commit\n2. Pre-commit hooks will automatically regenerate lock and requirements files\n\n**Review changes carefully before committing!**\n"
  },
  {
    "path": "backend/requirements/combined.txt",
    "content": "# combines all the other requirements files\n# Primarily for testing.\n# It's generally better to install just the requirements for what you are trying to run\n\n-r default.txt\n-r ee.txt\n-r model_server.txt\n-r dev.txt\n"
  },
  {
    "path": "backend/requirements/default.txt",
    "content": "# This file was autogenerated by uv via the following command:\n#    uv export --no-emit-project --no-default-groups --no-hashes --extra backend -o backend/requirements/default.txt\nagent-client-protocol==0.7.1\n    # via onyx\naioboto3==15.1.0\n    # via onyx\naiobotocore==2.24.0\n    # via aioboto3\naiofile==3.9.0\n    # via py-key-value-aio\naiofiles==25.1.0\n    # via\n    #   aioboto3\n    #   unstructured-client\naiohappyeyeballs==2.6.1\n    # via aiohttp\naiohttp==3.13.4\n    # via\n    #   aiobotocore\n    #   discord-py\n    #   litellm\n    #   onyx\n    #   voyageai\naioitertools==0.13.0\n    # via aiobotocore\naiolimiter==1.2.1\n    # via voyageai\naiosignal==1.4.0\n    # via aiohttp\nalembic==1.10.4\n    # via onyx\namqp==5.3.1\n    # via kombu\nannotated-doc==0.0.4\n    # via fastapi\nannotated-types==0.7.0\n    # via pydantic\nanyio==4.11.0\n    # via\n    #   claude-agent-sdk\n    #   google-genai\n    #   httpx\n    #   mcp\n    #   openai\n    #   py-key-value-aio\n    #   sse-starlette\n    #   starlette\n    #   watchfiles\nargon2-cffi==23.1.0\n    # via pwdlib\nargon2-cffi-bindings==25.1.0\n    # via argon2-cffi\nasana==5.0.8\n    # via onyx\nasync-timeout==5.0.1 ; python_full_version < '3.11.3'\n    # via redis\nasyncpg==0.30.0\n    # via onyx\natlassian-python-api==3.41.16\n    # via onyx\nattrs==25.4.0\n    # via\n    #   aiohttp\n    #   cyclopts\n    #   jsonschema\n    #   referencing\n    #   zeep\nauthlib==1.6.9\n    # via fastmcp\nazure-cognitiveservices-speech==1.38.0\n    # via onyx\nbabel==2.17.0\n    # via courlan\nbackoff==2.2.1\n    # via\n    #   langfuse\n    #   unstructured\nbackports-tarfile==1.2.0 ; python_full_version < '3.12'\n    # via jaraco-context\nbcrypt==4.3.0\n    # via pwdlib\nbeartype==0.22.6\n    # via py-key-value-aio\nbeautifulsoup4==4.12.3\n    # via\n    #   atlassian-python-api\n    #   markdownify\n    #   markitdown\n    #   onyx\n    #   unstructured\nbilliard==4.2.3\n    # via celery\nboto3==1.39.11\n    # via\n    #   aiobotocore\n    #   cohere\n    #   onyx\nboto3-stubs==1.39.11\n    # via onyx\nbotocore==1.39.11\n    # via\n    #   aiobotocore\n    #   boto3\n    #   s3transfer\nbotocore-stubs==1.40.74\n    # via boto3-stubs\nbraintrust==0.3.9\n    # via onyx\nbrotli==1.2.0\n    # via onyx\nbytecode==0.17.0\n    # via ddtrace\ncachetools==6.2.2\n    # via py-key-value-aio\ncaio==0.9.25\n    # via aiofile\ncelery==5.5.1\n    # via onyx\ncertifi==2025.11.12\n    # via\n    #   asana\n    #   httpcore\n    #   httpx\n    #   hubspot-api-client\n    #   kubernetes\n    #   opensearch-py\n    #   requests\n    #   sentry-sdk\n    #   trafilatura\ncffi==2.0.0\n    # via\n    #   argon2-cffi-bindings\n    #   cryptography\n    #   pynacl\n    #   zstandard\nchardet==5.2.0\n    # via onyx\ncharset-normalizer==3.4.4\n    # via\n    #   htmldate\n    #   markitdown\n    #   pdfminer-six\n    #   requests\n    #   trafilatura\n    #   unstructured\nchevron==0.14.0\n    # via braintrust\nchonkie==1.0.10\n    # via onyx\nclaude-agent-sdk==0.1.19\n    # via onyx\nclick==8.3.1\n    # via\n    #   celery\n    #   click-didyoumean\n    #   click-plugins\n    #   click-repl\n    #   dask\n    #   distributed\n    #   litellm\n    #   magika\n    #   nltk\n    #   python-oxmsg\n    #   typer\n    #   uvicorn\n    #   zulip\nclick-didyoumean==0.3.1\n    # via celery\nclick-plugins==1.1.1.2\n    # via celery\nclick-repl==0.3.0\n    # via celery\ncloudpickle==3.1.2\n    # via\n    #   dask\n    #   distributed\ncobble==0.1.4\n    # via mammoth\ncohere==5.6.1\n    # via onyx\ncolorama==0.4.6 ; sys_platform == 'win32'\n    # via\n    #   click\n    #   pytest\n    #   tqdm\ncoloredlogs==15.0.1\n    # via onnxruntime\ncourlan==1.3.2\n    # via trafilatura\ncryptography==46.0.6\n    # via\n    #   authlib\n    #   google-auth\n    #   msal\n    #   msoffcrypto-tool\n    #   pdfminer-six\n    #   pyjwt\n    #   secretstorage\n    #   sendgrid\n    #   unstructured-client\ncyclopts==4.2.4\n    # via fastmcp\ndask==2026.1.1\n    # via\n    #   distributed\n    #   onyx\ndataclasses-json==0.6.7\n    # via unstructured\ndateparser==1.2.2\n    # via htmldate\nddtrace==3.10.0\n    # via onyx\ndecorator==5.2.1\n    # via retry\ndefusedxml==0.7.1\n    # via\n    #   jira\n    #   markitdown\ndeprecated==1.3.1\n    # via\n    #   atlassian-python-api\n    #   pygithub\ndiscord-py==2.4.0\n    # via onyx\ndistributed==2026.1.1\n    # via onyx\ndistro==1.9.0\n    # via\n    #   openai\n    #   zulip\ndnspython==2.8.0\n    # via email-validator\ndocstring-parser==0.17.0\n    # via cyclopts\ndocutils==0.22.3\n    # via rich-rst\ndropbox==12.0.2\n    # via onyx\ndurationpy==0.10\n    # via kubernetes\nemail-validator==2.2.0\n    # via\n    #   fastapi-users\n    #   pydantic\nemoji==2.15.0\n    # via unstructured\nenvier==0.6.1\n    # via ddtrace\net-xmlfile==2.0.0\n    # via openpyxl\nevents==0.5\n    # via opensearch-py\nexa-py==1.15.4\n    # via onyx\nexceptiongroup==1.3.0\n    # via\n    #   braintrust\n    #   fastmcp\nfastapi==0.133.1\n    # via\n    #   fastapi-limiter\n    #   fastapi-users\n    #   onyx\nfastapi-limiter==0.1.6\n    # via onyx\nfastapi-users==15.0.4\n    # via\n    #   fastapi-users-db-sqlalchemy\n    #   onyx\nfastapi-users-db-sqlalchemy==7.0.0\n    # via onyx\nfastavro==1.12.1\n    # via cohere\nfastmcp==3.2.0\n    # via onyx\nfastuuid==0.14.0\n    # via litellm\nfilelock==3.20.3\n    # via\n    #   huggingface-hub\n    #   onyx\nfiletype==1.2.0\n    # via unstructured\nflatbuffers==25.9.23\n    # via onnxruntime\nfrozenlist==1.8.0\n    # via\n    #   aiohttp\n    #   aiosignal\nfsspec==2025.10.0\n    # via\n    #   dask\n    #   huggingface-hub\ngitdb==4.0.12\n    # via gitpython\ngitpython==3.1.45\n    # via braintrust\ngoogle-api-core==2.28.1\n    # via google-api-python-client\ngoogle-api-python-client==2.86.0\n    # via onyx\ngoogle-auth==2.48.0\n    # via\n    #   google-api-core\n    #   google-api-python-client\n    #   google-auth-httplib2\n    #   google-auth-oauthlib\n    #   google-genai\n    #   kubernetes\ngoogle-auth-httplib2==0.1.0\n    # via\n    #   google-api-python-client\n    #   onyx\ngoogle-auth-oauthlib==1.0.0\n    # via onyx\ngoogle-genai==1.52.0\n    # via onyx\ngoogleapis-common-protos==1.72.0\n    # via\n    #   google-api-core\n    #   opentelemetry-exporter-otlp-proto-http\ngreenlet==3.2.4\n    # via\n    #   playwright\n    #   sqlalchemy\nh11==0.16.0\n    # via\n    #   httpcore\n    #   uvicorn\nh2==4.3.0\n    # via httpx\nhf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'\n    # via huggingface-hub\nhpack==4.1.0\n    # via h2\nhtml5lib==1.1\n    # via unstructured\nhtmldate==1.9.1\n    # via trafilatura\nhttpcore==1.0.9\n    # via\n    #   httpx\n    #   onyx\n    #   unstructured-client\nhttplib2==0.31.0\n    # via\n    #   google-api-python-client\n    #   google-auth-httplib2\nhttpx==0.28.1\n    # via\n    #   cohere\n    #   exa-py\n    #   fastmcp\n    #   google-genai\n    #   httpx-oauth\n    #   langfuse\n    #   langsmith\n    #   litellm\n    #   mcp\n    #   onyx\n    #   openai\n    #   unstructured-client\nhttpx-oauth==0.15.1\n    # via onyx\nhttpx-sse==0.4.3\n    # via\n    #   cohere\n    #   mcp\nhubspot-api-client==11.1.0\n    # via onyx\nhuggingface-hub==0.35.3\n    # via\n    #   onyx\n    #   tokenizers\nhumanfriendly==10.0\n    # via coloredlogs\nhyperframe==6.1.0\n    # via h2\nidna==3.11\n    # via\n    #   anyio\n    #   email-validator\n    #   httpx\n    #   requests\n    #   yarl\nimportlib-metadata==8.7.0\n    # via\n    #   dask\n    #   keyring\n    #   litellm\n    #   opentelemetry-api\ninflection==0.5.1\n    # via\n    #   onyx\n    #   pyairtable\niniconfig==2.3.0\n    # via pytest\nisodate==0.7.2\n    # via\n    #   python3-saml\n    #   zeep\njaraco-classes==3.4.0\n    # via keyring\njaraco-context==6.0.2\n    # via keyring\njaraco-functools==4.4.0\n    # via keyring\njeepney==0.9.0 ; sys_platform == 'linux'\n    # via\n    #   keyring\n    #   secretstorage\njinja2==3.1.6\n    # via\n    #   distributed\n    #   litellm\njira==3.10.5\n    # via onyx\njiter==0.12.0\n    # via openai\njmespath==1.0.1\n    # via\n    #   aiobotocore\n    #   atlassian-python-api\n    #   boto3\n    #   botocore\njoblib==1.5.2\n    # via nltk\njsonpatch==1.33\n    # via langchain-core\njsonpointer==3.0.0\n    # via jsonpatch\njsonref==1.1.0\n    # via\n    #   fastmcp\n    #   onyx\njsonschema==4.25.1\n    # via\n    #   litellm\n    #   mcp\njsonschema-path==0.3.4\n    # via fastmcp\njsonschema-specifications==2025.9.1\n    # via jsonschema\njustext==3.0.2\n    # via trafilatura\nkeyring==25.7.0\n    # via py-key-value-aio\nkombu==5.5.4\n    # via celery\nkubernetes==31.0.0\n    # via onyx\nlangchain-core==1.2.22\n    # via onyx\nlangdetect==1.0.9\n    # via unstructured\nlangfuse==3.10.0\n    # via onyx\nlangsmith==0.3.45\n    # via langchain-core\nlazy-imports==1.0.1\n    # via onyx\nlegacy-cgi==2.6.4 ; python_full_version >= '3.13'\n    # via ddtrace\nlitellm==1.81.6\n    # via onyx\nlocket==1.0.0\n    # via\n    #   distributed\n    #   partd\nlxml==5.3.0\n    # via\n    #   htmldate\n    #   justext\n    #   lxml-html-clean\n    #   markitdown\n    #   onyx\n    #   python-docx\n    #   python-pptx\n    #   python3-saml\n    #   trafilatura\n    #   unstructured\n    #   xmlsec\n    #   zeep\nlxml-html-clean==0.4.4\n    # via lxml\nmagika==0.6.3\n    # via markitdown\nmakefun==1.16.0\n    # via fastapi-users\nmako==1.2.4\n    # via\n    #   alembic\n    #   onyx\nmammoth==1.11.0\n    # via markitdown\nmarkdown-it-py==4.0.0\n    # via rich\nmarkdownify==1.2.2\n    # via markitdown\nmarkitdown==0.1.2\n    # via onyx\nmarkupsafe==3.0.3\n    # via\n    #   jinja2\n    #   mako\n    #   werkzeug\nmarshmallow==3.26.2\n    # via dataclasses-json\nmatrix-client==0.3.2\n    # via zulip\nmcp==1.26.0\n    # via\n    #   claude-agent-sdk\n    #   fastmcp\n    #   onyx\nmdurl==0.1.2\n    # via markdown-it-py\nmistune==3.2.0\n    # via onyx\nmore-itertools==10.8.0\n    # via\n    #   jaraco-classes\n    #   jaraco-functools\n    #   simple-salesforce\nmpmath==1.3.0\n    # via sympy\nmsal==1.34.0\n    # via\n    #   office365-rest-python-client\n    #   onyx\nmsgpack==1.1.2\n    # via distributed\nmsoffcrypto-tool==5.4.2\n    # via onyx\nmultidict==6.7.0\n    # via\n    #   aiobotocore\n    #   aiohttp\n    #   yarl\nmwparserfromhell==0.7.2\n    # via pywikibot\nmypy==1.13.0\n    # via sqlalchemy\nmypy-boto3-s3==1.39.5\n    # via boto3-stubs\nmypy-extensions==1.0.0\n    # via\n    #   mypy\n    #   typing-inspect\nnest-asyncio==1.6.0\n    # via onyx\nnltk==3.9.4\n    # via unstructured\nnumpy==2.4.1\n    # via\n    #   magika\n    #   onnxruntime\n    #   pandas\n    #   shapely\n    #   unstructured\n    #   voyageai\noauthlib==3.2.2\n    # via\n    #   atlassian-python-api\n    #   kubernetes\n    #   onyx\n    #   requests-oauthlib\noffice365-rest-python-client==2.6.2\n    # via onyx\nolefile==0.47\n    # via\n    #   msoffcrypto-tool\n    #   python-oxmsg\nonnxruntime==1.20.1\n    # via magika\nopenai==2.14.0\n    # via\n    #   exa-py\n    #   langfuse\n    #   litellm\n    #   onyx\nopenapi-pydantic==0.5.1\n    # via fastmcp\nopeninference-instrumentation==0.1.42\n    # via onyx\nopeninference-semantic-conventions==0.1.25\n    # via openinference-instrumentation\nopenpyxl==3.0.10\n    # via\n    #   markitdown\n    #   onyx\nopensearch-py==3.0.0\n    # via onyx\nopentelemetry-api==1.39.1\n    # via\n    #   ddtrace\n    #   fastmcp\n    #   langfuse\n    #   openinference-instrumentation\n    #   opentelemetry-exporter-otlp-proto-http\n    #   opentelemetry-sdk\n    #   opentelemetry-semantic-conventions\nopentelemetry-exporter-otlp-proto-common==1.39.1\n    # via opentelemetry-exporter-otlp-proto-http\nopentelemetry-exporter-otlp-proto-http==1.39.1\n    # via langfuse\nopentelemetry-proto==1.39.1\n    # via\n    #   onyx\n    #   opentelemetry-exporter-otlp-proto-common\n    #   opentelemetry-exporter-otlp-proto-http\nopentelemetry-sdk==1.39.1\n    # via\n    #   langfuse\n    #   openinference-instrumentation\n    #   opentelemetry-exporter-otlp-proto-http\nopentelemetry-semantic-conventions==0.60b1\n    # via opentelemetry-sdk\norjson==3.11.6 ; platform_python_implementation != 'PyPy'\n    # via langsmith\npackaging==24.2\n    # via\n    #   dask\n    #   distributed\n    #   fastmcp\n    #   huggingface-hub\n    #   jira\n    #   kombu\n    #   langchain-core\n    #   langfuse\n    #   langsmith\n    #   marshmallow\n    #   onnxruntime\n    #   pytest\n    #   pywikibot\npandas==2.3.3\n    # via markitdown\nparameterized==0.9.0\n    # via cohere\npartd==1.4.2\n    # via dask\npasslib==1.7.4\n    # via onyx\npathable==0.4.4\n    # via jsonschema-path\npdfminer-six==20251107\n    # via markitdown\npillow==12.1.1\n    # via python-pptx\nplatformdirs==4.5.0\n    # via\n    #   fastmcp\n    #   zeep\nplaywright==1.55.0\n    # via\n    #   onyx\n    #   pytest-playwright\npluggy==1.6.0\n    # via pytest\nply==3.11\n    # via stone\nprometheus-client==0.23.1\n    # via\n    #   onyx\n    #   prometheus-fastapi-instrumentator\nprometheus-fastapi-instrumentator==7.1.0\n    # via onyx\nprompt-toolkit==3.0.52\n    # via click-repl\npropcache==0.4.1\n    # via\n    #   aiohttp\n    #   yarl\nproto-plus==1.26.1\n    # via google-api-core\nprotobuf==6.33.5\n    # via\n    #   ddtrace\n    #   google-api-core\n    #   googleapis-common-protos\n    #   onnxruntime\n    #   opentelemetry-proto\n    #   proto-plus\npsutil==7.1.3\n    # via\n    #   distributed\n    #   onyx\n    #   unstructured\npsycopg2-binary==2.9.9\n    # via onyx\npuremagic==1.28\n    # via onyx\npwdlib==0.3.0\n    # via fastapi-users\npy==1.11.0\n    # via retry\npy-key-value-aio==0.4.4\n    # via fastmcp\npyairtable==3.0.1\n    # via onyx\npyasn1==0.6.3\n    # via\n    #   pyasn1-modules\n    #   rsa\npyasn1-modules==0.4.2\n    # via google-auth\npycparser==2.23 ; implementation_name != 'PyPy'\n    # via cffi\npycryptodome==3.19.1\n    # via onyx\npydantic==2.11.7\n    # via\n    #   agent-client-protocol\n    #   cohere\n    #   exa-py\n    #   fastapi\n    #   fastmcp\n    #   google-genai\n    #   langchain-core\n    #   langfuse\n    #   langsmith\n    #   litellm\n    #   mcp\n    #   onyx\n    #   openai\n    #   openapi-pydantic\n    #   pyairtable\n    #   pydantic-settings\n    #   unstructured-client\npydantic-core==2.33.2\n    # via pydantic\npydantic-settings==2.12.0\n    # via mcp\npyee==13.0.0\n    # via playwright\npygithub==2.5.0\n    # via onyx\npygments==2.20.0\n    # via rich\npyjwt==2.12.0\n    # via\n    #   fastapi-users\n    #   mcp\n    #   msal\n    #   pygithub\n    #   simple-salesforce\npympler==1.1\n    # via onyx\npynacl==1.6.2\n    # via pygithub\npypandoc-binary==1.16.2\n    # via onyx\npyparsing==3.2.5\n    # via httplib2\npypdf==6.9.2\n    # via\n    #   onyx\n    #   unstructured-client\npyperclip==1.11.0\n    # via fastmcp\npyreadline3==3.5.4 ; sys_platform == 'win32'\n    # via humanfriendly\npytest==8.3.5\n    # via\n    #   pytest-base-url\n    #   pytest-mock\n    #   pytest-playwright\npytest-base-url==2.1.0\n    # via pytest-playwright\npytest-mock==3.12.0\n    # via onyx\npytest-playwright==0.7.0\n    # via onyx\npython-dateutil==2.8.2\n    # via\n    #   aiobotocore\n    #   asana\n    #   botocore\n    #   celery\n    #   dateparser\n    #   htmldate\n    #   hubspot-api-client\n    #   kubernetes\n    #   onyx\n    #   opensearch-py\n    #   pandas\npython-docx==1.1.2\n    # via onyx\npython-dotenv==1.1.1\n    # via\n    #   braintrust\n    #   fastmcp\n    #   litellm\n    #   magika\n    #   mcp\n    #   onyx\n    #   pydantic-settings\npython-gitlab==5.6.0\n    # via onyx\npython-http-client==3.3.7\n    # via sendgrid\npython-iso639==2025.11.16\n    # via unstructured\npython-magic==0.4.27\n    # via unstructured\npython-multipart==0.0.22\n    # via\n    #   fastapi-users\n    #   mcp\n    #   onyx\npython-oxmsg==0.0.2\n    # via unstructured\npython-pptx==0.6.23\n    # via\n    #   markitdown\n    #   onyx\npython-slugify==8.0.4\n    # via\n    #   braintrust\n    #   pytest-playwright\npython3-saml==1.15.0\n    # via onyx\npytz==2025.2\n    # via\n    #   dateparser\n    #   office365-rest-python-client\n    #   pandas\n    #   zeep\npywikibot==9.0.0\n    # via onyx\npywin32==311 ; sys_platform == 'win32'\n    # via\n    #   mcp\n    #   pympler\npywin32-ctypes==0.2.3 ; sys_platform == 'win32'\n    # via keyring\npyyaml==6.0.3\n    # via\n    #   dask\n    #   distributed\n    #   fastmcp\n    #   huggingface-hub\n    #   jsonschema-path\n    #   kubernetes\n    #   langchain-core\nrapidfuzz==3.13.0\n    # via\n    #   onyx\n    #   unstructured\nredis==5.0.8\n    # via\n    #   fastapi-limiter\n    #   onyx\nreferencing==0.36.2\n    # via\n    #   jsonschema\n    #   jsonschema-path\n    #   jsonschema-specifications\nregex==2025.11.3\n    # via\n    #   dateparser\n    #   nltk\n    #   tiktoken\nrequests==2.33.0\n    # via\n    #   atlassian-python-api\n    #   braintrust\n    #   cohere\n    #   dropbox\n    #   exa-py\n    #   google-api-core\n    #   google-genai\n    #   hubspot-api-client\n    #   huggingface-hub\n    #   jira\n    #   jsonschema-path\n    #   kubernetes\n    #   langfuse\n    #   langsmith\n    #   markitdown\n    #   matrix-client\n    #   msal\n    #   office365-rest-python-client\n    #   onyx\n    #   opensearch-py\n    #   opentelemetry-exporter-otlp-proto-http\n    #   pyairtable\n    #   pygithub\n    #   pytest-base-url\n    #   python-gitlab\n    #   pywikibot\n    #   requests-file\n    #   requests-oauthlib\n    #   requests-toolbelt\n    #   simple-salesforce\n    #   stripe\n    #   tiktoken\n    #   unstructured\n    #   voyageai\n    #   zeep\n    #   zulip\nrequests-file==3.0.1\n    # via zeep\nrequests-oauthlib==1.3.1\n    # via\n    #   atlassian-python-api\n    #   google-auth-oauthlib\n    #   jira\n    #   kubernetes\n    #   onyx\nrequests-toolbelt==1.0.0\n    # via\n    #   jira\n    #   langsmith\n    #   python-gitlab\n    #   unstructured-client\n    #   zeep\nretry==0.9.2\n    # via onyx\nrfc3986==1.5.0\n    # via onyx\nrich==14.2.0\n    # via\n    #   cyclopts\n    #   fastmcp\n    #   rich-rst\n    #   typer\nrich-rst==1.3.2\n    # via cyclopts\nrpds-py==0.29.0\n    # via\n    #   jsonschema\n    #   referencing\nrsa==4.9.1\n    # via google-auth\ns3transfer==0.13.1\n    # via boto3\nsecretstorage==3.5.0 ; sys_platform == 'linux'\n    # via keyring\nsendgrid==6.12.5\n    # via onyx\nsentry-sdk==2.14.0\n    # via onyx\nshapely==2.0.6\n    # via onyx\nshellingham==1.5.4\n    # via typer\nsimple-salesforce==1.12.6\n    # via onyx\nsix==1.17.0\n    # via\n    #   asana\n    #   atlassian-python-api\n    #   dropbox\n    #   google-auth-httplib2\n    #   html5lib\n    #   hubspot-api-client\n    #   kubernetes\n    #   langdetect\n    #   markdownify\n    #   python-dateutil\n    #   stone\nslack-sdk==3.20.2\n    # via onyx\nsmmap==5.0.2\n    # via gitdb\nsniffio==1.3.1\n    # via\n    #   anyio\n    #   openai\nsortedcontainers==2.4.0\n    # via distributed\nsoupsieve==2.8\n    # via beautifulsoup4\nsqlalchemy==2.0.15\n    # via\n    #   alembic\n    #   fastapi-users-db-sqlalchemy\n    #   onyx\nsse-starlette==3.0.3\n    # via mcp\nsseclient-py==1.8.0\n    # via braintrust\nstarlette==0.49.3\n    # via\n    #   fastapi\n    #   mcp\n    #   onyx\n    #   prometheus-fastapi-instrumentator\nstone==3.3.1\n    # via dropbox\nstripe==10.12.0\n    # via onyx\nsupervisor==4.3.0\n    # via onyx\nsympy==1.14.0\n    # via onnxruntime\ntblib==3.2.2\n    # via distributed\ntenacity==9.1.2\n    # via\n    #   google-genai\n    #   langchain-core\n    #   voyageai\ntext-unidecode==1.3\n    # via python-slugify\ntiktoken==0.7.0\n    # via\n    #   litellm\n    #   onyx\ntimeago==1.0.16\n    # via onyx\ntld==0.13.1\n    # via courlan\ntokenizers==0.21.4\n    # via\n    #   chonkie\n    #   cohere\n    #   litellm\ntoolz==1.1.0\n    # via\n    #   dask\n    #   distributed\n    #   partd\ntornado==6.5.5\n    # via distributed\ntqdm==4.67.1\n    # via\n    #   braintrust\n    #   chonkie\n    #   huggingface-hub\n    #   nltk\n    #   openai\n    #   unstructured\ntrafilatura==1.12.2\n    # via onyx\ntyper==0.20.0\n    # via mcp\ntypes-awscrt==0.28.4\n    # via botocore-stubs\ntypes-openpyxl==3.0.4.7\n    # via onyx\ntypes-requests==2.32.0.20250328\n    # via cohere\ntypes-s3transfer==0.14.0\n    # via boto3-stubs\ntyping-extensions==4.15.0\n    # via\n    #   aiosignal\n    #   alembic\n    #   anyio\n    #   boto3-stubs\n    #   braintrust\n    #   cohere\n    #   ddtrace\n    #   exa-py\n    #   exceptiongroup\n    #   fastapi\n    #   google-genai\n    #   huggingface-hub\n    #   jira\n    #   langchain-core\n    #   mcp\n    #   mypy\n    #   mypy-boto3-s3\n    #   office365-rest-python-client\n    #   openai\n    #   opentelemetry-api\n    #   opentelemetry-exporter-otlp-proto-http\n    #   opentelemetry-sdk\n    #   opentelemetry-semantic-conventions\n    #   py-key-value-aio\n    #   pyairtable\n    #   pydantic\n    #   pydantic-core\n    #   pyee\n    #   pygithub\n    #   python-docx\n    #   python-oxmsg\n    #   referencing\n    #   simple-salesforce\n    #   sqlalchemy\n    #   starlette\n    #   stripe\n    #   typer\n    #   typing-inspect\n    #   typing-inspection\n    #   unstructured\n    #   zulip\ntyping-inspect==0.9.0\n    # via dataclasses-json\ntyping-inspection==0.4.2\n    # via\n    #   fastapi\n    #   mcp\n    #   pydantic\n    #   pydantic-settings\ntzdata==2025.2\n    # via\n    #   kombu\n    #   pandas\n    #   tzlocal\ntzlocal==5.3.1\n    # via dateparser\nuncalled-for==0.2.0\n    # via fastmcp\nunstructured==0.18.27\n    # via onyx\nunstructured-client==0.42.6\n    # via\n    #   onyx\n    #   unstructured\nuritemplate==4.2.0\n    # via google-api-python-client\nurllib3==2.6.3\n    # via\n    #   asana\n    #   botocore\n    #   courlan\n    #   distributed\n    #   htmldate\n    #   hubspot-api-client\n    #   kubernetes\n    #   onyx\n    #   opensearch-py\n    #   pyairtable\n    #   pygithub\n    #   requests\n    #   sentry-sdk\n    #   trafilatura\n    #   types-requests\nuuid-utils==0.14.0\n    # via langchain-core\nuvicorn==0.35.0\n    # via\n    #   fastmcp\n    #   mcp\n    #   onyx\nvine==5.1.0\n    # via\n    #   amqp\n    #   celery\n    #   kombu\nvoyageai==0.2.3\n    # via onyx\nwatchfiles==1.1.1\n    # via fastmcp\nwcwidth==0.2.14\n    # via prompt-toolkit\nwebencodings==0.5.1\n    # via html5lib\nwebsocket-client==1.9.0\n    # via kubernetes\nwebsockets==15.0.1\n    # via\n    #   fastmcp\n    #   google-genai\nwerkzeug==3.1.6\n    # via sendgrid\nwrapt==1.17.3\n    # via\n    #   aiobotocore\n    #   braintrust\n    #   ddtrace\n    #   deprecated\n    #   langfuse\n    #   openinference-instrumentation\n    #   unstructured\nxlrd==2.0.2\n    # via markitdown\nxlsxwriter==3.2.9\n    # via python-pptx\nxmlsec==1.3.14\n    # via\n    #   onyx\n    #   python3-saml\nxmltodict==1.0.2\n    # via ddtrace\nyarl==1.22.0\n    # via aiohttp\nzeep==4.3.2\n    # via simple-salesforce\nzict==3.0.0\n    # via distributed\nzipp==3.23.0\n    # via importlib-metadata\nzstandard==0.23.0\n    # via langsmith\nzulip==0.8.2\n    # via onyx\n"
  },
  {
    "path": "backend/requirements/dev.txt",
    "content": "# This file was autogenerated by uv via the following command:\n#    uv export --no-emit-project --no-default-groups --no-hashes --extra dev -o backend/requirements/dev.txt\nagent-client-protocol==0.7.1\n    # via onyx\naioboto3==15.1.0\n    # via onyx\naiobotocore==2.24.0\n    # via aioboto3\naiofiles==25.1.0\n    # via aioboto3\naiohappyeyeballs==2.6.1\n    # via aiohttp\naiohttp==3.13.4\n    # via\n    #   aiobotocore\n    #   discord-py\n    #   litellm\n    #   voyageai\naioitertools==0.13.0\n    # via aiobotocore\naiolimiter==1.2.1\n    # via voyageai\naiosignal==1.4.0\n    # via aiohttp\nalembic==1.10.4\n    # via pytest-alembic\nannotated-doc==0.0.4\n    # via fastapi\nannotated-types==0.7.0\n    # via pydantic\nanyio==4.11.0\n    # via\n    #   claude-agent-sdk\n    #   google-genai\n    #   httpx\n    #   mcp\n    #   openai\n    #   sse-starlette\n    #   starlette\nappnope==0.1.4 ; sys_platform == 'darwin'\n    # via ipykernel\nasttokens==3.0.1\n    # via stack-data\nattrs==25.4.0\n    # via\n    #   aiohttp\n    #   jsonschema\n    #   referencing\nblack==25.1.0\n    # via onyx\nboto3==1.39.11\n    # via\n    #   aiobotocore\n    #   cohere\nbotocore==1.39.11\n    # via\n    #   aiobotocore\n    #   boto3\n    #   s3transfer\nbrotli==1.2.0\n    # via onyx\ncelery-types==0.19.0\n    # via onyx\ncertifi==2025.11.12\n    # via\n    #   httpcore\n    #   httpx\n    #   kubernetes\n    #   requests\n    #   sentry-sdk\ncffi==2.0.0 ; implementation_name == 'pypy' or platform_python_implementation != 'PyPy'\n    # via\n    #   cryptography\n    #   pyzmq\ncfgv==3.4.0\n    # via pre-commit\ncharset-normalizer==3.4.4\n    # via requests\nclassify-imports==4.2.0\n    # via reorder-python-imports-black\nclaude-agent-sdk==0.1.19\n    # via onyx\nclick==8.3.1\n    # via\n    #   black\n    #   litellm\n    #   uvicorn\ncohere==5.6.1\n    # via onyx\ncolorama==0.4.6 ; sys_platform == 'win32'\n    # via\n    #   click\n    #   ipython\n    #   pytest\n    #   tqdm\ncomm==0.2.3\n    # via ipykernel\ncontourpy==1.3.3\n    # via matplotlib\ncryptography==46.0.6\n    # via\n    #   google-auth\n    #   pyjwt\ncycler==0.12.1\n    # via matplotlib\ndebugpy==1.8.17\n    # via ipykernel\ndecorator==5.2.1\n    # via\n    #   ipython\n    #   retry\ndiscord-py==2.4.0\n    # via onyx\ndistlib==0.4.0\n    # via virtualenv\ndistro==1.9.0\n    # via openai\ndurationpy==0.10\n    # via kubernetes\nexecnet==2.1.2\n    # via pytest-xdist\nexecuting==2.2.1\n    # via stack-data\nfaker==40.1.2\n    # via onyx\nfastapi==0.133.1\n    # via\n    #   onyx\n    #   onyx-devtools\nfastavro==1.12.1\n    # via cohere\nfastuuid==0.14.0\n    # via litellm\nfilelock==3.20.3\n    # via\n    #   huggingface-hub\n    #   virtualenv\nfonttools==4.61.1\n    # via matplotlib\nfrozenlist==1.8.0\n    # via\n    #   aiohttp\n    #   aiosignal\nfsspec==2025.10.0\n    # via huggingface-hub\ngoogle-auth==2.48.0\n    # via\n    #   google-genai\n    #   kubernetes\ngoogle-genai==1.52.0\n    # via onyx\ngreenlet==3.2.4 ; platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'\n    # via sqlalchemy\nh11==0.16.0\n    # via\n    #   httpcore\n    #   uvicorn\nhatchling==1.28.0\n    # via onyx\nhf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'\n    # via huggingface-hub\nhttpcore==1.0.9\n    # via httpx\nhttpx==0.28.1\n    # via\n    #   cohere\n    #   google-genai\n    #   litellm\n    #   mcp\n    #   openai\nhttpx-sse==0.4.3\n    # via\n    #   cohere\n    #   mcp\nhuggingface-hub==0.35.3\n    # via tokenizers\nidentify==2.6.15\n    # via pre-commit\nidna==3.11\n    # via\n    #   anyio\n    #   httpx\n    #   requests\n    #   yarl\nimportlib-metadata==8.7.0\n    # via litellm\niniconfig==2.3.0\n    # via pytest\nipykernel==6.29.5\n    # via onyx\nipython==9.7.0\n    # via ipykernel\nipython-pygments-lexers==1.1.1\n    # via ipython\njedi==0.19.2\n    # via ipython\njinja2==3.1.6\n    # via litellm\njiter==0.12.0\n    # via openai\njmespath==1.0.1\n    # via\n    #   aiobotocore\n    #   boto3\n    #   botocore\njsonschema==4.25.1\n    # via\n    #   litellm\n    #   mcp\njsonschema-specifications==2025.9.1\n    # via jsonschema\njupyter-client==8.6.3\n    # via ipykernel\njupyter-core==5.9.1\n    # via\n    #   ipykernel\n    #   jupyter-client\nkiwisolver==1.4.9\n    # via matplotlib\nkubernetes==31.0.0\n    # via onyx\nlitellm==1.81.6\n    # via onyx\nmako==1.2.4\n    # via alembic\nmanygo==0.2.0\n    # via onyx\nmarkupsafe==3.0.3\n    # via\n    #   jinja2\n    #   mako\nmatplotlib==3.10.8\n    # via onyx\nmatplotlib-inline==0.2.1\n    # via\n    #   ipykernel\n    #   ipython\nmcp==1.26.0\n    # via claude-agent-sdk\nmultidict==6.7.0\n    # via\n    #   aiobotocore\n    #   aiohttp\n    #   yarl\nmypy==1.13.0\n    # via onyx\nmypy-extensions==1.0.0\n    # via\n    #   black\n    #   mypy\n    #   onyx\nnest-asyncio==1.6.0\n    # via ipykernel\nnodeenv==1.9.1\n    # via pre-commit\nnumpy==2.4.1\n    # via\n    #   contourpy\n    #   matplotlib\n    #   pandas-stubs\n    #   voyageai\noauthlib==3.2.2\n    # via\n    #   kubernetes\n    #   requests-oauthlib\nonyx-devtools==0.7.2\n    # via onyx\nopenai==2.14.0\n    # via\n    #   litellm\n    #   onyx\nopenapi-generator-cli==7.17.0\n    # via\n    #   onyx\n    #   onyx-devtools\npackaging==24.2\n    # via\n    #   black\n    #   hatchling\n    #   huggingface-hub\n    #   ipykernel\n    #   matplotlib\n    #   pytest\npandas-stubs==2.3.3.251201\n    # via onyx\nparameterized==0.9.0\n    # via cohere\nparso==0.8.5\n    # via jedi\npathspec==0.12.1\n    # via\n    #   black\n    #   hatchling\npexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'\n    # via ipython\npillow==12.1.1\n    # via matplotlib\nplatformdirs==4.5.0\n    # via\n    #   black\n    #   jupyter-core\n    #   virtualenv\npluggy==1.6.0\n    # via\n    #   hatchling\n    #   pytest\npre-commit==3.2.2\n    # via onyx\nprometheus-client==0.23.1\n    # via\n    #   onyx\n    #   prometheus-fastapi-instrumentator\nprometheus-fastapi-instrumentator==7.1.0\n    # via onyx\nprompt-toolkit==3.0.52\n    # via ipython\npropcache==0.4.1\n    # via\n    #   aiohttp\n    #   yarl\npsutil==7.1.3\n    # via ipykernel\nptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'\n    # via pexpect\npure-eval==0.2.3\n    # via stack-data\npy==1.11.0\n    # via retry\npyasn1==0.6.3\n    # via\n    #   pyasn1-modules\n    #   rsa\npyasn1-modules==0.4.2\n    # via google-auth\npycparser==2.23 ; (implementation_name != 'PyPy' and platform_python_implementation != 'PyPy') or (implementation_name == 'pypy' and platform_python_implementation == 'PyPy')\n    # via cffi\npydantic==2.11.7\n    # via\n    #   agent-client-protocol\n    #   cohere\n    #   fastapi\n    #   google-genai\n    #   litellm\n    #   mcp\n    #   onyx\n    #   openai\n    #   pydantic-settings\npydantic-core==2.33.2\n    # via pydantic\npydantic-settings==2.12.0\n    # via mcp\npygments==2.20.0\n    # via\n    #   ipython\n    #   ipython-pygments-lexers\npyjwt==2.12.0\n    # via mcp\npyparsing==3.2.5\n    # via matplotlib\npytest==8.3.5\n    # via\n    #   onyx\n    #   pytest-alembic\n    #   pytest-asyncio\n    #   pytest-dotenv\n    #   pytest-repeat\n    #   pytest-xdist\npytest-alembic==0.12.1\n    # via onyx\npytest-asyncio==1.3.0\n    # via onyx\npytest-dotenv==0.5.2\n    # via onyx\npytest-repeat==0.9.4\n    # via onyx\npytest-xdist==3.8.0\n    # via onyx\npython-dateutil==2.8.2\n    # via\n    #   aiobotocore\n    #   botocore\n    #   jupyter-client\n    #   kubernetes\n    #   matplotlib\npython-dotenv==1.1.1\n    # via\n    #   litellm\n    #   pydantic-settings\n    #   pytest-dotenv\npython-multipart==0.0.22\n    # via mcp\npywin32==311 ; sys_platform == 'win32'\n    # via mcp\npyyaml==6.0.3\n    # via\n    #   huggingface-hub\n    #   kubernetes\n    #   pre-commit\npyzmq==27.1.0\n    # via\n    #   ipykernel\n    #   jupyter-client\nreferencing==0.36.2\n    # via\n    #   jsonschema\n    #   jsonschema-specifications\nregex==2025.11.3\n    # via tiktoken\nrelease-tag==0.5.2\n    # via onyx\nreorder-python-imports-black==3.14.0\n    # via onyx\nrequests==2.33.0\n    # via\n    #   cohere\n    #   google-genai\n    #   huggingface-hub\n    #   kubernetes\n    #   requests-oauthlib\n    #   tiktoken\n    #   voyageai\nrequests-oauthlib==1.3.1\n    # via kubernetes\nretry==0.9.2\n    # via onyx\nrpds-py==0.29.0\n    # via\n    #   jsonschema\n    #   referencing\nrsa==4.9.1\n    # via google-auth\nruff==0.12.0\n    # via onyx\ns3transfer==0.13.1\n    # via boto3\nsentry-sdk==2.14.0\n    # via onyx\nsix==1.17.0\n    # via\n    #   kubernetes\n    #   python-dateutil\nsniffio==1.3.1\n    # via\n    #   anyio\n    #   openai\nsqlalchemy==2.0.15\n    # via\n    #   alembic\n    #   pytest-alembic\nsse-starlette==3.0.3\n    # via mcp\nstack-data==0.6.3\n    # via ipython\nstarlette==0.49.3\n    # via\n    #   fastapi\n    #   mcp\n    #   prometheus-fastapi-instrumentator\ntenacity==9.1.2\n    # via\n    #   google-genai\n    #   voyageai\ntiktoken==0.7.0\n    # via litellm\ntokenizers==0.21.4\n    # via\n    #   cohere\n    #   litellm\ntornado==6.5.5\n    # via\n    #   ipykernel\n    #   jupyter-client\ntqdm==4.67.1\n    # via\n    #   huggingface-hub\n    #   openai\ntraitlets==5.14.3\n    # via\n    #   ipykernel\n    #   ipython\n    #   jupyter-client\n    #   jupyter-core\n    #   matplotlib-inline\ntrove-classifiers==2025.12.1.14\n    # via hatchling\ntypes-beautifulsoup4==4.12.0.3\n    # via onyx\ntypes-html5lib==1.1.11.13\n    # via\n    #   onyx\n    #   types-beautifulsoup4\ntypes-oauthlib==3.2.0.9\n    # via onyx\ntypes-passlib==1.7.7.20240106\n    # via onyx\ntypes-pillow==10.2.0.20240822\n    # via onyx\ntypes-psutil==7.1.3.20251125\n    # via onyx\ntypes-psycopg2==2.9.21.10\n    # via onyx\ntypes-python-dateutil==2.8.19.13\n    # via onyx\ntypes-pytz==2023.3.1.1\n    # via\n    #   onyx\n    #   pandas-stubs\ntypes-pyyaml==6.0.12.11\n    # via onyx\ntypes-regex==2023.3.23.1\n    # via onyx\ntypes-requests==2.32.0.20250328\n    # via\n    #   cohere\n    #   onyx\ntypes-retry==0.9.9.3\n    # via onyx\ntypes-setuptools==68.0.0.3\n    # via onyx\ntyping-extensions==4.15.0\n    # via\n    #   aiosignal\n    #   alembic\n    #   anyio\n    #   celery-types\n    #   cohere\n    #   fastapi\n    #   google-genai\n    #   huggingface-hub\n    #   ipython\n    #   mcp\n    #   mypy\n    #   openai\n    #   pydantic\n    #   pydantic-core\n    #   pytest-asyncio\n    #   referencing\n    #   sqlalchemy\n    #   starlette\n    #   typing-inspection\ntyping-inspection==0.4.2\n    # via\n    #   fastapi\n    #   mcp\n    #   pydantic\n    #   pydantic-settings\ntzdata==2025.2 ; sys_platform == 'win32'\n    # via faker\nurllib3==2.6.3\n    # via\n    #   botocore\n    #   kubernetes\n    #   requests\n    #   sentry-sdk\n    #   types-requests\nuvicorn==0.35.0\n    # via\n    #   mcp\n    #   onyx\nvirtualenv==20.36.1\n    # via pre-commit\nvoyageai==0.2.3\n    # via onyx\nwcwidth==0.2.14\n    # via prompt-toolkit\nwebsocket-client==1.9.0\n    # via kubernetes\nwebsockets==15.0.1\n    # via google-genai\nwrapt==1.17.3\n    # via aiobotocore\nyarl==1.22.0\n    # via aiohttp\nzipp==3.23.0\n    # via importlib-metadata\nzizmor==1.18.0\n    # via onyx\n"
  },
  {
    "path": "backend/requirements/ee.txt",
    "content": "# This file was autogenerated by uv via the following command:\n#    uv export --no-emit-project --no-default-groups --no-hashes --extra ee -o backend/requirements/ee.txt\nagent-client-protocol==0.7.1\n    # via onyx\naioboto3==15.1.0\n    # via onyx\naiobotocore==2.24.0\n    # via aioboto3\naiofiles==25.1.0\n    # via aioboto3\naiohappyeyeballs==2.6.1\n    # via aiohttp\naiohttp==3.13.4\n    # via\n    #   aiobotocore\n    #   discord-py\n    #   litellm\n    #   voyageai\naioitertools==0.13.0\n    # via aiobotocore\naiolimiter==1.2.1\n    # via voyageai\naiosignal==1.4.0\n    # via aiohttp\nannotated-doc==0.0.4\n    # via fastapi\nannotated-types==0.7.0\n    # via pydantic\nanyio==4.11.0\n    # via\n    #   claude-agent-sdk\n    #   google-genai\n    #   httpx\n    #   mcp\n    #   openai\n    #   sse-starlette\n    #   starlette\nattrs==25.4.0\n    # via\n    #   aiohttp\n    #   jsonschema\n    #   referencing\nbackoff==2.2.1\n    # via posthog\nboto3==1.39.11\n    # via\n    #   aiobotocore\n    #   cohere\nbotocore==1.39.11\n    # via\n    #   aiobotocore\n    #   boto3\n    #   s3transfer\nbrotli==1.2.0\n    # via onyx\ncertifi==2025.11.12\n    # via\n    #   httpcore\n    #   httpx\n    #   kubernetes\n    #   requests\n    #   sentry-sdk\ncffi==2.0.0 ; platform_python_implementation != 'PyPy'\n    # via cryptography\ncharset-normalizer==3.4.4\n    # via requests\nclaude-agent-sdk==0.1.19\n    # via onyx\nclick==8.3.1\n    # via\n    #   litellm\n    #   uvicorn\ncohere==5.6.1\n    # via onyx\ncolorama==0.4.6 ; sys_platform == 'win32'\n    # via\n    #   click\n    #   tqdm\ncryptography==46.0.6\n    # via\n    #   google-auth\n    #   pyjwt\ndecorator==5.2.1\n    # via retry\ndiscord-py==2.4.0\n    # via onyx\ndistro==1.9.0\n    # via openai\ndurationpy==0.10\n    # via kubernetes\nfastapi==0.133.1\n    # via onyx\nfastavro==1.12.1\n    # via cohere\nfastuuid==0.14.0\n    # via litellm\nfilelock==3.20.3\n    # via huggingface-hub\nfrozenlist==1.8.0\n    # via\n    #   aiohttp\n    #   aiosignal\nfsspec==2025.10.0\n    # via huggingface-hub\ngoogle-auth==2.48.0\n    # via\n    #   google-genai\n    #   kubernetes\ngoogle-genai==1.52.0\n    # via onyx\nh11==0.16.0\n    # via\n    #   httpcore\n    #   uvicorn\nhf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'\n    # via huggingface-hub\nhttpcore==1.0.9\n    # via httpx\nhttpx==0.28.1\n    # via\n    #   cohere\n    #   google-genai\n    #   litellm\n    #   mcp\n    #   openai\nhttpx-sse==0.4.3\n    # via\n    #   cohere\n    #   mcp\nhuggingface-hub==0.35.3\n    # via tokenizers\nidna==3.11\n    # via\n    #   anyio\n    #   httpx\n    #   requests\n    #   yarl\nimportlib-metadata==8.7.0\n    # via litellm\njinja2==3.1.6\n    # via litellm\njiter==0.12.0\n    # via openai\njmespath==1.0.1\n    # via\n    #   aiobotocore\n    #   boto3\n    #   botocore\njsonschema==4.25.1\n    # via\n    #   litellm\n    #   mcp\njsonschema-specifications==2025.9.1\n    # via jsonschema\nkubernetes==31.0.0\n    # via onyx\nlitellm==1.81.6\n    # via onyx\nmarkupsafe==3.0.3\n    # via jinja2\nmcp==1.26.0\n    # via claude-agent-sdk\nmonotonic==1.6\n    # via posthog\nmultidict==6.7.0\n    # via\n    #   aiobotocore\n    #   aiohttp\n    #   yarl\nnumpy==2.4.1\n    # via voyageai\noauthlib==3.2.2\n    # via\n    #   kubernetes\n    #   requests-oauthlib\nopenai==2.14.0\n    # via\n    #   litellm\n    #   onyx\npackaging==24.2\n    # via huggingface-hub\nparameterized==0.9.0\n    # via cohere\nposthog==3.7.4\n    # via onyx\nprometheus-client==0.23.1\n    # via\n    #   onyx\n    #   prometheus-fastapi-instrumentator\nprometheus-fastapi-instrumentator==7.1.0\n    # via onyx\npropcache==0.4.1\n    # via\n    #   aiohttp\n    #   yarl\npy==1.11.0\n    # via retry\npyasn1==0.6.3\n    # via\n    #   pyasn1-modules\n    #   rsa\npyasn1-modules==0.4.2\n    # via google-auth\npycparser==2.23 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'\n    # via cffi\npydantic==2.11.7\n    # via\n    #   agent-client-protocol\n    #   cohere\n    #   fastapi\n    #   google-genai\n    #   litellm\n    #   mcp\n    #   onyx\n    #   openai\n    #   pydantic-settings\npydantic-core==2.33.2\n    # via pydantic\npydantic-settings==2.12.0\n    # via mcp\npyjwt==2.12.0\n    # via mcp\npython-dateutil==2.8.2\n    # via\n    #   aiobotocore\n    #   botocore\n    #   kubernetes\n    #   posthog\npython-dotenv==1.1.1\n    # via\n    #   litellm\n    #   pydantic-settings\npython-multipart==0.0.22\n    # via mcp\npywin32==311 ; sys_platform == 'win32'\n    # via mcp\npyyaml==6.0.3\n    # via\n    #   huggingface-hub\n    #   kubernetes\nreferencing==0.36.2\n    # via\n    #   jsonschema\n    #   jsonschema-specifications\nregex==2025.11.3\n    # via tiktoken\nrequests==2.33.0\n    # via\n    #   cohere\n    #   google-genai\n    #   huggingface-hub\n    #   kubernetes\n    #   posthog\n    #   requests-oauthlib\n    #   tiktoken\n    #   voyageai\nrequests-oauthlib==1.3.1\n    # via kubernetes\nretry==0.9.2\n    # via onyx\nrpds-py==0.29.0\n    # via\n    #   jsonschema\n    #   referencing\nrsa==4.9.1\n    # via google-auth\ns3transfer==0.13.1\n    # via boto3\nsentry-sdk==2.14.0\n    # via onyx\nsix==1.17.0\n    # via\n    #   kubernetes\n    #   posthog\n    #   python-dateutil\nsniffio==1.3.1\n    # via\n    #   anyio\n    #   openai\nsse-starlette==3.0.3\n    # via mcp\nstarlette==0.49.3\n    # via\n    #   fastapi\n    #   mcp\n    #   prometheus-fastapi-instrumentator\ntenacity==9.1.2\n    # via\n    #   google-genai\n    #   voyageai\ntiktoken==0.7.0\n    # via litellm\ntokenizers==0.21.4\n    # via\n    #   cohere\n    #   litellm\ntqdm==4.67.1\n    # via\n    #   huggingface-hub\n    #   openai\ntypes-requests==2.32.0.20250328\n    # via cohere\ntyping-extensions==4.15.0\n    # via\n    #   aiosignal\n    #   anyio\n    #   cohere\n    #   fastapi\n    #   google-genai\n    #   huggingface-hub\n    #   mcp\n    #   openai\n    #   pydantic\n    #   pydantic-core\n    #   referencing\n    #   starlette\n    #   typing-inspection\ntyping-inspection==0.4.2\n    # via\n    #   fastapi\n    #   mcp\n    #   pydantic\n    #   pydantic-settings\nurllib3==2.6.3\n    # via\n    #   botocore\n    #   kubernetes\n    #   requests\n    #   sentry-sdk\n    #   types-requests\nuvicorn==0.35.0\n    # via\n    #   mcp\n    #   onyx\nvoyageai==0.2.3\n    # via onyx\nwebsocket-client==1.9.0\n    # via kubernetes\nwebsockets==15.0.1\n    # via google-genai\nwrapt==1.17.3\n    # via aiobotocore\nyarl==1.22.0\n    # via aiohttp\nzipp==3.23.0\n    # via importlib-metadata\n"
  },
  {
    "path": "backend/requirements/model_server.txt",
    "content": "# This file was autogenerated by uv via the following command:\n#    uv export --no-emit-project --no-default-groups --no-hashes --extra model_server -o backend/requirements/model_server.txt\naccelerate==1.6.0\n    # via onyx\nagent-client-protocol==0.7.1\n    # via onyx\naioboto3==15.1.0\n    # via onyx\naiobotocore==2.24.0\n    # via aioboto3\naiofiles==25.1.0\n    # via aioboto3\naiohappyeyeballs==2.6.1\n    # via aiohttp\naiohttp==3.13.4\n    # via\n    #   aiobotocore\n    #   discord-py\n    #   litellm\n    #   voyageai\naioitertools==0.13.0\n    # via aiobotocore\naiolimiter==1.2.1\n    # via voyageai\naiosignal==1.4.0\n    # via aiohttp\namqp==5.3.1\n    # via kombu\nannotated-doc==0.0.4\n    # via fastapi\nannotated-types==0.7.0\n    # via pydantic\nanyio==4.11.0\n    # via\n    #   claude-agent-sdk\n    #   google-genai\n    #   httpx\n    #   mcp\n    #   openai\n    #   sse-starlette\n    #   starlette\nattrs==25.4.0\n    # via\n    #   aiohttp\n    #   jsonschema\n    #   referencing\nbilliard==4.2.3\n    # via celery\nboto3==1.39.11\n    # via\n    #   aiobotocore\n    #   cohere\nbotocore==1.39.11\n    # via\n    #   aiobotocore\n    #   boto3\n    #   s3transfer\nbrotli==1.2.0\n    # via onyx\ncelery==5.5.1\n    # via sentry-sdk\ncertifi==2025.11.12\n    # via\n    #   httpcore\n    #   httpx\n    #   kubernetes\n    #   requests\n    #   sentry-sdk\ncffi==2.0.0 ; platform_python_implementation != 'PyPy'\n    # via cryptography\ncharset-normalizer==3.4.4\n    # via requests\nclaude-agent-sdk==0.1.19\n    # via onyx\nclick==8.3.1\n    # via\n    #   celery\n    #   click-didyoumean\n    #   click-plugins\n    #   click-repl\n    #   litellm\n    #   uvicorn\nclick-didyoumean==0.3.1\n    # via celery\nclick-plugins==1.1.1.2\n    # via celery\nclick-repl==0.3.0\n    # via celery\ncohere==5.6.1\n    # via onyx\ncolorama==0.4.6 ; sys_platform == 'win32'\n    # via\n    #   click\n    #   tqdm\ncryptography==46.0.6\n    # via\n    #   google-auth\n    #   pyjwt\ndecorator==5.2.1\n    # via retry\ndiscord-py==2.4.0\n    # via onyx\ndistro==1.9.0\n    # via openai\ndurationpy==0.10\n    # via kubernetes\neinops==0.8.1\n    # via onyx\nfastapi==0.133.1\n    # via\n    #   onyx\n    #   sentry-sdk\nfastavro==1.12.1\n    # via cohere\nfastuuid==0.14.0\n    # via litellm\nfilelock==3.20.3\n    # via\n    #   huggingface-hub\n    #   torch\n    #   transformers\nfrozenlist==1.8.0\n    # via\n    #   aiohttp\n    #   aiosignal\nfsspec==2025.10.0\n    # via\n    #   huggingface-hub\n    #   torch\ngoogle-auth==2.48.0\n    # via\n    #   google-genai\n    #   kubernetes\ngoogle-genai==1.52.0\n    # via onyx\nh11==0.16.0\n    # via\n    #   httpcore\n    #   uvicorn\nhf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'\n    # via huggingface-hub\nhttpcore==1.0.9\n    # via httpx\nhttpx==0.28.1\n    # via\n    #   cohere\n    #   google-genai\n    #   litellm\n    #   mcp\n    #   openai\nhttpx-sse==0.4.3\n    # via\n    #   cohere\n    #   mcp\nhuggingface-hub==0.35.3\n    # via\n    #   accelerate\n    #   sentence-transformers\n    #   tokenizers\n    #   transformers\nidna==3.11\n    # via\n    #   anyio\n    #   httpx\n    #   requests\n    #   yarl\nimportlib-metadata==8.7.0\n    # via litellm\njinja2==3.1.6\n    # via\n    #   litellm\n    #   torch\njiter==0.12.0\n    # via openai\njmespath==1.0.1\n    # via\n    #   aiobotocore\n    #   boto3\n    #   botocore\njoblib==1.5.2\n    # via scikit-learn\njsonschema==4.25.1\n    # via\n    #   litellm\n    #   mcp\njsonschema-specifications==2025.9.1\n    # via jsonschema\nkombu==5.5.4\n    # via celery\nkubernetes==31.0.0\n    # via onyx\nlitellm==1.81.6\n    # via onyx\nmarkupsafe==3.0.3\n    # via jinja2\nmcp==1.26.0\n    # via claude-agent-sdk\nmpmath==1.3.0\n    # via sympy\nmultidict==6.7.0\n    # via\n    #   aiobotocore\n    #   aiohttp\n    #   yarl\nnetworkx==3.5\n    # via torch\nnumpy==2.4.1\n    # via\n    #   accelerate\n    #   onyx\n    #   scikit-learn\n    #   scipy\n    #   transformers\n    #   voyageai\nnvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via\n    #   nvidia-cudnn-cu12\n    #   nvidia-cusolver-cu12\n    #   torch\nnvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\nnvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\nnvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\nnvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\nnvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\nnvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\nnvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\nnvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\nnvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via\n    #   nvidia-cusolver-cu12\n    #   torch\nnvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\nnvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\nnvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via\n    #   nvidia-cufft-cu12\n    #   nvidia-cusolver-cu12\n    #   nvidia-cusparse-cu12\n    #   torch\nnvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\nnvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\noauthlib==3.2.2\n    # via\n    #   kubernetes\n    #   requests-oauthlib\nopenai==2.14.0\n    # via\n    #   litellm\n    #   onyx\npackaging==24.2\n    # via\n    #   accelerate\n    #   huggingface-hub\n    #   kombu\n    #   transformers\nparameterized==0.9.0\n    # via cohere\npillow==12.1.1\n    # via sentence-transformers\nprometheus-client==0.23.1\n    # via\n    #   onyx\n    #   prometheus-fastapi-instrumentator\nprometheus-fastapi-instrumentator==7.1.0\n    # via onyx\nprompt-toolkit==3.0.52\n    # via click-repl\npropcache==0.4.1\n    # via\n    #   aiohttp\n    #   yarl\npsutil==7.1.3\n    # via accelerate\npy==1.11.0\n    # via retry\npyasn1==0.6.3\n    # via\n    #   pyasn1-modules\n    #   rsa\npyasn1-modules==0.4.2\n    # via google-auth\npycparser==2.23 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'\n    # via cffi\npydantic==2.11.7\n    # via\n    #   agent-client-protocol\n    #   cohere\n    #   fastapi\n    #   google-genai\n    #   litellm\n    #   mcp\n    #   onyx\n    #   openai\n    #   pydantic-settings\npydantic-core==2.33.2\n    # via pydantic\npydantic-settings==2.12.0\n    # via mcp\npyjwt==2.12.0\n    # via mcp\npython-dateutil==2.8.2\n    # via\n    #   aiobotocore\n    #   botocore\n    #   celery\n    #   kubernetes\npython-dotenv==1.1.1\n    # via\n    #   litellm\n    #   pydantic-settings\npython-multipart==0.0.22\n    # via mcp\npywin32==311 ; sys_platform == 'win32'\n    # via mcp\npyyaml==6.0.3\n    # via\n    #   accelerate\n    #   huggingface-hub\n    #   kubernetes\n    #   transformers\nreferencing==0.36.2\n    # via\n    #   jsonschema\n    #   jsonschema-specifications\nregex==2025.11.3\n    # via\n    #   tiktoken\n    #   transformers\nrequests==2.33.0\n    # via\n    #   cohere\n    #   google-genai\n    #   huggingface-hub\n    #   kubernetes\n    #   requests-oauthlib\n    #   tiktoken\n    #   transformers\n    #   voyageai\nrequests-oauthlib==1.3.1\n    # via kubernetes\nretry==0.9.2\n    # via onyx\nrpds-py==0.29.0\n    # via\n    #   jsonschema\n    #   referencing\nrsa==4.9.1\n    # via google-auth\ns3transfer==0.13.1\n    # via boto3\nsafetensors==0.5.3\n    # via\n    #   accelerate\n    #   onyx\n    #   transformers\nscikit-learn==1.7.2\n    # via sentence-transformers\nscipy==1.16.3\n    # via\n    #   scikit-learn\n    #   sentence-transformers\nsentence-transformers==4.0.2\n    # via onyx\nsentry-sdk==2.14.0\n    # via onyx\nsetuptools==80.9.0 ; python_full_version >= '3.12'\n    # via torch\nsix==1.17.0\n    # via\n    #   kubernetes\n    #   python-dateutil\nsniffio==1.3.1\n    # via\n    #   anyio\n    #   openai\nsse-starlette==3.0.3\n    # via mcp\nstarlette==0.49.3\n    # via\n    #   fastapi\n    #   mcp\n    #   prometheus-fastapi-instrumentator\n    #   sentry-sdk\nsympy==1.14.0\n    # via torch\ntenacity==9.1.2\n    # via\n    #   google-genai\n    #   voyageai\nthreadpoolctl==3.6.0\n    # via scikit-learn\ntiktoken==0.7.0\n    # via litellm\ntokenizers==0.21.4\n    # via\n    #   cohere\n    #   litellm\n    #   transformers\ntorch==2.9.1\n    # via\n    #   accelerate\n    #   onyx\n    #   sentence-transformers\ntqdm==4.67.1\n    # via\n    #   huggingface-hub\n    #   openai\n    #   sentence-transformers\n    #   transformers\ntransformers==4.53.0\n    # via\n    #   onyx\n    #   sentence-transformers\ntriton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'\n    # via torch\ntypes-requests==2.32.0.20250328\n    # via cohere\ntyping-extensions==4.15.0\n    # via\n    #   aiosignal\n    #   anyio\n    #   cohere\n    #   fastapi\n    #   google-genai\n    #   huggingface-hub\n    #   mcp\n    #   openai\n    #   pydantic\n    #   pydantic-core\n    #   referencing\n    #   sentence-transformers\n    #   starlette\n    #   torch\n    #   typing-inspection\ntyping-inspection==0.4.2\n    # via\n    #   fastapi\n    #   mcp\n    #   pydantic\n    #   pydantic-settings\ntzdata==2025.2\n    # via kombu\nurllib3==2.6.3\n    # via\n    #   botocore\n    #   kubernetes\n    #   requests\n    #   sentry-sdk\n    #   types-requests\nuvicorn==0.35.0\n    # via\n    #   mcp\n    #   onyx\nvine==5.1.0\n    # via\n    #   amqp\n    #   celery\n    #   kombu\nvoyageai==0.2.3\n    # via onyx\nwcwidth==0.2.14\n    # via prompt-toolkit\nwebsocket-client==1.9.0\n    # via kubernetes\nwebsockets==15.0.1\n    # via google-genai\nwrapt==1.17.3\n    # via aiobotocore\nyarl==1.22.0\n    # via aiohttp\nzipp==3.23.0\n    # via importlib-metadata\n"
  },
  {
    "path": "backend/scripts/__init__.py",
    "content": ""
  },
  {
    "path": "backend/scripts/add_connector_creation_script.py",
    "content": "from typing import Any\nfrom typing import Dict\n\nimport requests\n\nAPI_SERVER_URL = \"http://localhost:3000\"  # Adjust this to your Onyx server URL\nHEADERS = {\"Content-Type\": \"application/json\"}\nAPI_KEY = \"onyx-api-key\"  # API key here, if auth is enabled\n\n\ndef create_connector(\n    name: str,\n    source: str,\n    input_type: str,\n    connector_specific_config: Dict[str, Any],\n    is_public: bool = True,\n    groups: list[int] | None = None,\n) -> Dict[str, Any]:\n    connector_update_request = {\n        \"name\": name,\n        \"source\": source,\n        \"input_type\": input_type,\n        \"connector_specific_config\": connector_specific_config,\n        \"is_public\": is_public,\n        \"groups\": groups or [],\n    }\n\n    response = requests.post(\n        url=f\"{API_SERVER_URL}/api/manage/admin/connector\",\n        json=connector_update_request,\n        headers=HEADERS,\n    )\n    response.raise_for_status()\n    return response.json()\n\n\ndef create_credential(\n    name: str,\n    source: str,\n    credential_json: Dict[str, Any],\n    is_public: bool = True,\n    groups: list[int] | None = None,\n) -> Dict[str, Any]:\n    credential_request = {\n        \"name\": name,\n        \"source\": source,\n        \"credential_json\": credential_json,\n        \"admin_public\": is_public,\n        \"groups\": groups or [],\n    }\n\n    response = requests.post(\n        url=f\"{API_SERVER_URL}/api/manage/credential\",\n        json=credential_request,\n        headers=HEADERS,\n    )\n    response.raise_for_status()\n    return response.json()\n\n\ndef create_cc_pair(\n    connector_id: int,\n    credential_id: int,\n    name: str,\n    access_type: str = \"public\",\n    groups: list[int] | None = None,\n) -> Dict[str, Any]:\n    cc_pair_request = {\n        \"name\": name,\n        \"access_type\": access_type,\n        \"groups\": groups or [],\n    }\n\n    response = requests.put(\n        url=f\"{API_SERVER_URL}/api/manage/connector/{connector_id}/credential/{credential_id}\",\n        json=cc_pair_request,\n        headers=HEADERS,\n    )\n    response.raise_for_status()\n    return response.json()\n\n\ndef main() -> None:\n    # Create a Web connector\n    web_connector = create_connector(\n        name=\"Example Web Connector\",\n        source=\"web\",\n        input_type=\"load_state\",\n        connector_specific_config={\n            \"base_url\": \"https://example.com\",\n            \"web_connector_type\": \"recursive\",\n        },\n    )\n    print(f\"Created Web Connector: {web_connector}\")\n\n    # Create a credential for the Web connector\n    web_credential = create_credential(\n        name=\"Example Web Credential\",\n        source=\"web\",\n        credential_json={},  # Web connectors typically don't need credentials\n        is_public=True,\n    )\n    print(f\"Created Web Credential: {web_credential}\")\n\n    # Create CC pair for Web connector\n    web_cc_pair = create_cc_pair(\n        connector_id=web_connector[\"id\"],\n        credential_id=web_credential[\"id\"],\n        name=\"Example Web CC Pair\",\n        access_type=\"public\",\n    )\n    print(f\"Created Web CC Pair: {web_cc_pair}\")\n\n    # Create a GitHub connector\n    github_connector = create_connector(\n        name=\"Example GitHub Connector\",\n        source=\"github\",\n        input_type=\"poll\",\n        connector_specific_config={\n            \"repo_owner\": \"example-owner\",\n            \"repo_name\": \"example-repo\",\n            \"include_prs\": True,\n            \"include_issues\": True,\n        },\n    )\n    print(f\"Created GitHub Connector: {github_connector}\")\n\n    # Create a credential for the GitHub connector\n    github_credential = create_credential(\n        name=\"Example GitHub Credential\",\n        source=\"github\",\n        credential_json={\"github_access_token\": \"your_github_access_token_here\"},\n        is_public=True,\n    )\n    print(f\"Created GitHub Credential: {github_credential}\")\n\n    # Create CC pair for GitHub connector\n    github_cc_pair = create_cc_pair(\n        connector_id=github_connector[\"id\"],\n        credential_id=github_credential[\"id\"],\n        name=\"Example GitHub CC Pair\",\n        access_type=\"public\",\n    )\n    print(f\"Created GitHub CC Pair: {github_cc_pair}\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/api_inference_sample.py",
    "content": "# This file is used to demonstrate how to use the backend APIs directly\n# In this case, the equivalent of asking a question in Onyx Chat in a new chat session\nimport argparse\nimport json\nimport os\n\nimport requests\n\n\ndef create_new_chat_session(onyx_url: str, api_key: str | None) -> int:\n    headers = {\"Authorization\": f\"Bearer {api_key}\"} if api_key else None\n    session_endpoint = onyx_url + \"/api/chat/create-chat-session\"\n\n    response = requests.post(\n        session_endpoint,\n        headers=headers,\n        json={\"persona_id\": 0},  # Global default Persona/Assistant ID\n    )\n    response.raise_for_status()\n\n    new_session_id = response.json()[\"chat_session_id\"]\n    return new_session_id\n\n\ndef process_question(onyx_url: str, question: str, api_key: str | None) -> None:\n    message_endpoint = onyx_url + \"/api/chat/send-chat-message\"\n\n    chat_session_id = create_new_chat_session(onyx_url, api_key)\n\n    headers = {\"Authorization\": f\"Bearer {api_key}\"} if api_key else None\n\n    data = {\n        \"message\": question,\n        \"chat_session_id\": chat_session_id,\n        \"parent_message_id\": None,\n        \"file_descriptors\": [],\n        # Default Question Answer prompt\n        \"prompt_id\": 0,\n        # Not specifying any specific docs to chat to, we want to run a search\n        \"search_doc_ids\": None,\n        \"retrieval_options\": {\n            \"run_search\": \"always\",\n            \"real_time\": True,\n            \"enable_auto_detect_filters\": False,\n            # No filters applied, check all sources, document-sets, time ranges, etc.\n            \"filters\": {},\n        },\n    }\n\n    with requests.post(message_endpoint, headers=headers, json=data) as response:\n        response.raise_for_status()\n\n        for packet in response.iter_lines():\n            response_text = json.loads(packet.decode())\n            # Can also check \"top_documents\" to capture the streamed search results\n            # that include the highest matching documents to the query\n            # or check \"message_id\" to get the message_id used as parent_message_id\n            # to create follow-up messages\n            new_token = response_text.get(\"answer_piece\")\n\n            if new_token:\n                print(new_token, end=\"\", flush=True)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"Sample API Usage\")\n    parser.add_argument(\n        \"--onyx-url\",\n        type=str,\n        default=\"http://localhost:80\",\n        help=\"Onyx URL, should point to Onyx nginx.\",\n    )\n    parser.add_argument(\n        \"--test-question\",\n        type=str,\n        default=\"What is Onyx?\",\n        help=\"Test question for new Chat Session.\",\n    )\n\n    # Not needed if Auth is disabled\n    # Or for Onyx MIT API key must be replaced with session cookie\n    api_key = os.environ.get(\"DANSWER_API_KEY\")\n\n    args = parser.parse_args()\n    process_question(\n        onyx_url=args.onyx_url, question=args.test_question, api_key=api_key\n    )\n"
  },
  {
    "path": "backend/scripts/celery_purge_queue.py",
    "content": "# Tool to run operations on Celery/Redis in production\n# this is a work in progress and isn't completely put together yet\n# but can serve as a stub for future operations\nimport argparse\nimport logging\nfrom logging import getLogger\n\nfrom redis import Redis\n\nfrom onyx.background.celery.celery_redis import celery_get_queue_length\nfrom onyx.configs.app_configs import REDIS_DB_NUMBER_CELERY\nfrom onyx.redis.redis_pool import RedisPool\n\n# Configure the logger\nlogging.basicConfig(\n    level=logging.INFO,  # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)\n    format=\"%(asctime)s - %(name)s - %(levelname)s - %(message)s\",  # Log format\n    handlers=[logging.StreamHandler()],  # Output logs to console\n)\n\nlogger = getLogger(__name__)\n\nREDIS_PASSWORD = \"\"\n\n\ndef celery_purge_queue(queue: str, tenant_id: str) -> None:  # noqa: ARG001\n    \"\"\"Purging a celery queue is extremely difficult because the queue is a list\n    and the only way an item can be removed from a list is by VALUE, which is\n    a linear scan.  Therefore, to purge the list of many values is roughly\n    n^2.\n\n    The other alternative is to pop values and push them back, but that raises\n    questions about behavior while operating on a live queue.\n    \"\"\"\n\n    pool = RedisPool.create_pool(\n        host=\"127.0.0.1\",\n        port=6380,\n        db=REDIS_DB_NUMBER_CELERY,\n        password=REDIS_PASSWORD,\n        ssl=True,\n        ssl_cert_reqs=\"optional\",\n        ssl_ca_certs=None,\n    )\n\n    r = Redis(connection_pool=pool)\n\n    length = celery_get_queue_length(queue, r)\n\n    logger.info(f\"queue={queue} length={length}\")\n\n    # processed = 0\n    # deleted = 0\n    # for i in range(len(OnyxCeleryPriority)):\n    #     queue_name = queue\n    #     if i > 0:\n    #         queue_name += CELERY_SEPARATOR\n    #         queue_name += str(i)\n\n    #     length = r.llen(queue_name)\n    #     for i in range(length):\n    #         task_raw: bytes | None = r.lindex(queue_name, i)\n    #         if not task_raw:\n    #             break\n\n    #         processed += 1\n    #         task_str = task_raw.decode(\"utf-8\")\n    #         task = json.loads(task_str)\n    #         task_kwargs_str = task[\"headers\"][\"kwargsrepr\"]\n    #         task_kwargs = json.loads(task_kwargs_str)\n    #         task_tenant_id = task_kwargs[\"tenant_id\"]\n    #         if task_tenant_id and task_tenant_id == \"tenant_id\":\n    #             print(\"Delete tenant_id={tenant_id}\")\n    #             if\n    #             deleted += 1\n\n    #         logger.info(f\"processed={processed} deleted={deleted}\")\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"Purge celery queue by tenant id\")\n    parser.add_argument(\"--queue\", type=str, help=\"Queue to purge\", required=True)\n\n    parser.add_argument(\"--tenant\", type=str, help=\"Tenant ID to purge\", required=True)\n\n    args = parser.parse_args()\n    celery_purge_queue(queue=args.queue, tenant_id=args.tenant)\n"
  },
  {
    "path": "backend/scripts/chat_feedback_dump.py",
    "content": "# This file is used to demonstrate how to use the backend APIs directly\n# to query out feedback for all messages\nimport argparse\nimport logging\nfrom logging import getLogger\nfrom typing import Any\nfrom uuid import UUID\n\nimport requests\n\nfrom ee.onyx.server.query_history.api import ChatSessionSnapshot\nfrom onyx.server.manage.models import AllUsersResponse\nfrom onyx.server.query_and_chat.models import ChatSessionsResponse\n\n# Configure the logger\nlogging.basicConfig(\n    level=logging.INFO,  # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)\n    format=\"%(asctime)s - %(name)s - %(levelname)s - %(message)s\",  # Log format\n    handlers=[logging.StreamHandler()],  # Output logs to console\n)\n\nlogger = getLogger(__name__)\n\n# uncomment the following pydantic models if you need the script to be independent\n# from pydantic import BaseModel\n# from datetime import datetime\n# from enum import Enum\n\n# class UserRole(str, Enum):\n#     \"\"\"\n#     User roles\n#     - Basic can't perform any admin actions\n#     - Admin can perform all admin actions\n#     - Curator can perform admin actions for\n#         groups they are curators of\n#     - Global Curator can perform admin actions\n#         for all groups they are a member of\n#     \"\"\"\n\n#     BASIC = \"basic\"\n#     ADMIN = \"admin\"\n#     CURATOR = \"curator\"\n#     GLOBAL_CURATOR = \"global_curator\"\n\n\n# class FullUserSnapshot(BaseModel):\n#     id: UUID\n#     email: str\n#     role: UserRole\n#     is_active: bool\n\n\n# class InvitedUserSnapshot(BaseModel):\n#     email: str\n\n\n# class AllUsersResponse(BaseModel):\n#     accepted: list[FullUserSnapshot]\n#     invited: list[InvitedUserSnapshot]\n#     accepted_pages: int\n#     invited_pages: int\n\n\n# class ChatSessionSharedStatus(str, Enum):\n#     PUBLIC = \"public\"\n#     PRIVATE = \"private\"\n\n\n# class ChatSessionDetails(BaseModel):\n#     id: UUID\n#     name: str\n#     persona_id: int | None = None\n#     time_created: str\n#     shared_status: ChatSessionSharedStatus\n#     folder_id: int | None = None\n#     current_alternate_model: str | None = None\n\n\n# class ChatSessionsResponse(BaseModel):\n#     sessions: list[ChatSessionDetails]\n\n\n# class SessionType(str, Enum):\n#     CHAT = \"Chat\"\n#     SEARCH = \"Search\"\n#     SLACK = \"Slack\"\n\n\n# class AbridgedSearchDoc(BaseModel):\n#     \"\"\"A subset of the info present in `SearchDoc`\"\"\"\n\n#     document_id: str\n#     semantic_identifier: str\n#     link: str | None\n\n\n# class QAFeedbackType(str, Enum):\n#     LIKE = \"like\"  # User likes the answer, used for metrics\n#     DISLIKE = \"dislike\"  # User dislikes the answer, used for metrics\n\n\n# class MessageType(str, Enum):\n#     # Using OpenAI standards, Langchain equivalent shown in comment\n#     # System message is always constructed on the fly, not saved\n#     SYSTEM = \"system\"  # SystemMessage\n#     USER = \"user\"  # HumanMessage\n#     ASSISTANT = \"assistant\"  # AIMessage\n\n\n# class MessageSnapshot(BaseModel):\n#     id: int\n#     message: str\n#     message_type: MessageType\n#     documents: list[AbridgedSearchDoc]\n#     feedback_type: QAFeedbackType | None\n#     feedback_text: str | None\n#     time_created: datetime\n\n\n# class ChatSessionSnapshot(BaseModel):\n#     id: UUID\n#     user_email: str\n#     name: str | None\n#     messages: list[MessageSnapshot]\n#     persona_name: str | None\n#     time_created: datetime\n#     flow_type: SessionType\n\n\ndef create_new_chat_session(onyx_url: str, api_key: str | None) -> int:\n    headers = {\"Authorization\": f\"Bearer {api_key}\"} if api_key else None\n    session_endpoint = onyx_url + \"/api/chat/create-chat-session\"\n\n    response = requests.get(session_endpoint, headers=headers)\n    response.raise_for_status()\n\n    new_session_id = response.json()[\"chat_session_id\"]\n    return new_session_id\n\n\ndef manage_users(onyx_url: str, headers: dict[str, str] | None) -> AllUsersResponse:\n    endpoint = onyx_url + \"/manage/users\"\n\n    response = requests.get(\n        endpoint,\n        headers=headers,\n    )\n    response.raise_for_status()\n\n    all_users = AllUsersResponse(**response.json())\n    return all_users\n\n\ndef get_chat_sessions(\n    onyx_url: str, headers: dict[str, str] | None, user_id: UUID\n) -> ChatSessionsResponse:\n    endpoint = onyx_url + \"/admin/chat-sessions\"\n\n    params: dict[str, Any] = {\"user_id\": user_id}\n    response = requests.get(\n        endpoint,\n        params=params,\n        headers=headers,\n    )\n    response.raise_for_status()\n\n    sessions = ChatSessionsResponse(**response.json())\n    return sessions\n\n\ndef get_session_history(\n    onyx_url: str, headers: dict[str, str] | None, session_id: UUID\n) -> ChatSessionSnapshot:\n    endpoint = onyx_url + f\"/admin/chat-session-history/{session_id}\"\n\n    response = requests.get(\n        endpoint,\n        headers=headers,\n    )\n    response.raise_for_status()\n\n    sessions = ChatSessionSnapshot(**response.json())\n    return sessions\n\n\ndef process_all_chat_feedback(onyx_url: str, api_key: str | None) -> None:\n    headers = {\"Authorization\": f\"Bearer {api_key}\"} if api_key else None\n\n    all_users = manage_users(onyx_url, headers)\n    if not all_users:\n        raise RuntimeError(\"manage_users returned None\")\n\n    logger.info(f\"Accepted users: {len(all_users.accepted)}\")\n\n    user_ids: list[UUID] = [user.id for user in all_users.accepted]\n\n    for user_id in user_ids:\n        r_sessions = get_chat_sessions(onyx_url, headers, user_id)\n        logger.info(f\"user={user_id} num_sessions={len(r_sessions.sessions)}\")\n        for session in r_sessions.sessions:\n            s: ChatSessionSnapshot\n            try:\n                s = get_session_history(onyx_url, headers, session.id)\n            except requests.exceptions.HTTPError:\n                logger.exception(\"get_session_history failed.\")\n\n            for m in s.messages:\n                logger.info(\n                    f\"user={user_id} \"\n                    f\"session={session.id} \"\n                    f\"message={m.message} \"\n                    f\"feedback_type={m.feedback_type} \"\n                    f\"feedback_text={m.feedback_text}\"\n                )\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"Sample API Usage - Chat Feedback\")\n    parser.add_argument(\n        \"--url\",\n        type=str,\n        default=\"http://localhost:8080\",\n        help=\"Onyx URL, should point to Onyx nginx.\",\n    )\n\n    # Not needed if Auth is disabled?\n    # Or for Onyx MIT Edition API key must be replaced with session cookie\n    parser.add_argument(\n        \"--api-key\",\n        type=str,\n        help=\"Onyx Admin Level API key\",\n    )\n\n    args = parser.parse_args()\n    process_all_chat_feedback(onyx_url=args.url, api_key=args.api_key)\n"
  },
  {
    "path": "backend/scripts/chat_history_seeding.py",
    "content": "import argparse\nimport logging\nfrom logging import getLogger\n\nfrom onyx.db.seeding.chat_history_seeding import seed_chat_history\n\n# Configure the logger\nlogging.basicConfig(\n    level=logging.INFO,  # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)\n    format=\"%(asctime)s - %(name)s - %(levelname)s - %(message)s\",  # Log format\n    handlers=[logging.StreamHandler()],  # Output logs to console\n)\n\nlogger = getLogger(__name__)\n\n\ndef go_main(num_sessions: int, num_messages: int, num_days: int) -> None:\n    seed_chat_history(num_sessions, num_messages, num_days)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"Seed chat history\")\n    parser.add_argument(\n        \"--sessions\",\n        type=int,\n        default=2048,\n        help=\"Number of chat sessions to seed\",\n    )\n\n    parser.add_argument(\n        \"--messages\",\n        type=int,\n        default=4,\n        help=\"Number of chat messages to seed per session\",\n    )\n\n    parser.add_argument(\n        \"--days\",\n        type=int,\n        default=90,\n        help=\"Number of days looking backwards over which to seed the timestamps with\",\n    )\n\n    args = parser.parse_args()\n    go_main(args.sessions, args.messages, args.days)\n"
  },
  {
    "path": "backend/scripts/chat_loadtest.py",
    "content": "\"\"\"Basic Usage:\n\npython scripts/chat_loadtest.py --api-key <api-key> --url <onyx-url>/api\n\nto run from the container itself, copy this file in and run:\n\npython chat_loadtest.py --api-key <api-key> --url localhost:8080\n\nFor more options, checkout the bottom of the file.\n\"\"\"\n\nimport argparse\nimport asyncio\nimport logging\nimport statistics\nimport time\nfrom collections.abc import AsyncGenerator\nfrom dataclasses import dataclass\nfrom logging import getLogger\nfrom uuid import UUID\n\nimport aiohttp\n\n# Configure logging\nlogging.basicConfig(\n    level=logging.INFO,\n    format=\"%(asctime)s - %(name)s - %(levelname)s - %(message)s\",\n    handlers=[logging.StreamHandler()],\n)\n\nlogger = getLogger(__name__)\n\n\n@dataclass\nclass ChatMetrics:\n    session_id: UUID\n    total_time: float\n    first_doc_time: float\n    first_answer_time: float\n    tokens_per_second: float\n    total_tokens: int\n\n\nclass ChatLoadTester:\n    def __init__(\n        self,\n        base_url: str,\n        api_key: str | None,\n        num_concurrent: int,\n        messages_per_session: int,\n    ):\n        self.base_url = base_url\n        self.headers = {\"Authorization\": f\"Bearer {api_key}\"} if api_key else {}\n        self.num_concurrent = num_concurrent\n        self.messages_per_session = messages_per_session\n        self.metrics: list[ChatMetrics] = []\n\n    async def create_chat_session(self, session: aiohttp.ClientSession) -> str:\n        \"\"\"Create a new chat session\"\"\"\n        async with session.post(\n            f\"{self.base_url}/chat/create-chat-session\",\n            headers=self.headers,\n            json={\"persona_id\": 0, \"description\": \"Load Test\"},\n        ) as response:\n            response.raise_for_status()\n            data = await response.json()\n            return data[\"chat_session_id\"]\n\n    async def process_stream(\n        self, response: aiohttp.ClientResponse\n    ) -> AsyncGenerator[str, None]:\n        \"\"\"Process the SSE stream from the chat response\"\"\"\n        async for chunk in response.content:\n            chunk_str = chunk.decode()\n            yield chunk_str\n\n    async def send_message(\n        self,\n        session: aiohttp.ClientSession,\n        chat_session_id: str,\n        message: str,\n        parent_message_id: int | None = None,\n    ) -> ChatMetrics:\n        \"\"\"Send a message and measure performance metrics\"\"\"\n        start_time = time.time()\n        first_doc_time = None\n        first_answer_time = None\n        token_count = 0\n\n        async with session.post(\n            f\"{self.base_url}/chat/send-chat-message\",\n            headers=self.headers,\n            json={\n                \"chat_session_id\": chat_session_id,\n                \"message\": message,\n                \"parent_message_id\": parent_message_id,\n                \"prompt_id\": None,\n                \"retrieval_options\": {\n                    \"run_search\": \"always\",\n                    \"real_time\": True,\n                },\n                \"file_descriptors\": [],\n                \"search_doc_ids\": [],\n            },\n        ) as response:\n            response.raise_for_status()\n\n            async for chunk in self.process_stream(response):\n                if \"tool_name\" in chunk and \"run_search\" in chunk:\n                    if first_doc_time is None:\n                        first_doc_time = time.time() - start_time\n\n                if \"answer_piece\" in chunk:\n                    if first_answer_time is None:\n                        first_answer_time = time.time() - start_time\n                    token_count += 1\n\n            total_time = time.time() - start_time\n            tokens_per_second = token_count / total_time if total_time > 0 else 0\n\n            return ChatMetrics(\n                session_id=UUID(chat_session_id),\n                total_time=total_time,\n                first_doc_time=first_doc_time or 0,\n                first_answer_time=first_answer_time or 0,\n                tokens_per_second=tokens_per_second,\n                total_tokens=token_count,\n            )\n\n    async def run_chat_session(self) -> None:\n        \"\"\"Run a complete chat session with multiple messages\"\"\"\n        async with aiohttp.ClientSession() as session:\n            try:\n                chat_session_id = await self.create_chat_session(session)\n                messages = [\n                    \"Tell me about the key features of the product\",\n                    \"How does the search functionality work?\",\n                    \"What are the deployment options?\",\n                    \"Can you explain the security features?\",\n                    \"What integrations are available?\",\n                ]\n\n                parent_message_id = None\n                for i in range(self.messages_per_session):\n                    message = messages[i % len(messages)]\n                    metrics = await self.send_message(\n                        session, chat_session_id, message, parent_message_id\n                    )\n                    self.metrics.append(metrics)\n                    parent_message_id = metrics.total_tokens  # Simplified for example\n\n            except Exception as e:\n                logger.error(f\"Error in chat session: {e}\")\n\n    async def run_load_test(self) -> None:\n        \"\"\"Run multiple concurrent chat sessions\"\"\"\n        start_time = time.time()\n        tasks = [self.run_chat_session() for _ in range(self.num_concurrent)]\n        await asyncio.gather(*tasks)\n        total_time = time.time() - start_time\n\n        self.print_results(total_time)\n\n    def print_results(self, total_time: float) -> None:\n        \"\"\"Print load test results and metrics\"\"\"\n        logger.info(\"\\n=== Load Test Results ===\")\n        logger.info(f\"Total Time: {total_time:.2f} seconds\")\n        logger.info(f\"Concurrent Sessions: {self.num_concurrent}\")\n        logger.info(f\"Messages per Session: {self.messages_per_session}\")\n        logger.info(f\"Total Messages: {len(self.metrics)}\")\n\n        if self.metrics:\n            avg_response_time = statistics.mean(m.total_time for m in self.metrics)\n            avg_first_doc = statistics.mean(m.first_doc_time for m in self.metrics)\n            avg_first_answer = statistics.mean(\n                m.first_answer_time for m in self.metrics\n            )\n            avg_tokens_per_sec = statistics.mean(\n                m.tokens_per_second for m in self.metrics\n            )\n\n            logger.info(f\"\\nAverage Response Time: {avg_response_time:.2f} seconds\")\n            logger.info(f\"Average Time to Documents: {avg_first_doc:.2f} seconds\")\n            logger.info(f\"Average Time to First Answer: {avg_first_answer:.2f} seconds\")\n            logger.info(f\"Average Tokens/Second: {avg_tokens_per_sec:.2f}\")\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(description=\"Chat Load Testing Tool\")\n    parser.add_argument(\n        \"--url\",\n        type=str,\n        default=\"http://localhost:3000/api\",\n        help=\"Onyx URL\",\n    )\n    parser.add_argument(\n        \"--api-key\",\n        type=str,\n        help=\"Onyx Basic/Admin Level API key\",\n    )\n    parser.add_argument(\n        \"--concurrent\",\n        type=int,\n        default=10,\n        help=\"Number of concurrent chat sessions\",\n    )\n    parser.add_argument(\n        \"--messages\",\n        type=int,\n        default=1,\n        help=\"Number of messages per chat session\",\n    )\n\n    args = parser.parse_args()\n\n    load_tester = ChatLoadTester(\n        base_url=args.url,\n        api_key=args.api_key,\n        num_concurrent=args.concurrent,\n        messages_per_session=args.messages,\n    )\n\n    asyncio.run(load_tester.run_load_test())\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/debugging/debug_usage_limits.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nDebug script to fetch usage limit overrides from the control plane.\nRun this from within a data plane pod to diagnose usage limits issues.\n\nUsage:\n    python debug_usage_limits.py\n\nEnvironment variables required:\n    - DATA_PLANE_SECRET: Secret for generating JWT tokens\n    - CONTROL_PLANE_API_BASE_URL: Base URL for the control plane API\n\"\"\"\n\nimport json\nimport os\nimport sys\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nimport jwt\nimport requests\n\n\ndef generate_data_plane_token(secret: str) -> str:\n    \"\"\"Generate a JWT token for data plane authentication.\"\"\"\n    payload = {\n        \"iss\": \"data_plane\",\n        \"exp\": datetime.now(timezone.utc) + timedelta(minutes=5),\n        \"iat\": datetime.now(timezone.utc),\n        \"scope\": \"api_access\",\n    }\n    return jwt.encode(payload, secret, algorithm=\"HS256\")\n\n\ndef main() -> None:\n    # Get required environment variables\n    data_plane_secret = os.environ.get(\"DATA_PLANE_SECRET\", \"\")\n    control_plane_url = os.environ.get(\n        \"CONTROL_PLANE_API_BASE_URL\", \"http://localhost:8082\"\n    )\n\n    print(\"=\" * 60)\n    print(\"Usage Limits Debug Script\")\n    print(\"=\" * 60)\n    print(f\"CONTROL_PLANE_API_BASE_URL: {control_plane_url}\")\n    print(f\"DATA_PLANE_SECRET set: {bool(data_plane_secret)}\")\n    print()\n\n    if not data_plane_secret:\n        print(\"ERROR: DATA_PLANE_SECRET is not set!\")\n        sys.exit(1)\n\n    # Generate token\n    try:\n        token = generate_data_plane_token(data_plane_secret)\n        print(f\"Generated JWT token (first 50 chars): {token[:50]}...\")\n    except Exception as e:\n        print(f\"ERROR generating token: {e}\")\n        sys.exit(1)\n\n    # Make request to usage-limit-overrides endpoint\n    url = f\"{control_plane_url}/usage-limit-overrides\"\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Content-Type\": \"application/json\",\n    }\n\n    print(f\"\\nMaking request to: {url}\")\n    print(\n        f\"Headers: {json.dumps({k: v[:50] + '...' if k == 'Authorization' else v for k, v in headers.items()}, indent=2)}\"\n    )\n    print()\n\n    try:\n        response = requests.get(url, headers=headers, timeout=30)\n        print(f\"Status Code: {response.status_code}\")\n        print(f\"Response Headers: {dict(response.headers)}\")\n        print()\n\n        print(\"Response Body:\")\n        print(\"-\" * 40)\n        data = []\n        try:\n            data = response.json()\n            print(json.dumps(data, indent=2))\n        except json.JSONDecodeError:\n            print(response.text)\n        print(\"-\" * 40)\n        print(\"all tenant ids overridden:\")\n        for tenant_dct in data:  # should be a list of json\n            print(tenant_dct[\"tenant_id\"])\n\n        if response.status_code != 200:\n            print(\"\\nWARNING: Non-200 status code received!\")\n\n    except requests.exceptions.ConnectionError as e:\n        print(f\"ERROR: Connection failed - {e}\")\n        sys.exit(1)\n    except requests.exceptions.Timeout:\n        print(\"ERROR: Request timed out after 30 seconds\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"ERROR: Request failed - {e}\")\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/debugging/litellm/README",
    "content": "Resources in this directory are used to debug LiteLLM and other AI providers.\nNote that the requests are meant to be identical to what the Onyx application sends to litellm.\nDouble check that this is the case before using these scripts.\n\n## Files\n\n- **payload.json**: Contains a typical request from Onyx\n\n- **test_litellm.py**: Imports Onyx's LiteLLM instance (with monkey patches) and outputs the raw stream events received back from LiteLLM as JSON. Does not use payload.json, but has a similar request body. Consider directly importing litellm to skip monkey patching.\n\n- **directly_hit_azure_api.py**: Directly hits Azure OpenAI endpoints using payload.json. Bypasses LiteLLM for debugging purposes."
  },
  {
    "path": "backend/scripts/debugging/litellm/call_litellm.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nTest LiteLLM integration and output raw stream events.\n\nThis script uses Onyx's LiteLLM instance (with monkey patches) to make a completion\nrequest and outputs the raw stream events as JSON, one per line.\n\nUsage:\n    # Set environment variables if needed:\n    export LITELLM_DEBUG=1  # Optional: enable LiteLLM debug logs\n\n    # Update the configuration below, then run:\n    python test_litellm.py\n\"\"\"\n\nimport os\nfrom typing import Any\n\nfrom onyx.llm.litellm_singleton import litellm\n\n# Optional: enable LiteLLM debug logs (set `LITELLM_DEBUG=1`)\nif os.getenv(\"LITELLM_DEBUG\") == \"1\":\n    getattr(litellm, \"_turn_on_debug\", lambda: None)()\n\n# Configuration: Update these values before running\nMODEL = \"azure/responses/YOUR_MODEL_NAME_HERE\"\nAPI_KEY = \"YOUR_API_KEY_HERE\"\nBASE_URL = \"https://YOUR_DEPLOYMENT_URL_HERE.cognitiveservices.azure.com\"\nAPI_VERSION = \"2025-03-01-preview\"  # For Azure, must be 2025-03-01-preview\n\n# Example messages - customize as needed\nMESSAGES = [\n    {\"role\": \"user\", \"content\": \"hi\"},\n    {\"role\": \"assistant\", \"content\": \"Hello! How can I help you today?\"},\n    {\"role\": \"user\", \"content\": \"what is onyx? search internally and the web\"},\n]\n\nstream = litellm.completion(\n    mock_response=None,\n    # Insert /responses/ between provider and model to use the litellm completions ->responses bridge\n    model=MODEL,\n    api_key=API_KEY,\n    base_url=BASE_URL,\n    api_version=API_VERSION,\n    custom_llm_provider=None,\n    messages=MESSAGES,\n    tools=[\n        {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": \"internal_search\",\n                \"description\": \"Search connected applications for information.\",\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        \"queries\": {\n                            \"type\": \"array\",\n                            \"items\": {\"type\": \"string\"},\n                            \"description\": \"List of search queries to execute, typically a single query.\",\n                        }\n                    },\n                    \"required\": [\"queries\"],\n                },\n            },\n        },\n        {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": \"generate_image\",\n                \"description\": \"Generate an image based on a prompt. Do not use unless the user specifically requests an image.\",\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        \"prompt\": {\n                            \"type\": \"string\",\n                            \"description\": \"Prompt used to generate the image\",\n                        },\n                        \"shape\": {\n                            \"type\": \"string\",\n                            \"description\": \"Optional - only specify if you want a specific shape. \"\n                            \"Image shape: 'square', 'portrait', or 'landscape'.\",\n                            \"enum\": [\"square\", \"portrait\", \"landscape\"],\n                        },\n                    },\n                    \"required\": [\"prompt\"],\n                },\n            },\n        },\n        {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": \"web_search\",\n                \"description\": \"Search the web for information. \"\n                \"Returns a list of search results with titles, metadata, and snippets.\",\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        \"queries\": {\n                            \"type\": \"array\",\n                            \"items\": {\"type\": \"string\"},\n                            \"description\": \"One or more queries to look up on the web.\",\n                        }\n                    },\n                    \"required\": [\"queries\"],\n                },\n            },\n        },\n        {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": \"open_url\",\n                \"description\": \"Open and read the content of one or more URLs. Returns the text content of the pages.\",\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        \"urls\": {\n                            \"type\": \"array\",\n                            \"items\": {\"type\": \"string\"},\n                            \"description\": \"List of URLs to open and read. Can be a single URL or multiple URLs.\",\n                        }\n                    },\n                    \"required\": [\"urls\"],\n                },\n            },\n        },\n    ],\n    tool_choice=\"auto\",\n    stream=True,\n    temperature=1,\n    timeout=600,\n    max_tokens=None,\n    stream_options={\"include_usage\": True},\n    reasoning={\"effort\": \"low\", \"summary\": \"auto\"},\n    parallel_tool_calls=True,\n    allowed_openai_params=[\"tool_choice\"],\n)\n\n\ndef _to_jsonable(x: Any) -> Any:\n    \"\"\"Convert an object to a JSON-serializable format.\n\n    Handles Pydantic models, dataclasses, and other common types.\n    \"\"\"\n    if isinstance(x, (str, int, float, bool)) or x is None:\n        return x\n    if isinstance(x, dict):\n        return {k: _to_jsonable(v) for k, v in x.items()}\n    if isinstance(x, list):\n        return [_to_jsonable(v) for v in x]\n    if hasattr(x, \"model_dump\"):\n        return _to_jsonable(x.model_dump())\n    if hasattr(x, \"dict\"):\n        try:\n            return _to_jsonable(x.dict())\n        except Exception:\n            pass\n    return str(x)\n\n\ndef _filter_null_fields(obj: Any) -> Any:\n    \"\"\"Recursively filter out None/null values from a data structure.\"\"\"\n    if isinstance(obj, dict):\n        return {\n            k: _filter_null_fields(v)\n            for k, v in obj.items()\n            if v is not None\n            and (not isinstance(v, (dict, list)) or _filter_null_fields(v))\n        }\n    if isinstance(obj, list):\n        filtered = [_filter_null_fields(item) for item in obj]\n        return [item for item in filtered if item is not None]\n    return obj\n\n\ndef _pretty_print_event(event: Any) -> str:\n    \"\"\"Pretty print an event, showing only non-null fields with newlines.\"\"\"\n    jsonable = _to_jsonable(event)\n    filtered = _filter_null_fields(jsonable)\n\n    lines = []\n\n    def _format_value(key: str, value: Any, indent: int = 0) -> None:\n        \"\"\"Recursively format key-value pairs.\"\"\"\n        prefix = \"  \" * indent\n        if isinstance(value, dict):\n            if indent == 0:\n                # Top-level: print each key-value pair on separate lines\n                for k, v in value.items():\n                    _format_value(k, v, indent)\n            else:\n                # Nested dict: print key and then nested items\n                lines.append(f\"{prefix}{key}:\")\n                for k, v in value.items():\n                    _format_value(k, v, indent + 1)\n        elif isinstance(value, list):\n            if not value:\n                return  # Skip empty lists\n            lines.append(f\"{prefix}{key}:\")\n            for i, item in enumerate(value):\n                if isinstance(item, dict):\n                    lines.append(f\"{prefix}  [{i}]:\")\n                    for k, v in item.items():\n                        _format_value(k, v, indent + 2)\n                else:\n                    lines.append(f\"{prefix}  [{i}]: {item}\")\n        else:\n            lines.append(f\"{prefix}{key}: {value}\")\n\n    if isinstance(filtered, dict):\n        for k, v in filtered.items():\n            _format_value(k, v, 0)\n    else:\n        lines.append(str(filtered))\n\n    return \"\\n\".join(lines)\n\n\nif __name__ == \"__main__\":\n    # Output raw stream events in a pretty format\n    for event in stream:\n        print(\"=\" * 80, flush=True)\n        print(_pretty_print_event(event), flush=True)\n        print(flush=True)\n"
  },
  {
    "path": "backend/scripts/debugging/litellm/directly_hit_azure_api.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nDirectly hit Azure OpenAI endpoints for debugging.\n\nThis script bypasses LiteLLM and directly calls Azure OpenAI APIs.\nUses URL and API key constants plus a payload.json in the same directory.\n\nUsage:\n    python directly_hit_azure_api.py\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nfrom pathlib import Path\n\nimport httpx\n\n\n# Configuration: Update these values before running\nURL = \"https://YOUR_AZURE_OPENAI_DEPLOYMENT_URL_HERE.cognitiveservices.azure.com/\"\nAPI_KEY = \"YOUR_API_KEY_HERE\"\n\nPAYLOAD_PATH = Path(__file__).resolve().with_name(\"payload.json\")\n\n\ndef _load_payload_json() -> dict:\n    \"\"\"Load and parse payload.json file.\"\"\"\n    if not PAYLOAD_PATH.exists():\n        raise FileNotFoundError(\n            f\"payload.json not found at {PAYLOAD_PATH!r}. Create payload.json next to this script.\"\n        )\n    return json.loads(PAYLOAD_PATH.read_text())\n\n\ndef _print_response(resp: httpx.Response) -> None:\n    \"\"\"Print HTTP response in a readable format.\"\"\"\n    print(f\"HTTP {resp.status_code}\")\n\n    content_type = resp.headers.get(\"content-type\", \"\")\n    raw = resp.content\n    if not raw:\n        return\n\n    if \"json\" in content_type.lower():\n        try:\n            obj = resp.json()\n            print(json.dumps(obj, indent=2, ensure_ascii=False, sort_keys=False))\n            return\n        except Exception:\n            pass\n\n    # Fallback: print as text (replace errors to avoid crashes).\n    print(raw.decode(\"utf-8\", errors=\"replace\"))\n\n\ndef main() -> int:\n    \"\"\"Main entry point.\"\"\"\n    if (\n        URL\n        == \"https://YOUR_AZURE_OPENAI_DEPLOYMENT_URL_HERE.cognitiveservices.azure.com/\"\n    ):\n        raise SystemExit(\n            \"Please set the URL constant at the top of this file to your Azure OpenAI deployment URL.\"\n        )\n    if API_KEY == \"YOUR_API_KEY_HERE\":\n        raise SystemExit(\n            \"Please set the API_KEY constant at the top of this file to your Azure OpenAI API key.\"\n        )\n\n    payload = _load_payload_json()\n\n    headers = {\n        \"api-key\": API_KEY,\n        \"content-type\": \"application/json\",\n    }\n\n    with httpx.Client(timeout=60.0) as client:\n        resp = client.post(\n            url=URL,\n            headers=headers,\n            json=payload,\n        )\n\n    _print_response(resp)\n    return 0 if resp.is_success else 1\n\n\nif __name__ == \"__main__\":\n    raise SystemExit(main())\n"
  },
  {
    "path": "backend/scripts/debugging/litellm/payload.json",
    "content": "{\n    \"model\": \"[YOUR MODEL HERE]\",\n    \"input\": [\n      {\n        \"type\": \"message\",\n        \"role\": \"user\",\n        \"content\": [\n          {\n            \"type\": \"input_text\",\n            \"text\": \"hi\"\n          }\n        ]\n      },\n      {\n        \"type\": \"message\",\n        \"role\": \"assistant\",\n        \"content\": [\n          {\n            \"type\": \"output_text\",\n            \"text\": \"Hey! 👋\\n\\nHow can I help today?  \\n- Questions about Onyx (setup, search, auth, plugins)?\\n- Debugging or architecture advice?\\n- Writing docs, emails, or code snippets?\\n- Anything else on your mind?\"\n          }\n        ]\n      },\n      {\n        \"type\": \"message\",\n        \"role\": \"user\",\n        \"content\": [\n          {\n            \"type\": \"input_text\",\n            \"text\": \"[YOUR QUERY HERE]\"\n          }\n        ]\n      }\n    ],\n    \"instructions\": \"Formatting re-enabled. You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.\\n\\nThe current date is Sunday December 28, 2025.\\n\\n# Response Style\\nYou use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.\\nYou use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\\\n[expression]\\\\n$$' for standalone cases and '\\\\\\\\( [expression] \\\\\\\\)' when inline.\\nFor code you prefer to use Markdown and specify the language.\\nYou can use horizontal rules (---) to separate sections of your responses.\\nYou can use Markdown tables to format your responses for data, lists, and other structured information.\\n\\n# User Information\\n\\nThe user is at an organization called `Onyx`.\\n\\nOrganization description: AI chat and enterprise search. Open source, self host or managed cloud.\\n\\nGithub: https://github.com/onyx-dot-app/onyx\\nWebsite: https://onyx.app\\n\\n- User's name: USER_A\\n- User's email: user_a@onyx.app\\n\\n# Tools\\n\\nFor questions that can be fully answered from existing knowledge which is unlikely to change, answer the user directly without using any tools. When there is ambiguity, default to searching to get more context.\\n\\nWhen using any search type tool, do not make any assumptions and stay as faithful to the user's query as possible. Between internal and web search, think about if the user's query is likely better answered by team internal sources or online web pages. For queries that are short phrases, ambiguous/unclear, or keyword heavy, prioritize internal search. If ambiguous, prioritize internal search.\\nWhen searching for information, if the initial results cannot fully answer the user's query, try again with different tools or arguments. Do not repeat the same or very similar queries if it already has been run in the chat history.\\n\\n## internal_search\\nUse the `internal_search` tool to search connected applications for information.\\n\\n## web_search\\nUse the `web_search` tool to access up-to-date information from the web.\\n\\n## open_url\\nUse the `open_url` tool to read the content of one or more URLs.\\n\\n## generate_image\\nNEVER use generate_image unless the user specifically requests an image.\\n\",\n    \"parallel_tool_calls\": true,\n    \"reasoning\": {\n      \"effort\": \"low\",\n      \"summary\": \"auto\"\n    },\n    \"stream\": true,\n    \"temperature\": 1,\n    \"tool_choice\": \"auto\",\n    \"tools\": [\n      {\n        \"name\": \"internal_search\",\n        \"type\": \"function\",\n        \"description\": \"Search connected applications for information.\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"queries\": {\n              \"type\": \"array\",\n              \"items\": { \"type\": \"string\" },\n              \"description\": \"List of search queries to execute, typically a single query.\"\n            }\n          },\n          \"required\": [\"queries\"]\n        }\n      },\n      {\n        \"name\": \"generate_image\",\n        \"type\": \"function\",\n        \"description\": \"Generate an image based on a prompt. Do not use unless the user specifically requests an image.\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"prompt\": { \"type\": \"string\" },\n            \"shape\": {\n              \"type\": \"string\",\n              \"enum\": [\"square\", \"portrait\", \"landscape\"]\n            }\n          },\n          \"required\": [\"prompt\"]\n        }\n      },\n      {\n        \"name\": \"web_search\",\n        \"type\": \"function\",\n        \"description\": \"Search the web for information.\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"queries\": {\n              \"type\": \"array\",\n              \"items\": { \"type\": \"string\" }\n            }\n          },\n          \"required\": [\"queries\"]\n        }\n      },\n      {\n        \"name\": \"open_url\",\n        \"type\": \"function\",\n        \"description\": \"Open and read the content of one or more URLs.\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"urls\": {\n              \"type\": \"array\",\n              \"items\": { \"type\": \"string\" }\n            }\n          },\n          \"required\": [\"urls\"]\n        }\n      }\n    ]\n  }\n  "
  },
  {
    "path": "backend/scripts/debugging/onyx_db.py",
    "content": "\"\"\"Onyx Database tool\"\"\"\n\nimport os\n\n# hack to work around excessive use of globals in other functions\nos.environ[\"MULTI_TENANT\"] = \"True\"\n\nif True:  # noqa: E402\n    import csv\n    import argparse\n\n    from pydantic import BaseModel\n    from sqlalchemy import func\n\n    from onyx.db.engine.sql_engine import (\n        SYNC_DB_API,\n        USE_IAM_AUTH,\n        build_connection_string,\n    )\n    from onyx.db.engine.tenant_utils import get_all_tenant_ids\n    from onyx.db.engine.sql_engine import get_session_with_tenant\n    from onyx.db.engine.sql_engine import SqlEngine\n    from onyx.db.models import Document\n    from onyx.db.models import User\n    from onyx.utils.logger import setup_logger\n    from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n    import heapq\n\n    logger = setup_logger()\n\n\nclass TenantMetadata(BaseModel):\n    first_email: str | None\n    user_count: int\n    num_docs: int\n    num_chunks: int\n\n\nclass SQLAlchemyDebugging:\n    # Class for managing DB debugging actions.\n    def __init__(self) -> None:\n        pass\n\n    def top_chunks(self, filename: str, k: int = 10) -> None:\n        tenants_to_total_chunks: dict[str, TenantMetadata] = {}\n\n        logger.info(\"Fetching all tenant id's.\")\n        tenant_ids = get_all_tenant_ids()\n        num_tenant_ids = len(tenant_ids)\n\n        logger.info(f\"Found {num_tenant_ids} tenant id's.\")\n\n        num_processed = 0\n        for tenant_id in tenant_ids:\n            num_processed += 1\n\n            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)\n\n            try:\n                with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n                    first_email = None\n\n                    first_user = db_session.query(User).first()\n                    if first_user:\n                        first_email = first_user.email\n\n                    user_count = db_session.query(User).count()\n\n                    # Calculate the total number of document rows for the current tenant\n                    total_documents = db_session.query(Document).count()\n                    # marginally useful to skip some tenants ... maybe we can improve on this\n                    # if total_documents < 100:\n                    #     logger.info(f\"{num_processed} of {num_tenant_ids}: Tenant '{tenant_id}': \"\n                    #                 f\"docs={total_documents} skip=True\")\n                    #     continue\n\n                    # Calculate the sum of chunk_count for the current tenant\n                    # If there are no documents or all chunk_counts are NULL, sum will be None\n                    total_chunks = db_session.query(\n                        func.sum(Document.chunk_count)\n                    ).scalar()\n\n                    total_chunks = total_chunks or 0\n\n                    logger.info(\n                        f\"{num_processed} of {num_tenant_ids}: Tenant '{tenant_id}': \"\n                        f\"first_email={first_email} user_count={user_count} \"\n                        f\"docs={total_documents} chunks={total_chunks}\"\n                    )\n\n                tenants_to_total_chunks[tenant_id] = TenantMetadata(\n                    first_email=first_email,\n                    user_count=user_count,\n                    num_docs=total_documents,\n                    num_chunks=total_chunks,\n                )\n            except Exception as e:\n                logger.error(f\"Error processing tenant '{tenant_id}': {e}\")\n            finally:\n                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n        # sort all by docs and dump to csv\n        sorted_tenants = sorted(\n            tenants_to_total_chunks.items(),\n            key=lambda x: (x[1].num_chunks, x[1].num_docs),\n            reverse=True,\n        )\n\n        with open(filename, \"w\") as csvfile:\n            writer = csv.writer(csvfile)\n            writer.writerow(\n                [\"tenant_id\", \"first_user_email\", \"num_user\", \"num_docs\", \"num_chunks\"]\n            )  # Write header\n            # Write data rows (using the sorted list)\n            for tenant_id, metadata in sorted_tenants:\n                writer.writerow(\n                    [\n                        tenant_id,\n                        metadata.first_email,\n                        metadata.user_count,\n                        metadata.num_docs,\n                        metadata.num_chunks,\n                    ]\n                )\n            logger.info(f\"Successfully wrote statistics to {filename}\")\n\n        # output top k by chunks\n        top_k_tenants = heapq.nlargest(\n            k, tenants_to_total_chunks.items(), key=lambda x: x[1].num_docs\n        )\n\n        logger.info(f\"Top {k} tenants by total chunks: {top_k_tenants}\")\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(description=\"Database/SQL debugging tool\")\n    parser.add_argument(\"--username\", help=\"Database username\", default=\"postgres\")\n    parser.add_argument(\"--password\", help=\"Database password\", required=True)\n    parser.add_argument(\"--host\", help=\"Database host\", default=\"localhost\")\n    parser.add_argument(\"--port\", help=\"Database port\", default=5432)\n    parser.add_argument(\"--db\", help=\"Database default db name\", default=\"danswer\")\n\n    parser.add_argument(\"--report\", help=\"Generate the given report\")\n\n    parser.add_argument(\n        \"--filename\",\n        type=str,\n        default=\"tenants_by_num_docs.csv\",\n        help=\"Generate the given report\",\n        required=False,\n    )\n\n    args = parser.parse_args()\n\n    logger.info(f\"{args}\")\n\n    connection_string = build_connection_string(\n        db_api=SYNC_DB_API,\n        app_name=\"onyx_db_sync\",\n        use_iam_auth=USE_IAM_AUTH,\n        user=args.username,\n        password=args.password,\n        host=args.host,\n        port=args.port,\n        db=args.db,\n    )\n\n    SqlEngine.init_engine(\n        pool_size=20, max_overflow=5, connection_string=connection_string\n    )\n\n    debugger = SQLAlchemyDebugging()\n\n    if args.report == \"top-chunks\":\n        debugger.top_chunks(args.filename, 10)\n    else:\n        logger.info(\"No action.\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/debugging/onyx_list_tenants.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nTenant List Script\nSimple script to list the tenant IDs in the database.\nUsed by the parallel migration script to determine how to split work.\n\nUsage:\n\n```\n# List one tenant per line (default)\nPYTHONPATH=. python scripts/debugging/onyx_list_tenants.py\n\n# Output as CSV (all on one line)\nPYTHONPATH=. python scripts/debugging/onyx_list_tenants.py --csv\n\n# Output as CSV batched into groups of 5\nPYTHONPATH=. python scripts/debugging/onyx_list_tenants.py --csv -n 5\n```\n\n\"\"\"\n\nimport argparse\nimport sys\n\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.engine.tenant_utils import get_all_tenant_ids\nfrom shared_configs.configs import TENANT_ID_PREFIX\n\n\ndef batch_list(items: list[str], batch_size: int) -> list[list[str]]:\n    \"\"\"Split a list into batches of specified size.\"\"\"\n    return [items[i : i + batch_size] for i in range(0, len(items), batch_size)]\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(\n        description=\"List tenant IDs from the database.\",\n        formatter_class=argparse.RawDescriptionHelpFormatter,\n    )\n    parser.add_argument(\n        \"--csv\",\n        action=\"store_true\",\n        help=\"Output as comma-separated values instead of one per line\",\n    )\n    parser.add_argument(\n        \"-n\",\n        \"--max-args\",\n        type=int,\n        default=None,\n        metavar=\"N\",\n        help=\"Batch CSV output into groups of N items (requires --csv)\",\n    )\n    args = parser.parse_args()\n\n    if args.max_args is not None and not args.csv:\n        parser.error(\"--max-args/-n requires --csv flag\")\n\n    try:\n        # Initialize the database engine with conservative settings\n        SqlEngine.init_engine(pool_size=5, max_overflow=2)\n\n        # Get all tenant IDs\n        tenant_ids = get_all_tenant_ids()\n\n        # Filter to only tenant schemas (not public or other system schemas)\n        tenant_schemas = [tid for tid in tenant_ids if tid.startswith(TENANT_ID_PREFIX)]\n\n        if args.csv:\n            if args.max_args:\n                # Output batched CSV lines\n                for batch in batch_list(tenant_schemas, args.max_args):\n                    print(\",\".join(batch))\n            else:\n                # Output all on one line\n                print(\",\".join(tenant_schemas))\n        else:\n            # Print all tenant IDs, one per line\n            for tenant_id in tenant_schemas:\n                print(tenant_id)\n\n    except Exception as e:\n        print(f\"Error getting tenant IDs: {e}\", file=sys.stderr)\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/debugging/onyx_redis.py",
    "content": "import argparse\nimport json\nimport logging\nimport sys\nimport time\nfrom enum import Enum\nfrom logging import getLogger\nfrom typing import cast\nfrom uuid import UUID\n\nfrom redis import Redis\n\nfrom ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email\nfrom onyx.auth.invited_users import get_invited_users\nfrom onyx.auth.invited_users import write_invited_users\nfrom onyx.configs.app_configs import REDIS_AUTH_KEY_PREFIX\nfrom onyx.configs.app_configs import REDIS_DB_NUMBER\nfrom onyx.configs.app_configs import REDIS_HOST\nfrom onyx.configs.app_configs import REDIS_PASSWORD\nfrom onyx.configs.app_configs import REDIS_PORT\nfrom onyx.configs.app_configs import REDIS_SSL\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.users import get_user_by_email\nfrom onyx.redis.redis_connector import RedisConnector\nfrom onyx.redis.redis_pool import RedisPool\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom shared_configs.contextvars import get_current_tenant_id\n\n# Tool to run helpful operations on Redis in production\n# This is targeted for internal usage and may not have all the necessary parameters\n# for general usage across custom deployments\n\n# Configure the logger\nlogging.basicConfig(\n    level=logging.INFO,  # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)\n    format=\"%(asctime)s - %(name)s - %(levelname)s - %(message)s\",  # Log format\n    handlers=[logging.StreamHandler()],  # Output logs to console\n)\n\nlogger = getLogger(__name__)\n\nSCAN_ITER_COUNT = 10000\nBATCH_DEFAULT = 1000\n\n\nclass OnyxRedisCommand(Enum):\n    purge_connectorsync_taskset = \"purge_connectorsync_taskset\"\n    purge_documentset_taskset = \"purge_documentset_taskset\"\n    purge_usergroup_taskset = \"purge_usergroup_taskset\"\n    purge_locks_blocking_deletion = \"purge_locks_blocking_deletion\"\n    purge_vespa_syncing = \"purge_vespa_syncing\"\n    purge_pidbox = \"purge_pidbox\"\n    get_user_token = \"get_user_token\"\n    delete_user_token = \"delete_user_token\"\n    add_invited_user = \"add_invited_user\"\n    get_list_element = \"get_list_element\"\n\n    def __str__(self) -> str:\n        return self.value\n\n\ndef get_user_id(user_email: str) -> tuple[UUID, str]:\n    tenant_id = (\n        get_tenant_id_for_email(user_email) if MULTI_TENANT else POSTGRES_DEFAULT_SCHEMA\n    )\n\n    with get_session_with_tenant(tenant_id=tenant_id) as session:\n        user = get_user_by_email(user_email, session)\n        if user is None:\n            raise ValueError(f\"User not found for email: {user_email}\")\n        return user.id, tenant_id\n\n\ndef onyx_redis(\n    command: OnyxRedisCommand,\n    batch: int,\n    dry_run: bool,\n    ssl: bool,\n    host: str,\n    port: int,\n    db: int,\n    password: str | None,\n    user_email: str | None = None,\n    cc_pair_id: int | None = None,\n) -> int:\n    # this is global and not tenant aware\n    pool = RedisPool.create_pool(\n        host=host,\n        port=port,\n        db=db,\n        password=password if password else \"\",\n        ssl=ssl,\n        ssl_cert_reqs=\"optional\",\n        ssl_ca_certs=None,\n    )\n\n    r = Redis(connection_pool=pool)\n\n    logger.info(\"Redis ping starting. This may hang if your settings are incorrect.\")\n\n    try:\n        r.ping()\n    except:\n        logger.exception(\"Redis ping exceptioned\")\n        raise\n\n    logger.info(\"Redis ping succeeded.\")\n\n    if command == OnyxRedisCommand.purge_connectorsync_taskset:\n        \"\"\"Purge connector tasksets. Used when the tasks represented in the tasksets\n        have been purged.\"\"\"\n        return purge_by_match_and_type(\n            \"*connectorsync_taskset*\", \"set\", batch, dry_run, r\n        )\n    elif command == OnyxRedisCommand.purge_documentset_taskset:\n        return purge_by_match_and_type(\n            \"*documentset_taskset*\", \"set\", batch, dry_run, r\n        )\n    elif command == OnyxRedisCommand.purge_usergroup_taskset:\n        return purge_by_match_and_type(\"*usergroup_taskset*\", \"set\", batch, dry_run, r)\n    elif command == OnyxRedisCommand.purge_locks_blocking_deletion:\n        if cc_pair_id is None:\n            logger.error(\"You must specify --cc-pair with purge_deletion_locks\")\n            return 1\n\n        tenant_id = get_current_tenant_id()\n        logger.info(f\"Purging locks associated with deleting cc_pair={cc_pair_id}.\")\n        redis_connector = RedisConnector(tenant_id, cc_pair_id)\n\n        redis_delete_if_exists_helper(\n            f\"{tenant_id}:{redis_connector.prune.fence_key}\", dry_run, r\n        )\n        redis_delete_if_exists_helper(\n            f\"{tenant_id}:{redis_connector.permissions.fence_key}\", dry_run, r\n        )\n        redis_delete_if_exists_helper(\n            f\"{tenant_id}:{redis_connector.external_group_sync.fence_key}\", dry_run, r\n        )\n        return 0\n    elif command == OnyxRedisCommand.purge_vespa_syncing:\n        return purge_by_match_and_type(\n            \"*connectorsync:vespa_syncing*\", \"string\", batch, dry_run, r\n        )\n    elif command == OnyxRedisCommand.purge_pidbox:\n        return purge_by_match_and_type(\n            \"*reply.celery.pidbox\", \"list\", batch, dry_run, r\n        )\n    elif command == OnyxRedisCommand.get_list_element:\n        # just hardcoded for now\n        result = r.lrange(\n            \"0097a564-d343-3c1f-9fd1-af8cce038115.reply.celery.pidbox\", 0, 0\n        )\n        print(f\"{result}\")\n        return 0\n    elif command == OnyxRedisCommand.get_user_token:\n        if not user_email:\n            logger.error(\"You must specify --user-email with get_user_token\")\n            return 1\n        token_key = get_user_token_from_redis(r, user_email)\n        if token_key:\n            print(f\"Token key for user {user_email}: {token_key}\")\n            return 0\n        else:\n            print(f\"No token found for user {user_email}\")\n            return 2\n    elif command == OnyxRedisCommand.delete_user_token:\n        if not user_email:\n            logger.error(\"You must specify --user-email with delete_user_token\")\n            return 1\n        if delete_user_token_from_redis(r, user_email, dry_run):\n            return 0\n        else:\n            return 2\n    elif command == OnyxRedisCommand.add_invited_user:\n        if not user_email:\n            logger.error(\"You must specify --user-email with add_invited_user\")\n            return 1\n        current_invited_users = get_invited_users()\n        if user_email not in current_invited_users:\n            current_invited_users.append(user_email)\n            if dry_run:\n                logger.info(f\"(DRY-RUN) Would add {user_email} to invited users\")\n            else:\n                write_invited_users(current_invited_users)\n                logger.info(f\"Added {user_email} to invited users\")\n        else:\n            logger.info(f\"{user_email} is already in the invited users list\")\n        return 0\n    else:\n        pass\n\n    return 255\n\n\ndef flush_batch_delete(batch_keys: list[bytes], r: Redis) -> None:\n    logger.info(f\"Flushing {len(batch_keys)} operations to Redis.\")\n    with r.pipeline() as pipe:\n        for batch_key in batch_keys:\n            pipe.delete(batch_key)\n        pipe.execute()\n\n\ndef redis_delete_if_exists_helper(key: str, dry_run: bool, r: Redis) -> bool:\n    \"\"\"Returns True if the key was found, False if not.\n    This function exists for logging purposes as the delete operation itself\n    doesn't really need to check the existence of the key.\n    \"\"\"\n\n    if not r.exists(key):\n        logger.info(f\"Did not find {key}.\")\n        return False\n\n    if dry_run:\n        logger.info(f\"(DRY-RUN) Deleting {key}.\")\n    else:\n        logger.info(f\"Deleting {key}.\")\n        r.delete(key)\n\n    return True\n\n\ndef purge_by_match_and_type(\n    match_pattern: str, match_type: str, batch_size: int, dry_run: bool, r: Redis\n) -> int:\n    \"\"\"match_pattern: glob style expression\n    match_type: https://redis.io/docs/latest/commands/type/\n    \"\"\"\n\n    logger.info(\n        f\"purge_by_match_and_type start: match_pattern={match_pattern} match_type={match_type}\"\n    )\n\n    # cursor = \"0\"\n    # while cursor != 0:\n    #     cursor, data = self.scan(\n    #         cursor=cursor, match=match, count=count, _type=_type, **kwargs\n    #     )\n\n    start = time.monotonic()\n\n    count = 0\n    batch_keys: list[bytes] = []\n    for key in r.scan_iter(match_pattern, count=SCAN_ITER_COUNT, _type=match_type):\n        # key_type = r.type(key)\n        # if key_type != match_type.encode(\"utf-8\"):\n        #     continue\n\n        key = cast(bytes, key)\n        key_str = key.decode(\"utf-8\")\n\n        count += 1\n        if dry_run:\n            logger.info(f\"(DRY-RUN) Deleting item {count}: {key_str}\")\n            continue\n\n        logger.info(f\"Deleting item {count}: {key_str}\")\n\n        batch_keys.append(key)\n\n        # flush if batch size has been reached\n        if len(batch_keys) >= batch_size:\n            flush_batch_delete(batch_keys, r)\n            batch_keys.clear()\n\n    # final flush\n    flush_batch_delete(batch_keys, r)\n    batch_keys.clear()\n\n    logger.info(f\"Deleted {count} matches.\")\n\n    elapsed = time.monotonic() - start\n    logger.info(f\"Time elapsed: {elapsed:.2f}s\")\n    return 0\n\n\ndef get_user_token_from_redis(r: Redis, user_email: str) -> str | None:\n    \"\"\"\n    Scans Redis keys for a user token that matches user_email or user_id fields.\n    Returns the token key if found, else None.\n    \"\"\"\n    user_id, tenant_id = get_user_id(user_email)\n\n    # Scan for keys matching the auth key prefix\n    auth_keys = r.scan_iter(f\"{REDIS_AUTH_KEY_PREFIX}*\", count=SCAN_ITER_COUNT)\n\n    matching_key = None\n\n    for key in auth_keys:\n        key_str = key.decode(\"utf-8\")\n        jwt_token = r.get(key_str)\n\n        if not jwt_token:\n            continue\n\n        try:\n            jwt_token_str = (\n                jwt_token.decode(\"utf-8\")\n                if isinstance(jwt_token, bytes)\n                else str(jwt_token)\n            )\n\n            if jwt_token_str.startswith(\"b'\") and jwt_token_str.endswith(\"'\"):\n                jwt_token_str = jwt_token_str[2:-1]  # Remove b'' wrapper\n\n            jwt_data = json.loads(jwt_token_str)\n            if jwt_data.get(\"tenant_id\") == tenant_id and str(\n                jwt_data.get(\"sub\")\n            ) == str(user_id):\n                matching_key = key_str\n                break\n        except json.JSONDecodeError:\n            logger.error(f\"Failed to decode JSON for key: {key_str}\")\n        except Exception as e:\n            logger.error(f\"Error processing JWT for key: {key_str}. Error: {str(e)}\")\n\n    if matching_key:\n        return matching_key[len(REDIS_AUTH_KEY_PREFIX) :]\n    return None\n\n\ndef delete_user_token_from_redis(\n    r: Redis, user_email: str, dry_run: bool = False\n) -> bool:\n    \"\"\"\n    Scans Redis keys for a user token matching user_email and deletes it if found.\n    Returns True if something was deleted, otherwise False.\n    \"\"\"\n    user_id, tenant_id = get_user_id(user_email)\n\n    # Scan for keys matching the auth key prefix\n    auth_keys = r.scan_iter(f\"{REDIS_AUTH_KEY_PREFIX}*\", count=SCAN_ITER_COUNT)\n    matching_key = None\n\n    for key in auth_keys:\n        key_str = key.decode(\"utf-8\")\n        jwt_token = r.get(key_str)\n\n        if not jwt_token:\n            continue\n\n        try:\n            jwt_token_str = (\n                jwt_token.decode(\"utf-8\")\n                if isinstance(jwt_token, bytes)\n                else str(jwt_token)\n            )\n\n            if jwt_token_str.startswith(\"b'\") and jwt_token_str.endswith(\"'\"):\n                jwt_token_str = jwt_token_str[2:-1]  # Remove b'' wrapper\n\n            jwt_data = json.loads(jwt_token_str)\n            if jwt_data.get(\"tenant_id\") == tenant_id and str(\n                jwt_data.get(\"sub\")\n            ) == str(user_id):\n                matching_key = key_str\n                break\n        except json.JSONDecodeError:\n            logger.error(f\"Failed to decode JSON for key: {key_str}\")\n        except Exception as e:\n            logger.error(f\"Error processing JWT for key: {key_str}. Error: {str(e)}\")\n\n    if matching_key:\n        if dry_run:\n            logger.info(f\"(DRY-RUN) Would delete token key: {matching_key}\")\n        else:\n            r.delete(matching_key)\n            logger.info(f\"Deleted token for user: {user_email}\")\n        return True\n    else:\n        logger.info(f\"No token found for user: {user_email}\")\n        return False\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"Onyx Redis Manager\")\n    parser.add_argument(\n        \"--command\",\n        type=OnyxRedisCommand,\n        help=\"The command to run\",\n        choices=list(OnyxRedisCommand),\n        required=True,\n    )\n\n    parser.add_argument(\n        \"--ssl\",\n        type=bool,\n        default=REDIS_SSL,\n        help=\"Use SSL when connecting to Redis. Usually True for prod and False for local testing\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--host\",\n        type=str,\n        default=REDIS_HOST,\n        help=\"The redis host\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--port\",\n        type=int,\n        default=REDIS_PORT,\n        help=\"The redis port\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--db\",\n        type=int,\n        default=REDIS_DB_NUMBER,\n        help=\"The redis db\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--password\",\n        type=str,\n        default=REDIS_PASSWORD,\n        help=\"The redis password\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--tenant-id\",\n        type=str,\n        help=\"Tenant ID for get, delete user token, or add to invited users\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--batch\",\n        type=int,\n        default=BATCH_DEFAULT,\n        help=\"Size of operation batches to send to Redis\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--dry-run\",\n        action=\"store_true\",\n        help=\"Perform a dry run without actually executing modifications\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--user-email\",\n        type=str,\n        help=\"User email for get, delete user token, or add to invited users\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--cc-pair\",\n        type=int,\n        help=\"A connector credential pair id. Used with the purge_deletion_locks command.\",\n        required=False,\n    )\n\n    args = parser.parse_args()\n\n    if args.tenant_id:\n        CURRENT_TENANT_ID_CONTEXTVAR.set(args.tenant_id)\n\n    exitcode = onyx_redis(\n        command=args.command,\n        batch=args.batch,\n        dry_run=args.dry_run,\n        ssl=args.ssl,\n        host=args.host,\n        port=args.port,\n        db=args.db,\n        password=args.password,\n        user_email=args.user_email,\n        cc_pair_id=args.cc_pair,\n    )\n    sys.exit(exitcode)\n"
  },
  {
    "path": "backend/scripts/debugging/onyx_vespa_schemas.py",
    "content": "\"\"\"Tool to generate all supported schema variations for Onyx Cloud's Vespa database.\n\nUsage:\n\n```\nPYTHONPATH=. python scripts/debugging/onyx_vespa_schemas.py\n```\n\nThen, paste them into the existing vespa schema downloaded from the Vespa console,\nand then re-zip.\n\"\"\"\n\nimport argparse\nimport os\nfrom pathlib import Path\n\nimport jinja2\n\nfrom onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef write_schema(\n    index_name: str,\n    dim: int,\n    embedding_precision: EmbeddingPrecision,\n    template: jinja2.Template,\n    output_path: Path,\n) -> None:\n    # Create schemas directory if it doesn't exist\n    schemas_dir = output_path / \"schemas\"\n    schemas_dir.mkdir(parents=True, exist_ok=True)\n\n    index_filename = schemas_dir / (index_name + \".sd\")\n\n    schema = template.render(\n        multi_tenant=True,\n        schema_name=index_name,\n        dim=dim,\n        embedding_precision=embedding_precision.value,\n    )\n\n    with open(index_filename, \"w\", encoding=\"utf-8\") as f:\n        f.write(schema)\n\n    logger.info(f\"Wrote {index_filename}\")\n\n\ndef generate_document_entries() -> str:\n    \"\"\"Generate document entries for all supported embedding models.\"\"\"\n    document_entries = []\n\n    for model in SUPPORTED_EMBEDDING_MODELS:\n        # Add regular index\n        document_entries.append(\n            f'            <document type=\"{model.index_name}\" mode=\"index\" />'\n        )\n        # Add alt index\n        document_entries.append(\n            f'            <document type=\"{model.index_name}__danswer_alt_index\" mode=\"index\" />'\n        )\n\n    return \"\\n\".join(document_entries)\n\n\ndef write_cloud_services(cloud_services_template_path: str, output_path: Path) -> None:\n    \"\"\"Generate and write the cloud-services.xml file.\"\"\"\n    # Create output directory if it doesn't exist\n    output_path.mkdir(parents=True, exist_ok=True)\n\n    jinja_env = jinja2.Environment()\n\n    with open(cloud_services_template_path, \"r\", encoding=\"utf-8\") as f:\n        template_str = f.read()\n\n    template = jinja_env.from_string(template_str)\n    document_entries = generate_document_entries()\n\n    services_xml = template.render(document_elements=document_entries)\n\n    services_file = output_path / \"services.xml\"\n    with open(services_file, \"w\", encoding=\"utf-8\") as f:\n        f.write(services_xml)\n\n    logger.info(f\"Wrote {services_file}\")\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(\n        description=\"Generate multi tenant Vespa schemas and services configuration\"\n    )\n    parser.add_argument(\n        \"--template\",\n        help=\"The Jinja template to use for schemas\",\n        default=\"onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd.jinja\",\n    )\n    parser.add_argument(\n        \"--cloud-services-template\",\n        help=\"The cloud-services.xml.jinja template path\",\n        default=\"ee/onyx/document_index/vespa/app_config/cloud-services.xml.jinja\",\n    )\n    parser.add_argument(\n        \"--output-path\",\n        help=\"Output directory path (defaults to current directory)\",\n        default=\".\",\n    )\n    args = parser.parse_args()\n\n    # Convert output path to Path object\n    output_path = Path(args.output_path)\n\n    jinja_env = jinja2.Environment()\n\n    # Generate schema files\n    with open(args.template, \"r\", encoding=\"utf-8\") as f:\n        template_str = f.read()\n\n    template = jinja_env.from_string(template_str)\n\n    num_indexes = 0\n    for model in SUPPORTED_EMBEDDING_MODELS:\n        write_schema(\n            model.index_name,\n            model.dim,\n            model.embedding_precision,\n            template,\n            output_path,\n        )\n        write_schema(\n            model.index_name + \"__danswer_alt_index\",\n            model.dim,\n            model.embedding_precision,\n            template,\n            output_path,\n        )\n        num_indexes += 2\n\n    logger.info(f\"Wrote {num_indexes} indexes.\")\n\n    # Generate cloud services configuration if template is provided\n    if args.cloud_services_template:\n        if os.path.exists(args.cloud_services_template):\n            write_cloud_services(args.cloud_services_template, output_path)\n        else:\n            logger.error(\n                f\"Cloud services template not found: {args.cloud_services_template}\"\n            )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/debugging/opensearch/benchmark_retrieval.py",
    "content": "#!/usr/bin/env python3\n\"\"\"Benchmarks OpenSearchDocumentIndex latency.\n\nRequires Onyx to be running as it reads search settings from the database.\n\nUsage:\n    source .venv/bin/activate\n    python backend/scripts/debugging/opensearch/benchmark_retrieval.py --help\n\"\"\"\n\nimport argparse\nimport statistics\nimport time\n\nfrom onyx.configs.chat_configs import NUM_RETURNED_HITS\nfrom onyx.context.search.enums import QueryType\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.opensearch_document_index import (\n    OpenSearchDocumentIndex,\n)\nfrom onyx.indexing.models import IndexingSetting\nfrom scripts.debugging.opensearch.constants import DEV_TENANT_ID\nfrom scripts.debugging.opensearch.embedding_io import load_query_embedding_from_file\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom shared_configs.contextvars import get_current_tenant_id\n\nDEFAULT_N = 50\n\n\ndef main() -> None:\n    def add_query_embedding_argument(parser: argparse.ArgumentParser) -> None:\n        parser.add_argument(\n            \"-e\",\n            \"--embedding-file-path\",\n            type=str,\n            required=True,\n            help=\"Path to the query embedding file.\",\n        )\n\n    def add_query_string_argument(parser: argparse.ArgumentParser) -> None:\n        parser.add_argument(\n            \"-q\",\n            \"--query\",\n            type=str,\n            required=True,\n            help=\"Query string.\",\n        )\n\n    parser = argparse.ArgumentParser(\n        description=\"A benchmarking tool to measure OpenSearch retrieval latency.\"\n    )\n    parser.add_argument(\n        \"-n\",\n        type=int,\n        default=DEFAULT_N,\n        help=f\"Number of samples to take (default: {DEFAULT_N}).\",\n    )\n    subparsers = parser.add_subparsers(\n        dest=\"query_type\",\n        help=\"Query type to benchmark.\",\n        required=True,\n    )\n\n    hybrid_parser = subparsers.add_parser(\n        \"hybrid\", help=\"Benchmark hybrid retrieval latency.\"\n    )\n    add_query_embedding_argument(hybrid_parser)\n    add_query_string_argument(hybrid_parser)\n\n    keyword_parser = subparsers.add_parser(\n        \"keyword\", help=\"Benchmark keyword retrieval latency.\"\n    )\n    add_query_string_argument(keyword_parser)\n\n    semantic_parser = subparsers.add_parser(\n        \"semantic\", help=\"Benchmark semantic retrieval latency.\"\n    )\n    add_query_embedding_argument(semantic_parser)\n\n    args = parser.parse_args()\n\n    if args.n < 1:\n        parser.error(\"Number of samples (-n) must be at least 1.\")\n\n    if MULTI_TENANT:\n        CURRENT_TENANT_ID_CONTEXTVAR.set(DEV_TENANT_ID)\n\n    SqlEngine.init_engine(pool_size=1, max_overflow=0)\n    with get_session_with_current_tenant() as session:\n        search_settings = get_current_search_settings(session)\n        indexing_setting = IndexingSetting.from_db_model(search_settings)\n\n    tenant_state = TenantState(\n        tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT\n    )\n    index = OpenSearchDocumentIndex(\n        tenant_state=tenant_state,\n        index_name=search_settings.index_name,\n        embedding_dim=indexing_setting.final_embedding_dim,\n        embedding_precision=indexing_setting.embedding_precision,\n    )\n    filters = IndexFilters(\n        access_control_list=[],\n        tenant_id=get_current_tenant_id(),\n    )\n\n    if args.query_type == \"hybrid\":\n        embedding = load_query_embedding_from_file(args.embedding_file_path)\n        search_callable = lambda: index.hybrid_retrieval(  # noqa: E731\n            query=args.query,\n            query_embedding=embedding,\n            final_keywords=None,\n            # This arg doesn't do anything right now.\n            query_type=QueryType.KEYWORD,\n            filters=filters,\n            num_to_retrieve=NUM_RETURNED_HITS,\n        )\n    elif args.query_type == \"keyword\":\n        search_callable = lambda: index.keyword_retrieval(  # noqa: E731\n            query=args.query,\n            filters=filters,\n            num_to_retrieve=NUM_RETURNED_HITS,\n        )\n    elif args.query_type == \"semantic\":\n        embedding = load_query_embedding_from_file(args.embedding_file_path)\n        search_callable = lambda: index.semantic_retrieval(  # noqa: E731\n            query_embedding=embedding,\n            filters=filters,\n            num_to_retrieve=NUM_RETURNED_HITS,\n        )\n    else:\n        raise ValueError(f\"Invalid query type: {args.query_type}\")\n\n    print(f\"Running {args.n} invocations of {args.query_type} retrieval...\")\n\n    latencies: list[float] = []\n    for i in range(args.n):\n        start = time.perf_counter()\n        results = search_callable()\n        elapsed_ms = (time.perf_counter() - start) * 1000\n        latencies.append(elapsed_ms)\n        # Print the current iteration and its elapsed time on the same line.\n        print(\n            f\"  [{i:>{len(str(args.n))}}] {elapsed_ms:7.1f} ms  ({len(results)} results) (top result doc ID, chunk idx: {results[0].document_id if results else 'N/A'}, {results[0].chunk_id if results else 'N/A'})\",\n            end=\"\\r\",\n            flush=True,\n        )\n\n    print()\n    print(f\"Results over {args.n} invocations:\")\n    print(f\"   mean: {statistics.mean(latencies):7.1f} ms\")\n    print(\n        f\"  stdev: {statistics.stdev(latencies):7.1f} ms\"\n        if args.n > 1\n        else \"  stdev: N/A (only 1 sample)\"\n    )\n    print(f\"    max: {max(latencies):7.1f} ms (i: {latencies.index(max(latencies))})\")\n    print(f\"    min: {min(latencies):7.1f} ms (i: {latencies.index(min(latencies))})\")\n    if args.n >= 20:\n        print(f\"    p50: {statistics.median(latencies):7.1f} ms\")\n        print(f\"    p95: {statistics.quantiles(latencies, n=20)[-1]:7.1f} ms\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/debugging/opensearch/constants.py",
    "content": "DEV_TENANT_ID = \"tenant_dev\"\n"
  },
  {
    "path": "backend/scripts/debugging/opensearch/embed_and_save.py",
    "content": "#!/usr/bin/env python3\n\"\"\"Embeds a query and saves the embedding to a file.\n\nRequires Onyx to be running as it reads search settings from the database.\n\nUsage:\n    source .venv/bin/activate\n    python backend/scripts/debugging/opensearch/embed_and_save.py --help\n\"\"\"\n\nimport argparse\nimport time\n\nfrom onyx.context.search.utils import get_query_embedding\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom scripts.debugging.opensearch.constants import DEV_TENANT_ID\nfrom scripts.debugging.opensearch.embedding_io import save_query_embedding_to_file\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(\n        description=\"A tool to embed a query and save the embedding to a file.\"\n    )\n    parser.add_argument(\n        \"-q\",\n        \"--query\",\n        type=str,\n        required=True,\n        help=\"Query string to embed.\",\n    )\n    parser.add_argument(\n        \"-f\",\n        \"--file-path\",\n        type=str,\n        required=True,\n        help=\"Path to the output file to save the embedding to.\",\n    )\n\n    args = parser.parse_args()\n\n    if MULTI_TENANT:\n        CURRENT_TENANT_ID_CONTEXTVAR.set(DEV_TENANT_ID)\n\n    SqlEngine.init_engine(pool_size=1, max_overflow=0)\n    with get_session_with_current_tenant() as session:\n        start = time.perf_counter()\n        query_embedding = get_query_embedding(\n            query=args.query,\n            db_session=session,\n            embedding_model=None,\n        )\n        elapsed_ms = (time.perf_counter() - start) * 1000\n\n    save_query_embedding_to_file(query_embedding, args.file_path)\n    print(\n        f\"Query embedding of dimension {len(query_embedding)} generated in {elapsed_ms:.1f} ms and saved to {args.file_path}.\"\n    )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/debugging/opensearch/embedding_io.py",
    "content": "from shared_configs.model_server_models import Embedding\n\n\ndef load_query_embedding_from_file(file_path: str) -> Embedding:\n    \"\"\"Returns an embedding vector read from a file.\n\n    The file should be formatted as follows:\n    - The first line should contain an integer representing the embedding\n      dimension.\n    - Every subsequent line should contain a float value representing a\n      component of the embedding vector.\n    - The size and embedding content should all be delimited by a newline.\n\n    Args:\n        file_path: Path to the file containing the embedding vector.\n\n    Returns:\n        Embedding: The embedding vector.\n    \"\"\"\n    with open(file_path, \"r\") as f:\n        dimension = int(f.readline().strip())\n        embedding = [float(line.strip()) for line in f.readlines()]\n        assert len(embedding) == dimension, \"Embedding dimension mismatch.\"\n        return embedding\n\n\ndef save_query_embedding_to_file(embedding: Embedding, file_path: str) -> None:\n    \"\"\"Saves an embedding vector to a file.\n\n    The file will be formatted as follows:\n    - The first line will contain the embedding dimension.\n    - Every subsequent line will contain a float value representing a\n      component of the embedding vector.\n    - The size and embedding content will all be delimited by a newline.\n\n    Args:\n        embedding: The embedding vector to save.\n        file_path: Path to the file to save the embedding vector to.\n    \"\"\"\n    with open(file_path, \"w\") as f:\n        f.write(f\"{len(embedding)}\\n\")\n        for component in embedding:\n            f.write(f\"{component}\\n\")\n"
  },
  {
    "path": "backend/scripts/debugging/opensearch/opensearch_debug.py",
    "content": "#!/usr/bin/env python3\n\"\"\"A utility to interact with OpenSearch.\n\nUsage:\n    source .venv/bin/activate\n    python backend/scripts/debugging/opensearch/opensearch_debug.py --help\n    python backend/scripts/debugging/opensearch/opensearch_debug.py list\n    python backend/scripts/debugging/opensearch/opensearch_debug.py delete <index_name>\n\nEnvironment Variables:\n    OPENSEARCH_HOST: OpenSearch host\n    OPENSEARCH_REST_API_PORT: OpenSearch port\n    OPENSEARCH_ADMIN_USERNAME: Admin username\n    OPENSEARCH_ADMIN_PASSWORD: Admin password\n\nDependencies:\n    backend/shared_configs/configs.py\n    backend/onyx/document_index/opensearch/client.py\n\"\"\"\n\nimport argparse\nimport os\nimport sys\n\nfrom onyx.document_index.opensearch.client import OpenSearchClient\nfrom onyx.document_index.opensearch.client import OpenSearchIndexClient\nfrom shared_configs.configs import MULTI_TENANT\n\n\ndef list_indices(client: OpenSearchClient) -> None:\n    indices = client.list_indices_with_info()\n    print(f\"Found {len(indices)} indices.\")\n    print(\"-\" * 80)\n    for index in sorted(indices, key=lambda x: x.name):\n        print(f\"Index: {index.name}\")\n        print(f\"Health: {index.health}\")\n        print(f\"Status: {index.status}\")\n        print(f\"Num Primary Shards: {index.num_primary_shards}\")\n        print(f\"Num Replica Shards: {index.num_replica_shards}\")\n        print(f\"Docs Count: {index.docs_count}\")\n        print(f\"Docs Deleted: {index.docs_deleted}\")\n        print(f\"Created At: {index.created_at}\")\n        print(f\"Total Size: {index.total_size}\")\n        print(f\"Primary Shards Size: {index.primary_shards_size}\")\n        print(\"-\" * 80)\n\n\ndef delete_index(client: OpenSearchIndexClient) -> None:\n    if not client.index_exists():\n        print(f\"Index '{client._index_name}' does not exist.\")\n        return\n\n    confirm = input(f\"Delete index '{client._index_name}'? (yes/no): \")\n    if confirm.lower() != \"yes\":\n        print(\"Aborted.\")\n        return\n\n    if client.delete_index():\n        print(f\"Deleted index '{client._index_name}'.\")\n    else:\n        print(f\"Failed to delete index '{client._index_name}' for an unknown reason.\")\n\n\ndef main() -> None:\n    def add_standard_arguments(parser: argparse.ArgumentParser) -> None:\n        parser.add_argument(\n            \"--host\",\n            help=\"OpenSearch host. If not provided, will fall back to OPENSEARCH_HOST, then prompt for input.\",\n            type=str,\n            default=os.environ.get(\"OPENSEARCH_HOST\", \"\"),\n        )\n        parser.add_argument(\n            \"--port\",\n            help=\"OpenSearch port. If not provided, will fall back to OPENSEARCH_REST_API_PORT, then prompt for input.\",\n            type=int,\n            default=int(os.environ.get(\"OPENSEARCH_REST_API_PORT\", 0)),\n        )\n        parser.add_argument(\n            \"--username\",\n            help=\"OpenSearch username. If not provided, will fall back to OPENSEARCH_ADMIN_USERNAME, then prompt for input.\",\n            type=str,\n            default=os.environ.get(\"OPENSEARCH_ADMIN_USERNAME\", \"\"),\n        )\n        parser.add_argument(\n            \"--password\",\n            help=\"OpenSearch password. If not provided, will fall back to OPENSEARCH_ADMIN_PASSWORD, then prompt for input.\",\n            type=str,\n            default=os.environ.get(\"OPENSEARCH_ADMIN_PASSWORD\", \"\"),\n        )\n        parser.add_argument(\n            \"--no-ssl\", help=\"Disable SSL.\", action=\"store_true\", default=False\n        )\n        parser.add_argument(\n            \"--no-verify-certs\",\n            help=\"Disable certificate verification (for self-signed certs).\",\n            action=\"store_true\",\n            default=False,\n        )\n        parser.add_argument(\n            \"--use-aws-managed-opensearch\",\n            help=\"Whether to use AWS-managed OpenSearch. If not provided, will fall back to checking \"\n            \"USING_AWS_MANAGED_OPENSEARCH=='true', then default to False.\",\n            action=argparse.BooleanOptionalAction,\n            default=os.environ.get(\"USING_AWS_MANAGED_OPENSEARCH\", \"\").lower()\n            == \"true\",\n        )\n\n    parser = argparse.ArgumentParser(\n        description=\"A utility to interact with OpenSearch.\"\n    )\n    add_standard_arguments(parser)\n    subparsers = parser.add_subparsers(\n        dest=\"command\", help=\"Command to execute.\", required=True\n    )\n\n    subparsers.add_parser(\"list\", help=\"List all indices with info.\")\n\n    delete_parser = subparsers.add_parser(\"delete\", help=\"Delete an index.\")\n    delete_parser.add_argument(\"index\", help=\"Index name.\", type=str)\n\n    args = parser.parse_args()\n\n    if not (host := args.host or input(\"Enter the OpenSearch host: \")):\n        print(\"Error: OpenSearch host is required.\")\n        sys.exit(1)\n    if not (port := args.port or int(input(\"Enter the OpenSearch port: \"))):\n        print(\"Error: OpenSearch port is required.\")\n        sys.exit(1)\n    if not (username := args.username or input(\"Enter the OpenSearch username: \")):\n        print(\"Error: OpenSearch username is required.\")\n        sys.exit(1)\n    if not (password := args.password or input(\"Enter the OpenSearch password: \")):\n        print(\"Error: OpenSearch password is required.\")\n        sys.exit(1)\n    print(\"Using AWS-managed OpenSearch: \", args.use_aws_managed_opensearch)\n    print(f\"MULTI_TENANT: {MULTI_TENANT}\")\n\n    with (\n        OpenSearchIndexClient(\n            index_name=args.index,\n            host=host,\n            port=port,\n            auth=(username, password),\n            use_ssl=not args.no_ssl,\n            verify_certs=not args.no_verify_certs,\n        )\n        if args.command == \"delete\"\n        else OpenSearchClient(\n            host=host,\n            port=port,\n            auth=(username, password),\n            use_ssl=not args.no_ssl,\n            verify_certs=not args.no_verify_certs,\n        )\n    ) as client:\n        if not client.ping():\n            print(\"Error: Could not connect to OpenSearch.\")\n            sys.exit(1)\n\n        if args.command == \"list\":\n            list_indices(client)\n        elif args.command == \"delete\":\n            delete_index(client)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/debugging/opensearch/query_hierarchy_debug.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nDebug utility for querying and inspecting hierarchy data in OpenSearch.\n\nThis script connects to OpenSearch and allows you to:\n- Query documents by ID and view their hierarchy ancestor node IDs\n- List documents that have hierarchy data\n\nUsage:\n    python query_hierarchy_debug.py --document-id <doc_id>\n    python query_hierarchy_debug.py --list-with-hierarchy\n\nEnvironment Variables:\n    OPENSEARCH_HOST: OpenSearch host (default: localhost)\n    OPENSEARCH_PORT: OpenSearch port (default: 9200)\n\nDependencies:\n    pip install opensearch-py\n\"\"\"\n\nimport argparse\nimport os\nimport sys\n\ntry:\n    from opensearchpy import OpenSearch\nexcept ImportError as e:\n    print(\"Error: Missing dependency. Run: pip install opensearch-py\")\n    print(f\"Details: {e}\")\n    sys.exit(1)\n\n\ndef get_client() -> OpenSearch:\n    \"\"\"Create OpenSearch client from environment variables.\"\"\"\n    host = os.environ.get(\"OPENSEARCH_HOST\", \"localhost\")\n    port = int(os.environ.get(\"OPENSEARCH_PORT\", \"9200\"))\n    return OpenSearch(\n        hosts=[{\"host\": host, \"port\": port}],\n        http_auth=None,  # Add auth if needed\n        use_ssl=False,\n    )\n\n\ndef query_document(client: OpenSearch, index: str, doc_id: str) -> None:\n    \"\"\"Query a specific document and view its hierarchy ancestor node IDs.\"\"\"\n    query = {\"query\": {\"term\": {\"document_id\": doc_id}}, \"size\": 10}\n\n    result = client.search(index=index, body=query)\n    hits = result.get(\"hits\", {}).get(\"hits\", [])\n\n    if not hits:\n        print(f\"No document found with ID: {doc_id}\")\n        return\n\n    print(f\"Found {len(hits)} chunk(s) for document ID: {doc_id}\\n\")\n\n    for hit in hits:\n        source = hit.get(\"_source\", {})\n        ancestor_ids = source.get(\"ancestor_hierarchy_node_ids\", [])\n\n        print(f\"  Chunk Index: {source.get('chunk_index')}\")\n        print(f\"  Semantic ID: {source.get('semantic_identifier', 'N/A')}\")\n\n        if ancestor_ids:\n            print(f\"  Ancestor Node IDs: {ancestor_ids}\")\n        else:\n            print(\"  Ancestor Node IDs: (none)\")\n        print()\n\n\ndef list_with_hierarchy(client: OpenSearch, index: str, limit: int = 10) -> None:\n    \"\"\"List documents that have hierarchy data.\"\"\"\n    query = {\n        \"query\": {\"exists\": {\"field\": \"ancestor_hierarchy_node_ids\"}},\n        \"size\": limit,\n        \"_source\": [\n            \"document_id\",\n            \"chunk_index\",\n            \"ancestor_hierarchy_node_ids\",\n            \"semantic_identifier\",\n        ],\n    }\n\n    result = client.search(index=index, body=query)\n    hits = result.get(\"hits\", {}).get(\"hits\", [])\n\n    print(f\"Found {len(hits)} document chunks with hierarchy data (limit: {limit}):\\n\")\n\n    for hit in hits:\n        source = hit.get(\"_source\", {})\n        ancestor_ids = source.get(\"ancestor_hierarchy_node_ids\", [])\n\n        print(f\"  {source.get('document_id')} (chunk {source.get('chunk_index')})\")\n        print(f\"    Semantic ID: {source.get('semantic_identifier', 'N/A')}\")\n        print(f\"    Ancestors: {ancestor_ids}\\n\")\n\n\ndef list_indices(client: OpenSearch) -> None:\n    \"\"\"List available indices.\"\"\"\n    indices = client.indices.get_alias(index=\"*\")\n    print(\"Available indices:\")\n    for index_name in sorted(indices.keys()):\n        if not index_name.startswith(\".\"):  # Skip system indices\n            print(f\"  - {index_name}\")\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(description=\"Debug hierarchy data in OpenSearch\")\n    parser.add_argument(\"--document-id\", help=\"Query a specific document by ID\")\n    parser.add_argument(\n        \"--list-with-hierarchy\",\n        action=\"store_true\",\n        help=\"List documents with hierarchy data\",\n    )\n    parser.add_argument(\"--list-indices\", action=\"store_true\", help=\"List all indices\")\n    parser.add_argument(\"--index\", default=\"onyx_index\", help=\"OpenSearch index name\")\n    parser.add_argument(\"--limit\", type=int, default=10, help=\"Limit for list queries\")\n\n    args = parser.parse_args()\n\n    client = get_client()\n\n    if args.list_indices:\n        list_indices(client)\n    elif args.document_id:\n        query_document(client, args.index, args.document_id)\n    elif args.list_with_hierarchy:\n        list_with_hierarchy(client, args.index, args.limit)\n    else:\n        parser.print_help()\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/decrypt.py",
    "content": "\"\"\"Decrypt a raw hex-encoded credential value.\n\nUsage:\n    python -m scripts.decrypt <hex_value>\n    python -m scripts.decrypt <hex_value> --key \"my-encryption-key\"\n    python -m scripts.decrypt <hex_value> --key \"\"\n\nPass --key \"\" to skip decryption and just decode the raw bytes as UTF-8.\nOmit --key to use the current ENCRYPTION_KEY_SECRET from the environment.\n\"\"\"\n\nimport argparse\nimport binascii\nimport json\nimport os\nimport sys\n\nparent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))\nsys.path.append(parent_dir)\n\nfrom onyx.utils.encryption import decrypt_bytes_to_string  # noqa: E402\nfrom onyx.utils.variable_functionality import global_version  # noqa: E402\n\n\ndef decrypt_raw_credential(encrypted_value: str, key: str | None = None) -> None:\n    \"\"\"Decrypt and display a raw encrypted credential value.\n\n    Args:\n        encrypted_value: The hex-encoded encrypted credential value.\n        key: Encryption key to use. None means use ENCRYPTION_KEY_SECRET,\n             empty string means just decode as UTF-8.\n    \"\"\"\n    # Strip common hex prefixes\n    if encrypted_value.startswith(\"\\\\x\"):\n        encrypted_value = encrypted_value[2:]\n    elif encrypted_value.startswith(\"x\"):\n        encrypted_value = encrypted_value[1:]\n    print(encrypted_value)\n\n    try:\n        raw_bytes = binascii.unhexlify(encrypted_value)\n    except binascii.Error:\n        print(\"Error: Invalid hex-encoded string\")\n        sys.exit(1)\n\n    if key == \"\":\n        # Empty key → just decode as UTF-8, no decryption\n        try:\n            decrypted_str = raw_bytes.decode(\"utf-8\")\n        except UnicodeDecodeError as e:\n            print(f\"Error decoding bytes as UTF-8: {e}\")\n            sys.exit(1)\n    else:\n        print(key)\n        try:\n            decrypted_str = decrypt_bytes_to_string(raw_bytes, key=key)\n        except Exception as e:\n            print(f\"Error decrypting value: {e}\")\n            sys.exit(1)\n\n    # Try to pretty-print as JSON, otherwise print raw\n    try:\n        parsed = json.loads(decrypted_str)\n        print(json.dumps(parsed, indent=2))\n    except json.JSONDecodeError:\n        print(decrypted_str)\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(\n        description=\"Decrypt a hex-encoded credential value.\"\n    )\n    parser.add_argument(\n        \"value\",\n        help=\"Hex-encoded encrypted value to decrypt.\",\n    )\n    parser.add_argument(\n        \"--key\",\n        default=None,\n        help=(\n            \"Encryption key. Omit to use ENCRYPTION_KEY_SECRET from env. \"\n            'Pass \"\" (empty) to just decode as UTF-8 without decryption.'\n        ),\n    )\n    args = parser.parse_args()\n\n    global_version.set_ee()\n    decrypt_raw_credential(args.value, key=args.key)\n    global_version.unset_ee()\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/dev_run_background_jobs.py",
    "content": "import subprocess\nimport threading\n\n\ndef monitor_process(process_name: str, process: subprocess.Popen) -> None:\n    assert process.stdout is not None\n\n    while True:\n        output = process.stdout.readline()\n\n        if output:\n            print(f\"{process_name}: {output.strip()}\")\n\n        if process.poll() is not None:\n            break\n\n\ndef run_jobs() -> None:\n    cmd_worker_primary = [\n        \"celery\",\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.primary\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=6\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=primary@%n\",\n        \"-Q\",\n        \"celery\",\n    ]\n\n    cmd_worker_light = [\n        \"celery\",\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.light\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=16\",\n        \"--prefetch-multiplier=8\",\n        \"--loglevel=INFO\",\n        \"--hostname=light@%n\",\n        \"-Q\",\n        \"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup,index_attempt_cleanup,opensearch_migration\",\n    ]\n\n    cmd_worker_docprocessing = [\n        \"celery\",\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.docprocessing\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=6\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=docprocessing@%n\",\n        \"--queues=docprocessing\",\n    ]\n\n    cmd_worker_docfetching = [\n        \"celery\",\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.docfetching\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=1\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=docfetching@%n\",\n        \"--queues=connector_doc_fetching\",\n    ]\n\n    cmd_worker_heavy = [\n        \"celery\",\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.heavy\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=4\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=heavy@%n\",\n        \"-Q\",\n        \"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,sandbox\",\n    ]\n\n    cmd_worker_monitoring = [\n        \"celery\",\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.monitoring\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=1\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=monitoring@%n\",\n        \"-Q\",\n        \"monitoring\",\n    ]\n\n    cmd_worker_user_file_processing = [\n        \"celery\",\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.user_file_processing\",\n        \"worker\",\n        \"--pool=threads\",\n        \"--concurrency=2\",\n        \"--prefetch-multiplier=1\",\n        \"--loglevel=INFO\",\n        \"--hostname=user_file_processing@%n\",\n        \"-Q\",\n        \"user_file_processing,user_file_project_sync,user_file_delete\",\n    ]\n\n    cmd_beat = [\n        \"celery\",\n        \"-A\",\n        \"onyx.background.celery.versioned_apps.beat\",\n        \"beat\",\n        \"--loglevel=INFO\",\n    ]\n\n    all_workers = [\n        (\"PRIMARY\", cmd_worker_primary),\n        (\"LIGHT\", cmd_worker_light),\n        (\"DOCPROCESSING\", cmd_worker_docprocessing),\n        (\"DOCFETCHING\", cmd_worker_docfetching),\n        (\"HEAVY\", cmd_worker_heavy),\n        (\"MONITORING\", cmd_worker_monitoring),\n        (\"USER_FILE_PROCESSING\", cmd_worker_user_file_processing),\n        (\"BEAT\", cmd_beat),\n    ]\n\n    processes = []\n    for name, cmd in all_workers:\n        process = subprocess.Popen(\n            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True\n        )\n        processes.append((name, process))\n\n    threads = []\n    for name, process in processes:\n        thread = threading.Thread(target=monitor_process, args=(name, process))\n        threads.append(thread)\n        thread.start()\n\n    for thread in threads:\n        thread.join()\n\n\nif __name__ == \"__main__\":\n    run_jobs()\n"
  },
  {
    "path": "backend/scripts/docker_memory_tracking.sh",
    "content": "#!/bin/bash\n\n# USAGE: nohup ./docker_memory_tracking.sh &\n\n# Set default output file or use the provided argument\nOUTPUT_FILE=\"./docker_stats.log\"\nif [ $# -ge 1 ]; then\n    OUTPUT_FILE=\"$1\"\nfi\n\nINTERVAL_SECONDS=600  # 10 minutes\n\n# Create the output file if it doesn't exist, or append to it if it does\ntouch \"$OUTPUT_FILE\"\n\necho \"Docker stats will be collected every 10 minutes and saved to $OUTPUT_FILE\"\necho \"Press Ctrl+C to stop the script\"\n\n# Function to handle script termination\ncleanup() {\n    echo -e \"\\nStopping docker stats collection\"\n    exit 0\n}\n\n# Set up trap for clean exit\ntrap cleanup SIGINT SIGTERM\n\n# Main loop\nwhile true; do\n    # Add timestamp\n    echo -e \"\\n--- Docker Stats: $(date) ---\" >> \"$OUTPUT_FILE\"\n    \n    # Run docker stats for a single snapshot (--no-stream ensures it runs once)\n    docker stats --no-stream --all >> \"$OUTPUT_FILE\"\n    \n    # Wait for the next interval\n    echo \"Stats collected at $(date). Next collection in 10 minutes.\"\n    sleep $INTERVAL_SECONDS\ndone\n"
  },
  {
    "path": "backend/scripts/force_delete_connector_by_id.py",
    "content": "import argparse\nimport os\nimport sys\n\nfrom sqlalchemy import delete\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.document import delete_documents_complete__no_commit\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.search_settings import get_active_search_settings\nfrom onyx.db.tag import delete_orphan_tags__no_commit\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\n# Modify sys.path\ncurrent_dir = os.path.dirname(os.path.abspath(__file__))\nparent_dir = os.path.dirname(current_dir)\nsys.path.append(parent_dir)\n\n# pylint: disable=E402\n# flake8: noqa: E402\n\n# Now import Onyx modules\nfrom onyx.db.models import (\n    DocumentSet__ConnectorCredentialPair,\n    UserGroup__ConnectorCredentialPair,\n)\nfrom onyx.db.connector import fetch_connector_by_id\nfrom onyx.db.document import get_documents_for_connector_credential_pair\nfrom onyx.db.index_attempt import (\n    delete_index_attempts,\n    cancel_indexing_attempts_for_ccpair,\n)\nfrom onyx.db.permission_sync_attempt import (\n    delete_doc_permission_sync_attempts__no_commit,\n)\nfrom onyx.db.permission_sync_attempt import (\n    delete_external_group_permission_sync_attempts__no_commit,\n)\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.utils.logger import setup_logger\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.connector_credential_pair import (\n    get_connector_credential_pair_from_id,\n    get_connector_credential_pair,\n)\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.document_index.factory import (\n    get_all_document_indices,\n)\nfrom onyx.file_store.file_store import get_default_file_store\n\n# pylint: enable=E402\n# flake8: noqa: E402\n\n\nlogger = setup_logger()\n\n_DELETION_BATCH_SIZE = 1000\n\n\ndef _unsafe_deletion(\n    db_session: Session,\n    document_indices: list[DocumentIndex],\n    cc_pair: ConnectorCredentialPair,\n    pair_id: int,\n) -> int:\n    connector_id = cc_pair.connector_id\n    credential_id = cc_pair.credential_id\n\n    num_docs_deleted = 0\n\n    # Gather and delete documents\n    while True:\n        documents = get_documents_for_connector_credential_pair(\n            db_session=db_session,\n            connector_id=connector_id,\n            credential_id=credential_id,\n            limit=_DELETION_BATCH_SIZE,\n        )\n        if not documents:\n            break\n\n        for document in documents:\n            for document_index in document_indices:\n                document_index.delete_single(\n                    doc_id=document.id,\n                    tenant_id=POSTGRES_DEFAULT_SCHEMA,\n                    chunk_count=document.chunk_count,\n                )\n\n        delete_documents_complete__no_commit(\n            db_session=db_session,\n            document_ids=[document.id for document in documents],\n        )\n        delete_orphan_tags__no_commit(db_session=db_session)\n\n        num_docs_deleted += len(documents)\n\n    # Delete index attempts\n    delete_index_attempts(\n        db_session=db_session,\n        cc_pair_id=cc_pair.id,\n    )\n\n    # Delete permission sync attempts\n    delete_doc_permission_sync_attempts__no_commit(\n        db_session=db_session,\n        cc_pair_id=cc_pair.id,\n    )\n    delete_external_group_permission_sync_attempts__no_commit(\n        db_session=db_session,\n        cc_pair_id=cc_pair.id,\n    )\n\n    # Delete document sets\n    stmt = delete(DocumentSet__ConnectorCredentialPair).where(\n        DocumentSet__ConnectorCredentialPair.connector_credential_pair_id == pair_id\n    )\n    db_session.execute(stmt)\n\n    # delete user group associations\n    stmt = delete(UserGroup__ConnectorCredentialPair).where(\n        UserGroup__ConnectorCredentialPair.cc_pair_id == pair_id\n    )\n    db_session.execute(stmt)\n\n    # need to flush to avoid foreign key violations\n    db_session.flush()\n\n    # delete the actual connector credential pair\n    stmt = delete(ConnectorCredentialPair).where(\n        ConnectorCredentialPair.connector_id == connector_id,\n        ConnectorCredentialPair.credential_id == credential_id,\n    )\n    db_session.execute(stmt)\n\n    # Delete Connector\n    connector = fetch_connector_by_id(\n        db_session=db_session,\n        connector_id=connector_id,\n    )\n    if not connector or not len(connector.credentials):\n        logger.debug(\"Found no credentials left for connector, deleting connector\")\n        db_session.delete(connector)\n    db_session.commit()\n\n    logger.notice(\n        \"Successfully deleted connector_credential_pair with connector_id:\"\n        f\" '{connector_id}' and credential_id: '{credential_id}'. Deleted {num_docs_deleted} docs.\"\n    )\n    return num_docs_deleted\n\n\ndef _delete_connector(cc_pair_id: int, db_session: Session) -> None:\n    user_input = input(\n        \"DO NOT USE THIS UNLESS YOU KNOW WHAT YOU ARE DOING. \\\n        IT MAY CAUSE ISSUES with your Onyx instance! \\\n        Are you SURE you want to continue? (enter 'Y' to continue): \"\n    )\n    if user_input != \"Y\":\n        logger.notice(f\"You entered {user_input}. Exiting!\")\n        return\n\n    logger.notice(\"Getting connector credential pair\")\n    cc_pair = get_connector_credential_pair_from_id(\n        db_session=db_session,\n        cc_pair_id=cc_pair_id,\n    )\n\n    if not cc_pair:\n        logger.error(f\"Connector credential pair with ID {cc_pair_id} not found\")\n        return\n\n    if cc_pair.status == ConnectorCredentialPairStatus.ACTIVE:\n        logger.error(\n            f\"Connector {cc_pair.connector.name} is active, cannot continue. \\\n            Please navigate to the connector and pause before attempting again\"\n        )\n        return\n\n    connector_id = cc_pair.connector_id\n    credential_id = cc_pair.credential_id\n\n    if cc_pair is None:\n        logger.error(\n            f\"Connector with ID '{connector_id}' and credential ID \"\n            f\"'{credential_id}' does not exist. Has it already been deleted?\",\n        )\n        return\n\n    logger.notice(\"Cancelling indexing attempt for the connector\")\n    cancel_indexing_attempts_for_ccpair(\n        cc_pair_id=cc_pair_id, db_session=db_session, include_secondary_index=True\n    )\n\n    validated_cc_pair = get_connector_credential_pair(\n        db_session=db_session,\n        connector_id=connector_id,\n        credential_id=credential_id,\n    )\n\n    if not validated_cc_pair:\n        logger.error(\n            f\"Cannot run deletion attempt - connector_credential_pair with Connector ID: \"\n            f\"{connector_id} and Credential ID: {credential_id} does not exist.\"\n        )\n\n    file_ids: list[str] = (\n        cc_pair.connector.connector_specific_config[\"file_locations\"]\n        if cc_pair.connector.source == DocumentSource.FILE\n        else []\n    )\n    try:\n        logger.notice(\"Deleting information from Vespa and Postgres\")\n        active_search_settings = get_active_search_settings(db_session)\n        # This flow is for deletion so we get all indices.\n        document_indices = get_all_document_indices(\n            active_search_settings.primary,\n            active_search_settings.secondary,\n            None,\n        )\n\n        files_deleted_count = _unsafe_deletion(\n            db_session=db_session,\n            document_indices=document_indices,\n            cc_pair=cc_pair,\n            pair_id=cc_pair_id,\n        )\n        logger.notice(f\"Deleted {files_deleted_count} files!\")\n\n    except Exception as e:\n        logger.error(f\"Failed to delete connector due to {e}\")\n\n    if file_ids:\n        logger.notice(\"Deleting stored files!\")\n        file_store = get_default_file_store()\n        for file_id in file_ids:\n            logger.notice(f\"Deleting file {file_id}\")\n            file_store.delete_file(file_id)\n\n    db_session.commit()\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"Delete a connector by its ID\")\n    parser.add_argument(\n        \"connector_id\", type=int, help=\"The ID of the connector to delete\"\n    )\n\n    args = parser.parse_args()\n    with get_session_with_current_tenant() as db_session:\n        _delete_connector(args.connector_id, db_session)\n"
  },
  {
    "path": "backend/scripts/get_wikidocs.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nScript to pull Wikipedia documents from Hugging Face and organize them into zip files.\n\nUsage:\n    python get_wikidocs.py --total 1000 --per-zip 100 --output ./wikidata_zips\n\"\"\"\n\nimport argparse\nimport os\nimport re\nimport zipfile\nfrom pathlib import Path\n\nfrom datasets import load_dataset  # type: ignore\nfrom tqdm import tqdm  # type: ignore\n\n\ndef sanitize_filename(title: str) -> str:\n    \"\"\"\n    Sanitize a title for use as a filename.\n\n    - Remove special characters\n    - Replace whitespaces with underscores\n    - Limit length to avoid filesystem issues\n\n    Args:\n        title: The Wikipedia page title\n\n    Returns:\n        Sanitized filename string\n    \"\"\"\n    # Replace whitespace with underscores\n    sanitized = re.sub(r\"\\s+\", \"_\", title)\n\n    # Remove special characters, keep alphanumeric, underscores, and hyphens\n    sanitized = re.sub(r\"[^a-zA-Z0-9_\\-]\", \"\", sanitized)\n\n    # Limit length to 200 characters to avoid filesystem issues\n    if len(sanitized) > 200:\n        sanitized = sanitized[:200]\n\n    # Ensure it's not empty after sanitization\n    if not sanitized:\n        sanitized = \"untitled\"\n\n    return sanitized\n\n\ndef stream_wikipedia_to_zips(\n    total_pages: int,\n    pages_per_zip: int,\n    output_dir: str = \".\",\n    dataset_name: str = \"wikipedia\",\n    dataset_config: str = \"20220301.en\",\n) -> None:\n    \"\"\"\n    Stream Wikipedia pages from Hugging Face and write them to zip files.\n\n    Args:\n        total_pages: Total number of Wikipedia pages to download\n        pages_per_zip: Number of pages to include in each zip file\n        output_dir: Directory where zip files will be saved\n        dataset_name: Name of the dataset on Hugging Face\n        dataset_config: Configuration/version of the dataset\n    \"\"\"\n    # Create output directory if it doesn't exist\n    output_path = Path(output_dir)\n    output_path.mkdir(parents=True, exist_ok=True)\n\n    print(\"Loading Wikipedia dataset from Hugging Face (streaming mode)...\")\n    print(f\"Dataset: {dataset_name}, Config: {dataset_config}\")\n\n    # Load dataset in streaming mode\n    dataset = load_dataset(\n        dataset_name,\n        dataset_config,\n        split=\"train\",\n        streaming=True,\n        trust_remote_code=True,\n    )\n\n    # Initialize counters\n    current_zip_index = 0\n    pages_in_current_zip = 0\n    current_zip = None\n    zip_path = None\n\n    # Process pages with progress bar\n    with tqdm(total=total_pages, desc=\"Processing Wikipedia pages\") as pbar:\n        for idx, page in enumerate(dataset):\n            if idx >= total_pages:\n                break\n\n            # Create new zip file if needed\n            if pages_in_current_zip == 0 or pages_in_current_zip >= pages_per_zip:\n                # Close previous zip if exists\n                if current_zip is not None:\n                    current_zip.close()\n                    print(f\"\\nCompleted: {zip_path} ({pages_in_current_zip} pages)\")\n\n                # Create new zip\n                zip_path = output_path / f\"wiki_data_{current_zip_index}.zip\"\n                current_zip = zipfile.ZipFile(zip_path, \"w\", zipfile.ZIP_DEFLATED)\n                current_zip_index += 1\n                pages_in_current_zip = 0\n\n            # Extract page data\n            title = page.get(\"title\", f\"page_{idx}\")\n            text = page.get(\"text\", \"\")\n\n            # Create sanitized filename\n            filename = f\"{sanitize_filename(title)}.txt\"\n\n            # Ensure current_zip is not None (should always be created in the if block above)\n            if current_zip is None:\n                raise RuntimeError(\"Zip file was not properly initialized\")\n\n            # Handle potential duplicate filenames within the same zip\n            base_filename = filename\n            counter = 1\n            while filename in current_zip.namelist():\n                name, ext = os.path.splitext(base_filename)\n                filename = f\"{name}_{counter}{ext}\"\n                counter += 1\n\n            # Write page content to zip\n            page_content = f\"Title: {title}\\n\\n{text}\"\n            current_zip.writestr(filename, page_content)\n\n            pages_in_current_zip += 1\n            pbar.update(1)\n\n    # Close final zip file\n    if current_zip is not None:\n        current_zip.close()\n        print(f\"\\nCompleted: {zip_path} ({pages_in_current_zip} pages)\")\n\n    print(f\"\\nSuccessfully created {current_zip_index} zip file(s) in {output_dir}\")\n    print(f\"Total pages processed: {min(total_pages, idx + 1)}\")\n\n\ndef main() -> int:\n    \"\"\"Main entry point for the script.\"\"\"\n    parser = argparse.ArgumentParser(\n        description=\"Pull Wikipedia documents from Hugging Face and organize into zip files\",\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n    )\n\n    parser.add_argument(\n        \"--total\",\n        type=int,\n        required=True,\n        help=\"Total number of Wikipedia pages to download\",\n    )\n\n    parser.add_argument(\n        \"--per-zip\",\n        type=int,\n        required=True,\n        help=\"Number of pages to include in each zip file\",\n    )\n\n    parser.add_argument(\n        \"--output\", type=str, default=\".\", help=\"Output directory for zip files\"\n    )\n\n    parser.add_argument(\n        \"--dataset\",\n        type=str,\n        default=\"wikipedia\",\n        help=\"Name of the Wikipedia dataset on Hugging Face\",\n    )\n\n    parser.add_argument(\n        \"--config\",\n        type=str,\n        default=\"20220301.en\",\n        help=\"Dataset configuration (e.g., '20220301.en' for English Wikipedia from March 2022)\",\n    )\n\n    args = parser.parse_args()\n\n    # Validate arguments\n    if args.total <= 0:\n        parser.error(\"--total must be a positive integer\")\n\n    if args.per_zip <= 0:\n        parser.error(\"--per-zip must be a positive integer\")\n\n    print(\"=\" * 70)\n    print(\"Wikipedia Data Extractor\")\n    print(\"=\" * 70)\n    print(f\"Total pages: {args.total}\")\n    print(f\"Pages per zip: {args.per_zip}\")\n    print(f\"Output directory: {args.output}\")\n    print(f\"Expected zip files: {(args.total + args.per_zip - 1) // args.per_zip}\")\n    print(\"=\" * 70)\n    print()\n\n    try:\n        stream_wikipedia_to_zips(\n            total_pages=args.total,\n            pages_per_zip=args.per_zip,\n            output_dir=args.output,\n            dataset_name=args.dataset,\n            dataset_config=args.config,\n        )\n    except KeyboardInterrupt:\n        print(\"\\n\\nProcess interrupted by user\")\n    except Exception as e:\n        print(f\"\\nError: {e}\")\n        import traceback\n\n        traceback.print_exc()\n        return 1\n\n    return 0\n\n\nif __name__ == \"__main__\":\n    exit(main())\n"
  },
  {
    "path": "backend/scripts/hard_delete_chats.py",
    "content": "import os\nimport sys\n\n\n# Ensure PYTHONPATH is set up for direct script execution\nparent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))\nprint(parent_dir)\nsys.path.append(parent_dir)\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant  # noqa: E402\nfrom onyx.db.engine.sql_engine import SqlEngine  # noqa: E402\nfrom onyx.db.models import ChatSession  # noqa: E402\nfrom onyx.db.chat import delete_chat_session  # noqa: E402\n\n\ndef main() -> None:\n    SqlEngine.init_engine(pool_size=20, max_overflow=5)\n\n    with get_session_with_current_tenant() as db_session:\n        deleted_sessions = (\n            db_session.query(ChatSession).filter(ChatSession.deleted.is_(True)).all()\n        )\n        if not deleted_sessions:\n            print(\"No deleted chat sessions found.\")\n            return\n        print(f\"Found {len(deleted_sessions)} deleted chat sessions:\")\n        for session in deleted_sessions:\n            print(f\"  - ID: {session.id} | deleted: {session.deleted}\")\n        confirm = input(\n            \"\\nAre you sure you want to hard delete these sessions? Type 'yes' to confirm: \"\n        )\n        if confirm.strip().lower() != \"yes\":\n            print(\"Aborted by user.\")\n            return\n        total = 0\n        for session in deleted_sessions:\n            print(f\"Deleting {session.id}\")\n            try:\n                delete_chat_session(\n                    user_id=None,\n                    chat_session_id=session.id,\n                    db_session=db_session,\n                    include_deleted=True,\n                    hard_delete=True,\n                )\n                total += 1\n            except Exception as e:\n                print(f\"Error deleting session {session.id}: {e}\")\n        print(f\"Deleted {total}\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/lib/logger.py",
    "content": "from __future__ import annotations\n\nimport logging\nimport os\nimport sys\n\n# Detect CI environment\nIS_CI = os.getenv(\"CI\", \"\").lower() == \"true\"\nIS_DEBUG = os.getenv(\"DEBUG\", \"\").lower() == \"true\"\n\n# ANSI color codes for local terminal\nGRAY = \"\\033[90m\"\nRED = \"\\033[91m\"\nYELLOW = \"\\033[93m\"\nCYAN = \"\\033[96m\"\nRESET = \"\\033[0m\"\n\n\nclass CIFormatter(logging.Formatter):\n    \"\"\"\n    Formatter that emits GitHub Actions workflow commands in CI,\n    or colored output locally.\n    \"\"\"\n\n    def format(self, record: logging.LogRecord) -> str:\n        msg = record.getMessage()\n        metadata = getattr(record, \"extra\", {})\n\n        # Use standard extra fields as GitHub Actions metadata\n        meta_fields = [\"file\", \"line\", \"col\", \"endLine\", \"endColumn\"]\n        metadata = {k: getattr(record, k) for k in meta_fields if hasattr(record, k)}\n\n        if IS_CI and record.levelno >= logging.WARNING:\n            command = \"error\" if record.levelno >= logging.ERROR else \"warning\"\n            meta_str = \",\".join(f\"{k}={v}\" for k, v in metadata.items())\n            if meta_str:\n                return f\"::{command} {meta_str}::{msg}\"\n            else:\n                return f\"::{command}::{msg}\"\n\n        # Local colored output\n        if record.levelno >= logging.ERROR:\n            return f\"{RED}Error:{RESET} {msg}\"\n        elif record.levelno >= logging.WARNING:\n            return f\"{YELLOW}Warning:{RESET} {msg}\"\n        elif record.levelno >= logging.INFO:\n            return f\"{CYAN}Info:{RESET} {msg}\"\n        elif record.levelno >= logging.DEBUG:\n            return f\"{GRAY}Debug:{RESET} {msg}\"\n        return msg\n\n\ndef getLogger(name: str | None = None, level: int | None = None) -> logging.Logger:\n    \"\"\"\n    Get a CI-aware logger.\n    \"\"\"\n    logger = logging.getLogger(name)\n    if level is None:\n        level = logging.DEBUG if IS_DEBUG else logging.INFO\n    logger.setLevel(level)\n\n    if not logger.hasHandlers():\n        handler = logging.StreamHandler(sys.stdout)\n        handler.setFormatter(CIFormatter())\n        logger.addHandler(handler)\n\n    return logger\n"
  },
  {
    "path": "backend/scripts/make_foss_repo.sh",
    "content": "#!/usr/bin/env bash\nset -euo pipefail\n\necho \"=== Building FOSS mirror ===\"\nrm -rf /tmp/foss_repo && mkdir -p /tmp/foss_repo\ngit clone . /tmp/foss_repo\ncd /tmp/foss_repo\n\necho \"=== Creating MIT license file ===\"\ncat > /tmp/mit_license.txt << 'EOF'\nCopyright (c) 2023-present DanswerAI, Inc.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\nEOF\n\n# NOTE: intentionally keeping the web/src/app/ee directory\n# for now since there's no clean way to remove it\necho \"=== Removing enterprise directory and licenses from history ===\"\ngit filter-repo \\\n  --path backend/ee --invert-paths \\\n  --path backend/ee/LICENSE --invert-paths \\\n  --path web/src/app/ee/LICENSE --invert-paths \\\n  --force\n\n# NOTE: not ideal, since this means every day folks with the repo\n# locally will need to hard reset if they want to pull in more stuff.\necho \"=== Recreating empty enterprise directory ===\"\nmkdir -p backend/ee\ntouch backend/ee/__init__.py\ngit add backend/ee\n\necho \"=== Updating README ===\"\n\ncat > /tmp/foss_notice.txt << 'EOF'\n\n> [!NOTE]\n> **This is the FOSS (Free and Open Source Software) version of Onyx**\n> \n> This repository is 100% MIT-licensed and automatically synced with the [main Onyx repository](https://github.com/onyx-dot-app/onyx). The [main repository](https://github.com/onyx-dot-app/onyx) is recommended for most users. This FOSS version is maintained for users with strict open-source licensing requirements.\n> \n> ---\n\nEOF\n\nsed -i '/<a name=\"readme-top\"><\\/a>/r /tmp/foss_notice.txt' README.md\nsed -i 's/utm_source=onyx_repo/utm_source=foss_repo/g' README.md\n\ngit add README.md\ngit commit -m \"README\"\n\necho \"=== Creating blob callback script ===\"\ncat > /tmp/license_replacer.py << 'PYEOF'\n#!/usr/bin/env python3\nimport sys\n\n# Read MIT license from file\nwith open('/tmp/mit_license.txt', 'rb') as f:\n    MIT_LICENSE = f.read()\n\nimport git_filter_repo as fr\n\nreplaced_count = 0\n\ndef replace_license_blob_content(blob, metadata):\n    \"\"\"Replace LICENSE blob content with MIT license based on content detection\"\"\"\n    global replaced_count\n\n    # Check if this blob looks like a license file\n    # We'll replace any blob that contains the old Apache/custom license text\n    if blob.data and len(blob.data) > 100:\n        # Check for license-like content\n        # Unfortunately, we don't have access to the path, so we can't just check that the path\n        # is `LICENSE`.\n        data_lower = blob.data.lower()\n        if (\n            b'portions of this software are licensed as follows' in data_lower and\n            b'all third party components incorporated into the' in data_lower\n        ):\n            # Additional check: make sure it's actually a license file, not source code\n            # License files typically don't have common code patterns\n            if b'def ' not in blob.data and b'class ' not in blob.data and b'import ' not in blob.data[:200]:\n                blob.data = MIT_LICENSE\n                replaced_count += 1\n\nargs = fr.FilteringOptions.parse_args(['--force'], error_on_empty=False)\nfilter_obj = fr.RepoFilter(args, blob_callback=replace_license_blob_content)\nfilter_obj.run()\n\nprint(f\"Replaced {replaced_count} LICENSE blob(s)\", file=sys.stderr)\nPYEOF\n\necho \"=== Replacing LICENSE file in all commits ===\"\nchmod +x /tmp/license_replacer.py\n/tmp/license_replacer.py\n\necho \"=== Done building FOSS repo ===\"\n"
  },
  {
    "path": "backend/scripts/onyx_openapi_schema.py",
    "content": "# export openapi schema without having to start the actual web server\n\n# helpful tips: https://github.com/fastapi/fastapi/issues/1173\n\nimport argparse\nimport json\nimport os\nimport subprocess\nimport sys\n\nfrom fastapi import FastAPI\nfrom fastapi.openapi.utils import get_openapi\n\nfrom onyx.main import app as app_fn\n\nOPENAPI_VERSION = \"3.1.0\"\n\n\ndef go(filename: str, tagged_for_docs: str | None = None) -> None:\n    \"\"\"Generate OpenAPI schema.\n\n    By default outputs tag-stripped schema (for client generation).\n    If tagged_for_docs is provided, also outputs the original tagged version for docs.\n    \"\"\"\n    app: FastAPI = app_fn()\n    app.openapi_version = OPENAPI_VERSION\n    schema = get_openapi(\n        title=app.title,\n        version=app.version,\n        openapi_version=app.openapi_version,\n        description=app.description,\n        routes=app.routes,\n    )\n\n    # Output tagged version for docs if requested\n    if tagged_for_docs:\n        with open(tagged_for_docs, \"w\") as f:\n            json.dump(schema, f)\n        print(f\"Wrote tagged OpenAPI schema to {tagged_for_docs}\")\n\n    # Output stripped version (default) for client generation\n    stripped = strip_tags_from_schema(schema)\n    with open(filename, \"w\") as f:\n        json.dump(stripped, f)\n    print(f\"Wrote OpenAPI schema to {filename}.\")\n\n\ndef strip_tags_from_schema(schema: dict) -> dict:\n    \"\"\"Strip tags from OpenAPI schema so openapi-generator puts all endpoints in DefaultApi.\"\"\"\n    import copy\n\n    schema = copy.deepcopy(schema)\n\n    # Remove tags from all operations\n    if \"paths\" in schema:\n        for path_item in schema[\"paths\"].values():\n            for operation in path_item.values():\n                if isinstance(operation, dict) and \"tags\" in operation:\n                    del operation[\"tags\"]\n\n    # Remove top-level tags definition\n    if \"tags\" in schema:\n        del schema[\"tags\"]\n\n    return schema\n\n\ndef generate_client(openapi_json_path: str, strip_tags: bool = True) -> None:\n    \"\"\"Generate Python client from OpenAPI schema using openapi-generator.\"\"\"\n    import tempfile\n\n    output_dir = os.path.join(os.path.dirname(openapi_json_path), \"onyx_openapi_client\")\n\n    # Optionally strip tags so all endpoints go under DefaultApi\n    schema_path = openapi_json_path\n    if strip_tags:\n        with open(openapi_json_path) as f:\n            schema = json.load(f)\n        stripped = strip_tags_from_schema(schema)\n        fd, schema_path = tempfile.mkstemp(suffix=\".json\")\n        with os.fdopen(fd, \"w\") as f:\n            json.dump(stripped, f)\n        print(f\"Stripped tags from schema, using temp file: {schema_path}\")\n\n    cmd = [\n        \"openapi-generator\",\n        \"generate\",\n        \"-i\",\n        schema_path,\n        \"-g\",\n        \"python\",\n        \"-o\",\n        output_dir,\n        \"--package-name\",\n        \"onyx_openapi_client\",\n        \"--skip-validate-spec\",\n        \"--openapi-normalizer\",\n        \"SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true\",\n    ]\n\n    print(\"Running openapi-generator...\")\n    try:\n        result = subprocess.run(cmd)\n        if result.returncode == 0:\n            print(f\"Generated Python client at {output_dir}\")\n        else:\n            print(\n                \"Failed to generate Python client. See backend/tests/integration/README.md for setup instructions.\",\n                file=sys.stderr,\n            )\n    finally:\n        # Clean up temp file if we created one\n        if strip_tags and schema_path != openapi_json_path:\n            os.unlink(schema_path)\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(\n        description=\"Export OpenAPI schema for Onyx API (does not require starting API server)\"\n    )\n    parser.add_argument(\n        \"--filename\", \"-f\", help=\"Filename to write to\", default=\"openapi.json\"\n    )\n    parser.add_argument(\n        \"--generate-python-client\",\n        action=\"store_true\",\n        help=\"Generate Python client schemas (needed for integration tests)\",\n    )\n    parser.add_argument(\n        \"--tagged-for-docs\",\n        help=\"Also output a tagged version for API docs (specify output path)\",\n    )\n\n    args = parser.parse_args()\n    go(args.filename, tagged_for_docs=args.tagged_for_docs)\n\n    if args.generate_python_client:\n        # Schema is already stripped by go(), no need to strip again\n        generate_client(args.filename, strip_tags=False)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/orphan_doc_cleanup_script.py",
    "content": "import concurrent.futures\nimport os\nimport sys\n\nfrom sqlalchemy import text\nfrom sqlalchemy.orm import Session\n\nfrom onyx.document_index.document_index_utils import get_multipass_config\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\n# makes it so `PYTHONPATH=.` is not required when running this script\nparent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))\nsys.path.append(parent_dir)\n\nfrom onyx.context.search.models import IndexFilters  # noqa: E402\nfrom onyx.document_index.interfaces import VespaChunkRequest  # noqa: E402\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant  # noqa: E402\nfrom onyx.db.document import delete_documents_complete__no_commit  # noqa: E402\nfrom onyx.db.tag import delete_orphan_tags__no_commit  # noqa: E402\nfrom onyx.db.search_settings import get_current_search_settings  # noqa: E402\nfrom onyx.document_index.vespa.index import VespaIndex  # noqa: E402\nfrom onyx.db.document import get_document  # noqa: E402\n\nBATCH_SIZE = 100\n\n\ndef _get_orphaned_document_ids(db_session: Session, limit: int) -> list[str]:\n    \"\"\"Get document IDs that don't have any entries in document_by_connector_credential_pair\"\"\"\n    query = text(\n        \"\"\"\n        SELECT d.id\n        FROM document d\n        LEFT JOIN document_by_connector_credential_pair dbcc ON d.id = dbcc.id\n        WHERE dbcc.id IS NULL\n        LIMIT :limit\n    \"\"\"\n    )\n    orphaned_ids = [doc_id[0] for doc_id in db_session.execute(query, {\"limit\": limit})]\n    print(f\"Found {len(orphaned_ids)} orphaned documents in this batch\")\n    return orphaned_ids\n\n\ndef main() -> None:\n    with get_session_with_current_tenant() as db_session:\n        total_processed = 0\n        while True:\n            # Get orphaned document IDs in batches\n            orphaned_ids = _get_orphaned_document_ids(db_session, BATCH_SIZE)\n            if not orphaned_ids:\n                if total_processed == 0:\n                    print(\"No orphaned documents found\")\n                else:\n                    print(\n                        f\"Finished processing all batches. Total documents processed: {total_processed}\"\n                    )\n                return\n\n            # Setup Vespa index\n            search_settings = get_current_search_settings(db_session)\n            multipass_config = get_multipass_config(search_settings)\n            index_name = search_settings.index_name\n            vespa_index = VespaIndex(\n                index_name=index_name,\n                secondary_index_name=None,\n                large_chunks_enabled=multipass_config.enable_large_chunks,\n                secondary_large_chunks_enabled=None,\n            )\n\n            # Delete chunks from Vespa first\n            print(\"Deleting orphaned document chunks from Vespa\")\n            successfully_vespa_deleted_doc_ids: list[str] = []\n            # Process documents in parallel using ThreadPoolExecutor\n            with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:\n\n                def process_doc(doc_id: str) -> str | None:\n                    document = get_document(doc_id, db_session)\n                    if not document:\n                        return None\n                    # Check if document exists in Vespa first\n                    try:\n                        chunks = vespa_index.id_based_retrieval(\n                            chunk_requests=[\n                                VespaChunkRequest(document_id=doc_id, max_chunk_ind=2)\n                            ],\n                            filters=IndexFilters(access_control_list=None),\n                            batch_retrieval=True,\n                        )\n                        if not chunks:\n                            print(f\"Document {doc_id} not found in Vespa\")\n                            return doc_id\n                    except Exception as e:\n                        print(\n                            f\"Error checking if document {doc_id} exists in Vespa: {e}\"\n                        )\n                        return None\n\n                    try:\n                        print(f\"Deleting document {doc_id} in Vespa\")\n                        chunks_deleted = vespa_index.delete_single(\n                            doc_id,\n                            tenant_id=POSTGRES_DEFAULT_SCHEMA,\n                            chunk_count=document.chunk_count,\n                        )\n                        if chunks_deleted > 0:\n                            print(\n                                f\"Deleted {chunks_deleted} chunks for document {doc_id}\"\n                            )\n                        return doc_id\n                    except Exception as e:\n                        print(\n                            f\"Error deleting document {doc_id} in Vespa and will not delete from Postgres: {e}\"\n                        )\n                        return None\n\n                # Submit all tasks and gather results\n                futures = [\n                    executor.submit(process_doc, doc_id) for doc_id in orphaned_ids\n                ]\n                for future in concurrent.futures.as_completed(futures):\n                    doc_id = future.result()\n                    if doc_id:\n                        successfully_vespa_deleted_doc_ids.append(doc_id)\n\n            # Delete documents from Postgres\n            print(\"Deleting orphaned documents from Postgres\")\n            try:\n                delete_documents_complete__no_commit(\n                    db_session, successfully_vespa_deleted_doc_ids\n                )\n                delete_orphan_tags__no_commit(db_session)\n                db_session.commit()\n            except Exception as e:\n                print(f\"Error deleting documents from Postgres: {e}\")\n                break\n\n            total_processed += len(successfully_vespa_deleted_doc_ids)\n            print(\n                f\"Successfully cleaned up {len(successfully_vespa_deleted_doc_ids)} orphaned documents in this batch\"\n            )\n            print(f\"Total documents processed so far: {total_processed}\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/query_time_check/seed_dummy_docs.py",
    "content": "\"\"\"\nlaunch:\n- api server\n- postgres\n- vespa\n- model server (this is only needed so the api server can startup, no embedding is done)\n\nRun this script to seed the database with dummy documents.\nThen run test_query_times.py to test query times.\n\"\"\"\n\nimport random\nfrom datetime import datetime\n\nfrom onyx.access.models import DocumentAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.document_index.document_index_utils import get_multipass_config\nfrom onyx.document_index.vespa.index import VespaIndex\nfrom onyx.indexing.indexing_pipeline import IndexBatchParams\nfrom onyx.indexing.models import ChunkEmbedding\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom onyx.indexing.models import IndexChunk\nfrom onyx.utils.timing import log_function_time\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\nfrom shared_configs.model_server_models import Embedding\n\nTOTAL_DOC_SETS = 8\nTOTAL_ACL_ENTRIES_PER_CATEGORY = 80\n\n\ndef generate_random_embedding(dim: int) -> Embedding:\n    return [random.uniform(-1, 1) for _ in range(dim)]\n\n\ndef generate_random_identifier() -> str:\n    return f\"dummy_doc_{random.randint(1, 1000)}\"\n\n\ndef generate_dummy_chunk(\n    doc_id: str,\n    chunk_id: int,\n    embedding_dim: int,\n    number_of_acl_entries: int,\n    number_of_document_sets: int,\n) -> DocMetadataAwareIndexChunk:\n    document = Document(\n        id=doc_id,\n        source=DocumentSource.GOOGLE_DRIVE,\n        sections=[],\n        metadata={},\n        semantic_identifier=generate_random_identifier(),\n    )\n\n    chunk = IndexChunk(\n        chunk_id=chunk_id,\n        blurb=f\"Blurb for chunk {chunk_id} of document {doc_id}.\",\n        content=f\"Content for chunk {chunk_id} of document {doc_id}. This is dummy text for testing purposes.\",\n        source_links={},\n        section_continuation=False,\n        source_document=document,\n        title_prefix=f\"Title prefix for doc {doc_id}\",\n        metadata_suffix_semantic=\"\",\n        metadata_suffix_keyword=\"\",\n        doc_summary=\"\",\n        chunk_context=\"\",\n        mini_chunk_texts=None,\n        contextual_rag_reserved_tokens=0,\n        embeddings=ChunkEmbedding(\n            full_embedding=generate_random_embedding(embedding_dim),\n            mini_chunk_embeddings=[],\n        ),\n        title_embedding=generate_random_embedding(embedding_dim),\n        large_chunk_id=None,\n        large_chunk_reference_ids=[],\n        image_file_id=None,\n    )\n\n    document_set_names = []\n    for i in range(number_of_document_sets):\n        document_set_names.append(f\"Document Set {i}\")\n\n    user_emails: list[str | None] = []\n    user_groups: list[str] = []\n    external_user_emails: list[str] = []\n    external_user_group_ids: list[str] = []\n    for i in range(number_of_acl_entries):\n        user_emails.append(f\"user_{i}@example.com\")\n        user_groups.append(f\"group_{i}\")\n        external_user_emails.append(f\"external_user_{i}@example.com\")\n        external_user_group_ids.append(f\"external_group_{i}\")\n\n    return DocMetadataAwareIndexChunk.from_index_chunk(\n        index_chunk=chunk,\n        user_project=[],\n        personas=[],\n        access=DocumentAccess.build(\n            user_emails=user_emails,\n            user_groups=user_groups,\n            external_user_emails=external_user_emails,\n            external_user_group_ids=external_user_group_ids,\n            is_public=random.choice([True, False]),\n        ),\n        document_sets={document_set for document_set in document_set_names},\n        boost=random.randint(-1, 1),\n        aggregated_chunk_boost_factor=random.random(),\n        tenant_id=POSTGRES_DEFAULT_SCHEMA,\n    )\n\n\n@log_function_time()\ndef do_insertion(\n    vespa_index: VespaIndex, all_chunks: list[DocMetadataAwareIndexChunk]\n) -> None:\n    insertion_records = vespa_index.index(\n        chunks=all_chunks,\n        index_batch_params=IndexBatchParams(\n            doc_id_to_previous_chunk_cnt={},\n            doc_id_to_new_chunk_cnt={},\n            tenant_id=POSTGRES_DEFAULT_SCHEMA,\n            large_chunks_enabled=False,\n        ),\n    )\n    print(f\"Indexed {len(insertion_records)} documents.\")\n    print(\n        f\"New documents: {sum(1 for record in insertion_records if not record.already_existed)}\"\n    )\n    print(\n        f\"Existing documents updated: {sum(1 for record in insertion_records if record.already_existed)}\"\n    )\n\n\n@log_function_time()\ndef seed_dummy_docs(\n    number_of_document_sets: int,\n    number_of_acl_entries: int,\n    num_docs: int = 1000,\n    chunks_per_doc: int = 5,\n    batch_size: int = 100,\n) -> None:\n    with get_session_with_current_tenant() as db_session:\n        search_settings = get_current_search_settings(db_session)\n        multipass_config = get_multipass_config(search_settings)\n        index_name = search_settings.index_name\n        embedding_dim = search_settings.final_embedding_dim\n\n    vespa_index = VespaIndex(\n        index_name=index_name,\n        secondary_index_name=None,\n        large_chunks_enabled=multipass_config.enable_large_chunks,\n        secondary_large_chunks_enabled=None,\n    )\n    print(index_name)\n\n    all_chunks = []\n    chunk_count = 0\n    for doc_num in range(num_docs):\n        doc_id = f\"dummy_doc_{doc_num}_{datetime.now().isoformat()}\"\n        for chunk_num in range(chunks_per_doc):\n            chunk = generate_dummy_chunk(\n                doc_id=doc_id,\n                chunk_id=chunk_num,\n                embedding_dim=embedding_dim,\n                number_of_acl_entries=number_of_acl_entries,\n                number_of_document_sets=number_of_document_sets,\n            )\n            all_chunks.append(chunk)\n            chunk_count += 1\n\n            if len(all_chunks) >= chunks_per_doc * batch_size:\n                do_insertion(vespa_index, all_chunks)\n                print(\n                    f\"Indexed {chunk_count} chunks out of {num_docs * chunks_per_doc}.\"\n                )\n                print(\n                    f\"percentage: {chunk_count / (num_docs * chunks_per_doc) * 100:.2f}% \\n\"\n                )\n                all_chunks = []\n\n    if all_chunks:\n        do_insertion(vespa_index, all_chunks)\n\n\nif __name__ == \"__main__\":\n    seed_dummy_docs(\n        number_of_document_sets=TOTAL_DOC_SETS,\n        number_of_acl_entries=TOTAL_ACL_ENTRIES_PER_CATEGORY,\n        num_docs=100000,\n        chunks_per_doc=5,\n        batch_size=1000,\n    )\n"
  },
  {
    "path": "backend/scripts/query_time_check/test_query_times.py",
    "content": "# \"\"\"\n# RUN THIS AFTER SEED_DUMMY_DOCS.PY\n# \"\"\"\n\n# import random\n# import time\n\n# from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType\n# from onyx.configs.constants import DocumentSource\n# from onyx.configs.model_configs import DOC_EMBEDDING_DIM\n# from onyx.context.search.models import IndexFilters\n# from onyx.db.engine.sql_engine import get_session_with_current_tenant\n# from onyx.db.search_settings import get_current_search_settings\n# from onyx.document_index.document_index_utils import get_multipass_config\n# from onyx.document_index.vespa.index import VespaIndex\n# from scripts.query_time_check.seed_dummy_docs import TOTAL_ACL_ENTRIES_PER_CATEGORY\n# from scripts.query_time_check.seed_dummy_docs import TOTAL_DOC_SETS\n# from shared_configs.model_server_models import Embedding\n\n# # make sure these are smaller than TOTAL_ACL_ENTRIES_PER_CATEGORY and TOTAL_DOC_SETS, respectively\n# NUMBER_OF_ACL_ENTRIES_PER_QUERY = 6\n# NUMBER_OF_DOC_SETS_PER_QUERY = 2\n\n\n# def get_slowest_99th_percentile(results: list[float]) -> float:\n#     return sorted(results)[int(0.99 * len(results))]\n\n\n# # Generate random filters\n# def _random_filters() -> IndexFilters:\n#     \"\"\"\n#     Generate random filters for the query containing:\n#     - NUMBER_OF_ACL_ENTRIES_PER_QUERY user emails\n#     - NUMBER_OF_ACL_ENTRIES_PER_QUERY groups\n#     - NUMBER_OF_ACL_ENTRIES_PER_QUERY external groups\n#     - NUMBER_OF_DOC_SETS_PER_QUERY document sets\n#     \"\"\"\n#     access_control_list = [\n#         f\"user_email:user_{random.randint(0, TOTAL_ACL_ENTRIES_PER_CATEGORY - 1)}@example.com\",\n#     ]\n#     acl_indices = random.sample(\n#         range(TOTAL_ACL_ENTRIES_PER_CATEGORY), NUMBER_OF_ACL_ENTRIES_PER_QUERY\n#     )\n#     for i in acl_indices:\n#         access_control_list.append(f\"group:group_{acl_indices[i]}\")\n#         access_control_list.append(f\"external_group:external_group_{acl_indices[i]}\")\n\n#     doc_sets = []\n#     doc_set_indices = random.sample(\n#         range(TOTAL_DOC_SETS), NUMBER_OF_ACL_ENTRIES_PER_QUERY\n#     )\n#     for i in doc_set_indices:\n#         doc_sets.append(f\"document_set:Document Set {doc_set_indices[i]}\")\n\n#     return IndexFilters(\n#         source_type=[DocumentSource.GOOGLE_DRIVE],\n#         document_set=doc_sets,\n#         tags=[],\n#         access_control_list=access_control_list,\n#     )\n\n\n# def test_hybrid_retrieval_times(\n#     number_of_queries: int,\n# ) -> None:\n#     with get_session_with_current_tenant() as db_session:\n#         search_settings = get_current_search_settings(db_session)\n#         multipass_config = get_multipass_config(search_settings)\n#         index_name = search_settings.index_name\n\n#     vespa_index = VespaIndex(\n#         index_name=index_name,\n#         secondary_index_name=None,\n#         large_chunks_enabled=multipass_config.enable_large_chunks,\n#         secondary_large_chunks_enabled=None,\n#     )\n\n#     # Generate random queries\n#     queries = [f\"Random Query {i}\" for i in range(number_of_queries)]\n\n#     # Generate random embeddings\n#     embeddings = [\n#         Embedding([random.random() for _ in range(DOC_EMBEDDING_DIM)])\n#         for _ in range(number_of_queries)\n#     ]\n\n#     total_time = 0.0\n#     results = []\n#     for i in range(number_of_queries):\n#         start_time = time.time()\n\n#         vespa_index.hybrid_retrieval(\n#             query=queries[i],\n#             query_embedding=embeddings[i],\n#             final_keywords=None,\n#             filters=_random_filters(),\n#             hybrid_alpha=0.5,\n#             time_decay_multiplier=1.0,\n#             num_to_retrieve=50,\n#             ranking_profile_type=QueryExpansionType.SEMANTIC,\n#             offset=0,\n#             title_content_ratio=0.5,\n#         )\n\n#         end_time = time.time()\n#         query_time = end_time - start_time\n#         total_time += query_time\n#         results.append(query_time)\n\n#         print(f\"Query {i+1}: {query_time:.4f} seconds\")\n\n#     avg_time = total_time / number_of_queries\n#     fast_time = min(results)\n#     slow_time = max(results)\n#     ninety_ninth_percentile = get_slowest_99th_percentile(results)\n#     # Write results to a file\n#     _OUTPUT_PATH = \"query_times_results_large_more.txt\"\n#     with open(_OUTPUT_PATH, \"w\") as f:\n#         f.write(f\"Average query time: {avg_time:.4f} seconds\\n\")\n#         f.write(f\"Fastest query: {fast_time:.4f} seconds\\n\")\n#         f.write(f\"Slowest query: {slow_time:.4f} seconds\\n\")\n#         f.write(f\"99th percentile: {ninety_ninth_percentile:.4f} seconds\\n\")\n#     print(f\"Results written to {_OUTPUT_PATH}\")\n\n#     print(f\"\\nAverage query time: {avg_time:.4f} seconds\")\n#     print(f\"Fastest query: {fast_time:.4f} seconds\")\n#     print(f\"Slowest query: {max(results):.4f} seconds\")\n#     print(f\"99th percentile: {get_slowest_99th_percentile(results):.4f} seconds\")\n\n\n# if __name__ == \"__main__\":\n#     test_hybrid_retrieval_times(number_of_queries=1000)\n"
  },
  {
    "path": "backend/scripts/reencrypt_secrets.py",
    "content": "\"\"\"Re-encrypt secrets under the current ENCRYPTION_KEY_SECRET.\n\nDecrypts all encrypted columns using the old key (or raw decode if the old key\nis empty), then re-encrypts them with the current ENCRYPTION_KEY_SECRET.\n\nUsage (docker):\n    docker exec -it onyx-api_server-1 \\\n        python -m scripts.reencrypt_secrets --old-key \"previous-key\"\n\nUsage (kubernetes):\n    kubectl exec -it <pod> -- \\\n        python -m scripts.reencrypt_secrets --old-key \"previous-key\"\n\nOmit --old-key (or pass \"\") if secrets were not previously encrypted.\n\nFor multi-tenant deployments, pass --tenant-id to target a specific tenant,\nor --all-tenants to iterate every tenant.\n\"\"\"\n\nimport argparse\nimport os\nimport sys\n\nparent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))\nsys.path.append(parent_dir)\n\nfrom onyx.db.rotate_encryption_key import rotate_encryption_key  # noqa: E402\nfrom onyx.db.engine.sql_engine import get_session_with_tenant  # noqa: E402\nfrom onyx.db.engine.sql_engine import SqlEngine  # noqa: E402\nfrom onyx.db.engine.tenant_utils import get_all_tenant_ids  # noqa: E402\nfrom onyx.utils.variable_functionality import global_version  # noqa: E402\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA  # noqa: E402\n\n\ndef _run_for_tenant(tenant_id: str, old_key: str | None, dry_run: bool = False) -> None:\n    print(f\"Re-encrypting secrets for tenant: {tenant_id}\")\n    with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n        results = rotate_encryption_key(db_session, old_key=old_key, dry_run=dry_run)\n\n    if results:\n        for col, count in results.items():\n            print(\n                f\"  {col}: {count} row(s) {'would be ' if dry_run else ''}re-encrypted\"\n            )\n    else:\n        print(\"No rows needed re-encryption.\")\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(\n        description=\"Re-encrypt secrets under the current encryption key.\"\n    )\n    parser.add_argument(\n        \"--old-key\",\n        default=None,\n        help=\"Previous encryption key. Omit or pass empty string if not applicable.\",\n    )\n    parser.add_argument(\n        \"--dry-run\",\n        action=\"store_true\",\n        help=\"Show what would be re-encrypted without making changes.\",\n    )\n\n    tenant_group = parser.add_mutually_exclusive_group()\n    tenant_group.add_argument(\n        \"--tenant-id\",\n        default=None,\n        help=\"Target a specific tenant schema.\",\n    )\n    tenant_group.add_argument(\n        \"--all-tenants\",\n        action=\"store_true\",\n        help=\"Iterate all tenants.\",\n    )\n\n    args = parser.parse_args()\n\n    old_key = args.old_key if args.old_key else None\n\n    global_version.set_ee()\n    SqlEngine.init_engine(pool_size=5, max_overflow=2)\n\n    if args.dry_run:\n        print(\"DRY RUN — no changes will be made\")\n\n    if args.all_tenants:\n        tenant_ids = get_all_tenant_ids()\n        print(f\"Found {len(tenant_ids)} tenant(s)\")\n        failed_tenants: list[str] = []\n        for tid in tenant_ids:\n            try:\n                _run_for_tenant(tid, old_key, dry_run=args.dry_run)\n            except Exception as e:\n                print(f\"  ERROR for tenant {tid}: {e}\")\n                failed_tenants.append(tid)\n        if failed_tenants:\n            print(f\"FAILED tenants ({len(failed_tenants)}): {failed_tenants}\")\n            sys.exit(1)\n    else:\n        tenant_id = args.tenant_id or POSTGRES_DEFAULT_SCHEMA\n        _run_for_tenant(tenant_id, old_key, dry_run=args.dry_run)\n\n    print(\"Done.\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/reset_indexes.py",
    "content": "# This file is purely for development use, not included in any builds\nimport os\nimport sys\nfrom time import sleep\n\nimport requests\nfrom requests.exceptions import RequestException\n\n# makes it so `PYTHONPATH=.` is not required when running this script\nparent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))\nsys.path.append(parent_dir)\n\nfrom onyx.configs.app_configs import DOCUMENT_INDEX_NAME  # noqa: E402\nfrom onyx.document_index.vespa.index import DOCUMENT_ID_ENDPOINT  # noqa: E402\nfrom onyx.utils.logger import setup_logger  # noqa: E402\n\nlogger = setup_logger()\n\n\ndef wipe_vespa_index() -> bool:\n    \"\"\"\n    Wipes the Vespa index by deleting all documents.\n    \"\"\"\n    continuation = None\n    should_continue = True\n    RETRIES = 3\n\n    while should_continue:\n        params = {\"selection\": \"true\", \"cluster\": DOCUMENT_INDEX_NAME}\n        if continuation:\n            params[\"continuation\"] = continuation\n\n        for attempt in range(RETRIES):\n            try:\n                response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)\n                response.raise_for_status()\n\n                response_json = response.json()\n                logger.info(f\"Response: {response_json}\")\n\n                continuation = response_json.get(\"continuation\")\n                should_continue = bool(continuation)\n                break  # Exit the retry loop if the request is successful\n\n            except RequestException:\n                logger.exception(\"Request failed\")\n                sleep(2**attempt)  # Exponential backoff\n        else:\n            logger.error(f\"Max retries ({RETRIES}) exceeded. Exiting.\")\n            return False\n\n    return True\n\n\ndef main() -> int:\n    \"\"\"\n    Main function to execute the script.\n    \"\"\"\n    try:\n        succeeded = wipe_vespa_index()\n    except Exception:\n        logger.exception(\"wipe_vespa_index exceptioned.\")\n        return 1\n\n    if not succeeded:\n        logger.info(\"Vespa index wipe failed.\")\n        return 0\n\n    logger.info(\"Vespa index wiped successfully.\")\n    return 1\n\n\nif __name__ == \"__main__\":\n    sys.exit(main())\n"
  },
  {
    "path": "backend/scripts/reset_postgres.py",
    "content": "import os\nimport sys\n\nimport psycopg2\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.engine.sql_engine import get_sqlalchemy_engine\n\n# makes it so `PYTHONPATH=.` is not required when running this script\nparent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))\nsys.path.append(parent_dir)\n\nfrom onyx.configs.app_configs import POSTGRES_DB  # noqa: E402\nfrom onyx.configs.app_configs import POSTGRES_HOST  # noqa: E402\nfrom onyx.configs.app_configs import POSTGRES_PASSWORD  # noqa: E402\nfrom onyx.configs.app_configs import POSTGRES_PORT  # noqa: E402\nfrom onyx.configs.app_configs import POSTGRES_USER  # noqa: E402\nfrom onyx.db.credentials import create_initial_public_credential  # noqa: E402\n\n\ndef wipe_all_rows(database: str) -> None:\n    conn = psycopg2.connect(\n        dbname=database,\n        user=POSTGRES_USER,\n        password=POSTGRES_PASSWORD,\n        host=POSTGRES_HOST,\n        port=POSTGRES_PORT,\n    )\n    cur = conn.cursor()\n\n    # Disable triggers to prevent foreign key constraints from being checked\n    cur.execute(\"SET session_replication_role = 'replica';\")\n\n    # Fetch all table names in the current database\n    cur.execute(\n        \"\"\"\n        SELECT tablename\n        FROM pg_tables\n        WHERE schemaname = 'public'\n    \"\"\"\n    )\n\n    tables = cur.fetchall()\n\n    for table in tables:\n        table_name = table[0]\n\n        # Don't touch migration history\n        if table_name == \"alembic_version\":\n            continue\n\n        print(f\"Deleting all rows from {table_name}...\")\n        cur.execute(f'DELETE FROM \"{table_name}\"')\n\n    # Re-enable triggers\n    cur.execute(\"SET session_replication_role = 'origin';\")\n\n    conn.commit()\n    cur.close()\n    conn.close()\n    print(\"Finished wiping all rows.\")\n\n\nif __name__ == \"__main__\":\n    print(\"Cleaning up all Onyx tables\")\n    wipe_all_rows(POSTGRES_DB)\n    with Session(get_sqlalchemy_engine(), expire_on_commit=False) as db_session:\n        create_initial_public_credential(db_session)\n    print(\"To keep data consistent, it's best to wipe the document index as well.\")\n    print(\n        \"To be safe, it's best to restart the Onyx services (API Server and Background Tasks\"\n    )\n"
  },
  {
    "path": "backend/scripts/restart_containers.sh",
    "content": "#!/bin/bash\nset -e\n\nSCRIPT_DIR=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" &> /dev/null && pwd )\"\nCOMPOSE_FILE=\"$SCRIPT_DIR/../../deployment/docker_compose/docker-compose.yml\"\nCOMPOSE_DEV_FILE=\"$SCRIPT_DIR/../../deployment/docker_compose/docker-compose.dev.yml\"\n\nstop_and_remove_containers() {\n  docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true\n  docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true\n  docker compose -f \"$COMPOSE_FILE\" -f \"$COMPOSE_DEV_FILE\" --profile opensearch-enabled stop opensearch 2>/dev/null || true\n  docker compose -f \"$COMPOSE_FILE\" -f \"$COMPOSE_DEV_FILE\" --profile opensearch-enabled rm -f opensearch 2>/dev/null || true\n}\n\ncleanup() {\n  echo \"Error occurred. Cleaning up...\"\n  stop_and_remove_containers\n}\n\n# Trap errors and output a message, then cleanup\ntrap 'echo \"Error occurred on line $LINENO. Exiting script.\" >&2; cleanup' ERR\n\n# Usage of the script with optional volume arguments\n# ./restart_containers.sh [vespa_volume] [postgres_volume] [redis_volume]\n# [minio_volume] [--keep-opensearch-data]\n\nKEEP_OPENSEARCH_DATA=false\nPOSITIONAL_ARGS=()\nfor arg in \"$@\"; do\n    if [[ \"$arg\" == \"--keep-opensearch-data\" ]]; then\n        KEEP_OPENSEARCH_DATA=true\n    else\n        POSITIONAL_ARGS+=(\"$arg\")\n    fi\ndone\n\nVESPA_VOLUME=${POSITIONAL_ARGS[0]:-\"\"}\nPOSTGRES_VOLUME=${POSITIONAL_ARGS[1]:-\"\"}\nREDIS_VOLUME=${POSITIONAL_ARGS[2]:-\"\"}\nMINIO_VOLUME=${POSITIONAL_ARGS[3]:-\"\"}\n\n# Stop and remove the existing containers\necho \"Stopping and removing existing containers...\"\nstop_and_remove_containers\n\n# Start the PostgreSQL container with optional volume\necho \"Starting PostgreSQL container...\"\nif [[ -n \"$POSTGRES_VOLUME\" ]]; then\n    docker run -p 5432:5432 --name onyx_postgres -e POSTGRES_PASSWORD=password -d -v $POSTGRES_VOLUME:/var/lib/postgresql/data postgres -c max_connections=250\nelse\n    docker run -p 5432:5432 --name onyx_postgres -e POSTGRES_PASSWORD=password -d postgres -c max_connections=250\nfi\n\n# Start the Vespa container with optional volume\necho \"Starting Vespa container...\"\nif [[ -n \"$VESPA_VOLUME\" ]]; then\n    docker run --detach --name onyx_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 -v $VESPA_VOLUME:/opt/vespa/var vespaengine/vespa:8\nelse\n    docker run --detach --name onyx_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 vespaengine/vespa:8\nfi\n\n# If OPENSEARCH_ADMIN_PASSWORD is not already set, try loading it from\n# .vscode/.env so existing dev setups that stored it there aren't silently\n# broken.\nVSCODE_ENV=\"$SCRIPT_DIR/../../.vscode/.env\"\nif [[ -z \"${OPENSEARCH_ADMIN_PASSWORD:-}\" && -f \"$VSCODE_ENV\" ]]; then\n    set -a\n    # shellcheck source=/dev/null\n    source \"$VSCODE_ENV\"\n    set +a\nfi\n\n# Start the OpenSearch container using the same service from docker-compose that\n# our users use, setting OPENSEARCH_INITIAL_ADMIN_PASSWORD from the env's\n# OPENSEARCH_ADMIN_PASSWORD if it exists, else defaulting to StrongPassword123!.\n# Pass --keep-opensearch-data to preserve the opensearch-data volume across\n# restarts, else the volume is deleted so the container starts fresh.\nif [[ \"$KEEP_OPENSEARCH_DATA\" == \"false\" ]]; then\n    echo \"Deleting opensearch-data volume...\"\n    docker volume rm onyx_opensearch-data 2>/dev/null || true\nfi\necho \"Starting OpenSearch container...\"\ndocker compose -f \"$COMPOSE_FILE\" -f \"$COMPOSE_DEV_FILE\" --profile opensearch-enabled up --force-recreate -d opensearch\n\n# Start the Redis container with optional volume\necho \"Starting Redis container...\"\nif [[ -n \"$REDIS_VOLUME\" ]]; then\n    docker run --detach --name onyx_redis --publish 6379:6379 -v $REDIS_VOLUME:/data redis\nelse\n    docker run --detach --name onyx_redis --publish 6379:6379 redis\nfi\n\n# Start the MinIO container with optional volume\necho \"Starting MinIO container...\"\nif [[ -n \"$MINIO_VOLUME\" ]]; then\n    docker run --detach --name onyx_minio --publish 9004:9000 --publish 9005:9001 -e MINIO_ROOT_USER=minioadmin -e MINIO_ROOT_PASSWORD=minioadmin -v $MINIO_VOLUME:/data minio/minio server /data --console-address \":9001\"\nelse\n    docker run --detach --name onyx_minio --publish 9004:9000 --publish 9005:9001 -e MINIO_ROOT_USER=minioadmin -e MINIO_ROOT_PASSWORD=minioadmin minio/minio server /data --console-address \":9001\"\nfi\n\n# Start the Code Interpreter container\necho \"Starting Code Interpreter container...\"\ndocker run --detach --name onyx_code_interpreter --publish 8000:8000 --user root -v /var/run/docker.sock:/var/run/docker.sock onyxdotapp/code-interpreter:latest bash ./entrypoint.sh code-interpreter-api\n\n# Ensure alembic runs in the correct directory (backend/)\nPARENT_DIR=\"$(dirname \"$SCRIPT_DIR\")\"\ncd \"$PARENT_DIR\"\n\n# Give Postgres a second to start\nsleep 1\n\n# Alembic should be configured in the virtualenv for this repo\nif [[ -f \"../.venv/bin/activate\" ]]; then\n    source ../.venv/bin/activate\nelse\n    echo \"Warning: Python virtual environment not found at .venv/bin/activate; alembic may not work.\"\nfi\n\n# Run Alembic upgrade\necho \"Running Alembic migration...\"\nalembic upgrade head\n\n# Run the following instead of the above if using MT cloud\n# alembic -n schema_private upgrade head\n\necho \"Containers restarted and migration completed.\"\n"
  },
  {
    "path": "backend/scripts/resume_paused_connectors.py",
    "content": "import argparse\n\nimport requests\n\nAPI_SERVER_URL = \"http://localhost:3000\"\nAPI_KEY = \"onyx-api-key\"  # API key here, if auth is enabled\n\n\ndef resume_paused_connectors(\n    api_server_url: str,\n    api_key: str | None,\n    specific_connector_sources: list[str] | None = None,\n) -> None:\n    headers = {\"Content-Type\": \"application/json\"}\n    if api_key:\n        headers[\"Authorization\"] = f\"Bearer {api_key}\"\n\n    # Get all paused connectors\n    response = requests.post(\n        f\"{api_server_url}/api/manage/admin/connector/indexing-status\",\n        headers=headers,\n        json={\"get_all_connectors\": True},\n    )\n    response.raise_for_status()\n\n    indexing_status_response = response.json()\n\n    # Iterate over all connectors and resume paused ones\n    for connectors_by_source in indexing_status_response:\n        if (\n            specific_connector_sources\n            and connectors_by_source[\"source\"] not in specific_connector_sources\n        ):\n            print(f\"Skipping connector source: {connectors_by_source['source']}\")\n            continue\n        connectors = connectors_by_source[\"indexing_statuses\"]\n        for connector in connectors:\n            if connector.get(\"cc_pair_status\"):\n                if connector[\"cc_pair_status\"] == \"PAUSED\":\n                    print(f\"Resuming connector: {connector['name']}\")\n                    response = requests.put(\n                        f\"{api_server_url}/api/manage/admin/cc-pair/{connector['cc_pair_id']}/status\",\n                        json={\"status\": \"ACTIVE\"},\n                        headers=headers,\n                    )\n                    response.raise_for_status()\n                    print(f\"Resumed connector: {connector['name']}\")\n                else:\n                    print(f\"Connector {connector['name']} is not paused\")\n            else:\n                print(f\"Connector {connector['name']} is a Federated Connector\")\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(description=\"Resume paused connectors\")\n    parser.add_argument(\n        \"--api_server_url\",\n        type=str,\n        default=API_SERVER_URL,\n        help=\"The URL of the API server to use. If not provided, will use the default.\",\n    )\n    parser.add_argument(\n        \"--api_key\",\n        type=str,\n        default=None,\n        help=\"The API key to use for authentication. If not provided, no authentication will be used.\",\n    )\n    parser.add_argument(\n        \"--connector_sources\",\n        type=str.lower,\n        nargs=\"+\",\n        help=\"The sources of the connectors to resume. If not provided, will resume all paused connectors.\",\n    )\n    args = parser.parse_args()\n\n    resume_paused_connectors(args.api_server_url, args.api_key, args.connector_sources)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/run_industryrag_bench_questions.py",
    "content": "from __future__ import annotations\n\nimport argparse\nimport asyncio\nimport json\nimport logging\nimport sys\nimport time\nfrom dataclasses import asdict\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Any\nfrom typing import TypedDict\nfrom typing import TypeGuard\n\nimport aiohttp\n\n\nlogging.basicConfig(\n    level=logging.INFO,\n    format=\"%(asctime)s %(levelname)s %(message)s\",\n)\nlogger = logging.getLogger(__name__)\n\n\nDEFAULT_API_BASE = \"http://localhost:3000\"\nINTERNAL_SEARCH_TOOL_NAME = \"internal_search\"\nINTERNAL_SEARCH_IN_CODE_TOOL_ID = \"SearchTool\"\nMAX_REQUEST_ATTEMPTS = 5\nRETRIABLE_STATUS_CODES = {429, 500, 502, 503, 504}\nQUESTION_TIMEOUT_SECONDS = 300\nQUESTION_RETRY_PAUSE_SECONDS = 30\nMAX_QUESTION_ATTEMPTS = 3\n\n\n@dataclass(frozen=True)\nclass QuestionRecord:\n    question_id: str\n    question: str\n\n\n@dataclass(frozen=True)\nclass AnswerRecord:\n    question_id: str\n    answer: str\n    document_ids: list[str]\n\n\n@dataclass(frozen=True)\nclass FailedQuestionRecord:\n    question_id: str\n    error: str\n\n\nclass Citation(TypedDict, total=False):\n    citation_number: int\n    document_id: str\n\n\ndef parse_args() -> argparse.Namespace:\n    parser = argparse.ArgumentParser(\n        description=(\n            \"Submit questions to Onyx chat with internal search forced and write \"\n            \"answers to a JSONL file.\"\n        )\n    )\n    parser.add_argument(\n        \"--questions-file\",\n        type=Path,\n        required=True,\n        help=\"Path to the input questions JSONL file.\",\n    )\n    parser.add_argument(\n        \"--output-file\",\n        type=Path,\n        required=True,\n        help=\"Path to the output answers JSONL file.\",\n    )\n    parser.add_argument(\n        \"--api-key\",\n        type=str,\n        required=True,\n        help=\"API key used to authenticate against Onyx.\",\n    )\n    parser.add_argument(\n        \"--api-base\",\n        type=str,\n        default=DEFAULT_API_BASE,\n        help=(\n            \"Frontend base URL for Onyx. If `/api` is omitted, it will be added \"\n            f\"automatically. Default: {DEFAULT_API_BASE}\"\n        ),\n    )\n    parser.add_argument(\n        \"--parallelism\",\n        type=int,\n        default=1,\n        help=\"Number of questions to process in parallel. Default: 1.\",\n    )\n    parser.add_argument(\n        \"--max-questions\",\n        type=int,\n        default=None,\n        help=\"Optional cap on how many questions to process. Defaults to all.\",\n    )\n    return parser.parse_args()\n\n\ndef normalize_api_base(api_base: str) -> str:\n    normalized = api_base.rstrip(\"/\")\n    if normalized.endswith(\"/api\"):\n        return normalized\n    return f\"{normalized}/api\"\n\n\ndef load_completed_question_ids(output_file: Path) -> set[str]:\n    if not output_file.exists():\n        return set()\n\n    completed_ids: set[str] = set()\n    with output_file.open(\"r\", encoding=\"utf-8\") as file:\n        for line in file:\n            stripped = line.strip()\n            if not stripped:\n                continue\n            try:\n                record = json.loads(stripped)\n            except json.JSONDecodeError:\n                continue\n            question_id = record.get(\"question_id\")\n            if isinstance(question_id, str) and question_id:\n                completed_ids.add(question_id)\n\n    return completed_ids\n\n\ndef load_questions(questions_file: Path) -> list[QuestionRecord]:\n    if not questions_file.exists():\n        raise FileNotFoundError(f\"Questions file not found: {questions_file}\")\n\n    questions: list[QuestionRecord] = []\n    with questions_file.open(\"r\", encoding=\"utf-8\") as file:\n        for line_number, line in enumerate(file, start=1):\n            stripped_line = line.strip()\n            if not stripped_line:\n                continue\n\n            try:\n                payload = json.loads(stripped_line)\n            except json.JSONDecodeError as exc:\n                raise ValueError(\n                    f\"Invalid JSON on line {line_number} of {questions_file}\"\n                ) from exc\n\n            question_id = payload.get(\"question_id\")\n            question = payload.get(\"question\")\n\n            if not isinstance(question_id, str) or not question_id:\n                raise ValueError(\n                    f\"Line {line_number} is missing a non-empty `question_id`.\"\n                )\n            if not isinstance(question, str) or not question:\n                raise ValueError(\n                    f\"Line {line_number} is missing a non-empty `question`.\"\n                )\n\n            questions.append(QuestionRecord(question_id=question_id, question=question))\n\n    return questions\n\n\nasync def read_json_response(\n    response: aiohttp.ClientResponse,\n) -> dict[str, Any] | list[dict[str, Any]]:\n    response_text = await response.text()\n    if response.status >= 400:\n        raise RuntimeError(\n            f\"Request to {response.url} failed with {response.status}: {response_text}\"\n        )\n\n    try:\n        payload = json.loads(response_text)\n    except json.JSONDecodeError as exc:\n        raise RuntimeError(\n            f\"Request to {response.url} returned non-JSON content: {response_text}\"\n        ) from exc\n\n    if not isinstance(payload, (dict, list)):\n        raise RuntimeError(\n            f\"Unexpected response payload type from {response.url}: {type(payload)}\"\n        )\n\n    return payload\n\n\nasync def request_json_with_retries(\n    session: aiohttp.ClientSession,\n    method: str,\n    url: str,\n    headers: dict[str, str],\n    json_payload: dict[str, Any] | None = None,\n) -> dict[str, Any] | list[dict[str, Any]]:\n    backoff_seconds = 1.0\n\n    for attempt in range(1, MAX_REQUEST_ATTEMPTS + 1):\n        try:\n            async with session.request(\n                method=method,\n                url=url,\n                headers=headers,\n                json=json_payload,\n            ) as response:\n                if (\n                    response.status in RETRIABLE_STATUS_CODES\n                    and attempt < MAX_REQUEST_ATTEMPTS\n                ):\n                    response_text = await response.text()\n                    logger.warning(\n                        \"Retryable response from %s on attempt %s/%s: %s %s\",\n                        url,\n                        attempt,\n                        MAX_REQUEST_ATTEMPTS,\n                        response.status,\n                        response_text,\n                    )\n                    await asyncio.sleep(backoff_seconds)\n                    backoff_seconds *= 2\n                    continue\n\n                return await read_json_response(response)\n        except (aiohttp.ClientError, asyncio.TimeoutError) as exc:\n            if attempt == MAX_REQUEST_ATTEMPTS:\n                raise RuntimeError(\n                    f\"Request to {url} failed after {MAX_REQUEST_ATTEMPTS} attempts.\"\n                ) from exc\n\n            logger.warning(\n                \"Request to %s failed on attempt %s/%s: %s\",\n                url,\n                attempt,\n                MAX_REQUEST_ATTEMPTS,\n                exc,\n            )\n            await asyncio.sleep(backoff_seconds)\n            backoff_seconds *= 2\n\n    raise RuntimeError(f\"Request to {url} failed unexpectedly.\")\n\n\ndef extract_document_ids(citation_info: object) -> list[str]:\n    if not isinstance(citation_info, list):\n        return []\n\n    sorted_citations = sorted(\n        (citation for citation in citation_info if _is_valid_citation(citation)),\n        key=_citation_sort_key,\n    )\n\n    document_ids: list[str] = []\n    seen_document_ids: set[str] = set()\n    for citation in sorted_citations:\n        document_id = citation[\"document_id\"]\n        if document_id not in seen_document_ids:\n            seen_document_ids.add(document_id)\n            document_ids.append(document_id)\n\n    return document_ids\n\n\ndef _is_valid_citation(citation: object) -> TypeGuard[Citation]:\n    return (\n        isinstance(citation, dict)\n        and isinstance(citation.get(\"document_id\"), str)\n        and bool(citation[\"document_id\"])\n    )\n\n\ndef _citation_sort_key(citation: Citation) -> int:\n    citation_number = citation.get(\"citation_number\")\n    if isinstance(citation_number, int):\n        return citation_number\n    return sys.maxsize\n\n\nasync def fetch_internal_search_tool_id(\n    session: aiohttp.ClientSession,\n    api_base: str,\n    headers: dict[str, str],\n) -> int:\n    payload = await request_json_with_retries(\n        session=session,\n        method=\"GET\",\n        url=f\"{api_base}/tool\",\n        headers=headers,\n    )\n\n    if not isinstance(payload, list):\n        raise RuntimeError(\"Expected `/tool` to return a list.\")\n\n    for tool in payload:\n        if not isinstance(tool, dict):\n            continue\n\n        if tool.get(\"in_code_tool_id\") == INTERNAL_SEARCH_IN_CODE_TOOL_ID:\n            tool_id = tool.get(\"id\")\n            if isinstance(tool_id, int):\n                return tool_id\n\n    for tool in payload:\n        if not isinstance(tool, dict):\n            continue\n\n        if tool.get(\"name\") == INTERNAL_SEARCH_TOOL_NAME:\n            tool_id = tool.get(\"id\")\n            if isinstance(tool_id, int):\n                return tool_id\n\n    raise RuntimeError(\n        \"Could not find the internal search tool in `/tool`. \"\n        \"Make sure SearchTool is available for this environment.\"\n    )\n\n\nasync def submit_question(\n    session: aiohttp.ClientSession,\n    api_base: str,\n    headers: dict[str, str],\n    internal_search_tool_id: int,\n    question_record: QuestionRecord,\n) -> AnswerRecord:\n    payload = {\n        \"message\": question_record.question,\n        \"chat_session_info\": {\"persona_id\": 0},\n        \"parent_message_id\": None,\n        \"file_descriptors\": [],\n        \"allowed_tool_ids\": [internal_search_tool_id],\n        \"forced_tool_id\": internal_search_tool_id,\n        \"stream\": False,\n    }\n\n    response_payload = await request_json_with_retries(\n        session=session,\n        method=\"POST\",\n        url=f\"{api_base}/chat/send-chat-message\",\n        headers=headers,\n        json_payload=payload,\n    )\n\n    if not isinstance(response_payload, dict):\n        raise RuntimeError(\n            \"Expected `/chat/send-chat-message` to return an object when `stream=false`.\"\n        )\n\n    answer = response_payload.get(\"answer_citationless\")\n    if not isinstance(answer, str):\n        answer = response_payload.get(\"answer\")\n\n    if not isinstance(answer, str):\n        raise RuntimeError(\n            f\"Response for question {question_record.question_id} is missing `answer`.\"\n        )\n\n    return AnswerRecord(\n        question_id=question_record.question_id,\n        answer=answer,\n        document_ids=extract_document_ids(response_payload.get(\"citation_info\")),\n    )\n\n\nasync def generate_answers(\n    questions: list[QuestionRecord],\n    output_file: Path,\n    api_base: str,\n    api_key: str,\n    parallelism: int,\n    skipped: int,\n) -> None:\n    if parallelism < 1:\n        raise ValueError(\"`--parallelism` must be at least 1.\")\n\n    headers = {\n        \"Authorization\": f\"Bearer {api_key}\",\n        \"Content-Type\": \"application/json\",\n    }\n\n    timeout = aiohttp.ClientTimeout(\n        total=None,\n        connect=30,\n        sock_connect=30,\n        sock_read=600,\n    )\n    connector = aiohttp.TCPConnector(limit=parallelism)\n\n    output_file.parent.mkdir(parents=True, exist_ok=True)\n    with output_file.open(\"a\", encoding=\"utf-8\") as file:\n        async with aiohttp.ClientSession(\n            timeout=timeout, connector=connector\n        ) as session:\n            internal_search_tool_id = await fetch_internal_search_tool_id(\n                session=session,\n                api_base=api_base,\n                headers=headers,\n            )\n            logger.info(\"Using internal search tool id %s\", internal_search_tool_id)\n\n            semaphore = asyncio.Semaphore(parallelism)\n            progress_lock = asyncio.Lock()\n            write_lock = asyncio.Lock()\n            completed = 0\n            successful = 0\n            stuck_count = 0\n            failed_questions: list[FailedQuestionRecord] = []\n            remaining_count = len(questions)\n            overall_total = remaining_count + skipped\n            question_durations: list[float] = []\n            run_start_time = time.monotonic()\n\n            def print_progress() -> None:\n                avg_time = (\n                    sum(question_durations) / len(question_durations)\n                    if question_durations\n                    else 0.0\n                )\n                elapsed = time.monotonic() - run_start_time\n                eta = avg_time * (remaining_count - completed) / max(parallelism, 1)\n\n                done = skipped + completed\n                bar_width = 30\n                filled = (\n                    int(bar_width * done / overall_total)\n                    if overall_total\n                    else bar_width\n                )\n                bar = \"█\" * filled + \"░\" * (bar_width - filled)\n                pct = (done / overall_total * 100) if overall_total else 100.0\n\n                parts = (\n                    f\"\\r{bar} {pct:5.1f}% \"\n                    f\"[{done}/{overall_total}] \"\n                    f\"avg {avg_time:.1f}s/q \"\n                    f\"elapsed {elapsed:.0f}s \"\n                    f\"ETA {eta:.0f}s \"\n                    f\"(ok:{successful} fail:{len(failed_questions)}\"\n                )\n                if stuck_count:\n                    parts += f\" stuck:{stuck_count}\"\n                if skipped:\n                    parts += f\" skip:{skipped}\"\n                parts += \")\"\n\n                sys.stderr.write(parts)\n                sys.stderr.flush()\n\n            print_progress()\n\n            async def process_question(question_record: QuestionRecord) -> None:\n                nonlocal completed\n                nonlocal successful\n                nonlocal stuck_count\n\n                last_error: Exception | None = None\n                for attempt in range(1, MAX_QUESTION_ATTEMPTS + 1):\n                    q_start = time.monotonic()\n                    try:\n                        async with semaphore:\n                            result = await asyncio.wait_for(\n                                submit_question(\n                                    session=session,\n                                    api_base=api_base,\n                                    headers=headers,\n                                    internal_search_tool_id=internal_search_tool_id,\n                                    question_record=question_record,\n                                ),\n                                timeout=QUESTION_TIMEOUT_SECONDS,\n                            )\n                    except asyncio.TimeoutError:\n                        async with progress_lock:\n                            stuck_count += 1\n                            logger.warning(\n                                \"Question %s timed out after %ss (attempt %s/%s, \"\n                                \"total stuck: %s) — retrying in %ss\",\n                                question_record.question_id,\n                                QUESTION_TIMEOUT_SECONDS,\n                                attempt,\n                                MAX_QUESTION_ATTEMPTS,\n                                stuck_count,\n                                QUESTION_RETRY_PAUSE_SECONDS,\n                            )\n                            print_progress()\n                        last_error = TimeoutError(\n                            f\"Timed out after {QUESTION_TIMEOUT_SECONDS}s \"\n                            f\"on attempt {attempt}/{MAX_QUESTION_ATTEMPTS}\"\n                        )\n                        await asyncio.sleep(QUESTION_RETRY_PAUSE_SECONDS)\n                        continue\n                    except Exception as exc:\n                        duration = time.monotonic() - q_start\n                        async with progress_lock:\n                            completed += 1\n                            question_durations.append(duration)\n                            failed_questions.append(\n                                FailedQuestionRecord(\n                                    question_id=question_record.question_id,\n                                    error=str(exc),\n                                )\n                            )\n                            logger.exception(\n                                \"Failed question %s (%s/%s)\",\n                                question_record.question_id,\n                                completed,\n                                remaining_count,\n                            )\n                            print_progress()\n                        return\n\n                    duration = time.monotonic() - q_start\n\n                    async with write_lock:\n                        file.write(json.dumps(asdict(result), ensure_ascii=False))\n                        file.write(\"\\n\")\n                        file.flush()\n\n                    async with progress_lock:\n                        completed += 1\n                        successful += 1\n                        question_durations.append(duration)\n                        print_progress()\n                    return\n\n                # All attempts exhausted due to timeouts\n                async with progress_lock:\n                    completed += 1\n                    failed_questions.append(\n                        FailedQuestionRecord(\n                            question_id=question_record.question_id,\n                            error=str(last_error),\n                        )\n                    )\n                    logger.error(\n                        \"Question %s failed after %s timeout attempts (%s/%s)\",\n                        question_record.question_id,\n                        MAX_QUESTION_ATTEMPTS,\n                        completed,\n                        remaining_count,\n                    )\n                    print_progress()\n\n            await asyncio.gather(\n                *(process_question(question_record) for question_record in questions)\n            )\n\n            # Final newline after progress bar\n            sys.stderr.write(\"\\n\")\n            sys.stderr.flush()\n\n            total_elapsed = time.monotonic() - run_start_time\n            avg_time = (\n                sum(question_durations) / len(question_durations)\n                if question_durations\n                else 0.0\n            )\n            stuck_suffix = f\", {stuck_count} stuck timeouts\" if stuck_count else \"\"\n            resume_suffix = (\n                f\" — {skipped} previously completed, \"\n                f\"{skipped + successful}/{overall_total} overall\"\n                if skipped\n                else \"\"\n            )\n            logger.info(\n                \"Done: %s/%s successful in %.1fs (avg %.1fs/question%s)%s\",\n                successful,\n                remaining_count,\n                total_elapsed,\n                avg_time,\n                stuck_suffix,\n                resume_suffix,\n            )\n\n            if failed_questions:\n                logger.warning(\n                    \"%s questions failed:\",\n                    len(failed_questions),\n                )\n                for failed_question in failed_questions:\n                    logger.warning(\n                        \"Failed question %s: %s\",\n                        failed_question.question_id,\n                        failed_question.error,\n                    )\n\n\ndef main() -> None:\n    args = parse_args()\n    questions = load_questions(args.questions_file)\n    api_base = normalize_api_base(args.api_base)\n\n    if args.max_questions is not None:\n        if args.max_questions < 1:\n            raise ValueError(\"`--max-questions` must be at least 1 when provided.\")\n        questions = questions[: args.max_questions]\n\n    completed_ids = load_completed_question_ids(args.output_file)\n    logger.info(\n        \"Found %s already-answered question IDs in %s\",\n        len(completed_ids),\n        args.output_file,\n    )\n    total_before_filter = len(questions)\n    questions = [q for q in questions if q.question_id not in completed_ids]\n    skipped = total_before_filter - len(questions)\n\n    if skipped:\n        logger.info(\n            \"Resuming: %s/%s already answered, %s remaining\",\n            skipped,\n            total_before_filter,\n            len(questions),\n        )\n    else:\n        logger.info(\"Loaded %s questions from %s\", len(questions), args.questions_file)\n\n    if not questions:\n        logger.info(\"All questions already answered. Nothing to do.\")\n        return\n\n    logger.info(\"Writing answers to %s\", args.output_file)\n\n    asyncio.run(\n        generate_answers(\n            questions=questions,\n            output_file=args.output_file,\n            api_base=api_base,\n            api_key=args.api_key,\n            parallelism=args.parallelism,\n            skipped=skipped,\n        )\n    )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/save_load_state.py",
    "content": "# This file is purely for development use, not included in any builds\n# Remember to first to send over the schema information (run API Server)\nimport argparse\nimport json\nimport os\nimport subprocess\n\nimport requests\n\nfrom alembic import command\nfrom alembic.config import Config\nfrom onyx.configs.app_configs import POSTGRES_DB\nfrom onyx.configs.app_configs import POSTGRES_HOST\nfrom onyx.configs.app_configs import POSTGRES_PASSWORD\nfrom onyx.configs.app_configs import POSTGRES_PORT\nfrom onyx.configs.app_configs import POSTGRES_USER\nfrom onyx.document_index.vespa.index import DOCUMENT_ID_ENDPOINT\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\ndef save_postgres(filename: str, container_name: str) -> None:\n    logger.notice(\"Attempting to take Postgres snapshot\")\n    cmd = f\"docker exec {container_name} pg_dump -U {POSTGRES_USER} -h {POSTGRES_HOST} -p {POSTGRES_PORT} -W -F t {POSTGRES_DB}\"\n    with open(filename, \"w\") as file:\n        subprocess.run(\n            cmd,\n            shell=True,\n            check=True,\n            stdout=file,\n            text=True,\n            input=f\"{POSTGRES_PASSWORD}\\n\",\n        )\n\n\ndef load_postgres(filename: str, container_name: str) -> None:\n    logger.notice(\"Attempting to load Postgres snapshot\")\n    try:\n        alembic_cfg = Config(\"alembic.ini\")\n        command.upgrade(alembic_cfg, \"head\")\n    except Exception as e:\n        logger.error(f\"Alembic upgrade failed: {e}\")\n\n    host_file_path = os.path.abspath(filename)\n\n    copy_cmd = f\"docker cp {host_file_path} {container_name}:/tmp/\"\n    subprocess.run(copy_cmd, shell=True, check=True)\n\n    container_file_path = f\"/tmp/{os.path.basename(filename)}\"\n\n    restore_cmd = (\n        f\"docker exec {container_name} pg_restore --clean -U {POSTGRES_USER} \"\n        f\"-h localhost -p {POSTGRES_PORT} -d {POSTGRES_DB} -1 -F t {container_file_path}\"\n    )\n    subprocess.run(restore_cmd, shell=True, check=True)\n\n\ndef save_vespa(filename: str) -> None:\n    logger.notice(\"Attempting to take Vespa snapshot\")\n    continuation = \"\"\n    params = {}\n    doc_jsons: list[dict] = []\n    while continuation is not None:\n        if continuation:\n            params = {\"continuation\": continuation}\n        response = requests.get(DOCUMENT_ID_ENDPOINT, params=params)\n        response.raise_for_status()\n        found = response.json()\n        continuation = found.get(\"continuation\")\n        docs = found[\"documents\"]\n        for doc in docs:\n            doc_json = {\"update\": doc[\"id\"], \"create\": True, \"fields\": doc[\"fields\"]}\n            doc_jsons.append(doc_json)\n\n    with open(filename, \"w\") as jsonl_file:\n        for doc in doc_jsons:\n            json_str = json.dumps(doc)\n            jsonl_file.write(json_str + \"\\n\")\n\n\ndef load_vespa(filename: str) -> None:\n    headers = {\"Content-Type\": \"application/json\"}\n    with open(filename, \"r\") as f:\n        for line in f:\n            new_doc = json.loads(line.strip())\n            doc_id = new_doc[\"update\"].split(\"::\")[-1]\n            response = requests.post(\n                DOCUMENT_ID_ENDPOINT + \"/\" + doc_id,\n                headers=headers,\n                json=new_doc,\n            )\n            response.raise_for_status()\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"Onyx checkpoint saving and loading.\")\n    parser.add_argument(\n        \"--save\", action=\"store_true\", help=\"Save Onyx state to directory.\"\n    )\n    parser.add_argument(\n        \"--load\", action=\"store_true\", help=\"Load Onyx state from save directory.\"\n    )\n    parser.add_argument(\n        \"--postgres_container_name\",\n        type=str,\n        default=\"onyx-relational_db-1\",\n        help=\"Name of the postgres container to dump\",\n    )\n    parser.add_argument(\n        \"--checkpoint_dir\",\n        type=str,\n        default=os.path.join(\"..\", \"onyx_checkpoint\"),\n        help=\"A directory to store temporary files to.\",\n    )\n\n    args = parser.parse_args()\n    checkpoint_dir = args.checkpoint_dir\n    postgres_container = args.postgres_container_name\n\n    if not os.path.exists(checkpoint_dir):\n        os.makedirs(checkpoint_dir)\n\n    if not args.save and not args.load:\n        raise ValueError(\"Must specify --save or --load\")\n\n    if args.load:\n        load_postgres(\n            os.path.join(checkpoint_dir, \"postgres_snapshot.tar\"), postgres_container\n        )\n        load_vespa(os.path.join(checkpoint_dir, \"vespa_snapshot.jsonl\"))\n    else:\n        save_postgres(\n            os.path.join(checkpoint_dir, \"postgres_snapshot.tar\"), postgres_container\n        )\n        save_vespa(os.path.join(checkpoint_dir, \"vespa_snapshot.jsonl\"))\n"
  },
  {
    "path": "backend/scripts/setup_craft_templates.sh",
    "content": "#!/bin/sh\n# Setup Onyx Craft templates\n# This script is called on container startup to ensure Craft templates are ready\n# Set ENABLE_CRAFT=false to skip setup\n\n# Check if Craft is disabled\nif [ \"$ENABLE_CRAFT\" = \"false\" ] || [ \"$ENABLE_CRAFT\" = \"False\" ]; then\n    echo \"Onyx Craft is disabled (ENABLE_CRAFT=false), skipping template setup\"\n    exit 0\nfi\n\nset -e\n\n# Verify opencode CLI is available (installed in Dockerfile)\nif ! command -v opencode >/dev/null 2>&1; then\n    echo \"ERROR: opencode CLI is not available but ENABLE_CRAFT is enabled.\" >&2\n    echo \"opencode is required for Craft agent functionality. Ensure you are using Dockerfile\" >&2\n    echo \"which includes the opencode CLI, or set ENABLE_CRAFT=false to disable Craft.\" >&2\n    exit 1\nfi\n\nCRAFT_BASE=\"/app/onyx/server/features/build/sandbox/kubernetes/docker\"\nDEMO_DATA_ZIP=\"${CRAFT_BASE}/demo_data.zip\"\nDEMO_DATA_DIR=\"${CRAFT_BASE}/demo_data\"\n# Use environment variables if set, otherwise use defaults\nOUTPUTS_TEMPLATE_PATH=\"${OUTPUTS_TEMPLATE_PATH:-${CRAFT_BASE}/templates/outputs}\"\nVENV_TEMPLATE_PATH=\"${VENV_TEMPLATE_PATH:-${CRAFT_BASE}/templates/venv}\"\nWEB_TEMPLATE_PATH=\"${WEB_TEMPLATE_PATH:-${OUTPUTS_TEMPLATE_PATH}/web}\"\nREQUIREMENTS_PATH=\"${CRAFT_BASE}/initial-requirements.txt\"\n\necho \"Setting up Onyx Craft templates...\"\n\n# 1. Unzip demo_data.zip if demo_data directory doesn't exist\nif [ ! -d \"$DEMO_DATA_DIR\" ] && [ -f \"$DEMO_DATA_ZIP\" ]; then\n    echo \"  Extracting demo data...\"\n    cd \"$CRAFT_BASE\" && unzip -q demo_data.zip || { echo \"ERROR: Failed to extract demo data\" >&2; exit 1; }\n    echo \"  Demo data extracted\"\nfi\n\n# 2. Create Python venv template if it doesn't exist\nif [ ! -d \"$VENV_TEMPLATE_PATH\" ] && [ -f \"$REQUIREMENTS_PATH\" ]; then\n    echo \"  Creating Python venv template (this may take 30-60 seconds)...\"\n    python -m venv \"$VENV_TEMPLATE_PATH\"\n    \"$VENV_TEMPLATE_PATH/bin/pip\" install --upgrade pip -q\n    \"$VENV_TEMPLATE_PATH/bin/pip\" install -q -r \"$REQUIREMENTS_PATH\"\n    echo \"  Python venv template created\"\nfi\n\n# 3. Run npm install in web template\nif [ -d \"$WEB_TEMPLATE_PATH\" ]; then\n    if ! command -v npm >/dev/null 2>&1; then\n        echo \"ERROR: npm is not available but ENABLE_CRAFT is enabled.\" >&2\n        echo \"npm is required for Craft web features. Ensure you are using Dockerfile\" >&2\n        echo \"which includes Node.js, or set ENABLE_CRAFT=false to disable Craft.\" >&2\n        exit 1\n    fi\n    # Always remove and reinstall to ensure correct architecture binaries\n    if [ -d \"${WEB_TEMPLATE_PATH}/node_modules\" ]; then\n        echo \"  Removing existing node_modules...\"\n        rm -rf \"${WEB_TEMPLATE_PATH}/node_modules\"\n    fi\n    echo \"  Installing npm packages (this may take 1-2 minutes)...\"\n    cd \"$WEB_TEMPLATE_PATH\" && npm install 2>&1 || { echo \"ERROR: npm install failed\" >&2; exit 1; }\n    echo \"  Web template dependencies installed\"\nfi\n\necho \"Craft template setup complete\"\n"
  },
  {
    "path": "backend/scripts/sources_selection_analysis.py",
    "content": "import argparse\nimport json\nimport os\nimport sys\nimport time\nfrom datetime import datetime\nfrom os import listdir\nfrom os.path import isfile\nfrom os.path import join\nfrom typing import Optional\n\nimport requests\n\nfrom onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME\n\nparent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))\nsys.path.append(parent_dir)\n\nfrom onyx.configs.app_configs import DOCUMENT_INDEX_NAME  # noqa: E402\nfrom onyx.configs.constants import SOURCE_TYPE  # noqa: E402\n\nANALYSIS_FOLDER = f\"{parent_dir}/scripts/.analysisfiles/\"\n\n\ndef color_output(\n    text: str,\n    model: Optional[str] = None,\n    text_color: str = \"white\",\n    bg_color: str = \"black\",\n    text_style: str = \"normal\",\n    text_prefix: str = \"\",\n) -> None:\n    \"\"\"Color and print a text\n\n    Args:\n        text (str): The text to display\n        model (str, optional): A pre-defined output model. Defaults to None.\n        text_color (str, optional): Define the text color. Defaults to \"white\".\n        bg_color (str, optional): Define the background color. Defaults to \"black\".\n        text_style (str, optional): Define the text style. Defaults to \"normal\".\n        text_prefix (str, optional): Set a text prefix. Defaults to \"\".\n    \"\"\"\n    if model:\n        if model == \"alert\":\n            text_color = \"black\"\n            bg_color = \"red\"\n            text_style = \"bold\"\n        elif model == \"critical\":\n            text_prefix = \"CRITICAL: \"\n            text_color = \"white\"\n            bg_color = \"red\"\n            text_style = \"bold\"\n        elif model == \"note\":\n            text_color = \"yellow\"\n            bg_color = \"transparent\"\n            text_style = \"normal\"\n        elif model == \"info\":\n            text_prefix = \"INFO:     \"\n            text_color = \"black\"\n            bg_color = \"yellow\"\n            text_style = \"bold\"\n        elif model == \"info2\":\n            text_prefix = \"INFO:     \"\n            text_color = \"black\"\n            bg_color = \"white\"\n            text_style = \"bold\"\n        elif model == \"valid\":\n            text_prefix = \"INFO:     \"\n            text_color = \"white\"\n            bg_color = \"green\"\n            text_style = \"bold\"\n        elif model == \"debug\":\n            text_prefix = \"DEBUG:    \"\n            text_color = \"blue\"\n            bg_color = \"transparent\"\n            text_style = \"bold\"\n\n    text_colors = {\n        \"black\": 30,\n        \"red\": 31,\n        \"green\": 32,\n        \"yellow\": 33,\n        \"blue\": 34,\n        \"purple\": 35,\n        \"cian\": 36,\n        \"white\": 37,\n    }\n    bg_colors = {\n        \"black\": 40,\n        \"red\": 41,\n        \"green\": 42,\n        \"yellow\": 43,\n        \"blue\": 44,\n        \"purple\": 45,\n        \"cian\": 46,\n        \"white\": 47,\n        \"transparent\": 49,\n    }\n    text_styles = {\n        \"normal\": 0,\n        \"bold\": 1,\n        \"light\": 2,\n        \"italicized\": 3,\n        \"underlined\": 4,\n        \"blink\": 5,\n    }\n    print(\n        f\"\\033[{text_styles[text_style]};{text_colors[text_color]};{bg_colors[bg_color]}m {text_prefix} {text} \\033[0;0m\"\n    )\n\n\nclass CompareAnalysis:\n    def __init__(\n        self, query: str, previous_content: dict, new_content: dict, threshold: float\n    ) -> None:\n        \"\"\"Make the comparison between 2 analysis for a specific query\n\n        Args:\n            query (str): The analysed query\n            previous_content (dict): The previous analysis content for the selected query\n            new_content (dict): The new analysis content for the selected query\n            threshold (float): The minimum difference (percentage) between scores to raise an anomaly\n        \"\"\"\n        self._query = query\n        self._previous_content = previous_content\n        self._new_content = new_content\n        self._threshold = threshold\n\n    def _identify_diff(self, content_key: str) -> list[dict]:\n        \"\"\"Try to identify differences between the two analysis based\n            on the selected analysis key.\n\n        Args:\n            content_key (str): The analysis item's key to compare the versions.\n                                Examples: score / document_id\n\n        Returns:\n            list[dict]: List of dict representing the information regarding the difference\n                        Format: {\n                                    \"previous_rank\": XX,\n                                    \"new_rank\": XX,\n                                    \"document_id\": XXXX,\n                                    \"previous_score\": XX,\n                                    \"new_score\": XX,\n                                    \"score_change_pct\": XX\n                                }\n        \"\"\"\n        changes = []\n\n        previous_content = {\n            k: v[content_key] for k, v in self._previous_content.items()\n        }\n        new_content = {k: v[content_key] for k, v in self._new_content.items()}\n\n        if previous_content != new_content:\n            for pos, data in previous_content.items():\n                if data != new_content[pos]:\n                    try:\n                        score_change_pct = round(\n                            (\n                                abs(\n                                    self._new_content[pos][\"score\"]\n                                    - self._previous_content[pos][\"score\"]\n                                )\n                                / self._new_content[pos][\"score\"]\n                            )\n                            * 100.0,\n                            2,\n                        )\n                    except ZeroDivisionError:\n                        score_change_pct = 0\n\n                    changes.append(\n                        {\n                            \"previous_rank\": pos,\n                            \"new_rank\": (\n                                pos\n                                if content_key == \"score\"\n                                else {\n                                    \"x\": k for k, v in new_content.items() if v == data\n                                }.get(\"x\", \"not_ranked\")\n                            ),\n                            \"document_id\": self._previous_content[pos][\"document_id\"],\n                            \"previous_score\": self._previous_content[pos][\"score\"],\n                            \"new_score\": self._new_content[pos][\"score\"],\n                            \"score_change_pct\": score_change_pct,\n                        }\n                    )\n        return changes\n\n    def check_config_changes(\n        self, previous_doc_rank: int | str, new_doc_rank: int\n    ) -> None:\n        \"\"\"Try to identify possible reasons why a change has been detected by\n            checking the latest document update date or the boost value.\n\n        Args:\n            previous_doc_rank (int): The document rank for the previous analysis\n            new_doc_rank (int): The document rank for the new analysis\n        \"\"\"\n        if isinstance(new_doc_rank, str) and new_doc_rank == \"not_ranked\":\n            color_output(\n                (\n                    \"NOTE: The document is missing in the 'current' analysis file. \"\n                    \"Unable to identify more details about the reason for the change.\"\n                ),\n                model=\"note\",\n            )\n            return None\n\n        if (\n            self._previous_content[previous_doc_rank][\"boost\"]\n            != self._new_content[new_doc_rank][\"boost\"]\n        ):\n            color_output(\n                \"NOTE: The 'boost' value has been changed which (maybe) explains the change.\",\n                model=\"note\",\n            )\n            color_output(\n                (\n                    f\"Previously it was '{self._previous_content[previous_doc_rank]['boost']}' \"\n                    f\"and now is set to '{self._new_content[new_doc_rank]['boost']}'\"\n                ),\n                model=\"note\",\n            )\n        if (\n            self._previous_content[previous_doc_rank][\"updated_at\"]\n            != self._new_content[new_doc_rank][\"updated_at\"]\n        ):\n            color_output(\"NOTE: The document seems to have been updated.\", model=\"note\")\n            color_output(\n                (\n                    f\"Previously the updated date was '{self._previous_content[previous_doc_rank]['updated_at']}' \"\n                    f\"and now is '{self._new_content[new_doc_rank]['updated_at']}'\"\n                ),\n                model=\"note\",\n            )\n\n    def check_documents_score(self) -> bool:\n        \"\"\"Check if the scores have changed between analysis.\n\n        Returns:\n            bool: True if at least one change has been detected. False otherwise.\n        \"\"\"\n        color_output(\"Checking documents Score....\", model=\"info\")\n        color_output(\n            f\"Differences under '{self._threshold}%' are ignored (based on the '--threshold' argument)\",\n            model=\"info\",\n        )\n\n        if diff := [\n            x\n            for x in self._identify_diff(\"score\")\n            if x[\"score_change_pct\"] > self._threshold\n        ]:\n            color_output(\"<<<<< Changes detected >>>>>\", model=\"alert\")\n            for change in diff:\n                color_output(\"-\" * 100)\n                color_output(\n                    (\n                        f\"The document '{change['document_id']}' (rank: {change['previous_rank']}) \"\n                        f\"score has a changed of {change['score_change_pct']}%\"\n                    )\n                )\n                color_output(f\"previous score: {change['previous_score']}\")\n                color_output(f\"current score:  {change['new_score']}\")\n                self.check_config_changes(change[\"previous_rank\"], change[\"new_rank\"])\n\n            color_output(\"<<<<< End of changes >>>>>\", model=\"alert\")\n            color_output(f\"Number of changes detected {len(diff)}\", model=\"info\")\n        else:\n            color_output(\"No change detected\", model=\"valid\")\n        color_output(\"Documents Score check completed.\", model=\"info\")\n\n        return False if diff else True\n\n    def check_documents_order(self) -> bool:\n        \"\"\"Check if the selected documents are the same and in the same order.\n\n        Returns:\n            bool: True if at least one change has been detected. False otherwise.\n        \"\"\"\n        color_output(\"Checking documents Order....\", model=\"info\")\n\n        if diff := self._identify_diff(\"document_id\"):\n            color_output(\"<<<<< Changes detected >>>>>\", model=\"alert\")\n            for change in diff:\n                color_output(\"-\" * 100)\n                color_output(\n                    (\n                        f\"The document '{change['document_id']}' was at a rank \"\n                        f\"'{change['previous_rank']}' but now is at rank '{change['new_rank']}'\"\n                    )\n                )\n                color_output(f\"previous score: {change['previous_score']}\")\n                color_output(f\"current score:  {change['new_score']}\")\n                self.check_config_changes(change[\"previous_rank\"], change[\"new_rank\"])\n            color_output(\"<<<<< End of changes >>>>>\", model=\"alert\")\n            color_output(f\"Number of changes detected {len(diff)}\", model=\"info\")\n\n        else:\n            color_output(\"No change detected\", model=\"valid\")\n        color_output(\"Documents order check completed.\", model=\"info\")\n\n        return False if diff else True\n\n    def __call__(self) -> None:\n        \"\"\"Manage the analysis process\"\"\"\n        if not self.check_documents_order():\n            color_output(\n                \"Skipping other checks as the documents order has changed\", model=\"info\"\n            )\n            return None\n\n        self.check_documents_score()\n\n\nclass SelectionAnalysis:\n    def __init__(\n        self,\n        exectype: str,\n        analysisfiles: list = [],\n        queries: list = [],\n        threshold: float = 0.0,\n        web_port: int = 3000,\n        auth_cookie: str = \"\",\n        wait: int = 10,\n    ) -> None:\n        \"\"\"\n\n        Args:\n            exectype (str): The execution mode (new or compare)\n            analysisfiles (list, optional): List of analysis files to compare or if only one, to use as the base. Defaults to [].\n                                        Requiered only by the 'compare' mode\n            queries (list, optional): The queries to analysed. Defaults to [].\n                                        Required only by the 'new' mode\n            threshold (float, optional): The minimum difference (percentage) between scores to raise an anomaly\n            web_port (int, optional): The port of the UI. Defaults to 3000 (local exec port)\n            auth_cookie (str, optional): The Auth cookie value (fastapiusersauth). Defaults to None.\n            wait (int, optional): The waiting time (in seconds) to respect between queries.\n                                    It is helpful to avoid hitting the Generative AI rate limiting.\n        \"\"\"\n        self._exectype = exectype\n        self._analysisfiles = analysisfiles\n        self._queries = queries\n        self._threshold = threshold\n        self._web_port = web_port\n        self._auth_cookie = auth_cookie\n        self._wait = wait\n\n    def _wait_between_queries(self, query: str) -> None:\n        \"\"\"If there are remaining queries, waits for the defined time.\n\n        Args:\n            query (str): The latest executed query\n        \"\"\"\n        if query != self._queries[-1]:\n            color_output(f\"Next query in {self._wait} seconds\", model=\"debug\")\n            time.sleep(self._wait)\n\n    def prepare(self) -> bool:\n        \"\"\"Create the requirements to execute this script\n\n        Returns:\n            bool: True if all the requirements are setup. False otherwise\n        \"\"\"\n        try:\n            os.makedirs(ANALYSIS_FOLDER, exist_ok=True)\n            return True\n        except Exception as e:\n            color_output(f\"Unable to setup the requirements: {e}\", model=\"critical\")\n            return False\n\n    def do_request(self, query: str) -> dict:\n        \"\"\"Request the Onyx API\n\n        Args:\n            query (str): A query\n\n        Returns:\n            dict: The Onyx API response content\n        \"\"\"\n        cookies = (\n            {FASTAPI_USERS_AUTH_COOKIE_NAME: self._auth_cookie}\n            if self._auth_cookie\n            else {}\n        )\n\n        endpoint = f\"http://127.0.0.1:{self._web_port}/api/direct-qa\"\n        query_json = {\n            \"query\": query,\n            \"collection\": DOCUMENT_INDEX_NAME,\n            \"filters\": {SOURCE_TYPE: None},\n            \"enable_auto_detect_filters\": True,\n            \"search_type\": \"hybrid\",\n            \"offset\": 0,\n            \"favor_recent\": True,\n        }\n        try:\n            response = requests.post(endpoint, json=query_json, cookies=cookies)\n            if response.status_code != 200:\n                color_output(\n                    (\n                        f\"something goes wrong while requesting the Onyx API for the query '{query}': {response.text}\"\n                    ),\n                    model=\"critical\",\n                )\n                sys.exit(1)\n        except Exception as e:\n            color_output(\n                f\"Unable to request the Onyx API for the query '{query}': {e}\",\n                model=\"critical\",\n            )\n            sys.exit(1)\n\n        return json.loads(response.content)\n\n    def get_analysis_files(self) -> list[str]:\n        \"\"\"Returns the list of existing analysis files.\n\n        Returns:\n            list[str]: List of filename\n        \"\"\"\n        return [f for f in listdir(ANALYSIS_FOLDER) if isfile(join(ANALYSIS_FOLDER, f))]\n\n    def get_analysis_file_content(self, filename: str) -> list[dict]:\n        \"\"\"Returns the content of an analysis file\n\n        Args:\n            filename (str): The analysis filename\n\n        Returns:\n            list[dict]: Content of the selected file\n        \"\"\"\n        with open(f\"{ANALYSIS_FOLDER}{filename}\", \"r\") as f:\n            return json.load(f)\n\n    def extract_content(self, contents: dict) -> dict:\n        \"\"\"Extract the content returns by the Onyx API\n\n        Args:\n            contents (dict): The onyx response content\n\n        Returns:\n            dict: Data regarding the selected sources document\n        \"\"\"\n        return {\n            pos: doc\n            for pos, doc in enumerate(\n                sorted(\n                    contents[\"top_ranked_docs\"], key=lambda d: d[\"score\"], reverse=True\n                )[:5]\n            )\n        }\n\n    def save_analysisfile(self, content: list[dict]) -> Optional[str]:\n        \"\"\"Save the extracted content\n\n        Args:\n            content (list[dict]): The content to save\n\n        Returns:\n            str: The filname\n        \"\"\"\n        filename = datetime.now().strftime(\"%Y_%m_%d-%I_%M_%S\")\n        analysis_file = f\"{ANALYSIS_FOLDER}{filename}.json\"\n\n        try:\n            with open(analysis_file, \"w\") as f:\n                json.dump(content, f, indent=4)\n        except Exception as e:\n            color_output(f\"Unable to create the analysis file: {e}\", model=\"critical\")\n            return None\n\n        color_output(f\"Analysis file created: {analysis_file}\", model=\"debug\")\n        return analysis_file\n\n    def new(self) -> Optional[str]:\n        \"\"\"Manage the process to create a new analysis file\n            based on the submitted queries\n\n        Returns:\n            str: The new filename with the analysis content\n        \"\"\"\n        if not self._queries:\n            color_output(\"Missing queries\", model=\"critical\")\n            sys.exit(1)\n\n        color_output(\"Generating a new analysis file...\", model=\"debug\")\n        analysisfile = []\n\n        for query in self._queries:\n            color_output(f\"Gathering data of the query: '{query}'\", model=\"info2\")\n            contents = self.do_request(query)\n\n            analysisfile.append(\n                {\"query\": query, \"selected_documents\": self.extract_content(contents)}\n            )\n            color_output(\"Data gathered\", model=\"info2\")\n            self._wait_between_queries(query)\n\n        return self.save_analysisfile(analysisfile)\n\n    def compare(\n        self,\n        previous_analysisfile_content: list[dict],\n        new_analysisfile_content: list[dict],\n    ) -> None:\n        \"\"\"Manage the process to compare two analysis\n\n        Args:\n            previous_analysisfile_content (list): Previous content analysis\n            new_analysisfile_content (list): New content analysis\n        \"\"\"\n        for query in self._queries:\n            # Extract data regarding the selected source documents\n            prev_querie_content = [\n                x for x in previous_analysisfile_content if x[\"query\"] == query\n            ][0][\"selected_documents\"]\n            new_querie_content = [\n                x for x in new_analysisfile_content if x[\"query\"] == query\n            ][0][\"selected_documents\"]\n\n            color_output(f\"Analysing the query: '{query}'\", model=\"info2\")\n            CompareAnalysis(\n                query, prev_querie_content, new_querie_content, self._threshold\n            )()\n            color_output(f\"Analyse completed for the query: '{query}'\", model=\"info2\")\n            self._wait_between_queries(query)\n\n        color_output(\"All the defined queries have been evaluated.\", model=\"info2\")\n\n    def validate_analysisfiles(self) -> bool:\n        \"\"\"Validate that the selected analysis files exist\n\n        Returns:\n            bool: True if all of them exist. False otherwise\n        \"\"\"\n        existing_analysisfiles = self.get_analysis_files()\n\n        if missing_analysisfiles := [\n            x for x in self._analysisfiles if x not in existing_analysisfiles\n        ]:\n            color_output(\n                f\"Missing analysis file(s) '{', '.join(missing_analysisfiles)}' - NOT FOUND\",\n                model=\"critical\",\n            )\n            analysisfiles = \"\\n \".join(existing_analysisfiles)\n            color_output(\"Available analysis files:\", model=\"info2\")\n            color_output(analysisfiles)\n            return False\n\n        return True\n\n    def __call__(self) -> None:\n        if not self.prepare():\n            sys.exit(1)\n\n        if self._exectype == \"new\":\n            self.new()\n\n        elif self._exectype == \"compare\":\n            self._analysisfiles = [\n                x.replace(\".json\", \"\") + \".json\" for x in self._analysisfiles\n            ]\n\n            if not self.validate_analysisfiles():\n                sys.exit(1)\n\n            color_output(\n                \"Extracting queries from the existing analysis file...\", model=\"debug\"\n            )\n            previous_analysisfile_content = self.get_analysis_file_content(\n                self._analysisfiles[0]\n            )\n\n            # Extract the queries\n            self._queries = sorted([x[\"query\"] for x in previous_analysisfile_content])\n            color_output(\n                f\"Extracted queries: {', '.join(self._queries)}\", model=\"debug\"\n            )\n\n            if len(self._analysisfiles) == 1:\n                if new_file := self.new():\n                    new_analysisfile_content = self.get_analysis_file_content(\n                        new_file.split(\"/\")[-1:][0]\n                    )\n                    return self.compare(\n                        previous_analysisfile_content, new_analysisfile_content\n                    )\n                else:\n                    color_output(\n                        \"Unable to generate a new analysis file\", model=\"critical\"\n                    )\n                    sys.exit(1)\n            else:\n                color_output(\n                    (\n                        f\"For the rest of this execution, the analysis file '{self._analysisfiles[0]}' \"\n                        f\"is identified as 'previous' and '{self._analysisfiles[1]}' as 'current'\"\n                    ),\n                    model=\"info2\",\n                )\n                new_analysisfile_content = self.get_analysis_file_content(\n                    self._analysisfiles[1]\n                )\n                new_queries = sorted([x[\"query\"] for x in new_analysisfile_content])\n                if new_queries != self._queries:\n                    color_output(\n                        \"Unable to compare analysis files as the queries are differents\",\n                        model=\"critical\",\n                    )\n                    sys.exit(1)\n                self.compare(previous_analysisfile_content, new_analysisfile_content)\n\n\ndef validate_cmd_args(args: argparse.Namespace) -> bool:\n    \"\"\"Validate the CMD arguments\n\n    Args:\n        args (argparse.Namespace): The argparse data input\n\n    Returns:\n        bool: True if the CMD arguments are valid. False otherwise\n    \"\"\"\n    if not args.execution:\n        color_output(\n            \"Missing argument. The execution mode ('--execution') must be defined ('new' or 'compare')\",\n            model=\"critical\",\n        )\n        return False\n    if args.execution == \"new\" and not args.q__queries:\n        color_output(\n            \"Missing argument. When the execution type is set to 'new' the '--queries' argument must be defined\",\n            model=\"critical\",\n        )\n        return False\n    elif args.execution == \"compare\":\n        if not args.files:\n            color_output(\n                \"Missing argument. When the execution type is set to 'compare' the '--files' argument must be defined\",\n                model=\"critical\",\n            )\n            return False\n        elif len(args.files) > 2:\n            color_output(\n                \"Too many arguments. The '--files' argument cannot be repeated more than 2 times.\",\n                model=\"critical\",\n            )\n            return False\n    return True\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"-a\",\n        \"--auth\",\n        type=str,\n        default=None,\n        help=(\n            \"Currently, to get this script working when the Onyx Auth is \"\n            \"enabled, you must extract from the UI your cookie 'fastapiusersauth' \"\n            \"and then set it using this argument\"\n        ),\n    )\n    parser.add_argument(\n        \"-e\",\n        \"--execution\",\n        type=str,\n        choices=[\"new\", \"compare\"],\n        default=None,\n        help=(\n            \"The execution type. Must be 'new' to generate a new analysis file \"\n            \"or 'compare' to compare a previous execution with a new one based on the same queries\"\n        ),\n    )\n    parser.add_argument(\n        \"-f\",\n        \"--files\",\n        action=\"extend\",\n        default=[],\n        nargs=1,\n        help=(\n            \"Analysis file(s) to use for the comparison. Required if the execution arg is set \"\n            \"to 'compare'. NOTE: By repeating this argument, you can make a comparison between \"\n            \"two specific executions. If not repeated, a new execution will be performed and \"\n            \"compared with the selected one.\"\n        ),\n    )\n    parser.add_argument(\n        \"-p\",\n        \"--port\",\n        type=int,\n        default=3000,\n        help=(\n            \"The Onyx Web (not the API) port. We use the UI to forward the requests to the API. \"\n            \"It should be '3000' for local dev and '80' if Onyx runs using docker compose.\"\n        ),\n    )\n    parser.add_argument(\n        \"-q--queries\",\n        type=str,\n        action=\"extend\",\n        default=[],\n        nargs=1,\n        help=(\n            \"The query to evaluate. Required if the execution arg is set to 'new'. \"\n            \"NOTE: This argument can be repeated multiple times\"\n        ),\n    )\n    parser.add_argument(\n        \"-t\",\n        \"--threshold\",\n        type=float,\n        default=0.0,\n        help=\"The minimum score change (percentage) to detect an issue.\",\n    )\n    parser.add_argument(\n        \"-w\",\n        \"--wait\",\n        type=int,\n        default=10,\n        help=(\n            \"The waiting time (in seconds) to respect between queries. \"\n            \"It is helpful to avoid hitting the Generative AI rate limiting.\"\n        ),\n    )\n\n    args = parser.parse_args()\n    if not validate_cmd_args(args):\n        sys.exit(1)\n\n    SelectionAnalysis(\n        args.execution,\n        args.files,\n        args.q__queries,\n        args.threshold,\n        args.port,\n        args.auth,\n        args.wait,\n    )()\n"
  },
  {
    "path": "backend/scripts/supervisord_entrypoint.sh",
    "content": "#!/bin/sh\n# Entrypoint script for supervisord\n\n# Launch supervisord with environment variables available\nexec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/QUICK_START_NO_BASTION.md",
    "content": "# Quick Start: Tenant Cleanup Without Bastion\n\n## TL;DR - The Commands You Need\n\n```bash\n# Navigate to backend directory\ncd onyx/backend\n\n# Step 1: Generate CSV of tenants to clean (5-10 min)\nPYTHONPATH=. python scripts/tenant_cleanup/no_bastion_analyze_tenants.py\n\n# Step 2: Mark connectors for deletion (1-2 min)\nPYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py \\\n  --csv gated_tenants_no_query_3mo_*.csv \\\n  --force \\\n  --concurrency 16\n\n# ⏰ WAIT 6+ hours for background deletion to complete\n\n# Step 3: Final cleanup (1-2 min)\nPYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py \\\n  --csv gated_tenants_no_query_3mo_*.csv \\\n  --force\n```\n\n## What Changed?\n\nInstead of the original scripts that require bastion access:\n- `analyze_current_tenants.py` → `no_bastion_analyze_tenants.py`\n- `mark_connectors_for_deletion.py` → `no_bastion_mark_connectors.py`\n- `cleanup_tenants.py` → `no_bastion_cleanup_tenants.py`\n\n**No environment variables needed!** All queries run directly from pods.\n\n## What You Need\n\n✅ `kubectl` access to your cluster\n✅ Running `celery-worker-user-file-processing` pods\n✅ Permission to exec into pods\n\n❌ No bastion host required\n❌ No SSH keys required\n❌ No environment variables required\n\n## Test Your Setup\n\n```bash\n# Check if you can find worker pods\nkubectl get po | grep celery-worker-user-file-processing | grep Running\n\n# If you see pods, you're ready to go!\n```\n\n## Important Notes\n\n1. **Step 2 triggers background deletion** - the actual document deletion happens asynchronously via Celery workers\n2. **You MUST wait** between Step 2 and Step 3 for deletion to complete (can take 6+ hours)\n3. **Monitor deletion progress** with: `kubectl logs -f <celery-worker-pod>`\n4. **All scripts verify tenant status** - they'll refuse to process active (non-GATED_ACCESS) tenants\n\n## Files Generated\n\n- `gated_tenants_no_query_3mo_YYYYMMDD_HHMMSS.csv` - List of tenants to clean\n- `cleaned_tenants.csv` - Successfully cleaned tenants with timestamps\n\n## Safety First\n\nThe scripts include multiple safety checks:\n- ✅ Verifies tenant status before any operation\n- ✅ Checks documents are deleted before dropping schemas\n- ✅ Prompts for confirmation on dangerous operations (unless `--force`)\n- ✅ Records all successful operations in real-time\n\n## Need More Details?\n\nSee [NO_BASTION_README.md](./NO_BASTION_README.md) for:\n- Detailed explanations of each step\n- Troubleshooting guide\n- How it works under the hood\n- Performance characteristics\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/README.md",
    "content": "## How to Tenant Cleanup\n\nThree main steps.\n\n### Build a list of tenants to cleanup\n\nUse the `analyze_current_tenants.py` script:\n\n```\nPYTHONPATH=. \\\nCONTROL_PLANE_RDS_HOST=<PROD_CONTROL_PLANE_RDS_HOST> \\\nCONTROL_PLANE_RDS_PASSWORD=<PROD_CONTROL_PLANE_RDS_PASSWORD> \\\nBASTION_HOST=<BASTION_IP_ADDRESS> \\\nPEM_FILE_LOCATION=<PEM_FILE_LOCATION_WHICH_GIVES_ACCESS_TO_BASTION> \\\npython scripts/tenant_cleanup/analyze_current_tenants.py\n```\n\nThis will create a `.csv` called something like `gated_tenants_no_query_3mo_20251012_161102.csv` in the `backend` dir.\n\n\n### Delete all documents within these tenants\n\nUse the `mark_connectors_for_deletion.py` script:\n\n```\nPYTHONPATH=. \\\nCONTROL_PLANE_RDS_HOST=<PROD_CONTROL_PLANE_RDS_HOST> \\\nCONTROL_PLANE_RDS_PASSWORD=<PROD_CONTROL_PLANE_RDS_PASSWORD> \\\nBASTION_HOST=<BASTION_IP_ADDRESS> \\\nPEM_FILE_LOCATION=<PEM_FILE_LOCATION_WHICH_GIVES_ACCESS_TO_BASTION> \\\npython scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv gated_tenants_no_query_3mo_<your_datetime>.csv --force\n```\n\nReplace `gated_tenants_no_query_3mo_<your_datetime>.csv` with the CSV name from step (1).\n\nThis will update the data plane database to 1/ cancel all index attempts 2/ mark all connectors as up for deletion.\nWe now need to wait for the deletion to run.\n\nIt's done this way to re-use as much of the existing code + take advantage of existing infra for parallelized, long running jobs. These \ndeletion jobs can take a LONG time (>6hrs), so having it performed syncronously by a script is not really tenable.\n\n\n### Cleanup the tenants\n\nUse the `cleanup_tenants.py` script:\n\n```\nPYTHONPATH=. \\\nCONTROL_PLANE_RDS_HOST=<PROD_CONTROL_PLANE_RDS_HOST> \\\nCONTROL_PLANE_RDS_PASSWORD=<PROD_CONTROL_PLANE_RDS_PASSWORD> \\\nBASTION_HOST=<BASTION_IP_ADDRESS> \\\nPEM_FILE_LOCATION=<PEM_FILE_LOCATION_WHICH_GIVES_ACCESS_TO_BASTION> \\\npython scripts/tenant_cleanup/cleanup_tenants.py --csv gated_tenants_no_query_3mo_<your_datetime>.csv --force\n```\n\nThis will drop the tenant schema from the data plane DB, cleanup the `user_tenant_mapping` table, and \nclean up any control plane DB tables associated with each tenant.\n\nNOTE: if the previous step has not completed, tenants with documents will throw an exception.\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/analyze_current_tenants.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nFull tenant analysis script that:\n1. Finds a heavy worker pod\n2. Runs the tenant data collection script on the pod\n3. Analyzes the collected data\n\"\"\"\n\nimport argparse\nimport csv\nimport json\nimport os\nimport subprocess\nimport sys\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom pathlib import Path\nfrom typing import Any\n\nfrom scripts.tenant_cleanup.cleanup_utils import find_worker_pod\n\n\ndef collect_tenant_data(pod_name: str) -> list[dict[str, Any]]:\n    \"\"\"Run the understand_tenants script on the pod and return the data.\"\"\"\n    print(f\"\\nCollecting tenant data from pod {pod_name}...\")\n\n    # Get the path to the understand_tenants script\n    script_dir = Path(__file__).parent\n    understand_tenants_script = script_dir / \"on_pod_scripts\" / \"understand_tenants.py\"\n\n    if not understand_tenants_script.exists():\n        raise FileNotFoundError(\n            f\"understand_tenants.py not found at {understand_tenants_script}\"\n        )\n\n    # Copy script to pod\n    print(\"Copying script to pod...\")\n    subprocess.run(\n        [\n            \"kubectl\",\n            \"cp\",\n            str(understand_tenants_script),\n            f\"{pod_name}:/tmp/understand_tenants.py\",\n        ],\n        check=True,\n        capture_output=True,\n    )\n\n    # Execute script on pod\n    print(\"Executing script on pod (this may take a while)...\")\n    result = subprocess.run(\n        [\"kubectl\", \"exec\", pod_name, \"--\", \"python\", \"/tmp/understand_tenants.py\"],\n        capture_output=True,\n        text=True,\n        check=True,\n    )\n\n    # Show progress messages from stderr\n    if result.stderr:\n        print(result.stderr, file=sys.stderr)\n\n    # Parse JSON from stdout\n    try:\n        tenant_data = json.loads(result.stdout)\n        print(f\"Successfully collected data for {len(tenant_data)} tenants\")\n        return tenant_data\n    except json.JSONDecodeError as e:\n        print(f\"Failed to parse JSON output: {e}\", file=sys.stderr)\n        print(f\"stdout: {result.stdout[:500]}\", file=sys.stderr)\n        raise\n\n\ndef collect_control_plane_data() -> list[dict[str, Any]]:\n    \"\"\"Collect control plane data from the control plane database.\"\"\"\n    print(\"\\nCollecting control plane data...\")\n\n    rds_host = os.environ.get(\"CONTROL_PLANE_RDS_HOST\")\n    if not rds_host:\n        raise ValueError(\"CONTROL_PLANE_RDS_HOST is not set\")\n\n    rds_password = os.environ.get(\"CONTROL_PLANE_RDS_PASSWORD\")\n    if not rds_password:\n        raise ValueError(\"CONTROL_PLANE_RDS_PASSWORD is not set\")\n\n    db_url = f\"postgresql://postgres:{rds_password}@{rds_host}:5432/control\"\n\n    bastion_host = os.environ.get(\"BASTION_HOST\")\n    if not bastion_host:\n        raise ValueError(\"BASTION_HOST is not set\")\n\n    pem_file_location = os.environ.get(\"PEM_FILE_LOCATION\")\n    if not pem_file_location:\n        raise ValueError(\"PEM_FILE_LOCATION is not set\")\n\n    full_cmd = (\n        f\"ssh -i {pem_file_location} ec2-user@{bastion_host} \"\n        f\"\\\"psql {db_url} -c '\\\\copy (SELECT * FROM tenant) \"\n        f\"to '/tmp/control_plane_data.csv' with (format csv);'\\\"\"\n    )\n\n    result = subprocess.run(\n        full_cmd,\n        shell=True,\n        check=True,\n        capture_output=True,\n        text=True,\n    )\n\n    # Copy the CSV file from the bastion to local machine\n    copy_cmd = f\"scp -i {pem_file_location} ec2-user@{bastion_host}:/tmp/control_plane_data.csv .\"\n\n    copy_result = subprocess.run(\n        copy_cmd, shell=True, check=True, capture_output=True, text=True\n    )\n\n    if copy_result.stderr:\n        print(f\"Copy warnings: {copy_result.stderr}\", file=sys.stderr)\n        raise RuntimeError(\n            \"Failed to copy control plane data from bastion to local machine\"\n        )\n\n    print(\"Control plane data copied to local machine as control_plane_data.csv\")\n\n    print(result.stdout)\n\n    # Read the CSV file and convert to list of dictionaries\n    control_plane_data = []\n    with open(\"control_plane_data.csv\", \"r\", newline=\"\", encoding=\"utf-8\") as csvfile:\n        reader = csv.DictReader(csvfile)\n        for row in reader:\n            control_plane_data.append(row)\n\n    return control_plane_data\n\n\ndef analyze_tenants(\n    tenants: list[dict[str, Any]], control_plane_data: list[dict[str, Any]]\n) -> list[dict[str, Any]]:\n    \"\"\"Analyze tenant activity data and return gated tenants with no query in last 3 months.\"\"\"\n\n    print(f\"\\n{'=' * 80}\")\n    print(f\"TENANT ANALYSIS REPORT - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n    print(f\"{'=' * 80}\")\n    print(f\"Total tenants analyzed: {len(tenants)}\\n\")\n\n    # Create a lookup dict for control plane data by tenant_id\n    control_plane_lookup = {}\n    for row in control_plane_data:\n        # CSV has no header, columns are: tenant_id, stripe_customer_id, created_at,\n        # stripe_subscription_quantity, contact_email, registration_origin, tenant_status\n        if len(row) >= 7:\n            tenant_id = list(row.values())[0]  # First column is tenant_id\n            tenant_status = list(row.values())[6]  # 7th column is tenant_status\n            control_plane_lookup[tenant_id] = tenant_status\n\n    # Calculate cutoff dates\n    one_month_cutoff = datetime.now(timezone.utc) - timedelta(days=30)\n    three_month_cutoff = datetime.now(timezone.utc) - timedelta(days=90)\n\n    # Categorize tenants into 4 groups\n    gated_no_query_3_months = []  # GATED_ACCESS + no query in last 3 months\n    gated_query_1_3_months = []  # GATED_ACCESS + query between 1-3 months\n    gated_query_1_month = []  # GATED_ACCESS + query in last 1 month\n    everyone_else = []  # All other tenants\n\n    for tenant in tenants:\n        tenant_id = tenant.get(\"tenant_id\")\n        last_query_time = tenant.get(\"last_query_time\")\n        tenant_status = control_plane_lookup.get(tenant_id, \"UNKNOWN\")\n\n        is_gated = tenant_status == \"GATED_ACCESS\"\n\n        # Parse last query time\n        if last_query_time:\n            query_time = datetime.fromisoformat(last_query_time.replace(\"Z\", \"+00:00\"))\n        else:\n            query_time = None\n\n        # Categorize\n        if is_gated:\n            if query_time is None or query_time <= three_month_cutoff:\n                gated_no_query_3_months.append(tenant)\n            elif query_time <= one_month_cutoff:\n                gated_query_1_3_months.append(tenant)\n            else:  # query_time > one_month_cutoff\n                gated_query_1_month.append(tenant)\n        else:\n            everyone_else.append(tenant)\n\n    # Calculate document counts for each group\n    gated_no_query_docs = sum(\n        t.get(\"num_documents\", 0) for t in gated_no_query_3_months\n    )\n    gated_1_3_month_docs = sum(\n        t.get(\"num_documents\", 0) for t in gated_query_1_3_months\n    )\n    gated_1_month_docs = sum(t.get(\"num_documents\", 0) for t in gated_query_1_month)\n    everyone_else_docs = sum(t.get(\"num_documents\", 0) for t in everyone_else)\n\n    print(\"=\" * 80)\n    print(\"TENANT CATEGORIZATION BY GATED ACCESS STATUS AND ACTIVITY\")\n    print(\"=\" * 80)\n\n    print(\"\\n1. GATED_ACCESS + No query in last 3 months:\")\n    print(f\"   Count: {len(gated_no_query_3_months):,}\")\n    print(f\"   Total documents: {gated_no_query_docs:,}\")\n    print(\n        f\"   Avg documents per tenant: {gated_no_query_docs / len(gated_no_query_3_months) if gated_no_query_3_months else 0:.2f}\"\n    )\n\n    print(\"\\n2. GATED_ACCESS + Query between 1-3 months ago:\")\n    print(f\"   Count: {len(gated_query_1_3_months):,}\")\n    print(f\"   Total documents: {gated_1_3_month_docs:,}\")\n    print(\n        f\"   Avg documents per tenant: {gated_1_3_month_docs / len(gated_query_1_3_months) if gated_query_1_3_months else 0:.2f}\"\n    )\n\n    print(\"\\n3. GATED_ACCESS + Query in last 1 month:\")\n    print(f\"   Count: {len(gated_query_1_month):,}\")\n    print(f\"   Total documents: {gated_1_month_docs:,}\")\n    print(\n        f\"   Avg documents per tenant: {gated_1_month_docs / len(gated_query_1_month) if gated_query_1_month else 0:.2f}\"\n    )\n\n    print(\"\\n4. Everyone else (non-GATED_ACCESS):\")\n    print(f\"   Count: {len(everyone_else):,}\")\n    print(f\"   Total documents: {everyone_else_docs:,}\")\n    print(\n        f\"   Avg documents per tenant: {everyone_else_docs / len(everyone_else) if everyone_else else 0:.2f}\"\n    )\n\n    total_docs = (\n        gated_no_query_docs\n        + gated_1_3_month_docs\n        + gated_1_month_docs\n        + everyone_else_docs\n    )\n    print(f\"\\nTotal documents across all tenants: {total_docs:,}\")\n\n    # Top 100 tenants by document count\n    print(\"\\n\" + \"=\" * 80)\n    print(\"TOP 100 TENANTS BY DOCUMENT COUNT\")\n    print(\"=\" * 80)\n\n    # Sort all tenants by document count\n    sorted_tenants = sorted(\n        tenants, key=lambda t: t.get(\"num_documents\", 0), reverse=True\n    )\n\n    top_100 = sorted_tenants[:100]\n\n    print(\n        f\"\\n{'Rank':<6} {'Tenant ID':<45} {'Documents':>12} {'Users':>8} {'Last Query':<12} {'Group'}\"\n    )\n    print(\"-\" * 130)\n\n    for idx, tenant in enumerate(top_100, 1):\n        tenant_id = tenant.get(\"tenant_id\", \"Unknown\")\n        num_docs = tenant.get(\"num_documents\", 0)\n        num_users = tenant.get(\"num_users\", 0)\n        last_query = tenant.get(\"last_query_time\", \"Never\")\n        tenant_status = control_plane_lookup.get(tenant_id, \"UNKNOWN\")\n\n        # Format the last query time\n        if last_query and last_query != \"Never\":\n            try:\n                query_dt = datetime.fromisoformat(last_query.replace(\"Z\", \"+00:00\"))\n                last_query_str = query_dt.strftime(\"%Y-%m-%d\")\n            except Exception:\n                last_query_str = last_query[:10] if len(last_query) > 10 else last_query\n        else:\n            last_query_str = \"Never\"\n\n        # Determine group\n        if tenant_status == \"GATED_ACCESS\":\n            if last_query and last_query != \"Never\":\n                query_time = datetime.fromisoformat(last_query.replace(\"Z\", \"+00:00\"))\n                if query_time <= three_month_cutoff:\n                    group = \"Gated - No query (3mo)\"\n                elif query_time <= one_month_cutoff:\n                    group = \"Gated - Query (1-3mo)\"\n                else:\n                    group = \"Gated - Query (1mo)\"\n            else:\n                group = \"Gated - No query (3mo)\"\n        else:\n            group = f\"Other ({tenant_status})\"\n\n        print(\n            f\"{idx:<6} {tenant_id:<45} {num_docs:>12,} {num_users:>8} {last_query_str:<12} {group}\"\n        )\n\n    # Summary stats for top 100\n    top_100_docs = sum(t.get(\"num_documents\", 0) for t in top_100)\n\n    print(\"\\n\" + \"-\" * 110)\n    print(f\"Top 100 total documents: {top_100_docs:,}\")\n    print(\n        f\"Percentage of all documents: {(top_100_docs / total_docs * 100) if total_docs > 0 else 0:.2f}%\"\n    )\n\n    # Additional insights\n    print(\"\\n\" + \"=\" * 80)\n    print(\"ADDITIONAL INSIGHTS\")\n    print(\"=\" * 80)\n\n    # Tenants with no documents\n    no_docs = [t for t in tenants if t.get(\"num_documents\", 0) == 0]\n    print(\n        f\"\\nTenants with 0 documents: {len(no_docs):,} ({len(no_docs) / len(tenants) * 100:.2f}%)\"\n    )\n\n    # Tenants with no users\n    no_users = [t for t in tenants if t.get(\"num_users\", 0) == 0]\n    print(\n        f\"Tenants with 0 users: {len(no_users):,} ({len(no_users) / len(tenants) * 100:.2f}%)\"\n    )\n\n    # Document distribution quartiles\n    doc_counts = sorted([t.get(\"num_documents\", 0) for t in tenants])\n    if doc_counts:\n        print(\"\\nDocument count distribution:\")\n        print(f\"  Median: {doc_counts[len(doc_counts) // 2]:,}\")\n        print(f\"  75th percentile: {doc_counts[int(len(doc_counts) * 0.75)]:,}\")\n        print(f\"  90th percentile: {doc_counts[int(len(doc_counts) * 0.90)]:,}\")\n        print(f\"  95th percentile: {doc_counts[int(len(doc_counts) * 0.95)]:,}\")\n        print(f\"  99th percentile: {doc_counts[int(len(doc_counts) * 0.99)]:,}\")\n        print(f\"  Max: {doc_counts[-1]:,}\")\n\n    return gated_no_query_3_months\n\n\ndef find_recent_tenant_data() -> tuple[list[dict[str, Any]] | None, str | None]:\n    \"\"\"Find the most recent tenant data file if it's less than 7 days old.\"\"\"\n    current_dir = Path.cwd()\n    tenant_data_files = list(current_dir.glob(\"tenant_data_*.json\"))\n\n    if not tenant_data_files:\n        return None, None\n\n    # Sort by modification time, most recent first\n    tenant_data_files.sort(key=lambda p: p.stat().st_mtime, reverse=True)\n    most_recent = tenant_data_files[0]\n\n    # Check if file is less than 7 days old\n    file_age = datetime.now().timestamp() - most_recent.stat().st_mtime\n    seven_days_in_seconds = 7 * 24 * 60 * 60\n\n    if file_age < seven_days_in_seconds:\n        file_age_days = file_age / (24 * 60 * 60)\n        print(\n            f\"\\n✓ Found recent tenant data: {most_recent.name} (age: {file_age_days:.1f} days)\"\n        )\n\n        with open(most_recent, \"r\") as f:\n            tenant_data = json.load(f)\n\n        return tenant_data, str(most_recent)\n\n    return None, None\n\n\ndef main() -> None:\n    # Parse command-line arguments\n    parser = argparse.ArgumentParser(\n        description=\"Analyze tenant data and identify gated tenants with no recent queries\"\n    )\n    parser.add_argument(\n        \"--skip-cache\",\n        action=\"store_true\",\n        help=\"Skip cached tenant data and collect fresh data from pod\",\n    )\n    args = parser.parse_args()\n\n    try:\n        # Step 0: Collect control plane data\n        control_plane_data = collect_control_plane_data()\n\n        # Step 1: Check for recent tenant data (< 7 days old) unless --skip-cache is set\n        tenant_data = None\n        cached_file = None\n\n        if not args.skip_cache:\n            tenant_data, cached_file = find_recent_tenant_data()\n\n        if tenant_data:\n            print(f\"Using cached tenant data from: {cached_file}\")\n            print(f\"Total tenants in cache: {len(tenant_data)}\")\n        else:\n            if args.skip_cache:\n                print(\"\\n⚠ Skipping cache (--skip-cache flag set)\")\n\n            # Step 2a: Find the heavy worker pod\n            pod_name = find_worker_pod()\n\n            # Step 2b: Collect tenant data\n            tenant_data = collect_tenant_data(pod_name)\n\n            # Step 2c: Save raw data to file with timestamp\n            timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n            output_file = f\"tenant_data_{timestamp}.json\"\n            with open(output_file, \"w\") as f:\n                json.dump(tenant_data, f, indent=2, default=str)\n            print(f\"\\n✓ Raw data saved to: {output_file}\")\n\n        # Step 3: Analyze the data and get gated tenants without recent queries\n        gated_no_query_3_months = analyze_tenants(tenant_data, control_plane_data)\n\n        # Step 4: Export to CSV (sorted by num_documents descending)\n        timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n        csv_file = f\"gated_tenants_no_query_3mo_{timestamp}.csv\"\n\n        # Sort by num_documents in descending order\n        sorted_tenants = sorted(\n            gated_no_query_3_months,\n            key=lambda t: t.get(\"num_documents\", 0),\n            reverse=True,\n        )\n\n        with open(csv_file, \"w\", newline=\"\", encoding=\"utf-8\") as csvfile:\n            fieldnames = [\n                \"tenant_id\",\n                \"num_documents\",\n                \"num_users\",\n                \"last_query_time\",\n                \"days_since_last_query\",\n            ]\n            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n            writer.writeheader()\n\n            now = datetime.now(timezone.utc)\n            for tenant in sorted_tenants:\n                # Calculate days since last query\n                last_query_time = tenant.get(\"last_query_time\")\n                if last_query_time:\n                    try:\n                        query_dt = datetime.fromisoformat(\n                            last_query_time.replace(\"Z\", \"+00:00\")\n                        )\n                        days_since = str((now - query_dt).days)\n                    except Exception:\n                        days_since = \"N/A\"\n                else:\n                    days_since = \"Never\"\n\n                writer.writerow(\n                    {\n                        \"tenant_id\": tenant.get(\"tenant_id\", \"\"),\n                        \"num_documents\": tenant.get(\"num_documents\", 0),\n                        \"num_users\": tenant.get(\"num_users\", 0),\n                        \"last_query_time\": last_query_time or \"Never\",\n                        \"days_since_last_query\": days_since,\n                    }\n                )\n\n        print(f\"\\n✓ CSV exported to: {csv_file}\")\n        print(\n            f\"  Total gated tenants with no query in last 3 months: {len(gated_no_query_3_months)}\"\n        )\n\n    except subprocess.CalledProcessError as e:\n        print(f\"Error running command: {e}\", file=sys.stderr)\n        if e.stderr:\n            print(f\"stderr: {e.stderr}\", file=sys.stderr)\n        sys.exit(1)\n    except Exception as e:\n        print(f\"Error: {e}\", file=sys.stderr)\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/check_no_bastion_setup.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script to check if your environment is ready for no-bastion tenant cleanup.\n\nUsage:\n    python scripts/tenant_cleanup/check_no_bastion_setup.py\n\"\"\"\n\nimport subprocess\nimport sys\n\n\ndef print_header(text: str) -> None:\n    \"\"\"Print a formatted header.\"\"\"\n    print(f\"\\n{'=' * 80}\")\n    print(f\"  {text}\")\n    print(f\"{'=' * 80}\\n\")\n\n\ndef check_kubectl_access() -> bool:\n    \"\"\"Check if kubectl is installed and can access the cluster.\"\"\"\n    print(\"Checking kubectl access...\")\n\n    try:\n        result = subprocess.run(\n            [\"kubectl\", \"version\", \"--client\", \"--short\"],\n            capture_output=True,\n            text=True,\n            timeout=5,\n        )\n\n        if result.returncode == 0:\n            print(f\"✅ kubectl is installed: {result.stdout.strip()}\")\n\n            # Try to access cluster\n            result = subprocess.run(\n                [\"kubectl\", \"get\", \"ns\"],\n                capture_output=True,\n                text=True,\n                timeout=10,\n            )\n\n            if result.returncode == 0:\n                print(\"✅ kubectl can access the cluster\")\n                return True\n            else:\n                print(\"❌ kubectl cannot access the cluster\")\n                print(f\"   Error: {result.stderr}\")\n                return False\n        else:\n            print(\"❌ kubectl is not installed or not in PATH\")\n            return False\n\n    except FileNotFoundError:\n        print(\"❌ kubectl is not installed\")\n        return False\n    except subprocess.TimeoutExpired:\n        print(\"❌ kubectl command timed out\")\n        return False\n    except Exception as e:\n        print(f\"❌ Error checking kubectl: {e}\")\n        return False\n\n\ndef check_worker_pods() -> tuple[bool, list[str]]:\n    \"\"\"Check if worker pods are running.\"\"\"\n    print(\"\\nChecking for worker pods...\")\n\n    try:\n        result = subprocess.run(\n            [\"kubectl\", \"get\", \"po\"],\n            capture_output=True,\n            text=True,\n            timeout=10,\n            check=True,\n        )\n\n        lines = result.stdout.strip().split(\"\\n\")\n        worker_pods = []\n\n        for line in lines[1:]:  # Skip header\n            if \"celery-worker-user-file-processing\" in line and \"Running\" in line:\n                pod_name = line.split()[0]\n                worker_pods.append(pod_name)\n\n        if worker_pods:\n            print(f\"✅ Found {len(worker_pods)} running worker pod(s):\")\n            for pod in worker_pods[:3]:  # Show first 3\n                print(f\"   - {pod}\")\n            if len(worker_pods) > 3:\n                print(f\"   ... and {len(worker_pods) - 3} more\")\n            return True, worker_pods\n        else:\n            print(\"❌ No running celery-worker-user-file-processing pods found\")\n            print(\"   Available pods:\")\n            for line in lines[1:6]:  # Show first 5 pods\n                print(f\"   {line}\")\n            return False, []\n\n    except subprocess.CalledProcessError as e:\n        print(f\"❌ Error getting pods: {e}\")\n        return False, []\n    except Exception as e:\n        print(f\"❌ Error checking worker pods: {e}\")\n        return False, []\n\n\ndef check_pod_exec_permission(pod_name: str) -> bool:\n    \"\"\"Check if we can exec into a pod.\"\"\"\n    print(\"\\nChecking pod exec permissions...\")\n\n    try:\n        result = subprocess.run(\n            [\"kubectl\", \"exec\", pod_name, \"--\", \"echo\", \"test\"],\n            capture_output=True,\n            text=True,\n            timeout=10,\n        )\n\n        if result.returncode == 0 and \"test\" in result.stdout:\n            print(f\"✅ Can exec into pod: {pod_name}\")\n            return True\n        else:\n            print(f\"❌ Cannot exec into pod: {pod_name}\")\n            print(f\"   Error: {result.stderr}\")\n            return False\n\n    except subprocess.TimeoutExpired:\n        print(f\"❌ Exec command timed out for pod: {pod_name}\")\n        return False\n    except Exception as e:\n        print(f\"❌ Error checking exec permission: {e}\")\n        return False\n\n\ndef check_pod_db_access(pod_name: str) -> dict:\n    \"\"\"Check if pod has database environment variables.\"\"\"\n    print(\"\\nChecking database access from pod...\")\n\n    checks = {\n        \"control_plane\": False,\n        \"data_plane\": False,\n    }\n\n    try:\n        # Check for control plane DB env vars\n        result = subprocess.run(\n            [\"kubectl\", \"exec\", pod_name, \"--\", \"env\"],\n            capture_output=True,\n            text=True,\n            timeout=10,\n        )\n\n        if result.returncode == 0:\n            env_output = result.stdout\n\n            # Check control plane access\n            if any(\n                var in env_output\n                for var in [\n                    \"POSTGRES_CONTROL_URI\",\n                    \"POSTGRES_CONTROL_HOST\",\n                ]\n            ):\n                print(\"✅ Pod has control plane database environment variables\")\n                checks[\"control_plane\"] = True\n            else:\n                print(\n                    \"⚠️  Pod may not have control plane database environment variables\"\n                )\n                print(\"   (This might be okay if they're dynamically loaded)\")\n\n            # Check data plane access\n            if any(\n                var in env_output\n                for var in [\"POSTGRES_URI\", \"POSTGRES_HOST\", \"DATABASE_URL\"]\n            ):\n                print(\"✅ Pod has data plane database environment variables\")\n                checks[\"data_plane\"] = True\n            else:\n                print(\"❌ Pod does not have data plane database environment variables\")\n\n        return checks\n\n    except Exception as e:\n        print(f\"❌ Error checking database access: {e}\")\n        return checks\n\n\ndef check_required_scripts() -> bool:\n    \"\"\"Check if the required on_pod_scripts exist.\"\"\"\n    print(\"\\nChecking for required scripts...\")\n\n    from pathlib import Path\n\n    script_dir = Path(__file__).parent\n    required_scripts = [\n        \"on_pod_scripts/understand_tenants.py\",\n        \"on_pod_scripts/execute_connector_deletion.py\",\n        \"on_pod_scripts/check_documents_deleted.py\",\n        \"on_pod_scripts/cleanup_tenant_schema.py\",\n        \"on_pod_scripts/get_tenant_index_name.py\",\n        \"on_pod_scripts/get_tenant_users.py\",\n    ]\n\n    all_exist = True\n    for script in required_scripts:\n        script_path = script_dir / script\n        if script_path.exists():\n            print(f\"✅ {script}\")\n        else:\n            print(f\"❌ {script} - NOT FOUND\")\n            all_exist = False\n\n    return all_exist\n\n\ndef main() -> None:\n    print_header(\"No-Bastion Tenant Cleanup - Setup Verification\")\n\n    all_checks_passed = True\n\n    # 1. Check kubectl access\n    if not check_kubectl_access():\n        all_checks_passed = False\n\n    # 2. Check for worker pods\n    has_pods, worker_pods = check_worker_pods()\n    if not has_pods:\n        all_checks_passed = False\n        print(\"\\n⚠️  Cannot proceed without running worker pods\")\n        print_header(\"SETUP VERIFICATION FAILED\")\n        sys.exit(1)\n\n    # Use first worker pod for remaining checks\n    test_pod = worker_pods[0]\n\n    # 3. Check exec permissions\n    if not check_pod_exec_permission(test_pod):\n        all_checks_passed = False\n\n    # 4. Check database access\n    db_checks = check_pod_db_access(test_pod)\n    if not db_checks[\"data_plane\"]:\n        all_checks_passed = False\n\n    # 5. Check required scripts\n    if not check_required_scripts():\n        all_checks_passed = False\n\n    # Summary\n    print_header(\"VERIFICATION SUMMARY\")\n\n    if all_checks_passed and db_checks[\"control_plane\"]:\n        print(\"✅ ALL CHECKS PASSED!\")\n        print(\"\\nYou're ready to run tenant cleanup without bastion access.\")\n        print(\"\\nNext steps:\")\n        print(\"1. Read QUICK_START_NO_BASTION.md for commands\")\n        print(\n            \"2. Run: PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_analyze_tenants.py\"\n        )\n        sys.exit(0)\n    elif all_checks_passed:\n        print(\"⚠️  MOSTLY READY (with warnings)\")\n        print(\"\\nYou can proceed, but control plane access may need verification.\")\n        print(\"Try running Step 1 and see if it works.\")\n        print(\"\\nNext steps:\")\n        print(\n            \"1. Run: PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_analyze_tenants.py\"\n        )\n        print(\"2. If it fails with DB errors, check pod environment variables\")\n        sys.exit(0)\n    else:\n        print(\"❌ SETUP VERIFICATION FAILED\")\n        print(\"\\nPlease fix the issues above before proceeding.\")\n        print(\"\\nCommon fixes:\")\n        print(\"- Install kubectl: https://kubernetes.io/docs/tasks/tools/\")\n        print(\"- Configure cluster access: kubectl config use-context <context>\")\n        print(\"- Check pod status: kubectl get po\")\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/cleanup_tenants.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nTenant cleanup script that:\n1. Deletes all documents from Vespa\n2. Drops the data plane PostgreSQL schema\n3. Clean up control plane (tenants, subscription table)\n\nUsage:\n    python backend/scripts/cleanup_tenant.py <tenant_id> [--force]\n    python backend/scripts/cleanup_tenant.py --csv <csv_file_path> [--force]\n\nArguments:\n    tenant_id        The tenant ID to clean up (required if not using --csv)\n    --csv PATH       Path to CSV file containing tenant IDs to clean up\n    --force          Skip all confirmation prompts (optional)\n\nExamples:\n    python backend/scripts/cleanup_tenant.py tenant_abc123-def456-789\n    python backend/scripts/cleanup_tenant.py tenant_abc123-def456-789 --force\n    python backend/scripts/cleanup_tenant.py --csv gated_tenants_no_query_3mo.csv\n    python backend/scripts/cleanup_tenant.py --csv gated_tenants_no_query_3mo.csv --force\n\"\"\"\n\nimport csv\nimport json\nimport signal\nimport subprocess\nimport sys\nfrom datetime import datetime\nfrom pathlib import Path\n\nfrom scripts.tenant_cleanup.cleanup_utils import confirm_step\nfrom scripts.tenant_cleanup.cleanup_utils import execute_control_plane_query\nfrom scripts.tenant_cleanup.cleanup_utils import find_worker_pod\nfrom scripts.tenant_cleanup.cleanup_utils import get_tenant_status\nfrom scripts.tenant_cleanup.cleanup_utils import read_tenant_ids_from_csv\nfrom scripts.tenant_cleanup.cleanup_utils import TenantNotFoundInControlPlaneError\n\n\ndef signal_handler(signum: int, frame: object) -> None:  # noqa: ARG001\n    \"\"\"Handle termination signals by killing active subprocess.\"\"\"\n    sys.exit(1)\n\n\ndef get_tenant_index_name(pod_name: str, tenant_id: str) -> str:\n    \"\"\"Get the default index name for the given tenant by running script on pod.\"\"\"\n    print(f\"Getting default index name for tenant: {tenant_id}\")\n\n    # Get the path to the script\n    script_dir = Path(__file__).parent\n    index_name_script = script_dir / \"on_pod_scripts\" / \"get_tenant_index_name.py\"\n\n    if not index_name_script.exists():\n        raise FileNotFoundError(\n            f\"get_tenant_index_name.py not found at {index_name_script}\"\n        )\n\n    try:\n        # Copy script to pod\n        print(\"  Copying script to pod...\")\n        subprocess.run(\n            [\n                \"kubectl\",\n                \"cp\",\n                str(index_name_script),\n                f\"{pod_name}:/tmp/get_tenant_index_name.py\",\n            ],\n            check=True,\n            capture_output=True,\n        )\n\n        # Execute script on pod\n        print(\"  Executing script on pod...\")\n        result = subprocess.run(\n            [\n                \"kubectl\",\n                \"exec\",\n                pod_name,\n                \"--\",\n                \"python\",\n                \"/tmp/get_tenant_index_name.py\",\n                tenant_id,\n            ],\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        # Show progress messages from stderr\n        if result.stderr:\n            print(f\"  {result.stderr}\", end=\"\")\n\n        # Parse JSON result from stdout\n        result_data = json.loads(result.stdout)\n        status = result_data.get(\"status\")\n\n        if status == \"success\":\n            index_name = result_data.get(\"index_name\")\n            print(f\"✓ Found index name: {index_name}\")\n            return index_name\n        else:\n            message = result_data.get(\"message\", \"Unknown error\")\n            raise RuntimeError(f\"Failed to get index name: {message}\")\n\n    except subprocess.CalledProcessError as e:\n        print(\n            f\"✗ Failed to get index name for tenant {tenant_id}: {e}\", file=sys.stderr\n        )\n        if e.stderr:\n            print(f\"  Error details: {e.stderr}\", file=sys.stderr)\n        raise\n    except Exception as e:\n        print(\n            f\"✗ Failed to get index name for tenant {tenant_id}: {e}\", file=sys.stderr\n        )\n        raise\n\n\ndef get_tenant_users(pod_name: str, tenant_id: str) -> list[str]:\n    \"\"\"Get list of user emails from the tenant's data plane schema.\n\n    Args:\n        pod_name: The Kubernetes pod name to execute on\n        tenant_id: The tenant ID to query\n\n    Returns:\n        List of user email addresses, or empty list if query fails\n    \"\"\"\n    print(f\"Fetching user emails for tenant: {tenant_id}\")\n\n    # Get the path to the script\n    script_dir = Path(__file__).parent\n    get_users_script = script_dir / \"on_pod_scripts\" / \"get_tenant_users.py\"\n\n    if not get_users_script.exists():\n        raise FileNotFoundError(f\"get_tenant_users.py not found at {get_users_script}\")\n\n    try:\n        # Copy script to pod\n        print(\"  Copying script to pod...\")\n        subprocess.run(\n            [\n                \"kubectl\",\n                \"cp\",\n                str(get_users_script),\n                f\"{pod_name}:/tmp/get_tenant_users.py\",\n            ],\n            check=True,\n            capture_output=True,\n        )\n\n        # Execute script on pod\n        print(\"  Executing script on pod...\")\n        result = subprocess.run(\n            [\n                \"kubectl\",\n                \"exec\",\n                pod_name,\n                \"--\",\n                \"python\",\n                \"/tmp/get_tenant_users.py\",\n                tenant_id,\n            ],\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        # Show progress messages from stderr\n        if result.stderr:\n            print(f\"  {result.stderr}\", end=\"\")\n\n        # Parse JSON result from stdout\n        result_data = json.loads(result.stdout)\n        status = result_data.get(\"status\")\n\n        if status == \"success\":\n            users = result_data.get(\"users\", [])\n            if users:\n                print(f\"✓ Found {len(users)} user(s):\")\n                for email in users:\n                    print(f\"    - {email}\")\n            else:\n                print(\"  No users found in tenant\")\n            return users\n        else:\n            message = result_data.get(\"message\", \"Unknown error\")\n            print(f\"⚠ Could not fetch users: {message}\")\n            return []\n\n    except subprocess.CalledProcessError as e:\n        print(f\"⚠ Failed to get users for tenant {tenant_id}: {e}\")\n        if e.stderr:\n            print(f\"  Error details: {e.stderr}\")\n        return []\n    except Exception as e:\n        print(f\"⚠ Failed to get users for tenant {tenant_id}: {e}\")\n        return []\n\n\ndef check_documents_deleted(pod_name: str, tenant_id: str) -> None:\n    \"\"\"Check if all documents and connector credential pairs have been deleted.\n\n    Raises RuntimeError if any ConnectorCredentialPairs or Documents remain.\n    \"\"\"\n    print(f\"Checking for remaining documents in tenant: {tenant_id}\")\n\n    # Get the path to the script\n    script_dir = Path(__file__).parent\n    check_script = script_dir / \"on_pod_scripts\" / \"check_documents_deleted.py\"\n\n    if not check_script.exists():\n        raise FileNotFoundError(\n            f\"check_documents_deleted.py not found at {check_script}\"\n        )\n\n    try:\n        # Copy script to pod\n        print(\"  Copying script to pod...\")\n        subprocess.run(\n            [\n                \"kubectl\",\n                \"cp\",\n                str(check_script),\n                f\"{pod_name}:/tmp/check_documents_deleted.py\",\n            ],\n            check=True,\n            capture_output=True,\n        )\n\n        # Execute script on pod\n        print(\"  Executing check on pod...\")\n        result = subprocess.run(\n            [\n                \"kubectl\",\n                \"exec\",\n                pod_name,\n                \"--\",\n                \"python\",\n                \"/tmp/check_documents_deleted.py\",\n                tenant_id,\n            ],\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        # Show progress messages from stderr\n        if result.stderr:\n            print(f\"  {result.stderr}\", end=\"\")\n\n        # Parse JSON result from stdout\n        result_data = json.loads(result.stdout)\n        status = result_data.get(\"status\")\n\n        if status == \"success\":\n            message = result_data.get(\"message\")\n            print(f\"✓ {message}\")\n        elif status == \"not_found\":\n            message = result_data.get(\"message\", \"Schema not found\")\n            print(f\"⚠ {message}\")\n        else:\n            message = result_data.get(\"message\", \"Unknown error\")\n            cc_count = result_data.get(\"connector_credential_pair_count\", 0)\n            doc_count = result_data.get(\"document_count\", 0)\n            error_details = f\"{message}\"\n            if cc_count > 0 or doc_count > 0:\n                error_details += f\"\\n  ConnectorCredentialPairs: {cc_count}\\n  Documents: {doc_count}\"\n            raise RuntimeError(error_details)\n\n    except subprocess.CalledProcessError as e:\n        print(\n            f\"✗ Failed to check documents for tenant {tenant_id}: {e}\",\n            file=sys.stderr,\n        )\n        if e.stderr:\n            print(f\"  Error details: {e.stderr}\", file=sys.stderr)\n        raise\n    except Exception as e:\n        print(\n            f\"✗ Failed to check documents for tenant {tenant_id}: {e}\",\n            file=sys.stderr,\n        )\n        raise\n\n\ndef drop_data_plane_schema(pod_name: str, tenant_id: str) -> None:\n    \"\"\"Drop the PostgreSQL schema for the given tenant by running script on pod.\"\"\"\n    print(f\"Dropping data plane schema for tenant: {tenant_id}\")\n\n    # Get the path to the cleanup script\n    script_dir = Path(__file__).parent\n    schema_cleanup_script = script_dir / \"on_pod_scripts\" / \"cleanup_tenant_schema.py\"\n\n    if not schema_cleanup_script.exists():\n        raise FileNotFoundError(\n            f\"cleanup_tenant_schema.py not found at {schema_cleanup_script}\"\n        )\n\n    try:\n        # Copy script to pod\n        print(\"  Copying script to pod...\")\n        subprocess.run(\n            [\n                \"kubectl\",\n                \"cp\",\n                str(schema_cleanup_script),\n                f\"{pod_name}:/tmp/cleanup_tenant_schema.py\",\n            ],\n            check=True,\n            capture_output=True,\n        )\n\n        # Execute script on pod\n        print(\"  Executing schema cleanup on pod...\")\n        result = subprocess.run(\n            [\n                \"kubectl\",\n                \"exec\",\n                pod_name,\n                \"--\",\n                \"python\",\n                \"/tmp/cleanup_tenant_schema.py\",\n                tenant_id,\n            ],\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        # Show progress messages from stderr\n        if result.stderr:\n            print(f\"  {result.stderr}\", end=\"\")\n\n        # Parse JSON result from stdout\n        result_data = json.loads(result.stdout)\n        status = result_data.get(\"status\")\n        message = result_data.get(\"message\")\n\n        if status == \"success\":\n            print(f\"✓ {message}\")\n        elif status == \"not_found\":\n            print(f\"⚠ {message}\")\n        else:\n            print(f\"✗ {message}\", file=sys.stderr)\n            raise RuntimeError(message)\n\n    except subprocess.CalledProcessError as e:\n        print(f\"✗ Failed to drop schema for tenant {tenant_id}: {e}\", file=sys.stderr)\n        if e.stderr:\n            print(f\"  Error details: {e.stderr}\", file=sys.stderr)\n        raise\n    except Exception as e:\n        print(f\"✗ Failed to drop schema for tenant {tenant_id}: {e}\", file=sys.stderr)\n        raise\n\n\ndef cleanup_control_plane(tenant_id: str, force: bool = False) -> None:\n    \"\"\"\n    Clean up control plane data (tenants table, subscription table, etc.)\n\n    Deletes from tables in this order:\n    1. tenant_notification (foreign key to tenant)\n    2. tenant_config (foreign key to tenant)\n    3. subscription (foreign key to tenant)\n    4. tenant (primary table)\n    \"\"\"\n    print(f\"Cleaning up control plane data for tenant: {tenant_id}\")\n\n    # Delete in order respecting foreign key constraints\n    delete_queries = [\n        (\n            \"tenant_notification\",\n            \"DELETE FROM tenant_notification WHERE tenant_id = '{tenant_id}';\",\n        ),\n        (\"tenant_config\", \"DELETE FROM tenant_config WHERE tenant_id = '{tenant_id}';\"),\n        (\"subscription\", \"DELETE FROM subscription WHERE tenant_id = '{tenant_id}';\"),\n        (\"tenant\", \"DELETE FROM tenant WHERE tenant_id = '{tenant_id}';\"),\n    ]\n\n    try:\n        for table_name, query in delete_queries:\n            formatted_query = query.format(tenant_id=tenant_id)\n            print(f\"  Deleting from {table_name}...\")\n\n            if not confirm_step(f\"Delete from {table_name}?\", force):\n                print(f\"  Skipping deletion from {table_name}\")\n                continue\n\n            result = execute_control_plane_query(formatted_query)\n\n            if result.stdout:\n                # Extract row count from output (e.g., \"DELETE 5\")\n                print(f\"    {result.stdout.strip()}\")\n\n        print(f\"✓ Successfully cleaned up control plane data for tenant: {tenant_id}\")\n\n    except subprocess.CalledProcessError as e:\n        print(\n            f\"✗ Failed to clean up control plane for tenant {tenant_id}: {e}\",\n            file=sys.stderr,\n        )\n        if e.stderr:\n            print(f\"  Error details: {e.stderr}\", file=sys.stderr)\n        raise\n\n\ndef cleanup_tenant(tenant_id: str, pod_name: str, force: bool = False) -> bool:\n    \"\"\"\n    Main cleanup function that orchestrates all cleanup steps.\n\n    Args:\n        tenant_id: The tenant ID to clean up\n        pod_name: The Kubernetes pod name to execute operations on\n        force: If True, skip all confirmation prompts\n\n    Returns:\n        True if cleanup was performed, False if skipped\n    \"\"\"\n    print(f\"Starting cleanup for tenant: {tenant_id}\")\n\n    # Track if tenant was not found in control plane (for force mode)\n    tenant_not_found_in_control_plane = False\n\n    # Check tenant status first\n    print(f\"\\n{'=' * 80}\")\n    try:\n        tenant_status = get_tenant_status(tenant_id)\n\n        # If tenant is not GATED_ACCESS, require explicit confirmation even in force mode\n        if tenant_status and tenant_status != \"GATED_ACCESS\":\n            print(\n                f\"\\n⚠️  WARNING: Tenant status is '{tenant_status}', not 'GATED_ACCESS'!\"\n            )\n            print(\n                \"This tenant may be active and should not be deleted without careful review.\"\n            )\n            print(f\"{'=' * 80}\\n\")\n\n            if force:\n                print(f\"Skipping cleanup for tenant {tenant_id} in force mode\")\n                return False\n\n            # Always ask for confirmation if not gated, even in force mode\n            response = input(\n                \"Are you ABSOLUTELY SURE you want to proceed? Type 'yes' to confirm: \"\n            )\n            if response.lower() != \"yes\":\n                print(\"Cleanup aborted - tenant is not GATED_ACCESS\")\n                return False\n        elif tenant_status == \"GATED_ACCESS\":\n            print(\"✓ Tenant status is GATED_ACCESS - safe to proceed with cleanup\")\n        elif tenant_status is None:\n            print(\"⚠️  WARNING: Could not determine tenant status!\")\n\n            if force:\n                print(f\"Skipping cleanup for tenant {tenant_id} in force mode\")\n                return False\n\n            response = input(\"Continue anyway? Type 'yes' to confirm: \")\n            if response.lower() != \"yes\":\n                print(\"Cleanup aborted - could not verify tenant status\")\n                return False\n    except TenantNotFoundInControlPlaneError as e:\n        # Tenant/table not found in control plane\n        error_str = str(e)\n        print(f\"⚠️  WARNING: Tenant not found in control plane: {error_str}\")\n        tenant_not_found_in_control_plane = True\n\n        if force:\n            print(\n                \"[FORCE MODE] Tenant not found in control plane - continuing with dataplane cleanup only\"\n            )\n        else:\n            response = input(\"Continue anyway? Type 'yes' to confirm: \")\n            if response.lower() != \"yes\":\n                print(\"Cleanup aborted - tenant not found in control plane\")\n                return False\n    except Exception as e:\n        # Other errors (not \"not found\")\n        error_str = str(e)\n        print(f\"⚠️  WARNING: Failed to check tenant status: {error_str}\")\n\n        if force:\n            print(f\"Skipping cleanup for tenant {tenant_id} in force mode\")\n            return False\n\n        response = input(\"Continue anyway? Type 'yes' to confirm: \")\n        if response.lower() != \"yes\":\n            print(\"Cleanup aborted - could not verify tenant status\")\n            return False\n    print(f\"{'=' * 80}\\n\")\n\n    # Fetch tenant users for informational purposes (non-blocking)\n    # Skip in force mode as it's only informational\n    if not force:\n        print(f\"\\n{'=' * 80}\")\n        try:\n            get_tenant_users(pod_name, tenant_id)\n        except Exception as e:\n            print(f\"⚠ Could not fetch tenant users: {e}\")\n        print(f\"{'=' * 80}\\n\")\n\n    # Step 1: Make sure all documents are deleted\n    print(f\"\\n{'=' * 80}\")\n    print(\"Step 1/3: Checking for remaining ConnectorCredentialPairs and Documents\")\n    print(f\"{'=' * 80}\")\n    try:\n        check_documents_deleted(pod_name, tenant_id)\n    except Exception as e:\n        print(f\"✗ Document check failed: {e}\", file=sys.stderr)\n        print(\n            \"\\nPlease ensure all ConnectorCredentialPairs and Documents are deleted before running cleanup.\"\n        )\n        print(\n            \"You may need to mark connectors for deletion and wait for cleanup to complete.\"\n        )\n        return False\n    print(f\"{'=' * 80}\\n\")\n\n    # Step 2: Drop data plane schema\n    if confirm_step(\n        f\"Step 2/3: Drop data plane schema '{tenant_id}' (CASCADE - will delete all tables, functions, etc.)\",\n        force,\n    ):\n        try:\n            drop_data_plane_schema(pod_name, tenant_id)\n        except Exception as e:\n            print(f\"✗ Failed at schema cleanup step: {e}\", file=sys.stderr)\n            if not force:\n                response = input(\"Continue with control plane cleanup? (y/n): \")\n                if response.lower() != \"y\":\n                    print(\"Cleanup aborted by user\")\n                    return False\n            else:\n                print(\"[FORCE MODE] Continuing despite schema cleanup failure\")\n    else:\n        print(\"Step 2 skipped by user\")\n\n    # Step 3: Clean up control plane (skip if tenant not found in control plane with --force)\n    if tenant_not_found_in_control_plane:\n        print(f\"\\n{'=' * 80}\")\n        print(\n            \"Step 3/3: Skipping control plane cleanup (tenant not found in control plane)\"\n        )\n        print(f\"{'=' * 80}\\n\")\n    elif confirm_step(\n        \"Step 3/3: Delete control plane records (tenant_notification, tenant_config, subscription, tenant)\",\n        force,\n    ):\n        try:\n            cleanup_control_plane(tenant_id, force)\n        except Exception as e:\n            print(f\"✗ Failed at control plane cleanup step: {e}\", file=sys.stderr)\n            if not force:\n                print(\"Control plane cleanup failed\")\n            else:\n                print(\"[FORCE MODE] Control plane cleanup failed but continuing\")\n    else:\n        print(\"Step 3 skipped by user\")\n        return False\n\n    print(f\"\\n{'=' * 80}\")\n    print(f\"✓ Cleanup completed for tenant: {tenant_id}\")\n    print(f\"{'=' * 80}\")\n    return True\n\n\ndef main() -> None:\n    # Register signal handlers for graceful shutdown\n    signal.signal(signal.SIGINT, signal_handler)\n    signal.signal(signal.SIGTERM, signal_handler)\n\n    if len(sys.argv) < 2:\n        print(\"Usage: python backend/scripts/cleanup_tenant.py <tenant_id> [--force]\")\n        print(\n            \"       python backend/scripts/cleanup_tenant.py --csv <csv_file_path> [--force]\"\n        )\n        print(\"\\nArguments:\")\n        print(\n            \"  tenant_id        The tenant ID to clean up (required if not using --csv)\"\n        )\n        print(\"  --csv PATH       Path to CSV file containing tenant IDs to clean up\")\n        print(\"  --force          Skip all confirmation prompts (optional)\")\n        print(\"\\nExamples:\")\n        print(\"  python backend/scripts/cleanup_tenant.py tenant_abc123-def456-789\")\n        print(\n            \"  python backend/scripts/cleanup_tenant.py tenant_abc123-def456-789 --force\"\n        )\n        print(\n            \"  python backend/scripts/cleanup_tenant.py --csv gated_tenants_no_query_3mo.csv\"\n        )\n        print(\n            \"  python backend/scripts/cleanup_tenant.py --csv gated_tenants_no_query_3mo.csv --force\"\n        )\n        sys.exit(1)\n\n    # Parse arguments\n    force = \"--force\" in sys.argv\n    tenant_ids = []\n\n    # Check for CSV mode\n    if \"--csv\" in sys.argv:\n        try:\n            csv_index = sys.argv.index(\"--csv\")\n            if csv_index + 1 >= len(sys.argv):\n                print(\"Error: --csv flag requires a file path\", file=sys.stderr)\n                sys.exit(1)\n\n            csv_path = sys.argv[csv_index + 1]\n            tenant_ids = read_tenant_ids_from_csv(csv_path)\n\n            if not tenant_ids:\n                print(\"Error: No tenant IDs found in CSV file\", file=sys.stderr)\n                sys.exit(1)\n\n            print(f\"Found {len(tenant_ids)} tenant(s) in CSV file: {csv_path}\")\n\n        except Exception as e:\n            print(f\"Error reading CSV file: {e}\", file=sys.stderr)\n            sys.exit(1)\n    else:\n        # Single tenant mode\n        tenant_ids = [sys.argv[1]]\n\n    # Initial confirmation (unless --force is used)\n    if not force:\n        print(f\"\\n{'=' * 80}\")\n        print(\"TENANT CLEANUP - CONFIRMATION REQUIRED\")\n        print(f\"{'=' * 80}\")\n        if len(tenant_ids) == 1:\n            print(f\"Tenant ID: {tenant_ids[0]}\")\n        else:\n            print(f\"Number of tenants: {len(tenant_ids)}\")\n            print(f\"Tenant IDs: {', '.join(tenant_ids[:5])}\")\n            if len(tenant_ids) > 5:\n                print(f\"            ... and {len(tenant_ids) - 5} more\")\n\n        print(\"Index Name: Will be fetched automatically when deleting Vespa documents\")\n        print(\n            f\"Mode: {'FORCE (no confirmations)' if force else 'Interactive (will ask for confirmation at each step)'}\"\n        )\n        print(\"\\nThis will:\")\n        print(\"  1. Delete ALL Vespa documents for this tenant\")\n        print(\"  2. Drop the data plane PostgreSQL schema (CASCADE)\")\n        print(\"  3. Clean up control plane data:\")\n        print(\"     - Delete from tenant_notification table\")\n        print(\"     - Delete from tenant_config table\")\n        print(\"     - Delete from subscription table\")\n        print(\"     - Delete from tenant table\")\n        print(f\"\\n{'=' * 80}\")\n        print(\"WARNING: This operation is IRREVERSIBLE!\")\n        print(f\"{'=' * 80}\\n\")\n\n        response = input(\"Are you sure you want to proceed? Type 'yes' to confirm: \")\n\n        if response.lower() != \"yes\":\n            print(\"Cleanup aborted by user\")\n            sys.exit(0)\n    else:\n        if len(tenant_ids) == 1:\n            print(\n                f\"⚠ FORCE MODE: Running cleanup for {tenant_ids[0]} without confirmations\"\n            )\n        else:\n            print(\n                f\"⚠ FORCE MODE: Running cleanup for {len(tenant_ids)} tenants without confirmations\"\n            )\n\n    # Find heavy worker pod once for all tenants\n    try:\n        pod_name = find_worker_pod()\n        print(f\"✓ Found worker pod: {pod_name}\\n\")\n    except Exception as e:\n        print(f\"✗ Failed to find heavy worker pod: {e}\", file=sys.stderr)\n        print(\"Cannot proceed with cleanup\")\n        sys.exit(1)\n\n    # Run cleanup for each tenant\n    failed_tenants = []\n    successful_tenants = []\n    skipped_tenants = []\n\n    # Open CSV file for writing successful cleanups in real-time\n    csv_output_path = \"cleaned_tenants.csv\"\n    with open(csv_output_path, \"w\", newline=\"\") as csv_file:\n        csv_writer = csv.writer(csv_file)\n        csv_writer.writerow([\"tenant_id\", \"cleaned_at\"])\n        csv_file.flush()  # Ensure header is written immediately\n\n        print(f\"Writing successful cleanups to: {csv_output_path}\\n\")\n\n        for idx, tenant_id in enumerate(tenant_ids, 1):\n            if len(tenant_ids) > 1:\n                print(f\"\\n{'=' * 80}\")\n                print(f\"Processing tenant {idx}/{len(tenant_ids)}: {tenant_id}\")\n                print(f\"{'=' * 80}\")\n\n            try:\n                was_cleaned = cleanup_tenant(tenant_id, pod_name, force)\n\n                if was_cleaned:\n                    # Only record if actually cleaned up (not skipped)\n                    successful_tenants.append(tenant_id)\n\n                    # Write to CSV immediately after successful cleanup\n                    timestamp = datetime.utcnow().isoformat()\n                    csv_writer.writerow([tenant_id, timestamp])\n                    csv_file.flush()  # Ensure real-time write\n                    print(f\"✓ Recorded cleanup in {csv_output_path}\")\n                else:\n                    skipped_tenants.append(tenant_id)\n                    print(f\"⚠ Tenant {tenant_id} was skipped (not recorded in CSV)\")\n\n            except Exception as e:\n                print(f\"✗ Cleanup failed for tenant {tenant_id}: {e}\", file=sys.stderr)\n                failed_tenants.append((tenant_id, str(e)))\n\n                # If not in force mode and there are more tenants, ask if we should continue\n                if not force and idx < len(tenant_ids):\n                    response = input(\n                        f\"\\nContinue with remaining {len(tenant_ids) - idx} tenant(s)? (y/n): \"\n                    )\n                    if response.lower() != \"y\":\n                        print(\"Cleanup aborted by user\")\n                        break\n\n    # Print summary\n    if len(tenant_ids) == 1:\n        if successful_tenants:\n            print(f\"\\n✓ Successfully cleaned tenant written to: {csv_output_path}\")\n        elif skipped_tenants:\n            print(\"\\n⚠ Tenant was skipped\")\n    elif len(tenant_ids) > 1:\n        print(f\"\\n{'=' * 80}\")\n        print(\"CLEANUP SUMMARY\")\n        print(f\"{'=' * 80}\")\n        print(f\"Total tenants: {len(tenant_ids)}\")\n        print(f\"Successful: {len(successful_tenants)}\")\n        print(f\"Skipped: {len(skipped_tenants)}\")\n        print(f\"Failed: {len(failed_tenants)}\")\n        print(f\"\\nSuccessfully cleaned tenants written to: {csv_output_path}\")\n\n        if skipped_tenants:\n            print(f\"\\nSkipped tenants ({len(skipped_tenants)}):\")\n            for tenant_id in skipped_tenants:\n                print(f\"  - {tenant_id}\")\n\n        if failed_tenants:\n            print(f\"\\nFailed tenants ({len(failed_tenants)}):\")\n            for tenant_id, error in failed_tenants:\n                print(f\"  - {tenant_id}: {error}\")\n\n        print(f\"{'=' * 80}\")\n\n        if failed_tenants:\n            sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/cleanup_utils.py",
    "content": "import csv\nimport os\nimport random\nimport subprocess\nimport sys\nfrom dataclasses import dataclass\nfrom pathlib import Path\n\n\nclass TenantNotFoundInControlPlaneError(Exception):\n    \"\"\"Exception raised when tenant/table is not found in control plane.\"\"\"\n\n\n@dataclass\nclass ControlPlaneConfig:\n    \"\"\"Configuration for connecting to the control plane database.\"\"\"\n\n    db_url: str\n    bastion_host: str\n    pem_file_location: str\n\n\ndef find_worker_pod() -> str:\n    \"\"\"Find a user file processing worker pod using kubectl.\"\"\"\n    print(\"Finding user file processing worker pod...\")\n\n    result = subprocess.run(\n        [\"kubectl\", \"get\", \"po\"], capture_output=True, text=True, check=True\n    )\n\n    # Parse output and find user file processing worker pod\n    lines = result.stdout.strip().split(\"\\n\")\n    lines = lines[1:]  # Skip header\n    random.shuffle(lines)\n    for line in lines:\n        if \"celery-worker-user-file-processing\" in line and \"Running\" in line:\n            pod_name = line.split()[0]\n            print(f\"Found pod: {pod_name}\")\n            return pod_name\n\n    raise RuntimeError(\"No running user file processing worker pod found\")\n\n\ndef confirm_step(message: str, force: bool = False) -> bool:\n    \"\"\"Ask for confirmation before executing a step.\n\n    Args:\n        message: The confirmation message to display\n        force: If True, skip confirmation and return True\n\n    Returns:\n        True if user confirms or force is True, False otherwise\n    \"\"\"\n    if force:\n        print(f\"[FORCE MODE] Skipping confirmation: {message}\")\n        return True\n\n    print(f\"\\n{message}\")\n    response = input(\"Proceed? (y/n): \")\n    return response.lower() == \"y\"\n\n\ndef get_control_plane_config() -> ControlPlaneConfig:\n    \"\"\"Get control plane database configuration from environment variables.\n\n    Returns:\n        ControlPlaneConfig with db_url, bastion_host, and pem_file_location\n\n    Raises:\n        ValueError: If any required environment variable is not set\n    \"\"\"\n    rds_host = os.environ.get(\"CONTROL_PLANE_RDS_HOST\")\n    if not rds_host:\n        raise ValueError(\"CONTROL_PLANE_RDS_HOST is not set\")\n\n    rds_password = os.environ.get(\"CONTROL_PLANE_RDS_PASSWORD\")\n    if not rds_password:\n        raise ValueError(\"CONTROL_PLANE_RDS_PASSWORD is not set\")\n\n    bastion_host = os.environ.get(\"BASTION_HOST\")\n    if not bastion_host:\n        raise ValueError(\"BASTION_HOST is not set\")\n\n    pem_file_location = os.environ.get(\"PEM_FILE_LOCATION\")\n    if not pem_file_location:\n        raise ValueError(\"PEM_FILE_LOCATION is not set\")\n\n    db_url = f\"postgresql://postgres:{rds_password}@{rds_host}:5432/control\"\n\n    return ControlPlaneConfig(\n        db_url=db_url,\n        bastion_host=bastion_host,\n        pem_file_location=pem_file_location,\n    )\n\n\ndef execute_control_plane_query(\n    query: str, tuple_only: bool = False\n) -> subprocess.CompletedProcess:\n    \"\"\"Execute a SQL query against the control plane database via SSH.\n\n    Args:\n        query: The SQL query to execute\n        tuple_only: If True, use psql's tuple-only mode (-t flag) for cleaner output\n\n    Returns:\n        subprocess.CompletedProcess with the result\n\n    Raises:\n        subprocess.CalledProcessError: If the command fails\n    \"\"\"\n    config = get_control_plane_config()\n    db_url = config.db_url\n    bastion_host = config.bastion_host\n    pem_file_location = config.pem_file_location\n\n    # Build psql flags\n    psql_flags = \"-t\" if tuple_only else \"\"\n\n    # Build the SSH command with proper escaping\n    full_cmd = f'ssh -i {pem_file_location} ec2-user@{bastion_host} \"psql {db_url} {psql_flags} -c \\\\\"{query}\\\\\"\"'\n\n    result = subprocess.run(\n        full_cmd,\n        shell=True,\n        check=True,\n        capture_output=True,\n        text=True,\n    )\n\n    return result\n\n\ndef get_tenant_status(tenant_id: str) -> str | None:\n    \"\"\"\n    Get tenant status from control plane database.\n\n    Returns:\n        Tenant status string (e.g., 'GATED_ACCESS', 'ACTIVE') or None if not found\n\n    Raises:\n        TenantNotFoundInControlPlaneError: If the tenant table/relation does not exist\n    \"\"\"\n    print(f\"Fetching tenant status for tenant: {tenant_id}\")\n\n    query = f\"SELECT application_status FROM tenant WHERE tenant_id = '{tenant_id}';\"\n\n    try:\n        result = execute_control_plane_query(query, tuple_only=True)\n\n        # Parse the output - psql returns the value with whitespace\n        status = result.stdout.strip()\n\n        if status:\n            print(f\"✓ Tenant status: {status}\")\n            return status\n        else:\n            print(\"⚠ Tenant not found in control plane\")\n            raise TenantNotFoundInControlPlaneError(\n                f\"Tenant {tenant_id} not found in control plane database\"\n            )\n    except TenantNotFoundInControlPlaneError:\n        # Re-raise without wrapping\n        raise\n    except subprocess.CalledProcessError as e:\n        error_msg = e.stderr if e.stderr else str(e)\n        print(\n            f\"✗ Failed to get tenant status for {tenant_id}: {error_msg}\",\n            file=sys.stderr,\n        )\n        return None\n\n\ndef read_tenant_ids_from_csv(csv_path: str) -> list[str]:\n    \"\"\"Read tenant IDs from CSV file.\n\n    Args:\n        csv_path: Path to CSV file\n\n    Returns:\n        List of tenant IDs\n    \"\"\"\n    if not Path(csv_path).exists():\n        raise FileNotFoundError(f\"CSV file not found: {csv_path}\")\n\n    tenant_ids = []\n    with open(csv_path, \"r\", newline=\"\", encoding=\"utf-8\") as csvfile:\n        reader = csv.DictReader(csvfile)\n\n        # Check if tenant_id column exists\n        if not reader.fieldnames or \"tenant_id\" not in reader.fieldnames:\n            raise ValueError(\n                f\"CSV file must have a 'tenant_id' column. Found columns: {reader.fieldnames}\"\n            )\n\n        for row in reader:\n            tenant_id = row.get(\"tenant_id\", \"\").strip()\n            if tenant_id:\n                tenant_ids.append(tenant_id)\n\n    return tenant_ids\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nMark connectors for deletion script that:\n1. Finds all connectors for the specified tenant(s)\n2. Cancels any scheduled indexing attempts\n3. Marks each connector credential pair as DELETING\n4. Triggers the cleanup task\n\nUsage:\n    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py <tenant_id> [--force] [--concurrency N]\n    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv <csv_file_path> [--force] [--concurrency N]\n\nArguments:\n    tenant_id        The tenant ID to process (required if not using --csv)\n    --csv PATH       Path to CSV file containing tenant IDs to process\n    --force          Skip all confirmation prompts (optional)\n    --concurrency N  Process N tenants concurrently (default: 1)\n\nExamples:\n    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py tenant_abc123-def456-789\n    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py tenant_abc123-def456-789 --force\n    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv gated_tenants_no_query_3mo.csv\n    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv gated_tenants_no_query_3mo.csv --force\n    python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py \\\n        --csv gated_tenants_no_query_3mo.csv --force --concurrency 16\n\"\"\"\n\nimport subprocess\nimport sys\nfrom concurrent.futures import as_completed\nfrom concurrent.futures import ThreadPoolExecutor\nfrom pathlib import Path\nfrom threading import Lock\nfrom typing import Any\n\nfrom scripts.tenant_cleanup.cleanup_utils import confirm_step\nfrom scripts.tenant_cleanup.cleanup_utils import find_worker_pod\nfrom scripts.tenant_cleanup.cleanup_utils import get_tenant_status\nfrom scripts.tenant_cleanup.cleanup_utils import read_tenant_ids_from_csv\n\n# Global lock for thread-safe printing\n_print_lock: Lock = Lock()\n\n\ndef safe_print(*args: Any, **kwargs: Any) -> None:\n    \"\"\"Thread-safe print function.\"\"\"\n    with _print_lock:\n        print(*args, **kwargs)\n\n\ndef run_connector_deletion(pod_name: str, tenant_id: str) -> None:\n    \"\"\"Mark all connector credential pairs for deletion.\n\n    Args:\n        pod_name: The Kubernetes pod name to execute on\n        tenant_id: The tenant ID\n    \"\"\"\n    safe_print(\"  Marking all connector credential pairs for deletion...\")\n\n    # Get the path to the script\n    script_dir = Path(__file__).parent\n    mark_deletion_script = (\n        script_dir / \"on_pod_scripts\" / \"execute_connector_deletion.py\"\n    )\n\n    if not mark_deletion_script.exists():\n        raise FileNotFoundError(\n            f\"execute_connector_deletion.py not found at {mark_deletion_script}\"\n        )\n\n    try:\n        # Copy script to pod\n        subprocess.run(\n            [\n                \"kubectl\",\n                \"cp\",\n                str(mark_deletion_script),\n                f\"{pod_name}:/tmp/execute_connector_deletion.py\",\n            ],\n            check=True,\n            capture_output=True,\n        )\n\n        # Execute script on pod\n        result = subprocess.run(\n            [\n                \"kubectl\",\n                \"exec\",\n                pod_name,\n                \"--\",\n                \"python\",\n                \"/tmp/execute_connector_deletion.py\",\n                tenant_id,\n                \"--all\",\n            ],\n        )\n\n        if result.returncode != 0:\n            raise RuntimeError(result.stderr)\n\n    except subprocess.CalledProcessError as e:\n        safe_print(\n            f\"  ✗ Failed to mark all connector credential pairs for deletion: {e}\",\n            file=sys.stderr,\n        )\n        if e.stderr:\n            safe_print(f\"    Error details: {e.stderr}\", file=sys.stderr)\n        raise\n    except Exception as e:\n        safe_print(\n            f\"  ✗ Failed to mark all connector credential pairs for deletion: {e}\",\n            file=sys.stderr,\n        )\n        raise\n\n\ndef mark_tenant_connectors_for_deletion(\n    tenant_id: str, pod_name: str, force: bool = False\n) -> None:\n    \"\"\"\n    Main function to mark all connectors for a tenant for deletion.\n\n    Args:\n        tenant_id: The tenant ID to process\n        pod_name: The Kubernetes pod name to execute on\n        force: If True, skip all confirmation prompts\n    \"\"\"\n    safe_print(f\"Processing connectors for tenant: {tenant_id}\")\n\n    # Check tenant status first\n    safe_print(f\"\\n{'=' * 80}\")\n    try:\n        tenant_status = get_tenant_status(tenant_id)\n\n        # If tenant is not GATED_ACCESS, require explicit confirmation even in force mode\n        if tenant_status and tenant_status != \"GATED_ACCESS\":\n            safe_print(\n                f\"\\n⚠️  WARNING: Tenant status is '{tenant_status}', not 'GATED_ACCESS'!\"\n            )\n            safe_print(\n                \"This tenant may be active and should not have connectors deleted without careful review.\"\n            )\n            safe_print(f\"{'=' * 80}\\n\")\n\n            # Always ask for confirmation if not gated, even in force mode\n            # Note: In parallel mode with force, this will still block\n            if not force:\n                response = input(\n                    \"Are you ABSOLUTELY SURE you want to proceed? Type 'yes' to confirm: \"\n                )\n                if response.lower() != \"yes\":\n                    safe_print(\"Operation aborted - tenant is not GATED_ACCESS\")\n                    raise RuntimeError(f\"Tenant {tenant_id} is not GATED_ACCESS\")\n            else:\n                raise RuntimeError(f\"Tenant {tenant_id} is not GATED_ACCESS\")\n        elif tenant_status == \"GATED_ACCESS\":\n            safe_print(\"✓ Tenant status is GATED_ACCESS - safe to proceed\")\n        elif tenant_status is None:\n            safe_print(\"⚠️  WARNING: Could not determine tenant status!\")\n            if not force:\n                response = input(\"Continue anyway? Type 'yes' to confirm: \")\n                if response.lower() != \"yes\":\n                    safe_print(\"Operation aborted - could not verify tenant status\")\n                    raise RuntimeError(\n                        f\"Could not verify tenant status for {tenant_id}\"\n                    )\n            else:\n                raise RuntimeError(f\"Could not verify tenant status for {tenant_id}\")\n    except Exception as e:\n        safe_print(f\"⚠️  WARNING: Failed to check tenant status: {e}\")\n        if not force:\n            response = input(\"Continue anyway? Type 'yes' to confirm: \")\n            if response.lower() != \"yes\":\n                safe_print(\"Operation aborted - could not verify tenant status\")\n                raise\n        else:\n            raise RuntimeError(f\"Failed to check tenant status for {tenant_id}\")\n    safe_print(f\"{'=' * 80}\\n\")\n\n    # Confirm before proceeding (only in non-force mode)\n    if not confirm_step(\n        f\"Mark all connector credential pairs for deletion for tenant {tenant_id}?\",\n        force,\n    ):\n        safe_print(\"Operation cancelled by user\")\n        raise ValueError(\"Operation cancelled by user\")\n\n    run_connector_deletion(pod_name, tenant_id)\n\n    # Print summary\n    safe_print(\n        f\"✓ Marked all connector credential pairs for deletion for tenant {tenant_id}\"\n    )\n\n\ndef main() -> None:\n    if len(sys.argv) < 2:\n        print(\n            \"Usage: python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py <tenant_id> [--force] [--concurrency N]\"\n        )\n        print(\n            \"       python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv <csv_file_path> [--force]\"\n            \" [--concurrency N]\"\n        )\n        print(\"\\nArguments:\")\n        print(\n            \"  tenant_id        The tenant ID to process (required if not using --csv)\"\n        )\n        print(\"  --csv PATH       Path to CSV file containing tenant IDs to process\")\n        print(\"  --force          Skip all confirmation prompts (optional)\")\n        print(\"  --concurrency N  Process N tenants concurrently (default: 1)\")\n        print(\"\\nExamples:\")\n        print(\n            \"  python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py tenant_abc123-def456-789\"\n        )\n        print(\n            \"  python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py tenant_abc123-def456-789 --force\"\n        )\n        print(\n            \"  python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv gated_tenants_no_query_3mo.csv\"\n        )\n        print(\n            \"  python backend/scripts/tenant_cleanup/mark_connectors_for_deletion.py --csv gated_tenants_no_query_3mo.csv \"\n            \"--force --concurrency 16\"\n        )\n        sys.exit(1)\n\n    # Parse arguments\n    force = \"--force\" in sys.argv\n    tenant_ids: list[str] = []\n\n    # Parse concurrency\n    concurrency: int = 1\n    if \"--concurrency\" in sys.argv:\n        try:\n            concurrency_index = sys.argv.index(\"--concurrency\")\n            if concurrency_index + 1 >= len(sys.argv):\n                print(\"Error: --concurrency flag requires a number\", file=sys.stderr)\n                sys.exit(1)\n            concurrency = int(sys.argv[concurrency_index + 1])\n            if concurrency < 1:\n                print(\"Error: concurrency must be at least 1\", file=sys.stderr)\n                sys.exit(1)\n        except ValueError:\n            print(\"Error: --concurrency value must be an integer\", file=sys.stderr)\n            sys.exit(1)\n\n    # Validate: concurrency > 1 requires --force\n    if concurrency > 1 and not force:\n        print(\n            \"Error: --concurrency > 1 requires --force flag (interactive mode not supported with parallel processing)\",\n            file=sys.stderr,\n        )\n        sys.exit(1)\n\n    # Check for CSV mode\n    if \"--csv\" in sys.argv:\n        try:\n            csv_index: int = sys.argv.index(\"--csv\")\n            if csv_index + 1 >= len(sys.argv):\n                print(\"Error: --csv flag requires a file path\", file=sys.stderr)\n                sys.exit(1)\n\n            csv_path: str = sys.argv[csv_index + 1]\n            tenant_ids = read_tenant_ids_from_csv(csv_path)\n\n            if not tenant_ids:\n                print(\"Error: No tenant IDs found in CSV file\", file=sys.stderr)\n                sys.exit(1)\n\n            print(f\"Found {len(tenant_ids)} tenant(s) in CSV file: {csv_path}\")\n\n        except Exception as e:\n            print(f\"Error reading CSV file: {e}\", file=sys.stderr)\n            sys.exit(1)\n    else:\n        # Single tenant mode\n        tenant_ids = [sys.argv[1]]\n\n    # Find heavy worker pod once before processing\n    try:\n        print(\"Finding worker pod...\")\n        pod_name: str = find_worker_pod()\n        print(f\"✓ Using worker pod: {pod_name}\")\n    except Exception as e:\n        print(f\"✗ Failed to find heavy worker pod: {e}\", file=sys.stderr)\n        print(\"Cannot proceed with marking connectors for deletion\")\n        sys.exit(1)\n\n    # Initial confirmation (unless --force is used)\n    if not force:\n        print(f\"\\n{'=' * 80}\")\n        print(\"MARK CONNECTORS FOR DELETION - CONFIRMATION REQUIRED\")\n        print(f\"{'=' * 80}\")\n        if len(tenant_ids) == 1:\n            print(f\"Tenant ID: {tenant_ids[0]}\")\n        else:\n            print(f\"Number of tenants: {len(tenant_ids)}\")\n            print(f\"Tenant IDs: {', '.join(tenant_ids[:5])}\")\n            if len(tenant_ids) > 5:\n                print(f\"            ... and {len(tenant_ids) - 5} more\")\n\n        print(\n            f\"Mode: {'FORCE (no confirmations)' if force else 'Interactive (will ask for confirmation at each step)'}\"\n        )\n        print(f\"Concurrency: {concurrency} tenant(s) at a time\")\n        print(\"\\nThis will:\")\n        print(\"  1. Fetch all connector credential pairs for each tenant\")\n        print(\"  2. Cancel any scheduled indexing attempts for each connector\")\n        print(\"  3. Mark each connector credential pair status as DELETING\")\n        print(\"  4. Trigger the connector deletion task\")\n        print(f\"\\n{'=' * 80}\")\n        print(\"WARNING: This will mark connectors for deletion!\")\n        print(\"The actual deletion will be performed by the background celery worker.\")\n        print(f\"{'=' * 80}\\n\")\n\n        response = input(\"Are you sure you want to proceed? Type 'yes' to confirm: \")\n\n        if response.lower() != \"yes\":\n            print(\"Operation aborted by user\")\n            sys.exit(0)\n    else:\n        if len(tenant_ids) == 1:\n            print(\n                f\"⚠ FORCE MODE: Marking connectors for deletion for {tenant_ids[0]} without confirmations\"\n            )\n        else:\n            print(\n                f\"⚠ FORCE MODE: Marking connectors for deletion for {len(tenant_ids)} tenants \"\n                f\"(concurrency: {concurrency}) without confirmations\"\n            )\n\n    # Process tenants (in parallel if concurrency > 1)\n    failed_tenants: list[tuple[str, str]] = []\n    successful_tenants: list[str] = []\n\n    if concurrency == 1:\n        # Sequential processing\n        for idx, tenant_id in enumerate(tenant_ids, 1):\n            if len(tenant_ids) > 1:\n                print(f\"\\n{'=' * 80}\")\n                print(f\"Processing tenant {idx}/{len(tenant_ids)}: {tenant_id}\")\n                print(f\"{'=' * 80}\")\n\n            try:\n                mark_tenant_connectors_for_deletion(tenant_id, pod_name, force)\n                successful_tenants.append(tenant_id)\n            except Exception as e:\n                print(\n                    f\"✗ Failed to process tenant {tenant_id}: {e}\",\n                    file=sys.stderr,\n                )\n                failed_tenants.append((tenant_id, str(e)))\n\n                # If not in force mode and there are more tenants, ask if we should continue\n                if not force and idx < len(tenant_ids):\n                    response = input(\n                        f\"\\nContinue with remaining {len(tenant_ids) - idx} tenant(s)? (y/n): \"\n                    )\n                    if response.lower() != \"y\":\n                        print(\"Operation aborted by user\")\n                        break\n    else:\n        # Parallel processing\n        print(\n            f\"\\nProcessing {len(tenant_ids)} tenant(s) with concurrency={concurrency}\"\n        )\n\n        def process_tenant(tenant_id: str) -> tuple[str, bool, str | None]:\n            \"\"\"Process a single tenant. Returns (tenant_id, success, error_message).\"\"\"\n            try:\n                mark_tenant_connectors_for_deletion(tenant_id, pod_name, force)\n                return (tenant_id, True, None)\n            except Exception as e:\n                return (tenant_id, False, str(e))\n\n        with ThreadPoolExecutor(max_workers=concurrency) as executor:\n            # Submit all tasks\n            future_to_tenant = {\n                executor.submit(process_tenant, tenant_id): tenant_id\n                for tenant_id in tenant_ids\n            }\n\n            # Process results as they complete\n            completed: int = 0\n            for future in as_completed(future_to_tenant):\n                completed += 1\n                tenant_id, success, error = future.result()\n\n                if success:\n                    successful_tenants.append(tenant_id)\n                    safe_print(\n                        f\"[{completed}/{len(tenant_ids)}] ✓ Successfully processed {tenant_id}\"\n                    )\n                else:\n                    failed_tenants.append((tenant_id, error or \"Unknown error\"))\n                    safe_print(\n                        f\"[{completed}/{len(tenant_ids)}] ✗ Failed to process {tenant_id}: {error}\",\n                        file=sys.stderr,\n                    )\n\n    # Print summary if multiple tenants\n    if len(tenant_ids) > 1:\n        print(f\"\\n{'=' * 80}\")\n        print(\"OPERATION SUMMARY\")\n        print(f\"{'=' * 80}\")\n        print(f\"Total tenants: {len(tenant_ids)}\")\n        print(f\"Successful: {len(successful_tenants)}\")\n        print(f\"Failed: {len(failed_tenants)}\")\n\n        if failed_tenants:\n            print(\"\\nFailed tenants:\")\n            for tenant_id, error in failed_tenants:\n                print(f\"  - {tenant_id}: {error}\")\n\n        print(f\"{'=' * 80}\")\n\n        if failed_tenants:\n            sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/no_bastion_analyze_tenants.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nTenant analysis script that works WITHOUT bastion access.\nControl plane and data plane are in SEPARATE clusters.\n\nUsage:\n    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_analyze_tenants.py \\\n        [--skip-cache] \\\n        [--data-plane-context <context>] \\\n        [--control-plane-context <context>]\n\"\"\"\n\nimport argparse\nimport csv\nimport json\nimport subprocess\nimport sys\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom pathlib import Path\nfrom typing import Any\n\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import find_background_pod\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import find_worker_pod\n\n\ndef collect_tenant_data(\n    pod_name: str, context: str | None = None\n) -> list[dict[str, Any]]:\n    \"\"\"Run the understand_tenants script on the data plane pod.\"\"\"\n    print(f\"\\nCollecting tenant data from data plane pod {pod_name}...\")\n\n    # Get the path to the understand_tenants script\n    script_dir = Path(__file__).parent\n    understand_tenants_script = script_dir / \"on_pod_scripts\" / \"understand_tenants.py\"\n\n    if not understand_tenants_script.exists():\n        raise FileNotFoundError(\n            f\"understand_tenants.py not found at {understand_tenants_script}\"\n        )\n\n    # Copy script to pod\n    print(\"Copying script to pod...\")\n    cmd_cp = [\n        \"kubectl\",\n        \"cp\",\n        str(understand_tenants_script),\n        f\"{pod_name}:/tmp/understand_tenants.py\",\n    ]\n    if context:\n        cmd_cp.extend([\"--context\", context])\n\n    subprocess.run(cmd_cp, check=True, capture_output=True)\n\n    # Execute script on pod\n    print(\"Executing script on pod (this may take a while)...\")\n    cmd_exec = [\"kubectl\", \"exec\", pod_name]\n    if context:\n        cmd_exec.extend([\"--context\", context])\n    cmd_exec.extend([\"--\", \"python\", \"/tmp/understand_tenants.py\"])\n\n    result = subprocess.run(cmd_exec, capture_output=True, text=True, check=True)\n\n    # Show progress messages from stderr\n    if result.stderr:\n        print(result.stderr, file=sys.stderr)\n\n    # Parse JSON from stdout\n    try:\n        tenant_data = json.loads(result.stdout)\n        print(f\"Successfully collected data for {len(tenant_data)} tenants\")\n        return tenant_data\n    except json.JSONDecodeError as e:\n        print(f\"Failed to parse JSON output: {e}\", file=sys.stderr)\n        print(f\"stdout: {result.stdout[:500]}\", file=sys.stderr)\n        raise\n\n\ndef collect_control_plane_data_from_pod(\n    pod_name: str, context: str | None = None\n) -> list[dict[str, Any]]:\n    \"\"\"Collect control plane data by running a query on a control plane pod.\"\"\"\n    print(f\"\\nCollecting control plane data from pod {pod_name}...\")\n\n    # Create a script to query the control plane database\n    query_script = \"\"\"\nimport json\nimport os\nfrom sqlalchemy import create_engine, text\n\n# Try to get database URL from various environment patterns\ncontrol_db_url = None\n\n# Pattern 1: POSTGRES_CONTROL_* variables\nif os.environ.get(\"POSTGRES_CONTROL_HOST\"):\n    host = os.environ.get(\"POSTGRES_CONTROL_HOST\")\n    port = os.environ.get(\"POSTGRES_CONTROL_PORT\", \"5432\")\n    db = os.environ.get(\"POSTGRES_CONTROL_DB\", \"control\")\n    user = os.environ.get(\"POSTGRES_CONTROL_USER\", \"postgres\")\n    password = os.environ.get(\"POSTGRES_CONTROL_PASSWORD\", \"\")\n    if password:\n        control_db_url = f\"postgresql://{user}:{password}@{host}:{port}/{db}\"\n\n# Pattern 2: Standard POSTGRES_* variables (in control plane cluster)\nif not control_db_url and os.environ.get(\"POSTGRES_HOST\"):\n    host = os.environ.get(\"POSTGRES_HOST\")\n    port = os.environ.get(\"POSTGRES_PORT\", \"5432\")\n    db = os.environ.get(\"POSTGRES_DB\", \"danswer\")\n    user = os.environ.get(\"POSTGRES_USER\", \"postgres\")\n    password = os.environ.get(\"POSTGRES_PASSWORD\", \"\")\n    if password:\n        control_db_url = f\"postgresql://{user}:{password}@{host}:{port}/{db}\"\n\nif not control_db_url:\n    raise ValueError(\"Cannot determine control plane database connection\")\n\nengine = create_engine(control_db_url)\n\nwith engine.connect() as conn:\n    result = conn.execute(\n        text(\n            \"SELECT tenant_id, stripe_customer_id, created_at, active_seats, \"\n            \"creator_email, referral_source, application_status FROM tenant\"\n        )\n    )\n    rows = [dict(row._mapping) for row in result]\n    print(json.dumps(rows, default=str))\n\"\"\"\n\n    # Write the script to a temp file\n    script_path = \"/tmp/query_control_plane.py\"\n\n    print(\"  Creating control plane query script on pod...\")\n    cmd_write = [\"kubectl\", \"exec\", pod_name]\n    if context:\n        cmd_write.extend([\"--context\", context])\n    cmd_write.extend(\n        [\"--\", \"bash\", \"-c\", f\"cat > {script_path} << 'EOF'\\n{query_script}\\nEOF\"]\n    )\n\n    subprocess.run(cmd_write, check=True, capture_output=True)\n\n    # Execute the script on the pod\n    print(\"  Executing control plane query on pod...\")\n    cmd_exec = [\"kubectl\", \"exec\", pod_name]\n    if context:\n        cmd_exec.extend([\"--context\", context])\n    cmd_exec.extend([\"--\", \"python\", script_path])\n\n    result = subprocess.run(cmd_exec, capture_output=True, text=True, check=True)\n\n    # Parse JSON output\n    try:\n        control_plane_data = json.loads(result.stdout)\n        print(\n            f\"✓ Successfully collected {len(control_plane_data)} tenant records from control plane\"\n        )\n        return control_plane_data\n    except json.JSONDecodeError as e:\n        print(f\"Failed to parse JSON output: {e}\", file=sys.stderr)\n        print(f\"stdout: {result.stdout[:500]}\", file=sys.stderr)\n        raise\n\n\ndef analyze_tenants(\n    tenants: list[dict[str, Any]], control_plane_data: list[dict[str, Any]]\n) -> list[dict[str, Any]]:\n    \"\"\"Analyze tenant activity data and return gated tenants with no query in last 3 months.\"\"\"\n\n    print(f\"\\n{'=' * 80}\")\n    print(f\"TENANT ANALYSIS REPORT - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n    print(f\"{'=' * 80}\")\n    print(f\"Total tenants analyzed: {len(tenants)}\\n\")\n\n    # Create a lookup dict for control plane data by tenant_id\n    control_plane_lookup = {}\n    for row in control_plane_data:\n        tenant_id = row.get(\"tenant_id\")\n        tenant_status = row.get(\"application_status\")\n        if tenant_id:\n            control_plane_lookup[tenant_id] = tenant_status\n\n    # Calculate cutoff dates\n    one_month_cutoff = datetime.now(timezone.utc) - timedelta(days=30)\n    three_month_cutoff = datetime.now(timezone.utc) - timedelta(days=90)\n\n    # Categorize tenants into 4 groups\n    gated_no_query_3_months = []  # GATED_ACCESS + no query in last 3 months\n    gated_query_1_3_months = []  # GATED_ACCESS + query between 1-3 months\n    gated_query_1_month = []  # GATED_ACCESS + query in last 1 month\n    everyone_else = []  # All other tenants\n\n    for tenant in tenants:\n        tenant_id = tenant.get(\"tenant_id\")\n        last_query_time = tenant.get(\"last_query_time\")\n        tenant_status = control_plane_lookup.get(tenant_id, \"UNKNOWN\")\n\n        is_gated = tenant_status == \"GATED_ACCESS\"\n\n        # Parse last query time\n        if last_query_time:\n            query_time = datetime.fromisoformat(last_query_time.replace(\"Z\", \"+00:00\"))\n        else:\n            query_time = None\n\n        # Categorize\n        if is_gated:\n            if query_time is None or query_time <= three_month_cutoff:\n                gated_no_query_3_months.append(tenant)\n            elif query_time <= one_month_cutoff:\n                gated_query_1_3_months.append(tenant)\n            else:  # query_time > one_month_cutoff\n                gated_query_1_month.append(tenant)\n        else:\n            everyone_else.append(tenant)\n\n    # Calculate document counts for each group\n    gated_no_query_docs = sum(\n        t.get(\"num_documents\", 0) for t in gated_no_query_3_months\n    )\n    gated_1_3_month_docs = sum(\n        t.get(\"num_documents\", 0) for t in gated_query_1_3_months\n    )\n    gated_1_month_docs = sum(t.get(\"num_documents\", 0) for t in gated_query_1_month)\n    everyone_else_docs = sum(t.get(\"num_documents\", 0) for t in everyone_else)\n\n    print(\"=\" * 80)\n    print(\"TENANT CATEGORIZATION BY GATED ACCESS STATUS AND ACTIVITY\")\n    print(\"=\" * 80)\n\n    print(\"\\n1. GATED_ACCESS + No query in last 3 months:\")\n    print(f\"   Count: {len(gated_no_query_3_months):,}\")\n    print(f\"   Total documents: {gated_no_query_docs:,}\")\n    print(\n        f\"   Avg documents per tenant: {gated_no_query_docs / len(gated_no_query_3_months) if gated_no_query_3_months else 0:.2f}\"\n    )\n\n    print(\"\\n2. GATED_ACCESS + Query between 1-3 months ago:\")\n    print(f\"   Count: {len(gated_query_1_3_months):,}\")\n    print(f\"   Total documents: {gated_1_3_month_docs:,}\")\n    print(\n        f\"   Avg documents per tenant: {gated_1_3_month_docs / len(gated_query_1_3_months) if gated_query_1_3_months else 0:.2f}\"\n    )\n\n    print(\"\\n3. GATED_ACCESS + Query in last 1 month:\")\n    print(f\"   Count: {len(gated_query_1_month):,}\")\n    print(f\"   Total documents: {gated_1_month_docs:,}\")\n    print(\n        f\"   Avg documents per tenant: {gated_1_month_docs / len(gated_query_1_month) if gated_query_1_month else 0:.2f}\"\n    )\n\n    print(\"\\n4. Everyone else (non-GATED_ACCESS):\")\n    print(f\"   Count: {len(everyone_else):,}\")\n    print(f\"   Total documents: {everyone_else_docs:,}\")\n    print(\n        f\"   Avg documents per tenant: {everyone_else_docs / len(everyone_else) if everyone_else else 0:.2f}\"\n    )\n\n    total_docs = (\n        gated_no_query_docs\n        + gated_1_3_month_docs\n        + gated_1_month_docs\n        + everyone_else_docs\n    )\n    print(f\"\\nTotal documents across all tenants: {total_docs:,}\")\n\n    # Top 100 tenants by document count\n    print(\"\\n\" + \"=\" * 80)\n    print(\"TOP 100 TENANTS BY DOCUMENT COUNT\")\n    print(\"=\" * 80)\n\n    # Sort all tenants by document count\n    sorted_tenants = sorted(\n        tenants, key=lambda t: t.get(\"num_documents\", 0), reverse=True\n    )\n\n    top_100 = sorted_tenants[:100]\n\n    print(\n        f\"\\n{'Rank':<6} {'Tenant ID':<45} {'Documents':>12} {'Users':>8} {'Last Query':<12} {'Group'}\"\n    )\n    print(\"-\" * 130)\n\n    for idx, tenant in enumerate(top_100, 1):\n        tenant_id = tenant.get(\"tenant_id\", \"Unknown\")\n        num_docs = tenant.get(\"num_documents\", 0)\n        num_users = tenant.get(\"num_users\", 0)\n        last_query = tenant.get(\"last_query_time\", \"Never\")\n        tenant_status = control_plane_lookup.get(tenant_id, \"UNKNOWN\")\n\n        # Format the last query time\n        if last_query and last_query != \"Never\":\n            try:\n                query_dt = datetime.fromisoformat(last_query.replace(\"Z\", \"+00:00\"))\n                last_query_str = query_dt.strftime(\"%Y-%m-%d\")\n            except Exception:\n                last_query_str = last_query[:10] if len(last_query) > 10 else last_query\n        else:\n            last_query_str = \"Never\"\n\n        # Determine group\n        if tenant_status == \"GATED_ACCESS\":\n            if last_query and last_query != \"Never\":\n                query_time = datetime.fromisoformat(last_query.replace(\"Z\", \"+00:00\"))\n                if query_time <= three_month_cutoff:\n                    group = \"Gated - No query (3mo)\"\n                elif query_time <= one_month_cutoff:\n                    group = \"Gated - Query (1-3mo)\"\n                else:\n                    group = \"Gated - Query (1mo)\"\n            else:\n                group = \"Gated - No query (3mo)\"\n        else:\n            group = f\"Other ({tenant_status})\"\n\n        print(\n            f\"{idx:<6} {tenant_id:<45} {num_docs:>12,} {num_users:>8} {last_query_str:<12} {group}\"\n        )\n\n    # Summary stats for top 100\n    top_100_docs = sum(t.get(\"num_documents\", 0) for t in top_100)\n\n    print(\"\\n\" + \"-\" * 110)\n    print(f\"Top 100 total documents: {top_100_docs:,}\")\n    print(\n        f\"Percentage of all documents: {(top_100_docs / total_docs * 100) if total_docs > 0 else 0:.2f}%\"\n    )\n\n    # Additional insights\n    print(\"\\n\" + \"=\" * 80)\n    print(\"ADDITIONAL INSIGHTS\")\n    print(\"=\" * 80)\n\n    # Tenants with no documents\n    no_docs = [t for t in tenants if t.get(\"num_documents\", 0) == 0]\n    print(\n        f\"\\nTenants with 0 documents: {len(no_docs):,} ({len(no_docs) / len(tenants) * 100:.2f}%)\"\n    )\n\n    # Tenants with no users\n    no_users = [t for t in tenants if t.get(\"num_users\", 0) == 0]\n    print(\n        f\"Tenants with 0 users: {len(no_users):,} ({len(no_users) / len(tenants) * 100:.2f}%)\"\n    )\n\n    # Document distribution quartiles\n    doc_counts = sorted([t.get(\"num_documents\", 0) for t in tenants])\n    if doc_counts:\n        print(\"\\nDocument count distribution:\")\n        print(f\"  Median: {doc_counts[len(doc_counts) // 2]:,}\")\n        print(f\"  75th percentile: {doc_counts[int(len(doc_counts) * 0.75)]:,}\")\n        print(f\"  90th percentile: {doc_counts[int(len(doc_counts) * 0.90)]:,}\")\n        print(f\"  95th percentile: {doc_counts[int(len(doc_counts) * 0.95)]:,}\")\n        print(f\"  99th percentile: {doc_counts[int(len(doc_counts) * 0.99)]:,}\")\n        print(f\"  Max: {doc_counts[-1]:,}\")\n\n    return gated_no_query_3_months\n\n\ndef find_recent_tenant_data() -> tuple[list[dict[str, Any]] | None, str | None]:\n    \"\"\"Find the most recent tenant data file if it's less than 7 days old.\"\"\"\n    current_dir = Path.cwd()\n    tenant_data_files = list(current_dir.glob(\"tenant_data_*.json\"))\n\n    if not tenant_data_files:\n        return None, None\n\n    # Sort by modification time, most recent first\n    tenant_data_files.sort(key=lambda p: p.stat().st_mtime, reverse=True)\n    most_recent = tenant_data_files[0]\n\n    # Check if file is less than 7 days old\n    file_age = datetime.now().timestamp() - most_recent.stat().st_mtime\n    seven_days_in_seconds = 7 * 24 * 60 * 60\n\n    if file_age < seven_days_in_seconds:\n        file_age_days = file_age / (24 * 60 * 60)\n        print(\n            f\"\\n✓ Found recent tenant data: {most_recent.name} (age: {file_age_days:.1f} days)\"\n        )\n\n        with open(most_recent, \"r\") as f:\n            tenant_data = json.load(f)\n\n        return tenant_data, str(most_recent)\n\n    return None, None\n\n\ndef main() -> None:\n    # Parse command-line arguments\n    parser = argparse.ArgumentParser(\n        description=\"Analyze tenant data WITHOUT bastion access - control plane and data plane are separate clusters\"\n    )\n    parser.add_argument(\n        \"--skip-cache\",\n        action=\"store_true\",\n        help=\"Skip cached tenant data and collect fresh data from pod\",\n    )\n    parser.add_argument(\n        \"--data-plane-context\",\n        type=str,\n        help=\"Kubectl context for data plane cluster (optional)\",\n    )\n    parser.add_argument(\n        \"--control-plane-context\",\n        type=str,\n        help=\"Kubectl context for control plane cluster (optional)\",\n    )\n    args = parser.parse_args()\n\n    try:\n        # Step 1: Check for recent tenant data (< 7 days old) unless --skip-cache is set\n        tenant_data = None\n        cached_file = None\n\n        if not args.skip_cache:\n            tenant_data, cached_file = find_recent_tenant_data()\n\n        if tenant_data:\n            print(f\"Using cached tenant data from: {cached_file}\")\n            print(f\"Total tenants in cache: {len(tenant_data)}\")\n        else:\n            if args.skip_cache:\n                print(\"\\n⚠ Skipping cache (--skip-cache flag set)\")\n\n            # Find data plane worker pod\n            print(\"\\n\" + \"=\" * 80)\n            print(\"CONNECTING TO DATA PLANE CLUSTER\")\n            print(\"=\" * 80)\n            data_plane_pod = find_worker_pod(args.data_plane_context)\n\n            # Collect tenant data from data plane\n            tenant_data = collect_tenant_data(data_plane_pod, args.data_plane_context)\n\n            # Save raw data to file with timestamp\n            timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n            output_file = f\"tenant_data_{timestamp}.json\"\n            with open(output_file, \"w\") as f:\n                json.dump(tenant_data, f, indent=2, default=str)\n            print(f\"\\n✓ Raw data saved to: {output_file}\")\n\n        # Step 2: Collect control plane data from control plane cluster\n        print(\"\\n\" + \"=\" * 80)\n        print(\"CONNECTING TO CONTROL PLANE CLUSTER\")\n        print(\"=\" * 80)\n        control_plane_pod = find_background_pod(args.control_plane_context)\n        control_plane_data = collect_control_plane_data_from_pod(\n            control_plane_pod, args.control_plane_context\n        )\n\n        # Step 3: Analyze the data and get gated tenants without recent queries\n        gated_no_query_3_months = analyze_tenants(tenant_data, control_plane_data)\n\n        # Step 4: Export to CSV (sorted by num_documents descending)\n        timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n        csv_file = f\"gated_tenants_no_query_3mo_{timestamp}.csv\"\n\n        # Sort by num_documents in descending order\n        sorted_tenants = sorted(\n            gated_no_query_3_months,\n            key=lambda t: t.get(\"num_documents\", 0),\n            reverse=True,\n        )\n\n        with open(csv_file, \"w\", newline=\"\", encoding=\"utf-8\") as csvfile:\n            fieldnames = [\n                \"tenant_id\",\n                \"num_documents\",\n                \"num_users\",\n                \"last_query_time\",\n                \"days_since_last_query\",\n            ]\n            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n            writer.writeheader()\n\n            now = datetime.now(timezone.utc)\n            for tenant in sorted_tenants:\n                # Calculate days since last query\n                last_query_time = tenant.get(\"last_query_time\")\n                if last_query_time:\n                    try:\n                        query_dt = datetime.fromisoformat(\n                            last_query_time.replace(\"Z\", \"+00:00\")\n                        )\n                        days_since = str((now - query_dt).days)\n                    except Exception:\n                        days_since = \"N/A\"\n                else:\n                    days_since = \"Never\"\n\n                writer.writerow(\n                    {\n                        \"tenant_id\": tenant.get(\"tenant_id\", \"\"),\n                        \"num_documents\": tenant.get(\"num_documents\", 0),\n                        \"num_users\": tenant.get(\"num_users\", 0),\n                        \"last_query_time\": last_query_time or \"Never\",\n                        \"days_since_last_query\": days_since,\n                    }\n                )\n\n        print(f\"\\n✓ CSV exported to: {csv_file}\")\n        print(\n            f\"  Total gated tenants with no query in last 3 months: {len(gated_no_query_3_months)}\"\n        )\n\n    except subprocess.CalledProcessError as e:\n        print(f\"Error running command: {e}\", file=sys.stderr)\n        if e.stderr:\n            print(f\"stderr: {e.stderr}\", file=sys.stderr)\n        sys.exit(1)\n    except Exception as e:\n        print(f\"Error: {e}\", file=sys.stderr)\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/no_bastion_cleanup_tenants.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nTenant cleanup script that works WITHOUT bastion access.\nAll queries run directly from pods.\nSupports two-cluster architecture (data plane and control plane in separate clusters).\n\nUsage:\n    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py <tenant_id> \\\n        --data-plane-context <context> --control-plane-context <context> [--force]\n\n    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py --csv <csv_file_path> \\\n        --data-plane-context <context> --control-plane-context <context> [--force]\n\"\"\"\n\nimport csv\nimport json\nimport signal\nimport subprocess\nimport sys\nfrom concurrent.futures import as_completed\nfrom concurrent.futures import ThreadPoolExecutor\nfrom datetime import datetime\nfrom pathlib import Path\nfrom threading import Lock\n\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import confirm_step\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import execute_control_plane_delete\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import find_background_pod\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import find_worker_pod\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import get_tenant_status\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import read_tenant_ids_from_csv\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import (\n    TenantNotFoundInControlPlaneError,\n)\n\n\n# Global lock for thread-safe operations\n_print_lock: Lock = Lock()\n_csv_lock: Lock = Lock()\n\n\ndef signal_handler(signum: int, frame: object) -> None:  # noqa: ARG001\n    \"\"\"Handle termination signals by killing active subprocess.\"\"\"\n    sys.exit(1)\n\n\ndef setup_scripts_on_pod(pod_name: str, context: str) -> None:\n    \"\"\"Copy all required scripts to the pod once at the beginning.\n\n    Args:\n        pod_name: Pod to copy scripts to\n        context: kubectl context for the cluster\n    \"\"\"\n    print(\"Setting up scripts on pod (one-time operation)...\")\n\n    script_dir = Path(__file__).parent\n    scripts_to_copy = [\n        (\n            \"on_pod_scripts/check_documents_deleted.py\",\n            \"/tmp/check_documents_deleted.py\",\n        ),\n        (\"on_pod_scripts/cleanup_tenant_schema.py\", \"/tmp/cleanup_tenant_schema.py\"),\n        (\"on_pod_scripts/get_tenant_users.py\", \"/tmp/get_tenant_users.py\"),\n        (\"on_pod_scripts/get_tenant_index_name.py\", \"/tmp/get_tenant_index_name.py\"),\n    ]\n\n    for local_path, remote_path in scripts_to_copy:\n        local_file = script_dir / local_path\n        if not local_file.exists():\n            raise FileNotFoundError(f\"Script not found: {local_file}\")\n\n        cmd_cp = [\"kubectl\", \"cp\", \"--context\", context]\n        cmd_cp.extend([str(local_file), f\"{pod_name}:{remote_path}\"])\n\n        subprocess.run(cmd_cp, check=True, capture_output=True)\n\n    print(\"✓ All scripts copied to pod\")\n\n\ndef get_tenant_index_name(pod_name: str, tenant_id: str, context: str) -> str:\n    \"\"\"Get the default index name for the given tenant by running script on pod.\n\n    Args:\n        pod_name: Data plane pod to execute on\n        tenant_id: Tenant ID to process\n        context: kubectl context for data plane cluster\n    \"\"\"\n    print(f\"Getting default index name for tenant: {tenant_id}\")\n\n    # Get the path to the script\n    script_dir = Path(__file__).parent\n    index_name_script = script_dir / \"on_pod_scripts\" / \"get_tenant_index_name.py\"\n\n    if not index_name_script.exists():\n        raise FileNotFoundError(\n            f\"get_tenant_index_name.py not found at {index_name_script}\"\n        )\n\n    try:\n        # Copy script to pod\n        print(\"  Copying script to pod...\")\n        cmd_cp = [\"kubectl\", \"cp\", \"--context\", context]\n        cmd_cp.extend(\n            [\n                str(index_name_script),\n                f\"{pod_name}:/tmp/get_tenant_index_name.py\",\n            ]\n        )\n\n        subprocess.run(\n            cmd_cp,\n            check=True,\n            capture_output=True,\n        )\n\n        # Execute script on pod\n        print(\"  Executing script on pod...\")\n        cmd_exec = [\"kubectl\", \"exec\", \"--context\", context, pod_name]\n        cmd_exec.extend(\n            [\n                \"--\",\n                \"python\",\n                \"/tmp/get_tenant_index_name.py\",\n                tenant_id,\n            ]\n        )\n\n        result = subprocess.run(\n            cmd_exec,\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        # Show progress messages from stderr\n        if result.stderr:\n            print(f\"  {result.stderr}\", end=\"\")\n\n        # Parse JSON result from stdout\n        result_data = json.loads(result.stdout)\n        status = result_data.get(\"status\")\n\n        if status == \"success\":\n            index_name = result_data.get(\"index_name\")\n            print(f\"✓ Found index name: {index_name}\")\n            return index_name\n        else:\n            message = result_data.get(\"message\", \"Unknown error\")\n            raise RuntimeError(f\"Failed to get index name: {message}\")\n\n    except subprocess.CalledProcessError as e:\n        print(\n            f\"✗ Failed to get index name for tenant {tenant_id}: {e}\", file=sys.stderr\n        )\n        if e.stderr:\n            print(f\"  Error details: {e.stderr}\", file=sys.stderr)\n        raise\n    except Exception as e:\n        print(\n            f\"✗ Failed to get index name for tenant {tenant_id}: {e}\", file=sys.stderr\n        )\n        raise\n\n\ndef get_tenant_users(pod_name: str, tenant_id: str, context: str) -> list[str]:\n    \"\"\"Get list of user emails from the tenant's data plane schema.\n\n    Args:\n        pod_name: Data plane pod to execute on\n        tenant_id: Tenant ID to process\n        context: kubectl context for data plane cluster\n    \"\"\"\n    # Script is already on pod from setup_scripts_on_pod()\n    try:\n        # Execute script on pod\n        cmd_exec = [\"kubectl\", \"exec\", \"--context\", context, pod_name]\n        cmd_exec.extend(\n            [\n                \"--\",\n                \"python\",\n                \"/tmp/get_tenant_users.py\",\n                tenant_id,\n            ]\n        )\n\n        result = subprocess.run(\n            cmd_exec,\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        # Show progress messages from stderr\n        if result.stderr:\n            print(f\"  {result.stderr}\", end=\"\")\n\n        # Parse JSON result from stdout\n        result_data = json.loads(result.stdout)\n        status = result_data.get(\"status\")\n\n        if status == \"success\":\n            users = result_data.get(\"users\", [])\n            if users:\n                print(f\"✓ Found {len(users)} user(s):\")\n                for email in users:\n                    print(f\"    - {email}\")\n            else:\n                print(\"  No users found in tenant\")\n            return users\n        else:\n            message = result_data.get(\"message\", \"Unknown error\")\n            print(f\"⚠ Could not fetch users: {message}\")\n            return []\n\n    except subprocess.CalledProcessError as e:\n        print(f\"⚠ Failed to get users for tenant {tenant_id}: {e}\")\n        if e.stderr:\n            print(f\"  Error details: {e.stderr}\")\n        return []\n    except Exception as e:\n        print(f\"⚠ Failed to get users for tenant {tenant_id}: {e}\")\n        return []\n\n\ndef check_documents_deleted(pod_name: str, tenant_id: str, context: str) -> None:\n    \"\"\"Check if all documents and connector credential pairs have been deleted.\n\n    Args:\n        pod_name: Data plane pod to execute on\n        tenant_id: Tenant ID to process\n        context: kubectl context for data plane cluster\n    \"\"\"\n    # Script is already on pod from setup_scripts_on_pod()\n    try:\n        # Execute script on pod\n        cmd_exec = [\"kubectl\", \"exec\", \"--context\", context, pod_name]\n        cmd_exec.extend(\n            [\n                \"--\",\n                \"python\",\n                \"/tmp/check_documents_deleted.py\",\n                tenant_id,\n            ]\n        )\n\n        result = subprocess.run(\n            cmd_exec,\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        # Show progress messages from stderr\n        if result.stderr:\n            print(f\"  {result.stderr}\", end=\"\")\n\n        # Parse JSON result from stdout\n        result_data = json.loads(result.stdout)\n        status = result_data.get(\"status\")\n\n        if status == \"success\":\n            message = result_data.get(\"message\")\n            print(f\"✓ {message}\")\n        elif status == \"not_found\":\n            message = result_data.get(\"message\", \"Schema not found\")\n            print(f\"⚠ {message}\")\n        else:\n            message = result_data.get(\"message\", \"Unknown error\")\n            cc_count = result_data.get(\"connector_credential_pair_count\", 0)\n            doc_count = result_data.get(\"document_count\", 0)\n            error_details = f\"{message}\"\n            if cc_count > 0 or doc_count > 0:\n                error_details += f\"\\n  ConnectorCredentialPairs: {cc_count}\\n  Documents: {doc_count}\"\n            raise RuntimeError(error_details)\n\n    except subprocess.CalledProcessError as e:\n        print(\n            f\"✗ Failed to check documents for tenant {tenant_id}: {e}\",\n            file=sys.stderr,\n        )\n        if e.stderr:\n            print(f\"  Error details: {e.stderr}\", file=sys.stderr)\n        raise\n    except Exception as e:\n        print(\n            f\"✗ Failed to check documents for tenant {tenant_id}: {e}\",\n            file=sys.stderr,\n        )\n        raise\n\n\ndef drop_data_plane_schema(pod_name: str, tenant_id: str, context: str) -> None:\n    \"\"\"Drop the PostgreSQL schema for the given tenant by running script on pod.\n\n    Args:\n        pod_name: Data plane pod to execute on\n        tenant_id: Tenant ID to process\n        context: kubectl context for data plane cluster\n    \"\"\"\n    # Script is already on pod from setup_scripts_on_pod()\n    try:\n        # Execute script on pod\n        cmd_exec = [\"kubectl\", \"exec\", \"--context\", context, pod_name]\n        cmd_exec.extend(\n            [\n                \"--\",\n                \"python\",\n                \"/tmp/cleanup_tenant_schema.py\",\n                tenant_id,\n            ]\n        )\n\n        result = subprocess.run(\n            cmd_exec,\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        # Show progress messages from stderr\n        if result.stderr:\n            print(f\"  {result.stderr}\", end=\"\")\n\n        # Parse JSON result from stdout\n        result_data = json.loads(result.stdout)\n        status = result_data.get(\"status\")\n        message = result_data.get(\"message\")\n\n        if status == \"success\":\n            print(f\"✓ {message}\")\n        elif status == \"not_found\":\n            print(f\"⚠ {message}\")\n        else:\n            print(f\"✗ {message}\", file=sys.stderr)\n            raise RuntimeError(message)\n\n    except subprocess.CalledProcessError as e:\n        print(f\"✗ Failed to drop schema for tenant {tenant_id}: {e}\", file=sys.stderr)\n        if e.stderr:\n            print(f\"  Error details: {e.stderr}\", file=sys.stderr)\n        raise\n    except Exception as e:\n        print(f\"✗ Failed to drop schema for tenant {tenant_id}: {e}\", file=sys.stderr)\n        raise\n\n\ndef cleanup_control_plane(\n    pod_name: str, tenant_id: str, context: str, force: bool = False\n) -> None:\n    \"\"\"Clean up control plane data via pod queries.\n\n    Args:\n        pod_name: Control plane pod to execute on\n        tenant_id: Tenant ID to process\n        context: kubectl context for control plane cluster\n        force: Skip confirmations if True\n    \"\"\"\n    print(f\"Cleaning up control plane data for tenant: {tenant_id}\")\n\n    # Delete in order respecting foreign key constraints\n    delete_queries = [\n        (\n            \"tenant_notification\",\n            f\"DELETE FROM tenant_notification WHERE tenant_id = '{tenant_id}'\",\n        ),\n        (\"tenant_config\", f\"DELETE FROM tenant_config WHERE tenant_id = '{tenant_id}'\"),\n        (\"subscription\", f\"DELETE FROM subscription WHERE tenant_id = '{tenant_id}'\"),\n        (\"tenant\", f\"DELETE FROM tenant WHERE tenant_id = '{tenant_id}'\"),\n    ]\n\n    try:\n        for table_name, query in delete_queries:\n            print(f\"  Deleting from {table_name}...\")\n\n            if not confirm_step(f\"Delete from {table_name}?\", force):\n                print(f\"  Skipping deletion from {table_name}\")\n                continue\n\n            execute_control_plane_delete(pod_name, query, context)\n\n        print(f\"✓ Successfully cleaned up control plane data for tenant: {tenant_id}\")\n\n    except Exception as e:\n        print(\n            f\"✗ Failed to clean up control plane for tenant {tenant_id}: {e}\",\n            file=sys.stderr,\n        )\n        raise\n\n\ndef cleanup_tenant(\n    tenant_id: str,\n    data_plane_pod: str,\n    control_plane_pod: str,\n    data_plane_context: str,\n    control_plane_context: str,\n    force: bool = False,\n) -> bool:\n    \"\"\"Main cleanup function that orchestrates all cleanup steps.\n\n    Args:\n        tenant_id: Tenant ID to process\n        data_plane_pod: Data plane pod for schema operations\n        control_plane_pod: Control plane pod for tenant record operations\n        data_plane_context: kubectl context for data plane cluster\n        control_plane_context: kubectl context for control plane cluster\n        force: Skip confirmations if True\n    \"\"\"\n    print(f\"Starting cleanup for tenant: {tenant_id}\")\n\n    # Track if tenant was not found in control plane (for force mode)\n    tenant_not_found_in_control_plane = False\n\n    # Check tenant status first (from control plane)\n    print(f\"\\n{'=' * 80}\")\n    try:\n        tenant_status = get_tenant_status(\n            control_plane_pod, tenant_id, control_plane_context\n        )\n\n        # If tenant is not GATED_ACCESS, require explicit confirmation even in force mode\n        if tenant_status and tenant_status != \"GATED_ACCESS\":\n            print(\n                f\"\\n⚠️  WARNING: Tenant status is '{tenant_status}', not 'GATED_ACCESS'!\"\n            )\n            print(\n                \"This tenant may be active and should not be deleted without careful review.\"\n            )\n            print(f\"{'=' * 80}\\n\")\n\n            if force:\n                print(f\"Skipping cleanup for tenant {tenant_id} in force mode\")\n                return False\n\n            # Always ask for confirmation if not gated\n            response = input(\n                \"Are you ABSOLUTELY SURE you want to proceed? Type 'yes' to confirm: \"\n            )\n            if response.lower() != \"yes\":\n                print(\"Cleanup aborted - tenant is not GATED_ACCESS\")\n                return False\n        elif tenant_status == \"GATED_ACCESS\":\n            print(\"✓ Tenant status is GATED_ACCESS - safe to proceed with cleanup\")\n        elif tenant_status is None:\n            print(\"⚠️  WARNING: Could not determine tenant status!\")\n\n            if force:\n                print(f\"Skipping cleanup for tenant {tenant_id} in force mode\")\n                return False\n\n            response = input(\"Continue anyway? Type 'yes' to confirm: \")\n            if response.lower() != \"yes\":\n                print(\"Cleanup aborted - could not verify tenant status\")\n                return False\n    except TenantNotFoundInControlPlaneError as e:\n        # Tenant/table not found in control plane\n        error_str = str(e)\n        print(f\"⚠️  WARNING: Tenant not found in control plane: {error_str}\")\n        tenant_not_found_in_control_plane = True\n\n        if force:\n            print(\n                \"[FORCE MODE] Tenant not found in control plane - continuing with dataplane cleanup only\"\n            )\n        else:\n            response = input(\"Continue anyway? Type 'yes' to confirm: \")\n            if response.lower() != \"yes\":\n                print(\"Cleanup aborted - tenant not found in control plane\")\n                return False\n    except Exception as e:\n        # Other errors (not \"not found\")\n        error_str = str(e)\n        print(f\"⚠️  WARNING: Failed to check tenant status: {error_str}\")\n\n        if force:\n            print(f\"Skipping cleanup for tenant {tenant_id} in force mode\")\n            return False\n\n        response = input(\"Continue anyway? Type 'yes' to confirm: \")\n        if response.lower() != \"yes\":\n            print(\"Cleanup aborted - could not verify tenant status\")\n            return False\n    print(f\"{'=' * 80}\\n\")\n\n    # Fetch tenant users for informational purposes (non-blocking) from data plane\n    if not force:\n        print(f\"\\n{'=' * 80}\")\n        try:\n            get_tenant_users(data_plane_pod, tenant_id, data_plane_context)\n        except Exception as e:\n            print(f\"⚠ Could not fetch tenant users: {e}\")\n        print(f\"{'=' * 80}\\n\")\n\n    # Step 1: Make sure all documents are deleted (data plane)\n    print(f\"\\n{'=' * 80}\")\n    print(\"Step 1/3: Checking for remaining ConnectorCredentialPairs and Documents\")\n    print(f\"{'=' * 80}\")\n    try:\n        check_documents_deleted(data_plane_pod, tenant_id, data_plane_context)\n    except Exception as e:\n        print(f\"✗ Document check failed: {e}\", file=sys.stderr)\n        print(\n            \"\\nPlease ensure all ConnectorCredentialPairs and Documents are deleted before running cleanup.\"\n        )\n        print(\n            \"You may need to mark connectors for deletion and wait for cleanup to complete.\"\n        )\n        return False\n    print(f\"{'=' * 80}\\n\")\n\n    # Step 2: Drop data plane schema\n    if confirm_step(\n        f\"Step 2/3: Drop data plane schema '{tenant_id}' (CASCADE - will delete all tables, functions, etc.)\",\n        force,\n    ):\n        try:\n            drop_data_plane_schema(data_plane_pod, tenant_id, data_plane_context)\n        except Exception as e:\n            print(f\"✗ Failed at schema cleanup step: {e}\", file=sys.stderr)\n            if not force:\n                response = input(\"Continue with control plane cleanup? (y/n): \")\n                if response.lower() != \"y\":\n                    print(\"Cleanup aborted by user\")\n                    return False\n            else:\n                print(\"[FORCE MODE] Continuing despite schema cleanup failure\")\n    else:\n        print(\"Step 2 skipped by user\")\n\n    # Step 3: Clean up control plane (skip if tenant not found in control plane with --force)\n    if tenant_not_found_in_control_plane:\n        print(f\"\\n{'=' * 80}\")\n        print(\n            \"Step 3/3: Skipping control plane cleanup (tenant not found in control plane)\"\n        )\n        print(f\"{'=' * 80}\\n\")\n    elif confirm_step(\n        \"Step 3/3: Delete control plane records (tenant_notification, tenant_config, subscription, tenant)\",\n        force,\n    ):\n        try:\n            cleanup_control_plane(\n                control_plane_pod, tenant_id, control_plane_context, force\n            )\n        except Exception as e:\n            print(f\"✗ Failed at control plane cleanup step: {e}\", file=sys.stderr)\n            if not force:\n                print(\"Control plane cleanup failed\")\n            else:\n                print(\"[FORCE MODE] Control plane cleanup failed but continuing\")\n    else:\n        print(\"Step 3 skipped by user\")\n        return False\n\n    print(f\"\\n{'=' * 80}\")\n    print(f\"✓ Cleanup completed for tenant: {tenant_id}\")\n    print(f\"{'=' * 80}\")\n    return True\n\n\ndef main() -> None:\n    # Register signal handlers for graceful shutdown\n    signal.signal(signal.SIGINT, signal_handler)\n    signal.signal(signal.SIGTERM, signal_handler)\n\n    if len(sys.argv) < 2:\n        print(\n            \"Usage: PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py <tenant_id> \\\\\"\n        )\n        print(\n            \"           --data-plane-context <context> --control-plane-context <context> [--force]\"\n        )\n        print(\n            \"       PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py --csv <csv_file_path> \\\\\"\n        )\n        print(\n            \"           --data-plane-context <context> --control-plane-context <context> [--force]\"\n        )\n        print(\"\\nThis version runs ALL operations from pods (no bastion required)\")\n        print(\"\\nArguments:\")\n        print(\n            \"  tenant_id                   The tenant ID to clean up (required if not using --csv)\"\n        )\n        print(\n            \"  --csv PATH                  Path to CSV file containing tenant IDs to clean up\"\n        )\n        print(\"  --force                     Skip all confirmation prompts (optional)\")\n        print(\n            \"  --concurrency N             Process N tenants concurrently (default: 1)\"\n        )\n        print(\n            \"  --data-plane-context CTX    Kubectl context for data plane cluster (required)\"\n        )\n        print(\n            \"  --control-plane-context CTX Kubectl context for control plane cluster (required)\"\n        )\n        sys.exit(1)\n\n    # Parse arguments\n    force = \"--force\" in sys.argv\n    tenant_ids = []\n\n    # Parse concurrency\n    concurrency: int = 1\n    if \"--concurrency\" in sys.argv:\n        try:\n            concurrency_index = sys.argv.index(\"--concurrency\")\n            if concurrency_index + 1 >= len(sys.argv):\n                print(\"Error: --concurrency flag requires a number\", file=sys.stderr)\n                sys.exit(1)\n            concurrency = int(sys.argv[concurrency_index + 1])\n            if concurrency < 1:\n                print(\"Error: concurrency must be at least 1\", file=sys.stderr)\n                sys.exit(1)\n        except ValueError:\n            print(\"Error: --concurrency value must be an integer\", file=sys.stderr)\n            sys.exit(1)\n\n    # Validate: concurrency > 1 requires --force\n    if concurrency > 1 and not force:\n        print(\n            \"Error: --concurrency > 1 requires --force flag (interactive mode not supported with parallel processing)\",\n            file=sys.stderr,\n        )\n        sys.exit(1)\n\n    # Parse contexts (required)\n    data_plane_context: str | None = None\n    control_plane_context: str | None = None\n\n    if \"--data-plane-context\" in sys.argv:\n        try:\n            idx = sys.argv.index(\"--data-plane-context\")\n            if idx + 1 >= len(sys.argv):\n                print(\n                    \"Error: --data-plane-context requires a context name\",\n                    file=sys.stderr,\n                )\n                sys.exit(1)\n            data_plane_context = sys.argv[idx + 1]\n        except ValueError:\n            pass\n\n    if \"--control-plane-context\" in sys.argv:\n        try:\n            idx = sys.argv.index(\"--control-plane-context\")\n            if idx + 1 >= len(sys.argv):\n                print(\n                    \"Error: --control-plane-context requires a context name\",\n                    file=sys.stderr,\n                )\n                sys.exit(1)\n            control_plane_context = sys.argv[idx + 1]\n        except ValueError:\n            pass\n\n    # Validate required contexts\n    if not data_plane_context:\n        print(\n            \"Error: --data-plane-context is required\",\n            file=sys.stderr,\n        )\n        sys.exit(1)\n\n    if not control_plane_context:\n        print(\n            \"Error: --control-plane-context is required\",\n            file=sys.stderr,\n        )\n        sys.exit(1)\n\n    # Check for CSV mode\n    if \"--csv\" in sys.argv:\n        try:\n            csv_index = sys.argv.index(\"--csv\")\n            if csv_index + 1 >= len(sys.argv):\n                print(\"Error: --csv flag requires a file path\", file=sys.stderr)\n                sys.exit(1)\n\n            csv_path = sys.argv[csv_index + 1]\n            tenant_ids = read_tenant_ids_from_csv(csv_path)\n\n            if not tenant_ids:\n                print(\"Error: No tenant IDs found in CSV file\", file=sys.stderr)\n                sys.exit(1)\n\n            print(f\"Found {len(tenant_ids)} tenant(s) in CSV file: {csv_path}\")\n\n        except Exception as e:\n            print(f\"Error reading CSV file: {e}\", file=sys.stderr)\n            sys.exit(1)\n    else:\n        # Single tenant mode\n        tenant_ids = [sys.argv[1]]\n\n    # Initial confirmation (unless --force is used)\n    if not force:\n        print(f\"\\n{'=' * 80}\")\n        print(\"TENANT CLEANUP - NO BASTION VERSION\")\n        print(f\"{'=' * 80}\")\n        if len(tenant_ids) == 1:\n            print(f\"Tenant ID: {tenant_ids[0]}\")\n        else:\n            print(f\"Number of tenants: {len(tenant_ids)}\")\n            print(f\"Tenant IDs: {', '.join(tenant_ids[:5])}\")\n            if len(tenant_ids) > 5:\n                print(f\"            ... and {len(tenant_ids) - 5} more\")\n\n        print(\"\\nThis will:\")\n        print(\"  1. Check for remaining documents and connector credential pairs\")\n        print(\"  2. Drop the data plane PostgreSQL schema (CASCADE)\")\n        print(\"  3. Clean up control plane data (all via pod queries)\")\n        print(f\"\\n{'=' * 80}\")\n        print(\"WARNING: This operation is IRREVERSIBLE!\")\n        print(f\"{'=' * 80}\\n\")\n\n        response = input(\"Are you sure you want to proceed? Type 'yes' to confirm: \")\n\n        if response.lower() != \"yes\":\n            print(\"Cleanup aborted by user\")\n            sys.exit(0)\n    else:\n        print(\n            f\"⚠ FORCE MODE: Running cleanup for {len(tenant_ids)} tenant(s) without confirmations\"\n        )\n\n    # Find pods in both clusters before processing\n    try:\n        print(\"Finding data plane worker pod...\")\n        data_plane_pod = find_worker_pod(data_plane_context)\n        print(f\"✓ Using data plane worker pod: {data_plane_pod}\")\n\n        print(\"Finding control plane pod...\")\n        control_plane_pod = find_background_pod(control_plane_context)\n        print(f\"✓ Using control plane pod: {control_plane_pod}\\n\")\n\n        # Copy all scripts to data plane pod once\n        setup_scripts_on_pod(data_plane_pod, data_plane_context)\n        print()\n    except Exception as e:\n        print(f\"✗ Failed to find required pods or setup scripts: {e}\", file=sys.stderr)\n        print(\"Cannot proceed with cleanup\")\n        sys.exit(1)\n\n    # Run cleanup for each tenant\n    failed_tenants = []\n    successful_tenants = []\n    skipped_tenants = []\n\n    # Open CSV file for writing successful cleanups in real-time\n    csv_output_path = \"cleaned_tenants.csv\"\n    with open(csv_output_path, \"w\", newline=\"\") as csv_file:\n        csv_writer = csv.writer(csv_file)\n        csv_writer.writerow([\"tenant_id\", \"cleaned_at\"])\n        csv_file.flush()\n\n        print(f\"Writing successful cleanups to: {csv_output_path}\\n\")\n\n        if concurrency == 1:\n            # Sequential processing\n            for idx, tenant_id in enumerate(tenant_ids, 1):\n                if len(tenant_ids) > 1:\n                    print(f\"\\n{'=' * 80}\")\n                    print(f\"Processing tenant {idx}/{len(tenant_ids)}: {tenant_id}\")\n                    print(f\"{'=' * 80}\")\n\n                try:\n                    was_cleaned = cleanup_tenant(\n                        tenant_id,\n                        data_plane_pod,\n                        control_plane_pod,\n                        data_plane_context,\n                        control_plane_context,\n                        force,\n                    )\n\n                    if was_cleaned:\n                        successful_tenants.append(tenant_id)\n\n                        # Write to CSV immediately after successful cleanup\n                        timestamp = datetime.utcnow().isoformat()\n                        csv_writer.writerow([tenant_id, timestamp])\n                        csv_file.flush()\n                        print(f\"✓ Recorded cleanup in {csv_output_path}\")\n                    else:\n                        skipped_tenants.append(tenant_id)\n                        print(f\"⚠ Tenant {tenant_id} was skipped (not recorded in CSV)\")\n\n                except Exception as e:\n                    print(\n                        f\"✗ Cleanup failed for tenant {tenant_id}: {e}\", file=sys.stderr\n                    )\n                    failed_tenants.append((tenant_id, str(e)))\n\n                    # If not in force mode and there are more tenants, ask if we should continue\n                    if not force and idx < len(tenant_ids):\n                        response = input(\n                            f\"\\nContinue with remaining {len(tenant_ids) - idx} tenant(s)? (y/n): \"\n                        )\n                        if response.lower() != \"y\":\n                            print(\"Cleanup aborted by user\")\n                            break\n        else:\n            # Parallel processing\n            print(\n                f\"Processing {len(tenant_ids)} tenant(s) with concurrency={concurrency}\\n\"\n            )\n\n            def process_tenant(tenant_id: str) -> tuple[str, bool, str | None]:\n                \"\"\"Process a single tenant. Returns (tenant_id, was_cleaned, error_message).\"\"\"\n                try:\n                    was_cleaned = cleanup_tenant(\n                        tenant_id,\n                        data_plane_pod,\n                        control_plane_pod,\n                        data_plane_context,\n                        control_plane_context,\n                        force,\n                    )\n                    return (tenant_id, was_cleaned, None)\n                except Exception as e:\n                    return (tenant_id, False, str(e))\n\n            with ThreadPoolExecutor(max_workers=concurrency) as executor:\n                # Submit all tasks\n                future_to_tenant = {\n                    executor.submit(process_tenant, tenant_id): tenant_id\n                    for tenant_id in tenant_ids\n                }\n\n                # Process results as they complete\n                completed = 0\n                for future in as_completed(future_to_tenant):\n                    completed += 1\n                    tenant_id, was_cleaned, error = future.result()\n\n                    if error:\n                        with _print_lock:\n                            print(\n                                f\"[{completed}/{len(tenant_ids)}] ✗ Failed: {tenant_id}: {error}\",\n                                file=sys.stderr,\n                            )\n                        failed_tenants.append((tenant_id, error))\n                    elif was_cleaned:\n                        with _csv_lock:\n                            timestamp = datetime.utcnow().isoformat()\n                            csv_writer.writerow([tenant_id, timestamp])\n                            csv_file.flush()\n                        successful_tenants.append(tenant_id)\n                        with _print_lock:\n                            print(\n                                f\"[{completed}/{len(tenant_ids)}] ✓ Cleaned: {tenant_id}\"\n                            )\n                    else:\n                        skipped_tenants.append(tenant_id)\n                        with _print_lock:\n                            print(\n                                f\"[{completed}/{len(tenant_ids)}] ⊘ Skipped: {tenant_id}\"\n                            )\n\n    # Print summary\n    if len(tenant_ids) > 1:\n        print(f\"\\n{'=' * 80}\")\n        print(\"CLEANUP SUMMARY\")\n        print(f\"{'=' * 80}\")\n        print(f\"Total tenants: {len(tenant_ids)}\")\n        print(f\"Successful: {len(successful_tenants)}\")\n        print(f\"Skipped: {len(skipped_tenants)}\")\n        print(f\"Failed: {len(failed_tenants)}\")\n        print(f\"\\nSuccessfully cleaned tenants written to: {csv_output_path}\")\n\n        if skipped_tenants:\n            print(f\"\\nSkipped tenants ({len(skipped_tenants)}):\")\n            for tenant_id in skipped_tenants:\n                print(f\"  - {tenant_id}\")\n\n        if failed_tenants:\n            print(f\"\\nFailed tenants ({len(failed_tenants)}):\")\n            for tenant_id, error in failed_tenants:\n                print(f\"  - {tenant_id}: {error}\")\n\n        print(f\"{'=' * 80}\")\n\n        if failed_tenants:\n            sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/no_bastion_cleanup_utils.py",
    "content": "\"\"\"\nCleanup utilities that work WITHOUT bastion access.\nControl plane and data plane are in SEPARATE clusters.\n\"\"\"\n\nimport csv\nimport json\nimport subprocess\nimport sys\nfrom pathlib import Path\n\n\nclass TenantNotFoundInControlPlaneError(Exception):\n    \"\"\"Exception raised when tenant/table is not found in control plane.\"\"\"\n\n\ndef find_worker_pod(context: str) -> str:\n    \"\"\"Find a user file processing worker pod using kubectl.\n\n    Args:\n        context: kubectl context to use\n    \"\"\"\n    print(f\"Finding user file processing worker pod in context {context}...\")\n\n    cmd = [\"kubectl\", \"get\", \"po\", \"--context\", context]\n\n    result = subprocess.run(cmd, capture_output=True, text=True, check=True)\n\n    # Parse output and find user file processing worker pod\n    lines = result.stdout.strip().split(\"\\n\")\n    lines = lines[1:]  # Skip header\n\n    import random\n\n    random.shuffle(lines)\n\n    for line in lines:\n        if \"celery-worker-user-file-processing\" in line and \"Running\" in line:\n            pod_name = line.split()[0]\n            print(f\"Found pod: {pod_name}\")\n            return pod_name\n\n    raise RuntimeError(\"No running user file processing worker pod found\")\n\n\ndef find_background_pod(context: str) -> str:\n    \"\"\"Find a pod for control plane operations.\n\n    Args:\n        context: kubectl context to use\n    \"\"\"\n    print(f\"Finding control plane pod in context {context}...\")\n\n    cmd = [\"kubectl\", \"get\", \"po\", \"--context\", context]\n\n    result = subprocess.run(cmd, capture_output=True, text=True, check=True)\n\n    # Parse output and find suitable pod\n    lines = result.stdout.strip().split(\"\\n\")\n    lines = lines[1:]  # Skip header\n\n    import random\n\n    random.shuffle(lines)\n\n    # Try to find control plane pods\n    for line in lines:\n        if (\n            any(\n                name in line\n                for name in [\n                    \"background-processing-deployment\",\n                    \"subscription-deployment\",\n                    \"tenants-deployment\",\n                ]\n            )\n            and \"Running\" in line\n        ):\n            pod_name = line.split()[0]\n            print(f\"Found pod: {pod_name}\")\n            return pod_name\n\n    raise RuntimeError(\"No suitable background pod found for control plane operations\")\n\n\ndef confirm_step(message: str, force: bool = False) -> bool:\n    \"\"\"Ask for confirmation before executing a step.\n\n    Args:\n        message: The confirmation message to display\n        force: If True, skip confirmation and return True\n\n    Returns:\n        True if user confirms or force is True, False otherwise\n    \"\"\"\n    if force:\n        print(f\"[FORCE MODE] Skipping confirmation: {message}\")\n        return True\n\n    print(f\"\\n{message}\")\n    response = input(\"Proceed? (y/n): \")\n    return response.lower() == \"y\"\n\n\ndef execute_control_plane_query_from_pod(\n    pod_name: str, query: str, context: str\n) -> dict:\n    \"\"\"Execute a SQL query against control plane database from within a pod.\n\n    Args:\n        pod_name: The Kubernetes pod name to execute from\n        query: The SQL query to execute\n        context: kubectl context for control plane cluster\n\n    Returns:\n        Dict with 'success' bool, 'stdout' str, and optional 'error' str\n    \"\"\"\n    # Create a Python script to run the query\n    # This script tries multiple environment variable patterns\n\n    # NOTE: whuang 01/08/2026: POSTGRES_CONTROL_* don't exist. This uses pattern 2 currently.\n\n    query_script = f'''\nimport os\nfrom sqlalchemy import create_engine, text\n\n# Try to get control plane database URL from various environment patterns\ncontrol_db_url = None\n\n# Pattern 1: POSTGRES_CONTROL_* variables\nif os.environ.get(\"POSTGRES_CONTROL_HOST\"):\n    host = os.environ.get(\"POSTGRES_CONTROL_HOST\")\n    port = os.environ.get(\"POSTGRES_CONTROL_PORT\", \"5432\")\n    db = os.environ.get(\"POSTGRES_CONTROL_DB\", \"control\")\n    user = os.environ.get(\"POSTGRES_CONTROL_USER\", \"postgres\")\n    password = os.environ.get(\"POSTGRES_CONTROL_PASSWORD\", \"\")\n    if password:\n        control_db_url = f\"postgresql://{{user}}:{{password}}@{{host}}:{{port}}/{{db}}\"\n\n# Pattern 2: Standard POSTGRES_* variables (might point to control plane in this cluster)\nif not control_db_url and os.environ.get(\"POSTGRES_HOST\"):\n    host = os.environ.get(\"POSTGRES_HOST\")\n    port = os.environ.get(\"POSTGRES_PORT\", \"5432\")\n    db = os.environ.get(\"POSTGRES_DB\", \"danswer\")\n    user = os.environ.get(\"POSTGRES_USER\", \"postgres\")\n    password = os.environ.get(\"POSTGRES_PASSWORD\", \"\")\n    if password:\n        control_db_url = f\"postgresql://{{user}}:{{password}}@{{host}}:{{port}}/{{db}}\"\n\n# Pattern 3: Direct URI\nif not control_db_url:\n    control_db_url = os.environ.get(\"DATABASE_URL\") or os.environ.get(\"POSTGRES_URI\")\n\nif not control_db_url:\n    raise ValueError(\"Cannot determine control plane database connection. No suitable environment variables found.\")\n\nengine = create_engine(control_db_url)\n\nwith engine.connect() as conn:\n    result = conn.execute(text(\"\"\"{query}\"\"\"))\n\n    # Check if this is a SELECT query\n    if result.returns_rows:\n        rows = [dict(row._mapping) for row in result]\n        import json\n        print(json.dumps(rows, default=str))\n    else:\n        # For INSERT/UPDATE/DELETE, print rowcount\n        print(f\"{{result.rowcount}} rows affected\")\n\n    conn.commit()\n'''\n\n    # Write the script to a temp file on the pod\n    script_path = \"/tmp/control_plane_query.py\"\n\n    try:\n        cmd_write = [\"kubectl\", \"exec\", \"--context\", context, pod_name]\n        cmd_write.extend(\n            [\n                \"--\",\n                \"bash\",\n                \"-c\",\n                f\"cat > {script_path} << 'EOFQUERY'\\n{query_script}\\nEOFQUERY\",\n            ]\n        )\n\n        subprocess.run(\n            cmd_write,\n            check=True,\n            capture_output=True,\n        )\n\n        # Execute the script\n        cmd_exec = [\"kubectl\", \"exec\", \"--context\", context, pod_name]\n        cmd_exec.extend([\"--\", \"python\", script_path])\n\n        result = subprocess.run(\n            cmd_exec,\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        return {\n            \"success\": True,\n            \"stdout\": result.stdout.strip(),\n            \"stderr\": result.stderr.strip() if result.stderr else \"\",\n        }\n\n    except subprocess.CalledProcessError as e:\n        return {\n            \"success\": False,\n            \"stdout\": e.stdout if e.stdout else \"\",\n            \"error\": e.stderr if e.stderr else str(e),\n        }\n\n\ndef get_tenant_status(pod_name: str, tenant_id: str, context: str) -> str | None:\n    \"\"\"\n    Get tenant status from control plane database via pod.\n\n    Args:\n        pod_name: The pod to execute the query from\n        tenant_id: The tenant ID to look up\n        context: kubectl context for control plane cluster\n\n    Returns:\n        Tenant status string (e.g., 'GATED_ACCESS', 'ACTIVE') or None if not found\n\n    Raises:\n        TenantNotFoundInControlPlaneError: If the tenant record is not found in the table\n    \"\"\"\n    print(f\"Fetching tenant status for tenant: {tenant_id}\")\n\n    query = f\"SELECT application_status FROM tenant WHERE tenant_id = '{tenant_id}'\"\n\n    result = execute_control_plane_query_from_pod(pod_name, query, context)\n\n    if not result[\"success\"]:\n        error_msg = result.get(\"error\", \"Unknown error\")\n        print(\n            f\"✗ Failed to get tenant status for {tenant_id}: {error_msg}\",\n            file=sys.stderr,\n        )\n        return None\n\n    try:\n        # Parse JSON output\n        rows = json.loads(result[\"stdout\"])\n\n        if rows and len(rows) > 0:\n            status = rows[0].get(\"application_status\")\n            if status:\n                print(f\"✓ Tenant status: {status}\")\n                return status\n\n        # Tenant record not found in control plane table\n        print(\"⚠ Tenant not found in control plane\")\n        raise TenantNotFoundInControlPlaneError(\n            f\"Tenant {tenant_id} not found in control plane database\"\n        )\n\n    except TenantNotFoundInControlPlaneError:\n        # Re-raise without wrapping\n        raise\n    except (json.JSONDecodeError, KeyError, IndexError) as e:\n        print(f\"✗ Failed to parse tenant status: {e}\", file=sys.stderr)\n        return None\n\n\ndef execute_control_plane_delete(pod_name: str, query: str, context: str) -> bool:\n    \"\"\"Execute a DELETE query against control plane database from pod.\n\n    Args:\n        pod_name: The pod to execute the query from\n        query: The DELETE query to execute\n        context: kubectl context for control plane cluster\n\n    Returns:\n        True if successful, False otherwise\n    \"\"\"\n    result = execute_control_plane_query_from_pod(pod_name, query, context)\n\n    if result[\"success\"]:\n        print(f\"    {result['stdout']}\")\n        return True\n    else:\n        print(f\"    Error: {result.get('error', 'Unknown error')}\", file=sys.stderr)\n        return False\n\n\ndef read_tenant_ids_from_csv(csv_path: str) -> list[str]:\n    \"\"\"Read tenant IDs from CSV file.\n\n    Args:\n        csv_path: Path to CSV file\n\n    Returns:\n        List of tenant IDs\n    \"\"\"\n    if not Path(csv_path).exists():\n        raise FileNotFoundError(f\"CSV file not found: {csv_path}\")\n\n    tenant_ids = []\n    with open(csv_path, \"r\", newline=\"\", encoding=\"utf-8\") as csvfile:\n        reader = csv.DictReader(csvfile)\n\n        # Check if tenant_id column exists\n        if not reader.fieldnames or \"tenant_id\" not in reader.fieldnames:\n            raise ValueError(\n                f\"CSV file must have a 'tenant_id' column. Found columns: {reader.fieldnames}\"\n            )\n\n        for row in reader:\n            tenant_id = row.get(\"tenant_id\", \"\").strip()\n            if tenant_id:\n                tenant_ids.append(tenant_id)\n\n    return tenant_ids\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/no_bastion_mark_connectors.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nMark connectors for deletion script that works WITHOUT bastion access.\nAll queries run directly from pods.\nSupports two-cluster architecture (data plane and control plane in separate clusters).\n\nUsage:\n    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py <tenant_id> \\\n        --data-plane-context <context> --control-plane-context <context> [--force]\n\n    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py --csv <csv_file_path> \\\n        --data-plane-context <context> --control-plane-context <context> [--force] [--concurrency N]\n\"\"\"\n\nimport subprocess\nimport sys\nfrom concurrent.futures import as_completed\nfrom concurrent.futures import ThreadPoolExecutor\nfrom pathlib import Path\nfrom threading import Lock\nfrom typing import Any\n\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import confirm_step\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import find_background_pod\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import find_worker_pod\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import get_tenant_status\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import read_tenant_ids_from_csv\nfrom scripts.tenant_cleanup.no_bastion_cleanup_utils import (\n    TenantNotFoundInControlPlaneError,\n)\n\n# Global lock for thread-safe printing\n_print_lock: Lock = Lock()\n\n\ndef safe_print(*args: Any, **kwargs: Any) -> None:\n    \"\"\"Thread-safe print function.\"\"\"\n    with _print_lock:\n        print(*args, **kwargs)\n\n\ndef run_connector_deletion(pod_name: str, tenant_id: str, context: str) -> None:\n    \"\"\"Mark all connector credential pairs for deletion.\n\n    Args:\n        pod_name: Data plane pod to execute deletion on\n        tenant_id: Tenant ID to process\n        context: kubectl context for data plane cluster\n    \"\"\"\n    safe_print(\"  Marking all connector credential pairs for deletion...\")\n\n    # Get the path to the script\n    script_dir = Path(__file__).parent\n    mark_deletion_script = (\n        script_dir / \"on_pod_scripts\" / \"execute_connector_deletion.py\"\n    )\n\n    if not mark_deletion_script.exists():\n        raise FileNotFoundError(\n            f\"execute_connector_deletion.py not found at {mark_deletion_script}\"\n        )\n\n    try:\n        # Copy script to pod\n        cmd_cp = [\"kubectl\", \"cp\", \"--context\", context]\n        cmd_cp.extend(\n            [\n                str(mark_deletion_script),\n                f\"{pod_name}:/tmp/execute_connector_deletion.py\",\n            ]\n        )\n\n        subprocess.run(\n            cmd_cp,\n            check=True,\n            capture_output=True,\n        )\n\n        # Execute script on pod\n        cmd_exec = [\"kubectl\", \"exec\", \"--context\", context, pod_name]\n        cmd_exec.extend(\n            [\n                \"--\",\n                \"python\",\n                \"/tmp/execute_connector_deletion.py\",\n                tenant_id,\n                \"--all\",\n            ]\n        )\n\n        result = subprocess.run(cmd_exec)\n\n        if result.returncode != 0:\n            raise RuntimeError(result.stderr)\n\n    except subprocess.CalledProcessError as e:\n        safe_print(\n            f\"  ✗ Failed to mark all connector credential pairs for deletion: {e}\",\n            file=sys.stderr,\n        )\n        if e.stderr:\n            safe_print(f\"    Error details: {e.stderr}\", file=sys.stderr)\n        raise\n    except Exception as e:\n        safe_print(\n            f\"  ✗ Failed to mark all connector credential pairs for deletion: {e}\",\n            file=sys.stderr,\n        )\n        raise\n\n\ndef mark_tenant_connectors_for_deletion(\n    tenant_id: str,\n    data_plane_pod: str,\n    control_plane_pod: str,\n    data_plane_context: str,\n    control_plane_context: str,\n    force: bool = False,\n) -> None:\n    \"\"\"Main function to mark all connectors for a tenant for deletion.\n\n    Args:\n        tenant_id: Tenant ID to process\n        data_plane_pod: Data plane pod for connector operations\n        control_plane_pod: Control plane pod for status checks\n        data_plane_context: kubectl context for data plane cluster\n        control_plane_context: kubectl context for control plane cluster\n        force: Skip confirmations if True\n    \"\"\"\n    safe_print(f\"Processing connectors for tenant: {tenant_id}\")\n\n    # Check tenant status first (from control plane)\n    safe_print(f\"\\n{'=' * 80}\")\n    try:\n        tenant_status = get_tenant_status(\n            control_plane_pod, tenant_id, control_plane_context\n        )\n\n        # If tenant is not GATED_ACCESS, require explicit confirmation even in force mode\n        if tenant_status and tenant_status != \"GATED_ACCESS\":\n            safe_print(\n                f\"\\n⚠️  WARNING: Tenant status is '{tenant_status}', not 'GATED_ACCESS'!\"\n            )\n            safe_print(\n                \"This tenant may be active and should not have connectors deleted without careful review.\"\n            )\n            safe_print(f\"{'=' * 80}\\n\")\n\n            # Always ask for confirmation if not gated, even in force mode\n            if not force:\n                response = input(\n                    \"Are you ABSOLUTELY SURE you want to proceed? Type 'yes' to confirm: \"\n                )\n                if response.lower() != \"yes\":\n                    safe_print(\"Operation aborted - tenant is not GATED_ACCESS\")\n                    raise RuntimeError(f\"Tenant {tenant_id} is not GATED_ACCESS\")\n            else:\n                raise RuntimeError(f\"Tenant {tenant_id} is not GATED_ACCESS\")\n        elif tenant_status == \"GATED_ACCESS\":\n            safe_print(\"✓ Tenant status is GATED_ACCESS - safe to proceed\")\n        elif tenant_status is None:\n            safe_print(\"⚠️  WARNING: Could not determine tenant status!\")\n            if not force:\n                response = input(\"Continue anyway? Type 'yes' to confirm: \")\n                if response.lower() != \"yes\":\n                    safe_print(\"Operation aborted - could not verify tenant status\")\n                    raise RuntimeError(\n                        f\"Could not verify tenant status for {tenant_id}\"\n                    )\n            else:\n                raise RuntimeError(f\"Could not verify tenant status for {tenant_id}\")\n    except TenantNotFoundInControlPlaneError as e:\n        # Tenant/table not found in control plane\n        error_str = str(e)\n        safe_print(f\"⚠️  WARNING: Tenant not found in control plane: {error_str}\")\n\n        if force:\n            safe_print(\n                \"[FORCE MODE] Tenant not found in control plane - continuing with connector deletion anyway\"\n            )\n        else:\n            response = input(\"Continue anyway? Type 'yes' to confirm: \")\n            if response.lower() != \"yes\":\n                safe_print(\"Operation aborted - tenant not found in control plane\")\n                raise RuntimeError(f\"Tenant {tenant_id} not found in control plane\")\n    except RuntimeError:\n        # Re-raise RuntimeError (from status checks above) without wrapping\n        raise\n    except Exception as e:\n        safe_print(f\"⚠️  WARNING: Failed to check tenant status: {e}\")\n        if not force:\n            response = input(\"Continue anyway? Type 'yes' to confirm: \")\n            if response.lower() != \"yes\":\n                safe_print(\"Operation aborted - could not verify tenant status\")\n                raise\n        else:\n            raise RuntimeError(f\"Failed to check tenant status for {tenant_id}\")\n    safe_print(f\"{'=' * 80}\\n\")\n\n    # Confirm before proceeding (only in non-force mode)\n    if not confirm_step(\n        f\"Mark all connector credential pairs for deletion for tenant {tenant_id}?\",\n        force,\n    ):\n        safe_print(\"Operation cancelled by user\")\n        raise ValueError(\"Operation cancelled by user\")\n\n    run_connector_deletion(data_plane_pod, tenant_id, data_plane_context)\n\n    # Print summary\n    safe_print(\n        f\"✓ Marked all connector credential pairs for deletion for tenant {tenant_id}\"\n    )\n\n\ndef main() -> None:\n    if len(sys.argv) < 2:\n        print(\n            \"Usage: PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py <tenant_id> \\\\\"\n        )\n        print(\n            \"           --data-plane-context <context> --control-plane-context <context> [--force]\"\n        )\n        print(\n            \"       PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py --csv <csv_file_path> \\\\\"\n        )\n        print(\n            \"           --data-plane-context <context> --control-plane-context <context> [--force] [--concurrency N]\"\n        )\n        print(\"\\nThis version runs ALL operations from pods (no bastion required)\")\n        print(\"\\nArguments:\")\n        print(\n            \"  tenant_id                   The tenant ID to process (required if not using --csv)\"\n        )\n        print(\n            \"  --csv PATH                  Path to CSV file containing tenant IDs to process\"\n        )\n        print(\"  --force                     Skip all confirmation prompts (optional)\")\n        print(\n            \"  --concurrency N             Process N tenants concurrently (default: 1)\"\n        )\n        print(\n            \"  --data-plane-context CTX    Kubectl context for data plane cluster (required)\"\n        )\n        print(\n            \"  --control-plane-context CTX Kubectl context for control plane cluster (required)\"\n        )\n        sys.exit(1)\n\n    # Parse arguments\n    force = \"--force\" in sys.argv\n    tenant_ids: list[str] = []\n\n    # Parse contexts (required)\n    data_plane_context: str | None = None\n    control_plane_context: str | None = None\n\n    if \"--data-plane-context\" in sys.argv:\n        try:\n            idx = sys.argv.index(\"--data-plane-context\")\n            if idx + 1 >= len(sys.argv):\n                print(\n                    \"Error: --data-plane-context requires a context name\",\n                    file=sys.stderr,\n                )\n                sys.exit(1)\n            data_plane_context = sys.argv[idx + 1]\n        except ValueError:\n            pass\n\n    if \"--control-plane-context\" in sys.argv:\n        try:\n            idx = sys.argv.index(\"--control-plane-context\")\n            if idx + 1 >= len(sys.argv):\n                print(\n                    \"Error: --control-plane-context requires a context name\",\n                    file=sys.stderr,\n                )\n                sys.exit(1)\n            control_plane_context = sys.argv[idx + 1]\n        except ValueError:\n            pass\n\n    # Validate required contexts\n    if not data_plane_context:\n        print(\n            \"Error: --data-plane-context is required\",\n            file=sys.stderr,\n        )\n        sys.exit(1)\n\n    if not control_plane_context:\n        print(\n            \"Error: --control-plane-context is required\",\n            file=sys.stderr,\n        )\n        sys.exit(1)\n\n    # Parse concurrency\n    concurrency: int = 1\n    if \"--concurrency\" in sys.argv:\n        try:\n            concurrency_index = sys.argv.index(\"--concurrency\")\n            if concurrency_index + 1 >= len(sys.argv):\n                print(\"Error: --concurrency flag requires a number\", file=sys.stderr)\n                sys.exit(1)\n            concurrency = int(sys.argv[concurrency_index + 1])\n            if concurrency < 1:\n                print(\"Error: concurrency must be at least 1\", file=sys.stderr)\n                sys.exit(1)\n        except ValueError:\n            print(\"Error: --concurrency value must be an integer\", file=sys.stderr)\n            sys.exit(1)\n\n    # Validate: concurrency > 1 requires --force\n    if concurrency > 1 and not force:\n        print(\n            \"Error: --concurrency > 1 requires --force flag (interactive mode not supported with parallel processing)\",\n            file=sys.stderr,\n        )\n        sys.exit(1)\n\n    # Check for CSV mode\n    if \"--csv\" in sys.argv:\n        try:\n            csv_index: int = sys.argv.index(\"--csv\")\n            if csv_index + 1 >= len(sys.argv):\n                print(\"Error: --csv flag requires a file path\", file=sys.stderr)\n                sys.exit(1)\n\n            csv_path: str = sys.argv[csv_index + 1]\n            tenant_ids = read_tenant_ids_from_csv(csv_path)\n\n            if not tenant_ids:\n                print(\"Error: No tenant IDs found in CSV file\", file=sys.stderr)\n                sys.exit(1)\n\n            print(f\"Found {len(tenant_ids)} tenant(s) in CSV file: {csv_path}\")\n\n        except Exception as e:\n            print(f\"Error reading CSV file: {e}\", file=sys.stderr)\n            sys.exit(1)\n    else:\n        # Single tenant mode\n        tenant_ids = [sys.argv[1]]\n\n    # Find pods in both clusters before processing\n    try:\n        print(\"Finding data plane worker pod...\")\n        data_plane_pod: str = find_worker_pod(data_plane_context)\n        print(f\"✓ Using data plane worker pod: {data_plane_pod}\")\n\n        print(\"Finding control plane pod...\")\n        control_plane_pod: str = find_background_pod(control_plane_context)\n        print(f\"✓ Using control plane pod: {control_plane_pod}\")\n    except Exception as e:\n        print(f\"✗ Failed to find required pods: {e}\", file=sys.stderr)\n        print(\"Cannot proceed with marking connectors for deletion\")\n        sys.exit(1)\n\n    # Initial confirmation (unless --force is used)\n    if not force:\n        print(f\"\\n{'=' * 80}\")\n        print(\"MARK CONNECTORS FOR DELETION - NO BASTION VERSION\")\n        print(f\"{'=' * 80}\")\n        if len(tenant_ids) == 1:\n            print(f\"Tenant ID: {tenant_ids[0]}\")\n        else:\n            print(f\"Number of tenants: {len(tenant_ids)}\")\n            print(f\"Tenant IDs: {', '.join(tenant_ids[:5])}\")\n            if len(tenant_ids) > 5:\n                print(f\"            ... and {len(tenant_ids) - 5} more\")\n\n        print(\n            f\"Mode: {'FORCE (no confirmations)' if force else 'Interactive (will ask for confirmation at each step)'}\"\n        )\n        print(f\"Concurrency: {concurrency} tenant(s) at a time\")\n        print(\"\\nThis will:\")\n        print(\"  1. Fetch all connector credential pairs for each tenant\")\n        print(\"  2. Cancel any scheduled indexing attempts for each connector\")\n        print(\"  3. Mark each connector credential pair status as DELETING\")\n        print(\"  4. Trigger the connector deletion task\")\n        print(f\"\\n{'=' * 80}\")\n        print(\"WARNING: This will mark connectors for deletion!\")\n        print(\"The actual deletion will be performed by the background celery worker.\")\n        print(f\"{'=' * 80}\\n\")\n\n        response = input(\"Are you sure you want to proceed? Type 'yes' to confirm: \")\n\n        if response.lower() != \"yes\":\n            print(\"Operation aborted by user\")\n            sys.exit(0)\n    else:\n        if len(tenant_ids) == 1:\n            print(\n                f\"⚠ FORCE MODE: Marking connectors for deletion for {tenant_ids[0]} without confirmations\"\n            )\n        else:\n            print(\n                f\"⚠ FORCE MODE: Marking connectors for deletion for {len(tenant_ids)} tenants \"\n                f\"(concurrency: {concurrency}) without confirmations\"\n            )\n\n    # Process tenants (in parallel if concurrency > 1)\n    failed_tenants: list[tuple[str, str]] = []\n    successful_tenants: list[str] = []\n\n    if concurrency == 1:\n        # Sequential processing\n        for idx, tenant_id in enumerate(tenant_ids, 1):\n            if len(tenant_ids) > 1:\n                print(f\"\\n{'=' * 80}\")\n                print(f\"Processing tenant {idx}/{len(tenant_ids)}: {tenant_id}\")\n                print(f\"{'=' * 80}\")\n\n            try:\n                mark_tenant_connectors_for_deletion(\n                    tenant_id,\n                    data_plane_pod,\n                    control_plane_pod,\n                    data_plane_context,\n                    control_plane_context,\n                    force,\n                )\n                successful_tenants.append(tenant_id)\n            except Exception as e:\n                print(\n                    f\"✗ Failed to process tenant {tenant_id}: {e}\",\n                    file=sys.stderr,\n                )\n                failed_tenants.append((tenant_id, str(e)))\n\n                # If not in force mode and there are more tenants, ask if we should continue\n                if not force and idx < len(tenant_ids):\n                    response = input(\n                        f\"\\nContinue with remaining {len(tenant_ids) - idx} tenant(s)? (y/n): \"\n                    )\n                    if response.lower() != \"y\":\n                        print(\"Operation aborted by user\")\n                        break\n    else:\n        # Parallel processing\n        print(\n            f\"\\nProcessing {len(tenant_ids)} tenant(s) with concurrency={concurrency}\"\n        )\n\n        def process_tenant(tenant_id: str) -> tuple[str, bool, str | None]:\n            \"\"\"Process a single tenant. Returns (tenant_id, success, error_message).\"\"\"\n            try:\n                mark_tenant_connectors_for_deletion(\n                    tenant_id,\n                    data_plane_pod,\n                    control_plane_pod,\n                    data_plane_context,\n                    control_plane_context,\n                    force,\n                )\n                return (tenant_id, True, None)\n            except Exception as e:\n                return (tenant_id, False, str(e))\n\n        with ThreadPoolExecutor(max_workers=concurrency) as executor:\n            # Submit all tasks\n            future_to_tenant = {\n                executor.submit(process_tenant, tenant_id): tenant_id\n                for tenant_id in tenant_ids\n            }\n\n            # Process results as they complete\n            completed: int = 0\n            for future in as_completed(future_to_tenant):\n                completed += 1\n                tenant_id, success, error = future.result()\n\n                if success:\n                    successful_tenants.append(tenant_id)\n                    safe_print(\n                        f\"[{completed}/{len(tenant_ids)}] ✓ Successfully processed {tenant_id}\"\n                    )\n                else:\n                    failed_tenants.append((tenant_id, error or \"Unknown error\"))\n                    safe_print(\n                        f\"[{completed}/{len(tenant_ids)}] ✗ Failed to process {tenant_id}: {error}\",\n                        file=sys.stderr,\n                    )\n\n    # Print summary if multiple tenants\n    if len(tenant_ids) > 1:\n        print(f\"\\n{'=' * 80}\")\n        print(\"OPERATION SUMMARY\")\n        print(f\"{'=' * 80}\")\n        print(f\"Total tenants: {len(tenant_ids)}\")\n        print(f\"Successful: {len(successful_tenants)}\")\n        print(f\"Failed: {len(failed_tenants)}\")\n\n        if failed_tenants:\n            print(\"\\nFailed tenants:\")\n            for tenant_id, error in failed_tenants:\n                print(f\"  - {tenant_id}: {error}\")\n\n        print(f\"{'=' * 80}\")\n\n        if failed_tenants:\n            sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/on_pod_scripts/check_documents_deleted.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nScript to check for remaining ConnectorCredentialPairs and Documents in a tenant's schema.\nMust be run on a pod with access to the data plane PostgreSQL database.\n\nUsage:\n    python check_documents_deleted.py <tenant_id>\n\nOutput:\n    JSON object with status, message, and counts of remaining records\n\"\"\"\n\nimport json\nimport sys\n\nfrom sqlalchemy import func\nfrom sqlalchemy import select\n\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Document\n\n\ndef check_documents_deleted(tenant_id: str) -> dict:\n    \"\"\"\n    Check for remaining ConnectorCredentialPairs and Documents in tenant schema.\n\n    Args:\n        tenant_id: The tenant ID to query\n\n    Returns:\n        Dictionary with status and counts of remaining records\n    \"\"\"\n    try:\n        print(\n            f\"Checking for remaining documents in tenant: {tenant_id}\",\n            file=sys.stderr,\n        )\n\n        with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n            # Count ConnectorCredentialPairs\n            cc_count = db_session.scalar(\n                select(func.count()).select_from(ConnectorCredentialPair)\n            )\n\n            # Count Documents\n            doc_count = db_session.scalar(select(func.count()).select_from(Document))\n\n        # Handle None values from scalar (should not happen but mypy needs it)\n        cc_count = cc_count or 0\n        doc_count = doc_count or 0\n\n        # If any records remain beyond acceptable thresholds, return error status\n        is_deletable = cc_count == 0 or doc_count <= 5\n        if not is_deletable:\n            return {\n                \"status\": \"error\",\n                \"message\": (\n                    f\"Found {cc_count} ConnectorCredentialPair(s) and {doc_count} Document(s) \"\n                    \"still remaining. Must have 0 ConnectorCredentialPairs and no more than \"\n                    \"5 Documents before cleanup.\"\n                ),\n                \"connector_credential_pair_count\": cc_count,\n                \"document_count\": doc_count,\n            }\n\n        # All clear\n        return {\n            \"status\": \"success\",\n            \"message\": \"No ConnectorCredentialPairs or Documents found - safe to proceed\",\n            \"connector_credential_pair_count\": 0,\n            \"document_count\": 0,\n        }\n\n    except Exception as e:\n        error_msg = str(e)\n        print(f\"Error checking documents: {error_msg}\", file=sys.stderr)\n        # Check if it's a schema not found error\n        if \"does not exist\" in error_msg:\n            return {\n                \"status\": \"not_found\",\n                \"message\": f\"Schema '{tenant_id}' does not exist\",\n            }\n        return {\"status\": \"error\", \"message\": f\"Error checking documents: {error_msg}\"}\n\n\ndef main() -> None:\n    if len(sys.argv) != 2:\n        print(\n            json.dumps(\n                {\n                    \"status\": \"error\",\n                    \"message\": \"Usage: python check_documents_deleted.py <tenant_id>\",\n                }\n            )\n        )\n        sys.exit(1)\n\n    tenant_id = sys.argv[1]\n\n    SqlEngine.init_engine(pool_size=5, max_overflow=2)\n\n    result = check_documents_deleted(tenant_id)\n    print(json.dumps(result))\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/on_pod_scripts/cleanup_tenant_schema.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nScript to drop a tenant's PostgreSQL schema.\nDesigned to be run on a heavy worker pod.\n\nUsage:\n    python cleanup_tenant_schema.py <tenant_id>\n\"\"\"\n\nimport json\nimport sys\n\nfrom sqlalchemy import text\n\nfrom onyx.db.engine.sql_engine import get_session_with_shared_schema\nfrom onyx.db.engine.sql_engine import SqlEngine\n\n\ndef drop_data_plane_schema(tenant_id: str) -> dict[str, str]:\n    \"\"\"Drop the PostgreSQL schema for the given tenant.\"\"\"\n    print(f\"Dropping data plane schema for tenant: {tenant_id}\", file=sys.stderr)\n\n    SqlEngine.init_engine(pool_size=5, max_overflow=2)\n\n    try:\n        with get_session_with_shared_schema() as session:\n            # First, verify the schema exists\n            check_schema_query = text(\n                \"\"\"\n                SELECT nspname\n                FROM pg_namespace\n                WHERE nspname = :schema_name\n            \"\"\"\n            )\n\n            result = session.execute(\n                check_schema_query, {\"schema_name\": tenant_id}\n            ).fetchone()\n\n            if not result:\n                print(f\"Schema {tenant_id} does not exist\", file=sys.stderr)\n                return {\n                    \"status\": \"not_found\",\n                    \"message\": f\"Schema {tenant_id} does not exist\",\n                }\n\n            # Drop the schema with CASCADE to remove all objects within it\n            drop_schema_query = text(f'DROP SCHEMA IF EXISTS \"{tenant_id}\" CASCADE')\n            session.execute(drop_schema_query)\n            session.commit()\n\n            print(f\"Successfully dropped schema: {tenant_id}\", file=sys.stderr)\n\n            # Delete the tenant mapping from user_tenant_mapping table\n            delete_mapping_query = text(\n                \"\"\"\n                DELETE FROM user_tenant_mapping\n                WHERE tenant_id = :tenant_id\n                \"\"\"\n            )\n            session.execute(delete_mapping_query, {\"tenant_id\": tenant_id})\n            session.commit()\n\n            print(\n                f\"Successfully deleted tenant mapping for: {tenant_id}\", file=sys.stderr\n            )\n            return {\n                \"status\": \"success\",\n                \"message\": f\"Successfully dropped schema: {tenant_id}\",\n            }\n\n    except Exception as e:\n        print(f\"Failed to drop schema for tenant {tenant_id}: {e}\", file=sys.stderr)\n        return {\"status\": \"error\", \"message\": str(e)}\n\n\nif __name__ == \"__main__\":\n    if len(sys.argv) < 2:\n        print(\"Usage: python cleanup_tenant_schema.py <tenant_id>\", file=sys.stderr)\n        sys.exit(1)\n\n    tenant_id = sys.argv[1]\n\n    result = drop_data_plane_schema(tenant_id)\n\n    # Output result as JSON to stdout for easy parsing\n    print(json.dumps(result))\n\n    # Exit with error code if failed\n    if result[\"status\"] == \"error\":\n        sys.exit(1)\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/on_pod_scripts/execute_connector_deletion.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nScript to mark connector credential pairs for deletion.\nRuns on a Kubernetes pod with access to the data plane database.\n\nUsage:\n    # Mark a specific connector for deletion\n    python mark_connector_for_deletion.py <tenant_id> <cc_pair_id>\n\n    # Mark all connectors for deletion\n    python mark_connector_for_deletion.py <tenant_id> --all\n\nOutput:\n    JSON to stdout with structure:\n    {\n        \"status\": \"success\" | \"error\",\n        \"message\": str,\n        \"deleted_count\": int (when using --all),\n        \"timing\": {\n            \"total_seconds\": float,\n            \"per_connector\": [...]\n        }\n    }\n\"\"\"\n\nimport json\nimport sys\nimport time\nfrom typing import Any\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.versioned_apps.client import app as client_app\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.connector_credential_pair import get_connector_credential_pairs\nfrom onyx.db.connector_credential_pair import update_connector_credential_pair_from_id\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.index_attempt import cancel_indexing_attempts_for_ccpair\n\n\ndef mark_connector_for_deletion(\n    tenant_id: str, cc_pair_id: int, db_session: Session | None = None\n) -> dict[str, Any]:\n    \"\"\"Mark a connector credential pair for deletion.\n\n    Args:\n        tenant_id: The tenant ID\n        cc_pair_id: The connector credential pair ID\n        db_session: Optional database session (if None, creates a new one)\n\n    Returns:\n        Dict with status, message, and timing\n    \"\"\"\n    timing: dict[str, float] = {}\n    start_time: float = time.time()\n\n    try:\n        print(\n            f\"Marking connector credential pair {cc_pair_id} for deletion\",\n            file=sys.stderr,\n        )\n\n        def _mark_deletion(db_sess: Session) -> dict[str, Any]:\n            # Get the connector credential pair\n            fetch_start: float = time.time()\n            cc_pair = get_connector_credential_pair_from_id(\n                db_session=db_sess,\n                cc_pair_id=cc_pair_id,\n            )\n            timing[\"fetch_cc_pair_seconds\"] = time.time() - fetch_start\n\n            if not cc_pair:\n                return {\n                    \"status\": \"error\",\n                    \"message\": f\"Connector credential pair {cc_pair_id} not found\",\n                    \"timing\": timing,\n                }\n\n            # Cancel any scheduled indexing attempts\n            print(\n                f\"Canceling indexing attempts for CC pair {cc_pair_id}\",\n                file=sys.stderr,\n            )\n            cancel_start: float = time.time()\n            cancel_indexing_attempts_for_ccpair(\n                cc_pair_id=cc_pair.id,\n                db_session=db_sess,\n                include_secondary_index=True,\n            )\n            timing[\"cancel_indexing_seconds\"] = time.time() - cancel_start\n\n            # Mark as deleting\n            print(\n                f\"Updating CC pair {cc_pair_id} status to DELETING\",\n                file=sys.stderr,\n            )\n            update_start: float = time.time()\n            update_connector_credential_pair_from_id(\n                db_session=db_sess,\n                cc_pair_id=cc_pair.id,\n                status=ConnectorCredentialPairStatus.DELETING,\n            )\n            timing[\"update_status_seconds\"] = time.time() - update_start\n\n            commit_start: float = time.time()\n            db_sess.commit()\n            timing[\"commit_seconds\"] = time.time() - commit_start\n\n            return {\n                \"status\": \"success\",\n                \"message\": f\"Marked connector credential pair {cc_pair_id} for deletion\",\n                \"timing\": timing,\n            }\n\n        result: dict[str, Any]\n        if db_session:\n            result = _mark_deletion(db_session)\n        else:\n            with get_session_with_tenant(tenant_id=tenant_id) as db_sess:\n                result = _mark_deletion(db_sess)\n\n        # Trigger the deletion check task\n        print(\n            \"Triggering connector deletion check task\",\n            file=sys.stderr,\n        )\n        task_start: float = time.time()\n        client_app.send_task(\n            OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,\n            priority=OnyxCeleryPriority.HIGH,\n            kwargs={\"tenant_id\": tenant_id},\n        )\n        timing[\"send_task_seconds\"] = time.time() - task_start\n        timing[\"total_seconds\"] = time.time() - start_time\n\n        result[\"timing\"] = timing\n        return result\n\n    except Exception as e:\n        print(\n            f\"Error marking connector for deletion: {e}\",\n            file=sys.stderr,\n        )\n        timing[\"total_seconds\"] = time.time() - start_time\n        return {\n            \"status\": \"error\",\n            \"message\": str(e),\n            \"timing\": timing,\n        }\n\n\ndef mark_all_connectors_for_deletion(tenant_id: str) -> dict[str, Any]:\n    \"\"\"Mark all connector credential pairs for a tenant for deletion.\n\n    Args:\n        tenant_id: The tenant ID\n\n    Returns:\n        Dict with status, message, deleted_count, and timing\n    \"\"\"\n    overall_start: float = time.time()\n    per_connector_timing: list[dict[str, Any]] = []\n\n    try:\n        print(\n            f\"Marking all connector credential pairs for tenant {tenant_id} for deletion\",\n            file=sys.stderr,\n        )\n\n        with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n            # Get all connector credential pairs\n            fetch_all_start: float = time.time()\n            cc_pairs = get_connector_credential_pairs(db_session=db_session)\n            fetch_all_time: float = time.time() - fetch_all_start\n\n            print(\n                f\"Found {len(cc_pairs)} connector credential pairs to delete\",\n                file=sys.stderr,\n            )\n\n            if not cc_pairs:\n                return {\n                    \"status\": \"success\",\n                    \"message\": \"No connector credential pairs found for tenant\",\n                    \"deleted_count\": 0,\n                    \"timing\": {\n                        \"fetch_all_seconds\": fetch_all_time,\n                        \"total_seconds\": time.time() - overall_start,\n                    },\n                }\n\n            deleted_count: int = 0\n            errors: list[str] = []\n\n            for cc_pair in cc_pairs:\n                connector_start: float = time.time()\n                print(\n                    f\"Processing CC pair {cc_pair.id} ({deleted_count + 1}/{len(cc_pairs)})\",\n                    file=sys.stderr,\n                )\n\n                # Cancel any scheduled indexing attempts\n                cancel_start: float = time.time()\n                cancel_indexing_attempts_for_ccpair(\n                    cc_pair_id=cc_pair.id,\n                    db_session=db_session,\n                    include_secondary_index=True,\n                )\n                cancel_time: float = time.time() - cancel_start\n\n                # Mark as deleting\n                update_start: float = time.time()\n                try:\n                    update_connector_credential_pair_from_id(\n                        db_session=db_session,\n                        cc_pair_id=cc_pair.id,\n                        status=ConnectorCredentialPairStatus.DELETING,\n                    )\n                    deleted_count += 1\n                except Exception as e:\n                    errors.append(f\"CC pair {cc_pair.id}: {str(e)}\")\n                    print(\n                        f\"Error updating CC pair {cc_pair.id}: {e}\",\n                        file=sys.stderr,\n                    )\n\n                update_time: float = time.time() - update_start\n                connector_total_time: float = time.time() - connector_start\n\n                per_connector_timing.append(\n                    {\n                        \"cc_pair_id\": cc_pair.id,\n                        \"cancel_indexing_seconds\": cancel_time,\n                        \"update_status_seconds\": update_time,\n                        \"total_seconds\": connector_total_time,\n                    }\n                )\n\n            # Commit all changes\n            commit_start: float = time.time()\n            db_session.commit()\n            commit_time: float = time.time() - commit_start\n\n        # Trigger the deletion check task\n        print(\n            \"Triggering connector deletion check task\",\n            file=sys.stderr,\n        )\n        task_start: float = time.time()\n        client_app.send_task(\n            OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,\n            priority=OnyxCeleryPriority.HIGH,\n            kwargs={\"tenant_id\": tenant_id},\n        )\n        task_time: float = time.time() - task_start\n\n        total_time: float = time.time() - overall_start\n\n        result: dict[str, Any] = {\n            \"status\": \"success\",\n            \"message\": f\"Marked {deleted_count} connector credential pairs for deletion\",\n            \"deleted_count\": deleted_count,\n            \"timing\": {\n                \"fetch_all_seconds\": fetch_all_time,\n                \"commit_seconds\": commit_time,\n                \"send_task_seconds\": task_time,\n                \"total_seconds\": total_time,\n                \"per_connector\": per_connector_timing,\n            },\n        }\n\n        if errors:\n            result[\"errors\"] = errors\n\n        return result\n\n    except Exception as e:\n        print(\n            f\"Error marking all connectors for deletion: {e}\",\n            file=sys.stderr,\n        )\n        return {\n            \"status\": \"error\",\n            \"message\": str(e),\n            \"timing\": {\n                \"total_seconds\": time.time() - overall_start,\n                \"per_connector\": per_connector_timing,\n            },\n        }\n\n\ndef main() -> None:\n    if len(sys.argv) < 2 or len(sys.argv) > 3:\n        print(\n            json.dumps(\n                {\n                    \"status\": \"error\",\n                    \"message\": \"Usage: python mark_connector_for_deletion.py <tenant_id> [<cc_pair_id>|--all]\",\n                }\n            )\n        )\n        sys.exit(1)\n\n    tenant_id: str = sys.argv[1]\n\n    SqlEngine.init_engine(pool_size=5, max_overflow=2)\n\n    result: dict[str, Any]\n    # Check if we should mark all connectors or just one\n    if len(sys.argv) == 3:\n        second_arg: str = sys.argv[2]\n        if second_arg == \"--all\":\n            result = mark_all_connectors_for_deletion(tenant_id)\n        else:\n            try:\n                cc_pair_id: int = int(second_arg)\n                result = mark_connector_for_deletion(tenant_id, cc_pair_id)\n            except ValueError:\n                print(\n                    json.dumps(\n                        {\n                            \"status\": \"error\",\n                            \"message\": \"cc_pair_id must be an integer or use --all\",\n                        }\n                    )\n                )\n                sys.exit(1)\n    else:\n        # If only tenant_id is provided, show error\n        print(\n            json.dumps(\n                {\n                    \"status\": \"error\",\n                    \"message\": \"Usage: python mark_connector_for_deletion.py <tenant_id> [<cc_pair_id>|--all]\",\n                }\n            )\n        )\n        sys.exit(1)\n\n    print(json.dumps(result, indent=2))\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/on_pod_scripts/get_tenant_connectors.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nScript to fetch connector credential pairs for a tenant.\nRuns on a Kubernetes pod with access to the data plane database.\n\nUsage:\n    python get_tenant_connectors.py <tenant_id>\n\nOutput:\n    JSON to stdout with structure:\n    {\n        \"status\": \"success\" | \"error\",\n        \"connectors\": [\n            {\n                \"id\": int,\n                \"connector_id\": int,\n                \"credential_id\": int,\n                \"name\": str,\n                \"status\": str\n            },\n            ...\n        ] (if success),\n        \"message\": str (if error)\n    }\n\"\"\"\n\nimport json\nimport sys\n\nfrom sqlalchemy import select\n\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.models import ConnectorCredentialPair\n\n\ndef get_tenant_connectors(tenant_id: str) -> dict:\n    \"\"\"Get all connector credential pairs for a tenant.\n\n    Args:\n        tenant_id: The tenant ID to query\n\n    Returns:\n        Dict with status and list of connectors or error message\n    \"\"\"\n    try:\n        print(\n            f\"Fetching connector credential pairs for tenant: {tenant_id}\",\n            file=sys.stderr,\n        )\n\n        with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n            # Get all connector credential pairs\n            stmt = select(ConnectorCredentialPair)\n            cc_pairs = db_session.execute(stmt).scalars().all()\n\n            connectors = [\n                {\n                    \"id\": cc.id,\n                    \"connector_id\": cc.connector_id,\n                    \"credential_id\": cc.credential_id,\n                    \"name\": cc.name,\n                    \"status\": cc.status.value,\n                }\n                for cc in cc_pairs\n            ]\n\n            print(\n                f\"Found {len(connectors)} connector credential pair(s)\",\n                file=sys.stderr,\n            )\n\n            return {\n                \"status\": \"success\",\n                \"connectors\": connectors,\n            }\n\n    except Exception as e:\n        print(f\"Error fetching connectors: {e}\", file=sys.stderr)\n        return {\n            \"status\": \"error\",\n            \"message\": str(e),\n        }\n\n\ndef main() -> None:\n    if len(sys.argv) != 2:\n        print(\n            json.dumps(\n                {\n                    \"status\": \"error\",\n                    \"message\": \"Usage: python get_tenant_connectors.py <tenant_id>\",\n                }\n            )\n        )\n        sys.exit(1)\n\n    tenant_id = sys.argv[1]\n\n    SqlEngine.init_engine(pool_size=5, max_overflow=2)\n\n    result = get_tenant_connectors(tenant_id)\n    print(json.dumps(result))\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/on_pod_scripts/get_tenant_index_name.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nScript to get the default index name for a tenant.\nDesigned to be run on a heavy worker pod.\n\nUsage:\n    python get_tenant_index_name.py <tenant_id>\n\"\"\"\n\nimport json\nimport sys\n\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.search_settings import get_current_search_settings\n\n\ndef get_tenant_index_name(tenant_id: str) -> dict[str, str]:\n    \"\"\"Get the default index name for the given tenant.\"\"\"\n    print(f\"Getting default index name for tenant: {tenant_id}\", file=sys.stderr)\n\n    SqlEngine.init_engine(pool_size=5, max_overflow=2)\n\n    try:\n        with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n            search_settings = get_current_search_settings(db_session)\n            index_name = search_settings.index_name\n            print(f\"Found index name: {index_name}\", file=sys.stderr)\n            return {\"status\": \"success\", \"index_name\": index_name}\n\n    except Exception as e:\n        print(f\"Failed to get index name for tenant {tenant_id}: {e}\", file=sys.stderr)\n        return {\"status\": \"error\", \"message\": str(e)}\n\n\nif __name__ == \"__main__\":\n    if len(sys.argv) < 2:\n        print(\"Usage: python get_tenant_index_name.py <tenant_id>\", file=sys.stderr)\n        sys.exit(1)\n\n    tenant_id = sys.argv[1]\n\n    result = get_tenant_index_name(tenant_id)\n\n    # Output result as JSON to stdout for easy parsing\n    print(json.dumps(result))\n\n    # Exit with error code if failed\n    if result[\"status\"] == \"error\":\n        sys.exit(1)\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/on_pod_scripts/get_tenant_users.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nScript to fetch user emails from a tenant's data plane schema.\nMust be run on a pod with access to the data plane PostgreSQL database.\n\nUsage:\n    python get_tenant_users.py <tenant_id>\n\nOutput:\n    JSON object with status and users list\n\"\"\"\n\nimport json\nimport sys\n\nfrom sqlalchemy import select\n\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.models import User\n\n\ndef get_tenant_users(tenant_id: str) -> dict:\n    \"\"\"\n    Fetch user emails from the tenant's data plane schema.\n\n    Args:\n        tenant_id: The tenant ID to query\n\n    Returns:\n        Dictionary with status and users list\n    \"\"\"\n    try:\n        print(f\"Querying users for tenant: {tenant_id}\", file=sys.stderr)\n\n        with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n            # Query users from the tenant schema\n            # Select only the email column\n            user_email_column = User.__table__.c.email\n            stmt = select(user_email_column).order_by(user_email_column)\n            result = db_session.execute(stmt)\n            users = [row[0] for row in result]\n\n        return {\"status\": \"success\", \"users\": users}\n\n    except Exception as e:\n        error_msg = str(e)\n        print(f\"Error fetching users: {error_msg}\", file=sys.stderr)\n        # Check if it's a schema not found error\n        if \"does not exist\" in error_msg:\n            return {\n                \"status\": \"not_found\",\n                \"message\": f\"Schema '{tenant_id}' does not exist\",\n                \"users\": [],\n            }\n        return {\"status\": \"error\", \"message\": error_msg, \"users\": []}\n\n\ndef main() -> None:\n    if len(sys.argv) != 2:\n        print(\n            json.dumps(\n                {\n                    \"status\": \"error\",\n                    \"message\": \"Usage: python get_tenant_users.py <tenant_id>\",\n                }\n            )\n        )\n        sys.exit(1)\n\n    tenant_id = sys.argv[1]\n\n    SqlEngine.init_engine(pool_size=5, max_overflow=2)\n\n    result = get_tenant_users(tenant_id)\n    print(json.dumps(result))\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/tenant_cleanup/on_pod_scripts/understand_tenants.py",
    "content": "import json\nimport sys\nfrom typing import Any\n\nfrom sqlalchemy import text\nfrom sqlalchemy.exc import ProgrammingError\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.engine.sql_engine import get_session_with_shared_schema\nfrom onyx.db.engine.sql_engine import SqlEngine\n\n\ndef get_tenant_activity_summary(session: Session) -> list[dict[str, Any]]:\n    \"\"\"Return a list of dicts, one per tenant, with last query info, doc count, and user count.\"\"\"\n\n    # Step 1: fetch all tenant schemas\n    tenant_schemas = [\n        row[0]\n        for row in session.execute(\n            text(\n                \"\"\"\n            SELECT nspname\n            FROM pg_namespace\n            WHERE nspname NOT IN ('pg_catalog', 'information_schema', 'public')\n                AND nspname NOT LIKE 'pg_toast%%'\n                AND nspname NOT LIKE 'pg_temp%%'\n            ORDER BY nspname\n        \"\"\"\n            )\n        )\n    ]\n\n    print(f\"Found {len(tenant_schemas)} tenant schemas\", file=sys.stderr)\n\n    summaries = []\n\n    # Step 2: loop through each tenant schema\n    for idx, schema in enumerate(tenant_schemas):\n        if idx % 100 == 0:\n            print(f\"Processing tenant {idx}/{len(tenant_schemas)}\", file=sys.stderr)\n\n        try:\n            # Use a single query to get all data at once\n            query = text(\n                f\"\"\"\n                SELECT\n                    :tenant_id AS tenant_id,\n                    (\n                        SELECT time_sent\n                        FROM \"{schema}\".chat_message\n                        WHERE message_type = 'USER'\n                        ORDER BY time_sent DESC\n                        LIMIT 1\n                    ) AS last_query_time,\n                    (\n                        SELECT message\n                        FROM \"{schema}\".chat_message\n                        WHERE message_type = 'USER'\n                        ORDER BY time_sent DESC\n                        LIMIT 1\n                    ) AS last_query_text,\n                    (SELECT COUNT(*) FROM \"{schema}\".document) AS num_documents,\n                    (SELECT COUNT(*) FROM \"{schema}\".user) AS num_users\n            \"\"\"\n            )\n\n            result = session.execute(query, {\"tenant_id\": schema}).mappings().first()\n\n            if result:\n                summaries.append(dict(result))\n\n        except ProgrammingError as e:\n            # schema may be missing a table\n            print(f\"Error processing schema {schema}: {e}\", file=sys.stderr)\n            session.rollback()\n            continue\n        except Exception as e:\n            print(f\"Unexpected error processing schema {schema}: {e}\", file=sys.stderr)\n            session.rollback()\n            continue\n\n    return summaries\n\n\ndef main() -> None:\n\n    SqlEngine.init_engine(pool_size=5, max_overflow=2)\n\n    with get_session_with_shared_schema() as session:\n        summaries = get_tenant_activity_summary(session)\n\n    print(json.dumps(summaries, indent=2, default=str))\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/test-openapi-key.py",
    "content": "VALID_MODEL_LIST = [\n    \"gpt-4o-mini\",\n    \"gpt-4o\",\n    \"gpt-4-1106-preview\",\n    \"gpt-4-vision-preview\",\n    \"gpt-4\",\n    \"gpt-4-0314\",\n    \"gpt-4-0613\",\n    \"gpt-4-32k\",\n    \"gpt-4-32k-0314\",\n    \"gpt-4-32k-0613\",\n    \"gpt-3.5-turbo-0125\",\n    \"gpt-3.5-turbo-1106\",\n    \"gpt-3.5-turbo\",\n    \"gpt-3.5-turbo-16k\",\n    \"gpt-3.5-turbo-0301\",\n    \"gpt-3.5-turbo-0613\",\n    \"gpt-3.5-turbo-16k-0613\",\n]\n\n\nif __name__ == \"__main__\":\n    from openai import OpenAI\n\n    model_version = None\n    while model_version not in VALID_MODEL_LIST:\n        model_version = input(\"Please provide an OpenAI model version to test: \")\n        if model_version not in VALID_MODEL_LIST:\n            print(f\"Model must be from valid list: {', '.join(VALID_MODEL_LIST)}\")\n    assert model_version\n\n    api_key = input(\"Please provide an OpenAI API Key to test: \")\n    client = OpenAI(\n        api_key=api_key,\n    )\n\n    prompt = \"The boy went to the \"\n    print(f\"Asking OpenAI to finish the sentence using {model_version}\")\n    print(prompt)\n    try:\n        messages = [\n            {\"role\": \"system\", \"content\": \"Finish the sentence\"},\n            {\"role\": \"user\", \"content\": prompt},\n        ]\n        response = client.chat.completions.create(\n            model=model_version,\n            messages=messages,  # type:ignore\n            max_tokens=5,\n            temperature=2,\n        )\n        print(response.choices[0].message.content)\n        print(\"Success! Feel free to use this API key for Onyx.\")\n    except Exception:\n        print(\n            \"Failed, provided API key is invalid for Onyx, please address the error from OpenAI.\"\n        )\n        raise\n"
  },
  {
    "path": "backend/scripts/transform_openapi_for_docs.py",
    "content": "\"\"\"\nTransform OpenAPI schema for public documentation.\n\nFilters endpoints tagged with \"public\", converts auth to Bearer token,\nand removes internal parameters (tenant_id, db_session).\n\nUsage:\n    python scripts/transform_openapi_for_docs.py -i generated/openapi.json -o openapi_docs.json\n\"\"\"\n\nimport argparse\nimport copy\nimport json\nfrom typing import Any\n\nPUBLIC_TAG = \"public\"\nDOCS_SERVER_URL = \"https://cloud.onyx.app/api\"\nINTERNAL_PARAMETERS = {\"tenant_id\", \"db_session\"}\n\n\ndef collect_schema_refs(obj: Any, refs: set[str]) -> None:\n    \"\"\"Recursively collect all $ref references from an object.\"\"\"\n    if isinstance(obj, dict):\n        if \"$ref\" in obj:\n            ref = obj[\"$ref\"]\n            if ref.startswith(\"#/components/schemas/\"):\n                refs.add(ref.split(\"/\")[-1])\n        for value in obj.values():\n            collect_schema_refs(value, refs)\n    elif isinstance(obj, list):\n        for item in obj:\n            collect_schema_refs(item, refs)\n\n\ndef get_all_referenced_schemas(\n    schemas: dict[str, Any], initial_refs: set[str]\n) -> set[str]:\n    \"\"\"Get all schemas referenced by initial_refs, including nested references.\"\"\"\n    all_refs = set(initial_refs)\n    to_process = list(initial_refs)\n\n    while to_process:\n        schema_name = to_process.pop()\n        if schema_name not in schemas:\n            continue\n\n        new_refs: set[str] = set()\n        collect_schema_refs(schemas[schema_name], new_refs)\n\n        for ref in new_refs:\n            if ref not in all_refs:\n                all_refs.add(ref)\n                to_process.append(ref)\n\n    return all_refs\n\n\ndef remove_internal_properties_from_schema(schema: dict[str, Any]) -> None:\n    \"\"\"Recursively remove internal properties from a schema.\"\"\"\n    if not isinstance(schema, dict):\n        return\n\n    if \"properties\" in schema and isinstance(schema[\"properties\"], dict):\n        for prop_name in list(schema[\"properties\"].keys()):\n            if prop_name in INTERNAL_PARAMETERS:\n                del schema[\"properties\"][prop_name]\n\n        if \"required\" in schema and isinstance(schema[\"required\"], list):\n            schema[\"required\"] = [\n                r for r in schema[\"required\"] if r not in INTERNAL_PARAMETERS\n            ]\n            if not schema[\"required\"]:\n                del schema[\"required\"]\n\n    for key in [\"allOf\", \"oneOf\", \"anyOf\"]:\n        if key in schema and isinstance(schema[key], list):\n            for item in schema[key]:\n                remove_internal_properties_from_schema(item)\n\n    if \"items\" in schema:\n        remove_internal_properties_from_schema(schema[\"items\"])\n\n    if \"additionalProperties\" in schema and isinstance(\n        schema[\"additionalProperties\"], dict\n    ):\n        remove_internal_properties_from_schema(schema[\"additionalProperties\"])\n\n\ndef remove_internal_parameters(spec: dict[str, Any]) -> None:\n    \"\"\"Remove internal parameters from all endpoints and schemas.\"\"\"\n    for path_data in spec.get(\"paths\", {}).values():\n        for method_data in path_data.values():\n            if isinstance(method_data, dict) and \"parameters\" in method_data:\n                method_data[\"parameters\"] = [\n                    p\n                    for p in method_data[\"parameters\"]\n                    if not (\n                        isinstance(p, dict) and p.get(\"name\") in INTERNAL_PARAMETERS\n                    )\n                ]\n                if not method_data[\"parameters\"]:\n                    del method_data[\"parameters\"]\n\n    for schema in spec.get(\"components\", {}).get(\"schemas\", {}).values():\n        remove_internal_properties_from_schema(schema)\n\n\ndef transform_openapi(input_spec: dict[str, Any]) -> dict[str, Any]:\n    \"\"\"Transform the OpenAPI spec for public documentation.\"\"\"\n    output_spec: dict[str, Any] = {\n        \"openapi\": input_spec.get(\"openapi\", \"3.1.0\"),\n        \"info\": {\n            \"title\": \"Onyx API\",\n            \"description\": \"Onyx API for AI-powered enterprise search and chat\",\n            \"version\": input_spec.get(\"info\", {}).get(\"version\", \"1.0.0\"),\n        },\n        \"servers\": [{\"url\": DOCS_SERVER_URL}],\n        \"paths\": {},\n        \"components\": {\n            \"schemas\": {},\n            \"securitySchemes\": {\n                \"BearerAuth\": {\n                    \"type\": \"http\",\n                    \"scheme\": \"bearer\",\n                    \"description\": \"Authorization header with Bearer token\",\n                }\n            },\n        },\n    }\n\n    input_paths = input_spec.get(\"paths\", {})\n    initial_refs: set[str] = set()\n\n    for path, path_data in input_paths.items():\n        for method, method_data in path_data.items():\n            if not isinstance(method_data, dict):\n                continue\n\n            if PUBLIC_TAG in method_data.get(\"tags\", []):\n                if path not in output_spec[\"paths\"]:\n                    output_spec[\"paths\"][path] = {}\n\n                endpoint = copy.deepcopy(method_data)\n                if \"security\" in endpoint:\n                    endpoint[\"security\"] = [{\"BearerAuth\": []}]\n                output_spec[\"paths\"][path][method] = endpoint\n                collect_schema_refs(method_data, initial_refs)\n\n    input_schemas = input_spec.get(\"components\", {}).get(\"schemas\", {})\n    all_refs = get_all_referenced_schemas(input_schemas, initial_refs)\n\n    for schema_name in all_refs:\n        if schema_name in input_schemas:\n            output_spec[\"components\"][\"schemas\"][schema_name] = copy.deepcopy(\n                input_schemas[schema_name]\n            )\n\n    remove_internal_parameters(output_spec)\n\n    return output_spec\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(\n        description=\"Transform OpenAPI schema for public documentation\"\n    )\n    parser.add_argument(\n        \"--input\", \"-i\", default=\"openapi.json\", help=\"Input OpenAPI JSON file\"\n    )\n    parser.add_argument(\n        \"--output\", \"-o\", default=\"openapi_docs.json\", help=\"Output OpenAPI JSON file\"\n    )\n    args = parser.parse_args()\n\n    with open(args.input) as f:\n        input_spec = json.load(f)\n\n    output_spec = transform_openapi(input_spec)\n\n    with open(args.output, \"w\") as f:\n        json.dump(output_spec, f, indent=2)\n\n    endpoint_count = sum(len(m) for m in output_spec[\"paths\"].values())\n    schema_count = len(output_spec[\"components\"][\"schemas\"])\n    print(f\"Wrote {args.output}: {endpoint_count} endpoints, {schema_count} schemas\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/scripts/upload_files_as_connectors.py",
    "content": "\"\"\"\nScript to upload files from a directory as individual file connectors in Onyx.\nEach file gets its own connector named after the file.\n\nUsage:\n    python upload_files_as_connectors.py --data-dir /path/to/files --api-key YOUR_KEY\n    python upload_files_as_connectors.py --data-dir /path/to/files --api-key YOUR_KEY --api-base http://onyxserver:3000\n    python upload_files_as_connectors.py --data-dir /path/to/files --api-key YOUR_KEY --file-glob '*.zip'\n\nRequires:\n    pip install requests\n\"\"\"\n\nimport argparse\nimport fnmatch\nimport os\nimport sys\nimport threading\nimport time\n\nimport requests\n\nREQUEST_TIMEOUT = 900  # 15 minutes\n\n\ndef _elapsed_printer(label: str, stop_event: threading.Event) -> None:\n    \"\"\"Print a live elapsed-time counter until stop_event is set.\"\"\"\n    start = time.monotonic()\n    while not stop_event.wait(timeout=1):\n        elapsed = int(time.monotonic() - start)\n        m, s = divmod(elapsed, 60)\n        print(f\"\\r  {label} ... {m:02d}:{s:02d}\", end=\"\", flush=True)\n    elapsed = int(time.monotonic() - start)\n    m, s = divmod(elapsed, 60)\n    print(f\"\\r  {label} ... {m:02d}:{s:02d} done\")\n\n\ndef _timed_request(label: str, fn: object) -> requests.Response:\n    \"\"\"Run a request function while displaying a live elapsed timer.\"\"\"\n    stop = threading.Event()\n    t = threading.Thread(target=_elapsed_printer, args=(label, stop), daemon=True)\n    t.start()\n    try:\n        resp = fn()  # type: ignore[operator]\n    finally:\n        stop.set()\n        t.join()\n    return resp\n\n\ndef upload_file(\n    session: requests.Session, base_url: str, file_path: str\n) -> dict | None:\n    \"\"\"Upload a single file and return the response with file_paths and file_names.\"\"\"\n    with open(file_path, \"rb\") as f:\n        resp = _timed_request(\n            \"Uploading\",\n            lambda: session.post(\n                f\"{base_url}/api/manage/admin/connector/file/upload\",\n                files={\"files\": (os.path.basename(file_path), f)},\n                timeout=REQUEST_TIMEOUT,\n            ),\n        )\n    if not resp.ok:\n        print(f\"  ERROR uploading: {resp.text}\")\n        return None\n    return resp.json()\n\n\ndef create_connector(\n    session: requests.Session,\n    base_url: str,\n    name: str,\n    file_paths: list[str],\n    file_names: list[str],\n    zip_metadata_file_id: str | None,\n) -> int | None:\n    \"\"\"Create a file connector and return its ID.\"\"\"\n    resp = _timed_request(\n        \"Creating connector\",\n        lambda: session.post(\n            f\"{base_url}/api/manage/admin/connector\",\n            json={\n                \"name\": name,\n                \"source\": \"file\",\n                \"input_type\": \"load_state\",\n                \"connector_specific_config\": {\n                    \"file_locations\": file_paths,\n                    \"file_names\": file_names,\n                    \"zip_metadata_file_id\": zip_metadata_file_id,\n                },\n                \"refresh_freq\": None,\n                \"prune_freq\": None,\n                \"indexing_start\": None,\n                \"access_type\": \"public\",\n                \"groups\": [],\n            },\n            timeout=REQUEST_TIMEOUT,\n        ),\n    )\n    if not resp.ok:\n        print(f\"  ERROR creating connector: {resp.text}\")\n        return None\n    return resp.json()[\"id\"]\n\n\ndef create_credential(\n    session: requests.Session, base_url: str, name: str\n) -> int | None:\n    \"\"\"Create a dummy credential for the file connector.\"\"\"\n    resp = session.post(\n        f\"{base_url}/api/manage/credential\",\n        json={\n            \"credential_json\": {},\n            \"admin_public\": True,\n            \"source\": \"file\",\n            \"curator_public\": True,\n            \"groups\": [],\n            \"name\": name,\n        },\n        timeout=REQUEST_TIMEOUT,\n    )\n    if not resp.ok:\n        print(f\"  ERROR creating credential: {resp.text}\")\n        return None\n    return resp.json()[\"id\"]\n\n\ndef link_credential(\n    session: requests.Session,\n    base_url: str,\n    connector_id: int,\n    credential_id: int,\n    name: str,\n) -> bool:\n    \"\"\"Link the connector to the credential (create CC pair).\"\"\"\n    resp = session.put(\n        f\"{base_url}/api/manage/connector/{connector_id}/credential/{credential_id}\",\n        json={\n            \"name\": name,\n            \"access_type\": \"public\",\n            \"groups\": [],\n            \"auto_sync_options\": None,\n            \"processing_mode\": \"REGULAR\",\n        },\n        timeout=REQUEST_TIMEOUT,\n    )\n    if not resp.ok:\n        print(f\"  ERROR linking credential: {resp.text}\")\n        return False\n    return True\n\n\ndef run_connector(\n    session: requests.Session,\n    base_url: str,\n    connector_id: int,\n    credential_id: int,\n) -> bool:\n    \"\"\"Trigger the connector to start indexing.\"\"\"\n    resp = session.post(\n        f\"{base_url}/api/manage/admin/connector/run-once\",\n        json={\n            \"connector_id\": connector_id,\n            \"credentialIds\": [credential_id],\n            \"from_beginning\": False,\n        },\n        timeout=REQUEST_TIMEOUT,\n    )\n    if not resp.ok:\n        print(f\"  ERROR running connector: {resp.text}\")\n        return False\n    return True\n\n\ndef process_file(session: requests.Session, base_url: str, file_path: str) -> bool:\n    \"\"\"Process a single file through the full connector creation flow.\"\"\"\n    file_name = os.path.basename(file_path)\n    connector_name = file_name\n    print(f\"Processing: {file_name}\")\n\n    # Step 1: Upload\n    upload_resp = upload_file(session, base_url, file_path)\n    if not upload_resp:\n        return False\n\n    # Step 2: Create connector\n    connector_id = create_connector(\n        session,\n        base_url,\n        name=f\"FileConnector-{connector_name}\",\n        file_paths=upload_resp[\"file_paths\"],\n        file_names=upload_resp[\"file_names\"],\n        zip_metadata_file_id=upload_resp.get(\"zip_metadata_file_id\"),\n    )\n    if connector_id is None:\n        return False\n\n    # Step 3: Create credential\n    credential_id = create_credential(session, base_url, name=connector_name)\n    if credential_id is None:\n        return False\n\n    # Step 4: Link connector to credential\n    if not link_credential(\n        session, base_url, connector_id, credential_id, connector_name\n    ):\n        return False\n\n    # Step 5: Trigger indexing\n    if not run_connector(session, base_url, connector_id, credential_id):\n        return False\n\n    print(f\"  OK (connector_id={connector_id})\")\n    return True\n\n\ndef get_authenticated_session(api_key: str) -> requests.Session:\n    \"\"\"Create a session authenticated with an API key.\"\"\"\n    session = requests.Session()\n    session.headers.update({\"Authorization\": f\"Bearer {api_key}\"})\n    return session\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(\n        description=\"Upload files as individual Onyx file connectors.\"\n    )\n    parser.add_argument(\n        \"--data-dir\",\n        required=True,\n        help=\"Directory containing files to upload.\",\n    )\n    parser.add_argument(\n        \"--api-base\",\n        default=\"http://localhost:3000\",\n        help=\"Base URL for the Onyx API (default: http://localhost:3000).\",\n    )\n    parser.add_argument(\n        \"--api-key\",\n        required=True,\n        help=\"API key for authentication.\",\n    )\n    parser.add_argument(\n        \"--file-glob\",\n        default=None,\n        help=\"Glob pattern to filter files (e.g. '*.json', '*.zip').\",\n    )\n    args = parser.parse_args()\n\n    data_dir = args.data_dir\n    base_url = args.api_base.rstrip(\"/\")\n    api_key = args.api_key\n    file_glob = args.file_glob\n\n    if not os.path.isdir(data_dir):\n        print(f\"Error: {data_dir} is not a directory\")\n        sys.exit(1)\n\n    script_path = os.path.realpath(__file__)\n    files = sorted(\n        os.path.join(data_dir, f)\n        for f in os.listdir(data_dir)\n        if os.path.isfile(os.path.join(data_dir, f))\n        and os.path.realpath(os.path.join(data_dir, f)) != script_path\n        and (file_glob is None or fnmatch.fnmatch(f, file_glob))\n    )\n\n    if not files:\n        print(f\"No files found in {data_dir}\")\n        sys.exit(1)\n\n    print(f\"Found {len(files)} file(s) in {data_dir}\\n\")\n\n    session = get_authenticated_session(api_key)\n\n    success = 0\n    failed = 0\n    for file_path in files:\n        if process_file(session, base_url, file_path):\n            success += 1\n        else:\n            failed += 1\n        # Small delay to avoid overwhelming the server\n        time.sleep(0.5)\n\n    print(f\"\\nDone: {success} succeeded, {failed} failed out of {len(files)} files.\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/shared_configs/__init__.py",
    "content": ""
  },
  {
    "path": "backend/shared_configs/configs.py",
    "content": "import os\nfrom typing import Any\nfrom typing import List\nfrom urllib.parse import urlparse\n\n# Used for logging\nSLACK_CHANNEL_ID = \"channel_id\"\n\n# Skip model warmup at startup\n# Default to True (skip warmup) if not set, otherwise respect the value\nSKIP_WARM_UP = os.environ.get(\"SKIP_WARM_UP\", \"true\").lower() == \"true\"\n\n# Check if model server is disabled\nDISABLE_MODEL_SERVER = os.environ.get(\"DISABLE_MODEL_SERVER\", \"\").lower() == \"true\"\n\n# If model server is disabled, use \"disabled\" as host to trigger proper handling\nif DISABLE_MODEL_SERVER:\n    MODEL_SERVER_HOST = \"disabled\"\n    MODEL_SERVER_ALLOWED_HOST = \"disabled\"\n    INDEXING_MODEL_SERVER_HOST = \"disabled\"\nelse:\n    MODEL_SERVER_HOST = os.environ.get(\"MODEL_SERVER_HOST\") or \"localhost\"\n    MODEL_SERVER_ALLOWED_HOST = os.environ.get(\"MODEL_SERVER_HOST\") or \"0.0.0.0\"\n    INDEXING_MODEL_SERVER_HOST = (\n        os.environ.get(\"INDEXING_MODEL_SERVER_HOST\") or MODEL_SERVER_HOST\n    )\n\nMODEL_SERVER_PORT = int(os.environ.get(\"MODEL_SERVER_PORT\") or \"9000\")\n# Model server for indexing should use a separate one to not allow indexing to introduce delay\n# for inference\nINDEXING_MODEL_SERVER_PORT = int(\n    os.environ.get(\"INDEXING_MODEL_SERVER_PORT\") or MODEL_SERVER_PORT\n)\n\n# Onyx custom Deep Learning Models\nCONNECTOR_CLASSIFIER_MODEL_REPO = \"Danswer/filter-extraction-model\"\nCONNECTOR_CLASSIFIER_MODEL_TAG = \"1.0.0\"\nINTENT_MODEL_VERSION = \"onyx-dot-app/hybrid-intent-token-classifier\"\n# INTENT_MODEL_TAG = \"v1.0.3\"\nINTENT_MODEL_TAG: str | None = None\n# Bi-Encoder, other details\nDOC_EMBEDDING_CONTEXT_SIZE = 512\n\n# Used to distinguish alternative indices\nALT_INDEX_SUFFIX = \"__danswer_alt_index\"\n\n# Used for loading defaults for automatic deployments and dev flows\n# For local, use: mixedbread-ai/mxbai-rerank-xsmall-v1\nDEFAULT_CROSS_ENCODER_MODEL_NAME = (\n    os.environ.get(\"DEFAULT_CROSS_ENCODER_MODEL_NAME\") or None\n)\nDEFAULT_CROSS_ENCODER_API_KEY = os.environ.get(\"DEFAULT_CROSS_ENCODER_API_KEY\") or None\nDEFAULT_CROSS_ENCODER_PROVIDER_TYPE = (\n    os.environ.get(\"DEFAULT_CROSS_ENCODER_PROVIDER_TYPE\") or None\n)\nDISABLE_RERANK_FOR_STREAMING = (\n    os.environ.get(\"DISABLE_RERANK_FOR_STREAMING\", \"\").lower() == \"true\"\n)\n\n# This controls the minimum number of pytorch \"threads\" to allocate to the embedding\n# model. If torch finds more threads on its own, this value is not used.\nMIN_THREADS_ML_MODELS = int(os.environ.get(\"MIN_THREADS_ML_MODELS\") or 1)\n\n# Model server that has indexing only set will throw exception if used for reranking\n# or intent classification\nINDEXING_ONLY = os.environ.get(\"INDEXING_ONLY\", \"\").lower() == \"true\"\n\n# The process needs to have this for the log file to write to\n# otherwise, it will not create additional log files\n# This should just be the filename base without extension or path.\nLOG_FILE_NAME = os.environ.get(\"LOG_FILE_NAME\") or \"onyx\"\n\n# Enable generating persistent log files for local dev environments\nDEV_LOGGING_ENABLED = os.environ.get(\"DEV_LOGGING_ENABLED\", \"\").lower() == \"true\"\n# notset, debug, info, notice, warning, error, or critical\nLOG_LEVEL = os.environ.get(\"LOG_LEVEL\") or \"info\"\n\n# Timeout for API-based embedding models\n# NOTE: does not apply for Google VertexAI, since the python client doesn't\n# allow us to specify a custom timeout\nAPI_BASED_EMBEDDING_TIMEOUT = int(os.environ.get(\"API_BASED_EMBEDDING_TIMEOUT\", \"600\"))\n\n# Local batch size for VertexAI embedding models currently calibrated for item size of 512 tokens\n# NOTE: increasing this value may lead to API errors due to token limit exhaustion per call.\nVERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE = int(\n    os.environ.get(\"VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE\", \"50\")\n)\n\n# Only used for OpenAI\nOPENAI_EMBEDDING_TIMEOUT = int(\n    os.environ.get(\"OPENAI_EMBEDDING_TIMEOUT\", API_BASED_EMBEDDING_TIMEOUT)\n)\n\n# Whether or not to strictly enforce token limit for chunking.\nSTRICT_CHUNK_TOKEN_LIMIT = (\n    os.environ.get(\"STRICT_CHUNK_TOKEN_LIMIT\", \"\").lower() == \"true\"\n)\n\n# Set up Sentry integration (for error logging)\nSENTRY_DSN = os.environ.get(\"SENTRY_DSN\")\n\n\n# Fields which should only be set on new search setting\nPRESERVED_SEARCH_FIELDS = [\n    \"id\",\n    \"provider_type\",\n    \"api_key\",\n    \"model_name\",\n    \"api_url\",\n    \"index_name\",\n    \"multipass_indexing\",\n    \"enable_contextual_rag\",\n    \"model_dim\",\n    \"normalize\",\n    \"passage_prefix\",\n    \"query_prefix\",\n]\n\n\ndef validate_cors_origin(origin: str) -> None:\n    parsed = urlparse(origin)\n    if parsed.scheme not in [\"http\", \"https\"] or not parsed.netloc:\n        raise ValueError(f\"Invalid CORS origin: '{origin}'\")\n\n\n# Examples of valid values for the environment variable:\n# - \"\" (allow all origins)\n# - \"http://example.com\" (single origin)\n# - \"http://example.com,https://example.org\" (multiple origins)\n# - \"*\" (allow all origins)\nCORS_ALLOWED_ORIGIN_ENV = os.environ.get(\"CORS_ALLOWED_ORIGIN\", \"\")\n\n# Explicitly declare the type of CORS_ALLOWED_ORIGIN\nCORS_ALLOWED_ORIGIN: List[str]\n\nif CORS_ALLOWED_ORIGIN_ENV:\n    # Split the environment variable into a list of origins\n    CORS_ALLOWED_ORIGIN = [\n        origin.strip()\n        for origin in CORS_ALLOWED_ORIGIN_ENV.split(\",\")\n        if origin.strip()\n    ]\n    # Validate each origin in the list\n    for origin in CORS_ALLOWED_ORIGIN:\n        validate_cors_origin(origin)\nelse:\n    # If the environment variable is empty, allow all origins\n    CORS_ALLOWED_ORIGIN = [\"*\"]\n\n\n# Multi-tenancy configuration\nMULTI_TENANT = os.environ.get(\"MULTI_TENANT\", \"\").lower() == \"true\"\n\n# Outside this file, should almost always use `POSTGRES_DEFAULT_SCHEMA` unless you\n# have a very good reason\nPOSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE = \"public\"\nPOSTGRES_DEFAULT_SCHEMA = (\n    os.environ.get(\"POSTGRES_DEFAULT_SCHEMA\") or POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE\n)\nDEFAULT_REDIS_PREFIX = os.environ.get(\"DEFAULT_REDIS_PREFIX\") or \"default\"\n\n\nasync def async_return_default_schema(\n    *args: Any, **kwargs: Any  # noqa: ARG001\n) -> str:  # noqa: ARG001\n    return POSTGRES_DEFAULT_SCHEMA\n\n\n# Prefix used for all tenant ids\nTENANT_ID_PREFIX = \"tenant_\"\n\nDISALLOWED_SLACK_BOT_TENANT_IDS = os.environ.get(\"DISALLOWED_SLACK_BOT_TENANT_IDS\")\nDISALLOWED_SLACK_BOT_TENANT_LIST = (\n    [\n        tenant.strip()\n        for tenant in DISALLOWED_SLACK_BOT_TENANT_IDS.split(\",\")\n        if tenant.strip()\n    ]\n    if DISALLOWED_SLACK_BOT_TENANT_IDS\n    else None\n)\n\nIGNORED_SYNCING_TENANT_IDS = os.environ.get(\"IGNORED_SYNCING_TENANT_IDS\")\nIGNORED_SYNCING_TENANT_LIST = (\n    [\n        tenant.strip()\n        for tenant in IGNORED_SYNCING_TENANT_IDS.split(\",\")\n        if tenant.strip()\n    ]\n    if IGNORED_SYNCING_TENANT_IDS\n    else None\n)\n\nENVIRONMENT = os.environ.get(\"ENVIRONMENT\") or \"not_explicitly_set\"\n\n\n#####\n# Usage Limits Configuration (meant for cloud, off by default for self-hosted)\n#####\n# Whether usage limits are enforced (defaults to MULTI_TENANT value)\n_USAGE_LIMITS_ENABLED_RAW = os.environ.get(\"USAGE_LIMITS_ENABLED\")\nif _USAGE_LIMITS_ENABLED_RAW is not None:\n    USAGE_LIMITS_ENABLED = _USAGE_LIMITS_ENABLED_RAW.lower() == \"true\"\nelse:\n    # Default: enabled on cloud (MULTI_TENANT), disabled for self-hosted\n    USAGE_LIMITS_ENABLED = MULTI_TENANT\n\n# Usage limit window in seconds (default: 1 week = 604800 seconds)\nUSAGE_LIMIT_WINDOW_SECONDS = int(os.environ.get(\"USAGE_LIMIT_WINDOW_SECONDS\", \"604800\"))\n\n# Per-week LLM usage cost limits in cents (e.g., 1000 = $10.00)\n# Trial users get lower limits than paid users\nUSAGE_LIMIT_LLM_COST_CENTS_TRIAL = int(\n    os.environ.get(\"USAGE_LIMIT_LLM_COST_CENTS_TRIAL\", \"3200\")  # $32.00 default\n)\nUSAGE_LIMIT_LLM_COST_CENTS_PAID = int(\n    os.environ.get(\"USAGE_LIMIT_LLM_COST_CENTS_PAID\", \"6400\")  # $64.00 default\n)\n\n# Per-week chunks indexed limits\nUSAGE_LIMIT_CHUNKS_INDEXED_TRIAL = int(\n    os.environ.get(\"USAGE_LIMIT_CHUNKS_INDEXED_TRIAL\", 400_000)\n)\nUSAGE_LIMIT_CHUNKS_INDEXED_PAID = int(\n    os.environ.get(\"USAGE_LIMIT_CHUNKS_INDEXED_PAID\", 4_000_000)\n)\n\n# Per-week API calls using API keys or Personal Access Tokens\nUSAGE_LIMIT_API_CALLS_TRIAL = int(os.environ.get(\"USAGE_LIMIT_API_CALLS_TRIAL\", \"0\"))\nUSAGE_LIMIT_API_CALLS_PAID = int(os.environ.get(\"USAGE_LIMIT_API_CALLS_PAID\", \"40000\"))\n\n# Per-week non-streaming API calls (more expensive, so lower limits)\nUSAGE_LIMIT_NON_STREAMING_CALLS_TRIAL = int(\n    os.environ.get(\"USAGE_LIMIT_NON_STREAMING_CALLS_TRIAL\", \"0\")\n)\nUSAGE_LIMIT_NON_STREAMING_CALLS_PAID = int(\n    os.environ.get(\"USAGE_LIMIT_NON_STREAMING_CALLS_PAID\", \"160\")\n)\n"
  },
  {
    "path": "backend/shared_configs/contextvars.py",
    "content": "import contextvars\n\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\n\n# Context variable for the current tenant id\nCURRENT_TENANT_ID_CONTEXTVAR: contextvars.ContextVar[str | None] = (\n    contextvars.ContextVar(\n        \"current_tenant_id\", default=None if MULTI_TENANT else POSTGRES_DEFAULT_SCHEMA\n    )\n)\n\n# set by every route in the API server\nINDEXING_REQUEST_ID_CONTEXTVAR: contextvars.ContextVar[str | None] = (\n    contextvars.ContextVar(\"indexing_request_id\", default=None)\n)\n\n# set by every route in the API server\nONYX_REQUEST_ID_CONTEXTVAR: contextvars.ContextVar[str | None] = contextvars.ContextVar(\n    \"onyx_request_id\", default=None\n)\n\n# Used to store cc pair id and index attempt id in multithreaded environments\nINDEX_ATTEMPT_INFO_CONTEXTVAR: contextvars.ContextVar[tuple[int, int] | None] = (\n    contextvars.ContextVar(\"index_attempt_info\", default=None)\n)\n\n# Set by endpoint context middleware — used for per-endpoint DB pool attribution\nCURRENT_ENDPOINT_CONTEXTVAR: contextvars.ContextVar[str | None] = (\n    contextvars.ContextVar(\"current_endpoint\", default=None)\n)\n\n\n\"\"\"Utils related to contextvars\"\"\"\n\n\ndef get_current_tenant_id() -> str:\n    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()\n    if tenant_id is None:\n        import traceback\n\n        if not MULTI_TENANT:\n            return POSTGRES_DEFAULT_SCHEMA\n\n        stack_trace = traceback.format_stack()\n        error_message = (\n            \"Tenant ID is not set. This should never happen.\\nStack trace:\\n\"\n            + \"\".join(stack_trace)\n        )\n        raise RuntimeError(error_message)\n    return tenant_id\n"
  },
  {
    "path": "backend/shared_configs/enums.py",
    "content": "from enum import Enum\n\n\nclass EmbeddingProvider(str, Enum):\n    OPENAI = \"openai\"\n    COHERE = \"cohere\"\n    VOYAGE = \"voyage\"\n    GOOGLE = \"google\"\n    LITELLM = \"litellm\"\n    AZURE = \"azure\"\n\n\nclass RerankerProvider(str, Enum):\n    COHERE = \"cohere\"\n    LITELLM = \"litellm\"\n    BEDROCK = \"bedrock\"\n\n\nclass EmbedTextType(str, Enum):\n    QUERY = \"query\"\n    PASSAGE = \"passage\"\n\n\nclass WebSearchProviderType(str, Enum):\n    GOOGLE_PSE = \"google_pse\"\n    SERPER = \"serper\"\n    EXA = \"exa\"\n    SEARXNG = \"searxng\"\n    BRAVE = \"brave\"\n\n\nclass WebContentProviderType(str, Enum):\n    ONYX_WEB_CRAWLER = \"onyx_web_crawler\"\n    FIRECRAWL = \"firecrawl\"\n    EXA = \"exa\"\n"
  },
  {
    "path": "backend/shared_configs/model_server_models.py",
    "content": "from pydantic import BaseModel\n\nfrom shared_configs.enums import EmbeddingProvider\nfrom shared_configs.enums import EmbedTextType\nfrom shared_configs.enums import RerankerProvider\n\n\nEmbedding = list[float]\n\n\nclass EmbedRequest(BaseModel):\n    texts: list[str]\n    # Can be none for cloud embedding model requests, error handling logic exists for other cases\n    model_name: str | None = None\n    deployment_name: str | None = None\n    max_context_length: int\n    normalize_embeddings: bool\n    api_key: str | None = None\n    provider_type: EmbeddingProvider | None = None\n    text_type: EmbedTextType\n    manual_query_prefix: str | None = None\n    manual_passage_prefix: str | None = None\n    api_url: str | None = None\n    api_version: str | None = None\n\n    # allows for the truncation of the vector to a lower dimension\n    # to reduce memory usage. Currently only supported for OpenAI models.\n    # will be ignored for other providers.\n    reduced_dimension: int | None = None\n\n    # This disables the \"model_\" protected namespace for pydantic\n    model_config = {\"protected_namespaces\": ()}\n\n\nclass EmbedResponse(BaseModel):\n    embeddings: list[Embedding]\n\n\nclass RerankRequest(BaseModel):\n    query: str\n    documents: list[str]\n    model_name: str\n    provider_type: RerankerProvider | None = None\n    api_key: str | None = None\n    api_url: str | None = None\n\n    # This disables the \"model_\" protected namespace for pydantic\n    model_config = {\"protected_namespaces\": ()}\n\n\nclass RerankResponse(BaseModel):\n    scores: list[float]\n\n\nclass IntentRequest(BaseModel):\n    query: str\n    # Sequence classification threshold\n    semantic_percent_threshold: float\n    # Token classification threshold\n    keyword_percent_threshold: float\n\n\nclass IntentResponse(BaseModel):\n    is_keyword: bool\n    keywords: list[str]\n"
  },
  {
    "path": "backend/shared_configs/utils.py",
    "content": "from typing import TypeVar\n\n\nT = TypeVar(\"T\")\n\n\ndef batch_list(\n    lst: list[T],\n    batch_size: int,\n) -> list[list[T]]:\n    return [lst[i : i + batch_size] for i in range(0, len(lst), batch_size)]\n"
  },
  {
    "path": "backend/slackbot_images/README.md",
    "content": "This folder contains images needed by the Onyx Slack Bot. When possible, we use the images\nwithin `web/public`, but sometimes those images do not work for the Slack Bot.\n"
  },
  {
    "path": "backend/supervisord.conf",
    "content": "[supervisord]\nnodaemon=true\nuser=root\nlogfile=/var/log/supervisord.log\nenvironment=PYTHONPATH=\"/app\"\n\n# region enable supervisorctl usage\n[supervisorctl]\nserverurl=unix:///tmp/supervisor.sock\n\n[unix_http_server]\nfile=/tmp/supervisor.sock\nchmod=0700\n\n[rpcinterface:supervisor]\nsupervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface\n# endregion enable supervisorctl usage\n\n# Background jobs that must be run async due to long time to completion\n# NOTE: due to an issue with Celery + SQLAlchemy\n# (https://github.com/celery/celery/issues/7007#issuecomment-1740139367)\n# we must use the threads pool instead of the default prefork pool for now\n# in order to avoid intermittent errors like:\n# `billiard.exceptions.WorkerLostError: Worker exited prematurely: signal 11 (SIGSEGV)`.\n#\n# This means workers will not be able take advantage of multiple CPU cores\n# on a system, but this should be okay for now since all our celery tasks are\n# relatively compute-light (e.g. they tend to just make a bunch of requests to\n# Vespa / Postgres)\n[program:celery_worker_primary]\ncommand=celery -A onyx.background.celery.versioned_apps.primary worker\n    --loglevel=INFO\n    --hostname=primary@%%n\n    -Q celery\nstdout_logfile=/var/log/celery_worker_primary.log\nstdout_logfile_maxbytes=16MB\nredirect_stderr=true\nautorestart=true\nstartsecs=10\nstopasgroup=true\n\n# NOTE: only allowing configuration here and not in the other celery workers,\n# since this is often the bottleneck for \"sync\" jobs (e.g. document set syncing,\n# user group syncing, deletion, etc.)\n[program:celery_worker_light]\ncommand=celery -A onyx.background.celery.versioned_apps.light worker\n    --loglevel=INFO\n    --hostname=light@%%n\n    -Q vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup,index_attempt_cleanup,opensearch_migration\nstdout_logfile=/var/log/celery_worker_light.log\nstdout_logfile_maxbytes=16MB\nredirect_stderr=true\nautorestart=true\nstartsecs=10\nstopasgroup=true\n\n[program:celery_worker_heavy]\ncommand=celery -A onyx.background.celery.versioned_apps.heavy worker\n    --loglevel=INFO\n    --hostname=heavy@%%n\n    -Q connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,sandbox\nstdout_logfile=/var/log/celery_worker_heavy.log\nstdout_logfile_maxbytes=16MB\nredirect_stderr=true\nautorestart=true\nstartsecs=10\nstopasgroup=true\n\n[program:celery_worker_docprocessing]\ncommand=celery -A onyx.background.celery.versioned_apps.docprocessing worker\n    --loglevel=INFO\n    --hostname=docprocessing@%%n\n    -Q docprocessing\nstdout_logfile=/var/log/celery_worker_docprocessing.log\nstdout_logfile_maxbytes=16MB\nredirect_stderr=true\nautorestart=true\nstartsecs=10\nstopasgroup=true\n\n[program:celery_worker_user_file_processing]\ncommand=celery -A onyx.background.celery.versioned_apps.user_file_processing worker\n    --loglevel=INFO\n    --hostname=user_file_processing@%%n\n    -Q user_file_processing,user_file_project_sync,user_file_delete\nstdout_logfile=/var/log/celery_worker_user_file_processing.log\nstdout_logfile_maxbytes=16MB\nredirect_stderr=true\nautorestart=true\nstartsecs=10\nstopasgroup=true\n\n[program:celery_worker_docfetching]\ncommand=celery -A onyx.background.celery.versioned_apps.docfetching worker\n    --loglevel=INFO\n    --hostname=docfetching@%%n\n    -Q connector_doc_fetching\nstdout_logfile=/var/log/celery_worker_docfetching.log\nstdout_logfile_maxbytes=16MB\nredirect_stderr=true\nautorestart=true\nstartsecs=10\nstopasgroup=true\n\n[program:celery_worker_monitoring]\ncommand=celery -A onyx.background.celery.versioned_apps.monitoring worker\n    --loglevel=INFO\n    --hostname=monitoring@%%n\n    -Q monitoring\nstdout_logfile=/var/log/celery_worker_monitoring.log\nstdout_logfile_maxbytes=16MB\nredirect_stderr=true\nautorestart=true\nstartsecs=10\nstopasgroup=true\n\n\n# Job scheduler for periodic tasks\n[program:celery_beat]\ncommand=celery -A onyx.background.celery.versioned_apps.beat beat\nstdout_logfile=/var/log/celery_beat.log\nstdout_logfile_maxbytes=16MB\nredirect_stderr=true\nstartsecs=10\nstopasgroup=true\n\n# watchdog to detect and restart the beat in case of inactivity\n# supervisord only restarts the process if it's dead\n# make sure this key matches ONYX_CELERY_BEAT_HEARTBEAT_KEY\n[program:supervisord_watchdog_celery_beat]\ncommand=python onyx/utils/supervisord_watchdog.py\n    --conf /etc/supervisor/conf.d/supervisord.conf\n    --key \"onyx:celery:beat:heartbeat\"\n    --program celery_beat\nstdout_logfile=/var/log/supervisord_watchdog_celery_beat.log\nstdout_logfile_maxbytes=16MB\nredirect_stderr=true\nstartsecs=10\nstopasgroup=true\n\n# Listens for Slack messages and responds with answers\n# for all channels that the OnyxBot has been added to.\n# If not setup, this will just fail 5 times and then stop.\n# More details on setup here: https://docs.onyx.app/admins/getting_started/slack_bot_setup\n[program:slack_bot]\ncommand=python onyx/onyxbot/slack/listener.py\nstdout_logfile=/var/log/slack_bot.log\nstdout_logfile_maxbytes=16MB\nredirect_stderr=true\nautorestart=true\nstartretries=5\nstartsecs=60\n\n# Listens for Discord messages and responds with answers\n# for all guilds/channels that the OnyxBot has been added to.\n# If not configured, will continue to probe every 3 minutes for a Discord bot token.\n[program:discord_bot]\ncommand=python onyx/onyxbot/discord/client.py\nstdout_logfile=/var/log/discord_bot.log\nstdout_logfile_maxbytes=16MB\nredirect_stderr=true\nautorestart=true\nstartretries=5\nstartsecs=60\n\n# Pushes all logs from the above programs to stdout\n# No log rotation here, since it's stdout it's handled by the Docker container logging\n[program:log-redirect-handler]\ncommand=tail -qF\n    /var/log/celery_beat.log\n    /var/log/celery_worker_primary.log\n    /var/log/celery_worker_light.log\n    /var/log/celery_worker_heavy.log\n    /var/log/celery_worker_docprocessing.log\n    /var/log/celery_worker_monitoring.log\n    /var/log/celery_worker_user_file_processing.log\n    /var/log/celery_worker_docfetching.log\n    /var/log/slack_bot.log\n    /var/log/discord_bot.log\n    /var/log/supervisord_watchdog_celery_beat.log\n    /var/log/mcp_server.log\n    /var/log/mcp_server.err.log\nstdout_logfile=/dev/stdout\nstdout_logfile_maxbytes = 0  # must be set to 0 when stdout_logfile=/dev/stdout\nautorestart=true\n"
  },
  {
    "path": "backend/tests/README.md",
    "content": "# Backend Tests\n\n## Test Types\n\nThere are four test categories, ordered by increasing scope:\n\n### Unit Tests (`tests/unit/`)\n\nNo external services. Mock all I/O with `unittest.mock`. Use for complex, isolated\nlogic (e.g. citation processing, encryption).\n\n```bash\npytest -xv backend/tests/unit\n```\n\n### External Dependency Unit Tests (`tests/external_dependency_unit/`)\n\nExternal services (Postgres, Redis, Vespa, OpenAI, etc.) are running, but Onyx\napplication containers are not. Tests call functions directly and can mock selectively.\n\nUse when you need a real database or real API calls but want control over setup.\n\n```bash\npython -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit\n```\n\n### Integration Tests (`tests/integration/`)\n\nFull Onyx deployment running. No mocking. Prefer this over other test types when possible.\n\n```bash\npython -m dotenv -f .vscode/.env run -- pytest backend/tests/integration\n```\n\n### Playwright / E2E Tests (`web/tests/e2e/`)\n\nFull stack including web server. Use for frontend-backend coordination.\n\n```bash\nnpx playwright test <TEST_NAME>\n```\n\n## Shared Fixtures\n\nShared fixtures live in `backend/tests/conftest.py`. Test subdirectories can define\ntheir own `conftest.py` for directory-scoped fixtures.\n\n## Running Tests Repeatedly (`pytest-repeat`)\n\nUse `pytest-repeat` to catch flaky tests by running them multiple times:\n\n```bash\n# Run a specific test 50 times\npytest --count=50 backend/tests/unit/path/to/test.py::test_name\n\n# Stop on first failure with -x\npytest --count=50 -x backend/tests/unit/path/to/test.py::test_name\n\n# Repeat an entire test file\npytest --count=10 backend/tests/unit/path/to/test_file.py\n```\n\n## Best Practices\n\n### Use `enable_ee` fixture instead of inlining\n\nEnables EE mode for a test, with proper teardown and cache clearing.\n\n```python\n# Whole file (in a test module, NOT in conftest.py)\npytestmark = pytest.mark.usefixtures(\"enable_ee\")\n\n# Whole directory — add an autouse wrapper to the directory's conftest.py\n@pytest.fixture(autouse=True)\ndef _enable_ee_for_directory(enable_ee: None) -> None:  \n    \"\"\"Wraps the shared enable_ee fixture with autouse for this directory.\"\"\"\n\n# Single test\ndef test_something(enable_ee: None) -> None: ...\n```\n\n**Note:** `pytestmark` in a `conftest.py` does NOT apply markers to tests in that\ndirectory — it only affects tests defined in the conftest itself (which is none).\nUse the autouse fixture wrapper pattern shown above instead.\n\nDo NOT inline `global_version.set_ee()` — always use the fixture.\n"
  },
  {
    "path": "backend/tests/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/api/test_api.py",
    "content": "import os\nfrom collections.abc import Generator\nfrom typing import Any\n\nimport pytest\nfrom fastapi import FastAPI\nfrom fastapi.testclient import TestClient\n\nfrom onyx.configs.constants import DEV_VERSION_PATTERN\nfrom onyx.configs.constants import STABLE_VERSION_PATTERN\nfrom onyx.main import fetch_versioned_implementation\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n@pytest.fixture(scope=\"function\")\ndef client() -> Generator[TestClient, Any, None]:\n    # Set environment variables\n    os.environ[\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\"] = \"True\"\n\n    # Initialize TestClient with the FastAPI app\n    app: FastAPI = fetch_versioned_implementation(\n        module=\"onyx.main\", attribute=\"get_application\"\n    )()\n    client = TestClient(app)\n    yield client\n\n\n@pytest.mark.skip(\n    reason=\"enable when we have a testing environment with preloaded data\"\n)\ndef test_handle_simplified_chat_message(client: TestClient) -> None:\n    req: dict[str, Any] = {}\n\n    req[\"persona_id\"] = 0\n    req[\"description\"] = \"pytest\"\n    response = client.post(\"/chat/create-chat-session\", json=req)\n    chat_session_id = response.json()[\"chat_session_id\"]\n\n    req = {}\n    req[\"chat_session_id\"] = chat_session_id\n    req[\"message\"] = \"hello\"\n\n    response = client.post(\"/chat/send-message-simple-api\", json=req)\n    assert response.status_code == 200\n\n\n@pytest.mark.skip(\n    reason=\"enable when we have a testing environment with preloaded data\"\n)\ndef test_handle_send_message_simple_with_history(client: TestClient) -> None:\n    req: dict[str, Any] = {}\n    messages = []\n    messages.append({\"message\": \"What sorts of questions can you answer for me?\"})\n    # messages.append({\"message\":\n    #                  \"I'd be happy to assist you with a wide range of questions related to Ramp's expense management platform. \"\n    #                  \"I can help with topics such as:\\n\\n\"\n    #                  \"1. Setting up and managing your Ramp account\\n\"\n    #                  \"2. Using Ramp cards and making purchases\\n\"\n    #                  \"3. Submitting and reviewing expenses\\n\"\n    #                  \"4. Understanding Ramp's features and benefits\\n\"\n    #                  \"5. Navigating the Ramp dashboard and mobile app\\n\"\n    #                  \"6. Managing team spending and budgets\\n\"\n    #                  \"7. Integrating Ramp with accounting software\\n\"\n    #                  \"8. Troubleshooting common issues\\n\\n\"\n    #                  \"Feel free to ask any specific questions you have about using Ramp, \"\n    #                  \"and I'll do my best to provide clear and helpful answers. \"\n    #                  \"Is there a particular area you'd like to know more about?\",\n    #                  \"role\": \"assistant\"})\n    # req[\"prompt_id\"] = 9\n    # req[\"persona_id\"] = 6\n\n    # Yoda\n    req[\"persona_id\"] = 1\n    messages.append(\n        {\n            \"message\": \"Answer questions for you, I can. \"\n            \"About many topics, knowledge I have. \"\n            \"But specific to documents provided, limited my responses are. \"\n            \"Ask you may about:\\n\\n\"\n            \"- User interviews and building trust with participants\\n\"\n            \"- Designing effective surveys and survey questions  \\n\"\n            \"- Product analysis approaches\\n\"\n            \"- Recruiting participants for research\\n\"\n            \"- Discussion guides for user interviews\\n\"\n            \"- Types of survey questions\\n\\n\"\n            \"More there may be, but focus on these areas, the given context does. \"\n            \"Specific questions you have, ask you should. Guide you I will, as best I can.\",\n            \"role\": \"assistant\",\n        }\n    )\n    # messages.append({\"message\": \"Where can I pilot a survey?\"})\n\n    # messages.append({\"message\": \"How many data points should I collect to validate my solution?\"})\n    messages.append({\"message\": \"What is solution validation research used for?\"})\n\n    req[\"messages\"] = messages\n\n    response = client.post(\"/chat/send-message-simple-with-history\", json=req)\n    assert response.status_code == 200\n\n    resp_json = response.json()\n\n    # persona must have LLM relevance enabled for this to pass\n    assert len(resp_json[\"llm_selected_doc_indices\"]) > 0\n\n\ndef test_versions_endpoint(client: TestClient) -> None:\n    \"\"\"Test that /api/versions endpoint returns valid stable, dev, and migration configurations\"\"\"\n    response = client.get(\"/versions\")\n    assert response.status_code == 200\n\n    data = response.json()\n\n    # Verify the top-level structure\n    assert \"stable\" in data\n    assert \"dev\" in data\n    assert \"migration\" in data\n\n    # Verify stable configuration\n    stable = data[\"stable\"]\n    assert \"onyx\" in stable\n    assert \"relational_db\" in stable\n    assert \"index\" in stable\n    assert \"nginx\" in stable\n\n    # Verify stable version follows correct pattern (v1.2.3)\n    # If this fails, revise latest Github release for typo or incorrect version name\n    assert STABLE_VERSION_PATTERN.match(\n        stable[\"onyx\"]\n    ), f\"Stable version {stable['onyx']} doesn't match pattern v(number).(number).(number)\"\n\n    # Verify dev configuration\n    dev = data[\"dev\"]\n    assert \"onyx\" in dev\n    assert \"relational_db\" in dev\n    assert \"index\" in dev\n    assert \"nginx\" in dev\n\n    # Verify dev version follows correct pattern (v1.2.3-beta.4)\n    assert DEV_VERSION_PATTERN.match(\n        dev[\"onyx\"]\n    ), f\"Dev version {dev['onyx']} doesn't match pattern v(number).(number).(number)-beta.(number)\"\n\n    # Verify migration configuration\n    migration = data[\"migration\"]\n    assert \"onyx\" in migration\n    assert \"relational_db\" in migration\n    assert \"index\" in migration\n    assert \"nginx\" in migration\n\n    # Verify migration has expected values\n    assert migration[\"onyx\"] == \"airgapped-intfloat-nomic-migration\"\n    assert migration[\"relational_db\"] == \"postgres:15.2-alpine\"\n    assert migration[\"index\"] == \"vespaengine/vespa:8.277.17\"\n    assert migration[\"nginx\"] == \"nginx:1.25.5-alpine\"\n\n    # Verify versions are different between stable and dev\n    assert stable[\"onyx\"] != dev[\"onyx\"], \"Stable and dev versions should be different\"\n\n    # Additional validation: ensure all required fields are strings\n    for config_name, config in [\n        (\"stable\", stable),\n        (\"dev\", dev),\n        (\"migration\", migration),\n    ]:\n        for field_name, field_value in config.items():\n            assert isinstance(\n                field_value, str\n            ), f\"{config_name}.{field_name} should be a string, got {type(field_value)}\"\n            assert (\n                field_value.strip() != \"\"\n            ), f\"{config_name}.{field_name} should not be empty\"\n"
  },
  {
    "path": "backend/tests/conftest.py",
    "content": "\"\"\"Root conftest — shared fixtures available to all test directories.\"\"\"\n\nfrom collections.abc import Generator\n\nimport pytest\n\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import global_version\n\n\n@pytest.fixture()\ndef enable_ee() -> Generator[None, None, None]:\n    \"\"\"Temporarily enable EE mode for a single test.\n\n    Restores the previous EE state and clears the versioned-implementation\n    cache on teardown so state doesn't leak between tests.\n    \"\"\"\n    was_ee = global_version.is_ee_version()\n    global_version.set_ee()\n    fetch_versioned_implementation.cache_clear()\n    yield\n    if not was_ee:\n        global_version.unset_ee()\n    fetch_versioned_implementation.cache_clear()\n"
  },
  {
    "path": "backend/tests/daily/conftest.py",
    "content": "import os\n\n# Set environment variables BEFORE any other imports to ensure they're picked up\n# by module-level code that reads env vars at import time\n# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license\nos.environ[\"LICENSE_ENFORCEMENT_ENABLED\"] = \"false\"\n\nfrom collections.abc import AsyncGenerator\nfrom collections.abc import Generator\nfrom contextlib import asynccontextmanager\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom dotenv import load_dotenv\nfrom fastapi import FastAPI\nfrom fastapi.testclient import TestClient\n\nfrom onyx.auth.users import current_admin_user\nfrom onyx.db.engine.sql_engine import get_session\nfrom onyx.db.models import UserRole\nfrom onyx.main import get_application\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\nload_dotenv()\n\n\n@asynccontextmanager\nasync def test_lifespan(\n    app: FastAPI,  # noqa: ARG001\n) -> AsyncGenerator[None, None]:  # noqa: ARG001\n    \"\"\"No-op lifespan for tests that don't need database or other services.\"\"\"\n    yield\n\n\ndef mock_get_session() -> Generator[MagicMock, None, None]:\n    \"\"\"Mock database session for tests that don't actually need DB access.\"\"\"\n    yield MagicMock()\n\n\ndef mock_current_admin_user() -> MagicMock:\n    \"\"\"Mock admin user for endpoints protected by current_admin_user.\"\"\"\n    mock_admin = MagicMock()\n    mock_admin.role = UserRole.ADMIN\n    return mock_admin\n\n\n@pytest.fixture(scope=\"function\")\ndef client() -> Generator[TestClient, None, None]:\n    # Initialize TestClient with the FastAPI app using a no-op test lifespan.\n    # Patch out prometheus metrics setup to avoid \"Duplicated timeseries in\n    # CollectorRegistry\" errors when multiple tests each create a new app\n    # (prometheus registers metrics globally and rejects duplicate names).\n    with patch(\"onyx.main.setup_prometheus_metrics\"):\n        app: FastAPI = get_application(lifespan_override=test_lifespan)\n\n    # Override the database session dependency with a mock\n    # (these tests don't actually need DB access)\n    app.dependency_overrides[get_session] = mock_get_session\n    app.dependency_overrides[current_admin_user] = mock_current_admin_user\n\n    # Use TestClient as a context manager to properly trigger lifespan\n    with TestClient(app) as client:\n        yield client\n\n    # Clean up dependency overrides\n    app.dependency_overrides.clear()\n"
  },
  {
    "path": "backend/tests/daily/connectors/airtable/test_airtable_basic.py",
    "content": "import os\nfrom typing import cast\nfrom unittest.mock import MagicMock\n\nimport pytest\nfrom pydantic import BaseModel\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.airtable.airtable_connector import AirtableConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\n\nBASE_VIEW_ID = \"viwVUEJjWPd8XYjh8\"\n\n\nclass AirtableConfig(BaseModel):\n    base_id: str\n    table_identifier: str\n    access_token: str\n\n\n@pytest.fixture(params=[True, False])\ndef airtable_config(request: pytest.FixtureRequest) -> AirtableConfig:\n    table_identifier = (\n        os.environ[\"AIRTABLE_TEST_TABLE_NAME\"]\n        if request.param\n        else os.environ[\"AIRTABLE_TEST_TABLE_ID\"]\n    )\n    return AirtableConfig(\n        base_id=os.environ[\"AIRTABLE_TEST_BASE_ID\"],\n        table_identifier=table_identifier,\n        access_token=os.environ[\"AIRTABLE_ACCESS_TOKEN\"],\n    )\n\n\ndef create_test_document(\n    id: str,\n    title: str,\n    description: str,\n    priority: str,\n    status: str,\n    # Link to another record is skipped for now\n    # category: str,\n    ticket_id: str,\n    created_time: str,\n    status_last_changed: str,\n    submitted_by: str,\n    assignee: str,\n    days_since_status_change: int | None,\n    attachments: list[tuple[str, str]] | None = None,\n    all_fields_as_metadata: bool = False,\n    share_id: str | None = None,\n    view_id: str | None = None,\n) -> Document:\n    base_id = os.environ.get(\"AIRTABLE_TEST_BASE_ID\")\n    table_id = os.environ.get(\"AIRTABLE_TEST_TABLE_ID\")\n    missing_vars = []\n    if not base_id:\n        missing_vars.append(\"AIRTABLE_TEST_BASE_ID\")\n    if not table_id:\n        missing_vars.append(\"AIRTABLE_TEST_TABLE_ID\")\n\n    if missing_vars:\n        raise RuntimeError(\n            f\"Required environment variables not set: {', '.join(missing_vars)}. \"\n            \"These variables are required to run Airtable connector tests.\"\n        )\n    link_base = f\"https://airtable.com/{base_id}\"\n    if share_id:\n        link_base = f\"{link_base}/{share_id}\"\n    link_base = f\"{link_base}/{table_id}\"\n    if view_id:\n        link_base = f\"{link_base}/{view_id}\"\n\n    sections = []\n\n    if not all_fields_as_metadata:\n        sections.extend(\n            [\n                TextSection(\n                    text=f\"Title:\\n------------------------\\n{title}\\n------------------------\",\n                    link=f\"{link_base}/{id}\",\n                ),\n                TextSection(\n                    text=f\"Description:\\n------------------------\\n{description}\\n------------------------\",\n                    link=f\"{link_base}/{id}\",\n                ),\n            ]\n        )\n\n    if attachments:\n        for attachment_text, attachment_link in attachments:\n            sections.append(\n                TextSection(\n                    text=f\"Attachment:\\n------------------------\\n{attachment_text}\\n------------------------\",\n                    link=attachment_link,\n                ),\n            )\n\n    metadata: dict[str, str | list[str]] = {\n        # \"Category\": category,\n        \"Assignee\": assignee,\n        \"Submitted by\": submitted_by,\n        \"Priority\": priority,\n        \"Status\": status,\n        \"Created time\": created_time,\n        \"ID\": ticket_id,\n        \"Status last changed\": status_last_changed,\n        **(\n            {\"Days since status change\": str(days_since_status_change)}\n            if days_since_status_change is not None\n            else {}\n        ),\n    }\n\n    if all_fields_as_metadata:\n        metadata.update(\n            {\n                \"Title\": title,\n                \"Description\": description,\n            }\n        )\n\n    return Document(\n        id=f\"airtable__{id}\",\n        sections=cast(list[TextSection | ImageSection], sections),\n        source=DocumentSource.AIRTABLE,\n        semantic_identifier=f\"{os.environ.get('AIRTABLE_TEST_TABLE_NAME', '')}: {title}\",\n        metadata=metadata,\n        doc_updated_at=None,\n        primary_owners=None,\n        secondary_owners=None,\n        title=None,\n        from_ingestion_api=False,\n        additional_info=None,\n    )\n\n\ndef compare_documents(\n    actual_docs: list[Document], expected_docs: list[Document]\n) -> None:\n    \"\"\"Utility function to compare actual and expected documents, ignoring order.\"\"\"\n    actual_docs_dict = {doc.id: doc for doc in actual_docs}\n    expected_docs_dict = {doc.id: doc for doc in expected_docs}\n\n    assert actual_docs_dict.keys() == expected_docs_dict.keys(), \"Document ID mismatch\"\n\n    for doc_id in actual_docs_dict:\n        actual = actual_docs_dict[doc_id]\n        expected = expected_docs_dict[doc_id]\n\n        assert (\n            actual.source == expected.source\n        ), f\"Source mismatch for document {doc_id}\"\n        assert (\n            actual.semantic_identifier == expected.semantic_identifier\n        ), f\"Semantic identifier mismatch for document {doc_id}\"\n        assert (\n            actual.metadata == expected.metadata\n        ), f\"Metadata mismatch for document {doc_id}\"\n        assert (\n            actual.doc_updated_at == expected.doc_updated_at\n        ), f\"Updated at mismatch for document {doc_id}\"\n        assert (\n            actual.primary_owners == expected.primary_owners\n        ), f\"Primary owners mismatch for document {doc_id}\"\n        assert (\n            actual.secondary_owners == expected.secondary_owners\n        ), f\"Secondary owners mismatch for document {doc_id}\"\n        assert actual.title == expected.title, f\"Title mismatch for document {doc_id}\"\n        assert (\n            actual.from_ingestion_api == expected.from_ingestion_api\n        ), f\"Ingestion API flag mismatch for document {doc_id}\"\n        assert (\n            actual.additional_info == expected.additional_info\n        ), f\"Additional info mismatch for document {doc_id}\"\n\n        # Compare sections\n        assert len(actual.sections) == len(\n            expected.sections\n        ), f\"Number of sections mismatch for document {doc_id}\"\n        for i, (actual_section, expected_section) in enumerate(\n            zip(actual.sections, expected.sections)\n        ):\n            assert (\n                actual_section.text == expected_section.text\n            ), f\"Section {i} text mismatch for document {doc_id}\"\n            assert (\n                actual_section.link == expected_section.link\n            ), f\"Section {i} link mismatch for document {doc_id}\"\n\n\ndef test_airtable_connector_basic(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    airtable_config: AirtableConfig,\n) -> None:\n    \"\"\"Test behavior when all non-attachment fields are treated as metadata.\"\"\"\n    connector = AirtableConnector(\n        base_id=airtable_config.base_id,\n        table_name_or_id=airtable_config.table_identifier,\n        treat_all_non_attachment_fields_as_metadata=False,\n    )\n    connector.load_credentials(\n        {\n            \"airtable_access_token\": airtable_config.access_token,\n        }\n    )\n    doc_batch_generator = connector.load_from_state()\n    doc_batch = [\n        doc for doc in next(doc_batch_generator) if not isinstance(doc, HierarchyNode)\n    ]\n    with pytest.raises(StopIteration):\n        next(doc_batch_generator)\n\n    assert len(doc_batch) == 2\n\n    expected_docs = [\n        create_test_document(\n            id=\"rec8BnxDLyWeegOuO\",\n            title=\"Slow Internet\",\n            description=\"The internet connection is very slow.\",\n            priority=\"Medium\",\n            status=\"In Progress\",\n            ticket_id=\"2\",\n            created_time=\"2024-12-24T21:02:49.000Z\",\n            status_last_changed=\"2024-12-24T21:02:49.000Z\",\n            days_since_status_change=0,\n            assignee=\"Chris Weaver (chris@onyx.app)\",\n            submitted_by=\"Chris Weaver (chris@onyx.app)\",\n            all_fields_as_metadata=False,\n            view_id=BASE_VIEW_ID,\n        ),\n        create_test_document(\n            id=\"reccSlIA4pZEFxPBg\",\n            title=\"Printer Issue\",\n            description=\"The office printer is not working.\",\n            priority=\"High\",\n            status=\"Open\",\n            ticket_id=\"1\",\n            created_time=\"2024-12-24T21:02:49.000Z\",\n            status_last_changed=\"2024-12-24T21:02:49.000Z\",\n            days_since_status_change=0,\n            assignee=\"Chris Weaver (chris@onyx.app)\",\n            submitted_by=\"Chris Weaver (chris@onyx.app)\",\n            attachments=[\n                (\n                    \"Test.pdf:\\ntesting!!!\",\n                    \"https://airtable.com/appCXJqDFS4gea8tn/tblRxFQsTlBBZdRY1/viwVUEJjWPd8XYjh8/reccSlIA4pZEFxPBg/fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide\",\n                )\n            ],\n            all_fields_as_metadata=False,\n            view_id=BASE_VIEW_ID,\n        ),\n    ]\n\n    # Compare documents using the utility function\n    compare_documents(doc_batch, expected_docs)\n\n\ndef test_airtable_connector_url(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    airtable_config: AirtableConfig,\n) -> None:\n    \"\"\"Test that passing an Airtable URL produces the same results as base_id + table_id.\"\"\"\n    if not airtable_config.table_identifier.startswith(\"tbl\"):\n        pytest.skip(\"URL test requires table ID, not table name\")\n\n    url = f\"https://airtable.com/{airtable_config.base_id}/{airtable_config.table_identifier}/{BASE_VIEW_ID}\"\n    connector = AirtableConnector(\n        airtable_url=url,\n        treat_all_non_attachment_fields_as_metadata=False,\n    )\n    connector.load_credentials({\"airtable_access_token\": airtable_config.access_token})\n\n    doc_batch_generator = connector.load_from_state()\n    doc_batch = [\n        doc for doc in next(doc_batch_generator) if not isinstance(doc, HierarchyNode)\n    ]\n    with pytest.raises(StopIteration):\n        next(doc_batch_generator)\n\n    assert len(doc_batch) == 2\n\n    expected_docs = [\n        create_test_document(\n            id=\"rec8BnxDLyWeegOuO\",\n            title=\"Slow Internet\",\n            description=\"The internet connection is very slow.\",\n            priority=\"Medium\",\n            status=\"In Progress\",\n            ticket_id=\"2\",\n            created_time=\"2024-12-24T21:02:49.000Z\",\n            status_last_changed=\"2024-12-24T21:02:49.000Z\",\n            days_since_status_change=0,\n            assignee=\"Chris Weaver (chris@onyx.app)\",\n            submitted_by=\"Chris Weaver (chris@onyx.app)\",\n            all_fields_as_metadata=False,\n            view_id=BASE_VIEW_ID,\n        ),\n        create_test_document(\n            id=\"reccSlIA4pZEFxPBg\",\n            title=\"Printer Issue\",\n            description=\"The office printer is not working.\",\n            priority=\"High\",\n            status=\"Open\",\n            ticket_id=\"1\",\n            created_time=\"2024-12-24T21:02:49.000Z\",\n            status_last_changed=\"2024-12-24T21:02:49.000Z\",\n            days_since_status_change=0,\n            assignee=\"Chris Weaver (chris@onyx.app)\",\n            submitted_by=\"Chris Weaver (chris@onyx.app)\",\n            attachments=[\n                (\n                    \"Test.pdf:\\ntesting!!!\",\n                    f\"https://airtable.com/{airtable_config.base_id}/{airtable_config.table_identifier}/{BASE_VIEW_ID}/reccSlIA4pZEFxPBg/fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide\",\n                )\n            ],\n            all_fields_as_metadata=False,\n            view_id=BASE_VIEW_ID,\n        ),\n    ]\n\n    compare_documents(doc_batch, expected_docs)\n\n\ndef test_airtable_connector_index_all(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    airtable_config: AirtableConfig,\n) -> None:\n    \"\"\"Test index_all mode discovers all bases/tables and returns documents.\n\n    The test token has access to one base (\"Onyx\") with three tables:\n      - Tickets: 3 records, 2 with content (1 empty record is skipped)\n      - Support Categories: 4 records, all with Category Name field\n      - Table 3: 3 records, 1 with content (2 empty records are skipped)\n    Total expected: 7 documents\n    \"\"\"\n    connector = AirtableConnector()\n    connector.load_credentials({\"airtable_access_token\": airtable_config.access_token})\n\n    all_docs: list[Document] = []\n    for batch in connector.load_from_state():\n        for item in batch:\n            if isinstance(item, Document):\n                all_docs.append(item)\n\n    # 2 from Tickets + 4 from Support Categories + 1 from Table 3 = 7\n    assert len(all_docs) == 7\n\n    docs_by_id = {d.id: d for d in all_docs}\n\n    # Verify all expected document IDs are present\n    expected_ids = {\n        # Tickets\n        \"airtable__rec8BnxDLyWeegOuO\",\n        \"airtable__reccSlIA4pZEFxPBg\",\n        # Support Categories\n        \"airtable__rec5SgUDcHXcBc8kS\",\n        \"airtable__recD3DQHc0BQkDaqX\",\n        \"airtable__recPHdnWu1Q9ZxyTg\",\n        \"airtable__recWbIElUDz9HjgMd\",\n        # Table 3\n        \"airtable__recNalBz02QU1LhbM\",\n    }\n    assert docs_by_id.keys() == expected_ids\n\n    # In index_all mode, semantic identifiers include \"Base Name > Table Name: Primary Field\"\n    assert (\n        docs_by_id[\"airtable__rec8BnxDLyWeegOuO\"].semantic_identifier\n        == \"Onyx > Tickets: Slow Internet\"\n    )\n    assert (\n        docs_by_id[\"airtable__rec5SgUDcHXcBc8kS\"].semantic_identifier\n        == \"Onyx > Support Categories: Software Development\"\n    )\n    assert (\n        docs_by_id[\"airtable__recNalBz02QU1LhbM\"].semantic_identifier\n        == \"Onyx > Table 3: A\"\n    )\n\n    # Verify hierarchy metadata on a Tickets doc\n    tickets_doc = docs_by_id[\"airtable__rec8BnxDLyWeegOuO\"]\n    assert tickets_doc.doc_metadata is not None\n    hierarchy = tickets_doc.doc_metadata[\"hierarchy\"]\n    assert hierarchy[\"source_path\"] == [\"Onyx\", \"Tickets\"]\n    assert hierarchy[\"base_id\"] == airtable_config.base_id\n    assert hierarchy[\"base_name\"] == \"Onyx\"\n    assert hierarchy[\"table_name\"] == \"Tickets\"\n\n    # Verify hierarchy on a Support Categories doc\n    cat_doc = docs_by_id[\"airtable__rec5SgUDcHXcBc8kS\"]\n    assert cat_doc.doc_metadata is not None\n    assert cat_doc.doc_metadata[\"hierarchy\"][\"source_path\"] == [\n        \"Onyx\",\n        \"Support Categories\",\n    ]\n\n\ndef test_airtable_connector_all_metadata(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    airtable_config: AirtableConfig,\n) -> None:\n    connector = AirtableConnector(\n        base_id=airtable_config.base_id,\n        table_name_or_id=airtable_config.table_identifier,\n        treat_all_non_attachment_fields_as_metadata=True,\n    )\n    connector.load_credentials(\n        {\n            \"airtable_access_token\": airtable_config.access_token,\n        }\n    )\n    doc_batch_generator = connector.load_from_state()\n    doc_batch = [\n        doc for doc in next(doc_batch_generator) if not isinstance(doc, HierarchyNode)\n    ]\n    with pytest.raises(StopIteration):\n        next(doc_batch_generator)\n\n    # NOTE: one of the rows has no attachments -> no content -> no document\n    assert len(doc_batch) == 1\n\n    expected_docs = [\n        create_test_document(\n            id=\"reccSlIA4pZEFxPBg\",\n            title=\"Printer Issue\",\n            description=\"The office printer is not working.\",\n            priority=\"High\",\n            status=\"Open\",\n            # Link to another record is skipped for now\n            # category=\"Software Development\",\n            ticket_id=\"1\",\n            created_time=\"2024-12-24T21:02:49.000Z\",\n            status_last_changed=\"2024-12-24T21:02:49.000Z\",\n            days_since_status_change=0,\n            assignee=\"Chris Weaver (chris@onyx.app)\",\n            submitted_by=\"Chris Weaver (chris@onyx.app)\",\n            attachments=[\n                (\n                    \"Test.pdf:\\ntesting!!!\",\n                    # hard code link for now\n                    \"https://airtable.com/appCXJqDFS4gea8tn/tblRxFQsTlBBZdRY1/viwVUEJjWPd8XYjh8/reccSlIA4pZEFxPBg/fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide\",\n                )\n            ],\n            all_fields_as_metadata=True,\n            view_id=BASE_VIEW_ID,\n        ),\n    ]\n\n    # Compare documents using the utility function\n    compare_documents(doc_batch, expected_docs)\n\n\ndef test_airtable_connector_with_share_and_view(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    airtable_config: AirtableConfig,\n) -> None:\n    \"\"\"Test behavior when using share_id and view_id for URL generation.\"\"\"\n    SHARE_ID = \"shrkfjEzDmLaDtK83\"\n\n    connector = AirtableConnector(\n        base_id=airtable_config.base_id,\n        table_name_or_id=airtable_config.table_identifier,\n        treat_all_non_attachment_fields_as_metadata=False,\n        share_id=SHARE_ID,\n        view_id=BASE_VIEW_ID,\n    )\n    connector.load_credentials(\n        {\n            \"airtable_access_token\": airtable_config.access_token,\n        }\n    )\n    doc_batch_generator = connector.load_from_state()\n    doc_batch = [\n        doc for doc in next(doc_batch_generator) if not isinstance(doc, HierarchyNode)\n    ]\n    with pytest.raises(StopIteration):\n        next(doc_batch_generator)\n\n    assert len(doc_batch) == 2\n\n    expected_docs = [\n        create_test_document(\n            id=\"rec8BnxDLyWeegOuO\",\n            title=\"Slow Internet\",\n            description=\"The internet connection is very slow.\",\n            priority=\"Medium\",\n            status=\"In Progress\",\n            ticket_id=\"2\",\n            created_time=\"2024-12-24T21:02:49.000Z\",\n            status_last_changed=\"2024-12-24T21:02:49.000Z\",\n            days_since_status_change=0,\n            assignee=\"Chris Weaver (chris@onyx.app)\",\n            submitted_by=\"Chris Weaver (chris@onyx.app)\",\n            all_fields_as_metadata=False,\n            share_id=SHARE_ID,\n            view_id=BASE_VIEW_ID,\n        ),\n        create_test_document(\n            id=\"reccSlIA4pZEFxPBg\",\n            title=\"Printer Issue\",\n            description=\"The office printer is not working.\",\n            priority=\"High\",\n            status=\"Open\",\n            ticket_id=\"1\",\n            created_time=\"2024-12-24T21:02:49.000Z\",\n            status_last_changed=\"2024-12-24T21:02:49.000Z\",\n            days_since_status_change=0,\n            assignee=\"Chris Weaver (chris@onyx.app)\",\n            submitted_by=\"Chris Weaver (chris@onyx.app)\",\n            attachments=[\n                (\n                    \"Test.pdf:\\ntesting!!!\",\n                    (\n                        f\"https://airtable.com/{airtable_config.base_id}/{SHARE_ID}/\"\n                        f\"{os.environ['AIRTABLE_TEST_TABLE_ID']}/{BASE_VIEW_ID}/reccSlIA4pZEFxPBg/\"\n                        \"fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide\"\n                    ),\n                )\n            ],\n            all_fields_as_metadata=False,\n            share_id=SHARE_ID,\n            view_id=BASE_VIEW_ID,\n        ),\n    ]\n\n    # Compare documents using the utility function\n    compare_documents(doc_batch, expected_docs)\n"
  },
  {
    "path": "backend/tests/daily/connectors/bitbucket/conftest.py",
    "content": "from tests.load_env_vars import load_env_vars\n\n\n# Load environment variables at the module level\nload_env_vars()\n"
  },
  {
    "path": "backend/tests/daily/connectors/bitbucket/test_bitbucket_checkpointed.py",
    "content": "import os\nimport time\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.bitbucket.connector import BitbucketConnector\nfrom tests.daily.connectors.utils import load_all_from_connector\n\n\n@pytest.fixture\ndef bitbucket_connector_for_checkpoint() -> BitbucketConnector:\n    \"\"\"Daily fixture for Bitbucket checkpointed indexing.\n\n    Env vars:\n    - BITBUCKET_EMAIL: Bitbucket account email\n    - BITBUCKET_API_TOKEN: Bitbucket app password/token\n    - BITBUCKET_WORKSPACE: workspace id\n    - BITBUCKET_REPOSITORIES: comma-separated slugs\n    - BITBUCKET_PROJECTS: optional comma-separated project keys\n    \"\"\"\n    workspace = os.environ[\"BITBUCKET_WORKSPACE\"]\n    repositories = os.environ.get(\"BITBUCKET_REPOSITORIES\")\n    projects = os.environ.get(\"BITBUCKET_PROJECTS\")\n\n    connector = BitbucketConnector(\n        workspace=workspace,\n        repositories=repositories,\n        projects=projects,\n        batch_size=10,\n    )\n\n    email = os.environ.get(\"BITBUCKET_EMAIL\")\n    token = os.environ.get(\"BITBUCKET_API_TOKEN\")\n    if not email or not token:\n        pytest.skip(\"BITBUCKET_EMAIL or BITBUCKET_API_TOKEN not set in environment\")\n\n    connector.load_credentials({\"bitbucket_email\": email, \"bitbucket_api_token\": token})\n    return connector\n\n\ndef test_bitbucket_checkpointed_load(\n    bitbucket_connector_for_checkpoint: BitbucketConnector,\n) -> None:\n    # Use a broad window; results may be empty depending on repository state\n    start = 1755004439  # Tue Aug 12 2025 13:13:59 UTC\n    end = time.time()\n\n    docs = load_all_from_connector(\n        connector=bitbucket_connector_for_checkpoint,\n        start=start,\n        end=end,\n    ).documents\n\n    assert isinstance(docs, list)\n\n    for doc in docs:\n        assert doc.source == DocumentSource.BITBUCKET\n        assert doc.metadata is not None\n        assert doc.metadata.get(\"object_type\") == \"PullRequest\"\n        assert \"id\" in doc.metadata\n        assert \"state\" in doc.metadata\n        assert \"title\" in doc.metadata\n        assert \"updated_on\" in doc.metadata\n\n        # Basic section checks\n        assert len(doc.sections) >= 1\n        section = doc.sections[0]\n        assert isinstance(section.link, str)\n        assert isinstance(section.text, str)\n"
  },
  {
    "path": "backend/tests/daily/connectors/bitbucket/test_bitbucket_slim_connector.py",
    "content": "import os\nimport time\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.bitbucket.connector import BitbucketConnector\nfrom onyx.connectors.models import HierarchyNode\nfrom tests.daily.connectors.utils import load_all_from_connector\n\n\n@pytest.fixture\ndef bitbucket_connector_for_slim() -> BitbucketConnector:\n    workspace = os.environ[\"BITBUCKET_WORKSPACE\"]\n    repositories = os.environ.get(\"BITBUCKET_REPOSITORIES\")\n    projects = os.environ.get(\"BITBUCKET_PROJECTS\")\n\n    connector = BitbucketConnector(\n        workspace=workspace,\n        repositories=repositories,\n        projects=projects,\n        batch_size=10,\n    )\n\n    email = os.environ.get(\"BITBUCKET_EMAIL\")\n    token = os.environ.get(\"BITBUCKET_API_TOKEN\")\n    if not email or not token:\n        pytest.skip(\"BITBUCKET_EMAIL or BITBUCKET_API_TOKEN not set in environment\")\n\n    connector.load_credentials({\"bitbucket_email\": email, \"bitbucket_api_token\": token})\n    return connector\n\n\ndef test_bitbucket_full_ids_subset_of_slim_ids(\n    bitbucket_connector_for_slim: BitbucketConnector,\n) -> None:\n    # Get all full doc IDs from load_from_state\n    docs = load_all_from_connector(\n        connector=bitbucket_connector_for_slim,\n        start=0,\n        end=time.time(),\n    ).documents\n    all_full_doc_ids: set[str] = set([doc.id for doc in docs])\n\n    # Get all doc IDs from the slim connector\n    all_slim_doc_ids: set[str] = set()\n    for (\n        slim_doc_batch\n    ) in bitbucket_connector_for_slim.retrieve_all_slim_docs_perm_sync():\n        all_slim_doc_ids.update(\n            [doc.id for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # The set of full doc IDs should always be a subset of slim doc IDs\n    assert all_full_doc_ids.issubset(all_slim_doc_ids)\n    # Make sure we actually got some documents\n    assert len(all_slim_doc_ids) > 0\n\n    # Basic sanity checks if any docs exist\n    if all_slim_doc_ids:\n        example_id = next(iter(all_slim_doc_ids))\n        assert example_id.startswith(f\"{DocumentSource.BITBUCKET.value}:\")\n"
  },
  {
    "path": "backend/tests/daily/connectors/blob/test_blob_connector.py",
    "content": "import os\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom urllib.parse import parse_qs\nfrom urllib.parse import unquote\nfrom urllib.parse import urlparse\n\nimport pytest\n\nfrom onyx.configs.constants import BlobType\nfrom onyx.connectors.blob.connector import BlobStorageConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.file_processing.extract_file_text import get_file_ext\nfrom onyx.file_processing.file_types import OnyxFileExtensions\n\n\n@pytest.fixture\ndef blob_connector(request: pytest.FixtureRequest) -> BlobStorageConnector:\n    \"\"\"Fixture requires (BlobType, bucket_name) and optional init kwargs.\n\n    Param format: (BlobType, bucket_name, {optional init kwargs})\n    - The 3rd element is optional and, if provided, must be a dict.\n    - Extra kwargs are passed to BlobStorageConnector.__init__.\n\n    Example:\n      @pytest.mark.parametrize(\n          \"blob_connector\",\n          [(BlobType.S3, \"my-bucket\"), (BlobType.S3, \"my-bucket\", {\"prefix\": \"foo/\"})],\n          indirect=True,\n      )\n    \"\"\"\n    try:\n        bucket_type, bucket_name, *rest = request.param\n    except Exception as e:\n        raise AssertionError(\n            \"blob_connector requires (BlobType, bucket_name, [init_kwargs])\"\n        ) from e\n\n    init_kwargs = rest[0] if rest else {}\n    if rest and not isinstance(init_kwargs, dict):\n        raise AssertionError(\"init_kwargs must be a dict if provided\")\n\n    if not isinstance(bucket_type, BlobType):\n        bucket_type = BlobType(bucket_type)\n\n    connector = BlobStorageConnector(\n        bucket_type=bucket_type, bucket_name=bucket_name, **init_kwargs\n    )\n\n    if bucket_type == BlobType.S3:\n        creds = {\n            \"aws_access_key_id\": os.environ[\"AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS\"],\n            \"aws_secret_access_key\": os.environ[\n                \"AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS\"\n            ],\n        }\n    elif bucket_type == BlobType.R2:\n        creds = {\n            \"account_id\": os.environ[\"R2_ACCOUNT_ID_DAILY_CONNECTOR_TESTS\"],\n            \"r2_access_key_id\": os.environ[\"R2_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS\"],\n            \"r2_secret_access_key\": os.environ[\n                \"R2_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS\"\n            ],\n        }\n    elif bucket_type == BlobType.GOOGLE_CLOUD_STORAGE:\n        creds = {\n            \"access_key_id\": os.environ[\"GCS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS\"],\n            \"secret_access_key\": os.environ[\n                \"GCS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS\"\n            ],\n        }\n    else:\n        # Until we figure out the Oracle log in, this fixture only supports S3, R2, and GCS.\n        raise AssertionError(f\"Unsupported bucket type: {bucket_type}\")\n\n    connector.load_credentials(creds)\n    return connector\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\n@pytest.mark.parametrize(\n    \"blob_connector\", [(BlobType.S3, \"onyx-connector-tests\")], indirect=True\n)\ndef test_blob_s3_connector(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    blob_connector: BlobStorageConnector,\n) -> None:\n    \"\"\"\n    Plain and document file types should be fully indexed.\n\n    Multimedia and unknown file types will be indexed be skipped unless `set_allow_images`\n    is called with `True`.\n\n    This is intentional in order to allow searching by just the title even if we can't\n    index the file content.\n    \"\"\"\n    all_docs: list[Document] = []\n    document_batches = blob_connector.load_from_state()\n    for doc_batch in document_batches:\n        for doc in doc_batch:\n            if isinstance(doc, HierarchyNode):\n                continue\n            all_docs.append(doc)\n\n    assert len(all_docs) == 15\n\n    for doc in all_docs:\n        section = doc.sections[0]\n        assert isinstance(section, TextSection)\n\n        file_extension = get_file_ext(doc.semantic_identifier)\n        if file_extension in OnyxFileExtensions.TEXT_AND_DOCUMENT_EXTENSIONS:\n            assert len(section.text) > 0\n            continue\n\n        # unknown extension\n        assert len(section.text) == 0\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\n@pytest.mark.parametrize(\n    \"blob_connector\", [(BlobType.S3, \"s3-role-connector-test\")], indirect=True\n)\ndef test_blob_s3_cross_region_and_citation_link(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    blob_connector: BlobStorageConnector,\n) -> None:\n    \"\"\"Buckets in a different region should be accessible and links should reflect the correct region.\n\n    Validates that using the same credentials we can access a bucket in a\n    different AWS region and that the generated object URL includes the bucket's\n    region and is a valid S3 dashboard URL.\n    \"\"\"\n\n    assert blob_connector.bucket_region == \"ap-south-1\"\n\n    # Load documents and validate the single object + its link\n    all_docs: list[Document] = []\n    for doc_batch in blob_connector.load_from_state():\n        all_docs.extend(\n            [doc for doc in doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # The test bucket contains exactly one object named \"Chapter 6.pdf\"\n    assert len(all_docs) == 1\n    doc = all_docs[0]\n    assert doc.semantic_identifier == \"Chapter 6.pdf\"\n\n    # Validate link\n    assert len(doc.sections) >= 1\n    link = doc.sections[0].link\n    assert link is not None and isinstance(link, str) and len(link) > 0\n\n    parsed = urlparse(link)\n    # Expect the link to be the AWS S3 console object URL\n    assert parsed.netloc == \"s3.console.aws.amazon.com\"\n    assert parsed.path == \"/s3/object/s3-role-connector-test\"\n\n    # Query should include region and prefix\n    query = parse_qs(parsed.query)\n    assert query.get(\"region\") == [\"ap-south-1\"]\n    assert \"prefix\" in query and len(query[\"prefix\"]) == 1\n    prefix_val = query[\"prefix\"][0]\n    # The prefix (object key) should decode to the filename\n    decoded_prefix = unquote(prefix_val)\n    assert decoded_prefix == \"Chapter 6.pdf\" or decoded_prefix.endswith(\n        \"/Chapter 6.pdf\"\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\n@pytest.mark.parametrize(\n    \"blob_connector\", [(BlobType.R2, \"asia-pacific-bucket\")], indirect=True\n)\ndef test_blob_r2_connector(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    blob_connector: BlobStorageConnector,\n) -> None:\n    \"\"\"Validate basic R2 connector creation and document loading\"\"\"\n\n    all_docs: list[Document] = []\n    for doc_batch in blob_connector.load_from_state():\n        all_docs.extend(\n            [doc for doc in doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    assert len(all_docs) >= 1\n    doc = all_docs[0]\n    assert len(doc.sections) >= 1\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\n@pytest.mark.parametrize(\n    \"blob_connector\",\n    [(BlobType.R2, \"onyx-daily-connector-tests\", {\"european_residency\": True})],\n    indirect=True,\n)\ndef test_blob_r2_eu_residency_connector(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    blob_connector: BlobStorageConnector,\n) -> None:\n    \"\"\"Validate R2 connector with European residency setting\"\"\"\n\n    all_docs: list[Document] = []\n    for doc_batch in blob_connector.load_from_state():\n        all_docs.extend(\n            [doc for doc in doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    assert len(all_docs) >= 1\n    doc = all_docs[0]\n    assert len(doc.sections) >= 1\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\n@pytest.mark.parametrize(\n    \"blob_connector\", [(BlobType.GOOGLE_CLOUD_STORAGE, \"onyx-test-1\")], indirect=True\n)\ndef test_blob_gcs_connector(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    blob_connector: BlobStorageConnector,\n) -> None:\n    all_docs: list[Document] = []\n    for doc_batch in blob_connector.load_from_state():\n        all_docs.extend(\n            [doc for doc in doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # At least one object from the test bucket\n    assert len(all_docs) >= 1\n    doc = all_docs[0]\n    assert len(doc.sections) >= 1\n"
  },
  {
    "path": "backend/tests/daily/connectors/coda/README.md",
    "content": "# Coda Connector Test Suite\n\n## Overview\n\nThe `test_coda_connector.py` file contains comprehensive integration tests for the Coda connector. These tests validate that the connector properly:\n- Authenticates with the Coda API\n- Retrieves documents, pages, and tables\n- Generates properly structured Onyx `Document` objects\n- Handles batch processing correctly\n- Supports workspace scoping\n- Polls for recent updates\n- Handles error cases gracefully\n\n## Prerequisites\n\n1. **Coda API Access**: You need a valid Coda account with at least one workspace containing documents, pages, or tables\n2. **Coda Bearer Token**: Generate an API token from your Coda account settings\n3. **Python Environment**: Backend dependencies installed (see backend/requirements)\n4. **Test Data**: Ideally, your Coda workspace should have:\n   - At least one document\n   - At least one page within a document\n   - At least one table within a document\n\n## Environment Variables\n\nThe test suite requires the following environment variables:\n\n### Required\n- **`CODA_BEARER_TOKEN`**: Your Coda API bearer token\n  - Get this from: Coda Account Settings → API Settings → Generate API Token\n  - Without this, tests will be skipped\n\n### Optional\n- **`CODA_BASE_URL`**: The Coda API base URL\n  - Default: `https://coda.io/apis/v1`\n  - Only override if using a different API endpoint\n\n- **`CODA_WORKSPACE_ID`**: A specific workspace ID to test workspace scoping\n  - If not provided, workspace-scoped tests will be skipped\n  - Find this by inspecting the Coda API response or your workspace URL\n\n## Running the Tests\n\n### Method 1: Run All Tests in the File\n\nFrom the `backend/` directory:\n\n```bash\n# Set environment variables and run all tests\nexport CODA_BEARER_TOKEN=\"your_token_here\"\npytest -v -s tests/daily/connectors/coda/test_coda_connector.py\n```\n\n### Method 2: Run a Specific Test Class\n\n```bash\n# Run only validation tests\nexport CODA_BEARER_TOKEN=\"your_token_here\"\npytest -v -s tests/daily/connectors/coda/test_coda_connector.py::TestCodaConnectorValidation\n\n# Run only load_from_state tests\npytest -v -s tests/daily/connectors/coda/test_coda_connector.py::TestLoadFromState\n```\n\n### Method 3: Run a Single Test\n\n```bash\n# Run a specific test function\nexport CODA_BEARER_TOKEN=\"your_token_here\"\npytest -v -s tests/daily/connectors/coda/test_coda_connector.py::TestLoadFromState::test_document_count_matches_expected\n```\n\n### Method 4: Using an Environment File\n\nCreate a `.env` file in `backend/tests/daily/connectors/coda/`:\n\n```bash\n# .env\nCODA_BEARER_TOKEN=your_token_here\nCODA_WORKSPACE_ID=your_workspace_id  # Optional\n```\n\nThen run with dotenv:\n\n```bash\ncd backend\npython -m dotenv -f tests/daily/connectors/coda/.env run -- pytest -v -s tests/daily/connectors/coda/test_coda_connector.py\n```\n\n### Method 5: Direct Execution\n\nThe test file can be run directly:\n\n```bash\nexport CODA_BEARER_TOKEN=\"your_token_here\"\ncd backend/tests/daily/connectors/coda\npython test_coda_connector.py\n```\n\n## Test Structure\n\n### Test Classes\n\n1. **`TestCodaConnectorValidation`**\n   - Validates connector settings and credentials\n   - Tests authentication success and failure cases\n   - Tests workspace-scoped connector validation\n\n2. **`TestLoadFromState`**\n   - Tests full document retrieval via `load_from_state()`\n   - Validates batch sizes, document counts, and structure\n   - Checks document fields, metadata, and content\n   - Verifies both page and table document generation\n   - Tests the `index_page_content` configuration flag\n\n3. **`TestPollSource`**\n   - Tests incremental updates via `poll_source()`\n   - Validates time-range filtering\n   - Checks that only updated documents are returned\n\n4. **`TestWorkspaceScoping`**\n   - Tests the `workspace_id` filtering functionality\n   - Validates that scoped connectors only retrieve documents from the specified workspace\n\n5. **`TestErrorHandling`**\n   - Tests graceful handling of edge cases\n   - Validates behavior with inaccessible content or empty tables\n\n## Common Test Patterns\n\n### Fixtures\n\nThe test suite uses pytest fixtures for setup:\n\n- **`coda_credentials`**: Loads and validates credentials from environment variables\n- **`connector`**: Creates a standard CodaConnector instance\n- **`workspace_scoped_connector`**: Creates a workspace-scoped connector (if `CODA_WORKSPACE_ID` is set)\n- **`reference_data`**: Fetches ground truth data from the Coda API for validation\n\n### Skipped Tests\n\nTests are automatically skipped when:\n- `CODA_BEARER_TOKEN` is not set\n- `CODA_WORKSPACE_ID` is not set (for workspace-scoped tests)\n- No documents, pages, or tables are found in the workspace\n\n## Troubleshooting\n\n### Tests are Skipped\n\n**Issue**: Tests show as \"SKIPPED\" instead of running\n\n**Solutions**:\n- Ensure `CODA_BEARER_TOKEN` is set and valid\n- Verify your Coda workspace has at least one document with pages or tables\n- For workspace tests, ensure `CODA_WORKSPACE_ID` is set\n\n### Authentication Errors\n\n**Issue**: Tests fail with authentication errors\n\n**Solutions**:\n- Verify your bearer token is valid and hasn't expired\n- Check that the token has appropriate API permissions\n- Ensure you're not hitting API rate limits\n\n### Document Count Mismatches\n\n**Issue**: Tests fail with \"Expected X documents but got Y\"\n\n**Possible Causes**:\n- API rate limiting causing partial data retrieval\n- Network issues during test execution\n- Changes to workspace data during test execution\n- Permission issues preventing access to some documents\n\n### Empty Content Errors\n\n**Issue**: Tests fail due to empty document content\n\n**Possible Causes**:\n- Pages without accessible content (permission issues)\n- Empty tables or pages in your workspace\n- The `index_page_content` flag set incorrectly\n\n## Test Execution Tips\n\n1. **Run tests during low-traffic times**: API rate limits may affect test reliability\n2. **Use a dedicated test workspace**: Avoid running tests on production workspaces with changing data\n3. **Check test output verbosity**: Use `-v` for verbose test names, `-s` to see print statements\n4. **Isolate failing tests**: Run specific test classes or functions to debug issues\n5. **Review fixture output**: The `reference_data` fixture prints warnings about API access issues\n\n## CI/CD Integration\n\nWhen integrating these tests into CI/CD pipelines:\n\n```yaml\n# Example GitHub Actions configuration\n- name: Run Coda Connector Tests\n  env:\n    CODA_BEARER_TOKEN: ${{ secrets.CODA_BEARER_TOKEN }}\n    CODA_WORKSPACE_ID: ${{ secrets.CODA_WORKSPACE_ID }}\n  run: |\n    cd backend\n    pytest -v tests/daily/connectors/coda/test_coda_connector.py\n```\n\nStore credentials as encrypted secrets in your CI/CD platform.\n\n## Expected Test Duration\n\n- Full test suite: ~30-60 seconds (depending on workspace size and API latency)\n- Individual test classes: ~5-15 seconds\n- Validation tests: <5 seconds\n\n## Additional Resources\n\n- [Coda API Documentation](https://coda.io/developers/apis/v1)\n- [Onyx Connector Documentation](../../../../onyx/connectors/README.md)\n- [pytest Documentation](https://docs.pytest.org/)\n"
  },
  {
    "path": "backend/tests/daily/connectors/coda/test_coda_connector.py",
    "content": "import os\nimport time\nfrom collections.abc import Generator\nfrom typing import Any\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.coda.connector import CodaConnector\nfrom onyx.connectors.exceptions import CredentialInvalidError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\n\n\ndef connector_doc_generator(\n    connector: CodaConnector,\n) -> Generator[Document, None, None]:\n    for batch in connector.load_from_state():\n        for doc in batch:\n            if isinstance(doc, HierarchyNode):\n                continue\n            yield doc\n\n\n@pytest.fixture\ndef coda_credentials() -> dict[str, str]:\n    \"\"\"Fixture to get and validate Coda credentials.\"\"\"\n    bearer_token = os.environ.get(\"CODA_BEARER_TOKEN\")\n\n    if not bearer_token:\n        pytest.skip(\"CODA_BEARER_TOKEN not set\")\n\n    return {\n        \"coda_bearer_token\": bearer_token,\n    }\n\n\n@pytest.fixture\ndef connector(coda_credentials: dict[str, str]) -> CodaConnector:\n    \"\"\"Fixture to create and authenticate connector.\"\"\"\n    conn = CodaConnector(batch_size=5, index_page_content=True)\n    conn.load_credentials(coda_credentials)\n    return conn\n\n\n@pytest.fixture\ndef workspace_scoped_connector(coda_credentials: dict[str, str]) -> CodaConnector:\n    \"\"\"Fixture to create connector scoped to a specific workspace (if CODA_WORKSPACE_ID is set).\"\"\"\n    workspace_id = os.environ.get(\"CODA_WORKSPACE_ID\")\n    if not workspace_id:\n        pytest.skip(\"CODA_WORKSPACE_ID not set - skipping workspace-scoped tests\")\n\n    conn = CodaConnector(\n        batch_size=5, index_page_content=True, workspace_id=workspace_id\n    )\n    conn.load_credentials(coda_credentials)\n    return conn\n\n\n@pytest.fixture\ndef reference_data(connector: CodaConnector) -> dict[str, Any]:\n    \"\"\"Fixture to fetch reference data from API for validation.\"\"\"\n    all_docs = connector._list_all_docs()\n\n    if not all_docs:\n        pytest.skip(\"No docs found in Coda workspace\")\n\n    expected_page_count = 0\n    expected_table_count = 0\n    pages_by_doc = {}\n    tables_by_doc = {}\n\n    for doc in all_docs:\n        doc_id = doc.id\n\n        try:\n            pages = connector._list_pages_in_doc(doc_id)\n            pages_by_doc[doc_id] = pages\n            expected_page_count += len(pages)\n        except Exception as e:\n            print(f\"Warning: Could not fetch pages for doc {doc_id}: {e}\")\n            pages_by_doc[doc_id] = []\n\n        try:\n            tables = connector._list_tables(doc_id)\n            tables_by_doc[doc_id] = tables\n            expected_table_count += len(tables)\n        except Exception as e:\n            print(f\"Warning: Could not fetch tables for doc {doc_id}: {e}\")\n            tables_by_doc[doc_id] = []\n\n    total_expected_documents = expected_page_count + expected_table_count\n\n    if total_expected_documents == 0:\n        pytest.skip(\"No pages or tables found in Coda workspace\")\n\n    return {\n        \"docs\": all_docs,\n        \"total_pages\": expected_page_count,\n        \"total_tables\": expected_table_count,\n        \"total_documents\": total_expected_documents,\n        \"pages_by_doc\": pages_by_doc,\n        \"tables_by_doc\": tables_by_doc,\n    }\n\n\nclass TestCodaConnectorValidation:\n    \"\"\"Test suite for connector validation and credential handling.\"\"\"\n\n    def test_validate_connector_settings_success(\n        self, connector: CodaConnector\n    ) -> None:\n        \"\"\"Test that validate_connector_settings succeeds with valid credentials.\"\"\"\n        # Should not raise any exceptions\n        connector.validate_connector_settings()\n\n    def test_validate_workspace_scoped_connector(\n        self, workspace_scoped_connector: CodaConnector\n    ) -> None:\n        \"\"\"Test that workspace-scoped connector validates successfully.\"\"\"\n        workspace_scoped_connector.validate_connector_settings()\n\n    def test_load_credentials_invalid_token(self) -> None:\n        \"\"\"Test that invalid credentials are rejected.\"\"\"\n        conn = CodaConnector()\n\n        with pytest.raises(CredentialInvalidError):\n            conn.load_credentials(\n                {\n                    \"coda_bearer_token\": \"invalid_token_12345\",\n                }\n            )\n\n\nclass TestLoadFromState:\n    \"\"\"Test suite for load_from_state functionality.\"\"\"\n\n    def test_returns_generator(self, connector: CodaConnector) -> None:\n        \"\"\"Test that load_from_state returns a generator.\"\"\"\n        gen = connector.load_from_state()\n        assert isinstance(gen, Generator), \"load_from_state should return a Generator\"\n\n    def test_batch_sizes_respect_config(\n        self,\n        connector: CodaConnector,\n        reference_data: dict[str, Any],  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that batches respect the configured batch_size.\"\"\"\n        batch_size = connector.batch_size\n        gen = connector.load_from_state()\n\n        batch_sizes = []\n        for batch in gen:\n            batch_sizes.append(len(batch))\n            assert (\n                len(batch) <= batch_size\n            ), f\"Batch size {len(batch)} exceeds configured {batch_size}\"\n\n        for i, size in enumerate(batch_sizes[:-1]):\n            assert (\n                size == batch_size\n            ), f\"Non-final batch {i} has size {size}, expected {batch_size}\"\n\n        # Last batch may be smaller or equal\n        if batch_sizes:\n            assert batch_sizes[-1] <= batch_size\n\n    def test_document_count_matches_expected(\n        self, connector: CodaConnector, reference_data: dict[str, Any]\n    ) -> None:\n        \"\"\"Test that total documents match expected pages + tables.\"\"\"\n        gen = connector.load_from_state()\n\n        total_documents = sum(len(batch) for batch in gen)\n        expected_count = reference_data[\"total_documents\"]\n\n        assert total_documents == expected_count, (\n            f\"Expected {expected_count} documents \"\n            f\"({reference_data['total_pages']} pages + \"\n            f\"{reference_data['total_tables']} tables) \"\n            f\"but got {total_documents}\"\n        )\n\n    def test_document_required_fields(\n        self,\n        connector: CodaConnector,\n        reference_data: dict[str, Any],  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that all documents have required fields with valid values.\"\"\"\n        gen = connector.load_from_state()\n\n        for batch in gen:\n            for doc in batch:\n                assert isinstance(doc, Document)\n\n                assert doc.id is not None, \"Document ID should not be None\"\n                assert doc.id.startswith(\n                    \"coda-\"\n                ), \"Document ID should start with 'coda-'\"\n                assert (\n                    doc.source == DocumentSource.CODA\n                ), \"Document source should be CODA\"\n                assert (\n                    doc.semantic_identifier is not None\n                ), \"Semantic identifier should not be None\"\n                assert (\n                    doc.doc_updated_at is not None\n                ), \"doc_updated_at should not be None\"\n\n                assert (\n                    len(doc.sections) > 0\n                ), \"Document should have at least one section\"\n                for section in doc.sections:\n                    assert section.text is not None, \"Section text should not be None\"\n                    assert len(section.text) > 0, \"Section text should not be empty\"\n                    assert section.link is not None, \"Section link should not be None\"\n                    assert section.link.startswith(\n                        \"https://\"\n                    ), \"Section link should be a valid URL\"\n\n                assert \"doc_id\" in doc.metadata, \"Metadata should contain doc_id\"\n                assert (\n                    \"browser_link\" in doc.metadata\n                ), \"Metadata should contain browser_link\"\n\n    def test_document_types(\n        self, connector: CodaConnector, reference_data: dict[str, Any]\n    ) -> None:\n        \"\"\"Test that both page and table documents are generated correctly.\"\"\"\n        page_docs = []\n        table_docs = []\n\n        for doc in connector_doc_generator(connector):\n            if \"coda-page-\" in doc.id:\n                page_docs.append(doc)\n                assert \"content_type\" in doc.metadata\n            elif \"coda-table-\" in doc.id:\n                table_docs.append(doc)\n                assert \"row_count\" in doc.metadata\n\n        # Verify we found both types (if both exist in the workspace)\n        if reference_data[\"total_pages\"] > 0:\n            assert len(page_docs) > 0, \"Should have found page documents\"\n\n        if reference_data[\"total_tables\"] > 0:\n            assert len(table_docs) > 0, \"Should have found table documents\"\n\n        # Verify counts match\n        assert (\n            len(page_docs) == reference_data[\"total_pages\"]\n        ), f\"Expected {reference_data['total_pages']} page documents, got {len(page_docs)}\"\n        assert (\n            len(table_docs) == reference_data[\"total_tables\"]\n        ), f\"Expected {reference_data['total_tables']} table documents, got {len(table_docs)}\"\n\n    def test_no_duplicate_documents(\n        self,\n        connector: CodaConnector,\n        reference_data: dict[str, Any],  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that no documents are yielded twice.\"\"\"\n        document_ids = []\n        for doc in connector_doc_generator(connector):\n            document_ids.append(doc.id)\n\n        unique_ids = set(document_ids)\n        assert len(document_ids) == len(\n            unique_ids\n        ), f\"Found {len(document_ids) - len(unique_ids)} duplicate documents\"\n\n    def test_all_docs_processed(\n        self, connector: CodaConnector, reference_data: dict[str, Any]\n    ) -> None:\n        \"\"\"Test that content from all docs are included.\"\"\"\n        processed_doc_ids = set()\n        for doc in connector_doc_generator(connector):\n            doc_id = doc.metadata.get(\"doc_id\")\n            processed_doc_ids.add(doc_id)\n\n        expected_doc_ids = {doc.id for doc in reference_data[\"docs\"]}\n\n        expected_doc_ids_with_content = {\n            doc_id\n            for doc_id in expected_doc_ids\n            if len(reference_data[\"pages_by_doc\"].get(doc_id, [])) > 0\n            or len(reference_data[\"tables_by_doc\"].get(doc_id, [])) > 0\n        }\n\n        assert (\n            processed_doc_ids == expected_doc_ids_with_content\n        ), f\"Not all docs with content were processed. Expected {expected_doc_ids_with_content}, got {processed_doc_ids}\"\n\n    def test_document_content_not_empty(\n        self,\n        connector: CodaConnector,\n        reference_data: dict[str, Any],  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that all documents have meaningful content.\"\"\"\n        for doc in connector_doc_generator(connector):\n            assert doc.semantic_identifier, \"Semantic identifier should not be empty\"\n            assert (\n                len(doc.semantic_identifier) > 0\n            ), \"Semantic identifier should have content\"\n\n            total_text_length = sum(len(section.text or \"\") for section in doc.sections)\n            assert total_text_length > 0, f\"Document {doc.id} has no content\"\n\n    def test_page_content_indexing(self, coda_credentials: dict[str, str]) -> None:\n        \"\"\"Test that index_page_content flag works correctly.\"\"\"\n        # page indexing disabled\n        conn_no_content = CodaConnector(batch_size=5, index_page_content=False)\n        conn_no_content.load_credentials(coda_credentials)\n\n        # page indexing enabled\n        conn_with_content = CodaConnector(batch_size=5, index_page_content=True)\n        conn_with_content.load_credentials(coda_credentials)\n\n        docs_no_content = []\n        for batch in conn_no_content.load_from_state():\n            for doc in batch:\n                if isinstance(doc, HierarchyNode):\n                    continue\n                if \"coda-page-\" in doc.id:\n                    docs_no_content.append(doc)\n                    break\n            if docs_no_content:\n                break\n\n        docs_with_content = []\n        for batch in conn_with_content.load_from_state():\n            for doc in batch:\n                if isinstance(doc, HierarchyNode):\n                    continue\n                if \"coda-page-\" in doc.id:\n                    docs_with_content.append(doc)\n                    break\n            if docs_with_content:\n                break\n\n        if docs_no_content and docs_with_content:\n            no_content_length = sum(\n                len(s.text or \"\") for s in docs_no_content[0].sections\n            )\n            with_content_length = sum(\n                len(s.text or \"\") for s in docs_with_content[0].sections\n            )\n\n            assert (\n                with_content_length >= no_content_length\n            ), \"Content-indexed page should have at least as much text as non-indexed\"\n\n\nclass TestPollSource:\n    \"\"\"Test suite for poll_source functionality.\"\"\"\n\n    def test_poll_source_returns_generator(self, connector: CodaConnector) -> None:\n        \"\"\"Test that poll_source returns a generator.\"\"\"\n        current_time = time.time()\n        start_time = current_time - 86400  # 24 hours\n\n        gen = connector.poll_source(start_time, current_time)\n        assert isinstance(gen, Generator), \"poll_source should return a Generator\"\n\n    def test_poll_source_recent_updates(self, connector: CodaConnector) -> None:\n        \"\"\"Test polling for recently updated documents.\"\"\"\n        current_time = time.time()\n        start_time = current_time - (86400 * 30)\n\n        gen = connector.poll_source(start_time, current_time)\n\n        documents = []\n        for batch in gen:\n            documents.extend(batch)\n\n        # All returned documents should be updated within the time range\n        for doc in documents:\n            if isinstance(doc, HierarchyNode):\n                continue\n            assert doc.doc_updated_at is not None, \"doc_updated_at should not be None\"\n            doc_timestamp = doc.doc_updated_at.timestamp()\n            assert (\n                start_time < doc_timestamp <= current_time\n            ), f\"Document {doc.id} updated at {doc_timestamp} is outside range [{start_time}, {current_time}]\"\n\n    def test_poll_source_no_updates_in_range(self, connector: CodaConnector) -> None:\n        \"\"\"Test polling with a time range that has no updates.\"\"\"\n        end_time = time.time() - (86400 * 365)  # 1 year ago\n        start_time = end_time - 86400  # 1 day before that\n\n        gen = connector.poll_source(start_time, end_time)\n\n        documents = []\n        for batch in gen:\n            documents.extend(batch)\n\n        # Should return no documents (unless workspace is very old)\n        print(f\"Found {len(documents)} documents updated over a year ago\")\n        assert len(documents) == 0\n\n    def test_poll_source_batch_sizes(self, connector: CodaConnector) -> None:\n        \"\"\"Test that poll_source respects batch sizes.\"\"\"\n        current_time = time.time()\n        start_time = current_time - (86400 * 30)\n\n        batch_size = connector.batch_size\n        gen = connector.poll_source(start_time, current_time)\n\n        for batch in gen:\n            assert (\n                len(batch) <= batch_size\n            ), f\"Batch size {len(batch)} exceeds configured {batch_size}\"\n\n\nclass TestWorkspaceScoping:\n    \"\"\"Test suite for workspace_id scoping functionality.\"\"\"\n\n    def test_workspace_scoped_loads_subset(\n        self,\n        connector: CodaConnector,\n        workspace_scoped_connector: CodaConnector,\n        reference_data: dict[str, Any],  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that workspace-scoped connector loads a subset of documents.\"\"\"\n        all_docs = []\n        for batch in connector.load_from_state():\n            all_docs.extend(batch)\n\n        scoped_docs = []\n        for batch in workspace_scoped_connector.load_from_state():\n            scoped_docs.extend(batch)\n\n        # Scoped should be <= all docs\n        assert len(scoped_docs) <= len(\n            all_docs\n        ), \"Workspace-scoped connector should return same or fewer documents\"\n\n        workspace_id = workspace_scoped_connector.workspace_id\n        for doc in scoped_docs:\n            if isinstance(doc, HierarchyNode):\n                continue\n            doc_id = doc.metadata.get(\"doc_id\")\n            assert isinstance(doc_id, str), \"doc_id should be a string\"\n            coda_doc = workspace_scoped_connector._get_doc(doc_id)\n            assert (\n                coda_doc.workspace_id == workspace_id\n            ), f\"Document {doc_id} has workspace {coda_doc.workspace_id}, expected {workspace_id}\"\n\n\nclass TestErrorHandling:\n    \"\"\"Test suite for error handling and edge cases.\"\"\"\n\n    def test_handles_missing_page_content_gracefully(\n        self, connector: CodaConnector\n    ) -> None:\n        \"\"\"Test that connector handles pages without accessible content.\"\"\"\n        gen = connector.load_from_state()\n\n        documents = []\n        for batch in gen:\n            documents.extend(batch)\n\n        assert (\n            len(documents) > 0\n        ), \"Should yield documents even if some content is inaccessible\"\n\n    def test_handles_empty_tables_gracefully(self, connector: CodaConnector) -> None:\n        \"\"\"Test that connector handles tables with no rows.\"\"\"\n        for doc in connector_doc_generator(connector):\n            if \"coda-table-\" in doc.id:\n                assert len(doc.sections) > 0, \"Empty table should still have a section\"\n                if doc.metadata.get(\"row_count\") == \"0\":\n                    assert (\n                        len(doc.sections) == 1\n                    ), \"Empty table should have exactly one section\"\n\n\nif __name__ == \"__main__\":\n    pytest.main([__file__, \"-v\", \"-s\"])\n"
  },
  {
    "path": "backend/tests/daily/connectors/confluence/models.py",
    "content": "from pydantic import BaseModel\n\nfrom ee.onyx.db.external_perm import ExternalUserGroup\n\n\nclass ExternalUserGroupSet(BaseModel):\n    \"\"\"A version of ExternalUserGroup that uses a set for user_emails to avoid order-dependent comparisons.\"\"\"\n\n    id: str\n    user_emails: set[str]\n    gives_anyone_access: bool\n\n    @classmethod\n    def from_model(\n        cls, external_user_group: ExternalUserGroup\n    ) -> \"ExternalUserGroupSet\":\n        \"\"\"Convert from ExternalUserGroup to ExternalUserGroupSet.\"\"\"\n        return cls(\n            id=external_user_group.id,\n            user_emails=set(external_user_group.user_emails),\n            gives_anyone_access=external_user_group.gives_anyone_access,\n        )\n"
  },
  {
    "path": "backend/tests/daily/connectors/confluence/test_confluence_basic.py",
    "content": "import os\nimport time\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.confluence.connector import ConfluenceConnector\nfrom onyx.connectors.confluence.utils import AttachmentProcessingResult\nfrom onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider\nfrom onyx.connectors.models import Document\nfrom tests.daily.connectors.utils import load_all_from_connector\n\n\ndef _make_connector(\n    space: str, access_token: str, scoped_token: bool = False\n) -> ConfluenceConnector:\n    connector = ConfluenceConnector(\n        wiki_base=os.environ[\"CONFLUENCE_TEST_SPACE_URL\"],\n        space=space,\n        is_cloud=os.environ.get(\"CONFLUENCE_IS_CLOUD\", \"true\").lower() == \"true\",\n        page_id=os.environ.get(\"CONFLUENCE_TEST_PAGE_ID\", \"\"),\n        scoped_token=scoped_token,\n    )\n\n    credentials_provider = OnyxStaticCredentialsProvider(\n        None,\n        DocumentSource.CONFLUENCE,\n        {\n            \"confluence_username\": os.environ[\"CONFLUENCE_USER_NAME\"],\n            \"confluence_access_token\": access_token,\n        },\n    )\n    connector.set_credentials_provider(credentials_provider)\n    return connector\n\n\n@pytest.fixture\ndef confluence_connector(space: str) -> ConfluenceConnector:\n    return _make_connector(space, os.environ[\"CONFLUENCE_ACCESS_TOKEN\"].strip())\n\n\n@pytest.fixture\ndef confluence_connector_scoped(space: str) -> ConfluenceConnector:\n    return _make_connector(\n        space, os.environ[\"CONFLUENCE_ACCESS_TOKEN_SCOPED\"].strip(), scoped_token=True\n    )\n\n\n@pytest.mark.parametrize(\"space\", [os.getenv(\"CONFLUENCE_TEST_SPACE\") or \"DailyConne\"])\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_confluence_connector_basic(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    confluence_connector: ConfluenceConnector,\n) -> None:\n    _test_confluence_connector_basic(confluence_connector)\n\n\n@pytest.mark.parametrize(\"space\", [os.getenv(\"CONFLUENCE_TEST_SPACE\") or \"DailyConne\"])\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_confluence_connector_basic_scoped(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    confluence_connector_scoped: ConfluenceConnector,\n) -> None:\n    _test_confluence_connector_basic(\n        confluence_connector_scoped, expect_attachments=True\n    )\n\n\ndef _test_confluence_connector_basic(\n    confluence_connector: ConfluenceConnector, expect_attachments: bool = True\n) -> None:\n    confluence_connector.set_allow_images(False)\n    result = load_all_from_connector(confluence_connector, 0, time.time())\n    doc_batch = result.documents\n    hierarchy_nodes = result.hierarchy_nodes\n\n    assert len(doc_batch) == (3 if expect_attachments else 2)\n\n    # Hierarchy structure:\n    # - Space \"DailyConne\" (root)\n    #   - Page \"DailyConnectorTestSpace Home\" (has attachments, so becomes hierarchy node)\n    #     - Attachment \"small-file.txt\"\n    #   - Page \"Page Within A Page\" (no children/attachments, not a hierarchy node)\n    expected_hierarchy_count = 2 if expect_attachments else 1\n    assert len(hierarchy_nodes) == expected_hierarchy_count, (\n        f\"Expected {expected_hierarchy_count} hierarchy nodes but got {len(hierarchy_nodes)}. \"\n        f\"Nodes: {[(n.raw_node_id, n.node_type, n.display_name) for n in hierarchy_nodes]}\"\n    )\n\n    # Verify hierarchy node structure\n    space_node = next(\n        (n for n in hierarchy_nodes if n.node_type.value == \"space\"), None\n    )\n    assert space_node is not None, \"Space hierarchy node not found\"\n    assert space_node.raw_node_id == \"DailyConne\"\n    assert space_node.display_name == \"DailyConnectorTestSpace\"\n    assert space_node.raw_parent_id is None  # Space is root\n\n    if expect_attachments:\n        home_page_node = next(\n            (n for n in hierarchy_nodes if n.node_type.value == \"page\"), None\n        )\n        assert home_page_node is not None, \"Home page hierarchy node not found\"\n        assert home_page_node.display_name == \"DailyConnectorTestSpace Home\"\n        assert home_page_node.raw_parent_id == \"DailyConne\"  # Parent is the space\n\n    page_within_a_page_doc: Document | None = None\n    page_doc: Document | None = None\n    small_file_doc: Document | None = None\n\n    for doc in doc_batch:\n        if doc.semantic_identifier == \"DailyConnectorTestSpace Home\":\n            page_doc = doc\n        elif doc.semantic_identifier == \"Page Within A Page\":\n            page_within_a_page_doc = doc\n        elif doc.semantic_identifier == \"small-file.txt\":\n            small_file_doc = doc\n        else:\n            print(f\"Unexpected doc: {doc.semantic_identifier}\")\n\n    assert page_within_a_page_doc is not None\n    assert page_within_a_page_doc.semantic_identifier == \"Page Within A Page\"\n    assert page_within_a_page_doc.primary_owners\n    assert page_within_a_page_doc.primary_owners[0].email == \"hagen@danswer.ai\"\n    assert (\n        page_within_a_page_doc.id\n        == \"https://danswerai.atlassian.net/wiki/spaces/DailyConne/pages/200769540/Page+Within+A+Page\"\n    )\n    assert len(page_within_a_page_doc.sections) == 1\n\n    page_within_a_page_section = page_within_a_page_doc.sections[0]\n    page_within_a_page_text = \"@Chris Weaver loves cherry pie\"\n    assert page_within_a_page_section.text == page_within_a_page_text\n    assert (\n        page_within_a_page_section.link\n        == \"https://danswerai.atlassian.net/wiki/spaces/DailyConne/pages/200769540/Page+Within+A+Page\"\n    )\n\n    assert page_doc is not None\n    assert page_doc.semantic_identifier == \"DailyConnectorTestSpace Home\"\n    assert (\n        page_doc.id == \"https://danswerai.atlassian.net/wiki/spaces/DailyConne/overview\"\n    )\n    assert page_doc.metadata[\"labels\"] == [\"testlabel\"]\n    assert page_doc.primary_owners\n    assert page_doc.primary_owners[0].email == \"hagen@danswer.ai\"\n    assert (\n        len(page_doc.sections) == 1\n    )  # just page text, attachment text is separate doc\n\n    page_section = page_doc.sections[0]\n    assert (\n        page_section.text\n        == \"test123 \"\n        + page_within_a_page_text\n        + \"\\n<attachment>small-file.txt</attachment>\\n<attachment>big-file.txt</attachment>\"\n    )\n    assert (\n        page_section.link\n        == \"https://danswerai.atlassian.net/wiki/spaces/DailyConne/overview\"\n    )\n\n    if expect_attachments:\n        assert small_file_doc is not None\n        text_attachment_section = small_file_doc.sections[0]\n        assert text_attachment_section.text == \"small\"\n        assert text_attachment_section.link\n        assert text_attachment_section.link.split(\"?\")[0].endswith(\"small-file.txt\")\n\n\n@pytest.mark.parametrize(\"space\", [\"MI\"])\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_confluence_connector_skip_images(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    confluence_connector: ConfluenceConnector,\n) -> None:\n    confluence_connector.set_allow_images(False)\n    result = load_all_from_connector(confluence_connector, 0, time.time())\n    doc_batch = result.documents\n    hierarchy_nodes = result.hierarchy_nodes\n\n    assert len(doc_batch) == 8\n    assert sum(len(doc.sections) for doc in doc_batch) == 8\n\n    # Hierarchy structure for MI space (when images are skipped):\n    # - Space \"MI\" (Many Images)\n    #   - Page \"Many Images\" (home page, has children)\n    #     - Page \"Image formats\" (has children - the image pages)\n    # Note: Image pages themselves don't become hierarchy nodes since images are skipped\n    assert len(hierarchy_nodes) == 3, (\n        f\"Expected 3 hierarchy nodes but got {len(hierarchy_nodes)}. \"\n        f\"Nodes: {[(n.raw_node_id, n.node_type, n.display_name) for n in hierarchy_nodes]}\"\n    )\n\n\ndef mock_process_image_attachment(\n    *args: Any,  # noqa: ARG001\n    **kwargs: Any,  # noqa: ARG001\n) -> AttachmentProcessingResult:\n    \"\"\"We need this mock to bypass DB access happening in the connector. Which shouldn't\n    be done as a rule to begin with, but life is not perfect. Fix it later\"\"\"\n\n    return AttachmentProcessingResult(\n        text=\"Hi_text\",\n        file_name=\"Hi_filename\",\n        error=None,\n    )\n\n\n@pytest.mark.parametrize(\"space\", [\"MI\"])\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\n@patch(\n    \"onyx.connectors.confluence.utils._process_image_attachment\",\n    side_effect=mock_process_image_attachment,\n)\ndef test_confluence_connector_allow_images(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    mock_process_image_attachment: MagicMock,  # noqa: ARG001\n    confluence_connector: ConfluenceConnector,\n) -> None:\n    confluence_connector.set_allow_images(True)\n\n    result = load_all_from_connector(confluence_connector, 0, time.time())\n    doc_batch = result.documents\n    hierarchy_nodes = result.hierarchy_nodes\n\n    assert len(doc_batch) == 12\n    assert sum(len(doc.sections) for doc in doc_batch) == 12\n\n    # Hierarchy structure for MI space (when images are allowed):\n    # - Space \"MI\" (Many Images)\n    #   - Page \"Many Images\" (home page)\n    #     - Page \"Image formats\" (has children)\n    #     - Page \"Dunder Mifflin Org Chart\" (has image attachments)\n    #     - Page \"List of Joey's Favorite Objects\" (has image attachments)\n    #     - Page \"Content\" (has image attachments)\n    # Pages with image attachments become hierarchy nodes because attachments reference them\n    assert len(hierarchy_nodes) == 6, (\n        f\"Expected 6 hierarchy nodes but got {len(hierarchy_nodes)}. \"\n        f\"Nodes: {[(n.raw_node_id, n.node_type, n.display_name) for n in hierarchy_nodes]}\"\n    )\n"
  },
  {
    "path": "backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py",
    "content": "import os\nimport time\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom ee.onyx.external_permissions.confluence.doc_sync import confluence_doc_sync\nfrom onyx.access.models import DocExternalAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.confluence.connector import ConfluenceConnector\nfrom onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.utils import DocumentRow\nfrom onyx.db.utils import SortOrder\nfrom tests.daily.connectors.utils import load_all_from_connector\n\n\n@pytest.fixture\ndef confluence_connector() -> ConfluenceConnector:\n    connector = ConfluenceConnector(\n        wiki_base=\"https://danswerai.atlassian.net\",\n        is_cloud=True,\n    )\n\n    credentials_provider = OnyxStaticCredentialsProvider(\n        None,\n        DocumentSource.CONFLUENCE,\n        {\n            \"confluence_username\": os.environ[\"CONFLUENCE_USER_NAME\"],\n            \"confluence_access_token\": os.environ[\"CONFLUENCE_ACCESS_TOKEN\"],\n        },\n    )\n    connector.set_credentials_provider(credentials_provider)\n    return connector\n\n\n# This should never fail because even if the docs in the cloud change,\n# the full doc ids retrieved should always be a subset of the slim doc ids\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_confluence_connector_permissions(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    confluence_connector: ConfluenceConnector,\n    enable_ee: None,  # noqa: ARG001\n) -> None:\n    # Get all doc IDs from the full connector\n    all_full_doc_ids = set()\n    result = load_all_from_connector(confluence_connector, 0, time.time())\n    doc_batch = result.documents\n    hierarchy_nodes = result.hierarchy_nodes\n    all_full_doc_ids.update([doc.id for doc in doc_batch])\n\n    # Verify hierarchy nodes are returned and have valid structure\n    # Note: The exact count depends on the current state of the Confluence instance\n    assert len(hierarchy_nodes) > 0, \"Expected at least some hierarchy nodes\"\n\n    # Verify all space nodes have no parent and all page nodes have a parent\n    for node in hierarchy_nodes:\n        if node.node_type.value == \"space\":\n            assert (\n                node.raw_parent_id is None\n            ), f\"Space node {node.raw_node_id} should have no parent\"\n        elif node.node_type.value == \"page\":\n            assert (\n                node.raw_parent_id is not None\n            ), f\"Page node {node.raw_node_id} should have a parent\"\n\n    # Get all doc IDs from the slim connector\n    all_slim_doc_ids = set()\n    for slim_doc_batch in confluence_connector.retrieve_all_slim_docs_perm_sync():\n        all_slim_doc_ids.update(\n            [doc.id for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # Find IDs that are in full but not in slim\n    difference = all_full_doc_ids - all_slim_doc_ids\n\n    # The set of full doc IDs should be always be a subset of the slim doc IDs\n    assert all_full_doc_ids.issubset(\n        all_slim_doc_ids\n    ), f\"Full doc IDs are not a subset of slim doc IDs. Found {len(difference)} IDs in full docs but not in slim docs.\"\n\n\n@patch(\"ee.onyx.external_permissions.confluence.doc_sync.OnyxDBCredentialsProvider\")\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_confluence_connector_restriction_handling(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    mock_db_provider_class: MagicMock,\n    enable_ee: None,  # noqa: ARG001\n) -> None:\n    # Test space key\n    test_space_key = \"DailyPermS\"\n\n    # Configure the mock provider instance that will be returned\n    mock_provider_instance = MagicMock()\n    mock_provider_instance.get_credentials.return_value = {\n        \"confluence_username\": os.environ[\"CONFLUENCE_USER_NAME\"],\n        \"confluence_access_token\": os.environ[\"CONFLUENCE_ACCESS_TOKEN\"],\n    }\n    # this prevents redis calls inside of OnyxConfluence\n    mock_provider_instance.is_dynamic.return_value = False\n    # Make the class return our configured instance when called\n    mock_db_provider_class.return_value = mock_provider_instance\n\n    # Mock the cc_pair to pass to the function\n    mock_cc_pair = MagicMock(spec=ConnectorCredentialPair)\n    # Mock the nested connector attribute and its config\n    mock_cc_pair.connector = MagicMock()\n    mock_cc_pair.connector.connector_specific_config = {\n        \"wiki_base\": \"https://danswerai.atlassian.net\",\n        \"is_cloud\": True,\n        \"space\": test_space_key,\n    }\n    # Set a mock credential ID\n    mock_cc_pair.credential_id = 1\n\n    # Call the confluence_doc_sync function directly with the mock cc_pair\n    def mock_fetch_all_docs_fn(\n        sort_order: SortOrder | None = None,  # noqa: ARG001\n    ) -> list[DocumentRow]:\n        return []\n\n    def mock_fetch_all_docs_ids_fn() -> list[str]:\n        return []\n\n    doc_access_generator = confluence_doc_sync(\n        mock_cc_pair, mock_fetch_all_docs_fn, mock_fetch_all_docs_ids_fn, None\n    )\n    doc_access_list = list(doc_access_generator)\n    assert len(doc_access_list) == 7\n    assert all(\n        not doc_access.external_access.is_public for doc_access in doc_access_list\n    )\n\n    # if no restriction is applied, the groups should give access, so no need\n    # for more emails outside of the owner\n    non_restricted_emails = {\"chris@onyx.app\"}\n    non_restricted_user_groups = {\n        \"confluence-admins-danswerai\",\n        \"org-admins\",\n        \"atlassian-addons-admin\",\n        \"confluence-users-danswerai\",\n    }\n\n    # if restriction is applied, only should be visible to shared users / groups\n    restricted_emails = {\"chris@onyx.app\", \"hagen@danswer.ai\", \"oauth@onyx.app\"}\n    restricted_user_groups = {\"confluence-admins-danswerai\"}\n\n    extra_restricted_emails = {\"chris@onyx.app\", \"oauth@onyx.app\"}\n    extra_restricted_user_groups: set[str] = set()\n\n    # note that this is only allowed since yuhong@onyx.app is a member of the\n    # confluence-admins-danswerai group\n    special_restricted_emails = {\"chris@onyx.app\", \"yuhong@onyx.app\", \"oauth@onyx.app\"}\n    special_restricted_user_groups: set[str] = set()\n\n    # Check Root+Page+2 is public\n    root_page_2 = next(\n        d\n        for d in doc_access_list\n        if isinstance(d, DocExternalAccess) and d.doc_id.endswith(\"Root+Page+2\")\n    )\n    assert root_page_2.external_access.external_user_emails == non_restricted_emails\n    assert (\n        root_page_2.external_access.external_user_group_ids\n        == non_restricted_user_groups\n    )\n\n    # Check Overview page is public\n    overview_page = next(\n        d\n        for d in doc_access_list\n        if isinstance(d, DocExternalAccess) and d.doc_id.lower().endswith(\"overview\")\n    )\n    assert (\n        overview_page.external_access.external_user_emails == non_restricted_emails\n    ), \"Overview page emails do not match expected values\"\n    assert (\n        overview_page.external_access.external_user_group_ids\n        == non_restricted_user_groups\n    ), \"Overview page groups do not match expected values\"\n\n    # check root page is restricted\n    root_page = next(\n        d\n        for d in doc_access_list\n        if isinstance(d, DocExternalAccess) and d.doc_id.endswith(\"Root+Page\")\n    )\n    assert (\n        root_page.external_access.external_user_emails == restricted_emails\n    ), \"Root page emails do not match expected values\"\n    assert (\n        root_page.external_access.external_user_group_ids == restricted_user_groups\n    ), \"Root page groups do not match expected values\"\n\n    # check child page has restriction propagated\n    child_page = next(\n        d\n        for d in doc_access_list\n        if isinstance(d, DocExternalAccess) and d.doc_id.endswith(\"Child+Page\")\n    )\n    assert (\n        child_page.external_access.external_user_emails == restricted_emails\n    ), \"Child page emails do not match expected values\"\n    assert (\n        child_page.external_access.external_user_group_ids == restricted_user_groups\n    ), \"Child page groups do not match expected values\"\n\n    # check doubly nested child page has restriction propagated\n    child_page_2 = next(\n        d\n        for d in doc_access_list\n        if isinstance(d, DocExternalAccess) and d.doc_id.endswith(\"Child+Page+2\")\n    )\n    assert (\n        child_page_2.external_access.external_user_emails == restricted_emails\n    ), \"Child page 2 emails do not match expected values\"\n    assert (\n        child_page_2.external_access.external_user_group_ids == restricted_user_groups\n    ), \"Child page 2 groups do not match expected values\"\n\n    # check child page w/ specific restrictions have those applied\n    child_page_3 = next(\n        d\n        for d in doc_access_list\n        if isinstance(d, DocExternalAccess) and d.doc_id.endswith(\"Child+Page+3\")\n    )\n    assert (\n        child_page_3.external_access.external_user_emails == extra_restricted_emails\n    ), \"Child page 3 emails do not match expected values\"\n    assert (\n        child_page_3.external_access.external_user_group_ids\n        == extra_restricted_user_groups\n    ), \"Child page 3 groups do not match expected values\"\n\n    # check child page w/ specific restrictions have those applied\n    child_page_4 = next(\n        d\n        for d in doc_access_list\n        if isinstance(d, DocExternalAccess) and d.doc_id.endswith(\"Child+Page+4\")\n    )\n    assert (\n        child_page_4.external_access.external_user_emails == special_restricted_emails\n    ), \"Child page 4 emails do not match expected values\"\n    assert (\n        child_page_4.external_access.external_user_group_ids\n        == special_restricted_user_groups\n    ), \"Child page 4 groups do not match expected values\"\n"
  },
  {
    "path": "backend/tests/daily/connectors/confluence/test_confluence_user_email_overrides.py",
    "content": "import types\nfrom unittest.mock import patch\n\nfrom onyx.connectors.confluence.onyx_confluence import ConfluenceUser\nfrom onyx.connectors.confluence.onyx_confluence import OnyxConfluence\nfrom onyx.connectors.interfaces import CredentialsProviderInterface\n\n\nclass MockCredentialsProvider(CredentialsProviderInterface):\n    def get_tenant_id(self) -> str:\n        return \"test_tenant\"\n\n    def get_provider_key(self) -> str:\n        return \"test_provider\"\n\n    def is_dynamic(self) -> bool:\n        return False\n\n    def get_credentials(self) -> dict[str, str]:\n        return {\"confluence_access_token\": \"test_token\"}\n\n    def set_credentials(self, credentials: dict[str, str]) -> None:\n        pass\n\n    def __enter__(self) -> \"MockCredentialsProvider\":\n        return self\n\n    def __exit__(\n        self,\n        exc_type: type[BaseException] | None,\n        exc_val: BaseException | None,\n        exc_tb: types.TracebackType | None,\n    ) -> None:\n        pass\n\n\ndef test_paginated_cql_user_retrieval_with_overrides() -> None:\n    \"\"\"\n    Tests that paginated_cql_user_retrieval yields users from the overrides\n    when provided and is_cloud is False.\n    \"\"\"\n    mock_provider = MockCredentialsProvider()\n    overrides = [\n        {\n            \"user_id\": \"override_user_1\",\n            \"username\": \"override1\",\n            \"display_name\": \"Override User One\",\n            \"email\": \"override1@example.com\",\n            \"type\": \"override\",\n        },\n        {\n            \"user_id\": \"override_user_2\",\n            \"username\": \"override2\",\n            \"display_name\": \"Override User Two\",\n            \"email\": \"override2@example.com\",\n            \"type\": \"override\",\n        },\n    ]\n    expected_users = [ConfluenceUser(**user_data) for user_data in overrides]\n\n    confluence_client = OnyxConfluence(\n        is_cloud=False,  # Overrides are primarily for Server/DC\n        url=\"http://dummy-confluence.com\",\n        credentials_provider=mock_provider,\n        confluence_user_profiles_override=overrides,\n    )\n\n    retrieved_users = list(confluence_client.paginated_cql_user_retrieval())\n\n    assert len(retrieved_users) == len(expected_users)\n    # Sort lists by user_id for order-independent comparison\n    retrieved_users.sort(key=lambda u: u.user_id)\n    expected_users.sort(key=lambda u: u.user_id)\n    assert retrieved_users == expected_users\n\n\ndef test_paginated_cql_user_retrieval_no_overrides_server() -> None:\n    \"\"\"\n    Tests that paginated_cql_user_retrieval attempts to call the actual\n    API pagination when no overrides are provided for Server/DC.\n    \"\"\"\n    mock_provider = MockCredentialsProvider()\n    confluence_client = OnyxConfluence(\n        is_cloud=False,\n        url=\"http://dummy-confluence.com\",\n        credentials_provider=mock_provider,\n        confluence_user_profiles_override=None,\n    )\n\n    # Mock the internal pagination method to check if it's called\n    with patch.object(confluence_client, \"_paginate_url\") as mock_paginate:\n        mock_paginate.return_value = iter([])  # Return an empty iterator\n\n        list(confluence_client.paginated_cql_user_retrieval())\n\n        mock_paginate.assert_called_once_with(\"rest/api/user/list\", None)\n\n\ndef test_paginated_cql_user_retrieval_no_overrides_cloud() -> None:\n    \"\"\"\n    Tests that paginated_cql_user_retrieval attempts to call the actual\n    API pagination when no overrides are provided for Cloud.\n    \"\"\"\n    mock_provider = MockCredentialsProvider()\n    confluence_client = OnyxConfluence(\n        is_cloud=True,\n        url=\"http://dummy-confluence.com\",  # URL doesn't matter much here due to mocking\n        credentials_provider=mock_provider,\n        confluence_user_profiles_override=None,\n    )\n\n    # Mock the internal pagination method to check if it's called\n    with patch.object(confluence_client, \"_paginate_url\") as mock_paginate:\n        mock_paginate.return_value = iter([])  # Return an empty iterator\n\n        list(confluence_client.paginated_cql_user_retrieval())\n\n        # Check that the cloud-specific user search URL is called\n        mock_paginate.assert_called_once_with(\n            \"rest/api/search/user?cql=type=user\",\n            None,\n            force_offset_pagination=True,\n        )\n"
  },
  {
    "path": "backend/tests/daily/connectors/conftest.py",
    "content": "from collections.abc import Generator\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\n\n@pytest.fixture\ndef mock_get_unstructured_api_key() -> Generator[MagicMock, None, None]:\n    with patch(\n        \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n        return_value=None,\n    ) as mock:\n        yield mock\n"
  },
  {
    "path": "backend/tests/daily/connectors/discord/test_discord_connector.py",
    "content": "import os\nimport time\n\nimport pytest\n\nfrom onyx.connectors.discord.connector import DiscordConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentSource\nfrom onyx.connectors.models import HierarchyNode\n\n\n@pytest.fixture\ndef discord_connector() -> DiscordConnector:\n    connector = DiscordConnector()\n    connector.load_credentials(\n        {\"discord_bot_token\": os.environ[\"DISCORD_CONNECTOR_BOT_TOKEN\"]}\n    )\n    return connector\n\n\ndef test_discord_connector_basic(discord_connector: DiscordConnector) -> None:\n    # If there are no Discord messages in the last 7 days, something has gone horribly wrong\n    end_time = time.time()\n    start_time = end_time - (7 * 24 * 60 * 60)\n    doc_batch_generator = discord_connector.poll_source(start_time, end_time)\n\n    doc_batch = next(doc_batch_generator)\n\n    docs: list[Document] = []\n    for doc in doc_batch:\n        if not isinstance(doc, HierarchyNode):\n            docs.append(doc)\n\n    assert len(docs) > 0, \"No documents were retrieved from the connector\"\n\n    # Check basic document structure\n    doc = docs[0]\n    assert doc.source == DocumentSource.DISCORD\n    assert doc.id is not None\n    assert doc.semantic_identifier is not None\n    assert len(doc.sections) > 0\n    assert doc.sections[0].text is not None\n    assert doc.sections[0].link is not None\n"
  },
  {
    "path": "backend/tests/daily/connectors/file/test_file_connector.py",
    "content": "import io\nfrom datetime import datetime\nfrom datetime import timezone\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\n\nfrom onyx.connectors.file.connector import LocalFileConnector\nfrom onyx.connectors.models import HierarchyNode\n\n\n@pytest.fixture\ndef mock_db_session() -> MagicMock:\n    return MagicMock()\n\n\n@pytest.fixture\ndef mock_file_store() -> MagicMock:\n    store = MagicMock()\n    return store\n\n\n@pytest.fixture\ndef mock_filestore_record() -> MagicMock:\n    record = MagicMock()\n    record.file_id = uuid4()\n    record.display_name = \"test.txt\"\n    return record\n\n\n@patch(\"onyx.connectors.file.connector.get_default_file_store\")\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\", return_value=None\n)\ndef test_single_text_file_with_metadata(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    mock_get_session: MagicMock,\n    mock_db_session: MagicMock,\n    mock_file_store: MagicMock,\n    mock_filestore_record: MagicMock,\n) -> None:\n    file_content = io.BytesIO(\n        b'#ONYX_METADATA={\"link\": \"https://onyx.app\", \"file_display_name\":\"my display name\", \"tag_of_your_choice\": \"test-tag\", \\\n          \"primary_owners\": [\"wenxi@onyx.app\"], \"secondary_owners\": [\"founders@onyx.app\"], \\\n          \"doc_updated_at\": \"2001-01-01T00:00:00Z\"}\\n'\n        b\"Test answer is 12345\"\n    )\n    mock_get_filestore = MagicMock()\n    mock_get_filestore.return_value = mock_file_store\n    mock_file_store.read_file_record.return_value = mock_filestore_record\n    mock_get_session.return_value.__enter__.return_value = mock_db_session\n    mock_file_store.read_file.return_value = file_content\n\n    with patch(\n        \"onyx.connectors.file.connector.get_default_file_store\",\n        return_value=mock_file_store,\n    ):\n        connector = LocalFileConnector(\n            file_locations=[\"test.txt\"], file_names=[\"test.txt\"], zip_metadata={}\n        )\n        batches = list(connector.load_from_state())\n\n    assert len(batches) == 1\n    docs = batches[0]\n    assert len(docs) == 1\n    doc = docs[0]\n    assert not isinstance(doc, HierarchyNode)\n\n    assert doc.sections[0].text == \"Test answer is 12345\"\n    assert doc.sections[0].link == \"https://onyx.app\"\n    assert doc.semantic_identifier == \"my display name\"\n    assert doc.primary_owners[0].display_name == \"wenxi@onyx.app\"  # type: ignore\n    assert doc.secondary_owners[0].display_name == \"founders@onyx.app\"  # type: ignore\n    assert doc.doc_updated_at == datetime(2001, 1, 1, 0, 0, 0, tzinfo=timezone.utc)\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\", return_value=None\n)\ndef test_two_text_files_with_zip_metadata(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    mock_db_session: MagicMock,  # noqa: ARG001\n    mock_file_store: MagicMock,\n) -> None:\n    file1_content = io.BytesIO(b\"File 1 content\")\n    file2_content = io.BytesIO(b\"File 2 content\")\n    mock_get_filestore = MagicMock()\n    mock_get_filestore.return_value = mock_file_store\n    mock_file_store.read_file_record.side_effect = [\n        MagicMock(file_id=str(uuid4()), display_name=\"file1.txt\"),\n        MagicMock(file_id=str(uuid4()), display_name=\"file2.txt\"),\n    ]\n    mock_file_store.read_file.side_effect = [file1_content, file2_content]\n    zip_metadata = {\n        \"file1.txt\": {\n            \"filename\": \"file1.txt\",\n            \"file_display_name\": \"display 1\",\n            \"link\": \"https://onyx.app/1\",\n            \"primary_owners\": [\"alice@onyx.app\"],\n            \"secondary_owners\": [\"bob@onyx.app\"],\n            \"doc_updated_at\": \"2022-02-02T00:00:00Z\",\n        },\n        \"file2.txt\": {\n            \"filename\": \"file2.txt\",\n            \"file_display_name\": \"display 2\",\n            \"link\": \"https://onyx.app/2\",\n            \"primary_owners\": [\"carol@onyx.app\"],\n            \"secondary_owners\": [\"dave@onyx.app\"],\n            \"doc_updated_at\": \"2023-03-03T00:00:00Z\",\n        },\n    }\n\n    with patch(\n        \"onyx.connectors.file.connector.get_default_file_store\",\n        return_value=mock_file_store,\n    ):\n        connector = LocalFileConnector(\n            file_locations=[\"file1.txt\", \"file2.txt\"],\n            file_names=[\"file1.txt\", \"file2.txt\"],\n            zip_metadata=zip_metadata,\n        )\n        batches = list(connector.load_from_state())\n\n    assert len(batches) == 1\n    docs = batches[0]\n    assert len(docs) == 2\n    doc1, doc2 = docs\n    assert not isinstance(doc1, HierarchyNode)\n    assert not isinstance(doc2, HierarchyNode)\n\n    assert doc1.sections[0].text == \"File 1 content\"\n    assert doc1.sections[0].link == \"https://onyx.app/1\"\n    assert doc1.semantic_identifier == \"display 1\"\n    assert doc1.primary_owners[0].display_name == \"alice@onyx.app\"  # type: ignore\n    assert doc1.secondary_owners[0].display_name == \"bob@onyx.app\"  # type: ignore\n    assert doc1.doc_updated_at == datetime(2022, 2, 2, 0, 0, 0, tzinfo=timezone.utc)\n    assert doc2.sections[0].text == \"File 2 content\"\n    assert doc2.sections[0].link == \"https://onyx.app/2\"\n    assert doc2.semantic_identifier == \"display 2\"\n    assert doc2.primary_owners[0].display_name == \"carol@onyx.app\"  # type: ignore\n    assert doc2.secondary_owners[0].display_name == \"dave@onyx.app\"  # type: ignore\n    assert doc2.doc_updated_at == datetime(2023, 3, 3, 0, 0, 0, tzinfo=timezone.utc)\n"
  },
  {
    "path": "backend/tests/daily/connectors/fireflies/test_fireflies_connector.py",
    "content": "import json\nimport os\nimport time\nfrom pathlib import Path\nfrom typing import Any\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.fireflies.connector import FirefliesConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\n\n\ndef load_test_data(file_name: str = \"test_fireflies_data.json\") -> dict[str, Any]:\n    current_dir = Path(__file__).parent\n    with open(current_dir / file_name, \"r\") as f:\n        return json.load(f)\n\n\n@pytest.fixture\ndef fireflies_connector() -> FirefliesConnector:\n    connector = FirefliesConnector()\n    connector.load_credentials(\n        {\"fireflies_api_key\": os.environ[\"FIREFLIES_API_KEY\"]},\n    )\n    return connector\n\n\n@pytest.mark.xfail(\n    reason=\"We don't have the key that is stored in GitHub Secrets and the returned data is different than expected\",\n)\ndef test_fireflies_connector_basic(fireflies_connector: FirefliesConnector) -> None:\n    test_data = load_test_data()\n\n    connector_return_data: list[Document | HierarchyNode] = next(\n        fireflies_connector.poll_source(0, time.time())\n    )\n    target_doc: Document | HierarchyNode = connector_return_data[0]\n    if isinstance(target_doc, HierarchyNode):\n        raise ValueError(\"Hierarchy node returned from connector\")\n\n    assert target_doc is not None, \"No documents were retrieved from the connector\"\n    assert (\n        target_doc.primary_owners is not None\n    ), \"No primary owners were retrieved from the connector\"\n\n    assert target_doc.id == test_data[\"id\"]\n    assert target_doc.semantic_identifier == test_data[\"semantic_identifier\"]\n    assert target_doc.primary_owners[0].email == test_data[\"primary_owners\"]\n    assert target_doc.secondary_owners == test_data[\"secondary_owners\"]\n    assert str(target_doc.doc_updated_at) == test_data[\"doc_updated_at\"]\n\n    assert (\n        target_doc.source == DocumentSource.FIREFLIES\n    ), \"Document source is not fireflies\"\n    assert target_doc.metadata == test_data[\"metadata\"]\n\n    # Check that the test data and the connector data contain the same section data\n    assert {section.text for section in target_doc.sections} == {\n        section[\"text\"] for section in test_data[\"sections\"]\n    }\n    assert {section.link for section in target_doc.sections} == {\n        section[\"link\"] for section in test_data[\"sections\"]\n    }\n"
  },
  {
    "path": "backend/tests/daily/connectors/fireflies/test_fireflies_data.json",
    "content": "{\n  \"id\": \"FIREFLIES_VcBdZpuV82rImQCA\",\n  \"semantic_identifier\": \"Lead Generation Efforts\",\n  \"primary_owners\": \"admin@onyx-test.com\",\n  \"secondary_owners\": [],\n  \"doc_updated_at\": \"2025-01-10 19:10:00+00:00\",\n  \"metadata\": {\n    \"meeting_date\": \"2025-01-10 19:10:00+00:00\",\n    \"duration_min\": \"10\"\n  },\n  \"sections\": [\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=153.1\",\n      \"text\": \"test_user_1 1:  Hey, David, thanks for taking the time today.\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=158.14\",\n      \"text\": \"Test Admin Admin: Of course Sarah, It's nice to see you. Whenever you're ready.\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=165.1\",\n      \"text\": \"test_user_1 1: All right then, David, let's jump right in. How are the lead generation efforts for the new product launch looking?\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=171.084\",\n      \"text\": \"Test Admin Admin: So far we've seen a good initial response, but we're facing a slight challenge with qualifying leads. The sales team is getting inquiries. Some aren't quite aligned with our ideal customer profile.\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=191.86\",\n      \"text\": \"test_user_1 1: That makes sense. Do you think we need to adjust our marketing messaging to better target the right audience?\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=202.26\",\n      \"text\": \"Test Admin Admin: Absolutely. Maybe we could emphasize the key features that are most relevant to our target market in the marketing materials. What are your thoughts on refining the lead capture to gather more specific information?\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=225.99\",\n      \"text\": \"test_user_1 1: I think that's a great idea. We could add additional qualifying questions to ensure we're capturing leads with the right needs.\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=238.56\",\n      \"text\": \"Test Admin Admin: On another note, how are the social media campaigns performing? Are we seeing good engagement with the new product launch post?\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=257.2\",\n      \"text\": \"test_user_1 1: The engagement is positive, but we could potentially increase increase reach further with targeted ad campaigns and key platforms.\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=268.91\",\n      \"text\": \"Test Admin Admin: Agreed. Let's discuss a strategy to develop targeted ads that focus on the pain points our ideal customers are facing and how our product solves them.\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=270.27\",\n      \"text\": \"test_user_1 1: We can collaborate on creating specific ad copy that highlights these benefits.\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=289.06\",\n      \"text\": \"Test Admin Admin: All right, so to summarize, let's prioritize refining the lead capture form, develop targeted social media ads, and make sure our marketing method clearly aligns with our ideal customer profile.\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=303.38\",\n      \"text\": \"test_user_1 1: Yep. And let's schedule a follow up meeting in a week, review progress and discuss any adjustments.\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=310.9\",\n      \"text\": \"Test Admin Admin: Sounds good. I'll send you address updated lead form by the end of the day. Thanks, Sarah.\"\n    },\n    {\n      \"link\": \"https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=319.19\",\n      \"text\": \"test_user_1 1: Thank you David.\"\n    }\n  ]\n}\n"
  },
  {
    "path": "backend/tests/daily/connectors/gitbook/test_gitbook_connector.py",
    "content": "import os\nimport time\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.gitbook.connector import GitbookConnector\nfrom onyx.connectors.models import HierarchyNode\n\n\n@pytest.fixture\ndef gitbook_connector() -> GitbookConnector:\n    connector = GitbookConnector(\n        space_id=os.environ[\"GITBOOK_SPACE_ID\"],\n    )\n    connector.load_credentials(\n        {\n            \"gitbook_api_key\": os.environ[\"GITBOOK_API_KEY\"],\n        }\n    )\n    return connector\n\n\nNUM_PAGES = 3\n\n\ndef test_gitbook_connector_basic(gitbook_connector: GitbookConnector) -> None:\n    doc_batch_generator = gitbook_connector.load_from_state()\n\n    # Get first batch of documents\n    doc_batch = next(doc_batch_generator)\n    assert len(doc_batch) == NUM_PAGES\n\n    # Verify first document structure\n    main_doc = doc_batch[0]\n    assert not isinstance(main_doc, HierarchyNode)\n\n    # Basic document properties\n    assert main_doc.id.startswith(\"gitbook-\")\n    assert main_doc.semantic_identifier == \"Acme Corp Internal Handbook\"\n    assert main_doc.source == DocumentSource.GITBOOK\n\n    # Metadata checks\n    assert \"path\" in main_doc.metadata\n    assert \"type\" in main_doc.metadata\n    assert \"kind\" in main_doc.metadata\n\n    # Section checks\n    assert len(main_doc.sections) == 1\n    section = main_doc.sections[0]\n\n    # Content specific checks\n    content = section.text\n    assert content is not None, \"Section text should not be None\"\n\n    # Check for specific content elements\n    assert \"* Fruit Shopping List:\" in content\n    assert \"> test quote it doesn't mean anything\" in content\n\n    # Check headings\n    assert \"# Heading 1\" in content\n    assert \"## Heading 2\" in content\n    assert \"### Heading 3\" in content\n\n    # Check task list\n    assert \"- [ ] Uncompleted Task\" in content\n    assert \"- [x] Completed Task\" in content\n\n    # Check table content\n    assert \"| ethereum | 10 | 3000 |\" in content\n    assert \"| bitcoin | 2 | 98000 |\" in content\n\n    # Check paragraph content\n    assert \"New York City comprises 5 boroughs\" in content\n    assert \"Empire State Building\" in content\n\n    # Check code block (just verify presence of some unique code elements)\n    assert \"function fizzBuzz(n)\" in content\n    assert 'res.push(\"FizzBuzz\")' in content\n\n    assert section.link  # Should have a URL\n\n    nested1 = doc_batch[1]\n    assert not isinstance(nested1, HierarchyNode)\n    assert nested1.id.startswith(\"gitbook-\")\n    assert nested1.semantic_identifier == \"Nested1\"\n    assert len(nested1.sections) == 1\n    # extra newlines at the end, remove them to make test easier\n    assert nested1.sections[0].text is not None\n    assert nested1.sections[0].text.strip() == \"nested1\"\n    assert nested1.source == DocumentSource.GITBOOK\n\n    nested2 = doc_batch[2]\n    assert not isinstance(nested2, HierarchyNode)\n    assert nested2.id.startswith(\"gitbook-\")\n    assert nested2.semantic_identifier == \"Nested2\"\n    assert len(nested2.sections) == 1\n    assert nested2.sections[0].text is not None\n    assert nested2.sections[0].text.strip() == \"nested2\"\n    assert nested2.source == DocumentSource.GITBOOK\n\n    # Time-based polling test\n    current_time = time.time()\n    poll_docs = gitbook_connector.poll_source(0, current_time)\n    poll_batch = next(poll_docs)\n    assert len(poll_batch) == NUM_PAGES\n"
  },
  {
    "path": "backend/tests/daily/connectors/github/test_github_basic.py",
    "content": "import os\nimport time\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.github.connector import GithubConnector\nfrom tests.daily.connectors.utils import load_all_from_connector\n\n\n@pytest.fixture\ndef github_connector() -> GithubConnector:\n    connector = GithubConnector(\n        repo_owner=\"onyx-dot-app\",\n        repositories=\"documentation\",\n        include_prs=True,\n        include_issues=True,\n    )\n    connector.load_credentials(\n        {\n            \"github_access_token\": os.environ[\"ACCESS_TOKEN_GITHUB\"],\n        }\n    )\n    return connector\n\n\ndef test_github_connector_basic(github_connector: GithubConnector) -> None:\n    docs = load_all_from_connector(\n        connector=github_connector,\n        start=0,\n        end=time.time(),\n    ).documents\n    assert len(docs) > 1  # We expect at least one PR and one Issue to exist\n\n    # Test the first document's structure\n    pr_doc = docs[0]\n    issue_doc = docs[-1]\n\n    # Verify basic document properties\n    assert pr_doc.source == DocumentSource.GITHUB\n    assert pr_doc.secondary_owners is None\n    assert pr_doc.from_ingestion_api is False\n    assert pr_doc.additional_info is None\n\n    # Verify GitHub-specific properties\n    assert \"github.com\" in pr_doc.id  # Should be a GitHub URL\n\n    # Verify PR-specific properties\n    assert pr_doc.metadata is not None\n    assert pr_doc.metadata.get(\"object_type\") == \"PullRequest\"\n    assert \"id\" in pr_doc.metadata\n    assert \"merged\" in pr_doc.metadata\n    assert \"state\" in pr_doc.metadata\n    assert \"user\" in pr_doc.metadata\n    assert \"assignees\" in pr_doc.metadata\n    assert pr_doc.metadata.get(\"repo\") == \"onyx-dot-app/documentation\"\n    assert \"num_commits\" in pr_doc.metadata\n    assert \"num_files_changed\" in pr_doc.metadata\n    assert \"labels\" in pr_doc.metadata\n    assert \"created_at\" in pr_doc.metadata\n\n    # Verify Issue-specific properties\n    assert issue_doc.metadata is not None\n    assert issue_doc.metadata.get(\"object_type\") == \"Issue\"\n    assert \"id\" in issue_doc.metadata\n    assert \"state\" in issue_doc.metadata\n    assert \"user\" in issue_doc.metadata\n    assert \"assignees\" in issue_doc.metadata\n    assert issue_doc.metadata.get(\"repo\") == \"onyx-dot-app/documentation\"\n    assert \"labels\" in issue_doc.metadata\n    assert \"created_at\" in issue_doc.metadata\n\n    # Verify sections\n    assert len(pr_doc.sections) == 1\n    section = pr_doc.sections[0]\n    assert section.link == pr_doc.id  # Section link should match document ID\n    assert isinstance(section.text, str)  # Should have some text content\n"
  },
  {
    "path": "backend/tests/daily/connectors/gitlab/test_gitlab_basic.py",
    "content": "import itertools\nimport os\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.gitlab.connector import GitlabConnector\nfrom onyx.connectors.models import HierarchyNode\n\n\n@pytest.fixture\ndef gitlab_connector() -> GitlabConnector:\n    connector = GitlabConnector(\n        project_owner=\"onyx2895818\",\n        project_name=\"onyx\",\n        include_mrs=True,\n        include_issues=True,\n        include_code_files=True,  # Include code files in the test\n    )\n    # Ensure GITLAB_ACCESS_TOKEN and optionally GITLAB_URL are set in the environment\n    gitlab_url = os.environ.get(\"GITLAB_URL\", \"https://gitlab.com\")\n    gitlab_token = os.environ.get(\"GITLAB_ACCESS_TOKEN\")\n\n    if not gitlab_token:\n        pytest.skip(\"GITLAB_ACCESS_TOKEN environment variable not set.\")\n\n    connector.load_credentials(\n        {\n            \"gitlab_access_token\": gitlab_token,\n            \"gitlab_url\": gitlab_url,\n        }\n    )\n    return connector\n\n\ndef test_gitlab_connector_basic(gitlab_connector: GitlabConnector) -> None:\n    doc_batches = gitlab_connector.load_from_state()\n    docs = list(itertools.chain(*doc_batches))\n    # Assert right number of docs - Adjust if necessary based on test repo state\n    assert len(docs) == 79\n\n    # Find one of each type to validate\n    validated_mr = False\n    validated_issue = False\n    validated_code_file = False\n    gitlab_base_url = os.environ.get(\"GITLAB_URL\", \"https://gitlab.com\").split(\"//\")[-1]\n    project_path = f\"{gitlab_connector.project_owner}/{gitlab_connector.project_name}\"\n\n    # --- Specific Document Details to Validate ---\n    target_mr_id = f\"https://{gitlab_base_url}/{project_path}/-/merge_requests/1\"\n    target_issue_id = f\"https://{gitlab_base_url}/{project_path}/-/work_items/2\"\n    target_code_file_semantic_id = \"README.md\"\n    # ---\n\n    for doc in docs:\n        if isinstance(doc, HierarchyNode):\n            continue\n        # Verify basic document properties (common to all types)\n        assert doc.source == DocumentSource.GITLAB\n        assert doc.secondary_owners is None\n        assert doc.from_ingestion_api is False\n        assert doc.additional_info is None\n        assert isinstance(doc.id, str)\n        assert doc.metadata is not None\n        assert \"type\" in doc.metadata\n        doc_type = doc.metadata[\"type\"]\n\n        # Verify sections (common structure)\n        assert len(doc.sections) >= 1\n        section = doc.sections[0]\n        assert isinstance(section.link, str)\n        assert gitlab_base_url in section.link\n        assert isinstance(section.text, str)\n\n        # --- Type-specific and Content Validation ---\n        if doc.id == target_mr_id and doc_type == \"MergeRequest\":\n            assert doc.metadata[\"state\"] == \"opened\"\n            assert doc.semantic_identifier == \"Add awesome feature\"\n            assert section.text == \"This MR implements the awesome feature\"\n            assert doc.primary_owners is not None\n            assert len(doc.primary_owners) == 1\n            assert (\n                doc.primary_owners[0].display_name == \"Test\"\n            )  # Adjust if author changes\n            assert doc.id == section.link\n            validated_mr = True\n        elif doc.id == target_issue_id and doc_type == \"ISSUE\":\n            assert doc.metadata[\"state\"] == \"opened\"\n            assert doc.semantic_identifier == \"Investigate performance issue\"\n            assert (\n                section.text\n                == \"Investigate and resolve the performance degradation on endpoint X\"\n            )\n            assert doc.primary_owners is not None\n            assert len(doc.primary_owners) == 1\n            assert (\n                doc.primary_owners[0].display_name == \"Test\"\n            )  # Adjust if author changes\n            assert doc.id == section.link\n            validated_issue = True\n        elif (\n            doc.semantic_identifier == target_code_file_semantic_id\n            and doc_type == \"CodeFile\"\n        ):\n            # ID is a git hash (e.g., 'd177...'), Link is the blob URL\n            assert doc.id != section.link\n            assert section.link.endswith(\"/README.md\")\n            assert \"# onyx\" in section.text  # Check for a known part of the content\n            # Code files might not have primary owners assigned this way\n            # assert len(doc.primary_owners) == 0\n            validated_code_file = True\n\n        # Generic validation for *any* document of the type if specific one not found yet\n        elif doc_type == \"MergeRequest\" and not validated_mr:\n            assert \"state\" in doc.metadata\n            assert gitlab_base_url in doc.id  # MR ID should be a URL\n            assert doc.id == section.link  # Link and ID are the same URL\n        elif doc_type == \"ISSUE\" and not validated_issue:\n            assert \"state\" in doc.metadata\n            assert gitlab_base_url in doc.id  # Issue ID should be a URL\n            assert doc.id == section.link  # Link and ID are the same URL\n        elif doc_type == \"CodeFile\" and not validated_code_file:\n            assert doc.id != section.link  # ID is GID/hash, link is blob URL\n\n        # Early exit optimization (optional)\n        # if validated_mr and validated_issue and validated_code_file:\n        #     break\n\n    # Assert that we found and validated the specific documents\n    assert (\n        validated_mr\n    ), f\"Failed to find and validate the specific MergeRequest ({target_mr_id}).\"\n    assert (\n        validated_issue\n    ), f\"Failed to find and validate the specific Issue ({target_issue_id}).\"\n    assert (\n        validated_code_file\n    ), f\"Failed to find and validate the specific CodeFile ({target_code_file_semantic_id}).\"\n"
  },
  {
    "path": "backend/tests/daily/connectors/gmail/conftest.py",
    "content": "import json\nimport os\nfrom collections.abc import Callable\n\nimport pytest\n\nfrom onyx.connectors.gmail.connector import GmailConnector\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_AUTHENTICATION_METHOD,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_DICT_TOKEN_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    GoogleOAuthAuthenticationMethod,\n)\nfrom tests.load_env_vars import load_env_vars\n\n\n# Load environment variables at the module level\nload_env_vars()\n\n\ndef parse_credentials(env_str: str) -> dict:\n    \"\"\"\n    Parse a double-escaped JSON string from environment variables into a Python dictionary.\n\n    Args:\n        env_str (str): The double-escaped JSON string from environment variables\n\n    Returns:\n        dict: Parsed OAuth credentials\n    \"\"\"\n    # first try normally\n    try:\n        return json.loads(env_str)\n    except Exception:\n        # First, try remove extra escaping backslashes\n        unescaped = env_str.replace('\\\\\"', '\"')\n\n        # remove leading / trailing quotes\n        unescaped = unescaped.strip('\"')\n\n        # Now parse the JSON\n        return json.loads(unescaped)\n\n\n@pytest.fixture\ndef google_gmail_oauth_connector_factory() -> Callable[..., GmailConnector]:\n    def _connector_factory(\n        primary_admin_email: str = \"admin@onyx-test.com\",\n    ) -> GmailConnector:\n        print(\"Creating GmailConnector with OAuth credentials\")\n        connector = GmailConnector()\n\n        json_string = os.environ[\"GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR\"]\n        refried_json_string = json.dumps(parse_credentials(json_string))\n\n        credentials_json = {\n            DB_CREDENTIALS_DICT_TOKEN_KEY: refried_json_string,\n            DB_CREDENTIALS_PRIMARY_ADMIN_KEY: primary_admin_email,\n            DB_CREDENTIALS_AUTHENTICATION_METHOD: GoogleOAuthAuthenticationMethod.UPLOADED.value,\n        }\n        connector.load_credentials(credentials_json)\n        return connector\n\n    return _connector_factory\n\n\n@pytest.fixture\ndef google_gmail_service_acct_connector_factory() -> Callable[..., GmailConnector]:\n    def _connector_factory(\n        primary_admin_email: str = \"admin@onyx-test.com\",\n    ) -> GmailConnector:\n        print(\"Creating GmailConnector with service account credentials\")\n        connector = GmailConnector()\n\n        json_string = os.environ[\"GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR\"]\n        refried_json_string = json.dumps(parse_credentials(json_string))\n\n        # Load Service Account Credentials\n        connector.load_credentials(\n            {\n                DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: refried_json_string,\n                DB_CREDENTIALS_PRIMARY_ADMIN_KEY: primary_admin_email,\n                DB_CREDENTIALS_AUTHENTICATION_METHOD: GoogleOAuthAuthenticationMethod.UPLOADED.value,\n            }\n        )\n        return connector\n\n    return _connector_factory\n"
  },
  {
    "path": "backend/tests/daily/connectors/gmail/test_gmail_connector.py",
    "content": "from collections.abc import Callable\nfrom typing import Any\nfrom typing import cast\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.connectors.gmail.connector import GmailConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom tests.unit.onyx.connectors.utils import load_everything_from_checkpoint_connector\n\n\n_THREAD_1_START_TIME = 1730568700\n_THREAD_1_END_TIME = 1730569000\n\n\"\"\"\nThis thread was 4 emails long:\n    admin@onyx-test.com -> test-group-1@onyx-test.com (conaining test_user_1 and test_user_2)\n    test_user_1@onyx-test.com -> admin@onyx-test.com\n    admin@onyx-test.com -> test_user_2@onyx-test.com + BCC: test_user_3@onyx-test.com\n    test_user_3@onyx-test.com -> admin@onyx-test.com\n\"\"\"\n_THREAD_1_BY_ID: dict[str, dict[str, Any]] = {\n    \"192edefb315737c3\": {\n        \"email\": \"admin@onyx-test.com\",\n        \"sections_count\": 4,\n        \"primary_owners\": set(\n            [\n                \"admin@onyx-test.com\",\n                \"test_user_1@onyx-test.com\",\n                \"test_user_3@onyx-test.com\",\n            ]\n        ),\n        \"secondary_owners\": set(\n            [\n                \"test-group-1@onyx-test.com\",\n                \"admin@onyx-test.com\",\n                \"test_user_2@onyx-test.com\",\n                \"test_user_3@onyx-test.com\",\n            ]\n        ),\n    },\n    \"192edf020d2f5def\": {\n        \"email\": \"test_user_1@onyx-test.com\",\n        \"sections_count\": 2,\n        \"primary_owners\": set([\"admin@onyx-test.com\", \"test_user_1@onyx-test.com\"]),\n        \"secondary_owners\": set([\"test-group-1@onyx-test.com\", \"admin@onyx-test.com\"]),\n    },\n    \"192edf020ae90aab\": {\n        \"email\": \"test_user_2@onyx-test.com\",\n        \"sections_count\": 2,\n        \"primary_owners\": set([\"admin@onyx-test.com\"]),\n        \"secondary_owners\": set(\n            [\"test-group-1@onyx-test.com\", \"test_user_2@onyx-test.com\"]\n        ),\n    },\n    \"192edf18316015fa\": {\n        \"email\": \"test_user_3@onyx-test.com\",\n        \"sections_count\": 2,\n        \"primary_owners\": set([\"admin@onyx-test.com\", \"test_user_3@onyx-test.com\"]),\n        \"secondary_owners\": set(\n            [\n                \"admin@onyx-test.com\",\n                \"test_user_2@onyx-test.com\",\n                \"test_user_3@onyx-test.com\",\n            ]\n        ),\n    },\n}\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_slim_docs_retrieval(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_gmail_service_acct_connector_factory: Callable[..., GmailConnector],\n) -> None:\n    print(\"\\n\\nRunning test_slim_docs_retrieval\")\n    connector = google_gmail_service_acct_connector_factory()\n    retrieved_slim_docs: list[SlimDocument] = []\n    for doc_batch in connector.retrieve_all_slim_docs_perm_sync(\n        _THREAD_1_START_TIME, _THREAD_1_END_TIME\n    ):\n        retrieved_slim_docs.extend(\n            [doc for doc in doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    assert len(retrieved_slim_docs) == 4\n\n    for doc in retrieved_slim_docs:\n        assert doc.external_access is not None\n        assert len(doc.external_access.external_user_emails) == 1\n        user_email = next(iter(doc.external_access.external_user_emails))\n        assert _THREAD_1_BY_ID[doc.id][\"email\"] == user_email\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_docs_retrieval(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_gmail_service_acct_connector_factory: Callable[..., GmailConnector],\n) -> None:\n    print(\"\\n\\nRunning test_docs_retrieval\")\n    connector = google_gmail_service_acct_connector_factory()\n    retrieved_docs: list[Document] = []\n    for doc_batch in load_everything_from_checkpoint_connector(\n        connector, _THREAD_1_START_TIME, _THREAD_1_END_TIME\n    ):\n        assert all(isinstance(item, Document) for item in doc_batch.items)\n        retrieved_docs.extend(cast(list[Document], doc_batch.items))\n\n    assert len(retrieved_docs) == 4\n\n    for doc in retrieved_docs:\n        id = doc.id\n        retrieved_primary_owner_emails: set[str | None] = set()\n        retrieved_secondary_owner_emails: set[str | None] = set()\n        if doc.primary_owners:\n            retrieved_primary_owner_emails = set(\n                [owner.email for owner in doc.primary_owners]\n            )\n        if doc.secondary_owners:\n            retrieved_secondary_owner_emails = set(\n                [owner.email for owner in doc.secondary_owners]\n            )\n        assert _THREAD_1_BY_ID[id][\"sections_count\"] == len(doc.sections)\n        assert _THREAD_1_BY_ID[id][\"primary_owners\"] == retrieved_primary_owner_emails\n        assert (\n            _THREAD_1_BY_ID[id][\"secondary_owners\"] == retrieved_secondary_owner_emails\n        )\n"
  },
  {
    "path": "backend/tests/daily/connectors/gong/test_gong.py",
    "content": "import os\nimport time\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.connectors.gong.connector import GongConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\n\n\n@pytest.fixture\ndef gong_connector() -> GongConnector:\n    connector = GongConnector()\n\n    connector.load_credentials(\n        {\n            \"gong_access_key\": os.environ[\"GONG_ACCESS_KEY\"],\n            \"gong_access_key_secret\": os.environ[\"GONG_ACCESS_KEY_SECRET\"],\n        }\n    )\n\n    return connector\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_gong_basic(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    gong_connector: GongConnector,\n) -> None:\n    doc_batch_generator = gong_connector.poll_source(0, time.time())\n\n    doc_batch = next(doc_batch_generator)\n    with pytest.raises(StopIteration):\n        next(doc_batch_generator)\n\n    assert len(doc_batch) == 2\n\n    docs: list[Document] = []\n    for doc in doc_batch:\n        if not isinstance(doc, HierarchyNode):\n            docs.append(doc)\n\n    assert docs[0].semantic_identifier == \"test with chris\"\n    assert docs[1].semantic_identifier == \"Testing Gong\"\n"
  },
  {
    "path": "backend/tests/daily/connectors/google_drive/conftest.py",
    "content": "import json\nimport os\nimport resource\nfrom collections.abc import Callable\n\nimport pytest\n\nfrom onyx.connectors.google_drive.connector import GoogleDriveConnector\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_AUTHENTICATION_METHOD,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_DICT_TOKEN_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    GoogleOAuthAuthenticationMethod,\n)\nfrom tests.load_env_vars import load_env_vars\n\n\n# Load environment variables at the module level\nload_env_vars()\n\n\n_USER_TO_OAUTH_CREDENTIALS_MAP = {\n    \"admin@onyx-test.com\": \"GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR\",\n    \"test_user_1@onyx-test.com\": \"GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1\",\n}\n\n_USER_TO_SERVICE_ACCOUNT_CREDENTIALS_MAP = {\n    \"admin@onyx-test.com\": \"GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR\",\n}\n\n\ndef parse_credentials(env_str: str) -> dict:\n    \"\"\"\n    Parse a double-escaped JSON string from environment variables into a Python dictionary.\n\n    Args:\n        env_str (str): The double-escaped JSON string from environment variables\n\n    Returns:\n        dict: Parsed OAuth credentials\n    \"\"\"\n    # first try normally\n    try:\n        return json.loads(env_str)\n    except Exception:\n        # First, try remove extra escaping backslashes\n        unescaped = env_str.replace('\\\\\"', '\"')\n\n        # remove leading / trailing quotes\n        unescaped = unescaped.strip('\"')\n\n        # Now parse the JSON\n        return json.loads(unescaped)\n\n\ndef get_credentials_from_env(email: str, oauth: bool) -> dict:\n    if oauth:\n        raw_credential_string = os.environ[_USER_TO_OAUTH_CREDENTIALS_MAP[email]]\n    else:\n        raw_credential_string = os.environ[\n            _USER_TO_SERVICE_ACCOUNT_CREDENTIALS_MAP[email]\n        ]\n\n    refried_credential_string = json.dumps(parse_credentials(raw_credential_string))\n\n    cred_key = (\n        DB_CREDENTIALS_DICT_TOKEN_KEY\n        if oauth\n        else DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY\n    )\n    return {\n        cred_key: refried_credential_string,\n        DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,\n        DB_CREDENTIALS_AUTHENTICATION_METHOD: GoogleOAuthAuthenticationMethod.UPLOADED.value,\n    }\n\n\n@pytest.fixture\ndef google_drive_oauth_uploaded_connector_factory() -> (\n    Callable[..., GoogleDriveConnector]\n):\n    def _connector_factory(\n        primary_admin_email: str,\n        include_shared_drives: bool,\n        shared_drive_urls: str | None,\n        include_my_drives: bool,\n        my_drive_emails: str | None,\n        shared_folder_urls: str | None,\n        include_files_shared_with_me: bool,\n    ) -> GoogleDriveConnector:\n        print(\"Creating GoogleDriveConnector with OAuth credentials\")\n        connector = GoogleDriveConnector(\n            include_shared_drives=include_shared_drives,\n            shared_drive_urls=shared_drive_urls,\n            include_my_drives=include_my_drives,\n            include_files_shared_with_me=include_files_shared_with_me,\n            my_drive_emails=my_drive_emails,\n            shared_folder_urls=shared_folder_urls,\n        )\n\n        credentials_json = get_credentials_from_env(primary_admin_email, oauth=True)\n        connector.load_credentials(credentials_json)\n        return connector\n\n    return _connector_factory\n\n\n@pytest.fixture\ndef google_drive_service_acct_connector_factory() -> (\n    Callable[..., GoogleDriveConnector]\n):\n    def _connector_factory(\n        primary_admin_email: str,\n        include_shared_drives: bool,\n        shared_drive_urls: str | None,\n        include_my_drives: bool,\n        my_drive_emails: str | None,\n        shared_folder_urls: str | None,\n        include_files_shared_with_me: bool,\n        specific_user_emails: str | None = None,\n    ) -> GoogleDriveConnector:\n        print(\"Creating GoogleDriveConnector with service account credentials\")\n        connector = GoogleDriveConnector(\n            include_shared_drives=include_shared_drives,\n            shared_drive_urls=shared_drive_urls,\n            include_my_drives=include_my_drives,\n            my_drive_emails=my_drive_emails,\n            shared_folder_urls=shared_folder_urls,\n            include_files_shared_with_me=include_files_shared_with_me,\n            specific_user_emails=specific_user_emails,\n        )\n\n        # Load Service Account Credentials\n        credentials_json = get_credentials_from_env(\n            email=primary_admin_email, oauth=False\n        )\n        connector.load_credentials(credentials_json)\n        return connector\n\n    return _connector_factory\n\n\n@pytest.fixture(scope=\"session\", autouse=True)\ndef set_resource_limits() -> None:\n    # the google sdk is aggressive about using up file descriptors and\n    # macos is stingy ... these tests will fail randomly unless the descriptor limit is raised\n    RLIMIT_MINIMUM = 2048\n    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)\n    desired_soft = min(RLIMIT_MINIMUM, hard)  # Pick your target here\n\n    print(f\"Open file limit: soft={soft} hard={hard} soft_required={RLIMIT_MINIMUM}\")\n\n    if soft < desired_soft:\n        print(f\"Raising open file limit: {soft} -> {desired_soft}\")\n        resource.setrlimit(resource.RLIMIT_NOFILE, (desired_soft, hard))\n\n    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)\n    print(f\"New open file limit: soft={soft} hard={hard}\")\n    return\n"
  },
  {
    "path": "backend/tests/daily/connectors/google_drive/consts_and_utils.py",
    "content": "import time\nfrom collections.abc import Sequence\nfrom dataclasses import dataclass\nfrom dataclasses import field\nfrom dataclasses import replace\nfrom urllib.parse import urlparse\n\nfrom onyx.connectors.google_drive.connector import GoogleDriveConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import TextSection\nfrom onyx.db.enums import HierarchyNodeType\nfrom tests.daily.connectors.utils import ConnectorOutput\nfrom tests.daily.connectors.utils import load_all_from_connector\n\nALL_FILES = list(range(0, 60))\nSHARED_DRIVE_FILES = list(range(20, 25))\n\n\nADMIN_FILE_IDS = list(range(0, 5))\nADMIN_FOLDER_3_FILE_IDS = list(range(65, 70))  # This folder is shared with test_user_1\nTEST_USER_1_FILE_IDS = list(range(5, 10))\nTEST_USER_2_FILE_IDS = list(range(10, 15))\nTEST_USER_3_FILE_IDS = list(range(15, 20))\nSHARED_DRIVE_1_FILE_IDS = list(range(20, 25))\nFOLDER_1_FILE_IDS = list(range(25, 30))\nFOLDER_1_1_FILE_IDS = list(range(30, 35))\nFOLDER_1_2_FILE_IDS = list(range(35, 40))  # This folder is public\nSHARED_DRIVE_2_FILE_IDS = list(range(40, 45))\nFOLDER_2_FILE_IDS = list(range(45, 50))\nFOLDER_2_1_FILE_IDS = list(range(50, 55))\nFOLDER_2_2_FILE_IDS = list(range(55, 60))\nSECTIONS_FILE_IDS = [61]\nFOLDER_3_FILE_IDS = list(range(62, 65))\n\nDONWLOAD_REVOKED_FILE_ID = 21\n\nPUBLIC_FOLDER_RANGE = FOLDER_1_2_FILE_IDS\nPUBLIC_FILE_IDS = list(range(55, 57))\nPUBLIC_RANGE = PUBLIC_FOLDER_RANGE + PUBLIC_FILE_IDS\n\nSHARED_DRIVE_1_URL = \"https://drive.google.com/drive/folders/0AC_OJ4BkMd4kUk9PVA\"\n# Group 1 is given access to this folder\nFOLDER_1_URL = (\n    \"https://drive.google.com/drive/folders/1d3I7U3vUZMDziF1OQqYRkB8Jp2s_GWUn\"\n)\nFOLDER_1_1_URL = (\n    \"https://drive.google.com/drive/folders/1aR33-zwzl_mnRAwH55GgtWTE-4A4yWWI\"\n)\nFOLDER_1_2_URL = (\n    \"https://drive.google.com/drive/folders/1IO0X55VhvLXf4mdxzHxuKf4wxrDBB6jq\"\n)\nSHARED_DRIVE_2_URL = \"https://drive.google.com/drive/folders/0ABKspIh7P4f4Uk9PVA\"\nFOLDER_2_URL = (\n    \"https://drive.google.com/drive/folders/1lNpCJ1teu8Se0louwL0oOHK9nEalskof\"\n)\nFOLDER_2_1_URL = (\n    \"https://drive.google.com/drive/folders/1XeDOMWwxTDiVr9Ig2gKum3Zq_Wivv6zY\"\n)\nFOLDER_2_2_URL = (\n    \"https://drive.google.com/drive/folders/1RKlsexA8h7NHvBAWRbU27MJotic7KXe3\"\n)\nFOLDER_3_URL = (\n    \"https://drive.google.com/drive/folders/1LHibIEXfpUmqZ-XjBea44SocA91Nkveu\"\n)\nSECTIONS_FOLDER_URL = (\n    \"https://drive.google.com/drive/u/5/folders/1loe6XJ-pJxu9YYPv7cF3Hmz296VNzA33\"\n)\n\n\ndef extract_folder_id_from_url(url: str) -> str:\n    \"\"\"Extract the folder ID from a Google Drive URL.\"\"\"\n    parsed = urlparse(url)\n    # URL format: /drive/folders/{id} or /drive/u/{num}/folders/{id}\n    parts = parsed.path.split(\"/\")\n    # Find 'folders' and take the next segment\n    for i, part in enumerate(parts):\n        if part == \"folders\" and i + 1 < len(parts):\n            return parts[i + 1]\n    raise ValueError(f\"Could not extract folder ID from URL: {url}\")\n\n\n# Folder IDs extracted from URLs\nSHARED_DRIVE_1_ID = extract_folder_id_from_url(SHARED_DRIVE_1_URL)\nSHARED_DRIVE_2_ID = extract_folder_id_from_url(SHARED_DRIVE_2_URL)\nFOLDER_1_ID = extract_folder_id_from_url(FOLDER_1_URL)\nFOLDER_1_1_ID = extract_folder_id_from_url(FOLDER_1_1_URL)\nFOLDER_1_2_ID = extract_folder_id_from_url(FOLDER_1_2_URL)\nFOLDER_2_ID = extract_folder_id_from_url(FOLDER_2_URL)\nFOLDER_2_1_ID = extract_folder_id_from_url(FOLDER_2_1_URL)\nFOLDER_2_2_ID = extract_folder_id_from_url(FOLDER_2_2_URL)\nFOLDER_3_ID = extract_folder_id_from_url(FOLDER_3_URL)\nSECTIONS_FOLDER_ID = extract_folder_id_from_url(SECTIONS_FOLDER_URL)\nRESTRICTED_ACCESS_FOLDER_ID = \"1HK4wZ16ucz8QGywlcS87Y629W7i7KdeN\"\n\n\n# ============================================================================\n# FOLDER HIERARCHY DEFINITION\n# ============================================================================\n# This defines the expected folder hierarchy for our test Google Drive setup.\n#\n# Folder Hierarchy:\n# shared_drive_1 (0AC_OJ4BkMd4kUk9PVA)\n#   ├── restricted_access_folder (1HK4wZ16ucz8QGywlcS87Y629W7i7KdeN)\n#   └── folder_1 (1d3I7U3vUZMDziF1OQqYRkB8Jp2s_GWUn)\n#       ├── folder_1_1 (1aR33-zwzl_mnRAwH55GgtWTE-4A4yWWI)\n#       └── folder_1_2 (1IO0X55VhvLXf4mdxzHxuKf4wxrDBB6jq)\n#\n# shared_drive_2 (0ABKspIh7P4f4Uk9PVA)\n#   ├── sections_folder (1loe6XJ-pJxu9YYPv7cF3Hmz296VNzA33)\n#   └── folder_2 (1lNpCJ1teu8Se0louwL0oOHK9nEalskof)\n#       ├── folder_2_1 (1XeDOMWwxTDiVr9Ig2gKum3Zq_Wivv6zY)\n#       └── folder_2_2 (1RKlsexA8h7NHvBAWRbU27MJotic7KXe3)\n# ============================================================================\n\n\n@dataclass\nclass ExpectedHierarchyNode:\n    \"\"\"Expected hierarchy node for test verification.\"\"\"\n\n    raw_node_id: str\n    display_name: str\n    node_type: HierarchyNodeType\n    # None means parent is the source root (shared drive or my drive)\n    raw_parent_id: str | None = None\n    children: list[\"ExpectedHierarchyNode\"] = field(default_factory=list)\n\n\n# Expected hierarchy for shared_drive_1\nEXPECTED_SHARED_DRIVE_1_HIERARCHY = ExpectedHierarchyNode(\n    raw_node_id=SHARED_DRIVE_1_ID,\n    display_name=\"Shared Drive 1\",\n    node_type=HierarchyNodeType.SHARED_DRIVE,\n    raw_parent_id=None,\n    children=[\n        ExpectedHierarchyNode(\n            raw_node_id=RESTRICTED_ACCESS_FOLDER_ID,\n            display_name=\"restricted_access\",\n            node_type=HierarchyNodeType.FOLDER,\n            raw_parent_id=SHARED_DRIVE_1_ID,\n        ),\n        ExpectedHierarchyNode(\n            raw_node_id=FOLDER_1_ID,\n            display_name=\"folder 1\",\n            node_type=HierarchyNodeType.FOLDER,\n            raw_parent_id=SHARED_DRIVE_1_ID,\n            children=[\n                ExpectedHierarchyNode(\n                    raw_node_id=FOLDER_1_1_ID,\n                    display_name=\"folder 1-1\",\n                    node_type=HierarchyNodeType.FOLDER,\n                    raw_parent_id=FOLDER_1_ID,\n                ),\n                ExpectedHierarchyNode(\n                    raw_node_id=FOLDER_1_2_ID,\n                    display_name=\"folder 1-2\",\n                    node_type=HierarchyNodeType.FOLDER,\n                    raw_parent_id=FOLDER_1_ID,\n                ),\n            ],\n        ),\n    ],\n)\n\n# Expected hierarchy for shared_drive_2\nEXPECTED_SHARED_DRIVE_2_HIERARCHY = ExpectedHierarchyNode(\n    raw_node_id=SHARED_DRIVE_2_ID,\n    display_name=\"Shared Drive 2\",\n    node_type=HierarchyNodeType.SHARED_DRIVE,\n    raw_parent_id=None,\n    children=[\n        ExpectedHierarchyNode(\n            raw_node_id=SECTIONS_FOLDER_ID,\n            display_name=\"sections\",\n            node_type=HierarchyNodeType.FOLDER,\n            raw_parent_id=SHARED_DRIVE_2_ID,\n        ),\n        ExpectedHierarchyNode(\n            raw_node_id=FOLDER_2_ID,\n            display_name=\"folder 2\",\n            node_type=HierarchyNodeType.FOLDER,\n            raw_parent_id=SHARED_DRIVE_2_ID,\n            children=[\n                ExpectedHierarchyNode(\n                    raw_node_id=FOLDER_2_1_ID,\n                    display_name=\"folder 2-1\",\n                    node_type=HierarchyNodeType.FOLDER,\n                    raw_parent_id=FOLDER_2_ID,\n                ),\n                ExpectedHierarchyNode(\n                    raw_node_id=FOLDER_2_2_ID,\n                    display_name=\"folder 2-2\",\n                    node_type=HierarchyNodeType.FOLDER,\n                    raw_parent_id=FOLDER_2_ID,\n                ),\n            ],\n        ),\n    ],\n)\n\n\ndef flatten_hierarchy(\n    expected: ExpectedHierarchyNode,\n) -> dict[str, ExpectedHierarchyNode]:\n    \"\"\"Flatten an expected hierarchy tree into a dict keyed by raw_node_id.\"\"\"\n    result = {expected.raw_node_id: expected}\n    for child in expected.children:\n        result.update(flatten_hierarchy(child))\n    return result\n\n\ndef _node(\n    raw_node_id: str,\n    display_name: str,\n    node_type: HierarchyNodeType,\n    raw_parent_id: str | None = None,\n) -> ExpectedHierarchyNode:\n    return ExpectedHierarchyNode(\n        raw_node_id=raw_node_id,\n        display_name=display_name,\n        node_type=node_type,\n        raw_parent_id=raw_parent_id,\n    )\n\n\n# Flattened maps for easy lookup\nEXPECTED_SHARED_DRIVE_1_NODES = flatten_hierarchy(EXPECTED_SHARED_DRIVE_1_HIERARCHY)\nEXPECTED_SHARED_DRIVE_2_NODES = flatten_hierarchy(EXPECTED_SHARED_DRIVE_2_HIERARCHY)\n\nEXTERNAL_SHARED_FOLDER_URL = (\n    \"https://drive.google.com/drive/folders/1sWC7Oi0aQGgifLiMnhTjvkhRWVeDa-XS\"\n)\nEXTERNAL_SHARED_FOLDER_ID = \"1sWC7Oi0aQGgifLiMnhTjvkhRWVeDa-XS\"\nEXTERNAL_SHARED_DOCS_IN_FOLDER = [\n    \"https://docs.google.com/document/d/1Sywmv1-H6ENk2GcgieKou3kQHR_0te1mhIUcq8XlcdY\"\n]\nEXTERNAL_SHARED_DOC_SINGLETON = (\n    \"https://docs.google.com/document/d/11kmisDfdvNcw5LYZbkdPVjTOdj-Uc5ma6Jep68xzeeA\"\n)\n\nSHARED_DRIVE_3_URL = \"https://drive.google.com/drive/folders/0AJYm2K_I_vtNUk9PVA\"\n\nRESTRICTED_ACCESS_FOLDER_URL = (\n    \"https://drive.google.com/drive/folders/1HK4wZ16ucz8QGywlcS87Y629W7i7KdeN\"\n)\n\n# ============================================================================\n# PERMISSION SYNC TEST DRIVES\n# ============================================================================\n# These are separate shared drives used specifically for testing permission sync.\n# Each drive has different access levels:\n#\n# PERM_SYNC_DRIVE_ADMIN_ONLY: Only shared with admin\n# PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A: Shared with admin and test_user_1\n# PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B: Shared with admin and test_user_1\n# ============================================================================\n\nPERM_SYNC_DRIVE_ADMIN_ONLY_URL = (\n    \"https://drive.google.com/drive/folders/0ACOrCU1EMD1hUk9PVA\"\n)\nPERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_URL = (\n    \"https://drive.google.com/drive/folders/0ABec4pV29sMuUk9PVA\"\n)\nPERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_URL = (\n    \"https://drive.google.com/drive/folders/0ANpbToRgjHD4Uk9PVA\"\n)\n\nPERM_SYNC_DRIVE_ADMIN_ONLY_ID = \"0ACOrCU1EMD1hUk9PVA\"\nPERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID = \"0ABec4pV29sMuUk9PVA\"\nPERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID = \"0ANpbToRgjHD4Uk9PVA\"\n\n# ============================================================================\n# ADDITIONAL DRIVES/FOLDERS ACCESSIBLE TO TEST_USER_1\n# ============================================================================\n# These are additional shared drives and folders that test_user_1 has access to.\n# They are returned as hierarchy nodes when running the connector as test_user_1.\n# ============================================================================\n\n# Additional shared drives accessible to test_user_1\nTEST_USER_1_MY_DRIVE_ID = \"0AFpeuWG1VyABUk9PVA\"  # My Drive indicator for test_user_1\nTEST_USER_1_MY_DRIVE_FOLDER_ID = (\n    \"1tF10nDFND-GE_IT0f6PjEn2Du6m2k-DE\"  # Child folder (partial sharing)\n)\n\nTEST_USER_1_DRIVE_B_ID = (\n    \"0AFskk4zfZm86Uk9PVA\"  # My_super_special_shared_drive_suuuper_private\n)\nTEST_USER_1_DRIVE_B_FOLDER_ID = (\n    \"1oIj7nigzvP5xI2F8BmibUA8R_J3AbBA-\"  # Child folder (silliness)\n)\n\n# Other drives test_user_1 has access to\nTEST_USER_1_EXTRA_DRIVE_1_ID = \"0AL67XRMq9reYUk9PVA\"  # Okay_fine_admin_I_will_share\nTEST_USER_1_EXTRA_DRIVE_2_ID = \"0ACeKoHrGKxCbUk9PVA\"  # reee test\nTEST_USER_1_EXTRA_FOLDER_ID = (\n    \"1i2Q1TNvUfZkH-A7RGyAqRuEI-3mHANku\"  # read only no download test\n)\n\n# Additional shared drives in the organization that appear when running include_all tests\nADMIN_MY_DRIVE_ID = \"0ABTZwt798K7MUk9PVA\"  # Admin's My Drive\nTEST_USER_2_MY_DRIVE = \"0ADjBZv2nEvJNUk9PVA\"  # Test user 2's My Drive\nTEST_USER_3_MY_DRIVE_ID = \"0AKl0e4Wr5NW7Uk9PVA\"  # Test user 3's My Drive\nPILL_FOLDER_ID = \"1FWzfA369tx9VT8scJ3LCOPBBuTBgt0OH\"  # contains file with date pills\n\nPADDING_DRIVE_URLS = [\n    \"0AOorXE6AfJRAUk9PVA\",\n    \"0ANn2MSqGi74JUk9PVA\",\n    \"0ANI_NFCPzaRwUk9PVA\",\n    \"0ABu8fYjvA21dUk9PVA\",\n]\n\nADMIN_EMAIL = \"admin@onyx-test.com\"\nTEST_USER_1_EMAIL = \"test_user_1@onyx-test.com\"\nTEST_USER_2_EMAIL = \"test_user_2@onyx-test.com\"\nTEST_USER_3_EMAIL = \"test_user_3@onyx-test.com\"\n\n# Expected permissions for perm sync drives\n# Maps drive ID -> set of user emails with access\nPERM_SYNC_DRIVE_ACCESS_MAPPING: dict[str, set[str]] = {\n    PERM_SYNC_DRIVE_ADMIN_ONLY_ID: {ADMIN_EMAIL},\n    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID: {ADMIN_EMAIL, TEST_USER_1_EMAIL},\n    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID: {ADMIN_EMAIL, TEST_USER_1_EMAIL},\n}\n\n# ============================================================================\n# NON-SHARED-DRIVE HIERARCHY NODES\n# ============================================================================\n# These cover My Drive roots, perm sync drives, extra shared drives,\n# and standalone folders that appear in various tests.\n# Display names must match what the Google Drive API actually returns.\n# ============================================================================\n\nEXPECTED_FOLDER_3 = _node(\n    FOLDER_3_ID, \"Folder 3\", HierarchyNodeType.FOLDER, ADMIN_MY_DRIVE_ID\n)\n\nEXPECTED_ADMIN_MY_DRIVE = _node(ADMIN_MY_DRIVE_ID, \"My Drive\", HierarchyNodeType.FOLDER)\nEXPECTED_TEST_USER_1_MY_DRIVE = _node(\n    TEST_USER_1_MY_DRIVE_ID, \"My Drive\", HierarchyNodeType.FOLDER\n)\nEXPECTED_TEST_USER_1_MY_DRIVE_FOLDER = _node(\n    TEST_USER_1_MY_DRIVE_FOLDER_ID,\n    \"partial_sharing\",\n    HierarchyNodeType.FOLDER,\n    TEST_USER_1_MY_DRIVE_ID,\n)\nEXPECTED_TEST_USER_2_MY_DRIVE = _node(\n    TEST_USER_2_MY_DRIVE, \"My Drive\", HierarchyNodeType.FOLDER\n)\nEXPECTED_TEST_USER_3_MY_DRIVE = _node(\n    TEST_USER_3_MY_DRIVE_ID, \"My Drive\", HierarchyNodeType.FOLDER\n)\n\nEXPECTED_PERM_SYNC_DRIVE_ADMIN_ONLY = _node(\n    PERM_SYNC_DRIVE_ADMIN_ONLY_ID,\n    \"perm_sync_drive_0dc9d8b5-e243-4c2f-8678-2235958f7d7c\",\n    HierarchyNodeType.SHARED_DRIVE,\n)\nEXPECTED_PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A = _node(\n    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,\n    \"perm_sync_drive_785db121-0823-4ebe-8689-ad7f52405e32\",\n    HierarchyNodeType.SHARED_DRIVE,\n)\nEXPECTED_PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B = _node(\n    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,\n    \"perm_sync_drive_d8dc3649-3f65-4392-b87f-4b20e0389673\",\n    HierarchyNodeType.SHARED_DRIVE,\n)\n\nEXPECTED_TEST_USER_1_DRIVE_B = _node(\n    TEST_USER_1_DRIVE_B_ID,\n    \"My_super_special_shared_drive_suuuper_private\",\n    HierarchyNodeType.SHARED_DRIVE,\n)\nEXPECTED_TEST_USER_1_DRIVE_B_FOLDER = _node(\n    TEST_USER_1_DRIVE_B_FOLDER_ID,\n    \"silliness\",\n    HierarchyNodeType.FOLDER,\n    TEST_USER_1_DRIVE_B_ID,\n)\nEXPECTED_TEST_USER_1_EXTRA_DRIVE_1 = _node(\n    TEST_USER_1_EXTRA_DRIVE_1_ID,\n    \"Okay_Admin_fine_I_will_share\",\n    HierarchyNodeType.SHARED_DRIVE,\n)\nEXPECTED_TEST_USER_1_EXTRA_DRIVE_2 = _node(\n    TEST_USER_1_EXTRA_DRIVE_2_ID, \"reee test\", HierarchyNodeType.SHARED_DRIVE\n)\nEXPECTED_TEST_USER_1_EXTRA_FOLDER = _node(\n    TEST_USER_1_EXTRA_FOLDER_ID,\n    \"read only no download test\",\n    HierarchyNodeType.FOLDER,\n)\n\nEXPECTED_PILL_FOLDER = _node(\n    PILL_FOLDER_ID, \"pill_folder\", HierarchyNodeType.FOLDER, ADMIN_MY_DRIVE_ID\n)\nEXPECTED_EXTERNAL_SHARED_FOLDER = _node(\n    EXTERNAL_SHARED_FOLDER_ID, \"Onyx-test\", HierarchyNodeType.FOLDER\n)\n\n# Comprehensive mapping of ALL known hierarchy nodes.\n# Every retrieved node is checked against this for display_name and node_type.\nALL_EXPECTED_HIERARCHY_NODES: dict[str, ExpectedHierarchyNode] = {\n    **EXPECTED_SHARED_DRIVE_1_NODES,\n    **EXPECTED_SHARED_DRIVE_2_NODES,\n    FOLDER_3_ID: EXPECTED_FOLDER_3,\n    ADMIN_MY_DRIVE_ID: EXPECTED_ADMIN_MY_DRIVE,\n    TEST_USER_1_MY_DRIVE_ID: EXPECTED_TEST_USER_1_MY_DRIVE,\n    TEST_USER_1_MY_DRIVE_FOLDER_ID: EXPECTED_TEST_USER_1_MY_DRIVE_FOLDER,\n    TEST_USER_2_MY_DRIVE: EXPECTED_TEST_USER_2_MY_DRIVE,\n    TEST_USER_3_MY_DRIVE_ID: EXPECTED_TEST_USER_3_MY_DRIVE,\n    PERM_SYNC_DRIVE_ADMIN_ONLY_ID: EXPECTED_PERM_SYNC_DRIVE_ADMIN_ONLY,\n    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID: EXPECTED_PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A,\n    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID: EXPECTED_PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B,\n    TEST_USER_1_DRIVE_B_ID: EXPECTED_TEST_USER_1_DRIVE_B,\n    TEST_USER_1_DRIVE_B_FOLDER_ID: EXPECTED_TEST_USER_1_DRIVE_B_FOLDER,\n    TEST_USER_1_EXTRA_DRIVE_1_ID: EXPECTED_TEST_USER_1_EXTRA_DRIVE_1,\n    TEST_USER_1_EXTRA_DRIVE_2_ID: EXPECTED_TEST_USER_1_EXTRA_DRIVE_2,\n    TEST_USER_1_EXTRA_FOLDER_ID: EXPECTED_TEST_USER_1_EXTRA_FOLDER,\n    PILL_FOLDER_ID: EXPECTED_PILL_FOLDER,\n    EXTERNAL_SHARED_FOLDER_ID: EXPECTED_EXTERNAL_SHARED_FOLDER,\n}\n\n# Dictionary for access permissions\n# All users have access to their own My Drive as well as public files\nACCESS_MAPPING: dict[str, list[int]] = {\n    # Admin has access to everything in shared\n    ADMIN_EMAIL: (\n        ADMIN_FILE_IDS\n        + ADMIN_FOLDER_3_FILE_IDS\n        + SHARED_DRIVE_1_FILE_IDS\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n        + SHARED_DRIVE_2_FILE_IDS\n        + FOLDER_2_FILE_IDS\n        + FOLDER_2_1_FILE_IDS\n        + FOLDER_2_2_FILE_IDS\n        + SECTIONS_FILE_IDS\n    ),\n    TEST_USER_1_EMAIL: (\n        TEST_USER_1_FILE_IDS\n        # This user has access to drive 1\n        + SHARED_DRIVE_1_FILE_IDS\n        # This user has redundant access to folder 1 because of group access\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n        # This user has been given shared access to folder 3 in Admin's My Drive\n        + ADMIN_FOLDER_3_FILE_IDS\n        # This user has been given shared access to files 0 and 1 in Admin's My Drive\n        + list(range(0, 2))\n    ),\n    TEST_USER_2_EMAIL: (\n        TEST_USER_2_FILE_IDS\n        # Group 1 includes this user, giving access to folder 1\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        # This folder is public\n        + FOLDER_1_2_FILE_IDS\n        # Folder 2-1 is shared with this user\n        + FOLDER_2_1_FILE_IDS\n        # This user has been given shared access to files 45 and 46 in folder 2\n        + list(range(45, 47))\n    ),\n    # This user can only see his own files and public files\n    TEST_USER_3_EMAIL: TEST_USER_3_FILE_IDS,\n}\n\nSPECIAL_FILE_ID_TO_CONTENT_MAP: dict[int, str] = {\n    61: (\n        \"Title\\n\"\n        \"This is a Google Doc with sections - \"\n        \"Section 1\\n\"\n        \"Section 1 content - \"\n        \"Sub-Section 1-1\\n\"\n        \"Sub-Section 1-1 content - \"\n        \"Sub-Section 1-2\\n\"\n        \"Sub-Section 1-2 content - \"\n        \"Section 2\\n\"\n        \"Section 2 content\"\n    ),\n}\n\nMISC_SHARED_DRIVE_FNAMES = [\n    \"asdfasdfsfad\",\n    \"perm_sync_doc_0ABec4pV29sMuUk9PVA_a5ea8ec4-0440-4926-a43d-3aeef1c10bdd\",\n    \"perm_sync_doc_0ACOrCU1EMD1hUk9PVA_651821cb-8140-42fe-a876-1a92012375c9\",\n    \"perm_sync_doc_0ACOrCU1EMD1hUk9PVA_ab63b976-effb-49af-84e7-423d17a17dd7\",\n    \"super secret thing that test user 1 can't see\",\n    \"perm_sync_doc_0ABec4pV29sMuUk9PVA_419f2ef0-9815-4c69-8435-98b163c9c156\",\n    \"Untitled documentfsdfsdfsdf\",\n    \"bingle_bongle.txt\",\n    \"bb4.txt\",\n    \"bb3.txt\",\n    \"bb2.txt\",\n]\n\nfile_name_template = \"file_{}.txt\"\nfile_text_template = \"This is file {}\"\n\n# This is done to prevent different tests from interfering with each other\n# So each test type should have its own valid prefix\n_VALID_PREFIX = \"file_\"\n\n\ndef filter_invalid_prefixes(names: set[str]) -> set[str]:\n    return {name for name in names if name.startswith(_VALID_PREFIX)}\n\n\ndef print_discrepancies(\n    expected: set[str],\n    retrieved: set[str],\n) -> None:\n    if expected != retrieved:\n        expected_list = sorted(expected)\n        retrieved_list = sorted(retrieved)\n        print(expected_list)\n        print(retrieved_list)\n        print(\"Extra:\")\n        print(sorted(retrieved - expected))\n        print(\"Missing:\")\n        print(sorted(expected - retrieved))\n\n\ndef _get_expected_file_content(file_id: int) -> str:\n    if file_id in SPECIAL_FILE_ID_TO_CONTENT_MAP:\n        return SPECIAL_FILE_ID_TO_CONTENT_MAP[file_id]\n\n    return file_text_template.format(file_id)\n\n\ndef id_to_name(file_id: int) -> str:\n    return file_name_template.format(file_id)\n\n\ndef assert_expected_docs_in_retrieved_docs(\n    retrieved_docs: list[Document],\n    expected_file_ids: Sequence[int],\n) -> None:\n    \"\"\"NOTE: as far as i can tell this does NOT assert for an exact match.\n    it only checks to see if that the expected file id's are IN the retrieved doc list\n    \"\"\"\n\n    expected_file_names = {id_to_name(file_id) for file_id in expected_file_ids}\n    expected_file_texts = {\n        _get_expected_file_content(file_id) for file_id in expected_file_ids\n    }\n\n    retrieved_docs.sort(key=lambda x: x.semantic_identifier)\n\n    for doc in retrieved_docs:\n        print(f\"retrieved doc: doc.semantic_identifier={doc.semantic_identifier}\")\n\n    # Filter out invalid prefixes to prevent different tests from interfering with each other\n    valid_retrieved_docs = [\n        doc\n        for doc in retrieved_docs\n        if doc.semantic_identifier.startswith(_VALID_PREFIX)\n    ]\n    valid_retrieved_file_names = set(\n        [doc.semantic_identifier for doc in valid_retrieved_docs]\n    )\n    valid_retrieved_texts = set(\n        [\n            \" - \".join(\n                [\n                    section.text\n                    for section in doc.sections\n                    if isinstance(section, TextSection) and section.text is not None\n                ]\n            )\n            for doc in valid_retrieved_docs\n        ]\n    )\n\n    # Check file names\n    print_discrepancies(\n        expected=expected_file_names,\n        retrieved=valid_retrieved_file_names,\n    )\n    assert expected_file_names == valid_retrieved_file_names\n\n    # Check file texts\n    print_discrepancies(\n        expected=expected_file_texts,\n        retrieved=valid_retrieved_texts,\n    )\n    assert expected_file_texts == valid_retrieved_texts\n\n\ndef load_connector_outputs(\n    connector: GoogleDriveConnector,\n    include_permissions: bool = False,\n) -> ConnectorOutput:\n    \"\"\"Load all documents, failures, and hierarchy nodes from the connector.\"\"\"\n    return load_all_from_connector(\n        connector,\n        0,\n        time.time(),\n        include_permissions=include_permissions,\n    )\n\n\ndef assert_hierarchy_nodes_match_expected(\n    retrieved_nodes: list[HierarchyNode],\n    expected_nodes: dict[str, ExpectedHierarchyNode],\n    ignorable_node_ids: set[str] | None = None,\n) -> None:\n    \"\"\"\n    Assert that retrieved hierarchy nodes match expected structure.\n\n    Checks node IDs, display names, node types, and parent relationships\n    for EVERY retrieved node (global checks).\n\n    Args:\n        retrieved_nodes: List of HierarchyNode objects from the connector\n        expected_nodes: Dict mapping raw_node_id -> ExpectedHierarchyNode with\n            expected display_name, node_type, and raw_parent_id\n        ignorable_node_ids: Optional set of node IDs that can be missing or extra\n            without failing. Useful for non-deterministically returned nodes.\n    \"\"\"\n    expected_node_ids = set(expected_nodes.keys())\n    retrieved_node_ids = {node.raw_node_id for node in retrieved_nodes}\n    ignorable = ignorable_node_ids or set()\n\n    missing = expected_node_ids - retrieved_node_ids - ignorable\n    extra = retrieved_node_ids - expected_node_ids - ignorable\n\n    if missing or extra:\n        print(\"Expected hierarchy node IDs:\")\n        print(sorted(expected_node_ids))\n        print(\"Retrieved hierarchy node IDs:\")\n        print(sorted(retrieved_node_ids))\n        print(\"Extra (retrieved but not expected):\")\n        print(sorted(retrieved_node_ids - expected_node_ids))\n        print(\"Missing (expected but not retrieved):\")\n        print(sorted(expected_node_ids - retrieved_node_ids))\n        if ignorable:\n            print(\"Ignorable node IDs:\")\n            print(sorted(ignorable))\n\n    assert (\n        not missing and not extra\n    ), f\"Hierarchy node mismatch. Missing: {missing}, Extra: {extra}\"\n\n    for node in retrieved_nodes:\n        if node.raw_node_id in ignorable and node.raw_node_id not in expected_nodes:\n            continue\n\n        assert (\n            node.raw_node_id in expected_nodes\n        ), f\"Node {node.raw_node_id} ({node.display_name}) not found in expected_nodes\"\n        expected = expected_nodes[node.raw_node_id]\n\n        assert (\n            node.display_name == expected.display_name\n        ), f\"Display name mismatch for node {node.raw_node_id}: expected '{expected.display_name}', got '{node.display_name}'\"\n        assert (\n            node.node_type == expected.node_type\n        ), f\"Node type mismatch for node {node.raw_node_id}: expected '{expected.node_type}', got '{node.node_type}'\"\n        if expected.raw_parent_id is not None:\n            assert node.raw_parent_id == expected.raw_parent_id, (\n                f\"Parent mismatch for node {node.raw_node_id} ({node.display_name}): \"\n                f\"expected parent={expected.raw_parent_id}, got parent={node.raw_parent_id}\"\n            )\n\n\ndef _pick(\n    *node_ids: str,\n) -> dict[str, ExpectedHierarchyNode]:\n    \"\"\"Pick nodes from ALL_EXPECTED_HIERARCHY_NODES by their IDs.\"\"\"\n    return {nid: ALL_EXPECTED_HIERARCHY_NODES[nid] for nid in node_ids}\n\n\ndef _clear_parents(\n    nodes: dict[str, ExpectedHierarchyNode],\n    *node_ids: str,\n) -> dict[str, ExpectedHierarchyNode]:\n    \"\"\"Return a shallow copy of nodes with the specified nodes' parents set to None.\n    Useful for OAuth tests where the user can't resolve certain parents\n    (e.g. a folder in another user's My Drive).\"\"\"\n    result = dict(nodes)\n    for nid in node_ids:\n        result[nid] = replace(result[nid], raw_parent_id=None)\n    return result\n\n\ndef get_expected_hierarchy_for_shared_drives(\n    include_drive_1: bool = True,\n    include_drive_2: bool = True,\n    include_restricted_folder: bool = True,\n) -> dict[str, ExpectedHierarchyNode]:\n    \"\"\"Get expected hierarchy nodes for shared drives.\"\"\"\n    result: dict[str, ExpectedHierarchyNode] = {}\n\n    if include_drive_1:\n        result.update(EXPECTED_SHARED_DRIVE_1_NODES)\n        if not include_restricted_folder:\n            result.pop(RESTRICTED_ACCESS_FOLDER_ID, None)\n\n    if include_drive_2:\n        result.update(EXPECTED_SHARED_DRIVE_2_NODES)\n\n    return result\n\n\ndef get_expected_hierarchy_for_folder_1() -> dict[str, ExpectedHierarchyNode]:\n    \"\"\"Get expected hierarchy for folder_1 and its children only.\"\"\"\n    return _pick(FOLDER_1_ID, FOLDER_1_1_ID, FOLDER_1_2_ID)\n\n\ndef get_expected_hierarchy_for_folder_2() -> dict[str, ExpectedHierarchyNode]:\n    \"\"\"Get expected hierarchy for folder_2 and its children only.\"\"\"\n    return _pick(FOLDER_2_ID, FOLDER_2_1_ID, FOLDER_2_2_ID)\n\n\ndef get_expected_hierarchy_for_test_user_1() -> dict[str, ExpectedHierarchyNode]:\n    \"\"\"\n    Get expected hierarchy for test_user_1's full access (OAuth).\n\n    test_user_1 has access to:\n    - shared_drive_1 and its contents (folder_1, folder_1_1, folder_1_2)\n    - folder_3 (shared from admin's My Drive)\n    - PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A and PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B\n    - Additional drives/folders the user has access to\n\n    NOTE: Folder 3 lives in the admin's My Drive. When running as an OAuth\n    connector for test_user_1, the Google Drive API won't return the parent\n    for Folder 3 because the user can't access the admin's My Drive root.\n    \"\"\"\n    result = get_expected_hierarchy_for_shared_drives(\n        include_drive_1=True,\n        include_drive_2=False,\n        include_restricted_folder=False,\n    )\n    result.update(\n        _pick(\n            FOLDER_3_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,\n            TEST_USER_1_MY_DRIVE_ID,\n            TEST_USER_1_MY_DRIVE_FOLDER_ID,\n            TEST_USER_1_DRIVE_B_ID,\n            TEST_USER_1_DRIVE_B_FOLDER_ID,\n            TEST_USER_1_EXTRA_DRIVE_1_ID,\n            TEST_USER_1_EXTRA_DRIVE_2_ID,\n            TEST_USER_1_EXTRA_FOLDER_ID,\n        )\n    )\n    return _clear_parents(result, FOLDER_3_ID)\n\n\ndef get_expected_hierarchy_for_test_user_1_shared_drives_only() -> (\n    dict[str, ExpectedHierarchyNode]\n):\n    \"\"\"Expected hierarchy nodes when test_user_1 runs with include_shared_drives=True only.\"\"\"\n    result = get_expected_hierarchy_for_test_user_1()\n    for nid in (\n        TEST_USER_1_MY_DRIVE_ID,\n        TEST_USER_1_MY_DRIVE_FOLDER_ID,\n        FOLDER_3_ID,\n        TEST_USER_1_EXTRA_FOLDER_ID,\n    ):\n        result.pop(nid, None)\n    return result\n\n\ndef get_expected_hierarchy_for_test_user_1_shared_with_me_only() -> (\n    dict[str, ExpectedHierarchyNode]\n):\n    \"\"\"Expected hierarchy nodes when test_user_1 runs with include_files_shared_with_me=True only.\"\"\"\n    return _clear_parents(\n        _pick(FOLDER_3_ID, TEST_USER_1_EXTRA_FOLDER_ID),\n        FOLDER_3_ID,\n    )\n\n\ndef get_expected_hierarchy_for_test_user_1_my_drive_only() -> (\n    dict[str, ExpectedHierarchyNode]\n):\n    \"\"\"Expected hierarchy nodes when test_user_1 runs with include_my_drives=True only.\"\"\"\n    return _pick(TEST_USER_1_MY_DRIVE_ID, TEST_USER_1_MY_DRIVE_FOLDER_ID)\n"
  },
  {
    "path": "backend/tests/daily/connectors/google_drive/drive_id_mapping.json",
    "content": "{\n  \"12\": \"https://drive.google.com/file/d/1u7nynrG4WuFZeuZs8yyhqJF_lbo-op-m\",\n  \"10\": \"https://drive.google.com/file/d/1LFcVuXuXIdNJ7hkL0C40eYn_cQtryUVQ\",\n  \"13\": \"https://drive.google.com/file/d/1muQMyYAJe0_F-HiDFIfFMt-4qsgMlREM\",\n  \"11\": \"https://drive.google.com/file/d/1oHNtlsdJJtk7dE10NgH83Kn5_f2L-Su1\",\n  \"14\": \"https://drive.google.com/file/d/1sAw-DrsqpnqLF5A8P59BZwIpt9-LrlaL\",\n  \"18\": \"https://drive.google.com/file/d/1qqKH3esasdqV6ryEhdoSQezDPlKj11At\",\n  \"17\": \"https://drive.google.com/file/d/1z08VsrCUTozpc5Quzb7mEDUwNkXU3foT\",\n  \"15\": \"https://drive.google.com/file/d/1QQ6ZGyYP49IJNeGKNmqZISyVLzTOtK4v\",\n  \"19\": \"https://drive.google.com/file/d/172as_pb7E15bXUd63mIIBRotk_tT7h56\",\n  \"16\": \"https://drive.google.com/file/d/1552S6HEjJ81q8JXr46BtixQiVq9xlW_I\",\n  \"5\": \"https://drive.google.com/file/d/1sv9epxLcNlgM6C-oPDeD_heFw7AIZMgp\",\n  \"7\": \"https://drive.google.com/file/d/1S_S0LpQW90EUPPPjJX4jfu5p9gOQjiQF\",\n  \"9\": \"https://drive.google.com/file/d/1wH2dBrWzmiGJ88ySHWu6srb7Jsj7qYbA\",\n  \"8\": \"https://drive.google.com/file/d/14URUm6RKSZziH1lUtT6gs-xnCTWkXpSn\",\n  \"6\": \"https://drive.google.com/file/d/1LBKBuTMRSss-kVw8ut3rMk51wSbTM95j\",\n  \"3\": \"https://drive.google.com/file/d/1nNazkPrkuRXHFOl8gdA68pU2g8cy-h6n\",\n  \"2\": \"https://drive.google.com/file/d/1miG_QpqXe2QIMApcrlNzaB6fsXW5WMFX\",\n  \"4\": \"https://drive.google.com/file/d/1o-i8can6ciL1XXzy2pVUPHZEXEjBJi6C\",\n  \"0\": \"https://drive.google.com/file/d/1d3Y59Sns8I0FIW9CtOAjVVLE2MEe_3nP\",\n  \"1\": \"https://drive.google.com/file/d/1ipSqxJajs_NkfSKFxgltIMNc0ffdt-NX\",\n  \"68\": \"https://drive.google.com/file/d/1rCBZsbhQ-ULWGztiKB0JYhFth9EChiSZ\",\n  \"66\": \"https://drive.google.com/file/d/1WVAlbWcu9-Braa0aG6w3cShrY5dbIYcY\",\n  \"67\": \"https://drive.google.com/file/d/1p44poOCdNLnVYMxTL9b3h-BXsOQ2RDgM\",\n  \"69\": \"https://drive.google.com/file/d/1HFYsaqC14aE-EaobQdwkw0FOlAYMYqkV\",\n  \"65\": \"https://drive.google.com/file/d/1RyE07CpTIDYMO3b-atwjWH6ZHFDjyoCl\",\n  \"32\": \"https://drive.google.com/file/d/17egJ5W-0bvS2akLBqvxylTIViN0d9nG7\",\n  \"28\": \"https://drive.google.com/file/d/1HNqSM2XGqgHnyNYT5wp8hyski18HMcfO\",\n  \"37\": \"https://drive.google.com/file/d/16Tdu3gveWkFL0VBUzYSzKxFO4ffv-8h7\",\n  \"30\": \"https://drive.google.com/file/d/1uj69jGyYnNOXXqKmLNIp-4KKrVC1qaPy\",\n  \"25\": \"https://drive.google.com/file/d/1bw6NFlR4ZxOV6reQK1Oqeq_UaYFVpNV6\",\n  \"33\": \"https://drive.google.com/file/d/1FkmXBkt__lOFXg_uhxLI0QIuxWbIGySL\",\n  \"20\": \"https://drive.google.com/file/d/1r77uBVOHkuiDQFa9iz9FU8QbfjImOAjF\",\n  \"24\": \"https://drive.google.com/file/d/1kwLrdhTgCdjNrOcSwRI14K3gXnS48xne\",\n  \"39\": \"https://drive.google.com/file/d/1V3av9F47t44Nf3jcO12U6OIsjsX-B7L1\",\n  \"29\": \"https://drive.google.com/file/d/172dCAUNaaoZX0RHqEi7Ev12eV930LtTa\",\n  \"31\": \"https://drive.google.com/file/d/17zzfgMSWBVebWGnpSHKd6g1LFN4vn-YP\",\n  \"38\": \"https://drive.google.com/file/d/1xOQvIBlBJ2swTGp78WkCZJUQ-d1F8pVu\",\n  \"23\": \"https://drive.google.com/file/d/1X89y_CoTWWjh3BWq0ZgeGydCvg3gMZeJ\",\n  \"34\": \"https://drive.google.com/file/d/1VNDhcbA_-Ckjp084hKyl9bwP4E3l9K_2\",\n  \"47\": \"https://drive.google.com/file/d/1O8E7haA8WcJIma0iKcvebd4_dlC5Zr7S\",\n  \"52\": \"https://drive.google.com/file/d/1o-ateliXHj4TyugOxb9zYYXwrkhFl4FX\",\n  \"27\": \"https://drive.google.com/file/d/1aZ1CwNVWJt_OtIBVO-9zv1UUqXTDlM1F\",\n  \"26\": \"https://drive.google.com/file/d/1qegrc27hYeECs0KexnEuuG0WQm-8Y9oZ\",\n  \"59\": \"https://drive.google.com/file/d/1L9oWKHMTjQreGW_k8rNy7kBQ7c0FuXFm\",\n  \"35\": \"https://drive.google.com/file/d/1NewjF092B9KKDBs-dpnZ9dzVl2GAs2LW\",\n  \"49\": \"https://drive.google.com/file/d/1TsUrBlr2nxJtH122nKQ_GzdMc0DFFERB\",\n  \"41\": \"https://drive.google.com/file/d/1gc2Vo3HZF-Bm_WhZ0zyFedWNfVL2BEol\",\n  \"22\": \"https://drive.google.com/file/d/1iPfQeganYriuqHO2e5npUPeuX5VIbhG3\",\n  \"36\": \"https://drive.google.com/file/d/1KyNoHRTfGMNR15dCRpcVW74l2z-wVm0V\",\n  \"44\": \"https://drive.google.com/file/d/1PDuxwmrD20s54FHQIhXn3ucdFmXSX5kS\",\n  \"21\": \"https://drive.google.com/file/d/1ZwO5cCfBJgGpZTIpoi8p2js8zuHT_qxe\",\n  \"53\": \"https://drive.google.com/file/d/140NZAuAOoiqrNVqWmF4TPNv6njd_guwE\",\n  \"50\": \"https://drive.google.com/file/d/1MBmy7nQi7pMwwIPZHJjB_iuQeO07QWsN\",\n  \"54\": \"https://drive.google.com/file/d/1TtIJ-ULYWyv0yUvUVdfTPuBNlBt_j1Yd\",\n  \"57\": \"https://drive.google.com/file/d/19V5d3NcR029AhGiRibk2nlTmFNCVGBgO\",\n  \"43\": \"https://drive.google.com/file/d/1kLChcxIWZS_kHLEHThLcm7ekcgwYP0jF\",\n  \"42\": \"https://drive.google.com/file/d/1HKW3C1B5vFYUuXmFieMKYAfq4CwtnEZ_\",\n  \"48\": \"https://drive.google.com/file/d/1EJGd47XpWZDXJKWU0CGp84Hm7K47GNVt\",\n  \"40\": \"https://drive.google.com/file/d/1Fr4dVKdOvth_O-Td8PTwgNGzZz8ridAl\",\n  \"58\": \"https://drive.google.com/file/d/1lUFpiwE7ISzLbowHvCtEUj4sfG4w0Gst\",\n  \"51\": \"https://drive.google.com/file/d/1V6fOoKgA8QSTJYWPP5GVHz8WFAQIRLNB\",\n  \"45\": \"https://drive.google.com/file/d/1hSrPOwyxFEth4GWWN1e4BjBftmnKa8px\",\n  \"46\": \"https://drive.google.com/file/d/1jCynzDt1r0EISpwcrFuk3RlKWHM9u7Mj\",\n  \"55\": \"https://drive.google.com/file/d/1Db01f4I_Xn8Bs9piQgZU59ZWAeC2MaQm\",\n  \"56\": \"https://drive.google.com/file/d/1NxVfwIxm6FVVR1XnxQNMWWbQEVX66cQm\",\n  \"61\": \"https://docs.google.com/document/d/1eAaZJAqjXMZ2VvG_r04EGtn6EGcYycofdNUkDHEA8vY\"\n}"
  },
  {
    "path": "backend/tests/daily/connectors/google_drive/test_admin_oauth.py",
    "content": "from collections.abc import Callable\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.connectors.google_drive.connector import GoogleDriveConnector\nfrom tests.daily.connectors.google_drive.consts_and_utils import _pick\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_MY_DRIVE_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    assert_expected_docs_in_retrieved_docs,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    assert_hierarchy_nodes_match_expected,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    get_expected_hierarchy_for_shared_drives,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import load_connector_outputs\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    PERM_SYNC_DRIVE_ADMIN_ONLY_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import PILL_FOLDER_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    RESTRICTED_ACCESS_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FOLDER_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_EXTRA_DRIVE_1_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_EXTRA_DRIVE_2_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_EXTRA_FOLDER_ID,\n)\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_include_all(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_include_all\")\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=True,\n        include_my_drives=True,\n        include_files_shared_with_me=False,\n        shared_folder_urls=None,\n        my_drive_emails=None,\n        shared_drive_urls=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = (\n        ADMIN_FILE_IDS\n        + ADMIN_FOLDER_3_FILE_IDS\n        + SHARED_DRIVE_1_FILE_IDS\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n        + SHARED_DRIVE_2_FILE_IDS\n        + FOLDER_2_FILE_IDS\n        + FOLDER_2_1_FILE_IDS\n        + FOLDER_2_2_FILE_IDS\n        + SECTIONS_FILE_IDS\n    )\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    expected_nodes = get_expected_hierarchy_for_shared_drives(\n        include_drive_1=True,\n        include_drive_2=True,\n        include_restricted_folder=False,\n    )\n    expected_nodes.update(\n        _pick(\n            PERM_SYNC_DRIVE_ADMIN_ONLY_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,\n            TEST_USER_1_EXTRA_DRIVE_1_ID,\n            TEST_USER_1_EXTRA_DRIVE_2_ID,\n            ADMIN_MY_DRIVE_ID,\n            PILL_FOLDER_ID,\n            RESTRICTED_ACCESS_FOLDER_ID,\n            TEST_USER_1_EXTRA_FOLDER_ID,\n            FOLDER_3_ID,\n        )\n    )\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_include_shared_drives_only(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_include_shared_drives_only\")\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=True,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=None,\n        my_drive_emails=None,\n        shared_drive_urls=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = (\n        SHARED_DRIVE_1_FILE_IDS\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n        + SHARED_DRIVE_2_FILE_IDS\n        + FOLDER_2_FILE_IDS\n        + FOLDER_2_1_FILE_IDS\n        + FOLDER_2_2_FILE_IDS\n        + SECTIONS_FILE_IDS\n    )\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    expected_nodes = get_expected_hierarchy_for_shared_drives(\n        include_drive_1=True,\n        include_drive_2=True,\n        include_restricted_folder=False,\n    )\n    expected_nodes.update(\n        _pick(\n            PERM_SYNC_DRIVE_ADMIN_ONLY_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,\n            TEST_USER_1_EXTRA_DRIVE_1_ID,\n            TEST_USER_1_EXTRA_DRIVE_2_ID,\n            RESTRICTED_ACCESS_FOLDER_ID,\n        )\n    )\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_include_my_drives_only(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_include_my_drives_only\")\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=True,\n        include_files_shared_with_me=False,\n        shared_folder_urls=None,\n        my_drive_emails=None,\n        shared_drive_urls=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = ADMIN_FILE_IDS + ADMIN_FOLDER_3_FILE_IDS\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    expected_nodes = _pick(\n        FOLDER_3_ID,\n        ADMIN_MY_DRIVE_ID,\n        PILL_FOLDER_ID,\n        TEST_USER_1_EXTRA_FOLDER_ID,\n    )\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_drive_one_only(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_drive_one_only\")\n    drive_urls = [SHARED_DRIVE_1_URL]\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=True,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=None,\n        my_drive_emails=None,\n        shared_drive_urls=\",\".join([str(url) for url in drive_urls]),\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = (\n        SHARED_DRIVE_1_FILE_IDS\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n    )\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    expected_nodes = get_expected_hierarchy_for_shared_drives(\n        include_drive_1=True,\n        include_drive_2=False,\n        include_restricted_folder=False,\n    )\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_folder_and_shared_drive(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_folder_and_shared_drive\")\n    drive_urls = [SHARED_DRIVE_1_URL]\n    folder_urls = [FOLDER_2_URL]\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=True,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=\",\".join([str(url) for url in folder_urls]),\n        my_drive_emails=None,\n        shared_drive_urls=\",\".join([str(url) for url in drive_urls]),\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = (\n        SHARED_DRIVE_1_FILE_IDS\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n        + FOLDER_2_FILE_IDS\n        + FOLDER_2_1_FILE_IDS\n        + FOLDER_2_2_FILE_IDS\n    )\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    expected_nodes = get_expected_hierarchy_for_shared_drives(\n        include_drive_1=True,\n        include_drive_2=True,\n        include_restricted_folder=False,\n    )\n    expected_nodes.pop(SECTIONS_FOLDER_ID, None)\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_folders_only(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_folders_only\")\n    folder_urls = [\n        FOLDER_1_2_URL,\n        FOLDER_2_1_URL,\n        FOLDER_2_2_URL,\n        FOLDER_3_URL,\n    ]\n    shared_drive_urls = [\n        FOLDER_1_1_URL,\n    ]\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=True,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=\",\".join([str(url) for url in folder_urls]),\n        my_drive_emails=None,\n        shared_drive_urls=\",\".join([str(url) for url in shared_drive_urls]),\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = (\n        FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n        + FOLDER_2_1_FILE_IDS\n        + FOLDER_2_2_FILE_IDS\n        + ADMIN_FOLDER_3_FILE_IDS\n    )\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    expected_nodes = get_expected_hierarchy_for_shared_drives(\n        include_drive_1=True,\n        include_drive_2=True,\n        include_restricted_folder=False,\n    )\n    expected_nodes.pop(SECTIONS_FOLDER_ID, None)\n    expected_nodes.update(_pick(ADMIN_MY_DRIVE_ID, FOLDER_3_ID))\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_personal_folders_only(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_personal_folders_only\")\n    folder_urls = [\n        FOLDER_3_URL,\n    ]\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=True,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=\",\".join([str(url) for url in folder_urls]),\n        my_drive_emails=None,\n        shared_drive_urls=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = ADMIN_FOLDER_3_FILE_IDS\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    expected_nodes = _pick(FOLDER_3_ID, ADMIN_MY_DRIVE_ID)\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n    )\n"
  },
  {
    "path": "backend/tests/daily/connectors/google_drive/test_drive_perm_sync.py",
    "content": "import copy\nimport json\nimport os\nfrom collections import defaultdict\nfrom collections.abc import Callable\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom ee.onyx.external_permissions.google_drive.doc_sync import gdrive_doc_sync\nfrom ee.onyx.external_permissions.google_drive.group_sync import gdrive_group_sync\nfrom onyx.access.models import DocExternalAccess\nfrom onyx.connectors.google_drive.connector import GoogleDriveConnector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.utils import DocumentRow\nfrom onyx.db.utils import SortOrder\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom tests.daily.connectors.google_drive.consts_and_utils import _pick\nfrom tests.daily.connectors.google_drive.consts_and_utils import ACCESS_MAPPING\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_MY_DRIVE_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    assert_hierarchy_nodes_match_expected,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    EXTERNAL_SHARED_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    FOLDER_3_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    get_expected_hierarchy_for_shared_drives,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import load_connector_outputs\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    PERM_SYNC_DRIVE_ACCESS_MAPPING,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    PERM_SYNC_DRIVE_ADMIN_ONLY_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    PILL_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import PUBLIC_RANGE\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    RESTRICTED_ACCESS_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_DRIVE_B_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_DRIVE_B_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_EXTRA_DRIVE_1_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_EXTRA_DRIVE_2_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_EXTRA_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_MY_DRIVE_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_MY_DRIVE_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_2_MY_DRIVE,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_3_MY_DRIVE_ID,\n)\n\n\ndef _build_connector(\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> GoogleDriveConnector:\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=True,\n        include_my_drives=True,\n        include_files_shared_with_me=False,\n        shared_folder_urls=None,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n    # don't need this anymore, it's been called in the factory\n    connector.load_credentials = MagicMock()  # type: ignore\n    return connector\n\n\ndef test_gdrive_perm_sync_with_real_data(\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n    enable_ee: None,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test gdrive_doc_sync and gdrive_group_sync with real data from the test drive.\n\n    This test uses the real connector to make actual API calls to Google Drive\n    and verifies the permission structure returned.\n    \"\"\"\n    # Create a mock cc_pair that will use our real connector\n    mock_cc_pair = MagicMock(spec=ConnectorCredentialPair)\n    mock_cc_pair.connector = MagicMock()\n    mock_cc_pair.connector.connector_specific_config = {}\n    mock_cc_pair.credential_id = 1\n    # Import and use the mock helper\n    from onyx.utils.sensitive import make_mock_sensitive_value\n\n    mock_cc_pair.credential.credential_json = make_mock_sensitive_value({})\n    mock_cc_pair.last_time_perm_sync = None\n    mock_cc_pair.last_time_external_group_sync = None\n\n    # Create a mock heartbeat\n    mock_heartbeat = MagicMock(spec=IndexingHeartbeatInterface)\n    mock_heartbeat.should_stop.return_value = False\n\n    # Load drive_id_mapping.json\n    with open(\n        os.path.join(os.path.dirname(__file__), \"drive_id_mapping.json\"), \"r\"\n    ) as f:\n        drive_id_mapping = json.load(f)\n\n    # Invert the mapping to get URL -> ID\n    url_to_id_mapping = {url: int(id) for id, url in drive_id_mapping.items()}\n\n    # Use the connector directly without mocking Google Drive API calls\n    with patch(\n        \"ee.onyx.external_permissions.google_drive.doc_sync.GoogleDriveConnector\",\n        return_value=_build_connector(google_drive_service_acct_connector_factory),\n    ):\n        # Call the function under test\n        def mock_fetch_all_docs_fn(\n            sort_order: SortOrder | None = None,  # noqa: ARG001\n        ) -> list[DocumentRow]:\n            return []\n\n        def mock_fetch_all_docs_ids_fn() -> list[str]:\n            return []\n\n        doc_access_generator = gdrive_doc_sync(\n            mock_cc_pair,\n            mock_fetch_all_docs_fn,\n            mock_fetch_all_docs_ids_fn,\n            mock_heartbeat,\n        )\n        doc_access_list = list(doc_access_generator)\n\n    # Verify we got some results\n    assert len(doc_access_list) > 0\n    print(f\"Found {len(doc_access_list)} documents with permissions\")\n\n    # create new connector\n    with patch(\n        \"ee.onyx.external_permissions.google_drive.group_sync.GoogleDriveConnector\",\n        return_value=_build_connector(google_drive_service_acct_connector_factory),\n    ):\n        external_user_group_generator = gdrive_group_sync(\"test_tenant\", mock_cc_pair)\n        external_user_groups = list(external_user_group_generator)\n\n    # map group ids to emails\n    group_id_to_email_mapping: dict[str, set[str]] = defaultdict(set)\n    groups_with_anyone_access: set[str] = set()\n    for group in external_user_groups:\n        for email in group.user_emails:\n            group_id_to_email_mapping[group.id].add(email)\n\n        if group.gives_anyone_access:\n            groups_with_anyone_access.add(group.id)\n\n    # Map documents to their permissions (flattening groups)\n    doc_to_email_mapping: dict[str, set[str]] = {}\n    doc_to_raw_result_mapping: dict[str, set[str]] = {}\n    public_doc_ids: set[str] = set()\n\n    for doc_access in doc_access_list:\n        if not isinstance(doc_access, DocExternalAccess):\n            continue\n        doc_id = doc_access.doc_id\n        # make sure they are new sets to avoid mutating the original\n        doc_to_email_mapping[doc_id] = copy.deepcopy(\n            doc_access.external_access.external_user_emails\n        )\n        doc_to_raw_result_mapping[doc_id] = copy.deepcopy(\n            doc_access.external_access.external_user_emails\n        )\n\n        for group_id in doc_access.external_access.external_user_group_ids:\n            doc_to_email_mapping[doc_id].update(group_id_to_email_mapping[group_id])\n            doc_to_raw_result_mapping[doc_id].add(group_id)\n\n        if doc_access.external_access.is_public:\n            public_doc_ids.add(doc_id)\n\n        if any(\n            group_id in groups_with_anyone_access\n            for group_id in doc_access.external_access.external_user_group_ids\n        ):\n            public_doc_ids.add(doc_id)\n\n    # Check permissions based on drive_id_mapping.json and ACCESS_MAPPING\n    # For each document URL that exists in our mapping\n    checked_files = 0\n    for doc_id, emails_with_access in doc_to_email_mapping.items():\n        # Skip URLs that aren't in our mapping, we don't want new stuff to interfere\n        # with the test.\n        if doc_id not in url_to_id_mapping:\n            continue\n\n        file_numeric_id = url_to_id_mapping.get(doc_id)\n        if file_numeric_id is None:\n            raise ValueError(f\"File {doc_id} not found in drive_id_mapping.json\")\n\n        checked_files += 1\n\n        # Check which users should have access to this file according to ACCESS_MAPPING\n        expected_users = set()\n        for user_email, file_ids in ACCESS_MAPPING.items():\n            if file_numeric_id in file_ids:\n                expected_users.add(user_email)\n\n        # Verify the permissions match\n        if file_numeric_id in PUBLIC_RANGE:\n            assert (\n                doc_id in public_doc_ids\n            ), f\"File {doc_id} (ID: {file_numeric_id}) should be public but is not in the public_doc_ids set\"\n        else:\n            assert expected_users == emails_with_access, (\n                f\"File {doc_id} (ID: {file_numeric_id}) should be accessible to users {expected_users} \"\n                f\"but is accessible to {emails_with_access}. Raw result: {doc_to_raw_result_mapping[doc_id]} \"\n            )\n\n    # Verify that we checked every file in ACCESS_MAPPING\n    all_expected_files = set()\n    for file_ids in ACCESS_MAPPING.values():\n        all_expected_files.update(file_ids)\n\n    checked_file_ids = {\n        url_to_id_mapping[doc_id]\n        for doc_id in doc_to_email_mapping\n        if doc_id in url_to_id_mapping\n    }\n\n    assert all_expected_files == checked_file_ids, (\n        f\"Not all expected files were checked. \"\n        f\"Missing files: {all_expected_files - checked_file_ids}, \"\n        f\"Extra files checked: {checked_file_ids - all_expected_files}\"\n    )\n\n    print(f\"Checked permissions for {checked_files} files from drive_id_mapping.json\")\n\n    # Verify hierarchy nodes are returned with correct structure\n    # Use include_permissions=True to populate external_access on hierarchy nodes\n    hierarchy_connector = _build_connector(google_drive_service_acct_connector_factory)\n    output = load_connector_outputs(hierarchy_connector, include_permissions=True)\n\n    expected_nodes = get_expected_hierarchy_for_shared_drives(\n        include_drive_1=True,\n        include_drive_2=True,\n        include_restricted_folder=False,\n    )\n    expected_nodes.update(\n        _pick(\n            PERM_SYNC_DRIVE_ADMIN_ONLY_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,\n            TEST_USER_1_MY_DRIVE_ID,\n            TEST_USER_1_MY_DRIVE_FOLDER_ID,\n            TEST_USER_1_DRIVE_B_ID,\n            TEST_USER_1_DRIVE_B_FOLDER_ID,\n            TEST_USER_1_EXTRA_DRIVE_1_ID,\n            TEST_USER_1_EXTRA_DRIVE_2_ID,\n            ADMIN_MY_DRIVE_ID,\n            TEST_USER_2_MY_DRIVE,\n            TEST_USER_3_MY_DRIVE_ID,\n            PILL_FOLDER_ID,\n            RESTRICTED_ACCESS_FOLDER_ID,\n            TEST_USER_1_EXTRA_FOLDER_ID,\n            EXTERNAL_SHARED_FOLDER_ID,\n            FOLDER_3_ID,\n        )\n    )\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},\n    )\n\n    # Verify the perm sync drives are included in the hierarchy\n    # These drives should have external_access set on their hierarchy nodes\n    perm_sync_drive_nodes = [\n        node\n        for node in output.hierarchy_nodes\n        if node.raw_node_id\n        in {\n            PERM_SYNC_DRIVE_ADMIN_ONLY_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,\n        }\n    ]\n\n    # Verify permissions on perm sync drive hierarchy nodes\n    for node in perm_sync_drive_nodes:\n        assert (\n            node.external_access is not None\n        ), f\"Hierarchy node {node.raw_node_id} has no external access\"\n        expected_emails = PERM_SYNC_DRIVE_ACCESS_MAPPING.get(node.raw_node_id, set())\n        actual_emails = node.external_access.external_user_emails\n        assert actual_emails == expected_emails, (\n            f\"Permission mismatch for perm sync drive {node.raw_node_id} ({node.display_name}): \"\n            f\"expected {expected_emails}, got {actual_emails}\"\n        )\n\n    print(f\"Verified {len(output.hierarchy_nodes)} hierarchy nodes\")\n"
  },
  {
    "path": "backend/tests/daily/connectors/google_drive/test_link_visibility_filter.py",
    "content": "from collections.abc import Iterable\nfrom typing import Any\nfrom unittest.mock import patch\n\nfrom onyx.connectors.google_drive.connector import GoogleDriveConnector\nfrom onyx.connectors.google_drive.file_retrieval import has_link_only_permission\nfrom onyx.connectors.google_drive.models import DriveRetrievalStage\nfrom onyx.connectors.google_drive.models import RetrievedDriveFile\n\n\ndef _stub_run_functions(\n    func_with_args: Iterable[tuple],\n    max_workers: int = 8,  # noqa: ARG001\n) -> list[Any]:\n    return [func(*args) for func, args in func_with_args]\n\n\ndef _build_retrieved_file(\n    permissions: list[dict[str, Any]],\n) -> RetrievedDriveFile:\n    return RetrievedDriveFile(\n        completion_stage=DriveRetrievalStage.OAUTH_FILES,\n        drive_file={\n            \"id\": \"file-id\",\n            \"name\": \"Test File\",\n            \"permissions\": permissions,\n        },\n        user_email=\"user@example.com\",\n    )\n\n\ndef _prepare_connector(exclude: bool) -> GoogleDriveConnector:\n    connector = GoogleDriveConnector(\n        include_shared_drives=True,\n        exclude_domain_link_only=exclude,\n    )\n    connector._creds = object()  # type: ignore[assignment]\n    connector._primary_admin_email = \"admin@example.com\"\n    return connector\n\n\ndef test_has_link_only_permission_detects_domain_link() -> None:\n    file = {\n        \"permissions\": [\n            {\"type\": \"domain\", \"allowFileDiscovery\": False},\n            {\"type\": \"user\", \"emailAddress\": \"user@example.com\"},\n        ]\n    }\n    assert has_link_only_permission(file) is True\n\n\ndef test_has_link_only_permission_detects_anyone_link() -> None:\n    file = {\n        \"permissions\": [\n            {\"type\": \"anyone\", \"allowFileDiscovery\": False},\n        ]\n    }\n    assert has_link_only_permission(file) is True\n\n\ndef test_has_link_only_permission_ignores_other_permissions() -> None:\n    file = {\n        \"permissions\": [\n            {\"type\": \"domain\", \"allowFileDiscovery\": True},\n            {\"type\": \"user\", \"emailAddress\": \"user@example.com\"},\n        ]\n    }\n    assert has_link_only_permission(file) is False\n\n\ndef test_connector_skips_link_only_files_when_enabled() -> None:\n    connector = _prepare_connector(exclude=True)\n    retrieved_file = _build_retrieved_file(\n        [{\"type\": \"domain\", \"allowFileDiscovery\": False}]\n    )\n\n    with (\n        patch(\n            \"onyx.connectors.google_drive.connector.run_functions_tuples_in_parallel\",\n            side_effect=_stub_run_functions,\n        ),\n        patch(\n            \"onyx.connectors.google_drive.connector.convert_drive_item_to_document\"\n        ) as convert_mock,\n        patch(\n            \"onyx.connectors.google_drive.connector.GoogleDriveConnector._get_new_ancestors_for_files\"\n        ) as get_new_ancestors_mock,\n    ):\n        convert_mock.return_value = \"doc\"\n        checkpoint = connector.build_dummy_checkpoint()\n        results = list(\n            connector._convert_retrieved_files_to_documents(\n                drive_files_iter=iter([retrieved_file]),\n                checkpoint=checkpoint,\n                include_permissions=False,\n            )\n        )\n\n    assert results == []\n    convert_mock.assert_not_called()\n    get_new_ancestors_mock.assert_called_once()\n\n\ndef test_connector_processes_files_when_option_disabled() -> None:\n    connector = _prepare_connector(exclude=False)\n    retrieved_file = _build_retrieved_file(\n        [{\"type\": \"domain\", \"allowFileDiscovery\": False}]\n    )\n\n    with (\n        patch(\n            \"onyx.connectors.google_drive.connector.run_functions_tuples_in_parallel\",\n            side_effect=_stub_run_functions,\n        ),\n        patch(\n            \"onyx.connectors.google_drive.connector.convert_drive_item_to_document\"\n        ) as convert_mock,\n        patch(\n            \"onyx.connectors.google_drive.connector.GoogleDriveConnector._get_new_ancestors_for_files\"\n        ) as get_new_ancestors_mock,\n    ):\n        convert_mock.return_value = \"doc\"\n        checkpoint = connector.build_dummy_checkpoint()\n        results = list(\n            connector._convert_retrieved_files_to_documents(\n                drive_files_iter=iter([retrieved_file]),\n                checkpoint=checkpoint,\n                include_permissions=False,\n            )\n        )\n\n    assert len(results) == 1\n    convert_mock.assert_called_once()\n    get_new_ancestors_mock.assert_called_once()\n"
  },
  {
    "path": "backend/tests/daily/connectors/google_drive/test_map_test_ids.py",
    "content": "#!/usr/bin/env python\n\nimport json\nimport os\n\nimport pytest\n\nfrom onyx.connectors.google_drive.connector import GoogleDriveConnector\nfrom tests.daily.connectors.google_drive.conftest import get_credentials_from_env\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import file_name_template\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import load_connector_outputs\nfrom tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_FILE_IDS\n\n\ndef generate_test_id_to_drive_id_mapping() -> dict[int, str]:\n    \"\"\"\n    Generate a mapping from test file IDs to actual Google Drive file IDs.\n\n    This is useful for writing tests that need to verify specific files\n    are accessible to specific users.\n\n    Returns:\n        dict: Mapping from test file ID (int) to Google Drive file ID (str)\n    \"\"\"\n    # Set up the connector with real credentials\n    connector = GoogleDriveConnector(\n        include_shared_drives=True,\n        include_my_drives=True,\n        include_files_shared_with_me=False,\n    )\n\n    # Load credentials\n    connector.load_credentials(get_credentials_from_env(email=ADMIN_EMAIL, oauth=False))\n\n    # Get all documents from the connector\n    docs = load_connector_outputs(connector).documents\n\n    # Create a mapping from test file ID to actual Drive file ID\n    test_id_to_drive_id = {}\n\n    # Process all documents retrieved from Drive\n    for doc in docs:\n        # Check if this document's name matches our test file naming pattern (file_X.txt)\n        if not doc.semantic_identifier.startswith(\n            file_name_template.format(\"\").split(\"_\")[0]\n        ):\n            continue\n\n        try:\n            # Extract the test file ID from the filename (file_X.txt -> X)\n            file_id_str = doc.semantic_identifier.split(\"_\")[1].split(\".\")[0]\n            test_file_id = int(file_id_str)\n\n            # Store the mapping from test ID to actual Drive ID\n            # Extract Drive ID from document URL\n            test_id_to_drive_id[test_file_id] = doc.id\n        except (ValueError, IndexError):\n            # Skip files that don't follow our naming convention\n            continue\n\n    # Print the mapping for all defined test file ID ranges\n    all_test_ranges = {\n        \"ADMIN_FILE_IDS\": ADMIN_FILE_IDS,\n        \"TEST_USER_1_FILE_IDS\": TEST_USER_1_FILE_IDS,\n        \"TEST_USER_2_FILE_IDS\": TEST_USER_2_FILE_IDS,\n        \"TEST_USER_3_FILE_IDS\": TEST_USER_3_FILE_IDS,\n        \"SHARED_DRIVE_1_FILE_IDS\": SHARED_DRIVE_1_FILE_IDS,\n        \"SHARED_DRIVE_2_FILE_IDS\": SHARED_DRIVE_2_FILE_IDS,\n        \"FOLDER_1_FILE_IDS\": FOLDER_1_FILE_IDS,\n        \"FOLDER_1_1_FILE_IDS\": FOLDER_1_1_FILE_IDS,\n        \"FOLDER_1_2_FILE_IDS\": FOLDER_1_2_FILE_IDS,\n        \"FOLDER_2_FILE_IDS\": FOLDER_2_FILE_IDS,\n        \"FOLDER_2_1_FILE_IDS\": FOLDER_2_1_FILE_IDS,\n        \"FOLDER_2_2_FILE_IDS\": FOLDER_2_2_FILE_IDS,\n        \"FOLDER_3_FILE_IDS\": FOLDER_3_FILE_IDS,\n    }\n\n    # Print the mapping for each test range\n    for range_name, file_ids in all_test_ranges.items():\n        print(f\"\\n{range_name}:\")\n        for test_id in file_ids:\n            drive_id = test_id_to_drive_id.get(test_id, \"NOT_FOUND\")\n            print(f\"  {test_id} -> {drive_id}\")\n\n    return test_id_to_drive_id\n\n\n@pytest.mark.skipif(\n    not os.getenv(\"RUN_MANUAL_TESTS\"),\n    reason=\"This test maps test IDs to actual Google Drive IDs. Set RUN_MANUAL_TESTS=1 to run.\",\n)\ndef test_generate_drive_id_mapping() -> None:\n    \"\"\"Test to generate mapping from test IDs to actual Google Drive IDs.\n\n    This test is skipped by default as it requires real Google Drive credentials\n    and is primarily used to generate mappings for other tests.\n\n    Run with:\n\n    RUN_MANUAL_TESTS=true pytest -xvs tests/daily/connectors/google_drive/test_map_test_ids.py::test_generate_drive_id_mapping\n    \"\"\"\n    mapping = generate_test_id_to_drive_id_mapping()\n    assert mapping, \"Failed to generate any test ID to drive ID mappings\"\n\n    # Write the mapping to a JSON file\n    output_dir = os.path.dirname(os.path.abspath(__file__))\n    mapping_file = os.path.join(output_dir, \"drive_id_mapping.json\")\n\n    # Convert int keys to strings for JSON compatibility\n    json_mapping = {str(k): v for k, v in mapping.items()}\n\n    # Write the mapping to a JSON file\n    with open(mapping_file, \"w\") as f:\n        json.dump(json_mapping, f, indent=2)\n\n    print(f\"\\nMapping written to: {mapping_file}\")\n    raise RuntimeError(\"Mapping written to file, test complete\")\n"
  },
  {
    "path": "backend/tests/daily/connectors/google_drive/test_sections.py",
    "content": "from collections.abc import Callable\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.connectors.google_drive.connector import GoogleDriveConnector\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL\nfrom tests.daily.connectors.google_drive.consts_and_utils import load_connector_outputs\nfrom tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FOLDER_URL\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_google_drive_sections(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    oauth_connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=SECTIONS_FOLDER_URL,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n    service_acct_connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=SECTIONS_FOLDER_URL,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n    for connector in [oauth_connector, service_acct_connector]:\n        output = load_connector_outputs(connector)\n        retrieved_docs = output.documents\n\n        # Verify we got the 1 doc with sections\n        assert len(retrieved_docs) == 1\n\n        # Verify each section has the expected structure\n        doc = retrieved_docs[0]\n        assert len(doc.sections) == 5\n\n        header_section = doc.sections[0]\n        assert header_section.text == \"Title\\nThis is a Google Doc with sections\"\n        assert header_section.link is not None\n        assert header_section.link.endswith(\n            \"?tab=t.0#heading=h.hfjc17k6qwzt\"\n        ) or header_section.link.endswith(\"?tab=t.0#heading=h.hfjc17k6qwzt\")\n\n        section_1 = doc.sections[1]\n        assert section_1.text == \"Section 1\\nSection 1 content\"\n        assert section_1.link is not None\n        assert section_1.link.endswith(\"?tab=t.0#heading=h.8slfx752a3g5\")\n\n        section_2 = doc.sections[2]\n        assert section_2.text == \"Sub-Section 1-1\\nSub-Section 1-1 content\"\n        assert section_2.link is not None\n        assert section_2.link.endswith(\"?tab=t.0#heading=h.4kj3ayade1bp\")\n\n        section_3 = doc.sections[3]\n        assert section_3.text == \"Sub-Section 1-2\\nSub-Section 1-2 content\"\n        assert section_3.link is not None\n        assert section_3.link.endswith(\"?tab=t.0#heading=h.pm6wrpzgk69l\")\n\n        section_4 = doc.sections[4]\n        assert section_4.text == \"Section 2\\nSection 2 content\"\n        assert section_4.link is not None\n        assert section_4.link.endswith(\"?tab=t.0#heading=h.2m0s9youe2k9\")\n"
  },
  {
    "path": "backend/tests/daily/connectors/google_drive/test_service_acct.py",
    "content": "from collections.abc import Callable\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom urllib.parse import urlparse\n\nfrom onyx.connectors.google_drive.connector import GoogleDriveConnector\nfrom tests.daily.connectors.google_drive.consts_and_utils import _pick\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_MY_DRIVE_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    assert_expected_docs_in_retrieved_docs,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    assert_hierarchy_nodes_match_expected,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    EXTERNAL_SHARED_DOC_SINGLETON,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    EXTERNAL_SHARED_DOCS_IN_FOLDER,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    EXTERNAL_SHARED_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    EXTERNAL_SHARED_FOLDER_URL,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    get_expected_hierarchy_for_shared_drives,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import id_to_name\nfrom tests.daily.connectors.google_drive.consts_and_utils import load_connector_outputs\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    MISC_SHARED_DRIVE_FNAMES,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    PERM_SYNC_DRIVE_ADMIN_ONLY_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    PILL_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    RESTRICTED_ACCESS_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    RESTRICTED_ACCESS_FOLDER_URL,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FOLDER_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_DRIVE_B_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_DRIVE_B_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_EMAIL\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_EXTRA_DRIVE_1_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_EXTRA_DRIVE_2_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_EXTRA_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_MY_DRIVE_FOLDER_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_1_MY_DRIVE_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_2_EMAIL\nfrom tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_2_MY_DRIVE,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_EMAIL\nfrom tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    TEST_USER_3_MY_DRIVE_ID,\n)\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_include_all(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_include_all\")\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=True,\n        include_my_drives=True,\n        include_files_shared_with_me=False,\n        shared_folder_urls=None,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    # Should get everything\n    expected_file_ids = (\n        ADMIN_FILE_IDS\n        + ADMIN_FOLDER_3_FILE_IDS\n        + TEST_USER_1_FILE_IDS\n        + TEST_USER_2_FILE_IDS\n        + TEST_USER_3_FILE_IDS\n        + SHARED_DRIVE_1_FILE_IDS\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n        + SHARED_DRIVE_2_FILE_IDS\n        + FOLDER_2_FILE_IDS\n        + FOLDER_2_1_FILE_IDS\n        + FOLDER_2_2_FILE_IDS\n        + SECTIONS_FILE_IDS\n    )\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    expected_nodes = get_expected_hierarchy_for_shared_drives(\n        include_drive_1=True,\n        include_drive_2=True,\n        include_restricted_folder=False,\n    )\n    expected_nodes.update(\n        _pick(\n            PERM_SYNC_DRIVE_ADMIN_ONLY_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,\n            TEST_USER_1_MY_DRIVE_ID,\n            TEST_USER_1_MY_DRIVE_FOLDER_ID,\n            TEST_USER_1_DRIVE_B_ID,\n            TEST_USER_1_DRIVE_B_FOLDER_ID,\n            TEST_USER_1_EXTRA_DRIVE_1_ID,\n            TEST_USER_1_EXTRA_DRIVE_2_ID,\n            ADMIN_MY_DRIVE_ID,\n            TEST_USER_2_MY_DRIVE,\n            TEST_USER_3_MY_DRIVE_ID,\n            PILL_FOLDER_ID,\n            RESTRICTED_ACCESS_FOLDER_ID,\n            TEST_USER_1_EXTRA_FOLDER_ID,\n            EXTERNAL_SHARED_FOLDER_ID,\n            FOLDER_3_ID,\n        )\n    )\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_include_shared_drives_only_with_size_threshold(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_include_shared_drives_only_with_size_threshold\")\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=True,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=None,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n\n    # this threshold will skip one file\n    connector.size_threshold = 16384\n\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = (\n        SHARED_DRIVE_1_FILE_IDS\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n        + SHARED_DRIVE_2_FILE_IDS\n        + FOLDER_2_FILE_IDS\n        + FOLDER_2_1_FILE_IDS\n        + FOLDER_2_2_FILE_IDS\n        + SECTIONS_FILE_IDS\n    )\n\n    expected_file_names = {id_to_name(file_id) for file_id in expected_file_ids}\n    expected_file_names.update(MISC_SHARED_DRIVE_FNAMES)\n    retrieved_file_names = {doc.semantic_identifier for doc in output.documents}\n    for name in expected_file_names - retrieved_file_names:\n        print(f\"expected but did not retrieve: {name}\")\n    for name in retrieved_file_names - expected_file_names:\n        print(f\"retrieved but did not expect: {name}\")\n\n    # 2 extra files from shared drive owned by non-admin and not shared with admin\n    # TODO: added a file in a \"restricted\" folder, which the connector sometimes succeeds at finding\n    # and adding. Specifically, our shared drive retrieval logic currently assumes that\n    # \"having access to a shared drive\" means that the connector has access to all files in the shared drive.\n    # therefore when a user successfully retrieves a shared drive, we mark it as \"done\". If that user's\n    # access is restricted for a folder in the shared drive, the connector will not retrieve that folder.\n    # If instead someone with FULL access to the shared drive retrieves it, the connector will retrieve\n    # the folder and all its files. There is currently no consistency to the order of assignment of users\n    # to shared drives, so this is a heisenbug. When we guarantee that restricted folders are retrieved,\n    # we can change this to 52\n    assert len(output.documents) == 50 or len(output.documents) == 51\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_include_shared_drives_only(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_include_shared_drives_only\")\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=True,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=None,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n\n    output = load_connector_outputs(connector)\n\n    # Should only get shared drives\n    expected_file_ids = (\n        SHARED_DRIVE_1_FILE_IDS\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n        + SHARED_DRIVE_2_FILE_IDS\n        + FOLDER_2_FILE_IDS\n        + FOLDER_2_1_FILE_IDS\n        + FOLDER_2_2_FILE_IDS\n        + SECTIONS_FILE_IDS\n    )\n\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    # 2 extra files from shared drive owned by non-admin and not shared with admin\n    # another one flaky for unknown reasons\n    # TODO: switch to 54 when restricted access issue is resolved\n    assert len(output.documents) == 51 or len(output.documents) == 52\n\n    expected_nodes = get_expected_hierarchy_for_shared_drives(\n        include_drive_1=True,\n        include_drive_2=True,\n        include_restricted_folder=False,\n    )\n    expected_nodes.update(\n        _pick(\n            PERM_SYNC_DRIVE_ADMIN_ONLY_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_A_ID,\n            PERM_SYNC_DRIVE_ADMIN_AND_USER_1_B_ID,\n            TEST_USER_1_DRIVE_B_ID,\n            TEST_USER_1_DRIVE_B_FOLDER_ID,\n            TEST_USER_1_EXTRA_DRIVE_1_ID,\n            TEST_USER_1_EXTRA_DRIVE_2_ID,\n            RESTRICTED_ACCESS_FOLDER_ID,\n        )\n    )\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_include_my_drives_only(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_include_my_drives_only\")\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=True,\n        include_files_shared_with_me=False,\n        shared_folder_urls=None,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    # Should only get everyone's My Drives\n    expected_file_ids = (\n        ADMIN_FILE_IDS\n        + ADMIN_FOLDER_3_FILE_IDS\n        + TEST_USER_1_FILE_IDS\n        + TEST_USER_2_FILE_IDS\n        + TEST_USER_3_FILE_IDS\n    )\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    expected_nodes = _pick(\n        FOLDER_3_ID,\n        ADMIN_MY_DRIVE_ID,\n        TEST_USER_1_MY_DRIVE_ID,\n        TEST_USER_1_MY_DRIVE_FOLDER_ID,\n        TEST_USER_2_MY_DRIVE,\n        TEST_USER_3_MY_DRIVE_ID,\n        PILL_FOLDER_ID,\n        TEST_USER_1_EXTRA_FOLDER_ID,\n        EXTERNAL_SHARED_FOLDER_ID,\n    )\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_drive_one_only(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_drive_one_only\")\n    urls = [SHARED_DRIVE_1_URL]\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=None,\n        shared_drive_urls=\",\".join([str(url) for url in urls]),\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    # We ignore shared_drive_urls if include_shared_drives is False\n    expected_file_ids = (\n        SHARED_DRIVE_1_FILE_IDS\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n    )\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    expected_nodes = get_expected_hierarchy_for_shared_drives(\n        include_drive_1=True,\n        include_drive_2=False,\n        include_restricted_folder=False,\n    )\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_folder_and_shared_drive(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_folder_and_shared_drive\")\n    drive_urls = [SHARED_DRIVE_1_URL]\n    folder_urls = [FOLDER_2_URL]\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_drive_urls=\",\".join([str(url) for url in drive_urls]),\n        shared_folder_urls=\",\".join([str(url) for url in folder_urls]),\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    # Should get everything except for the top level files in drive 2\n    expected_file_ids = (\n        SHARED_DRIVE_1_FILE_IDS\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n        + FOLDER_2_FILE_IDS\n        + FOLDER_2_1_FILE_IDS\n        + FOLDER_2_2_FILE_IDS\n    )\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    expected_nodes = get_expected_hierarchy_for_shared_drives(\n        include_drive_1=True,\n        include_drive_2=True,\n        include_restricted_folder=False,\n    )\n    expected_nodes.pop(SECTIONS_FOLDER_ID, None)\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n        ignorable_node_ids={RESTRICTED_ACCESS_FOLDER_ID},\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_folders_only(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_folders_only\")\n    folder_urls = [\n        FOLDER_1_2_URL,\n        FOLDER_2_1_URL,\n        FOLDER_2_2_URL,\n        FOLDER_3_URL,\n    ]\n    # This should get converted to a drive request and spit out a warning in the logs\n    shared_drive_urls = [\n        FOLDER_1_1_URL,\n    ]\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_drive_urls=\",\".join([str(url) for url in shared_drive_urls]),\n        shared_folder_urls=\",\".join([str(url) for url in folder_urls]),\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = (\n        FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n        + FOLDER_2_1_FILE_IDS\n        + FOLDER_2_2_FILE_IDS\n        + ADMIN_FOLDER_3_FILE_IDS\n    )\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    expected_nodes = get_expected_hierarchy_for_shared_drives(\n        include_drive_1=True,\n        include_drive_2=True,\n        include_restricted_folder=False,\n    )\n    expected_nodes.pop(SECTIONS_FOLDER_ID, None)\n    expected_nodes.update(_pick(ADMIN_MY_DRIVE_ID, FOLDER_3_ID))\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=expected_nodes,\n    )\n\n\ndef test_shared_folder_owned_by_external_user(\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_shared_folder_owned_by_external_user\")\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_drive_urls=None,\n        shared_folder_urls=EXTERNAL_SHARED_FOLDER_URL,\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_docs = EXTERNAL_SHARED_DOCS_IN_FOLDER\n\n    assert len(output.documents) == len(expected_docs)  # 1 for now\n    assert expected_docs[0] in output.documents[0].id\n\n\ndef test_shared_with_me(\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_shared_with_me\")\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=True,\n        include_files_shared_with_me=True,\n        shared_drive_urls=None,\n        shared_folder_urls=None,\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    print(output.documents)\n\n    expected_file_ids = (\n        ADMIN_FILE_IDS\n        + ADMIN_FOLDER_3_FILE_IDS\n        + TEST_USER_1_FILE_IDS\n        + TEST_USER_2_FILE_IDS\n        + TEST_USER_3_FILE_IDS\n    )\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    retrieved_ids = {urlparse(doc.id).path.split(\"/\")[-1] for doc in output.documents}\n    for id in retrieved_ids:\n        print(id)\n\n    assert EXTERNAL_SHARED_DOC_SINGLETON.split(\"/\")[-1] in retrieved_ids\n    assert EXTERNAL_SHARED_DOCS_IN_FOLDER[0].split(\"/\")[-1] in retrieved_ids\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_specific_emails(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_specific_emails\")\n    my_drive_emails = [\n        TEST_USER_1_EMAIL,\n        TEST_USER_3_EMAIL,\n    ]\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=None,\n        shared_drive_urls=None,\n        my_drive_emails=\",\".join([str(email) for email in my_drive_emails]),\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = TEST_USER_1_FILE_IDS + TEST_USER_3_FILE_IDS\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef get_specific_folders_in_my_drive(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning get_specific_folders_in_my_drive\")\n    folder_urls = [\n        FOLDER_3_URL,\n    ]\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=\",\".join([str(url) for url in folder_urls]),\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = ADMIN_FOLDER_3_FILE_IDS\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_specific_user_emails_restricted_folder(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_specific_user_emails_restricted_folder\")\n\n    # Test with admin email - should get 1 doc\n    admin_connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=RESTRICTED_ACCESS_FOLDER_URL,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n        specific_user_emails=ADMIN_EMAIL,\n    )\n    admin_output = load_connector_outputs(admin_connector)\n    assert len(admin_output.documents) == 1\n\n    # Test with test users - should get 0 docs\n    test_users = [TEST_USER_1_EMAIL, TEST_USER_2_EMAIL, TEST_USER_3_EMAIL]\n    test_connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=False,\n        include_files_shared_with_me=False,\n        shared_folder_urls=RESTRICTED_ACCESS_FOLDER_URL,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n        specific_user_emails=\",\".join(test_users),\n    )\n    test_output = load_connector_outputs(test_connector)\n    assert len(test_output.documents) == 0\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_specific_user_email_shared_with_me(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_specific_user_email_shared_with_me\")\n\n    # Test with admin email - should get 1 doc\n    connector = google_drive_service_acct_connector_factory(\n        primary_admin_email=ADMIN_EMAIL,\n        include_shared_drives=False,\n        include_my_drives=True,\n        include_files_shared_with_me=False,  # This is what is set in the UI unfortunately\n        shared_folder_urls=None,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n        specific_user_emails=TEST_USER_1_EMAIL,\n    )\n    output = load_connector_outputs(connector)\n    expected = [id_to_name(file_id) for file_id in TEST_USER_1_FILE_IDS]\n    expected += [\"private_file\", \"shared_file\"]  # in My Drive\n    expected += [\"read only users can't download\"]  # Shared with me\n\n    expected += [id_to_name(file_id) for file_id in [0, 1] + ADMIN_FOLDER_3_FILE_IDS]\n\n    # these are in shared drives\n    # expected += ['perm_sync_doc_0ACOrCU1EMD1hUk9PVA_ab63b976-effb-49af-84e7-423d17a17dd7']\n    # expected += ['file_22.txt'] # Shared drive\n\n    doc_titles = set(doc.semantic_identifier for doc in output.documents)\n    assert doc_titles == set(expected)\n"
  },
  {
    "path": "backend/tests/daily/connectors/google_drive/test_user_1_oauth.py",
    "content": "from collections.abc import Callable\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.connectors.google_drive.connector import GoogleDriveConnector\nfrom onyx.connectors.models import Document\nfrom tests.daily.connectors.google_drive.consts_and_utils import _clear_parents\nfrom tests.daily.connectors.google_drive.consts_and_utils import _pick\nfrom tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    assert_expected_docs_in_retrieved_docs,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    assert_hierarchy_nodes_match_expected,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    DONWLOAD_REVOKED_FILE_ID,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_URL\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    get_expected_hierarchy_for_test_user_1,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    get_expected_hierarchy_for_test_user_1_my_drive_only,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    get_expected_hierarchy_for_test_user_1_shared_drives_only,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import (\n    get_expected_hierarchy_for_test_user_1_shared_with_me_only,\n)\nfrom tests.daily.connectors.google_drive.consts_and_utils import load_connector_outputs\nfrom tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS\nfrom tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_ID\nfrom tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_EMAIL\nfrom tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_FILE_IDS\nfrom tests.daily.connectors.utils import ConnectorOutput\n\n\ndef _check_for_error(\n    output: ConnectorOutput,\n    expected_file_ids: list[int],\n) -> list[Document]:\n    retrieved_docs = output.documents\n    retrieved_failures = output.failures\n    assert len(retrieved_failures) <= 1\n\n    if len(retrieved_failures) == 1:\n        fail_msg = retrieved_failures[0].failure_message\n        assert \"HttpError 403\" in fail_msg\n        assert f\"file_{DONWLOAD_REVOKED_FILE_ID}.txt\" in fail_msg\n\n    expected_file_ids.remove(DONWLOAD_REVOKED_FILE_ID)\n    return retrieved_docs\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_all(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_all\")\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=TEST_USER_1_EMAIL,\n        include_files_shared_with_me=True,\n        include_shared_drives=True,\n        include_my_drives=True,\n        shared_folder_urls=None,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = (\n        TEST_USER_1_FILE_IDS\n        + SHARED_DRIVE_1_FILE_IDS\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n        + ADMIN_FOLDER_3_FILE_IDS\n        + list(range(0, 2))\n    )\n\n    retrieved_docs = _check_for_error(output, expected_file_ids)\n\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=retrieved_docs,\n        expected_file_ids=expected_file_ids,\n    )\n\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=get_expected_hierarchy_for_test_user_1(),\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_shared_drives_only(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_shared_drives_only\")\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=TEST_USER_1_EMAIL,\n        include_files_shared_with_me=False,\n        include_shared_drives=True,\n        include_my_drives=False,\n        shared_folder_urls=None,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = (\n        SHARED_DRIVE_1_FILE_IDS\n        + FOLDER_1_FILE_IDS\n        + FOLDER_1_1_FILE_IDS\n        + FOLDER_1_2_FILE_IDS\n    )\n\n    retrieved_docs = _check_for_error(output, expected_file_ids)\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=retrieved_docs,\n        expected_file_ids=expected_file_ids,\n    )\n\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=get_expected_hierarchy_for_test_user_1_shared_drives_only(),\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_shared_with_me_only(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_shared_with_me_only\")\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=TEST_USER_1_EMAIL,\n        include_files_shared_with_me=True,\n        include_shared_drives=False,\n        include_my_drives=False,\n        shared_folder_urls=None,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = ADMIN_FOLDER_3_FILE_IDS + list(range(0, 2))\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=get_expected_hierarchy_for_test_user_1_shared_with_me_only(),\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_my_drive_only(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_my_drive_only\")\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=TEST_USER_1_EMAIL,\n        include_files_shared_with_me=False,\n        include_shared_drives=False,\n        include_my_drives=True,\n        shared_folder_urls=None,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = TEST_USER_1_FILE_IDS\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=get_expected_hierarchy_for_test_user_1_my_drive_only(),\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_shared_my_drive_folder(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_shared_my_drive_folder\")\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=TEST_USER_1_EMAIL,\n        include_files_shared_with_me=False,\n        include_shared_drives=False,\n        include_my_drives=True,\n        shared_folder_urls=FOLDER_3_URL,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = ADMIN_FOLDER_3_FILE_IDS\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=_clear_parents(_pick(FOLDER_3_ID), FOLDER_3_ID),\n    )\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_shared_drive_folder(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    google_drive_oauth_uploaded_connector_factory: Callable[..., GoogleDriveConnector],\n) -> None:\n    print(\"\\n\\nRunning test_shared_drive_folder\")\n    connector = google_drive_oauth_uploaded_connector_factory(\n        primary_admin_email=TEST_USER_1_EMAIL,\n        include_files_shared_with_me=False,\n        include_shared_drives=False,\n        include_my_drives=True,\n        shared_folder_urls=FOLDER_1_URL,\n        shared_drive_urls=None,\n        my_drive_emails=None,\n    )\n    output = load_connector_outputs(connector)\n\n    expected_file_ids = FOLDER_1_FILE_IDS + FOLDER_1_1_FILE_IDS + FOLDER_1_2_FILE_IDS\n    assert_expected_docs_in_retrieved_docs(\n        retrieved_docs=output.documents,\n        expected_file_ids=expected_file_ids,\n    )\n\n    assert_hierarchy_nodes_match_expected(\n        retrieved_nodes=output.hierarchy_nodes,\n        expected_nodes=_pick(\n            SHARED_DRIVE_1_ID, FOLDER_1_ID, FOLDER_1_1_ID, FOLDER_1_2_ID\n        ),\n    )\n"
  },
  {
    "path": "backend/tests/daily/connectors/highspot/test_highspot_connector.py",
    "content": "import json\nimport os\nimport time\nfrom datetime import datetime\nfrom pathlib import Path\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.highspot.connector import HighspotConnector\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\n\n\ndef load_test_data(file_name: str = \"test_highspot_data.json\") -> dict:\n    \"\"\"Load test data from JSON file.\"\"\"\n    current_dir = Path(__file__).parent\n    with open(current_dir / file_name, \"r\") as f:\n        return json.load(f)\n\n\n@pytest.fixture\ndef highspot_connector() -> HighspotConnector:\n    \"\"\"Create a Highspot connector with credentials from environment variables.\"\"\"\n    # Check if required environment variables are set\n    if not os.environ.get(\"HIGHSPOT_KEY\") or not os.environ.get(\"HIGHSPOT_SECRET\"):\n        pytest.fail(\"HIGHSPOT_KEY or HIGHSPOT_SECRET environment variables not set\")\n\n    connector = HighspotConnector(\n        spot_names=[\"Test content\"],  # Use specific spot name instead of empty list\n        batch_size=10,  # Smaller batch size for testing\n    )\n    connector.load_credentials(\n        {\n            \"highspot_key\": os.environ[\"HIGHSPOT_KEY\"],\n            \"highspot_secret\": os.environ[\"HIGHSPOT_SECRET\"],\n            \"highspot_url\": os.environ.get(\n                \"HIGHSPOT_URL\", \"https://api-su2.highspot.com/v1.0/\"\n            ),\n        }\n    )\n    return connector\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_highspot_connector_basic(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    highspot_connector: HighspotConnector,\n) -> None:\n    \"\"\"Test basic functionality of the Highspot connector.\"\"\"\n    all_docs: list[Document] = []\n    test_data = load_test_data()\n    target_test_doc_id = test_data.get(\"target_doc_id\")\n    target_test_doc: Document | None = None\n\n    # Test loading documents\n    for doc_batch in highspot_connector.poll_source(0, time.time()):\n        for doc in doc_batch:\n            if isinstance(doc, HierarchyNode):\n                continue\n            all_docs.append(doc)\n            if doc.id == f\"HIGHSPOT_{target_test_doc_id}\":\n                target_test_doc = doc\n\n    # Verify documents were loaded\n    assert len(all_docs) > 0\n\n    # If we have a specific test document ID, validate it\n    if target_test_doc_id and target_test_doc is not None:\n        assert target_test_doc.semantic_identifier == test_data.get(\n            \"semantic_identifier\"\n        )\n        assert target_test_doc.source == DocumentSource.HIGHSPOT\n        assert target_test_doc.metadata is not None\n\n        assert len(target_test_doc.sections) == 1\n        section = target_test_doc.sections[0]\n        assert section.link is not None\n        # Only check if content exists, as exact content might change\n        assert section.text is not None\n        assert len(section.text) > 0\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_highspot_connector_slim(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    highspot_connector: HighspotConnector,\n) -> None:\n    \"\"\"Test slim document retrieval.\"\"\"\n    # Get all doc IDs from the full connector\n    all_full_doc_ids = set()\n    for doc_batch in highspot_connector.load_from_state():\n        all_full_doc_ids.update(\n            [doc.id for doc in doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # Get all doc IDs from the slim connector\n    all_slim_doc_ids = set()\n    for slim_doc_batch in highspot_connector.retrieve_all_slim_docs_perm_sync():\n        all_slim_doc_ids.update(\n            [doc.id for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # The set of full doc IDs should be a subset of the slim doc IDs\n    assert all_full_doc_ids.issubset(all_slim_doc_ids)\n    # Make sure we actually got some documents\n    assert len(all_slim_doc_ids) > 0\n\n\n\"\"\"This test might fail because of how Highspot handles changes to the document's\n\"updated at\" property. It is marked as expected to fail until we can confirm the behavior.\"\"\"\n\n\n@pytest.mark.xfail(reason=\"Highspot is not returning updated documents as expected.\")\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_highspot_connector_poll_source(\n    mock_get_api_key: MagicMock,  # noqa: ARG001\n    highspot_connector: HighspotConnector,\n) -> None:\n    \"\"\"Test poll_source functionality with date range filtering.\"\"\"\n    # Define date range: April 3, 2025 to April 4, 2025\n    start_date = datetime(2025, 4, 3, 0, 0, 0)\n    end_date = datetime(2025, 4, 4, 23, 59, 59)\n\n    # Convert to seconds since Unix epoch\n    start_time = int(time.mktime(start_date.timetuple()))\n    end_time = int(time.mktime(end_date.timetuple()))\n\n    # Load test data for assertions\n    test_data = load_test_data()\n    poll_source_data = test_data.get(\"poll_source\", {})\n    target_doc_id = poll_source_data.get(\"target_doc_id\")\n\n    # Call poll_source with date range\n    all_docs: list[Document] = []\n    target_doc: Document | None = None\n\n    for doc_batch in highspot_connector.poll_source(start_time, end_time):\n        for doc in doc_batch:\n            if isinstance(doc, HierarchyNode):\n                continue\n            all_docs.append(doc)\n            if doc.id == f\"HIGHSPOT_{target_doc_id}\":\n                target_doc = doc\n\n    # Verify documents were loaded\n    assert len(all_docs) > 0\n\n    # Verify the specific test document was found and has correct properties\n    assert target_doc is not None\n    assert target_doc.semantic_identifier == poll_source_data.get(\"semantic_identifier\")\n    assert target_doc.source == DocumentSource.HIGHSPOT\n    assert target_doc.metadata is not None\n\n    # Verify sections\n    assert len(target_doc.sections) == 1\n    section = target_doc.sections[0]\n    assert section.link == poll_source_data.get(\"link\")\n    assert section.text is not None\n    assert len(section.text) > 0\n\n\ndef test_highspot_connector_validate_credentials(\n    highspot_connector: HighspotConnector,\n) -> None:\n    \"\"\"Test credential validation.\"\"\"\n    assert highspot_connector.validate_credentials() is True\n"
  },
  {
    "path": "backend/tests/daily/connectors/highspot/test_highspot_data.json",
    "content": "{\n    \"target_doc_id\": \"67cd8eb35d3ee0487de2e704\",\n    \"semantic_identifier\": \"Highspot in Action _ Salesforce Integration\",\n    \"link\": \"https://www.highspot.com/items/67cd8eb35d3ee0487de2e704\",\n    \"poll_source\": {\n        \"target_doc_id\":\"67efb452c3f40bcca2b48ca5\",\n        \"semantic_identifier\":\"Introduction to Intelligent Agents\",\n        \"link\":\"https://www.highspot.com/items/67efb452c3f40bcca2b48ca5\"\n    }\n}\n"
  },
  {
    "path": "backend/tests/daily/connectors/hubspot/test_hubspot_connector.py",
    "content": "import os\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.connectors.hubspot.connector import AVAILABLE_OBJECT_TYPES\nfrom onyx.connectors.hubspot.connector import HubSpotConnector\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\n\n\nclass TestHubSpotConnector:\n    \"\"\"Test HubSpot connector functionality using real API calls.\"\"\"\n\n    @pytest.fixture\n    def connector(self) -> HubSpotConnector:\n        \"\"\"Create a HubSpot connector instance.\"\"\"\n        return HubSpotConnector(batch_size=10)\n\n    @pytest.fixture\n    def credentials(self) -> dict[str, Any]:\n        \"\"\"Provide test credentials.\"\"\"\n        return {\"hubspot_access_token\": os.environ[\"HUBSPOT_ACCESS_TOKEN\"]}\n\n    def test_credentials_properties_raise_exception_when_none(self) -> None:\n        \"\"\"Test that access_token and portal_id properties raise exceptions when not set.\"\"\"\n        connector = HubSpotConnector()\n\n        # access_token should raise exception when not set\n        with pytest.raises(ConnectorMissingCredentialError) as exc_info:\n            _ = connector.access_token\n        assert \"HubSpot access token not set\" in str(exc_info.value)\n\n        # portal_id should raise exception when not set\n        with pytest.raises(ConnectorMissingCredentialError) as exc_info:\n            _ = connector.portal_id\n        assert \"HubSpot portal ID not set\" in str(exc_info.value)\n\n    def test_load_credentials(\n        self, connector: HubSpotConnector, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Test that credentials are loaded correctly.\"\"\"\n        result = connector.load_credentials(credentials)\n\n        assert result is None  # Should return None on success\n        assert connector.access_token == credentials[\"hubspot_access_token\"]\n        assert connector.portal_id is not None\n        assert isinstance(connector.portal_id, str)\n\n    def test_load_from_state_basic_functionality(\n        self, connector: HubSpotConnector, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Test basic load_from_state functionality.\"\"\"\n        connector.load_credentials(credentials)\n\n        # Get first batch of documents\n        document_batches = connector.load_from_state()\n        first_batch = next(document_batches, None)\n\n        # Should have at least some documents\n        assert first_batch is not None\n        assert isinstance(first_batch, list)\n        assert len(first_batch) > 0\n\n        # Check document structure\n        doc = first_batch[0]\n        assert isinstance(doc, Document)\n        assert doc.id.startswith(\"hubspot_\")\n        assert doc.source.value == \"hubspot\"\n        assert doc.semantic_identifier is not None\n        assert doc.doc_updated_at is not None\n        assert isinstance(doc.metadata, dict)\n        assert \"object_type\" in doc.metadata\n        assert doc.metadata[\"object_type\"] in [\"ticket\", \"company\", \"deal\", \"contact\"]\n\n        # Check sections\n        assert len(doc.sections) > 0\n        assert doc.sections[0].text is not None\n        assert doc.sections[0].link is not None\n\n    def test_document_metadata_structure(\n        self, connector: HubSpotConnector, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Test that document metadata contains expected fields.\"\"\"\n        connector.load_credentials(credentials)\n\n        document_batches = connector.load_from_state()\n        all_docs: list[Document] = []\n\n        # Collect a few batches to test different object types\n        batch_count = 0\n        for batch in document_batches:\n            all_docs.extend(\n                [doc for doc in batch if not isinstance(doc, HierarchyNode)]\n            )\n            batch_count += 1\n            if (\n                batch_count >= 3 or len(all_docs) >= 20\n            ):  # Limit to avoid too many API calls\n                break\n\n        # Group documents by object type\n        docs_by_type: dict[str, list[Document]] = {}\n        for doc in all_docs:\n            obj_type_value = doc.metadata[\"object_type\"]\n            # Handle the case where metadata value could be a list\n            obj_type = (\n                obj_type_value if isinstance(obj_type_value, str) else obj_type_value[0]\n            )\n            if obj_type not in docs_by_type:\n                docs_by_type[obj_type] = []\n            docs_by_type[obj_type].append(doc)\n\n        # Test each object type has expected metadata\n        for obj_type, docs in docs_by_type.items():\n            doc = docs[0]  # Test first document of each type\n\n            if obj_type == \"ticket\":\n                assert \"ticket_id\" in doc.metadata\n                assert doc.id.startswith(\"hubspot_ticket_\")\n            elif obj_type == \"company\":\n                assert \"company_id\" in doc.metadata\n                assert doc.id.startswith(\"hubspot_company_\")\n\n            elif obj_type == \"deal\":\n                assert \"deal_id\" in doc.metadata\n                assert doc.id.startswith(\"hubspot_deal_\")\n\n            elif obj_type == \"contact\":\n                assert \"contact_id\" in doc.metadata\n                assert doc.id.startswith(\"hubspot_contact_\")\n\n            # Check for associated object IDs in metadata (if they exist)\n            potential_association_keys = [\n                \"associated_contact_ids\",\n                \"associated_company_ids\",\n                \"associated_deal_ids\",\n                \"associated_ticket_ids\",\n                \"associated_note_ids\",\n            ]\n\n            for key in potential_association_keys:\n                if key in doc.metadata:\n                    assert isinstance(doc.metadata[key], list)\n                    assert len(doc.metadata[key]) > 0\n                    assert all(isinstance(id_val, str) for id_val in doc.metadata[key])\n\n    def test_associated_objects_as_sections(\n        self, connector: HubSpotConnector, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Test that associated objects are included as sections.\"\"\"\n        connector.load_credentials(credentials)\n\n        document_batches = connector.load_from_state()\n\n        # Find a document with multiple sections (indicating associated objects)\n        found_multi_section_doc = False\n        batch_count = 0\n\n        for batch in document_batches:\n            for doc in batch:\n                if isinstance(doc, HierarchyNode):\n                    continue\n                if len(doc.sections) > 1:\n                    found_multi_section_doc = True\n\n                    # First section should be the main object\n                    main_section = doc.sections[0]\n                    assert main_section.text is not None\n                    assert main_section.link is not None\n\n                    # Additional sections should be associated objects\n                    for section in doc.sections[1:]:\n                        assert section.text is not None\n                        assert section.link is not None\n                        # Should contain object type information\n                        assert any(\n                            obj_type in section.text.lower()\n                            for obj_type in [\n                                \"contact:\",\n                                \"company:\",\n                                \"deal:\",\n                                \"ticket:\",\n                                \"note:\",\n                            ]\n                        )\n\n                    break\n\n            if found_multi_section_doc:\n                break\n\n            batch_count += 1\n            if batch_count >= 5:  # Limit API calls\n                break\n\n        # Note: This test might not always pass if there are no associated objects\n        # in the test HubSpot instance, but it validates the structure when they exist\n        if found_multi_section_doc:\n            print(\"✓ Found document with associated objects as sections\")\n        else:\n            print(\"⚠ No documents with associated objects found in test data\")\n\n    def test_poll_source_functionality(\n        self, connector: HubSpotConnector, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Test poll_source with time filtering.\"\"\"\n        connector.load_credentials(credentials)\n\n        # Test with a recent time range (last 30 days)\n        end_time = datetime.now(timezone.utc)\n        start_time = datetime.now(timezone.utc).replace(day=1)  # Start of current month\n\n        start_timestamp = int(start_time.timestamp())\n        end_timestamp = int(end_time.timestamp())\n\n        document_batches = connector.poll_source(start_timestamp, end_timestamp)\n\n        # Should be able to get at least one batch\n        first_batch = next(document_batches, None)\n\n        if first_batch is not None:\n            assert isinstance(first_batch, list)\n            assert len(first_batch) > 0\n\n            # Check that documents have proper timestamps\n            for doc in first_batch:\n                if isinstance(doc, HierarchyNode):\n                    continue\n                assert doc.doc_updated_at is not None\n                # Note: We don't strictly enforce the time range here since\n                # the test data might not have recent updates\n        else:\n            print(\"⚠ No documents found in the specified time range\")\n\n    def test_all_object_types_processed(\n        self, connector: HubSpotConnector, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Integration test to verify all object types are processed correctly.\"\"\"\n        connector.load_credentials(credentials)\n\n        document_batches = connector.load_from_state()\n        all_docs: list[Document] = []\n        object_types_found = set()\n\n        # Collect several batches to ensure we see all object types\n        batch_count = 0\n        for batch in document_batches:\n            all_docs.extend(\n                [doc for doc in batch if not isinstance(doc, HierarchyNode)]\n            )\n            for doc in batch:\n                if isinstance(doc, HierarchyNode):\n                    continue\n                object_types_found.add(doc.metadata[\"object_type\"])\n\n            batch_count += 1\n            # Stop after we've seen all expected types or after reasonable number of batches\n            if len(object_types_found) >= 4 or batch_count >= 10:\n                break\n\n        print(f\"Found {len(all_docs)} total documents\")\n        print(f\"Object types found: {sorted(object_types_found)}\")\n\n        # Should have at least some documents\n        assert len(all_docs) > 0\n\n        # Verify we can process multiple object types\n        # Note: We don't require all 4 types since the test instance might not have all types\n        assert len(object_types_found) >= 1\n\n        # Verify document structure for each type found\n        for obj_type in object_types_found:\n            type_docs = [\n                doc for doc in all_docs if doc.metadata[\"object_type\"] == obj_type\n            ]\n            assert len(type_docs) > 0\n\n            # Check first document of this type\n            doc = type_docs[0]\n            assert doc.id.startswith(f\"hubspot_{obj_type}_\")\n            assert doc.semantic_identifier is not None\n            assert len(doc.sections) > 0\n            assert doc.sections[0].text is not None\n            assert doc.sections[0].link is not None\n\n            # Check object-specific metadata\n            if obj_type == \"company\":\n                assert \"company_id\" in doc.metadata\n            elif obj_type == \"deal\":\n                assert \"deal_id\" in doc.metadata\n            elif obj_type == \"contact\":\n                assert \"contact_id\" in doc.metadata\n            elif obj_type == \"ticket\":\n                assert \"ticket_id\" in doc.metadata\n\n    def test_init_default_object_types(self) -> None:\n        \"\"\"Test that connector initializes with all object types by default.\"\"\"\n        connector = HubSpotConnector()\n        assert connector.object_types == AVAILABLE_OBJECT_TYPES\n        assert \"tickets\" in connector.object_types\n        assert \"companies\" in connector.object_types\n        assert \"deals\" in connector.object_types\n        assert \"contacts\" in connector.object_types\n\n    def test_init_custom_object_types(self) -> None:\n        \"\"\"Test that connector can be initialized with custom object types.\"\"\"\n        custom_types = [\"tickets\", \"companies\"]\n        connector = HubSpotConnector(object_types=custom_types)\n        expected_set = {\"tickets\", \"companies\"}\n        assert connector.object_types == expected_set\n        assert \"tickets\" in connector.object_types\n        assert \"companies\" in connector.object_types\n        assert \"deals\" not in connector.object_types\n        assert \"contacts\" not in connector.object_types\n\n    def test_init_custom_object_types_from_list(self) -> None:\n        \"\"\"Test that connector can be initialized with custom object types from a list (frontend format).\"\"\"\n        custom_types_list = [\"tickets\", \"companies\"]\n        connector = HubSpotConnector(object_types=custom_types_list)\n        expected_set = {\"tickets\", \"companies\"}\n        assert connector.object_types == expected_set\n        assert \"tickets\" in connector.object_types\n        assert \"companies\" in connector.object_types\n        assert \"deals\" not in connector.object_types\n        assert \"contacts\" not in connector.object_types\n\n    def test_init_single_object_type(self) -> None:\n        \"\"\"Test that connector can be initialized with a single object type.\"\"\"\n        single_type = [\"deals\"]\n        connector = HubSpotConnector(object_types=single_type)\n        expected_set = {\"deals\"}\n        assert connector.object_types == expected_set\n        assert len(connector.object_types) == 1\n        assert \"deals\" in connector.object_types\n\n    def test_init_invalid_object_types(self) -> None:\n        \"\"\"Test that connector raises error for invalid object types.\"\"\"\n        invalid_types = [\"tickets\", \"invalid_type\", \"another_invalid\"]\n\n        with pytest.raises(ValueError) as exc_info:\n            HubSpotConnector(object_types=invalid_types)\n\n        error_message = str(exc_info.value)\n        assert \"Invalid object types\" in error_message\n        assert \"invalid_type\" in error_message\n        assert \"another_invalid\" in error_message\n        assert \"Available types\" in error_message\n\n    def test_init_empty_object_types(self) -> None:\n        \"\"\"Test that connector can be initialized with empty object types set.\"\"\"\n        empty_types: list[str] = []\n        connector = HubSpotConnector(object_types=empty_types)\n        expected_set: set[str] = set()\n        assert connector.object_types == expected_set\n        assert len(connector.object_types) == 0\n\n    def test_selective_object_fetching_tickets_only(\n        self, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Test that only tickets are fetched when configured.\"\"\"\n        connector = HubSpotConnector(object_types=[\"tickets\"], batch_size=5)\n        connector.load_credentials(credentials)\n\n        document_batches = connector.load_from_state()\n        all_docs: list[Document] = []\n\n        # Collect a few batches\n        batch_count = 0\n        for batch in document_batches:\n            all_docs.extend(\n                [doc for doc in batch if not isinstance(doc, HierarchyNode)]\n            )\n            batch_count += 1\n            if batch_count >= 3 or len(all_docs) >= 10:\n                break\n\n        # Should have documents\n        if all_docs:\n            # All documents should be tickets\n            for doc in all_docs:\n                assert doc.metadata[\"object_type\"] == \"ticket\"\n                assert doc.id.startswith(\"hubspot_ticket_\")\n\n            print(f\"✓ Successfully fetched {len(all_docs)} ticket documents only\")\n        else:\n            print(\"⚠ No ticket documents found in test data\")\n\n    def test_selective_object_fetching_companies_and_deals(\n        self, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Test that only companies and deals are fetched when configured.\"\"\"\n        connector = HubSpotConnector(object_types=[\"companies\", \"deals\"], batch_size=5)\n        connector.load_credentials(credentials)\n\n        document_batches = connector.load_from_state()\n        all_docs: list[Document] = []\n        object_types_found = set()\n\n        # Collect a few batches\n        batch_count = 0\n        for batch in document_batches:\n            all_docs.extend(\n                [doc for doc in batch if not isinstance(doc, HierarchyNode)]\n            )\n            for doc in batch:\n                if isinstance(doc, HierarchyNode):\n                    continue\n                object_types_found.add(doc.metadata[\"object_type\"])\n            batch_count += 1\n            if batch_count >= 3 or len(all_docs) >= 10:\n                break\n\n        if all_docs:\n            # Should only have companies and deals\n            assert object_types_found.issubset({\"company\", \"deal\"})\n            assert \"ticket\" not in object_types_found\n            assert \"contact\" not in object_types_found\n\n            # Verify document structure\n            for doc in all_docs:\n                obj_type = doc.metadata[\"object_type\"]\n                assert obj_type in [\"company\", \"deal\"]\n                if obj_type == \"company\":\n                    assert doc.id.startswith(\"hubspot_company_\")\n                elif obj_type == \"deal\":\n                    assert doc.id.startswith(\"hubspot_deal_\")\n\n            print(\n                f\"✓ Successfully fetched {len(all_docs)} documents of types: {object_types_found}\"\n            )\n        else:\n            print(\"⚠ No company/deal documents found in test data\")\n\n    def test_empty_object_types_fetches_nothing(\n        self, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Test that no documents are fetched when object_types is empty.\"\"\"\n        connector = HubSpotConnector(object_types=[], batch_size=5)\n        connector.load_credentials(credentials)\n\n        document_batches = connector.load_from_state()\n        all_docs: list[Document] = []\n\n        # Try to collect batches\n        batch_count = 0\n        for batch in document_batches:\n            all_docs.extend(\n                [doc for doc in batch if not isinstance(doc, HierarchyNode)]\n            )\n            batch_count += 1\n            if batch_count >= 2:  # Don't wait too long\n                break\n\n        # Should have no documents\n        assert len(all_docs) == 0\n        print(\"✓ No documents fetched with empty object_types as expected\")\n\n    def test_poll_source_respects_object_types(\n        self, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Test that poll_source respects the object_types configuration.\"\"\"\n        connector = HubSpotConnector(object_types=[\"contacts\"], batch_size=5)\n        connector.load_credentials(credentials)\n\n        # Test with a recent time range\n        end_time = datetime.now(timezone.utc)\n        start_time = datetime.now(timezone.utc).replace(day=1)\n\n        start_timestamp = int(start_time.timestamp())\n        end_timestamp = int(end_time.timestamp())\n\n        document_batches = connector.poll_source(start_timestamp, end_timestamp)\n        all_docs: list[Document] = []\n\n        # Collect a few batches\n        batch_count = 0\n        for batch in document_batches:\n            all_docs.extend(\n                [doc for doc in batch if not isinstance(doc, HierarchyNode)]\n            )\n            batch_count += 1\n            if batch_count >= 2 or len(all_docs) >= 5:\n                break\n\n        if all_docs:\n            # All documents should be contacts\n            for doc in all_docs:\n                assert doc.metadata[\"object_type\"] == \"contact\"\n                assert doc.id.startswith(\"hubspot_contact_\")\n\n            print(\n                f\"✓ Poll source successfully fetched {len(all_docs)} contact documents only\"\n            )\n        else:\n            print(\"⚠ No contact documents found in specified time range\")\n\n    def test_object_types_immutability(self) -> None:\n        \"\"\"Test that object_types set cannot be modified externally.\"\"\"\n        original_types = [\"tickets\", \"companies\"]\n        connector = HubSpotConnector(object_types=original_types)\n\n        # Modifying the original list should not affect the connector\n        original_types.append(\"deals\")\n        assert \"deals\" not in connector.object_types\n        assert connector.object_types == {\"tickets\", \"companies\"}\n\n        # Trying to modify the connector's object_types should not affect the original\n        connector_types = connector.object_types\n        connector_types.add(\"contacts\")\n        # The connector should still have the original types since we made a copy\n        # Note: This test verifies our implementation makes a copy in __init__\n\n    def test_url_generation(self) -> None:\n        \"\"\"Test that URLs are generated correctly for different object types.\"\"\"\n        connector = HubSpotConnector()\n        connector.portal_id = \"12345\"  # Mock portal ID\n\n        # Test URL generation for each object type\n        ticket_url = connector._get_object_url(\"tickets\", \"67890\")\n        expected_ticket_url = \"https://app.hubspot.com/contacts/12345/record/0-5/67890\"\n        assert ticket_url == expected_ticket_url\n\n        company_url = connector._get_object_url(\"companies\", \"11111\")\n        expected_company_url = \"https://app.hubspot.com/contacts/12345/record/0-2/11111\"\n        assert company_url == expected_company_url\n\n        deal_url = connector._get_object_url(\"deals\", \"22222\")\n        expected_deal_url = \"https://app.hubspot.com/contacts/12345/record/0-3/22222\"\n        assert deal_url == expected_deal_url\n\n        contact_url = connector._get_object_url(\"contacts\", \"33333\")\n        expected_contact_url = \"https://app.hubspot.com/contacts/12345/record/0-1/33333\"\n        assert contact_url == expected_contact_url\n\n        note_url = connector._get_object_url(\"notes\", \"44444\")\n        expected_note_url = \"https://app.hubspot.com/contacts/12345/objects/0-4/44444\"\n        assert note_url == expected_note_url\n\n    def test_ticket_with_none_content(self) -> None:\n        \"\"\"Test that tickets with None content are handled gracefully.\"\"\"\n        connector = HubSpotConnector(object_types=[\"tickets\"], batch_size=10)\n        connector._access_token = \"mock_token\"\n        connector._portal_id = \"mock_portal_id\"\n\n        # Create a mock ticket with None content\n        mock_ticket = MagicMock()\n        mock_ticket.id = \"12345\"\n        mock_ticket.properties = {\n            \"subject\": \"Test Ticket\",\n            \"content\": None,  # This is the key test case\n            \"hs_ticket_priority\": \"HIGH\",\n        }\n        mock_ticket.updated_at = datetime.now(timezone.utc)\n\n        # Mock the HubSpot API client\n        mock_api_client = MagicMock()\n\n        # Mock the API calls and associated object methods\n        with (\n            patch(\"onyx.connectors.hubspot.connector.HubSpot\") as MockHubSpot,\n            patch.object(connector, \"_paginated_results\") as mock_paginated,\n            patch.object(connector, \"_get_associated_objects\", return_value=[]),\n            patch.object(connector, \"_get_associated_notes\", return_value=[]),\n        ):\n            MockHubSpot.return_value = mock_api_client\n            mock_paginated.return_value = iter([mock_ticket])\n\n            # This should not raise a validation error\n            document_batches = connector._process_tickets()\n            first_batch = next(document_batches, None)\n\n            # Verify the document was created successfully\n            assert first_batch is not None\n            assert len(first_batch) == 1\n\n            doc = first_batch[0]\n            assert not isinstance(doc, HierarchyNode)\n            assert doc.id == \"hubspot_ticket_12345\"\n            assert doc.semantic_identifier == \"Test Ticket\"\n\n            # Verify the first section has an empty string, not None\n            assert len(doc.sections) > 0\n            assert doc.sections[0].text == \"\"  # Should be empty string, not None\n            assert doc.sections[0].link is not None\n"
  },
  {
    "path": "backend/tests/daily/connectors/imap/models.py",
    "content": "from pydantic import BaseModel\n\nfrom onyx.connectors.models import Document\nfrom tests.daily.connectors.utils import to_text_sections\n\n\nclass EmailDoc(BaseModel):\n    subject: str\n    recipients: set[str]\n    body: str\n\n    @classmethod\n    def from_doc(cls, document: Document) -> \"EmailDoc\":\n        # Acceptable to perform assertions since this class is only used in tests.\n        assert document.title\n        assert document.external_access\n\n        body = \" \".join(to_text_sections(sections=iter(document.sections)))\n\n        return cls(\n            subject=document.title,\n            recipients=document.external_access.external_user_emails,\n            body=body,\n        )\n"
  },
  {
    "path": "backend/tests/daily/connectors/imap/test_imap_connector.py",
    "content": "import os\nimport time\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider\nfrom onyx.connectors.imap.connector import ImapConnector\nfrom tests.daily.connectors.imap.models import EmailDoc\nfrom tests.daily.connectors.utils import (\n    load_all_from_connector,\n)\n\n\n@pytest.fixture\ndef imap_connector() -> ImapConnector:\n    host = os.environ.get(\"IMAP_HOST\")\n    mailboxes_str = os.environ.get(\"IMAP_MAILBOXES\")\n    username = os.environ.get(\"IMAP_USERNAME\")\n    password = os.environ.get(\"IMAP_PASSWORD\")\n\n    assert host\n    mailboxes = (\n        [mailbox.strip() for mailbox in mailboxes_str.split(\",\") if mailbox]\n        if mailboxes_str\n        else []\n    )\n\n    imap_connector = ImapConnector(\n        host=host,\n        mailboxes=mailboxes,\n    )\n    imap_connector.set_credentials_provider(\n        OnyxStaticCredentialsProvider(\n            tenant_id=None,\n            connector_name=DocumentSource.IMAP,\n            credential_json={\n                \"imap_username\": username,\n                \"imap_password\": password,\n            },\n        )\n    )\n\n    return imap_connector\n\n\n@pytest.mark.parametrize(\n    \"expected_email_docs\",\n    [\n        [\n            EmailDoc(\n                subject=\"Testing\",\n                recipients=set([\"admin@onyx-test.com\", \"raunak@onyx.app\"]),\n                body=\"Hello, testing.\",\n            ),\n            EmailDoc(\n                subject=\"Hello world\",\n                recipients=set([\"admin@onyx-test.com\", \"r@rabh.io\", \"raunak@onyx.app\"]),\n                body='Hello world, this is an email that contains multiple \"To\" recipients.',\n            ),\n        ]\n    ],\n)\ndef test_imap_connector(\n    imap_connector: ImapConnector,\n    expected_email_docs: list[EmailDoc],\n) -> None:\n    actual_email_docs = [\n        EmailDoc.from_doc(document=document)\n        for document in load_all_from_connector(\n            connector=imap_connector,\n            start=0,\n            end=time.time(),\n            include_permissions=True,\n        ).documents\n    ]\n\n    assert actual_email_docs == expected_email_docs\n"
  },
  {
    "path": "backend/tests/daily/connectors/jira/test_jira_basic.py",
    "content": "import os\nimport time\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.jira.connector import JiraConnector\nfrom onyx.connectors.models import Document\nfrom tests.daily.connectors.utils import load_all_from_connector\n\n\ndef _make_connector(scoped_token: bool = False) -> JiraConnector:\n    connector = JiraConnector(\n        jira_base_url=\"https://danswerai.atlassian.net\",\n        project_key=\"AS\",\n        comment_email_blacklist=[],\n        scoped_token=scoped_token,\n    )\n    connector.load_credentials(\n        {\n            \"jira_user_email\": os.environ[\"JIRA_USER_EMAIL\"],\n            \"jira_api_token\": (\n                os.environ[\"JIRA_API_TOKEN_SCOPED\"]\n                if scoped_token\n                else os.environ[\"JIRA_API_TOKEN\"]\n            ),\n        }\n    )\n    return connector\n\n\n@pytest.fixture\ndef jira_connector() -> JiraConnector:\n    return _make_connector()\n\n\n@pytest.fixture\ndef jira_connector_scoped() -> JiraConnector:\n    return _make_connector(scoped_token=True)\n\n\n@pytest.fixture\ndef jira_connector_with_jql() -> JiraConnector:\n    connector = JiraConnector(\n        jira_base_url=\"https://danswerai.atlassian.net\",\n        jql_query=\"project = 'AS' AND issuetype = Story\",\n        comment_email_blacklist=[],\n    )\n    connector.load_credentials(\n        {\n            \"jira_user_email\": os.environ[\"JIRA_USER_EMAIL\"],\n            \"jira_api_token\": os.environ[\"JIRA_API_TOKEN\"],\n        }\n    )\n    connector.validate_connector_settings()\n\n    return connector\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_jira_connector_basic(\n    reset: None,  # noqa: ARG001\n    jira_connector: JiraConnector,\n) -> None:\n    _test_jira_connector_basic(jira_connector)\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_jira_connector_basic_scoped(\n    reset: None,  # noqa: ARG001\n    jira_connector_scoped: JiraConnector,\n) -> None:\n    _test_jira_connector_basic(jira_connector_scoped)\n\n\ndef _test_jira_connector_basic(jira_connector: JiraConnector) -> None:\n    docs = load_all_from_connector(\n        connector=jira_connector,\n        start=0,\n        end=time.time(),\n    ).documents\n    assert len(docs) == 2\n\n    # Find story and epic\n    story: Document | None = None\n    epic: Document | None = None\n    for doc in docs:\n        if doc.metadata[\"issuetype\"] == \"Story\":\n            story = doc\n        elif doc.metadata[\"issuetype\"] == \"Epic\":\n            epic = doc\n\n    assert story is not None\n    assert epic is not None\n\n    # Check task\n    assert story.id == \"https://danswerai.atlassian.net/browse/AS-3\"\n    assert story.semantic_identifier == \"AS-3: Magic Answers\"\n    assert story.source == DocumentSource.JIRA\n    assert story.metadata == {\n        \"priority\": \"Medium\",\n        \"status\": \"Done\",\n        \"resolution\": \"Done\",\n        \"resolution_date\": \"2025-05-29T15:33:31.031-0700\",\n        \"reporter\": \"Chris Weaver\",\n        \"assignee\": \"Chris Weaver\",\n        \"issuetype\": \"Story\",\n        \"created\": \"2025-04-16T16:44:06.716-0700\",\n        \"reporter_email\": \"chris@onyx.app\",\n        \"assignee_email\": \"chris@onyx.app\",\n        \"project_name\": \"DailyConnectorTestProject\",\n        \"project\": \"AS\",\n        \"parent\": \"AS-4\",\n        \"key\": \"AS-3\",\n        \"updated\": \"2025-06-17T12:13:00.070-0700\",\n    }\n    assert story.secondary_owners is None\n    assert story.title == \"AS-3 Magic Answers\"\n    assert story.from_ingestion_api is False\n    assert story.additional_info is None\n\n    assert len(story.sections) == 1\n    section = story.sections[0]\n    assert (\n        section.text\n        == \"This is a critical request for super-human answer quality in Onyx! We need magic!\\n\"\n    )\n    assert section.link == \"https://danswerai.atlassian.net/browse/AS-3\"\n\n    # Check epic\n    assert epic.id == \"https://danswerai.atlassian.net/browse/AS-4\"\n    assert epic.semantic_identifier == \"AS-4: EPIC\"\n    assert epic.source == DocumentSource.JIRA\n    assert epic.metadata == {\n        \"priority\": \"Medium\",\n        \"status\": \"Backlog\",\n        \"reporter\": \"Founder Onyx\",\n        \"assignee\": \"Chris Weaver\",\n        \"issuetype\": \"Epic\",\n        \"created\": \"2025-04-16T16:55:53.068-0700\",\n        \"reporter_email\": \"founders@onyx.app\",\n        \"assignee_email\": \"chris@onyx.app\",\n        \"project_name\": \"DailyConnectorTestProject\",\n        \"project\": \"AS\",\n        \"key\": \"AS-4\",\n        \"updated\": \"2025-05-29T14:43:05.312-0700\",\n    }\n    assert epic.secondary_owners is None\n    assert epic.title == \"AS-4 EPIC\"\n    assert epic.from_ingestion_api is False\n    assert epic.additional_info is None\n\n    assert len(epic.sections) == 1\n    section = epic.sections[0]\n    assert section.text == \"example_text\\n\"\n    assert section.link == \"https://danswerai.atlassian.net/browse/AS-4\"\n\n\n@patch(\n    \"onyx.file_processing.extract_file_text.get_unstructured_api_key\",\n    return_value=None,\n)\ndef test_jira_connector_with_jql(\n    reset: None,  # noqa: ARG001\n    jira_connector_with_jql: JiraConnector,\n) -> None:\n    \"\"\"Test that JQL query functionality works correctly.\n\n    This test verifies that when a JQL query is provided, only issues matching the query are returned.\n    The JQL query used is \"project = \\'AS\\' AND issuetype = Story\", which should only return Story-type issues.\n    \"\"\"\n    docs = load_all_from_connector(\n        connector=jira_connector_with_jql,\n        start=0,\n        end=time.time(),\n    ).documents\n\n    # Should only return Story-type issues\n    assert len(docs) == 1\n\n    # All documents should be Story-type\n    for doc in docs:\n        assert doc.metadata[\"issuetype\"] == \"Story\"\n\n    # Verify it's the expected Story\n    story = docs[0]\n    assert story.id == \"https://danswerai.atlassian.net/browse/AS-3\"\n    assert story.semantic_identifier == \"AS-3: Magic Answers\"\n    assert story.metadata[\"issuetype\"] == \"Story\"\n"
  },
  {
    "path": "backend/tests/daily/connectors/notion/test_notion_connector.py",
    "content": "import os\nimport time\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.notion.connector import NotionConnector\n\n\ndef compare_hierarchy_nodes(\n    yielded_nodes: list[HierarchyNode],\n    expected_nodes: list[HierarchyNode],\n) -> None:\n    \"\"\"Compare yielded HierarchyNodes against expected ground truth.\n\n    Compares nodes by their essential fields (raw_node_id, raw_parent_id, display_name, link).\n    Order does not matter.\n    \"\"\"\n    if not expected_nodes:\n        # Empty ground truth - skip comparison for now\n        return\n\n    yielded_set = {\n        (n.raw_node_id, n.raw_parent_id, n.display_name, n.link) for n in yielded_nodes\n    }\n    expected_set = {\n        (n.raw_node_id, n.raw_parent_id, n.display_name, n.link) for n in expected_nodes\n    }\n\n    missing = expected_set - yielded_set\n    extra = yielded_set - expected_set\n\n    assert not missing, f\"Missing expected HierarchyNodes: {missing}\"\n    assert not extra, f\"Unexpected HierarchyNodes: {extra}\"\n\n\n@pytest.fixture\ndef notion_connector() -> NotionConnector:\n    \"\"\"Create a NotionConnector with credentials from environment variables\"\"\"\n    connector = NotionConnector()\n    connector.load_credentials(\n        {\n            \"notion_integration_token\": os.environ[\"NOTION_INTEGRATION_TOKEN\"],\n        }\n    )\n    return connector\n\n\ndef test_notion_connector_basic(notion_connector: NotionConnector) -> None:\n    \"\"\"Test the NotionConnector with a real Notion page.\n\n    Uses a Notion workspace under the onyx-test.com domain.\n    \"\"\"\n    doc_batch_generator = notion_connector.poll_source(0, time.time())\n\n    # Collect all documents and hierarchy nodes from all batches\n    documents: list[Document] = []\n    hierarchy_nodes: list[HierarchyNode] = []\n    for doc_batch in doc_batch_generator:\n        for item in doc_batch:\n            if isinstance(item, HierarchyNode):\n                hierarchy_nodes.append(item)\n            else:\n                documents.append(item)\n\n    # Verify document count\n    assert (\n        len(documents) == 5\n    ), \"Expected exactly 5 documents (root, two children, table entry, and table entry child)\"\n\n    # Verify HierarchyNodes against ground truth (empty for now)\n    expected_hierarchy_nodes: list[HierarchyNode] = []\n    compare_hierarchy_nodes(hierarchy_nodes, expected_hierarchy_nodes)\n\n    # Find root and child documents by semantic identifier\n    root_doc = None\n    child1_doc = None\n    child2_doc = None\n    table_entry_doc = None\n    table_entry_child_doc = None\n    for doc in documents:\n        if doc.semantic_identifier == \"Root\":\n            root_doc = doc\n        elif doc.semantic_identifier == \"Child1\":\n            child1_doc = doc\n        elif doc.semantic_identifier == \"Child2\":\n            child2_doc = doc\n        elif doc.semantic_identifier == \"table-entry01\":\n            table_entry_doc = doc\n        elif doc.semantic_identifier == \"Child-table-entry01\":\n            table_entry_child_doc = doc\n\n    assert root_doc is not None, \"Root document not found\"\n    assert child1_doc is not None, \"Child1 document not found\"\n    assert child2_doc is not None, \"Child2 document not found\"\n    assert table_entry_doc is not None, \"Table entry document not found\"\n    assert table_entry_child_doc is not None, \"Table entry child document not found\"\n\n    # Verify root document structure\n    assert root_doc.id is not None\n    assert root_doc.source == DocumentSource.NOTION\n\n    # Section checks for root\n    assert len(root_doc.sections) == 1\n    root_section = root_doc.sections[0]\n\n    # Content specific checks for root\n    assert root_section.text == \"\\nroot\"\n    assert root_section.link is not None\n    assert root_section.link.startswith(\"https://www.notion.so/\")\n\n    # Verify child1 document structure\n    assert child1_doc.id is not None\n    assert child1_doc.source == DocumentSource.NOTION\n\n    # Section checks for child1\n    assert len(child1_doc.sections) == 1\n    child1_section = child1_doc.sections[0]\n\n    # Content specific checks for child1\n    assert child1_section.text == \"\\nchild1\"\n    assert child1_section.link is not None\n    assert child1_section.link.startswith(\"https://www.notion.so/\")\n\n    # Verify child2 document structure (includes database)\n    assert child2_doc.id is not None\n    assert child2_doc.source == DocumentSource.NOTION\n\n    # Section checks for child2\n    assert len(child2_doc.sections) == 2  # One for content, one for database\n    child2_section = child2_doc.sections[0]\n    child2_db_section = child2_doc.sections[1]\n\n    # Content specific checks for child2\n    assert child2_section.text == \"\\nchild2\"\n    assert child2_section.link is not None\n    assert child2_section.link.startswith(\"https://www.notion.so/\")\n\n    # Database section checks for child2\n    assert child2_db_section.text is not None\n    assert child2_db_section.text.strip() != \"\"  # Should contain some database content\n    assert child2_db_section.link is not None\n    assert child2_db_section.link.startswith(\"https://www.notion.so/\")\n\n    # Verify table entry document structure\n    assert table_entry_doc.id is not None\n    assert table_entry_doc.source == DocumentSource.NOTION\n\n    # Section checks for table entry\n    assert len(table_entry_doc.sections) == 1\n    table_entry_section = table_entry_doc.sections[0]\n\n    # Content specific checks for table entry\n    assert table_entry_section.text == \"\\ntable-entry01\"\n    assert table_entry_section.link is not None\n    assert table_entry_section.link.startswith(\"https://www.notion.so/\")\n\n    # Verify table entry child document structure\n    assert table_entry_child_doc.id is not None\n    assert table_entry_child_doc.source == DocumentSource.NOTION\n\n    # Section checks for table entry child\n    assert len(table_entry_child_doc.sections) == 1\n    table_entry_child_section = table_entry_child_doc.sections[0]\n\n    # Content specific checks for table entry child\n    assert table_entry_child_section.text == \"\\nchild-table-entry01\"\n    assert table_entry_child_section.link is not None\n    assert table_entry_child_section.link.startswith(\"https://www.notion.so/\")\n"
  },
  {
    "path": "backend/tests/daily/connectors/outline/test_outline_connector.py",
    "content": "import os\nimport time\nfrom typing import Any\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.outline.connector import OutlineConnector\n\n\nclass TestOutlineConnector:\n    \"\"\"Comprehensive test suite for the OutlineConnector.\"\"\"\n\n    @pytest.fixture\n    def connector(self) -> OutlineConnector:\n        \"\"\"Create an Outline connector instance.\"\"\"\n        return OutlineConnector(batch_size=10)\n\n    @pytest.fixture\n    def credentials(self) -> dict[str, Any]:\n        \"\"\"Provide test credentials from environment variables.\"\"\"\n        outline_base_url = os.environ.get(\"OUTLINE_BASE_URL\")\n        outline_api_token = os.environ.get(\"OUTLINE_API_TOKEN\")\n\n        if not outline_base_url or not outline_api_token:\n            pytest.skip(\n                \"OUTLINE_BASE_URL and OUTLINE_API_TOKEN environment variables must be set\"\n            )\n\n        return {\n            \"outline_api_token\": outline_api_token,\n            \"outline_base_url\": outline_base_url,\n        }\n\n    def test_credentials_missing_raises_exception(self) -> None:\n        \"\"\"Should raise if credentials are missing.\"\"\"\n        connector = OutlineConnector()\n\n        with pytest.raises(ConnectorMissingCredentialError) as exc_info:\n            list(connector.load_from_state())\n        assert \"Outline\" in str(exc_info.value)\n\n    def test_load_credentials(\n        self, connector: OutlineConnector, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Credentials should load correctly.\"\"\"\n        result = connector.load_credentials(credentials)\n\n        assert result is None\n        assert connector.outline_client is not None\n        assert connector.outline_client.api_token == credentials[\"outline_api_token\"]\n        assert connector.outline_client.base_url == credentials[\n            \"outline_base_url\"\n        ].rstrip(\"/\")\n\n    def test_outline_connector_basic(\n        self, connector: OutlineConnector, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Validate that connector fetches and structures documents properly.\"\"\"\n        connector.load_credentials(credentials)\n\n        documents: list[Document] = []\n        for batch in connector.load_from_state():\n            documents.extend(\n                [doc for doc in batch if not isinstance(doc, HierarchyNode)]\n            )\n\n        assert len(documents) > 0, \"Expected at least one document/collection\"\n\n        collections = [d for d in documents if d.metadata.get(\"type\") == \"collection\"]\n        docs = [d for d in documents if d.metadata.get(\"type\") == \"document\"]\n\n        assert len(collections) > 0, \"Should find at least one collection\"\n\n        collection = collections[0]\n        assert collection.id.startswith(\"outline_collection__\")\n        assert collection.source == DocumentSource.OUTLINE\n        assert collection.title is not None\n        assert len(collection.sections) == 1\n        assert collection.sections[0].text is not None\n        assert collection.metadata[\"type\"] == \"collection\"\n\n        if docs:\n            document = docs[0]\n            assert document.id.startswith(\"outline_document__\")\n            assert document.source == DocumentSource.OUTLINE\n            assert document.title is not None\n            assert len(document.sections) == 1\n            assert document.sections[0].text is not None\n            assert document.metadata[\"type\"] == \"document\"\n\n            section_link = document.sections[0].link\n            assert section_link is not None\n            assert \"/doc/\" in section_link\n\n    def test_outline_connector_time_filtering(\n        self, connector: OutlineConnector, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Validate poll_source with time range filtering.\"\"\"\n        connector.load_credentials(credentials)\n\n        end_time = time.time()\n        start_time = end_time - 30 * 24 * 60 * 60\n\n        docs: list[Document] = []\n        for batch in connector.poll_source(start_time, end_time):\n            docs.extend([doc for doc in batch if not isinstance(doc, HierarchyNode)])\n\n        for doc in docs:\n            assert isinstance(doc, Document)\n            assert doc.source == DocumentSource.OUTLINE\n            if doc.doc_updated_at:\n                assert start_time <= doc.doc_updated_at.timestamp() <= end_time\n\n    def test_outline_connector_load_from_state(\n        self, connector: OutlineConnector, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"load_from_state should fetch documents.\"\"\"\n        connector.load_credentials(credentials)\n\n        gen = connector.load_from_state()\n        batch = next(gen)\n        assert isinstance(batch, list)\n\n        for doc in batch:\n            assert isinstance(doc, Document)\n            assert doc.source == DocumentSource.OUTLINE\n\n    def test_outline_connector_batch_processing(\n        self, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Connector should respect batch size.\"\"\"\n        small_batch_connector = OutlineConnector(batch_size=2)\n        small_batch_connector.load_credentials(credentials)\n\n        for batch in small_batch_connector.poll_source(0, time.time()):\n            assert len(batch) <= 2\n            break\n\n    def test_outline_connector_document_types(\n        self, connector: OutlineConnector, credentials: dict[str, Any]\n    ) -> None:\n        \"\"\"Validate metadata for collections and documents.\"\"\"\n        connector.load_credentials(credentials)\n\n        docs: list[Document] = []\n        for batch in connector.poll_source(0, time.time()):\n            docs.extend([doc for doc in batch if not isinstance(doc, HierarchyNode)])\n\n        if docs:\n            doc_types = {d.metadata[\"type\"] for d in docs}\n            assert doc_types.issubset({\"document\", \"collection\"})\n\n            for doc in docs:\n                if doc.metadata[\"type\"] == \"document\":\n                    assert any(\n                        (s.text.strip() if s.text else None) for s in doc.sections\n                    )\n                elif doc.metadata[\"type\"] == \"collection\":\n                    assert len(doc.sections) >= 1\n\n    def test_outline_connector_invalid_credentials(self) -> None:\n        \"\"\"Should raise with invalid/missing credentials.\"\"\"\n        connector = OutlineConnector()\n\n        # Missing everything\n        with pytest.raises(ConnectorMissingCredentialError):\n            connector.load_credentials({})\n\n        # Missing base URL\n        with pytest.raises(ConnectorMissingCredentialError):\n            connector.load_credentials({\"outline_api_token\": \"token\"})\n\n        # Missing token\n        with pytest.raises(ConnectorMissingCredentialError):\n            connector.load_credentials({\"outline_base_url\": \"https://example.com\"})\n\n        # Invalid credentials will be caught during validation, not credential loading\n        connector.load_credentials(\n            {\n                \"outline_base_url\": \"https://invalid.invalid\",\n                \"outline_api_token\": \"invalid\",\n            }\n        )\n        # Validation should catch invalid credentials\n        with pytest.raises((CredentialExpiredError, ConnectorValidationError)):\n            connector.validate_connector_settings()\n\n    def test_outline_connector_invalid_url(self) -> None:\n        \"\"\"Invalid URL should raise validation error during validation.\"\"\"\n        connector = OutlineConnector()\n\n        # Load credentials with invalid URL\n        connector.load_credentials(\n            {\n                \"outline_base_url\": \"https://not-a-valid-url.invalid\",\n                \"outline_api_token\": \"token\",\n            }\n        )\n\n        # Validation should catch invalid URL\n        with pytest.raises(ConnectorValidationError):\n            connector.validate_connector_settings()\n"
  },
  {
    "path": "backend/tests/daily/connectors/salesforce/test_salesforce_connector.py",
    "content": "import json\nimport os\nfrom datetime import datetime\nfrom datetime import timezone\nfrom pathlib import Path\nfrom typing import Any\nfrom typing import cast\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.salesforce.connector import SalesforceConnector\nfrom onyx.connectors.salesforce.utils import ACCOUNT_OBJECT_TYPE\n\n\ndef extract_key_value_pairs_to_set(\n    list_of_unparsed_key_value_strings: list[str],\n) -> set[str]:\n    set_of_key_value_pairs = set()\n    for string_key_value_pairs in list_of_unparsed_key_value_strings:\n        list_of_parsed_key_values = string_key_value_pairs.split(\"\\n\")\n        for key_value_pair in list_of_parsed_key_values:\n            set_of_key_value_pairs.add(key_value_pair.strip())\n    return set_of_key_value_pairs\n\n\ndef _load_reference_data(\n    file_name: str = \"test_salesforce_data.json\",\n) -> dict[str, str | list[str] | dict[str, Any] | list[dict[str, Any]]]:\n    current_dir = Path(__file__).parent\n    with open(current_dir / file_name, \"r\") as f:\n        return json.load(f)\n\n\n@pytest.fixture\ndef salesforce_connector() -> SalesforceConnector:\n    connector = SalesforceConnector(\n        requested_objects=[ACCOUNT_OBJECT_TYPE, \"Contact\", \"Opportunity\"],\n    )\n\n    username = os.environ[\"SF_USERNAME\"]\n    password = os.environ[\"SF_PASSWORD\"]\n    security_token = os.environ[\"SF_SECURITY_TOKEN\"]\n\n    connector.load_credentials(\n        {\n            \"sf_username\": username,\n            \"sf_password\": password,\n            \"sf_security_token\": security_token,\n        }\n    )\n    return connector\n\n\n# TODO: make the credentials not expire\n@pytest.mark.skip(\n    reason=(\n        \"Credentials change over time, so this test will fail if run when the credentials expire.\"\n    )\n)\ndef test_salesforce_connector_basic(salesforce_connector: SalesforceConnector) -> None:\n    test_data = _load_reference_data()\n    target_test_doc: Document | None = None\n    all_docs: list[Document] = []\n    for doc_batch in salesforce_connector.load_from_state():\n        for doc in doc_batch:\n            if isinstance(doc, HierarchyNode):\n                continue\n            all_docs.append(doc)\n            if doc.id == test_data[\"id\"]:\n                target_test_doc = doc\n                break\n\n    # The number of docs here seems to change actively so do a very loose check\n    # as of 2025-03-28 it was around 32472\n    assert len(all_docs) > 32000\n    assert len(all_docs) < 40000\n\n    assert target_test_doc is not None\n\n    # Set of received links\n    received_links: set[str] = set()\n    # List of received text fields, which contain key-value pairs seperated by newlines\n    received_text: list[str] = []\n\n    # Iterate over the sections of the target test doc to extract the links and text\n    for section in target_test_doc.sections:\n        assert section.link\n        assert section.text\n        received_links.add(section.link)\n        received_text.append(section.text)\n\n    # Check that the received links match the expected links from the test data json\n    expected_links = set(test_data[\"expected_links\"])\n    assert received_links == expected_links\n\n    # Check that the received key-value pairs from the text fields match the expected key-value pairs from the test data json\n    expected_text = test_data[\"expected_text\"]\n    if not isinstance(expected_text, list):\n        raise ValueError(\"Expected text is not a list\")\n\n    unparsed_expected_key_value_pairs: list[str] = cast(list[str], expected_text)\n    received_key_value_pairs = extract_key_value_pairs_to_set(received_text)\n    expected_key_value_pairs = extract_key_value_pairs_to_set(\n        unparsed_expected_key_value_pairs\n    )\n    assert received_key_value_pairs == expected_key_value_pairs\n\n    # Check that the rest of the fields match the expected fields from the test data json\n    assert target_test_doc.source == DocumentSource.SALESFORCE\n    assert target_test_doc.semantic_identifier == test_data[\"semantic_identifier\"]\n    assert target_test_doc.metadata == test_data[\"metadata\"]\n\n    assert target_test_doc.primary_owners is not None\n    primary_owner = target_test_doc.primary_owners[0]\n    expected_primary_owner = test_data[\"primary_owners\"]\n    assert isinstance(expected_primary_owner, dict)\n    assert primary_owner.email == expected_primary_owner[\"email\"]\n    assert primary_owner.first_name == expected_primary_owner[\"first_name\"]\n    assert primary_owner.last_name == expected_primary_owner[\"last_name\"]\n\n    secondary_owners = (\n        [owner.model_dump() for owner in target_test_doc.secondary_owners]\n        if target_test_doc.secondary_owners\n        else None\n    )\n    assert secondary_owners == test_data[\"secondary_owners\"]\n    assert target_test_doc.title == test_data[\"title\"]\n\n\n@pytest.mark.skip(\n    reason=(\n        \"All Salesforce tests need to be re-thought + made less flakey. \"\n        \"We need to handle credential resets + the rate limits (move to a smaller dataset)\"\n    )\n)\ndef test_salesforce_connector_poll_source(\n    salesforce_connector: SalesforceConnector,\n) -> None:\n\n    intermediate_time = datetime(\n        2024, 6, 3, 0, 0, 0, tzinfo=timezone.utc\n    )  # roughly 92 docs\n\n    # intermediate_time = datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc)  # roughly 1100 to 1200 docs\n\n    all_docs_1: list[Document] = []\n    for doc_batch in salesforce_connector.poll_source(0, intermediate_time.timestamp()):\n        for doc in doc_batch:\n            if isinstance(doc, HierarchyNode):\n                continue\n            all_docs_1.append(doc)\n\n    len_1 = len(all_docs_1)\n\n    # NOTE: this is the correct document count.\n    # If you were to inspect the underlying db, however, the partial download results in\n    #  an incomplete set of object relationships. This is expected.\n\n    assert len_1 > 85 and len_1 < 100\n    print(f\"all_docs_1 length: {len(all_docs_1)}\")\n\n    # assert len_1 > 1100 and len_1 < 1200\n    # print(f\"all_docs_1 length: {len(all_docs_1)}\")\n\n    # leave this out for the moment because it's slow to process 30k docs\n    # all_docs_2: list[Document] = []\n    # for doc_batch in salesforce_connector.poll_source(\n    #     intermediate_time.timestamp(), time.time()\n    # ):\n    #     for doc in doc_batch:\n    #         all_docs_2.append(doc)\n\n    # len_2 = len(all_docs_2)\n    # assert len_2 > 31000\n\n    # print(f\"all_docs_2 length: {len(all_docs_2)}\")\n\n\n# TODO: make the credentials not expire\n@pytest.mark.skip(\n    reason=(\n        \"Credentials change over time, so this test will fail if run when the credentials expire.\"\n    )\n)\ndef test_salesforce_connector_slim(salesforce_connector: SalesforceConnector) -> None:\n    # Get all doc IDs from the full connector\n    all_full_doc_ids = set()\n    for doc_batch in salesforce_connector.load_from_state():\n        all_full_doc_ids.update(\n            [doc.id for doc in doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # Get all doc IDs from the slim connector\n    all_slim_doc_ids = set()\n    for slim_doc_batch in salesforce_connector.retrieve_all_slim_docs_perm_sync():\n        all_slim_doc_ids.update(\n            [doc.id for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # The set of full doc IDs should be always be a subset of the slim doc IDs\n    assert all_full_doc_ids.issubset(all_slim_doc_ids)\n"
  },
  {
    "path": "backend/tests/daily/connectors/salesforce/test_salesforce_data.json",
    "content": "{\n  \"id\": \"SALESFORCE_001bm00000eu6n5AAA\",\n  \"expected_links\": [\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESpEeAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESqd3AAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESoKiAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvDSAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrmHAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrl2AAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvejAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000EStlvAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESpPfAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrP9AAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvlMAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESt3JAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESoBkAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000EStw2AAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrkMAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESojKAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuLEAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESoSIAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESu2YAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvgSAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESurnAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrnqAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESoB5AAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuJuAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrfyAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/001bm00000eu6n5AAA\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESpUHAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESsgGAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESr7UAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESu1BAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESpqzAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESplZAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvJ3AAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESurKAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000EStSiAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuJFAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESu8xAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESqfzAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESqsrAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000EStoZAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESsIUAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESsAGAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESv8GAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrOKAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESoUmAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESudKAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuJ8AAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvf2AAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESw3qAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESugRAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESr18AAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESqV1AAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuLVAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESpjoAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESqULAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuCAAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrfpAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESp5YAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrMNAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000EStaUAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESt5LAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrtcAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESomaAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrtIAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESoToAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuWLAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESrWvAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESsJEAA1\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESsxwAAD\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvUgAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESvWjAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000EStBuAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESpZiAAL\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuhYAAT\",\n    \"https://danswer-dev-ed.develop.my.salesforce.com/003bm00000ESuWAAA1\"\n  ],\n  \"expected_text\": [\n    \"IsDeleted: false\\nBillingCity: Shaykh al \\u00e1\\u00b8\\u00a8ad\\u00c4\\u00abd\\nName: Voonder\\nCleanStatus: Pending\\nBillingStreet: 12 Cambridge Parkway\",\n    \"Email: eslayqzs@icio.us\\nIsDeleted: false\\nLastName: Slay\\nIsEmailBounced: false\\nFirstName: Ebeneser\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: ptweedgdh@umich.edu\\nIsDeleted: false\\nLastName: Tweed\\nIsEmailBounced: false\\nFirstName: Paulita\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: ehurnellnlx@facebook.com\\nIsDeleted: false\\nLastName: Hurnell\\nIsEmailBounced: false\\nFirstName: Eliot\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: ccarik4q4@google.it\\nIsDeleted: false\\nLastName: Carik\\nIsEmailBounced: false\\nFirstName: Chadwick\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: cvannozziina6@moonfruit.com\\nIsDeleted: false\\nLastName: Vannozzii\\nIsEmailBounced: false\\nFirstName: Christophorus\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: mikringill2kz@hugedomains.com\\nIsDeleted: false\\nLastName: Ikringill\\nIsEmailBounced: false\\nFirstName: Meghann\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: bgrinvalray@fda.gov\\nIsDeleted: false\\nLastName: Grinval\\nIsEmailBounced: false\\nFirstName: Berti\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: aollanderhr7@cam.ac.uk\\nIsDeleted: false\\nLastName: Ollander\\nIsEmailBounced: false\\nFirstName: Annemarie\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: rwhitesideq38@gravatar.com\\nIsDeleted: false\\nLastName: Whiteside\\nIsEmailBounced: false\\nFirstName: Rolando\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: vkrafthmz@techcrunch.com\\nIsDeleted: false\\nLastName: Kraft\\nIsEmailBounced: false\\nFirstName: Vidovik\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: jhillaut@4shared.com\\nIsDeleted: false\\nLastName: Hill\\nIsEmailBounced: false\\nFirstName: Janel\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: lralstonycs@discovery.com\\nIsDeleted: false\\nLastName: Ralston\\nIsEmailBounced: false\\nFirstName: Lorrayne\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: blyttlewba@networkadvertising.org\\nIsDeleted: false\\nLastName: Lyttle\\nIsEmailBounced: false\\nFirstName: Ban\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: pplummernvf@technorati.com\\nIsDeleted: false\\nLastName: Plummer\\nIsEmailBounced: false\\nFirstName: Pete\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: babrahamoffxpb@theatlantic.com\\nIsDeleted: false\\nLastName: Abrahamoff\\nIsEmailBounced: false\\nFirstName: Brander\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: ahargieym0@homestead.com\\nIsDeleted: false\\nLastName: Hargie\\nIsEmailBounced: false\\nFirstName: Aili\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: hstotthp2@yelp.com\\nIsDeleted: false\\nLastName: Stott\\nIsEmailBounced: false\\nFirstName: Hartley\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: jganniclifftuvj@blinklist.com\\nIsDeleted: false\\nLastName: Ganniclifft\\nIsEmailBounced: false\\nFirstName: Jamima\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: ldodelly8q@ed.gov\\nIsDeleted: false\\nLastName: Dodell\\nIsEmailBounced: false\\nFirstName: Lynde\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: rmilner3cp@smh.com.au\\nIsDeleted: false\\nLastName: Milner\\nIsEmailBounced: false\\nFirstName: Ralph\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: gghiriardellic19@state.tx.us\\nIsDeleted: false\\nLastName: Ghiriardelli\\nIsEmailBounced: false\\nFirstName: Garv\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: rhubatschfpu@nature.com\\nIsDeleted: false\\nLastName: Hubatsch\\nIsEmailBounced: false\\nFirstName: Rose\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: mtrenholme1ws@quantcast.com\\nIsDeleted: false\\nLastName: Trenholme\\nIsEmailBounced: false\\nFirstName: Mariejeanne\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: jmussettpbd@over-blog.com\\nIsDeleted: false\\nLastName: Mussett\\nIsEmailBounced: false\\nFirstName: Juliann\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: bgoroni145@illinois.edu\\nIsDeleted: false\\nLastName: Goroni\\nIsEmailBounced: false\\nFirstName: Bernarr\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: afalls3ph@theguardian.com\\nIsDeleted: false\\nLastName: Falls\\nIsEmailBounced: false\\nFirstName: Angelia\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: lswettjoi@go.com\\nIsDeleted: false\\nLastName: Swett\\nIsEmailBounced: false\\nFirstName: Levon\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: emullinsz38@dailymotion.com\\nIsDeleted: false\\nLastName: Mullins\\nIsEmailBounced: false\\nFirstName: Elsa\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: ibernettehco@ebay.co.uk\\nIsDeleted: false\\nLastName: Bernette\\nIsEmailBounced: false\\nFirstName: Ingrid\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: trisleybtt@simplemachines.org\\nIsDeleted: false\\nLastName: Risley\\nIsEmailBounced: false\\nFirstName: Toma\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: rgypsonqx1@goodreads.com\\nIsDeleted: false\\nLastName: Gypson\\nIsEmailBounced: false\\nFirstName: Reed\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: cposvneri28@jiathis.com\\nIsDeleted: false\\nLastName: Posvner\\nIsEmailBounced: false\\nFirstName: Culley\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: awilmut2rz@geocities.jp\\nIsDeleted: false\\nLastName: Wilmut\\nIsEmailBounced: false\\nFirstName: Andy\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: aluckwellra5@exblog.jp\\nIsDeleted: false\\nLastName: Luckwell\\nIsEmailBounced: false\\nFirstName: Andreana\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: irollings26j@timesonline.co.uk\\nIsDeleted: false\\nLastName: Rollings\\nIsEmailBounced: false\\nFirstName: Ibrahim\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: gspireqpd@g.co\\nIsDeleted: false\\nLastName: Spire\\nIsEmailBounced: false\\nFirstName: Gaelan\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: sbezleyk2y@acquirethisname.com\\nIsDeleted: false\\nLastName: Bezley\\nIsEmailBounced: false\\nFirstName: Sindee\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: icollerrr@flickr.com\\nIsDeleted: false\\nLastName: Coller\\nIsEmailBounced: false\\nFirstName: Inesita\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: kfolliott1bo@nature.com\\nIsDeleted: false\\nLastName: Folliott\\nIsEmailBounced: false\\nFirstName: Kennan\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: kroofjfo@gnu.org\\nIsDeleted: false\\nLastName: Roof\\nIsEmailBounced: false\\nFirstName: Karlik\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: lcovotti8s4@rediff.com\\nIsDeleted: false\\nLastName: Covotti\\nIsEmailBounced: false\\nFirstName: Lucho\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: gpatriskson1rs@census.gov\\nIsDeleted: false\\nLastName: Patriskson\\nIsEmailBounced: false\\nFirstName: Gardener\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: spidgleyqvw@usgs.gov\\nIsDeleted: false\\nLastName: Pidgley\\nIsEmailBounced: false\\nFirstName: Simona\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: cbecarrak0i@over-blog.com\\nIsDeleted: false\\nLastName: Becarra\\nIsEmailBounced: false\\nFirstName: Cally\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: aparkman9td@bbc.co.uk\\nIsDeleted: false\\nLastName: Parkman\\nIsEmailBounced: false\\nFirstName: Agneta\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: bboddingtonhn@quantcast.com\\nIsDeleted: false\\nLastName: Boddington\\nIsEmailBounced: false\\nFirstName: Betta\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: dcasementx0p@cafepress.com\\nIsDeleted: false\\nLastName: Casement\\nIsEmailBounced: false\\nFirstName: Dannie\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: hzornbhe@latimes.com\\nIsDeleted: false\\nLastName: Zorn\\nIsEmailBounced: false\\nFirstName: Haleigh\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: cfifieldbjb@blogspot.com\\nIsDeleted: false\\nLastName: Fifield\\nIsEmailBounced: false\\nFirstName: Christalle\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: ddewerson4t3@skype.com\\nIsDeleted: false\\nLastName: Dewerson\\nIsEmailBounced: false\\nFirstName: Dyann\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: khullock52p@sohu.com\\nIsDeleted: false\\nLastName: Hullock\\nIsEmailBounced: false\\nFirstName: Kellina\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: tfremantle32n@bandcamp.com\\nIsDeleted: false\\nLastName: Fremantle\\nIsEmailBounced: false\\nFirstName: Turner\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: sbernardtylp@nps.gov\\nIsDeleted: false\\nLastName: Bernardt\\nIsEmailBounced: false\\nFirstName: Selina\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: smcgettigan8kk@slideshare.net\\nIsDeleted: false\\nLastName: McGettigan\\nIsEmailBounced: false\\nFirstName: Sada\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: wdelafontvgn@businesswire.com\\nIsDeleted: false\\nLastName: Delafont\\nIsEmailBounced: false\\nFirstName: West\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: lbelsher9ne@indiatimes.com\\nIsDeleted: false\\nLastName: Belsher\\nIsEmailBounced: false\\nFirstName: Lou\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: cgoody27y@blogtalkradio.com\\nIsDeleted: false\\nLastName: Goody\\nIsEmailBounced: false\\nFirstName: Colene\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: cstodejzz@ucoz.ru\\nIsDeleted: false\\nLastName: Stode\\nIsEmailBounced: false\\nFirstName: Curcio\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: abromidgejb@china.com.cn\\nIsDeleted: false\\nLastName: Bromidge\\nIsEmailBounced: false\\nFirstName: Ariela\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: ldelgardilloqvp@xrea.com\\nIsDeleted: false\\nLastName: Delgardillo\\nIsEmailBounced: false\\nFirstName: Lauralee\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: dcroal9t4@businessinsider.com\\nIsDeleted: false\\nLastName: Croal\\nIsEmailBounced: false\\nFirstName: Devlin\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: dclarageqzb@wordpress.com\\nIsDeleted: false\\nLastName: Clarage\\nIsEmailBounced: false\\nFirstName: Dre\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: dthirlwall3jf@taobao.com\\nIsDeleted: false\\nLastName: Thirlwall\\nIsEmailBounced: false\\nFirstName: Dareen\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: tkeddie2lj@wiley.com\\nIsDeleted: false\\nLastName: Keddie\\nIsEmailBounced: false\\nFirstName: Tandi\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: jrimingtoni3i@istockphoto.com\\nIsDeleted: false\\nLastName: Rimington\\nIsEmailBounced: false\\nFirstName: Judy\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: gtroynet@slashdot.org\\nIsDeleted: false\\nLastName: Troy\\nIsEmailBounced: false\\nFirstName: Gail\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: ebunneyh0n@meetup.com\\nIsDeleted: false\\nLastName: Bunney\\nIsEmailBounced: false\\nFirstName: Efren\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: yhaken8p3@slate.com\\nIsDeleted: false\\nLastName: Haken\\nIsEmailBounced: false\\nFirstName: Yard\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: nolliffeq6q@biblegateway.com\\nIsDeleted: false\\nLastName: Olliffe\\nIsEmailBounced: false\\nFirstName: Nani\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: bgalia9jz@odnoklassniki.ru\\nIsDeleted: false\\nLastName: Galia\\nIsEmailBounced: false\\nFirstName: Berrie\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: djedrzej3v1@google.com\\nIsDeleted: false\\nLastName: Jedrzej\\nIsEmailBounced: false\\nFirstName: Deanne\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: mcamiesh1t@fc2.com\\nIsDeleted: false\\nLastName: Camies\\nIsEmailBounced: false\\nFirstName: Mikaela\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: csunshineqni@state.tx.us\\nIsDeleted: false\\nLastName: Sunshine\\nIsEmailBounced: false\\nFirstName: Curtis\\nIsPriorityRecord: false\\nCleanStatus: Pending\",\n    \"Email: fiannellib46@marriott.com\\nIsDeleted: false\\nLastName: Iannelli\\nIsEmailBounced: false\\nFirstName: Felicio\\nIsPriorityRecord: false\\nCleanStatus: Pending\"\n  ],\n  \"semantic_identifier\": \"Voonder\",\n  \"metadata\": {\"object_type\": \"Account\"},\n  \"primary_owners\": {\"email\": \"hagen@danswer.ai\", \"first_name\": \"Hagen\", \"last_name\": \"oneill\"},\n  \"secondary_owners\": null,\n  \"title\": null\n}\n"
  },
  {
    "path": "backend/tests/daily/connectors/sharepoint/test_sharepoint_connector.py",
    "content": "import os\nimport time\nfrom dataclasses import dataclass\nfrom datetime import datetime\nfrom datetime import timezone\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.sharepoint.connector import SharepointAuthMethod\nfrom onyx.connectors.sharepoint.connector import SharepointConnector\nfrom onyx.db.enums import HierarchyNodeType\nfrom tests.daily.connectors.utils import load_all_from_connector\n\n# NOTE: Sharepoint site for tests is \"sharepoint-tests\"\n\n\n@dataclass\nclass ExpectedDocument:\n    semantic_identifier: str\n    content: str\n    folder_path: str | None = None\n    library: str = \"Shared Documents\"  # Default to main library\n    expected_link_substrings: list[str] | None = None\n\n\nEXPECTED_DOCUMENTS = [\n    ExpectedDocument(\n        semantic_identifier=\"test1.docx\",\n        content=\"test1\",\n        folder_path=\"test\",\n        expected_link_substrings=[\"_layouts/15/Doc.aspx\", \"file=test1.docx\"],\n    ),\n    ExpectedDocument(\n        semantic_identifier=\"test2.docx\",\n        content=\"test2\",\n        folder_path=\"test/nested with spaces\",\n        expected_link_substrings=[\"_layouts/15/Doc.aspx\", \"file=test2.docx\"],\n    ),\n    ExpectedDocument(\n        semantic_identifier=\"should-not-index-on-specific-folder.docx\",\n        content=\"should-not-index-on-specific-folder\",\n        folder_path=None,  # root folder\n        expected_link_substrings=[\n            \"_layouts/15/Doc.aspx\",\n            \"file=should-not-index-on-specific-folder.docx\",\n        ],\n    ),\n    ExpectedDocument(\n        semantic_identifier=\"other.docx\",\n        content=\"other\",\n        folder_path=None,\n        library=\"Other Library\",\n        expected_link_substrings=[\"_layouts/15/Doc.aspx\", \"file=other.docx\"],\n    ),\n]\n\nEXPECTED_PAGES = [\n    ExpectedDocument(\n        semantic_identifier=\"CollabHome\",\n        content=(\n            \"# Home\\n\\nDisplay recent news.\\n\\n## News\\n\\nShow recent activities from your site\\n\\n\"\n            \"## Site activity\\n\\n## Quick links\\n\\nLearn about a team site\\n\\nLearn how to add a page\\n\\n\"\n            \"Add links to important documents and pages.\\n\\n## Quick links\\n\\nDocuments\\n\\n\"\n            \"Add a document library\\n\\n## Document library\"\n        ),\n        folder_path=None,\n        expected_link_substrings=[\"SitePages/CollabHome.aspx\"],\n    ),\n    ExpectedDocument(\n        semantic_identifier=\"Home\",\n        content=\"# Home\",\n        folder_path=None,\n        expected_link_substrings=[\"SitePages/Home.aspx\"],\n    ),\n]\n\n\ndef verify_document_metadata(doc: Document) -> None:\n    \"\"\"Verify common metadata that should be present on all documents.\"\"\"\n    assert isinstance(doc.doc_updated_at, datetime)\n    assert doc.doc_updated_at.tzinfo == timezone.utc\n    assert doc.source == DocumentSource.SHAREPOINT\n    assert doc.primary_owners is not None\n    assert len(doc.primary_owners) == 1\n    owner = doc.primary_owners[0]\n    assert owner.display_name is not None\n    assert owner.email is not None\n\n\ndef verify_document_content(doc: Document, expected: ExpectedDocument) -> None:\n    \"\"\"Verify a document matches its expected content.\"\"\"\n    assert doc.semantic_identifier == expected.semantic_identifier\n    assert len(doc.sections) == 1\n    assert doc.sections[0].text is not None\n    assert expected.content == doc.sections[0].text\n\n    if expected.expected_link_substrings is not None:\n        actual_link = doc.sections[0].link\n        assert actual_link is not None, (\n            f\"Expected section link containing {expected.expected_link_substrings} \"\n            f\"for '{expected.semantic_identifier}', but link was None\"\n        )\n        for substr in expected.expected_link_substrings:\n            assert substr in actual_link, (\n                f\"Section link for '{expected.semantic_identifier}' \"\n                f\"missing expected substring '{substr}', \"\n                f\"actual link: '{actual_link}'\"\n            )\n\n    verify_document_metadata(doc)\n\n\ndef find_document(documents: list[Document], semantic_identifier: str) -> Document:\n    \"\"\"Find a document by its semantic identifier.\"\"\"\n    matching_docs = [\n        d for d in documents if d.semantic_identifier == semantic_identifier\n    ]\n    assert (\n        len(matching_docs) == 1\n    ), f\"Expected exactly one document with identifier {semantic_identifier}\"\n    return matching_docs[0]\n\n\n@pytest.fixture\ndef mock_store_image() -> MagicMock:\n    \"\"\"Mock store_image_and_create_section to return a predefined ImageSection.\"\"\"\n    mock = MagicMock()\n    mock.return_value = (\n        ImageSection(image_file_id=\"mocked-file-id\", link=\"https://example.com/image\"),\n        \"mocked-file-id\",\n    )\n    return mock\n\n\n@pytest.fixture\ndef sharepoint_credentials() -> dict[str, str]:\n    return {\n        \"sp_client_id\": os.environ[\"SHAREPOINT_CLIENT_ID\"],\n        \"sp_client_secret\": os.environ[\"SHAREPOINT_CLIENT_SECRET\"],\n        \"sp_directory_id\": os.environ[\"SHAREPOINT_CLIENT_DIRECTORY_ID\"],\n    }\n\n\ndef test_sharepoint_connector_all_sites__docs_only(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    mock_store_image: MagicMock,\n    sharepoint_credentials: dict[str, str],\n) -> None:\n    with patch(\n        \"onyx.connectors.sharepoint.connector.store_image_and_create_section\",\n        mock_store_image,\n    ):\n        # Initialize connector with no sites\n        connector = SharepointConnector(\n            include_site_pages=False, include_site_documents=True\n        )\n\n        # Load credentials\n        connector.load_credentials(sharepoint_credentials)\n\n        # Not asserting expected sites because that can change in test tenant at any time\n        # Finding any docs is good enough to verify that the connector is working\n        document_batches = load_all_from_connector(\n            connector=connector,\n            start=0,\n            end=time.time(),\n        )\n        assert document_batches, \"Should find documents from all sites\"\n\n\ndef test_sharepoint_connector_all_sites__pages_only(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    mock_store_image: MagicMock,\n    sharepoint_credentials: dict[str, str],\n) -> None:\n    with patch(\n        \"onyx.connectors.sharepoint.connector.store_image_and_create_section\",\n        mock_store_image,\n    ):\n        # Initialize connector with no docs\n        connector = SharepointConnector(\n            include_site_pages=True, include_site_documents=False\n        )\n\n        # Load credentials\n        connector.load_credentials(sharepoint_credentials)\n\n        # Not asserting expected sites because that can change in test tenant at any time\n        # Finding any docs is good enough to verify that the connector is working\n        document_batches = load_all_from_connector(\n            connector=connector,\n            start=0,\n            end=time.time(),\n        )\n        assert document_batches, \"Should find site pages from all sites\"\n\n\ndef test_sharepoint_connector_specific_folder(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    mock_store_image: MagicMock,\n    sharepoint_credentials: dict[str, str],\n) -> None:\n    with patch(\n        \"onyx.connectors.sharepoint.connector.store_image_and_create_section\",\n        mock_store_image,\n    ):\n        # Initialize connector with the test site URL and specific folder\n        connector = SharepointConnector(\n            sites=[os.environ[\"SHAREPOINT_SITE\"] + \"/Shared Documents/test\"],\n            include_site_pages=False,\n            include_site_documents=True,\n        )\n\n        # Load credentials\n        connector.load_credentials(sharepoint_credentials)\n\n        # Get all documents\n        found_documents: list[Document] = load_all_from_connector(\n            connector=connector,\n            start=0,\n            end=time.time(),\n        ).documents\n\n        # Should only find documents in the test folder\n        test_folder_docs = [\n            doc\n            for doc in EXPECTED_DOCUMENTS\n            if doc.folder_path and doc.folder_path.startswith(\"test\")\n        ]\n        assert len(found_documents) == len(\n            test_folder_docs\n        ), \"Should only find documents in test folder\"\n\n        # Verify each expected document\n        for expected in test_folder_docs:\n            doc = find_document(found_documents, expected.semantic_identifier)\n            verify_document_content(doc, expected)\n\n\ndef test_sharepoint_connector_root_folder__docs_only(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    mock_store_image: MagicMock,\n    sharepoint_credentials: dict[str, str],\n) -> None:\n    with patch(\n        \"onyx.connectors.sharepoint.connector.store_image_and_create_section\",\n        mock_store_image,\n    ):\n        # Initialize connector with the base site URL\n        connector = SharepointConnector(\n            sites=[os.environ[\"SHAREPOINT_SITE\"]],\n            include_site_pages=False,\n            include_site_documents=True,\n        )\n\n        # Load credentials\n        connector.load_credentials(sharepoint_credentials)\n\n        # Get all documents\n        found_documents: list[Document] = load_all_from_connector(\n            connector=connector,\n            start=0,\n            end=time.time(),\n        ).documents\n\n        assert len(found_documents) == len(\n            EXPECTED_DOCUMENTS\n        ), \"Should find all documents in main library\"\n\n        # Verify each expected document\n        for expected in EXPECTED_DOCUMENTS:\n            doc = find_document(found_documents, expected.semantic_identifier)\n            verify_document_content(doc, expected)\n\n\ndef test_sharepoint_connector_other_library(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    mock_store_image: MagicMock,\n    sharepoint_credentials: dict[str, str],\n) -> None:\n    with patch(\n        \"onyx.connectors.sharepoint.connector.store_image_and_create_section\",\n        mock_store_image,\n    ):\n        # Initialize connector with the other library\n        connector = SharepointConnector(\n            sites=[\n                os.environ[\"SHAREPOINT_SITE\"] + \"/Other Library\",\n            ],\n            include_site_pages=False,\n            include_site_documents=True,\n        )\n\n        # Load credentials\n        connector.load_credentials(sharepoint_credentials)\n\n        # Get all documents\n        found_documents: list[Document] = load_all_from_connector(\n            connector=connector,\n            start=0,\n            end=time.time(),\n        ).documents\n        expected_documents: list[ExpectedDocument] = [\n            doc for doc in EXPECTED_DOCUMENTS if doc.library == \"Other Library\"\n        ]\n\n        # Should find all documents in `Other Library`\n        assert len(found_documents) == len(\n            expected_documents\n        ), \"Should find all documents in `Other Library`\"\n\n        # Verify each expected document\n        for expected in expected_documents:\n            doc = find_document(found_documents, expected.semantic_identifier)\n            verify_document_content(doc, expected)\n\n\ndef test_sharepoint_connector_poll(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    mock_store_image: MagicMock,\n    sharepoint_credentials: dict[str, str],\n) -> None:\n    with patch(\n        \"onyx.connectors.sharepoint.connector.store_image_and_create_section\",\n        mock_store_image,\n    ):\n        # Initialize connector with the base site URL\n        connector = SharepointConnector(sites=[os.environ[\"SHAREPOINT_SITE\"]])\n\n        # Load credentials\n        connector.load_credentials(sharepoint_credentials)\n\n        # Set time window to only capture test1.docx (modified at 2025-01-28 20:51:42+00:00)\n        start = datetime(\n            2025, 1, 28, 20, 51, 30, tzinfo=timezone.utc\n        )  # 12 seconds before\n        end = datetime(2025, 1, 28, 20, 51, 50, tzinfo=timezone.utc)  # 8 seconds after\n\n        # Get documents within the time window\n        found_documents: list[Document] = load_all_from_connector(\n            connector=connector,\n            start=start.timestamp(),\n            end=end.timestamp(),\n        ).documents\n\n        # Should only find test1.docx\n        assert (\n            len(found_documents) == 1\n        ), \"Should only find one document in the time window\"\n        doc = found_documents[0]\n        assert doc.semantic_identifier == \"test1.docx\"\n        verify_document_content(\n            doc,\n            next(\n                d for d in EXPECTED_DOCUMENTS if d.semantic_identifier == \"test1.docx\"\n            ),\n        )\n\n\ndef test_sharepoint_connector_pages(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    mock_store_image: MagicMock,\n    sharepoint_credentials: dict[str, str],\n) -> None:\n    with patch(\n        \"onyx.connectors.sharepoint.connector.store_image_and_create_section\",\n        mock_store_image,\n    ):\n        connector = SharepointConnector(\n            sites=[os.environ[\"SHAREPOINT_SITE\"]],\n            include_site_pages=True,\n            include_site_documents=False,\n        )\n\n        connector.load_credentials(sharepoint_credentials)\n\n        found_documents = load_all_from_connector(\n            connector=connector,\n            start=0,\n            end=time.time(),\n        ).documents\n\n        assert len(found_documents) == len(\n            EXPECTED_PAGES\n        ), \"Should find all pages in test site\"\n\n        for expected in EXPECTED_PAGES:\n            doc = find_document(found_documents, expected.semantic_identifier)\n            verify_document_content(doc, expected)\n\n\ndef verify_hierarchy_nodes(\n    hierarchy_nodes: list[HierarchyNode],\n    documents: list[Document],\n    expected_site_url: str,\n) -> None:\n    \"\"\"Verify hierarchy nodes have correct structure and relationships.\"\"\"\n    # Build a set of all raw_node_ids for parent validation\n    all_node_ids = {node.raw_node_id for node in hierarchy_nodes}\n\n    # Track nodes by type\n    site_nodes = [n for n in hierarchy_nodes if n.node_type == HierarchyNodeType.SITE]\n    drive_nodes = [n for n in hierarchy_nodes if n.node_type == HierarchyNodeType.DRIVE]\n    folder_nodes = [\n        n for n in hierarchy_nodes if n.node_type == HierarchyNodeType.FOLDER\n    ]\n\n    # Verify we have at least one site node\n    assert len(site_nodes) >= 1, \"Should have at least one SITE hierarchy node\"\n    assert len(drive_nodes) >= 1, \"Should have at least one DRIVE hierarchy node\"\n    assert len(folder_nodes) >= 1, \"Should have at least one FOLDER hierarchy node\"\n\n    # Verify expected site is in hierarchy\n    site_node_ids = {n.raw_node_id for n in site_nodes}\n    assert (\n        expected_site_url in site_node_ids\n    ), f\"Expected site {expected_site_url} not found in hierarchy nodes. Found sites: {site_node_ids}\"\n\n    # Verify no duplicate raw_node_ids\n    assert len(all_node_ids) == len(\n        hierarchy_nodes\n    ), \"Should not have duplicate hierarchy nodes\"\n\n    # Verify all hierarchy nodes have required fields\n    for node in hierarchy_nodes:\n        assert node.raw_node_id, \"All nodes should have raw_node_id\"\n        assert node.display_name, \"All nodes should have display_name\"\n        assert node.link, \"All nodes should have link\"\n        assert node.node_type in [\n            HierarchyNodeType.SITE,\n            HierarchyNodeType.DRIVE,\n            HierarchyNodeType.FOLDER,\n        ], f\"Unexpected node type: {node.node_type}\"\n\n    # Verify parent relationships\n    for node in hierarchy_nodes:\n        if node.node_type == HierarchyNodeType.SITE:\n            # Sites should have no parent (direct child of SOURCE)\n            assert node.raw_parent_id is None, \"SITE nodes should have no parent\"\n        elif node.node_type == HierarchyNodeType.DRIVE:\n            # Drives should have a site as parent\n            assert node.raw_parent_id is not None, \"DRIVE nodes should have a parent\"\n            assert (\n                node.raw_parent_id in site_node_ids\n            ), f\"DRIVE parent {node.raw_parent_id} should be a SITE node\"\n        elif node.node_type == HierarchyNodeType.FOLDER:\n            # Folders should have either a drive or another folder as parent\n            assert node.raw_parent_id is not None, \"FOLDER nodes should have a parent\"\n            assert (\n                node.raw_parent_id in all_node_ids\n            ), f\"FOLDER parent {node.raw_parent_id} should exist in hierarchy\"\n\n    # Verify documents have parent_hierarchy_raw_node_id set\n    for doc in documents:\n        if doc.parent_hierarchy_raw_node_id:\n            assert (\n                doc.parent_hierarchy_raw_node_id in all_node_ids\n            ), f\"Document {doc.semantic_identifier} parent {doc.parent_hierarchy_raw_node_id} should exist in hierarchy\"\n\n\ndef test_sharepoint_connector_hierarchy_nodes(\n    mock_get_unstructured_api_key: MagicMock,  # noqa: ARG001\n    mock_store_image: MagicMock,\n    sharepoint_credentials: dict[str, str],\n) -> None:\n    \"\"\"Test that the SharePoint connector yields proper hierarchy nodes.\"\"\"\n    with patch(\n        \"onyx.connectors.sharepoint.connector.store_image_and_create_section\",\n        mock_store_image,\n    ):\n        site_url = os.environ[\"SHAREPOINT_SITE\"]\n\n        # Initialize connector with the test site\n        connector = SharepointConnector(\n            sites=[site_url],\n            include_site_pages=True,\n            include_site_documents=True,\n        )\n\n        # Load credentials\n        connector.load_credentials(sharepoint_credentials)\n\n        # Get all documents and hierarchy nodes\n        result = load_all_from_connector(\n            connector=connector,\n            start=0,\n            end=time.time(),\n        )\n\n        found_documents = result.documents\n        hierarchy_nodes = result.hierarchy_nodes\n\n        # Should have hierarchy nodes\n        assert len(hierarchy_nodes) > 0, \"Should have hierarchy nodes\"\n\n        # Verify hierarchy structure\n        verify_hierarchy_nodes(hierarchy_nodes, found_documents, site_url)\n\n        # Verify we have the expected node types\n        node_types = {n.node_type for n in hierarchy_nodes}\n        assert HierarchyNodeType.SITE in node_types, \"Should have SITE nodes\"\n        assert HierarchyNodeType.DRIVE in node_types, \"Should have DRIVE nodes\"\n\n        # Should have folder nodes if documents are in folders\n        docs_in_folders = [d for d in EXPECTED_DOCUMENTS if d.folder_path]\n        if docs_in_folders:\n            assert (\n                HierarchyNodeType.FOLDER in node_types\n            ), \"Should have FOLDER nodes since documents are in folders\"\n\n        # Verify all documents have parent_hierarchy_raw_node_id set\n        for doc in found_documents:\n            assert (\n                doc.parent_hierarchy_raw_node_id is not None\n            ), f\"Document {doc.semantic_identifier} should have parent_hierarchy_raw_node_id set\"\n\n\n@pytest.fixture\ndef sharepoint_cert_credentials() -> dict[str, str]:\n    return {\n        \"authentication_method\": SharepointAuthMethod.CERTIFICATE.value,\n        \"sp_client_id\": os.environ[\"PERM_SYNC_SHAREPOINT_CLIENT_ID\"],\n        \"sp_private_key\": os.environ[\"PERM_SYNC_SHAREPOINT_PRIVATE_KEY\"],\n        \"sp_certificate_password\": os.environ[\n            \"PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD\"\n        ],\n        \"sp_directory_id\": os.environ[\"PERM_SYNC_SHAREPOINT_DIRECTORY_ID\"],\n    }\n\n\ndef test_resolve_tenant_domain_from_site_urls(\n    sharepoint_cert_credentials: dict[str, str],\n) -> None:\n    \"\"\"Verify that certificate auth resolves the tenant domain from site URLs\n    without calling the /organization endpoint.\"\"\"\n    site_url = os.environ[\"SHAREPOINT_SITE\"]\n    connector = SharepointConnector(sites=[site_url])\n    connector.load_credentials(sharepoint_cert_credentials)\n\n    assert connector.sp_tenant_domain is not None\n    assert len(connector.sp_tenant_domain) > 0\n    # The tenant domain should match the first label of the site URL hostname\n    from urllib.parse import urlsplit\n\n    expected = urlsplit(site_url).hostname.split(\".\")[0]  # type: ignore\n    assert connector.sp_tenant_domain == expected\n\n\ndef test_resolve_tenant_domain_from_root_site(\n    sharepoint_cert_credentials: dict[str, str],\n) -> None:\n    \"\"\"Verify that certificate auth resolves the tenant domain via the root\n    site endpoint when no site URLs are configured.\"\"\"\n    connector = SharepointConnector(sites=[])\n    connector.load_credentials(sharepoint_cert_credentials)\n\n    assert connector.sp_tenant_domain is not None\n    assert len(connector.sp_tenant_domain) > 0\n"
  },
  {
    "path": "backend/tests/daily/connectors/slab/test_slab_connector.py",
    "content": "import json\nimport os\nimport time\nfrom pathlib import Path\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.slab.connector import SlabConnector\n\n\ndef load_test_data(file_name: str = \"test_slab_data.json\") -> dict[str, str]:\n    current_dir = Path(__file__).parent\n    with open(current_dir / file_name, \"r\") as f:\n        return json.load(f)\n\n\n@pytest.fixture\ndef slab_connector() -> SlabConnector:\n    connector = SlabConnector(\n        base_url=\"https://onyx-test.slab.com/\",\n    )\n    connector.load_credentials(\n        {\n            \"slab_bot_token\": os.environ[\"SLAB_BOT_TOKEN\"],\n        }\n    )\n    return connector\n\n\n@pytest.mark.xfail(\n    reason=(\n        \"Need a test account with a slab subscription to run this test.Trial only lasts 14 days.\"\n    )\n)\ndef test_slab_connector_basic(slab_connector: SlabConnector) -> None:\n    all_docs: list[Document] = []\n    target_test_doc_id = \"jcp6cohu\"\n    target_test_doc: Document | None = None\n    for doc_batch in slab_connector.poll_source(0, time.time()):\n        for doc in doc_batch:\n            if not isinstance(doc, Document):\n                continue\n            all_docs.append(doc)\n            if doc.id == target_test_doc_id:\n                target_test_doc = doc\n\n    assert len(all_docs) == 6\n    assert target_test_doc is not None\n\n    desired_test_data = load_test_data()\n    assert (\n        target_test_doc.semantic_identifier == desired_test_data[\"semantic_identifier\"]\n    )\n    assert target_test_doc.source == DocumentSource.SLAB\n    assert target_test_doc.metadata == {}\n    assert target_test_doc.primary_owners is None\n    assert target_test_doc.secondary_owners is None\n    assert target_test_doc.title is None\n    assert target_test_doc.from_ingestion_api is False\n    assert target_test_doc.additional_info is None\n\n    assert len(target_test_doc.sections) == 1\n    section = target_test_doc.sections[0]\n    # Need to replace the weird apostrophe with a normal one\n    assert section.text is not None\n    assert section.text.replace(\"\\u2019\", \"'\") == desired_test_data[\"section_text\"]\n    assert section.link == desired_test_data[\"link\"]\n\n\n@pytest.mark.xfail(\n    reason=(\n        \"Need a test account with a slab subscription to run this test.Trial only lasts 14 days.\"\n    )\n)\ndef test_slab_connector_slim(slab_connector: SlabConnector) -> None:\n    # Get all doc IDs from the full connector\n    all_full_doc_ids = set()\n    for doc_batch in slab_connector.load_from_state():\n        all_full_doc_ids.update(\n            [doc.id for doc in doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # Get all doc IDs from the slim connector\n    all_slim_doc_ids = set()\n    for slim_doc_batch in slab_connector.retrieve_all_slim_docs_perm_sync():\n        all_slim_doc_ids.update(\n            [doc.id for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # The set of full doc IDs should be always be a subset of the slim doc IDs\n    assert all_full_doc_ids.issubset(all_slim_doc_ids)\n"
  },
  {
    "path": "backend/tests/daily/connectors/slab/test_slab_data.json",
    "content": "{\n    \"section_text\": \"Learn about Posts\\nWelcome\\nThis is a post, where you can edit, share, and collaborate in real time with your team. We'd love to show you how it works!\\nReading and editing\\nClick the mode button to toggle between read and edit modes. You can only make changes to a post when editing.\\nOrganize your posts\\nWhen in edit mode, you can add topics to a post, which will keep it organized for the right 👀 to see.\\nSmart mentions\\nMentions are references to users, posts, topics and third party tools that show details on hover. Paste in a link for automatic conversion.\\nLook back in time\\nYou are ready to begin writing. You can always bring back this tour in the help menu.\\nGreat job!\\nYou are ready to begin writing. You can always bring back this tour in the help menu.\\n\\n\",\n    \"link\": \"https://onyx-test.slab.com/posts/learn-about-posts-jcp6cohu\",\n    \"semantic_identifier\": \"Learn about Posts\"\n} "
  },
  {
    "path": "backend/tests/daily/connectors/slack/conftest.py",
    "content": "import os\nfrom collections.abc import Generator\nfrom unittest.mock import MagicMock\n\nimport pytest\nfrom pytest import FixtureRequest\nfrom slack_sdk import WebClient\n\nfrom onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider\nfrom onyx.connectors.slack.connector import SlackConnector\nfrom shared_configs.contextvars import get_current_tenant_id\n\n\n@pytest.fixture\ndef mock_slack_client() -> MagicMock:\n    mock = MagicMock(spec=WebClient)\n    return mock\n\n\n@pytest.fixture\ndef slack_connector(\n    request: FixtureRequest,\n    mock_slack_client: MagicMock,\n    slack_credentials_provider: OnyxStaticCredentialsProvider,\n) -> Generator[SlackConnector]:\n    channel: str | None = request.param if hasattr(request, \"param\") else None\n    connector = SlackConnector(\n        channels=[channel] if channel else None,\n        channel_regex_enabled=False,\n        use_redis=False,\n    )\n    connector.client = mock_slack_client\n    connector.set_credentials_provider(credentials_provider=slack_credentials_provider)\n    yield connector\n\n\n@pytest.fixture\ndef slack_credentials_provider() -> OnyxStaticCredentialsProvider:\n    CI_ENV_VAR = \"SLACK_BOT_TOKEN\"\n    LOCAL_ENV_VAR = \"ONYX_BOT_SLACK_BOT_TOKEN\"\n\n    slack_bot_token = os.environ.get(CI_ENV_VAR, os.environ.get(LOCAL_ENV_VAR))\n    if not slack_bot_token:\n        raise RuntimeError(\n            f\"No slack credentials found; either set the {CI_ENV_VAR} env-var or the {LOCAL_ENV_VAR} env-var\"\n        )\n\n    return OnyxStaticCredentialsProvider(\n        tenant_id=get_current_tenant_id(),\n        connector_name=\"slack\",\n        credential_json={\n            \"slack_bot_token\": slack_bot_token,\n        },\n    )\n"
  },
  {
    "path": "backend/tests/daily/connectors/slack/test_slack_connector.py",
    "content": "import time\n\nimport pytest\n\nfrom onyx.connectors.slack.connector import SlackConnector\nfrom onyx.db.enums import HierarchyNodeType\nfrom tests.daily.connectors.utils import load_all_from_connector\nfrom tests.daily.connectors.utils import to_sections\nfrom tests.daily.connectors.utils import to_text_sections\n\n\ndef test_validate_slack_connector_settings(\n    slack_connector: SlackConnector,\n) -> None:\n    slack_connector.validate_connector_settings()\n\n\n@pytest.mark.parametrize(\n    \"slack_connector,expected_messages,expected_channel_name\",\n    [\n        [\"general\", set(), \"general\"],\n        [\"#general\", set(), \"general\"],\n        [\n            \"daily-connector-test-channel\",\n            set(\n                [\n                    \"Hello, world!\",\n                    \"\",\n                    \"Reply!\",\n                    \"Testing again...\",\n                ]\n            ),\n            \"daily-connector-test-channel\",\n        ],\n        [\n            \"#daily-connector-test-channel\",\n            set(\n                [\n                    \"Hello, world!\",\n                    \"\",\n                    \"Reply!\",\n                    \"Testing again...\",\n                ]\n            ),\n            \"daily-connector-test-channel\",\n        ],\n    ],\n    indirect=[\"slack_connector\"],\n)\ndef test_indexing_channels_with_message_count(\n    slack_connector: SlackConnector,\n    expected_messages: set[str],\n    expected_channel_name: str,\n) -> None:\n    if not slack_connector.client:\n        raise RuntimeError(\"Web client must be defined\")\n\n    result = load_all_from_connector(\n        connector=slack_connector,\n        start=0.0,\n        end=time.time(),\n    )\n    docs = result.documents\n    hierarchy_nodes = result.hierarchy_nodes\n\n    # Verify messages\n    actual_messages = set(to_text_sections(to_sections(docs)))\n    assert expected_messages == actual_messages\n\n    # Verify hierarchy nodes exist\n    assert len(hierarchy_nodes) > 0, \"Expected at least one hierarchy node (channel)\"\n\n    # Verify all hierarchy nodes are channels with correct structure\n    for node in hierarchy_nodes:\n        assert node.node_type == HierarchyNodeType.CHANNEL\n        assert node.raw_parent_id is None  # Direct child of SOURCE\n        assert node.raw_node_id  # Channel ID must be present\n        assert node.display_name.startswith(\"#\")  # e.g. \"#general\"\n\n    # Verify the expected channel appears in the hierarchy nodes\n    channel_display_names = {node.display_name for node in hierarchy_nodes}\n    assert (\n        f\"#{expected_channel_name}\" in channel_display_names\n    ), f\"Expected channel '#{expected_channel_name}' not found in hierarchy nodes. Found: {channel_display_names}\"\n\n    # Verify documents reference their parent channel\n    channel_ids = {node.raw_node_id for node in hierarchy_nodes}\n    for doc in docs:\n        assert (\n            doc.parent_hierarchy_raw_node_id is not None\n        ), f\"Document '{doc.id}' has no parent_hierarchy_raw_node_id\"\n        assert doc.parent_hierarchy_raw_node_id in channel_ids, (\n            f\"Document '{doc.id}' has parent_hierarchy_raw_node_id=\"\n            f\"'{doc.parent_hierarchy_raw_node_id}' which is not in \"\n            f\"hierarchy nodes: {channel_ids}\"\n        )\n\n\n@pytest.mark.parametrize(\n    \"slack_connector\",\n    [\n        # w/o hashtag\n        \"doesnt-exist\",\n        # w/ hashtag\n        \"#doesnt-exist\",\n    ],\n    indirect=True,\n)\ndef test_indexing_channels_that_dont_exist(\n    slack_connector: SlackConnector,\n) -> None:\n    if not slack_connector.client:\n        raise RuntimeError(\"Web client must be defined\")\n\n    with pytest.raises(\n        ValueError,\n        match=r\"Channel '.*' not found in workspace.*\",\n    ):\n        load_all_from_connector(\n            connector=slack_connector,\n            start=0.0,\n            end=time.time(),\n        ).documents\n"
  },
  {
    "path": "backend/tests/daily/connectors/slack/test_slack_perm_sync.py",
    "content": "import time\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport pytest\n\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.connectors.slack.connector import SlackConnector\nfrom tests.daily.connectors.utils import load_all_from_connector\n\n\nPUBLIC_CHANNEL_NAME = \"#daily-connector-test-channel\"\nPRIVATE_CHANNEL_NAME = \"#private-channel\"\nPRIVATE_CHANNEL_USERS = [\n    \"admin@onyx-test.com\",\n    \"test_user_1@onyx-test.com\",\n    # user 2 added via a group\n    \"test_user_2@onyx-test.com\",\n]\n\n# Predates any test workspace messages, so the result set should match\n# the \"no start time\" case while exercising the oldest= parameter.\nOLDEST_TS_2016 = datetime(2016, 1, 1, tzinfo=timezone.utc).timestamp()\n\npytestmark = pytest.mark.usefixtures(\"enable_ee\")\n\n\n@pytest.mark.parametrize(\n    \"slack_connector\",\n    [\n        PUBLIC_CHANNEL_NAME,\n    ],\n    indirect=True,\n)\ndef test_load_from_checkpoint_access__public_channel(\n    slack_connector: SlackConnector,\n) -> None:\n    \"\"\"Test that load_from_checkpoint returns correct access information for documents.\"\"\"\n    if not slack_connector.client:\n        raise RuntimeError(\"Web client must be defined\")\n\n    docs = load_all_from_connector(\n        connector=slack_connector,\n        start=0.0,\n        end=time.time(),\n        include_permissions=True,\n    ).documents\n\n    # We should have at least some documents\n    assert len(docs) > 0, \"Expected to find at least one document\"\n\n    for doc in docs:\n        assert (\n            doc.external_access is not None\n        ), f\"Document {doc.id} should have external_access when using perm sync\"\n        assert (\n            doc.external_access.is_public is True\n        ), f\"Document {doc.id} should have public access when using perm sync\"\n        assert (\n            doc.external_access.external_user_emails == set()\n        ), f\"Document {doc.id} should have no external user emails when using perm sync\"\n        assert (\n            doc.external_access.external_user_group_ids == set()\n        ), f\"Document {doc.id} should have no external user group ids when using perm sync\"\n\n\n@pytest.mark.parametrize(\n    \"slack_connector\",\n    [\n        PRIVATE_CHANNEL_NAME,\n    ],\n    indirect=True,\n)\ndef test_load_from_checkpoint_access__private_channel(\n    slack_connector: SlackConnector,\n) -> None:\n    \"\"\"Test that load_from_checkpoint returns correct access information for documents.\"\"\"\n    if not slack_connector.client:\n        raise RuntimeError(\"Web client must be defined\")\n\n    docs = load_all_from_connector(\n        connector=slack_connector,\n        start=0.0,\n        end=time.time(),\n        include_permissions=True,\n    ).documents\n\n    # We should have at least some documents\n    assert len(docs) > 0, \"Expected to find at least one document\"\n\n    for doc in docs:\n        assert (\n            doc.external_access is not None\n        ), f\"Document {doc.id} should have external_access when using perm sync\"\n        assert (\n            doc.external_access.is_public is False\n        ), f\"Document {doc.id} should have private access when using perm sync\"\n        assert doc.external_access.external_user_emails == set(\n            PRIVATE_CHANNEL_USERS\n        ), f\"Document {doc.id} should have private channel users when using perm sync\"\n        assert (\n            doc.external_access.external_user_group_ids == set()\n        ), f\"Document {doc.id} should have no external user group ids when using perm sync\"\n\n\n@pytest.mark.parametrize(\n    \"slack_connector\",\n    [\n        PUBLIC_CHANNEL_NAME,\n    ],\n    indirect=True,\n)\n@pytest.mark.parametrize(\"start_ts\", [None, OLDEST_TS_2016])\ndef test_slim_documents_access__public_channel(\n    slack_connector: SlackConnector,\n    start_ts: float | None,\n) -> None:\n    \"\"\"Test that retrieve_all_slim_docs_perm_sync returns correct access information for slim documents.\"\"\"\n    if not slack_connector.client:\n        raise RuntimeError(\"Web client must be defined\")\n\n    slim_docs_generator = slack_connector.retrieve_all_slim_docs_perm_sync(\n        start=start_ts,\n        end=time.time(),\n    )\n\n    # Collect all slim documents from the generator\n    all_slim_docs: list[SlimDocument] = []\n    for slim_doc_batch in slim_docs_generator:\n        all_slim_docs.extend(\n            [doc for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # We should have at least some slim documents\n    assert len(all_slim_docs) > 0, \"Expected to find at least one slim document\"\n\n    for slim_doc in all_slim_docs:\n        assert slim_doc.external_access is not None\n        assert slim_doc.external_access.is_public is True\n        assert slim_doc.external_access.external_user_emails == set()\n        assert slim_doc.external_access.external_user_group_ids == set()\n\n\n@pytest.mark.parametrize(\n    \"slack_connector\",\n    [\n        PRIVATE_CHANNEL_NAME,\n    ],\n    indirect=True,\n)\ndef test_slim_documents_access__private_channel(\n    slack_connector: SlackConnector,\n) -> None:\n    \"\"\"Test that retrieve_all_slim_docs_perm_sync returns correct access information for slim documents.\"\"\"\n    if not slack_connector.client:\n        raise RuntimeError(\"Web client must be defined\")\n\n    slim_docs_generator = slack_connector.retrieve_all_slim_docs_perm_sync(\n        start=None,\n        end=time.time(),\n    )\n\n    # Collect all slim documents from the generator\n    all_slim_docs: list[SlimDocument] = []\n    for slim_doc_batch in slim_docs_generator:\n        all_slim_docs.extend(\n            [doc for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # We should have at least some slim documents\n    assert len(all_slim_docs) > 0, \"Expected to find at least one slim document\"\n\n    for slim_doc in all_slim_docs:\n        assert slim_doc.external_access is not None\n        assert slim_doc.external_access.is_public is False\n        assert slim_doc.external_access.external_user_emails == set(\n            PRIVATE_CHANNEL_USERS\n        )\n        assert slim_doc.external_access.external_user_group_ids == set()\n"
  },
  {
    "path": "backend/tests/daily/connectors/teams/models.py",
    "content": "from pydantic import BaseModel\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.models import Document\n\n\nclass TeamsThread(BaseModel):\n    thread: str\n    external_access: ExternalAccess\n\n    @classmethod\n    def from_doc(cls, document: Document) -> \"TeamsThread\":\n        assert (\n            document.external_access\n        ), f\"ExternalAccess should always be available, instead got {document=}\"\n\n        return cls(\n            thread=document.get_text_content(),\n            external_access=document.external_access,\n        )\n"
  },
  {
    "path": "backend/tests/daily/connectors/teams/test_teams_connector.py",
    "content": "import os\nimport time\n\nimport pytest\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.teams.connector import TeamsConnector\nfrom tests.daily.connectors.teams.models import TeamsThread\nfrom tests.daily.connectors.utils import load_all_from_connector\n\n\nTEAMS_THREAD = [\n    # Posted in \"Public Channel\"\n    TeamsThread(\n        thread=\"This is the first message in Onyx-Testing ...This is a reply!This is a second reply.Third.4th.5\",\n        external_access=ExternalAccess(\n            external_user_emails=set(),\n            external_user_group_ids=set(),\n            is_public=True,\n        ),\n    ),\n    TeamsThread(\n        thread=\"Testing body.\",\n        external_access=ExternalAccess(\n            external_user_emails=set(),\n            external_user_group_ids=set(),\n            is_public=True,\n        ),\n    ),\n    TeamsThread(\n        thread=\"Hello, world! Nice to meet you all.\",\n        external_access=ExternalAccess(\n            external_user_emails=set(),\n            external_user_group_ids=set(),\n            is_public=True,\n        ),\n    ),\n    # Posted in \"Private Channel (Raunak is excluded)\"\n    TeamsThread(\n        thread=\"This is a test post. Raunak should not be able to see this!\",\n        external_access=ExternalAccess(\n            external_user_emails=set([\"test@danswerai.onmicrosoft.com\"]),\n            external_user_group_ids=set(),\n            is_public=False,\n        ),\n    ),\n    # Posted in \"Private Channel (Raunak is a member)\"\n    TeamsThread(\n        thread=\"This is a test post in a private channel that Raunak does have access to! Hello, Raunak!\"\n        \"Hello, world! I am just a member in this chat, but not an owner.\",\n        external_access=ExternalAccess(\n            external_user_emails=set(\n                [\"test@danswerai.onmicrosoft.com\", \"raunak@onyx.app\"]\n            ),\n            external_user_group_ids=set(),\n            is_public=False,\n        ),\n    ),\n    # Posted in \"Private Channel (Raunak owns)\"\n    TeamsThread(\n        thread=\"This is a test post in a private channel that Raunak is an owner of! Whoa!\"\n        \"Hello, world! I am an owner of this chat. The power!\",\n        external_access=ExternalAccess(\n            external_user_emails=set(\n                [\"test@danswerai.onmicrosoft.com\", \"raunak@onyx.app\"]\n            ),\n            external_user_group_ids=set(),\n            is_public=False,\n        ),\n    ),\n]\n\n\n@pytest.fixture\ndef teams_credentials() -> dict[str, str]:\n    app_id = os.environ[\"TEAMS_APPLICATION_ID\"]\n    dir_id = os.environ[\"TEAMS_DIRECTORY_ID\"]\n    secret = os.environ[\"TEAMS_SECRET\"]\n\n    return {\n        \"teams_client_id\": app_id,\n        \"teams_directory_id\": dir_id,\n        \"teams_client_secret\": secret,\n    }\n\n\n@pytest.fixture\ndef teams_connector(\n    teams_credentials: dict[str, str],\n) -> TeamsConnector:\n    teams_connector = TeamsConnector(teams=[\"Onyx-Testing\"])\n    teams_connector.load_credentials(teams_credentials)\n    return teams_connector\n\n\ndef _build_map(threads: list[TeamsThread]) -> dict[str, TeamsThread]:\n    map: dict[str, TeamsThread] = {}\n\n    for thread in threads:\n        assert thread.thread not in map, f\"Duplicate thread found in map; {thread=}\"\n        map[thread.thread] = thread\n\n    return map\n\n\ndef _assert_is_valid_external_access(\n    external_access: ExternalAccess,\n) -> None:\n    assert (\n        not external_access.external_user_group_ids\n    ), f\"{external_access.external_user_group_ids=} should be empty for MS Teams\"\n\n    if external_access.is_public:\n        assert (\n            not external_access.external_user_emails\n        ), f\"{external_access.external_user_emails=} should be empty for public channels\"\n    else:\n        assert (\n            external_access.external_user_emails\n        ), f\"{external_access.external_user_emails=} should contains at least one user for private channels\"\n\n\n@pytest.mark.parametrize(\n    \"expected_teams_threads\",\n    [TEAMS_THREAD],\n)\ndef test_loading_all_docs_from_teams_connector(\n    teams_connector: TeamsConnector,\n    expected_teams_threads: list[TeamsThread],\n) -> None:\n    docs = list(\n        load_all_from_connector(\n            connector=teams_connector,\n            start=0.0,\n            end=time.time(),\n        ).documents\n    )\n    actual_teams_threads = [TeamsThread.from_doc(doc) for doc in docs]\n    actual_teams_threads_map = _build_map(threads=actual_teams_threads)\n    expected_teams_threads_map = _build_map(threads=expected_teams_threads)\n\n    # Assert that each thread document matches what we expect.\n    assert actual_teams_threads_map == expected_teams_threads_map\n\n    # Assert that all the `ExternalAccess` instances are well-formed.\n    for thread in actual_teams_threads:\n        _assert_is_valid_external_access(external_access=thread.external_access)\n\n\ndef test_slim_docs_retrieval_from_teams_connector(\n    teams_connector: TeamsConnector,\n) -> None:\n    slim_docs = [\n        slim_doc\n        for slim_doc_batch in teams_connector.retrieve_all_slim_docs_perm_sync()\n        for slim_doc in slim_doc_batch\n    ]\n\n    for slim_doc in slim_docs:\n        if isinstance(slim_doc, HierarchyNode):\n            continue\n        assert (\n            slim_doc.external_access\n        ), f\"ExternalAccess should always be available, instead got {slim_doc=}\"\n        _assert_is_valid_external_access(external_access=slim_doc.external_access)\n\n\ndef test_load_from_checkpoint_with_perm_sync(\n    teams_connector: TeamsConnector,\n    enable_ee: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that load_from_checkpoint_with_perm_sync returns documents with external_access.\n\n    This verifies the CheckpointedConnectorWithPermSync interface is properly implemented.\n    \"\"\"\n    docs = load_all_from_connector(\n        connector=teams_connector,\n        start=0.0,\n        end=time.time(),\n        include_permissions=True,  # Uses load_from_checkpoint_with_perm_sync\n    ).documents\n\n    # We should have at least some documents\n    assert len(docs) > 0, \"Expected to find at least one document\"\n\n    for doc in docs:\n        assert (\n            doc.external_access is not None\n        ), f\"Document {doc.id} should have external_access when using perm sync\"\n        _assert_is_valid_external_access(external_access=doc.external_access)\n"
  },
  {
    "path": "backend/tests/daily/connectors/utils.py",
    "content": "from collections.abc import Iterator\nfrom typing import TypeVar\n\nfrom pydantic import BaseModel\n\nfrom onyx.connectors.connector_runner import CheckpointOutputWrapper\nfrom onyx.connectors.interfaces import CheckpointedConnector\nfrom onyx.connectors.interfaces import CheckpointedConnectorWithPermSync\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\n\n_ITERATION_LIMIT = 100_000\n\nCT = TypeVar(\"CT\", bound=ConnectorCheckpoint)\n\n\nclass ConnectorOutput(BaseModel):\n    \"\"\"Structured output from loading a connector.\"\"\"\n\n    documents: list[Document]\n    failures: list[ConnectorFailure]\n    hierarchy_nodes: list[HierarchyNode]\n\n    model_config = {\"arbitrary_types_allowed\": True}\n\n\ndef load_all_from_connector(\n    connector: CheckpointedConnector[CT],\n    start: SecondsSinceUnixEpoch,\n    end: SecondsSinceUnixEpoch,\n    include_permissions: bool = False,\n    raise_on_failures: bool = True,\n) -> ConnectorOutput:\n    \"\"\"\n    Load all documents, hierarchy nodes, and failures from a connector.\n\n    Returns a ConnectorOutput with documents, failures, and hierarchy_nodes separated.\n\n    Also validates that parent hierarchy nodes are always yielded before their children:\n    - For documents: parent must have been yielded before the document\n    - For hierarchy nodes: after each batch, validates that all parents in the batch\n      have been seen (either in the current batch or a previous batch)\n    \"\"\"\n    num_iterations = 0\n\n    if include_permissions and not isinstance(\n        connector, CheckpointedConnectorWithPermSync\n    ):\n        raise ValueError(\"Connector does not support permission syncing\")\n\n    checkpoint = connector.build_dummy_checkpoint()\n    documents: list[Document] = []\n    failures: list[ConnectorFailure] = []\n    hierarchy_nodes: list[HierarchyNode] = []\n\n    # Track all seen hierarchy node raw_ids for parent validation\n    seen_hierarchy_raw_ids: set[str] = set()\n\n    while checkpoint.has_more:\n        load_from_checkpoint_generator = (\n            connector.load_from_checkpoint_with_perm_sync\n            if include_permissions\n            and isinstance(connector, CheckpointedConnectorWithPermSync)\n            else connector.load_from_checkpoint\n        )\n        doc_batch_generator = CheckpointOutputWrapper[CT]()(\n            load_from_checkpoint_generator(start, end, checkpoint)\n        )\n\n        # Collect hierarchy nodes from this batch (for end-of-batch validation)\n        batch_hierarchy_nodes: list[HierarchyNode] = []\n\n        for document, hierarchy_node, failure, next_checkpoint in doc_batch_generator:\n            if hierarchy_node is not None:\n                hierarchy_nodes.append(hierarchy_node)\n                batch_hierarchy_nodes.append(hierarchy_node)\n                # Add to seen set immediately so subsequent documents can reference it\n                seen_hierarchy_raw_ids.add(hierarchy_node.raw_node_id)\n\n            if failure is not None:\n                failures.append(failure)\n\n            if document is not None and isinstance(document, Document):\n                documents.append(document)\n                # Validate: document's parent must have been yielded before this document\n                if document.parent_hierarchy_raw_node_id is not None:\n                    if (\n                        document.parent_hierarchy_raw_node_id\n                        not in seen_hierarchy_raw_ids\n                    ):\n                        raise AssertionError(\n                            f\"Document '{document.id}' \"\n                            f\"(semantic_identifier='{document.semantic_identifier}') \"\n                            f\"has parent_hierarchy_raw_node_id=\"\n                            f\"'{document.parent_hierarchy_raw_node_id}' \"\n                            f\"which was not yielded before this document. \"\n                            f\"Seen hierarchy IDs: {seen_hierarchy_raw_ids}\"\n                        )\n\n            if next_checkpoint is not None:\n                checkpoint = next_checkpoint\n\n        # End-of-batch validation for hierarchy nodes:\n        # Each node's parent must be in the current batch or a previous batch\n        batch_hierarchy_raw_ids = {node.raw_node_id for node in batch_hierarchy_nodes}\n        for node in batch_hierarchy_nodes:\n            if node.raw_parent_id is None:\n                continue  # Root nodes have no parent\n\n            parent_in_current_batch = node.raw_parent_id in batch_hierarchy_raw_ids\n            parent_in_previous_batch = node.raw_parent_id in seen_hierarchy_raw_ids\n\n            if not parent_in_current_batch and not parent_in_previous_batch:\n                raise AssertionError(\n                    f\"HierarchyNode '{node.raw_node_id}' \"\n                    f\"(display_name='{node.display_name}') \"\n                    f\"has raw_parent_id='{node.raw_parent_id}' which was not yielded \"\n                    f\"in the current batch or any previous batch. \"\n                    f\"Seen hierarchy IDs: {seen_hierarchy_raw_ids}, \"\n                    f\"Current batch IDs: {batch_hierarchy_raw_ids}\"\n                )\n\n        num_iterations += 1\n        if num_iterations > _ITERATION_LIMIT:\n            raise RuntimeError(\"Too many iterations. Infinite loop?\")\n\n    if raise_on_failures and failures:\n        raise RuntimeError(f\"Failed to load documents: {failures}\")\n\n    return ConnectorOutput(\n        documents=documents,\n        failures=failures,\n        hierarchy_nodes=hierarchy_nodes,\n    )\n\n\ndef to_sections(\n    documents: list[Document],\n) -> Iterator[TextSection | ImageSection]:\n    for doc in documents:\n        for section in doc.sections:\n            yield section\n\n\ndef to_text_sections(sections: Iterator[TextSection | ImageSection]) -> Iterator[str]:\n    for section in sections:\n        if isinstance(section, TextSection):\n            yield section.text\n"
  },
  {
    "path": "backend/tests/daily/connectors/web/test_web_connector.py",
    "content": "from concurrent.futures import ThreadPoolExecutor\n\nimport pytest\n\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.web.connector import WEB_CONNECTOR_VALID_SETTINGS\nfrom onyx.connectors.web.connector import WebConnector\n\nEXPECTED_QUOTE = (\n    \"If you can't explain it to a six year old, you don't understand it yourself.\"\n)\n\n\n# NOTE(rkuo): we will probably need to adjust this test to point at our own test site\n# to avoid depending on a third party site\n@pytest.fixture\ndef quotes_to_scroll_web_connector(request: pytest.FixtureRequest) -> WebConnector:\n    scroll_before_scraping = request.param\n    connector = WebConnector(\n        base_url=\"https://quotes.toscrape.com/scroll\",\n        web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value,\n        scroll_before_scraping=scroll_before_scraping,\n    )\n    return connector\n\n\n@pytest.mark.parametrize(\"quotes_to_scroll_web_connector\", [True], indirect=True)\ndef test_web_connector_scroll(quotes_to_scroll_web_connector: WebConnector) -> None:\n    all_docs: list[Document] = []\n    document_batches = quotes_to_scroll_web_connector.load_from_state()\n    for doc_batch in document_batches:\n        for doc in doc_batch:\n            if isinstance(doc, HierarchyNode):\n                continue\n            all_docs.append(doc)\n\n    assert len(all_docs) == 1\n    doc = all_docs[0]\n    assert doc.sections[0].text is not None\n    assert EXPECTED_QUOTE in doc.sections[0].text\n\n\n@pytest.mark.parametrize(\"quotes_to_scroll_web_connector\", [False], indirect=True)\ndef test_web_connector_no_scroll(quotes_to_scroll_web_connector: WebConnector) -> None:\n    all_docs: list[Document] = []\n    document_batches = quotes_to_scroll_web_connector.load_from_state()\n    for doc_batch in document_batches:\n        for doc in doc_batch:\n            if isinstance(doc, HierarchyNode):\n                continue\n            all_docs.append(doc)\n\n    assert len(all_docs) == 1\n    doc = all_docs[0]\n    assert doc.sections[0].text is not None\n    assert EXPECTED_QUOTE not in doc.sections[0].text\n\n\nMERCURY_EXPECTED_QUOTE = \"How can we help?\"\n\n\n@pytest.mark.xfail(\n    reason=(\n        \"flaky. maybe we can improve how we avoid triggering bot protection ormaybe this is just how it has to be.\"\n    ),\n)\ndef test_web_connector_bot_protection() -> None:\n    connector = WebConnector(\n        base_url=\"https://support.mercury.com/hc\",\n        web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value,\n    )\n    document_batches = list(connector.load_from_state())\n    assert len(document_batches) == 1\n    doc_batch = document_batches[0]\n    assert len(doc_batch) == 1\n    doc = doc_batch[0]\n    assert not isinstance(doc, HierarchyNode)\n    assert doc.sections[0].text is not None\n    assert MERCURY_EXPECTED_QUOTE in doc.sections[0].text\n\n\ndef test_web_connector_recursive_www_redirect() -> None:\n    # Check that https://onyx.app can be recursed if re-directed to www.onyx.app\n    # Run in thread pool to avoid conflict with pytest-asyncio's event loop\n    def _run_connector() -> list[Document]:\n        connector = WebConnector(\n            base_url=\"https://onyx.app\",\n            web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value,\n        )\n        return [\n            doc\n            for batch in connector.load_from_state()\n            for doc in batch\n            if not isinstance(doc, HierarchyNode)\n        ]\n\n    with ThreadPoolExecutor(max_workers=1) as executor:\n        future = executor.submit(_run_connector)\n        documents = future.result()\n\n    assert len(documents) > 1\n"
  },
  {
    "path": "backend/tests/daily/connectors/zendesk/test_zendesk_connector.py",
    "content": "import json\nimport os\nimport time\nfrom pathlib import Path\nfrom typing import cast\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.zendesk.connector import ZendeskConnector\nfrom tests.daily.connectors.utils import load_all_from_connector\n\n\ndef load_test_data(file_name: str = \"test_zendesk_data.json\") -> dict[str, dict]:\n    current_dir = Path(__file__).parent\n    with open(current_dir / file_name, \"r\") as f:\n        return json.load(f)\n\n\n@pytest.fixture\ndef zendesk_article_connector() -> ZendeskConnector:\n    connector = ZendeskConnector(content_type=\"articles\")\n    connector.load_credentials(get_credentials())\n    return connector\n\n\n@pytest.fixture\ndef zendesk_ticket_connector() -> ZendeskConnector:\n    connector = ZendeskConnector(content_type=\"tickets\")\n    connector.load_credentials(get_credentials())\n    return connector\n\n\ndef get_credentials() -> dict[str, str]:\n    return {\n        \"zendesk_subdomain\": os.environ[\"ZENDESK_SUBDOMAIN\"],\n        \"zendesk_email\": os.environ[\"ZENDESK_EMAIL\"],\n        \"zendesk_token\": os.environ[\"ZENDESK_TOKEN\"],\n    }\n\n\n@pytest.mark.xfail(\n    reason=(\n        \"Cannot get Zendesk developer account to ensure zendesk account does not expire after 2 weeks\"\n    )\n)\n@pytest.mark.parametrize(\n    \"connector_fixture\", [\"zendesk_article_connector\", \"zendesk_ticket_connector\"]\n)\ndef test_zendesk_connector_basic(\n    request: pytest.FixtureRequest, connector_fixture: str\n) -> None:\n    connector = cast(ZendeskConnector, request.getfixturevalue(connector_fixture))\n    test_data = load_test_data()\n    all_docs: list[Document] = []\n    target_test_doc_id: str\n    if connector.content_type == \"articles\":\n        target_test_doc_id = f\"article:{test_data['article']['id']}\"\n    else:\n        target_test_doc_id = f\"zendesk_ticket_{test_data['ticket']['id']}\"\n\n    target_doc: Document | None = None\n\n    for doc in load_all_from_connector(connector, 0, time.time()).documents:\n        all_docs.append(doc)\n        if doc.id == target_test_doc_id:\n            target_doc = doc\n            print(f\"target_doc {target_doc}\")\n\n    assert len(all_docs) > 0, \"No documents were retrieved from the connector\"\n    assert (\n        target_doc is not None\n    ), \"Target document was not found in the retrieved documents\"\n    assert target_doc.source == DocumentSource.ZENDESK, \"Document source is not ZENDESK\"\n\n    if connector.content_type == \"articles\":\n        test_article = test_data[\"article\"]\n        assert target_doc.semantic_identifier == test_article[\"semantic_identifier\"]\n        assert target_doc.sections[0].link == test_article[\"sections\"][0][\"link\"]\n        assert target_doc.source == test_article[\"source\"]\n        assert target_doc.primary_owners is not None\n        assert len(target_doc.primary_owners) == 1\n        assert (\n            target_doc.primary_owners[0].display_name\n            == test_article[\"primary_owners\"][0][\"display_name\"]\n        )\n        assert (\n            target_doc.primary_owners[0].email\n            == test_article[\"primary_owners\"][0][\"email\"]\n        )\n    else:\n        test_ticket = test_data[\"ticket\"]\n        assert target_doc.semantic_identifier == test_ticket[\"semantic_identifier\"]\n        assert target_doc.sections[0].link == test_ticket[\"sections\"][0][\"link\"]\n        assert target_doc.source == test_ticket[\"source\"]\n        assert target_doc.metadata[\"status\"] == test_ticket[\"metadata\"][\"status\"]\n        assert target_doc.metadata[\"priority\"] == test_ticket[\"metadata\"][\"priority\"]\n        assert target_doc.metadata[\"tags\"] == test_ticket[\"metadata\"][\"tags\"]\n        assert (\n            target_doc.metadata[\"ticket_type\"] == test_ticket[\"metadata\"][\"ticket_type\"]\n        )\n\n\n@pytest.mark.xfail(\n    reason=(\n        \"Cannot get Zendesk developer account to ensure zendesk account does not expire after 2 weeks\"\n    )\n)\ndef test_zendesk_connector_slim(zendesk_article_connector: ZendeskConnector) -> None:\n    # Get full doc IDs\n    all_full_doc_ids = set()\n    for doc in load_all_from_connector(\n        zendesk_article_connector, 0, time.time()\n    ).documents:\n        all_full_doc_ids.add(doc.id)\n\n    # Get slim doc IDs\n    all_slim_doc_ids = set()\n    for slim_doc_batch in zendesk_article_connector.retrieve_all_slim_docs_perm_sync():\n        all_slim_doc_ids.update(\n            [doc.id for doc in slim_doc_batch if not isinstance(doc, HierarchyNode)]\n        )\n\n    # Full docs should be subset of slim docs\n    assert all_full_doc_ids.issubset(\n        all_slim_doc_ids\n    ), f\"Full doc IDs {all_full_doc_ids} not subset of slim doc IDs {all_slim_doc_ids}\"\n"
  },
  {
    "path": "backend/tests/daily/connectors/zendesk/test_zendesk_data.json",
    "content": "{\n  \"article\": {\n      \"id\": \"32502691728155\",\n      \"sections\": [\n        {\n          \"link\": \"https://d3v-onyx.zendesk.com/hc/en-us/articles/32502691728155-How-can-agents-leverage-knowledge-to-help-customers\"\n        }\n      ],\n      \"source\": \"zendesk\",\n      \"semantic_identifier\": \"How can agents leverage knowledge to help customers?\",\n      \"primary_owners\": [\n        {\n          \"display_name\": \"Dan Swer\",\n          \"email\": \"admin@onyx-test.com\"\n        }\n      ]\n  },\n  \"ticket\": {\n    \"id\": \"1\",\n    \"sections\": [\n      {\n        \"link\": \"https://d3v-onyx.zendesk.com/agent/tickets/1\"\n      }\n    ],\n    \"source\": \"zendesk\",\n    \"semantic_identifier\": \"Ticket #1: SAMPLE TICKET: Meet the ticket\",\n    \"metadata\": {\n      \"status\": \"open\",\n      \"priority\": \"normal\",\n      \"tags\": [\"sample\", \"support\", \"zendesk\"],\n      \"ticket_type\": \"incident\"\n    }\n  }\n}"
  },
  {
    "path": "backend/tests/daily/embedding/test_embeddings.py",
    "content": "import os\n\nimport pytest\nfrom tenacity import retry\nfrom tenacity import retry_if_exception_type\nfrom tenacity import stop_after_attempt\nfrom tenacity import wait_exponential\n\nfrom onyx.natural_language_processing.search_nlp_models import EmbeddingModel\nfrom shared_configs.enums import EmbedTextType\nfrom shared_configs.model_server_models import EmbeddingProvider\n\nVALID_SAMPLE = [\"hi\", \"hello my name is bob\", \"woah there!!!. 😃\"]\nVALID_LONG_SAMPLE = [\"hi \" * 999]\n# openai limit is 2048, cohere is supposed to be 96 but in practice that doesn't\n# seem to be true\nTOO_LONG_SAMPLE = [\"a\"] * 2500\n\n\ndef _run_embeddings(\n    texts: list[str], embedding_model: EmbeddingModel, expected_dim: int\n) -> None:\n    for text_type in [EmbedTextType.QUERY, EmbedTextType.PASSAGE]:\n        embeddings = embedding_model.encode(texts, text_type)\n        assert len(embeddings) == len(texts)\n        assert len(embeddings[0]) == expected_dim\n\n\n@pytest.fixture\ndef openai_embedding_model() -> EmbeddingModel:\n    return EmbeddingModel(\n        server_host=\"localhost\",\n        server_port=9000,\n        model_name=\"text-embedding-3-small\",\n        normalize=True,\n        query_prefix=None,\n        passage_prefix=None,\n        api_key=os.environ[\"OPENAI_API_KEY\"],\n        provider_type=EmbeddingProvider.OPENAI,\n        api_url=None,\n    )\n\n\ndef test_openai_embedding(openai_embedding_model: EmbeddingModel) -> None:\n    _run_embeddings(VALID_SAMPLE, openai_embedding_model, 1536)\n    _run_embeddings(TOO_LONG_SAMPLE, openai_embedding_model, 1536)\n\n\n@pytest.fixture\ndef cohere_embedding_model() -> EmbeddingModel:\n    return EmbeddingModel(\n        server_host=\"localhost\",\n        server_port=9000,\n        model_name=\"embed-english-light-v3.0\",\n        normalize=True,\n        query_prefix=None,\n        passage_prefix=None,\n        api_key=os.environ[\"COHERE_API_KEY\"],\n        provider_type=EmbeddingProvider.COHERE,\n        api_url=None,\n    )\n\n\ndef test_cohere_embedding(cohere_embedding_model: EmbeddingModel) -> None:\n    _run_embeddings(VALID_SAMPLE, cohere_embedding_model, 384)\n    _run_embeddings(TOO_LONG_SAMPLE, cohere_embedding_model, 384)\n\n\n@pytest.fixture\ndef litellm_embedding_model() -> EmbeddingModel:\n    return EmbeddingModel(\n        server_host=\"localhost\",\n        server_port=9000,\n        model_name=\"text-embedding-3-small\",\n        normalize=True,\n        query_prefix=None,\n        passage_prefix=None,\n        api_key=os.environ[\"LITELLM_API_KEY\"],\n        provider_type=EmbeddingProvider.LITELLM,\n        api_url=os.environ[\"LITELLM_API_URL\"],\n    )\n\n\n@pytest.mark.skip(reason=\"re-enable when we can get the correct litellm key and url\")\ndef test_litellm_embedding(litellm_embedding_model: EmbeddingModel) -> None:\n    _run_embeddings(VALID_SAMPLE, litellm_embedding_model, 1536)\n    _run_embeddings(TOO_LONG_SAMPLE, litellm_embedding_model, 1536)\n\n\n@pytest.fixture\ndef local_nomic_embedding_model() -> EmbeddingModel:\n    return EmbeddingModel(\n        server_host=\"localhost\",\n        server_port=9000,\n        model_name=\"nomic-ai/nomic-embed-text-v1\",\n        normalize=True,\n        query_prefix=\"search_query: \",\n        passage_prefix=\"search_document: \",\n        api_key=None,\n        provider_type=None,\n        api_url=None,\n    )\n\n\ndef test_local_nomic_embedding(local_nomic_embedding_model: EmbeddingModel) -> None:\n    _run_embeddings(VALID_SAMPLE, local_nomic_embedding_model, 768)\n    _run_embeddings(TOO_LONG_SAMPLE, local_nomic_embedding_model, 768)\n\n\n@pytest.fixture\ndef azure_embedding_model() -> EmbeddingModel:\n    return EmbeddingModel(\n        server_host=\"localhost\",\n        server_port=9000,\n        model_name=\"text-embedding-3-small\",\n        normalize=True,\n        query_prefix=None,\n        passage_prefix=None,\n        api_key=os.environ[\"AZURE_API_KEY\"],\n        provider_type=EmbeddingProvider.AZURE,\n        api_url=os.environ[\"AZURE_API_URL\"],\n    )\n\n\n# Azure has strict rate limits on their embedding API, so we retry with exponential\n# backoff to handle transient RateLimitError responses\n@retry(\n    retry=retry_if_exception_type(RuntimeError),\n    stop=stop_after_attempt(5),\n    wait=wait_exponential(multiplier=1, min=1, max=10),\n    reraise=True,\n)\ndef test_azure_embedding(azure_embedding_model: EmbeddingModel) -> None:\n    _run_embeddings(VALID_SAMPLE, azure_embedding_model, 1536)\n    _run_embeddings(TOO_LONG_SAMPLE, azure_embedding_model, 1536)\n\n\n# NOTE (chris): this test doesn't work, and I do not know why\n# def test_azure_embedding_model_rate_limit(azure_embedding_model: EmbeddingModel):\n#     \"\"\"NOTE: this test relies on a very low rate limit for the Azure API +\n#     this test only being run once in a 1 minute window\"\"\"\n#     # VALID_LONG_SAMPLE is 999 tokens, so the second call should run into rate\n#     # limits assuming the limit is 1000 tokens per minute\n#     result = azure_embedding_model.encode(VALID_LONG_SAMPLE, EmbedTextType.QUERY)\n#     assert len(result) == 1\n#     assert len(result[0]) == 1536\n\n#     # this should fail\n#     with pytest.raises(ModelServerRateLimitError):\n#         azure_embedding_model.encode(VALID_LONG_SAMPLE, EmbedTextType.QUERY)\n#         azure_embedding_model.encode(VALID_LONG_SAMPLE, EmbedTextType.QUERY)\n#         azure_embedding_model.encode(VALID_LONG_SAMPLE, EmbedTextType.QUERY)\n\n#     # this should succeed, since passage requests retry up to 10 times\n#     start = time.time()\n#     result = azure_embedding_model.encode(VALID_LONG_SAMPLE, EmbedTextType.PASSAGE)\n#     assert len(result) == 1\n#     assert len(result[0]) == 1536\n#     assert time.time() - start > 30  # make sure we waited, even though we hit rate limits\n"
  },
  {
    "path": "backend/tests/daily/llm/test_bedrock.py",
    "content": "import os\nfrom typing import Any\n\nimport pytest\nfrom fastapi.testclient import TestClient\n\nfrom onyx.llm.constants import LlmProviderNames\n\n\n_DEFAULT_BEDROCK_MODEL = \"anthropic.claude-3-5-sonnet-20241022-v2:0\"\n\n\n@pytest.mark.xfail(\n    reason=\"Credentials not yet available due to compliance work needed\",\n)\ndef test_bedrock_llm_configuration(client: TestClient) -> None:\n    # Prepare the test request payload\n    test_request: dict[str, Any] = {\n        \"provider\": LlmProviderNames.BEDROCK,\n        \"model\": _DEFAULT_BEDROCK_MODEL,\n        \"api_key\": None,\n        \"api_base\": None,\n        \"api_version\": None,\n        \"custom_config\": {\n            \"AWS_REGION_NAME\": os.environ.get(\"AWS_REGION_NAME\", \"us-east-1\"),\n            \"AWS_ACCESS_KEY_ID\": os.environ.get(\"AWS_ACCESS_KEY_ID\"),\n            \"AWS_SECRET_ACCESS_KEY\": os.environ.get(\"AWS_SECRET_ACCESS_KEY\"),\n        },\n        \"model_configurations\": [{\"name\": _DEFAULT_BEDROCK_MODEL, \"is_visible\": True}],\n        \"api_key_changed\": True,\n        \"custom_config_changed\": True,\n    }\n\n    # Send the test request\n    response = client.post(\"/admin/llm/test\", json=test_request)\n\n    # Assert the response\n    assert (\n        response.status_code == 200\n    ), f\"Expected status code 200, but got {response.status_code}. Response: {response.text}\"\n\n\ndef test_bedrock_llm_configuration_invalid_key(client: TestClient) -> None:\n    # Prepare the test request payload with invalid credentials\n    test_request: dict[str, Any] = {\n        \"provider\": LlmProviderNames.BEDROCK,\n        \"model\": _DEFAULT_BEDROCK_MODEL,\n        \"api_key\": None,\n        \"api_base\": None,\n        \"api_version\": None,\n        \"custom_config\": {\n            \"AWS_REGION_NAME\": \"us-east-1\",\n            \"AWS_ACCESS_KEY_ID\": \"invalid_access_key_id\",\n            \"AWS_SECRET_ACCESS_KEY\": \"invalid_secret_access_key\",\n        },\n        \"model_configurations\": [{\"name\": _DEFAULT_BEDROCK_MODEL, \"is_visible\": True}],\n        \"api_key_changed\": True,\n        \"custom_config_changed\": True,\n    }\n\n    # Send the test request\n    response = client.post(\"/admin/llm/test\", json=test_request)\n\n    # Assert the response\n    assert (\n        response.status_code == 400\n    ), f\"Expected status code 400, but got {response.status_code}. Response: {response.text}\"\n    assert (\n        \"Invalid credentials\" in response.text\n        or \"Invalid Authentication\" in response.text\n    ), f\"Expected error message about invalid credentials, but got: {response.text}\"\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/answer/conftest.py",
    "content": "import os\nfrom collections.abc import Iterator\nfrom collections.abc import Mapping\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.llm import update_default_provider\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\n\n\n# Counter for generating unique file IDs in mock file store\n_mock_file_id_counter = 0\n\n\ndef ensure_default_llm_provider(db_session: Session) -> None:\n    \"\"\"Ensure a default LLM provider exists for tests that exercise chat flows.\"\"\"\n\n    try:\n        llm_provider_request = LLMProviderUpsertRequest(\n            name=\"test-provider\",\n            provider=LlmProviderNames.OPENAI,\n            api_key=os.environ.get(\"OPENAI_API_KEY\", \"test\"),\n            is_public=True,\n            model_configurations=[\n                ModelConfigurationUpsertRequest(\n                    name=\"gpt-4o-mini\",\n                    is_visible=True,\n                )\n            ],\n            groups=[],\n        )\n        provider = upsert_llm_provider(\n            llm_provider_upsert_request=llm_provider_request,\n            db_session=db_session,\n        )\n        update_default_provider(provider.id, \"gpt-4o-mini\", db_session)\n    except Exception as exc:  # pragma: no cover - only hits on duplicate setup issues\n        # Rollback to clear the pending transaction state\n        db_session.rollback()\n        print(f\"Note: Could not create LLM provider: {exc}\")\n\n\n@pytest.fixture\ndef mock_nlp_embeddings_post() -> Iterator[None]:\n    \"\"\"Patch model-server embedding HTTP calls used by NLP components.\"\"\"\n\n    def _mock_post(\n        url: str,\n        json: Mapping[str, Any] | None = None,\n        headers: Mapping[str, str] | None = None,  # noqa: ARG001\n        **kwargs: Any,  # noqa: ARG001\n    ) -> MagicMock:\n        resp = MagicMock()\n        if \"encoder/bi-encoder-embed\" in url:\n            num_texts = len(json.get(\"texts\", [])) if json else 1\n            resp.status_code = 200\n            resp.json.return_value = {\"embeddings\": [[0.0] * 768] * num_texts}\n            resp.raise_for_status = MagicMock()\n            return resp\n        resp.status_code = 200\n        resp.json.return_value = {}\n        resp.raise_for_status = MagicMock()\n        return resp\n\n    with patch(\n        \"onyx.natural_language_processing.search_nlp_models.requests.post\",\n        side_effect=_mock_post,\n    ):\n        yield\n\n\n@pytest.fixture\ndef mock_gpu_status() -> Iterator[None]:\n    \"\"\"Avoid hitting model server for GPU status checks.\"\"\"\n    with patch(\n        \"onyx.utils.gpu_utils._get_gpu_status_from_model_server\", return_value=False\n    ):\n        yield\n\n\n@pytest.fixture\ndef mock_vespa_query() -> Iterator[None]:\n    \"\"\"Stub Vespa query to a safe empty response to avoid CI flakiness.\"\"\"\n    with patch(\"onyx.document_index.vespa.index.query_vespa\", return_value=[]):\n        yield\n\n\n@pytest.fixture\ndef mock_file_store() -> Iterator[None]:\n    \"\"\"Mock the file store to avoid S3/storage dependencies in tests.\"\"\"\n    global _mock_file_id_counter\n\n    def _mock_save_file(*args: Any, **kwargs: Any) -> str:  # noqa: ARG001\n        global _mock_file_id_counter\n        _mock_file_id_counter += 1\n        # Return a predictable file ID for tests\n        return \"123\"\n\n    mock_store = MagicMock()\n    mock_store.save_file.side_effect = _mock_save_file\n    mock_store.initialize.return_value = None\n\n    with patch(\n        \"onyx.file_store.utils.get_default_file_store\",\n        return_value=mock_store,\n    ):\n        yield\n\n\n@pytest.fixture\ndef mock_external_deps(\n    mock_nlp_embeddings_post: None,  # noqa: ARG001\n    mock_gpu_status: None,  # noqa: ARG001\n    mock_vespa_query: None,  # noqa: ARG001\n    mock_file_store: None,  # noqa: ARG001\n) -> Iterator[None]:\n    \"\"\"Convenience fixture to enable all common external dependency mocks.\"\"\"\n    yield\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/answer/stream_test_assertions.py",
    "content": "from __future__ import annotations\n\nfrom typing import cast\n\nfrom onyx.chat.models import AnswerStreamPart\nfrom onyx.chat.models import CreateChatSessionID\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.server.query_and_chat.models import MessageResponseIDInfo\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseStart\nfrom onyx.server.query_and_chat.streaming_models import ImageGenerationFinal\nfrom onyx.server.query_and_chat.streaming_models import OpenUrlDocuments\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta\n\n\ndef assert_answer_stream_part_correct(\n    received: AnswerStreamPart, expected: AnswerStreamPart\n) -> None:\n    assert isinstance(received, type(expected))\n\n    if isinstance(received, Packet):\n        r_packet = cast(Packet, received)\n        e_packet = cast(Packet, expected)\n\n        assert r_packet.placement == e_packet.placement\n\n        if isinstance(r_packet.obj, SearchToolDocumentsDelta):\n            assert isinstance(e_packet.obj, SearchToolDocumentsDelta)\n            assert is_search_tool_document_delta_equal(r_packet.obj, e_packet.obj)\n            return\n        elif isinstance(r_packet.obj, OpenUrlDocuments):\n            assert isinstance(e_packet.obj, OpenUrlDocuments)\n            assert is_open_url_documents_equal(r_packet.obj, e_packet.obj)\n            return\n        elif isinstance(r_packet.obj, AgentResponseStart):\n            assert isinstance(e_packet.obj, AgentResponseStart)\n            assert is_agent_response_start_equal(r_packet.obj, e_packet.obj)\n            return\n        elif isinstance(r_packet.obj, ImageGenerationFinal):\n            assert isinstance(e_packet.obj, ImageGenerationFinal)\n            assert is_image_generation_final_equal(r_packet.obj, e_packet.obj)\n            return\n\n        assert r_packet.obj == e_packet.obj\n    elif isinstance(received, MessageResponseIDInfo):\n        # We're not going to make assumptions about what the user id / assistant id should be\n        # So just return\n        return\n    elif isinstance(received, CreateChatSessionID):\n        # Don't worry about same session ids\n        return\n    else:\n        raise NotImplementedError(\"Not implemented\")\n\n\ndef _are_search_docs_equal(\n    received: list[SearchDoc],\n    expected: list[SearchDoc],\n) -> bool:\n    \"\"\"\n    What we care about:\n     - All documents are present (order does not)\n     - Expected document_id, link, blurb, source_type and hidden\n    \"\"\"\n    if len(received) != len(expected):\n        return False\n\n    received.sort(key=lambda x: x.document_id)\n    expected.sort(key=lambda x: x.document_id)\n\n    for received_document, expected_document in zip(received, expected):\n        if received_document.document_id != expected_document.document_id:\n            return False\n        if received_document.link != expected_document.link:\n            return False\n        if received_document.blurb != expected_document.blurb:\n            return False\n        if received_document.source_type != expected_document.source_type:\n            return False\n        if received_document.hidden != expected_document.hidden:\n            return False\n    return True\n\n\ndef is_search_tool_document_delta_equal(\n    received: SearchToolDocumentsDelta,\n    expected: SearchToolDocumentsDelta,\n) -> bool:\n    \"\"\"\n    What we care about:\n     - All documents are present (order does not)\n     - Expected document_id, link, blurb, source_type and hidden\n    \"\"\"\n    received_documents = received.documents\n    expected_documents = expected.documents\n\n    return _are_search_docs_equal(received_documents, expected_documents)\n\n\ndef is_open_url_documents_equal(\n    received: OpenUrlDocuments,\n    expected: OpenUrlDocuments,\n) -> bool:\n    \"\"\"\n    What we care about:\n     - All documents are present (order does not)\n     - Expected document_id, link, blurb, source_type and hidden\n    \"\"\"\n    received_documents = received.documents\n    expected_documents = expected.documents\n\n    return _are_search_docs_equal(received_documents, expected_documents)\n\n\ndef is_agent_response_start_equal(\n    received: AgentResponseStart,\n    expected: AgentResponseStart,\n) -> bool:\n    \"\"\"\n    What we care about:\n     - All documents are present (order does not)\n     - Expected document_id, link, blurb, source_type and hidden\n    \"\"\"\n    received_documents = received.final_documents\n    expected_documents = expected.final_documents\n\n    if received_documents is None and expected_documents is None:\n        return True\n    if not received_documents or not expected_documents:\n        return False\n\n    return _are_search_docs_equal(received_documents, expected_documents)\n\n\ndef is_image_generation_final_equal(\n    received: ImageGenerationFinal,\n    expected: ImageGenerationFinal,\n) -> bool:\n    \"\"\"\n    What we care about:\n     - Number of images are the same\n     - On each image, url and file_id are aligned such that url=/api/chat/file/{file_id}\n     - Revised prompt is expected\n     - Shape is expected\n    \"\"\"\n    if len(received.images) != len(expected.images):\n        return False\n\n    for received_image, expected_image in zip(received.images, expected.images):\n        if received_image.url != f\"/api/chat/file/{received_image.file_id}\":\n            return False\n        if received_image.revised_prompt != expected_image.revised_prompt:\n            return False\n        if received_image.shape != expected_image.shape:\n            return False\n    return True\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/answer/stream_test_builder.py",
    "content": "from __future__ import annotations\n\nfrom collections.abc import Iterator\n\nfrom onyx.chat.models import AnswerStreamPart\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseStart\nfrom onyx.server.query_and_chat.streaming_models import OverallStop\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import ReasoningDone\nfrom onyx.server.query_and_chat.streaming_models import ReasoningStart\nfrom tests.external_dependency_unit.answer.stream_test_assertions import (\n    assert_answer_stream_part_correct,\n)\nfrom tests.external_dependency_unit.answer.stream_test_utils import (\n    create_packet_with_agent_response_delta,\n)\nfrom tests.external_dependency_unit.answer.stream_test_utils import (\n    create_packet_with_reasoning_delta,\n)\nfrom tests.external_dependency_unit.answer.stream_test_utils import create_placement\nfrom tests.external_dependency_unit.mock_llm import LLMResponse\nfrom tests.external_dependency_unit.mock_llm import MockLLMController\n\n\nclass StreamTestBuilder:\n    def __init__(self, llm_controller: MockLLMController) -> None:\n        self._llm_controller = llm_controller\n\n        # List of (expected_packet, forward_count) tuples\n        self._expected_packets_queue: list[tuple[Packet, int]] = []\n\n    def add_response(self, response: LLMResponse) -> StreamTestBuilder:\n        self._llm_controller.add_response(response)\n\n        return self\n\n    def add_responses_together(self, *responses: LLMResponse) -> StreamTestBuilder:\n        \"\"\"Add multiple responses that should be emitted together in the same tick.\"\"\"\n        self._llm_controller.add_responses_together(*responses)\n\n        return self\n\n    def expect(\n        self, expected_pkt: Packet, forward: int | bool = True\n    ) -> StreamTestBuilder:\n        \"\"\"\n        Add an expected packet to the queue.\n\n        Args:\n            expected_pkt: The packet to expect\n            forward: Number of tokens to forward before expecting this packet.\n                     True = 1 token, False = 0 tokens, int = that many tokens.\n        \"\"\"\n        forward_count = 1 if forward is True else (0 if forward is False else forward)\n        self._expected_packets_queue.append((expected_pkt, forward_count))\n\n        return self\n\n    def expect_packets(\n        self, packets: list[Packet], forward: int | bool = True\n    ) -> StreamTestBuilder:\n        \"\"\"\n        Add multiple expected packets to the queue.\n\n        Args:\n            packets: List of packets to expect\n            forward: Number of tokens to forward before expecting EACH packet.\n                     True = 1 token per packet, False = 0 tokens, int = that many tokens per packet.\n        \"\"\"\n        forward_count = 1 if forward is True else (0 if forward is False else forward)\n        for pkt in packets:\n            self._expected_packets_queue.append((pkt, forward_count))\n\n        return self\n\n    def expect_reasoning(\n        self,\n        reasoning_tokens: list[str],\n        turn_index: int,\n    ) -> StreamTestBuilder:\n        return (\n            self.expect(\n                Packet(\n                    placement=create_placement(turn_index),\n                    obj=ReasoningStart(),\n                )\n            )\n            .expect_packets(\n                [\n                    create_packet_with_reasoning_delta(token, turn_index)\n                    for token in reasoning_tokens\n                ]\n            )\n            .expect(\n                Packet(\n                    placement=create_placement(turn_index),\n                    obj=ReasoningDone(),\n                )\n            )\n        )\n\n    def expect_agent_response(\n        self,\n        answer_tokens: list[str],\n        turn_index: int,\n        final_documents: list[SearchDoc] | None = None,\n    ) -> StreamTestBuilder:\n        return (\n            self.expect(\n                Packet(\n                    placement=create_placement(turn_index),\n                    obj=AgentResponseStart(\n                        final_documents=final_documents,\n                    ),\n                )\n            )\n            .expect_packets(\n                [\n                    create_packet_with_agent_response_delta(token, turn_index)\n                    for token in answer_tokens\n                ]\n            )\n            .expect(\n                Packet(\n                    placement=create_placement(turn_index),\n                    obj=OverallStop(),\n                )\n            )\n        )\n\n    def run_and_validate(self, stream: Iterator[AnswerStreamPart]) -> None:\n        while self._expected_packets_queue:\n            expected_pkt, forward_count = self._expected_packets_queue.pop(0)\n            if forward_count > 0:\n                self._llm_controller.forward(forward_count)\n            received_pkt = next(stream)\n\n            assert_answer_stream_part_correct(received_pkt, expected_pkt)\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/answer/stream_test_utils.py",
    "content": "from __future__ import annotations\n\nfrom collections.abc import Iterator\nfrom uuid import UUID\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.chat_utils import create_chat_session_from_request\nfrom onyx.chat.models import AnswerStreamPart\nfrom onyx.chat.process_message import handle_stream_message_objects\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.db.models import ChatSession\nfrom onyx.db.models import User\nfrom onyx.llm.override_models import LLMOverride\nfrom onyx.server.query_and_chat.models import ChatSessionCreationRequest\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseDelta\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import ReasoningDelta\nfrom tests.external_dependency_unit.mock_content_provider import MockWebContent\nfrom tests.external_dependency_unit.mock_search_provider import MockWebSearchResult\n\n\ndef create_placement(\n    turn_index: int,\n    tab_index: int = 0,\n    sub_turn_index: int | None = None,\n    model_index: int | None = 0,\n) -> Placement:\n    return Placement(\n        turn_index=turn_index,\n        tab_index=tab_index,\n        sub_turn_index=sub_turn_index,\n        model_index=model_index,\n    )\n\n\ndef submit_query(\n    query: str,\n    chat_session_id: UUID | None,\n    db_session: Session,\n    user: User,\n    llm_override: LLMOverride | None = None,\n) -> Iterator[AnswerStreamPart]:\n    request = SendMessageRequest(\n        message=query,\n        chat_session_id=chat_session_id,\n        stream=True,\n        chat_session_info=(\n            ChatSessionCreationRequest() if chat_session_id is None else None\n        ),\n        llm_override=llm_override,\n    )\n\n    return handle_stream_message_objects(\n        new_msg_req=request,\n        user=user,\n        db_session=db_session,\n    )\n\n\ndef create_chat_session(\n    db_session: Session,\n    user: User,\n) -> ChatSession:\n    return create_chat_session_from_request(\n        chat_session_request=ChatSessionCreationRequest(),\n        user_id=user.id,\n        db_session=db_session,\n    )\n\n\ndef create_packet_with_agent_response_delta(token: str, turn_index: int) -> Packet:\n    return Packet(\n        placement=create_placement(turn_index),\n        obj=AgentResponseDelta(\n            content=token,\n        ),\n    )\n\n\ndef create_packet_with_reasoning_delta(token: str, turn_index: int) -> Packet:\n    return Packet(\n        placement=create_placement(turn_index),\n        obj=ReasoningDelta(\n            reasoning=token,\n        ),\n    )\n\n\ndef create_web_search_doc(\n    semantic_identifier: str,\n    link: str,\n    blurb: str,\n) -> SearchDoc:\n    return SearchDoc(\n        document_id=f\"WEB_SEARCH_DOC_{link}\",\n        chunk_ind=0,\n        semantic_identifier=semantic_identifier,\n        link=link,\n        blurb=blurb,\n        source_type=DocumentSource.WEB,\n        boost=1,\n        hidden=False,\n        metadata={},\n        match_highlights=[],\n    )\n\n\ndef mock_web_search_result_to_search_doc(result: MockWebSearchResult) -> SearchDoc:\n    return create_web_search_doc(\n        semantic_identifier=result.title,\n        link=result.link,\n        blurb=result.snippet,\n    )\n\n\ndef mock_web_content_to_search_doc(content: MockWebContent) -> SearchDoc:\n    return create_web_search_doc(\n        semantic_identifier=content.title,\n        link=content.url,\n        blurb=content.title,\n    )\n\n\ndef tokenise(text: str) -> list[str]:\n    return [(token + \" \") for token in text.split(\" \")]\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/answer/test_answer_without_openai.py",
    "content": "from __future__ import annotations\n\nimport os\nfrom uuid import uuid4\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.models import AnswerStreamPart\nfrom onyx.chat.models import StreamingError\nfrom onyx.chat.process_message import handle_stream_message_objects\nfrom onyx.db.chat import create_chat_session\nfrom onyx.db.enums import LLMModelFlowType\nfrom onyx.db.llm import fetch_existing_llm_providers\nfrom onyx.db.llm import remove_llm_provider\nfrom onyx.db.llm import update_default_provider\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\nfrom onyx.server.query_and_chat.models import MessageResponseIDInfo\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseDelta\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseStart\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom tests.external_dependency_unit.conftest import create_test_user\n\n\ndef test_answer_with_only_anthropic_provider(\n    db_session: Session,\n    full_deployment_setup: None,  # noqa: ARG001\n    mock_external_deps: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Ensure chat still streams answers when only an Anthropic provider is configured.\"\"\"\n\n    anthropic_api_key = os.environ.get(\"ANTHROPIC_API_KEY\")\n    assert anthropic_api_key, \"ANTHROPIC_API_KEY environment variable must be set\"\n\n    # Drop any existing providers so that only Anthropic is available.\n    for provider in fetch_existing_llm_providers(db_session, [LLMModelFlowType.CHAT]):\n        remove_llm_provider(db_session, provider.id)\n\n    anthropic_model = \"claude-haiku-4-5-20251001\"\n    provider_name = f\"anthropic-test-{uuid4().hex}\"\n\n    anthropic_provider = upsert_llm_provider(\n        LLMProviderUpsertRequest(\n            name=provider_name,\n            provider=LlmProviderNames.ANTHROPIC,\n            api_key=anthropic_api_key,\n            is_public=True,\n            groups=[],\n            model_configurations=[\n                ModelConfigurationUpsertRequest(name=anthropic_model, is_visible=True)\n            ],\n            api_key_changed=True,\n        ),\n        db_session=db_session,\n    )\n\n    try:\n        update_default_provider(anthropic_provider.id, anthropic_model, db_session)\n\n        test_user = create_test_user(db_session, email_prefix=\"anthropic_only\")\n        chat_session = create_chat_session(\n            db_session=db_session,\n            description=\"Anthropic only chat\",\n            user_id=test_user.id,\n            persona_id=0,\n        )\n\n        chat_request = SendMessageRequest(\n            message=\"hello\",\n            chat_session_id=chat_session.id,\n        )\n\n        response_stream: list[AnswerStreamPart] = []\n        for packet in handle_stream_message_objects(\n            new_msg_req=chat_request,\n            user=test_user,\n            db_session=db_session,\n        ):\n            response_stream.append(packet)\n\n        assert response_stream, \"Should receive streamed packets\"\n        assert not any(\n            isinstance(packet, StreamingError) for packet in response_stream\n        ), \"No streaming errors expected with Anthropic provider\"\n\n        has_message_id = any(\n            isinstance(packet, MessageResponseIDInfo) for packet in response_stream\n        )\n        assert has_message_id, \"Should include reserved assistant message ID\"\n\n        has_message_start = any(\n            isinstance(packet, Packet) and isinstance(packet.obj, AgentResponseStart)\n            for packet in response_stream\n        )\n        assert has_message_start, \"Stream should have a MessageStart packet\"\n\n        has_message_delta = any(\n            isinstance(packet, Packet) and isinstance(packet.obj, AgentResponseDelta)\n            for packet in response_stream\n        )\n        assert has_message_delta, \"Stream should have a MessageDelta packet\"\n\n    finally:\n        remove_llm_provider(db_session, anthropic_provider.id)\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/answer/test_current_datetime_replacement.py",
    "content": "import re\nfrom datetime import datetime\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.models import AnswerStreamPart\nfrom onyx.chat.models import StreamingError\nfrom onyx.chat.process_message import handle_stream_message_objects\nfrom onyx.db.chat import create_chat_session\nfrom onyx.db.models import User\nfrom onyx.db.persona import get_persona_by_id\nfrom onyx.server.query_and_chat.models import MessageResponseIDInfo\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseDelta\nfrom tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider\nfrom tests.external_dependency_unit.conftest import create_test_user\n\n\ndef test_stream_chat_current_date_response(\n    db_session: Session,\n    full_deployment_setup: None,  # noqa: ARG001\n    mock_external_deps: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Smoke test that asking for current date yields a streamed response.\n\n    This exercises the full chat path using the default persona, ensuring\n    the system prompt makes it to the LLM and a response is returned.\n    \"\"\"\n    # Ensure LLM provider exists\n    ensure_default_llm_provider(db_session)\n\n    # Create user, persona, session\n    test_user: User = create_test_user(db_session, email_prefix=\"test_current_date\")\n    default_persona = get_persona_by_id(\n        persona_id=0, user=test_user, db_session=db_session, is_for_edit=False\n    )\n    chat_session = create_chat_session(\n        db_session=db_session,\n        description=\"Test current date question\",\n        user_id=test_user.id if test_user else None,\n        persona_id=default_persona.id,\n    )\n\n    chat_request = SendMessageRequest(\n        message=\"Please respond only with the current date in the format 'Weekday Month DD, YYYY'.\",\n        chat_session_id=chat_session.id,\n    )\n\n    gen = handle_stream_message_objects(\n        new_msg_req=chat_request,\n        user=test_user,\n        db_session=db_session,\n    )\n\n    raw: list[AnswerStreamPart] = []\n    content = \"\"\n    had_error = False\n\n    for pkt in gen:\n        raw.append(pkt)\n        if hasattr(pkt, \"obj\") and isinstance(pkt.obj, AgentResponseDelta):\n            if pkt.obj.content:\n                content += pkt.obj.content\n        if hasattr(pkt, \"obj\") and isinstance(pkt.obj, StreamingError):\n            had_error = True\n            break\n\n    assert not had_error, \"Should not error when answering current date\"\n    assert any(\n        isinstance(p, MessageResponseIDInfo) for p in raw\n    ), \"Should yield a message ID\"\n    assert len(content) > 0, \"Should stream some assistant content\"\n\n    # Validate the response contains a properly formatted current date string\n    match = re.search(r\"[A-Za-z]+ [A-Za-z]+ \\d{1,2}, \\d{4}\", content)\n    assert match, f\"Expected a date in content, got: {content[:200]}...\"\n\n    timestamp_str = match.group(0)\n    timestamp_dt = datetime.strptime(timestamp_str, \"%A %B %d, %Y\")\n    now = datetime.now()\n\n    assert timestamp_dt.strftime(\"%A\") == now.strftime(\n        \"%A\"\n    ), f\"Expected weekday {now.strftime('%A')}, got {timestamp_dt.strftime('%A')}\"\n    assert timestamp_dt.strftime(\"%B\") == now.strftime(\n        \"%B\"\n    ), f\"Expected month {now.strftime('%B')}, got {timestamp_dt.strftime('%B')}\"\n    assert timestamp_dt.day == now.day and timestamp_dt.year == now.year, (\n        f\"Expected day {now.strftime('%d')} and year {now.strftime('%Y')}, \"\n        f\"got {timestamp_dt.strftime('%d')} {timestamp_dt.strftime('%Y')}\"\n    )\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/answer/test_stream_chat_message.py",
    "content": "from __future__ import annotations\n\nimport json\nfrom uuid import UUID\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.models import CreateChatSessionID\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.server.query_and_chat.models import MessageResponseIDInfo\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseStart\nfrom onyx.server.query_and_chat.streaming_models import GeneratedImage\nfrom onyx.server.query_and_chat.streaming_models import ImageGenerationFinal\nfrom onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeartbeat\nfrom onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart\nfrom onyx.server.query_and_chat.streaming_models import OpenUrlDocuments\nfrom onyx.server.query_and_chat.streaming_models import OpenUrlStart\nfrom onyx.server.query_and_chat.streaming_models import OpenUrlUrls\nfrom onyx.server.query_and_chat.streaming_models import OverallStop\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import ReasoningDone\nfrom onyx.server.query_and_chat.streaming_models import ReasoningStart\nfrom onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta\nfrom onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta\nfrom onyx.server.query_and_chat.streaming_models import SearchToolStart\nfrom onyx.server.query_and_chat.streaming_models import SectionEnd\nfrom onyx.server.query_and_chat.streaming_models import TopLevelBranching\nfrom tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider\nfrom tests.external_dependency_unit.answer.stream_test_assertions import (\n    assert_answer_stream_part_correct,\n)\nfrom tests.external_dependency_unit.answer.stream_test_builder import StreamTestBuilder\nfrom tests.external_dependency_unit.answer.stream_test_utils import create_chat_session\nfrom tests.external_dependency_unit.answer.stream_test_utils import (\n    create_packet_with_agent_response_delta,\n)\nfrom tests.external_dependency_unit.answer.stream_test_utils import (\n    create_packet_with_reasoning_delta,\n)\nfrom tests.external_dependency_unit.answer.stream_test_utils import create_placement\nfrom tests.external_dependency_unit.answer.stream_test_utils import (\n    mock_web_content_to_search_doc,\n)\nfrom tests.external_dependency_unit.answer.stream_test_utils import (\n    mock_web_search_result_to_search_doc,\n)\nfrom tests.external_dependency_unit.answer.stream_test_utils import submit_query\nfrom tests.external_dependency_unit.answer.stream_test_utils import tokenise\nfrom tests.external_dependency_unit.conftest import create_test_user\nfrom tests.external_dependency_unit.mock_content_provider import MockWebContent\nfrom tests.external_dependency_unit.mock_content_provider import (\n    use_mock_content_provider,\n)\nfrom tests.external_dependency_unit.mock_image_provider import (\n    use_mock_image_generation_provider,\n)\nfrom tests.external_dependency_unit.mock_llm import LLMAnswerResponse\nfrom tests.external_dependency_unit.mock_llm import LLMReasoningResponse\nfrom tests.external_dependency_unit.mock_llm import LLMToolCallResponse\nfrom tests.external_dependency_unit.mock_llm import use_mock_llm\nfrom tests.external_dependency_unit.mock_search_pipeline import MockInternalSearchResult\nfrom tests.external_dependency_unit.mock_search_pipeline import use_mock_search_pipeline\nfrom tests.external_dependency_unit.mock_search_provider import MockWebSearchResult\nfrom tests.external_dependency_unit.mock_search_provider import use_mock_web_provider\n\n\ndef test_stream_chat_with_answer(\n    db_session: Session,\n    full_deployment_setup: None,  # noqa: ARG001\n    mock_external_deps: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that the stream chat with answer endpoint returns a valid answer.\"\"\"\n    ensure_default_llm_provider(db_session)\n    test_user = create_test_user(\n        db_session, email_prefix=\"test_stream_chat_with_answer\"\n    )\n\n    query = \"What is the capital of France?\"\n    answer = \"The capital of France is Paris.\"\n\n    answer_tokens = tokenise(answer)\n\n    with use_mock_llm() as mock_llm:\n        handler = StreamTestBuilder(llm_controller=mock_llm)\n\n        handler.add_response(LLMAnswerResponse(answer_tokens=answer_tokens))\n\n        chat_session = create_chat_session(db_session=db_session, user=test_user)\n\n        answer_stream = submit_query(\n            query=query,\n            chat_session_id=chat_session.id,\n            db_session=db_session,\n            user=test_user,\n        )\n\n        assert_answer_stream_part_correct(\n            received=next(answer_stream),\n            expected=MessageResponseIDInfo(\n                user_message_id=1,\n                reserved_assistant_message_id=1,\n            ),\n        )\n\n        handler.expect_agent_response(\n            answer_tokens=answer_tokens,\n            turn_index=0,\n        ).run_and_validate(stream=answer_stream)\n\n        with pytest.raises(StopIteration):\n            next(answer_stream)\n\n\ndef test_stream_chat_with_answer_create_chat(\n    db_session: Session,\n    full_deployment_setup: None,  # noqa: ARG001\n    mock_external_deps: None,  # noqa: ARG001\n) -> None:\n    ensure_default_llm_provider(db_session)\n    test_user = create_test_user(\n        db_session, email_prefix=\"test_stream_chat_with_answer_create_chat\"\n    )\n\n    query = \"Hi there friends\"\n    answer = \"Hello friend\"\n\n    tokens = [answer]\n\n    with use_mock_llm() as mock_llm:\n        handler = StreamTestBuilder(llm_controller=mock_llm)\n\n        handler.add_response(LLMAnswerResponse(answer_tokens=tokens))\n\n        answer_stream = submit_query(\n            query=query,\n            chat_session_id=None,\n            db_session=db_session,\n            user=test_user,\n        )\n\n        assert_answer_stream_part_correct(\n            received=next(answer_stream),\n            expected=CreateChatSessionID(\n                chat_session_id=UUID(\"123e4567-e89b-12d3-a456-426614174000\")\n            ),\n        )\n\n        assert_answer_stream_part_correct(\n            received=next(answer_stream),\n            expected=MessageResponseIDInfo(\n                user_message_id=1,\n                reserved_assistant_message_id=2,\n            ),\n        )\n\n        handler.expect_agent_response(\n            answer_tokens=tokens,\n            turn_index=0,\n        ).run_and_validate(stream=answer_stream)\n\n        with pytest.raises(StopIteration):\n            next(answer_stream)\n\n\ndef test_stream_chat_with_search_and_openurl_tools(\n    db_session: Session,\n    full_deployment_setup: None,  # noqa: ARG001\n    mock_external_deps: None,  # noqa: ARG001\n) -> None:\n    ensure_default_llm_provider(db_session)\n    test_user = create_test_user(\n        db_session, email_prefix=\"test_stream_chat_with_search_tool\"\n    )\n\n    QUERY = \"What is the weather in Sydney?\"\n\n    REASONING_RESPONSE_1 = (\n        \"I need to perform a web search to get current weather details. \"\n        \"I can use the search tool to do this.\"\n    )\n\n    WEB_QUERY_1 = \"weather in sydney\"\n    WEB_QUERY_2 = \"current weather in sydney\"\n\n    RESULTS1 = [\n        MockWebSearchResult(\n            title=\"Official Weather\",\n            link=\"www.weather.com.au\",\n            snippet=\"The current weather in Sydney is 20 degrees Celsius.\",\n        ),\n        MockWebSearchResult(\n            title=\"Weather CHannel\",\n            link=\"www.wc.com.au\",\n            snippet=\"Morning is 10 degree Celsius, afternoon is 25 degrees Celsius.\",\n        ),\n    ]\n\n    RESULTS2 = [\n        MockWebSearchResult(\n            title=\"Weather Now!\",\n            link=\"www.weathernow.com.au\",\n            snippet=\"The weather right now is sunny with a temperature of 22 degrees Celsius.\",\n        )\n    ]\n\n    REASONING_RESPONSE_2 = \"I like weathernow and the official weather site\"\n\n    QUERY_URLS_1 = [\"www.weathernow.com.au\", \"www.weather.com.au\"]\n\n    CONTENT1 = [\n        MockWebContent(\n            title=\"Weather Now!\",\n            url=\"www.weathernow.com.au\",\n            content=\"The weather right now is sunny with a temperature of 22 degrees Celsius.\",\n        ),\n        MockWebContent(\n            title=\"Weather Official\",\n            url=\"www.weather.com.au\",\n            content=\"The current weather in Sydney is 20 degrees Celsius.\",\n        ),\n    ]\n\n    REASONING_RESPONSE_3 = (\n        \"I now know everything that I need to know. \" \"I can now answer the question.\"\n    )\n\n    ANSWER_RESPONSE_1 = (\n        \"The weather in Sydney is sunny with a temperature of 22 degrees celsius.\"\n    )\n\n    with (\n        use_mock_llm() as mock_llm,\n        use_mock_web_provider(db_session) as mock_web,\n        use_mock_content_provider() as mock_content,\n    ):\n        handler = StreamTestBuilder(\n            llm_controller=mock_llm,\n        )\n\n        chat_session = create_chat_session(db_session=db_session, user=test_user)\n\n        answer_stream = submit_query(\n            query=QUERY,\n            chat_session_id=chat_session.id,\n            db_session=db_session,\n            user=test_user,\n        )\n\n        assert_answer_stream_part_correct(\n            received=next(answer_stream),\n            expected=MessageResponseIDInfo(\n                user_message_id=1,\n                reserved_assistant_message_id=1,\n            ),\n        )\n\n        # LLM Stream Response 1\n        mock_web.add_results(WEB_QUERY_1, RESULTS1)\n        mock_web.add_results(WEB_QUERY_2, RESULTS2)\n\n        handler.add_response(\n            LLMReasoningResponse(reasoning_tokens=tokenise(REASONING_RESPONSE_1))\n        ).add_response(\n            LLMToolCallResponse(\n                tool_name=\"web_search\",\n                tool_call_id=\"123\",\n                tool_call_argument_tokens=[\n                    json.dumps({\"queries\": [WEB_QUERY_1, WEB_QUERY_2]})\n                ],\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(0),\n                obj=ReasoningStart(),\n            )\n        ).expect_packets(\n            [\n                create_packet_with_reasoning_delta(token, 0)\n                for token in tokenise(REASONING_RESPONSE_1)\n            ]\n        ).expect(\n            Packet(placement=create_placement(0), obj=ReasoningDone())\n        ).expect(\n            Packet(\n                placement=create_placement(1),\n                obj=SearchToolStart(\n                    is_internet_search=True,\n                ),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(1),\n                obj=SearchToolQueriesDelta(\n                    queries=[WEB_QUERY_1, WEB_QUERY_2],\n                ),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(1),\n                obj=SearchToolDocumentsDelta(\n                    documents=[\n                        mock_web_search_result_to_search_doc(result)\n                        for result in RESULTS1\n                    ]\n                    + [\n                        mock_web_search_result_to_search_doc(result)\n                        for result in RESULTS2\n                    ]\n                ),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(1),\n                obj=SectionEnd(),\n            )\n        ).run_and_validate(\n            stream=answer_stream\n        )\n\n        # LLM Stream Response 2\n        for content in CONTENT1:\n            mock_content.add_content(content)\n\n        handler.add_response(\n            LLMReasoningResponse(reasoning_tokens=tokenise(REASONING_RESPONSE_2))\n        ).add_response(\n            LLMToolCallResponse(\n                tool_name=\"open_url\",\n                tool_call_id=\"123\",\n                tool_call_argument_tokens=[json.dumps({\"urls\": QUERY_URLS_1})],\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(2),\n                obj=ReasoningStart(),\n            )\n        ).expect_packets(\n            [\n                create_packet_with_reasoning_delta(token, 2)\n                for token in tokenise(REASONING_RESPONSE_2)\n            ]\n        ).expect(\n            Packet(\n                placement=create_placement(2),\n                obj=ReasoningDone(),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(3),\n                obj=OpenUrlStart(),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(3),\n                obj=OpenUrlUrls(urls=[content.url for content in CONTENT1]),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(3),\n                obj=OpenUrlDocuments(\n                    documents=[\n                        mock_web_content_to_search_doc(content) for content in CONTENT1\n                    ]\n                ),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(3),\n                obj=SectionEnd(),\n            )\n        ).run_and_validate(\n            stream=answer_stream\n        )\n\n        # LLM Stream Response 3\n        handler.add_response(\n            LLMReasoningResponse(reasoning_tokens=tokenise(REASONING_RESPONSE_3))\n        ).add_response(\n            LLMAnswerResponse(answer_tokens=tokenise(ANSWER_RESPONSE_1))\n        ).expect(\n            Packet(\n                placement=create_placement(4),\n                obj=ReasoningStart(),\n            )\n        ).expect_packets(\n            [\n                create_packet_with_reasoning_delta(token, 4)\n                for token in tokenise(REASONING_RESPONSE_3)\n            ]\n        ).expect(\n            Packet(\n                placement=create_placement(4),\n                obj=ReasoningDone(),\n            )\n        ).expect_agent_response(\n            answer_tokens=tokenise(ANSWER_RESPONSE_1),\n            turn_index=5,\n            final_documents=[\n                mock_web_search_result_to_search_doc(result) for result in RESULTS1\n            ]\n            + [mock_web_search_result_to_search_doc(result) for result in RESULTS2]\n            + [mock_web_content_to_search_doc(content) for content in CONTENT1],\n        ).run_and_validate(\n            stream=answer_stream\n        )\n\n        with pytest.raises(StopIteration):\n            next(answer_stream)\n\n\ndef test_image_generation_tool_no_reasoning(\n    db_session: Session,\n    full_deployment_setup: None,  # noqa: ARG001\n    mock_external_deps: None,  # noqa: ARG001\n) -> None:\n    ensure_default_llm_provider(db_session)\n    test_user = create_test_user(db_session, email_prefix=\"test_image_generation_tool\")\n\n    QUERY = \"Create me an image of a dog on a rocketship\"\n\n    IMAGE_DATA = (\n        \"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfF\"\n        \"cSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==\"\n    )\n    # Heartbeat interval is 5 seconds. A delay of 8 seconds ensures exactly 2 heartbeats:\n    IMAGE_DELAY = 8.0\n\n    ANSWER_RESPONSE = \"Here is a dog on a rocketship\"\n\n    with (\n        use_mock_llm() as mock_llm,\n        use_mock_image_generation_provider() as mock_image_gen,\n    ):\n        handler = StreamTestBuilder(\n            llm_controller=mock_llm,\n        )\n\n        chat_session = create_chat_session(db_session=db_session, user=test_user)\n\n        answer_stream = submit_query(\n            query=QUERY,\n            chat_session_id=chat_session.id,\n            db_session=db_session,\n            user=test_user,\n        )\n\n        assert_answer_stream_part_correct(\n            received=next(answer_stream),\n            expected=MessageResponseIDInfo(\n                user_message_id=1,\n                reserved_assistant_message_id=1,\n            ),\n        )\n\n        # LLM Stream Response 1\n        mock_image_gen.add_image(IMAGE_DATA, IMAGE_DELAY)\n        mock_llm.set_max_timeout(\n            IMAGE_DELAY + 5.0\n        )  # Give enough buffer for image generation\n\n        # The LLMToolCallResponse has 2 tokens (1 for tool name/id + 1 for arguments).\n        # We need to forward all 2 tokens before the tool starts executing and emitting packets.\n        # The tool then emits: start, heartbeats (during image generation), final, and section end.\n        handler.add_response(\n            LLMToolCallResponse(\n                tool_name=\"generate_image\",\n                tool_call_id=\"123\",\n                tool_call_argument_tokens=[json.dumps({\"prompt\": QUERY})],\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(0),\n                obj=ImageGenerationToolStart(),\n            ),\n            forward=2,  # Forward both tool call tokens before expecting first packet\n        ).expect_packets(\n            [\n                Packet(\n                    placement=create_placement(0),\n                    obj=ImageGenerationToolHeartbeat(),\n                )\n            ]\n            * 2,\n            forward=False,\n        ).expect(\n            Packet(\n                placement=create_placement(0),\n                obj=ImageGenerationFinal(\n                    images=[\n                        GeneratedImage(\n                            file_id=\"123\",\n                            url=\"/api/chat/file/123\",\n                            revised_prompt=QUERY,\n                            shape=\"square\",\n                        )\n                    ]\n                ),\n            ),\n            forward=False,\n        ).expect(\n            Packet(\n                placement=create_placement(0),\n                obj=SectionEnd(),\n            ),\n            forward=False,\n        ).run_and_validate(\n            stream=answer_stream\n        )\n\n        # LLM Stream Response 2 - the answer comes after the tool call, so turn_index=1\n        handler.add_response(\n            LLMAnswerResponse(\n                answer_tokens=tokenise(ANSWER_RESPONSE),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(1),\n                obj=AgentResponseStart(final_documents=None),\n            )\n        ).expect_packets(\n            [\n                create_packet_with_agent_response_delta(token, 1)\n                for token in tokenise(ANSWER_RESPONSE)\n            ]\n        ).expect(\n            Packet(\n                placement=create_placement(1),\n                obj=OverallStop(),\n            )\n        ).run_and_validate(\n            stream=answer_stream\n        )\n\n        with pytest.raises(StopIteration):\n            next(answer_stream)\n\n\ndef test_parallel_internal_and_web_search_tool_calls(\n    db_session: Session,\n    full_deployment_setup: None,  # noqa: ARG001\n    mock_external_deps: None,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    User asks a question\n    LLM does some thinking\n    LLM runs parallel tool calls for internal & web search\n\n    -> Interal Search Branch performs seach + read ~10 documents\n    -> Web Search: Searches the web for information\n\n    LLM reads web documents\n    LLM does thinking across all results\n    LLM reads one more website\n    LLM does more thinking\n    LLM generates answer\n    \"\"\"\n    ensure_default_llm_provider(db_session)\n    test_user = create_test_user(\n        db_session, email_prefix=\"test_parallel_internal_and_web_search_tool_calls\"\n    )\n\n    AVALIABLE_CONNECTORS = [\n        DocumentSource.GOOGLE_DRIVE,\n        DocumentSource.CONFLUENCE,\n        DocumentSource.LINEAR,\n        DocumentSource.FIREFLIES,\n    ]\n\n    QUERY = \"How will forecasts against 2026 global GDP growth affect our Q2 strategy?\"\n\n    THINKING_RESPONSE_1 = (\n        \"I need to build more context around the user's query to answer it. \"\n        \"I should look at GDP growth projections for 2026. \"\n        \"I should also look at what the Q2 strategy is and what projects are included. \"\n        \"I should perform both web and internal searches in parallel to get information efficiently.\"\n    )\n\n    WEB_QUERIES_1 = [\n        \"2026 global GDP growth projections\",\n        \"GDP growth 2026\",\n        \"GDP forecast 2026\",\n    ]\n\n    WEB_RESULTS_1 = {\n        WEB_QUERIES_1[0]: [\n            MockWebSearchResult(\n                title=\"World Economic Outlook Update, January 2026\",\n                link=\"https://www.imf.org/weo/issues/2026/01/19/world-economic-outlook-update-january-2026\",\n                snippet=\"Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...\",\n            ),\n            MockWebSearchResult(\n                title=\"IMF sees steady global growth in 2026 as AI boom offsets ...\",\n                link=\"https://www.reuters.com/article/us-world-economy-imf-idUSKBN2JU23E\",\n                snippet=\"IMF forecasts 2026 global GDP growth at 3.3% even with stronger 2025 performance\",\n            ),\n            MockWebSearchResult(\n                title=\"The Global Economy Is Forecast to Post...\",\n                link=\"https://www.goldmansachs.com/insights/articles/123\",\n                snippet=\"Global GDP is projected by Goldman Sachs Research to increase 2.8% in 2026\",\n            ),\n        ],\n        WEB_QUERIES_1[1]: [\n            MockWebSearchResult(\n                title=\"US third-quarter economic growth revised  slightly higher\",\n                link=\"https://www.reuters.com/word/us-third-quarter-eco\",\n                snippet=\"Gross domestic product increased at an upwardly revised 4.4% annualized rate, the ...\",\n            ),\n            MockWebSearchResult(\n                title=\"US GDP Growth Is Projected to Outperform Economist ...\",\n                link=\"https://www.goldmansachs.com/insights/articles/321\",\n                snippet=\"US GDP is forecast to expand 2.5% in 2026 (fourth quarter, yoy), versus\",\n            ),\n            MockWebSearchResult(\n                title=\"Gross Domestic Product\",\n                link=\"https://www.bea.gov/data/gdp/gross-domestic-product\",\n                snippet=\"Real gross domestic product (GDP) increased at an annual rate of 4.4 percent in the third quarter\",\n            ),\n        ],\n        WEB_QUERIES_1[2]: [\n            MockWebSearchResult(\n                title=\"World Economic Outlook Update, January 2026\",\n                link=\"https://www.imf.org/web/issues/2026/01/19/world-economic-outlook-update-january-2026\",\n                snippet=\"Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...\",\n            ),\n            MockWebSearchResult(\n                title=\"US GDP Growth Is Projected to Outperform Economist ...\",\n                link=\"https://www.goldmansachs.com/insights/articles/321\",\n                snippet=\"US GDP is forecast to expand 2.5% in 2026 (fourth quarter, yoy), versus\",\n            ),\n            MockWebSearchResult(\n                title=\"Our economic outlook for the United States - Vanguard\",\n                link=\"https://corporate.vanguard.com/content/corp/vemo\",\n                snippet=\"We expect strong capital investment to remain a principal strength in the year ahead\",\n            ),\n        ],\n    }\n\n    INTERNAL_QUERIES_1 = [\"Q2 strategy 2026\", \"GDP growth 2026 projects\", \"Q2 projects\"]\n\n    INTERNAL_RESULTS_1 = {\n        INTERNAL_QUERIES_1[0]: [\n            MockInternalSearchResult(\n                document_id=\"123456789\",\n                source_type=DocumentSource.GOOGLE_DRIVE,\n                semantic_identifier=\"Q2 strategy 2026\",\n                chunk_ind=11,\n            ),\n            MockInternalSearchResult(\n                document_id=\"732190732173\",\n                source_type=DocumentSource.FIREFLIES,\n                semantic_identifier=\"What we think is going to happen in Q2\",\n                chunk_ind=5,\n            ),\n            MockInternalSearchResult(\n                document_id=\"12389123219\",\n                source_type=DocumentSource.CONFLUENCE,\n                semantic_identifier=\"Strategy roadmap for Q2 2026\",\n                chunk_ind=7,\n            ),\n        ],\n        INTERNAL_QUERIES_1[1]: [\n            MockInternalSearchResult(\n                document_id=\"123123\",\n                source_type=DocumentSource.LINEAR,\n                semantic_identifier=\"GDP growth 2026 projects\",\n                chunk_ind=13,\n            )\n        ],\n        INTERNAL_QUERIES_1[2]: [\n            MockInternalSearchResult(\n                document_id=\"98823643243\",\n                source_type=DocumentSource.GOOGLE_DRIVE,\n                semantic_identifier=\"Full list of Q2 projects\",\n                chunk_ind=1,\n            )\n        ],\n    }\n\n    OPEN_URL_URLS_1 = [\n        WEB_RESULTS_1[WEB_QUERIES_1[0]][0].link,\n        WEB_RESULTS_1[WEB_QUERIES_1[0]][2].link,\n        WEB_RESULTS_1[WEB_QUERIES_1[2]][0].link,\n    ]\n\n    OPEN_URL_DOCUMENTS_1 = [\n        MockWebContent(\n            title=WEB_RESULTS_1[WEB_QUERIES_1[0]][0].title,\n            url=WEB_RESULTS_1[WEB_QUERIES_1[0]][0].link,\n            content=\"Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...\",\n        ),\n        MockWebContent(\n            title=WEB_RESULTS_1[WEB_QUERIES_1[0]][2].title,\n            url=WEB_RESULTS_1[WEB_QUERIES_1[0]][2].link,\n            content=\"Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...\",\n        ),\n        MockWebContent(\n            title=WEB_RESULTS_1[WEB_QUERIES_1[2]][0].title,\n            url=WEB_RESULTS_1[WEB_QUERIES_1[2]][0].link,\n            content=\"Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...\",\n        ),\n    ]\n\n    THINKING_RESPONSE_2 = (\n        \"I now have a clear picture of the 2026 global GDP projections and the Q2 strategy. \"\n        \"I would like to now about the outperform expections though...\"\n    )\n\n    OPEN_URL_URLS_2 = [WEB_RESULTS_1[WEB_QUERIES_1[1]][1].link]\n    OPEN_URL_DOCUMENTS_2 = [\n        MockWebContent(\n            title=WEB_RESULTS_1[WEB_QUERIES_1[1]][1].title,\n            url=WEB_RESULTS_1[WEB_QUERIES_1[1]][1].link,\n            content=\"US GDP is forecast to expand 2.5% in 2026 (fourth quarter, yoy), versus\",\n        )\n    ]\n\n    REASONING_RESPONSE_3 = (\n        \"I now have all the information I need to answer the user's question.\"\n    )\n\n    ANSWER_RESPONSE = (\n        \"We will have to change around some of our projects to accomodate the outperform expections. \"\n        \"We should focus on aggresive expansion projects and prioritize them over cost-cutting initiatives.\"\n    )\n\n    expected_web_docs = []\n    seen_web_results = set()\n    for web_results in WEB_RESULTS_1.values():\n        for web_result in web_results:\n            key = (web_result.title, web_result.link)\n            if key in seen_web_results:\n                continue\n            seen_web_results.add(key)\n            expected_web_docs.append(mock_web_search_result_to_search_doc(web_result))\n\n    expected_internal_docs = []\n    seen_internal_results = set()\n    for internal_results in INTERNAL_RESULTS_1.values():\n        for internal_result in internal_results:\n            key = (internal_result.semantic_identifier, internal_result.document_id)\n            if key in seen_internal_results:\n                continue\n            seen_internal_results.add(key)\n            expected_internal_docs.append(internal_result.to_search_doc())\n\n    with (\n        use_mock_llm() as mock_llm,\n        use_mock_search_pipeline(\n            connectors=AVALIABLE_CONNECTORS\n        ) as mock_search_pipeline,\n        use_mock_web_provider(db_session) as mock_web,\n        use_mock_content_provider() as mock_content,\n    ):\n        for query, web_results in WEB_RESULTS_1.items():\n            mock_web.add_results(query, web_results)\n\n        for query, internal_results in INTERNAL_RESULTS_1.items():\n            mock_search_pipeline.add_search_results(query, internal_results)\n\n        handler = StreamTestBuilder(\n            llm_controller=mock_llm,\n        )\n\n        chat_session = create_chat_session(db_session=db_session, user=test_user)\n\n        answer_stream = submit_query(\n            query=QUERY,\n            chat_session_id=chat_session.id,\n            db_session=db_session,\n            user=test_user,\n        )\n\n        assert_answer_stream_part_correct(\n            received=next(answer_stream),\n            expected=MessageResponseIDInfo(\n                user_message_id=1,\n                reserved_assistant_message_id=1,\n            ),\n        )\n\n        # LLM Stream Response 1\n        handler.add_response(\n            LLMReasoningResponse(\n                reasoning_tokens=tokenise(THINKING_RESPONSE_1),\n            )\n        ).add_responses_together(\n            LLMToolCallResponse(\n                tool_name=\"internal_search\",\n                tool_call_id=\"123\",\n                tool_call_argument_tokens=[json.dumps({\"queries\": INTERNAL_QUERIES_1})],\n            ),\n            LLMToolCallResponse(\n                tool_name=\"web_search\",\n                tool_call_id=\"321\",\n                tool_call_argument_tokens=[json.dumps({\"queries\": WEB_QUERIES_1})],\n            ),\n        ).expect_reasoning(\n            reasoning_tokens=tokenise(THINKING_RESPONSE_1),\n            turn_index=0,\n        ).expect(\n            Packet(\n                placement=create_placement(1),\n                obj=TopLevelBranching(\n                    num_parallel_branches=2,\n                ),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(1, 0),\n                obj=SearchToolStart(\n                    is_internet_search=False,\n                ),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(1, 1),\n                obj=SearchToolStart(\n                    is_internet_search=True,\n                ),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(1, 0),\n                obj=SearchToolQueriesDelta(\n                    queries=INTERNAL_QUERIES_1 + [QUERY],\n                ),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(1, 0),\n                obj=SearchToolDocumentsDelta(\n                    documents=expected_internal_docs,\n                ),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(1, 0),\n                obj=SectionEnd(),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(1, 1),\n                obj=SearchToolQueriesDelta(\n                    queries=WEB_QUERIES_1,\n                ),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(1, 1),\n                obj=SearchToolDocumentsDelta(\n                    documents=expected_web_docs,\n                ),\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(1, 1),\n                obj=SectionEnd(),\n            )\n        ).run_and_validate(\n            stream=answer_stream\n        )\n\n        # LLM Stream Response 2\n        for content in OPEN_URL_DOCUMENTS_1:\n            mock_content.add_content(content)\n\n        handler.add_response(\n            LLMToolCallResponse(\n                tool_name=\"open_url\",\n                tool_call_id=\"456\",\n                tool_call_argument_tokens=[json.dumps({\"urls\": OPEN_URL_URLS_1})],\n            )\n        ).expect(\n            Packet(\n                placement=create_placement(2, 0),\n                obj=OpenUrlStart(),\n            ),\n            forward=2,  # Need both header + argument tokens for the tool call\n        ).expect(\n            Packet(\n                placement=create_placement(2, 0),\n                obj=OpenUrlUrls(urls=OPEN_URL_URLS_1),\n            ),\n            forward=False,\n        ).expect(\n            Packet(\n                placement=create_placement(2, 0),\n                obj=OpenUrlDocuments(\n                    documents=[\n                        mock_web_content_to_search_doc(content)\n                        for content in OPEN_URL_DOCUMENTS_1\n                    ]\n                ),\n            ),\n            forward=False,\n        ).expect(\n            Packet(\n                placement=create_placement(2, 0),\n                obj=SectionEnd(),\n            ),\n            forward=False,\n        ).run_and_validate(\n            stream=answer_stream\n        )\n\n        # LLM Stream Response 3\n        for content in OPEN_URL_DOCUMENTS_2:\n            mock_content.add_content(content)\n\n        handler.add_response(\n            LLMReasoningResponse(\n                reasoning_tokens=tokenise(THINKING_RESPONSE_2),\n            )\n        ).add_response(\n            LLMToolCallResponse(\n                tool_name=\"open_url\",\n                tool_call_id=\"789\",\n                tool_call_argument_tokens=[json.dumps({\"urls\": OPEN_URL_URLS_2})],\n            )\n        ).expect_reasoning(\n            reasoning_tokens=tokenise(THINKING_RESPONSE_2),\n            turn_index=3,\n        ).expect(\n            Packet(\n                placement=create_placement(4),\n                obj=OpenUrlStart(),\n            )\n        ).expect(\n            Packet(placement=create_placement(4), obj=OpenUrlUrls(urls=OPEN_URL_URLS_2))\n        ).expect(\n            Packet(\n                placement=create_placement(4),\n                obj=OpenUrlDocuments(\n                    documents=[\n                        mock_web_content_to_search_doc(content)\n                        for content in OPEN_URL_DOCUMENTS_2\n                    ]\n                ),\n            ),\n            forward=False,\n        ).expect(\n            Packet(\n                placement=create_placement(4),\n                obj=SectionEnd(),\n            )\n        ).run_and_validate(\n            stream=answer_stream\n        )\n\n        # LLM Stream Response 4\n        handler.add_response(\n            LLMReasoningResponse(\n                reasoning_tokens=tokenise(REASONING_RESPONSE_3),\n            )\n        ).add_response(\n            LLMAnswerResponse(\n                answer_tokens=tokenise(ANSWER_RESPONSE),\n            )\n        ).expect_reasoning(\n            reasoning_tokens=tokenise(REASONING_RESPONSE_3),\n            turn_index=5,\n        ).expect_agent_response(\n            answer_tokens=tokenise(ANSWER_RESPONSE),\n            turn_index=6,\n            final_documents=expected_internal_docs\n            + expected_web_docs\n            + [\n                mock_web_content_to_search_doc(content)\n                for content in OPEN_URL_DOCUMENTS_1\n            ]\n            + [\n                mock_web_content_to_search_doc(content)\n                for content in OPEN_URL_DOCUMENTS_2\n            ],\n        ).run_and_validate(\n            stream=answer_stream\n        )\n\n        # End stream\n        with pytest.raises(StopIteration):\n            next(answer_stream)\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/answer/test_stream_chat_message_objects.py",
    "content": "import uuid\nfrom typing import Any\nfrom unittest.mock import MagicMock\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.models import AnswerStreamPart\nfrom onyx.chat.models import StreamingError\nfrom onyx.chat.process_message import handle_stream_message_objects\nfrom onyx.db.chat import create_chat_session\nfrom onyx.db.models import User\nfrom onyx.db.persona import upsert_persona\nfrom onyx.server.query_and_chat.models import MessageResponseIDInfo\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.server.query_and_chat.streaming_models import AgentResponseDelta\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider\nfrom tests.external_dependency_unit.conftest import create_test_user\n\n\n@pytest.mark.skip(reason=\"Temporarily disabled\")\ndef test_stream_chat_message_objects_without_web_search(\n    db_session: Session,\n    full_deployment_setup: None,  # noqa: ARG001\n    mock_external_deps: None,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test that when web search is requested but the persona has no web search tool,\n    the system handles it gracefully and returns a message explaining that web\n    search is not available.\n    \"\"\"\n\n    # Mock the model server HTTP calls for embeddings\n    def mock_post(\n        url: str,\n        json: dict[str, Any] | None = None,\n        headers: dict[str, str] | None = None,  # noqa: ARG001\n        **kwargs: Any,  # noqa: ARG001\n    ) -> MagicMock:\n        \"\"\"Mock requests.post for model server embedding calls\"\"\"\n        mock_response = MagicMock()\n\n        # Check if this is a call to the embedding endpoint\n        if \"encoder/bi-encoder-embed\" in url:\n            # Return a mock embedding response\n            # The embedding dimension doesn't matter for this test,\n            # just needs to be a valid response structure\n            num_texts = len(json.get(\"texts\", [])) if json else 1\n            mock_response.status_code = 200\n            mock_response.json.return_value = {\n                \"embeddings\": [[0.1] * 768]\n                * num_texts  # 768 is a common embedding dimension\n            }\n            mock_response.raise_for_status = MagicMock()\n            return mock_response\n\n        # For other URLs, return a generic success response\n        mock_response.status_code = 200\n        mock_response.json.return_value = {}\n        mock_response.raise_for_status = MagicMock()\n        return mock_response\n\n    # First, ensure we have an LLM provider set up\n    ensure_default_llm_provider(db_session)\n\n    # Create a test user\n    test_user: User = create_test_user(db_session, email_prefix=\"test_web_search\")\n\n    # Create a test persona explicitly WITHOUT any tools (including web search)\n    # This ensures the test doesn't rely on the state of the default persona\n    test_persona = upsert_persona(\n        user=None,  # System persona\n        name=f\"Test Persona {uuid.uuid4()}\",\n        description=\"Test persona with no tools for web search test\",\n        llm_model_provider_override=None,\n        llm_model_version_override=None,\n        starter_messages=None,\n        system_prompt=None,\n        task_prompt=None,\n        datetime_aware=None,\n        is_public=True,\n        db_session=db_session,\n        tool_ids=[],  # Explicitly no tools\n        document_set_ids=None,\n        is_listed=True,\n    )\n\n    # Create a chat session with our test persona\n    chat_session = create_chat_session(\n        db_session=db_session,\n        description=\"Test web search without tool\",\n        user_id=test_user.id if test_user else None,\n        persona_id=test_persona.id,\n    )\n    # Create the chat message request with a query that attempts to force web search\n    chat_request = SendMessageRequest(\n        message=\"run a web search for 'Onyx'\",\n        chat_session_id=chat_session.id,\n    )\n    # Call handle_stream_message_objects\n    response_generator = handle_stream_message_objects(\n        new_msg_req=chat_request,\n        user=test_user,\n        db_session=db_session,\n    )\n    # Collect all packets from the response\n    raw_answer_stream: list[AnswerStreamPart] = []\n    message_content = \"\"\n    error_occurred = False\n\n    for packet in response_generator:\n        raw_answer_stream.append(packet)\n        if isinstance(packet, Packet):\n            if isinstance(packet.obj, AgentResponseDelta):\n                # Direct MessageDelta (if not wrapped)\n                if packet.obj.content:\n                    message_content += packet.obj.content\n            elif isinstance(packet.obj, StreamingError):\n                error_occurred = True\n                break\n\n    assert not error_occurred, \"Should not have received a streaming error\"\n\n    # Verify that we got a response\n    assert len(raw_answer_stream) > 0, \"Should have received at least some packets\"\n\n    # Check if we got MessageResponseIDInfo packet (indicating message was created)\n    has_message_id = any(\n        isinstance(packet, MessageResponseIDInfo) for packet in raw_answer_stream\n    )\n    assert has_message_id, \"Should have received a message ID packet\"\n\n    assert len(message_content) > 0, \"Should have received some message content\"\n\n\ndef test_nothing() -> None:\n    assert True, \"This test is just to ensure the test suite is running\"\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/background/test_periodic_task_claim.py",
    "content": "\"\"\"External dependency unit tests for periodic task claiming.\n\nTests ``_try_claim_task`` and ``_try_run_periodic_task`` against real\nPostgreSQL, verifying happy-path behavior and concurrent-access safety.\n\nThe claim mechanism uses a transaction-scoped advisory lock + a KVStore\ntimestamp for cross-instance dedup.  The DB session is released before\nthe task runs, so long-running tasks don't hold connections.\n\"\"\"\n\nimport time\nfrom collections.abc import Generator\nfrom concurrent.futures import as_completed\nfrom concurrent.futures import ThreadPoolExecutor\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom unittest.mock import MagicMock\nfrom uuid import uuid4\n\nimport pytest\n\nfrom onyx.background.periodic_poller import _PeriodicTaskDef\nfrom onyx.background.periodic_poller import _try_claim_task\nfrom onyx.background.periodic_poller import _try_run_periodic_task\nfrom onyx.background.periodic_poller import PERIODIC_TASK_KV_PREFIX\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.models import KVStore\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n_TEST_LOCK_BASE = 90_000\n\n\n@pytest.fixture(scope=\"module\", autouse=True)\ndef _init_engine() -> None:\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n\n\ndef _make_task(\n    *,\n    name: str | None = None,\n    interval: float = 3600,\n    lock_id: int | None = None,\n    run_fn: MagicMock | None = None,\n) -> _PeriodicTaskDef:\n    return _PeriodicTaskDef(\n        name=name if name is not None else f\"test-{uuid4().hex[:8]}\",\n        interval_seconds=interval,\n        lock_id=lock_id if lock_id is not None else _TEST_LOCK_BASE,\n        run_fn=run_fn if run_fn is not None else MagicMock(),\n    )\n\n\n@pytest.fixture(autouse=True)\ndef _cleanup_kv(\n    tenant_context: None,  # noqa: ARG001\n) -> Generator[None, None, None]:\n    yield\n    with get_session_with_current_tenant() as db_session:\n        db_session.query(KVStore).filter(\n            KVStore.key.like(f\"{PERIODIC_TASK_KV_PREFIX}test-%\")\n        ).delete(synchronize_session=False)\n        db_session.commit()\n\n\n# ------------------------------------------------------------------\n# Happy-path: _try_claim_task\n# ------------------------------------------------------------------\n\n\nclass TestClaimHappyPath:\n    def test_first_claim_succeeds(self) -> None:\n        assert _try_claim_task(_make_task()) is True\n\n    def test_first_claim_creates_kv_row(self) -> None:\n        task = _make_task()\n        _try_claim_task(task)\n\n        with get_session_with_current_tenant() as db_session:\n            row = (\n                db_session.query(KVStore)\n                .filter_by(key=PERIODIC_TASK_KV_PREFIX + task.name)\n                .first()\n            )\n        assert row is not None\n        assert row.value is not None\n\n    def test_second_claim_within_interval_fails(self) -> None:\n        task = _make_task(interval=3600)\n        assert _try_claim_task(task) is True\n        assert _try_claim_task(task) is False\n\n    def test_claim_after_interval_succeeds(self) -> None:\n        task = _make_task(interval=1)\n        assert _try_claim_task(task) is True\n\n        kv_key = PERIODIC_TASK_KV_PREFIX + task.name\n        with get_session_with_current_tenant() as db_session:\n            row = db_session.query(KVStore).filter_by(key=kv_key).first()\n            assert row is not None\n            row.value = (datetime.now(timezone.utc) - timedelta(seconds=10)).isoformat()\n            db_session.commit()\n\n        assert _try_claim_task(task) is True\n\n\n# ------------------------------------------------------------------\n# Happy-path: _try_run_periodic_task\n# ------------------------------------------------------------------\n\n\nclass TestRunHappyPath:\n    def test_runs_task_and_updates_last_run_at(self) -> None:\n        mock_fn = MagicMock()\n        task = _make_task(run_fn=mock_fn)\n\n        _try_run_periodic_task(task)\n\n        mock_fn.assert_called_once()\n        assert task.last_run_at > 0\n\n    def test_skips_when_in_memory_interval_not_elapsed(self) -> None:\n        mock_fn = MagicMock()\n        task = _make_task(run_fn=mock_fn, interval=3600)\n        task.last_run_at = time.monotonic()\n\n        _try_run_periodic_task(task)\n\n        mock_fn.assert_not_called()\n\n    def test_skips_when_db_claim_blocked(self) -> None:\n        name = f\"test-{uuid4().hex[:8]}\"\n        lock_id = _TEST_LOCK_BASE + 10\n\n        _try_claim_task(_make_task(name=name, lock_id=lock_id, interval=3600))\n\n        mock_fn = MagicMock()\n        task = _make_task(name=name, lock_id=lock_id, interval=3600, run_fn=mock_fn)\n        _try_run_periodic_task(task)\n\n        mock_fn.assert_not_called()\n\n    def test_task_exception_does_not_propagate(self) -> None:\n        task = _make_task(run_fn=MagicMock(side_effect=RuntimeError(\"boom\")))\n        _try_run_periodic_task(task)\n\n    def test_claim_committed_before_task_runs(self) -> None:\n        \"\"\"The KV claim must be visible in the DB when run_fn executes.\"\"\"\n        task_name = f\"test-order-{uuid4().hex[:8]}\"\n        kv_key = PERIODIC_TASK_KV_PREFIX + task_name\n        claim_visible: list[bool] = []\n\n        def check_claim() -> None:\n            with get_session_with_current_tenant() as db_session:\n                row = db_session.query(KVStore).filter_by(key=kv_key).first()\n                claim_visible.append(row is not None and row.value is not None)\n\n        task = _PeriodicTaskDef(\n            name=task_name,\n            interval_seconds=3600,\n            lock_id=_TEST_LOCK_BASE + 11,\n            run_fn=check_claim,\n        )\n\n        _try_run_periodic_task(task)\n\n        assert claim_visible == [True]\n\n\n# ------------------------------------------------------------------\n# Concurrency: only one claimer should win\n# ------------------------------------------------------------------\n\n\nclass TestClaimConcurrency:\n    def test_concurrent_claims_single_winner(self) -> None:\n        \"\"\"Many threads claim the same task — exactly one should succeed.\"\"\"\n        num_threads = 20\n        task_name = f\"test-race-{uuid4().hex[:8]}\"\n        lock_id = _TEST_LOCK_BASE + 20\n\n        def claim() -> bool:\n            CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n            return _try_claim_task(\n                _PeriodicTaskDef(\n                    name=task_name,\n                    interval_seconds=3600,\n                    lock_id=lock_id,\n                    run_fn=lambda: None,\n                )\n            )\n\n        results: list[bool] = []\n        with ThreadPoolExecutor(max_workers=num_threads) as executor:\n            futures = [executor.submit(claim) for _ in range(num_threads)]\n            for future in as_completed(futures):\n                results.append(future.result())\n\n        winners = sum(1 for r in results if r)\n        assert winners == 1, f\"Expected 1 winner, got {winners}\"\n\n    def test_concurrent_run_single_execution(self) -> None:\n        \"\"\"Many threads run the same task — run_fn fires exactly once.\"\"\"\n        num_threads = 20\n        task_name = f\"test-run-race-{uuid4().hex[:8]}\"\n        lock_id = _TEST_LOCK_BASE + 21\n        counter = MagicMock()\n\n        def run() -> None:\n            CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n            _try_run_periodic_task(\n                _PeriodicTaskDef(\n                    name=task_name,\n                    interval_seconds=3600,\n                    lock_id=lock_id,\n                    run_fn=counter,\n                )\n            )\n\n        with ThreadPoolExecutor(max_workers=num_threads) as executor:\n            futures = [executor.submit(run) for _ in range(num_threads)]\n            for future in as_completed(futures):\n                future.result()\n\n        assert (\n            counter.call_count == 1\n        ), f\"Expected run_fn called once, got {counter.call_count}\"\n\n    def test_no_errors_under_contention(self) -> None:\n        \"\"\"All threads complete without exceptions under high contention.\"\"\"\n        num_threads = 30\n        task_name = f\"test-err-{uuid4().hex[:8]}\"\n        lock_id = _TEST_LOCK_BASE + 22\n        errors: list[Exception] = []\n\n        def claim() -> bool:\n            CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n            return _try_claim_task(\n                _PeriodicTaskDef(\n                    name=task_name,\n                    interval_seconds=3600,\n                    lock_id=lock_id,\n                    run_fn=lambda: None,\n                )\n            )\n\n        with ThreadPoolExecutor(max_workers=num_threads) as executor:\n            futures = [executor.submit(claim) for _ in range(num_threads)]\n            for future in as_completed(futures):\n                try:\n                    future.result()\n                except Exception as e:\n                    errors.append(e)\n\n        assert errors == [], f\"Got {len(errors)} errors: {errors}\"\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/background/test_startup_recovery.py",
    "content": "\"\"\"External dependency unit tests for startup recovery (Step 10g).\n\nSeeds ``UserFile`` records in stuck states (PROCESSING, DELETING,\nneeds_project_sync) then calls ``recover_stuck_user_files`` and verifies\nthe drain loops pick them up via ``FOR UPDATE SKIP LOCKED``.\n\nUses real PostgreSQL (via ``db_session`` / ``tenant_context`` fixtures).\nThe per-file ``*_impl`` functions are mocked so no real file store or\nconnector is needed — we only verify that recovery finds and dispatches\nthe correct files.\n\"\"\"\n\nfrom collections.abc import Generator\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import UUID\nfrom uuid import uuid4\n\nimport pytest\nimport sqlalchemy as sa\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.periodic_poller import recover_stuck_user_files\nfrom onyx.db.enums import UserFileStatus\nfrom onyx.db.models import UserFile\nfrom tests.external_dependency_unit.conftest import create_test_user\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n_IMPL_MODULE = \"onyx.background.celery.tasks.user_file_processing.tasks\"\n\n\ndef _create_user_file(\n    db_session: Session,\n    user_id: object,\n    *,\n    status: UserFileStatus = UserFileStatus.PROCESSING,\n    needs_project_sync: bool = False,\n    needs_persona_sync: bool = False,\n) -> UserFile:\n    uf = UserFile(\n        id=uuid4(),\n        user_id=user_id,\n        file_id=f\"test_file_{uuid4().hex[:8]}\",\n        name=f\"test_{uuid4().hex[:8]}.txt\",\n        file_type=\"text/plain\",\n        status=status,\n        needs_project_sync=needs_project_sync,\n        needs_persona_sync=needs_persona_sync,\n    )\n    db_session.add(uf)\n    db_session.commit()\n    db_session.refresh(uf)\n    return uf\n\n\ndef _fake_delete_impl(\n    user_file_id: str,\n    tenant_id: str,  # noqa: ARG001\n    redis_locking: bool,  # noqa: ARG001\n) -> None:\n    \"\"\"Mock side-effect: delete the row so the drain loop terminates.\"\"\"\n    from onyx.db.engine.sql_engine import get_session_with_current_tenant\n\n    with get_session_with_current_tenant() as session:\n        session.execute(sa.delete(UserFile).where(UserFile.id == UUID(user_file_id)))\n        session.commit()\n\n\ndef _fake_sync_impl(\n    user_file_id: str,\n    tenant_id: str,  # noqa: ARG001\n    redis_locking: bool,  # noqa: ARG001\n) -> None:\n    \"\"\"Mock side-effect: clear sync flags so the drain loop terminates.\"\"\"\n    from onyx.db.engine.sql_engine import get_session_with_current_tenant\n\n    with get_session_with_current_tenant() as session:\n        session.execute(\n            sa.update(UserFile)\n            .where(UserFile.id == UUID(user_file_id))\n            .values(needs_project_sync=False, needs_persona_sync=False)\n        )\n        session.commit()\n\n\n@pytest.fixture()\ndef _cleanup_user_files(db_session: Session) -> Generator[list[UserFile], None, None]:\n    \"\"\"Track created UserFile rows and delete them after each test.\"\"\"\n    created: list[UserFile] = []\n    yield created\n    for uf in created:\n        existing = db_session.get(UserFile, uf.id)\n        if existing:\n            db_session.delete(existing)\n    db_session.commit()\n\n\n# ---------------------------------------------------------------------------\n# Tests\n# ---------------------------------------------------------------------------\n\n\nclass TestRecoverProcessingFiles:\n    \"\"\"Files in PROCESSING status are re-processed via the processing drain loop.\"\"\"\n\n    def test_processing_files_recovered(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n        _cleanup_user_files: list[UserFile],\n    ) -> None:\n        user = create_test_user(db_session, \"recovery_proc\")\n        uf = _create_user_file(db_session, user.id, status=UserFileStatus.PROCESSING)\n        _cleanup_user_files.append(uf)\n\n        mock_impl = MagicMock()\n        with patch(f\"{_IMPL_MODULE}.process_user_file_impl\", mock_impl):\n            recover_stuck_user_files(TEST_TENANT_ID)\n\n        called_ids = [call.kwargs[\"user_file_id\"] for call in mock_impl.call_args_list]\n        assert (\n            str(uf.id) in called_ids\n        ), f\"Expected file {uf.id} to be recovered but got: {called_ids}\"\n\n    def test_completed_files_not_recovered(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n        _cleanup_user_files: list[UserFile],\n    ) -> None:\n        user = create_test_user(db_session, \"recovery_comp\")\n        uf = _create_user_file(db_session, user.id, status=UserFileStatus.COMPLETED)\n        _cleanup_user_files.append(uf)\n\n        mock_impl = MagicMock()\n        with patch(f\"{_IMPL_MODULE}.process_user_file_impl\", mock_impl):\n            recover_stuck_user_files(TEST_TENANT_ID)\n\n        called_ids = [call.kwargs[\"user_file_id\"] for call in mock_impl.call_args_list]\n        assert (\n            str(uf.id) not in called_ids\n        ), f\"COMPLETED file {uf.id} should not have been recovered\"\n\n\nclass TestRecoverDeletingFiles:\n    \"\"\"Files in DELETING status are recovered via the delete drain loop.\"\"\"\n\n    def test_deleting_files_recovered(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n        _cleanup_user_files: list[UserFile],\n    ) -> None:\n        user = create_test_user(db_session, \"recovery_del\")\n        uf = _create_user_file(db_session, user.id, status=UserFileStatus.DELETING)\n        # Row is deleted by _fake_delete_impl, so no cleanup needed.\n\n        mock_impl = MagicMock(side_effect=_fake_delete_impl)\n        with patch(f\"{_IMPL_MODULE}.delete_user_file_impl\", mock_impl):\n            recover_stuck_user_files(TEST_TENANT_ID)\n\n        called_ids = [call.kwargs[\"user_file_id\"] for call in mock_impl.call_args_list]\n        assert (\n            str(uf.id) in called_ids\n        ), f\"Expected file {uf.id} to be recovered for deletion but got: {called_ids}\"\n\n\nclass TestRecoverSyncFiles:\n    \"\"\"Files needing project/persona sync are recovered via the sync drain loop.\"\"\"\n\n    def test_needs_project_sync_recovered(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n        _cleanup_user_files: list[UserFile],\n    ) -> None:\n        user = create_test_user(db_session, \"recovery_sync\")\n        uf = _create_user_file(\n            db_session,\n            user.id,\n            status=UserFileStatus.COMPLETED,\n            needs_project_sync=True,\n        )\n        _cleanup_user_files.append(uf)\n\n        mock_impl = MagicMock(side_effect=_fake_sync_impl)\n        with patch(f\"{_IMPL_MODULE}.project_sync_user_file_impl\", mock_impl):\n            recover_stuck_user_files(TEST_TENANT_ID)\n\n        called_ids = [call.kwargs[\"user_file_id\"] for call in mock_impl.call_args_list]\n        assert (\n            str(uf.id) in called_ids\n        ), f\"Expected file {uf.id} to be recovered for sync but got: {called_ids}\"\n\n    def test_needs_persona_sync_recovered(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n        _cleanup_user_files: list[UserFile],\n    ) -> None:\n        user = create_test_user(db_session, \"recovery_psync\")\n        uf = _create_user_file(\n            db_session,\n            user.id,\n            status=UserFileStatus.COMPLETED,\n            needs_persona_sync=True,\n        )\n        _cleanup_user_files.append(uf)\n\n        mock_impl = MagicMock(side_effect=_fake_sync_impl)\n        with patch(f\"{_IMPL_MODULE}.project_sync_user_file_impl\", mock_impl):\n            recover_stuck_user_files(TEST_TENANT_ID)\n\n        called_ids = [call.kwargs[\"user_file_id\"] for call in mock_impl.call_args_list]\n        assert (\n            str(uf.id) in called_ids\n        ), f\"Expected file {uf.id} to be recovered for persona sync but got: {called_ids}\"\n\n\nclass TestRecoveryMultipleFiles:\n    \"\"\"Recovery processes all stuck files in one pass, not just the first.\"\"\"\n\n    def test_multiple_processing_files(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n        _cleanup_user_files: list[UserFile],\n    ) -> None:\n        user = create_test_user(db_session, \"recovery_multi\")\n        files = []\n        for _ in range(3):\n            uf = _create_user_file(\n                db_session, user.id, status=UserFileStatus.PROCESSING\n            )\n            _cleanup_user_files.append(uf)\n            files.append(uf)\n\n        mock_impl = MagicMock()\n        with patch(f\"{_IMPL_MODULE}.process_user_file_impl\", mock_impl):\n            recover_stuck_user_files(TEST_TENANT_ID)\n\n        called_ids = {call.kwargs[\"user_file_id\"] for call in mock_impl.call_args_list}\n        expected_ids = {str(uf.id) for uf in files}\n        assert expected_ids.issubset(\n            called_ids\n        ), f\"Expected all {len(files)} files to be recovered. Missing: {expected_ids - called_ids}\"\n\n\nclass TestTransientFailures:\n    \"\"\"Drain loops skip failed files, process the rest, and terminate.\"\"\"\n\n    def test_processing_failure_skips_and_continues(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n        _cleanup_user_files: list[UserFile],\n    ) -> None:\n        user = create_test_user(db_session, \"fail_proc\")\n        uf_fail = _create_user_file(\n            db_session, user.id, status=UserFileStatus.PROCESSING\n        )\n        uf_ok = _create_user_file(db_session, user.id, status=UserFileStatus.PROCESSING)\n        _cleanup_user_files.extend([uf_fail, uf_ok])\n\n        fail_id = str(uf_fail.id)\n\n        def side_effect(\n            *,\n            user_file_id: str,\n            tenant_id: str,  # noqa: ARG001\n            redis_locking: bool,  # noqa: ARG001\n        ) -> None:\n            if user_file_id == fail_id:\n                raise RuntimeError(\"transient failure\")\n\n        mock_impl = MagicMock(side_effect=side_effect)\n        with patch(f\"{_IMPL_MODULE}.process_user_file_impl\", mock_impl):\n            recover_stuck_user_files(TEST_TENANT_ID)\n\n        called_ids = [call.kwargs[\"user_file_id\"] for call in mock_impl.call_args_list]\n        assert fail_id in called_ids, \"Failed file should have been attempted\"\n        assert str(uf_ok.id) in called_ids, \"Healthy file should have been processed\"\n        assert called_ids.count(fail_id) == 1, \"Failed file retried — infinite loop\"\n        assert called_ids.count(str(uf_ok.id)) == 1\n\n    def test_delete_failure_skips_and_continues(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n        _cleanup_user_files: list[UserFile],\n    ) -> None:\n        user = create_test_user(db_session, \"fail_del\")\n        uf_fail = _create_user_file(db_session, user.id, status=UserFileStatus.DELETING)\n        uf_ok = _create_user_file(db_session, user.id, status=UserFileStatus.DELETING)\n        _cleanup_user_files.append(uf_fail)\n\n        fail_id = str(uf_fail.id)\n\n        def side_effect(\n            *, user_file_id: str, tenant_id: str, redis_locking: bool\n        ) -> None:\n            if user_file_id == fail_id:\n                raise RuntimeError(\"transient failure\")\n            _fake_delete_impl(user_file_id, tenant_id, redis_locking)\n\n        mock_impl = MagicMock(side_effect=side_effect)\n        with patch(f\"{_IMPL_MODULE}.delete_user_file_impl\", mock_impl):\n            recover_stuck_user_files(TEST_TENANT_ID)\n\n        called_ids = [call.kwargs[\"user_file_id\"] for call in mock_impl.call_args_list]\n        assert fail_id in called_ids, \"Failed file should have been attempted\"\n        assert str(uf_ok.id) in called_ids, \"Healthy file should have been deleted\"\n        assert called_ids.count(fail_id) == 1, \"Failed file retried — infinite loop\"\n        assert called_ids.count(str(uf_ok.id)) == 1\n\n    def test_sync_failure_skips_and_continues(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n        _cleanup_user_files: list[UserFile],\n    ) -> None:\n        user = create_test_user(db_session, \"fail_sync\")\n        uf_fail = _create_user_file(\n            db_session,\n            user.id,\n            status=UserFileStatus.COMPLETED,\n            needs_project_sync=True,\n        )\n        uf_ok = _create_user_file(\n            db_session,\n            user.id,\n            status=UserFileStatus.COMPLETED,\n            needs_persona_sync=True,\n        )\n        _cleanup_user_files.extend([uf_fail, uf_ok])\n\n        fail_id = str(uf_fail.id)\n\n        def side_effect(\n            *, user_file_id: str, tenant_id: str, redis_locking: bool\n        ) -> None:\n            if user_file_id == fail_id:\n                raise RuntimeError(\"transient failure\")\n            _fake_sync_impl(user_file_id, tenant_id, redis_locking)\n\n        mock_impl = MagicMock(side_effect=side_effect)\n        with patch(f\"{_IMPL_MODULE}.project_sync_user_file_impl\", mock_impl):\n            recover_stuck_user_files(TEST_TENANT_ID)\n\n        called_ids = [call.kwargs[\"user_file_id\"] for call in mock_impl.call_args_list]\n        assert fail_id in called_ids, \"Failed file should have been attempted\"\n        assert str(uf_ok.id) in called_ids, \"Healthy file should have been synced\"\n        assert called_ids.count(fail_id) == 1, \"Failed file retried — infinite loop\"\n        assert called_ids.count(str(uf_ok.id)) == 1\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/cache/conftest.py",
    "content": "\"\"\"Fixtures for cache backend tests.\n\nRequires a running PostgreSQL instance (and Redis for parity tests).\nRun with::\n\n    python -m dotenv -f .vscode/.env run -- pytest tests/external_dependency_unit/cache/\n\"\"\"\n\nfrom collections.abc import Generator\n\nimport pytest\n\nfrom onyx.cache.interface import CacheBackend\nfrom onyx.cache.postgres_backend import PostgresCacheBackend\nfrom onyx.cache.redis_backend import RedisCacheBackend\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n\n@pytest.fixture(scope=\"session\", autouse=True)\ndef _init_db() -> Generator[None, None, None]:\n    \"\"\"Initialize DB engine. Assumes Postgres has migrations applied (e.g. via docker compose).\"\"\"\n    SqlEngine.init_engine(pool_size=5, max_overflow=2)\n    yield\n\n\n@pytest.fixture(autouse=True)\ndef _tenant_context() -> Generator[None, None, None]:\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n    try:\n        yield\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\n@pytest.fixture\ndef pg_cache() -> PostgresCacheBackend:\n    return PostgresCacheBackend(TEST_TENANT_ID)\n\n\n@pytest.fixture\ndef redis_cache() -> RedisCacheBackend:\n    from onyx.redis.redis_pool import redis_pool\n\n    return RedisCacheBackend(redis_pool.get_client(TEST_TENANT_ID))\n\n\n@pytest.fixture(params=[\"postgres\", \"redis\"], ids=[\"postgres\", \"redis\"])\ndef cache(\n    request: pytest.FixtureRequest,\n    pg_cache: PostgresCacheBackend,\n    redis_cache: RedisCacheBackend,\n) -> CacheBackend:\n    if request.param == \"postgres\":\n        return pg_cache\n    return redis_cache\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/cache/test_cache_backend_parity.py",
    "content": "\"\"\"Parameterized tests that run the same CacheBackend operations against\nboth Redis and PostgreSQL, asserting identical return values.\n\nEach test runs twice (once per backend) via the ``cache`` fixture defined\nin conftest.py.\n\"\"\"\n\nimport time\nfrom uuid import uuid4\n\nfrom onyx.cache.interface import CacheBackend\nfrom onyx.cache.interface import TTL_KEY_NOT_FOUND\nfrom onyx.cache.interface import TTL_NO_EXPIRY\n\n\ndef _key() -> str:\n    return f\"parity_{uuid4().hex[:12]}\"\n\n\nclass TestKVParity:\n    def test_get_missing(self, cache: CacheBackend) -> None:\n        assert cache.get(_key()) is None\n\n    def test_get_set(self, cache: CacheBackend) -> None:\n        k = _key()\n        cache.set(k, b\"value\")\n        assert cache.get(k) == b\"value\"\n\n    def test_overwrite(self, cache: CacheBackend) -> None:\n        k = _key()\n        cache.set(k, b\"a\")\n        cache.set(k, b\"b\")\n        assert cache.get(k) == b\"b\"\n\n    def test_set_string(self, cache: CacheBackend) -> None:\n        k = _key()\n        cache.set(k, \"hello\")\n        assert cache.get(k) == b\"hello\"\n\n    def test_set_int(self, cache: CacheBackend) -> None:\n        k = _key()\n        cache.set(k, 42)\n        assert cache.get(k) == b\"42\"\n\n    def test_delete(self, cache: CacheBackend) -> None:\n        k = _key()\n        cache.set(k, b\"x\")\n        cache.delete(k)\n        assert cache.get(k) is None\n\n    def test_exists(self, cache: CacheBackend) -> None:\n        k = _key()\n        assert not cache.exists(k)\n        cache.set(k, b\"x\")\n        assert cache.exists(k)\n\n\nclass TestTTLParity:\n    def test_ttl_missing(self, cache: CacheBackend) -> None:\n        assert cache.ttl(_key()) == TTL_KEY_NOT_FOUND\n\n    def test_ttl_no_expiry(self, cache: CacheBackend) -> None:\n        k = _key()\n        cache.set(k, b\"x\")\n        assert cache.ttl(k) == TTL_NO_EXPIRY\n\n    def test_ttl_remaining(self, cache: CacheBackend) -> None:\n        k = _key()\n        cache.set(k, b\"x\", ex=10)\n        remaining = cache.ttl(k)\n        assert 8 <= remaining <= 10\n\n    def test_set_with_ttl_expires(self, cache: CacheBackend) -> None:\n        k = _key()\n        cache.set(k, b\"x\", ex=1)\n        assert cache.get(k) == b\"x\"\n        time.sleep(1.5)\n        assert cache.get(k) is None\n\n\nclass TestLockParity:\n    def test_acquire_release(self, cache: CacheBackend) -> None:\n        lock = cache.lock(f\"parity_lock_{uuid4().hex[:8]}\")\n        assert lock.acquire(blocking=False)\n        assert lock.owned()\n        lock.release()\n        assert not lock.owned()\n\n\nclass TestListParity:\n    def test_rpush_blpop(self, cache: CacheBackend) -> None:\n        k = f\"parity_list_{uuid4().hex[:8]}\"\n        cache.rpush(k, b\"item\")\n        result = cache.blpop([k], timeout=1)\n        assert result is not None\n        assert result[1] == b\"item\"\n\n    def test_blpop_timeout(self, cache: CacheBackend) -> None:\n        result = cache.blpop([f\"parity_empty_{uuid4().hex[:8]}\"], timeout=1)\n        assert result is None\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/cache/test_kv_store_cache_layer.py",
    "content": "\"\"\"Tests for PgRedisKVStore's cache layer integration with CacheBackend.\n\nVerifies that the KV store correctly uses the CacheBackend for caching\nin front of PostgreSQL: cache hits, cache misses falling through to PG,\ncache population after PG reads, cache invalidation on delete, and\ngraceful degradation when the cache backend raises.\n\nRequires running PostgreSQL.\n\"\"\"\n\nimport json\nfrom collections.abc import Generator\nfrom unittest.mock import MagicMock\n\nimport pytest\nfrom sqlalchemy import delete\n\nfrom onyx.cache.interface import CacheBackend\nfrom onyx.cache.postgres_backend import PostgresCacheBackend\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.models import CacheStore\nfrom onyx.db.models import KVStore\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.key_value_store.store import PgRedisKVStore\nfrom onyx.key_value_store.store import REDIS_KEY_PREFIX\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n\n@pytest.fixture(autouse=True)\ndef _clean_kv() -> Generator[None, None, None]:\n    yield\n    with get_session_with_tenant(tenant_id=TEST_TENANT_ID) as session:\n        session.execute(delete(KVStore))\n        session.execute(delete(CacheStore))\n        session.commit()\n\n\n@pytest.fixture\ndef kv_store(pg_cache: PostgresCacheBackend) -> PgRedisKVStore:\n    return PgRedisKVStore(cache=pg_cache)\n\n\nclass TestStoreAndLoad:\n    def test_store_populates_cache_and_pg(\n        self, kv_store: PgRedisKVStore, pg_cache: PostgresCacheBackend\n    ) -> None:\n        kv_store.store(\"k1\", {\"hello\": \"world\"})\n\n        cached = pg_cache.get(REDIS_KEY_PREFIX + \"k1\")\n        assert cached is not None\n        assert json.loads(cached) == {\"hello\": \"world\"}\n\n        loaded = kv_store.load(\"k1\")\n        assert loaded == {\"hello\": \"world\"}\n\n    def test_load_returns_cached_value_without_pg_hit(\n        self, pg_cache: PostgresCacheBackend\n    ) -> None:\n        \"\"\"If the cache already has the value, PG should not be queried.\"\"\"\n        pg_cache.set(REDIS_KEY_PREFIX + \"cached_only\", json.dumps({\"from\": \"cache\"}))\n        kv = PgRedisKVStore(cache=pg_cache)\n        assert kv.load(\"cached_only\") == {\"from\": \"cache\"}\n\n    def test_load_falls_through_to_pg_on_cache_miss(\n        self, kv_store: PgRedisKVStore, pg_cache: PostgresCacheBackend\n    ) -> None:\n        kv_store.store(\"k2\", [1, 2, 3])\n\n        pg_cache.delete(REDIS_KEY_PREFIX + \"k2\")\n        assert pg_cache.get(REDIS_KEY_PREFIX + \"k2\") is None\n\n        loaded = kv_store.load(\"k2\")\n        assert loaded == [1, 2, 3]\n\n        repopulated = pg_cache.get(REDIS_KEY_PREFIX + \"k2\")\n        assert repopulated is not None\n        assert json.loads(repopulated) == [1, 2, 3]\n\n    def test_load_with_refresh_cache_skips_cache(\n        self, kv_store: PgRedisKVStore, pg_cache: PostgresCacheBackend\n    ) -> None:\n        kv_store.store(\"k3\", \"original\")\n\n        pg_cache.set(REDIS_KEY_PREFIX + \"k3\", json.dumps(\"stale\"))\n\n        loaded = kv_store.load(\"k3\", refresh_cache=True)\n        assert loaded == \"original\"\n\n\nclass TestDelete:\n    def test_delete_removes_from_cache_and_pg(\n        self, kv_store: PgRedisKVStore, pg_cache: PostgresCacheBackend\n    ) -> None:\n        kv_store.store(\"del_me\", \"bye\")\n        kv_store.delete(\"del_me\")\n\n        assert pg_cache.get(REDIS_KEY_PREFIX + \"del_me\") is None\n\n        with pytest.raises(KvKeyNotFoundError):\n            kv_store.load(\"del_me\")\n\n    def test_delete_missing_key_raises(self, kv_store: PgRedisKVStore) -> None:\n        with pytest.raises(KvKeyNotFoundError):\n            kv_store.delete(\"nonexistent\")\n\n\nclass TestCacheFailureGracefulDegradation:\n    def test_store_succeeds_when_cache_set_raises(self) -> None:\n        failing_cache = MagicMock(spec=CacheBackend)\n        failing_cache.set.side_effect = ConnectionError(\"cache down\")\n\n        kv = PgRedisKVStore(cache=failing_cache)\n        kv.store(\"resilient\", {\"data\": True})\n\n        working_cache = MagicMock(spec=CacheBackend)\n        working_cache.get.return_value = None\n        kv_reader = PgRedisKVStore(cache=working_cache)\n        loaded = kv_reader.load(\"resilient\")\n        assert loaded == {\"data\": True}\n\n    def test_load_falls_through_when_cache_get_raises(self) -> None:\n        failing_cache = MagicMock(spec=CacheBackend)\n        failing_cache.get.side_effect = ConnectionError(\"cache down\")\n        failing_cache.set.side_effect = ConnectionError(\"cache down\")\n\n        kv = PgRedisKVStore(cache=failing_cache)\n        kv.store(\"survive\", 42)\n        loaded = kv.load(\"survive\")\n        assert loaded == 42\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/cache/test_postgres_cache_backend.py",
    "content": "\"\"\"Tests for PostgresCacheBackend against real PostgreSQL.\n\nCovers every method on the backend: KV CRUD, TTL behaviour, advisory\nlocks (acquire / release / contention), list operations (rpush / blpop),\nand the periodic cleanup function.\n\"\"\"\n\nimport time\nfrom uuid import uuid4\n\nfrom sqlalchemy import select\n\nfrom onyx.cache.interface import TTL_KEY_NOT_FOUND\nfrom onyx.cache.interface import TTL_NO_EXPIRY\nfrom onyx.cache.postgres_backend import cleanup_expired_cache_entries\nfrom onyx.cache.postgres_backend import PostgresCacheBackend\nfrom onyx.db.models import CacheStore\n\n\ndef _key() -> str:\n    return f\"test_{uuid4().hex[:12]}\"\n\n\n# ------------------------------------------------------------------\n# Basic KV\n# ------------------------------------------------------------------\n\n\nclass TestKV:\n    def test_get_set(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, b\"hello\")\n        assert pg_cache.get(k) == b\"hello\"\n\n    def test_get_missing(self, pg_cache: PostgresCacheBackend) -> None:\n        assert pg_cache.get(_key()) is None\n\n    def test_set_overwrite(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, b\"first\")\n        pg_cache.set(k, b\"second\")\n        assert pg_cache.get(k) == b\"second\"\n\n    def test_set_string_value(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, \"string_val\")\n        assert pg_cache.get(k) == b\"string_val\"\n\n    def test_set_int_value(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, 42)\n        assert pg_cache.get(k) == b\"42\"\n\n    def test_delete(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, b\"to_delete\")\n        pg_cache.delete(k)\n        assert pg_cache.get(k) is None\n\n    def test_delete_missing_is_noop(self, pg_cache: PostgresCacheBackend) -> None:\n        pg_cache.delete(_key())\n\n    def test_exists(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        assert not pg_cache.exists(k)\n        pg_cache.set(k, b\"x\")\n        assert pg_cache.exists(k)\n\n\n# ------------------------------------------------------------------\n# TTL\n# ------------------------------------------------------------------\n\n\nclass TestTTL:\n    def test_set_with_ttl_expires(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, b\"ephemeral\", ex=1)\n        assert pg_cache.get(k) == b\"ephemeral\"\n        time.sleep(1.5)\n        assert pg_cache.get(k) is None\n\n    def test_ttl_no_expiry(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, b\"forever\")\n        assert pg_cache.ttl(k) == TTL_NO_EXPIRY\n\n    def test_ttl_missing_key(self, pg_cache: PostgresCacheBackend) -> None:\n        assert pg_cache.ttl(_key()) == TTL_KEY_NOT_FOUND\n\n    def test_ttl_remaining(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, b\"x\", ex=10)\n        remaining = pg_cache.ttl(k)\n        assert 8 <= remaining <= 10\n\n    def test_ttl_expired_key(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, b\"x\", ex=1)\n        time.sleep(1.5)\n        assert pg_cache.ttl(k) == TTL_KEY_NOT_FOUND\n\n    def test_expire_adds_ttl(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, b\"x\")\n        assert pg_cache.ttl(k) == TTL_NO_EXPIRY\n        pg_cache.expire(k, 10)\n        assert 8 <= pg_cache.ttl(k) <= 10\n\n    def test_exists_respects_ttl(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, b\"x\", ex=1)\n        assert pg_cache.exists(k)\n        time.sleep(1.5)\n        assert not pg_cache.exists(k)\n\n\n# ------------------------------------------------------------------\n# Locks\n# ------------------------------------------------------------------\n\n\nclass TestLock:\n    def test_acquire_release(self, pg_cache: PostgresCacheBackend) -> None:\n        lock = pg_cache.lock(f\"lock_{uuid4().hex[:8]}\")\n        assert lock.acquire(blocking=False)\n        assert lock.owned()\n        lock.release()\n        assert not lock.owned()\n\n    def test_contention(self, pg_cache: PostgresCacheBackend) -> None:\n        name = f\"contention_{uuid4().hex[:8]}\"\n        lock1 = pg_cache.lock(name)\n        lock2 = pg_cache.lock(name)\n\n        assert lock1.acquire(blocking=False)\n        assert not lock2.acquire(blocking=False)\n\n        lock1.release()\n        assert lock2.acquire(blocking=False)\n        lock2.release()\n\n    def test_context_manager(self, pg_cache: PostgresCacheBackend) -> None:\n        with pg_cache.lock(f\"ctx_{uuid4().hex[:8]}\") as lock:\n            assert lock.owned()\n        assert not lock.owned()\n\n    def test_blocking_timeout(self, pg_cache: PostgresCacheBackend) -> None:\n        name = f\"timeout_{uuid4().hex[:8]}\"\n        holder = pg_cache.lock(name)\n        holder.acquire(blocking=False)\n\n        waiter = pg_cache.lock(name, timeout=0.3)\n        start = time.monotonic()\n        assert not waiter.acquire(blocking=True, blocking_timeout=0.3)\n        elapsed = time.monotonic() - start\n        assert elapsed >= 0.25\n\n        holder.release()\n\n\n# ------------------------------------------------------------------\n# List (rpush / blpop)\n# ------------------------------------------------------------------\n\n\nclass TestList:\n    def test_rpush_blpop(self, pg_cache: PostgresCacheBackend) -> None:\n        k = f\"list_{uuid4().hex[:8]}\"\n        pg_cache.rpush(k, b\"item1\")\n        result = pg_cache.blpop([k], timeout=1)\n        assert result is not None\n        assert result == (k.encode(), b\"item1\")\n\n    def test_blpop_timeout(self, pg_cache: PostgresCacheBackend) -> None:\n        result = pg_cache.blpop([f\"empty_{uuid4().hex[:8]}\"], timeout=1)\n        assert result is None\n\n    def test_fifo_order(self, pg_cache: PostgresCacheBackend) -> None:\n        k = f\"fifo_{uuid4().hex[:8]}\"\n        pg_cache.rpush(k, b\"first\")\n        time.sleep(0.01)\n        pg_cache.rpush(k, b\"second\")\n\n        r1 = pg_cache.blpop([k], timeout=1)\n        r2 = pg_cache.blpop([k], timeout=1)\n        assert r1 is not None and r1[1] == b\"first\"\n        assert r2 is not None and r2[1] == b\"second\"\n\n    def test_multiple_keys(self, pg_cache: PostgresCacheBackend) -> None:\n        k1 = f\"mk1_{uuid4().hex[:8]}\"\n        k2 = f\"mk2_{uuid4().hex[:8]}\"\n        pg_cache.rpush(k2, b\"from_k2\")\n\n        result = pg_cache.blpop([k1, k2], timeout=1)\n        assert result is not None\n        assert result == (k2.encode(), b\"from_k2\")\n\n\n# ------------------------------------------------------------------\n# Cleanup\n# ------------------------------------------------------------------\n\n\nclass TestCleanup:\n    def test_removes_expired_rows(self, pg_cache: PostgresCacheBackend) -> None:\n        from onyx.db.engine.sql_engine import get_session_with_current_tenant\n\n        k = _key()\n        pg_cache.set(k, b\"stale\", ex=1)\n        time.sleep(1.5)\n        cleanup_expired_cache_entries()\n\n        stmt = select(CacheStore.key).where(CacheStore.key == k)\n        with get_session_with_current_tenant() as session:\n            row = session.execute(stmt).first()\n        assert row is None, \"expired row should be physically deleted\"\n\n    def test_preserves_unexpired_rows(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, b\"fresh\", ex=300)\n        cleanup_expired_cache_entries()\n        assert pg_cache.get(k) == b\"fresh\"\n\n    def test_preserves_no_ttl_rows(self, pg_cache: PostgresCacheBackend) -> None:\n        k = _key()\n        pg_cache.set(k, b\"permanent\")\n        cleanup_expired_cache_entries()\n        assert pg_cache.get(k) == b\"permanent\"\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/celery/test_docfetching_priority.py",
    "content": "\"\"\"\nExternal dependency unit tests for document processing job priority.\n\nTests that first-time indexing connectors (no last_successful_index_time)\nget higher priority than re-indexing jobs from connectors that have\npreviously completed indexing.\n\nUses real Redis for locking and real database objects for CC pairs and search settings.\n\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timezone\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.tasks.docfetching.task_creation_utils import (\n    try_creating_docfetching_task,\n)\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.db.enums import IndexModelStatus\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom onyx.db.models import SearchSettings\nfrom onyx.redis.redis_pool import get_redis_client\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n\ndef _create_test_connector(db_session: Session, name: str) -> Connector:\n    \"\"\"Create a test connector with all required fields.\"\"\"\n    connector = Connector(\n        name=name,\n        source=DocumentSource.FILE,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={},\n        refresh_freq=3600,\n    )\n    db_session.add(connector)\n    db_session.commit()\n    db_session.refresh(connector)\n    return connector\n\n\ndef _create_test_credential(db_session: Session) -> Credential:\n    \"\"\"Create a test credential with all required fields.\"\"\"\n    credential = Credential(\n        name=f\"test_credential_{uuid4().hex[:8]}\",\n        source=DocumentSource.FILE,\n        credential_json={},\n        admin_public=True,\n    )\n    db_session.add(credential)\n    db_session.commit()\n    db_session.refresh(credential)\n    return credential\n\n\ndef _create_test_cc_pair(\n    db_session: Session,\n    connector: Connector,\n    credential: Credential,\n    status: ConnectorCredentialPairStatus,\n    name: str,\n    last_successful_index_time: datetime | None = None,\n) -> ConnectorCredentialPair:\n    \"\"\"Create a connector credential pair with the specified status.\"\"\"\n    cc_pair = ConnectorCredentialPair(\n        name=name,\n        connector_id=connector.id,\n        credential_id=credential.id,\n        status=status,\n        access_type=AccessType.PUBLIC,\n        last_successful_index_time=last_successful_index_time,\n    )\n    db_session.add(cc_pair)\n    db_session.commit()\n    db_session.refresh(cc_pair)\n    return cc_pair\n\n\ndef _create_test_search_settings(\n    db_session: Session, index_name: str\n) -> SearchSettings:\n    \"\"\"Create test search settings with all required fields.\"\"\"\n    search_settings = SearchSettings(\n        model_name=\"test-model\",\n        model_dim=768,\n        normalize=True,\n        query_prefix=\"\",\n        passage_prefix=\"\",\n        status=IndexModelStatus.PRESENT,\n        index_name=index_name,\n        embedding_precision=EmbeddingPrecision.FLOAT,\n    )\n    db_session.add(search_settings)\n    db_session.commit()\n    db_session.refresh(search_settings)\n    return search_settings\n\n\nclass TestDocfetchingTaskPriorityWithRealObjects:\n    \"\"\"\n    Tests for document fetching task priority based on last_successful_index_time.\n\n    Uses real Redis for locking and real database objects for CC pairs\n    and search settings.\n    \"\"\"\n\n    @pytest.mark.parametrize(\n        \"has_successful_index,expected_priority\",\n        [\n            # First-time indexing (no last_successful_index_time) should get HIGH priority\n            (False, OnyxCeleryPriority.HIGH),\n            # Re-indexing (has last_successful_index_time) should get MEDIUM priority\n            (True, OnyxCeleryPriority.MEDIUM),\n        ],\n    )\n    @patch(\n        \"onyx.background.celery.tasks.docfetching.task_creation_utils.IndexingCoordination.try_create_index_attempt\"\n    )\n    def test_priority_based_on_last_successful_index_time(\n        self,\n        mock_try_create_index_attempt: MagicMock,\n        db_session: Session,\n        has_successful_index: bool,\n        expected_priority: OnyxCeleryPriority,\n    ) -> None:\n        \"\"\"\n        Test that first-time indexing connectors get higher priority than re-indexing.\n\n        Priority is determined by last_successful_index_time:\n        - None (never indexed): HIGH priority\n        - Has timestamp (previously indexed): MEDIUM priority\n\n        Uses real Redis for locking and real database objects.\n        \"\"\"\n        # Create unique names to avoid conflicts between test runs\n        unique_suffix = uuid4().hex[:8]\n\n        # Determine last_successful_index_time based on the test case\n        last_successful_index_time = (\n            datetime.now(timezone.utc) if has_successful_index else None\n        )\n\n        # Create real database objects\n        connector = _create_test_connector(\n            db_session, f\"test_connector_{has_successful_index}_{unique_suffix}\"\n        )\n        credential = _create_test_credential(db_session)\n        cc_pair = _create_test_cc_pair(\n            db_session,\n            connector,\n            credential,\n            ConnectorCredentialPairStatus.ACTIVE,\n            name=f\"test_cc_pair_{has_successful_index}_{unique_suffix}\",\n            last_successful_index_time=last_successful_index_time,\n        )\n        search_settings = _create_test_search_settings(\n            db_session, f\"test_index_{unique_suffix}\"\n        )\n\n        # Mock the index attempt creation to return a valid ID\n        mock_try_create_index_attempt.return_value = 12345\n\n        # Mock celery app to capture task submission\n        mock_celery_app = MagicMock()\n        mock_celery_app.send_task.return_value = MagicMock()\n\n        # Use real Redis client\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n\n        # Call the function with real objects\n        result = try_creating_docfetching_task(\n            celery_app=mock_celery_app,\n            cc_pair=cc_pair,\n            search_settings=search_settings,\n            reindex=False,\n            db_session=db_session,\n            r=redis_client,\n            tenant_id=TEST_TENANT_ID,\n        )\n\n        # Verify task was created\n        assert result == 12345\n\n        # Verify send_task was called with the expected priority\n        mock_celery_app.send_task.assert_called_once()\n        call_kwargs = mock_celery_app.send_task.call_args\n        actual_priority = call_kwargs.kwargs[\"priority\"]\n        assert (\n            actual_priority == expected_priority\n        ), f\"Expected priority {expected_priority} for has_successful_index={has_successful_index}, but got {actual_priority}\"\n\n    @patch(\n        \"onyx.background.celery.tasks.docfetching.task_creation_utils.IndexingCoordination.try_create_index_attempt\"\n    )\n    def test_no_task_created_when_deleting(\n        self,\n        mock_try_create_index_attempt: MagicMock,\n        db_session: Session,\n    ) -> None:\n        \"\"\"Test that no task is created when connector is in DELETING status.\"\"\"\n        unique_suffix = uuid4().hex[:8]\n\n        connector = _create_test_connector(\n            db_session, f\"test_connector_deleting_{unique_suffix}\"\n        )\n        credential = _create_test_credential(db_session)\n        cc_pair = _create_test_cc_pair(\n            db_session,\n            connector,\n            credential,\n            ConnectorCredentialPairStatus.DELETING,\n            name=f\"test_cc_pair_deleting_{unique_suffix}\",\n        )\n        search_settings = _create_test_search_settings(\n            db_session, f\"test_index_deleting_{unique_suffix}\"\n        )\n\n        mock_celery_app = MagicMock()\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n\n        result = try_creating_docfetching_task(\n            celery_app=mock_celery_app,\n            cc_pair=cc_pair,\n            search_settings=search_settings,\n            reindex=False,\n            db_session=db_session,\n            r=redis_client,\n            tenant_id=TEST_TENANT_ID,\n        )\n\n        # Verify no task was created\n        assert result is None\n        mock_celery_app.send_task.assert_not_called()\n        mock_try_create_index_attempt.assert_not_called()\n\n    @patch(\n        \"onyx.background.celery.tasks.docfetching.task_creation_utils.IndexingCoordination.try_create_index_attempt\"\n    )\n    def test_redis_lock_prevents_concurrent_task_creation(\n        self,\n        mock_try_create_index_attempt: MagicMock,\n        db_session: Session,\n    ) -> None:\n        \"\"\"\n        Test that the Redis lock prevents concurrent task creation attempts.\n\n        This test uses real Redis to verify the locking mechanism works correctly.\n        When the lock is already held, the function should return None without\n        attempting to create a task.\n        \"\"\"\n        unique_suffix = uuid4().hex[:8]\n\n        connector = _create_test_connector(\n            db_session, f\"test_connector_lock_{unique_suffix}\"\n        )\n        credential = _create_test_credential(db_session)\n        cc_pair = _create_test_cc_pair(\n            db_session,\n            connector,\n            credential,\n            ConnectorCredentialPairStatus.INITIAL_INDEXING,\n            name=f\"test_cc_pair_lock_{unique_suffix}\",\n        )\n        search_settings = _create_test_search_settings(\n            db_session, f\"test_index_lock_{unique_suffix}\"\n        )\n\n        mock_try_create_index_attempt.return_value = 12345\n        mock_celery_app = MagicMock()\n        mock_celery_app.send_task.return_value = MagicMock()\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n\n        # Acquire the lock before calling the function\n        from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX\n\n        lock = redis_client.lock(\n            DANSWER_REDIS_FUNCTION_LOCK_PREFIX + \"try_creating_indexing_task\",\n            timeout=30,\n        )\n\n        try:\n            acquired = lock.acquire(blocking=False)\n            assert acquired, \"Failed to acquire lock for test\"\n\n            # Now try to create a task - should fail because lock is held\n            result = try_creating_docfetching_task(\n                celery_app=mock_celery_app,\n                cc_pair=cc_pair,\n                search_settings=search_settings,\n                reindex=False,\n                db_session=db_session,\n                r=redis_client,\n                tenant_id=TEST_TENANT_ID,\n            )\n\n            # Should return None because lock couldn't be acquired\n            assert result is None\n            mock_celery_app.send_task.assert_not_called()\n\n        finally:\n            # Always release the lock\n            if lock.owned():\n                lock.release()\n\n    @patch(\n        \"onyx.background.celery.tasks.docfetching.task_creation_utils.IndexingCoordination.try_create_index_attempt\"\n    )\n    def test_lock_released_after_successful_task_creation(\n        self,\n        mock_try_create_index_attempt: MagicMock,\n        db_session: Session,\n    ) -> None:\n        \"\"\"\n        Test that the Redis lock is released after successful task creation.\n\n        This verifies that subsequent calls can acquire the lock and create tasks.\n        \"\"\"\n        unique_suffix = uuid4().hex[:8]\n\n        connector = _create_test_connector(\n            db_session, f\"test_connector_release_{unique_suffix}\"\n        )\n        credential = _create_test_credential(db_session)\n        cc_pair = _create_test_cc_pair(\n            db_session,\n            connector,\n            credential,\n            ConnectorCredentialPairStatus.INITIAL_INDEXING,\n            name=f\"test_cc_pair_release_{unique_suffix}\",\n        )\n        search_settings = _create_test_search_settings(\n            db_session, f\"test_index_release_{unique_suffix}\"\n        )\n\n        mock_try_create_index_attempt.return_value = 12345\n        mock_celery_app = MagicMock()\n        mock_celery_app.send_task.return_value = MagicMock()\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n\n        # First call should succeed\n        result1 = try_creating_docfetching_task(\n            celery_app=mock_celery_app,\n            cc_pair=cc_pair,\n            search_settings=search_settings,\n            reindex=False,\n            db_session=db_session,\n            r=redis_client,\n            tenant_id=TEST_TENANT_ID,\n        )\n        assert result1 == 12345\n\n        # Reset mocks for second call\n        mock_celery_app.reset_mock()\n        mock_try_create_index_attempt.reset_mock()\n        mock_try_create_index_attempt.return_value = 67890\n\n        # Second call should also succeed (lock was released)\n        result2 = try_creating_docfetching_task(\n            celery_app=mock_celery_app,\n            cc_pair=cc_pair,\n            search_settings=search_settings,\n            reindex=False,\n            db_session=db_session,\n            r=redis_client,\n            tenant_id=TEST_TENANT_ID,\n        )\n        assert result2 == 67890\n\n        # Both calls should have submitted tasks\n        mock_celery_app.send_task.assert_called_once()\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/celery/test_docprocessing_priority.py",
    "content": "\"\"\"\nExternal dependency unit tests for docprocessing task priority.\n\nTests that docprocessing tasks spawned by connector_document_extraction\nget the correct priority based on last_successful_index_time.\n\nUses real database objects for CC pairs, search settings, and index attempts.\n\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timezone\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.indexing.run_docfetching import connector_document_extraction\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.enums import IndexModelStatus\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.models import SearchSettings\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n\ndef _create_test_connector(db_session: Session, name: str) -> Connector:\n    \"\"\"Create a test connector with all required fields.\"\"\"\n    connector = Connector(\n        name=name,\n        source=DocumentSource.FILE,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={},\n        refresh_freq=3600,\n    )\n    db_session.add(connector)\n    db_session.commit()\n    db_session.refresh(connector)\n    return connector\n\n\ndef _create_test_credential(db_session: Session) -> Credential:\n    \"\"\"Create a test credential with all required fields.\"\"\"\n    credential = Credential(\n        name=f\"test_credential_{uuid4().hex[:8]}\",\n        source=DocumentSource.FILE,\n        credential_json={},\n        admin_public=True,\n    )\n    db_session.add(credential)\n    db_session.commit()\n    db_session.refresh(credential)\n    return credential\n\n\ndef _create_test_cc_pair(\n    db_session: Session,\n    connector: Connector,\n    credential: Credential,\n    status: ConnectorCredentialPairStatus,\n    name: str,\n    last_successful_index_time: datetime | None = None,\n) -> ConnectorCredentialPair:\n    \"\"\"Create a connector credential pair with the specified status.\"\"\"\n    cc_pair = ConnectorCredentialPair(\n        name=name,\n        connector_id=connector.id,\n        credential_id=credential.id,\n        status=status,\n        access_type=AccessType.PUBLIC,\n        last_successful_index_time=last_successful_index_time,\n    )\n    db_session.add(cc_pair)\n    db_session.commit()\n    db_session.refresh(cc_pair)\n    return cc_pair\n\n\ndef _create_test_search_settings(\n    db_session: Session, index_name: str\n) -> SearchSettings:\n    \"\"\"Create test search settings with all required fields.\"\"\"\n    search_settings = SearchSettings(\n        model_name=\"test-model\",\n        model_dim=768,\n        normalize=True,\n        query_prefix=\"\",\n        passage_prefix=\"\",\n        status=IndexModelStatus.PRESENT,\n        index_name=index_name,\n        embedding_precision=EmbeddingPrecision.FLOAT,\n    )\n    db_session.add(search_settings)\n    db_session.commit()\n    db_session.refresh(search_settings)\n    return search_settings\n\n\ndef _create_test_index_attempt(\n    db_session: Session,\n    cc_pair: ConnectorCredentialPair,\n    search_settings: SearchSettings,\n    from_beginning: bool = False,\n) -> IndexAttempt:\n    \"\"\"Create a test index attempt with the specified cc_pair and search_settings.\"\"\"\n    index_attempt = IndexAttempt(\n        connector_credential_pair_id=cc_pair.id,\n        search_settings_id=search_settings.id,\n        from_beginning=from_beginning,\n        status=IndexingStatus.IN_PROGRESS,\n        celery_task_id=f\"test_celery_task_{uuid4().hex[:8]}\",\n    )\n    db_session.add(index_attempt)\n    db_session.commit()\n    db_session.refresh(index_attempt)\n    return index_attempt\n\n\nclass TestDocprocessingPriorityInDocumentExtraction:\n    \"\"\"\n    Tests for docprocessing task priority within connector_document_extraction.\n\n    Verifies that the priority passed to docprocessing tasks is determined\n    by last_successful_index_time on the cc_pair.\n    \"\"\"\n\n    @pytest.mark.parametrize(\n        \"has_successful_index,expected_priority\",\n        [\n            # First-time indexing (no last_successful_index_time) should get HIGH priority\n            (False, OnyxCeleryPriority.HIGH),\n            # Re-indexing (has last_successful_index_time) should get MEDIUM priority\n            (True, OnyxCeleryPriority.MEDIUM),\n        ],\n    )\n    @patch(\"onyx.background.indexing.run_docfetching.get_document_batch_storage\")\n    @patch(\"onyx.background.indexing.run_docfetching.MemoryTracer\")\n    @patch(\"onyx.background.indexing.run_docfetching._get_connector_runner\")\n    @patch(\n        \"onyx.background.indexing.run_docfetching.strip_null_characters\",\n        side_effect=lambda batch: batch,\n    )\n    @patch(\n        \"onyx.background.indexing.run_docfetching.get_recent_completed_attempts_for_cc_pair\"\n    )\n    @patch(\n        \"onyx.background.indexing.run_docfetching.get_last_successful_attempt_poll_range_end\"\n    )\n    @patch(\"onyx.background.indexing.run_docfetching.save_checkpoint\")\n    @patch(\"onyx.background.indexing.run_docfetching.get_latest_valid_checkpoint\")\n    @patch(\"onyx.background.indexing.run_docfetching.get_redis_client\")\n    @patch(\"onyx.background.indexing.run_docfetching.ensure_source_node_exists\")\n    @patch(\"onyx.background.indexing.run_docfetching.get_source_node_id_from_cache\")\n    @patch(\"onyx.background.indexing.run_docfetching.get_node_id_from_raw_id\")\n    @patch(\"onyx.background.indexing.run_docfetching.cache_hierarchy_nodes_batch\")\n    def test_docprocessing_priority_based_on_last_successful_index_time(\n        self,\n        mock_cache_hierarchy_nodes_batch: MagicMock,  # noqa: ARG002\n        mock_get_node_id_from_raw_id: MagicMock,\n        mock_get_source_node_id_from_cache: MagicMock,\n        mock_ensure_source_node_exists: MagicMock,\n        mock_get_redis_client: MagicMock,\n        mock_get_latest_valid_checkpoint: MagicMock,\n        mock_save_checkpoint: MagicMock,  # noqa: ARG002\n        mock_get_last_successful_attempt_poll_range_end: MagicMock,\n        mock_get_recent_completed_attempts: MagicMock,\n        mock_strip_null_characters: MagicMock,  # noqa: ARG002\n        mock_get_connector_runner: MagicMock,\n        mock_memory_tracer_class: MagicMock,\n        mock_get_batch_storage: MagicMock,\n        db_session: Session,\n        has_successful_index: bool,\n        expected_priority: OnyxCeleryPriority,\n    ) -> None:\n        \"\"\"\n        Test that docprocessing tasks get the correct priority based on\n        last_successful_index_time.\n\n        Priority is determined by last_successful_index_time:\n        - None (never indexed): HIGH priority\n        - Has timestamp (previously indexed): MEDIUM priority\n\n        Uses real database objects for CC pairs and search settings.\n        \"\"\"\n        unique_suffix = uuid4().hex[:8]\n\n        # Determine last_successful_index_time based on the test case\n        last_successful_index_time = (\n            datetime.now(timezone.utc) if has_successful_index else None\n        )\n\n        # Create real database objects\n        connector = _create_test_connector(\n            db_session, f\"test_connector_docproc_{has_successful_index}_{unique_suffix}\"\n        )\n        credential = _create_test_credential(db_session)\n        cc_pair = _create_test_cc_pair(\n            db_session,\n            connector,\n            credential,\n            ConnectorCredentialPairStatus.ACTIVE,\n            name=f\"test_cc_pair_docproc_{has_successful_index}_{unique_suffix}\",\n            last_successful_index_time=last_successful_index_time,\n        )\n        search_settings = _create_test_search_settings(\n            db_session, f\"test_index_docproc_{unique_suffix}\"\n        )\n        index_attempt = _create_test_index_attempt(\n            db_session, cc_pair, search_settings, from_beginning=False\n        )\n\n        # Setup mocks\n        mock_batch_storage = MagicMock()\n        mock_get_batch_storage.return_value = mock_batch_storage\n\n        mock_memory_tracer = MagicMock()\n        mock_memory_tracer_class.return_value = mock_memory_tracer\n\n        # Mock Redis-related functions (not the focus of this test)\n        # Configure mock Redis client to return None for common operations\n        # as a safety net in case any patches don't work as expected\n        mock_redis_client = MagicMock()\n        mock_redis_client.get.return_value = None\n        mock_redis_client.hget.return_value = None\n        mock_redis_client.hset.return_value = None\n        mock_redis_client.exists.return_value = 0\n        mock_redis_client.expire.return_value = True\n        mock_get_redis_client.return_value = mock_redis_client\n\n        # Mock hierarchy/cache functions\n        mock_ensure_source_node_exists.return_value = 1  # Return a valid node ID\n        mock_get_source_node_id_from_cache.return_value = (\n            1  # Return a valid source node ID\n        )\n        mock_get_node_id_from_raw_id.return_value = (None, False)  # (node_id, found)\n        # cache_hierarchy_nodes_batch doesn't need a return value (returns None)\n\n        # Create checkpoint mocks - initial checkpoint has_more=True, final has_more=False\n        mock_initial_checkpoint = MagicMock(has_more=True)\n        mock_final_checkpoint = MagicMock(has_more=False)\n\n        # get_latest_valid_checkpoint returns (checkpoint, resuming_from_checkpoint)\n        mock_get_latest_valid_checkpoint.return_value = (mock_initial_checkpoint, False)\n\n        # Create a mock connector runner that yields one document batch\n        mock_connector = MagicMock()\n        mock_connector_runner = MagicMock()\n        mock_connector_runner.connector = mock_connector\n        # The connector runner yields (document_batch, hierarchy_nodes, failure, next_checkpoint)\n        # We provide one batch of documents to trigger a send_task call\n        mock_doc = MagicMock()\n        mock_doc.to_short_descriptor.return_value = \"test_doc\"\n        mock_doc.sections = []\n        # Set to None to avoid Redis operations trying to resolve hierarchy\n        mock_doc.parent_hierarchy_raw_node_id = None\n        mock_doc.parent_hierarchy_node_id = None\n        mock_connector_runner.run.return_value = iter(\n            [([mock_doc], None, None, mock_final_checkpoint)]\n        )\n        mock_get_connector_runner.return_value = mock_connector_runner\n\n        mock_get_recent_completed_attempts.return_value = iter([])\n        mock_get_last_successful_attempt_poll_range_end.return_value = 0\n\n        # Mock celery app to capture task submission\n        mock_celery_app = MagicMock()\n        mock_celery_app.send_task.return_value = MagicMock()\n\n        # Call the function\n        connector_document_extraction(\n            app=mock_celery_app,\n            index_attempt_id=index_attempt.id,\n            cc_pair_id=cc_pair.id,\n            search_settings_id=search_settings.id,\n            tenant_id=TEST_TENANT_ID,\n            callback=None,\n        )\n\n        # Verify send_task was called with the expected priority for docprocessing\n        assert mock_celery_app.send_task.called, \"send_task should have been called\"\n        call_kwargs = mock_celery_app.send_task.call_args\n        actual_priority = call_kwargs.kwargs[\"priority\"]\n        assert (\n            actual_priority == expected_priority\n        ), f\"Expected priority {expected_priority} for has_successful_index={has_successful_index}, but got {actual_priority}\"\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/celery/test_persona_file_sync.py",
    "content": "\"\"\"\nExternal dependency unit tests for persona file sync.\n\nValidates that:\n\n1. The check_for_user_file_project_sync beat task picks up UserFiles with\n   needs_persona_sync=True (not just needs_project_sync).\n\n2. The process_single_user_file_project_sync worker task reads persona\n   associations from the DB, passes persona_ids to the document index via\n   VespaDocumentUserFields, and clears needs_persona_sync afterwards.\n\n3. upsert_persona correctly marks affected UserFiles with\n   needs_persona_sync=True when file associations change.\n\nUses real Redis and PostgreSQL.  Document index (Vespa) calls are mocked\nsince we only need to verify the arguments passed to update_single.\n\"\"\"\n\nfrom collections.abc import Generator\nfrom contextlib import contextmanager\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom unittest.mock import PropertyMock\nfrom uuid import uuid4\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    check_for_user_file_project_sync,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    process_single_user_file_project_sync,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    user_file_project_sync_lock_key,\n)\nfrom onyx.db.enums import UserFileStatus\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Persona__UserFile\nfrom onyx.db.models import User\nfrom onyx.db.models import UserFile\nfrom onyx.db.persona import upsert_persona\nfrom onyx.document_index.interfaces import VespaDocumentUserFields\nfrom onyx.redis.redis_pool import get_redis_client\nfrom tests.external_dependency_unit.conftest import create_test_user\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef _create_completed_user_file(\n    db_session: Session,\n    user: User,\n    needs_persona_sync: bool = False,\n    needs_project_sync: bool = False,\n) -> UserFile:\n    \"\"\"Insert a UserFile in COMPLETED status.\"\"\"\n    uf = UserFile(\n        id=uuid4(),\n        user_id=user.id,\n        file_id=f\"test_file_{uuid4().hex[:8]}\",\n        name=f\"test_{uuid4().hex[:8]}.txt\",\n        file_type=\"text/plain\",\n        status=UserFileStatus.COMPLETED,\n        needs_persona_sync=needs_persona_sync,\n        needs_project_sync=needs_project_sync,\n        chunk_count=5,\n    )\n    db_session.add(uf)\n    db_session.commit()\n    db_session.refresh(uf)\n    return uf\n\n\ndef _create_test_persona(\n    db_session: Session,\n    user: User,\n    user_files: list[UserFile] | None = None,\n) -> Persona:\n    \"\"\"Create a minimal Persona via direct model insert.\"\"\"\n    persona = Persona(\n        name=f\"Test Persona {uuid4().hex[:8]}\",\n        description=\"Test persona\",\n        system_prompt=\"You are a test assistant\",\n        task_prompt=\"Answer the question\",\n        tools=[],\n        document_sets=[],\n        users=[user],\n        groups=[],\n        is_listed=True,\n        is_public=True,\n        display_priority=None,\n        starter_messages=None,\n        deleted=False,\n        user_files=user_files or [],\n        user_id=user.id,\n    )\n    db_session.add(persona)\n    db_session.commit()\n    db_session.refresh(persona)\n    return persona\n\n\ndef _link_file_to_persona(\n    db_session: Session, persona: Persona, user_file: UserFile\n) -> None:\n    \"\"\"Create the join table row between a persona and a user file.\"\"\"\n    link = Persona__UserFile(persona_id=persona.id, user_file_id=user_file.id)\n    db_session.add(link)\n    db_session.commit()\n\n\n_PATCH_QUEUE_DEPTH = \"onyx.background.celery.tasks.user_file_processing.tasks.get_user_file_project_sync_queue_depth\"\n\n\n@contextmanager\ndef _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, None]:\n    \"\"\"Patch the ``app`` property on a bound Celery task.\"\"\"\n    task_instance = task.run.__self__\n    with (\n        patch.object(\n            type(task_instance),\n            \"app\",\n            new_callable=PropertyMock,\n            return_value=mock_app,\n        ),\n        patch(_PATCH_QUEUE_DEPTH, return_value=0),\n        patch(\n            \"onyx.background.celery.tasks.user_file_processing.tasks.celery_get_broker_client\",\n            return_value=MagicMock(),\n        ),\n    ):\n        yield\n\n\n# ---------------------------------------------------------------------------\n# Test: check_for_user_file_project_sync picks up persona sync\n# ---------------------------------------------------------------------------\n\n\nclass TestCheckSweepIncludesPersonaSync:\n    \"\"\"The beat task must pick up files needing persona sync, not just project sync.\"\"\"\n\n    def test_persona_sync_flag_enqueues_task(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"A file with needs_persona_sync=True (and COMPLETED) gets enqueued.\"\"\"\n        user = create_test_user(db_session, \"persona_sweep\")\n        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)\n\n        mock_app = MagicMock()\n\n        with _patch_task_app(check_for_user_file_project_sync, mock_app):\n            check_for_user_file_project_sync.run(tenant_id=TEST_TENANT_ID)\n\n        enqueued_ids = {\n            call.kwargs[\"kwargs\"][\"user_file_id\"]\n            for call in mock_app.send_task.call_args_list\n        }\n        assert str(uf.id) in enqueued_ids\n\n    def test_neither_flag_does_not_enqueue(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"A file with both flags False is not enqueued.\"\"\"\n        user = create_test_user(db_session, \"no_sync\")\n        uf = _create_completed_user_file(db_session, user)\n\n        mock_app = MagicMock()\n\n        with _patch_task_app(check_for_user_file_project_sync, mock_app):\n            check_for_user_file_project_sync.run(tenant_id=TEST_TENANT_ID)\n\n        enqueued_ids = {\n            call.kwargs[\"kwargs\"][\"user_file_id\"]\n            for call in mock_app.send_task.call_args_list\n        }\n        assert str(uf.id) not in enqueued_ids\n\n    def test_both_flags_enqueues_once(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"A file with BOTH flags True is enqueued exactly once.\"\"\"\n        user = create_test_user(db_session, \"both_flags\")\n        uf = _create_completed_user_file(\n            db_session, user, needs_persona_sync=True, needs_project_sync=True\n        )\n\n        mock_app = MagicMock()\n\n        with _patch_task_app(check_for_user_file_project_sync, mock_app):\n            check_for_user_file_project_sync.run(tenant_id=TEST_TENANT_ID)\n\n        matching_calls = [\n            call\n            for call in mock_app.send_task.call_args_list\n            if call.kwargs[\"kwargs\"][\"user_file_id\"] == str(uf.id)\n        ]\n        assert len(matching_calls) == 1\n\n\n# ---------------------------------------------------------------------------\n# Test: process_single_user_file_project_sync passes persona_ids to index\n# ---------------------------------------------------------------------------\n\n_PATCH_GET_SETTINGS = (\n    \"onyx.background.celery.tasks.user_file_processing.tasks.get_active_search_settings\"\n)\n_PATCH_GET_INDICES = (\n    \"onyx.background.celery.tasks.user_file_processing.tasks.get_all_document_indices\"\n)\n_PATCH_HTTPX_INIT = (\n    \"onyx.background.celery.tasks.user_file_processing.tasks.httpx_init_vespa_pool\"\n)\n_PATCH_DISABLE_VDB = (\n    \"onyx.background.celery.tasks.user_file_processing.tasks.DISABLE_VECTOR_DB\"\n)\n\n\nclass TestSyncTaskWritesPersonaIds:\n    \"\"\"The sync task reads persona associations and sends them to the index.\"\"\"\n\n    def test_passes_persona_ids_to_update_single(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"After linking a file to a persona, sync sends the persona ID.\"\"\"\n        user = create_test_user(db_session, \"sync_persona\")\n        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)\n        persona = _create_test_persona(db_session, user)\n        _link_file_to_persona(db_session, persona, uf)\n\n        mock_doc_index = MagicMock()\n        mock_search_settings = MagicMock()\n        mock_search_settings.primary = MagicMock()\n        mock_search_settings.secondary = None\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n        lock_key = user_file_project_sync_lock_key(str(uf.id))\n        redis_client.delete(lock_key)\n\n        with (\n            patch(_PATCH_DISABLE_VDB, False),\n            patch(_PATCH_HTTPX_INIT),\n            patch(_PATCH_GET_SETTINGS, return_value=mock_search_settings),\n            patch(_PATCH_GET_INDICES, return_value=[mock_doc_index]),\n        ):\n            process_single_user_file_project_sync.run(\n                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID\n            )\n\n        mock_doc_index.update_single.assert_called_once()\n        call_args = mock_doc_index.update_single.call_args\n        user_fields: VespaDocumentUserFields = call_args.kwargs[\"user_fields\"]\n        assert user_fields.personas is not None\n        assert persona.id in user_fields.personas\n        assert call_args.args[0] == str(uf.id)\n\n    def test_clears_persona_sync_flag(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"After a successful sync the needs_persona_sync flag is cleared.\"\"\"\n        user = create_test_user(db_session, \"sync_clear\")\n        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n        lock_key = user_file_project_sync_lock_key(str(uf.id))\n        redis_client.delete(lock_key)\n\n        with patch(_PATCH_DISABLE_VDB, True):\n            process_single_user_file_project_sync.run(\n                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID\n            )\n\n        db_session.refresh(uf)\n        assert uf.needs_persona_sync is False\n\n    def test_passes_both_project_and_persona_ids(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"A file linked to both a project and a persona gets both IDs.\"\"\"\n        from onyx.db.models import Project__UserFile\n        from onyx.db.models import UserProject\n\n        user = create_test_user(db_session, \"sync_both\")\n        uf = _create_completed_user_file(\n            db_session, user, needs_persona_sync=True, needs_project_sync=True\n        )\n        persona = _create_test_persona(db_session, user)\n        _link_file_to_persona(db_session, persona, uf)\n\n        project = UserProject(user_id=user.id, name=\"test-project\", instructions=\"\")\n        db_session.add(project)\n        db_session.commit()\n        db_session.refresh(project)\n\n        link = Project__UserFile(project_id=project.id, user_file_id=uf.id)\n        db_session.add(link)\n        db_session.commit()\n\n        mock_doc_index = MagicMock()\n        mock_search_settings = MagicMock()\n        mock_search_settings.primary = MagicMock()\n        mock_search_settings.secondary = None\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n        lock_key = user_file_project_sync_lock_key(str(uf.id))\n        redis_client.delete(lock_key)\n\n        with (\n            patch(_PATCH_DISABLE_VDB, False),\n            patch(_PATCH_HTTPX_INIT),\n            patch(_PATCH_GET_SETTINGS, return_value=mock_search_settings),\n            patch(_PATCH_GET_INDICES, return_value=[mock_doc_index]),\n        ):\n            process_single_user_file_project_sync.run(\n                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID\n            )\n\n        call_kwargs = mock_doc_index.update_single.call_args.kwargs\n        user_fields: VespaDocumentUserFields = call_kwargs[\"user_fields\"]\n        assert user_fields.personas is not None\n        assert user_fields.user_projects is not None\n        assert persona.id in user_fields.personas\n        assert project.id in user_fields.user_projects\n\n        # Both flags should be cleared\n        db_session.refresh(uf)\n        assert uf.needs_persona_sync is False\n        assert uf.needs_project_sync is False\n\n    def test_deleted_persona_excluded_from_ids(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"A soft-deleted persona should NOT appear in the persona_ids sent to Vespa.\"\"\"\n        user = create_test_user(db_session, \"sync_deleted\")\n        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)\n        persona = _create_test_persona(db_session, user)\n        _link_file_to_persona(db_session, persona, uf)\n\n        persona.deleted = True\n        db_session.commit()\n\n        mock_doc_index = MagicMock()\n        mock_search_settings = MagicMock()\n        mock_search_settings.primary = MagicMock()\n        mock_search_settings.secondary = None\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n        lock_key = user_file_project_sync_lock_key(str(uf.id))\n        redis_client.delete(lock_key)\n\n        with (\n            patch(_PATCH_DISABLE_VDB, False),\n            patch(_PATCH_HTTPX_INIT),\n            patch(_PATCH_GET_SETTINGS, return_value=mock_search_settings),\n            patch(_PATCH_GET_INDICES, return_value=[mock_doc_index]),\n        ):\n            process_single_user_file_project_sync.run(\n                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID\n            )\n\n        call_kwargs = mock_doc_index.update_single.call_args.kwargs\n        user_fields: VespaDocumentUserFields = call_kwargs[\"user_fields\"]\n        assert user_fields.personas is not None\n        assert persona.id not in user_fields.personas\n\n\n# ---------------------------------------------------------------------------\n# Test: upsert_persona marks files for persona sync\n# ---------------------------------------------------------------------------\n\n\nclass TestUpsertPersonaMarksSyncFlag:\n    \"\"\"upsert_persona must set needs_persona_sync on affected UserFiles.\"\"\"\n\n    def test_creating_persona_with_files_marks_sync(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        user = create_test_user(db_session, \"upsert_create\")\n        uf = _create_completed_user_file(db_session, user)\n        assert uf.needs_persona_sync is False\n\n        upsert_persona(\n            user=user,\n            name=f\"persona-{uuid4().hex[:8]}\",\n            description=\"test\",\n            llm_model_provider_override=None,\n            llm_model_version_override=None,\n            starter_messages=None,\n            system_prompt=\"test\",\n            task_prompt=\"test\",\n            datetime_aware=None,\n            is_public=True,\n            db_session=db_session,\n            user_file_ids=[uf.id],\n        )\n\n        db_session.refresh(uf)\n        assert uf.needs_persona_sync is True\n\n    def test_updating_persona_files_marks_both_old_and_new(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"When file associations change, both the removed and added files are flagged.\"\"\"\n        user = create_test_user(db_session, \"upsert_update\")\n        uf_old = _create_completed_user_file(db_session, user)\n        uf_new = _create_completed_user_file(db_session, user)\n\n        persona = upsert_persona(\n            user=user,\n            name=f\"persona-{uuid4().hex[:8]}\",\n            description=\"test\",\n            llm_model_provider_override=None,\n            llm_model_version_override=None,\n            starter_messages=None,\n            system_prompt=\"test\",\n            task_prompt=\"test\",\n            datetime_aware=None,\n            is_public=True,\n            db_session=db_session,\n            user_file_ids=[uf_old.id],\n        )\n\n        # Clear the flag from creation so we can observe the update\n        uf_old.needs_persona_sync = False\n        db_session.commit()\n\n        # Now update the persona to swap files\n        upsert_persona(\n            user=user,\n            name=persona.name,\n            description=persona.description,\n            llm_model_provider_override=None,\n            llm_model_version_override=None,\n            starter_messages=None,\n            system_prompt=persona.system_prompt,\n            task_prompt=persona.task_prompt,\n            datetime_aware=None,\n            is_public=persona.is_public,\n            db_session=db_session,\n            persona_id=persona.id,\n            user_file_ids=[uf_new.id],\n        )\n\n        db_session.refresh(uf_old)\n        db_session.refresh(uf_new)\n        assert uf_old.needs_persona_sync is True, \"Removed file should be flagged\"\n        assert uf_new.needs_persona_sync is True, \"Added file should be flagged\"\n\n    def test_removing_all_files_marks_old_files(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Removing all files from a persona flags the previously associated files.\"\"\"\n        user = create_test_user(db_session, \"upsert_remove\")\n        uf = _create_completed_user_file(db_session, user)\n\n        persona = upsert_persona(\n            user=user,\n            name=f\"persona-{uuid4().hex[:8]}\",\n            description=\"test\",\n            llm_model_provider_override=None,\n            llm_model_version_override=None,\n            starter_messages=None,\n            system_prompt=\"test\",\n            task_prompt=\"test\",\n            datetime_aware=None,\n            is_public=True,\n            db_session=db_session,\n            user_file_ids=[uf.id],\n        )\n\n        uf.needs_persona_sync = False\n        db_session.commit()\n\n        upsert_persona(\n            user=user,\n            name=persona.name,\n            description=persona.description,\n            llm_model_provider_override=None,\n            llm_model_version_override=None,\n            starter_messages=None,\n            system_prompt=persona.system_prompt,\n            task_prompt=persona.task_prompt,\n            datetime_aware=None,\n            is_public=persona.is_public,\n            db_session=db_session,\n            persona_id=persona.id,\n            user_file_ids=[],\n        )\n\n        db_session.refresh(uf)\n        assert uf.needs_persona_sync is True\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/celery/test_pruning_hierarchy_nodes.py",
    "content": "\"\"\"\nExternal dependency unit tests for pruning hierarchy node extraction and DB persistence.\n\nVerifies that:\n1. extract_ids_from_runnable_connector correctly separates hierarchy nodes from doc IDs\n2. Extracted hierarchy nodes are correctly upserted to Postgres via upsert_hierarchy_nodes_batch\n3. Upserting is idempotent (running twice doesn't duplicate nodes)\n4. Document-to-hierarchy-node linkage is updated during pruning\n5. link_hierarchy_nodes_to_documents links nodes that are also documents\n6. HierarchyNodeByConnectorCredentialPair join table population and pruning\n7. Orphaned hierarchy node deletion and re-parenting\n\nUses a mock SlimConnectorWithPermSync that yields known hierarchy nodes and slim documents,\ncombined with a real PostgreSQL database for verifying persistence.\n\"\"\"\n\nfrom collections.abc import Iterator\nfrom typing import Any\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.background.celery.celery_utils import extract_ids_from_runnable_connector\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.interfaces import GenerateSlimDocumentOutput\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.interfaces import SlimConnectorWithPermSync\nfrom onyx.connectors.models import HierarchyNode as PydanticHierarchyNode\nfrom onyx.connectors.models import InputType\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.db.hierarchy import delete_orphaned_hierarchy_nodes\nfrom onyx.db.hierarchy import ensure_source_node_exists\nfrom onyx.db.hierarchy import get_all_hierarchy_nodes_for_source\nfrom onyx.db.hierarchy import get_hierarchy_node_by_raw_id\nfrom onyx.db.hierarchy import link_hierarchy_nodes_to_documents\nfrom onyx.db.hierarchy import remove_stale_hierarchy_node_cc_pair_entries\nfrom onyx.db.hierarchy import reparent_orphaned_hierarchy_nodes\nfrom onyx.db.hierarchy import update_document_parent_hierarchy_nodes\nfrom onyx.db.hierarchy import upsert_hierarchy_node_cc_pair_entries\nfrom onyx.db.hierarchy import upsert_hierarchy_nodes_batch\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom onyx.db.models import Document as DbDocument\nfrom onyx.db.models import HierarchyNode as DBHierarchyNode\nfrom onyx.db.models import HierarchyNodeByConnectorCredentialPair\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\nfrom onyx.kg.models import KGStage\n\n# ---------------------------------------------------------------------------\n# Constants\n# ---------------------------------------------------------------------------\n\nTEST_SOURCE = DocumentSource.SLACK\n\nCHANNEL_A_ID = \"C_GENERAL\"\nCHANNEL_A_NAME = \"#general\"\nCHANNEL_B_ID = \"C_RANDOM\"\nCHANNEL_B_NAME = \"#random\"\nCHANNEL_C_ID = \"C_ENGINEERING\"\nCHANNEL_C_NAME = \"#engineering\"\n\nSLIM_DOC_IDS = [\"msg-001\", \"msg-002\", \"msg-003\"]\n\n\n# ---------------------------------------------------------------------------\n# Mock connector\n# ---------------------------------------------------------------------------\n\n\ndef _make_hierarchy_nodes() -> list[PydanticHierarchyNode]:\n    \"\"\"Build a known set of hierarchy nodes resembling Slack channels.\"\"\"\n    return [\n        PydanticHierarchyNode(\n            raw_node_id=CHANNEL_A_ID,\n            raw_parent_id=None,\n            display_name=CHANNEL_A_NAME,\n            link=\"https://slack.example.com/channels/general\",\n            node_type=HierarchyNodeType.CHANNEL,\n            external_access=ExternalAccess(\n                external_user_emails={\"alice@example.com\", \"bob@example.com\"},\n                external_user_group_ids=set(),\n                is_public=False,\n            ),\n        ),\n        PydanticHierarchyNode(\n            raw_node_id=CHANNEL_B_ID,\n            raw_parent_id=None,\n            display_name=CHANNEL_B_NAME,\n            link=\"https://slack.example.com/channels/random\",\n            node_type=HierarchyNodeType.CHANNEL,\n        ),\n        PydanticHierarchyNode(\n            raw_node_id=CHANNEL_C_ID,\n            raw_parent_id=None,\n            display_name=CHANNEL_C_NAME,\n            link=\"https://slack.example.com/channels/engineering\",\n            node_type=HierarchyNodeType.CHANNEL,\n            external_access=ExternalAccess(\n                external_user_emails=set(),\n                external_user_group_ids={\"eng-team\"},\n                is_public=True,\n            ),\n        ),\n    ]\n\n\nDOC_PARENT_MAP = {\n    \"msg-001\": CHANNEL_A_ID,\n    \"msg-002\": CHANNEL_A_ID,\n    \"msg-003\": CHANNEL_B_ID,\n}\n\n\ndef _make_slim_docs() -> list[SlimDocument | PydanticHierarchyNode]:\n    return [\n        SlimDocument(id=doc_id, parent_hierarchy_raw_node_id=DOC_PARENT_MAP.get(doc_id))\n        for doc_id in SLIM_DOC_IDS\n    ]\n\n\nclass MockSlimConnectorWithPermSync(SlimConnectorWithPermSync):\n    \"\"\"Yields a batch containing interleaved hierarchy nodes and slim docs.\"\"\"\n\n    def load_credentials(\n        self,\n        credentials: dict[str, Any],  # noqa: ARG002\n    ) -> dict[str, Any] | None:  # noqa: ARG002\n        return None\n\n    def retrieve_all_slim_docs_perm_sync(\n        self,\n        start: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002\n        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002\n        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002\n    ) -> GenerateSlimDocumentOutput:\n        return self._generate()\n\n    def _generate(self) -> Iterator[list[SlimDocument | PydanticHierarchyNode]]:\n        # First batch: hierarchy nodes + first slim doc\n        batch_1: list[SlimDocument | PydanticHierarchyNode] = [\n            *_make_hierarchy_nodes(),\n            _make_slim_docs()[0],\n        ]\n        yield batch_1\n\n        # Second batch: remaining slim docs only (no hierarchy nodes)\n        yield _make_slim_docs()[1:]\n\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef _create_cc_pair(\n    db_session: Session,\n    source: DocumentSource = TEST_SOURCE,\n) -> ConnectorCredentialPair:\n    \"\"\"Create a real Connector + Credential + ConnectorCredentialPair for testing.\"\"\"\n    connector = Connector(\n        name=f\"Test {source.value} Connector\",\n        source=source,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={},\n    )\n    db_session.add(connector)\n    db_session.flush()\n\n    credential = Credential(\n        source=source,\n        credential_json={},\n        admin_public=True,\n    )\n    db_session.add(credential)\n    db_session.flush()\n    db_session.expire(credential)\n\n    cc_pair = ConnectorCredentialPair(\n        connector_id=connector.id,\n        credential_id=credential.id,\n        name=f\"Test {source.value} CC Pair\",\n        status=ConnectorCredentialPairStatus.ACTIVE,\n        access_type=AccessType.PUBLIC,\n    )\n    db_session.add(cc_pair)\n    db_session.commit()\n    db_session.refresh(cc_pair)\n    return cc_pair\n\n\ndef _cleanup_test_data(db_session: Session) -> None:\n    \"\"\"Remove all test hierarchy nodes and documents to isolate tests.\"\"\"\n    for doc_id in SLIM_DOC_IDS:\n        db_session.query(DbDocument).filter(DbDocument.id == doc_id).delete()\n\n    test_connector_ids_q = db_session.query(Connector.id).filter(\n        Connector.source == TEST_SOURCE,\n        Connector.name.like(\"Test %\"),\n    )\n\n    db_session.query(HierarchyNodeByConnectorCredentialPair).filter(\n        HierarchyNodeByConnectorCredentialPair.connector_id.in_(test_connector_ids_q)\n    ).delete(synchronize_session=\"fetch\")\n    db_session.query(DBHierarchyNode).filter(\n        DBHierarchyNode.source == TEST_SOURCE\n    ).delete()\n    db_session.flush()\n\n    # Collect credential IDs before deleting cc_pairs (bulk query.delete()\n    # bypasses ORM-level cascade, so credentials won't be auto-removed).\n    credential_ids = [\n        row[0]\n        for row in db_session.query(ConnectorCredentialPair.credential_id)\n        .filter(ConnectorCredentialPair.connector_id.in_(test_connector_ids_q))\n        .all()\n    ]\n\n    db_session.query(ConnectorCredentialPair).filter(\n        ConnectorCredentialPair.connector_id.in_(test_connector_ids_q)\n    ).delete(synchronize_session=\"fetch\")\n    db_session.query(Connector).filter(\n        Connector.source == TEST_SOURCE,\n        Connector.name.like(\"Test %\"),\n    ).delete(synchronize_session=\"fetch\")\n    if credential_ids:\n        db_session.query(Credential).filter(Credential.id.in_(credential_ids)).delete(\n            synchronize_session=\"fetch\"\n        )\n    db_session.commit()\n\n\ndef _create_test_documents(db_session: Session) -> list[DbDocument]:\n    \"\"\"Insert minimal Document rows for our test doc IDs.\"\"\"\n    docs = []\n    for doc_id in SLIM_DOC_IDS:\n        doc = DbDocument(\n            id=doc_id,\n            semantic_id=doc_id,\n            kg_stage=KGStage.NOT_STARTED,\n        )\n        db_session.add(doc)\n        docs.append(doc)\n    db_session.commit()\n    return docs\n\n\n# ---------------------------------------------------------------------------\n# Tests\n# ---------------------------------------------------------------------------\n\n\ndef test_pruning_extracts_hierarchy_nodes(\n    db_session: Session,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"extract_ids_from_runnable_connector must separate hierarchy node IDs and\n    document IDs into the correct buckets of the SlimConnectorExtractionResult.\"\"\"\n    connector = MockSlimConnectorWithPermSync()\n\n    result = extract_ids_from_runnable_connector(connector, callback=None)\n\n    # raw_id_to_parent should contain ONLY document IDs, not hierarchy node IDs\n    assert result.raw_id_to_parent.keys() == set(SLIM_DOC_IDS)\n\n    # Hierarchy nodes should be the 3 channels\n    assert len(result.hierarchy_nodes) == 3\n    extracted_raw_ids = {n.raw_node_id for n in result.hierarchy_nodes}\n    assert extracted_raw_ids == {CHANNEL_A_ID, CHANNEL_B_ID, CHANNEL_C_ID}\n\n\ndef test_pruning_upserts_hierarchy_nodes_to_db(db_session: Session) -> None:\n    \"\"\"Full flow: extract hierarchy nodes from mock connector, upsert to Postgres,\n    then verify the DB state (node count, parent relationships, permissions).\"\"\"\n    _cleanup_test_data(db_session)\n\n    # Step 1: ensure the SOURCE node exists (mirrors what the pruning task does)\n    source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)\n\n    # Step 2: extract from mock connector\n    connector = MockSlimConnectorWithPermSync()\n    result = extract_ids_from_runnable_connector(connector, callback=None)\n    assert len(result.hierarchy_nodes) == 3\n\n    # Step 3: upsert hierarchy nodes (public connector = False)\n    upserted = upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=result.hierarchy_nodes,\n        source=TEST_SOURCE,\n        commit=True,\n        is_connector_public=False,\n    )\n    assert len(upserted) == 3\n\n    # Step 4: verify DB state\n    all_nodes = get_all_hierarchy_nodes_for_source(db_session, TEST_SOURCE)\n    # 3 channel nodes + 1 SOURCE node\n    assert len(all_nodes) == 4\n\n    # Verify each channel node\n    channel_a = get_hierarchy_node_by_raw_id(db_session, CHANNEL_A_ID, TEST_SOURCE)\n    assert channel_a is not None\n    assert channel_a.display_name == CHANNEL_A_NAME\n    assert channel_a.node_type == HierarchyNodeType.CHANNEL\n    assert channel_a.link == \"https://slack.example.com/channels/general\"\n    # Parent should be the SOURCE node (raw_parent_id was None)\n    assert channel_a.parent_id == source_node.id\n    # Permission fields for channel A (private, has user emails)\n    assert channel_a.is_public is False\n    assert channel_a.external_user_emails is not None\n    assert set(channel_a.external_user_emails) == {\n        \"alice@example.com\",\n        \"bob@example.com\",\n    }\n\n    channel_b = get_hierarchy_node_by_raw_id(db_session, CHANNEL_B_ID, TEST_SOURCE)\n    assert channel_b is not None\n    assert channel_b.display_name == CHANNEL_B_NAME\n    assert channel_b.parent_id == source_node.id\n    # Channel B has no external_access -> defaults to not public, no emails/groups\n    assert channel_b.is_public is False\n    assert channel_b.external_user_emails is None\n    assert channel_b.external_user_group_ids is None\n\n    channel_c = get_hierarchy_node_by_raw_id(db_session, CHANNEL_C_ID, TEST_SOURCE)\n    assert channel_c is not None\n    assert channel_c.display_name == CHANNEL_C_NAME\n    assert channel_c.parent_id == source_node.id\n    # Channel C is public and has a group\n    assert channel_c.is_public is True\n    assert channel_c.external_user_group_ids is not None\n    assert set(channel_c.external_user_group_ids) == {\"eng-team\"}\n\n\ndef test_pruning_upserts_hierarchy_nodes_public_connector(\n    db_session: Session,\n) -> None:\n    \"\"\"When the connector's access type is PUBLIC, all hierarchy nodes must be\n    marked is_public=True regardless of their external_access settings.\"\"\"\n    _cleanup_test_data(db_session)\n\n    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)\n\n    connector = MockSlimConnectorWithPermSync()\n    result = extract_ids_from_runnable_connector(connector, callback=None)\n\n    upserted = upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=result.hierarchy_nodes,\n        source=TEST_SOURCE,\n        commit=True,\n        is_connector_public=True,\n    )\n    assert len(upserted) == 3\n\n    # Every node should be public\n    for node in upserted:\n        assert node.is_public is True\n        # Public connector forces emails/groups to None\n        assert node.external_user_emails is None\n        assert node.external_user_group_ids is None\n\n\ndef test_pruning_hierarchy_node_upsert_idempotency(db_session: Session) -> None:\n    \"\"\"Upserting the same hierarchy nodes twice must not create duplicates.\n    The second call should update existing rows in place.\"\"\"\n    _cleanup_test_data(db_session)\n\n    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)\n\n    nodes = _make_hierarchy_nodes()\n\n    # First upsert\n    first_result = upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=nodes,\n        source=TEST_SOURCE,\n        commit=True,\n        is_connector_public=False,\n    )\n    first_ids = {n.id for n in first_result}\n    all_after_first = get_all_hierarchy_nodes_for_source(db_session, TEST_SOURCE)\n    count_after_first = len(all_after_first)\n\n    # Second upsert with the same nodes\n    second_result = upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=nodes,\n        source=TEST_SOURCE,\n        commit=True,\n        is_connector_public=False,\n    )\n    second_ids = {n.id for n in second_result}\n    all_after_second = get_all_hierarchy_nodes_for_source(db_session, TEST_SOURCE)\n    count_after_second = len(all_after_second)\n\n    # No new rows should have been created\n    assert count_after_first == count_after_second\n    # Same DB primary keys should have been returned\n    assert first_ids == second_ids\n\n\ndef test_pruning_hierarchy_node_upsert_updates_fields(db_session: Session) -> None:\n    \"\"\"Upserting a hierarchy node with changed fields should update the existing row.\"\"\"\n    _cleanup_test_data(db_session)\n\n    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)\n\n    original_node = PydanticHierarchyNode(\n        raw_node_id=CHANNEL_A_ID,\n        raw_parent_id=None,\n        display_name=CHANNEL_A_NAME,\n        link=\"https://slack.example.com/channels/general\",\n        node_type=HierarchyNodeType.CHANNEL,\n    )\n    upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=[original_node],\n        source=TEST_SOURCE,\n        commit=True,\n        is_connector_public=False,\n    )\n\n    # Now upsert again with updated display_name and permissions\n    updated_node = PydanticHierarchyNode(\n        raw_node_id=CHANNEL_A_ID,\n        raw_parent_id=None,\n        display_name=\"#general-renamed\",\n        link=\"https://slack.example.com/channels/general-renamed\",\n        node_type=HierarchyNodeType.CHANNEL,\n        external_access=ExternalAccess(\n            external_user_emails={\"new_user@example.com\"},\n            external_user_group_ids=set(),\n            is_public=True,\n        ),\n    )\n    upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=[updated_node],\n        source=TEST_SOURCE,\n        commit=True,\n        is_connector_public=False,\n    )\n\n    db_node = get_hierarchy_node_by_raw_id(db_session, CHANNEL_A_ID, TEST_SOURCE)\n    assert db_node is not None\n    assert db_node.display_name == \"#general-renamed\"\n    assert db_node.link == \"https://slack.example.com/channels/general-renamed\"\n    assert db_node.is_public is True\n    assert db_node.external_user_emails is not None\n    assert set(db_node.external_user_emails) == {\"new_user@example.com\"}\n\n\n# ---------------------------------------------------------------------------\n# Document-to-hierarchy-node linkage tests\n# ---------------------------------------------------------------------------\n\n\ndef test_extraction_preserves_parent_hierarchy_raw_node_id(\n    db_session: Session,  # noqa: ARG001\n) -> None:\n    \"\"\"extract_ids_from_runnable_connector should carry the\n    parent_hierarchy_raw_node_id from SlimDocument into the raw_id_to_parent dict.\"\"\"\n    connector = MockSlimConnectorWithPermSync()\n    result = extract_ids_from_runnable_connector(connector, callback=None)\n\n    for doc_id, expected_parent in DOC_PARENT_MAP.items():\n        assert (\n            result.raw_id_to_parent[doc_id] == expected_parent\n        ), f\"raw_id_to_parent[{doc_id}] should be {expected_parent}\"\n\n    # Hierarchy node IDs should NOT be in raw_id_to_parent\n    for channel_id in [CHANNEL_A_ID, CHANNEL_B_ID, CHANNEL_C_ID]:\n        assert channel_id not in result.raw_id_to_parent\n\n\ndef test_update_document_parent_hierarchy_nodes(db_session: Session) -> None:\n    \"\"\"update_document_parent_hierarchy_nodes should set\n    Document.parent_hierarchy_node_id for each document in the mapping.\"\"\"\n    _cleanup_test_data(db_session)\n\n    source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)\n    upserted = upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=_make_hierarchy_nodes(),\n        source=TEST_SOURCE,\n        commit=True,\n        is_connector_public=False,\n    )\n    node_id_by_raw = {n.raw_node_id: n.id for n in upserted}\n\n    # Create documents with no parent set\n    docs = _create_test_documents(db_session)\n    for doc in docs:\n        assert doc.parent_hierarchy_node_id is None\n\n    # Build resolved map (same logic as _resolve_and_update_document_parents)\n    resolved: dict[str, int | None] = {}\n    for doc_id, raw_parent in DOC_PARENT_MAP.items():\n        resolved[doc_id] = node_id_by_raw.get(raw_parent, source_node.id)\n\n    updated = update_document_parent_hierarchy_nodes(\n        db_session=db_session,\n        doc_parent_map=resolved,\n        commit=True,\n    )\n    assert updated == len(SLIM_DOC_IDS)\n\n    # Verify each document now points to the correct hierarchy node\n    db_session.expire_all()\n    for doc_id, raw_parent in DOC_PARENT_MAP.items():\n        tmp_doc = db_session.get(DbDocument, doc_id)\n        assert tmp_doc is not None\n        doc = tmp_doc\n        expected_node_id = node_id_by_raw[raw_parent]\n        assert (\n            doc.parent_hierarchy_node_id == expected_node_id\n        ), f\"Document {doc_id} should point to node for {raw_parent}\"\n\n\ndef test_update_document_parent_is_idempotent(db_session: Session) -> None:\n    \"\"\"Running update_document_parent_hierarchy_nodes a second time with the\n    same mapping should update zero rows.\"\"\"\n    _cleanup_test_data(db_session)\n\n    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)\n    upserted = upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=_make_hierarchy_nodes(),\n        source=TEST_SOURCE,\n        commit=True,\n        is_connector_public=False,\n    )\n    node_id_by_raw = {n.raw_node_id: n.id for n in upserted}\n    _create_test_documents(db_session)\n\n    resolved: dict[str, int | None] = {\n        doc_id: node_id_by_raw[raw_parent]\n        for doc_id, raw_parent in DOC_PARENT_MAP.items()\n    }\n\n    first_updated = update_document_parent_hierarchy_nodes(\n        db_session=db_session,\n        doc_parent_map=resolved,\n        commit=True,\n    )\n    assert first_updated == len(SLIM_DOC_IDS)\n\n    second_updated = update_document_parent_hierarchy_nodes(\n        db_session=db_session,\n        doc_parent_map=resolved,\n        commit=True,\n    )\n    assert second_updated == 0\n\n\ndef test_link_hierarchy_nodes_to_documents_for_confluence(\n    db_session: Session,\n) -> None:\n    \"\"\"For sources in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS (e.g. Confluence),\n    link_hierarchy_nodes_to_documents should set HierarchyNode.document_id\n    when a hierarchy node's raw_node_id matches a document ID.\"\"\"\n    _cleanup_test_data(db_session)\n    confluence_source = DocumentSource.CONFLUENCE\n\n    # Clean up any existing Confluence hierarchy nodes\n    db_session.query(DBHierarchyNode).filter(\n        DBHierarchyNode.source == confluence_source\n    ).delete()\n    db_session.commit()\n\n    ensure_source_node_exists(db_session, confluence_source, commit=True)\n\n    # Create a hierarchy node whose raw_node_id matches a document ID\n    page_node_id = \"confluence-page-123\"\n    nodes = [\n        PydanticHierarchyNode(\n            raw_node_id=page_node_id,\n            raw_parent_id=None,\n            display_name=\"Test Page\",\n            link=\"https://wiki.example.com/page/123\",\n            node_type=HierarchyNodeType.PAGE,\n        ),\n    ]\n    upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=nodes,\n        source=confluence_source,\n        commit=True,\n        is_connector_public=False,\n    )\n\n    # Verify the node exists but has no document_id yet\n    db_node = get_hierarchy_node_by_raw_id(db_session, page_node_id, confluence_source)\n    assert db_node is not None\n    assert db_node.document_id is None\n\n    # Create a document with the same ID as the hierarchy node\n    doc = DbDocument(\n        id=page_node_id,\n        semantic_id=\"Test Page\",\n        kg_stage=KGStage.NOT_STARTED,\n    )\n    db_session.add(doc)\n    db_session.commit()\n\n    # Link nodes to documents\n    linked = link_hierarchy_nodes_to_documents(\n        db_session=db_session,\n        document_ids=[page_node_id],\n        source=confluence_source,\n        commit=True,\n    )\n    assert linked == 1\n\n    # Verify the hierarchy node now has document_id set\n    db_session.expire_all()\n    db_node = get_hierarchy_node_by_raw_id(db_session, page_node_id, confluence_source)\n    assert db_node is not None\n    assert db_node.document_id == page_node_id\n\n    # Cleanup\n    db_session.query(DbDocument).filter(DbDocument.id == page_node_id).delete()\n    db_session.query(DBHierarchyNode).filter(\n        DBHierarchyNode.source == confluence_source\n    ).delete()\n    db_session.commit()\n\n\ndef test_link_hierarchy_nodes_skips_non_hierarchy_sources(\n    db_session: Session,\n) -> None:\n    \"\"\"link_hierarchy_nodes_to_documents should return 0 for sources that\n    don't support hierarchy-node-as-document (e.g. Slack, Google Drive).\"\"\"\n    linked = link_hierarchy_nodes_to_documents(\n        db_session=db_session,\n        document_ids=SLIM_DOC_IDS,\n        source=TEST_SOURCE,  # Slack — not in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS\n        commit=False,\n    )\n    assert linked == 0\n\n\n# ---------------------------------------------------------------------------\n# Join table + pruning tests\n# ---------------------------------------------------------------------------\n\n\ndef test_upsert_hierarchy_node_cc_pair_entries(db_session: Session) -> None:\n    \"\"\"upsert_hierarchy_node_cc_pair_entries should insert rows and be idempotent.\"\"\"\n    _cleanup_test_data(db_session)\n    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)\n    cc_pair = _create_cc_pair(db_session)\n\n    upserted = upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=_make_hierarchy_nodes(),\n        source=TEST_SOURCE,\n        commit=True,\n        is_connector_public=False,\n    )\n    node_ids = [n.id for n in upserted]\n\n    # First call — should insert rows\n    upsert_hierarchy_node_cc_pair_entries(\n        db_session=db_session,\n        hierarchy_node_ids=node_ids,\n        connector_id=cc_pair.connector_id,\n        credential_id=cc_pair.credential_id,\n        commit=True,\n    )\n\n    rows = (\n        db_session.query(HierarchyNodeByConnectorCredentialPair)\n        .filter(\n            HierarchyNodeByConnectorCredentialPair.connector_id == cc_pair.connector_id,\n            HierarchyNodeByConnectorCredentialPair.credential_id\n            == cc_pair.credential_id,\n        )\n        .all()\n    )\n    assert len(rows) == 3\n\n    # Second call — idempotent, same count\n    upsert_hierarchy_node_cc_pair_entries(\n        db_session=db_session,\n        hierarchy_node_ids=node_ids,\n        connector_id=cc_pair.connector_id,\n        credential_id=cc_pair.credential_id,\n        commit=True,\n    )\n    rows_after = (\n        db_session.query(HierarchyNodeByConnectorCredentialPair)\n        .filter(\n            HierarchyNodeByConnectorCredentialPair.connector_id == cc_pair.connector_id,\n            HierarchyNodeByConnectorCredentialPair.credential_id\n            == cc_pair.credential_id,\n        )\n        .all()\n    )\n    assert len(rows_after) == 3\n\n\ndef test_remove_stale_entries_and_delete_orphans(db_session: Session) -> None:\n    \"\"\"After removing stale join-table entries, orphaned hierarchy nodes should\n    be deleted and the SOURCE node should survive.\"\"\"\n    _cleanup_test_data(db_session)\n    source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)\n    cc_pair = _create_cc_pair(db_session)\n\n    upserted = upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=_make_hierarchy_nodes(),\n        source=TEST_SOURCE,\n        commit=True,\n        is_connector_public=False,\n    )\n    all_ids = [n.id for n in upserted]\n    upsert_hierarchy_node_cc_pair_entries(\n        db_session=db_session,\n        hierarchy_node_ids=all_ids,\n        connector_id=cc_pair.connector_id,\n        credential_id=cc_pair.credential_id,\n        commit=True,\n    )\n\n    # Now simulate a pruning run where only channel A survived\n    channel_a = get_hierarchy_node_by_raw_id(db_session, CHANNEL_A_ID, TEST_SOURCE)\n    assert channel_a is not None\n    live_ids = {channel_a.id}\n\n    stale_removed = remove_stale_hierarchy_node_cc_pair_entries(\n        db_session=db_session,\n        connector_id=cc_pair.connector_id,\n        credential_id=cc_pair.credential_id,\n        live_hierarchy_node_ids=live_ids,\n        commit=True,\n    )\n    assert stale_removed == 2\n\n    # Delete orphaned nodes\n    deleted_raw_ids = delete_orphaned_hierarchy_nodes(\n        db_session=db_session,\n        source=TEST_SOURCE,\n        commit=True,\n    )\n    assert set(deleted_raw_ids) == {CHANNEL_B_ID, CHANNEL_C_ID}\n\n    # Verify only channel A + SOURCE remain\n    remaining = get_all_hierarchy_nodes_for_source(db_session, TEST_SOURCE)\n    remaining_raw = {n.raw_node_id for n in remaining}\n    assert remaining_raw == {CHANNEL_A_ID, source_node.raw_node_id}\n\n\ndef test_multi_cc_pair_prevents_premature_deletion(db_session: Session) -> None:\n    \"\"\"A hierarchy node shared by two cc_pairs should NOT be deleted when only\n    one cc_pair removes its association.\"\"\"\n    _cleanup_test_data(db_session)\n    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)\n    cc_pair_1 = _create_cc_pair(db_session)\n    cc_pair_2 = _create_cc_pair(db_session)\n\n    upserted = upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=_make_hierarchy_nodes(),\n        source=TEST_SOURCE,\n        commit=True,\n        is_connector_public=False,\n    )\n    all_ids = [n.id for n in upserted]\n\n    # cc_pair 1 owns all 3\n    upsert_hierarchy_node_cc_pair_entries(\n        db_session=db_session,\n        hierarchy_node_ids=all_ids,\n        connector_id=cc_pair_1.connector_id,\n        credential_id=cc_pair_1.credential_id,\n        commit=True,\n    )\n    # cc_pair 2 also owns all 3\n    upsert_hierarchy_node_cc_pair_entries(\n        db_session=db_session,\n        hierarchy_node_ids=all_ids,\n        connector_id=cc_pair_2.connector_id,\n        credential_id=cc_pair_2.credential_id,\n        commit=True,\n    )\n\n    # cc_pair 1 prunes — keeps none\n    remove_stale_hierarchy_node_cc_pair_entries(\n        db_session=db_session,\n        connector_id=cc_pair_1.connector_id,\n        credential_id=cc_pair_1.credential_id,\n        live_hierarchy_node_ids=set(),\n        commit=True,\n    )\n\n    # Orphan deletion should find nothing because cc_pair 2 still references them\n    deleted = delete_orphaned_hierarchy_nodes(\n        db_session=db_session,\n        source=TEST_SOURCE,\n        commit=True,\n    )\n    assert deleted == []\n\n    # All 3 nodes + SOURCE should still exist\n    remaining = get_all_hierarchy_nodes_for_source(db_session, TEST_SOURCE)\n    assert len(remaining) == 4\n\n\ndef test_reparent_orphaned_children(db_session: Session) -> None:\n    \"\"\"After deleting a parent hierarchy node, its children should be\n    re-parented to the SOURCE node.\"\"\"\n    _cleanup_test_data(db_session)\n    source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)\n    cc_pair = _create_cc_pair(db_session)\n\n    # Create a parent node and a child node\n    parent_node = PydanticHierarchyNode(\n        raw_node_id=\"PARENT\",\n        raw_parent_id=None,\n        display_name=\"Parent\",\n        node_type=HierarchyNodeType.CHANNEL,\n    )\n    child_node = PydanticHierarchyNode(\n        raw_node_id=\"CHILD\",\n        raw_parent_id=\"PARENT\",\n        display_name=\"Child\",\n        node_type=HierarchyNodeType.CHANNEL,\n    )\n    upserted = upsert_hierarchy_nodes_batch(\n        db_session=db_session,\n        nodes=[parent_node, child_node],\n        source=TEST_SOURCE,\n        commit=True,\n        is_connector_public=False,\n    )\n    assert len(upserted) == 2\n\n    parent_db = get_hierarchy_node_by_raw_id(db_session, \"PARENT\", TEST_SOURCE)\n    child_db = get_hierarchy_node_by_raw_id(db_session, \"CHILD\", TEST_SOURCE)\n    assert parent_db is not None and child_db is not None\n    assert child_db.parent_id == parent_db.id\n\n    # Associate only the child with a cc_pair (parent is orphaned)\n    upsert_hierarchy_node_cc_pair_entries(\n        db_session=db_session,\n        hierarchy_node_ids=[child_db.id],\n        connector_id=cc_pair.connector_id,\n        credential_id=cc_pair.credential_id,\n        commit=True,\n    )\n\n    # Delete orphaned nodes (parent has no cc_pair entry)\n    deleted = delete_orphaned_hierarchy_nodes(\n        db_session=db_session,\n        source=TEST_SOURCE,\n        commit=True,\n    )\n    assert \"PARENT\" in deleted\n\n    # Child should now have parent_id=NULL (SET NULL cascade)\n    db_session.expire_all()\n    child_db = get_hierarchy_node_by_raw_id(db_session, \"CHILD\", TEST_SOURCE)\n    assert child_db is not None\n    assert child_db.parent_id is None\n\n    # Re-parent orphans to SOURCE\n    reparented = reparent_orphaned_hierarchy_nodes(\n        db_session=db_session,\n        source=TEST_SOURCE,\n        commit=True,\n    )\n    assert len(reparented) == 1\n\n    db_session.expire_all()\n    child_db = get_hierarchy_node_by_raw_id(db_session, \"CHILD\", TEST_SOURCE)\n    assert child_db is not None\n    assert child_db.parent_id == source_node.id\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/celery/test_user_file_delete_queue.py",
    "content": "\"\"\"\nExternal dependency unit tests for user file delete queue protections.\n\nVerifies that the three mechanisms added to check_for_user_file_delete work\ncorrectly:\n\n1. Queue depth backpressure – when the broker queue exceeds\n   USER_FILE_DELETE_MAX_QUEUE_DEPTH, no new tasks are enqueued.\n\n2. Per-file Redis guard key – if the guard key for a file already exists in\n   Redis, that file is skipped even though it is still in DELETING status.\n\n3. Task expiry – every send_task call carries expires=\n   CELERY_USER_FILE_DELETE_TASK_EXPIRES so that stale queued tasks are\n   discarded by workers automatically.\n\nAlso verifies that delete_user_file_impl clears the guard key the moment\nit is picked up by a worker.\n\nUses real Redis (DB 0 via get_redis_client) and real PostgreSQL for UserFile\nrows.  The Celery app is provided as a MagicMock injected via a PropertyMock\non the task class so no real broker is needed.\n\"\"\"\n\nfrom collections.abc import Generator\nfrom contextlib import contextmanager\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom unittest.mock import PropertyMock\nfrom uuid import uuid4\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    _user_file_delete_lock_key,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    _user_file_delete_queued_key,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    check_for_user_file_delete,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    process_single_user_file_delete,\n)\nfrom onyx.configs.constants import CELERY_USER_FILE_DELETE_TASK_EXPIRES\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import USER_FILE_DELETE_MAX_QUEUE_DEPTH\nfrom onyx.db.enums import UserFileStatus\nfrom onyx.db.models import UserFile\nfrom onyx.redis.redis_pool import get_redis_client\nfrom tests.external_dependency_unit.conftest import create_test_user\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n_PATCH_QUEUE_LEN = (\n    \"onyx.background.celery.tasks.user_file_processing.tasks.celery_get_queue_length\"\n)\n\n\ndef _create_deleting_user_file(db_session: Session, user_id: object) -> UserFile:\n    \"\"\"Insert a UserFile in DELETING status and return it.\"\"\"\n    uf = UserFile(\n        id=uuid4(),\n        user_id=user_id,\n        file_id=f\"test_file_{uuid4().hex[:8]}\",\n        name=f\"test_{uuid4().hex[:8]}.txt\",\n        file_type=\"text/plain\",\n        status=UserFileStatus.DELETING,\n    )\n    db_session.add(uf)\n    db_session.commit()\n    db_session.refresh(uf)\n    return uf\n\n\n@contextmanager\ndef _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, None]:\n    \"\"\"Patch the ``app`` property on *task*'s class so that ``self.app``\n    inside the task function returns *mock_app*.\n\n    With ``bind=True``, ``task.run`` is a bound method whose ``__self__`` is\n    the actual task instance.  We patch ``app`` on that instance's class\n    (a unique Celery-generated Task subclass) so the mock is scoped to this\n    task only.\n\n    Also patches ``celery_get_broker_client`` so the mock app doesn't need\n    a real broker URL.\n    \"\"\"\n    task_instance = task.run.__self__\n    with (\n        patch.object(\n            type(task_instance),\n            \"app\",\n            new_callable=PropertyMock,\n            return_value=mock_app,\n        ),\n        patch(\n            \"onyx.background.celery.tasks.user_file_processing.tasks.celery_get_broker_client\",\n            return_value=MagicMock(),\n        ),\n    ):\n        yield\n\n\n# ---------------------------------------------------------------------------\n# Test classes\n# ---------------------------------------------------------------------------\n\n\nclass TestDeleteQueueDepthBackpressure:\n    \"\"\"Protection 1: skip all enqueuing when the broker queue is too deep.\"\"\"\n\n    def test_no_tasks_enqueued_when_queue_over_limit(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"When the queue depth exceeds the limit the beat cycle is skipped.\"\"\"\n        user = create_test_user(db_session, \"del_bp_user\")\n        _create_deleting_user_file(db_session, user.id)\n\n        mock_app = MagicMock()\n\n        with (\n            _patch_task_app(check_for_user_file_delete, mock_app),\n            patch(_PATCH_QUEUE_LEN, return_value=USER_FILE_DELETE_MAX_QUEUE_DEPTH + 1),\n        ):\n            check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)\n\n        mock_app.send_task.assert_not_called()\n\n\nclass TestDeletePerFileGuardKey:\n    \"\"\"Protection 2: per-file Redis guard key prevents duplicate enqueue.\"\"\"\n\n    def test_guarded_file_not_re_enqueued(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"A file whose guard key is already set in Redis is skipped.\"\"\"\n        user = create_test_user(db_session, \"del_guard_user\")\n        uf = _create_deleting_user_file(db_session, user.id)\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n        guard_key = _user_file_delete_queued_key(uf.id)\n        redis_client.setex(guard_key, CELERY_USER_FILE_DELETE_TASK_EXPIRES, 1)\n\n        mock_app = MagicMock()\n\n        try:\n            with (\n                _patch_task_app(check_for_user_file_delete, mock_app),\n                patch(_PATCH_QUEUE_LEN, return_value=0),\n            ):\n                check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)\n\n            # send_task must not have been called with this specific file's ID\n            for call in mock_app.send_task.call_args_list:\n                kwargs = call.kwargs.get(\"kwargs\", {})\n                assert kwargs.get(\"user_file_id\") != str(\n                    uf.id\n                ), f\"File {uf.id} should have been skipped because its guard key exists\"\n        finally:\n            redis_client.delete(guard_key)\n\n    def test_guard_key_exists_in_redis_after_enqueue(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"After a file is enqueued its guard key is present in Redis with a TTL.\"\"\"\n        user = create_test_user(db_session, \"del_guard_set_user\")\n        uf = _create_deleting_user_file(db_session, user.id)\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n        guard_key = _user_file_delete_queued_key(uf.id)\n        redis_client.delete(guard_key)  # clean slate\n\n        mock_app = MagicMock()\n\n        try:\n            with (\n                _patch_task_app(check_for_user_file_delete, mock_app),\n                patch(_PATCH_QUEUE_LEN, return_value=0),\n            ):\n                check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)\n\n            assert redis_client.exists(\n                guard_key\n            ), \"Guard key should be set in Redis after enqueue\"\n            ttl = int(redis_client.ttl(guard_key))  # type: ignore[arg-type]\n            assert (\n                0 < ttl <= CELERY_USER_FILE_DELETE_TASK_EXPIRES\n            ), f\"Guard key TTL {ttl}s is outside the expected range (0, {CELERY_USER_FILE_DELETE_TASK_EXPIRES}]\"\n        finally:\n            redis_client.delete(guard_key)\n\n\nclass TestDeleteTaskExpiry:\n    \"\"\"Protection 3: every send_task call includes an expires value.\"\"\"\n\n    def test_send_task_called_with_expires(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"send_task is called with the correct queue, task name, and expires.\"\"\"\n        user = create_test_user(db_session, \"del_expires_user\")\n        uf = _create_deleting_user_file(db_session, user.id)\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n        guard_key = _user_file_delete_queued_key(uf.id)\n        redis_client.delete(guard_key)\n\n        mock_app = MagicMock()\n\n        try:\n            with (\n                _patch_task_app(check_for_user_file_delete, mock_app),\n                patch(_PATCH_QUEUE_LEN, return_value=0),\n            ):\n                check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)\n\n            # At least one task should have been submitted (for our file)\n            assert (\n                mock_app.send_task.call_count >= 1\n            ), \"Expected at least one task to be submitted\"\n\n            # Every submitted task must carry expires\n            for call in mock_app.send_task.call_args_list:\n                assert call.args[0] == OnyxCeleryTask.DELETE_SINGLE_USER_FILE\n                assert call.kwargs.get(\"queue\") == OnyxCeleryQueues.USER_FILE_DELETE\n                assert (\n                    call.kwargs.get(\"expires\") == CELERY_USER_FILE_DELETE_TASK_EXPIRES\n                ), \"Task must be submitted with the correct expires value to prevent stale task accumulation\"\n        finally:\n            redis_client.delete(guard_key)\n\n\nclass TestDeleteWorkerClearsGuardKey:\n    \"\"\"process_single_user_file_delete removes the guard key when it picks up a task.\"\"\"\n\n    def test_guard_key_deleted_on_pickup(\n        self,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"The guard key is deleted before the worker does any real work.\n\n        We simulate an already-locked file so delete_user_file_impl returns\n        early – but crucially, after the guard key deletion.\n        \"\"\"\n        user_file_id = str(uuid4())\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n        guard_key = _user_file_delete_queued_key(user_file_id)\n\n        # Simulate the guard key set when the beat enqueued the task\n        redis_client.setex(guard_key, CELERY_USER_FILE_DELETE_TASK_EXPIRES, 1)\n        assert redis_client.exists(guard_key), \"Guard key must exist before pickup\"\n\n        # Hold the per-file delete lock so the worker exits early without\n        # touching the database or file store.\n        lock_key = _user_file_delete_lock_key(user_file_id)\n        delete_lock = redis_client.lock(lock_key, timeout=10)\n        acquired = delete_lock.acquire(blocking=False)\n        assert acquired, \"Should be able to acquire the delete lock for this test\"\n\n        try:\n            process_single_user_file_delete.run(\n                user_file_id=user_file_id,\n                tenant_id=TEST_TENANT_ID,\n            )\n        finally:\n            if delete_lock.owned():\n                delete_lock.release()\n\n        assert not redis_client.exists(\n            guard_key\n        ), \"Guard key should be deleted when the worker picks up the task\"\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/celery/test_user_file_indexing_adapter.py",
    "content": "\"\"\"\nExternal dependency unit tests for UserFileIndexingAdapter metadata writing.\n\nValidates that prepare_enrichment produces DocMetadataAwareIndexChunk\nobjects with both `user_project` and `personas` fields populated correctly\nbased on actual DB associations.\n\nUses real PostgreSQL for UserFile/Persona/UserProject rows.\nMocks the LLM tokenizer and file store since they are not relevant here.\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import TextSection\nfrom onyx.db.enums import UserFileStatus\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Persona__UserFile\nfrom onyx.db.models import Project__UserFile\nfrom onyx.db.models import User\nfrom onyx.db.models import UserFile\nfrom onyx.db.models import UserProject\nfrom onyx.indexing.adapters.user_file_indexing_adapter import UserFileIndexingAdapter\nfrom onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext\nfrom onyx.indexing.models import ChunkEmbedding\nfrom onyx.indexing.models import IndexChunk\nfrom tests.external_dependency_unit.conftest import create_test_user\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef _create_user_file(db_session: Session, user: User) -> UserFile:\n    uf = UserFile(\n        id=uuid4(),\n        user_id=user.id,\n        file_id=f\"test_file_{uuid4().hex[:8]}\",\n        name=f\"test_{uuid4().hex[:8]}.txt\",\n        file_type=\"text/plain\",\n        status=UserFileStatus.COMPLETED,\n        chunk_count=1,\n    )\n    db_session.add(uf)\n    db_session.commit()\n    db_session.refresh(uf)\n    return uf\n\n\ndef _create_persona(db_session: Session, user: User) -> Persona:\n    persona = Persona(\n        name=f\"Test Persona {uuid4().hex[:8]}\",\n        description=\"Test persona\",\n        system_prompt=\"test\",\n        task_prompt=\"test\",\n        tools=[],\n        document_sets=[],\n        users=[user],\n        groups=[],\n        is_listed=True,\n        is_public=True,\n        display_priority=None,\n        starter_messages=None,\n        deleted=False,\n        user_id=user.id,\n    )\n    db_session.add(persona)\n    db_session.commit()\n    db_session.refresh(persona)\n    return persona\n\n\ndef _create_project(db_session: Session, user: User) -> UserProject:\n    project = UserProject(\n        user_id=user.id,\n        name=f\"project-{uuid4().hex[:8]}\",\n        instructions=\"\",\n    )\n    db_session.add(project)\n    db_session.commit()\n    db_session.refresh(project)\n    return project\n\n\ndef _make_index_chunk(user_file: UserFile) -> IndexChunk:\n    \"\"\"Build a minimal IndexChunk whose source document ID matches the UserFile.\"\"\"\n    doc = Document(\n        id=str(user_file.id),\n        source=DocumentSource.USER_FILE,\n        semantic_identifier=user_file.name,\n        sections=[TextSection(text=\"test chunk content\", link=None)],\n        metadata={},\n    )\n    return IndexChunk(\n        source_document=doc,\n        chunk_id=0,\n        blurb=\"test chunk\",\n        content=\"test chunk content\",\n        source_links={0: \"\"},\n        image_file_id=None,\n        section_continuation=False,\n        title_prefix=\"\",\n        metadata_suffix_semantic=\"\",\n        metadata_suffix_keyword=\"\",\n        contextual_rag_reserved_tokens=0,\n        doc_summary=\"\",\n        chunk_context=\"\",\n        mini_chunk_texts=None,\n        large_chunk_id=None,\n        embeddings=ChunkEmbedding(\n            full_embedding=[0.0] * 768,\n            mini_chunk_embeddings=[],\n        ),\n        title_embedding=None,\n    )\n\n\n# ---------------------------------------------------------------------------\n# Tests\n# ---------------------------------------------------------------------------\n\n\nclass TestAdapterWritesBothMetadataFields:\n    \"\"\"prepare_enrichment must populate user_project AND personas.\"\"\"\n\n    @patch(\n        \"onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm\",\n        side_effect=Exception(\"no LLM in test\"),\n    )\n    def test_file_linked_to_persona_gets_persona_id(\n        self,\n        _mock_llm: MagicMock,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        user = create_test_user(db_session, \"adapter_persona\")\n        uf = _create_user_file(db_session, user)\n        persona = _create_persona(db_session, user)\n\n        db_session.add(Persona__UserFile(persona_id=persona.id, user_file_id=uf.id))\n        db_session.commit()\n\n        adapter = UserFileIndexingAdapter(\n            tenant_id=TEST_TENANT_ID, db_session=db_session\n        )\n        chunk = _make_index_chunk(uf)\n        doc = chunk.source_document\n        context = DocumentBatchPrepareContext(updatable_docs=[doc], id_to_boost_map={})\n\n        enricher = adapter.prepare_enrichment(\n            context=context,\n            tenant_id=TEST_TENANT_ID,\n            chunks=[chunk],\n        )\n        aware_chunk = enricher.enrich_chunk(chunk, 1.0)\n\n        assert persona.id in aware_chunk.personas\n        assert aware_chunk.user_project == []\n\n    @patch(\n        \"onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm\",\n        side_effect=Exception(\"no LLM in test\"),\n    )\n    def test_file_linked_to_project_gets_project_id(\n        self,\n        _mock_llm: MagicMock,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        user = create_test_user(db_session, \"adapter_project\")\n        uf = _create_user_file(db_session, user)\n        project = _create_project(db_session, user)\n\n        db_session.add(Project__UserFile(project_id=project.id, user_file_id=uf.id))\n        db_session.commit()\n\n        adapter = UserFileIndexingAdapter(\n            tenant_id=TEST_TENANT_ID, db_session=db_session\n        )\n        chunk = _make_index_chunk(uf)\n        context = DocumentBatchPrepareContext(\n            updatable_docs=[chunk.source_document], id_to_boost_map={}\n        )\n\n        enricher = adapter.prepare_enrichment(\n            context=context,\n            tenant_id=TEST_TENANT_ID,\n            chunks=[chunk],\n        )\n        aware_chunk = enricher.enrich_chunk(chunk, 1.0)\n\n        assert project.id in aware_chunk.user_project\n        assert aware_chunk.personas == []\n\n    @patch(\n        \"onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm\",\n        side_effect=Exception(\"no LLM in test\"),\n    )\n    def test_file_linked_to_both_gets_both_ids(\n        self,\n        _mock_llm: MagicMock,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        user = create_test_user(db_session, \"adapter_both\")\n        uf = _create_user_file(db_session, user)\n        persona = _create_persona(db_session, user)\n        project = _create_project(db_session, user)\n\n        db_session.add(Persona__UserFile(persona_id=persona.id, user_file_id=uf.id))\n        db_session.add(Project__UserFile(project_id=project.id, user_file_id=uf.id))\n        db_session.commit()\n\n        adapter = UserFileIndexingAdapter(\n            tenant_id=TEST_TENANT_ID, db_session=db_session\n        )\n        chunk = _make_index_chunk(uf)\n        context = DocumentBatchPrepareContext(\n            updatable_docs=[chunk.source_document], id_to_boost_map={}\n        )\n\n        enricher = adapter.prepare_enrichment(\n            context=context,\n            tenant_id=TEST_TENANT_ID,\n            chunks=[chunk],\n        )\n        aware_chunk = enricher.enrich_chunk(chunk, 1.0)\n\n        assert persona.id in aware_chunk.personas\n        assert project.id in aware_chunk.user_project\n\n    @patch(\n        \"onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm\",\n        side_effect=Exception(\"no LLM in test\"),\n    )\n    def test_file_with_no_associations_gets_empty_lists(\n        self,\n        _mock_llm: MagicMock,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        user = create_test_user(db_session, \"adapter_empty\")\n        uf = _create_user_file(db_session, user)\n\n        adapter = UserFileIndexingAdapter(\n            tenant_id=TEST_TENANT_ID, db_session=db_session\n        )\n        chunk = _make_index_chunk(uf)\n        context = DocumentBatchPrepareContext(\n            updatable_docs=[chunk.source_document], id_to_boost_map={}\n        )\n\n        enricher = adapter.prepare_enrichment(\n            context=context,\n            tenant_id=TEST_TENANT_ID,\n            chunks=[chunk],\n        )\n        aware_chunk = enricher.enrich_chunk(chunk, 1.0)\n\n        assert aware_chunk.personas == []\n        assert aware_chunk.user_project == []\n\n    @patch(\n        \"onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm\",\n        side_effect=Exception(\"no LLM in test\"),\n    )\n    def test_multiple_personas_all_appear(\n        self,\n        _mock_llm: MagicMock,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"A file linked to multiple personas should have all their IDs.\"\"\"\n        user = create_test_user(db_session, \"adapter_multi\")\n        uf = _create_user_file(db_session, user)\n        persona_a = _create_persona(db_session, user)\n        persona_b = _create_persona(db_session, user)\n\n        db_session.add(Persona__UserFile(persona_id=persona_a.id, user_file_id=uf.id))\n        db_session.add(Persona__UserFile(persona_id=persona_b.id, user_file_id=uf.id))\n        db_session.commit()\n\n        adapter = UserFileIndexingAdapter(\n            tenant_id=TEST_TENANT_ID, db_session=db_session\n        )\n        chunk = _make_index_chunk(uf)\n        context = DocumentBatchPrepareContext(\n            updatable_docs=[chunk.source_document], id_to_boost_map={}\n        )\n\n        enricher = adapter.prepare_enrichment(\n            context=context,\n            tenant_id=TEST_TENANT_ID,\n            chunks=[chunk],\n        )\n        aware_chunk = enricher.enrich_chunk(chunk, 1.0)\n\n        assert set(aware_chunk.personas) == {persona_a.id, persona_b.id}\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/celery/test_user_file_processing_queue.py",
    "content": "\"\"\"\nExternal dependency unit tests for user file processing queue protections.\n\nVerifies that the three mechanisms added to check_user_file_processing work\ncorrectly:\n\n1. Queue depth backpressure – when the broker queue exceeds\n   USER_FILE_PROCESSING_MAX_QUEUE_DEPTH, no new tasks are enqueued.\n\n2. Per-file Redis guard key – if the guard key for a file already exists in\n   Redis, that file is skipped even though it is still in PROCESSING status.\n\n3. Task expiry – every send_task call carries expires=\n   CELERY_USER_FILE_PROCESSING_TASK_EXPIRES so that stale queued tasks are\n   discarded by workers automatically.\n\nAlso verifies that process_single_user_file clears the guard key the moment\nit is picked up by a worker.\n\nUses real Redis (DB 0 via get_redis_client) and real PostgreSQL for UserFile\nrows.  The Celery app is provided as a MagicMock injected via a PropertyMock\non the task class so no real broker is needed.\n\"\"\"\n\nfrom collections.abc import Generator\nfrom contextlib import contextmanager\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom unittest.mock import PropertyMock\nfrom uuid import uuid4\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    _user_file_lock_key,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    _user_file_queued_key,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    check_user_file_processing,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    process_single_user_file,\n)\nfrom onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import USER_FILE_PROCESSING_MAX_QUEUE_DEPTH\nfrom onyx.db.enums import UserFileStatus\nfrom onyx.db.models import UserFile\nfrom onyx.redis.redis_pool import get_redis_client\nfrom tests.external_dependency_unit.conftest import create_test_user\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n_PATCH_QUEUE_LEN = (\n    \"onyx.background.celery.tasks.user_file_processing.tasks.celery_get_queue_length\"\n)\n\n\ndef _create_processing_user_file(db_session: Session, user_id: object) -> UserFile:\n    \"\"\"Insert a UserFile in PROCESSING status and return it.\"\"\"\n    uf = UserFile(\n        id=uuid4(),\n        user_id=user_id,\n        file_id=f\"test_file_{uuid4().hex[:8]}\",\n        name=f\"test_{uuid4().hex[:8]}.txt\",\n        file_type=\"text/plain\",\n        status=UserFileStatus.PROCESSING,\n    )\n    db_session.add(uf)\n    db_session.commit()\n    db_session.refresh(uf)\n    return uf\n\n\n@contextmanager\ndef _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, None]:\n    \"\"\"Patch the ``app`` property on *task*'s class so that ``self.app``\n    inside the task function returns *mock_app*.\n\n    With ``bind=True``, ``task.run`` is a bound method whose ``__self__`` is\n    the actual task instance.  We patch ``app`` on that instance's class\n    (a unique Celery-generated Task subclass) so the mock is scoped to this\n    task only.\n    \"\"\"\n    task_instance = task.run.__self__\n    with (\n        patch.object(\n            type(task_instance),\n            \"app\",\n            new_callable=PropertyMock,\n            return_value=mock_app,\n        ),\n        patch(\n            \"onyx.background.celery.tasks.user_file_processing.tasks.celery_get_broker_client\",\n            return_value=MagicMock(),\n        ),\n    ):\n        yield\n\n\n# ---------------------------------------------------------------------------\n# Test classes\n# ---------------------------------------------------------------------------\n\n\nclass TestQueueDepthBackpressure:\n    \"\"\"Protection 1: skip all enqueuing when the broker queue is too deep.\"\"\"\n\n    def test_no_tasks_enqueued_when_queue_over_limit(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"When the queue depth exceeds the limit the beat cycle is skipped.\"\"\"\n        user = create_test_user(db_session, \"bp_user\")\n        _create_processing_user_file(db_session, user.id)\n\n        mock_app = MagicMock()\n\n        with (\n            _patch_task_app(check_user_file_processing, mock_app),\n            patch(\n                _PATCH_QUEUE_LEN, return_value=USER_FILE_PROCESSING_MAX_QUEUE_DEPTH + 1\n            ),\n        ):\n            check_user_file_processing.run(tenant_id=TEST_TENANT_ID)\n\n        mock_app.send_task.assert_not_called()\n\n\nclass TestPerFileGuardKey:\n    \"\"\"Protection 2: per-file Redis guard key prevents duplicate enqueue.\"\"\"\n\n    def test_guarded_file_not_re_enqueued(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"A file whose guard key is already set in Redis is skipped.\"\"\"\n        user = create_test_user(db_session, \"guard_user\")\n        uf = _create_processing_user_file(db_session, user.id)\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n        guard_key = _user_file_queued_key(uf.id)\n        redis_client.setex(guard_key, CELERY_USER_FILE_PROCESSING_TASK_EXPIRES, 1)\n\n        mock_app = MagicMock()\n\n        try:\n            with (\n                _patch_task_app(check_user_file_processing, mock_app),\n                patch(_PATCH_QUEUE_LEN, return_value=0),\n            ):\n                check_user_file_processing.run(tenant_id=TEST_TENANT_ID)\n\n            # send_task must not have been called with this specific file's ID\n            for call in mock_app.send_task.call_args_list:\n                kwargs = call.kwargs.get(\"kwargs\", {})\n                assert kwargs.get(\"user_file_id\") != str(\n                    uf.id\n                ), f\"File {uf.id} should have been skipped because its guard key exists\"\n        finally:\n            redis_client.delete(guard_key)\n\n    def test_guard_key_exists_in_redis_after_enqueue(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"After a file is enqueued its guard key is present in Redis with a TTL.\"\"\"\n        user = create_test_user(db_session, \"guard_set_user\")\n        uf = _create_processing_user_file(db_session, user.id)\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n        guard_key = _user_file_queued_key(uf.id)\n        redis_client.delete(guard_key)  # clean slate\n\n        mock_app = MagicMock()\n\n        try:\n            with (\n                _patch_task_app(check_user_file_processing, mock_app),\n                patch(_PATCH_QUEUE_LEN, return_value=0),\n            ):\n                check_user_file_processing.run(tenant_id=TEST_TENANT_ID)\n\n            assert redis_client.exists(\n                guard_key\n            ), \"Guard key should be set in Redis after enqueue\"\n            ttl = int(redis_client.ttl(guard_key))  # type: ignore[arg-type]\n            assert (\n                0 < ttl <= CELERY_USER_FILE_PROCESSING_TASK_EXPIRES\n            ), f\"Guard key TTL {ttl}s is outside the expected range (0, {CELERY_USER_FILE_PROCESSING_TASK_EXPIRES}]\"\n        finally:\n            redis_client.delete(guard_key)\n\n\nclass TestTaskExpiry:\n    \"\"\"Protection 3: every send_task call includes an expires value.\"\"\"\n\n    def test_send_task_called_with_expires(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"send_task is called with the correct queue, task name, and expires.\"\"\"\n        user = create_test_user(db_session, \"expires_user\")\n        uf = _create_processing_user_file(db_session, user.id)\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n        guard_key = _user_file_queued_key(uf.id)\n        redis_client.delete(guard_key)\n\n        mock_app = MagicMock()\n\n        try:\n            with (\n                _patch_task_app(check_user_file_processing, mock_app),\n                patch(_PATCH_QUEUE_LEN, return_value=0),\n            ):\n                check_user_file_processing.run(tenant_id=TEST_TENANT_ID)\n\n            # At least one task should have been submitted (for our file)\n            assert (\n                mock_app.send_task.call_count >= 1\n            ), \"Expected at least one task to be submitted\"\n\n            # Every submitted task must carry expires\n            for call in mock_app.send_task.call_args_list:\n                assert call.args[0] == OnyxCeleryTask.PROCESS_SINGLE_USER_FILE\n                assert call.kwargs.get(\"queue\") == OnyxCeleryQueues.USER_FILE_PROCESSING\n                assert (\n                    call.kwargs.get(\"expires\")\n                    == CELERY_USER_FILE_PROCESSING_TASK_EXPIRES\n                ), \"Task must be submitted with the correct expires value to prevent stale task accumulation\"\n        finally:\n            redis_client.delete(guard_key)\n\n\nclass TestWorkerClearsGuardKey:\n    \"\"\"process_single_user_file removes the guard key when it picks up a task.\"\"\"\n\n    def test_guard_key_deleted_on_pickup(\n        self,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"The guard key is deleted before the worker does any real work.\n\n        We simulate an already-locked file so process_single_user_file returns\n        early – but crucially, after the guard key deletion.\n        \"\"\"\n        user_file_id = str(uuid4())\n\n        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)\n        guard_key = _user_file_queued_key(user_file_id)\n\n        # Simulate the guard key set when the beat enqueued the task\n        redis_client.setex(guard_key, CELERY_USER_FILE_PROCESSING_TASK_EXPIRES, 1)\n        assert redis_client.exists(guard_key), \"Guard key must exist before pickup\"\n\n        # Hold the per-file processing lock so the worker exits early without\n        # touching the database or file store.\n        lock_key = _user_file_lock_key(user_file_id)\n        processing_lock = redis_client.lock(lock_key, timeout=10)\n        acquired = processing_lock.acquire(blocking=False)\n        assert acquired, \"Should be able to acquire the processing lock for this test\"\n\n        try:\n            process_single_user_file.run(\n                user_file_id=user_file_id,\n                tenant_id=TEST_TENANT_ID,\n            )\n        finally:\n            if processing_lock.owned():\n                processing_lock.release()\n\n        assert not redis_client.exists(\n            guard_key\n        ), \"Guard key should be deleted when the worker picks up the task\"\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/chat/test_user_reminder_message_type.py",
    "content": "\"\"\"\nTests for the USER_REMINDER message type handling in translate_history_to_llm_format.\n\nThese tests verify that:\n1. USER_REMINDER messages are wrapped with <system-reminder> tags\n2. The wrapped messages are converted to UserMessage type for the LLM\n3. The tags are properly applied around the message content\n4. CODE_BLOCK_MARKDOWN is prepended to system messages for models that need it\n\"\"\"\n\nimport pytest\n\nfrom onyx.chat.llm_step import translate_history_to_llm_format\nfrom onyx.chat.models import ChatMessageSimple\nfrom onyx.configs.constants import MessageType\nfrom onyx.llm.interfaces import LLMConfig\nfrom onyx.llm.models import ChatCompletionMessage\nfrom onyx.llm.models import SystemMessage\nfrom onyx.llm.models import UserMessage\nfrom onyx.prompts.chat_prompts import CODE_BLOCK_MARKDOWN\nfrom onyx.prompts.constants import SYSTEM_REMINDER_TAG_CLOSE\nfrom onyx.prompts.constants import SYSTEM_REMINDER_TAG_OPEN\n\n\ndef _ensure_list(\n    result: list[ChatCompletionMessage] | ChatCompletionMessage,\n) -> list[ChatCompletionMessage]:\n    \"\"\"Convert LanguageModelInput to a list for easier testing.\"\"\"\n    if isinstance(result, list):\n        return result\n    return [result]\n\n\n@pytest.fixture\ndef mock_llm_config() -> LLMConfig:\n    \"\"\"Create a minimal LLMConfig for testing.\"\"\"\n    return LLMConfig(\n        model_provider=\"openai\",\n        model_name=\"gpt-4o-mini\",\n        temperature=0.7,\n        api_key=\"test-key\",\n        api_base=None,\n        api_version=None,\n        max_input_tokens=128000,\n    )\n\n\nclass TestUserReminderMessageType:\n    \"\"\"Tests for USER_REMINDER message handling in translate_history_to_llm_format.\"\"\"\n\n    def test_user_reminder_wrapped_with_tags(self, mock_llm_config: LLMConfig) -> None:\n        \"\"\"Test that USER_REMINDER messages are wrapped with system-reminder tags.\"\"\"\n        reminder_text = \"Remember to cite your sources.\"\n        history = [\n            ChatMessageSimple(\n                message=reminder_text,\n                token_count=10,\n                message_type=MessageType.USER_REMINDER,\n            )\n        ]\n\n        raw_result = translate_history_to_llm_format(history, mock_llm_config)\n        result = _ensure_list(raw_result)\n\n        assert len(result) == 1\n        msg = result[0]\n        assert isinstance(msg, UserMessage)\n        assert msg.role == \"user\"\n        # Verify the content starts and ends with the proper tags\n        assert isinstance(msg.content, str)\n        assert msg.content.startswith(SYSTEM_REMINDER_TAG_OPEN)\n        assert msg.content.endswith(SYSTEM_REMINDER_TAG_CLOSE)\n        # Verify the original message is inside the tags\n        assert reminder_text in msg.content\n\n    def test_user_reminder_tag_format(self, mock_llm_config: LLMConfig) -> None:\n        \"\"\"Test the exact format of the system-reminder tag wrapping.\"\"\"\n        reminder_text = \"This is a test reminder.\"\n        history = [\n            ChatMessageSimple(\n                message=reminder_text,\n                token_count=10,\n                message_type=MessageType.USER_REMINDER,\n            )\n        ]\n\n        raw_result = translate_history_to_llm_format(history, mock_llm_config)\n        result = _ensure_list(raw_result)\n\n        assert len(result) == 1\n        msg = result[0]\n        assert isinstance(msg, UserMessage)\n        expected_content = (\n            f\"{SYSTEM_REMINDER_TAG_OPEN}\\n{reminder_text}\\n{SYSTEM_REMINDER_TAG_CLOSE}\"\n        )\n        assert msg.content == expected_content\n\n    def test_user_reminder_converted_to_user_message(\n        self, mock_llm_config: LLMConfig\n    ) -> None:\n        \"\"\"Test that USER_REMINDER is converted to UserMessage (not a different type).\"\"\"\n        history = [\n            ChatMessageSimple(\n                message=\"Test reminder\",\n                token_count=5,\n                message_type=MessageType.USER_REMINDER,\n            )\n        ]\n\n        raw_result = translate_history_to_llm_format(history, mock_llm_config)\n        result = _ensure_list(raw_result)\n\n        assert len(result) == 1\n        # Should be a UserMessage since LLM APIs don't have a native reminder type\n        assert isinstance(result[0], UserMessage)\n        assert result[0].role == \"user\"\n\n    def test_user_reminder_in_mixed_history(self, mock_llm_config: LLMConfig) -> None:\n        \"\"\"Test USER_REMINDER handling when mixed with other message types.\"\"\"\n        history = [\n            ChatMessageSimple(\n                message=\"You are a helpful assistant.\",\n                token_count=10,\n                message_type=MessageType.SYSTEM,\n            ),\n            ChatMessageSimple(\n                message=\"Hello!\",\n                token_count=5,\n                message_type=MessageType.USER,\n            ),\n            ChatMessageSimple(\n                message=\"Hi there! How can I help?\",\n                token_count=10,\n                message_type=MessageType.ASSISTANT,\n            ),\n            ChatMessageSimple(\n                message=\"Remember to be concise.\",\n                token_count=8,\n                message_type=MessageType.USER_REMINDER,\n            ),\n        ]\n\n        raw_result = translate_history_to_llm_format(history, mock_llm_config)\n        result = _ensure_list(raw_result)\n\n        assert len(result) == 4\n        # Check the reminder message (last one)\n        reminder_msg = result[3]\n        assert isinstance(reminder_msg, UserMessage)\n        assert isinstance(reminder_msg.content, str)\n        assert reminder_msg.content.startswith(SYSTEM_REMINDER_TAG_OPEN)\n        assert reminder_msg.content.endswith(SYSTEM_REMINDER_TAG_CLOSE)\n        assert \"Remember to be concise.\" in reminder_msg.content\n\n        # Check that regular USER message is NOT wrapped\n        user_msg = result[1]\n        assert isinstance(user_msg, UserMessage)\n        assert user_msg.content == \"Hello!\"  # No tags\n\n    def test_regular_user_message_not_wrapped(self, mock_llm_config: LLMConfig) -> None:\n        \"\"\"Test that regular USER messages are NOT wrapped with system-reminder tags.\"\"\"\n        history = [\n            ChatMessageSimple(\n                message=\"This is a normal user message.\",\n                token_count=10,\n                message_type=MessageType.USER,\n            )\n        ]\n\n        raw_result = translate_history_to_llm_format(history, mock_llm_config)\n        result = _ensure_list(raw_result)\n\n        assert len(result) == 1\n        msg = result[0]\n        assert isinstance(msg, UserMessage)\n        # Regular user message should NOT have the tags\n        assert isinstance(msg.content, str)\n        assert SYSTEM_REMINDER_TAG_OPEN not in msg.content\n        assert SYSTEM_REMINDER_TAG_CLOSE not in msg.content\n        assert msg.content == \"This is a normal user message.\"\n\n\ndef _create_llm_config(model_name: str) -> LLMConfig:\n    \"\"\"Create a LLMConfig with the specified model name.\"\"\"\n    return LLMConfig(\n        model_provider=\"openai\",\n        model_name=model_name,\n        temperature=0.7,\n        api_key=\"test-key\",\n        api_base=None,\n        api_version=None,\n        max_input_tokens=128000,\n    )\n\n\nclass TestCodeBlockMarkdownFormatting:\n    \"\"\"Tests for CODE_BLOCK_MARKDOWN prefix handling in translate_history_to_llm_format.\n\n    OpenAI reasoning models (o1, o3, gpt-5) need a \"Formatting re-enabled. \" prefix\n    in their system messages for correct markdown generation.\n    \"\"\"\n\n    def test_o1_model_prepends_markdown_to_string(self) -> None:\n        \"\"\"Test that o1 model prepends CODE_BLOCK_MARKDOWN to string system message.\"\"\"\n        llm_config = _create_llm_config(\"o1\")\n        history = [\n            ChatMessageSimple(\n                message=\"You are a helpful assistant.\",\n                token_count=10,\n                message_type=MessageType.SYSTEM,\n            )\n        ]\n\n        raw_result = translate_history_to_llm_format(history, llm_config)\n        result = _ensure_list(raw_result)\n\n        assert len(result) == 1\n        msg = result[0]\n        assert isinstance(msg, SystemMessage)\n        assert isinstance(msg.content, str)\n        assert msg.content == CODE_BLOCK_MARKDOWN + \"You are a helpful assistant.\"\n\n    def test_o3_model_prepends_markdown(self) -> None:\n        \"\"\"Test that o3 model prepends CODE_BLOCK_MARKDOWN to system message.\"\"\"\n        llm_config = _create_llm_config(\"o3-mini\")\n        history = [\n            ChatMessageSimple(\n                message=\"System prompt here.\",\n                token_count=10,\n                message_type=MessageType.SYSTEM,\n            )\n        ]\n\n        raw_result = translate_history_to_llm_format(history, llm_config)\n        result = _ensure_list(raw_result)\n\n        assert len(result) == 1\n        msg = result[0]\n        assert isinstance(msg, SystemMessage)\n        assert isinstance(msg.content, str)\n        assert msg.content.startswith(CODE_BLOCK_MARKDOWN)\n\n    def test_gpt5_model_prepends_markdown(self) -> None:\n        \"\"\"Test that gpt-5 model prepends CODE_BLOCK_MARKDOWN to system message.\"\"\"\n        llm_config = _create_llm_config(\"gpt-5\")\n        history = [\n            ChatMessageSimple(\n                message=\"System prompt here.\",\n                token_count=10,\n                message_type=MessageType.SYSTEM,\n            )\n        ]\n\n        raw_result = translate_history_to_llm_format(history, llm_config)\n        result = _ensure_list(raw_result)\n\n        assert len(result) == 1\n        msg = result[0]\n        assert isinstance(msg, SystemMessage)\n        assert isinstance(msg.content, str)\n        assert msg.content.startswith(CODE_BLOCK_MARKDOWN)\n\n    def test_gpt4o_does_not_prepend(self) -> None:\n        \"\"\"Test that gpt-4o model does NOT prepend CODE_BLOCK_MARKDOWN.\"\"\"\n        llm_config = _create_llm_config(\"gpt-4o\")\n        history = [\n            ChatMessageSimple(\n                message=\"You are a helpful assistant.\",\n                token_count=10,\n                message_type=MessageType.SYSTEM,\n            )\n        ]\n\n        raw_result = translate_history_to_llm_format(history, llm_config)\n        result = _ensure_list(raw_result)\n\n        assert len(result) == 1\n        msg = result[0]\n        assert isinstance(msg, SystemMessage)\n        assert isinstance(msg.content, str)\n        # Should NOT have the prefix\n        assert msg.content == \"You are a helpful assistant.\"\n        assert not msg.content.startswith(CODE_BLOCK_MARKDOWN)\n\n    def test_no_system_message_no_crash(self) -> None:\n        \"\"\"Test that history without system message doesn't crash.\"\"\"\n        llm_config = _create_llm_config(\"o1\")\n        history = [\n            ChatMessageSimple(\n                message=\"Hello!\",\n                token_count=5,\n                message_type=MessageType.USER,\n            )\n        ]\n\n        raw_result = translate_history_to_llm_format(history, llm_config)\n        result = _ensure_list(raw_result)\n\n        assert len(result) == 1\n        msg = result[0]\n        assert isinstance(msg, UserMessage)\n        assert msg.content == \"Hello!\"\n\n    def test_only_first_system_message_modified(self) -> None:\n        \"\"\"Test that only the first system message gets the prefix.\"\"\"\n        llm_config = _create_llm_config(\"o1\")\n        history = [\n            ChatMessageSimple(\n                message=\"First system prompt.\",\n                token_count=10,\n                message_type=MessageType.SYSTEM,\n            ),\n            ChatMessageSimple(\n                message=\"Hello!\",\n                token_count=5,\n                message_type=MessageType.USER,\n            ),\n            ChatMessageSimple(\n                message=\"Second system prompt.\",\n                token_count=10,\n                message_type=MessageType.SYSTEM,\n            ),\n        ]\n\n        raw_result = translate_history_to_llm_format(history, llm_config)\n        result = _ensure_list(raw_result)\n\n        assert len(result) == 3\n        # First system message should have prefix\n        first_sys = result[0]\n        assert isinstance(first_sys, SystemMessage)\n        assert isinstance(first_sys.content, str)\n        assert first_sys.content.startswith(CODE_BLOCK_MARKDOWN)\n        # Second system message should NOT have prefix (only first one is modified)\n        second_sys = result[2]\n        assert isinstance(second_sys, SystemMessage)\n        assert isinstance(second_sys.content, str)\n        assert not second_sys.content.startswith(CODE_BLOCK_MARKDOWN)\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/conftest.py",
    "content": "from collections.abc import Generator\nfrom uuid import uuid4\n\nimport pytest\nfrom fastapi_users.password import PasswordHelper\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.enums import AccountType\nfrom onyx.db.models import User\nfrom onyx.db.models import UserRole\nfrom onyx.file_store.file_store import get_default_file_store\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\nfrom tests.external_dependency_unit.full_setup import (\n    ensure_full_deployment_setup,\n)\n\n\n@pytest.fixture(scope=\"function\")\ndef db_session() -> Generator[Session, None, None]:\n    \"\"\"Create a database session for testing using the actual PostgreSQL database\"\"\"\n    # Make sure that the db engine is initialized before any tests are run\n    SqlEngine.init_engine(\n        pool_size=10,\n        max_overflow=5,\n    )\n    with get_session_with_current_tenant() as session:\n        yield session\n\n\n@pytest.fixture(scope=\"session\")\ndef full_deployment_setup() -> Generator[None, None, None]:\n    \"\"\"Optional fixture to perform full deployment-like setup on demand.\n\n    Import and call tests.external_dependency_unit.startup.full_setup.ensure_full_deployment_setup\n    to initialize Postgres defaults, Vespa indices, and seed initial docs.\n    \"\"\"\n    ensure_full_deployment_setup()\n    yield\n\n\n@pytest.fixture(scope=\"function\")\ndef tenant_context() -> Generator[None, None, None]:\n    \"\"\"Set up tenant context for testing\"\"\"\n    # Set the tenant context for the test\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n    try:\n        yield\n    finally:\n        # Reset the tenant context after the test\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\ndef create_test_user(\n    db_session: Session,\n    email_prefix: str,\n    role: UserRole = UserRole.BASIC,\n    account_type: AccountType = AccountType.STANDARD,\n) -> User:\n    \"\"\"Helper to create a test user with a unique email\"\"\"\n    # Use UUID to ensure unique email addresses\n    unique_email = f\"{email_prefix}_{uuid4().hex[:8]}@example.com\"\n\n    password_helper = PasswordHelper()\n    password = password_helper.generate()\n    hashed_password = password_helper.hash(password)\n\n    user = User(\n        id=uuid4(),\n        email=unique_email,\n        hashed_password=hashed_password,\n        is_active=True,\n        is_superuser=False,\n        is_verified=True,\n        role=role,\n        account_type=account_type,\n    )\n    db_session.add(user)\n    db_session.commit()\n    db_session.refresh(user)\n    return user\n\n\n@pytest.fixture(scope=\"module\")\ndef initialize_file_store() -> Generator[None, None, None]:\n    \"\"\"Initialize the file store for testing.\n\n    Scoped to module level since file store initialization is idempotent\n    and doesn't need to be reset between tests.\n    \"\"\"\n    get_default_file_store().initialize()\n    yield\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/connectors/confluence/conftest.py",
    "content": "import os\nfrom typing import Any\n\nimport pytest\n\n\n@pytest.fixture\ndef confluence_connector_config() -> dict[str, Any]:\n    url_base = os.environ.get(\"CONFLUENCE_TEST_SPACE_URL\")\n    space_key = os.environ.get(\"CONFLUENCE_SPACE_KEY\")\n    page_id = os.environ.get(\"CONFLUENCE_PAGE_ID\")\n    is_cloud = os.environ.get(\"CONFLUENCE_IS_CLOUD\", \"true\").lower() == \"true\"\n\n    assert url_base, \"CONFLUENCE_URL environment variable is required\"\n\n    return {\n        \"wiki_base\": url_base,\n        \"is_cloud\": is_cloud,\n        \"space\": space_key or \"\",\n        \"page_id\": page_id or \"\",\n    }\n\n\n@pytest.fixture\ndef confluence_credential_json() -> dict[str, Any]:\n    username = os.environ.get(\"CONFLUENCE_USER_NAME\")\n    access_token = os.environ.get(\"CONFLUENCE_ACCESS_TOKEN\")\n\n    assert username, \"CONFLUENCE_USERNAME environment variable is required\"\n    assert access_token, \"CONFLUENCE_ACCESS_TOKEN environment variable is required\"\n\n    return {\n        \"confluence_username\": username,\n        \"confluence_access_token\": access_token,\n    }\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py",
    "content": "from typing import Any\n\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.external_permissions.confluence.group_sync import confluence_group_sync\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom shared_configs.contextvars import get_current_tenant_id\nfrom tests.daily.connectors.confluence.models import ExternalUserGroupSet\n\n\n# In order to get these tests to run, use the credentials from Bitwarden.\n# Search up \"ENV vars for local and Github tests\", and find the Confluence relevant key-value pairs.\n\n_EXPECTED_CONFLUENCE_GROUPS = [\n    ExternalUserGroupSet(\n        id=\"confluence-admins-danswerai\",\n        user_emails={\"chris@onyx.app\", \"yuhong@onyx.app\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"org-admins\",\n        user_emails={\n            \"founders@onyx.app\",\n            \"chris@onyx.app\",\n            \"yuhong@onyx.app\",\n            \"oauth@onyx.app\",\n        },\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"confluence-users-danswerai\",\n        user_emails={\n            \"chris@onyx.app\",\n            \"hagen@danswer.ai\",\n            \"founders@onyx.app\",\n            \"pablo@onyx.app\",\n            \"yuhong@onyx.app\",\n            \"oauth@onyx.app\",\n        },\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"jira-users-danswerai\",\n        user_emails={\n            \"hagen@danswer.ai\",\n            \"founders@onyx.app\",\n            \"pablo@onyx.app\",\n            \"chris@onyx.app\",\n            \"oauth@onyx.app\",\n        },\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"jira-admins-danswerai\",\n        user_emails={\"hagen@danswer.ai\", \"founders@onyx.app\", \"pablo@onyx.app\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"confluence-user-access-admins-danswerai\",\n        user_emails={\"hagen@danswer.ai\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"jira-user-access-admins-danswerai\",\n        user_emails={\"hagen@danswer.ai\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"Yuhong Only No Chris Allowed\",\n        user_emails={\"yuhong@onyx.app\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"All_Confluence_Users_Found_By_Onyx\",\n        user_emails={\n            \"chris@onyx.app\",\n            \"founders@onyx.app\",\n            \"hagen@danswer.ai\",\n            \"pablo@onyx.app\",\n            \"yuhong@onyx.app\",\n            \"oauth@onyx.app\",\n        },\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"bitbucket-users-onyxai\",\n        user_emails={\"founders@onyx.app\", \"oauth@onyx.app\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"bitbucket-admins-onyxai\",\n        user_emails={\"founders@onyx.app\", \"oauth@onyx.app\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"jira-servicemanagement-users-danswerai\",\n        user_emails={\"oauth@onyx.app\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"no yuhong allowed\",\n        user_emails={\"hagen@danswer.ai\", \"pablo@onyx.app\", \"chris@onyx.app\"},\n        gives_anyone_access=False,\n    ),\n]\n\n\ndef test_confluence_group_sync(\n    db_session: Session,\n    confluence_connector_config: dict[str, Any],\n    confluence_credential_json: dict[str, Any],\n) -> None:\n    connector = Connector(\n        name=\"Test Connector\",\n        source=DocumentSource.CONFLUENCE,\n        input_type=InputType.POLL,\n        connector_specific_config=confluence_connector_config,\n        refresh_freq=None,\n        prune_freq=None,\n        indexing_start=None,\n    )\n    db_session.add(connector)\n    db_session.flush()\n\n    credential = Credential(\n        source=DocumentSource.CONFLUENCE,\n        credential_json=confluence_credential_json,\n    )\n    db_session.add(credential)\n    db_session.flush()\n    # Expire the credential so it reloads from DB with SensitiveValue wrapper\n    db_session.expire(credential)\n\n    cc_pair = ConnectorCredentialPair(\n        connector_id=connector.id,\n        credential_id=credential.id,\n        name=\"Test CC Pair\",\n        status=ConnectorCredentialPairStatus.ACTIVE,\n        access_type=AccessType.SYNC,\n        auto_sync_options=None,\n    )\n    db_session.add(cc_pair)\n    db_session.commit()\n    db_session.refresh(cc_pair)\n\n    tenant_id = get_current_tenant_id()\n    group_sync_iter = confluence_group_sync(\n        tenant_id=tenant_id,\n        cc_pair=cc_pair,\n    )\n\n    expected_groups = {group.id: group for group in _EXPECTED_CONFLUENCE_GROUPS}\n    actual_groups = {\n        group.id: ExternalUserGroupSet.from_model(external_user_group=group)\n        for group in group_sync_iter\n    }\n    assert expected_groups == actual_groups\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/connectors/google_drive/test_google_drive_group_sync.py",
    "content": "from collections.abc import Generator\nfrom unittest.mock import Mock\nfrom unittest.mock import patch\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.background.celery.tasks.external_group_syncing.tasks import (\n    _perform_external_group_sync,\n)\nfrom ee.onyx.db.external_perm import ExternalUserGroup\nfrom onyx.access.utils import build_ext_group_name_for_onyx\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import AccountType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom onyx.db.models import PublicExternalUserGroup\nfrom onyx.db.models import User\nfrom onyx.db.models import User__ExternalUserGroupId\nfrom onyx.db.models import UserRole\nfrom tests.external_dependency_unit.conftest import create_test_user\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n\ndef _create_ext_perm_user(db_session: Session, name: str) -> User:\n    \"\"\"Create an external-permission user for group sync tests.\"\"\"\n    return create_test_user(\n        db_session,\n        name,\n        role=UserRole.EXT_PERM_USER,\n        account_type=AccountType.EXT_PERM_USER,\n    )\n\n\ndef _create_test_connector_credential_pair(\n    db_session: Session, source: DocumentSource = DocumentSource.GOOGLE_DRIVE\n) -> ConnectorCredentialPair:\n    \"\"\"Helper to create a test connector credential pair\"\"\"\n    # For Google Drive, we need to include required config parameters\n    connector_config = {}\n    if source == DocumentSource.GOOGLE_DRIVE:\n        connector_config = {\n            \"include_shared_drives\": True,  # At least one of these is required\n        }\n\n    connector = Connector(\n        name=\"Test Connector\",\n        source=source,\n        input_type=InputType.POLL,\n        connector_specific_config=connector_config,\n        refresh_freq=None,\n        prune_freq=None,\n        indexing_start=None,\n    )\n    db_session.add(connector)\n    db_session.flush()  # To get the connector ID\n\n    credential = Credential(\n        source=source,\n        credential_json={},\n        user_id=None,\n    )\n    db_session.add(credential)\n    db_session.flush()  # To get the credential ID\n    # Expire the credential so it reloads from DB with SensitiveValue wrapper\n    db_session.expire(credential)\n\n    cc_pair = ConnectorCredentialPair(\n        connector_id=connector.id,\n        credential_id=credential.id,\n        name=\"Test CC Pair\",\n        status=ConnectorCredentialPairStatus.ACTIVE,\n        access_type=AccessType.SYNC,\n        auto_sync_options=None,\n    )\n    db_session.add(cc_pair)\n    db_session.commit()\n    db_session.refresh(cc_pair)\n    return cc_pair\n\n\ndef _get_user_external_groups(\n    db_session: Session, cc_pair_id: int, include_stale: bool = False\n) -> list[User__ExternalUserGroupId]:\n    \"\"\"Helper to get user external groups from database\"\"\"\n    query = select(User__ExternalUserGroupId).where(\n        User__ExternalUserGroupId.cc_pair_id == cc_pair_id\n    )\n    if not include_stale:\n        query = query.where(User__ExternalUserGroupId.stale.is_(False))\n\n    return list(db_session.scalars(query).all())\n\n\ndef _get_public_external_groups(\n    db_session: Session, cc_pair_id: int, include_stale: bool = False\n) -> list[PublicExternalUserGroup]:\n    \"\"\"Helper to get public external groups from database\"\"\"\n    query = select(PublicExternalUserGroup).where(\n        PublicExternalUserGroup.cc_pair_id == cc_pair_id\n    )\n    if not include_stale:\n        query = query.where(PublicExternalUserGroup.stale.is_(False))\n\n    return list(db_session.scalars(query).all())\n\n\nclass TestPerformExternalGroupSync:\n    def test_initial_group_sync(self, db_session: Session) -> None:\n        \"\"\"Test syncing external groups for the first time (initial sync)\"\"\"\n        # Create test data\n        user1 = _create_ext_perm_user(db_session, \"user1\")\n        user2 = _create_ext_perm_user(db_session, \"user2\")\n        user3 = _create_ext_perm_user(db_session, \"user3\")\n        cc_pair = _create_test_connector_credential_pair(db_session)\n\n        # Mock external groups data as a generator that yields the expected groups\n        mock_groups = [\n            ExternalUserGroup(id=\"group1\", user_emails=[user1.email, user2.email]),\n            ExternalUserGroup(id=\"group2\", user_emails=[user2.email, user3.email]),\n            ExternalUserGroup(\n                id=\"public_group\", user_emails=[user1.email], gives_anyone_access=True\n            ),\n        ]\n\n        def mock_group_sync_func(\n            tenant_id: str,  # noqa: ARG001\n            cc_pair: ConnectorCredentialPair,  # noqa: ARG001\n        ) -> Generator[ExternalUserGroup, None, None]:\n            for group in mock_groups:\n                yield group\n\n        # Verify no groups exist initially\n        assert len(_get_user_external_groups(db_session, cc_pair.id)) == 0\n        assert len(_get_public_external_groups(db_session, cc_pair.id)) == 0\n\n        with patch(\n            \"ee.onyx.background.celery.tasks.external_group_syncing.tasks.get_source_perm_sync_config\"\n        ) as mock_config:\n            # Mock sync config\n            mock_group_config = Mock()\n            mock_group_config.group_sync_func = mock_group_sync_func\n\n            mock_sync_config = Mock()\n            mock_sync_config.group_sync_config = mock_group_config\n\n            mock_config.return_value = mock_sync_config\n\n            # Run the sync\n            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)\n\n            # Verify user groups were created\n            user_groups = _get_user_external_groups(db_session, cc_pair.id)\n            assert (\n                len(user_groups) == 5\n            )  # user1+2 in group1, user2+3 in group2, user1 in public_group\n\n            # Verify group names are properly prefixed\n            expected_group1_id = build_ext_group_name_for_onyx(\n                \"group1\", DocumentSource.GOOGLE_DRIVE\n            )\n            expected_group2_id = build_ext_group_name_for_onyx(\n                \"group2\", DocumentSource.GOOGLE_DRIVE\n            )\n            expected_public_group_id = build_ext_group_name_for_onyx(\n                \"public_group\", DocumentSource.GOOGLE_DRIVE\n            )\n\n            group_ids = {ug.external_user_group_id for ug in user_groups}\n            assert expected_group1_id in group_ids\n            assert expected_group2_id in group_ids\n            assert expected_public_group_id in group_ids\n\n            # Verify public group was created\n            public_groups = _get_public_external_groups(db_session, cc_pair.id)\n            assert len(public_groups) == 1\n            assert public_groups[0].external_user_group_id == expected_public_group_id\n            assert public_groups[0].stale is False\n\n            # Verify all groups are not stale\n            for ug in user_groups:\n                assert ug.stale is False\n\n    def test_update_existing_groups(self, db_session: Session) -> None:\n        \"\"\"Test updating existing groups (adding/removing users)\"\"\"\n        # Create test data\n        user1 = _create_ext_perm_user(db_session, \"user1\")\n        user2 = _create_ext_perm_user(db_session, \"user2\")\n        user3 = _create_ext_perm_user(db_session, \"user3\")\n        cc_pair = _create_test_connector_credential_pair(db_session)\n\n        # Initial sync with original groups\n        def initial_group_sync_func(\n            tenant_id: str,  # noqa: ARG001\n            cc_pair: ConnectorCredentialPair,  # noqa: ARG001\n        ) -> Generator[ExternalUserGroup, None, None]:\n            yield ExternalUserGroup(id=\"group1\", user_emails=[user1.email, user2.email])\n            yield ExternalUserGroup(id=\"group2\", user_emails=[user2.email])\n\n        # For now, verify test setup is working\n        assert len(_get_user_external_groups(db_session, cc_pair.id)) == 0\n\n        with patch(\n            \"ee.onyx.background.celery.tasks.external_group_syncing.tasks.get_source_perm_sync_config\"\n        ) as mock_config:\n            # Mock sync config\n            mock_group_config = Mock()\n            mock_group_config.group_sync_func = initial_group_sync_func\n\n            mock_sync_config = Mock()\n            mock_sync_config.group_sync_config = mock_group_config\n\n            mock_config.return_value = mock_sync_config\n\n            # Run initial sync\n            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)\n\n            # Verify initial state\n            initial_user_groups = _get_user_external_groups(db_session, cc_pair.id)\n            assert (\n                len(initial_user_groups) == 3\n            )  # user1+user2 in group1, user2 in group2\n\n            # Updated sync with modified groups\n            def updated_group_sync_func(\n                tenant_id: str,  # noqa: ARG001\n                cc_pair: ConnectorCredentialPair,  # noqa: ARG001\n            ) -> Generator[ExternalUserGroup, None, None]:\n                # group1 now has user1 and user3 (user2 removed, user3 added)\n                yield ExternalUserGroup(\n                    id=\"group1\", user_emails=[user1.email, user3.email]\n                )\n                # group2 now has all three users (user1 and user3 added)\n                yield ExternalUserGroup(\n                    id=\"group2\", user_emails=[user1.email, user2.email, user3.email]\n                )\n\n            # Update the mock function\n            mock_group_config.group_sync_func = updated_group_sync_func\n\n            # Run updated sync\n            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)\n\n            # Verify updated state\n            updated_user_groups = _get_user_external_groups(db_session, cc_pair.id)\n            assert (\n                len(updated_user_groups) == 5\n            )  # user1+user3 in group1, user1+user2+user3 in group2\n\n            # Verify specific user-group mappings\n            expected_group1_id = build_ext_group_name_for_onyx(\n                \"group1\", DocumentSource.GOOGLE_DRIVE\n            )\n            expected_group2_id = build_ext_group_name_for_onyx(\n                \"group2\", DocumentSource.GOOGLE_DRIVE\n            )\n\n            group1_users = {\n                ug.user_id\n                for ug in updated_user_groups\n                if ug.external_user_group_id == expected_group1_id\n            }\n            group2_users = {\n                ug.user_id\n                for ug in updated_user_groups\n                if ug.external_user_group_id == expected_group2_id\n            }\n\n            assert user1.id in group1_users and user3.id in group1_users\n            assert user2.id not in group1_users  # user2 was removed from group1\n            assert (\n                user1.id in group2_users\n                and user2.id in group2_users\n                and user3.id in group2_users\n            )\n\n            # Verify no stale groups remain\n            for ug in updated_user_groups:\n                assert ug.stale is False\n\n    def test_remove_groups(self, db_session: Session) -> None:\n        \"\"\"Test removing groups (groups that no longer exist in external system)\"\"\"\n        # Create test data\n        user1 = _create_ext_perm_user(db_session, \"user1\")\n        user2 = _create_ext_perm_user(db_session, \"user2\")\n        cc_pair = _create_test_connector_credential_pair(db_session)\n\n        # Initial sync with multiple groups\n        def initial_group_sync_func(\n            tenant_id: str,  # noqa: ARG001\n            cc_pair: ConnectorCredentialPair,  # noqa: ARG001\n        ) -> Generator[ExternalUserGroup, None, None]:\n            yield ExternalUserGroup(id=\"group1\", user_emails=[user1.email, user2.email])\n            yield ExternalUserGroup(id=\"group2\", user_emails=[user1.email])\n            yield ExternalUserGroup(\n                id=\"public_group\", user_emails=[user1.email], gives_anyone_access=True\n            )\n\n        assert len(_get_user_external_groups(db_session, cc_pair.id)) == 0\n        assert len(_get_public_external_groups(db_session, cc_pair.id)) == 0\n\n        with patch(\n            \"ee.onyx.background.celery.tasks.external_group_syncing.tasks.get_source_perm_sync_config\"\n        ) as mock_config:\n            # Mock sync config\n            mock_group_config = Mock()\n            mock_group_config.group_sync_func = initial_group_sync_func\n\n            mock_sync_config = Mock()\n            mock_sync_config.group_sync_config = mock_group_config\n\n            mock_config.return_value = mock_sync_config\n\n            # Run initial sync\n            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)\n\n            # Verify initial state\n            initial_user_groups = _get_user_external_groups(db_session, cc_pair.id)\n            initial_public_groups = _get_public_external_groups(db_session, cc_pair.id)\n            assert (\n                len(initial_user_groups) == 4\n            )  # 2 in group1, 1 in group2, 1 in public_group\n            assert len(initial_public_groups) == 1\n\n            # Updated sync with only one group remaining\n            def updated_group_sync_func(\n                tenant_id: str,  # noqa: ARG001\n                cc_pair: ConnectorCredentialPair,  # noqa: ARG001\n            ) -> Generator[ExternalUserGroup, None, None]:\n                # Only group1 remains, group2 and public_group are removed\n                yield ExternalUserGroup(\n                    id=\"group1\", user_emails=[user1.email, user2.email]\n                )\n\n            # Update the mock function\n            mock_group_config.group_sync_func = updated_group_sync_func\n\n            # Run updated sync\n            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)\n\n            # Verify updated state\n            updated_user_groups = _get_user_external_groups(db_session, cc_pair.id)\n            updated_public_groups = _get_public_external_groups(db_session, cc_pair.id)\n\n            assert len(updated_user_groups) == 2  # Only group1 mappings remain\n            assert len(updated_public_groups) == 0  # Public group was removed\n\n            # Verify only group1 exists\n            expected_group1_id = build_ext_group_name_for_onyx(\n                \"group1\", DocumentSource.GOOGLE_DRIVE\n            )\n            group_ids = {ug.external_user_group_id for ug in updated_user_groups}\n            assert group_ids == {expected_group1_id}\n\n            # Verify stale groups were actually deleted from database\n            all_user_groups_including_stale = _get_user_external_groups(\n                db_session, cc_pair.id, include_stale=True\n            )\n            all_public_groups_including_stale = _get_public_external_groups(\n                db_session, cc_pair.id, include_stale=True\n            )\n\n            assert len(all_user_groups_including_stale) == 2  # Only group1 mappings\n            assert len(all_public_groups_including_stale) == 0  # Public group deleted\n\n    def test_empty_group_sync(self, db_session: Session) -> None:\n        \"\"\"Test syncing when no groups are returned (all groups removed)\"\"\"\n        # Create test data\n        user1 = _create_ext_perm_user(db_session, \"user1\")\n        cc_pair = _create_test_connector_credential_pair(db_session)\n\n        # Initial sync with groups\n        def initial_group_sync_func(\n            tenant_id: str,  # noqa: ARG001\n            cc_pair: ConnectorCredentialPair,  # noqa: ARG001\n        ) -> Generator[ExternalUserGroup, None, None]:\n            yield ExternalUserGroup(id=\"group1\", user_emails=[user1.email])\n\n        with patch(\n            \"ee.onyx.background.celery.tasks.external_group_syncing.tasks.get_source_perm_sync_config\"\n        ) as mock_config:\n            # Mock sync config\n            mock_group_config = Mock()\n            mock_group_config.group_sync_func = initial_group_sync_func\n\n            mock_sync_config = Mock()\n            mock_sync_config.group_sync_config = mock_group_config\n\n            mock_config.return_value = mock_sync_config\n\n            # Run initial sync\n            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)\n\n            # Verify initial state\n            initial_user_groups = _get_user_external_groups(db_session, cc_pair.id)\n            assert len(initial_user_groups) == 1\n\n            # Updated sync with no groups\n            def empty_group_sync_func(\n                tenant_id: str,  # noqa: ARG001\n                cc_pair: ConnectorCredentialPair,  # noqa: ARG001\n            ) -> Generator[ExternalUserGroup, None, None]:\n                # No groups yielded\n                return\n                yield  # This line is never reached but satisfies the generator type\n\n            # Update the mock function\n            mock_group_config.group_sync_func = empty_group_sync_func\n\n            # Run updated sync\n            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)\n\n            # Verify all groups were removed\n            updated_user_groups = _get_user_external_groups(db_session, cc_pair.id)\n            updated_public_groups = _get_public_external_groups(db_session, cc_pair.id)\n\n            assert len(updated_user_groups) == 0\n            assert len(updated_public_groups) == 0\n\n    def test_batch_processing(self, db_session: Session) -> None:\n        \"\"\"Test that large numbers of groups are processed in batches\"\"\"\n        # Create many test users\n        users = []\n        for i in range(150):  # More than the batch size of 100\n            users.append(_create_ext_perm_user(db_session, f\"user{i}\"))\n\n        cc_pair = _create_test_connector_credential_pair(db_session)\n\n        # Create a large group with many users\n        def large_group_sync_func(\n            tenant_id: str,  # noqa: ARG001\n            cc_pair: ConnectorCredentialPair,  # noqa: ARG001\n        ) -> Generator[ExternalUserGroup, None, None]:\n            yield ExternalUserGroup(\n                id=\"large_group\", user_emails=[user.email for user in users]\n            )\n\n        with patch(\n            \"ee.onyx.background.celery.tasks.external_group_syncing.tasks.get_source_perm_sync_config\"\n        ) as mock_config:\n            # Mock sync config\n            mock_group_config = Mock()\n            mock_group_config.group_sync_func = large_group_sync_func\n\n            mock_sync_config = Mock()\n            mock_sync_config.group_sync_config = mock_group_config\n\n            mock_config.return_value = mock_sync_config\n\n            # Run the sync\n            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)\n\n            # Verify all users were added to the group\n            user_groups = _get_user_external_groups(db_session, cc_pair.id)\n            assert len(user_groups) == 150\n\n            # Verify all groups are not stale\n            for ug in user_groups:\n                assert ug.stale is False\n\n    def test_mixed_regular_and_public_groups(self, db_session: Session) -> None:\n        \"\"\"Test syncing a mix of regular and public groups\"\"\"\n        # Create test data\n        user1 = _create_ext_perm_user(db_session, \"user1\")\n        user2 = _create_ext_perm_user(db_session, \"user2\")\n        cc_pair = _create_test_connector_credential_pair(db_session)\n\n        def mixed_group_sync_func(\n            tenant_id: str,  # noqa: ARG001\n            cc_pair: ConnectorCredentialPair,  # noqa: ARG001\n        ) -> Generator[ExternalUserGroup, None, None]:\n            yield ExternalUserGroup(\n                id=\"regular_group\", user_emails=[user1.email, user2.email]\n            )\n            yield ExternalUserGroup(\n                id=\"public_group1\", user_emails=[user1.email], gives_anyone_access=True\n            )\n            yield ExternalUserGroup(\n                id=\"public_group2\",\n                user_emails=[],  # Empty user list for public group\n                gives_anyone_access=True,\n            )\n\n        with patch(\n            \"ee.onyx.background.celery.tasks.external_group_syncing.tasks.get_source_perm_sync_config\"\n        ) as mock_config:\n            # Mock sync config\n            mock_group_config = Mock()\n            mock_group_config.group_sync_func = mixed_group_sync_func\n\n            mock_sync_config = Mock()\n            mock_sync_config.group_sync_config = mock_group_config\n\n            mock_config.return_value = mock_sync_config\n\n            # Run the sync\n            _perform_external_group_sync(cc_pair.id, TEST_TENANT_ID)\n\n            # Verify user groups\n            user_groups = _get_user_external_groups(db_session, cc_pair.id)\n            expected_regular_group_id = build_ext_group_name_for_onyx(\n                \"regular_group\", DocumentSource.GOOGLE_DRIVE\n            )\n            expected_public_group1_id = build_ext_group_name_for_onyx(\n                \"public_group1\", DocumentSource.GOOGLE_DRIVE\n            )\n\n            # Should have 2 users in regular_group + 1 user in public_group1 = 3 total\n            assert len(user_groups) == 3\n\n            regular_group_users = [\n                ug\n                for ug in user_groups\n                if ug.external_user_group_id == expected_regular_group_id\n            ]\n            public_group1_users = [\n                ug\n                for ug in user_groups\n                if ug.external_user_group_id == expected_public_group1_id\n            ]\n\n            assert len(regular_group_users) == 2\n            assert len(public_group1_users) == 1\n\n            # Verify public groups\n            public_groups = _get_public_external_groups(db_session, cc_pair.id)\n            assert len(public_groups) == 2  # public_group1 and public_group2\n\n            public_group_ids = {pg.external_user_group_id for pg in public_groups}\n            expected_public_group2_id = build_ext_group_name_for_onyx(\n                \"public_group2\", DocumentSource.GOOGLE_DRIVE\n            )\n            assert expected_public_group1_id in public_group_ids\n            assert expected_public_group2_id in public_group_ids\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/connectors/jira/conftest.py",
    "content": "import os\nfrom typing import Any\n\nimport pytest\n\n\n@pytest.fixture\ndef jira_connector_config() -> dict[str, Any]:\n    jira_base_url = os.environ.get(\"JIRA_BASE_URL\", \"https://danswerai.atlassian.net\")\n\n    return {\n        \"jira_base_url\": jira_base_url,\n        \"project_key\": \"\",  # Empty to sync all projects\n        \"scoped_token\": False,\n    }\n\n\n@pytest.fixture\ndef jira_credential_json() -> dict[str, Any]:\n    user_email = os.environ.get(\"JIRA_ADMIN_USER_EMAIL\", \"chris@onyx.app\")\n    api_token = os.environ.get(\"JIRA_ADMIN_API_TOKEN\")\n\n    assert user_email, \"JIRA_ADMIN_USER_EMAIL environment variable is required\"\n    assert api_token, \"JIRA_ADMIN_API_TOKEN environment variable is required\"\n\n    return {\n        \"jira_user_email\": user_email,\n        \"jira_api_token\": api_token,\n    }\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/connectors/jira/test_jira_doc_sync.py",
    "content": "from typing import Any\n\nimport pytest\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.external_permissions.jira.doc_sync import jira_doc_sync\nfrom onyx.access.models import DocExternalAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom onyx.db.utils import DocumentRow\nfrom onyx.db.utils import SortOrder\n\n\n# In order to get these tests to run, use the credentials from Bitwarden.\n# Search up \"ENV vars for local and Github tests\", and find the Jira relevant key-value pairs.\n# Required env vars: JIRA_USER_EMAIL, JIRA_API_TOKEN\n\npytestmark = pytest.mark.usefixtures(\"enable_ee\")\n\n\nclass DocExternalAccessSet(BaseModel):\n    \"\"\"A version of DocExternalAccess that uses sets for comparison.\"\"\"\n\n    doc_id: str\n    external_user_emails: set[str]\n    external_user_group_ids: set[str]\n    is_public: bool\n\n    @classmethod\n    def from_doc_external_access(\n        cls, doc_external_access: DocExternalAccess\n    ) -> \"DocExternalAccessSet\":\n        return cls(\n            doc_id=doc_external_access.doc_id,\n            external_user_emails=doc_external_access.external_access.external_user_emails,\n            external_user_group_ids=doc_external_access.external_access.external_user_group_ids,\n            is_public=doc_external_access.external_access.is_public,\n        )\n\n\ndef test_jira_doc_sync(\n    db_session: Session,\n    jira_connector_config: dict[str, Any],\n    jira_credential_json: dict[str, Any],\n) -> None:\n    \"\"\"Test that Jira doc sync returns documents with correct permissions.\n\n    This test uses the AS project which has applicationRole permission,\n    meaning all documents should be marked as public.\n    \"\"\"\n    try:\n        # Use AS project specifically for this test\n        connector_config = {\n            **jira_connector_config,\n            \"project_key\": \"AS\",  # DailyConnectorTestProject\n        }\n\n        connector = Connector(\n            name=\"Test Jira Doc Sync Connector\",\n            source=DocumentSource.JIRA,\n            input_type=InputType.POLL,\n            connector_specific_config=connector_config,\n            refresh_freq=None,\n            prune_freq=None,\n            indexing_start=None,\n        )\n        db_session.add(connector)\n        db_session.flush()\n\n        credential = Credential(\n            source=DocumentSource.JIRA,\n            credential_json=jira_credential_json,\n        )\n        db_session.add(credential)\n        db_session.flush()\n        # Expire the credential so it reloads from DB with SensitiveValue wrapper\n        db_session.expire(credential)\n\n        cc_pair = ConnectorCredentialPair(\n            connector_id=connector.id,\n            credential_id=credential.id,\n            name=\"Test Jira Doc Sync CC Pair\",\n            status=ConnectorCredentialPairStatus.ACTIVE,\n            access_type=AccessType.SYNC,\n            auto_sync_options=None,\n        )\n        db_session.add(cc_pair)\n        db_session.flush()\n        db_session.refresh(cc_pair)\n\n        # Mock functions - we don't have existing docs in the test DB\n        def fetch_all_existing_docs_fn(\n            sort_order: SortOrder | None = None,  # noqa: ARG001\n        ) -> list[DocumentRow]:\n            return []\n\n        def fetch_all_existing_docs_ids_fn() -> list[str]:\n            return []\n\n        doc_sync_iter = jira_doc_sync(\n            cc_pair=cc_pair,\n            fetch_all_existing_docs_fn=fetch_all_existing_docs_fn,\n            fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,\n        )\n\n        # Expected documents from the danswerai.atlassian.net Jira instance\n        # The AS project has applicationRole permission, so all docs should be public\n        _EXPECTED_JIRA_DOCS = [\n            DocExternalAccessSet(\n                doc_id=\"https://danswerai.atlassian.net/browse/AS-3\",\n                external_user_emails=set(),\n                external_user_group_ids=set(),\n                is_public=True,\n            ),\n            DocExternalAccessSet(\n                doc_id=\"https://danswerai.atlassian.net/browse/AS-4\",\n                external_user_emails=set(),\n                external_user_group_ids=set(),\n                is_public=True,\n            ),\n        ]\n\n        expected_docs = {doc.doc_id: doc for doc in _EXPECTED_JIRA_DOCS}\n        actual_docs = {\n            doc.doc_id: DocExternalAccessSet.from_doc_external_access(doc)\n            for doc in doc_sync_iter\n            if isinstance(doc, DocExternalAccess)\n        }\n        assert (\n            expected_docs == actual_docs\n        ), f\"Expected docs: {expected_docs}\\nActual docs: {actual_docs}\"\n    finally:\n        db_session.rollback()\n\n\ndef test_jira_doc_sync_with_specific_permissions(\n    db_session: Session,\n    jira_connector_config: dict[str, Any],\n    jira_credential_json: dict[str, Any],\n) -> None:\n    \"\"\"Test that Jira doc sync returns documents with specific permissions.\n\n    This test uses a project that has specific user permissions to verify\n    that specific users are correctly extracted.\n    \"\"\"\n    try:\n        # Use SUP project which has specific user permissions\n        connector_config = {\n            **jira_connector_config,\n            \"project_key\": \"SUP\",\n        }\n\n        connector = Connector(\n            name=\"Test Jira Doc Sync with Groups Connector\",\n            source=DocumentSource.JIRA,\n            input_type=InputType.POLL,\n            connector_specific_config=connector_config,\n            refresh_freq=None,\n            prune_freq=None,\n            indexing_start=None,\n        )\n        db_session.add(connector)\n        db_session.flush()\n\n        credential = Credential(\n            source=DocumentSource.JIRA,\n            credential_json=jira_credential_json,\n        )\n        db_session.add(credential)\n        db_session.flush()\n        # Expire the credential so it reloads from DB with SensitiveValue wrapper\n        db_session.expire(credential)\n\n        cc_pair = ConnectorCredentialPair(\n            connector_id=connector.id,\n            credential_id=credential.id,\n            name=\"Test Jira Doc Sync with Groups CC Pair\",\n            status=ConnectorCredentialPairStatus.ACTIVE,\n            access_type=AccessType.SYNC,\n            auto_sync_options=None,\n        )\n        db_session.add(cc_pair)\n        db_session.flush()\n        db_session.refresh(cc_pair)\n\n        # Mock functions\n        def fetch_all_existing_docs_fn(\n            sort_order: SortOrder | None = None,  # noqa: ARG001\n        ) -> list[DocumentRow]:\n            return []\n\n        def fetch_all_existing_docs_ids_fn() -> list[str]:\n            return []\n\n        doc_sync_iter = jira_doc_sync(\n            cc_pair=cc_pair,\n            fetch_all_existing_docs_fn=fetch_all_existing_docs_fn,\n            fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,\n        )\n\n        docs = list(doc_sync_iter)\n\n        # SUP project should have user-specific permissions (not public)\n        assert len(docs) > 0, \"Expected at least one document from SUP project\"\n\n        _EXPECTED_USER_EMAILS = set(\n            [\"yuhong@onyx.app\", \"chris@onyx.app\", \"founders@onyx.app\"]\n        )\n        _EXPECTED_USER_GROUP_IDS = set([\"jira-users-danswerai\"])\n\n        for doc in docs:\n            if not isinstance(doc, DocExternalAccess):\n                continue\n            assert doc.doc_id.startswith(\"https://danswerai.atlassian.net/browse/SUP-\")\n            # SUP project has specific users assigned, not applicationRole\n            assert (\n                not doc.external_access.is_public\n            ), f\"Document {doc.doc_id} should not be public\"\n            # Should have user emails\n            assert doc.external_access.external_user_emails == _EXPECTED_USER_EMAILS\n            assert (\n                doc.external_access.external_user_group_ids == _EXPECTED_USER_GROUP_IDS\n            )\n    finally:\n        db_session.rollback()\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/connectors/jira/test_jira_group_sync.py",
    "content": "from typing import Any\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.external_permissions.jira.group_sync import jira_group_sync\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom shared_configs.contextvars import get_current_tenant_id\nfrom tests.daily.connectors.confluence.models import ExternalUserGroupSet\n\n\n# In order to get these tests to run, use the credentials from Bitwarden.\n# Search up \"ENV vars for local and Github tests\", and find the Jira relevant key-value pairs.\n# Required env vars: JIRA_USER_EMAIL, JIRA_API_TOKEN\n\npytestmark = pytest.mark.usefixtures(\"enable_ee\")\n\n# Expected groups from the danswerai.atlassian.net Jira instance\n# Note: These groups are shared with Confluence since they're both Atlassian products\n# App accounts (bots, integrations) are filtered out\n_EXPECTED_JIRA_GROUPS = [\n    ExternalUserGroupSet(\n        id=\"Yuhong Only No Chris Allowed\",\n        user_emails={\"yuhong@onyx.app\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"confluence-admins-danswerai\",\n        user_emails={\"chris@onyx.app\", \"yuhong@onyx.app\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"confluence-user-access-admins-danswerai\",\n        user_emails={\"hagen@danswer.ai\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"confluence-users-danswerai\",\n        user_emails={\n            \"chris@onyx.app\",\n            \"founders@onyx.app\",\n            \"hagen@danswer.ai\",\n            \"pablo@onyx.app\",\n            \"yuhong@onyx.app\",\n        },\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"jira-admins-danswerai\",\n        user_emails={\"founders@onyx.app\", \"hagen@danswer.ai\", \"pablo@onyx.app\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"jira-user-access-admins-danswerai\",\n        user_emails={\"hagen@danswer.ai\"},\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"jira-users-danswerai\",\n        user_emails={\n            \"chris@onyx.app\",\n            \"founders@onyx.app\",\n            \"hagen@danswer.ai\",\n            \"pablo@onyx.app\",\n        },\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"org-admins\",\n        user_emails={\n            \"chris@onyx.app\",\n            \"founders@onyx.app\",\n            \"yuhong@onyx.app\",\n        },\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"bitbucket-admins-onyxai\",\n        user_emails={\"founders@onyx.app\"},  # no Oauth, we skip \"app\" account in jira\n        gives_anyone_access=False,\n    ),\n    ExternalUserGroupSet(\n        id=\"bitbucket-users-onyxai\",\n        user_emails={\"founders@onyx.app\"},  # no Oauth, we skip \"app\" account in jira\n        gives_anyone_access=False,\n    ),\n]\n\n\ndef test_jira_group_sync(\n    db_session: Session,\n    jira_connector_config: dict[str, Any],\n    jira_credential_json: dict[str, Any],\n) -> None:\n    try:\n        connector = Connector(\n            name=\"Test Jira Connector\",\n            source=DocumentSource.JIRA,\n            input_type=InputType.POLL,\n            connector_specific_config=jira_connector_config,\n            refresh_freq=None,\n            prune_freq=None,\n            indexing_start=None,\n        )\n        db_session.add(connector)\n        db_session.flush()\n\n        credential = Credential(\n            source=DocumentSource.JIRA,\n            credential_json=jira_credential_json,\n        )\n        db_session.add(credential)\n        db_session.flush()\n        # Expire the credential so it reloads from DB with SensitiveValue wrapper\n        db_session.expire(credential)\n\n        cc_pair = ConnectorCredentialPair(\n            connector_id=connector.id,\n            credential_id=credential.id,\n            name=\"Test Jira CC Pair\",\n            status=ConnectorCredentialPairStatus.ACTIVE,\n            access_type=AccessType.SYNC,\n            auto_sync_options=None,\n        )\n        db_session.add(cc_pair)\n        db_session.flush()\n        db_session.refresh(cc_pair)\n\n        tenant_id = get_current_tenant_id()\n        group_sync_iter = jira_group_sync(\n            tenant_id=tenant_id,\n            cc_pair=cc_pair,\n        )\n\n        expected_groups = {group.id: group for group in _EXPECTED_JIRA_GROUPS}\n        actual_groups = {\n            group.id: ExternalUserGroupSet.from_model(external_user_group=group)\n            for group in group_sync_iter\n        }\n        assert expected_groups == actual_groups\n    finally:\n        db_session.rollback()\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/constants.py",
    "content": "TEST_TENANT_ID: str = \"public\"\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/craft/conftest.py",
    "content": "\"\"\"Fixtures for build mode tests.\"\"\"\n\nfrom collections.abc import Generator\nfrom uuid import uuid4\n\nimport pytest\nfrom fastapi_users.password import PasswordHelper\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.enums import AccountType\nfrom onyx.db.enums import BuildSessionStatus\nfrom onyx.db.models import BuildSession\nfrom onyx.db.models import User\nfrom onyx.db.models import UserRole\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n\n@pytest.fixture(scope=\"function\")\ndef db_session() -> Generator[Session, None, None]:\n    \"\"\"Create a database session for testing using the actual PostgreSQL database.\"\"\"\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n    with get_session_with_current_tenant() as session:\n        yield session\n\n\n@pytest.fixture(scope=\"function\")\ndef tenant_context() -> Generator[None, None, None]:\n    \"\"\"Set up tenant context for testing.\"\"\"\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n    try:\n        yield\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\n@pytest.fixture(scope=\"function\")\ndef test_user(db_session: Session, tenant_context: None) -> User:  # noqa: ARG001\n    \"\"\"Create a test user for build session tests.\"\"\"\n    unique_email = f\"build_test_{uuid4().hex[:8]}@example.com\"\n\n    password_helper = PasswordHelper()\n    password = password_helper.generate()\n    hashed_password = password_helper.hash(password)\n\n    user = User(\n        id=uuid4(),\n        email=unique_email,\n        hashed_password=hashed_password,\n        is_active=True,\n        is_superuser=False,\n        is_verified=True,\n        role=UserRole.EXT_PERM_USER,\n        account_type=AccountType.EXT_PERM_USER,\n    )\n    db_session.add(user)\n    db_session.commit()\n    db_session.refresh(user)\n    return user\n\n\n@pytest.fixture(scope=\"function\")\ndef build_session(\n    db_session: Session,\n    test_user: User,\n    tenant_context: None,  # noqa: ARG001\n) -> BuildSession:\n    \"\"\"Create a test build session.\"\"\"\n    session = BuildSession(\n        id=uuid4(),\n        user_id=test_user.id,\n        name=\"Test Build Session\",\n        status=BuildSessionStatus.ACTIVE,\n    )\n    db_session.add(session)\n    db_session.commit()\n    db_session.refresh(session)\n    return session\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/craft/test_build_packet_storage.py",
    "content": "\"\"\"\nTest suite for build mode packet storage.\n\nTests the new packet storage behavior:\n- All data stored in message_metadata as JSON (no content column)\n- turn_index tracks which user message each assistant message belongs to\n- Tool calls: Only save when status=\"completed\"\n- Message/thought chunks: Accumulated and saved as synthetic packets\n- Agent plan updates: Upserted (only latest kept per turn)\n\"\"\"\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import MessageType\nfrom onyx.db.models import BuildSession\nfrom onyx.server.features.build.db.build_session import create_message\nfrom onyx.server.features.build.db.build_session import get_session_messages\nfrom onyx.server.features.build.db.build_session import upsert_agent_plan\nfrom onyx.server.features.build.session.manager import BuildStreamingState\n\n\nclass TestBuildMessageStorage:\n    \"\"\"Tests for build message storage in the database.\"\"\"\n\n    def test_create_message_with_metadata(\n        self,\n        db_session: Session,\n        build_session: BuildSession,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test creating a message with JSON metadata and turn_index.\"\"\"\n        user_message_metadata = {\n            \"type\": \"user_message\",\n            \"content\": {\"type\": \"text\", \"text\": \"Hello, world!\"},\n        }\n\n        message = create_message(\n            session_id=build_session.id,\n            message_type=MessageType.USER,\n            turn_index=0,\n            message_metadata=user_message_metadata,\n            db_session=db_session,\n        )\n\n        assert message.id is not None\n        assert message.session_id == build_session.id\n        assert message.type == MessageType.USER\n        assert message.turn_index == 0\n        assert message.message_metadata == user_message_metadata\n        assert message.message_metadata[\"type\"] == \"user_message\"\n        assert message.message_metadata[\"content\"][\"text\"] == \"Hello, world!\"\n\n    def test_create_multiple_messages_with_turn_index(\n        self,\n        db_session: Session,\n        build_session: BuildSession,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test creating multiple messages with correct turn_index values.\"\"\"\n        # First user message (turn 0)\n        create_message(\n            session_id=build_session.id,\n            message_type=MessageType.USER,\n            turn_index=0,\n            message_metadata={\n                \"type\": \"user_message\",\n                \"content\": {\"type\": \"text\", \"text\": \"First question\"},\n            },\n            db_session=db_session,\n        )\n\n        # Assistant response (turn 0)\n        create_message(\n            session_id=build_session.id,\n            message_type=MessageType.ASSISTANT,\n            turn_index=0,\n            message_metadata={\n                \"type\": \"agent_message\",\n                \"content\": {\"type\": \"text\", \"text\": \"First answer\"},\n            },\n            db_session=db_session,\n        )\n\n        # Second user message (turn 1)\n        create_message(\n            session_id=build_session.id,\n            message_type=MessageType.USER,\n            turn_index=1,\n            message_metadata={\n                \"type\": \"user_message\",\n                \"content\": {\"type\": \"text\", \"text\": \"Second question\"},\n            },\n            db_session=db_session,\n        )\n\n        # Assistant response (turn 1)\n        create_message(\n            session_id=build_session.id,\n            message_type=MessageType.ASSISTANT,\n            turn_index=1,\n            message_metadata={\n                \"type\": \"agent_message\",\n                \"content\": {\"type\": \"text\", \"text\": \"Second answer\"},\n            },\n            db_session=db_session,\n        )\n\n        # Verify messages\n        messages = get_session_messages(build_session.id, db_session)\n        assert len(messages) == 4\n\n        # Check turn indices\n        turn_0_messages = [m for m in messages if m.turn_index == 0]\n        turn_1_messages = [m for m in messages if m.turn_index == 1]\n\n        assert len(turn_0_messages) == 2\n        assert len(turn_1_messages) == 2\n\n    def test_tool_call_completed_storage(\n        self,\n        db_session: Session,\n        build_session: BuildSession,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test storing only completed tool calls.\"\"\"\n        # Create a user message first\n        create_message(\n            session_id=build_session.id,\n            message_type=MessageType.USER,\n            turn_index=0,\n            message_metadata={\n                \"type\": \"user_message\",\n                \"content\": {\"type\": \"text\", \"text\": \"Run a tool\"},\n            },\n            db_session=db_session,\n        )\n\n        # Create a completed tool call\n        tool_call_packet = {\n            \"type\": \"tool_call_progress\",\n            \"toolCallId\": \"tool-123\",\n            \"status\": \"completed\",\n            \"kind\": \"bash\",\n            \"title\": \"Running command\",\n            \"rawOutput\": \"Command completed successfully\",\n            \"timestamp\": \"2025-01-01T00:00:00Z\",\n        }\n\n        message = create_message(\n            session_id=build_session.id,\n            message_type=MessageType.ASSISTANT,\n            turn_index=0,\n            message_metadata=tool_call_packet,\n            db_session=db_session,\n        )\n\n        assert message.message_metadata[\"type\"] == \"tool_call_progress\"\n        assert message.message_metadata[\"status\"] == \"completed\"\n        assert message.message_metadata[\"toolCallId\"] == \"tool-123\"\n\n    def test_upsert_agent_plan(\n        self,\n        db_session: Session,\n        build_session: BuildSession,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test upserting agent plan - only latest should be kept.\"\"\"\n        # Create a user message first\n        create_message(\n            session_id=build_session.id,\n            message_type=MessageType.USER,\n            turn_index=0,\n            message_metadata={\n                \"type\": \"user_message\",\n                \"content\": {\"type\": \"text\", \"text\": \"Create a plan\"},\n            },\n            db_session=db_session,\n        )\n\n        # First plan\n        plan1 = {\n            \"type\": \"agent_plan_update\",\n            \"entries\": [\n                {\"id\": \"1\", \"status\": \"pending\", \"content\": \"Step 1\"},\n            ],\n            \"timestamp\": \"2025-01-01T00:00:00Z\",\n        }\n\n        plan_msg1 = upsert_agent_plan(\n            session_id=build_session.id,\n            turn_index=0,\n            plan_metadata=plan1,\n            db_session=db_session,\n        )\n\n        assert plan_msg1.message_metadata[\"entries\"][0][\"status\"] == \"pending\"\n\n        # Update plan with new status\n        plan2 = {\n            \"type\": \"agent_plan_update\",\n            \"entries\": [\n                {\"id\": \"1\", \"status\": \"completed\", \"content\": \"Step 1\"},\n                {\"id\": \"2\", \"status\": \"in_progress\", \"content\": \"Step 2\"},\n            ],\n            \"timestamp\": \"2025-01-01T00:01:00Z\",\n        }\n\n        plan_msg2 = upsert_agent_plan(\n            session_id=build_session.id,\n            turn_index=0,\n            plan_metadata=plan2,\n            db_session=db_session,\n            existing_plan_id=plan_msg1.id,\n        )\n\n        # Should be the same message, updated\n        assert plan_msg2.id == plan_msg1.id\n        assert len(plan_msg2.message_metadata[\"entries\"]) == 2\n        assert plan_msg2.message_metadata[\"entries\"][0][\"status\"] == \"completed\"\n\n        # Verify only one plan message exists for this turn\n        messages = get_session_messages(build_session.id, db_session)\n        plan_messages = [\n            m for m in messages if m.message_metadata.get(\"type\") == \"agent_plan_update\"\n        ]\n        assert len(plan_messages) == 1\n\n    def test_upsert_agent_plan_without_existing_id(\n        self,\n        db_session: Session,\n        build_session: BuildSession,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test upserting agent plan when we don't know the existing ID.\"\"\"\n        # Create a user message first\n        create_message(\n            session_id=build_session.id,\n            message_type=MessageType.USER,\n            turn_index=0,\n            message_metadata={\n                \"type\": \"user_message\",\n                \"content\": {\"type\": \"text\", \"text\": \"Create a plan\"},\n            },\n            db_session=db_session,\n        )\n\n        # First plan - no existing ID\n        plan1 = {\n            \"type\": \"agent_plan_update\",\n            \"entries\": [{\"id\": \"1\", \"status\": \"pending\", \"content\": \"Step 1\"}],\n        }\n\n        plan_msg1 = upsert_agent_plan(\n            session_id=build_session.id,\n            turn_index=0,\n            plan_metadata=plan1,\n            db_session=db_session,\n        )\n\n        # Second plan - still no existing ID, should find and update\n        plan2 = {\n            \"type\": \"agent_plan_update\",\n            \"entries\": [{\"id\": \"1\", \"status\": \"completed\", \"content\": \"Step 1\"}],\n        }\n\n        plan_msg2 = upsert_agent_plan(\n            session_id=build_session.id,\n            turn_index=0,\n            plan_metadata=plan2,\n            db_session=db_session,\n        )\n\n        # Should be the same message\n        assert plan_msg2.id == plan_msg1.id\n\n    def test_streaming_flow_db_calls(\n        self,\n        db_session: Session,\n        build_session: BuildSession,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that streaming flow creates correct number of DB messages.\n\n        Simulates:\n        1. Agent message chunks -> 1 message\n        2. Tool call -> 1 message\n        3. Agent message chunks -> 1 message\n\n        This verifies that we save parts of the turn as they finish, rather than\n        buffering everything into one giant message or losing granularity.\n        \"\"\"\n        # 0. Initial user message\n        create_message(\n            session_id=build_session.id,\n            message_type=MessageType.USER,\n            turn_index=0,\n            message_metadata={\n                \"type\": \"user_message\",\n                \"content\": {\"type\": \"text\", \"text\": \"Do something\"},\n            },\n            db_session=db_session,\n        )\n\n        state = BuildStreamingState(turn_index=0)\n\n        # 1. Stream agent message chunks\n        state.add_message_chunk(\"Thinking\")\n        state.add_message_chunk(\" about it...\")\n\n        # Simulate switch to tool call (e.g. ToolCallStart event) -> finalize message\n        # In SessionManager, this happens via state.should_finalize_chunks()\n        if state.should_finalize_chunks(\"tool_call_start\"):\n            msg_packet = state.finalize_message_chunks()\n            if msg_packet:\n                create_message(\n                    session_id=build_session.id,\n                    message_type=MessageType.ASSISTANT,\n                    turn_index=0,\n                    message_metadata=msg_packet,\n                    db_session=db_session,\n                )\n        state.clear_last_chunk_type()\n\n        # 2. Handle completed tool call (immediate save)\n        tool_packet = {\n            \"type\": \"tool_call_progress\",\n            \"toolCallId\": \"call_1\",\n            \"status\": \"completed\",\n            \"timestamp\": \"2025-01-01T00:00:00Z\",\n        }\n        create_message(\n            session_id=build_session.id,\n            message_type=MessageType.ASSISTANT,\n            turn_index=0,\n            message_metadata=tool_packet,\n            db_session=db_session,\n        )\n\n        # 3. Stream more agent message chunks\n        state.add_message_chunk(\"Done\")\n        state.add_message_chunk(\" with tool.\")\n\n        # End of stream -> finalize\n        msg_packet = state.finalize_message_chunks()\n        if msg_packet:\n            create_message(\n                session_id=build_session.id,\n                message_type=MessageType.ASSISTANT,\n                turn_index=0,\n                message_metadata=msg_packet,\n                db_session=db_session,\n            )\n\n        # Verify DB state\n        messages = get_session_messages(build_session.id, db_session)\n        # 1 user + 3 assistant = 4 total\n        assert len(messages) == 4\n\n        # Verify types/order\n        assert messages[0].type == MessageType.USER\n\n        assert messages[1].type == MessageType.ASSISTANT\n        assert messages[1].message_metadata[\"content\"][\"text\"] == \"Thinking about it...\"\n\n        assert messages[2].type == MessageType.ASSISTANT\n        assert messages[2].message_metadata[\"type\"] == \"tool_call_progress\"\n\n        assert messages[3].type == MessageType.ASSISTANT\n        assert messages[3].message_metadata[\"content\"][\"text\"] == \"Done with tool.\"\n\n\nclass TestBuildStreamingState:\n    \"\"\"Tests for BuildStreamingState class.\"\"\"\n\n    def test_message_chunk_accumulation(self) -> None:\n        \"\"\"Test accumulating message chunks.\"\"\"\n        state = BuildStreamingState(turn_index=0)\n\n        state.add_message_chunk(\"Hello, \")\n        state.add_message_chunk(\"world!\")\n\n        packet = state.finalize_message_chunks()\n\n        assert packet is not None\n        assert packet[\"type\"] == \"agent_message\"\n        assert packet[\"content\"][\"text\"] == \"Hello, world!\"\n\n        # After finalize, chunks should be cleared\n        assert len(state.message_chunks) == 0\n\n    def test_thought_chunk_accumulation(self) -> None:\n        \"\"\"Test accumulating thought chunks.\"\"\"\n        state = BuildStreamingState(turn_index=0)\n\n        state.add_thought_chunk(\"Thinking about \")\n        state.add_thought_chunk(\"the problem...\")\n\n        packet = state.finalize_thought_chunks()\n\n        assert packet is not None\n        assert packet[\"type\"] == \"agent_thought\"\n        assert packet[\"content\"][\"text\"] == \"Thinking about the problem...\"\n\n    def test_should_finalize_chunks_on_type_change(self) -> None:\n        \"\"\"Test detection of when to finalize chunks.\"\"\"\n        state = BuildStreamingState(turn_index=0)\n\n        # Add message chunk\n        state.add_message_chunk(\"Hello\")\n\n        # Should finalize when receiving non-message packet\n        assert state.should_finalize_chunks(\"tool_call_start\") is True\n        assert state.should_finalize_chunks(\"agent_plan_update\") is True\n        assert state.should_finalize_chunks(\"agent_thought_chunk\") is True\n\n        # Should NOT finalize for same type\n        assert state.should_finalize_chunks(\"agent_message_chunk\") is False\n\n    def test_finalize_returns_none_when_empty(self) -> None:\n        \"\"\"Test that finalize returns None when no chunks accumulated.\"\"\"\n        state = BuildStreamingState(turn_index=0)\n\n        assert state.finalize_message_chunks() is None\n        assert state.finalize_thought_chunks() is None\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/craft/test_file_upload.py",
    "content": "\"\"\"Tests for file upload functionality in build sessions.\n\nTests the file upload and delete operations for pre-provisioned sessions,\nincluding limit enforcement and SandboxManager delegation.\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom collections.abc import Generator\nfrom typing import TYPE_CHECKING\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.enums import BuildSessionStatus\nfrom onyx.db.enums import SandboxStatus\nfrom onyx.db.models import BuildSession\nfrom onyx.db.models import Sandbox\nfrom onyx.db.models import User\nfrom onyx.server.features.build.configs import ATTACHMENTS_DIRECTORY\nfrom onyx.server.features.build.configs import MAX_TOTAL_UPLOAD_SIZE_BYTES\nfrom onyx.server.features.build.configs import MAX_UPLOAD_FILES_PER_SESSION\nfrom onyx.server.features.build.session.manager import UploadLimitExceededError\n\nif TYPE_CHECKING:\n    from onyx.server.features.build.session.manager import SessionManager\n\n\n@pytest.fixture(scope=\"function\")\ndef sandbox(\n    db_session: Session,\n    test_user: User,\n    tenant_context: None,  # noqa: ARG001\n) -> Sandbox:\n    \"\"\"Create a test sandbox for the user (sandboxes are per-user, not per-session).\"\"\"\n    sandbox = Sandbox(\n        id=uuid4(),\n        user_id=test_user.id,\n        status=SandboxStatus.RUNNING,\n    )\n    db_session.add(sandbox)\n    db_session.commit()\n    db_session.refresh(sandbox)\n    return sandbox\n\n\n@pytest.fixture(scope=\"function\")\ndef build_session_with_user(\n    db_session: Session,\n    test_user: User,\n    sandbox: Sandbox,  # noqa: ARG001\n    tenant_context: None,  # noqa: ARG001\n) -> BuildSession:\n    \"\"\"Create a test build session for a user who has a sandbox.\"\"\"\n    session = BuildSession(\n        id=uuid4(),\n        user_id=test_user.id,\n        name=\"Test Build Session\",\n        status=BuildSessionStatus.ACTIVE,\n    )\n    db_session.add(session)\n    db_session.commit()\n    db_session.refresh(session)\n    return session\n\n\n@pytest.fixture(scope=\"function\")\ndef mock_sandbox_manager() -> MagicMock:\n    \"\"\"Create a mock sandbox manager.\"\"\"\n    return MagicMock()\n\n\n@pytest.fixture(scope=\"function\")\ndef session_manager_with_mock(\n    db_session: Session, mock_sandbox_manager: MagicMock\n) -> Generator[\"SessionManager\", None, None]:\n    \"\"\"Create a SessionManager with mocked sandbox manager.\"\"\"\n    # Import here to avoid module-level initialization issues\n    with patch(\n        \"onyx.server.features.build.session.manager.get_sandbox_manager\",\n        return_value=mock_sandbox_manager,\n    ):\n        from onyx.server.features.build.session.manager import SessionManager\n\n        manager = SessionManager(db_session)\n        yield manager\n\n\nclass TestFileUpload:\n    \"\"\"Tests for file upload functionality.\"\"\"\n\n    def test_upload_file_delegates_to_sandbox_manager(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that uploading a file delegates to the sandbox manager.\"\"\"\n        # Configure mocks\n        mock_sandbox_manager.get_upload_stats.return_value = (0, 0)\n        mock_sandbox_manager.upload_file.return_value = (\n            f\"{ATTACHMENTS_DIRECTORY}/test.txt\"\n        )\n\n        # Upload a file\n        content = b\"Hello, World!\"\n        relative_path, size = session_manager_with_mock.upload_file(\n            session_id=build_session_with_user.id,\n            user_id=test_user.id,\n            filename=\"test.txt\",\n            content=content,\n        )\n\n        # Verify the sandbox manager was called correctly\n        mock_sandbox_manager.upload_file.assert_called_once_with(\n            sandbox_id=sandbox.id,\n            session_id=build_session_with_user.id,\n            filename=\"test.txt\",\n            content=content,\n        )\n        assert relative_path == f\"{ATTACHMENTS_DIRECTORY}/test.txt\"\n        assert size == len(content)\n\n    def test_upload_file_returns_correct_path(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,  # noqa: ARG002\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that upload returns the correct relative path.\"\"\"\n        mock_sandbox_manager.get_upload_stats.return_value = (0, 0)\n        mock_sandbox_manager.upload_file.return_value = (\n            f\"{ATTACHMENTS_DIRECTORY}/document.pdf\"\n        )\n\n        relative_path, size = session_manager_with_mock.upload_file(\n            session_id=build_session_with_user.id,\n            user_id=test_user.id,\n            filename=\"document.pdf\",\n            content=b\"PDF content\",\n        )\n\n        assert relative_path == f\"{ATTACHMENTS_DIRECTORY}/document.pdf\"\n        assert size == 11  # len(\"PDF content\")\n\n    def test_upload_file_session_not_found(\n        self,\n        test_user: User,\n        sandbox: Sandbox,  # noqa: ARG002\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that uploading to a non-existent session raises ValueError.\"\"\"\n        with pytest.raises(ValueError, match=\"Session not found\"):\n            session_manager_with_mock.upload_file(\n                session_id=uuid4(),  # Non-existent session\n                user_id=test_user.id,\n                filename=\"test.txt\",\n                content=b\"content\",\n            )\n\n\nclass TestFileUploadLimits:\n    \"\"\"Tests for file upload limit enforcement.\"\"\"\n\n    def test_upload_file_count_limit_enforced(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,  # noqa: ARG002\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that exceeding the file count limit raises an error.\"\"\"\n        # Mock get_upload_stats to return max files already uploaded\n        mock_sandbox_manager.get_upload_stats.return_value = (\n            MAX_UPLOAD_FILES_PER_SESSION,\n            1000,\n        )\n\n        # Try to upload one more file\n        with pytest.raises(UploadLimitExceededError, match=\"Maximum number of files\"):\n            session_manager_with_mock.upload_file(\n                session_id=build_session_with_user.id,\n                user_id=test_user.id,\n                filename=\"one_too_many.txt\",\n                content=b\"content\",\n            )\n\n        # Verify upload_file was NOT called (limit check happens before)\n        mock_sandbox_manager.upload_file.assert_not_called()\n\n    def test_upload_total_size_limit_enforced(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,  # noqa: ARG002\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that exceeding the total size limit raises an error.\"\"\"\n        # Mock get_upload_stats to return almost at the limit\n        existing_size = MAX_TOTAL_UPLOAD_SIZE_BYTES - 100  # 100 bytes under limit\n        mock_sandbox_manager.get_upload_stats.return_value = (1, existing_size)\n\n        # Try to upload a file that would exceed the limit\n        with pytest.raises(UploadLimitExceededError, match=\"Total upload size limit\"):\n            session_manager_with_mock.upload_file(\n                session_id=build_session_with_user.id,\n                user_id=test_user.id,\n                filename=\"over_limit.txt\",\n                content=b\"x\" * 200,  # 200 bytes, would exceed by 100\n            )\n\n        # Verify upload_file was NOT called (limit check happens before)\n        mock_sandbox_manager.upload_file.assert_not_called()\n\n    def test_upload_succeeds_when_under_limits(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,  # noqa: ARG002\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that upload succeeds when under limits.\"\"\"\n        # Mock get_upload_stats to return well under limits\n        mock_sandbox_manager.get_upload_stats.return_value = (5, 1000)\n        mock_sandbox_manager.upload_file.return_value = (\n            f\"{ATTACHMENTS_DIRECTORY}/test.txt\"\n        )\n\n        relative_path, size = session_manager_with_mock.upload_file(\n            session_id=build_session_with_user.id,\n            user_id=test_user.id,\n            filename=\"test.txt\",\n            content=b\"content\",\n        )\n\n        # Verify upload_file was called\n        mock_sandbox_manager.upload_file.assert_called_once()\n        assert relative_path == f\"{ATTACHMENTS_DIRECTORY}/test.txt\"\n\n\nclass TestFileDelete:\n    \"\"\"Tests for file delete functionality.\"\"\"\n\n    def test_delete_file_delegates_to_sandbox_manager(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that delete file delegates to the sandbox manager.\"\"\"\n        mock_sandbox_manager.delete_file.return_value = True\n\n        result = session_manager_with_mock.delete_file(\n            session_id=build_session_with_user.id,\n            user_id=test_user.id,\n            path=f\"{ATTACHMENTS_DIRECTORY}/test.txt\",\n        )\n\n        assert result is True\n        mock_sandbox_manager.delete_file.assert_called_once_with(\n            sandbox_id=sandbox.id,\n            session_id=build_session_with_user.id,\n            path=f\"{ATTACHMENTS_DIRECTORY}/test.txt\",\n        )\n\n    def test_delete_file_returns_false_when_not_found(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,  # noqa: ARG002\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that delete returns False when file doesn't exist.\"\"\"\n        mock_sandbox_manager.delete_file.return_value = False\n\n        result = session_manager_with_mock.delete_file(\n            session_id=build_session_with_user.id,\n            user_id=test_user.id,\n            path=f\"{ATTACHMENTS_DIRECTORY}/nonexistent.txt\",\n        )\n\n        assert result is False\n\n    def test_delete_file_session_not_found(\n        self,\n        test_user: User,\n        sandbox: Sandbox,  # noqa: ARG002\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that deleting from a non-existent session raises ValueError.\"\"\"\n        with pytest.raises(ValueError, match=\"Session not found\"):\n            session_manager_with_mock.delete_file(\n                session_id=uuid4(),  # Non-existent session\n                user_id=test_user.id,\n                path=f\"{ATTACHMENTS_DIRECTORY}/test.txt\",\n            )\n\n\nclass TestPathSanitization:\n    \"\"\"Tests for path sanitization in delete operations.\"\"\"\n\n    def test_delete_file_rejects_path_traversal(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,  # noqa: ARG002\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that paths with .. are rejected.\"\"\"\n        # Configure mock to raise ValueError (simulating sandbox manager behavior)\n        mock_sandbox_manager.delete_file.side_effect = ValueError(\n            \"Invalid path: potential path traversal detected\"\n        )\n\n        with pytest.raises(ValueError, match=\"path traversal\"):\n            session_manager_with_mock.delete_file(\n                session_id=build_session_with_user.id,\n                user_id=test_user.id,\n                path=\"../../../etc/passwd\",\n            )\n\n    def test_delete_file_rejects_url_encoded_traversal(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,  # noqa: ARG002\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that URL-encoded paths are rejected.\"\"\"\n        mock_sandbox_manager.delete_file.side_effect = ValueError(\n            \"Invalid path: potential path traversal detected\"\n        )\n\n        with pytest.raises(ValueError, match=\"path traversal\"):\n            session_manager_with_mock.delete_file(\n                session_id=build_session_with_user.id,\n                user_id=test_user.id,\n                path=\"attachments/%2e%2e/secret.txt\",\n            )\n\n    def test_delete_file_rejects_shell_metacharacters(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,  # noqa: ARG002\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that shell metacharacters are rejected.\"\"\"\n        mock_sandbox_manager.delete_file.side_effect = ValueError(\n            \"Invalid path: contains disallowed characters\"\n        )\n\n        dangerous_paths = [\n            \"attachments/file;rm -rf /\",\n            \"attachments/file|cat /etc/passwd\",\n            \"attachments/file`whoami`\",\n            \"attachments/file$(id)\",\n            \"attachments/file'test\",\n        ]\n\n        for dangerous_path in dangerous_paths:\n            with pytest.raises(ValueError, match=\"disallowed characters\"):\n                session_manager_with_mock.delete_file(\n                    session_id=build_session_with_user.id,\n                    user_id=test_user.id,\n                    path=dangerous_path,\n                )\n            # Reset mock for next iteration\n            mock_sandbox_manager.delete_file.reset_mock()\n\n    def test_delete_file_rejects_null_bytes(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,  # noqa: ARG002\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that null bytes in paths are rejected.\"\"\"\n        mock_sandbox_manager.delete_file.side_effect = ValueError(\n            \"Invalid path: potential path traversal detected\"\n        )\n\n        with pytest.raises(ValueError, match=\"path traversal\"):\n            session_manager_with_mock.delete_file(\n                session_id=build_session_with_user.id,\n                user_id=test_user.id,\n                path=\"attachments/file.txt\\x00.jpg\",\n            )\n\n\nclass TestFilenameCollision:\n    \"\"\"Tests for filename collision handling.\"\"\"\n\n    def test_upload_returns_collision_handled_path(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,  # noqa: ARG002\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that sandbox manager can return a renamed path for collisions.\"\"\"\n        # Simulate sandbox manager handling collision by returning renamed path\n        mock_sandbox_manager.get_upload_stats.return_value = (1, 100)  # 1 existing file\n        mock_sandbox_manager.upload_file.return_value = (\n            f\"{ATTACHMENTS_DIRECTORY}/document_1.pdf\"\n        )\n\n        relative_path, size = session_manager_with_mock.upload_file(\n            session_id=build_session_with_user.id,\n            user_id=test_user.id,\n            filename=\"document.pdf\",\n            content=b\"PDF content\",\n        )\n\n        # Verify the collision-handled path is returned\n        assert relative_path == f\"{ATTACHMENTS_DIRECTORY}/document_1.pdf\"\n\n\nclass TestGetUploadStats:\n    \"\"\"Tests for get_upload_stats functionality.\"\"\"\n\n    def test_get_upload_stats_delegates_to_sandbox_manager(\n        self,\n        test_user: User,\n        build_session_with_user: BuildSession,\n        sandbox: Sandbox,\n        mock_sandbox_manager: MagicMock,\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that get_upload_stats delegates to the sandbox manager.\"\"\"\n        mock_sandbox_manager.get_upload_stats.return_value = (3, 1500)\n\n        file_count, total_size = session_manager_with_mock.get_upload_stats(\n            session_id=build_session_with_user.id,\n            user_id=test_user.id,\n        )\n\n        # Verify the sandbox manager was called correctly\n        mock_sandbox_manager.get_upload_stats.assert_called_once_with(\n            sandbox_id=sandbox.id,\n            session_id=build_session_with_user.id,\n        )\n        assert file_count == 3\n        assert total_size == 1500\n\n    def test_get_upload_stats_session_not_found(\n        self,\n        test_user: User,\n        sandbox: Sandbox,  # noqa: ARG002\n        session_manager_with_mock: \"SessionManager\",\n    ) -> None:\n        \"\"\"Test that getting stats for non-existent session raises ValueError.\"\"\"\n        with pytest.raises(ValueError, match=\"Session not found\"):\n            session_manager_with_mock.get_upload_stats(\n                session_id=uuid4(),  # Non-existent session\n                user_id=test_user.id,\n            )\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/craft/test_kubernetes_sandbox.py",
    "content": "\"\"\"Integration test for KubernetesSandboxManager.provision().\n\nThis test requires:\n- A running Kubernetes cluster (kind, minikube, or real cluster)\n- The SANDBOX_BACKEND=kubernetes environment variable\n- The sandbox namespace to exist (default: onyx-sandboxes)\n- Service accounts for sandbox (sandbox-runner, sandbox-file-sync)\n\nRun with:\n    SANDBOX_BACKEND=kubernetes python -m dotenv -f .vscode/.env run -- \\\n        pytest backend/tests/integration/tests/build/test_kubernetes_sandbox_provision.py -v\n\"\"\"\n\nimport time\nfrom uuid import UUID\nfrom uuid import uuid4\n\nimport pytest\nfrom kubernetes import client  # type: ignore[import-untyped]\nfrom kubernetes import config\nfrom kubernetes.client.rest import ApiException  # type: ignore[import-untyped]\nfrom kubernetes.stream import stream as k8s_stream  # type: ignore[import-untyped]\n\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.enums import SandboxStatus\nfrom onyx.server.features.build.configs import SANDBOX_BACKEND\nfrom onyx.server.features.build.configs import SANDBOX_NAMESPACE\nfrom onyx.server.features.build.configs import SANDBOX_NEXTJS_PORT_START\nfrom onyx.server.features.build.configs import SandboxBackend\nfrom onyx.server.features.build.sandbox.base import ACPEvent\nfrom onyx.server.features.build.sandbox.kubernetes.kubernetes_sandbox_manager import (\n    KubernetesSandboxManager,\n)\nfrom onyx.server.features.build.sandbox.models import LLMProviderConfig\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\nlogger = setup_logger()\n\n# Test constants\nTEST_TENANT_ID = \"test-tenant\"\nTEST_USER_ID = UUID(\"ee0dd46a-23dc-4128-abab-6712b3f4464c\")\n\n\ndef _is_kubernetes_available() -> None:\n    \"\"\"Check if Kubernetes is available and configured.\"\"\"\n    try:\n        config.load_incluster_config()\n    except config.ConfigException:\n        config.load_kube_config()\n\n    v1 = client.CoreV1Api()\n    # List pods in sandbox namespace instead of namespaces (avoids cluster-scope permissions)\n    v1.list_namespaced_pod(SANDBOX_NAMESPACE, limit=1)\n\n\ndef _get_kubernetes_client() -> client.CoreV1Api:\n    \"\"\"Get a configured Kubernetes CoreV1Api client.\"\"\"\n    try:\n        config.load_incluster_config()\n    except config.ConfigException:\n        config.load_kube_config()\n    return client.CoreV1Api()\n\n\n@pytest.mark.skipif(\n    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,\n    reason=\"SANDBOX_BACKEND must be 'kubernetes' to run this test\",\n)\ndef test_kubernetes_sandbox_provision() -> None:\n    \"\"\"Test that provision() creates a sandbox pod and DB record successfully.\n\n    This is a happy path test that:\n    1. Creates a BuildSession in the database\n    2. Calls provision() to create a Kubernetes pod\n    3. Verifies the sandbox is created with RUNNING status\n    4. Cleans up by terminating the sandbox\n    \"\"\"\n    _is_kubernetes_available()\n\n    # Initialize the database engine\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n\n    # Set up tenant context (required for multi-tenant operations)\n    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n\n    # Get the manager instance\n    manager = KubernetesSandboxManager()\n\n    sandbox_id = uuid4()\n\n    # Create a test LLM config (values don't matter for this test)\n    llm_config = LLMProviderConfig(\n        provider=\"openai\",\n        model_name=\"gpt-4\",\n        api_key=\"test-key\",\n        api_base=None,\n    )\n\n    try:\n        # Call provision\n        sandbox_info = manager.provision(\n            sandbox_id=sandbox_id,\n            user_id=TEST_USER_ID,\n            tenant_id=TEST_TENANT_ID,\n            llm_config=llm_config,\n        )\n\n        # Verify the return value\n        assert sandbox_info.sandbox_id == sandbox_id\n        assert sandbox_info.status == SandboxStatus.RUNNING\n        assert sandbox_info.directory_path.startswith(\"k8s://\")\n\n        # Verify Kubernetes resources exist\n        k8s_client = _get_kubernetes_client()\n        pod_name = f\"sandbox-{str(sandbox_id)[:8]}\"\n        service_name = pod_name\n\n        # Verify pod exists and is running\n        pod = k8s_client.read_namespaced_pod(\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n        )\n        assert pod is not None\n        assert pod.status.phase == \"Running\"\n\n        # Verify service exists\n        service = k8s_client.read_namespaced_service(\n            name=service_name,\n            namespace=SANDBOX_NAMESPACE,\n        )\n        assert service is not None\n        assert service.spec.type == \"ClusterIP\"\n\n        # Verify /workspace/templates/outputs directory exists and contains expected files\n        exec_command = [\"/bin/sh\", \"-c\", \"ls -la /workspace/templates/outputs\"]\n        resp = k8s_stream(\n            k8s_client.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n            container=\"sandbox\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        assert resp is not None\n        print(f\"DEBUG: Contents of /workspace/templates/outputs:\\n{resp}\")\n        assert (\n            \"web\" in resp\n        ), f\"/workspace/templates/outputs should contain web directory. Actual contents:\\n{resp}\"\n\n        # Verify /workspace/templates/outputs/web/AGENTS.md file exists\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            \"cat /workspace/templates/outputs/web/AGENTS.md\",\n        ]\n        resp = k8s_stream(\n            k8s_client.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n            container=\"sandbox\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        assert resp is not None\n        assert (\n            len(resp) > 0\n        ), \"/workspace/templates/outputs/web/AGENTS.md file should not be empty\"\n        # Verify it contains expected content\n        assert (\n            \"Agent\" in resp or \"Instructions\" in resp or \"#\" in resp\n        ), \"/workspace/templates/outputs/web/AGENTS.md should contain agent instructions\"\n\n        # Verify /workspace/files directory exists and contains expected files\n        exec_command = [\"/bin/sh\", \"-c\", \"find /workspace/files -type f | wc -l\"]\n        resp = k8s_stream(\n            k8s_client.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n            container=\"sandbox\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        assert resp is not None\n        file_count = int(resp.strip())\n        assert (\n            file_count == 1099\n        ), f\"/workspace/files should contain 1099 files, but found {file_count}\"\n\n        # start session\n        session_id = uuid4()\n        manager.setup_session_workspace(\n            sandbox_id=sandbox_id,\n            session_id=session_id,\n            llm_config=llm_config,\n            nextjs_port=SANDBOX_NEXTJS_PORT_START,\n            file_system_path=None,\n            snapshot_path=None,\n            user_name=\"Test User\",\n            user_role=\"Test Role\",\n        )\n\n        # Verify AGENTS.md file exists for the session\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            f\"cat /workspace/sessions/{session_id}/AGENTS.md\",\n        ]\n        resp = k8s_stream(\n            k8s_client.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n            container=\"sandbox\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        assert resp is not None\n        assert len(resp) > 0, \"AGENTS.md file should not be empty\"\n        # Verify it contains expected content (from template or default)\n        assert \"Agent\" in resp or \"Instructions\" in resp or \"#\" in resp\n        assert \"Test User\" in resp\n        assert \"Test Role\" in resp\n\n        # Verify opencode.json file exists for the session\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            f\"cat /workspace/sessions/{session_id}/opencode.json\",\n        ]\n        resp = k8s_stream(\n            k8s_client.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n            container=\"sandbox\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        assert resp is not None\n        assert len(resp) > 0, \"opencode.json file should not be empty\"\n\n        # verify that the outputs directory is copied over\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            f\"ls -la /workspace/sessions/{session_id}/outputs\",\n        ]\n        resp = k8s_stream(\n            k8s_client.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n            container=\"sandbox\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        assert resp is not None\n        assert len(resp) > 0, \"outputs directory should not be empty\"\n        assert \"web\" in resp, \"outputs directory should contain web directory\"\n\n    finally:\n        # Clean up: terminate the sandbox (no longer needs db_session)\n        if sandbox_id:\n            manager.terminate(sandbox_id)\n\n            # Verify Kubernetes resources are cleaned up\n            k8s_client = _get_kubernetes_client()\n            pod_name = f\"sandbox-{str(sandbox_id)[:8]}\"\n\n            # Give K8s a moment to delete resources\n            time.sleep(2)\n\n            # Verify pod is deleted (or being deleted)\n            try:\n                pod = k8s_client.read_namespaced_pod(\n                    name=pod_name,\n                    namespace=SANDBOX_NAMESPACE,\n                )\n                # Pod might still exist but be terminating\n                assert pod.metadata.deletion_timestamp is not None\n            except ApiException as e:\n                # 404 means pod was successfully deleted\n                assert e.status == 404\n\n\n@pytest.mark.skipif(\n    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,\n    reason=\"SANDBOX_BACKEND must be 'kubernetes' to run this test\",\n)\ndef test_kubernetes_sandbox_send_message() -> None:\n    \"\"\"Test that send_message() communicates with the sandbox agent successfully.\n\n    This test:\n    1. Creates a sandbox pod\n    2. Sends a simple message via send_message()\n    3. Verifies we receive ACP events back (agent responses)\n    4. Cleans up by terminating the sandbox\n    \"\"\"\n    from acp.schema import AgentMessageChunk\n    from acp.schema import Error\n    from acp.schema import PromptResponse\n\n    _is_kubernetes_available()\n\n    # Initialize the database engine\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n\n    # Set up tenant context (required for multi-tenant operations)\n    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n\n    # Get the manager instance\n    manager = KubernetesSandboxManager()\n\n    sandbox_id = uuid4()\n    session_id = uuid4()\n\n    # Create a test LLM config (values don't matter for this test)\n    llm_config = LLMProviderConfig(\n        provider=\"openai\",\n        model_name=\"gpt-4\",\n        api_key=\"test-key\",\n        api_base=None,\n    )\n\n    try:\n        # Provision the sandbox\n        sandbox_info = manager.provision(\n            sandbox_id=sandbox_id,\n            user_id=TEST_USER_ID,\n            tenant_id=TEST_TENANT_ID,\n            llm_config=llm_config,\n        )\n\n        assert sandbox_info.status == SandboxStatus.RUNNING\n\n        # Verify health check passes before sending message\n        is_healthy = False\n        for _ in range(10):\n            is_healthy = manager.health_check(sandbox_id)\n            if is_healthy:\n                break\n            time.sleep(10)\n\n        assert is_healthy, \"Sandbox agent should be healthy before sending messages\"\n        print(\"DEBUG: Sandbox agent is healthy\")\n\n        manager.setup_session_workspace(\n            sandbox_id, session_id, llm_config, nextjs_port=SANDBOX_NEXTJS_PORT_START\n        )\n\n        # Send a simple message\n        events: list[ACPEvent] = []\n        for event in manager.send_message(sandbox_id, session_id, \"What is 2 + 2?\"):\n            events.append(event)\n\n        # Verify we received events\n        assert len(events) > 0, \"Should receive at least one event from send_message\"\n\n        for event in events:\n            print(f\"Recieved event: {event}\")\n\n        # Check for errors\n        errors = [e for e in events if isinstance(e, Error)]\n        assert len(errors) == 0, f\"Should not receive errors: {errors}\"\n\n        # Verify we received some agent message content or a final response\n        message_chunks = [e for e in events if isinstance(e, AgentMessageChunk)]\n        prompt_responses = [e for e in events if isinstance(e, PromptResponse)]\n\n        assert (\n            len(message_chunks) > 0 or len(prompt_responses) > 0\n        ), \"Should receive either AgentMessageChunk or PromptResponse events\"\n\n        # If we got a PromptResponse, verify it completed successfully\n        if prompt_responses:\n            final_response = prompt_responses[-1]\n            assert (\n                final_response.stop_reason is not None\n            ), \"PromptResponse should have a stop_reason\"\n\n    finally:\n        # Clean up: terminate the sandbox\n        if sandbox_id:\n            manager.terminate(sandbox_id)\n\n            # Verify Kubernetes resources are cleaned up\n            k8s_client = _get_kubernetes_client()\n            pod_name = f\"sandbox-{str(sandbox_id)[:8]}\"\n\n            # Give K8s a moment to delete resources\n            time.sleep(2)\n\n            # Verify pod is deleted (or being deleted)\n            try:\n                pod = k8s_client.read_namespaced_pod(\n                    name=pod_name,\n                    namespace=SANDBOX_NAMESPACE,\n                )\n                # Pod might still exist but be terminating\n                assert pod.metadata.deletion_timestamp is not None\n            except ApiException as e:\n                # 404 means pod was successfully deleted\n                assert e.status == 404\n\n\n@pytest.mark.skipif(\n    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,\n    reason=\"SANDBOX_BACKEND must be 'kubernetes' to run this test\",\n)\ndef test_kubernetes_sandbox_webapp_passthrough() -> None:\n    \"\"\"Test that the webapp passthrough (Next.js server) is accessible in the sandbox.\n\n    This test:\n    1. Creates a sandbox pod\n    2. Sets up a session workspace\n    3. Verifies the Next.js server is running and accessible within the pod\n    4. Verifies get_nextjs_url returns the correct cluster URL format\n    5. Cleans up by terminating the sandbox\n    \"\"\"\n    _is_kubernetes_available()\n\n    # Initialize the database engine\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n\n    # Set up tenant context (required for multi-tenant operations)\n    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n\n    # Get the manager instance\n    manager = KubernetesSandboxManager()\n\n    sandbox_id = uuid4()\n    session_id = uuid4()\n\n    # Create a test LLM config\n    llm_config = LLMProviderConfig(\n        provider=\"openai\",\n        model_name=\"gpt-4\",\n        api_key=\"test-key\",\n        api_base=None,\n    )\n\n    try:\n        # Provision the sandbox\n        sandbox_info = manager.provision(\n            sandbox_id=sandbox_id,\n            user_id=TEST_USER_ID,\n            tenant_id=TEST_TENANT_ID,\n            llm_config=llm_config,\n        )\n\n        assert sandbox_info.status == SandboxStatus.RUNNING\n\n        # Verify health check passes before testing webapp\n        is_healthy = False\n        for _ in range(10):\n            is_healthy = manager.health_check(sandbox_id)\n            if is_healthy:\n                break\n            time.sleep(10)\n\n        assert is_healthy, \"Sandbox should be healthy before testing webapp passthrough\"\n        print(\"DEBUG: Sandbox is healthy\")\n\n        # Set up session workspace\n        manager.setup_session_workspace(\n            sandbox_id=sandbox_id,\n            session_id=session_id,\n            llm_config=llm_config,\n            nextjs_port=SANDBOX_NEXTJS_PORT_START,\n            file_system_path=None,\n            snapshot_path=None,\n            user_name=\"Test User\",\n            user_role=\"Test Role\",\n        )\n\n        # Get Kubernetes client for exec operations\n        k8s_client = _get_kubernetes_client()\n        pod_name = f\"sandbox-{str(sandbox_id)[:8]}\"\n\n        # Wait for Next.js server to be ready (it may take a few seconds to start)\n        # The session uses the first port in the configured range\n        test_nextjs_port = SANDBOX_NEXTJS_PORT_START\n        nextjs_ready = False\n        for attempt in range(30):\n            exec_command = [\n                \"/bin/sh\",\n                \"-c\",\n                (\n                    f\"curl -s -o /dev/null -w '%{{http_code}}' http://localhost:{test_nextjs_port}/ 2>/dev/null || echo 'failed'\"\n                ),\n            ]\n            resp = k8s_stream(\n                k8s_client.connect_get_namespaced_pod_exec,\n                name=pod_name,\n                namespace=SANDBOX_NAMESPACE,\n                container=\"sandbox\",\n                command=exec_command,\n                stderr=True,\n                stdin=False,\n                stdout=True,\n                tty=False,\n            )\n            print(f\"DEBUG: Next.js health check attempt {attempt + 1}: {resp}\")\n            if resp and resp.strip() in (\"200\", \"304\"):\n                nextjs_ready = True\n                break\n            time.sleep(2)\n\n        assert (\n            nextjs_ready\n        ), f\"Next.js server should be accessible at localhost:{SANDBOX_NEXTJS_PORT_START}\"\n        print(\"DEBUG: Next.js server is ready\")\n\n        # Verify we can fetch actual content from the Next.js server\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            f\"curl -s http://localhost:{SANDBOX_NEXTJS_PORT_START}/ | head -c 500\",\n        ]\n        resp = k8s_stream(\n            k8s_client.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n            container=\"sandbox\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        assert resp is not None, \"Should receive content from Next.js server\"\n        assert len(resp) > 0, \"Next.js server response should not be empty\"\n        # Basic check that it looks like HTML\n        assert (\n            \"<\" in resp or \"html\" in resp.lower() or \"<!doctype\" in resp.lower()\n        ), f\"Response should be HTML content. Got: {resp[:200]}\"\n        print(f\"DEBUG: Next.js server returned content (first 200 chars): {resp[:200]}\")\n\n        # Verify get_nextjs_url returns correctly formatted cluster URL\n        nextjs_url = manager.get_webapp_url(sandbox_id, test_nextjs_port)\n        expected_service_name = f\"sandbox-{str(sandbox_id)[:8]}\"\n        expected_url_pattern = (\n            f\"http://{expected_service_name}.{SANDBOX_NAMESPACE}.svc.cluster.local:\"\n        )\n        assert nextjs_url.startswith(\n            expected_url_pattern\n        ), f\"Next.js URL should follow cluster service format. Expected to start with: {expected_url_pattern}, Got: {nextjs_url}\"\n        assert (\n            str(SANDBOX_NEXTJS_PORT_START) in nextjs_url\n        ), f\"Next.js URL should contain port {SANDBOX_NEXTJS_PORT_START}. Got: {nextjs_url}\"\n        print(f\"DEBUG: get_nextjs_url returned: {nextjs_url}\")\n\n        # Verify the service is accessible via the cluster URL from within the pod\n        exec_command = [\n            \"/bin/sh\",\n            \"-c\",\n            f\"curl -s -o /dev/null -w '%{{http_code}}' {nextjs_url}/ 2>/dev/null || echo 'failed'\",\n        ]\n        resp = k8s_stream(\n            k8s_client.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n            container=\"sandbox\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        print(f\"DEBUG: Cluster URL health check response: {resp}\")\n        assert resp and resp.strip() in (\n            \"200\",\n            \"304\",\n        ), f\"Next.js server should be accessible via cluster URL {nextjs_url}. Got response: {resp}\"\n\n    finally:\n        # Clean up: terminate the sandbox\n        if sandbox_id:\n            manager.terminate(sandbox_id)\n\n            # Verify Kubernetes resources are cleaned up\n            k8s_client = _get_kubernetes_client()\n            pod_name = f\"sandbox-{str(sandbox_id)[:8]}\"\n\n            # Give K8s a moment to delete resources\n            time.sleep(2)\n\n            # Verify pod is deleted (or being deleted)\n            try:\n                pod = k8s_client.read_namespaced_pod(\n                    name=pod_name,\n                    namespace=SANDBOX_NAMESPACE,\n                )\n                # Pod might still exist but be terminating\n                assert pod.metadata.deletion_timestamp is not None\n            except ApiException as e:\n                # 404 means pod was successfully deleted\n                assert e.status == 404\n\n\n@pytest.mark.skipif(\n    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,\n    reason=\"SANDBOX_BACKEND must be 'kubernetes' to run this test\",\n)\ndef test_kubernetes_sandbox_file_sync() -> None:\n    \"\"\"Test that sync_files() triggers S3 sync in the file-sync sidecar.\n\n    This test:\n    1. Creates a sandbox pod (which now has file-sync as sidecar)\n    2. Verifies the file-sync sidecar is running\n    3. Calls sync_files() to trigger S3 sync\n    4. Verifies the sync command executes successfully\n    5. Cleans up by terminating the sandbox\n    \"\"\"\n    _is_kubernetes_available()\n\n    # Initialize the database engine\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n\n    # Set up tenant context (required for multi-tenant operations)\n    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n\n    # Get the manager instance\n    manager = KubernetesSandboxManager()\n\n    sandbox_id = uuid4()\n\n    # Create a test LLM config\n    llm_config = LLMProviderConfig(\n        provider=\"openai\",\n        model_name=\"gpt-4\",\n        api_key=\"test-key\",\n        api_base=None,\n    )\n\n    try:\n        # Provision the sandbox\n        sandbox_info = manager.provision(\n            sandbox_id=sandbox_id,\n            user_id=TEST_USER_ID,\n            tenant_id=TEST_TENANT_ID,\n            llm_config=llm_config,\n        )\n\n        assert sandbox_info.status == SandboxStatus.RUNNING\n\n        # Verify the pod is running\n        k8s_client = _get_kubernetes_client()\n        pod_name = f\"sandbox-{str(sandbox_id)[:8]}\"\n        pod = k8s_client.read_namespaced_pod(\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n        )\n        assert pod is not None\n        assert pod.status.phase == \"Running\"\n\n        # Verify file-sync sidecar container is running\n        # With sidecar model, file-sync should be a regular container (not init)\n        container_statuses = pod.status.container_statuses or []\n        file_sync_status = next(\n            (c for c in container_statuses if c.name == \"file-sync\"),\n            None,\n        )\n        assert file_sync_status is not None, \"file-sync sidecar container should exist\"\n        assert file_sync_status.ready, \"file-sync sidecar container should be ready\"\n        print(f\"DEBUG: file-sync container status: {file_sync_status}\")\n\n        # Wipe the /workspace/files directory to ensure files we find are from the sync\n        exec_command = [\"/bin/sh\", \"-c\", \"rm -rf /workspace/files/*\"]\n        k8s_stream(\n            k8s_client.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n            container=\"file-sync\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        print(\"DEBUG: Wiped /workspace/files directory\")\n\n        # Verify the directory is empty\n        exec_command = [\"/bin/sh\", \"-c\", \"find /workspace/files -type f | wc -l\"]\n        resp = k8s_stream(\n            k8s_client.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n            container=\"sandbox\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        file_count = int(resp.strip()) if resp else 0\n        assert (\n            file_count == 0\n        ), f\"/workspace/files should be empty before sync, found {file_count} files\"\n        print(\"DEBUG: Verified /workspace/files is empty\")\n\n        # Call sync_files() to trigger S3 sync\n        result = manager.sync_files(\n            sandbox_id=sandbox_id,\n            user_id=TEST_USER_ID,\n            tenant_id=TEST_TENANT_ID,\n        )\n        assert result is True, \"sync_files() should return True on success\"\n        print(\"DEBUG: sync_files() completed successfully\")\n\n        # Verify /workspace/files exists and has files synced from S3\n        # (verifies the shared volume is working and sync actually transferred files)\n        exec_command = [\"/bin/sh\", \"-c\", \"find /workspace/files -type f | wc -l\"]\n        resp = k8s_stream(\n            k8s_client.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n            container=\"sandbox\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        assert resp is not None, \"/workspace/files should be accessible from sandbox\"\n        file_count = int(resp.strip()) if resp else 0\n        assert (\n            file_count > 0\n        ), f\"sync_files() should have synced files, but found {file_count} files\"\n        print(f\"DEBUG: sync_files() synced {file_count} files to /workspace/files\")\n\n        # Also verify we can exec into file-sync sidecar directly\n        exec_command = [\"/bin/sh\", \"-c\", \"ls -la /workspace/files\"]\n        resp = k8s_stream(\n            k8s_client.connect_get_namespaced_pod_exec,\n            name=pod_name,\n            namespace=SANDBOX_NAMESPACE,\n            container=\"file-sync\",\n            command=exec_command,\n            stderr=True,\n            stdin=False,\n            stdout=True,\n            tty=False,\n        )\n        assert resp is not None, \"/workspace/files should be accessible from file-sync\"\n        print(f\"DEBUG: Contents of /workspace/files (from file-sync sidecar):\\n{resp}\")\n\n    finally:\n        # Clean up: terminate the sandbox\n        if sandbox_id:\n            manager.terminate(sandbox_id)\n\n            # Verify Kubernetes resources are cleaned up\n            k8s_client = _get_kubernetes_client()\n            pod_name = f\"sandbox-{str(sandbox_id)[:8]}\"\n\n            # Give K8s a moment to delete resources\n            time.sleep(2)\n\n            # Verify pod is deleted (or being deleted)\n            try:\n                pod = k8s_client.read_namespaced_pod(\n                    name=pod_name,\n                    namespace=SANDBOX_NAMESPACE,\n                )\n                # Pod might still exist but be terminating\n                assert pod.metadata.deletion_timestamp is not None\n            except ApiException as e:\n                # 404 means pod was successfully deleted\n                assert e.status == 404\n\n\n@pytest.mark.skipif(\n    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,\n    reason=\"SANDBOX_BACKEND must be 'kubernetes' to run this test\",\n)\ndef test_health_check_returns_true_for_running_pod() -> None:\n    \"\"\"Test that health_check() returns True for a healthy, running pod.\n\n    This test:\n    1. Creates a sandbox pod\n    2. Calls health_check() and verifies it returns True\n    3. Cleans up by terminating the sandbox\n    \"\"\"\n    _is_kubernetes_available()\n\n    # Initialize the database engine\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n\n    # Set up tenant context\n    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n\n    manager = KubernetesSandboxManager()\n    sandbox_id = uuid4()\n\n    llm_config = LLMProviderConfig(\n        provider=\"openai\",\n        model_name=\"gpt-4\",\n        api_key=\"test-key\",\n        api_base=None,\n    )\n\n    try:\n        # Provision the sandbox\n        sandbox_info = manager.provision(\n            sandbox_id=sandbox_id,\n            user_id=TEST_USER_ID,\n            tenant_id=TEST_TENANT_ID,\n            llm_config=llm_config,\n        )\n\n        assert sandbox_info.status == SandboxStatus.RUNNING\n\n        # Wait for pod to be fully healthy (it may take a few seconds)\n        is_healthy = False\n        for _ in range(10):\n            is_healthy = manager.health_check(sandbox_id, timeout=5.0)\n            if is_healthy:\n                break\n            time.sleep(2)\n\n        assert (\n            is_healthy\n        ), \"health_check() should return True for a running, healthy pod\"\n\n    finally:\n        if sandbox_id:\n            manager.terminate(sandbox_id)\n\n\n@pytest.mark.skipif(\n    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,\n    reason=\"SANDBOX_BACKEND must be 'kubernetes' to run this test\",\n)\ndef test_health_check_returns_false_for_missing_pod() -> None:\n    \"\"\"Test that health_check() returns False when the pod doesn't exist.\n\n    This test:\n    1. Uses a random UUID that has no corresponding pod\n    2. Calls health_check() and verifies it returns False\n    \"\"\"\n    _is_kubernetes_available()\n\n    # Initialize the database engine\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n\n    # Set up tenant context\n    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n\n    manager = KubernetesSandboxManager()\n\n    # Use a random UUID that definitely has no pod\n    nonexistent_sandbox_id = uuid4()\n\n    # health_check should return False for non-existent pod\n    is_healthy = manager.health_check(nonexistent_sandbox_id, timeout=5.0)\n\n    assert not is_healthy, \"health_check() should return False for a non-existent pod\"\n\n\n@pytest.mark.skipif(\n    SANDBOX_BACKEND != SandboxBackend.KUBERNETES,\n    reason=\"SANDBOX_BACKEND must be 'kubernetes' to run this test\",\n)\ndef test_health_check_returns_false_after_termination() -> None:\n    \"\"\"Test that health_check() returns False after a pod has been terminated.\n\n    This test:\n    1. Creates a sandbox pod\n    2. Verifies health_check() returns True\n    3. Terminates the sandbox\n    4. Verifies health_check() returns False\n    \"\"\"\n    _is_kubernetes_available()\n\n    # Initialize the database engine\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n\n    # Set up tenant context\n    CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n\n    manager = KubernetesSandboxManager()\n    sandbox_id = uuid4()\n\n    llm_config = LLMProviderConfig(\n        provider=\"openai\",\n        model_name=\"gpt-4\",\n        api_key=\"test-key\",\n        api_base=None,\n    )\n\n    # Provision the sandbox\n    sandbox_info = manager.provision(\n        sandbox_id=sandbox_id,\n        user_id=TEST_USER_ID,\n        tenant_id=TEST_TENANT_ID,\n        llm_config=llm_config,\n    )\n\n    assert sandbox_info.status == SandboxStatus.RUNNING\n\n    # Wait for pod to be fully healthy\n    is_healthy = False\n    for _ in range(10):\n        is_healthy = manager.health_check(sandbox_id, timeout=5.0)\n        if is_healthy:\n            break\n        time.sleep(2)\n\n    assert is_healthy, \"Pod should be healthy before termination\"\n\n    # Terminate the sandbox\n    manager.terminate(sandbox_id)\n\n    # Wait for pod to be deleted\n    time.sleep(3)\n\n    # health_check should now return False\n    is_healthy_after = manager.health_check(sandbox_id, timeout=5.0)\n\n    assert (\n        not is_healthy_after\n    ), \"health_check() should return False after pod has been terminated\"\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/craft/test_persistent_document_writer.py",
    "content": "\"\"\"\nTests for PersistentDocumentWriter (local) and S3PersistentDocumentWriter.\n\nRun with:\n    python -m dotenv -f .vscode/.env run -- \\\n        pytest backend/tests/external_dependency_unit/craft/test_persistent_document_writer.py -v\n\"\"\"\n\nimport json\nimport os\nimport tempfile\nfrom datetime import datetime\nfrom datetime import timezone\nfrom uuid import uuid4\n\nimport boto3\nimport pytest\nfrom botocore.exceptions import ClientError\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import TextSection\nfrom onyx.server.features.build.configs import SANDBOX_S3_BUCKET\nfrom onyx.server.features.build.indexing.persistent_document_writer import (\n    PersistentDocumentWriter,\n)\nfrom onyx.server.features.build.indexing.persistent_document_writer import (\n    S3PersistentDocumentWriter,\n)\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n\ndef _create_test_document(doc_id: str, name: str) -> Document:\n    \"\"\"Helper to create a test document.\"\"\"\n    return Document(\n        id=doc_id,\n        semantic_identifier=name,\n        title=name,\n        source=DocumentSource.WEB,\n        sections=[TextSection(text=\"Test content\", link=\"https://example.com\")],\n        metadata={},\n        doc_metadata={\"hierarchy\": {\"source_path\": [\"Folder\"]}},\n        doc_updated_at=datetime.now(timezone.utc),\n        primary_owners=[],\n        secondary_owners=[],\n    )\n\n\ndef test_local_persistent_document_writer() -> None:\n    \"\"\"Test writing documents to local filesystem.\"\"\"\n    with tempfile.TemporaryDirectory() as temp_dir:\n        tenant_id = TEST_TENANT_ID\n        user_id = str(uuid4())\n        writer = PersistentDocumentWriter(\n            base_path=temp_dir, tenant_id=tenant_id, user_id=user_id\n        )\n\n        doc = _create_test_document(\"doc-001\", \"Test Document\")\n        written_paths = writer.write_documents([doc])\n\n        assert len(written_paths) == 1\n        assert written_paths[0] == os.path.join(\n            temp_dir,\n            tenant_id,\n            \"knowledge\",\n            user_id,\n            \"web\",\n            \"Folder\",\n            \"Test_Document.json\",\n        )\n        assert os.path.exists(written_paths[0])\n\n        with open(written_paths[0]) as f:\n            content = json.load(f)\n        assert content[\"id\"] == \"doc-001\"\n        assert content[\"semantic_identifier\"] == \"Test Document\"\n\n\ndef _is_s3_available() -> bool:\n    \"\"\"Check if S3 is available for testing.\"\"\"\n    try:\n        s3_client = boto3.client(\"s3\")\n        s3_client.head_bucket(Bucket=SANDBOX_S3_BUCKET)\n        return True\n    except (ClientError, Exception):\n        return False\n\n\n@pytest.mark.skipif(\n    not _is_s3_available(),\n    reason=f\"S3 bucket '{SANDBOX_S3_BUCKET}' not available\",\n)\ndef test_s3_persistent_document_writer() -> None:\n    \"\"\"Test writing documents to S3.\"\"\"\n    user_id = str(uuid4())\n    writer = S3PersistentDocumentWriter(tenant_id=TEST_TENANT_ID, user_id=user_id)\n\n    doc = _create_test_document(\"s3-doc-001\", \"S3 Test Doc\")\n    written_keys = writer.write_documents([doc])\n\n    try:\n        assert len(written_keys) == 1\n        assert f\"{TEST_TENANT_ID}/knowledge/{user_id}\" in written_keys[0]\n\n        # Verify the object exists in S3\n        s3_client = boto3.client(\"s3\")\n        response = s3_client.get_object(Bucket=SANDBOX_S3_BUCKET, Key=written_keys[0])\n        content = json.loads(response[\"Body\"].read().decode(\"utf-8\"))\n\n        assert content[\"id\"] == \"s3-doc-001\"\n        assert content[\"semantic_identifier\"] == \"S3 Test Doc\"\n    finally:\n        # Cleanup\n        s3_client = boto3.client(\"s3\")\n        try:\n            s3_client.delete_object(Bucket=SANDBOX_S3_BUCKET, Key=written_keys[0])\n        except Exception:\n            pass\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/db/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/external_dependency_unit/db/conftest.py",
    "content": "\"\"\"Fixtures for testing DAL classes against a real PostgreSQL database.\n\nThese fixtures build on the db_session and tenant_context fixtures from\nthe parent conftest (tests/external_dependency_unit/conftest.py).\n\nRequires a running Postgres instance. Run with::\n\n    python -m dotenv -f .vscode/.env run -- pytest tests/external_dependency_unit/db/\n\"\"\"\n\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom uuid import UUID\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.scim import ScimDAL\nfrom onyx.db.models import ScimToken\nfrom onyx.db.models import UserGroup\n\n\n@pytest.fixture\ndef scim_dal(db_session: Session) -> ScimDAL:\n    \"\"\"A ScimDAL backed by the real test database session.\"\"\"\n    return ScimDAL(db_session)\n\n\n@pytest.fixture\ndef scim_token_factory(\n    db_session: Session,\n) -> Generator[Callable[..., ScimToken], None, None]:\n    \"\"\"Factory that creates ScimToken rows and cleans them up after the test.\"\"\"\n    created_ids: list[int] = []\n\n    def _create(\n        name: str = \"test-token\",\n        hashed_token: str | None = None,\n        token_display: str = \"onyx_scim_****test\",\n        created_by_id: UUID | None = None,\n    ) -> ScimToken:\n        token = ScimToken(\n            name=name,\n            hashed_token=hashed_token or uuid4().hex,\n            token_display=token_display,\n            created_by_id=created_by_id or uuid4(),\n        )\n        db_session.add(token)\n        db_session.flush()\n        created_ids.append(token.id)\n        return token\n\n    yield _create\n\n    for token_id in created_ids:\n        obj = db_session.get(ScimToken, token_id)\n        if obj:\n            db_session.delete(obj)\n    db_session.commit()\n\n\n@pytest.fixture\ndef user_group_factory(\n    db_session: Session,\n) -> Generator[Callable[..., UserGroup], None, None]:\n    \"\"\"Factory that creates UserGroup rows for testing group mappings.\"\"\"\n    created_ids: list[int] = []\n\n    def _create(name: str | None = None) -> UserGroup:\n        group = UserGroup(name=name or f\"test-group-{uuid4().hex[:8]}\")\n        db_session.add(group)\n        db_session.flush()\n        created_ids.append(group.id)\n        return group\n\n    yield _create\n\n    for group_id in created_ids:\n        obj = db_session.get(UserGroup, group_id)\n        if obj:\n            db_session.delete(obj)\n    db_session.commit()\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/db/test_chat_session_eager_load.py",
    "content": "from sqlalchemy import inspect\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.chat import create_chat_session\nfrom onyx.db.chat import get_chat_session_by_id\nfrom onyx.db.models import Persona\nfrom onyx.db.models import UserProject\nfrom tests.external_dependency_unit.conftest import create_test_user\n\n\ndef test_eager_load_persona_loads_relationships(db_session: Session) -> None:\n    \"\"\"Verify that eager_load_persona pre-loads persona, its collections, and project.\"\"\"\n    user = create_test_user(db_session, \"eager-load\")\n    persona = Persona(name=\"eager-load-test\", description=\"test\")\n    project = UserProject(name=\"eager-load-project\", user_id=user.id)\n    db_session.add_all([persona, project])\n    db_session.flush()\n\n    chat_session = create_chat_session(\n        db_session=db_session,\n        description=\"test\",\n        user_id=None,\n        persona_id=persona.id,\n        project_id=project.id,\n    )\n\n    loaded = get_chat_session_by_id(\n        chat_session_id=chat_session.id,\n        user_id=None,\n        db_session=db_session,\n        eager_load_persona=True,\n    )\n\n    try:\n        tmp = inspect(loaded)\n        assert tmp is not None\n        unloaded = tmp.unloaded\n        assert \"persona\" not in unloaded\n        assert \"project\" not in unloaded\n\n        tmp = inspect(loaded.persona)\n        assert tmp is not None\n        persona_unloaded = tmp.unloaded\n        assert \"tools\" not in persona_unloaded\n        assert \"user_files\" not in persona_unloaded\n        assert \"document_sets\" not in persona_unloaded\n        assert \"attached_documents\" not in persona_unloaded\n        assert \"hierarchy_nodes\" not in persona_unloaded\n    finally:\n        db_session.rollback()\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/db/test_credential_sensitive_value.py",
    "content": "\"\"\"Test that Credential with nested JSON round-trips through SensitiveValue correctly.\n\nExercises the full encrypt → store → read → decrypt → SensitiveValue path\nwith realistic nested OAuth credential data, and verifies SQLAlchemy dirty\ntracking works with nested dict comparison.\n\nRequires a running Postgres instance.\n\"\"\"\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.models import Credential\nfrom onyx.utils.sensitive import SensitiveValue\n\n# NOTE: this is not the real shape of a Drive credential,\n# but it is intended to test nested JSON credential handling\n\n_NESTED_CRED_JSON = {\n    \"oauth_tokens\": {\n        \"access_token\": \"ya29.abc123\",\n        \"refresh_token\": \"1//xEg-def456\",\n    },\n    \"scopes\": [\"read\", \"write\", \"admin\"],\n    \"client_config\": {\n        \"client_id\": \"123.apps.googleusercontent.com\",\n        \"client_secret\": \"GOCSPX-secret\",\n    },\n}\n\n\ndef test_nested_credential_json_round_trip(db_session: Session) -> None:\n    \"\"\"Nested OAuth credential survives encrypt → store → read → decrypt.\"\"\"\n    credential = Credential(\n        source=DocumentSource.GOOGLE_DRIVE,\n        credential_json=_NESTED_CRED_JSON,\n    )\n    db_session.add(credential)\n    db_session.flush()\n\n    # Immediate read (no DB round-trip) — tests the set event wrapping\n    assert isinstance(credential.credential_json, SensitiveValue)\n    assert credential.credential_json.get_value(apply_mask=False) == _NESTED_CRED_JSON\n\n    # DB round-trip — tests process_result_value\n    db_session.expire(credential)\n    reloaded = credential.credential_json\n    assert isinstance(reloaded, SensitiveValue)\n    assert reloaded.get_value(apply_mask=False) == _NESTED_CRED_JSON\n\n    db_session.rollback()\n\n\ndef test_reassign_same_nested_json_not_dirty(db_session: Session) -> None:\n    \"\"\"Re-assigning the same nested dict should not mark the session dirty.\"\"\"\n    credential = Credential(\n        source=DocumentSource.GOOGLE_DRIVE,\n        credential_json=_NESTED_CRED_JSON,\n    )\n    db_session.add(credential)\n    db_session.flush()\n\n    # Clear dirty state from the insert\n    db_session.expire(credential)\n    _ = credential.credential_json  # force reload\n\n    # Re-assign identical value\n    credential.credential_json = _NESTED_CRED_JSON  # type: ignore[assignment]\n    assert not db_session.is_modified(credential)\n\n    db_session.rollback()\n\n\ndef test_assign_different_nested_json_is_dirty(db_session: Session) -> None:\n    \"\"\"Assigning a different nested dict should mark the session dirty.\"\"\"\n    credential = Credential(\n        source=DocumentSource.GOOGLE_DRIVE,\n        credential_json=_NESTED_CRED_JSON,\n    )\n    db_session.add(credential)\n    db_session.flush()\n\n    db_session.expire(credential)\n    _ = credential.credential_json  # force reload\n\n    modified_cred = {**_NESTED_CRED_JSON, \"scopes\": [\"read\"]}\n    credential.credential_json = modified_cred  # type: ignore[assignment]\n    assert db_session.is_modified(credential)\n\n    db_session.rollback()\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/db/test_rotate_encryption_key.py",
    "content": "\"\"\"Tests for rotate_encryption_key against real Postgres.\n\nUses real ORM models (Credential, InternetSearchProvider) and the actual\nPostgres database. Discovery is mocked in rotation tests to scope mutations\nto only the test rows — the real _discover_encrypted_columns walk is tested\nseparately in TestDiscoverEncryptedColumns.\n\nRequires a running Postgres instance. Run with::\n\n    python -m dotenv -f .vscode/.env run -- pytest tests/external_dependency_unit/db/test_rotate_encryption_key.py\n\"\"\"\n\nimport json\nfrom collections.abc import Generator\nfrom unittest.mock import patch\n\nimport pytest\nfrom sqlalchemy import LargeBinary\nfrom sqlalchemy import select\nfrom sqlalchemy import text\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.utils.encryption import _decrypt_bytes\nfrom ee.onyx.utils.encryption import _encrypt_string\nfrom ee.onyx.utils.encryption import _get_trimmed_key\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.models import Credential\nfrom onyx.db.models import EncryptedJson\nfrom onyx.db.models import EncryptedString\nfrom onyx.db.models import InternetSearchProvider\nfrom onyx.db.rotate_encryption_key import _discover_encrypted_columns\nfrom onyx.db.rotate_encryption_key import rotate_encryption_key\nfrom onyx.utils.variable_functionality import fetch_versioned_implementation\nfrom onyx.utils.variable_functionality import global_version\n\nEE_MODULE = \"ee.onyx.utils.encryption\"\nROTATE_MODULE = \"onyx.db.rotate_encryption_key\"\n\nOLD_KEY = \"o\" * 16\nNEW_KEY = \"n\" * 16\n\n\n@pytest.fixture(autouse=True)\ndef _enable_ee() -> Generator[None, None, None]:\n    prev = global_version._is_ee\n    global_version.set_ee()\n    fetch_versioned_implementation.cache_clear()\n    yield\n    global_version._is_ee = prev\n    fetch_versioned_implementation.cache_clear()\n\n\n@pytest.fixture(autouse=True)\ndef _clear_key_cache() -> None:\n    _get_trimmed_key.cache_clear()\n\n\ndef _raw_credential_bytes(db_session: Session, credential_id: int) -> bytes | None:\n    \"\"\"Read raw bytes from credential_json, bypassing the TypeDecorator.\"\"\"\n    col = Credential.__table__.c.credential_json\n    stmt = select(col.cast(LargeBinary)).where(\n        Credential.__table__.c.id == credential_id\n    )\n    return db_session.execute(stmt).scalar()\n\n\ndef _raw_isp_bytes(db_session: Session, isp_id: int) -> bytes | None:\n    \"\"\"Read raw bytes from InternetSearchProvider.api_key.\"\"\"\n    col = InternetSearchProvider.__table__.c.api_key\n    stmt = select(col.cast(LargeBinary)).where(\n        InternetSearchProvider.__table__.c.id == isp_id\n    )\n    return db_session.execute(stmt).scalar()\n\n\nclass TestDiscoverEncryptedColumns:\n    \"\"\"Verify _discover_encrypted_columns finds real production models.\"\"\"\n\n    def test_discovers_credential_json(self) -> None:\n        results = _discover_encrypted_columns()\n        found = {\n            (model_cls.__tablename__, col_name, is_json)  # type: ignore[attr-defined]\n            for model_cls, col_name, _, is_json in results\n        }\n        assert (\"credential\", \"credential_json\", True) in found\n\n    def test_discovers_internet_search_provider_api_key(self) -> None:\n        results = _discover_encrypted_columns()\n        found = {\n            (model_cls.__tablename__, col_name, is_json)  # type: ignore[attr-defined]\n            for model_cls, col_name, _, is_json in results\n        }\n        assert (\"internet_search_provider\", \"api_key\", False) in found\n\n    def test_all_encrypted_string_columns_are_not_json(self) -> None:\n        results = _discover_encrypted_columns()\n        for model_cls, col_name, _, is_json in results:\n            col = getattr(model_cls, col_name).property.columns[0]\n            if isinstance(col.type, EncryptedString):\n                assert not is_json, (\n                    f\"{model_cls.__tablename__}.{col_name} is EncryptedString \"  # type: ignore[attr-defined]\n                    f\"but is_json={is_json}\"\n                )\n\n    def test_all_encrypted_json_columns_are_json(self) -> None:\n        results = _discover_encrypted_columns()\n        for model_cls, col_name, _, is_json in results:\n            col = getattr(model_cls, col_name).property.columns[0]\n            if isinstance(col.type, EncryptedJson):\n                assert is_json, (\n                    f\"{model_cls.__tablename__}.{col_name} is EncryptedJson \"  # type: ignore[attr-defined]\n                    f\"but is_json={is_json}\"\n                )\n\n\nclass TestRotateCredential:\n    \"\"\"Test rotation against the real Credential table (EncryptedJson).\n\n    Discovery is scoped to only the Credential model to avoid mutating\n    other tables in the test database.\n    \"\"\"\n\n    @pytest.fixture(autouse=True)\n    def _limit_discovery(self) -> Generator[None, None, None]:\n        with patch(\n            f\"{ROTATE_MODULE}._discover_encrypted_columns\",\n            return_value=[(Credential, \"credential_json\", [\"id\"], True)],\n        ):\n            yield\n\n    @pytest.fixture()\n    def credential_id(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> Generator[int, None, None]:\n        \"\"\"Insert a Credential row with raw encrypted bytes, clean up after.\"\"\"\n        config = {\"api_key\": \"sk-test-1234\", \"endpoint\": \"https://example.com\"}\n        encrypted = _encrypt_string(json.dumps(config), key=OLD_KEY)\n\n        result = db_session.execute(\n            text(\n                \"INSERT INTO credential \"\n                \"(source, credential_json, admin_public, curator_public) \"\n                \"VALUES (:source, :cred_json, true, false) \"\n                \"RETURNING id\"\n            ),\n            {\"source\": DocumentSource.INGESTION_API.value, \"cred_json\": encrypted},\n        )\n        cred_id = result.scalar_one()\n        db_session.commit()\n\n        yield cred_id\n\n        db_session.execute(\n            text(\"DELETE FROM credential WHERE id = :id\"), {\"id\": cred_id}\n        )\n        db_session.commit()\n\n    def test_rotates_credential_json(\n        self, db_session: Session, credential_id: int\n    ) -> None:\n        with (\n            patch(f\"{ROTATE_MODULE}.ENCRYPTION_KEY_SECRET\", NEW_KEY),\n            patch(f\"{EE_MODULE}.ENCRYPTION_KEY_SECRET\", NEW_KEY),\n        ):\n            totals = rotate_encryption_key(db_session, old_key=OLD_KEY)\n\n        assert totals.get(\"credential.credential_json\", 0) >= 1\n\n        raw = _raw_credential_bytes(db_session, credential_id)\n        assert raw is not None\n        decrypted = json.loads(_decrypt_bytes(raw, key=NEW_KEY))\n        assert decrypted[\"api_key\"] == \"sk-test-1234\"\n        assert decrypted[\"endpoint\"] == \"https://example.com\"\n\n    def test_skips_already_rotated(\n        self, db_session: Session, credential_id: int\n    ) -> None:\n        with (\n            patch(f\"{ROTATE_MODULE}.ENCRYPTION_KEY_SECRET\", NEW_KEY),\n            patch(f\"{EE_MODULE}.ENCRYPTION_KEY_SECRET\", NEW_KEY),\n        ):\n            rotate_encryption_key(db_session, old_key=OLD_KEY)\n            _ = rotate_encryption_key(db_session, old_key=OLD_KEY)\n\n        raw = _raw_credential_bytes(db_session, credential_id)\n        assert raw is not None\n        decrypted = json.loads(_decrypt_bytes(raw, key=NEW_KEY))\n        assert decrypted[\"api_key\"] == \"sk-test-1234\"\n\n    def test_dry_run_does_not_modify(\n        self, db_session: Session, credential_id: int\n    ) -> None:\n        original = _raw_credential_bytes(db_session, credential_id)\n\n        with (\n            patch(f\"{ROTATE_MODULE}.ENCRYPTION_KEY_SECRET\", NEW_KEY),\n            patch(f\"{EE_MODULE}.ENCRYPTION_KEY_SECRET\", NEW_KEY),\n        ):\n            totals = rotate_encryption_key(db_session, old_key=OLD_KEY, dry_run=True)\n\n        assert totals.get(\"credential.credential_json\", 0) >= 1\n\n        raw_after = _raw_credential_bytes(db_session, credential_id)\n        assert raw_after == original\n\n\nclass TestRotateInternetSearchProvider:\n    \"\"\"Test rotation against the real InternetSearchProvider table (EncryptedString).\n\n    Discovery is scoped to only the InternetSearchProvider model to avoid\n    mutating other tables in the test database.\n    \"\"\"\n\n    @pytest.fixture(autouse=True)\n    def _limit_discovery(self) -> Generator[None, None, None]:\n        with patch(\n            f\"{ROTATE_MODULE}._discover_encrypted_columns\",\n            return_value=[\n                (InternetSearchProvider, \"api_key\", [\"id\"], False),\n            ],\n        ):\n            yield\n\n    @pytest.fixture()\n    def isp_id(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> Generator[int, None, None]:\n        \"\"\"Insert an InternetSearchProvider row with raw encrypted bytes.\"\"\"\n        encrypted = _encrypt_string(\"sk-secret-api-key\", key=OLD_KEY)\n\n        result = db_session.execute(\n            text(\n                \"INSERT INTO internet_search_provider \"\n                \"(name, provider_type, api_key, is_active) \"\n                \"VALUES (:name, :ptype, :api_key, false) \"\n                \"RETURNING id\"\n            ),\n            {\n                \"name\": f\"test-rotation-{id(self)}\",\n                \"ptype\": \"test\",\n                \"api_key\": encrypted,\n            },\n        )\n        isp_id = result.scalar_one()\n        db_session.commit()\n\n        yield isp_id\n\n        db_session.execute(\n            text(\"DELETE FROM internet_search_provider WHERE id = :id\"),\n            {\"id\": isp_id},\n        )\n        db_session.commit()\n\n    def test_rotates_api_key(self, db_session: Session, isp_id: int) -> None:\n        with (\n            patch(f\"{ROTATE_MODULE}.ENCRYPTION_KEY_SECRET\", NEW_KEY),\n            patch(f\"{EE_MODULE}.ENCRYPTION_KEY_SECRET\", NEW_KEY),\n        ):\n            totals = rotate_encryption_key(db_session, old_key=OLD_KEY)\n\n        assert totals.get(\"internet_search_provider.api_key\", 0) >= 1\n\n        raw = _raw_isp_bytes(db_session, isp_id)\n        assert raw is not None\n        assert _decrypt_bytes(raw, key=NEW_KEY) == \"sk-secret-api-key\"\n\n    def test_rotates_from_unencrypted(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test rotating data that was stored without any encryption key.\"\"\"\n        result = db_session.execute(\n            text(\n                \"INSERT INTO internet_search_provider \"\n                \"(name, provider_type, api_key, is_active) \"\n                \"VALUES (:name, :ptype, :api_key, false) \"\n                \"RETURNING id\"\n            ),\n            {\n                \"name\": f\"test-raw-{id(self)}\",\n                \"ptype\": \"test\",\n                \"api_key\": b\"raw-api-key\",\n            },\n        )\n        isp_id = result.scalar_one()\n        db_session.commit()\n\n        try:\n            with (\n                patch(f\"{ROTATE_MODULE}.ENCRYPTION_KEY_SECRET\", NEW_KEY),\n                patch(f\"{EE_MODULE}.ENCRYPTION_KEY_SECRET\", NEW_KEY),\n            ):\n                totals = rotate_encryption_key(db_session, old_key=None)\n\n            assert totals.get(\"internet_search_provider.api_key\", 0) >= 1\n\n            raw = _raw_isp_bytes(db_session, isp_id)\n            assert raw is not None\n            assert _decrypt_bytes(raw, key=NEW_KEY) == \"raw-api-key\"\n        finally:\n            db_session.execute(\n                text(\"DELETE FROM internet_search_provider WHERE id = :id\"),\n                {\"id\": isp_id},\n            )\n            db_session.commit()\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/db/test_tag_race_condition.py",
    "content": "\"\"\"\nTest suite for tag creation race condition handling.\n\nTests that concurrent tag creation operations don't fail due to\nUniqueViolation errors, which would occur if the upsert logic\nisn't properly implemented.\n\"\"\"\n\nfrom concurrent.futures import as_completed\nfrom concurrent.futures import Future\nfrom concurrent.futures import ThreadPoolExecutor\nfrom typing import Union\nfrom uuid import uuid4\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import Document\nfrom onyx.db.models import Tag\nfrom onyx.db.tag import create_or_add_document_tag\nfrom onyx.db.tag import create_or_add_document_tag_list\n\n\ndef _create_test_document(db_session: Session, doc_id: str) -> Document:\n    \"\"\"Create a minimal test document.\"\"\"\n    document = Document(\n        id=doc_id,\n        semantic_id=f\"semantic_{doc_id}\",\n        boost=0,\n        hidden=False,\n        from_ingestion_api=False,\n    )\n    db_session.add(document)\n    db_session.commit()\n    return document\n\n\nclass TestTagRaceCondition:\n    \"\"\"Tests for tag creation race condition handling.\"\"\"\n\n    def test_concurrent_tag_creation_single_tag(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"\n        Test that multiple concurrent calls to create_or_add_document_tag\n        with the same tag key/value all succeed without UniqueViolation errors.\n\n        This simulates the race condition that occurs when multiple workers\n        try to create the same tag simultaneously during document indexing.\n        \"\"\"\n        # Create multiple test documents that will all get the same tag\n        num_documents = 20\n        doc_ids = [f\"test_doc_race_{uuid4().hex[:8]}\" for _ in range(num_documents)]\n\n        for doc_id in doc_ids:\n            _create_test_document(db_session, doc_id)\n\n        # Use a unique tag key/value for this test run to avoid interference\n        test_tag_key = f\"test_key_{uuid4().hex[:8]}\"\n        test_tag_value = f\"test_value_{uuid4().hex[:8]}\"\n        test_source = DocumentSource.FILE\n\n        errors: list[Exception] = []\n        results: list[Tag | None] = []\n\n        def create_tag_for_document(doc_id: str) -> Tag | None:\n            \"\"\"Worker function that creates a tag for a document using its own session.\"\"\"\n            with get_session_with_current_tenant() as session:\n                return create_or_add_document_tag(\n                    tag_key=test_tag_key,\n                    tag_value=test_tag_value,\n                    source=test_source,\n                    document_id=doc_id,\n                    db_session=session,\n                )\n\n        # Run all tag creations concurrently with high parallelism\n        with ThreadPoolExecutor(max_workers=num_documents) as executor:\n            futures = {\n                executor.submit(create_tag_for_document, doc_id): doc_id\n                for doc_id in doc_ids\n            }\n\n            for future in as_completed(futures):\n                doc_id = futures[future]\n                try:\n                    result = future.result()\n                    results.append(result)\n                except Exception as e:\n                    errors.append(e)\n\n        # All operations should succeed without errors\n        assert len(errors) == 0, f\"Got {len(errors)} errors: {errors}\"\n        assert len(results) == num_documents\n\n        # All results should be valid Tag objects\n        for result in results:\n            assert result is not None\n            assert result.tag_key == test_tag_key\n            assert result.tag_value == test_tag_value\n            assert result.source == test_source\n\n        # Verify only ONE tag was created in the database (not num_documents tags)\n        with get_session_with_current_tenant() as session:\n            tag_count = (\n                session.execute(\n                    select(Tag).where(\n                        Tag.tag_key == test_tag_key,\n                        Tag.tag_value == test_tag_value,\n                        Tag.source == test_source,\n                    )\n                )\n                .scalars()\n                .all()\n            )\n\n        assert len(tag_count) == 1, f\"Expected 1 tag, found {len(tag_count)}\"\n\n    def test_concurrent_tag_list_creation(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"\n        Test that multiple concurrent calls to create_or_add_document_tag_list\n        with the same tag values all succeed without UniqueViolation errors.\n        \"\"\"\n        # Create multiple test documents\n        num_documents = 20\n        doc_ids = [\n            f\"test_doc_list_race_{uuid4().hex[:8]}\" for _ in range(num_documents)\n        ]\n\n        for doc_id in doc_ids:\n            _create_test_document(db_session, doc_id)\n\n        # Use unique tag key/values for this test run\n        test_tag_key = f\"test_list_key_{uuid4().hex[:8]}\"\n        test_tag_values = [f\"value_{i}_{uuid4().hex[:4]}\" for i in range(5)]\n        test_source = DocumentSource.FILE\n\n        errors: list[Exception] = []\n        results: list[list[Tag]] = []\n\n        def create_tag_list_for_document(doc_id: str) -> list[Tag]:\n            \"\"\"Worker function that creates tag list for a document using its own session.\"\"\"\n            with get_session_with_current_tenant() as session:\n                return create_or_add_document_tag_list(\n                    tag_key=test_tag_key,\n                    tag_values=test_tag_values,\n                    source=test_source,\n                    document_id=doc_id,\n                    db_session=session,\n                )\n\n        # Run all tag creations concurrently\n        with ThreadPoolExecutor(max_workers=num_documents) as executor:\n            futures = {\n                executor.submit(create_tag_list_for_document, doc_id): doc_id\n                for doc_id in doc_ids\n            }\n\n            for future in as_completed(futures):\n                doc_id = futures[future]\n                try:\n                    result = future.result()\n                    results.append(result)\n                except Exception as e:\n                    errors.append(e)\n\n        # All operations should succeed without errors\n        assert len(errors) == 0, f\"Got {len(errors)} errors: {errors}\"\n        assert len(results) == num_documents\n\n        # Each result should have all the expected tags\n        for result in results:\n            assert len(result) == len(test_tag_values)\n            result_values = {tag.tag_value for tag in result}\n            assert result_values == set(test_tag_values)\n\n        # Verify exactly len(test_tag_values) tags were created (one per value)\n        with get_session_with_current_tenant() as session:\n            tags = (\n                session.execute(\n                    select(Tag).where(\n                        Tag.tag_key == test_tag_key,\n                        Tag.tag_value.in_(test_tag_values),\n                        Tag.source == test_source,\n                    )\n                )\n                .scalars()\n                .all()\n            )\n\n        assert len(tags) == len(\n            test_tag_values\n        ), f\"Expected {len(test_tag_values)} tags, found {len(tags)}\"\n\n    def test_concurrent_mixed_tag_operations(\n        self,\n        db_session: Session,\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"\n        Test that concurrent single tag and tag list operations on the same\n        tag key/value don't interfere with each other.\n\n        This is a more realistic scenario where different documents might\n        have the same metadata key but different value types (single vs list).\n        \"\"\"\n        num_documents = 10\n        doc_ids_single = [\n            f\"test_doc_single_{uuid4().hex[:8]}\" for _ in range(num_documents)\n        ]\n        doc_ids_list = [\n            f\"test_doc_list_{uuid4().hex[:8]}\" for _ in range(num_documents)\n        ]\n\n        for doc_id in doc_ids_single + doc_ids_list:\n            _create_test_document(db_session, doc_id)\n\n        # Same key but used as both single value and list value\n        test_tag_key = f\"mixed_key_{uuid4().hex[:8]}\"\n        test_single_value = f\"single_value_{uuid4().hex[:8]}\"\n        test_list_values = [test_single_value]  # Same value but as list\n        test_source = DocumentSource.FILE\n\n        errors: list[Exception] = []\n\n        def create_single_tag(doc_id: str) -> Tag | None:\n            with get_session_with_current_tenant() as session:\n                return create_or_add_document_tag(\n                    tag_key=test_tag_key,\n                    tag_value=test_single_value,\n                    source=test_source,\n                    document_id=doc_id,\n                    db_session=session,\n                )\n\n        def create_list_tag(doc_id: str) -> list[Tag]:\n            with get_session_with_current_tenant() as session:\n                return create_or_add_document_tag_list(\n                    tag_key=test_tag_key,\n                    tag_values=test_list_values,\n                    source=test_source,\n                    document_id=doc_id,\n                    db_session=session,\n                )\n\n        # Run both types of operations concurrently\n        with ThreadPoolExecutor(max_workers=num_documents * 2) as executor:\n            futures: list[Future[Union[Tag | None] | list[Tag]]] = []\n            for doc_id in doc_ids_single:\n                futures.append(executor.submit(create_single_tag, doc_id))\n            for doc_id in doc_ids_list:\n                futures.append(executor.submit(create_list_tag, doc_id))\n\n            for future in as_completed(futures):\n                try:\n                    future.result()\n                except Exception as e:\n                    errors.append(e)\n\n        # All operations should succeed\n        assert len(errors) == 0, f\"Got {len(errors)} errors: {errors}\"\n\n        # Should have exactly 2 tags: one with is_list=False, one with is_list=True\n        with get_session_with_current_tenant() as session:\n            tags = (\n                session.execute(\n                    select(Tag).where(\n                        Tag.tag_key == test_tag_key,\n                        Tag.tag_value == test_single_value,\n                        Tag.source == test_source,\n                    )\n                )\n                .scalars()\n                .all()\n            )\n\n        assert (\n            len(tags) == 2\n        ), f\"Expected 2 tags (is_list=True and False), found {len(tags)}\"\n        is_list_values = {tag.is_list for tag in tags}\n        assert is_list_values == {True, False}\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/db/test_user_account_type.py",
    "content": "\"\"\"\nTests that account_type is correctly set when creating users through\nthe internal DB functions: add_slack_user_if_not_exists and\nbatch_add_ext_perm_user_if_not_exists.\n\nThese functions are called by background workers (Slack bot, permission sync)\nand are not exposed via API endpoints, so they must be tested directly.\n\"\"\"\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.enums import AccountType\nfrom onyx.db.models import UserRole\nfrom onyx.db.users import add_slack_user_if_not_exists\nfrom onyx.db.users import batch_add_ext_perm_user_if_not_exists\n\n\ndef test_slack_user_creation_sets_account_type_bot(db_session: Session) -> None:\n    \"\"\"add_slack_user_if_not_exists sets account_type=BOT and role=SLACK_USER.\"\"\"\n    user = add_slack_user_if_not_exists(db_session, \"slack_acct_type@test.com\")\n\n    assert user.role == UserRole.SLACK_USER\n    assert user.account_type == AccountType.BOT\n\n\ndef test_ext_perm_user_creation_sets_account_type(db_session: Session) -> None:\n    \"\"\"batch_add_ext_perm_user_if_not_exists sets account_type=EXT_PERM_USER.\"\"\"\n    users = batch_add_ext_perm_user_if_not_exists(\n        db_session, [\"extperm_acct_type@test.com\"]\n    )\n\n    assert len(users) == 1\n    user = users[0]\n    assert user.role == UserRole.EXT_PERM_USER\n    assert user.account_type == AccountType.EXT_PERM_USER\n\n\ndef test_ext_perm_to_slack_upgrade_updates_role_and_account_type(\n    db_session: Session,\n) -> None:\n    \"\"\"When an EXT_PERM_USER is upgraded to slack, both role and account_type update.\"\"\"\n    email = \"ext_to_slack_acct_type@test.com\"\n\n    # Create as ext_perm user first\n    batch_add_ext_perm_user_if_not_exists(db_session, [email])\n\n    # Now \"upgrade\" via slack path\n    user = add_slack_user_if_not_exists(db_session, email)\n\n    assert user.role == UserRole.SLACK_USER\n    assert user.account_type == AccountType.BOT\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/discord_bot/conftest.py",
    "content": "\"\"\"Fixtures for Discord bot external dependency tests.\"\"\"\n\nfrom collections.abc import Generator\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\n\nimport discord\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n\nTEST_TENANT_ID: str = \"public\"\n\n\n@pytest.fixture(scope=\"function\")\ndef db_session() -> Generator[Session, None, None]:\n    \"\"\"Create a database session for testing.\"\"\"\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n    with get_session_with_current_tenant() as session:\n        yield session\n\n\n@pytest.fixture(scope=\"function\")\ndef tenant_context() -> Generator[None, None, None]:\n    \"\"\"Set up tenant context for testing.\"\"\"\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n    try:\n        yield\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\n@pytest.fixture\ndef mock_cache_manager() -> MagicMock:\n    \"\"\"Mock DiscordCacheManager.\"\"\"\n    cache = MagicMock()\n    cache.get_tenant.return_value = TEST_TENANT_ID\n    cache.get_api_key.return_value = \"test_api_key\"\n    cache.refresh_all = AsyncMock()\n    cache.refresh_guild = AsyncMock()\n    cache.is_initialized = True\n    return cache\n\n\n@pytest.fixture\ndef mock_api_client() -> MagicMock:\n    \"\"\"Mock OnyxAPIClient.\"\"\"\n    client = MagicMock()\n    client.initialize = AsyncMock()\n    client.close = AsyncMock()\n    client.is_initialized = True\n\n    # Mock successful response\n    mock_response = MagicMock()\n    mock_response.answer = \"Test response from bot\"\n    mock_response.citation_info = None\n    mock_response.top_documents = None\n    mock_response.error_msg = None\n\n    client.send_chat_message = AsyncMock(return_value=mock_response)\n    client.health_check = AsyncMock(return_value=True)\n    return client\n\n\n@pytest.fixture\ndef mock_discord_guild() -> MagicMock:\n    \"\"\"Mock Discord guild with channels.\"\"\"\n    guild = MagicMock(spec=discord.Guild)\n    guild.id = 123456789\n    guild.name = \"Test Server\"\n    guild.default_role = MagicMock()\n\n    # Create some mock channels\n    text_channel = MagicMock(spec=discord.TextChannel)\n    text_channel.id = 111111111\n    text_channel.name = \"general\"\n    text_channel.type = discord.ChannelType.text\n    perms = MagicMock()\n    perms.view_channel = True\n    text_channel.permissions_for.return_value = perms\n\n    forum_channel = MagicMock(spec=discord.ForumChannel)\n    forum_channel.id = 222222222\n    forum_channel.name = \"forum\"\n    forum_channel.type = discord.ChannelType.forum\n    forum_channel.permissions_for.return_value = perms\n\n    private_channel = MagicMock(spec=discord.TextChannel)\n    private_channel.id = 333333333\n    private_channel.name = \"private\"\n    private_channel.type = discord.ChannelType.text\n    private_perms = MagicMock()\n    private_perms.view_channel = False\n    private_channel.permissions_for.return_value = private_perms\n\n    guild.channels = [text_channel, forum_channel, private_channel]\n    guild.text_channels = [text_channel, private_channel]\n    guild.forum_channels = [forum_channel]\n\n    return guild\n\n\n@pytest.fixture\ndef mock_discord_message(mock_discord_guild: MagicMock) -> MagicMock:\n    \"\"\"Mock Discord message for testing.\"\"\"\n    msg = MagicMock(spec=discord.Message)\n    msg.id = 555555555\n    msg.author = MagicMock(spec=discord.Member)\n    msg.author.id = 444444444\n    msg.author.bot = False\n    msg.author.display_name = \"TestUser\"\n    msg.author.guild_permissions = MagicMock()\n    msg.author.guild_permissions.administrator = True\n    msg.author.guild_permissions.manage_guild = True\n    msg.content = \"Hello bot\"\n    msg.guild = mock_discord_guild\n    msg.channel = MagicMock()\n    msg.channel.id = 111111111\n    msg.channel.name = \"general\"\n    msg.channel.send = AsyncMock()\n    msg.type = discord.MessageType.default\n    msg.mentions = []\n    msg.role_mentions = []\n    msg.channel_mentions = []\n    msg.reference = None\n    msg.add_reaction = AsyncMock()\n    msg.remove_reaction = AsyncMock()\n    msg.reply = AsyncMock()\n    msg.create_thread = AsyncMock()\n    return msg\n\n\n@pytest.fixture\ndef mock_bot_user() -> MagicMock:\n    \"\"\"Mock Discord bot user.\"\"\"\n    user = MagicMock(spec=discord.ClientUser)\n    user.id = 987654321\n    user.display_name = \"OnyxBot\"\n    user.bot = True\n    return user\n\n\n@pytest.fixture\ndef mock_discord_bot(\n    mock_cache_manager: MagicMock,\n    mock_api_client: MagicMock,\n    mock_bot_user: MagicMock,\n) -> MagicMock:\n    \"\"\"Mock OnyxDiscordClient.\"\"\"\n    bot = MagicMock()\n    bot.user = mock_bot_user\n    bot.cache = mock_cache_manager\n    bot.api_client = mock_api_client\n    bot.ready = True\n    bot.loop = MagicMock()\n    bot.is_closed.return_value = False\n    bot.guilds = []\n    return bot\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/discord_bot/test_discord_events.py",
    "content": "\"\"\"Tests for Discord bot event handling with mocked Discord API.\n\nThese tests mock the Discord API to test event handling logic.\n\"\"\"\n\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport discord\nimport pytest\n\nfrom onyx.onyxbot.discord.handle_commands import get_text_channels\nfrom onyx.onyxbot.discord.handle_commands import handle_dm\nfrom onyx.onyxbot.discord.handle_commands import handle_registration_command\nfrom onyx.onyxbot.discord.handle_commands import handle_sync_channels_command\nfrom onyx.onyxbot.discord.handle_message import process_chat_message\nfrom onyx.onyxbot.discord.handle_message import send_error_response\nfrom onyx.onyxbot.discord.handle_message import send_response\n\n\nclass TestGuildRegistrationCommand:\n    \"\"\"Tests for !register command handling.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_register_guild_success(\n        self,\n        mock_discord_message: MagicMock,\n        mock_cache_manager: MagicMock,\n    ) -> None:\n        \"\"\"Valid registration key with admin perms succeeds.\"\"\"\n        mock_discord_message.content = \"!register discord_public.valid_token\"\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.parse_discord_registration_key\",\n                return_value=\"public\",\n            ),\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.get_session_with_tenant\"\n            ) as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.get_guild_config_by_registration_key\"\n            ) as mock_get_config,\n            patch(\"onyx.onyxbot.discord.handle_commands.bulk_create_channel_configs\"),\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            mock_config = MagicMock()\n            mock_config.id = 1\n            mock_config.guild_id = None  # Not yet registered\n            mock_get_config.return_value = mock_config\n\n            mock_cache_manager.get_tenant.return_value = None  # Not in cache yet\n\n            result = await handle_registration_command(\n                mock_discord_message, mock_cache_manager\n            )\n\n        assert result is True\n        mock_discord_message.reply.assert_called()\n        # Check that success message was sent\n        call_args = mock_discord_message.reply.call_args\n        assert \"Successfully registered\" in str(call_args)\n\n    @pytest.mark.asyncio\n    async def test_register_invalid_key_format(\n        self,\n        mock_discord_message: MagicMock,\n        mock_cache_manager: MagicMock,\n    ) -> None:\n        \"\"\"Malformed key DMs user and deletes message.\"\"\"\n        mock_discord_message.content = \"!register abc\"  # Malformed\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_commands.parse_discord_registration_key\",\n            return_value=None,  # Invalid format\n        ):\n            result = await handle_registration_command(\n                mock_discord_message, mock_cache_manager\n            )\n\n        assert result is True\n        # On failure: DM the author and delete the message\n        mock_discord_message.author.send.assert_called()\n        call_args = mock_discord_message.author.send.call_args\n        assert \"Invalid\" in str(call_args)\n        mock_discord_message.delete.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_register_key_not_found(\n        self,\n        mock_discord_message: MagicMock,\n        mock_cache_manager: MagicMock,\n    ) -> None:\n        \"\"\"Key not in database DMs user and deletes message.\"\"\"\n        mock_discord_message.content = \"!register discord_public.notexist\"\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.parse_discord_registration_key\",\n                return_value=\"public\",\n            ),\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.get_session_with_tenant\"\n            ) as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.get_guild_config_by_registration_key\",\n                return_value=None,  # Not found\n            ),\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            # Must return False so exceptions are not suppressed\n            mock_session.return_value.__exit__ = MagicMock(return_value=False)\n            mock_cache_manager.get_tenant.return_value = None\n\n            result = await handle_registration_command(\n                mock_discord_message, mock_cache_manager\n            )\n\n        assert result is True\n        # On failure: DM the author and delete the message\n        mock_discord_message.author.send.assert_called()\n        call_args = mock_discord_message.author.send.call_args\n        assert \"not found\" in str(call_args).lower()\n        mock_discord_message.delete.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_register_key_already_used(\n        self,\n        mock_discord_message: MagicMock,\n        mock_cache_manager: MagicMock,\n    ) -> None:\n        \"\"\"Previously used key DMs user and deletes message.\"\"\"\n        mock_discord_message.content = \"!register discord_public.used_key\"\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.parse_discord_registration_key\",\n                return_value=\"public\",\n            ),\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.get_session_with_tenant\"\n            ) as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.get_guild_config_by_registration_key\"\n            ) as mock_get_config,\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            # Must return False so exceptions are not suppressed\n            mock_session.return_value.__exit__ = MagicMock(return_value=False)\n\n            mock_config = MagicMock()\n            mock_config.guild_id = 999999  # Already registered!\n            mock_get_config.return_value = mock_config\n\n            mock_cache_manager.get_tenant.return_value = None\n\n            result = await handle_registration_command(\n                mock_discord_message, mock_cache_manager\n            )\n\n        assert result is True\n        # On failure: DM the author and delete the message\n        mock_discord_message.author.send.assert_called()\n        call_args = mock_discord_message.author.send.call_args\n        assert \"already\" in str(call_args).lower()\n        mock_discord_message.delete.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_register_guild_already_registered(\n        self,\n        mock_discord_message: MagicMock,\n        mock_cache_manager: MagicMock,\n    ) -> None:\n        \"\"\"Guild already in cache DMs user and deletes message.\"\"\"\n        mock_discord_message.content = \"!register discord_public.valid_token\"\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_commands.parse_discord_registration_key\",\n            return_value=\"public\",\n        ):\n            # Guild already in cache\n            mock_cache_manager.get_tenant.return_value = \"existing_tenant\"\n\n            result = await handle_registration_command(\n                mock_discord_message, mock_cache_manager\n            )\n\n        assert result is True\n        # On failure: DM the author and delete the message\n        mock_discord_message.author.send.assert_called()\n        call_args = mock_discord_message.author.send.call_args\n        assert \"already registered\" in str(call_args).lower()\n        mock_discord_message.delete.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_register_no_permission(\n        self,\n        mock_discord_message: MagicMock,\n        mock_cache_manager: MagicMock,\n    ) -> None:\n        \"\"\"User without admin perms gets DM and message deleted.\"\"\"\n        mock_discord_message.content = \"!register discord_public.valid_token\"\n        mock_discord_message.author.guild_permissions.administrator = False\n        mock_discord_message.author.guild_permissions.manage_guild = False\n\n        result = await handle_registration_command(\n            mock_discord_message, mock_cache_manager\n        )\n\n        assert result is True\n        # On failure: DM the author and delete the message\n        mock_discord_message.author.send.assert_called()\n        call_args = mock_discord_message.author.send.call_args\n        assert \"permission\" in str(call_args).lower()\n        mock_discord_message.delete.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_register_in_dm(\n        self,\n        mock_cache_manager: MagicMock,\n    ) -> None:\n        \"\"\"Registration in DM sends DM and returns True.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.guild = None  # DM\n        msg.content = \"!register discord_public.token\"\n        msg.author = MagicMock()\n        msg.author.send = AsyncMock()\n\n        result = await handle_registration_command(msg, mock_cache_manager)\n\n        assert result is True\n        msg.author.send.assert_called()\n        call_args = msg.author.send.call_args\n        assert \"server\" in str(call_args).lower()\n\n    @pytest.mark.asyncio\n    async def test_register_syncs_forum_channels(\n        self,\n        mock_discord_message: MagicMock,  # noqa: ARG002\n        mock_discord_guild: MagicMock,\n    ) -> None:\n        \"\"\"Forum channels are included in sync.\"\"\"\n        channels = get_text_channels(mock_discord_guild)\n\n        channel_types = [c.channel_type for c in channels]\n        assert \"forum\" in channel_types\n\n    @pytest.mark.asyncio\n    async def test_register_private_channel_detection(\n        self,\n        mock_discord_message: MagicMock,  # noqa: ARG002\n        mock_discord_guild: MagicMock,\n    ) -> None:\n        \"\"\"Private channels are marked correctly.\"\"\"\n        channels = get_text_channels(mock_discord_guild)\n\n        private_channels = [c for c in channels if c.is_private]\n        assert len(private_channels) >= 1\n\n\nclass TestSyncChannelsCommand:\n    \"\"\"Tests for !sync-channels command handling.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_sync_channels_adds_new(\n        self,\n        mock_discord_message: MagicMock,\n        mock_discord_bot: MagicMock,\n    ) -> None:\n        \"\"\"New channel in Discord creates channel config.\"\"\"\n        mock_discord_message.content = \"!sync-channels\"\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.get_session_with_tenant\"\n            ) as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.get_guild_config_by_discord_id\"\n            ) as mock_get_guild,\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.get_guild_config_by_internal_id\"\n            ) as mock_get_guild_internal,\n            patch(\n                \"onyx.onyxbot.discord.handle_commands.sync_channel_configs\"\n            ) as mock_sync,\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            mock_config = MagicMock()\n            mock_config.id = 1\n            mock_config.guild_id = 123456789\n            mock_get_guild.return_value = mock_config\n            mock_get_guild_internal.return_value = mock_config\n\n            mock_sync.return_value = (1, 0, 0)  # 1 added, 0 removed, 0 updated\n\n            mock_discord_bot.get_guild.return_value = mock_discord_message.guild\n\n            result = await handle_sync_channels_command(\n                mock_discord_message, \"public\", mock_discord_bot\n            )\n\n        assert result is True\n        mock_discord_message.reply.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_sync_channels_no_permission(\n        self,\n        mock_discord_message: MagicMock,\n        mock_discord_bot: MagicMock,\n    ) -> None:\n        \"\"\"User without admin perms gets DM and reaction.\"\"\"\n        mock_discord_message.content = \"!sync-channels\"\n        mock_discord_message.author.guild_permissions.administrator = False\n        mock_discord_message.author.guild_permissions.manage_guild = False\n\n        result = await handle_sync_channels_command(\n            mock_discord_message, \"public\", mock_discord_bot\n        )\n\n        assert result is True\n        # On failure: DM the author and react with ❌\n        mock_discord_message.author.send.assert_called()\n        call_args = mock_discord_message.author.send.call_args\n        assert \"permission\" in str(call_args).lower()\n        mock_discord_message.add_reaction.assert_called_with(\"❌\")\n\n    @pytest.mark.asyncio\n    async def test_sync_channels_unregistered_guild(\n        self,\n        mock_discord_message: MagicMock,\n        mock_discord_bot: MagicMock,\n    ) -> None:\n        \"\"\"Sync in unregistered guild gets DM and reaction.\"\"\"\n        mock_discord_message.content = \"!sync-channels\"\n\n        # tenant_id is None = not registered\n        result = await handle_sync_channels_command(\n            mock_discord_message, None, mock_discord_bot\n        )\n\n        assert result is True\n        # On failure: DM the author and react with ❌\n        mock_discord_message.author.send.assert_called()\n        call_args = mock_discord_message.author.send.call_args\n        assert \"not registered\" in str(call_args).lower()\n        mock_discord_message.add_reaction.assert_called_with(\"❌\")\n\n\nclass TestMessageHandling:\n    \"\"\"Tests for message handling behavior.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_message_adds_thinking_emoji(\n        self,\n        mock_discord_message: MagicMock,\n        mock_api_client: MagicMock,\n        mock_bot_user: MagicMock,\n    ) -> None:\n        \"\"\"Thinking emoji is added during processing.\"\"\"\n        await process_chat_message(\n            message=mock_discord_message,\n            api_key=\"test_key\",\n            persona_id=None,\n            thread_only_mode=False,\n            api_client=mock_api_client,\n            bot_user=mock_bot_user,\n        )\n\n        mock_discord_message.add_reaction.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_message_removes_thinking_emoji(\n        self,\n        mock_discord_message: MagicMock,\n        mock_api_client: MagicMock,\n        mock_bot_user: MagicMock,\n    ) -> None:\n        \"\"\"Thinking emoji is removed after response.\"\"\"\n        await process_chat_message(\n            message=mock_discord_message,\n            api_key=\"test_key\",\n            persona_id=None,\n            thread_only_mode=False,\n            api_client=mock_api_client,\n            bot_user=mock_bot_user,\n        )\n\n        mock_discord_message.remove_reaction.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_message_reaction_failure_non_blocking(\n        self,\n        mock_discord_message: MagicMock,\n        mock_api_client: MagicMock,\n        mock_bot_user: MagicMock,\n    ) -> None:\n        \"\"\"add_reaction failure doesn't block processing.\"\"\"\n        mock_discord_message.add_reaction = AsyncMock(\n            side_effect=discord.DiscordException(\"Cannot add reaction\")\n        )\n\n        # Should not raise - just log warning and continue\n        await process_chat_message(\n            message=mock_discord_message,\n            api_key=\"test_key\",\n            persona_id=None,\n            thread_only_mode=False,\n            api_client=mock_api_client,\n            bot_user=mock_bot_user,\n        )\n\n        # Should still complete and send reply\n        mock_discord_message.reply.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_dm_response(self) -> None:\n        \"\"\"DM to bot sends redirect message.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.channel = MagicMock(spec=discord.DMChannel)\n        msg.channel.send = AsyncMock()\n\n        await handle_dm(msg)\n\n        msg.channel.send.assert_called_once()\n        call_args = msg.channel.send.call_args\n        assert \"DM\" in str(call_args) or \"server\" in str(call_args).lower()\n\n\nclass TestThreadCreationAndResponseRouting:\n    \"\"\"Tests for thread creation and response routing.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_response_in_existing_thread(\n        self,\n        mock_bot_user: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Message in thread - response appended to thread.\"\"\"\n        thread = MagicMock(spec=discord.Thread)\n        thread.send = AsyncMock()\n\n        msg = MagicMock(spec=discord.Message)\n        msg.channel = thread\n        msg.reply = AsyncMock()\n        msg.create_thread = AsyncMock()\n\n        await send_response(msg, \"Test response\", thread_only_mode=False)\n\n        # Should send to thread, not create new thread\n        thread.send.assert_called()\n        msg.create_thread.assert_not_called()\n\n    @pytest.mark.asyncio\n    async def test_response_creates_thread_thread_only_mode(\n        self,\n        mock_discord_message: MagicMock,\n        mock_bot_user: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"thread_only_mode=true creates new thread for response.\"\"\"\n        mock_thread = MagicMock()\n        mock_thread.send = AsyncMock()\n        mock_discord_message.create_thread = AsyncMock(return_value=mock_thread)\n\n        # Make sure it's not a thread\n        mock_discord_message.channel = MagicMock(spec=discord.TextChannel)\n\n        await send_response(\n            mock_discord_message, \"Test response\", thread_only_mode=True\n        )\n\n        mock_discord_message.create_thread.assert_called()\n        mock_thread.send.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_response_replies_inline(\n        self,\n        mock_discord_message: MagicMock,\n        mock_bot_user: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"thread_only_mode=false uses message.reply().\"\"\"\n        # Make sure it's not a thread\n        mock_discord_message.channel = MagicMock(spec=discord.TextChannel)\n\n        await send_response(\n            mock_discord_message, \"Test response\", thread_only_mode=False\n        )\n\n        mock_discord_message.reply.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_thread_name_truncation(\n        self,\n        mock_bot_user: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Thread name is truncated to 100 chars.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.channel = MagicMock(spec=discord.TextChannel)\n        msg.author = MagicMock()\n        msg.author.display_name = \"A\" * 200  # Very long name\n\n        mock_thread = MagicMock()\n        mock_thread.send = AsyncMock()\n        msg.create_thread = AsyncMock(return_value=mock_thread)\n\n        await send_response(msg, \"Test\", thread_only_mode=True)\n\n        call_args = msg.create_thread.call_args\n        thread_name = call_args.kwargs.get(\"name\") or call_args[1].get(\"name\")\n        assert len(thread_name) <= 100\n\n    @pytest.mark.asyncio\n    async def test_error_response_creates_thread(\n        self,\n        mock_discord_message: MagicMock,\n        mock_bot_user: MagicMock,\n    ) -> None:\n        \"\"\"Error response in channel creates thread.\"\"\"\n        mock_discord_message.channel = MagicMock(spec=discord.TextChannel)\n        mock_thread = MagicMock()\n        mock_thread.send = AsyncMock()\n        mock_discord_message.create_thread = AsyncMock(return_value=mock_thread)\n\n        await send_error_response(mock_discord_message, mock_bot_user)\n\n        mock_discord_message.create_thread.assert_called()\n\n\nclass TestBotLifecycle:\n    \"\"\"Tests for bot lifecycle management.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_setup_hook_initializes_cache(\n        self,\n        mock_cache_manager: MagicMock,\n        mock_api_client: MagicMock,\n    ) -> None:\n        \"\"\"setup_hook calls cache.refresh_all().\"\"\"\n        from onyx.onyxbot.discord.client import OnyxDiscordClient\n\n        with (\n            patch.object(\n                OnyxDiscordClient,\n                \"__init__\",\n                lambda self: None,  # noqa: ARG005\n            ),\n            patch(\n                \"onyx.onyxbot.discord.client.DiscordCacheManager\",\n                return_value=mock_cache_manager,\n            ),\n            patch(\n                \"onyx.onyxbot.discord.client.OnyxAPIClient\",\n                return_value=mock_api_client,\n            ),\n        ):\n            bot = OnyxDiscordClient()\n            bot.cache = mock_cache_manager\n            bot.api_client = mock_api_client\n            bot.loop = MagicMock()\n            bot.loop.create_task = MagicMock()\n\n            await bot.setup_hook()\n\n        mock_cache_manager.refresh_all.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_setup_hook_initializes_api_client(\n        self,\n        mock_cache_manager: MagicMock,\n        mock_api_client: MagicMock,\n    ) -> None:\n        \"\"\"setup_hook calls api_client.initialize().\"\"\"\n        from onyx.onyxbot.discord.client import OnyxDiscordClient\n\n        with (\n            patch.object(\n                OnyxDiscordClient,\n                \"__init__\",\n                lambda self: None,  # noqa: ARG005\n            ),\n        ):\n            bot = OnyxDiscordClient()\n            bot.cache = mock_cache_manager\n            bot.api_client = mock_api_client\n            bot.loop = MagicMock()\n            bot.loop.create_task = MagicMock()\n\n            await bot.setup_hook()\n\n        mock_api_client.initialize.assert_called()\n\n    @pytest.mark.asyncio\n    async def test_close_closes_api_client(\n        self,\n        mock_cache_manager: MagicMock,\n        mock_api_client: MagicMock,\n    ) -> None:\n        \"\"\"close() calls api_client.close().\"\"\"\n        from onyx.onyxbot.discord.client import OnyxDiscordClient\n\n        with (\n            patch.object(\n                OnyxDiscordClient,\n                \"__init__\",\n                lambda self: None,  # noqa: ARG005\n            ),\n            patch.object(OnyxDiscordClient, \"is_closed\", return_value=True),\n        ):\n            bot = OnyxDiscordClient()\n            bot.cache = mock_cache_manager\n            bot.api_client = mock_api_client\n            bot._cache_refresh_task = None\n            bot.ready = True\n\n            # Mock parent close\n            async def mock_super_close() -> None:\n                pass\n\n            with patch(\"discord.ext.commands.Bot.close\", mock_super_close):\n                await bot.close()\n\n        mock_api_client.close.assert_called()\n        mock_cache_manager.clear.assert_called()\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/document_index/conftest.py",
    "content": "\"\"\"Shared fixtures for document_index external dependency tests.\n\nProvides Vespa and OpenSearch index setup, tenant context, and chunk helpers.\n\"\"\"\n\nimport os\nimport time\nimport uuid\nfrom collections.abc import Generator\nfrom unittest.mock import patch\n\nimport httpx\nimport pytest\n\nfrom onyx.access.models import DocumentAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.document_index.interfaces_new import IndexingMetadata\nfrom onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout\nfrom onyx.document_index.opensearch.opensearch_document_index import (\n    OpenSearchOldDocumentIndex,\n)\nfrom onyx.document_index.vespa.index import VespaIndex\nfrom onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client\nfrom onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout\nfrom onyx.indexing.models import ChunkEmbedding\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom shared_configs.contextvars import get_current_tenant_id\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\nEMBEDDING_DIM = 128\n\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef make_chunk(\n    doc_id: str,\n    chunk_id: int = 0,\n    content: str = \"test content\",\n) -> DocMetadataAwareIndexChunk:\n    \"\"\"Create a chunk suitable for external dependency testing (128-dim embeddings).\"\"\"\n    tenant_id = get_current_tenant_id()\n    access = DocumentAccess.build(\n        user_emails=[],\n        user_groups=[],\n        external_user_emails=[],\n        external_user_group_ids=[],\n        is_public=True,\n    )\n    embeddings = ChunkEmbedding(\n        full_embedding=[1.0] + [0.0] * (EMBEDDING_DIM - 1),\n        mini_chunk_embeddings=[],\n    )\n    source_document = Document(\n        id=doc_id,\n        semantic_identifier=\"test_doc\",\n        source=DocumentSource.FILE,\n        sections=[],\n        metadata={},\n        title=\"test title\",\n    )\n    return DocMetadataAwareIndexChunk(\n        tenant_id=tenant_id,\n        access=access,\n        document_sets=set(),\n        user_project=[],\n        personas=[],\n        boost=0,\n        aggregated_chunk_boost_factor=0,\n        ancestor_hierarchy_node_ids=[],\n        embeddings=embeddings,\n        title_embedding=[1.0] + [0.0] * (EMBEDDING_DIM - 1),\n        source_document=source_document,\n        title_prefix=\"\",\n        metadata_suffix_keyword=\"\",\n        metadata_suffix_semantic=\"\",\n        contextual_rag_reserved_tokens=0,\n        doc_summary=\"\",\n        chunk_context=\"\",\n        mini_chunk_texts=None,\n        large_chunk_id=None,\n        chunk_id=chunk_id,\n        blurb=content[:50],\n        content=content,\n        source_links={0: \"\"},\n        image_file_id=None,\n        section_continuation=False,\n    )\n\n\ndef make_indexing_metadata(\n    doc_ids: list[str],\n    old_counts: list[int],\n    new_counts: list[int],\n) -> IndexingMetadata:\n    return IndexingMetadata(\n        doc_id_to_chunk_cnt_diff={\n            doc_id: IndexingMetadata.ChunkCounts(\n                old_chunk_cnt=old,\n                new_chunk_cnt=new,\n            )\n            for doc_id, old, new in zip(doc_ids, old_counts, new_counts)\n        }\n    )\n\n\n# ---------------------------------------------------------------------------\n# Fixtures\n# ---------------------------------------------------------------------------\n\n\n@pytest.fixture(scope=\"module\")\ndef tenant_context() -> Generator[None, None, None]:\n    \"\"\"Sets up tenant context for testing.\"\"\"\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n    try:\n        yield\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\n@pytest.fixture(scope=\"module\")\ndef test_index_name() -> Generator[str, None, None]:\n    yield f\"test_index_{uuid.uuid4().hex[:8]}\"\n\n\n@pytest.fixture(scope=\"module\")\ndef httpx_client() -> Generator[httpx.Client, None, None]:\n    client = get_vespa_http_client()\n    try:\n        yield client\n    finally:\n        client.close()\n\n\n@pytest.fixture(scope=\"module\")\ndef vespa_index(\n    httpx_client: httpx.Client,\n    tenant_context: None,  # noqa: ARG001\n    test_index_name: str,\n) -> Generator[VespaIndex, None, None]:\n    \"\"\"Create a Vespa index, wait for schema readiness, and yield it.\"\"\"\n    vespa_idx = VespaIndex(\n        index_name=test_index_name,\n        secondary_index_name=None,\n        large_chunks_enabled=False,\n        secondary_large_chunks_enabled=None,\n        multitenant=MULTI_TENANT,\n        httpx_client=httpx_client,\n    )\n    backend_dir = os.path.abspath(\n        os.path.join(os.path.dirname(__file__), \"..\", \"..\", \"..\")\n    )\n    with patch(\"os.getcwd\", return_value=backend_dir):\n        vespa_idx.ensure_indices_exist(\n            primary_embedding_dim=EMBEDDING_DIM,\n            primary_embedding_precision=EmbeddingPrecision.FLOAT,\n            secondary_index_embedding_dim=None,\n            secondary_index_embedding_precision=None,\n        )\n    if not wait_for_vespa_with_timeout(wait_limit=90):\n        pytest.fail(\"Vespa is not available.\")\n\n    # Wait until the schema is actually ready for writes on content nodes. We\n    # probe by attempting a PUT; 200 means the schema is live, 400 means not\n    # yet. This is only temporary until we entirely move off of Vespa.\n    probe_doc = {\n        \"fields\": {\n            \"document_id\": \"__probe__\",\n            \"chunk_id\": 0,\n            \"blurb\": \"\",\n            \"title\": \"\",\n            \"skip_title\": True,\n            \"content\": \"\",\n            \"content_summary\": \"\",\n            \"source_type\": \"file\",\n            \"source_links\": \"null\",\n            \"semantic_identifier\": \"\",\n            \"section_continuation\": False,\n            \"large_chunk_reference_ids\": [],\n            \"metadata\": \"{}\",\n            \"metadata_list\": [],\n            \"metadata_suffix\": \"\",\n            \"chunk_context\": \"\",\n            \"doc_summary\": \"\",\n            \"embeddings\": {\"full_chunk\": [1.0] + [0.0] * (EMBEDDING_DIM - 1)},\n            \"access_control_list\": {},\n            \"document_sets\": {},\n            \"image_file_name\": None,\n            \"user_project\": [],\n            \"personas\": [],\n            \"boost\": 0.0,\n            \"aggregated_chunk_boost_factor\": 0.0,\n            \"primary_owners\": [],\n            \"secondary_owners\": [],\n        }\n    }\n    probe_url = (\n        f\"http://localhost:8081/document/v1/default/{test_index_name}/docid/__probe__\"\n    )\n    schema_ready = False\n    for _ in range(60):\n        resp = httpx_client.post(probe_url, json=probe_doc)\n        if resp.status_code == 200:\n            schema_ready = True\n            httpx_client.delete(probe_url)\n            break\n        time.sleep(1)\n    if not schema_ready:\n        pytest.fail(f\"Vespa schema '{test_index_name}' did not become ready in time.\")\n\n    yield vespa_idx\n\n\n@pytest.fixture(scope=\"module\")\ndef opensearch_old_index(\n    tenant_context: None,  # noqa: ARG001\n    test_index_name: str,\n) -> Generator[OpenSearchOldDocumentIndex, None, None]:\n    \"\"\"Create an OpenSearch index via the old adapter and yield it.\"\"\"\n    if not wait_for_opensearch_with_timeout():\n        pytest.fail(\"OpenSearch is not available.\")\n\n    opensearch_idx = OpenSearchOldDocumentIndex(\n        index_name=test_index_name,\n        embedding_dim=EMBEDDING_DIM,\n        embedding_precision=EmbeddingPrecision.FLOAT,\n        secondary_index_name=None,\n        secondary_embedding_dim=None,\n        secondary_embedding_precision=None,\n        large_chunks_enabled=False,\n        secondary_large_chunks_enabled=None,\n        multitenant=MULTI_TENANT,\n    )\n    opensearch_idx.ensure_indices_exist(\n        primary_embedding_dim=EMBEDDING_DIM,\n        primary_embedding_precision=EmbeddingPrecision.FLOAT,\n        secondary_index_embedding_dim=None,\n        secondary_index_embedding_precision=None,\n    )\n\n    yield opensearch_idx\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/document_index/test_document_index.py",
    "content": "\"\"\"External dependency tests for the new DocumentIndex interface.\n\nThese tests assume Vespa and OpenSearch are running.\n\"\"\"\n\nimport time\nimport uuid\nfrom collections.abc import Generator\nfrom collections.abc import Iterator\n\nimport httpx\nimport pytest\n\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.document_index.interfaces_new import DocumentIndex as DocumentIndexNew\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.opensearch_document_index import (\n    OpenSearchDocumentIndex,\n)\nfrom onyx.document_index.opensearch.opensearch_document_index import (\n    OpenSearchOldDocumentIndex,\n)\nfrom onyx.document_index.vespa.index import VespaIndex\nfrom onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\nfrom tests.external_dependency_unit.document_index.conftest import EMBEDDING_DIM\nfrom tests.external_dependency_unit.document_index.conftest import make_chunk\nfrom tests.external_dependency_unit.document_index.conftest import (\n    make_indexing_metadata,\n)\n\n\n# ---------------------------------------------------------------------------\n# Fixtures\n# ---------------------------------------------------------------------------\n\n\n@pytest.fixture(scope=\"module\")\ndef vespa_document_index(\n    vespa_index: VespaIndex,  # noqa: ARG001 — ensures schema exists\n    httpx_client: httpx.Client,\n    test_index_name: str,\n) -> Generator[VespaDocumentIndex, None, None]:\n    yield VespaDocumentIndex(\n        index_name=test_index_name,\n        tenant_state=TenantState(tenant_id=TEST_TENANT_ID, multitenant=False),\n        large_chunks_enabled=False,\n        httpx_client=httpx_client,\n    )\n\n\n@pytest.fixture(scope=\"module\")\ndef opensearch_document_index(\n    opensearch_old_index: OpenSearchOldDocumentIndex,  # noqa: ARG001 — ensures index exists\n    test_index_name: str,\n) -> Generator[OpenSearchDocumentIndex, None, None]:\n    yield OpenSearchDocumentIndex(\n        tenant_state=TenantState(tenant_id=TEST_TENANT_ID, multitenant=False),\n        index_name=test_index_name,\n        embedding_dim=EMBEDDING_DIM,\n        embedding_precision=EmbeddingPrecision.FLOAT,\n    )\n\n\n@pytest.fixture(scope=\"module\")\ndef document_indices(\n    vespa_document_index: VespaDocumentIndex,\n    opensearch_document_index: OpenSearchDocumentIndex,\n) -> Generator[list[DocumentIndexNew], None, None]:\n    yield [opensearch_document_index, vespa_document_index]\n\n\n# ---------------------------------------------------------------------------\n# Tests\n# ---------------------------------------------------------------------------\n\n\nclass TestDocumentIndexNew:\n    \"\"\"Tests the new DocumentIndex interface against real Vespa and OpenSearch.\"\"\"\n\n    def test_index_single_new_doc(\n        self,\n        document_indices: list[DocumentIndexNew],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Indexing a single new document returns one record with already_existed=False.\"\"\"\n        for document_index in document_indices:\n            doc_id = f\"test_single_new_{uuid.uuid4().hex[:8]}\"\n            chunk = make_chunk(doc_id)\n            metadata = make_indexing_metadata([doc_id], old_counts=[0], new_counts=[1])\n\n            results = document_index.index(chunks=[chunk], indexing_metadata=metadata)\n\n            assert len(results) == 1\n            assert results[0].document_id == doc_id\n            assert results[0].already_existed is False\n\n    def test_index_existing_doc_already_existed_true(\n        self,\n        document_indices: list[DocumentIndexNew],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Re-indexing a doc with previous chunks returns already_existed=True.\"\"\"\n        for document_index in document_indices:\n            doc_id = f\"test_existing_{uuid.uuid4().hex[:8]}\"\n            chunk = make_chunk(doc_id)\n\n            # First index — brand new document.\n            metadata_first = make_indexing_metadata(\n                [doc_id], old_counts=[0], new_counts=[1]\n            )\n            document_index.index(chunks=[chunk], indexing_metadata=metadata_first)\n\n            # Allow near-real-time indexing to settle (needed for Vespa).\n            time.sleep(1)\n\n            # Re-index — old_chunk_cnt=1 signals the document already existed.\n            metadata_second = make_indexing_metadata(\n                [doc_id], old_counts=[1], new_counts=[1]\n            )\n            results = document_index.index(\n                chunks=[chunk], indexing_metadata=metadata_second\n            )\n\n            assert len(results) == 1\n            assert results[0].already_existed is True\n\n    def test_index_multiple_docs(\n        self,\n        document_indices: list[DocumentIndexNew],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Indexing multiple documents returns one record per unique document.\"\"\"\n        for document_index in document_indices:\n            doc1 = f\"test_multi_1_{uuid.uuid4().hex[:8]}\"\n            doc2 = f\"test_multi_2_{uuid.uuid4().hex[:8]}\"\n            chunks = [\n                make_chunk(doc1, chunk_id=0),\n                make_chunk(doc1, chunk_id=1),\n                make_chunk(doc2, chunk_id=0),\n            ]\n            metadata = make_indexing_metadata(\n                [doc1, doc2], old_counts=[0, 0], new_counts=[2, 1]\n            )\n\n            results = document_index.index(chunks=chunks, indexing_metadata=metadata)\n\n            result_map = {r.document_id: r.already_existed for r in results}\n            assert len(result_map) == 2\n            assert result_map[doc1] is False\n            assert result_map[doc2] is False\n\n    def test_index_deduplicates_doc_ids_in_results(\n        self,\n        document_indices: list[DocumentIndexNew],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Multiple chunks from the same document produce only one\n        DocumentInsertionRecord.\"\"\"\n        for document_index in document_indices:\n            doc_id = f\"test_dedup_{uuid.uuid4().hex[:8]}\"\n            chunks = [make_chunk(doc_id, chunk_id=i) for i in range(5)]\n            metadata = make_indexing_metadata([doc_id], old_counts=[0], new_counts=[5])\n\n            results = document_index.index(chunks=chunks, indexing_metadata=metadata)\n\n            assert len(results) == 1\n            assert results[0].document_id == doc_id\n\n    def test_index_mixed_new_and_existing_docs(\n        self,\n        document_indices: list[DocumentIndexNew],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"A batch with both new and existing documents returns the correct\n        already_existed flag for each.\"\"\"\n        for document_index in document_indices:\n            existing_doc = f\"test_mixed_exist_{uuid.uuid4().hex[:8]}\"\n            new_doc = f\"test_mixed_new_{uuid.uuid4().hex[:8]}\"\n\n            # Pre-index the existing document.\n            pre_chunk = make_chunk(existing_doc)\n            pre_metadata = make_indexing_metadata(\n                [existing_doc], old_counts=[0], new_counts=[1]\n            )\n            document_index.index(chunks=[pre_chunk], indexing_metadata=pre_metadata)\n\n            time.sleep(1)\n\n            # Now index a batch with the existing doc and a new doc.\n            chunks = [\n                make_chunk(existing_doc, chunk_id=0),\n                make_chunk(new_doc, chunk_id=0),\n            ]\n            metadata = make_indexing_metadata(\n                [existing_doc, new_doc], old_counts=[1, 0], new_counts=[1, 1]\n            )\n\n            results = document_index.index(chunks=chunks, indexing_metadata=metadata)\n\n            result_map = {r.document_id: r.already_existed for r in results}\n            assert len(result_map) == 2\n            assert result_map[existing_doc] is True\n            assert result_map[new_doc] is False\n\n    def test_index_accepts_generator(\n        self,\n        document_indices: list[DocumentIndexNew],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"index() accepts a generator (any iterable), not just a list.\"\"\"\n        for document_index in document_indices:\n            doc_id = f\"test_gen_{uuid.uuid4().hex[:8]}\"\n            metadata = make_indexing_metadata([doc_id], old_counts=[0], new_counts=[3])\n\n            def chunk_gen() -> Iterator[DocMetadataAwareIndexChunk]:\n                for i in range(3):\n                    yield make_chunk(doc_id, chunk_id=i)\n\n            results = document_index.index(\n                chunks=chunk_gen(), indexing_metadata=metadata\n            )\n\n            assert len(results) == 1\n            assert results[0].document_id == doc_id\n            assert results[0].already_existed is False\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/document_index/test_document_index_old.py",
    "content": "\"\"\"External dependency tests for the old DocumentIndex interface.\n\nThese tests assume Vespa and OpenSearch are running.\n\"\"\"\n\nimport time\nfrom collections.abc import Generator\nfrom collections.abc import Iterator\n\nimport pytest\n\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.document_index.interfaces import IndexBatchParams\nfrom onyx.document_index.interfaces import VespaChunkRequest\nfrom onyx.document_index.interfaces import VespaDocumentUserFields\nfrom onyx.document_index.opensearch.opensearch_document_index import (\n    OpenSearchOldDocumentIndex,\n)\nfrom onyx.document_index.vespa.index import VespaIndex\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom shared_configs.contextvars import get_current_tenant_id\nfrom tests.external_dependency_unit.document_index.conftest import make_chunk\n\n\n@pytest.fixture(scope=\"module\")\ndef document_indices(\n    vespa_index: VespaIndex,\n    opensearch_old_index: OpenSearchOldDocumentIndex,\n) -> Generator[list[DocumentIndex], None, None]:\n    # Ideally these are parametrized; doing so with pytest fixtures is tricky.\n    yield [opensearch_old_index, vespa_index]\n\n\n@pytest.fixture(scope=\"function\")\ndef chunks(\n    tenant_context: None,  # noqa: ARG001\n) -> Generator[list[DocMetadataAwareIndexChunk], None, None]:\n    yield [make_chunk(\"test_doc\", chunk_id=i) for i in range(5)]\n\n\n@pytest.fixture(scope=\"function\")\ndef index_batch_params(\n    tenant_context: None,  # noqa: ARG001\n) -> Generator[IndexBatchParams, None, None]:\n    # WARNING: doc_id_to_previous_chunk_cnt={\"test_doc\": 0} is hardcoded to 0,\n    # which is only correct on the very first index call. The document_indices\n    # fixture is scope=\"module\", meaning the same OpenSearch and Vespa backends\n    # persist across all test functions in this module. When a second test\n    # function uses this fixture and calls document_index.index(...), the\n    # backend already has 5 chunks for \"test_doc\" from the previous test run,\n    # but the batch params still claim 0 prior chunks exist. This can lead to\n    # orphaned/duplicate chunks that make subsequent assertions incorrect.\n    # TODO: Whenever adding a second test, either change this or cleanup the\n    # index between test cases.\n    yield IndexBatchParams(\n        doc_id_to_previous_chunk_cnt={\"test_doc\": 0},\n        doc_id_to_new_chunk_cnt={\"test_doc\": 5},\n        tenant_id=get_current_tenant_id(),\n        large_chunks_enabled=False,\n    )\n\n\nclass TestDocumentIndexOld:\n    \"\"\"Tests the old DocumentIndex interface.\"\"\"\n\n    # TODO(ENG-3864)(andrei): Re-enable this test.\n    @pytest.mark.xfail(\n        reason=\"Flaky test: Retrieved chunks vary non-deterministically before and after changing user projects and personas. Likely a timing issue with the index being updated.\"\n    )\n    def test_update_single_can_clear_user_projects_and_personas(\n        self,\n        document_indices: list[DocumentIndex],\n        # This test case assumes all these chunks correspond to one document.\n        chunks: list[DocMetadataAwareIndexChunk],\n        index_batch_params: IndexBatchParams,\n    ) -> None:\n        \"\"\"\n        Tests that update_single can clear user_projects and personas.\n        \"\"\"\n        for document_index in document_indices:\n            # Precondition.\n            # Ensure there is some non-empty value for user project and\n            # personas.\n            for chunk in chunks:\n                chunk.user_project = [1]\n                chunk.personas = [2]\n            document_index.index(chunks, index_batch_params)\n\n            # Ensure that we can get chunks as expected with filters.\n            doc_id = chunks[0].source_document.id\n            chunk_count = len(chunks)\n            tenant_id = get_current_tenant_id()\n            # We need to specify the chunk index range and specify\n            # batch_retrieval=True below to trigger the codepath for Vespa's\n            # search API, which uses the expected additive filtering for\n            # project_id and persona_id. Otherwise we would use the codepath for\n            # the visit API, which does not have this kind of filtering\n            # implemented.\n            chunk_request = VespaChunkRequest(\n                document_id=doc_id, min_chunk_ind=0, max_chunk_ind=chunk_count - 1\n            )\n            project_persona_filters = IndexFilters(\n                access_control_list=None,\n                tenant_id=tenant_id,\n                project_id_filter=1,\n                persona_id_filter=2,\n                # We need this even though none of the chunks belong to a\n                # document set because project_id and persona_id are only\n                # additive filters in the event the agent has knowledge scope;\n                # if the agent does not, it is implied that it can see\n                # everything it is allowed to.\n                document_set=[\"1\"],\n            )\n            # Not best practice here but the API for refreshing the index to\n            # ensure that the latest data is present is not exposed in this\n            # class and is not the same for Vespa and OpenSearch, so we just\n            # tolerate a sleep for now. As a consequence the number of tests in\n            # this suite should be small. We only need to tolerate this for as\n            # long as we continue to use Vespa, we can consider exposing\n            # something for OpenSearch later.\n            time.sleep(1)\n            inference_chunks = document_index.id_based_retrieval(\n                chunk_requests=[chunk_request],\n                filters=project_persona_filters,\n                batch_retrieval=True,\n            )\n            assert len(inference_chunks) == chunk_count\n            # Sort by chunk id to easily test if we have all chunks.\n            for i, inference_chunk in enumerate(\n                sorted(inference_chunks, key=lambda x: x.chunk_id)\n            ):\n                assert inference_chunk.chunk_id == i\n                assert inference_chunk.document_id == doc_id\n\n            # Under test.\n            # Explicitly set empty fields here.\n            user_fields = VespaDocumentUserFields(user_projects=[], personas=[])\n            document_index.update_single(\n                doc_id=doc_id,\n                chunk_count=chunk_count,\n                tenant_id=tenant_id,\n                fields=None,\n                user_fields=user_fields,\n            )\n\n            # Postcondition.\n            filters = IndexFilters(access_control_list=None, tenant_id=tenant_id)\n            # We should expect to get back all expected chunks with no filters.\n            # Again, not best practice here.\n            time.sleep(1)\n            inference_chunks = document_index.id_based_retrieval(\n                chunk_requests=[chunk_request], filters=filters, batch_retrieval=True\n            )\n            assert len(inference_chunks) == chunk_count\n            # Sort by chunk id to easily test if we have all chunks.\n            for i, inference_chunk in enumerate(\n                sorted(inference_chunks, key=lambda x: x.chunk_id)\n            ):\n                assert inference_chunk.chunk_id == i\n                assert inference_chunk.document_id == doc_id\n            # Now, we should expect to not get any chunks if we specify the user\n            # project and personas filters.\n            inference_chunks = document_index.id_based_retrieval(\n                chunk_requests=[chunk_request],\n                filters=project_persona_filters,\n                batch_retrieval=True,\n            )\n            assert len(inference_chunks) == 0\n\n    def test_index_accepts_generator(\n        self,\n        document_indices: list[DocumentIndex],\n        tenant_context: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"index() accepts a generator (any iterable), not just a list.\"\"\"\n        for document_index in document_indices:\n\n            def chunk_gen() -> Iterator[DocMetadataAwareIndexChunk]:\n                for i in range(3):\n                    yield make_chunk(\"test_doc_gen\", chunk_id=i)\n\n            index_batch_params = IndexBatchParams(\n                doc_id_to_previous_chunk_cnt={\"test_doc_gen\": 0},\n                doc_id_to_new_chunk_cnt={\"test_doc_gen\": 3},\n                tenant_id=get_current_tenant_id(),\n                large_chunks_enabled=False,\n            )\n\n            results = document_index.index(chunk_gen(), index_batch_params)\n\n            assert len(results) == 1\n            record = results.pop()\n            assert record.document_id == \"test_doc_gen\"\n            assert record.already_existed is False\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/feature_flags/__init__.py",
    "content": "# External dependency unit tests for feature flag service\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/feature_flags/test_feature_flag_provider_factory.py",
    "content": "\"\"\"\nExternal dependency unit tests for the feature flag service.\n\nThese tests verify the feature flag service implementation with real\nPostHog integration when available, and fallback behavior otherwise.\n\"\"\"\n\nfrom uuid import UUID\n\nfrom ee.onyx.feature_flags.posthog_provider import PostHogFeatureFlagProvider\nfrom onyx.feature_flags.factory import get_default_feature_flag_provider\nfrom onyx.feature_flags.interface import FeatureFlagProvider\nfrom onyx.feature_flags.interface import NoOpFeatureFlagProvider\n\n\nclass TestNoOpFeatureFlagProvider:\n    \"\"\"Tests for the no-op feature flag provider.\"\"\"\n\n    def test_always_returns_false(self) -> None:\n        \"\"\"No-op provider should always return False.\"\"\"\n        provider = NoOpFeatureFlagProvider()\n\n        my_uuid = UUID(\"79a75f76-6b63-43ee-b04c-a0c6806900bd\")\n        assert provider.feature_enabled(\"another-flag\", my_uuid) is False\n\n\nclass TestFeatureFlagFactory:\n    \"\"\"Tests for the feature flag factory function.\"\"\"\n\n    def test_factory_returns_provider(self) -> None:\n        \"\"\"Factory should return a FeatureFlagProvider instance.\"\"\"\n        provider = get_default_feature_flag_provider()\n        assert isinstance(provider, FeatureFlagProvider)\n\n    def test_posthog_provider(self) -> None:\n        \"\"\"Posthog provider should return True if the feature is enabled.\"\"\"\n        provider = PostHogFeatureFlagProvider()\n        assert isinstance(provider, FeatureFlagProvider)\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/file_store/test_file_store_non_mocked.py",
    "content": "import os\nimport time\nimport uuid\nfrom collections.abc import Generator\nfrom concurrent.futures import as_completed\nfrom concurrent.futures import ThreadPoolExecutor\nfrom io import BytesIO\nfrom typing import Any\nfrom typing import cast\nfrom typing import Dict\nfrom typing import List\nfrom typing import Tuple\nfrom typing import TypedDict\nfrom unittest.mock import patch\n\nimport pytest\nfrom botocore.exceptions import ClientError\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.file_store.file_store import S3BackedFileStore\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\nlogger = setup_logger()\n\n\nTEST_BUCKET_NAME: str = \"onyx-file-store-tests\"\nTEST_FILE_PREFIX: str = \"test-files\"\n\n\n# Type definitions for test data\nclass BackendConfig(TypedDict):\n    endpoint_url: str | None\n    access_key: str\n    secret_key: str\n    region: str\n    verify_ssl: bool\n    backend_name: str\n\n\nclass FileTestData(TypedDict):\n    name: str\n    display_name: str\n    content: str\n    type: str\n    origin: FileOrigin\n\n\nclass WorkerResult(TypedDict):\n    worker_id: int\n    file_name: str\n    content: str\n\n\ndef _get_all_backend_configs() -> List[BackendConfig]:\n    \"\"\"Get configurations for all available backends\"\"\"\n    from onyx.configs.app_configs import (\n        S3_ENDPOINT_URL,\n        AWS_REGION_NAME,\n    )\n\n    s3_aws_access_key_id = os.environ.get(\"S3_AWS_ACCESS_KEY_ID_FOR_TEST\")\n    s3_aws_secret_access_key = os.environ.get(\"S3_AWS_SECRET_ACCESS_KEY_FOR_TEST\")\n\n    configs: List[BackendConfig] = []\n\n    # MinIO configuration (if endpoint is configured)\n    if S3_ENDPOINT_URL:\n        minio_access_key = \"minioadmin\"\n        minio_secret_key = \"minioadmin\"\n        configs.append(\n            {\n                \"endpoint_url\": S3_ENDPOINT_URL,\n                \"access_key\": minio_access_key,\n                \"secret_key\": minio_secret_key,\n                \"region\": \"us-east-1\",\n                \"verify_ssl\": False,\n                \"backend_name\": \"MinIO\",\n            }\n        )\n\n    # AWS S3 configuration (if credentials are available)\n    if s3_aws_access_key_id and s3_aws_secret_access_key:\n        configs.append(\n            {\n                \"endpoint_url\": None,\n                \"access_key\": s3_aws_access_key_id,\n                \"secret_key\": s3_aws_secret_access_key,\n                \"region\": AWS_REGION_NAME or \"us-east-2\",\n                \"verify_ssl\": True,\n                \"backend_name\": \"AWS S3\",\n            }\n        )\n\n    if not configs:\n        pytest.skip(\n            \"No backend configurations available - set MinIO or AWS S3 credentials\"\n        )\n\n    return configs\n\n\n@pytest.fixture(\n    scope=\"function\",\n    params=_get_all_backend_configs(),\n    ids=lambda config: config[\"backend_name\"],\n)\ndef file_store(\n    request: pytest.FixtureRequest,\n    db_session: Session,  # noqa: ARG001\n    tenant_context: None,  # noqa: ARG001\n) -> Generator[S3BackedFileStore, None, None]:\n    \"\"\"Create an S3BackedFileStore instance for testing with parametrized backend\"\"\"\n    backend_config: BackendConfig = request.param\n\n    # Create S3BackedFileStore with backend-specific configuration\n    store = S3BackedFileStore(\n        bucket_name=TEST_BUCKET_NAME,\n        aws_access_key_id=backend_config[\"access_key\"],\n        aws_secret_access_key=backend_config[\"secret_key\"],\n        aws_region_name=backend_config[\"region\"],\n        s3_endpoint_url=backend_config[\"endpoint_url\"],\n        s3_prefix=f\"{TEST_FILE_PREFIX}-{uuid.uuid4()}\",\n        s3_verify_ssl=backend_config[\"verify_ssl\"],\n    )\n\n    # Initialize the store and ensure bucket exists\n    store.initialize()\n    logger.info(\n        f\"Successfully initialized {backend_config['backend_name']} file store with bucket {TEST_BUCKET_NAME}\"\n    )\n\n    yield store\n\n    # Cleanup: Remove all test files from the bucket (including tenant-prefixed files)\n    try:\n        s3_client = store._get_s3_client()\n        actual_bucket_name = store._get_bucket_name()\n\n        # List and delete all objects in the test prefix (including tenant subdirectories)\n        response = s3_client.list_objects_v2(\n            Bucket=actual_bucket_name, Prefix=f\"{store._s3_prefix}/\"\n        )\n\n        if \"Contents\" in response:\n            objects_to_delete = [{\"Key\": obj[\"Key\"]} for obj in response[\"Contents\"]]\n            s3_client.delete_objects(\n                Bucket=actual_bucket_name,\n                Delete={\"Objects\": objects_to_delete},  # type: ignore[typeddict-item]\n            )\n            logger.info(\n                f\"Cleaned up {len(objects_to_delete)} test objects from {backend_config['backend_name']}\"\n            )\n    except Exception as e:\n        logger.warning(f\"Failed to cleanup test objects: {e}\")\n\n\nclass TestS3BackedFileStore:\n    \"\"\"Test suite for S3BackedFileStore using real S3-compatible storage (MinIO or AWS S3)\"\"\"\n\n    def test_store_initialization(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test that the file store initializes properly\"\"\"\n        # The fixture already calls initialize(), so we just verify it worked\n        bucket_name = file_store._get_bucket_name()\n        assert bucket_name.startswith(TEST_BUCKET_NAME)  # Should be backend-specific\n\n        # Verify bucket exists by trying to list objects\n        s3_client = file_store._get_s3_client()\n\n        # This should not raise an exception\n        s3_client.list_objects_v2(Bucket=bucket_name, MaxKeys=1)\n\n    def test_save_and_read_text_file(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test saving and reading a text file\"\"\"\n        file_id = f\"{uuid.uuid4()}.txt\"\n        display_name = \"Test Text File\"\n        content = \"This is a test text file content.\\nWith multiple lines.\"\n        file_type = \"text/plain\"\n        file_origin = FileOrigin.OTHER\n\n        # Save the file\n        content_io = BytesIO(content.encode(\"utf-8\"))\n        returned_file_id = file_store.save_file(\n            content=content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_id=file_id,\n        )\n\n        assert returned_file_id == file_id\n\n        # Read the file back\n        read_content_io = file_store.read_file(file_id)\n        read_content = read_content_io.read().decode(\"utf-8\")\n\n        assert read_content == content\n\n        # Verify file record in database\n        file_record = file_store.read_file_record(file_id)\n        assert file_record.file_id == file_id\n        assert file_record.display_name == display_name\n        assert file_record.file_origin == file_origin\n        assert file_record.file_type == file_type\n        assert (\n            file_record.bucket_name == file_store._get_bucket_name()\n        )  # Use actual bucket name\n        # The object key should include the tenant ID\n        expected_object_key = f\"{file_store._s3_prefix}/{TEST_TENANT_ID}/{file_id}\"\n        assert file_record.object_key == expected_object_key\n\n    def test_save_and_read_binary_file(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test saving and reading a binary file\"\"\"\n        file_id = f\"{uuid.uuid4()}.bin\"\n        display_name = \"Test Binary File\"\n        # Create some binary content\n        content = bytes(range(256))  # 0-255 bytes\n        file_type = \"application/octet-stream\"\n        file_origin = FileOrigin.CONNECTOR\n\n        # Save the file\n        content_io = BytesIO(content)\n        returned_file_id = file_store.save_file(\n            content=content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_id=file_id,\n        )\n\n        assert returned_file_id == file_id\n\n        # Read the file back\n        read_content_io = file_store.read_file(file_id)\n        read_content = read_content_io.read()\n\n        assert read_content == content\n\n    def test_save_with_metadata(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test saving a file with metadata\"\"\"\n        file_id = f\"{uuid.uuid4()}.json\"\n        display_name = \"Test Metadata File\"\n        content = '{\"key\": \"value\", \"number\": 42}'\n        file_type = \"application/json\"\n        file_origin = FileOrigin.CHAT_UPLOAD\n        metadata: Dict[str, Any] = {\n            \"source\": \"test_suite\",\n            \"version\": \"1.0\",\n            \"tags\": [\"test\", \"json\"],\n            \"size\": len(content),\n        }\n\n        # Save the file with metadata\n        content_io = BytesIO(content.encode(\"utf-8\"))\n        returned_file_id = file_store.save_file(\n            content=content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_metadata=metadata,\n            file_id=file_id,\n        )\n\n        assert returned_file_id == file_id\n\n        # Verify metadata is stored in database\n        file_record = file_store.read_file_record(file_id)\n        assert file_record.file_metadata == metadata\n\n    def test_has_file(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test the has_file method\"\"\"\n        file_id = f\"{uuid.uuid4()}.txt\"\n        display_name = \"Test Has File\"\n        content = \"Content for has_file test\"\n        file_type = \"text/plain\"\n        file_origin = FileOrigin.OTHER\n\n        # Initially, file should not exist\n        assert not file_store.has_file(\n            file_id=file_id,\n            file_origin=file_origin,\n            file_type=file_type,\n        )\n\n        # Save the file\n        content_io = BytesIO(content.encode(\"utf-8\"))\n        returned_file_id = file_store.save_file(\n            content=content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_id=file_id,\n        )\n\n        assert returned_file_id == file_id\n\n        # Now file should exist\n        assert file_store.has_file(\n            file_id=file_id,\n            file_origin=file_origin,\n            file_type=file_type,\n        )\n\n        # Test with wrong parameters\n        assert not file_store.has_file(\n            file_id=file_id,\n            file_origin=FileOrigin.CONNECTOR,  # Wrong origin\n            file_type=file_type,\n        )\n\n        assert not file_store.has_file(\n            file_id=file_id,\n            file_origin=file_origin,\n            file_type=\"application/pdf\",  # Wrong type\n        )\n\n    def test_read_file_with_tempfile(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test reading a file using temporary file\"\"\"\n        file_id = f\"{uuid.uuid4()}.txt\"\n        display_name = \"Test Temp File\"\n        content = \"Content for temporary file test\"\n        file_type = \"text/plain\"\n        file_origin = FileOrigin.OTHER\n\n        # Save the file\n        content_io = BytesIO(content.encode(\"utf-8\"))\n        returned_file_id = file_store.save_file(\n            content=content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_id=file_id,\n        )\n\n        assert returned_file_id == file_id\n\n        # Read using temporary file\n        temp_file = file_store.read_file(file_id, use_tempfile=True)\n\n        # Read content from temp file\n        temp_file.seek(0)\n        read_content_bytes = temp_file.read()\n        if isinstance(read_content_bytes, bytes):\n            read_content_str = read_content_bytes.decode(\"utf-8\")\n        else:\n            read_content_str = str(read_content_bytes)\n\n        assert read_content_str == content\n\n        # Clean up the temp file\n        temp_file.close()\n        if hasattr(temp_file, \"name\"):\n            try:\n                os.unlink(temp_file.name)\n            except (OSError, AttributeError):\n                pass\n\n    def test_delete_file(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test deleting a file\"\"\"\n        file_id = f\"{uuid.uuid4()}.txt\"\n        display_name = \"Test Delete File\"\n        content = \"Content for delete test\"\n        file_type = \"text/plain\"\n        file_origin = FileOrigin.OTHER\n\n        # Save the file\n        content_io = BytesIO(content.encode(\"utf-8\"))\n        returned_file_id = file_store.save_file(\n            content=content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_id=file_id,\n        )\n\n        assert returned_file_id == file_id\n\n        # Verify file exists\n        assert file_store.has_file(\n            file_id=file_id,\n            file_origin=file_origin,\n            file_type=file_type,\n        )\n\n        # Delete the file\n        file_store.delete_file(file_id)\n\n        # Verify file no longer exists\n        assert not file_store.has_file(\n            file_id=file_id,\n            file_origin=file_origin,\n            file_type=file_type,\n        )\n\n        # Verify trying to read deleted file raises exception\n        with pytest.raises(RuntimeError, match=\"does not exist or was deleted\"):\n            file_store.read_file(file_id)\n\n    def test_get_file_with_mime_type(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test getting file with mime type detection\"\"\"\n        file_id = f\"{uuid.uuid4()}.txt\"\n        display_name = \"Test MIME Type\"\n        content = \"This is a plain text file\"\n        file_type = \"text/plain\"\n        file_origin = FileOrigin.OTHER\n\n        # Save the file\n        content_io = BytesIO(content.encode(\"utf-8\"))\n        returned_file_id = file_store.save_file(\n            content=content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_id=file_id,\n        )\n\n        assert returned_file_id == file_id\n\n        # Get file with mime type\n        file_with_mime = file_store.get_file_with_mime_type(file_id)\n\n        assert file_with_mime is not None\n        assert file_with_mime.data.decode(\"utf-8\") == content\n        # The detected mime type might be different from what we stored\n        assert file_with_mime.mime_type is not None\n\n    def test_file_overwrite(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test overwriting an existing file\"\"\"\n        file_id = f\"{uuid.uuid4()}.txt\"\n        display_name = \"Test Overwrite\"\n        original_content = \"Original content\"\n        new_content = \"New content after overwrite\"\n        file_type = \"text/plain\"\n        file_origin = FileOrigin.OTHER\n\n        # Save original file\n        content_io = BytesIO(original_content.encode(\"utf-8\"))\n        returned_file_id = file_store.save_file(\n            content=content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_id=file_id,\n        )\n\n        assert returned_file_id == file_id\n\n        # Verify original content\n        read_content_io = file_store.read_file(file_id)\n        assert read_content_io.read().decode(\"utf-8\") == original_content\n\n        # Overwrite with new content\n        new_content_io = BytesIO(new_content.encode(\"utf-8\"))\n        returned_file_id_2 = file_store.save_file(\n            content=new_content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_id=file_id,\n        )\n\n        assert returned_file_id_2 == file_id\n\n        # Verify new content\n        read_content_io = file_store.read_file(file_id)\n        assert read_content_io.read().decode(\"utf-8\") == new_content\n\n    def test_large_file_handling(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test handling of larger files\"\"\"\n        file_id = f\"{uuid.uuid4()}.bin\"\n        display_name = \"Test Large File\"\n        # Create a 1MB file\n        content_size = 1024 * 1024  # 1MB\n        content = b\"A\" * content_size\n        file_type = \"application/octet-stream\"\n        file_origin = FileOrigin.CONNECTOR\n\n        # Save the large file\n        content_io = BytesIO(content)\n        returned_file_id = file_store.save_file(\n            content=content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_id=file_id,\n        )\n\n        assert returned_file_id == file_id\n\n        # Read the file back\n        read_content_io = file_store.read_file(file_id)\n        read_content = read_content_io.read()\n\n        assert len(read_content) == content_size\n        assert read_content == content\n\n    def test_error_handling_nonexistent_file(\n        self, file_store: S3BackedFileStore\n    ) -> None:\n        \"\"\"Test error handling when trying to read a non-existent file\"\"\"\n        nonexistent_file_id = f\"{uuid.uuid4()}.txt\"\n\n        with pytest.raises(RuntimeError, match=\"does not exist or was deleted\"):\n            file_store.read_file(nonexistent_file_id)\n\n        with pytest.raises(RuntimeError, match=\"does not exist or was deleted\"):\n            file_store.read_file_record(nonexistent_file_id)\n\n        # get_file_with_mime_type should return None for non-existent files\n        result = file_store.get_file_with_mime_type(nonexistent_file_id)\n        assert result is None\n\n    def test_error_handling_delete_nonexistent_file(\n        self, file_store: S3BackedFileStore\n    ) -> None:\n        \"\"\"Test error handling when trying to delete a non-existent file\"\"\"\n        nonexistent_file_id = f\"{uuid.uuid4()}.txt\"\n\n        # Should raise an exception when trying to delete non-existent file\n        with pytest.raises(RuntimeError, match=\"does not exist or was deleted\"):\n            file_store.delete_file(nonexistent_file_id)\n\n    def test_multiple_files_different_origins(\n        self, file_store: S3BackedFileStore\n    ) -> None:\n        \"\"\"Test storing multiple files with different origins and types\"\"\"\n        files_data: List[FileTestData] = [\n            {\n                \"name\": f\"{uuid.uuid4()}.txt\",\n                \"display_name\": \"Chat Upload File\",\n                \"content\": \"Content from chat upload\",\n                \"type\": \"text/plain\",\n                \"origin\": FileOrigin.CHAT_UPLOAD,\n            },\n            {\n                \"name\": f\"{uuid.uuid4()}.json\",\n                \"display_name\": \"Connector File\",\n                \"content\": '{\"from\": \"connector\"}',\n                \"type\": \"application/json\",\n                \"origin\": FileOrigin.CONNECTOR,\n            },\n            {\n                \"name\": f\"{uuid.uuid4()}.csv\",\n                \"display_name\": \"Generated Report\",\n                \"content\": \"col1,col2\\nval1,val2\",\n                \"type\": \"text/csv\",\n                \"origin\": FileOrigin.GENERATED_REPORT,\n            },\n        ]\n\n        # Save all files\n        for file_data in files_data:\n            content_io = BytesIO(file_data[\"content\"].encode(\"utf-8\"))\n            returned_file_id = file_store.save_file(\n                content=content_io,\n                display_name=file_data[\"display_name\"],\n                file_origin=file_data[\"origin\"],\n                file_type=file_data[\"type\"],\n                file_id=file_data[\"name\"],\n            )\n            assert returned_file_id == file_data[\"name\"]\n\n        # Verify all files exist and have correct properties\n        for file_data in files_data:\n            assert file_store.has_file(\n                file_id=file_data[\"name\"],\n                file_origin=file_data[\"origin\"],\n                file_type=file_data[\"type\"],\n            )\n\n            # Read and verify content\n            read_content_io = file_store.read_file(file_data[\"name\"])\n            read_content = read_content_io.read().decode(\"utf-8\")\n            assert read_content == file_data[\"content\"]\n\n            # Verify record\n            file_record = file_store.read_file_record(file_data[\"name\"])\n            assert file_record.file_origin == file_data[\"origin\"]\n            assert file_record.file_type == file_data[\"type\"]\n\n    def test_special_characters_in_filenames(\n        self, file_store: S3BackedFileStore\n    ) -> None:\n        \"\"\"Test handling of special characters in filenames\"\"\"\n        # Note: S3 keys have some restrictions, so we test reasonable special characters\n        special_files: List[str] = [\n            f\"{uuid.uuid4()} with spaces.txt\",\n            f\"{uuid.uuid4()}-with-dashes.txt\",\n            f\"{uuid.uuid4()}_with_underscores.txt\",\n            f\"{uuid.uuid4()}.with.dots.txt\",\n            f\"{uuid.uuid4()}(with)parentheses.txt\",\n        ]\n\n        for file_id in special_files:\n            content = f\"Content for {file_id}\"\n            content_io = BytesIO(content.encode(\"utf-8\"))\n\n            # Save the file\n            returned_file_id = file_store.save_file(\n                content=content_io,\n                display_name=f\"Display: {file_id}\",\n                file_origin=FileOrigin.OTHER,\n                file_type=\"text/plain\",\n                file_id=file_id,\n            )\n\n            assert returned_file_id == file_id\n\n            # Read and verify\n            read_content_io = file_store.read_file(file_id)\n            read_content = read_content_io.read().decode(\"utf-8\")\n            assert read_content == content\n\n    @pytest.mark.skipif(\n        not os.environ.get(\"TEST_S3_NETWORK_ERRORS\"),\n        reason=\"Network error tests require TEST_S3_NETWORK_ERRORS environment variable\",\n    )\n    def test_network_error_handling(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test handling of network errors (requires special setup)\"\"\"\n        # This test requires specific network configuration to simulate failures\n        # It's marked as skip by default and only runs when explicitly enabled\n\n        # Mock a network error during file operations\n        with patch.object(file_store, \"_get_s3_client\") as mock_client:\n            mock_s3 = mock_client.return_value\n            mock_s3.put_object.side_effect = ClientError(\n                error_response={\n                    \"Error\": {\n                        \"Code\": \"NetworkingError\",\n                        \"Message\": \"Connection timeout\",\n                    }\n                },\n                operation_name=\"PutObject\",\n            )\n\n            content_io = BytesIO(b\"test content\")\n\n            with pytest.raises(ClientError):\n                file_store.save_file(\n                    content=content_io,\n                    display_name=\"Network Error Test\",\n                    file_origin=FileOrigin.OTHER,\n                    file_type=\"text/plain\",\n                    file_id=f\"{uuid.uuid4()}.txt\",\n                )\n\n    def test_database_transaction_rollback(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test database transaction rollback behavior with PostgreSQL\"\"\"\n        file_id = f\"{uuid.uuid4()}.txt\"\n        display_name = \"Test Rollback\"\n        content = \"Content for rollback test\"\n        file_type = \"text/plain\"\n        file_origin = FileOrigin.OTHER\n\n        # Mock S3 to fail after database write but before commit\n        with patch.object(file_store, \"_get_s3_client\") as mock_client:\n            mock_s3 = mock_client.return_value\n            mock_s3.put_object.side_effect = ClientError(\n                error_response={\n                    \"Error\": {\"Code\": \"InternalError\", \"Message\": \"S3 internal error\"}\n                },\n                operation_name=\"PutObject\",\n            )\n\n            content_io = BytesIO(content.encode(\"utf-8\"))\n\n            # This should fail and rollback the database transaction\n            with pytest.raises(ClientError):\n                file_store.save_file(\n                    content=content_io,\n                    display_name=display_name,\n                    file_origin=file_origin,\n                    file_type=file_type,\n                    file_id=file_id,\n                )\n\n        # Verify that the database record was not created due to rollback\n        with pytest.raises(RuntimeError, match=\"does not exist or was deleted\"):\n            file_store.read_file_record(file_id)\n\n    def test_complex_jsonb_metadata(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test PostgreSQL JSONB metadata handling with complex data structures\"\"\"\n        file_id = f\"{uuid.uuid4()}.json\"\n        display_name = \"Test Complex Metadata\"\n        content = '{\"data\": \"test\"}'\n        file_type = \"application/json\"\n        file_origin = FileOrigin.CONNECTOR\n\n        # Complex metadata that tests PostgreSQL JSONB capabilities\n        complex_metadata: Dict[str, Any] = {\n            \"nested\": {\n                \"array\": [1, 2, 3, {\"inner\": \"value\"}],\n                \"boolean\": True,\n                \"null_value\": None,\n                \"number\": 42.5,\n            },\n            \"unicode\": \"测试数据 🚀\",\n            \"special_chars\": \"Line 1\\nLine 2\\t\\r\\nSpecial: !@#$%^&*()\",\n            \"large_text\": \"x\" * 1000,  # Test large text in JSONB\n            \"timestamps\": {\n                \"created\": \"2024-01-01T00:00:00Z\",\n                \"updated\": \"2024-01-02T12:30:45Z\",\n            },\n        }\n\n        # Save file with complex metadata\n        content_io = BytesIO(content.encode(\"utf-8\"))\n        returned_file_id = file_store.save_file(\n            content=content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_metadata=complex_metadata,\n            file_id=file_id,\n        )\n\n        assert returned_file_id == file_id\n\n        # Retrieve and verify the metadata was stored correctly\n        file_record = file_store.read_file_record(file_id)\n        stored_metadata = file_record.file_metadata\n\n        # Verify all metadata fields were preserved\n        assert stored_metadata == complex_metadata\n\n        # Type casting for complex metadata access\n        stored_metadata_dict = cast(Dict[str, Any], stored_metadata)\n        nested_data = cast(Dict[str, Any], stored_metadata_dict[\"nested\"])\n        array_data = cast(List[Any], nested_data[\"array\"])\n        inner_obj = cast(Dict[str, Any], array_data[3])\n\n        assert inner_obj[\"inner\"] == \"value\"\n        assert stored_metadata_dict[\"unicode\"] == \"测试数据 🚀\"\n        assert nested_data[\"boolean\"] is True\n        assert nested_data[\"null_value\"] is None\n        assert len(cast(str, stored_metadata_dict[\"large_text\"])) == 1000\n\n    def test_database_consistency_after_s3_failure(\n        self, file_store: S3BackedFileStore\n    ) -> None:\n        \"\"\"Test that database stays consistent when S3 operations fail\"\"\"\n        file_id = f\"{uuid.uuid4()}.txt\"\n        display_name = \"Test Consistency\"\n        content = \"Initial content\"\n        file_type = \"text/plain\"\n        file_origin = FileOrigin.OTHER\n\n        # First, save a file successfully\n        content_io = BytesIO(content.encode(\"utf-8\"))\n        returned_file_id = file_store.save_file(\n            content=content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_id=file_id,\n        )\n\n        assert returned_file_id == file_id\n\n        # Verify initial state\n        assert file_store.has_file(file_id, file_origin, file_type)\n        initial_record = file_store.read_file_record(file_id)\n\n        # Now try to update but fail on S3 side\n        with patch.object(file_store, \"_get_s3_client\") as mock_client:\n            mock_s3 = mock_client.return_value\n            # Let the first call (for reading/checking) succeed, but fail on put_object\n            mock_s3.put_object.side_effect = ClientError(\n                error_response={\n                    \"Error\": {\n                        \"Code\": \"ServiceUnavailable\",\n                        \"Message\": \"Service temporarily unavailable\",\n                    }\n                },\n                operation_name=\"PutObject\",\n            )\n\n            new_content = \"Updated content that should fail\"\n            new_content_io = BytesIO(new_content.encode(\"utf-8\"))\n\n            # This should fail and rollback\n            with pytest.raises(ClientError):\n                file_store.save_file(\n                    content=new_content_io,\n                    display_name=display_name,\n                    file_origin=file_origin,\n                    file_type=file_type,\n                    file_id=file_id,\n                )\n\n        # Verify the database record is unchanged (not updated)\n        current_record = file_store.read_file_record(file_id)\n        assert current_record.file_id == initial_record.file_id\n        assert current_record.display_name == initial_record.display_name\n        assert current_record.bucket_name == initial_record.bucket_name\n        assert current_record.object_key == initial_record.object_key\n\n        # Verify we can still read the original file content\n        read_content_io = file_store.read_file(file_id)\n        read_content = read_content_io.read().decode(\"utf-8\")\n        assert read_content == content  # Original content, not the failed update\n\n    def test_concurrent_file_operations(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test handling of concurrent file operations on the same file\"\"\"\n        base_file_name: str = str(uuid.uuid4())\n        file_type: str = \"text/plain\"\n        file_origin: FileOrigin = FileOrigin.OTHER\n\n        # Get current file store configuration to replicate in workers\n        current_bucket_name = file_store._get_bucket_name()\n        current_access_key = file_store._aws_access_key_id\n        current_secret_key = file_store._aws_secret_access_key\n        current_region = file_store._aws_region_name\n        current_endpoint_url = file_store._s3_endpoint_url\n        current_verify_ssl = file_store._s3_verify_ssl\n\n        results: List[Tuple[str, str]] = []\n        errors: List[Tuple[int, str]] = []\n\n        def save_file_worker(worker_id: int) -> bool:\n            \"\"\"Worker function to save a file with its own database session\"\"\"\n            try:\n                # Set up tenant context for this worker\n                token = CURRENT_TENANT_ID_CONTEXTVAR.set(TEST_TENANT_ID)\n                try:\n                    # Create a new database session for each worker to avoid conflicts\n                    with get_session_with_current_tenant() as worker_session:\n                        worker_file_store = S3BackedFileStore(\n                            bucket_name=current_bucket_name,\n                            aws_access_key_id=current_access_key,\n                            aws_secret_access_key=current_secret_key,\n                            aws_region_name=current_region,\n                            s3_endpoint_url=current_endpoint_url,\n                            s3_prefix=TEST_FILE_PREFIX,\n                            s3_verify_ssl=current_verify_ssl,\n                        )\n\n                        file_name: str = f\"{base_file_name}_{worker_id}.txt\"\n                        content: str = (\n                            f\"Content from worker {worker_id} at {time.time()}\"\n                        )\n                        content_io: BytesIO = BytesIO(content.encode(\"utf-8\"))\n\n                        worker_file_store.save_file(\n                            file_id=file_name,\n                            content=content_io,\n                            display_name=f\"Worker {worker_id} File\",\n                            file_origin=file_origin,\n                            file_type=file_type,\n                            db_session=worker_session,\n                        )\n                        results.append((file_name, content))\n                        return True\n                finally:\n                    # Reset the tenant context after the worker completes\n                    CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n            except Exception as e:\n                errors.append((worker_id, str(e)))\n                return False\n\n        # Run multiple concurrent file save operations\n        with ThreadPoolExecutor(max_workers=5) as executor:\n            futures = [executor.submit(save_file_worker, i) for i in range(10)]\n\n            for future in as_completed(futures):\n                future.result()  # Wait for completion\n\n        # Verify all operations completed successfully\n        assert len(errors) == 0, f\"Concurrent operations had errors: {errors}\"\n        assert (\n            len(results) == 10\n        ), f\"Expected 10 successful operations, got {len(results)}\"\n\n        # Verify all files were saved correctly\n        for file_id, expected_content in results:\n            # Check file exists\n            assert file_store.has_file(\n                file_id=file_id,\n                file_origin=file_origin,\n                file_type=file_type,\n            )\n\n            # Check content is correct\n            read_content_io = file_store.read_file(file_id)\n            actual_content: str = read_content_io.read().decode(\"utf-8\")\n            assert actual_content == expected_content\n\n    def test_list_files_by_prefix(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test listing files by prefix returns only correctly prefixed files\"\"\"\n        test_prefix = \"documents-batch-\"\n\n        # Files that should be returned (start with the prefix)\n        prefixed_files: List[str] = [\n            f\"{test_prefix}001.txt\",\n            f\"{test_prefix}002.json\",\n            f\"{test_prefix}abc.pdf\",\n            f\"{test_prefix}xyz-final.docx\",\n        ]\n\n        # Files that should NOT be returned (don't start with prefix, even if they contain it)\n        non_prefixed_files: List[str] = [\n            f\"other-{test_prefix}001.txt\",  # Contains prefix but doesn't start with it\n            f\"backup-{test_prefix}data.txt\",  # Contains prefix but doesn't start with it\n            f\"{uuid.uuid4()}.txt\",  # Random file without prefix\n            \"reports-001.pdf\",  # Different prefix\n            f\"my-{test_prefix[:-1]}.txt\",  # Similar but not exact prefix\n        ]\n\n        all_files = prefixed_files + non_prefixed_files\n        saved_file_ids: List[str] = []\n\n        # Save all test files\n        for file_name in all_files:\n            content = f\"Content for {file_name}\"\n            content_io = BytesIO(content.encode(\"utf-8\"))\n\n            returned_file_id = file_store.save_file(\n                content=content_io,\n                display_name=f\"Display: {file_name}\",\n                file_origin=FileOrigin.OTHER,\n                file_type=\"text/plain\",\n                file_id=file_name,\n            )\n            saved_file_ids.append(returned_file_id)\n\n            # Verify file was saved\n            assert returned_file_id == file_name\n\n        # Test the list_files_by_prefix functionality\n        prefix_results = file_store.list_files_by_prefix(test_prefix)\n\n        # Extract file IDs from results\n        returned_file_ids = [record.file_id for record in prefix_results]\n\n        # Verify correct number of files returned\n        assert len(returned_file_ids) == len(prefixed_files), (\n            f\"Expected {len(prefixed_files)} files with prefix '{test_prefix}', \"\n            f\"but got {len(returned_file_ids)}: {returned_file_ids}\"\n        )\n\n        # Verify all prefixed files are returned\n        for expected_file_id in prefixed_files:\n            assert (\n                expected_file_id in returned_file_ids\n            ), f\"File '{expected_file_id}' should be in results but was not found. Returned files: {returned_file_ids}\"\n\n        # Verify no non-prefixed files are returned\n        for unexpected_file_id in non_prefixed_files:\n            assert (\n                unexpected_file_id not in returned_file_ids\n            ), f\"File '{unexpected_file_id}' should NOT be in results but was found. Returned files: {returned_file_ids}\"\n\n        # Verify the returned records have correct properties\n        for record in prefix_results:\n            assert record.file_id.startswith(test_prefix)\n            assert record.display_name == f\"Display: {record.file_id}\"\n            assert record.file_origin == FileOrigin.OTHER\n            assert record.file_type == \"text/plain\"\n            assert record.bucket_name == file_store._get_bucket_name()\n\n        # Test with empty prefix (should return all files we created)\n        all_results = file_store.list_files_by_prefix(\"\")\n        all_returned_ids = [record.file_id for record in all_results]\n\n        # Should include all our test files\n        for file_id in saved_file_ids:\n            assert (\n                file_id in all_returned_ids\n            ), f\"File '{file_id}' should be in results for empty prefix\"\n\n        # Test with non-existent prefix\n        nonexistent_results = file_store.list_files_by_prefix(\"nonexistent-prefix-\")\n        assert (\n            len(nonexistent_results) == 0\n        ), \"Should return empty list for non-existent prefix\"\n\n    def test_get_file_size(self, file_store: S3BackedFileStore) -> None:\n        \"\"\"Test getting file size from S3\"\"\"\n        file_id = f\"{uuid.uuid4()}.txt\"\n        display_name = \"Test File Size\"\n        content = \"This is test content for file size check.\"\n        expected_size = len(content.encode(\"utf-8\"))\n        file_type = \"text/plain\"\n        file_origin = FileOrigin.OTHER\n\n        # Save the file\n        content_io = BytesIO(content.encode(\"utf-8\"))\n        returned_file_id = file_store.save_file(\n            content=content_io,\n            display_name=display_name,\n            file_origin=file_origin,\n            file_type=file_type,\n            file_id=file_id,\n        )\n\n        assert returned_file_id == file_id\n\n        # Get file size\n        file_size = file_store.get_file_size(file_id)\n\n        assert file_size is not None\n        assert file_size == expected_size\n\n    def test_get_file_size_nonexistent_file(\n        self, file_store: S3BackedFileStore\n    ) -> None:\n        \"\"\"Test getting file size for a non-existent file returns None\"\"\"\n        nonexistent_file_id = f\"{uuid.uuid4()}.txt\"\n\n        file_size = file_store.get_file_size(nonexistent_file_id)\n\n        assert file_size is None\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/file_store/test_postgres_file_store_non_mocked.py",
    "content": "\"\"\"External dependency tests for PostgresBackedFileStore.\n\nThese tests interact with a real PostgreSQL database — no mocking.\nThey exercise Large Object creation, reading, streaming, deletion,\nand verify consistency between the file_record / file_content tables\nand the underlying pg_largeobject storage.\n\"\"\"\n\nimport uuid\nfrom collections.abc import Generator\nfrom io import BytesIO\nfrom io import StringIO\nfrom typing import Any\nfrom typing import Dict\nfrom typing import List\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.file_content import get_file_content_by_file_id\nfrom onyx.db.file_content import get_file_content_by_file_id_optional\nfrom onyx.file_store.postgres_file_store import _get_raw_connection\nfrom onyx.file_store.postgres_file_store import _read_large_object\nfrom onyx.file_store.postgres_file_store import POSTGRES_BUCKET_SENTINEL\nfrom onyx.file_store.postgres_file_store import PostgresBackedFileStore\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n# ------------------------------------------------------------------ fixtures --\n\n\n@pytest.fixture(scope=\"function\")\ndef pg_file_store(\n    db_session: Session,  # noqa: ARG001 — ensures engine is ready\n    tenant_context: None,  # noqa: ARG001\n) -> Generator[PostgresBackedFileStore, None, None]:\n    \"\"\"Provide a PostgresBackedFileStore wired to the real test database.\"\"\"\n    store = PostgresBackedFileStore()\n    store.initialize()\n\n    # Track file IDs so we can clean up after each test\n    created_ids: list[str] = []\n    original_save = store.save_file\n\n    def _tracking_save(*args: Any, **kwargs: Any) -> str:\n        file_id = original_save(*args, **kwargs)\n        created_ids.append(file_id)\n        return file_id\n\n    store.save_file = _tracking_save  # type: ignore[method-assign]\n\n    yield store\n\n    # Cleanup: delete every file we created (including Large Objects)\n    for fid in created_ids:\n        try:\n            store.delete_file(fid)\n        except Exception:\n            pass\n\n\n# -------------------------------------------------------------------- tests --\n\n\nclass TestPostgresBackedFileStore:\n    \"\"\"Full integration tests against a real PostgreSQL instance.\"\"\"\n\n    # ── basic save / read ──────────────────────────────────────────\n\n    def test_save_and_read_text_file(\n        self, pg_file_store: PostgresBackedFileStore\n    ) -> None:\n        file_id = f\"{uuid.uuid4()}.txt\"\n        content = \"Hello, Postgres Large Objects!\"\n\n        returned_id = pg_file_store.save_file(\n            content=BytesIO(content.encode()),\n            display_name=\"greeting.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n            file_id=file_id,\n        )\n\n        assert returned_id == file_id\n\n        result = pg_file_store.read_file(file_id)\n        assert result.read().decode() == content\n\n    def test_save_and_read_binary_file(\n        self, pg_file_store: PostgresBackedFileStore\n    ) -> None:\n        file_id = f\"{uuid.uuid4()}.bin\"\n        content = bytes(range(256))\n\n        pg_file_store.save_file(\n            content=BytesIO(content),\n            display_name=\"binary.bin\",\n            file_origin=FileOrigin.CONNECTOR,\n            file_type=\"application/octet-stream\",\n            file_id=file_id,\n        )\n\n        assert pg_file_store.read_file(file_id).read() == content\n\n    def test_save_string_io(self, pg_file_store: PostgresBackedFileStore) -> None:\n        \"\"\"StringIO content should be transparently UTF-8 encoded.\"\"\"\n        file_id = f\"{uuid.uuid4()}.txt\"\n        text = \"StringIO content — including unicode: 测试 🚀\"\n\n        pg_file_store.save_file(\n            content=StringIO(text),\n            display_name=\"stringio.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n            file_id=file_id,\n        )\n\n        assert pg_file_store.read_file(file_id).read().decode() == text\n\n    def test_auto_generated_file_id(\n        self, pg_file_store: PostgresBackedFileStore\n    ) -> None:\n        \"\"\"When no file_id is supplied, a UUID should be generated.\"\"\"\n        returned_id = pg_file_store.save_file(\n            content=BytesIO(b\"auto-id\"),\n            display_name=\"auto.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n        )\n\n        # Should be a valid UUID\n        uuid.UUID(returned_id)\n        assert pg_file_store.read_file(returned_id).read() == b\"auto-id\"\n\n    # ── read with tempfile (streaming) ─────────────────────────────\n\n    def test_read_file_with_tempfile(\n        self, pg_file_store: PostgresBackedFileStore\n    ) -> None:\n        file_id = f\"{uuid.uuid4()}.txt\"\n        content = \"Streamed via tempfile\"\n\n        pg_file_store.save_file(\n            content=BytesIO(content.encode()),\n            display_name=\"streamed.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n            file_id=file_id,\n        )\n\n        tmp = pg_file_store.read_file(file_id, use_tempfile=True)\n        try:\n            tmp.seek(0)\n            assert tmp.read().decode() == content\n        finally:\n            tmp.close()\n\n    # ── file record metadata ───────────────────────────────────────\n\n    def test_file_record_fields(self, pg_file_store: PostgresBackedFileStore) -> None:\n        file_id = f\"{uuid.uuid4()}.json\"\n        metadata: Dict[str, Any] = {\"source\": \"test\", \"version\": 1}\n\n        pg_file_store.save_file(\n            content=BytesIO(b'{\"k\":\"v\"}'),\n            display_name=\"meta.json\",\n            file_origin=FileOrigin.CHAT_UPLOAD,\n            file_type=\"application/json\",\n            file_metadata=metadata,\n            file_id=file_id,\n        )\n\n        record = pg_file_store.read_file_record(file_id)\n        assert record.file_id == file_id\n        assert record.display_name == \"meta.json\"\n        assert record.file_origin == FileOrigin.CHAT_UPLOAD\n        assert record.file_type == \"application/json\"\n        assert record.file_metadata == metadata\n        assert record.bucket_name == POSTGRES_BUCKET_SENTINEL\n\n        # object_key should be the stringified Large Object OID\n        oid = int(record.object_key)\n        assert oid > 0\n\n    def test_file_content_record(self, pg_file_store: PostgresBackedFileStore) -> None:\n        \"\"\"file_content row should track the OID and byte-size.\"\"\"\n        file_id = f\"{uuid.uuid4()}.txt\"\n        payload = b\"measure my size\"\n\n        pg_file_store.save_file(\n            content=BytesIO(payload),\n            display_name=\"sized.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n            file_id=file_id,\n        )\n\n        with get_session_with_current_tenant() as session:\n            fc = get_file_content_by_file_id(file_id, session)\n            assert fc.file_size == len(payload)\n            assert fc.lobj_oid > 0\n\n    # ── has_file ───────────────────────────────────────────────────\n\n    def test_has_file(self, pg_file_store: PostgresBackedFileStore) -> None:\n        file_id = f\"{uuid.uuid4()}.txt\"\n\n        assert not pg_file_store.has_file(file_id, FileOrigin.OTHER, \"text/plain\")\n\n        pg_file_store.save_file(\n            content=BytesIO(b\"exists\"),\n            display_name=\"exists.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n            file_id=file_id,\n        )\n\n        assert pg_file_store.has_file(file_id, FileOrigin.OTHER, \"text/plain\")\n        # Wrong origin / type → False\n        assert not pg_file_store.has_file(file_id, FileOrigin.CONNECTOR, \"text/plain\")\n        assert not pg_file_store.has_file(file_id, FileOrigin.OTHER, \"image/png\")\n\n    # ── get_file_size ──────────────────────────────────────────────\n\n    def test_get_file_size(self, pg_file_store: PostgresBackedFileStore) -> None:\n        file_id = f\"{uuid.uuid4()}.txt\"\n        payload = b\"exactly 24 bytes long!?!\"\n\n        pg_file_store.save_file(\n            content=BytesIO(payload),\n            display_name=\"sized.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n            file_id=file_id,\n        )\n\n        assert pg_file_store.get_file_size(file_id) == len(payload)\n\n    def test_get_file_size_nonexistent(\n        self, pg_file_store: PostgresBackedFileStore\n    ) -> None:\n        assert pg_file_store.get_file_size(f\"{uuid.uuid4()}\") is None\n\n    # ── delete ─────────────────────────────────────────────────────\n\n    def test_delete_file(self, pg_file_store: PostgresBackedFileStore) -> None:\n        file_id = f\"{uuid.uuid4()}.txt\"\n\n        pg_file_store.save_file(\n            content=BytesIO(b\"delete me\"),\n            display_name=\"doomed.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n            file_id=file_id,\n        )\n\n        pg_file_store.delete_file(file_id)\n\n        assert not pg_file_store.has_file(file_id, FileOrigin.OTHER, \"text/plain\")\n\n        with pytest.raises(RuntimeError, match=\"does not exist\"):\n            pg_file_store.read_file(file_id)\n\n        # file_content row should also be gone\n        with get_session_with_current_tenant() as session:\n            assert get_file_content_by_file_id_optional(file_id, session) is None\n\n    def test_delete_nonexistent_raises(\n        self, pg_file_store: PostgresBackedFileStore\n    ) -> None:\n        with pytest.raises(RuntimeError, match=\"does not exist\"):\n            pg_file_store.delete_file(f\"{uuid.uuid4()}\")\n\n    # ── overwrite (upsert) ─────────────────────────────────────────\n\n    def test_overwrite_file(self, pg_file_store: PostgresBackedFileStore) -> None:\n        file_id = f\"{uuid.uuid4()}.txt\"\n\n        pg_file_store.save_file(\n            content=BytesIO(b\"original\"),\n            display_name=\"v1.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n            file_id=file_id,\n        )\n\n        assert pg_file_store.read_file(file_id).read() == b\"original\"\n\n        # Capture the OID of the original Large Object\n        with get_session_with_current_tenant() as session:\n            old_oid = get_file_content_by_file_id(file_id, session).lobj_oid\n\n        pg_file_store.save_file(\n            content=BytesIO(b\"overwritten\"),\n            display_name=\"v2.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n            file_id=file_id,\n        )\n\n        assert pg_file_store.read_file(file_id).read() == b\"overwritten\"\n\n        # The old Large Object should have been unlinked\n        with get_session_with_current_tenant() as session:\n            new_oid = get_file_content_by_file_id(file_id, session).lobj_oid\n            assert new_oid != old_oid\n\n            raw_conn = _get_raw_connection(session)\n            with pytest.raises(Exception):\n                _read_large_object(raw_conn, old_oid)\n\n    # ── change_file_id ─────────────────────────────────────────────\n\n    def test_change_file_id(self, pg_file_store: PostgresBackedFileStore) -> None:\n        old_id = f\"{uuid.uuid4()}.txt\"\n        new_id = f\"{uuid.uuid4()}.txt\"\n        content = b\"portable content\"\n\n        pg_file_store.save_file(\n            content=BytesIO(content),\n            display_name=\"rename.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n            file_id=old_id,\n        )\n\n        pg_file_store.change_file_id(old_id, new_id)\n\n        # Old ID should be gone\n        assert not pg_file_store.has_file(old_id, FileOrigin.OTHER, \"text/plain\")\n\n        # New ID should serve the same content\n        assert pg_file_store.read_file(new_id).read() == content\n        assert pg_file_store.get_file_size(new_id) == len(content)\n\n        # Clean up the renamed file (fixture only tracks save_file calls)\n        pg_file_store.delete_file(new_id)\n\n    # ── list_files_by_prefix ───────────────────────────────────────\n\n    def test_list_files_by_prefix(self, pg_file_store: PostgresBackedFileStore) -> None:\n        prefix = f\"batch-{uuid.uuid4().hex[:8]}-\"\n\n        # Create files with and without the prefix\n        for i in range(3):\n            pg_file_store.save_file(\n                content=BytesIO(f\"prefixed-{i}\".encode()),\n                display_name=f\"p{i}.txt\",\n                file_origin=FileOrigin.OTHER,\n                file_type=\"text/plain\",\n                file_id=f\"{prefix}{i}.txt\",\n            )\n\n        pg_file_store.save_file(\n            content=BytesIO(b\"unrelated\"),\n            display_name=\"other.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n            file_id=f\"other-{uuid.uuid4()}.txt\",\n        )\n\n        results = pg_file_store.list_files_by_prefix(prefix)\n        returned_ids = [r.file_id for r in results]\n\n        assert len(returned_ids) == 3\n        for i in range(3):\n            assert f\"{prefix}{i}.txt\" in returned_ids\n\n    # ── get_file_with_mime_type ────────────────────────────────────\n\n    def test_get_file_with_mime_type(\n        self, pg_file_store: PostgresBackedFileStore\n    ) -> None:\n        file_id = f\"{uuid.uuid4()}.txt\"\n\n        pg_file_store.save_file(\n            content=BytesIO(b\"plain text\"),\n            display_name=\"mime.txt\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"text/plain\",\n            file_id=file_id,\n        )\n\n        result = pg_file_store.get_file_with_mime_type(file_id)\n        assert result is not None\n        assert result.data == b\"plain text\"\n        assert result.mime_type is not None\n\n    def test_get_file_with_mime_type_nonexistent(\n        self, pg_file_store: PostgresBackedFileStore\n    ) -> None:\n        assert pg_file_store.get_file_with_mime_type(f\"{uuid.uuid4()}\") is None\n\n    # ── error handling ─────────────────────────────────────────────\n\n    def test_read_nonexistent_raises(\n        self, pg_file_store: PostgresBackedFileStore\n    ) -> None:\n        with pytest.raises(RuntimeError, match=\"does not exist\"):\n            pg_file_store.read_file(f\"{uuid.uuid4()}\")\n\n    def test_read_file_record_nonexistent_raises(\n        self, pg_file_store: PostgresBackedFileStore\n    ) -> None:\n        with pytest.raises(RuntimeError, match=\"does not exist\"):\n            pg_file_store.read_file_record(f\"{uuid.uuid4()}\")\n\n    # ── large file ─────────────────────────────────────────────────\n\n    def test_large_file_roundtrip(self, pg_file_store: PostgresBackedFileStore) -> None:\n        \"\"\"Verify a 1 MB payload survives a full save / read cycle.\"\"\"\n        file_id = f\"{uuid.uuid4()}.bin\"\n        content = b\"X\" * (1024 * 1024)\n\n        pg_file_store.save_file(\n            content=BytesIO(content),\n            display_name=\"big.bin\",\n            file_origin=FileOrigin.CONNECTOR,\n            file_type=\"application/octet-stream\",\n            file_id=file_id,\n        )\n\n        assert pg_file_store.read_file(file_id).read() == content\n        assert pg_file_store.get_file_size(file_id) == len(content)\n\n    # ── multiple files with different origins ──────────────────────\n\n    def test_multiple_files_different_origins(\n        self, pg_file_store: PostgresBackedFileStore\n    ) -> None:\n        files: List[Dict[str, Any]] = [\n            {\n                \"id\": f\"{uuid.uuid4()}.txt\",\n                \"content\": b\"chat upload\",\n                \"origin\": FileOrigin.CHAT_UPLOAD,\n                \"type\": \"text/plain\",\n            },\n            {\n                \"id\": f\"{uuid.uuid4()}.json\",\n                \"content\": b'{\"from\":\"connector\"}',\n                \"origin\": FileOrigin.CONNECTOR,\n                \"type\": \"application/json\",\n            },\n            {\n                \"id\": f\"{uuid.uuid4()}.csv\",\n                \"content\": b\"a,b\\n1,2\",\n                \"origin\": FileOrigin.GENERATED_REPORT,\n                \"type\": \"text/csv\",\n            },\n        ]\n\n        for f in files:\n            pg_file_store.save_file(\n                content=BytesIO(f[\"content\"]),\n                display_name=f[\"id\"],\n                file_origin=f[\"origin\"],\n                file_type=f[\"type\"],\n                file_id=f[\"id\"],\n            )\n\n        for f in files:\n            assert pg_file_store.has_file(f[\"id\"], f[\"origin\"], f[\"type\"])\n            assert pg_file_store.read_file(f[\"id\"]).read() == f[\"content\"]\n\n    # ── complex JSONB metadata ─────────────────────────────────────\n\n    def test_complex_jsonb_metadata(\n        self, pg_file_store: PostgresBackedFileStore\n    ) -> None:\n        file_id = f\"{uuid.uuid4()}.json\"\n        metadata: Dict[str, Any] = {\n            \"nested\": {\"array\": [1, 2, {\"inner\": True}], \"null_val\": None},\n            \"unicode\": \"测试 🚀\",\n            \"large_text\": \"z\" * 1000,\n        }\n\n        pg_file_store.save_file(\n            content=BytesIO(b\"{}\"),\n            display_name=\"meta.json\",\n            file_origin=FileOrigin.OTHER,\n            file_type=\"application/json\",\n            file_metadata=metadata,\n            file_id=file_id,\n        )\n\n        record = pg_file_store.read_file_record(file_id)\n        assert record.file_metadata == metadata\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/full_setup.py",
    "content": "from __future__ import annotations\n\nimport os\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.search_settings import get_active_search_settings\nfrom onyx.document_index.factory import get_all_document_indices\nfrom onyx.document_index.factory import get_default_document_index\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.indexing.models import IndexingSetting\nfrom onyx.setup import setup_document_indices\nfrom onyx.setup import setup_postgres\nfrom shared_configs import configs as shared_configs_module\nfrom shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\nfrom tests.external_dependency_unit.constants import TEST_TENANT_ID\n\n\n_SETUP_COMPLETE: bool = False\n\n\ndef ensure_full_deployment_setup(\n    tenant_id: Optional[str] = None,\n    opensearch_available: bool = False,\n) -> None:\n    \"\"\"Initialize test environment to mirror a real deployment, on demand.\n\n    - Initializes DB engine and sets tenant context\n    - Skips model warm-ups during setup\n    - Runs setup_onyx (Postgres defaults, Vespa indices)\n    - Initializes file store (best-effort)\n    - Ensures Vespa indices exist\n    \"\"\"\n    global _SETUP_COMPLETE\n    if _SETUP_COMPLETE:\n        return\n\n    if os.environ.get(\"SKIP_EXTERNAL_DEPENDENCY_UNIT_SETUP\", \"\").lower() == \"true\":\n        return\n\n    tenant = tenant_id or TEST_TENANT_ID\n\n    # Initialize engine (noop if already initialized)\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n\n    # Avoid warm-up network calls during setup\n    shared_configs_module.SKIP_WARM_UP = True\n\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant)\n    original_cwd = os.getcwd()\n    backend_dir = Path(__file__).resolve().parents[2]  # points to 'backend'\n    os.chdir(str(backend_dir))\n\n    try:\n        with get_session_with_current_tenant() as db_session:\n            setup_postgres(db_session)\n\n            # Initialize file store; ignore if not configured\n            try:\n                get_default_file_store().initialize()\n            except Exception:\n                pass\n\n        # Also ensure indices exist explicitly (no-op if already created)\n        with get_session_with_current_tenant() as db_session:\n            active = get_active_search_settings(db_session)\n            if opensearch_available:\n                # We use this special bool here instead of just relying on\n                # ENABLE_OPENSEARCH_INDEXING_FOR_ONYX because not all testing\n                # infra is configured for OpenSearch.\n                document_indices = get_all_document_indices(\n                    active.primary, active.secondary\n                )\n            else:\n                document_indices = [\n                    get_default_document_index(\n                        active.primary, active.secondary, db_session\n                    )\n                ]\n            ok = setup_document_indices(\n                document_indices=document_indices,\n                index_setting=IndexingSetting.from_db_model(active.primary),\n                secondary_index_setting=(\n                    IndexingSetting.from_db_model(active.secondary)\n                    if active.secondary\n                    else None\n                ),\n            )\n            if not ok:\n                raise RuntimeError(\n                    \"Vespa did not initialize within the specified timeout.\"\n                )\n\n        _SETUP_COMPLETE = True\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n        os.chdir(original_cwd)\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/hierarchy/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/external_dependency_unit/hierarchy/test_hierarchy_access_filter.py",
    "content": "\"\"\"Tests for hierarchy node access filtering.\n\nValidates that the overlap operator on external_user_group_ids works correctly\nwith PostgreSQL's VARCHAR[] column type. This specifically tests the fix for\nthe `character varying[] && text[]` type mismatch error.\n\"\"\"\n\nfrom collections.abc import Generator\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.hierarchy import _get_accessible_hierarchy_nodes_for_source\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.db.models import HierarchyNode\n\n\ndef _make_node(\n    raw_node_id: str,\n    display_name: str,\n    *,\n    is_public: bool = False,\n    external_user_emails: list[str] | None = None,\n    external_user_group_ids: list[str] | None = None,\n) -> HierarchyNode:\n    return HierarchyNode(\n        raw_node_id=raw_node_id,\n        display_name=display_name,\n        source=DocumentSource.GOOGLE_DRIVE,\n        node_type=HierarchyNodeType.FOLDER,\n        is_public=is_public,\n        external_user_emails=external_user_emails,\n        external_user_group_ids=external_user_group_ids,\n    )\n\n\n@pytest.fixture()\ndef seeded_nodes(db_session: Session) -> Generator[list[HierarchyNode], None, None]:\n    \"\"\"Seed hierarchy nodes with various permission configurations.\"\"\"\n    tag = uuid4().hex[:8]\n    nodes = [\n        _make_node(\n            f\"public_{tag}\",\n            f\"Public Folder {tag}\",\n            is_public=True,\n        ),\n        _make_node(\n            f\"email_only_{tag}\",\n            f\"Email-Only Folder {tag}\",\n            external_user_emails=[\"alice@example.com\"],\n        ),\n        _make_node(\n            f\"group_only_{tag}\",\n            f\"Group-Only Folder {tag}\",\n            external_user_group_ids=[\"group_engineering\", \"group_design\"],\n        ),\n        _make_node(\n            f\"private_{tag}\",\n            f\"Private Folder {tag}\",\n        ),\n    ]\n    for node in nodes:\n        db_session.add(node)\n    db_session.flush()\n\n    yield nodes\n\n    # Cleanup\n    for node in nodes:\n        db_session.delete(node)\n    db_session.commit()\n\n\ndef test_group_overlap_filter(\n    db_session: Session,\n    seeded_nodes: list[HierarchyNode],\n) -> None:\n    \"\"\"The overlap (&&) operator must work on the VARCHAR[] column.\n\n    This is the core regression test: before the cast fix, PostgreSQL raised\n    `operator does not exist: character varying[] && text[]`.\n    \"\"\"\n    results = _get_accessible_hierarchy_nodes_for_source(\n        db_session,\n        source=DocumentSource.GOOGLE_DRIVE,\n        user_email=\"\",\n        external_group_ids=[\"group_engineering\"],\n    )\n    result_ids = {n.raw_node_id for n in results}\n\n    public_node, _, group_node, private_node = seeded_nodes\n    assert public_node.raw_node_id in result_ids\n    assert group_node.raw_node_id in result_ids\n    assert private_node.raw_node_id not in result_ids\n\n\ndef test_email_filter(\n    db_session: Session,\n    seeded_nodes: list[HierarchyNode],\n) -> None:\n    \"\"\"User email matching should return the email-permissioned node.\"\"\"\n    results = _get_accessible_hierarchy_nodes_for_source(\n        db_session,\n        source=DocumentSource.GOOGLE_DRIVE,\n        user_email=\"alice@example.com\",\n        external_group_ids=[],\n    )\n    result_ids = {n.raw_node_id for n in results}\n\n    public_node, email_node, group_node, private_node = seeded_nodes\n    assert public_node.raw_node_id in result_ids\n    assert email_node.raw_node_id in result_ids\n    assert group_node.raw_node_id not in result_ids\n    assert private_node.raw_node_id not in result_ids\n\n\ndef test_no_credentials_returns_only_public(\n    db_session: Session,\n    seeded_nodes: list[HierarchyNode],\n) -> None:\n    \"\"\"With no email and no groups, only public nodes should be returned.\"\"\"\n    results = _get_accessible_hierarchy_nodes_for_source(\n        db_session,\n        source=DocumentSource.GOOGLE_DRIVE,\n        user_email=\"\",\n        external_group_ids=[],\n    )\n    result_ids = {n.raw_node_id for n in results}\n\n    public_node, email_node, group_node, private_node = seeded_nodes\n    assert public_node.raw_node_id in result_ids\n    assert email_node.raw_node_id not in result_ids\n    assert group_node.raw_node_id not in result_ids\n    assert private_node.raw_node_id not in result_ids\n\n\ndef test_combined_email_and_group(\n    db_session: Session,\n    seeded_nodes: list[HierarchyNode],\n) -> None:\n    \"\"\"Both email and group filters should apply together via OR.\"\"\"\n    results = _get_accessible_hierarchy_nodes_for_source(\n        db_session,\n        source=DocumentSource.GOOGLE_DRIVE,\n        user_email=\"alice@example.com\",\n        external_group_ids=[\"group_design\"],\n    )\n    result_ids = {n.raw_node_id for n in results}\n\n    public_node, email_node, group_node, private_node = seeded_nodes\n    assert public_node.raw_node_id in result_ids\n    assert email_node.raw_node_id in result_ids\n    assert group_node.raw_node_id in result_ids\n    assert private_node.raw_node_id not in result_ids\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/llm/test_llm_provider.py",
    "content": "\"\"\"\nTests for the test_llm_configuration endpoint (/admin/llm/test).\n\nThis tests the LLM configuration testing functionality which verifies\nthat LLM credentials are valid before saving them.\n\"\"\"\n\nfrom collections.abc import Generator\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.enums import LLMModelFlowType\nfrom onyx.db.llm import fetch_existing_llm_provider\nfrom onyx.db.llm import remove_llm_provider\nfrom onyx.db.llm import update_default_provider\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.db.models import UserRole\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.interfaces import LLM\nfrom onyx.server.manage.llm.api import (\n    test_default_provider as run_test_default_provider,\n)\nfrom onyx.server.manage.llm.api import (\n    test_llm_configuration as run_test_llm_configuration,\n)\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import LLMProviderView\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\nfrom onyx.server.manage.llm.models import TestLLMRequest as LLMTestRequest\n\n\ndef _create_mock_admin() -> MagicMock:\n    \"\"\"Create a mock admin user for testing.\"\"\"\n    mock_admin = MagicMock()\n    mock_admin.role = UserRole.ADMIN\n    return mock_admin\n\n\ndef _create_test_provider(\n    db_session: Session,\n    name: str,\n    api_key: str = \"sk-test-key-00000000000000000000000000000000000\",\n) -> LLMProviderView:\n    \"\"\"Helper to create a test LLM provider in the database.\"\"\"\n    return upsert_llm_provider(\n        LLMProviderUpsertRequest(\n            name=name,\n            provider=LlmProviderNames.OPENAI,\n            api_key=api_key,\n            api_key_changed=True,\n            model_configurations=[\n                ModelConfigurationUpsertRequest(name=\"gpt-4o-mini\", is_visible=True)\n            ],\n        ),\n        db_session=db_session,\n    )\n\n\ndef _cleanup_provider(db_session: Session, name: str) -> None:\n    \"\"\"Helper to clean up a test provider by name.\"\"\"\n    provider = fetch_existing_llm_provider(name=name, db_session=db_session)\n    if provider:\n        remove_llm_provider(db_session, provider.id)\n\n\n@pytest.fixture\ndef provider_name() -> Generator[str, None, None]:\n    \"\"\"Generate a unique provider name for each test.\"\"\"\n    yield f\"test-provider-{uuid4().hex[:8]}\"\n\n\nclass TestLLMConfigurationEndpoint:\n    \"\"\"Tests for the test_llm_configuration endpoint.\"\"\"\n\n    def test_successful_llm_test_with_new_provider(\n        self,\n        db_session: Session,\n        provider_name: str,  # noqa: ARG002\n    ) -> None:\n        \"\"\"\n        Test that a successful LLM test returns normally (no exception).\n\n        When test_llm returns None (success), the endpoint should complete\n        without raising an exception.\n        \"\"\"\n        captured_llms: list[LLM] = []\n\n        def mock_test_llm_success(llm: LLM) -> str | None:\n            \"\"\"Mock test_llm that always succeeds.\"\"\"\n            captured_llms.append(llm)\n            return None  # Success\n\n        try:\n            with patch(\n                \"onyx.server.manage.llm.api.test_llm\", side_effect=mock_test_llm_success\n            ):\n                # This should complete without exception\n                run_test_llm_configuration(\n                    test_llm_request=LLMTestRequest(\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=\"sk-new-test-key-0000000000000000000000000000\",\n                        api_key_changed=True,\n                        custom_config_changed=False,\n                        model=\"gpt-4o-mini\",\n                    ),\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Verify test_llm was called\n            assert len(captured_llms) == 1, \"test_llm should have been called once\"\n\n            # Verify the LLM was configured with the correct model\n            assert captured_llms[0].config.model_name == \"gpt-4o-mini\"\n            assert captured_llms[0].config.model_provider == LlmProviderNames.OPENAI\n\n        finally:\n            db_session.rollback()\n\n    def test_failed_llm_test_raises_onyx_error(\n        self,\n        db_session: Session,\n        provider_name: str,  # noqa: ARG002\n    ) -> None:\n        \"\"\"\n        Test that a failed LLM test raises an OnyxError with VALIDATION_ERROR.\n\n        When test_llm returns an error message, the endpoint should raise\n        an OnyxError with the error details.\n        \"\"\"\n        error_message = \"Invalid API key: Authentication failed\"\n\n        def mock_test_llm_failure(llm: LLM) -> str | None:  # noqa: ARG001\n            \"\"\"Mock test_llm that always fails.\"\"\"\n            return error_message\n\n        try:\n            with patch(\n                \"onyx.server.manage.llm.api.test_llm\", side_effect=mock_test_llm_failure\n            ):\n                with pytest.raises(OnyxError) as exc_info:\n                    run_test_llm_configuration(\n                        test_llm_request=LLMTestRequest(\n                            provider=LlmProviderNames.OPENAI,\n                            api_key=\"sk-invalid-key-00000000000000000000000000\",\n                            api_key_changed=True,\n                            custom_config_changed=False,\n                            model=\"gpt-4o-mini\",\n                        ),\n                        _=_create_mock_admin(),\n                        db_session=db_session,\n                    )\n\n                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR\n                assert exc_info.value.detail == error_message\n\n        finally:\n            db_session.rollback()\n\n    def test_uses_existing_provider_api_key_when_not_changed(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Test that when testing an existing provider without changing the API key,\n        the stored API key from the database is used.\n        \"\"\"\n        original_api_key = \"sk-original-stored-key-00000000000000000000\"\n        captured_llms: list[LLM] = []\n\n        def mock_test_llm_capture(llm: LLM) -> str | None:\n            \"\"\"Mock test_llm that captures the LLM for inspection.\"\"\"\n            captured_llms.append(llm)\n            return None\n\n        try:\n            # First, create the provider in the database\n            provider = _create_test_provider(\n                db_session, provider_name, api_key=original_api_key\n            )\n\n            with patch(\n                \"onyx.server.manage.llm.api.test_llm\", side_effect=mock_test_llm_capture\n            ):\n                # Test with api_key_changed=False - should use stored key\n                run_test_llm_configuration(\n                    test_llm_request=LLMTestRequest(\n                        id=provider.id,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=None,  # Not providing a new key\n                        api_key_changed=False,  # Using existing key\n                        custom_config_changed=False,\n                        model=\"gpt-4o-mini\",\n                    ),\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Verify test_llm was called with the original API key\n            assert len(captured_llms) == 1\n            assert captured_llms[0].config.api_key == original_api_key\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_uses_new_api_key_when_changed(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Test that when testing an existing provider with a new API key,\n        the new API key is used instead of the stored one.\n        \"\"\"\n        original_api_key = \"sk-original-stored-key-00000000000000000000\"\n        new_api_key = \"sk-new-updated-key-000000000000000000000000\"\n        captured_llms: list[LLM] = []\n\n        def mock_test_llm_capture(llm: LLM) -> str | None:\n            \"\"\"Mock test_llm that captures the LLM for inspection.\"\"\"\n            captured_llms.append(llm)\n            return None\n\n        try:\n            # First, create the provider in the database\n            provider = _create_test_provider(\n                db_session, provider_name, api_key=original_api_key\n            )\n\n            with patch(\n                \"onyx.server.manage.llm.api.test_llm\", side_effect=mock_test_llm_capture\n            ):\n                # Test with api_key_changed=True - should use new key\n                run_test_llm_configuration(\n                    test_llm_request=LLMTestRequest(\n                        id=provider.id,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=new_api_key,  # Providing a new key\n                        api_key_changed=True,  # Key is being changed\n                        custom_config_changed=False,\n                        model=\"gpt-4o-mini\",\n                    ),\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Verify test_llm was called with the new API key\n            assert len(captured_llms) == 1\n            assert captured_llms[0].config.api_key == new_api_key\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_uses_existing_custom_config_when_not_changed(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Test that when testing an existing provider without changing custom_config,\n        the stored custom_config from the database is used.\n        \"\"\"\n        original_custom_config = {\"custom_key\": \"original_value\"}\n        captured_llms: list[LLM] = []\n\n        def mock_test_llm_capture(llm: LLM) -> str | None:\n            \"\"\"Mock test_llm that captures the LLM for inspection.\"\"\"\n            captured_llms.append(llm)\n            return None\n\n        try:\n            # First, create the provider in the database with custom_config\n            provider = upsert_llm_provider(\n                LLMProviderUpsertRequest(\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                    api_key_changed=True,\n                    custom_config=original_custom_config,\n                    custom_config_changed=True,\n                    model_configurations=[\n                        ModelConfigurationUpsertRequest(\n                            name=\"gpt-4o-mini\", is_visible=True\n                        )\n                    ],\n                ),\n                db_session=db_session,\n            )\n\n            with patch(\n                \"onyx.server.manage.llm.api.test_llm\", side_effect=mock_test_llm_capture\n            ):\n                # Test with custom_config_changed=False - should use stored config\n                run_test_llm_configuration(\n                    test_llm_request=LLMTestRequest(\n                        id=provider.id,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=None,\n                        api_key_changed=False,\n                        custom_config=None,  # Not providing new config\n                        custom_config_changed=False,  # Using existing config\n                        model=\"gpt-4o-mini\",\n                    ),\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Verify test_llm was called with the original custom_config\n            assert len(captured_llms) == 1\n            assert captured_llms[0].config.custom_config == original_custom_config\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_different_model_names(\n        self,\n        db_session: Session,\n    ) -> None:\n        \"\"\"\n        Test that the endpoint correctly passes different model names to the LLM.\n        \"\"\"\n        captured_llms: list[LLM] = []\n\n        def mock_test_llm_capture(llm: LLM) -> str | None:\n            captured_llms.append(llm)\n            return None\n\n        test_models = [\"gpt-4\", \"gpt-4o\", \"gpt-4o-mini\", \"gpt-3.5-turbo\"]\n\n        try:\n            with patch(\n                \"onyx.server.manage.llm.api.test_llm\", side_effect=mock_test_llm_capture\n            ):\n                for model_name in test_models:\n                    run_test_llm_configuration(\n                        test_llm_request=LLMTestRequest(\n                            provider=LlmProviderNames.OPENAI,\n                            api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                            api_key_changed=True,\n                            custom_config_changed=False,\n                            model=model_name,\n                        ),\n                        _=_create_mock_admin(),\n                        db_session=db_session,\n                    )\n\n            # Verify all models were tested\n            assert len(captured_llms) == len(test_models)\n\n            for i, llm in enumerate(captured_llms):\n                assert (\n                    llm.config.model_name == test_models[i]\n                ), f\"Expected model {test_models[i]}, got {llm.config.model_name}\"\n\n        finally:\n            db_session.rollback()\n\n\nclass TestDefaultProviderEndpoint:\n    \"\"\"Tests for the test_default_provider endpoint (/admin/llm/test/default).\"\"\"\n\n    def test_default_provider_switching(\n        self,\n        db_session: Session,\n    ) -> None:\n        \"\"\"\n        Test that run_test_default_provider correctly uses the default provider\n        and responds to changes in default model and default provider.\n\n        Steps:\n        1. Upload provider 1 with models, set as default\n        2. Call run_test_default_provider - should use provider 1's default model\n        3. Upload provider 2 with models (not default)\n        4. Call run_test_default_provider - should still use provider 1\n        5. Change the default model on provider 1\n        6. Call run_test_default_provider - should use new model on provider 1\n        7. Change the default provider to provider 2\n        8. Call run_test_default_provider - should use provider 2\n        \"\"\"\n        provider_1_name = f\"test-provider-1-{uuid4().hex[:8]}\"\n        provider_2_name = f\"test-provider-2-{uuid4().hex[:8]}\"\n\n        provider_1_api_key = \"sk-provider1-key-000000000000000000000000000\"\n        provider_2_api_key = \"sk-provider2-key-000000000000000000000000000\"\n\n        provider_1_initial_model = \"gpt-4\"\n        provider_1_updated_model = \"gpt-4o\"\n        provider_2_default_model = \"gpt-4o-mini\"\n\n        captured_llms: list[LLM] = []\n\n        def mock_test_llm_capture(llm: LLM) -> str | None:\n            \"\"\"Mock test_llm that captures the LLM for inspection.\"\"\"\n            captured_llms.append(llm)\n            return None\n\n        try:\n            # Step 1: Create provider 1 with models, it becomes default (first provider)\n            provider_1 = upsert_llm_provider(\n                LLMProviderUpsertRequest(\n                    name=provider_1_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=provider_1_api_key,\n                    api_key_changed=True,\n                    model_configurations=[\n                        ModelConfigurationUpsertRequest(name=\"gpt-4\", is_visible=True),\n                        ModelConfigurationUpsertRequest(name=\"gpt-4o\", is_visible=True),\n                    ],\n                ),\n                db_session=db_session,\n            )\n\n            # Set provider 1 as the default provider explicitly\n            update_default_provider(provider_1.id, provider_1_initial_model, db_session)\n\n            # Step 2: Call run_test_default_provider - should use provider 1's default model\n            with patch(\n                \"onyx.server.manage.llm.api.test_llm\", side_effect=mock_test_llm_capture\n            ):\n                run_test_default_provider(_=_create_mock_admin())\n\n            assert len(captured_llms) == 1\n            assert captured_llms[0].config.model_name == provider_1_initial_model\n            assert captured_llms[0].config.api_key == provider_1_api_key\n            captured_llms.clear()\n\n            # Step 3: Create provider 2 (not default)\n            provider_2 = upsert_llm_provider(\n                LLMProviderUpsertRequest(\n                    name=provider_2_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=provider_2_api_key,\n                    api_key_changed=True,\n                    model_configurations=[\n                        ModelConfigurationUpsertRequest(\n                            name=\"gpt-4o-mini\", is_visible=True\n                        ),\n                        ModelConfigurationUpsertRequest(\n                            name=\"gpt-3.5-turbo\", is_visible=True\n                        ),\n                    ],\n                ),\n                db_session=db_session,\n            )\n\n            # Step 4: Call run_test_default_provider - should still use provider 1\n            with patch(\n                \"onyx.server.manage.llm.api.test_llm\", side_effect=mock_test_llm_capture\n            ):\n                run_test_default_provider(_=_create_mock_admin())\n\n            assert len(captured_llms) == 1\n            assert captured_llms[0].config.model_name == provider_1_initial_model\n            assert captured_llms[0].config.api_key == provider_1_api_key\n            captured_llms.clear()\n\n            # Step 5: Update provider 1's default model\n            upsert_llm_provider(\n                LLMProviderUpsertRequest(\n                    id=provider_1.id,\n                    name=provider_1_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=provider_1_api_key,\n                    api_key_changed=True,\n                    model_configurations=[\n                        ModelConfigurationUpsertRequest(name=\"gpt-4\", is_visible=True),\n                        ModelConfigurationUpsertRequest(name=\"gpt-4o\", is_visible=True),\n                    ],\n                ),\n                db_session=db_session,\n            )\n\n            # Set provider 1's default model to the updated model\n            update_default_provider(provider_1.id, provider_1_updated_model, db_session)\n\n            # Step 6: Call run_test_default_provider - should use new model on provider 1\n            with patch(\n                \"onyx.server.manage.llm.api.test_llm\", side_effect=mock_test_llm_capture\n            ):\n                run_test_default_provider(_=_create_mock_admin())\n\n            assert len(captured_llms) == 1\n            assert captured_llms[0].config.model_name == provider_1_updated_model\n            assert captured_llms[0].config.api_key == provider_1_api_key\n            captured_llms.clear()\n\n            # Step 7: Change the default provider to provider 2\n            update_default_provider(provider_2.id, provider_2_default_model, db_session)\n\n            # Step 8: Call run_test_default_provider - should use provider 2\n            with patch(\n                \"onyx.server.manage.llm.api.test_llm\", side_effect=mock_test_llm_capture\n            ):\n                run_test_default_provider(_=_create_mock_admin())\n\n            assert len(captured_llms) == 1\n            assert captured_llms[0].config.model_name == provider_2_default_model\n            assert captured_llms[0].config.api_key == provider_2_api_key\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_1_name)\n            _cleanup_provider(db_session, provider_2_name)\n\n    def test_no_default_provider_raises_exception(\n        self,\n        db_session: Session,\n    ) -> None:\n        \"\"\"\n        Test that when no default provider exists, the endpoint raises an exception.\n        \"\"\"\n        # Clear any existing providers to ensure no default exists\n        from onyx.db.llm import fetch_existing_llm_providers\n\n        try:\n            existing_providers = fetch_existing_llm_providers(\n                db_session, flow_type_filter=[LLMModelFlowType.CHAT]\n            )\n            provider_names_to_restore: list[str] = []\n\n            for provider in existing_providers:\n                provider_names_to_restore.append(provider.name)\n\n            # Remove all providers temporarily\n            for provider in existing_providers:\n                remove_llm_provider(db_session, provider.id)\n\n            # Now run_test_default_provider should fail\n            with pytest.raises(OnyxError) as exc_info:\n                run_test_default_provider(_=_create_mock_admin())\n\n            assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR\n            assert \"No LLM Provider setup\" in exc_info.value.detail\n\n        finally:\n            db_session.rollback()\n\n    def test_default_provider_test_failure(\n        self,\n        db_session: Session,\n    ) -> None:\n        \"\"\"\n        Test that when the default provider's LLM test fails, an exception is raised.\n        \"\"\"\n        provider_name = f\"test-provider-{uuid4().hex[:8]}\"\n        error_message = \"Connection to LLM provider failed\"\n\n        def mock_test_llm_failure(llm: LLM) -> str | None:  # noqa: ARG001\n            \"\"\"Mock test_llm that always fails.\"\"\"\n            return error_message\n\n        try:\n            # Create a provider and set it as default\n            provider = upsert_llm_provider(\n                LLMProviderUpsertRequest(\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                    api_key_changed=True,\n                    model_configurations=[\n                        ModelConfigurationUpsertRequest(\n                            name=\"gpt-4o-mini\", is_visible=True\n                        ),\n                    ],\n                ),\n                db_session=db_session,\n            )\n            update_default_provider(provider.id, \"gpt-4o-mini\", db_session)\n\n            # Test should fail\n            with patch(\n                \"onyx.server.manage.llm.api.test_llm\", side_effect=mock_test_llm_failure\n            ):\n                with pytest.raises(OnyxError) as exc_info:\n                    run_test_default_provider(_=_create_mock_admin())\n\n                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR\n                assert exc_info.value.detail == error_message\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/llm/test_llm_provider_api_base.py",
    "content": "\"\"\"\nTests for LLM provider api_base and custom_config change restrictions.\n\nThis ensures we don't have a vulnerability where an admin could change the api_base\nor custom_config of an LLM provider without changing the API key, allowing them to\nredirect API requests (containing the real API key in headers) to an attacker-controlled\nserver.\n\nThese are external dependency unit tests because they need a real database but\nalso need to control the MULTI_TENANT setting via patching.\n\"\"\"\n\nfrom collections.abc import Generator\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.llm import fetch_existing_llm_provider\nfrom onyx.db.llm import remove_llm_provider\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.db.models import UserRole\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.server.manage.llm.api import _mask_string\nfrom onyx.server.manage.llm.api import put_llm_provider\nfrom onyx.server.manage.llm.api import test_llm_configuration as run_llm_config_test\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import LLMProviderView\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\nfrom onyx.server.manage.llm.models import TestLLMRequest as LLMTestRequest\nfrom tests.external_dependency_unit.mock_llm import LLM\n\n\ndef _create_test_provider(\n    db_session: Session,\n    name: str,\n    api_base: str | None = None,\n    custom_config: dict[str, str] | None = None,\n) -> LLMProviderView:\n    \"\"\"Helper to create a test LLM provider.\"\"\"\n    return upsert_llm_provider(\n        LLMProviderUpsertRequest(\n            name=name,\n            provider=LlmProviderNames.OPENAI,\n            api_key=\"sk-test-key-00000000000000000000000000000000000\",\n            api_key_changed=True,\n            api_base=api_base,\n            custom_config=custom_config,\n            model_configurations=[\n                ModelConfigurationUpsertRequest(name=\"gpt-4o-mini\", is_visible=True)\n            ],\n        ),\n        db_session=db_session,\n    )\n\n\ndef _cleanup_provider(db_session: Session, name: str) -> None:\n    \"\"\"Helper to clean up a test provider by name.\"\"\"\n    provider = fetch_existing_llm_provider(name=name, db_session=db_session)\n    if provider:\n        remove_llm_provider(db_session, provider.id)\n\n\ndef _create_mock_admin() -> MagicMock:\n    \"\"\"Create a mock admin user for testing.\"\"\"\n    mock_admin = MagicMock()\n    mock_admin.role = UserRole.ADMIN\n    return mock_admin\n\n\n@pytest.fixture\ndef provider_name() -> Generator[str, None, None]:\n    \"\"\"Generate a unique provider name for each test.\"\"\"\n    yield f\"test-provider-{uuid4().hex[:8]}\"\n\n\nclass TestLLMProviderChanges:\n    \"\"\"Tests for api_base change restrictions when updating LLM providers.\"\"\"\n\n    def test_blocks_api_base_change_without_key_change__multi_tenant(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        In multi-tenant mode, changing api_base without also changing\n        the API key should be blocked.\n        \"\"\"\n        try:\n            provider = _create_test_provider(db_session, provider_name)\n\n            with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", True):\n                update_request = LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_base=\"https://attacker.example.com\",\n                )\n\n                with pytest.raises(OnyxError) as exc_info:\n                    put_llm_provider(\n                        llm_provider_upsert_request=update_request,\n                        is_creation=False,\n                        _=_create_mock_admin(),\n                        db_session=db_session,\n                    )\n\n                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR\n                assert \"cannot be changed without changing the API key\" in str(\n                    exc_info.value.detail\n                )\n        finally:\n            _cleanup_provider(db_session, provider_name)\n\n    def test_allows_api_base_change_with_key_change__multi_tenant(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Changing api_base IS allowed when the API key is also being changed.\n        \"\"\"\n        try:\n            provider = _create_test_provider(db_session, provider_name)\n\n            with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", True):\n                update_request = LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=\"sk-new-key-00000000000000000000000000000000000\",\n                    api_key_changed=True,\n                    api_base=\"https://custom-endpoint.example.com/v1\",\n                )\n\n                result = put_llm_provider(\n                    llm_provider_upsert_request=update_request,\n                    is_creation=False,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n                assert result.api_base == \"https://custom-endpoint.example.com/v1\"\n        finally:\n            _cleanup_provider(db_session, provider_name)\n\n    def test_allows_same_api_base__multi_tenant(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Keeping the same api_base (no change) is allowed without changing the API key.\n        \"\"\"\n        original_api_base = \"https://original.example.com/v1\"\n\n        try:\n            provider = _create_test_provider(\n                db_session, provider_name, api_base=original_api_base\n            )\n\n            with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", True):\n                update_request = LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_base=original_api_base,\n                )\n\n                result = put_llm_provider(\n                    llm_provider_upsert_request=update_request,\n                    is_creation=False,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n                assert result.api_base == original_api_base\n        finally:\n            _cleanup_provider(db_session, provider_name)\n\n    def test_allows_empty_string_api_base_when_existing_is_none__multi_tenant(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Treat empty-string api_base from clients as unset when comparing provider\n        changes. This allows model-only updates when provider has no custom base URL.\n        \"\"\"\n        try:\n            view = _create_test_provider(db_session, provider_name, api_base=None)\n\n            with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", True):\n                update_request = LLMProviderUpsertRequest(\n                    id=view.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_base=\"\",\n                )\n\n                result = put_llm_provider(\n                    llm_provider_upsert_request=update_request,\n                    is_creation=False,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n                assert result.api_base is None\n        finally:\n            _cleanup_provider(db_session, provider_name)\n\n    def test_blocks_clearing_api_base__multi_tenant(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Clearing api_base (setting to None when it was previously set)\n        is also blocked without changing the API key.\n        \"\"\"\n        original_api_base = \"https://original.example.com/v1\"\n\n        try:\n            provider = _create_test_provider(\n                db_session, provider_name, api_base=original_api_base\n            )\n\n            with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", True):\n                update_request = LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_base=None,\n                )\n\n                with pytest.raises(OnyxError) as exc_info:\n                    put_llm_provider(\n                        llm_provider_upsert_request=update_request,\n                        is_creation=False,\n                        _=_create_mock_admin(),\n                        db_session=db_session,\n                    )\n\n                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR\n                assert \"cannot be changed without changing the API key\" in str(\n                    exc_info.value.detail\n                )\n        finally:\n            _cleanup_provider(db_session, provider_name)\n\n    def test_allows_api_base_change__single_tenant(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        In single-tenant mode (MULTI_TENANT=False), changing api_base without\n        changing the API key IS allowed. This is by design since single-tenant\n        users have full control over their deployment.\n        \"\"\"\n        try:\n            provider = _create_test_provider(db_session, provider_name)\n\n            with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", False):\n                update_request = LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_base=\"https://custom.example.com/v1\",\n                )\n\n                result = put_llm_provider(\n                    llm_provider_upsert_request=update_request,\n                    is_creation=False,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n                assert result.api_base == \"https://custom.example.com/v1\"\n        finally:\n            _cleanup_provider(db_session, provider_name)\n\n    def test_new_provider_creation_not_affected__multi_tenant(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Creating a new provider with an api_base should work regardless of\n        api_key_changed (since there's no existing key to protect).\n        \"\"\"\n        try:\n            with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", True):\n                create_request = LLMProviderUpsertRequest(\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=\"sk-new-key-00000000000000000000000000000000000\",\n                    api_key_changed=True,\n                    api_base=\"https://custom.example.com/v1\",\n                )\n\n                result = put_llm_provider(\n                    llm_provider_upsert_request=create_request,\n                    is_creation=True,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n                assert result.api_base == \"https://custom.example.com/v1\"\n        finally:\n            _cleanup_provider(db_session, provider_name)\n\n    def test_blocks_custom_config_change_without_key_change__multi_tenant(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        In multi-tenant mode, changing custom_config without also changing\n        the API key should be blocked (custom_config can set env vars that\n        redirect LLM API requests).\n        \"\"\"\n        try:\n            provider = _create_test_provider(\n                db_session,\n                provider_name,\n                custom_config={\"SOME_CONFIG\": \"original_value\"},\n            )\n\n            with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", True):\n                update_request = LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    custom_config={\"OPENAI_API_BASE\": \"https://attacker.example.com\"},\n                    custom_config_changed=True,\n                )\n\n                with pytest.raises(OnyxError) as exc_info:\n                    put_llm_provider(\n                        llm_provider_upsert_request=update_request,\n                        is_creation=False,\n                        _=_create_mock_admin(),\n                        db_session=db_session,\n                    )\n\n                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR\n                assert \"cannot be changed without changing the API key\" in str(\n                    exc_info.value.detail\n                )\n        finally:\n            _cleanup_provider(db_session, provider_name)\n\n    def test_blocks_adding_custom_config_without_key_change__multi_tenant(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Adding custom_config when none existed should also be blocked\n        without changing the API key.\n        \"\"\"\n        try:\n            provider = _create_test_provider(db_session, provider_name)\n\n            with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", True):\n                update_request = LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    custom_config={\"OPENAI_API_BASE\": \"https://attacker.example.com\"},\n                    custom_config_changed=True,\n                )\n\n                with pytest.raises(OnyxError) as exc_info:\n                    put_llm_provider(\n                        llm_provider_upsert_request=update_request,\n                        is_creation=False,\n                        _=_create_mock_admin(),\n                        db_session=db_session,\n                    )\n\n                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR\n                assert \"cannot be changed without changing the API key\" in str(\n                    exc_info.value.detail\n                )\n        finally:\n            _cleanup_provider(db_session, provider_name)\n\n    def test_allows_custom_config_change_with_key_change__multi_tenant(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Changing custom_config IS allowed when the API key is also being changed.\n        \"\"\"\n        new_config = {\"AWS_REGION_NAME\": \"us-west-2\"}\n\n        try:\n            provider = _create_test_provider(\n                db_session,\n                provider_name,\n                custom_config={\"AWS_REGION_NAME\": \"us-east-1\"},\n            )\n\n            with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", True):\n                update_request = LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=\"sk-new-key-00000000000000000000000000000000000\",\n                    api_key_changed=True,\n                    custom_config_changed=True,\n                    custom_config=new_config,\n                )\n\n                result = put_llm_provider(\n                    llm_provider_upsert_request=update_request,\n                    is_creation=False,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n                assert result.custom_config == new_config\n        finally:\n            _cleanup_provider(db_session, provider_name)\n\n    def test_allows_same_custom_config__multi_tenant(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Keeping the same custom_config (no change) is allowed without changing the API key.\n        \"\"\"\n        original_config = {\"AWS_REGION_NAME\": \"us-east-1\"}\n\n        try:\n            provider = _create_test_provider(\n                db_session, provider_name, custom_config=original_config\n            )\n\n            with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", True):\n                update_request = LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    custom_config=original_config,\n                    custom_config_changed=True,\n                )\n\n                result = put_llm_provider(\n                    llm_provider_upsert_request=update_request,\n                    is_creation=False,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n                assert result.custom_config == original_config\n        finally:\n            _cleanup_provider(db_session, provider_name)\n\n    def test_allows_custom_config_change__single_tenant(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        In single-tenant mode, changing custom_config without changing\n        the API key IS allowed.\n        \"\"\"\n        new_config = {\"AWS_REGION_NAME\": \"eu-west-1\"}\n\n        try:\n            provider = _create_test_provider(\n                db_session,\n                provider_name,\n                custom_config={\"AWS_REGION_NAME\": \"us-east-1\"},\n            )\n\n            with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", False):\n                update_request = LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    custom_config=new_config,\n                    custom_config_changed=True,\n                )\n\n                result = put_llm_provider(\n                    llm_provider_upsert_request=update_request,\n                    is_creation=False,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n                assert result.custom_config == new_config\n        finally:\n            _cleanup_provider(db_session, provider_name)\n\n\ndef test_upload_with_custom_config_then_change(\n    db_session: Session,\n) -> None:\n    \"\"\"\n    Run test + upload with a custom config (vertex).\n    Edit attributes of provider that are not custom config or api key.\n    Check that the test and update maintain the same values.\n    \"\"\"\n    custom_config = {\n        \"vertex_credentials\": \"1234\",\n        \"vertex_location\": \"us-east-1\",\n    }\n    name = \"test-provider-vertex-ai\"\n    provider_name = LlmProviderNames.VERTEX_AI.value\n    default_model_name = \"gemini-2.5-pro\"\n\n    # List to capture LLM inputs passed to test_llm\n    captured_llms: list = []\n\n    def capture_test_llm(llm: LLM) -> str:\n        \"\"\"Captures the LLM input and returns None (success).\"\"\"\n        captured_llms.append(llm)\n        return \"\"\n\n    try:\n        # Patch the test_llm method\n        with patch(\"onyx.server.manage.llm.api.test_llm\", side_effect=capture_test_llm):\n            run_llm_config_test(\n                LLMTestRequest(\n                    provider=provider_name,\n                    model=default_model_name,\n                    api_key_changed=False,\n                    custom_config_changed=True,\n                    custom_config=custom_config,\n                ),\n                _=_create_mock_admin(),\n                db_session=db_session,\n            )\n\n            provider = put_llm_provider(\n                llm_provider_upsert_request=LLMProviderUpsertRequest(\n                    name=name,\n                    provider=provider_name,\n                    custom_config=custom_config,\n                    model_configurations=[\n                        ModelConfigurationUpsertRequest(\n                            name=default_model_name, is_visible=True\n                        )\n                    ],\n                    api_key_changed=False,\n                    custom_config_changed=True,\n                    is_auto_mode=False,\n                ),\n                is_creation=True,\n                _=_create_mock_admin(),\n                db_session=db_session,\n            )\n\n            # Turn auto mode off\n            run_llm_config_test(\n                LLMTestRequest(\n                    id=provider.id,\n                    provider=provider_name,\n                    model=default_model_name,\n                    api_key_changed=False,\n                    custom_config_changed=False,\n                ),\n                _=_create_mock_admin(),\n                db_session=db_session,\n            )\n\n            put_llm_provider(\n                llm_provider_upsert_request=LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=name,\n                    provider=provider_name,\n                    model_configurations=[\n                        ModelConfigurationUpsertRequest(\n                            name=default_model_name, is_visible=True\n                        ),\n                        ModelConfigurationUpsertRequest(\n                            name=\"gpt-4o-mini\", is_visible=True\n                        ),\n                    ],\n                    api_key_changed=False,\n                    custom_config_changed=False,\n                    is_auto_mode=False,\n                ),\n                is_creation=False,\n                _=_create_mock_admin(),\n                db_session=db_session,\n            )\n\n            # Verify that test_llm was called and custom_config matches\n            assert len(captured_llms) == 2, \"test_llm should have been called 2 times\"\n\n            for llm in captured_llms:\n                assert (\n                    llm.config.custom_config == custom_config\n                ), f\"Expected custom_config {custom_config}, but got {llm.config.custom_config}\"\n\n            # Check inside the database and check that custom_config is the same as the original\n            db_provider = fetch_existing_llm_provider(name=name, db_session=db_session)\n            if not db_provider:\n                assert False, \"Provider not found in the database\"\n\n            assert (\n                db_provider.custom_config == custom_config\n            ), f\"Expected custom_config {custom_config}, but got {db_provider.custom_config}\"\n    finally:\n        db_session.rollback()\n        _cleanup_provider(db_session, name)\n\n\ndef test_preserves_masked_sensitive_custom_config_on_provider_update(\n    db_session: Session,\n) -> None:\n    \"\"\"Masked sensitive values from the UI should not overwrite stored secrets.\"\"\"\n    name = f\"test-provider-vertex-update-{uuid4().hex[:8]}\"\n    provider = LlmProviderNames.VERTEX_AI.value\n    default_model_name = \"gemini-2.5-pro\"\n    original_custom_config = {\n        \"vertex_credentials\": '{\"type\":\"service_account\",\"private_key\":\"REAL_PRIVATE_KEY\"}',\n        \"vertex_location\": \"global\",\n    }\n\n    try:\n        view = put_llm_provider(\n            llm_provider_upsert_request=LLMProviderUpsertRequest(\n                name=name,\n                provider=provider,\n                custom_config=original_custom_config,\n                model_configurations=[\n                    ModelConfigurationUpsertRequest(\n                        name=default_model_name, is_visible=True\n                    )\n                ],\n                api_key_changed=False,\n                custom_config_changed=True,\n                is_auto_mode=False,\n            ),\n            is_creation=True,\n            _=_create_mock_admin(),\n            db_session=db_session,\n        )\n\n        with patch(\"onyx.server.manage.llm.api.MULTI_TENANT\", False):\n            put_llm_provider(\n                llm_provider_upsert_request=LLMProviderUpsertRequest(\n                    id=view.id,\n                    name=name,\n                    provider=provider,\n                    custom_config={\n                        \"vertex_credentials\": _mask_string(\n                            original_custom_config[\"vertex_credentials\"]\n                        ),\n                        \"vertex_location\": \"us-central1\",\n                    },\n                    model_configurations=[\n                        ModelConfigurationUpsertRequest(\n                            name=default_model_name, is_visible=True\n                        )\n                    ],\n                    api_key_changed=False,\n                    custom_config_changed=True,\n                    is_auto_mode=False,\n                ),\n                is_creation=False,\n                _=_create_mock_admin(),\n                db_session=db_session,\n            )\n\n        updated_provider = fetch_existing_llm_provider(name=name, db_session=db_session)\n        assert updated_provider is not None\n        assert updated_provider.custom_config is not None\n        assert (\n            updated_provider.custom_config[\"vertex_credentials\"]\n            == original_custom_config[\"vertex_credentials\"]\n        )\n        assert updated_provider.custom_config[\"vertex_location\"] == \"us-central1\"\n    finally:\n        db_session.rollback()\n        _cleanup_provider(db_session, name)\n\n\ndef test_preserves_masked_sensitive_custom_config_on_test_request(\n    db_session: Session,\n) -> None:\n    \"\"\"LLM test should restore masked sensitive custom config values before invocation.\"\"\"\n    name = f\"test-provider-vertex-test-{uuid4().hex[:8]}\"\n    provider_name = LlmProviderNames.VERTEX_AI.value\n    default_model_name = \"gemini-2.5-pro\"\n    original_custom_config = {\n        \"vertex_credentials\": '{\"type\":\"service_account\",\"private_key\":\"REAL_PRIVATE_KEY\"}',\n        \"vertex_location\": \"global\",\n    }\n    captured_llms: list[LLM] = []\n\n    def capture_test_llm(llm: LLM) -> str:\n        captured_llms.append(llm)\n        return \"\"\n\n    try:\n        provider = put_llm_provider(\n            llm_provider_upsert_request=LLMProviderUpsertRequest(\n                name=name,\n                provider=provider_name,\n                custom_config=original_custom_config,\n                model_configurations=[\n                    ModelConfigurationUpsertRequest(\n                        name=default_model_name, is_visible=True\n                    )\n                ],\n                api_key_changed=False,\n                custom_config_changed=True,\n                is_auto_mode=False,\n            ),\n            is_creation=True,\n            _=_create_mock_admin(),\n            db_session=db_session,\n        )\n\n        with patch(\"onyx.server.manage.llm.api.test_llm\", side_effect=capture_test_llm):\n            run_llm_config_test(\n                LLMTestRequest(\n                    id=provider.id,\n                    provider=provider_name,\n                    model=default_model_name,\n                    api_key_changed=False,\n                    custom_config_changed=True,\n                    custom_config={\n                        \"vertex_credentials\": _mask_string(\n                            original_custom_config[\"vertex_credentials\"]\n                        ),\n                        \"vertex_location\": \"us-central1\",\n                    },\n                ),\n                _=_create_mock_admin(),\n                db_session=db_session,\n            )\n\n        assert len(captured_llms) == 1\n        assert captured_llms[0].config.custom_config is not None\n        assert (\n            captured_llms[0].config.custom_config[\"vertex_credentials\"]\n            == original_custom_config[\"vertex_credentials\"]\n        )\n        assert captured_llms[0].config.custom_config[\"vertex_location\"] == \"us-central1\"\n    finally:\n        db_session.rollback()\n        _cleanup_provider(db_session, name)\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/llm/test_llm_provider_auto_mode.py",
    "content": "\"\"\"\nTests for the LLM Provider Auto Mode feature.\n\nThis tests the automatic model syncing from GitHub config when a provider\nis uploaded with is_auto_mode=True.\n\"\"\"\n\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.enums import LLMModelFlowType\nfrom onyx.db.llm import fetch_auto_mode_providers\nfrom onyx.db.llm import fetch_default_llm_model\nfrom onyx.db.llm import fetch_existing_llm_provider\nfrom onyx.db.llm import fetch_existing_llm_providers\nfrom onyx.db.llm import fetch_llm_provider_view\nfrom onyx.db.llm import remove_llm_provider\nfrom onyx.db.llm import sync_auto_mode_models\nfrom onyx.db.llm import update_default_provider\nfrom onyx.db.models import UserRole\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.well_known_providers.auto_update_models import LLMProviderRecommendation\nfrom onyx.llm.well_known_providers.auto_update_models import LLMRecommendations\nfrom onyx.llm.well_known_providers.models import SimpleKnownModel\nfrom onyx.server.manage.llm.api import put_llm_provider\nfrom onyx.server.manage.llm.api import (\n    test_default_provider as run_test_default_provider,\n)\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\n\n\ndef _create_mock_admin() -> MagicMock:\n    \"\"\"Create a mock admin user for testing.\"\"\"\n    mock_admin = MagicMock()\n    mock_admin.role = UserRole.ADMIN\n    return mock_admin\n\n\ndef _cleanup_provider(db_session: Session, name: str) -> None:\n    \"\"\"Helper to clean up a test provider by name.\"\"\"\n    provider = fetch_existing_llm_provider(name=name, db_session=db_session)\n    if provider:\n        remove_llm_provider(db_session, provider.id)\n\n\ndef _create_mock_llm_recommendations(\n    provider: str,\n    default_model_name: str,\n    additional_models: list[str],\n) -> LLMRecommendations:\n    \"\"\"Create a mock LLMRecommendations object for testing.\n\n    Args:\n        provider: The provider name (e.g., \"openai\")\n        default_model_name: The name of the default model\n        additional_models: List of additional visible model names\n\n    Returns:\n        LLMRecommendations object with the specified configuration\n    \"\"\"\n    return LLMRecommendations(\n        version=\"1.0.0\",\n        updated_at=datetime.now(),\n        providers={\n            provider: LLMProviderRecommendation(\n                default_model=SimpleKnownModel(\n                    name=default_model_name,\n                    display_name=default_model_name.upper(),\n                ),\n                additional_visible_models=[\n                    SimpleKnownModel(name=model, display_name=model.upper())\n                    for model in additional_models\n                ],\n            )\n        },\n    )\n\n\n@pytest.fixture\ndef provider_name() -> Generator[str, None, None]:\n    \"\"\"Generate a unique provider name for each test.\"\"\"\n    yield f\"test-auto-provider-{uuid4().hex[:8]}\"\n\n\nclass TestAutoModeSyncFeature:\n    \"\"\"Tests for the Auto Mode model syncing feature.\"\"\"\n\n    def test_auto_mode_syncs_models_from_github_config(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Test that when a provider is uploaded with auto mode enabled and no model\n        configurations, the models from fetch_llm_recommendations_from_github()\n        are synced to the provider.\n\n        Steps:\n        1. Mock fetch_llm_recommendations_from_github to return a known config\n        2. Upload provider with is_auto_mode=True and no model_configurations\n        3. Fetch the provider and verify all recommended models are present\n        4. Set the provider as default\n        5. Fetch the default provider and verify the default model matches the config\n        \"\"\"\n        # Define the expected models from the mock GitHub config\n        expected_default_model = \"gpt-4o\"\n        expected_additional_models = [\"gpt-4o-mini\", \"gpt-4-turbo\"]\n        all_expected_models = [expected_default_model] + expected_additional_models\n\n        # Create the mock LLMRecommendations\n        mock_recommendations = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=expected_default_model,\n            additional_models=expected_additional_models,\n        )\n\n        try:\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=mock_recommendations,\n            ):\n                # Step 1-2: Upload provider with auto mode on and no model configs\n                # NOTE: We need to provide a default_model_name for the initial upsert,\n                # but auto mode will override it with the GitHub config's default\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        name=provider_name,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                        api_key_changed=True,\n                        is_auto_mode=True,\n                        model_configurations=[],  # No model configs provided\n                    ),\n                    is_creation=True,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Step 3: Verify all models from the GitHub config are present\n            # Fetch the provider fresh from the database\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None, \"Provider should exist\"\n            assert provider.is_auto_mode is True, \"Provider should be in auto mode\"\n\n            # Check that all expected models are present and visible\n            model_names = {mc.name for mc in provider.model_configurations}\n            for expected_model in all_expected_models:\n                assert (\n                    expected_model in model_names\n                ), f\"Expected model '{expected_model}' not found in provider models\"\n\n            # Verify visibility of all synced models\n            for mc in provider.model_configurations:\n                if mc.name in all_expected_models:\n                    assert mc.is_visible is True, f\"Model '{mc.name}' should be visible\"\n\n            # Step 4: Set the provider as default\n            update_default_provider(provider.id, expected_default_model, db_session)\n\n            # Step 5: Fetch the default provider and verify\n            default_model = fetch_default_llm_model(db_session)\n            assert default_model is not None, \"Default provider should exist\"\n            assert (\n                default_model.llm_provider.name == provider_name\n            ), \"Default provider should be our test provider\"\n            assert (\n                default_model.name == expected_default_model\n            ), f\"Default provider's default model should be '{expected_default_model}'\"\n            assert (\n                default_model.llm_provider.is_auto_mode is True\n            ), \"Default provider should be in auto mode\"\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_auto_mode_with_multiple_providers_in_config(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Test that auto mode only syncs models for the matching provider type,\n        ignoring models from other providers in the config.\n        \"\"\"\n        # Create recommendations with multiple providers\n        mock_recommendations = LLMRecommendations(\n            version=\"1.0.0\",\n            updated_at=datetime.now(),\n            providers={\n                LlmProviderNames.OPENAI: LLMProviderRecommendation(\n                    default_model=SimpleKnownModel(\n                        name=\"gpt-4o\", display_name=\"GPT-4o\"\n                    ),\n                    additional_visible_models=[\n                        SimpleKnownModel(name=\"gpt-4o-mini\", display_name=\"GPT-4o Mini\")\n                    ],\n                ),\n                LlmProviderNames.ANTHROPIC: LLMProviderRecommendation(\n                    default_model=SimpleKnownModel(\n                        name=\"claude-3-5-sonnet-latest\",\n                        display_name=\"Claude 3.5 Sonnet\",\n                    ),\n                    additional_visible_models=[\n                        SimpleKnownModel(\n                            name=\"claude-haiku-4-5\",\n                            display_name=\"Claude Haiku 4.5\",\n                        )\n                    ],\n                ),\n            },\n        )\n\n        try:\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=mock_recommendations,\n            ):\n                # Upload an OpenAI provider with auto mode\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        name=provider_name,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                        api_key_changed=True,\n                        is_auto_mode=True,\n                        model_configurations=[],\n                    ),\n                    is_creation=True,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Verify only OpenAI models are synced, not Anthropic models\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n\n            model_names = {mc.name for mc in provider.model_configurations}\n\n            # OpenAI models should be present\n            assert \"gpt-4o\" in model_names\n            assert \"gpt-4o-mini\" in model_names\n\n            # Anthropic models should NOT be present\n            assert \"claude-3-5-sonnet-latest\" not in model_names\n            assert \"claude-haiku-4-5\" not in model_names\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_existing_provider_transition_to_auto_mode(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Test that when an existing provider with visible models transitions to auto mode,\n        models from the auto mode config become visible, and models not in the config\n        become not visible.\n\n        Steps:\n        1. Upload a provider with some visible model configurations (not in auto mode)\n        2. Update the provider to enable auto mode\n        3. Verify:\n           - Models in the auto mode config are now visible\n           - Models NOT in the auto mode config are now NOT visible\n        \"\"\"\n        # Initial models on the provider (all visible initially)\n        initial_models = [\n            ModelConfigurationUpsertRequest(\n                name=\"gpt-4\", is_visible=True\n            ),  # Will NOT be in auto config\n            ModelConfigurationUpsertRequest(\n                name=\"gpt-4o\", is_visible=True\n            ),  # Will be in auto config\n            ModelConfigurationUpsertRequest(\n                name=\"gpt-3.5-turbo\", is_visible=True\n            ),  # Will NOT be in auto config\n        ]\n\n        # Auto mode config: gpt-4o (default) + gpt-4o-mini (additional)\n        # Note: gpt-4 and gpt-3.5-turbo are NOT in this config\n        auto_mode_default = \"gpt-4o\"\n        auto_mode_additional = [\"gpt-4o-mini\"]\n        all_auto_mode_models = [auto_mode_default] + auto_mode_additional\n\n        mock_recommendations = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=auto_mode_default,\n            additional_models=auto_mode_additional,\n        )\n\n        try:\n            # Step 1: Upload provider WITHOUT auto mode, with initial models\n            put_llm_provider(\n                llm_provider_upsert_request=LLMProviderUpsertRequest(\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                    api_key_changed=True,\n                    is_auto_mode=False,  # Not in auto mode initially\n                    model_configurations=initial_models,\n                ),\n                is_creation=True,\n                _=_create_mock_admin(),\n                db_session=db_session,\n            )\n\n            # Verify initial state: all models are visible\n            initial_provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert initial_provider is not None\n            assert initial_provider.is_auto_mode is False\n\n            for mc in initial_provider.model_configurations:\n                assert (\n                    mc.is_visible is True\n                ), f\"Initial model '{mc.name}' should be visible\"\n\n            # Step 2: Update provider to enable auto mode\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=mock_recommendations,\n            ):\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        id=initial_provider.id,\n                        name=provider_name,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=None,  # Not changing API key\n                        api_key_changed=False,\n                        is_auto_mode=True,  # Now enabling auto mode\n                        model_configurations=[],  # Auto mode will sync from config\n                    ),\n                    is_creation=False,  # This is an update\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Step 3: Verify model visibility after auto mode transition\n            # Expire session cache to force fresh fetch after sync_auto_mode_models committed\n            db_session.expire_all()\n            provider_view = fetch_llm_provider_view(\n                provider_name=provider_name, db_session=db_session\n            )\n            assert provider_view is not None\n            assert provider_view.is_auto_mode is True\n\n            # Build a map of model name -> visibility\n            model_visibility = {\n                mc.name: mc.is_visible for mc in provider_view.model_configurations\n            }\n\n            # Models in auto mode config should be visible\n            for model_name in all_auto_mode_models:\n                assert (\n                    model_name in model_visibility\n                ), f\"Auto mode model '{model_name}' should exist\"\n                assert (\n                    model_visibility[model_name] is True\n                ), f\"Auto mode model '{model_name}' should be visible\"\n\n            # Models NOT in auto mode config should NOT be visible\n            models_not_in_config = [\"gpt-4\", \"gpt-3.5-turbo\"]\n            for model_name in models_not_in_config:\n                if model_name in model_visibility:\n                    assert (\n                        model_visibility[model_name] is False\n                    ), f\"Model '{model_name}' not in auto config should NOT be visible\"\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_auto_mode_provider_not_in_config(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Test that when the provider type is not in the GitHub config,\n        no model syncing occurs.\n        \"\"\"\n        # Create recommendations that don't include OpenAI\n        mock_recommendations = LLMRecommendations(\n            version=\"1.0.0\",\n            updated_at=datetime.now(),\n            providers={\n                LlmProviderNames.ANTHROPIC: LLMProviderRecommendation(\n                    default_model=SimpleKnownModel(\n                        name=\"claude-3-5-sonnet-latest\",\n                        display_name=\"Claude 3.5 Sonnet\",\n                    ),\n                    additional_visible_models=[],\n                ),\n            },\n        )\n\n        try:\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=mock_recommendations,\n            ):\n                # Upload an OpenAI provider (not in config)\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        name=provider_name,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                        api_key_changed=True,\n                        is_auto_mode=True,\n                        model_configurations=[\n                            ModelConfigurationUpsertRequest(\n                                name=\"gpt-4o\",\n                                is_visible=True,\n                            )\n                        ],\n                    ),\n                    is_creation=True,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Provider should be created but without synced models from config\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n            assert provider.is_auto_mode is True\n\n            # Only the default model provided in the request should exist\n            model_names = {mc.name for mc in provider.model_configurations}\n            assert \"gpt-4o\" in model_names\n            # Anthropic models should NOT be synced\n            assert \"claude-3-5-sonnet-latest\" not in model_names\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_switching_default_between_auto_mode_providers(\n        self,\n        db_session: Session,\n    ) -> None:\n        \"\"\"\n        Test switching the default provider between two auto mode providers\n        and verifying test_default_provider uses the correct default model.\n\n        Steps:\n        1. Create provider 1 (OpenAI) with auto mode, set as default\n        2. Create provider 2 (Anthropic) with auto mode\n        3. Verify provider 1 is the default\n        4. Change default to provider 2\n        5. Verify provider 2 is the default\n        6. Run test_default_provider and verify it uses provider 2's default model\n        \"\"\"\n        provider_1_name = f\"test-auto-openai-{uuid4().hex[:8]}\"\n        provider_2_name = f\"test-auto-anthropic-{uuid4().hex[:8]}\"\n\n        provider_1_api_key = \"sk-provider1-key-000000000000000000000000000\"\n        provider_2_api_key = \"sk-ant-provider2-key-0000000000000000000000\"\n\n        # Provider 1 (OpenAI) config\n        provider_1_default_model = \"gpt-4o\"\n        provider_1_additional_models = [\"gpt-4o-mini\"]\n\n        # Provider 2 (Anthropic) config\n        provider_2_default_model = \"claude-3-5-sonnet-latest\"\n        provider_2_additional_models = [\"claude-haiku-4-5\"]\n\n        # Create mock recommendations with both providers\n        mock_recommendations = LLMRecommendations(\n            version=\"1.0.0\",\n            updated_at=datetime.now(),\n            providers={\n                LlmProviderNames.OPENAI: LLMProviderRecommendation(\n                    default_model=SimpleKnownModel(\n                        name=provider_1_default_model,\n                        display_name=\"GPT-4o\",\n                    ),\n                    additional_visible_models=[\n                        SimpleKnownModel(name=m, display_name=m.upper())\n                        for m in provider_1_additional_models\n                    ],\n                ),\n                LlmProviderNames.ANTHROPIC: LLMProviderRecommendation(\n                    default_model=SimpleKnownModel(\n                        name=provider_2_default_model,\n                        display_name=\"Claude 3.5 Sonnet\",\n                    ),\n                    additional_visible_models=[\n                        SimpleKnownModel(name=m, display_name=m.upper())\n                        for m in provider_2_additional_models\n                    ],\n                ),\n            },\n        )\n\n        captured_llms: list[LLM] = []\n\n        def mock_test_llm_capture(llm: LLM) -> str | None:\n            \"\"\"Mock test_llm that captures the LLM for inspection.\"\"\"\n            captured_llms.append(llm)\n            return None  # Success\n\n        try:\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=mock_recommendations,\n            ):\n                # Step 1: Create provider 1 (OpenAI) with auto mode\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        name=provider_1_name,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=provider_1_api_key,\n                        api_key_changed=True,\n                        is_auto_mode=True,\n                        model_configurations=[],\n                    ),\n                    is_creation=True,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Set provider 1 as the default\n            db_session.expire_all()\n            provider_1 = fetch_existing_llm_provider(\n                name=provider_1_name, db_session=db_session\n            )\n            assert provider_1 is not None\n            update_default_provider(provider_1.id, provider_1_default_model, db_session)\n\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=mock_recommendations,\n            ):\n                # Step 2: Create provider 2 (Anthropic) with auto mode\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        name=provider_2_name,\n                        provider=LlmProviderNames.ANTHROPIC,\n                        api_key=provider_2_api_key,\n                        api_key_changed=True,\n                        is_auto_mode=True,\n                        model_configurations=[],\n                    ),\n                    is_creation=True,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Step 3: Verify provider 1 is still the default\n            db_session.expire_all()\n            default_model = fetch_default_llm_model(db_session)\n            assert default_model is not None\n            assert default_model.llm_provider.name == provider_1_name\n            assert default_model.name == provider_1_default_model\n            assert default_model.llm_provider.is_auto_mode is True\n\n            # Step 4: Change the default to provider 2\n            provider_2 = fetch_existing_llm_provider(\n                name=provider_2_name, db_session=db_session\n            )\n            assert provider_2 is not None\n            update_default_provider(provider_2.id, provider_2_default_model, db_session)\n\n            # Step 5: Verify provider 2 is now the default\n            db_session.expire_all()\n            default_model = fetch_default_llm_model(db_session)\n            assert default_model is not None\n            assert default_model.llm_provider.name == provider_2_name\n            assert default_model.name == provider_2_default_model\n            assert default_model.llm_provider.is_auto_mode is True\n\n            # Step 6: Run test_default_provider and verify it uses provider 2's model\n            with patch(\n                \"onyx.server.manage.llm.api.test_llm\", side_effect=mock_test_llm_capture\n            ):\n                run_test_default_provider(_=_create_mock_admin())\n\n            # Verify test_llm was called with provider 2's default model\n            assert len(captured_llms) == 1\n            assert captured_llms[0].config.model_name == provider_2_default_model\n            assert captured_llms[0].config.api_key == provider_2_api_key\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_1_name)\n            _cleanup_provider(db_session, provider_2_name)\n\n\nclass TestAutoModeMissingFlows:\n    \"\"\"Regression test: sync_auto_mode_models must create LLMModelFlow rows\n    for every ModelConfiguration it inserts, otherwise the provider vanishes\n    from listing queries that join through LLMModelFlow.\"\"\"\n\n    def test_sync_auto_mode_creates_flow_rows(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"\n        Steps:\n        1. Create a provider with no model configs (empty shell).\n        2. Call sync_auto_mode_models to add models from a mock config.\n        3. Assert every new ModelConfiguration has at least one LLMModelFlow.\n        4. Assert fetch_existing_llm_providers (which joins through\n           LLMModelFlow) returns the provider.\n        \"\"\"\n        mock_recommendations = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=\"gpt-4o\",\n            additional_models=[\"gpt-4o-mini\"],\n        )\n\n        try:\n            # Step 1: Create provider with no model configs\n            put_llm_provider(\n                llm_provider_upsert_request=LLMProviderUpsertRequest(\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                    api_key_changed=True,\n                    is_auto_mode=True,\n                    model_configurations=[],\n                ),\n                is_creation=True,\n                _=_create_mock_admin(),\n                db_session=db_session,\n            )\n\n            # Step 2: Run sync_auto_mode_models (simulating the periodic sync)\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n\n            sync_auto_mode_models(\n                db_session=db_session,\n                provider=provider,\n                llm_recommendations=mock_recommendations,\n            )\n\n            # Step 3: Every ModelConfiguration must have at least one LLMModelFlow\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n\n            synced_model_names = {mc.name for mc in provider.model_configurations}\n            assert \"gpt-4o\" in synced_model_names\n            assert \"gpt-4o-mini\" in synced_model_names\n\n            for mc in provider.model_configurations:\n                assert len(mc.llm_model_flows) > 0, (\n                    f\"ModelConfiguration '{mc.name}' (id={mc.id}) has no \"\n                    f\"LLMModelFlow rows — it will be invisible to listing queries\"\n                )\n\n                flow_types = {f.llm_model_flow_type for f in mc.llm_model_flows}\n                assert (\n                    LLMModelFlowType.CHAT in flow_types\n                ), f\"ModelConfiguration '{mc.name}' is missing a CHAT flow\"\n\n            # Step 4: The provider must appear in fetch_existing_llm_providers\n            listed_providers = fetch_existing_llm_providers(\n                db_session=db_session,\n                flow_type_filter=[LLMModelFlowType.CHAT],\n            )\n            listed_provider_names = {p.name for p in listed_providers}\n            assert (\n                provider_name in listed_provider_names\n            ), f\"Provider '{provider_name}' not returned by fetch_existing_llm_providers — models are missing flow rows\"\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n\nclass TestAutoModeTransitionsAndResync:\n    \"\"\"Tests for auto/manual transitions, config evolution, and sync idempotency.\"\"\"\n\n    def test_transition_to_auto_mode_preserves_default(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"When the default provider transitions from manual to auto mode,\n        the global default should be preserved (set to the recommended model).\n\n        Steps:\n        1. Create a manual-mode provider with models, set it as global default.\n        2. Transition to auto mode (model_configurations=[] triggers cascade\n           delete of old ModelConfigurations and their LLMModelFlow rows).\n        3. Verify the provider is still the global default, now using the\n           recommended default model from the GitHub config.\n        \"\"\"\n        initial_models = [\n            ModelConfigurationUpsertRequest(name=\"gpt-4o\", is_visible=True),\n            ModelConfigurationUpsertRequest(name=\"gpt-4o-mini\", is_visible=True),\n        ]\n\n        auto_config = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=\"gpt-4o-mini\",\n            additional_models=[\"gpt-4o\"],\n        )\n\n        try:\n            # Step 1: Create manual-mode provider and set as default\n            put_llm_provider(\n                llm_provider_upsert_request=LLMProviderUpsertRequest(\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                    api_key_changed=True,\n                    is_auto_mode=False,\n                    model_configurations=initial_models,\n                ),\n                is_creation=True,\n                _=_create_mock_admin(),\n                db_session=db_session,\n            )\n\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n            update_default_provider(provider.id, \"gpt-4o\", db_session)\n\n            default_before = fetch_default_llm_model(db_session)\n            assert default_before is not None\n            assert default_before.name == \"gpt-4o\"\n            assert default_before.llm_provider_id == provider.id\n\n            # Step 2: Transition to auto mode\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=auto_config,\n            ):\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        id=provider.id,\n                        name=provider_name,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=None,\n                        api_key_changed=False,\n                        is_auto_mode=True,\n                        model_configurations=[],\n                    ),\n                    is_creation=False,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Step 3: Default should be preserved on this provider\n            db_session.expire_all()\n            default_after = fetch_default_llm_model(db_session)\n            assert default_after is not None, (\n                \"Default model should not be None after transitioning to auto mode — \"\n                \"the provider was the default before and should remain so\"\n            )\n            assert (\n                default_after.llm_provider_id == provider.id\n            ), \"Default should still belong to the same provider after transition\"\n            assert (\n                default_after.name == \"gpt-4o-mini\"\n            ), f\"Default should be updated to the recommended model 'gpt-4o-mini', got '{default_after.name}'\"\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_auto_to_manual_mode_preserves_models_and_stops_syncing(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"Disabling auto mode should preserve the current model list and\n        prevent future syncs from altering visibility.\n\n        Steps:\n        1. Create provider in auto mode — models synced from config.\n        2. Update provider to manual mode (is_auto_mode=False).\n        3. Verify all models remain with unchanged visibility.\n        4. Call sync_auto_mode_models with a *different* config.\n        5. Verify fetch_auto_mode_providers excludes this provider, so the\n           periodic task would never call sync on it.\n        \"\"\"\n        initial_config = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=\"gpt-4o\",\n            additional_models=[\"gpt-4o-mini\"],\n        )\n\n        try:\n            # Step 1: Create in auto mode\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=initial_config,\n            ):\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        name=provider_name,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                        api_key_changed=True,\n                        is_auto_mode=True,\n                        model_configurations=[],\n                    ),\n                    is_creation=True,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n            visibility_before = {\n                mc.name: mc.is_visible for mc in provider.model_configurations\n            }\n            assert visibility_before == {\"gpt-4o\": True, \"gpt-4o-mini\": True}\n\n            # Step 2: Switch to manual mode\n            put_llm_provider(\n                llm_provider_upsert_request=LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=None,\n                    api_key_changed=False,\n                    is_auto_mode=False,\n                    model_configurations=[\n                        ModelConfigurationUpsertRequest(name=\"gpt-4o\", is_visible=True),\n                        ModelConfigurationUpsertRequest(\n                            name=\"gpt-4o-mini\", is_visible=True\n                        ),\n                    ],\n                ),\n                is_creation=False,\n                _=_create_mock_admin(),\n                db_session=db_session,\n            )\n\n            # Step 3: Models unchanged\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n            assert provider.is_auto_mode is False\n            visibility_after = {\n                mc.name: mc.is_visible for mc in provider.model_configurations\n            }\n            assert visibility_after == visibility_before\n\n            # Step 4-5: Provider excluded from auto mode queries\n            auto_providers = fetch_auto_mode_providers(db_session)\n            auto_provider_ids = {p.id for p in auto_providers}\n            assert provider.id not in auto_provider_ids\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_resync_adds_new_and_hides_removed_models(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"When the GitHub config changes between syncs, a subsequent sync\n        should add newly listed models and hide models that were removed.\n\n        Steps:\n        1. Create provider in auto mode with config v1: [gpt-4o, gpt-4o-mini].\n        2. Sync with config v2: [gpt-4o, gpt-4-turbo] (gpt-4o-mini removed,\n           gpt-4-turbo added).\n        3. Verify gpt-4o still visible, gpt-4o-mini hidden, gpt-4-turbo added\n           and visible.\n        \"\"\"\n        config_v1 = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=\"gpt-4o\",\n            additional_models=[\"gpt-4o-mini\"],\n        )\n        config_v2 = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=\"gpt-4o\",\n            additional_models=[\"gpt-4-turbo\"],\n        )\n\n        try:\n            # Step 1: Create with config v1\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=config_v1,\n            ):\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        name=provider_name,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                        api_key_changed=True,\n                        is_auto_mode=True,\n                        model_configurations=[],\n                    ),\n                    is_creation=True,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Step 2: Re-sync with config v2\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n\n            changes = sync_auto_mode_models(\n                db_session=db_session,\n                provider=provider,\n                llm_recommendations=config_v2,\n            )\n            assert changes > 0\n\n            # Step 3: Verify\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n\n            visibility = {\n                mc.name: mc.is_visible for mc in provider.model_configurations\n            }\n\n            # gpt-4o: still in config -> visible\n            assert visibility[\"gpt-4o\"] is True\n            # gpt-4o-mini: removed from config -> hidden (not deleted)\n            assert \"gpt-4o-mini\" in visibility, \"Removed model should still exist in DB\"\n            assert visibility[\"gpt-4o-mini\"] is False\n            # gpt-4-turbo: newly added -> visible\n            assert visibility[\"gpt-4-turbo\"] is True\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_sync_is_idempotent(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"Running sync twice with the same config should produce zero\n        changes on the second call.\"\"\"\n        config = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=\"gpt-4o\",\n            additional_models=[\"gpt-4o-mini\", \"gpt-4-turbo\"],\n        )\n\n        try:\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=config,\n            ):\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        name=provider_name,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                        api_key_changed=True,\n                        is_auto_mode=True,\n                        model_configurations=[],\n                    ),\n                    is_creation=True,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n\n            # First explicit sync (may report changes if creation already synced)\n            sync_auto_mode_models(\n                db_session=db_session,\n                provider=provider,\n                llm_recommendations=config,\n            )\n\n            # Snapshot state after first sync\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n            snapshot = {\n                mc.name: (mc.is_visible, mc.display_name)\n                for mc in provider.model_configurations\n            }\n\n            # Second sync — should be a no-op\n            changes = sync_auto_mode_models(\n                db_session=db_session,\n                provider=provider,\n                llm_recommendations=config,\n            )\n            assert (\n                changes == 0\n            ), f\"Expected 0 changes on idempotent re-sync, got {changes}\"\n\n            # State should be identical\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n            current = {\n                mc.name: (mc.is_visible, mc.display_name)\n                for mc in provider.model_configurations\n            }\n            assert current == snapshot\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_default_model_hidden_when_removed_from_config(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"When the current default model is removed from the config, sync\n        should hide it. The default model flow row should still exist (it\n        points at the ModelConfiguration), but the model is no longer visible.\n\n        Steps:\n        1. Create provider with config: default=gpt-4o, additional=[gpt-4o-mini].\n        2. Set gpt-4o as the global default.\n        3. Re-sync with config: default=gpt-4o-mini (gpt-4o removed entirely).\n        4. Verify gpt-4o is hidden, gpt-4o-mini is visible, and\n           fetch_default_llm_model still returns a result (the flow row persists).\n        \"\"\"\n        config_v1 = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=\"gpt-4o\",\n            additional_models=[\"gpt-4o-mini\"],\n        )\n        config_v2 = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=\"gpt-4o-mini\",\n            additional_models=[],\n        )\n\n        try:\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=config_v1,\n            ):\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        name=provider_name,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                        api_key_changed=True,\n                        is_auto_mode=True,\n                        model_configurations=[],\n                    ),\n                    is_creation=True,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Step 2: Set gpt-4o as global default\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n            update_default_provider(provider.id, \"gpt-4o\", db_session)\n\n            default_before = fetch_default_llm_model(db_session)\n            assert default_before is not None\n            assert default_before.name == \"gpt-4o\"\n\n            # Step 3: Re-sync with config v2 (gpt-4o removed)\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n\n            changes = sync_auto_mode_models(\n                db_session=db_session,\n                provider=provider,\n                llm_recommendations=config_v2,\n            )\n            assert changes > 0\n\n            # Step 4: Verify visibility\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n\n            visibility = {\n                mc.name: mc.is_visible for mc in provider.model_configurations\n            }\n            assert visibility[\"gpt-4o\"] is False, \"Removed default should be hidden\"\n            assert visibility[\"gpt-4o-mini\"] is True, \"New default should be visible\"\n\n            # The old default (gpt-4o) is now hidden. sync_auto_mode_models\n            # should update the global default to the new recommended default\n            # (gpt-4o-mini) so that it is not silently lost.\n            db_session.expire_all()\n            default_after = fetch_default_llm_model(db_session)\n            assert (\n                default_after is not None\n            ), \"Default model should not be None — sync should set the new recommended default when the old one is hidden\"\n            assert (\n                default_after.name == \"gpt-4o-mini\"\n            ), f\"Default should be updated to the new recommended model 'gpt-4o-mini', but got '{default_after.name}'\"\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_sync_updates_default_when_recommended_default_changes(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"When the provider owns the CHAT default and a sync arrives with a\n        different recommended default model (both models still in config),\n        the global default should be updated to the new recommendation.\n\n        Steps:\n        1. Create auto-mode provider with config v1: default=gpt-4o.\n        2. Set gpt-4o as the global CHAT default.\n        3. Re-sync with config v2: default=gpt-4o-mini (gpt-4o still present).\n        4. Verify the CHAT default switched to gpt-4o-mini and both models\n           remain visible.\n        \"\"\"\n        config_v1 = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=\"gpt-4o\",\n            additional_models=[\"gpt-4o-mini\"],\n        )\n        config_v2 = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=\"gpt-4o-mini\",\n            additional_models=[\"gpt-4o\"],\n        )\n\n        try:\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=config_v1,\n            ):\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        name=provider_name,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                        api_key_changed=True,\n                        is_auto_mode=True,\n                        model_configurations=[],\n                    ),\n                    is_creation=True,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Set gpt-4o as the global CHAT default\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n            update_default_provider(provider.id, \"gpt-4o\", db_session)\n\n            default_before = fetch_default_llm_model(db_session)\n            assert default_before is not None\n            assert default_before.name == \"gpt-4o\"\n\n            # Re-sync with config v2 (recommended default changed)\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n\n            changes = sync_auto_mode_models(\n                db_session=db_session,\n                provider=provider,\n                llm_recommendations=config_v2,\n            )\n            assert changes > 0, \"Sync should report changes when default switches\"\n\n            # Both models should remain visible\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n            visibility = {\n                mc.name: mc.is_visible for mc in provider.model_configurations\n            }\n            assert visibility[\"gpt-4o\"] is True\n            assert visibility[\"gpt-4o-mini\"] is True\n\n            # The CHAT default should now be gpt-4o-mini\n            default_after = fetch_default_llm_model(db_session)\n            assert default_after is not None\n            assert (\n                default_after.name == \"gpt-4o-mini\"\n            ), f\"Default should be updated to 'gpt-4o-mini', got '{default_after.name}'\"\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n\n    def test_sync_idempotent_when_default_already_matches(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"When the provider owns the CHAT default and it already matches the\n        recommended default, re-syncing should report zero changes.\n\n        This is a regression test for the bug where changes was unconditionally\n        incremented even when the default was already correct.\n        \"\"\"\n        config = _create_mock_llm_recommendations(\n            provider=LlmProviderNames.OPENAI,\n            default_model_name=\"gpt-4o\",\n            additional_models=[\"gpt-4o-mini\"],\n        )\n\n        try:\n            with patch(\n                \"onyx.server.manage.llm.api.fetch_llm_recommendations_from_github\",\n                return_value=config,\n            ):\n                put_llm_provider(\n                    llm_provider_upsert_request=LLMProviderUpsertRequest(\n                        name=provider_name,\n                        provider=LlmProviderNames.OPENAI,\n                        api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                        api_key_changed=True,\n                        is_auto_mode=True,\n                        model_configurations=[],\n                    ),\n                    is_creation=True,\n                    _=_create_mock_admin(),\n                    db_session=db_session,\n                )\n\n            # Set gpt-4o (the recommended default) as global CHAT default\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n            update_default_provider(provider.id, \"gpt-4o\", db_session)\n\n            # First sync to stabilize state\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n            sync_auto_mode_models(\n                db_session=db_session,\n                provider=provider,\n                llm_recommendations=config,\n            )\n\n            # Second sync — default already matches, should be a no-op\n            db_session.expire_all()\n            provider = fetch_existing_llm_provider(\n                name=provider_name, db_session=db_session\n            )\n            assert provider is not None\n            changes = sync_auto_mode_models(\n                db_session=db_session,\n                provider=provider,\n                llm_recommendations=config,\n            )\n            assert (\n                changes == 0\n            ), f\"Expected 0 changes when default already matches recommended, got {changes}\"\n\n            # Default should still be gpt-4o\n            default_model = fetch_default_llm_model(db_session)\n            assert default_model is not None\n            assert default_model.name == \"gpt-4o\"\n\n        finally:\n            db_session.rollback()\n            _cleanup_provider(db_session, provider_name)\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/llm/test_llm_provider_called.py",
    "content": "from collections.abc import Generator\nfrom contextlib import contextmanager\nfrom typing import Any\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom fastapi_users.password import PasswordHelper\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.enums import AccountType\nfrom onyx.db.llm import fetch_existing_llm_provider\nfrom onyx.db.llm import remove_llm_provider\nfrom onyx.db.llm import update_default_provider\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.db.models import User\nfrom onyx.db.models import UserRole\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.override_models import LLMOverride\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\nfrom onyx.server.query_and_chat.chat_backend import create_new_chat_session\nfrom onyx.server.query_and_chat.models import ChatSessionCreationRequest\nfrom onyx.server.query_and_chat.models import MessageResponseIDInfo\nfrom tests.external_dependency_unit.answer.stream_test_assertions import (\n    assert_answer_stream_part_correct,\n)\nfrom tests.external_dependency_unit.answer.stream_test_builder import StreamTestBuilder\nfrom tests.external_dependency_unit.answer.stream_test_utils import submit_query\nfrom tests.external_dependency_unit.answer.stream_test_utils import tokenise\nfrom tests.external_dependency_unit.mock_llm import LLMAnswerResponse\nfrom tests.external_dependency_unit.mock_llm import MockLLM\n\n\ndef _create_admin(db_session: Session) -> User:\n    \"\"\"Create a mock admin user for testing.\"\"\"\n    unique_email = f\"admin_{uuid4().hex[:8]}@example.com\"\n    password_helper = PasswordHelper()\n    password = password_helper.generate()\n    hashed_password = password_helper.hash(password)\n\n    user = User(\n        id=uuid4(),\n        email=unique_email,\n        hashed_password=hashed_password,\n        is_active=True,\n        is_superuser=True,\n        is_verified=True,\n        role=UserRole.ADMIN,\n        account_type=AccountType.STANDARD,\n    )\n    db_session.add(user)\n    db_session.commit()\n    db_session.refresh(user)\n    return user\n\n\ndef _create_provider(\n    db_session: Session,\n    provider: LlmProviderNames,\n    name: str,\n    is_public: bool,\n) -> int:\n    result = upsert_llm_provider(\n        LLMProviderUpsertRequest(\n            name=name,\n            provider=provider,\n            api_key=\"sk-ant-api03-...\",\n            is_public=is_public,\n            model_configurations=[\n                ModelConfigurationUpsertRequest(\n                    name=\"claude-3-5-sonnet-20240620\",\n                    is_visible=True,\n                ),\n            ],\n        ),\n        db_session=db_session,\n    )\n    return result.id\n\n\n@contextmanager\ndef use_mock_llm() -> (\n    Generator[tuple[MockLLM, dict[str, bool | str | None]], None, None]\n):\n    \"\"\"Context manager that patches LLM factory functions and tracks which ones are called.\"\"\"\n    mock_llm = MockLLM()\n\n    call_tracker: dict[str, bool | str | None] = {\n        \"get_default_llm_called\": False,\n        \"get_llm_called\": False,\n        \"provider\": None,\n    }\n\n    def mock_get_default_llm(*_args: Any, **_kwargs: Any) -> MockLLM:\n        call_tracker[\"get_default_llm_called\"] = True\n        return mock_llm\n\n    def mock_get_llm(provider: str, *_args: Any, **_kwargs: Any) -> MockLLM:\n        call_tracker[\"get_llm_called\"] = True\n        call_tracker[\"provider\"] = provider\n        return mock_llm\n\n    with (\n        patch(\n            \"onyx.llm.factory.get_default_llm\",\n            side_effect=mock_get_default_llm,\n        ),\n        patch(\n            \"onyx.llm.factory.get_llm\",\n            side_effect=mock_get_llm,\n        ),\n    ):\n        yield mock_llm, call_tracker\n\n\ndef _cleanup_provider(db_session: Session, name: str) -> None:\n    \"\"\"Helper to clean up a test provider by name.\"\"\"\n    provider = fetch_existing_llm_provider(name=name, db_session=db_session)\n    if provider:\n        remove_llm_provider(db_session, provider.id)\n\n\ndef _assert_llm_calls(\n    call_tracker: dict[str, bool | str | None], expected_provider: str\n) -> None:\n    \"\"\"Assert that get_llm was called with expected provider and get_default_llm was not called.\"\"\"\n    assert not call_tracker[\n        \"get_default_llm_called\"\n    ], \"get_default_llm should not be called when using private provider\"\n    assert call_tracker[\n        \"get_llm_called\"\n    ], \"get_llm should be called when using private provider\"\n    assert (\n        call_tracker[\"provider\"] == expected_provider\n    ), f\"Expected provider '{expected_provider}', got '{call_tracker['provider']}'\"\n\n\ndef _reset_call_tracker(call_tracker: dict[str, bool | str | None]) -> None:\n    \"\"\"Reset the call tracker for the next test iteration.\"\"\"\n    call_tracker[\"get_default_llm_called\"] = False\n    call_tracker[\"get_llm_called\"] = False\n    call_tracker[\"provider\"] = None\n\n\ndef test_user_sends_message_to_private_provider(\n    db_session: Session,\n) -> None:\n    \"\"\"Test that messages sent to a private provider use get_llm instead of get_default_llm.\"\"\"\n    admin_user = _create_admin(db_session)\n\n    # Create providers\n    public_provider_id = _create_provider(\n        db_session, LlmProviderNames.ANTHROPIC, \"public-provider\", True\n    )\n    _create_provider(db_session, LlmProviderNames.GOOGLE, \"private-provider\", False)\n\n    update_default_provider(\n        public_provider_id, \"claude-3-5-sonnet-20240620\", db_session\n    )\n\n    try:\n        # Create chat session\n        chat_session = create_new_chat_session(\n            ChatSessionCreationRequest(),\n            user=admin_user,\n            db_session=db_session,\n        )\n\n        chat_session_id = chat_session.chat_session_id\n        answer_tokens_1 = tokenise(\"Hello, how are you?\")\n        answer_tokens_2 = tokenise(\"I'm good, thank you!\")\n\n        with use_mock_llm() as (mock_llm, call_tracker):\n            handler = StreamTestBuilder(llm_controller=mock_llm)\n\n            # First message\n            handler.add_response(LLMAnswerResponse(answer_tokens=answer_tokens_1))\n            answer_stream = submit_query(\n                query=\"Hello, how are you?\",\n                chat_session_id=chat_session_id,\n                db_session=db_session,\n                user=admin_user,\n                llm_override=LLMOverride(\n                    model_provider=\"private-provider\",\n                    model_version=\"claude-3-5-sonnet-20240620\",\n                ),\n            )\n\n            assert_answer_stream_part_correct(\n                received=next(answer_stream),\n                expected=MessageResponseIDInfo(\n                    user_message_id=1,\n                    reserved_assistant_message_id=1,\n                ),\n            )\n\n            handler.expect_agent_response(\n                answer_tokens=answer_tokens_1,\n                turn_index=0,\n            ).run_and_validate(stream=answer_stream)\n\n            with pytest.raises(StopIteration):\n                next(answer_stream)\n\n            _assert_llm_calls(call_tracker, \"google\")\n            _reset_call_tracker(call_tracker)\n\n            # Second message\n            handler.add_response(LLMAnswerResponse(answer_tokens=answer_tokens_2))\n            answer_stream = submit_query(\n                query=\"I'm good, thank you!\",\n                chat_session_id=chat_session_id,\n                db_session=db_session,\n                user=admin_user,\n                llm_override=LLMOverride(\n                    model_provider=\"private-provider\",\n                    model_version=\"claude-3-5-sonnet-20240620\",\n                ),\n            )\n\n            assert_answer_stream_part_correct(\n                received=next(answer_stream),\n                expected=MessageResponseIDInfo(\n                    user_message_id=2,\n                    reserved_assistant_message_id=2,\n                ),\n            )\n\n            handler.expect_agent_response(\n                answer_tokens=answer_tokens_2,\n                turn_index=0,\n            ).run_and_validate(stream=answer_stream)\n\n            with pytest.raises(StopIteration):\n                next(answer_stream)\n\n            _assert_llm_calls(call_tracker, \"google\")\n\n    finally:\n        _cleanup_provider(db_session, \"public-provider\")\n        _cleanup_provider(db_session, \"private-provider\")\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/llm/test_llm_provider_default_model_protection.py",
    "content": "\"\"\"\nThis should act as the main point of reference for testing that default model\nlogic is consisten.\n\n -\n\"\"\"\n\nfrom collections.abc import Generator\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.llm import fetch_existing_llm_provider\nfrom onyx.db.llm import remove_llm_provider\nfrom onyx.db.llm import update_default_provider\nfrom onyx.db.llm import update_default_vision_provider\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import LLMProviderView\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\n\n\ndef _create_test_provider(\n    db_session: Session,\n    name: str,\n    models: list[ModelConfigurationUpsertRequest] | None = None,\n) -> LLMProviderView:\n    \"\"\"Helper to create a test LLM provider with multiple models.\"\"\"\n    if models is None:\n        models = [\n            ModelConfigurationUpsertRequest(\n                name=\"gpt-4o\", is_visible=True, supports_image_input=True\n            ),\n            ModelConfigurationUpsertRequest(\n                name=\"gpt-4o-mini\", is_visible=True, supports_image_input=False\n            ),\n        ]\n    return upsert_llm_provider(\n        LLMProviderUpsertRequest(\n            name=name,\n            provider=LlmProviderNames.OPENAI,\n            api_key=\"sk-test-key-00000000000000000000000000000000000\",\n            api_key_changed=True,\n            model_configurations=models,\n        ),\n        db_session=db_session,\n    )\n\n\ndef _cleanup_provider(db_session: Session, name: str) -> None:\n    \"\"\"Helper to clean up a test provider by name.\"\"\"\n    provider = fetch_existing_llm_provider(name=name, db_session=db_session)\n    if provider:\n        remove_llm_provider(db_session, provider.id)\n\n\n@pytest.fixture\ndef provider_name(db_session: Session) -> Generator[str, None, None]:\n    \"\"\"Generate a unique provider name for each test, with automatic cleanup.\"\"\"\n    name = f\"test-provider-{uuid4().hex[:8]}\"\n    yield name\n    db_session.rollback()\n    _cleanup_provider(db_session, name)\n\n\nclass TestDefaultModelProtection:\n    \"\"\"Tests that the default model cannot be removed or hidden.\"\"\"\n\n    def test_cannot_remove_default_text_model(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"Removing the default text model from a provider should raise ValueError.\"\"\"\n        provider = _create_test_provider(db_session, provider_name)\n        update_default_provider(provider.id, \"gpt-4o\", db_session)\n\n        # Try to update the provider without the default model\n        with pytest.raises(ValueError, match=\"Cannot remove the default model\"):\n            upsert_llm_provider(\n                LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                    api_key_changed=True,\n                    model_configurations=[\n                        ModelConfigurationUpsertRequest(\n                            name=\"gpt-4o-mini\", is_visible=True\n                        ),\n                    ],\n                ),\n                db_session=db_session,\n            )\n\n    def test_cannot_hide_default_text_model(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"Setting is_visible=False on the default text model should raise ValueError.\"\"\"\n        provider = _create_test_provider(db_session, provider_name)\n        update_default_provider(provider.id, \"gpt-4o\", db_session)\n\n        # Try to hide the default model\n        with pytest.raises(ValueError, match=\"Cannot hide the default model\"):\n            upsert_llm_provider(\n                LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                    api_key_changed=True,\n                    model_configurations=[\n                        ModelConfigurationUpsertRequest(\n                            name=\"gpt-4o\", is_visible=False\n                        ),\n                        ModelConfigurationUpsertRequest(\n                            name=\"gpt-4o-mini\", is_visible=True\n                        ),\n                    ],\n                ),\n                db_session=db_session,\n            )\n\n    def test_cannot_remove_default_vision_model(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"Removing the default vision model from a provider should raise ValueError.\"\"\"\n        provider = _create_test_provider(db_session, provider_name)\n        # Set gpt-4o as both the text and vision default\n        update_default_provider(provider.id, \"gpt-4o\", db_session)\n        update_default_vision_provider(provider.id, \"gpt-4o\", db_session)\n\n        # Try to remove the default vision model\n        with pytest.raises(ValueError, match=\"Cannot remove the default model\"):\n            upsert_llm_provider(\n                LLMProviderUpsertRequest(\n                    id=provider.id,\n                    name=provider_name,\n                    provider=LlmProviderNames.OPENAI,\n                    api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                    api_key_changed=True,\n                    model_configurations=[\n                        ModelConfigurationUpsertRequest(\n                            name=\"gpt-4o-mini\", is_visible=True\n                        ),\n                    ],\n                ),\n                db_session=db_session,\n            )\n\n    def test_can_remove_non_default_model(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"Removing a non-default model should succeed.\"\"\"\n        provider = _create_test_provider(db_session, provider_name)\n        update_default_provider(provider.id, \"gpt-4o\", db_session)\n\n        # Remove gpt-4o-mini (not default) — should succeed\n        updated = upsert_llm_provider(\n            LLMProviderUpsertRequest(\n                id=provider.id,\n                name=provider_name,\n                provider=LlmProviderNames.OPENAI,\n                api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                api_key_changed=True,\n                model_configurations=[\n                    ModelConfigurationUpsertRequest(\n                        name=\"gpt-4o\", is_visible=True, supports_image_input=True\n                    ),\n                ],\n            ),\n            db_session=db_session,\n        )\n\n        model_names = {mc.name for mc in updated.model_configurations}\n        assert \"gpt-4o\" in model_names\n        assert \"gpt-4o-mini\" not in model_names\n\n    def test_can_hide_non_default_model(\n        self,\n        db_session: Session,\n        provider_name: str,\n    ) -> None:\n        \"\"\"Hiding a non-default model should succeed.\"\"\"\n        provider = _create_test_provider(db_session, provider_name)\n        update_default_provider(provider.id, \"gpt-4o\", db_session)\n\n        # Hide gpt-4o-mini (not default) — should succeed\n        updated = upsert_llm_provider(\n            LLMProviderUpsertRequest(\n                id=provider.id,\n                name=provider_name,\n                provider=LlmProviderNames.OPENAI,\n                api_key=\"sk-test-key-00000000000000000000000000000000000\",\n                api_key_changed=True,\n                model_configurations=[\n                    ModelConfigurationUpsertRequest(\n                        name=\"gpt-4o\", is_visible=True, supports_image_input=True\n                    ),\n                    ModelConfigurationUpsertRequest(\n                        name=\"gpt-4o-mini\", is_visible=False\n                    ),\n                ],\n            ),\n            db_session=db_session,\n        )\n\n        model_visibility = {\n            mc.name: mc.is_visible for mc in updated.model_configurations\n        }\n        assert model_visibility[\"gpt-4o\"] is True\n        assert model_visibility[\"gpt-4o-mini\"] is False\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/llm/test_prompt_caching.py",
    "content": "\"\"\"External dependency unit tests for prompt caching functionality.\n\nThese tests call LLM providers directly and use litellm's completion_cost() to verify\nthat prompt caching reduces costs.\n\"\"\"\n\nimport json\nimport os\nimport tempfile\nimport time\nfrom pathlib import Path\nfrom typing import Any\n\nimport pytest\nfrom litellm import completion_cost\nfrom sqlalchemy.orm import Session\n\nfrom onyx.llm.model_response import Usage\nfrom onyx.llm.models import AssistantMessage\nfrom onyx.llm.models import ChatCompletionMessage\nfrom onyx.llm.models import SystemMessage\nfrom onyx.llm.models import UserMessage\nfrom onyx.llm.multi_llm import LitellmLLM\nfrom onyx.llm.prompt_cache.processor import process_with_prompt_cache\n\n\nVERTEX_CREDENTIALS_ENV = \"VERTEX_CREDENTIALS\"\nVERTEX_LOCATION_ENV = \"VERTEX_LOCATION\"\nVERTEX_MODEL_ENV = \"VERTEX_MODEL_NAME\"\nDEFAULT_VERTEX_MODEL = \"gemini-2.5-flash\"\n\n\ndef _extract_cached_tokens(usage: Usage | None) -> int:\n    \"\"\"Helper to extract cached_tokens from usage (dict or object).\"\"\"\n    if not usage:\n        print(\"Usage is None\")\n        return 0\n\n    cached_tokens = usage.cache_creation_input_tokens\n\n    return cached_tokens\n\n\ndef _extract_prompt_tokens(usage: Usage | None) -> int:\n    \"\"\"Helper to extract prompt_tokens from usage (dict or object).\"\"\"\n    if not usage:\n        print(\"Usage is None\")\n        return 0\n\n    return usage.prompt_tokens\n\n\ndef _extract_cache_read_tokens(usage: Usage | None) -> int:\n    \"\"\"Extract cache read metrics from usage (dict or object).\"\"\"\n    print(f\"usage: {usage}\")\n    if not usage:\n        print(\"Usage is None\")\n        return 0\n\n    return usage.cache_read_input_tokens\n\n\ndef _get_usage_value(usage: Any, key: str) -> int:\n    \"\"\"Retrieve a numeric field from usage objects or dictionaries.\"\"\"\n    if isinstance(usage, dict):\n        value = usage.get(key)\n    else:\n        value = getattr(usage, key, None)\n    return int(value or 0)\n\n\ndef _resolve_vertex_credentials() -> tuple[Path, bool]:\n    \"\"\"Return a path to credentials; support inline JSON or filesystem path.\"\"\"\n    raw_value = os.environ.get(VERTEX_CREDENTIALS_ENV)\n    if not raw_value:\n        raise FileNotFoundError(\"Vertex credentials environment variable not set.\")\n\n    raw_value = raw_value.strip()\n    candidate_path = Path(raw_value)\n    if len(raw_value) < 100 and candidate_path.exists():\n        return candidate_path, False\n\n    try:\n        json.loads(raw_value)\n    except json.JSONDecodeError as exc:\n        raise ValueError(\n            \"Vertex credentials must be a valid JSON string or file path.\"\n        ) from exc\n\n    temp_file = tempfile.NamedTemporaryFile(\n        mode=\"w\", suffix=\".json\", delete=False, encoding=\"utf-8\"\n    )\n    try:\n        temp_file.write(raw_value)\n        temp_file.flush()\n    finally:\n        temp_file.close()\n    return Path(temp_file.name), True\n\n\ndef _validate_vertex_credentials_file(credentials_path: Path) -> None:\n    \"\"\"Validate that the credentials file contains a usable service account.\"\"\"\n    try:\n        content = credentials_path.read_text(encoding=\"utf-8\")\n    except OSError as exc:\n        raise ValueError(f\"Failed to read credentials file: {exc}\") from exc\n\n    try:\n        data = json.loads(content)\n    except json.JSONDecodeError as exc:\n        raise ValueError(\"Credentials file does not contain valid JSON.\") from exc\n\n    if not isinstance(data, dict):\n        raise ValueError(\"Credentials JSON must be an object.\")\n\n    cred_type = data.get(\"type\")\n    if cred_type != \"service_account\":\n        raise ValueError(\n            f\"Unsupported credential type '{cred_type}'. Provide a service_account JSON blob.\"\n        )\n\n    missing_fields = [\n        field\n        for field in (\"project_id\", \"client_email\", \"private_key\")\n        if not data.get(field)\n    ]\n    if missing_fields:\n        raise ValueError(\n            \"Missing required service account fields: \"\n            + \", \".join(sorted(missing_fields))\n        )\n\n    try:\n        from google.oauth2 import service_account\n\n        service_account.Credentials.from_service_account_info(\n            data,\n            scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n        )\n    except (\n        Exception\n    ) as exc:  # pragma: no cover - depends on google SDK validation paths\n        raise ValueError(\n            f\"Failed to construct service account credentials: {exc}\"\n        ) from exc\n\n\n@pytest.mark.skip(reason=\"OpenAI prompt caching is unreliable\")\n@pytest.mark.skipif(\n    not os.environ.get(\"OPENAI_API_KEY\"),\n    reason=\"OpenAI API key not available\",\n)\ndef test_openai_prompt_caching_reduces_costs(\n    db_session: Session,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that OpenAI prompt caching reduces costs on subsequent calls.\n\n    OpenAI uses implicit caching for prompts >1024 tokens.\n    \"\"\"\n    attempts = 8\n    successes = 0\n    for _ in range(attempts):\n        # Create OpenAI LLM\n        llm = LitellmLLM(\n            api_key=os.environ[\"OPENAI_API_KEY\"],\n            model_provider=\"openai\",\n            model_name=\"gpt-4o\",\n            max_input_tokens=128000,\n        )\n        import random\n        import string\n\n        # Insert 32 random lowercase characters at the start of long_context\n        # to prevent holdover cache from previous tests\n        random_prefix = \"\".join(random.choices(string.ascii_lowercase, k=32))\n        # Create a long context message to ensure caching threshold is met (>1024 tokens)\n        long_context = (\n            random_prefix\n            + \"This is a comprehensive document about artificial intelligence and machine learning. \"\n            + \" \".join(\n                [\n                    f\"Section {i}: This section discusses various aspects of AI technology, \"\n                    f\"including neural networks, deep learning, natural language processing, \"\n                    f\"computer vision, and reinforcement learning. These technologies are \"\n                    f\"revolutionizing how we interact with computers and process information.\"\n                    for i in range(50)\n                ]\n            )\n        )\n\n        # Split into cacheable prefix (the long context) and suffix (the question)\n        cacheable_prefix: list[ChatCompletionMessage] = [\n            UserMessage(role=\"user\", content=long_context)\n        ]\n\n        # First call - creates cache\n        print(\"\\n=== First call (cache creation) ===\")\n        question1: list[ChatCompletionMessage] = [\n            UserMessage(role=\"user\", content=\"What are the main topics discussed?\")\n        ]\n\n        # Apply prompt caching (for OpenAI, this is mostly a no-op but should still work)\n        processed_messages1, _ = process_with_prompt_cache(\n            llm_config=llm.config,\n            cacheable_prefix=cacheable_prefix,\n            suffix=question1,\n            continuation=False,\n        )\n        # print(f\"Processed messages 1: {processed_messages1}\")\n        # print(f\"Metadata 1: {metadata1}\")\n        # print(f\"Cache key 1: {metadata1.cache_key if metadata1 else None}\")\n\n        # Call litellm directly so we can get the raw response\n        response1 = llm.invoke(prompt=processed_messages1)\n        cost1 = completion_cost(\n            completion_response=response1.model_dump(),\n            model=f\"{llm._model_provider}/{llm._model_version}\",\n        )\n\n        usage1 = response1.usage\n        cached_tokens_1 = _extract_cached_tokens(usage1)\n        prompt_tokens_1 = _extract_prompt_tokens(usage1)\n        # print(f\"Response 1 usage: {usage1}\")\n        # print(f\"Cost 1: ${cost1:.10f}\")\n\n        # Wait to ensure cache is available\n        time.sleep(5)\n\n        # Second call with same context - should use cache\n        print(\"\\n=== Second call (cache read) ===\")\n        question2: list[ChatCompletionMessage] = [\n            UserMessage(role=\"user\", content=\"Can you elaborate on neural networks?\")\n        ]\n\n        # Apply prompt caching (same cacheable prefix)\n        processed_messages2, _ = process_with_prompt_cache(\n            llm_config=llm.config,\n            cacheable_prefix=cacheable_prefix,\n            suffix=question2,\n            continuation=False,\n        )\n        # print(f\"Processed messages 2: {processed_messages2}\")\n        response2 = llm.invoke(prompt=processed_messages2)\n        cost2 = completion_cost(\n            completion_response=response2.model_dump(),\n            model=f\"{llm._model_provider}/{llm._model_version}\",\n        )\n\n        usage2 = response2.usage\n        cached_tokens_2 = _extract_cache_read_tokens(usage2)\n        prompt_tokens_2 = _extract_prompt_tokens(usage2)\n        # print(f\"Response 2 usage: {usage2}\")\n        # print(f\"Cost 2: ${cost2:.10f}\")\n\n        # Verify caching occurred – OpenAI reports cached work via prompt_tokens_details.cached_tokens\n        print(f\"\\nCached tokens call 1: {cached_tokens_1}, call 2: {cached_tokens_2}\")\n        print(f\"Prompt tokens call 1: {prompt_tokens_1}, call 2: {prompt_tokens_2}\")\n        print(f\"Cost delta (1 -> 2): ${cost1 - cost2:.10f}\")\n\n        # The first call is expected to *create* cache (cached_tokens may be 0).\n        # The second call should show cached tokens being used.\n        if cached_tokens_2 > 0:\n            successes += 1\n            break\n\n    # empirically there's a 60% chance of success per attempt, so we expect at least one success in 8 attempts\n    # (99.94% probability). we can bump this number if the test is too flaky.\n    assert (\n        successes > 0\n    ), f\"Expected at least one success. 0 of {attempts} attempts used prompt caching.\"\n\n\n@pytest.mark.skipif(\n    not os.environ.get(\"ANTHROPIC_API_KEY\"),\n    reason=\"Anthropic API key not available\",\n)\ndef test_anthropic_prompt_caching_reduces_costs(\n    db_session: Session,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that Anthropic prompt caching reduces costs on subsequent calls.\n\n    Anthropic requires explicit cache_control parameters.\n    \"\"\"\n    # Prompt caching support is model/account specific.\n    # Allow override via env var and otherwise try a few non-retired candidates.\n    anthropic_prompt_cache_models_env = os.environ.get(\"ANTHROPIC_PROMPT_CACHE_MODELS\")\n    if anthropic_prompt_cache_models_env:\n        candidate_models = [\n            model.strip()\n            for model in anthropic_prompt_cache_models_env.split(\",\")\n            if model.strip()\n        ]\n    else:\n        candidate_models = [\n            \"claude-haiku-4-5-20251001\",\n            \"claude-sonnet-4-5-20250929\",\n            \"claude-3-5-sonnet-20241022\",\n            \"claude-3-5-sonnet-latest\",\n        ]\n\n    import random\n    import string\n\n    # Create a long context message.\n    # Add a random prefix to avoid reusing an existing ephemeral cache from prior test runs.\n    random_prefix = \"\".join(random.choices(string.ascii_lowercase, k=32))\n    long_context = (\n        random_prefix + \" \"\n        \"This is a comprehensive document about artificial intelligence and machine learning. \"\n        + \" \".join(\n            [\n                f\"Section {i}: This section discusses various aspects of AI technology, \"\n                f\"including neural networks, deep learning, natural language processing, \"\n                f\"computer vision, and reinforcement learning. These technologies are \"\n                f\"revolutionizing how we interact with computers and process information.\"\n                for i in range(50)\n            ]\n        )\n    )\n\n    base_messages: list[ChatCompletionMessage] = [\n        UserMessage(role=\"user\", content=long_context)\n    ]\n\n    unavailable_models: list[str] = []\n    non_caching_models: list[str] = []\n\n    for model_name in candidate_models:\n        llm = LitellmLLM(\n            api_key=os.environ[\"ANTHROPIC_API_KEY\"],\n            model_provider=\"anthropic\",\n            model_name=model_name,\n            max_input_tokens=200000,\n        )\n\n        # First call - creates cache\n        print(f\"\\n=== First call (cache creation) model={model_name} ===\")\n        question1: list[ChatCompletionMessage] = [\n            UserMessage(\n                role=\"user\",\n                content=\"Reply with exactly one lowercase word: topics\",\n            )\n        ]\n\n        processed_messages1, _ = process_with_prompt_cache(\n            llm_config=llm.config,\n            cacheable_prefix=base_messages,\n            suffix=question1,\n            continuation=False,\n        )\n\n        try:\n            response1 = llm.invoke(prompt=processed_messages1, max_tokens=8)\n        except Exception as e:\n            error_str = str(e).lower()\n            if (\n                \"not_found_error\" in error_str\n                or \"model_not_found\" in error_str\n                or ('\"type\":\"not_found_error\"' in error_str and \"model:\" in error_str)\n            ):\n                unavailable_models.append(model_name)\n                continue\n            raise\n\n        cost1 = completion_cost(\n            completion_response=response1.model_dump(),\n            model=f\"{llm._model_provider}/{llm._model_version}\",\n        )\n\n        usage1 = response1.usage\n        print(f\"Response 1 usage: {usage1}\")\n        print(f\"Cost 1: ${cost1:.10f}\")\n\n        # Wait to ensure cache is available\n        time.sleep(2)\n\n        # Second call with same context - should use cache\n        print(f\"\\n=== Second call (cache read) model={model_name} ===\")\n        question2: list[ChatCompletionMessage] = [\n            UserMessage(\n                role=\"user\",\n                content=\"Reply with exactly one lowercase word: neural\",\n            )\n        ]\n\n        processed_messages2, _ = process_with_prompt_cache(\n            llm_config=llm.config,\n            cacheable_prefix=base_messages,\n            suffix=question2,\n            continuation=False,\n        )\n\n        response2 = llm.invoke(prompt=processed_messages2, max_tokens=8)\n        cost2 = completion_cost(\n            completion_response=response2.model_dump(),\n            model=f\"{llm._model_provider}/{llm._model_version}\",\n        )\n\n        usage2 = response2.usage\n        print(f\"Response 2 usage: {usage2}\")\n        print(f\"Cost 2: ${cost2:.10f}\")\n\n        cache_creation_tokens = _get_usage_value(usage1, \"cache_creation_input_tokens\")\n        cache_read_tokens = _get_usage_value(usage2, \"cache_read_input_tokens\")\n\n        print(f\"\\nCache creation tokens (call 1): {cache_creation_tokens}\")\n        print(f\"Cache read tokens (call 2): {cache_read_tokens}\")\n        print(f\"Cost reduction: ${cost1 - cost2:.10f}\")\n\n        # Model is available but does not expose Anthropic cache usage metrics\n        if cache_creation_tokens <= 0 or cache_read_tokens <= 0:\n            non_caching_models.append(model_name)\n            continue\n\n        # Cost should be lower on second call\n        assert (\n            cost2 < cost1\n        ), f\"Expected lower cost on cached call. Cost 1: ${cost1:.10f}, Cost 2: ${cost2:.10f}\"\n        return\n\n    pytest.skip(\n        \"No Anthropic model available with observable prompt-cache metrics. \"\n        f\"Tried models={candidate_models}, unavailable={unavailable_models}, non_caching={non_caching_models}\"\n    )\n\n\n@pytest.mark.skipif(\n    not os.environ.get(VERTEX_CREDENTIALS_ENV),\n    reason=\"Vertex AI credentials file not available\",\n)\n@pytest.mark.skipif(\n    not os.environ.get(VERTEX_LOCATION_ENV),\n    reason=\"VERTEX_LOCATION required for Vertex AI context caching (e.g., 'us-central1')\",\n)\n@pytest.mark.skip(reason=\"Vertex AI prompt caching is disabled for now\")\ndef test_google_genai_prompt_caching_reduces_costs(\n    db_session: Session,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that Litellm Gemini prompt caching reduces costs on subsequent calls.\n\n    Vertex AI requires explicit context caching via the Context Caching API,\n    which needs both credentials and a valid location (e.g., us-central1).\n    \"\"\"\n    import random\n    import string\n    from litellm import exceptions as litellm_exceptions\n\n    try:\n        credentials_path, should_cleanup = _resolve_vertex_credentials()\n    except FileNotFoundError:\n        pytest.skip(\"Vertex credentials not available for test.\")\n    except ValueError as exc:\n        pytest.skip(str(exc))\n\n    vertex_location = os.environ.get(VERTEX_LOCATION_ENV)\n    if not vertex_location:\n        pytest.skip(\"VERTEX_LOCATION required for Vertex AI context caching\")\n    model_name = os.environ.get(VERTEX_MODEL_ENV, DEFAULT_VERTEX_MODEL)\n\n    try:\n        _validate_vertex_credentials_file(credentials_path)\n        os.environ.setdefault(\"GOOGLE_APPLICATION_CREDENTIALS\", str(credentials_path))\n\n        custom_config: dict[str, str] = {\"vertex_credentials\": str(credentials_path)}\n        if vertex_location:\n            custom_config[\"vertex_location\"] = vertex_location\n\n        llm = LitellmLLM(\n            api_key=None,\n            model_provider=\"vertex_ai\",\n            model_name=model_name,\n            max_input_tokens=1_000_000,\n            custom_config=custom_config,\n        )\n\n        attempts = 4\n        success = False\n        last_metrics: dict[str, Any] = {}\n\n        for attempt in range(attempts):\n            random_prefix = \"\".join(random.choices(string.ascii_lowercase, k=32))\n            long_context = (\n                random_prefix\n                + \"This is a comprehensive document about artificial intelligence and machine learning. \"\n                + \" \".join(\n                    [\n                        f\"Section {i}: This section discusses various aspects of AI technology, \"\n                        f\"including neural networks, deep learning, natural language processing, \"\n                        f\"computer vision, and reinforcement learning. These technologies are \"\n                        f\"revolutionizing how we interact with computers and process information.\"\n                        for i in range(50)\n                    ]\n                )\n            )\n\n            cacheable_prefix: list[ChatCompletionMessage] = [\n                SystemMessage(role=\"system\", content=long_context)\n            ]\n\n            print(f\"\\n=== Vertex attempt {attempt + 1} (cache creation) ===\")\n            question1: list[ChatCompletionMessage] = [\n                UserMessage(role=\"user\", content=\"What are the main topics discussed?\")\n            ]\n\n            processed_messages1, _ = process_with_prompt_cache(\n                llm_config=llm.config,\n                cacheable_prefix=cacheable_prefix,\n                suffix=question1,\n                continuation=False,\n            )\n            # Debug: print processed messages structure\n            first_msg = (\n                processed_messages1[0]\n                if isinstance(processed_messages1, list) and processed_messages1\n                else processed_messages1\n            )\n            print(f\"Processed messages structure (first msg): {first_msg}\")\n\n            response1 = llm.invoke(prompt=processed_messages1)\n            cost1 = completion_cost(\n                completion_response=response1.model_dump(),\n                model=f\"{llm._model_provider}/{llm._model_version}\",\n            )\n            usage1 = response1.usage\n            cache_creation_tokens = _get_usage_value(\n                usage1, \"cache_creation_input_tokens\"\n            )\n            cached_tokens_1 = _extract_cached_tokens(usage1)\n            cache_read_tokens_1 = _extract_cache_read_tokens(usage1)\n\n            print(f\"Vertex response 1 usage: {usage1}\")\n            print(f\"Vertex cost 1: ${cost1:.10f}\")\n\n            time.sleep(5)\n\n            print(f\"\\n=== Vertex attempt {attempt + 1} (cache read) ===\")\n            question2: list[ChatCompletionMessage] = [\n                UserMessage(\n                    role=\"user\", content=\"Can you elaborate on neural networks?\"\n                )\n            ]\n\n            processed_messages2, _ = process_with_prompt_cache(\n                llm_config=llm.config,\n                cacheable_prefix=cacheable_prefix,\n                suffix=question2,\n                continuation=False,\n            )\n\n            response2 = llm.invoke(prompt=processed_messages2)\n            cost2 = completion_cost(\n                completion_response=response2.model_dump(),\n                model=f\"{llm._model_provider}/{llm._model_version}\",\n            )\n            usage2 = response2.usage\n            cache_read_tokens_2 = _extract_cache_read_tokens(usage2)\n            cached_tokens_2 = _extract_cached_tokens(usage2)\n\n            print(f\"Vertex response 2 usage: {usage2}\")\n            print(f\"Vertex cost 2: ${cost2:.10f}\")\n            print(\n                f\"Vertex cache metrics - creation: {cache_creation_tokens}, \"\n                f\"call1 cached tokens: {cached_tokens_1}, \"\n                f\"call1 cache read tokens: {cache_read_tokens_1}, \"\n                f\"call2 cached tokens: {cached_tokens_2}, \"\n                f\"call2 cache read tokens: {cache_read_tokens_2}\"\n            )\n            print(f\"Vertex cost delta (1 -> 2): ${cost1 - cost2:.10f}\")\n\n            last_metrics = {\n                \"cache_creation_tokens\": cache_creation_tokens,\n                \"cached_tokens_1\": cached_tokens_1,\n                \"cache_read_tokens_1\": cache_read_tokens_1,\n                \"cached_tokens_2\": cached_tokens_2,\n                \"cache_read_tokens_2\": cache_read_tokens_2,\n                \"cost_delta\": cost1 - cost2,\n            }\n\n            if cache_read_tokens_2 > 0 or cached_tokens_2 > 0 or (cost1 - cost2) > 0:\n                success = True\n                break\n    except ValueError as exc:\n        pytest.fail(f\"Invalid Vertex credentials: {exc}\")\n    except litellm_exceptions.APIConnectionError as exc:\n        creds_details = json.loads(credentials_path.read_text(encoding=\"utf-8\"))\n        pytest.fail(\n            \"Vertex credentials appeared well-formed but failed to mint an access token. \"\n            \"This typically means the service account lacks the required Vertex AI permissions \"\n            \"or the key was revoked.\\n\"\n            f\"project_id={creds_details.get('project_id')!r}, \"\n            f\"client_email={creds_details.get('client_email')!r}\\n\"\n            f\"Original error: {exc}\"\n        )\n    finally:\n        if should_cleanup:\n            try:\n                credentials_path.unlink(missing_ok=True)\n            except OSError:\n                pass\n\n    assert (\n        success\n    ), f\"Expected Gemini prompt caching evidence across attempts. Last observed metrics: {last_metrics}\"\n\n\n@pytest.mark.skipif(\n    not os.environ.get(\"OPENAI_API_KEY\"),\n    reason=\"OpenAI API key not available\",\n)\ndef test_prompt_caching_with_conversation_history(\n    db_session: Session,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that prompt caching works with multi-turn conversations.\n\n    System message and history should be cached, only new user message is uncached.\n    \"\"\"\n    # Create OpenAI LLM\n    llm = LitellmLLM(\n        api_key=os.environ[\"OPENAI_API_KEY\"],\n        model_provider=\"openai\",\n        model_name=\"gpt-4o-mini\",\n        max_input_tokens=128000,\n    )\n\n    # Create a long system message and context\n    system_message: SystemMessage = SystemMessage(\n        role=\"system\",\n        content=(\n            \"You are an AI assistant specialized in technology. \"\n            + \" \".join(\n                [\n                    f\"You have knowledge about topic {i} including detailed information. \"\n                    for i in range(50)\n                ]\n            )\n        ),\n    )\n\n    long_context = \"This is a comprehensive document. \" + \" \".join(\n        [f\"Section {i}: Details about topic {i}. \" * 20 for i in range(30)]\n    )\n\n    # Turn 1\n    print(\"\\n=== Turn 1 ===\")\n    messages_turn1: list[ChatCompletionMessage] = [\n        system_message,\n        UserMessage(role=\"user\", content=long_context + \"\\n\\nWhat is this about?\"),\n    ]\n\n    response1 = llm.invoke(prompt=messages_turn1)\n    cost1 = completion_cost(\n        completion_response=response1.model_dump(),\n        model=f\"{llm._model_provider}/{llm._model_version}\",\n    )\n\n    usage1 = response1.usage\n    print(f\"Turn 1 usage: {usage1}\")\n    print(f\"Turn 1 cost: ${cost1:.10f}\")\n\n    # Wait for cache\n    time.sleep(2)\n\n    # Turn 2 - add assistant response and new user message\n    print(\"\\n=== Turn 2 (with cached history) ===\")\n    messages_turn2: list[ChatCompletionMessage] = messages_turn1 + [\n        AssistantMessage(\n            role=\"assistant\", content=\"This document discusses various topics.\"\n        ),\n        UserMessage(role=\"user\", content=\"Tell me about the first topic.\"),\n    ]\n\n    response2 = llm.invoke(prompt=messages_turn2)\n    cost2 = completion_cost(\n        completion_response=response2.model_dump(),\n        model=f\"{llm._model_provider}/{llm._model_version}\",\n    )\n\n    usage2 = response2.usage\n    print(f\"Turn 2 usage: {usage2}\")\n    print(f\"Turn 2 cost: ${cost2:.10f}\")\n\n    # Turn 3 - continue conversation\n    print(\"\\n=== Turn 3 (with even more cached history) ===\")\n    messages_turn3: list[ChatCompletionMessage] = messages_turn2 + [\n        AssistantMessage(role=\"assistant\", content=\"The first topic covers...\"),\n        UserMessage(role=\"user\", content=\"What about the second topic?\"),\n    ]\n\n    response3 = llm.invoke(prompt=messages_turn3)\n    cost3 = completion_cost(\n        completion_response=response3.model_dump(),\n        model=f\"{llm._model_provider}/{llm._model_version}\",\n    )\n\n    usage3 = response3.usage\n    print(f\"Turn 3 usage: {usage3}\")\n    print(f\"Turn 3 cost: ${cost3:.10f}\")\n\n    # Verify caching in subsequent turns\n    cache_tokens_2 = _get_usage_value(usage2, \"cache_read_input_tokens\")\n    cache_tokens_3 = _get_usage_value(usage3, \"cache_read_input_tokens\")\n\n    prompt_tokens_1 = _get_usage_value(usage1, \"prompt_tokens\")\n    prompt_tokens_2 = _get_usage_value(usage2, \"prompt_tokens\")\n    prompt_tokens_3 = _get_usage_value(usage3, \"prompt_tokens\")\n\n    print(f\"\\nCache tokens - Turn 2: {cache_tokens_2}, Turn 3: {cache_tokens_3}\")\n    print(\n        f\"Prompt tokens - Turn 1: {prompt_tokens_1}, Turn 2: {prompt_tokens_2}, Turn 3: {prompt_tokens_3}\"\n    )\n\n    # Either cache tokens should increase or prompt tokens should be relatively stable\n    # (not growing linearly with conversation length)\n    assert (\n        cache_tokens_2 > 0\n        or cache_tokens_3 > 0\n        or prompt_tokens_2 < prompt_tokens_1 * 1.5\n    ), \"Expected caching benefits in multi-turn conversation\"\n\n\n@pytest.mark.skipif(\n    not os.environ.get(\"OPENAI_API_KEY\"),\n    reason=\"OpenAI API key not available\",\n)\ndef test_no_caching_without_process_with_prompt_cache(\n    db_session: Session,  # noqa: ARG001\n) -> None:\n    \"\"\"Test baseline: without using process_with_prompt_cache, no special caching occurs.\n\n    This establishes a baseline to compare against the caching tests.\n    \"\"\"\n    # Create OpenAI LLM\n    llm = LitellmLLM(\n        api_key=os.environ[\"OPENAI_API_KEY\"],\n        model_provider=\"openai\",\n        model_name=\"gpt-4o-mini\",\n        max_input_tokens=128000,\n    )\n\n    # Create a long context\n    long_context = \"This is a comprehensive document. \" + \" \".join(\n        [f\"Section {i}: Details about technology topic {i}. \" * 10 for i in range(50)]\n    )\n\n    # First call - no explicit caching\n    print(\"\\n=== First call (no explicit caching) ===\")\n    messages1: list[ChatCompletionMessage] = [\n        UserMessage(role=\"user\", content=long_context + \"\\n\\nSummarize this.\")\n    ]\n\n    response1 = llm.invoke(prompt=messages1)\n    cost1 = completion_cost(\n        completion_response=response1.model_dump(),\n        model=f\"{llm._model_provider}/{llm._model_version}\",\n    )\n\n    usage1 = response1.usage\n    print(f\"Response 1 usage: {usage1}\")\n    print(f\"Cost 1: ${cost1:.10f}\")\n\n    # This test just verifies the LLM works and we can calculate costs\n    # It serves as a baseline comparison for the caching tests\n    assert cost1 > 0, \"Should have non-zero cost\"\n    assert usage1, \"Should have usage data\"\n\n    print(\"\\nBaseline test passed - ready to compare with caching tests\")\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/mock_content_provider.py",
    "content": "import abc\nfrom collections.abc import Generator\nfrom collections.abc import Sequence\nfrom contextlib import contextmanager\nfrom unittest.mock import patch\n\nfrom pydantic import BaseModel\n\nfrom onyx.tools.tool_implementations.open_url.models import WebContent\nfrom onyx.tools.tool_implementations.open_url.models import WebContentProvider\n\n\nclass MockWebContent(BaseModel):\n    title: str\n    url: str\n    content: str\n\n    def to_web_content(self) -> WebContent:\n        return WebContent(\n            title=self.title,\n            link=self.url,\n            full_content=self.content,\n            published_date=None,\n            scrape_successful=True,\n        )\n\n\nclass ContentProviderController(abc.ABC):\n    @abc.abstractmethod\n    def add_content(self, content: MockWebContent) -> None:\n        raise NotImplementedError\n\n\nclass MockContentProvider(WebContentProvider, ContentProviderController):\n    def __init__(self) -> None:\n        self._contents: list[MockWebContent] = []\n\n    def add_content(self, web_content: MockWebContent) -> None:\n        self._contents.append(web_content)\n\n    def contents(self, urls: Sequence[str]) -> list[WebContent]:\n        filtered_contents = list(\n            filter(lambda web_content: web_content.url in urls, self._contents)\n        )\n\n        return list(\n            map(lambda web_content: web_content.to_web_content(), filtered_contents)\n        )\n\n\n@contextmanager\ndef use_mock_content_provider() -> Generator[ContentProviderController, None, None]:\n    content_provider = MockContentProvider()\n\n    with patch(\n        \"onyx.tools.tool_implementations.open_url.open_url_tool.get_default_content_provider\",\n        return_value=content_provider,\n    ):\n        yield content_provider\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/mock_image_provider.py",
    "content": "import abc\nimport asyncio\nimport concurrent.futures\nimport time\nfrom collections.abc import Generator\nfrom contextlib import contextmanager\nfrom datetime import datetime\nfrom typing import Any\nfrom unittest.mock import patch\n\nfrom litellm.types.utils import ImageObject\nfrom litellm.types.utils import ImageResponse\n\nfrom onyx.image_gen.interfaces import ImageGenerationProvider\nfrom onyx.image_gen.interfaces import ImageGenerationProviderCredentials\nfrom onyx.image_gen.interfaces import ReferenceImage\nfrom onyx.llm.interfaces import LLMConfig\n\n\nclass ImageGenerationProviderController(abc.ABC):\n    @abc.abstractmethod\n    def add_image(\n        self,\n        data: str,\n        delay: float = 0.0,\n    ) -> None:\n        raise NotImplementedError\n\n\nclass MockImageGenerationProvider(\n    ImageGenerationProvider, ImageGenerationProviderController\n):\n    def __init__(self) -> None:\n        self._images: list[str] = []\n        self._delays: list[float] = []\n\n    def add_image(\n        self,\n        data: str,\n        delay: float = 0.0,\n    ) -> None:\n        self._images.append(data)\n        self._delays.append(delay)\n\n    @classmethod\n    def validate_credentials(\n        cls,\n        credentials: ImageGenerationProviderCredentials,  # noqa: ARG003\n    ) -> bool:\n        return True\n\n    @classmethod\n    def _build_from_credentials(\n        cls,\n        _: ImageGenerationProviderCredentials,\n    ) -> ImageGenerationProvider:\n        return cls()\n\n    def generate_image(\n        self,\n        prompt: str,\n        model: str,  # noqa: ARG002\n        size: str,  # noqa: ARG002\n        n: int,  # noqa: ARG002\n        quality: str | None = None,  # noqa: ARG002\n        reference_images: list[ReferenceImage] | None = None,  # noqa: ARG002\n        **kwargs: Any,  # noqa: ARG002\n    ) -> ImageResponse:\n        image_data = self._images.pop(0)\n        delay = self._delays.pop(0)\n\n        if delay > 0.0:\n            try:\n                asyncio.get_running_loop()\n                # Event loop is running - run sleep in executor to avoid blocking the event loop\n                with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:\n                    future = executor.submit(time.sleep, delay)\n                    future.result()\n            except RuntimeError:\n                # No running event loop, use regular thread sleep\n                time.sleep(delay)\n\n        return ImageResponse(\n            created=int(datetime.now().timestamp()),\n            data=[\n                ImageObject(\n                    b64_json=image_data,\n                    revised_prompt=prompt,\n                )\n            ],\n        )\n\n\ndef _create_mock_image_generation_llm_config() -> LLMConfig:\n    \"\"\"Create a mock LLMConfig for image generation.\"\"\"\n    return LLMConfig(\n        model_provider=\"openai\",\n        model_name=\"gpt-image-1\",\n        temperature=0.0,\n        api_key=\"mock-api-key\",\n        api_base=None,\n        api_version=None,\n        deployment_name=None,\n        max_input_tokens=100000,\n        custom_config=None,\n    )\n\n\n@contextmanager\ndef use_mock_image_generation_provider() -> (\n    Generator[ImageGenerationProviderController, None, None]\n):\n    image_gen_provider = MockImageGenerationProvider()\n\n    with (\n        # Mock the image generation provider factory\n        patch(\n            \"onyx.tools.tool_implementations.images.image_generation_tool.get_image_generation_provider\",\n            return_value=image_gen_provider,\n        ),\n        # Mock is_available to return True so the tool is registered\n        patch(\n            \"onyx.tools.tool_implementations.images.image_generation_tool.ImageGenerationTool.is_available\",\n            return_value=True,\n        ),\n        # Mock the config lookup in tool_constructor to return a valid LLMConfig\n        patch(\n            \"onyx.tools.tool_constructor._get_image_generation_config\",\n            return_value=_create_mock_image_generation_llm_config(),\n        ),\n    ):\n        yield image_gen_provider\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/mock_llm.py",
    "content": "from __future__ import annotations\n\nimport abc\nimport threading\nimport time\nfrom collections.abc import Generator\nfrom collections.abc import Iterator\nfrom contextlib import contextmanager\nfrom enum import Enum\nfrom typing import Any\nfrom typing import cast\nfrom typing import Generic\nfrom typing import Literal\nfrom typing import TypeVar\nfrom unittest.mock import patch\n\nfrom pydantic import BaseModel\n\nfrom onyx.llm.interfaces import LanguageModelInput\nfrom onyx.llm.interfaces import LLM\nfrom onyx.llm.interfaces import LLMConfig\nfrom onyx.llm.interfaces import LLMUserIdentity\nfrom onyx.llm.interfaces import ReasoningEffort\nfrom onyx.llm.interfaces import ToolChoiceOptions\nfrom onyx.llm.model_response import ChatCompletionDeltaToolCall\nfrom onyx.llm.model_response import Delta\nfrom onyx.llm.model_response import FunctionCall\nfrom onyx.llm.model_response import ModelResponse\nfrom onyx.llm.model_response import ModelResponseStream\nfrom onyx.llm.model_response import StreamingChoice\n\nT = TypeVar(\"T\")\n\n\nclass LLMResponseType(str, Enum):\n    REASONING = \"reasoning\"\n    ANSWER = \"answer\"\n    TOOL_CALL = \"tool_call\"\n\n\nclass LLMResponse(abc.ABC, BaseModel):\n    type: str = \"\"\n\n    @abc.abstractmethod\n    def num_tokens(self) -> int:\n        raise NotImplementedError\n\n\nclass LLMReasoningResponse(LLMResponse):\n    type: Literal[\"reasoning\"] = LLMResponseType.REASONING.value\n    reasoning_tokens: list[str]\n\n    def num_tokens(self) -> int:\n        return len(self.reasoning_tokens)\n\n\nclass LLMAnswerResponse(LLMResponse):\n    type: Literal[\"answer\"] = LLMResponseType.ANSWER.value\n    answer_tokens: list[str]\n\n    def num_tokens(self) -> int:\n        return len(self.answer_tokens)\n\n\nclass LLMToolCallResponse(LLMResponse):\n    type: Literal[\"tool_call\"] = LLMResponseType.TOOL_CALL.value\n    tool_name: str\n    tool_call_id: str\n    tool_call_argument_tokens: list[str]\n\n    def num_tokens(self) -> int:\n        return (\n            len(self.tool_call_argument_tokens) + 1\n        )  # +1 for the tool_call_id and tool_name\n\n\nclass StreamItem(BaseModel):\n    \"\"\"Represents a single item in the mock LLM stream with its type.\"\"\"\n\n    response_type: LLMResponseType\n    data: Any\n\n\ndef _response_to_stream_items(response: LLMResponse) -> list[StreamItem]:\n    match LLMResponseType(response.type):\n        case LLMResponseType.REASONING:\n            response = cast(LLMReasoningResponse, response)\n            return [\n                StreamItem(\n                    response_type=LLMResponseType.REASONING,\n                    data=token,\n                )\n                for token in response.reasoning_tokens\n            ]\n        case LLMResponseType.ANSWER:\n            response = cast(LLMAnswerResponse, response)\n            return [\n                StreamItem(\n                    response_type=LLMResponseType.ANSWER,\n                    data=token,\n                )\n                for token in response.answer_tokens\n            ]\n        case LLMResponseType.TOOL_CALL:\n            response = cast(LLMToolCallResponse, response)\n            return [\n                StreamItem(\n                    response_type=LLMResponseType.TOOL_CALL,\n                    data={\n                        \"tool_call_id\": response.tool_call_id,\n                        \"tool_name\": response.tool_name,\n                        \"arguments\": None,\n                    },\n                )\n            ] + [\n                StreamItem(\n                    response_type=LLMResponseType.TOOL_CALL,\n                    data={\n                        \"tool_call_id\": None,\n                        \"tool_name\": None,\n                        \"arguments\": token,\n                    },\n                )\n                for token in response.tool_call_argument_tokens\n            ]\n        case _:\n            raise ValueError(f\"Unknown response type: {response.type}\")\n\n\ndef create_delta_from_stream_item(item: StreamItem) -> Delta:\n    response_type = item.response_type\n    data = item.data\n    if response_type == LLMResponseType.REASONING:\n        return Delta(reasoning_content=data)\n    elif response_type == LLMResponseType.ANSWER:\n        return Delta(content=data)\n    elif response_type == LLMResponseType.TOOL_CALL:\n        # Handle grouped tool calls (list) vs single tool call (dict)\n        if isinstance(data, list):\n            # Multiple tool calls emitted together in the same tick\n            tool_calls = []\n            for tc_data in data:\n                if tc_data[\"tool_call_id\"] is not None:\n                    tool_calls.append(\n                        ChatCompletionDeltaToolCall(\n                            id=tc_data[\"tool_call_id\"],\n                            index=tc_data[\"index\"],\n                            function=FunctionCall(\n                                arguments=\"\",\n                                name=tc_data[\"tool_name\"],\n                            ),\n                        )\n                    )\n                else:\n                    tool_calls.append(\n                        ChatCompletionDeltaToolCall(\n                            index=tc_data[\"index\"],\n                            id=None,\n                            function=FunctionCall(\n                                arguments=tc_data[\"arguments\"],\n                                name=None,\n                            ),\n                        )\n                    )\n            return Delta(tool_calls=tool_calls)\n        else:\n            # Single tool call (original behavior)\n            # First tick has tool_call_id and tool_name, subsequent ticks have arguments\n            if data[\"tool_call_id\"] is not None:\n                return Delta(\n                    tool_calls=[\n                        ChatCompletionDeltaToolCall(\n                            id=data[\"tool_call_id\"],\n                            function=FunctionCall(\n                                name=data[\"tool_name\"],\n                                arguments=\"\",\n                            ),\n                        )\n                    ]\n                )\n            else:\n                return Delta(\n                    tool_calls=[\n                        ChatCompletionDeltaToolCall(\n                            id=None,\n                            function=FunctionCall(\n                                name=None,\n                                arguments=data[\"arguments\"],\n                            ),\n                        )\n                    ]\n                )\n    else:\n        raise ValueError(f\"Unknown response type: {response_type}\")\n\n\nclass MockLLMController(abc.ABC):\n    @abc.abstractmethod\n    def add_response(self, response: LLMResponse) -> None:\n        \"\"\"Add a response to the current stream.\"\"\"\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def add_responses_together(self, *responses: LLMResponse) -> None:\n        \"\"\"Add multiple responses that should be emitted together in the same tick.\"\"\"\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def forward(self, n: int) -> None:\n        \"\"\"Forward the stream by n tokens.\"\"\"\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def forward_till_end(self) -> None:\n        \"\"\"Forward the stream until the end.\"\"\"\n        raise NotImplementedError\n\n    @abc.abstractmethod\n    def set_max_timeout(self, timeout: float = 5.0) -> None:\n        raise NotImplementedError\n\n\nclass MockLLM(LLM, MockLLMController):\n    def __init__(self) -> None:\n        self.stream_controller = SyncStreamController[StreamItem]()\n\n    def add_response(self, response: LLMResponse) -> None:\n        items = _response_to_stream_items(response)\n        self.stream_controller.queue_items(items)\n\n    def add_responses_together(self, *responses: LLMResponse) -> None:\n        \"\"\"Add multiple responses that should be emitted together in the same tick.\n\n        Currently only supports multiple tool call responses being grouped together.\n        The initial tool call info (id, name) for all tool calls will be emitted\n        in a single delta, followed by argument tokens for each tool call.\n        \"\"\"\n        tool_calls = [r for r in responses if r.type == LLMResponseType.TOOL_CALL]\n\n        if len(tool_calls) != len(responses):\n            raise ValueError(\n                \"add_responses_together currently only supports multiple tool call responses\"\n            )\n\n        # Create combined first item with all tool call initial info\n        combined_data = [\n            {\n                \"index\": idx,\n                \"tool_call_id\": cast(LLMToolCallResponse, tc).tool_call_id,\n                \"tool_name\": cast(LLMToolCallResponse, tc).tool_name,\n                \"arguments\": None,\n            }\n            for idx, tc in enumerate(tool_calls)\n        ]\n        combined_item = StreamItem(\n            response_type=LLMResponseType.TOOL_CALL,\n            data=combined_data,\n        )\n        self.stream_controller.queue_items([combined_item])\n\n        # Add argument tokens for each tool call with their index\n        for idx, tc in enumerate(tool_calls):\n            tc = cast(LLMToolCallResponse, tc)\n            for token in tc.tool_call_argument_tokens:\n                item = StreamItem(\n                    response_type=LLMResponseType.TOOL_CALL,\n                    data=[\n                        {\n                            \"index\": idx,\n                            \"tool_call_id\": None,\n                            \"tool_name\": None,\n                            \"arguments\": token,\n                        }\n                    ],\n                )\n                self.stream_controller.queue_items([item])\n\n    def forward(self, n: int) -> None:\n        if self.stream_controller:\n            self.stream_controller.forward(n)\n        else:\n            raise ValueError(\"No response set\")\n\n    def forward_till_end(self) -> None:\n        if self.stream_controller:\n            self.stream_controller.forward_till_end()\n        else:\n            raise ValueError(\"No response set\")\n\n    def set_max_timeout(self, timeout: float = 5.0) -> None:\n        self.stream_controller.timeout = timeout\n\n    @property\n    def config(self) -> LLMConfig:\n        return LLMConfig(\n            model_provider=\"mock\",\n            model_name=\"mock\",\n            temperature=1.0,\n            max_input_tokens=1000000000,\n        )\n\n    def invoke(\n        self,\n        prompt: LanguageModelInput,\n        tools: list[dict] | None = None,\n        tool_choice: ToolChoiceOptions | None = None,\n        structured_response_format: dict | None = None,\n        timeout_override: int | None = None,\n        max_tokens: int | None = None,\n        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,\n        user_identity: LLMUserIdentity | None = None,\n    ) -> ModelResponse:\n        raise NotImplementedError(\"We only care about streaming atm\")\n\n    def stream(\n        self,\n        prompt: LanguageModelInput,  # noqa: ARG002\n        tools: list[dict] | None = None,  # noqa: ARG002\n        tool_choice: ToolChoiceOptions | None = None,  # noqa: ARG002\n        structured_response_format: dict | None = None,  # noqa: ARG002\n        timeout_override: int | None = None,  # noqa: ARG002\n        max_tokens: int | None = None,  # noqa: ARG002\n        reasoning_effort: ReasoningEffort = ReasoningEffort.AUTO,  # noqa: ARG002\n        user_identity: LLMUserIdentity | None = None,  # noqa: ARG002\n    ) -> Iterator[ModelResponseStream]:\n        if not self.stream_controller:\n            return\n\n        for idx, item in enumerate(self.stream_controller):\n            yield ModelResponseStream(\n                id=\"chatcmp-123\",\n                created=\"1\",\n                choice=StreamingChoice(\n                    finish_reason=None,\n                    index=0,  # Choice index should stay at 0 for all items in the same stream\n                    delta=create_delta_from_stream_item(item),\n                ),\n                usage=None,\n            )\n\n\nclass StreamTimeoutError(Exception):\n    \"\"\"Raised when the stream controller times out waiting for tokens.\"\"\"\n\n\nclass SyncStreamController(Generic[T]):\n    def __init__(self, items: list[T] | None = None, timeout: float = 5.0) -> None:\n        self.items = items if items is not None else []\n        self.position = 0\n        self.pending: list[int] = []  # The indices of the tokens that are pending\n        self.timeout = timeout  # Maximum time to wait for tokens before failing\n\n        self._has_pending = threading.Event()\n\n    def queue_items(self, new_items: list[T]) -> None:\n        \"\"\"Queue additional tokens to the stream (for chaining responses like reasoning + tool calls).\"\"\"\n        self.items.extend(new_items)\n\n    def forward(self, n: int) -> None:\n        \"\"\"Queue the next n tokens to be yielded\"\"\"\n        end = min(self.position + n, len(self.items))\n        self.pending.extend(range(self.position, end))\n        self.position = end\n\n        if self.pending:\n            self._has_pending.set()\n\n    def forward_till_end(self) -> None:\n        self.forward(len(self.items) - self.position)\n\n    @property\n    def is_done(self) -> bool:\n        return self.position >= len(self.items) and not self.pending\n\n    def __iter__(self) -> SyncStreamController[T]:\n        return self\n\n    def __next__(self) -> T:\n        start_time = time.monotonic()\n        while not self.is_done:\n            if self.pending:\n                item_idx = self.pending.pop(0)\n                if not self.pending:\n                    self._has_pending.clear()\n                return self.items[item_idx]\n\n            elapsed = time.monotonic() - start_time\n            if elapsed >= self.timeout:\n                raise StreamTimeoutError(\n                    f\"Stream controller timed out after {self.timeout}s waiting for tokens. \"\n                    f\"Position: {self.position}/{len(self.items)}, Pending: {len(self.pending)}\"\n                )\n\n            self._has_pending.wait(timeout=0.1)\n\n        raise StopIteration\n\n\n@contextmanager\ndef use_mock_llm() -> Generator[MockLLMController, None, None]:\n    mock_llm = MockLLM()\n\n    with patch(\"onyx.chat.process_message.get_llm_for_persona\", return_value=mock_llm):\n        yield mock_llm\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/mock_search_pipeline.py",
    "content": "from collections.abc import Callable\nfrom collections.abc import Generator\nfrom contextlib import contextmanager\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.context.search.models import ChunkSearchRequest\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.context.search.models import PersonaSearchInfo\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.db.models import SearchSettings\nfrom onyx.db.models import User\nfrom onyx.document_index.interfaces import DocumentIndex\nfrom onyx.federated_connectors.federated_retrieval import FederatedRetrievalInfo\nfrom onyx.llm.interfaces import LLM\nfrom onyx.natural_language_processing.search_nlp_models import EmbeddingModel\nfrom onyx.tools.tool_implementations.search.search_tool import SearchTool\n\n\ndef run_functions_tuples_sequential(\n    functions_with_args: list[tuple[Callable, tuple]],\n    allow_failures: bool = False,\n    max_workers: int | None = None,  # noqa: ARG001\n    timeout: float | None = None,  # noqa: ARG001\n    timeout_callback: Callable | None = None,  # noqa: ARG001\n) -> list[Any]:\n    \"\"\"\n    A sequential replacement for run_functions_tuples_in_parallel.\n    Useful in tests to make parallel tool calls deterministic.\n    \"\"\"\n    results = []\n    for func, args in functions_with_args:\n        try:\n            results.append(func(*args))\n        except Exception:\n            if allow_failures:\n                results.append(None)\n            else:\n                raise\n    return results\n\n\nclass MockInternalSearchResult(BaseModel):\n    document_id: str\n    source_type: DocumentSource\n    semantic_identifier: str\n    chunk_ind: int\n\n    def to_inference_chunk(self) -> InferenceChunk:\n        return InferenceChunk(\n            document_id=f\"{self.source_type.value.upper()}_{self.document_id}\",\n            source_type=self.source_type,\n            semantic_identifier=self.semantic_identifier,\n            title=self.semantic_identifier,\n            chunk_id=self.chunk_ind,\n            blurb=\"\",\n            content=\"\",\n            source_links=None,\n            image_file_id=None,\n            section_continuation=False,\n            boost=0,\n            score=1.0,\n            hidden=False,\n            metadata={},\n            match_highlights=[],\n            doc_summary=\"\",\n            chunk_context=\"\",\n            updated_at=None,\n        )\n\n    def to_search_doc(self) -> SearchDoc:\n        return SearchDoc(\n            document_id=f\"{self.source_type.value.upper()}_{self.document_id}\",\n            chunk_ind=self.chunk_ind,\n            semantic_identifier=self.semantic_identifier,\n            link=None,\n            blurb=\"\",\n            source_type=self.source_type,\n            boost=0,\n            hidden=False,\n            metadata={},\n            score=1.0,\n            match_highlights=[],\n            updated_at=None,\n        )\n\n\nclass SearchPipelineController:\n    def __init__(self) -> None:\n        self.search_results: dict[str, list[MockInternalSearchResult]] = {}\n\n    def add_search_results(\n        self, query: str, results: list[MockInternalSearchResult]\n    ) -> None:\n        self.search_results[query] = results\n\n    def get_search_results(self, query: str) -> list[InferenceChunk]:\n        return [\n            result.to_inference_chunk() for result in self.search_results.get(query, [])\n        ]\n\n\n@contextmanager\ndef use_mock_search_pipeline(\n    connectors: list[DocumentSource],\n) -> Generator[SearchPipelineController, None, None]:\n    \"\"\"Mock the search pipeline and connector availability.\n\n    Args:\n        connectors: List of DocumentSource types to pretend are available.\n                   Pass an empty list to simulate no connectors.\n    \"\"\"\n    controller = SearchPipelineController()\n\n    def mock_check_connectors_exist(db_session: Session) -> bool:  # noqa: ARG001\n        return len(connectors) > 0\n\n    def mock_check_federated_connectors_exist(\n        db_session: Session,  # noqa: ARG001\n    ) -> bool:\n        # For now, federated connectors are not mocked as available\n        return False\n\n    def mock_check_user_files_exist(db_session: Session) -> bool:  # noqa: ARG001\n        # For now, user files are not mocked as available\n        return False\n\n    def mock_fetch_unique_document_sources(\n        db_session: Session,  # noqa: ARG001\n    ) -> list[DocumentSource]:\n        return connectors\n\n    def override_search_pipeline(\n        chunk_search_request: ChunkSearchRequest,\n        document_index: DocumentIndex,  # noqa: ARG001\n        user: User | None,  # noqa: ARG001\n        persona_search_info: PersonaSearchInfo | None,  # noqa: ARG001\n        db_session: Session | None = None,  # noqa: ARG001\n        auto_detect_filters: bool = False,  # noqa: ARG001\n        llm: LLM | None = None,  # noqa: ARG001\n        project_id_filter: int | None = None,  # noqa: ARG001\n        persona_id_filter: int | None = None,  # noqa: ARG001\n        # Pre-fetched data (used by SearchTool to avoid DB access in parallel calls)\n        acl_filters: list[str] | None = None,  # noqa: ARG001\n        embedding_model: EmbeddingModel | None = None,  # noqa: ARG001\n        prefetched_federated_retrieval_infos: (  # noqa: ARG001\n            list[FederatedRetrievalInfo] | None\n        ) = None,\n    ) -> list[InferenceChunk]:\n        return controller.get_search_results(chunk_search_request.query)\n\n    # Mock the pre-fetch session and DB queries in SearchTool.run() so\n    # tests don't need a fully initialised DB with search settings.\n    @contextmanager\n    def mock_get_session() -> Generator[MagicMock, None, None]:\n        yield MagicMock(spec=Session)\n\n    with (\n        patch(\n            \"onyx.tools.tool_implementations.search.search_tool.search_pipeline\",\n            new=override_search_pipeline,\n        ),\n        patch(\n            \"onyx.tools.tool_implementations.search.search_tool.check_connectors_exist\",\n            new=mock_check_connectors_exist,\n        ),\n        patch(\n            \"onyx.tools.tool_implementations.search.search_tool.check_federated_connectors_exist\",\n            new=mock_check_federated_connectors_exist,\n        ),\n        patch(\n            \"onyx.tools.tool_implementations.search.search_tool.semantic_query_rephrase\",\n            return_value=\"\",\n        ),\n        patch(\n            \"onyx.tools.tool_implementations.search.search_tool.keyword_query_expansion\",\n            return_value=[],\n        ),\n        patch(\n            \"onyx.tools.tool_runner.run_functions_tuples_in_parallel\",\n            new=run_functions_tuples_sequential,\n        ),\n        patch(\n            \"onyx.db.connector.check_connectors_exist\",\n            new=mock_check_connectors_exist,\n        ),\n        patch(\n            \"onyx.db.connector.check_federated_connectors_exist\",\n            new=mock_check_federated_connectors_exist,\n        ),\n        patch(\n            \"onyx.db.connector.check_user_files_exist\",\n            new=mock_check_user_files_exist,\n        ),\n        patch(\n            \"onyx.db.connector.fetch_unique_document_sources\",\n            new=mock_fetch_unique_document_sources,\n        ),\n        # Mock the pre-fetch phase of SearchTool.run()\n        patch(\n            \"onyx.tools.tool_implementations.search.search_tool.get_session_with_current_tenant\",\n            new=mock_get_session,\n        ),\n        patch(\n            \"onyx.tools.tool_implementations.search.search_tool.build_access_filters_for_user\",\n            return_value=[],\n        ),\n        patch(\n            \"onyx.tools.tool_implementations.search.search_tool.get_current_search_settings\",\n            return_value=MagicMock(spec=SearchSettings),\n        ),\n        patch(\n            \"onyx.tools.tool_implementations.search.search_tool.EmbeddingModel.from_db_model\",\n            return_value=MagicMock(spec=EmbeddingModel),\n        ),\n        patch(\n            \"onyx.tools.tool_implementations.search.search_tool.get_federated_retrieval_functions\",\n            return_value=[],\n        ),\n        patch.object(\n            SearchTool,\n            \"_prefetch_slack_data\",\n            return_value=(None, None, {}),\n        ),\n    ):\n        yield controller\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/mock_search_provider.py",
    "content": "import abc\nfrom collections import defaultdict\nfrom collections.abc import Generator\nfrom collections.abc import Sequence\nfrom contextlib import contextmanager\nfrom unittest.mock import patch\n\nfrom pydantic import BaseModel\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import InternetSearchProvider\nfrom onyx.db.web_search import fetch_web_search_provider_by_name\nfrom onyx.tools.tool_implementations.web_search.models import WebSearchProvider\nfrom onyx.tools.tool_implementations.web_search.models import WebSearchResult\nfrom shared_configs.enums import WebSearchProviderType\n\n\nclass MockWebSearchResult(BaseModel):\n    title: str\n    link: str\n    snippet: str\n\n    def to_web_search_result(self) -> WebSearchResult:\n        return WebSearchResult(\n            title=self.title,\n            link=self.link,\n            snippet=self.snippet,\n            author=None,\n            published_date=None,\n        )\n\n\nclass WebProviderController(abc.ABC):\n    @abc.abstractmethod\n    def add_results(self, query: str, results: list[MockWebSearchResult]) -> None:\n        raise NotImplementedError\n\n\nclass MockWebProvider(WebSearchProvider, WebProviderController):\n    def __init__(self) -> None:\n        self._results: dict[str, list[MockWebSearchResult]] = defaultdict(list)\n\n    def add_results(self, query: str, results: list[MockWebSearchResult]) -> None:\n        self._results[query] = results\n\n    def search(self, query: str) -> Sequence[WebSearchResult]:\n        return list(\n            map(lambda result: result.to_web_search_result(), self._results[query])\n        )\n\n    def test_connection(self) -> dict[str, str]:\n        return {}\n\n\ndef add_web_provider_to_db(db_session: Session) -> None:\n    # Write a provider to the database\n    if fetch_web_search_provider_by_name(name=\"Test Provider 2\", db_session=db_session):\n        return\n\n    provider = InternetSearchProvider(\n        name=\"Test Provider 2\",\n        provider_type=WebSearchProviderType.EXA.value,\n        api_key=\"test-api-key\",\n        config={},\n        is_active=True,\n    )\n\n    db_session.add(provider)\n    db_session.commit()\n\n\ndef delete_web_provider_from_db(db_session: Session) -> None:\n    provider = fetch_web_search_provider_by_name(\n        name=\"Test Provider 2\", db_session=db_session\n    )\n    if provider is not None:\n        db_session.delete(provider)\n        db_session.commit()\n\n\n@contextmanager\ndef use_mock_web_provider(\n    db_session: Session,\n) -> Generator[WebProviderController, None, None]:\n    web_provider = MockWebProvider()\n\n    # Write the tool to the database\n    add_web_provider_to_db(db_session)\n\n    # override the build function\n    with patch(\n        \"onyx.tools.tool_implementations.web_search.web_search_tool.build_search_provider_from_config\",\n        return_value=web_provider,\n    ):\n        yield web_provider\n\n    delete_web_provider_from_db(db_session)\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/opensearch/test_assistant_knowledge_filter.py",
    "content": "\"\"\"Tests for OpenSearch assistant knowledge filter construction.\n\nThese tests verify that when an assistant (persona) has knowledge attached,\nthe search filter includes the appropriate scope filters with OR logic (not AND),\nensuring documents are discoverable across knowledge types like attached documents,\nhierarchy nodes, document sets, and persona/project user files.\n\"\"\"\n\nfrom typing import Any\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.schema import ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import DOCUMENT_ID_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import DOCUMENT_SETS_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import PERSONAS_FIELD_NAME\nfrom onyx.document_index.opensearch.search import DocumentQuery\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\nATTACHED_DOCUMENT_ID = \"https://docs.google.com/document/d/test-doc-id\"\nHIERARCHY_NODE_ID = 42\nPERSONA_ID = 7\nKNOWLEDGE_FILTER_SCHEMA_FIELDS = {\n    DOCUMENT_ID_FIELD_NAME,\n    ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME,\n    DOCUMENT_SETS_FIELD_NAME,\n    PERSONAS_FIELD_NAME,\n}\n\n\ndef _get_search_filters(\n    source_types: list[DocumentSource],\n    attached_document_ids: list[str] | None,\n    hierarchy_node_ids: list[int] | None,\n    persona_id_filter: int | None = None,\n    document_sets: list[str] | None = None,\n) -> list[dict[str, Any]]:\n    return DocumentQuery._get_search_filters(\n        tenant_state=TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False),\n        include_hidden=False,\n        access_control_list=[\"user_email:test@example.com\"],\n        source_types=source_types,\n        tags=[],\n        document_sets=document_sets or [],\n        project_id_filter=None,\n        persona_id_filter=persona_id_filter,\n        time_cutoff=None,\n        min_chunk_index=None,\n        max_chunk_index=None,\n        max_chunk_size=None,\n        document_id=None,\n        attached_document_ids=attached_document_ids,\n        hierarchy_node_ids=hierarchy_node_ids,\n    )\n\n\nclass TestAssistantKnowledgeFilter:\n    \"\"\"Tests for assistant knowledge filter construction in OpenSearch queries.\"\"\"\n\n    def test_persona_id_filter_added_when_knowledge_scope_exists(self) -> None:\n        \"\"\"persona_id_filter should be OR'd into the knowledge scope filter\n        when explicit knowledge attachments (attached_document_ids,\n        hierarchy_node_ids, document_sets) are present.\"\"\"\n        filter_clauses = _get_search_filters(\n            source_types=[DocumentSource.FILE],\n            attached_document_ids=[ATTACHED_DOCUMENT_ID],\n            hierarchy_node_ids=[HIERARCHY_NODE_ID],\n            persona_id_filter=PERSONA_ID,\n        )\n\n        knowledge_filter = None\n        for clause in filter_clauses:\n            if \"bool\" in clause and \"should\" in clause[\"bool\"]:\n                if (\n                    clause[\"bool\"].get(\"minimum_should_match\") == 1\n                    and len(clause[\"bool\"][\"should\"]) > 0\n                    and (\n                        (\n                            clause[\"bool\"][\"should\"][0].get(\"term\", {}).keys()\n                            and list(\n                                clause[\"bool\"][\"should\"][0].get(\"term\", {}).keys()\n                            )[0]\n                            in KNOWLEDGE_FILTER_SCHEMA_FIELDS\n                        )\n                        or (\n                            clause[\"bool\"][\"should\"][0].get(\"terms\", {}).keys()\n                            and list(\n                                clause[\"bool\"][\"should\"][0].get(\"terms\", {}).keys()\n                            )[0]\n                            in KNOWLEDGE_FILTER_SCHEMA_FIELDS\n                        )\n                    )\n                ):\n                    knowledge_filter = clause\n                    break\n\n        assert knowledge_filter is not None, (\n            \"Expected to find an assistant knowledge filter with \"\n            \"'minimum_should_match: 1'\"\n        )\n\n        should_clauses = knowledge_filter[\"bool\"][\"should\"]\n        persona_found = any(\n            clause.get(\"term\", {}).get(PERSONAS_FIELD_NAME, {}).get(\"value\")\n            == PERSONA_ID\n            for clause in should_clauses\n        )\n        assert persona_found, (\n            f\"Expected persona_id={PERSONA_ID} filter on {PERSONAS_FIELD_NAME} \"\n            f\"in should clauses. Got: {should_clauses}\"\n        )\n\n    def test_persona_id_filter_alone_creates_knowledge_scope(self) -> None:\n        \"\"\"persona_id_filter IS a primary knowledge scope trigger — a persona\n        with user files is explicit knowledge, so it should restrict\n        search on its own.\"\"\"\n        filter_clauses = _get_search_filters(\n            source_types=[],\n            attached_document_ids=None,\n            hierarchy_node_ids=None,\n            persona_id_filter=PERSONA_ID,\n        )\n\n        knowledge_filter = None\n        for clause in filter_clauses:\n            if \"bool\" in clause and \"should\" in clause[\"bool\"]:\n                if (\n                    clause[\"bool\"].get(\"minimum_should_match\") == 1\n                    and len(clause[\"bool\"][\"should\"]) > 0\n                    and (\n                        (\n                            clause[\"bool\"][\"should\"][0].get(\"term\", {}).keys()\n                            and list(\n                                clause[\"bool\"][\"should\"][0].get(\"term\", {}).keys()\n                            )[0]\n                            in KNOWLEDGE_FILTER_SCHEMA_FIELDS\n                        )\n                        or (\n                            clause[\"bool\"][\"should\"][0].get(\"terms\", {}).keys()\n                            and list(\n                                clause[\"bool\"][\"should\"][0].get(\"terms\", {}).keys()\n                            )[0]\n                            in KNOWLEDGE_FILTER_SCHEMA_FIELDS\n                        )\n                    )\n                ):\n                    knowledge_filter = clause\n                    break\n\n        assert (\n            knowledge_filter is not None\n        ), \"Expected persona_id_filter alone to create a knowledge scope filter\"\n        persona_found = any(\n            clause.get(\"term\", {}).get(PERSONAS_FIELD_NAME, {}).get(\"value\")\n            == PERSONA_ID\n            for clause in knowledge_filter[\"bool\"][\"should\"]\n        )\n        assert persona_found, (\n            f\"Expected persona_id={PERSONA_ID} filter in knowledge scope. \"\n            f\"Got: {knowledge_filter}\"\n        )\n\n    def test_knowledge_filter_with_document_sets_and_persona_filter(self) -> None:\n        \"\"\"document_sets and persona_id_filter should be OR'd together in\n        the knowledge scope filter.\"\"\"\n        filter_clauses = _get_search_filters(\n            source_types=[],\n            attached_document_ids=None,\n            hierarchy_node_ids=None,\n            persona_id_filter=PERSONA_ID,\n            document_sets=[\"engineering\"],\n        )\n\n        knowledge_filter = None\n        for clause in filter_clauses:\n            if \"bool\" in clause and \"should\" in clause[\"bool\"]:\n                if (\n                    clause[\"bool\"].get(\"minimum_should_match\") == 1\n                    and len(clause[\"bool\"][\"should\"]) > 0\n                    and (\n                        (\n                            clause[\"bool\"][\"should\"][0].get(\"term\", {}).keys()\n                            and list(\n                                clause[\"bool\"][\"should\"][0].get(\"term\", {}).keys()\n                            )[0]\n                            in KNOWLEDGE_FILTER_SCHEMA_FIELDS\n                        )\n                        or (\n                            clause[\"bool\"][\"should\"][0].get(\"terms\", {}).keys()\n                            and list(\n                                clause[\"bool\"][\"should\"][0].get(\"terms\", {}).keys()\n                            )[0]\n                            in KNOWLEDGE_FILTER_SCHEMA_FIELDS\n                        )\n                    )\n                ):\n                    knowledge_filter = clause\n                    break\n\n        assert (\n            knowledge_filter is not None\n        ), \"Expected knowledge filter when document_sets is provided\"\n\n        filter_str = str(knowledge_filter)\n        assert (\n            \"engineering\" in filter_str\n        ), \"Expected document_set 'engineering' in knowledge filter\"\n        assert (\n            str(PERSONA_ID) in filter_str\n        ), f\"Expected persona_id_filter {PERSONA_ID} in knowledge filter\"\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/opensearch/test_opensearch_client.py",
    "content": "\"\"\"External dependency unit tests for OpenSearchIndexClient.\n\nThese tests assume OpenSearch is running and test all implemented methods\nusing real schemas, pipelines, and search queries from the codebase.\n\"\"\"\n\nimport re\nimport uuid\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nimport pytest\nfrom opensearchpy import NotFoundError\n\nfrom onyx.access.models import DocumentAccess\nfrom onyx.access.utils import prefix_user_email\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.client import OpenSearchIndexClient\nfrom onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout\nfrom onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE\nfrom onyx.document_index.opensearch.constants import HybridSearchNormalizationPipeline\nfrom onyx.document_index.opensearch.constants import HybridSearchSubqueryConfiguration\nfrom onyx.document_index.opensearch.opensearch_document_index import (\n    generate_opensearch_filtered_access_control_list,\n)\nfrom onyx.document_index.opensearch.schema import CONTENT_FIELD_NAME\nfrom onyx.document_index.opensearch.schema import DocumentChunk\nfrom onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors\nfrom onyx.document_index.opensearch.schema import DocumentSchema\nfrom onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id\nfrom onyx.document_index.opensearch.search import DocumentQuery\nfrom onyx.document_index.opensearch.search import (\n    get_min_max_normalization_pipeline_name_and_config,\n)\nfrom onyx.document_index.opensearch.search import (\n    get_normalization_pipeline_name_and_config,\n)\nfrom onyx.document_index.opensearch.search import (\n    get_zscore_normalization_pipeline_name_and_config,\n)\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\n\ndef _patch_global_tenant_state(monkeypatch: pytest.MonkeyPatch, state: bool) -> None:\n    \"\"\"Patches MULTI_TENANT wherever necessary for this test file.\n\n    Args:\n        monkeypatch: The test instance's monkeypatch instance, used for\n            patching.\n        state: The intended state of MULTI_TENANT.\n    \"\"\"\n    monkeypatch.setattr(\"shared_configs.configs.MULTI_TENANT\", state)\n    monkeypatch.setattr(\"onyx.document_index.opensearch.schema.MULTI_TENANT\", state)\n\n\ndef _patch_hybrid_search_subquery_configuration(\n    monkeypatch: pytest.MonkeyPatch, configuration: HybridSearchSubqueryConfiguration\n) -> None:\n    \"\"\"\n    Patches HYBRID_SEARCH_SUBQUERY_CONFIGURATION wherever necessary for this\n    test file.\n\n    Args:\n        monkeypatch: The test instance's monkeypatch instance, used for\n            patching.\n        configuration: The intended state of\n            HYBRID_SEARCH_SUBQUERY_CONFIGURATION.\n    \"\"\"\n    monkeypatch.setattr(\n        \"onyx.document_index.opensearch.constants.HYBRID_SEARCH_SUBQUERY_CONFIGURATION\",\n        configuration,\n    )\n    monkeypatch.setattr(\n        \"onyx.document_index.opensearch.search.HYBRID_SEARCH_SUBQUERY_CONFIGURATION\",\n        configuration,\n    )\n\n\ndef _patch_hybrid_search_normalization_pipeline(\n    monkeypatch: pytest.MonkeyPatch, pipeline: HybridSearchNormalizationPipeline\n) -> None:\n    \"\"\"\n    Patches HYBRID_SEARCH_NORMALIZATION_PIPELINE wherever necessary for this\n    test file.\n    \"\"\"\n    monkeypatch.setattr(\n        \"onyx.document_index.opensearch.constants.HYBRID_SEARCH_NORMALIZATION_PIPELINE\",\n        pipeline,\n    )\n    monkeypatch.setattr(\n        \"onyx.document_index.opensearch.search.HYBRID_SEARCH_NORMALIZATION_PIPELINE\",\n        pipeline,\n    )\n\n\ndef _patch_opensearch_match_highlights_disabled(\n    monkeypatch: pytest.MonkeyPatch, disabled: bool\n) -> None:\n    \"\"\"\n    Patches OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED wherever necessary for this\n    test file.\n    \"\"\"\n    monkeypatch.setattr(\n        \"onyx.configs.app_configs.OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED\",\n        disabled,\n    )\n    monkeypatch.setattr(\n        \"onyx.document_index.opensearch.search.OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED\",\n        disabled,\n    )\n\n\ndef _create_test_document_chunk(\n    document_id: str,\n    content: str,\n    tenant_state: TenantState,\n    chunk_index: int = 0,\n    content_vector: list[float] | None = None,\n    title: str | None = None,\n    title_vector: list[float] | None = None,\n    hidden: bool = False,\n    document_access: DocumentAccess = DocumentAccess.build(\n        user_emails=[],\n        user_groups=[],\n        external_user_emails=[],\n        external_user_group_ids=[],\n        is_public=True,\n    ),\n    source_type: DocumentSource = DocumentSource.FILE,\n    last_updated: datetime | None = None,\n) -> DocumentChunk:\n    if content_vector is None:\n        # Generate dummy vector - 128 dimensions for fast testing.\n        content_vector = [0.1] * 128\n\n    # If title is provided but no vector, generate one.\n    if title is not None and title_vector is None:\n        title_vector = [0.2] * 128\n\n    return DocumentChunk(\n        document_id=document_id,\n        chunk_index=chunk_index,\n        title=title,\n        title_vector=title_vector,\n        content=content,\n        content_vector=content_vector,\n        source_type=source_type.value,\n        metadata_list=None,\n        last_updated=last_updated,\n        public=document_access.is_public,\n        access_control_list=generate_opensearch_filtered_access_control_list(\n            document_access\n        ),\n        hidden=hidden,\n        global_boost=0,\n        semantic_identifier=\"Test semantic identifier\",\n        image_file_id=None,\n        source_links=None,\n        blurb=\"Test blurb\",\n        doc_summary=\"Test doc summary\",\n        chunk_context=\"Test chunk context\",\n        document_sets=None,\n        user_projects=None,\n        primary_owners=None,\n        secondary_owners=None,\n        tenant_id=tenant_state,\n    )\n\n\ndef _generate_test_vector(base_value: float = 0.1, dimension: int = 128) -> list[float]:\n    \"\"\"Generates a test vector with slight variations.\n\n    We round to eliminate floating point precision errors when comparing chunks\n    for equality.\n    \"\"\"\n    return [round(base_value + (i * 0.001), 5) for i in range(dimension)]\n\n\n@pytest.fixture(scope=\"module\")\ndef opensearch_available() -> None:\n    \"\"\"Verifies OpenSearch is running, skips all tests if not.\"\"\"\n    if not wait_for_opensearch_with_timeout():\n        pytest.fail(\"OpenSearch is not available.\")\n\n\n@pytest.fixture(scope=\"function\")\ndef test_client(\n    opensearch_available: None,  # noqa: ARG001\n) -> Generator[OpenSearchIndexClient, None, None]:\n    \"\"\"Creates an OpenSearch client for testing with automatic cleanup.\"\"\"\n    test_index_name = f\"test_index_{uuid.uuid4().hex[:8]}\"\n    client = OpenSearchIndexClient(index_name=test_index_name)\n\n    yield client  # Test runs here.\n\n    # Cleanup after test completes.\n    try:\n        client.delete_index()\n    except Exception:\n        pass\n    finally:\n        client.close()\n\n\n@pytest.fixture(scope=\"function\")\ndef search_pipeline(test_client: OpenSearchIndexClient) -> Generator[None, None, None]:\n    \"\"\"Creates a search pipeline for testing with automatic cleanup.\"\"\"\n    min_max_normalization_pipeline_name, min_max_normalization_pipeline_config = (\n        get_min_max_normalization_pipeline_name_and_config()\n    )\n    zscore_normalization_pipeline_name, zscore_normalization_pipeline_config = (\n        get_zscore_normalization_pipeline_name_and_config()\n    )\n    test_client.create_search_pipeline(\n        pipeline_id=min_max_normalization_pipeline_name,\n        pipeline_body=min_max_normalization_pipeline_config,\n    )\n    test_client.create_search_pipeline(\n        pipeline_id=zscore_normalization_pipeline_name,\n        pipeline_body=zscore_normalization_pipeline_config,\n    )\n    yield  # Test runs here.\n    try:\n        test_client.delete_search_pipeline(\n            pipeline_id=min_max_normalization_pipeline_name,\n        )\n        test_client.delete_search_pipeline(\n            pipeline_id=zscore_normalization_pipeline_name,\n        )\n    except Exception:\n        pass\n\n\nclass TestOpenSearchClient:\n    \"\"\"Tests for OpenSearchIndexClient.\"\"\"\n\n    def test_create_index(self, test_client: OpenSearchIndexClient) -> None:\n        \"\"\"Tests creating an index with a real schema.\"\"\"\n        # Precondition.\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=True\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n\n        # Under test.\n        # Should not raise.\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Postcondition.\n        # Verify index exists.\n        assert test_client.validate_index(expected_mappings=mappings) is True\n\n    def test_delete_existing_index(self, test_client: OpenSearchIndexClient) -> None:\n        \"\"\"Tests deleting an existing index returns True.\"\"\"\n        # Precondition.\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=True\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Under test.\n        # Delete should return True.\n        result = test_client.delete_index()\n\n        # Postcondition.\n        assert result is True\n        assert test_client.validate_index(expected_mappings=mappings) is False\n\n    def test_delete_nonexistent_index(self, test_client: OpenSearchIndexClient) -> None:\n        \"\"\"Tests deleting a nonexistent index returns False.\"\"\"\n        # Under test.\n        # Don't create index, just try to delete.\n        result = test_client.delete_index()\n\n        # Postcondition.\n        assert result is False\n\n    def test_index_exists(self, test_client: OpenSearchIndexClient) -> None:\n        \"\"\"Tests checking if an index exists.\"\"\"\n        # Precondition.\n        # Index should not exist before creation.\n        assert test_client.index_exists() is False\n\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=True\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Under test and postcondition.\n        # Index should exist after creation.\n        assert test_client.index_exists() is True\n\n    def test_validate_index(self, test_client: OpenSearchIndexClient) -> None:\n        \"\"\"Tests validating an index.\"\"\"\n        # Precondition.\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=True\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n\n        # Under test and postcondition.\n        # Should return False before creation.\n        assert test_client.validate_index(expected_mappings=mappings) is False\n\n        # Precondition.\n        # Create index.\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Under test and postcondition.\n        # Should return True after creation.\n        assert test_client.validate_index(expected_mappings=mappings) is True\n\n    def test_put_mapping_idempotent(self, test_client: OpenSearchIndexClient) -> None:\n        \"\"\"Tests put_mapping with same schema is idempotent.\"\"\"\n        # Precondition.\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=True\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Under test.\n        # Applying the same mappings again should succeed.\n        test_client.put_mapping(mappings)\n\n        # Postcondition.\n        # Index should still be valid.\n        assert test_client.validate_index(expected_mappings=mappings)\n\n    def test_put_mapping_adds_new_field(\n        self, test_client: OpenSearchIndexClient\n    ) -> None:\n        \"\"\"Tests put_mapping successfully adds new fields to existing index.\"\"\"\n        # Precondition.\n        # Create index with minimal schema (just required fields).\n        initial_mappings = {\n            \"dynamic\": \"strict\",\n            \"properties\": {\n                \"document_id\": {\"type\": \"keyword\"},\n                \"chunk_index\": {\"type\": \"integer\"},\n                \"content\": {\"type\": \"text\"},\n                \"content_vector\": {\n                    \"type\": \"knn_vector\",\n                    \"dimension\": 128,\n                    \"method\": {\n                        \"name\": \"hnsw\",\n                        \"space_type\": \"cosinesimil\",\n                        \"engine\": \"lucene\",\n                        \"parameters\": {\"ef_construction\": 512, \"m\": 16},\n                    },\n                },\n            },\n        }\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=initial_mappings, settings=settings)\n\n        # Under test.\n        # Add a new field using put_mapping.\n        updated_mappings = {\n            \"properties\": {\n                \"document_id\": {\"type\": \"keyword\"},\n                \"chunk_index\": {\"type\": \"integer\"},\n                \"content\": {\"type\": \"text\"},\n                \"content_vector\": {\n                    \"type\": \"knn_vector\",\n                    \"dimension\": 128,\n                    \"method\": {\n                        \"name\": \"hnsw\",\n                        \"space_type\": \"cosinesimil\",\n                        \"engine\": \"lucene\",\n                        \"parameters\": {\"ef_construction\": 512, \"m\": 16},\n                    },\n                },\n                # New field\n                \"new_test_field\": {\"type\": \"keyword\"},\n            },\n        }\n        # Should not raise.\n        test_client.put_mapping(updated_mappings)\n\n        # Postcondition.\n        # Validate the new schema includes the new field.\n        assert test_client.validate_index(expected_mappings=updated_mappings)\n\n    def test_put_mapping_fails_on_type_change(\n        self, test_client: OpenSearchIndexClient\n    ) -> None:\n        \"\"\"Tests put_mapping fails when trying to change existing field type.\"\"\"\n        # Precondition.\n        initial_mappings = {\n            \"dynamic\": \"strict\",\n            \"properties\": {\n                \"document_id\": {\"type\": \"keyword\"},\n                \"test_field\": {\"type\": \"keyword\"},\n            },\n        }\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=initial_mappings, settings=settings)\n\n        # Under test and postcondition.\n        # Try to change test_field type from keyword to text.\n        conflicting_mappings = {\n            \"properties\": {\n                \"document_id\": {\"type\": \"keyword\"},\n                \"test_field\": {\"type\": \"text\"},  # Changed from keyword to text\n            },\n        }\n        # Should raise because field type cannot be changed.\n        with pytest.raises(Exception, match=\"mapper|illegal_argument_exception\"):\n            test_client.put_mapping(conflicting_mappings)\n\n    def test_put_mapping_on_nonexistent_index(\n        self, test_client: OpenSearchIndexClient\n    ) -> None:\n        \"\"\"Tests put_mapping on non-existent index raises an error.\"\"\"\n        # Precondition.\n        # Index does not exist yet.\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=True\n        )\n\n        # Under test and postcondition.\n        with pytest.raises(Exception, match=\"index_not_found_exception|404\"):\n            test_client.put_mapping(mappings)\n\n    def test_create_duplicate_index(self, test_client: OpenSearchIndexClient) -> None:\n        \"\"\"Tests creating an index twice raises an error.\"\"\"\n        # Precondition.\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=True\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        # Create once - should succeed.\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Under test and postcondition.\n        # Create again - should raise.\n        with pytest.raises(Exception, match=\"already exists\"):\n            test_client.create_index(mappings=mappings, settings=settings)\n\n    def test_update_settings(self, test_client: OpenSearchIndexClient) -> None:\n        \"\"\"Tests that update_settings raises NotImplementedError.\"\"\"\n        # Under test and postcondition.\n        with pytest.raises(NotImplementedError):\n            test_client.update_settings(settings={})\n\n    def test_create_and_delete_search_pipeline(\n        self, test_client: OpenSearchIndexClient\n    ) -> None:\n        \"\"\"Tests creating and deleting a search pipeline.\"\"\"\n        # Precondition.\n        pipeline_name, pipeline_config = get_normalization_pipeline_name_and_config()\n\n        # Under test and postcondition.\n        # Should not raise.\n        test_client.create_search_pipeline(\n            pipeline_id=pipeline_name,\n            pipeline_body=pipeline_config,\n        )\n\n        # Under test and postcondition.\n        # Should not raise.\n        test_client.delete_search_pipeline(pipeline_id=pipeline_name)\n\n    def test_index_document(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Tests indexing a document.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        doc = _create_test_document_chunk(\n            document_id=\"test-doc-1\",\n            chunk_index=0,\n            content=\"Test content for indexing\",\n            tenant_state=tenant_state,\n        )\n\n        # Under test and postcondition.\n        # Should not raise.\n        test_client.index_document(document=doc, tenant_state=tenant_state)\n        # Should not raise if we supply update_if_exists.\n        test_client.index_document(\n            document=doc, tenant_state=tenant_state, update_if_exists=True\n        )\n\n    def test_bulk_index_documents(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Tests bulk indexing documents.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        docs = [\n            _create_test_document_chunk(\n                document_id=f\"test-doc-{i}\",\n                chunk_index=i,\n                content=f\"Test content for indexing {i}\",\n                tenant_state=tenant_state,\n            )\n            for i in range(500)\n        ]\n\n        # Under test and postcondition.\n        # Should not raise.\n        test_client.bulk_index_documents(documents=docs, tenant_state=tenant_state)\n        # Should not raise if we supply update_if_exists.\n        test_client.bulk_index_documents(\n            documents=docs, tenant_state=tenant_state, update_if_exists=True\n        )\n\n    def test_index_duplicate_document(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Tests indexing a duplicate document raises an error.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        doc = _create_test_document_chunk(\n            document_id=\"test-doc-duplicate\",\n            chunk_index=0,\n            content=\"Duplicate test\",\n            tenant_state=tenant_state,\n        )\n\n        # Index once - should succeed.\n        test_client.index_document(document=doc, tenant_state=tenant_state)\n\n        # Under test and postcondition.\n        # Index again - should raise.\n        with pytest.raises(Exception, match=\"already exists\"):\n            test_client.index_document(document=doc, tenant_state=tenant_state)\n\n    def test_get_document(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Tests getting a document.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        original_doc = _create_test_document_chunk(\n            document_id=\"test-doc-get\",\n            chunk_index=0,\n            content=\"Content to retrieve\",\n            tenant_state=tenant_state,\n            # We only store second precision, so to make sure asserts work in\n            # this test we'll deliberately lose some precision.\n            last_updated=datetime.now(timezone.utc).replace(microsecond=0),\n        )\n        test_client.index_document(document=original_doc, tenant_state=tenant_state)\n\n        # Under test.\n        doc_chunk_id = get_opensearch_doc_chunk_id(\n            tenant_state=tenant_state,\n            document_id=original_doc.document_id,\n            chunk_index=original_doc.chunk_index,\n            max_chunk_size=original_doc.max_chunk_size,\n        )\n        retrieved_doc = test_client.get_document(document_chunk_id=doc_chunk_id)\n\n        # Postcondition.\n        assert retrieved_doc == original_doc\n\n    def test_get_nonexistent_document(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Tests getting a nonexistent document raises an error.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=False\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Under test and postcondition.\n        with pytest.raises(Exception, match=\"404\"):\n            test_client.get_document(\n                document_chunk_id=\"test_source__nonexistent__512__0\"\n            )\n\n    def test_delete_existing_document(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Tests deleting an existing document returns True.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        doc = _create_test_document_chunk(\n            document_id=\"test-doc-delete\",\n            chunk_index=0,\n            content=\"Content to delete\",\n            tenant_state=tenant_state,\n        )\n        test_client.index_document(document=doc, tenant_state=tenant_state)\n\n        # Under test.\n        doc_chunk_id = get_opensearch_doc_chunk_id(\n            tenant_state=tenant_state,\n            document_id=doc.document_id,\n            chunk_index=doc.chunk_index,\n            max_chunk_size=doc.max_chunk_size,\n        )\n        result = test_client.delete_document(document_chunk_id=doc_chunk_id)\n\n        # Postcondition.\n        assert result is True\n        # Verify the document is gone.\n        with pytest.raises(NotFoundError, match=\"404\"):\n            test_client.get_document(document_chunk_id=doc_chunk_id)\n\n    def test_delete_nonexistent_document(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Tests deleting a nonexistent document returns False.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Under test.\n        result = test_client.delete_document(\n            document_chunk_id=\"test_source__nonexistent__512__0\"\n        )\n\n        # Postcondition.\n        assert result is False\n\n    def test_delete_by_query(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Tests deleting documents by query.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Index multiple documents.\n        docs_to_delete = [\n            _create_test_document_chunk(\n                document_id=\"delete-me\",\n                chunk_index=i,\n                content=f\"Delete this {i}\",\n                tenant_state=tenant_state,\n            )\n            for i in range(3)\n        ]\n        docs_to_keep = [\n            _create_test_document_chunk(\n                document_id=\"keep-me\",\n                chunk_index=0,\n                content=\"Keep this\",\n                tenant_state=tenant_state,\n            )\n        ]\n\n        for doc in docs_to_delete + docs_to_keep:\n            test_client.index_document(document=doc, tenant_state=tenant_state)\n        test_client.refresh_index()\n\n        query_body = DocumentQuery.delete_from_document_id_query(\n            document_id=\"delete-me\",\n            tenant_state=tenant_state,\n        )\n\n        # Under test.\n        num_deleted = test_client.delete_by_query(query_body=query_body)\n\n        # Postcondition.\n        assert num_deleted == 3\n\n        # Verify deletion - the deleted documents should no longer exist.\n        test_client.refresh_index()\n        search_query = DocumentQuery.get_from_document_id_query(\n            document_id=\"delete-me\",\n            tenant_state=tenant_state,\n            index_filters=IndexFilters(access_control_list=None, tenant_id=None),\n            include_hidden=False,\n            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,\n            min_chunk_index=None,\n            max_chunk_index=None,\n            get_full_document=False,\n        )\n        remaining_ids = test_client.search_for_document_ids(body=search_query)\n        assert len(remaining_ids) == 0\n\n        # Verify other documents still exist.\n        keep_query = DocumentQuery.get_from_document_id_query(\n            document_id=\"keep-me\",\n            tenant_state=tenant_state,\n            index_filters=IndexFilters(access_control_list=None, tenant_id=None),\n            include_hidden=False,\n            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,\n            min_chunk_index=None,\n            max_chunk_index=None,\n            get_full_document=False,\n        )\n        keep_ids = test_client.search_for_document_ids(body=keep_query)\n        assert len(keep_ids) == 1\n\n    def test_update_document(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Tests updating a document's properties.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Create a document to update.\n        doc = _create_test_document_chunk(\n            document_id=\"test-doc-update\",\n            chunk_index=0,\n            content=\"Original content\",\n            tenant_state=tenant_state,\n            hidden=False,\n        )\n        test_client.index_document(document=doc, tenant_state=tenant_state)\n\n        # Under test.\n        doc_chunk_id = get_opensearch_doc_chunk_id(\n            tenant_state=tenant_state,\n            document_id=doc.document_id,\n            chunk_index=doc.chunk_index,\n            max_chunk_size=doc.max_chunk_size,\n        )\n        properties_to_update = {\n            \"hidden\": True,\n            \"global_boost\": 5,\n        }\n        test_client.update_document(\n            document_chunk_id=doc_chunk_id,\n            properties_to_update=properties_to_update,\n        )\n\n        # Postcondition.\n        # Retrieve the document and verify updates were applied.\n        updated_doc = test_client.get_document(document_chunk_id=doc_chunk_id)\n        assert updated_doc.hidden is True\n        assert updated_doc.global_boost == 5\n        # Other properties should remain unchanged.\n        assert updated_doc.document_id == doc.document_id\n        assert updated_doc.content == doc.content\n        assert updated_doc.public == doc.public\n\n    def test_update_nonexistent_document(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Tests updating a nonexistent document raises an error.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Under test and postcondition.\n        # Try to update a document that doesn't exist.\n        with pytest.raises(NotFoundError, match=\"404\"):\n            test_client.update_document(\n                document_chunk_id=\"test_source__nonexistent__512__0\",\n                properties_to_update={\"hidden\": True},\n            )\n\n    def test_hybrid_search_configurations_and_pipelines(\n        self,\n        test_client: OpenSearchIndexClient,\n        search_pipeline: None,  # noqa: ARG002\n        monkeypatch: pytest.MonkeyPatch,\n    ) -> None:\n        \"\"\"Tests all hybrid search configurations and pipelines.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        _patch_opensearch_match_highlights_disabled(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n        # Index documents.\n        docs = {\n            \"doc-1\": _create_test_document_chunk(\n                document_id=\"doc-1\",\n                chunk_index=0,\n                content=\"Python programming language tutorial\",\n                content_vector=_generate_test_vector(0.1),\n                tenant_state=tenant_state,\n            ),\n            \"doc-2\": _create_test_document_chunk(\n                document_id=\"doc-2\",\n                chunk_index=0,\n                content=\"How to make cheese\",\n                content_vector=_generate_test_vector(0.2),\n                tenant_state=tenant_state,\n            ),\n            \"doc-3\": _create_test_document_chunk(\n                document_id=\"doc-3\",\n                chunk_index=0,\n                content=\"C++ for newborns\",\n                content_vector=_generate_test_vector(0.15),\n                tenant_state=tenant_state,\n            ),\n        }\n        for doc in docs.values():\n            test_client.index_document(document=doc, tenant_state=tenant_state)\n\n        # Refresh index to make documents searchable.\n        test_client.refresh_index()\n\n        for configuration in HybridSearchSubqueryConfiguration:\n            _patch_hybrid_search_subquery_configuration(monkeypatch, configuration)\n            for pipeline in HybridSearchNormalizationPipeline:\n                _patch_hybrid_search_normalization_pipeline(monkeypatch, pipeline)\n                pipeline_name, pipeline_config = (\n                    get_normalization_pipeline_name_and_config()\n                )\n                test_client.create_search_pipeline(\n                    pipeline_id=pipeline_name,\n                    pipeline_body=pipeline_config,\n                )\n\n                # Search query.\n                query_text = \"Python programming\"\n                query_vector = _generate_test_vector(0.12)\n                search_body = DocumentQuery.get_hybrid_search_query(\n                    query_text=query_text,\n                    query_vector=query_vector,\n                    num_hits=5,\n                    tenant_state=tenant_state,\n                    # We're not worried about filtering here. tenant_id in this object\n                    # is not relevant.\n                    index_filters=IndexFilters(\n                        access_control_list=None, tenant_id=None\n                    ),\n                    include_hidden=False,\n                )\n\n                # Under test.\n                results = test_client.search(\n                    body=search_body, search_pipeline_id=pipeline_name\n                )\n\n                # Postcondition.\n                assert len(results) == len(docs)\n                # Assert that all the chunks above are present.\n                assert all(\n                    chunk.document_chunk.document_id in docs.keys() for chunk in results\n                )\n                # Make sure the chunk contents are preserved.\n                for i, chunk in enumerate(results):\n                    expected = docs[chunk.document_chunk.document_id]\n                    assert chunk.document_chunk == DocumentChunkWithoutVectors(\n                        **{\n                            k: getattr(expected, k)\n                            for k in DocumentChunkWithoutVectors.model_fields\n                        }\n                    )\n                    # Make sure score reporting seems reasonable (it should not be None\n                    # or 0).\n                    assert chunk.score\n                    # Make sure there is some kind of match highlight only for the first\n                    # result. The other results are so bad they're not expected to have\n                    # match highlights.\n                    if i == 0:\n                        assert chunk.match_highlights.get(CONTENT_FIELD_NAME, [])\n\n    def test_search_empty_index(\n        self,\n        test_client: OpenSearchIndexClient,\n        search_pipeline: None,  # noqa: ARG002\n        monkeypatch: pytest.MonkeyPatch,\n    ) -> None:\n        \"\"\"Tests search on an empty index returns an empty list.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n        # Note no documents were indexed.\n\n        # Search query.\n        query_text = \"test query\"\n        query_vector = _generate_test_vector(0.5)\n        search_body = DocumentQuery.get_hybrid_search_query(\n            query_text=query_text,\n            query_vector=query_vector,\n            num_hits=5,\n            tenant_state=tenant_state,\n            # We're not worried about filtering here. tenant_id in this object\n            # is not relevant.\n            index_filters=IndexFilters(access_control_list=None, tenant_id=None),\n            include_hidden=False,\n        )\n        pipeline_name, _ = get_normalization_pipeline_name_and_config()\n\n        # Under test.\n        results = test_client.search(body=search_body, search_pipeline_id=pipeline_name)\n\n        # Postcondition.\n        assert len(results) == 0\n\n    def test_hybrid_search_with_pipeline_and_filters(\n        self,\n        test_client: OpenSearchIndexClient,\n        search_pipeline: None,  # noqa: ARG002\n        monkeypatch: pytest.MonkeyPatch,\n    ) -> None:\n        \"\"\"\n        Tests search filters for ACL, hidden documents, and tenant isolation.\n        \"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, True)\n        _patch_opensearch_match_highlights_disabled(monkeypatch, False)\n        tenant_x = TenantState(tenant_id=\"tenant-x\", multitenant=True)\n        tenant_y = TenantState(tenant_id=\"tenant-y\", multitenant=True)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_x.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Index documents with different public/hidden, ACL, and tenant states.\n        docs = {\n            \"public-doc\": _create_test_document_chunk(\n                document_id=\"public-doc\",\n                chunk_index=0,\n                content=\"Public document content\",\n                hidden=False,\n                tenant_state=tenant_x,\n            ),\n            \"hidden-doc\": _create_test_document_chunk(\n                document_id=\"hidden-doc\",\n                chunk_index=0,\n                content=\"Hidden document content, spooky\",\n                hidden=True,\n                tenant_state=tenant_x,\n            ),\n            \"private-doc-user-a\": _create_test_document_chunk(\n                document_id=\"private-doc-user-a\",\n                chunk_index=0,\n                content=\"Private document content, btw my SSN is 123-45-6789\",\n                hidden=False,\n                tenant_state=tenant_x,\n                document_access=DocumentAccess.build(\n                    user_emails=[\"user-a@example.com\", \"user-b@example.com\"],\n                    user_groups=[],\n                    external_user_emails=[],\n                    external_user_group_ids=[],\n                    is_public=False,\n                ),\n            ),\n            \"private-doc-user-b\": _create_test_document_chunk(\n                document_id=\"private-doc-user-b\",\n                chunk_index=0,\n                content=\"Private document content, btw my SSN is 987-65-4321\",\n                hidden=False,\n                tenant_state=tenant_x,\n                document_access=DocumentAccess.build(\n                    user_emails=[\"user-b@example.com\"],\n                    user_groups=[],\n                    external_user_emails=[],\n                    external_user_group_ids=[],\n                    is_public=False,\n                ),\n            ),\n            \"should-not-exist-from-tenant-x-pov\": _create_test_document_chunk(\n                document_id=\"should-not-exist-from-tenant-x-pov\",\n                chunk_index=0,\n                content=\"This is an entirely different tenant, x should never see this\",\n                # Make this as permissive as possible to exercise tenant\n                # isolation.\n                hidden=False,\n                tenant_state=tenant_y,\n            ),\n        }\n        for doc in docs.values():\n            test_client.index_document(document=doc, tenant_state=doc.tenant_id)\n\n        # Refresh index to make documents searchable.\n        test_client.refresh_index()\n\n        query_text = \"document content\"\n        query_vector = _generate_test_vector(0.6)\n        search_body = DocumentQuery.get_hybrid_search_query(\n            query_text=query_text,\n            query_vector=query_vector,\n            num_hits=5,\n            tenant_state=tenant_x,\n            # The user should only be able to see their private docs. tenant_id\n            # in this object is not relevant.\n            index_filters=IndexFilters(\n                access_control_list=[\n                    prefix_user_email(\"user-a@example.com\"),\n                    prefix_user_email(\"user-c@example.com\"),\n                ],\n                tenant_id=None,\n            ),\n            include_hidden=False,\n        )\n        pipeline_name, _ = get_normalization_pipeline_name_and_config()\n\n        # Under test.\n        results = test_client.search(body=search_body, search_pipeline_id=pipeline_name)\n\n        # Postcondition.\n        # Should only get the public, non-hidden document, and the private\n        # document for which the user has access.\n        assert len(results) == 2\n        # NOTE: This test is not explicitly testing for how well results are\n        # ordered; we're just assuming which doc will be the first result here.\n        assert results[0].document_chunk.document_id == \"public-doc\"\n        # Make sure the chunk contents are preserved.\n        assert results[0].document_chunk == DocumentChunkWithoutVectors(\n            **{\n                k: getattr(docs[\"public-doc\"], k)\n                for k in DocumentChunkWithoutVectors.model_fields\n            }\n        )\n        # Make sure score reporting seems reasonable (it should not be None\n        # or 0).\n        assert results[0].score\n        # Make sure there is some kind of match highlight.\n        assert results[0].match_highlights.get(CONTENT_FIELD_NAME, [])\n        # Same for the second result.\n        assert results[1].document_chunk.document_id == \"private-doc-user-a\"\n        assert results[1].document_chunk == DocumentChunkWithoutVectors(\n            **{\n                k: getattr(docs[\"private-doc-user-a\"], k)\n                for k in DocumentChunkWithoutVectors.model_fields\n            }\n        )\n        assert results[1].score\n        assert results[1].match_highlights.get(CONTENT_FIELD_NAME, [])\n\n    def test_hybrid_search_with_pipeline_and_filters_returns_chunks_with_related_content_first(\n        self,\n        test_client: OpenSearchIndexClient,\n        search_pipeline: None,  # noqa: ARG002\n        monkeypatch: pytest.MonkeyPatch,\n    ) -> None:\n        \"\"\"\n        Tests search with a normalization pipeline and filters returns chunks\n        with related content first.\n        \"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, True)\n        _patch_opensearch_match_highlights_disabled(monkeypatch, False)\n        tenant_x = TenantState(tenant_id=\"tenant-x\", multitenant=True)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_x.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Index documents with varying relevance to the query.\n        # Vectors closer to query_vector (0.1) should rank higher.\n        docs = [\n            _create_test_document_chunk(\n                document_id=\"highly-relevant\",\n                chunk_index=0,\n                content=\"Artificial intelligence and machine learning transform technology\",\n                content_vector=_generate_test_vector(\n                    0.1\n                ),  # Very close to query vector.\n                hidden=False,\n                tenant_state=tenant_x,\n            ),\n            _create_test_document_chunk(\n                document_id=\"somewhat-relevant\",\n                chunk_index=0,\n                content=\"Computer programming with various languages\",\n                content_vector=_generate_test_vector(0.5),  # Far from query vector.\n                hidden=False,\n                tenant_state=tenant_x,\n            ),\n            _create_test_document_chunk(\n                document_id=\"not-very-relevant\",\n                chunk_index=0,\n                content=\"Cooking recipes for delicious meals\",\n                content_vector=_generate_test_vector(\n                    0.9\n                ),  # Very far from query vector.\n                hidden=False,\n                tenant_state=tenant_x,\n            ),\n            # These should be filtered out by public/hidden filters.\n            _create_test_document_chunk(\n                document_id=\"hidden-but-relevant\",\n                chunk_index=0,\n                content=\"Artificial intelligence research papers\",\n                content_vector=_generate_test_vector(0.05),  # Very close but hidden.\n                hidden=True,\n                tenant_state=tenant_x,\n            ),\n            _create_test_document_chunk(\n                document_id=\"private-but-relevant\",\n                chunk_index=0,\n                content=\"Artificial intelligence industry analysis\",\n                content_vector=_generate_test_vector(0.08),  # Very close but private.\n                document_access=DocumentAccess.build(\n                    user_emails=[],\n                    user_groups=[],\n                    external_user_emails=[],\n                    external_user_group_ids=[],\n                    is_public=False,\n                ),\n                hidden=False,\n                tenant_state=tenant_x,\n            ),\n        ]\n        for doc in docs:\n            test_client.index_document(document=doc, tenant_state=tenant_x)\n\n        # Refresh index to make documents searchable.\n        test_client.refresh_index()\n\n        # Search query matching \"highly-relevant\" most closely.\n        query_text = \"artificial intelligence\"\n        query_vector = _generate_test_vector(0.1)\n        search_body = DocumentQuery.get_hybrid_search_query(\n            query_text=query_text,\n            query_vector=query_vector,\n            num_hits=5,\n            tenant_state=tenant_x,\n            # Explicitly pass in an empty list to enforce private doc filtering.\n            index_filters=IndexFilters(access_control_list=[], tenant_id=None),\n            include_hidden=False,\n        )\n        pipeline_name, _ = get_normalization_pipeline_name_and_config()\n\n        # Under test.\n        results = test_client.search(body=search_body, search_pipeline_id=pipeline_name)\n\n        # Postcondition.\n        # Should only get public, non-hidden documents (3 out of 5).\n        assert len(results) == 3\n        result_ids = [chunk.document_chunk.document_id for chunk in results]\n        assert \"highly-relevant\" in result_ids\n        assert \"somewhat-relevant\" in result_ids\n        assert \"not-very-relevant\" in result_ids\n        # Filtered out by public/hidden constraints.\n        assert \"hidden-but-relevant\" not in result_ids\n        assert \"private-but-relevant\" not in result_ids\n\n        # Most relevant document should be first.\n        assert results[0].document_chunk.document_id == \"highly-relevant\"\n\n        # Make sure there is some kind of match highlight for the most relevant\n        # result.\n        match_highlights = results[0].match_highlights.get(CONTENT_FIELD_NAME, [])\n        assert len(match_highlights) == 1\n        # We expect the terms \"Artificial\" and \"intelligence\" to be matched.\n        highlight_split = re.findall(r\"<hi>(.*?)</hi>\", match_highlights[0])\n        assert len(highlight_split) == 2\n        assert highlight_split[0] == \"Artificial\"\n        assert highlight_split[1] == \"intelligence\"\n\n        # Returned documents should be ordered by descending score.\n        previous_score = float(\"inf\")\n        for result in results:\n            current_score = result.score\n            assert current_score\n            assert current_score < previous_score\n            previous_score = current_score\n\n    def test_delete_by_query_multitenant_isolation(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"\n        Tests delete_by_query respects tenant boundaries in multi-tenant mode.\n        \"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, True)\n        tenant_x = TenantState(tenant_id=\"tenant-x\", multitenant=True)\n        tenant_y = TenantState(tenant_id=\"tenant-y\", multitenant=True)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_x.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Although very unlikely in practice, let's use the same doc ID just to\n        # make sure that doesn't break the index.\n        tenant_x_chunks = [\n            _create_test_document_chunk(\n                document_id=\"doc\",\n                chunk_index=i,\n                content=f\"Tenant A Chunk {i}\",\n                tenant_state=tenant_x,\n            )\n            for i in range(3)\n        ]\n\n        tenant_y_chunks = [\n            _create_test_document_chunk(\n                document_id=\"doc\",\n                chunk_index=i,\n                content=f\"Tenant B Chunk {i}\",\n                tenant_state=tenant_y,\n            )\n            for i in range(2)\n        ]\n\n        for chunk in tenant_x_chunks + tenant_y_chunks:\n            test_client.index_document(document=chunk, tenant_state=chunk.tenant_id)\n        test_client.refresh_index()\n\n        # Build deletion query for tenant-x only.\n        query_body = DocumentQuery.delete_from_document_id_query(\n            document_id=\"doc\",\n            tenant_state=tenant_x,\n        )\n\n        # Under test.\n        # Delete tenant-x chunks using delete_by_query.\n        num_deleted = test_client.delete_by_query(query_body=query_body)\n\n        # Postcondition.\n        assert num_deleted == 3\n\n        # Verify tenant-x chunks are deleted.\n        test_client.refresh_index()\n        verify_query_x = DocumentQuery.get_from_document_id_query(\n            document_id=\"doc\",\n            tenant_state=tenant_x,\n            index_filters=IndexFilters(access_control_list=None, tenant_id=None),\n            include_hidden=False,\n            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,\n            min_chunk_index=None,\n            max_chunk_index=None,\n            get_full_document=False,\n        )\n        remaining_a_ids = test_client.search_for_document_ids(body=verify_query_x)\n        assert len(remaining_a_ids) == 0\n\n        # Verify tenant-y chunks still exist.\n        verify_query_y = DocumentQuery.get_from_document_id_query(\n            document_id=\"doc\",\n            tenant_state=tenant_y,\n            index_filters=IndexFilters(access_control_list=None, tenant_id=None),\n            include_hidden=False,\n            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,\n            min_chunk_index=None,\n            max_chunk_index=None,\n            get_full_document=False,\n        )\n        remaining_y_ids = test_client.search_for_document_ids(body=verify_query_y)\n        assert len(remaining_y_ids) == 2\n        expected_y_ids = {\n            get_opensearch_doc_chunk_id(\n                tenant_state=tenant_y,\n                document_id=chunk.document_id,\n                chunk_index=chunk.chunk_index,\n                max_chunk_size=chunk.max_chunk_size,\n            )\n            for chunk in tenant_y_chunks\n        }\n        assert set(remaining_y_ids) == expected_y_ids\n\n    def test_delete_by_query_nonexistent_document(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"\n        Tests delete_by_query for non-existent document returns 0 deleted.\n        \"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Don't index any documents.\n\n        # Build deletion query.\n        query_body = DocumentQuery.delete_from_document_id_query(\n            document_id=\"nonexistent-doc\",\n            tenant_state=tenant_state,\n        )\n\n        # Under test.\n        num_deleted = test_client.delete_by_query(query_body=query_body)\n\n        # Postcondition.\n        assert num_deleted == 0\n\n    def test_search_for_document_ids(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Tests search_for_document_ids method returns correct chunk IDs.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Index chunks for two different documents.\n        doc1_chunks = [\n            _create_test_document_chunk(\n                document_id=\"doc-1\",\n                chunk_index=i,\n                content=f\"Doc 1 Chunk {i}\",\n                tenant_state=tenant_state,\n            )\n            for i in range(3)\n        ]\n        doc2_chunks = [\n            _create_test_document_chunk(\n                document_id=\"doc-2\",\n                chunk_index=i,\n                content=f\"Doc 2 Chunk {i}\",\n                tenant_state=tenant_state,\n            )\n            for i in range(2)\n        ]\n\n        for chunk in doc1_chunks + doc2_chunks:\n            test_client.index_document(document=chunk, tenant_state=tenant_state)\n        test_client.refresh_index()\n\n        # Build query for doc-1.\n        query_body = DocumentQuery.get_from_document_id_query(\n            document_id=\"doc-1\",\n            tenant_state=tenant_state,\n            index_filters=IndexFilters(access_control_list=None, tenant_id=None),\n            include_hidden=False,\n            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,\n            min_chunk_index=None,\n            max_chunk_index=None,\n            get_full_document=False,\n        )\n\n        # Under test.\n        chunk_ids = test_client.search_for_document_ids(body=query_body)\n\n        # Postcondition.\n        assert len(chunk_ids) == 3\n        expected_ids = {\n            get_opensearch_doc_chunk_id(\n                tenant_state=tenant_state,\n                document_id=chunk.document_id,\n                chunk_index=chunk.chunk_index,\n                max_chunk_size=chunk.max_chunk_size,\n            )\n            for chunk in doc1_chunks\n        }\n        assert set(chunk_ids) == expected_ids\n\n    def test_search_with_no_document_access_can_retrieve_all_documents(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"\n        Tests search with no document access can retrieve all documents, even\n        private ones.\n        \"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Index documents with different public/hidden and tenant states.\n        docs = {\n            \"public-doc\": _create_test_document_chunk(\n                document_id=\"public-doc\",\n                chunk_index=0,\n                content=\"Public document content\",\n                hidden=False,\n                tenant_state=tenant_state,\n            ),\n            \"hidden-doc\": _create_test_document_chunk(\n                document_id=\"hidden-doc\",\n                chunk_index=0,\n                content=\"Hidden document content, spooky\",\n                hidden=True,\n                tenant_state=tenant_state,\n            ),\n            \"private-doc-user-a\": _create_test_document_chunk(\n                document_id=\"private-doc-user-a\",\n                chunk_index=0,\n                content=\"Private document content, btw my SSN is 123-45-6789\",\n                hidden=False,\n                tenant_state=tenant_state,\n                document_access=DocumentAccess.build(\n                    user_emails=[\"user-a@example.com\"],\n                    user_groups=[],\n                    external_user_emails=[],\n                    external_user_group_ids=[],\n                    is_public=False,\n                ),\n            ),\n        }\n        for doc in docs.values():\n            test_client.index_document(document=doc, tenant_state=tenant_state)\n\n        # Refresh index to make documents searchable.\n        test_client.refresh_index()\n\n        # Build query for all documents.\n        query_body = DocumentQuery.get_from_document_id_query(\n            document_id=\"private-doc-user-a\",\n            tenant_state=tenant_state,\n            # This is the input under test, notice None for acl.\n            index_filters=IndexFilters(access_control_list=None, tenant_id=None),\n            include_hidden=False,\n            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,\n            min_chunk_index=None,\n            max_chunk_index=None,\n            get_full_document=False,\n        )\n\n        # Under test.\n        chunk_ids = test_client.search_for_document_ids(body=query_body)\n\n        # Postcondition.\n        # Even though this doc is private, because we supplied None for acl we\n        # were able to retrieve it.\n        assert len(chunk_ids) == 1\n        # Since this is a chunk ID, it will have the doc ID in it plus other\n        # stuff we don't care about in this test.\n        assert chunk_ids[0].startswith(\"private-doc-user-a\")\n\n    def test_time_cutoff_filter(\n        self,\n        test_client: OpenSearchIndexClient,\n        search_pipeline: None,  # noqa: ARG002\n        monkeypatch: pytest.MonkeyPatch,\n    ) -> None:\n        \"\"\"Tests the time cutoff filter works.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Index docs with various ages.\n        one_day_ago = datetime.now(timezone.utc) - timedelta(days=1)\n        one_week_ago = datetime.now(timezone.utc) - timedelta(days=7)\n        six_months_ago = datetime.now(timezone.utc) - timedelta(days=180)\n        one_year_ago = datetime.now(timezone.utc) - timedelta(days=365)\n        docs = [\n            _create_test_document_chunk(\n                document_id=\"one-day-ago\",\n                content=\"Good match\",\n                last_updated=one_day_ago,\n                tenant_state=tenant_state,\n            ),\n            _create_test_document_chunk(\n                document_id=\"one-year-ago\",\n                content=\"Good match\",\n                last_updated=one_year_ago,\n                tenant_state=tenant_state,\n            ),\n            _create_test_document_chunk(\n                document_id=\"no-last-updated\",\n                # Since we test for result ordering in the postconditions, let's\n                # just make this content slightly less of a match with the query\n                # so this test is not flaky from the ordering of the results.\n                content=\"Still an ok match\",\n                last_updated=None,\n                tenant_state=tenant_state,\n            ),\n        ]\n        for doc in docs:\n            test_client.index_document(document=doc, tenant_state=tenant_state)\n\n        # Refresh index to make documents searchable.\n        test_client.refresh_index()\n\n        # Build query for documents updated in the last week.\n        last_week_search_body = DocumentQuery.get_hybrid_search_query(\n            query_text=\"Good match\",\n            query_vector=_generate_test_vector(0.1),\n            num_hits=5,\n            tenant_state=tenant_state,\n            index_filters=IndexFilters(\n                access_control_list=None, tenant_id=None, time_cutoff=one_week_ago\n            ),\n            include_hidden=False,\n        )\n        last_six_months_search_body = DocumentQuery.get_hybrid_search_query(\n            query_text=\"Good match\",\n            query_vector=_generate_test_vector(0.1),\n            num_hits=5,\n            tenant_state=tenant_state,\n            index_filters=IndexFilters(\n                access_control_list=None, tenant_id=None, time_cutoff=six_months_ago\n            ),\n            include_hidden=False,\n        )\n        pipeline_name, _ = get_normalization_pipeline_name_and_config()\n\n        # Under test.\n        last_week_results = test_client.search(\n            body=last_week_search_body,\n            search_pipeline_id=pipeline_name,\n        )\n        last_six_months_results = test_client.search(\n            body=last_six_months_search_body,\n            search_pipeline_id=pipeline_name,\n        )\n\n        # Postcondition.\n        # We expect to only get one-day-ago.\n        assert len(last_week_results) == 1\n        assert last_week_results[0].document_chunk.document_id == \"one-day-ago\"\n        # We expect to get one-day-ago and no-last-updated since six months >\n        # ASSUMED_DOCUMENT_AGE_DAYS.\n        assert len(last_six_months_results) == 2\n        assert last_six_months_results[0].document_chunk.document_id == \"one-day-ago\"\n        assert (\n            last_six_months_results[1].document_chunk.document_id == \"no-last-updated\"\n        )\n\n    def test_random_search(\n        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Tests the random search query works.\"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, False)\n        tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_state.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Index chunks for two different documents, one hidden one not.\n        doc1_chunks = [\n            _create_test_document_chunk(\n                document_id=\"doc-1\",\n                chunk_index=i,\n                content=f\"Doc 1 Chunk {i}\",\n                tenant_state=tenant_state,\n                hidden=False,\n            )\n            for i in range(3)\n        ]\n        doc2_chunks = [\n            _create_test_document_chunk(\n                document_id=\"doc-2\",\n                chunk_index=i,\n                content=f\"Doc 2 Chunk {i}\",\n                tenant_state=tenant_state,\n                hidden=True,\n            )\n            for i in range(2)\n        ]\n\n        for chunk in doc1_chunks + doc2_chunks:\n            test_client.index_document(document=chunk, tenant_state=tenant_state)\n        test_client.refresh_index()\n\n        # Build query.\n        query_body = DocumentQuery.get_random_search_query(\n            tenant_state=tenant_state,\n            index_filters=IndexFilters(\n                access_control_list=None, tenant_id=tenant_state.tenant_id\n            ),\n            num_to_retrieve=3,\n        )\n\n        # Under test.\n        results = test_client.search(body=query_body, search_pipeline_id=None)\n\n        # Postcondition.\n        assert len(results) == 3\n        assert set(result.document_chunk.chunk_index for result in results) == set(\n            [0, 1, 2]\n        )\n        for result in results:\n            # Note each result must be from doc 1, which is not hidden.\n            expected_result = doc1_chunks[result.document_chunk.chunk_index]\n            assert result.document_chunk == DocumentChunkWithoutVectors(\n                **{\n                    k: getattr(expected_result, k)\n                    for k in DocumentChunkWithoutVectors.model_fields\n                }\n            )\n\n    def test_keyword_search(\n        self,\n        test_client: OpenSearchIndexClient,\n        monkeypatch: pytest.MonkeyPatch,\n    ) -> None:\n        \"\"\"\n        Tests keyword search with filters for ACL, hidden documents, and tenant\n        isolation.\n        \"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, True)\n        _patch_opensearch_match_highlights_disabled(monkeypatch, False)\n        tenant_x = TenantState(tenant_id=\"tenant-x\", multitenant=True)\n        tenant_y = TenantState(tenant_id=\"tenant-y\", multitenant=True)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_x.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Index documents with different public/hidden, ACL, and tenant states.\n        docs = {\n            \"public-doc\": _create_test_document_chunk(\n                document_id=\"public-doc\",\n                chunk_index=0,\n                content=\"Public document content\",\n                hidden=False,\n                tenant_state=tenant_x,\n            ),\n            \"hidden-doc\": _create_test_document_chunk(\n                document_id=\"hidden-doc\",\n                chunk_index=0,\n                content=\"Hidden document content, spooky\",\n                hidden=True,\n                tenant_state=tenant_x,\n            ),\n            \"private-doc-user-a\": _create_test_document_chunk(\n                document_id=\"private-doc-user-a\",\n                chunk_index=0,\n                content=\"Private document content, btw my SSN is 123-45-6789\",\n                hidden=False,\n                tenant_state=tenant_x,\n                document_access=DocumentAccess.build(\n                    user_emails=[\"user-a@example.com\", \"user-b@example.com\"],\n                    user_groups=[],\n                    external_user_emails=[],\n                    external_user_group_ids=[],\n                    is_public=False,\n                ),\n            ),\n            # Tests that we don't return documents that don't match keywords at\n            # all, even if they match filters.\n            \"private-but-not-relevant-doc-user-a\": _create_test_document_chunk(\n                document_id=\"private-but-not-relevant-doc-user-a\",\n                chunk_index=0,\n                content=\"This text should not match the query at all\",\n                hidden=False,\n                tenant_state=tenant_x,\n                document_access=DocumentAccess.build(\n                    user_emails=[\"user-a@example.com\"],\n                    user_groups=[],\n                    external_user_emails=[],\n                    external_user_group_ids=[],\n                    is_public=False,\n                ),\n            ),\n            \"private-doc-user-b\": _create_test_document_chunk(\n                document_id=\"private-doc-user-b\",\n                chunk_index=0,\n                content=\"Private document content, btw my SSN is 987-65-4321\",\n                hidden=False,\n                tenant_state=tenant_x,\n                document_access=DocumentAccess.build(\n                    user_emails=[\"user-b@example.com\"],\n                    user_groups=[],\n                    external_user_emails=[],\n                    external_user_group_ids=[],\n                    is_public=False,\n                ),\n            ),\n            \"should-not-exist-from-tenant-x-pov\": _create_test_document_chunk(\n                document_id=\"should-not-exist-from-tenant-x-pov\",\n                chunk_index=0,\n                content=\"This is an entirely different tenant, x should never see this\",\n                # Make this as permissive as possible to exercise tenant\n                # isolation.\n                hidden=False,\n                tenant_state=tenant_y,\n            ),\n        }\n        for doc in docs.values():\n            test_client.index_document(document=doc, tenant_state=doc.tenant_id)\n\n        # Refresh index to make documents searchable.\n        test_client.refresh_index()\n\n        # Should not match private-but-not-relevant-doc-user-a.\n        query_text = \"document content\"\n        search_body = DocumentQuery.get_keyword_search_query(\n            query_text=query_text,\n            num_hits=5,\n            tenant_state=tenant_x,\n            # The user should only be able to see their private docs. tenant_id\n            # in this object is not relevant.\n            index_filters=IndexFilters(\n                access_control_list=[\n                    prefix_user_email(\"user-a@example.com\"),\n                    prefix_user_email(\"user-c@example.com\"),\n                ],\n                tenant_id=None,\n            ),\n            include_hidden=False,\n        )\n\n        # Under test.\n        results = test_client.search(body=search_body, search_pipeline_id=None)\n\n        # Postcondition.\n        # Should only get the public, non-hidden document, and the private\n        # document for which the user has access.\n        assert len(results) == 2\n        # This should be the highest-ranked result, as a higher percentage of\n        # the content matches the query.\n        assert results[0].document_chunk.document_id == \"public-doc\"\n        # Make sure the chunk contents are preserved.\n        assert results[0].document_chunk == DocumentChunkWithoutVectors(\n            **{\n                k: getattr(docs[\"public-doc\"], k)\n                for k in DocumentChunkWithoutVectors.model_fields\n            }\n        )\n        # Make sure score reporting seems reasonable (it should not be None\n        # or 0).\n        assert results[0].score\n        # Make sure there is some kind of match highlight.\n        assert results[0].match_highlights.get(CONTENT_FIELD_NAME, [])\n        # Same for the second result.\n        assert results[1].document_chunk.document_id == \"private-doc-user-a\"\n        assert results[1].document_chunk == DocumentChunkWithoutVectors(\n            **{\n                k: getattr(docs[\"private-doc-user-a\"], k)\n                for k in DocumentChunkWithoutVectors.model_fields\n            }\n        )\n        assert results[1].score\n        assert results[1].match_highlights.get(CONTENT_FIELD_NAME, [])\n        assert results[1].score < results[0].score\n\n    def test_semantic_search(\n        self,\n        test_client: OpenSearchIndexClient,\n        monkeypatch: pytest.MonkeyPatch,\n    ) -> None:\n        \"\"\"\n        Tests semantic search with filters for ACL, hidden documents, and tenant\n        isolation.\n        \"\"\"\n        # Precondition.\n        _patch_global_tenant_state(monkeypatch, True)\n        tenant_x = TenantState(tenant_id=\"tenant-x\", multitenant=True)\n        tenant_y = TenantState(tenant_id=\"tenant-y\", multitenant=True)\n        mappings = DocumentSchema.get_document_schema(\n            vector_dimension=128, multitenant=tenant_x.multitenant\n        )\n        settings = DocumentSchema.get_index_settings_based_on_environment()\n        test_client.create_index(mappings=mappings, settings=settings)\n\n        # Index documents with different public/hidden, ACL, and tenant states.\n        docs = {\n            \"public-doc\": _create_test_document_chunk(\n                document_id=\"public-doc\",\n                chunk_index=0,\n                content=\"Public document content\",\n                hidden=False,\n                tenant_state=tenant_x,\n                # Make this identical to the query vector to test that this\n                # result is returned first.\n                content_vector=_generate_test_vector(0.6),\n            ),\n            \"hidden-doc\": _create_test_document_chunk(\n                document_id=\"hidden-doc\",\n                chunk_index=0,\n                content=\"Hidden document content, spooky\",\n                hidden=True,\n                tenant_state=tenant_x,\n            ),\n            \"private-doc-user-a\": _create_test_document_chunk(\n                document_id=\"private-doc-user-a\",\n                chunk_index=0,\n                content=\"Private document content, btw my SSN is 123-45-6789\",\n                hidden=False,\n                tenant_state=tenant_x,\n                document_access=DocumentAccess.build(\n                    user_emails=[\"user-a@example.com\", \"user-b@example.com\"],\n                    user_groups=[],\n                    external_user_emails=[],\n                    external_user_group_ids=[],\n                    is_public=False,\n                ),\n                # Make this different from the query vector to test that this\n                # result is returned second.\n                content_vector=_generate_test_vector(0.5),\n            ),\n            \"private-doc-user-b\": _create_test_document_chunk(\n                document_id=\"private-doc-user-b\",\n                chunk_index=0,\n                content=\"Private document content, btw my SSN is 987-65-4321\",\n                hidden=False,\n                tenant_state=tenant_x,\n                document_access=DocumentAccess.build(\n                    user_emails=[\"user-b@example.com\"],\n                    user_groups=[],\n                    external_user_emails=[],\n                    external_user_group_ids=[],\n                    is_public=False,\n                ),\n            ),\n            \"should-not-exist-from-tenant-x-pov\": _create_test_document_chunk(\n                document_id=\"should-not-exist-from-tenant-x-pov\",\n                chunk_index=0,\n                content=\"This is an entirely different tenant, x should never see this\",\n                # Make this as permissive as possible to exercise tenant\n                # isolation.\n                hidden=False,\n                tenant_state=tenant_y,\n            ),\n        }\n        for doc in docs.values():\n            test_client.index_document(document=doc, tenant_state=doc.tenant_id)\n\n        # Refresh index to make documents searchable.\n        test_client.refresh_index()\n\n        query_vector = _generate_test_vector(0.6)\n        search_body = DocumentQuery.get_semantic_search_query(\n            query_embedding=query_vector,\n            num_hits=5,\n            tenant_state=tenant_x,\n            # The user should only be able to see their private docs. tenant_id\n            # in this object is not relevant.\n            index_filters=IndexFilters(\n                access_control_list=[\n                    prefix_user_email(\"user-a@example.com\"),\n                    prefix_user_email(\"user-c@example.com\"),\n                ],\n                tenant_id=None,\n            ),\n            include_hidden=False,\n        )\n\n        # Under test.\n        results = test_client.search(body=search_body, search_pipeline_id=None)\n\n        # Postcondition.\n        # Should only get the public, non-hidden document, and the private\n        # document for which the user has access.\n        assert len(results) == 2\n        # We explicitly expect this to be the highest-ranked result.\n        assert results[0].document_chunk.document_id == \"public-doc\"\n        # Make sure the chunk contents are preserved.\n        assert results[0].document_chunk == DocumentChunkWithoutVectors(\n            **{\n                k: getattr(docs[\"public-doc\"], k)\n                for k in DocumentChunkWithoutVectors.model_fields\n            }\n        )\n        assert results[0].score == 1.0\n        # Same for the second result.\n        assert results[1].document_chunk.document_id == \"private-doc-user-a\"\n        assert results[1].document_chunk == DocumentChunkWithoutVectors(\n            **{\n                k: getattr(docs[\"private-doc-user-a\"], k)\n                for k in DocumentChunkWithoutVectors.model_fields\n            }\n        )\n        assert results[1].score\n        assert 0.0 < results[1].score < 1.0\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/opensearch_migration/test_opensearch_migration_tasks.py",
    "content": "\"\"\"External dependency tests for OpenSearch migration celery tasks.\n\nThese tests require Postgres, Redis, Vespa, and OpenSearch to be running.\n\nWARNING: As with all external dependency tests, do not run them against a\ndatabase with data you care about. Your data will be destroyed.\n\"\"\"\n\nimport json\nfrom collections.abc import Generator\nfrom copy import deepcopy\nfrom datetime import datetime\nfrom typing import Any\nfrom unittest.mock import Mock\nfrom unittest.mock import patch\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.background.celery.tasks.opensearch_migration.constants import (\n    GET_VESPA_CHUNKS_SLICE_COUNT,\n)\nfrom onyx.background.celery.tasks.opensearch_migration.tasks import (\n    is_continuation_token_done_for_all_slices,\n)\nfrom onyx.background.celery.tasks.opensearch_migration.tasks import (\n    migrate_chunks_from_vespa_to_opensearch_task,\n)\nfrom onyx.background.celery.tasks.opensearch_migration.transformer import (\n    transform_vespa_chunks_to_opensearch_chunks,\n)\nfrom onyx.configs.constants import PUBLIC_DOC_PAT\nfrom onyx.configs.constants import SOURCE_TYPE\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import Document\nfrom onyx.db.models import OpenSearchDocumentMigrationRecord\nfrom onyx.db.models import OpenSearchTenantMigrationRecord\nfrom onyx.db.opensearch_migration import build_sanitized_to_original_doc_id_mapping\nfrom onyx.db.search_settings import get_active_search_settings\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.client import OpenSearchClient\nfrom onyx.document_index.opensearch.client import OpenSearchIndexClient\nfrom onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout\nfrom onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE\nfrom onyx.document_index.opensearch.schema import DocumentChunk\nfrom onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id\nfrom onyx.document_index.opensearch.search import DocumentQuery\nfrom onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout\nfrom onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex\nfrom onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST\nfrom onyx.document_index.vespa_constants import BLURB\nfrom onyx.document_index.vespa_constants import BOOST\nfrom onyx.document_index.vespa_constants import CHUNK_CONTEXT\nfrom onyx.document_index.vespa_constants import CHUNK_ID\nfrom onyx.document_index.vespa_constants import CONTENT\nfrom onyx.document_index.vespa_constants import DOC_SUMMARY\nfrom onyx.document_index.vespa_constants import DOC_UPDATED_AT\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID\nfrom onyx.document_index.vespa_constants import DOCUMENT_SETS\nfrom onyx.document_index.vespa_constants import EMBEDDINGS\nfrom onyx.document_index.vespa_constants import FULL_CHUNK_EMBEDDING_KEY\nfrom onyx.document_index.vespa_constants import HIDDEN\nfrom onyx.document_index.vespa_constants import IMAGE_FILE_NAME\nfrom onyx.document_index.vespa_constants import METADATA_LIST\nfrom onyx.document_index.vespa_constants import METADATA_SUFFIX\nfrom onyx.document_index.vespa_constants import PRIMARY_OWNERS\nfrom onyx.document_index.vespa_constants import SECONDARY_OWNERS\nfrom onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER\nfrom onyx.document_index.vespa_constants import SOURCE_LINKS\nfrom onyx.document_index.vespa_constants import TITLE\nfrom onyx.document_index.vespa_constants import TITLE_EMBEDDING\nfrom onyx.document_index.vespa_constants import USER_PROJECT\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.contextvars import get_current_tenant_id\nfrom tests.external_dependency_unit.full_setup import ensure_full_deployment_setup\n\n\nCHUNK_COUNT = 5\n\n\ndef _get_document_chunks_from_opensearch(\n    opensearch_client: OpenSearchIndexClient,\n    document_id: str,\n    tenant_state: TenantState,\n) -> list[DocumentChunk]:\n    opensearch_client.refresh_index()\n    results: list[DocumentChunk] = []\n    for i in range(CHUNK_COUNT):\n        document_chunk_id: str = get_opensearch_doc_chunk_id(\n            tenant_state=tenant_state,\n            document_id=document_id,\n            chunk_index=i,\n            max_chunk_size=DEFAULT_MAX_CHUNK_SIZE,\n        )\n        result = opensearch_client.get_document(document_chunk_id)\n        results.append(result)\n    return results\n\n\ndef _delete_document_chunks_from_opensearch(\n    opensearch_client: OpenSearchIndexClient, document_id: str, current_tenant_id: str\n) -> None:\n    opensearch_client.refresh_index()\n    query_body = DocumentQuery.delete_from_document_id_query(\n        document_id=document_id,\n        tenant_state=TenantState(tenant_id=current_tenant_id, multitenant=False),\n    )\n    opensearch_client.delete_by_query(query_body)\n\n\ndef _generate_test_vector(dim: int) -> list[float]:\n    \"\"\"Generate a deterministic test embedding vector.\"\"\"\n    return [0.1 + (i * 0.001) for i in range(dim)]\n\n\ndef _insert_test_documents_with_commit(\n    db_session: Session,\n    document_ids: list[str],\n) -> list[Document]:\n    \"\"\"Creates test Document records in Postgres.\"\"\"\n    documents = [\n        Document(\n            id=document_id,\n            semantic_id=document_id,\n            chunk_count=CHUNK_COUNT,\n        )\n        for document_id in document_ids\n    ]\n    db_session.add_all(documents)\n    db_session.commit()\n    return documents\n\n\ndef _delete_test_documents_with_commit(\n    db_session: Session,\n    documents: list[Document],\n) -> None:\n    \"\"\"Deletes test Document records from Postgres.\"\"\"\n    for document in documents:\n        db_session.delete(document)\n    db_session.commit()\n\n\ndef _insert_test_migration_records_with_commit(\n    db_session: Session,\n    migration_records: list[OpenSearchDocumentMigrationRecord],\n) -> None:\n    db_session.add_all(migration_records)\n    db_session.commit()\n\n\ndef _create_raw_document_chunk(\n    document_id: str,\n    chunk_index: int,\n    content: str,\n    embedding: list[float],\n    now: datetime,\n    title: str | None = None,\n    title_embedding: list[float] | None = None,\n) -> dict[str, Any]:\n    return {\n        DOCUMENT_ID: document_id,\n        CHUNK_ID: chunk_index,\n        CONTENT: content,\n        EMBEDDINGS: {FULL_CHUNK_EMBEDDING_KEY: embedding},\n        TITLE: title,\n        TITLE_EMBEDDING: title_embedding,\n        SOURCE_TYPE: \"test source type\",\n        METADATA_LIST: [\"stuff=things\"],\n        DOC_UPDATED_AT: int(now.timestamp()),\n        HIDDEN: False,\n        BOOST: 1,\n        SEMANTIC_IDENTIFIER: \"test semantic identifier\",\n        IMAGE_FILE_NAME: \"test.png\",\n        SOURCE_LINKS: \"https://test.com\",\n        BLURB: \"test blurb\",\n        DOC_SUMMARY: \"test doc summary\",\n        CHUNK_CONTEXT: \"test chunk context\",\n        METADATA_SUFFIX: \"test metadata suffix\",\n        DOCUMENT_SETS: {\"test document set\": 1},\n        USER_PROJECT: [1],\n        PRIMARY_OWNERS: [\"test primary owner\"],\n        SECONDARY_OWNERS: [\"test secondary owner\"],\n        ACCESS_CONTROL_LIST: {PUBLIC_DOC_PAT: 1, \"test user\": 1},\n    }\n\n\ndef _assert_chunk_matches_vespa_chunk(\n    opensearch_chunk: DocumentChunk,\n    vespa_chunk: dict[str, Any],\n) -> None:\n    assert opensearch_chunk.document_id == vespa_chunk[DOCUMENT_ID]\n    assert opensearch_chunk.chunk_index == vespa_chunk[CHUNK_ID]\n    assert opensearch_chunk.content == vespa_chunk[CONTENT]\n    assert opensearch_chunk.content_vector == pytest.approx(\n        vespa_chunk[EMBEDDINGS][FULL_CHUNK_EMBEDDING_KEY]\n    )\n    assert opensearch_chunk.title == vespa_chunk[TITLE]\n    assert opensearch_chunk.title_vector == pytest.approx(vespa_chunk[TITLE_EMBEDDING])\n    assert opensearch_chunk.source_type == vespa_chunk[SOURCE_TYPE]\n    assert opensearch_chunk.metadata_list == vespa_chunk[METADATA_LIST]\n    assert (\n        opensearch_chunk.last_updated is not None\n        and int(opensearch_chunk.last_updated.timestamp())\n        == vespa_chunk[DOC_UPDATED_AT]\n    )\n    assert opensearch_chunk.public == vespa_chunk[ACCESS_CONTROL_LIST][PUBLIC_DOC_PAT]\n    assert opensearch_chunk.access_control_list == [\n        access_control\n        for access_control in vespa_chunk[ACCESS_CONTROL_LIST]\n        if access_control != PUBLIC_DOC_PAT\n    ]\n    assert opensearch_chunk.hidden == vespa_chunk[HIDDEN]\n    assert opensearch_chunk.global_boost == vespa_chunk[BOOST]\n    assert opensearch_chunk.semantic_identifier == vespa_chunk[SEMANTIC_IDENTIFIER]\n    assert opensearch_chunk.image_file_id == vespa_chunk[IMAGE_FILE_NAME]\n    assert opensearch_chunk.source_links == vespa_chunk[SOURCE_LINKS]\n    assert opensearch_chunk.blurb == vespa_chunk[BLURB]\n    assert opensearch_chunk.doc_summary == vespa_chunk[DOC_SUMMARY]\n    assert opensearch_chunk.chunk_context == vespa_chunk[CHUNK_CONTEXT]\n    assert opensearch_chunk.metadata_suffix == vespa_chunk[METADATA_SUFFIX]\n    assert opensearch_chunk.document_sets == [\n        doc_set for doc_set in vespa_chunk[DOCUMENT_SETS]\n    ]\n    assert opensearch_chunk.user_projects == vespa_chunk[USER_PROJECT]\n    assert opensearch_chunk.primary_owners == vespa_chunk[PRIMARY_OWNERS]\n    assert opensearch_chunk.secondary_owners == vespa_chunk[SECONDARY_OWNERS]\n\n\n@pytest.fixture(scope=\"module\")\ndef full_deployment_setup() -> Generator[None, None, None]:\n    \"\"\"Optional fixture to perform full deployment-like setup on demand.\n\n    Imports and calls\n    tests.external_dependency_unit.startup.full_setup.ensure_full_deployment_setup\n    to initialize Postgres defaults, Vespa indices, and seed initial docs.\n\n    NOTE: We deliberately duplicate this logic from\n    backend/tests/external_dependency_unit/conftest.py because we need to set\n    opensearch_available just for this module, not the entire test session.\n\n    TODO(ENG-3764)(andrei): Consolidate some of these test fixtures.\n    \"\"\"\n    # Patch ENABLE_OPENSEARCH_INDEXING_FOR_ONYX just for this test because we\n    # don't yet want that enabled for all tests.\n    # TODO(andrei): Remove this once CI enables OpenSearch for all tests.\n    with (\n        patch(\n            \"onyx.configs.app_configs.ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\",\n            True,\n        ),\n        patch(\"onyx.document_index.factory.ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\", True),\n    ):\n        ensure_full_deployment_setup(opensearch_available=True)\n        yield  # Test runs here.\n\n\n@pytest.fixture(scope=\"module\")\ndef db_session(\n    full_deployment_setup: None,  # noqa: ARG001\n) -> Generator[Session, None, None]:\n    \"\"\"\n    NOTE: We deliberately duplicate this logic from\n    backend/tests/external_dependency_unit/conftest.py because we need a\n    module-level fixture whereas the fixture in that file is function-level. I\n    don't want to change it in this change to not risk inadvertently breaking\n    things.\n    \"\"\"\n    with get_session_with_current_tenant() as session:\n        yield session  # Test runs here.\n\n\n@pytest.fixture(scope=\"module\")\ndef vespa_document_index(\n    db_session: Session,\n    full_deployment_setup: None,  # noqa: ARG001\n) -> Generator[VespaDocumentIndex, None, None]:\n    \"\"\"Creates a Vespa document index for the test tenant.\"\"\"\n    active = get_active_search_settings(db_session)\n    yield VespaDocumentIndex(\n        index_name=active.primary.index_name,\n        tenant_state=TenantState(tenant_id=get_current_tenant_id(), multitenant=False),\n        large_chunks_enabled=False,\n    )  # Test runs here.\n\n\n@pytest.fixture(scope=\"module\")\ndef opensearch_client(\n    db_session: Session,\n    full_deployment_setup: None,  # noqa: ARG001\n) -> Generator[OpenSearchIndexClient, None, None]:\n    \"\"\"Creates an OpenSearch client for the test tenant.\"\"\"\n    active = get_active_search_settings(db_session)\n    yield OpenSearchIndexClient(index_name=active.primary.index_name)  # Test runs here.\n\n\n@pytest.fixture(scope=\"module\")\ndef opensearch_available(\n    opensearch_client: OpenSearchClient,\n) -> Generator[None, None, None]:\n    \"\"\"Verifies OpenSearch is running, fails the test if not.\"\"\"\n    if not wait_for_opensearch_with_timeout(client=opensearch_client):\n        pytest.fail(\"OpenSearch is not available.\")\n    yield  # Test runs here.\n\n\n@pytest.fixture(scope=\"module\")\ndef vespa_available(\n    full_deployment_setup: None,  # noqa: ARG001\n) -> Generator[None, None, None]:\n    \"\"\"Verifies Vespa is running, fails the test if not.\"\"\"\n    # Try 90 seconds for testing in CI.\n    if not wait_for_vespa_with_timeout(wait_limit=90):\n        pytest.fail(\"Vespa is not available.\")\n    yield  # Test runs here.\n\n\n@pytest.fixture(scope=\"module\")\ndef test_embedding_dimension(db_session: Session) -> Generator[int, None, None]:\n    active = get_active_search_settings(db_session)\n    yield active.primary.model_dim  # Test runs here.\n\n\n@pytest.fixture(scope=\"function\")\ndef patch_get_vespa_chunks_page_size() -> Generator[int, None, None]:\n    test_page_size = 5\n    with (\n        patch(\n            \"onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE\",\n            test_page_size,\n        ),\n        patch(\n            \"onyx.background.celery.tasks.opensearch_migration.constants.GET_VESPA_CHUNKS_PAGE_SIZE\",\n            test_page_size,\n        ),\n    ):\n        yield test_page_size  # Test runs here.\n\n\n@pytest.fixture(scope=\"function\")\ndef test_documents(\n    db_session: Session,\n    vespa_document_index: VespaDocumentIndex,\n    opensearch_client: OpenSearchIndexClient,\n    patch_get_vespa_chunks_page_size: int,\n) -> Generator[list[Document], None, None]:\n    \"\"\"\n    Creates and cleans test Document records in Postgres and the document\n    indices.\n    \"\"\"\n    # We use a large number of documents >\n    # get_all_raw_document_chunks_paginated's page_size argument in the task.\n    documents_to_create = patch_get_vespa_chunks_page_size * 2\n    doc_ids = [f\"test_doc_{i}\" for i in range(documents_to_create)]\n    documents = _insert_test_documents_with_commit(db_session, doc_ids)\n\n    # NOTE: chunk_count must be passed because index_raw_chunks uses the \"new\"\n    # chunk ID system (get_uuid_from_chunk_info). Without chunk_count, delete()\n    # falls back to the \"old\" system (get_uuid_from_chunk_info_old) and won't\n    # find/delete the chunks.\n    for document in documents:\n        vespa_document_index.delete(document.id, chunk_count=CHUNK_COUNT)\n\n    for document in documents:\n        _delete_document_chunks_from_opensearch(\n            opensearch_client, document.id, get_current_tenant_id()\n        )\n\n    yield documents  # Test runs here.\n\n    # Cleanup.\n    for document in documents:\n        _delete_document_chunks_from_opensearch(\n            opensearch_client, document.id, get_current_tenant_id()\n        )\n\n    for document in documents:\n        vespa_document_index.delete(document.id, chunk_count=CHUNK_COUNT)\n\n    _delete_test_documents_with_commit(db_session, documents)\n\n\n@pytest.fixture(scope=\"function\")\ndef clean_migration_tables(db_session: Session) -> Generator[None, None, None]:\n    \"\"\"Cleans up migration-related tables before and after each test.\"\"\"\n    # Clean before test.\n    db_session.query(OpenSearchDocumentMigrationRecord).delete()\n    db_session.query(OpenSearchTenantMigrationRecord).delete()\n    db_session.commit()\n\n    yield  # Test runs here.\n\n    # Clean after test.\n    db_session.query(OpenSearchDocumentMigrationRecord).delete()\n    db_session.query(OpenSearchTenantMigrationRecord).delete()\n    db_session.commit()\n\n\n@pytest.fixture(scope=\"function\")\ndef enable_opensearch_indexing_for_onyx() -> Generator[None, None, None]:\n    with patch(\n        \"onyx.background.celery.tasks.opensearch_migration.tasks.ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\",\n        True,\n    ):\n        yield  # Test runs here.\n\n\n@pytest.fixture(scope=\"function\")\ndef disable_opensearch_indexing_for_onyx() -> Generator[None, None, None]:\n    with patch(\n        \"onyx.background.celery.tasks.opensearch_migration.tasks.ENABLE_OPENSEARCH_INDEXING_FOR_ONYX\",\n        False,\n    ):\n        yield  # Test runs here.\n\n\nclass TestMigrateChunksFromVespaToOpenSearchTask:\n    \"\"\"Tests migrate_chunks_from_vespa_to_opensearch_task.\"\"\"\n\n    def test_chunk_migration_completes_successfully(\n        self,\n        db_session: Session,\n        test_documents: list[Document],\n        vespa_document_index: VespaDocumentIndex,\n        opensearch_client: OpenSearchIndexClient,\n        test_embedding_dimension: int,\n        clean_migration_tables: None,  # noqa: ARG002\n        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"\n        Tests that all chunks are migrated from Vespa to OpenSearch.\n        \"\"\"\n        # Precondition.\n        # Index chunks into Vespa.\n        document_chunks: dict[str, list[dict[str, Any]]] = {\n            document.id: [\n                _create_raw_document_chunk(\n                    document_id=document.id,\n                    chunk_index=i,\n                    content=f\"Test content {i} for {document.id}\",\n                    embedding=_generate_test_vector(test_embedding_dimension),\n                    now=datetime.now(),\n                    title=f\"Test title {document.id}\",\n                    title_embedding=_generate_test_vector(test_embedding_dimension),\n                )\n                for i in range(CHUNK_COUNT)\n            ]\n            for document in test_documents\n        }\n        all_chunks: list[dict[str, Any]] = []\n        for chunks in document_chunks.values():\n            all_chunks.extend(chunks)\n        vespa_document_index.index_raw_chunks(all_chunks)\n        tenant_state = TenantState(\n            tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT\n        )\n\n        # Under test.\n        result = migrate_chunks_from_vespa_to_opensearch_task(\n            tenant_id=tenant_state.tenant_id\n        )\n\n        # Postcondition.\n        assert result is True\n        # Expire the session cache to see the committed changes from the task.\n        db_session.expire_all()\n        # Verify tenant migration record was updated.\n        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()\n        assert tenant_record is not None\n        assert tenant_record.total_chunks_migrated == len(all_chunks)\n        # Visit is complete so continuation token should be None.\n        assert tenant_record.vespa_visit_continuation_token is not None\n        assert is_continuation_token_done_for_all_slices(\n            json.loads(tenant_record.vespa_visit_continuation_token)\n        )\n        assert tenant_record.migration_completed_at is not None\n        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)\n\n        # Verify chunks were indexed in OpenSearch.\n        for document in test_documents:\n            opensearch_chunks = _get_document_chunks_from_opensearch(\n                opensearch_client, document.id, tenant_state\n            )\n            assert len(opensearch_chunks) == CHUNK_COUNT\n            opensearch_chunks.sort(key=lambda x: x.chunk_index)\n            for opensearch_chunk in opensearch_chunks:\n                _assert_chunk_matches_vespa_chunk(\n                    opensearch_chunk,\n                    document_chunks[document.id][opensearch_chunk.chunk_index],\n                )\n\n    def test_chunk_migration_resumes_from_continuation_token(\n        self,\n        db_session: Session,\n        test_documents: list[Document],\n        vespa_document_index: VespaDocumentIndex,\n        opensearch_client: OpenSearchIndexClient,\n        test_embedding_dimension: int,\n        clean_migration_tables: None,  # noqa: ARG002\n        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Tests that chunk migration resumes from a saved continuation token.\n\n        Simulates task time running out my mocking the locking behavior.\n        \"\"\"\n        # Precondition.\n        # Index chunks into Vespa.\n        document_chunks: dict[str, list[dict[str, Any]]] = {\n            document.id: [\n                _create_raw_document_chunk(\n                    document_id=document.id,\n                    chunk_index=i,\n                    content=f\"Test content {i} for {document.id}\",\n                    embedding=_generate_test_vector(test_embedding_dimension),\n                    now=datetime.now(),\n                    title=f\"Test title {document.id}\",\n                    title_embedding=_generate_test_vector(test_embedding_dimension),\n                )\n                for i in range(CHUNK_COUNT)\n            ]\n            for document in test_documents\n        }\n        all_chunks: list[dict[str, Any]] = []\n        for chunks in document_chunks.values():\n            all_chunks.extend(chunks)\n        vespa_document_index.index_raw_chunks(all_chunks)\n        tenant_state = TenantState(\n            tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT\n        )\n\n        # Run the initial batch. To simulate partial progress we will mock the\n        # redis lock to return True for the first invocation of .owned() and\n        # False subsequently.\n        mock_redis_client = Mock()\n        mock_lock = Mock()\n        mock_lock.owned.side_effect = [True, False, False]\n        mock_lock.acquire.return_value = True\n        mock_redis_client.lock.return_value = mock_lock\n        with patch(\n            \"onyx.background.celery.tasks.opensearch_migration.tasks.get_redis_client\",\n            return_value=mock_redis_client,\n        ):\n            result_1 = migrate_chunks_from_vespa_to_opensearch_task(\n                tenant_id=tenant_state.tenant_id\n            )\n\n        assert result_1 is True\n        # Expire the session cache to see the committed changes from the task.\n        db_session.expire_all()\n\n        # Verify partial progress was saved.\n        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()\n        assert tenant_record is not None\n        partial_chunks_migrated = tenant_record.total_chunks_migrated\n        assert partial_chunks_migrated > 0\n        assert tenant_record.vespa_visit_continuation_token is not None\n        # Slices are not necessarily evenly distributed across all document\n        # chunks so we can't test that every token is non-None, but certainly at\n        # least one must be.\n        assert any(json.loads(tenant_record.vespa_visit_continuation_token).values())\n        assert tenant_record.migration_completed_at is None\n        assert tenant_record.approx_chunk_count_in_vespa is not None\n\n        # Under test.\n        # Run the remainder of the migration.\n        result_2 = migrate_chunks_from_vespa_to_opensearch_task(\n            tenant_id=tenant_state.tenant_id\n        )\n\n        # Postcondition.\n        assert result_2 is True\n        # Expire the session cache to see the committed changes from the task.\n        db_session.expire_all()\n\n        # Verify completion.\n        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()\n        assert tenant_record is not None\n        assert tenant_record.total_chunks_migrated > partial_chunks_migrated\n        assert tenant_record.total_chunks_migrated == len(all_chunks)\n        # Visit is complete so continuation token should be None.\n        assert tenant_record.vespa_visit_continuation_token is not None\n        assert is_continuation_token_done_for_all_slices(\n            json.loads(tenant_record.vespa_visit_continuation_token)\n        )\n        assert tenant_record.migration_completed_at is not None\n        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)\n\n        # Verify chunks were indexed in OpenSearch.\n        for document in test_documents:\n            opensearch_chunks = _get_document_chunks_from_opensearch(\n                opensearch_client, document.id, tenant_state\n            )\n            assert len(opensearch_chunks) == CHUNK_COUNT\n            opensearch_chunks.sort(key=lambda x: x.chunk_index)\n            for opensearch_chunk in opensearch_chunks:\n                _assert_chunk_matches_vespa_chunk(\n                    opensearch_chunk,\n                    document_chunks[document.id][opensearch_chunk.chunk_index],\n                )\n\n    def test_chunk_migration_visits_all_chunks_even_when_batch_size_varies(\n        self,\n        db_session: Session,\n        test_documents: list[Document],\n        vespa_document_index: VespaDocumentIndex,\n        opensearch_client: OpenSearchIndexClient,\n        test_embedding_dimension: int,\n        clean_migration_tables: None,  # noqa: ARG002\n        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"\n        Tests that chunk migration works correctly even when the batch size\n        changes halfway through a migration.\n\n        Simulates task time running out my mocking the locking behavior.\n        \"\"\"\n        # Precondition.\n        # Index chunks into Vespa.\n        document_chunks: dict[str, list[dict[str, Any]]] = {\n            document.id: [\n                _create_raw_document_chunk(\n                    document_id=document.id,\n                    chunk_index=i,\n                    content=f\"Test content {i} for {document.id}\",\n                    embedding=_generate_test_vector(test_embedding_dimension),\n                    now=datetime.now(),\n                    title=f\"Test title {document.id}\",\n                    title_embedding=_generate_test_vector(test_embedding_dimension),\n                )\n                for i in range(CHUNK_COUNT)\n            ]\n            for document in test_documents\n        }\n        all_chunks: list[dict[str, Any]] = []\n        for chunks in document_chunks.values():\n            all_chunks.extend(chunks)\n        vespa_document_index.index_raw_chunks(all_chunks)\n        tenant_state = TenantState(\n            tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT\n        )\n\n        # Run the initial batch. To simulate partial progress we will mock the\n        # redis lock to return True for the first invocation of .owned() and\n        # False subsequently.\n        # NOTE: The batch size is currently set to 5 in\n        # patch_get_vespa_chunks_page_size.\n        mock_redis_client = Mock()\n        mock_lock = Mock()\n        mock_lock.owned.side_effect = [True, False, False]\n        mock_lock.acquire.return_value = True\n        mock_redis_client.lock.return_value = mock_lock\n        with patch(\n            \"onyx.background.celery.tasks.opensearch_migration.tasks.get_redis_client\",\n            return_value=mock_redis_client,\n        ):\n            result_1 = migrate_chunks_from_vespa_to_opensearch_task(\n                tenant_id=tenant_state.tenant_id\n            )\n\n        assert result_1 is True\n        # Expire the session cache to see the committed changes from the task.\n        db_session.expire_all()\n\n        # Verify partial progress was saved.\n        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()\n        assert tenant_record is not None\n        partial_chunks_migrated = tenant_record.total_chunks_migrated\n        assert partial_chunks_migrated > 0\n        # page_size applies per slice, so one iteration can fetch up to\n        # page_size * GET_VESPA_CHUNKS_SLICE_COUNT chunks total.\n        assert partial_chunks_migrated <= 5 * GET_VESPA_CHUNKS_SLICE_COUNT\n        assert tenant_record.vespa_visit_continuation_token is not None\n        # Slices are not necessarily evenly distributed across all document\n        # chunks so we can't test that every token is non-None, but certainly at\n        # least one must be.\n        assert any(json.loads(tenant_record.vespa_visit_continuation_token).values())\n        assert tenant_record.migration_completed_at is None\n        assert tenant_record.approx_chunk_count_in_vespa is not None\n\n        # Under test.\n        # Now patch the batch size to be some other number, like 2.\n        mock_redis_client = Mock()\n        mock_lock = Mock()\n        mock_lock.owned.side_effect = [True, False, False]\n        mock_lock.acquire.return_value = True\n        mock_redis_client.lock.return_value = mock_lock\n        with (\n            patch(\n                \"onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE\",\n                2,\n            ),\n            patch(\n                \"onyx.background.celery.tasks.opensearch_migration.constants.GET_VESPA_CHUNKS_PAGE_SIZE\",\n                2,\n            ),\n            patch(\n                \"onyx.background.celery.tasks.opensearch_migration.tasks.get_redis_client\",\n                return_value=mock_redis_client,\n            ),\n        ):\n            result_2 = migrate_chunks_from_vespa_to_opensearch_task(\n                tenant_id=tenant_state.tenant_id\n            )\n\n        # Postcondition.\n        assert result_2 is True\n        # Expire the session cache to see the committed changes from the task.\n        db_session.expire_all()\n\n        # Verify next partial progress was saved.\n        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()\n        assert tenant_record is not None\n        new_partial_chunks_migrated = tenant_record.total_chunks_migrated\n        assert new_partial_chunks_migrated > partial_chunks_migrated\n        # page_size applies per slice, so one iteration can fetch up to\n        # page_size * GET_VESPA_CHUNKS_SLICE_COUNT chunks total.\n        assert new_partial_chunks_migrated <= (5 + 2) * GET_VESPA_CHUNKS_SLICE_COUNT\n        assert tenant_record.vespa_visit_continuation_token is not None\n        # Slices are not necessarily evenly distributed across all document\n        # chunks so we can't test that every token is non-None, but certainly at\n        # least one must be.\n        assert any(json.loads(tenant_record.vespa_visit_continuation_token).values())\n        assert tenant_record.migration_completed_at is None\n        assert tenant_record.approx_chunk_count_in_vespa is not None\n\n        # Under test.\n        # Run the remainder of the migration.\n        with (\n            patch(\n                \"onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE\",\n                2,\n            ),\n            patch(\n                \"onyx.background.celery.tasks.opensearch_migration.constants.GET_VESPA_CHUNKS_PAGE_SIZE\",\n                2,\n            ),\n        ):\n            result_3 = migrate_chunks_from_vespa_to_opensearch_task(\n                tenant_id=tenant_state.tenant_id\n            )\n\n        # Postcondition.\n        assert result_3 is True\n        # Expire the session cache to see the committed changes from the task.\n        db_session.expire_all()\n\n        # Verify completion.\n        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()\n        assert tenant_record is not None\n        assert tenant_record.total_chunks_migrated > new_partial_chunks_migrated\n        assert tenant_record.total_chunks_migrated == len(all_chunks)\n        # Visit is complete so continuation token should be None.\n        assert tenant_record.vespa_visit_continuation_token is not None\n        assert is_continuation_token_done_for_all_slices(\n            json.loads(tenant_record.vespa_visit_continuation_token)\n        )\n        assert tenant_record.migration_completed_at is not None\n        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)\n\n        # Verify chunks were indexed in OpenSearch.\n        for document in test_documents:\n            opensearch_chunks = _get_document_chunks_from_opensearch(\n                opensearch_client, document.id, tenant_state\n            )\n            assert len(opensearch_chunks) == CHUNK_COUNT\n            opensearch_chunks.sort(key=lambda x: x.chunk_index)\n            for opensearch_chunk in opensearch_chunks:\n                _assert_chunk_matches_vespa_chunk(\n                    opensearch_chunk,\n                    document_chunks[document.id][opensearch_chunk.chunk_index],\n                )\n\n    def test_chunk_migration_empty_vespa(\n        self,\n        db_session: Session,\n        # Get this just to ensure Vespa is clean from previous test runs.\n        test_documents: list[Document],  # noqa: ARG002\n        clean_migration_tables: None,  # noqa: ARG002\n        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"\n        Tests that chunk migration completes without error when Vespa is empty.\n        \"\"\"\n        # Under test.\n        # No chunks in Vespa.\n        result = migrate_chunks_from_vespa_to_opensearch_task(\n            tenant_id=get_current_tenant_id()\n        )\n\n        # Postcondition.\n        assert result is True\n        db_session.expire_all()\n        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()\n        assert tenant_record is not None\n        assert tenant_record.total_chunks_migrated == 0\n        # Visit is complete so continuation token should be marked as done for all slices.\n        assert tenant_record.vespa_visit_continuation_token is not None\n        assert is_continuation_token_done_for_all_slices(\n            json.loads(tenant_record.vespa_visit_continuation_token)\n        )\n        # Mark migration as completed even for empty Vespa.\n        assert tenant_record.migration_completed_at is not None\n        assert tenant_record.approx_chunk_count_in_vespa == 0\n\n    def test_chunk_migration_updates_existing_chunks(\n        self,\n        db_session: Session,\n        test_documents: list[Document],\n        vespa_document_index: VespaDocumentIndex,\n        opensearch_client: OpenSearchIndexClient,\n        test_embedding_dimension: int,\n        clean_migration_tables: None,  # noqa: ARG002\n        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"\n        Tests that the migration task updates existing chunks in OpenSearch if\n        they already exist.\n\n        Chunks existing in the index is not a failure mode as the document may\n        have been dual indexed. Since dual indexing indexes into Vespa first, we\n        can assume that the state of the chunk we want to migrate is the most\n        up-to-date.\n        \"\"\"\n        # Precondition.\n        # Index chunks into Vespa.\n        document_chunks: dict[str, list[dict[str, Any]]] = {\n            document.id: [\n                _create_raw_document_chunk(\n                    document_id=document.id,\n                    chunk_index=i,\n                    content=f\"Test content {i} for {document.id}\",\n                    embedding=_generate_test_vector(test_embedding_dimension),\n                    now=datetime.now(),\n                    title=f\"Test title {document.id}\",\n                    title_embedding=_generate_test_vector(test_embedding_dimension),\n                )\n                for i in range(CHUNK_COUNT)\n            ]\n            for document in test_documents\n        }\n        all_chunks: list[dict[str, Any]] = []\n        for chunks in document_chunks.values():\n            all_chunks.extend(chunks)\n        vespa_document_index.index_raw_chunks(all_chunks)\n        # Index the first document into OpenSearch with some different content.\n        document_in_opensearch = deepcopy(document_chunks[test_documents[0].id])\n        for chunk in document_in_opensearch:\n            chunk[\"content\"] = (\n                f\"Different content {chunk[CHUNK_ID]} for {test_documents[0].id}\"\n            )\n        tenant_state = TenantState(\n            tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT\n        )\n        chunks_for_document_in_opensearch, _ = (\n            transform_vespa_chunks_to_opensearch_chunks(\n                document_in_opensearch,\n                tenant_state,\n                {},\n            )\n        )\n        opensearch_client.bulk_index_documents(\n            documents=chunks_for_document_in_opensearch,\n            tenant_state=tenant_state,\n            update_if_exists=True,\n        )\n\n        # Under test.\n        result = migrate_chunks_from_vespa_to_opensearch_task(\n            tenant_id=tenant_state.tenant_id\n        )\n\n        # Postcondition.\n        assert result is True\n        # Expire the session cache to see the committed changes from the task.\n        db_session.expire_all()\n        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()\n        assert tenant_record is not None\n        assert tenant_record.total_chunks_migrated == len(all_chunks)\n        # Visit is complete so continuation token should be None.\n        assert tenant_record.vespa_visit_continuation_token is not None\n        assert is_continuation_token_done_for_all_slices(\n            json.loads(tenant_record.vespa_visit_continuation_token)\n        )\n        assert tenant_record.migration_completed_at is not None\n        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)\n\n        # Verify chunks were indexed in OpenSearch.\n        for document in test_documents:\n            opensearch_chunks = _get_document_chunks_from_opensearch(\n                opensearch_client, document.id, tenant_state\n            )\n            assert len(opensearch_chunks) == CHUNK_COUNT\n            opensearch_chunks.sort(key=lambda x: x.chunk_index)\n            for opensearch_chunk in opensearch_chunks:\n                _assert_chunk_matches_vespa_chunk(\n                    opensearch_chunk,\n                    document_chunks[document.id][opensearch_chunk.chunk_index],\n                )\n\n    def test_chunk_migration_noops_when_migration_is_complete(\n        self,\n        db_session: Session,\n        test_documents: list[Document],\n        vespa_document_index: VespaDocumentIndex,\n        opensearch_client: OpenSearchIndexClient,\n        test_embedding_dimension: int,\n        clean_migration_tables: None,  # noqa: ARG002\n        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"\n        Tests that the migration task no-ops when the migration is complete.\n        \"\"\"\n        # Precondition.\n        # Index chunks into Vespa.\n        document_chunks: dict[str, list[dict[str, Any]]] = {\n            document.id: [\n                _create_raw_document_chunk(\n                    document_id=document.id,\n                    chunk_index=i,\n                    content=f\"Test content {i} for {document.id}\",\n                    embedding=_generate_test_vector(test_embedding_dimension),\n                    now=datetime.now(),\n                    title=f\"Test title {document.id}\",\n                    title_embedding=_generate_test_vector(test_embedding_dimension),\n                )\n                for i in range(CHUNK_COUNT)\n            ]\n            for document in test_documents\n        }\n        all_chunks: list[dict[str, Any]] = []\n        for chunks in document_chunks.values():\n            all_chunks.extend(chunks)\n        vespa_document_index.index_raw_chunks(all_chunks)\n        tenant_state = TenantState(\n            tenant_id=get_current_tenant_id(), multitenant=MULTI_TENANT\n        )\n\n        # Under test.\n        # First run.\n        result_1 = migrate_chunks_from_vespa_to_opensearch_task(\n            tenant_id=tenant_state.tenant_id\n        )\n\n        # Postcondition.\n        assert result_1 is True\n        # Expire the session cache to see the committed changes from the task.\n        db_session.expire_all()\n        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()\n        assert tenant_record is not None\n        assert tenant_record.total_chunks_migrated == len(all_chunks)\n        # Visit is complete so continuation token should be None.\n        assert tenant_record.vespa_visit_continuation_token is not None\n        assert is_continuation_token_done_for_all_slices(\n            json.loads(tenant_record.vespa_visit_continuation_token)\n        )\n        assert tenant_record.migration_completed_at is not None\n        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)\n\n        # Verify chunks were indexed in OpenSearch.\n        for document in test_documents:\n            opensearch_chunks = _get_document_chunks_from_opensearch(\n                opensearch_client, document.id, tenant_state\n            )\n            assert len(opensearch_chunks) == CHUNK_COUNT\n            opensearch_chunks.sort(key=lambda x: x.chunk_index)\n            for opensearch_chunk in opensearch_chunks:\n                _assert_chunk_matches_vespa_chunk(\n                    opensearch_chunk,\n                    document_chunks[document.id][opensearch_chunk.chunk_index],\n                )\n\n        # Under test.\n        # Second run.\n        result_2 = migrate_chunks_from_vespa_to_opensearch_task(\n            tenant_id=tenant_state.tenant_id\n        )\n\n        # Postcondition.\n        assert result_2 is True\n        # Expire the session cache to see the committed changes from the task.\n        db_session.expire_all()\n        # This all should be unchanged.\n        tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()\n        assert tenant_record is not None\n        assert tenant_record.total_chunks_migrated == len(all_chunks)\n        # Visit is complete so continuation token should be None.\n        assert tenant_record.vespa_visit_continuation_token is not None\n        assert is_continuation_token_done_for_all_slices(\n            json.loads(tenant_record.vespa_visit_continuation_token)\n        )\n        assert tenant_record.migration_completed_at is not None\n        assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)\n\n        # Verify chunks were indexed in OpenSearch.\n        for document in test_documents:\n            opensearch_chunks = _get_document_chunks_from_opensearch(\n                opensearch_client, document.id, tenant_state\n            )\n            assert len(opensearch_chunks) == CHUNK_COUNT\n            opensearch_chunks.sort(key=lambda x: x.chunk_index)\n            for opensearch_chunk in opensearch_chunks:\n                _assert_chunk_matches_vespa_chunk(\n                    opensearch_chunk,\n                    document_chunks[document.id][opensearch_chunk.chunk_index],\n                )\n\n    def test_returns_none_when_feature_disabled(\n        self,\n        disable_opensearch_indexing_for_onyx: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Tests that task returns None when feature is disabled.\"\"\"\n        # Under test.\n        result = migrate_chunks_from_vespa_to_opensearch_task(\n            tenant_id=get_current_tenant_id()\n        )\n\n        # Postcondition.\n        assert result is None\n\n    def test_vespa_get_chunk_count(\n        self,\n        vespa_document_index: VespaDocumentIndex,\n        test_embedding_dimension: int,\n    ) -> None:\n        \"\"\"\n        Tests that the VespaDocumentIndex.get_chunk_count() method returns the\n        correct number of chunks.\n        \"\"\"\n        # Precondition.\n        # Index chunks into Vespa.\n        all_chunks = [\n            _create_raw_document_chunk(\n                document_id=\"test_doc_1\",\n                chunk_index=i,\n                content=f\"Test content {i} for test_doc_1\",\n                embedding=_generate_test_vector(test_embedding_dimension),\n                now=datetime.now(),\n                title=f\"Test title {i}\",\n                title_embedding=_generate_test_vector(test_embedding_dimension),\n            )\n            for i in range(500)\n        ]\n        vespa_document_index.index_raw_chunks(all_chunks)\n\n        # Under test.\n        chunk_count = vespa_document_index.get_chunk_count()\n\n        # Postcondition.\n        assert chunk_count == len(all_chunks)\n\n\nclass TestSanitizedDocIdResolution:\n    \"\"\"Tests document ID resolution functions.\"\"\"\n\n    def test_resolve_sanitized_document_ids_batch_normal(\n        self,\n        db_session: Session,\n        test_documents: list[Document],  # noqa: ARG002\n    ) -> None:\n        \"\"\"\n        Tests batch resolution for normal document IDs (no sanitization needed).\n        \"\"\"\n        # Under test.\n        result = build_sanitized_to_original_doc_id_mapping(db_session)\n\n        # Postcondition.\n        # Since we expect no IDs in test_documents to need sanitization, the\n        # result should be empty.\n        assert not result\n\n    def test_resolve_sanitized_document_ids_batch_with_quotes(\n        self,\n        db_session: Session,\n    ) -> None:\n        \"\"\"Tests batch resolution for a document ID containing single quotes.\"\"\"\n        # Precondition.\n        # Create a document with a single quote in its ID.\n        original_id = \"test_doc_with'quote\"\n        sanitized_id = \"test_doc_with_quote\"\n        document = Document(\n            id=original_id,\n            semantic_id=original_id,\n            chunk_count=1,\n        )\n        try:\n            db_session.add(document)\n            db_session.commit()\n\n            # Under test.\n            result = build_sanitized_to_original_doc_id_mapping(db_session)\n\n            # Postcondition.\n            assert len(result) == 1\n            # The sanitized version should map to the original.\n            assert sanitized_id in result\n            assert result[sanitized_id] == original_id\n\n        finally:\n            _delete_test_documents_with_commit(db_session, [document])\n\n    def test_raises_when_sanitized_id_matches_another_document(\n        self,\n        db_session: Session,\n    ) -> None:\n        \"\"\"\n        Tests that the function raises when a sanitized ID matches another\n        document's original ID.\n        \"\"\"\n        # Precondition.\n        # Create a document with a single quote in its ID, and another document\n        # with that string as its ID.\n        original_id = \"test_doc_with'quote\"\n        sanitized_id = \"test_doc_with_quote\"\n        document_bad = Document(\n            id=original_id,\n            semantic_id=original_id,\n            chunk_count=1,\n        )\n        document_fine = Document(\n            id=sanitized_id,\n            semantic_id=sanitized_id,\n            chunk_count=1,\n        )\n        try:\n            db_session.add(document_bad)\n            db_session.add(document_fine)\n            db_session.commit()\n\n            # Under test.\n            with pytest.raises(RuntimeError):\n                build_sanitized_to_original_doc_id_mapping(db_session)\n\n        finally:\n            _delete_test_documents_with_commit(\n                db_session, [document_bad, document_fine]\n            )\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/permission_sync/test_doc_permission_sync_attempt.py",
    "content": "\"\"\"\nTest suite for DocPermissionSyncAttempt CRUD operations.\n\nTests the basic CRUD operations for document permission sync attempts,\nincluding creation, status updates, progress tracking, and querying.\n\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import PermissionSyncStatus\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom onyx.db.permission_sync_attempt import complete_doc_permission_sync_attempt\nfrom onyx.db.permission_sync_attempt import create_doc_permission_sync_attempt\nfrom onyx.db.permission_sync_attempt import get_doc_permission_sync_attempt\nfrom onyx.db.permission_sync_attempt import (\n    get_recent_doc_permission_sync_attempts_for_cc_pair,\n)\nfrom onyx.db.permission_sync_attempt import mark_doc_permission_sync_attempt_failed\nfrom onyx.db.permission_sync_attempt import (\n    mark_doc_permission_sync_attempt_in_progress,\n)\nfrom tests.external_dependency_unit.conftest import create_test_user\n\n\ndef _create_test_connector_credential_pair(\n    db_session: Session, source: DocumentSource = DocumentSource.GOOGLE_DRIVE\n) -> ConnectorCredentialPair:\n    \"\"\"Create a test connector credential pair for testing.\"\"\"\n    user = create_test_user(db_session, \"test_user\")\n\n    connector = Connector(\n        name=f\"Test {source.value} Connector\",\n        source=source,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={},\n        refresh_freq=None,\n        prune_freq=None,\n        indexing_start=datetime.now(timezone.utc),\n    )\n    db_session.add(connector)\n    db_session.flush()\n\n    credential = Credential(\n        credential_json={},\n        user_id=user.id,\n        admin_public=True,\n    )\n    db_session.add(credential)\n    db_session.flush()\n    # Expire the credential so it reloads from DB with SensitiveValue wrapper\n    db_session.expire(credential)\n\n    cc_pair = ConnectorCredentialPair(\n        connector_id=connector.id,\n        credential_id=credential.id,\n        name=\"Test CC Pair\",\n        status=ConnectorCredentialPairStatus.ACTIVE,\n        access_type=AccessType.PUBLIC,\n    )\n    db_session.add(cc_pair)\n    db_session.commit()\n\n    return cc_pair\n\n\nclass TestDocPermissionSyncAttempt:\n    def test_create_doc_permission_sync_attempt(self, db_session: Session) -> None:\n        \"\"\"Test creating a new doc permission sync attempt.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n\n        attempt_id = create_doc_permission_sync_attempt(\n            connector_credential_pair_id=cc_pair.id,\n            db_session=db_session,\n        )\n\n        assert attempt_id is not None\n        assert isinstance(attempt_id, int)\n\n        # Verify the attempt was created with correct defaults\n        attempt = get_doc_permission_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert attempt.connector_credential_pair_id == cc_pair.id\n        assert attempt.status == PermissionSyncStatus.NOT_STARTED\n        assert attempt.total_docs_synced == 0\n        assert attempt.docs_with_permission_errors == 0\n        assert attempt.time_started is None\n        assert attempt.time_finished is None\n        assert attempt.time_created is not None\n\n    def test_get_doc_permission_sync_attempt(self, db_session: Session) -> None:\n        \"\"\"Test retrieving a doc permission sync attempt by ID.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)\n\n        # Test basic retrieval\n        attempt = get_doc_permission_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert attempt.id == attempt_id\n\n        # Test with eager loading\n        attempt_with_connector = get_doc_permission_sync_attempt(\n            db_session, attempt_id, eager_load_connector=True\n        )\n        assert attempt_with_connector is not None\n        assert attempt_with_connector.connector_credential_pair is not None\n        assert attempt_with_connector.connector_credential_pair.id == cc_pair.id\n\n        # Test non-existent ID\n        non_existent_attempt = get_doc_permission_sync_attempt(db_session, 99999)\n        assert non_existent_attempt is None\n\n    def test_mark_doc_permission_sync_attempt_in_progress(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test marking a doc permission sync attempt as in progress.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)\n\n        # Mark as in progress\n        updated_attempt = mark_doc_permission_sync_attempt_in_progress(\n            attempt_id, db_session\n        )\n\n        assert updated_attempt.status == PermissionSyncStatus.IN_PROGRESS\n        assert updated_attempt.time_started is not None\n        assert updated_attempt.time_finished is None\n\n        # Verify it fails if already in progress\n        with pytest.raises(RuntimeError, match=\"not in NOT_STARTED status\"):\n            mark_doc_permission_sync_attempt_in_progress(attempt_id, db_session)\n\n    def test_mark_doc_permission_sync_attempt_failed(self, db_session: Session) -> None:\n        \"\"\"Test marking a doc permission sync attempt as failed.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)\n\n        # Mark as failed with error message (should work even without starting)\n        error_msg = \"Sync process crashed unexpectedly\"\n        mark_doc_permission_sync_attempt_failed(\n            attempt_id, db_session, error_message=error_msg\n        )\n\n        # Verify the status and timestamps\n        attempt = get_doc_permission_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert attempt.status == PermissionSyncStatus.FAILED\n        assert attempt.time_started is not None\n        assert attempt.time_finished is not None\n        assert attempt.error_message == error_msg\n\n    def test_get_recent_doc_permission_sync_attempts_for_cc_pair(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test retrieving recent doc permission sync attempts for a connector credential pair.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n\n        # Create multiple attempts\n        attempt_ids = []\n        for i in range(5):\n            attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)\n            attempt_ids.append(attempt_id)\n\n        # Get recent attempts\n        recent_attempts = get_recent_doc_permission_sync_attempts_for_cc_pair(\n            cc_pair_id=cc_pair.id,\n            limit=3,\n            db_session=db_session,\n        )\n\n        assert len(recent_attempts) == 3\n\n        # Verify they are ordered by time_created descending (most recent first)\n        for i in range(len(recent_attempts) - 1):\n            assert (\n                recent_attempts[i].time_created >= recent_attempts[i + 1].time_created\n            )\n\n        # Verify they all belong to the correct cc_pair\n        for attempt in recent_attempts:\n            assert attempt.connector_credential_pair_id == cc_pair.id\n\n        # Test with different cc_pair (should return empty)\n        other_cc_pair = _create_test_connector_credential_pair(\n            db_session, source=DocumentSource.SLACK\n        )\n        other_attempts = get_recent_doc_permission_sync_attempts_for_cc_pair(\n            cc_pair_id=other_cc_pair.id,\n            limit=10,\n            db_session=db_session,\n        )\n        assert len(other_attempts) == 0\n\n    def test_status_enum_methods(self, db_session: Session) -> None:\n        \"\"\"Test the status enum helper methods.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)\n\n        # Test NOT_STARTED status\n        attempt = get_doc_permission_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert not attempt.status.is_terminal()\n        assert not attempt.status.is_successful()\n\n        # Test IN_PROGRESS status\n        mark_doc_permission_sync_attempt_in_progress(attempt_id, db_session)\n        attempt = get_doc_permission_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert not attempt.status.is_terminal()\n        assert not attempt.status.is_successful()\n\n        # Test SUCCESS status via complete function\n        complete_doc_permission_sync_attempt(\n            db_session=db_session,\n            attempt_id=attempt_id,\n            total_docs_synced=100,\n            docs_with_permission_errors=0,\n        )\n        attempt = get_doc_permission_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert attempt.status.is_terminal()\n        assert attempt.status.is_successful()\n\n        # Test FAILED status (create new attempt)\n        failed_attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)\n        mark_doc_permission_sync_attempt_failed(\n            failed_attempt_id, db_session, error_message=\"Test failure\"\n        )\n        failed_attempt = get_doc_permission_sync_attempt(db_session, failed_attempt_id)\n        assert failed_attempt is not None\n        assert failed_attempt.status.is_terminal()\n        assert not failed_attempt.status.is_successful()\n\n        # Test COMPLETED_WITH_ERRORS status via complete function (create new attempt)\n        error_attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)\n        mark_doc_permission_sync_attempt_in_progress(error_attempt_id, db_session)\n        complete_doc_permission_sync_attempt(\n            db_session=db_session,\n            attempt_id=error_attempt_id,\n            total_docs_synced=100,\n            docs_with_permission_errors=10,\n        )\n        error_attempt = get_doc_permission_sync_attempt(db_session, error_attempt_id)\n        assert error_attempt is not None\n        assert error_attempt.status.is_terminal()\n        assert (\n            error_attempt.status.is_successful()\n        )  # Completed with errors is still \"successful\"\n\n    def test_complete_doc_permission_sync_attempt_success(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test completing a doc permission sync attempt without errors.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)\n\n        # Mark as in progress first\n        mark_doc_permission_sync_attempt_in_progress(attempt_id, db_session)\n\n        # Complete without errors\n        completed_attempt = complete_doc_permission_sync_attempt(\n            db_session=db_session,\n            attempt_id=attempt_id,\n            total_docs_synced=100,\n            docs_with_permission_errors=0,\n        )\n\n        assert completed_attempt.status == PermissionSyncStatus.SUCCESS\n        assert completed_attempt.total_docs_synced == 100\n        assert completed_attempt.docs_with_permission_errors == 0\n        assert completed_attempt.time_finished is not None\n\n    def test_complete_doc_permission_sync_attempt_with_errors(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test completing a doc permission sync attempt with errors.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)\n\n        # Mark as in progress first\n        mark_doc_permission_sync_attempt_in_progress(attempt_id, db_session)\n\n        # Complete with errors\n        completed_attempt = complete_doc_permission_sync_attempt(\n            db_session=db_session,\n            attempt_id=attempt_id,\n            total_docs_synced=100,\n            docs_with_permission_errors=15,\n        )\n\n        assert completed_attempt.status == PermissionSyncStatus.COMPLETED_WITH_ERRORS\n        assert completed_attempt.total_docs_synced == 100\n        assert completed_attempt.docs_with_permission_errors == 15\n        assert completed_attempt.time_finished is not None\n\n    def test_complete_doc_permission_sync_attempt_can_be_called_multiple_times(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test that complete can be called multiple times if needed (accumulates correctly).\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_doc_permission_sync_attempt(cc_pair.id, db_session)\n\n        # Mark as in progress\n        mark_doc_permission_sync_attempt_in_progress(attempt_id, db_session)\n\n        # Complete once\n        first_complete = complete_doc_permission_sync_attempt(\n            db_session=db_session,\n            attempt_id=attempt_id,\n            total_docs_synced=50,\n            docs_with_permission_errors=5,\n        )\n\n        # Verify first completion\n        assert first_complete.status == PermissionSyncStatus.COMPLETED_WITH_ERRORS\n        assert first_complete.total_docs_synced == 50\n        assert first_complete.docs_with_permission_errors == 5\n        assert first_complete.time_finished is not None\n\n        # Call complete again (simulating additional batch processing)\n        second_complete = complete_doc_permission_sync_attempt(\n            db_session=db_session,\n            attempt_id=attempt_id,\n            total_docs_synced=50,\n            docs_with_permission_errors=10,\n        )\n\n        # Should accumulate progress from both calls\n        assert second_complete.status == PermissionSyncStatus.COMPLETED_WITH_ERRORS\n        assert second_complete.total_docs_synced == 100\n        assert second_complete.docs_with_permission_errors == 15\n        assert second_complete.time_finished is not None\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/permission_sync/test_external_group_permission_sync_attempt.py",
    "content": "\"\"\"\nTest suite for ExternalGroupPermissionSyncAttempt CRUD operations.\n\nTests the basic CRUD operations for external group permission sync attempts,\nincluding creation, status updates, progress tracking, and querying.\nSupports both connector-specific and global group sync attempts.\n\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.db.enums import PermissionSyncStatus\nfrom onyx.db.models import Connector\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import Credential\nfrom onyx.db.models import ExternalGroupPermissionSyncAttempt\nfrom onyx.db.permission_sync_attempt import (\n    complete_external_group_sync_attempt,\n)\nfrom onyx.db.permission_sync_attempt import (\n    create_external_group_sync_attempt,\n)\nfrom onyx.db.permission_sync_attempt import (\n    get_external_group_sync_attempt,\n)\nfrom onyx.db.permission_sync_attempt import (\n    get_recent_external_group_sync_attempts_for_cc_pair,\n)\nfrom onyx.db.permission_sync_attempt import (\n    mark_external_group_sync_attempt_failed,\n)\nfrom onyx.db.permission_sync_attempt import (\n    mark_external_group_sync_attempt_in_progress,\n)\nfrom tests.external_dependency_unit.conftest import create_test_user\n\n\ndef _create_test_connector_credential_pair(\n    db_session: Session, source: DocumentSource = DocumentSource.GOOGLE_DRIVE\n) -> ConnectorCredentialPair:\n    \"\"\"Create a test connector credential pair for testing.\"\"\"\n    user = create_test_user(db_session, \"test_user\")\n\n    connector = Connector(\n        name=f\"Test {source.value} Connector\",\n        source=source,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={},\n        refresh_freq=None,\n        prune_freq=None,\n        indexing_start=datetime.now(timezone.utc),\n    )\n    db_session.add(connector)\n    db_session.flush()\n\n    credential = Credential(\n        credential_json={},\n        user_id=user.id,\n        admin_public=True,\n    )\n    db_session.add(credential)\n    db_session.flush()\n    # Expire the credential so it reloads from DB with SensitiveValue wrapper\n    db_session.expire(credential)\n\n    cc_pair = ConnectorCredentialPair(\n        connector_id=connector.id,\n        credential_id=credential.id,\n        name=\"Test CC Pair\",\n        status=ConnectorCredentialPairStatus.ACTIVE,\n        access_type=AccessType.PUBLIC,\n    )\n    db_session.add(cc_pair)\n    db_session.commit()\n\n    return cc_pair\n\n\ndef _cleanup_global_external_group_sync_attempts(db_session: Session) -> None:\n    \"\"\"Clean up any existing global external group sync attempts from previous test runs.\"\"\"\n    # Delete all global attempts (where connector_credential_pair_id is None)\n    db_session.query(ExternalGroupPermissionSyncAttempt).filter(\n        ExternalGroupPermissionSyncAttempt.connector_credential_pair_id.is_(None)\n    ).delete()\n    db_session.commit()\n\n\nclass TestExternalGroupPermissionSyncAttempt:\n    def test_create_external_group_sync_attempt_with_cc_pair(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test creating a new external group sync attempt for a specific connector.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n\n        attempt_id = create_external_group_sync_attempt(\n            connector_credential_pair_id=cc_pair.id,\n            db_session=db_session,\n        )\n\n        assert attempt_id is not None\n        assert isinstance(attempt_id, int)\n\n        # Verify the attempt was created with correct defaults\n        attempt = get_external_group_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert attempt.connector_credential_pair_id == cc_pair.id\n        assert attempt.status == PermissionSyncStatus.NOT_STARTED\n        assert attempt.total_users_processed == 0\n        assert attempt.total_groups_processed == 0\n        assert attempt.total_group_memberships_synced == 0\n        assert attempt.time_started is None\n        assert attempt.time_finished is None\n        assert attempt.time_created is not None\n\n    def test_create_global_external_group_sync_attempt(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test creating a new global external group sync attempt.\"\"\"\n        attempt_id = create_external_group_sync_attempt(\n            connector_credential_pair_id=None,  # Global sync\n            db_session=db_session,\n        )\n\n        assert attempt_id is not None\n        assert isinstance(attempt_id, int)\n\n        # Verify the attempt was created as global\n        attempt = get_external_group_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert attempt.connector_credential_pair_id is None\n        assert attempt.status == PermissionSyncStatus.NOT_STARTED\n\n    def test_get_external_group_sync_attempt(self, db_session: Session) -> None:\n        \"\"\"Test retrieving an external group sync attempt by ID.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)\n\n        # Test basic retrieval\n        attempt = get_external_group_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert attempt.id == attempt_id\n\n        # Test with eager loading\n        attempt_with_connector = get_external_group_sync_attempt(\n            db_session, attempt_id, eager_load_connector=True\n        )\n        assert attempt_with_connector is not None\n        assert attempt_with_connector.connector_credential_pair is not None\n        assert attempt_with_connector.connector_credential_pair.id == cc_pair.id\n\n        # Test non-existent ID\n        non_existent_attempt = get_external_group_sync_attempt(db_session, 99999)\n        assert non_existent_attempt is None\n\n    def test_mark_external_group_sync_attempt_in_progress(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test marking an external group sync attempt as in progress.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)\n\n        # Mark as in progress\n        updated_attempt = mark_external_group_sync_attempt_in_progress(\n            attempt_id, db_session\n        )\n\n        assert updated_attempt.status == PermissionSyncStatus.IN_PROGRESS\n        assert updated_attempt.time_started is not None\n        assert updated_attempt.time_finished is None\n\n        # Verify it fails if already in progress\n        with pytest.raises(RuntimeError, match=\"not in NOT_STARTED status\"):\n            mark_external_group_sync_attempt_in_progress(attempt_id, db_session)\n\n    def test_mark_external_group_sync_attempt_failed(self, db_session: Session) -> None:\n        \"\"\"Test marking an external group sync attempt as failed.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)\n\n        # Mark as failed with error message (should work even without starting)\n        error_msg_1 = \"External group sync service unavailable\"\n        mark_external_group_sync_attempt_failed(\n            attempt_id, db_session, error_message=error_msg_1\n        )\n\n        # Verify the status and timestamps\n        attempt = get_external_group_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert attempt.status == PermissionSyncStatus.FAILED\n        assert attempt.time_started is not None  # Should be set if not already set\n        assert attempt.time_finished is not None\n        assert attempt.error_message == error_msg_1\n\n        # Test with error message\n        attempt_id_2 = create_external_group_sync_attempt(cc_pair.id, db_session)\n        error_msg = \"Connection timeout to external service\"\n        mark_external_group_sync_attempt_failed(\n            attempt_id_2, db_session, error_message=error_msg\n        )\n\n        # Verify the error message was stored\n        attempt_2 = get_external_group_sync_attempt(db_session, attempt_id_2)\n        assert attempt_2 is not None\n        assert attempt_2.status == PermissionSyncStatus.FAILED\n        assert attempt_2.error_message == error_msg\n\n    def test_get_recent_external_group_sync_attempts_for_cc_pair(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test retrieving recent external group sync attempts for a connector credential pair.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n\n        # Create multiple attempts for the cc_pair\n        attempt_ids = []\n        for i in range(5):\n            attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)\n            attempt_ids.append(attempt_id)\n\n        # Get recent attempts\n        recent_attempts = get_recent_external_group_sync_attempts_for_cc_pair(\n            cc_pair_id=cc_pair.id,\n            limit=3,\n            db_session=db_session,\n        )\n\n        assert len(recent_attempts) == 3\n\n        # Verify they are ordered by time_created descending (most recent first)\n        for i in range(len(recent_attempts) - 1):\n            assert (\n                recent_attempts[i].time_created >= recent_attempts[i + 1].time_created\n            )\n\n        # Verify they all belong to the correct cc_pair\n        for attempt in recent_attempts:\n            assert attempt.connector_credential_pair_id == cc_pair.id\n\n        # Test with different cc_pair (should return empty)\n        other_cc_pair = _create_test_connector_credential_pair(\n            db_session, source=DocumentSource.SLACK\n        )\n        other_attempts = get_recent_external_group_sync_attempts_for_cc_pair(\n            cc_pair_id=other_cc_pair.id,\n            limit=10,\n            db_session=db_session,\n        )\n        assert len(other_attempts) == 0\n\n    def test_get_recent_global_external_group_sync_attempts(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test retrieving recent global external group sync attempts.\"\"\"\n        # Clean up any existing global attempts from previous test runs\n        _cleanup_global_external_group_sync_attempts(db_session)\n\n        # Create a cc_pair specific attempt\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        create_external_group_sync_attempt(cc_pair.id, db_session)\n\n        # Create multiple global attempts\n        global_attempt_ids = []\n        for i in range(3):\n            attempt_id = create_external_group_sync_attempt(None, db_session)  # Global\n            global_attempt_ids.append(attempt_id)\n\n        # Get recent global attempts\n        recent_global_attempts = get_recent_external_group_sync_attempts_for_cc_pair(\n            cc_pair_id=None,  # Global\n            limit=5,\n            db_session=db_session,\n        )\n\n        assert len(recent_global_attempts) == 3\n\n        # Verify they are all global (cc_pair_id is None)\n        for attempt in recent_global_attempts:\n            assert attempt.connector_credential_pair_id is None\n\n        # Verify they are ordered by time_created descending\n        for i in range(len(recent_global_attempts) - 1):\n            assert (\n                recent_global_attempts[i].time_created\n                >= recent_global_attempts[i + 1].time_created\n            )\n\n    def test_status_enum_methods(self, db_session: Session) -> None:\n        \"\"\"Test the status enum helper methods.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)\n\n        # Test NOT_STARTED status\n        attempt = get_external_group_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert not attempt.status.is_terminal()\n        assert not attempt.status.is_successful()\n\n        # Test IN_PROGRESS status\n        mark_external_group_sync_attempt_in_progress(attempt_id, db_session)\n        attempt = get_external_group_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert not attempt.status.is_terminal()\n        assert not attempt.status.is_successful()\n\n        # Test SUCCESS status via complete function\n        complete_external_group_sync_attempt(\n            db_session=db_session,\n            attempt_id=attempt_id,\n            total_users_processed=100,\n            total_groups_processed=10,\n            total_group_memberships_synced=500,\n            errors_encountered=0,\n        )\n        attempt = get_external_group_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert attempt.status.is_terminal()\n        assert attempt.status.is_successful()\n\n        # Test FAILED status (create new attempt)\n        failed_attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)\n        mark_external_group_sync_attempt_failed(\n            failed_attempt_id, db_session, error_message=\"Test failure\"\n        )\n        failed_attempt = get_external_group_sync_attempt(db_session, failed_attempt_id)\n        assert failed_attempt is not None\n        assert failed_attempt.status.is_terminal()\n        assert not failed_attempt.status.is_successful()\n\n        # Test COMPLETED_WITH_ERRORS status via complete function (create new attempt)\n        error_attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)\n        mark_external_group_sync_attempt_in_progress(error_attempt_id, db_session)\n        complete_external_group_sync_attempt(\n            db_session=db_session,\n            attempt_id=error_attempt_id,\n            total_users_processed=100,\n            total_groups_processed=10,\n            total_group_memberships_synced=500,\n            errors_encountered=5,\n        )\n        error_attempt = get_external_group_sync_attempt(db_session, error_attempt_id)\n        assert error_attempt is not None\n        assert error_attempt.status.is_terminal()\n        assert (\n            error_attempt.status.is_successful()\n        )  # Completed with errors is still \"successful\"\n\n    def test_complete_external_group_sync_attempt_success(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test completing an external group sync attempt without errors.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)\n\n        # Mark as in progress first\n        mark_external_group_sync_attempt_in_progress(attempt_id, db_session)\n\n        # Complete without errors\n        completed_attempt = complete_external_group_sync_attempt(\n            db_session=db_session,\n            attempt_id=attempt_id,\n            total_users_processed=500,\n            total_groups_processed=25,\n            total_group_memberships_synced=1200,\n            errors_encountered=0,\n        )\n\n        assert completed_attempt.status == PermissionSyncStatus.SUCCESS\n        assert completed_attempt.total_users_processed == 500\n        assert completed_attempt.total_groups_processed == 25\n        assert completed_attempt.total_group_memberships_synced == 1200\n        assert completed_attempt.time_finished is not None\n\n    def test_complete_external_group_sync_attempt_with_errors(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test completing an external group sync attempt with errors.\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)\n\n        # Mark as in progress first\n        mark_external_group_sync_attempt_in_progress(attempt_id, db_session)\n\n        # Complete with errors\n        completed_attempt = complete_external_group_sync_attempt(\n            db_session=db_session,\n            attempt_id=attempt_id,\n            total_users_processed=500,\n            total_groups_processed=25,\n            total_group_memberships_synced=1200,\n            errors_encountered=10,\n        )\n\n        assert completed_attempt.status == PermissionSyncStatus.COMPLETED_WITH_ERRORS\n        assert completed_attempt.total_users_processed == 500\n        assert completed_attempt.total_groups_processed == 25\n        assert completed_attempt.total_group_memberships_synced == 1200\n        assert completed_attempt.time_finished is not None\n\n    def test_complete_external_group_sync_attempt_can_be_called_multiple_times(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test that complete can be called multiple times if needed (accumulates correctly).\"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)\n\n        # Mark as in progress\n        mark_external_group_sync_attempt_in_progress(attempt_id, db_session)\n\n        # Complete once\n        first_complete = complete_external_group_sync_attempt(\n            db_session=db_session,\n            attempt_id=attempt_id,\n            total_users_processed=200,\n            total_groups_processed=10,\n            total_group_memberships_synced=600,\n            errors_encountered=0,\n        )\n\n        # Verify first completion\n        assert first_complete.status == PermissionSyncStatus.SUCCESS\n        assert first_complete.total_users_processed == 200\n        assert first_complete.total_groups_processed == 10\n        assert first_complete.total_group_memberships_synced == 600\n        assert first_complete.time_finished is not None\n\n        # Call complete again (simulating additional batch processing)\n        second_complete = complete_external_group_sync_attempt(\n            db_session=db_session,\n            attempt_id=attempt_id,\n            total_users_processed=300,\n            total_groups_processed=15,\n            total_group_memberships_synced=600,\n            errors_encountered=5,\n        )\n\n        # Should accumulate progress from both calls and update status\n        assert second_complete.status == PermissionSyncStatus.COMPLETED_WITH_ERRORS\n        assert second_complete.total_users_processed == 500\n        assert second_complete.total_groups_processed == 25\n        assert second_complete.total_group_memberships_synced == 1200\n        assert second_complete.time_finished is not None\n\n    def test_global_vs_connector_specific_attempts(self, db_session: Session) -> None:\n        \"\"\"Test that global and connector-specific attempts are properly separated.\"\"\"\n        # Clean up any existing global attempts from previous test runs\n        _cleanup_global_external_group_sync_attempts(db_session)\n\n        cc_pair = _create_test_connector_credential_pair(db_session)\n\n        # Create connector-specific attempts\n        cc_attempt_1 = create_external_group_sync_attempt(cc_pair.id, db_session)\n        cc_attempt_2 = create_external_group_sync_attempt(cc_pair.id, db_session)\n\n        # Create global attempts\n        global_attempt_1 = create_external_group_sync_attempt(None, db_session)\n        global_attempt_2 = create_external_group_sync_attempt(None, db_session)\n\n        # Verify connector-specific attempts\n        cc_attempts = get_recent_external_group_sync_attempts_for_cc_pair(\n            cc_pair_id=cc_pair.id, limit=10, db_session=db_session\n        )\n        assert len(cc_attempts) == 2\n        cc_attempt_ids = {attempt.id for attempt in cc_attempts}\n        assert cc_attempt_ids == {cc_attempt_1, cc_attempt_2}\n\n        # Verify global attempts\n        global_attempts = get_recent_external_group_sync_attempts_for_cc_pair(\n            cc_pair_id=None, limit=10, db_session=db_session\n        )\n        assert len(global_attempts) == 2\n        global_attempt_ids = {attempt.id for attempt in global_attempts}\n        assert global_attempt_ids == {global_attempt_1, global_attempt_2}\n\n    def test_external_group_sync_attempt_not_stuck_on_early_failure(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test that attempts transition to FAILED on early validation failures.\n\n        This tests the bug fix where attempts could get stuck in NOT_STARTED status\n        if validation checks failed after the attempt was created but before it was\n        marked as IN_PROGRESS.\n        \"\"\"\n        cc_pair = _create_test_connector_credential_pair(db_session)\n\n        # Create an attempt (simulating the start of a sync task)\n        attempt_id = create_external_group_sync_attempt(cc_pair.id, db_session)\n\n        # Verify it starts in NOT_STARTED\n        attempt = get_external_group_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert attempt.status == PermissionSyncStatus.NOT_STARTED\n        assert attempt.error_message is None\n\n        # Simulate an early validation failure (e.g., missing sync config)\n        # In the actual code, this would be called by _fail_external_group_sync_attempt()\n        error_msg = \"No group sync config found for source\"\n        mark_external_group_sync_attempt_failed(\n            attempt_id, db_session, error_message=error_msg\n        )\n\n        # Verify the attempt transitions to FAILED (not stuck in NOT_STARTED)\n        attempt = get_external_group_sync_attempt(db_session, attempt_id)\n        assert attempt is not None\n        assert attempt.status == PermissionSyncStatus.FAILED\n        assert attempt.error_message == error_msg\n        assert attempt.time_started is not None  # Should be set even on early failure\n        assert attempt.time_finished is not None\n        assert attempt.status.is_terminal()\n        assert not attempt.status.is_successful()\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/search_settings/test_search_settings.py",
    "content": "\"\"\"Tests that search settings with contextual RAG are properly propagated\nto the indexing pipeline's LLM configuration.\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.context.search.models import SavedSearchSettings\nfrom onyx.context.search.models import SearchSettingsCreationRequest\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.db.llm import fetch_default_contextual_rag_model\nfrom onyx.db.llm import fetch_existing_llm_provider\nfrom onyx.db.llm import update_default_contextual_model\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.db.models import IndexModelStatus\nfrom onyx.db.search_settings import create_search_settings\nfrom onyx.db.swap_index import check_and_perform_index_swap\nfrom onyx.indexing.indexing_pipeline import IndexingPipelineResult\nfrom onyx.indexing.indexing_pipeline import run_indexing_pipeline\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\nfrom onyx.server.manage.search_settings import set_new_search_settings\nfrom onyx.server.manage.search_settings import update_saved_search_settings\n\n\nTEST_CONTEXTUAL_RAG_LLM_NAME = \"test-contextual-model\"\nTEST_CONTEXTUAL_RAG_LLM_PROVIDER = \"test-contextual-provider\"\n\nUPDATED_CONTEXTUAL_RAG_LLM_NAME = \"updated-contextual-model\"\nUPDATED_CONTEXTUAL_RAG_LLM_PROVIDER = \"updated-contextual-provider\"\n\n\ndef _create_llm_provider_and_model(\n    db_session: Session,\n    provider_name: str,\n    model_name: str,\n) -> None:\n    \"\"\"Insert an LLM provider with a single visible model configuration.\"\"\"\n    if fetch_existing_llm_provider(name=provider_name, db_session=db_session):\n        return\n    upsert_llm_provider(\n        LLMProviderUpsertRequest(\n            name=provider_name,\n            provider=\"openai\",\n            api_key=\"test-api-key\",\n            model_configurations=[\n                ModelConfigurationUpsertRequest(\n                    name=model_name,\n                    is_visible=True,\n                    max_input_tokens=4096,\n                )\n            ],\n        ),\n        db_session=db_session,\n    )\n\n\ndef _make_creation_request(\n    llm_name: str = TEST_CONTEXTUAL_RAG_LLM_NAME,\n    llm_provider: str = TEST_CONTEXTUAL_RAG_LLM_PROVIDER,\n    enable_contextual_rag: bool = True,\n) -> SearchSettingsCreationRequest:\n    return SearchSettingsCreationRequest(\n        model_name=\"test-embedding-model\",\n        model_dim=768,\n        normalize=True,\n        query_prefix=\"\",\n        passage_prefix=\"\",\n        provider_type=None,\n        index_name=None,\n        multipass_indexing=False,\n        embedding_precision=EmbeddingPrecision.FLOAT,\n        reduced_dimension=None,\n        enable_contextual_rag=enable_contextual_rag,\n        contextual_rag_llm_name=llm_name,\n        contextual_rag_llm_provider=llm_provider,\n    )\n\n\ndef _make_saved_search_settings(\n    llm_name: str = TEST_CONTEXTUAL_RAG_LLM_NAME,\n    llm_provider: str = TEST_CONTEXTUAL_RAG_LLM_PROVIDER,\n    enable_contextual_rag: bool = True,\n) -> SavedSearchSettings:\n    return SavedSearchSettings(\n        model_name=\"test-embedding-model\",\n        model_dim=768,\n        normalize=True,\n        query_prefix=\"\",\n        passage_prefix=\"\",\n        provider_type=None,\n        index_name=\"test_index\",\n        multipass_indexing=False,\n        embedding_precision=EmbeddingPrecision.FLOAT,\n        reduced_dimension=None,\n        enable_contextual_rag=enable_contextual_rag,\n        contextual_rag_llm_name=llm_name,\n        contextual_rag_llm_provider=llm_provider,\n    )\n\n\ndef _run_indexing_pipeline_with_mocks(\n    mock_get_llm: MagicMock,\n    mock_index_handler: MagicMock,\n    db_session: Session,\n) -> None:\n    \"\"\"Call run_indexing_pipeline with all heavy dependencies mocked out.\"\"\"\n    mock_get_llm.return_value = MagicMock()\n    mock_index_handler.return_value = IndexingPipelineResult(\n        new_docs=0,\n        total_docs=0,\n        total_chunks=0,\n        failures=[],\n    )\n\n    run_indexing_pipeline(\n        document_batch=[],\n        request_id=None,\n        embedder=MagicMock(),\n        document_indices=[],\n        db_session=db_session,\n        tenant_id=\"public\",\n        adapter=MagicMock(),\n        chunker=MagicMock(chunk_token_limit=512),\n    )\n\n\n@pytest.fixture()\ndef baseline_search_settings(\n    tenant_context: None,  # noqa: ARG001\n    db_session: Session,\n) -> None:\n    \"\"\"Ensure a baseline PRESENT search settings row exists in the DB,\n    which is required before set_new_search_settings can be called.\"\"\"\n    baseline = _make_saved_search_settings(enable_contextual_rag=False)\n    create_search_settings(\n        search_settings=baseline,\n        db_session=db_session,\n        status=IndexModelStatus.PRESENT,\n    )\n    # Sync default contextual model to match PRESENT (clears any leftover state)\n    update_default_contextual_model(\n        db_session=db_session,\n        enable_contextual_rag=baseline.enable_contextual_rag,\n        contextual_rag_llm_provider=baseline.contextual_rag_llm_provider,\n        contextual_rag_llm_name=baseline.contextual_rag_llm_name,\n    )\n\n\n@patch(\"onyx.db.swap_index.get_all_document_indices\")\n@patch(\"onyx.server.manage.search_settings.get_all_document_indices\")\n@patch(\"onyx.server.manage.search_settings.get_default_document_index\")\n@patch(\"onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag\")\n@patch(\"onyx.indexing.indexing_pipeline.index_doc_batch_with_handler\")\ndef test_indexing_pipeline_uses_contextual_rag_settings_from_create(\n    mock_index_handler: MagicMock,\n    mock_get_llm: MagicMock,\n    mock_get_doc_index: MagicMock,  # noqa: ARG001\n    mock_get_all_doc_indices_search_settings: MagicMock,  # noqa: ARG001\n    mock_get_all_doc_indices: MagicMock,\n    baseline_search_settings: None,  # noqa: ARG001\n    db_session: Session,\n) -> None:\n    \"\"\"After creating FUTURE settings and swapping to PRESENT,\n    fetch_default_contextual_rag_model should match the PRESENT settings\n    and run_indexing_pipeline should call get_llm_for_contextual_rag.\"\"\"\n    _create_llm_provider_and_model(\n        db_session=db_session,\n        provider_name=TEST_CONTEXTUAL_RAG_LLM_PROVIDER,\n        model_name=TEST_CONTEXTUAL_RAG_LLM_NAME,\n    )\n\n    set_new_search_settings(\n        search_settings_new=_make_creation_request(),\n        _=MagicMock(),\n        db_session=db_session,\n    )\n\n    # PRESENT still has contextual RAG disabled, so default should be None\n    default_model = fetch_default_contextual_rag_model(db_session)\n    assert default_model is None\n\n    # Swap FUTURE → PRESENT (with 0 cc-pairs, REINDEX swaps immediately)\n    mock_get_all_doc_indices.return_value = []\n    old_settings = check_and_perform_index_swap(db_session)\n    assert old_settings is not None, \"Swap should have occurred\"\n\n    # Now PRESENT has contextual RAG enabled, default should match\n    default_model = fetch_default_contextual_rag_model(db_session)\n    assert default_model is not None\n    assert default_model.name == TEST_CONTEXTUAL_RAG_LLM_NAME\n\n    _run_indexing_pipeline_with_mocks(mock_get_llm, mock_index_handler, db_session)\n\n    mock_get_llm.assert_called_once_with(\n        TEST_CONTEXTUAL_RAG_LLM_NAME,\n        TEST_CONTEXTUAL_RAG_LLM_PROVIDER,\n    )\n\n\n@patch(\"onyx.db.swap_index.get_all_document_indices\")\n@patch(\"onyx.server.manage.search_settings.get_all_document_indices\")\n@patch(\"onyx.server.manage.search_settings.get_default_document_index\")\n@patch(\"onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag\")\n@patch(\"onyx.indexing.indexing_pipeline.index_doc_batch_with_handler\")\ndef test_indexing_pipeline_uses_updated_contextual_rag_settings(\n    mock_index_handler: MagicMock,\n    mock_get_llm: MagicMock,\n    mock_get_doc_index: MagicMock,  # noqa: ARG001\n    mock_get_all_doc_indices_search_settings: MagicMock,  # noqa: ARG001\n    mock_get_all_doc_indices: MagicMock,\n    baseline_search_settings: None,  # noqa: ARG001\n    db_session: Session,\n) -> None:\n    \"\"\"After creating FUTURE settings, swapping to PRESENT, then updating\n    via update_saved_search_settings, run_indexing_pipeline should use\n    the updated LLM names.\"\"\"\n    _create_llm_provider_and_model(\n        db_session=db_session,\n        provider_name=TEST_CONTEXTUAL_RAG_LLM_PROVIDER,\n        model_name=TEST_CONTEXTUAL_RAG_LLM_NAME,\n    )\n    _create_llm_provider_and_model(\n        db_session=db_session,\n        provider_name=UPDATED_CONTEXTUAL_RAG_LLM_PROVIDER,\n        model_name=UPDATED_CONTEXTUAL_RAG_LLM_NAME,\n    )\n\n    # Create FUTURE settings with contextual RAG enabled\n    set_new_search_settings(\n        search_settings_new=_make_creation_request(),\n        _=MagicMock(),\n        db_session=db_session,\n    )\n\n    # PRESENT still has contextual RAG disabled, so default should be None\n    default_model = fetch_default_contextual_rag_model(db_session)\n    assert default_model is None\n\n    # Swap FUTURE → PRESENT (with 0 cc-pairs, REINDEX swaps immediately)\n    mock_get_all_doc_indices.return_value = []\n    old_settings = check_and_perform_index_swap(db_session)\n    assert old_settings is not None, \"Swap should have occurred\"\n\n    # Now PRESENT has contextual RAG enabled, default should match\n    default_model = fetch_default_contextual_rag_model(db_session)\n    assert default_model is not None\n    assert default_model.name == TEST_CONTEXTUAL_RAG_LLM_NAME\n\n    # Update the PRESENT LLM names\n    update_saved_search_settings(\n        search_settings=_make_saved_search_settings(\n            llm_name=UPDATED_CONTEXTUAL_RAG_LLM_NAME,\n            llm_provider=UPDATED_CONTEXTUAL_RAG_LLM_PROVIDER,\n        ),\n        _=MagicMock(),\n        db_session=db_session,\n    )\n\n    default_model = fetch_default_contextual_rag_model(db_session)\n    assert default_model is not None\n    assert default_model.name == UPDATED_CONTEXTUAL_RAG_LLM_NAME\n\n    _run_indexing_pipeline_with_mocks(mock_get_llm, mock_index_handler, db_session)\n\n    mock_get_llm.assert_called_once_with(\n        UPDATED_CONTEXTUAL_RAG_LLM_NAME,\n        UPDATED_CONTEXTUAL_RAG_LLM_PROVIDER,\n    )\n\n\n@patch(\"onyx.server.manage.search_settings.get_all_document_indices\")\n@patch(\"onyx.server.manage.search_settings.get_default_document_index\")\n@patch(\"onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag\")\n@patch(\"onyx.indexing.indexing_pipeline.index_doc_batch_with_handler\")\ndef test_indexing_pipeline_skips_llm_when_contextual_rag_disabled(\n    mock_index_handler: MagicMock,\n    mock_get_llm: MagicMock,\n    mock_get_doc_index: MagicMock,  # noqa: ARG001\n    mock_get_all_doc_indices_search_settings: MagicMock,  # noqa: ARG001\n    baseline_search_settings: None,  # noqa: ARG001\n    db_session: Session,\n) -> None:\n    \"\"\"When contextual RAG is disabled in search settings,\n    get_llm_for_contextual_rag should not be called.\"\"\"\n    _create_llm_provider_and_model(\n        db_session=db_session,\n        provider_name=TEST_CONTEXTUAL_RAG_LLM_PROVIDER,\n        model_name=TEST_CONTEXTUAL_RAG_LLM_NAME,\n    )\n\n    set_new_search_settings(\n        search_settings_new=_make_creation_request(enable_contextual_rag=False),\n        _=MagicMock(),\n        db_session=db_session,\n    )\n\n    # PRESENT has contextual RAG disabled, so default should be None\n    default_model = fetch_default_contextual_rag_model(db_session)\n    assert default_model is None\n\n    _run_indexing_pipeline_with_mocks(mock_get_llm, mock_index_handler, db_session)\n\n    mock_get_llm.assert_not_called()\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/slack_bot/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/external_dependency_unit/slack_bot/test_slack_bot_crud.py",
    "content": "\"\"\"Tests that SlackBot CRUD operations return properly typed SensitiveValue fields.\n\nRegression test for the bug where insert_slack_bot/update_slack_bot returned\nobjects with raw string tokens instead of SensitiveValue wrappers, causing\n'str object has no attribute get_value' errors in SlackBot.from_model().\n\"\"\"\n\nfrom uuid import uuid4\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.slack_bot import insert_slack_bot\nfrom onyx.db.slack_bot import update_slack_bot\nfrom onyx.server.manage.models import SlackBot\nfrom onyx.utils.sensitive import SensitiveValue\n\n\ndef _unique(prefix: str) -> str:\n    return f\"{prefix}-{uuid4().hex[:8]}\"\n\n\ndef test_insert_slack_bot_returns_sensitive_values(db_session: Session) -> None:\n    bot_token = _unique(\"xoxb-insert\")\n    app_token = _unique(\"xapp-insert\")\n    user_token = _unique(\"xoxp-insert\")\n\n    slack_bot = insert_slack_bot(\n        db_session=db_session,\n        name=_unique(\"test-bot-insert\"),\n        enabled=True,\n        bot_token=bot_token,\n        app_token=app_token,\n        user_token=user_token,\n    )\n\n    assert isinstance(slack_bot.bot_token, SensitiveValue)\n    assert isinstance(slack_bot.app_token, SensitiveValue)\n    assert isinstance(slack_bot.user_token, SensitiveValue)\n\n    assert slack_bot.bot_token.get_value(apply_mask=False) == bot_token\n    assert slack_bot.app_token.get_value(apply_mask=False) == app_token\n    assert slack_bot.user_token.get_value(apply_mask=False) == user_token\n\n    # Verify from_model works without error\n    pydantic_bot = SlackBot.from_model(slack_bot)\n    assert pydantic_bot.bot_token  # masked, but not empty\n    assert pydantic_bot.app_token\n\n\ndef test_update_slack_bot_returns_sensitive_values(db_session: Session) -> None:\n    slack_bot = insert_slack_bot(\n        db_session=db_session,\n        name=_unique(\"test-bot-update\"),\n        enabled=True,\n        bot_token=_unique(\"xoxb-update\"),\n        app_token=_unique(\"xapp-update\"),\n    )\n\n    new_bot_token = _unique(\"xoxb-update-new\")\n    new_app_token = _unique(\"xapp-update-new\")\n    new_user_token = _unique(\"xoxp-update-new\")\n\n    updated = update_slack_bot(\n        db_session=db_session,\n        slack_bot_id=slack_bot.id,\n        name=_unique(\"test-bot-updated\"),\n        enabled=False,\n        bot_token=new_bot_token,\n        app_token=new_app_token,\n        user_token=new_user_token,\n    )\n\n    assert isinstance(updated.bot_token, SensitiveValue)\n    assert isinstance(updated.app_token, SensitiveValue)\n    assert isinstance(updated.user_token, SensitiveValue)\n\n    assert updated.bot_token.get_value(apply_mask=False) == new_bot_token\n    assert updated.app_token.get_value(apply_mask=False) == new_app_token\n    assert updated.user_token.get_value(apply_mask=False) == new_user_token\n\n    # Verify from_model works without error\n    pydantic_bot = SlackBot.from_model(updated)\n    assert pydantic_bot.bot_token\n    assert pydantic_bot.app_token\n    assert pydantic_bot.user_token is not None\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/slack_bot/test_slack_bot_federated_search.py",
    "content": "# NOTE: ruff and black disagree after applying this noqa, so we just set file-level.\n# ruff: noqa: ARG005\nimport os\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import Mock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nfrom onyx.db.llm import update_default_provider\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\n\n# Set environment variables to disable model server for testing\nos.environ[\"DISABLE_MODEL_SERVER\"] = \"true\"\nos.environ[\"MODEL_SERVER_HOST\"] = \"disabled\"\nos.environ[\"MODEL_SERVER_PORT\"] = \"9000\"\n\nfrom sqlalchemy import inspect\nfrom sqlalchemy.orm import Session\nfrom slack_sdk.errors import SlackApiError\n\nfrom onyx.configs.constants import FederatedConnectorSource\nfrom onyx.context.search.federated.slack_search import fetch_and_cache_channel_metadata\nfrom onyx.db.models import DocumentSet\nfrom onyx.db.models import FederatedConnector\nfrom onyx.db.models import FederatedConnector__DocumentSet\nfrom onyx.db.models import LLMProvider\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Persona__DocumentSet\nfrom onyx.db.models import Persona__Tool\nfrom onyx.db.models import SlackBot\nfrom onyx.db.models import SlackChannelConfig\nfrom onyx.db.models import User\nfrom onyx.onyxbot.slack.listener import process_message\nfrom onyx.onyxbot.slack.models import ChannelType\nfrom onyx.db.tools import get_builtin_tool\nfrom onyx.tools.built_in_tools import SearchTool\nfrom tests.external_dependency_unit.conftest import create_test_user\nfrom onyx.llm.constants import LlmProviderNames\n\n\ndef _create_test_persona_with_slack_config(db_session: Session) -> Persona | None:\n    \"\"\"Helper to create a test persona configured for Slack federated search\"\"\"\n    unique_id = str(uuid4())[:8]\n    document_set = DocumentSet(\n        name=f\"test_slack_docs_{unique_id}\",\n        description=\"Test document set for Slack federated search\",\n    )\n    db_session.add(document_set)\n    db_session.flush()\n\n    persona = Persona(\n        name=f\"test_slack_persona_{unique_id}\",\n        description=\"Test persona for Slack federated search\",\n        system_prompt=\"You are a helpful assistant.\",\n        task_prompt=\"Answer the user's question based on the provided context.\",\n    )\n    db_session.add(persona)\n    db_session.flush()\n\n    persona_doc_set = Persona__DocumentSet(\n        persona_id=persona.id,\n        document_set_id=document_set.id,\n    )\n    db_session.add(persona_doc_set)\n    db_session.commit()\n\n    # Built-in tools are automatically seeded by migrations\n\n    try:\n        search_tool = get_builtin_tool(db_session=db_session, tool_type=SearchTool)\n        if search_tool:\n            persona_tool = Persona__Tool(persona_id=persona.id, tool_id=search_tool.id)\n            db_session.add(persona_tool)\n    except RuntimeError:\n        # SearchTool not found, skip adding it\n        pass\n\n    db_session.commit()\n\n    # Prompts are now directly on the persona table, no need for joinedload\n    return persona\n\n\ndef _create_mock_slack_request(\n    text: str, channel_id: str = \"C1234567890\", slack_bot_id: int = 12345\n) -> Mock:\n    \"\"\"Create a mock Slack request\"\"\"\n    mock_req = Mock()\n    mock_req.type = \"events_api\"\n    mock_req.envelope_id = \"test_envelope_id\"\n    mock_req.payload = {\n        \"event\": {\n            \"type\": \"app_mention\",\n            \"text\": f\"<@U1234567890> {text}\",\n            \"channel\": channel_id,\n            \"user\": \"U9876543210\",\n            \"ts\": \"1234567890.123456\",\n        }\n    }\n    mock_req.slack_bot_id = slack_bot_id\n    return mock_req\n\n\ndef _create_mock_slack_client(\n    channel_id: str = \"C1234567890\",  # noqa: ARG001\n    slack_bot_id: int = 12345,\n) -> Mock:\n    \"\"\"Create a mock Slack client\"\"\"\n    mock_client = Mock()\n    mock_client.slack_bot_id = slack_bot_id\n    mock_client.web_client = Mock()\n\n    mock_post_message_response = {\"ok\": True, \"message_ts\": \"1234567890.123456\"}\n    mock_client.web_client.chat_postMessage = Mock(\n        return_value=mock_post_message_response\n    )\n\n    mock_users_info_response = Mock()\n    mock_users_info_response.__getitem__ = Mock(\n        side_effect=lambda key: {\"ok\": True}[key]\n    )\n    mock_users_info_response.data = {\n        \"user\": {\n            \"id\": \"U9876543210\",\n            \"name\": \"testuser\",\n            \"real_name\": \"Test User\",\n            \"profile\": {\n                \"display_name\": \"Test User\",\n                \"first_name\": \"Test\",\n                \"last_name\": \"User\",\n                \"email\": \"test@example.com\",\n            },\n        }\n    }\n    mock_client.web_client.users_info = Mock(return_value=mock_users_info_response)\n\n    mock_auth_test_response = {\n        \"ok\": True,\n        \"user_id\": \"U1234567890\",\n        \"bot_id\": \"B1234567890\",\n    }\n    mock_client.web_client.auth_test = Mock(return_value=mock_auth_test_response)\n\n    def mock_conversations_info_response(channel: str) -> Mock:\n        channel_id = channel\n        if channel_id == \"C1234567890\":  # general - public\n            mock_response = Mock()\n            mock_response.validate.return_value = None\n            mock_response.data = {\n                \"channel\": {\n                    \"id\": \"C1234567890\",\n                    \"name\": \"general\",\n                    \"is_channel\": True,\n                    \"is_private\": False,\n                    \"is_group\": False,\n                    \"is_mpim\": False,\n                    \"is_im\": False,\n                }\n            }\n            mock_response.__getitem__ = lambda self, key: mock_response.data[key]\n            return mock_response\n        elif channel_id == \"C1111111111\":  # support - public\n            mock_response = Mock()\n            mock_response.validate.return_value = None\n            mock_response.data = {\n                \"channel\": {\n                    \"id\": \"C1111111111\",\n                    \"name\": \"support\",\n                    \"is_channel\": True,\n                    \"is_private\": False,\n                    \"is_group\": False,\n                    \"is_mpim\": False,\n                    \"is_im\": False,\n                }\n            }\n            mock_response.__getitem__ = lambda self, key: mock_response.data[key]\n            return mock_response\n        elif channel_id == \"C9999999999\":  # dev-team - private\n            mock_response = Mock()\n            mock_response.validate.return_value = None\n            mock_response.data = {\n                \"channel\": {\n                    \"id\": \"C9999999999\",\n                    \"name\": \"dev-team\",\n                    \"is_channel\": True,\n                    \"is_private\": True,\n                    \"is_group\": False,\n                    \"is_mpim\": False,\n                    \"is_im\": False,\n                }\n            }\n            mock_response.__getitem__ = lambda self, key: mock_response.data[key]\n            return mock_response\n        elif channel_id == \"D1234567890\":  # DM\n            mock_response = Mock()\n            mock_response.validate.return_value = None\n            mock_response.data = {\n                \"channel\": {\n                    \"id\": \"D1234567890\",\n                    \"name\": \"directmessage\",\n                    \"is_channel\": False,\n                    \"is_private\": False,\n                    \"is_group\": False,\n                    \"is_mpim\": False,\n                    \"is_im\": True,\n                }\n            }\n            mock_response.__getitem__ = lambda self, key: mock_response.data[key]\n            return mock_response\n        else:\n            mock_response = Mock()\n            mock_response.validate.side_effect = Exception(\"channel_not_found\")\n            return mock_response\n\n    mock_client.web_client.conversations_info = Mock(\n        side_effect=mock_conversations_info_response\n    )\n\n    mock_client.web_client.conversations_members = Mock(\n        return_value={\"ok\": True, \"members\": [\"U9876543210\", \"U1234567890\"]}\n    )\n\n    mock_client.web_client.conversations_replies = Mock(\n        return_value={\"ok\": True, \"messages\": []}\n    )\n\n    return mock_client\n\n\nclass TestSlackBotFederatedSearch:\n    \"\"\"Test Slack bot federated search functionality\"\"\"\n\n    def _setup_test_environment(\n        self, db_session: Session\n    ) -> tuple[User, Persona, FederatedConnector, SlackBot, SlackChannelConfig]:\n        \"\"\"Setup test environment with user, persona, and federated connector\"\"\"\n        user = create_test_user(db_session, \"slack_bot_test\")\n\n        persona = _create_test_persona_with_slack_config(db_session)\n        if persona is None:\n            raise ValueError(\"Failed to create test persona\")\n\n        federated_connector = FederatedConnector(\n            source=FederatedConnectorSource.FEDERATED_SLACK,\n            credentials={\"workspace_url\": \"https://test.slack.com\"},\n        )\n        db_session.add(federated_connector)\n        db_session.flush()\n        # Expire to ensure credentials is reloaded as SensitiveValue from DB\n        db_session.expire(federated_connector)\n\n        # Associate the federated connector with the persona's document sets\n        # This is required for Slack federated search to be enabled\n        for doc_set in persona.document_sets:\n            federated_doc_set_mapping = FederatedConnector__DocumentSet(\n                federated_connector_id=federated_connector.id,\n                document_set_id=doc_set.id,\n                entities={},  # Empty entities for test\n            )\n            db_session.add(federated_doc_set_mapping)\n        db_session.flush()\n\n        unique_id = str(uuid4())[:8]\n        slack_bot = SlackBot(\n            name=f\"Test Slack Bot {unique_id}\",\n            bot_token=f\"xoxb-test-token-{unique_id}\",\n            app_token=f\"xapp-test-token-{unique_id}\",\n            user_token=f\"xoxp-test-user-token-{unique_id}\",\n            enabled=True,\n        )\n        db_session.add(slack_bot)\n        db_session.flush()\n        # Expire to ensure tokens are reloaded as SensitiveValue from DB\n        db_session.expire(slack_bot)\n\n        slack_channel_config = SlackChannelConfig(\n            slack_bot_id=slack_bot.id,\n            persona_id=persona.id,\n            channel_config={\"channel_name\": \"general\", \"disabled\": False},\n            enable_auto_filters=True,\n            is_default=True,\n        )\n        db_session.add(slack_channel_config)\n        db_session.commit()\n\n        return user, persona, federated_connector, slack_bot, slack_channel_config\n\n    def _setup_slack_mocks(self, channel_name: str) -> tuple[list, list]:\n        \"\"\"Setup only Slack API mocks - everything else runs live\"\"\"\n        patches = [\n            patch(\"slack_sdk.WebClient.search_messages\"),\n            patch(\"onyx.context.search.federated.slack_search.query_slack\"),\n            patch(\"onyx.onyxbot.slack.listener.get_channel_type_from_id\"),\n            patch(\"onyx.context.search.utils.get_query_embeddings\"),\n        ]\n\n        started_patches = [p.start() for p in patches]\n\n        self._setup_slack_api_mocks(started_patches[0], started_patches[0])\n\n        self._setup_query_slack_mock(started_patches[1], channel_name)\n\n        self._setup_channel_type_mock(started_patches[2], channel_name)\n\n        self._setup_embedding_mock(started_patches[3])\n\n        return patches, started_patches\n\n    def _setup_embedding_mock(self, mock_get_query_embeddings: Mock) -> None:\n        \"\"\"Mock embedding calls to avoid model server dependency\"\"\"\n        # Return a dummy embedding vector for any query\n        mock_get_query_embeddings.return_value = [[0.1] * 768]  # 768-dimensional vector\n\n    def _setup_slack_api_mocks(\n        self,\n        mock_search_messages: Mock,\n        mock_conversations_info: Mock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Setup Slack API mocks to return controlled data for testing filtering\"\"\"\n        mock_search_response = Mock()\n        mock_search_response.validate.return_value = None\n        mock_search_response.get.return_value = {\n            \"matches\": [\n                {\n                    \"text\": \"Performance issue in API\",\n                    \"permalink\": \"https://test.slack.com/archives/C1234567890/p1234567890\",\n                    \"ts\": \"1234567890.123456\",\n                    \"channel\": {\"id\": \"C1234567890\", \"name\": \"general\"},\n                    \"username\": \"user1\",\n                    \"score\": 0.9,\n                },\n                {\n                    \"text\": \"Performance issue in dashboard\",\n                    \"permalink\": \"https://test.slack.com/archives/C1111111111/p1234567891\",\n                    \"ts\": \"1234567891.123456\",\n                    \"channel\": {\"id\": \"C1111111111\", \"name\": \"support\"},\n                    \"username\": \"user2\",\n                    \"score\": 0.8,\n                },\n                {\n                    \"text\": \"Performance issue in private channel\",\n                    \"permalink\": \"https://test.slack.com/archives/C9999999999/p1234567892\",\n                    \"ts\": \"1234567892.123456\",\n                    \"channel\": {\"id\": \"C9999999999\", \"name\": \"dev-team\"},\n                    \"username\": \"user3\",\n                    \"score\": 0.7,\n                },\n                {\n                    \"text\": \"Performance issue in DM\",\n                    \"permalink\": \"https://test.slack.com/archives/D1234567890/p1234567893\",\n                    \"ts\": \"1234567893.123456\",\n                    \"channel\": {\"id\": \"D1234567890\", \"name\": \"directmessage\"},\n                    \"username\": \"user4\",\n                    \"score\": 0.6,\n                },\n            ]\n        }\n        mock_search_messages.return_value = mock_search_response\n\n    def _setup_query_slack_mock(\n        self, mock_query_slack: Mock, channel_name: str\n    ) -> None:\n        \"\"\"Setup query_slack mock to capture filtering parameters\"\"\"\n        from onyx.context.search.federated.slack_search import SlackQueryResult\n\n        def mock_query_slack_capture_params(\n            query_string: str,  # noqa: ARG001\n            access_token: str,  # noqa: ARG001\n            limit: int | None = None,  # noqa: ARG001\n            allowed_private_channel: str | None = None,\n            bot_token: str | None = None,  # noqa: ARG001\n            include_dm: bool = False,\n            entities: dict | None = None,  # noqa: ARG001\n            available_channels: list | None = None,  # noqa: ARG001\n            channel_metadata_dict: dict | None = None,  # noqa: ARG001\n        ) -> SlackQueryResult:\n            self._captured_filtering_params = {\n                \"allowed_private_channel\": allowed_private_channel,\n                \"include_dm\": include_dm,\n                \"channel_name\": channel_name,\n            }\n\n            return SlackQueryResult(messages=[], filtered_channels=[])\n\n        mock_query_slack.side_effect = mock_query_slack_capture_params\n\n    def _setup_channel_type_mock(\n        self,\n        mock_get_channel_type_from_id: Mock,\n        channel_name: str,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Setup get_channel_type_from_id mock to return correct channel types\"\"\"\n\n        def mock_channel_type_response(\n            web_client: Mock,  # noqa: ARG001\n            channel_id: str,\n        ) -> ChannelType:\n            if channel_id == \"C1234567890\":  # general - public\n                return ChannelType.PUBLIC_CHANNEL\n            elif channel_id == \"C1111111111\":  # support - public\n                return ChannelType.PUBLIC_CHANNEL\n            elif channel_id == \"C9999999999\":  # dev-team - private\n                return ChannelType.PRIVATE_CHANNEL\n            elif channel_id == \"D1234567890\":  # DM\n                return ChannelType.IM\n            else:\n                return ChannelType.PUBLIC_CHANNEL  # default\n\n        mock_get_channel_type_from_id.side_effect = mock_channel_type_response\n\n    def _setup_llm_provider(self, db_session: Session) -> None:\n        \"\"\"Create a default LLM provider in the database for testing with real API key\"\"\"\n        # Delete any existing default LLM provider to ensure clean state\n        # Use SQL-level delete to properly trigger ON DELETE CASCADE\n        # (ORM-level delete tries to set foreign keys to NULL instead)\n        from sqlalchemy import delete\n\n        existing_providers = db_session.query(LLMProvider).all()\n        for provider in existing_providers:\n            db_session.execute(delete(LLMProvider).where(LLMProvider.id == provider.id))\n        db_session.commit()\n\n        api_key = os.getenv(\"OPENAI_API_KEY\")\n        if not api_key:\n            raise ValueError(\n                \"OPENAI_API_KEY environment variable not set - test requires real API key\"\n            )\n\n        provider_view = upsert_llm_provider(\n            LLMProviderUpsertRequest(\n                name=f\"test-llm-provider-{uuid4().hex[:8]}\",\n                provider=LlmProviderNames.OPENAI,\n                api_key=api_key,\n                is_public=True,\n                model_configurations=[\n                    ModelConfigurationUpsertRequest(\n                        name=\"gpt-4o\",\n                        is_visible=True,\n                        max_input_tokens=None,\n                        display_name=\"gpt-4o\",\n                    ),\n                ],\n            ),\n            db_session=db_session,\n        )\n\n        update_default_provider(provider_view.id, \"gpt-4o\", db_session)\n\n    def _teardown_common_mocks(self, patches: list) -> None:\n        \"\"\"Stop all patches\"\"\"\n        for p in patches:\n            p.stop()\n\n    @patch(\"onyx.utils.gpu_utils.fast_gpu_status_request\", return_value=False)\n    @patch(\n        \"onyx.document_index.vespa.index.VespaIndex.hybrid_retrieval\", return_value=[]\n    )\n    def test_slack_bot_public_channel_filtering(\n        self,\n        mock_vespa: Mock,  # noqa: ARG002\n        mock_gpu_status: Mock,  # noqa: ARG002\n        db_session: Session,\n    ) -> None:\n        \"\"\"Test that slack bot in public channel sees only public channel messages\"\"\"\n        self._setup_llm_provider(db_session)\n\n        user, persona, federated_connector, slack_bot, slack_channel_config = (\n            self._setup_test_environment(db_session)\n        )\n\n        channel_id = \"C1234567890\"  # #general (public)\n        channel_name = \"general\"\n\n        patches, started_patches = self._setup_slack_mocks(channel_name)\n\n        try:\n            mock_req = _create_mock_slack_request(\n                \"search for performance issues\", channel_id, slack_bot.id\n            )\n            mock_client = _create_mock_slack_client(channel_id, slack_bot.id)\n\n            process_message(mock_req, mock_client)\n\n            mock_client.web_client.chat_postMessage.assert_called()\n            post_message_calls = mock_client.web_client.chat_postMessage.call_args_list\n            last_call = post_message_calls[-1]\n            assert (\n                last_call[1][\"channel\"] == channel_id\n            ), f\"Response should be sent to {channel_id}\"\n\n            response_text = last_call[1].get(\"text\", \"\")\n            assert len(response_text) > 0, \"Bot should have sent a non-empty response\"\n\n            assert hasattr(\n                self, \"_captured_filtering_params\"\n            ), \"query_slack should have been called\"\n            params = self._captured_filtering_params\n\n            assert (\n                params[\"allowed_private_channel\"] is None\n            ), \"Public channels should not have private channel access\"\n            assert (\n                params[\"include_dm\"] is False\n            ), \"Public channels should not include DMs\"\n            assert (\n                params[\"channel_name\"] == \"general\"\n            ), \"Should be testing general channel\"\n\n        finally:\n            self._teardown_common_mocks(patches)\n\n    @patch(\"onyx.utils.gpu_utils.fast_gpu_status_request\", return_value=False)\n    @patch(\n        \"onyx.document_index.vespa.index.VespaIndex.hybrid_retrieval\", return_value=[]\n    )\n    def test_slack_bot_private_channel_filtering(\n        self,\n        mock_vespa: Mock,  # noqa: ARG002\n        mock_gpu_status: Mock,  # noqa: ARG002\n        db_session: Session,\n    ) -> None:\n        \"\"\"Test that slack bot in private channel sees private + public channel messages\"\"\"\n        self._setup_llm_provider(db_session)\n\n        user, persona, federated_connector, slack_bot, slack_channel_config = (\n            self._setup_test_environment(db_session)\n        )\n\n        channel_id = \"C9999999999\"  # #dev-team (private)\n        channel_name = \"dev-team\"\n\n        patches, started_patches = self._setup_slack_mocks(channel_name)\n\n        try:\n            mock_req = _create_mock_slack_request(\n                \"search for performance issues\", channel_id, slack_bot.id\n            )\n            mock_client = _create_mock_slack_client(channel_id, slack_bot.id)\n\n            process_message(mock_req, mock_client)\n\n            mock_client.web_client.chat_postMessage.assert_called()\n            post_message_calls = mock_client.web_client.chat_postMessage.call_args_list\n            last_call = post_message_calls[-1]\n            assert (\n                last_call[1][\"channel\"] == channel_id\n            ), f\"Response should be sent to {channel_id}\"\n\n            response_text = last_call[1].get(\"text\", \"\")\n            assert len(response_text) > 0, \"Bot should have sent a non-empty response\"\n\n            assert hasattr(\n                self, \"_captured_filtering_params\"\n            ), \"query_slack should have been called\"\n            params = self._captured_filtering_params\n\n            assert (\n                params[\"allowed_private_channel\"] == \"C9999999999\"\n            ), \"Private channels should have access to their specific private channel\"\n            assert (\n                params[\"include_dm\"] is False\n            ), \"Private channels should not include DMs\"\n            assert (\n                params[\"channel_name\"] == \"dev-team\"\n            ), \"Should be testing dev-team channel\"\n\n        finally:\n            self._teardown_common_mocks(patches)\n\n    @patch(\"onyx.utils.gpu_utils.fast_gpu_status_request\", return_value=False)\n    @patch(\n        \"onyx.document_index.vespa.index.VespaIndex.hybrid_retrieval\", return_value=[]\n    )\n    def test_slack_bot_dm_filtering(\n        self,\n        mock_vespa: Mock,  # noqa: ARG002\n        mock_gpu_status: Mock,  # noqa: ARG002\n        db_session: Session,\n    ) -> None:\n        \"\"\"Test that slack bot in DM sees all messages (no filtering)\"\"\"\n        self._setup_llm_provider(db_session)\n\n        user, persona, federated_connector, slack_bot, slack_channel_config = (\n            self._setup_test_environment(db_session)\n        )\n\n        channel_id = \"D1234567890\"  # DM\n        channel_name = \"directmessage\"\n\n        patches, started_patches = self._setup_slack_mocks(channel_name)\n\n        try:\n            mock_req = _create_mock_slack_request(\n                \"search for performance issues\", channel_id, slack_bot.id\n            )\n            mock_client = _create_mock_slack_client(channel_id, slack_bot.id)\n\n            process_message(mock_req, mock_client)\n\n            mock_client.web_client.chat_postMessage.assert_called()\n            post_message_calls = mock_client.web_client.chat_postMessage.call_args_list\n            last_call = post_message_calls[-1]\n            assert (\n                last_call[1][\"channel\"] == channel_id\n            ), f\"Response should be sent to {channel_id}\"\n\n            response_text = last_call[1].get(\"text\", \"\")\n            assert len(response_text) > 0, \"Bot should have sent a non-empty response\"\n\n            assert hasattr(\n                self, \"_captured_filtering_params\"\n            ), \"query_slack should have been called\"\n            params = self._captured_filtering_params\n\n            assert (\n                params[\"allowed_private_channel\"] is None\n            ), \"DMs should not have private channel access\"\n            assert params[\"include_dm\"] is True, \"DMs should include DM messages\"\n            assert (\n                params[\"channel_name\"] == \"directmessage\"\n            ), \"Should be testing directmessage channel\"\n\n        finally:\n            self._teardown_common_mocks(patches)\n\n\n@patch(\"onyx.context.search.federated.slack_search.get_redis_client\")\n@patch(\"onyx.context.search.federated.slack_search.WebClient\")\ndef test_missing_scope_resilience(\n    mock_web_client: Mock, mock_redis_client: Mock\n) -> None:\n    \"\"\"Test that missing scopes are handled gracefully\"\"\"\n    # Setup mock Redis client\n    mock_redis = MagicMock()\n    mock_redis.get.return_value = None  # Cache miss\n    mock_redis_client.return_value = mock_redis\n\n    # Setup mock Slack client that simulates missing_scope error\n    mock_client_instance = MagicMock()\n    mock_web_client.return_value = mock_client_instance\n\n    # Track which channel types were attempted\n    attempted_types: list[str] = []\n\n    def mock_conversations_list(\n        types: str | None = None,\n        **kwargs: Any,  # noqa: ARG001\n    ) -> MagicMock:\n        if types:\n            attempted_types.append(types)\n\n        # First call: all types including mpim -> missing_scope error\n        if types and \"mpim\" in types:\n            error_response = {\n                \"ok\": False,\n                \"error\": \"missing_scope\",\n                \"needed\": \"mpim:read\",\n                \"provided\": \"identify,channels:history,channels:read,groups:read,im:read,search:read\",\n            }\n            raise SlackApiError(\"missing_scope\", error_response)\n\n        # Second call: without mpim -> success\n        mock_response = MagicMock()\n        mock_response.validate.return_value = None\n        mock_response.data = {\n            \"channels\": [\n                {\n                    \"id\": \"C1234567890\",\n                    \"name\": \"general\",\n                    \"is_channel\": True,\n                    \"is_private\": False,\n                    \"is_group\": False,\n                    \"is_mpim\": False,\n                    \"is_im\": False,\n                    \"is_member\": True,\n                },\n                {\n                    \"id\": \"D9876543210\",\n                    \"name\": \"\",\n                    \"is_channel\": False,\n                    \"is_private\": False,\n                    \"is_group\": False,\n                    \"is_mpim\": False,\n                    \"is_im\": True,\n                    \"is_member\": True,\n                },\n            ],\n            \"response_metadata\": {},\n        }\n        return mock_response\n\n    mock_client_instance.conversations_list.side_effect = mock_conversations_list\n\n    # Call the function\n    result = fetch_and_cache_channel_metadata(\n        access_token=\"xoxp-test-token\",\n        team_id=\"T1234567890\",\n        include_private=True,\n    )\n\n    # Assertions\n    # Should have attempted twice: once with mpim, once without\n    assert len(attempted_types) == 2, f\"Expected 2 attempts, got {len(attempted_types)}\"\n    assert \"mpim\" in attempted_types[0], \"First attempt should include mpim\"\n    assert \"mpim\" not in attempted_types[1], \"Second attempt should not include mpim\"\n\n    # Should have successfully returned channels despite missing scope\n    assert len(result) == 2, f\"Expected 2 channels, got {len(result)}\"\n    assert \"C1234567890\" in result, \"Should have public channel\"\n    assert \"D9876543210\" in result, \"Should have DM channel\"\n\n    # Verify channel metadata structure\n    assert result[\"C1234567890\"][\"name\"] == \"general\"\n    assert result[\"C1234567890\"][\"type\"] == \"public_channel\"\n    assert result[\"D9876543210\"][\"type\"] == \"im\"\n\n\n@patch(\"onyx.context.search.federated.slack_search.get_redis_client\")\n@patch(\"onyx.context.search.federated.slack_search.WebClient\")\ndef test_multiple_missing_scopes_resilience(\n    mock_web_client: Mock, mock_redis_client: Mock\n) -> None:\n    \"\"\"Test handling multiple missing scopes gracefully\"\"\"\n    # Setup mock Redis client\n    mock_redis = MagicMock()\n    mock_redis.get.return_value = None  # Cache miss\n    mock_redis_client.return_value = mock_redis\n\n    # Setup mock Slack client\n    mock_client_instance = MagicMock()\n    mock_web_client.return_value = mock_client_instance\n\n    # Track attempts\n    attempted_types: list[str] = []\n\n    def mock_conversations_list(\n        types: str | None = None,\n        **kwargs: Any,  # noqa: ARG001\n    ) -> MagicMock:\n        if types:\n            attempted_types.append(types)\n\n        # First: mpim missing\n        if types and \"mpim\" in types:\n            error_response = {\n                \"ok\": False,\n                \"error\": \"missing_scope\",\n                \"needed\": \"mpim:read\",\n                \"provided\": \"identify,channels:history,channels:read,groups:read\",\n            }\n            raise SlackApiError(\"missing_scope\", error_response)\n\n        # Second: im missing\n        if types and \"im\" in types:\n            error_response = {\n                \"ok\": False,\n                \"error\": \"missing_scope\",\n                \"needed\": \"im:read\",\n                \"provided\": \"identify,channels:history,channels:read,groups:read\",\n            }\n            raise SlackApiError(\"missing_scope\", error_response)\n\n        # Third: success with only public and private channels\n        mock_response = MagicMock()\n        mock_response.validate.return_value = None\n        mock_response.data = {\n            \"channels\": [\n                {\n                    \"id\": \"C1234567890\",\n                    \"name\": \"general\",\n                    \"is_channel\": True,\n                    \"is_private\": False,\n                    \"is_group\": False,\n                    \"is_mpim\": False,\n                    \"is_im\": False,\n                    \"is_member\": True,\n                }\n            ],\n            \"response_metadata\": {},\n        }\n        return mock_response\n\n    mock_client_instance.conversations_list.side_effect = mock_conversations_list\n\n    # Call the function\n    result = fetch_and_cache_channel_metadata(\n        access_token=\"xoxp-test-token\",\n        team_id=\"T1234567890\",\n        include_private=True,\n    )\n\n    # Should gracefully handle multiple missing scopes\n    assert len(attempted_types) == 3, f\"Expected 3 attempts, got {len(attempted_types)}\"\n    assert \"mpim\" in attempted_types[0], \"First attempt should include mpim\"\n    assert \"mpim\" not in attempted_types[1], \"Second attempt should not include mpim\"\n    assert \"im\" in attempted_types[1], \"Second attempt should include im\"\n    assert \"im\" not in attempted_types[2], \"Third attempt should not include im\"\n\n    # Should still return available channels\n    assert len(result) == 1, f\"Expected 1 channel, got {len(result)}\"\n    assert result[\"C1234567890\"][\"name\"] == \"general\"\n\n\ndef test_slack_channel_config_eager_loads_persona(db_session: Session) -> None:\n    \"\"\"Test that fetch_slack_channel_config_for_channel_or_default eagerly loads persona.\n\n    This prevents lazy loading failures when the session context changes later\n    in the request handling flow (e.g., in handle_regular_answer).\n    \"\"\"\n    from onyx.db.slack_channel_config import (\n        fetch_slack_channel_config_for_channel_or_default,\n    )\n\n    unique_id = str(uuid4())[:8]\n\n    # Create a persona (using same fields as _create_test_persona_with_slack_config)\n    persona = Persona(\n        name=f\"test_eager_load_persona_{unique_id}\",\n        description=\"Test persona for eager loading test\",\n        system_prompt=\"You are a helpful assistant.\",\n        task_prompt=\"Answer the user's question.\",\n    )\n    db_session.add(persona)\n    db_session.flush()\n\n    # Create a slack bot\n    slack_bot = SlackBot(\n        name=f\"Test Bot {unique_id}\",\n        bot_token=f\"xoxb-test-{unique_id}\",\n        app_token=f\"xapp-test-{unique_id}\",\n        enabled=True,\n    )\n    db_session.add(slack_bot)\n    db_session.flush()\n\n    # Create slack channel config with persona\n    channel_name = f\"test-channel-{unique_id}\"\n    slack_channel_config = SlackChannelConfig(\n        slack_bot_id=slack_bot.id,\n        persona_id=persona.id,\n        channel_config={\"channel_name\": channel_name, \"disabled\": False},\n        enable_auto_filters=False,\n        is_default=False,\n    )\n    db_session.add(slack_channel_config)\n    db_session.commit()\n\n    # Fetch the config using the function under test\n    fetched_config = fetch_slack_channel_config_for_channel_or_default(\n        db_session=db_session,\n        slack_bot_id=slack_bot.id,\n        channel_name=channel_name,\n    )\n\n    assert fetched_config is not None, \"Should find the channel config\"\n\n    # Check that persona relationship is already loaded (not pending lazy load)\n    insp = inspect(fetched_config)\n    assert insp is not None, \"Should be able to inspect the config\"\n    assert \"persona\" not in insp.unloaded, (\n        \"Persona should be eagerly loaded, not pending lazy load. \"\n        \"This is required to prevent fallback to default persona when \"\n        \"session context changes in handle_regular_answer.\"\n    )\n\n    # Verify the persona is correct\n    assert fetched_config.persona is not None, \"Persona should not be None\"\n    assert fetched_config.persona.id == persona.id, \"Should load the correct persona\"\n    assert fetched_config.persona.name == persona.name\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/tools/test_image_generation_tool.py",
    "content": "# TODO re-enable this test\n# import os\n# import time\n# from typing import Any\n# from unittest.mock import patch\n\n# import pytest\n\n# from onyx.tools.models import ToolResponse\n# from onyx.tools.tool_implementations.images.image_generation_tool import (\n#     IMAGE_GENERATION_HEARTBEAT_ID,\n# )\n# from onyx.tools.tool_implementations.images.image_generation_tool import (\n#     IMAGE_GENERATION_RESPONSE_ID,\n# )\n# from onyx.tools.tool_implementations.images.image_generation_tool import (\n#     ImageGenerationResponse,\n# )\n# from onyx.tools.tool_implementations.images.image_generation_tool import (\n#     ImageGenerationTool,\n# )\n# from onyx.tools.tool_implementations.images.image_generation_tool import ImageShape\n\n\n# @pytest.fixture\n# def dalle3_tool() -> ImageGenerationTool:\n#     \"\"\"Fixture for DALL-E 3 tool with API key from environment.\"\"\"\n#     api_key = os.environ[\"OPENAI_API_KEY\"]\n#     return ImageGenerationTool(\n#         tool_id=0,\n#         api_key=api_key,\n#         api_base=None,\n#         api_version=None,\n#         model=\"dall-e-3\",\n#         num_imgs=1,\n#     )\n\n\n# def test_image_generation_with_heartbeats(dalle3_tool: ImageGenerationTool) -> None:\n#     \"\"\"Test that heartbeat packets are yielded during image generation.\"\"\"\n#     responses = []\n#     heartbeat_count = 0\n#     image_response_count = 0\n\n#     # Collect all responses\n#     for response in dalle3_tool.run(prompt=\"A simple red circle on white background\"):\n#         responses.append(response)\n#         if response.id == IMAGE_GENERATION_HEARTBEAT_ID:\n#             heartbeat_count += 1\n#         elif response.id == IMAGE_GENERATION_RESPONSE_ID:\n#             image_response_count += 1\n\n#     # Should have at least one heartbeat (depending on generation speed)\n#     # and exactly one image response\n#     assert image_response_count == 1\n#     # May have 0 or more heartbeats depending on API speed\n#     print(f\"Received {heartbeat_count} heartbeat packets\")\n\n#     # Verify the final image response\n#     final_response = responses[-1]\n#     assert final_response.id == IMAGE_GENERATION_RESPONSE_ID\n#     assert isinstance(final_response.response, list)\n#     assert len(final_response.response) == 1\n\n#     image = final_response.response[0]\n#     assert isinstance(image, ImageGenerationResponse)\n#     assert image.image_data is not None\n#     assert len(image.image_data) > 100  # Base64 data should be substantial\n#     assert image.revised_prompt is not None\n\n\n# def test_heartbeat_timing_with_mock() -> None:\n#     \"\"\"Test that heartbeats are sent at correct intervals using mocked generation.\"\"\"\n#     api_key = os.getenv(\"OPENAI_API_KEY\", \"mock-key-for-testing\")\n\n#     tool = ImageGenerationTool(\n#         tool_id=0,\n#         api_key=api_key,\n#         api_base=None,\n#         api_version=None,\n#         model=\"dall-e-3\",\n#         num_imgs=1,\n#     )\n\n#     # Mock the _generate_image method to simulate slow generation\n#     def slow_generate(*args: Any, **kwargs: Any) -> ImageGenerationResponse:\n#         time.sleep(5)  # Simulate 5 second generation time\n#         return ImageGenerationResponse(\n#             revised_prompt=\"Test prompt\",\n#             image_data=\"base64encodedimagedata\",\n#         )\n\n#     with patch.object(tool, \"_generate_image\", side_effect=slow_generate):\n#         start_time = time.time()\n#         responses = list(tool.run(prompt=\"Test prompt\"))\n#         time.time() - start_time\n\n#         # Count heartbeats\n#         heartbeat_count = sum(\n#             1 for r in responses if r.id == IMAGE_GENERATION_HEARTBEAT_ID\n#         )\n\n#         # With 5 second generation and 2 second intervals,\n#         # we should get approximately 2 heartbeats\n#         assert heartbeat_count >= 1\n#         assert heartbeat_count <= 3  # Allow some timing variance\n\n#         # Verify we still get the final result\n#         image_responses = [r for r in responses if r.id == IMAGE_GENERATION_RESPONSE_ID]\n#         assert len(image_responses) == 1\n#         assert image_responses[0].response[0].image_data == \"base64encodedimagedata\"\n\n\n# def test_error_handling_with_heartbeats() -> None:\n#     \"\"\"Test that errors are properly propagated even with heartbeat mechanism.\"\"\"\n#     api_key = os.getenv(\"OPENAI_API_KEY\", \"mock-key-for-testing\")\n\n#     tool = ImageGenerationTool(\n#         tool_id=0,\n#         api_key=api_key,\n#         api_base=None,\n#         api_version=None,\n#         model=\"dall-e-3\",\n#         num_imgs=1,\n#     )\n\n#     # Mock the _generate_image method to raise an error after delay\n#     def error_generate(*args: Any, **kwargs: Any) -> None:\n#         time.sleep(1)  # Small delay to ensure at least one heartbeat\n#         raise ValueError(\"Test error during generation\")\n\n#     with patch.object(tool, \"_generate_image\", side_effect=error_generate):\n#         with pytest.raises(ValueError, match=\"Test error during generation\"):\n#             # Consume the generator to trigger the error\n#             list(tool.run(prompt=\"Test prompt\"))\n\n\n# def test_tool_message_content_filters_heartbeats() -> None:\n#     \"\"\"Test that get_llm_tool_response correctly filters heartbeats.\"\"\"\n#     api_key = os.getenv(\"OPENAI_API_KEY\", \"mock-key-for-testing\")\n\n#     tool = ImageGenerationTool(\n#         tool_id=0,\n#         api_key=api_key,\n#         api_base=None,\n#         api_version=None,\n#         model=\"dall-e-3\",\n#         num_imgs=1,\n#     )\n\n#     # Create mock responses\n#     heartbeat1 = ToolResponse(\n#         id=IMAGE_GENERATION_HEARTBEAT_ID,\n#         response={\"status\": \"generating\", \"heartbeat\": 0},\n#     )\n#     heartbeat2 = ToolResponse(\n#         id=IMAGE_GENERATION_HEARTBEAT_ID,\n#         response={\"status\": \"generating\", \"heartbeat\": 1},\n#     )\n#     image_response = ToolResponse(\n#         id=IMAGE_GENERATION_RESPONSE_ID,\n#         response=[\n#             ImageGenerationResponse(\n#                 revised_prompt=\"Test\",\n#                 image_data=\"base64encodedimagedata\",\n#             )\n#         ],\n#     )\n\n#     # Test that heartbeats are filtered out\n#     result = tool.get_llm_tool_response(heartbeat1, heartbeat2, image_response)\n\n#     # Should return JSON with image info, not heartbeats\n#     assert isinstance(result, str)\n#     assert \"Test\" in result\n#     assert \"heartbeat\" not in result\n\n\n# def test_final_result_filters_heartbeats() -> None:\n#     \"\"\"Test that final_result correctly filters heartbeats.\"\"\"\n#     api_key = os.getenv(\"OPENAI_API_KEY\", \"mock-key-for-testing\")\n\n#     tool = ImageGenerationTool(\n#         tool_id=0,\n#         api_key=api_key,\n#         api_base=None,\n#         api_version=None,\n#         model=\"dall-e-3\",\n#         num_imgs=1,\n#     )\n\n#     # Create mock responses\n#     heartbeat = ToolResponse(\n#         id=IMAGE_GENERATION_HEARTBEAT_ID,\n#         response={\"status\": \"generating\", \"heartbeat\": 0},\n#     )\n#     image_response = ToolResponse(\n#         id=IMAGE_GENERATION_RESPONSE_ID,\n#         response=[\n#             ImageGenerationResponse(\n#                 revised_prompt=\"Test prompt\",\n#                 image_data=\"base64encodedimagedata\",\n#             )\n#         ],\n#     )\n\n#     # Test that final_result returns only image data\n#     result = tool.get_final_result(heartbeat, image_response)\n\n#     assert isinstance(result, list)\n#     assert len(result) == 1\n#     assert result[0][\"revised_prompt\"] == \"Test prompt\"\n#     assert result[0][\"image_data\"] == \"base64encodedimagedata\"\n\n\n# def test_different_image_shapes(dalle3_tool: ImageGenerationTool) -> None:\n#     \"\"\"Test image generation with different shape parameters.\"\"\"\n#     shapes_to_test = [\n#         (ImageShape.SQUARE, \"A red square\"),\n#         (ImageShape.PORTRAIT, \"A tall building\"),\n#         (ImageShape.LANDSCAPE, \"A wide landscape\"),\n#     ]\n\n#     for shape, prompt in shapes_to_test:\n#         responses = list(dalle3_tool.run(prompt=prompt, shape=shape.value))\n\n#         # Find the image response\n#         image_response = None\n#         for response in responses:\n#             if response.id == IMAGE_GENERATION_RESPONSE_ID:\n#                 image_response = response\n#                 break\n\n#         assert image_response is not None\n#         assert len(image_response.response) == 1\n#         image = image_response.response[0]\n#         assert image.image_data is not None\n#         assert len(image.image_data) > 100  # Base64 data should be substantial\n#         print(f\"Generated {shape.value} image (base64, {len(image.image_data)} chars)\")\n\n\n# def test_image_generation_response_format() -> None:\n#     \"\"\"Test that image generation returns data in at least one format (URL or base64).\"\"\"\n#     api_key = os.getenv(\"OPENAI_API_KEY\")\n#     if not api_key:\n#         pytest.skip(\"OPENAI_API_KEY environment variable not set\")\n\n#     tool = ImageGenerationTool(\n#         tool_id=0,\n#         api_key=api_key,\n#         api_base=None,\n#         api_version=None,\n#         model=\"dall-e-3\",\n#         num_imgs=1,\n#     )\n\n#     responses = list(tool.run(prompt=\"A simple blue circle\"))\n\n#     # Find the image response\n#     image_response = None\n#     for response in responses:\n#         if response.id == IMAGE_GENERATION_RESPONSE_ID:\n#             image_response = response\n#             break\n\n#     assert image_response is not None\n#     assert len(image_response.response) == 1\n#     image = image_response.response[0]\n#     # Should always have base64 data\n#     assert image.image_data is not None\n#     assert len(image.image_data) > 100  # Base64 data should be substantial\n\n\n# if __name__ == \"__main__\":\n#     # Run with: python -m pytest tests/external_dependency_unit/tools/test_image_generation_tool.py -v\n#     pytest.main([__file__, \"-v\"])\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/tools/test_mcp_passthrough_oauth.py",
    "content": "\"\"\"\nTest suite for MCP Pass-Through OAuth (PT_OAUTH) integration.\n\nTests the pass-through OAuth flow where Onyx forwards the user's login OAuth token\nto an MCP server for authentication.\n\nThis test:\n1. Creates a test user with an OAuthAccount (simulating Google OAuth login)\n2. Creates an MCP server with PT_OAUTH auth type\n3. Creates MCP tools for that server\n4. Verifies the user's OAuth token is correctly passed to MCPTool\n\nAll external HTTP calls are mocked, but Postgres and Redis are running.\n\"\"\"\n\nimport queue\nfrom typing import Any\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.db.enums import MCPAuthenticationPerformer\nfrom onyx.db.enums import MCPAuthenticationType\nfrom onyx.db.enums import MCPTransport\nfrom onyx.db.mcp import create_mcp_server__no_commit\nfrom onyx.db.models import OAuthAccount\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Tool\nfrom onyx.db.models import User\nfrom onyx.llm.factory import get_default_llm\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.tools.models import CustomToolCallSummary\nfrom onyx.tools.tool_constructor import construct_tools\nfrom onyx.tools.tool_constructor import SearchToolConfig\nfrom onyx.tools.tool_implementations.mcp.mcp_tool import MCPTool\nfrom tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider\nfrom tests.external_dependency_unit.conftest import create_test_user\n\n\ndef _create_test_persona_with_mcp_tool(\n    db_session: Session, user: User, tools: list[Tool]\n) -> Persona:\n    \"\"\"Helper to create a test persona with MCP tools\"\"\"\n    persona = Persona(\n        name=f\"Test MCP Persona {uuid4().hex[:8]}\",\n        description=\"Test persona with MCP tools\",\n        system_prompt=\"You are a helpful assistant\",\n        task_prompt=\"Answer the user's question\",\n        tools=tools,\n        document_sets=[],\n        users=[user],\n        groups=[],\n        is_listed=True,\n        is_public=True,\n        display_priority=None,\n        starter_messages=None,\n        deleted=False,\n    )\n    db_session.add(persona)\n    db_session.commit()\n    db_session.refresh(persona)\n    return persona\n\n\nclass TestMCPPassThroughOAuth:\n    \"\"\"Tests for MCP Pass-Through OAuth (PT_OAUTH) flow\"\"\"\n\n    @pytest.fixture(autouse=True)\n    def setup_llm_provider(self, db_session: Session) -> None:\n        \"\"\"Ensure default LLM provider is set up for each test.\"\"\"\n        ensure_default_llm_provider(db_session)\n\n    def test_pt_oauth_passes_user_login_token(self, db_session: Session) -> None:\n        \"\"\"\n        Test that PT_OAUTH correctly passes the user's login OAuth token to MCPTool.\n\n        This simulates a user who logged into Onyx with Google OAuth and is using\n        an MCP server that requires their Google token for authentication.\n        \"\"\"\n        # Create user with login OAuth token (simulating Google OAuth login)\n        user = create_test_user(db_session, \"pt_oauth_user\")\n        user_oauth_token = \"google_oauth_token_abc123\"\n\n        oauth_account = OAuthAccount(\n            user_id=user.id,\n            oauth_name=\"google\",\n            account_id=\"google_user_12345\",\n            account_email=user.email,\n            access_token=user_oauth_token,\n            refresh_token=\"google_refresh_token\",\n        )\n        db_session.add(oauth_account)\n        db_session.commit()\n        # Refresh user to load oauth_accounts relationship\n        db_session.refresh(user)\n\n        # Create MCP server with PT_OAUTH auth type\n        mcp_server = create_mcp_server__no_commit(\n            owner_email=user.email,\n            name=f\"PT_OAUTH Test Server {uuid4().hex[:8]}\",\n            description=\"MCP server for pass-through OAuth testing\",\n            server_url=\"http://test-mcp-server.example.com/mcp\",\n            auth_type=MCPAuthenticationType.PT_OAUTH,\n            transport=MCPTransport.STREAMABLE_HTTP,\n            auth_performer=MCPAuthenticationPerformer.ADMIN,  # Not used for PT_OAUTH\n            db_session=db_session,\n        )\n        db_session.commit()\n\n        # Create MCP tool associated with this server\n        mcp_tool_db = Tool(\n            name=\"test_mcp_tool\",\n            display_name=\"Test MCP Tool\",\n            description=\"Test MCP tool for PT_OAUTH\",\n            mcp_server_id=mcp_server.id,\n            mcp_input_schema={\n                \"type\": \"object\",\n                \"properties\": {\n                    \"message\": {\"type\": \"string\", \"description\": \"Test message\"}\n                },\n            },\n            user_id=user.id,\n        )\n        db_session.add(mcp_tool_db)\n        db_session.commit()\n        db_session.refresh(mcp_tool_db)\n\n        # Create persona with the MCP tool\n        persona = _create_test_persona_with_mcp_tool(db_session, user, [mcp_tool_db])\n        llm = get_default_llm()\n\n        # Construct tools\n        search_tool_config = SearchToolConfig()\n\n        tool_dict = construct_tools(\n            persona=persona,\n            db_session=db_session,\n            emitter=Emitter(merged_queue=queue.Queue()),\n            user=user,\n            llm=llm,\n            search_tool_config=search_tool_config,\n        )\n\n        # Verify MCP tool was constructed\n        assert mcp_tool_db.id in tool_dict\n        constructed_tools = tool_dict[mcp_tool_db.id]\n        assert len(constructed_tools) == 1\n        mcp_tool = constructed_tools[0]\n        assert isinstance(mcp_tool, MCPTool)\n\n        # Verify the user's OAuth token was passed to the MCPTool\n        assert mcp_tool._user_oauth_token == user_oauth_token\n\n    def test_pt_oauth_without_user_oauth_account(self, db_session: Session) -> None:\n        \"\"\"\n        Test PT_OAUTH behavior when user doesn't have an OAuth account.\n\n        The user logged in with basic auth (no OAuth token), so the MCP tool\n        should have no OAuth token to pass through.\n        \"\"\"\n        # Create user WITHOUT OAuth account (basic auth login)\n        user = create_test_user(db_session, \"basic_auth_user\")\n        # No OAuthAccount created\n\n        # Create MCP server with PT_OAUTH auth type\n        mcp_server = create_mcp_server__no_commit(\n            owner_email=user.email,\n            name=f\"PT_OAUTH No Token Server {uuid4().hex[:8]}\",\n            description=\"MCP server for testing missing OAuth token\",\n            server_url=\"http://test-mcp-server.example.com/mcp\",\n            auth_type=MCPAuthenticationType.PT_OAUTH,\n            transport=MCPTransport.STREAMABLE_HTTP,\n            auth_performer=MCPAuthenticationPerformer.ADMIN,\n            db_session=db_session,\n        )\n        db_session.commit()\n\n        # Create MCP tool\n        mcp_tool_db = Tool(\n            name=\"test_mcp_tool_no_token\",\n            display_name=\"Test MCP Tool No Token\",\n            description=\"Test MCP tool without OAuth token\",\n            mcp_server_id=mcp_server.id,\n            mcp_input_schema={\n                \"type\": \"object\",\n                \"properties\": {\"query\": {\"type\": \"string\"}},\n            },\n            user_id=user.id,\n        )\n        db_session.add(mcp_tool_db)\n        db_session.commit()\n        db_session.refresh(mcp_tool_db)\n\n        # Create persona\n        persona = _create_test_persona_with_mcp_tool(db_session, user, [mcp_tool_db])\n        llm = get_default_llm()\n\n        tool_dict = construct_tools(\n            persona=persona,\n            db_session=db_session,\n            emitter=Emitter(merged_queue=queue.Queue()),\n            user=user,\n            llm=llm,\n            search_tool_config=SearchToolConfig(),\n        )\n\n        # Verify MCP tool was constructed\n        assert mcp_tool_db.id in tool_dict\n        constructed_tools = tool_dict[mcp_tool_db.id]\n        assert len(constructed_tools) == 1\n        mcp_tool = constructed_tools[0]\n        assert isinstance(mcp_tool, MCPTool)\n\n        # Verify NO OAuth token was passed (user has no OAuth account)\n        assert mcp_tool._user_oauth_token is None\n\n    def test_pt_oauth_vs_api_token_auth(self, db_session: Session) -> None:\n        \"\"\"\n        Test that PT_OAUTH and API_TOKEN auth types behave differently.\n\n        PT_OAUTH should use the user's login token, while API_TOKEN should\n        NOT use the user's login token (it uses the connection config instead).\n        \"\"\"\n        # Create user with OAuth account\n        user = create_test_user(db_session, \"auth_type_test_user\")\n        user_oauth_token = \"user_login_token_xyz789\"\n\n        oauth_account = OAuthAccount(\n            user_id=user.id,\n            oauth_name=\"google\",\n            account_id=\"google_user_xyz\",\n            account_email=user.email,\n            access_token=user_oauth_token,\n            refresh_token=\"\",\n        )\n        db_session.add(oauth_account)\n        db_session.commit()\n        db_session.refresh(user)\n\n        # Create MCP server with API_TOKEN auth type (not PT_OAUTH)\n        mcp_server = create_mcp_server__no_commit(\n            owner_email=user.email,\n            name=f\"API Token Server {uuid4().hex[:8]}\",\n            description=\"MCP server with API token auth\",\n            server_url=\"http://api-token-server.example.com/mcp\",\n            auth_type=MCPAuthenticationType.API_TOKEN,  # Not PT_OAUTH\n            transport=MCPTransport.STREAMABLE_HTTP,\n            auth_performer=MCPAuthenticationPerformer.ADMIN,\n            db_session=db_session,\n        )\n        db_session.commit()\n\n        # Create MCP tool\n        mcp_tool_db = Tool(\n            name=\"api_token_tool\",\n            display_name=\"API Token Tool\",\n            description=\"Tool with API token auth\",\n            mcp_server_id=mcp_server.id,\n            mcp_input_schema={\n                \"type\": \"object\",\n                \"properties\": {\"data\": {\"type\": \"string\"}},\n            },\n            user_id=user.id,\n        )\n        db_session.add(mcp_tool_db)\n        db_session.commit()\n        db_session.refresh(mcp_tool_db)\n\n        # Create persona\n        persona = _create_test_persona_with_mcp_tool(db_session, user, [mcp_tool_db])\n        llm = get_default_llm()\n\n        tool_dict = construct_tools(\n            persona=persona,\n            db_session=db_session,\n            emitter=Emitter(merged_queue=queue.Queue()),\n            user=user,\n            llm=llm,\n            search_tool_config=SearchToolConfig(),\n        )\n        # Verify MCP tool was constructed\n        assert mcp_tool_db.id in tool_dict\n        constructed_tools = tool_dict[mcp_tool_db.id]\n        assert len(constructed_tools) == 1\n        mcp_tool = constructed_tools[0]\n        assert isinstance(mcp_tool, MCPTool)\n\n        # Verify the user's OAuth token was NOT passed (API_TOKEN auth type)\n        # API_TOKEN auth should use connection config, not user's login token\n        assert mcp_tool._user_oauth_token is None\n\n    def test_mcp_tool_run_sets_authorization_header_for_pt_oauth(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"\n        Test that MCPTool.run() correctly sets the Authorization header\n        when PT_OAUTH is configured.\n        \"\"\"\n        # Create user with OAuth token\n        user = create_test_user(db_session, \"pt_oauth_header_user\")\n        user_oauth_token = \"bearer_token_for_mcp_server\"\n\n        oauth_account = OAuthAccount(\n            user_id=user.id,\n            oauth_name=\"google\",\n            account_id=\"google_header_user\",\n            account_email=user.email,\n            access_token=user_oauth_token,\n            refresh_token=\"\",\n        )\n        db_session.add(oauth_account)\n        db_session.commit()\n        db_session.refresh(user)\n\n        # Create MCP server with PT_OAUTH\n        mcp_server = create_mcp_server__no_commit(\n            owner_email=user.email,\n            name=f\"Header Test Server {uuid4().hex[:8]}\",\n            description=\"Server for testing Authorization header\",\n            server_url=\"http://header-test-server.example.com/mcp\",\n            auth_type=MCPAuthenticationType.PT_OAUTH,\n            transport=MCPTransport.STREAMABLE_HTTP,\n            auth_performer=MCPAuthenticationPerformer.ADMIN,\n            db_session=db_session,\n        )\n        db_session.commit()\n\n        # Create MCP tool\n        mcp_tool_db = Tool(\n            name=\"header_test_tool\",\n            display_name=\"Header Test Tool\",\n            description=\"Tool to test Authorization header\",\n            mcp_server_id=mcp_server.id,\n            mcp_input_schema={\n                \"type\": \"object\",\n                \"properties\": {\"input\": {\"type\": \"string\"}},\n            },\n            user_id=user.id,\n        )\n        db_session.add(mcp_tool_db)\n        db_session.commit()\n        db_session.refresh(mcp_tool_db)\n\n        # Create persona\n        persona = _create_test_persona_with_mcp_tool(db_session, user, [mcp_tool_db])\n        llm = get_default_llm()\n\n        tool_dict = construct_tools(\n            persona=persona,\n            db_session=db_session,\n            emitter=Emitter(merged_queue=queue.Queue()),\n            user=user,\n            llm=llm,\n            search_tool_config=SearchToolConfig(),\n        )\n\n        # Get the constructed MCPTool\n        mcp_tool = tool_dict[mcp_tool_db.id][0]\n        assert isinstance(mcp_tool, MCPTool)\n\n        # Mock the call_mcp_tool function to capture the headers\n        captured_headers: dict[str, str] = {}\n\n        mocked_response = {\"result\": \"mocked_response\"}\n\n        def mock_call_mcp_tool(\n            server_url: str,  # noqa: ARG001\n            tool_name: str,  # noqa: ARG001\n            arguments: dict[str, Any],  # noqa: ARG001\n            connection_headers: dict[str, str],\n            transport: MCPTransport,  # noqa: ARG001\n            auth: Any = None,  # noqa: ARG001\n        ) -> dict[str, Any]:\n            captured_headers.update(connection_headers)\n            return mocked_response\n\n        with patch(\n            \"onyx.tools.tool_implementations.mcp.mcp_tool.call_mcp_tool\",\n            side_effect=mock_call_mcp_tool,\n        ):\n            # Run the tool\n            response = mcp_tool.run(\n                placement=Placement(turn_index=0, tab_index=0),\n                override_kwargs=None,\n                input=\"test\",\n            )\n            print(response.rich_response)\n            assert isinstance(response.rich_response, CustomToolCallSummary)\n            print(response.rich_response.tool_result)\n            assert response.rich_response.tool_result[\"tool_result\"] == mocked_response\n\n        # Verify Authorization header was set with the user's OAuth token\n        assert \"Authorization\" in captured_headers\n        assert captured_headers[\"Authorization\"] == f\"Bearer {user_oauth_token}\"\n\n    def test_pt_oauth_works_with_oidc_provider(self, db_session: Session) -> None:\n        \"\"\"\n        Test that PT_OAUTH works correctly when user logged in via OIDC (not Google).\n\n        This is important because OIDC providers (Okta, Auth0, Keycloak, etc.)\n        use oauth_name='openid' while Google uses oauth_name='google'.\n        The PT_OAUTH code should work with any OAuth provider.\n        \"\"\"\n        # Create user with OIDC OAuth token (simulating Okta/Auth0/Keycloak login)\n        user = create_test_user(db_session, \"oidc_user\")\n        # Use a random test token (not a real JWT to avoid pre-commit false positives)\n        oidc_access_token = \"oidc_test_token_abc123_not_a_real_jwt_xyz789\"\n\n        # OIDC providers use oauth_name='openid' by default\n        oauth_account = OAuthAccount(\n            user_id=user.id,\n            oauth_name=\"openid\",  # This is the key difference from Google OAuth\n            account_id=\"oidc_user_sub_12345\",\n            account_email=user.email,\n            access_token=oidc_access_token,\n            refresh_token=\"oidc_refresh_token\",\n        )\n        db_session.add(oauth_account)\n        db_session.commit()\n        db_session.refresh(user)\n\n        # Create MCP server with PT_OAUTH auth type\n        mcp_server = create_mcp_server__no_commit(\n            owner_email=user.email,\n            name=f\"PT_OAUTH OIDC Server {uuid4().hex[:8]}\",\n            description=\"MCP server for OIDC pass-through OAuth testing\",\n            server_url=\"http://oidc-mcp-server.example.com/mcp\",\n            auth_type=MCPAuthenticationType.PT_OAUTH,\n            transport=MCPTransport.STREAMABLE_HTTP,\n            auth_performer=MCPAuthenticationPerformer.ADMIN,\n            db_session=db_session,\n        )\n        db_session.commit()\n\n        # Create MCP tool\n        mcp_tool_db = Tool(\n            name=\"oidc_mcp_tool\",\n            display_name=\"OIDC MCP Tool\",\n            description=\"Test MCP tool for OIDC PT_OAUTH\",\n            mcp_server_id=mcp_server.id,\n            mcp_input_schema={\n                \"type\": \"object\",\n                \"properties\": {\"query\": {\"type\": \"string\"}},\n            },\n            user_id=user.id,\n        )\n        db_session.add(mcp_tool_db)\n        db_session.commit()\n        db_session.refresh(mcp_tool_db)\n\n        # Create persona\n        persona = _create_test_persona_with_mcp_tool(db_session, user, [mcp_tool_db])\n        llm = get_default_llm()\n\n        # Construct tools\n        tool_dict = construct_tools(\n            persona=persona,\n            db_session=db_session,\n            emitter=Emitter(merged_queue=queue.Queue()),\n            user=user,\n            llm=llm,\n            search_tool_config=SearchToolConfig(),\n        )\n        # Verify MCP tool was constructed\n        assert mcp_tool_db.id in tool_dict\n        constructed_tools = tool_dict[mcp_tool_db.id]\n        assert len(constructed_tools) == 1\n        mcp_tool = constructed_tools[0]\n        assert isinstance(mcp_tool, MCPTool)\n\n        # Verify the OIDC token was passed to the MCPTool\n        # (code should work identically for Google OAuth and OIDC)\n        assert mcp_tool._user_oauth_token == oidc_access_token\n\n    def test_pt_oauth_uses_first_oauth_account(self, db_session: Session) -> None:\n        \"\"\"\n        Test that PT_OAUTH uses the first OAuth account when user has multiple.\n\n        Users might have OAuth accounts from multiple providers (unlikely but possible).\n        The code should consistently use the first one.\n        \"\"\"\n        user = create_test_user(db_session, \"multi_oauth_user\")\n        first_token = \"first_oauth_token_123\"\n        second_token = \"second_oauth_token_456\"\n\n        # Add first OAuth account (Google)\n        oauth_account_1 = OAuthAccount(\n            user_id=user.id,\n            oauth_name=\"google\",\n            account_id=\"google_user_123\",\n            account_email=user.email,\n            access_token=first_token,\n            refresh_token=\"\",\n        )\n        db_session.add(oauth_account_1)\n        db_session.commit()\n\n        # Add second OAuth account (OIDC)\n        oauth_account_2 = OAuthAccount(\n            user_id=user.id,\n            oauth_name=\"openid\",\n            account_id=\"oidc_user_456\",\n            account_email=user.email,\n            access_token=second_token,\n            refresh_token=\"\",\n        )\n        db_session.add(oauth_account_2)\n        db_session.commit()\n        db_session.refresh(user)\n\n        # Create MCP server and tool\n        mcp_server = create_mcp_server__no_commit(\n            owner_email=user.email,\n            name=f\"Multi OAuth Server {uuid4().hex[:8]}\",\n            description=\"MCP server for multi-OAuth testing\",\n            server_url=\"http://multi-oauth-server.example.com/mcp\",\n            auth_type=MCPAuthenticationType.PT_OAUTH,\n            transport=MCPTransport.STREAMABLE_HTTP,\n            auth_performer=MCPAuthenticationPerformer.ADMIN,\n            db_session=db_session,\n        )\n        db_session.commit()\n\n        mcp_tool_db = Tool(\n            name=\"multi_oauth_tool\",\n            display_name=\"Multi OAuth Tool\",\n            description=\"Test tool\",\n            mcp_server_id=mcp_server.id,\n            mcp_input_schema={\"type\": \"object\", \"properties\": {}},\n            user_id=user.id,\n        )\n        db_session.add(mcp_tool_db)\n        db_session.commit()\n        db_session.refresh(mcp_tool_db)\n\n        persona = _create_test_persona_with_mcp_tool(db_session, user, [mcp_tool_db])\n        llm = get_default_llm()\n\n        tool_dict = construct_tools(\n            persona=persona,\n            db_session=db_session,\n            emitter=Emitter(merged_queue=queue.Queue()),\n            user=user,\n            llm=llm,\n            search_tool_config=SearchToolConfig(),\n        )\n\n        mcp_tool = tool_dict[mcp_tool_db.id][0]\n        assert isinstance(mcp_tool, MCPTool)\n\n        # Should use the first OAuth account's token\n        assert mcp_tool._user_oauth_token == first_token\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/tools/test_memory_tool_integration.py",
    "content": "\"\"\"Tests for MemoryTool integration: registration, construction, and DB persistence.\"\"\"\n\nimport pytest\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.memory import add_memory\nfrom onyx.db.memory import get_memories\nfrom onyx.db.memory import MAX_MEMORIES_PER_USER\nfrom onyx.db.memory import update_memory_at_index\nfrom onyx.db.models import Memory\nfrom onyx.db.models import User\nfrom onyx.tools.tool_implementations.memory.models import MemoryToolResponse\nfrom tests.external_dependency_unit.conftest import create_test_user\n\n\n@pytest.fixture()\ndef test_user(db_session: Session):  # type: ignore\n    \"\"\"Create a test user with use_memories enabled.\"\"\"\n    user = create_test_user(db_session, \"memory_test\")\n    user.use_memories = True\n    db_session.commit()\n    db_session.refresh(user)\n    return user\n\n\n@pytest.fixture()\ndef test_user_no_memories(db_session: Session):  # type: ignore\n    \"\"\"Create a test user with use_memories disabled.\"\"\"\n    user = create_test_user(db_session, \"memory_test_off\")\n    user.use_memories = False\n    db_session.commit()\n    db_session.refresh(user)\n    return user\n\n\nclass TestAddMemory:\n    def test_add_memory_creates_row(self, db_session: Session, test_user: User) -> None:\n        \"\"\"Verify that add_memory inserts a new Memory row.\"\"\"\n        user_id = test_user.id\n        memory = add_memory(\n            user_id=user_id,\n            memory_text=\"User prefers dark mode\",\n            db_session=db_session,\n        )\n\n        assert memory.id is not None\n        assert memory.user_id == user_id\n        assert memory.memory_text == \"User prefers dark mode\"\n\n        # Verify it persists\n        fetched = db_session.get(Memory, memory.id)\n        assert fetched is not None\n        assert fetched.memory_text == \"User prefers dark mode\"\n\n    def test_add_multiple_memories(self, db_session: Session, test_user: User) -> None:\n        \"\"\"Verify that multiple memories can be added for the same user.\"\"\"\n        user_id = test_user.id\n        m1 = add_memory(\n            user_id=user_id,\n            memory_text=\"Favorite color is blue\",\n            db_session=db_session,\n        )\n        m2 = add_memory(\n            user_id=user_id,\n            memory_text=\"Works in engineering\",\n            db_session=db_session,\n        )\n\n        assert m1.id != m2.id\n        assert m1.memory_text == \"Favorite color is blue\"\n        assert m2.memory_text == \"Works in engineering\"\n\n\nclass TestUpdateMemoryAtIndex:\n    def test_update_memory_at_valid_index(\n        self, db_session: Session, test_user: User\n    ) -> None:\n        \"\"\"Verify that update_memory_at_index updates the correct row.\"\"\"\n        user_id = test_user.id\n        add_memory(user_id=user_id, memory_text=\"Memory 0\", db_session=db_session)\n        add_memory(user_id=user_id, memory_text=\"Memory 1\", db_session=db_session)\n        add_memory(user_id=user_id, memory_text=\"Memory 2\", db_session=db_session)\n\n        updated = update_memory_at_index(\n            user_id=user_id,\n            index=1,\n            new_text=\"Updated Memory 1\",\n            db_session=db_session,\n        )\n\n        assert updated is not None\n        assert updated.memory_text == \"Updated Memory 1\"\n\n    def test_update_memory_at_out_of_range_index(\n        self, db_session: Session, test_user: User\n    ) -> None:\n        \"\"\"Verify that out-of-range index returns None.\"\"\"\n        user_id = test_user.id\n        add_memory(user_id=user_id, memory_text=\"Only memory\", db_session=db_session)\n\n        result = update_memory_at_index(\n            user_id=user_id,\n            index=5,\n            new_text=\"Should not update\",\n            db_session=db_session,\n        )\n\n        assert result is None\n\n    def test_update_memory_at_negative_index(\n        self, db_session: Session, test_user: User\n    ) -> None:\n        \"\"\"Verify that negative index returns None.\"\"\"\n        user_id = test_user.id\n        add_memory(user_id=user_id, memory_text=\"Only memory\", db_session=db_session)\n\n        result = update_memory_at_index(\n            user_id=user_id,\n            index=-1,\n            new_text=\"Should not update\",\n            db_session=db_session,\n        )\n\n        assert result is None\n\n\nclass TestMemoryToolResponse:\n    def test_response_with_add(self) -> None:\n        \"\"\"Verify MemoryToolResponse correctly carries add (index_to_replace=None).\"\"\"\n        response = MemoryToolResponse(\n            memory_text=\"User likes Python\",\n            index_to_replace=None,\n        )\n        assert response.memory_text == \"User likes Python\"\n        assert response.index_to_replace is None\n\n    def test_response_with_update(self) -> None:\n        \"\"\"Verify MemoryToolResponse correctly carries update (index_to_replace=int).\"\"\"\n        response = MemoryToolResponse(\n            memory_text=\"User likes TypeScript\",\n            index_to_replace=2,\n        )\n        assert response.memory_text == \"User likes TypeScript\"\n        assert response.index_to_replace == 2\n\n\nclass TestMemoryCap:\n    def test_add_memory_evicts_oldest_when_at_cap(\n        self, db_session: Session, test_user: User\n    ) -> None:\n        \"\"\"When the user has MAX_MEMORIES_PER_USER memories, adding a new one\n        should delete the oldest (lowest id) and keep the total at the cap.\"\"\"\n        user_id = test_user.id\n\n        # Fill up to the cap\n        for i in range(MAX_MEMORIES_PER_USER):\n            add_memory(\n                user_id=user_id,\n                memory_text=f\"Memory {i}\",\n                db_session=db_session,\n            )\n\n        rows_before = db_session.scalars(\n            Memory.__table__.select().where(Memory.user_id == user_id)\n        ).all()\n        assert len(rows_before) == MAX_MEMORIES_PER_USER\n\n        # Add one more — should evict the oldest\n        new_memory = add_memory(\n            user_id=user_id,\n            memory_text=\"New memory after cap\",\n            db_session=db_session,\n        )\n\n        rows_after = db_session.scalars(\n            select(Memory).where(Memory.user_id == user_id).order_by(Memory.id.asc())\n        ).all()\n\n        assert len(rows_after) == MAX_MEMORIES_PER_USER\n        # Oldest (\"Memory 0\") should be gone; \"Memory 1\" is now the oldest\n        assert rows_after[0].memory_text == \"Memory 1\"\n        # Newest should be the one we just added\n        assert rows_after[-1].id == new_memory.id\n        assert rows_after[-1].memory_text == \"New memory after cap\"\n\n\nclass TestGetMemoriesWithUserId:\n    def test_get_memories_populates_user_id(\n        self, db_session: Session, test_user: User\n    ) -> None:\n        \"\"\"Verify that get_memories populates user_id on the returned context.\"\"\"\n        context = get_memories(test_user, db_session)\n        assert context.user_id == test_user.id\n\n    def test_get_memories_disabled_still_populates_user_id(\n        self, db_session: Session, test_user_no_memories: User\n    ) -> None:\n        \"\"\"Verify that get_memories with use_memories=False still returns a\n        fully populated context (user_id, user_info, memories). The\n        use_memories flag only controls whether memories are injected into\n        the system prompt, not whether the context is fetched.\"\"\"\n        # Add a memory for this user so we can verify it's fetched\n        add_memory(\n            user_id=test_user_no_memories.id,\n            memory_text=\"Should still be fetched\",\n            db_session=db_session,\n        )\n\n        context = get_memories(test_user_no_memories, db_session)\n        assert context.user_id == test_user_no_memories.id\n        assert context.user_info.email == test_user_no_memories.email\n        assert len(context.memories) == 1\n        assert context.memories[0] == \"Should still be fetched\"\n\n    def test_get_memories_disabled_persistence_works(\n        self, db_session: Session, test_user_no_memories: User\n    ) -> None:\n        \"\"\"Verify that add_memory and update_memory_at_index work correctly\n        when use_memories=False, since the memory tool should still persist.\"\"\"\n        user_id = test_user_no_memories.id\n\n        # Add a memory\n        memory = add_memory(\n            user_id=user_id,\n            memory_text=\"Memory with use_memories off\",\n            db_session=db_session,\n        )\n        assert memory.memory_text == \"Memory with use_memories off\"\n\n        # Update that memory\n        updated = update_memory_at_index(\n            user_id=user_id,\n            index=0,\n            new_text=\"Updated memory with use_memories off\",\n            db_session=db_session,\n        )\n        assert updated is not None\n        assert updated.memory_text == \"Updated memory with use_memories off\"\n\n        # Verify get_memories returns the updated memory\n        context = get_memories(test_user_no_memories, db_session)\n        assert len(context.memories) == 1\n        assert context.memories[0] == \"Updated memory with use_memories off\"\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/tools/test_oauth_config_crud.py",
    "content": "\"\"\"\nTest suite for OAuth Config CRUD operations.\n\nTests the basic CRUD operations for OAuth configurations and user tokens,\nincluding creation, retrieval, updates, deletion, and token management.\n\"\"\"\n\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.models import OAuthConfig\nfrom onyx.db.models import Tool\nfrom onyx.db.oauth_config import create_oauth_config\nfrom onyx.db.oauth_config import delete_oauth_config\nfrom onyx.db.oauth_config import delete_user_oauth_token\nfrom onyx.db.oauth_config import get_oauth_config\nfrom onyx.db.oauth_config import get_oauth_configs\nfrom onyx.db.oauth_config import get_tools_by_oauth_config\nfrom onyx.db.oauth_config import get_user_oauth_token\nfrom onyx.db.oauth_config import update_oauth_config\nfrom onyx.db.oauth_config import upsert_user_oauth_token\nfrom onyx.db.tools import delete_tool__no_commit\nfrom onyx.db.tools import update_tool\nfrom tests.external_dependency_unit.conftest import create_test_user\n\n\ndef _create_test_oauth_config(\n    db_session: Session,\n    name: str | None = None,\n) -> OAuthConfig:\n    \"\"\"Helper to create a test OAuth config with unique name\"\"\"\n    unique_name = name or f\"Test OAuth Config {uuid4().hex[:8]}\"\n    return create_oauth_config(\n        name=unique_name,\n        authorization_url=\"https://github.com/login/oauth/authorize\",\n        token_url=\"https://github.com/login/oauth/access_token\",\n        client_id=\"test_client_id\",\n        client_secret=\"test_client_secret\",\n        scopes=[\"repo\", \"user\"],\n        additional_params={\"test_param\": \"test_value\"},\n        db_session=db_session,\n    )\n\n\ndef _create_test_tool_with_oauth(\n    db_session: Session, oauth_config: OAuthConfig\n) -> Tool:\n    \"\"\"Helper to create a test tool with OAuth config\"\"\"\n    user = create_test_user(db_session, \"tool_owner\")\n    tool = Tool(\n        name=\"Test Tool\",\n        description=\"Test tool with OAuth\",\n        openapi_schema={\"openapi\": \"3.0.0\"},\n        user_id=user.id,\n        oauth_config_id=oauth_config.id,\n    )\n    db_session.add(tool)\n    db_session.commit()\n    db_session.refresh(tool)\n    return tool\n\n\nclass TestOAuthConfigCRUD:\n    \"\"\"Tests for OAuth configuration CRUD operations\"\"\"\n\n    def test_create_oauth_config(self, db_session: Session) -> None:\n        \"\"\"Test creating a new OAuth configuration\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n\n        assert oauth_config.id is not None\n        assert oauth_config.name.startswith(\"Test OAuth Config\")\n        assert (\n            oauth_config.authorization_url == \"https://github.com/login/oauth/authorize\"\n        )\n        assert oauth_config.token_url == \"https://github.com/login/oauth/access_token\"\n        assert oauth_config.scopes == [\"repo\", \"user\"]\n        assert oauth_config.additional_params == {\"test_param\": \"test_value\"}\n        assert oauth_config.created_at is not None\n        assert oauth_config.updated_at is not None\n\n        # Verify encrypted fields are stored (we can't decrypt in tests, but we can check they exist)\n        assert oauth_config.client_id is not None\n        assert oauth_config.client_secret is not None\n\n    def test_get_oauth_config(self, db_session: Session) -> None:\n        \"\"\"Test retrieving an OAuth config by ID\"\"\"\n        created_config = _create_test_oauth_config(db_session)\n\n        retrieved_config = get_oauth_config(created_config.id, db_session)\n\n        assert retrieved_config is not None\n        assert retrieved_config.id == created_config.id\n        assert retrieved_config.name == created_config.name\n\n    def test_get_oauth_config_not_found(self, db_session: Session) -> None:\n        \"\"\"Test retrieving a non-existent OAuth config returns None\"\"\"\n        config = get_oauth_config(99999, db_session)\n        assert config is None\n\n    def test_get_oauth_configs(self, db_session: Session) -> None:\n        \"\"\"Test retrieving all OAuth configurations\"\"\"\n        # Create multiple configs with unique names\n        config1 = _create_test_oauth_config(db_session)\n        config2 = _create_test_oauth_config(db_session)\n\n        configs = get_oauth_configs(db_session)\n\n        assert len(configs) >= 2\n        config_ids = [c.id for c in configs]\n        assert config1.id in config_ids\n        assert config2.id in config_ids\n\n    def test_update_oauth_config(self, db_session: Session) -> None:\n        \"\"\"Test updating an OAuth configuration\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        original_name = oauth_config.name\n\n        # Update the config with unique name\n        new_name = f\"Updated GitHub OAuth {uuid4().hex[:8]}\"\n        updated_config = update_oauth_config(\n            oauth_config.id,\n            db_session,\n            name=new_name,\n            scopes=[\"repo\", \"user\", \"admin\"],\n        )\n\n        assert updated_config.id == oauth_config.id\n        assert updated_config.name == new_name\n        assert updated_config.name != original_name\n        assert updated_config.scopes == [\"repo\", \"user\", \"admin\"]\n\n    def test_update_oauth_config_preserves_secrets(self, db_session: Session) -> None:\n        \"\"\"Test that updating config without providing secrets preserves existing values\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        original_client_id = oauth_config.client_id\n        original_client_secret = oauth_config.client_secret\n\n        # Update config without providing client_id or client_secret\n        new_name = f\"Updated Name {uuid4().hex[:8]}\"\n        updated_config = update_oauth_config(\n            oauth_config.id,\n            db_session,\n            name=new_name,\n            client_id=None,\n            client_secret=None,\n        )\n\n        # Secrets should be preserved\n        assert updated_config.client_id is not None\n        assert original_client_id is not None\n        assert updated_config.client_id.get_value(\n            apply_mask=False\n        ) == original_client_id.get_value(apply_mask=False)\n        assert updated_config.client_secret is not None\n        assert original_client_secret is not None\n        assert updated_config.client_secret.get_value(\n            apply_mask=False\n        ) == original_client_secret.get_value(apply_mask=False)\n        # But name should be updated\n        assert updated_config.name == new_name\n\n    def test_update_oauth_config_not_found(self, db_session: Session) -> None:\n        \"\"\"Test updating a non-existent OAuth config raises error\"\"\"\n        with pytest.raises(\n            ValueError, match=\"OAuth config with id 99999 does not exist\"\n        ):\n            update_oauth_config(99999, db_session, name=\"New Name\")\n\n    def test_update_oauth_config_clear_client_id(self, db_session: Session) -> None:\n        \"\"\"Test clearing client_id while preserving client_secret\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        original_client_secret = oauth_config.client_secret\n\n        # Clear client_id\n        updated_config = update_oauth_config(\n            oauth_config.id,\n            db_session,\n            clear_client_id=True,\n        )\n\n        # client_id should be cleared (empty string)\n        assert updated_config.client_id is not None\n        assert updated_config.client_id.get_value(apply_mask=False) == \"\"\n        # client_secret should be preserved\n        assert updated_config.client_secret is not None\n        assert original_client_secret is not None\n        assert updated_config.client_secret.get_value(\n            apply_mask=False\n        ) == original_client_secret.get_value(apply_mask=False)\n\n    def test_update_oauth_config_clear_client_secret(self, db_session: Session) -> None:\n        \"\"\"Test clearing client_secret while preserving client_id\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        original_client_id = oauth_config.client_id\n\n        # Clear client_secret\n        updated_config = update_oauth_config(\n            oauth_config.id,\n            db_session,\n            clear_client_secret=True,\n        )\n\n        # client_secret should be cleared (empty string)\n        assert updated_config.client_secret is not None\n        assert updated_config.client_secret.get_value(apply_mask=False) == \"\"\n        # client_id should be preserved\n        assert updated_config.client_id is not None\n        assert original_client_id is not None\n        assert updated_config.client_id.get_value(\n            apply_mask=False\n        ) == original_client_id.get_value(apply_mask=False)\n\n    def test_update_oauth_config_clear_both_secrets(self, db_session: Session) -> None:\n        \"\"\"Test clearing both client_id and client_secret\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n\n        # Clear both secrets\n        updated_config = update_oauth_config(\n            oauth_config.id,\n            db_session,\n            clear_client_id=True,\n            clear_client_secret=True,\n        )\n\n        # Both should be cleared (empty strings)\n        assert updated_config.client_id is not None\n        assert updated_config.client_id.get_value(apply_mask=False) == \"\"\n        assert updated_config.client_secret is not None\n        assert updated_config.client_secret.get_value(apply_mask=False) == \"\"\n\n    def test_update_oauth_config_authorization_url(self, db_session: Session) -> None:\n        \"\"\"Test updating authorization_url\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        new_auth_url = \"https://example.com/oauth/authorize\"\n\n        updated_config = update_oauth_config(\n            oauth_config.id,\n            db_session,\n            authorization_url=new_auth_url,\n        )\n\n        assert updated_config.authorization_url == new_auth_url\n\n    def test_update_oauth_config_token_url(self, db_session: Session) -> None:\n        \"\"\"Test updating token_url\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        new_token_url = \"https://example.com/oauth/token\"\n\n        updated_config = update_oauth_config(\n            oauth_config.id,\n            db_session,\n            token_url=new_token_url,\n        )\n\n        assert updated_config.token_url == new_token_url\n\n    def test_update_oauth_config_additional_params(self, db_session: Session) -> None:\n        \"\"\"Test updating additional_params\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        new_params = {\"access_type\": \"offline\", \"prompt\": \"consent\"}\n\n        updated_config = update_oauth_config(\n            oauth_config.id,\n            db_session,\n            additional_params=new_params,\n        )\n\n        assert updated_config.additional_params == new_params\n\n    def test_update_oauth_config_multiple_fields(self, db_session: Session) -> None:\n        \"\"\"Test updating multiple fields at once\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        new_name = f\"Updated Config {uuid4().hex[:8]}\"\n        new_auth_url = \"https://example.com/oauth/authorize\"\n        new_token_url = \"https://example.com/oauth/token\"\n        new_scopes = [\"read\", \"write\", \"admin\"]\n        new_params = {\"access_type\": \"offline\"}\n        new_client_id = \"new_client_id\"\n\n        updated_config = update_oauth_config(\n            oauth_config.id,\n            db_session,\n            name=new_name,\n            authorization_url=new_auth_url,\n            token_url=new_token_url,\n            scopes=new_scopes,\n            additional_params=new_params,\n            client_id=new_client_id,\n        )\n\n        assert updated_config.name == new_name\n        assert updated_config.authorization_url == new_auth_url\n        assert updated_config.token_url == new_token_url\n        assert updated_config.scopes == new_scopes\n        assert updated_config.additional_params == new_params\n        assert updated_config.client_id is not None\n        assert updated_config.client_id.get_value(apply_mask=False) == new_client_id\n\n    def test_delete_oauth_config(self, db_session: Session) -> None:\n        \"\"\"Test deleting an OAuth configuration\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        config_id = oauth_config.id\n\n        # Delete the config\n        delete_oauth_config(config_id, db_session)\n\n        # Verify it's deleted\n        deleted_config = get_oauth_config(config_id, db_session)\n        assert deleted_config is None\n\n    def test_delete_oauth_config_not_found(self, db_session: Session) -> None:\n        \"\"\"Test deleting a non-existent OAuth config raises error\"\"\"\n        with pytest.raises(\n            ValueError, match=\"OAuth config with id 99999 does not exist\"\n        ):\n            delete_oauth_config(99999, db_session)\n\n    def test_delete_oauth_config_sets_tool_reference_to_null(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test that deleting OAuth config sets tool's oauth_config_id to NULL\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        tool = _create_test_tool_with_oauth(db_session, oauth_config)\n\n        assert tool.oauth_config_id == oauth_config.id\n\n        # Delete the OAuth config\n        delete_oauth_config(oauth_config.id, db_session)\n\n        # Refresh tool from database\n        db_session.refresh(tool)\n\n        # Tool should still exist but oauth_config_id should be NULL\n        assert tool.oauth_config_id is None\n\n    def test_update_tool_cleans_up_orphaned_oauth_config(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test that changing a tool's oauth_config_id deletes the old config if no other tool uses it.\"\"\"\n        old_config = _create_test_oauth_config(db_session)\n        new_config = _create_test_oauth_config(db_session)\n        tool = _create_test_tool_with_oauth(db_session, old_config)\n        old_config_id = old_config.id\n\n        update_tool(\n            tool_id=tool.id,\n            name=None,\n            description=None,\n            openapi_schema=None,\n            custom_headers=None,\n            user_id=None,\n            db_session=db_session,\n            passthrough_auth=None,\n            oauth_config_id=new_config.id,\n        )\n\n        assert tool.oauth_config_id == new_config.id\n        assert get_oauth_config(old_config_id, db_session) is None\n\n    def test_delete_tool_cleans_up_orphaned_oauth_config(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test that deleting the last tool referencing an OAuthConfig also deletes the config.\"\"\"\n        config = _create_test_oauth_config(db_session)\n        tool = _create_test_tool_with_oauth(db_session, config)\n        config_id = config.id\n\n        delete_tool__no_commit(tool.id, db_session)\n        db_session.commit()\n\n        assert get_oauth_config(config_id, db_session) is None\n\n    def test_update_tool_preserves_shared_oauth_config(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test that updating one tool's oauth_config_id preserves the config when another tool still uses it.\"\"\"\n        shared_config = _create_test_oauth_config(db_session)\n        new_config = _create_test_oauth_config(db_session)\n        tool_a = _create_test_tool_with_oauth(db_session, shared_config)\n        tool_b = _create_test_tool_with_oauth(db_session, shared_config)\n        shared_config_id = shared_config.id\n\n        # Move tool_a to a new config; tool_b still references shared_config\n        update_tool(\n            tool_id=tool_a.id,\n            name=None,\n            description=None,\n            openapi_schema=None,\n            custom_headers=None,\n            user_id=None,\n            db_session=db_session,\n            passthrough_auth=None,\n            oauth_config_id=new_config.id,\n        )\n\n        assert tool_a.oauth_config_id == new_config.id\n        assert tool_b.oauth_config_id == shared_config_id\n        assert get_oauth_config(shared_config_id, db_session) is not None\n\n    def test_delete_tool_preserves_shared_oauth_config(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test that deleting one tool preserves the config when another tool still uses it.\"\"\"\n        shared_config = _create_test_oauth_config(db_session)\n        tool_a = _create_test_tool_with_oauth(db_session, shared_config)\n        tool_b = _create_test_tool_with_oauth(db_session, shared_config)\n        shared_config_id = shared_config.id\n\n        delete_tool__no_commit(tool_a.id, db_session)\n        db_session.commit()\n\n        assert tool_b.oauth_config_id == shared_config_id\n        assert get_oauth_config(shared_config_id, db_session) is not None\n\n\nclass TestOAuthUserTokenCRUD:\n    \"\"\"Tests for OAuth user token CRUD operations\"\"\"\n\n    def test_upsert_user_oauth_token_create(self, db_session: Session) -> None:\n        \"\"\"Test creating a new user OAuth token\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        token_data = {\n            \"access_token\": \"test_access_token\",\n            \"refresh_token\": \"test_refresh_token\",\n            \"token_type\": \"Bearer\",\n            \"expires_at\": 1234567890,\n        }\n\n        user_token = upsert_user_oauth_token(\n            oauth_config.id, user.id, token_data, db_session\n        )\n\n        assert user_token.id is not None\n        assert user_token.oauth_config_id == oauth_config.id\n        assert user_token.user_id == user.id\n        assert user_token.token_data is not None\n        assert user_token.token_data.get_value(apply_mask=False) == token_data\n        assert user_token.created_at is not None\n        assert user_token.updated_at is not None\n\n    def test_upsert_user_oauth_token_update(self, db_session: Session) -> None:\n        \"\"\"Test updating an existing user OAuth token\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Create initial token\n        initial_token_data = {\n            \"access_token\": \"initial_token\",\n            \"expires_at\": 1234567890,\n        }\n        initial_token = upsert_user_oauth_token(\n            oauth_config.id, user.id, initial_token_data, db_session\n        )\n        initial_token_id = initial_token.id\n\n        # Update with new token data\n        updated_token_data = {\n            \"access_token\": \"updated_token\",\n            \"expires_at\": 9876543210,\n        }\n        updated_token = upsert_user_oauth_token(\n            oauth_config.id, user.id, updated_token_data, db_session\n        )\n\n        # Should be the same token record (updated, not inserted)\n        assert updated_token.id == initial_token_id\n        assert updated_token.token_data is not None\n        assert (\n            updated_token.token_data.get_value(apply_mask=False) == updated_token_data\n        )\n        assert (\n            updated_token.token_data.get_value(apply_mask=False) != initial_token_data\n        )\n\n    def test_get_user_oauth_token(self, db_session: Session) -> None:\n        \"\"\"Test retrieving a user's OAuth token\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        token_data = {\"access_token\": \"test_token\"}\n        created_token = upsert_user_oauth_token(\n            oauth_config.id, user.id, token_data, db_session\n        )\n\n        retrieved_token = get_user_oauth_token(oauth_config.id, user.id, db_session)\n\n        assert retrieved_token is not None\n        assert retrieved_token.id == created_token.id\n        assert retrieved_token.token_data is not None\n        assert retrieved_token.token_data.get_value(apply_mask=False) == token_data\n\n    def test_get_user_oauth_token_not_found(self, db_session: Session) -> None:\n        \"\"\"Test retrieving a non-existent user token returns None\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        token = get_user_oauth_token(oauth_config.id, user.id, db_session)\n        assert token is None\n\n    def test_delete_user_oauth_token(self, db_session: Session) -> None:\n        \"\"\"Test deleting a user's OAuth token\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        token_data = {\"access_token\": \"test_token\"}\n        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)\n\n        # Delete the token\n        delete_user_oauth_token(oauth_config.id, user.id, db_session)\n\n        # Verify it's deleted\n        deleted_token = get_user_oauth_token(oauth_config.id, user.id, db_session)\n        assert deleted_token is None\n\n    def test_delete_user_oauth_token_not_found(self, db_session: Session) -> None:\n        \"\"\"Test deleting a non-existent user token raises error\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        with pytest.raises(\n            ValueError,\n            match=f\"OAuth token for user {user.id} and config {oauth_config.id} does not exist\",\n        ):\n            delete_user_oauth_token(oauth_config.id, user.id, db_session)\n\n    def test_unique_constraint_on_user_config(self, db_session: Session) -> None:\n        \"\"\"Test that unique constraint prevents duplicate tokens per user per config\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Create first token\n        token_data1 = {\"access_token\": \"token1\"}\n        upsert_user_oauth_token(oauth_config.id, user.id, token_data1, db_session)\n\n        # Try to manually insert a duplicate (should fail at DB level)\n        # But upsert should work fine (updates instead of inserting)\n        token_data2 = {\"access_token\": \"token2\"}\n        updated_token = upsert_user_oauth_token(\n            oauth_config.id, user.id, token_data2, db_session\n        )\n\n        # Should only be one token\n        retrieved_token = get_user_oauth_token(oauth_config.id, user.id, db_session)\n        assert retrieved_token is not None\n        assert retrieved_token.id == updated_token.id\n        assert retrieved_token.token_data is not None\n        assert retrieved_token.token_data.get_value(apply_mask=False) == token_data2\n\n    def test_cascade_delete_user_tokens_on_config_deletion(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test that deleting OAuth config cascades to user tokens\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user1 = create_test_user(db_session, \"user1\")\n        user2 = create_test_user(db_session, \"user2\")\n\n        # Create tokens for both users\n        upsert_user_oauth_token(\n            oauth_config.id, user1.id, {\"access_token\": \"token1\"}, db_session\n        )\n        upsert_user_oauth_token(\n            oauth_config.id, user2.id, {\"access_token\": \"token2\"}, db_session\n        )\n\n        # Delete the OAuth config\n        delete_oauth_config(oauth_config.id, db_session)\n\n        # User tokens should be deleted\n        token1 = get_user_oauth_token(oauth_config.id, user1.id, db_session)\n        token2 = get_user_oauth_token(oauth_config.id, user2.id, db_session)\n        assert token1 is None\n        assert token2 is None\n\n\nclass TestOAuthHelperOperations:\n    \"\"\"Tests for OAuth helper operations\"\"\"\n\n    def test_get_tools_by_oauth_config(self, db_session: Session) -> None:\n        \"\"\"Test retrieving tools that use a specific OAuth config\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n\n        # Create multiple tools using this config\n        tool1 = _create_test_tool_with_oauth(db_session, oauth_config)\n        tool2 = _create_test_tool_with_oauth(db_session, oauth_config)\n\n        # Create another tool without OAuth\n        user = create_test_user(db_session, \"other_user\")\n        tool3 = Tool(\n            name=\"Tool without OAuth\",\n            description=\"No OAuth config\",\n            openapi_schema={\"openapi\": \"3.0.0\"},\n            user_id=user.id,\n        )\n        db_session.add(tool3)\n        db_session.commit()\n\n        # Get tools by OAuth config\n        tools = get_tools_by_oauth_config(oauth_config.id, db_session)\n\n        assert len(tools) == 2\n        tool_ids = [t.id for t in tools]\n        assert tool1.id in tool_ids\n        assert tool2.id in tool_ids\n        assert tool3.id not in tool_ids\n\n    def test_get_tools_by_oauth_config_empty(self, db_session: Session) -> None:\n        \"\"\"Test retrieving tools for config with no associated tools\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n\n        tools = get_tools_by_oauth_config(oauth_config.id, db_session)\n\n        assert len(tools) == 0\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/tools/test_oauth_token_manager.py",
    "content": "\"\"\"\nTest suite for OAuthTokenManager.\n\nTests the OAuth token management functionality including token validation,\nrefresh, expiration checking, and authorization URL building.\nAll HTTP requests to external OAuth providers are mocked.\n\"\"\"\n\nimport time\nfrom unittest.mock import Mock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom requests import HTTPError\nfrom requests import Response\nfrom sqlalchemy.orm import Session\n\nfrom onyx.auth.oauth_token_manager import OAuthTokenManager\nfrom onyx.db.models import OAuthConfig\nfrom onyx.db.oauth_config import create_oauth_config\nfrom onyx.db.oauth_config import upsert_user_oauth_token\nfrom onyx.utils.sensitive import SensitiveValue\nfrom tests.external_dependency_unit.conftest import create_test_user\n\n\ndef _create_test_oauth_config(db_session: Session) -> OAuthConfig:\n    \"\"\"Helper to create a test OAuth config\"\"\"\n    return create_oauth_config(\n        name=f\"Test OAuth Config {uuid4().hex[:8]}\",\n        authorization_url=\"https://github.com/login/oauth/authorize\",\n        token_url=\"https://github.com/login/oauth/access_token\",\n        client_id=\"test_client_id\",\n        client_secret=\"test_client_secret\",\n        scopes=[\"repo\", \"user\"],\n        additional_params=None,\n        db_session=db_session,\n    )\n\n\nclass TestOAuthTokenManagerValidation:\n    \"\"\"Tests for token validation and retrieval\"\"\"\n\n    def test_get_valid_access_token_with_valid_token(self, db_session: Session) -> None:\n        \"\"\"Test getting a valid access token that hasn't expired\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Create a non-expired token\n        future_timestamp = int(time.time()) + 3600  # Expires in 1 hour\n        token_data = {\n            \"access_token\": \"valid_token\",\n            \"refresh_token\": \"refresh_token\",\n            \"expires_at\": future_timestamp,\n        }\n        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)\n\n        # Get the token\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n        access_token = manager.get_valid_access_token()\n\n        assert access_token == \"valid_token\"\n\n    def test_get_valid_access_token_no_token_exists(self, db_session: Session) -> None:\n        \"\"\"Test getting access token when no token exists returns None\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n        access_token = manager.get_valid_access_token()\n\n        assert access_token is None\n\n    def test_get_valid_access_token_no_expiration(self, db_session: Session) -> None:\n        \"\"\"Test getting access token without expiration data (assumes valid)\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Create token without expiration\n        token_data = {\n            \"access_token\": \"token_without_expiry\",\n            \"token_type\": \"Bearer\",\n        }\n        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)\n\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n        access_token = manager.get_valid_access_token()\n\n        assert access_token == \"token_without_expiry\"\n\n    @patch(\"onyx.auth.oauth_token_manager.requests.post\")\n    def test_get_valid_access_token_with_expired_token_refreshes(\n        self, mock_post: Mock, db_session: Session\n    ) -> None:\n        \"\"\"Test that expired token triggers automatic refresh\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Create an expired token\n        past_timestamp = int(time.time()) - 100  # Expired 100 seconds ago\n        token_data = {\n            \"access_token\": \"expired_token\",\n            \"refresh_token\": \"refresh_token\",\n            \"expires_at\": past_timestamp,\n        }\n        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)\n\n        # Mock the refresh token response\n        mock_response = Mock(spec=Response)\n        mock_response.json.return_value = {\n            \"access_token\": \"new_access_token\",\n            \"refresh_token\": \"new_refresh_token\",\n            \"expires_in\": 3600,\n        }\n        mock_response.raise_for_status = Mock()\n        mock_post.return_value = mock_response\n\n        # Get the token (should trigger refresh)\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n        access_token = manager.get_valid_access_token()\n\n        assert access_token == \"new_access_token\"\n        # Verify refresh endpoint was called\n        mock_post.assert_called_once()\n        call_args = mock_post.call_args\n        assert call_args[0][0] == oauth_config.token_url\n        assert call_args[1][\"data\"][\"grant_type\"] == \"refresh_token\"\n        assert call_args[1][\"data\"][\"refresh_token\"] == \"refresh_token\"\n\n    def test_get_valid_access_token_expired_no_refresh_token(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test that expired token without refresh_token returns None\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Create an expired token without refresh_token\n        past_timestamp = int(time.time()) - 100\n        token_data = {\n            \"access_token\": \"expired_token\",\n            \"expires_at\": past_timestamp,\n            # No refresh_token\n        }\n        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)\n\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n        access_token = manager.get_valid_access_token()\n\n        assert access_token is None\n\n    @patch(\"onyx.auth.oauth_token_manager.requests.post\")\n    def test_get_valid_access_token_refresh_fails(\n        self, mock_post: Mock, db_session: Session\n    ) -> None:\n        \"\"\"Test that failed refresh returns None\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Create an expired token\n        past_timestamp = int(time.time()) - 100\n        token_data = {\n            \"access_token\": \"expired_token\",\n            \"refresh_token\": \"refresh_token\",\n            \"expires_at\": past_timestamp,\n        }\n        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)\n\n        # Mock the refresh to fail\n        mock_post.side_effect = HTTPError(\"Token refresh failed\")\n\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n        access_token = manager.get_valid_access_token()\n\n        assert access_token is None\n\n\nclass TestOAuthTokenManagerRefresh:\n    \"\"\"Tests for token refresh functionality\"\"\"\n\n    @patch(\"onyx.auth.oauth_token_manager.requests.post\")\n    def test_refresh_token_success(self, mock_post: Mock, db_session: Session) -> None:\n        \"\"\"Test successful token refresh\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Create initial token\n        token_data = {\n            \"access_token\": \"old_token\",\n            \"refresh_token\": \"old_refresh\",\n            \"expires_at\": int(time.time()) - 100,\n        }\n        user_token = upsert_user_oauth_token(\n            oauth_config.id, user.id, token_data, db_session\n        )\n\n        # Mock successful refresh\n        new_expires_in = 3600\n        mock_response = Mock(spec=Response)\n        mock_response.json.return_value = {\n            \"access_token\": \"new_token\",\n            \"refresh_token\": \"new_refresh\",\n            \"expires_in\": new_expires_in,\n        }\n        mock_response.raise_for_status = Mock()\n        mock_post.return_value = mock_response\n\n        # Refresh the token\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n        new_access_token = manager.refresh_token(user_token)\n\n        assert new_access_token == \"new_token\"\n\n        # Verify token was updated in DB\n        db_session.refresh(user_token)\n        assert user_token.token_data is not None\n        token_data = user_token.token_data.get_value(apply_mask=False)\n        assert token_data[\"access_token\"] == \"new_token\"\n        assert token_data[\"refresh_token\"] == \"new_refresh\"\n        assert \"expires_at\" in token_data\n\n    @patch(\"onyx.auth.oauth_token_manager.requests.post\")\n    def test_refresh_token_preserves_refresh_token(\n        self, mock_post: Mock, db_session: Session\n    ) -> None:\n        \"\"\"Test that refresh preserves old refresh_token if provider doesn't return new one\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Create initial token\n        token_data = {\n            \"access_token\": \"old_token\",\n            \"refresh_token\": \"old_refresh\",\n            \"expires_at\": int(time.time()) - 100,\n        }\n        user_token = upsert_user_oauth_token(\n            oauth_config.id, user.id, token_data, db_session\n        )\n\n        # Mock refresh response WITHOUT refresh_token\n        mock_response = Mock(spec=Response)\n        mock_response.json.return_value = {\n            \"access_token\": \"new_token\",\n            \"expires_in\": 3600,\n            # No refresh_token returned\n        }\n        mock_response.raise_for_status = Mock()\n        mock_post.return_value = mock_response\n\n        # Refresh the token\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n        manager.refresh_token(user_token)\n\n        # Verify old refresh_token was preserved\n        db_session.refresh(user_token)\n        assert user_token.token_data is not None\n        token_data = user_token.token_data.get_value(apply_mask=False)\n        assert token_data[\"refresh_token\"] == \"old_refresh\"\n\n    @patch(\"onyx.auth.oauth_token_manager.requests.post\")\n    def test_refresh_token_http_error(\n        self, mock_post: Mock, db_session: Session\n    ) -> None:\n        \"\"\"Test that HTTP error during refresh is raised\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        token_data = {\n            \"access_token\": \"old_token\",\n            \"refresh_token\": \"old_refresh\",\n            \"expires_at\": int(time.time()) - 100,\n        }\n        user_token = upsert_user_oauth_token(\n            oauth_config.id, user.id, token_data, db_session\n        )\n\n        # Mock HTTP error\n        mock_response = Mock(spec=Response)\n        mock_response.raise_for_status.side_effect = HTTPError(\"Invalid refresh token\")\n        mock_post.return_value = mock_response\n\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n\n        with pytest.raises(HTTPError):\n            manager.refresh_token(user_token)\n\n\nclass TestOAuthTokenManagerExpiration:\n    \"\"\"Tests for token expiration checking\"\"\"\n\n    def test_is_token_expired_with_valid_token(self, db_session: Session) -> None:\n        \"\"\"Test that non-expired token is detected as valid\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n\n        # Token expires in 2 hours (well beyond 60 second buffer)\n        token_data = {\"expires_at\": int(time.time()) + 7200}\n\n        assert manager.is_token_expired(token_data) is False\n\n    def test_is_token_expired_with_expired_token(self, db_session: Session) -> None:\n        \"\"\"Test that expired token is detected\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n\n        # Token expired 1 hour ago\n        token_data = {\"expires_at\": int(time.time()) - 3600}\n\n        assert manager.is_token_expired(token_data) is True\n\n    def test_is_token_expired_with_buffer_zone(self, db_session: Session) -> None:\n        \"\"\"Test that token within 60 second buffer is considered expired\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n\n        # Token expires in 30 seconds (within 60 second buffer)\n        token_data = {\"expires_at\": int(time.time()) + 30}\n\n        assert manager.is_token_expired(token_data) is True\n\n    def test_is_token_expired_no_expiration_data(self, db_session: Session) -> None:\n        \"\"\"Test that token without expiration is considered valid\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n\n        # Token without expires_at\n        token_data = {\"access_token\": \"some_token\"}\n\n        assert manager.is_token_expired(token_data) is False\n\n\nclass TestOAuthTokenManagerCodeExchange:\n    \"\"\"Tests for authorization code exchange\"\"\"\n\n    @patch(\"onyx.auth.oauth_token_manager.requests.post\")\n    def test_exchange_code_for_token_success(\n        self, mock_post: Mock, db_session: Session\n    ) -> None:\n        \"\"\"Test successful code exchange\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Mock successful token exchange\n        mock_response = Mock(spec=Response)\n        mock_response.json.return_value = {\n            \"access_token\": \"new_access_token\",\n            \"refresh_token\": \"new_refresh_token\",\n            \"token_type\": \"Bearer\",\n            \"expires_in\": 3600,\n            \"scope\": \"repo user\",\n        }\n        mock_response.raise_for_status = Mock()\n        mock_post.return_value = mock_response\n\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n        token_data = manager.exchange_code_for_token(\n            code=\"auth_code_123\", redirect_uri=\"https://example.com/callback\"\n        )\n\n        assert token_data[\"access_token\"] == \"new_access_token\"\n        assert token_data[\"refresh_token\"] == \"new_refresh_token\"\n        assert \"expires_at\" in token_data\n\n        # Verify correct parameters were sent\n        mock_post.assert_called_once()\n        call_args = mock_post.call_args\n        assert call_args[0][0] == oauth_config.token_url\n        assert call_args[1][\"data\"][\"grant_type\"] == \"authorization_code\"\n        assert call_args[1][\"data\"][\"code\"] == \"auth_code_123\"\n        assert oauth_config.client_id is not None\n        assert oauth_config.client_secret is not None\n        assert call_args[1][\"data\"][\"client_id\"] == oauth_config.client_id.get_value(\n            apply_mask=False\n        )\n        assert call_args[1][\"data\"][\n            \"client_secret\"\n        ] == oauth_config.client_secret.get_value(apply_mask=False)\n        assert call_args[1][\"data\"][\"redirect_uri\"] == \"https://example.com/callback\"\n\n    @patch(\"onyx.auth.oauth_token_manager.requests.post\")\n    def test_exchange_code_for_token_http_error(\n        self, mock_post: Mock, db_session: Session\n    ) -> None:\n        \"\"\"Test that HTTP error during code exchange is raised\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Mock HTTP error\n        mock_response = Mock(spec=Response)\n        mock_response.raise_for_status.side_effect = HTTPError(\"Invalid code\")\n        mock_post.return_value = mock_response\n\n        manager = OAuthTokenManager(oauth_config, user.id, db_session)\n\n        with pytest.raises(HTTPError):\n            manager.exchange_code_for_token(\n                code=\"invalid_code\", redirect_uri=\"https://example.com/callback\"\n            )\n\n\nclass TestOAuthTokenManagerURLBuilding:\n    \"\"\"Tests for authorization URL building\"\"\"\n\n    def test_build_authorization_url_basic(self, db_session: Session) -> None:\n        \"\"\"Test building basic authorization URL\"\"\"\n        oauth_config = _create_test_oauth_config(db_session)\n\n        url = OAuthTokenManager.build_authorization_url(\n            oauth_config=oauth_config,\n            redirect_uri=\"https://example.com/callback\",\n            state=\"random_state_123\",\n        )\n\n        assert url.startswith(oauth_config.authorization_url)\n        assert \"client_id=test_client_id\" in url\n        assert \"redirect_uri=https%3A%2F%2Fexample.com%2Fcallback\" in url\n        assert \"response_type=code\" in url\n        assert \"state=random_state_123\" in url\n        # Check scopes are included\n        assert \"scope=repo+user\" in url\n\n    def test_build_authorization_url_with_additional_params(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test building URL with additional provider-specific parameters\"\"\"\n        oauth_config = create_oauth_config(\n            name=f\"Test OAuth {uuid4().hex[:8]}\",\n            authorization_url=\"https://accounts.google.com/o/oauth2/v2/auth\",\n            token_url=\"https://oauth2.googleapis.com/token\",\n            client_id=\"google_client_id\",\n            client_secret=\"google_client_secret\",\n            scopes=[\"email\", \"profile\"],\n            additional_params={\"access_type\": \"offline\", \"prompt\": \"consent\"},\n            db_session=db_session,\n        )\n\n        url = OAuthTokenManager.build_authorization_url(\n            oauth_config=oauth_config,\n            redirect_uri=\"https://example.com/callback\",\n            state=\"state_456\",\n        )\n\n        assert \"access_type=offline\" in url\n        assert \"prompt=consent\" in url\n        assert \"scope=email+profile\" in url\n\n    def test_build_authorization_url_no_scopes(self, db_session: Session) -> None:\n        \"\"\"Test building URL when no scopes are configured\"\"\"\n        oauth_config = create_oauth_config(\n            name=f\"Test OAuth {uuid4().hex[:8]}\",\n            authorization_url=\"https://oauth.example.com/authorize\",\n            token_url=\"https://oauth.example.com/token\",\n            client_id=\"simple_client_id\",\n            client_secret=\"simple_client_secret\",\n            scopes=None,  # No scopes\n            additional_params=None,\n            db_session=db_session,\n        )\n\n        url = OAuthTokenManager.build_authorization_url(\n            oauth_config=oauth_config,\n            redirect_uri=\"https://example.com/callback\",\n            state=\"state_789\",\n        )\n\n        # Should not include scope parameter\n        assert \"scope=\" not in url\n        assert \"client_id=simple_client_id\" in url\n\n    def test_build_authorization_url_with_existing_query_params(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"Test building URL when authorization_url already has query parameters\"\"\"\n        oauth_config = create_oauth_config(\n            name=f\"Test OAuth {uuid4().hex[:8]}\",\n            authorization_url=\"https://oauth.example.com/authorize?foo=bar\",\n            token_url=\"https://oauth.example.com/token\",\n            client_id=\"custom_client_id\",\n            client_secret=\"custom_client_secret\",\n            scopes=[\"read\"],\n            additional_params=None,\n            db_session=db_session,\n        )\n\n        url = OAuthTokenManager.build_authorization_url(\n            oauth_config=oauth_config,\n            redirect_uri=\"https://example.com/callback\",\n            state=\"state_xyz\",\n        )\n\n        # Should use & instead of ? since URL already has query params\n        assert \"foo=bar&\" in url or \"?foo=bar\" in url\n        assert \"client_id=custom_client_id\" in url\n\n\nclass TestUnwrapSensitiveStr:\n    \"\"\"Tests for _unwrap_sensitive_str static method\"\"\"\n\n    def test_unwrap_sensitive_str(self) -> None:\n        \"\"\"Test that both SensitiveValue and plain str inputs are handled\"\"\"\n        # SensitiveValue input\n        sensitive = SensitiveValue[str](\n            encrypted_bytes=b\"test_client_id\",\n            decrypt_fn=lambda b: b.decode(),\n        )\n        assert OAuthTokenManager._unwrap_sensitive_str(sensitive) == \"test_client_id\"\n\n        # Plain str input\n        assert OAuthTokenManager._unwrap_sensitive_str(\"plain_string\") == \"plain_string\"\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/tools/test_oauth_tool_integration.py",
    "content": "\"\"\"\nTest suite for OAuth integration in tool_constructor.\n\nTests the priority logic for OAuth tokens when constructing custom tools:\n1. Priority 1: OAuth config (per-tool OAuth)\n2. Priority 2: Passthrough auth (user's login OAuth token)\n\nAll external HTTP calls are mocked, but Postgres and Redis are running.\n\"\"\"\n\nimport queue\nfrom typing import Any\nfrom unittest.mock import Mock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.db.models import OAuthAccount\nfrom onyx.db.models import OAuthConfig\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Tool\nfrom onyx.db.models import User\nfrom onyx.db.oauth_config import create_oauth_config\nfrom onyx.db.oauth_config import upsert_user_oauth_token\nfrom onyx.llm.factory import get_default_llm\nfrom onyx.tools.tool_constructor import construct_tools\nfrom onyx.tools.tool_constructor import SearchToolConfig\nfrom onyx.tools.tool_implementations.custom.custom_tool import CustomTool\nfrom tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider\nfrom tests.external_dependency_unit.conftest import create_test_user\n\n\n# Simple OpenAPI schema for testing\nSIMPLE_OPENAPI_SCHEMA: dict[str, Any] = {\n    \"openapi\": \"3.0.0\",\n    \"info\": {\"title\": \"Test API\", \"version\": \"1.0.0\"},\n    \"servers\": [{\"url\": \"https://api.example.com\"}],\n    \"paths\": {\n        \"/test\": {\n            \"get\": {\n                \"operationId\": \"test_operation\",\n                \"summary\": \"Test operation\",\n                \"description\": \"A test operation\",\n                \"responses\": {\"200\": {\"description\": \"Success\"}},\n            }\n        }\n    },\n}\n\n\ndef _create_test_persona(db_session: Session, user: User, tools: list[Tool]) -> Persona:\n    \"\"\"Helper to create a test persona with the given tools\"\"\"\n    # Create persona with prompts directly on it\n    persona = Persona(\n        name=f\"Test Persona {uuid4().hex[:8]}\",\n        description=\"Test persona\",\n        system_prompt=\"You are a helpful assistant\",\n        task_prompt=\"Answer the user's question\",\n        tools=tools,\n        document_sets=[],\n        users=[user],\n        groups=[],\n        is_listed=True,\n        is_public=True,\n        display_priority=None,\n        starter_messages=None,\n        deleted=False,\n    )\n    db_session.add(persona)\n    db_session.commit()\n    db_session.refresh(persona)\n    return persona\n\n\ndef _create_test_oauth_config(\n    db_session: Session, name: str | None = None\n) -> OAuthConfig:\n    \"\"\"Helper to create a test OAuth config\"\"\"\n    return create_oauth_config(\n        name=name or f\"Test OAuth Config {uuid4().hex[:8]}\",\n        authorization_url=\"https://github.com/login/oauth/authorize\",\n        token_url=\"https://github.com/login/oauth/access_token\",\n        client_id=\"test_client_id\",\n        client_secret=\"test_client_secret\",\n        scopes=[\"repo\", \"user\"],\n        additional_params=None,\n        db_session=db_session,\n    )\n\n\ndef _get_authorization_header(headers: dict[str, str]) -> str | None:\n    \"\"\"\n    Helper to extract authorization header from headers dict.\n    Checks both 'authorization' and 'Authorization' keys.\n\n    Returns:\n        The authorization header value, or None if not present.\n    \"\"\"\n    return headers.get(\"authorization\") or headers.get(\"Authorization\")\n\n\ndef _assert_has_authorization_header(headers: dict[str, str]) -> None:\n    \"\"\"Assert that headers contain an authorization header (any case).\"\"\"\n    assert (\n        \"authorization\" in headers or \"Authorization\" in headers\n    ), \"Expected authorization header to be present\"\n\n\ndef _assert_no_authorization_header(headers: dict[str, str]) -> None:\n    \"\"\"Assert that headers do NOT contain an authorization header.\"\"\"\n    assert (\n        \"authorization\" not in headers and \"Authorization\" not in headers\n    ), \"Expected no authorization header\"\n\n\nclass TestOAuthToolIntegrationPriority:\n    \"\"\"Tests for OAuth token priority logic in tool_constructor\"\"\"\n\n    @pytest.fixture(autouse=True)\n    def setup_llm_provider(self, db_session: Session) -> None:\n        \"\"\"Ensure default LLM provider is set up for each test.\"\"\"\n        ensure_default_llm_provider(db_session)\n\n    def test_oauth_config_priority_over_passthrough(self, db_session: Session) -> None:\n        \"\"\"\n        Test that oauth_config_id takes priority over passthrough_auth.\n        When both are set, the tool should use the OAuth config token.\n        \"\"\"\n        # Create user with login OAuth token\n        user = create_test_user(db_session, \"oauth_user\")\n        oauth_account = OAuthAccount(\n            user_id=user.id,\n            oauth_name=\"github\",\n            account_id=\"github_user_123\",\n            account_email=user.email,\n            access_token=\"user_login_token_12345\",\n            refresh_token=\"\",\n        )\n        db_session.add(oauth_account)\n        db_session.commit()\n        # Refresh user to load oauth_accounts relationship\n        db_session.refresh(user)\n\n        # Create OAuth config with a valid token\n        oauth_config = _create_test_oauth_config(db_session)\n        token_data = {\n            \"access_token\": \"oauth_config_token_67890\",\n            \"token_type\": \"Bearer\",\n        }\n        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)\n\n        # Create tool with BOTH oauth_config_id and passthrough_auth set\n        tool = Tool(\n            name=\"test_tool\",\n            description=\"Test tool\",\n            openapi_schema=SIMPLE_OPENAPI_SCHEMA,\n            oauth_config_id=oauth_config.id,  # Priority 1\n            passthrough_auth=True,  # Priority 2 - should be ignored\n            user_id=user.id,\n        )\n        db_session.add(tool)\n        db_session.commit()\n        db_session.refresh(tool)\n\n        # Create persona and chat session\n        persona = _create_test_persona(db_session, user, [tool])\n        llm = get_default_llm()\n\n        # Construct tools\n        search_tool_config = SearchToolConfig()\n\n        tool_dict = construct_tools(\n            persona=persona,\n            db_session=db_session,\n            emitter=Emitter(merged_queue=queue.Queue()),\n            user=user,\n            llm=llm,\n            search_tool_config=search_tool_config,\n        )\n\n        # Verify tool was constructed\n        assert tool.id in tool_dict\n        custom_tools = tool_dict[tool.id]\n        assert len(custom_tools) == 1\n        custom_tool = custom_tools[0]\n        assert isinstance(custom_tool, CustomTool)\n\n        # Verify the OAuth config token is used (Priority 1), NOT passthrough token\n        _assert_has_authorization_header(custom_tool.headers)\n        auth_header = _get_authorization_header(custom_tool.headers)\n        assert auth_header == \"Bearer oauth_config_token_67890\"\n\n    def test_passthrough_auth_when_no_oauth_config(self, db_session: Session) -> None:\n        \"\"\"\n        Test that passthrough_auth works when oauth_config_id is not set.\n        \"\"\"\n        # Create user with login OAuth token\n        user = create_test_user(db_session, \"oauth_user\")\n        oauth_account = OAuthAccount(\n            user_id=user.id,\n            oauth_name=\"google\",\n            account_id=\"google_user_456\",\n            account_email=user.email,\n            access_token=\"user_passthrough_token_99999\",\n            refresh_token=\"\",\n        )\n        db_session.add(oauth_account)\n        db_session.commit()\n        # Refresh user to load oauth_accounts relationship\n        db_session.refresh(user)\n\n        # Create tool with only passthrough_auth set (no oauth_config_id)\n        tool = Tool(\n            name=\"test_tool_passthrough\",\n            description=\"Test tool with passthrough\",\n            openapi_schema=SIMPLE_OPENAPI_SCHEMA,\n            oauth_config_id=None,  # No OAuth config\n            passthrough_auth=True,  # Should use user's login token\n            user_id=user.id,\n        )\n        db_session.add(tool)\n        db_session.commit()\n        db_session.refresh(tool)\n\n        # Create persona\n        persona = _create_test_persona(db_session, user, [tool])\n        llm = get_default_llm()\n\n        # Construct tools\n        tool_dict = construct_tools(\n            persona=persona,\n            db_session=db_session,\n            emitter=Emitter(merged_queue=queue.Queue()),\n            user=user,\n            llm=llm,\n        )\n\n        # Verify tool was constructed\n        assert tool.id in tool_dict\n        custom_tools = tool_dict[tool.id]\n        assert len(custom_tools) == 1\n        custom_tool = custom_tools[0]\n        assert isinstance(custom_tool, CustomTool)\n\n        # Verify the passthrough token is used\n        _assert_has_authorization_header(custom_tool.headers)\n        auth_header = _get_authorization_header(custom_tool.headers)\n        assert auth_header == \"Bearer user_passthrough_token_99999\"\n\n    def test_oauth_config_without_valid_token_logs_warning(\n        self, db_session: Session, caplog: pytest.LogCaptureFixture\n    ) -> None:\n        \"\"\"\n        Test that when oauth_config_id is set but no valid token exists,\n        a warning is logged and the tool has no auth header.\n        \"\"\"\n        # Create user (no OAuth account)\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Create OAuth config but DO NOT create a token for the user\n        oauth_config = _create_test_oauth_config(db_session)\n\n        # Create tool with oauth_config_id but user has no token\n        tool = Tool(\n            name=\"test_tool_no_token\",\n            description=\"Test tool without token\",\n            openapi_schema=SIMPLE_OPENAPI_SCHEMA,\n            oauth_config_id=oauth_config.id,\n            passthrough_auth=False,\n            user_id=user.id,\n        )\n        db_session.add(tool)\n        db_session.commit()\n        db_session.refresh(tool)\n\n        # Create persona\n        persona = _create_test_persona(db_session, user, [tool])\n        llm = get_default_llm()\n\n        # Construct tools\n        with caplog.at_level(\"WARNING\"):\n            tool_dict = construct_tools(\n                persona=persona,\n                db_session=db_session,\n                emitter=Emitter(merged_queue=queue.Queue()),\n                user=user,\n                llm=llm,\n            )\n\n        # Verify warning was logged\n        assert any(\n            \"No valid OAuth token found for tool\" in record.message\n            for record in caplog.records\n        )\n        assert any(str(oauth_config.id) in record.message for record in caplog.records)\n\n        # Verify tool was constructed but has no authorization header\n        assert tool.id in tool_dict\n        custom_tools = tool_dict[tool.id]\n        assert len(custom_tools) == 1\n        custom_tool = custom_tools[0]\n        assert isinstance(custom_tool, CustomTool)\n\n        # Verify NO authorization header is present\n        _assert_no_authorization_header(custom_tool.headers)\n\n    def test_no_auth_when_both_disabled(self, db_session: Session) -> None:\n        \"\"\"\n        Test that when neither oauth_config_id nor passthrough_auth is set,\n        the tool has no authorization header.\n        \"\"\"\n        # Create user with OAuth account (but tool won't use it)\n        user = create_test_user(db_session, \"oauth_user\")\n        oauth_account = OAuthAccount(\n            user_id=user.id,\n            oauth_name=\"github\",\n            account_id=\"github_user_789\",\n            account_email=user.email,\n            access_token=\"unused_token\",\n            refresh_token=\"\",\n        )\n        db_session.add(oauth_account)\n        db_session.commit()\n\n        # Create tool with neither oauth_config_id nor passthrough_auth\n        tool = Tool(\n            name=\"test_tool_no_auth\",\n            description=\"Test tool without auth\",\n            openapi_schema=SIMPLE_OPENAPI_SCHEMA,\n            oauth_config_id=None,\n            passthrough_auth=False,\n            user_id=user.id,\n        )\n        db_session.add(tool)\n        db_session.commit()\n        db_session.refresh(tool)\n\n        # Create persona\n        persona = _create_test_persona(db_session, user, [tool])\n        llm = get_default_llm()\n\n        # Construct tools\n        tool_dict = construct_tools(\n            persona=persona,\n            db_session=db_session,\n            emitter=Emitter(merged_queue=queue.Queue()),\n            user=user,\n            llm=llm,\n        )\n\n        # Verify tool was constructed\n        assert tool.id in tool_dict\n        custom_tools = tool_dict[tool.id]\n        assert len(custom_tools) == 1\n        custom_tool = custom_tools[0]\n        assert isinstance(custom_tool, CustomTool)\n\n        # Verify NO authorization header\n        _assert_no_authorization_header(custom_tool.headers)\n\n    def test_oauth_config_with_expired_token_refreshes(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"\n        Test that expired OAuth config tokens are automatically refreshed.\n        \"\"\"\n        import time\n\n        # Create user\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Create OAuth config with expired token\n        oauth_config = _create_test_oauth_config(db_session)\n        expired_token_data = {\n            \"access_token\": \"expired_token\",\n            \"refresh_token\": \"refresh_token_12345\",\n            \"expires_at\": int(time.time()) - 100,  # Expired 100 seconds ago\n        }\n        upsert_user_oauth_token(\n            oauth_config.id, user.id, expired_token_data, db_session\n        )\n\n        # Create tool with oauth_config_id\n        tool = Tool(\n            name=\"test_tool_refresh\",\n            description=\"Test tool with token refresh\",\n            openapi_schema=SIMPLE_OPENAPI_SCHEMA,\n            oauth_config_id=oauth_config.id,\n            passthrough_auth=False,\n            user_id=user.id,\n        )\n        db_session.add(tool)\n        db_session.commit()\n        db_session.refresh(tool)\n\n        # Create persona\n        persona = _create_test_persona(db_session, user, [tool])\n        llm = get_default_llm()\n\n        # Mock the token refresh response\n        mock_response = Mock()\n        mock_response.json.return_value = {\n            \"access_token\": \"refreshed_token_67890\",\n            \"refresh_token\": \"refresh_token_12345\",\n            \"expires_in\": 3600,\n            \"token_type\": \"Bearer\",\n        }\n        mock_response.raise_for_status = Mock()\n\n        with patch(\"onyx.auth.oauth_token_manager.requests.post\") as mock_post:\n            mock_post.return_value = mock_response\n\n            # Construct tools\n            tool_dict = construct_tools(\n                persona=persona,\n                db_session=db_session,\n                emitter=Emitter(merged_queue=queue.Queue()),\n                user=user,\n                llm=llm,\n            )\n\n            # Verify token refresh was called\n            mock_post.assert_called_once()\n            call_args = mock_post.call_args\n            assert call_args[0][0] == oauth_config.token_url\n            assert call_args[1][\"data\"][\"grant_type\"] == \"refresh_token\"\n            assert call_args[1][\"data\"][\"refresh_token\"] == \"refresh_token_12345\"\n\n        # Verify tool was constructed with refreshed token\n        assert tool.id in tool_dict\n        custom_tools = tool_dict[tool.id]\n        assert len(custom_tools) == 1\n        custom_tool = custom_tools[0]\n        assert isinstance(custom_tool, CustomTool)\n\n        # Verify the refreshed token is used\n        _assert_has_authorization_header(custom_tool.headers)\n        auth_header = _get_authorization_header(custom_tool.headers)\n        assert auth_header == \"Bearer refreshed_token_67890\"\n\n    def test_custom_headers_combined_with_oauth_token(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"\n        Test that custom headers are properly combined with OAuth token.\n        The OAuth Authorization header should be added to existing custom headers.\n        \"\"\"\n        # Create user\n        user = create_test_user(db_session, \"oauth_user\")\n\n        # Create OAuth config with token\n        oauth_config = _create_test_oauth_config(db_session)\n        token_data = {\n            \"access_token\": \"oauth_token_abc123\",\n            \"token_type\": \"Bearer\",\n        }\n        upsert_user_oauth_token(oauth_config.id, user.id, token_data, db_session)\n\n        # Create tool with oauth_config_id AND custom headers\n        tool = Tool(\n            name=\"test_tool_combined\",\n            description=\"Test tool with custom headers and OAuth\",\n            openapi_schema=SIMPLE_OPENAPI_SCHEMA,\n            oauth_config_id=oauth_config.id,\n            custom_headers=[\n                {\"key\": \"X-Custom-Header\", \"value\": \"custom-value\"},\n                {\"key\": \"X-API-Key\", \"value\": \"api-key-123\"},\n            ],\n            passthrough_auth=False,\n            user_id=user.id,\n        )\n        db_session.add(tool)\n        db_session.commit()\n        db_session.refresh(tool)\n\n        # Create persona\n        persona = _create_test_persona(db_session, user, [tool])\n        llm = get_default_llm()\n\n        # Construct tools\n        tool_dict = construct_tools(\n            persona=persona,\n            db_session=db_session,\n            emitter=Emitter(merged_queue=queue.Queue()),\n            user=user,\n            llm=llm,\n        )\n\n        # Verify tool was constructed\n        assert tool.id in tool_dict\n        custom_tools = tool_dict[tool.id]\n        assert len(custom_tools) == 1\n        custom_tool = custom_tools[0]\n        assert isinstance(custom_tool, CustomTool)\n\n        # Verify both OAuth token AND custom headers are present\n        _assert_has_authorization_header(custom_tool.headers)\n        auth_header = _get_authorization_header(custom_tool.headers)\n        assert auth_header == \"Bearer oauth_token_abc123\"\n\n        # Headers are capitalized by the tool\n        assert \"X-Custom-Header\" in custom_tool.headers\n        assert custom_tool.headers[\"X-Custom-Header\"] == \"custom-value\"\n        assert \"X-API-Key\" in custom_tool.headers\n        assert custom_tool.headers[\"X-API-Key\"] == \"api-key-123\"\n\n    def test_passthrough_auth_without_user_oauth_account(\n        self, db_session: Session\n    ) -> None:\n        \"\"\"\n        Test that passthrough_auth handles gracefully when user has no OAuth account.\n        \"\"\"\n        # Create user WITHOUT OAuth account\n        user = create_test_user(db_session, \"no_oauth_user\")\n\n        # Create tool with passthrough_auth\n        tool = Tool(\n            name=\"test_tool_no_account\",\n            description=\"Test tool passthrough without account\",\n            openapi_schema=SIMPLE_OPENAPI_SCHEMA,\n            oauth_config_id=None,\n            passthrough_auth=True,\n            user_id=user.id,\n        )\n        db_session.add(tool)\n        db_session.commit()\n        db_session.refresh(tool)\n\n        # Create persona\n        persona = _create_test_persona(db_session, user, [tool])\n        llm = get_default_llm()\n\n        # Construct tools\n        tool_dict = construct_tools(\n            persona=persona,\n            db_session=db_session,\n            emitter=Emitter(merged_queue=queue.Queue()),\n            user=user,\n            llm=llm,\n        )\n\n        # Verify tool was constructed\n        assert tool.id in tool_dict\n        custom_tools = tool_dict[tool.id]\n        assert len(custom_tools) == 1\n        custom_tool = custom_tools[0]\n        assert isinstance(custom_tool, CustomTool)\n\n        # Verify NO authorization header (user has no OAuth account)\n        _assert_no_authorization_header(custom_tool.headers)\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/tools/test_python_tool.py",
    "content": "# \"\"\"\n# External dependency unit tests for Python tool.\n\n# These tests run against a real Code Interpreter service (no mocking of the service).\n# They verify code execution, error handling, timeout behavior, and file generation.\n\n# Requirements:\n# - CODE_INTERPRETER_BASE_URL must be configured and point to a running service\n# - Tests use minimal mocking - only mock run_context infrastructure and db lookups\n# - File store operations execute for real (files are saved and read back)\n# \"\"\"\n\n# import asyncio\n# import io\n# import json\n# from unittest.mock import Mock\n# from unittest.mock import patch\n\n# import pytest\n# from agents import RunContextWrapper\n# from openpyxl import load_workbook\n# from pydantic import TypeAdapter\n# from sqlalchemy.orm import Session\n\n# from onyx.chat.turn.models import ChatTurnContext\n# from onyx.configs.app_configs import CODE_INTERPRETER_BASE_URL\n# from onyx.file_store.models import ChatFileType\n# from onyx.file_store.models import InMemoryChatFile\n# from onyx.file_store.utils import get_default_file_store\n# from onyx.server.query_and_chat.streaming_models import Packet\n# from onyx.server.query_and_chat.streaming_models import PythonToolDelta\n# from onyx.server.query_and_chat.streaming_models import PythonToolStart\n# from onyx.tools.tool_implementations.python.python_tool import PythonTool\n# from onyx.tools.tool_implementations_v2.code_interpreter_client import (\n#     CodeInterpreterClient,\n# )\n# from onyx.tools.tool_implementations_v2.python import _python_execution_core\n# from onyx.tools.tool_implementations_v2.python import python\n# from onyx.tools.tool_implementations_v2.tool_result_models import (\n#     LlmPythonExecutionResult,\n# )\n\n\n# # Apply initialize_file_store fixture to all tests in this module\n# pytestmark = pytest.mark.usefixtures(\"initialize_file_store\")\n\n\n# @pytest.fixture\n# def mock_run_context() -> RunContextWrapper[ChatTurnContext]:\n#     \"\"\"Create a mock run context for testing.\"\"\"\n#     # Create mock emitter\n#     mock_emitter = Mock()\n#     mock_emitter.emit = Mock()\n\n#     # Create mock run dependencies\n#     mock_dependencies = Mock()\n#     mock_dependencies.emitter = mock_emitter\n#     mock_dependencies.db_session = Mock()\n\n#     # Create mock context\n#     mock_context = Mock(spec=ChatTurnContext)\n#     mock_context.current_run_step = 0\n#     mock_context.run_dependencies = mock_dependencies\n#     mock_context.iteration_instructions = []\n#     mock_context.global_iteration_responses = []\n#     mock_context.chat_files = []\n\n#     # Create run context wrapper\n#     run_context = Mock(spec=RunContextWrapper)\n#     run_context.context = mock_context\n\n#     return run_context\n\n\n# @pytest.fixture\n# def code_interpreter_client() -> CodeInterpreterClient:\n#     \"\"\"Create a real Code Interpreter client for testing.\"\"\"\n#     if not CODE_INTERPRETER_BASE_URL:\n#         pytest.skip(\"CODE_INTERPRETER_BASE_URL not configured\")\n#     return CodeInterpreterClient()\n\n\n# def test_python_execution_basic(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n#     code_interpreter_client: CodeInterpreterClient,\n# ) -> None:\n#     \"\"\"Test basic Python execution with simple code.\"\"\"\n#     code = 'print(\"Hello, World!\")'\n\n#     # Mock get_tool_by_name\n#     with patch(\n#         \"onyx.tools.tool_implementations_v2.python.get_tool_by_name\"\n#     ) as mock_get_tool:\n#         mock_tool = Mock()\n#         mock_tool.id = 1\n#         mock_get_tool.return_value = mock_tool\n\n#         # Execute code\n#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)\n\n#     # Verify result\n#     assert isinstance(result, LlmPythonExecutionResult)\n#     assert \"Hello, World!\" in result.stdout\n#     assert result.stderr == \"\"\n#     assert result.exit_code == 0\n#     assert not result.timed_out\n#     assert len(result.generated_files) == 0\n\n#     # Verify context was updated\n#     # Note: @tool_accounting increments current_run_step from 0 to 1 before execution\n#     assert len(mock_run_context.context.iteration_instructions) == 1\n#     instruction = mock_run_context.context.iteration_instructions[0]\n#     assert instruction.iteration_nr == 1\n#     assert instruction.plan and \"Python\" in instruction.plan\n\n#     assert len(mock_run_context.context.global_iteration_responses) == 1\n#     answer = mock_run_context.context.global_iteration_responses[0]\n#     assert answer.tool == \"PythonTool\"\n#     assert \"Hello, World!\" in answer.answer\n\n#     # Verify streaming packets were emitted\n#     mock_emitter = mock_run_context.context.run_dependencies.emitter\n#     emitter_calls = mock_emitter.emit.call_args_list  # type: ignore\n#     assert len(emitter_calls) >= 2  # At least start and delta\n\n#     # Check for PythonToolStart packet\n#     start_packets = [\n#         call[0][0]\n#         for call in emitter_calls\n#         if isinstance(call[0][0].obj, PythonToolStart)\n#     ]\n#     assert len(start_packets) == 1\n\n#     # Check for PythonToolDelta packet\n#     delta_packets = [\n#         call[0][0]\n#         for call in emitter_calls\n#         if isinstance(call[0][0].obj, PythonToolDelta)\n#     ]\n#     assert len(delta_packets) >= 1\n#     assert \"Hello, World!\" in delta_packets[0].obj.stdout\n\n\n# def test_python_execution_with_syntax_error(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n#     code_interpreter_client: CodeInterpreterClient,\n# ) -> None:\n#     \"\"\"Test Python execution with syntax error.\"\"\"\n#     code = \"print('missing closing quote\"\n\n#     # Mock get_tool_by_name\n#     with patch(\n#         \"onyx.tools.tool_implementations_v2.python.get_tool_by_name\"\n#     ) as mock_get_tool:\n#         mock_tool = Mock()\n#         mock_tool.id = 1\n#         mock_get_tool.return_value = mock_tool\n\n#         # Execute code\n#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)\n\n#     # Verify error result\n#     assert isinstance(result, LlmPythonExecutionResult)\n#     assert result.stdout == \"\"\n#     assert len(result.stderr) > 0\n#     assert \"SyntaxError\" in result.stderr or \"unterminated\" in result.stderr.lower()\n#     assert result.exit_code != 0\n#     assert not result.timed_out\n#     assert result.error is not None or len(result.stderr) > 0\n#     assert len(result.generated_files) == 0\n\n\n# def test_python_execution_with_runtime_error(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n#     code_interpreter_client: CodeInterpreterClient,\n# ) -> None:\n#     \"\"\"Test Python execution with runtime error.\"\"\"\n#     code = \"\"\"\n# x = 10\n# y = 0\n# result = x / y  # Division by zero\n# print(result)\n# \"\"\"\n\n#     # Mock get_tool_by_name\n#     with patch(\n#         \"onyx.tools.tool_implementations_v2.python.get_tool_by_name\"\n#     ) as mock_get_tool:\n#         mock_tool = Mock()\n#         mock_tool.id = 1\n#         mock_get_tool.return_value = mock_tool\n\n#         # Execute code\n#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)\n\n#     # Verify error result\n#     assert isinstance(result, LlmPythonExecutionResult)\n#     assert result.exit_code != 0\n#     assert \"ZeroDivisionError\" in result.stderr or \"division\" in result.stderr.lower()\n#     assert result.error is not None or len(result.stderr) > 0\n\n\n# def test_python_execution_timeout(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n# ) -> None:\n#     \"\"\"Test execution timeout handling.\"\"\"\n#     # Code that will run longer than the timeout\n#     code = \"\"\"\n# import time\n# time.sleep(10)\n# print(\"Should not reach here\")\n# \"\"\"\n\n#     # Create client with short timeout (override via execute method)\n#     if not CODE_INTERPRETER_BASE_URL:\n#         pytest.skip(\"CODE_INTERPRETER_BASE_URL not configured\")\n\n#     client = CodeInterpreterClient()\n\n#     # Mock get_tool_by_name\n#     with patch(\n#         \"onyx.tools.tool_implementations_v2.python.get_tool_by_name\"\n#     ) as mock_get_tool:\n#         mock_tool = Mock()\n#         mock_tool.id = 1\n#         mock_get_tool.return_value = mock_tool\n\n#         # Mock the config to use a short timeout\n#         with patch(\n#             \"onyx.tools.tool_implementations_v2.python.CODE_INTERPRETER_DEFAULT_TIMEOUT_MS\",\n#             1000,\n#         ):\n#             # Execute code\n#             result = _python_execution_core(mock_run_context, code, client)\n\n#     # Verify timeout result\n#     assert isinstance(result, LlmPythonExecutionResult)\n#     assert result.timed_out\n\n\n# def test_python_execution_file_generation(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n#     code_interpreter_client: CodeInterpreterClient,\n#     db_session: Session,  # Needed to initialize DB engine for file_store\n# ) -> None:\n#     \"\"\"Test file generation and retrieval.\"\"\"\n#     code = \"\"\"\n# import csv\n\n# # Create a CSV file\n# with open('test_output.csv', 'w', newline='') as f:\n#     writer = csv.writer(f)\n#     writer.writerow(['Name', 'Age', 'City'])\n#     writer.writerow(['Alice', '30', 'New York'])\n#     writer.writerow(['Bob', '25', 'San Francisco'])\n\n# print(\"CSV file created successfully\")\n# \"\"\"\n\n#     # Mock only get_tool_by_name (database lookup)\n#     with patch(\n#         \"onyx.tools.tool_implementations_v2.python.get_tool_by_name\"\n#     ) as mock_get_tool:\n#         mock_tool = Mock()\n#         mock_tool.id = 1\n#         mock_get_tool.return_value = mock_tool\n\n#         # Execute code - file store operations happen for real\n#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)\n\n#     # Verify result\n#     assert isinstance(result, LlmPythonExecutionResult)\n#     assert result.exit_code == 0\n#     assert \"CSV file created successfully\" in result.stdout\n#     assert len(result.generated_files) == 1\n\n#     # Verify file metadata\n#     generated_file = result.generated_files[0]\n#     assert generated_file.filename == \"test_output.csv\"\n#     assert generated_file.file_link  # File link exists\n#     assert generated_file.file_link.startswith(\"http://localhost:3000/api/chat/file/\")\n\n#     # Extract file_id from file_link\n#     file_id = generated_file.file_link.split(\"/\")[-1]\n\n#     # Verify we can read the file back from the file store\n#     file_store = get_default_file_store()\n#     file_io = file_store.read_file(file_id)\n#     file_content = file_io.read()\n\n#     # Verify file content\n#     assert b\"Name,Age,City\" in file_content\n#     assert b\"Alice,30,New York\" in file_content\n#     assert b\"Bob,25,San Francisco\" in file_content\n\n#     # Verify iteration answer includes file_ids\n#     assert len(mock_run_context.context.global_iteration_responses) == 1\n#     answer = mock_run_context.context.global_iteration_responses[0]\n#     assert answer.file_ids == [file_id]\n\n\n# def test_python_execution_with_matplotlib(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n#     code_interpreter_client: CodeInterpreterClient,\n#     db_session: Session,  # Needed to initialize DB engine for file_store\n# ) -> None:\n#     \"\"\"Test matplotlib plot generation.\"\"\"\n#     code = \"\"\"\n# import matplotlib\n# matplotlib.use('Agg')  # Use non-interactive backend\n# import matplotlib.pyplot as plt\n# import numpy as np\n\n# # Generate data\n# x = np.linspace(0, 10, 100)\n# y = np.sin(x)\n\n# # Create plot\n# plt.figure(figsize=(10, 6))\n# plt.plot(x, y)\n# plt.title('Sine Wave')\n# plt.xlabel('x')\n# plt.ylabel('sin(x)')\n# plt.grid(True)\n\n# # Save plot\n# plt.savefig('sine_wave.png')\n# print(\"Plot saved successfully\")\n# \"\"\"\n\n#     # Mock only get_tool_by_name (database lookup)\n#     with patch(\n#         \"onyx.tools.tool_implementations_v2.python.get_tool_by_name\"\n#     ) as mock_get_tool:\n#         mock_tool = Mock()\n#         mock_tool.id = 1\n#         mock_get_tool.return_value = mock_tool\n\n#         # Execute code - file store operations happen for real\n#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)\n\n#     # Verify result\n#     assert isinstance(result, LlmPythonExecutionResult)\n#     assert result.exit_code == 0\n#     assert \"Plot saved successfully\" in result.stdout\n#     assert len(result.generated_files) == 1\n\n#     # Verify file metadata\n#     generated_file = result.generated_files[0]\n#     assert generated_file.filename == \"sine_wave.png\"\n#     assert \".png\" in generated_file.filename\n\n#     # Extract file_id from file_link\n#     file_id = generated_file.file_link.split(\"/\")[-1]\n\n#     # Verify we can read the file back from the file store\n#     file_store = get_default_file_store()\n#     file_io = file_store.read_file(file_id)\n#     file_content = file_io.read()\n\n#     # Verify the file is a valid PNG (check PNG magic bytes)\n#     # PNG magic bytes: 89 50 4E 47 0D 0A 1A 0A\n#     assert file_content[:8] == b\"\\x89PNG\\r\\n\\x1a\\n\"\n#     assert len(file_content) > 1000  # PNG should be substantial\n\n\n# def test_python_execution_context_updates(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n#     code_interpreter_client: CodeInterpreterClient,\n# ) -> None:\n#     \"\"\"Test that run_context is properly updated.\"\"\"\n#     code = 'print(\"Context update test\")'\n\n#     # Mock get_tool_by_name\n#     with patch(\n#         \"onyx.tools.tool_implementations_v2.python.get_tool_by_name\"\n#     ) as mock_get_tool:\n#         mock_tool = Mock()\n#         mock_tool.id = 42\n#         mock_get_tool.return_value = mock_tool\n\n#         # Set specific run step - will be incremented to 6 by @tool_accounting\n#         mock_run_context.context.current_run_step = 5\n\n#         # Execute code\n#         _python_execution_core(mock_run_context, code, code_interpreter_client)\n\n#     # Verify iteration_instructions was updated\n#     # Note: @tool_accounting increments from 5 to 6\n#     assert len(mock_run_context.context.iteration_instructions) == 1\n#     instruction = mock_run_context.context.iteration_instructions[0]\n#     assert instruction.iteration_nr == 6\n#     assert instruction.plan == \"Executing Python code\"\n#     assert instruction.purpose == \"Running Python code\"\n#     assert \"secure environment\" in instruction.reasoning\n\n#     # Verify global_iteration_responses was updated\n#     assert len(mock_run_context.context.global_iteration_responses) == 1\n#     answer = mock_run_context.context.global_iteration_responses[0]\n#     assert answer.tool == \"PythonTool\"\n#     assert answer.tool_id == 42\n#     assert answer.iteration_nr == 6\n#     assert answer.parallelization_nr == 0\n#     assert answer.question == \"Execute Python code\"\n#     assert answer.reasoning and \"secure environment\" in answer.reasoning\n#     assert \"Context update test\" in answer.answer\n#     assert answer.cited_documents == {}\n\n#     # Verify packets were emitted with correct index\n#     mock_emitter = mock_run_context.context.run_dependencies.emitter\n#     emitter_calls = mock_emitter.emit.call_args_list  # type: ignore\n#     for call in emitter_calls:\n#         packet = call[0][0]\n#         assert isinstance(packet, Packet)\n#         assert packet.ind == 6\n\n\n# def test_python_tool_availability_with_url_set(db_session: Session) -> None:\n#     \"\"\"Test PythonTool.is_available() returns True when URL is configured.\"\"\"\n#     with patch(\n#         \"onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL\",\n#         \"http://localhost:8000\",\n#     ):\n#         assert PythonTool.is_available(db_session) is True\n\n\n# def test_python_tool_availability_without_url(db_session: Session) -> None:\n#     \"\"\"Test PythonTool.is_available() returns False when URL is not configured.\"\"\"\n#     with patch(\n#         \"onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL\",\n#         None,\n#     ):\n#         assert PythonTool.is_available(db_session) is False\n\n#     with patch(\n#         \"onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL\",\n#         \"\",\n#     ):\n#         assert PythonTool.is_available(db_session) is False\n\n\n# def test_python_function_tool_wrapper(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n#     code_interpreter_client: CodeInterpreterClient,\n# ) -> None:\n#     \"\"\"Test the @function_tool decorated python() wrapper function.\"\"\"\n#     code = 'print(\"Testing function tool wrapper\")'\n\n#     # Mock get_tool_by_name and patch CodeInterpreterClient to use our fixture\n#     with patch(\n#         \"onyx.tools.tool_implementations_v2.python.get_tool_by_name\"\n#     ) as mock_get_tool:\n#         with patch(\n#             \"onyx.tools.tool_implementations_v2.python.CodeInterpreterClient\"\n#         ) as mock_client_class:\n#             mock_tool = Mock()\n#             mock_tool.id = 1\n#             mock_get_tool.return_value = mock_tool\n#             mock_client_class.return_value = code_interpreter_client\n\n#             # Call the function tool wrapper\n#             result_coro = python.on_invoke_tool(mock_run_context, json.dumps({\"code\": code}))  # type: ignore\n#             result_json: str = asyncio.run(result_coro)  # type: ignore\n\n#     # Verify result is JSON string\n#     assert isinstance(result_json, str)\n\n#     # Parse and verify result\n#     adapter = TypeAdapter(LlmPythonExecutionResult)\n#     result = adapter.validate_json(result_json)\n\n#     assert isinstance(result, LlmPythonExecutionResult)\n#     assert \"Testing function tool wrapper\" in result.stdout\n#     assert result.exit_code == 0\n\n\n# def test_python_execution_output_truncation(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n#     code_interpreter_client: CodeInterpreterClient,\n# ) -> None:\n#     \"\"\"Test that large outputs are properly truncated.\"\"\"\n#     # Generate code that produces output larger than truncation limit\n#     code = \"\"\"\n# for i in range(10000):\n#     print(f\"Line {i}: \" + \"x\" * 100)\n# \"\"\"\n\n#     # Mock get_tool_by_name\n#     with patch(\n#         \"onyx.tools.tool_implementations_v2.python.get_tool_by_name\"\n#     ) as mock_get_tool:\n#         # Set a small truncation limit for testing\n#         with patch(\n#             \"onyx.tools.tool_implementations_v2.python.CODE_INTERPRETER_MAX_OUTPUT_LENGTH\",\n#             5000,\n#         ):\n#             mock_tool = Mock()\n#             mock_tool.id = 1\n#             mock_get_tool.return_value = mock_tool\n\n#             # Execute code\n#             result = _python_execution_core(\n#                 mock_run_context, code, code_interpreter_client\n#             )\n\n#     # Verify output was truncated\n#     assert len(result.stdout) <= 5000 + 200  # Allow for truncation message\n#     assert \"output truncated\" in result.stdout\n#     assert \"characters omitted\" in result.stdout\n\n\n# def test_python_execution_multiple_files(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n#     code_interpreter_client: CodeInterpreterClient,\n#     db_session: Session,  # Needed to initialize DB engine for file_store\n# ) -> None:\n#     \"\"\"Test generation of multiple files.\"\"\"\n#     code = \"\"\"\n# # Create multiple files\n# with open('file1.txt', 'w') as f:\n#     f.write('Content of file 1')\n\n# with open('file2.txt', 'w') as f:\n#     f.write('Content of file 2')\n\n# with open('file3.txt', 'w') as f:\n#     f.write('Content of file 3')\n\n# print(\"Created 3 files\")\n# \"\"\"\n\n#     # Mock only get_tool_by_name (database lookup)\n#     with patch(\n#         \"onyx.tools.tool_implementations_v2.python.get_tool_by_name\"\n#     ) as mock_get_tool:\n#         mock_tool = Mock()\n#         mock_tool.id = 1\n#         mock_get_tool.return_value = mock_tool\n\n#         # Execute code - file store operations happen for real\n#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)\n\n#     # Verify result\n#     assert isinstance(result, LlmPythonExecutionResult)\n#     assert result.exit_code == 0\n#     assert \"Created 3 files\" in result.stdout\n#     assert len(result.generated_files) == 3\n\n#     # Verify all files have unique IDs and proper metadata\n#     file_ids_result = [f.file_link.split(\"/\")[-1] for f in result.generated_files]\n#     assert len(set(file_ids_result)) == 3  # All unique\n\n#     # Verify filenames\n#     filenames = [f.filename for f in result.generated_files]\n#     assert \"file1.txt\" in filenames\n#     assert \"file2.txt\" in filenames\n#     assert \"file3.txt\" in filenames\n\n#     # Verify we can read all files back from the file store\n#     file_store = get_default_file_store()\n\n#     # Create a mapping of filename to generated file for easier verification\n#     files_by_name = {f.filename: f for f in result.generated_files}\n\n#     # Verify each expected file\n#     for i in range(1, 4):\n#         filename = f\"file{i}.txt\"\n#         assert filename in files_by_name, f\"Expected file {filename} not found\"\n\n#         generated_file = files_by_name[filename]\n#         file_id = generated_file.file_link.split(\"/\")[-1]\n#         file_io = file_store.read_file(file_id)\n#         file_content = file_io.read()\n#         expected_content = f\"Content of file {i}\".encode()\n#         assert (\n#             expected_content in file_content\n#         ), f\"Expected content not found in {filename}\"\n\n\n# def test_python_execution_client_error_handling(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n# ) -> None:\n#     \"\"\"Test error handling when Code Interpreter service fails.\"\"\"\n#     code = 'print(\"Test\")'\n\n#     # Create a client that will fail\n#     if not CODE_INTERPRETER_BASE_URL:\n#         pytest.skip(\"CODE_INTERPRETER_BASE_URL not configured\")\n\n#     client = CodeInterpreterClient()\n\n#     # Mock the execute method to raise an exception\n#     with patch.object(client, \"execute\", side_effect=Exception(\"Service unavailable\")):\n#         # Execute code\n#         result = _python_execution_core(mock_run_context, code, client)\n\n#     # Verify error result\n#     assert isinstance(result, LlmPythonExecutionResult)\n#     assert result.exit_code == -1\n#     error_msg = result.error or \"\"\n#     assert \"Service unavailable\" in result.stderr or \"Service unavailable\" in error_msg\n#     assert not result.timed_out\n#     assert len(result.generated_files) == 0\n\n#     # Verify error delta was emitted\n#     mock_emitter = mock_run_context.context.run_dependencies.emitter\n#     emitter_calls = mock_emitter.emit.call_args_list  # type: ignore\n#     delta_packets = [\n#         call[0][0]\n#         for call in emitter_calls\n#         if isinstance(call[0][0].obj, PythonToolDelta)\n#     ]\n#     assert len(delta_packets) >= 1\n#     assert \"Service unavailable\" in delta_packets[-1].obj.stderr\n\n\n# def test_python_execution_with_excel_file(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n#     code_interpreter_client: CodeInterpreterClient,\n#     db_session: Session,  # Needed to initialize DB engine for file_store\n# ) -> None:\n#     \"\"\"Test Excel file generation with financial data.\"\"\"\n#     code = \"\"\"\n# import pandas as pd\n\n# # Create financial sample data\n# data = {\n#     'Segment': ['Government', 'Government', 'Midmarket', 'Midmarket', 'Enterprise'],\n#     'Country': ['Canada', 'Germany', 'France', 'Germany', 'Canada'],\n#     'Product': ['Carretera', 'Carretera', 'Carretera', 'Carretera', 'Amarilla'],\n#     'Units Sold': [1618.5, 1321, 2178, 888, 2470],\n#     'Manufacturing Price': [3, 3, 3, 3, 260],\n#     'Sale Price': [20, 20, 20, 20, 300],\n#     'Gross Sales': [32370, 26420, 43560, 17760, 741000],\n#     'Discounts': [0, 0, 0, 0, 0],\n#     'Sales': [32370, 26420, 43560, 17760, 741000],\n#     'COGS': [16850, 13940, 22800, 9390, 642000],\n#     'Profit': [15520, 12480, 20760, 8370, 99000],\n#     'Month': ['January', 'January', 'June', 'April', 'September']\n# }\n\n# # Create DataFrame\n# df = pd.DataFrame(data)\n\n# # Write to Excel\n# df.to_excel('financial_report.xlsx', index=False, sheet_name='Financial Data')\n\n# print(f\"Excel file created with {len(df)} rows\")\n# \"\"\"\n\n#     # Mock only get_tool_by_name (database lookup)\n#     with patch(\n#         \"onyx.tools.tool_implementations_v2.python.get_tool_by_name\"\n#     ) as mock_get_tool:\n#         mock_tool = Mock()\n#         mock_tool.id = 1\n#         mock_get_tool.return_value = mock_tool\n\n#         # Execute code - file store operations happen for real\n#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)\n\n#     # Verify result\n#     assert isinstance(result, LlmPythonExecutionResult)\n#     assert result.exit_code == 0\n#     assert \"Excel file created with 5 rows\" in result.stdout\n#     assert len(result.generated_files) == 1\n\n#     # Verify file metadata\n#     generated_file = result.generated_files[0]\n#     assert generated_file.filename == \"financial_report.xlsx\"\n#     assert \".xlsx\" in generated_file.filename\n\n#     # Extract file_id from file_link\n#     file_id = generated_file.file_link.split(\"/\")[-1]\n\n#     # Verify we can read the file back from the file store\n#     file_store = get_default_file_store()\n#     file_io = file_store.read_file(file_id)\n#     file_content = file_io.read()\n\n#     # Verify the file is a valid Excel file (check ZIP magic bytes - xlsx is a ZIP archive)\n#     # ZIP magic bytes: 50 4B 03 04\n#     assert file_content[:4] == b\"PK\\x03\\x04\"\n#     assert len(file_content) > 1000  # Excel file should be substantial\n\n#     # Verify we can parse the Excel file with openpyxl directly\n#     file_io = io.BytesIO(file_content)\n#     workbook = load_workbook(file_io)\n#     sheet = workbook[\"Financial Data\"]\n\n#     # Verify data structure - get headers from first row\n#     first_row = list(sheet.iter_rows(min_row=1, max_row=1, values_only=True))[0]\n#     headers = list(first_row) if first_row else []\n#     expected_columns = [\n#         \"Segment\",\n#         \"Country\",\n#         \"Product\",\n#         \"Units Sold\",\n#         \"Manufacturing Price\",\n#         \"Sale Price\",\n#         \"Gross Sales\",\n#         \"Discounts\",\n#         \"Sales\",\n#         \"COGS\",\n#         \"Profit\",\n#         \"Month\",\n#     ]\n#     assert headers == expected_columns\n\n#     # Verify row count (excluding header)\n#     assert sheet.max_row == 6  # 1 header + 5 data rows\n\n#     # Read data rows\n#     rows = []\n#     for row in sheet.iter_rows(min_row=2, values_only=True):\n#         rows.append(row)\n\n#     assert len(rows) == 5\n\n#     # Verify some sample data\n#     segments = [row[0] for row in rows]\n#     countries = [row[1] for row in rows]\n#     units_sold = [float(row[3]) if row[3] is not None else 0.0 for row in rows]  # type: ignore\n#     profits = [float(row[10]) if row[10] is not None else 0.0 for row in rows]  # type: ignore\n\n#     assert \"Government\" in segments\n#     assert \"Canada\" in countries\n#     assert sum(units_sold) > 8000  # Total units sold\n#     assert sum(profits) > 155000  # Total profit\n\n\n# def test_python_execution_with_excel_file_input(\n#     mock_run_context: RunContextWrapper[ChatTurnContext],\n#     code_interpreter_client: CodeInterpreterClient,\n#     db_session: Session,  # Needed to initialize DB engine for file_store\n# ) -> None:\n#     \"\"\"Test processing an uploaded Excel file - reading and analyzing it.\"\"\"\n#     # Load the sample Excel file\n#     import os\n\n#     test_file_path = os.path.join(\n#         os.path.dirname(__file__), \"data\", \"financial-sample.xlsx\"\n#     )\n\n#     with open(test_file_path, \"rb\") as f:\n#         file_content = f.read()\n\n#     # Create InMemoryChatFile with the Excel file\n#     chat_file = InMemoryChatFile(\n#         file_id=\"test-financial-sample\",\n#         content=file_content,\n#         file_type=ChatFileType.DOC,\n#         filename=\"financial-sample.xlsx\",\n#     )\n\n#     # Add the file to the mock context's chat_files\n#     mock_run_context.context.chat_files = [chat_file]\n\n#     # Code to analyze the uploaded Excel file\n#     code = \"\"\"\n# import pandas as pd\n# import matplotlib\n# matplotlib.use('Agg')\n# import matplotlib.pyplot as plt\n# from openpyxl import load_workbook\n\n# # Read the uploaded Excel file using openpyxl directly\n# workbook = load_workbook('financial-sample.xlsx')\n# sheet = workbook.active\n\n# # Convert to pandas DataFrame\n# data = []\n# headers = [cell.value for cell in sheet[1]]\n# for row in sheet.iter_rows(min_row=2, values_only=True):\n#     data.append(row)\n\n# df = pd.DataFrame(data, columns=headers)\n\n# print(f\"Loaded Excel file with {len(df)} rows and {len(df.columns)} columns\")\n# print(f\"\\\\nColumns: {', '.join(df.columns.tolist())}\")\n\n# # Perform analysis\n# print(f\"\\\\n=== Analysis ===\")\n\n# # Group by segment and calculate total sales and profit\n# segment_summary = df.groupby('Segment').agg({\n#     ' Sales': 'sum',\n#     'Profit': 'sum',\n#     'Units Sold': 'sum'\n# }).round(2)\n\n# print(f\"\\\\nSales by Segment:\")\n# print(segment_summary)\n\n# # Find top 5 products by profit\n# top_products = df.groupby('Product')['Profit'].sum().sort_values(ascending=False).head(5)\n# print(f\"\\\\nTop 5 Products by Profit:\")\n# print(top_products)\n\n# # Calculate profit margin\n# total_sales = df[' Sales'].sum()\n# total_profit = df['Profit'].sum()\n# profit_margin = (total_profit / total_sales * 100) if total_sales > 0 else 0\n# print(f\"\\\\nOverall Profit Margin: {profit_margin:.2f}%\")\n\n# # Create a visualization\n# fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))\n\n# # Sales by Segment\n# segment_summary[' Sales'].plot(kind='bar', ax=ax1, color='steelblue')\n# ax1.set_title('Total Sales by Segment')\n# ax1.set_xlabel('Segment')\n# ax1.set_ylabel('Sales ($)')\n# ax1.tick_params(axis='x', rotation=45)\n\n# # Top 5 Products by Profit\n# top_products.plot(kind='barh', ax=ax2, color='seagreen')\n# ax2.set_title('Top 5 Products by Profit')\n# ax2.set_xlabel('Profit ($)')\n# ax2.set_ylabel('Product')\n\n# plt.tight_layout()\n# plt.savefig('financial_analysis.png', dpi=100, bbox_inches='tight')\n# print(f\"\\\\nVisualization saved as financial_analysis.png\")\n\n# # Create summary report Excel file\n# summary_data = {\n#     'Metric': ['Total Sales', 'Total Profit', 'Profit Margin %', 'Total Units Sold', 'Number of Records'],\n#     'Value': [\n#         f\"${total_sales:,.2f}\",\n#         f\"${total_profit:,.2f}\",\n#         f\"{profit_margin:.2f}%\",\n#         f\"{df['Units Sold'].sum():,.0f}\",\n#         len(df)\n#     ]\n# }\n# summary_df = pd.DataFrame(summary_data)\n\n# with pd.ExcelWriter('financial_summary.xlsx') as writer:\n#     summary_df.to_excel(writer, sheet_name='Summary', index=False)\n#     segment_summary.to_excel(writer, sheet_name='By Segment')\n\n# print(f\"Summary report saved as financial_summary.xlsx\")\n# \"\"\"\n\n#     # Mock only get_tool_by_name (database lookup)\n#     with patch(\n#         \"onyx.tools.tool_implementations_v2.python.get_tool_by_name\"\n#     ) as mock_get_tool:\n#         mock_tool = Mock()\n#         mock_tool.id = 1\n#         mock_get_tool.return_value = mock_tool\n\n#         # Execute code - file store operations happen for real\n#         result = _python_execution_core(mock_run_context, code, code_interpreter_client)\n\n#     # Verify result\n#     assert isinstance(result, LlmPythonExecutionResult)\n#     assert result.exit_code == 0\n#     assert \"Loaded Excel file\" in result.stdout\n#     assert \"Analysis\" in result.stdout\n#     assert \"Sales by Segment\" in result.stdout\n#     assert \"Top 5 Products by Profit\" in result.stdout\n#     assert \"Profit Margin\" in result.stdout\n\n#     # Should generate 2 files: PNG visualization and Excel summary\n#     assert len(result.generated_files) == 2\n\n#     # Verify generated files\n#     filenames = [f.filename for f in result.generated_files]\n#     assert \"financial_analysis.png\" in filenames\n#     assert \"financial_summary.xlsx\" in filenames\n\n#     # Verify we can read and validate the generated files\n#     file_store = get_default_file_store()\n\n#     # Check the PNG file\n#     png_file = next(\n#         f for f in result.generated_files if f.filename == \"financial_analysis.png\"\n#     )\n#     png_file_id = png_file.file_link.split(\"/\")[-1]\n#     png_io = file_store.read_file(png_file_id)\n#     png_content = png_io.read()\n#     assert png_content[:8] == b\"\\x89PNG\\r\\n\\x1a\\n\"  # PNG magic bytes\n#     assert len(png_content) > 5000  # Should be substantial\n\n#     # Check the Excel summary file\n#     xlsx_file = next(\n#         f for f in result.generated_files if f.filename == \"financial_summary.xlsx\"\n#     )\n#     xlsx_file_id = xlsx_file.file_link.split(\"/\")[-1]\n#     xlsx_io = file_store.read_file(xlsx_file_id)\n#     xlsx_content = xlsx_io.read()\n#     assert xlsx_content[:4] == b\"PK\\x03\\x04\"  # ZIP/Excel magic bytes\n\n#     # Parse and verify the summary Excel file using openpyxl directly\n#     xlsx_io_obj = io.BytesIO(xlsx_content)\n#     workbook = load_workbook(xlsx_io_obj)\n#     sheet = workbook[\"Summary\"]\n\n#     # Read headers from first row\n#     first_row = list(sheet.iter_rows(min_row=1, max_row=1, values_only=True))[0]\n#     headers = list(first_row) if first_row else []\n#     assert \"Metric\" in headers\n#     assert \"Value\" in headers\n\n#     # Read all rows and extract metrics\n#     metrics = []\n#     for row in sheet.iter_rows(min_row=2, values_only=True):\n#         if row[0]:  # Metric column\n#             metrics.append(row[0])\n\n#     assert \"Total Sales\" in metrics\n#     assert \"Total Profit\" in metrics\n#     assert \"Profit Margin %\" in metrics\n\n\n# if __name__ == \"__main__\":\n#     # Run with: python -m pytest tests/external_dependency_unit/tools/test_python_tool.py -v\n#     pytest.main([__file__, \"-v\"])\n\n\nfrom __future__ import annotations\n\nimport io\nimport json\nimport threading\nfrom collections.abc import Generator\nfrom http.server import BaseHTTPRequestHandler\nfrom http.server import HTTPServer\nfrom typing import Any\nfrom unittest.mock import patch\n\nimport pytest\nfrom fastapi import UploadFile\nfrom fastapi.background import BackgroundTasks\nfrom sqlalchemy.orm import Session\nfrom starlette.datastructures import Headers\n\nimport onyx.tools.tool_implementations.python.code_interpreter_client as ci_mod\nfrom onyx.chat.process_message import handle_stream_message_objects\nfrom onyx.db.models import Persona\nfrom onyx.db.tools import get_builtin_tool\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.file_store.models import FileDescriptor\nfrom onyx.server.features.projects.api import upload_user_files\nfrom onyx.server.query_and_chat.chat_backend import get_chat_session\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import PythonToolDelta\nfrom onyx.server.query_and_chat.streaming_models import PythonToolStart\nfrom onyx.server.query_and_chat.streaming_models import SectionEnd\nfrom onyx.server.query_and_chat.streaming_models import ToolCallArgumentDelta\nfrom onyx.tools.tool_implementations.python.python_tool import PythonTool\nfrom tests.external_dependency_unit.answer.stream_test_builder import StreamTestBuilder\nfrom tests.external_dependency_unit.answer.stream_test_utils import create_chat_session\nfrom tests.external_dependency_unit.answer.stream_test_utils import create_placement\nfrom tests.external_dependency_unit.conftest import create_test_user\nfrom tests.external_dependency_unit.mock_llm import LLMAnswerResponse\nfrom tests.external_dependency_unit.mock_llm import LLMToolCallResponse\nfrom tests.external_dependency_unit.mock_llm import use_mock_llm\n\n\n# ---------------------------------------------------------------------------\n# Mock Code Interpreter Server\n# ---------------------------------------------------------------------------\n\n\nclass CapturedRequest:\n    \"\"\"A single HTTP request captured by the mock server.\"\"\"\n\n    def __init__(self, method: str, path: str, body: bytes) -> None:\n        self.method = method\n        self.path = path\n        self.body = body\n\n    def json_body(self) -> dict[str, Any]:\n        return json.loads(self.body)\n\n\nclass _MockCIHandler(BaseHTTPRequestHandler):\n    \"\"\"HTTP handler that records every request and returns canned responses.\"\"\"\n\n    server: MockCodeInterpreterServer\n\n    def do_POST(self) -> None:\n        body = self._read_body()\n        self._capture(\"POST\", body)\n\n        if self.path == \"/v1/files\":\n            self.server._file_counter += 1\n            self._respond_json(\n                200, {\"file_id\": f\"mock-ci-file-{self.server._file_counter}\"}\n            )\n        elif self.path == \"/v1/execute/stream\":\n            if self.server.streaming_enabled:\n                self._respond_sse(\n                    [\n                        (\n                            \"output\",\n                            {\"stream\": \"stdout\", \"data\": \"mock output\\n\"},\n                        ),\n                        (\n                            \"result\",\n                            {\n                                \"exit_code\": 0,\n                                \"timed_out\": False,\n                                \"duration_ms\": 50,\n                                \"files\": [],\n                            },\n                        ),\n                    ]\n                )\n            else:\n                self._respond_json(404, {\"error\": \"not found\"})\n        elif self.path == \"/v1/execute\":\n            self._respond_json(\n                200,\n                {\n                    \"stdout\": \"mock output\\n\",\n                    \"stderr\": \"\",\n                    \"exit_code\": 0,\n                    \"timed_out\": False,\n                    \"duration_ms\": 50,\n                    \"files\": [],\n                },\n            )\n        else:\n            self._respond_json(404, {\"error\": \"not found\"})\n\n    def do_GET(self) -> None:\n        self._capture(\"GET\", b\"\")\n        if self.path == \"/health\":\n            self._respond_json(200, {\"status\": \"ok\"})\n        else:\n            self._respond_json(404, {\"error\": \"not found\"})\n\n    def do_DELETE(self) -> None:\n        self._capture(\"DELETE\", b\"\")\n        self.send_response(200)\n        self.end_headers()\n\n    def _read_body(self) -> bytes:\n        length = int(self.headers.get(\"Content-Length\", 0))\n        return self.rfile.read(length) if length else b\"\"\n\n    def _capture(self, method: str, body: bytes) -> None:\n        self.server.captured_requests.append(\n            CapturedRequest(method=method, path=self.path, body=body)\n        )\n\n    def _respond_json(self, status: int, data: dict[str, Any]) -> None:\n        payload = json.dumps(data).encode()\n        self.send_response(status)\n        self.send_header(\"Content-Type\", \"application/json\")\n        self.send_header(\"Content-Length\", str(len(payload)))\n        self.end_headers()\n        self.wfile.write(payload)\n\n    def _respond_sse(self, events: list[tuple[str, dict[str, Any]]]) -> None:\n        frames = []\n        for event_type, data in events:\n            frames.append(f\"event: {event_type}\\ndata: {json.dumps(data)}\\n\\n\")\n        payload = \"\".join(frames).encode()\n        self.send_response(200)\n        self.send_header(\"Content-Type\", \"text/event-stream\")\n        self.send_header(\"Content-Length\", str(len(payload)))\n        self.end_headers()\n        self.wfile.write(payload)\n\n    def log_message(self, format: str, *args: Any) -> None:  # noqa: A002\n        pass\n\n\nclass MockCodeInterpreterServer(HTTPServer):\n    \"\"\"HTTPServer wrapper that records requests for assertions.\"\"\"\n\n    def __init__(self) -> None:\n        super().__init__((\"localhost\", 0), _MockCIHandler)\n        self.captured_requests: list[CapturedRequest] = []\n        self._file_counter = 0\n        self.streaming_enabled: bool = True\n\n    @property\n    def url(self) -> str:\n        host, port = self.server_address\n        return f\"http://{host!s}:{port}\"\n\n    def start(self) -> None:\n        threading.Thread(target=self.serve_forever, daemon=True).start()\n\n    def get_requests(\n        self,\n        method: str | None = None,\n        path: str | None = None,\n    ) -> list[CapturedRequest]:\n        results = self.captured_requests\n        if method:\n            results = [r for r in results if r.method == method]\n        if path:\n            results = [r for r in results if r.path == path]\n        return results\n\n\n# ---------------------------------------------------------------------------\n# Fixtures\n# ---------------------------------------------------------------------------\n\n\n@pytest.fixture(scope=\"module\")\ndef mock_ci_server() -> Generator[MockCodeInterpreterServer, None, None]:\n    server = MockCodeInterpreterServer()\n    server.start()\n    yield server\n    server.shutdown()\n\n\n@pytest.fixture(autouse=True)\ndef _clear_health_cache() -> None:\n    \"\"\"Reset the health check cache before every test.\"\"\"\n    import onyx.tools.tool_implementations.python.code_interpreter_client as mod\n\n    mod._health_cache = {}\n\n\n@pytest.fixture()\ndef _attach_python_tool_to_default_persona(db_session: Session) -> None:\n    \"\"\"Ensure the default persona (id=0) has the PythonTool attached.\"\"\"\n    python_tool_db = get_builtin_tool(db_session, PythonTool)\n    persona = db_session.get(Persona, 0)\n    assert persona is not None, \"Default persona (id=0) not found\"\n\n    if python_tool_db not in persona.tools:\n        persona.tools.append(python_tool_db)\n        db_session.commit()\n\n\n# ---------------------------------------------------------------------------\n# Test\n# ---------------------------------------------------------------------------\n\n\ndef test_code_interpreter_receives_chat_files(\n    db_session: Session,\n    mock_ci_server: MockCodeInterpreterServer,\n    _attach_python_tool_to_default_persona: None,\n    initialize_file_store: None,  # noqa: ARG001\n) -> None:\n    mock_ci_server.captured_requests.clear()\n    mock_ci_server._file_counter = 0\n    mock_url = mock_ci_server.url\n\n    user = create_test_user(db_session, \"ci_test_admin\")\n    chat_session = create_chat_session(db_session=db_session, user=user)\n\n    # Upload a test CSV\n    csv_content = b\"name,age,city\\nAlice,30,NYC\\nBob,25,SF\\n\"\n    result = upload_user_files(\n        bg_tasks=BackgroundTasks(),\n        files=[\n            UploadFile(\n                file=io.BytesIO(csv_content),\n                filename=\"data.csv\",\n                size=len(csv_content),\n                headers=Headers({\"content-type\": \"text/csv\"}),\n            )\n        ],\n        project_id=None,\n        temp_id_map=json.dumps({\"0|data.csv\": \"data.csv\"}),\n        user=user,\n        db_session=db_session,\n    )\n    assert len(result.user_files) == 1\n    user_file = result.user_files[0]\n\n    file_descriptor: FileDescriptor = {\n        \"id\": user_file.file_id,\n        \"type\": ChatFileType.TABULAR,\n        \"name\": \"data.csv\",\n        \"user_file_id\": str(user_file.id),\n    }\n\n    code = \"import pandas as pd\\ndf = pd.read_csv('data.csv')\\nprint(df)\"\n    msg_req = SendMessageRequest(\n        message=\"Read the CSV and print it.\",\n        chat_session_id=chat_session.id,\n        file_descriptors=[file_descriptor],\n        stream=True,\n    )\n\n    original_defaults = ci_mod.CodeInterpreterClient.__init__.__defaults__\n    with (\n        use_mock_llm() as mock_llm,\n        patch(\n            \"onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL\",\n            mock_url,\n        ),\n        patch(\n            \"onyx.tools.tool_implementations.python.code_interpreter_client.CODE_INTERPRETER_BASE_URL\",\n            mock_url,\n        ),\n    ):\n        mock_llm.add_response(\n            LLMToolCallResponse(\n                tool_name=\"python\",\n                tool_call_id=\"call_test_1\",\n                tool_call_argument_tokens=[json.dumps({\"code\": code})],\n            )\n        )\n        mock_llm.forward_till_end()\n\n        ci_mod.CodeInterpreterClient.__init__.__defaults__ = (mock_url,)\n        try:\n            list(\n                handle_stream_message_objects(\n                    new_msg_req=msg_req, user=user, db_session=db_session\n                )\n            )\n        finally:\n            ci_mod.CodeInterpreterClient.__init__.__defaults__ = original_defaults\n\n    # Verify: file uploaded and code executed via streaming.\n    assert len(mock_ci_server.get_requests(method=\"POST\", path=\"/v1/files\")) == 1\n    assert (\n        len(mock_ci_server.get_requests(method=\"POST\", path=\"/v1/execute/stream\")) == 1\n    )\n\n    # Staged input files are intentionally NOT deleted — PythonTool caches their\n    # file IDs across agent-loop iterations to avoid re-uploading on every call.\n    # The code interpreter cleans them up via its own TTL.\n    assert len(mock_ci_server.get_requests(method=\"DELETE\")) == 0\n\n    execute_body = mock_ci_server.get_requests(\n        method=\"POST\", path=\"/v1/execute/stream\"\n    )[0].json_body()\n    assert execute_body[\"code\"] == code\n    assert len(execute_body[\"files\"]) == 1\n    assert execute_body[\"files\"][0][\"path\"] == \"data.csv\"\n\n\ndef test_code_interpreter_replay_packets_include_code_and_output(\n    db_session: Session,\n    mock_ci_server: MockCodeInterpreterServer,\n    _attach_python_tool_to_default_persona: None,\n    initialize_file_store: None,  # noqa: ARG001\n) -> None:\n    \"\"\"After a code interpreter message completes, retrieving the message\n    via translate_assistant_message_to_packets should emit PythonToolStart\n    (containing the executed code) and PythonToolDelta (containing\n    stdout/stderr), not generic CustomTool packets.\"\"\"\n    mock_ci_server.captured_requests.clear()\n    mock_ci_server._file_counter = 0\n    mock_url = mock_ci_server.url\n\n    user = create_test_user(db_session, \"ci_replay_test\")\n    chat_session = create_chat_session(db_session=db_session, user=user)\n\n    code = 'x = 2 + 2\\nprint(f\"Result: {x}\")'\n    msg_req = SendMessageRequest(\n        message=\"Calculate 2 + 2\",\n        chat_session_id=chat_session.id,\n        stream=True,\n    )\n\n    original_defaults = ci_mod.CodeInterpreterClient.__init__.__defaults__\n    with (\n        use_mock_llm() as mock_llm,\n        patch(\n            \"onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL\",\n            mock_url,\n        ),\n        patch(\n            \"onyx.tools.tool_implementations.python.code_interpreter_client.CODE_INTERPRETER_BASE_URL\",\n            mock_url,\n        ),\n    ):\n        answer_tokens = [\"The \", \"result \", \"is \", \"4.\"]\n\n        ci_mod.CodeInterpreterClient.__init__.__defaults__ = (mock_url,)\n        try:\n            handler = StreamTestBuilder(llm_controller=mock_llm)\n\n            stream = handle_stream_message_objects(\n                new_msg_req=msg_req, user=user, db_session=db_session\n            )\n            # First packet is always MessageResponseIDInfo\n            next(stream)\n\n            # Phase 1: LLM requests python tool execution.\n            handler.add_response(\n                LLMToolCallResponse(\n                    tool_name=\"python\",\n                    tool_call_id=\"call_replay_test\",\n                    tool_call_argument_tokens=[json.dumps({\"code\": code})],\n                )\n            ).expect(\n                Packet(\n                    placement=create_placement(0),\n                    obj=ToolCallArgumentDelta(\n                        tool_type=\"python\",\n                        argument_deltas={\"code\": code},\n                    ),\n                ),\n                forward=2,\n            ).expect(\n                Packet(\n                    placement=create_placement(0),\n                    obj=PythonToolStart(code=code),\n                ),\n                forward=False,\n            ).expect(\n                Packet(\n                    placement=create_placement(0),\n                    obj=PythonToolDelta(stdout=\"mock output\\n\", stderr=\"\", file_ids=[]),\n                ),\n                forward=False,\n            ).expect(\n                Packet(\n                    placement=create_placement(0),\n                    obj=SectionEnd(),\n                ),\n                forward=False,\n            ).run_and_validate(\n                stream=stream\n            )\n\n            # Phase 2: LLM produces a final answer after tool execution.\n            handler.add_response(\n                LLMAnswerResponse(answer_tokens=answer_tokens)\n            ).expect_agent_response(\n                answer_tokens=answer_tokens,\n                turn_index=1,\n            ).run_and_validate(\n                stream=stream\n            )\n\n            with pytest.raises(StopIteration):\n                next(stream)\n\n        finally:\n            ci_mod.CodeInterpreterClient.__init__.__defaults__ = original_defaults\n\n    # Retrieve the chat session through the same endpoint the frontend uses\n    chat_detail = get_chat_session(\n        session_id=chat_session.id,\n        user=user,\n        db_session=db_session,\n    )\n\n    assert (\n        len(mock_ci_server.get_requests(method=\"POST\", path=\"/v1/execute/stream\")) == 1\n    )\n\n    # The response contains `packets` — a list of packet-lists, one per\n    # assistant message. We should have exactly one assistant message.\n    assert (\n        len(chat_detail.packets) == 1\n    ), f\"Expected 1 assistant packet list, got {len(chat_detail.packets)}\"\n    packets = chat_detail.packets[0]\n\n    # Extract PythonToolStart packets – these must contain the code\n    start_packets = [p for p in packets if isinstance(p.obj, PythonToolStart)]\n    assert (\n        len(start_packets) == 1\n    ), f\"Expected 1 PythonToolStart packet, got {len(start_packets)}. Packet types: {[type(p.obj).__name__ for p in packets]}\"\n    start_obj = start_packets[0].obj\n    assert isinstance(start_obj, PythonToolStart)\n    assert start_obj.code == code\n\n    # Extract PythonToolDelta packets – these must contain stdout/stderr\n    delta_packets = [p for p in packets if isinstance(p.obj, PythonToolDelta)]\n    assert len(delta_packets) >= 1, (\n        f\"Expected at least 1 PythonToolDelta packet, got {len(delta_packets)}. \"\n        f\"Packet types: {[type(p.obj).__name__ for p in packets]}\"\n    )\n    # The mock CI server returns \"mock output\\n\" as stdout\n    delta_obj = delta_packets[0].obj\n    assert isinstance(delta_obj, PythonToolDelta)\n    assert \"mock output\" in delta_obj.stdout\n\n\ndef test_code_interpreter_streaming_fallback_to_batch(\n    db_session: Session,\n    mock_ci_server: MockCodeInterpreterServer,\n    _attach_python_tool_to_default_persona: None,\n    initialize_file_store: None,  # noqa: ARG001\n) -> None:\n    \"\"\"When the streaming endpoint is not available (older code-interpreter),\n    execute_streaming should fall back to the batch /v1/execute endpoint.\"\"\"\n    mock_ci_server.captured_requests.clear()\n    mock_ci_server._file_counter = 0\n    mock_ci_server.streaming_enabled = False\n    mock_url = mock_ci_server.url\n\n    user = create_test_user(db_session, \"ci_fallback_test\")\n    chat_session = create_chat_session(db_session=db_session, user=user)\n\n    code = 'print(\"fallback test\")'\n    msg_req = SendMessageRequest(\n        message=\"Print fallback test\",\n        chat_session_id=chat_session.id,\n        stream=True,\n    )\n\n    original_defaults = ci_mod.CodeInterpreterClient.__init__.__defaults__\n    with (\n        use_mock_llm() as mock_llm,\n        patch(\n            \"onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL\",\n            mock_url,\n        ),\n        patch(\n            \"onyx.tools.tool_implementations.python.code_interpreter_client.CODE_INTERPRETER_BASE_URL\",\n            mock_url,\n        ),\n    ):\n        mock_llm.add_response(\n            LLMToolCallResponse(\n                tool_name=\"python\",\n                tool_call_id=\"call_fallback\",\n                tool_call_argument_tokens=[json.dumps({\"code\": code})],\n            )\n        )\n        mock_llm.forward_till_end()\n\n        ci_mod.CodeInterpreterClient.__init__.__defaults__ = (mock_url,)\n        try:\n            packets = list(\n                handle_stream_message_objects(\n                    new_msg_req=msg_req, user=user, db_session=db_session\n                )\n            )\n        finally:\n            ci_mod.CodeInterpreterClient.__init__.__defaults__ = original_defaults\n            mock_ci_server.streaming_enabled = True\n\n    # Streaming was attempted first (returned 404), then fell back to batch\n    assert (\n        len(mock_ci_server.get_requests(method=\"POST\", path=\"/v1/execute/stream\")) == 1\n    )\n    assert len(mock_ci_server.get_requests(method=\"POST\", path=\"/v1/execute\")) == 1\n\n    # Verify output still made it through\n    delta_packets = [\n        p\n        for p in packets\n        if isinstance(p, Packet) and isinstance(p.obj, PythonToolDelta)\n    ]\n    assert len(delta_packets) >= 1\n    first_delta = delta_packets[0].obj\n    assert isinstance(first_delta, PythonToolDelta)\n    assert \"mock output\" in first_delta.stdout\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/tools/test_python_tool_server_enabled.py",
    "content": "\"\"\"Tests that PythonTool.is_available() respects the server_enabled DB flag.\n\nUses a real DB session with CODE_INTERPRETER_BASE_URL mocked so the\nenvironment-variable check passes and the DB flag is the deciding factor.\n\"\"\"\n\nfrom unittest.mock import patch\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.code_interpreter import fetch_code_interpreter_server\nfrom onyx.db.code_interpreter import update_code_interpreter_server_enabled\nfrom onyx.tools.tool_implementations.python.python_tool import PythonTool\n\n\ndef test_python_tool_unavailable_when_server_disabled(\n    db_session: Session,\n) -> None:\n    \"\"\"With a valid base URL, the tool should be unavailable when\n    server_enabled is False in the DB.\"\"\"\n    server = fetch_code_interpreter_server(db_session)\n    initial_enabled = server.server_enabled\n\n    try:\n        update_code_interpreter_server_enabled(db_session, enabled=False)\n\n        with patch(\n            \"onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL\",\n            \"http://fake:8888\",\n        ):\n            assert PythonTool.is_available(db_session) is False\n    finally:\n        update_code_interpreter_server_enabled(db_session, enabled=initial_enabled)\n\n\ndef test_python_tool_available_when_server_enabled(\n    db_session: Session,\n) -> None:\n    \"\"\"With a valid base URL, the tool should be available when\n    server_enabled is True in the DB.\"\"\"\n    server = fetch_code_interpreter_server(db_session)\n    initial_enabled = server.server_enabled\n\n    try:\n        update_code_interpreter_server_enabled(db_session, enabled=True)\n\n        with patch(\n            \"onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL\",\n            \"http://fake:8888\",\n        ):\n            assert PythonTool.is_available(db_session) is True\n    finally:\n        update_code_interpreter_server_enabled(db_session, enabled=initial_enabled)\n"
  },
  {
    "path": "backend/tests/external_dependency_unit/tracing/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/external_dependency_unit/tracing/test_llm_span_recording.py",
    "content": "\"\"\"Tests for LLM span recording utilities.\"\"\"\n\nfrom typing import Any\nfrom unittest.mock import MagicMock\n\nimport pytest\n\nfrom onyx.llm.model_response import ChatCompletionMessageToolCall\nfrom onyx.llm.model_response import Choice\nfrom onyx.llm.model_response import FunctionCall as ModelResponseFunctionCall\nfrom onyx.llm.model_response import Message\nfrom onyx.llm.model_response import ModelResponse\nfrom onyx.llm.model_response import Usage\nfrom onyx.llm.models import FunctionCall\nfrom onyx.llm.models import ToolCall\nfrom onyx.tracing.framework.span_data import GenerationSpanData\nfrom onyx.tracing.llm_utils import record_llm_response\nfrom onyx.tracing.llm_utils import record_llm_span_output\n\n\n@pytest.fixture\ndef mock_span() -> MagicMock:\n    \"\"\"Create a mock span with GenerationSpanData.\"\"\"\n    span = MagicMock()\n    span.span_data = GenerationSpanData()\n    return span\n\n\nclass TestRecordLlmResponse:\n    \"\"\"Tests for record_llm_response function.\"\"\"\n\n    def test_records_content_from_response(self, mock_span: MagicMock) -> None:\n        \"\"\"Test that content is correctly extracted and recorded.\"\"\"\n        response = ModelResponse(\n            id=\"test-id\",\n            created=\"2024-01-01\",\n            choice=Choice(\n                message=Message(content=\"Hello, world!\", role=\"assistant\"),\n            ),\n        )\n\n        record_llm_response(mock_span, response)\n\n        assert mock_span.span_data.output == [\n            {\"role\": \"assistant\", \"content\": \"Hello, world!\"}\n        ]\n\n    def test_records_reasoning_from_response(self, mock_span: MagicMock) -> None:\n        \"\"\"Test that reasoning/extended thinking is recorded.\"\"\"\n        response = ModelResponse(\n            id=\"test-id\",\n            created=\"2024-01-01\",\n            choice=Choice(\n                message=Message(\n                    content=\"The answer is 42.\",\n                    role=\"assistant\",\n                    reasoning_content=\"Let me think step by step...\",\n                ),\n            ),\n        )\n\n        record_llm_response(mock_span, response)\n\n        assert mock_span.span_data.output == [\n            {\"role\": \"assistant\", \"content\": \"The answer is 42.\"}\n        ]\n        assert mock_span.span_data.reasoning == \"Let me think step by step...\"\n\n    def test_records_tool_calls_from_response(self, mock_span: MagicMock) -> None:\n        \"\"\"Test that tool calls are correctly extracted and recorded.\"\"\"\n        tool_call = ChatCompletionMessageToolCall(\n            id=\"call-123\",\n            type=\"function\",\n            function=ModelResponseFunctionCall(\n                name=\"search_documents\",\n                arguments='{\"query\": \"test query\"}',\n            ),\n        )\n        response = ModelResponse(\n            id=\"test-id\",\n            created=\"2024-01-01\",\n            choice=Choice(\n                message=Message(\n                    content=None,\n                    role=\"assistant\",\n                    tool_calls=[tool_call],\n                ),\n            ),\n        )\n\n        record_llm_response(mock_span, response)\n\n        output = mock_span.span_data.output\n        assert len(output) == 1\n        assert output[0][\"role\"] == \"assistant\"\n        assert \"tool_calls\" in output[0]\n        assert len(output[0][\"tool_calls\"]) == 1\n        assert output[0][\"tool_calls\"][0][\"id\"] == \"call-123\"\n        assert output[0][\"tool_calls\"][0][\"function\"][\"name\"] == \"search_documents\"\n\n    def test_records_usage_from_response(self, mock_span: MagicMock) -> None:\n        \"\"\"Test that usage metrics are correctly recorded.\"\"\"\n        response = ModelResponse(\n            id=\"test-id\",\n            created=\"2024-01-01\",\n            choice=Choice(\n                message=Message(content=\"Test\", role=\"assistant\"),\n            ),\n            usage=Usage(\n                prompt_tokens=100,\n                completion_tokens=50,\n                total_tokens=150,\n                cache_creation_input_tokens=10,\n                cache_read_input_tokens=20,\n            ),\n        )\n\n        record_llm_response(mock_span, response)\n\n        assert mock_span.span_data.usage is not None\n        assert mock_span.span_data.usage[\"input_tokens\"] == 100\n        assert mock_span.span_data.usage[\"output_tokens\"] == 50\n        assert mock_span.span_data.usage[\"total_tokens\"] == 150\n        assert mock_span.span_data.usage[\"cache_read_input_tokens\"] == 20\n        assert mock_span.span_data.usage[\"cache_creation_input_tokens\"] == 10\n\n    def test_handles_none_content(self, mock_span: MagicMock) -> None:\n        \"\"\"Test that None content is handled (e.g., tool-only response).\"\"\"\n        response = ModelResponse(\n            id=\"test-id\",\n            created=\"2024-01-01\",\n            choice=Choice(\n                message=Message(content=None, role=\"assistant\"),\n            ),\n        )\n\n        record_llm_response(mock_span, response)\n\n        # Content should not be in output dict when None\n        assert mock_span.span_data.output == [{\"role\": \"assistant\"}]\n\n    def test_handles_no_usage(self, mock_span: MagicMock) -> None:\n        \"\"\"Test that missing usage is handled gracefully.\"\"\"\n        response = ModelResponse(\n            id=\"test-id\",\n            created=\"2024-01-01\",\n            choice=Choice(\n                message=Message(content=\"Test\", role=\"assistant\"),\n            ),\n            usage=None,\n        )\n\n        record_llm_response(mock_span, response)\n\n        # Usage should remain None/unset\n        assert mock_span.span_data.usage is None\n\n    def test_records_all_fields_together(self, mock_span: MagicMock) -> None:\n        \"\"\"Test recording a response with all fields present.\"\"\"\n        tool_call = ChatCompletionMessageToolCall(\n            id=\"call-456\",\n            type=\"function\",\n            function=ModelResponseFunctionCall(\n                name=\"analyze\",\n                arguments='{\"text\": \"sample\"}',\n            ),\n        )\n        response = ModelResponse(\n            id=\"test-id\",\n            created=\"2024-01-01\",\n            choice=Choice(\n                message=Message(\n                    content=\"Here's my analysis:\",\n                    role=\"assistant\",\n                    reasoning_content=\"I need to think about this carefully...\",\n                    tool_calls=[tool_call],\n                ),\n            ),\n            usage=Usage(\n                prompt_tokens=200,\n                completion_tokens=100,\n                total_tokens=300,\n                cache_creation_input_tokens=0,\n                cache_read_input_tokens=50,\n            ),\n        )\n\n        record_llm_response(mock_span, response)\n\n        # Check output\n        output = mock_span.span_data.output\n        assert len(output) == 1\n        assert output[0][\"role\"] == \"assistant\"\n        assert output[0][\"content\"] == \"Here's my analysis:\"\n        assert len(output[0][\"tool_calls\"]) == 1\n\n        # Check reasoning\n        assert (\n            mock_span.span_data.reasoning == \"I need to think about this carefully...\"\n        )\n\n        # Check usage\n        assert mock_span.span_data.usage[\"input_tokens\"] == 200\n        assert mock_span.span_data.usage[\"output_tokens\"] == 100\n\n\nclass TestRecordLlmSpanOutput:\n    \"\"\"Tests for record_llm_span_output function (streaming scenarios).\"\"\"\n\n    def test_records_string_output(self, mock_span: MagicMock) -> None:\n        \"\"\"Test recording a simple string output.\"\"\"\n        record_llm_span_output(mock_span, \"Hello, world!\")\n\n        assert mock_span.span_data.output == [\n            {\"role\": \"assistant\", \"content\": \"Hello, world!\"}\n        ]\n\n    def test_records_none_output(self, mock_span: MagicMock) -> None:\n        \"\"\"Test recording None output.\"\"\"\n        record_llm_span_output(mock_span, None)\n\n        assert mock_span.span_data.output == [{\"role\": \"assistant\", \"content\": None}]\n\n    def test_records_sequence_output(self, mock_span: MagicMock) -> None:\n        \"\"\"Test recording a sequence of message dicts.\"\"\"\n        messages: list[dict[str, Any]] = [\n            {\"role\": \"assistant\", \"content\": \"Part 1\"},\n            {\"role\": \"assistant\", \"content\": \"Part 2\"},\n        ]\n\n        record_llm_span_output(mock_span, messages)\n\n        assert mock_span.span_data.output == messages\n\n    def test_records_usage(self, mock_span: MagicMock) -> None:\n        \"\"\"Test recording usage information.\"\"\"\n        usage = MagicMock()\n        usage.prompt_tokens = 50\n        usage.completion_tokens = 25\n        usage.total_tokens = 75\n        usage.cache_read_input_tokens = 10\n        usage.cache_creation_input_tokens = 5\n\n        record_llm_span_output(mock_span, \"Test output\", usage=usage)\n\n        assert mock_span.span_data.usage is not None\n        assert mock_span.span_data.usage[\"input_tokens\"] == 50\n        assert mock_span.span_data.usage[\"output_tokens\"] == 25\n\n    def test_records_reasoning(self, mock_span: MagicMock) -> None:\n        \"\"\"Test recording reasoning content.\"\"\"\n        record_llm_span_output(\n            mock_span, \"Final answer\", reasoning=\"Step by step thinking...\"\n        )\n\n        assert mock_span.span_data.reasoning == \"Step by step thinking...\"\n\n    def test_records_tool_calls(self, mock_span: MagicMock) -> None:\n        \"\"\"Test recording tool calls in streaming scenario.\"\"\"\n        tool_calls = [\n            ToolCall(\n                id=\"call-789\",\n                type=\"function\",\n                function=FunctionCall(\n                    name=\"get_weather\",\n                    arguments='{\"location\": \"NYC\"}',\n                ),\n            )\n        ]\n\n        record_llm_span_output(mock_span, \"Checking weather...\", tool_calls=tool_calls)\n\n        output = mock_span.span_data.output\n        assert len(output) == 1\n        assert output[0][\"content\"] == \"Checking weather...\"\n        assert \"tool_calls\" in output[0]\n        assert len(output[0][\"tool_calls\"]) == 1\n        assert output[0][\"tool_calls\"][0][\"id\"] == \"call-789\"\n\n    def test_records_tool_calls_with_none_output(self, mock_span: MagicMock) -> None:\n        \"\"\"Test recording tool calls when output is None.\"\"\"\n        tool_calls = [\n            ToolCall(\n                id=\"call-abc\",\n                type=\"function\",\n                function=FunctionCall(\n                    name=\"search\",\n                    arguments='{\"q\": \"test\"}',\n                ),\n            )\n        ]\n\n        record_llm_span_output(mock_span, None, tool_calls=tool_calls)\n\n        output = mock_span.span_data.output\n        assert len(output) == 1\n        assert output[0][\"content\"] is None\n        assert len(output[0][\"tool_calls\"]) == 1\n\n    def test_records_all_streaming_fields(self, mock_span: MagicMock) -> None:\n        \"\"\"Test recording all fields in streaming scenario.\"\"\"\n        usage = MagicMock()\n        usage.prompt_tokens = 100\n        usage.completion_tokens = 50\n        usage.total_tokens = 150\n        usage.cache_read_input_tokens = 0\n        usage.cache_creation_input_tokens = 0\n\n        tool_calls = [\n            ToolCall(\n                id=\"call-xyz\",\n                type=\"function\",\n                function=FunctionCall(\n                    name=\"calculator\",\n                    arguments='{\"expr\": \"2+2\"}',\n                ),\n            )\n        ]\n\n        record_llm_span_output(\n            mock_span,\n            output=\"Computing...\",\n            usage=usage,\n            reasoning=\"Let me calculate this.\",\n            tool_calls=tool_calls,\n        )\n\n        # Check all fields\n        output = mock_span.span_data.output\n        assert output[0][\"content\"] == \"Computing...\"\n        assert len(output[0][\"tool_calls\"]) == 1\n        assert mock_span.span_data.reasoning == \"Let me calculate this.\"\n        assert mock_span.span_data.usage[\"input_tokens\"] == 100\n"
  },
  {
    "path": "backend/tests/integration/Dockerfile",
    "content": "# syntax=docker/dockerfile:1.6\n# This image is only for running integration tests. It layers test-specific\n# files and dependencies on top of the backend image.\nFROM base AS integration-base\n\nWORKDIR /app\n\n# Integration test stuff\nCOPY ./requirements/dev.txt /tmp/dev-requirements.txt\nRUN uv pip install --system --no-cache-dir --upgrade -r /tmp/dev-requirements.txt && \\\n    rm -rf ~/.cache/uv /tmp/*.txt\n\nCOPY ./pytest.ini /app/pytest.ini\nCOPY ./tests/integration /app/tests/integration\n# copies all files, but not folders, in the tests directory\nCOPY ./tests/* /app/tests/\n\nFROM base AS openapi-schema\nCOPY ./scripts/onyx_openapi_schema.py /app/scripts/onyx_openapi_schema.py\n# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license\nRUN LICENSE_ENFORCEMENT_ENABLED=false python scripts/onyx_openapi_schema.py --filename openapi.json\n\nFROM openapitools/openapi-generator-cli:latest AS openapi-client\nWORKDIR /local\nCOPY --from=openapi-schema /app/openapi.json /local/openapi.json\nRUN openapi-generator-cli generate \\\n    -i /local/openapi.json \\\n    -g python \\\n    -o /local/onyx_openapi_client \\\n    --package-name onyx_openapi_client \\\n    --skip-validate-spec \\\n    --openapi-normalizer \"SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true\"\n\nFROM integration-base AS integration\nCOPY --from=openapi-schema /app/openapi.json /app/generated/openapi.json\nCOPY --from=openapi-client /local/onyx_openapi_client /app/generated/onyx_openapi_client\n\n\nENV PYTHONPATH=/app\n\nENTRYPOINT [\"pytest\", \"-s\", \"-rs\"]\nCMD [\"/app/tests/integration\", \"--ignore=/app/tests/integration/multitenant_tests\"]\n"
  },
  {
    "path": "backend/tests/integration/README.md",
    "content": "# Integration Tests\n\n## General Testing Overview\n\nThe integration tests are designed with a \"manager\" class and a \"test\" class for each type of object being manipulated (e.g., user, persona, credential):\n\n- **Manager Class**: Contains methods for each type of API call. Responsible for creating, deleting, and verifying the existence of an entity.\n- **Test Class**: Stores data for each entity being tested. This is our \"expected state\" of the object.\n\nThe idea is that each test can use the manager class to create (.create()) a \"test*\" object. It can then perform an operation on the object (e.g., send a request to the API) and then check if the \"test*\" object is in the expected state by using the manager class (.verify()) function.\n\n## Instructions for Running Integration Tests Locally\n0. Generate dependencies\nFirst install openap-generator\n```sh\nbrew install openapi-generator\n```\n\nThen, using the VSCode/Cursor debugger, run the `Onyx OpenAPI Schema Generator` task (see `CONTRIBUTING_VSCODE.md` for `launch.json` setup instructions).\nThe task automatically generates the Python client needed for integration tests.\n\nIf the client generation fails, try running this command manually:\n```sh\nopenapi-generator generate -i backend/generated/openapi.json -g python -o backend/generated/onyx_openapi_client --package-name onyx_openapi_client --skip-validate-spec --openapi-normalizer \"SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true\"\n```\n\n1. Launch onyx (using Docker or running with a debugger), ensuring the API server is running on port 8080.\n   - If you'd like to set environment variables, you can do so by creating a `.env` file in the onyx/backend/tests/integration/ directory.\n   - Onyx MUST be launched with AUTH_TYPE=basic and ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true\n   - Tests that use `mock_llm_response` (e.g. llm workflow tool call tests) also require `INTEGRATION_TESTS_MODE=true` on the API server process.\n2. Navigate to `onyx/backend`.\n3. Run the following command in the terminal:\n   ```sh\n   python -m dotenv -f .env run -- pytest -s tests/integration/tests/\n   ```\n   or to run all tests in a file:\n   ```sh\n   python -m dotenv -f .env run -- pytest -s tests/integration/tests/path_to/test_file.py\n   ```\n   or to run a single test:\n   ```sh\n   python -m dotenv -f .env run -- pytest -s tests/integration/tests/path_to/test_file.py::test_function_name\n   ```\n\nRunning some single tests require the `mock_connector_server` container to be running. If the above doesn't work, \nnavigate to `backend/tests/integration/mock_services` and run\n```sh\ndocker compose -f docker-compose.mock-it-services.yml -p mock-it-services-stack up -d\n```\nYou will have to modify the networks section of the docker-compose file to `<your stack name>_default` if you brought up the standard\nonyx services with a name different from the default `onyx`.\n\n## Guidelines for Writing Integration Tests\n\n- As authentication is currently required for all tests, each test should start by creating a user.\n- Each test should ideally focus on a single API flow.\n- The test writer should try to consider failure cases and edge cases for the flow and write the tests to check for these cases.\n- Every step of the test should be commented describing what is being done and what the expected behavior is.\n- A summary of the test should be given at the top of the test function as well!\n- When writing new tests, manager classes, manager functions, and test classes, try to copy the style of the other ones that have already been written.\n- Be careful for scope creep!\n  - No need to overcomplicate every test by verifying after every single API call so long as the case you would be verifying is covered elsewhere (ideally in a test focused on covering that case).\n  - An example of this is: Creating an admin user is done at the beginning of nearly every test, but we only need to verify that the user is actually an admin in the test focused on checking admin permissions. For every other test, we can just create the admin user and assume that the permissions are working as expected.\n\n## Current Testing Limitations\n\n### Test coverage\n\n- All tests are probably not as high coverage as they could be.\n- The \"connector\" tests in particular are super bare bones because we will be reworking connector/cc_pair sometime soon.\n- Global Curator role is not thoroughly tested.\n- No auth is not tested at all.\n\n### Failure checking\n\n- While we test expected auth failures, we only check that it failed at all.\n- We dont check that the return codes are what we expect.\n- This means that a test could be failing for a different reason than expected.\n- We should ensure that the proper codes are being returned for each failure case.\n- We should also query the db after each failure to ensure that the db is in the expected state.\n\n### Scope/focus\n\n- The tests may be scoped sub-optimally.\n- The scoping of each test may be overlapping.\n\n## Current Testing Coverage\n\nThe current testing coverage should be checked by reading the comments at the top of each test file.\n\n## TODO: Testing Coverage\n\n- Persona permissions testing\n- Read only (and/or basic) user permissions\n  - Ensuring proper permission enforcement using the chat/doc_search endpoints\n- No auth\n\n## Ideas for integration testing design\n\n### Combine the \"test\" and \"manager\" classes\n\nThis could make test writing a bit cleaner by preventing test writers from having to pass around objects into functions that the objects have a 1:1 relationship with.\n\n### Rework VespaClient\n\nRight now, its used a fixture and has to be passed around between manager classes.\nCould just be built where its used\n"
  },
  {
    "path": "backend/tests/integration/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/integration/common_utils/chat.py",
    "content": "import requests\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import User\n\n\ndef test_create_chat_session_and_send_messages() -> None:\n    # Create a test user\n    with get_session_with_current_tenant() as db_session:\n        test_user = User(email=\"test@example.com\", hashed_password=\"dummy_hash\")\n        db_session.add(test_user)\n        db_session.commit()\n\n    base_url = \"http://localhost:8080\"  # Adjust this to your API's base URL\n    headers = {\"Authorization\": f\"Bearer {test_user.id}\"}\n\n    # Create a new chat session\n    create_session_response = requests.post(\n        f\"{base_url}/chat/create-chat-session\",\n        json={\n            \"description\": \"Test Chat\",\n            \"persona_id\": 1,\n        },  # Assuming persona_id 1 exists\n        headers=headers,\n    )\n    assert create_session_response.status_code == 200\n    chat_session_id = create_session_response.json()[\"chat_session_id\"]\n\n    # Send first message\n    first_message = \"Hello, this is a test message.\"\n    send_message_response = requests.post(\n        f\"{base_url}/chat/send-chat-message\",\n        json={\n            \"chat_session_id\": chat_session_id,\n            \"message\": first_message,\n            \"retrieval_options\": {\"top_k\": 3},\n            \"stream_response\": False,\n        },\n        headers=headers,\n    )\n    assert send_message_response.status_code == 200\n\n    # Send second message\n    second_message = \"Can you provide more information?\"\n    send_message_response = requests.post(\n        f\"{base_url}/chat/send-chat-message\",\n        json={\n            \"chat_session_id\": chat_session_id,\n            \"message\": second_message,\n            \"retrieval_options\": {\"top_k\": 3},\n            \"stream_response\": False,\n        },\n        headers=headers,\n    )\n    assert send_message_response.status_code == 200\n\n    # Verify chat session details\n    get_session_response = requests.get(\n        f\"{base_url}/chat/get-chat-session/{chat_session_id}\", headers=headers\n    )\n    assert get_session_response.status_code == 200\n    session_details = get_session_response.json()\n    assert session_details[\"chat_session_id\"] == chat_session_id\n    assert session_details[\"description\"] == \"Test Chat\"\n    assert len(session_details[\"messages\"]) == 4  # 2 user messages + 2 AI responses\n"
  },
  {
    "path": "backend/tests/integration/common_utils/config.py",
    "content": "import generated.onyx_openapi_client.onyx_openapi_client as onyx_api  # type: ignore[import-untyped,unused-ignore]\nfrom tests.integration.common_utils.constants import API_SERVER_URL\n\napi_config = onyx_api.Configuration(host=API_SERVER_URL)\n"
  },
  {
    "path": "backend/tests/integration/common_utils/constants.py",
    "content": "import os\n\nADMIN_USER_NAME = \"admin_user\"\n\nAPI_SERVER_PROTOCOL = os.getenv(\"API_SERVER_PROTOCOL\") or \"http\"\nAPI_SERVER_HOST = os.getenv(\"API_SERVER_HOST\") or \"127.0.0.1\"\nAPI_SERVER_PORT = os.getenv(\"API_SERVER_PORT\") or \"8080\"\nAPI_SERVER_URL = f\"{API_SERVER_PROTOCOL}://{API_SERVER_HOST}:{API_SERVER_PORT}\"\nMAX_DELAY = 300\n\nMCP_SERVER_HOST = os.getenv(\"MCP_SERVER_HOST\") or \"127.0.0.1\"\nMCP_SERVER_PORT = os.getenv(\"MCP_SERVER_PORT\") or \"8090\"\nMCP_SERVER_URL = f\"{API_SERVER_PROTOCOL}://{MCP_SERVER_HOST}:{MCP_SERVER_PORT}\"\n\nGENERAL_HEADERS = {\"Content-Type\": \"application/json\"}\n\nNUM_DOCS = 5\n\nMOCK_CONNECTOR_SERVER_HOST = os.getenv(\"MOCK_CONNECTOR_SERVER_HOST\") or \"localhost\"\nMOCK_CONNECTOR_SERVER_PORT = os.getenv(\"MOCK_CONNECTOR_SERVER_PORT\") or 8001\n"
  },
  {
    "path": "backend/tests/integration/common_utils/document_acl.py",
    "content": "\"\"\"\nUtilities for testing document access control lists (ACLs) and permissions.\n\"\"\"\n\nfrom typing import List\nfrom uuid import UUID\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.access.access import _get_access_for_documents\nfrom ee.onyx.db.external_perm import fetch_external_groups_for_user\nfrom onyx.access.utils import prefix_external_group\nfrom onyx.access.utils import prefix_user_email\nfrom onyx.configs.constants import PUBLIC_DOC_PAT\nfrom onyx.db.models import DocumentByConnectorCredentialPair\nfrom onyx.db.models import User\nfrom onyx.db.users import fetch_user_by_id\nfrom onyx.utils.logger import setup_logger\nfrom tests.integration.common_utils.test_models import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestUser\n\nlogger = setup_logger()\n\n\ndef get_user_acl(user: User, db_session: Session) -> set[str]:\n    \"\"\"\n    Get the ACL entries for a user, including their external groups, email, and public doc pattern.\n\n    Args:\n        user: The user object\n        db_session: Database session\n\n    Returns:\n        Set of ACL entries for the user\n    \"\"\"\n    db_external_groups = (\n        fetch_external_groups_for_user(db_session, user.id) if user else []\n    )\n    prefixed_external_groups = [\n        prefix_external_group(db_external_group.external_user_group_id)\n        for db_external_group in db_external_groups\n    ]\n\n    user_acl = set(prefixed_external_groups)\n    user_acl.update({prefix_user_email(user.email), PUBLIC_DOC_PAT})\n    return user_acl\n\n\ndef get_user_document_access_via_acl(\n    test_user: DATestUser, document_ids: List[str], db_session: Session\n) -> List[str]:\n    \"\"\"\n    Determine which documents a user can access by comparing user ACL with document ACLs.\n\n    This is a more reliable method than search-based verification as it directly checks\n    permission logic without depending on search relevance or ranking.\n\n    Args:\n        test_user: The test user to check access for\n        document_ids: List of document IDs to check\n        db_session: Database session\n\n    Returns:\n        List of document IDs that the user can access\n    \"\"\"\n    # Get the actual User object from the database\n    user = fetch_user_by_id(db_session, UUID(test_user.id))\n    if not user:\n        logger.error(f\"Could not find user with ID {test_user.id}\")\n        return []\n\n    user_acl = get_user_acl(user, db_session)\n    logger.info(f\"User {user.email} ACL entries: {user_acl}\")\n\n    # Get document access information\n    doc_access_map = _get_access_for_documents(document_ids, db_session)\n    logger.info(f\"Found access info for {len(doc_access_map)} documents\")\n\n    accessible_docs = []\n    for doc_id, doc_access in doc_access_map.items():\n        doc_acl = doc_access.to_acl()\n        logger.info(f\"Document {doc_id} ACL: {doc_acl}\")\n\n        # Check if user has any matching ACL entry\n        if user_acl.intersection(doc_acl):\n            accessible_docs.append(doc_id)\n            logger.info(f\"User {user.email} has access to document {doc_id}\")\n        else:\n            logger.info(f\"User {user.email} does NOT have access to document {doc_id}\")\n\n    return accessible_docs\n\n\ndef get_all_connector_documents(\n    cc_pair: DATestCCPair, db_session: Session\n) -> List[str]:\n    \"\"\"\n    Get all document IDs for a given connector/credential pair.\n\n    Args:\n        cc_pair: The connector-credential pair\n        db_session: Database session\n\n    Returns:\n        List of document IDs\n    \"\"\"\n    stmt = select(DocumentByConnectorCredentialPair.id).where(\n        DocumentByConnectorCredentialPair.connector_id == cc_pair.connector_id,\n        DocumentByConnectorCredentialPair.credential_id == cc_pair.credential_id,\n    )\n\n    result = db_session.execute(stmt)\n    document_ids = [row[0] for row in result.fetchall()]\n    logger.info(\n        f\"Found {len(document_ids)} documents for connector {cc_pair.connector_id}\"\n    )\n\n    return document_ids\n\n\ndef get_documents_by_permission_type(\n    document_ids: List[str], db_session: Session\n) -> List[str]:\n    \"\"\"\n    Categorize documents by their permission types and return public documents.\n\n    Args:\n        document_ids: List of document IDs to check\n        db_session: Database session\n\n    Returns:\n        List of document IDs that are public\n    \"\"\"\n    doc_access_map = _get_access_for_documents(document_ids, db_session)\n\n    public_docs = []\n\n    for doc_id, doc_access in doc_access_map.items():\n        if doc_access.is_public:\n            public_docs.append(doc_id)\n\n    return public_docs\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/api_key.py",
    "content": "from uuid import uuid4\n\nimport requests\n\nfrom onyx.db.models import UserRole\nfrom onyx.server.api_key.models import APIKeyArgs\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import GENERAL_HEADERS\nfrom tests.integration.common_utils.test_models import DATestAPIKey\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass APIKeyManager:\n    @staticmethod\n    def create(\n        user_performing_action: DATestUser,\n        name: str | None = None,\n        api_key_role: UserRole = UserRole.ADMIN,\n    ) -> DATestAPIKey:\n        name = f\"{name}-api-key\" if name else f\"test-api-key-{uuid4()}\"\n        api_key_request = APIKeyArgs(\n            name=name,\n            role=api_key_role,\n        )\n        api_key_response = requests.post(\n            f\"{API_SERVER_URL}/admin/api-key\",\n            json=api_key_request.model_dump(),\n            headers=user_performing_action.headers,\n        )\n        api_key_response.raise_for_status()\n        api_key = api_key_response.json()\n        result_api_key = DATestAPIKey(\n            api_key_id=api_key[\"api_key_id\"],\n            api_key_display=api_key[\"api_key_display\"],\n            api_key=api_key[\"api_key\"],\n            api_key_name=name,\n            api_key_role=api_key_role,\n            user_id=api_key[\"user_id\"],\n            headers=GENERAL_HEADERS,\n        )\n        result_api_key.headers[\"Authorization\"] = f\"Bearer {result_api_key.api_key}\"\n        return result_api_key\n\n    @staticmethod\n    def delete(\n        api_key: DATestAPIKey,\n        user_performing_action: DATestUser,\n    ) -> None:\n        api_key_response = requests.delete(\n            f\"{API_SERVER_URL}/admin/api-key/{api_key.api_key_id}\",\n            headers=user_performing_action.headers,\n        )\n        api_key_response.raise_for_status()\n\n    @staticmethod\n    def get_all(\n        user_performing_action: DATestUser,\n    ) -> list[DATestAPIKey]:\n        api_key_response = requests.get(\n            f\"{API_SERVER_URL}/admin/api-key\",\n            headers=user_performing_action.headers,\n        )\n        api_key_response.raise_for_status()\n        return [DATestAPIKey(**api_key) for api_key in api_key_response.json()]\n\n    @staticmethod\n    def verify(\n        api_key: DATestAPIKey,\n        user_performing_action: DATestUser,\n        verify_deleted: bool = False,\n    ) -> None:\n        retrieved_keys = APIKeyManager.get_all(\n            user_performing_action=user_performing_action\n        )\n        for key in retrieved_keys:\n            if key.api_key_id == api_key.api_key_id:\n                if verify_deleted:\n                    raise ValueError(\"API Key found when it should have been deleted\")\n                if (\n                    key.api_key_name == api_key.api_key_name\n                    and key.api_key_role == api_key.api_key_role\n                ):\n                    return\n\n        if not verify_deleted:\n            raise Exception(\"API Key not found\")\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/cc_pair.py",
    "content": "import time\nfrom datetime import datetime\nfrom typing import Any\nfrom uuid import uuid4\n\nimport requests\n\nimport generated.onyx_openapi_client.onyx_openapi_client as api  # type: ignore[import-untyped,unused-ignore]\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import ConnectorCredentialPairStatus\nfrom onyx.server.documents.models import CCPairFullInfo\nfrom onyx.server.documents.models import ConnectorCredentialPairIdentifier\nfrom onyx.server.documents.models import ConnectorIndexingStatusLite\nfrom onyx.server.documents.models import ConnectorStatus\nfrom onyx.server.documents.models import DocumentSource\nfrom onyx.server.documents.models import DocumentSyncStatus\nfrom tests.integration.common_utils.config import api_config\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import MAX_DELAY\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.test_models import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _cc_pair_creator(\n    connector_id: int,\n    credential_id: int,\n    user_performing_action: DATestUser,\n    name: str | None = None,\n    access_type: AccessType = AccessType.PUBLIC,\n    groups: list[int] | None = None,\n) -> DATestCCPair:\n    name = f\"{name}-cc-pair\" if name else f\"test-cc-pair-{uuid4()}\"\n\n    with api.ApiClient(api_config) as api_client:\n        api_instance = api.DefaultApi(api_client)\n        connector_credential_pair_metadata = api.ConnectorCredentialPairMetadata(\n            name=name, access_type=access_type, groups=groups or []\n        )\n        api_response: api.StatusResponseInt = (\n            api_instance.associate_credential_to_connector(\n                connector_id,\n                credential_id,\n                connector_credential_pair_metadata,\n                _headers=user_performing_action.headers,\n            )\n        )\n\n    return DATestCCPair(\n        id=int(api_response.data),\n        name=name,\n        connector_id=connector_id,\n        credential_id=credential_id,\n        access_type=access_type,\n        groups=groups or [],\n    )\n\n\nclass CCPairManager:\n    @staticmethod\n    def create_from_scratch(\n        user_performing_action: DATestUser,\n        name: str | None = None,\n        access_type: AccessType = AccessType.PUBLIC,\n        groups: list[int] | None = None,\n        source: DocumentSource = DocumentSource.FILE,\n        input_type: InputType = InputType.LOAD_STATE,\n        connector_specific_config: dict[str, Any] | None = None,\n        credential_json: dict[str, Any] | None = None,\n        refresh_freq: int | None = None,\n    ) -> DATestCCPair:\n        connector = ConnectorManager.create(\n            user_performing_action=user_performing_action,\n            name=name,\n            source=source,\n            input_type=input_type,\n            connector_specific_config=connector_specific_config,\n            access_type=access_type,\n            groups=groups,\n            refresh_freq=refresh_freq,\n        )\n        credential = CredentialManager.create(\n            user_performing_action=user_performing_action,\n            credential_json=credential_json,\n            name=name,\n            source=source,\n            curator_public=(access_type == AccessType.PUBLIC),\n            groups=groups,\n        )\n        cc_pair = _cc_pair_creator(\n            connector_id=connector.id,\n            credential_id=credential.id,\n            name=name,\n            access_type=access_type,\n            groups=groups,\n            user_performing_action=user_performing_action,\n        )\n        return cc_pair\n\n    @staticmethod\n    def create(\n        connector_id: int,\n        credential_id: int,\n        user_performing_action: DATestUser,\n        name: str | None = None,\n        access_type: AccessType = AccessType.PUBLIC,\n        groups: list[int] | None = None,\n    ) -> DATestCCPair:\n        cc_pair = _cc_pair_creator(\n            connector_id=connector_id,\n            credential_id=credential_id,\n            name=name,\n            access_type=access_type,\n            groups=groups,\n            user_performing_action=user_performing_action,\n        )\n        return cc_pair\n\n    @staticmethod\n    def pause_cc_pair(\n        cc_pair: DATestCCPair,\n        user_performing_action: DATestUser,\n    ) -> None:\n        result = requests.put(\n            url=f\"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/status\",\n            json={\"status\": \"PAUSED\"},\n            headers=user_performing_action.headers,\n        )\n        result.raise_for_status()\n\n    @staticmethod\n    def unpause_cc_pair(\n        cc_pair: DATestCCPair,\n        user_performing_action: DATestUser,\n    ) -> None:\n        result = requests.put(\n            url=f\"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/status\",\n            json={\"status\": \"ACTIVE\"},\n            headers=user_performing_action.headers,\n        )\n        result.raise_for_status()\n\n    @staticmethod\n    def delete(\n        cc_pair: DATestCCPair,\n        user_performing_action: DATestUser,\n    ) -> None:\n        cc_pair_identifier = ConnectorCredentialPairIdentifier(\n            connector_id=cc_pair.connector_id,\n            credential_id=cc_pair.credential_id,\n        )\n        result = requests.post(\n            url=f\"{API_SERVER_URL}/manage/admin/deletion-attempt\",\n            json=cc_pair_identifier.model_dump(),\n            headers=user_performing_action.headers,\n        )\n        result.raise_for_status()\n\n    @staticmethod\n    def get_single(\n        cc_pair_id: int,\n        user_performing_action: DATestUser,\n    ) -> CCPairFullInfo | None:\n        response = requests.get(\n            f\"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair_id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        cc_pair_json = response.json()\n        return CCPairFullInfo(**cc_pair_json)\n\n    @staticmethod\n    def get_indexing_status_by_id(\n        cc_pair_id: int,\n        user_performing_action: DATestUser,\n    ) -> ConnectorIndexingStatusLite | None:\n        response = requests.post(\n            f\"{API_SERVER_URL}/manage/admin/connector/indexing-status\",\n            headers=user_performing_action.headers,\n            json={\"get_all_connectors\": True},\n        )\n        response.raise_for_status()\n        indexing_status_response = response.json()\n        for connectors_by_source in indexing_status_response:\n            connectors = connectors_by_source[\"indexing_statuses\"]\n            for connector in connectors:\n                if connector[\"cc_pair_id\"] == cc_pair_id:\n                    return ConnectorIndexingStatusLite(**connector)\n\n        return None\n\n    @staticmethod\n    def get_indexing_statuses(\n        user_performing_action: DATestUser,\n    ) -> list[ConnectorIndexingStatusLite]:\n        response = requests.post(\n            f\"{API_SERVER_URL}/manage/admin/connector/indexing-status\",\n            headers=user_performing_action.headers,\n            json={\"get_all_connectors\": True},\n        )\n        response.raise_for_status()\n        indexing_status_response = response.json()\n        indexing_statuses = []\n        for connectors_by_source in indexing_status_response:\n            connectors = connectors_by_source[\"indexing_statuses\"]\n            for connector in connectors:\n                indexing_statuses.append(ConnectorIndexingStatusLite(**connector))\n        return indexing_statuses\n\n    @staticmethod\n    def get_connector_statuses(\n        user_performing_action: DATestUser,\n    ) -> list[ConnectorStatus]:\n        response = requests.get(\n            f\"{API_SERVER_URL}/manage/admin/connector/status\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return [ConnectorStatus(**status) for status in response.json()]\n\n    @staticmethod\n    def verify(\n        cc_pair: DATestCCPair,\n        user_performing_action: DATestUser,\n        verify_deleted: bool = False,\n    ) -> None:\n        all_cc_pairs = CCPairManager.get_connector_statuses(user_performing_action)\n        for retrieved_cc_pair in all_cc_pairs:\n            if retrieved_cc_pair.cc_pair_id == cc_pair.id:\n                if verify_deleted:\n                    # We assume that this check will be performed after the deletion is\n                    # already waited for\n                    raise ValueError(\n                        f\"CC pair {cc_pair.id} found but should be deleted\"\n                    )\n                if (\n                    retrieved_cc_pair.name == cc_pair.name\n                    and retrieved_cc_pair.connector.id == cc_pair.connector_id\n                    and retrieved_cc_pair.credential.id == cc_pair.credential_id\n                    and retrieved_cc_pair.access_type == cc_pair.access_type\n                    and set(retrieved_cc_pair.groups) == set(cc_pair.groups)\n                ):\n                    return\n\n        if not verify_deleted:\n            raise ValueError(f\"CC pair {cc_pair.id} not found\")\n\n    @staticmethod\n    def run_once(\n        cc_pair: DATestCCPair,\n        from_beginning: bool,\n        user_performing_action: DATestUser,\n    ) -> None:\n        body = {\n            \"connector_id\": cc_pair.connector_id,\n            \"credential_ids\": [cc_pair.credential_id],\n            \"from_beginning\": from_beginning,\n        }\n        result = requests.post(\n            url=f\"{API_SERVER_URL}/manage/admin/connector/run-once\",\n            json=body,\n            headers=user_performing_action.headers,\n        )\n        result.raise_for_status()\n\n    @staticmethod\n    def wait_for_indexing_inactive(\n        cc_pair: DATestCCPair,\n        user_performing_action: DATestUser,\n        timeout: float = MAX_DELAY,\n    ) -> None:\n        \"\"\"wait for the number of docs to be indexed on the connector.\n        This is used to test pausing a connector in the middle of indexing and\n        terminating that indexing.\"\"\"\n        print(f\"Indexing wait for inactive starting: cc_pair={cc_pair.id}\")\n        start = time.monotonic()\n        while True:\n            fetched_cc_pairs = CCPairManager.get_indexing_statuses(\n                user_performing_action\n            )\n            for fetched_cc_pair in fetched_cc_pairs:\n                if fetched_cc_pair.cc_pair_id != cc_pair.id:\n                    continue\n\n                if fetched_cc_pair.in_progress:\n                    continue\n\n                print(f\"Indexing is inactive: cc_pair={cc_pair.id}\")\n                return\n\n            elapsed = time.monotonic() - start\n            if elapsed > timeout:\n                raise TimeoutError(\n                    f\"Indexing wait for inactive timed out: cc_pair={cc_pair.id} timeout={timeout}s\"\n                )\n\n            print(\n                f\"Indexing wait for inactive still waiting: cc_pair={cc_pair.id} elapsed={elapsed:.2f} timeout={timeout}s\"\n            )\n            time.sleep(5)\n\n    @staticmethod\n    def wait_for_indexing_in_progress(\n        cc_pair: DATestCCPair,\n        user_performing_action: DATestUser,\n        timeout: float = MAX_DELAY,\n        num_docs: int = 16,\n    ) -> None:\n        \"\"\"wait for the number of docs to be indexed on the connector.\n        This is used to test pausing a connector in the middle of indexing and\n        terminating that indexing.\"\"\"\n        start = time.monotonic()\n        while True:\n            fetched_cc_pairs = CCPairManager.get_indexing_statuses(\n                user_performing_action\n            )\n            for fetched_cc_pair in fetched_cc_pairs:\n                if fetched_cc_pair.cc_pair_id != cc_pair.id:\n                    continue\n\n                if not fetched_cc_pair.in_progress:\n                    continue\n\n                if fetched_cc_pair.docs_indexed < num_docs:\n                    print(\n                        f\"Indexing in progress: cc_pair={cc_pair.id} \"\n                        f\"docs_indexed={fetched_cc_pair.docs_indexed} num_docs={num_docs}\"\n                    )\n                    continue\n\n                if fetched_cc_pair.docs_indexed >= num_docs:\n                    print(\n                        \"Indexed at least the requested number of docs: \"\n                        f\"cc_pair={cc_pair.id} \"\n                        f\"docs_indexed={fetched_cc_pair.docs_indexed} \"\n                        f\"num_docs={num_docs}\"\n                    )\n                    return\n\n            elapsed = time.monotonic() - start\n            if elapsed > timeout:\n                raise TimeoutError(\n                    f\"Indexing in progress wait timed out: cc_pair={cc_pair.id} timeout={timeout}s\"\n                )\n\n            print(\n                f\"Indexing in progress waiting: cc_pair={cc_pair.id} elapsed={elapsed:.2f} timeout={timeout}s\"\n            )\n            time.sleep(5)\n\n    @staticmethod\n    def wait_for_indexing_completion(\n        cc_pair: DATestCCPair,\n        after: datetime,\n        user_performing_action: DATestUser,\n        timeout: float = MAX_DELAY,\n    ) -> None:\n        \"\"\"after: Wait for an indexing success time after this time\"\"\"\n        start = time.monotonic()\n        while True:\n            fetched_cc_pairs = CCPairManager.get_indexing_statuses(\n                user_performing_action\n            )\n            for fetched_cc_pair in fetched_cc_pairs:\n                if fetched_cc_pair.cc_pair_id != cc_pair.id:\n                    continue\n\n                if fetched_cc_pair.in_progress:\n                    continue\n\n                if (\n                    fetched_cc_pair.last_success\n                    and fetched_cc_pair.last_success > after\n                ):\n                    print(f\"Indexing complete: cc_pair={cc_pair.id}\")\n                    return\n\n            elapsed = time.monotonic() - start\n            if elapsed > timeout:\n                raise TimeoutError(\n                    f\"Indexing wait timed out: cc_pair={cc_pair.id} timeout={timeout}s\"\n                )\n\n            print(\n                f\"Indexing wait for completion: cc_pair={cc_pair.id} elapsed={elapsed:.2f} timeout={timeout}s\"\n            )\n            time.sleep(5)\n\n    @staticmethod\n    def prune(\n        cc_pair: DATestCCPair,\n        user_performing_action: DATestUser,\n    ) -> None:\n        result = requests.post(\n            url=f\"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/prune\",\n            headers=user_performing_action.headers,\n        )\n        result.raise_for_status()\n\n    @staticmethod\n    def last_pruned(\n        cc_pair: DATestCCPair,\n        user_performing_action: DATestUser,\n    ) -> datetime | None:\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/last_pruned\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        response_str = response.json()\n\n        # If the response itself is a datetime string, parse it\n        if not isinstance(response_str, str):\n            return None\n\n        try:\n            return datetime.fromisoformat(response_str)\n        except ValueError:\n            return None\n\n    @staticmethod\n    def wait_for_prune(\n        cc_pair: DATestCCPair,\n        after: datetime,\n        user_performing_action: DATestUser,\n        timeout: float = MAX_DELAY,\n    ) -> None:\n        \"\"\"after: The task register time must be after this time.\"\"\"\n        start = time.monotonic()\n        while True:\n            last_pruned = CCPairManager.last_pruned(cc_pair, user_performing_action)\n            if last_pruned and last_pruned > after:\n                print(f\"Pruning complete: cc_pair={cc_pair.id}\")\n                break\n\n            elapsed = time.monotonic() - start\n            if elapsed > timeout:\n                raise TimeoutError(\n                    f\"CC pair pruning was not completed within {timeout} seconds\"\n                )\n\n            print(\n                f\"Waiting for CC pruning to complete. elapsed={elapsed:.2f} timeout={timeout}\"\n            )\n            time.sleep(5)\n\n    @staticmethod\n    def sync(\n        cc_pair: DATestCCPair,\n        user_performing_action: DATestUser,\n    ) -> None:\n        \"\"\"This function triggers a permission sync.\n        Naming / intent of this function probably could use improvement, but currently it's letting\n        409 Conflict pass through since if it's running that's what we were trying to do anyway.\n        \"\"\"\n        result = requests.post(\n            url=f\"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync-permissions\",\n            headers=user_performing_action.headers,\n        )\n        if result.status_code != 409:\n            result.raise_for_status()\n\n        group_sync_result = requests.post(\n            url=f\"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync-groups\",\n            headers=user_performing_action.headers,\n        )\n        if group_sync_result.status_code != 409:\n            group_sync_result.raise_for_status()\n        time.sleep(2)\n\n    @staticmethod\n    def get_doc_sync_task(\n        cc_pair: DATestCCPair,\n        user_performing_action: DATestUser,\n    ) -> datetime | None:\n        doc_sync_response = requests.get(\n            url=f\"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync-permissions\",\n            headers=user_performing_action.headers,\n        )\n        doc_sync_response.raise_for_status()\n        doc_sync_response_str = doc_sync_response.json()\n\n        # If the response itself is a datetime string, parse it\n        if not isinstance(doc_sync_response_str, str):\n            return None\n\n        try:\n            return datetime.fromisoformat(doc_sync_response_str)\n        except ValueError:\n            return None\n\n    @staticmethod\n    def get_group_sync_task(\n        cc_pair: DATestCCPair,\n        user_performing_action: DATestUser,\n    ) -> datetime | None:\n        group_sync_response = requests.get(\n            url=f\"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync-groups\",\n            headers=user_performing_action.headers,\n        )\n        group_sync_response.raise_for_status()\n        group_sync_response_str = group_sync_response.json()\n\n        # If the response itself is a datetime string, parse it\n        if not isinstance(group_sync_response_str, str):\n            return None\n\n        try:\n            return datetime.fromisoformat(group_sync_response_str)\n        except ValueError:\n            return None\n\n    @staticmethod\n    def get_doc_sync_statuses(\n        cc_pair: DATestCCPair,\n        user_performing_action: DATestUser,\n    ) -> list[DocumentSyncStatus]:\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/get-docs-sync-status\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        doc_sync_statuses: list[DocumentSyncStatus] = []\n        for doc_sync_status in response.json():\n            last_synced = doc_sync_status.get(\"last_synced\")\n            if last_synced:\n                last_synced = datetime.fromisoformat(last_synced)\n\n            last_modified = doc_sync_status.get(\"last_modified\")\n            if last_modified:\n                last_modified = datetime.fromisoformat(last_modified)\n\n            doc_sync_statuses.append(\n                DocumentSyncStatus(\n                    doc_id=doc_sync_status[\"doc_id\"],\n                    last_synced=last_synced,\n                    last_modified=last_modified,\n                )\n            )\n\n        return doc_sync_statuses\n\n    @staticmethod\n    def wait_for_sync(\n        cc_pair: DATestCCPair,\n        after: datetime,\n        user_performing_action: DATestUser,\n        timeout: float = MAX_DELAY,\n        number_of_updated_docs: int = 0,\n        # Sometimes waiting for a group sync is not necessary\n        should_wait_for_group_sync: bool = True,\n        # Sometimes waiting for a vespa sync is not necessary\n        should_wait_for_vespa_sync: bool = True,\n    ) -> None:\n        \"\"\"after: The task register time must be after this time.\"\"\"\n        doc_synced = False\n        group_synced = False\n        start = time.monotonic()\n        while True:\n            # We are treating both syncs as part of one larger permission sync job\n            doc_last_synced = CCPairManager.get_doc_sync_task(\n                cc_pair, user_performing_action\n            )\n            group_last_synced = CCPairManager.get_group_sync_task(\n                cc_pair, user_performing_action\n            )\n\n            if not doc_synced and doc_last_synced and doc_last_synced > after:\n                print(f\"doc_last_synced: {doc_last_synced}\")\n                print(f\"sync command start time: {after}\")\n                print(f\"permission sync complete: cc_pair={cc_pair.id}\")\n                doc_synced = True\n\n            if not group_synced and group_last_synced and group_last_synced > after:\n                print(f\"group_last_synced: {group_last_synced}\")\n                print(f\"sync command start time: {after}\")\n                print(f\"group sync complete: cc_pair={cc_pair.id}\")\n                group_synced = True\n\n            if doc_synced and (group_synced or not should_wait_for_group_sync):\n                break\n\n            elapsed = time.monotonic() - start\n            if elapsed > timeout:\n                raise TimeoutError(\n                    f\"Permission sync was not completed within {timeout} seconds\"\n                )\n\n            print(\n                f\"Waiting for CC sync to complete. elapsed={elapsed:.2f} timeout={timeout}\"\n            )\n            time.sleep(5)\n\n        # TODO: remove this sleep,\n        # this shouldnt be necessary but something is off with the timing for the sync jobs\n        time.sleep(5)\n\n        if not should_wait_for_vespa_sync:\n            return\n\n        print(\"waiting for vespa sync\")\n        # wait for the vespa sync to complete once the permission sync is complete\n        start = time.monotonic()\n        while True:\n            doc_sync_statuses = CCPairManager.get_doc_sync_statuses(\n                cc_pair=cc_pair,\n                user_performing_action=user_performing_action,\n            )\n            synced_docs = 0\n            for doc_sync_status in doc_sync_statuses:\n                if (\n                    doc_sync_status.last_synced is not None\n                    and doc_sync_status.last_modified is not None\n                    and doc_sync_status.last_synced >= doc_sync_status.last_modified\n                    and doc_sync_status.last_synced >= after\n                    and doc_sync_status.last_modified >= after\n                ):\n                    synced_docs += 1\n\n            if synced_docs >= number_of_updated_docs:\n                print(f\"all docs synced: cc_pair={cc_pair.id}\")\n                break\n\n            elapsed = time.monotonic() - start\n            if elapsed > timeout:\n                raise TimeoutError(\n                    f\"Vespa sync was not completed within {timeout} seconds\"\n                )\n\n            print(\n                f\"Waiting for vespa sync to complete. elapsed={elapsed:.2f} timeout={timeout}\"\n            )\n            time.sleep(5)\n\n    @staticmethod\n    def wait_for_deletion_completion(\n        user_performing_action: DATestUser,\n        cc_pair_id: int | None = None,\n    ) -> None:\n        \"\"\"if cc_pair_id is not specified, just waits until no connectors are in the deleting state.\n        if cc_pair_id is specified, checks to ensure the specific cc_pair_id is gone.\n        We had a bug where the connector was paused in the middle of deleting, so specifying the\n        cc_pair_id is good to do.\"\"\"\n        start = time.monotonic()\n        while True:\n            cc_pairs = CCPairManager.get_indexing_statuses(user_performing_action)\n            if cc_pair_id:\n                found = False\n                for cc_pair in cc_pairs:\n                    if cc_pair.cc_pair_id == cc_pair_id:\n                        found = True\n                        break\n\n                if not found:\n                    return\n            else:\n                if all(\n                    cc_pair.cc_pair_status != ConnectorCredentialPairStatus.DELETING\n                    for cc_pair in cc_pairs\n                ):\n                    return\n\n            if time.monotonic() - start > MAX_DELAY:\n                raise TimeoutError(\n                    f\"CC pairs deletion was not completed within the {MAX_DELAY} seconds\"\n                )\n            else:\n                print(\"Some CC pairs are still being deleted, waiting...\")\n            time.sleep(2)\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/chat.py",
    "content": "import json\nfrom typing import Any\nfrom typing import cast\nfrom typing import Literal\nfrom typing import TypedDict\nfrom uuid import UUID\n\nimport requests\nfrom requests.models import Response\n\nfrom onyx.context.search.models import SavedSearchDoc\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.file_store.models import FileDescriptor\nfrom onyx.llm.override_models import LLMOverride\nfrom onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE\nfrom onyx.server.query_and_chat.models import ChatSessionCreationRequest\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.server.query_and_chat.streaming_models import StreamingType\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestChatMessage\nfrom tests.integration.common_utils.test_models import DATestChatSession\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import ErrorResponse\nfrom tests.integration.common_utils.test_models import StreamedResponse\nfrom tests.integration.common_utils.test_models import ToolCallDebug\nfrom tests.integration.common_utils.test_models import ToolName\nfrom tests.integration.common_utils.test_models import ToolResult\n\n\nclass StreamPacketObj(TypedDict, total=False):\n    \"\"\"Base structure for streaming packet objects.\"\"\"\n\n    type: Literal[\n        \"message_start\",\n        \"message_delta\",\n        \"search_tool_start\",\n        \"search_tool_queries_delta\",\n        \"search_tool_documents_delta\",\n        \"image_generation_start\",\n        \"image_generation_heartbeat\",\n        \"image_generation_final\",\n        \"tool_call_debug\",\n    ]\n    content: str\n    final_documents: list[dict[str, Any]]\n    is_internet_search: bool\n    images: list[dict[str, Any]]\n    queries: list[str]\n    documents: list[dict[str, Any]]\n    tool_call_id: str\n    tool_name: str\n    tool_args: dict[str, Any]\n\n\nclass PlacementData(TypedDict, total=False):\n    \"\"\"Structure for packet placement information.\"\"\"\n\n    turn_index: int\n    tab_index: int\n    sub_turn_index: int | None\n\n\nclass StreamPacketData(TypedDict, total=False):\n    \"\"\"Structure for streaming response packets.\"\"\"\n\n    reserved_assistant_message_id: int\n    error: str\n    stack_trace: str\n    obj: StreamPacketObj\n    placement: PlacementData\n\n\nclass ChatSessionManager:\n    @staticmethod\n    def create(\n        user_performing_action: DATestUser,\n        persona_id: int = 0,\n        description: str = \"Test chat session\",\n        project_id: int | None = None,\n    ) -> DATestChatSession:\n        chat_session_creation_req = ChatSessionCreationRequest(\n            persona_id=persona_id,\n            description=description,\n            project_id=project_id,\n        )\n        response = requests.post(\n            f\"{API_SERVER_URL}/chat/create-chat-session\",\n            json=chat_session_creation_req.model_dump(),\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        chat_session_id = response.json()[\"chat_session_id\"]\n        return DATestChatSession(\n            id=chat_session_id, persona_id=persona_id, description=description\n        )\n\n    @staticmethod\n    def send_message(\n        chat_session_id: UUID,\n        message: str,\n        user_performing_action: DATestUser,\n        parent_message_id: int | None = None,\n        file_descriptors: list[FileDescriptor] | None = None,\n        allowed_tool_ids: list[int] | None = None,\n        forced_tool_ids: list[int] | None = None,\n        chat_session: DATestChatSession | None = None,\n        mock_llm_response: str | None = None,\n        deep_research: bool = False,\n        llm_override: LLMOverride | None = None,\n    ) -> StreamedResponse:\n        chat_message_req = SendMessageRequest(\n            message=message,\n            chat_session_id=chat_session_id,\n            parent_message_id=(\n                parent_message_id\n                if parent_message_id is not None\n                else AUTO_PLACE_AFTER_LATEST_MESSAGE\n            ),\n            file_descriptors=file_descriptors or [],\n            allowed_tool_ids=allowed_tool_ids,\n            forced_tool_id=forced_tool_ids[0] if forced_tool_ids else None,\n            mock_llm_response=mock_llm_response,\n            deep_research=deep_research,\n            llm_override=llm_override,\n        )\n\n        response = requests.post(\n            f\"{API_SERVER_URL}/chat/send-chat-message\",\n            json=chat_message_req.model_dump(mode=\"json\"),\n            headers=user_performing_action.headers,\n            stream=True,\n            cookies=user_performing_action.cookies,\n        )\n\n        streamed_response = ChatSessionManager.analyze_response(response)\n\n        if not chat_session:\n            return streamed_response\n\n        # TODO: ideally we would get the research answer purpose from the chat history\n        # but atm the field needed would not be used outside of testing, so we're not adding it.\n        # chat_history = ChatSessionManager.get_chat_history(\n        #     chat_session=chat_session,\n        #     user_performing_action=user_performing_action,\n        # )\n\n        # for message_obj in chat_history:\n        #     if message_obj.message_type == MessageType.ASSISTANT:\n        #         streamed_response.research_answer_purpose = (\n        #             message_obj.research_answer_purpose\n        #         )\n        #         streamed_response.assistant_message_id = message_obj.id\n        #         break\n\n        return streamed_response\n\n    @staticmethod\n    def send_message_with_disconnect(\n        chat_session_id: UUID,\n        message: str,\n        user_performing_action: DATestUser,\n        disconnect_after_packets: int = 0,\n        parent_message_id: int | None = None,\n        file_descriptors: list[FileDescriptor] | None = None,\n        allowed_tool_ids: list[int] | None = None,\n        forced_tool_ids: list[int] | None = None,\n        mock_llm_response: str | None = None,\n        deep_research: bool = False,\n        llm_override: LLMOverride | None = None,\n    ) -> None:\n        \"\"\"\n        Send a message and simulate client disconnect before stream completes.\n\n        This is useful for testing how the server handles client disconnections\n        during streaming responses.\n\n        Args:\n            chat_session_id: The chat session ID\n            message: The message to send\n            disconnect_after_packets: Disconnect after receiving this many packets.\n            ... (other standard message parameters)\n\n        Returns:\n            None. Caller can verify server-side cleanup via get_chat_history etc.\n        \"\"\"\n        chat_message_req = SendMessageRequest(\n            message=message,\n            chat_session_id=chat_session_id,\n            parent_message_id=(\n                parent_message_id\n                if parent_message_id is not None\n                else AUTO_PLACE_AFTER_LATEST_MESSAGE\n            ),\n            file_descriptors=file_descriptors or [],\n            allowed_tool_ids=allowed_tool_ids,\n            forced_tool_id=forced_tool_ids[0] if forced_tool_ids else None,\n            mock_llm_response=mock_llm_response,\n            deep_research=deep_research,\n            llm_override=llm_override,\n        )\n\n        packets_received = 0\n\n        with requests.post(\n            f\"{API_SERVER_URL}/chat/send-chat-message\",\n            json=chat_message_req.model_dump(mode=\"json\"),\n            headers=user_performing_action.headers,\n            stream=True,\n            cookies=user_performing_action.cookies,\n        ) as response:\n            for line in response.iter_lines():\n                if not line:\n                    continue\n\n                packets_received += 1\n                if packets_received > disconnect_after_packets:\n                    break\n\n        return None\n\n    @staticmethod\n    def analyze_response(response: Response) -> StreamedResponse:\n        response_data = cast(\n            list[StreamPacketData],\n            [\n                json.loads(line.decode(\"utf-8\"))\n                for line in response.iter_lines()\n                if line\n            ],\n        )\n        ind_to_tool_use: dict[int, ToolResult] = {}\n        tool_call_debug: list[ToolCallDebug] = []\n        top_documents: list[SearchDoc] = []\n        heartbeat_packets: list[StreamPacketData] = []\n        full_message = \"\"\n        assistant_message_id: int | None = None\n        error = None\n        ind: int\n        for data in response_data:\n            if reserved_id := data.get(\"reserved_assistant_message_id\"):\n                assistant_message_id = reserved_id\n            elif data.get(\"error\"):\n                error = ErrorResponse(\n                    error=str(data[\"error\"]),\n                    stack_trace=str(data.get(\"stack_trace\") or \"\"),\n                )\n            elif (error_obj := cast(dict[str, Any], data.get(\"obj\") or {})) and (\n                error_obj.get(\"error\")\n                or error_obj.get(\"type\") == StreamingType.ERROR.value\n            ):\n                error = ErrorResponse(\n                    error=str(error_obj.get(\"error\") or \"Streaming error\"),\n                    stack_trace=str(\n                        error_obj.get(\"stack_trace\") or data.get(\"stack_trace\") or \"\"\n                    ),\n                )\n            elif (\n                (data_obj := data.get(\"obj\"))\n                and (packet_type := data_obj.get(\"type\"))\n                and (\n                    ind := cast(\n                        int,\n                        (\n                            data.get(\"ind\")\n                            if data.get(\"ind\") is not None\n                            else data.get(\"placement\", {}).get(\"turn_index\")\n                        ),\n                    )\n                )\n                is not None\n            ):\n                packet_type_str = str(packet_type)\n                if packet_type_str == StreamingType.MESSAGE_START.value:\n                    final_docs = data_obj.get(\"final_documents\")\n                    if isinstance(final_docs, list):\n                        top_documents = [SearchDoc(**doc) for doc in final_docs]\n                    full_message += data_obj.get(\"content\", \"\")\n                elif packet_type_str == StreamingType.MESSAGE_DELTA.value:\n                    full_message += data_obj[\"content\"]\n                elif packet_type_str == StreamingType.SEARCH_TOOL_START.value:\n                    tool_name = (\n                        ToolName.INTERNET_SEARCH\n                        if data_obj.get(\"is_internet_search\", False)\n                        else ToolName.INTERNAL_SEARCH\n                    )\n                    ind_to_tool_use[ind] = ToolResult(\n                        tool_name=tool_name,\n                    )\n                elif packet_type_str == StreamingType.IMAGE_GENERATION_START.value:\n                    ind_to_tool_use[ind] = ToolResult(\n                        tool_name=ToolName.IMAGE_GENERATION,\n                    )\n                elif packet_type_str == StreamingType.IMAGE_GENERATION_HEARTBEAT.value:\n                    # Track heartbeat packets for debugging/testing\n                    heartbeat_packets.append(data)\n                elif packet_type_str == StreamingType.IMAGE_GENERATION_FINAL.value:\n                    from tests.integration.common_utils.test_models import (\n                        GeneratedImage,\n                    )\n\n                    images = data_obj.get(\"images\", [])\n                    ind_to_tool_use[ind].images.extend(\n                        [GeneratedImage(**img) for img in images]\n                    )\n                elif packet_type_str == StreamingType.SEARCH_TOOL_QUERIES_DELTA.value:\n                    ind_to_tool_use[ind].queries.extend(data_obj.get(\"queries\", []))\n                elif packet_type_str == StreamingType.SEARCH_TOOL_DOCUMENTS_DELTA.value:\n                    docs = []\n                    for doc in data_obj.get(\"documents\", []):\n                        if \"db_doc_id\" in doc:\n                            # Already a SavedSearchDoc format\n                            docs.append(SavedSearchDoc(**doc))\n                        else:\n                            # SearchDoc format - Convert to SavedSearchDoc\n                            search_doc = SearchDoc(**doc)\n                            docs.append(\n                                SavedSearchDoc.from_search_doc(search_doc, db_doc_id=0)\n                            )\n                    ind_to_tool_use[ind].documents.extend(docs)\n                elif packet_type_str == StreamingType.TOOL_CALL_DEBUG.value:\n                    tool_call_debug.append(\n                        ToolCallDebug(\n                            tool_call_id=str(data_obj.get(\"tool_call_id\", \"\")),\n                            tool_name=str(data_obj.get(\"tool_name\", \"\")),\n                            tool_args=cast(\n                                dict[str, Any], data_obj.get(\"tool_args\") or {}\n                            ),\n                        )\n                    )\n        # If there's an error, assistant_message_id might not be present\n        if not assistant_message_id and not error:\n            raise ValueError(\"Assistant message id not found\")\n        return StreamedResponse(\n            full_message=full_message,\n            assistant_message_id=assistant_message_id or -1,  # Use -1 for error cases\n            top_documents=top_documents,\n            used_tools=list(ind_to_tool_use.values()),\n            tool_call_debug=tool_call_debug,\n            heartbeat_packets=[dict(packet) for packet in heartbeat_packets],\n            error=error,\n        )\n\n    @staticmethod\n    def get_chat_history(\n        chat_session: DATestChatSession,\n        user_performing_action: DATestUser,\n    ) -> list[DATestChatMessage]:\n        response = requests.get(\n            f\"{API_SERVER_URL}/chat/get-chat-session/{chat_session.id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n        return [\n            DATestChatMessage(\n                id=msg[\"message_id\"],\n                chat_session_id=chat_session.id,\n                parent_message_id=msg.get(\"parent_message\"),\n                message=msg[\"message\"],\n                message_type=msg.get(\"message_type\"),\n                files=msg.get(\"files\"),\n            )\n            for msg in response.json()[\"messages\"]\n        ]\n\n    @staticmethod\n    def create_chat_message_feedback(\n        message_id: int,\n        is_positive: bool,\n        user_performing_action: DATestUser,\n        feedback_text: str | None = None,\n        predefined_feedback: str | None = None,\n    ) -> None:\n        response = requests.post(\n            url=f\"{API_SERVER_URL}/chat/create-chat-message-feedback\",\n            json={\n                \"chat_message_id\": message_id,\n                \"is_positive\": is_positive,\n                \"feedback_text\": feedback_text,\n                \"predefined_feedback\": predefined_feedback,\n            },\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def delete(\n        chat_session: DATestChatSession,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        \"\"\"\n        Delete a chat session and all its related records (messages, agent data, etc.)\n        Uses the default deletion method configured on the server.\n\n        Returns True if deletion was successful, False otherwise.\n        \"\"\"\n        response = requests.delete(\n            f\"{API_SERVER_URL}/chat/delete-chat-session/{chat_session.id}\",\n            headers=user_performing_action.headers,\n        )\n        return response.ok\n\n    @staticmethod\n    def soft_delete(\n        chat_session: DATestChatSession,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        \"\"\"\n        Soft delete a chat session (marks as deleted but keeps in database).\n\n        Returns True if deletion was successful, False otherwise.\n        \"\"\"\n        # Since there's no direct API for soft delete, we'll use a query parameter approach\n        # or make a direct call with hard_delete=False parameter via a new endpoint\n        response = requests.delete(\n            f\"{API_SERVER_URL}/chat/delete-chat-session/{chat_session.id}?hard_delete=false\",\n            headers=user_performing_action.headers,\n        )\n        return response.ok\n\n    @staticmethod\n    def hard_delete(\n        chat_session: DATestChatSession,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        \"\"\"\n        Hard delete a chat session (completely removes from database).\n\n        Returns True if deletion was successful, False otherwise.\n        \"\"\"\n        response = requests.delete(\n            f\"{API_SERVER_URL}/chat/delete-chat-session/{chat_session.id}?hard_delete=true\",\n            headers=user_performing_action.headers,\n        )\n        return response.ok\n\n    @staticmethod\n    def verify_deleted(\n        chat_session: DATestChatSession,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        \"\"\"\n        Verify that a chat session has been deleted by attempting to retrieve it.\n\n        Returns True if the chat session is confirmed deleted, False if it still exists.\n        \"\"\"\n        response = requests.get(\n            f\"{API_SERVER_URL}/chat/get-chat-session/{chat_session.id}\",\n            headers=user_performing_action.headers,\n        )\n        # Chat session should return 404 if it doesn't exist or is deleted\n        return response.status_code == 404\n\n    @staticmethod\n    def verify_soft_deleted(\n        chat_session: DATestChatSession,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        \"\"\"\n        Verify that a chat session has been soft deleted (marked as deleted but still in DB).\n\n        Returns True if the chat session is soft deleted, False otherwise.\n        \"\"\"\n        # Try to get the chat session with include_deleted=true\n        response = requests.get(\n            f\"{API_SERVER_URL}/chat/get-chat-session/{chat_session.id}?include_deleted=true\",\n            headers=user_performing_action.headers,\n        )\n\n        if response.status_code == 200:\n            # Chat exists, check if it's marked as deleted\n            chat_data = response.json()\n            return chat_data.get(\"deleted\", False) is True\n        return False\n\n    @staticmethod\n    def verify_hard_deleted(\n        chat_session: DATestChatSession,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        \"\"\"\n        Verify that a chat session has been hard deleted (completely removed from DB).\n\n        Returns True if the chat session is hard deleted, False otherwise.\n        \"\"\"\n        # Try to get the chat session with include_deleted=true\n        response = requests.get(\n            f\"{API_SERVER_URL}/chat/get-chat-session/{chat_session.id}?include_deleted=true\",\n            headers=user_performing_action.headers,\n        )\n\n        # For hard delete, even with include_deleted=true, the record should not exist\n        return response.status_code != 200\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/connector.py",
    "content": "from typing import Any\nfrom uuid import uuid4\n\nimport requests\n\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom onyx.server.documents.models import ConnectorUpdateRequest\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestConnector\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass ConnectorManager:\n    @staticmethod\n    def create(\n        user_performing_action: DATestUser,\n        name: str | None = None,\n        source: DocumentSource = DocumentSource.FILE,\n        input_type: InputType = InputType.LOAD_STATE,\n        connector_specific_config: dict[str, Any] | None = None,\n        access_type: AccessType = AccessType.PUBLIC,\n        groups: list[int] | None = None,\n        refresh_freq: int | None = None,\n    ) -> DATestConnector:\n        name = f\"{name}-connector\" if name else f\"test-connector-{uuid4()}\"\n\n        connector_update_request = ConnectorUpdateRequest(\n            name=name,\n            source=source,\n            input_type=input_type,\n            connector_specific_config=(\n                connector_specific_config\n                or (\n                    {\n                        \"file_locations\": [],\n                        \"file_names\": [],\n                        \"zip_metadata_file_id\": None,\n                    }\n                    if source == DocumentSource.FILE\n                    else {}\n                )\n            ),\n            access_type=access_type,\n            groups=groups or [],\n            refresh_freq=refresh_freq,\n        )\n\n        response = requests.post(\n            url=f\"{API_SERVER_URL}/manage/admin/connector\",\n            json=connector_update_request.model_dump(),\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n        response_data = response.json()\n        return DATestConnector(\n            id=response_data.get(\"id\"),\n            name=name,\n            source=source,\n            input_type=input_type,\n            connector_specific_config=connector_specific_config or {},\n            groups=groups,\n            access_type=access_type,\n        )\n\n    @staticmethod\n    def edit(\n        connector: DATestConnector,\n        user_performing_action: DATestUser,\n    ) -> None:\n        response = requests.patch(\n            url=f\"{API_SERVER_URL}/manage/admin/connector/{connector.id}\",\n            json=connector.model_dump(exclude={\"id\"}),\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def delete(\n        connector: DATestConnector,\n        user_performing_action: DATestUser,\n    ) -> None:\n        response = requests.delete(\n            url=f\"{API_SERVER_URL}/manage/admin/connector/{connector.id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def get_all(\n        user_performing_action: DATestUser,\n    ) -> list[DATestConnector]:\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/manage/connector\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return [\n            DATestConnector(\n                id=conn.get(\"id\"),\n                name=conn.get(\"name\", \"\"),\n                source=conn.get(\"source\", DocumentSource.FILE),\n                input_type=conn.get(\"input_type\", InputType.LOAD_STATE),\n                connector_specific_config=conn.get(\"connector_specific_config\", {}),\n            )\n            for conn in response.json()\n        ]\n\n    @staticmethod\n    def get(\n        connector_id: int,\n        user_performing_action: DATestUser,\n    ) -> DATestConnector:\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/manage/connector/{connector_id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        conn = response.json()\n        return DATestConnector(\n            id=conn.get(\"id\"),\n            name=conn.get(\"name\", \"\"),\n            source=conn.get(\"source\", DocumentSource.FILE),\n            input_type=conn.get(\"input_type\", InputType.LOAD_STATE),\n            connector_specific_config=conn.get(\"connector_specific_config\", {}),\n        )\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/credential.py",
    "content": "from typing import Any\nfrom uuid import uuid4\n\nimport requests\n\nfrom onyx.server.documents.models import CredentialSnapshot\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestCredential\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass CredentialManager:\n    @staticmethod\n    def create(\n        user_performing_action: DATestUser,\n        credential_json: dict[str, Any] | None = None,\n        admin_public: bool = True,\n        name: str | None = None,\n        source: DocumentSource = DocumentSource.FILE,\n        curator_public: bool = True,\n        groups: list[int] | None = None,\n    ) -> DATestCredential:\n        name = f\"{name}-credential\" if name else f\"test-credential-{uuid4()}\"\n\n        credential_request = {\n            \"name\": name,\n            \"credential_json\": credential_json or {},\n            \"admin_public\": admin_public,\n            \"source\": source,\n            \"curator_public\": curator_public,\n            \"groups\": groups or [],\n        }\n\n        response = requests.post(\n            url=f\"{API_SERVER_URL}/manage/credential\",\n            json=credential_request,\n            headers=user_performing_action.headers,\n        )\n\n        response.raise_for_status()\n        return DATestCredential(\n            id=response.json()[\"id\"],\n            name=name,\n            credential_json=credential_json or {},\n            admin_public=admin_public,\n            source=source,\n            curator_public=curator_public,\n            groups=groups or [],\n        )\n\n    @staticmethod\n    def edit(\n        credential: DATestCredential,\n        user_performing_action: DATestUser,\n    ) -> None:\n        request = credential.model_dump(include={\"name\", \"credential_json\"})\n        response = requests.put(\n            url=f\"{API_SERVER_URL}/manage/admin/credential/{credential.id}\",\n            json=request,\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def delete(\n        credential: DATestCredential,\n        user_performing_action: DATestUser,\n    ) -> None:\n        response = requests.delete(\n            url=f\"{API_SERVER_URL}/manage/credential/{credential.id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def get(\n        credential_id: int,\n        user_performing_action: DATestUser,\n    ) -> CredentialSnapshot:\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/manage/credential/{credential_id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return CredentialSnapshot(**response.json())\n\n    @staticmethod\n    def get_all(\n        user_performing_action: DATestUser,\n    ) -> list[CredentialSnapshot]:\n        response = requests.get(\n            f\"{API_SERVER_URL}/manage/credential\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return [CredentialSnapshot(**cred) for cred in response.json()]\n\n    @staticmethod\n    def verify(\n        credential: DATestCredential,\n        user_performing_action: DATestUser,\n        verify_deleted: bool = False,\n    ) -> None:\n        all_credentials = CredentialManager.get_all(user_performing_action)\n        for fetched_credential in all_credentials:\n            if credential.id == fetched_credential.id:\n                if verify_deleted:\n                    raise ValueError(\n                        f\"Credential {credential.id} found but should be deleted\"\n                    )\n                if (\n                    credential.name == fetched_credential.name\n                    and credential.admin_public == fetched_credential.admin_public\n                    and credential.source == fetched_credential.source\n                    and credential.curator_public == fetched_credential.curator_public\n                ):\n                    return\n        if not verify_deleted:\n            raise ValueError(f\"Credential {credential.id} not found\")\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/discord_bot.py",
    "content": "\"\"\"Manager for Discord bot API integration tests.\"\"\"\n\nimport requests\n\nfrom onyx.db.discord_bot import create_channel_config\nfrom onyx.db.discord_bot import create_guild_config\nfrom onyx.db.discord_bot import register_guild\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.utils import DiscordChannelView\nfrom onyx.server.manage.discord_bot.utils import generate_discord_registration_key\nfrom shared_configs.contextvars import get_current_tenant_id\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestDiscordChannelConfig\nfrom tests.integration.common_utils.test_models import DATestDiscordGuildConfig\nfrom tests.integration.common_utils.test_models import DATestUser\n\nDISCORD_BOT_API_URL = f\"{API_SERVER_URL}/manage/admin/discord-bot\"\n\n\nclass DiscordBotManager:\n    \"\"\"Manager for Discord bot API operations.\"\"\"\n\n    # === Bot Config ===\n\n    @staticmethod\n    def get_bot_config(\n        user_performing_action: DATestUser,\n    ) -> dict:\n        \"\"\"Get Discord bot config.\"\"\"\n        response = requests.get(\n            url=f\"{DISCORD_BOT_API_URL}/config\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n        )\n        response.raise_for_status()\n        return response.json()\n\n    @staticmethod\n    def create_bot_config(\n        bot_token: str,\n        user_performing_action: DATestUser,\n    ) -> dict:\n        \"\"\"Create Discord bot config.\"\"\"\n        response = requests.post(\n            url=f\"{DISCORD_BOT_API_URL}/config\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n            json={\"bot_token\": bot_token},\n        )\n        response.raise_for_status()\n        return response.json()\n\n    @staticmethod\n    def delete_bot_config(\n        user_performing_action: DATestUser,\n    ) -> dict:\n        \"\"\"Delete Discord bot config.\"\"\"\n        response = requests.delete(\n            url=f\"{DISCORD_BOT_API_URL}/config\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n        )\n        response.raise_for_status()\n        return response.json()\n\n    # === Guild Config ===\n\n    @staticmethod\n    def list_guilds(\n        user_performing_action: DATestUser,\n    ) -> list[dict]:\n        \"\"\"List all guild configs.\"\"\"\n        response = requests.get(\n            url=f\"{DISCORD_BOT_API_URL}/guilds\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n        )\n        response.raise_for_status()\n        return response.json()\n\n    @staticmethod\n    def create_guild(\n        user_performing_action: DATestUser,\n    ) -> DATestDiscordGuildConfig:\n        \"\"\"Create a new guild config with registration key.\"\"\"\n        response = requests.post(\n            url=f\"{DISCORD_BOT_API_URL}/guilds\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n        )\n        response.raise_for_status()\n        data = response.json()\n        return DATestDiscordGuildConfig(\n            id=data[\"id\"],\n            registration_key=data[\"registration_key\"],\n        )\n\n    @staticmethod\n    def get_guild(\n        config_id: int,\n        user_performing_action: DATestUser,\n    ) -> dict:\n        \"\"\"Get a specific guild config.\"\"\"\n        response = requests.get(\n            url=f\"{DISCORD_BOT_API_URL}/guilds/{config_id}\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n        )\n        response.raise_for_status()\n        return response.json()\n\n    @staticmethod\n    def update_guild(\n        config_id: int,\n        user_performing_action: DATestUser,\n        enabled: bool | None = None,\n        default_persona_id: int | None = None,\n    ) -> dict:\n        \"\"\"Update a guild config.\"\"\"\n        # Fetch current guild config to get existing values\n        current_guild = DiscordBotManager.get_guild(config_id, user_performing_action)\n\n        # Build request body with required fields\n        body: dict = {\n            \"enabled\": enabled if enabled is not None else current_guild[\"enabled\"],\n            \"default_persona_id\": (\n                default_persona_id\n                if default_persona_id is not None\n                else current_guild.get(\"default_persona_id\")\n            ),\n        }\n\n        response = requests.patch(\n            url=f\"{DISCORD_BOT_API_URL}/guilds/{config_id}\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n            json=body,\n        )\n        response.raise_for_status()\n        return response.json()\n\n    @staticmethod\n    def delete_guild(\n        config_id: int,\n        user_performing_action: DATestUser,\n    ) -> dict:\n        \"\"\"Delete a guild config.\"\"\"\n        response = requests.delete(\n            url=f\"{DISCORD_BOT_API_URL}/guilds/{config_id}\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n        )\n        response.raise_for_status()\n        return response.json()\n\n    # === Channel Config ===\n\n    @staticmethod\n    def list_channels(\n        guild_config_id: int,\n        user_performing_action: DATestUser,\n    ) -> list[DATestDiscordChannelConfig]:\n        \"\"\"List all channel configs for a guild.\"\"\"\n        response = requests.get(\n            url=f\"{DISCORD_BOT_API_URL}/guilds/{guild_config_id}/channels\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n        )\n        response.raise_for_status()\n        return [DATestDiscordChannelConfig(**c) for c in response.json()]\n\n    @staticmethod\n    def update_channel(\n        guild_config_id: int,\n        channel_config_id: int,\n        user_performing_action: DATestUser,\n        enabled: bool = False,\n        thread_only_mode: bool = False,\n        require_bot_invocation: bool = True,\n        persona_override_id: int | None = None,\n    ) -> DATestDiscordChannelConfig:\n        \"\"\"Update a channel config.\n\n        All fields are required by the API. Default values match the channel\n        config defaults from create_channel_config.\n        \"\"\"\n        body: dict = {\n            \"enabled\": enabled,\n            \"thread_only_mode\": thread_only_mode,\n            \"require_bot_invocation\": require_bot_invocation,\n            \"persona_override_id\": persona_override_id,\n        }\n\n        response = requests.patch(\n            url=f\"{DISCORD_BOT_API_URL}/guilds/{guild_config_id}/channels/{channel_config_id}\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n            json=body,\n        )\n        response.raise_for_status()\n        return DATestDiscordChannelConfig(**response.json())\n\n    # === Utility methods for testing ===\n\n    @staticmethod\n    def create_registered_guild_in_db(\n        guild_id: int,\n        guild_name: str,\n    ) -> DATestDiscordGuildConfig:\n        \"\"\"Create a registered guild config directly in the database.\n\n        This creates a guild that has already completed registration,\n        with guild_id and guild_name set. Use this for testing channel\n        endpoints which require a registered guild.\n        \"\"\"\n        with get_session_with_current_tenant() as db_session:\n            tenant_id = get_current_tenant_id()\n            registration_key = generate_discord_registration_key(tenant_id)\n            config = create_guild_config(db_session, registration_key)\n            config = register_guild(db_session, config, guild_id, guild_name)\n            db_session.commit()\n\n            return DATestDiscordGuildConfig(\n                id=config.id,\n                registration_key=registration_key,\n            )\n\n    @staticmethod\n    def get_guild_or_none(\n        config_id: int,\n        user_performing_action: DATestUser,\n    ) -> dict | None:\n        \"\"\"Get a guild config, returning None if not found.\"\"\"\n        response = requests.get(\n            url=f\"{DISCORD_BOT_API_URL}/guilds/{config_id}\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n        )\n        if response.status_code == 404:\n            return None\n        response.raise_for_status()\n        return response.json()\n\n    @staticmethod\n    def delete_guild_if_exists(\n        config_id: int,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        \"\"\"Delete a guild config if it exists. Returns True if deleted.\"\"\"\n        response = requests.delete(\n            url=f\"{DISCORD_BOT_API_URL}/guilds/{config_id}\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n        )\n        if response.status_code == 404:\n            return False\n        response.raise_for_status()\n        return True\n\n    @staticmethod\n    def delete_bot_config_if_exists(\n        user_performing_action: DATestUser,\n    ) -> bool:\n        \"\"\"Delete bot config if it exists. Returns True if deleted.\"\"\"\n        response = requests.delete(\n            url=f\"{DISCORD_BOT_API_URL}/config\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n        )\n        if response.status_code == 404:\n            return False\n        response.raise_for_status()\n        return True\n\n    @staticmethod\n    def create_test_channel_in_db(\n        guild_config_id: int,\n        channel_id: int,\n        channel_name: str,\n        channel_type: str = \"text\",\n        is_private: bool = False,\n    ) -> DATestDiscordChannelConfig:\n        \"\"\"Create a test channel config directly in the database.\n\n        This is needed because channels are normally synced from Discord,\n        not created via API. For testing the channel API endpoints,\n        we need to populate test data directly.\n        \"\"\"\n        with get_session_with_current_tenant() as db_session:\n            channel_view = DiscordChannelView(\n                channel_id=channel_id,\n                channel_name=channel_name,\n                channel_type=channel_type,\n                is_private=is_private,\n            )\n            config = create_channel_config(db_session, guild_config_id, channel_view)\n            db_session.commit()\n\n            return DATestDiscordChannelConfig(\n                id=config.id,\n                guild_config_id=config.guild_config_id,\n                channel_id=config.channel_id,\n                channel_name=config.channel_name,\n                channel_type=config.channel_type,\n                is_private=config.is_private,\n                enabled=config.enabled,\n                thread_only_mode=config.thread_only_mode,\n                require_bot_invocation=config.require_bot_invocation,\n                persona_override_id=config.persona_override_id,\n            )\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/document.py",
    "content": "from uuid import uuid4\n\nimport requests\nfrom sqlalchemy import and_\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.enums import AccessType\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.db.models import DocumentByConnectorCredentialPair\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import NUM_DOCS\nfrom tests.integration.common_utils.managers.api_key import DATestAPIKey\nfrom tests.integration.common_utils.managers.cc_pair import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import SimpleTestDocument\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\n\ndef _verify_document_permissions(\n    retrieved_doc: dict,\n    cc_pair: DATestCCPair,\n    doc_creating_user: DATestUser,\n    doc_set_names: list[str] | None = None,\n    group_names: list[str] | None = None,\n) -> None:\n    acl_keys = set(retrieved_doc.get(\"access_control_list\", {}).keys())\n    print(f\"ACL keys: {acl_keys}\")\n\n    if cc_pair.access_type == AccessType.PUBLIC:\n        if \"PUBLIC\" not in acl_keys:\n            raise ValueError(\n                f\"Document {retrieved_doc['document_id']} is public but does not have the PUBLIC ACL key\"\n            )\n\n    if f\"user_email:{doc_creating_user.email}\" not in acl_keys:\n        raise ValueError(\n            f\"Document {retrieved_doc['document_id']} was created by user\"\n            f\" {doc_creating_user.email} but does not have the user_email:{doc_creating_user.email} ACL key\"\n        )\n\n    if group_names is not None:\n        expected_group_keys = {f\"group:{group_name}\" for group_name in group_names}\n        found_group_keys = {key for key in acl_keys if key.startswith(\"group:\")}\n        if found_group_keys != expected_group_keys:\n            raise ValueError(\n                f\"Document {retrieved_doc['document_id']} has incorrect group ACL keys. \"\n                f\"Expected: {expected_group_keys}  Found: {found_group_keys}\\n\"\n                f\"All ACL keys: {acl_keys}\"\n            )\n\n    if doc_set_names is not None:\n        found_doc_set_names = set(retrieved_doc.get(\"document_sets\", {}).keys())\n        if found_doc_set_names != set(doc_set_names):\n            raise ValueError(\n                f\"Document set names mismatch. \\nFound: {found_doc_set_names}, \\nExpected: {set(doc_set_names)}\"\n            )\n\n\ndef _generate_dummy_document(\n    document_id: str,\n    cc_pair_id: int,\n    content: str | None = None,\n    extra_metadata: dict | None = None,\n) -> dict:\n    text = content if content else f\"This is test document {document_id}\"\n\n    metadata: dict = {\"document_id\": document_id}\n    if extra_metadata:\n        metadata.update(extra_metadata)\n\n    return {\n        \"document\": {\n            \"id\": document_id,\n            \"sections\": [\n                {\n                    \"text\": text,\n                    \"link\": f\"{document_id}\",\n                }\n            ],\n            \"source\": DocumentSource.NOT_APPLICABLE,\n            \"metadata\": metadata,\n            \"semantic_identifier\": f\"Test Document {document_id}\",\n            \"from_ingestion_api\": True,\n        },\n        \"cc_pair_id\": cc_pair_id,\n    }\n\n\nclass DocumentManager:\n    \"\"\"\n    Manager for seeding documents via the ingestion API.\n    Used to test various connector features.\n    \"\"\"\n\n    @staticmethod\n    def seed_dummy_docs(\n        cc_pair: DATestCCPair,\n        api_key: DATestAPIKey,\n        num_docs: int = NUM_DOCS,\n        document_ids: list[str] | None = None,\n    ) -> list[SimpleTestDocument]:\n        # Use provided document_ids if available, otherwise generate random UUIDs\n        if document_ids is None:\n            document_ids = [f\"test-doc-{uuid4()}\" for _ in range(num_docs)]\n        else:\n            num_docs = len(document_ids)\n        # Create and ingest some documents\n        documents: list[dict] = []\n        for document_id in document_ids:\n            document = _generate_dummy_document(document_id, cc_pair.id)\n            documents.append(document)\n            response = requests.post(\n                f\"{API_SERVER_URL}/onyx-api/ingestion\",\n                json=document,\n                headers=api_key.headers,\n            )\n            response.raise_for_status()\n\n        print(\n            f\"Seeding docs for api_key_id={api_key.api_key_id} completed successfully.\"\n        )\n        return [\n            SimpleTestDocument(\n                id=document[\"document\"][\"id\"],\n                content=document[\"document\"][\"sections\"][0][\"text\"],\n            )\n            for document in documents\n        ]\n\n    @staticmethod\n    def seed_doc_with_content(\n        cc_pair: DATestCCPair,\n        content: str,\n        api_key: DATestAPIKey,\n        document_id: str | None = None,\n        metadata: dict | None = None,\n    ) -> SimpleTestDocument:\n        # Use provided document_ids if available, otherwise generate random UUIDs\n        if document_id is None:\n            document_id = f\"test-doc-{uuid4()}\"\n        # Create and ingest some documents\n        document: dict = _generate_dummy_document(\n            document_id,\n            cc_pair.id,\n            content,\n            extra_metadata=metadata,\n        )\n        response = requests.post(\n            f\"{API_SERVER_URL}/onyx-api/ingestion\",\n            json=document,\n            headers=api_key.headers,\n        )\n        response.raise_for_status()\n\n        print(\n            f\"Seeding doc for api_key_id={api_key.api_key_id} completed successfully.\"\n        )\n\n        return SimpleTestDocument(\n            id=document[\"document\"][\"id\"],\n            content=document[\"document\"][\"sections\"][0][\"text\"],\n        )\n\n    @staticmethod\n    def verify(\n        vespa_client: vespa_fixture,\n        cc_pair: DATestCCPair,\n        doc_creating_user: DATestUser,\n        # If None, will not check doc sets or groups\n        # If empty list, will check for empty doc sets or groups\n        doc_set_names: list[str] | None = None,\n        group_names: list[str] | None = None,\n        verify_deleted: bool = False,\n    ) -> None:\n        doc_ids = [document.id for document in cc_pair.documents]\n        retrieved_docs_dict = vespa_client.get_documents_by_id(doc_ids)[\"documents\"]\n\n        retrieved_docs = {\n            doc[\"fields\"][\"document_id\"]: doc[\"fields\"] for doc in retrieved_docs_dict\n        }\n\n        # NOTE(rkuo): too much log spam\n        # Left this here for debugging purposes.\n        # import json\n\n        # print(\"DEBUGGING DOCUMENTS\")\n        # print(retrieved_docs)\n        # for doc in retrieved_docs.values():\n        #     printable_doc = doc.copy()\n        #     print(printable_doc.keys())\n        #     printable_doc.pop(\"embeddings\")\n        #     printable_doc.pop(\"title_embedding\")\n        #     print(json.dumps(printable_doc, indent=2))\n\n        for document in cc_pair.documents:\n            retrieved_doc = retrieved_docs.get(document.id)\n            if not retrieved_doc:\n                if not verify_deleted:\n                    print(f\"Document not found: {document.id}\")\n                    print(retrieved_docs.keys())\n                    print(retrieved_docs.values())\n                    raise ValueError(f\"Document not found: {document.id}\")\n                continue\n            if verify_deleted:\n                raise ValueError(\n                    f\"Document found when it should be deleted: {document.id}\"\n                )\n            _verify_document_permissions(\n                retrieved_doc,\n                cc_pair,\n                doc_creating_user,\n                doc_set_names,\n                group_names,\n            )\n\n    @staticmethod\n    def fetch_documents_for_cc_pair(\n        cc_pair_id: int,\n        db_session: Session,\n        vespa_client: vespa_fixture,\n    ) -> list[SimpleTestDocument]:\n        stmt = (\n            select(DocumentByConnectorCredentialPair)\n            .join(\n                ConnectorCredentialPair,\n                and_(\n                    DocumentByConnectorCredentialPair.connector_id\n                    == ConnectorCredentialPair.connector_id,\n                    DocumentByConnectorCredentialPair.credential_id\n                    == ConnectorCredentialPair.credential_id,\n                ),\n            )\n            .where(ConnectorCredentialPair.id == cc_pair_id)\n        )\n        documents = db_session.execute(stmt).scalars().all()\n        if not documents:\n            return []\n\n        doc_ids = [document.id for document in documents]\n        retrieved_docs_dict = vespa_client.get_documents_by_id(doc_ids)[\"documents\"]\n\n        final_docs: list[SimpleTestDocument] = []\n        # NOTE: they are really chunks, but we're assuming that for these tests\n        # we only have one chunk per document for now\n        for doc_dict in retrieved_docs_dict:\n            doc_id = doc_dict[\"fields\"][\"document_id\"]\n            doc_content = doc_dict[\"fields\"][\"content\"]\n            # still called `image_file_name` in Vespa for backwards compatibility\n            image_file_id = doc_dict[\"fields\"].get(\"image_file_name\", None)\n            final_docs.append(\n                SimpleTestDocument(\n                    id=doc_id, content=doc_content, image_file_id=image_file_id\n                )\n            )\n\n        return final_docs\n\n\nclass IngestionManager(DocumentManager):\n    \"\"\"\n    Manager for additional ingestion API endpoints not covered by DocumentManager.\n    Used specifically to test the ingestion API.\n    \"\"\"\n\n    @staticmethod\n    def list_all_ingestion_docs(\n        api_key: DATestAPIKey,\n    ) -> list[dict]:\n        response = requests.get(\n            f\"{API_SERVER_URL}/onyx-api/ingestion\",\n            headers=api_key.headers,\n        )\n        response.raise_for_status()\n        return response.json()\n\n    @staticmethod\n    def delete(\n        document_id: str,\n        api_key: DATestAPIKey,\n    ) -> None:\n        response = requests.delete(\n            f\"{API_SERVER_URL}/onyx-api/ingestion/{document_id}\",\n            headers=api_key.headers,\n        )\n        response.raise_for_status()\n        print(f\"Deleted document {document_id} successfully.\")\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/document_search.py",
    "content": "import requests\n\nfrom ee.onyx.server.query_and_chat.models import SearchFullResponse\nfrom ee.onyx.server.query_and_chat.models import SendSearchQueryRequest\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass DocumentSearchManager:\n    @staticmethod\n    def search_documents(\n        query: str,\n        user_performing_action: DATestUser,\n    ) -> list[str]:\n        \"\"\"\n        Search for documents using the EE search API.\n\n        Args:\n            query: The search query string\n            user_performing_action: The user performing the search (for auth)\n\n        Returns:\n            A list of document content strings (blurbs) from the search results\n        \"\"\"\n        search_request = SendSearchQueryRequest(\n            search_query=query,\n            filters=None,\n            stream=False,\n        )\n        result = requests.post(\n            url=f\"{API_SERVER_URL}/search/send-search-message\",\n            json=search_request.model_dump(),\n            headers=user_performing_action.headers,\n        )\n        result.raise_for_status()\n        result_json = result.json()\n        search_response = SearchFullResponse(**result_json)\n\n        # Return the blurbs as the document content\n        # For small documents (like test docs), the blurb should contain the full content\n        document_content_list: list[str] = [\n            doc.blurb for doc in search_response.search_docs\n        ]\n        return document_content_list\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/document_set.py",
    "content": "import time\nfrom typing import Any\nfrom uuid import UUID\nfrom uuid import uuid4\n\nimport requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import MAX_DELAY\nfrom tests.integration.common_utils.test_models import DATestDocumentSet\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass DocumentSetManager:\n    @staticmethod\n    def create(\n        user_performing_action: DATestUser,\n        name: str | None = None,\n        description: str | None = None,\n        cc_pair_ids: list[int] | None = None,\n        is_public: bool = True,\n        users: list[str] | None = None,\n        groups: list[int] | None = None,\n        federated_connectors: list[dict[str, Any]] | None = None,\n    ) -> DATestDocumentSet:\n        if name is None:\n            name = f\"test_doc_set_{str(uuid4())}\"\n\n        doc_set_creation_request = {\n            \"name\": name,\n            \"description\": description or name,\n            \"cc_pair_ids\": cc_pair_ids or [],\n            \"is_public\": is_public,\n            \"users\": [str(UUID(user_id)) for user_id in (users or [])],\n            \"groups\": groups or [],\n            \"federated_connectors\": federated_connectors or [],\n        }\n\n        response = requests.post(\n            f\"{API_SERVER_URL}/manage/admin/document-set\",\n            json=doc_set_creation_request,\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n        return DATestDocumentSet(\n            id=int(response.json()),\n            name=name,\n            description=description or name,\n            cc_pair_ids=cc_pair_ids or [],\n            is_public=is_public,\n            is_up_to_date=True,\n            users=users or [],\n            groups=groups or [],\n            federated_connectors=federated_connectors or [],\n        )\n\n    @staticmethod\n    def edit(\n        document_set: DATestDocumentSet,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        doc_set_update_request = {\n            \"id\": document_set.id,\n            \"description\": document_set.description,\n            \"cc_pair_ids\": document_set.cc_pair_ids,\n            \"is_public\": document_set.is_public,\n            \"users\": [str(UUID(user_id)) for user_id in document_set.users],\n            \"groups\": document_set.groups,\n            \"federated_connectors\": document_set.federated_connectors,\n        }\n        response = requests.patch(\n            f\"{API_SERVER_URL}/manage/admin/document-set\",\n            json=doc_set_update_request,\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return True\n\n    @staticmethod\n    def delete(\n        document_set: DATestDocumentSet,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        response = requests.delete(\n            f\"{API_SERVER_URL}/manage/admin/document-set/{document_set.id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return True\n\n    @staticmethod\n    def get_all(\n        user_performing_action: DATestUser,\n    ) -> list[DATestDocumentSet]:\n        response = requests.get(\n            f\"{API_SERVER_URL}/manage/document-set\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return [\n            DATestDocumentSet(\n                id=doc_set[\"id\"],\n                name=doc_set[\"name\"],\n                description=doc_set[\"description\"],\n                cc_pair_ids=[cc_pair[\"id\"] for cc_pair in doc_set[\"cc_pair_summaries\"]],\n                is_public=doc_set[\"is_public\"],\n                is_up_to_date=doc_set[\"is_up_to_date\"],\n                users=[str(user_id) for user_id in doc_set[\"users\"]],\n                groups=doc_set[\"groups\"],\n                federated_connectors=doc_set[\"federated_connector_summaries\"],\n            )\n            for doc_set in response.json()\n        ]\n\n    @staticmethod\n    def wait_for_sync(\n        user_performing_action: DATestUser,\n        document_sets_to_check: list[DATestDocumentSet] | None = None,\n    ) -> None:\n        # wait for document sets to be synced\n        start = time.time()\n        while True:\n            doc_sets = DocumentSetManager.get_all(user_performing_action)\n            if document_sets_to_check:\n                check_ids = {doc_set.id for doc_set in document_sets_to_check}\n                doc_set_ids = {doc_set.id for doc_set in doc_sets}\n                if not check_ids.issubset(doc_set_ids):\n                    raise RuntimeError(\"Document set not found\")\n                doc_sets = [doc_set for doc_set in doc_sets if doc_set.id in check_ids]\n            all_up_to_date = all(doc_set.is_up_to_date for doc_set in doc_sets)\n\n            if all_up_to_date:\n                print(\"Document sets synced successfully.\")\n                break\n\n            if time.time() - start > MAX_DELAY:\n                not_synced_doc_sets = [\n                    doc_set for doc_set in doc_sets if not doc_set.is_up_to_date\n                ]\n                raise TimeoutError(\n                    f\"Document sets were not synced within the {MAX_DELAY} seconds. \"\n                    f\"Remaining unsynced document sets: {len(not_synced_doc_sets)}. \"\n                    f\"IDs: {[doc_set.id for doc_set in not_synced_doc_sets]}\"\n                )\n            else:\n                not_synced_doc_sets = [\n                    doc_set for doc_set in doc_sets if not doc_set.is_up_to_date\n                ]\n                print(\n                    f\"Document sets were not synced yet, waiting... \"\n                    f\"{len(not_synced_doc_sets)}/{len(doc_sets)} document sets still syncing. \"\n                    f\"IDs: {[doc_set.id for doc_set in not_synced_doc_sets]}\"\n                )\n\n            time.sleep(2)\n\n    @staticmethod\n    def verify(\n        document_set: DATestDocumentSet,\n        user_performing_action: DATestUser,\n        verify_deleted: bool = False,\n    ) -> None:\n        doc_sets = DocumentSetManager.get_all(user_performing_action)\n        for doc_set in doc_sets:\n            if doc_set.id == document_set.id:\n                if verify_deleted:\n                    raise ValueError(\n                        f\"Document set {document_set.id} found but should have been deleted\"\n                    )\n                if (\n                    doc_set.name == document_set.name\n                    and set(doc_set.cc_pair_ids) == set(document_set.cc_pair_ids)\n                    and doc_set.is_public == document_set.is_public\n                    and set(doc_set.users) == set(document_set.users)\n                    and set(doc_set.groups) == set(document_set.groups)\n                    and doc_set.federated_connectors\n                    == document_set.federated_connectors\n                ):\n                    return\n        if not verify_deleted:\n            raise ValueError(f\"Document set {document_set.id} not found\")\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/file.py",
    "content": "import io\nimport mimetypes\nfrom typing import cast\nfrom typing import IO\nfrom typing import List\nfrom typing import Tuple\n\nimport requests\n\nfrom onyx.file_store.models import FileDescriptor\nfrom onyx.server.documents.models import FileUploadResponse\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass FileManager:\n    @staticmethod\n    def upload_files(\n        files: List[Tuple[str, IO]],\n        user_performing_action: DATestUser,\n    ) -> Tuple[List[FileDescriptor], str]:\n        headers = user_performing_action.headers\n        headers.pop(\"Content-Type\", None)\n\n        files_param = []\n        for filename, file_obj in files:\n            mime_type, _ = mimetypes.guess_type(filename)\n            if mime_type is None:\n                mime_type = \"application/octet-stream\"\n            files_param.append((\"files\", (filename, file_obj, mime_type)))\n\n        response = requests.post(\n            f\"{API_SERVER_URL}/user/projects/file/upload\",\n            files=files_param,\n            headers=headers,\n        )\n\n        if not response.ok:\n            try:\n                detail = response.json().get(\"detail\", response.text)\n            except Exception:\n                detail = response.text\n            return (\n                cast(List[FileDescriptor], []),\n                f\"Failed to upload files - {detail}\",\n            )\n\n        response_json = response.json()\n        # Convert UserFileSnapshot to FileDescriptor format\n        file_descriptors: List[FileDescriptor] = []\n        for user_file in response_json.get(\"user_files\", []):\n            file_descriptors.append(\n                {\n                    \"id\": user_file[\"file_id\"],\n                    \"type\": user_file[\"chat_file_type\"],\n                    \"name\": user_file[\"name\"],\n                    \"user_file_id\": str(user_file[\"id\"]),\n                }\n            )\n        return file_descriptors, \"\"\n\n    @staticmethod\n    def fetch_uploaded_file(\n        file_id: str,\n        user_performing_action: DATestUser,\n    ) -> bytes:\n        response = requests.get(\n            f\"{API_SERVER_URL}/chat/file/{file_id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return response.content\n\n    @staticmethod\n    def upload_file_for_connector(\n        file_path: str,\n        file_name: str,\n        user_performing_action: DATestUser,\n        content_type: str = \"application/octet-stream\",\n    ) -> FileUploadResponse:\n        # Read the file content\n        with open(file_path, \"rb\") as f:\n            file_content = f.read()\n\n        # Create a file-like object\n        file_obj = io.BytesIO(file_content)\n\n        # The 'files' form field expects a list of files\n        files = [(\"files\", (file_name, file_obj, content_type))]\n\n        # Use the user's headers but without Content-Type\n        # as requests will set the correct multipart/form-data Content-Type for us\n        headers = user_performing_action.headers.copy()\n        if \"Content-Type\" in headers:\n            del headers[\"Content-Type\"]\n\n        # Make the request\n        response = requests.post(\n            f\"{API_SERVER_URL}/manage/admin/connector/file/upload\",\n            files=files,\n            headers=headers,\n        )\n\n        if not response.ok:\n            try:\n                error_detail = response.json().get(\"detail\", \"Unknown error\")\n            except Exception:\n                error_detail = response.text\n\n            raise Exception(\n                f\"Unable to upload files - {error_detail} (Status code: {response.status_code})\"\n            )\n\n        response_json = response.json()\n        return FileUploadResponse(**response_json)\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/image_generation.py",
    "content": "import json\nimport os\nfrom typing import Any\nfrom uuid import uuid4\n\nimport requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestImageGenerationConfig\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _serialize_custom_config(\n    custom_config: dict[str, Any] | None,\n) -> dict[str, str] | None:\n    \"\"\"Convert custom_config values to strings (API expects dict[str, str]).\"\"\"\n    if custom_config is None:\n        return None\n    return {\n        key: json.dumps(value) if not isinstance(value, str) else value\n        for key, value in custom_config.items()\n    }\n\n\nclass ImageGenerationConfigManager:\n    @staticmethod\n    def create(\n        user_performing_action: DATestUser,\n        image_provider_id: str | None = None,\n        model_name: str = \"gpt-image-1\",\n        provider: str = \"openai\",\n        api_key: str | None = None,\n        api_base: str | None = None,\n        api_version: str | None = None,\n        deployment_name: str | None = None,\n        custom_config: dict[str, Any] | None = None,\n        is_default: bool = False,\n    ) -> DATestImageGenerationConfig:\n        \"\"\"Create a new image generation config with new credentials.\"\"\"\n        image_provider_id = image_provider_id or f\"test-provider-{uuid4()}\"\n\n        response = requests.post(\n            f\"{API_SERVER_URL}/admin/image-generation/config\",\n            json={\n                \"image_provider_id\": image_provider_id,\n                \"model_name\": model_name,\n                \"provider\": provider,\n                \"api_key\": api_key or os.environ[\"OPENAI_API_KEY\"],\n                \"api_base\": api_base,\n                \"api_version\": api_version,\n                \"deployment_name\": deployment_name,\n                \"custom_config\": _serialize_custom_config(custom_config),\n                \"is_default\": is_default,\n            },\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        data = response.json()\n\n        return DATestImageGenerationConfig(\n            image_provider_id=data[\"image_provider_id\"],\n            model_configuration_id=data[\"model_configuration_id\"],\n            model_name=data[\"model_name\"],\n            llm_provider_id=data[\"llm_provider_id\"],\n            llm_provider_name=data[\"llm_provider_name\"],\n            is_default=data[\"is_default\"],\n        )\n\n    @staticmethod\n    def create_from_provider(\n        source_llm_provider_id: int,\n        user_performing_action: DATestUser,\n        image_provider_id: str | None = None,\n        model_name: str = \"gpt-image-1\",\n        api_base: str | None = None,\n        api_version: str | None = None,\n        deployment_name: str | None = None,\n        is_default: bool = False,\n    ) -> DATestImageGenerationConfig:\n        \"\"\"Create a new image generation config by cloning from an existing LLM provider.\"\"\"\n        image_provider_id = image_provider_id or f\"test-provider-{uuid4()}\"\n\n        response = requests.post(\n            f\"{API_SERVER_URL}/admin/image-generation/config\",\n            json={\n                \"image_provider_id\": image_provider_id,\n                \"model_name\": model_name,\n                \"source_llm_provider_id\": source_llm_provider_id,\n                \"api_base\": api_base,\n                \"api_version\": api_version,\n                \"deployment_name\": deployment_name,\n                \"is_default\": is_default,\n            },\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        data = response.json()\n\n        return DATestImageGenerationConfig(\n            image_provider_id=data[\"image_provider_id\"],\n            model_configuration_id=data[\"model_configuration_id\"],\n            model_name=data[\"model_name\"],\n            llm_provider_id=data[\"llm_provider_id\"],\n            llm_provider_name=data[\"llm_provider_name\"],\n            is_default=data[\"is_default\"],\n        )\n\n    @staticmethod\n    def get_all(\n        user_performing_action: DATestUser,\n    ) -> list[DATestImageGenerationConfig]:\n        \"\"\"Get all image generation configs.\"\"\"\n        response = requests.get(\n            f\"{API_SERVER_URL}/admin/image-generation/config\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return [DATestImageGenerationConfig(**config) for config in response.json()]\n\n    @staticmethod\n    def get_credentials(\n        image_provider_id: str,\n        user_performing_action: DATestUser,\n    ) -> dict:\n        \"\"\"Get credentials for an image generation config.\"\"\"\n        response = requests.get(\n            f\"{API_SERVER_URL}/admin/image-generation/config/{image_provider_id}/credentials\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return response.json()\n\n    @staticmethod\n    def update(\n        image_provider_id: str,\n        model_name: str,\n        user_performing_action: DATestUser,\n        provider: str | None = None,\n        api_key: str | None = None,\n        source_llm_provider_id: int | None = None,\n        api_base: str | None = None,\n        api_version: str | None = None,\n        deployment_name: str | None = None,\n    ) -> DATestImageGenerationConfig:\n        \"\"\"Update an existing image generation config.\"\"\"\n        payload: dict = {\n            \"model_name\": model_name,\n            \"api_base\": api_base,\n            \"api_version\": api_version,\n            \"deployment_name\": deployment_name,\n        }\n\n        if source_llm_provider_id is not None:\n            payload[\"source_llm_provider_id\"] = source_llm_provider_id\n        elif api_key is not None and provider is not None:\n            payload[\"provider\"] = provider\n            payload[\"api_key\"] = api_key\n        else:\n            raise ValueError(\n                f\"Either source_llm_provider_id or (api_key + provider) must be provided. \"\n                f\"Got: source_llm_provider_id={source_llm_provider_id}, provider={provider}, api_key={'***' if api_key else None}\"\n            )\n\n        response = requests.put(\n            f\"{API_SERVER_URL}/admin/image-generation/config/{image_provider_id}\",\n            json=payload,\n            headers=user_performing_action.headers,\n        )\n        if not response.ok:\n            print(f\"Update failed with status {response.status_code}: {response.text}\")\n        response.raise_for_status()\n        data = response.json()\n\n        return DATestImageGenerationConfig(\n            image_provider_id=data[\"image_provider_id\"],\n            model_configuration_id=data[\"model_configuration_id\"],\n            model_name=data[\"model_name\"],\n            llm_provider_id=data[\"llm_provider_id\"],\n            llm_provider_name=data[\"llm_provider_name\"],\n            is_default=data[\"is_default\"],\n        )\n\n    @staticmethod\n    def delete(\n        image_provider_id: str,\n        user_performing_action: DATestUser,\n    ) -> None:\n        \"\"\"Delete an image generation config.\"\"\"\n        response = requests.delete(\n            f\"{API_SERVER_URL}/admin/image-generation/config/{image_provider_id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def set_default(\n        image_provider_id: str,\n        user_performing_action: DATestUser,\n    ) -> None:\n        \"\"\"Set an image generation config as the default.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/admin/image-generation/config/{image_provider_id}/default\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def verify(\n        config: DATestImageGenerationConfig,\n        user_performing_action: DATestUser,\n        verify_deleted: bool = False,\n    ) -> None:\n        \"\"\"Verify that a config exists (or doesn't exist if verify_deleted=True).\"\"\"\n        all_configs = ImageGenerationConfigManager.get_all(user_performing_action)\n\n        for fetched_config in all_configs:\n            if fetched_config.image_provider_id == config.image_provider_id:\n                if verify_deleted:\n                    raise ValueError(\n                        f\"ImageGenerationConfig {config.image_provider_id} found but should be deleted\"\n                    )\n                # Verify the config matches\n                if (\n                    fetched_config.model_name == config.model_name\n                    and fetched_config.is_default == config.is_default\n                ):\n                    return\n\n        if not verify_deleted:\n            raise ValueError(\n                f\"ImageGenerationConfig {config.image_provider_id} not found\"\n            )\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/index_attempt.py",
    "content": "import time\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom urllib.parse import urlencode\n\nimport requests\n\nfrom onyx.background.indexing.models import IndexAttemptErrorPydantic\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import IndexModelStatus\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.models import IndexingStatus\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.server.documents.models import IndexAttemptSnapshot\nfrom onyx.server.documents.models import PaginatedReturn\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import MAX_DELAY\nfrom tests.integration.common_utils.test_models import DATestIndexAttempt\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass IndexAttemptManager:\n    @staticmethod\n    def create_test_index_attempts(\n        num_attempts: int,\n        cc_pair_id: int,\n        from_beginning: bool = False,\n        status: IndexingStatus = IndexingStatus.SUCCESS,\n        new_docs_indexed: int = 10,\n        total_docs_indexed: int = 10,\n        docs_removed_from_index: int = 0,\n        error_msg: str | None = None,\n        base_time: datetime | None = None,\n    ) -> list[DATestIndexAttempt]:\n        if base_time is None:\n            base_time = datetime.now()\n\n        attempts = []\n        with get_session_with_current_tenant() as db_session:\n            # Get the current search settings\n            search_settings = get_current_search_settings(db_session)\n            if (\n                not search_settings\n                or search_settings.status != IndexModelStatus.PRESENT\n            ):\n                raise ValueError(\"No current search settings found with PRESENT status\")\n\n            for i in range(num_attempts):\n                time_created = base_time - timedelta(hours=i)\n\n                index_attempt = IndexAttempt(\n                    connector_credential_pair_id=cc_pair_id,\n                    from_beginning=from_beginning,\n                    status=status,\n                    new_docs_indexed=new_docs_indexed,\n                    total_docs_indexed=total_docs_indexed,\n                    docs_removed_from_index=docs_removed_from_index,\n                    error_msg=error_msg,\n                    time_created=time_created,\n                    time_started=time_created,\n                    time_updated=time_created,\n                    search_settings_id=search_settings.id,\n                )\n\n                db_session.add(index_attempt)\n                db_session.flush()  # To get the ID\n\n                attempts.append(\n                    DATestIndexAttempt(\n                        id=index_attempt.id,\n                        status=index_attempt.status,\n                        new_docs_indexed=index_attempt.new_docs_indexed,\n                        total_docs_indexed=index_attempt.total_docs_indexed,\n                        docs_removed_from_index=index_attempt.docs_removed_from_index,\n                        error_msg=index_attempt.error_msg,\n                        time_started=index_attempt.time_started,\n                        time_updated=index_attempt.time_updated,\n                    )\n                )\n\n            db_session.commit()\n\n        return attempts\n\n    @staticmethod\n    def get_index_attempt_page(\n        cc_pair_id: int,\n        user_performing_action: DATestUser,\n        page: int = 0,\n        page_size: int = 10,\n    ) -> PaginatedReturn[IndexAttemptSnapshot]:\n        query_params: dict[str, str | int] = {\n            \"page_num\": page,\n            \"page_size\": page_size,\n        }\n\n        url = f\"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair_id}/index-attempts?{urlencode(query_params, doseq=True)}\"\n        response = requests.get(\n            url=url,\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        data = response.json()\n        return PaginatedReturn(\n            items=[IndexAttemptSnapshot(**item) for item in data[\"items\"]],\n            total_items=data[\"total_items\"],\n        )\n\n    @staticmethod\n    def get_latest_index_attempt_for_cc_pair(\n        cc_pair_id: int,\n        user_performing_action: DATestUser,\n    ) -> IndexAttemptSnapshot | None:\n        \"\"\"Get an IndexAttempt by ID\"\"\"\n        index_attempts = IndexAttemptManager.get_index_attempt_page(\n            cc_pair_id, user_performing_action=user_performing_action\n        ).items\n        if not index_attempts:\n            return None\n\n        index_attempts = sorted(\n            index_attempts, key=lambda x: x.time_started or \"0\", reverse=True\n        )\n        return index_attempts[0]\n\n    @staticmethod\n    def wait_for_index_attempt_start(\n        cc_pair_id: int,\n        user_performing_action: DATestUser,\n        index_attempts_to_ignore: list[int] | None = None,\n        timeout: float = MAX_DELAY,\n    ) -> IndexAttemptSnapshot:\n        \"\"\"Wait for an IndexAttempt to start\"\"\"\n        start = datetime.now()\n        index_attempts_to_ignore = index_attempts_to_ignore or []\n\n        while True:\n            index_attempt = IndexAttemptManager.get_latest_index_attempt_for_cc_pair(\n                cc_pair_id=cc_pair_id,\n                user_performing_action=user_performing_action,\n            )\n            if (\n                index_attempt\n                and index_attempt.time_started\n                and index_attempt.id not in index_attempts_to_ignore\n            ):\n                return index_attempt\n\n            elapsed = (datetime.now() - start).total_seconds()\n            if elapsed > timeout:\n                raise TimeoutError(\n                    f\"IndexAttempt for CC Pair {cc_pair_id} did not start within {timeout} seconds\"\n                )\n\n    @staticmethod\n    def get_index_attempt_by_id(\n        index_attempt_id: int,\n        cc_pair_id: int,\n        user_performing_action: DATestUser,\n    ) -> IndexAttemptSnapshot:\n        page_num = 0\n        page_size = 10\n        while True:\n            page = IndexAttemptManager.get_index_attempt_page(\n                cc_pair_id=cc_pair_id,\n                page=page_num,\n                page_size=page_size,\n                user_performing_action=user_performing_action,\n            )\n            for attempt in page.items:\n                if attempt.id == index_attempt_id:\n                    return attempt\n\n            if len(page.items) < page_size:\n                break\n\n            page_num += 1\n\n        raise ValueError(f\"IndexAttempt {index_attempt_id} not found\")\n\n    @staticmethod\n    def wait_for_index_attempt_completion(\n        index_attempt_id: int,\n        cc_pair_id: int,\n        user_performing_action: DATestUser,\n        timeout: float = MAX_DELAY,\n    ) -> None:\n        \"\"\"Wait for an IndexAttempt to complete\"\"\"\n        start = time.monotonic()\n        while True:\n            index_attempt = IndexAttemptManager.get_index_attempt_by_id(\n                index_attempt_id=index_attempt_id,\n                cc_pair_id=cc_pair_id,\n                user_performing_action=user_performing_action,\n            )\n\n            if index_attempt.status and index_attempt.status.is_terminal():\n                print(\n                    f\"IndexAttempt {index_attempt_id} completed with status {index_attempt.status}\"\n                )\n                return\n\n            elapsed = time.monotonic() - start\n            if elapsed > timeout:\n                raise TimeoutError(\n                    f\"IndexAttempt {index_attempt_id} did not complete within {timeout} seconds\"\n                )\n\n            print(\n                f\"Waiting for IndexAttempt {index_attempt_id} to complete. elapsed={elapsed:.2f} timeout={timeout}\"\n            )\n            time.sleep(5)\n\n    @staticmethod\n    def get_index_attempt_errors_for_cc_pair(\n        cc_pair_id: int,\n        user_performing_action: DATestUser,\n        include_resolved: bool = True,\n    ) -> list[IndexAttemptErrorPydantic]:\n        url = f\"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair_id}/errors?page_size=100\"\n        if include_resolved:\n            url += \"&include_resolved=true\"\n        response = requests.get(\n            url=url,\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        data = response.json()\n        return [IndexAttemptErrorPydantic(**item) for item in data[\"items\"]]\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/llm_provider.py",
    "content": "import os\nfrom uuid import uuid4\n\nimport requests\n\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.server.manage.llm.models import DefaultModel\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import LLMProviderView\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import GENERAL_HEADERS\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass LLMProviderManager:\n    @staticmethod\n    def create(\n        user_performing_action: DATestUser,\n        name: str | None = None,\n        provider: str | None = None,\n        api_key: str | None = None,\n        default_model_name: str | None = None,\n        api_base: str | None = None,\n        api_version: str | None = None,\n        groups: list[int] | None = None,\n        personas: list[int] | None = None,\n        is_public: bool | None = None,\n        set_as_default: bool = True,\n    ) -> DATestLLMProvider:\n        print(f\"Seeding LLM Providers for {user_performing_action.email}...\")\n\n        llm_provider = LLMProviderUpsertRequest(\n            name=name or f\"test-provider-{uuid4()}\",\n            provider=provider or LlmProviderNames.OPENAI,\n            api_key=api_key or os.environ[\"OPENAI_API_KEY\"],\n            api_base=api_base,\n            api_version=api_version,\n            custom_config=None,\n            is_public=True if is_public is None else is_public,\n            groups=groups or [],\n            personas=personas or [],\n            model_configurations=[\n                ModelConfigurationUpsertRequest(\n                    name=default_model_name or \"gpt-4o-mini\",\n                    is_visible=True,\n                    max_input_tokens=None,\n                    display_name=default_model_name or \"gpt-4o-mini\",\n                    supports_image_input=True,\n                )\n            ],\n            api_key_changed=True,\n        )\n\n        llm_response = requests.put(\n            f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n            json=llm_provider.model_dump(),\n            headers=user_performing_action.headers,\n        )\n        llm_response.raise_for_status()\n        response_data = llm_response.json()\n\n        result_llm = DATestLLMProvider(\n            id=response_data[\"id\"],\n            name=response_data[\"name\"],\n            provider=response_data[\"provider\"],\n            api_key=response_data[\"api_key\"],\n            default_model_name=default_model_name or \"gpt-4o-mini\",\n            is_public=response_data[\"is_public\"],\n            is_auto_mode=response_data.get(\"is_auto_mode\", False),\n            groups=response_data[\"groups\"],\n            personas=response_data.get(\"personas\", []),\n            api_base=response_data[\"api_base\"],\n            api_version=response_data[\"api_version\"],\n        )\n\n        if set_as_default:\n            if default_model_name is None:\n                default_model_name = \"gpt-4o-mini\"\n            set_default_response = requests.post(\n                f\"{API_SERVER_URL}/admin/llm/default\",\n                json={\n                    \"provider_id\": response_data[\"id\"],\n                    \"model_name\": default_model_name,\n                },\n                headers=(\n                    user_performing_action.headers\n                    if user_performing_action\n                    else GENERAL_HEADERS\n                ),\n            )\n            set_default_response.raise_for_status()\n\n        return result_llm\n\n    @staticmethod\n    def delete(\n        llm_provider: DATestLLMProvider,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        response = requests.delete(\n            f\"{API_SERVER_URL}/admin/llm/provider/{llm_provider.id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return True\n\n    @staticmethod\n    def get_all(\n        user_performing_action: DATestUser,\n    ) -> list[LLMProviderView]:\n        response = requests.get(\n            f\"{API_SERVER_URL}/admin/llm/provider\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return [LLMProviderView(**p) for p in response.json()[\"providers\"]]\n\n    @staticmethod\n    def verify(\n        llm_provider: DATestLLMProvider,\n        user_performing_action: DATestUser,\n        verify_deleted: bool = False,\n    ) -> None:\n        all_llm_providers = LLMProviderManager.get_all(user_performing_action)\n        default_model = LLMProviderManager.get_default_model(user_performing_action)\n        for fetched_llm_provider in all_llm_providers:\n            model_names = [\n                model.name for model in fetched_llm_provider.model_configurations\n            ]\n            if llm_provider.id == fetched_llm_provider.id:\n                if verify_deleted:\n                    raise ValueError(\n                        f\"LLM Provider {llm_provider.id} found but should be deleted\"\n                    )\n                fetched_llm_groups = set(fetched_llm_provider.groups)\n                llm_provider_groups = set(llm_provider.groups)\n\n                # NOTE: returned api keys are sanitized and should not match\n                if (\n                    fetched_llm_groups == llm_provider_groups\n                    and llm_provider.provider == fetched_llm_provider.provider\n                    and (\n                        default_model is None or default_model.model_name in model_names\n                    )\n                    and llm_provider.is_public == fetched_llm_provider.is_public\n                    and set(fetched_llm_provider.personas) == set(llm_provider.personas)\n                ):\n                    return\n        if not verify_deleted:\n            raise ValueError(f\"LLM Provider {llm_provider.id} not found\")\n\n    @staticmethod\n    def get_default_model(\n        user_performing_action: DATestUser | None = None,\n    ) -> DefaultModel | None:\n        response = requests.get(\n            f\"{API_SERVER_URL}/admin/llm/provider\",\n            headers=(\n                user_performing_action.headers\n                if user_performing_action\n                else GENERAL_HEADERS\n            ),\n        )\n        response.raise_for_status()\n        default_text = response.json().get(\"default_text\")\n        if default_text is None:\n            return None\n        return DefaultModel(**default_text)\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/pat.py",
    "content": "\"\"\"Helper for managing Personal Access Tokens in integration tests.\"\"\"\n\nimport requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestPAT\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass PATManager:\n    \"\"\"Manager for creating and managing Personal Access Tokens in tests.\"\"\"\n\n    @staticmethod\n    def create(\n        name: str,\n        expiration_days: int | None,\n        user_performing_action: DATestUser,\n    ) -> DATestPAT:\n        \"\"\"Create a Personal Access Token for a user.\n\n        Args:\n            name: Name of the token\n            expiration_days: Number of days until expiration (None for never)\n            user_performing_action: User creating the token\n\n        Returns:\n            DATestPAT with PAT data including the raw token\n        \"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/user/pats\",\n            json={\"name\": name, \"expiration_days\": expiration_days},\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n            timeout=60,\n        )\n        response.raise_for_status()\n        return DATestPAT(**response.json())\n\n    @staticmethod\n    def list(user_performing_action: DATestUser) -> list[DATestPAT]:\n        \"\"\"List all PATs for a user.\n\n        Args:\n            user_performing_action: User listing their tokens\n\n        Returns:\n            List of DATestPAT (without raw tokens)\n        \"\"\"\n        response = requests.get(\n            f\"{API_SERVER_URL}/user/pats\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n            timeout=60,\n        )\n        response.raise_for_status()\n        return [DATestPAT(**pat_data) for pat_data in response.json()]\n\n    @staticmethod\n    def revoke(token_id: int, user_performing_action: DATestUser) -> None:\n        \"\"\"Revoke a Personal Access Token.\n\n        Args:\n            token_id: ID of the token to revoke\n            user_performing_action: User revoking the token\n        \"\"\"\n        response = requests.delete(\n            f\"{API_SERVER_URL}/user/pats/{token_id}\",\n            headers=user_performing_action.headers,\n            cookies=user_performing_action.cookies,\n            timeout=60,\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def authenticate(token: str) -> requests.Response:\n        \"\"\"Authenticate using a PAT token and get user info.\n\n        Args:\n            token: The raw PAT token\n\n        Returns:\n            Response from /me endpoint\n        \"\"\"\n        return requests.get(\n            f\"{API_SERVER_URL}/me\",\n            headers={\"Authorization\": f\"Bearer {token}\"},\n            timeout=60,\n        )\n\n    @staticmethod\n    def get_auth_headers(token: str) -> dict[str, str]:\n        \"\"\"Get authorization headers for a PAT token.\n\n        Args:\n            token: The raw PAT token\n\n        Returns:\n            Headers dict with Authorization bearer token\n        \"\"\"\n        return {\"Authorization\": f\"Bearer {token}\"}\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/persona.py",
    "content": "from uuid import UUID\nfrom uuid import uuid4\n\nimport requests\n\nfrom onyx.server.features.persona.models import FullPersonaSnapshot\nfrom onyx.server.features.persona.models import PersonaUpsertRequest\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestPersona\nfrom tests.integration.common_utils.test_models import DATestPersonaLabel\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass PersonaManager:\n    @staticmethod\n    def create(\n        user_performing_action: DATestUser,\n        name: str | None = None,\n        description: str | None = None,\n        system_prompt: str | None = None,\n        task_prompt: str | None = None,\n        is_public: bool = True,\n        datetime_aware: bool = False,\n        document_set_ids: list[int] | None = None,\n        tool_ids: list[int] | None = None,\n        llm_model_provider_override: str | None = None,\n        llm_model_version_override: str | None = None,\n        users: list[str] | None = None,\n        groups: list[int] | None = None,\n        label_ids: list[int] | None = None,\n        user_file_ids: list[str] | None = None,\n        display_priority: int | None = None,\n        featured: bool = False,\n    ) -> DATestPersona:\n        name = name or f\"test-persona-{uuid4()}\"\n        description = description or f\"Description for {name}\"\n        system_prompt = system_prompt or f\"System prompt for {name}\"\n        task_prompt = task_prompt or f\"Task prompt for {name}\"\n\n        persona_creation_request = PersonaUpsertRequest(\n            name=name,\n            description=description,\n            system_prompt=system_prompt,\n            task_prompt=task_prompt,\n            datetime_aware=datetime_aware,\n            is_public=is_public,\n            document_set_ids=document_set_ids or [],\n            tool_ids=tool_ids or [],\n            llm_model_provider_override=llm_model_provider_override,\n            llm_model_version_override=llm_model_version_override,\n            users=[UUID(user) for user in (users or [])],\n            groups=groups or [],\n            label_ids=label_ids or [],\n            user_file_ids=user_file_ids or [],\n            display_priority=display_priority,\n            is_featured=featured,\n        )\n\n        response = requests.post(\n            f\"{API_SERVER_URL}/persona\",\n            json=persona_creation_request.model_dump(mode=\"json\"),\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        persona_data = response.json()\n\n        return DATestPersona(\n            id=persona_data[\"id\"],\n            name=name,\n            description=description,\n            is_public=is_public,\n            system_prompt=system_prompt,\n            task_prompt=task_prompt,\n            datetime_aware=datetime_aware,\n            document_set_ids=document_set_ids or [],\n            tool_ids=tool_ids or [],\n            llm_model_provider_override=llm_model_provider_override,\n            llm_model_version_override=llm_model_version_override,\n            users=users or [],\n            groups=groups or [],\n            label_ids=label_ids or [],\n            is_featured=featured,\n        )\n\n    @staticmethod\n    def edit(\n        persona: DATestPersona,\n        user_performing_action: DATestUser,\n        name: str | None = None,\n        description: str | None = None,\n        system_prompt: str | None = None,\n        task_prompt: str | None = None,\n        is_public: bool | None = None,\n        datetime_aware: bool = False,\n        document_set_ids: list[int] | None = None,\n        tool_ids: list[int] | None = None,\n        llm_model_provider_override: str | None = None,\n        llm_model_version_override: str | None = None,\n        users: list[str] | None = None,\n        groups: list[int] | None = None,\n        label_ids: list[int] | None = None,\n        featured: bool | None = None,\n    ) -> DATestPersona:\n        system_prompt = system_prompt or f\"System prompt for {persona.name}\"\n        task_prompt = task_prompt or f\"Task prompt for {persona.name}\"\n\n        persona_update_request = PersonaUpsertRequest(\n            name=name or persona.name,\n            description=description or persona.description,\n            system_prompt=system_prompt,\n            task_prompt=task_prompt,\n            datetime_aware=datetime_aware,\n            is_public=persona.is_public if is_public is None else is_public,\n            document_set_ids=document_set_ids or persona.document_set_ids,\n            tool_ids=tool_ids or persona.tool_ids,\n            llm_model_provider_override=(\n                llm_model_provider_override or persona.llm_model_provider_override\n            ),\n            llm_model_version_override=(\n                llm_model_version_override or persona.llm_model_version_override\n            ),\n            users=[UUID(user) for user in (users or persona.users)],\n            groups=groups or persona.groups,\n            label_ids=label_ids or persona.label_ids,\n            is_featured=featured if featured is not None else persona.is_featured,\n        )\n\n        response = requests.patch(\n            f\"{API_SERVER_URL}/persona/{persona.id}\",\n            json=persona_update_request.model_dump(mode=\"json\"),\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        updated_persona_data = response.json()\n\n        return DATestPersona(\n            id=updated_persona_data[\"id\"],\n            name=updated_persona_data[\"name\"],\n            description=updated_persona_data[\"description\"],\n            is_public=updated_persona_data[\"is_public\"],\n            system_prompt=system_prompt,\n            task_prompt=task_prompt,\n            datetime_aware=datetime_aware,\n            document_set_ids=[ds[\"id\"] for ds in updated_persona_data[\"document_sets\"]],\n            tool_ids=[t[\"id\"] for t in updated_persona_data[\"tools\"]],\n            llm_model_provider_override=updated_persona_data[\n                \"llm_model_provider_override\"\n            ],\n            llm_model_version_override=updated_persona_data[\n                \"llm_model_version_override\"\n            ],\n            users=[user[\"email\"] for user in updated_persona_data[\"users\"]],\n            groups=updated_persona_data[\"groups\"],\n            label_ids=[label[\"id\"] for label in updated_persona_data[\"labels\"]],\n            is_featured=updated_persona_data[\"is_featured\"],\n        )\n\n    @staticmethod\n    def get_all(\n        user_performing_action: DATestUser,\n    ) -> list[FullPersonaSnapshot]:\n        response = requests.get(\n            f\"{API_SERVER_URL}/admin/persona\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return [FullPersonaSnapshot(**persona) for persona in response.json()]\n\n    @staticmethod\n    def get_one(\n        persona_id: int,\n        user_performing_action: DATestUser,\n    ) -> list[FullPersonaSnapshot]:\n        response = requests.get(\n            f\"{API_SERVER_URL}/persona/{persona_id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return [FullPersonaSnapshot(**response.json())]\n\n    @staticmethod\n    def verify(\n        persona: DATestPersona,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        all_personas = PersonaManager.get_one(\n            persona_id=persona.id,\n            user_performing_action=user_performing_action,\n        )\n        for fetched_persona in all_personas:\n            if fetched_persona.id == persona.id:\n                mismatches: list[tuple[str, object, object]] = []\n\n                if fetched_persona.name != persona.name:\n                    mismatches.append((\"name\", persona.name, fetched_persona.name))\n                if fetched_persona.description != persona.description:\n                    mismatches.append(\n                        (\n                            \"description\",\n                            persona.description,\n                            fetched_persona.description,\n                        )\n                    )\n                if fetched_persona.is_public != persona.is_public:\n                    mismatches.append(\n                        (\"is_public\", persona.is_public, fetched_persona.is_public)\n                    )\n                if fetched_persona.is_featured != persona.is_featured:\n                    mismatches.append(\n                        (\n                            \"is_featured\",\n                            persona.is_featured,\n                            fetched_persona.is_featured,\n                        )\n                    )\n                if (\n                    fetched_persona.llm_model_provider_override\n                    != persona.llm_model_provider_override\n                ):\n                    mismatches.append(\n                        (\n                            \"llm_model_provider_override\",\n                            persona.llm_model_provider_override,\n                            fetched_persona.llm_model_provider_override,\n                        )\n                    )\n                if (\n                    fetched_persona.llm_model_version_override\n                    != persona.llm_model_version_override\n                ):\n                    mismatches.append(\n                        (\n                            \"llm_model_version_override\",\n                            persona.llm_model_version_override,\n                            fetched_persona.llm_model_version_override,\n                        )\n                    )\n                if fetched_persona.system_prompt != persona.system_prompt:\n                    mismatches.append(\n                        (\n                            \"system_prompt\",\n                            persona.system_prompt,\n                            fetched_persona.system_prompt,\n                        )\n                    )\n                if fetched_persona.task_prompt != persona.task_prompt:\n                    mismatches.append(\n                        (\n                            \"task_prompt\",\n                            persona.task_prompt,\n                            fetched_persona.task_prompt,\n                        )\n                    )\n                if fetched_persona.datetime_aware != persona.datetime_aware:\n                    mismatches.append(\n                        (\n                            \"datetime_aware\",\n                            persona.datetime_aware,\n                            fetched_persona.datetime_aware,\n                        )\n                    )\n\n                fetched_document_set_ids = {\n                    document_set.id for document_set in fetched_persona.document_sets\n                }\n                expected_document_set_ids = set(persona.document_set_ids)\n                if fetched_document_set_ids != expected_document_set_ids:\n                    mismatches.append(\n                        (\n                            \"document_set_ids\",\n                            sorted(expected_document_set_ids),\n                            sorted(fetched_document_set_ids),\n                        )\n                    )\n\n                fetched_tool_ids = {tool.id for tool in fetched_persona.tools}\n                expected_tool_ids = set(persona.tool_ids)\n                if fetched_tool_ids != expected_tool_ids:\n                    mismatches.append(\n                        (\n                            \"tool_ids\",\n                            sorted(expected_tool_ids),\n                            sorted(fetched_tool_ids),\n                        )\n                    )\n\n                fetched_user_emails = {user.email for user in fetched_persona.users}\n                expected_user_emails = set(persona.users)\n                if fetched_user_emails != expected_user_emails:\n                    mismatches.append(\n                        (\n                            \"users\",\n                            sorted(expected_user_emails),\n                            sorted(fetched_user_emails),\n                        )\n                    )\n\n                fetched_group_ids = set(fetched_persona.groups)\n                expected_group_ids = set(persona.groups)\n                if fetched_group_ids != expected_group_ids:\n                    mismatches.append(\n                        (\n                            \"groups\",\n                            sorted(expected_group_ids),\n                            sorted(fetched_group_ids),\n                        )\n                    )\n\n                fetched_label_ids = {label.id for label in fetched_persona.labels}\n                expected_label_ids = set(persona.label_ids)\n                if fetched_label_ids != expected_label_ids:\n                    mismatches.append(\n                        (\n                            \"label_ids\",\n                            sorted(expected_label_ids),\n                            sorted(fetched_label_ids),\n                        )\n                    )\n\n                if mismatches:\n                    print(\n                        f\"Persona verification failed for id={persona.id}. Fields mismatched:\"\n                    )\n                    for field_name, expected_value, actual_value in mismatches:\n                        print(\n                            f\" - {field_name}: expected {expected_value!r}, got {actual_value!r}\"\n                        )\n                    return False\n                return True\n        print(\n            f\"Persona verification failed: persona with id={persona.id} not found in fetched results.\"\n        )\n        return False\n\n    @staticmethod\n    def delete(\n        persona: DATestPersona,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        response = requests.delete(\n            f\"{API_SERVER_URL}/persona/{persona.id}\",\n            headers=user_performing_action.headers,\n        )\n        return response.ok\n\n\nclass PersonaLabelManager:\n    @staticmethod\n    def create(\n        label: DATestPersonaLabel,\n        user_performing_action: DATestUser,\n    ) -> DATestPersonaLabel:\n        response = requests.post(\n            f\"{API_SERVER_URL}/persona/labels\",\n            json={\n                \"name\": label.name,\n            },\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        response_data = response.json()\n        label.id = response_data[\"id\"]\n        return label\n\n    @staticmethod\n    def get_all(\n        user_performing_action: DATestUser,\n    ) -> list[DATestPersonaLabel]:\n        response = requests.get(\n            f\"{API_SERVER_URL}/persona/labels\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return [DATestPersonaLabel(**label) for label in response.json()]\n\n    @staticmethod\n    def update(\n        label: DATestPersonaLabel,\n        user_performing_action: DATestUser,\n    ) -> DATestPersonaLabel:\n        response = requests.patch(\n            f\"{API_SERVER_URL}/admin/persona/label/{label.id}\",\n            json={\n                \"label_name\": label.name,\n            },\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return label\n\n    @staticmethod\n    def delete(\n        label: DATestPersonaLabel,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        response = requests.delete(\n            f\"{API_SERVER_URL}/admin/persona/label/{label.id}\",\n            headers=user_performing_action.headers,\n        )\n        return response.ok\n\n    @staticmethod\n    def verify(\n        label: DATestPersonaLabel,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        all_labels = PersonaLabelManager.get_all(user_performing_action)\n        for fetched_label in all_labels:\n            if fetched_label.id == label.id:\n                return fetched_label.name == label.name\n        return False\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/project.py",
    "content": "from typing import List\n\nimport requests\n\nfrom onyx.server.features.projects.models import CategorizedFilesSnapshot\nfrom onyx.server.features.projects.models import UserFileSnapshot\nfrom onyx.server.features.projects.models import UserProjectSnapshot\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass ProjectManager:\n    @staticmethod\n    def create(\n        name: str,\n        user_performing_action: DATestUser,\n    ) -> UserProjectSnapshot:\n        \"\"\"Create a new project via API.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/user/projects/create\",\n            params={\"name\": name},\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return UserProjectSnapshot.model_validate(response.json())\n\n    @staticmethod\n    def get_all(\n        user_performing_action: DATestUser,\n    ) -> List[UserProjectSnapshot]:\n        \"\"\"Get all projects for a user via API.\"\"\"\n        response = requests.get(\n            f\"{API_SERVER_URL}/user/projects\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return [UserProjectSnapshot.model_validate(obj) for obj in response.json()]\n\n    @staticmethod\n    def delete(\n        project_id: int,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        \"\"\"Delete a project via API.\"\"\"\n        response = requests.delete(\n            f\"{API_SERVER_URL}/user/projects/{project_id}\",\n            headers=user_performing_action.headers,\n        )\n        return response.status_code == 204\n\n    @staticmethod\n    def verify_deleted(\n        project_id: int,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        \"\"\"Verify that a project has been deleted by ensuring it's not in list.\"\"\"\n        response = requests.get(\n            f\"{API_SERVER_URL}/user/projects\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        projects = [UserProjectSnapshot.model_validate(obj) for obj in response.json()]\n        return all(p.id != project_id for p in projects)\n\n    @staticmethod\n    def verify_files_unlinked(\n        project_id: int,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        \"\"\"Verify that all files have been unlinked from the project via API.\"\"\"\n        response = requests.get(\n            f\"{API_SERVER_URL}/user/projects/files/{project_id}\",\n            headers=user_performing_action.headers,\n        )\n        if response.status_code == 404:\n            return True\n        if not response.ok:\n            return False\n        files = [UserFileSnapshot.model_validate(obj) for obj in response.json()]\n        return len(files) == 0\n\n    @staticmethod\n    def verify_chat_sessions_unlinked(\n        project_id: int,\n        user_performing_action: DATestUser,\n    ) -> bool:\n        \"\"\"Verify that all chat sessions have been unlinked from the project via API.\"\"\"\n        response = requests.get(\n            f\"{API_SERVER_URL}/user/projects/{project_id}\",\n            headers=user_performing_action.headers,\n        )\n        if response.status_code == 404:\n            return True\n        if not response.ok:\n            return False\n        try:\n            project = UserProjectSnapshot.model_validate(response.json())\n            chat_sessions = getattr(project, \"chat_sessions\", [])\n            return len(chat_sessions or []) == 0\n        except Exception:\n            # If response doesn't include chat_sessions, assume unlinked\n            return True\n\n    @staticmethod\n    def upload_files(\n        project_id: int,\n        files: List[tuple[str, bytes]],  # List of (filename, content) tuples\n        user_performing_action: DATestUser,\n    ) -> CategorizedFilesSnapshot:\n        \"\"\"Upload files to a project via API.\"\"\"\n        # Build multipart form-data\n        files_payload = [\n            (\n                \"files\",\n                (filename, content, \"text/plain\"),\n            )\n            for filename, content in files\n        ]\n\n        data = {\"project_id\": str(project_id)} if project_id is not None else {}\n\n        # Let requests set Content-Type boundary by not overriding header\n        headers = dict(user_performing_action.headers or {})\n        headers.pop(\"Content-Type\", None)\n\n        response = requests.post(\n            f\"{API_SERVER_URL}/user/projects/file/upload\",\n            data=data,\n            files=files_payload,\n            headers=headers,\n        )\n        response.raise_for_status()\n        return CategorizedFilesSnapshot.model_validate(response.json())\n\n    @staticmethod\n    def get_project_files(\n        project_id: int,\n        user_performing_action: DATestUser,\n    ) -> List[UserFileSnapshot]:\n        \"\"\"Get all files associated with a project via API.\"\"\"\n        response = requests.get(\n            f\"{API_SERVER_URL}/user/projects/files/{project_id}\",\n            headers=user_performing_action.headers,\n        )\n        if response.status_code == 404:\n            return []\n        response.raise_for_status()\n        return [UserFileSnapshot.model_validate(obj) for obj in response.json()]\n\n    @staticmethod\n    def set_instructions(\n        project_id: int,\n        instructions: str,\n        user_performing_action: DATestUser,\n    ) -> str:\n        \"\"\"Set project instructions via API.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/user/projects/{project_id}/instructions\",\n            json={\"instructions\": instructions},\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return (response.json() or {}).get(\"instructions\") or \"\"\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/query_history.py",
    "content": "import time\nfrom datetime import datetime\nfrom urllib.parse import urlencode\nfrom uuid import UUID\n\nimport requests\nfrom requests.models import CaseInsensitiveDict\n\nfrom ee.onyx.server.query_history.models import ChatSessionMinimal\nfrom ee.onyx.server.query_history.models import ChatSessionSnapshot\nfrom onyx.configs.constants import QAFeedbackType\nfrom onyx.db.enums import TaskStatus\nfrom onyx.server.documents.models import PaginatedReturn\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import MAX_DELAY\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass QueryHistoryManager:\n    @staticmethod\n    def get_query_history_page(\n        user_performing_action: DATestUser,\n        page_num: int = 0,\n        page_size: int = 10,\n        feedback_type: QAFeedbackType | None = None,\n        start_time: datetime | None = None,\n        end_time: datetime | None = None,\n    ) -> PaginatedReturn[ChatSessionMinimal]:\n        query_params: dict[str, str | int] = {\n            \"page_num\": page_num,\n            \"page_size\": page_size,\n        }\n        if feedback_type:\n            query_params[\"feedback_type\"] = feedback_type.value\n        if start_time:\n            query_params[\"start_time\"] = start_time.isoformat()\n        if end_time:\n            query_params[\"end_time\"] = end_time.isoformat()\n\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/admin/chat-session-history?{urlencode(query_params, doseq=True)}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        data = response.json()\n        return PaginatedReturn(\n            items=[ChatSessionMinimal(**item) for item in data[\"items\"]],\n            total_items=data[\"total_items\"],\n        )\n\n    @staticmethod\n    def get_chat_session_admin(\n        chat_session_id: UUID | str,\n        user_performing_action: DATestUser,\n    ) -> ChatSessionSnapshot:\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/admin/chat-session-history/{chat_session_id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return ChatSessionSnapshot(**response.json())\n\n    @staticmethod\n    def get_query_history_as_csv(\n        user_performing_action: DATestUser,\n        start_time: datetime | None = None,\n        end_time: datetime | None = None,\n    ) -> tuple[CaseInsensitiveDict[str], str]:\n        query_params: dict[str, str | int] = {}\n        if start_time:\n            query_params[\"start\"] = start_time.isoformat()\n        if end_time:\n            query_params[\"end\"] = end_time.isoformat()\n\n        start_response = requests.post(\n            url=f\"{API_SERVER_URL}/admin/query-history/start-export?{urlencode(query_params, doseq=True)}\",\n            headers=user_performing_action.headers,\n        )\n        start_response.raise_for_status()\n        request_id = start_response.json()[\"request_id\"]\n\n        deadline = time.time() + MAX_DELAY\n        while time.time() < deadline:\n            status_response = requests.get(\n                url=f\"{API_SERVER_URL}/admin/query-history/export-status\",\n                params={\"request_id\": request_id},\n                headers=user_performing_action.headers,\n            )\n            status_response.raise_for_status()\n            status = status_response.json()[\"status\"]\n            if status == TaskStatus.SUCCESS:\n                break\n            if status == TaskStatus.FAILURE:\n                raise RuntimeError(\"Query history export task failed\")\n            time.sleep(2)\n        else:\n            raise TimeoutError(\n                f\"Query history export not completed within {MAX_DELAY} seconds\"\n            )\n\n        download_response = requests.get(\n            url=f\"{API_SERVER_URL}/admin/query-history/download\",\n            params={\"request_id\": request_id},\n            headers=user_performing_action.headers,\n        )\n        download_response.raise_for_status()\n\n        if not download_response.content:\n            raise RuntimeError(\n                \"Query history CSV download returned zero-length content\"\n            )\n\n        return download_response.headers, download_response.content.decode()\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/scim_client.py",
    "content": "import requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import GENERAL_HEADERS\n\n\nclass ScimClient:\n    \"\"\"HTTP client for making authenticated SCIM v2 requests.\"\"\"\n\n    @staticmethod\n    def _headers(raw_token: str) -> dict[str, str]:\n        return {\n            **GENERAL_HEADERS,\n            \"Authorization\": f\"Bearer {raw_token}\",\n        }\n\n    @staticmethod\n    def get(path: str, raw_token: str) -> requests.Response:\n        return requests.get(\n            f\"{API_SERVER_URL}/scim/v2{path}\",\n            headers=ScimClient._headers(raw_token),\n            timeout=60,\n        )\n\n    @staticmethod\n    def post(path: str, raw_token: str, json: dict) -> requests.Response:\n        return requests.post(\n            f\"{API_SERVER_URL}/scim/v2{path}\",\n            json=json,\n            headers=ScimClient._headers(raw_token),\n            timeout=60,\n        )\n\n    @staticmethod\n    def put(path: str, raw_token: str, json: dict) -> requests.Response:\n        return requests.put(\n            f\"{API_SERVER_URL}/scim/v2{path}\",\n            json=json,\n            headers=ScimClient._headers(raw_token),\n            timeout=60,\n        )\n\n    @staticmethod\n    def patch(path: str, raw_token: str, json: dict) -> requests.Response:\n        return requests.patch(\n            f\"{API_SERVER_URL}/scim/v2{path}\",\n            json=json,\n            headers=ScimClient._headers(raw_token),\n            timeout=60,\n        )\n\n    @staticmethod\n    def delete(path: str, raw_token: str) -> requests.Response:\n        return requests.delete(\n            f\"{API_SERVER_URL}/scim/v2{path}\",\n            headers=ScimClient._headers(raw_token),\n            timeout=60,\n        )\n\n    @staticmethod\n    def get_no_auth(path: str) -> requests.Response:\n        return requests.get(\n            f\"{API_SERVER_URL}/scim/v2{path}\",\n            headers=GENERAL_HEADERS,\n            timeout=60,\n        )\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/scim_token.py",
    "content": "import requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestScimToken\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass ScimTokenManager:\n    @staticmethod\n    def create(\n        name: str,\n        user_performing_action: DATestUser,\n    ) -> DATestScimToken:\n        response = requests.post(\n            f\"{API_SERVER_URL}/admin/enterprise-settings/scim/token\",\n            json={\"name\": name},\n            headers=user_performing_action.headers,\n            timeout=60,\n        )\n        response.raise_for_status()\n        data = response.json()\n        return DATestScimToken(\n            id=data[\"id\"],\n            name=data[\"name\"],\n            token_display=data[\"token_display\"],\n            is_active=data[\"is_active\"],\n            created_at=data[\"created_at\"],\n            last_used_at=data.get(\"last_used_at\"),\n            raw_token=data[\"raw_token\"],\n        )\n\n    @staticmethod\n    def get_active(\n        user_performing_action: DATestUser,\n    ) -> DATestScimToken | None:\n        response = requests.get(\n            f\"{API_SERVER_URL}/admin/enterprise-settings/scim/token\",\n            headers=user_performing_action.headers,\n            timeout=60,\n        )\n        if response.status_code == 404:\n            return None\n        response.raise_for_status()\n        data = response.json()\n        return DATestScimToken(\n            id=data[\"id\"],\n            name=data[\"name\"],\n            token_display=data[\"token_display\"],\n            is_active=data[\"is_active\"],\n            created_at=data[\"created_at\"],\n            last_used_at=data.get(\"last_used_at\"),\n        )\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/settings.py",
    "content": "from typing import Any\nfrom typing import Dict\nfrom typing import Optional\n\nimport requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestSettings\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass SettingsManager:\n    @staticmethod\n    def get_settings(\n        user_performing_action: DATestUser,\n    ) -> tuple[Dict[str, Any], str]:\n        headers = user_performing_action.headers\n        headers.pop(\"Content-Type\", None)\n\n        response = requests.get(\n            f\"{API_SERVER_URL}/admin/settings\",\n            headers=headers,\n        )\n\n        if not response.ok:\n            return (\n                {},\n                f\"Failed to get settings - {response.json().get('detail', 'Unknown error')}\",\n            )\n\n        return response.json(), \"\"\n\n    @staticmethod\n    def update_settings(\n        settings: DATestSettings,\n        user_performing_action: DATestUser,\n    ) -> tuple[Dict[str, Any], str]:\n        headers = user_performing_action.headers\n        headers.pop(\"Content-Type\", None)\n\n        payload = settings.model_dump()\n        response = requests.put(\n            f\"{API_SERVER_URL}/admin/settings\",\n            json=payload,\n            headers=headers,\n        )\n\n        if not response.ok:\n            return (\n                {},\n                f\"Failed to update settings - {response.json().get('detail', 'Unknown error')}\",\n            )\n\n        return response.json(), \"\"\n\n    @staticmethod\n    def get_setting(\n        key: str,\n        user_performing_action: DATestUser,\n    ) -> Optional[Any]:\n        settings, error = SettingsManager.get_settings(user_performing_action)\n        if error:\n            return None\n        return settings.get(key)\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/tenant.py",
    "content": "from datetime import datetime\nfrom datetime import timedelta\n\nimport jwt\nimport requests\n\nfrom onyx.server.manage.models import AllUsersResponse\nfrom onyx.server.models import FullUserSnapshot\nfrom onyx.server.models import InvitedUserSnapshot\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef generate_auth_token() -> str:\n    payload = {\n        \"iss\": \"control_plane\",\n        \"exp\": datetime.utcnow() + timedelta(minutes=5),\n        \"iat\": datetime.utcnow(),\n        \"scope\": \"tenant:create\",\n    }\n    token = jwt.encode(payload, \"\", algorithm=\"HS256\")\n    return token\n\n\nclass TenantManager:\n    @staticmethod\n    def get_all_users(\n        user_performing_action: DATestUser,\n    ) -> AllUsersResponse:\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/manage/users\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n        data = response.json()\n        return AllUsersResponse(\n            accepted=[FullUserSnapshot(**user) for user in data[\"accepted\"]],\n            invited=[InvitedUserSnapshot(**user) for user in data[\"invited\"]],\n            slack_users=[FullUserSnapshot(**user) for user in data[\"slack_users\"]],\n            accepted_pages=data[\"accepted_pages\"],\n            invited_pages=data[\"invited_pages\"],\n            slack_users_pages=data[\"slack_users_pages\"],\n        )\n\n    @staticmethod\n    def verify_user_in_tenant(\n        user: DATestUser,\n        user_performing_action: DATestUser,\n    ) -> None:\n        all_users = TenantManager.get_all_users(user_performing_action)\n        for accepted_user in all_users.accepted:\n            if accepted_user.email == user.email and accepted_user.id == user.id:\n                return\n        raise ValueError(f\"User {user.email} not found in tenant\")\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/tool.py",
    "content": "import requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestTool\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass ToolManager:\n    @staticmethod\n    def list_tools(\n        user_performing_action: DATestUser,\n    ) -> list[DATestTool]:\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/tool\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return [\n            DATestTool(\n                id=tool.get(\"id\"),\n                name=tool.get(\"name\"),\n                description=tool.get(\"description\"),\n                display_name=tool.get(\"display_name\"),\n                in_code_tool_id=tool.get(\"in_code_tool_id\"),\n            )\n            for tool in response.json()\n        ]\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/user.py",
    "content": "from copy import deepcopy\nfrom urllib.parse import urlencode\nfrom uuid import uuid4\n\nimport pytest\nimport requests\nfrom requests import HTTPError\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.constants import ANONYMOUS_USER_EMAIL\nfrom onyx.configs.constants import ANONYMOUS_USER_UUID\nfrom onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME\nfrom onyx.server.documents.models import PaginatedReturn\nfrom onyx.server.manage.models import UserInfo\nfrom onyx.server.models import FullUserSnapshot\nfrom onyx.server.models import InvitedUserSnapshot\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import GENERAL_HEADERS\nfrom tests.integration.common_utils.test_models import DATestUser\n\nDOMAIN = \"example.com\"\nDEFAULT_PASSWORD = \"TestPassword123!\"\n\n\ndef build_email(name: str) -> str:\n    return f\"{name}@example.com\"\n\n\nclass UserManager:\n    @staticmethod\n    def get_anonymous_user() -> DATestUser:\n        \"\"\"Get a DATestUser representing the anonymous user.\n\n        Anonymous users are real users in the database with LIMITED role.\n        They don't have login cookies - requests are made with GENERAL_HEADERS.\n        The anonymous_user_enabled setting must be True for these requests to work.\n        \"\"\"\n        return DATestUser(\n            id=ANONYMOUS_USER_UUID,\n            email=ANONYMOUS_USER_EMAIL,\n            password=\"\",\n            headers=GENERAL_HEADERS,\n            role=UserRole.LIMITED,\n            is_active=True,\n        )\n\n    @staticmethod\n    def create(\n        name: str | None = None,\n        email: str | None = None,\n    ) -> DATestUser:\n        if name is None:\n            name = f\"test{str(uuid4())}\"\n\n        if email is None:\n            email = build_email(name)\n\n        password = DEFAULT_PASSWORD\n\n        body = {\n            \"email\": email,\n            \"username\": email,\n            \"password\": password,\n        }\n        response = requests.post(\n            url=f\"{API_SERVER_URL}/auth/register\",\n            json=body,\n            headers=GENERAL_HEADERS,\n        )\n        response.raise_for_status()\n\n        test_user = DATestUser(\n            id=response.json()[\"id\"],\n            email=email,\n            password=password,\n            headers=deepcopy(GENERAL_HEADERS),\n            # fill as basic for now, the `login_as_user` call will\n            # fill it in correctly\n            role=UserRole.BASIC,\n            is_active=True,\n        )\n        print(f\"Created user {test_user.email}\")\n\n        return UserManager.login_as_user(test_user)\n\n    @staticmethod\n    def login_as_user(test_user: DATestUser) -> DATestUser:\n        data = urlencode(\n            {\n                \"username\": test_user.email,\n                \"password\": test_user.password,\n            }\n        )\n        headers = test_user.headers.copy()\n        headers[\"Content-Type\"] = \"application/x-www-form-urlencoded\"\n\n        response = requests.post(\n            url=f\"{API_SERVER_URL}/auth/login\",\n            data=data,\n            headers=headers,\n        )\n\n        response.raise_for_status()\n\n        cookies = response.cookies.get_dict()\n        session_cookie = cookies.get(FASTAPI_USERS_AUTH_COOKIE_NAME)\n\n        if not session_cookie:\n            raise Exception(\"Failed to login\")\n\n        # Set cookies in the headers\n        test_user.headers[\"Cookie\"] = f\"fastapiusersauth={session_cookie}; \"\n        test_user.cookies = {\"fastapiusersauth\": session_cookie}\n\n        # Get user role from /me endpoint\n        me_response = requests.get(\n            url=f\"{API_SERVER_URL}/me\",\n            headers=test_user.headers,\n            cookies=test_user.cookies,\n        )\n        me_response.raise_for_status()\n        me_response_json = me_response.json()\n        test_user.id = me_response_json[\"id\"]\n        role = UserRole(me_response_json[\"role\"])\n        test_user.role = role\n\n        return test_user\n\n    @staticmethod\n    def get_permissions(user: DATestUser) -> list[str]:\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/me/permissions\",\n            headers=user.headers,\n        )\n        response.raise_for_status()\n        return response.json()\n\n    @staticmethod\n    def is_role(\n        user_to_verify: DATestUser,\n        target_role: UserRole,\n    ) -> bool:\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/me\",\n            headers=user_to_verify.headers,\n            cookies=user_to_verify.cookies,\n        )\n\n        if user_to_verify.is_active is False:\n            with pytest.raises(HTTPError):\n                response.raise_for_status()\n            return user_to_verify.role == target_role\n        else:\n            response.raise_for_status()\n\n        role_from_response = response.json().get(\"role\", None)\n\n        if role_from_response is None:\n            return user_to_verify.role == target_role\n\n        return target_role == UserRole(role_from_response)\n\n    @staticmethod\n    def set_role(\n        user_to_set: DATestUser,\n        target_role: UserRole,\n        user_performing_action: DATestUser,\n        explicit_override: bool = False,\n    ) -> DATestUser:\n        response = requests.patch(\n            url=f\"{API_SERVER_URL}/manage/set-user-role\",\n            json={\n                \"user_email\": user_to_set.email,\n                \"new_role\": target_role.value,\n                \"explicit_override\": explicit_override,\n            },\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n        new_user_updated_role = DATestUser(\n            id=user_to_set.id,\n            email=user_to_set.email,\n            password=user_to_set.password,\n            headers=user_to_set.headers,\n            role=target_role,\n            is_active=user_to_set.is_active,\n        )\n        return new_user_updated_role\n\n    # TODO: Add a way to check invited status\n    @staticmethod\n    def is_status(user_to_verify: DATestUser, target_status: bool) -> bool:\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/me\",\n            headers=user_to_verify.headers,\n        )\n\n        if target_status is False:\n            with pytest.raises(HTTPError):\n                response.raise_for_status()\n        else:\n            response.raise_for_status()\n\n        is_active = response.json().get(\"is_active\", None)\n        if is_active is None:\n            return user_to_verify.is_active == target_status\n        return target_status == is_active\n\n    @staticmethod\n    def set_status(\n        user_to_set: DATestUser,\n        target_status: bool,\n        user_performing_action: DATestUser,\n    ) -> DATestUser:\n        url_substring: str\n        if target_status is True:\n            url_substring = \"activate\"\n        elif target_status is False:\n            url_substring = \"deactivate\"\n        response = requests.patch(\n            url=f\"{API_SERVER_URL}/manage/admin/{url_substring}-user\",\n            json={\"user_email\": user_to_set.email},\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n        new_user_updated_status = DATestUser(\n            id=user_to_set.id,\n            email=user_to_set.email,\n            password=user_to_set.password,\n            headers=user_to_set.headers,\n            role=user_to_set.role,\n            is_active=target_status,\n        )\n        return new_user_updated_status\n\n    @staticmethod\n    def create_test_users(\n        user_performing_action: DATestUser,\n        user_name_prefix: str,\n        count: int,\n        role: UserRole = UserRole.BASIC,\n        is_active: bool | None = None,\n    ) -> list[DATestUser]:\n        users_list = []\n        for i in range(1, count + 1):\n            user = UserManager.create(name=f\"{user_name_prefix}_{i}\")\n            if role != UserRole.BASIC:\n                user = UserManager.set_role(user, role, user_performing_action)\n            if is_active is not None:\n                user = UserManager.set_status(user, is_active, user_performing_action)\n            users_list.append(user)\n        return users_list\n\n    @staticmethod\n    def get_user_page(\n        user_performing_action: DATestUser,\n        page_num: int = 0,\n        page_size: int = 10,\n        search_query: str | None = None,\n        role_filter: list[UserRole] | None = None,\n        is_active_filter: bool | None = None,\n    ) -> PaginatedReturn[FullUserSnapshot]:\n        query_params: dict[str, str | list[str] | int] = {\n            \"page_num\": page_num,\n            \"page_size\": page_size,\n        }\n        if search_query:\n            query_params[\"q\"] = search_query\n        if role_filter:\n            query_params[\"roles\"] = [role.value for role in role_filter]\n        if is_active_filter is not None:\n            query_params[\"is_active\"] = is_active_filter\n\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/manage/users/accepted?{urlencode(query_params, doseq=True)}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n        data = response.json()\n        paginated_result = PaginatedReturn(\n            items=[FullUserSnapshot(**user) for user in data[\"items\"]],\n            total_items=data[\"total_items\"],\n        )\n        return paginated_result\n\n    @staticmethod\n    def invite_user(\n        user_to_invite_email: str, user_performing_action: DATestUser\n    ) -> None:\n        \"\"\"Invite a user by email to join the organization.\n\n        Args:\n            user_to_invite_email: Email of the user to invite\n            user_performing_action: User with admin permissions performing the invitation\n        \"\"\"\n        response = requests.put(\n            url=f\"{API_SERVER_URL}/manage/admin/users\",\n            headers=user_performing_action.headers,\n            json={\"emails\": [user_to_invite_email]},\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def accept_invitation(tenant_id: str, user_performing_action: DATestUser) -> None:\n        \"\"\"Accept an invitation to join the organization.\n\n        Args:\n            tenant_id: ID of the tenant/organization to accept invitation for\n            user_performing_action: User accepting the invitation\n        \"\"\"\n        response = requests.post(\n            url=f\"{API_SERVER_URL}/tenants/users/invite/accept\",\n            headers=user_performing_action.headers,\n            json={\"tenant_id\": tenant_id},\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def get_invited_users(\n        user_performing_action: DATestUser,\n    ) -> list[InvitedUserSnapshot]:\n        \"\"\"Get a list of all invited users.\n\n        Args:\n            user_performing_action: User with admin permissions performing the action\n\n        Returns:\n            List of invited user snapshots\n        \"\"\"\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/manage/users/invited\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n        return [InvitedUserSnapshot(**user) for user in response.json()]\n\n    @staticmethod\n    def get_user_info(user_performing_action: DATestUser) -> UserInfo:\n        \"\"\"Get user info for the current user.\n\n        Args:\n            user_performing_action: User performing the action\n        \"\"\"\n        response = requests.get(\n            url=f\"{API_SERVER_URL}/me\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return UserInfo(**response.json())\n"
  },
  {
    "path": "backend/tests/integration/common_utils/managers/user_group.py",
    "content": "import time\nfrom uuid import uuid4\n\nimport requests\n\nfrom ee.onyx.server.user_group.models import UserGroup\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import MAX_DELAY\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import DATestUserGroup\n\n\nclass UserGroupManager:\n    @staticmethod\n    def create(\n        user_performing_action: DATestUser,\n        name: str | None = None,\n        user_ids: list[str] | None = None,\n        cc_pair_ids: list[int] | None = None,\n    ) -> DATestUserGroup:\n        name = f\"{name}-user-group\" if name else f\"test-user-group-{uuid4()}\"\n\n        request = {\n            \"name\": name,\n            \"user_ids\": user_ids or [],\n            \"cc_pair_ids\": cc_pair_ids or [],\n        }\n        response = requests.post(\n            f\"{API_SERVER_URL}/manage/admin/user-group\",\n            json=request,\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        test_user_group = DATestUserGroup(\n            id=response.json()[\"id\"],\n            name=response.json()[\"name\"],\n            user_ids=[user[\"id\"] for user in response.json()[\"users\"]],\n            cc_pair_ids=[cc_pair[\"id\"] for cc_pair in response.json()[\"cc_pairs\"]],\n        )\n        return test_user_group\n\n    @staticmethod\n    def edit(\n        user_group: DATestUserGroup,\n        user_performing_action: DATestUser,\n    ) -> None:\n        response = requests.patch(\n            f\"{API_SERVER_URL}/manage/admin/user-group/{user_group.id}\",\n            json=user_group.model_dump(),\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def delete(\n        user_group: DATestUserGroup,\n        user_performing_action: DATestUser,\n    ) -> None:\n        response = requests.delete(\n            f\"{API_SERVER_URL}/manage/admin/user-group/{user_group.id}\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def add_users(\n        user_group: DATestUserGroup,\n        user_ids: list[str],\n        user_performing_action: DATestUser,\n    ) -> DATestUserGroup:\n        request = {\n            \"user_ids\": user_ids,\n        }\n\n        response = requests.post(\n            f\"{API_SERVER_URL}/manage/admin/user-group/{user_group.id}/add-users\",\n            json=request,\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n        user_group.user_ids = [user[\"id\"] for user in response.json()[\"users\"]]\n        user_group.cc_pair_ids = [\n            cc_pair[\"id\"] for cc_pair in response.json()[\"cc_pairs\"]\n        ]\n        user_group.name = response.json()[\"name\"]\n        return user_group\n\n    @staticmethod\n    def set_curator_status(\n        test_user_group: DATestUserGroup,\n        user_to_set_as_curator: DATestUser,\n        user_performing_action: DATestUser,\n        is_curator: bool = True,\n    ) -> None:\n        set_curator_request = {\n            \"user_id\": user_to_set_as_curator.id,\n            \"is_curator\": is_curator,\n        }\n        response = requests.post(\n            f\"{API_SERVER_URL}/manage/admin/user-group/{test_user_group.id}/set-curator\",\n            json=set_curator_request,\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n\n    @staticmethod\n    def get_permissions(\n        user_group: DATestUserGroup,\n        user_performing_action: DATestUser,\n    ) -> list[str]:\n        response = requests.get(\n            f\"{API_SERVER_URL}/manage/admin/user-group/{user_group.id}/permissions\",\n            headers=user_performing_action.headers,\n        )\n        response.raise_for_status()\n        return response.json()\n\n    @staticmethod\n    def get_all(\n        user_performing_action: DATestUser,\n        include_default: bool = False,\n    ) -> list[UserGroup]:\n        params: dict[str, str] = {}\n        if include_default:\n            params[\"include_default\"] = \"true\"\n        response = requests.get(\n            f\"{API_SERVER_URL}/manage/admin/user-group\",\n            headers=user_performing_action.headers,\n            params=params,\n        )\n        response.raise_for_status()\n        return [UserGroup(**ug) for ug in response.json()]\n\n    @staticmethod\n    def verify(\n        user_group: DATestUserGroup,\n        user_performing_action: DATestUser,\n        verify_deleted: bool = False,\n    ) -> None:\n        all_user_groups = UserGroupManager.get_all(user_performing_action)\n        for fetched_user_group in all_user_groups:\n            if user_group.id == fetched_user_group.id:\n                if verify_deleted:\n                    raise ValueError(\n                        f\"User group {user_group.id} found but should be deleted\"\n                    )\n                fetched_cc_ids = {cc_pair.id for cc_pair in fetched_user_group.cc_pairs}\n                fetched_user_ids = {user.id for user in fetched_user_group.users}\n                user_group_cc_ids = set(user_group.cc_pair_ids)\n                user_group_user_ids = set(user_group.user_ids)\n                if (\n                    fetched_cc_ids == user_group_cc_ids\n                    and fetched_user_ids == user_group_user_ids\n                ):\n                    return\n        if not verify_deleted:\n            raise ValueError(f\"User group {user_group.id} not found\")\n\n    @staticmethod\n    def wait_for_sync(\n        user_performing_action: DATestUser,\n        user_groups_to_check: list[DATestUserGroup] | None = None,\n    ) -> None:\n        start = time.time()\n        while True:\n            user_groups = UserGroupManager.get_all(user_performing_action)\n            if user_groups_to_check:\n                check_ids = {user_group.id for user_group in user_groups_to_check}\n                user_group_ids = {user_group.id for user_group in user_groups}\n                if not check_ids.issubset(user_group_ids):\n                    raise RuntimeError(\"User group not found\")\n                user_groups = [\n                    user_group\n                    for user_group in user_groups\n                    if user_group.id in check_ids\n                ]\n            if all(ug.is_up_to_date for ug in user_groups):\n                print(\"User groups synced successfully.\")\n                return\n\n            if time.time() - start > MAX_DELAY:\n                raise TimeoutError(\n                    f\"User groups were not synced within the {MAX_DELAY} seconds\"\n                )\n            else:\n                print(\"User groups were not synced yet, waiting...\")\n            time.sleep(2)\n\n    @staticmethod\n    def wait_for_deletion_completion(\n        user_groups_to_check: list[DATestUserGroup],\n        user_performing_action: DATestUser,\n    ) -> None:\n        start = time.time()\n        user_group_ids_to_check = {user_group.id for user_group in user_groups_to_check}\n        while True:\n            fetched_user_groups = UserGroupManager.get_all(user_performing_action)\n            fetched_user_group_ids = {\n                user_group.id for user_group in fetched_user_groups\n            }\n            if not user_group_ids_to_check.intersection(fetched_user_group_ids):\n                return\n\n            if time.time() - start > MAX_DELAY:\n                raise TimeoutError(\n                    f\"User groups deletion was not completed within the {MAX_DELAY} seconds\"\n                )\n            else:\n                print(\"Some user groups are still being deleted, waiting...\")\n            time.sleep(2)\n"
  },
  {
    "path": "backend/tests/integration/common_utils/reset.py",
    "content": "import logging\nimport os\nimport time\nfrom types import SimpleNamespace\n\nimport psycopg2\nimport requests\n\nfrom alembic import command\nfrom alembic.config import Config\nfrom onyx.configs.app_configs import POSTGRES_HOST\nfrom onyx.configs.app_configs import POSTGRES_PASSWORD\nfrom onyx.configs.app_configs import POSTGRES_PORT\nfrom onyx.configs.app_configs import POSTGRES_USER\nfrom onyx.db.engine.sql_engine import build_connection_string\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.engine.sql_engine import SYNC_DB_API\nfrom onyx.db.engine.tenant_utils import get_all_tenant_ids\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.db.swap_index import check_and_perform_index_swap\nfrom onyx.document_index.document_index_utils import get_multipass_config\nfrom onyx.document_index.vespa.index import DOCUMENT_ID_ENDPOINT\nfrom onyx.document_index.vespa.index import VespaIndex\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.indexing.models import IndexingSetting\nfrom onyx.setup import setup_document_indices\nfrom onyx.setup import setup_postgres\nfrom onyx.utils.logger import setup_logger\nfrom tests.integration.common_utils.timeout import run_with_timeout_multiproc\n\nlogger = setup_logger()\n\n\ndef _run_migrations(\n    database_url: str,\n    config_name: str,\n    direction: str = \"upgrade\",\n    revision: str = \"head\",\n    schema: str = \"public\",\n) -> None:\n    # hide info logs emitted during migration\n    logging.getLogger(\"alembic\").setLevel(logging.CRITICAL)\n\n    # Create an Alembic configuration object\n    alembic_cfg = Config(\"alembic.ini\")\n    alembic_cfg.set_section_option(\"logger_alembic\", \"level\", \"WARN\")\n    alembic_cfg.attributes[\"configure_logger\"] = False\n    alembic_cfg.config_ini_section = config_name\n\n    alembic_cfg.cmd_opts = SimpleNamespace()  # type: ignore\n    alembic_cfg.cmd_opts.x = [f\"schema={schema}\"]  # type: ignore\n\n    # Set the SQLAlchemy URL in the Alembic configuration\n    alembic_cfg.set_main_option(\"sqlalchemy.url\", database_url)\n\n    # Run the migration\n    if direction == \"upgrade\":\n        command.upgrade(alembic_cfg, revision)\n    elif direction == \"downgrade\":\n        command.downgrade(alembic_cfg, revision)\n    else:\n        raise ValueError(\n            f\"Invalid direction: {direction}. Must be 'upgrade' or 'downgrade'.\"\n        )\n\n    logging.getLogger(\"alembic\").setLevel(logging.INFO)\n\n\ndef downgrade_postgres(\n    database: str = \"postgres\",\n    schema: str = \"public\",\n    config_name: str = \"alembic\",\n    revision: str = \"base\",\n    clear_data: bool = False,\n) -> None:\n    \"\"\"Downgrade Postgres database to base state.\"\"\"\n    if clear_data:\n        if revision != \"base\":\n            raise ValueError(\"Clearing data without rolling back to base state\")\n\n        conn = psycopg2.connect(\n            dbname=database,\n            user=POSTGRES_USER,\n            password=POSTGRES_PASSWORD,\n            host=POSTGRES_HOST,\n            port=POSTGRES_PORT,\n            application_name=\"downgrade_postgres\",\n        )\n        conn.autocommit = True  # Need autocommit for dropping schema\n        cur = conn.cursor()\n\n        # Close any existing connections to the schema before dropping\n        cur.execute(\n            f\"\"\"\n            SELECT pg_terminate_backend(pg_stat_activity.pid)\n            FROM pg_stat_activity\n            WHERE pg_stat_activity.datname = '{database}'\n            AND pg_stat_activity.state = 'idle in transaction'\n            AND pid <> pg_backend_pid();\n        \"\"\"\n        )\n\n        # Drop and recreate the public schema - this removes ALL objects\n        cur.execute(f\"DROP SCHEMA {schema} CASCADE;\")\n        cur.execute(f\"CREATE SCHEMA {schema};\")\n\n        # Restore default privileges\n        cur.execute(f\"GRANT ALL ON SCHEMA {schema} TO postgres;\")\n        cur.execute(f\"GRANT ALL ON SCHEMA {schema} TO public;\")\n\n        cur.close()\n        conn.close()\n\n        return\n\n    # Downgrade to base\n    conn_str = build_connection_string(\n        db=database,\n        user=POSTGRES_USER,\n        password=POSTGRES_PASSWORD,\n        host=POSTGRES_HOST,\n        port=POSTGRES_PORT,\n        db_api=SYNC_DB_API,\n    )\n    _run_migrations(\n        conn_str,\n        config_name,\n        direction=\"downgrade\",\n        revision=revision,\n    )\n\n\ndef upgrade_postgres(\n    database: str = \"postgres\", config_name: str = \"alembic\", revision: str = \"head\"\n) -> None:\n    \"\"\"Upgrade Postgres database to latest version.\"\"\"\n    conn_str = build_connection_string(\n        db=database,\n        user=POSTGRES_USER,\n        password=POSTGRES_PASSWORD,\n        host=POSTGRES_HOST,\n        port=POSTGRES_PORT,\n        db_api=SYNC_DB_API,\n        app_name=\"upgrade_postgres\",\n    )\n    _run_migrations(\n        conn_str,\n        config_name,\n        direction=\"upgrade\",\n        revision=revision,\n    )\n\n\ndef drop_multitenant_postgres(\n    database: str = \"postgres\",\n) -> None:\n    \"\"\"Reset the Postgres database.\"\"\"\n    # this seems to hang due to locking issues, so run with a timeout with a few retries\n    NUM_TRIES = 10\n    TIMEOUT = 40\n    success = False\n    for _ in range(NUM_TRIES):\n        logger.info(f\"drop_multitenant_postgres_task starting... ({_ + 1}/{NUM_TRIES})\")\n        try:\n            run_with_timeout_multiproc(\n                drop_multitenant_postgres_task,\n                TIMEOUT,\n                kwargs={\n                    \"dbname\": database,\n                },\n            )\n            success = True\n            break\n        except TimeoutError:\n            logger.warning(\n                f\"drop_multitenant_postgres_task timed out, retrying... ({_ + 1}/{NUM_TRIES})\"\n            )\n        except RuntimeError:\n            logger.warning(\n                f\"drop_multitenant_postgres_task exceptioned, retrying... ({_ + 1}/{NUM_TRIES})\"\n            )\n\n    if not success:\n        raise RuntimeError(\"drop_multitenant_postgres_task failed after 10 timeouts.\")\n\n\ndef drop_multitenant_postgres_task(dbname: str) -> None:\n    conn = psycopg2.connect(\n        dbname=dbname,\n        user=POSTGRES_USER,\n        password=POSTGRES_PASSWORD,\n        host=POSTGRES_HOST,\n        port=POSTGRES_PORT,\n        connect_timeout=10,\n        application_name=\"drop_multitenant_postgres_task\",\n    )\n\n    conn.autocommit = True\n    cur = conn.cursor()\n\n    logger.info(\"Selecting tenant schemas.\")\n    # Get all tenant schemas\n    cur.execute(\n        \"\"\"\n        SELECT schema_name\n        FROM information_schema.schemata\n        WHERE schema_name LIKE 'tenant_%'\n        \"\"\"\n    )\n    tenant_schemas = cur.fetchall()\n\n    # Drop all tenant schemas\n    logger.info(\"Dropping all tenant schemas.\")\n    for schema in tenant_schemas:\n        # Close any existing connections to the schema before dropping\n        cur.execute(\n            \"\"\"\n            SELECT pg_terminate_backend(pg_stat_activity.pid)\n            FROM pg_stat_activity\n            WHERE pg_stat_activity.datname = 'postgres'\n            AND pg_stat_activity.state = 'idle in transaction'\n            AND pid <> pg_backend_pid();\n        \"\"\"\n        )\n\n        schema_name = schema[0]\n        cur.execute(f'DROP SCHEMA \"{schema_name}\" CASCADE')\n\n    # Drop tables in the public schema\n    logger.info(\"Selecting public schema tables.\")\n    cur.execute(\n        \"\"\"\n        SELECT tablename FROM pg_tables\n        WHERE schemaname = 'public'\n        \"\"\"\n    )\n    public_tables = cur.fetchall()\n\n    logger.info(\"Dropping public schema tables.\")\n    for table in public_tables:\n        table_name = table[0]\n        cur.execute(f'DROP TABLE IF EXISTS public.\"{table_name}\" CASCADE')\n\n    cur.close()\n    conn.close()\n\n\ndef reset_postgres(\n    database: str = \"postgres\",\n    config_name: str = \"alembic\",\n    setup_onyx: bool = True,\n) -> None:\n    \"\"\"Reset the Postgres database.\"\"\"\n    # this seems to hang due to locking issues, so run with a timeout with a few retries\n    NUM_TRIES = 10\n    TIMEOUT = 40\n    success = False\n    for _ in range(NUM_TRIES):\n        logger.info(f\"Downgrading Postgres... ({_ + 1}/{NUM_TRIES})\")\n        try:\n            run_with_timeout_multiproc(\n                downgrade_postgres,\n                TIMEOUT,\n                kwargs={\n                    \"database\": database,\n                    \"config_name\": config_name,\n                    \"revision\": \"base\",\n                    \"clear_data\": True,\n                },\n            )\n            success = True\n            break\n        except TimeoutError:\n            logger.warning(\n                f\"Postgres downgrade timed out, retrying... ({_ + 1}/{NUM_TRIES})\"\n            )\n        except RuntimeError:\n            logger.warning(\n                f\"Postgres downgrade exceptioned, retrying... ({_ + 1}/{NUM_TRIES})\"\n            )\n\n    if not success:\n        raise RuntimeError(\"Postgres downgrade failed after 10 timeouts.\")\n\n    logger.info(\"Upgrading Postgres...\")\n    upgrade_postgres(database=database, config_name=config_name, revision=\"head\")\n    if setup_onyx:\n        logger.info(\"Setting up Postgres...\")\n        with get_session_with_current_tenant() as db_session:\n            setup_postgres(db_session)\n\n\ndef reset_vespa() -> None:\n    \"\"\"Wipe all data from the Vespa index.\"\"\"\n\n    with get_session_with_current_tenant() as db_session:\n        # swap to the correct default model\n        check_and_perform_index_swap(db_session)\n\n        search_settings = get_current_search_settings(db_session)\n        multipass_config = get_multipass_config(search_settings)\n        index_name = search_settings.index_name\n\n    success = setup_document_indices(\n        document_indices=[\n            VespaIndex(\n                index_name=index_name,\n                secondary_index_name=None,\n                large_chunks_enabled=multipass_config.enable_large_chunks,\n                secondary_large_chunks_enabled=None,\n            )\n        ],\n        index_setting=IndexingSetting.from_db_model(search_settings),\n        secondary_index_setting=None,\n    )\n    if not success:\n        raise RuntimeError(\"Could not connect to Vespa within the specified timeout.\")\n\n    for _ in range(5):\n        try:\n            continuation = None\n            should_continue = True\n            while should_continue:\n                params = {\"selection\": \"true\", \"cluster\": \"danswer_index\"}\n                if continuation:\n                    params = {**params, \"continuation\": continuation}\n                response = requests.delete(\n                    DOCUMENT_ID_ENDPOINT.format(index_name=index_name), params=params\n                )\n                response.raise_for_status()\n\n                response_json = response.json()\n\n                continuation = response_json.get(\"continuation\")\n                should_continue = bool(continuation)\n\n            break\n        except Exception as e:\n            print(f\"Error deleting documents: {e}\")\n            time.sleep(5)\n\n\ndef reset_postgres_multitenant() -> None:\n    \"\"\"Reset the Postgres database for all tenants in a multitenant setup.\"\"\"\n\n    drop_multitenant_postgres()\n    reset_postgres(config_name=\"schema_private\", setup_onyx=False)\n\n\ndef reset_vespa_multitenant() -> None:\n    \"\"\"Wipe all data from the Vespa index for all tenants.\"\"\"\n\n    for tenant_id in get_all_tenant_ids():\n        with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n            # swap to the correct default model for each tenant\n            check_and_perform_index_swap(db_session)\n\n            search_settings = get_current_search_settings(db_session)\n            multipass_config = get_multipass_config(search_settings)\n            index_name = search_settings.index_name\n\n        success = setup_document_indices(\n            document_indices=[\n                VespaIndex(\n                    index_name=index_name,\n                    secondary_index_name=None,\n                    large_chunks_enabled=multipass_config.enable_large_chunks,\n                    secondary_large_chunks_enabled=None,\n                )\n            ],\n            index_setting=IndexingSetting.from_db_model(search_settings),\n            secondary_index_setting=None,\n        )\n\n        if not success:\n            raise RuntimeError(\n                f\"Could not connect to Vespa for tenant {tenant_id} within the specified timeout.\"\n            )\n\n        for _ in range(5):\n            try:\n                continuation = None\n                should_continue = True\n                while should_continue:\n                    params = {\"selection\": \"true\", \"cluster\": \"danswer_index\"}\n                    if continuation:\n                        params = {**params, \"continuation\": continuation}\n                    response = requests.delete(\n                        DOCUMENT_ID_ENDPOINT.format(index_name=index_name),\n                        params=params,\n                    )\n                    response.raise_for_status()\n\n                    response_json = response.json()\n\n                    continuation = response_json.get(\"continuation\")\n                    should_continue = bool(continuation)\n\n                break\n            except Exception as e:\n                print(f\"Error deleting documents for tenant {tenant_id}: {e}\")\n                time.sleep(5)\n\n\ndef reset_file_store() -> None:\n    \"\"\"Reset the FileStore.\"\"\"\n    filestore = get_default_file_store()\n    for file_record in filestore.list_files_by_prefix(\"\"):\n        filestore.delete_file(file_record.file_id)\n\n\ndef reset_all() -> None:\n    if os.environ.get(\"SKIP_RESET\", \"\").lower() == \"true\":\n        logger.info(\"Skipping reset.\")\n        return\n\n    logger.info(\"Resetting Postgres...\")\n    reset_postgres()\n    logger.info(\"Resetting Vespa...\")\n    reset_vespa()\n    logger.info(\"Resetting FileStore...\")\n    reset_file_store()\n\n\ndef reset_all_multitenant() -> None:\n    \"\"\"Reset both Postgres and Vespa for all tenants.\n\n    Honors SKIP_RESET env var to allow callers (e.g., CI) to disable\n    heavy resets entirely for faster end-to-end runs.\n    \"\"\"\n    if os.environ.get(\"SKIP_RESET\", \"\").lower() == \"true\":\n        logger.info(\"SKIPPING multitenant reset due to SKIP_RESET=true\")\n        return\n\n    logger.info(\"Resetting Postgres for all tenants...\")\n    reset_postgres_multitenant()\n    logger.info(\"Resetting Vespa for all tenants...\")\n    reset_vespa_multitenant()\n    logger.info(\"Finished resetting all.\")\n"
  },
  {
    "path": "backend/tests/integration/common_utils/test_document_utils.py",
    "content": "import uuid\nfrom datetime import datetime\nfrom datetime import timezone\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import TextSection\n\n\ndef create_test_document(\n    doc_id: str | None = None,\n    text: str = \"Test content\",\n    link: str = \"http://example.com\",\n    source: DocumentSource = DocumentSource.MOCK_CONNECTOR,\n    metadata: dict | None = None,\n) -> Document:\n    \"\"\"Create a test document with the given parameters.\n\n    Args:\n        doc_id: Optional document ID. If not provided, a random UUID will be generated.\n        text: The text content of the document. Defaults to \"Test content\".\n        link: The link for the document section. Defaults to \"http://example.com\".\n        source: The document source. Defaults to MOCK_CONNECTOR.\n        metadata: Optional metadata dictionary. Defaults to empty dict.\n    \"\"\"\n    doc_id = doc_id or f\"test-doc-{uuid.uuid4()}\"\n    return Document(\n        id=doc_id,\n        sections=[TextSection(text=text, link=link)],\n        source=source,\n        semantic_identifier=doc_id,\n        doc_updated_at=datetime.now(timezone.utc),\n        metadata=metadata or {},\n    )\n\n\ndef create_test_document_failure(\n    doc_id: str,\n    failure_message: str = \"Simulated failure\",\n    document_link: str | None = None,\n) -> ConnectorFailure:\n    \"\"\"Create a test document failure with the given parameters.\n\n    Args:\n        doc_id: The ID of the document that failed.\n        failure_message: The failure message. Defaults to \"Simulated failure\".\n        document_link: Optional link to the failed document.\n    \"\"\"\n    return ConnectorFailure(\n        failed_document=DocumentFailure(\n            document_id=doc_id,\n            document_link=document_link,\n        ),\n        failure_message=failure_message,\n    )\n"
  },
  {
    "path": "backend/tests/integration/common_utils/test_file_utils.py",
    "content": "import io\n\nfrom PIL import Image\n\n\ndef create_test_image(\n    width: int = 1,\n    height: int = 1,\n    color: str = \"white\",\n    format: str = \"PNG\",\n) -> io.BytesIO:\n    \"\"\"Create a test image file in memory for file attachment testing.\n\n    Args:\n        width: Width of the image in pixels. Defaults to 1.\n        height: Height of the image in pixels. Defaults to 1.\n        color: Color of the image. Defaults to \"white\".\n        format: Image format (PNG, JPEG, etc.). Defaults to \"PNG\".\n\n    Returns:\n        A BytesIO object containing the image data, positioned at the start.\n    \"\"\"\n    image = Image.new(\"RGB\", (width, height), color=color)\n    image_file = io.BytesIO()\n    image.save(image_file, format=format)\n    image_file.seek(0)\n    return image_file\n\n\ndef create_test_text_file(content: str | bytes) -> io.BytesIO:\n    \"\"\"Create a test text file in memory for file attachment testing.\n\n    Args:\n        content: The text content of the file. Can be string or bytes.\n\n    Returns:\n        A BytesIO object containing the text data, positioned at the start.\n    \"\"\"\n    if isinstance(content, str):\n        content = content.encode(\"utf-8\")\n    text_file = io.BytesIO(content)\n    text_file.seek(0)\n    return text_file\n"
  },
  {
    "path": "backend/tests/integration/common_utils/test_models.py",
    "content": "from dataclasses import dataclass\nfrom datetime import datetime\nfrom enum import Enum\nfrom typing import Any\nfrom uuid import UUID\n\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.constants import MessageType\nfrom onyx.configs.constants import QAFeedbackType\nfrom onyx.context.search.models import SavedSearchDoc\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.db.enums import AccessType\nfrom onyx.server.documents.models import DocumentSource\nfrom onyx.server.documents.models import IndexAttemptSnapshot\nfrom onyx.server.documents.models import IndexingStatus\nfrom onyx.server.documents.models import InputType\nfrom onyx.server.query_and_chat.streaming_models import GeneratedImage\n\n\"\"\"\nThese data models are used to represent the data on the testing side of things.\nThis means the flow is:\n1. Make request that changes data in db\n2. Make a change to the testing model\n3. Retrieve data from db\n4. Compare db data with testing model to verify\n\"\"\"\n\n\nclass DATestPAT(BaseModel):\n    \"\"\"Personal Access Token model for testing.\"\"\"\n\n    id: int\n    name: str\n    token: str | None = None  # Raw token - only present on initial creation\n    token_display: str\n    created_at: str\n    expires_at: str | None = None\n    last_used_at: str | None = None\n\n\nclass DATestScimToken(BaseModel):\n    \"\"\"SCIM bearer token model for testing.\"\"\"\n\n    id: int\n    name: str\n    raw_token: str | None = None  # Only present on initial creation\n    token_display: str\n    is_active: bool\n    created_at: str\n    last_used_at: str | None = None\n\n\nclass DATestAPIKey(BaseModel):\n    api_key_id: int\n    api_key_display: str\n    api_key: str | None = None  # only present on initial creation\n    api_key_name: str | None = None\n    api_key_role: UserRole\n\n    user_id: UUID\n    headers: dict\n\n\nclass DATestUser(BaseModel):\n    id: str\n    email: str\n    password: str\n    headers: dict\n    role: UserRole\n    is_active: bool\n    cookies: dict = {}\n\n\nclass DATestPersonaLabel(BaseModel):\n    id: int | None = None\n    name: str\n\n\nclass DATestCredential(BaseModel):\n    id: int\n    name: str\n    credential_json: dict[str, Any]\n    admin_public: bool\n    source: DocumentSource\n    curator_public: bool\n    groups: list[int]\n\n\nclass DATestConnector(BaseModel):\n    id: int\n    name: str\n    source: DocumentSource\n    input_type: InputType\n    connector_specific_config: dict[str, Any]\n    groups: list[int] | None = None\n    access_type: AccessType | None = None\n\n\nclass SimpleTestDocument(BaseModel):\n    id: str\n    content: str\n    image_file_id: str | None = None\n\n\nclass DATestCCPair(BaseModel):\n    id: int\n    name: str\n    connector_id: int\n    credential_id: int\n    access_type: AccessType\n    groups: list[int]\n    documents: list[SimpleTestDocument] = Field(default_factory=list)\n\n\nclass DATestUserGroup(BaseModel):\n    id: int\n    name: str\n    user_ids: list[str]\n    cc_pair_ids: list[int]\n\n\nclass DATestLLMProvider(BaseModel):\n    id: int\n    name: str\n    provider: str\n    api_key: str\n    default_model_name: str | None = None\n    is_public: bool\n    is_auto_mode: bool = False\n    groups: list[int]\n    personas: list[int]\n    api_base: str | None = None\n    api_version: str | None = None\n\n\nclass DATestImageGenerationConfig(BaseModel):\n    image_provider_id: str\n    model_configuration_id: int\n    model_name: str\n    llm_provider_id: int\n    llm_provider_name: str\n    is_default: bool\n\n\nclass DATestDocumentSet(BaseModel):\n    id: int\n    name: str\n    description: str\n    cc_pair_ids: list[int] = Field(default_factory=list)\n    is_public: bool\n    is_up_to_date: bool\n    users: list[str] = Field(default_factory=list)\n    groups: list[int] = Field(default_factory=list)\n    federated_connectors: list[dict[str, Any]] = Field(default_factory=list)\n\n\nclass DATestPersona(BaseModel):\n    id: int\n    name: str\n    description: str\n    is_public: bool\n    document_set_ids: list[int]\n    tool_ids: list[int]\n    llm_model_provider_override: str | None\n    llm_model_version_override: str | None\n    users: list[str]\n    groups: list[int]\n    label_ids: list[int]\n    is_featured: bool = False\n\n    # Embedded prompt fields (no longer separate prompt_ids)\n    system_prompt: str | None = None\n    task_prompt: str | None = None\n    datetime_aware: bool = True\n\n\nclass DATestChatMessage(BaseModel):\n    id: int\n    chat_session_id: UUID\n    parent_message_id: int | None\n    message: str\n    message_type: MessageType | None = None\n    files: list | None = None\n\n\nclass DATestChatSession(BaseModel):\n    id: UUID\n    persona_id: int\n    description: str\n\n\nclass DAQueryHistoryEntry(DATestChatSession):\n    feedback_type: QAFeedbackType | None\n\n\nclass ToolName(str, Enum):\n    INTERNET_SEARCH = \"internet_search\"\n    INTERNAL_SEARCH = \"run_search\"\n    IMAGE_GENERATION = \"generate_image\"\n\n\nclass ToolResult(BaseModel):\n    tool_name: ToolName\n\n    queries: list[str] = Field(default_factory=list)\n    documents: list[SavedSearchDoc] = Field(default_factory=list)\n    images: list[GeneratedImage] = Field(default_factory=list)\n\n\nclass ToolCallDebug(BaseModel):\n    tool_call_id: str\n    tool_name: str\n    tool_args: dict[str, Any]\n\n\nclass ErrorResponse(BaseModel):\n    error: str\n    stack_trace: str\n\n\nclass StreamedResponse(BaseModel):\n    full_message: str\n    assistant_message_id: int\n    top_documents: list[SearchDoc]\n    used_tools: list[ToolResult]\n    tool_call_debug: list[ToolCallDebug] = Field(default_factory=list)\n    error: ErrorResponse | None = None\n\n    # Track heartbeat packets for image generation and other tools\n    heartbeat_packets: list[dict[str, Any]]\n\n\nclass DATestGatingType(str, Enum):\n    FULL = \"full\"\n    PARTIAL = \"partial\"\n    NONE = \"none\"\n\n\nclass DATestSettings(BaseModel):\n    \"\"\"General settings\"\"\"\n\n    # is float to allow for fractional days for easier automated testing\n    maximum_chat_retention_days: float | None = None\n    gpu_enabled: bool | None = None\n    product_gating: DATestGatingType = DATestGatingType.NONE\n    anonymous_user_enabled: bool | None = None\n    image_extraction_and_analysis_enabled: bool | None = False\n    search_time_image_analysis_enabled: bool | None = False\n\n\n@dataclass\nclass DATestIndexAttempt:\n    id: int\n    status: IndexingStatus | None\n    new_docs_indexed: int | None\n    total_docs_indexed: int | None\n    docs_removed_from_index: int | None\n    error_msg: str | None\n    time_started: datetime | None\n    time_updated: datetime | None\n\n    @classmethod\n    def from_index_attempt_snapshot(\n        cls, index_attempt: IndexAttemptSnapshot\n    ) -> \"DATestIndexAttempt\":\n        return cls(\n            id=index_attempt.id,\n            status=index_attempt.status,\n            new_docs_indexed=index_attempt.new_docs_indexed,\n            total_docs_indexed=index_attempt.total_docs_indexed,\n            docs_removed_from_index=index_attempt.docs_removed_from_index,\n            error_msg=index_attempt.error_msg,\n            time_started=(\n                datetime.fromisoformat(index_attempt.time_started)\n                if index_attempt.time_started\n                else None\n            ),\n            time_updated=datetime.fromisoformat(index_attempt.time_updated),\n        )\n\n\nclass DATestTool(BaseModel):\n    id: int\n    name: str\n    description: str\n    display_name: str\n    in_code_tool_id: str | None\n\n\n# Discord Bot Models\nclass DATestDiscordGuildConfig(BaseModel):\n    \"\"\"Discord guild config model for testing.\"\"\"\n\n    id: int\n    registration_key: str | None = None  # Only present on creation\n    guild_id: int | None = None\n    guild_name: str | None = None\n    enabled: bool = True\n    default_persona_id: int | None = None\n\n\nclass DATestDiscordChannelConfig(BaseModel):\n    \"\"\"Discord channel config model for testing.\"\"\"\n\n    id: int\n    guild_config_id: int\n    channel_id: int\n    channel_name: str\n    channel_type: str\n    is_private: bool\n    enabled: bool = False\n    thread_only_mode: bool = False\n    require_bot_invocation: bool = True\n    persona_override_id: int | None = None\n"
  },
  {
    "path": "backend/tests/integration/common_utils/timeout.py",
    "content": "# import multiprocessing\n# from collections.abc import Callable\n# from typing import Any\n# from typing import TypeVar\n\n# T = TypeVar(\"T\")\n\n\n# def run_with_timeout_multiproc(\n#     task: Callable[..., T], timeout: int, kwargs: dict[str, Any]\n# ) -> T:\n#     # Use multiprocessing to prevent a thread from blocking the main thread\n#     with multiprocessing.Pool(processes=1) as pool:\n#         async_result = pool.apply_async(task, kwds=kwargs)\n#         try:\n#             # Wait at most timeout seconds for the function to complete\n#             result = async_result.get(timeout=timeout)\n#             return result\n#         except multiprocessing.TimeoutError:\n#             raise TimeoutError(f\"Function timed out after {timeout} seconds\")\n\n\nimport multiprocessing\nimport traceback\nfrom collections.abc import Callable\nfrom multiprocessing import Queue\nfrom typing import Any\nfrom typing import TypeVar\n\nT = TypeVar(\"T\")\n\n\ndef _multiproc_wrapper(\n    task: Callable[..., T], kwargs: dict[str, Any], q: Queue\n) -> None:\n    try:\n        result = task(**kwargs)\n        q.put((\"success\", result))\n    except Exception:\n        q.put((\"error\", traceback.format_exc()))\n\n\ndef run_with_timeout_multiproc(\n    task: Callable[..., T], timeout: int, kwargs: dict[str, Any]\n) -> T:\n    ctx = multiprocessing.get_context(\"spawn\")\n    q: Queue = ctx.Queue()\n    p = ctx.Process(\n        target=_multiproc_wrapper,\n        args=(\n            task,\n            kwargs,\n            q,\n        ),\n    )\n    p.start()\n    p.join(timeout)\n\n    if p.is_alive():\n        p.terminate()\n        raise TimeoutError(f\"{task.__name__} timed out after {timeout} seconds\")\n\n    if not q.empty():\n        status, result = q.get()\n        if status == \"success\":\n            return result\n        else:\n            raise RuntimeError(f\"{task.__name__} failed:\\n{result}\")\n    else:\n        raise RuntimeError(f\"{task.__name__} returned no result\")\n"
  },
  {
    "path": "backend/tests/integration/common_utils/vespa.py",
    "content": "import requests\n\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT\n\n\nclass vespa_fixture:\n    def __init__(self, index_name: str):\n        self.index_name = index_name\n        self.vespa_document_url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)\n\n    def get_documents_by_id(\n        self, document_ids: list[str], wanted_doc_count: int = 1_000\n    ) -> dict:\n        selection = \" or \".join(\n            f\"{self.index_name}.document_id=='{document_id}'\"\n            for document_id in document_ids\n        )\n        params = {\n            \"selection\": selection,\n            \"wantedDocumentCount\": wanted_doc_count,\n        }\n        response = requests.get(\n            self.vespa_document_url,\n            params=params,  # type: ignore\n        )\n        response.raise_for_status()\n        return response.json()\n"
  },
  {
    "path": "backend/tests/integration/conftest.py",
    "content": "import os\nfrom collections.abc import Callable\n\nimport pytest\n\n# Integration tests rely on this mode to enable mock_llm_response paths.\nos.environ[\"INTEGRATION_TESTS_MODE\"] = \"true\"\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.search_settings import get_current_search_settings\nfrom tests.integration.common_utils.constants import ADMIN_USER_NAME\nfrom tests.integration.common_utils.constants import GENERAL_HEADERS\nfrom tests.integration.common_utils.managers.api_key import APIKeyManager\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.image_generation import (\n    ImageGenerationConfigManager,\n)\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.user import build_email\nfrom tests.integration.common_utils.managers.user import DEFAULT_PASSWORD\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.reset import reset_all\nfrom tests.integration.common_utils.reset import reset_all_multitenant\nfrom tests.integration.common_utils.test_models import DATestAPIKey\nfrom tests.integration.common_utils.test_models import DATestImageGenerationConfig\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import SimpleTestDocument\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\nBASIC_USER_NAME = \"basic_user\"\n\nDocumentBuilderType = Callable[[list[str]], list[SimpleTestDocument]]\n\n\n@pytest.fixture(scope=\"session\", autouse=True)\ndef initialize_db() -> None:\n    # Make sure that the db engine is initialized before any tests are run\n    SqlEngine.init_engine(\n        pool_size=10,\n        max_overflow=5,\n    )\n\n\ndef load_env_vars(env_file: str = \".env\") -> None:\n    current_dir = os.path.dirname(os.path.abspath(__file__))\n    env_path = os.path.join(current_dir, env_file)\n    try:\n        with open(env_path, \"r\") as f:\n            for line in f:\n                line = line.strip()\n                if line and not line.startswith(\"#\"):\n                    key, value = line.split(\"=\", 1)\n                    # Preserve explicitly pre-set vars (e.g. INTEGRATION_TESTS_MODE).\n                    os.environ.setdefault(key, value.strip())\n        print(\"Successfully loaded environment variables\")\n    except FileNotFoundError:\n        print(f\"File {env_file} not found\")\n\n\n# Load environment variables at the module level\nload_env_vars()\n\n\n\"\"\"NOTE: for some reason using this seems to lead to misc\n`sqlalchemy.exc.OperationalError: (psycopg2.OperationalError) server closed the connection unexpectedly`\nerrors.\n\nCommenting out till we can get to the bottom of it. For now, just using\ninstantiate the session directly within the test.\n\"\"\"\n\n\n@pytest.fixture\ndef vespa_client() -> vespa_fixture:\n    with get_session_with_current_tenant() as db_session:\n        search_settings = get_current_search_settings(db_session)\n        return vespa_fixture(index_name=search_settings.index_name)\n\n\n@pytest.fixture\ndef reset() -> None:\n    reset_all()\n\n\n@pytest.fixture\ndef new_admin_user(reset: None) -> DATestUser:  # noqa: ARG001\n    return UserManager.create(name=ADMIN_USER_NAME)\n\n\n@pytest.fixture\ndef admin_user() -> DATestUser:\n    try:\n        user = UserManager.create(name=ADMIN_USER_NAME)\n\n        # if there are other users for some reason, reset and try again\n        if not UserManager.is_role(user, UserRole.ADMIN):\n            print(\"Trying to reset\")\n            reset_all()\n            user = UserManager.create(name=ADMIN_USER_NAME)\n        return user\n    except Exception as e:\n        print(f\"Failed to create admin user: {e}\")\n\n    try:\n        user = UserManager.login_as_user(\n            DATestUser(\n                id=\"\",\n                email=build_email(\"admin_user\"),\n                password=DEFAULT_PASSWORD,\n                headers=GENERAL_HEADERS,\n                role=UserRole.ADMIN,\n                is_active=True,\n            )\n        )\n        if not UserManager.is_role(user, UserRole.ADMIN):\n            reset_all()\n            user = UserManager.create(name=ADMIN_USER_NAME)\n            return user\n\n        return user\n    except Exception as e:\n        print(f\"Failed to create or login as admin user: {e}\")\n\n    raise RuntimeError(\"Failed to create or login as admin user\")\n\n\n@pytest.fixture\ndef basic_user(\n    # make sure the admin user exists first to ensure this new user\n    # gets the BASIC role\n    admin_user: DATestUser,  # noqa: ARG001\n) -> DATestUser:\n    try:\n        user = UserManager.create(name=BASIC_USER_NAME)\n\n        # Validate that the user has the BASIC role\n        if user.role != UserRole.BASIC:\n            raise RuntimeError(\n                f\"Created user {BASIC_USER_NAME} does not have BASIC role\"\n            )\n\n        return user\n    except Exception as e:\n        print(f\"Failed to create basic user, trying to login as existing user: {e}\")\n\n        # Try to login as existing basic user\n        user = UserManager.login_as_user(\n            DATestUser(\n                id=\"\",\n                email=build_email(BASIC_USER_NAME),\n                password=DEFAULT_PASSWORD,\n                headers=GENERAL_HEADERS,\n                role=UserRole.BASIC,\n                is_active=True,\n            )\n        )\n\n        # Validate that the logged-in user has the BASIC role\n        if not UserManager.is_role(user, UserRole.BASIC):\n            raise RuntimeError(f\"User {BASIC_USER_NAME} does not have BASIC role\")\n\n        return user\n\n\n@pytest.fixture(scope=\"session\")\ndef reset_multitenant() -> None:\n    \"\"\"Initialize multi-tenant state once per test session.\n\n    Intentionally avoid per-test resets to speed up the multitenant suite.\n    The underlying reset function honors SKIP_RESET to allow CI to disable\n    heavy resets entirely.\n    \"\"\"\n    reset_all_multitenant()\n\n\n@pytest.fixture\ndef llm_provider(admin_user: DATestUser) -> DATestLLMProvider:\n    return LLMProviderManager.create(user_performing_action=admin_user)\n\n\n@pytest.fixture\ndef image_generation_config(\n    admin_user: DATestUser,\n) -> DATestImageGenerationConfig:\n    \"\"\"Create a default image generation config for tests.\"\"\"\n    return ImageGenerationConfigManager.create(\n        user_performing_action=admin_user,\n        is_default=True,\n    )\n\n\n@pytest.fixture\ndef document_builder(admin_user: DATestUser) -> DocumentBuilderType:\n    api_key: DATestAPIKey = APIKeyManager.create(\n        user_performing_action=admin_user,\n    )\n\n    # create connector\n    cc_pair_1 = CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n    def _document_builder(contents: list[str]) -> list[SimpleTestDocument]:\n        # seed documents\n        docs: list[SimpleTestDocument] = [\n            DocumentManager.seed_doc_with_content(\n                cc_pair=cc_pair_1,\n                content=content,\n                api_key=api_key,\n            )\n            for content in contents\n        ]\n\n        return docs\n\n    return _document_builder\n\n\ndef pytest_runtest_logstart(\n    nodeid: str,\n    location: tuple[str, int | None, str],  # noqa: ARG001\n) -> None:\n    print(f\"\\nTest start: {nodeid}\")\n\n\ndef pytest_runtest_logfinish(\n    nodeid: str,\n    location: tuple[str, int | None, str],  # noqa: ARG001\n) -> None:\n    print(f\"\\nTest end: {nodeid}\")\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/github/conftest.py",
    "content": "import os\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.reset import reset_all\nfrom tests.integration.common_utils.test_models import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestConnector\nfrom tests.integration.common_utils.test_models import DATestCredential\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nGitHubTestEnvSetupTuple = tuple[\n    DATestUser,  # admin_user\n    DATestUser,  # test_user_1\n    DATestUser,  # test_user_2\n    DATestCredential,  # github_credential\n    DATestConnector,  # github_connector\n    DATestCCPair,  # github_cc_pair\n]\n\n\ndef _get_github_test_tokens() -> list[str]:\n    \"\"\"\n    Returns a list of GitHub tokens to run the GitHub connector suite against.\n\n    Minimal setup:\n    - Set ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN (token1)\n    Optional:\n    - Set ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC (token2 / classic)\n\n    If the classic token is provided, the GitHub suite will run twice (once per token).\n    \"\"\"\n    token_1 = os.environ.get(\"ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN\")\n    # Prefer the new \"classic\" name, but keep backward compatibility.\n    token_2 = os.environ.get(\"ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC\")\n\n    tokens: list[str] = []\n    if token_1:\n        tokens.append(token_1)\n    if token_2:\n        tokens.append(token_2)\n    return tokens\n\n\n@pytest.fixture(scope=\"module\", params=_get_github_test_tokens())\ndef github_access_token(request: pytest.FixtureRequest) -> str:\n    tokens = _get_github_test_tokens()\n    if not tokens:\n        pytest.skip(\n            \"Skipping GitHub tests due to missing env vars \"\n            \"ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN and \"\n            \"ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC\"\n        )\n    return request.param\n\n\n@pytest.fixture(scope=\"module\")\ndef github_test_env_setup(\n    github_access_token: str,\n) -> Generator[GitHubTestEnvSetupTuple]:\n    \"\"\"\n    Create a complete GitHub test environment with:\n    - 3 users with email IDs from environment variables\n    - GitHub credentials using ACCESS_TOKEN_GITHUB from environment\n    - GitHub connector configured for testing\n    - Connector-Credential pair linking them together\n\n    Returns:\n        Tuple containing: (admin_user, test_user_1, test_user_2, github_credential, github_connector, github_cc_pair)\n    \"\"\"\n    # Reset all resources before setting up the test environment\n    reset_all()\n\n    # Get user emails from environment (with fallbacks)\n    admin_email = os.environ.get(\"ONYX_GITHUB_ADMIN_EMAIL\", \"admin@example.com\")\n    test_user_1_email = os.environ.get(\n        \"ONYX_GITHUB_TEST_USER_1_EMAIL\", \"subash@onyx.app\"\n    )\n    test_user_2_email = os.environ.get(\n        \"ONYX_GITHUB_TEST_USER_2_EMAIL\", \"msubash203@gmail.com\"\n    )\n\n    if not admin_email or not test_user_1_email or not test_user_2_email:\n        pytest.skip(\n            \"Skipping GitHub test environment setup due to missing environment variables\"\n        )\n\n    # Create users\n    admin_user: DATestUser = UserManager.create(email=admin_email)\n    test_user_1: DATestUser = UserManager.create(email=test_user_1_email)\n    test_user_2: DATestUser = UserManager.create(email=test_user_2_email)\n\n    # Create LLM provider - required for document search to work\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    # Create GitHub credentials\n    github_credentials = {\n        \"github_access_token\": github_access_token,\n    }\n\n    github_credential: DATestCredential = CredentialManager.create(\n        source=DocumentSource.GITHUB,\n        credential_json=github_credentials,\n        user_performing_action=admin_user,\n    )\n\n    # Create GitHub connector\n    github_connector: DATestConnector = ConnectorManager.create(\n        name=\"GitHub Test Connector\",\n        input_type=InputType.POLL,\n        source=DocumentSource.GITHUB,\n        connector_specific_config={\n            \"repo_owner\": \"permission-sync-test\",\n            \"include_prs\": True,\n            \"repositories\": \"perm-sync-test-minimal\",\n            \"include_issues\": True,\n        },\n        access_type=AccessType.SYNC,\n        user_performing_action=admin_user,\n    )\n\n    # Create CC pair linking connector and credential\n    github_cc_pair: DATestCCPair = CCPairManager.create(\n        credential_id=github_credential.id,\n        connector_id=github_connector.id,\n        name=\"GitHub Test CC Pair\",\n        access_type=AccessType.SYNC,\n        user_performing_action=admin_user,\n    )\n\n    # Wait for initial indexing to complete\n    # GitHub API operations can be slow due to rate limiting and network latency\n    # Use a longer timeout for initial indexing to avoid flaky test failures\n    before = datetime.now(tz=timezone.utc)\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=github_cc_pair,\n        after=before,\n        user_performing_action=admin_user,\n        timeout=900,\n    )\n\n    yield admin_user, test_user_1, test_user_2, github_credential, github_connector, github_cc_pair\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/github/test_github_permission_sync.py",
    "content": "import os\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport pytest\nfrom github import Github\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.utils.logger import setup_logger\nfrom tests.integration.common_utils.document_acl import (\n    get_all_connector_documents,\n)\nfrom tests.integration.common_utils.document_acl import (\n    get_user_document_access_via_acl,\n)\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.connector_job_tests.github.conftest import (\n    GitHubTestEnvSetupTuple,\n)\nfrom tests.integration.connector_job_tests.github.utils import GitHubManager\n\nlogger = setup_logger()\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permission tests are enterprise only\",\n)\ndef test_github_private_repo_permission_sync(\n    github_test_env_setup: GitHubTestEnvSetupTuple,\n) -> None:\n\n    (\n        admin_user,\n        test_user_1,\n        test_user_2,\n        github_credential,\n        github_connector,\n        github_cc_pair,\n    ) = github_test_env_setup\n\n    # Create GitHub client from credential\n    # Note: github_credential is a DATestCredential (Pydantic model), not a SQLAlchemy model\n    # so credential_json is already a plain dict\n    github_access_token = github_credential.credential_json[\"github_access_token\"]\n    github_client = Github(github_access_token)\n    github_manager = GitHubManager(github_client)\n\n    # Get repository configuration from connector\n    repo_owner = github_connector.connector_specific_config[\"repo_owner\"]\n    repo_name = github_connector.connector_specific_config[\"repositories\"]\n\n    success = github_manager.change_repository_visibility(\n        repo_owner=repo_owner, repo_name=repo_name, visibility=\"private\"\n    )\n\n    if not success:\n        pytest.fail(f\"Failed to change repository {repo_owner}/{repo_name} to private\")\n\n    # Add test-team to repository at the start\n    logger.info(f\"Adding test-team to repository {repo_owner}/{repo_name}\")\n    team_added = github_manager.add_team_to_repository(\n        repo_owner=repo_owner,\n        repo_name=repo_name,\n        team_slug=\"test-team\",\n        permission=\"pull\",\n    )\n\n    if not team_added:\n        logger.warning(\n            f\"Failed to add test-team to repository {repo_owner}/{repo_name}\"\n        )\n\n    try:\n        after = datetime.now(timezone.utc)\n        CCPairManager.sync(\n            cc_pair=github_cc_pair,\n            user_performing_action=admin_user,\n        )\n\n        # Use a longer timeout for GitHub permission sync operations\n        # GitHub API operations can be slow, especially with rate limiting\n        # This accounts for document sync, group sync, and vespa sync operations\n        CCPairManager.wait_for_sync(\n            cc_pair=github_cc_pair,\n            user_performing_action=admin_user,\n            after=after,\n            should_wait_for_group_sync=True,\n            timeout=900,\n        )\n\n        # ACL-based verification\n        with get_session_with_current_tenant() as db_session:\n            # Get all documents for this connector\n            all_document_ids = get_all_connector_documents(github_cc_pair, db_session)\n\n            # Test access for both users using ACL verification\n            accessible_docs_user1 = get_user_document_access_via_acl(\n                test_user=test_user_1,\n                document_ids=all_document_ids,\n                db_session=db_session,\n            )\n\n            accessible_docs_user2 = get_user_document_access_via_acl(\n                test_user=test_user_2,\n                document_ids=all_document_ids,\n                db_session=db_session,\n            )\n\n            logger.info(\n                f\"test_user_1 has access to {len(accessible_docs_user1)} documents\"\n            )\n            logger.info(\n                f\"test_user_2 has access to {len(accessible_docs_user2)} documents\"\n            )\n\n            # test_user_1 (part of test-team) should have access\n            # test_user_2 (not part of test-team) should NOT have access\n            assert len(accessible_docs_user1) > 0, (\n                f\"test_user_1 should have access to private repository documents. \"\n                f\"Found {len(accessible_docs_user1)} accessible docs out of \"\n                f\"{len(all_document_ids)} total\"\n            )\n            assert len(accessible_docs_user2) == 0, (\n                f\"test_user_2 should NOT have access to private repository documents. \"\n                f\"Found {len(accessible_docs_user2)} accessible docs out of \"\n                f\"{len(all_document_ids)} total\"\n            )\n\n    finally:\n        # Remove test-team from repository at the end\n        logger.info(f\"Removing test-team from repository {repo_owner}/{repo_name}\")\n        team_removed = github_manager.remove_team_from_repository(\n            repo_owner=repo_owner, repo_name=repo_name, team_slug=\"test-team\"\n        )\n\n        if not team_removed:\n            logger.warning(\n                f\"Failed to remove test-team from repository {repo_owner}/{repo_name}\"\n            )\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permission tests are enterprise only\",\n)\ndef test_github_public_repo_permission_sync(\n    github_test_env_setup: GitHubTestEnvSetupTuple,\n) -> None:\n    \"\"\"\n    Test that when a repository is changed to public, both users can access the documents.\n    \"\"\"\n    (\n        admin_user,\n        test_user_1,\n        test_user_2,\n        github_credential,\n        github_connector,\n        github_cc_pair,\n    ) = github_test_env_setup\n\n    # Create GitHub client from credential\n    # Note: github_credential is a DATestCredential (Pydantic model), not a SQLAlchemy model\n    # so credential_json is already a plain dict\n    github_access_token = github_credential.credential_json[\"github_access_token\"]\n    github_client = Github(github_access_token)\n    github_manager = GitHubManager(github_client)\n\n    # Get repository configuration from connector\n    repo_owner = github_connector.connector_specific_config[\"repo_owner\"]\n    repo_name = github_connector.connector_specific_config[\"repositories\"]\n\n    # Change repository to public\n    logger.info(f\"Changing repository {repo_owner}/{repo_name} to public\")\n    success = github_manager.change_repository_visibility(\n        repo_owner=repo_owner, repo_name=repo_name, visibility=\"public\"\n    )\n\n    if not success:\n        pytest.fail(f\"Failed to change repository {repo_owner}/{repo_name} to public\")\n\n    # Verify repository is now public\n    current_visibility = github_manager.get_repository_visibility(\n        repo_owner=repo_owner, repo_name=repo_name\n    )\n    logger.info(f\"Repository {repo_owner}/{repo_name} visibility: {current_visibility}\")\n    assert (\n        current_visibility == \"public\"\n    ), f\"Repository should be public, but is {current_visibility}\"\n\n    # Trigger sync to update permissions\n    after = datetime.now(timezone.utc)\n    CCPairManager.sync(\n        cc_pair=github_cc_pair,\n        user_performing_action=admin_user,\n    )\n\n    # Wait for sync to complete with group sync\n    # Public repositories should be accessible to all users\n    CCPairManager.wait_for_sync(\n        cc_pair=github_cc_pair,\n        user_performing_action=admin_user,\n        after=after,\n        should_wait_for_group_sync=True,\n        timeout=900,\n    )\n\n    # ACL-based verification\n    with get_session_with_current_tenant() as db_session:\n        # Get all documents for this connector\n        all_document_ids = get_all_connector_documents(github_cc_pair, db_session)\n\n        # Test access for both users using ACL verification\n        accessible_docs_user1 = get_user_document_access_via_acl(\n            test_user=test_user_1,\n            document_ids=all_document_ids,\n            db_session=db_session,\n        )\n\n        accessible_docs_user2 = get_user_document_access_via_acl(\n            test_user=test_user_2,\n            document_ids=all_document_ids,\n            db_session=db_session,\n        )\n\n        logger.info(f\"test_user_1 has access to {len(accessible_docs_user1)} documents\")\n        logger.info(f\"test_user_2 has access to {len(accessible_docs_user2)} documents\")\n\n        # Both users should have access to the public repository documents\n        assert len(accessible_docs_user1) > 0, (\n            f\"test_user_1 should have access to public repository documents. \"\n            f\"Found {len(accessible_docs_user1)} accessible docs out of \"\n            f\"{len(all_document_ids)} total\"\n        )\n        assert len(accessible_docs_user2) > 0, (\n            f\"test_user_2 should have access to public repository documents. \"\n            f\"Found {len(accessible_docs_user2)} accessible docs out of \"\n            f\"{len(all_document_ids)} total\"\n        )\n\n        # Verify that both users get the same results (since repo is public)\n        assert len(accessible_docs_user1) == len(accessible_docs_user2), (\n            f\"Both users should see the same documents from public repository. \"\n            f\"User1: {len(accessible_docs_user1)}, User2: {len(accessible_docs_user2)}\"\n        )\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permission tests are enterprise only\",\n)\ndef test_github_internal_repo_permission_sync(\n    github_test_env_setup: GitHubTestEnvSetupTuple,\n) -> None:\n    \"\"\"\n    Test that when a repository is changed to internal, test_user_1 has access but test_user_2 doesn't.\n    Internal repositories are accessible only to organization members.\n    \"\"\"\n    (\n        admin_user,\n        test_user_1,\n        test_user_2,\n        github_credential,\n        github_connector,\n        github_cc_pair,\n    ) = github_test_env_setup\n\n    # Create GitHub client from credential\n    # Note: github_credential is a DATestCredential (Pydantic model), not a SQLAlchemy model\n    # so credential_json is already a plain dict\n    github_access_token = github_credential.credential_json[\"github_access_token\"]\n    github_client = Github(github_access_token)\n    github_manager = GitHubManager(github_client)\n\n    # Get repository configuration from connector\n    repo_owner = github_connector.connector_specific_config[\"repo_owner\"]\n    repo_name = github_connector.connector_specific_config[\"repositories\"]\n\n    # Change repository to internal\n    logger.info(f\"Changing repository {repo_owner}/{repo_name} to internal\")\n    success = github_manager.change_repository_visibility(\n        repo_owner=repo_owner, repo_name=repo_name, visibility=\"internal\"\n    )\n\n    if not success:\n        pytest.fail(f\"Failed to change repository {repo_owner}/{repo_name} to internal\")\n\n    # Verify repository is now internal\n    current_visibility = github_manager.get_repository_visibility(\n        repo_owner=repo_owner, repo_name=repo_name\n    )\n    logger.info(f\"Repository {repo_owner}/{repo_name} visibility: {current_visibility}\")\n    assert (\n        current_visibility == \"internal\"\n    ), f\"Repository should be internal, but is {current_visibility}\"\n\n    # Trigger sync to update permissions\n    after = datetime.now(timezone.utc)\n    CCPairManager.sync(\n        cc_pair=github_cc_pair,\n        user_performing_action=admin_user,\n    )\n\n    # Wait for sync to complete with group sync\n    # Internal repositories should be accessible only to organization members\n    CCPairManager.wait_for_sync(\n        cc_pair=github_cc_pair,\n        user_performing_action=admin_user,\n        after=after,\n        should_wait_for_group_sync=True,\n        timeout=900,\n    )\n\n    #  ACL-based verification\n    with get_session_with_current_tenant() as db_session:\n        # Get all documents for this connector\n        all_document_ids = get_all_connector_documents(github_cc_pair, db_session)\n\n        # Test access for both users using ACL verification\n        accessible_docs_user1 = get_user_document_access_via_acl(\n            test_user=test_user_1,\n            document_ids=all_document_ids,\n            db_session=db_session,\n        )\n\n        accessible_docs_user2 = get_user_document_access_via_acl(\n            test_user=test_user_2,\n            document_ids=all_document_ids,\n            db_session=db_session,\n        )\n\n        logger.info(f\"test_user_1 has access to {len(accessible_docs_user1)} documents\")\n        logger.info(f\"test_user_2 has access to {len(accessible_docs_user2)} documents\")\n\n        # For internal repositories:\n        # - test_user_1 should have access (assuming they're part of the organization)\n        # - test_user_2 should NOT have access (assuming they're not part of the organization)\n        assert len(accessible_docs_user1) > 0, (\n            f\"test_user_1 should have access to internal repository documents (organization member). \"\n            f\"Found {len(accessible_docs_user1)} accessible docs out of \"\n            f\"{len(all_document_ids)} total\"\n        )\n        assert len(accessible_docs_user2) == 0, (\n            f\"test_user_2 should NOT have access to internal repository documents (not organization member). \"\n            f\"Found {len(accessible_docs_user2)} accessible docs out of \"\n            f\"{len(all_document_ids)} total\"\n        )\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/github/utils.py",
    "content": "from typing import Optional\n\nfrom github import Github\nfrom github.GithubException import GithubException\n\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\nclass GitHubManager:\n    \"\"\"\n    Manager class for GitHub operations used in testing.\n    Provides methods to change repository visibility, check repository visibility, and manage teams.\n    \"\"\"\n\n    def __init__(self, github_client: Github):\n        \"\"\"\n        Initialize the GitHub manager with a GitHub client.\n\n        Args:\n            github_client: Authenticated GitHub client instance\n        \"\"\"\n        self.github_client = github_client\n\n    def change_repository_visibility(\n        self, repo_owner: str, repo_name: str, visibility: str\n    ) -> bool:\n        \"\"\"\n        Change the visibility of a repository.\n\n        Args:\n            repo_owner: Repository owner (organization or username)\n            repo_name: Repository name\n            visibility: New visibility ('public', 'private', or 'internal')\n\n        Returns:\n            bool: True if successful, False otherwise\n\n        Raises:\n            ValueError: If visibility is not valid\n            GithubException: If GitHub API call fails\n        \"\"\"\n        if visibility not in [\"public\", \"private\", \"internal\"]:\n            raise ValueError(\n                f\"Invalid visibility: {visibility}. Must be 'public', 'private', or 'internal'\"\n            )\n\n        try:\n            repo = self.github_client.get_repo(f\"{repo_owner}/{repo_name}\")\n\n            # Check if we have admin permissions\n            if not repo.permissions.admin:\n                logger.error(\n                    f\"No admin permissions for repository {repo_owner}/{repo_name}\"\n                )\n                return False\n\n            # Note: Internal repositories are only available for GitHub Enterprise\n            try:\n                repo.edit(visibility=visibility)\n            except GithubException as e:\n                logger.warning(f\"Could not set repository to {visibility}: {e}\")\n                return False\n\n            logger.info(\n                f\"Successfully changed {repo_owner}/{repo_name} visibility to {visibility}\"\n            )\n            return True\n\n        except GithubException as e:\n            logger.error(f\"Failed to change repository visibility: {e}\")\n            return False\n\n    def add_team_to_repository(\n        self, repo_owner: str, repo_name: str, team_slug: str, permission: str = \"push\"\n    ) -> bool:\n        \"\"\"\n        Add a team to a repository with specified permissions.\n\n        Args:\n            repo_owner: Repository owner (organization)\n            repo_name: Repository name\n            team_slug: Team slug (not team name)\n            permission: Permission level ('pull', 'push', 'admin', 'maintain', 'triage')\n\n        Returns:\n            bool: True if successful, False otherwise\n\n        Raises:\n            GithubException: If GitHub API call fails\n        \"\"\"\n        valid_permissions = [\"pull\", \"push\", \"admin\", \"maintain\", \"triage\"]\n        if permission not in valid_permissions:\n            raise ValueError(\n                f\"Invalid permission: {permission}. Must be one of {valid_permissions}\"\n            )\n\n        try:\n            repo = self.github_client.get_repo(f\"{repo_owner}/{repo_name}\")\n            org = self.github_client.get_organization(repo_owner)\n            team = org.get_team_by_slug(team_slug)\n\n            # Add team to repository\n            team.add_to_repos(repo)\n\n            # Set team permissions on the repository\n            team.set_repo_permission(repo, permission)\n\n            logger.info(\n                f\"Successfully added team {team_slug} to {repo_owner}/{repo_name} with {permission} permissions\"\n            )\n            return True\n\n        except GithubException as e:\n            logger.error(f\"Failed to add team to repository: {e}\")\n            return False\n\n    def remove_team_from_repository(\n        self, repo_owner: str, repo_name: str, team_slug: str\n    ) -> bool:\n        \"\"\"\n        Remove a team from a repository.\n\n        Args:\n            repo_owner: Repository owner (organization)\n            repo_name: Repository name\n            team_slug: Team slug (not team name)\n\n        Returns:\n            bool: True if successful, False otherwise\n\n        Raises:\n            GithubException: If GitHub API call fails\n        \"\"\"\n        try:\n            repo = self.github_client.get_repo(f\"{repo_owner}/{repo_name}\")\n            org = self.github_client.get_organization(repo_owner)\n            team = org.get_team_by_slug(team_slug)\n\n            # Remove team from repository\n            team.remove_from_repos(repo)\n\n            logger.info(\n                f\"Successfully removed team {team_slug} from {repo_owner}/{repo_name}\"\n            )\n            return True\n\n        except GithubException as e:\n            logger.error(f\"Failed to remove team from repository: {e}\")\n            return False\n\n    def get_repository_visibility(\n        self, repo_owner: str, repo_name: str\n    ) -> Optional[str]:\n        \"\"\"\n        Get the current visibility of a repository.\n\n        Args:\n            repo_owner: Repository owner\n            repo_name: Repository name\n\n        Returns:\n            Optional[str]: Repository visibility ('public', 'private', 'internal') or None if failed\n        \"\"\"\n        try:\n            repo = self.github_client.get_repo(f\"{repo_owner}/{repo_name}\")\n\n            if hasattr(repo, \"visibility\"):\n                return repo.visibility\n            else:\n                # Fallback for older GitHub API versions\n                return \"private\" if repo.private else \"public\"\n\n        except GithubException as e:\n            logger.error(f\"Failed to get repository visibility: {e}\")\n            return None\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/google/google_drive_api_utils.py",
    "content": "from typing import Any\nfrom uuid import uuid4\n\nfrom google.oauth2.service_account import Credentials\n\nfrom onyx.connectors.google_utils.resources import get_drive_service\nfrom onyx.connectors.google_utils.resources import get_google_docs_service\nfrom onyx.connectors.google_utils.resources import GoogleDocsService\nfrom onyx.connectors.google_utils.resources import GoogleDriveService\n\n\nGOOGLE_SCOPES = {\n    \"google_drive\": [\n        \"https://www.googleapis.com/auth/drive\",\n        \"https://www.googleapis.com/auth/admin.directory.group\",\n        \"https://www.googleapis.com/auth/admin.directory.user\",\n    ],\n}\n\n\ndef _create_doc_service(drive_service: GoogleDriveService) -> GoogleDocsService:\n    docs_service = get_google_docs_service(\n        creds=drive_service._http.credentials,\n        user_email=drive_service._http.credentials._subject,\n    )\n    return docs_service\n\n\nclass GoogleDriveManager:\n    @staticmethod\n    def create_impersonated_drive_service(\n        service_account_key: dict, impersonated_user_email: str\n    ) -> GoogleDriveService:\n        \"\"\"Gets a drive service that impersonates a specific user\"\"\"\n        credentials = Credentials.from_service_account_info(\n            service_account_key,\n            scopes=GOOGLE_SCOPES[\"google_drive\"],\n            subject=impersonated_user_email,\n        )\n\n        service = get_drive_service(credentials, impersonated_user_email)\n\n        # Verify impersonation\n        about = service.about().get(fields=\"user\").execute()\n        if about.get(\"user\", {}).get(\"emailAddress\") != impersonated_user_email:\n            raise ValueError(\n                f\"Failed to impersonate {impersonated_user_email}. Instead got {about.get('user', {}).get('emailAddress')}\"\n            )\n        return service\n\n    @staticmethod\n    def create_shared_drive(\n        drive_service: GoogleDriveService, admin_email: str, test_id: str\n    ) -> str:\n        \"\"\"\n        Creates a shared drive and returns the drive's ID\n        \"\"\"\n        try:\n            about = drive_service.about().get(fields=\"user\").execute()\n            creating_user = about[\"user\"][\"emailAddress\"]\n\n            # Verify we're still impersonating the admin\n            if creating_user != admin_email:\n                raise ValueError(\n                    f\"Expected to create drive as {admin_email}, but instead created drive as {creating_user}\"\n                )\n\n            drive_metadata = {\"name\": f\"perm_sync_drive_{test_id}\"}\n\n            request_id = str(uuid4())\n            drive = (\n                drive_service.drives()\n                .create(\n                    body=drive_metadata,\n                    requestId=request_id,\n                    fields=\"id,name,capabilities\",\n                )\n                .execute()\n            )\n\n            return drive[\"id\"]\n        except Exception as e:\n            print(f\"Error creating shared drive: {str(e)}\")\n            raise\n\n    @staticmethod\n    def create_empty_doc(\n        drive_service: Any,\n        drive_id: str,\n    ) -> str:\n        \"\"\"\n        Creates an empty document in the given drive and returns the document's ID\n        \"\"\"\n        file_metadata = {\n            \"name\": f\"perm_sync_doc_{drive_id}_{str(uuid4())}\",\n            \"mimeType\": \"application/vnd.google-apps.document\",\n            \"parents\": [drive_id],\n        }\n        file = (\n            drive_service.files()\n            .create(body=file_metadata, supportsAllDrives=True)\n            .execute()\n        )\n\n        return file[\"id\"]\n\n    @staticmethod\n    def append_text_to_doc(\n        drive_service: GoogleDriveService, doc_id: str, text: str\n    ) -> None:\n        docs_service = _create_doc_service(drive_service)\n\n        docs_service.documents().batchUpdate(\n            documentId=doc_id,\n            body={\n                \"requests\": [{\"insertText\": {\"location\": {\"index\": 1}, \"text\": text}}]\n            },\n        ).execute()\n\n    @staticmethod\n    def update_file_permissions(\n        drive_service: Any, file_id: str, email: str, role: str = \"reader\"\n    ) -> None:\n        permission = {\"type\": \"user\", \"role\": role, \"emailAddress\": email}\n        drive_service.permissions().create(\n            fileId=file_id,\n            body=permission,\n            supportsAllDrives=True,\n            sendNotificationEmail=False,\n        ).execute()\n\n    @staticmethod\n    def remove_file_permissions(\n        drive_service: Any,\n        file_id: str,\n        email: str,  # noqa: ARG004\n    ) -> None:\n        permissions = (\n            drive_service.permissions()\n            .list(fileId=file_id, supportsAllDrives=True)\n            .execute()\n        )\n        # TODO: This is a hacky way to remove permissions. Removes anyone with reader role.\n        # Need to find a way to map a user's email to a permission id.\n        # The permissions.get returns a permissionID but email field is None,\n        # something to do with it being a group or domain wide delegation.\n        for permission in permissions.get(\"permissions\", []):\n            if permission.get(\"role\") == \"reader\":\n                drive_service.permissions().delete(\n                    fileId=file_id,\n                    permissionId=permission[\"id\"],\n                    supportsAllDrives=True,\n                ).execute()\n                break\n\n    @staticmethod\n    def make_file_public(drive_service: Any, file_id: str) -> None:\n        permission = {\"type\": \"anyone\", \"role\": \"reader\"}\n        drive_service.permissions().create(\n            fileId=file_id, body=permission, supportsAllDrives=True\n        ).execute()\n\n    @staticmethod\n    def cleanup_drive(drive_service: Any, drive_id: str) -> None:\n        try:\n            # Delete up to 2 files that match our pattern\n            file_name_prefix = f\"perm_sync_doc_{drive_id}\"\n            files = (\n                drive_service.files()\n                .list(\n                    q=f\"name contains '{file_name_prefix}'\",\n                    driveId=drive_id,\n                    includeItemsFromAllDrives=True,\n                    supportsAllDrives=True,\n                    corpora=\"drive\",\n                    fields=\"files(id)\",\n                )\n                .execute()\n            )\n\n            for file in files.get(\"files\", []):\n                drive_service.files().delete(\n                    fileId=file[\"id\"], supportsAllDrives=True\n                ).execute()\n\n            # Then delete the drive\n            drive_service.drives().delete(driveId=drive_id).execute()\n        except Exception as e:\n            print(f\"Error cleaning up drive {drive_id}: {e}\")\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/google/test_google_drive_permission_sync.py",
    "content": "import json\nimport os\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom uuid import uuid4\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.google_utils.resources import GoogleDriveService\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,\n)\nfrom onyx.connectors.google_utils.shared_constants import (\n    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,\n)\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.document_search import (\n    DocumentSearchManager,\n)\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestConnector\nfrom tests.integration.common_utils.test_models import DATestCredential\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.vespa import vespa_fixture\nfrom tests.integration.connector_job_tests.google.google_drive_api_utils import (\n    GoogleDriveManager,\n)\n\n\n@pytest.fixture()\ndef google_drive_test_env_setup() -> Generator[\n    tuple[GoogleDriveService, str, DATestCCPair, DATestUser, DATestUser, DATestUser],\n    None,\n    None,\n]:\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(email=\"admin@example.com\")\n    # Creating a non-admin user\n    test_user_1: DATestUser = UserManager.create(email=\"test_user_1@example.com\")\n    # Creating a non-admin user\n    test_user_2: DATestUser = UserManager.create(email=\"test_user_2@example.com\")\n\n    service_account_key = os.environ[\"FULL_CONTROL_DRIVE_SERVICE_ACCOUNT\"]\n    drive_id: str | None = None\n    drive_service: GoogleDriveService | None = None\n\n    try:\n        credentials = {\n            DB_CREDENTIALS_PRIMARY_ADMIN_KEY: admin_user.email,\n            DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: service_account_key,\n        }\n\n        # Setup Google Drive\n        drive_service = GoogleDriveManager.create_impersonated_drive_service(\n            json.loads(service_account_key), admin_user.email\n        )\n        test_id = str(uuid4())\n        drive_id = GoogleDriveManager.create_shared_drive(\n            drive_service, admin_user.email, test_id\n        )\n\n        # Setup Onyx infrastructure\n        LLMProviderManager.create(user_performing_action=admin_user)\n\n        before = datetime.now(timezone.utc)\n        credential: DATestCredential = CredentialManager.create(\n            source=DocumentSource.GOOGLE_DRIVE,\n            credential_json=credentials,\n            user_performing_action=admin_user,\n        )\n        connector: DATestConnector = ConnectorManager.create(\n            name=\"Google Drive Test\",\n            input_type=InputType.POLL,\n            source=DocumentSource.GOOGLE_DRIVE,\n            connector_specific_config={\n                \"shared_drive_urls\": f\"https://drive.google.com/drive/folders/{drive_id}\"\n            },\n            access_type=AccessType.SYNC,\n            user_performing_action=admin_user,\n        )\n        cc_pair: DATestCCPair = CCPairManager.create(\n            credential_id=credential.id,\n            connector_id=connector.id,\n            access_type=AccessType.SYNC,\n            user_performing_action=admin_user,\n        )\n        CCPairManager.wait_for_indexing_completion(\n            cc_pair=cc_pair, after=before, user_performing_action=admin_user\n        )\n\n        yield drive_service, drive_id, cc_pair, admin_user, test_user_1, test_user_2\n\n    except json.JSONDecodeError:\n        pytest.skip(\"FULL_CONTROL_DRIVE_SERVICE_ACCOUNT is not valid JSON\")\n    finally:\n        # Cleanup drive and file\n        if drive_id is not None:\n            GoogleDriveManager.cleanup_drive(drive_service, drive_id)\n\n\n@pytest.mark.xfail(reason=\"Needs to be tested for flakiness\")\ndef test_google_permission_sync(\n    reset: None,  # noqa: ARG001\n    vespa_client: vespa_fixture,  # noqa: ARG001\n    google_drive_test_env_setup: tuple[\n        GoogleDriveService, str, DATestCCPair, DATestUser, DATestUser, DATestUser\n    ],\n) -> None:\n    (\n        drive_service,\n        drive_id,\n        cc_pair,\n        admin_user,\n        test_user_1,\n        test_user_2,\n    ) = google_drive_test_env_setup\n\n    # ----------------------BASELINE TEST----------------------\n    before = datetime.now(timezone.utc)\n\n    # Create empty test doc in drive\n    doc_id_1 = GoogleDriveManager.create_empty_doc(drive_service, drive_id)\n\n    # Append text to doc\n    doc_text_1 = \"The secret number is 12345\"\n    GoogleDriveManager.append_text_to_doc(drive_service, doc_id_1, doc_text_1)\n\n    # run indexing\n    CCPairManager.run_once(\n        cc_pair, from_beginning=True, user_performing_action=admin_user\n    )\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair, after=before, user_performing_action=admin_user\n    )\n\n    # run permission sync\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        number_of_updated_docs=1,\n        user_performing_action=admin_user,\n    )\n\n    # Verify admin has access to document\n    admin_results = DocumentSearchManager.search_documents(\n        query=\"secret number\", user_performing_action=admin_user\n    )\n    assert doc_text_1 in [result.strip(\"\\ufeff\") for result in admin_results]\n\n    # Verify test_user_1 cannot access document\n    user1_results = DocumentSearchManager.search_documents(\n        query=\"secret number\", user_performing_action=test_user_1\n    )\n    assert doc_text_1 not in [result.strip(\"\\ufeff\") for result in user1_results]\n\n    # ----------------------GRANT USER 1 DOC PERMISSIONS TEST--------------------------\n    before = datetime.now(timezone.utc)\n\n    # Grant user 1 access to document 1\n    GoogleDriveManager.update_file_permissions(\n        drive_service=drive_service,\n        file_id=doc_id_1,\n        email=test_user_1.email,\n        role=\"reader\",\n    )\n\n    # Create a second doc in the drive which user 1 should not have access to\n    doc_id_2 = GoogleDriveManager.create_empty_doc(drive_service, drive_id)\n    doc_text_2 = \"The secret number is 67890\"\n    GoogleDriveManager.append_text_to_doc(drive_service, doc_id_2, doc_text_2)\n\n    # Run indexing\n    CCPairManager.run_once(\n        cc_pair, from_beginning=True, user_performing_action=admin_user\n    )\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair,\n        after=before,\n        user_performing_action=admin_user,\n    )\n\n    # Run permission sync\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        number_of_updated_docs=1,\n        user_performing_action=admin_user,\n    )\n\n    # Verify admin can access both documents\n    admin_results = DocumentSearchManager.search_documents(\n        query=\"secret number\", user_performing_action=admin_user\n    )\n    assert {doc_text_1, doc_text_2} == {\n        result.strip(\"\\ufeff\") for result in admin_results\n    }\n\n    # Verify user 1 can access document 1\n    user1_results = DocumentSearchManager.search_documents(\n        query=\"secret number\", user_performing_action=test_user_1\n    )\n    assert doc_text_1 in [result.strip(\"\\ufeff\") for result in user1_results]\n\n    # Verify user 1 cannot access document 2\n    user1_results_2 = DocumentSearchManager.search_documents(\n        query=\"secret number\", user_performing_action=test_user_1\n    )\n    assert doc_text_2 not in [result.strip(\"\\ufeff\") for result in user1_results_2]\n\n    # ----------------------REMOVE USER 1 DOC PERMISSIONS TEST--------------------------\n    before = datetime.now(timezone.utc)\n\n    # Remove user 1 access to document 1\n    GoogleDriveManager.remove_file_permissions(\n        drive_service=drive_service, file_id=doc_id_1, email=test_user_1.email\n    )\n    # Run permission sync\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        number_of_updated_docs=1,\n        user_performing_action=admin_user,\n    )\n\n    # Verify admin can access both documents\n    admin_results = DocumentSearchManager.search_documents(\n        query=\"secret number\", user_performing_action=admin_user\n    )\n    assert {doc_text_1, doc_text_2} == {\n        result.strip(\"\\ufeff\") for result in admin_results\n    }\n\n    # Verify user 1 cannot access either document\n    user1_results = DocumentSearchManager.search_documents(\n        query=\"secret numbers\", user_performing_action=test_user_1\n    )\n    assert {result.strip(\"\\ufeff\") for result in user1_results} == set()\n\n    # ----------------------GRANT USER 1 DRIVE PERMISSIONS TEST--------------------------\n    before = datetime.now(timezone.utc)\n\n    # Grant user 1 access to drive\n    GoogleDriveManager.update_file_permissions(\n        drive_service=drive_service,\n        file_id=drive_id,\n        email=test_user_1.email,\n        role=\"reader\",\n    )\n\n    # Run permission sync\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        number_of_updated_docs=2,\n        user_performing_action=admin_user,\n        # if we are only updating the group definition for this test we use this varaiable,\n        # since it doesn't result in a vespa sync so we don't want to wait for it\n        should_wait_for_vespa_sync=False,\n    )\n\n    # Verify user 1 can access both documents\n    user1_results = DocumentSearchManager.search_documents(\n        query=\"secret numbers\", user_performing_action=test_user_1\n    )\n    assert {doc_text_1, doc_text_2} == {\n        result.strip(\"\\ufeff\") for result in user1_results\n    }\n\n    # ----------------------MAKE DRIVE PUBLIC TEST--------------------------\n    before = datetime.now(timezone.utc)\n\n    # Unable to make drive itself public as Google's security policies prevent this, so we make the documents public instead\n    GoogleDriveManager.make_file_public(drive_service, doc_id_1)\n    GoogleDriveManager.make_file_public(drive_service, doc_id_2)\n\n    # Run permission sync\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        number_of_updated_docs=2,\n        user_performing_action=admin_user,\n    )\n\n    # Verify all users can access both documents\n    admin_results = DocumentSearchManager.search_documents(\n        query=\"secret number\", user_performing_action=admin_user\n    )\n    assert {doc_text_1, doc_text_2} == {\n        result.strip(\"\\ufeff\") for result in admin_results\n    }\n\n    user1_results = DocumentSearchManager.search_documents(\n        query=\"secret number\", user_performing_action=test_user_1\n    )\n    assert {doc_text_1, doc_text_2} == {\n        result.strip(\"\\ufeff\") for result in user1_results\n    }\n\n    user2_results = DocumentSearchManager.search_documents(\n        query=\"secret number\", user_performing_action=test_user_2\n    )\n    assert {doc_text_1, doc_text_2} == {\n        result.strip(\"\\ufeff\") for result in user2_results\n    }\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/jira/conftest.py",
    "content": "import os\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import AccessType\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestConnector\nfrom tests.integration.common_utils.test_models import DATestCredential\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nJiraTestEnvSetupTuple = tuple[\n    DATestUser,\n    DATestCredential,\n    DATestConnector,\n    DATestCCPair,\n]\n\n\n@pytest.fixture()\ndef jira_test_env_setup() -> Generator[JiraTestEnvSetupTuple]:\n    jira_base_url = os.environ[\"JIRA_BASE_URL\"]\n    jira_user_email = os.environ[\"JIRA_USER_EMAIL\"]\n    jira_api_token = os.environ[\"JIRA_API_TOKEN\"]\n\n    credentials = {\n        \"jira_user_email\": jira_user_email,\n        \"jira_api_token\": jira_api_token,\n    }\n\n    admin_user: DATestUser = UserManager.create(email=jira_user_email)\n    credential: DATestCredential = CredentialManager.create(\n        source=DocumentSource.JIRA,\n        credential_json=credentials,\n        user_performing_action=admin_user,\n    )\n    connector: DATestConnector = ConnectorManager.create(\n        name=\"Jira Test\",\n        input_type=InputType.POLL,\n        source=DocumentSource.JIRA,\n        connector_specific_config={\n            \"jira_base_url\": jira_base_url,\n        },\n        access_type=AccessType.SYNC,\n        user_performing_action=admin_user,\n    )\n    cc_pair: DATestCCPair = CCPairManager.create(\n        credential_id=credential.id,\n        connector_id=connector.id,\n        access_type=AccessType.SYNC,\n        user_performing_action=admin_user,\n    )\n    before = datetime.now(tz=timezone.utc)\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair, after=before, user_performing_action=admin_user\n    )\n\n    yield admin_user, credential, connector, cc_pair\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/jira/test_jira_permission_sync_full.py",
    "content": "import os\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport pytest\n\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.connector_job_tests.jira.conftest import JiraTestEnvSetupTuple\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Jira permission sync is enterprise only\",\n)\n@pytest.mark.xfail(reason=\"Needs to be tested for flakiness\")\ndef test_jira_permission_sync_full(\n    reset: None,  # noqa: ARG001\n    jira_test_env_setup: JiraTestEnvSetupTuple,\n) -> None:\n    (\n        admin_user,\n        credential,\n        connector,\n        cc_pair,\n    ) = jira_test_env_setup\n\n    before = datetime.now(tz=timezone.utc)\n\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        number_of_updated_docs=1,\n        user_performing_action=admin_user,\n        timeout=float(\"inf\"),\n    )\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/sharepoint/conftest.py",
    "content": "import os\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.connectors.sharepoint.connector import SharepointAuthMethod\nfrom onyx.db.enums import AccessType\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.reset import reset_all\nfrom tests.integration.common_utils.test_models import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestConnector\nfrom tests.integration.common_utils.test_models import DATestCredential\nfrom tests.integration.common_utils.test_models import DATestUser\n\nSharepointTestEnvSetupTuple = tuple[\n    DATestUser,  # admin_user\n    DATestUser,  # regular_user_1\n    DATestUser,  # regular_user_2\n    DATestCredential,\n    DATestConnector,\n    DATestCCPair,\n]\n\n\n@pytest.fixture(scope=\"module\")\ndef sharepoint_test_env_setup() -> Generator[SharepointTestEnvSetupTuple]:\n    # Reset all data before running the test\n    reset_all()\n    # Required environment variables for SharePoint certificate authentication\n    sp_client_id = os.environ.get(\"PERM_SYNC_SHAREPOINT_CLIENT_ID\")\n    sp_private_key = os.environ.get(\"PERM_SYNC_SHAREPOINT_PRIVATE_KEY\")\n    sp_certificate_password = os.environ.get(\n        \"PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD\"\n    )\n    sp_directory_id = os.environ.get(\"PERM_SYNC_SHAREPOINT_DIRECTORY_ID\")\n    sharepoint_sites = \"https://danswerai.sharepoint.com/sites/Permisisonsync\"\n    admin_email = \"admin@onyx.app\"\n    user1_email = \"subash@onyx.app\"\n    user2_email = \"raunak@onyx.app\"\n\n    if not sp_private_key or not sp_certificate_password or not sp_directory_id:\n        pytest.skip(\"Skipping test because required environment variables are not set\")\n\n    # Certificate-based credentials\n    credentials = {\n        \"authentication_method\": SharepointAuthMethod.CERTIFICATE.value,\n        \"sp_client_id\": sp_client_id,\n        \"sp_private_key\": sp_private_key,\n        \"sp_certificate_password\": sp_certificate_password,\n        \"sp_directory_id\": sp_directory_id,\n    }\n\n    # Create users\n    admin_user: DATestUser = UserManager.create(email=admin_email)\n    regular_user_1: DATestUser = UserManager.create(email=user1_email)\n    regular_user_2: DATestUser = UserManager.create(email=user2_email)\n\n    # Create LLM provider for search functionality\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    # Create credential\n    credential: DATestCredential = CredentialManager.create(\n        source=DocumentSource.SHAREPOINT,\n        credential_json=credentials,\n        user_performing_action=admin_user,\n    )\n\n    # Create connector with SharePoint-specific configuration\n    connector: DATestConnector = ConnectorManager.create(\n        name=\"SharePoint Test\",\n        input_type=InputType.POLL,\n        source=DocumentSource.SHAREPOINT,\n        connector_specific_config={\n            \"sites\": sharepoint_sites.split(\",\"),\n            \"treat_sharing_link_as_public\": True,\n        },\n        access_type=AccessType.SYNC,  # Enable permission sync\n        user_performing_action=admin_user,\n    )\n\n    # Create CC pair with permission sync enabled\n    cc_pair: DATestCCPair = CCPairManager.create(\n        credential_id=credential.id,\n        connector_id=connector.id,\n        access_type=AccessType.SYNC,  # Enable permission sync\n        user_performing_action=admin_user,\n    )\n\n    # Wait for both indexing and permission sync to complete\n    before = datetime.now(tz=timezone.utc)\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair,\n        after=before,\n        user_performing_action=admin_user,\n        timeout=float(\"inf\"),\n    )\n\n    # Wait for permission sync completion specifically\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        user_performing_action=admin_user,\n        timeout=float(\"inf\"),\n    )\n\n    yield admin_user, regular_user_1, regular_user_2, credential, connector, cc_pair\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/sharepoint/test_sharepoint_permissions.py",
    "content": "import os\n\nimport pytest\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.utils.logger import setup_logger\nfrom tests.integration.common_utils.document_acl import (\n    get_all_connector_documents,\n)\nfrom tests.integration.common_utils.document_acl import (\n    get_documents_by_permission_type,\n)\nfrom tests.integration.common_utils.document_acl import (\n    get_user_document_access_via_acl,\n)\nfrom tests.integration.connector_job_tests.sharepoint.conftest import (\n    SharepointTestEnvSetupTuple,\n)\n\nlogger = setup_logger()\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permission tests are enterprise only\",\n)\ndef test_public_documents_accessible_by_all_users(\n    sharepoint_test_env_setup: SharepointTestEnvSetupTuple,\n) -> None:\n    \"\"\"Test that public documents are accessible by both test users using ACL verification\"\"\"\n    (\n        admin_user,\n        regular_user_1,\n        regular_user_2,\n        credential,\n        connector,\n        cc_pair,\n    ) = sharepoint_test_env_setup\n\n    with get_session_with_current_tenant() as db_session:\n        # Get all documents for this connector\n        all_document_ids = get_all_connector_documents(cc_pair, db_session)\n\n        # Test that regular_user_1 can access documents\n        accessible_docs_user1 = get_user_document_access_via_acl(\n            test_user=regular_user_1,\n            document_ids=all_document_ids,\n            db_session=db_session,\n        )\n\n        # Test that regular_user_2 can access documents\n        accessible_docs_user2 = get_user_document_access_via_acl(\n            test_user=regular_user_2,\n            document_ids=all_document_ids,\n            db_session=db_session,\n        )\n\n        logger.info(f\"User 1 has access to {len(accessible_docs_user1)} documents\")\n        logger.info(f\"User 2 has access to {len(accessible_docs_user2)} documents\")\n\n        # For public documents, both users should have access to at least some docs\n        assert len(accessible_docs_user1) == 8, (\n            f\"User 1 should have access to documents. Found \"\n            f\"{len(accessible_docs_user1)} accessible docs out of \"\n            f\"{len(all_document_ids)} total\"\n        )\n        assert len(accessible_docs_user2) == 1, (\n            f\"User 2 should have access to documents. Found \"\n            f\"{len(accessible_docs_user2)} accessible docs out of \"\n            f\"{len(all_document_ids)} total\"\n        )\n\n        logger.info(\n            \"Successfully verified public documents are accessible by users via ACL\"\n        )\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permission tests are enterprise only\",\n)\ndef test_group_based_permissions(\n    sharepoint_test_env_setup: SharepointTestEnvSetupTuple,\n) -> None:\n    \"\"\"Test that documents with group permissions are accessible only by users in that group using ACL verification\"\"\"\n    (\n        admin_user,\n        regular_user_1,\n        regular_user_2,\n        credential,\n        connector,\n        cc_pair,\n    ) = sharepoint_test_env_setup\n\n    with get_session_with_current_tenant() as db_session:\n        # Get all documents for this connector\n        all_document_ids = get_all_connector_documents(cc_pair, db_session)\n\n        if not all_document_ids:\n            pytest.skip(\"No documents found for connector - skipping test\")\n\n        # Test access for both users\n        accessible_docs_user1 = get_user_document_access_via_acl(\n            test_user=regular_user_1,\n            document_ids=all_document_ids,\n            db_session=db_session,\n        )\n\n        accessible_docs_user2 = get_user_document_access_via_acl(\n            test_user=regular_user_2,\n            document_ids=all_document_ids,\n            db_session=db_session,\n        )\n\n        logger.info(f\"User 1 has access to {len(accessible_docs_user1)} documents\")\n        logger.info(f\"User 2 has access to {len(accessible_docs_user2)} documents\")\n\n        public_docs = get_documents_by_permission_type(all_document_ids, db_session)\n\n        # Check if user 2 has access to any non-public documents\n        non_public_access_user2 = [\n            doc for doc in accessible_docs_user2 if doc not in public_docs\n        ]\n\n        assert (\n            len(non_public_access_user2) == 0\n        ), f\"User 2 should only have access to public documents. Found access to non-public docs: {non_public_access_user2}\"\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/slack/conftest.py",
    "content": "import os\nfrom collections.abc import Generator\n\nimport pytest\n\nfrom onyx.connectors.slack.models import ChannelType\nfrom tests.integration.connector_job_tests.slack.slack_api_utils import SlackManager\n\nSLACK_ADMIN_EMAIL = os.environ.get(\"SLACK_ADMIN_EMAIL\", \"evan@onyx.app\")\nSLACK_TEST_USER_1_EMAIL = os.environ.get(\"SLACK_TEST_USER_1_EMAIL\", \"evan+1@onyx.app\")\nSLACK_TEST_USER_2_EMAIL = os.environ.get(\"SLACK_TEST_USER_2_EMAIL\", \"justin@onyx.app\")\n\n\ndef _provision_slack_channels(\n    bot_token: str,\n) -> Generator[tuple[ChannelType, ChannelType], None, None]:\n    slack_client = SlackManager.get_slack_client(bot_token)\n\n    auth_info = slack_client.auth_test()\n    print(f\"\\nSlack workspace: {auth_info.get('team')} ({auth_info.get('url')})\")\n\n    user_map = SlackManager.build_slack_user_email_id_map(slack_client)\n    if SLACK_ADMIN_EMAIL not in user_map:\n        raise KeyError(\n            f\"'{SLACK_ADMIN_EMAIL}' not found in Slack workspace. Available emails: {sorted(user_map.keys())}\"\n        )\n    admin_user_id = user_map[SLACK_ADMIN_EMAIL]\n\n    (\n        public_channel,\n        private_channel,\n        run_id,\n    ) = SlackManager.get_and_provision_available_slack_channels(\n        slack_client=slack_client, admin_user_id=admin_user_id\n    )\n\n    yield public_channel, private_channel\n\n    SlackManager.cleanup_after_test(slack_client=slack_client, test_id=run_id)\n\n\n@pytest.fixture()\ndef slack_test_setup() -> Generator[tuple[ChannelType, ChannelType], None, None]:\n    yield from _provision_slack_channels(os.environ[\"SLACK_BOT_TOKEN\"])\n\n\n@pytest.fixture()\ndef slack_perm_sync_test_setup() -> (\n    Generator[tuple[ChannelType, ChannelType], None, None]\n):\n    yield from _provision_slack_channels(os.environ[\"SLACK_BOT_TOKEN_TEST_SPACE\"])\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/slack/slack_api_utils.py",
    "content": "\"\"\"\nAssumptions:\n- The test users have already been created\n- General is empty of messages\n- In addition to the normal slack oauth permissions, the following scopes are needed:\n    - channels:manage\n    - groups:write\n    - chat:write\n    - chat:write.public\n\"\"\"\n\nfrom typing import Any\nfrom typing import cast\nfrom uuid import uuid4\n\nfrom slack_sdk import WebClient\nfrom slack_sdk.errors import SlackApiError\n\nfrom onyx.connectors.slack.connector import get_channel_messages\nfrom onyx.connectors.slack.models import ChannelType\nfrom onyx.connectors.slack.utils import make_paginated_slack_api_call\n\n\ndef _get_slack_channel_id(channel: ChannelType) -> str:\n    if not (channel_id := channel.get(\"id\")):\n        raise ValueError(\"Channel ID is missing\")\n    return channel_id\n\n\ndef _get_non_general_channels(\n    slack_client: WebClient,\n    get_private: bool,\n    get_public: bool,\n    only_get_done: bool = False,\n) -> list[ChannelType]:\n    channel_types = []\n    if get_private:\n        channel_types.append(\"private_channel\")\n    if get_public:\n        channel_types.append(\"public_channel\")\n\n    conversations: list[dict[str, Any]] = []\n    for result in make_paginated_slack_api_call(\n        slack_client.conversations_list,\n        exclude_archived=False,\n        types=channel_types,\n    ):\n        conversations.extend(result[\"channels\"])\n\n    filtered_conversations = []\n    for conversation in conversations:\n        if conversation.get(\"is_general\", False):\n            continue\n        if only_get_done and \"done\" not in conversation.get(\"name\", \"\"):\n            continue\n        filtered_conversations.append(conversation)\n    return cast(list[ChannelType], filtered_conversations)\n\n\ndef _clear_slack_conversation_members(\n    slack_client: WebClient,\n    admin_user_id: str,\n    channel: ChannelType,\n) -> None:\n    channel_id = _get_slack_channel_id(channel)\n    member_ids: list[str] = []\n    for result in make_paginated_slack_api_call(\n        slack_client.conversations_members,\n        channel=channel_id,\n    ):\n        member_ids.extend(result[\"members\"])\n\n    for member_id in member_ids:\n        if member_id == admin_user_id:\n            continue\n        try:\n            slack_client.conversations_kick(channel=channel_id, user=member_id)\n            print(f\"Kicked member: {member_id}\")\n        except Exception as e:\n            if \"cant_kick_self\" in str(e):\n                continue\n            print(f\"Error kicking member: {e}\")\n            print(member_id)\n    try:\n        slack_client.conversations_unarchive(channel=channel_id)\n        channel[\"is_archived\"] = False\n    except Exception:\n        # Channel is already unarchived\n        pass\n\n\ndef _add_slack_conversation_members(\n    slack_client: WebClient, channel: ChannelType, member_ids: list[str]\n) -> None:\n    channel_id = _get_slack_channel_id(channel)\n    for user_id in member_ids:\n        try:\n            slack_client.conversations_invite(channel=channel_id, users=user_id)\n        except Exception as e:\n            if \"already_in_channel\" in str(e):\n                continue\n            print(f\"Error inviting member: {e}\")\n            print(user_id)\n\n\ndef _delete_slack_conversation_messages(\n    slack_client: WebClient,\n    channel: ChannelType,\n    message_to_delete: str | None = None,\n) -> None:\n    \"\"\"deletes all messages from a channel if message_to_delete is None\"\"\"\n    channel_id = _get_slack_channel_id(channel)\n    for message_batch in get_channel_messages(slack_client, channel):\n        for message in message_batch:\n            if message_to_delete and message.get(\"text\") != message_to_delete:\n                continue\n            print(\" removing message: \", message.get(\"text\"))\n\n            try:\n                if not (ts := message.get(\"ts\")):\n                    raise ValueError(\"Message timestamp is missing\")\n                slack_client.chat_delete(channel=channel_id, ts=ts)\n            except Exception as e:\n                print(f\"Error deleting message: {e}\")\n                print(message)\n\n\ndef _build_slack_channel_from_name(\n    slack_client: WebClient,\n    admin_user_id: str,\n    suffix: str,\n    is_private: bool,\n    channel: ChannelType | None,\n) -> ChannelType:\n    base = \"public_channel\" if not is_private else \"private_channel\"\n    channel_name = f\"{base}-{suffix}\"\n    if channel:\n        # If channel is provided, we rename it\n        channel_id = _get_slack_channel_id(channel)\n        channel_response = slack_client.conversations_rename(\n            channel=channel_id,\n            name=channel_name,\n        )\n    else:\n        # Otherwise, we create a new channel\n        channel_response = slack_client.conversations_create(\n            name=channel_name,\n            is_private=is_private,\n        )\n\n    try:\n        slack_client.conversations_unarchive(channel=channel_response[\"channel\"][\"id\"])\n    except Exception:\n        # Channel is already unarchived\n        pass\n    try:\n        slack_client.conversations_invite(\n            channel=channel_response[\"channel\"][\"id\"],\n            users=[admin_user_id],\n        )\n    except Exception:\n        pass\n\n    final_channel = channel_response[\"channel\"] if channel_response else {}\n    return cast(ChannelType, final_channel)\n\n\nclass SlackManager:\n    @staticmethod\n    def get_slack_client(token: str) -> WebClient:\n        return WebClient(token=token)\n\n    @staticmethod\n    def get_and_provision_available_slack_channels(\n        slack_client: WebClient, admin_user_id: str\n    ) -> tuple[ChannelType, ChannelType, str]:\n        run_id = str(uuid4())\n        public_channels = _get_non_general_channels(\n            slack_client, get_private=False, get_public=True, only_get_done=True\n        )\n\n        first_available_channel = (\n            None if len(public_channels) < 1 else public_channels[0]\n        )\n        public_channel = _build_slack_channel_from_name(\n            slack_client=slack_client,\n            admin_user_id=admin_user_id,\n            suffix=run_id,\n            is_private=False,\n            channel=first_available_channel,\n        )\n        _delete_slack_conversation_messages(\n            slack_client=slack_client, channel=public_channel\n        )\n\n        private_channels = _get_non_general_channels(\n            slack_client, get_private=True, get_public=False, only_get_done=True\n        )\n        second_available_channel = (\n            None if len(private_channels) < 1 else private_channels[0]\n        )\n        private_channel = _build_slack_channel_from_name(\n            slack_client=slack_client,\n            admin_user_id=admin_user_id,\n            suffix=run_id,\n            is_private=True,\n            channel=second_available_channel,\n        )\n        _delete_slack_conversation_messages(\n            slack_client=slack_client, channel=private_channel\n        )\n\n        return public_channel, private_channel, run_id\n\n    @staticmethod\n    def build_slack_user_email_id_map(slack_client: WebClient) -> dict[str, str]:\n        users: list[dict[str, Any]] = []\n\n        for users_results in make_paginated_slack_api_call(\n            slack_client.users_list,\n        ):\n            users.extend(users_results.get(\"members\", []))\n\n        user_email_id_map = {}\n        for user in users:\n            if not (email := user.get(\"profile\", {}).get(\"email\")):\n                continue\n            if not (user_id := user.get(\"id\")):\n                raise ValueError(\"User ID is missing\")\n            user_email_id_map[email] = user_id\n        return user_email_id_map\n\n    @staticmethod\n    def set_channel_members(\n        slack_client: WebClient,\n        admin_user_id: str,\n        channel: ChannelType,\n        user_ids: list[str],\n    ) -> None:\n        _clear_slack_conversation_members(\n            slack_client=slack_client,\n            channel=channel,\n            admin_user_id=admin_user_id,\n        )\n        _add_slack_conversation_members(\n            slack_client=slack_client, channel=channel, member_ids=user_ids\n        )\n\n    @staticmethod\n    def add_message_to_channel(\n        slack_client: WebClient, channel: ChannelType, message: str\n    ) -> None:\n        channel_id = _get_slack_channel_id(channel)\n        slack_client.chat_postMessage(\n            channel=channel_id,\n            text=message,\n        )\n\n    @staticmethod\n    def remove_message_from_channel(\n        slack_client: WebClient, channel: ChannelType, message: str\n    ) -> None:\n        _delete_slack_conversation_messages(\n            slack_client=slack_client, channel=channel, message_to_delete=message\n        )\n\n    @staticmethod\n    def cleanup_after_test(\n        slack_client: WebClient,\n        test_id: str,\n    ) -> None:\n        channel_types = [\"private_channel\", \"public_channel\"]\n        channels: list[ChannelType] = []\n        for result in make_paginated_slack_api_call(\n            slack_client.conversations_list,\n            exclude_archived=False,\n            types=channel_types,\n        ):\n            channels.extend(result[\"channels\"])\n\n        for channel in channels:\n            if test_id not in channel.get(\"name\", \"\"):\n                continue\n            # \"done\" in the channel name indicates that this channel is free to be used for a new test\n            new_name = f\"done_{str(uuid4())}\"\n            try:\n                slack_client.conversations_rename(channel=channel[\"id\"], name=new_name)\n            except SlackApiError as e:\n                print(f\"Error renaming channel {channel['id']}: {e}\")\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/slack/test_permission_sync.py",
    "content": "import os\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport pytest\n\nfrom onyx.connectors.models import InputType\nfrom onyx.connectors.slack.models import ChannelType\nfrom onyx.db.enums import AccessType\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.document_search import (\n    DocumentSearchManager,\n)\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestConnector\nfrom tests.integration.common_utils.test_models import DATestCredential\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.vespa import vespa_fixture\nfrom tests.integration.connector_job_tests.slack.conftest import SLACK_ADMIN_EMAIL\nfrom tests.integration.connector_job_tests.slack.conftest import SLACK_TEST_USER_1_EMAIL\nfrom tests.integration.connector_job_tests.slack.conftest import SLACK_TEST_USER_2_EMAIL\nfrom tests.integration.connector_job_tests.slack.slack_api_utils import SlackManager\n\n\n# NOTE(rkuo): it isn't yet clear if the reason these were previously xfail'd\n# still exists. May need to xfail again if flaky (DAN-789)\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permission tests are enterprise only\",\n)\ndef test_slack_permission_sync(\n    reset: None,  # noqa: ARG001\n    vespa_client: vespa_fixture,  # noqa: ARG001\n    slack_perm_sync_test_setup: tuple[ChannelType, ChannelType],\n) -> None:\n    public_channel, private_channel = slack_perm_sync_test_setup\n\n    admin_user: DATestUser = UserManager.create(\n        email=SLACK_ADMIN_EMAIL,\n    )\n\n    test_user_1: DATestUser = UserManager.create(\n        email=SLACK_TEST_USER_1_EMAIL,\n    )\n\n    test_user_2: DATestUser = UserManager.create(\n        email=SLACK_TEST_USER_2_EMAIL,\n    )\n\n    bot_token = os.environ[\"SLACK_BOT_TOKEN_TEST_SPACE\"]\n    slack_client = SlackManager.get_slack_client(bot_token)\n    email_id_map = SlackManager.build_slack_user_email_id_map(slack_client)\n    admin_user_id = email_id_map[admin_user.email]\n\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    before = datetime.now(timezone.utc)\n    credential: DATestCredential = CredentialManager.create(\n        source=DocumentSource.SLACK,\n        credential_json={\n            \"slack_bot_token\": bot_token,\n        },\n        user_performing_action=admin_user,\n    )\n    connector: DATestConnector = ConnectorManager.create(\n        name=\"Slack\",\n        input_type=InputType.POLL,\n        source=DocumentSource.SLACK,\n        connector_specific_config={\n            \"channels\": [public_channel[\"name\"], private_channel[\"name\"]],\n            \"include_bot_messages\": True,\n        },\n        access_type=AccessType.SYNC,\n        groups=[],\n        user_performing_action=admin_user,\n    )\n    cc_pair: DATestCCPair = CCPairManager.create(\n        credential_id=credential.id,\n        connector_id=connector.id,\n        access_type=AccessType.SYNC,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair,\n        after=before,\n        user_performing_action=admin_user,\n    )\n\n    # Add test_user_1 and admin_user to the private channel\n    desired_channel_members = [admin_user, test_user_1]\n    SlackManager.set_channel_members(\n        slack_client=slack_client,\n        admin_user_id=admin_user_id,\n        channel=private_channel,\n        user_ids=[email_id_map[user.email] for user in desired_channel_members],\n    )\n\n    public_message = \"Steve's favorite number is 809752\"\n    private_message = \"Sara's favorite number is 346794\"\n\n    SlackManager.add_message_to_channel(\n        slack_client=slack_client,\n        channel=public_channel,\n        message=public_message,\n    )\n    SlackManager.add_message_to_channel(\n        slack_client=slack_client,\n        channel=private_channel,\n        message=private_message,\n    )\n\n    # Run indexing\n    before = datetime.now(timezone.utc)\n    CCPairManager.run_once(\n        cc_pair, from_beginning=True, user_performing_action=admin_user\n    )\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair,\n        after=before,\n        user_performing_action=admin_user,\n    )\n\n    # Run permission sync. Since initial_index_should_sync=True for Slack,\n    # permissions were already set during indexing above — the explicit sync\n    # should find no changes to apply.\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        number_of_updated_docs=0,\n        user_performing_action=admin_user,\n        should_wait_for_group_sync=False,\n        should_wait_for_vespa_sync=False,\n    )\n\n    # Verify admin can see messages from both channels\n    admin_docs = DocumentSearchManager.search_documents(\n        query=\"favorite number\",\n        user_performing_action=admin_user,\n    )\n    assert public_message in admin_docs\n    assert private_message in admin_docs\n\n    # Verify test_user_2 can only see public channel messages\n    user_2_docs = DocumentSearchManager.search_documents(\n        query=\"favorite number\",\n        user_performing_action=test_user_2,\n    )\n    assert public_message in user_2_docs\n    assert private_message not in user_2_docs\n\n    # Verify test_user_1 can see both channels (member of private channel)\n    user_1_docs = DocumentSearchManager.search_documents(\n        query=\"favorite number\",\n        user_performing_action=test_user_1,\n    )\n    assert public_message in user_1_docs\n    assert private_message in user_1_docs\n\n    # Remove test_user_1 from the private channel\n    before = datetime.now(timezone.utc)\n    desired_channel_members = [admin_user]\n    SlackManager.set_channel_members(\n        slack_client=slack_client,\n        admin_user_id=admin_user_id,\n        channel=private_channel,\n        user_ids=[email_id_map[user.email] for user in desired_channel_members],\n    )\n\n    # Run permission sync\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        number_of_updated_docs=1,\n        user_performing_action=admin_user,\n        should_wait_for_group_sync=False,\n    )\n\n    # Verify test_user_1 can no longer see private channel after removal\n    user_1_docs = DocumentSearchManager.search_documents(\n        query=\"favorite number\",\n        user_performing_action=test_user_1,\n    )\n    assert public_message in user_1_docs\n    assert private_message not in user_1_docs\n\n\n# NOTE(rkuo): it isn't yet clear if the reason these were previously xfail'd\n# still exists. May need to xfail again if flaky (DAN-789)\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permission tests are enterprise only\",\n)\ndef test_slack_group_permission_sync(\n    reset: None,  # noqa: ARG001\n    vespa_client: vespa_fixture,  # noqa: ARG001\n    slack_perm_sync_test_setup: tuple[ChannelType, ChannelType],\n) -> None:\n    \"\"\"\n    This test ensures that permission sync overrides onyx group access.\n    \"\"\"\n    public_channel, private_channel = slack_perm_sync_test_setup\n\n    admin_user: DATestUser = UserManager.create(\n        email=SLACK_ADMIN_EMAIL,\n    )\n\n    test_user_1: DATestUser = UserManager.create(\n        email=SLACK_TEST_USER_1_EMAIL,\n    )\n\n    # Create a user group and adding the non-admin user to it\n    user_group = UserGroupManager.create(\n        name=\"test_group\",\n        user_ids=[test_user_1.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group],\n        user_performing_action=admin_user,\n    )\n\n    bot_token = os.environ[\"SLACK_BOT_TOKEN_TEST_SPACE\"]\n    slack_client = SlackManager.get_slack_client(bot_token)\n    email_id_map = SlackManager.build_slack_user_email_id_map(slack_client)\n    admin_user_id = email_id_map[admin_user.email]\n\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    # Add only admin to the private channel\n    SlackManager.set_channel_members(\n        slack_client=slack_client,\n        admin_user_id=admin_user_id,\n        channel=private_channel,\n        user_ids=[admin_user_id],\n    )\n\n    before = datetime.now(timezone.utc)\n    credential = CredentialManager.create(\n        source=DocumentSource.SLACK,\n        credential_json={\n            \"slack_bot_token\": bot_token,\n        },\n        user_performing_action=admin_user,\n    )\n\n    # Create connector with sync access and assign it to the user group\n    connector = ConnectorManager.create(\n        name=\"Slack\",\n        input_type=InputType.POLL,\n        source=DocumentSource.SLACK,\n        connector_specific_config={\n            \"channels\": [private_channel[\"name\"]],\n            \"include_bot_messages\": True,\n        },\n        access_type=AccessType.SYNC,\n        groups=[user_group.id],\n        user_performing_action=admin_user,\n    )\n\n    cc_pair = CCPairManager.create(\n        credential_id=credential.id,\n        connector_id=connector.id,\n        access_type=AccessType.SYNC,\n        user_performing_action=admin_user,\n        groups=[user_group.id],\n    )\n\n    # Add a test message to the private channel\n    private_message = \"This is a secret message: 987654\"\n    SlackManager.add_message_to_channel(\n        slack_client=slack_client,\n        channel=private_channel,\n        message=private_message,\n    )\n\n    # Run indexing\n    CCPairManager.run_once(\n        cc_pair, from_beginning=True, user_performing_action=admin_user\n    )\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair,\n        after=before,\n        user_performing_action=admin_user,\n    )\n\n    # Run permission sync. Since initial_index_should_sync=True for Slack,\n    # permissions were already set during indexing — no changes expected.\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        number_of_updated_docs=0,\n        user_performing_action=admin_user,\n        should_wait_for_group_sync=False,\n        should_wait_for_vespa_sync=False,\n    )\n\n    # Verify admin can see the message\n    admin_docs = DocumentSearchManager.search_documents(\n        query=\"secret message\",\n        user_performing_action=admin_user,\n    )\n    assert private_message in admin_docs\n\n    # Verify test_user_1 cannot see the message despite being in the group\n    # (Slack permissions should take precedence)\n    user_1_docs = DocumentSearchManager.search_documents(\n        query=\"secret message\",\n        user_performing_action=test_user_1,\n    )\n    assert private_message not in user_1_docs\n"
  },
  {
    "path": "backend/tests/integration/connector_job_tests/slack/test_prune.py",
    "content": "import os\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport pytest\n\nfrom onyx.connectors.models import InputType\nfrom onyx.connectors.slack.models import ChannelType\nfrom onyx.db.enums import AccessType\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.document_search import (\n    DocumentSearchManager,\n)\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestConnector\nfrom tests.integration.common_utils.test_models import DATestCredential\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.vespa import vespa_fixture\nfrom tests.integration.connector_job_tests.slack.slack_api_utils import SlackManager\n\n\n@pytest.mark.xfail(reason=\"flaky - see DAN-986 for details\", strict=False)\ndef test_slack_prune(\n    reset: None,  # noqa: ARG001\n    vespa_client: vespa_fixture,  # noqa: ARG001\n    slack_test_setup: tuple[ChannelType, ChannelType],\n) -> None:\n    public_channel, private_channel = slack_test_setup\n\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(\n        email=\"admin@example.com\",\n    )\n\n    # Creating a non-admin user\n    test_user_1: DATestUser = UserManager.create(\n        email=\"test_user_1@example.com\",\n    )\n\n    slack_client = SlackManager.get_slack_client(os.environ[\"SLACK_BOT_TOKEN\"])\n    email_id_map = SlackManager.build_slack_user_email_id_map(slack_client)\n    admin_user_id = email_id_map[admin_user.email]\n\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    before = datetime.now(timezone.utc)\n    credential: DATestCredential = CredentialManager.create(\n        source=DocumentSource.SLACK,\n        credential_json={\n            \"slack_bot_token\": os.environ[\"SLACK_BOT_TOKEN\"],\n        },\n        user_performing_action=admin_user,\n    )\n    connector: DATestConnector = ConnectorManager.create(\n        name=\"Slack\",\n        input_type=InputType.POLL,\n        source=DocumentSource.SLACK,\n        connector_specific_config={\n            \"channels\": [public_channel[\"name\"], private_channel[\"name\"]],\n        },\n        access_type=AccessType.PUBLIC,\n        groups=[],\n        user_performing_action=admin_user,\n    )\n    cc_pair: DATestCCPair = CCPairManager.create(\n        credential_id=credential.id,\n        connector_id=connector.id,\n        access_type=AccessType.SYNC,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair,\n        after=before,\n        user_performing_action=admin_user,\n    )\n\n    # ----------------------SETUP INITIAL SLACK STATE--------------------------\n    # Add test_user_1 and admin_user to the private channel\n    desired_channel_members = [admin_user, test_user_1]\n    SlackManager.set_channel_members(\n        slack_client=slack_client,\n        admin_user_id=admin_user_id,\n        channel=private_channel,\n        user_ids=[email_id_map[user.email] for user in desired_channel_members],\n    )\n\n    public_message = \"Steve's favorite number is 809752\"\n    private_message = \"Sara's favorite number is 346794\"\n    message_to_delete = \"Rebecca's favorite number is 753468\"\n\n    SlackManager.add_message_to_channel(\n        slack_client=slack_client,\n        channel=public_channel,\n        message=public_message,\n    )\n    SlackManager.add_message_to_channel(\n        slack_client=slack_client,\n        channel=private_channel,\n        message=private_message,\n    )\n    SlackManager.add_message_to_channel(\n        slack_client=slack_client,\n        channel=private_channel,\n        message=message_to_delete,\n    )\n\n    # Run indexing\n    before = datetime.now(timezone.utc)\n    CCPairManager.run_once(\n        cc_pair, from_beginning=True, user_performing_action=admin_user\n    )\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair,\n        after=before,\n        user_performing_action=admin_user,\n    )\n\n    # Run permission sync\n    before = datetime.now(timezone.utc)\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        user_performing_action=admin_user,\n    )\n\n    # ----------------------TEST THE SETUP--------------------------\n    # Search as admin with access to both channels\n    onyx_doc_message_strings = DocumentSearchManager.search_documents(\n        query=\"favorite number\",\n        user_performing_action=admin_user,\n    )\n    print(\n        \"\\ntop_documents content before deleting for admin: \",\n        onyx_doc_message_strings,\n    )\n\n    # Ensure admin user can see all messages\n    assert public_message in onyx_doc_message_strings\n    assert private_message in onyx_doc_message_strings\n    assert message_to_delete in onyx_doc_message_strings\n\n    # Search as test_user_1 with access to both channels\n    onyx_doc_message_strings = DocumentSearchManager.search_documents(\n        query=\"favorite number\",\n        user_performing_action=test_user_1,\n    )\n    print(\n        \"\\ntop_documents content before deleting for test_user_1: \",\n        onyx_doc_message_strings,\n    )\n\n    # Ensure test_user_1 can see all messages\n    assert public_message in onyx_doc_message_strings\n    assert private_message in onyx_doc_message_strings\n    assert message_to_delete in onyx_doc_message_strings\n\n    # ----------------------MAKE THE CHANGES--------------------------\n    # Delete messages\n    print(\"\\nDeleting message: \", message_to_delete)\n    SlackManager.remove_message_from_channel(\n        slack_client=slack_client,\n        channel=private_channel,\n        message=message_to_delete,\n    )\n\n    # Prune the cc_pair\n    now = datetime.now(timezone.utc)\n    CCPairManager.prune(cc_pair, user_performing_action=admin_user)\n    CCPairManager.wait_for_prune(cc_pair, now, user_performing_action=admin_user)\n\n    # ----------------------------VERIFY THE CHANGES---------------------------\n    # Ensure admin user can't see deleted messages\n    # Search as admin user with access to only the public channel\n    onyx_doc_message_strings = DocumentSearchManager.search_documents(\n        query=\"favorite number\",\n        user_performing_action=admin_user,\n    )\n    print(\n        \"\\ntop_documents content after deleting for admin: \",\n        onyx_doc_message_strings,\n    )\n\n    # Ensure admin can't see deleted messages\n    assert public_message in onyx_doc_message_strings\n    assert private_message in onyx_doc_message_strings\n    assert message_to_delete not in onyx_doc_message_strings\n\n    # Ensure test_user_1 can't see deleted messages\n    # Search as test_user_1 with access to only the public channel\n    onyx_doc_message_strings = DocumentSearchManager.search_documents(\n        query=\"favorite number\",\n        user_performing_action=test_user_1,\n    )\n    print(\n        \"\\ntop_documents content after prune for test_user_1: \",\n        onyx_doc_message_strings,\n    )\n\n    # Ensure test_user_1 can't see deleted messages\n    assert public_message in onyx_doc_message_strings\n    assert private_message in onyx_doc_message_strings\n    assert message_to_delete not in onyx_doc_message_strings\n"
  },
  {
    "path": "backend/tests/integration/mock_services/docker-compose.mock-it-services.yml",
    "content": "version: '3.8'\n\nservices:\n  mock_connector_server:\n    build:\n      context: ./mock_connector_server\n      dockerfile: Dockerfile\n    ports:\n      - \"8001:8001\"\n    healthcheck:\n      test: [\"CMD\", \"curl\", \"-f\", \"http://localhost:8001/health\"]\n      interval: 10s\n      timeout: 5s\n      retries: 5\n    networks:\n      - onyx_default\nnetworks:\n  onyx_default:\n    name: onyx_default\n    external: true\n"
  },
  {
    "path": "backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_api_key.py",
    "content": "import sys\n\nimport uvicorn\nfrom fastapi import FastAPI\nfrom fastapi.responses import PlainTextResponse\nfrom fastmcp import FastMCP\nfrom fastmcp.server.auth import StaticTokenVerifier\n\n\ndef make_many_tools(mcp: FastMCP) -> None:\n    def make_tool(i: int) -> None:\n        @mcp.tool(name=f\"tool_{i}\", description=f\"Get secret value {i}\")\n        def tool_name(name: str) -> str:  # noqa: ARG001\n            \"\"\"Get secret value.\"\"\"\n            return f\"Secret value {200 - i}!\"\n\n    for i in range(100):\n        make_tool(i)\n\n\nif __name__ == \"__main__\":\n    # Accept only these tokens (treat them like API keys) and require a scope\n    if len(sys.argv) > 1:\n        api_key = sys.argv[1]\n    else:\n        api_key = \"dev-api-key-123\"\n\n    if len(sys.argv) > 2:\n        port = int(sys.argv[2])\n    else:\n        port = 8001\n\n    auth = StaticTokenVerifier(\n        tokens={\n            api_key: {\"client_id\": \"evan\", \"scopes\": [\"mcp:use\"]},\n        },\n        required_scopes=[\"mcp:use\"],\n    )\n\n    # Create FastMCP instance - it will handle /mcp path internally\n    mcp = FastMCP(\"My HTTP MCP\", auth=auth)\n    make_many_tools(mcp)\n\n    # Get the MCP HTTP app (configured to serve at /mcp)\n    mcp_app = mcp.http_app()\n\n    # Create wrapper FastAPI app with the MCP app's lifespan\n    app = FastAPI(title=\"MCP API Key Test Server\", lifespan=mcp_app.lifespan)\n\n    # Health check (unprotected)\n    @app.get(\"/healthz\")\n    def health() -> PlainTextResponse:\n        return PlainTextResponse(\"ok\")\n\n    # Mount MCP app at root - it handles /mcp internally\n    app.mount(\"/\", mcp_app)\n\n    # Run the server\n    uvicorn.run(app, host=\"0.0.0.0\", port=port)\n"
  },
  {
    "path": "backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_google_oauth.py",
    "content": "\"\"\"\nMCP Test Server for Google OAuth Pass-Through Authentication\n\nThis server validates Google OAuth access tokens that are passed through from\nOnyx. When users log into Onyx with Google OAuth, their access token is stored\nand can be passed to MCP servers that require authentication.\n\nThis server validates those tokens by calling Google's tokeninfo endpoint.\n\nUsage:\n    python run_mcp_server_google_oauth.py [port]\n\nEnvironment Variables:\n    MCP_SERVER_HOST: Host to bind to (default: 127.0.0.1)\n    MCP_SERVER_PUBLIC_HOST: Public hostname for the server\n    MCP_SERVER_PUBLIC_URL: Public URL for the server (e.g., for proxied setups)\n\"\"\"\n\nimport os\nimport sys\nfrom typing import Any\n\nimport httpx\nimport uvicorn\nfrom fastapi import FastAPI\nfrom fastapi.responses import PlainTextResponse\nfrom fastmcp import FastMCP\nfrom fastmcp.server.auth import AccessToken\nfrom fastmcp.server.auth import TokenVerifier\nfrom fastmcp.server.dependencies import get_access_token\n\n# Google's tokeninfo endpoint for validating access tokens\nGOOGLE_TOKENINFO_URL = \"https://oauth2.googleapis.com/tokeninfo\"\n\n\nclass GoogleOAuthTokenVerifier(TokenVerifier):\n    \"\"\"\n    Token verifier that validates Google OAuth access tokens.\n\n    Google access tokens are opaque tokens (not JWTs), so they need to be\n    validated by calling Google's tokeninfo endpoint. This verifier makes\n    an HTTP request to Google to validate the token and extract user info.\n\n    This is useful for testing pass-through OAuth scenarios where Onyx\n    forwards the user's Google OAuth token to an MCP server.\n    \"\"\"\n\n    def __init__(\n        self,\n        required_scopes: list[str] | None = None,\n        base_url: str | None = None,\n    ):\n        \"\"\"\n        Initialize the Google OAuth token verifier.\n\n        Args:\n            required_scopes: Optional list of scopes that must be present in the token.\n                            Google tokens have scopes like 'openid', 'email', 'profile'.\n            base_url: URL of this resource server (for RFC 8707)\n        \"\"\"\n        super().__init__(\n            base_url=base_url,\n            required_scopes=required_scopes,\n        )\n        self._http_client: httpx.AsyncClient | None = None\n\n    async def _get_http_client(self) -> httpx.AsyncClient:\n        \"\"\"Get or create the HTTP client for token validation.\"\"\"\n        if self._http_client is None or self._http_client.is_closed:\n            self._http_client = httpx.AsyncClient(timeout=10.0)\n        return self._http_client\n\n    async def verify_token(self, token: str) -> AccessToken | None:\n        \"\"\"\n        Verify a Google OAuth access token by calling Google's tokeninfo endpoint.\n\n        Args:\n            token: The Google OAuth access token to validate\n\n        Returns:\n            AccessToken object if valid, None if invalid or expired\n        \"\"\"\n        try:\n            client = await self._get_http_client()\n\n            # Call Google's tokeninfo endpoint\n            response = await client.get(\n                GOOGLE_TOKENINFO_URL,\n                params={\"access_token\": token},\n            )\n\n            if response.status_code != 200:\n                # Token is invalid or expired\n                return None\n\n            token_info = response.json()\n\n            # Check if token has an error (Google returns 200 with error field sometimes)\n            if \"error\" in token_info:\n                return None\n\n            # Extract scopes from the token\n            scopes_str = token_info.get(\"scope\", \"\")\n            scopes = scopes_str.split() if scopes_str else []\n\n            # Check required scopes if configured\n            if self.required_scopes:\n                token_scopes = set(scopes)\n                required = set(self.required_scopes)\n                if not required.issubset(token_scopes):\n                    return None\n\n            # Extract client/user ID - prefer email for user identification\n            client_id = (\n                token_info.get(\"email\")\n                or token_info.get(\"sub\")\n                or token_info.get(\"user_id\")\n                or \"unknown\"\n            )\n\n            # Extract expiration time\n            expires_in = token_info.get(\"expires_in\")\n            expires_at = None\n            if expires_in:\n                import time\n\n                expires_at = int(time.time()) + int(expires_in)\n\n            return AccessToken(\n                token=token,\n                client_id=client_id,\n                scopes=scopes,\n                expires_at=expires_at,\n                claims=token_info,\n            )\n\n        except httpx.HTTPError:\n            # Network error or timeout\n            return None\n        except Exception:\n            # Any other error during validation\n            return None\n\n    async def close(self) -> None:\n        \"\"\"Close the HTTP client.\"\"\"\n        if self._http_client and not self._http_client.is_closed:\n            await self._http_client.aclose()\n\n\ndef make_tools(mcp: FastMCP) -> None:\n    \"\"\"Create test tools for the MCP server.\"\"\"\n\n    @mcp.tool(name=\"echo\", description=\"Echo back the input message\")\n    def echo(message: str) -> str:\n        \"\"\"Echo the message back to the caller.\"\"\"\n        return f\"You said: {message}\"\n\n    @mcp.tool(name=\"get_secret\", description=\"Get a secret value (requires auth)\")\n    def get_secret(secret_name: str) -> str:\n        \"\"\"Get a secret value. This proves the token was validated.\"\"\"\n        return f\"Secret value for '{secret_name}': super-secret-value-12345\"\n\n    @mcp.tool(name=\"whoami\", description=\"Get information about the authenticated user\")\n    async def whoami() -> dict[str, Any]:\n        \"\"\"Get information about the authenticated user from their Google token.\"\"\"\n        tok = get_access_token()\n        if not tok:\n            return {\"error\": \"Not authenticated\"}\n\n        return {\n            \"client_id\": tok.client_id,\n            \"scopes\": tok.scopes,\n            \"email\": tok.claims.get(\"email\"),\n            \"email_verified\": tok.claims.get(\"email_verified\"),\n            \"expires_in\": tok.claims.get(\"expires_in\"),\n            \"access_type\": tok.claims.get(\"access_type\"),\n        }\n\n    for i in range(5):\n\n        @mcp.tool(name=f\"oauth_tool_{i}\", description=f\"Test tool number {i}\")\n        def numbered_tool(name: str, _i: int = i) -> str:\n            \"\"\"A numbered test tool.\"\"\"\n            return f\"Tool {_i} says hello to {name}!\"\n\n\nif __name__ == \"__main__\":\n    port = int(sys.argv[1] if len(sys.argv) > 1 else \"8006\")\n\n    # Get configuration from environment\n    bind_host = os.getenv(\"MCP_SERVER_HOST\", \"127.0.0.1\")\n    public_host = os.getenv(\"MCP_SERVER_PUBLIC_HOST\", bind_host)\n    public_url = os.getenv(\"MCP_SERVER_PUBLIC_URL\")\n\n    # Optional: require specific scopes (Google tokens have scopes like 'email', 'profile')\n    # Leave empty to accept any valid Google token\n    required_scopes_str = os.getenv(\"MCP_GOOGLE_REQUIRED_SCOPES\", \"\")\n    required_scopes = (\n        required_scopes_str.split(\",\") if required_scopes_str.strip() else None\n    )\n\n    print(f\"Starting Google OAuth MCP Test Server on port {port}\")\n    print(f\"Bind host: {bind_host}\")\n    print(f\"Public host: {public_host}\")\n    if public_url:\n        print(f\"Public URL: {public_url}\")\n    if required_scopes:\n        print(f\"Required scopes: {required_scopes}\")\n    else:\n        print(\"No specific scopes required - any valid Google token accepted\")\n\n    # Create the auth verifier\n    auth = GoogleOAuthTokenVerifier(required_scopes=required_scopes)\n\n    # Create FastMCP instance with auth\n    mcp = FastMCP(\"Google OAuth Test MCP Server\", auth=auth)\n    make_tools(mcp)\n\n    # Get the MCP HTTP app\n    mcp_app = mcp.http_app()\n\n    # Create wrapper FastAPI app\n    app = FastAPI(\n        title=\"MCP Google OAuth Test Server\",\n        description=\"MCP server that authenticates using Google OAuth tokens passed through from Onyx\",\n        lifespan=mcp_app.lifespan,\n    )\n\n    # Health check (unprotected)\n    @app.get(\"/healthz\")\n    def health() -> PlainTextResponse:\n        return PlainTextResponse(\"ok\")\n\n    # Info endpoint (unprotected) - useful for debugging\n    @app.get(\"/info\")\n    def info() -> dict[str, Any]:\n        return {\n            \"server\": \"Google OAuth MCP Test Server\",\n            \"auth_type\": \"google_oauth_pass_through\",\n            \"description\": \"Validates Google OAuth tokens passed from Onyx\",\n            \"tokeninfo_endpoint\": GOOGLE_TOKENINFO_URL,\n            \"required_scopes\": required_scopes,\n        }\n\n    # Mount MCP app at root\n    app.mount(\"/\", mcp_app)\n\n    # Run the server\n    uvicorn.run(app, host=bind_host, port=port)\n"
  },
  {
    "path": "backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_no_auth.py",
    "content": "import os\nimport sys\n\nfrom fastmcp import FastMCP\n\nmcp = FastMCP(\"My HTTP MCP\")\n\n\n@mcp.tool\ndef hello(name: str) -> str:\n    \"\"\"Say hi.\"\"\"\n    return f\"Hello, {name}!\"\n\n\ndef make_many_tools() -> None:\n    def make_tool(i: int) -> None:\n        @mcp.tool(name=f\"tool_{i}\", description=f\"Get secret value {i}\")\n        def tool_name(name: str) -> str:  # noqa: ARG001\n            \"\"\"Get secret value.\"\"\"\n            return f\"Secret value {100 - i}!\"\n\n    for i in range(100):\n        make_tool(i)\n\n\nif __name__ == \"__main__\":\n    # Get port from command-line argument first (passed by test)\n    port_from_arg = int(sys.argv[1]) if len(sys.argv) > 1 else None\n    # Streamable HTTP transport (recommended)\n    make_many_tools()\n    host = os.getenv(\"MCP_SERVER_BIND_HOST\", \"0.0.0.0\")\n    # Use MOCK_MCP_SERVER_PORT to avoid conflicts with the real Onyx MCP server port (8090)\n    # Priority: command-line arg > MOCK_MCP_SERVER_PORT > MCP_SERVER_PORT > default 8000\n    if port_from_arg is not None:\n        port = port_from_arg\n    else:\n        port = int(\n            os.getenv(\"MOCK_MCP_SERVER_PORT\") or os.getenv(\"MCP_SERVER_PORT\") or \"8000\"\n        )\n    path = os.getenv(\"MCP_SERVER_PATH\", \"/mcp\")\n    mcp.run(transport=\"http\", host=host, port=port, path=path)\n"
  },
  {
    "path": "backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_oauth.py",
    "content": "import os\nfrom collections.abc import Awaitable\nfrom collections.abc import Callable\nfrom collections.abc import Iterable\nfrom typing import Any\nfrom urllib.parse import urlsplit\nfrom urllib.parse import urlunsplit\n\nimport uvicorn\nfrom fastapi import FastAPI\nfrom fastapi import Request\nfrom fastapi.responses import JSONResponse\nfrom fastapi.responses import PlainTextResponse\nfrom fastapi.responses import Response\nfrom fastmcp import FastMCP\nfrom fastmcp.server.auth.providers.jwt import JWTVerifier\nfrom fastmcp.server.dependencies import get_access_token\nfrom starlette.middleware.base import BaseHTTPMiddleware\n\n# uncomment for debug logs\n# logging.basicConfig(level=logging.DEBUG)\n\n\"\"\"\nSetup Okta:\n1. Create an authorization Server (Admin Console → Security →\nAPI → Authorization Servers), and get the Issuer, JWKS uri,\naudience (i.e. api://mcp). Add the mcp:use scope.\nGrant types should be Authorization Code and Refresh Token.\npolicy should allow your client (or All Clients) to grant oidc default scopes + mcp:use\nWARNING: due to the order of discovery urls, you actually need to use the default authorization server\nuntil Okta updates where their discovery urls are or the client library stops trying\nto go to <base_url>/.well-known/oauth-authorization-server before trying the fallback\n\n2. Create a client (Admin Console → Applications → Create App Integration)\nEnable authorization code and store the client id and secret.\n\"\"\"\n\n\ndef make_many_tools(mcp: FastMCP) -> None:\n    def make_tool(i: int) -> None:\n        @mcp.tool(name=f\"tool_{i}\", description=f\"Get secret value {i}\")\n        def tool_name(name: str) -> str:  # noqa: ARG001\n            \"\"\"Get secret value.\"\"\"\n            return f\"Secret value {500 - i}!\"\n\n    for i in range(100):\n        make_tool(i)\n\n    @mcp.tool\n    async def whoami() -> dict[str, Any]:\n        tok = get_access_token()  # None if unauthenticated\n        return {\n            \"client_id\": tok.client_id if tok else None,\n            \"scopes\": tok.scopes if tok else [],\n            \"claims\": tok.claims if tok else {},\n        }\n\n\n# ---------- FASTAPI APP ----------\n\n\ndef init_app(\n    app: FastAPI,\n    mcp_resource_url: str,\n    authorization_servers: list[str],\n    scopes_supported: list[str],\n) -> None:\n    # 1) Protected Resource Metadata (RFC 9728) at well-known URL.\n    #    We accept both with and without the trailing resource suffix to be lenient in dev.\n    @app.get(\"/.well-known/oauth-protected-resource\")\n    @app.get(\"/.well-known/oauth-protected-resource/{_suffix:path}\")\n    def oauth_protected_resource(_suffix: str = \"\") -> JSONResponse:\n        \"\"\"\n        Return PRM document. The 'resource' MUST equal the MCP resource identifier (the URL clients use),\n        and should be validated by clients per RFC 9728 §3.3.\n        \"\"\"\n        return JSONResponse(\n            {\n                \"resource\": mcp_resource_url,\n                \"authorization_servers\": authorization_servers,\n                \"bearer_methods_supported\": [\"header\"],\n                \"scopes_supported\": scopes_supported,\n                # (Optional extras: jwks_uri, resource_signing_alg_values_supported, etc.)\n            }\n        )\n\n    # Health check (unprotected)\n    @app.get(\"/healthz\")\n    def health() -> PlainTextResponse:\n        return PlainTextResponse(\"ok\")\n\n\ndef metadata_url_for_resource(resource_url: str) -> str:\n    \"\"\"\n    RFC 9728: insert '/.well-known/oauth-protected-resource' between host and path.\n    If the resource has a path (e.g., '/mcp'), append it after the well-known suffix.\n    \"\"\"\n    u = urlsplit(resource_url)\n    path = u.path.lstrip(\"/\")\n    suffix = \"/.well-known/oauth-protected-resource\"\n    if path:\n        suffix += f\"/{path}\"\n    return urlunsplit((u.scheme, u.netloc, suffix, \"\", \"\"))\n\n\nPRM_URL = \"replace me\"\n\n\n# 2) Middleware that ensures 401s include a proper WWW-Authenticate challenge\n#    pointing clients to our PRM URL (RFC 9728 §5.1), and includes RFC 6750 error info.\nclass WWWAuthenticateMiddleware(BaseHTTPMiddleware):\n    def __init__(self, app: FastAPI, protected_prefixes: Iterable[str]) -> None:\n        super().__init__(app)\n        self.protected_prefixes = tuple(protected_prefixes)\n\n    async def dispatch(\n        self, request: Request, call_next: Callable[[Request], Awaitable[Response]]\n    ) -> Response:\n        # Only guard MCP endpoints (both Streamable HTTP and SSE)\n        if not request.url.path.startswith(self.protected_prefixes):\n            return await call_next(request)\n\n        # Let FastMCP/verifier run first\n        response = await call_next(request)\n\n        # If unauthenticated or invalid token, attach RFC-compliant challenge header\n        if response.status_code == 401:\n            # RFC 9728: include resource_metadata param pointing to PRM URL.\n            # RFC 6750: include error + error_description when appropriate.\n            challenge = f'Bearer resource_metadata=\"{PRM_URL}\", error=\"invalid_token\", error_description=\"Authentication required\"'\n            # Don't clobber if already present; append or set.\n            if \"www-authenticate\" in response.headers:\n                response.headers[\"www-authenticate\"] += \", \" + challenge\n            else:\n                response.headers[\"www-authenticate\"] = challenge\n            # Helpful cache headers\n            response.headers.setdefault(\"cache-control\", \"no-store\")\n            response.headers.setdefault(\"pragma\", \"no-cache\")\n        return response\n\n\nif __name__ == \"__main__\":\n    import sys\n\n    port = int(sys.argv[1] if len(sys.argv) > 1 else \"8004\")\n\n    audience = os.getenv(\"MCP_OAUTH_AUDIENCE\", \"api://mcp\")\n    issuer = os.getenv(\n        \"MCP_OAUTH_ISSUER\",\n        \"https://test-domain.okta.com/oauth2/default?well_known_override=https://test-domain.okta.com/oauth2/<as_id>/.well-known/oauth-authorization-server\",\n    )  # NOTE: the mcp client library currently tries the root discovery url before\n    # falling back to the one actually used by Okta. Our client code lets you specify this well_known_override\n    # for Okta and other Idps that use these discovery urls.\n\n    # issuer = os.getenv(\"MCP_OAUTH_ISSUER\", \"https://test-domain.okta.com/.well-known/oauth-authorization-server?issuer=https://test-domain.okta.com/oauth2/<auth_server_id>\")\n    jwks_uri = os.getenv(\n        \"MCP_OAUTH_JWKS_URI\", \"https://test-domain.okta.com/oauth2/default/v1/keys\"\n    )\n    required_scopes = os.getenv(\"MCP_OAUTH_REQUIRED_SCOPES\", \"mcp:use\")\n    print(f\"Required scopes: {required_scopes}\")\n    print(f\"Audience: {audience}\")\n    print(f\"Issuer: {issuer}\")\n    print(f\"JWKS URI: {jwks_uri}\")\n\n    verifier = JWTVerifier(\n        issuer=issuer.split(\"?\")[0],  # ignore auth url override if present\n        audience=audience,  # exactly what you set on the AS\n        jwks_uri=jwks_uri,\n        required_scopes=required_scopes.split(\n            \",\"\n        ),  # must be present in the token's `scp`\n    )\n\n    bind_host = os.getenv(\"MCP_SERVER_HOST\", \"127.0.0.1\")\n    public_host = os.getenv(\"MCP_SERVER_PUBLIC_HOST\", bind_host)\n    public_url = os.getenv(\"MCP_SERVER_PUBLIC_URL\")\n\n    mcp = FastMCP(\"My HTTP MCP\", auth=verifier)\n    make_many_tools(mcp)\n    mcp_app = mcp.http_app()\n\n    app = FastAPI(title=\"MCP over HTTP/SSE with OAuth\", lifespan=mcp_app.lifespan)\n\n    if public_url:\n        normalized_public_url = public_url.rstrip(\"/\")\n        if not normalized_public_url.endswith(\"/mcp\"):\n            normalized_public_url = f\"{normalized_public_url}/mcp\"\n        mcp_resource_url = f\"{normalized_public_url}/\"\n    else:\n        mcp_resource_url = f\"http://{public_host}:{port}/mcp/\"\n    authorization_servers = [issuer]\n    scopes_supported = [\"mcp:use\"]\n\n    init_app(app, mcp_resource_url, authorization_servers, scopes_supported)\n    PRM_URL = metadata_url_for_resource(mcp_resource_url)\n    print(f\"PRM URL: {PRM_URL}\")\n    print(f\"MCP Resource URL: {mcp_resource_url}\")\n    print(f\"Authorization Servers: {authorization_servers}\")\n    print(f\"Scopes Supported: {scopes_supported}\")\n\n    # Apply middleware at the parent app so it wraps mounted sub-apps too\n    app.add_middleware(WWWAuthenticateMiddleware, protected_prefixes=[\"/mcp\", \"/sse\"])\n\n    # 3) Mount MCP apps\n    # Streamable HTTP transport (recommended for modern MCP clients)\n    app.mount(\"/\", mcp_app)\n    # SSE transport (some clients still use this)\n    # app.mount(\"/sse\", mcp.sse_app()) # TODO: v2\n\n    uvicorn.run(app, host=bind_host, port=port)\n"
  },
  {
    "path": "backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_per_user_key.py",
    "content": "import sys\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import Dict\nfrom typing import Optional\n\nimport bcrypt\nfrom fastmcp import FastMCP\nfrom fastmcp.server.auth.auth import AccessToken\nfrom fastmcp.server.auth.auth import TokenVerifier\nfrom fastmcp.server.dependencies import get_access_token\n\n# pip install fastmcp bcrypt\n\n\n# ---- pretend database --------------------------------------------------------\n# Keys look like: \"mcp_live_<key_id>_<secret>\"\ndef _hash(secret: str) -> bytes:\n    return bcrypt.hashpw(secret.encode(), bcrypt.gensalt(rounds=12))\n\n\nAPI_KEY_RECORDS: Dict[str, Dict[str, Any]] = {\n    # key_id -> record\n    \"kid_alice_001\": {\n        \"user_id\": \"alice\",\n        \"hashed_secret\": _hash(\"S3cr3tAlice\"),\n        \"scopes\": [\"mcp:use\"],\n        \"revoked\": False,\n        \"expires_at\": None,  # or datetime(...)\n        \"metadata\": {\"plan\": \"pro\"},\n    },\n    \"kid_bob_001\": {\n        \"user_id\": \"bob\",\n        \"hashed_secret\": _hash(\"S3cr3tBob\"),\n        \"scopes\": [\"mcp:use\"],\n        \"revoked\": False,\n        \"expires_at\": None,\n        \"metadata\": {\"plan\": \"free\"},\n    },\n}\n\n# These are inferrable from the file anyways, no need to obfuscate.\n# use them to test your auth with this server\n#\n# mcp_live-kid_alice_001-S3cr3tAlice\n# mcp_live-kid_bob_001-S3cr3tBob\n\n\n# ---- verifier ---------------------------------------------------------------\nclass ApiKeyVerifier(TokenVerifier):\n    \"\"\"\n    Accepts API keys in Authorization: Bearer mcp_live_<key_id>_<secret>\n    Looks up <key_id> in storage, bcrypt-verifies <secret>, returns AccessToken.\n    \"\"\"\n\n    def __init__(self, api_key_dict: dict[str, Any]):\n        super().__init__()\n        self.api_key_dict = api_key_dict\n\n    async def verify_token(self, token: str) -> Optional[AccessToken]:\n        # print(f\"Verifying token: {token}\")\n        try:\n            prefix, key_id, secret = token.split(\"-\")\n            # print(f\"Prefix: {prefix}, Key ID: {key_id}, Secret: {secret}\")\n            if prefix not in (\"mcp_live\", \"mcp_test\"):\n                return None\n        except ValueError:\n            return None\n\n        rec = self.api_key_dict.get(key_id)\n        if not rec or rec.get(\"revoked\"):\n            return None\n        if rec.get(\"expires_at\") and rec[\"expires_at\"] < datetime.now(timezone.utc):\n            return None\n\n        # constant-time bcrypt verification\n        if not bcrypt.checkpw(secret.encode(), rec[\"hashed_secret\"]):\n            return None\n\n        # Build an AccessToken with claims FastMCP can pass to your tools\n        return AccessToken(\n            token=token,\n            client_id=rec[\"user_id\"],\n            scopes=rec.get(\"scopes\", []),\n            expires_at=rec.get(\"expires_at\"),\n            resource=None,\n            claims={\"key_id\": key_id, **rec.get(\"metadata\", {})},\n        )\n\n\n# ---- server -----------------------------------------------------------------\n\n\ndef make_many_tools(mcp: FastMCP) -> None:\n    def make_tool(i: int) -> None:\n        @mcp.tool(name=f\"tool_{i}\", description=f\"Get secret value {i}\")\n        def tool_name(name: str) -> str:  # noqa: ARG001\n            \"\"\"Get secret value.\"\"\"\n            return f\"Secret value {400 - i}!\"\n\n    for i in range(100):\n        make_tool(i)\n\n\nif __name__ == \"__main__\":\n    if len(sys.argv) > 1:\n        port = int(sys.argv[1])\n    else:\n        port = 8003\n\n    mcp = FastMCP(\"My HTTP MCP\", auth=ApiKeyVerifier(API_KEY_RECORDS))\n\n    @mcp.tool\n    def whoami() -> dict:\n        \"\"\"Return authenticated identity info (for demo).\"\"\"\n        # FastMCP exposes the verified AccessToken to tools; see docs for helpers\n        tok = get_access_token()\n        return {\n            \"user\": tok.client_id if tok else None,\n            \"scopes\": tok.scopes if tok else [],\n        }\n\n    make_many_tools(mcp)\n    mcp.run(transport=\"http\", host=\"127.0.0.1\", port=port, path=\"/mcp\")\n"
  },
  {
    "path": "backend/tests/integration/mock_services/mock_connector_server/Dockerfile",
    "content": "FROM python:3.11.7-slim-bookworm\n\nWORKDIR /app\n\nRUN pip install --no-cache-dir \"pydantic-core>=2.28.0\" fastapi uvicorn\n\nCOPY ./main.py /app/main.py\n\nCMD [\"uvicorn\", \"main:app\", \"--host\", \"0.0.0.0\", \"--port\", \"8001\"]\n"
  },
  {
    "path": "backend/tests/integration/mock_services/mock_connector_server/main.py",
    "content": "from fastapi import FastAPI\nfrom fastapi import HTTPException\nfrom pydantic import BaseModel\nfrom pydantic import Field\n\n# We would like to import these, but it makes building this so much harder/slower\n# from onyx.connectors.mock_connector.connector import SingleConnectorYield\n# from onyx.connectors.models import ConnectorCheckpoint\n\napp = FastAPI()\n\n\n# Global state to store connector behavior configuration\nclass ConnectorBehavior(BaseModel):\n    connector_yields: list[dict] = Field(\n        default_factory=list\n    )  # really list[SingleConnectorYield]\n    called_with_checkpoints: list[dict] = Field(\n        default_factory=list\n    )  # really list[ConnectorCheckpoint]\n\n\ncurrent_behavior: ConnectorBehavior = ConnectorBehavior()\n\n\n@app.post(\"/set-behavior\")\nasync def set_behavior(behavior: list[dict]) -> None:\n    \"\"\"Set the behavior for the next connector run\"\"\"\n    global current_behavior\n    current_behavior = ConnectorBehavior(connector_yields=behavior)\n\n\n@app.get(\"/get-documents\")\nasync def get_documents() -> list[dict]:\n    \"\"\"Get the next batch of documents and update the checkpoint\"\"\"\n    global current_behavior\n\n    if not current_behavior.connector_yields:\n        raise HTTPException(\n            status_code=400, detail=\"No documents or failures configured\"\n        )\n\n    connector_yields = current_behavior.connector_yields\n\n    # Clear the current behavior after returning it\n    current_behavior = ConnectorBehavior()\n\n    return connector_yields\n\n\n@app.post(\"/add-checkpoint\")\nasync def add_checkpoint(checkpoint: dict) -> None:\n    \"\"\"Add a checkpoint to the list of checkpoints. Called by the MockConnector.\"\"\"\n    global current_behavior\n    current_behavior.called_with_checkpoints.append(checkpoint)\n\n\n@app.get(\"/get-checkpoints\")\nasync def get_checkpoints() -> list[dict]:\n    \"\"\"Get the list of checkpoints. Used by the test to verify the\n    proper checkpoint ordering.\"\"\"\n    global current_behavior\n    return current_behavior.called_with_checkpoints\n\n\n@app.post(\"/reset\")\nasync def reset() -> None:\n    \"\"\"Reset the connector behavior to default\"\"\"\n    global current_behavior\n    current_behavior = ConnectorBehavior()\n\n\n@app.get(\"/health\")\nasync def health_check() -> dict[str, str]:\n    \"\"\"Health check endpoint\"\"\"\n    return {\"status\": \"healthy\"}\n"
  },
  {
    "path": "backend/tests/integration/multitenant_tests/discord_bot/test_discord_bot_multitenant.py",
    "content": "\"\"\"Multi-tenant isolation tests for Discord bot.\n\nThese tests ensure tenant isolation and prevent data leakage between tenants.\nTests follow the multi-tenant integration test pattern using API requests.\n\"\"\"\n\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nimport requests\n\nfrom onyx.configs.constants import AuthType\nfrom onyx.db.discord_bot import get_guild_config_by_registration_key\nfrom onyx.db.discord_bot import register_guild\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.models import UserRole\nfrom onyx.onyxbot.discord.cache import DiscordCacheManager\nfrom onyx.server.manage.discord_bot.utils import generate_discord_registration_key\nfrom onyx.server.manage.discord_bot.utils import parse_discord_registration_key\nfrom onyx.server.manage.discord_bot.utils import REGISTRATION_KEY_PREFIX\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass TestBotConfigIsolationCloudMode:\n    \"\"\"Tests for bot config isolation in cloud mode.\"\"\"\n\n    def test_cannot_create_bot_config_in_cloud_mode(self) -> None:\n        \"\"\"Bot config creation is blocked in cloud mode.\"\"\"\n        with patch(\"onyx.configs.app_configs.AUTH_TYPE\", AuthType.CLOUD):\n            from fastapi import HTTPException\n\n            from onyx.server.manage.discord_bot.api import _check_bot_config_api_access\n\n            with pytest.raises(HTTPException) as exc_info:\n                _check_bot_config_api_access()\n\n            assert exc_info.value.status_code == 403\n            assert \"Cloud\" in str(exc_info.value.detail)\n\n    def test_bot_token_from_env_only_in_cloud(self) -> None:\n        \"\"\"Bot token comes from env var in cloud mode, ignores DB.\"\"\"\n        from onyx.onyxbot.discord.utils import get_bot_token\n\n        with (\n            patch(\"onyx.onyxbot.discord.utils.DISCORD_BOT_TOKEN\", \"env_token\"),\n            patch(\"onyx.onyxbot.discord.utils.AUTH_TYPE\", AuthType.CLOUD),\n        ):\n            result = get_bot_token()\n\n        assert result == \"env_token\"\n\n\nclass TestGuildRegistrationIsolation:\n    \"\"\"Tests for guild registration isolation between tenants.\"\"\"\n\n    def test_guild_can_only_register_to_one_tenant(self) -> None:\n        \"\"\"Guild registered to tenant 1 cannot be registered to tenant 2.\"\"\"\n        cache = DiscordCacheManager()\n\n        # Register guild to tenant 1\n        cache._guild_tenants[123456789] = \"tenant1\"\n\n        # Check if guild is already registered\n        existing = cache.get_tenant(123456789)\n\n        assert existing is not None\n        assert existing == \"tenant1\"\n\n    def test_registration_key_tenant_mismatch(self) -> None:\n        \"\"\"Key created in tenant 1 cannot be used in tenant 2 context.\"\"\"\n        key = generate_discord_registration_key(\"tenant1\")\n\n        # Parse the key to get tenant\n        parsed_tenant = parse_discord_registration_key(key)\n\n        assert parsed_tenant == \"tenant1\"\n        assert parsed_tenant != \"tenant2\"\n\n    def test_registration_key_encodes_correct_tenant(self) -> None:\n        \"\"\"Key format discord_<tenant_id>.<token> encodes correct tenant.\"\"\"\n        tenant_id = \"my_tenant_123\"\n        key = generate_discord_registration_key(tenant_id)\n\n        assert key.startswith(REGISTRATION_KEY_PREFIX)\n        assert \"my_tenant_123\" in key or \"my%5Ftenant%5F123\" in key\n\n        parsed = parse_discord_registration_key(key)\n        assert parsed == tenant_id\n\n\nclass TestGuildDataIsolation:\n    \"\"\"Tests for guild data isolation between tenants via API.\"\"\"\n\n    def test_tenant_cannot_see_other_tenant_guilds(\n        self,\n        reset_multitenant: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Guilds created in tenant 1 are not visible from tenant 2.\n\n        Creates guilds via API in tenant 1, then queries from tenant 2\n        context to verify the guilds are not visible.\n        \"\"\"\n        unique = uuid4().hex\n\n        # Create admin user for tenant 1\n        admin_user1: DATestUser = UserManager.create(\n            email=f\"discord_admin1_{unique}@example.com\",\n        )\n        assert UserManager.is_role(admin_user1, UserRole.ADMIN)\n\n        # Create admin user for tenant 2\n        admin_user2: DATestUser = UserManager.create(\n            email=f\"discord_admin2_{unique}@example.com\",\n        )\n        assert UserManager.is_role(admin_user2, UserRole.ADMIN)\n\n        # Create a guild registration key in tenant 1\n        response1 = requests.post(\n            f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds\",\n            headers=admin_user1.headers,\n        )\n\n        # If Discord bot feature is not enabled, skip the test\n        if response1.status_code == 404:\n            pytest.skip(\"Discord bot feature not enabled\")\n\n        assert response1.ok, f\"Failed to create guild in tenant 1: {response1.text}\"\n        guild1_data = response1.json()\n        guild1_id = guild1_data[\"id\"]\n\n        try:\n            # List guilds from tenant 1 - should see the guild\n            list_response1 = requests.get(\n                f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds\",\n                headers=admin_user1.headers,\n            )\n            assert list_response1.ok\n            tenant1_guilds = list_response1.json()\n            tenant1_guild_ids = [g[\"id\"] for g in tenant1_guilds]\n            assert guild1_id in tenant1_guild_ids\n\n            # List guilds from tenant 2 - should NOT see tenant 1's guild\n            list_response2 = requests.get(\n                f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds\",\n                headers=admin_user2.headers,\n            )\n            assert list_response2.ok\n            tenant2_guilds = list_response2.json()\n            tenant2_guild_ids = [g[\"id\"] for g in tenant2_guilds]\n            assert guild1_id not in tenant2_guild_ids\n\n        finally:\n            # Cleanup - delete guild from tenant 1\n            requests.delete(\n                f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds/{guild1_id}\",\n                headers=admin_user1.headers,\n            )\n\n    def test_guild_list_returns_only_own_tenant(\n        self,\n        reset_multitenant: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"List guilds returns exactly the guilds for that tenant.\n\n        Creates 1 guild in each tenant, registers them with different data,\n        and verifies each tenant only sees their own guild.\n        \"\"\"\n        unique = uuid4().hex\n\n        # Create admin users for two tenants\n        admin_user1: DATestUser = UserManager.create(\n            email=f\"discord_list1_{unique}@example.com\",\n        )\n        admin_user2: DATestUser = UserManager.create(\n            email=f\"discord_list2_{unique}@example.com\",\n        )\n\n        # Create 1 guild in tenant 1\n        response1 = requests.post(\n            f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds\",\n            headers=admin_user1.headers,\n        )\n        if response1.status_code == 404:\n            pytest.skip(\"Discord bot feature not enabled\")\n        assert response1.ok, f\"Failed to create guild in tenant 1: {response1.text}\"\n        guild1_data = response1.json()\n        guild1_id = guild1_data[\"id\"]\n        registration_key1 = guild1_data[\"registration_key\"]\n        tenant1_id = parse_discord_registration_key(registration_key1)\n        assert (\n            tenant1_id is not None\n        ), \"Failed to parse tenant ID from registration key 1\"\n\n        # Create 1 guild in tenant 2\n        response2 = requests.post(\n            f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds\",\n            headers=admin_user2.headers,\n        )\n        assert response2.ok, f\"Failed to create guild in tenant 2: {response2.text}\"\n        guild2_data = response2.json()\n        guild2_id = guild2_data[\"id\"]\n        registration_key2 = guild2_data[\"registration_key\"]\n        tenant2_id = parse_discord_registration_key(registration_key2)\n        assert (\n            tenant2_id is not None\n        ), \"Failed to parse tenant ID from registration key 2\"\n\n        # Verify tenant IDs are different\n        assert (\n            tenant1_id != tenant2_id\n        ), \"Tenant 1 and tenant 2 should have different tenant IDs\"\n\n        # Register guild 1 with tenant 1's context - populate with different data\n        with get_session_with_tenant(tenant_id=tenant1_id) as db_session:\n            config1 = get_guild_config_by_registration_key(\n                db_session, registration_key1\n            )\n            assert config1 is not None, \"Guild config 1 should exist\"\n            register_guild(\n                db_session=db_session,\n                config=config1,\n                guild_id=111111111111111111,  # Different Discord guild ID\n                guild_name=\"Tenant 1 Server\",  # Different guild name\n            )\n            db_session.commit()\n\n        # Register guild 2 with tenant 2's context - populate with different data\n        with get_session_with_tenant(tenant_id=tenant2_id) as db_session:\n            config2 = get_guild_config_by_registration_key(\n                db_session, registration_key2\n            )\n            assert config2 is not None, \"Guild config 2 should exist\"\n            register_guild(\n                db_session=db_session,\n                config=config2,\n                guild_id=222222222222222222,  # Different Discord guild ID\n                guild_name=\"Tenant 2 Server\",  # Different guild name\n            )\n            db_session.commit()\n\n        try:\n            # Verify tenant 1 sees only their guild\n            list_response1 = requests.get(\n                f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds\",\n                headers=admin_user1.headers,\n            )\n            assert list_response1.ok\n            tenant1_guilds = list_response1.json()\n\n            # Tenant 1 should see exactly 1 guild\n            assert (\n                len(tenant1_guilds) == 1\n            ), f\"Tenant 1 should see 1 guild, got {len(tenant1_guilds)}\"\n\n            # Verify tenant 1's guild has the correct data\n            tenant1_guild = tenant1_guilds[0]\n            assert (\n                tenant1_guild[\"id\"] == guild1_id\n            ), \"Tenant 1 should see their own guild\"\n            assert (\n                tenant1_guild[\"guild_id\"] == 111111111111111111\n            ), f\"Tenant 1's guild should have guild_id 111111111111111111, got {tenant1_guild['guild_id']}\"\n            assert (\n                tenant1_guild[\"guild_name\"] == \"Tenant 1 Server\"\n            ), f\"Tenant 1's guild should have name 'Tenant 1 Server', got {tenant1_guild['guild_name']}\"\n            assert (\n                tenant1_guild[\"registered_at\"] is not None\n            ), \"Tenant 1's guild should be registered\"\n\n            # Tenant 1 should NOT see tenant 2's guild\n            assert (\n                tenant1_guild[\"guild_id\"] != 222222222222222222\n            ), \"Tenant 1 should not see tenant 2's guild_id\"\n            assert (\n                tenant1_guild[\"guild_name\"] != \"Tenant 2 Server\"\n            ), \"Tenant 1 should not see tenant 2's guild_name\"\n\n            # Verify tenant 2 sees only their guild\n            list_response2 = requests.get(\n                f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds\",\n                headers=admin_user2.headers,\n            )\n            assert list_response2.ok\n            tenant2_guilds = list_response2.json()\n\n            # Tenant 2 should see exactly 1 guild\n            assert (\n                len(tenant2_guilds) == 1\n            ), f\"Tenant 2 should see 1 guild, got {len(tenant2_guilds)}\"\n\n            # Verify tenant 2's guild has the correct data\n            tenant2_guild = tenant2_guilds[0]\n            assert (\n                tenant2_guild[\"id\"] == guild2_id\n            ), \"Tenant 2 should see their own guild\"\n            assert (\n                tenant2_guild[\"guild_id\"] == 222222222222222222\n            ), f\"Tenant 2's guild should have guild_id 222222222222222222, got {tenant2_guild['guild_id']}\"\n            assert (\n                tenant2_guild[\"guild_name\"] == \"Tenant 2 Server\"\n            ), f\"Tenant 2's guild should have name 'Tenant 2 Server', got {tenant2_guild['guild_name']}\"\n            assert (\n                tenant2_guild[\"registered_at\"] is not None\n            ), \"Tenant 2's guild should be registered\"\n\n            # Tenant 2 should NOT see tenant 1's guild\n            assert (\n                tenant2_guild[\"guild_id\"] != 111111111111111111\n            ), \"Tenant 2 should not see tenant 1's guild_id\"\n            assert (\n                tenant2_guild[\"guild_name\"] != \"Tenant 1 Server\"\n            ), \"Tenant 2 should not see tenant 1's guild_name\"\n\n            # Verify the guilds are different (different data)\n            assert (\n                tenant1_guild[\"guild_id\"] != tenant2_guild[\"guild_id\"]\n            ), \"Guilds should have different Discord guild IDs\"\n            assert (\n                tenant1_guild[\"guild_name\"] != tenant2_guild[\"guild_name\"]\n            ), \"Guilds should have different names\"\n\n        finally:\n            # Cleanup\n            requests.delete(\n                f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds/{guild1_id}\",\n                headers=admin_user1.headers,\n            )\n            requests.delete(\n                f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds/{guild2_id}\",\n                headers=admin_user2.headers,\n            )\n\n\nclass TestGuildAccessIsolation:\n    \"\"\"Tests for guild access isolation between tenants.\"\"\"\n\n    def test_tenant_cannot_access_other_tenant_guild(\n        self,\n        reset_multitenant: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Tenant 2 cannot access or modify tenant 1's guild by ID.\n\n        Creates a guild in tenant 1, then attempts to access it from tenant 2.\n        \"\"\"\n        unique = uuid4().hex\n\n        # Create admin users for two tenants\n        admin_user1: DATestUser = UserManager.create(\n            email=f\"discord_access1_{unique}@example.com\",\n        )\n        admin_user2: DATestUser = UserManager.create(\n            email=f\"discord_access2_{unique}@example.com\",\n        )\n\n        # Create a guild in tenant 1\n        response = requests.post(\n            f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds\",\n            headers=admin_user1.headers,\n        )\n        if response.status_code == 404:\n            pytest.skip(\"Discord bot feature not enabled\")\n        assert response.ok\n        guild1_id = response.json()[\"id\"]\n\n        try:\n            # Tenant 2 tries to get the guild - should fail (404 or 403)\n            get_response = requests.get(\n                f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds/{guild1_id}\",\n                headers=admin_user2.headers,\n            )\n            # Should either return 404 (not found) or 403 (forbidden)\n            assert get_response.status_code in [\n                403,\n                404,\n            ], f\"Expected 403 or 404, got {get_response.status_code}\"\n\n            # Tenant 2 tries to delete the guild - should fail\n            delete_response = requests.delete(\n                f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds/{guild1_id}\",\n                headers=admin_user2.headers,\n            )\n            assert delete_response.status_code in [403, 404]\n\n        finally:\n            # Cleanup - delete from tenant 1\n            requests.delete(\n                f\"{API_SERVER_URL}/manage/admin/discord-bot/guilds/{guild1_id}\",\n                headers=admin_user1.headers,\n            )\n\n\nclass TestCacheManagerIsolation:\n    \"\"\"Tests for cache manager tenant isolation.\"\"\"\n\n    def test_cache_maps_guild_to_correct_tenant(self) -> None:\n        \"\"\"Cache correctly maps guild_id to tenant_id.\"\"\"\n        cache = DiscordCacheManager()\n\n        # Set up mappings\n        cache._guild_tenants[111] = \"tenant1\"\n        cache._guild_tenants[222] = \"tenant2\"\n        cache._guild_tenants[333] = \"tenant1\"\n\n        assert cache.get_tenant(111) == \"tenant1\"\n        assert cache.get_tenant(222) == \"tenant2\"\n        assert cache.get_tenant(333) == \"tenant1\"\n        assert cache.get_tenant(444) is None\n\n    def test_api_key_per_tenant_isolation(self) -> None:\n        \"\"\"Each tenant has unique API key.\"\"\"\n        cache = DiscordCacheManager()\n\n        cache._api_keys[\"tenant1\"] = \"key_for_tenant1\"\n        cache._api_keys[\"tenant2\"] = \"key_for_tenant2\"\n\n        assert cache.get_api_key(\"tenant1\") == \"key_for_tenant1\"\n        assert cache.get_api_key(\"tenant2\") == \"key_for_tenant2\"\n        assert cache.get_api_key(\"tenant1\") != cache.get_api_key(\"tenant2\")\n\n\nclass TestAPIRequestIsolation:\n    \"\"\"Tests for API request isolation between tenants.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_discord_bot_uses_tenant_specific_api_key(self) -> None:\n        \"\"\"Message from guild in tenant 1 uses tenant 1's API key.\"\"\"\n        cache = DiscordCacheManager()\n        cache._guild_tenants[123456] = \"tenant1\"\n        cache._api_keys[\"tenant1\"] = \"tenant1_api_key\"\n        cache._api_keys[\"tenant2\"] = \"tenant2_api_key\"\n\n        # When processing message from guild 123456\n        tenant = cache.get_tenant(123456)\n        assert tenant is not None\n        api_key = cache.get_api_key(tenant)\n\n        assert tenant == \"tenant1\"\n        assert api_key == \"tenant1_api_key\"\n        assert api_key != \"tenant2_api_key\"\n\n    @pytest.mark.asyncio\n    async def test_guild_message_routes_to_correct_tenant(self) -> None:\n        \"\"\"Message from registered guild routes to correct tenant context.\"\"\"\n        cache = DiscordCacheManager()\n        cache._guild_tenants[999] = \"target_tenant\"\n        cache._api_keys[\"target_tenant\"] = \"target_key\"\n\n        # Simulate message routing\n        guild_id = 999\n        tenant = cache.get_tenant(guild_id)\n        api_key = cache.get_api_key(tenant) if tenant else None\n\n        assert tenant == \"target_tenant\"\n        assert api_key == \"target_key\"\n"
  },
  {
    "path": "backend/tests/integration/multitenant_tests/invitation/test_user_invitation.py",
    "content": "from uuid import uuid4\n\nfrom onyx.db.models import UserRole\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\nINVITED_BASIC_USER = \"basic_user\"\nINVITED_BASIC_USER_EMAIL = \"basic_user@example.com\"\n\n\ndef test_admin_can_invite_users(reset_multitenant: None) -> None:  # noqa: ARG001\n    \"\"\"Test that an admin can invite both registered and non-registered users.\"\"\"\n    # Create first user (admin)\n    unique = uuid4().hex\n    admin_user: DATestUser = UserManager.create(name=f\"admin_{unique}\")\n    assert UserManager.is_role(admin_user, UserRole.ADMIN)\n\n    # Create second user\n    invited_user: DATestUser = UserManager.create(name=f\"admin_invited_{unique}\")\n    assert UserManager.is_role(invited_user, UserRole.ADMIN)\n\n    # Admin user invites the previously registered and non-registered user\n    UserManager.invite_user(invited_user.email, admin_user)\n    UserManager.invite_user(f\"{INVITED_BASIC_USER}_{unique}@example.com\", admin_user)\n\n    # Verify users are in the invited users list\n    invited_users = UserManager.get_invited_users(admin_user)\n    assert invited_user.email in [\n        user.email for user in invited_users\n    ], f\"User {invited_user.email} not found in invited users list\"\n\n\ndef test_non_registered_user_gets_basic_role(\n    reset_multitenant: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that a non-registered user gets a BASIC role when they register after being invited.\"\"\"\n    # Create admin user\n    unique = uuid4().hex\n    admin_user: DATestUser = UserManager.create(name=f\"admin_{unique}\")\n    assert UserManager.is_role(admin_user, UserRole.ADMIN)\n\n    # Admin user invites a non-registered user\n    invited_email = f\"{INVITED_BASIC_USER}_{unique}@example.com\"\n    UserManager.invite_user(invited_email, admin_user)\n\n    # Non-registered user registers\n    invited_basic_user: DATestUser = UserManager.create(\n        name=f\"{INVITED_BASIC_USER}_{unique}\", email=invited_email\n    )\n    assert UserManager.is_role(invited_basic_user, UserRole.BASIC)\n\n\ndef test_user_can_accept_invitation(\n    reset_multitenant: None,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"Test that a user can accept an invitation and join the organization with BASIC role.\"\"\"\n    # Create admin user\n    unique = uuid4().hex\n    admin_user: DATestUser = UserManager.create(name=f\"admin_{unique}\")\n    assert UserManager.is_role(admin_user, UserRole.ADMIN)\n\n    # Create a user to be invited\n    invited_user_email = f\"invited_user_{unique}@example.com\"\n\n    # User registers with the same email as the invitation\n    invited_user: DATestUser = UserManager.create(\n        name=f\"invited_user_{unique}\", email=invited_user_email\n    )\n    # Admin user invites the user\n    UserManager.invite_user(invited_user_email, admin_user)\n\n    # Get user info to check tenant information\n    user_info = UserManager.get_user_info(invited_user)\n\n    # Extract the tenant_id from the invitation\n    invited_tenant_id = (\n        user_info.tenant_info.invitation.tenant_id\n        if user_info.tenant_info and user_info.tenant_info.invitation\n        else None\n    )\n    assert invited_tenant_id is not None, \"Expected to find an invitation tenant_id\"\n\n    # User accepts invitation\n    UserManager.accept_invitation(invited_tenant_id, invited_user)\n\n    # User needs to reauthenticate after accepting invitation\n    # Simulate this by creating a new user instance with the same credentials\n    authenticated_user: DATestUser = UserManager.create(\n        name=\"invited_user\", email=invited_user_email\n    )\n\n    # Get updated user info after accepting invitation and reauthenticating\n    updated_user_info = UserManager.get_user_info(authenticated_user)\n\n    # Verify the user has BASIC role in the organization\n    assert (\n        updated_user_info.role == UserRole.BASIC\n    ), f\"Expected user to have BASIC role, but got {updated_user_info.role}\"\n\n    # Verify user is in the organization\n    user_page = UserManager.get_user_page(\n        user_performing_action=admin_user, role_filter=[UserRole.BASIC]\n    )\n\n    # Check if the invited user is in the list of users with BASIC role\n    invited_user_emails = [user.email for user in user_page.items]\n    assert invited_user_email in invited_user_emails, (\n        f\"User {invited_user_email} not found in the list of basic users \"\n        f\"in the organization. Available users: {invited_user_emails}\"\n    )\n\n    invited_users = UserManager.get_invited_users(admin_user)\n    assert invited_user.email not in [\n        user.email for user in invited_users\n    ], f\"User {invited_user.email} should not be found in invited users list after accepting invitation\"\n"
  },
  {
    "path": "backend/tests/integration/multitenant_tests/migrations/test_run_multitenant_migrations.py",
    "content": "\"\"\"\nBlack-box integration tests for the parallel alembic migration runner\n(backend/alembic/run_multitenant_migrations.py).\n\nThe script is invoked as a subprocess — the same way it would be used in\nproduction.  Tests verify exit codes and stdout messages.\n\nUsage:\n    pytest tests/integration/tests/migrations/test_run_multitenant_migrations.py -v\n\"\"\"\n\nfrom __future__ import annotations\n\nimport os\nimport subprocess\nimport sys\nimport time\nimport uuid\nfrom collections.abc import Generator\n\nimport pytest\nfrom sqlalchemy import text\nfrom sqlalchemy.engine import Engine\n\nfrom onyx.db.engine.sql_engine import SqlEngine\n\n# Resolve the backend/ directory once so every helper can use it as cwd.\n_BACKEND_DIR = os.path.normpath(\n    os.path.join(os.path.dirname(__file__), \"..\", \"..\", \"..\", \"..\")\n)\n\n_DROP_SCHEMA_MAX_RETRIES = 3\n_DROP_SCHEMA_RETRY_DELAY_SEC = 2\n\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef _run_script(\n    *extra_args: str,\n    env_override: dict[str, str] | None = None,\n) -> subprocess.CompletedProcess[str]:\n    \"\"\"Run ``python alembic/run_multitenant_migrations.py`` from the backend/ directory.\"\"\"\n    env = {**os.environ, **(env_override or {})}\n    return subprocess.run(\n        [sys.executable, \"alembic/run_multitenant_migrations.py\", *extra_args],\n        cwd=_BACKEND_DIR,\n        stdout=subprocess.PIPE,\n        stderr=subprocess.STDOUT,\n        text=True,\n        env=env,\n    )\n\n\ndef _force_drop_schema(engine: Engine, schema: str) -> None:\n    \"\"\"Terminate backends using *schema* then drop it, retrying on deadlock.\n\n    Background Celery workers may discover test schemas (they match the\n    ``tenant_`` prefix) and hold locks on tables inside them.  A bare\n    ``DROP SCHEMA … CASCADE`` can deadlock with those workers, so we\n    first kill their connections and retry if we still hit a deadlock.\n    \"\"\"\n    for attempt in range(_DROP_SCHEMA_MAX_RETRIES):\n        try:\n            with engine.connect() as conn:\n                conn.execute(\n                    text(\n                        \"\"\"\n                        SELECT pg_terminate_backend(l.pid)\n                        FROM pg_locks l\n                        JOIN pg_class c ON c.oid = l.relation\n                        JOIN pg_namespace n ON n.oid = c.relnamespace\n                        WHERE n.nspname = :schema\n                          AND l.pid != pg_backend_pid()\n                        \"\"\"\n                    ),\n                    {\"schema\": schema},\n                )\n                conn.execute(text(f'DROP SCHEMA IF EXISTS \"{schema}\" CASCADE'))\n                conn.commit()\n            return\n        except Exception:\n            if attempt == _DROP_SCHEMA_MAX_RETRIES - 1:\n                raise\n            time.sleep(_DROP_SCHEMA_RETRY_DELAY_SEC)\n\n\n# ---------------------------------------------------------------------------\n# Fixtures\n# ---------------------------------------------------------------------------\n\n\n@pytest.fixture\ndef engine() -> Engine:\n    return SqlEngine.get_engine()\n\n\n@pytest.fixture\ndef current_head_rev() -> str:\n    \"\"\"Get the head revision from the alembic script directory.\n\n    Runs ``alembic heads`` as a subprocess — the same source of truth that\n    ``run_multitenant_migrations.py`` uses internally.\n    \"\"\"\n    result = subprocess.run(\n        [\"alembic\", \"heads\", \"--resolve-dependencies\"],\n        cwd=_BACKEND_DIR,\n        stdout=subprocess.PIPE,\n        stderr=subprocess.STDOUT,\n        text=True,\n    )\n    assert (\n        result.returncode == 0\n    ), f\"alembic heads failed (exit {result.returncode}):\\n{result.stdout}\"\n    # Output looks like \"d5c86e2c6dc6 (head)\\n\"\n    rev = result.stdout.strip().split()[0]\n    assert len(rev) > 0\n    return rev\n\n\n@pytest.fixture\ndef tenant_schema_at_head(\n    engine: Engine, current_head_rev: str\n) -> Generator[str, None, None]:\n    \"\"\"Create a temporary tenant schema whose alembic_version is at head.\"\"\"\n    schema = f\"tenant_test_{uuid.uuid4().hex[:12]}\"\n    with engine.connect() as conn:\n        conn.execute(text(f'CREATE SCHEMA \"{schema}\"'))\n        conn.execute(\n            text(\n                f'CREATE TABLE \"{schema}\".alembic_version (version_num VARCHAR(32) NOT NULL)'\n            )\n        )\n        conn.execute(\n            text(f'INSERT INTO \"{schema}\".alembic_version (version_num) VALUES (:rev)'),\n            {\"rev\": current_head_rev},\n        )\n        conn.commit()\n\n    yield schema\n\n    _force_drop_schema(engine, schema)\n\n\n@pytest.fixture\ndef tenant_schema_empty(engine: Engine) -> Generator[str, None, None]:\n    \"\"\"Create a temporary tenant schema with no tables at all.\n\n    Alembic will treat it as a fresh schema and run every migration from base\n    to head.\n    \"\"\"\n    schema = f\"tenant_test_{uuid.uuid4().hex[:12]}\"\n    with engine.connect() as conn:\n        conn.execute(text(f'CREATE SCHEMA \"{schema}\"'))\n        conn.commit()\n\n    yield schema\n\n    _force_drop_schema(engine, schema)\n\n\n@pytest.fixture\ndef tenant_schema_bad_rev(engine: Engine) -> Generator[str, None, None]:\n    \"\"\"Create a tenant schema whose alembic_version points to a non-existent\n    revision.  Alembic cannot find a migration path from this revision, so\n    it will fail.\"\"\"\n    schema = f\"tenant_test_{uuid.uuid4().hex[:12]}\"\n    with engine.connect() as conn:\n        conn.execute(text(f'CREATE SCHEMA \"{schema}\"'))\n        conn.execute(\n            text(\n                f'CREATE TABLE \"{schema}\".alembic_version (version_num VARCHAR(32) NOT NULL)'\n            )\n        )\n        conn.execute(\n            text(\n                f\"INSERT INTO \\\"{schema}\\\".alembic_version (version_num) VALUES ('00000bad0000')\"\n            )\n        )\n        conn.commit()\n\n    yield schema\n\n    _force_drop_schema(engine, schema)\n\n\n# ---------------------------------------------------------------------------\n# Tests\n# ---------------------------------------------------------------------------\n\n\ndef test_no_tenant_schemas_exits_nonzero() -> None:\n    \"\"\"In non-multi-tenant mode there are no tenant_ schemas, so the script\n    should print a hint and exit 1.\"\"\"\n    result = _run_script(env_override={\"MULTI_TENANT\": \"false\"})\n    assert result.returncode == 1\n    assert \"No tenant schemas found\" in result.stdout\n    assert \"MULTI_TENANT\" in result.stdout\n\n\ndef test_at_head_schema_is_skipped(tenant_schema_at_head: str) -> None:\n    \"\"\"A tenant schema already at head should not be targeted for migration.\"\"\"\n    result = _run_script(\n        \"--jobs\",\n        \"1\",\n        \"--batch-size\",\n        \"50\",\n        env_override={\"MULTI_TENANT\": \"true\"},\n    )\n    assert result.returncode == 0\n    # Our at-head schema should not appear in any batch \"started\" lines.\n    batch_start_lines = [\n        line\n        for line in result.stdout.splitlines()\n        if \"Batch\" in line and \"started\" in line\n    ]\n    for line in batch_start_lines:\n        assert tenant_schema_at_head not in line\n\n\ndef test_detects_schemas_needing_migration(\n    tenant_schema_at_head: str,\n    tenant_schema_empty: str,\n) -> None:\n    \"\"\"When some schemas are behind, the script should report how many need\n    migration, upgrade them, and succeed.\"\"\"\n    result = _run_script(\n        \"--jobs\",\n        \"1\",\n        \"--batch-size\",\n        \"50\",\n        env_override={\"MULTI_TENANT\": \"true\"},\n    )\n    assert result.returncode == 0, f\"Script failed:\\n{result.stdout}\"\n    assert \"tenants need migration\" in result.stdout\n    assert \"All migrations successful\" in result.stdout\n\n    # The empty schema should appear in the batch that was started.\n    assert tenant_schema_empty in result.stdout\n\n    # The at-head schema should NOT appear in any batch \"started\" lines\n    # (it was filtered out by get_schemas_needing_migration).\n    batch_start_lines = [\n        line\n        for line in result.stdout.splitlines()\n        if \"Batch\" in line and \"started\" in line\n    ]\n    for line in batch_start_lines:\n        assert tenant_schema_at_head not in line\n\n\ndef test_failed_migration(\n    tenant_schema_at_head: str,\n    tenant_schema_empty: str,\n    tenant_schema_bad_rev: str,\n) -> None:\n    \"\"\"A schema with a bogus alembic revision causes alembic to fail.\n\n    The script should:\n    - Exit non-zero (some migrations failed).\n    - Still skip the at-head schema.\n    - Still attempt the other schemas via the ``continue=true`` retry.\n    \"\"\"\n    result = _run_script(\n        \"--jobs\",\n        \"1\",\n        \"--batch-size\",\n        \"50\",\n        env_override={\"MULTI_TENANT\": \"true\"},\n    )\n    assert result.returncode == 1, f\"Expected failure but got:\\n{result.stdout}\"\n    assert \"Some migrations failed\" in result.stdout\n\n    # The bad-rev schema should appear in the batch (it needs migration).\n    assert tenant_schema_bad_rev in result.stdout\n\n    # The empty schema should also appear (it was attempted via continue=true retry).\n    assert tenant_schema_empty in result.stdout\n\n    # The at-head schema should still be skipped.\n    batch_start_lines = [\n        line\n        for line in result.stdout.splitlines()\n        if \"Batch\" in line and \"started\" in line\n    ]\n    for line in batch_start_lines:\n        assert tenant_schema_at_head not in line\n"
  },
  {
    "path": "backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py",
    "content": "from typing import Any\nfrom uuid import uuid4\n\nfrom onyx.db.models import UserRole\nfrom tests.integration.common_utils.managers.api_key import APIKeyManager\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestAPIKey\nfrom tests.integration.common_utils.test_models import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestChatSession\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import ToolName\n\n\ndef setup_test_tenants(reset_multitenant: None) -> dict[str, Any]:  # noqa: ARG001\n    \"\"\"Helper function to set up test tenants with documents and users.\"\"\"\n    unique = uuid4().hex\n    # Creating an admin user for Tenant 1\n    admin_user1: DATestUser = UserManager.create(\n        email=f\"admin_{unique}@example.com\",\n    )\n    assert UserManager.is_role(admin_user1, UserRole.ADMIN)\n\n    # Create Tenant 2 and its Admin User\n    admin_user2: DATestUser = UserManager.create(\n        email=f\"admin2_{unique}@example.com\",\n    )\n    assert UserManager.is_role(admin_user2, UserRole.ADMIN)\n\n    # Create connectors for Tenant 1\n    cc_pair_1: DATestCCPair = CCPairManager.create_from_scratch(\n        user_performing_action=admin_user1,\n    )\n    api_key_1: DATestAPIKey = APIKeyManager.create(\n        user_performing_action=admin_user1,\n    )\n    api_key_1.headers.update(admin_user1.headers)\n    LLMProviderManager.create(user_performing_action=admin_user1)\n\n    # Create connectors for Tenant 2\n    cc_pair_2: DATestCCPair = CCPairManager.create_from_scratch(\n        user_performing_action=admin_user2,\n    )\n    api_key_2: DATestAPIKey = APIKeyManager.create(\n        user_performing_action=admin_user2,\n    )\n    api_key_2.headers.update(admin_user2.headers)\n    LLMProviderManager.create(user_performing_action=admin_user2)\n\n    # Seed documents for Tenant 1\n    cc_pair_1.documents = []\n    doc1_tenant1 = DocumentManager.seed_doc_with_content(\n        cc_pair=cc_pair_1,\n        content=\"Tenant 1 Document Content\",\n        api_key=api_key_1,\n    )\n    doc2_tenant1 = DocumentManager.seed_doc_with_content(\n        cc_pair=cc_pair_1,\n        content=\"Tenant 1 Document Content\",\n        api_key=api_key_1,\n    )\n    cc_pair_1.documents.extend([doc1_tenant1, doc2_tenant1])\n\n    # Seed documents for Tenant 2\n    cc_pair_2.documents = []\n    doc1_tenant2 = DocumentManager.seed_doc_with_content(\n        cc_pair=cc_pair_2,\n        content=\"Tenant 2 Document Content\",\n        api_key=api_key_2,\n    )\n    doc2_tenant2 = DocumentManager.seed_doc_with_content(\n        cc_pair=cc_pair_2,\n        content=\"Tenant 2 Document Content\",\n        api_key=api_key_2,\n    )\n    cc_pair_2.documents.extend([doc1_tenant2, doc2_tenant2])\n\n    tenant1_doc_ids = {doc1_tenant1.id, doc2_tenant1.id}\n    tenant2_doc_ids = {doc1_tenant2.id, doc2_tenant2.id}\n\n    # Create chat sessions for each user\n    chat_session1: DATestChatSession = ChatSessionManager.create(\n        user_performing_action=admin_user1\n    )\n    chat_session2: DATestChatSession = ChatSessionManager.create(\n        user_performing_action=admin_user2\n    )\n\n    return {\n        \"admin_user1\": admin_user1,\n        \"admin_user2\": admin_user2,\n        \"chat_session1\": chat_session1,\n        \"chat_session2\": chat_session2,\n        \"tenant1_doc_ids\": tenant1_doc_ids,\n        \"tenant2_doc_ids\": tenant2_doc_ids,\n    }\n\n\ndef test_tenant1_can_access_own_documents(reset_multitenant: None) -> None:\n    \"\"\"Test that Tenant 1 can access its own documents but not Tenant 2's.\"\"\"\n    test_data = setup_test_tenants(reset_multitenant)\n\n    # User 1 sends a message and gets a response\n    response1 = ChatSessionManager.send_message(\n        chat_session_id=test_data[\"chat_session1\"].id,\n        message=\"What is in Tenant 1's documents? Run an internal search.\",\n        user_performing_action=test_data[\"admin_user1\"],\n    )\n\n    assert response1.error is None, \"Chat response should not have an error\"\n\n    # Assert that only the internal search tool was used\n    assert all(\n        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response1.used_tools\n    )\n\n    response_doc_ids = {doc.document_id for doc in response1.used_tools[0].documents}\n    assert test_data[\"tenant1_doc_ids\"].issubset(\n        response_doc_ids\n    ), \"Not all Tenant 1 document IDs are in the response\"\n    assert not response_doc_ids.intersection(\n        test_data[\"tenant2_doc_ids\"]\n    ), \"Tenant 2 document IDs should not be in the response\"\n\n    # Assert that the contents are correct\n    assert any(\n        doc.blurb == \"Tenant 1 Document Content\"\n        for doc in response1.used_tools[0].documents\n    ), \"Tenant 1 Document Content not found in any document\"\n\n\ndef test_tenant2_can_access_own_documents(reset_multitenant: None) -> None:\n    \"\"\"Test that Tenant 2 can access its own documents but not Tenant 1's.\"\"\"\n    test_data = setup_test_tenants(reset_multitenant)\n\n    # User 2 sends a message and gets a response\n    response2 = ChatSessionManager.send_message(\n        chat_session_id=test_data[\"chat_session2\"].id,\n        message=\"What is in Tenant 2's documents? Run an internal search.\",\n        user_performing_action=test_data[\"admin_user2\"],\n    )\n\n    assert response2.error is None, \"Chat response should not have an error\"\n\n    # Assert that the search tool was used\n    assert all(\n        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response2.used_tools\n    )\n\n    # Assert that the tool_result contains Tenant 2's documents\n    response_doc_ids = {doc.document_id for doc in response2.used_tools[0].documents}\n    assert test_data[\"tenant2_doc_ids\"].issubset(\n        response_doc_ids\n    ), \"Not all Tenant 2 document IDs are in the response\"\n    assert not response_doc_ids.intersection(\n        test_data[\"tenant1_doc_ids\"]\n    ), \"Tenant 1 document IDs should not be in the response\"\n\n    # Assert that the contents are correct\n    assert any(\n        doc.blurb == \"Tenant 2 Document Content\"\n        for doc in response2.used_tools[0].documents\n    ), \"Tenant 2 Document Content not found in any document\"\n\n\ndef test_tenant1_cannot_access_tenant2_documents(reset_multitenant: None) -> None:\n    \"\"\"Test that Tenant 1 cannot access Tenant 2's documents.\"\"\"\n    test_data = setup_test_tenants(reset_multitenant)\n\n    # User 1 tries to access Tenant 2's documents\n    response_cross = ChatSessionManager.send_message(\n        chat_session_id=test_data[\"chat_session1\"].id,\n        message=\"What is in Tenant 2's documents? Run an internal search.\",\n        user_performing_action=test_data[\"admin_user1\"],\n    )\n\n    assert response_cross.error is None, \"Chat response should not have an error\"\n\n    # Assert that the search tool was used\n    assert all(\n        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response_cross.used_tools\n    )\n\n    # Assert that the tool_result is empty or does not contain Tenant 2's documents\n    response_doc_ids = {\n        doc.document_id for doc in response_cross.used_tools[0].documents\n    }\n\n    # Ensure none of Tenant 2's document IDs are in the response\n    assert not response_doc_ids.intersection(test_data[\"tenant2_doc_ids\"])\n\n\ndef test_tenant2_cannot_access_tenant1_documents(reset_multitenant: None) -> None:\n    \"\"\"Test that Tenant 2 cannot access Tenant 1's documents.\"\"\"\n    test_data = setup_test_tenants(reset_multitenant)\n\n    # User 2 tries to access Tenant 1's documents\n    response_cross2 = ChatSessionManager.send_message(\n        chat_session_id=test_data[\"chat_session2\"].id,\n        message=\"What is in Tenant 1's documents? Run an internal search.\",\n        user_performing_action=test_data[\"admin_user2\"],\n    )\n\n    assert response_cross2.error is None, \"Chat response should not have an error\"\n\n    # Assert that the search tool was used\n    assert all(\n        tool.tool_name == ToolName.INTERNAL_SEARCH\n        for tool in response_cross2.used_tools\n    )\n\n    # Assert that the tool_result is empty or does not contain Tenant 1's documents\n    response_doc_ids = {\n        doc.document_id for doc in response_cross2.used_tools[0].documents\n    }\n\n    # Ensure none of Tenant 1's document IDs are in the response\n    assert not response_doc_ids.intersection(test_data[\"tenant1_doc_ids\"])\n\n\ndef test_multi_tenant_access_control(reset_multitenant: None) -> None:\n    \"\"\"Legacy test for multi-tenant access control.\"\"\"\n    test_data = setup_test_tenants(reset_multitenant)\n\n    # User 1 sends a message and gets a response with only Tenant 1's documents\n    response1 = ChatSessionManager.send_message(\n        chat_session_id=test_data[\"chat_session1\"].id,\n        message=\"What is in Tenant 1's documents? Run an internal search.\",\n        user_performing_action=test_data[\"admin_user1\"],\n    )\n    assert response1.error is None, \"Chat response should not have an error\"\n    assert all(\n        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response1.used_tools\n    )\n    response_doc_ids = {doc.document_id for doc in response1.used_tools[0].documents}\n    assert test_data[\"tenant1_doc_ids\"].issubset(response_doc_ids)\n    assert not response_doc_ids.intersection(test_data[\"tenant2_doc_ids\"])\n\n    # User 2 sends a message and gets a response with only Tenant 2's documents\n    response2 = ChatSessionManager.send_message(\n        chat_session_id=test_data[\"chat_session2\"].id,\n        message=\"What is in Tenant 2's documents? Run an internal search.\",\n        user_performing_action=test_data[\"admin_user2\"],\n    )\n    assert response2.error is None, \"Chat response should not have an error\"\n    assert all(\n        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response2.used_tools\n    )\n    response_doc_ids = {doc.document_id for doc in response2.used_tools[0].documents}\n    assert test_data[\"tenant2_doc_ids\"].issubset(response_doc_ids)\n    assert not response_doc_ids.intersection(test_data[\"tenant1_doc_ids\"])\n\n    # User 1 tries to access Tenant 2's documents and fails\n    user1_second_chat_session = ChatSessionManager.create(\n        user_performing_action=test_data[\"admin_user1\"]\n    )\n    response_cross = ChatSessionManager.send_message(\n        chat_session_id=user1_second_chat_session.id,\n        message=\"What is in Tenant 2's documents? Run an internal search.\",\n        user_performing_action=test_data[\"admin_user1\"],\n    )\n    assert response_cross.error is None, \"Chat response should not have an error\"\n    assert all(\n        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response_cross.used_tools\n    )\n    response_doc_ids = {\n        doc.document_id for doc in response_cross.used_tools[0].documents\n    }\n    assert not response_doc_ids.intersection(test_data[\"tenant2_doc_ids\"])\n\n    # User 2 tries to access Tenant 1's documents and fails\n    user2_second_chat_session = ChatSessionManager.create(\n        user_performing_action=test_data[\"admin_user2\"]\n    )\n    response_cross2 = ChatSessionManager.send_message(\n        chat_session_id=user2_second_chat_session.id,\n        message=\"What is in Tenant 1's documents? Run an internal search.\",\n        user_performing_action=test_data[\"admin_user2\"],\n    )\n    assert response_cross2.error is None, \"Chat response should not have an error\"\n    assert all(\n        tool.tool_name == ToolName.INTERNAL_SEARCH\n        for tool in response_cross2.used_tools\n    )\n    response_doc_ids = {\n        doc.document_id for doc in response_cross2.used_tools[0].documents\n    }\n    assert not response_doc_ids.intersection(test_data[\"tenant1_doc_ids\"])\n"
  },
  {
    "path": "backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py",
    "content": "from http import HTTPStatus\nfrom uuid import uuid4\n\nimport requests\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.db.enums import AccessType\nfrom onyx.db.models import UserRole\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.image_generation import (\n    ImageGenerationConfigManager,\n)\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_first_user_is_admin(reset_multitenant: None) -> None:  # noqa: ARG001\n    \"\"\"Test that the first user of a tenant is automatically assigned ADMIN role.\"\"\"\n    unique = uuid4().hex\n    test_user: DATestUser = UserManager.create(\n        name=f\"test_{unique}\", email=f\"test_{unique}@example.com\"\n    )\n    assert UserManager.is_role(test_user, UserRole.ADMIN)\n\n\ndef test_admin_can_create_credential(\n    reset_multitenant: None,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"Test that an admin user can create a credential in their tenant.\"\"\"\n    # Create admin user\n    unique = uuid4().hex\n    test_user: DATestUser = UserManager.create(\n        name=f\"test_{unique}\", email=f\"test_{unique}@example.com\"\n    )\n    assert UserManager.is_role(test_user, UserRole.ADMIN)\n\n    # Create credential\n    test_credential = CredentialManager.create(\n        name=\"admin_test_credential\",\n        source=DocumentSource.FILE,\n        curator_public=False,\n        user_performing_action=test_user,\n    )\n    assert test_credential is not None\n\n\ndef test_admin_can_create_connector(\n    reset_multitenant: None,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"Test that an admin user can create a connector in their tenant.\"\"\"\n    # Create admin user\n    unique = uuid4().hex\n    test_user: DATestUser = UserManager.create(\n        name=f\"test_{unique}\", email=f\"test_{unique}@example.com\"\n    )\n    assert UserManager.is_role(test_user, UserRole.ADMIN)\n\n    # Create connector\n    test_connector = ConnectorManager.create(\n        name=\"admin_test_connector\",\n        source=DocumentSource.FILE,\n        access_type=AccessType.PRIVATE,\n        user_performing_action=test_user,\n    )\n    assert test_connector is not None\n\n\ndef test_admin_can_create_and_verify_cc_pair(\n    reset_multitenant: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that an admin user can create and verify a connector-credential pair in their tenant.\"\"\"\n    # Create admin user\n    unique = uuid4().hex\n    test_user: DATestUser = UserManager.create(\n        name=f\"test_{unique}\", email=f\"test_{unique}@example.com\"\n    )\n    assert UserManager.is_role(test_user, UserRole.ADMIN)\n\n    # Create credential\n    test_credential = CredentialManager.create(\n        name=\"admin_test_credential\",\n        source=DocumentSource.FILE,\n        curator_public=False,\n        user_performing_action=test_user,\n    )\n\n    # Create connector\n    test_connector = ConnectorManager.create(\n        name=\"admin_test_connector\",\n        source=DocumentSource.FILE,\n        access_type=AccessType.PRIVATE,\n        user_performing_action=test_user,\n    )\n\n    # Create cc_pair\n    test_cc_pair = CCPairManager.create(\n        connector_id=test_connector.id,\n        credential_id=test_credential.id,\n        name=\"admin_test_cc_pair\",\n        access_type=AccessType.PRIVATE,\n        user_performing_action=test_user,\n    )\n    assert test_cc_pair is not None\n\n    # Verify cc_pair\n    CCPairManager.verify(cc_pair=test_cc_pair, user_performing_action=test_user)\n\n\ndef test_settings_access() -> None:\n    \"\"\"Calls to the enterprise settings endpoint without authentication should fail with\n    403 (and not 500, which will lock the web UI into a \"maintenance mode\" page)\"\"\"\n\n    response = requests.get(url=f\"{API_SERVER_URL}/enterprise-settings\")\n    assert response.status_code == HTTPStatus.FORBIDDEN\n\n\ndef test_image_gen_config_created_on_tenant_provision(\n    reset_multitenant: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that image generation config is automatically created when a tenant is provisioned.\"\"\"\n    unique = uuid4().hex\n    test_user: DATestUser = UserManager.create(\n        name=f\"test_{unique}\", email=f\"test_{unique}@example.com\"\n    )\n    assert UserManager.is_role(test_user, UserRole.ADMIN)\n\n    # Check if image gen config was created during tenant provisioning\n    all_configs = ImageGenerationConfigManager.get_all(user_performing_action=test_user)\n\n    # Should have at least one config created during provisioning\n    assert (\n        len(all_configs) > 0\n    ), \"Image generation config should be created during tenant provisioning\"\n\n    # Verify a default config exists\n    default_configs = [c for c in all_configs if c.is_default]\n    assert (\n        len(default_configs) == 1\n    ), \"Exactly one default image generation config should exist\"\n\n    # Verify expected properties\n    default_config = default_configs[0]\n    assert default_config.image_provider_id == \"openai_gpt_image_1\"\n    assert default_config.model_name == \"gpt-image-1\"\n"
  },
  {
    "path": "backend/tests/integration/multitenant_tests/tenants/test_tenant_provisioning_rollback.py",
    "content": "\"\"\"\nIntegration tests for tenant provisioning rollback behavior.\n\nTests the fix for the drop_schema bug where:\n1. isidentifier() rejected valid UUID tenant IDs (with hyphens)\n2. SQL syntax was broken (%(schema_name)s instead of proper identifier handling)\n\nThis test verifies the full flow: provisioning failure → rollback → schema cleanup.\n\"\"\"\n\nimport uuid\nfrom unittest.mock import patch\n\nfrom sqlalchemy import text\n\nfrom ee.onyx.server.tenants.schema_management import create_schema_if_not_exists\nfrom ee.onyx.server.tenants.schema_management import drop_schema\nfrom onyx.db.engine.sql_engine import get_session_with_shared_schema\nfrom shared_configs.configs import TENANT_ID_PREFIX\n\n\ndef _schema_exists(schema_name: str) -> bool:\n    \"\"\"Check if a schema exists in the database.\"\"\"\n    with get_session_with_shared_schema() as session:\n        result = session.execute(\n            text(\n                \"SELECT 1 FROM information_schema.schemata WHERE schema_name = :schema\"\n            ),\n            {\"schema\": schema_name},\n        ).fetchone()\n        return result is not None\n\n\nclass TestTenantProvisioningRollback:\n    \"\"\"Integration tests for provisioning failure and rollback.\"\"\"\n\n    def test_failed_provisioning_cleans_up_schema(self) -> None:\n        \"\"\"\n        When setup_tenant fails after schema creation, rollback should\n        clean up the orphaned schema.\n\n        This is the actual bug scenario: pre_provision_tenant creates a schema,\n        setup_tenant fails, rollback is called, but drop_schema was broken\n        (isidentifier rejected UUIDs with hyphens), leaving orphaned schemas.\n        \"\"\"\n        from ee.onyx.background.celery.tasks.tenant_provisioning.tasks import (\n            pre_provision_tenant,\n        )\n\n        # Track which tenant_id gets created\n        created_tenant_id = None\n\n        def track_schema_creation(tenant_id: str) -> bool:\n            nonlocal created_tenant_id\n            created_tenant_id = tenant_id\n            return create_schema_if_not_exists(tenant_id)\n\n        # Mock setup_tenant to fail after schema creation\n        with patch(\n            \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.setup_tenant\"\n        ) as mock_setup:\n            mock_setup.side_effect = Exception(\"Simulated provisioning failure\")\n\n            with patch(\n                \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.create_schema_if_not_exists\",\n                side_effect=track_schema_creation,\n            ):\n                # Run pre-provisioning - it should fail and trigger rollback\n                pre_provision_tenant()\n\n        # Verify that the schema was created and then cleaned up\n        assert created_tenant_id is not None, \"Schema should have been created\"\n        assert created_tenant_id.startswith(\n            TENANT_ID_PREFIX\n        ), f\"Should have tenant prefix: {created_tenant_id}\"\n        assert not _schema_exists(\n            created_tenant_id\n        ), f\"Schema {created_tenant_id} should have been rolled back\"\n\n    def test_drop_schema_works_with_uuid_tenant_id(self) -> None:\n        \"\"\"\n        drop_schema should work with UUID-format tenant IDs.\n\n        This directly tests the fix: UUID tenant IDs contain hyphens,\n        which isidentifier() rejected. The new regex validation accepts them.\n        \"\"\"\n        tenant_id = f\"{TENANT_ID_PREFIX}{uuid.uuid4()}\"\n\n        # Create schema\n        create_schema_if_not_exists(tenant_id)\n        assert _schema_exists(tenant_id), \"Schema should exist after creation\"\n\n        # Drop schema\n        drop_schema(tenant_id)\n        assert not _schema_exists(tenant_id), \"Schema should be dropped\"\n"
  },
  {
    "path": "backend/tests/integration/multitenant_tests/test_get_schemas_needing_migration.py",
    "content": "\"\"\"\nIntegration tests for onyx.db.engine.tenant_utils.get_schemas_needing_migration.\n\nThese tests require a live database and exercise the function directly,\nindependent of the alembic migration runner script.\n\nUsage:\n    pytest tests/integration/multitenant_tests/test_get_schemas_needing_migration.py -v\n\"\"\"\n\nfrom __future__ import annotations\n\nimport subprocess\nimport uuid\nfrom collections.abc import Generator\n\nimport pytest\nfrom sqlalchemy import text\nfrom sqlalchemy.engine import Engine\n\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.db.engine.tenant_utils import get_schemas_needing_migration\n\n_BACKEND_DIR = __file__[: __file__.index(\"/tests/\")]\n\n\n# ---------------------------------------------------------------------------\n# Fixtures\n# ---------------------------------------------------------------------------\n\n\n@pytest.fixture\ndef engine() -> Engine:\n    return SqlEngine.get_engine()\n\n\n@pytest.fixture\ndef current_head_rev() -> str:\n    result = subprocess.run(\n        [\"alembic\", \"heads\", \"--resolve-dependencies\"],\n        cwd=_BACKEND_DIR,\n        stdout=subprocess.PIPE,\n        stderr=subprocess.STDOUT,\n        text=True,\n    )\n    assert (\n        result.returncode == 0\n    ), f\"alembic heads failed (exit {result.returncode}):\\n{result.stdout}\"\n    rev = result.stdout.strip().split()[0]\n    assert len(rev) > 0\n    return rev\n\n\n@pytest.fixture\ndef tenant_schema_at_head(\n    engine: Engine, current_head_rev: str\n) -> Generator[str, None, None]:\n    \"\"\"Tenant schema with alembic_version already at head — should be excluded.\"\"\"\n    schema = f\"tenant_test_{uuid.uuid4().hex[:12]}\"\n    with engine.connect() as conn:\n        conn.execute(text(f'CREATE SCHEMA \"{schema}\"'))\n        conn.execute(\n            text(\n                f'CREATE TABLE \"{schema}\".alembic_version (version_num VARCHAR(32) NOT NULL)'\n            )\n        )\n        conn.execute(\n            text(f'INSERT INTO \"{schema}\".alembic_version (version_num) VALUES (:rev)'),\n            {\"rev\": current_head_rev},\n        )\n        conn.commit()\n\n    yield schema\n\n    with engine.connect() as conn:\n        conn.execute(text(f'DROP SCHEMA IF EXISTS \"{schema}\" CASCADE'))\n        conn.commit()\n\n\n@pytest.fixture\ndef tenant_schema_empty(engine: Engine) -> Generator[str, None, None]:\n    \"\"\"Tenant schema with no tables — should be included (needs migration).\"\"\"\n    schema = f\"tenant_test_{uuid.uuid4().hex[:12]}\"\n    with engine.connect() as conn:\n        conn.execute(text(f'CREATE SCHEMA \"{schema}\"'))\n        conn.commit()\n\n    yield schema\n\n    with engine.connect() as conn:\n        conn.execute(text(f'DROP SCHEMA IF EXISTS \"{schema}\" CASCADE'))\n        conn.commit()\n\n\n@pytest.fixture\ndef tenant_schema_stale_rev(engine: Engine) -> Generator[str, None, None]:\n    \"\"\"Tenant schema with a non-head revision — should be included (needs migration).\"\"\"\n    schema = f\"tenant_test_{uuid.uuid4().hex[:12]}\"\n    with engine.connect() as conn:\n        conn.execute(text(f'CREATE SCHEMA \"{schema}\"'))\n        conn.execute(\n            text(\n                f'CREATE TABLE \"{schema}\".alembic_version (version_num VARCHAR(32) NOT NULL)'\n            )\n        )\n        conn.execute(\n            text(\n                f\"INSERT INTO \\\"{schema}\\\".alembic_version (version_num) VALUES ('stalerev000000000000')\"\n            )\n        )\n        conn.commit()\n\n    yield schema\n\n    with engine.connect() as conn:\n        conn.execute(text(f'DROP SCHEMA IF EXISTS \"{schema}\" CASCADE'))\n        conn.commit()\n\n\n# ---------------------------------------------------------------------------\n# Tests\n# ---------------------------------------------------------------------------\n\n\ndef test_classifies_all_cases(\n    current_head_rev: str,\n    tenant_schema_at_head: str,\n    tenant_schema_empty: str,\n    tenant_schema_stale_rev: str,\n) -> None:\n    \"\"\"Correctly classifies all three schema states:\n    - at head      → excluded\n    - no table     → included (needs migration)\n    - stale rev    → included (needs migration)\n    \"\"\"\n    all_schemas = [tenant_schema_at_head, tenant_schema_empty, tenant_schema_stale_rev]\n    result = get_schemas_needing_migration(all_schemas, current_head_rev)\n\n    assert tenant_schema_at_head not in result\n    assert tenant_schema_empty in result\n    assert tenant_schema_stale_rev in result\n\n\ndef test_idempotent(\n    current_head_rev: str,\n    tenant_schema_at_head: str,\n    tenant_schema_empty: str,\n) -> None:\n    \"\"\"Calling the function twice returns the same result.\n\n    Verifies that the DROP TABLE IF EXISTS guards correctly clean up temp\n    tables so a second call succeeds even if the first left state behind.\n    \"\"\"\n    schemas = [tenant_schema_at_head, tenant_schema_empty]\n\n    first = get_schemas_needing_migration(schemas, current_head_rev)\n    second = get_schemas_needing_migration(schemas, current_head_rev)\n\n    assert first == second\n\n\ndef test_empty_input(current_head_rev: str) -> None:\n    \"\"\"An empty input list returns immediately without touching the DB.\"\"\"\n    assert get_schemas_needing_migration([], current_head_rev) == []\n"
  },
  {
    "path": "backend/tests/integration/tests/anonymous_user/test_anonymous_user.py",
    "content": "import requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.settings import SettingsManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestSettings\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_me_endpoint_returns_anonymous_user_when_enabled(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Unauthenticated /me returns anonymous user info when anonymous access is enabled.\"\"\"\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    SettingsManager.update_settings(\n        DATestSettings(anonymous_user_enabled=True),\n        user_performing_action=admin_user,\n    )\n\n    response = requests.get(f\"{API_SERVER_URL}/me\")\n\n    assert response.status_code == 200\n    data = response.json()\n    assert data[\"is_anonymous_user\"] is True\n    assert data[\"email\"] == \"anonymous@onyx.app\"\n    assert data[\"role\"] == \"limited\"\n\n\ndef test_me_endpoint_returns_403_when_anonymous_disabled(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Unauthenticated /me returns 403 when anonymous access is disabled.\"\"\"\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    SettingsManager.update_settings(\n        DATestSettings(anonymous_user_enabled=False),\n        user_performing_action=admin_user,\n    )\n\n    response = requests.get(f\"{API_SERVER_URL}/me\")\n\n    # 403 is returned when user is not authenticated\n    assert response.status_code == 403\n\n\ndef test_me_endpoint_returns_authenticated_user_info(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Authenticated /me returns the actual user's info.\"\"\"\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    response = requests.get(\n        f\"{API_SERVER_URL}/me\",\n        headers=admin_user.headers,\n    )\n\n    assert response.status_code == 200\n    data = response.json()\n    assert data.get(\"is_anonymous_user\") is not True\n    assert data[\"email\"] == admin_user.email\n    assert data[\"role\"] == \"admin\"\n\n\ndef test_anonymous_user_can_access_persona_when_enabled(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Verify that anonymous users can access limited endpoints when enabled.\"\"\"\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    SettingsManager.update_settings(\n        DATestSettings(anonymous_user_enabled=True),\n        user_performing_action=admin_user,\n    )\n\n    anon_user = UserManager.get_anonymous_user()\n\n    response = requests.get(\n        f\"{API_SERVER_URL}/persona\",\n        headers=anon_user.headers,\n    )\n    assert response.status_code == 200\n\n\ndef test_anonymous_user_denied_persona_when_disabled(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Verify that anonymous users cannot access endpoints when disabled.\"\"\"\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    SettingsManager.update_settings(\n        DATestSettings(anonymous_user_enabled=False),\n        user_performing_action=admin_user,\n    )\n\n    anon_user = UserManager.get_anonymous_user()\n\n    response = requests.get(\n        f\"{API_SERVER_URL}/persona\",\n        headers=anon_user.headers,\n    )\n    # 403 is returned - BasicAuthenticationError uses HTTP 403 for all auth failures\n    assert response.status_code == 403\n"
  },
  {
    "path": "backend/tests/integration/tests/api_key/test_api_key.py",
    "content": "from uuid import UUID\n\nimport requests\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.db.enums import AccountType\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.api_key import APIKeyManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestAPIKey\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_limited(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Verify that with a limited role key, limited endpoints are accessible and\n    others are not.\"\"\"\n\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    api_key: DATestAPIKey = APIKeyManager.create(\n        api_key_role=UserRole.LIMITED,\n        user_performing_action=admin_user,\n    )\n\n    # test limited endpoint\n    response = requests.get(\n        f\"{API_SERVER_URL}/persona/0\",\n        headers=api_key.headers,\n    )\n    assert response.status_code == 200\n\n    # test admin endpoints\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/api-key\",\n        headers=api_key.headers,\n    )\n    assert response.status_code == 403\n\n\ndef _get_service_account_account_type(\n    admin_user: DATestUser,\n    api_key_user_id: UUID,\n) -> AccountType:\n    \"\"\"Fetch the account_type of a service account user via the user listing API.\"\"\"\n    response = requests.get(\n        f\"{API_SERVER_URL}/manage/users\",\n        headers=admin_user.headers,\n        params={\"include_api_keys\": \"true\"},\n    )\n    response.raise_for_status()\n    data = response.json()\n    user_id_str = str(api_key_user_id)\n    for user in data[\"accepted\"]:\n        if user[\"id\"] == user_id_str:\n            return AccountType(user[\"account_type\"])\n    raise AssertionError(\n        f\"Service account user {user_id_str} not found in user listing\"\n    )\n\n\ndef _get_default_group_user_ids(\n    admin_user: DATestUser,\n) -> tuple[set[str], set[str]]:\n    \"\"\"Return (admin_group_user_ids, basic_group_user_ids) from default groups.\"\"\"\n    all_groups = UserGroupManager.get_all(\n        user_performing_action=admin_user,\n        include_default=True,\n    )\n    admin_group = next(\n        (g for g in all_groups if g.name == \"Admin\" and g.is_default), None\n    )\n    basic_group = next(\n        (g for g in all_groups if g.name == \"Basic\" and g.is_default), None\n    )\n    assert admin_group is not None, \"Admin default group not found\"\n    assert basic_group is not None, \"Basic default group not found\"\n\n    admin_ids = {str(u.id) for u in admin_group.users}\n    basic_ids = {str(u.id) for u in basic_group.users}\n    return admin_ids, basic_ids\n\n\ndef test_api_key_limited_service_account(reset: None) -> None:  # noqa: ARG001\n    \"\"\"LIMITED role API key: account_type is SERVICE_ACCOUNT, no group membership.\"\"\"\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    api_key: DATestAPIKey = APIKeyManager.create(\n        api_key_role=UserRole.LIMITED,\n        user_performing_action=admin_user,\n    )\n\n    # Verify account_type\n    account_type = _get_service_account_account_type(admin_user, api_key.user_id)\n    assert (\n        account_type == AccountType.SERVICE_ACCOUNT\n    ), f\"Expected account_type={AccountType.SERVICE_ACCOUNT}, got {account_type}\"\n\n    # Verify no group membership\n    admin_ids, basic_ids = _get_default_group_user_ids(admin_user)\n    user_id_str = str(api_key.user_id)\n    assert (\n        user_id_str not in admin_ids\n    ), \"LIMITED API key should NOT be in Admin default group\"\n    assert (\n        user_id_str not in basic_ids\n    ), \"LIMITED API key should NOT be in Basic default group\"\n\n\ndef test_api_key_basic_service_account(reset: None) -> None:  # noqa: ARG001\n    \"\"\"BASIC role API key: account_type is SERVICE_ACCOUNT, in Basic group only.\"\"\"\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    api_key: DATestAPIKey = APIKeyManager.create(\n        api_key_role=UserRole.BASIC,\n        user_performing_action=admin_user,\n    )\n\n    # Verify account_type\n    account_type = _get_service_account_account_type(admin_user, api_key.user_id)\n    assert (\n        account_type == AccountType.SERVICE_ACCOUNT\n    ), f\"Expected account_type={AccountType.SERVICE_ACCOUNT}, got {account_type}\"\n\n    # Verify Basic group membership\n    admin_ids, basic_ids = _get_default_group_user_ids(admin_user)\n    user_id_str = str(api_key.user_id)\n    assert user_id_str in basic_ids, \"BASIC API key should be in Basic default group\"\n    assert (\n        user_id_str not in admin_ids\n    ), \"BASIC API key should NOT be in Admin default group\"\n\n\ndef test_api_key_admin_service_account(reset: None) -> None:  # noqa: ARG001\n    \"\"\"ADMIN role API key: account_type is SERVICE_ACCOUNT, in Admin group only.\"\"\"\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    api_key: DATestAPIKey = APIKeyManager.create(\n        api_key_role=UserRole.ADMIN,\n        user_performing_action=admin_user,\n    )\n\n    # Verify account_type\n    account_type = _get_service_account_account_type(admin_user, api_key.user_id)\n    assert (\n        account_type == AccountType.SERVICE_ACCOUNT\n    ), f\"Expected account_type={AccountType.SERVICE_ACCOUNT}, got {account_type}\"\n\n    # Verify Admin group membership\n    admin_ids, basic_ids = _get_default_group_user_ids(admin_user)\n    user_id_str = str(api_key.user_id)\n    assert user_id_str in admin_ids, \"ADMIN API key should be in Admin default group\"\n    assert (\n        user_id_str not in basic_ids\n    ), \"ADMIN API key should NOT be in Basic default group\"\n"
  },
  {
    "path": "backend/tests/integration/tests/auth/test_saml_user_conversion.py",
    "content": "import os\n\nimport pytest\nimport requests\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.db.enums import AccountType\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _simulate_saml_login(email: str, admin_user: DATestUser) -> dict:\n    \"\"\"Simulate a SAML login by calling the test upsert endpoint.\"\"\"\n    response = requests.post(\n        f\"{API_SERVER_URL}/manage/users/test-upsert-user\",\n        json={\"email\": email},\n        headers=admin_user.headers,\n    )\n    response.raise_for_status()\n    return response.json()\n\n\ndef _get_basic_group_member_emails(admin_user: DATestUser) -> set[str]:\n    \"\"\"Get the set of emails of all members in the Basic default group.\"\"\"\n    all_groups = UserGroupManager.get_all(admin_user, include_default=True)\n    basic_default = [g for g in all_groups if g.is_default and g.name == \"Basic\"]\n    assert basic_default, \"Basic default group not found\"\n    return {u.email for u in basic_default[0].users}\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"SAML tests are enterprise only\",\n)\ndef test_saml_user_conversion(reset: None) -> None:  # noqa: ARG001\n    \"\"\"\n    Test that SAML login correctly converts users with non-authenticated roles\n    (SLACK_USER or EXT_PERM_USER) to authenticated roles (BASIC).\n\n    This test:\n    1. Creates an admin and a regular user\n    2. Changes the regular user's role to EXT_PERM_USER\n    3. Simulates a SAML login by calling the test endpoint\n    4. Verifies the user's role is converted to BASIC\n\n    This tests the fix that ensures users with non-authenticated roles (SLACK_USER or EXT_PERM_USER)\n    are properly converted to authenticated roles during SAML login.\n    \"\"\"\n    # Create an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(email=\"admin@example.com\")\n\n    # Create a regular user that we'll convert to EXT_PERM_USER\n    test_user_email = \"ext_perm_user@example.com\"\n    test_user = UserManager.create(email=test_user_email)\n\n    # Verify the user was created with BASIC role initially\n    assert UserManager.is_role(test_user, UserRole.BASIC)\n\n    # Change the user's role to EXT_PERM_USER using the UserManager\n    UserManager.set_role(\n        user_to_set=test_user,\n        target_role=UserRole.EXT_PERM_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n\n    # Verify the user has EXT_PERM_USER role now\n    assert UserManager.is_role(test_user, UserRole.EXT_PERM_USER)\n\n    # Simulate SAML login by calling the test endpoint\n    user_data = _simulate_saml_login(test_user_email, admin_user)\n\n    # Verify the response indicates the role changed to BASIC\n    assert user_data[\"role\"] == UserRole.BASIC.value\n\n    # Verify user role was changed in the database\n    assert UserManager.is_role(test_user, UserRole.BASIC)\n\n    # Do the same test with SLACK_USER\n    slack_user_email = \"slack_user@example.com\"\n    slack_user = UserManager.create(email=slack_user_email)\n\n    # Verify the user was created with BASIC role initially\n    assert UserManager.is_role(slack_user, UserRole.BASIC)\n\n    # Change the user's role to SLACK_USER\n    UserManager.set_role(\n        user_to_set=slack_user,\n        target_role=UserRole.SLACK_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n\n    # Verify the user has SLACK_USER role\n    assert UserManager.is_role(slack_user, UserRole.SLACK_USER)\n\n    # Simulate SAML login again\n    user_data = _simulate_saml_login(slack_user_email, admin_user)\n\n    # Verify the response indicates the role changed to BASIC\n    assert user_data[\"role\"] == UserRole.BASIC.value\n\n    # Verify the user's role was changed in the database\n    assert UserManager.is_role(slack_user, UserRole.BASIC)\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"SAML tests are enterprise only\",\n)\ndef test_saml_user_conversion_sets_account_type_and_group(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test that SAML login sets account_type to STANDARD when converting a\n    non-web user (EXT_PERM_USER) and that the user receives the correct role\n    (BASIC) after conversion.\n\n    This validates the permissions-migration-phase2 changes which ensure that:\n    1. account_type is updated to 'standard' on SAML conversion\n    2. The converted user is assigned to the Basic default group\n    \"\"\"\n    # Create an admin user (first user is automatically admin)\n    admin_user: DATestUser = UserManager.create(email=\"admin@example.com\")\n\n    # Create a user and set them as EXT_PERM_USER\n    test_email = \"ext_convert@example.com\"\n    test_user = UserManager.create(email=test_email)\n    UserManager.set_role(\n        user_to_set=test_user,\n        target_role=UserRole.EXT_PERM_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n    assert UserManager.is_role(test_user, UserRole.EXT_PERM_USER)\n\n    # Simulate SAML login\n    user_data = _simulate_saml_login(test_email, admin_user)\n\n    # Verify account_type is set to standard after conversion\n    assert (\n        user_data[\"account_type\"] == AccountType.STANDARD.value\n    ), f\"Expected account_type='{AccountType.STANDARD.value}', got '{user_data['account_type']}'\"\n\n    # Verify role is BASIC after conversion\n    assert user_data[\"role\"] == UserRole.BASIC.value\n\n    # Verify the user was assigned to the Basic default group\n    assert test_email in _get_basic_group_member_emails(\n        admin_user\n    ), f\"Converted user '{test_email}' not found in Basic default group\"\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"SAML tests are enterprise only\",\n)\ndef test_saml_normal_signin_assigns_group(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test that a brand-new user signing in via SAML for the first time\n    is created with the correct role, account_type, and group membership.\n\n    This validates that normal SAML sign-in (not an upgrade from\n    SLACK_USER/EXT_PERM_USER) correctly:\n    1. Creates the user with role=BASIC and account_type=STANDARD\n    2. Assigns the user to the Basic default group\n    \"\"\"\n    # First user becomes admin\n    admin_user: DATestUser = UserManager.create(email=\"admin@example.com\")\n\n    # New user signs in via SAML (no prior account)\n    new_email = \"new_saml_user@example.com\"\n    user_data = _simulate_saml_login(new_email, admin_user)\n\n    # Verify role and account_type\n    assert user_data[\"role\"] == UserRole.BASIC.value\n    assert user_data[\"account_type\"] == AccountType.STANDARD.value\n\n    # Verify user is in the Basic default group\n    assert new_email in _get_basic_group_member_emails(\n        admin_user\n    ), f\"New SAML user '{new_email}' not found in Basic default group\"\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"SAML tests are enterprise only\",\n)\ndef test_saml_user_conversion_restores_group_membership(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test that SAML login restores Basic group membership when converting\n    a non-authenticated user (EXT_PERM_USER or SLACK_USER) to BASIC.\n\n    Group membership implies 'basic' permission (verified by\n    test_new_group_gets_basic_permission).\n    \"\"\"\n    admin_user: DATestUser = UserManager.create(email=\"admin@example.com\")\n\n    # --- EXT_PERM_USER path ---\n    ext_email = \"ext_perm_perms@example.com\"\n    ext_user = UserManager.create(email=ext_email)\n    assert ext_email in _get_basic_group_member_emails(admin_user)\n\n    UserManager.set_role(\n        user_to_set=ext_user,\n        target_role=UserRole.EXT_PERM_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n    assert ext_email not in _get_basic_group_member_emails(admin_user)\n\n    user_data = _simulate_saml_login(ext_email, admin_user)\n    assert user_data[\"role\"] == UserRole.BASIC.value\n    assert ext_email in _get_basic_group_member_emails(\n        admin_user\n    ), \"EXT_PERM_USER should be back in Basic group after SAML conversion\"\n\n    # --- SLACK_USER path ---\n    slack_email = \"slack_perms@example.com\"\n    slack_user = UserManager.create(email=slack_email)\n\n    UserManager.set_role(\n        user_to_set=slack_user,\n        target_role=UserRole.SLACK_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n    assert slack_email not in _get_basic_group_member_emails(admin_user)\n\n    user_data = _simulate_saml_login(slack_email, admin_user)\n    assert user_data[\"role\"] == UserRole.BASIC.value\n    assert slack_email in _get_basic_group_member_emails(\n        admin_user\n    ), \"SLACK_USER should be back in Basic group after SAML conversion\"\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"SAML tests are enterprise only\",\n)\ndef test_saml_round_trip_group_lifecycle(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test the full round-trip: BASIC -> EXT_PERM -> SAML(BASIC) -> EXT_PERM -> SAML(BASIC).\n\n    Verifies group membership is correctly removed and restored at each transition.\n    \"\"\"\n    admin_user: DATestUser = UserManager.create(email=\"admin@example.com\")\n\n    test_email = \"roundtrip@example.com\"\n    test_user = UserManager.create(email=test_email)\n\n    # Step 1: BASIC user is in Basic group\n    assert test_email in _get_basic_group_member_emails(admin_user)\n\n    # Step 2: Downgrade to EXT_PERM_USER — loses Basic group\n    UserManager.set_role(\n        user_to_set=test_user,\n        target_role=UserRole.EXT_PERM_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n    assert test_email not in _get_basic_group_member_emails(admin_user)\n\n    # Step 3: SAML login — converts back to BASIC, regains Basic group\n    _simulate_saml_login(test_email, admin_user)\n    assert test_email in _get_basic_group_member_emails(\n        admin_user\n    ), \"Should be in Basic group after first SAML conversion\"\n\n    # Step 4: Downgrade again\n    UserManager.set_role(\n        user_to_set=test_user,\n        target_role=UserRole.EXT_PERM_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n    assert test_email not in _get_basic_group_member_emails(admin_user)\n\n    # Step 5: SAML login again — should still restore correctly\n    _simulate_saml_login(test_email, admin_user)\n    assert test_email in _get_basic_group_member_emails(\n        admin_user\n    ), \"Should be in Basic group after second SAML conversion\"\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"SAML tests are enterprise only\",\n)\ndef test_saml_slack_user_conversion_sets_account_type_and_group(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test that SAML login sets account_type to STANDARD and assigns Basic group\n    when converting a SLACK_USER (BOT account_type).\n\n    Mirrors test_saml_user_conversion_sets_account_type_and_group but for\n    SLACK_USER instead of EXT_PERM_USER, and additionally verifies permissions.\n    \"\"\"\n    admin_user: DATestUser = UserManager.create(email=\"admin@example.com\")\n\n    test_email = \"slack_convert@example.com\"\n    test_user = UserManager.create(email=test_email)\n\n    UserManager.set_role(\n        user_to_set=test_user,\n        target_role=UserRole.SLACK_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n    assert UserManager.is_role(test_user, UserRole.SLACK_USER)\n\n    # SAML login\n    user_data = _simulate_saml_login(test_email, admin_user)\n\n    # Verify account_type and role\n    assert (\n        user_data[\"account_type\"] == AccountType.STANDARD.value\n    ), f\"Expected STANDARD, got {user_data['account_type']}\"\n    assert user_data[\"role\"] == UserRole.BASIC.value\n\n    # Verify Basic group membership (implies 'basic' permission)\n    assert test_email in _get_basic_group_member_emails(\n        admin_user\n    ), f\"Converted SLACK_USER '{test_email}' not found in Basic default group\"\n"
  },
  {
    "path": "backend/tests/integration/tests/chat/test_chat_deletion.py",
    "content": "import pytest\n\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.reset import reset_all\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nMESSAGE = \"Hi\"\n\n\n@pytest.fixture(scope=\"module\", autouse=True)\ndef reset_for_module() -> None:\n    \"\"\"Reset all data once before running any tests in this module.\"\"\"\n    reset_all()\n\n\n@pytest.fixture\ndef llm_provider(admin_user: DATestUser) -> DATestLLMProvider:\n    return LLMProviderManager.create(user_performing_action=admin_user)\n\n\ndef test_soft_delete_chat_session(\n    basic_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test soft deletion of a chat session.\n    Soft delete should mark the chat as deleted but keep it in the database.\n    \"\"\"\n    # Create a chat session\n    test_chat_session = ChatSessionManager.create(\n        persona_id=0,  # Use default persona\n        description=\"Test chat session for soft deletion\",\n        user_performing_action=basic_user,\n    )\n\n    # Send a message to create some data\n    response = ChatSessionManager.send_message(\n        chat_session_id=test_chat_session.id,\n        message=MESSAGE,\n        user_performing_action=basic_user,\n    )\n\n    # Verify that the message was processed successfully\n    assert response.error is None, \"Chat response should not have an error\"\n    assert len(response.full_message) > 0, \"Chat response should not be empty\"\n\n    # Verify that the chat session can be retrieved before deletion\n    chat_history = ChatSessionManager.get_chat_history(\n        chat_session=test_chat_session,\n        user_performing_action=basic_user,\n    )\n    assert len(chat_history) > 0, \"Chat session should have messages\"\n\n    # Test soft deletion of the chat session\n    deletion_success = ChatSessionManager.soft_delete(\n        chat_session=test_chat_session,\n        user_performing_action=basic_user,\n    )\n\n    # Verify that the deletion was successful\n    assert deletion_success, \"Chat session soft deletion should succeed\"\n\n    # Verify that the chat session is soft deleted (marked as deleted but still in DB)\n    assert ChatSessionManager.verify_soft_deleted(\n        chat_session=test_chat_session,\n        user_performing_action=basic_user,\n    ), \"Chat session should be soft deleted\"\n\n    # Verify that normal access is blocked\n    assert ChatSessionManager.verify_deleted(\n        chat_session=test_chat_session,\n        user_performing_action=basic_user,\n    ), \"Chat session should not be accessible normally after soft delete\"\n\n\ndef test_hard_delete_chat_session(\n    basic_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test hard deletion of a chat session.\n    Hard delete should completely remove the chat from the database.\n    \"\"\"\n    # Create a chat session\n    test_chat_session = ChatSessionManager.create(\n        persona_id=0,  # Use default persona\n        description=\"Test chat session for hard deletion\",\n        user_performing_action=basic_user,\n    )\n\n    # Send a message to create some data\n    response = ChatSessionManager.send_message(\n        chat_session_id=test_chat_session.id,\n        message=MESSAGE,\n        user_performing_action=basic_user,\n    )\n\n    # Verify that the message was processed successfully\n    assert response.error is None, \"Chat response should not have an error\"\n    assert len(response.full_message) > 0, \"Chat response should not be empty\"\n\n    # Verify that the chat session can be retrieved before deletion\n    chat_history = ChatSessionManager.get_chat_history(\n        chat_session=test_chat_session,\n        user_performing_action=basic_user,\n    )\n    assert len(chat_history) > 0, \"Chat session should have messages\"\n\n    # Test hard deletion of the chat session\n    deletion_success = ChatSessionManager.hard_delete(\n        chat_session=test_chat_session,\n        user_performing_action=basic_user,\n    )\n\n    # Verify that the deletion was successful\n    assert deletion_success, \"Chat session hard deletion should succeed\"\n\n    # Verify that the chat session is hard deleted (completely removed from DB)\n    assert ChatSessionManager.verify_hard_deleted(\n        chat_session=test_chat_session,\n        user_performing_action=basic_user,\n    ), \"Chat session should be hard deleted\"\n\n    # Verify that the chat session is not accessible at all\n    assert ChatSessionManager.verify_deleted(\n        chat_session=test_chat_session,\n        user_performing_action=basic_user,\n    ), \"Chat session should not be accessible after hard delete\"\n\n    # Verify it's not soft deleted (since it doesn't exist at all)\n    assert not ChatSessionManager.verify_soft_deleted(\n        chat_session=test_chat_session,\n        user_performing_action=basic_user,\n    ), \"Hard deleted chat should not be found as soft deleted\"\n\n\ndef test_multiple_soft_deletions(\n    basic_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test multiple chat session soft deletions to ensure proper handling\n    when there are multiple related records.\n    \"\"\"\n    chat_sessions = []\n\n    # Create multiple chat sessions with potential agent behavior\n    for i in range(3):\n        chat_session = ChatSessionManager.create(\n            persona_id=0,\n            description=f\"Test chat session {i} for multi-soft-deletion\",\n            user_performing_action=basic_user,\n        )\n\n        # Send a message to create some data\n        ChatSessionManager.send_message(\n            chat_session_id=chat_session.id,\n            message=f\"Tell me about topic {i} with detailed analysis\",\n            user_performing_action=basic_user,\n        )\n\n        chat_sessions.append(chat_session)\n\n    # Soft delete all chat sessions\n    for chat_session in chat_sessions:\n        deletion_success = ChatSessionManager.soft_delete(\n            chat_session=chat_session,\n            user_performing_action=basic_user,\n        )\n        assert deletion_success, f\"Failed to soft delete chat {chat_session.id}\"\n\n    # Verify all chat sessions are soft deleted\n    for chat_session in chat_sessions:\n        assert ChatSessionManager.verify_soft_deleted(\n            chat_session=chat_session,\n            user_performing_action=basic_user,\n        ), f\"Chat {chat_session.id} should be soft deleted\"\n\n        assert ChatSessionManager.verify_deleted(\n            chat_session=chat_session,\n            user_performing_action=basic_user,\n        ), f\"Chat {chat_session.id} should not be accessible normally\"\n\n\ndef test_multiple_hard_deletions_with_agent_data(\n    basic_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test multiple chat session hard deletions to ensure CASCADE deletes work correctly\n    when there are multiple related records.\n    \"\"\"\n    chat_sessions = []\n\n    # Create multiple chat sessions with potential agent behavior\n    for i in range(3):\n        chat_session = ChatSessionManager.create(\n            persona_id=0,\n            description=f\"Test chat session {i} for multi-hard-deletion\",\n            user_performing_action=basic_user,\n        )\n\n        # Send a message to create some data\n        ChatSessionManager.send_message(\n            chat_session_id=chat_session.id,\n            message=f\"Tell me about topic {i} with detailed analysis\",\n            user_performing_action=basic_user,\n        )\n\n        chat_sessions.append(chat_session)\n\n    # Hard delete all chat sessions\n    for chat_session in chat_sessions:\n        deletion_success = ChatSessionManager.hard_delete(\n            chat_session=chat_session,\n            user_performing_action=basic_user,\n        )\n        assert deletion_success, f\"Failed to hard delete chat {chat_session.id}\"\n\n    # Verify all chat sessions are hard deleted\n    for chat_session in chat_sessions:\n        assert ChatSessionManager.verify_hard_deleted(\n            chat_session=chat_session,\n            user_performing_action=basic_user,\n        ), f\"Chat {chat_session.id} should be hard deleted\"\n\n        assert ChatSessionManager.verify_deleted(\n            chat_session=chat_session,\n            user_performing_action=basic_user,\n        ), f\"Chat {chat_session.id} should not be accessible\"\n\n\ndef test_soft_vs_hard_delete_edge_cases(\n    basic_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test edge cases for both soft and hard deletion to ensure robustness.\n    \"\"\"\n    # Test 1: Soft delete a chat session with no messages\n    empty_chat_session_soft = ChatSessionManager.create(\n        persona_id=0,\n        description=\"Empty chat session for soft delete\",\n        user_performing_action=basic_user,\n    )\n\n    # Soft delete without sending any messages\n    deletion_success = ChatSessionManager.soft_delete(\n        chat_session=empty_chat_session_soft,\n        user_performing_action=basic_user,\n    )\n    assert deletion_success, \"Empty chat session should be soft deletable\"\n    assert ChatSessionManager.verify_soft_deleted(\n        chat_session=empty_chat_session_soft,\n        user_performing_action=basic_user,\n    ), \"Empty chat session should be confirmed as soft deleted\"\n\n    # Test 2: Hard delete a chat session with no messages\n    empty_chat_session_hard = ChatSessionManager.create(\n        persona_id=0,\n        description=\"Empty chat session for hard delete\",\n        user_performing_action=basic_user,\n    )\n\n    # Hard delete without sending any messages\n    deletion_success = ChatSessionManager.hard_delete(\n        chat_session=empty_chat_session_hard,\n        user_performing_action=basic_user,\n    )\n    assert deletion_success, \"Empty chat session should be hard deletable\"\n    assert ChatSessionManager.verify_hard_deleted(\n        chat_session=empty_chat_session_hard,\n        user_performing_action=basic_user,\n    ), \"Empty chat session should be confirmed as hard deleted\"\n\n    # Test 3: Soft delete a chat session with multiple messages\n    multi_message_chat_soft = ChatSessionManager.create(\n        persona_id=0,\n        description=\"Multi-message chat session for soft delete\",\n        user_performing_action=basic_user,\n    )\n\n    # Send multiple messages to create more complex data\n    for i in range(3):\n        ChatSessionManager.send_message(\n            chat_session_id=multi_message_chat_soft.id,\n            message=f\"Message {i}: Tell me about different aspects of this topic\",\n            user_performing_action=basic_user,\n        )\n\n    # Verify messages exist\n    chat_history = ChatSessionManager.get_chat_history(\n        chat_session=multi_message_chat_soft,\n        user_performing_action=basic_user,\n    )\n    assert len(chat_history) >= 3, \"Chat should have multiple messages\"\n\n    # Soft delete the chat with multiple messages\n    deletion_success = ChatSessionManager.soft_delete(\n        chat_session=multi_message_chat_soft,\n        user_performing_action=basic_user,\n    )\n    assert deletion_success, \"Multi-message chat session should be soft deletable\"\n    assert ChatSessionManager.verify_soft_deleted(\n        chat_session=multi_message_chat_soft,\n        user_performing_action=basic_user,\n    ), \"Multi-message chat session should be confirmed as soft deleted\"\n\n    # Test 4: Hard delete a chat session with multiple messages\n    multi_message_chat_hard = ChatSessionManager.create(\n        persona_id=0,\n        description=\"Multi-message chat session for hard delete\",\n        user_performing_action=basic_user,\n    )\n\n    # Send multiple messages to create more complex data\n    for i in range(3):\n        ChatSessionManager.send_message(\n            chat_session_id=multi_message_chat_hard.id,\n            message=f\"Message {i}: Tell me about different aspects of this topic\",\n            user_performing_action=basic_user,\n        )\n\n    # Verify messages exist\n    chat_history = ChatSessionManager.get_chat_history(\n        chat_session=multi_message_chat_hard,\n        user_performing_action=basic_user,\n    )\n    assert len(chat_history) >= 3, \"Chat should have multiple messages\"\n\n    # Hard delete the chat with multiple messages\n    deletion_success = ChatSessionManager.hard_delete(\n        chat_session=multi_message_chat_hard,\n        user_performing_action=basic_user,\n    )\n    assert deletion_success, \"Multi-message chat session should be hard deletable\"\n    assert ChatSessionManager.verify_hard_deleted(\n        chat_session=multi_message_chat_hard,\n        user_performing_action=basic_user,\n    ), \"Multi-message chat session should be confirmed as hard deleted\"\n"
  },
  {
    "path": "backend/tests/integration/tests/chat/test_chat_session_access.py",
    "content": "from uuid import uuid4\n\nimport pytest\nimport requests\nfrom requests import HTTPError\n\nfrom onyx.auth.schemas import UserRole\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import GENERAL_HEADERS\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.user import build_email\nfrom tests.integration.common_utils.managers.user import DEFAULT_PASSWORD\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.reset import reset_all\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\n@pytest.fixture(scope=\"module\", autouse=True)\ndef reset_for_module() -> None:\n    \"\"\"Reset all data once before running any tests in this module.\"\"\"\n    reset_all()\n\n\n@pytest.fixture\ndef second_user(admin_user: DATestUser) -> DATestUser:  # noqa: ARG001\n    # Ensure admin exists so this new user is created with BASIC role.\n    try:\n        return UserManager.create(name=\"second_basic_user\")\n    except HTTPError as e:\n        response = e.response\n        if response is None:\n            raise\n        if response.status_code not in (400, 409):\n            raise\n        try:\n            payload = response.json()\n        except ValueError:\n            raise\n        detail = payload.get(\"detail\")\n        if not _is_user_already_exists_detail(detail):\n            raise\n        print(\"Second basic user already exists; logging in instead.\")\n        return UserManager.login_as_user(\n            DATestUser(\n                id=\"\",\n                email=build_email(\"second_basic_user\"),\n                password=DEFAULT_PASSWORD,\n                headers=GENERAL_HEADERS,\n                role=UserRole.BASIC,\n                is_active=True,\n            )\n        )\n\n\ndef _is_user_already_exists_detail(detail: object) -> bool:\n    if isinstance(detail, str):\n        normalized = detail.lower()\n        return (\n            \"already exists\" in normalized\n            or \"register_user_already_exists\" in normalized\n        )\n    if isinstance(detail, dict):\n        code = detail.get(\"code\")\n        if isinstance(code, str) and code.lower() == \"register_user_already_exists\":\n            return True\n        message = detail.get(\"message\")\n        if isinstance(message, str) and \"already exists\" in message.lower():\n            return True\n    return False\n\n\ndef _get_chat_session(\n    chat_session_id: str,\n    user: DATestUser,\n    is_shared: bool | None = None,\n    include_deleted: bool | None = None,\n) -> requests.Response:\n    params: dict[str, str] = {}\n    if is_shared is not None:\n        params[\"is_shared\"] = str(is_shared).lower()\n    if include_deleted is not None:\n        params[\"include_deleted\"] = str(include_deleted).lower()\n\n    return requests.get(\n        f\"{API_SERVER_URL}/chat/get-chat-session/{chat_session_id}\",\n        params=params,\n        headers=user.headers,\n        cookies=user.cookies,\n    )\n\n\ndef _set_sharing_status(\n    chat_session_id: str, sharing_status: str, user: DATestUser\n) -> requests.Response:\n    return requests.patch(\n        f\"{API_SERVER_URL}/chat/chat-session/{chat_session_id}\",\n        json={\"sharing_status\": sharing_status},\n        headers=user.headers,\n        cookies=user.cookies,\n    )\n\n\ndef test_private_chat_session_access(\n    basic_user: DATestUser, second_user: DATestUser\n) -> None:\n    \"\"\"Verify private sessions are only accessible by the owner and never via share link.\"\"\"\n    # Create a private chat session owned by basic_user.\n    chat_session = ChatSessionManager.create(user_performing_action=basic_user)\n\n    # Owner can access the private session normally.\n    response = _get_chat_session(str(chat_session.id), basic_user)\n    assert response.status_code == 200\n\n    # Share link should be forbidden when the session is private.\n    response = _get_chat_session(str(chat_session.id), basic_user, is_shared=True)\n    assert response.status_code == 403\n\n    # Other users cannot access private sessions directly.\n    response = _get_chat_session(str(chat_session.id), second_user)\n    assert response.status_code == 403\n\n    # Other users also cannot access private sessions via share link.\n    response = _get_chat_session(str(chat_session.id), second_user, is_shared=True)\n    assert response.status_code == 403\n\n\ndef test_public_shared_chat_session_access(\n    basic_user: DATestUser, second_user: DATestUser\n) -> None:\n    \"\"\"Verify shared sessions are accessible only via share link for non-owners.\"\"\"\n    # Create a private session, then mark it public.\n    chat_session = ChatSessionManager.create(user_performing_action=basic_user)\n\n    response = _set_sharing_status(str(chat_session.id), \"public\", basic_user)\n    assert response.status_code == 200\n\n    # Owner can access normally.\n    response = _get_chat_session(str(chat_session.id), basic_user)\n    assert response.status_code == 200\n\n    # Owner can also access via share link.\n    response = _get_chat_session(str(chat_session.id), basic_user, is_shared=True)\n    assert response.status_code == 200\n\n    # Non-owner cannot access without share link.\n    response = _get_chat_session(str(chat_session.id), second_user)\n    assert response.status_code == 403\n\n    # Non-owner can access with share link for public sessions.\n    response = _get_chat_session(str(chat_session.id), second_user, is_shared=True)\n    assert response.status_code == 200\n\n\ndef test_deleted_chat_session_access(\n    basic_user: DATestUser, second_user: DATestUser\n) -> None:\n    \"\"\"Verify deleted sessions return 404, with include_deleted gated by access checks.\"\"\"\n    # Create and soft-delete a session.\n    chat_session = ChatSessionManager.create(user_performing_action=basic_user)\n\n    deletion_success = ChatSessionManager.soft_delete(\n        chat_session=chat_session, user_performing_action=basic_user\n    )\n    assert deletion_success is True\n\n    # Deleted sessions are not accessible normally.\n    response = _get_chat_session(str(chat_session.id), basic_user)\n    assert response.status_code == 404\n\n    # Owner can fetch deleted session only with include_deleted.\n    response = _get_chat_session(str(chat_session.id), basic_user, include_deleted=True)\n    assert response.status_code == 200\n    assert response.json().get(\"deleted\") is True\n\n    # Non-owner should be blocked even with include_deleted.\n    response = _get_chat_session(\n        str(chat_session.id), second_user, include_deleted=True\n    )\n    assert response.status_code == 403\n\n\ndef test_chat_session_not_found_returns_404(basic_user: DATestUser) -> None:\n    \"\"\"Verify unknown IDs return 404.\"\"\"\n    response = _get_chat_session(str(uuid4()), basic_user)\n    assert response.status_code == 404\n"
  },
  {
    "path": "backend/tests/integration/tests/chat_retention/test_chat_retention.py",
    "content": "import os\nimport time\n\nimport pytest\nimport requests\n\nfrom onyx.db.chat import delete_chat_session\nfrom onyx.db.chat import get_chat_sessions_older_than\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.settings import SettingsManager\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestSettings\nfrom tests.integration.common_utils.test_models import DATestUser\n\nRETENTION_SECONDS = 10\n\n\ndef _run_ttl_cleanup(retention_days: int) -> None:\n    \"\"\"Directly execute TTL cleanup logic, bypassing Celery task infrastructure.\"\"\"\n    with get_session_with_current_tenant() as db_session:\n        old_chat_sessions = get_chat_sessions_older_than(retention_days, db_session)\n\n    for user_id, session_id in old_chat_sessions:\n        with get_session_with_current_tenant() as db_session:\n            delete_chat_session(\n                user_id,\n                session_id,\n                db_session,\n                include_deleted=True,\n                hard_delete=True,\n            )\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Chat retention tests are enterprise only\",\n)\ndef test_chat_retention(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that chat sessions are deleted after the retention period expires.\"\"\"\n\n    retention_days = RETENTION_SECONDS // 86400\n    settings = DATestSettings(maximum_chat_retention_days=retention_days)\n    SettingsManager.update_settings(settings, user_performing_action=admin_user)\n\n    chat_session = ChatSessionManager.create(\n        persona_id=0,\n        description=\"Test chat retention\",\n        user_performing_action=admin_user,\n    )\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"This message should be deleted soon\",\n        user_performing_action=admin_user,\n    )\n    assert (\n        response.error is None\n    ), f\"Chat response should not have an error: {response.error}\"\n\n    chat_history = ChatSessionManager.get_chat_history(\n        chat_session=chat_session,\n        user_performing_action=admin_user,\n    )\n    assert len(chat_history) > 0, \"Chat session should have messages\"\n\n    # Wait for the retention period to elapse, then directly run TTL cleanup\n    time.sleep(RETENTION_SECONDS + 2)\n    _run_ttl_cleanup(retention_days)\n\n    # Verify the chat session was deleted\n    session_deleted = False\n    try:\n        chat_history = ChatSessionManager.get_chat_history(\n            chat_session=chat_session,\n            user_performing_action=admin_user,\n        )\n        session_deleted = len(chat_history) == 0\n    except requests.exceptions.HTTPError as e:\n        if e.response.status_code in (404, 400):\n            session_deleted = True\n        else:\n            raise\n\n    assert session_deleted, \"Chat session was not deleted after retention period\"\n"
  },
  {
    "path": "backend/tests/integration/tests/code_interpreter/conftest.py",
    "content": "from collections.abc import Generator\n\nimport pytest\nimport requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestUser\n\nCODE_INTERPRETER_URL = f\"{API_SERVER_URL}/admin/code-interpreter\"\n\n\n@pytest.fixture\ndef preserve_code_interpreter_state(\n    admin_user: DATestUser,\n) -> Generator[None, None, None]:\n    \"\"\"Capture the code interpreter enabled state before a test and restore it\n    afterwards, so that tests that toggle the setting cannot leak state.\"\"\"\n    response = requests.get(\n        CODE_INTERPRETER_URL,\n        headers=admin_user.headers,\n    )\n    response.raise_for_status()\n    initial_enabled = response.json()[\"enabled\"]\n\n    yield\n\n    restore = requests.put(\n        CODE_INTERPRETER_URL,\n        json={\"enabled\": initial_enabled},\n        headers=admin_user.headers,\n    )\n    restore.raise_for_status()\n"
  },
  {
    "path": "backend/tests/integration/tests/code_interpreter/test_code_interpreter_api.py",
    "content": "import requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestUser\n\nCODE_INTERPRETER_URL = f\"{API_SERVER_URL}/admin/code-interpreter\"\nCODE_INTERPRETER_HEALTH_URL = f\"{CODE_INTERPRETER_URL}/health\"\n\n\ndef test_get_code_interpreter_health_as_admin(\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Health endpoint should return a JSON object with a 'healthy' boolean.\"\"\"\n    response = requests.get(\n        CODE_INTERPRETER_HEALTH_URL,\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n    data = response.json()\n    assert \"healthy\" in data\n    assert isinstance(data[\"healthy\"], bool)\n\n\ndef test_get_code_interpreter_status_as_admin(\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"GET endpoint should return a JSON object with an 'enabled' boolean.\"\"\"\n    response = requests.get(\n        CODE_INTERPRETER_URL,\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n    data = response.json()\n    assert \"enabled\" in data\n    assert isinstance(data[\"enabled\"], bool)\n\n\ndef test_update_code_interpreter_disable_and_enable(\n    admin_user: DATestUser,\n    preserve_code_interpreter_state: None,  # noqa: ARG001\n) -> None:\n    \"\"\"PUT endpoint should update the enabled flag and persist across reads.\"\"\"\n    # Disable\n    response = requests.put(\n        CODE_INTERPRETER_URL,\n        json={\"enabled\": False},\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n\n    # Verify disabled\n    response = requests.get(\n        CODE_INTERPRETER_URL,\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n    assert response.json()[\"enabled\"] is False\n\n    # Re-enable\n    response = requests.put(\n        CODE_INTERPRETER_URL,\n        json={\"enabled\": True},\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n\n    # Verify enabled\n    response = requests.get(\n        CODE_INTERPRETER_URL,\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n    assert response.json()[\"enabled\"] is True\n\n\ndef test_code_interpreter_endpoints_require_admin(\n    basic_user: DATestUser,\n) -> None:\n    \"\"\"All code interpreter endpoints should reject non-admin users.\"\"\"\n    health_response = requests.get(\n        CODE_INTERPRETER_HEALTH_URL,\n        headers=basic_user.headers,\n    )\n    assert health_response.status_code == 403\n\n    get_response = requests.get(\n        CODE_INTERPRETER_URL,\n        headers=basic_user.headers,\n    )\n    assert get_response.status_code == 403\n\n    put_response = requests.put(\n        CODE_INTERPRETER_URL,\n        json={\"enabled\": True},\n        headers=basic_user.headers,\n    )\n    assert put_response.status_code == 403\n"
  },
  {
    "path": "backend/tests/integration/tests/connector/test_connector_creation.py",
    "content": "import os\nfrom datetime import datetime\nfrom datetime import timezone\n\nfrom onyx.connectors.models import InputType\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_connector_creation(reset: None) -> None:  # noqa: ARG001\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    # create connectors\n    cc_pair_1 = CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n    cc_pair_info = CCPairManager.get_single(\n        cc_pair_1.id, user_performing_action=admin_user\n    )\n    assert cc_pair_info\n    assert cc_pair_info.creator\n    assert str(cc_pair_info.creator) == admin_user.id\n    assert cc_pair_info.creator_email == admin_user.email\n\n\ndef test_overlapping_connector_creation(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Tests that connectors indexing the same documents don't interfere with each other.\n    A previous bug involved document by cc pair entries not being added for new connectors\n    when the docs existed already via another connector and were up to date relative to the source.\n    \"\"\"\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    config = {\n        \"wiki_base\": os.environ[\"CONFLUENCE_TEST_SPACE_URL\"],\n        \"space\": \"DailyConne\",\n        \"is_cloud\": True,\n    }\n\n    credential = {\n        \"confluence_username\": os.environ[\"CONFLUENCE_USER_NAME\"],\n        \"confluence_access_token\": os.environ[\"CONFLUENCE_ACCESS_TOKEN\"],\n    }\n\n    # store the time before we create the connector so that we know after\n    # when the indexing should have started\n    now = datetime.now(timezone.utc)\n\n    # create connector\n    cc_pair_1 = CCPairManager.create_from_scratch(\n        source=DocumentSource.CONFLUENCE,\n        connector_specific_config=config,\n        credential_json=credential,\n        user_performing_action=admin_user,\n        input_type=InputType.POLL,\n    )\n\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair_1, now, timeout=300, user_performing_action=admin_user\n    )\n\n    now = datetime.now(timezone.utc)\n\n    cc_pair_2 = CCPairManager.create_from_scratch(\n        source=DocumentSource.CONFLUENCE,\n        connector_specific_config=config,\n        credential_json=credential,\n        user_performing_action=admin_user,\n        input_type=InputType.POLL,\n    )\n\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair_2, now, timeout=300, user_performing_action=admin_user\n    )\n\n    info_1 = CCPairManager.get_single(cc_pair_1.id, user_performing_action=admin_user)\n    assert info_1\n\n    info_2 = CCPairManager.get_single(cc_pair_2.id, user_performing_action=admin_user)\n    assert info_2\n\n    assert info_1.num_docs_indexed == info_2.num_docs_indexed\n\n\ndef test_connector_pause_while_indexing(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Tests that we can pause a connector while indexing is in progress and that\n    tasks end early or abort as a result.\n\n    TODO: This does not specifically test for soft or hard termination code paths.\n    Design specific tests for those use cases.\n    \"\"\"\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    config = {\n        \"wiki_base\": os.environ[\"CONFLUENCE_TEST_SPACE_URL\"],\n        \"is_cloud\": True,\n    }\n\n    credential = {\n        \"confluence_username\": os.environ[\"CONFLUENCE_USER_NAME\"],\n        \"confluence_access_token\": os.environ[\"CONFLUENCE_ACCESS_TOKEN\"],\n    }\n\n    # store the time before we create the connector so that we know after\n    # when the indexing should have started\n    datetime.now(timezone.utc)\n\n    # create connector\n    cc_pair_1 = CCPairManager.create_from_scratch(\n        source=DocumentSource.CONFLUENCE,\n        connector_specific_config=config,\n        credential_json=credential,\n        user_performing_action=admin_user,\n        input_type=InputType.POLL,\n    )\n\n    # NOTE: A bit flaky in our CI due to varying indexing times. Empirically\n    # 120s was not always enough to index 16 docs from Confluence so trying to\n    # bump down the indexing progress to wait for to 4 docs from 16.\n    CCPairManager.wait_for_indexing_in_progress(\n        cc_pair_1, timeout=120, num_docs=4, user_performing_action=admin_user\n    )\n\n    CCPairManager.pause_cc_pair(cc_pair_1, user_performing_action=admin_user)\n\n    CCPairManager.wait_for_indexing_inactive(\n        cc_pair_1, timeout=60, user_performing_action=admin_user\n    )\n    return\n"
  },
  {
    "path": "backend/tests/integration/tests/connector/test_connector_deletion.py",
    "content": "\"\"\"\nThis file contains tests for the following:\n- Ensuring deletion of a connector also:\n    - deletes the documents in vespa for that connector\n    - updates the document sets and user groups to remove the connector\n- Ensure that deleting a connector that is part of an overlapping document set and/or user group works as expected\n\"\"\"\n\nimport os\nfrom uuid import uuid4\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.db.engine.sql_engine import get_sqlalchemy_engine\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.index_attempt import create_index_attempt\nfrom onyx.db.index_attempt import create_index_attempt_error\nfrom onyx.db.models import IndexAttempt\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.constants import NUM_DOCS\nfrom tests.integration.common_utils.managers.api_key import APIKeyManager\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.document_set import DocumentSetManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestAPIKey\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import DATestUserGroup\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\n\ndef test_connector_deletion(\n    reset: None,  # noqa: ARG001\n    vespa_client: vespa_fixture,\n) -> None:\n    user_group_1: DATestUserGroup\n    user_group_2: DATestUserGroup\n\n    is_ee = (\n        os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() == \"true\"\n    )\n\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n    # create api key\n    api_key: DATestAPIKey = APIKeyManager.create(\n        user_performing_action=admin_user,\n    )\n\n    # create connectors\n    cc_pair_1 = CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n    cc_pair_2 = CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n    # seed documents\n    cc_pair_1.documents = DocumentManager.seed_dummy_docs(\n        cc_pair=cc_pair_1,\n        num_docs=NUM_DOCS,\n        api_key=api_key,\n    )\n    cc_pair_2.documents = DocumentManager.seed_dummy_docs(\n        cc_pair=cc_pair_2,\n        num_docs=NUM_DOCS,\n        api_key=api_key,\n    )\n\n    # create document sets\n    doc_set_1 = DocumentSetManager.create(\n        name=\"Test Document Set 1\",\n        cc_pair_ids=[cc_pair_1.id],\n        user_performing_action=admin_user,\n    )\n    doc_set_2 = DocumentSetManager.create(\n        name=\"Test Document Set 2\",\n        cc_pair_ids=[cc_pair_1.id, cc_pair_2.id],\n        user_performing_action=admin_user,\n    )\n\n    # wait for document sets to be synced\n    DocumentSetManager.wait_for_sync(user_performing_action=admin_user)\n\n    print(\"Document sets created and synced\")\n\n    if is_ee:\n        # create user groups\n        user_group_1 = UserGroupManager.create(\n            cc_pair_ids=[cc_pair_1.id],\n            user_performing_action=admin_user,\n        )\n        user_group_2 = UserGroupManager.create(\n            cc_pair_ids=[cc_pair_1.id, cc_pair_2.id],\n            user_performing_action=admin_user,\n        )\n        UserGroupManager.wait_for_sync(user_performing_action=admin_user)\n\n    # inject a finished index attempt and index attempt error (exercises foreign key errors)\n    with Session(get_sqlalchemy_engine()) as db_session:\n        primary_search_settings = get_current_search_settings(db_session)\n        new_attempt = IndexAttempt(\n            connector_credential_pair_id=cc_pair_1.id,\n            search_settings_id=primary_search_settings.id,\n            from_beginning=False,\n            status=IndexingStatus.COMPLETED_WITH_ERRORS,\n        )\n        db_session.add(new_attempt)\n        db_session.commit()\n\n        create_index_attempt_error(\n            index_attempt_id=new_attempt.id,\n            connector_credential_pair_id=cc_pair_1.id,\n            failure=ConnectorFailure(\n                failure_message=\"Test error\",\n                failed_document=DocumentFailure(\n                    document_id=cc_pair_1.documents[0].id,\n                    document_link=None,\n                ),\n                failed_entity=None,\n            ),\n            db_session=db_session,\n        )\n\n    # delete connector 1\n    CCPairManager.pause_cc_pair(\n        cc_pair=cc_pair_1,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.delete(\n        cc_pair=cc_pair_1,\n        user_performing_action=admin_user,\n    )\n\n    # inject an index attempt and index attempt error (exercises foreign key errors)\n    with Session(get_sqlalchemy_engine()) as db_session:\n        attempt_id = create_index_attempt(\n            connector_credential_pair_id=cc_pair_1.id,\n            search_settings_id=1,\n            db_session=db_session,\n        )\n        create_index_attempt_error(\n            index_attempt_id=attempt_id,\n            connector_credential_pair_id=cc_pair_1.id,\n            failure=ConnectorFailure(\n                failure_message=\"Test error\",\n                failed_document=DocumentFailure(\n                    document_id=cc_pair_1.documents[0].id,\n                    document_link=None,\n                ),\n                failed_entity=None,\n            ),\n            db_session=db_session,\n        )\n\n    # Update local records to match the database for later comparison\n    doc_set_1.cc_pair_ids = []\n    doc_set_2.cc_pair_ids = [cc_pair_2.id]\n    cc_pair_1.groups = []\n    if is_ee:\n        cc_pair_2.groups = [user_group_2.id]\n    else:\n        cc_pair_2.groups = []\n\n    CCPairManager.wait_for_deletion_completion(\n        cc_pair_id=cc_pair_1.id, user_performing_action=admin_user\n    )\n\n    # validate vespa documents\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_1,\n        doc_set_names=[],\n        group_names=[],\n        doc_creating_user=admin_user,\n        verify_deleted=True,\n    )\n\n    cc_pair_2_group_name_expected = []\n    if is_ee:\n        cc_pair_2_group_name_expected = [user_group_2.name]\n\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_2,\n        doc_set_names=[doc_set_2.name],\n        group_names=cc_pair_2_group_name_expected,\n        doc_creating_user=admin_user,\n        verify_deleted=False,\n    )\n\n    # check that only connector 1 is deleted\n    CCPairManager.verify(\n        cc_pair=cc_pair_2,\n        user_performing_action=admin_user,\n    )\n\n    # validate document sets\n    DocumentSetManager.verify(\n        document_set=doc_set_1,\n        user_performing_action=admin_user,\n    )\n    DocumentSetManager.verify(\n        document_set=doc_set_2,\n        user_performing_action=admin_user,\n    )\n\n    if is_ee:\n        user_group_1.cc_pair_ids = []\n        user_group_2.cc_pair_ids = [cc_pair_2.id]\n\n        # validate user groups\n        UserGroupManager.verify(\n            user_group=user_group_1,\n            user_performing_action=admin_user,\n        )\n        UserGroupManager.verify(\n            user_group=user_group_2,\n            user_performing_action=admin_user,\n        )\n\n\ndef test_connector_deletion_for_overlapping_connectors(\n    reset: None,  # noqa: ARG001\n    vespa_client: vespa_fixture,\n) -> None:\n    \"\"\"Checks to make sure that connectors with overlapping documents work properly. Specifically, that the overlapping\n    document (1) still exists and (2) has the right document set / group post-deletion of one of the connectors.\n    \"\"\"\n    user_group_1: DATestUserGroup\n    user_group_2: DATestUserGroup\n\n    is_ee = (\n        os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() == \"true\"\n    )\n\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n    # create api key\n    api_key: DATestAPIKey = APIKeyManager.create(\n        user_performing_action=admin_user,\n    )\n\n    # create connectors\n    cc_pair_1 = CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n    cc_pair_2 = CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n    doc_ids = [str(uuid4())]\n    cc_pair_1.documents = DocumentManager.seed_dummy_docs(\n        cc_pair=cc_pair_1,\n        document_ids=doc_ids,\n        api_key=api_key,\n    )\n    cc_pair_2.documents = DocumentManager.seed_dummy_docs(\n        cc_pair=cc_pair_2,\n        document_ids=doc_ids,\n        api_key=api_key,\n    )\n\n    # verify vespa document exists and that it is not in any document sets or groups\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_1,\n        doc_set_names=[],\n        group_names=[],\n        doc_creating_user=admin_user,\n    )\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_2,\n        doc_set_names=[],\n        group_names=[],\n        doc_creating_user=admin_user,\n    )\n\n    # create document set\n    doc_set_1 = DocumentSetManager.create(\n        name=\"Test Document Set 1\",\n        cc_pair_ids=[cc_pair_1.id],\n        user_performing_action=admin_user,\n    )\n    DocumentSetManager.wait_for_sync(\n        document_sets_to_check=[doc_set_1],\n        user_performing_action=admin_user,\n    )\n\n    print(\"Document set 1 created and synced\")\n\n    # verify vespa document is in the document set\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_1,\n        doc_set_names=[doc_set_1.name],\n        doc_creating_user=admin_user,\n    )\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_2,\n        doc_creating_user=admin_user,\n    )\n\n    if is_ee:\n        # create a user group and attach it to connector 1\n        user_group_1 = UserGroupManager.create(\n            name=\"Test User Group 1\",\n            cc_pair_ids=[cc_pair_1.id],\n            user_performing_action=admin_user,\n        )\n        UserGroupManager.wait_for_sync(\n            user_groups_to_check=[user_group_1],\n            user_performing_action=admin_user,\n        )\n        cc_pair_1.groups = [user_group_1.id]\n\n        print(\"User group 1 created and synced\")\n\n        # create a user group and attach it to connector 2\n        user_group_2 = UserGroupManager.create(\n            name=\"Test User Group 2\",\n            cc_pair_ids=[cc_pair_2.id],\n            user_performing_action=admin_user,\n        )\n        UserGroupManager.wait_for_sync(\n            user_groups_to_check=[user_group_2],\n            user_performing_action=admin_user,\n        )\n        cc_pair_2.groups = [user_group_2.id]\n\n        print(\"User group 2 created and synced\")\n\n        # verify vespa document is in the user group\n        DocumentManager.verify(\n            vespa_client=vespa_client,\n            cc_pair=cc_pair_1,\n            group_names=[user_group_1.name, user_group_2.name],\n            doc_creating_user=admin_user,\n        )\n        DocumentManager.verify(\n            vespa_client=vespa_client,\n            cc_pair=cc_pair_2,\n            group_names=[user_group_1.name, user_group_2.name],\n            doc_creating_user=admin_user,\n        )\n\n    # delete connector 1\n    CCPairManager.pause_cc_pair(\n        cc_pair=cc_pair_1,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.delete(\n        cc_pair=cc_pair_1,\n        user_performing_action=admin_user,\n    )\n\n    # wait for deletion to finish\n    CCPairManager.wait_for_deletion_completion(\n        cc_pair_id=cc_pair_1.id, user_performing_action=admin_user\n    )\n\n    print(\"Connector 1 deleted\")\n\n    # check that only connector 1 is deleted\n    # TODO: check for the CC pair rather than the connector once the refactor is done\n    CCPairManager.verify(\n        cc_pair=cc_pair_1,\n        verify_deleted=True,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.verify(\n        cc_pair=cc_pair_2,\n        user_performing_action=admin_user,\n    )\n\n    # verify the document is not in any document sets\n    # verify the document is only in user group 2\n    group_names_expected = []\n    if is_ee:\n        group_names_expected = [user_group_2.name]\n\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_2,\n        doc_set_names=[],\n        group_names=group_names_expected,\n        doc_creating_user=admin_user,\n        verify_deleted=False,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/connector/test_last_indexed_time.py",
    "content": "\"\"\"\nIntegration tests for the \"Last Indexed\" time displayed on both the\nper-connector detail page and the all-connectors listing page.\n\nExpected behavior: \"Last Indexed\" = time_started of the most recent\nsuccessful index attempt for the cc pair, regardless of pagination.\n\nEdge cases:\n1. First page of index attempts is entirely errors — last_indexed should\n   still reflect the older successful attempt beyond page 1.\n2. Credential swap — successful attempts, then failures after a\n   \"credential change\"; last_indexed should reflect the most recent\n   successful attempt.\n3. Mix of statuses — only the most recent successful attempt matters.\n4. COMPLETED_WITH_ERRORS counts as a success for last_indexed purposes.\n\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nfrom onyx.db.models import IndexingStatus\nfrom onyx.server.documents.models import CCPairFullInfo\nfrom onyx.server.documents.models import ConnectorIndexingStatusLite\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.index_attempt import IndexAttemptManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _wait_for_real_success(\n    cc_pair: DATestCCPair,\n    admin: DATestUser,\n) -> None:\n    \"\"\"Wait for the initial index attempt to complete successfully.\"\"\"\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair,\n        after=datetime(2000, 1, 1, tzinfo=timezone.utc),\n        user_performing_action=admin,\n        timeout=120,\n    )\n\n\ndef _get_detail(cc_pair_id: int, admin: DATestUser) -> CCPairFullInfo:\n    result = CCPairManager.get_single(cc_pair_id, admin)\n    assert result is not None\n    return result\n\n\ndef _get_listing(cc_pair_id: int, admin: DATestUser) -> ConnectorIndexingStatusLite:\n    result = CCPairManager.get_indexing_status_by_id(cc_pair_id, admin)\n    assert result is not None\n    return result\n\n\ndef test_last_indexed_first_page_all_errors(reset: None) -> None:  # noqa: ARG001\n    \"\"\"When the first page of index attempts is entirely errors but an\n    older successful attempt exists, both the detail page and the listing\n    page should still show the time of that successful attempt.\n\n    The detail page UI uses page size 8. We insert 10 failed attempts\n    more recent than the initial success to push the success off page 1.\n    \"\"\"\n    admin = UserManager.create(name=\"admin_first_page_errors\")\n    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin)\n    _wait_for_real_success(cc_pair, admin)\n\n    # Baseline: last_success should be set from the initial successful run\n    listing_before = _get_listing(cc_pair.id, admin)\n    assert listing_before.last_success is not None\n\n    # 10 recent failures push the success off page 1\n    IndexAttemptManager.create_test_index_attempts(\n        num_attempts=10,\n        cc_pair_id=cc_pair.id,\n        status=IndexingStatus.FAILED,\n        error_msg=\"simulated failure\",\n        base_time=datetime.now(tz=timezone.utc),\n    )\n\n    detail = _get_detail(cc_pair.id, admin)\n    listing = _get_listing(cc_pair.id, admin)\n\n    assert (\n        detail.last_indexed is not None\n    ), \"Detail page last_indexed is None even though a successful attempt exists\"\n    assert (\n        listing.last_success is not None\n    ), \"Listing page last_success is None even though a successful attempt exists\"\n\n    # Both surfaces must agree\n    assert detail.last_indexed == listing.last_success, (\n        f\"Detail last_indexed={detail.last_indexed} != \"\n        f\"listing last_success={listing.last_success}\"\n    )\n\n\ndef test_last_indexed_credential_swap_scenario(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Perform an actual credential swap: create connector + cred1 (cc_pair_1),\n    wait for success, then associate a new cred2 with the same connector\n    (cc_pair_2), wait for that to succeed, and inject failures on cc_pair_2.\n\n    cc_pair_2's last_indexed must reflect cc_pair_2's own success, not\n    cc_pair_1's older one. Both the detail page and listing page must agree.\n    \"\"\"\n    admin = UserManager.create(name=\"admin_cred_swap\")\n\n    connector = ConnectorManager.create(user_performing_action=admin)\n    cred1 = CredentialManager.create(user_performing_action=admin)\n    cc_pair_1 = CCPairManager.create(\n        connector_id=connector.id,\n        credential_id=cred1.id,\n        user_performing_action=admin,\n    )\n    _wait_for_real_success(cc_pair_1, admin)\n\n    cred2 = CredentialManager.create(user_performing_action=admin, name=\"swapped-cred\")\n    cc_pair_2 = CCPairManager.create(\n        connector_id=connector.id,\n        credential_id=cred2.id,\n        user_performing_action=admin,\n    )\n    _wait_for_real_success(cc_pair_2, admin)\n\n    listing_after_swap = _get_listing(cc_pair_2.id, admin)\n    assert listing_after_swap.last_success is not None\n\n    IndexAttemptManager.create_test_index_attempts(\n        num_attempts=10,\n        cc_pair_id=cc_pair_2.id,\n        status=IndexingStatus.FAILED,\n        error_msg=\"credential expired\",\n        base_time=datetime.now(tz=timezone.utc),\n    )\n\n    detail = _get_detail(cc_pair_2.id, admin)\n    listing = _get_listing(cc_pair_2.id, admin)\n\n    assert detail.last_indexed is not None\n    assert listing.last_success is not None\n\n    assert detail.last_indexed == listing.last_success, (\n        f\"Detail last_indexed={detail.last_indexed} != \"\n        f\"listing last_success={listing.last_success}\"\n    )\n\n\ndef test_last_indexed_mixed_statuses(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Mix of in_progress, failed, and successful attempts. Only the most\n    recent successful attempt's time matters.\"\"\"\n    admin = UserManager.create(name=\"admin_mixed\")\n    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin)\n    _wait_for_real_success(cc_pair, admin)\n\n    now = datetime.now(tz=timezone.utc)\n\n    # Success 5 hours ago\n    IndexAttemptManager.create_test_index_attempts(\n        num_attempts=1,\n        cc_pair_id=cc_pair.id,\n        status=IndexingStatus.SUCCESS,\n        base_time=now - timedelta(hours=5),\n    )\n\n    # Failures 3 hours ago\n    IndexAttemptManager.create_test_index_attempts(\n        num_attempts=3,\n        cc_pair_id=cc_pair.id,\n        status=IndexingStatus.FAILED,\n        error_msg=\"transient failure\",\n        base_time=now - timedelta(hours=3),\n    )\n\n    # In-progress 1 hour ago\n    IndexAttemptManager.create_test_index_attempts(\n        num_attempts=1,\n        cc_pair_id=cc_pair.id,\n        status=IndexingStatus.IN_PROGRESS,\n        base_time=now - timedelta(hours=1),\n    )\n\n    detail = _get_detail(cc_pair.id, admin)\n    listing = _get_listing(cc_pair.id, admin)\n\n    assert detail.last_indexed is not None\n    assert listing.last_success is not None\n\n    assert detail.last_indexed == listing.last_success, (\n        f\"Detail last_indexed={detail.last_indexed} != \"\n        f\"listing last_success={listing.last_success}\"\n    )\n\n\ndef test_last_indexed_completed_with_errors(reset: None) -> None:  # noqa: ARG001\n    \"\"\"COMPLETED_WITH_ERRORS is treated as a successful attempt (matching\n    IndexingStatus.is_successful()). When it is the most recent \"success\"\n    and later attempts all failed, both surfaces should reflect its time.\"\"\"\n    admin = UserManager.create(name=\"admin_completed_errors\")\n    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin)\n    _wait_for_real_success(cc_pair, admin)\n\n    now = datetime.now(tz=timezone.utc)\n\n    # COMPLETED_WITH_ERRORS 2 hours ago\n    IndexAttemptManager.create_test_index_attempts(\n        num_attempts=1,\n        cc_pair_id=cc_pair.id,\n        status=IndexingStatus.COMPLETED_WITH_ERRORS,\n        base_time=now - timedelta(hours=2),\n    )\n\n    # 10 failures after — push everything else off page 1\n    IndexAttemptManager.create_test_index_attempts(\n        num_attempts=10,\n        cc_pair_id=cc_pair.id,\n        status=IndexingStatus.FAILED,\n        error_msg=\"post-partial failure\",\n        base_time=now,\n    )\n\n    detail = _get_detail(cc_pair.id, admin)\n    listing = _get_listing(cc_pair.id, admin)\n\n    assert (\n        detail.last_indexed is not None\n    ), \"COMPLETED_WITH_ERRORS should count as a success for last_indexed\"\n    assert (\n        listing.last_success is not None\n    ), \"COMPLETED_WITH_ERRORS should count as a success for last_success\"\n\n    assert detail.last_indexed == listing.last_success, (\n        f\"Detail last_indexed={detail.last_indexed} != \"\n        f\"listing last_success={listing.last_success}\"\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/discord_bot/test_discord_bot_api.py",
    "content": "\"\"\"Integration tests for Discord bot API endpoints.\n\nThese tests hit actual API endpoints via HTTP requests.\n\"\"\"\n\nimport pytest\nimport requests\n\nfrom onyx.db.discord_bot import get_discord_service_api_key\nfrom onyx.db.discord_bot import get_or_create_discord_service_api_key\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom tests.integration.common_utils.managers.discord_bot import DiscordBotManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass TestBotConfigEndpoints:\n    \"\"\"Tests for /manage/admin/discord-bot/config endpoints.\"\"\"\n\n    def test_get_bot_config_not_configured(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"GET /config returns configured=False when no config exists.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Ensure no config exists\n        DiscordBotManager.delete_bot_config_if_exists(admin_user)\n\n        config = DiscordBotManager.get_bot_config(admin_user)\n\n        assert config[\"configured\"] is False\n        assert \"created_at\" not in config or config.get(\"created_at\") is None\n\n    def test_create_bot_config(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"POST /config creates a new bot config.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Ensure no config exists\n        DiscordBotManager.delete_bot_config_if_exists(admin_user)\n\n        config = DiscordBotManager.create_bot_config(\n            bot_token=\"test_token_123\",\n            user_performing_action=admin_user,\n        )\n\n        assert config[\"configured\"] is True\n        assert \"created_at\" in config\n\n        # Cleanup\n        DiscordBotManager.delete_bot_config_if_exists(admin_user)\n\n    def test_create_bot_config_already_exists(\n        self,\n        reset: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"POST /config returns 409 if config already exists.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Ensure no config exists, then create one\n        DiscordBotManager.delete_bot_config_if_exists(admin_user)\n        DiscordBotManager.create_bot_config(\n            bot_token=\"token1\",\n            user_performing_action=admin_user,\n        )\n\n        # Try to create another - should fail\n        with pytest.raises(requests.HTTPError) as exc_info:\n            DiscordBotManager.create_bot_config(\n                bot_token=\"token2\",\n                user_performing_action=admin_user,\n            )\n\n        assert exc_info.value.response.status_code == 409\n\n        # Cleanup\n        DiscordBotManager.delete_bot_config_if_exists(admin_user)\n\n    def test_delete_bot_config(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"DELETE /config removes the bot config.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Ensure no config exists, then create one\n        DiscordBotManager.delete_bot_config_if_exists(admin_user)\n        DiscordBotManager.create_bot_config(\n            bot_token=\"test_token\",\n            user_performing_action=admin_user,\n        )\n\n        # Delete it\n        result = DiscordBotManager.delete_bot_config(admin_user)\n        assert result[\"deleted\"] is True\n\n        # Verify it's gone\n        config = DiscordBotManager.get_bot_config(admin_user)\n        assert config[\"configured\"] is False\n\n    def test_delete_bot_config_not_found(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"DELETE /config returns 404 if no config exists.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Ensure no config exists\n        DiscordBotManager.delete_bot_config_if_exists(admin_user)\n\n        # Try to delete - should fail\n        with pytest.raises(requests.HTTPError) as exc_info:\n            DiscordBotManager.delete_bot_config(admin_user)\n\n        assert exc_info.value.response.status_code == 404\n\n\nclass TestGuildConfigEndpoints:\n    \"\"\"Tests for /manage/admin/discord-bot/guilds endpoints.\"\"\"\n\n    def test_create_guild_config(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"POST /guilds creates a new guild config with registration key.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        guild = DiscordBotManager.create_guild(admin_user)\n\n        assert guild.id is not None\n        assert guild.registration_key is not None\n        assert guild.registration_key.startswith(\"discord_\")\n\n        # Cleanup\n        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)\n\n    def test_list_guilds(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"GET /guilds returns all guild configs.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Create some guilds\n        guild1 = DiscordBotManager.create_guild(admin_user)\n        guild2 = DiscordBotManager.create_guild(admin_user)\n\n        guilds = DiscordBotManager.list_guilds(admin_user)\n\n        guild_ids = [g[\"id\"] for g in guilds]\n        assert guild1.id in guild_ids\n        assert guild2.id in guild_ids\n\n        # Cleanup\n        DiscordBotManager.delete_guild_if_exists(guild1.id, admin_user)\n        DiscordBotManager.delete_guild_if_exists(guild2.id, admin_user)\n\n    def test_get_guild_config(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"GET /guilds/{config_id} returns the specific guild config.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        guild = DiscordBotManager.create_guild(admin_user)\n\n        fetched = DiscordBotManager.get_guild(guild.id, admin_user)\n\n        assert fetched[\"id\"] == guild.id\n        assert fetched[\"enabled\"] is True  # Default\n        assert fetched[\"guild_id\"] is None  # Not registered yet\n        assert fetched[\"guild_name\"] is None\n\n        # Cleanup\n        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)\n\n    def test_get_guild_config_not_found(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"GET /guilds/{config_id} returns 404 for non-existent guild.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        result = DiscordBotManager.get_guild_or_none(999999, admin_user)\n        assert result is None\n\n    def test_update_guild_config(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"PATCH /guilds/{config_id} updates the guild config.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        guild = DiscordBotManager.create_guild(admin_user)\n\n        # Update enabled status\n        updated = DiscordBotManager.update_guild(\n            guild.id,\n            admin_user,\n            enabled=False,\n        )\n\n        assert updated[\"enabled\"] is False\n\n        # Verify persistence\n        fetched = DiscordBotManager.get_guild(guild.id, admin_user)\n        assert fetched[\"enabled\"] is False\n\n        # Cleanup\n        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)\n\n    def test_delete_guild_config(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"DELETE /guilds/{config_id} removes the guild config.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        guild = DiscordBotManager.create_guild(admin_user)\n\n        # Delete it\n        result = DiscordBotManager.delete_guild(guild.id, admin_user)\n        assert result[\"deleted\"] is True\n\n        # Verify it's gone\n        assert DiscordBotManager.get_guild_or_none(guild.id, admin_user) is None\n\n    def test_delete_guild_config_not_found(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"DELETE /guilds/{config_id} returns 404 for non-existent guild.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        with pytest.raises(requests.HTTPError) as exc_info:\n            DiscordBotManager.delete_guild(999999, admin_user)\n\n        assert exc_info.value.response.status_code == 404\n\n    def test_registration_key_format(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"Registration key has proper format with tenant encoded.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        guild = DiscordBotManager.create_guild(admin_user)\n\n        # Key should be: discord_{encoded_tenant}.{random}\n        key = guild.registration_key\n        assert key is not None\n        assert key.startswith(\"discord_\")\n\n        # Should have two parts separated by dot\n        key_body = key.removeprefix(\"discord_\")\n        parts = key_body.split(\".\", 1)\n        assert len(parts) == 2\n\n        # Cleanup\n        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)\n\n    def test_each_registration_key_is_unique(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"Each created guild gets a unique registration key.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        guilds = [DiscordBotManager.create_guild(admin_user) for _ in range(5)]\n        keys = [g.registration_key for g in guilds]\n\n        assert len(set(keys)) == 5  # All unique\n\n        # Cleanup\n        for guild in guilds:\n            DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)\n\n\nclass TestChannelConfigEndpoints:\n    \"\"\"Tests for /manage/admin/discord-bot/guilds/{id}/channels endpoints.\"\"\"\n\n    def test_list_channels_empty(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"GET /guilds/{id}/channels returns empty list when no channels exist.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Create a registered guild (has guild_id set)\n        guild = DiscordBotManager.create_registered_guild_in_db(\n            guild_id=111111111,\n            guild_name=\"Test Guild\",\n        )\n\n        channels = DiscordBotManager.list_channels(guild.id, admin_user)\n\n        assert channels == []\n\n        # Cleanup\n        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)\n\n    def test_list_channels_with_data(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"GET /guilds/{id}/channels returns channel configs.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Create a registered guild (has guild_id set)\n        guild = DiscordBotManager.create_registered_guild_in_db(\n            guild_id=222222222,\n            guild_name=\"Test Guild\",\n        )\n\n        # Create test channels directly in DB\n        channel1 = DiscordBotManager.create_test_channel_in_db(\n            guild_config_id=guild.id,\n            channel_id=123456789,\n            channel_name=\"general\",\n        )\n        channel2 = DiscordBotManager.create_test_channel_in_db(\n            guild_config_id=guild.id,\n            channel_id=987654321,\n            channel_name=\"help\",\n            channel_type=\"forum\",\n        )\n\n        channels = DiscordBotManager.list_channels(guild.id, admin_user)\n\n        assert len(channels) == 2\n        channel_ids = [c.id for c in channels]\n        assert channel1.id in channel_ids\n        assert channel2.id in channel_ids\n\n        # Cleanup\n        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)\n\n    def test_update_channel_enabled(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"PATCH /guilds/{id}/channels/{id} updates enabled status.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Create a registered guild (has guild_id set)\n        guild = DiscordBotManager.create_registered_guild_in_db(\n            guild_id=333333333,\n            guild_name=\"Test Guild\",\n        )\n        channel = DiscordBotManager.create_test_channel_in_db(\n            guild_config_id=guild.id,\n            channel_id=123456789,\n            channel_name=\"general\",\n        )\n\n        # Default is disabled\n        assert channel.enabled is False\n\n        # Enable the channel\n        updated = DiscordBotManager.update_channel(\n            guild.id,\n            channel.id,\n            admin_user,\n            enabled=True,\n        )\n\n        assert updated.enabled is True\n\n        # Verify persistence\n        channels = DiscordBotManager.list_channels(guild.id, admin_user)\n        found = next(c for c in channels if c.id == channel.id)\n        assert found.enabled is True\n\n        # Cleanup\n        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)\n\n    def test_update_channel_thread_only_mode(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"PATCH /guilds/{id}/channels/{id} updates thread_only_mode.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Create a registered guild (has guild_id set)\n        guild = DiscordBotManager.create_registered_guild_in_db(\n            guild_id=444444444,\n            guild_name=\"Test Guild\",\n        )\n        channel = DiscordBotManager.create_test_channel_in_db(\n            guild_config_id=guild.id,\n            channel_id=123456789,\n            channel_name=\"general\",\n        )\n\n        # Default is False\n        assert channel.thread_only_mode is False\n\n        # Enable thread_only_mode\n        updated = DiscordBotManager.update_channel(\n            guild.id,\n            channel.id,\n            admin_user,\n            thread_only_mode=True,\n        )\n\n        assert updated.thread_only_mode is True\n\n        # Cleanup\n        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)\n\n    def test_update_channel_require_bot_invocation(\n        self,\n        reset: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"PATCH /guilds/{id}/channels/{id} updates require_bot_invocation.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Create a registered guild (has guild_id set)\n        guild = DiscordBotManager.create_registered_guild_in_db(\n            guild_id=555555555,\n            guild_name=\"Test Guild\",\n        )\n        channel = DiscordBotManager.create_test_channel_in_db(\n            guild_config_id=guild.id,\n            channel_id=123456789,\n            channel_name=\"general\",\n        )\n\n        # Default is True\n        assert channel.require_bot_invocation is True\n\n        # Disable require_bot_invocation\n        updated = DiscordBotManager.update_channel(\n            guild.id,\n            channel.id,\n            admin_user,\n            require_bot_invocation=False,\n        )\n\n        assert updated.require_bot_invocation is False\n\n        # Cleanup\n        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)\n\n    def test_update_channel_not_found(self, reset: None) -> None:  # noqa: ARG002\n        \"\"\"PATCH /guilds/{id}/channels/{id} returns 404 for non-existent channel.\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Create a registered guild (has guild_id set)\n        guild = DiscordBotManager.create_registered_guild_in_db(\n            guild_id=666666666,\n            guild_name=\"Test Guild\",\n        )\n\n        with pytest.raises(requests.HTTPError) as exc_info:\n            DiscordBotManager.update_channel(\n                guild.id,\n                999999,\n                admin_user,\n                enabled=True,\n            )\n\n        assert exc_info.value.response.status_code == 404\n\n        # Cleanup\n        DiscordBotManager.delete_guild_if_exists(guild.id, admin_user)\n\n\nclass TestServiceApiKeyCleanup:\n    \"\"\"Tests for service API key cleanup when bot/guild configs are deleted.\"\"\"\n\n    def test_delete_bot_config_also_deletes_service_api_key(\n        self,\n        reset: None,  # noqa: ARG002\n    ) -> None:\n        \"\"\"DELETE /config also deletes the service API key (self-hosted flow).\"\"\"\n        admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n        # Setup: create bot config via API\n        DiscordBotManager.delete_bot_config_if_exists(admin_user)\n        DiscordBotManager.create_bot_config(\n            bot_token=\"test_token\",\n            user_performing_action=admin_user,\n        )\n\n        # Create service API key directly in DB (simulating bot registration)\n        with get_session_with_current_tenant() as db_session:\n            get_or_create_discord_service_api_key(db_session, \"public\")\n            db_session.commit()\n\n            # Verify it exists\n            assert get_discord_service_api_key(db_session) is not None\n\n        # Delete bot config via API\n        result = DiscordBotManager.delete_bot_config(admin_user)\n        assert result[\"deleted\"] is True\n\n        # Verify service API key was also deleted\n        with get_session_with_current_tenant() as db_session:\n            assert get_discord_service_api_key(db_session) is None\n"
  },
  {
    "path": "backend/tests/integration/tests/discord_bot/test_discord_bot_db.py",
    "content": "\"\"\"Integration tests for Discord bot database operations.\n\nThese tests verify CRUD operations for Discord bot models.\n\"\"\"\n\nfrom collections.abc import Generator\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.discord_bot import bulk_create_channel_configs\nfrom onyx.db.discord_bot import create_discord_bot_config\nfrom onyx.db.discord_bot import create_guild_config\nfrom onyx.db.discord_bot import delete_discord_bot_config\nfrom onyx.db.discord_bot import delete_discord_service_api_key\nfrom onyx.db.discord_bot import delete_guild_config\nfrom onyx.db.discord_bot import get_channel_configs\nfrom onyx.db.discord_bot import get_discord_bot_config\nfrom onyx.db.discord_bot import get_discord_service_api_key\nfrom onyx.db.discord_bot import get_guild_config_by_internal_id\nfrom onyx.db.discord_bot import get_guild_config_by_registration_key\nfrom onyx.db.discord_bot import get_guild_configs\nfrom onyx.db.discord_bot import get_or_create_discord_service_api_key\nfrom onyx.db.discord_bot import sync_channel_configs\nfrom onyx.db.discord_bot import update_discord_channel_config\nfrom onyx.db.discord_bot import update_guild_config\nfrom onyx.db.models import Persona\nfrom onyx.db.utils import DiscordChannelView\nfrom onyx.server.manage.discord_bot.utils import generate_discord_registration_key\n\n\ndef _create_test_persona(db_session: Session, persona_id: int, name: str) -> Persona:\n    \"\"\"Create a minimal test persona.\"\"\"\n    persona = Persona(\n        id=persona_id,\n        name=name,\n        description=\"Test persona for Discord bot tests\",\n        is_listed=True,\n        is_featured=False,\n        deleted=False,\n        builtin_persona=False,\n    )\n    db_session.add(persona)\n    db_session.flush()\n    return persona\n\n\ndef _delete_test_persona(db_session: Session, persona_id: int) -> None:\n    \"\"\"Delete a test persona.\"\"\"\n    db_session.query(Persona).filter(Persona.id == persona_id).delete()\n    db_session.flush()\n\n\nclass TestBotConfigAPI:\n    \"\"\"Tests for bot config API operations.\"\"\"\n\n    def test_create_bot_config(self, db_session: Session) -> None:\n        \"\"\"Create bot config succeeds with valid token.\"\"\"\n        # Clean up any existing config first\n        delete_discord_bot_config(db_session)\n        db_session.commit()\n\n        config = create_discord_bot_config(db_session, bot_token=\"test_token_123\")\n        db_session.commit()\n\n        assert config is not None\n        assert config.bot_token is not None\n        assert config.bot_token.get_value(apply_mask=False) == \"test_token_123\"\n\n        # Cleanup\n        delete_discord_bot_config(db_session)\n        db_session.commit()\n\n    def test_create_bot_config_already_exists(self, db_session: Session) -> None:\n        \"\"\"Creating config twice raises ValueError.\"\"\"\n        # Clean up first\n        delete_discord_bot_config(db_session)\n        db_session.commit()\n\n        create_discord_bot_config(db_session, bot_token=\"token1\")\n        db_session.commit()\n\n        with pytest.raises(ValueError):\n            create_discord_bot_config(db_session, bot_token=\"token2\")\n\n        # Cleanup\n        delete_discord_bot_config(db_session)\n        db_session.commit()\n\n    def test_get_bot_config(self, db_session: Session) -> None:\n        \"\"\"Get bot config returns config with masked token.\"\"\"\n        # Clean up first\n        delete_discord_bot_config(db_session)\n        db_session.commit()\n\n        create_discord_bot_config(db_session, bot_token=\"my_secret_token\")\n        db_session.commit()\n\n        config = get_discord_bot_config(db_session)\n\n        assert config is not None\n        # Token should be stored (we don't mask in DB, only API response)\n        assert config.bot_token is not None\n\n        # Cleanup\n        delete_discord_bot_config(db_session)\n        db_session.commit()\n\n    def test_delete_bot_config(self, db_session: Session) -> None:\n        \"\"\"Delete bot config removes it from DB.\"\"\"\n        # Clean up first\n        delete_discord_bot_config(db_session)\n        db_session.commit()\n\n        create_discord_bot_config(db_session, bot_token=\"token\")\n        db_session.commit()\n\n        deleted = delete_discord_bot_config(db_session)\n        db_session.commit()\n\n        assert deleted is True\n        assert get_discord_bot_config(db_session) is None\n\n    def test_delete_bot_config_not_found(self, db_session: Session) -> None:\n        \"\"\"Delete when no config exists returns False.\"\"\"\n        # Ensure no config exists\n        delete_discord_bot_config(db_session)\n        db_session.commit()\n\n        deleted = delete_discord_bot_config(db_session)\n        assert deleted is False\n\n\nclass TestRegistrationKeyAPI:\n    \"\"\"Tests for registration key API operations.\"\"\"\n\n    def test_create_registration_key(self, db_session: Session) -> None:\n        \"\"\"Create registration key with proper format.\"\"\"\n        key = generate_discord_registration_key(\"test_tenant\")\n\n        config = create_guild_config(db_session, registration_key=key)\n        db_session.commit()\n\n        assert config is not None\n        assert config.registration_key == key\n        assert key.startswith(\"discord_\")\n        assert \"test_tenant\" in key or \"test%5Ftenant\" in key\n\n        # Cleanup\n        delete_guild_config(db_session, config.id)\n        db_session.commit()\n\n    def test_registration_key_is_unique(\n        self,\n        db_session: Session,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Each generated key is unique.\"\"\"\n        keys = [generate_discord_registration_key(\"tenant\") for _ in range(5)]\n        assert len(set(keys)) == 5\n\n    def test_delete_registration_key(self, db_session: Session) -> None:\n        \"\"\"Deleted key can no longer be used.\"\"\"\n        key = generate_discord_registration_key(\"tenant\")\n        config = create_guild_config(db_session, registration_key=key)\n        db_session.commit()\n        config_id = config.id\n\n        # Delete\n        deleted = delete_guild_config(db_session, config_id)\n        db_session.commit()\n\n        assert deleted is True\n\n        # Should not find it anymore\n        found = get_guild_config_by_registration_key(db_session, key)\n        assert found is None\n\n\nclass TestGuildConfigAPI:\n    \"\"\"Tests for guild config API operations.\"\"\"\n\n    def test_list_guilds(self, db_session: Session) -> None:\n        \"\"\"List guilds returns all guild configs.\"\"\"\n        # Create some guild configs\n        key1 = generate_discord_registration_key(\"t1\")\n        key2 = generate_discord_registration_key(\"t2\")\n\n        config1 = create_guild_config(db_session, registration_key=key1)\n        config2 = create_guild_config(db_session, registration_key=key2)\n        db_session.commit()\n\n        configs = get_guild_configs(db_session)\n\n        assert len(configs) >= 2\n\n        # Cleanup\n        delete_guild_config(db_session, config1.id)\n        delete_guild_config(db_session, config2.id)\n        db_session.commit()\n\n    def test_get_guild_config(self, db_session: Session) -> None:\n        \"\"\"Get specific guild config by ID.\"\"\"\n        key = generate_discord_registration_key(\"tenant\")\n        config = create_guild_config(db_session, registration_key=key)\n        db_session.commit()\n\n        found = get_guild_config_by_internal_id(db_session, config.id)\n\n        assert found is not None\n        assert found.id == config.id\n        assert found.registration_key == key\n\n        # Cleanup\n        delete_guild_config(db_session, config.id)\n        db_session.commit()\n\n    def test_update_guild_enabled(self, db_session: Session) -> None:\n        \"\"\"Update guild enabled status.\"\"\"\n        key = generate_discord_registration_key(\"tenant\")\n        config = create_guild_config(db_session, registration_key=key)\n        db_session.commit()\n\n        # Initially enabled is True by default\n        assert config.enabled is True\n\n        # Disable\n        updated = update_guild_config(\n            db_session, config, enabled=False, default_persona_id=None\n        )\n        db_session.commit()\n\n        assert updated.enabled is False\n\n        # Cleanup\n        delete_guild_config(db_session, config.id)\n        db_session.commit()\n\n    def test_update_guild_persona(self, db_session: Session) -> None:\n        \"\"\"Update guild default persona.\"\"\"\n        # Create test persona first to satisfy foreign key constraint\n        _create_test_persona(db_session, 5, \"Test Persona 5\")\n        db_session.commit()\n\n        key = generate_discord_registration_key(\"tenant\")\n        config = create_guild_config(db_session, registration_key=key)\n        db_session.commit()\n\n        # Set persona\n        updated = update_guild_config(\n            db_session, config, enabled=True, default_persona_id=5\n        )\n        db_session.commit()\n\n        assert updated.default_persona_id == 5\n\n        # Cleanup\n        delete_guild_config(db_session, config.id)\n        _delete_test_persona(db_session, 5)\n        db_session.commit()\n\n\nclass TestChannelConfigAPI:\n    \"\"\"Tests for channel config API operations.\"\"\"\n\n    def test_list_channels_for_guild(self, db_session: Session) -> None:\n        \"\"\"List channels returns all channel configs for guild.\"\"\"\n        key = generate_discord_registration_key(\"tenant\")\n        guild = create_guild_config(db_session, registration_key=key)\n        db_session.commit()\n\n        # Create some channels\n        channels = [\n            DiscordChannelView(\n                channel_id=111,\n                channel_name=\"general\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n            DiscordChannelView(\n                channel_id=222,\n                channel_name=\"help\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n        ]\n        bulk_create_channel_configs(db_session, guild.id, channels)\n        db_session.commit()\n\n        channel_configs = get_channel_configs(db_session, guild.id)\n\n        assert len(channel_configs) == 2\n\n        # Cleanup\n        delete_guild_config(db_session, guild.id)\n        db_session.commit()\n\n    def test_update_channel_enabled(self, db_session: Session) -> None:\n        \"\"\"Update channel enabled status.\"\"\"\n        key = generate_discord_registration_key(\"tenant\")\n        guild = create_guild_config(db_session, registration_key=key)\n        db_session.commit()\n\n        channels = [\n            DiscordChannelView(\n                channel_id=111,\n                channel_name=\"general\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n        ]\n        created = bulk_create_channel_configs(db_session, guild.id, channels)\n        db_session.commit()\n\n        # Channels are disabled by default\n        assert created[0].enabled is False\n\n        # Enable\n        updated = update_discord_channel_config(\n            db_session,\n            created[0],\n            channel_name=\"general\",\n            thread_only_mode=False,\n            require_bot_invocation=True,\n            enabled=True,\n        )\n        db_session.commit()\n\n        assert updated.enabled is True\n\n        # Cleanup\n        delete_guild_config(db_session, guild.id)\n        db_session.commit()\n\n    def test_update_channel_thread_only_mode(self, db_session: Session) -> None:\n        \"\"\"Update channel thread_only_mode setting.\"\"\"\n        key = generate_discord_registration_key(\"tenant\")\n        guild = create_guild_config(db_session, registration_key=key)\n        db_session.commit()\n\n        channels = [\n            DiscordChannelView(\n                channel_id=111,\n                channel_name=\"general\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n        ]\n        created = bulk_create_channel_configs(db_session, guild.id, channels)\n        db_session.commit()\n\n        # Update thread_only_mode\n        updated = update_discord_channel_config(\n            db_session,\n            created[0],\n            channel_name=\"general\",\n            thread_only_mode=True,\n            require_bot_invocation=True,\n            enabled=True,\n        )\n        db_session.commit()\n\n        assert updated.thread_only_mode is True\n\n        # Cleanup\n        delete_guild_config(db_session, guild.id)\n        db_session.commit()\n\n    def test_sync_channels_adds_new(self, db_session: Session) -> None:\n        \"\"\"Sync channels adds new channels.\"\"\"\n        key = generate_discord_registration_key(\"tenant\")\n        guild = create_guild_config(db_session, registration_key=key)\n        db_session.commit()\n\n        # Initial channels\n        initial = [\n            DiscordChannelView(\n                channel_id=111,\n                channel_name=\"general\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n        ]\n        bulk_create_channel_configs(db_session, guild.id, initial)\n        db_session.commit()\n\n        # Sync with new channel\n        current = [\n            DiscordChannelView(\n                channel_id=111,\n                channel_name=\"general\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n            DiscordChannelView(\n                channel_id=222,\n                channel_name=\"new-channel\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n        ]\n        added, removed, updated = sync_channel_configs(db_session, guild.id, current)\n        db_session.commit()\n\n        assert added == 1\n        assert removed == 0\n\n        # Cleanup\n        delete_guild_config(db_session, guild.id)\n        db_session.commit()\n\n    def test_sync_channels_removes_deleted(self, db_session: Session) -> None:\n        \"\"\"Sync channels removes deleted channels.\"\"\"\n        key = generate_discord_registration_key(\"tenant\")\n        guild = create_guild_config(db_session, registration_key=key)\n        db_session.commit()\n\n        # Initial channels\n        initial = [\n            DiscordChannelView(\n                channel_id=111,\n                channel_name=\"general\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n            DiscordChannelView(\n                channel_id=222,\n                channel_name=\"old-channel\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n        ]\n        bulk_create_channel_configs(db_session, guild.id, initial)\n        db_session.commit()\n\n        # Sync with one channel removed\n        current = [\n            DiscordChannelView(\n                channel_id=111,\n                channel_name=\"general\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n        ]\n        added, removed, updated = sync_channel_configs(db_session, guild.id, current)\n        db_session.commit()\n\n        assert added == 0\n        assert removed == 1\n\n        # Cleanup\n        delete_guild_config(db_session, guild.id)\n        db_session.commit()\n\n    def test_sync_channels_updates_renamed(self, db_session: Session) -> None:\n        \"\"\"Sync channels updates renamed channels.\"\"\"\n        key = generate_discord_registration_key(\"tenant\")\n        guild = create_guild_config(db_session, registration_key=key)\n        db_session.commit()\n\n        # Initial channels\n        initial = [\n            DiscordChannelView(\n                channel_id=111,\n                channel_name=\"old-name\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n        ]\n        bulk_create_channel_configs(db_session, guild.id, initial)\n        db_session.commit()\n\n        # Sync with renamed channel\n        current = [\n            DiscordChannelView(\n                channel_id=111,\n                channel_name=\"new-name\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n        ]\n        added, removed, updated = sync_channel_configs(db_session, guild.id, current)\n        db_session.commit()\n\n        assert added == 0\n        assert removed == 0\n        assert updated == 1\n\n        # Verify name was updated\n        configs = get_channel_configs(db_session, guild.id)\n        assert configs[0].channel_name == \"new-name\"\n\n        # Cleanup\n        delete_guild_config(db_session, guild.id)\n        db_session.commit()\n\n\nclass TestPersonaConfigurationAPI:\n    \"\"\"Tests for persona configuration in API.\"\"\"\n\n    def test_guild_persona_used_in_api_call(self, db_session: Session) -> None:\n        \"\"\"Guild default_persona_id is used when no channel override.\"\"\"\n        # Create test persona first\n        _create_test_persona(db_session, 42, \"Test Persona 42\")\n        db_session.commit()\n\n        key = generate_discord_registration_key(\"tenant\")\n        guild = create_guild_config(db_session, registration_key=key)\n        update_guild_config(db_session, guild, enabled=True, default_persona_id=42)\n        db_session.commit()\n\n        # Verify persona is set\n        config = get_guild_config_by_internal_id(db_session, guild.id)\n        assert config is not None\n        assert config.default_persona_id == 42\n\n        # Cleanup\n        delete_guild_config(db_session, guild.id)\n        _delete_test_persona(db_session, 42)\n        db_session.commit()\n\n    def test_channel_persona_override_in_api_call(self, db_session: Session) -> None:\n        \"\"\"Channel persona_override_id takes precedence over guild default.\"\"\"\n        # Create test personas first\n        _create_test_persona(db_session, 42, \"Test Persona 42\")\n        _create_test_persona(db_session, 99, \"Test Persona 99\")\n        db_session.commit()\n\n        key = generate_discord_registration_key(\"tenant\")\n        guild = create_guild_config(db_session, registration_key=key)\n        update_guild_config(db_session, guild, enabled=True, default_persona_id=42)\n        db_session.commit()\n\n        channels = [\n            DiscordChannelView(\n                channel_id=111,\n                channel_name=\"general\",\n                channel_type=\"text\",\n                is_private=False,\n            ),\n        ]\n        created = bulk_create_channel_configs(db_session, guild.id, channels)\n        db_session.commit()\n\n        # Set channel persona override\n        updated = update_discord_channel_config(\n            db_session,\n            created[0],\n            channel_name=\"general\",\n            thread_only_mode=False,\n            require_bot_invocation=True,\n            enabled=True,\n            persona_override_id=99,  # Override!\n        )\n        db_session.commit()\n\n        assert updated.persona_override_id == 99\n\n        # Cleanup\n        delete_guild_config(db_session, guild.id)\n        _delete_test_persona(db_session, 42)\n        _delete_test_persona(db_session, 99)\n        db_session.commit()\n\n    def test_no_persona_uses_default(self, db_session: Session) -> None:\n        \"\"\"Neither guild nor channel has persona - uses API default.\"\"\"\n        key = generate_discord_registration_key(\"tenant\")\n        guild = create_guild_config(db_session, registration_key=key)\n        # No persona set\n        db_session.commit()\n\n        config = get_guild_config_by_internal_id(db_session, guild.id)\n        assert config is not None\n        assert config.default_persona_id is None\n\n        # Cleanup\n        delete_guild_config(db_session, guild.id)\n        db_session.commit()\n\n\nclass TestServiceApiKeyAPI:\n    \"\"\"Tests for Discord service API key operations.\"\"\"\n\n    def test_create_service_api_key(self, db_session: Session) -> None:\n        \"\"\"Create service API key returns valid key.\"\"\"\n        # Clean up any existing key first\n        delete_discord_service_api_key(db_session)\n        db_session.commit()\n\n        api_key = get_or_create_discord_service_api_key(db_session, \"public\")\n        db_session.commit()\n\n        assert api_key is not None\n        assert len(api_key) > 0\n\n        # Verify key was stored in database\n        stored_key = get_discord_service_api_key(db_session)\n        assert stored_key is not None\n\n        # Cleanup\n        delete_discord_service_api_key(db_session)\n        db_session.commit()\n\n    def test_get_or_create_returns_existing(self, db_session: Session) -> None:\n        \"\"\"get_or_create_discord_service_api_key regenerates key if exists.\"\"\"\n        # Clean up any existing key first\n        delete_discord_service_api_key(db_session)\n        db_session.commit()\n\n        # Create first key\n        key1 = get_or_create_discord_service_api_key(db_session, \"public\")\n        db_session.commit()\n\n        # Call again - should regenerate (per implementation, it regenerates to update cache)\n        key2 = get_or_create_discord_service_api_key(db_session, \"public\")\n        db_session.commit()\n\n        # Keys should be different since it regenerates\n        assert key1 != key2\n\n        # But there should still be only one key in the database\n        stored_key = get_discord_service_api_key(db_session)\n        assert stored_key is not None\n\n        # Cleanup\n        delete_discord_service_api_key(db_session)\n        db_session.commit()\n\n    def test_delete_service_api_key(self, db_session: Session) -> None:\n        \"\"\"Delete service API key removes it from DB.\"\"\"\n        # Clean up any existing key first\n        delete_discord_service_api_key(db_session)\n        db_session.commit()\n\n        # Create a key\n        get_or_create_discord_service_api_key(db_session, \"public\")\n        db_session.commit()\n\n        # Delete it\n        deleted = delete_discord_service_api_key(db_session)\n        db_session.commit()\n\n        assert deleted is True\n        assert get_discord_service_api_key(db_session) is None\n\n    def test_delete_service_api_key_not_found(self, db_session: Session) -> None:\n        \"\"\"Delete when no key exists returns False.\"\"\"\n        # Ensure no key exists\n        delete_discord_service_api_key(db_session)\n        db_session.commit()\n\n        deleted = delete_discord_service_api_key(db_session)\n        assert deleted is False\n\n\n# Pytest fixture for db_session\n@pytest.fixture\ndef db_session() -> Generator[Session, None, None]:\n    \"\"\"Create database session for tests.\"\"\"\n    from onyx.db.engine.sql_engine import get_session_with_current_tenant\n    from onyx.db.engine.sql_engine import SqlEngine\n    from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n    SqlEngine.init_engine(pool_size=10, max_overflow=5)\n\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(\"public\")\n    try:\n        with get_session_with_current_tenant() as session:\n            yield session\n    finally:\n        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n"
  },
  {
    "path": "backend/tests/integration/tests/document_set/test_syncing.py",
    "content": "from onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.constants import NUM_DOCS\nfrom tests.integration.common_utils.managers.api_key import APIKeyManager\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.document_set import DocumentSetManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestAPIKey\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\n\ndef test_multiple_document_sets_syncing_same_connnector(\n    reset: None,  # noqa: ARG001\n    vespa_client: vespa_fixture,\n) -> None:\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    # create api key\n    api_key: DATestAPIKey = APIKeyManager.create(\n        user_performing_action=admin_user,\n    )\n\n    # create connector\n    cc_pair_1 = CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n    # seed documents\n    cc_pair_1.documents = DocumentManager.seed_dummy_docs(\n        cc_pair=cc_pair_1,\n        num_docs=NUM_DOCS,\n        api_key=api_key,\n    )\n\n    # Create document sets\n    doc_set_1 = DocumentSetManager.create(\n        cc_pair_ids=[cc_pair_1.id],\n        user_performing_action=admin_user,\n    )\n    doc_set_2 = DocumentSetManager.create(\n        cc_pair_ids=[cc_pair_1.id],\n        user_performing_action=admin_user,\n    )\n\n    DocumentSetManager.wait_for_sync(\n        user_performing_action=admin_user,\n    )\n\n    DocumentSetManager.verify(\n        document_set=doc_set_1,\n        user_performing_action=admin_user,\n    )\n    DocumentSetManager.verify(\n        document_set=doc_set_2,\n        user_performing_action=admin_user,\n    )\n\n    # make sure documents are as expected\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_1,\n        doc_set_names=[doc_set_1.name, doc_set_2.name],\n        doc_creating_user=admin_user,\n    )\n\n\ndef test_removing_connector(\n    reset: None,  # noqa: ARG001\n    vespa_client: vespa_fixture,\n) -> None:\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    # create api key\n    api_key: DATestAPIKey = APIKeyManager.create(\n        user_performing_action=admin_user,\n    )\n\n    # create connectors\n    cc_pair_1 = CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n    cc_pair_2 = CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n    # seed documents\n    cc_pair_1.documents = DocumentManager.seed_dummy_docs(\n        cc_pair=cc_pair_1,\n        num_docs=NUM_DOCS,\n        api_key=api_key,\n    )\n\n    cc_pair_2.documents = DocumentManager.seed_dummy_docs(\n        cc_pair=cc_pair_2,\n        num_docs=NUM_DOCS,\n        api_key=api_key,\n    )\n\n    # Create document sets\n    doc_set_1 = DocumentSetManager.create(\n        cc_pair_ids=[cc_pair_1.id, cc_pair_2.id],\n        user_performing_action=admin_user,\n    )\n\n    DocumentSetManager.wait_for_sync(\n        user_performing_action=admin_user,\n    )\n\n    DocumentSetManager.verify(\n        document_set=doc_set_1,\n        user_performing_action=admin_user,\n    )\n\n    # make sure cc_pair_1 docs are doc_set_1 only\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_1,\n        doc_set_names=[doc_set_1.name],\n        doc_creating_user=admin_user,\n    )\n\n    # make sure cc_pair_2 docs are doc_set_1 only\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_2,\n        doc_set_names=[doc_set_1.name],\n        doc_creating_user=admin_user,\n    )\n\n    # remove cc_pair_2 from document set\n    doc_set_1.cc_pair_ids = [cc_pair_1.id]\n    DocumentSetManager.edit(\n        doc_set_1,\n        user_performing_action=admin_user,\n    )\n\n    DocumentSetManager.wait_for_sync(\n        user_performing_action=admin_user,\n    )\n\n    # make sure cc_pair_1 docs are doc_set_1 only\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_1,\n        doc_set_names=[doc_set_1.name],\n        doc_creating_user=admin_user,\n    )\n\n    # make sure cc_pair_2 docs have no doc set\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_2,\n        doc_set_names=[],\n        doc_creating_user=admin_user,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/image_generation/test_image_generation_config.py",
    "content": "\"\"\"Integration tests for image generation config endpoints.\n\nTests cover CRUD operations for /admin/image-generation/config endpoints.\nThe /admin/image-generation/test endpoint is not tested as it makes real API calls.\n\nUses module-scoped fixtures to reset DB and create users once per module for faster execution.\n\"\"\"\n\nimport pytest\nimport requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.image_generation import (\n    ImageGenerationConfigManager,\n)\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.reset import reset_all\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\n@pytest.fixture(scope=\"module\")\ndef setup_image_generation_tests() -> tuple[DATestUser, DATestLLMProvider]:\n    \"\"\"Module-scoped fixture that runs once for all tests in this module.\n\n    - Resets DB once at the start of the module\n    - Creates admin user once\n    - Creates LLM provider once (for clone-mode test)\n    - Returns (admin_user, llm_provider) tuple for all tests to use\n    \"\"\"\n    reset_all()\n    admin_user = UserManager.create(name=\"admin_user\")\n    llm_provider = LLMProviderManager.create(user_performing_action=admin_user)\n    return admin_user, llm_provider\n\n\ndef test_create_image_generation_config(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test creating an image generation config with new credentials.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    config = ImageGenerationConfigManager.create(\n        image_provider_id=\"test-openai-dalle\",\n        model_name=\"dall-e-3\",\n        provider=\"openai\",\n        api_key=\"sk-test-key-12345\",\n        is_default=False,\n        user_performing_action=admin_user,\n    )\n\n    assert config.image_provider_id == \"test-openai-dalle\"\n    assert config.model_name == \"dall-e-3\"\n    assert config.is_default is False\n\n    # Verify it exists in the list\n    ImageGenerationConfigManager.verify(\n        config=config,\n        user_performing_action=admin_user,\n    )\n\n\ndef test_create_image_generation_config_from_provider(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test creating an image generation config by cloning from an existing LLM provider.\"\"\"\n    admin_user, llm_provider = setup_image_generation_tests\n\n    # Create image generation config from the provider\n    config = ImageGenerationConfigManager.create_from_provider(\n        source_llm_provider_id=llm_provider.id,\n        image_provider_id=\"test-from-provider\",\n        model_name=\"gpt-image-1\",\n        is_default=True,\n        user_performing_action=admin_user,\n    )\n\n    assert config.image_provider_id == \"test-from-provider\"\n    assert config.model_name == \"gpt-image-1\"\n    assert config.is_default is True\n\n    # Verify it exists\n    ImageGenerationConfigManager.verify(\n        config=config,\n        user_performing_action=admin_user,\n    )\n\n\ndef test_create_duplicate_config_fails(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test that creating a config with an existing image_provider_id fails.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    # Create first config\n    ImageGenerationConfigManager.create(\n        image_provider_id=\"duplicate-test-id\",\n        model_name=\"dall-e-3\",\n        provider=\"openai\",\n        api_key=\"sk-test-key-1\",\n        user_performing_action=admin_user,\n    )\n\n    # Try to create another with the same image_provider_id\n    response = requests.post(\n        f\"{API_SERVER_URL}/admin/image-generation/config\",\n        json={\n            \"image_provider_id\": \"duplicate-test-id\",\n            \"model_name\": \"gpt-image-1\",\n            \"provider\": \"openai\",\n            \"api_key\": \"sk-test-key-2\",\n        },\n        headers=admin_user.headers,\n    )\n\n    assert response.status_code == 400\n    assert \"already exists\" in response.json()[\"detail\"]\n\n\ndef test_get_all_configs(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test getting all image generation configs.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    # Create multiple configs\n    config1 = ImageGenerationConfigManager.create(\n        image_provider_id=\"config-1\",\n        model_name=\"dall-e-3\",\n        provider=\"openai\",\n        api_key=\"sk-key-1\",\n        user_performing_action=admin_user,\n    )\n    config2 = ImageGenerationConfigManager.create(\n        image_provider_id=\"config-2\",\n        model_name=\"gpt-image-1\",\n        provider=\"openai\",\n        api_key=\"sk-key-2\",\n        user_performing_action=admin_user,\n    )\n\n    # Get all configs\n    all_configs = ImageGenerationConfigManager.get_all(\n        user_performing_action=admin_user\n    )\n\n    assert len(all_configs) >= 2\n    config_ids = [c.image_provider_id for c in all_configs]\n    assert config1.image_provider_id in config_ids\n    assert config2.image_provider_id in config_ids\n\n\ndef test_get_config_credentials(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test getting credentials for an image generation config.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    test_api_key = \"sk-test-credentials-key-12345\"\n    config = ImageGenerationConfigManager.create(\n        image_provider_id=\"credentials-test\",\n        model_name=\"dall-e-3\",\n        provider=\"openai\",\n        api_key=test_api_key,\n        user_performing_action=admin_user,\n    )\n\n    # Get credentials\n    credentials = ImageGenerationConfigManager.get_credentials(\n        image_provider_id=config.image_provider_id,\n        user_performing_action=admin_user,\n    )\n\n    # Credentials should contain the masked API key (first 4 + **** + last 4)\n    assert credentials[\"api_key\"] == \"sk-t****2345\"\n    assert \"api_base\" in credentials\n    assert \"api_version\" in credentials\n    assert \"deployment_name\" in credentials\n\n\ndef test_get_credentials_not_found(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test getting credentials for a non-existent config returns 404.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/image-generation/config/non-existent-id/credentials\",\n        headers=admin_user.headers,\n    )\n\n    assert response.status_code == 404\n\n\ndef test_update_config_direct_key_entry(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test updating an image generation config with new direct credentials.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    # Create initial config\n    config = ImageGenerationConfigManager.create(\n        image_provider_id=\"update-direct-test\",\n        model_name=\"dall-e-3\",\n        provider=\"openai\",\n        api_key=\"sk-initial-key\",\n        user_performing_action=admin_user,\n    )\n\n    assert config.model_name == \"dall-e-3\"\n\n    # Update with new credentials and model\n    new_api_key = \"sk-updated-key-12345\"\n    updated_config = ImageGenerationConfigManager.update(\n        image_provider_id=config.image_provider_id,\n        model_name=\"dall-e-3\",\n        provider=\"openai\",\n        api_key=new_api_key,\n        user_performing_action=admin_user,\n    )\n\n    assert updated_config.image_provider_id == config.image_provider_id\n    assert updated_config.model_name == \"dall-e-3\"\n\n    # Verify credentials were updated (masked: first 4 + **** + last 4)\n    credentials = ImageGenerationConfigManager.get_credentials(\n        image_provider_id=config.image_provider_id,\n        user_performing_action=admin_user,\n    )\n    assert credentials[\"api_key\"] == \"sk-u****2345\"\n\n\ndef test_update_config_clone_mode(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test updating an image generation config by cloning from an LLM provider.\"\"\"\n    admin_user, llm_provider = setup_image_generation_tests\n\n    # Create initial config with direct credentials\n    config = ImageGenerationConfigManager.create(\n        image_provider_id=\"update-clone-test\",\n        model_name=\"dall-e-3\",\n        provider=\"openai\",\n        api_key=\"sk-initial-direct-key\",\n        user_performing_action=admin_user,\n    )\n\n    assert config.model_name == \"dall-e-3\"\n\n    # Update by cloning from LLM provider\n    updated_config = ImageGenerationConfigManager.update(\n        image_provider_id=config.image_provider_id,\n        model_name=\"gpt-image-1\",\n        source_llm_provider_id=llm_provider.id,\n        user_performing_action=admin_user,\n    )\n\n    assert updated_config.image_provider_id == config.image_provider_id\n    assert updated_config.model_name == \"gpt-image-1\"\n\n    # Verify config still exists and is accessible\n    ImageGenerationConfigManager.verify(\n        config=updated_config,\n        user_performing_action=admin_user,\n    )\n\n\ndef test_update_config_source_provider_not_found(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test that updating with non-existent source_llm_provider_id fails.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    # Create initial config\n    config = ImageGenerationConfigManager.create(\n        image_provider_id=\"update-bad-source-test\",\n        model_name=\"dall-e-3\",\n        provider=\"openai\",\n        api_key=\"sk-initial-key\",\n        user_performing_action=admin_user,\n    )\n\n    # Try to update with non-existent source provider\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/image-generation/config/{config.image_provider_id}\",\n        json={\n            \"model_name\": \"gpt-image-1\",\n            \"source_llm_provider_id\": 999999,\n        },\n        headers=admin_user.headers,\n    )\n\n    assert response.status_code == 404\n    assert \"not found\" in response.json()[\"detail\"]\n\n\ndef test_delete_config(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test deleting an image generation config.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    # Create a config\n    config = ImageGenerationConfigManager.create(\n        image_provider_id=\"delete-test\",\n        model_name=\"dall-e-3\",\n        provider=\"openai\",\n        api_key=\"sk-delete-key\",\n        user_performing_action=admin_user,\n    )\n\n    # Verify it exists\n    ImageGenerationConfigManager.verify(\n        config=config,\n        user_performing_action=admin_user,\n    )\n\n    # Delete it\n    ImageGenerationConfigManager.delete(\n        image_provider_id=config.image_provider_id,\n        user_performing_action=admin_user,\n    )\n\n    # Verify it's deleted\n    ImageGenerationConfigManager.verify(\n        config=config,\n        verify_deleted=True,\n        user_performing_action=admin_user,\n    )\n\n\ndef test_delete_config_not_found(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test deleting a non-existent config returns 404.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    response = requests.delete(\n        f\"{API_SERVER_URL}/admin/image-generation/config/non-existent-id\",\n        headers=admin_user.headers,\n    )\n\n    assert response.status_code == 404\n\n\ndef test_set_default_config(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test setting a config as the default.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    # Create a config that is not default\n    config = ImageGenerationConfigManager.create(\n        image_provider_id=\"default-test\",\n        model_name=\"dall-e-3\",\n        provider=\"openai\",\n        api_key=\"sk-test-key\",\n        is_default=False,\n        user_performing_action=admin_user,\n    )\n\n    assert config.is_default is False\n\n    # Set it as default\n    ImageGenerationConfigManager.set_default(\n        image_provider_id=config.image_provider_id,\n        user_performing_action=admin_user,\n    )\n\n    # Verify it's now default\n    all_configs = ImageGenerationConfigManager.get_all(\n        user_performing_action=admin_user\n    )\n    updated_config = next(\n        c for c in all_configs if c.image_provider_id == config.image_provider_id\n    )\n    assert updated_config.is_default is True\n\n\ndef test_set_default_clears_previous(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test that setting a new default clears the previous default.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    # Create first config as default\n    config1 = ImageGenerationConfigManager.create(\n        image_provider_id=\"first-default\",\n        model_name=\"dall-e-3\",\n        provider=\"openai\",\n        api_key=\"sk-key-1\",\n        is_default=True,\n        user_performing_action=admin_user,\n    )\n\n    # Create second config not as default\n    config2 = ImageGenerationConfigManager.create(\n        image_provider_id=\"second-default\",\n        model_name=\"gpt-image-1\",\n        provider=\"openai\",\n        api_key=\"sk-key-2\",\n        is_default=False,\n        user_performing_action=admin_user,\n    )\n\n    # Verify first is default\n    all_configs = ImageGenerationConfigManager.get_all(\n        user_performing_action=admin_user\n    )\n    first = next(\n        c for c in all_configs if c.image_provider_id == config1.image_provider_id\n    )\n    second = next(\n        c for c in all_configs if c.image_provider_id == config2.image_provider_id\n    )\n    assert first.is_default is True\n    assert second.is_default is False\n\n    # Set second as default\n    ImageGenerationConfigManager.set_default(\n        image_provider_id=config2.image_provider_id,\n        user_performing_action=admin_user,\n    )\n\n    # Verify second is now default and first is not\n    all_configs = ImageGenerationConfigManager.get_all(\n        user_performing_action=admin_user\n    )\n    first = next(\n        c for c in all_configs if c.image_provider_id == config1.image_provider_id\n    )\n    second = next(\n        c for c in all_configs if c.image_provider_id == config2.image_provider_id\n    )\n    assert first.is_default is False\n    assert second.is_default is True\n\n\ndef test_set_default_not_found(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test setting a non-existent config as default returns 404.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    response = requests.post(\n        f\"{API_SERVER_URL}/admin/image-generation/config/non-existent-id/default\",\n        headers=admin_user.headers,\n    )\n\n    assert response.status_code == 404\n\n\ndef test_create_config_missing_credentials(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test that creating a config without credentials fails.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    # Try to create without api_key/provider or source_llm_provider_id\n    response = requests.post(\n        f\"{API_SERVER_URL}/admin/image-generation/config\",\n        json={\n            \"image_provider_id\": \"no-creds-test\",\n            \"model_name\": \"dall-e-3\",\n        },\n        headers=admin_user.headers,\n    )\n\n    assert response.status_code == 400\n    assert \"No provider or source llm provided\" in response.json()[\"detail\"]\n\n\ndef test_create_config_source_provider_not_found(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"Test creating a config with non-existent source_llm_provider_id fails.\"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    response = requests.post(\n        f\"{API_SERVER_URL}/admin/image-generation/config\",\n        json={\n            \"image_provider_id\": \"bad-source-test\",\n            \"model_name\": \"dall-e-3\",\n            \"source_llm_provider_id\": 999999,  # Non-existent ID\n        },\n        headers=admin_user.headers,\n    )\n\n    assert response.status_code == 404\n    assert \"not found\" in response.json()[\"detail\"]\n"
  },
  {
    "path": "backend/tests/integration/tests/image_generation/test_image_generation_tool_visibility.py",
    "content": "\"\"\"Integration tests to check broader image generation config flow endpoints.\"\"\"\n\nimport pytest\n\nfrom onyx.tools.tool_implementations.images.image_generation_tool import (\n    ImageGenerationTool,\n)\nfrom tests.integration.common_utils.managers.image_generation import (\n    ImageGenerationConfigManager,\n)\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.tool import ToolManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.reset import reset_all\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\n\nIMAGE_GENERATION_TOOL_NAME = ImageGenerationTool.NAME\n\n\n@pytest.fixture(scope=\"module\")\ndef setup_image_generation_tests() -> tuple[DATestUser, DATestLLMProvider]:\n    \"\"\"Module-scoped fixture that runs once for all tests in this module.\n\n    - Resets DB once at the start of the module\n    - Creates admin user once\n    - Creates LLM provider once (for clone-mode test)\n    - Returns (admin_user, llm_provider) tuple for all tests to use\n    \"\"\"\n    reset_all()\n    admin_user = UserManager.create(name=\"admin_user\")\n    llm_provider = LLMProviderManager.create(user_performing_action=admin_user)\n    return admin_user, llm_provider\n\n\ndef test_vertex_creds_upload_image_tool_visibility(\n    setup_image_generation_tests: tuple[DATestUser, DATestLLMProvider],\n) -> None:\n    \"\"\"\n    Tests the following scenario:\n    1. No image model added so tool not visible\n    2. Vertex AI creds uploaded\n    3. Image model added so tool visible\n    \"\"\"\n    admin_user, _ = setup_image_generation_tests\n\n    # 1. Check the tools and check that image generation tool is not visible yet\n    tools = ToolManager.list_tools(user_performing_action=admin_user)\n    assert not any(tool.name == IMAGE_GENERATION_TOOL_NAME for tool in tools)\n\n    # 2. Upload vertex ai credentials\n    config = ImageGenerationConfigManager.create(\n        image_provider_id=\"gemini-2.5-flash-image\",\n        model_name=\"gemini-2.5-flash-image\",\n        provider=\"vertex_ai\",\n        custom_config={\n            \"vertex_credentials\": {\n                \"type\": \"service_account\",\n                \"project_id\": \"test-project-id\",\n                \"private_key_id\": \"test-private-key-id\",\n                \"private_key\": \"test-private-key\",\n                # ... Other random fields that we dont care about\n            },\n            \"vertex_location\": \"test-location\",\n        },\n        user_performing_action=admin_user,\n        is_default=True,\n    )\n\n    assert config.image_provider_id == \"gemini-2.5-flash-image\"\n    assert config.model_name == \"gemini-2.5-flash-image\"\n\n    # 3. Check that the tool is visible\n    tools = ToolManager.list_tools(user_performing_action=admin_user)\n    assert any(tool.name == IMAGE_GENERATION_TOOL_NAME for tool in tools)\n"
  },
  {
    "path": "backend/tests/integration/tests/image_indexing/test_indexing_images.py",
    "content": "import os\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport pytest\n\nfrom onyx.connectors.models import InputType\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import AccessType\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.file import FileManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.settings import SettingsManager\nfrom tests.integration.common_utils.test_models import DATestSettings\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\nFILE_NAME = \"Sample.pdf\"\nFILE_PATH = \"tests/integration/common_utils/test_files\"\nDOCX_FILE_NAME = \"three_images.docx\"\n\n\ndef test_image_indexing(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    vespa_client: vespa_fixture,\n) -> None:\n    os.makedirs(FILE_PATH, exist_ok=True)\n    test_file_path = os.path.join(FILE_PATH, FILE_NAME)\n\n    # Use FileManager to upload the test file\n    upload_response = FileManager.upload_file_for_connector(\n        file_path=test_file_path, file_name=FILE_NAME, user_performing_action=admin_user\n    )\n\n    LLMProviderManager.create(\n        name=\"test_llm\",\n        user_performing_action=admin_user,\n    )\n\n    SettingsManager.update_settings(\n        DATestSettings(\n            search_time_image_analysis_enabled=True,\n            image_extraction_and_analysis_enabled=True,\n        ),\n        user_performing_action=admin_user,\n    )\n\n    file_paths = upload_response.file_paths\n\n    if not file_paths:\n        pytest.fail(\"File upload failed - no file paths returned\")\n\n    # Create a dummy credential for the file connector\n    credential = CredentialManager.create(\n        source=DocumentSource.FILE,\n        credential_json={},\n        user_performing_action=admin_user,\n    )\n\n    # Create the connector\n    connector_name = f\"FileConnector-{int(datetime.now().timestamp())}\"\n    connector = ConnectorManager.create(\n        name=connector_name,\n        source=DocumentSource.FILE,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={\n            \"file_locations\": file_paths,\n            \"file_names\": [FILE_NAME],\n            \"zip_metadata_file_id\": None,\n        },\n        access_type=AccessType.PUBLIC,\n        groups=[],\n        user_performing_action=admin_user,\n    )\n\n    # Link the credential to the connector\n    cc_pair = CCPairManager.create(\n        credential_id=credential.id,\n        connector_id=connector.id,\n        access_type=AccessType.PUBLIC,\n        user_performing_action=admin_user,\n    )\n\n    # Explicitly run the connector to start indexing\n    CCPairManager.run_once(\n        cc_pair=cc_pair,\n        from_beginning=True,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair,\n        after=datetime.now(timezone.utc),\n        timeout=300,\n        user_performing_action=admin_user,\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        # really gets the chunks from Vespa, which is why there are two;\n        # one for the raw text and one for the summarized image.\n        documents = DocumentManager.fetch_documents_for_cc_pair(\n            cc_pair_id=cc_pair.id,\n            db_session=db_session,\n            vespa_client=vespa_client,\n        )\n\n        assert len(documents) == 2\n        for document in documents:\n            if \"These  are  Johns  dogs\" in document.content:\n                assert document.image_file_id is None\n            else:\n                assert document.image_file_id is not None\n                assert file_paths[0] in document.image_file_id\n\n\ndef test_docx_image_indexing(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    vespa_client: vespa_fixture,\n) -> None:\n    \"\"\"Test that images from docx files are correctly extracted and indexed.\"\"\"\n    os.makedirs(FILE_PATH, exist_ok=True)\n    test_file_path = os.path.join(FILE_PATH, DOCX_FILE_NAME)\n\n    # Use FileManager to upload the test file\n    upload_response = FileManager.upload_file_for_connector(\n        file_path=test_file_path,\n        file_name=DOCX_FILE_NAME,\n        user_performing_action=admin_user,\n    )\n\n    LLMProviderManager.create(\n        name=\"test_llm_docx\",\n        user_performing_action=admin_user,\n    )\n\n    SettingsManager.update_settings(\n        DATestSettings(\n            search_time_image_analysis_enabled=True,\n            image_extraction_and_analysis_enabled=True,\n        ),\n        user_performing_action=admin_user,\n    )\n\n    file_paths = upload_response.file_paths\n\n    if not file_paths:\n        pytest.fail(\"File upload failed - no file paths returned\")\n\n    # Create a dummy credential for the file connector\n    credential = CredentialManager.create(\n        source=DocumentSource.FILE,\n        credential_json={},\n        user_performing_action=admin_user,\n    )\n\n    # Create the connector\n    connector_name = f\"DocxFileConnector-{int(datetime.now().timestamp())}\"\n    connector = ConnectorManager.create(\n        name=connector_name,\n        source=DocumentSource.FILE,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={\n            \"file_locations\": file_paths,\n            \"file_names\": [DOCX_FILE_NAME],\n            \"zip_metadata_file_id\": None,\n        },\n        access_type=AccessType.PUBLIC,\n        groups=[],\n        user_performing_action=admin_user,\n    )\n\n    # Link the credential to the connector\n    cc_pair = CCPairManager.create(\n        credential_id=credential.id,\n        connector_id=connector.id,\n        access_type=AccessType.PUBLIC,\n        user_performing_action=admin_user,\n    )\n\n    # Explicitly run the connector to start indexing\n    CCPairManager.run_once(\n        cc_pair=cc_pair,\n        from_beginning=True,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair,\n        after=datetime.now(timezone.utc),\n        timeout=300,\n        user_performing_action=admin_user,\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        # Fetch documents from Vespa - expect text content plus 3 images\n        documents = DocumentManager.fetch_documents_for_cc_pair(\n            cc_pair_id=cc_pair.id,\n            db_session=db_session,\n            vespa_client=vespa_client,\n        )\n\n        # Should have documents for text content plus 3 images\n        assert (\n            len(documents) >= 3\n        ), f\"Expected at least 3 documents (3 images), got {len(documents)}\"\n\n        # Count documents with images\n        image_documents = [doc for doc in documents if doc.image_file_id is not None]\n        text_documents = [doc for doc in documents if doc.image_file_id is None]\n\n        assert (\n            len(image_documents) == 3\n        ), f\"Expected exactly 3 image documents, got {len(image_documents)}\"\n        assert (\n            len(text_documents) >= 1\n        ), f\"Expected at least 1 text document, got {len(text_documents)}\"\n\n        # Verify each image document has a valid image_file_id pointing to our uploaded file\n        for image_doc in image_documents:\n            assert file_paths[0] in (\n                image_doc.image_file_id or \"\"\n            ), f\"Image document should reference uploaded file: {image_doc.image_file_id}\"\n"
  },
  {
    "path": "backend/tests/integration/tests/index_attempt/test_index_attempt_pagination.py",
    "content": "import time\nfrom datetime import datetime\n\nfrom onyx.db.models import IndexingStatus\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.index_attempt import IndexAttemptManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _verify_index_attempt_pagination(\n    cc_pair_id: int,\n    index_attempt_ids: list[int],\n    user_performing_action: DATestUser,\n    page_size: int = 5,\n) -> None:\n    retrieved_attempts: list[int] = []\n    last_time_started = None  # Track the last time_started seen\n\n    for i in range(0, len(index_attempt_ids), page_size):\n        paginated_result = IndexAttemptManager.get_index_attempt_page(\n            cc_pair_id=cc_pair_id,\n            page=(i // page_size),\n            page_size=page_size,\n            user_performing_action=user_performing_action,\n        )\n\n        # Verify that the total items is equal to the length of the index attempts list\n        assert paginated_result.total_items == len(index_attempt_ids)\n        # Verify that the number of items in the page is equal to the page size\n        assert len(paginated_result.items) == min(page_size, len(index_attempt_ids) - i)\n\n        # Verify time ordering within the page (descending order)\n        for attempt in paginated_result.items:\n            if last_time_started is not None:\n                assert attempt.time_started is not None\n                assert (\n                    attempt.time_started <= last_time_started\n                ), \"Index attempts not in descending time order\"\n            last_time_started = attempt.time_started\n\n        # Add the retrieved index attempts to the list of retrieved attempts\n        retrieved_attempts.extend([attempt.id for attempt in paginated_result.items])\n\n    # Create a set of all the expected index attempt IDs\n    all_expected_attempts = set(index_attempt_ids)\n    # Create a set of all the retrieved index attempt IDs\n    all_retrieved_attempts = set(retrieved_attempts)\n\n    # Verify that the set of retrieved attempts is equal to the set of expected attempts\n    assert all_expected_attempts == all_retrieved_attempts\n\n\ndef test_index_attempt_pagination(reset: None) -> None:  # noqa: ARG001\n    MAX_WAIT = 60\n    all_attempt_ids: list[int] = []\n\n    # Create an admin user to perform actions\n    user_performing_action: DATestUser = UserManager.create(\n        name=\"admin_performing_action\",\n    )\n\n    # Create a CC pair to attach index attempts to\n    cc_pair = CCPairManager.create_from_scratch(\n        user_performing_action=user_performing_action,\n    )\n\n    # Creating a CC pair will create an index attempt as well. wait for it.\n    start = time.monotonic()\n    while True:\n        paginated_result = IndexAttemptManager.get_index_attempt_page(\n            cc_pair_id=cc_pair.id,\n            page=0,\n            page_size=5,\n            user_performing_action=user_performing_action,\n        )\n\n        if paginated_result.total_items == 1:\n            all_attempt_ids.append(paginated_result.items[0].id)\n            print(\"Initial index attempt from cc_pair creation detected. Continuing...\")\n            break\n\n        elapsed = time.monotonic() - start\n        if elapsed > MAX_WAIT:\n            raise TimeoutError(\n                f\"Initial index attempt: Not detected within {MAX_WAIT} seconds.\"\n            )\n\n        print(\n            f\"Waiting for initial index attempt: elapsed={elapsed:.2f} timeout={MAX_WAIT}\"\n        )\n        time.sleep(1)\n\n    # Create 299 successful index attempts (for 300 total)\n    base_time = datetime.now()\n    generated_attempts = IndexAttemptManager.create_test_index_attempts(\n        num_attempts=299,\n        cc_pair_id=cc_pair.id,\n        status=IndexingStatus.SUCCESS,\n        base_time=base_time,\n    )\n\n    for attempt in generated_attempts:\n        all_attempt_ids.append(attempt.id)\n\n    # Verify basic pagination with different page sizes\n    print(\"Verifying basic pagination with page size 5\")\n    _verify_index_attempt_pagination(\n        cc_pair_id=cc_pair.id,\n        index_attempt_ids=all_attempt_ids,\n        page_size=5,\n        user_performing_action=user_performing_action,\n    )\n\n    # Test with a larger page size\n    print(\"Verifying pagination with page size 100\")\n    _verify_index_attempt_pagination(\n        cc_pair_id=cc_pair.id,\n        index_attempt_ids=all_attempt_ids,\n        page_size=100,\n        user_performing_action=user_performing_action,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/indexing/conftest.py",
    "content": "import httpx\nimport pytest\n\nfrom tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_HOST\nfrom tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_PORT\n\n\n@pytest.fixture\ndef mock_server_client() -> httpx.Client:\n    print(\n        f\"Initializing mock server client with host: {MOCK_CONNECTOR_SERVER_HOST} and port: {MOCK_CONNECTOR_SERVER_PORT}\"\n    )\n    return httpx.Client(\n        base_url=f\"http://{MOCK_CONNECTOR_SERVER_HOST}:{MOCK_CONNECTOR_SERVER_PORT}\",\n        timeout=5.0,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/indexing/file_connector/test_file_connector_zip_metadata.py",
    "content": "import json\nimport os\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport pytest\n\nfrom onyx.connectors.models import InputType\nfrom onyx.db.document import get_documents_for_cc_pair\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import AccessType\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.file import FileManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\n\n# This is a placeholder - you'll need to create this zip file with actual test files\nTEST_FILES_BASE = \"tests/integration/tests/indexing/file_connector/test_files\"\nTEST_META_ZIP_PATH = f\"{TEST_FILES_BASE}/with_meta.zip\"\nTEST_NO_META_ZIP_PATH = f\"{TEST_FILES_BASE}/without_meta.zip\"\nTEST_METADATA_FILE = f\"{TEST_FILES_BASE}/.onyx_metadata.json\"\n\n\n@pytest.mark.parametrize(\n    \"zip_path, has_metadata\",\n    [\n        (TEST_META_ZIP_PATH, True),\n        (TEST_NO_META_ZIP_PATH, False),\n    ],\n)\ndef test_zip_metadata_handling(\n    reset: None,  # noqa: ARG001\n    vespa_client: vespa_fixture,  # noqa: ARG001\n    zip_path: str,\n    has_metadata: bool,\n) -> None:\n    before = datetime.now(timezone.utc)\n    # Create an admin user\n    admin_user: DATestUser = UserManager.create(\n        email=\"admin@example.com\",\n    )\n\n    # Upload the test zip file (simulate this happening from frontend)\n    upload_response = FileManager.upload_file_for_connector(\n        file_path=zip_path,\n        file_name=os.path.basename(zip_path),\n        user_performing_action=admin_user,\n        content_type=\"application/zip\",\n    )\n\n    file_paths = upload_response.file_paths\n    assert file_paths, \"File upload failed - no file paths returned\"\n    if has_metadata:\n        zip_metadata_file_id = upload_response.zip_metadata_file_id\n        assert zip_metadata_file_id, \"Metadata file ID should be present\"\n    else:\n        zip_metadata_file_id = None\n\n    # Create a dummy credential for the file connector\n    credential = CredentialManager.create(\n        source=DocumentSource.FILE,\n        credential_json={},\n        user_performing_action=admin_user,\n    )\n\n    # Create the connector\n    connector_name = f\"FileConnector-{int(datetime.now().timestamp())}\"\n    connector = ConnectorManager.create(\n        name=connector_name,\n        source=DocumentSource.FILE,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={\n            \"file_locations\": file_paths,\n            \"file_names\": [os.path.basename(file_path) for file_path in file_paths],\n            \"zip_metadata_file_id\": zip_metadata_file_id,\n        },\n        access_type=AccessType.PUBLIC,\n        groups=[],\n        user_performing_action=admin_user,\n    )\n\n    # Link the credential to the connector\n    cc_pair = CCPairManager.create(\n        credential_id=credential.id,\n        connector_id=connector.id,\n        access_type=AccessType.PUBLIC,\n        user_performing_action=admin_user,\n    )\n\n    # Run the connector to index the files\n    CCPairManager.run_once(\n        cc_pair, from_beginning=True, user_performing_action=admin_user\n    )\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair, after=before, user_performing_action=admin_user\n    )\n\n    # Get the indexed documents\n    with get_session_with_current_tenant() as db_session:\n        documents = get_documents_for_cc_pair(db_session, cc_pair.id)\n\n    # Expected metadata from the .onyx_metadata.json file\n    with open(TEST_METADATA_FILE, \"r\") as f:\n        expected_metadata = json.load(f)\n\n    # Verify each document has the correct metadata\n    for doc in documents:\n        filename = doc.semantic_id\n        if filename in expected_metadata:\n            expected = expected_metadata[filename]\n            assert (\n                doc.semantic_id == expected[\"display_name\"]\n            ), f\"Display name mismatch for {filename}\"\n            assert doc.link == expected[\"link\"], f\"Link mismatch for {filename}\"\n"
  },
  {
    "path": "backend/tests/integration/tests/indexing/file_connector/test_files/.onyx_metadata.json",
    "content": "[\n    {\n        \"filename\": \"sample1.txt\",\n        \"link\": \"https://www.google.com\",\n        \"file_display_name\": \"Basically Google\",\n        \"primary_owners\": [\"evan@onyx.app\"],\n        \"status\": \"bingle bongle\"\n    },\n    {\n        \"filename\": \"sample2.txt\",\n        \"link\": \"https://www.youtube.com\",\n        \"file_display_name\": \"Pretty much youtube\",\n        \"primary_owners\": [\"chris@onyx.app\"],\n        \"status\": \"not bingle bongle\"\n    }\n]"
  },
  {
    "path": "backend/tests/integration/tests/indexing/file_connector/test_files/sample1.txt",
    "content": "The following contains some excerpts from our docs.\n\nThe File Connector indexes user uploaded files. Currently supports .txt, .pdf, .docx, .pptx, .xlsx, .csv, .md, .mdx, .conf, .log, .json, .tsv, .xml, .yml, .yaml, .eml, and .epub files. \nYou can also upload a .zip containing these files - If there are other file types in the zip, the other file types are ignored. \nThere is also an optional metadata line that supports links, document owners, and time updated as metadata for Onyx’s retrieval and AI Answer.\n\nThe metadata line should be placed at the very top of the file and can take one of two formats:\n\n#ONYX_METADATA={\"link\": \"<LINK>\"}\n<!-- ONYX_METADATA={\"link\": \"<LINK>\"} -->\nWhere ONYX_METADATA= is followed by a json. The valid json keys are:\n\nlink\nprimary_owners\nsecondary_owners\ndoc_updated_at\nfile_display_name\nYou can also include arbitrary key/value pairs which will be understood as “tags”. \nThese tags can then be used in the UI as a filter if you want to constrain your search / conversation to only documents with certain tag(s) attached"
  },
  {
    "path": "backend/tests/integration/tests/indexing/file_connector/test_files/sample2.txt",
    "content": "Hello, I hope you're having a wonderful day!"
  },
  {
    "path": "backend/tests/integration/tests/indexing/test_checkpointing.py",
    "content": "import uuid\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nimport httpx\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.mock_connector.connector import MockConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import EntityFailure\nfrom onyx.connectors.models import InputType\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import IndexingStatus\nfrom tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_HOST\nfrom tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_PORT\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.index_attempt import IndexAttemptManager\nfrom tests.integration.common_utils.test_document_utils import create_test_document\nfrom tests.integration.common_utils.test_document_utils import (\n    create_test_document_failure,\n)\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\n\ndef test_mock_connector_basic_flow(\n    mock_server_client: httpx.Client,\n    vespa_client: vespa_fixture,\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test that the mock connector can successfully process documents and failures\"\"\"\n    # Set up mock server behavior\n    doc_uuid = uuid.uuid4()\n    test_doc = create_test_document(doc_id=f\"test-doc-{doc_uuid}\")\n\n    response = mock_server_client.post(\n        \"/set-behavior\",\n        json=[\n            {\n                \"documents\": [test_doc.model_dump(mode=\"json\")],\n                \"checkpoint\": MockConnectorCheckpoint(has_more=False).model_dump(\n                    mode=\"json\"\n                ),\n                \"failures\": [],\n            }\n        ],\n    )\n    assert response.status_code == 200\n\n    # create CC Pair + index attempt\n    cc_pair = CCPairManager.create_from_scratch(\n        name=f\"mock-connector-{uuid.uuid4()}\",\n        source=DocumentSource.MOCK_CONNECTOR,\n        input_type=InputType.POLL,\n        connector_specific_config={\n            \"mock_server_host\": MOCK_CONNECTOR_SERVER_HOST,\n            \"mock_server_port\": MOCK_CONNECTOR_SERVER_PORT,\n        },\n        user_performing_action=admin_user,\n    )\n\n    # wait for index attempt to start\n    index_attempt = IndexAttemptManager.wait_for_index_attempt_start(\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n\n    # wait for index attempt to finish\n    IndexAttemptManager.wait_for_index_attempt_completion(\n        index_attempt_id=index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n\n    # validate status\n    finished_index_attempt = IndexAttemptManager.get_index_attempt_by_id(\n        index_attempt_id=index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert finished_index_attempt.status == IndexingStatus.SUCCESS\n\n    # Verify results\n    with get_session_with_current_tenant() as db_session:\n        chunks = DocumentManager.fetch_documents_for_cc_pair(\n            cc_pair_id=cc_pair.id,\n            db_session=db_session,\n            vespa_client=vespa_client,\n        )\n    assert len(chunks) == 1\n    assert chunks[0].id == test_doc.id\n\n    errors = IndexAttemptManager.get_index_attempt_errors_for_cc_pair(\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert len(errors) == 0\n\n\ndef test_mock_connector_with_failures(\n    mock_server_client: httpx.Client,\n    vespa_client: vespa_fixture,\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test that the mock connector processes both successes and failures properly.\"\"\"\n    doc1 = create_test_document()\n    doc2 = create_test_document()\n    doc2_failure = create_test_document_failure(doc_id=doc2.id)\n\n    response = mock_server_client.post(\n        \"/set-behavior\",\n        json=[\n            {\n                \"documents\": [doc1.model_dump(mode=\"json\")],\n                \"checkpoint\": MockConnectorCheckpoint(has_more=False).model_dump(\n                    mode=\"json\"\n                ),\n                \"failures\": [doc2_failure.model_dump(mode=\"json\")],\n            }\n        ],\n    )\n    assert response.status_code == 200\n\n    # Create a CC Pair for the mock connector\n    cc_pair = CCPairManager.create_from_scratch(\n        name=f\"mock-connector-failure-{uuid.uuid4()}\",\n        source=DocumentSource.MOCK_CONNECTOR,\n        input_type=InputType.POLL,\n        connector_specific_config={\n            \"mock_server_host\": MOCK_CONNECTOR_SERVER_HOST,\n            \"mock_server_port\": MOCK_CONNECTOR_SERVER_PORT,\n        },\n        user_performing_action=admin_user,\n    )\n\n    # Wait for the index attempt to start and then complete\n    index_attempt = IndexAttemptManager.wait_for_index_attempt_start(\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n\n    IndexAttemptManager.wait_for_index_attempt_completion(\n        index_attempt_id=index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n\n    # validate status\n    finished_index_attempt = IndexAttemptManager.get_index_attempt_by_id(\n        index_attempt_id=index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert finished_index_attempt.status == IndexingStatus.COMPLETED_WITH_ERRORS\n\n    # Verify results: doc1 should be indexed and doc2 should have an error entry\n    with get_session_with_current_tenant() as db_session:\n        documents = DocumentManager.fetch_documents_for_cc_pair(\n            cc_pair_id=cc_pair.id,\n            db_session=db_session,\n            vespa_client=vespa_client,\n        )\n    assert len(documents) == 1\n    assert documents[0].id == doc1.id\n\n    errors = IndexAttemptManager.get_index_attempt_errors_for_cc_pair(\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert len(errors) == 1\n    error = errors[0]\n    assert error.failure_message == doc2_failure.failure_message\n    assert error.document_id == doc2.id\n\n\ndef test_mock_connector_failure_recovery(\n    mock_server_client: httpx.Client,\n    vespa_client: vespa_fixture,\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test that a failed document can be successfully indexed in a subsequent attempt\n    while maintaining previously successful documents.\"\"\"\n    # Create test documents and failure\n    doc1 = create_test_document()\n    doc2 = create_test_document()\n    doc2_failure = create_test_document_failure(doc_id=doc2.id)\n    entity_id = \"test-entity-id\"\n    entity_failure_msg = \"Simulated unhandled error\"\n\n    response = mock_server_client.post(\n        \"/set-behavior\",\n        json=[\n            {\n                \"documents\": [doc1.model_dump(mode=\"json\")],\n                \"checkpoint\": MockConnectorCheckpoint(has_more=False).model_dump(\n                    mode=\"json\"\n                ),\n                \"failures\": [\n                    doc2_failure.model_dump(mode=\"json\"),\n                    ConnectorFailure(\n                        failed_entity=EntityFailure(\n                            entity_id=entity_id,\n                            missed_time_range=(\n                                datetime.now(timezone.utc) - timedelta(days=1),\n                                datetime.now(timezone.utc),\n                            ),\n                        ),\n                        failure_message=entity_failure_msg,\n                    ).model_dump(mode=\"json\"),\n                ],\n            }\n        ],\n    )\n    assert response.status_code == 200\n\n    # Create CC Pair and run initial indexing attempt\n    cc_pair = CCPairManager.create_from_scratch(\n        name=f\"mock-connector-{uuid.uuid4()}\",\n        source=DocumentSource.MOCK_CONNECTOR,\n        input_type=InputType.POLL,\n        connector_specific_config={\n            \"mock_server_host\": MOCK_CONNECTOR_SERVER_HOST,\n            \"mock_server_port\": MOCK_CONNECTOR_SERVER_PORT,\n        },\n        user_performing_action=admin_user,\n    )\n\n    # Wait for first index attempt to complete\n    initial_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    IndexAttemptManager.wait_for_index_attempt_completion(\n        index_attempt_id=initial_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n\n    # validate status\n    finished_index_attempt = IndexAttemptManager.get_index_attempt_by_id(\n        index_attempt_id=initial_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert finished_index_attempt.status == IndexingStatus.COMPLETED_WITH_ERRORS\n\n    # Verify initial state: doc1 indexed, doc2 failed\n    with get_session_with_current_tenant() as db_session:\n        documents = DocumentManager.fetch_documents_for_cc_pair(\n            cc_pair_id=cc_pair.id,\n            db_session=db_session,\n            vespa_client=vespa_client,\n        )\n    assert len(documents) == 1\n    assert documents[0].id == doc1.id\n\n    errors = IndexAttemptManager.get_index_attempt_errors_for_cc_pair(\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert len(errors) == 2\n    error_doc2 = next(error for error in errors if error.document_id == doc2.id)\n    assert error_doc2.failure_message == doc2_failure.failure_message\n    assert not error_doc2.is_resolved\n\n    error_entity = next(error for error in errors if error.entity_id == entity_id)\n    assert error_entity.failure_message == entity_failure_msg\n    assert not error_entity.is_resolved\n\n    # Update mock server to return success for both documents\n    response = mock_server_client.post(\n        \"/set-behavior\",\n        json=[\n            {\n                \"documents\": [\n                    doc1.model_dump(mode=\"json\"),\n                    doc2.model_dump(mode=\"json\"),\n                ],\n                \"checkpoint\": MockConnectorCheckpoint(has_more=False).model_dump(\n                    mode=\"json\"\n                ),\n                \"failures\": [],\n            }\n        ],\n    )\n    assert response.status_code == 200\n\n    # Trigger another indexing attempt\n    # NOTE: must be from beginning to handle the entity failure\n    CCPairManager.run_once(\n        cc_pair, from_beginning=True, user_performing_action=admin_user\n    )\n    recovery_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(\n        cc_pair_id=cc_pair.id,\n        index_attempts_to_ignore=[initial_index_attempt.id],\n        user_performing_action=admin_user,\n    )\n    IndexAttemptManager.wait_for_index_attempt_completion(\n        index_attempt_id=recovery_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n\n    finished_second_index_attempt = IndexAttemptManager.get_index_attempt_by_id(\n        index_attempt_id=recovery_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert finished_second_index_attempt.status == IndexingStatus.SUCCESS\n\n    # Verify both documents are now indexed\n    with get_session_with_current_tenant() as db_session:\n        documents = DocumentManager.fetch_documents_for_cc_pair(\n            cc_pair_id=cc_pair.id,\n            db_session=db_session,\n            vespa_client=vespa_client,\n        )\n    assert len(documents) == 2\n    document_ids = {doc.id for doc in documents}\n    assert doc2.id in document_ids\n    assert doc1.id in document_ids\n\n    # Verify original failures were marked as resolved\n    errors = IndexAttemptManager.get_index_attempt_errors_for_cc_pair(\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert len(errors) == 2\n    error_doc2 = next(error for error in errors if error.document_id == doc2.id)\n    error_entity = next(error for error in errors if error.entity_id == entity_id)\n\n    assert error_doc2.is_resolved\n    assert error_entity.is_resolved\n\n\ndef test_mock_connector_checkpoint_recovery(\n    mock_server_client: httpx.Client,\n    vespa_client: vespa_fixture,\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test that checkpointing works correctly when an unhandled exception occurs\n    and that subsequent runs pick up from the last successful checkpoint.\"\"\"\n    # Create test documents\n    docs_batch_1 = [create_test_document() for _ in range(100)]\n    doc2 = create_test_document()\n    doc3 = create_test_document()\n\n    # Set up mock server behavior for initial run:\n    # - First yield: 100 docs with checkpoint1\n    # - Second yield: doc2 with checkpoint2\n    # - Third yield: unhandled exception\n    response = mock_server_client.post(\n        \"/set-behavior\",\n        json=[\n            {\n                \"documents\": [doc.model_dump(mode=\"json\") for doc in docs_batch_1],\n                \"checkpoint\": MockConnectorCheckpoint(\n                    has_more=True, last_document_id=docs_batch_1[-1].id\n                ).model_dump(mode=\"json\"),\n                \"failures\": [],\n            },\n            {\n                \"documents\": [doc2.model_dump(mode=\"json\")],\n                \"checkpoint\": MockConnectorCheckpoint(\n                    has_more=True, last_document_id=doc2.id\n                ).model_dump(mode=\"json\"),\n                \"failures\": [],\n            },\n            {\n                \"documents\": [],\n                # should never hit this, unhandled exception happens first\n                \"checkpoint\": MockConnectorCheckpoint(\n                    has_more=False, last_document_id=doc2.id\n                ).model_dump(mode=\"json\"),\n                \"failures\": [],\n                \"unhandled_exception\": \"Simulated unhandled error\",\n            },\n        ],\n    )\n    assert response.status_code == 200\n\n    # Create CC Pair and run initial indexing attempt\n    # Note: Setting refresh_freq to allow manual retrigger after failure\n    cc_pair = CCPairManager.create_from_scratch(\n        name=f\"mock-connector-checkpoint-{uuid.uuid4()}\",\n        source=DocumentSource.MOCK_CONNECTOR,\n        input_type=InputType.POLL,\n        connector_specific_config={\n            \"mock_server_host\": MOCK_CONNECTOR_SERVER_HOST,\n            \"mock_server_port\": MOCK_CONNECTOR_SERVER_PORT,\n        },\n        user_performing_action=admin_user,\n        refresh_freq=60 * 60,  # 1 hour\n    )\n\n    # Wait for first index attempt to complete\n    initial_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    IndexAttemptManager.wait_for_index_attempt_completion(\n        index_attempt_id=initial_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n\n    # validate status\n    finished_index_attempt = IndexAttemptManager.get_index_attempt_by_id(\n        index_attempt_id=initial_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert finished_index_attempt.status == IndexingStatus.FAILED\n\n    # Pause the connector immediately to prevent check_for_indexing from\n    # creating automatic retry attempts while we reset the mock server.\n    # Without this, the INITIAL_INDEXING status causes immediate retries\n    # that would consume (or fail against) the mock server before we can\n    # set up the recovery behavior.\n    CCPairManager.pause_cc_pair(cc_pair, user_performing_action=admin_user)\n\n    # Collect all index attempt IDs created so far (the initial one plus\n    # any automatic retries that may have started before the pause took effect).\n    all_prior_attempt_ids: list[int] = []\n    index_attempts_page = IndexAttemptManager.get_index_attempt_page(\n        cc_pair_id=cc_pair.id,\n        page=0,\n        page_size=100,\n        user_performing_action=admin_user,\n    )\n    all_prior_attempt_ids = [ia.id for ia in index_attempts_page.items]\n\n    # Verify initial state: both docs should be indexed\n    with get_session_with_current_tenant() as db_session:\n        documents = DocumentManager.fetch_documents_for_cc_pair(\n            cc_pair_id=cc_pair.id,\n            db_session=db_session,\n            vespa_client=vespa_client,\n        )\n    # This is no longer guaranteed because docfetching and docprocessing are decoupled!\n    # Some batches may not be processed when docfetching fails, but they should still stick around\n    # in the filestore and be ready for the next run.\n    # assert len(documents) == 101  # 100 docs from first batch + doc2\n    # document_ids = {doc.id for doc in documents}\n    # assert doc2.id in document_ids\n    # assert all(doc.id in document_ids for doc in docs_batch_1)\n\n    # Get the checkpoints that were sent to the mock server\n    response = mock_server_client.get(\"/get-checkpoints\")\n    assert response.status_code == 200\n    initial_checkpoints = response.json()\n\n    # Verify we got the expected checkpoints in order\n    assert len(initial_checkpoints) == 3\n    assert initial_checkpoints[0] == {\n        \"has_more\": True,\n        \"last_document_id\": None,\n    }  # Initial empty checkpoint\n    assert initial_checkpoints[1] == {\n        \"has_more\": True,\n        \"last_document_id\": docs_batch_1[-1].id,\n    }\n    assert initial_checkpoints[2] == {\"has_more\": True, \"last_document_id\": doc2.id}\n\n    # Reset the mock server for the next run\n    response = mock_server_client.post(\"/reset\")\n    assert response.status_code == 200\n\n    # Set up mock server behavior for recovery run - should succeed fully this time\n    response = mock_server_client.post(\n        \"/set-behavior\",\n        json=[\n            {\n                \"documents\": [doc3.model_dump(mode=\"json\")],\n                \"checkpoint\": MockConnectorCheckpoint(\n                    has_more=False, last_document_id=doc3.id\n                ).model_dump(mode=\"json\"),\n                \"failures\": [],\n            }\n        ],\n    )\n    assert response.status_code == 200\n\n    # Set the manual indexing trigger, then unpause to allow the recovery run.\n    CCPairManager.run_once(\n        cc_pair, from_beginning=False, user_performing_action=admin_user\n    )\n    CCPairManager.unpause_cc_pair(cc_pair, user_performing_action=admin_user)\n    recovery_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(\n        cc_pair_id=cc_pair.id,\n        index_attempts_to_ignore=all_prior_attempt_ids,\n        user_performing_action=admin_user,\n    )\n    IndexAttemptManager.wait_for_index_attempt_completion(\n        index_attempt_id=recovery_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n\n    # validate status\n    finished_recovery_attempt = IndexAttemptManager.get_index_attempt_by_id(\n        index_attempt_id=recovery_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert finished_recovery_attempt.status == IndexingStatus.SUCCESS\n\n    # Verify results\n    with get_session_with_current_tenant() as db_session:\n        documents = DocumentManager.fetch_documents_for_cc_pair(\n            cc_pair_id=cc_pair.id,\n            db_session=db_session,\n            vespa_client=vespa_client,\n        )\n    assert len(documents) == 102  # 100 docs from first batch + doc2 + doc3\n    document_ids = {doc.id for doc in documents}\n    assert doc3.id in document_ids\n    assert doc2.id in document_ids\n    assert all(doc.id in document_ids for doc in docs_batch_1)\n\n    # Get the checkpoints from the recovery run\n    response = mock_server_client.get(\"/get-checkpoints\")\n    assert response.status_code == 200\n    recovery_checkpoints = response.json()\n\n    # Verify the recovery run started from the last successful checkpoint\n    assert len(recovery_checkpoints) == 1\n    assert recovery_checkpoints[0] == {\"has_more\": True, \"last_document_id\": doc2.id}\n"
  },
  {
    "path": "backend/tests/integration/tests/indexing/test_initial_permission_sync.py",
    "content": "import os\nimport uuid\nfrom datetime import datetime\nfrom datetime import timezone\n\nimport httpx\nimport pytest\nfrom sqlalchemy import select\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.mock_connector.connector import EXTERNAL_USER_EMAILS\nfrom onyx.connectors.mock_connector.connector import EXTERNAL_USER_GROUP_IDS\nfrom onyx.connectors.mock_connector.connector import MockConnectorCheckpoint\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import InputType\nfrom onyx.db.document import get_documents_by_ids\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import AccessType\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.db.enums import PermissionSyncStatus\nfrom onyx.db.models import DocPermissionSyncAttempt\nfrom tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_HOST\nfrom tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_PORT\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.index_attempt import IndexAttemptManager\nfrom tests.integration.common_utils.test_document_utils import create_test_document\nfrom tests.integration.common_utils.test_models import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\n\ndef _setup_mock_connector(\n    mock_server_client: httpx.Client,\n    admin_user: DATestUser,\n) -> tuple[DATestCCPair, Document]:\n    \"\"\"Common setup: create a test doc, configure mock server, create cc_pair, wait for indexing.\"\"\"\n    doc_uuid = uuid.uuid4()\n    test_doc = create_test_document(doc_id=f\"test-doc-{doc_uuid}\")\n\n    response = mock_server_client.post(\n        \"/set-behavior\",\n        json=[\n            {\n                \"documents\": [test_doc.model_dump(mode=\"json\")],\n                \"checkpoint\": MockConnectorCheckpoint(has_more=False).model_dump(\n                    mode=\"json\"\n                ),\n                \"failures\": [],\n            }\n        ],\n    )\n    assert response.status_code == 200\n\n    cc_pair = CCPairManager.create_from_scratch(\n        name=f\"mock-connector-{uuid.uuid4()}\",\n        source=DocumentSource.MOCK_CONNECTOR,\n        input_type=InputType.POLL,\n        connector_specific_config={\n            \"mock_server_host\": MOCK_CONNECTOR_SERVER_HOST,\n            \"mock_server_port\": MOCK_CONNECTOR_SERVER_PORT,\n        },\n        access_type=AccessType.SYNC,\n        user_performing_action=admin_user,\n    )\n\n    index_attempt = IndexAttemptManager.wait_for_index_attempt_start(\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n\n    IndexAttemptManager.wait_for_index_attempt_completion(\n        index_attempt_id=index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n\n    finished = IndexAttemptManager.get_index_attempt_by_id(\n        index_attempt_id=index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert finished.status == IndexingStatus.SUCCESS\n    return cc_pair, test_doc\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permission sync is enterprise only\",\n)\ndef test_mock_connector_initial_permission_sync(\n    mock_server_client: httpx.Client,\n    vespa_client: vespa_fixture,\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test that the MockConnector fetches and sets permissions during initial indexing\n    when AccessType.SYNC is used.\"\"\"\n\n    cc_pair, test_doc = _setup_mock_connector(mock_server_client, admin_user)\n\n    with get_session_with_current_tenant() as db_session:\n        documents = DocumentManager.fetch_documents_for_cc_pair(\n            cc_pair_id=cc_pair.id,\n            db_session=db_session,\n            vespa_client=vespa_client,\n        )\n    assert len(documents) == 1\n    assert documents[0].id == test_doc.id\n\n    errors = IndexAttemptManager.get_index_attempt_errors_for_cc_pair(\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert len(errors) == 0\n\n    with get_session_with_current_tenant() as db_session:\n        db_docs = get_documents_by_ids(\n            db_session=db_session,\n            document_ids=[test_doc.id],\n        )\n        assert len(db_docs) == 1\n        db_doc = db_docs[0]\n\n        assert db_doc.external_user_emails is not None\n        assert db_doc.external_user_group_ids is not None\n        assert set(db_doc.external_user_emails) == EXTERNAL_USER_EMAILS\n        assert set(db_doc.external_user_group_ids) == EXTERNAL_USER_GROUP_IDS\n        assert db_doc.is_public is False\n\n    # After initial indexing, the beat task detects last_time_perm_sync is None\n    # and triggers a doc permission sync. Explicitly trigger it to avoid\n    # waiting for the 30s beat interval.\n    before = datetime.now(timezone.utc)\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        number_of_updated_docs=1,\n        user_performing_action=admin_user,\n        should_wait_for_group_sync=False,\n        should_wait_for_vespa_sync=False,\n    )\n\n    updated_cc_pair_info = CCPairManager.get_single(\n        cc_pair.id, user_performing_action=admin_user\n    )\n    assert updated_cc_pair_info is not None\n    assert updated_cc_pair_info.last_full_permission_sync is not None\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permission sync attempt tracking is enterprise only\",\n)\ndef test_permission_sync_attempt_tracking_integration(\n    mock_server_client: httpx.Client,\n    vespa_client: vespa_fixture,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test that permission sync attempts are properly tracked during real sync workflows.\"\"\"\n\n    cc_pair, _test_doc = _setup_mock_connector(mock_server_client, admin_user)\n\n    before = datetime.now(timezone.utc)\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        number_of_updated_docs=1,\n        user_performing_action=admin_user,\n        should_wait_for_group_sync=False,\n        should_wait_for_vespa_sync=False,\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        attempt = db_session.execute(\n            select(DocPermissionSyncAttempt).where(\n                DocPermissionSyncAttempt.connector_credential_pair_id == cc_pair.id\n            )\n        ).scalar_one()\n\n        assert attempt.status in [\n            PermissionSyncStatus.SUCCESS,\n            PermissionSyncStatus.COMPLETED_WITH_ERRORS,\n            PermissionSyncStatus.FAILED,\n        ]\n        assert attempt.total_docs_synced is not None and attempt.total_docs_synced >= 0\n        assert (\n            attempt.docs_with_permission_errors is not None\n            and attempt.docs_with_permission_errors >= 0\n        )\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permission sync attempt tracking is enterprise only\",\n)\ndef test_permission_sync_attempt_status_success(\n    mock_server_client: httpx.Client,\n    vespa_client: vespa_fixture,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test that permission sync attempts are marked as SUCCESS when sync completes without errors.\"\"\"\n\n    cc_pair, _test_doc = _setup_mock_connector(mock_server_client, admin_user)\n\n    before = datetime.now(timezone.utc)\n    CCPairManager.sync(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n\n    CCPairManager.wait_for_sync(\n        cc_pair=cc_pair,\n        after=before,\n        number_of_updated_docs=1,\n        user_performing_action=admin_user,\n        should_wait_for_group_sync=False,\n        should_wait_for_vespa_sync=False,\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        attempt = db_session.execute(\n            select(DocPermissionSyncAttempt).where(\n                DocPermissionSyncAttempt.connector_credential_pair_id == cc_pair.id\n            )\n        ).scalar_one()\n\n        assert attempt.status == PermissionSyncStatus.SUCCESS\n        assert attempt.total_docs_synced is not None and attempt.total_docs_synced >= 0\n        assert (\n            attempt.docs_with_permission_errors is not None\n            and attempt.docs_with_permission_errors == 0\n        )\n"
  },
  {
    "path": "backend/tests/integration/tests/indexing/test_polling.py",
    "content": "import uuid\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nimport httpx\n\nfrom onyx.configs.app_configs import POLL_CONNECTOR_OFFSET\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.mock_connector.connector import MockConnectorCheckpoint\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import IndexingStatus\nfrom tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_HOST\nfrom tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_PORT\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.index_attempt import IndexAttemptManager\nfrom tests.integration.common_utils.test_document_utils import create_test_document\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _setup_mock_connector(\n    mock_server_client: httpx.Client,\n    admin_user: DATestUser,  # noqa: ARG001\n) -> None:\n    test_doc = create_test_document()\n    successful_response = {\n        \"documents\": [test_doc.model_dump(mode=\"json\")],\n        \"checkpoint\": MockConnectorCheckpoint(has_more=False).model_dump(mode=\"json\"),\n        \"failures\": [],\n    }\n    response = mock_server_client.post(\n        \"/set-behavior\",\n        json=[successful_response, successful_response],  # For two attempts\n    )\n    assert response.status_code == 200\n\n\ndef test_poll_connector_time_ranges(\n    mock_server_client: httpx.Client,\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"\n    Tests that poll connectors correctly set their poll_range_start and poll_range_end\n    across multiple indexing attempts.\n    \"\"\"\n    # Set up mock server behavior - a simple successful response\n    _setup_mock_connector(mock_server_client, admin_user)\n\n    # Create a CC Pair for the mock connector with POLL input type\n    cc_pair_name = f\"mock-poll-time-range-{uuid.uuid4()}\"\n    cc_pair = CCPairManager.create_from_scratch(\n        name=cc_pair_name,\n        source=DocumentSource.MOCK_CONNECTOR,\n        input_type=InputType.POLL,\n        connector_specific_config={\n            \"mock_server_host\": MOCK_CONNECTOR_SERVER_HOST,\n            \"mock_server_port\": MOCK_CONNECTOR_SERVER_PORT,\n        },\n        user_performing_action=admin_user,\n        refresh_freq=3,  # refresh often to ensure the second attempt actually runs\n    )\n\n    # --- First Indexing Attempt ---\n    time_before_first_attempt = datetime.now(timezone.utc)\n    first_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    IndexAttemptManager.wait_for_index_attempt_completion(\n        index_attempt_id=first_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    time_after_first_attempt = datetime.now(timezone.utc)\n\n    # Fetch and validate the first attempt\n    completed_first_attempt = IndexAttemptManager.get_index_attempt_by_id(\n        index_attempt_id=first_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert completed_first_attempt.status == IndexingStatus.SUCCESS\n    assert completed_first_attempt.poll_range_start is not None\n    assert completed_first_attempt.poll_range_end is not None\n\n    # For the first run (no prior successful attempts), poll_range_start should be epoch (0)\n    expected_first_start = datetime.fromtimestamp(0, tz=timezone.utc)\n    assert completed_first_attempt.poll_range_start == expected_first_start\n\n    # `poll_range_end` should be sometime in between the time the attempt\n    # started and the time it finished.\n    # no way to have a more precise assertion here since the `poll_range_end`\n    # can really be set anytime in that range and be \"correct\"\n    assert (\n        time_before_first_attempt\n        <= completed_first_attempt.poll_range_end\n        <= time_after_first_attempt\n    )\n\n    first_attempt_poll_end = completed_first_attempt.poll_range_end\n\n    # --- Second Indexing Attempt ---\n    # Trigger another run manually (since automatic refresh might be too slow for test)\n    # Ensure there's a slight delay so the poll window moves\n    # In a real scenario, the scheduler would wait for the refresh frequency.\n    # Here we manually trigger a new run.\n    _setup_mock_connector(mock_server_client, admin_user)\n    CCPairManager.run_once(\n        cc_pair, from_beginning=False, user_performing_action=admin_user\n    )\n\n    time_before_second_attempt = datetime.now(timezone.utc)\n    second_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(\n        cc_pair_id=cc_pair.id,\n        index_attempts_to_ignore=[first_index_attempt.id],\n        user_performing_action=admin_user,\n    )\n    IndexAttemptManager.wait_for_index_attempt_completion(\n        index_attempt_id=second_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    time_after_second_attempt = datetime.now(timezone.utc)\n\n    # Fetch and validate the second attempt\n    completed_second_attempt = IndexAttemptManager.get_index_attempt_by_id(\n        index_attempt_id=second_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert completed_second_attempt.status == IndexingStatus.SUCCESS\n    assert completed_second_attempt.poll_range_start is not None\n    assert completed_second_attempt.poll_range_end is not None\n\n    # For the second run, poll_range_start should be the previous successful attempt's\n    # poll_range_end minus the POLL_CONNECTOR_OFFSET\n    expected_second_start = first_attempt_poll_end - timedelta(\n        minutes=POLL_CONNECTOR_OFFSET\n    )\n    assert completed_second_attempt.poll_range_start == expected_second_start\n\n    # `poll_range_end` should be sometime in between the time the attempt\n    # started and the time it finished.\n    # again, no way to have a more precise assertion here since the `poll_range_end`\n    # can really be set anytime in that range and be \"correct\"\n    assert (\n        time_before_second_attempt\n        <= completed_second_attempt.poll_range_end\n        <= time_after_second_attempt\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/indexing/test_repeated_error_state.py",
    "content": "import time\nimport uuid\n\nimport httpx\n\nfrom onyx.background.celery.tasks.docprocessing.utils import (\n    NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE,\n)\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.mock_connector.connector import MockConnectorCheckpoint\nfrom onyx.connectors.models import InputType\nfrom onyx.db.connector_credential_pair import get_connector_credential_pair_from_id\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import IndexingStatus\nfrom tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_HOST\nfrom tests.integration.common_utils.constants import MOCK_CONNECTOR_SERVER_PORT\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.index_attempt import IndexAttemptManager\nfrom tests.integration.common_utils.test_document_utils import create_test_document\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\n\ndef test_repeated_error_state_detection_and_recovery(\n    mock_server_client: httpx.Client,\n    vespa_client: vespa_fixture,\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test that a connector is marked as in a repeated error state after\n    NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE consecutive failures, and\n    that it recovers after a successful indexing.\n\n    This test ensures we properly wait for the required number of indexing attempts\n    to fail before checking that the connector is in a repeated error state.\"\"\"\n\n    # Create test document for successful response\n    test_doc = create_test_document()\n\n    # First, set up the mock server to consistently fail\n    error_response = {\n        \"documents\": [],\n        \"checkpoint\": MockConnectorCheckpoint(has_more=False).model_dump(mode=\"json\"),\n        \"failures\": [],\n        \"unhandled_exception\": \"Simulated unhandled error for testing repeated errors\",\n    }\n\n    # Create a list of failure responses with at least the same length\n    # as NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE\n    failure_behaviors = [error_response] * (\n        5 * NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE\n    )\n\n    response = mock_server_client.post(\n        \"/set-behavior\",\n        json=failure_behaviors,\n    )\n    assert response.status_code == 200\n\n    # Create a new CC pair for testing\n    cc_pair = CCPairManager.create_from_scratch(\n        name=f\"mock-repeated-error-{uuid.uuid4()}\",\n        source=DocumentSource.MOCK_CONNECTOR,\n        input_type=InputType.POLL,\n        connector_specific_config={\n            \"mock_server_host\": MOCK_CONNECTOR_SERVER_HOST,\n            \"mock_server_port\": MOCK_CONNECTOR_SERVER_PORT,\n        },\n        user_performing_action=admin_user,\n        refresh_freq=60 * 60,  # a very long time\n    )\n\n    # Wait for the required number of failed indexing attempts\n    # This shouldn't take long, since we keep retrying while we haven't\n    # succeeded yet\n    start_time = time.monotonic()\n    while True:\n        index_attempts_page = IndexAttemptManager.get_index_attempt_page(\n            cc_pair_id=cc_pair.id,\n            page=0,\n            page_size=100,\n            user_performing_action=admin_user,\n        )\n        index_attempts = [\n            ia\n            for ia in index_attempts_page.items\n            if ia.status and ia.status.is_terminal()\n        ]\n        if len(index_attempts) >= NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE:\n            break\n\n        if time.monotonic() - start_time > 180:\n            raise TimeoutError(\n                \"Did not get required number of failed attempts within 180 seconds\"\n            )\n\n        # make sure that we don't mark the connector as in repeated error state\n        # before we have the required number of failed attempts\n        with get_session_with_current_tenant() as db_session:\n            cc_pair_obj = get_connector_credential_pair_from_id(\n                db_session=db_session,\n                cc_pair_id=cc_pair.id,\n            )\n            assert cc_pair_obj is not None\n        assert not cc_pair_obj.in_repeated_error_state\n\n        time.sleep(2)\n\n    # Verify we have the correct number of failed attempts\n    assert len(index_attempts) == NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE\n    for attempt in index_attempts:\n        assert attempt.status == IndexingStatus.FAILED\n\n    # Check if the connector is in a repeated error state\n    start_time = time.monotonic()\n    while True:\n        with get_session_with_current_tenant() as db_session:\n            cc_pair_obj = get_connector_credential_pair_from_id(\n                db_session=db_session,\n                cc_pair_id=cc_pair.id,\n            )\n            assert cc_pair_obj is not None\n            if cc_pair_obj.in_repeated_error_state:\n                # Pausing only happens for cloud deployments and the IT don't run with\n                # that auth type :(\n                # if AUTH_TYPE == AuthType.CLOUD:\n                #     assert cc_pair_obj.status == ConnectorCredentialPairStatus.PAUSED, (\n                #         f\"Expected status to be PAUSED when in repeated error state, \"\n                #         f\"but got {cc_pair_obj.status}\"\n                #     )\n                break\n\n        if time.monotonic() - start_time > 90:\n            assert False, \"CC pair did not enter repeated error state within 90 seconds\"\n\n        time.sleep(2)\n\n    # Reset the mock server state\n    response = mock_server_client.post(\"/reset\")\n    assert response.status_code == 200\n\n    # Now set up the mock server to succeed\n    success_response = {\n        \"documents\": [test_doc.model_dump(mode=\"json\")],\n        \"checkpoint\": MockConnectorCheckpoint(has_more=False).model_dump(mode=\"json\"),\n        \"failures\": [],\n    }\n\n    response = mock_server_client.post(\n        \"/set-behavior\",\n        json=[success_response],\n    )\n    assert response.status_code == 200\n\n    # Set the manual indexing trigger first (while paused), then unpause.\n    # This ensures the trigger is set before CHECK_FOR_INDEXING runs, which will\n    # prevent the connector from being re-paused when repeated error state is detected.\n    CCPairManager.run_once(\n        cc_pair, from_beginning=True, user_performing_action=admin_user\n    )\n    CCPairManager.unpause_cc_pair(cc_pair, user_performing_action=admin_user)\n\n    recovery_index_attempt = IndexAttemptManager.wait_for_index_attempt_start(\n        cc_pair_id=cc_pair.id,\n        index_attempts_to_ignore=[index_attempt.id for index_attempt in index_attempts],\n        user_performing_action=admin_user,\n    )\n\n    IndexAttemptManager.wait_for_index_attempt_completion(\n        index_attempt_id=recovery_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n\n    # Validate the indexing succeeded\n    finished_recovery_attempt = IndexAttemptManager.get_index_attempt_by_id(\n        index_attempt_id=recovery_index_attempt.id,\n        cc_pair_id=cc_pair.id,\n        user_performing_action=admin_user,\n    )\n    assert finished_recovery_attempt.status == IndexingStatus.SUCCESS\n\n    # Verify the document was indexed\n    with get_session_with_current_tenant() as db_session:\n        documents = DocumentManager.fetch_documents_for_cc_pair(\n            cc_pair_id=cc_pair.id,\n            db_session=db_session,\n            vespa_client=vespa_client,\n        )\n    assert len(documents) == 1\n    assert documents[0].id == test_doc.id\n\n    # Verify the CC pair is no longer in a repeated error state\n    start = time.monotonic()\n    while True:\n        with get_session_with_current_tenant() as db_session:\n            cc_pair_obj = get_connector_credential_pair_from_id(\n                db_session=db_session,\n                cc_pair_id=cc_pair.id,\n            )\n            assert cc_pair_obj is not None\n            if not cc_pair_obj.in_repeated_error_state:\n                break\n\n        elapsed = time.monotonic() - start\n        if elapsed > 30:\n            raise TimeoutError(\n                \"CC pair did not exit repeated error state within 30 seconds\"\n            )\n\n        print(\n            f\"Waiting for CC pair to exit repeated error state. elapsed={elapsed:.2f}\"\n        )\n        time.sleep(1)\n"
  },
  {
    "path": "backend/tests/integration/tests/ingestion/test_ingestion_api.py",
    "content": "from onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import Document\nfrom tests.integration.common_utils.managers.api_key import APIKeyManager\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.document import IngestionManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\n\ndef test_ingestion_api_crud(\n    reset: None,  # noqa: ARG001\n    vespa_client: vespa_fixture,\n) -> None:\n    \"\"\"Test create, list, and delete via the ingestion API.\"\"\"\n    admin_user: DATestUser = UserManager.create(email=\"admin@onyx.app\")\n    cc_pair = CCPairManager.create_from_scratch(\n        name=\"Ingestion-API-Test\",\n        source=DocumentSource.FILE,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={\n            \"file_locations\": [],\n            \"file_names\": [],\n            \"zip_metadata_file_id\": None,\n        },\n        user_performing_action=admin_user,\n    )\n    api_key = APIKeyManager.create(user_performing_action=admin_user)\n    api_key.headers.update(admin_user.headers)\n\n    # CREATE\n    doc = IngestionManager.seed_doc_with_content(\n        cc_pair=cc_pair,\n        content=\"Test document\",\n        document_id=\"test-doc-1\",\n        api_key=api_key,\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        doc_db = db_session.query(Document).filter(Document.id == doc.id).first()\n        assert doc_db is not None\n        assert doc_db.from_ingestion_api is True\n\n    vespa_docs = vespa_client.get_documents_by_id([doc.id])[\"documents\"]\n    assert len(vespa_docs) == 1\n\n    # LIST\n    docs_list = IngestionManager.list_all_ingestion_docs(api_key=api_key)\n    assert any(d[\"document_id\"] == doc.id for d in docs_list)\n\n    # DELETE\n    IngestionManager.delete(document_id=doc.id, api_key=api_key)\n\n    with get_session_with_current_tenant() as db_session:\n        doc_db = db_session.query(Document).filter(Document.id == doc.id).first()\n        assert doc_db is None\n\n    vespa_docs = vespa_client.get_documents_by_id([doc.id])[\"documents\"]\n    assert len(vespa_docs) == 0\n"
  },
  {
    "path": "backend/tests/integration/tests/kg/test_kg_api.py",
    "content": "import json\nfrom datetime import datetime\nfrom http import HTTPStatus\n\nimport pytest\nimport requests\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.connector import create_connector\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.kg_config import get_kg_config_settings\nfrom onyx.db.kg_config import set_kg_config_settings\nfrom onyx.db.models import Connector\nfrom onyx.server.documents.models import ConnectorBase\nfrom onyx.server.kg.models import DisableKGConfigRequest\nfrom onyx.server.kg.models import EnableKGConfigRequest\nfrom onyx.server.kg.models import EntityType\nfrom onyx.server.kg.models import KGConfig as KGConfigAPIModel\nfrom onyx.server.kg.models import SourceAndEntityTypeView\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.reset import reset_all\n\n\n@pytest.fixture(autouse=True)\ndef reset_for_test() -> None:\n    \"\"\"Reset all data before each test.\"\"\"\n    reset_all()\n\n    kg_config_settings = get_kg_config_settings()\n    kg_config_settings.KG_EXPOSED = True\n    set_kg_config_settings(kg_config_settings)\n\n\n@pytest.fixture()\ndef connectors() -> None:\n    \"\"\"Set up connectors for tests.\"\"\"\n    with get_session_with_current_tenant() as db_session:\n        # Create Salesforce connector\n        connector_data = ConnectorBase(\n            name=\"Salesforce Test\",\n            source=DocumentSource.SALESFORCE,\n            input_type=InputType.POLL,\n            connector_specific_config={},\n            refresh_freq=None,\n            indexing_start=None,\n            prune_freq=None,\n        )\n        create_connector(db_session, connector_data)\n\n\ndef test_kg_enable_and_disable(connectors: None) -> None:  # noqa: ARG001\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Enable KG\n    # Need to `.model_dump_json()` and then `json.loads`.\n    # Seems redundant, but this is because simply calling `json=data.model_dump()`\n    # returns in a \"datetime cannot be JSON serialized error\".\n    req1 = json.loads(\n        EnableKGConfigRequest(\n            vendor=\"Test\",\n            vendor_domains=[\"test.app\", \"tester.ai\"],\n            ignore_domains=[],\n            coverage_start=datetime(1970, 1, 1, 0, 0),\n        ).model_dump_json()\n    )\n    res1 = requests.put(\n        f\"{API_SERVER_URL}/admin/kg/config\",\n        headers=admin_user.headers,\n        json=req1,\n    )\n    assert (\n        res1.status_code == HTTPStatus.OK\n    ), f\"Error response: {res1.status_code} - {res1.text}\"\n\n    # Check KG\n    res2 = requests.get(\n        f\"{API_SERVER_URL}/admin/kg/config\",\n        headers=admin_user.headers,\n    )\n    assert (\n        res2.status_code == HTTPStatus.OK\n    ), f\"Error response: {res2.status_code} - {res2.text}\"\n\n    actual_config = KGConfigAPIModel.model_validate_json(res2.text)\n    assert actual_config == KGConfigAPIModel(\n        enabled=True,\n        vendor=\"Test\",\n        vendor_domains=[\"test.app\", \"tester.ai\"],\n        ignore_domains=[],\n        coverage_start=datetime(1970, 1, 1, 0, 0),\n    )\n\n    # Disable KG\n    req3 = DisableKGConfigRequest().model_dump()\n    res3 = requests.put(\n        f\"{API_SERVER_URL}/admin/kg/config\",\n        headers=admin_user.headers,\n        json=req3,\n    )\n    assert (\n        res3.status_code == HTTPStatus.OK\n    ), f\"Error response: {res3.status_code} - {res3.text}\"\n\n    # Check KG\n    res4 = requests.get(\n        f\"{API_SERVER_URL}/admin/kg/config\",\n        headers=admin_user.headers,\n    )\n    assert (\n        res4.status_code == HTTPStatus.OK\n    ), f\"Error response: {res4.status_code} - {res4.text}\"\n\n    actual_config = KGConfigAPIModel.model_validate_json(res4.text)\n    assert actual_config == KGConfigAPIModel(\n        enabled=False,\n        vendor=\"Test\",\n        vendor_domains=[\"test.app\", \"tester.ai\"],\n        ignore_domains=[],\n        coverage_start=datetime(1970, 1, 1, 0, 0),\n    )\n\n\ndef test_kg_enable_with_missing_fields_should_fail() -> None:\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    req = json.loads(\n        EnableKGConfigRequest(\n            vendor=\"Test\",\n            vendor_domains=[],\n            ignore_domains=[],\n            coverage_start=datetime(1970, 1, 1, 0, 0),\n        ).model_dump_json()\n    )\n    res = requests.put(\n        f\"{API_SERVER_URL}/admin/kg/config\",\n        headers=admin_user.headers,\n        json=req,\n    )\n    assert res.status_code == HTTPStatus.BAD_REQUEST\n\n\ndef test_update_kg_entity_types(connectors: None) -> None:  # noqa: ARG001\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Enable kg and populate default entity types\n    req1 = json.loads(\n        EnableKGConfigRequest(\n            vendor=\"Test\",\n            vendor_domains=[\"test.app\", \"tester.ai\"],\n            ignore_domains=[],\n            coverage_start=datetime(1970, 1, 1, 0, 0),\n        ).model_dump_json()\n    )\n    res1 = requests.put(\n        f\"{API_SERVER_URL}/admin/kg/config\",\n        headers=admin_user.headers,\n        json=req1,\n    )\n    assert (\n        res1.status_code == HTTPStatus.OK\n    ), f\"Error response: {res1.status_code} - {res1.text}\"\n\n    # Get old entity types\n    res2 = requests.get(\n        f\"{API_SERVER_URL}/admin/kg/entity-types\",\n        headers=admin_user.headers,\n    )\n    assert (\n        res2.status_code == HTTPStatus.OK\n    ), f\"Error response: {res2.status_code} - {res2.text}\"\n    res2_parsed = SourceAndEntityTypeView.model_validate(res2.json())\n\n    # Update entity types\n    req3 = [\n        EntityType(\n            name=\"ACCOUNT\",\n            description=\"Test.\",\n            active=True,\n            grounded_source_name=\"salesforce\",\n        ).model_dump(),\n        EntityType(\n            name=\"OPPORTUNITY\",\n            description=\"Test 2.\",\n            active=False,\n        ).model_dump(),\n    ]\n    res3 = requests.put(\n        f\"{API_SERVER_URL}/admin/kg/entity-types\",\n        headers=admin_user.headers,\n        json=req3,\n    )\n    assert (\n        res3.status_code == HTTPStatus.OK\n    ), f\"Error response: {res3.status_code} - {res3.text}\"\n\n    # Check connector kg_processing is enabled\n    with get_session_with_current_tenant() as db_session:\n        connector = (\n            db_session.query(Connector)\n            .filter(Connector.name == \"Salesforce Test\")\n            .scalar()\n        )\n        assert connector.kg_processing_enabled\n\n    # Check entity types looks correct\n    res4 = requests.get(\n        f\"{API_SERVER_URL}/admin/kg/entity-types\",\n        headers=admin_user.headers,\n    )\n    assert (\n        res4.status_code == HTTPStatus.OK\n    ), f\"Error response: {res4.status_code} - {res4.text}\"\n    res4_parsed = SourceAndEntityTypeView.model_validate(res4.json())\n\n    def to_entity_type_map(map: dict[str, list[EntityType]]) -> dict[str, EntityType]:\n        return {\n            entity_type.name: entity_type\n            for entity_types in map.values()\n            for entity_type in entity_types\n        }\n\n    expected_entity_types = to_entity_type_map(map=res2_parsed.entity_types)\n    new_entity_types = to_entity_type_map(map=res4_parsed.entity_types)\n\n    # These are the updates.\n    # We're just manually updating them.\n    expected_entity_types[\"ACCOUNT\"].active = True\n    expected_entity_types[\"ACCOUNT\"].description = \"Test.\"\n    expected_entity_types[\"OPPORTUNITY\"].active = False\n    expected_entity_types[\"OPPORTUNITY\"].description = \"Test 2.\"\n\n    assert new_entity_types == expected_entity_types\n\n\ndef test_update_invalid_kg_entity_type_should_do_nothing(\n    connectors: None,  # noqa: ARG001\n) -> None:\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Enable kg and populate default entity types\n    req1 = json.loads(\n        EnableKGConfigRequest(\n            vendor=\"Test\",\n            vendor_domains=[\"test.app\", \"tester.ai\"],\n            ignore_domains=[],\n            coverage_start=datetime(1970, 1, 1, 0, 0),\n        ).model_dump_json()\n    )\n    res1 = requests.put(\n        f\"{API_SERVER_URL}/admin/kg/config\",\n        headers=admin_user.headers,\n        json=req1,\n    )\n    assert (\n        res1.status_code == HTTPStatus.OK\n    ), f\"Error response: {res1.status_code} - {res1.text}\"\n\n    # Get old entity types\n    res2 = requests.get(\n        f\"{API_SERVER_URL}/admin/kg/entity-types\",\n        headers=admin_user.headers,\n    )\n    assert (\n        res2.status_code == HTTPStatus.OK\n    ), f\"Error response: {res2.status_code} - {res2.text}\"\n\n    # Update entity types with non-existent entity type\n    req3 = [\n        EntityType(name=\"NON-EXISTENT\", description=\"Test.\", active=False).model_dump(),\n    ]\n    res3 = requests.put(\n        f\"{API_SERVER_URL}/admin/kg/entity-types\",\n        headers=admin_user.headers,\n        json=req3,\n    )\n    assert (\n        res3.status_code == HTTPStatus.OK\n    ), f\"Error response: {res3.status_code} - {res3.text}\"\n\n    # Get entity types after the update attempt\n    res4 = requests.get(\n        f\"{API_SERVER_URL}/admin/kg/entity-types\",\n        headers=admin_user.headers,\n    )\n    assert (\n        res4.status_code == HTTPStatus.OK\n    ), f\"Error response: {res4.status_code} - {res4.text}\"\n\n    # Should be the same as before since non-existent entity type should be ignored\n    assert res2.json() == res4.json()\n"
  },
  {
    "path": "backend/tests/integration/tests/llm_auto_update/test_auto_llm_update.py",
    "content": "\"\"\"\nIntegration tests for Auto LLM model update feature.\n\nThese tests verify that LLM providers in Auto mode get their models\nautomatically synced from the GitHub config via the celery background task.\n\nEnvironment variables for testing:\n- AUTO_LLM_UPDATE_INTERVAL_SECONDS: Set to a low value (e.g., 5) for faster tests\n- AUTO_LLM_CONFIG_URL: Points to the config file to sync from\n\nThe celery beat scheduler will run the check_for_auto_llm_updates task\nat the configured interval.\n\"\"\"\n\nimport time\n\nimport pytest\nimport requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\n# How long to wait for the celery task to run and sync models\n# This should be longer than AUTO_LLM_UPDATE_INTERVAL_SECONDS\nMAX_WAIT_TIME_SECONDS = 120\nPOLL_INTERVAL_SECONDS = 5\n\n\ndef _create_provider_with_api(\n    admin_user: DATestUser,\n    name: str,\n    provider_type: str,\n    default_model: str,\n    is_auto_mode: bool,\n    model_configurations: list[dict] | None = None,\n) -> dict:\n    \"\"\"Create an LLM provider via the API.\"\"\"\n    if model_configurations is None:\n        model_configurations = [{\"name\": default_model, \"is_visible\": True}]\n\n    llm_provider_data = {\n        \"name\": name,\n        \"provider\": provider_type,\n        \"api_key\": \"test-api-key-for-auto-mode-testing\",\n        \"api_base\": None,\n        \"api_version\": None,\n        \"custom_config\": None,\n        \"is_public\": True,\n        \"is_auto_mode\": is_auto_mode,\n        \"groups\": [],\n        \"personas\": [],\n        \"model_configurations\": model_configurations,\n        \"api_key_changed\": True,\n    }\n\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        json=llm_provider_data,\n        headers=admin_user.headers,\n    )\n    response.raise_for_status()\n    return response.json()\n\n\ndef _get_provider_by_id(admin_user: DATestUser, provider_id: int) -> dict:\n    \"\"\"Get a provider by ID via the API.\"\"\"\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/llm/provider\",\n        headers=admin_user.headers,\n    )\n    response.raise_for_status()\n    for provider in response.json()[\"providers\"]:\n        if provider[\"id\"] == provider_id:\n            return provider\n    raise ValueError(f\"Provider with id {provider_id} not found\")\n\n\ndef get_auto_config(admin_user: DATestUser) -> dict | None:\n    \"\"\"Get the current auto config from the API.\"\"\"\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/llm/auto-config\",\n        headers=admin_user.headers,\n    )\n    if response.status_code == 502:\n        return None\n    response.raise_for_status()\n    return response.json()\n\n\ndef wait_for_model_sync(\n    admin_user: DATestUser,\n    provider_id: int,\n    expected_model_names: set[str],\n    max_wait_seconds: int = MAX_WAIT_TIME_SECONDS,\n) -> dict:\n    \"\"\"\n    Wait for the provider's models to match the expected set.\n\n    Returns the provider data once models match, or raises an assertion error.\n    \"\"\"\n    start_time = time.time()\n    last_provider: dict | None = None\n\n    while time.time() - start_time < max_wait_seconds:\n        provider = _get_provider_by_id(admin_user, provider_id)\n        last_provider = provider\n        current_models = {m[\"name\"] for m in provider[\"model_configurations\"]}\n\n        # Check if we have all expected models\n        if expected_model_names.issubset(current_models):\n            return provider\n\n        print(\n            f\"Waiting for model sync... Current: {current_models}, Expected: {expected_model_names}\"\n        )\n        time.sleep(POLL_INTERVAL_SECONDS)\n\n    # Timeout - return last state for debugging\n    current_models = (\n        {m[\"name\"] for m in last_provider[\"model_configurations\"]}\n        if last_provider\n        else set()\n    )\n    raise AssertionError(\n        f\"Model sync timed out after {max_wait_seconds}s. Current models: {current_models}, Expected: {expected_model_names}\"\n    )\n\n\ndef test_auto_mode_provider_gets_synced_from_github_config(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"\n    Test that a provider in Auto mode gets its models synced from GitHub config.\n\n    This test:\n    1. Fetches the current GitHub config to know what models to expect\n    2. Creates an OpenAI provider in Auto mode with outdated/minimal models\n    3. Waits for the celery task to sync models from GitHub\n    4. Verifies the models match the GitHub config\n    \"\"\"\n    # First, get the GitHub config to know what models we should expect\n    github_config = get_auto_config(admin_user)\n    if github_config is None:\n        pytest.fail(\"GitHub config not found\")\n\n    # Get expected models for OpenAI from the config\n    if \"openai\" not in github_config.get(\"providers\", {}):\n        pytest.fail(\"OpenAI not in GitHub config\")\n\n    openai_config = github_config[\"providers\"][\"openai\"]\n\n    # Build expected model names from default_model + additional_visible_models\n    expected_models: set[str] = set()\n\n    # Add default model\n    default_model = openai_config.get(\"default_model\", {})\n    if isinstance(default_model, dict):\n        expected_models.add(default_model[\"name\"])\n    elif isinstance(default_model, str):\n        expected_models.add(default_model)\n\n    # Add additional visible models\n    for model in openai_config.get(\"additional_visible_models\", []):\n        if isinstance(model, dict):\n            expected_models.add(model[\"name\"])\n        elif isinstance(model, str):\n            expected_models.add(model)\n\n    print(f\"Expected models from GitHub config: {expected_models}\")\n\n    # Create an OpenAI provider in Auto mode with a single outdated model\n    provider = _create_provider_with_api(\n        admin_user=admin_user,\n        name=\"test-auto-sync-openai\",\n        provider_type=\"openai\",\n        default_model=\"outdated-model-name\",\n        is_auto_mode=True,\n        model_configurations=[\n            {\"name\": \"outdated-model-name\", \"is_visible\": True},\n        ],\n    )\n\n    assert provider[\"is_auto_mode\"] is True\n    print(f\"Created provider {provider['id']} in Auto mode\")\n\n    # Wait for the celery task to sync models\n    # The task runs at AUTO_LLM_UPDATE_INTERVAL_SECONDS interval\n    synced_provider = wait_for_model_sync(\n        admin_user=admin_user,\n        provider_id=provider[\"id\"],\n        expected_model_names=expected_models,\n    )\n\n    # Verify the models were synced\n    synced_model_configs = synced_provider[\"model_configurations\"]\n    synced_model_names = {m[\"name\"] for m in synced_model_configs}\n    print(f\"Synced models: {synced_model_names}\")\n\n    assert expected_models.issubset(\n        synced_model_names\n    ), f\"Expected models {expected_models} not found in synced models {synced_model_names}\"\n\n    # Verify the outdated model still exists but is not visible\n    # (Auto mode marks removed models as not visible, it doesn't delete them)\n    outdated_model = next(\n        (m for m in synced_model_configs if m[\"name\"] == \"outdated-model-name\"),\n        None,\n    )\n    assert (\n        outdated_model is not None\n    ), \"Outdated model should still exist after sync (marked invisible, not deleted)\"\n    assert not outdated_model[\n        \"is_visible\"\n    ], \"Outdated model should not be visible after sync\"\n\n\ndef test_manual_mode_provider_not_affected_by_auto_sync(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"\n    Test that a provider in Manual mode is NOT affected by auto sync.\n\n    This test:\n    1. Creates an OpenAI provider in Manual mode with custom models\n    2. Waits for a period longer than the sync interval\n    3. Verifies the models remain unchanged\n    \"\"\"\n    custom_model = \"my-custom-finetuned-model\"\n\n    # Create a provider in Manual mode\n    provider = _create_provider_with_api(\n        admin_user=admin_user,\n        name=\"test-manual-mode-unchanged\",\n        provider_type=\"openai\",\n        default_model=custom_model,\n        is_auto_mode=False,  # Manual mode\n        model_configurations=[\n            {\"name\": custom_model, \"is_visible\": True},\n            {\"name\": \"another-custom-model\", \"is_visible\": True},\n        ],\n    )\n\n    assert provider[\"is_auto_mode\"] is False\n    initial_models = {m[\"name\"] for m in provider[\"model_configurations\"]}\n    print(f\"Created manual mode provider with models: {initial_models}\")\n\n    # Wait for longer than the sync interval\n    wait_time = 15  # Should be longer than AUTO_LLM_UPDATE_INTERVAL_SECONDS\n    print(f\"Waiting {wait_time}s to ensure sync task runs...\")\n    time.sleep(wait_time)\n\n    # Verify models are unchanged\n    updated_provider = _get_provider_by_id(admin_user, provider[\"id\"])\n    current_models = {m[\"name\"] for m in updated_provider[\"model_configurations\"]}\n\n    assert (\n        current_models == initial_models\n    ), f\"Manual mode provider models should not change. Initial: {initial_models}, Current: {current_models}\"\n"
  },
  {
    "path": "backend/tests/integration/tests/llm_provider/test_llm_provider.py",
    "content": "import uuid\nfrom typing import Any\n\nimport pytest\nimport requests\nfrom requests.models import Response\n\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.model_name_parser import parse_litellm_model_name\nfrom onyx.llm.utils import get_max_input_tokens\nfrom onyx.llm.utils import litellm_thinks_model_supports_image_input\nfrom onyx.llm.utils import model_is_reasoning_model\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _get_provider_by_id(admin_user: DATestUser, provider_id: str) -> dict | None:\n    \"\"\"Utility function to fetch an LLM provider by ID\"\"\"\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/llm/provider\",\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n    providers = response.json()[\"providers\"]\n    return next((p for p in providers if p[\"id\"] == provider_id), None)\n\n\ndef assert_response_is_equivalent(\n    admin_user: DATestUser,\n    response: Response,\n    model_configurations: list[ModelConfigurationUpsertRequest],\n    api_key: str | None = None,\n) -> None:\n    assert response.status_code == 200\n    created_provider = response.json()\n\n    provider_data = _get_provider_by_id(admin_user, created_provider[\"id\"])\n    assert provider_data is not None\n\n    assert provider_data[\"personas\"] == []\n\n    def fill_max_input_tokens_and_supports_image_input(\n        req: ModelConfigurationUpsertRequest,\n    ) -> dict[str, Any]:\n        provider_name = created_provider[\"provider\"]\n        # Match how ModelConfigurationView.from_model builds the key for parsing\n        model_key = req.name\n        if provider_name and not model_key.startswith(f\"{provider_name}/\"):\n            model_key = f\"{provider_name}/{model_key}\"\n        parsed = parse_litellm_model_name(model_key)\n\n        # Include region in display name for Bedrock cross-region models (matches from_model)\n        display_name = (\n            f\"{parsed.display_name} ({parsed.region})\"\n            if parsed.region\n            else parsed.display_name\n        )\n\n        filled_with_max_input_tokens = ModelConfigurationUpsertRequest(\n            name=req.name,\n            is_visible=req.is_visible,\n            max_input_tokens=req.max_input_tokens\n            or get_max_input_tokens(model_name=req.name, model_provider=provider_name),\n        )\n        return {\n            **filled_with_max_input_tokens.model_dump(),\n            \"supports_image_input\": litellm_thinks_model_supports_image_input(\n                req.name, provider_name\n            ),\n            \"supports_reasoning\": model_is_reasoning_model(req.name, provider_name),\n            \"display_name\": display_name,\n            \"provider_display_name\": parsed.provider_display_name,\n            \"vendor\": parsed.vendor,\n            \"region\": parsed.region,\n            \"version\": parsed.version,\n        }\n\n    # Compare model configurations by name (order-independent)\n    actual_by_name = {\n        config[\"name\"]: config for config in provider_data[\"model_configurations\"]\n    }\n    expected_by_name = {\n        config.name: fill_max_input_tokens_and_supports_image_input(config)\n        for config in model_configurations\n    }\n\n    assert set(actual_by_name.keys()) == set(\n        expected_by_name.keys()\n    ), f\"Model names don't match. Actual: {set(actual_by_name.keys())}, Expected: {set(expected_by_name.keys())}\"\n\n    for name in actual_by_name:\n        actual_config = actual_by_name[name]\n        expected_config = expected_by_name[name]\n        assert (\n            actual_config == expected_config\n        ), f\"Config mismatch for {name}:\\nActual: {actual_config}\\nExpected: {expected_config}\"\n\n    # test that returned key is sanitized\n    if api_key:\n        assert provider_data[\"api_key\"] == api_key\n\n\n# Test creating an LLM Provider with some various model-configurations.\n@pytest.mark.parametrize(\n    \"model_configurations, expected\",\n    [\n        # Test the case in which a basic model-configuration is passed.\n        (\n            [\n                ModelConfigurationUpsertRequest(\n                    name=\"gpt-4\", is_visible=True, max_input_tokens=4096\n                )\n            ],\n            [\n                ModelConfigurationUpsertRequest(\n                    name=\"gpt-4\", is_visible=True, max_input_tokens=4096\n                )\n            ],\n        ),\n        # Test the case in which multiple model-configuration are passed.\n        (\n            [\n                ModelConfigurationUpsertRequest(name=\"gpt-4\", is_visible=True),\n                ModelConfigurationUpsertRequest(name=\"gpt-4o\", is_visible=True),\n            ],\n            [\n                ModelConfigurationUpsertRequest(name=\"gpt-4\", is_visible=True),\n                ModelConfigurationUpsertRequest(name=\"gpt-4o\", is_visible=True),\n            ],\n        ),\n        # Test the case in which duplicate model-configuration are passed.\n        (\n            [ModelConfigurationUpsertRequest(name=\"gpt-4\", is_visible=True)] * 4,\n            [ModelConfigurationUpsertRequest(name=\"gpt-4\", is_visible=True)],\n        ),\n    ],\n)\ndef test_create_llm_provider(\n    reset: None,  # noqa: ARG001\n    model_configurations: list[ModelConfigurationUpsertRequest],\n    expected: list[ModelConfigurationUpsertRequest],\n) -> None:\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": str(uuid.uuid4()),\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [\n                model_configuration.model_dump()\n                for model_configuration in model_configurations\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n        },\n    )\n\n    assert_response_is_equivalent(\n        admin_user,\n        response,\n        expected,\n        \"sk-0****0000\",\n    )\n\n\n# Test creating a new LLM Provider with some given model-configurations, then performing some arbitrary update on it.\n@pytest.mark.parametrize(\n    \"initial, initial_expected, updated, updated_expected\",\n    [\n        # Test the case in which a basic model-configuration is passed, but then it's updated to have *NO* max-input-tokens.\n        (\n            (\n                \"gpt-4\",\n                [\n                    ModelConfigurationUpsertRequest(\n                        name=\"gpt-4\", is_visible=True, max_input_tokens=4096\n                    )\n                ],\n            ),\n            [\n                ModelConfigurationUpsertRequest(\n                    name=\"gpt-4\", is_visible=True, max_input_tokens=4096\n                )\n            ],\n            (\n                \"gpt-4\",\n                [ModelConfigurationUpsertRequest(name=\"gpt-4\", is_visible=True)],\n            ),\n            [ModelConfigurationUpsertRequest(name=\"gpt-4\", is_visible=True)],\n        ),\n        # Test the case where we insert 2 model-configurations, and then in the update the first,\n        # we update one and delete the second.\n        (\n            (\n                \"gpt-4\",\n                [\n                    ModelConfigurationUpsertRequest(name=\"gpt-4\", is_visible=True),\n                    ModelConfigurationUpsertRequest(\n                        name=\"gpt-4o\", is_visible=True, max_input_tokens=4096\n                    ),\n                ],\n            ),\n            [\n                ModelConfigurationUpsertRequest(name=\"gpt-4\", is_visible=True),\n                ModelConfigurationUpsertRequest(\n                    name=\"gpt-4o\", is_visible=True, max_input_tokens=4096\n                ),\n            ],\n            (\n                \"gpt-4\",\n                [\n                    ModelConfigurationUpsertRequest(\n                        name=\"gpt-4\", is_visible=True, max_input_tokens=4096\n                    )\n                ],\n            ),\n            [\n                ModelConfigurationUpsertRequest(\n                    name=\"gpt-4\", is_visible=True, max_input_tokens=4096\n                )\n            ],\n        ),\n    ],\n)\ndef test_update_model_configurations(\n    reset: None,  # noqa: ARG001\n    initial: tuple[str, list[ModelConfigurationUpsertRequest]],\n    initial_expected: list[ModelConfigurationUpsertRequest],\n    updated: tuple[str, list[ModelConfigurationUpsertRequest]],\n    updated_expected: list[ModelConfigurationUpsertRequest],\n) -> None:\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    default_model_name, model_configurations = initial\n    updated_default_model_name, updated_model_configurations = updated\n\n    name = str(uuid.uuid4())\n\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": name,\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [\n                model_configuration.dict()\n                for model_configuration in model_configurations\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n            \"api_key_changed\": True,\n        },\n    )\n    created_provider = response.json()\n    assert_response_is_equivalent(\n        admin_user,\n        response,\n        initial_expected,\n    )\n\n    response = requests.post(\n        f\"{API_SERVER_URL}/admin/llm/default\",\n        headers=admin_user.headers,\n        json={\n            \"provider_id\": created_provider[\"id\"],\n            \"model_name\": updated_default_model_name,\n        },\n    )\n    assert response.status_code == 200\n\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider\",\n        headers=admin_user.headers,\n        json={\n            \"id\": created_provider[\"id\"],\n            \"name\": name,\n            \"provider\": created_provider[\"provider\"],\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000001\",\n            \"model_configurations\": [\n                model_configuration.dict()\n                for model_configuration in updated_model_configurations\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n        },\n    )\n    assert_response_is_equivalent(\n        admin_user,\n        response,\n        updated_expected,\n        \"sk-0****0000\",\n    )\n\n    response = requests.post(\n        f\"{API_SERVER_URL}/admin/llm/default\",\n        headers=admin_user.headers,\n        json={\n            \"provider_id\": created_provider[\"id\"],\n            \"model_name\": updated_default_model_name,\n        },\n    )\n    assert response.status_code == 200\n\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider\",\n        headers=admin_user.headers,\n        json={\n            \"id\": created_provider[\"id\"],\n            \"name\": name,\n            \"provider\": created_provider[\"provider\"],\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000001\",\n            \"model_configurations\": [\n                model_configuration.dict()\n                for model_configuration in updated_model_configurations\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n            \"api_key_changed\": True,\n        },\n    )\n    assert_response_is_equivalent(\n        admin_user,\n        response,\n        updated_expected,\n        \"sk-0****0001\",\n    )\n\n\n@pytest.mark.parametrize(\n    \"model_configurations\",\n    [\n        [\n            ModelConfigurationUpsertRequest(\n                name=\"gpt-4\", is_visible=True, max_input_tokens=4096\n            )\n        ],\n        [\n            ModelConfigurationUpsertRequest(name=\"gpt-4o\", is_visible=True),\n            ModelConfigurationUpsertRequest(name=\"gpt-4\", is_visible=True),\n        ],\n    ],\n)\ndef test_delete_llm_provider(\n    reset: None,  # noqa: ARG001\n    model_configurations: list[ModelConfigurationUpsertRequest],\n) -> None:\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Create a provider\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": \"test-provider-delete\",\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [\n                model_configuration.dict()\n                for model_configuration in model_configurations\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n        },\n    )\n    created_provider = response.json()\n    assert response.status_code == 200\n\n    # Delete the provider\n    response = requests.delete(\n        f\"{API_SERVER_URL}/admin/llm/provider/{created_provider['id']}\",\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n\n    # Verify provider is deleted by checking it's not in the list\n    provider_data = _get_provider_by_id(admin_user, created_provider[\"id\"])\n    assert provider_data is None\n\n\ndef test_delete_default_llm_provider_rejected(\n    reset: None,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"Deleting the default LLM provider should return 400.\"\"\"\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Create a provider\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": \"test-provider-default-delete\",\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [\n                ModelConfigurationUpsertRequest(\n                    name=\"gpt-4o-mini\", is_visible=True\n                ).model_dump()\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n        },\n    )\n    assert response.status_code == 200\n    created_provider = response.json()\n\n    # Set this provider as the default\n    set_default_response = requests.post(\n        f\"{API_SERVER_URL}/admin/llm/default\",\n        headers=admin_user.headers,\n        json={\n            \"provider_id\": created_provider[\"id\"],\n            \"model_name\": \"gpt-4o-mini\",\n        },\n    )\n    assert set_default_response.status_code == 200\n\n    # Attempt to delete the default provider — should be rejected\n    delete_response = requests.delete(\n        f\"{API_SERVER_URL}/admin/llm/provider/{created_provider['id']}\",\n        headers=admin_user.headers,\n    )\n    assert delete_response.status_code == 400\n    assert \"Cannot delete the default LLM provider\" in delete_response.json()[\"detail\"]\n\n    # Verify provider still exists\n    provider_data = _get_provider_by_id(admin_user, created_provider[\"id\"])\n    assert provider_data is not None\n\n\ndef test_delete_non_default_llm_provider_with_default_set(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Deleting a non-default provider should succeed even when a default is set.\"\"\"\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Create two providers\n    response_default = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": \"default-provider\",\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [\n                ModelConfigurationUpsertRequest(\n                    name=\"gpt-4o-mini\", is_visible=True\n                ).model_dump()\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n        },\n    )\n    assert response_default.status_code == 200\n    default_provider = response_default.json()\n\n    response_other = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": \"other-provider\",\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [\n                ModelConfigurationUpsertRequest(\n                    name=\"gpt-4o\", is_visible=True\n                ).model_dump()\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n        },\n    )\n    assert response_other.status_code == 200\n    other_provider = response_other.json()\n\n    # Set the first provider as default\n    set_default_response = requests.post(\n        f\"{API_SERVER_URL}/admin/llm/default\",\n        headers=admin_user.headers,\n        json={\n            \"provider_id\": default_provider[\"id\"],\n            \"model_name\": \"gpt-4o-mini\",\n        },\n    )\n    assert set_default_response.status_code == 200\n\n    # Delete the non-default provider — should succeed\n    delete_response = requests.delete(\n        f\"{API_SERVER_URL}/admin/llm/provider/{other_provider['id']}\",\n        headers=admin_user.headers,\n    )\n    assert delete_response.status_code == 200\n\n    # Verify the non-default provider is gone\n    provider_data = _get_provider_by_id(admin_user, other_provider[\"id\"])\n    assert provider_data is None\n\n    # Verify the default provider still exists\n    default_data = _get_provider_by_id(admin_user, default_provider[\"id\"])\n    assert default_data is not None\n\n\ndef test_force_delete_default_llm_provider(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Force-deleting the default LLM provider should succeed.\"\"\"\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Create a provider\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": \"test-provider-force-delete\",\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [\n                ModelConfigurationUpsertRequest(\n                    name=\"gpt-4o-mini\", is_visible=True\n                ).model_dump()\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n        },\n    )\n    assert response.status_code == 200\n    created_provider = response.json()\n\n    # Set this provider as the default\n    set_default_response = requests.post(\n        f\"{API_SERVER_URL}/admin/llm/default\",\n        headers=admin_user.headers,\n        json={\n            \"provider_id\": created_provider[\"id\"],\n            \"model_name\": \"gpt-4o-mini\",\n        },\n    )\n    assert set_default_response.status_code == 200\n\n    # Attempt to delete without force — should be rejected\n    delete_response = requests.delete(\n        f\"{API_SERVER_URL}/admin/llm/provider/{created_provider['id']}\",\n        headers=admin_user.headers,\n    )\n    assert delete_response.status_code == 400\n\n    # Force delete — should succeed\n    force_delete_response = requests.delete(\n        f\"{API_SERVER_URL}/admin/llm/provider/{created_provider['id']}?force=true\",\n        headers=admin_user.headers,\n    )\n    assert force_delete_response.status_code == 200\n\n    # Verify provider is gone\n    provider_data = _get_provider_by_id(admin_user, created_provider[\"id\"])\n    assert provider_data is None\n\n\ndef test_delete_default_vision_provider_clears_vision_default(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Deleting the default vision provider should succeed and clear the vision default.\"\"\"\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Create a text provider and set it as default (so we have a default text provider)\n    text_response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": \"text-provider\",\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000001\",\n            \"model_configurations\": [\n                ModelConfigurationUpsertRequest(\n                    name=\"gpt-4o-mini\", is_visible=True\n                ).model_dump()\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n        },\n    )\n    assert text_response.status_code == 200\n    text_provider = text_response.json()\n    _set_default_provider(admin_user, text_provider[\"id\"], \"gpt-4o-mini\")\n\n    # Create a vision provider and set it as default vision\n    vision_response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": \"vision-provider\",\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000002\",\n            \"model_configurations\": [\n                ModelConfigurationUpsertRequest(\n                    name=\"gpt-4o\",\n                    is_visible=True,\n                    supports_image_input=True,\n                ).model_dump()\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n        },\n    )\n    assert vision_response.status_code == 200\n    vision_provider = vision_response.json()\n    _set_default_vision_provider(admin_user, vision_provider[\"id\"], \"gpt-4o\")\n\n    # Verify vision default is set\n    data = _get_providers_admin(admin_user)\n    assert data is not None\n    _, _, vision_default = _unpack_data(data)\n    assert vision_default is not None\n    assert vision_default[\"provider_id\"] == vision_provider[\"id\"]\n\n    # Delete the vision provider — should succeed (only text default is protected)\n    delete_response = requests.delete(\n        f\"{API_SERVER_URL}/admin/llm/provider/{vision_provider['id']}\",\n        headers=admin_user.headers,\n    )\n    assert delete_response.status_code == 200\n\n    # Verify the vision provider is gone\n    provider_data = _get_provider_by_id(admin_user, vision_provider[\"id\"])\n    assert provider_data is None\n\n    # Verify there is no default vision provider\n    data = _get_providers_admin(admin_user)\n    assert data is not None\n    _, text_default, vision_default = _unpack_data(data)\n    assert vision_default is None\n\n    # Verify the text default is still intact\n    assert text_default is not None\n    assert text_default[\"provider_id\"] == text_provider[\"id\"]\n\n\ndef test_duplicate_provider_name_rejected(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Creating a provider with a name that already exists should return 400.\"\"\"\n    admin_user = UserManager.create(name=\"admin_user\")\n    provider_name = f\"unique-provider-{uuid.uuid4()}\"\n\n    base_payload = {\n        \"name\": provider_name,\n        \"provider\": LlmProviderNames.OPENAI,\n        \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n        \"model_configurations\": [\n            ModelConfigurationUpsertRequest(\n                name=\"gpt-4o-mini\", is_visible=True\n            ).model_dump()\n        ],\n        \"is_public\": True,\n        \"groups\": [],\n    }\n\n    # First creation succeeds\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json=base_payload,\n    )\n    assert response.status_code == 200\n\n    # Second creation with the same name is rejected\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json=base_payload,\n    )\n    assert response.status_code == 409\n    assert \"already exists\" in response.json()[\"detail\"]\n\n\ndef test_rename_provider_rejected(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Renaming a provider is not currently supported and should return 400.\"\"\"\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    create_payload = {\n        \"name\": f\"original-name-{uuid.uuid4()}\",\n        \"provider\": LlmProviderNames.OPENAI,\n        \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n        \"model_configurations\": [\n            ModelConfigurationUpsertRequest(\n                name=\"gpt-4o-mini\", is_visible=True\n            ).model_dump()\n        ],\n        \"is_public\": True,\n        \"groups\": [],\n    }\n\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json=create_payload,\n    )\n    assert response.status_code == 200\n    provider_id = response.json()[\"id\"]\n\n    # Attempt to rename — should be rejected\n    new_name = f\"renamed-provider-{uuid.uuid4()}\"\n    update_payload = {**create_payload, \"id\": provider_id, \"name\": new_name}\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=false\",\n        headers=admin_user.headers,\n        json=update_payload,\n    )\n    assert response.status_code == 400\n    assert \"not currently supported\" in response.json()[\"detail\"]\n\n    # Verify no duplicate was created — only the original provider should exist\n    provider = _get_provider_by_id(admin_user, provider_id)\n    assert provider is not None\n    assert provider[\"name\"] == create_payload[\"name\"]\n\n    all_response = requests.get(\n        f\"{API_SERVER_URL}/admin/llm/provider\",\n        headers=admin_user.headers,\n    )\n    assert all_response.status_code == 200\n    all_names = [p[\"name\"] for p in all_response.json()[\"providers\"]]\n    assert new_name not in all_names\n\n\ndef test_model_visibility_preserved_on_edit(reset: None) -> None:  # noqa: ARG001\n    \"\"\"\n    Test that model visibility flags are correctly preserved when editing an LLM provider.\n\n    This test verifies the fix for the bug where editing a provider with specific visible models\n    would incorrectly map visibility flags when the provider's model list differs from the\n    descriptor's default model list.\n\n    Scenario:\n    1. Create a provider with 3 models, 2 visible\n    2. Edit the provider to change visibility (make all 3 visible)\n    3. Verify all 3 models are now visible\n    4. Edit again to make only 1 visible\n    5. Verify only 1 is visible\n    \"\"\"\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Initial model configurations: 2 visible, 1 hidden\n    model_configs = [\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4o\",\n            is_visible=True,\n            max_input_tokens=None,\n            supports_image_input=None,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4o-mini\",\n            is_visible=True,\n            max_input_tokens=None,\n            supports_image_input=None,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4-turbo\",\n            is_visible=False,\n            max_input_tokens=None,\n            supports_image_input=None,\n        ),\n    ]\n\n    # Create the provider\n    create_response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": \"test-visibility-provider\",\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [config.dict() for config in model_configs],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert create_response.status_code == 200\n    created_provider = create_response.json()\n\n    # Verify initial state: 2 visible models\n    provider_data = _get_provider_by_id(admin_user, created_provider[\"id\"])\n    assert provider_data is not None\n    visible_models = [\n        model for model in provider_data[\"model_configurations\"] if model[\"is_visible\"]\n    ]\n    assert len(visible_models) == 2\n    assert any(m[\"name\"] == \"gpt-4o\" for m in visible_models)\n    assert any(m[\"name\"] == \"gpt-4o-mini\" for m in visible_models)\n\n    # Edit 1: Make all 3 models visible\n    edit_configs_all_visible = [\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4o\",\n            is_visible=True,\n            max_input_tokens=None,\n            supports_image_input=None,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4o-mini\",\n            is_visible=True,\n            max_input_tokens=None,\n            supports_image_input=None,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4-turbo\",\n            is_visible=True,  # Now visible\n            max_input_tokens=None,\n            supports_image_input=None,\n        ),\n    ]\n\n    edit_response_1 = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=false\",\n        headers=admin_user.headers,\n        json={\n            \"id\": created_provider[\"id\"],\n            \"name\": \"test-visibility-provider\",\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [\n                config.dict() for config in edit_configs_all_visible\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert edit_response_1.status_code == 200\n\n    # Verify all 3 models are now visible\n    provider_data = _get_provider_by_id(admin_user, created_provider[\"id\"])\n    assert provider_data is not None\n    visible_models = [\n        model for model in provider_data[\"model_configurations\"] if model[\"is_visible\"]\n    ]\n    assert len(visible_models) == 3\n\n    # Edit 2: Make only 1 model visible\n    edit_configs_one_visible = [\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4o\",\n            is_visible=True,  # Only this one visible\n            max_input_tokens=None,\n            supports_image_input=None,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4o-mini\",\n            is_visible=False,\n            max_input_tokens=None,\n            supports_image_input=None,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4-turbo\",\n            is_visible=False,\n            max_input_tokens=None,\n            supports_image_input=None,\n        ),\n    ]\n\n    edit_response_2 = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=false\",\n        headers=admin_user.headers,\n        json={\n            \"id\": created_provider[\"id\"],\n            \"name\": \"test-visibility-provider\",\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [\n                config.dict() for config in edit_configs_one_visible\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert edit_response_2.status_code == 200\n\n    # Verify only 1 model is visible\n    provider_data = _get_provider_by_id(admin_user, created_provider[\"id\"])\n    assert provider_data is not None\n    visible_models = [\n        model for model in provider_data[\"model_configurations\"] if model[\"is_visible\"]\n    ]\n    assert len(visible_models) == 1\n    assert visible_models[0][\"name\"] == \"gpt-4o\"\n\n    # Make none visible\n    edit_configs_none_visible = [\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4o\",\n            is_visible=False,\n            max_input_tokens=None,\n            supports_image_input=None,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4o-mini\",\n            is_visible=False,\n            max_input_tokens=None,\n            supports_image_input=None,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4-turbo\",\n            is_visible=False,\n            max_input_tokens=None,\n            supports_image_input=None,\n        ),\n    ]\n    edit_response_3 = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=false\",\n        headers=admin_user.headers,\n        json={\n            \"id\": created_provider[\"id\"],\n            \"name\": \"test-visibility-provider\",\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [\n                config.dict() for config in edit_configs_none_visible\n            ],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert edit_response_3.status_code == 200\n\n    # Verify no models are visible\n    provider_data = _get_provider_by_id(admin_user, created_provider[\"id\"])\n    assert provider_data is not None\n    visible_models = [\n        model for model in provider_data[\"model_configurations\"] if model[\"is_visible\"]\n    ]\n    assert len(visible_models) == 0\n\n    # Make gpt-4o the default\n    _set_default_provider(admin_user, created_provider[\"id\"], \"gpt-4o\")\n\n    # Verify gpt-4o is the default\n    provider_data = _get_provider_by_id(admin_user, created_provider[\"id\"])\n    assert provider_data is not None\n    visible_models = [\n        model for model in provider_data[\"model_configurations\"] if model[\"is_visible\"]\n    ]\n    assert len(visible_models) == 1\n    assert visible_models[0][\"name\"] == \"gpt-4o\"\n\n\ndef _get_provider_by_name(providers: list[dict], provider_name: str) -> dict | None:\n    return next((p for p in providers if p[\"name\"] == provider_name), None)\n\n\ndef _get_providers_admin(\n    admin_user: DATestUser,\n) -> dict | None:\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/llm/provider\",\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n    resp_json = response.json()\n\n    return resp_json\n\n\ndef _unpack_data(data: dict) -> tuple[list[dict], dict | None, dict | None]:\n    providers = data[\"providers\"]\n    text_default = data.get(\"default_text\")\n    vision_default = data.get(\"default_vision\")\n\n    return providers, text_default, vision_default\n\n\ndef _get_providers_basic(\n    user: DATestUser,\n) -> dict | None:\n    response = requests.get(\n        f\"{API_SERVER_URL}/llm/provider\",\n        headers=user.headers,\n    )\n    assert response.status_code == 200\n    resp_json = response.json()\n\n    return resp_json\n\n\ndef _validate_default_model(\n    default: dict | None,\n    provider_id: int | None = None,\n    model_name: str | None = None,\n) -> None:\n    if default is None:\n        assert provider_id is None and model_name is None\n        return\n\n    assert default[\"provider_id\"] == provider_id\n    assert default[\"model_name\"] == model_name\n\n\ndef _get_provider_by_name_admin(\n    admin_user: DATestUser, provider_name: str\n) -> dict | None:\n    \"\"\"Utility function to fetch an LLM provider by name via admin endpoint.\"\"\"\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/llm/provider\",\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n    providers = response.json()\n    return next((p for p in providers if p[\"name\"] == provider_name), None)\n\n\ndef _get_provider_by_name_basic(user: DATestUser, provider_name: str) -> dict | None:\n    \"\"\"Utility function to fetch an LLM provider by name via basic (non-admin) endpoint.\"\"\"\n    response = requests.get(\n        f\"{API_SERVER_URL}/llm/provider\",\n        headers=user.headers,\n    )\n    assert response.status_code == 200\n    providers = response.json()[\"providers\"]\n    return next((p for p in providers if p[\"name\"] == provider_name), None)\n\n\ndef _validate_model_configurations(\n    actual_configs: list[dict],\n    expected_model_names: list[str],\n    expected_visible: dict[str, bool] | None = None,\n    expected_image_support: dict[str, bool] | None = None,\n) -> None:\n    \"\"\"\n    Validate that model configurations match expectations.\n\n    Args:\n        actual_configs: List of model configuration dicts from the API response\n        expected_model_names: List of expected model names\n        expected_visible: Optional dict mapping model name to expected visibility\n        expected_image_support: Optional dict mapping model name to expected supports_image_input\n    \"\"\"\n    actual_names = {config[\"name\"] for config in actual_configs}\n    expected_names = set(expected_model_names)\n\n    assert (\n        actual_names == expected_names\n    ), f\"Model names mismatch. Expected: {expected_names}, Actual: {actual_names}\"\n\n    if expected_visible:\n        for config in actual_configs:\n            if config[\"name\"] in expected_visible:\n                assert config[\"is_visible\"] == expected_visible[config[\"name\"]], (\n                    f\"Visibility mismatch for {config['name']}. \"\n                    f\"Expected: {expected_visible[config['name']]}, Actual: {config['is_visible']}\"\n                )\n\n    if expected_image_support:\n        for config in actual_configs:\n            if config[\"name\"] in expected_image_support:\n                assert (\n                    config[\"supports_image_input\"]\n                    == expected_image_support[config[\"name\"]]\n                ), (\n                    f\"supports_image_input mismatch for {config['name']}. \"\n                    f\"Expected: {expected_image_support[config['name']]}, \"\n                    f\"Actual: {config['supports_image_input']}\"\n                )\n\n\ndef _validate_provider_data(\n    provider_data: dict,\n    expected_name: str,\n    expected_provider: str,\n    expected_model_names: list[str],\n    expected_visible: dict[str, bool] | None = None,\n    expected_is_public: bool | None = None,\n    expected_image_support: dict[str, bool] | None = None,\n) -> None:\n    \"\"\"\n    Validate that provider data matches expectations.\n\n    Args:\n        provider_data: Provider dict from the API response\n        expected_name: Expected provider name\n        expected_provider: Expected provider type (e.g., 'openai')\n        expected_model_names: List of expected model names in configurations\n        expected_visible: Optional dict mapping model name to expected visibility\n        expected_is_public: Optional expected is_public value (admin endpoint only)\n        expected_image_support: Optional dict mapping model name to expected supports_image_input\n    \"\"\"\n    assert (\n        provider_data[\"name\"] == expected_name\n    ), f\"Provider name mismatch. Expected: {expected_name}, Actual: {provider_data['name']}\"\n    assert (\n        provider_data[\"provider\"] == expected_provider\n    ), f\"Provider type mismatch. Expected: {expected_provider}, Actual: {provider_data['provider']}\"\n\n    # Validate is_public if provided (only available in admin endpoint response)\n    if expected_is_public is not None and \"is_public\" in provider_data:\n        assert (\n            provider_data[\"is_public\"] == expected_is_public\n        ), f\"is_public mismatch. Expected: {expected_is_public}, Actual: {provider_data['is_public']}\"\n\n    # Validate model configurations\n    _validate_model_configurations(\n        provider_data[\"model_configurations\"],\n        expected_model_names,\n        expected_visible,\n        expected_image_support,\n    )\n\n\ndef test_default_model_persistence_and_update(\n    reset: None,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"\n    Test that the default model is correctly set, persisted, and can be updated.\n\n    This test verifies:\n    1. Admin creates a provider with a specific default model\n    2. Admin endpoint (/admin/llm/provider) shows correct default model\n    3. Basic endpoint (/llm/provider) shows correct default model for admin user\n    4. Non-admin user can see the same default model via basic endpoint\n    5. Admin updates the default model\n    6. Both admin and basic endpoints reflect the new default model\n    7. Non-admin user sees the updated default model\n    \"\"\"\n    from onyx.auth.schemas import UserRole\n\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Create a non-admin user\n    basic_user = UserManager.create(name=\"basic_user\")\n    # The first user is admin, subsequent users are basic by default\n    assert basic_user.role == UserRole.BASIC or basic_user.role != UserRole.ADMIN\n\n    provider_name = f\"test-default-model-{uuid.uuid4()}\"\n    updated_default_model = \"gpt-4o\"\n\n    # Model configurations including all models we'll use\n    model_configs = [\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4\",\n            is_visible=True,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4o\",\n            is_visible=True,\n        ),\n    ]\n\n    # Expected model names and visibility\n    expected_model_names = [\"gpt-4\", \"gpt-4o\"]\n    expected_visible = {\"gpt-4\": True, \"gpt-4o\": True}\n\n    # Step 1: Admin creates the provider with initial default model\n    create_response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": provider_name,\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [config.model_dump() for config in model_configs],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert create_response.status_code == 200\n\n    # Capture initial defaults (setup_postgres may have created a DevEnvPresetOpenAI default)\n    initial_data = _get_providers_admin(admin_user)\n    assert initial_data is not None\n    _, initial_text_default, initial_vision_default = _unpack_data(initial_data)\n\n    # Step 2: Verify via admin endpoint that all provider data is correct\n    admin_data = _get_providers_admin(admin_user)\n    assert admin_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_data)\n    # Defaults should be unchanged from initial state (new provider not set as default)\n    assert text_default == initial_text_default\n    assert vision_default == initial_vision_default\n\n    admin_provider_data = _get_provider_by_name(providers, provider_name)\n    assert admin_provider_data is not None\n\n    _validate_provider_data(\n        admin_provider_data,\n        expected_name=provider_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=expected_model_names,\n        expected_visible=expected_visible,\n        expected_is_public=True,\n    )\n\n    # Step 3: Verify via basic endpoint (admin user) that all provider data is correct\n    admin_basic_data = _get_providers_basic(admin_user)\n    assert admin_basic_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_basic_data)\n    assert text_default == initial_text_default\n    assert vision_default == initial_vision_default\n\n    admin_basic_provider_data = _get_provider_by_name(providers, provider_name)\n    assert admin_basic_provider_data is not None\n    _validate_provider_data(\n        admin_basic_provider_data,\n        expected_name=provider_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=expected_model_names,\n        expected_visible=expected_visible,\n    )\n\n    # Step 4: Verify non-admin user sees the same provider data via basic endpoint\n    basic_user_data = _get_providers_basic(basic_user)\n    assert basic_user_data is not None\n    providers, text_default, vision_default = _unpack_data(basic_user_data)\n    assert text_default == initial_text_default\n    assert vision_default == initial_vision_default\n\n    basic_user_provider_data = _get_provider_by_name(providers, provider_name)\n    assert basic_user_provider_data is not None\n    _validate_provider_data(\n        basic_user_provider_data,\n        expected_name=provider_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=expected_model_names,\n        expected_visible=expected_visible,\n    )\n\n    # Step 5: Admin updates the provider to change the default model\n    update_response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=false\",\n        headers=admin_user.headers,\n        json={\n            \"id\": create_response.json()[\"id\"],\n            \"name\": provider_name,\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [config.model_dump() for config in model_configs],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert update_response.status_code == 200\n\n    default_provider_response = requests.post(\n        f\"{API_SERVER_URL}/admin/llm/default\",\n        json={\n            \"provider_id\": update_response.json()[\"id\"],\n            \"model_name\": updated_default_model,\n        },\n        headers=admin_user.headers,\n    )\n    assert default_provider_response.status_code == 200\n\n    # Step 6a: Verify the updated provider data via admin endpoint\n    admin_data = _get_providers_admin(admin_user)\n    assert admin_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_data)\n    _validate_default_model(\n        text_default,\n        provider_id=update_response.json()[\"id\"],\n        model_name=updated_default_model,\n    )\n    _validate_default_model(vision_default)  # None\n\n    admin_provider_data = _get_provider_by_name(providers, provider_name)\n    assert admin_provider_data is not None\n    _validate_provider_data(\n        admin_provider_data,\n        expected_name=provider_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=expected_model_names,\n        expected_visible=expected_visible,\n        expected_is_public=True,\n    )\n\n    # Step 6b: Verify the updated provider data via basic endpoint (admin user)\n    admin_basic_data = _get_providers_basic(admin_user)\n    assert admin_basic_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_basic_data)\n    _validate_default_model(\n        text_default,\n        provider_id=update_response.json()[\"id\"],\n        model_name=updated_default_model,\n    )\n    _validate_default_model(vision_default)  # None\n\n    admin_basic_provider_data = _get_provider_by_name(providers, provider_name)\n    assert admin_basic_provider_data is not None\n    _validate_provider_data(\n        admin_basic_provider_data,\n        expected_name=provider_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=expected_model_names,\n        expected_visible=expected_visible,\n    )\n\n    # Step 7: Verify non-admin user sees the updated provider data\n    basic_user_data = _get_providers_basic(basic_user)\n    assert basic_user_data is not None\n    providers, text_default, vision_default = _unpack_data(basic_user_data)\n    _validate_default_model(\n        text_default,\n        provider_id=update_response.json()[\"id\"],\n        model_name=updated_default_model,\n    )\n    _validate_default_model(vision_default)  # None\n\n    basic_user_provider_data = _get_provider_by_name(providers, provider_name)\n    assert basic_user_provider_data is not None\n    _validate_provider_data(\n        basic_user_provider_data,\n        expected_name=provider_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=expected_model_names,\n        expected_visible=expected_visible,\n    )\n\n\ndef _get_all_providers_basic(user: DATestUser) -> list[dict]:\n    \"\"\"Utility function to fetch all LLM providers via basic endpoint.\"\"\"\n    response = requests.get(\n        f\"{API_SERVER_URL}/llm/provider\",\n        headers=user.headers,\n    )\n    assert response.status_code == 200\n    return response.json()[\"providers\"]\n\n\ndef _get_all_providers_admin(admin_user: DATestUser) -> list[dict]:\n    \"\"\"Utility function to fetch all LLM providers via admin endpoint.\"\"\"\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/llm/provider\",\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n    return response.json()[\"providers\"]\n\n\ndef _set_default_provider(\n    admin_user: DATestUser, provider_id: int, model_name: str\n) -> None:\n    \"\"\"Utility function to set a provider as the default.\"\"\"\n    response = requests.post(\n        f\"{API_SERVER_URL}/admin/llm/default\",\n        json={\n            \"provider_id\": provider_id,\n            \"model_name\": model_name,\n        },\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n\n\ndef _set_default_vision_provider(\n    admin_user: DATestUser, provider_id: int, vision_model: str | None = None\n) -> None:\n    \"\"\"Utility function to set a provider as the default vision provider.\"\"\"\n    response = requests.post(\n        f\"{API_SERVER_URL}/admin/llm/default-vision\",\n        json={\n            \"provider_id\": provider_id,\n            \"model_name\": vision_model,\n        },\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n\n\ndef test_multiple_providers_default_switching(\n    reset: None,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"\n    Test switching default providers and models across multiple LLM providers.\n\n    This test verifies:\n    1. Admin creates multiple LLM providers\n    2. Admin sets one as the default provider with a specific default model\n    3. Both admin and basic_user query /provider and see the same default provider/model\n    4. Admin changes the default provider and model to something different\n    5. Both admin and basic_user verify they see the same updated default\n    6. Admin switches to a different provider that has a model with the same name\n    7. Both users should see the new provider as default with the same model name\n    \"\"\"\n    from onyx.auth.schemas import UserRole\n\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Create a non-admin user\n    basic_user = UserManager.create(name=\"basic_user\")\n    assert basic_user.role == UserRole.BASIC or basic_user.role != UserRole.ADMIN\n\n    # We'll create two providers, both with a model named \"gpt-4\" to test the\n    # scenario where different providers have models with the same name\n    provider_1_name = f\"test-provider-1-{uuid.uuid4()}\"\n    provider_2_name = f\"test-provider-2-{uuid.uuid4()}\"\n\n    # Both providers will have \"gpt-4\" as a model\n    shared_model_name = \"gpt-4\"\n    provider_1_unique_model = \"gpt-4o\"\n    provider_2_unique_model = \"gpt-4-turbo\"\n\n    # Model configurations for provider 1\n    provider_1_configs = [\n        ModelConfigurationUpsertRequest(\n            name=shared_model_name,\n            is_visible=True,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=provider_1_unique_model,\n            is_visible=True,\n        ),\n    ]\n\n    # Model configurations for provider 2\n    provider_2_configs = [\n        ModelConfigurationUpsertRequest(\n            name=shared_model_name,\n            is_visible=True,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=provider_2_unique_model,\n            is_visible=True,\n        ),\n    ]\n\n    # Expected model names and visibility for each provider\n    provider_1_model_names = [shared_model_name, provider_1_unique_model]\n    provider_1_visible = {shared_model_name: True, provider_1_unique_model: True}\n    provider_2_model_names = [shared_model_name, provider_2_unique_model]\n    provider_2_visible = {shared_model_name: True, provider_2_unique_model: True}\n\n    # Step 1: Create provider 1 with shared_model_name as default\n    create_response_1 = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": provider_1_name,\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000001\",\n            \"model_configurations\": [c.model_dump() for c in provider_1_configs],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert create_response_1.status_code == 200\n    provider_1 = create_response_1.json()\n\n    _set_default_provider(admin_user, provider_1[\"id\"], shared_model_name)\n\n    # Create provider 2 with provider_2_unique_model as default initially\n    create_response_2 = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": provider_2_name,\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000002\",\n            \"model_configurations\": [c.model_dump() for c in provider_2_configs],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert create_response_2.status_code == 200\n    provider_2 = create_response_2.json()\n\n    # Step 2: Set provider 1 as the default provider\n    _set_default_provider(admin_user, provider_1[\"id\"], shared_model_name)\n\n    # Step 3: Both admin and basic_user query and verify they see the same default\n    # Validate via admin endpoint\n    admin_data = _get_providers_admin(admin_user)\n    assert admin_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_data)\n    _validate_default_model(\n        text_default, provider_id=provider_1[\"id\"], model_name=shared_model_name\n    )\n    _validate_default_model(vision_default)  # None\n    admin_provider_data = _get_provider_by_name(providers, provider_1_name)\n    assert admin_provider_data is not None\n    _validate_provider_data(\n        admin_provider_data,\n        expected_name=provider_1_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_1_model_names,\n        expected_visible=provider_1_visible,\n        expected_is_public=True,\n    )\n\n    # Validate provider 2 via admin endpoint (should not be default)\n    admin_provider_2 = _get_provider_by_name(providers, provider_2_name)\n    assert admin_provider_2 is not None\n    _validate_provider_data(\n        admin_provider_2,\n        expected_name=provider_2_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_2_model_names,\n        expected_visible=provider_2_visible,\n        expected_is_public=True,\n    )\n\n    # Validate via basic endpoint (basic_user)\n    basic_data = _get_providers_basic(basic_user)\n    assert basic_data is not None\n    providers, text_default, vision_default = _unpack_data(basic_data)\n    _validate_default_model(\n        text_default, provider_id=provider_1[\"id\"], model_name=shared_model_name\n    )\n    _validate_default_model(vision_default)  # None\n    basic_provider_data = _get_provider_by_name(providers, provider_1_name)\n    assert basic_provider_data is not None\n    _validate_provider_data(\n        basic_provider_data,\n        expected_name=provider_1_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_1_model_names,\n        expected_visible=provider_1_visible,\n    )\n\n    # Also verify admin sees the same via basic endpoint\n    admin_basic_data = _get_providers_basic(admin_user)\n    assert admin_basic_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_basic_data)\n    _validate_default_model(\n        text_default, provider_id=provider_1[\"id\"], model_name=shared_model_name\n    )\n    _validate_default_model(vision_default)  # None\n    admin_basic_provider_data = _get_provider_by_name(providers, provider_1_name)\n    assert admin_basic_provider_data is not None\n    _validate_provider_data(\n        admin_basic_provider_data,\n        expected_name=provider_1_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_1_model_names,\n        expected_visible=provider_1_visible,\n    )\n\n    # Step 4: Admin changes the default provider to provider 2 and updates its default model\n    # First update provider 2's default model to the unique model (it already is, but reconfirm)\n    update_response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=false\",\n        headers=admin_user.headers,\n        json={\n            \"id\": provider_2[\"id\"],\n            \"name\": provider_2_name,\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000002\",\n            \"model_configurations\": [c.model_dump() for c in provider_2_configs],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert update_response.status_code == 200\n\n    # Now set provider 2 as the default\n    _set_default_provider(admin_user, provider_2[\"id\"], provider_2_unique_model)\n\n    # Step 5: Both admin and basic_user verify they see the updated default\n    # Validate via admin endpoint\n    admin_data = _get_providers_admin(admin_user)\n    assert admin_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_data)\n    _validate_default_model(\n        text_default, provider_id=provider_2[\"id\"], model_name=provider_2_unique_model\n    )\n    _validate_default_model(vision_default)  # None\n    admin_provider_data = _get_provider_by_name(providers, provider_2_name)\n    assert admin_provider_data is not None\n    _validate_provider_data(\n        admin_provider_data,\n        expected_name=provider_2_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_2_model_names,\n        expected_visible=provider_2_visible,\n        expected_is_public=True,\n    )\n\n    # Validate provider 1 via admin endpoint (should no longer be default)\n    admin_provider_1 = _get_provider_by_name(providers, provider_1_name)\n    assert admin_provider_1 is not None\n    _validate_provider_data(\n        admin_provider_1,\n        expected_name=provider_1_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_1_model_names,\n        expected_visible=provider_1_visible,\n        expected_is_public=True,\n    )\n\n    # Validate via basic endpoint (basic_user)\n    basic_data = _get_providers_basic(basic_user)\n    assert basic_data is not None\n    providers, text_default, vision_default = _unpack_data(basic_data)\n    _validate_default_model(\n        text_default, provider_id=provider_2[\"id\"], model_name=provider_2_unique_model\n    )\n    _validate_default_model(vision_default)  # None\n    basic_provider_data = _get_provider_by_name(providers, provider_2_name)\n    assert basic_provider_data is not None\n    _validate_provider_data(\n        basic_provider_data,\n        expected_name=provider_2_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_2_model_names,\n        expected_visible=provider_2_visible,\n    )\n\n    # Validate via basic endpoint (admin_user)\n    admin_basic_data = _get_providers_basic(admin_user)\n    assert admin_basic_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_basic_data)\n    _validate_default_model(\n        text_default, provider_id=provider_2[\"id\"], model_name=provider_2_unique_model\n    )\n    _validate_default_model(vision_default)  # None\n    admin_basic_provider_data = _get_provider_by_name(providers, provider_2_name)\n    assert admin_basic_provider_data is not None\n    _validate_provider_data(\n        admin_basic_provider_data,\n        expected_name=provider_2_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_2_model_names,\n        expected_visible=provider_2_visible,\n    )\n\n    # Step 6: Admin changes provider 2's default model to the shared model name\n    # (same model name as provider 1 had)\n    update_response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=false\",\n        headers=admin_user.headers,\n        json={\n            \"id\": provider_2[\"id\"],\n            \"name\": provider_2_name,\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000002\",\n            \"model_configurations\": [c.model_dump() for c in provider_2_configs],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert update_response.status_code == 200\n\n    _set_default_provider(\n        admin_user, provider_2[\"id\"], shared_model_name\n    )  # Same name as provider 1's model\n\n    # Step 7: Both users verify they see provider 2 as default with the shared model name\n    # Validate via admin endpoint\n    admin_data = _get_providers_admin(admin_user)\n    assert admin_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_data)\n    _validate_default_model(\n        text_default, provider_id=provider_2[\"id\"], model_name=shared_model_name\n    )\n    _validate_default_model(vision_default)  # None\n    admin_provider_data = _get_provider_by_name(providers, provider_2_name)\n    assert admin_provider_data is not None\n    _validate_provider_data(\n        admin_provider_data,\n        expected_name=provider_2_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_2_model_names,\n        expected_visible=provider_2_visible,\n        expected_is_public=True,\n    )\n\n    # Validate via basic endpoint (basic_user)\n    basic_data = _get_providers_basic(basic_user)\n    assert basic_data is not None\n    providers, text_default, vision_default = _unpack_data(basic_data)\n    _validate_default_model(\n        text_default, provider_id=provider_2[\"id\"], model_name=shared_model_name\n    )\n    _validate_default_model(vision_default)  # None\n    basic_provider_data = _get_provider_by_name(providers, provider_2_name)\n    assert basic_provider_data is not None\n    _validate_provider_data(\n        basic_provider_data,\n        expected_name=provider_2_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_2_model_names,\n        expected_visible=provider_2_visible,\n    )\n\n    # Validate via basic endpoint (admin_user)\n    admin_basic_data = _get_providers_basic(admin_user)\n    assert admin_basic_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_basic_data)\n    _validate_default_model(\n        text_default, provider_id=provider_2[\"id\"], model_name=shared_model_name\n    )\n    _validate_default_model(vision_default)  # None\n    admin_basic_provider_data = _get_provider_by_name(providers, provider_2_name)\n    assert admin_basic_provider_data is not None\n    _validate_provider_data(\n        admin_basic_provider_data,\n        expected_name=provider_2_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_2_model_names,\n        expected_visible=provider_2_visible,\n    )\n\n    # Verify provider 1 is no longer the default and has correct data\n    admin_provider_1 = _get_provider_by_name(providers, provider_1_name)\n    assert admin_provider_1 is not None\n    _validate_provider_data(\n        admin_provider_1,\n        expected_name=provider_1_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_1_model_names,\n        expected_visible=provider_1_visible,\n        expected_is_public=True,\n    )\n\n    basic_provider_1 = _get_provider_by_name(providers, provider_1_name)\n    assert basic_provider_1 is not None\n    _validate_provider_data(\n        basic_provider_1,\n        expected_name=provider_1_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_1_model_names,\n        expected_visible=provider_1_visible,\n    )\n\n\ndef test_default_provider_and_vision_provider_selection(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test setting separate default providers for regular LLM and vision capabilities.\n\n    This test verifies:\n    1. Create provider 1 with mixed models (some with vision, some without)\n    2. Create provider 2 with only vision-capable models\n    3. Set a non-vision model from provider 1 as the general default\n    4. Set a vision model from provider 2 as the default vision model\n    5. Verify both admin and basic users see correct default provider and vision provider\n    6. Verify model configurations show correct image support capabilities\n    \"\"\"\n    from onyx.auth.schemas import UserRole\n\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Create a non-admin user\n    basic_user = UserManager.create(name=\"basic_user\")\n    assert basic_user.role == UserRole.BASIC or basic_user.role != UserRole.ADMIN\n\n    provider_1_name = f\"test-mixed-models-{uuid.uuid4()}\"\n    provider_2_name = f\"test-vision-only-{uuid.uuid4()}\"\n\n    # Provider 1: Mixed models - some with vision support, some without\n    # Using real model names that litellm recognizes for vision support\n    provider_1_non_vision_model = \"gpt-4\"  # No vision support\n    provider_1_vision_model = \"gpt-4o\"  # Has vision support\n\n    # Provider 2: Only vision-capable models\n    provider_2_vision_model_1 = \"gpt-4-vision-preview\"  # Vision model\n    provider_2_vision_model_2 = \"gpt-4o-mini\"  # Also has vision support\n\n    # Model configurations for provider 1 (mixed)\n    provider_1_configs = [\n        ModelConfigurationUpsertRequest(\n            name=provider_1_non_vision_model,\n            is_visible=True,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=provider_1_vision_model,\n            is_visible=True,\n        ),\n    ]\n\n    # Model configurations for provider 2 (vision only)\n    provider_2_configs = [\n        ModelConfigurationUpsertRequest(\n            name=provider_2_vision_model_1,\n            is_visible=True,\n            supports_image_input=True,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=provider_2_vision_model_2,\n            is_visible=True,\n            supports_image_input=True,\n        ),\n    ]\n\n    # Expected model names\n    provider_1_model_names = [provider_1_non_vision_model, provider_1_vision_model]\n    provider_1_visible = {\n        provider_1_non_vision_model: True,\n        provider_1_vision_model: True,\n    }\n\n    provider_2_model_names = [provider_2_vision_model_1, provider_2_vision_model_2]\n    provider_2_visible = {\n        provider_2_vision_model_1: True,\n        provider_2_vision_model_2: True,\n    }\n\n    # Step 1: Create provider 1 with mixed models, set non-vision model as default\n    create_response_1 = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": provider_1_name,\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000001\",\n            \"model_configurations\": [c.model_dump() for c in provider_1_configs],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert create_response_1.status_code == 200\n    provider_1 = create_response_1.json()\n\n    # Step 2: Create provider 2 with vision-only models\n    create_response_2 = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": provider_2_name,\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000002\",\n            \"model_configurations\": [c.model_dump() for c in provider_2_configs],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert create_response_2.status_code == 200\n    provider_2 = create_response_2.json()\n\n    # Step 3: Set provider 1 as the general default provider\n    _set_default_provider(admin_user, provider_1[\"id\"], provider_1_non_vision_model)\n\n    # Step 4: Set provider 2 with a specific vision model as the default vision provider\n    _set_default_vision_provider(\n        admin_user, provider_2[\"id\"], provider_2_vision_model_1\n    )\n\n    # Step 5: Verify via admin endpoint\n    admin_data = _get_providers_admin(admin_user)\n    assert admin_data is not None\n\n    # Find and validate the default provider (provider 1)\n    providers, text_default, vision_default = _unpack_data(admin_data)\n    _validate_default_model(\n        text_default,\n        provider_id=provider_1[\"id\"],\n        model_name=provider_1_non_vision_model,\n    )\n    _validate_default_model(\n        vision_default,\n        provider_id=provider_2[\"id\"],\n        model_name=provider_2_vision_model_1,\n    )\n    admin_default = _get_provider_by_name(providers, provider_1_name)\n    assert admin_default is not None\n    _validate_provider_data(\n        admin_default,\n        expected_name=provider_1_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_1_model_names,\n        expected_visible=provider_1_visible,\n        expected_is_public=True,\n    )\n\n    # Find and validate the default vision provider (provider 2)\n    admin_vision_default = _get_provider_by_name(providers, provider_2_name)\n    assert admin_vision_default is not None\n    _validate_provider_data(\n        admin_vision_default,\n        expected_name=provider_2_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_2_model_names,\n        expected_visible=provider_2_visible,\n        expected_is_public=True,\n    )\n\n    # Step 6: Verify via basic endpoint (basic_user)\n    # Find and validate the default provider (provider 1)\n    basic_data = _get_providers_basic(basic_user)\n    assert basic_data is not None\n    providers, text_default, vision_default = _unpack_data(basic_data)\n    _validate_default_model(\n        text_default,\n        provider_id=provider_1[\"id\"],\n        model_name=provider_1_non_vision_model,\n    )\n    _validate_default_model(\n        vision_default,\n        provider_id=provider_2[\"id\"],\n        model_name=provider_2_vision_model_1,\n    )\n    basic_default = _get_provider_by_name(providers, provider_1_name)\n    assert basic_default is not None\n    _validate_provider_data(\n        basic_default,\n        expected_name=provider_1_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_1_model_names,\n        expected_visible=provider_1_visible,\n    )\n\n    # Find and validate the default vision provider (provider 2)\n    basic_vision_default = _get_provider_by_name(providers, provider_2_name)\n    assert basic_vision_default is not None\n    _validate_provider_data(\n        basic_vision_default,\n        expected_name=provider_2_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_2_model_names,\n        expected_visible=provider_2_visible,\n    )\n\n    # Step 7: Verify via basic endpoint (admin_user sees same as basic_user)\n    admin_basic_data = _get_providers_basic(admin_user)\n    assert admin_basic_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_basic_data)\n    _validate_default_model(\n        text_default,\n        provider_id=provider_1[\"id\"],\n        model_name=provider_1_non_vision_model,\n    )\n    _validate_default_model(\n        vision_default,\n        provider_id=provider_2[\"id\"],\n        model_name=provider_2_vision_model_1,\n    )\n    admin_basic_default = _get_provider_by_name(providers, provider_1_name)\n    assert admin_basic_default is not None\n    _validate_provider_data(\n        admin_basic_default,\n        expected_name=provider_1_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_1_model_names,\n        expected_visible=provider_1_visible,\n    )\n\n    admin_basic_vision_default = _get_provider_by_name(providers, provider_2_name)\n    assert admin_basic_vision_default is not None\n    _validate_provider_data(\n        admin_basic_vision_default,\n        expected_name=provider_2_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=provider_2_model_names,\n        expected_visible=provider_2_visible,\n    )\n\n    # Verify that the providers are distinct (different providers for regular vs vision)\n    assert (\n        admin_default[\"name\"] != admin_vision_default[\"name\"]\n    ), \"Default provider and vision provider should be different providers\"\n    assert (\n        basic_default[\"name\"] != basic_vision_default[\"name\"]\n    ), \"Default provider and vision provider should be different providers (basic endpoint)\"\n\n\ndef test_default_provider_is_not_default_vision_provider(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test that setting a provider as the default provider does NOT make it\n    the default vision provider.\n\n    This test verifies:\n    1. Create a provider with some models\n    2. Set it as the default provider\n    3. Verify it is the default provider (is_default_provider=True)\n    4. Verify it is NOT the default vision provider (is_default_vision_provider should be None/False)\n    \"\"\"\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    provider_name = f\"test-default-not-vision-{uuid.uuid4()}\"\n\n    # Model configurations\n    model_configs = [\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4\",\n            is_visible=True,\n        ),\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4o\",\n            is_visible=True,\n        ),\n    ]\n\n    expected_model_names = [\"gpt-4\", \"gpt-4o\"]\n    expected_visible = {\"gpt-4\": True, \"gpt-4o\": True}\n\n    # Step 1: Create the provider\n    create_response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": provider_name,\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000000\",\n            \"model_configurations\": [c.model_dump() for c in model_configs],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert create_response.status_code == 200\n    created_provider = create_response.json()\n\n    # Step 2: Set it as the default provider\n    _set_default_provider(admin_user, created_provider[\"id\"], \"gpt-4\")\n\n    # Step 3 & 4: Verify via admin endpoint\n    admin_data = _get_providers_admin(admin_user)\n    assert admin_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_data)\n    _validate_default_model(\n        text_default, provider_id=created_provider[\"id\"], model_name=\"gpt-4\"\n    )\n    _validate_default_model(vision_default)  # None\n    admin_provider_data = _get_provider_by_name(providers, provider_name)\n    assert admin_provider_data is not None\n\n    # Full validation of provider data\n    _validate_provider_data(\n        admin_provider_data,\n        expected_name=provider_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=expected_model_names,\n        expected_visible=expected_visible,\n        expected_is_public=True,\n    )\n\n    # Also verify via basic endpoint\n    basic_data = _get_providers_basic(admin_user)\n    assert basic_data is not None\n    providers, text_default, vision_default = _unpack_data(basic_data)\n    _validate_default_model(\n        text_default, provider_id=created_provider[\"id\"], model_name=\"gpt-4\"\n    )\n    _validate_default_model(vision_default)  # None\n    basic_provider_data = _get_provider_by_name(providers, provider_name)\n    assert basic_provider_data is not None\n\n    _validate_provider_data(\n        basic_provider_data,\n        expected_name=provider_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=expected_model_names,\n        expected_visible=expected_visible,\n    )\n\n\ndef _get_all_image_gen_configs(admin_user: DATestUser) -> list[dict]:\n    \"\"\"Utility function to fetch all image generation configs.\"\"\"\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/image-generation/config\",\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n    return response.json()\n\n\ndef _create_image_gen_config(\n    admin_user: DATestUser,\n    image_provider_id: str,\n    model_name: str,\n    source_llm_provider_id: int,\n    is_default: bool = False,\n) -> dict:\n    \"\"\"Utility function to create an image generation config using clone mode.\"\"\"\n    response = requests.post(\n        f\"{API_SERVER_URL}/admin/image-generation/config\",\n        headers=admin_user.headers,\n        json={\n            \"image_provider_id\": image_provider_id,\n            \"model_name\": model_name,\n            \"source_llm_provider_id\": source_llm_provider_id,\n            \"is_default\": is_default,\n        },\n    )\n    assert (\n        response.status_code == 200\n    ), f\"Failed to create image gen config: {response.text}\"\n    return response.json()\n\n\ndef _set_image_gen_config_default(\n    admin_user: DATestUser, image_provider_id: str\n) -> None:\n    \"\"\"Utility function to set an image generation config as default.\"\"\"\n    response = requests.post(\n        f\"{API_SERVER_URL}/admin/image-generation/config/{image_provider_id}/default\",\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n\n\ndef _delete_image_gen_config(admin_user: DATestUser, image_provider_id: str) -> None:\n    \"\"\"Utility function to delete an image generation config.\"\"\"\n    response = requests.delete(\n        f\"{API_SERVER_URL}/admin/image-generation/config/{image_provider_id}\",\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n\n\ndef test_all_three_provider_types_no_mixup(reset: None) -> None:  # noqa: ARG001\n    \"\"\"\n    Test that regular LLM providers, vision providers, and image generation providers\n    are all tracked separately with no mixup.\n\n    This test verifies:\n    1. Create a regular LLM provider and set as default\n    2. Create a vision LLM provider and set as default vision\n    3. Create an image generation config (using clone mode from regular provider)\n    4. Set the image gen config as default\n    5. Verify all three are correctly identified:\n       - Regular provider: is_default_provider=True, is_default_vision_provider=None\n       - Vision provider: is_default_provider=None, is_default_vision_provider=True\n       - Image gen config: is_default=True (separate from LLM provider defaults)\n    6. Verify image gen config doesn't appear in LLM provider lists\n    7. Verify LLM providers don't appear in image gen config list\n    \"\"\"\n    from onyx.auth.schemas import UserRole\n\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Create a non-admin user\n    basic_user = UserManager.create(name=\"basic_user\")\n    assert basic_user.role == UserRole.BASIC or basic_user.role != UserRole.ADMIN\n\n    # Provider names\n    regular_provider_name = f\"test-regular-provider-{uuid.uuid4()}\"\n    vision_provider_name = f\"test-vision-provider-{uuid.uuid4()}\"\n    image_gen_provider_id = f\"test-image-gen-{uuid.uuid4()}\"\n\n    # Model configurations\n    regular_model_configs = [\n        ModelConfigurationUpsertRequest(name=\"gpt-4\", is_visible=True),\n        ModelConfigurationUpsertRequest(name=\"gpt-4o\", is_visible=True),\n    ]\n\n    vision_model_configs = [\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4-vision-preview\", is_visible=True, supports_image_input=True\n        ),\n        ModelConfigurationUpsertRequest(\n            name=\"gpt-4o\", is_visible=True, supports_image_input=True\n        ),\n    ]\n\n    # Step 1: Create regular LLM provider\n    create_regular_response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": regular_provider_name,\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000001\",\n            \"model_configurations\": [c.model_dump() for c in regular_model_configs],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert create_regular_response.status_code == 200\n    regular_provider = create_regular_response.json()\n\n    # Set as default provider\n    _set_default_provider(admin_user, regular_provider[\"id\"], \"gpt-4\")\n\n    # Step 2: Create vision LLM provider\n    create_vision_response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json={\n            \"name\": vision_provider_name,\n            \"provider\": LlmProviderNames.OPENAI,\n            \"api_key\": \"sk-000000000000000000000000000000000000000000000002\",\n            \"default_model_name\": \"gpt-4-vision-preview\",\n            \"model_configurations\": [c.model_dump() for c in vision_model_configs],\n            \"is_public\": True,\n            \"groups\": [],\n            \"personas\": [],\n        },\n    )\n    assert create_vision_response.status_code == 200\n    vision_provider = create_vision_response.json()\n\n    # Set as default vision provider\n    _set_default_vision_provider(\n        admin_user, vision_provider[\"id\"], \"gpt-4-vision-preview\"\n    )\n\n    # Step 3: Create image generation config using clone mode from regular provider\n    _create_image_gen_config(\n        admin_user=admin_user,\n        image_provider_id=image_gen_provider_id,\n        model_name=\"dall-e-3\",\n        source_llm_provider_id=regular_provider[\"id\"],\n        is_default=True,\n    )\n\n    # Step 4: Verify all three types are correctly tracked\n\n    # Get all LLM providers (via admin endpoint)\n    admin_data = _get_providers_admin(admin_user)\n    assert admin_data is not None\n    providers, text_default, vision_default = _unpack_data(admin_data)\n    _validate_default_model(\n        text_default, provider_id=regular_provider[\"id\"], model_name=\"gpt-4\"\n    )\n    _validate_default_model(\n        vision_default,\n        provider_id=vision_provider[\"id\"],\n        model_name=\"gpt-4-vision-preview\",\n    )\n    _validate_default_model(\n        vision_default, vision_provider[\"id\"], \"gpt-4-vision-preview\"\n    )\n    _get_provider_by_name(providers, regular_provider_name)\n\n    # Get all image generation configs\n    image_gen_configs = _get_all_image_gen_configs(admin_user)\n\n    # Verify the regular provider is the default provider\n    admin_regular_provider_data = _get_provider_by_name(\n        providers, regular_provider_name\n    )\n    assert admin_regular_provider_data is not None\n    _validate_provider_data(\n        admin_regular_provider_data,\n        expected_name=regular_provider_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=[c.name for c in regular_model_configs],\n        expected_visible={c.name: True for c in regular_model_configs},\n    )\n    admin_vision_provider_data = _get_provider_by_name(providers, vision_provider_name)\n    assert admin_vision_provider_data is not None\n    _validate_provider_data(\n        admin_vision_provider_data,\n        expected_name=vision_provider_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=[c.name for c in vision_model_configs],\n        expected_visible={c.name: True for c in vision_model_configs},\n    )\n\n    # Verify the image gen config is the default image generation config\n    image_gen_config_data = next(\n        (\n            c\n            for c in image_gen_configs\n            if c[\"image_provider_id\"] == image_gen_provider_id\n        ),\n        None,\n    )\n    assert image_gen_config_data is not None, \"Image gen config not found\"\n    assert (\n        image_gen_config_data[\"is_default\"] is True\n    ), \"Image gen config should be the default\"\n    assert (\n        image_gen_config_data[\"model_name\"] == \"dall-e-3\"\n    ), \"Image gen config should have correct model name\"\n\n    # Step 5: Verify no mixup - image gen providers don't appear in LLM provider lists\n    # Image gen provider should not appear in the list\n    assert image_gen_provider_id not in [p[\"name\"] for p in providers]\n\n    # Step 6: Verify via basic endpoint (non-admin user)\n    basic_data = _get_providers_basic(basic_user)\n    assert basic_data is not None\n    providers, text_default, vision_default = _unpack_data(basic_data)\n    _validate_default_model(\n        text_default, provider_id=regular_provider[\"id\"], model_name=\"gpt-4\"\n    )\n    _validate_default_model(\n        vision_default,\n        provider_id=vision_provider[\"id\"],\n        model_name=\"gpt-4-vision-preview\",\n    )\n    _validate_default_model(\n        vision_default, vision_provider[\"id\"], \"gpt-4-vision-preview\"\n    )\n    basic_provider_data = _get_provider_by_name(providers, regular_provider_name)\n    assert basic_provider_data is not None\n    _validate_provider_data(\n        basic_provider_data,\n        expected_name=regular_provider_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=[c.name for c in regular_model_configs],\n        expected_visible={c.name: True for c in regular_model_configs},\n    )\n    basic_vision_provider_data = _get_provider_by_name(providers, vision_provider_name)\n    assert basic_vision_provider_data is not None\n    _validate_provider_data(\n        basic_vision_provider_data,\n        expected_name=vision_provider_name,\n        expected_provider=LlmProviderNames.OPENAI,\n        expected_model_names=[c.name for c in vision_model_configs],\n        expected_visible={c.name: True for c in vision_model_configs},\n    )\n\n    # Step 7: Verify the counts are as expected\n    # We should have at least 2 user-created providers (setup_postgres may add more)\n    assert len(providers) >= 2\n    assert len(image_gen_configs) == 1\n\n    # Clean up: Delete the image gen config (to clean up the internal LLM provider)\n    _delete_image_gen_config(admin_user, image_gen_provider_id)\n"
  },
  {
    "path": "backend/tests/integration/tests/llm_provider/test_llm_provider_access_control.py",
    "content": "import os\n\nimport pytest\nimport requests\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.llm import can_user_access_llm_provider\nfrom onyx.db.llm import fetch_user_group_ids\nfrom onyx.db.llm import update_default_provider\nfrom onyx.db.llm import upsert_llm_provider\nfrom onyx.db.models import LLMProvider as LLMProviderModel\nfrom onyx.db.models import LLMProvider__Persona\nfrom onyx.db.models import LLMProvider__UserGroup\nfrom onyx.db.models import Persona\nfrom onyx.db.models import User\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserGroup\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.factory import get_llm_for_persona\nfrom onyx.server.manage.llm.models import LLMProviderUpsertRequest\nfrom onyx.server.manage.llm.models import ModelConfigurationUpsertRequest\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\npytestmark = pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"LLM provider access control is enterprise only\",\n)\n\n\ndef _create_llm_provider(\n    db_session: Session,\n    *,\n    name: str,\n    default_model_name: str,\n    is_public: bool,\n    is_default: bool,\n) -> LLMProviderModel:\n    _provider = upsert_llm_provider(\n        llm_provider_upsert_request=LLMProviderUpsertRequest(\n            name=name,\n            provider=LlmProviderNames.OPENAI,\n            api_key=None,\n            api_base=None,\n            api_version=None,\n            custom_config=None,\n            is_public=is_public,\n            model_configurations=[\n                ModelConfigurationUpsertRequest(\n                    name=default_model_name,\n                    is_visible=True,\n                )\n            ],\n        ),\n        db_session=db_session,\n    )\n    if is_default:\n        update_default_provider(_provider.id, default_model_name, db_session)\n\n    provider = db_session.get(LLMProviderModel, _provider.id)\n    if not provider:\n        raise ValueError(f\"Provider {name} not found\")\n    return provider\n\n\ndef _create_persona(\n    db_session: Session,\n    *,\n    name: str,\n    provider_name: str,\n) -> Persona:\n    persona = Persona(\n        name=name,\n        description=f\"{name} description\",\n        llm_model_provider_override=provider_name,\n        llm_model_version_override=\"gpt-4o-mini\",\n        system_prompt=\"System prompt\",\n        task_prompt=\"Task prompt\",\n        datetime_aware=True,\n        is_public=True,\n    )\n    db_session.add(persona)\n    db_session.flush()\n    return persona\n\n\n@pytest.fixture()\ndef users(reset: None) -> tuple[DATestUser, DATestUser]:  # noqa: ARG001\n    admin_user = UserManager.create(name=\"admin_user\")\n    basic_user = UserManager.create(name=\"basic_user\")\n    return admin_user, basic_user\n\n\ndef test_can_user_access_llm_provider_or_logic(\n    users: tuple[DATestUser, DATestUser],\n) -> None:\n    \"\"\"Test LLM provider access control with is_public flag and AND logic.\n\n    Tests the new access control logic:\n    - is_public=True providers are accessible to everyone\n    - is_public=False with no restrictions locks the provider\n    - When both groups AND personas are set, AND logic applies (must satisfy both)\n    \"\"\"\n    admin_user, basic_user = users\n\n    with get_session_with_current_tenant() as db_session:\n        # Public provider - accessible to everyone\n        default_provider = _create_llm_provider(\n            db_session,\n            name=\"default-provider\",\n            default_model_name=\"gpt-4o\",\n            is_public=True,\n            is_default=True,\n        )\n        # Locked provider - is_public=False with no restrictions\n        locked_provider = _create_llm_provider(\n            db_session,\n            name=\"locked-provider\",\n            default_model_name=\"gpt-4o\",\n            is_public=False,\n            is_default=False,\n        )\n        # Restricted provider - has both group AND persona restrictions (AND logic)\n        restricted_provider = _create_llm_provider(\n            db_session,\n            name=\"restricted-provider\",\n            default_model_name=\"gpt-4o-mini\",\n            is_public=False,\n            is_default=False,\n        )\n\n        allowed_persona = _create_persona(\n            db_session,\n            name=\"allowed-persona\",\n            provider_name=restricted_provider.name,\n        )\n        blocked_persona = _create_persona(\n            db_session,\n            name=\"blocked-persona\",\n            provider_name=restricted_provider.name,\n        )\n\n        access_group = UserGroup(name=\"access-group\")\n        db_session.add(access_group)\n        db_session.flush()\n\n        # Add both group and persona restrictions to restricted_provider\n        db_session.add(\n            LLMProvider__UserGroup(\n                llm_provider_id=restricted_provider.id,\n                user_group_id=access_group.id,\n            )\n        )\n        db_session.add(\n            LLMProvider__Persona(\n                llm_provider_id=restricted_provider.id,\n                persona_id=allowed_persona.id,\n            )\n        )\n        # Only admin_user is in the access_group\n        db_session.add(\n            User__UserGroup(\n                user_group_id=access_group.id,\n                user_id=admin_user.id,\n            )\n        )\n        db_session.flush()\n\n        db_session.refresh(restricted_provider)\n        db_session.refresh(locked_provider)\n\n        admin_model = db_session.get(User, admin_user.id)\n        basic_model = db_session.get(User, basic_user.id)\n\n        assert admin_model is not None\n        assert basic_model is not None\n\n        # Fetch user group IDs for both users\n        admin_group_ids = fetch_user_group_ids(db_session, admin_model)\n        basic_group_ids = fetch_user_group_ids(db_session, basic_model)\n\n        # Test is_public flag\n        assert default_provider.is_public\n        assert not locked_provider.is_public\n        assert not restricted_provider.is_public\n\n        # Public provider - everyone can access\n        assert can_user_access_llm_provider(\n            default_provider,\n            admin_group_ids,\n            allowed_persona,\n        )\n        assert can_user_access_llm_provider(\n            default_provider,\n            basic_group_ids,\n            blocked_persona,\n        )\n\n        # Locked provider (is_public=False, no restrictions) - nobody can access\n        assert not can_user_access_llm_provider(\n            locked_provider,\n            admin_group_ids,\n            allowed_persona,\n        )\n        assert not can_user_access_llm_provider(\n            locked_provider,\n            basic_group_ids,\n            allowed_persona,\n        )\n\n        # Restricted provider with AND logic (both groups AND personas set)\n        # admin_user in group + allowed_persona whitelisted → SUCCESS (both conditions met)\n        assert can_user_access_llm_provider(\n            restricted_provider,\n            admin_group_ids,\n            allowed_persona,\n        )\n\n        # admin_user in group + blocked_persona not whitelisted → FAIL (persona not allowed)\n        assert not can_user_access_llm_provider(\n            restricted_provider,\n            admin_group_ids,\n            blocked_persona,\n        )\n\n        # basic_user not in group + allowed_persona whitelisted → FAIL (user not in group)\n        assert not can_user_access_llm_provider(\n            restricted_provider,\n            basic_group_ids,\n            allowed_persona,\n        )\n\n        # basic_user not in group + blocked_persona not whitelisted → FAIL (neither condition met)\n        assert not can_user_access_llm_provider(\n            restricted_provider,\n            basic_group_ids,\n            blocked_persona,\n        )\n\n\ndef test_public_provider_with_persona_restrictions(\n    users: tuple[DATestUser, DATestUser],\n) -> None:\n    \"\"\"Public providers should still enforce persona restrictions.\n\n    Regression test for the bug where is_public=True caused\n    can_user_access_llm_provider() to return True immediately,\n    bypassing persona whitelist checks entirely.\n    \"\"\"\n    admin_user, _basic_user = users\n\n    with get_session_with_current_tenant() as db_session:\n        # Public provider with persona restrictions\n        public_restricted = _create_llm_provider(\n            db_session,\n            name=\"public-persona-restricted\",\n            default_model_name=\"gpt-4o\",\n            is_public=True,\n            is_default=True,\n        )\n\n        whitelisted_persona = _create_persona(\n            db_session,\n            name=\"whitelisted-persona\",\n            provider_name=public_restricted.name,\n        )\n        non_whitelisted_persona = _create_persona(\n            db_session,\n            name=\"non-whitelisted-persona\",\n            provider_name=public_restricted.name,\n        )\n\n        # Only whitelist one persona\n        db_session.add(\n            LLMProvider__Persona(\n                llm_provider_id=public_restricted.id,\n                persona_id=whitelisted_persona.id,\n            )\n        )\n        db_session.flush()\n        db_session.refresh(public_restricted)\n\n        admin_model = db_session.get(User, admin_user.id)\n        assert admin_model is not None\n        admin_group_ids = fetch_user_group_ids(db_session, admin_model)\n\n        # Whitelisted persona — should be allowed\n        assert can_user_access_llm_provider(\n            public_restricted,\n            admin_group_ids,\n            whitelisted_persona,\n        )\n\n        # Non-whitelisted persona — should be denied despite is_public=True\n        assert not can_user_access_llm_provider(\n            public_restricted,\n            admin_group_ids,\n            non_whitelisted_persona,\n        )\n\n        # No persona context (e.g. global provider list) — should be denied\n        # because provider has persona restrictions set\n        assert not can_user_access_llm_provider(\n            public_restricted,\n            admin_group_ids,\n            persona=None,\n        )\n\n\ndef test_public_provider_without_persona_restrictions(\n    users: tuple[DATestUser, DATestUser],\n) -> None:\n    \"\"\"Public providers with no persona restrictions remain accessible to all.\"\"\"\n    admin_user, basic_user = users\n\n    with get_session_with_current_tenant() as db_session:\n        public_unrestricted = _create_llm_provider(\n            db_session,\n            name=\"public-unrestricted\",\n            default_model_name=\"gpt-4o\",\n            is_public=True,\n            is_default=True,\n        )\n\n        any_persona = _create_persona(\n            db_session,\n            name=\"any-persona\",\n            provider_name=public_unrestricted.name,\n        )\n\n        admin_model = db_session.get(User, admin_user.id)\n        basic_model = db_session.get(User, basic_user.id)\n        assert admin_model is not None\n        assert basic_model is not None\n\n        admin_group_ids = fetch_user_group_ids(db_session, admin_model)\n        basic_group_ids = fetch_user_group_ids(db_session, basic_model)\n\n        # Any user, any persona — all allowed\n        assert can_user_access_llm_provider(\n            public_unrestricted, admin_group_ids, any_persona\n        )\n        assert can_user_access_llm_provider(\n            public_unrestricted, basic_group_ids, any_persona\n        )\n        assert can_user_access_llm_provider(\n            public_unrestricted, admin_group_ids, persona=None\n        )\n\n\ndef test_get_llm_for_persona_falls_back_when_access_denied(\n    users: tuple[DATestUser, DATestUser],\n) -> None:\n    admin_user, basic_user = users\n\n    with get_session_with_current_tenant() as db_session:\n        default_provider = _create_llm_provider(\n            db_session,\n            name=\"default-provider\",\n            default_model_name=\"gpt-4o\",\n            is_public=True,\n            is_default=True,\n        )\n        restricted_provider = _create_llm_provider(\n            db_session,\n            name=\"restricted-provider\",\n            default_model_name=\"gpt-4o-mini\",\n            is_public=False,\n            is_default=False,\n        )\n\n        persona = _create_persona(\n            db_session,\n            name=\"fallback-persona\",\n            provider_name=restricted_provider.name,\n        )\n\n        access_group = UserGroup(name=\"persona-group\")\n        db_session.add(access_group)\n        db_session.flush()\n\n        db_session.add(\n            LLMProvider__UserGroup(\n                llm_provider_id=restricted_provider.id,\n                user_group_id=access_group.id,\n            )\n        )\n        db_session.add(\n            User__UserGroup(\n                user_group_id=access_group.id,\n                user_id=admin_user.id,\n            )\n        )\n        db_session.flush()\n        db_session.commit()\n\n        db_session.refresh(default_provider)\n        db_session.refresh(restricted_provider)\n        db_session.refresh(persona)\n\n        admin_model = db_session.get(User, admin_user.id)\n        basic_model = db_session.get(User, basic_user.id)\n\n        assert admin_model is not None\n        assert basic_model is not None\n\n        allowed_llm = get_llm_for_persona(\n            persona=persona,\n            user=admin_model,\n        )\n        assert (\n            allowed_llm.config.model_name\n            == restricted_provider.model_configurations[0].name\n        )\n\n        fallback_llm = get_llm_for_persona(\n            persona=persona,\n            user=basic_model,\n        )\n        assert (\n            fallback_llm.config.model_name\n            == default_provider.model_configurations[0].name\n        )\n\n\ndef test_list_llm_provider_basics_excludes_non_public_unrestricted(\n    users: tuple[DATestUser, DATestUser],\n) -> None:\n    \"\"\"Test that the /llm/provider endpoint correctly excludes non-public providers\n    with no group/persona restrictions.\n\n    This tests the fix for the bug where non-public providers with no restrictions\n    were incorrectly shown to all users instead of being admin-only.\n    \"\"\"\n    admin_user, basic_user = users\n\n    # Create a public provider (should be visible to all)\n    public_provider = LLMProviderManager.create(\n        name=\"public-provider\",\n        is_public=True,\n        set_as_default=True,\n        default_model_name=\"gpt-4o\",\n        user_performing_action=admin_user,\n    )\n\n    # Create a non-public provider with no restrictions (should be admin-only)\n    non_public_provider = LLMProviderManager.create(\n        name=\"non-public-unrestricted\",\n        is_public=False,\n        groups=[],\n        personas=[],\n        set_as_default=False,\n        user_performing_action=admin_user,\n    )\n\n    # Non-admin user calls the /llm/provider endpoint\n    response = requests.get(\n        f\"{API_SERVER_URL}/llm/provider\",\n        headers=basic_user.headers,\n    )\n    assert response.status_code == 200\n    providers = response.json()[\"providers\"]\n    provider_names = [p[\"name\"] for p in providers]\n\n    # Public provider should be visible\n    assert public_provider.name in provider_names\n\n    # Non-public provider with no restrictions should NOT be visible to non-admin\n    assert non_public_provider.name not in provider_names\n\n    # Admin user should see both providers\n    admin_response = requests.get(\n        f\"{API_SERVER_URL}/llm/provider\",\n        headers=admin_user.headers,\n    )\n    assert admin_response.status_code == 200\n    admin_providers = admin_response.json()[\"providers\"]\n    admin_provider_names = [p[\"name\"] for p in admin_providers]\n\n    assert public_provider.name in admin_provider_names\n    assert non_public_provider.name in admin_provider_names\n\n\ndef test_provider_delete_clears_persona_references(\n    reset: None,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"Test that deleting a provider automatically clears persona references.\"\"\"\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    # Create a default provider first so personas have something to fall back to\n    LLMProviderManager.create(\n        name=\"default-provider\",\n        is_public=True,\n        set_as_default=True,\n        default_model_name=\"gpt-4o\",\n        user_performing_action=admin_user,\n    )\n\n    provider = LLMProviderManager.create(\n        is_public=False,\n        set_as_default=False,\n        user_performing_action=admin_user,\n    )\n    persona = PersonaManager.create(\n        llm_model_provider_override=provider.name,\n        user_performing_action=admin_user,\n    )\n\n    # Delete the provider - should succeed and automatically clear persona references\n    assert LLMProviderManager.delete(\n        provider,\n        user_performing_action=admin_user,\n    )\n\n    # Verify the persona now falls back to default (llm_model_provider_override cleared)\n    persona_response = requests.get(\n        f\"{API_SERVER_URL}/persona/{persona.id}\",\n        headers=admin_user.headers,\n    )\n    assert persona_response.status_code == 200\n    updated_persona = persona_response.json()\n    assert updated_persona[\"llm_model_provider_override\"] is None\n"
  },
  {
    "path": "backend/tests/integration/tests/llm_provider/test_llm_provider_persona_access.py",
    "content": "\"\"\"\nIntegration tests for LLM Provider persona access authorization.\n\"\"\"\n\nimport os\n\nimport pytest\nimport requests\n\nfrom onyx.llm.constants import LlmProviderNames\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\npytestmark = pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"LLM provider persona access is enterprise only\",\n)\n\n\n@pytest.fixture()\ndef users_and_groups(\n    reset: None,  # noqa: ARG001\n) -> tuple[DATestUser, DATestUser, int, int]:\n    \"\"\"Create admin, basic user, and two user groups.\"\"\"\n    admin_user = UserManager.create(name=\"admin_user\")\n    basic_user = UserManager.create(name=\"basic_user\")\n\n    # Create two user groups\n    group1 = UserGroupManager.create(\n        user_performing_action=admin_user,\n        name=\"test_group_1\",\n        user_ids=[basic_user.id],\n    )\n\n    group2 = UserGroupManager.create(\n        user_performing_action=admin_user,\n        name=\"test_group_2\",\n        user_ids=[],  # basic_user is NOT in this group\n    )\n\n    return admin_user, basic_user, group1.id, group2.id\n\n\ndef test_unauthorized_persona_access_returns_403(\n    users_and_groups: tuple[DATestUser, DATestUser, int, int],\n) -> None:\n    \"\"\"Test that users cannot query providers for personas they don't have access to.\"\"\"\n    admin_user, basic_user, group1_id, group2_id = users_and_groups\n\n    # Create a persona restricted to group2 (which basic_user is NOT in)\n    restricted_persona = PersonaManager.create(\n        user_performing_action=admin_user,\n        name=\"Restricted Persona\",\n        description=\"Only accessible to group2\",\n        is_public=False,\n        groups=[group2_id],\n    )\n\n    # Try to query providers for the restricted persona as basic_user\n    response = requests.get(\n        f\"{API_SERVER_URL}/llm/persona/{restricted_persona.id}/providers\",\n        headers=basic_user.headers,\n    )\n\n    # Should return 403 Forbidden\n    assert response.status_code == 403\n    assert \"don't have access to this assistant\" in response.json()[\"detail\"]\n\n\ndef test_authorized_persona_access_returns_filtered_providers(\n    users_and_groups: tuple[DATestUser, DATestUser, int, int],\n) -> None:\n    \"\"\"Test that users can query providers for personas they have access to.\"\"\"\n    admin_user, basic_user, group1_id, group2_id = users_and_groups\n\n    # Create a persona accessible to group1 (which basic_user IS in)\n    accessible_persona = PersonaManager.create(\n        user_performing_action=admin_user,\n        name=\"Accessible Persona\",\n        description=\"Accessible to group1\",\n        is_public=False,\n        groups=[group1_id],\n    )\n\n    # Create a restricted provider accessible only to the persona\n    restricted_provider = LLMProviderManager.create(\n        user_performing_action=admin_user,\n        name=\"Restricted Provider\",\n        provider=LlmProviderNames.OPENAI,\n        api_key=\"test-key\",\n        default_model_name=\"gpt-4o\",\n        is_public=False,\n        groups=[],\n        personas=[accessible_persona.id],\n    )\n\n    # Query providers for the accessible persona as basic_user\n    response = requests.get(\n        f\"{API_SERVER_URL}/llm/persona/{accessible_persona.id}/providers\",\n        headers=basic_user.headers,\n    )\n\n    # Should succeed\n    assert response.status_code == 200\n    providers = response.json()[\"providers\"]\n\n    # Should include the restricted provider since basic_user can access the persona\n    provider_names = [p[\"name\"] for p in providers]\n    assert restricted_provider.name in provider_names\n\n\ndef test_persona_id_zero_applies_rbac(\n    users_and_groups: tuple[DATestUser, DATestUser, int, int],\n) -> None:\n    \"\"\"Test that persona_id=0 (default persona) properly applies RBAC.\"\"\"\n    admin_user, basic_user, group1_id, group2_id = users_and_groups\n\n    # Create a restricted provider accessible only to group2\n    restricted_provider = LLMProviderManager.create(\n        user_performing_action=admin_user,\n        name=\"Group2 Only Provider\",\n        provider=LlmProviderNames.OPENAI,\n        api_key=\"test-key\",\n        default_model_name=\"gpt-4o\",\n        is_public=False,\n        groups=[group2_id],\n        personas=[],\n    )\n\n    # Query providers with persona_id=0 as basic_user\n    response = requests.get(\n        f\"{API_SERVER_URL}/llm/persona/0/providers\",\n        headers=basic_user.headers,\n    )\n\n    # Should succeed (persona_id=0 refers to default persona, which is public)\n    assert response.status_code == 200\n    providers = response.json()[\"providers\"]\n\n    # Should NOT include the restricted provider since basic_user is not in group2\n    provider_names = [p[\"name\"] for p in providers]\n    assert restricted_provider.name not in provider_names\n\n\ndef test_admin_can_query_any_persona(\n    users_and_groups: tuple[DATestUser, DATestUser, int, int],\n) -> None:\n    \"\"\"Test that admin users can query any persona's providers.\"\"\"\n    admin_user, basic_user, group1_id, group2_id = users_and_groups\n\n    # Create a persona restricted to group2 (admin is not explicitly in this group)\n    restricted_persona = PersonaManager.create(\n        user_performing_action=admin_user,\n        name=\"Admin Test Persona\",\n        description=\"Only accessible to group2\",\n        is_public=False,\n        groups=[group2_id],\n    )\n\n    # Create a restricted provider accessible only to the persona\n    restricted_provider = LLMProviderManager.create(\n        user_performing_action=admin_user,\n        name=\"Admin Test Provider\",\n        provider=LlmProviderNames.OPENAI,\n        api_key=\"test-key\",\n        default_model_name=\"gpt-4o\",\n        is_public=False,\n        groups=[],\n        personas=[restricted_persona.id],\n    )\n\n    # Query providers for the restricted persona as admin_user\n    response = requests.get(\n        f\"{API_SERVER_URL}/llm/persona/{restricted_persona.id}/providers\",\n        headers=admin_user.headers,\n    )\n\n    # Should succeed - admins can access any persona\n    assert response.status_code == 200\n    providers = response.json()[\"providers\"]\n\n    # Should include the restricted provider\n    provider_names = [p[\"name\"] for p in providers]\n    assert restricted_provider.name in provider_names\n\n\ndef test_public_persona_accessible_to_all(\n    users_and_groups: tuple[DATestUser, DATestUser, int, int],\n) -> None:\n    \"\"\"Test that public personas are accessible to all users.\"\"\"\n    admin_user, basic_user, group1_id, group2_id = users_and_groups\n\n    # Create a public LLM provider so there's something to return\n    public_provider = LLMProviderManager.create(\n        user_performing_action=admin_user,\n        name=\"Public Provider\",\n        provider=LlmProviderNames.OPENAI,\n        api_key=\"test-key\",\n        default_model_name=\"gpt-4o\",\n        is_public=True,\n        set_as_default=True,\n    )\n\n    # Create a public persona\n    public_persona = PersonaManager.create(\n        user_performing_action=admin_user,\n        name=\"Public Persona\",\n        description=\"Accessible to everyone\",\n        is_public=True,\n        groups=[],\n    )\n\n    # Query providers for the public persona as basic_user\n    response = requests.get(\n        f\"{API_SERVER_URL}/llm/persona/{public_persona.id}/providers\",\n        headers=basic_user.headers,\n    )\n\n    # Should succeed\n    assert response.status_code == 200\n    providers = response.json()[\"providers\"]\n\n    # Should return the public provider\n    assert len(providers) > 0\n    provider_names = [p[\"name\"] for p in providers]\n    assert public_provider.name in provider_names\n\n\ndef test_nonexistent_persona_returns_404(\n    users_and_groups: tuple[DATestUser, DATestUser, int, int],\n) -> None:\n    \"\"\"Test that querying a nonexistent persona returns 404.\"\"\"\n    admin_user, basic_user, group1_id, group2_id = users_and_groups\n\n    # Query providers for a nonexistent persona\n    response = requests.get(\n        f\"{API_SERVER_URL}/llm/persona/99999/providers\",\n        headers=basic_user.headers,\n    )\n\n    # Should return 404\n    assert response.status_code == 404\n    assert \"Persona not found\" in response.json()[\"detail\"]\n"
  },
  {
    "path": "backend/tests/integration/tests/llm_workflows/test_mock_llm_tool_calls.py",
    "content": "from onyx.configs import app_configs\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.tools.constants import SEARCH_TOOL_ID\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.tool import ToolManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\n_DUMMY_OPENAI_API_KEY = \"sk-mock-llm-workflow-tests\"\n\n\ndef _get_internal_search_tool_id(admin_user: DATestUser) -> int:\n    tools = ToolManager.list_tools(user_performing_action=admin_user)\n    for tool in tools:\n        if tool.in_code_tool_id == SEARCH_TOOL_ID:\n            return tool.id\n    raise AssertionError(\"SearchTool must exist for this test\")\n\n\ndef _assert_integration_mode_enabled() -> None:\n    assert (\n        app_configs.INTEGRATION_TESTS_MODE is True\n    ), \"Integration tests require INTEGRATION_TESTS_MODE=true.\"\n\n\ndef _seed_connector_for_search_tool(admin_user: DATestUser) -> None:\n    # SearchTool is only exposed when at least one non-default connector exists.\n    CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n\ndef test_mock_llm_response_single_tool_call_debug(admin_user: DATestUser) -> None:\n    _assert_integration_mode_enabled()\n    _seed_connector_for_search_tool(admin_user)\n\n    LLMProviderManager.create(\n        user_performing_action=admin_user,\n        api_key=_DUMMY_OPENAI_API_KEY,\n    )\n    chat_session = ChatSessionManager.create(user_performing_action=admin_user)\n    search_tool_id = _get_internal_search_tool_id(admin_user)\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"run the search tool\",\n        user_performing_action=admin_user,\n        forced_tool_ids=[search_tool_id],\n        mock_llm_response='{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"alpha\"]}}',\n    )\n\n    assert response.error is None, f\"Unexpected stream error: {response.error}\"\n    assert len(response.tool_call_debug) == 1\n    assert response.tool_call_debug[0].tool_name == \"internal_search\"\n    assert response.tool_call_debug[0].tool_args == {\"queries\": [\"alpha\"]}\n\n\ndef test_mock_llm_response_parallel_tool_call_debug(admin_user: DATestUser) -> None:\n    _assert_integration_mode_enabled()\n    _seed_connector_for_search_tool(admin_user)\n\n    LLMProviderManager.create(\n        user_performing_action=admin_user,\n        api_key=_DUMMY_OPENAI_API_KEY,\n    )\n    chat_session = ChatSessionManager.create(user_performing_action=admin_user)\n    search_tool_id = _get_internal_search_tool_id(admin_user)\n\n    mock_response = \"\\n\".join(\n        [\n            '{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"alpha\"]}}',\n            '{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"beta\"]}}',\n        ]\n    )\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"run the search tool twice\",\n        user_performing_action=admin_user,\n        forced_tool_ids=[search_tool_id],\n        mock_llm_response=mock_response,\n    )\n\n    assert response.error is None, f\"Unexpected stream error: {response.error}\"\n    assert len(response.tool_call_debug) == 2\n    assert [entry.tool_name for entry in response.tool_call_debug] == [\n        \"internal_search\",\n        \"internal_search\",\n    ]\n    assert [entry.tool_args for entry in response.tool_call_debug] == [\n        {\"queries\": [\"alpha\"]},\n        {\"queries\": [\"beta\"]},\n    ]\n\n\ndef test_mock_llm_response_embedded_json_fallback_tool_call_debug(\n    admin_user: DATestUser,\n) -> None:\n    _assert_integration_mode_enabled()\n    _seed_connector_for_search_tool(admin_user)\n\n    LLMProviderManager.create(\n        user_performing_action=admin_user,\n        api_key=_DUMMY_OPENAI_API_KEY,\n    )\n    chat_session = ChatSessionManager.create(user_performing_action=admin_user)\n    search_tool_id = _get_internal_search_tool_id(admin_user)\n\n    # Validate fallback extraction when the model returns tool-call JSON embedded in\n    # normal assistant text instead of structured tool_call objects.\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"use the search tool\",\n        user_performing_action=admin_user,\n        forced_tool_ids=[search_tool_id],\n        mock_llm_response=(\n            'I will call a tool now. {\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"gamma\"]}}'\n        ),\n    )\n\n    assert response.error is None, f\"Unexpected stream error: {response.error}\"\n    assert len(response.tool_call_debug) == 1\n    assert response.tool_call_debug[0].tool_name == \"internal_search\"\n    assert response.tool_call_debug[0].tool_args == {\"queries\": [\"gamma\"]}\n"
  },
  {
    "path": "backend/tests/integration/tests/llm_workflows/test_nightly_provider_chat_workflow.py",
    "content": "import json\nimport os\nimport time\nfrom uuid import uuid4\n\nimport pytest\nimport requests\nfrom pydantic import BaseModel\nfrom pydantic import ConfigDict\n\nfrom onyx.configs import app_configs\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.tools.constants import SEARCH_TOOL_ID\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.tool import ToolManager\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import ToolName\n\n\n_ENV_PROVIDER = \"NIGHTLY_LLM_PROVIDER\"\n_ENV_MODELS = \"NIGHTLY_LLM_MODELS\"\n_ENV_API_KEY = \"NIGHTLY_LLM_API_KEY\"\n_ENV_API_BASE = \"NIGHTLY_LLM_API_BASE\"\n_ENV_API_VERSION = \"NIGHTLY_LLM_API_VERSION\"\n_ENV_DEPLOYMENT_NAME = \"NIGHTLY_LLM_DEPLOYMENT_NAME\"\n_ENV_CUSTOM_CONFIG_JSON = \"NIGHTLY_LLM_CUSTOM_CONFIG_JSON\"\n_ENV_STRICT = \"NIGHTLY_LLM_STRICT\"\n\n\nclass NightlyProviderConfig(BaseModel):\n    model_config = ConfigDict(frozen=True)\n\n    provider: str\n    model_names: list[str]\n    api_key: str | None\n    api_base: str | None\n    api_version: str | None\n    deployment_name: str | None\n    custom_config: dict[str, str] | None\n    strict: bool\n\n\ndef _stringify_custom_config_value(value: object) -> str:\n    if isinstance(value, str):\n        return value\n    if isinstance(value, (dict, list)):\n        return json.dumps(value)\n    return str(value)\n\n\ndef _looks_like_vertex_credentials_payload(\n    raw_custom_config: dict[object, object],\n) -> bool:\n    normalized_keys = {str(key).strip().lower() for key in raw_custom_config}\n    provider_specific_keys = {\n        \"vertex_credentials\",\n        \"credentials_file\",\n        \"vertex_credentials_file\",\n        \"google_application_credentials\",\n        \"vertex_location\",\n        \"location\",\n        \"vertex_region\",\n        \"region\",\n    }\n    if normalized_keys & provider_specific_keys:\n        return False\n\n    normalized_type = str(raw_custom_config.get(\"type\", \"\")).strip().lower()\n    if normalized_type not in {\"service_account\", \"external_account\"}:\n        return False\n\n    # Service account JSON usually includes private_key/client_email, while external\n    # account JSON includes credential_source. Either shape should be accepted.\n    has_service_account_markers = any(\n        key in normalized_keys for key in {\"private_key\", \"client_email\"}\n    )\n    has_external_account_markers = \"credential_source\" in normalized_keys\n    return has_service_account_markers or has_external_account_markers\n\n\ndef _normalize_custom_config(\n    provider: str, raw_custom_config: dict[object, object]\n) -> dict[str, str]:\n    if provider == \"vertex_ai\" and _looks_like_vertex_credentials_payload(\n        raw_custom_config\n    ):\n        return {\"vertex_credentials\": json.dumps(raw_custom_config)}\n\n    normalized: dict[str, str] = {}\n    for raw_key, raw_value in raw_custom_config.items():\n        key = str(raw_key).strip()\n        key_lower = key.lower()\n\n        if provider == \"vertex_ai\":\n            if key_lower in {\n                \"vertex_credentials\",\n                \"credentials_file\",\n                \"vertex_credentials_file\",\n                \"google_application_credentials\",\n            }:\n                key = \"vertex_credentials\"\n            elif key_lower in {\n                \"vertex_location\",\n                \"location\",\n                \"vertex_region\",\n                \"region\",\n            }:\n                key = \"vertex_location\"\n\n        normalized[key] = _stringify_custom_config_value(raw_value)\n\n    return normalized\n\n\ndef _env_true(env_var: str, default: bool = False) -> bool:\n    value = os.environ.get(env_var)\n    if value is None:\n        return default\n    return value.strip().lower() in {\"1\", \"true\", \"yes\", \"on\"}\n\n\ndef _parse_models_env(env_var: str) -> list[str]:\n    raw_value = os.environ.get(env_var, \"\").strip()\n    if not raw_value:\n        return []\n\n    try:\n        parsed_json = json.loads(raw_value)\n    except json.JSONDecodeError:\n        parsed_json = None\n\n    if isinstance(parsed_json, list):\n        return [str(model).strip() for model in parsed_json if str(model).strip()]\n\n    return [part.strip() for part in raw_value.split(\",\") if part.strip()]\n\n\ndef _load_provider_config() -> NightlyProviderConfig:\n    provider = os.environ.get(_ENV_PROVIDER, \"\").strip().lower()\n    model_names = _parse_models_env(_ENV_MODELS)\n    api_key = os.environ.get(_ENV_API_KEY) or None\n    api_base = os.environ.get(_ENV_API_BASE) or None\n    api_version = os.environ.get(_ENV_API_VERSION) or None\n    deployment_name = os.environ.get(_ENV_DEPLOYMENT_NAME) or None\n    strict = _env_true(_ENV_STRICT, default=False)\n\n    custom_config: dict[str, str] | None = None\n    custom_config_json = os.environ.get(_ENV_CUSTOM_CONFIG_JSON, \"\").strip()\n    if custom_config_json:\n        parsed = json.loads(custom_config_json)\n        if not isinstance(parsed, dict):\n            raise ValueError(f\"{_ENV_CUSTOM_CONFIG_JSON} must be a JSON object\")\n        custom_config = _normalize_custom_config(\n            provider=provider, raw_custom_config=parsed\n        )\n\n    if provider == \"ollama_chat\" and api_key and not custom_config:\n        custom_config = {\"OLLAMA_API_KEY\": api_key}\n\n    return NightlyProviderConfig(\n        provider=provider,\n        model_names=model_names,\n        api_key=api_key,\n        api_base=api_base,\n        api_version=api_version,\n        deployment_name=deployment_name,\n        custom_config=custom_config,\n        strict=strict,\n    )\n\n\ndef _skip_or_fail(strict: bool, message: str) -> None:\n    if strict:\n        pytest.fail(message)\n    pytest.skip(message)\n\n\ndef _validate_provider_config(config: NightlyProviderConfig) -> None:\n    if not config.provider:\n        _skip_or_fail(strict=config.strict, message=f\"{_ENV_PROVIDER} must be set\")\n\n    if not config.model_names:\n        _skip_or_fail(\n            strict=config.strict,\n            message=f\"{_ENV_MODELS} must include at least one model\",\n        )\n\n    if config.provider != \"ollama_chat\" and not (\n        config.api_key or config.custom_config\n    ):\n        _skip_or_fail(\n            strict=config.strict,\n            message=(\n                f\"{_ENV_API_KEY} or {_ENV_CUSTOM_CONFIG_JSON} is required for provider '{config.provider}'\"\n            ),\n        )\n\n    if config.provider == \"ollama_chat\" and not (\n        config.api_base or _default_api_base_for_provider(config.provider)\n    ):\n        _skip_or_fail(\n            strict=config.strict,\n            message=(f\"{_ENV_API_BASE} is required for provider '{config.provider}'\"),\n        )\n\n    if config.provider == \"azure\":\n        if not config.api_base:\n            _skip_or_fail(\n                strict=config.strict,\n                message=(\n                    f\"{_ENV_API_BASE} is required for provider '{config.provider}'\"\n                ),\n            )\n        if not config.api_version:\n            _skip_or_fail(\n                strict=config.strict,\n                message=(\n                    f\"{_ENV_API_VERSION} is required for provider '{config.provider}'\"\n                ),\n            )\n\n    if config.provider == \"vertex_ai\":\n        has_vertex_credentials = bool(\n            config.custom_config and config.custom_config.get(\"vertex_credentials\")\n        )\n        if not has_vertex_credentials:\n            configured_keys = (\n                sorted(config.custom_config.keys()) if config.custom_config else []\n            )\n            _skip_or_fail(\n                strict=config.strict,\n                message=(\n                    f\"{_ENV_CUSTOM_CONFIG_JSON} must include 'vertex_credentials' \"\n                    f\"for provider '{config.provider}'. \"\n                    f\"Found keys: {configured_keys}\"\n                ),\n            )\n\n\ndef _assert_integration_mode_enabled() -> None:\n    assert (\n        app_configs.INTEGRATION_TESTS_MODE is True\n    ), \"Integration tests require INTEGRATION_TESTS_MODE=true.\"\n\n\ndef _seed_connector_for_search_tool(admin_user: DATestUser) -> None:\n    # SearchTool is only exposed when at least one non-default connector exists.\n    CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n\ndef _get_internal_search_tool_id(admin_user: DATestUser) -> int:\n    tools = ToolManager.list_tools(user_performing_action=admin_user)\n    for tool in tools:\n        if tool.in_code_tool_id == SEARCH_TOOL_ID:\n            return tool.id\n    raise AssertionError(\"SearchTool must exist for this test\")\n\n\ndef _default_api_base_for_provider(provider: str) -> str | None:\n    if provider == \"openrouter\":\n        return \"https://openrouter.ai/api/v1\"\n    if provider == \"ollama_chat\":\n        # host.docker.internal works when tests are running inside the integration test container.\n        return \"http://host.docker.internal:11434\"\n    return None\n\n\ndef _create_provider_payload(\n    provider: str,\n    provider_name: str,\n    model_name: str,\n    api_key: str | None,\n    api_base: str | None,\n    api_version: str | None,\n    deployment_name: str | None,\n    custom_config: dict[str, str] | None,\n) -> dict:\n    return {\n        \"name\": provider_name,\n        \"provider\": provider,\n        \"model\": model_name,\n        \"api_key\": api_key,\n        \"api_base\": api_base,\n        \"api_version\": api_version,\n        \"deployment_name\": deployment_name,\n        \"custom_config\": custom_config,\n        \"default_model_name\": model_name,\n        \"is_public\": True,\n        \"groups\": [],\n        \"personas\": [],\n        \"model_configurations\": [{\"name\": model_name, \"is_visible\": True}],\n        \"api_key_changed\": bool(api_key),\n        \"custom_config_changed\": bool(custom_config),\n    }\n\n\ndef _ensure_provider_is_default(\n    provider_id: int, model_name: str, admin_user: DATestUser\n) -> None:\n    list_response = requests.get(\n        f\"{API_SERVER_URL}/admin/llm/provider\",\n        headers=admin_user.headers,\n    )\n    list_response.raise_for_status()\n    default_text = list_response.json().get(\"default_text\")\n    assert default_text is not None, \"Expected a default provider after setting default\"\n    assert (\n        default_text.get(\"provider_id\") == provider_id\n    ), f\"Expected provider {provider_id} to be default, found {default_text.get('provider_id')}\"\n    assert (\n        default_text.get(\"model_name\") == model_name\n    ), f\"Expected default model {model_name}, found {default_text.get('model_name')}\"\n\n\ndef _run_chat_assertions(\n    admin_user: DATestUser,\n    search_tool_id: int,\n    provider: str,\n    model_name: str,\n) -> None:\n    last_error: str | None = None\n    # Retry once to reduce transient nightly flakes due provider-side blips.\n    for attempt in range(1, 3):\n        chat_session = ChatSessionManager.create(user_performing_action=admin_user)\n\n        response = ChatSessionManager.send_message(\n            chat_session_id=chat_session.id,\n            message=(\n                \"Use internal_search to search for 'nightly-provider-regression-sentinel', \"\n                \"then summarize the result in one short sentence.\"\n            ),\n            user_performing_action=admin_user,\n            forced_tool_ids=[search_tool_id],\n        )\n\n        if response.error is None:\n            used_internal_search = any(\n                used_tool.tool_name == ToolName.INTERNAL_SEARCH\n                for used_tool in response.used_tools\n            )\n            debug_has_internal_search = any(\n                debug_tool_call.tool_name == \"internal_search\"\n                for debug_tool_call in response.tool_call_debug\n            )\n            has_answer = bool(response.full_message.strip())\n\n            if used_internal_search and debug_has_internal_search and has_answer:\n                return\n\n            last_error = (\n                f\"attempt={attempt} provider={provider} model={model_name} \"\n                f\"used_internal_search={used_internal_search} \"\n                f\"debug_internal_search={debug_has_internal_search} \"\n                f\"has_answer={has_answer} \"\n                f\"tool_call_debug={response.tool_call_debug}\"\n            )\n        else:\n            last_error = f\"attempt={attempt} provider={provider} model={model_name} stream_error={response.error.error}\"\n\n        time.sleep(attempt)\n\n    pytest.fail(f\"Chat/tool-call assertions failed: {last_error}\")\n\n\ndef _create_and_test_provider_for_model(\n    admin_user: DATestUser,\n    config: NightlyProviderConfig,\n    model_name: str,\n    search_tool_id: int,\n) -> None:\n    provider_name = f\"nightly-{config.provider}-{uuid4().hex[:12]}\"\n    resolved_api_base = config.api_base or _default_api_base_for_provider(\n        config.provider\n    )\n\n    provider_payload = _create_provider_payload(\n        provider=config.provider,\n        provider_name=provider_name,\n        model_name=model_name,\n        api_key=config.api_key,\n        api_base=resolved_api_base,\n        api_version=config.api_version,\n        deployment_name=config.deployment_name,\n        custom_config=config.custom_config,\n    )\n\n    test_response = requests.post(\n        f\"{API_SERVER_URL}/admin/llm/test\",\n        headers=admin_user.headers,\n        json=provider_payload,\n    )\n    assert test_response.status_code == 200, (\n        f\"Provider test endpoint failed for provider={config.provider} \"\n        f\"model={model_name}: {test_response.status_code} {test_response.text}\"\n    )\n\n    create_response = requests.put(\n        f\"{API_SERVER_URL}/admin/llm/provider?is_creation=true\",\n        headers=admin_user.headers,\n        json=provider_payload,\n    )\n    assert create_response.status_code == 200, (\n        f\"Provider creation failed for provider={config.provider} \"\n        f\"model={model_name}: {create_response.status_code} {create_response.text}\"\n    )\n    provider_id = create_response.json()[\"id\"]\n\n    try:\n        set_default_response = requests.post(\n            f\"{API_SERVER_URL}/admin/llm/default\",\n            headers=admin_user.headers,\n            json={\"provider_id\": provider_id, \"model_name\": model_name},\n        )\n        assert set_default_response.status_code == 200, (\n            f\"Setting default provider failed for provider={config.provider} \"\n            f\"model={model_name}: {set_default_response.status_code} \"\n            f\"{set_default_response.text}\"\n        )\n\n        _ensure_provider_is_default(\n            provider_id=provider_id, model_name=model_name, admin_user=admin_user\n        )\n        _run_chat_assertions(\n            admin_user=admin_user,\n            search_tool_id=search_tool_id,\n            provider=config.provider,\n            model_name=model_name,\n        )\n    finally:\n        requests.delete(\n            f\"{API_SERVER_URL}/admin/llm/provider/{provider_id}\",\n            headers=admin_user.headers,\n        )\n\n\ndef test_nightly_provider_chat_workflow(admin_user: DATestUser) -> None:\n    \"\"\"Nightly regression test for provider setup + default selection + chat tool calls.\"\"\"\n    _assert_integration_mode_enabled()\n    config = _load_provider_config()\n    _validate_provider_config(config)\n\n    _seed_connector_for_search_tool(admin_user)\n    search_tool_id = _get_internal_search_tool_id(admin_user)\n\n    failures: list[str] = []\n    for model_name in config.model_names:\n        try:\n            _create_and_test_provider_for_model(\n                admin_user=admin_user,\n                config=config,\n                model_name=model_name,\n                search_tool_id=search_tool_id,\n            )\n        except BaseException as exc:\n            if isinstance(exc, (KeyboardInterrupt, SystemExit)):\n                raise\n            failures.append(\n                f\"provider={config.provider} model={model_name} error={type(exc).__name__}: {exc}\"\n            )\n\n    if failures:\n        pytest.fail(\"Nightly provider chat failures:\\n\" + \"\\n\".join(failures))\n"
  },
  {
    "path": "backend/tests/integration/tests/llm_workflows/test_tool_policy_enforcement.py",
    "content": "from onyx.configs import app_configs\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.tools.constants import SEARCH_TOOL_ID\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.managers.tool import ToolManager\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import ToolName\n\n\n_DUMMY_OPENAI_API_KEY = \"sk-mock-tool-policy-tests\"\n\n\ndef _assert_integration_mode_enabled() -> None:\n    assert (\n        app_configs.INTEGRATION_TESTS_MODE is True\n    ), \"Integration tests require INTEGRATION_TESTS_MODE=true.\"\n\n\ndef _seed_connector_for_search_tool(admin_user: DATestUser) -> None:\n    # SearchTool is only exposed when at least one non-default connector exists.\n    CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n\ndef _get_internal_search_tool_id(admin_user: DATestUser) -> int:\n    tools = ToolManager.list_tools(user_performing_action=admin_user)\n    for tool in tools:\n        if tool.in_code_tool_id == SEARCH_TOOL_ID:\n            return tool.id\n    raise AssertionError(\"SearchTool must exist for this test\")\n\n\ndef _ensure_llm_provider(admin_user: DATestUser) -> None:\n    LLMProviderManager.create(\n        user_performing_action=admin_user,\n        api_key=_DUMMY_OPENAI_API_KEY,\n    )\n\n\ndef test_forced_tool_executes_when_available(admin_user: DATestUser) -> None:\n    _assert_integration_mode_enabled()\n    _seed_connector_for_search_tool(admin_user)\n    _ensure_llm_provider(admin_user)\n\n    search_tool_id = _get_internal_search_tool_id(admin_user)\n    persona = PersonaManager.create(\n        tool_ids=[search_tool_id], user_performing_action=admin_user\n    )\n    chat_session = ChatSessionManager.create(\n        persona_id=persona.id, user_performing_action=admin_user\n    )\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"force the search tool\",\n        user_performing_action=admin_user,\n        forced_tool_ids=[search_tool_id],\n        mock_llm_response='{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"alpha\"]}}',\n    )\n\n    assert response.error is None, f\"Unexpected stream error: {response.error}\"\n    assert any(\n        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response.used_tools\n    )\n    assert len(response.tool_call_debug) == 1\n    assert response.tool_call_debug[0].tool_name == \"internal_search\"\n    assert response.tool_call_debug[0].tool_args == {\"queries\": [\"alpha\"]}\n\n\ndef test_forced_tool_rejected_when_not_in_persona_tools(\n    admin_user: DATestUser,\n) -> None:\n    _assert_integration_mode_enabled()\n    _seed_connector_for_search_tool(admin_user)\n    _ensure_llm_provider(admin_user)\n\n    search_tool_id = _get_internal_search_tool_id(admin_user)\n    persona = PersonaManager.create(tool_ids=[], user_performing_action=admin_user)\n    chat_session = ChatSessionManager.create(\n        persona_id=persona.id, user_performing_action=admin_user\n    )\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"try forcing a missing tool\",\n        user_performing_action=admin_user,\n        forced_tool_ids=[search_tool_id],\n    )\n\n    assert response.error is not None\n    assert response.error.error == f\"Forced tool {search_tool_id} not found in tools\"\n    assert response.used_tools == []\n\n\ndef test_allowed_tool_ids_excludes_tools_outside_allowlist(\n    admin_user: DATestUser,\n) -> None:\n    _assert_integration_mode_enabled()\n    _seed_connector_for_search_tool(admin_user)\n    _ensure_llm_provider(admin_user)\n\n    search_tool_id = _get_internal_search_tool_id(admin_user)\n    persona = PersonaManager.create(\n        tool_ids=[search_tool_id], user_performing_action=admin_user\n    )\n    chat_session = ChatSessionManager.create(\n        persona_id=persona.id, user_performing_action=admin_user\n    )\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"attempt tool use with empty allowlist\",\n        user_performing_action=admin_user,\n        allowed_tool_ids=[],\n        mock_llm_response='{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"beta\"]}}',\n    )\n\n    assert response.error is None, f\"Unexpected stream error: {response.error}\"\n    assert response.used_tools == []\n    assert response.tool_call_debug == []\n\n\ndef test_forced_and_allowlist_conflict_returns_validation_error(\n    admin_user: DATestUser,\n) -> None:\n    _assert_integration_mode_enabled()\n    _seed_connector_for_search_tool(admin_user)\n    _ensure_llm_provider(admin_user)\n\n    search_tool_id = _get_internal_search_tool_id(admin_user)\n    persona = PersonaManager.create(\n        tool_ids=[search_tool_id], user_performing_action=admin_user\n    )\n    chat_session = ChatSessionManager.create(\n        persona_id=persona.id, user_performing_action=admin_user\n    )\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"force a tool blocked by allowlist\",\n        user_performing_action=admin_user,\n        allowed_tool_ids=[],\n        forced_tool_ids=[search_tool_id],\n    )\n\n    assert response.error is not None\n    assert response.error.error == f\"Forced tool {search_tool_id} not found in tools\"\n    assert response.used_tools == []\n\n\ndef test_run_search_always_maps_to_forced_search_tool(admin_user: DATestUser) -> None:\n    _assert_integration_mode_enabled()\n    _seed_connector_for_search_tool(admin_user)\n    _ensure_llm_provider(admin_user)\n\n    search_tool_id = _get_internal_search_tool_id(admin_user)\n    persona = PersonaManager.create(\n        tool_ids=[search_tool_id], user_performing_action=admin_user\n    )\n    chat_session = ChatSessionManager.create(\n        persona_id=persona.id, user_performing_action=admin_user\n    )\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"always run search\",\n        user_performing_action=admin_user,\n        forced_tool_ids=[search_tool_id],\n        mock_llm_response='{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"gamma\"]}}',\n    )\n\n    assert response.error is None, f\"Unexpected stream error: {response.error}\"\n    assert any(\n        tool.tool_name == ToolName.INTERNAL_SEARCH for tool in response.used_tools\n    )\n    assert len(response.tool_call_debug) == 1\n    assert response.tool_call_debug[0].tool_name == \"internal_search\"\n    assert response.tool_call_debug[0].tool_args == {\"queries\": [\"gamma\"]}\n"
  },
  {
    "path": "backend/tests/integration/tests/mcp/test_mcp_client_no_auth_flow.py",
    "content": "import os\nimport socket\nimport subprocess\nimport sys\nimport time\nfrom collections.abc import Generator\nfrom pathlib import Path\n\nimport pytest\nimport requests\n\nfrom onyx.db.enums import MCPAuthenticationPerformer\nfrom onyx.db.enums import MCPAuthenticationType\nfrom onyx.db.enums import MCPTransport\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\n\n# TODO: update mcp client tests to use constants in common_utils/constants.py\n# NOTE: the tests for client should be independent of the Onyx MCP server\n# This means the port should probably stay to be 8010/not 8090 the Onyx MCP server port\n# Use MOCK_MCP_SERVER_PORT to avoid conflicts with the real Onyx MCP server port (8090)\nMCP_SERVER_HOST = os.getenv(\"TEST_WEB_HOSTNAME\", \"127.0.0.1\")\nMCP_SERVER_PORT = int(os.getenv(\"MOCK_MCP_SERVER_PORT\", \"8010\"))\nMCP_SERVER_URL = f\"http://{MCP_SERVER_HOST}:{MCP_SERVER_PORT}/mcp\"\nMCP_HELLO_TOOL = \"hello\"\n\nMCP_SERVER_SCRIPT = (\n    Path(__file__).resolve().parents[2]\n    / \"mock_services\"\n    / \"mcp_test_server\"\n    / \"run_mcp_server_no_auth.py\"\n)\n\n\ndef _wait_for_port(\n    host: str,\n    port: int,\n    process: subprocess.Popen[bytes],\n    timeout_seconds: float = 10.0,\n) -> None:\n    start = time.monotonic()\n    while time.monotonic() - start < timeout_seconds:\n        if process.poll() is not None:\n            raise RuntimeError(\"MCP server process exited unexpectedly during startup\")\n\n        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:\n            sock.settimeout(0.5)\n            try:\n                sock.connect((host, port))\n                return\n            except OSError:\n                time.sleep(0.1)\n\n    raise TimeoutError(\"Timed out waiting for MCP server to accept connections\")\n\n\n@pytest.fixture(scope=\"module\")\ndef mcp_no_auth_server() -> Generator[None, None, None]:\n    process = subprocess.Popen(\n        [sys.executable, str(MCP_SERVER_SCRIPT), str(MCP_SERVER_PORT)],\n        cwd=MCP_SERVER_SCRIPT.parent,\n    )\n\n    try:\n        _wait_for_port(MCP_SERVER_HOST, MCP_SERVER_PORT, process)\n        yield\n    finally:\n        process.terminate()\n        try:\n            process.wait(timeout=5)\n        except subprocess.TimeoutExpired:\n            process.kill()\n\n\n@pytest.fixture(scope=\"module\", autouse=True)\ndef ensure_mcp_server_exists() -> None:\n    if not MCP_SERVER_SCRIPT.exists():\n        raise FileNotFoundError(\n            f\"Expected MCP server script at {MCP_SERVER_SCRIPT}, but it was not found\"\n        )\n\n\ndef test_mcp_client_no_auth_flow(\n    mcp_no_auth_server: None,  # noqa: ARG001\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    basic_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    # Step a) Create a no-auth MCP server via the admin API\n    create_response = requests.post(\n        f\"{API_SERVER_URL}/admin/mcp/servers/create\",\n        json={\n            \"name\": \"integration-mcp-no-auth\",\n            \"description\": \"Integration test MCP server\",\n            \"server_url\": MCP_SERVER_URL,\n            \"transport\": MCPTransport.STREAMABLE_HTTP.value,\n            \"auth_type\": MCPAuthenticationType.NONE.value,\n            \"auth_performer\": MCPAuthenticationPerformer.ADMIN.value,\n        },\n        headers=admin_user.headers,\n        cookies=admin_user.cookies,\n    )\n    create_response.raise_for_status()\n    server_id = create_response.json()[\"server_id\"]\n\n    # Step b) list the server's tools\n    tools_response = requests.get(\n        f\"{API_SERVER_URL}/admin/mcp/server/{server_id}/tools\",\n        headers=admin_user.headers,\n        cookies=admin_user.cookies,\n    )\n    tools_response.raise_for_status()\n    tool_entries = tools_response.json()[\"tools\"]\n    assert len(tool_entries) == 101\n\n    # Update server status to CONNECTED\n    status_response = requests.patch(\n        f\"{API_SERVER_URL}/admin/mcp/server/{server_id}/status\",\n        params={\"status\": \"CONNECTED\"},\n        headers=admin_user.headers,\n        cookies=admin_user.cookies,\n    )\n    status_response.raise_for_status()\n\n    tools_response = requests.get(\n        f\"{API_SERVER_URL}/admin/mcp/server/{server_id}/db-tools\",\n        headers=admin_user.headers,\n        cookies=admin_user.cookies,\n    )\n    tools_response.raise_for_status()\n    tool_entries = tools_response.json()[\"tools\"]\n    hello_tool_entry = next(\n        tool for tool in tool_entries if tool[\"name\"] == MCP_HELLO_TOOL\n    )\n    tool_id = hello_tool_entry[\"id\"]\n\n    # Step c) Create an assistant (persona) with the MCP tool attached\n    persona = PersonaManager.create(\n        name=\"integration-mcp-persona\",\n        description=\"Persona for MCP integration test\",\n        tool_ids=[tool_id],\n        user_performing_action=admin_user,\n    )\n    persona_tools_response = requests.get(\n        f\"{API_SERVER_URL}/persona\",\n        headers=basic_user.headers,\n        cookies=basic_user.cookies,\n    )\n    persona_tools_response.raise_for_status()\n    persona_entries = persona_tools_response.json()\n    persona_entry = next(\n        entry for entry in persona_entries if entry[\"id\"] == persona.id\n    )\n    persona_tool_ids = {tool[\"id\"] for tool in persona_entry[\"tools\"]}\n    assert tool_id in persona_tool_ids\n"
  },
  {
    "path": "backend/tests/integration/tests/mcp/test_mcp_server_auth.py",
    "content": "\"\"\"Integration tests for MCP Server auth delegated to API /me.\"\"\"\n\nimport requests\n\nfrom tests.integration.common_utils.constants import MCP_SERVER_URL\nfrom tests.integration.common_utils.managers.pat import PATManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nSTREAMABLE_HTTP_URL = f\"{MCP_SERVER_URL.rstrip('/')}/?transportType=streamable-http\"\n\n\ndef test_mcp_server_health_check(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Test MCP server health check endpoint.\"\"\"\n    response = requests.get(f\"{MCP_SERVER_URL}/health\", timeout=10)\n    assert response.status_code == 200\n    assert response.json()[\"status\"] == \"healthy\"\n    assert response.json()[\"service\"] == \"mcp_server\"\n\n\ndef test_mcp_server_auth_missing_token(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Test MCP server rejects requests without credentials.\"\"\"\n    response = requests.post(STREAMABLE_HTTP_URL)\n    assert response.status_code == 401\n\n\ndef test_mcp_server_auth_invalid_token(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Test MCP server rejects requests with an invalid bearer token.\"\"\"\n    response = requests.post(\n        STREAMABLE_HTTP_URL,\n        headers={\"Authorization\": \"Bearer invalid-token\"},\n        json={\"jsonrpc\": \"2.0\", \"method\": \"initialize\", \"id\": 1},\n    )\n    assert response.status_code == 401\n\n\ndef test_mcp_server_auth_valid_token(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test MCP server accepts requests with a valid bearer token.\"\"\"\n    pat = PATManager.create(\n        name=\"Test MCP Token\",\n        expiration_days=7,\n        user_performing_action=admin_user,\n    )\n    access_token = pat.token\n\n    # Test connection with MCP protocol request\n    response = requests.post(\n        STREAMABLE_HTTP_URL,\n        headers={\n            \"Authorization\": f\"Bearer {access_token}\",\n            \"Content-Type\": \"application/json\",\n            \"Accept\": \"application/json\",\n            \"MCP-Protocol-Version\": \"2025-03-26\",\n        },\n        json={\"jsonrpc\": \"2.0\", \"method\": \"initialize\", \"id\": 1},\n    )\n\n    # Should be authenticated (may return MCP protocol response or error)\n    # 200 = valid MCP protocol response\n    # 400 = valid protocol error (authenticated but bad request)\n    assert response.status_code in [200, 400]\n"
  },
  {
    "path": "backend/tests/integration/tests/mcp/test_mcp_server_search.py",
    "content": "\"\"\"Integration tests covering MCP document search flows.\"\"\"\n\nfrom __future__ import annotations\n\nimport asyncio\nimport json\nimport os\nfrom collections.abc import Awaitable\nfrom collections.abc import Callable\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nimport pytest\nfrom mcp import ClientSession\nfrom mcp.client.streamable_http import streamablehttp_client\nfrom mcp.types import CallToolResult\nfrom mcp.types import TextContent\n\nfrom onyx.db.enums import AccessType\nfrom tests.integration.common_utils.constants import MCP_SERVER_URL\nfrom tests.integration.common_utils.managers.api_key import APIKeyManager\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.pat import PATManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestAPIKey\nfrom tests.integration.common_utils.test_models import DATestCCPair\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\n# Constants\nMCP_SEARCH_TOOL = \"search_indexed_documents\"\nINDEXED_SOURCES_RESOURCE_URI = \"resource://indexed_sources\"\nDEFAULT_SEARCH_LIMIT = 5\nSTREAMABLE_HTTP_URL = f\"{MCP_SERVER_URL.rstrip('/')}/?transportType=streamable-http\"\n\n\ndef _run_with_mcp_session(\n    headers: dict[str, str],\n    action: Callable[[ClientSession], Awaitable[Any]],\n) -> Any:\n    \"\"\"Run an async action with an MCP client session.\"\"\"\n\n    async def _runner() -> Any:\n        async with streamablehttp_client(STREAMABLE_HTTP_URL, headers=headers) as (\n            read,\n            write,\n            _,\n        ):\n            async with ClientSession(read, write) as session:\n                return await action(session)\n\n    return asyncio.run(_runner())\n\n\ndef _extract_tool_payload(result: CallToolResult) -> dict[str, Any]:\n    \"\"\"Extract JSON payload from MCP tool result.\"\"\"\n    if result.isError:\n        raise AssertionError(f\"MCP tool returned error: {result}\")\n\n    text_blocks = [\n        block.text\n        for block in result.content\n        if isinstance(block, TextContent) and block.text\n    ]\n    if not text_blocks:\n        raise AssertionError(\"Expected textual content from MCP tool result\")\n\n    return json.loads(text_blocks[-1])\n\n\ndef _call_search_tool(\n    headers: dict[str, str], query: str, limit: int = DEFAULT_SEARCH_LIMIT\n) -> CallToolResult:\n    \"\"\"Call the search_indexed_documents tool via MCP.\"\"\"\n\n    async def _action(session: ClientSession) -> CallToolResult:\n        await session.initialize()\n        return await session.call_tool(\n            MCP_SEARCH_TOOL,\n            {\n                \"query\": query,\n                \"limit\": limit,\n            },\n        )\n\n    return _run_with_mcp_session(headers, _action)\n\n\ndef _auth_headers(user: DATestUser, name: str) -> dict[str, str]:\n    \"\"\"Create authorization headers with a PAT token.\"\"\"\n    pat = PATManager.create(\n        name=name,\n        expiration_days=7,\n        user_performing_action=user,\n    )\n    return {\"Authorization\": f\"Bearer {pat.token}\"}\n\n\ndef _seed_document_and_wait_for_indexing(\n    cc_pair: DATestCCPair,\n    content: str,\n    api_key: DATestAPIKey,\n    user_performing_action: DATestUser,\n) -> None:\n    \"\"\"Seed a document and wait for indexing to complete.\"\"\"\n    before = datetime.now(timezone.utc)\n    DocumentManager.seed_doc_with_content(\n        cc_pair=cc_pair,\n        content=content,\n        api_key=api_key,\n    )\n    CCPairManager.wait_for_indexing_completion(\n        cc_pair=cc_pair,\n        after=before,\n        user_performing_action=user_performing_action,\n    )\n\n\ndef test_mcp_document_search_flow(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test the complete MCP search flow: initialization, resources, tools, and search.\"\"\"\n    # LLM provider is required for the document-search endpoint\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    api_key = APIKeyManager.create(user_performing_action=admin_user)\n    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin_user)\n\n    doc_text = \"MCP happy path search document\"\n    _seed_document_and_wait_for_indexing(\n        cc_pair=cc_pair,\n        content=doc_text,\n        api_key=api_key,\n        user_performing_action=admin_user,\n    )\n\n    headers = _auth_headers(admin_user, name=\"mcp-search-flow\")\n\n    async def _full_flow(session: ClientSession) -> Any:\n        await session.initialize()\n        resources = await session.list_resources()\n        tools = await session.list_tools()\n        search_result = await session.call_tool(\n            MCP_SEARCH_TOOL,\n            {\n                \"query\": doc_text,\n                \"limit\": DEFAULT_SEARCH_LIMIT,\n            },\n        )\n        return resources, tools, search_result\n\n    resources_result, tools_result, search_result = _run_with_mcp_session(\n        headers, _full_flow\n    )\n\n    # Verify resources are available\n    resource_uris = {str(resource.uri) for resource in resources_result.resources}\n    assert INDEXED_SOURCES_RESOURCE_URI in resource_uris\n\n    # Verify tools are available\n    tool_names = {tool.name for tool in tools_result.tools}\n    assert MCP_SEARCH_TOOL in tool_names\n\n    # Verify search results\n    payload = _extract_tool_payload(search_result)\n    assert payload[\"query\"] == doc_text\n    assert payload[\"total_results\"] >= 1\n    assert isinstance(payload[\"documents\"], list)\n    assert len(payload[\"documents\"]) > 0\n    assert any(doc_text in (doc.get(\"content\") or \"\") for doc in payload[\"documents\"])\n\n    # Verify document structure\n    for doc in payload[\"documents\"]:\n        assert isinstance(doc, dict)\n        # Verify expected fields exist (may be None)\n        assert \"content\" in doc\n        assert \"semantic_identifier\" in doc\n        assert \"source_type\" in doc\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"User group permissions are Enterprise-only\",\n)\ndef test_mcp_search_respects_acl_filters(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test that search respects ACL filters - privileged users can access, others cannot.\"\"\"\n    # LLM provider is required for the document-search endpoint\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    user_without_access = UserManager.create(name=\"mcp-acl-user-a\")\n    privileged_user = UserManager.create(name=\"mcp-acl-user-b\")\n\n    api_key = APIKeyManager.create(user_performing_action=admin_user)\n    restricted_cc_pair = CCPairManager.create_from_scratch(\n        access_type=AccessType.PRIVATE,\n        user_performing_action=admin_user,\n    )\n\n    user_group = UserGroupManager.create(\n        user_ids=[privileged_user.id],\n        cc_pair_ids=[restricted_cc_pair.id],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_performing_action=admin_user, user_groups_to_check=[user_group]\n    )\n\n    restricted_doc_content = \"MCP restricted knowledge base document\"\n    _seed_document_and_wait_for_indexing(\n        cc_pair=restricted_cc_pair,\n        content=restricted_doc_content,\n        api_key=api_key,\n        user_performing_action=admin_user,\n    )\n\n    privileged_headers = _auth_headers(privileged_user, \"mcp-acl-allowed\")\n    restricted_headers = _auth_headers(user_without_access, \"mcp-acl-blocked\")\n\n    # Privileged user should find the document\n    allowed_result = _call_search_tool(privileged_headers, restricted_doc_content)\n    allowed_payload = _extract_tool_payload(allowed_result)\n    assert allowed_payload[\"total_results\"] >= 1\n    assert any(\n        restricted_doc_content in (doc.get(\"content\") or \"\")\n        for doc in allowed_payload[\"documents\"]\n    )\n\n    # User without access should not find the document\n    blocked_result = _call_search_tool(restricted_headers, restricted_doc_content)\n    blocked_payload = _extract_tool_payload(blocked_result)\n    assert blocked_payload[\"total_results\"] == 0\n    assert blocked_payload[\"documents\"] == []\n"
  },
  {
    "path": "backend/tests/integration/tests/migrations/conftest.py",
    "content": "\"\"\"\npytest-alembic configuration for testing Alembic migrations.\n\nThis module provides fixtures required by pytest-alembic to test the main\nschema migrations (alembic). For alembic_tenants, see test_alembic_tenants.py.\n\nUsage:\n    Run all built-in pytest-alembic tests:\n        pytest tests/integration/tests/migrations/test_alembic_main.py -v\n\nSee: https://pytest-alembic.readthedocs.io/en/latest/\n\"\"\"\n\nfrom collections.abc import Generator\nfrom typing import Any\n\nimport pytest\nfrom sqlalchemy import create_engine\nfrom sqlalchemy import text\nfrom sqlalchemy.engine import Engine\n\nfrom onyx.configs.app_configs import POSTGRES_HOST\nfrom onyx.configs.app_configs import POSTGRES_PASSWORD\nfrom onyx.configs.app_configs import POSTGRES_PORT\nfrom onyx.configs.app_configs import POSTGRES_USER\nfrom onyx.db.engine.sql_engine import build_connection_string\nfrom onyx.db.engine.sql_engine import SYNC_DB_API\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA\n\n\ndef _create_sync_engine() -> Engine:\n    \"\"\"Create a synchronous SQLAlchemy engine for pytest-alembic.\"\"\"\n    conn_str = build_connection_string(\n        db=\"postgres\",\n        user=POSTGRES_USER,\n        password=POSTGRES_PASSWORD,\n        host=POSTGRES_HOST,\n        port=POSTGRES_PORT,\n        db_api=SYNC_DB_API,\n    )\n    return create_engine(conn_str)\n\n\n@pytest.fixture\ndef alembic_config() -> dict[str, Any]:\n    \"\"\"\n    Configure pytest-alembic for the main schema migrations.\n\n    Returns pytest-alembic configuration options.\n    See: https://pytest-alembic.readthedocs.io/en/latest/setup.html\n    \"\"\"\n    return {\n        \"file\": \"alembic.ini\",\n        \"script_location\": \"alembic\",\n        # Pass additional attributes to the alembic config\n        # These will be available in env.py via context.config.attributes\n        \"attributes\": {\n            \"schema_name\": POSTGRES_DEFAULT_SCHEMA,\n        },\n    }\n\n\n@pytest.fixture\ndef alembic_engine() -> Generator[Engine, None, None]:\n    \"\"\"\n    Provide a synchronous SQLAlchemy engine for pytest-alembic.\n\n    pytest-alembic requires a synchronous engine to run migrations.\n    The engine is configured to connect to the test database.\n\n    Note: pytest-alembic will internally perform commits, so ensure\n    the database is in an appropriate state before running tests.\n    \"\"\"\n    engine = _create_sync_engine()\n\n    # Ensure the default schema exists\n    with engine.connect() as conn:\n        conn.execute(text(f'CREATE SCHEMA IF NOT EXISTS \"{POSTGRES_DEFAULT_SCHEMA}\"'))\n        conn.commit()\n\n    yield engine\n\n    engine.dispose()\n"
  },
  {
    "path": "backend/tests/integration/tests/migrations/test_alembic_main.py",
    "content": "\"\"\"\npytest-alembic tests for the main schema migrations.\n\nThese tests use pytest-alembic to verify that alembic migrations are correct.\nThe tests cover:\n- Single head revision (no diverged migration history)\n- Upgrade path from base to head\n- Up/down consistency (all downgrades succeed)\n\nUsage:\n    pytest tests/integration/tests/migrations/test_alembic_main.py -v\n\nSee: https://github.com/schireson/pytest-alembic\n\"\"\"\n\nfrom pytest_alembic.tests import test_single_head_revision  # type: ignore[import-not-found,unused-ignore]\nfrom pytest_alembic.tests import test_up_down_consistency  # type: ignore[import-not-found,unused-ignore]\nfrom pytest_alembic.tests import test_upgrade  # type: ignore[import-not-found,unused-ignore]\n\n__all__ = [\n    \"test_single_head_revision\",\n    \"test_up_down_consistency\",\n    \"test_upgrade\",\n]\n"
  },
  {
    "path": "backend/tests/integration/tests/migrations/test_alembic_tenants.py",
    "content": "\"\"\"\npytest-alembic tests for the tenants/public schema migrations.\n\nThese tests use pytest-alembic to verify that alembic_tenants migrations\nare correct. The alembic_tenants configuration handles migrations for\nthe public schema tables that are shared across tenants.\n\nUsage:\n    pytest tests/integration/tests/migrations/test_alembic_tenants.py -v\n\nSee: https://github.com/schireson/pytest-alembic\n\"\"\"\n\nfrom collections.abc import Generator\nfrom typing import Any\n\nimport pytest\nfrom pytest_alembic import create_alembic_fixture  # type: ignore[import-not-found,unused-ignore]\nfrom pytest_alembic.tests import test_single_head_revision  # type: ignore[import-not-found,unused-ignore]\nfrom pytest_alembic.tests import test_up_down_consistency  # type: ignore[import-not-found,unused-ignore]\nfrom pytest_alembic.tests import test_upgrade  # type: ignore[import-not-found,unused-ignore]\nfrom sqlalchemy import create_engine\nfrom sqlalchemy.engine import Engine\n\nfrom onyx.configs.app_configs import POSTGRES_HOST\nfrom onyx.configs.app_configs import POSTGRES_PASSWORD\nfrom onyx.configs.app_configs import POSTGRES_PORT\nfrom onyx.configs.app_configs import POSTGRES_USER\nfrom onyx.db.engine.sql_engine import build_connection_string\nfrom onyx.db.engine.sql_engine import SYNC_DB_API\n\n\n@pytest.fixture\ndef alembic_config() -> dict[str, Any]:\n    \"\"\"Override alembic_config for tenants configuration.\"\"\"\n    return {\n        \"file\": \"alembic.ini\",\n        \"config_ini_section\": \"schema_private\",\n        \"script_location\": \"alembic_tenants\",\n    }\n\n\n@pytest.fixture\ndef alembic_engine() -> Generator[Engine, None, None]:\n    \"\"\"Override alembic_engine for tenants configuration.\"\"\"\n    conn_str = build_connection_string(\n        db=\"postgres\",\n        user=POSTGRES_USER,\n        password=POSTGRES_PASSWORD,\n        host=POSTGRES_HOST,\n        port=POSTGRES_PORT,\n        db_api=SYNC_DB_API,\n    )\n    engine = create_engine(conn_str)\n    yield engine\n    engine.dispose()\n\n\n# Create a custom alembic fixture for the tenants configuration\nalembic_runner = create_alembic_fixture()\n\n__all__ = [\n    \"test_single_head_revision\",\n    \"test_up_down_consistency\",\n    \"test_upgrade\",\n]\n"
  },
  {
    "path": "backend/tests/integration/tests/migrations/test_assistant_consolidation_migration.py",
    "content": "\"\"\"\nIntegration tests for the assistant consolidation migration.\n\nTests the migration from multiple default assistants (Search, General, Art, etc.)\nto a single default Assistant (ID 0) and the associated tool seeding.\n\"\"\"\n\nfrom sqlalchemy import text\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom tests.integration.common_utils.reset import downgrade_postgres\nfrom tests.integration.common_utils.reset import upgrade_postgres\n\n\ndef test_cold_startup_default_assistant() -> None:\n    \"\"\"Test that cold startup creates only the default assistant.\"\"\"\n    # Start fresh at the head revision\n    downgrade_postgres(\n        database=\"postgres\", config_name=\"alembic\", revision=\"base\", clear_data=True\n    )\n    upgrade_postgres(database=\"postgres\", config_name=\"alembic\", revision=\"head\")\n\n    with get_session_with_current_tenant() as db_session:\n        # Check only default assistant exists\n        result = db_session.execute(\n            text(\n                \"\"\"\n                SELECT id, name, builtin_persona, is_featured, deleted\n                FROM persona\n                WHERE builtin_persona = true\n                ORDER BY id\n                \"\"\"\n            )\n        )\n        assistants = result.fetchall()\n\n        # Should have exactly one builtin assistant\n        assert len(assistants) == 1, \"Should have exactly one builtin assistant\"\n        default = assistants[0]\n        assert default[0] == 0, \"Default assistant should have ID 0\"\n        assert default[1] == \"Assistant\", \"Should be named 'Assistant'\"\n        assert default[2] is True, \"Should be builtin\"\n        assert default[3] is True, \"Should be is_featured\"\n        assert default[4] is False, \"Should not be deleted\"\n\n        # Check tools are properly associated\n        result = db_session.execute(\n            text(\n                \"\"\"\n                SELECT t.name, t.display_name\n                FROM tool t\n                JOIN persona__tool pt ON t.id = pt.tool_id\n                WHERE pt.persona_id = 0\n                ORDER BY t.name\n                \"\"\"\n            )\n        )\n        tool_associations = result.fetchall()\n        tool_names = [row[0] for row in tool_associations]\n        tool_display_names = [row[1] for row in tool_associations]\n\n        # Verify all three main tools are attached\n        assert (\n            \"internal_search\" in tool_names\n        ), \"Default assistant should have SearchTool attached\"\n        assert (\n            \"generate_image\" in tool_names\n        ), \"Default assistant should have ImageGenerationTool attached\"\n        assert (\n            \"web_search\" in tool_names\n        ), \"Default assistant should have WebSearchTool attached\"\n        assert (\n            \"read_file\" in tool_names\n        ), \"Default assistant should have FileReaderTool attached\"\n        assert (\n            \"python\" in tool_names\n        ), \"Default assistant should have PythonTool attached\"\n\n        # Also verify by display names for clarity\n        assert (\n            \"Internal Search\" in tool_display_names\n        ), \"Default assistant should have Internal Search tool\"\n        assert (\n            \"Image Generation\" in tool_display_names\n        ), \"Default assistant should have Image Generation tool\"\n        assert (\n            \"Web Search\" in tool_display_names\n        ), \"Default assistant should have Web Search tool\"\n        assert (\n            \"File Reader\" in tool_display_names\n        ), \"Default assistant should have File Reader tool\"\n        assert (\n            \"Code Interpreter\" in tool_display_names\n        ), \"Default assistant should have Code Interpreter tool\"\n\n        # Should have exactly 6 tools\n        assert (\n            len(tool_associations) == 6\n        ), f\"Default assistant should have exactly 6 tools attached, got {len(tool_associations)}\"\n"
  },
  {
    "path": "backend/tests/integration/tests/migrations/test_migrations.py",
    "content": "# TODO(rkuo): All of the downgrade_postgres and upgrade_postgres operations here\n# are vulnerable to deadlocks. We could deal with them similar to reset_postgres\n# where we retry out of process\n\nimport json\n\nimport pytest\nfrom sqlalchemy import text\n\nfrom onyx.configs.constants import ANONYMOUS_USER_UUID\nfrom onyx.configs.constants import DEFAULT_BOOST\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom tests.integration.common_utils.reset import downgrade_postgres\nfrom tests.integration.common_utils.reset import upgrade_postgres\n\n\n@pytest.mark.skip(\n    reason=\"Migration test no longer needed - migration has been applied to production\"\n)\ndef test_fix_capitalization_migration() -> None:\n    \"\"\"Test that the be2ab2aa50ee migration correctly lowercases external_user_group_ids\"\"\"\n    # Reset the database and run migrations up to the second to last migration\n    downgrade_postgres(\n        database=\"postgres\", config_name=\"alembic\", revision=\"base\", clear_data=True\n    )\n    upgrade_postgres(\n        database=\"postgres\",\n        config_name=\"alembic\",\n        # Upgrade it to the migration before the fix\n        revision=\"369644546676\",\n    )\n\n    # Insert test data with mixed case group IDs\n    test_data = [\n        {\n            \"id\": \"test_doc_1\",\n            \"external_user_group_ids\": [\"Group1\", \"GROUP2\", \"group3\"],\n            \"semantic_id\": \"test_doc_1\",\n            \"boost\": DEFAULT_BOOST,\n            \"hidden\": False,\n            \"from_ingestion_api\": False,\n            \"last_modified\": \"NOW()\",\n        },\n        {\n            \"id\": \"test_doc_2\",\n            \"external_user_group_ids\": [\"UPPER1\", \"upper2\", \"UPPER3\"],\n            \"semantic_id\": \"test_doc_2\",\n            \"boost\": DEFAULT_BOOST,\n            \"hidden\": False,\n            \"from_ingestion_api\": False,\n            \"last_modified\": \"NOW()\",\n        },\n    ]\n\n    # Insert the test data\n    with get_session_with_current_tenant() as db_session:\n        for doc in test_data:\n            db_session.execute(\n                text(\n                    \"\"\"\n                    INSERT INTO document (\n                        id,\n                        external_user_group_ids,\n                        semantic_id,\n                        boost,\n                        hidden,\n                        from_ingestion_api,\n                        last_modified\n                    )\n                    VALUES (\n                        :id,\n                        :group_ids,\n                        :semantic_id,\n                        :boost,\n                        :hidden,\n                        :from_ingestion_api,\n                        :last_modified\n                    )\n                    \"\"\"\n                ),\n                {\n                    \"id\": doc[\"id\"],\n                    \"group_ids\": doc[\"external_user_group_ids\"],\n                    \"semantic_id\": doc[\"semantic_id\"],\n                    \"boost\": doc[\"boost\"],\n                    \"hidden\": doc[\"hidden\"],\n                    \"from_ingestion_api\": doc[\"from_ingestion_api\"],\n                    \"last_modified\": doc[\"last_modified\"],\n                },\n            )\n        db_session.commit()\n\n    # Verify the data was inserted correctly\n    with get_session_with_current_tenant() as db_session:\n        results = db_session.execute(\n            text(\n                \"\"\"\n                SELECT id, external_user_group_ids\n                FROM document\n                WHERE id IN ('test_doc_1', 'test_doc_2')\n                ORDER BY id\n                \"\"\"\n            )\n        ).fetchall()\n\n        # Verify initial state\n        assert len(results) == 2\n        assert results[0].external_user_group_ids == [\"Group1\", \"GROUP2\", \"group3\"]\n        assert results[1].external_user_group_ids == [\"UPPER1\", \"upper2\", \"UPPER3\"]\n\n    # Run migrations again to apply the fix\n    upgrade_postgres(\n        database=\"postgres\", config_name=\"alembic\", revision=\"be2ab2aa50ee\"\n    )\n\n    # Verify the fix was applied\n    with get_session_with_current_tenant() as db_session:\n        results = db_session.execute(\n            text(\n                \"\"\"\n                SELECT id, external_user_group_ids\n                FROM document\n                WHERE id IN ('test_doc_1', 'test_doc_2')\n                ORDER BY id\n                \"\"\"\n            )\n        ).fetchall()\n\n        # Verify all group IDs are lowercase\n        assert len(results) == 2\n        assert results[0].external_user_group_ids == [\"group1\", \"group2\", \"group3\"]\n        assert results[1].external_user_group_ids == [\"upper1\", \"upper2\", \"upper3\"]\n\n\ndef test_jira_connector_migration() -> None:\n    \"\"\"Test that the da42808081e3 migration correctly updates Jira connector configurations\"\"\"\n    # Reset the database and run migrations up to the migration before the Jira connector change\n    downgrade_postgres(\n        database=\"postgres\", config_name=\"alembic\", revision=\"base\", clear_data=True\n    )\n    upgrade_postgres(\n        database=\"postgres\",\n        config_name=\"alembic\",\n        # Upgrade it to the migration before the Jira connector change\n        revision=\"f13db29f3101\",\n    )\n\n    # Insert test data with various Jira connector configurations\n    test_data = [\n        {\n            \"id\": 1,\n            \"name\": \"jira_connector_1\",\n            \"source\": \"JIRA\",\n            \"connector_specific_config\": {\n                \"jira_project_url\": \"https://example.atlassian.net/projects/PROJ\",\n                \"comment_email_blacklist\": [\"test@example.com\"],\n                \"batch_size\": 100,\n                \"labels_to_skip\": [\"skip-me\"],\n            },\n        },\n        {\n            \"id\": 2,\n            \"name\": \"jira_connector_2\",\n            \"source\": \"JIRA\",\n            \"connector_specific_config\": {\n                \"jira_project_url\": \"https://other.atlassian.net/projects/OTHER\"\n            },\n        },\n        {\n            \"id\": 3,\n            \"name\": \"jira_connector_3\",\n            \"source\": \"JIRA\",\n            \"connector_specific_config\": {\n                \"jira_project_url\": \"https://example.atlassian.net/projects/TEST\",\n                \"batch_size\": 50,\n            },\n        },\n    ]\n\n    # Insert the test data\n    with get_session_with_current_tenant() as db_session:\n        for connector in test_data:\n            db_session.execute(\n                text(\n                    \"\"\"\n                    INSERT INTO connector (\n                        id,\n                        name,\n                        source,\n                        connector_specific_config\n                    )\n                    VALUES (\n                        :id,\n                        :name,\n                        :source,\n                        :config\n                    )\n                    \"\"\"\n                ),\n                {\n                    \"id\": connector[\"id\"],\n                    \"name\": connector[\"name\"],\n                    \"source\": connector[\"source\"],\n                    \"config\": json.dumps(connector[\"connector_specific_config\"]),\n                },\n            )\n        db_session.commit()\n\n    # Verify the data was inserted correctly\n    with get_session_with_current_tenant() as db_session:\n        results = db_session.execute(\n            text(\n                \"\"\"\n                SELECT id, connector_specific_config\n                FROM connector\n                WHERE source = 'JIRA'\n                ORDER BY id\n                \"\"\"\n            )\n        ).fetchall()\n\n        # Verify initial state\n        assert len(results) == 3\n        assert (\n            results[0].connector_specific_config\n            == test_data[0][\"connector_specific_config\"]\n        )\n        assert (\n            results[1].connector_specific_config\n            == test_data[1][\"connector_specific_config\"]\n        )\n        assert (\n            results[2].connector_specific_config\n            == test_data[2][\"connector_specific_config\"]\n        )\n\n    # Run migrations again to apply the Jira connector change\n    upgrade_postgres(\n        database=\"postgres\", config_name=\"alembic\", revision=\"da42808081e3\"\n    )\n    # Verify the upgrade was applied correctly\n    with get_session_with_current_tenant() as db_session:\n        results = db_session.execute(\n            text(\n                \"\"\"\n                SELECT id, connector_specific_config\n                FROM connector\n                WHERE source = 'JIRA'\n                ORDER BY id\n                \"\"\"\n            )\n        ).fetchall()\n\n        # Verify new format\n        assert len(results) == 3\n\n        # First connector - full config\n        config_0 = results[0].connector_specific_config\n        assert config_0[\"jira_base_url\"] == \"https://example.atlassian.net\"\n        assert config_0[\"project_key\"] == \"PROJ\"\n        assert config_0[\"comment_email_blacklist\"] == [\"test@example.com\"]\n        assert config_0[\"batch_size\"] == 100\n        assert config_0[\"labels_to_skip\"] == [\"skip-me\"]\n\n        # Second connector - minimal config\n        config_1 = results[1].connector_specific_config\n        assert config_1[\"jira_base_url\"] == \"https://other.atlassian.net\"\n        assert config_1[\"project_key\"] == \"OTHER\"\n        assert \"comment_email_blacklist\" not in config_1\n        assert \"batch_size\" not in config_1\n        assert \"labels_to_skip\" not in config_1\n\n        # Third connector - partial config\n        config_2 = results[2].connector_specific_config\n        assert config_2[\"jira_base_url\"] == \"https://example.atlassian.net\"\n        assert config_2[\"project_key\"] == \"TEST\"\n        assert config_2[\"batch_size\"] == 50\n        assert \"comment_email_blacklist\" not in config_2\n        assert \"labels_to_skip\" not in config_2\n\n    # Test downgrade path\n    downgrade_postgres(\n        database=\"postgres\", config_name=\"alembic\", revision=\"f13db29f3101\"\n    )\n\n    # Verify the downgrade was applied correctly\n    with get_session_with_current_tenant() as db_session:\n        results = db_session.execute(\n            text(\n                \"\"\"\n                SELECT id, connector_specific_config\n                FROM connector\n                WHERE source = 'JIRA'\n                ORDER BY id\n                \"\"\"\n            )\n        ).fetchall()\n\n        # Verify reverted to old format\n        assert len(results) == 3\n\n        # First connector - full config\n        config_0 = results[0].connector_specific_config\n        assert (\n            config_0[\"jira_project_url\"]\n            == \"https://example.atlassian.net/projects/PROJ\"\n        )\n        assert config_0[\"comment_email_blacklist\"] == [\"test@example.com\"]\n        assert config_0[\"batch_size\"] == 100\n        assert config_0[\"labels_to_skip\"] == [\"skip-me\"]\n\n        # Second connector - minimal config\n        config_1 = results[1].connector_specific_config\n        assert (\n            config_1[\"jira_project_url\"] == \"https://other.atlassian.net/projects/OTHER\"\n        )\n\n        # Third connector - partial config\n        config_2 = results[2].connector_specific_config\n        assert (\n            config_2[\"jira_project_url\"]\n            == \"https://example.atlassian.net/projects/TEST\"\n        )\n        assert config_2[\"batch_size\"] == 50\n\n\ndef test_anonymous_user_migration_dedupes_null_notifications() -> None:\n    downgrade_postgres(\n        database=\"postgres\", config_name=\"alembic\", revision=\"base\", clear_data=True\n    )\n    upgrade_postgres(\n        database=\"postgres\",\n        config_name=\"alembic\",\n        revision=\"f7ca3e2f45d9\",\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        db_session.execute(\n            text(\n                \"\"\"\n                INSERT INTO notification (\n                    id,\n                    notif_type,\n                    user_id,\n                    dismissed,\n                    last_shown,\n                    first_shown,\n                    title,\n                    description,\n                    additional_data\n                )\n                VALUES\n                    (\n                        1,\n                        'RELEASE_NOTES',\n                        NULL,\n                        FALSE,\n                        NOW(),\n                        NOW(),\n                        'Onyx v2.10.0 is available!',\n                        'Check out what''s new in v2.10.0',\n                        '{\"version\":\"v2.10.0\",\"link\":\"https://docs.onyx.app/changelog#v2-10-0\"}'::jsonb\n                    ),\n                    (\n                        2,\n                        'RELEASE_NOTES',\n                        NULL,\n                        FALSE,\n                        NOW(),\n                        NOW(),\n                        'Onyx v2.10.0 is available!',\n                        'Check out what''s new in v2.10.0',\n                        '{\"version\":\"v2.10.0\",\"link\":\"https://docs.onyx.app/changelog#v2-10-0\"}'::jsonb\n                    )\n                \"\"\"\n            )\n        )\n        db_session.commit()\n\n    upgrade_postgres(\n        database=\"postgres\", config_name=\"alembic\", revision=\"e7f8a9b0c1d2\"\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        notifications = db_session.execute(\n            text(\n                \"\"\"\n                SELECT id, user_id\n                FROM notification\n                ORDER BY id\n                \"\"\"\n            )\n        ).fetchall()\n\n        anonymous_user = db_session.execute(\n            text(\n                \"\"\"\n                SELECT id, email, role\n                FROM \"user\"\n                WHERE id = :user_id\n                \"\"\"\n            ),\n            {\"user_id\": ANONYMOUS_USER_UUID},\n        ).fetchone()\n\n    assert len(notifications) == 1\n    assert notifications[0].id == 2  # Higher id wins when timestamps are equal\n    assert str(notifications[0].user_id) == ANONYMOUS_USER_UUID\n    assert anonymous_user is not None\n    assert anonymous_user.email == \"anonymous@onyx.app\"\n    assert anonymous_user.role == \"LIMITED\"\n\n\ndef test_anonymous_user_migration_collision_with_existing_anonymous_notification() -> (\n    None\n):\n    \"\"\"Test that a NULL-owned notification that collides with an already-existing\n    anonymous-owned notification is removed during migration.\"\"\"\n    downgrade_postgres(\n        database=\"postgres\", config_name=\"alembic\", revision=\"base\", clear_data=True\n    )\n    upgrade_postgres(\n        database=\"postgres\",\n        config_name=\"alembic\",\n        revision=\"f7ca3e2f45d9\",\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        # Create the anonymous user early so we can insert a notification owned by it\n        db_session.execute(\n            text(\n                \"\"\"\n                INSERT INTO \"user\" (id, email, hashed_password, is_active, is_superuser, is_verified, role)\n                VALUES (:id, 'anonymous@onyx.app', '', TRUE, FALSE, TRUE, 'LIMITED')\n                ON CONFLICT (id) DO NOTHING\n                \"\"\"\n            ),\n            {\"id\": ANONYMOUS_USER_UUID},\n        )\n        # Insert an anonymous-owned notification (already migrated in a prior partial run)\n        db_session.execute(\n            text(\n                \"\"\"\n                INSERT INTO notification (\n                    id, notif_type, user_id, dismissed, last_shown, first_shown,\n                    title, description, additional_data\n                )\n                VALUES\n                    (\n                        1, 'RELEASE_NOTES', :user_id, FALSE, NOW(), NOW(),\n                        'Onyx v2.10.0 is available!',\n                        'Check out what''s new in v2.10.0',\n                        '{\"version\":\"v2.10.0\",\"link\":\"https://docs.onyx.app/changelog#v2-10-0\"}'::jsonb\n                    ),\n                    (\n                        2, 'RELEASE_NOTES', NULL, FALSE, NOW(), NOW(),\n                        'Onyx v2.10.0 is available!',\n                        'Check out what''s new in v2.10.0',\n                        '{\"version\":\"v2.10.0\",\"link\":\"https://docs.onyx.app/changelog#v2-10-0\"}'::jsonb\n                    )\n                \"\"\"\n            ),\n            {\"user_id\": ANONYMOUS_USER_UUID},\n        )\n        db_session.commit()\n\n    upgrade_postgres(\n        database=\"postgres\", config_name=\"alembic\", revision=\"e7f8a9b0c1d2\"\n    )\n\n    with get_session_with_current_tenant() as db_session:\n        notifications = db_session.execute(\n            text(\n                \"\"\"\n                SELECT id, user_id\n                FROM notification\n                ORDER BY id\n                \"\"\"\n            )\n        ).fetchall()\n\n    # Only the original anonymous-owned notification should remain;\n    # the NULL-owned duplicate should have been deleted\n    assert len(notifications) == 1\n    assert notifications[0].id == 1\n    assert str(notifications[0].user_id) == ANONYMOUS_USER_UUID\n"
  },
  {
    "path": "backend/tests/integration/tests/migrations/test_tool_seeding.py",
    "content": "from pydantic import BaseModel\nfrom sqlalchemy import text\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom tests.integration.common_utils.reset import downgrade_postgres\nfrom tests.integration.common_utils.reset import upgrade_postgres\n\n\nclass ToolSeedingExpectedResult(BaseModel):\n    name: str\n    display_name: str\n    in_code_tool_id: str\n    user_id: str | None\n\n\nEXPECTED_TOOLS = {\n    \"SearchTool\": ToolSeedingExpectedResult(\n        name=\"internal_search\",\n        display_name=\"Internal Search\",\n        in_code_tool_id=\"SearchTool\",\n        user_id=None,\n    ),\n    \"ImageGenerationTool\": ToolSeedingExpectedResult(\n        name=\"generate_image\",\n        display_name=\"Image Generation\",\n        in_code_tool_id=\"ImageGenerationTool\",\n        user_id=None,\n    ),\n    \"WebSearchTool\": ToolSeedingExpectedResult(\n        name=\"web_search\",\n        display_name=\"Web Search\",\n        in_code_tool_id=\"WebSearchTool\",\n        user_id=None,\n    ),\n    \"KnowledgeGraphTool\": ToolSeedingExpectedResult(\n        name=\"run_kg_search\",\n        display_name=\"Knowledge Graph Search\",\n        in_code_tool_id=\"KnowledgeGraphTool\",\n        user_id=None,\n    ),\n    \"PythonTool\": ToolSeedingExpectedResult(\n        name=\"python\",\n        display_name=\"Code Interpreter\",\n        in_code_tool_id=\"PythonTool\",\n        user_id=None,\n    ),\n    \"ResearchAgent\": ToolSeedingExpectedResult(\n        name=\"research_agent\",\n        display_name=\"Research Agent\",\n        in_code_tool_id=\"ResearchAgent\",\n        user_id=None,\n    ),\n    \"FileReaderTool\": ToolSeedingExpectedResult(\n        name=\"read_file\",\n        display_name=\"File Reader\",\n        in_code_tool_id=\"FileReaderTool\",\n        user_id=None,\n    ),\n    \"MemoryTool\": ToolSeedingExpectedResult(\n        name=\"MemoryTool\",\n        display_name=\"Add Memory\",\n        in_code_tool_id=\"MemoryTool\",\n        user_id=None,\n    ),\n}\n\n\ndef test_tool_seeding_migration() -> None:\n    \"\"\"Test that migration from base to head correctly seeds builtin tools.\"\"\"\n    # Start from base and upgrade to just before tool seeding\n    downgrade_postgres(\n        database=\"postgres\", config_name=\"alembic\", revision=\"base\", clear_data=True\n    )\n    upgrade_postgres(\n        database=\"postgres\",\n        config_name=\"alembic\",\n        revision=\"b7ec9b5b505f\",  # Revision before tool seeding\n    )\n\n    # Verify no tools exist yet\n    with get_session_with_current_tenant() as db_session:\n        result = db_session.execute(text(\"SELECT COUNT(*) FROM tool\"))\n        count = result.scalar()\n        assert count == 0, \"No tools should exist before migration\"\n\n    # Upgrade to head\n    upgrade_postgres(\n        database=\"postgres\",\n        config_name=\"alembic\",\n        revision=\"head\",\n    )\n\n    # Verify tools were created\n    with get_session_with_current_tenant() as db_session:\n        result = db_session.execute(\n            text(\n                \"\"\"\n                SELECT id, name, display_name, description, in_code_tool_id,\n                       user_id\n                FROM tool\n                ORDER BY id\n                \"\"\"\n            )\n        )\n        tools = result.fetchall()\n\n        # Should have all 9 builtin tools\n        assert (\n            len(tools) == 10\n        ), f\"Should have created exactly 9 builtin tools, got {len(tools)}\"\n\n        def validate_tool(expected: ToolSeedingExpectedResult) -> None:\n            tool = next((t for t in tools if t[1] == expected.name), None)\n            assert tool is not None, f\"{expected.name} should exist\"\n            assert (\n                tool[2] == expected.display_name\n            ), f\"{expected.name} display name should be '{expected.display_name}'\"\n            assert (\n                tool[4] == expected.in_code_tool_id\n            ), f\"{expected.name} in_code_tool_id should be '{expected.in_code_tool_id}'\"\n            assert (\n                tool[5] is None\n            ), f\"{expected.name} should not have a user_id (builtin)\"\n\n        # Check SearchTool\n        validate_tool(EXPECTED_TOOLS[\"SearchTool\"])\n\n        # Check ImageGenerationTool\n        validate_tool(EXPECTED_TOOLS[\"ImageGenerationTool\"])\n\n        # Check WebSearchTool\n        validate_tool(EXPECTED_TOOLS[\"WebSearchTool\"])\n\n        # Check KnowledgeGraphTool\n        validate_tool(EXPECTED_TOOLS[\"KnowledgeGraphTool\"])\n\n        # Check PythonTool\n        validate_tool(EXPECTED_TOOLS[\"PythonTool\"])\n\n        # Check ResearchAgent (Deep Research as a tool)\n        validate_tool(EXPECTED_TOOLS[\"ResearchAgent\"])\n\n        # Check FileReaderTool\n        validate_tool(EXPECTED_TOOLS[\"FileReaderTool\"])\n\n        # Check MemoryTool\n        validate_tool(EXPECTED_TOOLS[\"MemoryTool\"])\n"
  },
  {
    "path": "backend/tests/integration/tests/no_vectordb/conftest.py",
    "content": "\"\"\"Fixtures for no-vector-DB integration tests.\n\nThese tests are intended to run against an Onyx deployment started with\nDISABLE_VECTOR_DB=true.  They are automatically **skipped** when the\nserver reports vector_db_enabled=true (i.e. when Vespa is available).\n\"\"\"\n\nimport pytest\nimport requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import GENERAL_HEADERS\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.reset import reset_file_store\nfrom tests.integration.common_utils.reset import reset_postgres\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _server_has_vector_db_disabled() -> bool:\n    \"\"\"Query the running server to check whether DISABLE_VECTOR_DB is set.\"\"\"\n    try:\n        resp = requests.get(\n            f\"{API_SERVER_URL}/settings\",\n            headers=GENERAL_HEADERS,\n        )\n        if resp.ok:\n            return resp.json().get(\"vector_db_enabled\") is False\n    except Exception:\n        pass\n    return False\n\n\n# Skip the entire module when the server has vector DB enabled —\n# these tests only make sense in no-vector-DB deployments.\npytestmark = pytest.mark.skipif(\n    not _server_has_vector_db_disabled(),\n    reason=\"Server is running with vector DB enabled; skipping no-vectordb tests\",\n)\n\n\n@pytest.fixture()\ndef reset() -> None:\n    \"\"\"Reset Postgres and the file store, but skip Vespa (not running).\"\"\"\n    reset_postgres()\n    reset_file_store()\n\n\n@pytest.fixture()\ndef llm_provider(admin_user: DATestUser) -> DATestLLMProvider:\n    \"\"\"Ensure an LLM provider exists for the test session.\"\"\"\n    return LLMProviderManager.create(user_performing_action=admin_user)\n"
  },
  {
    "path": "backend/tests/integration/tests/no_vectordb/test_no_vectordb_chat.py",
    "content": "\"\"\"Integration tests for chat in no-vector-DB mode.\n\nCovers:\n- Uploading a file to a project, sending a chat message, and verifying the LLM\n  receives the file content (small project — fits in context window).\n- Creating a persona with user_files and verifying chat works.\n- Verifying that persona creation with document_sets / hierarchy_nodes /\n  document_ids is rejected with a 400.\n\"\"\"\n\nimport io\nimport time\n\nimport requests\n\nfrom onyx.db.enums import UserFileStatus\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.file import FileManager\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.managers.project import ProjectManager\nfrom tests.integration.common_utils.managers.tool import ToolManager\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nFILE_READER_TOOL_ID = \"FileReaderTool\"\n\n\ndef _wait_for_file_processed(\n    project_id: int,\n    user: DATestUser,\n    timeout: int = 30,\n) -> None:\n    \"\"\"Poll until all files in the project reach COMPLETED status.\"\"\"\n    deadline = time.time() + timeout\n    while time.time() < deadline:\n        files = ProjectManager.get_project_files(project_id, user)\n        if files and all(f.status == UserFileStatus.COMPLETED for f in files):\n            return\n        time.sleep(1)\n    raise TimeoutError(\n        f\"Files in project {project_id} did not reach COMPLETED within {timeout}s\"\n    )\n\n\n# ------------------------------------------------------------------\n# Small-project chat — file content loaded directly into context\n# ------------------------------------------------------------------\n\n\ndef test_chat_with_small_project_file(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"Upload a small text file to a project and send a chat message.\n\n    The file is small enough to fit in the LLM context window, so the LLM\n    should see the file content directly and be able to answer questions\n    about it.\n    \"\"\"\n    project = ProjectManager.create(\n        name=\"test-no-vectordb-small\", user_performing_action=admin_user\n    )\n\n    file_content = b\"The secret code is PINEAPPLE-42.\"\n    ProjectManager.upload_files(\n        project_id=project.id,\n        files=[(\"secret.txt\", file_content)],\n        user_performing_action=admin_user,\n    )\n\n    _wait_for_file_processed(project.id, admin_user)\n\n    # Create a chat session associated with the project's default persona\n    chat_session = ChatSessionManager.create(\n        persona_id=0,\n        description=\"no-vectordb small project test\",\n        user_performing_action=admin_user,\n    )\n\n    # Link the chat session to the project\n    resp = requests.post(\n        f\"{API_SERVER_URL}/user/projects/{project.id}/move_chat_session\",\n        json={\"chat_session_id\": str(chat_session.id)},\n        headers=admin_user.headers,\n    )\n    resp.raise_for_status()\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"What is the secret code in the file?\",\n        user_performing_action=admin_user,\n    )\n\n    assert response.error is None, f\"Chat returned an error: {response.error}\"\n    assert (\n        \"PINEAPPLE-42\" in response.full_message\n    ), f\"Expected the LLM to reference the file content. Got: {response.full_message}\"\n\n\n# ------------------------------------------------------------------\n# Persona with user_files — should work in no-vector-DB mode\n# ------------------------------------------------------------------\n\n\ndef test_persona_with_user_files_chat(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"Create a persona with attached user files and verify chat works.\"\"\"\n    # Upload a file first\n    file_content = b\"Quarterly revenue was $42 million.\"\n    file_obj = io.BytesIO(file_content)\n    file_descriptors, error = FileManager.upload_files(\n        files=[(\"revenue.txt\", file_obj)],\n        user_performing_action=admin_user,\n    )\n    assert not error, f\"File upload failed: {error}\"\n    assert len(file_descriptors) > 0\n\n    user_file_id = file_descriptors[0].get(\"user_file_id\")\n    assert user_file_id, \"Expected user_file_id in upload response\"\n\n    # Wait for the file to be processed\n    deadline = time.time() + 30\n    while time.time() < deadline:\n        time.sleep(1)\n        # Check via file fetch — if it succeeds, the file is ready\n        try:\n            FileManager.fetch_uploaded_file(\n                file_descriptors[0][\"id\"],\n                admin_user,\n            )\n            break\n        except Exception:\n            continue\n\n    # Find the FileReaderTool ID from available tools\n    tools = ToolManager.list_tools(user_performing_action=admin_user)\n    file_reader_tool = next(\n        (t for t in tools if t.in_code_tool_id == FILE_READER_TOOL_ID), None\n    )\n    assert (\n        file_reader_tool is not None\n    ), \"FileReaderTool should be registered as a built-in tool\"\n\n    # Create a persona with the user file attached\n    persona = PersonaManager.create(\n        name=\"no-vectordb-persona-test\",\n        description=\"Test persona for no-vectordb mode\",\n        system_prompt=\"You are a helpful assistant. Answer questions using the available tools and files.\",\n        task_prompt=\"\",\n        user_file_ids=[user_file_id],\n        tool_ids=[file_reader_tool.id],\n        user_performing_action=admin_user,\n    )\n\n    chat_session = ChatSessionManager.create(\n        persona_id=persona.id,\n        description=\"no-vectordb persona test\",\n        user_performing_action=admin_user,\n    )\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"What was the quarterly revenue?\",\n        user_performing_action=admin_user,\n    )\n\n    assert response.error is None, f\"Chat returned an error: {response.error}\"\n    # The LLM should be able to answer about the revenue (either from direct\n    # context injection or via the FileReaderTool)\n    assert (\n        \"$42 million\" in response.full_message or \"42\" in response.full_message\n    ), f\"Expected the LLM to reference the file content. Got: {response.full_message}\"\n\n\n# ------------------------------------------------------------------\n# Persona validation — vector-DB knowledge types rejected\n# ------------------------------------------------------------------\n\n\ndef _base_persona_body(**overrides: object) -> dict:\n    \"\"\"Build a valid PersonaUpsertRequest body with sensible defaults.\n\n    Callers override only the fields under test so that Pydantic validation\n    passes and the vector-DB guard (``_validate_vector_db_knowledge``) is\n    the one that rejects the request.\n    \"\"\"\n    body: dict = {\n        \"name\": \"should-fail\",\n        \"description\": \"test\",\n        \"system_prompt\": \"test\",\n        \"task_prompt\": \"\",\n        \"is_public\": True,\n        \"datetime_aware\": False,\n        \"document_set_ids\": [],\n        \"tool_ids\": [],\n        \"users\": [],\n        \"groups\": [],\n    }\n    body.update(overrides)\n    return body\n\n\ndef test_persona_rejects_document_sets_without_vector_db(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Creating a persona with document_set_ids should fail with 400.\"\"\"\n    resp = requests.post(\n        f\"{API_SERVER_URL}/persona\",\n        json=_base_persona_body(document_set_ids=[1]),\n        headers=admin_user.headers,\n    )\n    assert (\n        resp.status_code == 400\n    ), f\"Expected 400 for document_set_ids, got {resp.status_code}: {resp.text}\"\n\n\ndef test_persona_rejects_document_ids_without_vector_db(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Creating a persona with document_ids should fail with 400.\"\"\"\n    resp = requests.post(\n        f\"{API_SERVER_URL}/persona\",\n        json=_base_persona_body(document_ids=[\"fake-doc-id\"]),\n        headers=admin_user.headers,\n    )\n    assert (\n        resp.status_code == 400\n    ), f\"Expected 400 for document_ids, got {resp.status_code}: {resp.text}\"\n"
  },
  {
    "path": "backend/tests/integration/tests/no_vectordb/test_no_vectordb_endpoints.py",
    "content": "\"\"\"Integration tests for endpoint gating when DISABLE_VECTOR_DB is set.\n\nVector-DB-dependent endpoints should return HTTP 501.\nNon-dependent endpoints (settings, document sets, chat, etc.) should work\nnormally.\n\"\"\"\n\nimport requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\n# ------------------------------------------------------------------\n# Helper\n# ------------------------------------------------------------------\n\n\ndef _headers(user: DATestUser) -> dict[str, str]:\n    return user.headers if user else {\"Content-Type\": \"application/json\"}\n\n\n# ------------------------------------------------------------------\n# Gated endpoints — should return 501\n# ------------------------------------------------------------------\n\n\ndef test_admin_search_returns_501(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    resp = requests.post(\n        f\"{API_SERVER_URL}/admin/search\",\n        json={\"query\": \"test\", \"filters\": {}},\n        headers=_headers(admin_user),\n    )\n    assert resp.status_code == 501, f\"Expected 501, got {resp.status_code}\"\n\n\ndef test_document_size_info_returns_501(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    resp = requests.get(\n        f\"{API_SERVER_URL}/document/document-size-info\",\n        params={\"document_id\": \"fake-doc\"},\n        headers=_headers(admin_user),\n    )\n    assert resp.status_code == 501\n\n\ndef test_document_chunk_info_returns_501(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    resp = requests.get(\n        f\"{API_SERVER_URL}/document/chunk-info\",\n        params={\"document_id\": \"fake-doc\"},\n        headers=_headers(admin_user),\n    )\n    assert resp.status_code == 501\n\n\ndef test_set_new_search_settings_returns_501(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    resp = requests.post(\n        f\"{API_SERVER_URL}/search-settings/set-new-search-settings\",\n        json={},\n        headers=_headers(admin_user),\n    )\n    assert resp.status_code == 501\n\n\ndef test_cancel_new_embedding_returns_501(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    resp = requests.post(\n        f\"{API_SERVER_URL}/search-settings/cancel-new-embedding\",\n        headers=_headers(admin_user),\n    )\n    assert resp.status_code == 501\n\n\ndef test_connector_router_returns_501(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"The entire /manage router is gated — any connector endpoint should 501.\"\"\"\n    resp = requests.get(\n        f\"{API_SERVER_URL}/manage/connector\",\n        headers=_headers(admin_user),\n    )\n    assert resp.status_code == 501\n\n\ndef test_ingestion_post_returns_501(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    resp = requests.post(\n        f\"{API_SERVER_URL}/onyx-api/ingestion\",\n        json={\"document\": {}},\n        headers=_headers(admin_user),\n    )\n    assert resp.status_code == 501\n\n\ndef test_ingestion_delete_returns_501(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    resp = requests.delete(\n        f\"{API_SERVER_URL}/onyx-api/ingestion/fake-doc-id\",\n        headers=_headers(admin_user),\n    )\n    assert resp.status_code == 501\n\n\n# ------------------------------------------------------------------\n# Non-gated endpoints — should work (2xx)\n# ------------------------------------------------------------------\n\n\ndef test_settings_endpoint_works(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    resp = requests.get(\n        f\"{API_SERVER_URL}/settings\",\n        headers=_headers(admin_user),\n    )\n    assert resp.status_code == 200\n    data = resp.json()\n    assert data[\"vector_db_enabled\"] is False\n\n\ndef test_document_set_list_works(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    resp = requests.get(\n        f\"{API_SERVER_URL}/manage/document-set\",\n        headers=_headers(admin_user),\n    )\n    assert resp.status_code == 200\n\n\ndef test_persona_list_works(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    resp = requests.get(\n        f\"{API_SERVER_URL}/admin/persona\",\n        headers=_headers(admin_user),\n    )\n    assert resp.status_code == 200\n\n\ndef test_tool_list_works(\n    reset: None, admin_user: DATestUser  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    resp = requests.get(\n        f\"{API_SERVER_URL}/tool\",\n        headers=_headers(admin_user),\n    )\n    assert resp.status_code == 200\n    tools = resp.json()\n    tool_ids = {t[\"in_code_tool_id\"] for t in tools if t.get(\"in_code_tool_id\")}\n    assert (\n        \"FileReaderTool\" in tool_ids\n    ), \"FileReaderTool should be registered as a built-in tool\"\n"
  },
  {
    "path": "backend/tests/integration/tests/no_vectordb/test_no_vectordb_file_lifecycle.py",
    "content": "\"\"\"Integration test for the full user-file lifecycle in no-vector-DB mode.\n\nCovers: upload → COMPLETED → unlink from project → delete → gone.\n\nThe entire lifecycle is handled by FastAPI BackgroundTasks (no Celery workers\nneeded).  The conftest-level ``pytestmark`` ensures these tests are skipped\nwhen the server is running with vector DB enabled.\n\"\"\"\n\nimport time\nfrom uuid import UUID\n\nimport requests\n\nfrom onyx.db.enums import UserFileStatus\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.project import ProjectManager\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\n\nPOLL_INTERVAL_SECONDS = 1\nPOLL_TIMEOUT_SECONDS = 30\n\n\ndef _poll_file_status(\n    file_id: UUID,\n    user: DATestUser,\n    target_status: UserFileStatus,\n    timeout: int = POLL_TIMEOUT_SECONDS,\n) -> None:\n    \"\"\"Poll GET /user/projects/file/{file_id} until the file reaches *target_status*.\"\"\"\n    deadline = time.time() + timeout\n    while time.time() < deadline:\n        resp = requests.get(\n            f\"{API_SERVER_URL}/user/projects/file/{file_id}\",\n            headers=user.headers,\n        )\n        if resp.ok:\n            status = resp.json().get(\"status\")\n            if status == target_status.value:\n                return\n        time.sleep(POLL_INTERVAL_SECONDS)\n    raise TimeoutError(\n        f\"File {file_id} did not reach {target_status.value} within {timeout}s\"\n    )\n\n\ndef _file_is_gone(file_id: UUID, user: DATestUser, timeout: int = 15) -> None:\n    \"\"\"Poll until GET /user/projects/file/{file_id} returns 404.\"\"\"\n    deadline = time.time() + timeout\n    while time.time() < deadline:\n        resp = requests.get(\n            f\"{API_SERVER_URL}/user/projects/file/{file_id}\",\n            headers=user.headers,\n        )\n        if resp.status_code == 404:\n            return\n        time.sleep(POLL_INTERVAL_SECONDS)\n    raise TimeoutError(\n        f\"File {file_id} still accessible after {timeout}s (expected 404)\"\n    )\n\n\ndef test_file_upload_process_delete_lifecycle(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"Full lifecycle: upload → COMPLETED → unlink → delete → 404.\n\n    Validates that the API server handles all background processing\n    (via FastAPI BackgroundTasks) without any Celery workers running.\n    \"\"\"\n    project = ProjectManager.create(\n        name=\"lifecycle-test\", user_performing_action=admin_user\n    )\n\n    file_content = b\"Integration test file content for lifecycle verification.\"\n    upload_result = ProjectManager.upload_files(\n        project_id=project.id,\n        files=[(\"lifecycle.txt\", file_content)],\n        user_performing_action=admin_user,\n    )\n    assert upload_result.user_files, \"Expected at least one file in upload response\"\n\n    user_file = upload_result.user_files[0]\n    file_id = user_file.id\n\n    _poll_file_status(file_id, admin_user, UserFileStatus.COMPLETED)\n\n    project_files = ProjectManager.get_project_files(project.id, admin_user)\n    assert any(\n        f.id == file_id for f in project_files\n    ), \"File should be listed in project files after processing\"\n\n    # Unlink the file from the project so the delete endpoint will proceed\n    unlink_resp = requests.delete(\n        f\"{API_SERVER_URL}/user/projects/{project.id}/files/{file_id}\",\n        headers=admin_user.headers,\n    )\n    assert (\n        unlink_resp.status_code == 204\n    ), f\"Expected 204 on unlink, got {unlink_resp.status_code}: {unlink_resp.text}\"\n\n    delete_resp = requests.delete(\n        f\"{API_SERVER_URL}/user/projects/file/{file_id}\",\n        headers=admin_user.headers,\n    )\n    assert (\n        delete_resp.ok\n    ), f\"Delete request failed: {delete_resp.status_code} {delete_resp.text}\"\n    body = delete_resp.json()\n    assert (\n        body[\"has_associations\"] is False\n    ), f\"File still has associations after unlink: {body}\"\n\n    _file_is_gone(file_id, admin_user)\n\n    project_files_after = ProjectManager.get_project_files(project.id, admin_user)\n    assert not any(\n        f.id == file_id for f in project_files_after\n    ), \"Deleted file should not appear in project files\"\n\n\ndef test_delete_blocked_while_associated(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"Deleting a file that still belongs to a project should return\n    has_associations=True without actually deleting the file.\"\"\"\n    project = ProjectManager.create(\n        name=\"assoc-test\", user_performing_action=admin_user\n    )\n\n    upload_result = ProjectManager.upload_files(\n        project_id=project.id,\n        files=[(\"assoc.txt\", b\"associated file content\")],\n        user_performing_action=admin_user,\n    )\n    file_id = upload_result.user_files[0].id\n\n    _poll_file_status(file_id, admin_user, UserFileStatus.COMPLETED)\n\n    # Attempt to delete while still linked\n    delete_resp = requests.delete(\n        f\"{API_SERVER_URL}/user/projects/file/{file_id}\",\n        headers=admin_user.headers,\n    )\n    assert delete_resp.ok\n    body = delete_resp.json()\n    assert body[\"has_associations\"] is True, \"Should report existing associations\"\n    assert project.name in body[\"project_names\"]\n\n    # File should still be accessible\n    get_resp = requests.get(\n        f\"{API_SERVER_URL}/user/projects/file/{file_id}\",\n        headers=admin_user.headers,\n    )\n    assert get_resp.status_code == 200, \"File should still exist after blocked delete\"\n"
  },
  {
    "path": "backend/tests/integration/tests/opensearch_migration/test_opensearch_migration_api.py",
    "content": "import requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_migration_status_returns_defaults_when_no_record(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"When no migration record exists, status should return zeros/nulls.\"\"\"\n    # Under test.\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/opensearch-migration/status\",\n        headers=admin_user.headers,\n    )\n\n    # Postcondition.\n    assert response.status_code == 200\n    data = response.json()\n    assert data[\"total_chunks_migrated\"] == 0\n    assert data[\"created_at\"] is None\n    assert data[\"migration_completed_at\"] is None\n    assert data[\"approx_chunk_count_in_vespa\"] is None\n\n\ndef test_retrieval_status_returns_false_when_no_record(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"When no migration record exists, retrieval should default to disabled.\"\"\"\n    # Under test.\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/opensearch-migration/retrieval\",\n        headers=admin_user.headers,\n    )\n\n    # Postcondition.\n    assert response.status_code == 200\n    data = response.json()\n    assert data[\"enable_opensearch_retrieval\"] is False\n\n\ndef test_set_and_get_retrieval_status(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Setting retrieval to True should persist and be readable.\"\"\"\n    # Under test.\n    # Enable retrieval.\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/opensearch-migration/retrieval\",\n        json={\"enable_opensearch_retrieval\": True},\n        headers=admin_user.headers,\n    )\n\n    # Postcondition.\n    assert response.status_code == 200\n    assert response.json()[\"enable_opensearch_retrieval\"] is True\n    # Verify it persisted.\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/opensearch-migration/retrieval\",\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n    assert response.json()[\"enable_opensearch_retrieval\"] is True\n\n    # Under test.\n    # Disable retrieval.\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/opensearch-migration/retrieval\",\n        json={\"enable_opensearch_retrieval\": False},\n        headers=admin_user.headers,\n    )\n\n    # Postcondition.\n    assert response.status_code == 200\n    assert response.json()[\"enable_opensearch_retrieval\"] is False\n    # Verify it persisted.\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/opensearch-migration/retrieval\",\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200\n    assert response.json()[\"enable_opensearch_retrieval\"] is False\n\n\ndef test_migration_status_after_record_created(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"After toggling retrieval (which creates the record), status should\n    return a valid created_at timestamp.\"\"\"\n    # Precondition.\n    # Create the record by setting retrieval.\n    requests.put(\n        f\"{API_SERVER_URL}/admin/opensearch-migration/retrieval\",\n        json={\"enable_opensearch_retrieval\": False},\n        headers=admin_user.headers,\n    )\n\n    # Under test.\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/opensearch-migration/status\",\n        headers=admin_user.headers,\n    )\n\n    # Postcondition.\n    assert response.status_code == 200\n    data = response.json()\n    assert data[\"total_chunks_migrated\"] == 0\n    assert data[\"created_at\"] is not None\n    assert data[\"migration_completed_at\"] is None\n    assert data[\"approx_chunk_count_in_vespa\"] is None\n\n\ndef test_endpoints_require_admin(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,  # noqa: ARG001\n) -> None:\n    \"\"\"Endpoints should reject unauthenticated requests.\"\"\"\n    for url in [\n        f\"{API_SERVER_URL}/admin/opensearch-migration/status\",\n        f\"{API_SERVER_URL}/admin/opensearch-migration/retrieval\",\n    ]:\n        response = requests.get(url)\n        assert response.status_code == 403\n\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/opensearch-migration/retrieval\",\n        json={\"enable_opensearch_retrieval\": True},\n    )\n    assert response.status_code == 403\n"
  },
  {
    "path": "backend/tests/integration/tests/pat/test_pat_api.py",
    "content": "\"\"\"\nIntegration tests for Personal Access Token (PAT) API.\n\nTest Suite:\n1. test_pat_lifecycle_happy_path - Complete PAT lifecycle (create, auth, revoke)\n2. test_pat_user_isolation_and_authentication - User authentication and multi-user isolation\n3. test_pat_expiration_flow - Expiration logic (end-of-day UTC, never-expiring)\n4. test_pat_validation_errors - Input validation and error handling\n5. test_pat_sorting_and_last_used - Sorting and last_used_at tracking\n6. test_pat_role_based_access_control - Admin vs Basic vs Curator permissions\n\"\"\"\n\nimport time\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nimport requests\n\nfrom onyx.auth.schemas import UserRole\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.pat import PATManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_pat_lifecycle_happy_path(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Complete PAT lifecycle: create, authenticate, revoke.\"\"\"\n    user: DATestUser = UserManager.create(name=\"pat_user\")\n\n    # Create PAT\n    pat = PATManager.create(\n        name=\"My Integration Token\",\n        expiration_days=30,\n        user_performing_action=user,\n    )\n\n    assert pat.id is not None\n    assert pat.name == \"My Integration Token\"\n    assert pat.token is not None  # Raw token only returned on creation\n    assert pat.token_display is not None\n    assert pat.created_at is not None\n    assert pat.expires_at is not None\n\n    assert pat.token.startswith(\"onyx_pat_\")\n    assert len(pat.token) > 20\n\n    assert \"****\" in pat.token_display\n    assert pat.token_display.startswith(\"onyx_pat_\")\n\n    # List PATs\n    tokens = PATManager.list(user)\n    assert len(tokens) == 1\n    assert tokens[0].id == pat.id\n    assert tokens[0].name == \"My Integration Token\"\n    assert tokens[0].token_display == pat.token_display\n    assert tokens[0].token is None\n\n    # Authenticate with PAT\n    auth_response = PATManager.authenticate(pat.token)\n    assert auth_response.status_code == 200\n    me_data = auth_response.json()\n    assert me_data[\"email\"] == user.email\n    assert me_data[\"id\"] == user.id\n\n    # Revoke PAT\n    PATManager.revoke(pat.id, user)\n\n    # Verify revoked token fails authentication\n    revoked_auth_response = PATManager.authenticate(pat.token)\n    assert revoked_auth_response.status_code == 403  # Revoked token returns 403\n\n    # Verify token is no longer listed\n    tokens_after_revoke = PATManager.list(user)\n    assert len(tokens_after_revoke) == 0\n\n\ndef test_pat_user_isolation_and_authentication(\n    reset: None,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"\n    PATs authenticate as real users, and users can only see/manage their own tokens.\n    \"\"\"\n    user_a: DATestUser = UserManager.create(name=\"user_a\")\n    user_b: DATestUser = UserManager.create(name=\"user_b\")\n\n    # Create tokens for both users\n    user_a_pats = []\n    for i in range(2):\n        pat = PATManager.create(\n            name=f\"User A Token {i + 1}\",\n            expiration_days=30,\n            user_performing_action=user_a,\n        )\n        user_a_pats.append(pat)\n\n    user_b_pats = []\n    for i in range(2):\n        pat = PATManager.create(\n            name=f\"User B Token {i + 1}\",\n            expiration_days=30,\n            user_performing_action=user_b,\n        )\n        user_b_pats.append(pat)\n\n    # Verify PATs authenticate as the correct users\n    for user, pat in [(user_a, user_a_pats[0]), (user_b, user_b_pats[0])]:\n        assert pat.token is not None\n        me_response = PATManager.authenticate(pat.token)\n        assert me_response.status_code == 200\n        me_data = me_response.json()\n        assert me_data[\"email\"] == user.email\n        assert me_data[\"id\"] == user.id\n\n    # Verify each user only sees their own tokens\n    user_a_list = PATManager.list(user_a)\n    assert len(user_a_list) == 2\n\n    user_b_list = PATManager.list(user_b)\n    assert len(user_b_list) == 2\n\n    # Verify user A cannot delete user B's token using their PAT\n    assert user_a_pats[0].token is not None\n    delete_response = requests.delete(\n        f\"{API_SERVER_URL}/user/pats/{user_b_pats[0].id}\",\n        headers=PATManager.get_auth_headers(user_a_pats[0].token),\n        timeout=60,\n    )\n    assert delete_response.status_code == 404\n\n    # Verify user B's token still exists\n    user_b_list_after = PATManager.list(user_b)\n    assert len(user_b_list_after) == 2\n\n    # Verify deleting non-existent token returns 404\n    delete_fake = requests.delete(\n        f\"{API_SERVER_URL}/user/pats/999999\",\n        headers=user_a.headers,\n        timeout=60,\n    )\n    assert delete_fake.status_code == 404\n\n\ndef test_pat_expiration_flow(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Expiration timestamp is end-of-day (23:59:59 UTC); never-expiring tokens work; revoked tokens fail.\"\"\"\n    user: DATestUser = UserManager.create(name=\"expiration_user\")\n\n    # Create expiring token\n    pat = PATManager.create(\n        name=\"Expiring Token\",\n        expiration_days=7,\n        user_performing_action=user,\n    )\n\n    assert pat.expires_at is not None\n    expires_at = datetime.fromisoformat(pat.expires_at.replace(\"Z\", \"+00:00\"))\n\n    # Verify end-of-day expiration\n    assert expires_at.hour == 23\n    assert expires_at.minute == 59\n    assert expires_at.second == 59\n\n    # Calculate expected end-of-day 7 days from now\n    now = datetime.now(timezone.utc)\n    expected_date = (now + timedelta(days=7)).date()\n    expected_expiry = datetime.combine(expected_date, datetime.max.time()).replace(\n        tzinfo=timezone.utc\n    )\n    # Allow for small timing differences (within a day)\n    assert abs((expires_at - expected_expiry).total_seconds()) < 86400  # 1 day\n\n    # Create never-expiring token\n    never_expiring_pat = PATManager.create(\n        name=\"Never Expiring Token\",\n        expiration_days=None,\n        user_performing_action=user,\n    )\n    assert never_expiring_pat.expires_at is None\n\n    # Verify never-expiring token works\n    assert never_expiring_pat.token is not None\n    auth_response = PATManager.authenticate(never_expiring_pat.token)\n    assert auth_response.status_code == 200\n\n    # Revoke the never-expiring token\n    PATManager.revoke(never_expiring_pat.id, user)\n\n    # Verify revoked token fails (token var still holds the revoked value)\n    revoked_auth_response = PATManager.authenticate(never_expiring_pat.token)\n    assert revoked_auth_response.status_code == 403\n\n\ndef test_pat_validation_errors(reset: None) -> None:  # noqa: ARG001\n    \"\"\"Validate input errors: empty name, name too long, negative/zero expiration.\"\"\"\n    user: DATestUser = UserManager.create(name=\"validation_user\")\n\n    # Empty name should fail\n    empty_name_response = requests.post(\n        f\"{API_SERVER_URL}/user/pats\",\n        json={\"name\": \"\", \"expiration_days\": 30},\n        headers=user.headers,\n        timeout=60,\n    )\n    assert empty_name_response.status_code == 422\n\n    # Name too long should fail\n    long_name = \"a\" * 101\n    long_name_response = requests.post(\n        f\"{API_SERVER_URL}/user/pats\",\n        json={\"name\": long_name, \"expiration_days\": 30},\n        headers=user.headers,\n        timeout=60,\n    )\n    assert long_name_response.status_code == 422\n\n    # Negative expiration should fail\n    negative_exp_response = requests.post(\n        f\"{API_SERVER_URL}/user/pats\",\n        json={\"name\": \"Test Token\", \"expiration_days\": -1},\n        headers=user.headers,\n        timeout=60,\n    )\n    assert negative_exp_response.status_code == 422\n\n    # Zero expiration should fail\n    zero_exp_response = requests.post(\n        f\"{API_SERVER_URL}/user/pats\",\n        json={\"name\": \"Test Token\", \"expiration_days\": 0},\n        headers=user.headers,\n        timeout=60,\n    )\n    assert zero_exp_response.status_code == 422\n\n    # Max length name (100 chars) should succeed\n    valid_name = \"a\" * 100\n    valid_pat = PATManager.create(\n        name=valid_name,\n        expiration_days=7,\n        user_performing_action=user,\n    )\n    assert valid_pat.id is not None\n\n    # Missing name should fail\n    missing_name_response = requests.post(\n        f\"{API_SERVER_URL}/user/pats\",\n        json={\"expiration_days\": 30},\n        headers=user.headers,\n        timeout=60,\n    )\n    assert missing_name_response.status_code == 422\n\n\ndef test_pat_sorting_and_last_used(reset: None) -> None:  # noqa: ARG001\n    \"\"\"PATs are sorted by created_at DESC; last_used_at updates after authentication.\"\"\"\n    user: DATestUser = UserManager.create(name=\"sorting_user\")\n\n    # Create tokens with small delays to ensure different timestamps\n    token1 = PATManager.create(\n        name=\"First Token\",\n        expiration_days=30,\n        user_performing_action=user,\n    )\n\n    time.sleep(0.1)\n\n    PATManager.create(\n        name=\"Second Token\",\n        expiration_days=30,\n        user_performing_action=user,\n    )\n\n    time.sleep(0.1)\n\n    PATManager.create(\n        name=\"Third Token\",\n        expiration_days=30,\n        user_performing_action=user,\n    )\n\n    # Verify sorted by created_at DESC (newest first)\n    tokens = PATManager.list(user)\n    assert len(tokens) == 3\n\n    assert tokens[0].name == \"Third Token\"\n    assert tokens[1].name == \"Second Token\"\n    assert tokens[2].name == \"First Token\"\n\n    # Verify all tokens have no last_used_at initially\n    for token in tokens:\n        assert token.last_used_at is None\n\n    # Use the first token (oldest)\n    assert token1.token is not None\n    auth_response = PATManager.authenticate(token1.token)\n    assert auth_response.status_code == 200\n\n    time.sleep(0.5)\n\n    # Verify last_used_at is updated for the used token only\n    tokens_after_use = PATManager.list(user)\n\n    token1_after_use = next(t for t in tokens_after_use if t.name == \"First Token\")\n    assert token1_after_use.last_used_at is not None\n\n    token2_after_use = next(t for t in tokens_after_use if t.name == \"Second Token\")\n    token3_after_use = next(t for t in tokens_after_use if t.name == \"Third Token\")\n    assert token2_after_use.last_used_at is None\n    assert token3_after_use.last_used_at is None\n\n\ndef test_pat_role_based_access_control(reset: None) -> None:  # noqa: ARG001\n    \"\"\"\n    PATs inherit user roles and permissions:\n    - Admin PAT: Full access to admin-only endpoints\n    - Curator/Global Curator PATs: Access to management endpoints\n    - Basic PAT: Denied access to admin and management endpoints\n    \"\"\"\n    # Create users with different roles\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n    assert admin_user.role == UserRole.ADMIN\n\n    basic_user: DATestUser = UserManager.create(name=\"basic_user\")\n    assert basic_user.role == UserRole.BASIC\n\n    curator_user: DATestUser = UserManager.create(name=\"curator_user\")\n    curator_user = UserManager.set_role(\n        user_to_set=curator_user,\n        target_role=UserRole.CURATOR,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n    assert curator_user.role == UserRole.CURATOR\n\n    global_curator_user: DATestUser = UserManager.create(name=\"global_curator_user\")\n    global_curator_user = UserManager.set_role(\n        user_to_set=global_curator_user,\n        target_role=UserRole.GLOBAL_CURATOR,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n    assert global_curator_user.role == UserRole.GLOBAL_CURATOR\n\n    # Create PATs for each user\n    admin_pat = PATManager.create(\n        name=\"Admin Token\",\n        expiration_days=7,\n        user_performing_action=admin_user,\n    )\n\n    basic_pat = PATManager.create(\n        name=\"Basic Token\",\n        expiration_days=7,\n        user_performing_action=basic_user,\n    )\n\n    curator_pat = PATManager.create(\n        name=\"Curator Token\",\n        expiration_days=7,\n        user_performing_action=curator_user,\n    )\n\n    global_curator_pat = PATManager.create(\n        name=\"Global Curator Token\",\n        expiration_days=7,\n        user_performing_action=global_curator_user,\n    )\n\n    # Verify all tokens are present (type narrowing for mypy)\n    assert admin_pat.token is not None\n    assert basic_pat.token is not None\n    assert curator_pat.token is not None\n    assert global_curator_pat.token is not None\n\n    # Test admin-only endpoint access\n    print(\"\\n[Test] Admin PAT accessing admin-only endpoint...\")\n    admin_endpoint_response = requests.get(\n        f\"{API_SERVER_URL}/admin/api-key\",\n        headers=PATManager.get_auth_headers(admin_pat.token),\n        timeout=60,\n    )\n    assert admin_endpoint_response.status_code == 200\n    print(\"[✓] Admin PAT successfully accessed /admin/api-key\")\n\n    print(\"\\n[Test] Basic PAT accessing admin endpoint...\")\n    basic_admin_response = requests.get(\n        f\"{API_SERVER_URL}/admin/api-key\",\n        headers=PATManager.get_auth_headers(basic_pat.token),\n        timeout=60,\n    )\n    assert basic_admin_response.status_code == 403\n    print(\"[✓] Basic PAT correctly denied access (403) to /admin/api-key\")\n\n    print(\"\\n[Test] Curator PAT accessing admin-only endpoint...\")\n    curator_admin_response = requests.get(\n        f\"{API_SERVER_URL}/admin/api-key\",\n        headers=PATManager.get_auth_headers(curator_pat.token),\n        timeout=60,\n    )\n    assert curator_admin_response.status_code == 403\n    print(\"[✓] Curator PAT correctly denied access (403) to /admin/api-key\")\n\n    print(\"\\n[Test] Global Curator PAT accessing admin-only endpoint...\")\n    global_curator_admin_response = requests.get(\n        f\"{API_SERVER_URL}/admin/api-key\",\n        headers=PATManager.get_auth_headers(global_curator_pat.token),\n        timeout=60,\n    )\n    assert global_curator_admin_response.status_code == 403\n    print(\"[✓] Global Curator PAT correctly denied access (403) to /admin/api-key\")\n\n    # Test management endpoint access\n    print(\"\\n[Test] Testing management endpoint access for curators...\")\n\n    admin_manage_response = requests.get(\n        f\"{API_SERVER_URL}/manage/admin/connector\",\n        headers=PATManager.get_auth_headers(admin_pat.token),\n        timeout=60,\n    )\n    assert admin_manage_response.status_code == 200\n    print(\"[✓] Admin PAT can access /manage/admin/connector\")\n\n    curator_manage_response = requests.get(\n        f\"{API_SERVER_URL}/manage/admin/connector\",\n        headers=PATManager.get_auth_headers(curator_pat.token),\n        timeout=60,\n    )\n    assert curator_manage_response.status_code == 200\n    print(\"[✓] Curator PAT can access /manage/admin/connector\")\n\n    global_curator_manage_response = requests.get(\n        f\"{API_SERVER_URL}/manage/admin/connector\",\n        headers=PATManager.get_auth_headers(global_curator_pat.token),\n        timeout=60,\n    )\n    assert global_curator_manage_response.status_code == 200\n    print(\"[✓] Global Curator PAT can access /manage/admin/connector\")\n\n    basic_manage_response = requests.get(\n        f\"{API_SERVER_URL}/manage/admin/connector\",\n        headers=PATManager.get_auth_headers(basic_pat.token),\n        timeout=60,\n    )\n    assert basic_manage_response.status_code in [403, 401]\n    print(\n        f\"[✓] Basic PAT correctly denied access ({basic_manage_response.status_code}) to /manage/admin/connector\"\n    )\n\n    # Verify PATs authenticate with correct identity and role\n    print(\"\\n[Test] Verifying PATs authenticate as correct users with correct roles...\")\n\n    admin_me = PATManager.authenticate(admin_pat.token)\n    assert admin_me.status_code == 200\n    assert admin_me.json()[\"email\"] == admin_user.email\n    assert admin_me.json()[\"role\"] == UserRole.ADMIN.value\n\n    basic_me = PATManager.authenticate(basic_pat.token)\n    assert basic_me.status_code == 200\n    assert basic_me.json()[\"email\"] == basic_user.email\n    assert basic_me.json()[\"role\"] == UserRole.BASIC.value\n\n    curator_me = PATManager.authenticate(curator_pat.token)\n    assert curator_me.status_code == 200\n    assert curator_me.json()[\"email\"] == curator_user.email\n    assert curator_me.json()[\"role\"] == UserRole.CURATOR.value\n\n    global_curator_me = PATManager.authenticate(global_curator_pat.token)\n    assert global_curator_me.status_code == 200\n    assert global_curator_me.json()[\"email\"] == global_curator_user.email\n    assert global_curator_me.json()[\"role\"] == UserRole.GLOBAL_CURATOR.value\n\n    print(\"[✓] All PATs authenticate with correct user identity and role\")\n\n    # Verify all PATs can access basic endpoints\n    print(\"\\n[Test] All PATs can access basic endpoints...\")\n    for pat, user_name in [\n        (admin_pat, \"Admin\"),\n        (basic_pat, \"Basic\"),\n        (curator_pat, \"Curator\"),\n        (global_curator_pat, \"Global Curator\"),\n    ]:\n        assert pat.token is not None\n        persona_response = requests.get(\n            f\"{API_SERVER_URL}/persona\",\n            headers=PATManager.get_auth_headers(pat.token),\n            timeout=60,\n        )\n        assert persona_response.status_code == 200\n        print(f\"[✓] {user_name} PAT can access /persona endpoint\")\n\n    print(\"\\n[✓] All role-based access control tests passed!\")\n    print(\"Summary:\")\n    print(\n        \"  - Admin PAT: Full access to admin-only endpoints (/admin/*, /manage/admin/*)\"\n    )\n    print(\n        \"  - Curator PAT: Access to management endpoints (/manage/admin/*), denied on admin-only (/admin/*)\"\n    )\n    print(\n        \"  - Global Curator PAT: Access to management endpoints (/manage/admin/*), denied on admin-only (/admin/*)\"\n    )\n    print(\"  - Basic PAT: Denied access to admin and management endpoints\")\n    print(\"  - All PATs: Can access basic endpoints (/persona, /me, etc.)\")\n    print(\"  - All PATs: Authenticate with correct user identity and role\")\n"
  },
  {
    "path": "backend/tests/integration/tests/permissions/test_auth_permission_propagation.py",
    "content": "\"\"\"Integration tests for permission propagation across auth-triggered group changes.\n\nThese tests verify that effective permissions (via /me/permissions) actually\npropagate when users are added/removed from default groups through role changes.\nCustom permission grant tests will be added once the permission grant API is built.\n\"\"\"\n\nimport os\n\nimport pytest\n\nfrom onyx.auth.schemas import UserRole\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _get_basic_group_member_emails(admin_user: DATestUser) -> set[str]:\n    all_groups = UserGroupManager.get_all(admin_user, include_default=True)\n    basic_group = next(\n        (g for g in all_groups if g.is_default and g.name == \"Basic\"), None\n    )\n    assert basic_group is not None, \"Basic default group not found\"\n    return {u.email for u in basic_group.users}\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permission propagation tests require enterprise features\",\n)\ndef test_basic_permission_granted_on_registration(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"New users should get 'basic' permission through default group assignment.\"\"\"\n    admin_user: DATestUser = UserManager.create(email=\"admin@example.com\")\n    basic_user: DATestUser = UserManager.create(email=\"basic@example.com\")\n\n    # Admin should have permissions from Admin group\n    admin_perms = UserManager.get_permissions(admin_user)\n    assert \"basic\" in admin_perms\n\n    # Basic user should have 'basic' from Basic default group\n    basic_perms = UserManager.get_permissions(basic_user)\n    assert \"basic\" in basic_perms\n\n    # Verify group membership matches\n    assert basic_user.email in _get_basic_group_member_emails(admin_user)\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permission propagation tests require enterprise features\",\n)\ndef test_role_downgrade_removes_basic_group_and_permission(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Downgrading to EXT_PERM_USER or SLACK_USER should remove from Basic group.\"\"\"\n    admin_user: DATestUser = UserManager.create(email=\"admin@example.com\")\n\n    # --- EXT_PERM_USER ---\n    ext_user: DATestUser = UserManager.create(email=\"ext@example.com\")\n    assert ext_user.email in _get_basic_group_member_emails(admin_user)\n\n    UserManager.set_role(\n        user_to_set=ext_user,\n        target_role=UserRole.EXT_PERM_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n    assert ext_user.email not in _get_basic_group_member_emails(admin_user)\n\n    # --- SLACK_USER ---\n    slack_user: DATestUser = UserManager.create(email=\"slack@example.com\")\n    assert slack_user.email in _get_basic_group_member_emails(admin_user)\n\n    UserManager.set_role(\n        user_to_set=slack_user,\n        target_role=UserRole.SLACK_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n    assert slack_user.email not in _get_basic_group_member_emails(admin_user)\n"
  },
  {
    "path": "backend/tests/integration/tests/permissions/test_cc_pair_permissions.py",
    "content": "\"\"\"\nThis file takes the happy path to adding a curator to a user group and then tests\nthe permissions of the curator manipulating connector-credential pairs.\n\"\"\"\n\nimport os\n\nimport pytest\nfrom onyx_openapi_client.exceptions import ApiException  # type: ignore[import-untyped,unused-ignore,import-not-found]\n\nfrom onyx.db.enums import AccessType\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.user import DATestUser\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Curator and User Group tests are enterprise only\",\n)\ndef test_cc_pair_permissions(reset: None) -> None:  # noqa: ARG001\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    # Creating a curator\n    curator: DATestUser = UserManager.create(name=\"curator\")\n\n    # Creating a user group\n    user_group_1 = UserGroupManager.create(\n        name=\"curated_user_group\",\n        user_ids=[curator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n    # setting the user as a curator for the user group\n    UserGroupManager.set_curator_status(\n        test_user_group=user_group_1,\n        user_to_set_as_curator=curator,\n        user_performing_action=admin_user,\n    )\n\n    # Creating another user group that the user is not a curator of\n    user_group_2 = UserGroupManager.create(\n        name=\"uncurated_user_group\",\n        user_ids=[curator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n\n    connector_1 = ConnectorManager.create(\n        name=\"admin_owned_connector\",\n        source=DocumentSource.CONFLUENCE,\n        groups=[user_group_1.id],\n        access_type=AccessType.PRIVATE,\n        user_performing_action=admin_user,\n    )\n    # currently we dont enforce permissions at the connector level\n    # pending cc_pair -> connector rework\n    # connector_2 = ConnectorManager.create(\n    #     name=\"curator_visible_connector\",\n    #     source=DocumentSource.CONFLUENCE,\n    #     groups=[user_group_2.id],\n    #     is_public=False,\n    #     user_performing_action=admin_user,\n    # )\n    # Create a credentials that the curator is and is not curator of\n    credential_1 = CredentialManager.create(\n        name=\"curator_owned_credential\",\n        source=DocumentSource.CONFLUENCE,\n        groups=[user_group_1.id],\n        curator_public=False,\n        user_performing_action=admin_user,\n    )\n    credential_2 = CredentialManager.create(\n        name=\"curator_visible_credential\",\n        source=DocumentSource.CONFLUENCE,\n        groups=[user_group_2.id],\n        curator_public=False,\n        user_performing_action=admin_user,\n    )\n\n    # END OF HAPPY PATH\n\n    \"\"\"Tests for things Curators should not be able to do\"\"\"\n\n    # Curators should not be able to create a cc\n    # pair for a user group they are not a curator of\n    with pytest.raises(ApiException):\n        CCPairManager.create(\n            connector_id=connector_1.id,\n            credential_id=credential_1.id,\n            name=\"invalid_cc_pair_2\",\n            access_type=AccessType.PRIVATE,\n            groups=[user_group_1.id, user_group_2.id],\n            user_performing_action=curator,\n        )\n\n    # Curators should not be able to create a cc\n    # pair without an attached user group\n    with pytest.raises(ApiException):\n        CCPairManager.create(\n            connector_id=connector_1.id,\n            credential_id=credential_1.id,\n            name=\"invalid_cc_pair_2\",\n            access_type=AccessType.PRIVATE,\n            groups=[],\n            user_performing_action=curator,\n        )\n\n    # # This test is currently disabled because permissions are\n    # # not enforced at the connector level\n    # # Curators should not be able to create a cc pair\n    # # for a user group that the connector does not belong to (NOT WORKING)\n    # with pytest.raises(HTTPError):\n    #     CCPairManager.create(\n    #         connector_id=connector_2.id,\n    #         credential_id=credential_1.id,\n    #         name=\"invalid_cc_pair_3\",\n    #         access_type=AccessType.PRIVATE,\n    #         groups=[user_group_1.id],\n    #         user_performing_action=curator,\n    #     )\n\n    # Curators should not be able to create a cc\n    # pair for a user group that the credential does not belong to\n    with pytest.raises(ApiException):\n        CCPairManager.create(\n            connector_id=connector_1.id,\n            credential_id=credential_2.id,\n            name=\"invalid_cc_pair_4\",\n            access_type=AccessType.PRIVATE,\n            groups=[user_group_1.id],\n            user_performing_action=curator,\n        )\n\n    \"\"\"Tests for things Curators should be able to do\"\"\"\n\n    # Re-create connector since the credential_2 validation error above\n    # triggers connector deletion in the exception handler\n    connector_1 = ConnectorManager.create(\n        name=\"admin_owned_connector_2\",\n        source=DocumentSource.CONFLUENCE,\n        groups=[user_group_1.id],\n        access_type=AccessType.PRIVATE,\n        user_performing_action=admin_user,\n    )\n\n    # Curators should be able to create a private\n    # cc pair for a user group they are a curator of\n    valid_cc_pair = CCPairManager.create(\n        name=\"valid_cc_pair\",\n        connector_id=connector_1.id,\n        credential_id=credential_1.id,\n        access_type=AccessType.PRIVATE,\n        groups=[user_group_1.id],\n        user_performing_action=curator,\n    )\n\n    # Verify the created cc pair\n    CCPairManager.verify(\n        cc_pair=valid_cc_pair,\n        user_performing_action=curator,\n    )\n\n    # Test pausing the cc pair\n    CCPairManager.pause_cc_pair(valid_cc_pair, user_performing_action=curator)\n\n    # Test deleting the cc pair\n    CCPairManager.delete(valid_cc_pair, user_performing_action=curator)\n    CCPairManager.wait_for_deletion_completion(\n        cc_pair_id=valid_cc_pair.id, user_performing_action=curator\n    )\n\n    CCPairManager.verify(\n        cc_pair=valid_cc_pair,\n        verify_deleted=True,\n        user_performing_action=curator,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/permissions/test_connector_permissions.py",
    "content": "\"\"\"\nThis file takes the happy path to adding a curator to a user group and then tests\nthe permissions of the curator manipulating connectors.\n\"\"\"\n\nimport os\n\nimport pytest\nfrom requests.exceptions import HTTPError\n\nfrom onyx.db.enums import AccessType\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.user import DATestUser\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Curator and user group tests are enterprise only\",\n)\ndef test_connector_permissions(reset: None) -> None:  # noqa: ARG001\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    # Creating a curator\n    curator: DATestUser = UserManager.create(name=\"curator\")\n\n    # Creating a user group\n    user_group_1 = UserGroupManager.create(\n        name=\"user_group_1\",\n        user_ids=[curator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n    # setting the user as a curator for the user group\n    UserGroupManager.set_curator_status(\n        test_user_group=user_group_1,\n        user_to_set_as_curator=curator,\n        user_performing_action=admin_user,\n    )\n\n    # Creating another user group that the user is not a curator of\n    user_group_2 = UserGroupManager.create(\n        name=\"user_group_2\",\n        user_ids=[curator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n\n    # END OF HAPPY PATH\n\n    \"\"\"Tests for things Curators should not be able to do\"\"\"\n\n    # Curators should not be able to create a connector for a\n    # user group they are not a curator of\n    with pytest.raises(HTTPError):\n        ConnectorManager.create(\n            name=\"invalid_connector_2\",\n            source=DocumentSource.CONFLUENCE,\n            groups=[user_group_1.id, user_group_2.id],\n            access_type=AccessType.PRIVATE,\n            user_performing_action=curator,\n        )\n\n    \"\"\"Tests for things Curators should be able to do\"\"\"\n\n    # Curators should be able to create a private\n    # connector for a user group they are a curator of\n    valid_connector = ConnectorManager.create(\n        name=\"valid_connector\",\n        source=DocumentSource.CONFLUENCE,\n        groups=[user_group_1.id],\n        access_type=AccessType.PRIVATE,\n        user_performing_action=curator,\n    )\n    assert valid_connector.id is not None\n\n    # Verify the created connector\n    created_connector = ConnectorManager.get(\n        valid_connector.id, user_performing_action=curator\n    )\n    assert created_connector.name == valid_connector.name\n    assert created_connector.source == valid_connector.source\n\n    # Verify that the connector can be found in the list of all connectors\n    all_connectors = ConnectorManager.get_all(user_performing_action=curator)\n    assert any(conn.id == valid_connector.id for conn in all_connectors)\n\n    # Test editing the connector\n    valid_connector.name = \"updated_valid_connector\"\n    ConnectorManager.edit(valid_connector, user_performing_action=curator)\n\n    # Verify the edit\n    updated_connector = ConnectorManager.get(\n        valid_connector.id, user_performing_action=curator\n    )\n    assert updated_connector.name == \"updated_valid_connector\"\n\n    # Test deleting the connector\n    ConnectorManager.delete(connector=valid_connector, user_performing_action=curator)\n\n    # Verify the deletion\n    all_connectors_after_delete = ConnectorManager.get_all(\n        user_performing_action=curator\n    )\n    assert all(conn.id != valid_connector.id for conn in all_connectors_after_delete)\n\n    # Test that curator cannot create a connector for a group they are not a curator of\n    with pytest.raises(HTTPError):\n        ConnectorManager.create(\n            name=\"invalid_connector_3\",\n            source=DocumentSource.CONFLUENCE,\n            groups=[user_group_2.id],\n            access_type=AccessType.PRIVATE,\n            user_performing_action=curator,\n        )\n\n    # Curators should be able to create a public connector\n    public_connector = ConnectorManager.create(\n        name=\"curator_public_connector\",\n        source=DocumentSource.CONFLUENCE,\n        groups=[user_group_1.id],\n        access_type=AccessType.PUBLIC,\n        user_performing_action=curator,\n    )\n    assert public_connector.id is not None\n"
  },
  {
    "path": "backend/tests/integration/tests/permissions/test_credential_permissions.py",
    "content": "\"\"\"\nThis file takes the happy path to adding a curator to a user group and then tests\nthe permissions of the curator manipulating credentials.\n\"\"\"\n\nimport os\n\nimport pytest\nfrom requests.exceptions import HTTPError\n\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.user import DATestUser\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Curator and user group tests are enterprise only\",\n)\ndef test_credential_permissions(reset: None) -> None:  # noqa: ARG001\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    # Creating a curator\n    curator: DATestUser = UserManager.create(name=\"curator\")\n\n    # Creating a user group\n    user_group_1 = UserGroupManager.create(\n        name=\"user_group_1\",\n        user_ids=[curator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n    # setting the user as a curator for the user group\n    UserGroupManager.set_curator_status(\n        test_user_group=user_group_1,\n        user_to_set_as_curator=curator,\n        user_performing_action=admin_user,\n    )\n\n    # Creating another user group that the user is not a curator of\n    user_group_2 = UserGroupManager.create(\n        name=\"user_group_2\",\n        user_ids=[curator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n\n    # END OF HAPPY PATH\n\n    \"\"\"Tests for things Curators should not be able to do\"\"\"\n\n    # Curators should not be able to create a credential for a user group they are not a curator of\n    with pytest.raises(HTTPError):\n        CredentialManager.create(\n            name=\"invalid_credential_2\",\n            source=DocumentSource.CONFLUENCE,\n            groups=[user_group_1.id, user_group_2.id],\n            curator_public=False,\n            user_performing_action=curator,\n        )\n\n    \"\"\"Tests for things Curators should be able to do\"\"\"\n    # Curators should be able to create a private credential for a user group they are a curator of\n    valid_credential = CredentialManager.create(\n        name=\"valid_credential\",\n        source=DocumentSource.CONFLUENCE,\n        groups=[user_group_1.id],\n        curator_public=False,\n        user_performing_action=curator,\n    )\n\n    # Verify the created credential\n    CredentialManager.verify(\n        credential=valid_credential,\n        user_performing_action=curator,\n    )\n\n    # Test editing the credential\n    valid_credential.name = \"updated_valid_credential\"\n    CredentialManager.edit(valid_credential, user_performing_action=curator)\n\n    # Verify the edit\n    CredentialManager.verify(\n        credential=valid_credential,\n        user_performing_action=curator,\n    )\n\n    # Test deleting the credential\n    CredentialManager.delete(valid_credential, user_performing_action=curator)\n\n    # Verify the deletion\n    CredentialManager.verify(\n        credential=valid_credential,\n        verify_deleted=True,\n        user_performing_action=curator,\n    )\n\n    # Curators should be able to create a public credential\n    public_credential = CredentialManager.create(\n        name=\"curator_public_credential\",\n        source=DocumentSource.CONFLUENCE,\n        groups=[user_group_1.id],\n        curator_public=True,\n        user_performing_action=curator,\n    )\n    CredentialManager.verify(\n        credential=public_credential,\n        user_performing_action=curator,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/permissions/test_doc_set_permissions.py",
    "content": "import os\n\nimport pytest\nfrom requests.exceptions import HTTPError\n\nfrom onyx.db.enums import AccessType\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.document_set import DocumentSetManager\nfrom tests.integration.common_utils.managers.user import DATestUser\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Curator and user group tests are enterprise only\",\n)\ndef test_doc_set_permissions_setup(reset: None) -> None:  # noqa: ARG001\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    # Creating a second user (curator)\n    curator: DATestUser = UserManager.create(name=\"curator\")\n\n    # Creating the first user group\n    user_group_1 = UserGroupManager.create(\n        name=\"curated_user_group\",\n        user_ids=[curator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n\n    # Setting the curator as a curator for the first user group\n    UserGroupManager.set_curator_status(\n        test_user_group=user_group_1,\n        user_to_set_as_curator=curator,\n        user_performing_action=admin_user,\n    )\n\n    # Creating a second user group\n    user_group_2 = UserGroupManager.create(\n        name=\"uncurated_user_group\",\n        user_ids=[curator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n\n    # Admin creates a cc_pair\n    private_cc_pair = CCPairManager.create_from_scratch(\n        access_type=AccessType.PRIVATE,\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n    # Admin creates a public cc_pair\n    public_cc_pair = CCPairManager.create_from_scratch(\n        access_type=AccessType.PUBLIC,\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n    # END OF HAPPY PATH\n\n    \"\"\"Tests for things Curators/Admins should not be able to do\"\"\"\n\n    # Test that curator cannot create a non-public document set for the group they don't curate\n    with pytest.raises(HTTPError):\n        DocumentSetManager.create(\n            name=\"Invalid Document Set 1\",\n            is_public=False,\n            groups=[user_group_2.id],\n            cc_pair_ids=[public_cc_pair.id],\n            user_performing_action=curator,\n        )\n\n    # Test that curator cannot create a document set attached to both groups\n    with pytest.raises(HTTPError):\n        DocumentSetManager.create(\n            name=\"Invalid Document Set 2\",\n            is_public=False,\n            cc_pair_ids=[public_cc_pair.id],\n            groups=[user_group_1.id, user_group_2.id],\n            user_performing_action=curator,\n        )\n\n    # Test that curator cannot create a document set with no groups\n    with pytest.raises(HTTPError):\n        DocumentSetManager.create(\n            name=\"Invalid Document Set 3\",\n            is_public=False,\n            cc_pair_ids=[public_cc_pair.id],\n            groups=[],\n            user_performing_action=curator,\n        )\n\n    # Test that curator cannot create a document set with no cc_pairs\n    with pytest.raises(HTTPError):\n        DocumentSetManager.create(\n            name=\"Invalid Document Set 4\",\n            is_public=False,\n            cc_pair_ids=[],\n            groups=[user_group_1.id],\n            user_performing_action=curator,\n        )\n\n    # Test that admin cannot create a document set with no cc_pairs\n    with pytest.raises(HTTPError):\n        DocumentSetManager.create(\n            name=\"Invalid Document Set 4\",\n            is_public=False,\n            cc_pair_ids=[],\n            groups=[user_group_1.id],\n            user_performing_action=admin_user,\n        )\n\n    \"\"\"Tests for things Curators should be able to do\"\"\"\n    # Test that curator can create a document set for the group they curate\n    valid_doc_set = DocumentSetManager.create(\n        name=\"Valid Document Set\",\n        is_public=False,\n        cc_pair_ids=[public_cc_pair.id],\n        groups=[user_group_1.id],\n        user_performing_action=curator,\n    )\n\n    DocumentSetManager.wait_for_sync(\n        document_sets_to_check=[valid_doc_set], user_performing_action=admin_user\n    )\n\n    # Verify that the valid document set was created\n    DocumentSetManager.verify(\n        document_set=valid_doc_set,\n        user_performing_action=admin_user,\n    )\n\n    # Verify that only one document set exists\n    all_doc_sets = DocumentSetManager.get_all(user_performing_action=admin_user)\n    assert len(all_doc_sets) == 1\n\n    # Add the private_cc_pair to the doc set on our end for later comparison\n    valid_doc_set.cc_pair_ids.append(private_cc_pair.id)\n\n    # Confirm the curator can't add the private_cc_pair to the doc set\n    with pytest.raises(HTTPError):\n        DocumentSetManager.edit(\n            document_set=valid_doc_set,\n            user_performing_action=curator,\n        )\n    # Confirm the admin can't add the private_cc_pair to the doc set\n    with pytest.raises(HTTPError):\n        DocumentSetManager.edit(\n            document_set=valid_doc_set,\n            user_performing_action=admin_user,\n        )\n\n    # Verify the document set has not been updated in the db\n    with pytest.raises(ValueError):\n        DocumentSetManager.verify(\n            document_set=valid_doc_set,\n            user_performing_action=admin_user,\n        )\n\n    # Add the private_cc_pair to the user group on our end for later comparison\n    user_group_1.cc_pair_ids.append(private_cc_pair.id)\n\n    # Admin adds the cc_pair to the group the curator curates\n    UserGroupManager.edit(\n        user_group=user_group_1,\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n    UserGroupManager.verify(\n        user_group=user_group_1,\n        user_performing_action=admin_user,\n    )\n\n    # Confirm the curator can now add the cc_pair to the doc set\n    DocumentSetManager.edit(\n        document_set=valid_doc_set,\n        user_performing_action=curator,\n    )\n    DocumentSetManager.wait_for_sync(\n        document_sets_to_check=[valid_doc_set], user_performing_action=admin_user\n    )\n    # Verify the updated document set\n    DocumentSetManager.verify(\n        document_set=valid_doc_set,\n        user_performing_action=admin_user,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/permissions/test_file_connector_permissions.py",
    "content": "import io\nimport json\nimport os\n\nimport pytest\nimport requests\n\nfrom onyx.db.enums import AccessType\nfrom onyx.db.models import UserRole\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.user import DATestUser\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\n\n\ndef _upload_connector_file(\n    *,\n    user_performing_action: DATestUser,\n    file_name: str,\n    content: bytes,\n) -> tuple[str, str]:\n    headers = user_performing_action.headers.copy()\n    headers.pop(\"Content-Type\", None)\n\n    response = requests.post(\n        f\"{API_SERVER_URL}/manage/admin/connector/file/upload\",\n        files=[(\"files\", (file_name, io.BytesIO(content), \"text/plain\"))],\n        headers=headers,\n    )\n    response.raise_for_status()\n    payload = response.json()\n    return payload[\"file_paths\"][0], payload[\"file_names\"][0]\n\n\ndef _update_connector_files(\n    *,\n    connector_id: int,\n    user_performing_action: DATestUser,\n    file_ids_to_remove: list[str],\n    new_file_name: str,\n    new_file_content: bytes,\n) -> requests.Response:\n    headers = user_performing_action.headers.copy()\n    headers.pop(\"Content-Type\", None)\n\n    return requests.post(\n        f\"{API_SERVER_URL}/manage/admin/connector/{connector_id}/files/update\",\n        data={\"file_ids_to_remove\": json.dumps(file_ids_to_remove)},\n        files=[(\"files\", (new_file_name, io.BytesIO(new_file_content), \"text/plain\"))],\n        headers=headers,\n    )\n\n\ndef _list_connector_files(\n    *,\n    connector_id: int,\n    user_performing_action: DATestUser,\n) -> requests.Response:\n    return requests.get(\n        f\"{API_SERVER_URL}/manage/admin/connector/{connector_id}/files\",\n        headers=user_performing_action.headers,\n    )\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Curator and user group tests are enterprise only\",\n)\n@pytest.mark.usefixtures(\"reset\")\ndef test_only_global_curator_can_update_public_file_connector_files() -> None:\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    global_curator_creator = UserManager.create(name=\"global_curator_creator\")\n    global_curator_creator = UserManager.set_role(\n        user_to_set=global_curator_creator,\n        target_role=UserRole.GLOBAL_CURATOR,\n        user_performing_action=admin_user,\n    )\n\n    global_curator_editor = UserManager.create(name=\"global_curator_editor\")\n    global_curator_editor = UserManager.set_role(\n        user_to_set=global_curator_editor,\n        target_role=UserRole.GLOBAL_CURATOR,\n        user_performing_action=admin_user,\n    )\n\n    curator_user = UserManager.create(name=\"curator_user\")\n    curator_group = UserGroupManager.create(\n        name=\"curator_group\",\n        user_ids=[curator_user.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[curator_group],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.set_curator_status(\n        test_user_group=curator_group,\n        user_to_set_as_curator=curator_user,\n        user_performing_action=admin_user,\n    )\n\n    initial_file_id, initial_file_name = _upload_connector_file(\n        user_performing_action=global_curator_creator,\n        file_name=\"initial-file.txt\",\n        content=b\"initial file content\",\n    )\n\n    connector = ConnectorManager.create(\n        user_performing_action=global_curator_creator,\n        name=\"public_file_connector\",\n        source=DocumentSource.FILE,\n        connector_specific_config={\n            \"file_locations\": [initial_file_id],\n            \"file_names\": [initial_file_name],\n            \"zip_metadata_file_id\": None,\n        },\n        access_type=AccessType.PUBLIC,\n        groups=[],\n    )\n    credential = CredentialManager.create(\n        user_performing_action=global_curator_creator,\n        source=DocumentSource.FILE,\n        curator_public=True,\n        groups=[],\n        name=\"public_file_connector_credential\",\n    )\n    CCPairManager.create(\n        connector_id=connector.id,\n        credential_id=credential.id,\n        user_performing_action=global_curator_creator,\n        access_type=AccessType.PUBLIC,\n        groups=[],\n        name=\"public_file_connector_cc_pair\",\n    )\n\n    curator_list_response = _list_connector_files(\n        connector_id=connector.id,\n        user_performing_action=curator_user,\n    )\n    curator_list_response.raise_for_status()\n    curator_list_payload = curator_list_response.json()\n    assert any(f[\"file_id\"] == initial_file_id for f in curator_list_payload[\"files\"])\n\n    global_curator_list_response = _list_connector_files(\n        connector_id=connector.id,\n        user_performing_action=global_curator_editor,\n    )\n    global_curator_list_response.raise_for_status()\n    global_curator_list_payload = global_curator_list_response.json()\n    assert any(\n        f[\"file_id\"] == initial_file_id for f in global_curator_list_payload[\"files\"]\n    )\n\n    denied_response = _update_connector_files(\n        connector_id=connector.id,\n        user_performing_action=curator_user,\n        file_ids_to_remove=[initial_file_id],\n        new_file_name=\"curator-file.txt\",\n        new_file_content=b\"curator updated file\",\n    )\n    assert denied_response.status_code == 403\n\n    allowed_response = _update_connector_files(\n        connector_id=connector.id,\n        user_performing_action=global_curator_editor,\n        file_ids_to_remove=[initial_file_id],\n        new_file_name=\"global-curator-file.txt\",\n        new_file_content=b\"global curator updated file\",\n    )\n    allowed_response.raise_for_status()\n\n    payload = allowed_response.json()\n    assert initial_file_id not in payload[\"file_paths\"]\n    assert \"global-curator-file.txt\" in payload[\"file_names\"]\n\n    creator_group = UserGroupManager.create(\n        name=\"creator_group\",\n        user_ids=[global_curator_creator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[creator_group],\n        user_performing_action=admin_user,\n    )\n\n    private_file_id, private_file_name = _upload_connector_file(\n        user_performing_action=global_curator_creator,\n        file_name=\"private-initial-file.txt\",\n        content=b\"private initial file content\",\n    )\n\n    private_connector = ConnectorManager.create(\n        user_performing_action=global_curator_creator,\n        name=\"private_file_connector\",\n        source=DocumentSource.FILE,\n        connector_specific_config={\n            \"file_locations\": [private_file_id],\n            \"file_names\": [private_file_name],\n            \"zip_metadata_file_id\": None,\n        },\n        access_type=AccessType.PRIVATE,\n        groups=[creator_group.id],\n    )\n    private_credential = CredentialManager.create(\n        user_performing_action=global_curator_creator,\n        source=DocumentSource.FILE,\n        curator_public=False,\n        groups=[creator_group.id],\n        name=\"private_file_connector_credential\",\n    )\n    CCPairManager.create(\n        connector_id=private_connector.id,\n        credential_id=private_credential.id,\n        user_performing_action=global_curator_creator,\n        access_type=AccessType.PRIVATE,\n        groups=[creator_group.id],\n        name=\"private_file_connector_cc_pair\",\n    )\n\n    private_denied_response = _update_connector_files(\n        connector_id=private_connector.id,\n        user_performing_action=global_curator_editor,\n        file_ids_to_remove=[private_file_id],\n        new_file_name=\"global-curator-private-file.txt\",\n        new_file_content=b\"global curator private update\",\n    )\n    assert private_denied_response.status_code == 403\n"
  },
  {
    "path": "backend/tests/integration/tests/permissions/test_persona_permissions.py",
    "content": "\"\"\"\nThis file tests the permissions for creating and editing personas for different user roles:\n- Basic users can create personas and edit their own\n- Curators can edit personas that belong exclusively to groups they curate\n- Admins can edit all personas\n\"\"\"\n\nimport os\n\nimport pytest\nfrom requests.exceptions import HTTPError\n\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.managers.user import DATestUser\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Curator and user group tests are enterprise only\",\n)\ndef test_persona_permissions(reset: None) -> None:  # noqa: ARG001\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    # Creating a curator user\n    curator: DATestUser = UserManager.create(name=\"curator\")\n\n    # Creating a basic user\n    basic_user: DATestUser = UserManager.create(name=\"basic_user\")\n\n    # Creating user groups\n    user_group_1 = UserGroupManager.create(\n        name=\"curated_user_group\",\n        user_ids=[curator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n    # Setting the user as a curator for the user group\n    UserGroupManager.set_curator_status(\n        test_user_group=user_group_1,\n        user_to_set_as_curator=curator,\n        user_performing_action=admin_user,\n    )\n\n    # Creating another user group that the user is not a curator of\n    user_group_2 = UserGroupManager.create(\n        name=\"uncurated_user_group\",\n        user_ids=[curator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_2], user_performing_action=admin_user\n    )\n\n    \"\"\"Test that any user can create a persona\"\"\"\n    # Basic user creates a persona\n    basic_user_persona = PersonaManager.create(\n        name=\"basic_user_persona\",\n        description=\"A persona created by basic user\",\n        is_public=False,\n        groups=[],\n        users=[admin_user.id],\n        user_performing_action=basic_user,\n    )\n    PersonaManager.verify(basic_user_persona, user_performing_action=basic_user)\n\n    # Curator creates a persona\n    curator_persona = PersonaManager.create(\n        name=\"curator_persona\",\n        description=\"A persona created by curator\",\n        is_public=False,\n        groups=[],\n        user_performing_action=curator,\n    )\n    PersonaManager.verify(curator_persona, user_performing_action=curator)\n\n    # Admin creates personas for different groups\n    admin_persona_group_1 = PersonaManager.create(\n        name=\"admin_persona_group_1\",\n        description=\"A persona for group 1\",\n        is_public=False,\n        groups=[user_group_1.id],\n        user_performing_action=admin_user,\n    )\n    admin_persona_group_2 = PersonaManager.create(\n        name=\"admin_persona_group_2\",\n        description=\"A persona for group 2\",\n        is_public=False,\n        groups=[user_group_2.id],\n        user_performing_action=admin_user,\n    )\n    admin_persona_both_groups = PersonaManager.create(\n        name=\"admin_persona_both_groups\",\n        description=\"A persona for both groups\",\n        is_public=False,\n        groups=[user_group_1.id, user_group_2.id],\n        user_performing_action=admin_user,\n    )\n\n    \"\"\"Test that users can edit their own personas\"\"\"\n    # Basic user can edit their own persona\n    PersonaManager.edit(\n        persona=basic_user_persona,\n        description=\"Updated description by basic user\",\n        user_performing_action=basic_user,\n    )\n    PersonaManager.verify(basic_user_persona, user_performing_action=basic_user)\n\n    # Basic user cannot edit other's personas\n    with pytest.raises(HTTPError):\n        PersonaManager.edit(\n            persona=curator_persona,\n            description=\"Invalid edit by basic user\",\n            user_performing_action=basic_user,\n        )\n\n    \"\"\"Test curator permissions\"\"\"\n    # Curator can edit personas that belong exclusively to groups they curate\n    PersonaManager.edit(\n        persona=admin_persona_group_1,\n        description=\"Updated by curator\",\n        user_performing_action=curator,\n    )\n    PersonaManager.verify(admin_persona_group_1, user_performing_action=curator)\n\n    # Curator cannot edit personas in groups they don't curate\n    with pytest.raises(HTTPError):\n        PersonaManager.edit(\n            persona=admin_persona_group_2,\n            description=\"Invalid edit by curator\",\n            user_performing_action=curator,\n        )\n\n    # Curator cannot edit personas that belong to multiple groups, even if they curate one\n    with pytest.raises(HTTPError):\n        PersonaManager.edit(\n            persona=admin_persona_both_groups,\n            description=\"Invalid edit by curator\",\n            user_performing_action=curator,\n        )\n\n    \"\"\"Test admin permissions\"\"\"\n    # Admin can edit any persona\n\n    # the persona was shared with the admin user on creation\n    # this edit call will simulate having the same user in the list twice.\n    # The server side should dedupe and handle this correctly (prior bug)\n    PersonaManager.edit(\n        persona=basic_user_persona,\n        description=\"Updated by admin 2\",\n        users=[admin_user.id, admin_user.id],\n        user_performing_action=admin_user,\n    )\n    PersonaManager.verify(basic_user_persona, user_performing_action=admin_user)\n\n    PersonaManager.edit(\n        persona=curator_persona,\n        description=\"Updated by admin\",\n        user_performing_action=admin_user,\n    )\n    PersonaManager.verify(curator_persona, user_performing_action=admin_user)\n\n    PersonaManager.edit(\n        persona=admin_persona_group_1,\n        description=\"Updated by admin\",\n        user_performing_action=admin_user,\n    )\n    PersonaManager.verify(admin_persona_group_1, user_performing_action=admin_user)\n\n    PersonaManager.edit(\n        persona=admin_persona_group_2,\n        description=\"Updated by admin\",\n        user_performing_action=admin_user,\n    )\n    PersonaManager.verify(admin_persona_group_2, user_performing_action=admin_user)\n\n    PersonaManager.edit(\n        persona=admin_persona_both_groups,\n        description=\"Updated by admin\",\n        user_performing_action=admin_user,\n    )\n    PersonaManager.verify(admin_persona_both_groups, user_performing_action=admin_user)\n"
  },
  {
    "path": "backend/tests/integration/tests/permissions/test_user_file_permissions.py",
    "content": "\"\"\"\nThis file tests user file permissions in different scenarios:\n1. Public assistant with user files - files should be accessible to all users\n2. Direct file access - user files should NOT be accessible by users who don't own them\n\"\"\"\n\nimport io\nfrom typing import NamedTuple\n\nimport pytest\n\nfrom onyx.file_store.models import FileDescriptor\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.file import FileManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestPersona\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass UserFileTestSetup(NamedTuple):\n    admin_user: DATestUser\n    user1_file_owner: DATestUser\n    user2_non_owner: DATestUser\n    user1_file_descriptor: FileDescriptor\n    user1_file_id: str\n    public_assistant: DATestPersona\n\n\n@pytest.fixture\ndef user_file_setup(reset: None) -> UserFileTestSetup:  # noqa: ARG001\n    \"\"\"\n    Common setup for user file permission tests.\n    Creates users, files, and a public assistant with files.\n    \"\"\"\n    # Create an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    # Create LLM provider for chat functionality\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    # Create user1 who will own the file\n    user1: DATestUser = UserManager.create(name=\"user1_file_owner\")\n\n    # Create user2 who will use the assistant but doesn't own the file\n    user2: DATestUser = UserManager.create(name=\"user2_non_owner\")\n\n    # Create a test file and upload as user1\n    test_file_content = b\"This is test content for user file permission checking.\"\n    test_file = (\"test_file.txt\", io.BytesIO(test_file_content))\n\n    file_descriptors, error = FileManager.upload_files(\n        files=[test_file],\n        user_performing_action=user1,\n    )\n\n    assert not error, f\"Failed to upload file: {error}\"\n    assert len(file_descriptors) == 1, \"Expected 1 file to be uploaded\"\n\n    # Get the file descriptor and user_file_id\n    user1_file_descriptor = file_descriptors[0]\n    user_file_id = user1_file_descriptor.get(\"user_file_id\")\n\n    assert user_file_id is not None, \"user_file_id should not be None\"\n\n    # Create a public assistant with the user file attached\n    public_assistant = PersonaManager.create(\n        name=\"Public Assistant with Files\",\n        description=\"A public assistant with user files for testing permissions\",\n        is_public=True,\n        user_file_ids=[user_file_id],\n        user_performing_action=admin_user,\n    )\n\n    return UserFileTestSetup(\n        admin_user=admin_user,\n        user1_file_owner=user1,\n        user2_non_owner=user2,\n        user1_file_descriptor=user1_file_descriptor,\n        user1_file_id=user_file_id,\n        public_assistant=public_assistant,\n    )\n\n\ndef test_public_assistant_with_user_files(\n    user_file_setup: UserFileTestSetup,\n) -> None:\n    \"\"\"\n    Test that a public assistant with user files attached can be used by users\n    who don't own those files without permission errors.\n    \"\"\"\n    # Create a chat session with the public assistant as user2\n    chat_session = ChatSessionManager.create(\n        persona_id=user_file_setup.public_assistant.id,\n        description=\"Test chat session for user file permissions\",\n        user_performing_action=user_file_setup.user2_non_owner,\n    )\n\n    # Send a message as user2 - this should not throw a permission error\n    # even though user2 doesn't own the file attached to the assistant\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"Hello, can you help me?\",\n        user_performing_action=user_file_setup.user2_non_owner,\n    )\n\n    # Verify the message was processed without errors\n    assert (\n        response.error is None\n    ), f\"Expected no error when user2 uses public assistant with user1's files, but got error: {response.error}\"\n    assert len(response.full_message) > 0, \"Expected a response from the assistant\"\n\n    # Verify chat history is accessible\n    chat_history = ChatSessionManager.get_chat_history(\n        chat_session=chat_session,\n        user_performing_action=user_file_setup.user2_non_owner,\n    )\n    assert (\n        len(chat_history) >= 2\n    ), \"Expected at least 2 messages (user message and assistant response)\"\n"
  },
  {
    "path": "backend/tests/integration/tests/permissions/test_user_role_permissions.py",
    "content": "\"\"\"\nThis file tests the ability of different user types to set the role of other users.\n\"\"\"\n\nimport os\n\nimport pytest\nfrom requests.exceptions import HTTPError\n\nfrom onyx.db.models import UserRole\nfrom tests.integration.common_utils.managers.user import DATestUser\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Curator and user group tests are enterprise only\",\n)\ndef test_user_role_setting_permissions(reset: None) -> None:  # noqa: ARG001\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n    assert UserManager.is_role(admin_user, UserRole.ADMIN)\n\n    # Creating a basic user\n    basic_user: DATestUser = UserManager.create(name=\"basic_user\")\n    assert UserManager.is_role(basic_user, UserRole.BASIC)\n\n    # Creating a curator\n    curator: DATestUser = UserManager.create(name=\"curator\")\n    assert UserManager.is_role(curator, UserRole.BASIC)\n\n    # Creating a curator without adding to a group should not work\n    with pytest.raises(HTTPError):\n        UserManager.set_role(\n            user_to_set=curator,\n            target_role=UserRole.CURATOR,\n            user_performing_action=admin_user,\n        )\n\n    global_curator: DATestUser = UserManager.create(name=\"global_curator\")\n    assert UserManager.is_role(global_curator, UserRole.BASIC)\n\n    # Setting the role of a global curator should not work for a basic user\n    with pytest.raises(HTTPError):\n        UserManager.set_role(\n            user_to_set=global_curator,\n            target_role=UserRole.GLOBAL_CURATOR,\n            user_performing_action=basic_user,\n        )\n\n    # Setting the role of a global curator should work for an admin user\n    UserManager.set_role(\n        user_to_set=global_curator,\n        target_role=UserRole.GLOBAL_CURATOR,\n        user_performing_action=admin_user,\n    )\n    assert UserManager.is_role(global_curator, UserRole.GLOBAL_CURATOR)\n\n    # Setting the role of a global curator should not work for an invalid curator\n    with pytest.raises(HTTPError):\n        UserManager.set_role(\n            user_to_set=global_curator,\n            target_role=UserRole.BASIC,\n            user_performing_action=global_curator,\n        )\n    assert UserManager.is_role(global_curator, UserRole.GLOBAL_CURATOR)\n\n    # Creating a user group\n    user_group_1 = UserGroupManager.create(\n        name=\"user_group_1\",\n        user_ids=[],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n\n    # This should fail because the curator is not in the user group\n    with pytest.raises(HTTPError):\n        UserGroupManager.set_curator_status(\n            test_user_group=user_group_1,\n            user_to_set_as_curator=curator,\n            user_performing_action=admin_user,\n        )\n\n    # Adding the curator to the user group\n    user_group_1.user_ids = [curator.id]\n    UserGroupManager.edit(user_group=user_group_1, user_performing_action=admin_user)\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n\n    # This should work because the curator is in the user group\n    UserGroupManager.set_curator_status(\n        test_user_group=user_group_1,\n        user_to_set_as_curator=curator,\n        user_performing_action=admin_user,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/permissions/test_whole_curator_flow.py",
    "content": "\"\"\"\nThis test tests the happy path for curator permissions\n\"\"\"\n\nimport os\n\nimport pytest\n\nfrom onyx.db.enums import AccessType\nfrom onyx.db.models import UserRole\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.connector import ConnectorManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.user import DATestUser\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Curator tests are enterprise only\",\n)\ndef test_whole_curator_flow(reset: None) -> None:  # noqa: ARG001\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n    assert UserManager.is_role(admin_user, UserRole.ADMIN)\n\n    # Creating a curator\n    curator: DATestUser = UserManager.create(name=\"curator\")\n\n    # Creating a user group\n    user_group_1 = UserGroupManager.create(\n        name=\"user_group_1\",\n        user_ids=[curator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n    # Making curator a curator of user_group_1\n    UserGroupManager.set_curator_status(\n        test_user_group=user_group_1,\n        user_to_set_as_curator=curator,\n        user_performing_action=admin_user,\n    )\n    assert UserManager.is_role(curator, UserRole.CURATOR)\n\n    # Creating a credential as curator\n    test_credential = CredentialManager.create(\n        name=\"curator_test_credential\",\n        source=DocumentSource.FILE,\n        curator_public=False,\n        groups=[user_group_1.id],\n        user_performing_action=curator,\n    )\n\n    # Creating a connector as curator\n    test_connector = ConnectorManager.create(\n        name=\"curator_test_connector\",\n        source=DocumentSource.FILE,\n        access_type=AccessType.PRIVATE,\n        groups=[user_group_1.id],\n        user_performing_action=curator,\n    )\n\n    # Test editing the connector\n    test_connector.name = \"updated_test_connector\"\n    ConnectorManager.edit(connector=test_connector, user_performing_action=curator)\n\n    # Creating a CC pair as curator\n    test_cc_pair = CCPairManager.create(\n        connector_id=test_connector.id,\n        credential_id=test_credential.id,\n        name=\"curator_test_cc_pair\",\n        access_type=AccessType.PRIVATE,\n        groups=[user_group_1.id],\n        user_performing_action=curator,\n    )\n\n    CCPairManager.verify(cc_pair=test_cc_pair, user_performing_action=admin_user)\n\n    # Verify that the curator can pause and unpause the CC pair\n    CCPairManager.pause_cc_pair(cc_pair=test_cc_pair, user_performing_action=curator)\n\n    # Verify that the curator can delete the CC pair\n    CCPairManager.delete(cc_pair=test_cc_pair, user_performing_action=curator)\n    CCPairManager.wait_for_deletion_completion(\n        cc_pair_id=test_cc_pair.id, user_performing_action=curator\n    )\n\n    # Verify that the CC pair has been deleted\n    CCPairManager.verify(\n        cc_pair=test_cc_pair,\n        verify_deleted=True,\n        user_performing_action=admin_user,\n    )\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Curator tests are enterprise only\",\n)\ndef test_global_curator_flow(reset: None) -> None:  # noqa: ARG001\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n    assert UserManager.is_role(admin_user, UserRole.ADMIN)\n\n    # Creating a user\n    global_curator: DATestUser = UserManager.create(name=\"global_curator\")\n    assert UserManager.is_role(global_curator, UserRole.BASIC)\n\n    # Set the user to a global curator\n    UserManager.set_role(\n        user_to_set=global_curator,\n        target_role=UserRole.GLOBAL_CURATOR,\n        user_performing_action=admin_user,\n    )\n    assert UserManager.is_role(global_curator, UserRole.GLOBAL_CURATOR)\n\n    # Creating a user group containing the global curator\n    user_group_1 = UserGroupManager.create(\n        name=\"user_group_1\",\n        user_ids=[global_curator.id],\n        cc_pair_ids=[],\n        user_performing_action=admin_user,\n    )\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n\n    # Creating a credential as global curator\n    test_credential = CredentialManager.create(\n        name=\"curator_test_credential\",\n        source=DocumentSource.FILE,\n        curator_public=False,\n        groups=[user_group_1.id],\n        user_performing_action=global_curator,\n    )\n\n    # Creating a connector as global curator\n    test_connector = ConnectorManager.create(\n        name=\"curator_test_connector\",\n        source=DocumentSource.FILE,\n        access_type=AccessType.PRIVATE,\n        groups=[user_group_1.id],\n        user_performing_action=global_curator,\n    )\n\n    # Test editing the connector\n    test_connector.name = \"updated_test_connector\"\n    ConnectorManager.edit(\n        connector=test_connector, user_performing_action=global_curator\n    )\n\n    # Creating a CC pair as global curator\n    test_cc_pair = CCPairManager.create(\n        connector_id=test_connector.id,\n        credential_id=test_credential.id,\n        name=\"curator_test_cc_pair\",\n        access_type=AccessType.PRIVATE,\n        groups=[user_group_1.id],\n        user_performing_action=global_curator,\n    )\n\n    CCPairManager.verify(cc_pair=test_cc_pair, user_performing_action=admin_user)\n\n    # Verify that the curator can pause and unpause the CC pair\n    CCPairManager.pause_cc_pair(\n        cc_pair=test_cc_pair, user_performing_action=global_curator\n    )\n\n    # Verify that the curator can delete the CC pair\n    CCPairManager.delete(cc_pair=test_cc_pair, user_performing_action=global_curator)\n    CCPairManager.wait_for_deletion_completion(\n        cc_pair_id=test_cc_pair.id, user_performing_action=global_curator\n    )\n\n    # Verify that the CC pair has been deleted\n    CCPairManager.verify(\n        cc_pair=test_cc_pair,\n        verify_deleted=True,\n        user_performing_action=admin_user,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/personalization/test_personalization_flow.py",
    "content": "import requests\n\nfrom onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _get_auth_headers(user: DATestUser) -> tuple[dict, dict]:\n    return user.headers, {\n        FASTAPI_USERS_AUTH_COOKIE_NAME: user.cookies[FASTAPI_USERS_AUTH_COOKIE_NAME]\n    }\n\n\ndef _get_me(headers: dict, cookies: dict) -> dict:\n    response = requests.get(f\"{API_SERVER_URL}/me\", headers=headers, cookies=cookies)\n    response.raise_for_status()\n    return response.json()\n\n\ndef _patch_personalization(headers: dict, cookies: dict, payload: dict) -> None:\n    response = requests.patch(\n        f\"{API_SERVER_URL}/user/personalization\",\n        json=payload,\n        headers=headers,\n        cookies=cookies,\n    )\n    response.raise_for_status()\n\n\ndef test_personalization_round_trip(reset: None) -> None:  # noqa: ARG001\n    user = UserManager.create()\n    headers, cookies = _get_auth_headers(user)\n\n    # baseline should have empty personalization\n    me_initial = _get_me(headers, cookies)\n    assert me_initial[\"personalization\"][\"name\"] == \"\"\n    assert me_initial[\"personalization\"][\"role\"] == \"\"\n    assert me_initial[\"personalization\"][\"use_memories\"] is True\n    assert me_initial[\"personalization\"][\"enable_memory_tool\"] is True\n    assert me_initial[\"personalization\"][\"memories\"] == []\n\n    payload = {\n        \"name\": \"Jane Doe\",\n        \"role\": \"Developer advocate\",\n        \"use_memories\": True,\n        \"memories\": [\n            {\"content\": \"Loves peanut butter\"},\n            {\"content\": \"Prefers API docs\"},\n        ],\n    }\n\n    _patch_personalization(headers, cookies, payload)\n\n    me_after = _get_me(headers, cookies)\n    personalization = me_after[\"personalization\"]\n\n    assert personalization[\"name\"] == payload[\"name\"]\n    assert personalization[\"role\"] == payload[\"role\"]\n    assert personalization[\"use_memories\"] is True\n    returned_memories = personalization[\"memories\"]\n    assert len(returned_memories) == 2\n    for mem in returned_memories:\n        assert isinstance(mem[\"id\"], int)\n        assert isinstance(mem[\"content\"], str)\n    assert [m[\"content\"] for m in returned_memories] == [\n        \"Prefers API docs\",\n        \"Loves peanut butter\",\n    ]\n\n    # update memories to empty\n    payload[\"memories\"] = []\n    _patch_personalization(headers, cookies, payload)\n    me_final = _get_me(headers, cookies)\n    assert me_final[\"personalization\"][\"memories\"] == []\n\n\ndef test_enable_memory_tool_round_trip(reset: None) -> None:  # noqa: ARG001\n    user = UserManager.create()\n    headers, cookies = _get_auth_headers(user)\n\n    # default should be True\n    me_initial = _get_me(headers, cookies)\n    assert me_initial[\"personalization\"][\"enable_memory_tool\"] is True\n\n    # disable enable_memory_tool\n    _patch_personalization(headers, cookies, {\"enable_memory_tool\": False})\n    me_after = _get_me(headers, cookies)\n    assert me_after[\"personalization\"][\"enable_memory_tool\"] is False\n\n    # re-enable\n    _patch_personalization(headers, cookies, {\"enable_memory_tool\": True})\n    me_reenabled = _get_me(headers, cookies)\n    assert me_reenabled[\"personalization\"][\"enable_memory_tool\"] is True\n\n\ndef test_enable_memory_tool_independent_of_use_memories(\n    reset: None,  # noqa: ARG001\n) -> None:\n    user = UserManager.create()\n    headers, cookies = _get_auth_headers(user)\n\n    # set use_memories=False and enable_memory_tool=True simultaneously\n    _patch_personalization(\n        headers, cookies, {\"use_memories\": False, \"enable_memory_tool\": True}\n    )\n    me = _get_me(headers, cookies)\n    assert me[\"personalization\"][\"use_memories\"] is False\n    assert me[\"personalization\"][\"enable_memory_tool\"] is True\n\n    # reverse: use_memories=True and enable_memory_tool=False\n    _patch_personalization(\n        headers, cookies, {\"use_memories\": True, \"enable_memory_tool\": False}\n    )\n    me = _get_me(headers, cookies)\n    assert me[\"personalization\"][\"use_memories\"] is True\n    assert me[\"personalization\"][\"enable_memory_tool\"] is False\n"
  },
  {
    "path": "backend/tests/integration/tests/personas/test_persona_categories.py",
    "content": "from uuid import uuid4\n\nimport pytest\nfrom requests.exceptions import HTTPError\n\nfrom tests.integration.common_utils.managers.persona import (\n    PersonaLabelManager,\n)\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestPersonaLabel\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_persona_label_management(reset: None) -> None:  # noqa: ARG001\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    persona_label = DATestPersonaLabel(\n        id=None,\n        name=f\"Test label {uuid4()}\",\n    )\n    persona_label = PersonaLabelManager.create(\n        label=persona_label,\n        user_performing_action=admin_user,\n    )\n    print(f\"Created persona label {persona_label.name} with id {persona_label.id}\")\n\n    assert PersonaLabelManager.verify(\n        label=persona_label,\n        user_performing_action=admin_user,\n    ), \"Persona label was not found after creation\"\n\n    regular_user: DATestUser = UserManager.create(name=\"regular_user\")\n\n    updated_persona_label = DATestPersonaLabel(\n        id=persona_label.id,\n        name=f\"Updated {persona_label.name}\",\n    )\n    with pytest.raises(HTTPError) as exc_info:\n        PersonaLabelManager.update(\n            label=updated_persona_label,\n            user_performing_action=regular_user,\n        )\n    assert exc_info.value.response is not None\n    assert exc_info.value.response.status_code == 403\n\n    assert PersonaLabelManager.verify(\n        label=persona_label,\n        user_performing_action=admin_user,\n    ), \"Persona label should not have been updated by non-admin user\"\n\n    result = PersonaLabelManager.delete(\n        label=persona_label,\n        user_performing_action=regular_user,\n    )\n    assert (\n        result is False\n    ), \"Regular user should not be able to delete the persona label\"\n\n    assert PersonaLabelManager.verify(\n        label=persona_label,\n        user_performing_action=admin_user,\n    ), \"Persona label should not have been deleted by non-admin user\"\n\n    updated_persona_label.name = f\"Updated {persona_label.name}\"\n    updated_persona_label = PersonaLabelManager.update(\n        label=updated_persona_label,\n        user_performing_action=admin_user,\n    )\n    print(f\"Updated persona label to {updated_persona_label.name}\")\n\n    assert PersonaLabelManager.verify(\n        label=updated_persona_label,\n        user_performing_action=admin_user,\n    ), \"Persona label was not updated by admin\"\n\n    success = PersonaLabelManager.delete(\n        label=persona_label,\n        user_performing_action=admin_user,\n    )\n    assert success, \"Admin user should be able to delete the persona label\"\n    print(f\"Deleted persona label {persona_label.name} with id {persona_label.id}\")\n\n    assert not PersonaLabelManager.verify(\n        label=persona_label,\n        user_performing_action=admin_user,\n    ), \"Persona label should not exist after deletion by admin\"\n"
  },
  {
    "path": "backend/tests/integration/tests/personas/test_persona_creation.py",
    "content": "import requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _list_minimal_personas(user: DATestUser) -> list[dict]:\n    response = requests.get(\n        f\"{API_SERVER_URL}/persona\",\n        headers=user.headers,\n        cookies=user.cookies,\n    )\n    response.raise_for_status()\n    return response.json()\n\n\ndef _share_persona(\n    persona_id: int, user_ids: list[str], acting_user: DATestUser\n) -> None:\n    response = requests.patch(\n        f\"{API_SERVER_URL}/persona/{persona_id}/share\",\n        json={\"user_ids\": user_ids},\n        headers=acting_user.headers,\n        cookies=acting_user.cookies,\n    )\n    response.raise_for_status()\n\n\ndef test_persona_create_update_share_delete(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    basic_user: DATestUser,\n) -> None:\n    # TODO: refactor `PersonaManager.verify`, not a good pattern\n    # Create a persona as admin and verify it can be fetched\n    expected_persona = PersonaManager.create(user_performing_action=admin_user)\n    PersonaManager.verify(expected_persona, user_performing_action=admin_user)\n\n    # Update the persona and verify changes\n    updated_persona = PersonaManager.edit(\n        expected_persona,\n        name=f\"updated-{expected_persona.name}\",\n        description=f\"updated-{expected_persona.description}\",\n        is_public=False,\n        user_performing_action=admin_user,\n    )\n    assert PersonaManager.verify(updated_persona, user_performing_action=admin_user)\n\n    # Creator should see the persona in their minimal list\n    creator_minimals = _list_minimal_personas(admin_user)\n    assert any(p[\"id\"] == updated_persona.id for p in creator_minimals)\n\n    # Regular user should not see a non-public, non-shared persona\n    other_minimals_before = _list_minimal_personas(basic_user)\n    assert all(p[\"id\"] != updated_persona.id for p in other_minimals_before)\n\n    # Share persona with the regular user and verify visibility\n    _share_persona(updated_persona.id, [basic_user.id], admin_user)\n    other_minimals_after = _list_minimal_personas(basic_user)\n    assert any(p[\"id\"] == updated_persona.id for p in other_minimals_after)\n\n    # Delete persona and verify it no longer appears in lists\n    assert PersonaManager.delete(updated_persona, user_performing_action=admin_user)\n\n    # After deletion, list should not include it for either user\n    creator_minimals_after_delete = _list_minimal_personas(admin_user)\n    assert all(p[\"id\"] != updated_persona.id for p in creator_minimals_after_delete)\n\n    regular_minimals_after_delete = _list_minimal_personas(basic_user)\n    assert all(p[\"id\"] != updated_persona.id for p in regular_minimals_after_delete)\n"
  },
  {
    "path": "backend/tests/integration/tests/personas/test_persona_file_context.py",
    "content": "\"\"\"\nIntegration tests for the unified persona file context flow.\n\nEnd-to-end tests that verify:\n1. Files can be uploaded and attached to a persona via API.\n2. The persona correctly reports its attached files.\n3. A chat session with a file-bearing persona processes without error.\n4. Precedence: custom persona files take priority over project files when\n   the chat session is inside a project.\n\nThese tests run against a real Onyx deployment (all services running).\nFile processing is asynchronous, so we poll the file status endpoint\nuntil files reach COMPLETED before chatting.\n\"\"\"\n\nimport time\n\nimport requests\n\nfrom onyx.db.enums import UserFileStatus\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import MAX_DELAY\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.file import FileManager\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.managers.project import ProjectManager\nfrom tests.integration.common_utils.test_file_utils import create_test_text_file\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\nFILE_PROCESSING_POLL_INTERVAL = 2\n\n\ndef _poll_file_statuses(\n    user_file_ids: list[str],\n    user: DATestUser,\n    target_status: UserFileStatus = UserFileStatus.COMPLETED,\n    timeout: int = MAX_DELAY,\n) -> None:\n    \"\"\"Block until all files reach the target status or timeout expires.\"\"\"\n    deadline = time.time() + timeout\n    while time.time() < deadline:\n        response = requests.post(\n            f\"{API_SERVER_URL}/user/projects/file/statuses\",\n            json={\"file_ids\": user_file_ids},\n            headers=user.headers,\n        )\n        response.raise_for_status()\n        statuses = response.json()\n        if all(f[\"status\"] == target_status.value for f in statuses):\n            return\n        time.sleep(FILE_PROCESSING_POLL_INTERVAL)\n    raise TimeoutError(\n        f\"Files {user_file_ids} did not reach {target_status.value} within {timeout}s\"\n    )\n\n\n# ---------------------------------------------------------------------------\n# Tests\n# ---------------------------------------------------------------------------\n\n\ndef test_persona_with_files_chat_no_error(\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"Upload files, attach them to a persona, wait for processing,\n    then send a chat message.  Verify no error is returned.\"\"\"\n\n    # Upload files (creates UserFile records)\n    text_file = create_test_text_file(\n        \"The secret project codename is NIGHTINGALE. It was started in 2024 by the Advanced Research division.\"\n    )\n    file_descriptors, error = FileManager.upload_files(\n        files=[(\"nightingale_brief.txt\", text_file)],\n        user_performing_action=admin_user,\n    )\n    assert not error, f\"File upload failed: {error}\"\n    assert len(file_descriptors) == 1\n\n    user_file_id = file_descriptors[0][\"user_file_id\"]\n    assert user_file_id is not None\n\n    # Wait for file processing\n    _poll_file_statuses([user_file_id], admin_user, timeout=120)\n\n    # Create persona with the file attached\n    persona = PersonaManager.create(\n        user_performing_action=admin_user,\n        name=\"Nightingale Agent\",\n        description=\"Agent with secret file\",\n        system_prompt=\"You are a helpful assistant with access to uploaded files.\",\n        user_file_ids=[user_file_id],\n    )\n\n    # Verify persona has the file\n    persona_snapshots = PersonaManager.get_one(persona.id, admin_user)\n    assert len(persona_snapshots) == 1\n    assert user_file_id in persona_snapshots[0].user_file_ids\n\n    # Chat with the persona\n    chat_session = ChatSessionManager.create(\n        persona_id=persona.id,\n        description=\"Test persona file context\",\n        user_performing_action=admin_user,\n    )\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"What is the secret project codename?\",\n        user_performing_action=admin_user,\n    )\n\n    assert response.error is None, f\"Chat should succeed, got error: {response.error}\"\n    assert len(response.full_message) > 0, \"Response should not be empty\"\n\n\ndef test_persona_without_files_still_works(\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"A persona with no attached files should still chat normally.\"\"\"\n    persona = PersonaManager.create(\n        user_performing_action=admin_user,\n        name=\"Blank Agent\",\n        description=\"No files attached\",\n        system_prompt=\"You are a helpful assistant.\",\n    )\n\n    chat_session = ChatSessionManager.create(\n        persona_id=persona.id,\n        description=\"Test blank persona\",\n        user_performing_action=admin_user,\n    )\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"Hello, how are you?\",\n        user_performing_action=admin_user,\n    )\n\n    assert response.error is None\n    assert len(response.full_message) > 0\n\n\ndef test_persona_files_override_project_files(\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"When a custom persona (with its own files) is used inside a project,\n    the persona's files take precedence — the project's files are invisible.\n\n    We verify this by putting different content in project vs persona files\n    and checking which content the model responds with.\"\"\"\n\n    # Upload persona file\n    persona_file = create_test_text_file(\"The persona's secret word is ALBATROSS.\")\n    persona_fds, err1 = FileManager.upload_files(\n        files=[(\"persona_secret.txt\", persona_file)],\n        user_performing_action=admin_user,\n    )\n    assert not err1\n    persona_user_file_id = persona_fds[0][\"user_file_id\"]\n    assert persona_user_file_id is not None\n    # Create a project and upload project files\n    project = ProjectManager.create(\n        name=\"Precedence Test Project\",\n        user_performing_action=admin_user,\n    )\n    project_files = [\n        (\"project_secret.txt\", b\"The project's secret word is FLAMINGO.\"),\n    ]\n    project_upload_result = ProjectManager.upload_files(\n        project_id=project.id,\n        files=project_files,\n        user_performing_action=admin_user,\n    )\n    assert len(project_upload_result.user_files) == 1\n    project_user_file_id = str(project_upload_result.user_files[0].id)\n\n    # Wait for both persona and project file processing\n    _poll_file_statuses([persona_user_file_id], admin_user, timeout=120)\n    _poll_file_statuses([project_user_file_id], admin_user, timeout=120)\n\n    # Create persona with persona file\n    persona = PersonaManager.create(\n        user_performing_action=admin_user,\n        name=\"Override Agent\",\n        description=\"Persona with its own files\",\n        system_prompt=\"You are a helpful assistant. Answer using the files.\",\n        user_file_ids=[persona_user_file_id],\n    )\n\n    # Create chat session inside the project but using the custom persona\n    chat_session = ChatSessionManager.create(\n        persona_id=persona.id,\n        project_id=project.id,\n        user_performing_action=admin_user,\n    )\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"What is the secret word?\",\n        user_performing_action=admin_user,\n    )\n\n    assert response.error is None, f\"Chat should succeed, got error: {response.error}\"\n    # The persona's file should be what the model sees, not the project's\n    message_lower = response.full_message.lower()\n    assert (\n        \"albatross\" in message_lower\n    ), f\"Response should reference the persona file's secret word (ALBATROSS), but got: {response.full_message}\"\n\n\ndef test_default_persona_in_project_uses_project_files(\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"When the default persona (id=0) is used inside a project,\n    the project's files should be used for context.\"\"\"\n    project = ProjectManager.create(\n        name=\"Default Persona Project\",\n        user_performing_action=admin_user,\n    )\n    project_files = [\n        (\"project_info.txt\", b\"The project mascot is a PANGOLIN.\"),\n    ]\n    upload_result = ProjectManager.upload_files(\n        project_id=project.id,\n        files=project_files,\n        user_performing_action=admin_user,\n    )\n    assert len(upload_result.user_files) == 1\n\n    # Wait for project file processing\n    project_file_id = str(upload_result.user_files[0].id)\n    _poll_file_statuses([project_file_id], admin_user, timeout=120)\n\n    # Create chat session inside project using default persona (id=0)\n    chat_session = ChatSessionManager.create(\n        persona_id=0,\n        project_id=project.id,\n        user_performing_action=admin_user,\n    )\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"What is the project mascot?\",\n        user_performing_action=admin_user,\n    )\n\n    assert response.error is None\n    assert (\n        \"pangolin\" in response.full_message.lower()\n    ), f\"Response should reference the project file content (PANGOLIN), but got: {response.full_message}\"\n\n\ndef test_custom_persona_no_files_in_project_ignores_project(\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"A custom persona with NO files, used inside a project with files,\n    should NOT see the project's files.  The project is purely organizational.\n\n    We verify by asking about content only in the project file and checking\n    the model does NOT reference it.\"\"\"\n\n    project = ProjectManager.create(\n        name=\"Ignored Project\",\n        user_performing_action=admin_user,\n    )\n    project_upload_result = ProjectManager.upload_files(\n        project_id=project.id,\n        files=[(\"project_only.txt\", b\"The project secret is CAPYBARA.\")],\n        user_performing_action=admin_user,\n    )\n    assert len(project_upload_result.user_files) == 1\n    project_user_file_id = str(project_upload_result.user_files[0].id)\n\n    # Wait for project file processing\n    _poll_file_statuses([project_user_file_id], admin_user, timeout=120)\n\n    # Custom persona with no files\n    persona = PersonaManager.create(\n        user_performing_action=admin_user,\n        name=\"No Files Agent\",\n        description=\"No files, project is irrelevant\",\n        system_prompt=(\n            \"You are a helpful assistant. If you do not have information \"\n            \"to answer a question, say 'I do not have that information.'\"\n        ),\n    )\n\n    chat_session = ChatSessionManager.create(\n        persona_id=persona.id,\n        project_id=project.id,\n        user_performing_action=admin_user,\n    )\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"What is the project secret?\",\n        user_performing_action=admin_user,\n    )\n\n    assert response.error is None\n    assert len(response.full_message) > 0\n    assert \"capybara\" not in response.full_message.lower(), (\n        \"Response should NOT reference the project file content (CAPYBARA) \"\n        \"because the custom persona has no files and should not inherit \"\n        f\"project files, but got: {response.full_message}\"\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/personas/test_persona_label_updates.py",
    "content": "from uuid import uuid4\n\nimport requests\n\nfrom onyx.server.features.persona.models import PersonaUpsertRequest\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.persona import PersonaLabelManager\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.test_models import DATestPersonaLabel\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_update_persona_with_null_label_ids_preserves_labels(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    persona_label = PersonaLabelManager.create(\n        label=DATestPersonaLabel(name=f\"Test label {uuid4()}\"),\n        user_performing_action=admin_user,\n    )\n    assert persona_label.id is not None\n    persona = PersonaManager.create(\n        label_ids=[persona_label.id],\n        user_performing_action=admin_user,\n    )\n\n    updated_description = f\"{persona.description}-updated\"\n    update_request = PersonaUpsertRequest(\n        name=persona.name,\n        description=updated_description,\n        system_prompt=persona.system_prompt or \"\",\n        task_prompt=persona.task_prompt or \"\",\n        datetime_aware=persona.datetime_aware,\n        document_set_ids=persona.document_set_ids,\n        is_public=persona.is_public,\n        llm_model_provider_override=persona.llm_model_provider_override,\n        llm_model_version_override=persona.llm_model_version_override,\n        tool_ids=persona.tool_ids,\n        users=[],\n        groups=[],\n        label_ids=None,\n    )\n\n    response = requests.patch(\n        f\"{API_SERVER_URL}/persona/{persona.id}\",\n        json=update_request.model_dump(mode=\"json\", exclude_none=False),\n        headers=admin_user.headers,\n        cookies=admin_user.cookies,\n    )\n    response.raise_for_status()\n\n    fetched = requests.get(\n        f\"{API_SERVER_URL}/persona/{persona.id}\",\n        headers=admin_user.headers,\n        cookies=admin_user.cookies,\n    )\n    fetched.raise_for_status()\n    fetched_persona = fetched.json()\n\n    assert fetched_persona[\"description\"] == updated_description\n    fetched_label_ids = {label[\"id\"] for label in fetched_persona[\"labels\"]}\n    assert persona_label.id in fetched_label_ids\n"
  },
  {
    "path": "backend/tests/integration/tests/personas/test_persona_pagination.py",
    "content": "import requests\n\nfrom onyx.server.features.persona.constants import ADMIN_AGENTS_RESOURCE\nfrom onyx.server.features.persona.constants import AGENTS_RESOURCE\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _get_agents_paginated(\n    user: DATestUser,\n    page_num: int,\n    page_size: int,\n    include_deleted: bool = False,\n    get_editable: bool = False,\n    include_default: bool = True,\n) -> tuple[dict, int]:\n    \"\"\"Fetches a paginated page of agents, with status code.\"\"\"\n    response = requests.get(\n        f\"{API_SERVER_URL}{AGENTS_RESOURCE}\",\n        params={\n            \"page_num\": page_num,\n            \"page_size\": page_size,\n            \"include_deleted\": include_deleted,\n            \"get_editable\": get_editable,\n            \"include_default\": include_default,\n        },\n        headers=user.headers,\n        cookies=user.cookies,\n    )\n    return response.json(), response.status_code\n\n\ndef _get_agents_admin_paginated(\n    user: DATestUser,\n    page_num: int,\n    page_size: int,\n    include_deleted: bool = False,\n    get_editable: bool = False,\n    include_default: bool = True,\n) -> tuple[dict, int]:\n    \"\"\"Fetches a paginated page of agents (admin endpoint) with status code.\"\"\"\n    response = requests.get(\n        f\"{API_SERVER_URL}{ADMIN_AGENTS_RESOURCE}\",\n        params={\n            \"page_num\": page_num,\n            \"page_size\": page_size,\n            \"include_deleted\": include_deleted,\n            \"get_editable\": get_editable,\n            \"include_default\": include_default,\n        },\n        headers=user.headers,\n        cookies=user.cookies,\n    )\n    response.raise_for_status()\n    return response.json(), response.status_code\n\n\ndef test_persona_pagination_basic(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test basic pagination - verify correct items and total count.\"\"\"\n    # Preconditions\n    personas_to_create = 25\n    personas = []\n    for i in range(personas_to_create):\n        persona = PersonaManager.create(\n            name=f\"Test Persona {i}\",\n            user_performing_action=admin_user,\n        )\n        personas.append(persona)\n\n    # Under test and postconditions\n    # Test page 0 with size 10.\n    page_0, _ = _get_agents_paginated(admin_user, page_num=0, page_size=10)\n    assert \"items\" in page_0\n    assert \"total_items\" in page_0\n    assert len(page_0[\"items\"]) == 10\n    assert (\n        page_0[\"total_items\"] >= personas_to_create\n    )  # At least personas_to_create (may have default personas)\n\n    # Test page 2 with size 10 (should have 5+ items if only our test personas\n    # exist).\n    page_2, _ = _get_agents_paginated(admin_user, page_num=2, page_size=10)\n    assert len(page_2[\"items\"]) >= 5\n    assert page_2[\"total_items\"] >= personas_to_create\n\n    # Test page beyond end (page 10 with size 10, offset 100).\n    page_beyond, _ = _get_agents_paginated(admin_user, page_num=10, page_size=10)\n    assert len(page_beyond[\"items\"]) == 0\n    assert page_beyond[\"total_items\"] >= personas_to_create  # Total doesn't change.\n\n\ndef test_persona_pagination_ordering(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test ordering - display_priority ASC nulls last, then ID ASC.\"\"\"\n    # Preconditions\n    # Create personas with specific display_priority values.\n    persona_a = PersonaManager.create(\n        name=\"Persona A\",\n        description=\"This should be second\",\n        user_performing_action=admin_user,\n        display_priority=2,\n    )\n    persona_b = PersonaManager.create(\n        name=\"Persona B\",\n        description=\"This should be first\",\n        user_performing_action=admin_user,\n        display_priority=1,\n    )\n    persona_c = PersonaManager.create(\n        name=\"Persona C\",\n        description=\"This should be third\",\n        user_performing_action=admin_user,\n        display_priority=3,\n    )\n    persona_d = PersonaManager.create(\n        name=\"Persona D\",\n        description=\"This should be fourth\",\n        user_performing_action=admin_user,\n        display_priority=3,  # Note the same prio as above, should sort by id\n    )\n\n    # Under test\n    page_0, _ = _get_agents_paginated(admin_user, page_num=0, page_size=100)\n\n    # Postconditions\n    # Find our personas in the results.\n    our_expected_ordered_persona_ids = [\n        persona_b.id,\n        persona_a.id,\n        persona_c.id,\n        persona_d.id,\n    ]\n    our_personas_in_results = [\n        p for p in page_0[\"items\"] if p[\"id\"] in our_expected_ordered_persona_ids\n    ]\n    assert len(our_personas_in_results) == 4\n    # Verify ordering.\n    for i in range(len(our_expected_ordered_persona_ids)):\n        assert our_expected_ordered_persona_ids[i] == our_personas_in_results[i][\"id\"]\n\n\ndef test_persona_pagination_admin_endpoint(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test admin paginated endpoint returns PersonaSnapshot format.\"\"\"\n    # Preconditions\n    personas_to_create = 5\n    for i in range(personas_to_create):\n        PersonaManager.create(\n            name=f\"Admin Test Persona {i}\",\n            user_performing_action=admin_user,\n        )\n\n    # Under test\n    page_0, _ = _get_agents_admin_paginated(admin_user, page_num=0, page_size=10)\n\n    # Postconditions\n    assert \"items\" in page_0\n    assert \"total_items\" in page_0\n    assert len(page_0[\"items\"]) >= personas_to_create\n    assert page_0[\"total_items\"] >= personas_to_create\n    # Verify admin-specific fields are present (PersonaSnapshot has more\n    # fields).\n    first_persona = page_0[\"items\"][0]\n    # PersonaSnapshot should have these fields that MinimalPersonaSnapshot\n    # doesn't.\n    assert \"users\" in first_persona\n    assert \"groups\" in first_persona\n    assert \"user_file_ids\" in first_persona\n\n\ndef test_persona_pagination_with_deleted(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test pagination with include_deleted parameter.\"\"\"\n    # Preconditions\n    # Create and delete a persona.\n    persona = PersonaManager.create(\n        name=\"To Be Deleted\",\n        user_performing_action=admin_user,\n    )\n    PersonaManager.delete(persona, user_performing_action=admin_user)\n\n    # Under test and postconditions\n    # Without include_deleted, should not appear.\n    page_without_deleted, _ = _get_agents_paginated(\n        admin_user, page_num=0, page_size=100, include_deleted=False\n    )\n    persona_ids_without_deleted = [p[\"id\"] for p in page_without_deleted[\"items\"]]\n    assert persona.id not in persona_ids_without_deleted\n\n    # With include_deleted, should appear.\n    page_with_deleted, _ = _get_agents_paginated(\n        admin_user, page_num=0, page_size=100, include_deleted=True\n    )\n    persona_ids_with_deleted = [p[\"id\"] for p in page_with_deleted[\"items\"]]\n    assert persona.id in persona_ids_with_deleted\n\n    # Total counts should differ.\n    assert page_with_deleted[\"total_items\"] > page_without_deleted[\"total_items\"]\n\n\ndef test_persona_pagination_page_size_limits(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test page_size parameter validation (max 1000).\"\"\"\n    # Preconditions\n    # Create a few personas.\n    for i in range(5):\n        PersonaManager.create(\n            name=f\"Size Limit Test {i}\",\n            user_performing_action=admin_user,\n        )\n\n    # Under test and postconditions\n    # Valid page_size of 1\n    data, _ = _get_agents_paginated(admin_user, page_num=0, page_size=1)\n    assert len(data[\"items\"]) <= 1\n\n    # Valid page_size of 1000\n    data, _ = _get_agents_paginated(admin_user, page_num=0, page_size=1000)\n    # We assume not that many default personas are made.\n    assert len(data[\"items\"]) == data[\"total_items\"]\n\n    # Invalid page_size of 1001 (exceeds max)\n    _, status_code = _get_agents_paginated(admin_user, page_num=0, page_size=1001)\n    assert status_code == 422  # Validation error\n\n    # Invalid page_size of 0\n    _, status_code = _get_agents_paginated(admin_user, page_num=0, page_size=0)\n    assert status_code == 422  # Validation error\n\n\ndef test_persona_pagination_count_accuracy(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Test that total_items count is consistent across pages.\"\"\"\n    # Preconditions\n    # Create 15 personas.\n    created_personas = []\n    for i in range(15):\n        persona = PersonaManager.create(\n            name=f\"Count Test {i}\",\n            user_performing_action=admin_user,\n        )\n        created_personas.append(persona)\n\n    # Under test and postconditions\n    # Fetch first page to get total count.\n    page_0, _ = _get_agents_paginated(admin_user, page_num=0, page_size=5)\n    total_items = page_0[\"total_items\"]\n    assert total_items >= 15\n\n    # Fetch all pages to cover all personas.\n    all_ids_from_pages: set[int] = set()\n    num_pages_needed = (total_items + 4) // 5  # Ceiling division\n    for page_num in range(num_pages_needed):\n        page, _ = _get_agents_paginated(admin_user, page_num=page_num, page_size=5)\n        # All pages should report the same total.\n        assert (\n            page[\"total_items\"] == total_items\n        ), f\"Page {page_num} has inconsistent total_items\"\n        all_ids_from_pages.update(p[\"id\"] for p in page[\"items\"])\n\n    # Our created personas should all appear.\n    our_ids = {p.id for p in created_personas}\n    assert our_ids.issubset(\n        all_ids_from_pages\n    ), \"All created personas should appear in paginated results\"\n\n\ndef test_persona_pagination_user_permissions(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    basic_user: DATestUser,\n) -> None:\n    \"\"\"Test that pagination respects user permissions.\"\"\"\n    # Preconditions\n    # Admin creates a private persona (not shared).\n    private_persona = PersonaManager.create(\n        name=\"Private Persona\",\n        description=\"Not shared\",\n        is_public=False,\n        user_performing_action=admin_user,\n    )\n    # Admin creates a public persona.\n    public_persona = PersonaManager.create(\n        name=\"Public Persona\",\n        description=\"Shared with all\",\n        is_public=True,\n        user_performing_action=admin_user,\n    )\n\n    # Under test and postconditions\n    # Admin should see both in paginated results.\n    admin_page, _ = _get_agents_paginated(admin_user, page_num=0, page_size=100)\n    admin_ids = {p[\"id\"] for p in admin_page[\"items\"]}\n    assert private_persona.id in admin_ids\n    assert public_persona.id in admin_ids\n\n    # Basic user should only see public persona.\n    user_page, _ = _get_agents_paginated(basic_user, page_num=0, page_size=100)\n    user_ids = {p[\"id\"] for p in user_page[\"items\"]}\n    assert private_persona.id not in user_ids\n    assert public_persona.id in user_ids\n\n    # Totals should differ.\n    assert admin_page[\"total_items\"] > user_page[\"total_items\"]\n"
  },
  {
    "path": "backend/tests/integration/tests/personas/test_unified_assistant.py",
    "content": "\"\"\"Integration tests for the unified assistant.\"\"\"\n\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_unified_assistant(\n    reset: None, admin_user: DATestUser  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"Combined test verifying unified assistant existence, tools, and starter messages.\"\"\"\n    # Fetch all personas\n    personas = PersonaManager.get_all(admin_user)\n\n    # Find the unified assistant (ID 0)\n    unified_assistant = None\n    for persona in personas:\n        if persona.id == 0:\n            unified_assistant = persona\n            break\n\n    # Assert that there are no other assistants (personas) besides the unified assistant\n    # (ID 0)\n    assert (\n        len(personas) == 1\n    ), f\"Expected only the unified assistant, found {len(personas)} personas\"\n\n    # Verify the unified assistant exists\n    assert unified_assistant is not None, \"Unified assistant (ID 0) not found\"\n\n    # Verify basic properties\n    assert unified_assistant.name == \"Assistant\"\n    assert (\n        \"search, web browsing, and image generation\"\n        in unified_assistant.description.lower()\n    )\n    assert unified_assistant.is_featured is True\n    assert unified_assistant.is_listed is True\n\n    # Verify tools\n    tools = unified_assistant.tools\n    tool_names = [tool.name for tool in tools]\n    assert \"internal_search\" in tool_names, \"SearchTool not found in unified assistant\"\n    assert (\n        \"generate_image\" in tool_names\n    ), \"ImageGenerationTool not found in unified assistant\"\n    assert \"web_search\" in tool_names, \"WebSearchTool not found in unified assistant\"\n\n    # Verify no starter messages\n    starter_messages = unified_assistant.starter_messages or []\n    assert len(starter_messages) == 0, \"Starter messages found\"\n"
  },
  {
    "path": "backend/tests/integration/tests/projects/test_projects.py",
    "content": "from typing import List\n\nimport pytest\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import UserFile\nfrom onyx.server.features.projects.models import UserProjectSnapshot\nfrom tests.integration.common_utils.managers.project import ProjectManager\nfrom tests.integration.common_utils.reset import reset_all\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\n@pytest.fixture(scope=\"module\", autouse=True)\ndef reset_for_module() -> None:\n    \"\"\"Reset all data once before running any tests in this module.\"\"\"\n    reset_all()\n\n\ndef test_projects_flow(\n    reset_for_module: None,  # noqa: ARG001\n    basic_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n) -> None:\n    \"\"\"End-to-end project flow covering creation, listing, files, instructions, deletion, and edge cases.\"\"\"\n    # Case 1: Project creation and listing\n    ProjectManager.create(\n        name=\"Test Project 1\",\n        user_performing_action=basic_user,\n    )\n    ProjectManager.create(\n        name=\"Test Project 2\",\n        user_performing_action=basic_user,\n    )\n\n    projects = ProjectManager.get_all(user_performing_action=basic_user)\n    assert len(projects) >= 2\n    project_names = {p.name for p in projects}\n    assert \"Test Project 1\" in project_names\n    assert \"Test Project 2\" in project_names\n    assert all(str(p.user_id) == basic_user.id for p in projects)\n\n    # Case 2: File upload and management\n    file_project = ProjectManager.create(\n        name=\"File Test Project\",\n        user_performing_action=basic_user,\n    )\n    test_files = [\n        (\"test1.txt\", b\"This is test file 1 content\"),\n        (\"test2.txt\", b\"This is test file 2 content\"),\n    ]\n    upload_result = ProjectManager.upload_files(\n        project_id=file_project.id,\n        files=test_files,\n        user_performing_action=basic_user,\n    )\n    assert len(upload_result.user_files) == 2\n    assert len(upload_result.rejected_files) == 0\n    project_files = ProjectManager.get_project_files(\n        project_id=file_project.id,\n        user_performing_action=basic_user,\n    )\n    assert len(project_files) == 2\n    file_names = {f.name for f in project_files}\n    assert \"test1.txt\" in file_names\n    assert \"test2.txt\" in file_names\n\n    # Case 3: Instructions set and update\n    instructions_project = ProjectManager.create(\n        name=\"Instructions Test Project\",\n        user_performing_action=basic_user,\n    )\n    instructions = \"These are test project instructions\"\n    result = ProjectManager.set_instructions(\n        project_id=instructions_project.id,\n        instructions=instructions,\n        user_performing_action=basic_user,\n    )\n    assert result == instructions\n    new_instructions = \"These are updated test project instructions\"\n    result = ProjectManager.set_instructions(\n        project_id=instructions_project.id,\n        instructions=new_instructions,\n        user_performing_action=basic_user,\n    )\n    assert result == new_instructions\n\n    # Case 4: Deletion with files (unlink but do not delete files)\n    delete_file_project = ProjectManager.create(\n        name=\"Deletion Test Project\",\n        user_performing_action=basic_user,\n    )\n    del_test_files = [\n        (\"delete_test1.txt\", b\"This is test file 1 content\"),\n        (\"delete_test2.txt\", b\"This is test file 2 content\"),\n    ]\n    ProjectManager.upload_files(\n        project_id=delete_file_project.id,\n        files=del_test_files,\n        user_performing_action=basic_user,\n    )\n    del_project_files = ProjectManager.get_project_files(\n        project_id=delete_file_project.id,\n        user_performing_action=basic_user,\n    )\n    assert len(del_project_files) == 2\n    deletion_success = ProjectManager.delete(\n        project_id=delete_file_project.id,\n        user_performing_action=basic_user,\n    )\n    assert deletion_success\n    assert ProjectManager.verify_deleted(\n        project_id=delete_file_project.id,\n        user_performing_action=basic_user,\n    )\n    assert ProjectManager.verify_files_unlinked(\n        project_id=delete_file_project.id,\n        user_performing_action=basic_user,\n    )\n    with get_session_with_current_tenant() as db_session:\n        file_ids = [f.id for f in del_project_files]\n        remaining_files = (\n            db_session.query(UserFile).filter(UserFile.id.in_(file_ids)).all()\n        )\n        assert len(remaining_files) == 2\n\n    # Case 5: Deletion with chat sessions unlinked\n    chat_project = ProjectManager.create(\n        name=\"Chat Session Test Project\",\n        user_performing_action=basic_user,\n    )\n    deletion_success = ProjectManager.delete(\n        project_id=chat_project.id,\n        user_performing_action=basic_user,\n    )\n    assert deletion_success\n    assert ProjectManager.verify_chat_sessions_unlinked(\n        project_id=chat_project.id,\n        user_performing_action=basic_user,\n    )\n\n    # Case 6: Multiple project operations\n    projects_group: List[UserProjectSnapshot] = []\n    for i in range(3):\n        proj = ProjectManager.create(\n            name=f\"Multi-op Project {i}\",\n            user_performing_action=basic_user,\n        )\n        projects_group.append(proj)\n\n    for i, proj in enumerate(projects_group):\n        tfiles = [\n            (f\"multi_test{i}_1.txt\", b\"This is test file 1 content\"),\n            (f\"multi_test{i}_2.txt\", b\"This is test file 2 content\"),\n        ]\n        ProjectManager.upload_files(\n            project_id=proj.id,\n            files=tfiles,\n            user_performing_action=basic_user,\n        )\n\n    for i, proj in enumerate(projects_group):\n        instr = f\"Instructions for project {i}\"\n        res = ProjectManager.set_instructions(\n            project_id=proj.id,\n            instructions=instr,\n            user_performing_action=basic_user,\n        )\n        assert res == instr\n\n    for proj in projects_group:\n        proj_files = ProjectManager.get_project_files(\n            project_id=proj.id,\n            user_performing_action=basic_user,\n        )\n        assert len(proj_files) == 2\n        deletion_success = ProjectManager.delete(\n            project_id=proj.id,\n            user_performing_action=basic_user,\n        )\n        assert deletion_success\n        assert ProjectManager.verify_deleted(\n            project_id=proj.id,\n            user_performing_action=basic_user,\n        )\n        assert ProjectManager.verify_files_unlinked(\n            project_id=proj.id,\n            user_performing_action=basic_user,\n        )\n        with get_session_with_current_tenant() as db_session:\n            file_ids = [f.id for f in proj_files]\n            remaining_files = (\n                db_session.query(UserFile).filter(UserFile.id.in_(file_ids)).all()\n            )\n            assert len(remaining_files) == 2\n\n    # Case 7: Edge cases\n    with pytest.raises(Exception):\n        ProjectManager.create(\n            name=\"\",\n            user_performing_action=basic_user,\n        )\n\n    non_existent_id = 99999\n    deletion_success = ProjectManager.delete(\n        project_id=non_existent_id,\n        user_performing_action=basic_user,\n    )\n    assert not deletion_success\n\n    with pytest.raises(Exception):\n        ProjectManager.set_instructions(\n            project_id=non_existent_id,\n            instructions=\"Test instructions\",\n            user_performing_action=basic_user,\n        )\n\n    with pytest.raises(Exception):\n        ProjectManager.upload_files(\n            project_id=non_existent_id,\n            files=[(\"test.txt\", b\"content\")],\n            user_performing_action=basic_user,\n        )\n\n    long_name = \"a\" * 1000\n    with pytest.raises(Exception):\n        ProjectManager.create(\n            name=long_name,\n            user_performing_action=basic_user,\n        )\n\n    long_instr_project = ProjectManager.create(\n        name=\"Long Instructions Test\",\n        user_performing_action=basic_user,\n    )\n    long_instructions = \"a\" * 10000\n    result = ProjectManager.set_instructions(\n        project_id=long_instr_project.id,\n        instructions=long_instructions,\n        user_performing_action=basic_user,\n    )\n    assert result == long_instructions\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/test_pruning.py",
    "content": "import http.server\nimport os\nimport shutil\nimport tempfile\nimport threading\nfrom collections.abc import Generator\nfrom contextlib import contextmanager\nfrom datetime import datetime\nfrom datetime import timezone\nfrom time import sleep\nfrom typing import Any\n\nimport uvicorn\nfrom fastapi import FastAPI\nfrom fastapi.staticfiles import StaticFiles\n\nfrom onyx.server.documents.models import DocumentSource\nfrom onyx.utils.logger import setup_logger\nfrom tests.integration.common_utils.managers.api_key import APIKeyManager\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\nlogger = setup_logger()\n\n\n# FastAPI server for serving files\ndef create_fastapi_app(directory: str) -> FastAPI:\n    app = FastAPI()\n\n    # Mount the directory to serve static files\n    app.mount(\"/\", StaticFiles(directory=directory, html=True), name=\"static\")\n\n    return app\n\n\n# as far as we know, this doesn't hang when crawled. This is good.\n@contextmanager\ndef fastapi_server_context(\n    directory: str, port: int = 8000\n) -> Generator[None, None, None]:\n    app = create_fastapi_app(directory)\n\n    config = uvicorn.Config(app=app, host=\"0.0.0.0\", port=port, log_level=\"info\")\n    server = uvicorn.Server(config)\n\n    # Create a thread to run the FastAPI server\n    server_thread = threading.Thread(target=server.run)\n    server_thread.daemon = (\n        True  # Ensures the thread will exit when the main program exits\n    )\n\n    try:\n        # Start the server in the background\n        server_thread.start()\n        sleep(5)  # Give it a few seconds to start\n        yield  # Yield control back to the calling function (context manager in use)\n    finally:\n        # Shutdown the server\n        server.should_exit = True\n        server_thread.join()\n\n\n# Leaving this here for posterity and experimentation, but the reason we're\n# not using this is python's web servers hang frequently when crawled\n# this is obviously not good for a unit test\n@contextmanager\ndef http_server_context(\n    directory: str, port: int = 8000\n) -> Generator[http.server.ThreadingHTTPServer, None, None]:\n    # Create a handler that serves files from the specified directory\n    def handler_class(\n        *args: Any, **kwargs: Any\n    ) -> http.server.SimpleHTTPRequestHandler:\n        return http.server.SimpleHTTPRequestHandler(\n            *args, directory=directory, **kwargs\n        )\n\n    # Create an HTTPServer instance\n    httpd = http.server.ThreadingHTTPServer((\"0.0.0.0\", port), handler_class)\n\n    # Define a thread that runs the server in the background\n    server_thread = threading.Thread(target=httpd.serve_forever)\n    server_thread.daemon = (\n        True  # Ensures the thread will exit when the main program exits\n    )\n\n    try:\n        # Start the server in the background\n        server_thread.start()\n        sleep(5)  # give it a few seconds to start\n        yield httpd\n    finally:\n        # Shutdown the server and wait for the thread to finish\n        httpd.shutdown()\n        httpd.server_close()\n        server_thread.join()\n\n\ndef test_web_pruning(\n    reset: None, vespa_client: vespa_fixture  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    # add api key to user\n    APIKeyManager.create(\n        user_performing_action=admin_user,\n    )\n\n    test_filename = os.path.realpath(__file__)\n    test_directory = os.path.dirname(test_filename)\n    with tempfile.TemporaryDirectory() as temp_dir:\n        port = 8889\n\n        website_src = os.path.join(test_directory, \"website\")\n        website_tgt = os.path.join(temp_dir, \"website\")\n        shutil.copytree(website_src, website_tgt)\n        with fastapi_server_context(os.path.join(temp_dir, \"website\"), port):\n            sleep(1)  # sleep a tiny bit before starting everything\n\n            hostname = os.getenv(\"TEST_WEB_HOSTNAME\", \"localhost\")\n            config = {\n                \"base_url\": f\"http://{hostname}:{port}/\",\n                \"web_connector_type\": \"recursive\",\n            }\n\n            # store the time before we create the connector so that we know after\n            # when the indexing should have started\n            now = datetime.now(timezone.utc)\n\n            # create connector\n            cc_pair_1 = CCPairManager.create_from_scratch(\n                source=DocumentSource.WEB,\n                connector_specific_config=config,\n                user_performing_action=admin_user,\n            )\n\n            CCPairManager.wait_for_indexing_completion(\n                cc_pair_1, now, timeout=300, user_performing_action=admin_user\n            )\n\n            selected_cc_pair = CCPairManager.get_indexing_status_by_id(\n                cc_pair_1.id, user_performing_action=admin_user\n            )\n\n            assert selected_cc_pair is not None, \"cc_pair not found after indexing!\"\n\n            # used to be 15, but now\n            # localhost:8889/ and localhost:8889/index.html are deduped\n            assert selected_cc_pair.docs_indexed == 14\n\n            logger.info(\"Removing about.html.\")\n            os.remove(os.path.join(website_tgt, \"about.html\"))\n            logger.info(\"Removing courses.html.\")\n            os.remove(os.path.join(website_tgt, \"courses.html\"))\n\n            now = datetime.now(timezone.utc)\n            CCPairManager.prune(cc_pair_1, user_performing_action=admin_user)\n            CCPairManager.wait_for_prune(\n                cc_pair_1, now, timeout=300, user_performing_action=admin_user\n            )\n\n            selected_cc_pair = CCPairManager.get_indexing_status_by_id(\n                cc_pair_1.id, user_performing_action=admin_user\n            )\n            assert selected_cc_pair is not None, \"cc_pair not found after pruning!\"\n            assert selected_cc_pair.docs_indexed == 12\n\n            # check vespa\n            root_id = f\"http://{hostname}:{port}/\"\n            index_id = f\"http://{hostname}:{port}/index.html\"\n            about_id = f\"http://{hostname}:{port}/about.html\"\n            courses_id = f\"http://{hostname}:{port}/courses.html\"\n\n            doc_ids = [root_id, index_id, about_id, courses_id]\n            retrieved_docs_dict = vespa_client.get_documents_by_id(doc_ids)[\"documents\"]\n            retrieved_docs = {\n                doc[\"fields\"][\"document_id\"]: doc[\"fields\"]\n                for doc in retrieved_docs_dict\n            }\n\n            # verify root exists in Vespa\n            retrieved_doc = retrieved_docs.get(root_id)\n            assert retrieved_doc\n\n            # verify index.html does not exist in Vespa since it is a duplicate of root\n            retrieved_doc = retrieved_docs.get(index_id)\n            assert not retrieved_doc\n\n            # verify about and courses do not exist\n            retrieved_doc = retrieved_docs.get(about_id)\n            assert not retrieved_doc\n\n            retrieved_doc = retrieved_docs.get(courses_id)\n            assert not retrieved_doc\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/about.html",
    "content": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"utf-8\" />\n    <title>Above Multi-purpose Free Bootstrap Responsive Template</title>\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <meta name=\"description\" content=\"\" />\n    <meta name=\"author\" content=\"http://webthemez.com\" />\n    <!-- css -->\n    <link href=\"css/bootstrap.min.css\" rel=\"stylesheet\" />\n    <link href=\"css/fancybox/jquery.fancybox.css\" rel=\"stylesheet\" />\n    <link href=\"css/jcarousel.css\" rel=\"stylesheet\" />\n    <link href=\"css/flexslider.css\" rel=\"stylesheet\" />\n    <link href=\"css/style.css\" rel=\"stylesheet\" />\n\n    <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->\n    <!--[if lt IE 9]>\n      <script src=\"http://html5shim.googlecode.com/svn/trunk/html5.js\"></script>\n    <![endif]-->\n  </head>\n  <body>\n    <div id=\"wrapper\">\n      <!-- start header -->\n      <header>\n        <div class=\"navbar navbar-default navbar-static-top\">\n          <div class=\"container\">\n            <div class=\"navbar-header\">\n              <button\n                type=\"button\"\n                class=\"navbar-toggle\"\n                data-toggle=\"collapse\"\n                data-target=\".navbar-collapse\"\n              >\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n              </button>\n              <a class=\"navbar-brand\" href=\"index.html\"\n                ><img src=\"img/logo.png\" alt=\"logo\"\n              /></a>\n            </div>\n            <div class=\"navbar-collapse collapse\">\n              <ul class=\"nav navbar-nav\">\n                <li><a href=\"index.html\">Home</a></li>\n                <li class=\"active\"><a href=\"about.html\">About Us</a></li>\n                <li><a href=\"courses.html\">Courses</a></li>\n                <li><a href=\"portfolio.html\">Portfolio</a></li>\n                <li><a href=\"pricing.html\">Pricing</a></li>\n                <li><a href=\"contact.html\">Contact</a></li>\n              </ul>\n            </div>\n          </div>\n        </div>\n      </header>\n      <!-- end header -->\n      <section id=\"inner-headline\">\n        <div class=\"container\">\n          <div class=\"row\">\n            <div class=\"col-lg-12\">\n              <h2 class=\"pageTitle\">About Us</h2>\n            </div>\n          </div>\n        </div>\n      </section>\n      <section id=\"content\">\n        <div class=\"container\">\n          <div class=\"about\">\n            <div class=\"row\">\n              <div class=\"col-md-12\">\n                <div class=\"about-logo\">\n                  <h3>We are awesome <span class=\"color\">TEAM</span></h3>\n                  <p>\n                    Sed ut perspiciaatis unde omnis iste natus error sit\n                    voluptatem accusantium doloremque laudantium, totam rem\n                    aperiam, eaque ipsa quae ab illo inventore veritatis et\n                    quasi architecto beatae vitae dicta sunt explicabo. Nemo\n                    enim ipsam voluptatem quia voluptas\n                  </p>\n                  <p>\n                    Sed ut perspiciaatis unde omnis iste natus error sit\n                    voluptatem accusantium doloremque laudantium, totam rem\n                    aperiam, eaque ipsa quae ab illo inventore veritatis et\n                    quasi architecto beatae vitae dicta sunt explicabo. Nemo\n                    enim ipsam voluptatem quia voluptas\n                  </p>\n                </div>\n                <a href=\"#\" class=\"btn btn-color\">Read more</a>\n              </div>\n            </div>\n            <br />\n\n            <div class=\"row\">\n              <div class=\"col-md-6\">\n                <img src=\"img/section-image-1.png\" alt=\"\" />\n                <div class=\"space\"></div>\n              </div>\n              <div class=\"col-md-6\">\n                <p>\n                  Lorem ipsum dolor sit amet, cadipisicing sit amet, consectetur\n                  adipisicing elit. Atque sed, quidem quis praesentium, ut unde\n                  fuga error commodi architecto, laudantium culpa tenetur at id,\n                  beatae pet.\n                </p>\n                <p>\n                  Lorem ipsum dolor sit amet, consectetur adipisicing elit.\n                  adipisicing sit amet, consectetur adipisicing elit. Atque sed,\n                  quidem quis praesentium,m deserunt.\n                </p>\n                <ul class=\"list-unstyled\">\n                  <li>\n                    <i class=\"fa fa-arrow-circle-right pr-10 colored\"></i> Lorem\n                    ipsum enimdolor sit amet\n                  </li>\n                  <li>\n                    <i class=\"fa fa-arrow-circle-right pr-10 colored\"></i>\n                    Explicabo deleniti neque aliquid\n                  </li>\n                  <li>\n                    <i class=\"fa fa-arrow-circle-right pr-10 colored\"></i>\n                    Consectetur adipisicing elit\n                  </li>\n                  <li>\n                    <i class=\"fa fa-arrow-circle-right pr-10 colored\"></i> Lorem\n                    ipsum dolor sit amet\n                  </li>\n                  <li>\n                    <i class=\"fa fa-arrow-circle-right pr-10 colored\"></i> Quo\n                    issimos molest quibusdam temporibus\n                  </li>\n                </ul>\n              </div>\n            </div>\n            <br />\n            <hr />\n            <br />\n            <div class=\"row\">\n              <div class=\"col-md-4\">\n                <!-- Heading and para -->\n                <div class=\"block-heading-two\">\n                  <h3><span>Why Choose Us?</span></h3>\n                </div>\n                <p>\n                  Sed ut perspiciaatis unde omnis iste natus error sit\n                  voluptatem accusantium doloremque laudantium, totam rem\n                  aperiam, eaque ipsa quae ab illo inventore veritatis et quasi\n                  architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam\n                  voluptatem quia voluptas sit aspernatur. <br /><br />Sed ut\n                  perspiciaatis iste natus error sit voluptatem probably haven't\n                  heard of them accusamus.\n                </p>\n              </div>\n              <div class=\"col-md-4\">\n                <div class=\"block-heading-two\">\n                  <h3><span>Our Solution</span></h3>\n                </div>\n                <!-- Accordion starts -->\n                <div class=\"panel-group\" id=\"accordion-alt3\">\n                  <!-- Panel. Use \"panel-XXX\" class for different colors. Replace \"XXX\" with color. -->\n                  <div class=\"panel\">\n                    <!-- Panel heading -->\n                    <div class=\"panel-heading\">\n                      <h4 class=\"panel-title\">\n                        <a\n                          data-toggle=\"collapse\"\n                          data-parent=\"#accordion-alt3\"\n                          href=\"#collapseOne-alt3\"\n                        >\n                          <i class=\"fa fa-angle-right\"></i> Accordion Heading\n                          Text Item # 1\n                        </a>\n                      </h4>\n                    </div>\n                    <div id=\"collapseOne-alt3\" class=\"panel-collapse collapse\">\n                      <!-- Panel body -->\n                      <div class=\"panel-body\">\n                        Sed ut perspiciaatis unde omnis iste natus error sit\n                        voluptatem accusantium doloremque laudantium, totam rem\n                        aperiam, eaque ipsa quae ab illo inventore veritatis et\n                        quasi architecto beatae vitae dicta sunt explicabo. Nemo\n                        enim ipsam voluptatem quia voluptas\n                      </div>\n                    </div>\n                  </div>\n                  <div class=\"panel\">\n                    <div class=\"panel-heading\">\n                      <h4 class=\"panel-title\">\n                        <a\n                          data-toggle=\"collapse\"\n                          data-parent=\"#accordion-alt3\"\n                          href=\"#collapseTwo-alt3\"\n                        >\n                          <i class=\"fa fa-angle-right\"></i> Accordion Heading\n                          Text Item # 2\n                        </a>\n                      </h4>\n                    </div>\n                    <div id=\"collapseTwo-alt3\" class=\"panel-collapse collapse\">\n                      <div class=\"panel-body\">\n                        Sed ut perspiciaatis unde omnis iste natus error sit\n                        voluptatem accusantium doloremque laudantium, totam rem\n                        aperiam, eaque ipsa quae ab illo inventore veritatis et\n                        quasi architecto beatae vitae dicta sunt explicabo. Nemo\n                        enim ipsam voluptatem quia voluptas\n                      </div>\n                    </div>\n                  </div>\n                  <div class=\"panel\">\n                    <div class=\"panel-heading\">\n                      <h4 class=\"panel-title\">\n                        <a\n                          data-toggle=\"collapse\"\n                          data-parent=\"#accordion-alt3\"\n                          href=\"#collapseThree-alt3\"\n                        >\n                          <i class=\"fa fa-angle-right\"></i> Accordion Heading\n                          Text Item # 3\n                        </a>\n                      </h4>\n                    </div>\n                    <div\n                      id=\"collapseThree-alt3\"\n                      class=\"panel-collapse collapse\"\n                    >\n                      <div class=\"panel-body\">\n                        Sed ut perspiciaatis unde omnis iste natus error sit\n                        voluptatem accusantium doloremque laudantium, totam rem\n                        aperiam, eaque ipsa quae ab illo inventore veritatis et\n                        quasi architecto beatae vitae dicta sunt explicabo. Nemo\n                        enim ipsam voluptatem quia voluptas\n                      </div>\n                    </div>\n                  </div>\n                  <div class=\"panel\">\n                    <div class=\"panel-heading\">\n                      <h4 class=\"panel-title\">\n                        <a\n                          data-toggle=\"collapse\"\n                          data-parent=\"#accordion-alt3\"\n                          href=\"#collapseFour-alt3\"\n                        >\n                          <i class=\"fa fa-angle-right\"></i> Accordion Heading\n                          Text Item # 4\n                        </a>\n                      </h4>\n                    </div>\n                    <div id=\"collapseFour-alt3\" class=\"panel-collapse collapse\">\n                      <div class=\"panel-body\">\n                        Sed ut perspiciaatis unde omnis iste natus error sit\n                        voluptatem accusantium doloremque laudantium, totam rem\n                        aperiam, eaque ipsa quae ab illo inventore veritatis et\n                        quasi architecto beatae vitae dicta sunt explicabo. Nemo\n                        enim ipsam voluptatem quia voluptas\n                      </div>\n                    </div>\n                  </div>\n                </div>\n                <!-- Accordion ends -->\n              </div>\n\n              <div class=\"col-md-4\">\n                <div class=\"block-heading-two\">\n                  <h3><span>Our Expertise</span></h3>\n                </div>\n                <h6>Web Development</h6>\n                <div class=\"progress pb-sm\">\n                  <!-- White color (progress-bar-white) -->\n                  <div\n                    class=\"progress-bar progress-bar-red\"\n                    role=\"progressbar\"\n                    aria-valuenow=\"40\"\n                    aria-valuemin=\"0\"\n                    aria-valuemax=\"100\"\n                    style=\"width: 40%\"\n                  >\n                    <span class=\"sr-only\">40% Complete (success)</span>\n                  </div>\n                </div>\n                <h6>Designing</h6>\n                <div class=\"progress pb-sm\">\n                  <div\n                    class=\"progress-bar progress-bar-green\"\n                    role=\"progressbar\"\n                    aria-valuenow=\"60\"\n                    aria-valuemin=\"0\"\n                    aria-valuemax=\"100\"\n                    style=\"width: 60%\"\n                  >\n                    <span class=\"sr-only\">40% Complete (success)</span>\n                  </div>\n                </div>\n                <h6>User Experience</h6>\n                <div class=\"progress pb-sm\">\n                  <div\n                    class=\"progress-bar progress-bar-lblue\"\n                    role=\"progressbar\"\n                    aria-valuenow=\"80\"\n                    aria-valuemin=\"0\"\n                    aria-valuemax=\"100\"\n                    style=\"width: 80%\"\n                  >\n                    <span class=\"sr-only\">40% Complete (success)</span>\n                  </div>\n                </div>\n                <h6>Development</h6>\n                <div class=\"progress pb-sm\">\n                  <div\n                    class=\"progress-bar progress-bar-yellow\"\n                    role=\"progressbar\"\n                    aria-valuenow=\"30\"\n                    aria-valuemin=\"0\"\n                    aria-valuemax=\"100\"\n                    style=\"width: 30%\"\n                  >\n                    <span class=\"sr-only\">40% Complete (success)</span>\n                  </div>\n                </div>\n              </div>\n            </div>\n\n            <hr />\n            <br />\n            <!-- Our Team starts -->\n\n            <!-- Heading -->\n            <div class=\"block-heading-six\">\n              <h3 class=\"bg-color\">Our Team</h3>\n            </div>\n            <br />\n\n            <!-- Our team starts -->\n\n            <div class=\"team-six\">\n              <div class=\"row\">\n                <div class=\"col-md-3 col-sm-6\">\n                  <!-- Team Member -->\n                  <div class=\"team-member\">\n                    <!-- Image -->\n                    <img class=\"img-responsive\" src=\"img/team1.jpg\" alt=\"\" />\n                    <!-- Name -->\n                    <h4>Johne Doe</h4>\n                    <span class=\"deg\">Creative</span>\n                  </div>\n                </div>\n                <div class=\"col-md-3 col-sm-6\">\n                  <!-- Team Member -->\n                  <div class=\"team-member\">\n                    <!-- Image -->\n                    <img class=\"img-responsive\" src=\"img/team2.jpg\" alt=\"\" />\n                    <!-- Name -->\n                    <h4>Jennifer</h4>\n                    <span class=\"deg\">Programmer</span>\n                  </div>\n                </div>\n                <div class=\"col-md-3 col-sm-6\">\n                  <!-- Team Member -->\n                  <div class=\"team-member\">\n                    <!-- Image -->\n                    <img class=\"img-responsive\" src=\"img/team3.jpg\" alt=\"\" />\n                    <!-- Name -->\n                    <h4>Christean</h4>\n                    <span class=\"deg\">CEO</span>\n                  </div>\n                </div>\n                <div class=\"col-md-3 col-sm-6\">\n                  <!-- Team Member -->\n                  <div class=\"team-member\">\n                    <!-- Image -->\n                    <img class=\"img-responsive\" src=\"img/team4.jpg\" alt=\"\" />\n                    <!-- Name -->\n                    <h4>Kerinele rase</h4>\n                    <span class=\"deg\">Manager</span>\n                  </div>\n                </div>\n              </div>\n            </div>\n\n            <!-- Our team ends -->\n          </div>\n        </div>\n      </section>\n      <footer>\n        <div class=\"container\">\n          <div class=\"row\">\n            <div class=\"col-lg-3\">\n              <div class=\"widget\">\n                <h5 class=\"widgetheading\">Our Contact</h5>\n                <address>\n                  <strong>Abovecompany Inc</strong><br />\n                  JC Main Road, Near Silnile tower<br />\n                  Pin-21542 NewYork US.\n                </address>\n                <p>\n                  <i class=\"icon-phone\"></i> (123) 456-789 - 1255-12584 <br />\n                  <i class=\"icon-envelope-alt\"></i> email@domainname.com\n                </p>\n              </div>\n            </div>\n            <div class=\"col-lg-3\">\n              <div class=\"widget\">\n                <h5 class=\"widgetheading\">Quick Links</h5>\n                <ul class=\"link-list\">\n                  <li><a href=\"#\">Latest Events</a></li>\n                  <li><a href=\"#\">Terms and conditions</a></li>\n                  <li><a href=\"#\">Privacy policy</a></li>\n                  <li><a href=\"#\">Career</a></li>\n                  <li><a href=\"#\">Contact us</a></li>\n                </ul>\n              </div>\n            </div>\n            <div class=\"col-lg-3\">\n              <div class=\"widget\">\n                <h5 class=\"widgetheading\">Latest posts</h5>\n                <ul class=\"link-list\">\n                  <li>\n                    <a href=\"#\"\n                      >Lorem ipsum dolor sit amet, consectetur adipiscing\n                      elit.</a\n                    >\n                  </li>\n                  <li>\n                    <a href=\"#\"\n                      >Pellentesque et pulvinar enim. Quisque at tempor\n                      ligula</a\n                    >\n                  </li>\n                  <li>\n                    <a href=\"#\"\n                      >Natus error sit voluptatem accusantium doloremque</a\n                    >\n                  </li>\n                </ul>\n              </div>\n            </div>\n            <div class=\"col-lg-3\">\n              <div class=\"widget\">\n                <h5 class=\"widgetheading\">Recent News</h5>\n                <ul class=\"link-list\">\n                  <li>\n                    <a href=\"#\"\n                      >Lorem ipsum dolor sit amet, consectetur adipiscing\n                      elit.</a\n                    >\n                  </li>\n                  <li>\n                    <a href=\"#\"\n                      >Pellentesque et pulvinar enim. Quisque at tempor\n                      ligula</a\n                    >\n                  </li>\n                  <li>\n                    <a href=\"#\"\n                      >Natus error sit voluptatem accusantium doloremque</a\n                    >\n                  </li>\n                </ul>\n              </div>\n            </div>\n          </div>\n        </div>\n        <div id=\"sub-footer\">\n          <div class=\"container\">\n            <div class=\"row\">\n              <div class=\"col-lg-6\">\n                <div class=\"copyright\">\n                  <p>\n                    <span\n                      >&copy; Above Site All right reserved. Template By </span\n                    ><a href=\"http://webthemez.com\" target=\"_blank\"\n                      >WebThemez</a\n                    >\n                  </p>\n                </div>\n              </div>\n              <div class=\"col-lg-6\">\n                <ul class=\"social-network\">\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Facebook\"\n                      ><i class=\"fa fa-facebook\"></i\n                    ></a>\n                  </li>\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Twitter\"\n                      ><i class=\"fa fa-twitter\"></i\n                    ></a>\n                  </li>\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Linkedin\"\n                      ><i class=\"fa fa-linkedin\"></i\n                    ></a>\n                  </li>\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Pinterest\"\n                      ><i class=\"fa fa-pinterest\"></i\n                    ></a>\n                  </li>\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Google plus\"\n                      ><i class=\"fa fa-google-plus\"></i\n                    ></a>\n                  </li>\n                </ul>\n              </div>\n            </div>\n          </div>\n        </div>\n      </footer>\n    </div>\n    <a href=\"#\" class=\"scrollup\"><i class=\"fa fa-angle-up active\"></i></a>\n    <!-- javascript\n    ================================================== -->\n    <!-- Placed at the end of the document so the pages load faster -->\n    <script src=\"js/jquery.js\"></script>\n    <script src=\"js/jquery.easing.1.3.js\"></script>\n    <script src=\"js/bootstrap.min.js\"></script>\n    <script src=\"js/jquery.fancybox.pack.js\"></script>\n    <script src=\"js/jquery.fancybox-media.js\"></script>\n    <script src=\"js/portfolio/jquery.quicksand.js\"></script>\n    <script src=\"js/portfolio/setting.js\"></script>\n    <script src=\"js/jquery.flexslider.js\"></script>\n    <script src=\"js/animate.js\"></script>\n    <script src=\"js/custom.js\"></script>\n  </body>\n</html>\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/contact.html",
    "content": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"utf-8\" />\n    <title>Above Multi-purpose Free Bootstrap Responsive Template</title>\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <meta name=\"description\" content=\"\" />\n    <meta name=\"author\" content=\"http://webthemez.com\" />\n    <!-- css -->\n    <link href=\"css/bootstrap.min.css\" rel=\"stylesheet\" />\n    <link href=\"css/fancybox/jquery.fancybox.css\" rel=\"stylesheet\" />\n    <link href=\"css/jcarousel.css\" rel=\"stylesheet\" />\n    <link href=\"css/flexslider.css\" rel=\"stylesheet\" />\n    <link href=\"css/style.css\" rel=\"stylesheet\" />\n\n    <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->\n    <!--[if lt IE 9]>\n      <script src=\"http://html5shim.googlecode.com/svn/trunk/html5.js\"></script>\n    <![endif]-->\n  </head>\n  <body>\n    <div id=\"wrapper\">\n      <!-- start header -->\n      <header>\n        <div class=\"navbar navbar-default navbar-static-top\">\n          <div class=\"container\">\n            <div class=\"navbar-header\">\n              <button\n                type=\"button\"\n                class=\"navbar-toggle\"\n                data-toggle=\"collapse\"\n                data-target=\".navbar-collapse\"\n              >\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n              </button>\n              <a class=\"navbar-brand\" href=\"index.html\"\n                ><img src=\"img/logo.png\" alt=\"logo\"\n              /></a>\n            </div>\n            <div class=\"navbar-collapse collapse\">\n              <ul class=\"nav navbar-nav\">\n                <li><a href=\"index.html\">Home</a></li>\n                <li><a href=\"about.html\">About Us</a></li>\n                <li><a href=\"courses.html\">Courses</a></li>\n                <li><a href=\"portfolio.html\">Portfolio</a></li>\n                <li><a href=\"pricing.html\">Pricing</a></li>\n                <li class=\"active\"><a href=\"contact.html\">Contact</a></li>\n              </ul>\n            </div>\n          </div>\n        </div>\n      </header>\n      <!-- end header -->\n      <section id=\"inner-headline\">\n        <div class=\"container\">\n          <div class=\"row\">\n            <div class=\"col-lg-12\">\n              <h2 class=\"pageTitle\">Contact Us</h2>\n            </div>\n          </div>\n        </div>\n      </section>\n      <section id=\"content\">\n        <div class=\"container\">\n          <div class=\"row\">\n            <div class=\"col-md-12\">\n              <script\n                type=\"text/javascript\"\n                src=\"http://maps.google.com/maps/api/js?sensor=false\"\n              ></script>\n              <div style=\"overflow: hidden; height: 300px; width: 100%\">\n                <div id=\"gmap_canvas\" style=\"height: 300px; width: 100%\"></div>\n                <style>\n                  #gmap_canvas img {\n                    max-width: none !important;\n                    background: none !important;\n                  }</style\n                ><a\n                  class=\"google-map-code\"\n                  href=\"http://www.trivoo.net\"\n                  id=\"get-map-data\"\n                  >trivoo</a\n                >\n              </div>\n              <script type=\"text/javascript\">\n                function init_map() {\n                  var myOptions = {\n                    zoom: 14,\n                    center: new google.maps.LatLng(\n                      40.805478,\n                      -73.96522499999998,\n                    ),\n                    mapTypeId: google.maps.MapTypeId.ROADMAP,\n                  };\n                  map = new google.maps.Map(\n                    document.getElementById(\"gmap_canvas\"),\n                    myOptions,\n                  );\n                  marker = new google.maps.Marker({\n                    map: map,\n                    position: new google.maps.LatLng(\n                      40.805478,\n                      -73.96522499999998,\n                    ),\n                  });\n                  infowindow = new google.maps.InfoWindow({\n                    content:\n                      \"<b>The Breslin</b><br/>2880 Broadway<br/> New York\",\n                  });\n                  google.maps.event.addListener(marker, \"click\", function () {\n                    infowindow.open(map, marker);\n                  });\n                  infowindow.open(map, marker);\n                }\n                google.maps.event.addDomListener(window, \"load\", init_map);\n              </script>\n            </div>\n          </div>\n\n          <div class=\"row\">\n            <div class=\"col-md-6\">\n              <br />\n              <div class=\"alert alert-success hidden\" id=\"contactSuccess\">\n                <strong>Success!</strong> Your message has been sent to us.\n              </div>\n              <div class=\"alert alert-error hidden\" id=\"contactError\">\n                <strong>Error!</strong> There was an error sending your message.\n              </div>\n              <div class=\"contact-form\">\n                <form id=\"contact-form\" role=\"form\" novalidate=\"novalidate\">\n                  <div class=\"form-group has-feedback\">\n                    <label for=\"name\">Name*</label>\n                    <input\n                      type=\"text\"\n                      class=\"form-control\"\n                      id=\"name\"\n                      name=\"name\"\n                      placeholder=\"\"\n                    />\n                    <i class=\"fa fa-user form-control-feedback\"></i>\n                  </div>\n                  <div class=\"form-group has-feedback\">\n                    <label for=\"email\">Email*</label>\n                    <input\n                      type=\"email\"\n                      class=\"form-control\"\n                      id=\"email\"\n                      name=\"email\"\n                      placeholder=\"\"\n                    />\n                    <i class=\"fa fa-envelope form-control-feedback\"></i>\n                  </div>\n                  <div class=\"form-group has-feedback\">\n                    <label for=\"subject\">Subject*</label>\n                    <input\n                      type=\"text\"\n                      class=\"form-control\"\n                      id=\"subject\"\n                      name=\"subject\"\n                      placeholder=\"\"\n                    />\n                    <i class=\"fa fa-navicon form-control-feedback\"></i>\n                  </div>\n                  <div class=\"form-group has-feedback\">\n                    <label for=\"message\">Message*</label>\n                    <textarea\n                      class=\"form-control\"\n                      rows=\"6\"\n                      id=\"message\"\n                      name=\"message\"\n                      placeholder=\"\"\n                    ></textarea>\n                    <i class=\"fa fa-pencil form-control-feedback\"></i>\n                  </div>\n                  <input type=\"submit\" value=\"Submit\" class=\"btn btn-default\" />\n                </form>\n              </div>\n            </div>\n            <div class=\"col-md-6\">\n              <div class=\"span4\">\n                <div class=\"title-box clearfix\">\n                  <h3 class=\"title-box_primary\">Contact info</h3>\n                </div>\n                <h5>\n                  Lorem ipsum dolor sit amet, cadipisicing sit amet, consectetur\n                  adipisicing elit. Atque sed, quidem quis praesentium.\n                </h5>\n                <p>\n                  Lorem ipsum dolor sit amet, cadipisicing sit amet, consectetur\n                  adipisicing elit. Lorem ipsum dolor sit amet, cadipisicing sit\n                  amet, consectetur adipisicing elit. Atque sed, quidem quis\n                  praesentium Atque sed, quidem quis praesentium, ut unde fuga\n                  error commodi architecto, laudantium culpa tenetur at id,\n                  beatae pet.<br />\n                </p>\n                <address>\n                  <strong\n                    >The Company Name.<br />\n                    12345 St John Point,<br />\n                    Brisbean, ABC 12 St 11.</strong\n                  ><br />\n                  Telephone: +1 234 567 890<br />\n                  FAX: +1 234 567 890<br />\n                  E-mail:\n                  <a href=\"mailto:info@sitename.org\">mail@sitename.org</a><br />\n                </address>\n              </div>\n            </div>\n          </div>\n        </div>\n      </section>\n      <footer>\n        <div class=\"container\">\n          <div class=\"row\">\n            <div class=\"col-lg-3\">\n              <div class=\"widget\">\n                <h5 class=\"widgetheading\">Our Contact</h5>\n                <address>\n                  <strong>Abovecompany Inc</strong><br />\n                  JC Main Road, Near Silnile tower<br />\n                  Pin-21542 NewYork US.\n                </address>\n                <p>\n                  <i class=\"icon-phone\"></i> (123) 456-789 - 1255-12584 <br />\n                  <i class=\"icon-envelope-alt\"></i> email@domainname.com\n                </p>\n              </div>\n            </div>\n            <div class=\"col-lg-3\">\n              <div class=\"widget\">\n                <h5 class=\"widgetheading\">Quick Links</h5>\n                <ul class=\"link-list\">\n                  <li><a href=\"#\">Latest Events</a></li>\n                  <li><a href=\"#\">Terms and conditions</a></li>\n                  <li><a href=\"#\">Privacy policy</a></li>\n                  <li><a href=\"#\">Career</a></li>\n                  <li><a href=\"#\">Contact us</a></li>\n                </ul>\n              </div>\n            </div>\n            <div class=\"col-lg-3\">\n              <div class=\"widget\">\n                <h5 class=\"widgetheading\">Latest posts</h5>\n                <ul class=\"link-list\">\n                  <li>\n                    <a href=\"#\"\n                      >Lorem ipsum dolor sit amet, consectetur adipiscing\n                      elit.</a\n                    >\n                  </li>\n                  <li>\n                    <a href=\"#\"\n                      >Pellentesque et pulvinar enim. Quisque at tempor\n                      ligula</a\n                    >\n                  </li>\n                  <li>\n                    <a href=\"#\"\n                      >Natus error sit voluptatem accusantium doloremque</a\n                    >\n                  </li>\n                </ul>\n              </div>\n            </div>\n            <div class=\"col-lg-3\">\n              <div class=\"widget\">\n                <h5 class=\"widgetheading\">Recent News</h5>\n                <ul class=\"link-list\">\n                  <li>\n                    <a href=\"#\"\n                      >Lorem ipsum dolor sit amet, consectetur adipiscing\n                      elit.</a\n                    >\n                  </li>\n                  <li>\n                    <a href=\"#\"\n                      >Pellentesque et pulvinar enim. Quisque at tempor\n                      ligula</a\n                    >\n                  </li>\n                  <li>\n                    <a href=\"#\"\n                      >Natus error sit voluptatem accusantium doloremque</a\n                    >\n                  </li>\n                </ul>\n              </div>\n            </div>\n          </div>\n        </div>\n        <div id=\"sub-footer\">\n          <div class=\"container\">\n            <div class=\"row\">\n              <div class=\"col-lg-6\">\n                <div class=\"copyright\">\n                  <p>\n                    <span\n                      >&copy; Above Site All right reserved. Template By </span\n                    ><a href=\"http://webthemez.com\" target=\"_blank\"\n                      >WebThemez</a\n                    >\n                  </p>\n                </div>\n              </div>\n              <div class=\"col-lg-6\">\n                <ul class=\"social-network\">\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Facebook\"\n                      ><i class=\"fa fa-facebook\"></i\n                    ></a>\n                  </li>\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Twitter\"\n                      ><i class=\"fa fa-twitter\"></i\n                    ></a>\n                  </li>\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Linkedin\"\n                      ><i class=\"fa fa-linkedin\"></i\n                    ></a>\n                  </li>\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Pinterest\"\n                      ><i class=\"fa fa-pinterest\"></i\n                    ></a>\n                  </li>\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Google plus\"\n                      ><i class=\"fa fa-google-plus\"></i\n                    ></a>\n                  </li>\n                </ul>\n              </div>\n            </div>\n          </div>\n        </div>\n      </footer>\n    </div>\n    <a href=\"#\" class=\"scrollup\"><i class=\"fa fa-angle-up active\"></i></a>\n    <!-- javascript\n    ================================================== -->\n    <!-- Placed at the end of the document so the pages load faster -->\n    <script src=\"js/jquery.js\"></script>\n    <script src=\"js/jquery.easing.1.3.js\"></script>\n    <script src=\"js/bootstrap.min.js\"></script>\n    <script src=\"js/jquery.fancybox.pack.js\"></script>\n    <script src=\"js/jquery.fancybox-media.js\"></script>\n    <script src=\"js/portfolio/jquery.quicksand.js\"></script>\n    <script src=\"js/portfolio/setting.js\"></script>\n    <script src=\"js/jquery.flexslider.js\"></script>\n    <script src=\"js/animate.js\"></script>\n    <script src=\"js/custom.js\"></script>\n    <script src=\"js/validate.js\"></script>\n  </body>\n</html>\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/courses.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"utf-8\">\n<title>Above Multi-purpose Free Bootstrap Responsive Template</title>\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n<meta name=\"description\" content=\"\" />\n<meta name=\"author\" content=\"http://webthemez.com\" />\n \n<link href=\"css/bootstrap.min.css\" rel=\"stylesheet\" />\n<link href=\"css/fancybox/jquery.fancybox.css\" rel=\"stylesheet\">\n<link href=\"css/jcarousel.css\" rel=\"stylesheet\" />\n<link href=\"css/flexslider.css\" rel=\"stylesheet\" />\n<link href=\"css/style.css\" rel=\"stylesheet\" />\n \n<!-- HTML5 shim, for IE6-8 support of HTML5 elements -->\n<!--[if lt IE 9]>\n      <script src=\"http://html5shim.googlecode.com/svn/trunk/html5.js\"></script>\n    <![endif]-->\n\n</head>\n<body>\n<div id=\"wrapper\">\n\n\t<!-- start header -->\n\t\t<header>\n        <div class=\"navbar navbar-default navbar-static-top\">\n            <div class=\"container\">\n                <div class=\"navbar-header\">\n                    <button type=\"button\" class=\"navbar-toggle\" data-toggle=\"collapse\" data-target=\".navbar-collapse\">\n                        <span class=\"icon-bar\"></span>\n                        <span class=\"icon-bar\"></span>\n                        <span class=\"icon-bar\"></span>\n                    </button>\n                    <a class=\"navbar-brand\" href=\"index.html\"><img src=\"img/logo.png\" alt=\"logo\"/></a>\n                </div>\n                <div class=\"navbar-collapse collapse \">\n                    <ul class=\"nav navbar-nav\">\n                        <li><a href=\"index.html\">Home</a></li> \n\t\t\t\t\t\t<li><a href=\"about.html\">About Us</a></li>\n\t\t\t\t\t\t<li class=\"active\"><a href=\"courses.html\">Courses</a></li>\n                        <li><a href=\"portfolio.html\">Portfolio</a></li>\n                        <li><a href=\"pricing.html\">Pricing</a></li>\n                        <li><a href=\"contact.html\">Contact</a></li>\n                    </ul>\n                </div>\n            </div>\n        </div>\n\t</header><!-- end header -->\n\t<section id=\"inner-headline\">\n\t<div class=\"container\">\n\t\t<div class=\"row\">\n\t\t\t<div class=\"col-lg-12\">\n\t\t\t\t<h2 class=\"pageTitle\">Courses</h2>\n\t\t\t</div>\n\t\t</div>\n\t</div>\n\t</section>\n\t \n\t<section id=\"content\">\n\t\t<div class=\"container\">\t\t \n\t\t\t\t\n<section class=\"course\"> \n\n\t\t<div class=\"row\">\n\t\t\t<div class=\"col-lg-12\">\n\t\t\t\t<div class=\"\"><h3>Courses We Offer</h3><span class=\"clear spacer_responsive_hide_mobile \" style=\"height:13px;display:block;\"></span>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus, vero mollitia velit ad consectetur. Alias, laborum excepturi nihil autem nemo numquam, ipsa architecto non, magni consequuntur quam.</div>\n\t\t\t</div>\n\t\t</div>\n<div class=\"row\">\n            <div class=\"col-md-4\">\n\t\t\t<div class=\"textbox\">\n                <h3>Heading Course</h3>\n\t\t\t\t<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>\n            </div> </div>\n            <div class=\"col-md-4\">\n\t\t\t<div class=\"textbox\">\n                  <h3>Heading Course</h3>\n\t\t\t\t<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>\n            </div> </div>\n\t\t\t<div class=\"col-md-4\">\n\t\t\t<div class=\"textbox\">\n                  <h3>Heading Course</h3>\n\t\t\t\t<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>\n            </div> </div>\n        </div>\n\t\t<div class=\"row\">\n            <div class=\"col-md-4\">\n\t\t\t<div class=\"textbox\">\n                <h3>Heading Course</h3>\n\t\t\t\t<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>\n            </div> </div>\n            <div class=\"col-md-4\">\n\t\t\t<div class=\"textbox\">\n                  <h3>Heading Course</h3>\n\t\t\t\t<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>\n            </div> </div>\n\t\t\t<div class=\"col-md-4\">\n\t\t\t<div class=\"textbox\">\n                  <h3>Heading Course</h3>\n\t\t\t\t<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>\n            </div> </div>\n        </div> \n</section>\t<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus</p> </br>\n        <div class=\"row service-v1 margin-bottom-40\">\n            <div class=\"col-md-4 md-margin-bottom-40\">\n               <img class=\"img-responsive\" src=\"img/service1.jpg\" alt=\"\">   \n                <h3>Web Development</h3>\n                <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus</p>        \n            </div>\n            <div class=\"col-md-4\">\n                <img class=\"img-responsive\" src=\"img/service2.jpg\" alt=\"\">            \n                <h3>Mobile Development</h3>\n                <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus</p>        \n            </div>\n            <div class=\"col-md-4 md-margin-bottom-40\">\n              <img class=\"img-responsive\" src=\"img/service3.jpg\" alt=\"\">  \n                <h3>Responsive Design</h3>\n                <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus</p>        \n            </div>\n        </div> \n \n\n         \n\n        \n    </div>\n    </section>\n\t<footer>\n\t<div class=\"container\">\n\t\t<div class=\"row\">\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"widget\">\n\t\t\t\t\t<h5 class=\"widgetheading\">Our Contact</h5>\n\t\t\t\t\t<address>\n\t\t\t\t\t<strong>Abovecompany Inc</strong><br>\n\t\t\t\t\tJC Main Road, Near Silnile tower<br>\n\t\t\t\t\t Pin-21542 NewYork US.</address>\n\t\t\t\t\t<p>\n\t\t\t\t\t\t<i class=\"icon-phone\"></i> (123) 456-789 - 1255-12584 <br>\n\t\t\t\t\t\t<i class=\"icon-envelope-alt\"></i> email@domainname.com\n\t\t\t\t\t</p>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"widget\">\n\t\t\t\t\t<h5 class=\"widgetheading\">Quick Links</h5>\n\t\t\t\t\t<ul class=\"link-list\">\n\t\t\t\t\t\t<li><a href=\"#\">Latest Events</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Terms and conditions</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Privacy policy</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Career</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Contact us</a></li>\n\t\t\t\t\t</ul>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"widget\">\n\t\t\t\t\t<h5 class=\"widgetheading\">Latest posts</h5>\n\t\t\t\t\t<ul class=\"link-list\">\n\t\t\t\t\t\t<li><a href=\"#\">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Pellentesque et pulvinar enim. Quisque at tempor ligula</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Natus error sit voluptatem accusantium doloremque</a></li>\n\t\t\t\t\t</ul>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t\t<div class=\"widget\">\n\t\t\t\t\t<h5 class=\"widgetheading\">Recent News</h5>\n\t\t\t\t\t<ul class=\"link-list\">\n\t\t\t\t\t\t<li><a href=\"#\">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Pellentesque et pulvinar enim. Quisque at tempor ligula</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Natus error sit voluptatem accusantium doloremque</a></li>\n\t\t\t\t\t</ul>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t</div>\n\t</div>\n\t<div id=\"sub-footer\">\n\t\t<div class=\"container\">\n\t\t\t<div class=\"row\">\n\t\t\t\t<div class=\"col-lg-6\">\n\t\t\t\t\t<div class=\"copyright\">\n\t\t\t\t\t\t<p>\n\t\t\t\t\t\t\t<span>&copy; Above Site All right reserved. Template By </span><a href=\"http://webthemez.com\" target=\"_blank\">WebThemez</a>\n\t\t\t\t\t\t</p>\n\t\t\t\t\t</div>\n\t\t\t\t</div>\n\t\t\t\t<div class=\"col-lg-6\">\n\t\t\t\t\t<ul class=\"social-network\">\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Facebook\"><i class=\"fa fa-facebook\"></i></a></li>\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Twitter\"><i class=\"fa fa-twitter\"></i></a></li>\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Linkedin\"><i class=\"fa fa-linkedin\"></i></a></li>\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Pinterest\"><i class=\"fa fa-pinterest\"></i></a></li>\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Google plus\"><i class=\"fa fa-google-plus\"></i></a></li>\n\t\t\t\t\t</ul>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t</div>\n\t</div>\n\t</footer>\n</div>\n<a href=\"#\" class=\"scrollup\"><i class=\"fa fa-angle-up active\"></i></a>\n<!-- javascript\n    ================================================== -->\n<!-- Placed at the end of the document so the pages load faster -->\n<script src=\"js/jquery.js\"></script>\n<script src=\"js/jquery.easing.1.3.js\"></script>\n<script src=\"js/bootstrap.min.js\"></script>\n<script src=\"js/jquery.fancybox.pack.js\"></script>\n<script src=\"js/jquery.fancybox-media.js\"></script> \n<script src=\"js/portfolio/jquery.quicksand.js\"></script>\n<script src=\"js/portfolio/setting.js\"></script>\n<script src=\"js/jquery.flexslider.js\"></script>\n<script src=\"js/animate.js\"></script>\n<script src=\"js/custom.js\"></script>\n</body>\n</html>"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/css/animate.css",
    "content": "@charset \"UTF-8\";\n/*\nAnimate.css - http://daneden.me/animate\nLicensed under the MIT license\n\nCopyright (c) 2013 Daniel Eden\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n*/\nbody {\n  /* Addresses a small issue in webkit: http://bit.ly/NEdoDq */\n  -webkit-backface-visibility: hidden;\n}\n.animated {\n  -webkit-animation-duration: 1s;\n  -moz-animation-duration: 1s;\n  -o-animation-duration: 1s;\n  animation-duration: 1s;\n  -webkit-animation-fill-mode: both;\n  -moz-animation-fill-mode: both;\n  -o-animation-fill-mode: both;\n  animation-fill-mode: both;\n}\n\n.animated.hinge {\n  -webkit-animation-duration: 2s;\n  -moz-animation-duration: 2s;\n  -o-animation-duration: 2s;\n  animation-duration: 2s;\n}\n\n@-webkit-keyframes flash {\n  0%,\n  50%,\n  100% {\n    opacity: 1;\n  }\n  25%,\n  75% {\n    opacity: 0;\n  }\n}\n\n@-moz-keyframes flash {\n  0%,\n  50%,\n  100% {\n    opacity: 1;\n  }\n  25%,\n  75% {\n    opacity: 0;\n  }\n}\n\n@-o-keyframes flash {\n  0%,\n  50%,\n  100% {\n    opacity: 1;\n  }\n  25%,\n  75% {\n    opacity: 0;\n  }\n}\n\n@keyframes flash {\n  0%,\n  50%,\n  100% {\n    opacity: 1;\n  }\n  25%,\n  75% {\n    opacity: 0;\n  }\n}\n\n.flash {\n  -webkit-animation-name: flash;\n  -moz-animation-name: flash;\n  -o-animation-name: flash;\n  animation-name: flash;\n}\n@-webkit-keyframes shake {\n  0%,\n  100% {\n    -webkit-transform: translateX(0);\n  }\n  10%,\n  30%,\n  50%,\n  70%,\n  90% {\n    -webkit-transform: translateX(-10px);\n  }\n  20%,\n  40%,\n  60%,\n  80% {\n    -webkit-transform: translateX(10px);\n  }\n}\n\n@-moz-keyframes shake {\n  0%,\n  100% {\n    -moz-transform: translateX(0);\n  }\n  10%,\n  30%,\n  50%,\n  70%,\n  90% {\n    -moz-transform: translateX(-10px);\n  }\n  20%,\n  40%,\n  60%,\n  80% {\n    -moz-transform: translateX(10px);\n  }\n}\n\n@-o-keyframes shake {\n  0%,\n  100% {\n    -o-transform: translateX(0);\n  }\n  10%,\n  30%,\n  50%,\n  70%,\n  90% {\n    -o-transform: translateX(-10px);\n  }\n  20%,\n  40%,\n  60%,\n  80% {\n    -o-transform: translateX(10px);\n  }\n}\n\n@keyframes shake {\n  0%,\n  100% {\n    transform: translateX(0);\n  }\n  10%,\n  30%,\n  50%,\n  70%,\n  90% {\n    transform: translateX(-10px);\n  }\n  20%,\n  40%,\n  60%,\n  80% {\n    transform: translateX(10px);\n  }\n}\n\n.shake {\n  -webkit-animation-name: shake;\n  -moz-animation-name: shake;\n  -o-animation-name: shake;\n  animation-name: shake;\n}\n@-webkit-keyframes bounce {\n  0%,\n  20%,\n  50%,\n  80%,\n  100% {\n    -webkit-transform: translateY(0);\n  }\n  40% {\n    -webkit-transform: translateY(-30px);\n  }\n  60% {\n    -webkit-transform: translateY(-15px);\n  }\n}\n\n@-moz-keyframes bounce {\n  0%,\n  20%,\n  50%,\n  80%,\n  100% {\n    -moz-transform: translateY(0);\n  }\n  40% {\n    -moz-transform: translateY(-30px);\n  }\n  60% {\n    -moz-transform: translateY(-15px);\n  }\n}\n\n@-o-keyframes bounce {\n  0%,\n  20%,\n  50%,\n  80%,\n  100% {\n    -o-transform: translateY(0);\n  }\n  40% {\n    -o-transform: translateY(-30px);\n  }\n  60% {\n    -o-transform: translateY(-15px);\n  }\n}\n@keyframes bounce {\n  0%,\n  20%,\n  50%,\n  80%,\n  100% {\n    transform: translateY(0);\n  }\n  40% {\n    transform: translateY(-30px);\n  }\n  60% {\n    transform: translateY(-15px);\n  }\n}\n\n.bounce {\n  -webkit-animation-name: bounce;\n  -moz-animation-name: bounce;\n  -o-animation-name: bounce;\n  animation-name: bounce;\n}\n@-webkit-keyframes tada {\n  0% {\n    -webkit-transform: scale(1);\n  }\n  10%,\n  20% {\n    -webkit-transform: scale(0.9) rotate(-3deg);\n  }\n  30%,\n  50%,\n  70%,\n  90% {\n    -webkit-transform: scale(1.1) rotate(3deg);\n  }\n  40%,\n  60%,\n  80% {\n    -webkit-transform: scale(1.1) rotate(-3deg);\n  }\n  100% {\n    -webkit-transform: scale(1) rotate(0);\n  }\n}\n\n@-moz-keyframes tada {\n  0% {\n    -moz-transform: scale(1);\n  }\n  10%,\n  20% {\n    -moz-transform: scale(0.9) rotate(-3deg);\n  }\n  30%,\n  50%,\n  70%,\n  90% {\n    -moz-transform: scale(1.1) rotate(3deg);\n  }\n  40%,\n  60%,\n  80% {\n    -moz-transform: scale(1.1) rotate(-3deg);\n  }\n  100% {\n    -moz-transform: scale(1) rotate(0);\n  }\n}\n\n@-o-keyframes tada {\n  0% {\n    -o-transform: scale(1);\n  }\n  10%,\n  20% {\n    -o-transform: scale(0.9) rotate(-3deg);\n  }\n  30%,\n  50%,\n  70%,\n  90% {\n    -o-transform: scale(1.1) rotate(3deg);\n  }\n  40%,\n  60%,\n  80% {\n    -o-transform: scale(1.1) rotate(-3deg);\n  }\n  100% {\n    -o-transform: scale(1) rotate(0);\n  }\n}\n\n@keyframes tada {\n  0% {\n    transform: scale(1);\n  }\n  10%,\n  20% {\n    transform: scale(0.9) rotate(-3deg);\n  }\n  30%,\n  50%,\n  70%,\n  90% {\n    transform: scale(1.1) rotate(3deg);\n  }\n  40%,\n  60%,\n  80% {\n    transform: scale(1.1) rotate(-3deg);\n  }\n  100% {\n    transform: scale(1) rotate(0);\n  }\n}\n\n.tada {\n  -webkit-animation-name: tada;\n  -moz-animation-name: tada;\n  -o-animation-name: tada;\n  animation-name: tada;\n}\n@-webkit-keyframes swing {\n  20%,\n  40%,\n  60%,\n  80%,\n  100% {\n    -webkit-transform-origin: top center;\n  }\n  20% {\n    -webkit-transform: rotate(15deg);\n  }\n  40% {\n    -webkit-transform: rotate(-10deg);\n  }\n  60% {\n    -webkit-transform: rotate(5deg);\n  }\n  80% {\n    -webkit-transform: rotate(-5deg);\n  }\n  100% {\n    -webkit-transform: rotate(0deg);\n  }\n}\n\n@-moz-keyframes swing {\n  20% {\n    -moz-transform: rotate(15deg);\n  }\n  40% {\n    -moz-transform: rotate(-10deg);\n  }\n  60% {\n    -moz-transform: rotate(5deg);\n  }\n  80% {\n    -moz-transform: rotate(-5deg);\n  }\n  100% {\n    -moz-transform: rotate(0deg);\n  }\n}\n\n@-o-keyframes swing {\n  20% {\n    -o-transform: rotate(15deg);\n  }\n  40% {\n    -o-transform: rotate(-10deg);\n  }\n  60% {\n    -o-transform: rotate(5deg);\n  }\n  80% {\n    -o-transform: rotate(-5deg);\n  }\n  100% {\n    -o-transform: rotate(0deg);\n  }\n}\n\n@keyframes swing {\n  20% {\n    transform: rotate(15deg);\n  }\n  40% {\n    transform: rotate(-10deg);\n  }\n  60% {\n    transform: rotate(5deg);\n  }\n  80% {\n    transform: rotate(-5deg);\n  }\n  100% {\n    transform: rotate(0deg);\n  }\n}\n\n.swing {\n  -webkit-transform-origin: top center;\n  -moz-transform-origin: top center;\n  -o-transform-origin: top center;\n  transform-origin: top center;\n  -webkit-animation-name: swing;\n  -moz-animation-name: swing;\n  -o-animation-name: swing;\n  animation-name: swing;\n}\n/* originally authored by Nick Pettit - https://github.com/nickpettit/glide */\n\n@-webkit-keyframes wobble {\n  0% {\n    -webkit-transform: translateX(0%);\n  }\n  15% {\n    -webkit-transform: translateX(-25%) rotate(-5deg);\n  }\n  30% {\n    -webkit-transform: translateX(20%) rotate(3deg);\n  }\n  45% {\n    -webkit-transform: translateX(-15%) rotate(-3deg);\n  }\n  60% {\n    -webkit-transform: translateX(10%) rotate(2deg);\n  }\n  75% {\n    -webkit-transform: translateX(-5%) rotate(-1deg);\n  }\n  100% {\n    -webkit-transform: translateX(0%);\n  }\n}\n\n@-moz-keyframes wobble {\n  0% {\n    -moz-transform: translateX(0%);\n  }\n  15% {\n    -moz-transform: translateX(-25%) rotate(-5deg);\n  }\n  30% {\n    -moz-transform: translateX(20%) rotate(3deg);\n  }\n  45% {\n    -moz-transform: translateX(-15%) rotate(-3deg);\n  }\n  60% {\n    -moz-transform: translateX(10%) rotate(2deg);\n  }\n  75% {\n    -moz-transform: translateX(-5%) rotate(-1deg);\n  }\n  100% {\n    -moz-transform: translateX(0%);\n  }\n}\n\n@-o-keyframes wobble {\n  0% {\n    -o-transform: translateX(0%);\n  }\n  15% {\n    -o-transform: translateX(-25%) rotate(-5deg);\n  }\n  30% {\n    -o-transform: translateX(20%) rotate(3deg);\n  }\n  45% {\n    -o-transform: translateX(-15%) rotate(-3deg);\n  }\n  60% {\n    -o-transform: translateX(10%) rotate(2deg);\n  }\n  75% {\n    -o-transform: translateX(-5%) rotate(-1deg);\n  }\n  100% {\n    -o-transform: translateX(0%);\n  }\n}\n\n@keyframes wobble {\n  0% {\n    transform: translateX(0%);\n  }\n  15% {\n    transform: translateX(-25%) rotate(-5deg);\n  }\n  30% {\n    transform: translateX(20%) rotate(3deg);\n  }\n  45% {\n    transform: translateX(-15%) rotate(-3deg);\n  }\n  60% {\n    transform: translateX(10%) rotate(2deg);\n  }\n  75% {\n    transform: translateX(-5%) rotate(-1deg);\n  }\n  100% {\n    transform: translateX(0%);\n  }\n}\n\n.wobble {\n  -webkit-animation-name: wobble;\n  -moz-animation-name: wobble;\n  -o-animation-name: wobble;\n  animation-name: wobble;\n}\n/* originally authored by Nick Pettit - https://github.com/nickpettit/glide */\n\n@-webkit-keyframes pulse {\n  0% {\n    -webkit-transform: scale(1);\n  }\n  50% {\n    -webkit-transform: scale(1.1);\n  }\n  100% {\n    -webkit-transform: scale(1);\n  }\n}\n@-moz-keyframes pulse {\n  0% {\n    -moz-transform: scale(1);\n  }\n  50% {\n    -moz-transform: scale(1.1);\n  }\n  100% {\n    -moz-transform: scale(1);\n  }\n}\n@-o-keyframes pulse {\n  0% {\n    -o-transform: scale(1);\n  }\n  50% {\n    -o-transform: scale(1.1);\n  }\n  100% {\n    -o-transform: scale(1);\n  }\n}\n@keyframes pulse {\n  0% {\n    transform: scale(1);\n  }\n  50% {\n    transform: scale(1.1);\n  }\n  100% {\n    transform: scale(1);\n  }\n}\n\n.pulse {\n  -webkit-animation-name: pulse;\n  -moz-animation-name: pulse;\n  -o-animation-name: pulse;\n  animation-name: pulse;\n}\n@-webkit-keyframes flip {\n  0% {\n    -webkit-transform: perspective(400px) rotateY(0);\n    -webkit-animation-timing-function: ease-out;\n  }\n  40% {\n    -webkit-transform: perspective(400px) translateZ(150px) rotateY(170deg);\n    -webkit-animation-timing-function: ease-out;\n  }\n  50% {\n    -webkit-transform: perspective(400px) translateZ(150px) rotateY(190deg)\n      scale(1);\n    -webkit-animation-timing-function: ease-in;\n  }\n  80% {\n    -webkit-transform: perspective(400px) rotateY(360deg) scale(0.95);\n    -webkit-animation-timing-function: ease-in;\n  }\n  100% {\n    -webkit-transform: perspective(400px) scale(1);\n    -webkit-animation-timing-function: ease-in;\n  }\n}\n@-moz-keyframes flip {\n  0% {\n    -moz-transform: perspective(400px) rotateY(0);\n    -moz-animation-timing-function: ease-out;\n  }\n  40% {\n    -moz-transform: perspective(400px) translateZ(150px) rotateY(170deg);\n    -moz-animation-timing-function: ease-out;\n  }\n  50% {\n    -moz-transform: perspective(400px) translateZ(150px) rotateY(190deg)\n      scale(1);\n    -moz-animation-timing-function: ease-in;\n  }\n  80% {\n    -moz-transform: perspective(400px) rotateY(360deg) scale(0.95);\n    -moz-animation-timing-function: ease-in;\n  }\n  100% {\n    -moz-transform: perspective(400px) scale(1);\n    -moz-animation-timing-function: ease-in;\n  }\n}\n@-o-keyframes flip {\n  0% {\n    -o-transform: perspective(400px) rotateY(0);\n    -o-animation-timing-function: ease-out;\n  }\n  40% {\n    -o-transform: perspective(400px) translateZ(150px) rotateY(170deg);\n    -o-animation-timing-function: ease-out;\n  }\n  50% {\n    -o-transform: perspective(400px) translateZ(150px) rotateY(190deg) scale(1);\n    -o-animation-timing-function: ease-in;\n  }\n  80% {\n    -o-transform: perspective(400px) rotateY(360deg) scale(0.95);\n    -o-animation-timing-function: ease-in;\n  }\n  100% {\n    -o-transform: perspective(400px) scale(1);\n    -o-animation-timing-function: ease-in;\n  }\n}\n@keyframes flip {\n  0% {\n    transform: perspective(400px) rotateY(0);\n    animation-timing-function: ease-out;\n  }\n  40% {\n    transform: perspective(400px) translateZ(150px) rotateY(170deg);\n    animation-timing-function: ease-out;\n  }\n  50% {\n    transform: perspective(400px) translateZ(150px) rotateY(190deg) scale(1);\n    animation-timing-function: ease-in;\n  }\n  80% {\n    transform: perspective(400px) rotateY(360deg) scale(0.95);\n    animation-timing-function: ease-in;\n  }\n  100% {\n    transform: perspective(400px) scale(1);\n    animation-timing-function: ease-in;\n  }\n}\n\n.flip {\n  -webkit-backface-visibility: visible !important;\n  -webkit-animation-name: flip;\n  -moz-backface-visibility: visible !important;\n  -moz-animation-name: flip;\n  -o-backface-visibility: visible !important;\n  -o-animation-name: flip;\n  backface-visibility: visible !important;\n  animation-name: flip;\n}\n@-webkit-keyframes flipInX {\n  0% {\n    -webkit-transform: perspective(400px) rotateX(90deg);\n    opacity: 0;\n  }\n\n  40% {\n    -webkit-transform: perspective(400px) rotateX(-10deg);\n  }\n\n  70% {\n    -webkit-transform: perspective(400px) rotateX(10deg);\n  }\n\n  100% {\n    -webkit-transform: perspective(400px) rotateX(0deg);\n    opacity: 1;\n  }\n}\n@-moz-keyframes flipInX {\n  0% {\n    -moz-transform: perspective(400px) rotateX(90deg);\n    opacity: 0;\n  }\n\n  40% {\n    -moz-transform: perspective(400px) rotateX(-10deg);\n  }\n\n  70% {\n    -moz-transform: perspective(400px) rotateX(10deg);\n  }\n\n  100% {\n    -moz-transform: perspective(400px) rotateX(0deg);\n    opacity: 1;\n  }\n}\n@-o-keyframes flipInX {\n  0% {\n    -o-transform: perspective(400px) rotateX(90deg);\n    opacity: 0;\n  }\n\n  40% {\n    -o-transform: perspective(400px) rotateX(-10deg);\n  }\n\n  70% {\n    -o-transform: perspective(400px) rotateX(10deg);\n  }\n\n  100% {\n    -o-transform: perspective(400px) rotateX(0deg);\n    opacity: 1;\n  }\n}\n@keyframes flipInX {\n  0% {\n    transform: perspective(400px) rotateX(90deg);\n    opacity: 0;\n  }\n\n  40% {\n    transform: perspective(400px) rotateX(-10deg);\n  }\n\n  70% {\n    transform: perspective(400px) rotateX(10deg);\n  }\n\n  100% {\n    transform: perspective(400px) rotateX(0deg);\n    opacity: 1;\n  }\n}\n\n.flipInX {\n  -webkit-backface-visibility: visible !important;\n  -webkit-animation-name: flipInX;\n  -moz-backface-visibility: visible !important;\n  -moz-animation-name: flipInX;\n  -o-backface-visibility: visible !important;\n  -o-animation-name: flipInX;\n  backface-visibility: visible !important;\n  animation-name: flipInX;\n}\n@-webkit-keyframes flipOutX {\n  0% {\n    -webkit-transform: perspective(400px) rotateX(0deg);\n    opacity: 1;\n  }\n  100% {\n    -webkit-transform: perspective(400px) rotateX(90deg);\n    opacity: 0;\n  }\n}\n\n@-moz-keyframes flipOutX {\n  0% {\n    -moz-transform: perspective(400px) rotateX(0deg);\n    opacity: 1;\n  }\n  100% {\n    -moz-transform: perspective(400px) rotateX(90deg);\n    opacity: 0;\n  }\n}\n\n@-o-keyframes flipOutX {\n  0% {\n    -o-transform: perspective(400px) rotateX(0deg);\n    opacity: 1;\n  }\n  100% {\n    -o-transform: perspective(400px) rotateX(90deg);\n    opacity: 0;\n  }\n}\n\n@keyframes flipOutX {\n  0% {\n    transform: perspective(400px) rotateX(0deg);\n    opacity: 1;\n  }\n  100% {\n    transform: perspective(400px) rotateX(90deg);\n    opacity: 0;\n  }\n}\n\n.flipOutX {\n  -webkit-animation-name: flipOutX;\n  -webkit-backface-visibility: visible !important;\n  -moz-animation-name: flipOutX;\n  -moz-backface-visibility: visible !important;\n  -o-animation-name: flipOutX;\n  -o-backface-visibility: visible !important;\n  animation-name: flipOutX;\n  backface-visibility: visible !important;\n}\n@-webkit-keyframes flipInY {\n  0% {\n    -webkit-transform: perspective(400px) rotateY(90deg);\n    opacity: 0;\n  }\n\n  40% {\n    -webkit-transform: perspective(400px) rotateY(-10deg);\n  }\n\n  70% {\n    -webkit-transform: perspective(400px) rotateY(10deg);\n  }\n\n  100% {\n    -webkit-transform: perspective(400px) rotateY(0deg);\n    opacity: 1;\n  }\n}\n@-moz-keyframes flipInY {\n  0% {\n    -moz-transform: perspective(400px) rotateY(90deg);\n    opacity: 0;\n  }\n\n  40% {\n    -moz-transform: perspective(400px) rotateY(-10deg);\n  }\n\n  70% {\n    -moz-transform: perspective(400px) rotateY(10deg);\n  }\n\n  100% {\n    -moz-transform: perspective(400px) rotateY(0deg);\n    opacity: 1;\n  }\n}\n@-o-keyframes flipInY {\n  0% {\n    -o-transform: perspective(400px) rotateY(90deg);\n    opacity: 0;\n  }\n\n  40% {\n    -o-transform: perspective(400px) rotateY(-10deg);\n  }\n\n  70% {\n    -o-transform: perspective(400px) rotateY(10deg);\n  }\n\n  100% {\n    -o-transform: perspective(400px) rotateY(0deg);\n    opacity: 1;\n  }\n}\n@keyframes flipInY {\n  0% {\n    transform: perspective(400px) rotateY(90deg);\n    opacity: 0;\n  }\n\n  40% {\n    transform: perspective(400px) rotateY(-10deg);\n  }\n\n  70% {\n    transform: perspective(400px) rotateY(10deg);\n  }\n\n  100% {\n    transform: perspective(400px) rotateY(0deg);\n    opacity: 1;\n  }\n}\n\n.flipInY {\n  -webkit-backface-visibility: visible !important;\n  -webkit-animation-name: flipInY;\n  -moz-backface-visibility: visible !important;\n  -moz-animation-name: flipInY;\n  -o-backface-visibility: visible !important;\n  -o-animation-name: flipInY;\n  backface-visibility: visible !important;\n  animation-name: flipInY;\n}\n@-webkit-keyframes flipOutY {\n  0% {\n    -webkit-transform: perspective(400px) rotateY(0deg);\n    opacity: 1;\n  }\n  100% {\n    -webkit-transform: perspective(400px) rotateY(90deg);\n    opacity: 0;\n  }\n}\n@-moz-keyframes flipOutY {\n  0% {\n    -moz-transform: perspective(400px) rotateY(0deg);\n    opacity: 1;\n  }\n  100% {\n    -moz-transform: perspective(400px) rotateY(90deg);\n    opacity: 0;\n  }\n}\n@-o-keyframes flipOutY {\n  0% {\n    -o-transform: perspective(400px) rotateY(0deg);\n    opacity: 1;\n  }\n  100% {\n    -o-transform: perspective(400px) rotateY(90deg);\n    opacity: 0;\n  }\n}\n@keyframes flipOutY {\n  0% {\n    transform: perspective(400px) rotateY(0deg);\n    opacity: 1;\n  }\n  100% {\n    transform: perspective(400px) rotateY(90deg);\n    opacity: 0;\n  }\n}\n\n.flipOutY {\n  -webkit-backface-visibility: visible !important;\n  -webkit-animation-name: flipOutY;\n  -moz-backface-visibility: visible !important;\n  -moz-animation-name: flipOutY;\n  -o-backface-visibility: visible !important;\n  -o-animation-name: flipOutY;\n  backface-visibility: visible !important;\n  animation-name: flipOutY;\n}\n@-webkit-keyframes fadeIn {\n  0% {\n    opacity: 0;\n  }\n  100% {\n    opacity: 1;\n  }\n}\n\n@-moz-keyframes fadeIn {\n  0% {\n    opacity: 0;\n  }\n  100% {\n    opacity: 1;\n  }\n}\n\n@-o-keyframes fadeIn {\n  0% {\n    opacity: 0;\n  }\n  100% {\n    opacity: 1;\n  }\n}\n\n@keyframes fadeIn {\n  0% {\n    opacity: 0;\n  }\n  100% {\n    opacity: 1;\n  }\n}\n\n.fadeIn {\n  -webkit-animation-name: fadeIn;\n  -moz-animation-name: fadeIn;\n  -o-animation-name: fadeIn;\n  animation-name: fadeIn;\n}\n@-webkit-keyframes fadeInUp {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateY(20px);\n  }\n\n  100% {\n    opacity: 1;\n    -webkit-transform: translateY(0);\n  }\n}\n\n@-moz-keyframes fadeInUp {\n  0% {\n    opacity: 0;\n    -moz-transform: translateY(20px);\n  }\n\n  100% {\n    opacity: 1;\n    -moz-transform: translateY(0);\n  }\n}\n\n@-o-keyframes fadeInUp {\n  0% {\n    opacity: 0;\n    -o-transform: translateY(20px);\n  }\n\n  100% {\n    opacity: 1;\n    -o-transform: translateY(0);\n  }\n}\n\n@keyframes fadeInUp {\n  0% {\n    opacity: 0;\n    transform: translateY(20px);\n  }\n\n  100% {\n    opacity: 1;\n    transform: translateY(0);\n  }\n}\n\n.fadeInUp {\n  -webkit-animation-name: fadeInUp;\n  -moz-animation-name: fadeInUp;\n  -o-animation-name: fadeInUp;\n  animation-name: fadeInUp;\n}\n@-webkit-keyframes fadeInDown {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateY(-20px);\n  }\n\n  100% {\n    opacity: 1;\n    -webkit-transform: translateY(0);\n  }\n}\n\n@-moz-keyframes fadeInDown {\n  0% {\n    opacity: 0;\n    -moz-transform: translateY(-20px);\n  }\n\n  100% {\n    opacity: 1;\n    -moz-transform: translateY(0);\n  }\n}\n\n@-o-keyframes fadeInDown {\n  0% {\n    opacity: 0;\n    -o-transform: translateY(-20px);\n  }\n\n  100% {\n    opacity: 1;\n    -o-transform: translateY(0);\n  }\n}\n\n@keyframes fadeInDown {\n  0% {\n    opacity: 0;\n    transform: translateY(-20px);\n  }\n\n  100% {\n    opacity: 1;\n    transform: translateY(0);\n  }\n}\n\n.fadeInDown {\n  -webkit-animation-name: fadeInDown;\n  -moz-animation-name: fadeInDown;\n  -o-animation-name: fadeInDown;\n  animation-name: fadeInDown;\n}\n@-webkit-keyframes fadeInLeft {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateX(-20px);\n  }\n\n  100% {\n    opacity: 1;\n    -webkit-transform: translateX(0);\n  }\n}\n\n@-moz-keyframes fadeInLeft {\n  0% {\n    opacity: 0;\n    -moz-transform: translateX(-20px);\n  }\n\n  100% {\n    opacity: 1;\n    -moz-transform: translateX(0);\n  }\n}\n\n@-o-keyframes fadeInLeft {\n  0% {\n    opacity: 0;\n    -o-transform: translateX(-20px);\n  }\n\n  100% {\n    opacity: 1;\n    -o-transform: translateX(0);\n  }\n}\n\n@keyframes fadeInLeft {\n  0% {\n    opacity: 0;\n    transform: translateX(-20px);\n  }\n\n  100% {\n    opacity: 1;\n    transform: translateX(0);\n  }\n}\n\n.fadeInLeft {\n  -webkit-animation-name: fadeInLeft;\n  -moz-animation-name: fadeInLeft;\n  -o-animation-name: fadeInLeft;\n  animation-name: fadeInLeft;\n}\n@-webkit-keyframes fadeInRight {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateX(20px);\n  }\n\n  100% {\n    opacity: 1;\n    -webkit-transform: translateX(0);\n  }\n}\n\n@-moz-keyframes fadeInRight {\n  0% {\n    opacity: 0;\n    -moz-transform: translateX(20px);\n  }\n\n  100% {\n    opacity: 1;\n    -moz-transform: translateX(0);\n  }\n}\n\n@-o-keyframes fadeInRight {\n  0% {\n    opacity: 0;\n    -o-transform: translateX(20px);\n  }\n\n  100% {\n    opacity: 1;\n    -o-transform: translateX(0);\n  }\n}\n\n@keyframes fadeInRight {\n  0% {\n    opacity: 0;\n    transform: translateX(20px);\n  }\n\n  100% {\n    opacity: 1;\n    transform: translateX(0);\n  }\n}\n\n.fadeInRight {\n  -webkit-animation-name: fadeInRight;\n  -moz-animation-name: fadeInRight;\n  -o-animation-name: fadeInRight;\n  animation-name: fadeInRight;\n}\n@-webkit-keyframes fadeInUpBig {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateY(2000px);\n  }\n\n  100% {\n    opacity: 1;\n    -webkit-transform: translateY(0);\n  }\n}\n\n@-moz-keyframes fadeInUpBig {\n  0% {\n    opacity: 0;\n    -moz-transform: translateY(2000px);\n  }\n\n  100% {\n    opacity: 1;\n    -moz-transform: translateY(0);\n  }\n}\n\n@-o-keyframes fadeInUpBig {\n  0% {\n    opacity: 0;\n    -o-transform: translateY(2000px);\n  }\n\n  100% {\n    opacity: 1;\n    -o-transform: translateY(0);\n  }\n}\n\n@keyframes fadeInUpBig {\n  0% {\n    opacity: 0;\n    transform: translateY(2000px);\n  }\n\n  100% {\n    opacity: 1;\n    transform: translateY(0);\n  }\n}\n\n.fadeInUpBig {\n  -webkit-animation-name: fadeInUpBig;\n  -moz-animation-name: fadeInUpBig;\n  -o-animation-name: fadeInUpBig;\n  animation-name: fadeInUpBig;\n}\n@-webkit-keyframes fadeInDownBig {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateY(-2000px);\n  }\n\n  100% {\n    opacity: 1;\n    -webkit-transform: translateY(0);\n  }\n}\n\n@-moz-keyframes fadeInDownBig {\n  0% {\n    opacity: 0;\n    -moz-transform: translateY(-2000px);\n  }\n\n  100% {\n    opacity: 1;\n    -moz-transform: translateY(0);\n  }\n}\n\n@-o-keyframes fadeInDownBig {\n  0% {\n    opacity: 0;\n    -o-transform: translateY(-2000px);\n  }\n\n  100% {\n    opacity: 1;\n    -o-transform: translateY(0);\n  }\n}\n\n@keyframes fadeInDownBig {\n  0% {\n    opacity: 0;\n    transform: translateY(-2000px);\n  }\n\n  100% {\n    opacity: 1;\n    transform: translateY(0);\n  }\n}\n\n.fadeInDownBig {\n  -webkit-animation-name: fadeInDownBig;\n  -moz-animation-name: fadeInDownBig;\n  -o-animation-name: fadeInDownBig;\n  animation-name: fadeInDownBig;\n}\n@-webkit-keyframes fadeInLeftBig {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateX(-2000px);\n  }\n\n  100% {\n    opacity: 1;\n    -webkit-transform: translateX(0);\n  }\n}\n@-moz-keyframes fadeInLeftBig {\n  0% {\n    opacity: 0;\n    -moz-transform: translateX(-2000px);\n  }\n\n  100% {\n    opacity: 1;\n    -moz-transform: translateX(0);\n  }\n}\n@-o-keyframes fadeInLeftBig {\n  0% {\n    opacity: 0;\n    -o-transform: translateX(-2000px);\n  }\n\n  100% {\n    opacity: 1;\n    -o-transform: translateX(0);\n  }\n}\n@keyframes fadeInLeftBig {\n  0% {\n    opacity: 0;\n    transform: translateX(-2000px);\n  }\n\n  100% {\n    opacity: 1;\n    transform: translateX(0);\n  }\n}\n\n.fadeInLeftBig {\n  -webkit-animation-name: fadeInLeftBig;\n  -moz-animation-name: fadeInLeftBig;\n  -o-animation-name: fadeInLeftBig;\n  animation-name: fadeInLeftBig;\n}\n@-webkit-keyframes fadeInRightBig {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateX(2000px);\n  }\n\n  100% {\n    opacity: 1;\n    -webkit-transform: translateX(0);\n  }\n}\n\n@-moz-keyframes fadeInRightBig {\n  0% {\n    opacity: 0;\n    -moz-transform: translateX(2000px);\n  }\n\n  100% {\n    opacity: 1;\n    -moz-transform: translateX(0);\n  }\n}\n\n@-o-keyframes fadeInRightBig {\n  0% {\n    opacity: 0;\n    -o-transform: translateX(2000px);\n  }\n\n  100% {\n    opacity: 1;\n    -o-transform: translateX(0);\n  }\n}\n\n@keyframes fadeInRightBig {\n  0% {\n    opacity: 0;\n    transform: translateX(2000px);\n  }\n\n  100% {\n    opacity: 1;\n    transform: translateX(0);\n  }\n}\n\n.fadeInRightBig {\n  -webkit-animation-name: fadeInRightBig;\n  -moz-animation-name: fadeInRightBig;\n  -o-animation-name: fadeInRightBig;\n  animation-name: fadeInRightBig;\n}\n@-webkit-keyframes fadeOut {\n  0% {\n    opacity: 1;\n  }\n  100% {\n    opacity: 0;\n  }\n}\n\n@-moz-keyframes fadeOut {\n  0% {\n    opacity: 1;\n  }\n  100% {\n    opacity: 0;\n  }\n}\n\n@-o-keyframes fadeOut {\n  0% {\n    opacity: 1;\n  }\n  100% {\n    opacity: 0;\n  }\n}\n\n@keyframes fadeOut {\n  0% {\n    opacity: 1;\n  }\n  100% {\n    opacity: 0;\n  }\n}\n\n.fadeOut {\n  -webkit-animation-name: fadeOut;\n  -moz-animation-name: fadeOut;\n  -o-animation-name: fadeOut;\n  animation-name: fadeOut;\n}\n@-webkit-keyframes fadeOutUp {\n  0% {\n    opacity: 1;\n    -webkit-transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateY(-20px);\n  }\n}\n@-moz-keyframes fadeOutUp {\n  0% {\n    opacity: 1;\n    -moz-transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateY(-20px);\n  }\n}\n@-o-keyframes fadeOutUp {\n  0% {\n    opacity: 1;\n    -o-transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateY(-20px);\n  }\n}\n@keyframes fadeOutUp {\n  0% {\n    opacity: 1;\n    transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateY(-20px);\n  }\n}\n\n.fadeOutUp {\n  -webkit-animation-name: fadeOutUp;\n  -moz-animation-name: fadeOutUp;\n  -o-animation-name: fadeOutUp;\n  animation-name: fadeOutUp;\n}\n@-webkit-keyframes fadeOutDown {\n  0% {\n    opacity: 1;\n    -webkit-transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateY(20px);\n  }\n}\n\n@-moz-keyframes fadeOutDown {\n  0% {\n    opacity: 1;\n    -moz-transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateY(20px);\n  }\n}\n\n@-o-keyframes fadeOutDown {\n  0% {\n    opacity: 1;\n    -o-transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateY(20px);\n  }\n}\n\n@keyframes fadeOutDown {\n  0% {\n    opacity: 1;\n    transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateY(20px);\n  }\n}\n\n.fadeOutDown {\n  -webkit-animation-name: fadeOutDown;\n  -moz-animation-name: fadeOutDown;\n  -o-animation-name: fadeOutDown;\n  animation-name: fadeOutDown;\n}\n@-webkit-keyframes fadeOutLeft {\n  0% {\n    opacity: 1;\n    -webkit-transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateX(-20px);\n  }\n}\n\n@-moz-keyframes fadeOutLeft {\n  0% {\n    opacity: 1;\n    -moz-transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateX(-20px);\n  }\n}\n\n@-o-keyframes fadeOutLeft {\n  0% {\n    opacity: 1;\n    -o-transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateX(-20px);\n  }\n}\n\n@keyframes fadeOutLeft {\n  0% {\n    opacity: 1;\n    transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateX(-20px);\n  }\n}\n\n.fadeOutLeft {\n  -webkit-animation-name: fadeOutLeft;\n  -moz-animation-name: fadeOutLeft;\n  -o-animation-name: fadeOutLeft;\n  animation-name: fadeOutLeft;\n}\n@-webkit-keyframes fadeOutRight {\n  0% {\n    opacity: 1;\n    -webkit-transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateX(20px);\n  }\n}\n\n@-moz-keyframes fadeOutRight {\n  0% {\n    opacity: 1;\n    -moz-transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateX(20px);\n  }\n}\n\n@-o-keyframes fadeOutRight {\n  0% {\n    opacity: 1;\n    -o-transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateX(20px);\n  }\n}\n\n@keyframes fadeOutRight {\n  0% {\n    opacity: 1;\n    transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateX(20px);\n  }\n}\n\n.fadeOutRight {\n  -webkit-animation-name: fadeOutRight;\n  -moz-animation-name: fadeOutRight;\n  -o-animation-name: fadeOutRight;\n  animation-name: fadeOutRight;\n}\n@-webkit-keyframes fadeOutUpBig {\n  0% {\n    opacity: 1;\n    -webkit-transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateY(-2000px);\n  }\n}\n\n@-moz-keyframes fadeOutUpBig {\n  0% {\n    opacity: 1;\n    -moz-transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateY(-2000px);\n  }\n}\n\n@-o-keyframes fadeOutUpBig {\n  0% {\n    opacity: 1;\n    -o-transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateY(-2000px);\n  }\n}\n\n@keyframes fadeOutUpBig {\n  0% {\n    opacity: 1;\n    transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateY(-2000px);\n  }\n}\n\n.fadeOutUpBig {\n  -webkit-animation-name: fadeOutUpBig;\n  -moz-animation-name: fadeOutUpBig;\n  -o-animation-name: fadeOutUpBig;\n  animation-name: fadeOutUpBig;\n}\n@-webkit-keyframes fadeOutDownBig {\n  0% {\n    opacity: 1;\n    -webkit-transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateY(2000px);\n  }\n}\n\n@-moz-keyframes fadeOutDownBig {\n  0% {\n    opacity: 1;\n    -moz-transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateY(2000px);\n  }\n}\n\n@-o-keyframes fadeOutDownBig {\n  0% {\n    opacity: 1;\n    -o-transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateY(2000px);\n  }\n}\n\n@keyframes fadeOutDownBig {\n  0% {\n    opacity: 1;\n    transform: translateY(0);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateY(2000px);\n  }\n}\n\n.fadeOutDownBig {\n  -webkit-animation-name: fadeOutDownBig;\n  -moz-animation-name: fadeOutDownBig;\n  -o-animation-name: fadeOutDownBig;\n  animation-name: fadeOutDownBig;\n}\n@-webkit-keyframes fadeOutLeftBig {\n  0% {\n    opacity: 1;\n    -webkit-transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateX(-2000px);\n  }\n}\n\n@-moz-keyframes fadeOutLeftBig {\n  0% {\n    opacity: 1;\n    -moz-transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateX(-2000px);\n  }\n}\n\n@-o-keyframes fadeOutLeftBig {\n  0% {\n    opacity: 1;\n    -o-transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateX(-2000px);\n  }\n}\n\n@keyframes fadeOutLeftBig {\n  0% {\n    opacity: 1;\n    transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateX(-2000px);\n  }\n}\n\n.fadeOutLeftBig {\n  -webkit-animation-name: fadeOutLeftBig;\n  -moz-animation-name: fadeOutLeftBig;\n  -o-animation-name: fadeOutLeftBig;\n  animation-name: fadeOutLeftBig;\n}\n@-webkit-keyframes fadeOutRightBig {\n  0% {\n    opacity: 1;\n    -webkit-transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateX(2000px);\n  }\n}\n@-moz-keyframes fadeOutRightBig {\n  0% {\n    opacity: 1;\n    -moz-transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateX(2000px);\n  }\n}\n@-o-keyframes fadeOutRightBig {\n  0% {\n    opacity: 1;\n    -o-transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateX(2000px);\n  }\n}\n@keyframes fadeOutRightBig {\n  0% {\n    opacity: 1;\n    transform: translateX(0);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateX(2000px);\n  }\n}\n\n.fadeOutRightBig {\n  -webkit-animation-name: fadeOutRightBig;\n  -moz-animation-name: fadeOutRightBig;\n  -o-animation-name: fadeOutRightBig;\n  animation-name: fadeOutRightBig;\n}\n@-webkit-keyframes bounceIn {\n  0% {\n    opacity: 0;\n    -webkit-transform: scale(0.3);\n  }\n\n  50% {\n    opacity: 1;\n    -webkit-transform: scale(1.05);\n  }\n\n  70% {\n    -webkit-transform: scale(0.9);\n  }\n\n  100% {\n    -webkit-transform: scale(1);\n  }\n}\n\n@-moz-keyframes bounceIn {\n  0% {\n    opacity: 0;\n    -moz-transform: scale(0.3);\n  }\n\n  50% {\n    opacity: 1;\n    -moz-transform: scale(1.05);\n  }\n\n  70% {\n    -moz-transform: scale(0.9);\n  }\n\n  100% {\n    -moz-transform: scale(1);\n  }\n}\n\n@-o-keyframes bounceIn {\n  0% {\n    opacity: 0;\n    -o-transform: scale(0.3);\n  }\n\n  50% {\n    opacity: 1;\n    -o-transform: scale(1.05);\n  }\n\n  70% {\n    -o-transform: scale(0.9);\n  }\n\n  100% {\n    -o-transform: scale(1);\n  }\n}\n\n@keyframes bounceIn {\n  0% {\n    opacity: 0;\n    transform: scale(0.3);\n  }\n\n  50% {\n    opacity: 1;\n    transform: scale(1.05);\n  }\n\n  70% {\n    transform: scale(0.9);\n  }\n\n  100% {\n    transform: scale(1);\n  }\n}\n\n.bounceIn {\n  -webkit-animation-name: bounceIn;\n  -moz-animation-name: bounceIn;\n  -o-animation-name: bounceIn;\n  animation-name: bounceIn;\n}\n@-webkit-keyframes bounceInUp {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateY(2000px);\n  }\n\n  60% {\n    opacity: 1;\n    -webkit-transform: translateY(-30px);\n  }\n\n  80% {\n    -webkit-transform: translateY(10px);\n  }\n\n  100% {\n    -webkit-transform: translateY(0);\n  }\n}\n@-moz-keyframes bounceInUp {\n  0% {\n    opacity: 0;\n    -moz-transform: translateY(2000px);\n  }\n\n  60% {\n    opacity: 1;\n    -moz-transform: translateY(-30px);\n  }\n\n  80% {\n    -moz-transform: translateY(10px);\n  }\n\n  100% {\n    -moz-transform: translateY(0);\n  }\n}\n\n@-o-keyframes bounceInUp {\n  0% {\n    opacity: 0;\n    -o-transform: translateY(2000px);\n  }\n\n  60% {\n    opacity: 1;\n    -o-transform: translateY(-30px);\n  }\n\n  80% {\n    -o-transform: translateY(10px);\n  }\n\n  100% {\n    -o-transform: translateY(0);\n  }\n}\n\n@keyframes bounceInUp {\n  0% {\n    opacity: 0;\n    transform: translateY(2000px);\n  }\n\n  60% {\n    opacity: 1;\n    transform: translateY(-30px);\n  }\n\n  80% {\n    transform: translateY(10px);\n  }\n\n  100% {\n    transform: translateY(0);\n  }\n}\n\n.bounceInUp {\n  -webkit-animation-name: bounceInUp;\n  -moz-animation-name: bounceInUp;\n  -o-animation-name: bounceInUp;\n  animation-name: bounceInUp;\n}\n@-webkit-keyframes bounceInDown {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateY(-2000px);\n  }\n\n  60% {\n    opacity: 1;\n    -webkit-transform: translateY(30px);\n  }\n\n  80% {\n    -webkit-transform: translateY(-10px);\n  }\n\n  100% {\n    -webkit-transform: translateY(0);\n  }\n}\n\n@-moz-keyframes bounceInDown {\n  0% {\n    opacity: 0;\n    -moz-transform: translateY(-2000px);\n  }\n\n  60% {\n    opacity: 1;\n    -moz-transform: translateY(30px);\n  }\n\n  80% {\n    -moz-transform: translateY(-10px);\n  }\n\n  100% {\n    -moz-transform: translateY(0);\n  }\n}\n\n@-o-keyframes bounceInDown {\n  0% {\n    opacity: 0;\n    -o-transform: translateY(-2000px);\n  }\n\n  60% {\n    opacity: 1;\n    -o-transform: translateY(30px);\n  }\n\n  80% {\n    -o-transform: translateY(-10px);\n  }\n\n  100% {\n    -o-transform: translateY(0);\n  }\n}\n\n@keyframes bounceInDown {\n  0% {\n    opacity: 0;\n    transform: translateY(-2000px);\n  }\n\n  60% {\n    opacity: 1;\n    transform: translateY(30px);\n  }\n\n  80% {\n    transform: translateY(-10px);\n  }\n\n  100% {\n    transform: translateY(0);\n  }\n}\n\n.bounceInDown {\n  -webkit-animation-name: bounceInDown;\n  -moz-animation-name: bounceInDown;\n  -o-animation-name: bounceInDown;\n  animation-name: bounceInDown;\n}\n@-webkit-keyframes bounceInLeft {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateX(-2000px);\n  }\n\n  60% {\n    opacity: 1;\n    -webkit-transform: translateX(30px);\n  }\n\n  80% {\n    -webkit-transform: translateX(-10px);\n  }\n\n  100% {\n    -webkit-transform: translateX(0);\n  }\n}\n\n@-moz-keyframes bounceInLeft {\n  0% {\n    opacity: 0;\n    -moz-transform: translateX(-2000px);\n  }\n\n  60% {\n    opacity: 1;\n    -moz-transform: translateX(30px);\n  }\n\n  80% {\n    -moz-transform: translateX(-10px);\n  }\n\n  100% {\n    -moz-transform: translateX(0);\n  }\n}\n\n@-o-keyframes bounceInLeft {\n  0% {\n    opacity: 0;\n    -o-transform: translateX(-2000px);\n  }\n\n  60% {\n    opacity: 1;\n    -o-transform: translateX(30px);\n  }\n\n  80% {\n    -o-transform: translateX(-10px);\n  }\n\n  100% {\n    -o-transform: translateX(0);\n  }\n}\n\n@keyframes bounceInLeft {\n  0% {\n    opacity: 0;\n    transform: translateX(-2000px);\n  }\n\n  60% {\n    opacity: 1;\n    transform: translateX(30px);\n  }\n\n  80% {\n    transform: translateX(-10px);\n  }\n\n  100% {\n    transform: translateX(0);\n  }\n}\n\n.bounceInLeft {\n  -webkit-animation-name: bounceInLeft;\n  -moz-animation-name: bounceInLeft;\n  -o-animation-name: bounceInLeft;\n  animation-name: bounceInLeft;\n}\n@-webkit-keyframes bounceInRight {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateX(2000px);\n  }\n\n  60% {\n    opacity: 1;\n    -webkit-transform: translateX(-30px);\n  }\n\n  80% {\n    -webkit-transform: translateX(10px);\n  }\n\n  100% {\n    -webkit-transform: translateX(0);\n  }\n}\n\n@-moz-keyframes bounceInRight {\n  0% {\n    opacity: 0;\n    -moz-transform: translateX(2000px);\n  }\n\n  60% {\n    opacity: 1;\n    -moz-transform: translateX(-30px);\n  }\n\n  80% {\n    -moz-transform: translateX(10px);\n  }\n\n  100% {\n    -moz-transform: translateX(0);\n  }\n}\n\n@-o-keyframes bounceInRight {\n  0% {\n    opacity: 0;\n    -o-transform: translateX(2000px);\n  }\n\n  60% {\n    opacity: 1;\n    -o-transform: translateX(-30px);\n  }\n\n  80% {\n    -o-transform: translateX(10px);\n  }\n\n  100% {\n    -o-transform: translateX(0);\n  }\n}\n\n@keyframes bounceInRight {\n  0% {\n    opacity: 0;\n    transform: translateX(2000px);\n  }\n\n  60% {\n    opacity: 1;\n    transform: translateX(-30px);\n  }\n\n  80% {\n    transform: translateX(10px);\n  }\n\n  100% {\n    transform: translateX(0);\n  }\n}\n\n.bounceInRight {\n  -webkit-animation-name: bounceInRight;\n  -moz-animation-name: bounceInRight;\n  -o-animation-name: bounceInRight;\n  animation-name: bounceInRight;\n}\n@-webkit-keyframes bounceOut {\n  0% {\n    -webkit-transform: scale(1);\n  }\n\n  25% {\n    -webkit-transform: scale(0.95);\n  }\n\n  50% {\n    opacity: 1;\n    -webkit-transform: scale(1.1);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: scale(0.3);\n  }\n}\n\n@-moz-keyframes bounceOut {\n  0% {\n    -moz-transform: scale(1);\n  }\n\n  25% {\n    -moz-transform: scale(0.95);\n  }\n\n  50% {\n    opacity: 1;\n    -moz-transform: scale(1.1);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: scale(0.3);\n  }\n}\n\n@-o-keyframes bounceOut {\n  0% {\n    -o-transform: scale(1);\n  }\n\n  25% {\n    -o-transform: scale(0.95);\n  }\n\n  50% {\n    opacity: 1;\n    -o-transform: scale(1.1);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: scale(0.3);\n  }\n}\n\n@keyframes bounceOut {\n  0% {\n    transform: scale(1);\n  }\n\n  25% {\n    transform: scale(0.95);\n  }\n\n  50% {\n    opacity: 1;\n    transform: scale(1.1);\n  }\n\n  100% {\n    opacity: 0;\n    transform: scale(0.3);\n  }\n}\n\n.bounceOut {\n  -webkit-animation-name: bounceOut;\n  -moz-animation-name: bounceOut;\n  -o-animation-name: bounceOut;\n  animation-name: bounceOut;\n}\n@-webkit-keyframes bounceOutUp {\n  0% {\n    -webkit-transform: translateY(0);\n  }\n\n  20% {\n    opacity: 1;\n    -webkit-transform: translateY(20px);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateY(-2000px);\n  }\n}\n\n@-moz-keyframes bounceOutUp {\n  0% {\n    -moz-transform: translateY(0);\n  }\n\n  20% {\n    opacity: 1;\n    -moz-transform: translateY(20px);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateY(-2000px);\n  }\n}\n\n@-o-keyframes bounceOutUp {\n  0% {\n    -o-transform: translateY(0);\n  }\n\n  20% {\n    opacity: 1;\n    -o-transform: translateY(20px);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateY(-2000px);\n  }\n}\n\n@keyframes bounceOutUp {\n  0% {\n    transform: translateY(0);\n  }\n\n  20% {\n    opacity: 1;\n    transform: translateY(20px);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateY(-2000px);\n  }\n}\n\n.bounceOutUp {\n  -webkit-animation-name: bounceOutUp;\n  -moz-animation-name: bounceOutUp;\n  -o-animation-name: bounceOutUp;\n  animation-name: bounceOutUp;\n}\n@-webkit-keyframes bounceOutDown {\n  0% {\n    -webkit-transform: translateY(0);\n  }\n\n  20% {\n    opacity: 1;\n    -webkit-transform: translateY(-20px);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateY(2000px);\n  }\n}\n\n@-moz-keyframes bounceOutDown {\n  0% {\n    -moz-transform: translateY(0);\n  }\n\n  20% {\n    opacity: 1;\n    -moz-transform: translateY(-20px);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateY(2000px);\n  }\n}\n\n@-o-keyframes bounceOutDown {\n  0% {\n    -o-transform: translateY(0);\n  }\n\n  20% {\n    opacity: 1;\n    -o-transform: translateY(-20px);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateY(2000px);\n  }\n}\n\n@keyframes bounceOutDown {\n  0% {\n    transform: translateY(0);\n  }\n\n  20% {\n    opacity: 1;\n    transform: translateY(-20px);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateY(2000px);\n  }\n}\n\n.bounceOutDown {\n  -webkit-animation-name: bounceOutDown;\n  -moz-animation-name: bounceOutDown;\n  -o-animation-name: bounceOutDown;\n  animation-name: bounceOutDown;\n}\n@-webkit-keyframes bounceOutLeft {\n  0% {\n    -webkit-transform: translateX(0);\n  }\n\n  20% {\n    opacity: 1;\n    -webkit-transform: translateX(20px);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateX(-2000px);\n  }\n}\n\n@-moz-keyframes bounceOutLeft {\n  0% {\n    -moz-transform: translateX(0);\n  }\n\n  20% {\n    opacity: 1;\n    -moz-transform: translateX(20px);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateX(-2000px);\n  }\n}\n\n@-o-keyframes bounceOutLeft {\n  0% {\n    -o-transform: translateX(0);\n  }\n\n  20% {\n    opacity: 1;\n    -o-transform: translateX(20px);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateX(-2000px);\n  }\n}\n\n@keyframes bounceOutLeft {\n  0% {\n    transform: translateX(0);\n  }\n\n  20% {\n    opacity: 1;\n    transform: translateX(20px);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateX(-2000px);\n  }\n}\n\n.bounceOutLeft {\n  -webkit-animation-name: bounceOutLeft;\n  -moz-animation-name: bounceOutLeft;\n  -o-animation-name: bounceOutLeft;\n  animation-name: bounceOutLeft;\n}\n@-webkit-keyframes bounceOutRight {\n  0% {\n    -webkit-transform: translateX(0);\n  }\n\n  20% {\n    opacity: 1;\n    -webkit-transform: translateX(-20px);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateX(2000px);\n  }\n}\n\n@-moz-keyframes bounceOutRight {\n  0% {\n    -moz-transform: translateX(0);\n  }\n\n  20% {\n    opacity: 1;\n    -moz-transform: translateX(-20px);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateX(2000px);\n  }\n}\n\n@-o-keyframes bounceOutRight {\n  0% {\n    -o-transform: translateX(0);\n  }\n\n  20% {\n    opacity: 1;\n    -o-transform: translateX(-20px);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateX(2000px);\n  }\n}\n\n@keyframes bounceOutRight {\n  0% {\n    transform: translateX(0);\n  }\n\n  20% {\n    opacity: 1;\n    transform: translateX(-20px);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateX(2000px);\n  }\n}\n\n.bounceOutRight {\n  -webkit-animation-name: bounceOutRight;\n  -moz-animation-name: bounceOutRight;\n  -o-animation-name: bounceOutRight;\n  animation-name: bounceOutRight;\n}\n@-webkit-keyframes rotateIn {\n  0% {\n    -webkit-transform-origin: center center;\n    -webkit-transform: rotate(-200deg);\n    opacity: 0;\n  }\n\n  100% {\n    -webkit-transform-origin: center center;\n    -webkit-transform: rotate(0);\n    opacity: 1;\n  }\n}\n@-moz-keyframes rotateIn {\n  0% {\n    -moz-transform-origin: center center;\n    -moz-transform: rotate(-200deg);\n    opacity: 0;\n  }\n\n  100% {\n    -moz-transform-origin: center center;\n    -moz-transform: rotate(0);\n    opacity: 1;\n  }\n}\n@-o-keyframes rotateIn {\n  0% {\n    -o-transform-origin: center center;\n    -o-transform: rotate(-200deg);\n    opacity: 0;\n  }\n\n  100% {\n    -o-transform-origin: center center;\n    -o-transform: rotate(0);\n    opacity: 1;\n  }\n}\n@keyframes rotateIn {\n  0% {\n    transform-origin: center center;\n    transform: rotate(-200deg);\n    opacity: 0;\n  }\n\n  100% {\n    transform-origin: center center;\n    transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n.rotateIn {\n  -webkit-animation-name: rotateIn;\n  -moz-animation-name: rotateIn;\n  -o-animation-name: rotateIn;\n  animation-name: rotateIn;\n}\n@-webkit-keyframes rotateInUpLeft {\n  0% {\n    -webkit-transform-origin: left bottom;\n    -webkit-transform: rotate(90deg);\n    opacity: 0;\n  }\n\n  100% {\n    -webkit-transform-origin: left bottom;\n    -webkit-transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n@-moz-keyframes rotateInUpLeft {\n  0% {\n    -moz-transform-origin: left bottom;\n    -moz-transform: rotate(90deg);\n    opacity: 0;\n  }\n\n  100% {\n    -moz-transform-origin: left bottom;\n    -moz-transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n@-o-keyframes rotateInUpLeft {\n  0% {\n    -o-transform-origin: left bottom;\n    -o-transform: rotate(90deg);\n    opacity: 0;\n  }\n\n  100% {\n    -o-transform-origin: left bottom;\n    -o-transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n@keyframes rotateInUpLeft {\n  0% {\n    transform-origin: left bottom;\n    transform: rotate(90deg);\n    opacity: 0;\n  }\n\n  100% {\n    transform-origin: left bottom;\n    transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n.rotateInUpLeft {\n  -webkit-animation-name: rotateInUpLeft;\n  -moz-animation-name: rotateInUpLeft;\n  -o-animation-name: rotateInUpLeft;\n  animation-name: rotateInUpLeft;\n}\n@-webkit-keyframes rotateInDownLeft {\n  0% {\n    -webkit-transform-origin: left bottom;\n    -webkit-transform: rotate(-90deg);\n    opacity: 0;\n  }\n\n  100% {\n    -webkit-transform-origin: left bottom;\n    -webkit-transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n@-moz-keyframes rotateInDownLeft {\n  0% {\n    -moz-transform-origin: left bottom;\n    -moz-transform: rotate(-90deg);\n    opacity: 0;\n  }\n\n  100% {\n    -moz-transform-origin: left bottom;\n    -moz-transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n@-o-keyframes rotateInDownLeft {\n  0% {\n    -o-transform-origin: left bottom;\n    -o-transform: rotate(-90deg);\n    opacity: 0;\n  }\n\n  100% {\n    -o-transform-origin: left bottom;\n    -o-transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n@keyframes rotateInDownLeft {\n  0% {\n    transform-origin: left bottom;\n    transform: rotate(-90deg);\n    opacity: 0;\n  }\n\n  100% {\n    transform-origin: left bottom;\n    transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n.rotateInDownLeft {\n  -webkit-animation-name: rotateInDownLeft;\n  -moz-animation-name: rotateInDownLeft;\n  -o-animation-name: rotateInDownLeft;\n  animation-name: rotateInDownLeft;\n}\n@-webkit-keyframes rotateInUpRight {\n  0% {\n    -webkit-transform-origin: right bottom;\n    -webkit-transform: rotate(-90deg);\n    opacity: 0;\n  }\n\n  100% {\n    -webkit-transform-origin: right bottom;\n    -webkit-transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n@-moz-keyframes rotateInUpRight {\n  0% {\n    -moz-transform-origin: right bottom;\n    -moz-transform: rotate(-90deg);\n    opacity: 0;\n  }\n\n  100% {\n    -moz-transform-origin: right bottom;\n    -moz-transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n@-o-keyframes rotateInUpRight {\n  0% {\n    -o-transform-origin: right bottom;\n    -o-transform: rotate(-90deg);\n    opacity: 0;\n  }\n\n  100% {\n    -o-transform-origin: right bottom;\n    -o-transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n@keyframes rotateInUpRight {\n  0% {\n    transform-origin: right bottom;\n    transform: rotate(-90deg);\n    opacity: 0;\n  }\n\n  100% {\n    transform-origin: right bottom;\n    transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n.rotateInUpRight {\n  -webkit-animation-name: rotateInUpRight;\n  -moz-animation-name: rotateInUpRight;\n  -o-animation-name: rotateInUpRight;\n  animation-name: rotateInUpRight;\n}\n@-webkit-keyframes rotateInDownRight {\n  0% {\n    -webkit-transform-origin: right bottom;\n    -webkit-transform: rotate(90deg);\n    opacity: 0;\n  }\n\n  100% {\n    -webkit-transform-origin: right bottom;\n    -webkit-transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n@-moz-keyframes rotateInDownRight {\n  0% {\n    -moz-transform-origin: right bottom;\n    -moz-transform: rotate(90deg);\n    opacity: 0;\n  }\n\n  100% {\n    -moz-transform-origin: right bottom;\n    -moz-transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n@-o-keyframes rotateInDownRight {\n  0% {\n    -o-transform-origin: right bottom;\n    -o-transform: rotate(90deg);\n    opacity: 0;\n  }\n\n  100% {\n    -o-transform-origin: right bottom;\n    -o-transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n@keyframes rotateInDownRight {\n  0% {\n    transform-origin: right bottom;\n    transform: rotate(90deg);\n    opacity: 0;\n  }\n\n  100% {\n    transform-origin: right bottom;\n    transform: rotate(0);\n    opacity: 1;\n  }\n}\n\n.rotateInDownRight {\n  -webkit-animation-name: rotateInDownRight;\n  -moz-animation-name: rotateInDownRight;\n  -o-animation-name: rotateInDownRight;\n  animation-name: rotateInDownRight;\n}\n@-webkit-keyframes rotateOut {\n  0% {\n    -webkit-transform-origin: center center;\n    -webkit-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -webkit-transform-origin: center center;\n    -webkit-transform: rotate(200deg);\n    opacity: 0;\n  }\n}\n\n@-moz-keyframes rotateOut {\n  0% {\n    -moz-transform-origin: center center;\n    -moz-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -moz-transform-origin: center center;\n    -moz-transform: rotate(200deg);\n    opacity: 0;\n  }\n}\n\n@-o-keyframes rotateOut {\n  0% {\n    -o-transform-origin: center center;\n    -o-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -o-transform-origin: center center;\n    -o-transform: rotate(200deg);\n    opacity: 0;\n  }\n}\n\n@keyframes rotateOut {\n  0% {\n    transform-origin: center center;\n    transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    transform-origin: center center;\n    transform: rotate(200deg);\n    opacity: 0;\n  }\n}\n\n.rotateOut {\n  -webkit-animation-name: rotateOut;\n  -moz-animation-name: rotateOut;\n  -o-animation-name: rotateOut;\n  animation-name: rotateOut;\n}\n@-webkit-keyframes rotateOutUpLeft {\n  0% {\n    -webkit-transform-origin: left bottom;\n    -webkit-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -webkit-transform-origin: left bottom;\n    -webkit-transform: rotate(-90deg);\n    opacity: 0;\n  }\n}\n\n@-moz-keyframes rotateOutUpLeft {\n  0% {\n    -moz-transform-origin: left bottom;\n    -moz-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -moz-transform-origin: left bottom;\n    -moz-transform: rotate(-90deg);\n    opacity: 0;\n  }\n}\n\n@-o-keyframes rotateOutUpLeft {\n  0% {\n    -o-transform-origin: left bottom;\n    -o-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -o-transform-origin: left bottom;\n    -o-transform: rotate(-90deg);\n    opacity: 0;\n  }\n}\n\n@keyframes rotateOutUpLeft {\n  0% {\n    transform-origin: left bottom;\n    transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    transform-origin: left bottom;\n    transform: rotate(-90deg);\n    opacity: 0;\n  }\n}\n\n.rotateOutUpLeft {\n  -webkit-animation-name: rotateOutUpLeft;\n  -moz-animation-name: rotateOutUpLeft;\n  -o-animation-name: rotateOutUpLeft;\n  animation-name: rotateOutUpLeft;\n}\n@-webkit-keyframes rotateOutDownLeft {\n  0% {\n    -webkit-transform-origin: left bottom;\n    -webkit-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -webkit-transform-origin: left bottom;\n    -webkit-transform: rotate(90deg);\n    opacity: 0;\n  }\n}\n\n@-moz-keyframes rotateOutDownLeft {\n  0% {\n    -moz-transform-origin: left bottom;\n    -moz-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -moz-transform-origin: left bottom;\n    -moz-transform: rotate(90deg);\n    opacity: 0;\n  }\n}\n\n@-o-keyframes rotateOutDownLeft {\n  0% {\n    -o-transform-origin: left bottom;\n    -o-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -o-transform-origin: left bottom;\n    -o-transform: rotate(90deg);\n    opacity: 0;\n  }\n}\n\n@keyframes rotateOutDownLeft {\n  0% {\n    transform-origin: left bottom;\n    transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    transform-origin: left bottom;\n    transform: rotate(90deg);\n    opacity: 0;\n  }\n}\n\n.rotateOutDownLeft {\n  -webkit-animation-name: rotateOutDownLeft;\n  -moz-animation-name: rotateOutDownLeft;\n  -o-animation-name: rotateOutDownLeft;\n  animation-name: rotateOutDownLeft;\n}\n@-webkit-keyframes rotateOutUpRight {\n  0% {\n    -webkit-transform-origin: right bottom;\n    -webkit-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -webkit-transform-origin: right bottom;\n    -webkit-transform: rotate(90deg);\n    opacity: 0;\n  }\n}\n\n@-moz-keyframes rotateOutUpRight {\n  0% {\n    -moz-transform-origin: right bottom;\n    -moz-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -moz-transform-origin: right bottom;\n    -moz-transform: rotate(90deg);\n    opacity: 0;\n  }\n}\n\n@-o-keyframes rotateOutUpRight {\n  0% {\n    -o-transform-origin: right bottom;\n    -o-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -o-transform-origin: right bottom;\n    -o-transform: rotate(90deg);\n    opacity: 0;\n  }\n}\n\n@keyframes rotateOutUpRight {\n  0% {\n    transform-origin: right bottom;\n    transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    transform-origin: right bottom;\n    transform: rotate(90deg);\n    opacity: 0;\n  }\n}\n\n.rotateOutUpRight {\n  -webkit-animation-name: rotateOutUpRight;\n  -moz-animation-name: rotateOutUpRight;\n  -o-animation-name: rotateOutUpRight;\n  animation-name: rotateOutUpRight;\n}\n@-webkit-keyframes rotateOutDownRight {\n  0% {\n    -webkit-transform-origin: right bottom;\n    -webkit-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -webkit-transform-origin: right bottom;\n    -webkit-transform: rotate(-90deg);\n    opacity: 0;\n  }\n}\n\n@-moz-keyframes rotateOutDownRight {\n  0% {\n    -moz-transform-origin: right bottom;\n    -moz-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -moz-transform-origin: right bottom;\n    -moz-transform: rotate(-90deg);\n    opacity: 0;\n  }\n}\n\n@-o-keyframes rotateOutDownRight {\n  0% {\n    -o-transform-origin: right bottom;\n    -o-transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    -o-transform-origin: right bottom;\n    -o-transform: rotate(-90deg);\n    opacity: 0;\n  }\n}\n\n@keyframes rotateOutDownRight {\n  0% {\n    transform-origin: right bottom;\n    transform: rotate(0);\n    opacity: 1;\n  }\n\n  100% {\n    transform-origin: right bottom;\n    transform: rotate(-90deg);\n    opacity: 0;\n  }\n}\n\n.rotateOutDownRight {\n  -webkit-animation-name: rotateOutDownRight;\n  -moz-animation-name: rotateOutDownRight;\n  -o-animation-name: rotateOutDownRight;\n  animation-name: rotateOutDownRight;\n}\n@-webkit-keyframes hinge {\n  0% {\n    -webkit-transform: rotate(0);\n    -webkit-transform-origin: top left;\n    -webkit-animation-timing-function: ease-in-out;\n  }\n  20%,\n  60% {\n    -webkit-transform: rotate(80deg);\n    -webkit-transform-origin: top left;\n    -webkit-animation-timing-function: ease-in-out;\n  }\n  40% {\n    -webkit-transform: rotate(60deg);\n    -webkit-transform-origin: top left;\n    -webkit-animation-timing-function: ease-in-out;\n  }\n  80% {\n    -webkit-transform: rotate(60deg) translateY(0);\n    opacity: 1;\n    -webkit-transform-origin: top left;\n    -webkit-animation-timing-function: ease-in-out;\n  }\n  100% {\n    -webkit-transform: translateY(700px);\n    opacity: 0;\n  }\n}\n\n@-moz-keyframes hinge {\n  0% {\n    -moz-transform: rotate(0);\n    -moz-transform-origin: top left;\n    -moz-animation-timing-function: ease-in-out;\n  }\n  20%,\n  60% {\n    -moz-transform: rotate(80deg);\n    -moz-transform-origin: top left;\n    -moz-animation-timing-function: ease-in-out;\n  }\n  40% {\n    -moz-transform: rotate(60deg);\n    -moz-transform-origin: top left;\n    -moz-animation-timing-function: ease-in-out;\n  }\n  80% {\n    -moz-transform: rotate(60deg) translateY(0);\n    opacity: 1;\n    -moz-transform-origin: top left;\n    -moz-animation-timing-function: ease-in-out;\n  }\n  100% {\n    -moz-transform: translateY(700px);\n    opacity: 0;\n  }\n}\n\n@-o-keyframes hinge {\n  0% {\n    -o-transform: rotate(0);\n    -o-transform-origin: top left;\n    -o-animation-timing-function: ease-in-out;\n  }\n  20%,\n  60% {\n    -o-transform: rotate(80deg);\n    -o-transform-origin: top left;\n    -o-animation-timing-function: ease-in-out;\n  }\n  40% {\n    -o-transform: rotate(60deg);\n    -o-transform-origin: top left;\n    -o-animation-timing-function: ease-in-out;\n  }\n  80% {\n    -o-transform: rotate(60deg) translateY(0);\n    opacity: 1;\n    -o-transform-origin: top left;\n    -o-animation-timing-function: ease-in-out;\n  }\n  100% {\n    -o-transform: translateY(700px);\n    opacity: 0;\n  }\n}\n\n@keyframes hinge {\n  0% {\n    transform: rotate(0);\n    transform-origin: top left;\n    animation-timing-function: ease-in-out;\n  }\n  20%,\n  60% {\n    transform: rotate(80deg);\n    transform-origin: top left;\n    animation-timing-function: ease-in-out;\n  }\n  40% {\n    transform: rotate(60deg);\n    transform-origin: top left;\n    animation-timing-function: ease-in-out;\n  }\n  80% {\n    transform: rotate(60deg) translateY(0);\n    opacity: 1;\n    transform-origin: top left;\n    animation-timing-function: ease-in-out;\n  }\n  100% {\n    transform: translateY(700px);\n    opacity: 0;\n  }\n}\n\n.hinge {\n  -webkit-animation-name: hinge;\n  -moz-animation-name: hinge;\n  -o-animation-name: hinge;\n  animation-name: hinge;\n}\n/* originally authored by Nick Pettit - https://github.com/nickpettit/glide */\n\n@-webkit-keyframes rollIn {\n  0% {\n    opacity: 0;\n    -webkit-transform: translateX(-100%) rotate(-120deg);\n  }\n  100% {\n    opacity: 1;\n    -webkit-transform: translateX(0px) rotate(0deg);\n  }\n}\n\n@-moz-keyframes rollIn {\n  0% {\n    opacity: 0;\n    -moz-transform: translateX(-100%) rotate(-120deg);\n  }\n  100% {\n    opacity: 1;\n    -moz-transform: translateX(0px) rotate(0deg);\n  }\n}\n\n@-o-keyframes rollIn {\n  0% {\n    opacity: 0;\n    -o-transform: translateX(-100%) rotate(-120deg);\n  }\n  100% {\n    opacity: 1;\n    -o-transform: translateX(0px) rotate(0deg);\n  }\n}\n\n@keyframes rollIn {\n  0% {\n    opacity: 0;\n    transform: translateX(-100%) rotate(-120deg);\n  }\n  100% {\n    opacity: 1;\n    transform: translateX(0px) rotate(0deg);\n  }\n}\n\n.rollIn {\n  -webkit-animation-name: rollIn;\n  -moz-animation-name: rollIn;\n  -o-animation-name: rollIn;\n  animation-name: rollIn;\n}\n/* originally authored by Nick Pettit - https://github.com/nickpettit/glide */\n\n@-webkit-keyframes rollOut {\n  0% {\n    opacity: 1;\n    -webkit-transform: translateX(0px) rotate(0deg);\n  }\n\n  100% {\n    opacity: 0;\n    -webkit-transform: translateX(100%) rotate(120deg);\n  }\n}\n\n@-moz-keyframes rollOut {\n  0% {\n    opacity: 1;\n    -moz-transform: translateX(0px) rotate(0deg);\n  }\n\n  100% {\n    opacity: 0;\n    -moz-transform: translateX(100%) rotate(120deg);\n  }\n}\n\n@-o-keyframes rollOut {\n  0% {\n    opacity: 1;\n    -o-transform: translateX(0px) rotate(0deg);\n  }\n\n  100% {\n    opacity: 0;\n    -o-transform: translateX(100%) rotate(120deg);\n  }\n}\n\n@keyframes rollOut {\n  0% {\n    opacity: 1;\n    transform: translateX(0px) rotate(0deg);\n  }\n\n  100% {\n    opacity: 0;\n    transform: translateX(100%) rotate(120deg);\n  }\n}\n\n.rollOut {\n  -webkit-animation-name: rollOut;\n  -moz-animation-name: rollOut;\n  -o-animation-name: rollOut;\n  animation-name: rollOut;\n}\n\n/* originally authored by Angelo Rohit - https://github.com/angelorohit */\n\n@-webkit-keyframes lightSpeedIn {\n  0% {\n    -webkit-transform: translateX(100%) skewX(-30deg);\n    opacity: 0;\n  }\n  60% {\n    -webkit-transform: translateX(-20%) skewX(30deg);\n    opacity: 1;\n  }\n  80% {\n    -webkit-transform: translateX(0%) skewX(-15deg);\n    opacity: 1;\n  }\n  100% {\n    -webkit-transform: translateX(0%) skewX(0deg);\n    opacity: 1;\n  }\n}\n\n@-moz-keyframes lightSpeedIn {\n  0% {\n    -moz-transform: translateX(100%) skewX(-30deg);\n    opacity: 0;\n  }\n  60% {\n    -moz-transform: translateX(-20%) skewX(30deg);\n    opacity: 1;\n  }\n  80% {\n    -moz-transform: translateX(0%) skewX(-15deg);\n    opacity: 1;\n  }\n  100% {\n    -moz-transform: translateX(0%) skewX(0deg);\n    opacity: 1;\n  }\n}\n\n@-o-keyframes lightSpeedIn {\n  0% {\n    -o-transform: translateX(100%) skewX(-30deg);\n    opacity: 0;\n  }\n  60% {\n    -o-transform: translateX(-20%) skewX(30deg);\n    opacity: 1;\n  }\n  80% {\n    -o-transform: translateX(0%) skewX(-15deg);\n    opacity: 1;\n  }\n  100% {\n    -o-transform: translateX(0%) skewX(0deg);\n    opacity: 1;\n  }\n}\n\n@keyframes lightSpeedIn {\n  0% {\n    transform: translateX(100%) skewX(-30deg);\n    opacity: 0;\n  }\n  60% {\n    transform: translateX(-20%) skewX(30deg);\n    opacity: 1;\n  }\n  80% {\n    transform: translateX(0%) skewX(-15deg);\n    opacity: 1;\n  }\n  100% {\n    transform: translateX(0%) skewX(0deg);\n    opacity: 1;\n  }\n}\n\n.lightSpeedIn {\n  -webkit-animation-name: lightSpeedIn;\n  -moz-animation-name: lightSpeedIn;\n  -o-animation-name: lightSpeedIn;\n  animation-name: lightSpeedIn;\n\n  -webkit-animation-timing-function: ease-out;\n  -moz-animation-timing-function: ease-out;\n  -o-animation-timing-function: ease-out;\n  animation-timing-function: ease-out;\n}\n\n.animated.lightSpeedIn {\n  -webkit-animation-duration: 0.5s;\n  -moz-animation-duration: 0.5s;\n  -o-animation-duration: 0.5s;\n  animation-duration: 0.5s;\n}\n\n/* originally authored by Angelo Rohit - https://github.com/angelorohit */\n\n@-webkit-keyframes lightSpeedOut {\n  0% {\n    -webkit-transform: translateX(0%) skewX(0deg);\n    opacity: 1;\n  }\n  100% {\n    -webkit-transform: translateX(100%) skewX(-30deg);\n    opacity: 0;\n  }\n}\n\n@-moz-keyframes lightSpeedOut {\n  0% {\n    -moz-transform: translateX(0%) skewX(0deg);\n    opacity: 1;\n  }\n  100% {\n    -moz-transform: translateX(100%) skewX(-30deg);\n    opacity: 0;\n  }\n}\n\n@-o-keyframes lightSpeedOut {\n  0% {\n    -o-transform: translateX(0%) skewX(0deg);\n    opacity: 1;\n  }\n  100% {\n    -o-transform: translateX(100%) skewX(-30deg);\n    opacity: 0;\n  }\n}\n\n@keyframes lightSpeedOut {\n  0% {\n    transform: translateX(0%) skewX(0deg);\n    opacity: 1;\n  }\n  100% {\n    transform: translateX(100%) skewX(-30deg);\n    opacity: 0;\n  }\n}\n\n.lightSpeedOut {\n  -webkit-animation-name: lightSpeedOut;\n  -moz-animation-name: lightSpeedOut;\n  -o-animation-name: lightSpeedOut;\n  animation-name: lightSpeedOut;\n\n  -webkit-animation-timing-function: ease-in;\n  -moz-animation-timing-function: ease-in;\n  -o-animation-timing-function: ease-in;\n  animation-timing-function: ease-in;\n}\n\n.animated.lightSpeedOut {\n  -webkit-animation-duration: 0.25s;\n  -moz-animation-duration: 0.25s;\n  -o-animation-duration: 0.25s;\n  animation-duration: 0.25s;\n}\n\n/* originally authored by Angelo Rohit - https://github.com/angelorohit */\n\n@-webkit-keyframes wiggle {\n  0% {\n    -webkit-transform: skewX(9deg);\n  }\n  10% {\n    -webkit-transform: skewX(-8deg);\n  }\n  20% {\n    -webkit-transform: skewX(7deg);\n  }\n  30% {\n    -webkit-transform: skewX(-6deg);\n  }\n  40% {\n    -webkit-transform: skewX(5deg);\n  }\n  50% {\n    -webkit-transform: skewX(-4deg);\n  }\n  60% {\n    -webkit-transform: skewX(3deg);\n  }\n  70% {\n    -webkit-transform: skewX(-2deg);\n  }\n  80% {\n    -webkit-transform: skewX(1deg);\n  }\n  90% {\n    -webkit-transform: skewX(0deg);\n  }\n  100% {\n    -webkit-transform: skewX(0deg);\n  }\n}\n\n@-moz-keyframes wiggle {\n  0% {\n    -moz-transform: skewX(9deg);\n  }\n  10% {\n    -moz-transform: skewX(-8deg);\n  }\n  20% {\n    -moz-transform: skewX(7deg);\n  }\n  30% {\n    -moz-transform: skewX(-6deg);\n  }\n  40% {\n    -moz-transform: skewX(5deg);\n  }\n  50% {\n    -moz-transform: skewX(-4deg);\n  }\n  60% {\n    -moz-transform: skewX(3deg);\n  }\n  70% {\n    -moz-transform: skewX(-2deg);\n  }\n  80% {\n    -moz-transform: skewX(1deg);\n  }\n  90% {\n    -moz-transform: skewX(0deg);\n  }\n  100% {\n    -moz-transform: skewX(0deg);\n  }\n}\n\n@-o-keyframes wiggle {\n  0% {\n    -o-transform: skewX(9deg);\n  }\n  10% {\n    -o-transform: skewX(-8deg);\n  }\n  20% {\n    -o-transform: skewX(7deg);\n  }\n  30% {\n    -o-transform: skewX(-6deg);\n  }\n  40% {\n    -o-transform: skewX(5deg);\n  }\n  50% {\n    -o-transform: skewX(-4deg);\n  }\n  60% {\n    -o-transform: skewX(3deg);\n  }\n  70% {\n    -o-transform: skewX(-2deg);\n  }\n  80% {\n    -o-transform: skewX(1deg);\n  }\n  90% {\n    -o-transform: skewX(0deg);\n  }\n  100% {\n    -o-transform: skewX(0deg);\n  }\n}\n\n@keyframes wiggle {\n  0% {\n    transform: skewX(9deg);\n  }\n  10% {\n    transform: skewX(-8deg);\n  }\n  20% {\n    transform: skewX(7deg);\n  }\n  30% {\n    transform: skewX(-6deg);\n  }\n  40% {\n    transform: skewX(5deg);\n  }\n  50% {\n    transform: skewX(-4deg);\n  }\n  60% {\n    transform: skewX(3deg);\n  }\n  70% {\n    transform: skewX(-2deg);\n  }\n  80% {\n    transform: skewX(1deg);\n  }\n  90% {\n    transform: skewX(0deg);\n  }\n  100% {\n    transform: skewX(0deg);\n  }\n}\n\n.wiggle {\n  -webkit-animation-name: wiggle;\n  -moz-animation-name: wiggle;\n  -o-animation-name: wiggle;\n  animation-name: wiggle;\n\n  -webkit-animation-timing-function: ease-in;\n  -moz-animation-timing-function: ease-in;\n  -o-animation-timing-function: ease-in;\n  animation-timing-function: ease-in;\n}\n\n.animated.wiggle {\n  -webkit-animation-duration: 0.75s;\n  -moz-animation-duration: 0.75s;\n  -o-animation-duration: 0.75s;\n  animation-duration: 0.75s;\n}\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/css/custom-fonts.css",
    "content": "/* ==================================================\nFont-Face Icons\n================================================== */\n\n@font-face {\n  font-family: \"Icons\";\n  src: url(\"../fonts/customicon/Icons.eot\");\n  src:\n    url(\"../fonts/customicon/Icons.eot?#iefix\") format(\"embedded-opentype\"),\n    url(\"../fonts/customicon/Icons.woff\") format(\"woff\"),\n    url(\"../fonts/customicon/Icons.ttf\") format(\"truetype\"),\n    url(\"../fonts/customicon/Icons.svg#Icons\") format(\"svg\");\n  font-weight: normal;\n  font-style: normal;\n}\n\n/* Use the following CSS code if you want to use data attributes for inserting your icons */\n[data-icon]:before {\n  font-family: \"Icons\";\n  content: attr(data-icon);\n  speak: none;\n  font-weight: normal;\n  font-variant: normal;\n  text-transform: none;\n  line-height: 1;\n  -webkit-font-smoothing: antialiased;\n}\n\n[class^=\"font-\"]:before,\n[class*=\" font-\"]:before {\n  font-family: \"Icons\";\n  speak: none;\n  font-style: normal;\n  font-weight: normal;\n  font-variant: normal;\n  text-transform: none;\n  -webkit-font-smoothing: antialiased;\n}\n\n[class^=\"font-\"],\n[class*=\" font-\"] {\n  display: inline-block;\n  line-height: 1em;\n}\n\n/* Use the following CSS code if you want to have a class per icon */\n/*\nInstead of a list of all class selectors,\nyou can use the generic selector below, but it's slower:\n[class*=\"font-icon-\"] {\n*/\n.font-icon-zoom-out,\n.font-icon-zoom-in,\n.font-icon-wrench,\n.font-icon-waves,\n.font-icon-warning,\n.font-icon-volume-up,\n.font-icon-volume-off,\n.font-icon-volume-down,\n.font-icon-viewport,\n.font-icon-user,\n.font-icon-user-border,\n.font-icon-upload,\n.font-icon-upload-2,\n.font-icon-unlock,\n.font-icon-underline,\n.font-icon-tint,\n.font-icon-time,\n.font-icon-text,\n.font-icon-text-width,\n.font-icon-text-height,\n.font-icon-tags,\n.font-icon-tag,\n.font-icon-table,\n.font-icon-strikethrough,\n.font-icon-stop,\n.font-icon-step-forward,\n.font-icon-step-backward,\n.font-icon-stars,\n.font-icon-star,\n.font-icon-star-line,\n.font-icon-star-half,\n.font-icon-sort,\n.font-icon-sort-up,\n.font-icon-sort-down,\n.font-icon-social-zerply,\n.font-icon-social-youtube,\n.font-icon-social-yelp,\n.font-icon-social-yahoo,\n.font-icon-social-wordpress,\n.font-icon-social-virb,\n.font-icon-social-vimeo,\n.font-icon-social-viddler,\n.font-icon-social-twitter,\n.font-icon-social-tumblr,\n.font-icon-social-stumbleupon,\n.font-icon-social-soundcloud,\n.font-icon-social-skype,\n.font-icon-social-share-this,\n.font-icon-social-quora,\n.font-icon-social-pinterest,\n.font-icon-social-photobucket,\n.font-icon-social-paypal,\n.font-icon-social-myspace,\n.font-icon-social-linkedin,\n.font-icon-social-last-fm,\n.font-icon-social-grooveshark,\n.font-icon-social-google-plus,\n.font-icon-social-github,\n.font-icon-social-forrst,\n.font-icon-social-flickr,\n.font-icon-social-facebook,\n.font-icon-social-evernote,\n.font-icon-social-envato,\n.font-icon-social-email,\n.font-icon-social-dribbble,\n.font-icon-social-digg,\n.font-icon-social-deviant-art,\n.font-icon-social-blogger,\n.font-icon-social-behance,\n.font-icon-social-bebo,\n.font-icon-social-addthis,\n.font-icon-social-500px,\n.font-icon-sitemap,\n.font-icon-signout,\n.font-icon-signin,\n.font-icon-signal,\n.font-icon-shopping-cart,\n.font-icon-search,\n.font-icon-rss,\n.font-icon-road,\n.font-icon-retweet,\n.font-icon-resize-vertical,\n.font-icon-resize-vertical-2,\n.font-icon-resize-small,\n.font-icon-resize-horizontal,\n.font-icon-resize-horizontal-2,\n.font-icon-resize-fullscreen,\n.font-icon-resize-full,\n.font-icon-repeat,\n.font-icon-reorder,\n.font-icon-remove,\n.font-icon-remove-sign,\n.font-icon-remove-circle,\n.font-icon-read-more,\n.font-icon-random,\n.font-icon-question-sign,\n.font-icon-pushpin,\n.font-icon-pushpin-2,\n.font-icon-print,\n.font-icon-plus,\n.font-icon-plus-sign,\n.font-icon-play,\n.font-icon-picture,\n.font-icon-phone,\n.font-icon-phone-sign,\n.font-icon-phone-boxed,\n.font-icon-pause,\n.font-icon-paste,\n.font-icon-paper-clip,\n.font-icon-ok,\n.font-icon-ok-sign,\n.font-icon-ok-circle,\n.font-icon-music,\n.font-icon-move,\n.font-icon-money,\n.font-icon-minus,\n.font-icon-minus-sign,\n.font-icon-map,\n.font-icon-map-marker,\n.font-icon-map-marker-2,\n.font-icon-magnet,\n.font-icon-magic,\n.font-icon-lock,\n.font-icon-list,\n.font-icon-list-3,\n.font-icon-list-2,\n.font-icon-link,\n.font-icon-layer,\n.font-icon-key,\n.font-icon-italic,\n.font-icon-info,\n.font-icon-indent-right,\n.font-icon-indent-left,\n.font-icon-inbox,\n.font-icon-inbox-empty,\n.font-icon-home,\n.font-icon-heart,\n.font-icon-heart-line,\n.font-icon-headphones,\n.font-icon-headphones-line,\n.font-icon-headphones-line-2,\n.font-icon-headphones-2,\n.font-icon-hdd,\n.font-icon-group,\n.font-icon-grid,\n.font-icon-grid-large,\n.font-icon-globe_line,\n.font-icon-glass,\n.font-icon-glass_2,\n.font-icon-gift,\n.font-icon-forward,\n.font-icon-font,\n.font-icon-folder-open,\n.font-icon-folder-close,\n.font-icon-flag,\n.font-icon-fire,\n.font-icon-film,\n.font-icon-file,\n.font-icon-file-empty,\n.font-icon-fast-forward,\n.font-icon-fast-backward,\n.font-icon-facetime,\n.font-icon-eye,\n.font-icon-eye_disable,\n.font-icon-expand-view,\n.font-icon-expand-view-3,\n.font-icon-expand-view-2,\n.font-icon-expand-vertical,\n.font-icon-expand-horizontal,\n.font-icon-exclamation,\n.font-icon-email,\n.font-icon-email_2,\n.font-icon-eject,\n.font-icon-edit,\n.font-icon-edit-check,\n.font-icon-download,\n.font-icon-download_2,\n.font-icon-dashboard,\n.font-icon-credit-card,\n.font-icon-copy,\n.font-icon-comments,\n.font-icon-comments-line,\n.font-icon-comment,\n.font-icon-comment-line,\n.font-icon-columns,\n.font-icon-columns-2,\n.font-icon-cogs,\n.font-icon-cog,\n.font-icon-cloud,\n.font-icon-check,\n.font-icon-check-empty,\n.font-icon-certificate,\n.font-icon-camera,\n.font-icon-calendar,\n.font-icon-bullhorn,\n.font-icon-briefcase,\n.font-icon-bookmark,\n.font-icon-book,\n.font-icon-bolt,\n.font-icon-bold,\n.font-icon-blockquote,\n.font-icon-bell,\n.font-icon-beaker,\n.font-icon-barcode,\n.font-icon-ban-circle,\n.font-icon-ban-chart,\n.font-icon-ban-chart-2,\n.font-icon-backward,\n.font-icon-asterisk,\n.font-icon-arrow-simple-up,\n.font-icon-arrow-simple-up-circle,\n.font-icon-arrow-simple-right,\n.font-icon-arrow-simple-right-circle,\n.font-icon-arrow-simple-left,\n.font-icon-arrow-simple-left-circle,\n.font-icon-arrow-simple-down,\n.font-icon-arrow-simple-down-circle,\n.font-icon-arrow-round-up,\n.font-icon-arrow-round-up-circle,\n.font-icon-arrow-round-right,\n.font-icon-arrow-round-right-circle,\n.font-icon-arrow-round-left,\n.font-icon-arrow-round-left-circle,\n.font-icon-arrow-round-down,\n.font-icon-arrow-round-down-circle,\n.font-icon-arrow-light-up,\n.font-icon-arrow-light-round-up,\n.font-icon-arrow-light-round-up-circle,\n.font-icon-arrow-light-round-right,\n.font-icon-arrow-light-round-right-circle,\n.font-icon-arrow-light-round-left,\n.font-icon-arrow-light-round-left-circle,\n.font-icon-arrow-light-round-down,\n.font-icon-arrow-light-round-down-circle,\n.font-icon-arrow-light-right,\n.font-icon-arrow-light-left,\n.font-icon-arrow-light-down,\n.font-icon-align-right,\n.font-icon-align-left,\n.font-icon-align-justify,\n.font-icon-align-center,\n.font-icon-adjust {\n  font-family: \"Icons\";\n  speak: none;\n  font-style: normal;\n  font-weight: normal;\n  font-variant: normal;\n  text-transform: none;\n  line-height: 1;\n  -webkit-font-smoothing: antialiased;\n}\n.font-icon-zoom-out:before {\n  content: \"\\e000\";\n}\n.font-icon-zoom-in:before {\n  content: \"\\e001\";\n}\n.font-icon-wrench:before {\n  content: \"\\e002\";\n}\n.font-icon-waves:before {\n  content: \"\\e003\";\n}\n.font-icon-warning:before {\n  content: \"\\e004\";\n}\n.font-icon-volume-up:before {\n  content: \"\\e005\";\n}\n.font-icon-volume-off:before {\n  content: \"\\e006\";\n}\n.font-icon-volume-down:before {\n  content: \"\\e007\";\n}\n.font-icon-viewport:before {\n  content: \"\\e008\";\n}\n.font-icon-user:before {\n  content: \"\\e009\";\n}\n.font-icon-user-border:before {\n  content: \"\\e00a\";\n}\n.font-icon-upload:before {\n  content: \"\\e00b\";\n}\n.font-icon-upload-2:before {\n  content: \"\\e00c\";\n}\n.font-icon-unlock:before {\n  content: \"\\e00d\";\n}\n.font-icon-underline:before {\n  content: \"\\e00e\";\n}\n.font-icon-tint:before {\n  content: \"\\e00f\";\n}\n.font-icon-time:before {\n  content: \"\\e010\";\n}\n.font-icon-text:before {\n  content: \"\\e011\";\n}\n.font-icon-text-width:before {\n  content: \"\\e012\";\n}\n.font-icon-text-height:before {\n  content: \"\\e013\";\n}\n.font-icon-tags:before {\n  content: \"\\e014\";\n}\n.font-icon-tag:before {\n  content: \"\\e015\";\n}\n.font-icon-table:before {\n  content: \"\\e016\";\n}\n.font-icon-strikethrough:before {\n  content: \"\\e017\";\n}\n.font-icon-stop:before {\n  content: \"\\e018\";\n}\n.font-icon-step-forward:before {\n  content: \"\\e019\";\n}\n.font-icon-step-backward:before {\n  content: \"\\e01a\";\n}\n.font-icon-stars:before {\n  content: \"\\e01b\";\n}\n.font-icon-star:before {\n  content: \"\\e01c\";\n}\n.font-icon-star-line:before {\n  content: \"\\e01d\";\n}\n.font-icon-star-half:before {\n  content: \"\\e01e\";\n}\n.font-icon-sort:before {\n  content: \"\\e01f\";\n}\n.font-icon-sort-up:before {\n  content: \"\\e020\";\n}\n.font-icon-sort-down:before {\n  content: \"\\e021\";\n}\n.font-icon-social-zerply:before {\n  content: \"\\e022\";\n}\n.font-icon-social-youtube:before {\n  content: \"\\e023\";\n}\n.font-icon-social-yelp:before {\n  content: \"\\e024\";\n}\n.font-icon-social-yahoo:before {\n  content: \"\\e025\";\n}\n.font-icon-social-wordpress:before {\n  content: \"\\e026\";\n}\n.font-icon-social-virb:before {\n  content: \"\\e027\";\n}\n.font-icon-social-vimeo:before {\n  content: \"\\e028\";\n}\n.font-icon-social-viddler:before {\n  content: \"\\e029\";\n}\n.font-icon-social-twitter:before {\n  content: \"\\e02a\";\n}\n.font-icon-social-tumblr:before {\n  content: \"\\e02b\";\n}\n.font-icon-social-stumbleupon:before {\n  content: \"\\e02c\";\n}\n.font-icon-social-soundcloud:before {\n  content: \"\\e02d\";\n}\n.font-icon-social-skype:before {\n  content: \"\\e02e\";\n}\n.font-icon-social-share-this:before {\n  content: \"\\e02f\";\n}\n.font-icon-social-quora:before {\n  content: \"\\e030\";\n}\n.font-icon-social-pinterest:before {\n  content: \"\\e031\";\n}\n.font-icon-social-photobucket:before {\n  content: \"\\e032\";\n}\n.font-icon-social-paypal:before {\n  content: \"\\e033\";\n}\n.font-icon-social-myspace:before {\n  content: \"\\e034\";\n}\n.font-icon-social-linkedin:before {\n  content: \"\\e035\";\n}\n.font-icon-social-last-fm:before {\n  content: \"\\e036\";\n}\n.font-icon-social-grooveshark:before {\n  content: \"\\e037\";\n}\n.font-icon-social-google-plus:before {\n  content: \"\\e038\";\n}\n.font-icon-social-github:before {\n  content: \"\\e039\";\n}\n.font-icon-social-forrst:before {\n  content: \"\\e03a\";\n}\n.font-icon-social-flickr:before {\n  content: \"\\e03b\";\n}\n.font-icon-social-facebook:before {\n  content: \"\\e03c\";\n}\n.font-icon-social-evernote:before {\n  content: \"\\e03d\";\n}\n.font-icon-social-envato:before {\n  content: \"\\e03e\";\n}\n.font-icon-social-email:before {\n  content: \"\\e03f\";\n}\n.font-icon-social-dribbble:before {\n  content: \"\\e040\";\n}\n.font-icon-social-digg:before {\n  content: \"\\e041\";\n}\n.font-icon-social-deviant-art:before {\n  content: \"\\e042\";\n}\n.font-icon-social-blogger:before {\n  content: \"\\e043\";\n}\n.font-icon-social-behance:before {\n  content: \"\\e044\";\n}\n.font-icon-social-bebo:before {\n  content: \"\\e045\";\n}\n.font-icon-social-addthis:before {\n  content: \"\\e046\";\n}\n.font-icon-social-500px:before {\n  content: \"\\e047\";\n}\n.font-icon-sitemap:before {\n  content: \"\\e048\";\n}\n.font-icon-signout:before {\n  content: \"\\e049\";\n}\n.font-icon-signin:before {\n  content: \"\\e04a\";\n}\n.font-icon-signal:before {\n  content: \"\\e04b\";\n}\n.font-icon-shopping-cart:before {\n  content: \"\\e04c\";\n}\n.font-icon-search:before {\n  content: \"\\e04d\";\n}\n.font-icon-rss:before {\n  content: \"\\e04e\";\n}\n.font-icon-road:before {\n  content: \"\\e04f\";\n}\n.font-icon-retweet:before {\n  content: \"\\e050\";\n}\n.font-icon-resize-vertical:before {\n  content: \"\\e051\";\n}\n.font-icon-resize-vertical-2:before {\n  content: \"\\e052\";\n}\n.font-icon-resize-small:before {\n  content: \"\\e053\";\n}\n.font-icon-resize-horizontal:before {\n  content: \"\\e054\";\n}\n.font-icon-resize-horizontal-2:before {\n  content: \"\\e055\";\n}\n.font-icon-resize-fullscreen:before {\n  content: \"\\e056\";\n}\n.font-icon-resize-full:before {\n  content: \"\\e057\";\n}\n.font-icon-repeat:before {\n  content: \"\\e058\";\n}\n.font-icon-reorder:before {\n  content: \"\\e059\";\n}\n.font-icon-remove:before {\n  content: \"\\e05a\";\n}\n.font-icon-remove-sign:before {\n  content: \"\\e05b\";\n}\n.font-icon-remove-circle:before {\n  content: \"\\e05c\";\n}\n.font-icon-read-more:before {\n  content: \"\\e05d\";\n}\n.font-icon-random:before {\n  content: \"\\e05e\";\n}\n.font-icon-question-sign:before {\n  content: \"\\e05f\";\n}\n.font-icon-pushpin:before {\n  content: \"\\e060\";\n}\n.font-icon-pushpin-2:before {\n  content: \"\\e061\";\n}\n.font-icon-print:before {\n  content: \"\\e062\";\n}\n.font-icon-plus:before {\n  content: \"\\e063\";\n}\n.font-icon-plus-sign:before {\n  content: \"\\e064\";\n}\n.font-icon-play:before {\n  content: \"\\e065\";\n}\n.font-icon-picture:before {\n  content: \"\\e066\";\n}\n.font-icon-phone:before {\n  content: \"\\e067\";\n}\n.font-icon-phone-sign:before {\n  content: \"\\e068\";\n}\n.font-icon-phone-boxed:before {\n  content: \"\\e069\";\n}\n.font-icon-pause:before {\n  content: \"\\e06a\";\n}\n.font-icon-paste:before {\n  content: \"\\e06b\";\n}\n.font-icon-paper-clip:before {\n  content: \"\\e06c\";\n}\n.font-icon-ok:before {\n  content: \"\\e06d\";\n}\n.font-icon-ok-sign:before {\n  content: \"\\e06e\";\n}\n.font-icon-ok-circle:before {\n  content: \"\\e06f\";\n}\n.font-icon-music:before {\n  content: \"\\e070\";\n}\n.font-icon-move:before {\n  content: \"\\e071\";\n}\n.font-icon-money:before {\n  content: \"\\e072\";\n}\n.font-icon-minus:before {\n  content: \"\\e073\";\n}\n.font-icon-minus-sign:before {\n  content: \"\\e074\";\n}\n.font-icon-map:before {\n  content: \"\\e075\";\n}\n.font-icon-map-marker:before {\n  content: \"\\e076\";\n}\n.font-icon-map-marker-2:before {\n  content: \"\\e077\";\n}\n.font-icon-magnet:before {\n  content: \"\\e078\";\n}\n.font-icon-magic:before {\n  content: \"\\e079\";\n}\n.font-icon-lock:before {\n  content: \"\\e07a\";\n}\n.font-icon-list:before {\n  content: \"\\e07b\";\n}\n.font-icon-list-3:before {\n  content: \"\\e07c\";\n}\n.font-icon-list-2:before {\n  content: \"\\e07d\";\n}\n.font-icon-link:before {\n  content: \"\\e07e\";\n}\n.font-icon-layer:before {\n  content: \"\\e07f\";\n}\n.font-icon-key:before {\n  content: \"\\e080\";\n}\n.font-icon-italic:before {\n  content: \"\\e081\";\n}\n.font-icon-info:before {\n  content: \"\\e082\";\n}\n.font-icon-indent-right:before {\n  content: \"\\e083\";\n}\n.font-icon-indent-left:before {\n  content: \"\\e084\";\n}\n.font-icon-inbox:before {\n  content: \"\\e085\";\n}\n.font-icon-inbox-empty:before {\n  content: \"\\e086\";\n}\n.font-icon-home:before {\n  content: \"\\e087\";\n}\n.font-icon-heart:before {\n  content: \"\\e088\";\n}\n.font-icon-heart-line:before {\n  content: \"\\e089\";\n}\n.font-icon-headphones:before {\n  content: \"\\e08a\";\n}\n.font-icon-headphones-line:before {\n  content: \"\\e08b\";\n}\n.font-icon-headphones-line-2:before {\n  content: \"\\e08c\";\n}\n.font-icon-headphones-2:before {\n  content: \"\\e08d\";\n}\n.font-icon-hdd:before {\n  content: \"\\e08e\";\n}\n.font-icon-group:before {\n  content: \"\\e08f\";\n}\n.font-icon-grid:before {\n  content: \"\\e090\";\n}\n.font-icon-grid-large:before {\n  content: \"\\e091\";\n}\n.font-icon-globe_line:before {\n  content: \"\\e092\";\n}\n.font-icon-glass:before {\n  content: \"\\e093\";\n}\n.font-icon-glass_2:before {\n  content: \"\\e094\";\n}\n.font-icon-gift:before {\n  content: \"\\e095\";\n}\n.font-icon-forward:before {\n  content: \"\\e096\";\n}\n.font-icon-font:before {\n  content: \"\\e097\";\n}\n.font-icon-folder-open:before {\n  content: \"\\e098\";\n}\n.font-icon-folder-close:before {\n  content: \"\\e099\";\n}\n.font-icon-flag:before {\n  content: \"\\e09a\";\n}\n.font-icon-fire:before {\n  content: \"\\e09b\";\n}\n.font-icon-film:before {\n  content: \"\\e09c\";\n}\n.font-icon-file:before {\n  content: \"\\e09d\";\n}\n.font-icon-file-empty:before {\n  content: \"\\e09e\";\n}\n.font-icon-fast-forward:before {\n  content: \"\\e09f\";\n}\n.font-icon-fast-backward:before {\n  content: \"\\e0a0\";\n}\n.font-icon-facetime:before {\n  content: \"\\e0a1\";\n}\n.font-icon-eye:before {\n  content: \"\\e0a2\";\n}\n.font-icon-eye_disable:before {\n  content: \"\\e0a3\";\n}\n.font-icon-expand-view:before {\n  content: \"\\e0a4\";\n}\n.font-icon-expand-view-3:before {\n  content: \"\\e0a5\";\n}\n.font-icon-expand-view-2:before {\n  content: \"\\e0a6\";\n}\n.font-icon-expand-vertical:before {\n  content: \"\\e0a7\";\n}\n.font-icon-expand-horizontal:before {\n  content: \"\\e0a8\";\n}\n.font-icon-exclamation:before {\n  content: \"\\e0a9\";\n}\n.font-icon-email:before {\n  content: \"\\e0aa\";\n}\n.font-icon-email_2:before {\n  content: \"\\e0ab\";\n}\n.font-icon-eject:before {\n  content: \"\\e0ac\";\n}\n.font-icon-edit:before {\n  content: \"\\e0ad\";\n}\n.font-icon-edit-check:before {\n  content: \"\\e0ae\";\n}\n.font-icon-download:before {\n  content: \"\\e0af\";\n}\n.font-icon-download_2:before {\n  content: \"\\e0b0\";\n}\n.font-icon-dashboard:before {\n  content: \"\\e0b1\";\n}\n.font-icon-credit-card:before {\n  content: \"\\e0b2\";\n}\n.font-icon-copy:before {\n  content: \"\\e0b3\";\n}\n.font-icon-comments:before {\n  content: \"\\e0b4\";\n}\n.font-icon-comments-line:before {\n  content: \"\\e0b5\";\n}\n.font-icon-comment:before {\n  content: \"\\e0b6\";\n}\n.font-icon-comment-line:before {\n  content: \"\\e0b7\";\n}\n.font-icon-columns:before {\n  content: \"\\e0b8\";\n}\n.font-icon-columns-2:before {\n  content: \"\\e0b9\";\n}\n.font-icon-cogs:before {\n  content: \"\\e0ba\";\n}\n.font-icon-cog:before {\n  content: \"\\e0bb\";\n}\n.font-icon-cloud:before {\n  content: \"\\e0bc\";\n}\n.font-icon-check:before {\n  content: \"\\e0bd\";\n}\n.font-icon-check-empty:before {\n  content: \"\\e0be\";\n}\n.font-icon-certificate:before {\n  content: \"\\e0bf\";\n}\n.font-icon-camera:before {\n  content: \"\\e0c0\";\n}\n.font-icon-calendar:before {\n  content: \"\\e0c1\";\n}\n.font-icon-bullhorn:before {\n  content: \"\\e0c2\";\n}\n.font-icon-briefcase:before {\n  content: \"\\e0c3\";\n}\n.font-icon-bookmark:before {\n  content: \"\\e0c4\";\n}\n.font-icon-book:before {\n  content: \"\\e0c5\";\n}\n.font-icon-bolt:before {\n  content: \"\\e0c6\";\n}\n.font-icon-bold:before {\n  content: \"\\e0c7\";\n}\n.font-icon-blockquote:before {\n  content: \"\\e0c8\";\n}\n.font-icon-bell:before {\n  content: \"\\e0c9\";\n}\n.font-icon-beaker:before {\n  content: \"\\e0ca\";\n}\n.font-icon-barcode:before {\n  content: \"\\e0cb\";\n}\n.font-icon-ban-circle:before {\n  content: \"\\e0cc\";\n}\n.font-icon-ban-chart:before {\n  content: \"\\e0cd\";\n}\n.font-icon-ban-chart-2:before {\n  content: \"\\e0ce\";\n}\n.font-icon-backward:before {\n  content: \"\\e0cf\";\n}\n.font-icon-asterisk:before {\n  content: \"\\e0d0\";\n}\n.font-icon-arrow-simple-up:before {\n  content: \"\\e0d1\";\n}\n.font-icon-arrow-simple-up-circle:before {\n  content: \"\\e0d2\";\n}\n.font-icon-arrow-simple-right:before {\n  content: \"\\e0d3\";\n}\n.font-icon-arrow-simple-right-circle:before {\n  content: \"\\e0d4\";\n}\n.font-icon-arrow-simple-left:before {\n  content: \"\\e0d5\";\n}\n.font-icon-arrow-simple-left-circle:before {\n  content: \"\\e0d6\";\n}\n.font-icon-arrow-simple-down:before {\n  content: \"\\e0d7\";\n}\n.font-icon-arrow-simple-down-circle:before {\n  content: \"\\e0d8\";\n}\n.font-icon-arrow-round-up:before {\n  content: \"\\e0d9\";\n}\n.font-icon-arrow-round-up-circle:before {\n  content: \"\\e0da\";\n}\n.font-icon-arrow-round-right:before {\n  content: \"\\e0db\";\n}\n.font-icon-arrow-round-right-circle:before {\n  content: \"\\e0dc\";\n}\n.font-icon-arrow-round-left:before {\n  content: \"\\e0dd\";\n}\n.font-icon-arrow-round-left-circle:before {\n  content: \"\\e0de\";\n}\n.font-icon-arrow-round-down:before {\n  content: \"\\e0df\";\n}\n.font-icon-arrow-round-down-circle:before {\n  content: \"\\e0e0\";\n}\n.font-icon-arrow-light-up:before {\n  content: \"\\e0e1\";\n}\n.font-icon-arrow-light-round-up:before {\n  content: \"\\e0e2\";\n}\n.font-icon-arrow-light-round-up-circle:before {\n  content: \"\\e0e3\";\n}\n.font-icon-arrow-light-round-right:before {\n  content: \"\\e0e4\";\n}\n.font-icon-arrow-light-round-right-circle:before {\n  content: \"\\e0e5\";\n}\n.font-icon-arrow-light-round-left:before {\n  content: \"\\e0e6\";\n}\n.font-icon-arrow-light-round-left-circle:before {\n  content: \"\\e0e7\";\n}\n.font-icon-arrow-light-round-down:before {\n  content: \"\\e0e8\";\n}\n.font-icon-arrow-light-round-down-circle:before {\n  content: \"\\e0e9\";\n}\n.font-icon-arrow-light-right:before {\n  content: \"\\e0ea\";\n}\n.font-icon-arrow-light-left:before {\n  content: \"\\e0eb\";\n}\n.font-icon-arrow-light-down:before {\n  content: \"\\e0ec\";\n}\n.font-icon-align-right:before {\n  content: \"\\e0ed\";\n}\n.font-icon-align-left:before {\n  content: \"\\e0ee\";\n}\n.font-icon-align-justify:before {\n  content: \"\\e0ef\";\n}\n.font-icon-align-center:before {\n  content: \"\\e0f0\";\n}\n.font-icon-adjust:before {\n  content: \"\\e0f1\";\n}\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/css/fancybox/jquery.fancybox.css",
    "content": "/*! fancyBox v2.1.4 fancyapps.com | fancyapps.com/fancybox/#license */\n.fancybox-wrap,\n.fancybox-skin,\n.fancybox-outer,\n.fancybox-inner,\n.fancybox-image,\n.fancybox-wrap iframe,\n.fancybox-wrap object,\n.fancybox-nav,\n.fancybox-nav span,\n.fancybox-tmp {\n  padding: 0;\n  margin: 0;\n  border: 0;\n  outline: none;\n  vertical-align: top;\n}\n\n.fancybox-wrap {\n  position: absolute;\n  top: 0;\n  left: 0;\n  z-index: 8020;\n}\n\n.fancybox-skin {\n  position: relative;\n  background: #2f3238;\n  color: #565656;\n  text-shadow: none;\n  -webkit-border-radius: 0;\n  -moz-border-radius: 0;\n  border-radius: 0;\n}\n\n.fancybox-opened {\n  z-index: 8030;\n}\n\n.fancybox-opened .fancybox-skin {\n  -webkit-box-shadow: none;\n  -moz-box-shadow: none;\n  box-shadow: none;\n}\n\n.fancybox-outer,\n.fancybox-inner {\n  position: relative;\n}\n\n.fancybox-inner {\n  overflow: hidden;\n}\n\n.fancybox-type-iframe .fancybox-inner {\n  -webkit-overflow-scrolling: touch;\n}\n\n.fancybox-error {\n  color: #444;\n  font-size: 14px;\n  line-height: 20px;\n  margin: 0;\n  padding: 15px;\n  white-space: nowrap;\n}\n\n.fancybox-image,\n.fancybox-iframe {\n  display: block;\n  width: 100%;\n  height: 100%;\n}\n\n.fancybox-image {\n  max-width: 100%;\n  max-height: 100%;\n}\n\n#fancybox-loading,\n.fancybox-close,\n.fancybox-prev span,\n.fancybox-next span {\n  background-image: url(\"fancybox_sprite.png\") !important;\n}\n\n#fancybox-loading {\n  position: fixed;\n  top: 50%;\n  left: 50%;\n  margin-top: -22px;\n  margin-left: -22px;\n  background-position: 0 -108px;\n  opacity: 0.8;\n  cursor: pointer;\n  z-index: 8060;\n}\n\n#fancybox-loading div {\n  width: 44px;\n  height: 44px;\n  background: url(\"fancybox_loading.gif\") center center no-repeat;\n}\n\n.fancybox-close {\n  position: absolute;\n  right: 0;\n  top: 0;\n  width: 40px;\n  height: 38px;\n  cursor: pointer;\n  z-index: 9000;\n  background-image: none;\n\n  opacity: 0.5;\n\n  -webkit-transition:\n    background 0.1s linear 0s,\n    opacity 0.1s linear 0s;\n  -moz-transition:\n    background 0.1s linear 0s,\n    opacity 0.1s linear 0s;\n  -o-transition:\n    background 0.1s linear 0s,\n    opacity 0.1s linear 0s;\n  transition:\n    background 0.1s linear 0s,\n    opacity 0.1s linear 0s;\n}\n\n.fancybox-close i {\n  left: 50%;\n  top: 50%;\n  margin: -11px 0 0 -11px;\n  font-size: 22px;\n  line-height: 1em;\n  position: absolute;\n  color: #ffffff;\n}\n\n.fancybox-close:hover {\n  opacity: 1;\n}\n\n.fancybox-nav {\n  position: absolute;\n  top: 0;\n  height: 100%;\n  cursor: pointer;\n  text-decoration: none;\n  background: transparent url(\"blank.gif\"); /* helps IE */\n  -webkit-tap-highlight-color: rgba(0, 0, 0, 0);\n  z-index: 8040;\n}\n\n.fancybox-prev,\n.fancybox-prev span {\n  left: 0;\n}\n\n.fancybox-next,\n.fancybox-next span {\n  right: 0;\n}\n\n.fancybox-nav span {\n  position: absolute;\n  top: 50%;\n  width: 44px;\n  height: 32px;\n  margin-top: -25px;\n  cursor: pointer;\n  z-index: 8040;\n  background-image: none;\n  background-color: #26292e;\n  background-position-y: -38px;\n  opacity: 0.5;\n\n  -webkit-transition:\n    background 0.1s linear 0s,\n    opacity 0.1s linear 0s;\n  -moz-transition:\n    background 0.1s linear 0s,\n    opacity 0.1s linear 0s;\n  -o-transition:\n    background 0.1s linear 0s,\n    opacity 0.1s linear 0s;\n  transition:\n    background 0.1s linear 0s,\n    opacity 0.1s linear 0s;\n}\n.fancybox-next span {\n  background-position-y: -72px;\n}\n.fancybox-prev span i {\n  left: 50%;\n  top: 50%;\n  margin: -15px 0 0 -17px;\n  font-size: 30px;\n  line-height: 1em;\n  position: absolute;\n  color: #ffffff;\n}\n\n.fancybox-next span i {\n  left: 50%;\n  top: 50%;\n  margin: -15px 0 0 -15px;\n  font-size: 30px;\n  line-height: 1em;\n  position: absolute;\n  color: #ffffff;\n}\n\n.fancybox-nav:hover span {\n  opacity: 1;\n}\n\n.fancybox-tmp {\n  position: absolute;\n  top: -99999px;\n  left: -99999px;\n  visibility: hidden;\n  max-width: 99999px;\n  max-height: 99999px;\n  overflow: visible !important;\n}\n\n/* Overlay helper */\n\n.fancybox-lock {\n  margin: 0 !important;\n}\n\n.fancybox-overlay {\n  position: absolute;\n  top: 0;\n  left: 0;\n  overflow: hidden !important;\n  display: none;\n  z-index: 8010;\n  background: url(\"fancybox_overlay.png\");\n}\n\n.fancybox-overlay-fixed {\n  position: fixed;\n  bottom: 0;\n  right: 0;\n}\n\n.fancybox-lock .fancybox-overlay {\n  overflow: auto;\n  overflow-y: scroll;\n}\n\n/* Title helper */\n\n.fancybox-title {\n  visibility: hidden;\n  position: relative;\n  text-shadow: none;\n  z-index: 8050;\n}\n\n.fancybox-opened .fancybox-title {\n  visibility: visible;\n}\n\n.fancybox-opened .fancybox-title h4 {\n  font-size: 24px;\n  color: #fff;\n  font-weight: 300;\n  margin-bottom: 10px;\n}\n\n.fancybox-opened .fancybox-title p {\n  font-size: 16px;\n  font-weight: 300;\n  color: #bbb;\n  line-height: 1.6em;\n  margin-bottom: 0;\n}\n\n.fancybox-title-float-wrap {\n  position: absolute;\n  bottom: 0;\n  right: 50%;\n  margin-bottom: -35px;\n  z-index: 8050;\n  text-align: center;\n}\n\n.fancybox-title-float-wrap .child {\n  display: inline-block;\n  margin-right: -100%;\n  padding: 2px 20px;\n  background: transparent; /* Fallback for web browsers that doesn't support RGBa */\n  background: rgba(0, 0, 0, 0.8);\n  -webkit-border-radius: 15px;\n  -moz-border-radius: 15px;\n  border-radius: 15px;\n  text-shadow: 0 1px 2px #222;\n  color: #fff;\n  font-weight: bold;\n  line-height: 24px;\n  white-space: nowrap;\n}\n\n.fancybox-title-outside-wrap {\n  position: relative;\n  margin-top: 10px;\n  color: #fff;\n}\n\n.fancybox-title-inside-wrap {\n  padding: 3px 30px 6px;\n  background: #61b331;\n}\n\n.fancybox-title-over-wrap {\n  position: absolute;\n  bottom: 0;\n  left: 0;\n  color: #fff;\n  padding: 10px;\n  background: #000;\n  background: rgba(0, 0, 0, 0.8);\n}\n\n@media (max-width: 480px) {\n  .fancybox-nav span,\n  .fancybox-nav:hover span,\n  .fancybox-close,\n  .fancybox-close:hover {\n    background: transparent;\n  }\n\n  .fancybox-close i {\n    left: 70px;\n    top: 10px;\n  }\n}\n\n@media (max-width: 320px) {\n  .fancybox-close i {\n    left: 30px;\n    top: 20px;\n  }\n}\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/css/font-awesome.css",
    "content": "/*!\n *  Font Awesome 4.0.3 by @davegandy - http://fontawesome.io - @fontawesome\n *  License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License)\n */\n/* FONT PATH\n * -------------------------- */\n@font-face {\n  font-family: \"FontAwesome\";\n  src: url(\"../fonts/fontawesome-webfont.eot?v=4.0.3\");\n  src:\n    url(\"../fonts/fontawesome-webfont.eot?#iefix&v=4.0.3\")\n      format(\"embedded-opentype\"),\n    url(\"../fonts/fontawesome-webfont.woff?v=4.0.3\") format(\"woff\"),\n    url(\"../fonts/fontawesome-webfont.ttf?v=4.0.3\") format(\"truetype\"),\n    url(\"../fonts/fontawesome-webfont.svg?v=4.0.3#fontawesomeregular\")\n      format(\"svg\");\n  font-weight: normal;\n  font-style: normal;\n}\n.fa {\n  display: inline-block;\n  font-family: FontAwesome;\n  font-style: normal;\n  font-weight: normal;\n  line-height: 1;\n  -webkit-font-smoothing: antialiased;\n  -moz-osx-font-smoothing: grayscale;\n}\n/* makes the font 33% larger relative to the icon container */\n.fa-lg {\n  font-size: 1.3333333333333333em;\n  line-height: 0.75em;\n  vertical-align: -15%;\n}\n.fa-2x {\n  font-size: 2em;\n}\n.fa-3x {\n  font-size: 3em;\n}\n.fa-4x {\n  font-size: 4em;\n}\n.fa-5x {\n  font-size: 5em;\n}\n.fa-fw {\n  width: 1.2857142857142858em;\n  text-align: center;\n}\n.fa-ul {\n  padding-left: 0;\n  margin-left: 2.142857142857143em;\n  list-style-type: none;\n}\n.fa-ul > li {\n  position: relative;\n}\n.fa-li {\n  position: absolute;\n  left: -2.142857142857143em;\n  width: 2.142857142857143em;\n  top: 0.14285714285714285em;\n  text-align: center;\n}\n.fa-li.fa-lg {\n  left: -1.8571428571428572em;\n}\n.fa-border {\n  padding: 0.2em 0.25em 0.15em;\n  border: solid 0.08em #eeeeee;\n  border-radius: 0.1em;\n}\n.pull-right {\n  float: right;\n}\n.pull-left {\n  float: left;\n}\n.fa.pull-left {\n  margin-right: 0.3em;\n}\n.fa.pull-right {\n  margin-left: 0.3em;\n}\n.fa-spin {\n  -webkit-animation: spin 2s infinite linear;\n  -moz-animation: spin 2s infinite linear;\n  -o-animation: spin 2s infinite linear;\n  animation: spin 2s infinite linear;\n}\n@-moz-keyframes spin {\n  0% {\n    -moz-transform: rotate(0deg);\n  }\n  100% {\n    -moz-transform: rotate(359deg);\n  }\n}\n@-webkit-keyframes spin {\n  0% {\n    -webkit-transform: rotate(0deg);\n  }\n  100% {\n    -webkit-transform: rotate(359deg);\n  }\n}\n@-o-keyframes spin {\n  0% {\n    -o-transform: rotate(0deg);\n  }\n  100% {\n    -o-transform: rotate(359deg);\n  }\n}\n@-ms-keyframes spin {\n  0% {\n    -ms-transform: rotate(0deg);\n  }\n  100% {\n    -ms-transform: rotate(359deg);\n  }\n}\n@keyframes spin {\n  0% {\n    transform: rotate(0deg);\n  }\n  100% {\n    transform: rotate(359deg);\n  }\n}\n.fa-rotate-90 {\n  filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=1);\n  -webkit-transform: rotate(90deg);\n  -moz-transform: rotate(90deg);\n  -ms-transform: rotate(90deg);\n  -o-transform: rotate(90deg);\n  transform: rotate(90deg);\n}\n.fa-rotate-180 {\n  filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=2);\n  -webkit-transform: rotate(180deg);\n  -moz-transform: rotate(180deg);\n  -ms-transform: rotate(180deg);\n  -o-transform: rotate(180deg);\n  transform: rotate(180deg);\n}\n.fa-rotate-270 {\n  filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=3);\n  -webkit-transform: rotate(270deg);\n  -moz-transform: rotate(270deg);\n  -ms-transform: rotate(270deg);\n  -o-transform: rotate(270deg);\n  transform: rotate(270deg);\n}\n.fa-flip-horizontal {\n  filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1);\n  -webkit-transform: scale(-1, 1);\n  -moz-transform: scale(-1, 1);\n  -ms-transform: scale(-1, 1);\n  -o-transform: scale(-1, 1);\n  transform: scale(-1, 1);\n}\n.fa-flip-vertical {\n  filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1);\n  -webkit-transform: scale(1, -1);\n  -moz-transform: scale(1, -1);\n  -ms-transform: scale(1, -1);\n  -o-transform: scale(1, -1);\n  transform: scale(1, -1);\n}\n.fa-stack {\n  position: relative;\n  display: inline-block;\n  width: 2em;\n  height: 2em;\n  line-height: 2em;\n  vertical-align: middle;\n}\n.fa-stack-1x,\n.fa-stack-2x {\n  position: absolute;\n  left: 0;\n  width: 100%;\n  text-align: center;\n}\n.fa-stack-1x {\n  line-height: inherit;\n}\n.fa-stack-2x {\n  font-size: 2em;\n}\n.fa-inverse {\n  color: #ffffff;\n}\n/* Font Awesome uses the Unicode Private Use Area (PUA) to ensure screen\n   readers do not read off random characters that represent icons */\n.fa-glass:before {\n  content: \"\\f000\";\n}\n.fa-music:before {\n  content: \"\\f001\";\n}\n.fa-search:before {\n  content: \"\\f002\";\n}\n.fa-envelope-o:before {\n  content: \"\\f003\";\n}\n.fa-heart:before {\n  content: \"\\f004\";\n}\n.fa-star:before {\n  content: \"\\f005\";\n}\n.fa-star-o:before {\n  content: \"\\f006\";\n}\n.fa-user:before {\n  content: \"\\f007\";\n}\n.fa-film:before {\n  content: \"\\f008\";\n}\n.fa-th-large:before {\n  content: \"\\f009\";\n}\n.fa-th:before {\n  content: \"\\f00a\";\n}\n.fa-th-list:before {\n  content: \"\\f00b\";\n}\n.fa-check:before {\n  content: \"\\f00c\";\n}\n.fa-times:before {\n  content: \"\\f00d\";\n}\n.fa-search-plus:before {\n  content: \"\\f00e\";\n}\n.fa-search-minus:before {\n  content: \"\\f010\";\n}\n.fa-power-off:before {\n  content: \"\\f011\";\n}\n.fa-signal:before {\n  content: \"\\f012\";\n}\n.fa-gear:before,\n.fa-cog:before {\n  content: \"\\f013\";\n}\n.fa-trash-o:before {\n  content: \"\\f014\";\n}\n.fa-home:before {\n  content: \"\\f015\";\n}\n.fa-file-o:before {\n  content: \"\\f016\";\n}\n.fa-clock-o:before {\n  content: \"\\f017\";\n}\n.fa-road:before {\n  content: \"\\f018\";\n}\n.fa-download:before {\n  content: \"\\f019\";\n}\n.fa-arrow-circle-o-down:before {\n  content: \"\\f01a\";\n}\n.fa-arrow-circle-o-up:before {\n  content: \"\\f01b\";\n}\n.fa-inbox:before {\n  content: \"\\f01c\";\n}\n.fa-play-circle-o:before {\n  content: \"\\f01d\";\n}\n.fa-rotate-right:before,\n.fa-repeat:before {\n  content: \"\\f01e\";\n}\n.fa-refresh:before {\n  content: \"\\f021\";\n}\n.fa-list-alt:before {\n  content: \"\\f022\";\n}\n.fa-lock:before {\n  content: \"\\f023\";\n}\n.fa-flag:before {\n  content: \"\\f024\";\n}\n.fa-headphones:before {\n  content: \"\\f025\";\n}\n.fa-volume-off:before {\n  content: \"\\f026\";\n}\n.fa-volume-down:before {\n  content: \"\\f027\";\n}\n.fa-volume-up:before {\n  content: \"\\f028\";\n}\n.fa-qrcode:before {\n  content: \"\\f029\";\n}\n.fa-barcode:before {\n  content: \"\\f02a\";\n}\n.fa-tag:before {\n  content: \"\\f02b\";\n}\n.fa-tags:before {\n  content: \"\\f02c\";\n}\n.fa-book:before {\n  content: \"\\f02d\";\n}\n.fa-bookmark:before {\n  content: \"\\f02e\";\n}\n.fa-print:before {\n  content: \"\\f02f\";\n}\n.fa-camera:before {\n  content: \"\\f030\";\n}\n.fa-font:before {\n  content: \"\\f031\";\n}\n.fa-bold:before {\n  content: \"\\f032\";\n}\n.fa-italic:before {\n  content: \"\\f033\";\n}\n.fa-text-height:before {\n  content: \"\\f034\";\n}\n.fa-text-width:before {\n  content: \"\\f035\";\n}\n.fa-align-left:before {\n  content: \"\\f036\";\n}\n.fa-align-center:before {\n  content: \"\\f037\";\n}\n.fa-align-right:before {\n  content: \"\\f038\";\n}\n.fa-align-justify:before {\n  content: \"\\f039\";\n}\n.fa-list:before {\n  content: \"\\f03a\";\n}\n.fa-dedent:before,\n.fa-outdent:before {\n  content: \"\\f03b\";\n}\n.fa-indent:before {\n  content: \"\\f03c\";\n}\n.fa-video-camera:before {\n  content: \"\\f03d\";\n}\n.fa-picture-o:before {\n  content: \"\\f03e\";\n}\n.fa-pencil:before {\n  content: \"\\f040\";\n}\n.fa-map-marker:before {\n  content: \"\\f041\";\n}\n.fa-adjust:before {\n  content: \"\\f042\";\n}\n.fa-tint:before {\n  content: \"\\f043\";\n}\n.fa-edit:before,\n.fa-pencil-square-o:before {\n  content: \"\\f044\";\n}\n.fa-share-square-o:before {\n  content: \"\\f045\";\n}\n.fa-check-square-o:before {\n  content: \"\\f046\";\n}\n.fa-arrows:before {\n  content: \"\\f047\";\n}\n.fa-step-backward:before {\n  content: \"\\f048\";\n}\n.fa-fast-backward:before {\n  content: \"\\f049\";\n}\n.fa-backward:before {\n  content: \"\\f04a\";\n}\n.fa-play:before {\n  content: \"\\f04b\";\n}\n.fa-pause:before {\n  content: \"\\f04c\";\n}\n.fa-stop:before {\n  content: \"\\f04d\";\n}\n.fa-forward:before {\n  content: \"\\f04e\";\n}\n.fa-fast-forward:before {\n  content: \"\\f050\";\n}\n.fa-step-forward:before {\n  content: \"\\f051\";\n}\n.fa-eject:before {\n  content: \"\\f052\";\n}\n.fa-chevron-left:before {\n  content: \"\\f053\";\n}\n.fa-chevron-right:before {\n  content: \"\\f054\";\n}\n.fa-plus-circle:before {\n  content: \"\\f055\";\n}\n.fa-minus-circle:before {\n  content: \"\\f056\";\n}\n.fa-times-circle:before {\n  content: \"\\f057\";\n}\n.fa-check-circle:before {\n  content: \"\\f058\";\n}\n.fa-question-circle:before {\n  content: \"\\f059\";\n}\n.fa-info-circle:before {\n  content: \"\\f05a\";\n}\n.fa-crosshairs:before {\n  content: \"\\f05b\";\n}\n.fa-times-circle-o:before {\n  content: \"\\f05c\";\n}\n.fa-check-circle-o:before {\n  content: \"\\f05d\";\n}\n.fa-ban:before {\n  content: \"\\f05e\";\n}\n.fa-arrow-left:before {\n  content: \"\\f060\";\n}\n.fa-arrow-right:before {\n  content: \"\\f061\";\n}\n.fa-arrow-up:before {\n  content: \"\\f062\";\n}\n.fa-arrow-down:before {\n  content: \"\\f063\";\n}\n.fa-mail-forward:before,\n.fa-share:before {\n  content: \"\\f064\";\n}\n.fa-expand:before {\n  content: \"\\f065\";\n}\n.fa-compress:before {\n  content: \"\\f066\";\n}\n.fa-plus:before {\n  content: \"\\f067\";\n}\n.fa-minus:before {\n  content: \"\\f068\";\n}\n.fa-asterisk:before {\n  content: \"\\f069\";\n}\n.fa-exclamation-circle:before {\n  content: \"\\f06a\";\n}\n.fa-gift:before {\n  content: \"\\f06b\";\n}\n.fa-leaf:before {\n  content: \"\\f06c\";\n}\n.fa-fire:before {\n  content: \"\\f06d\";\n}\n.fa-eye:before {\n  content: \"\\f06e\";\n}\n.fa-eye-slash:before {\n  content: \"\\f070\";\n}\n.fa-warning:before,\n.fa-exclamation-triangle:before {\n  content: \"\\f071\";\n}\n.fa-plane:before {\n  content: \"\\f072\";\n}\n.fa-calendar:before {\n  content: \"\\f073\";\n}\n.fa-random:before {\n  content: \"\\f074\";\n}\n.fa-comment:before {\n  content: \"\\f075\";\n}\n.fa-magnet:before {\n  content: \"\\f076\";\n}\n.fa-chevron-up:before {\n  content: \"\\f077\";\n}\n.fa-chevron-down:before {\n  content: \"\\f078\";\n}\n.fa-retweet:before {\n  content: \"\\f079\";\n}\n.fa-shopping-cart:before {\n  content: \"\\f07a\";\n}\n.fa-folder:before {\n  content: \"\\f07b\";\n}\n.fa-folder-open:before {\n  content: \"\\f07c\";\n}\n.fa-arrows-v:before {\n  content: \"\\f07d\";\n}\n.fa-arrows-h:before {\n  content: \"\\f07e\";\n}\n.fa-bar-chart-o:before {\n  content: \"\\f080\";\n}\n.fa-twitter-square:before {\n  content: \"\\f081\";\n}\n.fa-facebook-square:before {\n  content: \"\\f082\";\n}\n.fa-camera-retro:before {\n  content: \"\\f083\";\n}\n.fa-key:before {\n  content: \"\\f084\";\n}\n.fa-gears:before,\n.fa-cogs:before {\n  content: \"\\f085\";\n}\n.fa-comments:before {\n  content: \"\\f086\";\n}\n.fa-thumbs-o-up:before {\n  content: \"\\f087\";\n}\n.fa-thumbs-o-down:before {\n  content: \"\\f088\";\n}\n.fa-star-half:before {\n  content: \"\\f089\";\n}\n.fa-heart-o:before {\n  content: \"\\f08a\";\n}\n.fa-sign-out:before {\n  content: \"\\f08b\";\n}\n.fa-linkedin-square:before {\n  content: \"\\f08c\";\n}\n.fa-thumb-tack:before {\n  content: \"\\f08d\";\n}\n.fa-external-link:before {\n  content: \"\\f08e\";\n}\n.fa-sign-in:before {\n  content: \"\\f090\";\n}\n.fa-trophy:before {\n  content: \"\\f091\";\n}\n.fa-github-square:before {\n  content: \"\\f092\";\n}\n.fa-upload:before {\n  content: \"\\f093\";\n}\n.fa-lemon-o:before {\n  content: \"\\f094\";\n}\n.fa-phone:before {\n  content: \"\\f095\";\n}\n.fa-square-o:before {\n  content: \"\\f096\";\n}\n.fa-bookmark-o:before {\n  content: \"\\f097\";\n}\n.fa-phone-square:before {\n  content: \"\\f098\";\n}\n.fa-twitter:before {\n  content: \"\\f099\";\n}\n.fa-facebook:before {\n  content: \"\\f09a\";\n}\n.fa-github:before {\n  content: \"\\f09b\";\n}\n.fa-unlock:before {\n  content: \"\\f09c\";\n}\n.fa-credit-card:before {\n  content: \"\\f09d\";\n}\n.fa-rss:before {\n  content: \"\\f09e\";\n}\n.fa-hdd-o:before {\n  content: \"\\f0a0\";\n}\n.fa-bullhorn:before {\n  content: \"\\f0a1\";\n}\n.fa-bell:before {\n  content: \"\\f0f3\";\n}\n.fa-certificate:before {\n  content: \"\\f0a3\";\n}\n.fa-hand-o-right:before {\n  content: \"\\f0a4\";\n}\n.fa-hand-o-left:before {\n  content: \"\\f0a5\";\n}\n.fa-hand-o-up:before {\n  content: \"\\f0a6\";\n}\n.fa-hand-o-down:before {\n  content: \"\\f0a7\";\n}\n.fa-arrow-circle-left:before {\n  content: \"\\f0a8\";\n}\n.fa-arrow-circle-right:before {\n  content: \"\\f0a9\";\n}\n.fa-arrow-circle-up:before {\n  content: \"\\f0aa\";\n}\n.fa-arrow-circle-down:before {\n  content: \"\\f0ab\";\n}\n.fa-globe:before {\n  content: \"\\f0ac\";\n}\n.fa-wrench:before {\n  content: \"\\f0ad\";\n}\n.fa-tasks:before {\n  content: \"\\f0ae\";\n}\n.fa-filter:before {\n  content: \"\\f0b0\";\n}\n.fa-briefcase:before {\n  content: \"\\f0b1\";\n}\n.fa-arrows-alt:before {\n  content: \"\\f0b2\";\n}\n.fa-group:before,\n.fa-users:before {\n  content: \"\\f0c0\";\n}\n.fa-chain:before,\n.fa-link:before {\n  content: \"\\f0c1\";\n}\n.fa-cloud:before {\n  content: \"\\f0c2\";\n}\n.fa-flask:before {\n  content: \"\\f0c3\";\n}\n.fa-cut:before,\n.fa-scissors:before {\n  content: \"\\f0c4\";\n}\n.fa-copy:before,\n.fa-files-o:before {\n  content: \"\\f0c5\";\n}\n.fa-paperclip:before {\n  content: \"\\f0c6\";\n}\n.fa-save:before,\n.fa-floppy-o:before {\n  content: \"\\f0c7\";\n}\n.fa-square:before {\n  content: \"\\f0c8\";\n}\n.fa-bars:before {\n  content: \"\\f0c9\";\n}\n.fa-list-ul:before {\n  content: \"\\f0ca\";\n}\n.fa-list-ol:before {\n  content: \"\\f0cb\";\n}\n.fa-strikethrough:before {\n  content: \"\\f0cc\";\n}\n.fa-underline:before {\n  content: \"\\f0cd\";\n}\n.fa-table:before {\n  content: \"\\f0ce\";\n}\n.fa-magic:before {\n  content: \"\\f0d0\";\n}\n.fa-truck:before {\n  content: \"\\f0d1\";\n}\n.fa-pinterest:before {\n  content: \"\\f0d2\";\n}\n.fa-pinterest-square:before {\n  content: \"\\f0d3\";\n}\n.fa-google-plus-square:before {\n  content: \"\\f0d4\";\n}\n.fa-google-plus:before {\n  content: \"\\f0d5\";\n}\n.fa-money:before {\n  content: \"\\f0d6\";\n}\n.fa-caret-down:before {\n  content: \"\\f0d7\";\n}\n.fa-caret-up:before {\n  content: \"\\f0d8\";\n}\n.fa-caret-left:before {\n  content: \"\\f0d9\";\n}\n.fa-caret-right:before {\n  content: \"\\f0da\";\n}\n.fa-columns:before {\n  content: \"\\f0db\";\n}\n.fa-unsorted:before,\n.fa-sort:before {\n  content: \"\\f0dc\";\n}\n.fa-sort-down:before,\n.fa-sort-asc:before {\n  content: \"\\f0dd\";\n}\n.fa-sort-up:before,\n.fa-sort-desc:before {\n  content: \"\\f0de\";\n}\n.fa-envelope:before {\n  content: \"\\f0e0\";\n}\n.fa-linkedin:before {\n  content: \"\\f0e1\";\n}\n.fa-rotate-left:before,\n.fa-undo:before {\n  content: \"\\f0e2\";\n}\n.fa-legal:before,\n.fa-gavel:before {\n  content: \"\\f0e3\";\n}\n.fa-dashboard:before,\n.fa-tachometer:before {\n  content: \"\\f0e4\";\n}\n.fa-comment-o:before {\n  content: \"\\f0e5\";\n}\n.fa-comments-o:before {\n  content: \"\\f0e6\";\n}\n.fa-flash:before,\n.fa-bolt:before {\n  content: \"\\f0e7\";\n}\n.fa-sitemap:before {\n  content: \"\\f0e8\";\n}\n.fa-umbrella:before {\n  content: \"\\f0e9\";\n}\n.fa-paste:before,\n.fa-clipboard:before {\n  content: \"\\f0ea\";\n}\n.fa-lightbulb-o:before {\n  content: \"\\f0eb\";\n}\n.fa-exchange:before {\n  content: \"\\f0ec\";\n}\n.fa-cloud-download:before {\n  content: \"\\f0ed\";\n}\n.fa-cloud-upload:before {\n  content: \"\\f0ee\";\n}\n.fa-user-md:before {\n  content: \"\\f0f0\";\n}\n.fa-stethoscope:before {\n  content: \"\\f0f1\";\n}\n.fa-suitcase:before {\n  content: \"\\f0f2\";\n}\n.fa-bell-o:before {\n  content: \"\\f0a2\";\n}\n.fa-coffee:before {\n  content: \"\\f0f4\";\n}\n.fa-cutlery:before {\n  content: \"\\f0f5\";\n}\n.fa-file-text-o:before {\n  content: \"\\f0f6\";\n}\n.fa-building-o:before {\n  content: \"\\f0f7\";\n}\n.fa-hospital-o:before {\n  content: \"\\f0f8\";\n}\n.fa-ambulance:before {\n  content: \"\\f0f9\";\n}\n.fa-medkit:before {\n  content: \"\\f0fa\";\n}\n.fa-fighter-jet:before {\n  content: \"\\f0fb\";\n}\n.fa-beer:before {\n  content: \"\\f0fc\";\n}\n.fa-h-square:before {\n  content: \"\\f0fd\";\n}\n.fa-plus-square:before {\n  content: \"\\f0fe\";\n}\n.fa-angle-double-left:before {\n  content: \"\\f100\";\n}\n.fa-angle-double-right:before {\n  content: \"\\f101\";\n}\n.fa-angle-double-up:before {\n  content: \"\\f102\";\n}\n.fa-angle-double-down:before {\n  content: \"\\f103\";\n}\n.fa-angle-left:before {\n  content: \"\\f104\";\n}\n.fa-angle-right:before {\n  content: \"\\f105\";\n}\n.fa-angle-up:before {\n  content: \"\\f106\";\n}\n.fa-angle-down:before {\n  content: \"\\f107\";\n}\n.fa-desktop:before {\n  content: \"\\f108\";\n}\n.fa-laptop:before {\n  content: \"\\f109\";\n}\n.fa-tablet:before {\n  content: \"\\f10a\";\n}\n.fa-mobile-phone:before,\n.fa-mobile:before {\n  content: \"\\f10b\";\n}\n.fa-circle-o:before {\n  content: \"\\f10c\";\n}\n.fa-quote-left:before {\n  content: \"\\f10d\";\n}\n.fa-quote-right:before {\n  content: \"\\f10e\";\n}\n.fa-spinner:before {\n  content: \"\\f110\";\n}\n.fa-circle:before {\n  content: \"\\f111\";\n}\n.fa-mail-reply:before,\n.fa-reply:before {\n  content: \"\\f112\";\n}\n.fa-github-alt:before {\n  content: \"\\f113\";\n}\n.fa-folder-o:before {\n  content: \"\\f114\";\n}\n.fa-folder-open-o:before {\n  content: \"\\f115\";\n}\n.fa-smile-o:before {\n  content: \"\\f118\";\n}\n.fa-frown-o:before {\n  content: \"\\f119\";\n}\n.fa-meh-o:before {\n  content: \"\\f11a\";\n}\n.fa-gamepad:before {\n  content: \"\\f11b\";\n}\n.fa-keyboard-o:before {\n  content: \"\\f11c\";\n}\n.fa-flag-o:before {\n  content: \"\\f11d\";\n}\n.fa-flag-checkered:before {\n  content: \"\\f11e\";\n}\n.fa-terminal:before {\n  content: \"\\f120\";\n}\n.fa-code:before {\n  content: \"\\f121\";\n}\n.fa-reply-all:before {\n  content: \"\\f122\";\n}\n.fa-mail-reply-all:before {\n  content: \"\\f122\";\n}\n.fa-star-half-empty:before,\n.fa-star-half-full:before,\n.fa-star-half-o:before {\n  content: \"\\f123\";\n}\n.fa-location-arrow:before {\n  content: \"\\f124\";\n}\n.fa-crop:before {\n  content: \"\\f125\";\n}\n.fa-code-fork:before {\n  content: \"\\f126\";\n}\n.fa-unlink:before,\n.fa-chain-broken:before {\n  content: \"\\f127\";\n}\n.fa-question:before {\n  content: \"\\f128\";\n}\n.fa-info:before {\n  content: \"\\f129\";\n}\n.fa-exclamation:before {\n  content: \"\\f12a\";\n}\n.fa-superscript:before {\n  content: \"\\f12b\";\n}\n.fa-subscript:before {\n  content: \"\\f12c\";\n}\n.fa-eraser:before {\n  content: \"\\f12d\";\n}\n.fa-puzzle-piece:before {\n  content: \"\\f12e\";\n}\n.fa-microphone:before {\n  content: \"\\f130\";\n}\n.fa-microphone-slash:before {\n  content: \"\\f131\";\n}\n.fa-shield:before {\n  content: \"\\f132\";\n}\n.fa-calendar-o:before {\n  content: \"\\f133\";\n}\n.fa-fire-extinguisher:before {\n  content: \"\\f134\";\n}\n.fa-rocket:before {\n  content: \"\\f135\";\n}\n.fa-maxcdn:before {\n  content: \"\\f136\";\n}\n.fa-chevron-circle-left:before {\n  content: \"\\f137\";\n}\n.fa-chevron-circle-right:before {\n  content: \"\\f138\";\n}\n.fa-chevron-circle-up:before {\n  content: \"\\f139\";\n}\n.fa-chevron-circle-down:before {\n  content: \"\\f13a\";\n}\n.fa-html5:before {\n  content: \"\\f13b\";\n}\n.fa-css3:before {\n  content: \"\\f13c\";\n}\n.fa-anchor:before {\n  content: \"\\f13d\";\n}\n.fa-unlock-alt:before {\n  content: \"\\f13e\";\n}\n.fa-bullseye:before {\n  content: \"\\f140\";\n}\n.fa-ellipsis-h:before {\n  content: \"\\f141\";\n}\n.fa-ellipsis-v:before {\n  content: \"\\f142\";\n}\n.fa-rss-square:before {\n  content: \"\\f143\";\n}\n.fa-play-circle:before {\n  content: \"\\f144\";\n}\n.fa-ticket:before {\n  content: \"\\f145\";\n}\n.fa-minus-square:before {\n  content: \"\\f146\";\n}\n.fa-minus-square-o:before {\n  content: \"\\f147\";\n}\n.fa-level-up:before {\n  content: \"\\f148\";\n}\n.fa-level-down:before {\n  content: \"\\f149\";\n}\n.fa-check-square:before {\n  content: \"\\f14a\";\n}\n.fa-pencil-square:before {\n  content: \"\\f14b\";\n}\n.fa-external-link-square:before {\n  content: \"\\f14c\";\n}\n.fa-share-square:before {\n  content: \"\\f14d\";\n}\n.fa-compass:before {\n  content: \"\\f14e\";\n}\n.fa-toggle-down:before,\n.fa-caret-square-o-down:before {\n  content: \"\\f150\";\n}\n.fa-toggle-up:before,\n.fa-caret-square-o-up:before {\n  content: \"\\f151\";\n}\n.fa-toggle-right:before,\n.fa-caret-square-o-right:before {\n  content: \"\\f152\";\n}\n.fa-euro:before,\n.fa-eur:before {\n  content: \"\\f153\";\n}\n.fa-gbp:before {\n  content: \"\\f154\";\n}\n.fa-dollar:before,\n.fa-usd:before {\n  content: \"\\f155\";\n}\n.fa-rupee:before,\n.fa-inr:before {\n  content: \"\\f156\";\n}\n.fa-cny:before,\n.fa-rmb:before,\n.fa-yen:before,\n.fa-jpy:before {\n  content: \"\\f157\";\n}\n.fa-ruble:before,\n.fa-rouble:before,\n.fa-rub:before {\n  content: \"\\f158\";\n}\n.fa-won:before,\n.fa-krw:before {\n  content: \"\\f159\";\n}\n.fa-bitcoin:before,\n.fa-btc:before {\n  content: \"\\f15a\";\n}\n.fa-file:before {\n  content: \"\\f15b\";\n}\n.fa-file-text:before {\n  content: \"\\f15c\";\n}\n.fa-sort-alpha-asc:before {\n  content: \"\\f15d\";\n}\n.fa-sort-alpha-desc:before {\n  content: \"\\f15e\";\n}\n.fa-sort-amount-asc:before {\n  content: \"\\f160\";\n}\n.fa-sort-amount-desc:before {\n  content: \"\\f161\";\n}\n.fa-sort-numeric-asc:before {\n  content: \"\\f162\";\n}\n.fa-sort-numeric-desc:before {\n  content: \"\\f163\";\n}\n.fa-thumbs-up:before {\n  content: \"\\f164\";\n}\n.fa-thumbs-down:before {\n  content: \"\\f165\";\n}\n.fa-youtube-square:before {\n  content: \"\\f166\";\n}\n.fa-youtube:before {\n  content: \"\\f167\";\n}\n.fa-xing:before {\n  content: \"\\f168\";\n}\n.fa-xing-square:before {\n  content: \"\\f169\";\n}\n.fa-youtube-play:before {\n  content: \"\\f16a\";\n}\n.fa-dropbox:before {\n  content: \"\\f16b\";\n}\n.fa-stack-overflow:before {\n  content: \"\\f16c\";\n}\n.fa-instagram:before {\n  content: \"\\f16d\";\n}\n.fa-flickr:before {\n  content: \"\\f16e\";\n}\n.fa-adn:before {\n  content: \"\\f170\";\n}\n.fa-bitbucket:before {\n  content: \"\\f171\";\n}\n.fa-bitbucket-square:before {\n  content: \"\\f172\";\n}\n.fa-tumblr:before {\n  content: \"\\f173\";\n}\n.fa-tumblr-square:before {\n  content: \"\\f174\";\n}\n.fa-long-arrow-down:before {\n  content: \"\\f175\";\n}\n.fa-long-arrow-up:before {\n  content: \"\\f176\";\n}\n.fa-long-arrow-left:before {\n  content: \"\\f177\";\n}\n.fa-long-arrow-right:before {\n  content: \"\\f178\";\n}\n.fa-apple:before {\n  content: \"\\f179\";\n}\n.fa-windows:before {\n  content: \"\\f17a\";\n}\n.fa-android:before {\n  content: \"\\f17b\";\n}\n.fa-linux:before {\n  content: \"\\f17c\";\n}\n.fa-dribbble:before {\n  content: \"\\f17d\";\n}\n.fa-skype:before {\n  content: \"\\f17e\";\n}\n.fa-foursquare:before {\n  content: \"\\f180\";\n}\n.fa-trello:before {\n  content: \"\\f181\";\n}\n.fa-female:before {\n  content: \"\\f182\";\n}\n.fa-male:before {\n  content: \"\\f183\";\n}\n.fa-gittip:before {\n  content: \"\\f184\";\n}\n.fa-sun-o:before {\n  content: \"\\f185\";\n}\n.fa-moon-o:before {\n  content: \"\\f186\";\n}\n.fa-archive:before {\n  content: \"\\f187\";\n}\n.fa-bug:before {\n  content: \"\\f188\";\n}\n.fa-vk:before {\n  content: \"\\f189\";\n}\n.fa-weibo:before {\n  content: \"\\f18a\";\n}\n.fa-renren:before {\n  content: \"\\f18b\";\n}\n.fa-pagelines:before {\n  content: \"\\f18c\";\n}\n.fa-stack-exchange:before {\n  content: \"\\f18d\";\n}\n.fa-arrow-circle-o-right:before {\n  content: \"\\f18e\";\n}\n.fa-arrow-circle-o-left:before {\n  content: \"\\f190\";\n}\n.fa-toggle-left:before,\n.fa-caret-square-o-left:before {\n  content: \"\\f191\";\n}\n.fa-dot-circle-o:before {\n  content: \"\\f192\";\n}\n.fa-wheelchair:before {\n  content: \"\\f193\";\n}\n.fa-vimeo-square:before {\n  content: \"\\f194\";\n}\n.fa-turkish-lira:before,\n.fa-try:before {\n  content: \"\\f195\";\n}\n.fa-plus-square-o:before {\n  content: \"\\f196\";\n}\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/css/style.css",
    "content": "/*\nAuthor URI: http://webthemez.com/\nNote: \nLicence under Creative Commons Attribution 3.0 \nDo not remove the back-link in this web template \n-------------------------------------------------------*/\n\n@import url(\"http://fonts.googleapis.com/css?family=Noto+Serif:400,400italic,700|Open+Sans:400,600,700\");\n@import url(\"font-awesome.css\");\n@import url(\"animate.css\");\n\nbody {\n  font-family: \"Open Sans\", Arial, sans-serif;\n  font-size: 14px;\n  font-weight: 300;\n  line-height: 1.6em;\n  color: #656565;\n}\n\na:active {\n  outline: 0;\n}\n\n.clear {\n  clear: both;\n}\n\nh1,\nh2,\nh3,\nh4,\nh5,\nh6 {\n  font-family: \"Open Sans\", Arial, sans-serif;\n  font-weight: 700;\n  line-height: 1.1em;\n  color: #333;\n  margin-bottom: 20px;\n}\n\n.container {\n  padding: 0 20px 0 20px;\n  position: relative;\n}\n\n#wrapper {\n  width: 100%;\n  margin: 0;\n  padding: 0;\n}\n\n.row,\n.row-fluid {\n  margin-bottom: 30px;\n}\n\n.row .row,\n.row-fluid .row-fluid {\n  margin-bottom: 30px;\n}\n\n.row.nomargin,\n.row-fluid.nomargin {\n  margin-bottom: 0;\n}\n\nimg.img-polaroid {\n  margin: 0 0 20px 0;\n}\n.img-box {\n  max-width: 100%;\n}\n/*  Header\n==================================== */\n\nheader .navbar {\n  margin-bottom: 0;\n}\n\n.navbar-default {\n  border: none;\n}\n\n.navbar-brand {\n  color: #222;\n  text-transform: uppercase;\n  font-size: 24px;\n  font-weight: 700;\n  line-height: 1em;\n  letter-spacing: -1px;\n  margin-top: 13px;\n  padding: 0 0 0 15px;\n}\n.navbar-default .navbar-brand {\n  color: #61b331;\n}\n\nheader .navbar-collapse ul.navbar-nav {\n  float: right;\n  margin-right: 0;\n}\n\nheader .navbar-default {\n  background-color: #ffffff;\n}\n\nheader .nav li a:hover,\nheader .nav li a:focus,\nheader .nav li.active a,\nheader .nav li.active a:hover,\nheader .nav li a.dropdown-toggle:hover,\nheader .nav li a.dropdown-toggle:focus,\nheader .nav li.active ul.dropdown-menu li a:hover,\nheader .nav li.active ul.dropdown-menu li.active a {\n  -webkit-transition: all 0.3s ease;\n  -moz-transition: all 0.3s ease;\n  -ms-transition: all 0.3s ease;\n  -o-transition: all 0.3s ease;\n  transition: all 0.3s ease;\n}\n\nheader .navbar-default .navbar-nav > .open > a,\nheader .navbar-default .navbar-nav > .open > a:hover,\nheader .navbar-default .navbar-nav > .open > a:focus {\n  -webkit-transition: all 0.3s ease;\n  -moz-transition: all 0.3s ease;\n  -ms-transition: all 0.3s ease;\n  -o-transition: all 0.3s ease;\n  transition: all 0.3s ease;\n}\n\nheader .navbar {\n  min-height: 70px;\n  padding: 18px 0;\n}\n\nheader .navbar-nav > li {\n  padding-bottom: 12px;\n  padding-top: 12px;\n}\n\nheader .navbar-nav > li > a {\n  padding-bottom: 6px;\n  padding-top: 5px;\n  margin-left: 2px;\n  line-height: 30px;\n  font-weight: 700;\n  -webkit-transition: all 0.3s ease;\n  -moz-transition: all 0.3s ease;\n  -ms-transition: all 0.3s ease;\n  -o-transition: all 0.3s ease;\n  transition: all 0.3s ease;\n}\n\n.dropdown-menu li a:hover {\n  color: #fff !important;\n}\n\nheader .nav .caret {\n  border-bottom-color: #f5f5f5;\n  border-top-color: #f5f5f5;\n}\n.navbar-default .navbar-nav > .active > a,\n.navbar-default .navbar-nav > .active > a:hover,\n.navbar-default .navbar-nav > .active > a:focus {\n  background-color: #fff;\n}\n.navbar-default .navbar-nav > .open > a,\n.navbar-default .navbar-nav > .open > a:hover,\n.navbar-default .navbar-nav > .open > a:focus {\n  background-color: #fff;\n}\n\n.dropdown-menu {\n  box-shadow: none;\n  border-radius: 0;\n  border: none;\n}\n\n.dropdown-menu li:last-child {\n  padding-bottom: 0 !important;\n  margin-bottom: 0;\n}\n\nheader .nav li .dropdown-menu {\n  padding: 0;\n}\n\nheader .nav li .dropdown-menu li a {\n  line-height: 28px;\n  padding: 3px 12px;\n}\n.item-thumbs img {\n  margin-bottom: 15px;\n}\n.flex-control-paging li a.flex-active {\n  background: #000;\n  background: rgb(255, 255, 255);\n  cursor: default;\n}\n.flex-control-paging li a {\n  width: 30px;\n  height: 11px;\n  display: block;\n  background: #666;\n  background: rgba(0, 0, 0, 0.5);\n  cursor: pointer;\n  text-indent: -9999px;\n  -webkit-border-radius: 20px;\n  -moz-border-radius: 20px;\n  -o-border-radius: 20px;\n  border-radius: 20px;\n  box-shadow: inset 0 0 3px rgba(0, 0, 0, 0.3);\n}\n.panel-title > a {\n  color: inherit;\n  color: #fff;\n}\n.panel-group .panel-heading + .panel-collapse .panel-body {\n  border-top: 1px solid #ddd;\n  color: #fff;\n  background-color: #9c9c9c;\n}\n/* --- menu --- */\n\nheader .navigation {\n  float: right;\n}\n\nheader ul.nav li {\n  border: none;\n  margin: 0;\n}\n\nheader ul.nav li a {\n  font-size: 12px;\n  border: none;\n  font-weight: 700;\n  text-transform: uppercase;\n}\n\nheader ul.nav li ul li a {\n  font-size: 12px;\n  border: none;\n  font-weight: 300;\n  text-transform: uppercase;\n}\n\n.navbar .nav > li > a {\n  color: #848484;\n  text-shadow: none;\n  border: 1px solid rgba(255, 255, 255, 0) !important;\n}\n\n.navbar .nav a:hover {\n  background: none;\n  color: #14a085 !important;\n}\n\n.navbar .nav > .active > a,\n.navbar .nav > .active > a:hover {\n  background: none;\n  font-weight: 700;\n}\n\n.navbar .nav > .active > a:active,\n.navbar .nav > .active > a:focus {\n  background: none;\n  outline: 0;\n  font-weight: 700;\n}\n\n.navbar .nav li .dropdown-menu {\n  z-index: 2000;\n}\n\nheader ul.nav li ul {\n  margin-top: 1px;\n}\nheader ul.nav li ul li ul {\n  margin: 1px 0 0 1px;\n}\n.dropdown-menu .dropdown i {\n  position: absolute;\n  right: 0;\n  margin-top: 3px;\n  padding-left: 20px;\n}\n\n.navbar .nav > li > .dropdown-menu:before {\n  display: inline-block;\n  border-right: none;\n  border-bottom: none;\n  border-left: none;\n  border-bottom-color: none;\n  content: none;\n}\n.navbar-default .navbar-nav > .active > a,\n.navbar-default .navbar-nav > .active > a:hover,\n.navbar-default .navbar-nav > .active > a:focus {\n  color: #14a085;\n}\n\nul.nav li.dropdown a {\n  z-index: 1000;\n  display: block;\n}\n\nselect.selectmenu {\n  display: none;\n}\n.pageTitle {\n  color: #fff;\n  margin: 30px 0 3px;\n  display: inline-block;\n}\n\n#featured {\n  width: 100%;\n  background: #000;\n  position: relative;\n  margin: 0;\n  padding: 0;\n}\n\n/*  Sliders\n==================================== */\n/* --- flexslider --- */\n\n#featured .flexslider {\n  padding: 0;\n  background: #fff;\n  position: relative;\n  zoom: 1;\n}\n.flex-direction-nav .flex-prev {\n  left: 0px;\n}\n.flex-direction-nav .flex-next {\n  right: 0px;\n}\n.flex-caption {\n  zoom: 0;\n  color: #1c1d21;\n  margin: 0 auto;\n  padding: 1px;\n  position: absolute;\n  vertical-align: bottom;\n  text-align: center;\n  background-color: rgba(255, 255, 255, 0.26);\n  bottom: 5%;\n  display: block;\n  left: 0;\n  right: 0;\n}\n.flex-caption h3 {\n  color: #fff;\n  letter-spacing: 1px;\n  margin-bottom: 8px;\n  text-transform: uppercase;\n}\n.flex-caption p {\n  margin: 0 0 15px;\n}\n.skill-home {\n  margin-bottom: 50px;\n}\n.c1 {\n  border: #ed5441 1px solid;\n  background: #ed5441;\n}\n.c2 {\n  border: #d867b2 1px solid;\n  background: #d867b2;\n}\n.c3 {\n  border: #61b331 1px solid;\n  background: #4bc567;\n}\n.c4 {\n  border: #609cec 1px solid;\n  background: #26aff0;\n}\n.skill-home .icons {\n  padding: 33px 0 0 0;\n  width: 100%;\n  height: 178px;\n  color: rgb(255, 255, 255);\n  font-size: 42px;\n  font-size: 76px;\n  text-align: center;\n  -ms-border-radius: 50%;\n  -moz-border-radius: 50%;\n  -webkit-border-radius: 50%;\n  border-radius: 0;\n  display: inline-table;\n}\n.skill-home h2 {\n  padding-top: 20px;\n  font-size: 36px;\n  font-weight: 700;\n}\n.testimonial-solid {\n  padding: 50px 0 60px 0;\n  margin: 0 0 0 0;\n  background: #efefef;\n  text-align: center;\n}\n.testi-icon-area {\n  text-align: center;\n  position: absolute;\n  top: -84px;\n  margin: 0 auto;\n  width: 100%;\n  color: #000;\n}\n.testi-icon-area .quote {\n  padding: 15px 0 0 0;\n  margin: 0 0 0 0;\n  background: #ffffff;\n  text-align: center;\n  color: #26aff0;\n  display: inline-table;\n  width: 70px;\n  height: 70px;\n  -ms-border-radius: 50%;\n  -moz-border-radius: 50%;\n  -webkit-border-radius: 50%;\n  border-radius: 0;\n  font-size: 42px;\n  border: 1px solid #26aff0;\n  display: none;\n}\n\n.testi-icon-area .carousel-inner {\n  margin: 20px 0;\n}\n.carousel-indicators {\n  bottom: -30px;\n}\n.team-member {\n  text-align: center;\n  background-color: #f9f9f9;\n  padding-bottom: 15px;\n}\n.fancybox-title-inside-wrap {\n  padding: 3px 30px 6px;\n  background: #292929;\n}\n\n.item_introtext {\n  background-color: rgba(254, 254, 255, 0.66);\n  margin: 0 auto;\n  display: inline-block;\n  padding: 25px;\n}\n.item_introtext span {\n  font-size: 20px;\n  display: block;\n  font-weight: bold;\n}\n.item_introtext strong {\n  font-size: 50px;\n  display: block;\n  padding: 14px 0 30px;\n}\n.item_introtext p {\n  font-size: 20px !important;\n  color: #1c1d21;\n  font-weight: bold;\n}\n\n.form-control {\n  border-radius: 0;\n}\n\n/* Testimonial\n----------------------------------*/\n.testimonial-area {\n  padding: 0 0 0 0;\n  margin: 0;\n  background: url(../img/low-poly01.jpg) fixed center center;\n  background-size: cover;\n  -webkit-background-size: cover;\n  -moz-background-size: cover;\n  -ms-background-size: cover;\n  color: red;\n}\n.testimonial-solid p {\n  color: #1f1f1f;\n  font-size: 16px;\n  line-height: 30px;\n  font-style: italic;\n}\nsection.callaction {\n  background: #fff;\n  padding: 50px 0 0 0;\n}\n\n/* Content\n==================================== */\n\n#content {\n  position: relative;\n  background: #fff;\n  padding: 50px 0 0px 0;\n}\n\n#content img {\n  max-width: 100%;\n  height: auto;\n}\n\n.cta-text {\n  text-align: center;\n  margin-top: 10px;\n}\n\n.big-cta .cta {\n  margin-top: 10px;\n}\n\n.box {\n  width: 100%;\n}\n.box-gray {\n  background: #f8f8f8;\n  padding: 20px 20px 30px;\n}\n.box-gray h4,\n.box-gray i {\n  margin-bottom: 20px;\n}\n.box-bottom {\n  padding: 20px 0;\n  text-align: center;\n}\n.box-bottom a {\n  color: #fff;\n  font-weight: 700;\n}\n.box-bottom a:hover {\n  color: #eee;\n  text-decoration: none;\n}\n\n/* Bottom\n==================================== */\n\n#bottom {\n  background: #fcfcfc;\n  padding: 50px 0 0;\n}\n/* twitter */\n#twitter-wrapper {\n  text-align: center;\n  width: 70%;\n  margin: 0 auto;\n}\n#twitter em {\n  font-style: normal;\n  font-size: 13px;\n}\n\n#twitter em.twitterTime a {\n  font-weight: 600;\n}\n\n#twitter ul {\n  padding: 0;\n  list-style: none;\n}\n#twitter ul li {\n  font-size: 20px;\n  line-height: 1.6em;\n  font-weight: 300;\n  margin-bottom: 20px;\n  position: relative;\n  word-break: break-word;\n}\n\n/* page headline\n==================================== */\n\n#inner-headline {\n  background: #14a085;\n  position: relative;\n  margin: 0;\n  padding: 0;\n  color: #fefefe;\n  /* margin: 15px; */\n  border-top: 10px solid #11967c;\n}\n\n#inner-headline .inner-heading h2 {\n  color: #fff;\n  margin: 20px 0 0 0;\n}\n\n/* --- breadcrumbs --- */\n#inner-headline ul.breadcrumb {\n  margin: 30px 0 0;\n  float: left;\n}\n\n#inner-headline ul.breadcrumb li {\n  margin-bottom: 0;\n  padding-bottom: 0;\n}\n#inner-headline ul.breadcrumb li {\n  font-size: 13px;\n  color: #fff;\n}\n\n#inner-headline ul.breadcrumb li i {\n  color: #dedede;\n}\n\n#inner-headline ul.breadcrumb li a {\n  color: #fff;\n}\n\nul.breadcrumb li a:hover {\n  text-decoration: none;\n}\n\n/* Forms\n============================= */\n\n/* --- contact form  ---- */\nform#contactform input[type=\"text\"] {\n  width: 100%;\n  border: 1px solid #f5f5f5;\n  min-height: 40px;\n  padding-left: 20px;\n  font-size: 13px;\n  padding-right: 20px;\n  -webkit-box-sizing: border-box;\n  -moz-box-sizing: border-box;\n  box-sizing: border-box;\n}\n\nform#contactform textarea {\n  border: 1px solid #f5f5f5;\n  width: 100%;\n  padding-left: 20px;\n  padding-top: 10px;\n  font-size: 13px;\n  padding-right: 20px;\n  -webkit-box-sizing: border-box;\n  -moz-box-sizing: border-box;\n  box-sizing: border-box;\n}\n\nform#contactform .validation {\n  font-size: 11px;\n}\n\n#sendmessage {\n  border: 1px solid #e6e6e6;\n  background: #f6f6f6;\n  display: none;\n  text-align: center;\n  padding: 15px 12px 15px 65px;\n  margin: 10px 0;\n  font-weight: 600;\n  margin-bottom: 30px;\n}\n\n#sendmessage.show,\n.show {\n  display: block;\n}\n\nform#commentform input[type=\"text\"] {\n  width: 100%;\n  min-height: 40px;\n  padding-left: 20px;\n  font-size: 13px;\n  padding-right: 20px;\n  -webkit-box-sizing: border-box;\n  -moz-box-sizing: border-box;\n  box-sizing: border-box;\n  -webkit-border-radius: 2px 2px 2px 2px;\n  -moz-border-radius: 2px 2px 2px 2px;\n  border-radius: 2px 2px 2px 2px;\n}\n\nform#commentform textarea {\n  width: 100%;\n  padding-left: 20px;\n  padding-top: 10px;\n  font-size: 13px;\n  padding-right: 20px;\n  -webkit-box-sizing: border-box;\n  -moz-box-sizing: border-box;\n  box-sizing: border-box;\n  -webkit-border-radius: 2px 2px 2px 2px;\n  -moz-border-radius: 2px 2px 2px 2px;\n  border-radius: 2px 2px 2px 2px;\n}\n\n/* --- search form --- */\n.search {\n  float: right;\n  margin: 35px 0 0;\n  padding-bottom: 0;\n}\n\n#inner-headline form.input-append {\n  margin: 0;\n  padding: 0;\n}\n\n/*  Portfolio\n================================ */\n\n.work-nav #filters {\n  margin: 0;\n  padding: 0;\n  list-style: none;\n}\n\n.work-nav #filters li {\n  margin: 0 10px 30px 0;\n  padding: 0;\n  float: left;\n}\n\n.work-nav #filters li a {\n  color: #7f8289;\n  font-size: 16px;\n  display: block;\n}\n\n.work-nav #filters li a:hover {\n}\n\n.work-nav #filters li a.selected {\n  color: #de5e60;\n}\n\n#thumbs {\n  margin: 0;\n  padding: 0;\n}\n\n#thumbs li {\n  list-style-type: none;\n}\n\n.item-thumbs {\n  position: relative;\n  overflow: hidden;\n  margin-bottom: 30px;\n  cursor: pointer;\n}\n\n.item-thumbs a + img {\n  width: 100%;\n}\n\n.item-thumbs .hover-wrap {\n  position: absolute;\n  display: block;\n  width: 100%;\n  height: 100%;\n\n  opacity: 0;\n  filter: alpha(opacity=0);\n\n  -webkit-transition: all 450ms ease-out 0s;\n  -moz-transition: all 450ms ease-out 0s;\n  -o-transition: all 450ms ease-out 0s;\n  transition: all 450ms ease-out 0s;\n\n  -webkit-transform: rotateY(180deg) scale(0.5, 0.5);\n  -moz-transform: rotateY(180deg) scale(0.5, 0.5);\n  -ms-transform: rotateY(180deg) scale(0.5, 0.5);\n  -o-transform: rotateY(180deg) scale(0.5, 0.5);\n  transform: rotateY(180deg) scale(0.5, 0.5);\n}\n\n.item-thumbs:hover .hover-wrap,\n.item-thumbs.active .hover-wrap {\n  opacity: 1;\n  filter: alpha(opacity=100);\n\n  -webkit-transform: rotateY(0deg) scale(1, 1);\n  -moz-transform: rotateY(0deg) scale(1, 1);\n  -ms-transform: rotateY(0deg) scale(1, 1);\n  -o-transform: rotateY(0deg) scale(1, 1);\n  transform: rotateY(0deg) scale(1, 1);\n}\n\n.item-thumbs .hover-wrap .overlay-img {\n  position: absolute;\n  width: 90%;\n  height: 91%;\n  opacity: 0.5;\n  filter: alpha(opacity=80);\n  background: #14a085;\n}\n\n.item-thumbs .hover-wrap .overlay-img-thumb {\n  position: absolute;\n  border-radius: 60px;\n  top: 50%;\n  left: 45%;\n  margin: -16px 0 0 -16px;\n  color: #fff;\n  font-size: 32px;\n  line-height: 1em;\n  opacity: 1;\n  filter: alpha(opacity=100);\n}\n\nul.portfolio-categ {\n  margin: 10px 0 30px 0;\n  padding: 0;\n  float: left;\n  list-style: none;\n}\n\nul.portfolio-categ li {\n  margin: 0;\n  float: left;\n  list-style: none;\n  font-size: 13px;\n  font-weight: 600;\n  border: 1px solid #d5d5d5;\n  margin-right: 15px;\n}\n\nul.portfolio-categ li a {\n  display: block;\n  padding: 8px 20px;\n  color: #14a085;\n}\nul.portfolio-categ li.active {\n  border: 1px solid #d7d8d6;\n\n  background-color: #eaeaea;\n}\nul.portfolio-categ li.active a:hover,\nul.portfolio-categ li a:hover,\nul.portfolio-categ li a:focus,\nul.portfolio-categ li a:active {\n  text-decoration: none;\n  outline: 0;\n}\n#accordion-alt3 .panel-heading h4 {\n  font-size: 13px;\n  line-height: 28px;\n  color: #6b6b6b;\n}\n.panel .panel-heading h4 {\n  font-weight: 400;\n}\n.panel-title {\n  margin-top: 0;\n  margin-bottom: 0;\n  font-size: 15px;\n  color: inherit;\n}\n.panel-group .panel {\n  margin-bottom: 0;\n  border-radius: 2px;\n}\n.panel {\n  margin-bottom: 18px;\n  background-color: #b9b9b9;\n  border: 1px solid transparent;\n  border-radius: 2px;\n  -webkit-box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05);\n  box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05);\n}\n#accordion-alt3 .panel-heading h4 a i {\n  font-size: 13px;\n  line-height: 18px;\n  width: 18px;\n  height: 18px;\n  margin-right: 5px;\n  color: #fff;\n  text-align: center;\n  border-radius: 50%;\n  margin-left: 6px;\n}\n.progress.pb-sm {\n  height: 6px !important;\n}\n.progress {\n  box-shadow: inset 0 0 2px rgba(0, 0, 0, 0.1);\n}\n.progress {\n  overflow: hidden;\n  height: 18px;\n  margin-bottom: 18px;\n  background-color: #f5f5f5;\n  border-radius: 2px;\n  -webkit-box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1);\n  box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1);\n}\n.progress .progress-bar.progress-bar-red {\n  background: #ed5441;\n}\n.progress .progress-bar.progress-bar-green {\n  background: #51d466;\n}\n.progress .progress-bar.progress-bar-lblue {\n  background: #32c8de;\n}\n/* --- portfolio detail --- */\n.top-wrapper {\n  margin-bottom: 20px;\n}\n.info-blocks {\n  margin-bottom: 15px;\n}\n.info-blocks i.icon-info-blocks {\n  float: left;\n  color: #318fcf;\n  font-size: 30px;\n  min-width: 50px;\n  margin-top: 6px;\n  text-align: center;\n  background-color: #efefef;\n  padding: 15px;\n}\n.info-blocks .info-blocks-in {\n  padding: 0 10px;\n  overflow: hidden;\n}\n.info-blocks .info-blocks-in h3 {\n  color: #555;\n  font-size: 20px;\n  line-height: 28px;\n  margin: 0px;\n}\n.info-blocks .info-blocks-in p {\n  font-size: 12px;\n}\n\nblockquote {\n  font-size: 16px;\n  font-weight: 400;\n  font-family: \"Noto Serif\", serif;\n  font-style: italic;\n  padding-left: 0;\n  color: #a2a2a2;\n  line-height: 1.6em;\n  border: none;\n}\n\nblockquote cite {\n  display: block;\n  font-size: 12px;\n  color: #666;\n  margin-top: 10px;\n}\nblockquote cite:before {\n  content: \"\\2014 \\0020\";\n}\nblockquote cite a,\nblockquote cite a:visited,\nblockquote cite a:visited {\n  color: #555;\n}\n\n/* --- pullquotes --- */\n\n.pullquote-left {\n  display: block;\n  color: #a2a2a2;\n  font-family: \"Noto Serif\", serif;\n  font-size: 14px;\n  line-height: 1.6em;\n  padding-left: 20px;\n}\n\n.pullquote-right {\n  display: block;\n  color: #a2a2a2;\n  font-family: \"Noto Serif\", serif;\n  font-size: 14px;\n  line-height: 1.6em;\n  padding-right: 20px;\n}\n\n/* --- button --- */\n.btn {\n  text-align: center;\n  background: #318cca;\n  color: #fff;\n  border-radius: 0;\n  padding: 10px 30px;\n}\n.btn-theme {\n  color: #fff;\n}\n.btn-theme:hover {\n  color: #eee;\n}\n\n/* --- list style --- */\n\nul.general {\n  list-style: none;\n  margin-left: 0;\n}\n\nul.link-list {\n  margin: 0;\n  padding: 0;\n  list-style: none;\n}\n\nul.link-list li {\n  margin: 0;\n  padding: 2px 0 2px 0;\n  list-style: none;\n}\nfooter {\n  background: #14a085;\n}\nfooter ul.link-list li a {\n  color: #ffffff;\n}\nfooter ul.link-list li a:hover {\n  color: #e2e2e2;\n}\n/* --- Heading style --- */\n\nh4.heading {\n  font-weight: 700;\n}\n\n.heading {\n  margin-bottom: 30px;\n}\n\n.heading {\n  position: relative;\n}\n\n.widgetheading {\n  width: 100%;\n\n  padding: 0;\n}\n\n#bottom .widgetheading {\n  position: relative;\n  border-bottom: #e6e6e6 1px solid;\n  padding-bottom: 9px;\n}\n\naside .widgetheading {\n  position: relative;\n  border-bottom: #e9e9e9 1px solid;\n  padding-bottom: 9px;\n}\n\nfooter .widgetheading {\n  position: relative;\n}\n\nfooter .widget .social-network {\n  position: relative;\n}\n\n#bottom .widget .widgetheading span,\naside .widget .widgetheading span,\nfooter .widget .widgetheading span {\n  position: absolute;\n  width: 60px;\n  height: 1px;\n  bottom: -1px;\n  right: 0;\n}\n.box-area {\n  border: 1px solid #f3f3f3;\n  padding: 0 15px 12px;\n  padding-top: 41px;\n  margin-top: -42px;\n  text-align: left;\n  background-color: #f9f9f9;\n  position: relative;\n}\n/* --- Map --- */\n.map {\n  position: relative;\n  margin-top: -50px;\n  margin-bottom: 40px;\n}\n\n.map iframe {\n  width: 100%;\n  height: 450px;\n  border: none;\n}\n\n.map-grid iframe {\n  width: 100%;\n  height: 350px;\n  border: none;\n  margin: 0 0 -5px 0;\n  padding: 0;\n}\n\nul.team-detail {\n  margin: -10px 0 0 0;\n  padding: 0;\n  list-style: none;\n}\n\nul.team-detail li {\n  border-bottom: 1px dotted #e9e9e9;\n  margin: 0 0 15px 0;\n  padding: 0 0 15px 0;\n  list-style: none;\n}\n\nul.team-detail li label {\n  font-size: 13px;\n}\n\nul.team-detail li h4,\nul.team-detail li label {\n  margin-bottom: 0;\n}\n\nul.team-detail li ul.social-network {\n  border: none;\n  margin: 0;\n  padding: 0;\n}\n\nul.team-detail li ul.social-network li {\n  border: none;\n  margin: 0;\n}\nul.team-detail li ul.social-network li i {\n  margin: 0;\n}\n\n.pricing-title {\n  background: #fff;\n  text-align: center;\n  padding: 10px 0 10px 0;\n}\n\n.pricing-title h3 {\n  font-weight: 600;\n  margin-bottom: 0;\n}\n\n.pricing-offer {\n  background: #fcfcfc;\n  text-align: center;\n  padding: 40px 0 40px 0;\n  font-size: 18px;\n  border-top: 1px solid #e6e6e6;\n  border-bottom: 1px solid #e6e6e6;\n}\n\n.pricing-box.activeItem .pricing-offer {\n  color: #fff;\n}\n\n.pricing-offer strong {\n  font-size: 78px;\n  line-height: 89px;\n}\n\n.pricing-offer sup {\n  font-size: 28px;\n}\n\n.pricing-container {\n  background: #fff;\n  text-align: center;\n  font-size: 14px;\n}\n\n.pricing-container strong {\n  color: #353535;\n}\n\n.pricing-container ul {\n  list-style: none;\n  padding: 0;\n  margin: 0;\n}\n\n.pricing-container ul li {\n  border-bottom: 1px solid #f5f5f5;\n  list-style: none;\n  padding: 15px 0 15px 0;\n  margin: 0 0 0 0;\n  color: #222;\n}\n\n.pricing-action {\n  margin: 0;\n  background: #fcfcfc;\n  text-align: center;\n  padding: 20px 0 30px 0;\n}\n\n.pricing-wrapp {\n  margin: 0 auto;\n  width: 100%;\n  background: #fd0000;\n}\n.pricing-box-item {\n  border: 1px solid #f5f5f5;\n\n  background: #f9f9f9;\n  position: relative;\n  margin: 0 0 20px 0;\n  padding: 0;\n  -webkit-box-shadow: 0 2px 0 rgba(0, 0, 0, 0.03);\n  -moz-box-shadow: 0 2px 0 rgba(0, 0, 0, 0.03);\n  box-shadow: 0 2px 0 rgba(0, 0, 0, 0.03);\n  -webkit-box-sizing: border-box;\n  -moz-box-sizing: border-box;\n  box-sizing: border-box;\n}\n\n.pricing-box-item .pricing-heading {\n  text-align: center;\n  padding: 0px 0 0px 0;\n  display: block;\n}\n.pricing-box-item.activeItem .pricing-heading {\n  text-align: center;\n  padding: 0px 0 1px 0;\n  border-bottom: none;\n  display: block;\n  color: #fff;\n}\n.pricing-box-item.activeItem .pricing-heading h3 {\n}\n\n.pricing-box-item .pricing-heading h3 strong {\n  font-size: 20px;\n  font-weight: 700;\n  letter-spacing: -1px;\n}\n.pricing-box-item .pricing-heading h3 {\n  font-size: 35px;\n  font-weight: 300;\n  letter-spacing: -1px;\n}\n\n.pricing-box-item .pricing-terms {\n  text-align: center;\n  display: block;\n  overflow: hidden;\n  padding: 11px 0 5px;\n}\n\n.pricing-box-item .pricing-terms h6 {\n  font-style: italic;\n  margin-top: 10px;\n  color: #14a085;\n  font-size: 22px;\n  font-family: \"Noto Serif\", serif;\n}\n\n.pricing-box-item .icon .price-circled {\n  margin: 10px 10px 10px 0;\n  display: inline-block !important;\n  text-align: center !important;\n  color: #fff;\n  width: 68px;\n  height: 68px;\n  padding: 12px;\n  font-size: 16px;\n  font-weight: 700;\n  line-height: 68px;\n  text-shadow: none;\n  cursor: pointer;\n  background-color: #888;\n  border-radius: 64px;\n  -moz-border-radius: 64px;\n  -webkit-border-radius: 64px;\n}\n\n.pricing-box-item .pricing-action {\n  margin: 0;\n  text-align: center;\n  padding: 30px 0 30px 0;\n}\n\n/* ===== Widgets ===== */\n\n/* --- flickr --- */\n.widget .flickr_badge {\n  width: 100%;\n}\n.widget .flickr_badge img {\n  margin: 0 9px 20px 0;\n}\n\nfooter .widget .flickr_badge {\n  width: 100%;\n}\nfooter .widget .flickr_badge img {\n  margin: 0 9px 20px 0;\n}\n\n.flickr_badge img {\n  width: 50px;\n  height: 50px;\n  float: left;\n  margin: 0 9px 20px 0;\n}\n\n/* --- Recent post widget --- */\n\n.recent-post {\n  margin: 20px 0 0 0;\n  padding: 0;\n  line-height: 18px;\n}\n\n.recent-post h5 a:hover {\n  text-decoration: none;\n}\n\n.recent-post .text h5 a {\n  color: #353535;\n}\n\nfooter {\n  padding: 50px 0 0 0;\n  color: #f8f8f8;\n}\n\nfooter a {\n  color: #fff;\n}\n\nfooter a:hover {\n  color: #eee;\n}\n\nfooter h1,\nfooter h2,\nfooter h3,\nfooter h4,\nfooter h5,\nfooter h6 {\n  color: #fff;\n}\n\nfooter address {\n  line-height: 1.6em;\n  color: #ffffff;\n}\n\nfooter h5 a:hover,\nfooter a:hover {\n  text-decoration: none;\n}\n\nul.social-network {\n  list-style: none;\n  margin: 0;\n}\n\nul.social-network li {\n  display: inline;\n  margin: 0 5px;\n}\n\n#sub-footer {\n  text-shadow: none;\n  color: #f5f5f5;\n  padding: 0;\n  padding-top: 30px;\n  margin: 20px 0 0 0;\n  background: #14a085;\n}\n\n#sub-footer p {\n  margin: 0;\n  padding: 0;\n}\n\n#sub-footer span {\n  color: #f5f5f5;\n}\n\n.copyright {\n  text-align: left;\n  font-size: 12px;\n}\n\n#sub-footer ul.social-network {\n  float: right;\n}\n\n/* scroll to top */\n.scrollup {\n  position: fixed;\n  width: 32px;\n  height: 32px;\n  bottom: 0px;\n  right: 20px;\n  background: #222;\n}\n\na.scrollup {\n  outline: 0;\n  text-align: center;\n}\n\na.scrollup:hover,\na.scrollup:active,\na.scrollup:focus {\n  opacity: 1;\n  text-decoration: none;\n}\na.scrollup i {\n  margin-top: 10px;\n  color: #fff;\n}\na.scrollup i:hover {\n  text-decoration: none;\n}\n\n.absolute {\n  position: absolute;\n}\n\n.relative {\n  position: relative;\n}\n\n.aligncenter {\n  text-align: center;\n}\n\n.aligncenter span {\n  margin-left: 0;\n}\n\n.floatright {\n  float: right;\n}\n\n.floatleft {\n  float: left;\n}\n\n.floatnone {\n  float: none;\n}\n\n.aligncenter {\n  text-align: center;\n}\n\nimg.pull-left,\n.align-left {\n  float: left;\n  margin: 0 15px 15px 0;\n}\n\n.widget img.pull-left {\n  float: left;\n  margin: 0 15px 15px 0;\n}\n\nimg.pull-right,\n.align-right {\n  float: right;\n  margin: 0 0 15px 15px;\n}\n\narticle img.pull-left,\narticle .align-left {\n  float: left;\n  margin: 5px 15px 15px 0;\n}\n\narticle img.pull-right,\narticle .align-right {\n  float: right;\n  margin: 5px 0 15px 15px;\n}\n============================= */ .clear-marginbot {\n  margin-bottom: 0;\n}\n\n.marginbot10 {\n  margin-bottom: 10px;\n}\n.marginbot20 {\n  margin-bottom: 20px;\n}\n.marginbot30 {\n  margin-bottom: 30px;\n}\n.marginbot40 {\n  margin-bottom: 40px;\n}\n\n.clear-margintop {\n  margin-top: 0;\n}\n\n.margintop10 {\n  margin-top: 10px;\n}\n\n.margintop20 {\n  margin-top: 20px;\n}\n\n.margintop30 {\n  margin-top: 30px;\n}\n\n.margintop40 {\n  margin-top: 40px;\n}\n\n/*  Media queries \n============================= */\n\n@media (min-width: 768px) and (max-width: 979px) {\n  a.detail {\n    background: none;\n    width: 100%;\n  }\n\n  footer .widget form input#appendedInputButton {\n    display: block;\n    width: 91%;\n    -webkit-border-radius: 4px 4px 4px 4px;\n    -moz-border-radius: 4px 4px 4px 4px;\n    border-radius: 4px 4px 4px 4px;\n  }\n\n  footer .widget form .input-append .btn {\n    display: block;\n    width: 100%;\n    padding-right: 0;\n    padding-left: 0;\n    -webkit-box-sizing: border-box;\n    -moz-box-sizing: border-box;\n    box-sizing: border-box;\n    margin-top: 10px;\n  }\n\n  ul.related-folio li {\n    width: 156px;\n    margin: 0 20px 0 0;\n  }\n}\n\n@media (max-width: 767px) {\n  body {\n    padding-right: 0;\n    padding-left: 0;\n  }\n  .navbar-brand {\n    margin-top: 10px;\n    border-bottom: none;\n  }\n  .navbar-header {\n    margin-top: 20px;\n    border-bottom: none;\n  }\n\n  .navbar-nav {\n    border-top: none;\n    float: none;\n    width: 100%;\n  }\n  .navbar .nav > .active > a,\n  .navbar .nav > .active > a:hover {\n    background: none;\n    font-weight: 700;\n    color: #26aff0;\n  }\n  header .navbar-nav > li {\n    padding-bottom: 0px;\n    padding-top: 2px;\n  }\n  header .nav li .dropdown-menu {\n    margin-top: 0;\n  }\n\n  .dropdown-menu {\n    position: absolute;\n    top: 0;\n    left: 40px;\n    z-index: 1000;\n    display: none;\n    float: left;\n    min-width: 160px;\n    padding: 5px 0;\n    margin: 2px 0 0;\n    font-size: 13px;\n    list-style: none;\n    background-color: #fff;\n    background-clip: padding-box;\n    border: 1px solid #f5f5f5;\n    border: 1px solid rgba(0, 0, 0, 0.15);\n    border-radius: 0;\n    -webkit-box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175);\n    box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175);\n  }\n\n  .navbar-collapse.collapse {\n    border: none;\n    overflow: hidden;\n  }\n\n  .box {\n    border-bottom: 1px solid #e9e9e9;\n    padding-bottom: 20px;\n  }\n\n  #featured .flexslider .slide-caption {\n    width: 90%;\n    padding: 2%;\n    position: absolute;\n    left: 0;\n    bottom: -40px;\n  }\n\n  #inner-headline .breadcrumb {\n    float: left;\n    clear: both;\n    width: 100%;\n  }\n\n  .breadcrumb > li {\n    font-size: 13px;\n  }\n\n  ul.portfolio li article a i.icon-48 {\n    width: 20px;\n    height: 20px;\n    font-size: 16px;\n    line-height: 20px;\n  }\n\n  .left-sidebar {\n    border-right: none;\n    padding: 0 0 0 0;\n    border-bottom: 1px dotted #e6e6e6;\n    padding-bottom: 10px;\n    margin-bottom: 40px;\n  }\n\n  .right-sidebar {\n    margin-top: 30px;\n    border-left: none;\n    padding: 0 0 0 0;\n  }\n\n  footer .col-lg-1,\n  footer .col-lg-2,\n  footer .col-lg-3,\n  footer .col-lg-4,\n  footer .col-lg-5,\n  footer .col-lg-6,\n  footer .col-lg-7,\n  footer .col-lg-8,\n  footer .col-lg-9,\n  footer .col-lg-10,\n  footer .col-lg-11,\n  footer .col-lg-12 {\n    margin-bottom: 20px;\n  }\n\n  #sub-footer ul.social-network {\n    float: left;\n  }\n\n  [class*=\"span\"] {\n    margin-bottom: 20px;\n  }\n}\n\n@media (max-width: 480px) {\n  .bottom-article a.pull-right {\n    float: left;\n    margin-top: 20px;\n  }\n\n  .search {\n    float: left;\n  }\n\n  .flexslider .flex-caption {\n    display: none;\n  }\n\n  .cta-text {\n    margin: 0 auto;\n    text-align: center;\n  }\n\n  ul.portfolio li article a i {\n    width: 20px;\n    height: 20px;\n    font-size: 14px;\n  }\n}\n\n.box-area:before {\n  position: absolute;\n  width: 100%;\n  height: 100%;\n  z-index: 0;\n  background-color: red;\n  content: \"\";\n  position: absolute;\n  top: 7px;\n  left: -1px;\n  width: 100%;\n  height: 23px;\n  background: #f9f9f9;\n  -moz-transform: skewY(-3deg);\n  -o-transform: skewY(-3deg);\n  -ms-transform: skewY(-3deg);\n  -webkit-transform: skewY(-3deg);\n  transform: skewY(11deg);\n  background-size: cover;\n}\n.box-area:after {\n  position: absolute;\n  width: 100%;\n  height: 100%;\n  z-index: 0;\n  background-color: red;\n  content: \"\";\n  position: absolute;\n  top: 7px;\n  left: 1px;\n  width: 100%;\n  height: 22px;\n  background: #f9f9f9;\n  -moz-transform: skewY(-3deg);\n  -o-transform: skewY(-3deg);\n  -ms-transform: skewY(-3deg);\n  -webkit-transform: skewY(-3deg);\n  transform: skewY(-11deg);\n  background-size: cover;\n}\n.box-area h3 {\n  margin-top: -16px;\n  z-index: 12;\n  position: relative;\n}\n.courses {\n  padding: 50px 0;\n}\n.carousel-indicators li {\n  display: inline-block;\n  border: 1px solid #929292;\n}\n.textbox {\n  background-color: #efefef;\n  padding: 4px 25px;\n}\n.textbox h3 {\n  margin: 0;\n  padding: 22px 0 14px;\n  font-size: 18px;\n}\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/index.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"utf-8\">\n<title>Above Multi-purpose Free Bootstrap Responsive Template</title>\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n<meta name=\"description\" content=\"\" />\n<meta name=\"author\" content=\"http://webthemez.com\" />\n<!-- css -->\n<link href=\"css/bootstrap.min.css\" rel=\"stylesheet\" />\n<link href=\"css/fancybox/jquery.fancybox.css\" rel=\"stylesheet\">\n<link href=\"css/jcarousel.css\" rel=\"stylesheet\" />\n<link href=\"css/flexslider.css\" rel=\"stylesheet\" />\n<link href=\"js/owl-carousel/owl.carousel.css\" rel=\"stylesheet\">\n<link href=\"css/style.css\" rel=\"stylesheet\" />\n \n<!-- HTML5 shim, for IE6-8 support of HTML5 elements -->\n<!--[if lt IE 9]>\n      <script src=\"http://html5shim.googlecode.com/svn/trunk/html5.js\"></script>\n    <![endif]-->\n\n</head>\n<body>\n<div id=\"wrapper\">\n\t<!-- start header -->\n\t<header>\n        <div class=\"navbar navbar-default navbar-static-top\">\n            <div class=\"container\">\n                <div class=\"navbar-header\">\n                    <button type=\"button\" class=\"navbar-toggle\" data-toggle=\"collapse\" data-target=\".navbar-collapse\">\n                        <span class=\"icon-bar\"></span>\n                        <span class=\"icon-bar\"></span>\n                        <span class=\"icon-bar\"></span>\n                    </button>\n                    <a class=\"navbar-brand\" href=\"index.html\"><img src=\"img/logo.png\" alt=\"logo\"/></a>\n                </div>\n                <div class=\"navbar-collapse collapse \">\n                    <ul class=\"nav navbar-nav\">\n                        <li class=\"active\"><a href=\"index.html\">Home</a></li> \n\t\t\t\t\t\t<li><a href=\"about.html\">About Us</a></li>\n\t\t\t\t\t\t<li><a href=\"courses.html\">Courses</a></li>\n                        <li><a href=\"portfolio.html\">Portfolio</a></li>\n                        <li><a href=\"pricing.html\">Pricing</a></li>\n                        <li><a href=\"contact.html\">Contact</a></li>\n                    </ul>\n                </div>\n            </div>\n        </div>\n\t</header>\n\t<!-- end header -->\n\t<section id=\"featured\">\n\t \n\t<!-- Slider -->\n        <div id=\"main-slider\" class=\"flexslider\">\n            <ul class=\"slides\">\n              <li>\n                <img src=\"img/slides/1.jpg\" alt=\"\" />\n                <div class=\"flex-caption\">\n                   <div class=\"item_introtext\"> \n\t\t\t\t\t<strong>Online Education</strong>\n\t\t\t\t\t<p>The best educational template</p> </div>\n                </div>\n              </li>\n              <li>\n                <img src=\"img/slides/2.jpg\" alt=\"\" />\n                <div class=\"flex-caption\">\n                     <div class=\"item_introtext\"> \n\t\t\t\t\t<strong>School Education</strong>\n\t\t\t\t\t<p>Get all courses with on-line content</p> </div>\n                </div>\n              </li>\n              <li>\n                <img src=\"img/slides/3.jpg\" alt=\"\" />\n                <div class=\"flex-caption\">\n                     <div class=\"item_introtext\"> \n\t\t\t\t\t<strong>Collage Education</strong>\n\t\t\t\t\t<p>Awesome Template get it know</p> </div>\n                </div>\n              </li>\n            </ul>\n        </div>\n\t<!-- end slider -->\n \n\t</section>\n\t<section class=\"callaction\">\n\t<div class=\"container\">\n\t\t<div class=\"row\">\n\t\t\t<div class=\"col-lg-12\">\n\t\t\t\t<div class=\"aligncenter\"><h1 class=\"aligncenter\">Our Featured Courses</h1><span class=\"clear spacer_responsive_hide_mobile \" style=\"height:13px;display:block;\"></span>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus, vero mollitia velit ad consectetur. Alias, laborum excepturi nihil autem nemo numquam, ipsa architecto non, magni consequuntur quam.</div>\n\t\t\t</div>\n\t\t</div>\n\t</div>\n\t</section>\n\t<section id=\"content\">\n\t\n\t\n\t<div class=\"container\">\n\t\t\t<div class=\"row\">\n\t\t<div class=\"skill-home\"> <div class=\"skill-home-solid clearfix\"> \n\t\t<div class=\"col-md-3 text-center\">\n\t\t<span class=\"icons c1\"><i class=\"fa fa-trophy\"></i></span> <div class=\"box-area\">\n\t\t<h3>Web Development</h3> <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident</p></div>\n\t\t</div>\n\t\t<div class=\"col-md-3 text-center\"> \n\t\t<span class=\"icons c2\"><i class=\"fa fa-picture-o\"></i></span> <div class=\"box-area\">\n\t\t<h3>UI Design</h3> <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident</p></div>\n\t\t</div>\n\t\t<div class=\"col-md-3 text-center\"> \n\t\t<span class=\"icons c3\"><i class=\"fa fa-desktop\"></i></span> <div class=\"box-area\">\n\t\t<h3>Interaction</h3> <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident</p></div>\n\t\t</div>\n\t\t<div class=\"col-md-3 text-center\"> \n\t\t<span class=\"icons c4\"><i class=\"fa fa-globe\"></i></span> <div class=\"box-area\">\n\t\t<h3>User Experiance</h3> <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident</p>\n\t\t</div></div>\n\t\t</div></div>\n\t\t</div> \n\t\t \n\n\t</div>\n\t</section>\n\t<div class=\"testimonial-area\">\n    <div class=\"testimonial-solid\">\n        <div class=\"container\">\n            <div class=\"testi-icon-area\">\n                <div class=\"quote\">\n                    <i class=\"fa fa-microphone\"></i>\n                </div>\n            </div>\n            <div id=\"carousel-example-generic\" class=\"carousel slide\" data-ride=\"carousel\">\n                <ol class=\"carousel-indicators\">\n                    <li data-target=\"#carousel-example-generic\" data-slide-to=\"0\" class=\"\">\n                        <a href=\"#\"></a>\n                    </li>\n                    <li data-target=\"#carousel-example-generic\" data-slide-to=\"1\" class=\"\">\n                        <a href=\"#\"></a>\n                    </li>\n                    <li data-target=\"#carousel-example-generic\" data-slide-to=\"2\" class=\"active\">\n                        <a href=\"#\"></a>\n                    </li>\n                    <li data-target=\"#carousel-example-generic\" data-slide-to=\"3\" class=\"\">\n                        <a href=\"#\"></a>\n                    </li>\n                </ol>\n                <div class=\"carousel-inner\">\n                    <div class=\"item\">\n                        <p>Blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpa qui officia deserunt mollitia animi.</p>\n                        <p>\n                            <b>- Mark John -</b>\n                        </p>\n                    </div>\n                    <div class=\"item\">\n                        <p>Blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpa qui officia deserunt mollitia animi.</p>\n                        <p>\n                            <b>- Jaison Warner -</b>\n                        </p>\n                    </div>\n                    <div class=\"item active\">\n                        <p>Blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpa qui officia deserunt mollitia animi.</p>\n                        <p>\n                            <b>- Tony Antonio -</b>\n                        </p>\n                    </div>\n                    <div class=\"item\">\n                        <p>Blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpa qui officia deserunt mollitia animi.</p>\n                        <p>\n                            <b>- Leena Doe -</b>\n                        </p>\n                    </div>\n                </div>\n            </div>\n        </div>\n    </div>\n</div>\n<section class=\"courses\">\n<div class=\"container\">\n\n\t\t<div class=\"row\">\n\t\t\t<div class=\"col-lg-12\">\n\t\t\t\t<div class=\"aligncenter\"><h2 class=\"aligncenter\">Courses We Offer</h1><span class=\"clear spacer_responsive_hide_mobile \" style=\"height:13px;display:block;\"></span>Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus, vero mollitia velit ad consectetur. Alias, laborum excepturi nihil autem nemo numquam, ipsa architecto non, magni consequuntur quam.</div>\n\t\t\t</div>\n\t\t</div>\n<div class=\"row\">\n            <div class=\"col-md-4\">\n\t\t\t<div class=\"textbox\">\n                <h3>Heading Course</h3>\n\t\t\t\t<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>\n            </div> </div>\n            <div class=\"col-md-4\">\n\t\t\t<div class=\"textbox\">\n                  <h3>Heading Course</h3>\n\t\t\t\t<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>\n            </div> </div>\n\t\t\t<div class=\"col-md-4\">\n\t\t\t<div class=\"textbox\">\n                  <h3>Heading Course</h3>\n\t\t\t\t<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>\n            </div> </div>\n        </div>\n\t\t<div class=\"row\">\n            <div class=\"col-md-4\">\n\t\t\t<div class=\"textbox\">\n                <h3>Heading Course</h3>\n\t\t\t\t<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>\n            </div> </div>\n            <div class=\"col-md-4\">\n\t\t\t<div class=\"textbox\">\n                  <h3>Heading Course</h3>\n\t\t\t\t<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>\n            </div> </div>\n\t\t\t<div class=\"col-md-4\">\n\t\t\t<div class=\"textbox\">\n                  <h3>Heading Course</h3>\n\t\t\t\t<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vest sit amet, consec ibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.</p>\n            </div> </div>\n        </div>\n</div>\n</section>\n\t<footer>\n\t<div class=\"container\">\n\t\t<div class=\"row\">\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"widget\">\n\t\t\t\t\t<h5 class=\"widgetheading\">Our Contact</h5>\n\t\t\t\t\t<address>\n\t\t\t\t\t<strong>Abovecompany Inc</strong><br>\n\t\t\t\t\tJC Main Road, Near Silnile tower<br>\n\t\t\t\t\t Pin-21542 NewYork US.</address>\n\t\t\t\t\t<p>\n\t\t\t\t\t\t<i class=\"icon-phone\"></i> (123) 456-789 - 1255-12584 <br>\n\t\t\t\t\t\t<i class=\"icon-envelope-alt\"></i> email@domainname.com\n\t\t\t\t\t</p>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"widget\">\n\t\t\t\t\t<h5 class=\"widgetheading\">Quick Links</h5>\n\t\t\t\t\t<ul class=\"link-list\">\n\t\t\t\t\t\t<li><a href=\"#\">Latest Events</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Terms and conditions</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Privacy policy</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Career</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Contact us</a></li>\n\t\t\t\t\t</ul>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"widget\">\n\t\t\t\t\t<h5 class=\"widgetheading\">Latest posts</h5>\n\t\t\t\t\t<ul class=\"link-list\">\n\t\t\t\t\t\t<li><a href=\"#\">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Pellentesque et pulvinar enim. Quisque at tempor ligula</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Natus error sit voluptatem accusantium doloremque</a></li>\n\t\t\t\t\t</ul>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"widget\">\n\t\t\t\t\t<h5 class=\"widgetheading\">Recent News</h5>\n\t\t\t\t\t<ul class=\"link-list\">\n\t\t\t\t\t\t<li><a href=\"#\">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Pellentesque et pulvinar enim. Quisque at tempor ligula</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Natus error sit voluptatem accusantium doloremque</a></li>\n\t\t\t\t\t</ul>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t</div>\n\t</div>\n\t<div id=\"sub-footer\">\n\t\t<div class=\"container\">\n\t\t\t<div class=\"row\">\n\t\t\t\t<div class=\"col-lg-6\">\n\t\t\t\t\t<div class=\"copyright\">\n\t\t\t\t\t\t<p>\n\t\t\t\t\t\t\t<span>&copy; Above Site All right reserved. Template By </span><a href=\"http://webthemez.com\" target=\"_blank\">WebThemez</a>\n\t\t\t\t\t\t</p>\n\t\t\t\t\t</div>\n\t\t\t\t</div>\n\t\t\t\t<div class=\"col-lg-6\">\n\t\t\t\t\t<ul class=\"social-network\">\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Facebook\"><i class=\"fa fa-facebook\"></i></a></li>\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Twitter\"><i class=\"fa fa-twitter\"></i></a></li>\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Linkedin\"><i class=\"fa fa-linkedin\"></i></a></li>\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Pinterest\"><i class=\"fa fa-pinterest\"></i></a></li>\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Google plus\"><i class=\"fa fa-google-plus\"></i></a></li>\n\t\t\t\t\t</ul>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t</div>\n\t</div>\n\t</footer>\n</div>\n<a href=\"#\" class=\"scrollup\"><i class=\"fa fa-angle-up active\"></i></a>\n<!-- javascript\n    ================================================== -->\n<!-- Placed at the end of the document so the pages load faster -->\n<script src=\"js/jquery.js\"></script>\n<script src=\"js/jquery.easing.1.3.js\"></script>\n<script src=\"js/bootstrap.min.js\"></script>\n<script src=\"js/jquery.fancybox.pack.js\"></script>\n<script src=\"js/jquery.fancybox-media.js\"></script> \n<script src=\"js/portfolio/jquery.quicksand.js\"></script>\n<script src=\"js/portfolio/setting.js\"></script>\n<script src=\"js/jquery.flexslider.js\"></script>\n<script src=\"js/animate.js\"></script>\n<script src=\"js/custom.js\"></script>\n<script src=\"js/owl-carousel/owl.carousel.js\"></script>\n</body>\n</html>"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/animate.js",
    "content": "jQuery(document).ready(function ($) {\n  //animate effect\n  $(\".e_flash\").hover(\n    function () {\n      $(this).addClass(\"animated flash\");\n    },\n    function () {\n      $(this).removeClass(\"animated flash\");\n    },\n  );\n  $(\".e_bounce\").hover(\n    function () {\n      $(this).addClass(\"animated bounce\");\n    },\n    function () {\n      $(this).removeClass(\"animated bounce\");\n    },\n  );\n\n  $(\".e_shake\").hover(\n    function () {\n      $(this).addClass(\"animated shake\");\n    },\n    function () {\n      $(this).removeClass(\"animated shake\");\n    },\n  );\n  $(\".e_tada\").hover(\n    function () {\n      $(this).addClass(\"animated tada\");\n    },\n    function () {\n      $(this).removeClass(\"animated tada\");\n    },\n  );\n  $(\".e_swing\").hover(\n    function () {\n      $(this).addClass(\"animated swing\");\n    },\n    function () {\n      $(this).removeClass(\"animated swing\");\n    },\n  );\n  $(\".e_wobble\").hover(\n    function () {\n      $(this).addClass(\"animated wobble\");\n    },\n    function () {\n      $(this).removeClass(\"animated wobble\");\n    },\n  );\n  $(\".e_wiggle\").hover(\n    function () {\n      $(this).addClass(\"animated wiggle\");\n    },\n    function () {\n      $(this).removeClass(\"animated wiggle\");\n    },\n  );\n  $(\".e_pulse\").hover(\n    function () {\n      $(this).addClass(\"animated pulse\");\n    },\n    function () {\n      $(this).removeClass(\"animated pulse\");\n    },\n  );\n\n  $(\".e_flip\").hover(\n    function () {\n      $(this).addClass(\"animated flip\");\n    },\n    function () {\n      $(this).removeClass(\"animated flip\");\n    },\n  );\n  $(\".e_flipInX\").hover(\n    function () {\n      $(this).addClass(\"animated flipInX\");\n    },\n    function () {\n      $(this).removeClass(\"animated flipInX\");\n    },\n  );\n  $(\".e_flipOutX\").hover(\n    function () {\n      $(this).addClass(\"animated flipOutX\");\n    },\n    function () {\n      $(this).removeClass(\"animated flipOutX\");\n    },\n  );\n  $(\".e_flipInY\").hover(\n    function () {\n      $(this).addClass(\"animated flipInY\");\n    },\n    function () {\n      $(this).removeClass(\"animated flipInY\");\n    },\n  );\n  $(\".e_flipOutY\").hover(\n    function () {\n      $(this).addClass(\"animated flipOutY\");\n    },\n    function () {\n      $(this).removeClass(\"animated flipOutY\");\n    },\n  );\n\n  //Fading entrances\n  $(\".e_fadeIn\").hover(\n    function () {\n      $(this).addClass(\"animated fadeIn\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeIn\");\n    },\n  );\n  $(\".e_fadeInUp\").hover(\n    function () {\n      $(this).addClass(\"animated fadeInUp\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeInUp\");\n    },\n  );\n  $(\".e_fadeInDown\").hover(\n    function () {\n      $(this).addClass(\"animated fadeInDown\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeInDown\");\n    },\n  );\n  $(\".e_fadeInLeft\").hover(\n    function () {\n      $(this).addClass(\"animated fadeInLeft\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeInLeft\");\n    },\n  );\n  $(\".e_fadeInRight\").hover(\n    function () {\n      $(this).addClass(\"animated fadeInRight\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeInRight\");\n    },\n  );\n  $(\".e_fadeInUpBig\").hover(\n    function () {\n      $(this).addClass(\"animated fadeInUpBig\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeInUpBig\");\n    },\n  );\n  $(\".e_fadeInUpBig\").hover(\n    function () {\n      $(this).addClass(\"animated fadeInUpBig\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeInUpBig\");\n    },\n  );\n  $(\".e_fadeInDownBig\").hover(\n    function () {\n      $(this).addClass(\"animated fadeInDownBig\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeInDownBig\");\n    },\n  );\n  $(\".e_fadeInLeftBig\").hover(\n    function () {\n      $(this).addClass(\"animated fadeInLeftBig\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeInLeftBig\");\n    },\n  );\n  $(\".e_fadeInRightBig\").hover(\n    function () {\n      $(this).addClass(\"animated fadeInRightBig\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeInRightBig\");\n    },\n  );\n\n  //Fading exits\n  $(\".e_fadeOut\").hover(\n    function () {\n      $(this).addClass(\"animated fadeOut\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeOut\");\n    },\n  );\n  $(\".e_fadeOutUp\").hover(\n    function () {\n      $(this).addClass(\"animated fadeOutUp\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeOutUp\");\n    },\n  );\n  $(\".e_fadeOutDown\").hover(\n    function () {\n      $(this).addClass(\"animated fadeOutDown\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeOutDown\");\n    },\n  );\n  $(\".e_fadeOutLeft\").hover(\n    function () {\n      $(this).addClass(\"animated fadeOutLeft\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeOutLeft\");\n    },\n  );\n  $(\".e_fadeOutRight\").hover(\n    function () {\n      $(this).addClass(\"animated fadeOutRight\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeOutRight\");\n    },\n  );\n  $(\".e_fadeOutUpBig\").hover(\n    function () {\n      $(this).addClass(\"animated fadeOutUpBig\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeOutUpBig\");\n    },\n  );\n  $(\".e_fadeOutDownBig\").hover(\n    function () {\n      $(this).addClass(\"animated fadeOutDownBig\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeOutDownBig\");\n    },\n  );\n  $(\".e_fadeOutLeftBig\").hover(\n    function () {\n      $(this).addClass(\"animated fadeOutLeftBig\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeOutLeftBig\");\n    },\n  );\n  $(\".e_fadeOutRightBig\").hover(\n    function () {\n      $(this).addClass(\"animated fadeOutRightBig\");\n    },\n    function () {\n      $(this).removeClass(\"animated fadeOutRightBig\");\n    },\n  );\n\n  //Bouncing entrances\n  $(\".e_bounceIn\").hover(\n    function () {\n      $(this).addClass(\"animated bounceIn\");\n    },\n    function () {\n      $(this).removeClass(\"animated bounceIn\");\n    },\n  );\n  $(\".e_bounceInDown\").hover(\n    function () {\n      $(this).addClass(\"animated bounceInDown\");\n    },\n    function () {\n      $(this).removeClass(\"animated bounceInDown\");\n    },\n  );\n  $(\".e_bounceInUp\").hover(\n    function () {\n      $(this).addClass(\"animated bounceInUp\");\n    },\n    function () {\n      $(this).removeClass(\"animated bounceInUp\");\n    },\n  );\n  $(\".e_bounceInLeft\").hover(\n    function () {\n      $(this).addClass(\"animated bounceInLeft\");\n    },\n    function () {\n      $(this).removeClass(\"animated bounceInLeft\");\n    },\n  );\n  $(\".e_bounceInRight\").hover(\n    function () {\n      $(this).addClass(\"animated bounceInRight\");\n    },\n    function () {\n      $(this).removeClass(\"animated bounceInRight\");\n    },\n  );\n\n  //Bouncing exits\n  $(\".e_bounceOut\").hover(\n    function () {\n      $(this).addClass(\"animated bounceOut\");\n    },\n    function () {\n      $(this).removeClass(\"animated bounceOut\");\n    },\n  );\n  $(\".e_bounceOutDown\").hover(\n    function () {\n      $(this).addClass(\"animated bounceOutDown\");\n    },\n    function () {\n      $(this).removeClass(\"animated bounceOutDown\");\n    },\n  );\n  $(\".e_bounceOutUp\").hover(\n    function () {\n      $(this).addClass(\"animated bounceOutUp\");\n    },\n    function () {\n      $(this).removeClass(\"animated bounceOutUp\");\n    },\n  );\n  $(\".e_bounceOutLeft\").hover(\n    function () {\n      $(this).addClass(\"animated bounceOutLeft\");\n    },\n    function () {\n      $(this).removeClass(\"animated bounceOutLeft\");\n    },\n  );\n  $(\".e_bounceOutRight\").hover(\n    function () {\n      $(this).addClass(\"animated bounceOutRight\");\n    },\n    function () {\n      $(this).removeClass(\"animated bounceOutRight\");\n    },\n  );\n\n  //Rotating entrances\n  $(\".e_rotateIn\").hover(\n    function () {\n      $(this).addClass(\"animated rotateIn\");\n    },\n    function () {\n      $(this).removeClass(\"animated rotateIn\");\n    },\n  );\n  $(\".e_rotateInDownLeft\").hover(\n    function () {\n      $(this).addClass(\"animated rotateInDownLeft\");\n    },\n    function () {\n      $(this).removeClass(\"animated rotateInDownLeft\");\n    },\n  );\n  $(\".e_rotateInDownRight\").hover(\n    function () {\n      $(this).addClass(\"animated rotateInDownRight\");\n    },\n    function () {\n      $(this).removeClass(\"animated rotateInDownRight\");\n    },\n  );\n  $(\".e_rotateInUpRight\").hover(\n    function () {\n      $(this).addClass(\"animated rotateInUpRight\");\n    },\n    function () {\n      $(this).removeClass(\"animated rotateInUpRight\");\n    },\n  );\n  $(\".e_rotateInUpLeft\").hover(\n    function () {\n      $(this).addClass(\"animated rotateInUpLeft\");\n    },\n    function () {\n      $(this).removeClass(\"animated rotateInUpLeft\");\n    },\n  );\n\n  //Rotating exits\n  $(\".e_rotateOut\").hover(\n    function () {\n      $(this).addClass(\"animated rotateOut\");\n    },\n    function () {\n      $(this).removeClass(\"animated rotateOut\");\n    },\n  );\n  $(\".e_rotateOutDownLeft\").hover(\n    function () {\n      $(this).addClass(\"animated rotateOutDownLeft\");\n    },\n    function () {\n      $(this).removeClass(\"animated rotateOutDownLeft\");\n    },\n  );\n  $(\".e_rotateOutDownRight\").hover(\n    function () {\n      $(this).addClass(\"animated rotateOutDownRight\");\n    },\n    function () {\n      $(this).removeClass(\"animated rotateOutDownRight\");\n    },\n  );\n  $(\".e_rotateOutUpLeft\").hover(\n    function () {\n      $(this).addClass(\"animated rotateOutUpLeft\");\n    },\n    function () {\n      $(this).removeClass(\"animated rotateOutUpLeft\");\n    },\n  );\n  $(\".e_rotateOutUpRight\").hover(\n    function () {\n      $(this).addClass(\"animated rotateOutUpRight\");\n    },\n    function () {\n      $(this).removeClass(\"animated rotateOutUpRight\");\n    },\n  );\n\n  //Lightspeed\n  $(\".e_lightSpeedIn\").hover(\n    function () {\n      $(this).addClass(\"animated lightSpeedIn\");\n    },\n    function () {\n      $(this).removeClass(\"animated lightSpeedIn\");\n    },\n  );\n  $(\".e_lightSpeedOut\").hover(\n    function () {\n      $(this).addClass(\"animated lightSpeedOut\");\n    },\n    function () {\n      $(this).removeClass(\"animated lightSpeedOut\");\n    },\n  );\n\n  //specials\n  $(\".e_hinge\").hover(\n    function () {\n      $(this).addClass(\"animated hinge\");\n    },\n    function () {\n      $(this).removeClass(\"animated hinge\");\n    },\n  );\n  $(\".e_rollIn\").hover(\n    function () {\n      $(this).addClass(\"animated rollIn\");\n    },\n    function () {\n      $(this).removeClass(\"animated rollIn\");\n    },\n  );\n  $(\".e_rollOut\").hover(\n    function () {\n      $(this).addClass(\"animated rollOut\");\n    },\n    function () {\n      $(this).removeClass(\"animated rollOut\");\n    },\n  );\n});\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/custom.js",
    "content": "/*global jQuery:false */\njQuery(document).ready(function ($) {\n  \"use strict\";\n\n  //add some elements with animate effect\n\n  $(\".big-cta\").hover(\n    function () {\n      $(\".cta a\").addClass(\"animated shake\");\n    },\n    function () {\n      $(\".cta a\").removeClass(\"animated shake\");\n    },\n  );\n  $(\".box\").hover(\n    function () {\n      $(this).find(\".icon\").addClass(\"animated fadeInDown\");\n      $(this).find(\"p\").addClass(\"animated fadeInUp\");\n    },\n    function () {\n      $(this).find(\".icon\").removeClass(\"animated fadeInDown\");\n      $(this).find(\"p\").removeClass(\"animated fadeInUp\");\n    },\n  );\n\n  $(\".accordion\").on(\"show\", function (e) {\n    $(e.target)\n      .prev(\".accordion-heading\")\n      .find(\".accordion-toggle\")\n      .addClass(\"active\");\n    $(e.target)\n      .prev(\".accordion-heading\")\n      .find(\".accordion-toggle i\")\n      .removeClass(\"icon-plus\");\n    $(e.target)\n      .prev(\".accordion-heading\")\n      .find(\".accordion-toggle i\")\n      .addClass(\"icon-minus\");\n  });\n\n  $(\".accordion\").on(\"hide\", function (e) {\n    $(this).find(\".accordion-toggle\").not($(e.target)).removeClass(\"active\");\n    $(this)\n      .find(\".accordion-toggle i\")\n      .not($(e.target))\n      .removeClass(\"icon-minus\");\n    $(this).find(\".accordion-toggle i\").not($(e.target)).addClass(\"icon-plus\");\n  });\n\n  // tooltip\n  $(\".social-network li a, .options_box .color a\").tooltip();\n\n  // fancybox\n  $(\".fancybox\").fancybox({\n    padding: 0,\n    autoResize: true,\n    beforeShow: function () {\n      this.title = $(this.element).attr(\"title\");\n      this.title =\n        \"<h4>\" +\n        this.title +\n        \"</h4>\" +\n        \"<p>\" +\n        $(this.element).parent().find(\"img\").attr(\"alt\") +\n        \"</p>\";\n    },\n    helpers: {\n      title: { type: \"inside\" },\n    },\n  });\n\n  //scroll to top\n  $(window).scroll(function () {\n    if ($(this).scrollTop() > 100) {\n      $(\".scrollup\").fadeIn();\n    } else {\n      $(\".scrollup\").fadeOut();\n    }\n  });\n  $(\".scrollup\").click(function () {\n    $(\"html, body\").animate({ scrollTop: 0 }, 1000);\n    return false;\n  });\n  $(\"#post-slider\").flexslider({\n    // Primary Controls\n    controlNav: false, //Boolean: Create navigation for paging control of each clide? Note: Leave true for manualControls usage\n    directionNav: true, //Boolean: Create navigation for previous/next navigation? (true/false)\n    prevText: \"Previous\", //String: Set the text for the \"previous\" directionNav item\n    nextText: \"Next\", //String: Set the text for the \"next\" directionNav item\n\n    // Secondary Navigation\n    keyboard: true, //Boolean: Allow slider navigating via keyboard left/right keys\n    multipleKeyboard: false, //{NEW} Boolean: Allow keyboard navigation to affect multiple sliders. Default behavior cuts out keyboard navigation with more than one slider present.\n    mousewheel: false, //{UPDATED} Boolean: Requires jquery.mousewheel.js (https://github.com/brandonaaron/jquery-mousewheel) - Allows slider navigating via mousewheel\n    pausePlay: false, //Boolean: Create pause/play dynamic element\n    pauseText: \"Pause\", //String: Set the text for the \"pause\" pausePlay item\n    playText: \"Play\", //String: Set the text for the \"play\" pausePlay item\n\n    // Special properties\n    controlsContainer: \"\", //{UPDATED} Selector: USE CLASS SELECTOR. Declare which container the navigation elements should be appended too. Default container is the FlexSlider element. Example use would be \".flexslider-container\". Property is ignored if given element is not found.\n    manualControls: \"\", //Selector: Declare custom control navigation. Examples would be \".flex-control-nav li\" or \"#tabs-nav li img\", etc. The number of elements in your controlNav should match the number of slides/tabs.\n    sync: \"\", //{NEW} Selector: Mirror the actions performed on this slider with another slider. Use with care.\n    asNavFor: \"\", //{NEW} Selector: Internal property exposed for turning the slider into a thumbnail navigation for another slider\n  });\n\n  $(\"#main-slider\").flexslider({\n    namespace: \"flex-\", //{NEW} String: Prefix string attached to the class of every element generated by the plugin\n    selector: \".slides > li\", //{NEW} Selector: Must match a simple pattern. '{container} > {slide}' -- Ignore pattern at your own peril\n    animation: \"fade\", //String: Select your animation type, \"fade\" or \"slide\"\n    easing: \"swing\", //{NEW} String: Determines the easing method used in jQuery transitions. jQuery easing plugin is supported!\n    direction: \"horizontal\", //String: Select the sliding direction, \"horizontal\" or \"vertical\"\n    reverse: false, //{NEW} Boolean: Reverse the animation direction\n    animationLoop: true, //Boolean: Should the animation loop? If false, directionNav will received \"disable\" classes at either end\n    smoothHeight: false, //{NEW} Boolean: Allow height of the slider to animate smoothly in horizontal mode\n    startAt: 0, //Integer: The slide that the slider should start on. Array notation (0 = first slide)\n    slideshow: true, //Boolean: Animate slider automatically\n    slideshowSpeed: 7000, //Integer: Set the speed of the slideshow cycling, in milliseconds\n    animationSpeed: 600, //Integer: Set the speed of animations, in milliseconds\n    initDelay: 0, //{NEW} Integer: Set an initialization delay, in milliseconds\n    randomize: false, //Boolean: Randomize slide order\n\n    // Usability features\n    pauseOnAction: true, //Boolean: Pause the slideshow when interacting with control elements, highly recommended.\n    pauseOnHover: false, //Boolean: Pause the slideshow when hovering over slider, then resume when no longer hovering\n    useCSS: true, //{NEW} Boolean: Slider will use CSS3 transitions if available\n    touch: true, //{NEW} Boolean: Allow touch swipe navigation of the slider on touch-enabled devices\n    video: false, //{NEW} Boolean: If using video in the slider, will prevent CSS3 3D Transforms to avoid graphical glitches\n\n    // Primary Controls\n    controlNav: true, //Boolean: Create navigation for paging control of each clide? Note: Leave true for manualControls usage\n    directionNav: true, //Boolean: Create navigation for previous/next navigation? (true/false)\n    prevText: \"Previous\", //String: Set the text for the \"previous\" directionNav item\n    nextText: \"Next\", //String: Set the text for the \"next\" directionNav item\n\n    // Secondary Navigation\n    keyboard: true, //Boolean: Allow slider navigating via keyboard left/right keys\n    multipleKeyboard: false, //{NEW} Boolean: Allow keyboard navigation to affect multiple sliders. Default behavior cuts out keyboard navigation with more than one slider present.\n    mousewheel: false, //{UPDATED} Boolean: Requires jquery.mousewheel.js (https://github.com/brandonaaron/jquery-mousewheel) - Allows slider navigating via mousewheel\n    pausePlay: false, //Boolean: Create pause/play dynamic element\n    pauseText: \"Pause\", //String: Set the text for the \"pause\" pausePlay item\n    playText: \"Play\", //String: Set the text for the \"play\" pausePlay item\n\n    // Special properties\n    controlsContainer: \"\", //{UPDATED} Selector: USE CLASS SELECTOR. Declare which container the navigation elements should be appended too. Default container is the FlexSlider element. Example use would be \".flexslider-container\". Property is ignored if given element is not found.\n    manualControls: \"\", //Selector: Declare custom control navigation. Examples would be \".flex-control-nav li\" or \"#tabs-nav li img\", etc. The number of elements in your controlNav should match the number of slides/tabs.\n    sync: \"\", //{NEW} Selector: Mirror the actions performed on this slider with another slider. Use with care.\n    asNavFor: \"\", //{NEW} Selector: Internal property exposed for turning the slider into a thumbnail navigation for another slider\n  });\n});\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/flexslider/jquery.flexslider.js",
    "content": "/*\n * jQuery FlexSlider v1.8\n * http://www.woothemes.com/flexslider/\n *\n * Copyright 2012 WooThemes\n * Free to use under the MIT license.\n * http://www.opensource.org/licenses/mit-license.php\n *\n * Contributing Author: Tyler Smith\n */\n\n(function ($) {\n  //FlexSlider: Object Instance\n  $.flexslider = function (el, options) {\n    var slider = $(el);\n\n    // slider DOM reference for use outside of the plugin\n    $.data(el, \"flexslider\", slider);\n\n    slider.init = function () {\n      slider.vars = $.extend({}, $.flexslider.defaults, options);\n      $.data(el, \"flexsliderInit\", true);\n      slider.container = $(\".slides\", slider).eq(0);\n      slider.slides = $(\".slides:first > li\", slider);\n      slider.count = slider.slides.length;\n      slider.animating = false;\n      slider.currentSlide = slider.vars.slideToStart;\n      slider.animatingTo = slider.currentSlide;\n      slider.atEnd = slider.currentSlide == 0 ? true : false;\n      slider.eventType =\n        \"ontouchstart\" in document.documentElement ? \"touchstart\" : \"click\";\n      slider.cloneCount = 0;\n      slider.cloneOffset = 0;\n      slider.manualPause = false;\n      slider.vertical = slider.vars.slideDirection == \"vertical\";\n      slider.prop = slider.vertical ? \"top\" : \"marginLeft\";\n      slider.args = {};\n\n      //Test for webbkit CSS3 Animations\n      slider.transitions =\n        \"webkitTransition\" in document.body.style && slider.vars.useCSS;\n      if (slider.transitions) slider.prop = \"-webkit-transform\";\n\n      //Test for controlsContainer\n      if (slider.vars.controlsContainer != \"\") {\n        slider.controlsContainer = $(slider.vars.controlsContainer).eq(\n          $(\".slides\").index(slider.container),\n        );\n        slider.containerExists = slider.controlsContainer.length > 0;\n      }\n      //Test for manualControls\n      if (slider.vars.manualControls != \"\") {\n        slider.manualControls = $(\n          slider.vars.manualControls,\n          slider.containerExists ? slider.controlsContainer : slider,\n        );\n        slider.manualExists = slider.manualControls.length > 0;\n      }\n\n      ///////////////////////////////////////////////////////////////////\n      // FlexSlider: Randomize Slides\n      if (slider.vars.randomize) {\n        slider.slides.sort(function () {\n          return Math.round(Math.random()) - 0.5;\n        });\n        slider.container.empty().append(slider.slides);\n      }\n      ///////////////////////////////////////////////////////////////////\n\n      ///////////////////////////////////////////////////////////////////\n      // FlexSlider: Slider Animation Initialize\n      if (slider.vars.animation.toLowerCase() == \"slide\") {\n        if (slider.transitions) {\n          slider.setTransition(0);\n        }\n        slider.css({ overflow: \"hidden\" });\n        if (slider.vars.animationLoop) {\n          slider.cloneCount = 2;\n          slider.cloneOffset = 1;\n          slider.container\n            .append(slider.slides.filter(\":first\").clone().addClass(\"clone\"))\n            .prepend(slider.slides.filter(\":last\").clone().addClass(\"clone\"));\n        }\n        //create newSlides to capture possible clones\n        slider.newSlides = $(\".slides:first > li\", slider);\n        var sliderOffset = -1 * (slider.currentSlide + slider.cloneOffset);\n        if (slider.vertical) {\n          slider.newSlides.css({\n            display: \"block\",\n            width: \"100%\",\n            float: \"left\",\n          });\n          slider.container\n            .height((slider.count + slider.cloneCount) * 200 + \"%\")\n            .css(\"position\", \"absolute\")\n            .width(\"100%\");\n          //Timeout function to give browser enough time to get proper height initially\n          setTimeout(function () {\n            slider\n              .css({ position: \"relative\" })\n              .height(slider.slides.filter(\":first\").height());\n            slider.args[slider.prop] = slider.transitions\n              ? \"translate3d(0,\" + sliderOffset * slider.height() + \"px,0)\"\n              : sliderOffset * slider.height() + \"px\";\n            slider.container.css(slider.args);\n          }, 100);\n        } else {\n          slider.args[slider.prop] = slider.transitions\n            ? \"translate3d(\" + sliderOffset * slider.width() + \"px,0,0)\"\n            : sliderOffset * slider.width() + \"px\";\n          slider.container\n            .width((slider.count + slider.cloneCount) * 200 + \"%\")\n            .css(slider.args);\n          //Timeout function to give browser enough time to get proper width initially\n          setTimeout(function () {\n            slider.newSlides\n              .width(slider.width())\n              .css({ float: \"left\", display: \"block\" });\n          }, 100);\n        }\n      } else {\n        //Default to fade\n        //Not supporting fade CSS3 transitions right now\n        slider.transitions = false;\n        slider.slides\n          .css({ width: \"100%\", float: \"left\", marginRight: \"-100%\" })\n          .eq(slider.currentSlide)\n          .fadeIn(slider.vars.animationDuration);\n      }\n      ///////////////////////////////////////////////////////////////////\n\n      ///////////////////////////////////////////////////////////////////\n      // FlexSlider: Control Nav\n      if (slider.vars.controlNav) {\n        if (slider.manualExists) {\n          slider.controlNav = slider.manualControls;\n        } else {\n          var controlNavScaffold = $('<ol class=\"flex-control-nav\"></ol>');\n          var j = 1;\n          for (var i = 0; i < slider.count; i++) {\n            controlNavScaffold.append(\"<li><a>\" + j + \"</a></li>\");\n            j++;\n          }\n\n          if (slider.containerExists) {\n            $(slider.controlsContainer).append(controlNavScaffold);\n            slider.controlNav = $(\n              \".flex-control-nav li a\",\n              slider.controlsContainer,\n            );\n          } else {\n            slider.append(controlNavScaffold);\n            slider.controlNav = $(\".flex-control-nav li a\", slider);\n          }\n        }\n\n        slider.controlNav.eq(slider.currentSlide).addClass(\"active\");\n\n        slider.controlNav.bind(slider.eventType, function (event) {\n          event.preventDefault();\n          if (!$(this).hasClass(\"active\")) {\n            slider.controlNav.index($(this)) > slider.currentSlide\n              ? (slider.direction = \"next\")\n              : (slider.direction = \"prev\");\n            slider.flexAnimate(\n              slider.controlNav.index($(this)),\n              slider.vars.pauseOnAction,\n            );\n          }\n        });\n      }\n      ///////////////////////////////////////////////////////////////////\n\n      //////////////////////////////////////////////////////////////////\n      //FlexSlider: Direction Nav\n      if (slider.vars.directionNav) {\n        var directionNavScaffold = $(\n          '<ul class=\"flex-direction-nav\"><li><a class=\"prev\" href=\"#\">' +\n            slider.vars.prevText +\n            '</a></li><li><a class=\"next\" href=\"#\">' +\n            slider.vars.nextText +\n            \"</a></li></ul>\",\n        );\n\n        if (slider.containerExists) {\n          $(slider.controlsContainer).append(directionNavScaffold);\n          slider.directionNav = $(\n            \".flex-direction-nav li a\",\n            slider.controlsContainer,\n          );\n        } else {\n          slider.append(directionNavScaffold);\n          slider.directionNav = $(\".flex-direction-nav li a\", slider);\n        }\n\n        //Set initial disable styles if necessary\n        if (!slider.vars.animationLoop) {\n          if (slider.currentSlide == 0) {\n            slider.directionNav.filter(\".prev\").addClass(\"disabled\");\n          } else if (slider.currentSlide == slider.count - 1) {\n            slider.directionNav.filter(\".next\").addClass(\"disabled\");\n          }\n        }\n\n        slider.directionNav.bind(slider.eventType, function (event) {\n          event.preventDefault();\n          var target = $(this).hasClass(\"next\")\n            ? slider.getTarget(\"next\")\n            : slider.getTarget(\"prev\");\n\n          if (slider.canAdvance(target)) {\n            slider.flexAnimate(target, slider.vars.pauseOnAction);\n          }\n        });\n      }\n      //////////////////////////////////////////////////////////////////\n\n      //////////////////////////////////////////////////////////////////\n      //FlexSlider: Keyboard Nav\n      if (slider.vars.keyboardNav && $(\"ul.slides\").length == 1) {\n        function keyboardMove(event) {\n          if (slider.animating) {\n            return;\n          } else if (event.keyCode != 39 && event.keyCode != 37) {\n            return;\n          } else {\n            if (event.keyCode == 39) {\n              var target = slider.getTarget(\"next\");\n            } else if (event.keyCode == 37) {\n              var target = slider.getTarget(\"prev\");\n            }\n\n            if (slider.canAdvance(target)) {\n              slider.flexAnimate(target, slider.vars.pauseOnAction);\n            }\n          }\n        }\n        $(document).bind(\"keyup\", keyboardMove);\n      }\n      //////////////////////////////////////////////////////////////////\n\n      ///////////////////////////////////////////////////////////////////\n      // FlexSlider: Mousewheel interaction\n      if (slider.vars.mousewheel) {\n        slider.mousewheelEvent = /Firefox/i.test(navigator.userAgent)\n          ? \"DOMMouseScroll\"\n          : \"mousewheel\";\n        slider.bind(slider.mousewheelEvent, function (e) {\n          e.preventDefault();\n          e = e ? e : window.event;\n          var wheelData = e.detail\n              ? e.detail * -1\n              : e.originalEvent.wheelDelta / 40,\n            target =\n              wheelData < 0\n                ? slider.getTarget(\"next\")\n                : slider.getTarget(\"prev\");\n\n          if (slider.canAdvance(target)) {\n            slider.flexAnimate(target, slider.vars.pauseOnAction);\n          }\n        });\n      }\n      ///////////////////////////////////////////////////////////////////\n\n      //////////////////////////////////////////////////////////////////\n      //FlexSlider: Slideshow Setup\n      if (slider.vars.slideshow) {\n        //pauseOnHover\n        if (slider.vars.pauseOnHover && slider.vars.slideshow) {\n          slider.hover(\n            function () {\n              slider.pause();\n            },\n            function () {\n              if (!slider.manualPause) {\n                slider.resume();\n              }\n            },\n          );\n        }\n\n        //Initialize animation\n        slider.animatedSlides = setInterval(\n          slider.animateSlides,\n          slider.vars.slideshowSpeed,\n        );\n      }\n      //////////////////////////////////////////////////////////////////\n\n      //////////////////////////////////////////////////////////////////\n      //FlexSlider: Pause/Play\n      if (slider.vars.pausePlay) {\n        var pausePlayScaffold = $(\n          '<div class=\"flex-pauseplay\"><span></span></div>',\n        );\n\n        if (slider.containerExists) {\n          slider.controlsContainer.append(pausePlayScaffold);\n          slider.pausePlay = $(\n            \".flex-pauseplay span\",\n            slider.controlsContainer,\n          );\n        } else {\n          slider.append(pausePlayScaffold);\n          slider.pausePlay = $(\".flex-pauseplay span\", slider);\n        }\n\n        var pausePlayState = slider.vars.slideshow ? \"pause\" : \"play\";\n        slider.pausePlay\n          .addClass(pausePlayState)\n          .text(\n            pausePlayState == \"pause\"\n              ? slider.vars.pauseText\n              : slider.vars.playText,\n          );\n\n        slider.pausePlay.bind(slider.eventType, function (event) {\n          event.preventDefault();\n          if ($(this).hasClass(\"pause\")) {\n            slider.pause();\n            slider.manualPause = true;\n          } else {\n            slider.resume();\n            slider.manualPause = false;\n          }\n        });\n      }\n      //////////////////////////////////////////////////////////////////\n\n      //////////////////////////////////////////////////////////////////\n      //FlexSlider:Touch Swip Gestures\n      //Some brilliant concepts adapted from the following sources\n      //Source: TouchSwipe - http://www.netcu.de/jquery-touchwipe-iphone-ipad-library\n      //Source: SwipeJS - http://swipejs.com\n      if (\"ontouchstart\" in document.documentElement && slider.vars.touch) {\n        //For brevity, variables are named for x-axis scrolling\n        //The variables are then swapped if vertical sliding is applied\n        //This reduces redundant code...I think :)\n        //If debugging, recognize variables are named for horizontal scrolling\n        var startX,\n          startY,\n          offset,\n          cwidth,\n          dx,\n          startT,\n          scrolling = false;\n\n        slider.each(function () {\n          if (\"ontouchstart\" in document.documentElement) {\n            this.addEventListener(\"touchstart\", onTouchStart, false);\n          }\n        });\n\n        function onTouchStart(e) {\n          if (slider.animating) {\n            e.preventDefault();\n          } else if (e.touches.length == 1) {\n            slider.pause();\n            cwidth = slider.vertical ? slider.height() : slider.width();\n            startT = Number(new Date());\n            offset = slider.vertical\n              ? (slider.currentSlide + slider.cloneOffset) * slider.height()\n              : (slider.currentSlide + slider.cloneOffset) * slider.width();\n            startX = slider.vertical ? e.touches[0].pageY : e.touches[0].pageX;\n            startY = slider.vertical ? e.touches[0].pageX : e.touches[0].pageY;\n            slider.setTransition(0);\n\n            this.addEventListener(\"touchmove\", onTouchMove, false);\n            this.addEventListener(\"touchend\", onTouchEnd, false);\n          }\n        }\n\n        function onTouchMove(e) {\n          dx = slider.vertical\n            ? startX - e.touches[0].pageY\n            : startX - e.touches[0].pageX;\n          scrolling = slider.vertical\n            ? Math.abs(dx) < Math.abs(e.touches[0].pageX - startY)\n            : Math.abs(dx) < Math.abs(e.touches[0].pageY - startY);\n\n          if (!scrolling) {\n            e.preventDefault();\n            if (slider.vars.animation == \"slide\" && slider.transitions) {\n              if (!slider.vars.animationLoop) {\n                dx =\n                  dx /\n                  ((slider.currentSlide == 0 && dx < 0) ||\n                  (slider.currentSlide == slider.count - 1 && dx > 0)\n                    ? Math.abs(dx) / cwidth + 2\n                    : 1);\n              }\n              slider.args[slider.prop] = slider.vertical\n                ? \"translate3d(0,\" + (-offset - dx) + \"px,0)\"\n                : \"translate3d(\" + (-offset - dx) + \"px,0,0)\";\n              slider.container.css(slider.args);\n            }\n          }\n        }\n\n        function onTouchEnd(e) {\n          slider.animating = false;\n          if (\n            slider.animatingTo == slider.currentSlide &&\n            !scrolling &&\n            !(dx == null)\n          ) {\n            var target =\n              dx > 0 ? slider.getTarget(\"next\") : slider.getTarget(\"prev\");\n            if (\n              (slider.canAdvance(target) &&\n                Number(new Date()) - startT < 550 &&\n                Math.abs(dx) > 20) ||\n              Math.abs(dx) > cwidth / 2\n            ) {\n              slider.flexAnimate(target, slider.vars.pauseOnAction);\n            } else if (slider.vars.animation !== \"fade\") {\n              slider.flexAnimate(\n                slider.currentSlide,\n                slider.vars.pauseOnAction,\n              );\n            }\n          }\n\n          //Finish the touch by undoing the touch session\n          this.removeEventListener(\"touchmove\", onTouchMove, false);\n          this.removeEventListener(\"touchend\", onTouchEnd, false);\n          startX = null;\n          startY = null;\n          dx = null;\n          offset = null;\n        }\n      }\n      //////////////////////////////////////////////////////////////////\n\n      //////////////////////////////////////////////////////////////////\n      //FlexSlider: Resize Functions (If necessary)\n      if (slider.vars.animation.toLowerCase() == \"slide\") {\n        $(window).resize(function () {\n          if (!slider.animating && slider.is(\":visible\")) {\n            if (slider.vertical) {\n              slider.height(slider.slides.filter(\":first\").height());\n              slider.args[slider.prop] =\n                -1 *\n                  (slider.currentSlide + slider.cloneOffset) *\n                  slider.slides.filter(\":first\").height() +\n                \"px\";\n              if (slider.transitions) {\n                slider.setTransition(0);\n                slider.args[slider.prop] = slider.vertical\n                  ? \"translate3d(0,\" + slider.args[slider.prop] + \",0)\"\n                  : \"translate3d(\" + slider.args[slider.prop] + \",0,0)\";\n              }\n              slider.container.css(slider.args);\n            } else {\n              slider.newSlides.width(slider.width());\n              slider.args[slider.prop] =\n                -1 *\n                  (slider.currentSlide + slider.cloneOffset) *\n                  slider.width() +\n                \"px\";\n              if (slider.transitions) {\n                slider.setTransition(0);\n                slider.args[slider.prop] = slider.vertical\n                  ? \"translate3d(0,\" + slider.args[slider.prop] + \",0)\"\n                  : \"translate3d(\" + slider.args[slider.prop] + \",0,0)\";\n              }\n              slider.container.css(slider.args);\n            }\n          }\n        });\n      }\n      //////////////////////////////////////////////////////////////////\n\n      //FlexSlider: start() Callback\n      slider.vars.start(slider);\n    };\n\n    //FlexSlider: Animation Actions\n    slider.flexAnimate = function (target, pause) {\n      if (!slider.animating && slider.is(\":visible\")) {\n        //Animating flag\n        slider.animating = true;\n\n        //FlexSlider: before() animation Callback\n        slider.animatingTo = target;\n        slider.vars.before(slider);\n\n        //Optional paramter to pause slider when making an anmiation call\n        if (pause) {\n          slider.pause();\n        }\n\n        //Update controlNav\n        if (slider.vars.controlNav) {\n          slider.controlNav.removeClass(\"active\").eq(target).addClass(\"active\");\n        }\n\n        //Is the slider at either end\n        slider.atEnd = target == 0 || target == slider.count - 1 ? true : false;\n        if (!slider.vars.animationLoop && slider.vars.directionNav) {\n          if (target == 0) {\n            slider.directionNav\n              .removeClass(\"disabled\")\n              .filter(\".prev\")\n              .addClass(\"disabled\");\n          } else if (target == slider.count - 1) {\n            slider.directionNav\n              .removeClass(\"disabled\")\n              .filter(\".next\")\n              .addClass(\"disabled\");\n          } else {\n            slider.directionNav.removeClass(\"disabled\");\n          }\n        }\n\n        if (!slider.vars.animationLoop && target == slider.count - 1) {\n          slider.pause();\n          //FlexSlider: end() of cycle Callback\n          slider.vars.end(slider);\n        }\n\n        if (slider.vars.animation.toLowerCase() == \"slide\") {\n          var dimension = slider.vertical\n            ? slider.slides.filter(\":first\").height()\n            : slider.slides.filter(\":first\").width();\n\n          if (\n            slider.currentSlide == 0 &&\n            target == slider.count - 1 &&\n            slider.vars.animationLoop &&\n            slider.direction != \"next\"\n          ) {\n            slider.slideString = \"0px\";\n          } else if (\n            slider.currentSlide == slider.count - 1 &&\n            target == 0 &&\n            slider.vars.animationLoop &&\n            slider.direction != \"prev\"\n          ) {\n            slider.slideString = -1 * (slider.count + 1) * dimension + \"px\";\n          } else {\n            slider.slideString =\n              -1 * (target + slider.cloneOffset) * dimension + \"px\";\n          }\n          slider.args[slider.prop] = slider.slideString;\n\n          if (slider.transitions) {\n            slider.setTransition(slider.vars.animationDuration);\n            slider.args[slider.prop] = slider.vertical\n              ? \"translate3d(0,\" + slider.slideString + \",0)\"\n              : \"translate3d(\" + slider.slideString + \",0,0)\";\n            slider.container\n              .css(slider.args)\n              .one(\"webkitTransitionEnd transitionend\", function () {\n                slider.wrapup(dimension);\n              });\n          } else {\n            slider.container.animate(\n              slider.args,\n              slider.vars.animationDuration,\n              function () {\n                slider.wrapup(dimension);\n              },\n            );\n          }\n        } else {\n          //Default to Fade\n          slider.slides\n            .eq(slider.currentSlide)\n            .fadeOut(slider.vars.animationDuration);\n          slider.slides\n            .eq(target)\n            .fadeIn(slider.vars.animationDuration, function () {\n              slider.wrapup();\n            });\n        }\n      }\n    };\n\n    //FlexSlider: Function to minify redundant animation actions\n    slider.wrapup = function (dimension) {\n      if (slider.vars.animation == \"slide\") {\n        //Jump the slider if necessary\n        if (\n          slider.currentSlide == 0 &&\n          slider.animatingTo == slider.count - 1 &&\n          slider.vars.animationLoop\n        ) {\n          slider.args[slider.prop] = -1 * slider.count * dimension + \"px\";\n          if (slider.transitions) {\n            slider.setTransition(0);\n            slider.args[slider.prop] = slider.vertical\n              ? \"translate3d(0,\" + slider.args[slider.prop] + \",0)\"\n              : \"translate3d(\" + slider.args[slider.prop] + \",0,0)\";\n          }\n          slider.container.css(slider.args);\n        } else if (\n          slider.currentSlide == slider.count - 1 &&\n          slider.animatingTo == 0 &&\n          slider.vars.animationLoop\n        ) {\n          slider.args[slider.prop] = -1 * dimension + \"px\";\n          if (slider.transitions) {\n            slider.setTransition(0);\n            slider.args[slider.prop] = slider.vertical\n              ? \"translate3d(0,\" + slider.args[slider.prop] + \",0)\"\n              : \"translate3d(\" + slider.args[slider.prop] + \",0,0)\";\n          }\n          slider.container.css(slider.args);\n        }\n      }\n      slider.animating = false;\n      slider.currentSlide = slider.animatingTo;\n      //FlexSlider: after() animation Callback\n      slider.vars.after(slider);\n    };\n\n    //FlexSlider: Automatic Slideshow\n    slider.animateSlides = function () {\n      if (!slider.animating) {\n        slider.flexAnimate(slider.getTarget(\"next\"));\n      }\n    };\n\n    //FlexSlider: Automatic Slideshow Pause\n    slider.pause = function () {\n      clearInterval(slider.animatedSlides);\n      if (slider.vars.pausePlay) {\n        slider.pausePlay\n          .removeClass(\"pause\")\n          .addClass(\"play\")\n          .text(slider.vars.playText);\n      }\n    };\n\n    //FlexSlider: Automatic Slideshow Start/Resume\n    slider.resume = function () {\n      slider.animatedSlides = setInterval(\n        slider.animateSlides,\n        slider.vars.slideshowSpeed,\n      );\n      if (slider.vars.pausePlay) {\n        slider.pausePlay\n          .removeClass(\"play\")\n          .addClass(\"pause\")\n          .text(slider.vars.pauseText);\n      }\n    };\n\n    //FlexSlider: Helper function for non-looping sliders\n    slider.canAdvance = function (target) {\n      if (!slider.vars.animationLoop && slider.atEnd) {\n        if (\n          slider.currentSlide == 0 &&\n          target == slider.count - 1 &&\n          slider.direction != \"next\"\n        ) {\n          return false;\n        } else if (\n          slider.currentSlide == slider.count - 1 &&\n          target == 0 &&\n          slider.direction == \"next\"\n        ) {\n          return false;\n        } else {\n          return true;\n        }\n      } else {\n        return true;\n      }\n    };\n\n    //FlexSlider: Helper function to determine animation target\n    slider.getTarget = function (dir) {\n      slider.direction = dir;\n      if (dir == \"next\") {\n        return slider.currentSlide == slider.count - 1\n          ? 0\n          : slider.currentSlide + 1;\n      } else {\n        return slider.currentSlide == 0\n          ? slider.count - 1\n          : slider.currentSlide - 1;\n      }\n    };\n\n    //FlexSlider: Helper function to set CSS3 transitions\n    slider.setTransition = function (dur) {\n      slider.container.css({ \"-webkit-transition-duration\": dur / 1000 + \"s\" });\n    };\n\n    //FlexSlider: Initialize\n    slider.init();\n  };\n\n  //FlexSlider: Default Settings\n  $.flexslider.defaults = {\n    animation: \"slide\", //String: Select your animation type, \"fade\" or \"slide\"\n    slideDirection: \"horizontal\", //String: Select the sliding direction, \"horizontal\" or \"vertical\"\n    slideshow: true, //Boolean: Animate slider automatically\n    slideshowSpeed: 7000, //Integer: Set the speed of the slideshow cycling, in milliseconds\n    animationDuration: 600, //Integer: Set the speed of animations, in milliseconds\n    directionNav: false, //Boolean: Create navigation for previous/next navigation? (true/false)\n    controlNav: true, //Boolean: Create navigation for paging control of each clide? Note: Leave true for manualControls usage\n    keyboardNav: true, //Boolean: Allow slider navigating via keyboard left/right keys\n    mousewheel: false, //Boolean: Allow slider navigating via mousewheel\n    prevText: \"Previous\", //String: Set the text for the \"previous\" directionNav item\n    nextText: \"Next\", //String: Set the text for the \"next\" directionNav item\n    pausePlay: false, //Boolean: Create pause/play dynamic element\n    pauseText: \"Pause\", //String: Set the text for the \"pause\" pausePlay item\n    playText: \"Play\", //String: Set the text for the \"play\" pausePlay item\n    randomize: false, //Boolean: Randomize slide order\n    slideToStart: 0, //Integer: The slide that the slider should start on. Array notation (0 = first slide)\n    animationLoop: true, //Boolean: Should the animation loop? If false, directionNav will received \"disable\" classes at either end\n    pauseOnAction: true, //Boolean: Pause the slideshow when interacting with control elements, highly recommended.\n    pauseOnHover: false, //Boolean: Pause the slideshow when hovering over slider, then resume when no longer hovering\n    useCSS: true, //Boolean: Override the use of CSS3 Translate3d animations\n    touch: true, //Boolean: Disable touchswipe events\n    controlsContainer: \"\", //Selector: Declare which container the navigation elements should be appended too. Default container is the flexSlider element. Example use would be \".flexslider-container\", \"#container\", etc. If the given element is not found, the default action will be taken.\n    manualControls: \"\", //Selector: Declare custom control navigation. Example would be \".flex-control-nav li\" or \"#tabs-nav li img\", etc. The number of elements in your controlNav should match the number of slides/tabs.\n    start: function () {}, //Callback: function(slider) - Fires when the slider loads the first slide\n    before: function () {}, //Callback: function(slider) - Fires asynchronously with each slider animation\n    after: function () {}, //Callback: function(slider) - Fires after each slider animation completes\n    end: function () {}, //Callback: function(slider) - Fires when the slider reaches the last slide (asynchronous)\n  };\n\n  //FlexSlider: Plugin Function\n  $.fn.flexslider = function (options) {\n    return this.each(function () {\n      var $slides = $(this).find(\".slides > li\");\n      if ($slides.length === 1) {\n        $slides.fadeIn(400);\n        if (options && options.start) options.start($(this));\n      } else if ($(this).data(\"flexsliderInit\") != true) {\n        new $.flexslider(this, options);\n      }\n    });\n  };\n})(jQuery);\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/flexslider/setting.js",
    "content": "$(window).load(function () {\n  $(\".flexslider\").flexslider();\n});\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/google-code-prettify/prettify.css",
    "content": ".com {\n  color: #93a1a1;\n}\n.lit {\n  color: #195f91;\n}\n.pun,\n.opn,\n.clo {\n  color: #93a1a1;\n}\n.fun {\n  color: #dc322f;\n}\n.str,\n.atv {\n  color: #d14;\n}\n.kwd,\n.prettyprint .tag {\n  color: #1e347b;\n}\n.typ,\n.atn,\n.dec,\n.var {\n  color: teal;\n}\n.pln {\n  color: #48484c;\n}\n\n.prettyprint {\n  padding: 8px;\n  background-color: #f7f7f9;\n  border: 1px solid #e1e1e8;\n}\n.prettyprint.linenums {\n  -webkit-box-shadow:\n    inset 40px 0 0 #fbfbfc,\n    inset 41px 0 0 #ececf0;\n  -moz-box-shadow:\n    inset 40px 0 0 #fbfbfc,\n    inset 41px 0 0 #ececf0;\n  box-shadow:\n    inset 40px 0 0 #fbfbfc,\n    inset 41px 0 0 #ececf0;\n}\n\n/* Specify class=linenums on a pre to get line numbering */\nol.linenums {\n  margin: 0 0 0 33px; /* IE indents via margin-left */\n}\nol.linenums li {\n  padding-left: 12px;\n  color: #bebec5;\n  line-height: 20px;\n  text-shadow: 0 1px 0 #fff;\n}\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/google-code-prettify/prettify.js",
    "content": "var q = null;\nwindow.PR_SHOULD_USE_CONTINUATION = !0;\n(function () {\n  function L(a) {\n    function m(a) {\n      var f = a.charCodeAt(0);\n      if (f !== 92) return f;\n      var b = a.charAt(1);\n      return (f = r[b])\n        ? f\n        : \"0\" <= b && b <= \"7\"\n          ? parseInt(a.substring(1), 8)\n          : b === \"u\" || b === \"x\"\n            ? parseInt(a.substring(2), 16)\n            : a.charCodeAt(1);\n    }\n    function e(a) {\n      if (a < 32) return (a < 16 ? \"\\\\x0\" : \"\\\\x\") + a.toString(16);\n      a = String.fromCharCode(a);\n      if (a === \"\\\\\" || a === \"-\" || a === \"[\" || a === \"]\") a = \"\\\\\" + a;\n      return a;\n    }\n    function h(a) {\n      for (\n        var f = a\n            .substring(1, a.length - 1)\n            .match(\n              /\\\\u[\\dA-Fa-f]{4}|\\\\x[\\dA-Fa-f]{2}|\\\\[0-3][0-7]{0,2}|\\\\[0-7]{1,2}|\\\\[\\S\\s]|[^\\\\]/g,\n            ),\n          a = [],\n          b = [],\n          o = f[0] === \"^\",\n          c = o ? 1 : 0,\n          i = f.length;\n        c < i;\n        ++c\n      ) {\n        var j = f[c];\n        if (/\\\\[bdsw]/i.test(j)) a.push(j);\n        else {\n          var j = m(j),\n            d;\n          c + 2 < i && \"-\" === f[c + 1]\n            ? ((d = m(f[c + 2])), (c += 2))\n            : (d = j);\n          b.push([j, d]);\n          d < 65 ||\n            j > 122 ||\n            (d < 65 ||\n              j > 90 ||\n              b.push([Math.max(65, j) | 32, Math.min(d, 90) | 32]),\n            d < 97 ||\n              j > 122 ||\n              b.push([Math.max(97, j) & -33, Math.min(d, 122) & -33]));\n        }\n      }\n      b.sort(function (a, f) {\n        return a[0] - f[0] || f[1] - a[1];\n      });\n      f = [];\n      j = [NaN, NaN];\n      for (c = 0; c < b.length; ++c)\n        (i = b[c]),\n          i[0] <= j[1] + 1 ? (j[1] = Math.max(j[1], i[1])) : f.push((j = i));\n      b = [\"[\"];\n      o && b.push(\"^\");\n      b.push.apply(b, a);\n      for (c = 0; c < f.length; ++c)\n        (i = f[c]),\n          b.push(e(i[0])),\n          i[1] > i[0] && (i[1] + 1 > i[0] && b.push(\"-\"), b.push(e(i[1])));\n      b.push(\"]\");\n      return b.join(\"\");\n    }\n    function y(a) {\n      for (\n        var f = a.source.match(\n            /\\[(?:[^\\\\\\]]|\\\\[\\S\\s])*]|\\\\u[\\dA-Fa-f]{4}|\\\\x[\\dA-Fa-f]{2}|\\\\\\d+|\\\\[^\\dux]|\\(\\?[!:=]|[()^]|[^()[\\\\^]+/g,\n          ),\n          b = f.length,\n          d = [],\n          c = 0,\n          i = 0;\n        c < b;\n        ++c\n      ) {\n        var j = f[c];\n        j === \"(\"\n          ? ++i\n          : \"\\\\\" === j.charAt(0) &&\n            (j = +j.substring(1)) &&\n            j <= i &&\n            (d[j] = -1);\n      }\n      for (c = 1; c < d.length; ++c) -1 === d[c] && (d[c] = ++t);\n      for (i = c = 0; c < b; ++c)\n        (j = f[c]),\n          j === \"(\"\n            ? (++i, d[i] === void 0 && (f[c] = \"(?:\"))\n            : \"\\\\\" === j.charAt(0) &&\n              (j = +j.substring(1)) &&\n              j <= i &&\n              (f[c] = \"\\\\\" + d[i]);\n      for (i = c = 0; c < b; ++c)\n        \"^\" === f[c] && \"^\" !== f[c + 1] && (f[c] = \"\");\n      if (a.ignoreCase && s)\n        for (c = 0; c < b; ++c)\n          (j = f[c]),\n            (a = j.charAt(0)),\n            j.length >= 2 && a === \"[\"\n              ? (f[c] = h(j))\n              : a !== \"\\\\\" &&\n                (f[c] = j.replace(/[A-Za-z]/g, function (a) {\n                  a = a.charCodeAt(0);\n                  return \"[\" + String.fromCharCode(a & -33, a | 32) + \"]\";\n                }));\n      return f.join(\"\");\n    }\n    for (var t = 0, s = !1, l = !1, p = 0, d = a.length; p < d; ++p) {\n      var g = a[p];\n      if (g.ignoreCase) l = !0;\n      else if (\n        /[a-z]/i.test(\n          g.source.replace(/\\\\u[\\da-f]{4}|\\\\x[\\da-f]{2}|\\\\[^UXux]/gi, \"\"),\n        )\n      ) {\n        s = !0;\n        l = !1;\n        break;\n      }\n    }\n    for (\n      var r = { b: 8, t: 9, n: 10, v: 11, f: 12, r: 13 },\n        n = [],\n        p = 0,\n        d = a.length;\n      p < d;\n      ++p\n    ) {\n      g = a[p];\n      if (g.global || g.multiline) throw Error(\"\" + g);\n      n.push(\"(?:\" + y(g) + \")\");\n    }\n    return RegExp(n.join(\"|\"), l ? \"gi\" : \"g\");\n  }\n  function M(a) {\n    function m(a) {\n      switch (a.nodeType) {\n        case 1:\n          if (e.test(a.className)) break;\n          for (var g = a.firstChild; g; g = g.nextSibling) m(g);\n          g = a.nodeName;\n          if (\"BR\" === g || \"LI\" === g)\n            (h[s] = \"\\n\"), (t[s << 1] = y++), (t[(s++ << 1) | 1] = a);\n          break;\n        case 3:\n        case 4:\n          (g = a.nodeValue),\n            g.length &&\n              ((g = p\n                ? g.replace(/\\r\\n?/g, \"\\n\")\n                : g.replace(/[\\t\\n\\r ]+/g, \" \")),\n              (h[s] = g),\n              (t[s << 1] = y),\n              (y += g.length),\n              (t[(s++ << 1) | 1] = a));\n      }\n    }\n    var e = /(?:^|\\s)nocode(?:\\s|$)/,\n      h = [],\n      y = 0,\n      t = [],\n      s = 0,\n      l;\n    a.currentStyle\n      ? (l = a.currentStyle.whiteSpace)\n      : window.getComputedStyle &&\n        (l = document.defaultView\n          .getComputedStyle(a, q)\n          .getPropertyValue(\"white-space\"));\n    var p = l && \"pre\" === l.substring(0, 3);\n    m(a);\n    return { a: h.join(\"\").replace(/\\n$/, \"\"), c: t };\n  }\n  function B(a, m, e, h) {\n    m && ((a = { a: m, d: a }), e(a), h.push.apply(h, a.e));\n  }\n  function x(a, m) {\n    function e(a) {\n      for (\n        var l = a.d,\n          p = [l, \"pln\"],\n          d = 0,\n          g = a.a.match(y) || [],\n          r = {},\n          n = 0,\n          z = g.length;\n        n < z;\n        ++n\n      ) {\n        var f = g[n],\n          b = r[f],\n          o = void 0,\n          c;\n        if (typeof b === \"string\") c = !1;\n        else {\n          var i = h[f.charAt(0)];\n          if (i) (o = f.match(i[1])), (b = i[0]);\n          else {\n            for (c = 0; c < t; ++c)\n              if (((i = m[c]), (o = f.match(i[1])))) {\n                b = i[0];\n                break;\n              }\n            o || (b = \"pln\");\n          }\n          if (\n            (c = b.length >= 5 && \"lang-\" === b.substring(0, 5)) &&\n            !(o && typeof o[1] === \"string\")\n          )\n            (c = !1), (b = \"src\");\n          c || (r[f] = b);\n        }\n        i = d;\n        d += f.length;\n        if (c) {\n          c = o[1];\n          var j = f.indexOf(c),\n            k = j + c.length;\n          o[2] && ((k = f.length - o[2].length), (j = k - c.length));\n          b = b.substring(5);\n          B(l + i, f.substring(0, j), e, p);\n          B(l + i + j, c, C(b, c), p);\n          B(l + i + k, f.substring(k), e, p);\n        } else p.push(l + i, b);\n      }\n      a.e = p;\n    }\n    var h = {},\n      y;\n    (function () {\n      for (\n        var e = a.concat(m), l = [], p = {}, d = 0, g = e.length;\n        d < g;\n        ++d\n      ) {\n        var r = e[d],\n          n = r[3];\n        if (n) for (var k = n.length; --k >= 0; ) h[n.charAt(k)] = r;\n        r = r[1];\n        n = \"\" + r;\n        p.hasOwnProperty(n) || (l.push(r), (p[n] = q));\n      }\n      l.push(/[\\S\\s]/);\n      y = L(l);\n    })();\n    var t = m.length;\n    return e;\n  }\n  function u(a) {\n    var m = [],\n      e = [];\n    a.tripleQuotedStrings\n      ? m.push([\n          \"str\",\n          /^(?:'''(?:[^'\\\\]|\\\\[\\S\\s]|''?(?=[^']))*(?:'''|$)|\"\"\"(?:[^\"\\\\]|\\\\[\\S\\s]|\"\"?(?=[^\"]))*(?:\"\"\"|$)|'(?:[^'\\\\]|\\\\[\\S\\s])*(?:'|$)|\"(?:[^\"\\\\]|\\\\[\\S\\s])*(?:\"|$))/,\n          q,\n          \"'\\\"\",\n        ])\n      : a.multiLineStrings\n        ? m.push([\n            \"str\",\n            /^(?:'(?:[^'\\\\]|\\\\[\\S\\s])*(?:'|$)|\"(?:[^\"\\\\]|\\\\[\\S\\s])*(?:\"|$)|`(?:[^\\\\`]|\\\\[\\S\\s])*(?:`|$))/,\n            q,\n            \"'\\\"`\",\n          ])\n        : m.push([\n            \"str\",\n            /^(?:'(?:[^\\n\\r'\\\\]|\\\\.)*(?:'|$)|\"(?:[^\\n\\r\"\\\\]|\\\\.)*(?:\"|$))/,\n            q,\n            \"\\\"'\",\n          ]);\n    a.verbatimStrings && e.push([\"str\", /^@\"(?:[^\"]|\"\")*(?:\"|$)/, q]);\n    var h = a.hashComments;\n    h &&\n      (a.cStyleComments\n        ? (h > 1\n            ? m.push([\"com\", /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, q, \"#\"])\n            : m.push([\n                \"com\",\n                /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\\b|[^\\n\\r]*)/,\n                q,\n                \"#\",\n              ]),\n          e.push([\n            \"str\",\n            /^<(?:(?:(?:\\.\\.\\/)*|\\/?)(?:[\\w-]+(?:\\/[\\w-]+)+)?[\\w-]+\\.h|[a-z]\\w*)>/,\n            q,\n          ]))\n        : m.push([\"com\", /^#[^\\n\\r]*/, q, \"#\"]));\n    a.cStyleComments &&\n      (e.push([\"com\", /^\\/\\/[^\\n\\r]*/, q]),\n      e.push([\"com\", /^\\/\\*[\\S\\s]*?(?:\\*\\/|$)/, q]));\n    a.regexLiterals &&\n      e.push([\n        \"lang-regex\",\n        /^(?:^^\\.?|[!+-]|!=|!==|#|%|%=|&|&&|&&=|&=|\\(|\\*|\\*=|\\+=|,|-=|->|\\/|\\/=|:|::|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|[?@[^]|\\^=|\\^\\^|\\^\\^=|{|\\||\\|=|\\|\\||\\|\\|=|~|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*(\\/(?=[^*/])(?:[^/[\\\\]|\\\\[\\S\\s]|\\[(?:[^\\\\\\]]|\\\\[\\S\\s])*(?:]|$))+\\/)/,\n      ]);\n    (h = a.types) && e.push([\"typ\", h]);\n    a = (\"\" + a.keywords).replace(/^ | $/g, \"\");\n    a.length &&\n      e.push([\"kwd\", RegExp(\"^(?:\" + a.replace(/[\\s,]+/g, \"|\") + \")\\\\b\"), q]);\n    m.push([\"pln\", /^\\s+/, q, \" \\r\\n\\t\\xa0\"]);\n    e.push(\n      [\"lit\", /^@[$_a-z][\\w$@]*/i, q],\n      [\"typ\", /^(?:[@_]?[A-Z]+[a-z][\\w$@]*|\\w+_t\\b)/, q],\n      [\"pln\", /^[$_a-z][\\w$@]*/i, q],\n      [\n        \"lit\",\n        /^(?:0x[\\da-f]+|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d\\+)(?:e[+-]?\\d+)?)[a-z]*/i,\n        q,\n        \"0123456789\",\n      ],\n      [\"pln\", /^\\\\[\\S\\s]?/, q],\n      [\"pun\", /^.[^\\s\\w\"-$'./@\\\\`]*/, q],\n    );\n    return x(m, e);\n  }\n  function D(a, m) {\n    function e(a) {\n      switch (a.nodeType) {\n        case 1:\n          if (k.test(a.className)) break;\n          if (\"BR\" === a.nodeName)\n            h(a), a.parentNode && a.parentNode.removeChild(a);\n          else for (a = a.firstChild; a; a = a.nextSibling) e(a);\n          break;\n        case 3:\n        case 4:\n          if (p) {\n            var b = a.nodeValue,\n              d = b.match(t);\n            if (d) {\n              var c = b.substring(0, d.index);\n              a.nodeValue = c;\n              (b = b.substring(d.index + d[0].length)) &&\n                a.parentNode.insertBefore(s.createTextNode(b), a.nextSibling);\n              h(a);\n              c || a.parentNode.removeChild(a);\n            }\n          }\n      }\n    }\n    function h(a) {\n      function b(a, d) {\n        var e = d ? a.cloneNode(!1) : a,\n          f = a.parentNode;\n        if (f) {\n          var f = b(f, 1),\n            g = a.nextSibling;\n          f.appendChild(e);\n          for (var h = g; h; h = g) (g = h.nextSibling), f.appendChild(h);\n        }\n        return e;\n      }\n      for (; !a.nextSibling; ) if (((a = a.parentNode), !a)) return;\n      for (\n        var a = b(a.nextSibling, 0), e;\n        (e = a.parentNode) && e.nodeType === 1;\n\n      )\n        a = e;\n      d.push(a);\n    }\n    var k = /(?:^|\\s)nocode(?:\\s|$)/,\n      t = /\\r\\n?|\\n/,\n      s = a.ownerDocument,\n      l;\n    a.currentStyle\n      ? (l = a.currentStyle.whiteSpace)\n      : window.getComputedStyle &&\n        (l = s.defaultView\n          .getComputedStyle(a, q)\n          .getPropertyValue(\"white-space\"));\n    var p = l && \"pre\" === l.substring(0, 3);\n    for (l = s.createElement(\"LI\"); a.firstChild; ) l.appendChild(a.firstChild);\n    for (var d = [l], g = 0; g < d.length; ++g) e(d[g]);\n    m === (m | 0) && d[0].setAttribute(\"value\", m);\n    var r = s.createElement(\"OL\");\n    r.className = \"linenums\";\n    for (var n = Math.max(0, (m - 1) | 0) || 0, g = 0, z = d.length; g < z; ++g)\n      (l = d[g]),\n        (l.className = \"L\" + ((g + n) % 10)),\n        l.firstChild || l.appendChild(s.createTextNode(\"\\xa0\")),\n        r.appendChild(l);\n    a.appendChild(r);\n  }\n  function k(a, m) {\n    for (var e = m.length; --e >= 0; ) {\n      var h = m[e];\n      A.hasOwnProperty(h)\n        ? window.console &&\n          console.warn(\"cannot override language handler %s\", h)\n        : (A[h] = a);\n    }\n  }\n  function C(a, m) {\n    if (!a || !A.hasOwnProperty(a))\n      a = /^\\s*</.test(m) ? \"default-markup\" : \"default-code\";\n    return A[a];\n  }\n  function E(a) {\n    var m = a.g;\n    try {\n      var e = M(a.h),\n        h = e.a;\n      a.a = h;\n      a.c = e.c;\n      a.d = 0;\n      C(m, h)(a);\n      var k = /\\bMSIE\\b/.test(navigator.userAgent),\n        m = /\\n/g,\n        t = a.a,\n        s = t.length,\n        e = 0,\n        l = a.c,\n        p = l.length,\n        h = 0,\n        d = a.e,\n        g = d.length,\n        a = 0;\n      d[g] = s;\n      var r, n;\n      for (n = r = 0; n < g; )\n        d[n] !== d[n + 2] ? ((d[r++] = d[n++]), (d[r++] = d[n++])) : (n += 2);\n      g = r;\n      for (n = r = 0; n < g; ) {\n        for (\n          var z = d[n], f = d[n + 1], b = n + 2;\n          b + 2 <= g && d[b + 1] === f;\n\n        )\n          b += 2;\n        d[r++] = z;\n        d[r++] = f;\n        n = b;\n      }\n      for (d.length = r; h < p; ) {\n        var o = l[h + 2] || s,\n          c = d[a + 2] || s,\n          b = Math.min(o, c),\n          i = l[h + 1],\n          j;\n        if (i.nodeType !== 1 && (j = t.substring(e, b))) {\n          k && (j = j.replace(m, \"\\r\"));\n          i.nodeValue = j;\n          var u = i.ownerDocument,\n            v = u.createElement(\"SPAN\");\n          v.className = d[a + 1];\n          var x = i.parentNode;\n          x.replaceChild(v, i);\n          v.appendChild(i);\n          e < o &&\n            ((l[h + 1] = i = u.createTextNode(t.substring(b, o))),\n            x.insertBefore(i, v.nextSibling));\n        }\n        e = b;\n        e >= o && (h += 2);\n        e >= c && (a += 2);\n      }\n    } catch (w) {\n      \"console\" in window && console.log(w && w.stack ? w.stack : w);\n    }\n  }\n  var v = [\"break,continue,do,else,for,if,return,while\"],\n    w = [\n      [\n        v,\n        \"auto,case,char,const,default,double,enum,extern,float,goto,int,long,register,short,signed,sizeof,static,struct,switch,typedef,union,unsigned,void,volatile\",\n      ],\n      \"catch,class,delete,false,import,new,operator,private,protected,public,this,throw,true,try,typeof\",\n    ],\n    F = [\n      w,\n      \"alignof,align_union,asm,axiom,bool,concept,concept_map,const_cast,constexpr,decltype,dynamic_cast,explicit,export,friend,inline,late_check,mutable,namespace,nullptr,reinterpret_cast,static_assert,static_cast,template,typeid,typename,using,virtual,where\",\n    ],\n    G = [\n      w,\n      \"abstract,boolean,byte,extends,final,finally,implements,import,instanceof,null,native,package,strictfp,super,synchronized,throws,transient\",\n    ],\n    H = [\n      G,\n      \"as,base,by,checked,decimal,delegate,descending,dynamic,event,fixed,foreach,from,group,implicit,in,interface,internal,into,is,lock,object,out,override,orderby,params,partial,readonly,ref,sbyte,sealed,stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort,var\",\n    ],\n    w = [\n      w,\n      \"debugger,eval,export,function,get,null,set,undefined,var,with,Infinity,NaN\",\n    ],\n    I = [\n      v,\n      \"and,as,assert,class,def,del,elif,except,exec,finally,from,global,import,in,is,lambda,nonlocal,not,or,pass,print,raise,try,with,yield,False,True,None\",\n    ],\n    J = [\n      v,\n      \"alias,and,begin,case,class,def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo,rescue,retry,self,super,then,true,undef,unless,until,when,yield,BEGIN,END\",\n    ],\n    v = [v, \"case,done,elif,esac,eval,fi,function,in,local,set,then,until\"],\n    K =\n      /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\\d*)/,\n    N = /\\S/,\n    O = u({\n      keywords: [\n        F,\n        H,\n        w,\n        \"caller,delete,die,do,dump,elsif,eval,exit,foreach,for,goto,if,import,last,local,my,next,no,our,print,package,redo,require,sub,undef,unless,until,use,wantarray,while,BEGIN,END\" +\n          I,\n        J,\n        v,\n      ],\n      hashComments: !0,\n      cStyleComments: !0,\n      multiLineStrings: !0,\n      regexLiterals: !0,\n    }),\n    A = {};\n  k(O, [\"default-code\"]);\n  k(\n    x(\n      [],\n      [\n        [\"pln\", /^[^<?]+/],\n        [\"dec\", /^<!\\w[^>]*(?:>|$)/],\n        [\"com\", /^<\\!--[\\S\\s]*?(?:--\\>|$)/],\n        [\"lang-\", /^<\\?([\\S\\s]+?)(?:\\?>|$)/],\n        [\"lang-\", /^<%([\\S\\s]+?)(?:%>|$)/],\n        [\"pun\", /^(?:<[%?]|[%?]>)/],\n        [\"lang-\", /^<xmp\\b[^>]*>([\\S\\s]+?)<\\/xmp\\b[^>]*>/i],\n        [\"lang-js\", /^<script\\b[^>]*>([\\S\\s]*?)(<\\/script\\b[^>]*>)/i],\n        [\"lang-css\", /^<style\\b[^>]*>([\\S\\s]*?)(<\\/style\\b[^>]*>)/i],\n        [\"lang-in.tag\", /^(<\\/?[a-z][^<>]*>)/i],\n      ],\n    ),\n    [\"default-markup\", \"htm\", \"html\", \"mxml\", \"xhtml\", \"xml\", \"xsl\"],\n  );\n  k(\n    x(\n      [\n        [\"pln\", /^\\s+/, q, \" \\t\\r\\n\"],\n        [\"atv\", /^(?:\"[^\"]*\"?|'[^']*'?)/, q, \"\\\"'\"],\n      ],\n      [\n        [\"tag\", /^^<\\/?[a-z](?:[\\w-.:]*\\w)?|\\/?>$/i],\n        [\"atn\", /^(?!style[\\s=]|on)[a-z](?:[\\w:-]*\\w)?/i],\n        [\"lang-uq.val\", /^=\\s*([^\\s\"'>]*(?:[^\\s\"'/>]|\\/(?=\\s)))/],\n        [\"pun\", /^[/<->]+/],\n        [\"lang-js\", /^on\\w+\\s*=\\s*\"([^\"]+)\"/i],\n        [\"lang-js\", /^on\\w+\\s*=\\s*'([^']+)'/i],\n        [\"lang-js\", /^on\\w+\\s*=\\s*([^\\s\"'>]+)/i],\n        [\"lang-css\", /^style\\s*=\\s*\"([^\"]+)\"/i],\n        [\"lang-css\", /^style\\s*=\\s*'([^']+)'/i],\n        [\"lang-css\", /^style\\s*=\\s*([^\\s\"'>]+)/i],\n      ],\n    ),\n    [\"in.tag\"],\n  );\n  k(x([], [[\"atv\", /^[\\S\\s]+/]]), [\"uq.val\"]);\n  k(u({ keywords: F, hashComments: !0, cStyleComments: !0, types: K }), [\n    \"c\",\n    \"cc\",\n    \"cpp\",\n    \"cxx\",\n    \"cyc\",\n    \"m\",\n  ]);\n  k(u({ keywords: \"null,true,false\" }), [\"json\"]);\n  k(\n    u({\n      keywords: H,\n      hashComments: !0,\n      cStyleComments: !0,\n      verbatimStrings: !0,\n      types: K,\n    }),\n    [\"cs\"],\n  );\n  k(u({ keywords: G, cStyleComments: !0 }), [\"java\"]);\n  k(u({ keywords: v, hashComments: !0, multiLineStrings: !0 }), [\n    \"bsh\",\n    \"csh\",\n    \"sh\",\n  ]);\n  k(\n    u({\n      keywords: I,\n      hashComments: !0,\n      multiLineStrings: !0,\n      tripleQuotedStrings: !0,\n    }),\n    [\"cv\", \"py\"],\n  );\n  k(\n    u({\n      keywords:\n        \"caller,delete,die,do,dump,elsif,eval,exit,foreach,for,goto,if,import,last,local,my,next,no,our,print,package,redo,require,sub,undef,unless,until,use,wantarray,while,BEGIN,END\",\n      hashComments: !0,\n      multiLineStrings: !0,\n      regexLiterals: !0,\n    }),\n    [\"perl\", \"pl\", \"pm\"],\n  );\n  k(\n    u({\n      keywords: J,\n      hashComments: !0,\n      multiLineStrings: !0,\n      regexLiterals: !0,\n    }),\n    [\"rb\"],\n  );\n  k(u({ keywords: w, cStyleComments: !0, regexLiterals: !0 }), [\"js\"]);\n  k(\n    u({\n      keywords:\n        \"all,and,by,catch,class,else,extends,false,finally,for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then,true,try,unless,until,when,while,yes\",\n      hashComments: 3,\n      cStyleComments: !0,\n      multilineStrings: !0,\n      tripleQuotedStrings: !0,\n      regexLiterals: !0,\n    }),\n    [\"coffee\"],\n  );\n  k(x([], [[\"str\", /^[\\S\\s]+/]]), [\"regex\"]);\n  window.prettyPrintOne = function (a, m, e) {\n    var h = document.createElement(\"PRE\");\n    h.innerHTML = a;\n    e && D(h, e);\n    E({ g: m, i: e, h: h });\n    return h.innerHTML;\n  };\n  window.prettyPrint = function (a) {\n    function m() {\n      for (\n        var e = window.PR_SHOULD_USE_CONTINUATION ? l.now() + 250 : Infinity;\n        p < h.length && l.now() < e;\n        p++\n      ) {\n        var n = h[p],\n          k = n.className;\n        if (k.indexOf(\"prettyprint\") >= 0) {\n          var k = k.match(g),\n            f,\n            b;\n          if ((b = !k)) {\n            b = n;\n            for (var o = void 0, c = b.firstChild; c; c = c.nextSibling)\n              var i = c.nodeType,\n                o =\n                  i === 1\n                    ? o\n                      ? b\n                      : c\n                    : i === 3\n                      ? N.test(c.nodeValue)\n                        ? b\n                        : o\n                      : o;\n            b = (f = o === b ? void 0 : o) && \"CODE\" === f.tagName;\n          }\n          b && (k = f.className.match(g));\n          k && (k = k[1]);\n          b = !1;\n          for (o = n.parentNode; o; o = o.parentNode)\n            if (\n              (o.tagName === \"pre\" ||\n                o.tagName === \"code\" ||\n                o.tagName === \"xmp\") &&\n              o.className &&\n              o.className.indexOf(\"prettyprint\") >= 0\n            ) {\n              b = !0;\n              break;\n            }\n          b ||\n            ((b = (b = n.className.match(/\\blinenums\\b(?::(\\d+))?/))\n              ? b[1] && b[1].length\n                ? +b[1]\n                : !0\n              : !1) && D(n, b),\n            (d = { g: k, h: n, i: b }),\n            E(d));\n        }\n      }\n      p < h.length ? setTimeout(m, 250) : a && a();\n    }\n    for (\n      var e = [\n          document.getElementsByTagName(\"pre\"),\n          document.getElementsByTagName(\"code\"),\n          document.getElementsByTagName(\"xmp\"),\n        ],\n        h = [],\n        k = 0;\n      k < e.length;\n      ++k\n    )\n      for (var t = 0, s = e[k].length; t < s; ++t) h.push(e[k][t]);\n    var e = q,\n      l = Date;\n    l.now ||\n      (l = {\n        now: function () {\n          return +new Date();\n        },\n      });\n    var p = 0,\n      d,\n      g = /\\blang(?:uage)?-([\\w.]+)(?!\\S)/;\n    m();\n  };\n  window.PR = {\n    createSimpleLexer: x,\n    registerLangHandler: k,\n    sourceDecorator: u,\n    PR_ATTRIB_NAME: \"atn\",\n    PR_ATTRIB_VALUE: \"atv\",\n    PR_COMMENT: \"com\",\n    PR_DECLARATION: \"dec\",\n    PR_KEYWORD: \"kwd\",\n    PR_LITERAL: \"lit\",\n    PR_NOCODE: \"nocode\",\n    PR_PLAIN: \"pln\",\n    PR_PUNCTUATION: \"pun\",\n    PR_SOURCE: \"src\",\n    PR_STRING: \"str\",\n    PR_TAG: \"tag\",\n    PR_TYPE: \"typ\",\n  };\n})();\n\n// make code pretty\nwindow.prettyPrint && prettyPrint();\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/jquery.easing.1.3.js",
    "content": "/*\n * jQuery Easing v1.3 - http://gsgd.co.uk/sandbox/jquery/easing/\n *\n * Uses the built in easing capabilities added In jQuery 1.1\n * to offer multiple easing options\n *\n * TERMS OF USE - jQuery Easing\n *\n * Open source under the BSD License.\n *\n * Copyright © 2008 George McGinley Smith\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without modification,\n * are permitted provided that the following conditions are met:\n *\n * Redistributions of source code must retain the above copyright notice, this list of\n * conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice, this list\n * of conditions and the following disclaimer in the documentation and/or other materials\n * provided with the distribution.\n *\n * Neither the name of the author nor the names of contributors may be used to endorse\n * or promote products derived from this software without specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY\n * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\n * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE\n *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\n *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE\n *  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED\n * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED\n * OF THE POSSIBILITY OF SUCH DAMAGE.\n *\n */\n\n// t: current time, b: begInnIng value, c: change In value, d: duration\njQuery.easing[\"jswing\"] = jQuery.easing[\"swing\"];\n\njQuery.extend(jQuery.easing, {\n  def: \"easeOutQuad\",\n  swing: function (x, t, b, c, d) {\n    //alert(jQuery.easing.default);\n    return jQuery.easing[jQuery.easing.def](x, t, b, c, d);\n  },\n  easeInQuad: function (x, t, b, c, d) {\n    return c * (t /= d) * t + b;\n  },\n  easeOutQuad: function (x, t, b, c, d) {\n    return -c * (t /= d) * (t - 2) + b;\n  },\n  easeInOutQuad: function (x, t, b, c, d) {\n    if ((t /= d / 2) < 1) return (c / 2) * t * t + b;\n    return (-c / 2) * (--t * (t - 2) - 1) + b;\n  },\n  easeInCubic: function (x, t, b, c, d) {\n    return c * (t /= d) * t * t + b;\n  },\n  easeOutCubic: function (x, t, b, c, d) {\n    return c * ((t = t / d - 1) * t * t + 1) + b;\n  },\n  easeInOutCubic: function (x, t, b, c, d) {\n    if ((t /= d / 2) < 1) return (c / 2) * t * t * t + b;\n    return (c / 2) * ((t -= 2) * t * t + 2) + b;\n  },\n  easeInQuart: function (x, t, b, c, d) {\n    return c * (t /= d) * t * t * t + b;\n  },\n  easeOutQuart: function (x, t, b, c, d) {\n    return -c * ((t = t / d - 1) * t * t * t - 1) + b;\n  },\n  easeInOutQuart: function (x, t, b, c, d) {\n    if ((t /= d / 2) < 1) return (c / 2) * t * t * t * t + b;\n    return (-c / 2) * ((t -= 2) * t * t * t - 2) + b;\n  },\n  easeInQuint: function (x, t, b, c, d) {\n    return c * (t /= d) * t * t * t * t + b;\n  },\n  easeOutQuint: function (x, t, b, c, d) {\n    return c * ((t = t / d - 1) * t * t * t * t + 1) + b;\n  },\n  easeInOutQuint: function (x, t, b, c, d) {\n    if ((t /= d / 2) < 1) return (c / 2) * t * t * t * t * t + b;\n    return (c / 2) * ((t -= 2) * t * t * t * t + 2) + b;\n  },\n  easeInSine: function (x, t, b, c, d) {\n    return -c * Math.cos((t / d) * (Math.PI / 2)) + c + b;\n  },\n  easeOutSine: function (x, t, b, c, d) {\n    return c * Math.sin((t / d) * (Math.PI / 2)) + b;\n  },\n  easeInOutSine: function (x, t, b, c, d) {\n    return (-c / 2) * (Math.cos((Math.PI * t) / d) - 1) + b;\n  },\n  easeInExpo: function (x, t, b, c, d) {\n    return t == 0 ? b : c * Math.pow(2, 10 * (t / d - 1)) + b;\n  },\n  easeOutExpo: function (x, t, b, c, d) {\n    return t == d ? b + c : c * (-Math.pow(2, (-10 * t) / d) + 1) + b;\n  },\n  easeInOutExpo: function (x, t, b, c, d) {\n    if (t == 0) return b;\n    if (t == d) return b + c;\n    if ((t /= d / 2) < 1) return (c / 2) * Math.pow(2, 10 * (t - 1)) + b;\n    return (c / 2) * (-Math.pow(2, -10 * --t) + 2) + b;\n  },\n  easeInCirc: function (x, t, b, c, d) {\n    return -c * (Math.sqrt(1 - (t /= d) * t) - 1) + b;\n  },\n  easeOutCirc: function (x, t, b, c, d) {\n    return c * Math.sqrt(1 - (t = t / d - 1) * t) + b;\n  },\n  easeInOutCirc: function (x, t, b, c, d) {\n    if ((t /= d / 2) < 1) return (-c / 2) * (Math.sqrt(1 - t * t) - 1) + b;\n    return (c / 2) * (Math.sqrt(1 - (t -= 2) * t) + 1) + b;\n  },\n  easeInElastic: function (x, t, b, c, d) {\n    var s = 1.70158;\n    var p = 0;\n    var a = c;\n    if (t == 0) return b;\n    if ((t /= d) == 1) return b + c;\n    if (!p) p = d * 0.3;\n    if (a < Math.abs(c)) {\n      a = c;\n      var s = p / 4;\n    } else var s = (p / (2 * Math.PI)) * Math.asin(c / a);\n    return (\n      -(\n        a *\n        Math.pow(2, 10 * (t -= 1)) *\n        Math.sin(((t * d - s) * (2 * Math.PI)) / p)\n      ) + b\n    );\n  },\n  easeOutElastic: function (x, t, b, c, d) {\n    var s = 1.70158;\n    var p = 0;\n    var a = c;\n    if (t == 0) return b;\n    if ((t /= d) == 1) return b + c;\n    if (!p) p = d * 0.3;\n    if (a < Math.abs(c)) {\n      a = c;\n      var s = p / 4;\n    } else var s = (p / (2 * Math.PI)) * Math.asin(c / a);\n    return (\n      a * Math.pow(2, -10 * t) * Math.sin(((t * d - s) * (2 * Math.PI)) / p) +\n      c +\n      b\n    );\n  },\n  easeInOutElastic: function (x, t, b, c, d) {\n    var s = 1.70158;\n    var p = 0;\n    var a = c;\n    if (t == 0) return b;\n    if ((t /= d / 2) == 2) return b + c;\n    if (!p) p = d * (0.3 * 1.5);\n    if (a < Math.abs(c)) {\n      a = c;\n      var s = p / 4;\n    } else var s = (p / (2 * Math.PI)) * Math.asin(c / a);\n    if (t < 1)\n      return (\n        -0.5 *\n          (a *\n            Math.pow(2, 10 * (t -= 1)) *\n            Math.sin(((t * d - s) * (2 * Math.PI)) / p)) +\n        b\n      );\n    return (\n      a *\n        Math.pow(2, -10 * (t -= 1)) *\n        Math.sin(((t * d - s) * (2 * Math.PI)) / p) *\n        0.5 +\n      c +\n      b\n    );\n  },\n  easeInBack: function (x, t, b, c, d, s) {\n    if (s == undefined) s = 1.70158;\n    return c * (t /= d) * t * ((s + 1) * t - s) + b;\n  },\n  easeOutBack: function (x, t, b, c, d, s) {\n    if (s == undefined) s = 1.70158;\n    return c * ((t = t / d - 1) * t * ((s + 1) * t + s) + 1) + b;\n  },\n  easeInOutBack: function (x, t, b, c, d, s) {\n    if (s == undefined) s = 1.70158;\n    if ((t /= d / 2) < 1)\n      return (c / 2) * (t * t * (((s *= 1.525) + 1) * t - s)) + b;\n    return (c / 2) * ((t -= 2) * t * (((s *= 1.525) + 1) * t + s) + 2) + b;\n  },\n  easeInBounce: function (x, t, b, c, d) {\n    return c - jQuery.easing.easeOutBounce(x, d - t, 0, c, d) + b;\n  },\n  easeOutBounce: function (x, t, b, c, d) {\n    if ((t /= d) < 1 / 2.75) {\n      return c * (7.5625 * t * t) + b;\n    } else if (t < 2 / 2.75) {\n      return c * (7.5625 * (t -= 1.5 / 2.75) * t + 0.75) + b;\n    } else if (t < 2.5 / 2.75) {\n      return c * (7.5625 * (t -= 2.25 / 2.75) * t + 0.9375) + b;\n    } else {\n      return c * (7.5625 * (t -= 2.625 / 2.75) * t + 0.984375) + b;\n    }\n  },\n  easeInOutBounce: function (x, t, b, c, d) {\n    if (t < d / 2)\n      return jQuery.easing.easeInBounce(x, t * 2, 0, c, d) * 0.5 + b;\n    return (\n      jQuery.easing.easeOutBounce(x, t * 2 - d, 0, c, d) * 0.5 + c * 0.5 + b\n    );\n  },\n});\n\n/*\n *\n * TERMS OF USE - EASING EQUATIONS\n *\n * Open source under the BSD License.\n *\n * Copyright © 2001 Robert Penner\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without modification,\n * are permitted provided that the following conditions are met:\n *\n * Redistributions of source code must retain the above copyright notice, this list of\n * conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice, this list\n * of conditions and the following disclaimer in the documentation and/or other materials\n * provided with the distribution.\n *\n * Neither the name of the author nor the names of contributors may be used to endorse\n * or promote products derived from this software without specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY\n * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\n * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE\n *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\n *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE\n *  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED\n * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED\n * OF THE POSSIBILITY OF SUCH DAMAGE.\n *\n */\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/jquery.fancybox-media.js",
    "content": "/*!\n * Media helper for fancyBox\n * version: 1.0.5 (Tue, 23 Oct 2012)\n * @requires fancyBox v2.0 or later\n *\n * Usage:\n *     $(\".fancybox\").fancybox({\n *         helpers : {\n *             media: true\n *         }\n *     });\n *\n * Set custom URL parameters:\n *     $(\".fancybox\").fancybox({\n *         helpers : {\n *             media: {\n *                 youtube : {\n *                     params : {\n *                         autoplay : 0\n *                     }\n *                 }\n *             }\n *         }\n *     });\n *\n * Or:\n *     $(\".fancybox\").fancybox({,\n *\t       helpers : {\n *             media: true\n *         },\n *         youtube : {\n *             autoplay: 0\n *         }\n *     });\n *\n *  Supports:\n *\n *      Youtube\n *          http://www.youtube.com/watch?v=opj24KnzrWo\n *          http://www.youtube.com/embed/opj24KnzrWo\n *          http://youtu.be/opj24KnzrWo\n *      Vimeo\n *          http://vimeo.com/40648169\n *          http://vimeo.com/channels/staffpicks/38843628\n *          http://vimeo.com/groups/surrealism/videos/36516384\n *          http://player.vimeo.com/video/45074303\n *      Metacafe\n *          http://www.metacafe.com/watch/7635964/dr_seuss_the_lorax_movie_trailer/\n *          http://www.metacafe.com/watch/7635964/\n *      Dailymotion\n *          http://www.dailymotion.com/video/xoytqh_dr-seuss-the-lorax-premiere_people\n *      Twitvid\n *          http://twitvid.com/QY7MD\n *      Twitpic\n *          http://twitpic.com/7p93st\n *      Instagram\n *          http://instagr.am/p/IejkuUGxQn/\n *          http://instagram.com/p/IejkuUGxQn/\n *      Google maps\n *          http://maps.google.com/maps?q=Eiffel+Tower,+Avenue+Gustave+Eiffel,+Paris,+France&t=h&z=17\n *          http://maps.google.com/?ll=48.857995,2.294297&spn=0.007666,0.021136&t=m&z=16\n *          http://maps.google.com/?ll=48.859463,2.292626&spn=0.000965,0.002642&t=m&z=19&layer=c&cbll=48.859524,2.292532&panoid=YJ0lq28OOy3VT2IqIuVY0g&cbp=12,151.58,,0,-15.56\n */\n(function ($) {\n  \"use strict\";\n\n  //Shortcut for fancyBox object\n  var F = $.fancybox,\n    format = function (url, rez, params) {\n      params = params || \"\";\n\n      if ($.type(params) === \"object\") {\n        params = $.param(params, true);\n      }\n\n      $.each(rez, function (key, value) {\n        url = url.replace(\"$\" + key, value || \"\");\n      });\n\n      if (params.length) {\n        url += (url.indexOf(\"?\") > 0 ? \"&\" : \"?\") + params;\n      }\n\n      return url;\n    };\n\n  //Add helper object\n  F.helpers.media = {\n    defaults: {\n      youtube: {\n        matcher:\n          /(youtube\\.com|youtu\\.be)\\/(watch\\?v=|v\\/|u\\/|embed\\/?)?(videoseries\\?list=(.*)|[\\w-]{11}|\\?listType=(.*)&list=(.*)).*/i,\n        params: {\n          autoplay: 1,\n          autohide: 1,\n          fs: 1,\n          rel: 0,\n          hd: 1,\n          wmode: \"opaque\",\n          enablejsapi: 1,\n        },\n        type: \"iframe\",\n        url: \"//www.youtube.com/embed/$3\",\n      },\n      vimeo: {\n        matcher: /(?:vimeo(?:pro)?.com)\\/(?:[^\\d]+)?(\\d+)(?:.*)/,\n        params: {\n          autoplay: 1,\n          hd: 1,\n          show_title: 1,\n          show_byline: 1,\n          show_portrait: 0,\n          fullscreen: 1,\n        },\n        type: \"iframe\",\n        url: \"//player.vimeo.com/video/$1\",\n      },\n      metacafe: {\n        matcher: /metacafe.com\\/(?:watch|fplayer)\\/([\\w\\-]{1,10})/,\n        params: {\n          autoPlay: \"yes\",\n        },\n        type: \"swf\",\n        url: function (rez, params, obj) {\n          obj.swf.flashVars = \"playerVars=\" + $.param(params, true);\n\n          return \"//www.metacafe.com/fplayer/\" + rez[1] + \"/.swf\";\n        },\n      },\n      dailymotion: {\n        matcher: /dailymotion.com\\/video\\/(.*)\\/?(.*)/,\n        params: {\n          additionalInfos: 0,\n          autoStart: 1,\n        },\n        type: \"swf\",\n        url: \"//www.dailymotion.com/swf/video/$1\",\n      },\n      twitvid: {\n        matcher: /twitvid\\.com\\/([a-zA-Z0-9_\\-\\?\\=]+)/i,\n        params: {\n          autoplay: 0,\n        },\n        type: \"iframe\",\n        url: \"//www.twitvid.com/embed.php?guid=$1\",\n      },\n      twitpic: {\n        matcher:\n          /twitpic\\.com\\/(?!(?:place|photos|events)\\/)([a-zA-Z0-9\\?\\=\\-]+)/i,\n        type: \"image\",\n        url: \"//twitpic.com/show/full/$1/\",\n      },\n      instagram: {\n        matcher: /(instagr\\.am|instagram\\.com)\\/p\\/([a-zA-Z0-9_\\-]+)\\/?/i,\n        type: \"image\",\n        url: \"//$1/p/$2/media/\",\n      },\n      google_maps: {\n        matcher: /maps\\.google\\.([a-z]{2,3}(\\.[a-z]{2})?)\\/(\\?ll=|maps\\?)(.*)/i,\n        type: \"iframe\",\n        url: function (rez) {\n          return (\n            \"//maps.google.\" +\n            rez[1] +\n            \"/\" +\n            rez[3] +\n            \"\" +\n            rez[4] +\n            \"&output=\" +\n            (rez[4].indexOf(\"layer=c\") > 0 ? \"svembed\" : \"embed\")\n          );\n        },\n      },\n    },\n\n    beforeLoad: function (opts, obj) {\n      var url = obj.href || \"\",\n        type = false,\n        what,\n        item,\n        rez,\n        params;\n\n      for (what in opts) {\n        item = opts[what];\n        rez = url.match(item.matcher);\n\n        if (rez) {\n          type = item.type;\n          params = $.extend(\n            true,\n            {},\n            item.params,\n            obj[what] ||\n              ($.isPlainObject(opts[what]) ? opts[what].params : null),\n          );\n\n          url =\n            $.type(item.url) === \"function\"\n              ? item.url.call(this, rez, params, obj)\n              : format(item.url, rez, params);\n\n          break;\n        }\n      }\n\n      if (type) {\n        obj.href = url;\n        obj.type = type;\n\n        obj.autoHeight = false;\n      }\n    },\n  };\n})(jQuery);\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/jquery.fancybox.pack.js",
    "content": "/*! fancyBox v2.1.4 fancyapps.com | fancyapps.com/fancybox/#license */\n(function (C, z, f, r) {\n  var q = f(C),\n    n = f(z),\n    b = (f.fancybox = function () {\n      b.open.apply(this, arguments);\n    }),\n    H = navigator.userAgent.match(/msie/),\n    w = null,\n    s = z.createTouch !== r,\n    t = function (a) {\n      return a && a.hasOwnProperty && a instanceof f;\n    },\n    p = function (a) {\n      return a && \"string\" === f.type(a);\n    },\n    F = function (a) {\n      return p(a) && 0 < a.indexOf(\"%\");\n    },\n    l = function (a, d) {\n      var e = parseInt(a, 10) || 0;\n      d && F(a) && (e *= b.getViewport()[d] / 100);\n      return Math.ceil(e);\n    },\n    x = function (a, b) {\n      return l(a, b) + \"px\";\n    };\n  f.extend(b, {\n    version: \"2.1.4\",\n    defaults: {\n      padding: 15,\n      margin: 20,\n      width: 800,\n      height: 600,\n      minWidth: 100,\n      minHeight: 100,\n      maxWidth: 9999,\n      maxHeight: 9999,\n      autoSize: !0,\n      autoHeight: !1,\n      autoWidth: !1,\n      autoResize: !0,\n      autoCenter: !s,\n      fitToView: !0,\n      aspectRatio: !1,\n      topRatio: 0.5,\n      leftRatio: 0.5,\n      scrolling: \"auto\",\n      wrapCSS: \"\",\n      arrows: !0,\n      closeBtn: !0,\n      closeClick: !1,\n      nextClick: !1,\n      mouseWheel: !0,\n      autoPlay: !1,\n      playSpeed: 3e3,\n      preload: 3,\n      modal: !1,\n      loop: !0,\n      ajax: { dataType: \"html\", headers: { \"X-fancyBox\": !0 } },\n      iframe: { scrolling: \"auto\", preload: !0 },\n      swf: {\n        wmode: \"transparent\",\n        allowfullscreen: \"true\",\n        allowscriptaccess: \"always\",\n      },\n      keys: {\n        next: { 13: \"left\", 34: \"up\", 39: \"left\", 40: \"up\" },\n        prev: { 8: \"right\", 33: \"down\", 37: \"right\", 38: \"down\" },\n        close: [27],\n        play: [32],\n        toggle: [70],\n      },\n      direction: { next: \"left\", prev: \"right\" },\n      scrollOutside: !0,\n      index: 0,\n      type: null,\n      href: null,\n      content: null,\n      title: null,\n      tpl: {\n        wrap: '<div class=\"fancybox-wrap\" tabIndex=\"-1\"><div class=\"fancybox-skin\"><div class=\"fancybox-outer\"><div class=\"fancybox-inner\"></div></div></div></div>',\n        image: '<img class=\"fancybox-image\" src=\"{href}\" alt=\"\" />',\n        iframe:\n          '<iframe id=\"fancybox-frame{rnd}\" name=\"fancybox-frame{rnd}\" class=\"fancybox-iframe\" frameborder=\"0\" vspace=\"0\" hspace=\"0\" webkitAllowFullScreen mozallowfullscreen allowFullScreen' +\n          (H ? ' allowtransparency=\"true\"' : \"\") +\n          \"></iframe>\",\n        error:\n          '<p class=\"fancybox-error\">The requested content cannot be loaded.<br/>Please try again later.</p>',\n        closeBtn:\n          '<a title=\"Close\" class=\"fancybox-item fancybox-close\" href=\"javascript:;\"><i class=\"font-icon-remove\"></i></a>',\n        next: '<a title=\"Next\" class=\"fancybox-nav fancybox-next\" href=\"javascript:;\"><span><i class=\"font-icon-arrow-simple-right\"></i></span></a>',\n        prev: '<a title=\"Previous\" class=\"fancybox-nav fancybox-prev\" href=\"javascript:;\"><span><i class=\"font-icon-arrow-simple-left\"></i></span></a>',\n      },\n      openEffect: \"fade\",\n      openSpeed: 250,\n      openEasing: \"swing\",\n      openOpacity: !0,\n      openMethod: \"zoomIn\",\n      closeEffect: \"fade\",\n      closeSpeed: 250,\n      closeEasing: \"swing\",\n      closeOpacity: !0,\n      closeMethod: \"zoomOut\",\n      nextEffect: \"elastic\",\n      nextSpeed: 250,\n      nextEasing: \"swing\",\n      nextMethod: \"changeIn\",\n      prevEffect: \"elastic\",\n      prevSpeed: 250,\n      prevEasing: \"swing\",\n      prevMethod: \"changeOut\",\n      helpers: { overlay: !0, title: !0 },\n      onCancel: f.noop,\n      beforeLoad: f.noop,\n      afterLoad: f.noop,\n      beforeShow: f.noop,\n      afterShow: f.noop,\n      beforeChange: f.noop,\n      beforeClose: f.noop,\n      afterClose: f.noop,\n    },\n    group: {},\n    opts: {},\n    previous: null,\n    coming: null,\n    current: null,\n    isActive: !1,\n    isOpen: !1,\n    isOpened: !1,\n    wrap: null,\n    skin: null,\n    outer: null,\n    inner: null,\n    player: { timer: null, isActive: !1 },\n    ajaxLoad: null,\n    imgPreload: null,\n    transitions: {},\n    helpers: {},\n    open: function (a, d) {\n      if (a && (f.isPlainObject(d) || (d = {}), !1 !== b.close(!0)))\n        return (\n          f.isArray(a) || (a = t(a) ? f(a).get() : [a]),\n          f.each(a, function (e, c) {\n            var k = {},\n              g,\n              h,\n              j,\n              m,\n              l;\n            \"object\" === f.type(c) &&\n              (c.nodeType && (c = f(c)),\n              t(c)\n                ? ((k = {\n                    href: c.data(\"fancybox-href\") || c.attr(\"href\"),\n                    title: c.data(\"fancybox-title\") || c.attr(\"title\"),\n                    isDom: !0,\n                    element: c,\n                  }),\n                  f.metadata && f.extend(!0, k, c.metadata()))\n                : (k = c));\n            g = d.href || k.href || (p(c) ? c : null);\n            h = d.title !== r ? d.title : k.title || \"\";\n            m = (j = d.content || k.content) ? \"html\" : d.type || k.type;\n            !m &&\n              k.isDom &&\n              ((m = c.data(\"fancybox-type\")),\n              m ||\n                (m = (m = c.prop(\"class\").match(/fancybox\\.(\\w+)/))\n                  ? m[1]\n                  : null));\n            p(g) &&\n              (m ||\n                (b.isImage(g)\n                  ? (m = \"image\")\n                  : b.isSWF(g)\n                    ? (m = \"swf\")\n                    : \"#\" === g.charAt(0)\n                      ? (m = \"inline\")\n                      : p(c) && ((m = \"html\"), (j = c))),\n              \"ajax\" === m &&\n                ((l = g.split(/\\s+/, 2)), (g = l.shift()), (l = l.shift())));\n            j ||\n              (\"inline\" === m\n                ? g\n                  ? (j = f(p(g) ? g.replace(/.*(?=#[^\\s]+$)/, \"\") : g))\n                  : k.isDom && (j = c)\n                : \"html\" === m\n                  ? (j = g)\n                  : !m && !g && k.isDom && ((m = \"inline\"), (j = c)));\n            f.extend(k, {\n              href: g,\n              type: m,\n              content: j,\n              title: h,\n              selector: l,\n            });\n            a[e] = k;\n          }),\n          (b.opts = f.extend(!0, {}, b.defaults, d)),\n          d.keys !== r &&\n            (b.opts.keys = d.keys ? f.extend({}, b.defaults.keys, d.keys) : !1),\n          (b.group = a),\n          b._start(b.opts.index)\n        );\n    },\n    cancel: function () {\n      var a = b.coming;\n      a &&\n        !1 !== b.trigger(\"onCancel\") &&\n        (b.hideLoading(),\n        b.ajaxLoad && b.ajaxLoad.abort(),\n        (b.ajaxLoad = null),\n        b.imgPreload && (b.imgPreload.onload = b.imgPreload.onerror = null),\n        a.wrap && a.wrap.stop(!0, !0).trigger(\"onReset\").remove(),\n        (b.coming = null),\n        b.current || b._afterZoomOut(a));\n    },\n    close: function (a) {\n      b.cancel();\n      !1 !== b.trigger(\"beforeClose\") &&\n        (b.unbindEvents(),\n        b.isActive &&\n          (!b.isOpen || !0 === a\n            ? (f(\".fancybox-wrap\").stop(!0).trigger(\"onReset\").remove(),\n              b._afterZoomOut())\n            : ((b.isOpen = b.isOpened = !1),\n              (b.isClosing = !0),\n              f(\".fancybox-item, .fancybox-nav\").remove(),\n              b.wrap.stop(!0, !0).removeClass(\"fancybox-opened\"),\n              b.transitions[b.current.closeMethod]())));\n    },\n    play: function (a) {\n      var d = function () {\n          clearTimeout(b.player.timer);\n        },\n        e = function () {\n          d();\n          b.current &&\n            b.player.isActive &&\n            (b.player.timer = setTimeout(b.next, b.current.playSpeed));\n        },\n        c = function () {\n          d();\n          f(\"body\").unbind(\".player\");\n          b.player.isActive = !1;\n          b.trigger(\"onPlayEnd\");\n        };\n      if (!0 === a || (!b.player.isActive && !1 !== a)) {\n        if (\n          b.current &&\n          (b.current.loop || b.current.index < b.group.length - 1)\n        )\n          (b.player.isActive = !0),\n            f(\"body\").bind({\n              \"afterShow.player onUpdate.player\": e,\n              \"onCancel.player beforeClose.player\": c,\n              \"beforeLoad.player\": d,\n            }),\n            e(),\n            b.trigger(\"onPlayStart\");\n      } else c();\n    },\n    next: function (a) {\n      var d = b.current;\n      d && (p(a) || (a = d.direction.next), b.jumpto(d.index + 1, a, \"next\"));\n    },\n    prev: function (a) {\n      var d = b.current;\n      d && (p(a) || (a = d.direction.prev), b.jumpto(d.index - 1, a, \"prev\"));\n    },\n    jumpto: function (a, d, e) {\n      var c = b.current;\n      c &&\n        ((a = l(a)),\n        (b.direction = d || c.direction[a >= c.index ? \"next\" : \"prev\"]),\n        (b.router = e || \"jumpto\"),\n        c.loop &&\n          (0 > a && (a = c.group.length + (a % c.group.length)),\n          (a %= c.group.length)),\n        c.group[a] !== r && (b.cancel(), b._start(a)));\n    },\n    reposition: function (a, d) {\n      var e = b.current,\n        c = e ? e.wrap : null,\n        k;\n      c &&\n        ((k = b._getPosition(d)),\n        a && \"scroll\" === a.type\n          ? (delete k.position, c.stop(!0, !0).animate(k, 200))\n          : (c.css(k), (e.pos = f.extend({}, e.dim, k))));\n    },\n    update: function (a) {\n      var d = a && a.type,\n        e = !d || \"orientationchange\" === d;\n      e && (clearTimeout(w), (w = null));\n      b.isOpen &&\n        !w &&\n        (w = setTimeout(\n          function () {\n            var c = b.current;\n            c &&\n              !b.isClosing &&\n              (b.wrap.removeClass(\"fancybox-tmp\"),\n              (e || \"load\" === d || (\"resize\" === d && c.autoResize)) &&\n                b._setDimension(),\n              (\"scroll\" === d && c.canShrink) || b.reposition(a),\n              b.trigger(\"onUpdate\"),\n              (w = null));\n          },\n          e && !s ? 0 : 300,\n        ));\n    },\n    toggle: function (a) {\n      b.isOpen &&\n        ((b.current.fitToView =\n          \"boolean\" === f.type(a) ? a : !b.current.fitToView),\n        s &&\n          (b.wrap.removeAttr(\"style\").addClass(\"fancybox-tmp\"),\n          b.trigger(\"onUpdate\")),\n        b.update());\n    },\n    hideLoading: function () {\n      n.unbind(\".loading\");\n      f(\"#fancybox-loading\").remove();\n    },\n    showLoading: function () {\n      var a, d;\n      b.hideLoading();\n      a = f('<div id=\"fancybox-loading\"><div></div></div>')\n        .click(b.cancel)\n        .appendTo(\"body\");\n      n.bind(\"keydown.loading\", function (a) {\n        if (27 === (a.which || a.keyCode)) a.preventDefault(), b.cancel();\n      });\n      b.defaults.fixed ||\n        ((d = b.getViewport()),\n        a.css({\n          position: \"absolute\",\n          top: 0.5 * d.h + d.y,\n          left: 0.5 * d.w + d.x,\n        }));\n    },\n    getViewport: function () {\n      var a = (b.current && b.current.locked) || !1,\n        d = { x: q.scrollLeft(), y: q.scrollTop() };\n      a\n        ? ((d.w = a[0].clientWidth), (d.h = a[0].clientHeight))\n        : ((d.w = s && C.innerWidth ? C.innerWidth : q.width()),\n          (d.h = s && C.innerHeight ? C.innerHeight : q.height()));\n      return d;\n    },\n    unbindEvents: function () {\n      b.wrap && t(b.wrap) && b.wrap.unbind(\".fb\");\n      n.unbind(\".fb\");\n      q.unbind(\".fb\");\n    },\n    bindEvents: function () {\n      var a = b.current,\n        d;\n      a &&\n        (q.bind(\n          \"orientationchange.fb\" +\n            (s ? \"\" : \" resize.fb\") +\n            (a.autoCenter && !a.locked ? \" scroll.fb\" : \"\"),\n          b.update,\n        ),\n        (d = a.keys) &&\n          n.bind(\"keydown.fb\", function (e) {\n            var c = e.which || e.keyCode,\n              k = e.target || e.srcElement;\n            if (27 === c && b.coming) return !1;\n            !e.ctrlKey &&\n              !e.altKey &&\n              !e.shiftKey &&\n              !e.metaKey &&\n              (!k || (!k.type && !f(k).is(\"[contenteditable]\"))) &&\n              f.each(d, function (d, k) {\n                if (1 < a.group.length && k[c] !== r)\n                  return b[d](k[c]), e.preventDefault(), !1;\n                if (-1 < f.inArray(c, k)) return b[d](), e.preventDefault(), !1;\n              });\n          }),\n        f.fn.mousewheel &&\n          a.mouseWheel &&\n          b.wrap.bind(\"mousewheel.fb\", function (d, c, k, g) {\n            for (\n              var h = f(d.target || null), j = !1;\n              h.length &&\n              !j &&\n              !h.is(\".fancybox-skin\") &&\n              !h.is(\".fancybox-wrap\");\n\n            )\n              (j =\n                h[0] &&\n                !(h[0].style.overflow && \"hidden\" === h[0].style.overflow) &&\n                ((h[0].clientWidth && h[0].scrollWidth > h[0].clientWidth) ||\n                  (h[0].clientHeight &&\n                    h[0].scrollHeight > h[0].clientHeight))),\n                (h = f(h).parent());\n            if (0 !== c && !j && 1 < b.group.length && !a.canShrink) {\n              if (0 < g || 0 < k) b.prev(0 < g ? \"down\" : \"left\");\n              else if (0 > g || 0 > k) b.next(0 > g ? \"up\" : \"right\");\n              d.preventDefault();\n            }\n          }));\n    },\n    trigger: function (a, d) {\n      var e,\n        c = d || b.coming || b.current;\n      if (c) {\n        f.isFunction(c[a]) &&\n          (e = c[a].apply(c, Array.prototype.slice.call(arguments, 1)));\n        if (!1 === e) return !1;\n        c.helpers &&\n          f.each(c.helpers, function (d, e) {\n            e &&\n              b.helpers[d] &&\n              f.isFunction(b.helpers[d][a]) &&\n              ((e = f.extend(!0, {}, b.helpers[d].defaults, e)),\n              b.helpers[d][a](e, c));\n          });\n        f.event.trigger(a + \".fb\");\n      }\n    },\n    isImage: function (a) {\n      return (\n        p(a) &&\n        a.match(\n          /(^data:image\\/.*,)|(\\.(jp(e|g|eg)|gif|png|bmp|webp)((\\?|#).*)?$)/i,\n        )\n      );\n    },\n    isSWF: function (a) {\n      return p(a) && a.match(/\\.(swf)((\\?|#).*)?$/i);\n    },\n    _start: function (a) {\n      var d = {},\n        e,\n        c;\n      a = l(a);\n      e = b.group[a] || null;\n      if (!e) return !1;\n      d = f.extend(!0, {}, b.opts, e);\n      e = d.margin;\n      c = d.padding;\n      \"number\" === f.type(e) && (d.margin = [e, e, e, e]);\n      \"number\" === f.type(c) && (d.padding = [c, c, c, c]);\n      d.modal &&\n        f.extend(!0, d, {\n          closeBtn: !1,\n          closeClick: !1,\n          nextClick: !1,\n          arrows: !1,\n          mouseWheel: !1,\n          keys: null,\n          helpers: { overlay: { closeClick: !1 } },\n        });\n      d.autoSize && (d.autoWidth = d.autoHeight = !0);\n      \"auto\" === d.width && (d.autoWidth = !0);\n      \"auto\" === d.height && (d.autoHeight = !0);\n      d.group = b.group;\n      d.index = a;\n      b.coming = d;\n      if (!1 === b.trigger(\"beforeLoad\")) b.coming = null;\n      else {\n        c = d.type;\n        e = d.href;\n        if (!c)\n          return (\n            (b.coming = null),\n            b.current && b.router && \"jumpto\" !== b.router\n              ? ((b.current.index = a), b[b.router](b.direction))\n              : !1\n          );\n        b.isActive = !0;\n        if (\"image\" === c || \"swf\" === c)\n          (d.autoHeight = d.autoWidth = !1), (d.scrolling = \"visible\");\n        \"image\" === c && (d.aspectRatio = !0);\n        \"iframe\" === c && s && (d.scrolling = \"scroll\");\n        d.wrap = f(d.tpl.wrap)\n          .addClass(\n            \"fancybox-\" +\n              (s ? \"mobile\" : \"desktop\") +\n              \" fancybox-type-\" +\n              c +\n              \" fancybox-tmp \" +\n              d.wrapCSS,\n          )\n          .appendTo(d.parent || \"body\");\n        f.extend(d, {\n          skin: f(\".fancybox-skin\", d.wrap),\n          outer: f(\".fancybox-outer\", d.wrap),\n          inner: f(\".fancybox-inner\", d.wrap),\n        });\n        f.each([\"Top\", \"Right\", \"Bottom\", \"Left\"], function (a, b) {\n          d.skin.css(\"padding\" + b, x(d.padding[a]));\n        });\n        b.trigger(\"onReady\");\n        if (\"inline\" === c || \"html\" === c) {\n          if (!d.content || !d.content.length) return b._error(\"content\");\n        } else if (!e) return b._error(\"href\");\n        \"image\" === c\n          ? b._loadImage()\n          : \"ajax\" === c\n            ? b._loadAjax()\n            : \"iframe\" === c\n              ? b._loadIframe()\n              : b._afterLoad();\n      }\n    },\n    _error: function (a) {\n      f.extend(b.coming, {\n        type: \"html\",\n        autoWidth: !0,\n        autoHeight: !0,\n        minWidth: 0,\n        minHeight: 0,\n        scrolling: \"no\",\n        hasError: a,\n        content: b.coming.tpl.error,\n      });\n      b._afterLoad();\n    },\n    _loadImage: function () {\n      var a = (b.imgPreload = new Image());\n      a.onload = function () {\n        this.onload = this.onerror = null;\n        b.coming.width = this.width;\n        b.coming.height = this.height;\n        b._afterLoad();\n      };\n      a.onerror = function () {\n        this.onload = this.onerror = null;\n        b._error(\"image\");\n      };\n      a.src = b.coming.href;\n      !0 !== a.complete && b.showLoading();\n    },\n    _loadAjax: function () {\n      var a = b.coming;\n      b.showLoading();\n      b.ajaxLoad = f.ajax(\n        f.extend({}, a.ajax, {\n          url: a.href,\n          error: function (a, e) {\n            b.coming && \"abort\" !== e ? b._error(\"ajax\", a) : b.hideLoading();\n          },\n          success: function (d, e) {\n            \"success\" === e && ((a.content = d), b._afterLoad());\n          },\n        }),\n      );\n    },\n    _loadIframe: function () {\n      var a = b.coming,\n        d = f(a.tpl.iframe.replace(/\\{rnd\\}/g, new Date().getTime()))\n          .attr(\"scrolling\", s ? \"auto\" : a.iframe.scrolling)\n          .attr(\"src\", a.href);\n      f(a.wrap).bind(\"onReset\", function () {\n        try {\n          f(this)\n            .find(\"iframe\")\n            .hide()\n            .attr(\"src\", \"//about:blank\")\n            .end()\n            .empty();\n        } catch (a) {}\n      });\n      a.iframe.preload &&\n        (b.showLoading(),\n        d.one(\"load\", function () {\n          f(this).data(\"ready\", 1);\n          s || f(this).bind(\"load.fb\", b.update);\n          f(this)\n            .parents(\".fancybox-wrap\")\n            .width(\"100%\")\n            .removeClass(\"fancybox-tmp\")\n            .show();\n          b._afterLoad();\n        }));\n      a.content = d.appendTo(a.inner);\n      a.iframe.preload || b._afterLoad();\n    },\n    _preloadImages: function () {\n      var a = b.group,\n        d = b.current,\n        e = a.length,\n        c = d.preload ? Math.min(d.preload, e - 1) : 0,\n        f,\n        g;\n      for (g = 1; g <= c; g += 1)\n        (f = a[(d.index + g) % e]),\n          \"image\" === f.type && f.href && (new Image().src = f.href);\n    },\n    _afterLoad: function () {\n      var a = b.coming,\n        d = b.current,\n        e,\n        c,\n        k,\n        g,\n        h;\n      b.hideLoading();\n      if (a && !1 !== b.isActive)\n        if (!1 === b.trigger(\"afterLoad\", a, d))\n          a.wrap.stop(!0).trigger(\"onReset\").remove(), (b.coming = null);\n        else {\n          d &&\n            (b.trigger(\"beforeChange\", d),\n            d.wrap\n              .stop(!0)\n              .removeClass(\"fancybox-opened\")\n              .find(\".fancybox-item, .fancybox-nav\")\n              .remove());\n          b.unbindEvents();\n          e = a.content;\n          c = a.type;\n          k = a.scrolling;\n          f.extend(b, {\n            wrap: a.wrap,\n            skin: a.skin,\n            outer: a.outer,\n            inner: a.inner,\n            current: a,\n            previous: d,\n          });\n          g = a.href;\n          switch (c) {\n            case \"inline\":\n            case \"ajax\":\n            case \"html\":\n              a.selector\n                ? (e = f(\"<div>\").html(e).find(a.selector))\n                : t(e) &&\n                  (e.data(\"fancybox-placeholder\") ||\n                    e.data(\n                      \"fancybox-placeholder\",\n                      f('<div class=\"fancybox-placeholder\"></div>')\n                        .insertAfter(e)\n                        .hide(),\n                    ),\n                  (e = e.show().detach()),\n                  a.wrap.bind(\"onReset\", function () {\n                    f(this).find(e).length &&\n                      e\n                        .hide()\n                        .replaceAll(e.data(\"fancybox-placeholder\"))\n                        .data(\"fancybox-placeholder\", !1);\n                  }));\n              break;\n            case \"image\":\n              e = a.tpl.image.replace(\"{href}\", g);\n              break;\n            case \"swf\":\n              (e =\n                '<object id=\"fancybox-swf\" classid=\"clsid:D27CDB6E-AE6D-11cf-96B8-444553540000\" width=\"100%\" height=\"100%\"><param name=\"movie\" value=\"' +\n                g +\n                '\"></param>'),\n                (h = \"\"),\n                f.each(a.swf, function (a, b) {\n                  e += '<param name=\"' + a + '\" value=\"' + b + '\"></param>';\n                  h += \" \" + a + '=\"' + b + '\"';\n                }),\n                (e +=\n                  '<embed src=\"' +\n                  g +\n                  '\" type=\"application/x-shockwave-flash\" width=\"100%\" height=\"100%\"' +\n                  h +\n                  \"></embed></object>\");\n          }\n          (!t(e) || !e.parent().is(a.inner)) && a.inner.append(e);\n          b.trigger(\"beforeShow\");\n          a.inner.css(\n            \"overflow\",\n            \"yes\" === k ? \"scroll\" : \"no\" === k ? \"hidden\" : k,\n          );\n          b._setDimension();\n          b.reposition();\n          b.isOpen = !1;\n          b.coming = null;\n          b.bindEvents();\n          if (b.isOpened) {\n            if (d.prevMethod) b.transitions[d.prevMethod]();\n          } else\n            f(\".fancybox-wrap\")\n              .not(a.wrap)\n              .stop(!0)\n              .trigger(\"onReset\")\n              .remove();\n          b.transitions[b.isOpened ? a.nextMethod : a.openMethod]();\n          b._preloadImages();\n        }\n    },\n    _setDimension: function () {\n      var a = b.getViewport(),\n        d = 0,\n        e = !1,\n        c = !1,\n        e = b.wrap,\n        k = b.skin,\n        g = b.inner,\n        h = b.current,\n        c = h.width,\n        j = h.height,\n        m = h.minWidth,\n        u = h.minHeight,\n        n = h.maxWidth,\n        v = h.maxHeight,\n        s = h.scrolling,\n        q = h.scrollOutside ? h.scrollbarWidth : 0,\n        y = h.margin,\n        p = l(y[1] + y[3]),\n        r = l(y[0] + y[2]),\n        z,\n        A,\n        t,\n        D,\n        B,\n        G,\n        C,\n        E,\n        w;\n      e.add(k).add(g).width(\"auto\").height(\"auto\").removeClass(\"fancybox-tmp\");\n      y = l(k.outerWidth(!0) - k.width());\n      z = l(k.outerHeight(!0) - k.height());\n      A = p + y;\n      t = r + z;\n      D = F(c) ? ((a.w - A) * l(c)) / 100 : c;\n      B = F(j) ? ((a.h - t) * l(j)) / 100 : j;\n      if (\"iframe\" === h.type) {\n        if (((w = h.content), h.autoHeight && 1 === w.data(\"ready\")))\n          try {\n            w[0].contentWindow.document.location &&\n              (g.width(D).height(9999),\n              (G = w.contents().find(\"body\")),\n              q && G.css(\"overflow-x\", \"hidden\"),\n              (B = G.height()));\n          } catch (H) {}\n      } else if (h.autoWidth || h.autoHeight)\n        g.addClass(\"fancybox-tmp\"),\n          h.autoWidth || g.width(D),\n          h.autoHeight || g.height(B),\n          h.autoWidth && (D = g.width()),\n          h.autoHeight && (B = g.height()),\n          g.removeClass(\"fancybox-tmp\");\n      c = l(D);\n      j = l(B);\n      E = D / B;\n      m = l(F(m) ? l(m, \"w\") - A : m);\n      n = l(F(n) ? l(n, \"w\") - A : n);\n      u = l(F(u) ? l(u, \"h\") - t : u);\n      v = l(F(v) ? l(v, \"h\") - t : v);\n      G = n;\n      C = v;\n      h.fitToView && ((n = Math.min(a.w - A, n)), (v = Math.min(a.h - t, v)));\n      A = a.w - p;\n      r = a.h - r;\n      h.aspectRatio\n        ? (c > n && ((c = n), (j = l(c / E))),\n          j > v && ((j = v), (c = l(j * E))),\n          c < m && ((c = m), (j = l(c / E))),\n          j < u && ((j = u), (c = l(j * E))))\n        : ((c = Math.max(m, Math.min(c, n))),\n          h.autoHeight && \"iframe\" !== h.type && (g.width(c), (j = g.height())),\n          (j = Math.max(u, Math.min(j, v))));\n      if (h.fitToView)\n        if (\n          (g.width(c).height(j),\n          e.width(c + y),\n          (a = e.width()),\n          (p = e.height()),\n          h.aspectRatio)\n        )\n          for (; (a > A || p > r) && c > m && j > u && !(19 < d++); )\n            (j = Math.max(u, Math.min(v, j - 10))),\n              (c = l(j * E)),\n              c < m && ((c = m), (j = l(c / E))),\n              c > n && ((c = n), (j = l(c / E))),\n              g.width(c).height(j),\n              e.width(c + y),\n              (a = e.width()),\n              (p = e.height());\n        else\n          (c = Math.max(m, Math.min(c, c - (a - A)))),\n            (j = Math.max(u, Math.min(j, j - (p - r))));\n      q && \"auto\" === s && j < B && c + y + q < A && (c += q);\n      g.width(c).height(j);\n      e.width(c + y);\n      a = e.width();\n      p = e.height();\n      e = (a > A || p > r) && c > m && j > u;\n      c = h.aspectRatio\n        ? c < G && j < C && c < D && j < B\n        : (c < G || j < C) && (c < D || j < B);\n      f.extend(h, {\n        dim: { width: x(a), height: x(p) },\n        origWidth: D,\n        origHeight: B,\n        canShrink: e,\n        canExpand: c,\n        wPadding: y,\n        hPadding: z,\n        wrapSpace: p - k.outerHeight(!0),\n        skinSpace: k.height() - j,\n      });\n      !w && h.autoHeight && j > u && j < v && !c && g.height(\"auto\");\n    },\n    _getPosition: function (a) {\n      var d = b.current,\n        e = b.getViewport(),\n        c = d.margin,\n        f = b.wrap.width() + c[1] + c[3],\n        g = b.wrap.height() + c[0] + c[2],\n        c = { position: \"absolute\", top: c[0], left: c[3] };\n      d.autoCenter && d.fixed && !a && g <= e.h && f <= e.w\n        ? (c.position = \"fixed\")\n        : d.locked || ((c.top += e.y), (c.left += e.x));\n      c.top = x(Math.max(c.top, c.top + (e.h - g) * d.topRatio));\n      c.left = x(Math.max(c.left, c.left + (e.w - f) * d.leftRatio));\n      return c;\n    },\n    _afterZoomIn: function () {\n      var a = b.current;\n      a &&\n        ((b.isOpen = b.isOpened = !0),\n        b.wrap.css(\"overflow\", \"visible\").addClass(\"fancybox-opened\"),\n        b.update(),\n        (a.closeClick || (a.nextClick && 1 < b.group.length)) &&\n          b.inner.css(\"cursor\", \"pointer\").bind(\"click.fb\", function (d) {\n            !f(d.target).is(\"a\") &&\n              !f(d.target).parent().is(\"a\") &&\n              (d.preventDefault(), b[a.closeClick ? \"close\" : \"next\"]());\n          }),\n        a.closeBtn &&\n          f(a.tpl.closeBtn)\n            .appendTo(b.skin)\n            .bind(\"click.fb\", function (a) {\n              a.preventDefault();\n              b.close();\n            }),\n        a.arrows &&\n          1 < b.group.length &&\n          ((a.loop || 0 < a.index) &&\n            f(a.tpl.prev).appendTo(b.outer).bind(\"click.fb\", b.prev),\n          (a.loop || a.index < b.group.length - 1) &&\n            f(a.tpl.next).appendTo(b.outer).bind(\"click.fb\", b.next)),\n        b.trigger(\"afterShow\"),\n        !a.loop && a.index === a.group.length - 1\n          ? b.play(!1)\n          : b.opts.autoPlay &&\n            !b.player.isActive &&\n            ((b.opts.autoPlay = !1), b.play()));\n    },\n    _afterZoomOut: function (a) {\n      a = a || b.current;\n      f(\".fancybox-wrap\").trigger(\"onReset\").remove();\n      f.extend(b, {\n        group: {},\n        opts: {},\n        router: !1,\n        current: null,\n        isActive: !1,\n        isOpened: !1,\n        isOpen: !1,\n        isClosing: !1,\n        wrap: null,\n        skin: null,\n        outer: null,\n        inner: null,\n      });\n      b.trigger(\"afterClose\", a);\n    },\n  });\n  b.transitions = {\n    getOrigPosition: function () {\n      var a = b.current,\n        d = a.element,\n        e = a.orig,\n        c = {},\n        f = 50,\n        g = 50,\n        h = a.hPadding,\n        j = a.wPadding,\n        m = b.getViewport();\n      !e &&\n        a.isDom &&\n        d.is(\":visible\") &&\n        ((e = d.find(\"img:first\")), e.length || (e = d));\n      t(e)\n        ? ((c = e.offset()),\n          e.is(\"img\") && ((f = e.outerWidth()), (g = e.outerHeight())))\n        : ((c.top = m.y + (m.h - g) * a.topRatio),\n          (c.left = m.x + (m.w - f) * a.leftRatio));\n      if (\"fixed\" === b.wrap.css(\"position\") || a.locked)\n        (c.top -= m.y), (c.left -= m.x);\n      return (c = {\n        top: x(c.top - h * a.topRatio),\n        left: x(c.left - j * a.leftRatio),\n        width: x(f + j),\n        height: x(g + h),\n      });\n    },\n    step: function (a, d) {\n      var e,\n        c,\n        f = d.prop;\n      c = b.current;\n      var g = c.wrapSpace,\n        h = c.skinSpace;\n      if (\"width\" === f || \"height\" === f)\n        (e = d.end === d.start ? 1 : (a - d.start) / (d.end - d.start)),\n          b.isClosing && (e = 1 - e),\n          (c = \"width\" === f ? c.wPadding : c.hPadding),\n          (c = a - c),\n          b.skin[f](l(\"width\" === f ? c : c - g * e)),\n          b.inner[f](l(\"width\" === f ? c : c - g * e - h * e));\n    },\n    zoomIn: function () {\n      var a = b.current,\n        d = a.pos,\n        e = a.openEffect,\n        c = \"elastic\" === e,\n        k = f.extend({ opacity: 1 }, d);\n      delete k.position;\n      c\n        ? ((d = this.getOrigPosition()), a.openOpacity && (d.opacity = 0.1))\n        : \"fade\" === e && (d.opacity = 0.1);\n      b.wrap.css(d).animate(k, {\n        duration: \"none\" === e ? 0 : a.openSpeed,\n        easing: a.openEasing,\n        step: c ? this.step : null,\n        complete: b._afterZoomIn,\n      });\n    },\n    zoomOut: function () {\n      var a = b.current,\n        d = a.closeEffect,\n        e = \"elastic\" === d,\n        c = { opacity: 0.1 };\n      e && ((c = this.getOrigPosition()), a.closeOpacity && (c.opacity = 0.1));\n      b.wrap.animate(c, {\n        duration: \"none\" === d ? 0 : a.closeSpeed,\n        easing: a.closeEasing,\n        step: e ? this.step : null,\n        complete: b._afterZoomOut,\n      });\n    },\n    changeIn: function () {\n      var a = b.current,\n        d = a.nextEffect,\n        e = a.pos,\n        c = { opacity: 1 },\n        f = b.direction,\n        g;\n      e.opacity = 0.1;\n      \"elastic\" === d &&\n        ((g = \"down\" === f || \"up\" === f ? \"top\" : \"left\"),\n        \"down\" === f || \"right\" === f\n          ? ((e[g] = x(l(e[g]) - 200)), (c[g] = \"+=200px\"))\n          : ((e[g] = x(l(e[g]) + 200)), (c[g] = \"-=200px\")));\n      \"none\" === d\n        ? b._afterZoomIn()\n        : b.wrap.css(e).animate(c, {\n            duration: a.nextSpeed,\n            easing: a.nextEasing,\n            complete: b._afterZoomIn,\n          });\n    },\n    changeOut: function () {\n      var a = b.previous,\n        d = a.prevEffect,\n        e = { opacity: 0.1 },\n        c = b.direction;\n      \"elastic\" === d &&\n        (e[\"down\" === c || \"up\" === c ? \"top\" : \"left\"] =\n          (\"up\" === c || \"left\" === c ? \"-\" : \"+\") + \"=200px\");\n      a.wrap.animate(e, {\n        duration: \"none\" === d ? 0 : a.prevSpeed,\n        easing: a.prevEasing,\n        complete: function () {\n          f(this).trigger(\"onReset\").remove();\n        },\n      });\n    },\n  };\n  b.helpers.overlay = {\n    defaults: {\n      closeClick: !0,\n      speedOut: 200,\n      showEarly: !0,\n      css: {},\n      locked: !s,\n      fixed: !0,\n    },\n    overlay: null,\n    fixed: !1,\n    create: function (a) {\n      a = f.extend({}, this.defaults, a);\n      this.overlay && this.close();\n      this.overlay = f('<div class=\"fancybox-overlay\"></div>').appendTo(\"body\");\n      this.fixed = !1;\n      a.fixed &&\n        b.defaults.fixed &&\n        (this.overlay.addClass(\"fancybox-overlay-fixed\"), (this.fixed = !0));\n    },\n    open: function (a) {\n      var d = this;\n      a = f.extend({}, this.defaults, a);\n      this.overlay\n        ? this.overlay.unbind(\".overlay\").width(\"auto\").height(\"auto\")\n        : this.create(a);\n      this.fixed ||\n        (q.bind(\"resize.overlay\", f.proxy(this.update, this)), this.update());\n      a.closeClick &&\n        this.overlay.bind(\"click.overlay\", function (a) {\n          f(a.target).hasClass(\"fancybox-overlay\") &&\n            (b.isActive ? b.close() : d.close());\n        });\n      this.overlay.css(a.css).show();\n    },\n    close: function () {\n      f(\".fancybox-overlay\").remove();\n      q.unbind(\"resize.overlay\");\n      this.overlay = null;\n      !1 !== this.margin &&\n        (f(\"body\").css(\"margin-right\", this.margin), (this.margin = !1));\n      this.el && this.el.removeClass(\"fancybox-lock\");\n    },\n    update: function () {\n      var a = \"100%\",\n        b;\n      this.overlay.width(a).height(\"100%\");\n      H\n        ? ((b = Math.max(z.documentElement.offsetWidth, z.body.offsetWidth)),\n          n.width() > b && (a = n.width()))\n        : n.width() > q.width() && (a = n.width());\n      this.overlay.width(a).height(n.height());\n    },\n    onReady: function (a, b) {\n      f(\".fancybox-overlay\").stop(!0, !0);\n      this.overlay ||\n        ((this.margin =\n          n.height() > q.height() || \"scroll\" === f(\"body\").css(\"overflow-y\")\n            ? f(\"body\").css(\"margin-right\")\n            : !1),\n        (this.el = z.all && !z.querySelector ? f(\"html\") : f(\"body\")),\n        this.create(a));\n      a.locked &&\n        this.fixed &&\n        ((b.locked = this.overlay.append(b.wrap)), (b.fixed = !1));\n      !0 === a.showEarly && this.beforeShow.apply(this, arguments);\n    },\n    beforeShow: function (a, b) {\n      b.locked &&\n        (this.el.addClass(\"fancybox-lock\"),\n        !1 !== this.margin &&\n          f(\"body\").css(\"margin-right\", l(this.margin) + b.scrollbarWidth));\n      this.open(a);\n    },\n    onUpdate: function () {\n      this.fixed || this.update();\n    },\n    afterClose: function (a) {\n      this.overlay &&\n        !b.isActive &&\n        this.overlay.fadeOut(a.speedOut, f.proxy(this.close, this));\n    },\n  };\n  b.helpers.title = {\n    defaults: { type: \"float\", position: \"bottom\" },\n    beforeShow: function (a) {\n      var d = b.current,\n        e = d.title,\n        c = a.type;\n      f.isFunction(e) && (e = e.call(d.element, d));\n      if (p(e) && \"\" !== f.trim(e)) {\n        d = f(\n          '<div class=\"fancybox-title fancybox-title-' +\n            c +\n            '-wrap\">' +\n            e +\n            \"</div>\",\n        );\n        switch (c) {\n          case \"inside\":\n            c = b.skin;\n            break;\n          case \"outside\":\n            c = b.wrap;\n            break;\n          case \"over\":\n            c = b.inner;\n            break;\n          default:\n            (c = b.skin),\n              d.appendTo(\"body\"),\n              H && d.width(d.width()),\n              d.wrapInner('<span class=\"child\"></span>'),\n              (b.current.margin[2] += Math.abs(l(d.css(\"margin-bottom\"))));\n        }\n        d[\"top\" === a.position ? \"prependTo\" : \"appendTo\"](c);\n      }\n    },\n  };\n  f.fn.fancybox = function (a) {\n    var d,\n      e = f(this),\n      c = this.selector || \"\",\n      k = function (g) {\n        var h = f(this).blur(),\n          j = d,\n          k,\n          l;\n        !g.ctrlKey &&\n          !g.altKey &&\n          !g.shiftKey &&\n          !g.metaKey &&\n          !h.is(\".fancybox-wrap\") &&\n          ((k = a.groupAttr || \"data-fancybox-group\"),\n          (l = h.attr(k)),\n          l || ((k = \"rel\"), (l = h.get(0)[k])),\n          l &&\n            \"\" !== l &&\n            \"nofollow\" !== l &&\n            ((h = c.length ? f(c) : e),\n            (h = h.filter(\"[\" + k + '=\"' + l + '\"]')),\n            (j = h.index(this))),\n          (a.index = j),\n          !1 !== b.open(h, a) && g.preventDefault());\n      };\n    a = a || {};\n    d = a.index || 0;\n    !c || !1 === a.live\n      ? e.unbind(\"click.fb-start\").bind(\"click.fb-start\", k)\n      : n\n          .undelegate(c, \"click.fb-start\")\n          .delegate(\n            c + \":not('.fancybox-item, .fancybox-nav')\",\n            \"click.fb-start\",\n            k,\n          );\n    this.filter(\"[data-fancybox-start=1]\").trigger(\"click\");\n    return this;\n  };\n  n.ready(function () {\n    f.scrollbarWidth === r &&\n      (f.scrollbarWidth = function () {\n        var a = f(\n            '<div style=\"width:50px;height:50px;overflow:auto\"><div/></div>',\n          ).appendTo(\"body\"),\n          b = a.children(),\n          b = b.innerWidth() - b.height(99).innerWidth();\n        a.remove();\n        return b;\n      });\n    if (f.support.fixedPosition === r) {\n      var a = f.support,\n        d = f('<div style=\"position:fixed;top:20px;\"></div>').appendTo(\"body\"),\n        e = 20 === d[0].offsetTop || 15 === d[0].offsetTop;\n      d.remove();\n      a.fixedPosition = e;\n    }\n    f.extend(b.defaults, {\n      scrollbarWidth: f.scrollbarWidth(),\n      fixed: f.support.fixedPosition,\n      parent: f(\"body\"),\n    });\n  });\n})(window, document, jQuery);\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/jquery.flexslider.js",
    "content": "/*\n * jQuery FlexSlider v2.1\n * http://www.woothemes.com/flexslider/\n *\n * Copyright 2012 WooThemes\n * Free to use under the GPLv2 license.\n * http://www.gnu.org/licenses/gpl-2.0.html\n *\n * Contributing author: Tyler Smith (@mbmufffin)\n */\n\n(function ($) {\n  //FlexSlider: Object Instance\n  $.flexslider = function (el, options) {\n    var slider = $(el),\n      vars = $.extend({}, $.flexslider.defaults, options),\n      namespace = vars.namespace,\n      touch =\n        \"ontouchstart\" in window ||\n        (window.DocumentTouch && document instanceof DocumentTouch),\n      eventType = touch ? \"touchend\" : \"click\",\n      vertical = vars.direction === \"vertical\",\n      reverse = vars.reverse,\n      carousel = vars.itemWidth > 0,\n      fade = vars.animation === \"fade\",\n      asNav = vars.asNavFor !== \"\",\n      methods = {};\n\n    // Store a reference to the slider object\n    $.data(el, \"flexslider\", slider);\n\n    // Privat slider methods\n    methods = {\n      init: function () {\n        slider.animating = false;\n        slider.currentSlide = vars.startAt;\n        slider.animatingTo = slider.currentSlide;\n        slider.atEnd =\n          slider.currentSlide === 0 || slider.currentSlide === slider.last;\n        slider.containerSelector = vars.selector.substr(\n          0,\n          vars.selector.search(\" \"),\n        );\n        slider.slides = $(vars.selector, slider);\n        slider.container = $(slider.containerSelector, slider);\n        slider.count = slider.slides.length;\n        // SYNC:\n        slider.syncExists = $(vars.sync).length > 0;\n        // SLIDE:\n        if (vars.animation === \"slide\") vars.animation = \"swing\";\n        slider.prop = vertical ? \"top\" : \"marginLeft\";\n        slider.args = {};\n        // SLIDESHOW:\n        slider.manualPause = false;\n        // TOUCH/USECSS:\n        slider.transitions =\n          !vars.video &&\n          !fade &&\n          vars.useCSS &&\n          (function () {\n            var obj = document.createElement(\"div\"),\n              props = [\n                \"perspectiveProperty\",\n                \"WebkitPerspective\",\n                \"MozPerspective\",\n                \"OPerspective\",\n                \"msPerspective\",\n              ];\n            for (var i in props) {\n              if (obj.style[props[i]] !== undefined) {\n                slider.pfx = props[i].replace(\"Perspective\", \"\").toLowerCase();\n                slider.prop = \"-\" + slider.pfx + \"-transform\";\n                return true;\n              }\n            }\n            return false;\n          })();\n        // CONTROLSCONTAINER:\n        if (vars.controlsContainer !== \"\")\n          slider.controlsContainer =\n            $(vars.controlsContainer).length > 0 && $(vars.controlsContainer);\n        // MANUAL:\n        if (vars.manualControls !== \"\")\n          slider.manualControls =\n            $(vars.manualControls).length > 0 && $(vars.manualControls);\n\n        // RANDOMIZE:\n        if (vars.randomize) {\n          slider.slides.sort(function () {\n            return Math.round(Math.random()) - 0.5;\n          });\n          slider.container.empty().append(slider.slides);\n        }\n\n        slider.doMath();\n\n        // ASNAV:\n        if (asNav) methods.asNav.setup();\n\n        // INIT\n        slider.setup(\"init\");\n\n        // CONTROLNAV:\n        if (vars.controlNav) methods.controlNav.setup();\n\n        // DIRECTIONNAV:\n        if (vars.directionNav) methods.directionNav.setup();\n\n        // KEYBOARD:\n        if (\n          vars.keyboard &&\n          ($(slider.containerSelector).length === 1 || vars.multipleKeyboard)\n        ) {\n          $(document).bind(\"keyup\", function (event) {\n            var keycode = event.keyCode;\n            if (!slider.animating && (keycode === 39 || keycode === 37)) {\n              var target =\n                keycode === 39\n                  ? slider.getTarget(\"next\")\n                  : keycode === 37\n                    ? slider.getTarget(\"prev\")\n                    : false;\n              slider.flexAnimate(target, vars.pauseOnAction);\n            }\n          });\n        }\n        // MOUSEWHEEL:\n        if (vars.mousewheel) {\n          slider.bind(\"mousewheel\", function (event, delta, deltaX, deltaY) {\n            event.preventDefault();\n            var target =\n              delta < 0 ? slider.getTarget(\"next\") : slider.getTarget(\"prev\");\n            slider.flexAnimate(target, vars.pauseOnAction);\n          });\n        }\n\n        // PAUSEPLAY\n        if (vars.pausePlay) methods.pausePlay.setup();\n\n        // SLIDSESHOW\n        if (vars.slideshow) {\n          if (vars.pauseOnHover) {\n            slider.hover(\n              function () {\n                if (!slider.manualPlay && !slider.manualPause) slider.pause();\n              },\n              function () {\n                if (!slider.manualPause && !slider.manualPlay) slider.play();\n              },\n            );\n          }\n          // initialize animation\n          vars.initDelay > 0\n            ? setTimeout(slider.play, vars.initDelay)\n            : slider.play();\n        }\n\n        // TOUCH\n        if (touch && vars.touch) methods.touch();\n\n        // FADE&&SMOOTHHEIGHT || SLIDE:\n        if (!fade || (fade && vars.smoothHeight))\n          $(window).bind(\"resize focus\", methods.resize);\n\n        // API: start() Callback\n        setTimeout(function () {\n          vars.start(slider);\n        }, 200);\n      },\n      asNav: {\n        setup: function () {\n          slider.asNav = true;\n          slider.animatingTo = Math.floor(slider.currentSlide / slider.move);\n          slider.currentItem = slider.currentSlide;\n          slider.slides\n            .removeClass(namespace + \"active-slide\")\n            .eq(slider.currentItem)\n            .addClass(namespace + \"active-slide\");\n          slider.slides.click(function (e) {\n            e.preventDefault();\n            var $slide = $(this),\n              target = $slide.index();\n            if (\n              !$(vars.asNavFor).data(\"flexslider\").animating &&\n              !$slide.hasClass(\"active\")\n            ) {\n              slider.direction = slider.currentItem < target ? \"next\" : \"prev\";\n              slider.flexAnimate(target, vars.pauseOnAction, false, true, true);\n            }\n          });\n        },\n      },\n      controlNav: {\n        setup: function () {\n          if (!slider.manualControls) {\n            methods.controlNav.setupPaging();\n          } else {\n            // MANUALCONTROLS:\n            methods.controlNav.setupManual();\n          }\n        },\n        setupPaging: function () {\n          var type =\n              vars.controlNav === \"thumbnails\"\n                ? \"control-thumbs\"\n                : \"control-paging\",\n            j = 1,\n            item;\n\n          slider.controlNavScaffold = $(\n            '<ol class=\"' +\n              namespace +\n              \"control-nav \" +\n              namespace +\n              type +\n              '\"></ol>',\n          );\n\n          if (slider.pagingCount > 1) {\n            for (var i = 0; i < slider.pagingCount; i++) {\n              item =\n                vars.controlNav === \"thumbnails\"\n                  ? '<img src=\"' +\n                    slider.slides.eq(i).attr(\"data-thumb\") +\n                    '\"/>'\n                  : \"<a>\" + j + \"</a>\";\n              slider.controlNavScaffold.append(\"<li>\" + item + \"</li>\");\n              j++;\n            }\n          }\n\n          // CONTROLSCONTAINER:\n          slider.controlsContainer\n            ? $(slider.controlsContainer).append(slider.controlNavScaffold)\n            : slider.append(slider.controlNavScaffold);\n          methods.controlNav.set();\n\n          methods.controlNav.active();\n\n          slider.controlNavScaffold.delegate(\n            \"a, img\",\n            eventType,\n            function (event) {\n              event.preventDefault();\n              var $this = $(this),\n                target = slider.controlNav.index($this);\n\n              if (!$this.hasClass(namespace + \"active\")) {\n                slider.direction =\n                  target > slider.currentSlide ? \"next\" : \"prev\";\n                slider.flexAnimate(target, vars.pauseOnAction);\n              }\n            },\n          );\n          // Prevent iOS click event bug\n          if (touch) {\n            slider.controlNavScaffold.delegate(\n              \"a\",\n              \"click touchstart\",\n              function (event) {\n                event.preventDefault();\n              },\n            );\n          }\n        },\n        setupManual: function () {\n          slider.controlNav = slider.manualControls;\n          methods.controlNav.active();\n\n          slider.controlNav.live(eventType, function (event) {\n            event.preventDefault();\n            var $this = $(this),\n              target = slider.controlNav.index($this);\n\n            if (!$this.hasClass(namespace + \"active\")) {\n              target > slider.currentSlide\n                ? (slider.direction = \"next\")\n                : (slider.direction = \"prev\");\n              slider.flexAnimate(target, vars.pauseOnAction);\n            }\n          });\n          // Prevent iOS click event bug\n          if (touch) {\n            slider.controlNav.live(\"click touchstart\", function (event) {\n              event.preventDefault();\n            });\n          }\n        },\n        set: function () {\n          var selector = vars.controlNav === \"thumbnails\" ? \"img\" : \"a\";\n          slider.controlNav = $(\n            \".\" + namespace + \"control-nav li \" + selector,\n            slider.controlsContainer ? slider.controlsContainer : slider,\n          );\n        },\n        active: function () {\n          slider.controlNav\n            .removeClass(namespace + \"active\")\n            .eq(slider.animatingTo)\n            .addClass(namespace + \"active\");\n        },\n        update: function (action, pos) {\n          if (slider.pagingCount > 1 && action === \"add\") {\n            slider.controlNavScaffold.append(\n              $(\"<li><a>\" + slider.count + \"</a></li>\"),\n            );\n          } else if (slider.pagingCount === 1) {\n            slider.controlNavScaffold.find(\"li\").remove();\n          } else {\n            slider.controlNav.eq(pos).closest(\"li\").remove();\n          }\n          methods.controlNav.set();\n          slider.pagingCount > 1 &&\n          slider.pagingCount !== slider.controlNav.length\n            ? slider.update(pos, action)\n            : methods.controlNav.active();\n        },\n      },\n      directionNav: {\n        setup: function () {\n          var directionNavScaffold = $(\n            '<ul class=\"' +\n              namespace +\n              'direction-nav\"><li><a class=\"' +\n              namespace +\n              'prev\" href=\"#\">' +\n              vars.prevText +\n              '</a></li><li><a class=\"' +\n              namespace +\n              'next\" href=\"#\">' +\n              vars.nextText +\n              \"</a></li></ul>\",\n          );\n\n          // CONTROLSCONTAINER:\n          if (slider.controlsContainer) {\n            $(slider.controlsContainer).append(directionNavScaffold);\n            slider.directionNav = $(\n              \".\" + namespace + \"direction-nav li a\",\n              slider.controlsContainer,\n            );\n          } else {\n            slider.append(directionNavScaffold);\n            slider.directionNav = $(\n              \".\" + namespace + \"direction-nav li a\",\n              slider,\n            );\n          }\n\n          methods.directionNav.update();\n\n          slider.directionNav.bind(eventType, function (event) {\n            event.preventDefault();\n            var target = $(this).hasClass(namespace + \"next\")\n              ? slider.getTarget(\"next\")\n              : slider.getTarget(\"prev\");\n            slider.flexAnimate(target, vars.pauseOnAction);\n          });\n          // Prevent iOS click event bug\n          if (touch) {\n            slider.directionNav.bind(\"click touchstart\", function (event) {\n              event.preventDefault();\n            });\n          }\n        },\n        update: function () {\n          var disabledClass = namespace + \"disabled\";\n          if (slider.pagingCount === 1) {\n            slider.directionNav.addClass(disabledClass);\n          } else if (!vars.animationLoop) {\n            if (slider.animatingTo === 0) {\n              slider.directionNav\n                .removeClass(disabledClass)\n                .filter(\".\" + namespace + \"prev\")\n                .addClass(disabledClass);\n            } else if (slider.animatingTo === slider.last) {\n              slider.directionNav\n                .removeClass(disabledClass)\n                .filter(\".\" + namespace + \"next\")\n                .addClass(disabledClass);\n            } else {\n              slider.directionNav.removeClass(disabledClass);\n            }\n          } else {\n            slider.directionNav.removeClass(disabledClass);\n          }\n        },\n      },\n      pausePlay: {\n        setup: function () {\n          var pausePlayScaffold = $(\n            '<div class=\"' + namespace + 'pauseplay\"><a></a></div>',\n          );\n\n          // CONTROLSCONTAINER:\n          if (slider.controlsContainer) {\n            slider.controlsContainer.append(pausePlayScaffold);\n            slider.pausePlay = $(\n              \".\" + namespace + \"pauseplay a\",\n              slider.controlsContainer,\n            );\n          } else {\n            slider.append(pausePlayScaffold);\n            slider.pausePlay = $(\".\" + namespace + \"pauseplay a\", slider);\n          }\n\n          methods.pausePlay.update(\n            vars.slideshow ? namespace + \"pause\" : namespace + \"play\",\n          );\n\n          slider.pausePlay.bind(eventType, function (event) {\n            event.preventDefault();\n            if ($(this).hasClass(namespace + \"pause\")) {\n              slider.manualPause = true;\n              slider.manualPlay = false;\n              slider.pause();\n            } else {\n              slider.manualPause = false;\n              slider.manualPlay = true;\n              slider.play();\n            }\n          });\n          // Prevent iOS click event bug\n          if (touch) {\n            slider.pausePlay.bind(\"click touchstart\", function (event) {\n              event.preventDefault();\n            });\n          }\n        },\n        update: function (state) {\n          state === \"play\"\n            ? slider.pausePlay\n                .removeClass(namespace + \"pause\")\n                .addClass(namespace + \"play\")\n                .text(vars.playText)\n            : slider.pausePlay\n                .removeClass(namespace + \"play\")\n                .addClass(namespace + \"pause\")\n                .text(vars.pauseText);\n        },\n      },\n      touch: function () {\n        var startX,\n          startY,\n          offset,\n          cwidth,\n          dx,\n          startT,\n          scrolling = false;\n\n        el.addEventListener(\"touchstart\", onTouchStart, false);\n        function onTouchStart(e) {\n          if (slider.animating) {\n            e.preventDefault();\n          } else if (e.touches.length === 1) {\n            slider.pause();\n            // CAROUSEL:\n            cwidth = vertical ? slider.h : slider.w;\n            startT = Number(new Date());\n            // CAROUSEL:\n            offset =\n              carousel && reverse && slider.animatingTo === slider.last\n                ? 0\n                : carousel && reverse\n                  ? slider.limit -\n                    (slider.itemW + vars.itemMargin) *\n                      slider.move *\n                      slider.animatingTo\n                  : carousel && slider.currentSlide === slider.last\n                    ? slider.limit\n                    : carousel\n                      ? (slider.itemW + vars.itemMargin) *\n                        slider.move *\n                        slider.currentSlide\n                      : reverse\n                        ? (slider.last -\n                            slider.currentSlide +\n                            slider.cloneOffset) *\n                          cwidth\n                        : (slider.currentSlide + slider.cloneOffset) * cwidth;\n            startX = vertical ? e.touches[0].pageY : e.touches[0].pageX;\n            startY = vertical ? e.touches[0].pageX : e.touches[0].pageY;\n\n            el.addEventListener(\"touchmove\", onTouchMove, false);\n            el.addEventListener(\"touchend\", onTouchEnd, false);\n          }\n        }\n\n        function onTouchMove(e) {\n          dx = vertical\n            ? startX - e.touches[0].pageY\n            : startX - e.touches[0].pageX;\n          scrolling = vertical\n            ? Math.abs(dx) < Math.abs(e.touches[0].pageX - startY)\n            : Math.abs(dx) < Math.abs(e.touches[0].pageY - startY);\n\n          if (!scrolling || Number(new Date()) - startT > 500) {\n            e.preventDefault();\n            if (!fade && slider.transitions) {\n              if (!vars.animationLoop) {\n                dx =\n                  dx /\n                  ((slider.currentSlide === 0 && dx < 0) ||\n                  (slider.currentSlide === slider.last && dx > 0)\n                    ? Math.abs(dx) / cwidth + 2\n                    : 1);\n              }\n              slider.setProps(offset + dx, \"setTouch\");\n            }\n          }\n        }\n\n        function onTouchEnd(e) {\n          // finish the touch by undoing the touch session\n          el.removeEventListener(\"touchmove\", onTouchMove, false);\n\n          if (\n            slider.animatingTo === slider.currentSlide &&\n            !scrolling &&\n            !(dx === null)\n          ) {\n            var updateDx = reverse ? -dx : dx,\n              target =\n                updateDx > 0\n                  ? slider.getTarget(\"next\")\n                  : slider.getTarget(\"prev\");\n\n            if (\n              slider.canAdvance(target) &&\n              ((Number(new Date()) - startT < 550 && Math.abs(updateDx) > 50) ||\n                Math.abs(updateDx) > cwidth / 2)\n            ) {\n              slider.flexAnimate(target, vars.pauseOnAction);\n            } else {\n              if (!fade)\n                slider.flexAnimate(\n                  slider.currentSlide,\n                  vars.pauseOnAction,\n                  true,\n                );\n            }\n          }\n          el.removeEventListener(\"touchend\", onTouchEnd, false);\n          startX = null;\n          startY = null;\n          dx = null;\n          offset = null;\n        }\n      },\n      resize: function () {\n        if (!slider.animating && slider.is(\":visible\")) {\n          if (!carousel) slider.doMath();\n\n          if (fade) {\n            // SMOOTH HEIGHT:\n            methods.smoothHeight();\n          } else if (carousel) {\n            //CAROUSEL:\n            slider.slides.width(slider.computedW);\n            slider.update(slider.pagingCount);\n            slider.setProps();\n          } else if (vertical) {\n            //VERTICAL:\n            slider.viewport.height(slider.h);\n            slider.setProps(slider.h, \"setTotal\");\n          } else {\n            // SMOOTH HEIGHT:\n            if (vars.smoothHeight) methods.smoothHeight();\n            slider.newSlides.width(slider.computedW);\n            slider.setProps(slider.computedW, \"setTotal\");\n          }\n        }\n      },\n      smoothHeight: function (dur) {\n        if (!vertical || fade) {\n          var $obj = fade ? slider : slider.viewport;\n          dur\n            ? $obj.animate(\n                { height: slider.slides.eq(slider.animatingTo).height() },\n                dur,\n              )\n            : $obj.height(slider.slides.eq(slider.animatingTo).height());\n        }\n      },\n      sync: function (action) {\n        var $obj = $(vars.sync).data(\"flexslider\"),\n          target = slider.animatingTo;\n\n        switch (action) {\n          case \"animate\":\n            $obj.flexAnimate(target, vars.pauseOnAction, false, true);\n            break;\n          case \"play\":\n            if (!$obj.playing && !$obj.asNav) {\n              $obj.play();\n            }\n            break;\n          case \"pause\":\n            $obj.pause();\n            break;\n        }\n      },\n    };\n\n    // public methods\n    slider.flexAnimate = function (target, pause, override, withSync, fromNav) {\n      if (asNav && slider.pagingCount === 1)\n        slider.direction = slider.currentItem < target ? \"next\" : \"prev\";\n\n      if (\n        !slider.animating &&\n        (slider.canAdvance(target, fromNav) || override) &&\n        slider.is(\":visible\")\n      ) {\n        if (asNav && withSync) {\n          var master = $(vars.asNavFor).data(\"flexslider\");\n          slider.atEnd = target === 0 || target === slider.count - 1;\n          master.flexAnimate(target, true, false, true, fromNav);\n          slider.direction = slider.currentItem < target ? \"next\" : \"prev\";\n          master.direction = slider.direction;\n\n          if (\n            Math.ceil((target + 1) / slider.visible) - 1 !==\n              slider.currentSlide &&\n            target !== 0\n          ) {\n            slider.currentItem = target;\n            slider.slides\n              .removeClass(namespace + \"active-slide\")\n              .eq(target)\n              .addClass(namespace + \"active-slide\");\n            target = Math.floor(target / slider.visible);\n          } else {\n            slider.currentItem = target;\n            slider.slides\n              .removeClass(namespace + \"active-slide\")\n              .eq(target)\n              .addClass(namespace + \"active-slide\");\n            return false;\n          }\n        }\n\n        slider.animating = true;\n        slider.animatingTo = target;\n        // API: before() animation Callback\n        vars.before(slider);\n\n        // SLIDESHOW:\n        if (pause) slider.pause();\n\n        // SYNC:\n        if (slider.syncExists && !fromNav) methods.sync(\"animate\");\n\n        // CONTROLNAV\n        if (vars.controlNav) methods.controlNav.active();\n\n        // !CAROUSEL:\n        // CANDIDATE: slide active class (for add/remove slide)\n        if (!carousel)\n          slider.slides\n            .removeClass(namespace + \"active-slide\")\n            .eq(target)\n            .addClass(namespace + \"active-slide\");\n\n        // INFINITE LOOP:\n        // CANDIDATE: atEnd\n        slider.atEnd = target === 0 || target === slider.last;\n\n        // DIRECTIONNAV:\n        if (vars.directionNav) methods.directionNav.update();\n\n        if (target === slider.last) {\n          // API: end() of cycle Callback\n          vars.end(slider);\n          // SLIDESHOW && !INFINITE LOOP:\n          if (!vars.animationLoop) slider.pause();\n        }\n\n        // SLIDE:\n        if (!fade) {\n          var dimension = vertical\n              ? slider.slides.filter(\":first\").height()\n              : slider.computedW,\n            margin,\n            slideString,\n            calcNext;\n\n          // INFINITE LOOP / REVERSE:\n          if (carousel) {\n            margin =\n              vars.itemWidth > slider.w ? vars.itemMargin * 2 : vars.itemMargin;\n            calcNext =\n              (slider.itemW + margin) * slider.move * slider.animatingTo;\n            slideString =\n              calcNext > slider.limit && slider.visible !== 1\n                ? slider.limit\n                : calcNext;\n          } else if (\n            slider.currentSlide === 0 &&\n            target === slider.count - 1 &&\n            vars.animationLoop &&\n            slider.direction !== \"next\"\n          ) {\n            slideString = reverse\n              ? (slider.count + slider.cloneOffset) * dimension\n              : 0;\n          } else if (\n            slider.currentSlide === slider.last &&\n            target === 0 &&\n            vars.animationLoop &&\n            slider.direction !== \"prev\"\n          ) {\n            slideString = reverse ? 0 : (slider.count + 1) * dimension;\n          } else {\n            slideString = reverse\n              ? (slider.count - 1 - target + slider.cloneOffset) * dimension\n              : (target + slider.cloneOffset) * dimension;\n          }\n          slider.setProps(slideString, \"\", vars.animationSpeed);\n          if (slider.transitions) {\n            if (!vars.animationLoop || !slider.atEnd) {\n              slider.animating = false;\n              slider.currentSlide = slider.animatingTo;\n            }\n            slider.container.unbind(\"webkitTransitionEnd transitionend\");\n            slider.container.bind(\n              \"webkitTransitionEnd transitionend\",\n              function () {\n                slider.wrapup(dimension);\n              },\n            );\n          } else {\n            slider.container.animate(\n              slider.args,\n              vars.animationSpeed,\n              vars.easing,\n              function () {\n                slider.wrapup(dimension);\n              },\n            );\n          }\n        } else {\n          // FADE:\n          if (!touch) {\n            slider.slides\n              .eq(slider.currentSlide)\n              .fadeOut(vars.animationSpeed, vars.easing);\n            slider.slides\n              .eq(target)\n              .fadeIn(vars.animationSpeed, vars.easing, slider.wrapup);\n          } else {\n            slider.slides\n              .eq(slider.currentSlide)\n              .css({ opacity: 0, zIndex: 1 });\n            slider.slides.eq(target).css({ opacity: 1, zIndex: 2 });\n\n            slider.slides.unbind(\"webkitTransitionEnd transitionend\");\n            slider.slides\n              .eq(slider.currentSlide)\n              .bind(\"webkitTransitionEnd transitionend\", function () {\n                // API: after() animation Callback\n                vars.after(slider);\n              });\n\n            slider.animating = false;\n            slider.currentSlide = slider.animatingTo;\n          }\n        }\n        // SMOOTH HEIGHT:\n        if (vars.smoothHeight) methods.smoothHeight(vars.animationSpeed);\n      }\n    };\n    slider.wrapup = function (dimension) {\n      // SLIDE:\n      if (!fade && !carousel) {\n        if (\n          slider.currentSlide === 0 &&\n          slider.animatingTo === slider.last &&\n          vars.animationLoop\n        ) {\n          slider.setProps(dimension, \"jumpEnd\");\n        } else if (\n          slider.currentSlide === slider.last &&\n          slider.animatingTo === 0 &&\n          vars.animationLoop\n        ) {\n          slider.setProps(dimension, \"jumpStart\");\n        }\n      }\n      slider.animating = false;\n      slider.currentSlide = slider.animatingTo;\n      // API: after() animation Callback\n      vars.after(slider);\n    };\n\n    // SLIDESHOW:\n    slider.animateSlides = function () {\n      if (!slider.animating) slider.flexAnimate(slider.getTarget(\"next\"));\n    };\n    // SLIDESHOW:\n    slider.pause = function () {\n      clearInterval(slider.animatedSlides);\n      slider.playing = false;\n      // PAUSEPLAY:\n      if (vars.pausePlay) methods.pausePlay.update(\"play\");\n      // SYNC:\n      if (slider.syncExists) methods.sync(\"pause\");\n    };\n    // SLIDESHOW:\n    slider.play = function () {\n      slider.animatedSlides = setInterval(\n        slider.animateSlides,\n        vars.slideshowSpeed,\n      );\n      slider.playing = true;\n      // PAUSEPLAY:\n      if (vars.pausePlay) methods.pausePlay.update(\"pause\");\n      // SYNC:\n      if (slider.syncExists) methods.sync(\"play\");\n    };\n    slider.canAdvance = function (target, fromNav) {\n      // ASNAV:\n      var last = asNav ? slider.pagingCount - 1 : slider.last;\n      return fromNav\n        ? true\n        : asNav &&\n            slider.currentItem === slider.count - 1 &&\n            target === 0 &&\n            slider.direction === \"prev\"\n          ? true\n          : asNav &&\n              slider.currentItem === 0 &&\n              target === slider.pagingCount - 1 &&\n              slider.direction !== \"next\"\n            ? false\n            : target === slider.currentSlide && !asNav\n              ? false\n              : vars.animationLoop\n                ? true\n                : slider.atEnd &&\n                    slider.currentSlide === 0 &&\n                    target === last &&\n                    slider.direction !== \"next\"\n                  ? false\n                  : slider.atEnd &&\n                      slider.currentSlide === last &&\n                      target === 0 &&\n                      slider.direction === \"next\"\n                    ? false\n                    : true;\n    };\n    slider.getTarget = function (dir) {\n      slider.direction = dir;\n      if (dir === \"next\") {\n        return slider.currentSlide === slider.last\n          ? 0\n          : slider.currentSlide + 1;\n      } else {\n        return slider.currentSlide === 0\n          ? slider.last\n          : slider.currentSlide - 1;\n      }\n    };\n\n    // SLIDE:\n    slider.setProps = function (pos, special, dur) {\n      var target = (function () {\n        var posCheck = pos\n            ? pos\n            : (slider.itemW + vars.itemMargin) *\n              slider.move *\n              slider.animatingTo,\n          posCalc = (function () {\n            if (carousel) {\n              return special === \"setTouch\"\n                ? pos\n                : reverse && slider.animatingTo === slider.last\n                  ? 0\n                  : reverse\n                    ? slider.limit -\n                      (slider.itemW + vars.itemMargin) *\n                        slider.move *\n                        slider.animatingTo\n                    : slider.animatingTo === slider.last\n                      ? slider.limit\n                      : posCheck;\n            } else {\n              switch (special) {\n                case \"setTotal\":\n                  return reverse\n                    ? (slider.count -\n                        1 -\n                        slider.currentSlide +\n                        slider.cloneOffset) *\n                        pos\n                    : (slider.currentSlide + slider.cloneOffset) * pos;\n                case \"setTouch\":\n                  return reverse ? pos : pos;\n                case \"jumpEnd\":\n                  return reverse ? pos : slider.count * pos;\n                case \"jumpStart\":\n                  return reverse ? slider.count * pos : pos;\n                default:\n                  return pos;\n              }\n            }\n          })();\n        return posCalc * -1 + \"px\";\n      })();\n\n      if (slider.transitions) {\n        target = vertical\n          ? \"translate3d(0,\" + target + \",0)\"\n          : \"translate3d(\" + target + \",0,0)\";\n        dur = dur !== undefined ? dur / 1000 + \"s\" : \"0s\";\n        slider.container.css(\"-\" + slider.pfx + \"-transition-duration\", dur);\n      }\n\n      slider.args[slider.prop] = target;\n      if (slider.transitions || dur === undefined)\n        slider.container.css(slider.args);\n    };\n\n    slider.setup = function (type) {\n      // SLIDE:\n      if (!fade) {\n        var sliderOffset, arr;\n\n        if (type === \"init\") {\n          slider.viewport = $('<div class=\"' + namespace + 'viewport\"></div>')\n            .css({ overflow: \"hidden\", position: \"relative\" })\n            .appendTo(slider)\n            .append(slider.container);\n          // INFINITE LOOP:\n          slider.cloneCount = 0;\n          slider.cloneOffset = 0;\n          // REVERSE:\n          if (reverse) {\n            arr = $.makeArray(slider.slides).reverse();\n            slider.slides = $(arr);\n            slider.container.empty().append(slider.slides);\n          }\n        }\n        // INFINITE LOOP && !CAROUSEL:\n        if (vars.animationLoop && !carousel) {\n          slider.cloneCount = 2;\n          slider.cloneOffset = 1;\n          // clear out old clones\n          if (type !== \"init\") slider.container.find(\".clone\").remove();\n          slider.container\n            .append(slider.slides.first().clone().addClass(\"clone\"))\n            .prepend(slider.slides.last().clone().addClass(\"clone\"));\n        }\n        slider.newSlides = $(vars.selector, slider);\n\n        sliderOffset = reverse\n          ? slider.count - 1 - slider.currentSlide + slider.cloneOffset\n          : slider.currentSlide + slider.cloneOffset;\n        // VERTICAL:\n        if (vertical && !carousel) {\n          slider.container\n            .height((slider.count + slider.cloneCount) * 200 + \"%\")\n            .css(\"position\", \"absolute\")\n            .width(\"100%\");\n          setTimeout(\n            function () {\n              slider.newSlides.css({ display: \"block\" });\n              slider.doMath();\n              slider.viewport.height(slider.h);\n              slider.setProps(sliderOffset * slider.h, \"init\");\n            },\n            type === \"init\" ? 100 : 0,\n          );\n        } else {\n          slider.container.width(\n            (slider.count + slider.cloneCount) * 200 + \"%\",\n          );\n          slider.setProps(sliderOffset * slider.computedW, \"init\");\n          setTimeout(\n            function () {\n              slider.doMath();\n              slider.newSlides.css({\n                width: slider.computedW,\n                float: \"left\",\n                display: \"block\",\n              });\n              // SMOOTH HEIGHT:\n              if (vars.smoothHeight) methods.smoothHeight();\n            },\n            type === \"init\" ? 100 : 0,\n          );\n        }\n      } else {\n        // FADE:\n        slider.slides.css({\n          width: \"100%\",\n          float: \"left\",\n          marginRight: \"-100%\",\n          position: \"relative\",\n        });\n        if (type === \"init\") {\n          if (!touch) {\n            slider.slides\n              .eq(slider.currentSlide)\n              .fadeIn(vars.animationSpeed, vars.easing);\n          } else {\n            slider.slides\n              .css({\n                opacity: 0,\n                display: \"block\",\n                webkitTransition:\n                  \"opacity \" + vars.animationSpeed / 1000 + \"s ease\",\n                zIndex: 1,\n              })\n              .eq(slider.currentSlide)\n              .css({ opacity: 1, zIndex: 2 });\n          }\n        }\n        // SMOOTH HEIGHT:\n        if (vars.smoothHeight) methods.smoothHeight();\n      }\n      // !CAROUSEL:\n      // CANDIDATE: active slide\n      if (!carousel)\n        slider.slides\n          .removeClass(namespace + \"active-slide\")\n          .eq(slider.currentSlide)\n          .addClass(namespace + \"active-slide\");\n    };\n\n    slider.doMath = function () {\n      var slide = slider.slides.first(),\n        slideMargin = vars.itemMargin,\n        minItems = vars.minItems,\n        maxItems = vars.maxItems;\n\n      slider.w = slider.width();\n      slider.h = slide.height();\n      slider.boxPadding = slide.outerWidth() - slide.width();\n\n      // CAROUSEL:\n      if (carousel) {\n        slider.itemT = vars.itemWidth + slideMargin;\n        slider.minW = minItems ? minItems * slider.itemT : slider.w;\n        slider.maxW = maxItems ? maxItems * slider.itemT : slider.w;\n        slider.itemW =\n          slider.minW > slider.w\n            ? (slider.w - slideMargin * minItems) / minItems\n            : slider.maxW < slider.w\n              ? (slider.w - slideMargin * maxItems) / maxItems\n              : vars.itemWidth > slider.w\n                ? slider.w\n                : vars.itemWidth;\n        slider.visible = Math.floor(slider.w / (slider.itemW + slideMargin));\n        slider.move =\n          vars.move > 0 && vars.move < slider.visible\n            ? vars.move\n            : slider.visible;\n        slider.pagingCount = Math.ceil(\n          (slider.count - slider.visible) / slider.move + 1,\n        );\n        slider.last = slider.pagingCount - 1;\n        slider.limit =\n          slider.pagingCount === 1\n            ? 0\n            : vars.itemWidth > slider.w\n              ? (slider.itemW + slideMargin * 2) * slider.count -\n                slider.w -\n                slideMargin\n              : (slider.itemW + slideMargin) * slider.count -\n                slider.w -\n                slideMargin;\n      } else {\n        slider.itemW = slider.w;\n        slider.pagingCount = slider.count;\n        slider.last = slider.count - 1;\n      }\n      slider.computedW = slider.itemW - slider.boxPadding;\n    };\n\n    slider.update = function (pos, action) {\n      slider.doMath();\n\n      // update currentSlide and slider.animatingTo if necessary\n      if (!carousel) {\n        if (pos < slider.currentSlide) {\n          slider.currentSlide += 1;\n        } else if (pos <= slider.currentSlide && pos !== 0) {\n          slider.currentSlide -= 1;\n        }\n        slider.animatingTo = slider.currentSlide;\n      }\n\n      // update controlNav\n      if (vars.controlNav && !slider.manualControls) {\n        if (\n          (action === \"add\" && !carousel) ||\n          slider.pagingCount > slider.controlNav.length\n        ) {\n          methods.controlNav.update(\"add\");\n        } else if (\n          (action === \"remove\" && !carousel) ||\n          slider.pagingCount < slider.controlNav.length\n        ) {\n          if (carousel && slider.currentSlide > slider.last) {\n            slider.currentSlide -= 1;\n            slider.animatingTo -= 1;\n          }\n          methods.controlNav.update(\"remove\", slider.last);\n        }\n      }\n      // update directionNav\n      if (vars.directionNav) methods.directionNav.update();\n    };\n\n    slider.addSlide = function (obj, pos) {\n      var $obj = $(obj);\n\n      slider.count += 1;\n      slider.last = slider.count - 1;\n\n      // append new slide\n      if (vertical && reverse) {\n        pos !== undefined\n          ? slider.slides.eq(slider.count - pos).after($obj)\n          : slider.container.prepend($obj);\n      } else {\n        pos !== undefined\n          ? slider.slides.eq(pos).before($obj)\n          : slider.container.append($obj);\n      }\n\n      // update currentSlide, animatingTo, controlNav, and directionNav\n      slider.update(pos, \"add\");\n\n      // update slider.slides\n      slider.slides = $(vars.selector + \":not(.clone)\", slider);\n      // re-setup the slider to accomdate new slide\n      slider.setup();\n\n      //FlexSlider: added() Callback\n      vars.added(slider);\n    };\n    slider.removeSlide = function (obj) {\n      var pos = isNaN(obj) ? slider.slides.index($(obj)) : obj;\n\n      // update count\n      slider.count -= 1;\n      slider.last = slider.count - 1;\n\n      // remove slide\n      if (isNaN(obj)) {\n        $(obj, slider.slides).remove();\n      } else {\n        vertical && reverse\n          ? slider.slides.eq(slider.last).remove()\n          : slider.slides.eq(obj).remove();\n      }\n\n      // update currentSlide, animatingTo, controlNav, and directionNav\n      slider.doMath();\n      slider.update(pos, \"remove\");\n\n      // update slider.slides\n      slider.slides = $(vars.selector + \":not(.clone)\", slider);\n      // re-setup the slider to accomdate new slide\n      slider.setup();\n\n      // FlexSlider: removed() Callback\n      vars.removed(slider);\n    };\n\n    //FlexSlider: Initialize\n    methods.init();\n  };\n\n  //FlexSlider: Default Settings\n  $.flexslider.defaults = {\n    namespace: \"flex-\", //{NEW} String: Prefix string attached to the class of every element generated by the plugin\n    selector: \".slides > li\", //{NEW} Selector: Must match a simple pattern. '{container} > {slide}' -- Ignore pattern at your own peril\n    animation: \"fade\", //String: Select your animation type, \"fade\" or \"slide\"\n    easing: \"swing\", //{NEW} String: Determines the easing method used in jQuery transitions. jQuery easing plugin is supported!\n    direction: \"horizontal\", //String: Select the sliding direction, \"horizontal\" or \"vertical\"\n    reverse: false, //{NEW} Boolean: Reverse the animation direction\n    animationLoop: true, //Boolean: Should the animation loop? If false, directionNav will received \"disable\" classes at either end\n    smoothHeight: false, //{NEW} Boolean: Allow height of the slider to animate smoothly in horizontal mode\n    startAt: 0, //Integer: The slide that the slider should start on. Array notation (0 = first slide)\n    slideshow: true, //Boolean: Animate slider automatically\n    slideshowSpeed: 7000, //Integer: Set the speed of the slideshow cycling, in milliseconds\n    animationSpeed: 600, //Integer: Set the speed of animations, in milliseconds\n    initDelay: 0, //{NEW} Integer: Set an initialization delay, in milliseconds\n    randomize: false, //Boolean: Randomize slide order\n\n    // Usability features\n    pauseOnAction: true, //Boolean: Pause the slideshow when interacting with control elements, highly recommended.\n    pauseOnHover: false, //Boolean: Pause the slideshow when hovering over slider, then resume when no longer hovering\n    useCSS: true, //{NEW} Boolean: Slider will use CSS3 transitions if available\n    touch: true, //{NEW} Boolean: Allow touch swipe navigation of the slider on touch-enabled devices\n    video: false, //{NEW} Boolean: If using video in the slider, will prevent CSS3 3D Transforms to avoid graphical glitches\n\n    // Primary Controls\n    controlNav: true, //Boolean: Create navigation for paging control of each clide? Note: Leave true for manualControls usage\n    directionNav: true, //Boolean: Create navigation for previous/next navigation? (true/false)\n    prevText: \"Previous\", //String: Set the text for the \"previous\" directionNav item\n    nextText: \"Next\", //String: Set the text for the \"next\" directionNav item\n\n    // Secondary Navigation\n    keyboard: true, //Boolean: Allow slider navigating via keyboard left/right keys\n    multipleKeyboard: false, //{NEW} Boolean: Allow keyboard navigation to affect multiple sliders. Default behavior cuts out keyboard navigation with more than one slider present.\n    mousewheel: false, //{UPDATED} Boolean: Requires jquery.mousewheel.js (https://github.com/brandonaaron/jquery-mousewheel) - Allows slider navigating via mousewheel\n    pausePlay: false, //Boolean: Create pause/play dynamic element\n    pauseText: \"Pause\", //String: Set the text for the \"pause\" pausePlay item\n    playText: \"Play\", //String: Set the text for the \"play\" pausePlay item\n\n    // Special properties\n    controlsContainer: \"\", //{UPDATED} jQuery Object/Selector: Declare which container the navigation elements should be appended too. Default container is the FlexSlider element. Example use would be $(\".flexslider-container\"). Property is ignored if given element is not found.\n    manualControls: \"\", //{UPDATED} jQuery Object/Selector: Declare custom control navigation. Examples would be $(\".flex-control-nav li\") or \"#tabs-nav li img\", etc. The number of elements in your controlNav should match the number of slides/tabs.\n    sync: \"\", //{NEW} Selector: Mirror the actions performed on this slider with another slider. Use with care.\n    asNavFor: \"\", //{NEW} Selector: Internal property exposed for turning the slider into a thumbnail navigation for another slider\n\n    // Carousel Options\n    itemWidth: 0, //{NEW} Integer: Box-model width of individual carousel items, including horizontal borders and padding.\n    itemMargin: 0, //{NEW} Integer: Margin between carousel items.\n    minItems: 0, //{NEW} Integer: Minimum number of carousel items that should be visible. Items will resize fluidly when below this.\n    maxItems: 0, //{NEW} Integer: Maxmimum number of carousel items that should be visible. Items will resize fluidly when above this limit.\n    move: 0, //{NEW} Integer: Number of carousel items that should move on animation. If 0, slider will move all visible items.\n\n    // Callback API\n    start: function () {}, //Callback: function(slider) - Fires when the slider loads the first slide\n    before: function () {}, //Callback: function(slider) - Fires asynchronously with each slider animation\n    after: function () {}, //Callback: function(slider) - Fires after each slider animation completes\n    end: function () {}, //Callback: function(slider) - Fires when the slider reaches the last slide (asynchronous)\n    added: function () {}, //{NEW} Callback: function(slider) - Fires after a slide is added\n    removed: function () {}, //{NEW} Callback: function(slider) - Fires after a slide is removed\n  };\n\n  //FlexSlider: Plugin Function\n  $.fn.flexslider = function (options) {\n    if (options === undefined) options = {};\n\n    if (typeof options === \"object\") {\n      return this.each(function () {\n        var $this = $(this),\n          selector = options.selector ? options.selector : \".slides > li\",\n          $slides = $this.find(selector);\n\n        if ($slides.length === 1) {\n          $slides.fadeIn(400);\n          if (options.start) options.start($this);\n        } else if ($this.data(\"flexslider\") == undefined) {\n          new $.flexslider(this, options);\n        }\n      });\n    } else {\n      // Helper strings to quickly perform functions on the slider\n      var $slider = $(this).data(\"flexslider\");\n      switch (options) {\n        case \"play\":\n          $slider.play();\n          break;\n        case \"pause\":\n          $slider.pause();\n          break;\n        case \"next\":\n          $slider.flexAnimate($slider.getTarget(\"next\"), true);\n          break;\n        case \"prev\":\n        case \"previous\":\n          $slider.flexAnimate($slider.getTarget(\"prev\"), true);\n          break;\n        default:\n          if (typeof options === \"number\") $slider.flexAnimate(options, true);\n      }\n    }\n  };\n})(jQuery);\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/jquery.js",
    "content": "/*! jQuery v1.9.1 | (c) 2005, 2012 jQuery Foundation, Inc. | jquery.org/license\n//@ sourceMappingURL=jquery.min.map\n*/ (function (a, b) {\n  function G(a) {\n    var b = (F[a] = {});\n    return (\n      p.each(a.split(s), function (a, c) {\n        b[c] = !0;\n      }),\n      b\n    );\n  }\n  function J(a, c, d) {\n    if (d === b && a.nodeType === 1) {\n      var e = \"data-\" + c.replace(I, \"-$1\").toLowerCase();\n      d = a.getAttribute(e);\n      if (typeof d == \"string\") {\n        try {\n          d =\n            d === \"true\"\n              ? !0\n              : d === \"false\"\n                ? !1\n                : d === \"null\"\n                  ? null\n                  : +d + \"\" === d\n                    ? +d\n                    : H.test(d)\n                      ? p.parseJSON(d)\n                      : d;\n        } catch (f) {}\n        p.data(a, c, d);\n      } else d = b;\n    }\n    return d;\n  }\n  function K(a) {\n    var b;\n    for (b in a) {\n      if (b === \"data\" && p.isEmptyObject(a[b])) continue;\n      if (b !== \"toJSON\") return !1;\n    }\n    return !0;\n  }\n  function ba() {\n    return !1;\n  }\n  function bb() {\n    return !0;\n  }\n  function bh(a) {\n    return !a || !a.parentNode || a.parentNode.nodeType === 11;\n  }\n  function bi(a, b) {\n    do a = a[b];\n    while (a && a.nodeType !== 1);\n    return a;\n  }\n  function bj(a, b, c) {\n    b = b || 0;\n    if (p.isFunction(b))\n      return p.grep(a, function (a, d) {\n        var e = !!b.call(a, d, a);\n        return e === c;\n      });\n    if (b.nodeType)\n      return p.grep(a, function (a, d) {\n        return (a === b) === c;\n      });\n    if (typeof b == \"string\") {\n      var d = p.grep(a, function (a) {\n        return a.nodeType === 1;\n      });\n      if (be.test(b)) return p.filter(b, d, !c);\n      b = p.filter(b, d);\n    }\n    return p.grep(a, function (a, d) {\n      return p.inArray(a, b) >= 0 === c;\n    });\n  }\n  function bk(a) {\n    var b = bl.split(\"|\"),\n      c = a.createDocumentFragment();\n    if (c.createElement) while (b.length) c.createElement(b.pop());\n    return c;\n  }\n  function bC(a, b) {\n    return (\n      a.getElementsByTagName(b)[0] ||\n      a.appendChild(a.ownerDocument.createElement(b))\n    );\n  }\n  function bD(a, b) {\n    if (b.nodeType !== 1 || !p.hasData(a)) return;\n    var c,\n      d,\n      e,\n      f = p._data(a),\n      g = p._data(b, f),\n      h = f.events;\n    if (h) {\n      delete g.handle, (g.events = {});\n      for (c in h)\n        for (d = 0, e = h[c].length; d < e; d++) p.event.add(b, c, h[c][d]);\n    }\n    g.data && (g.data = p.extend({}, g.data));\n  }\n  function bE(a, b) {\n    var c;\n    if (b.nodeType !== 1) return;\n    b.clearAttributes && b.clearAttributes(),\n      b.mergeAttributes && b.mergeAttributes(a),\n      (c = b.nodeName.toLowerCase()),\n      c === \"object\"\n        ? (b.parentNode && (b.outerHTML = a.outerHTML),\n          p.support.html5Clone &&\n            a.innerHTML &&\n            !p.trim(b.innerHTML) &&\n            (b.innerHTML = a.innerHTML))\n        : c === \"input\" && bv.test(a.type)\n          ? ((b.defaultChecked = b.checked = a.checked),\n            b.value !== a.value && (b.value = a.value))\n          : c === \"option\"\n            ? (b.selected = a.defaultSelected)\n            : c === \"input\" || c === \"textarea\"\n              ? (b.defaultValue = a.defaultValue)\n              : c === \"script\" && b.text !== a.text && (b.text = a.text),\n      b.removeAttribute(p.expando);\n  }\n  function bF(a) {\n    return typeof a.getElementsByTagName != \"undefined\"\n      ? a.getElementsByTagName(\"*\")\n      : typeof a.querySelectorAll != \"undefined\"\n        ? a.querySelectorAll(\"*\")\n        : [];\n  }\n  function bG(a) {\n    bv.test(a.type) && (a.defaultChecked = a.checked);\n  }\n  function bY(a, b) {\n    if (b in a) return b;\n    var c = b.charAt(0).toUpperCase() + b.slice(1),\n      d = b,\n      e = bW.length;\n    while (e--) {\n      b = bW[e] + c;\n      if (b in a) return b;\n    }\n    return d;\n  }\n  function bZ(a, b) {\n    return (\n      (a = b || a),\n      p.css(a, \"display\") === \"none\" || !p.contains(a.ownerDocument, a)\n    );\n  }\n  function b$(a, b) {\n    var c,\n      d,\n      e = [],\n      f = 0,\n      g = a.length;\n    for (; f < g; f++) {\n      c = a[f];\n      if (!c.style) continue;\n      (e[f] = p._data(c, \"olddisplay\")),\n        b\n          ? (!e[f] && c.style.display === \"none\" && (c.style.display = \"\"),\n            c.style.display === \"\" &&\n              bZ(c) &&\n              (e[f] = p._data(c, \"olddisplay\", cc(c.nodeName))))\n          : ((d = bH(c, \"display\")),\n            !e[f] && d !== \"none\" && p._data(c, \"olddisplay\", d));\n    }\n    for (f = 0; f < g; f++) {\n      c = a[f];\n      if (!c.style) continue;\n      if (!b || c.style.display === \"none\" || c.style.display === \"\")\n        c.style.display = b ? e[f] || \"\" : \"none\";\n    }\n    return a;\n  }\n  function b_(a, b, c) {\n    var d = bP.exec(b);\n    return d ? Math.max(0, d[1] - (c || 0)) + (d[2] || \"px\") : b;\n  }\n  function ca(a, b, c, d) {\n    var e = c === (d ? \"border\" : \"content\") ? 4 : b === \"width\" ? 1 : 0,\n      f = 0;\n    for (; e < 4; e += 2)\n      c === \"margin\" && (f += p.css(a, c + bV[e], !0)),\n        d\n          ? (c === \"content\" &&\n              (f -= parseFloat(bH(a, \"padding\" + bV[e])) || 0),\n            c !== \"margin\" &&\n              (f -= parseFloat(bH(a, \"border\" + bV[e] + \"Width\")) || 0))\n          : ((f += parseFloat(bH(a, \"padding\" + bV[e])) || 0),\n            c !== \"padding\" &&\n              (f += parseFloat(bH(a, \"border\" + bV[e] + \"Width\")) || 0));\n    return f;\n  }\n  function cb(a, b, c) {\n    var d = b === \"width\" ? a.offsetWidth : a.offsetHeight,\n      e = !0,\n      f = p.support.boxSizing && p.css(a, \"boxSizing\") === \"border-box\";\n    if (d <= 0 || d == null) {\n      d = bH(a, b);\n      if (d < 0 || d == null) d = a.style[b];\n      if (bQ.test(d)) return d;\n      (e = f && (p.support.boxSizingReliable || d === a.style[b])),\n        (d = parseFloat(d) || 0);\n    }\n    return d + ca(a, b, c || (f ? \"border\" : \"content\"), e) + \"px\";\n  }\n  function cc(a) {\n    if (bS[a]) return bS[a];\n    var b = p(\"<\" + a + \">\").appendTo(e.body),\n      c = b.css(\"display\");\n    b.remove();\n    if (c === \"none\" || c === \"\") {\n      bI = e.body.appendChild(\n        bI ||\n          p.extend(e.createElement(\"iframe\"), {\n            frameBorder: 0,\n            width: 0,\n            height: 0,\n          }),\n      );\n      if (!bJ || !bI.createElement)\n        (bJ = (bI.contentWindow || bI.contentDocument).document),\n          bJ.write(\"<!doctype html><html><body>\"),\n          bJ.close();\n      (b = bJ.body.appendChild(bJ.createElement(a))),\n        (c = bH(b, \"display\")),\n        e.body.removeChild(bI);\n    }\n    return (bS[a] = c), c;\n  }\n  function ci(a, b, c, d) {\n    var e;\n    if (p.isArray(b))\n      p.each(b, function (b, e) {\n        c || ce.test(a)\n          ? d(a, e)\n          : ci(a + \"[\" + (typeof e == \"object\" ? b : \"\") + \"]\", e, c, d);\n      });\n    else if (!c && p.type(b) === \"object\")\n      for (e in b) ci(a + \"[\" + e + \"]\", b[e], c, d);\n    else d(a, b);\n  }\n  function cz(a) {\n    return function (b, c) {\n      typeof b != \"string\" && ((c = b), (b = \"*\"));\n      var d,\n        e,\n        f,\n        g = b.toLowerCase().split(s),\n        h = 0,\n        i = g.length;\n      if (p.isFunction(c))\n        for (; h < i; h++)\n          (d = g[h]),\n            (f = /^\\+/.test(d)),\n            f && (d = d.substr(1) || \"*\"),\n            (e = a[d] = a[d] || []),\n            e[f ? \"unshift\" : \"push\"](c);\n    };\n  }\n  function cA(a, c, d, e, f, g) {\n    (f = f || c.dataTypes[0]), (g = g || {}), (g[f] = !0);\n    var h,\n      i = a[f],\n      j = 0,\n      k = i ? i.length : 0,\n      l = a === cv;\n    for (; j < k && (l || !h); j++)\n      (h = i[j](c, d, e)),\n        typeof h == \"string\" &&\n          (!l || g[h]\n            ? (h = b)\n            : (c.dataTypes.unshift(h), (h = cA(a, c, d, e, h, g))));\n    return (l || !h) && !g[\"*\"] && (h = cA(a, c, d, e, \"*\", g)), h;\n  }\n  function cB(a, c) {\n    var d,\n      e,\n      f = p.ajaxSettings.flatOptions || {};\n    for (d in c) c[d] !== b && ((f[d] ? a : e || (e = {}))[d] = c[d]);\n    e && p.extend(!0, a, e);\n  }\n  function cC(a, c, d) {\n    var e,\n      f,\n      g,\n      h,\n      i = a.contents,\n      j = a.dataTypes,\n      k = a.responseFields;\n    for (f in k) f in d && (c[k[f]] = d[f]);\n    while (j[0] === \"*\")\n      j.shift(),\n        e === b && (e = a.mimeType || c.getResponseHeader(\"content-type\"));\n    if (e)\n      for (f in i)\n        if (i[f] && i[f].test(e)) {\n          j.unshift(f);\n          break;\n        }\n    if (j[0] in d) g = j[0];\n    else {\n      for (f in d) {\n        if (!j[0] || a.converters[f + \" \" + j[0]]) {\n          g = f;\n          break;\n        }\n        h || (h = f);\n      }\n      g = g || h;\n    }\n    if (g) return g !== j[0] && j.unshift(g), d[g];\n  }\n  function cD(a, b) {\n    var c,\n      d,\n      e,\n      f,\n      g = a.dataTypes.slice(),\n      h = g[0],\n      i = {},\n      j = 0;\n    a.dataFilter && (b = a.dataFilter(b, a.dataType));\n    if (g[1]) for (c in a.converters) i[c.toLowerCase()] = a.converters[c];\n    for (; (e = g[++j]); )\n      if (e !== \"*\") {\n        if (h !== \"*\" && h !== e) {\n          c = i[h + \" \" + e] || i[\"* \" + e];\n          if (!c)\n            for (d in i) {\n              f = d.split(\" \");\n              if (f[1] === e) {\n                c = i[h + \" \" + f[0]] || i[\"* \" + f[0]];\n                if (c) {\n                  c === !0\n                    ? (c = i[d])\n                    : i[d] !== !0 && ((e = f[0]), g.splice(j--, 0, e));\n                  break;\n                }\n              }\n            }\n          if (c !== !0)\n            if (c && a[\"throws\"]) b = c(b);\n            else\n              try {\n                b = c(b);\n              } catch (k) {\n                return {\n                  state: \"parsererror\",\n                  error: c ? k : \"No conversion from \" + h + \" to \" + e,\n                };\n              }\n        }\n        h = e;\n      }\n    return { state: \"success\", data: b };\n  }\n  function cL() {\n    try {\n      return new a.XMLHttpRequest();\n    } catch (b) {}\n  }\n  function cM() {\n    try {\n      return new a.ActiveXObject(\"Microsoft.XMLHTTP\");\n    } catch (b) {}\n  }\n  function cU() {\n    return (\n      setTimeout(function () {\n        cN = b;\n      }, 0),\n      (cN = p.now())\n    );\n  }\n  function cV(a, b) {\n    p.each(b, function (b, c) {\n      var d = (cT[b] || []).concat(cT[\"*\"]),\n        e = 0,\n        f = d.length;\n      for (; e < f; e++) if (d[e].call(a, b, c)) return;\n    });\n  }\n  function cW(a, b, c) {\n    var d,\n      e = 0,\n      f = 0,\n      g = cS.length,\n      h = p.Deferred().always(function () {\n        delete i.elem;\n      }),\n      i = function () {\n        var b = cN || cU(),\n          c = Math.max(0, j.startTime + j.duration - b),\n          d = 1 - (c / j.duration || 0),\n          e = 0,\n          f = j.tweens.length;\n        for (; e < f; e++) j.tweens[e].run(d);\n        return (\n          h.notifyWith(a, [j, d, c]),\n          d < 1 && f ? c : (h.resolveWith(a, [j]), !1)\n        );\n      },\n      j = h.promise({\n        elem: a,\n        props: p.extend({}, b),\n        opts: p.extend(!0, { specialEasing: {} }, c),\n        originalProperties: b,\n        originalOptions: c,\n        startTime: cN || cU(),\n        duration: c.duration,\n        tweens: [],\n        createTween: function (b, c, d) {\n          var e = p.Tween(\n            a,\n            j.opts,\n            b,\n            c,\n            j.opts.specialEasing[b] || j.opts.easing,\n          );\n          return j.tweens.push(e), e;\n        },\n        stop: function (b) {\n          var c = 0,\n            d = b ? j.tweens.length : 0;\n          for (; c < d; c++) j.tweens[c].run(1);\n          return b ? h.resolveWith(a, [j, b]) : h.rejectWith(a, [j, b]), this;\n        },\n      }),\n      k = j.props;\n    cX(k, j.opts.specialEasing);\n    for (; e < g; e++) {\n      d = cS[e].call(j, a, k, j.opts);\n      if (d) return d;\n    }\n    return (\n      cV(j, k),\n      p.isFunction(j.opts.start) && j.opts.start.call(a, j),\n      p.fx.timer(p.extend(i, { anim: j, queue: j.opts.queue, elem: a })),\n      j\n        .progress(j.opts.progress)\n        .done(j.opts.done, j.opts.complete)\n        .fail(j.opts.fail)\n        .always(j.opts.always)\n    );\n  }\n  function cX(a, b) {\n    var c, d, e, f, g;\n    for (c in a) {\n      (d = p.camelCase(c)),\n        (e = b[d]),\n        (f = a[c]),\n        p.isArray(f) && ((e = f[1]), (f = a[c] = f[0])),\n        c !== d && ((a[d] = f), delete a[c]),\n        (g = p.cssHooks[d]);\n      if (g && \"expand\" in g) {\n        (f = g.expand(f)), delete a[d];\n        for (c in f) c in a || ((a[c] = f[c]), (b[c] = e));\n      } else b[d] = e;\n    }\n  }\n  function cY(a, b, c) {\n    var d,\n      e,\n      f,\n      g,\n      h,\n      i,\n      j,\n      k,\n      l = this,\n      m = a.style,\n      n = {},\n      o = [],\n      q = a.nodeType && bZ(a);\n    c.queue ||\n      ((j = p._queueHooks(a, \"fx\")),\n      j.unqueued == null &&\n        ((j.unqueued = 0),\n        (k = j.empty.fire),\n        (j.empty.fire = function () {\n          j.unqueued || k();\n        })),\n      j.unqueued++,\n      l.always(function () {\n        l.always(function () {\n          j.unqueued--, p.queue(a, \"fx\").length || j.empty.fire();\n        });\n      })),\n      a.nodeType === 1 &&\n        (\"height\" in b || \"width\" in b) &&\n        ((c.overflow = [m.overflow, m.overflowX, m.overflowY]),\n        p.css(a, \"display\") === \"inline\" &&\n          p.css(a, \"float\") === \"none\" &&\n          (!p.support.inlineBlockNeedsLayout || cc(a.nodeName) === \"inline\"\n            ? (m.display = \"inline-block\")\n            : (m.zoom = 1))),\n      c.overflow &&\n        ((m.overflow = \"hidden\"),\n        p.support.shrinkWrapBlocks ||\n          l.done(function () {\n            (m.overflow = c.overflow[0]),\n              (m.overflowX = c.overflow[1]),\n              (m.overflowY = c.overflow[2]);\n          }));\n    for (d in b) {\n      f = b[d];\n      if (cP.exec(f)) {\n        delete b[d];\n        if (f === (q ? \"hide\" : \"show\")) continue;\n        o.push(d);\n      }\n    }\n    g = o.length;\n    if (g) {\n      (h = p._data(a, \"fxshow\") || p._data(a, \"fxshow\", {})),\n        q\n          ? p(a).show()\n          : l.done(function () {\n              p(a).hide();\n            }),\n        l.done(function () {\n          var b;\n          p.removeData(a, \"fxshow\", !0);\n          for (b in n) p.style(a, b, n[b]);\n        });\n      for (d = 0; d < g; d++)\n        (e = o[d]),\n          (i = l.createTween(e, q ? h[e] : 0)),\n          (n[e] = h[e] || p.style(a, e)),\n          e in h ||\n            ((h[e] = i.start),\n            q &&\n              ((i.end = i.start),\n              (i.start = e === \"width\" || e === \"height\" ? 1 : 0)));\n    }\n  }\n  function cZ(a, b, c, d, e) {\n    return new cZ.prototype.init(a, b, c, d, e);\n  }\n  function c$(a, b) {\n    var c,\n      d = { height: a },\n      e = 0;\n    b = b ? 1 : 0;\n    for (; e < 4; e += 2 - b)\n      (c = bV[e]), (d[\"margin\" + c] = d[\"padding\" + c] = a);\n    return b && (d.opacity = d.width = a), d;\n  }\n  function da(a) {\n    return p.isWindow(a)\n      ? a\n      : a.nodeType === 9\n        ? a.defaultView || a.parentWindow\n        : !1;\n  }\n  var c,\n    d,\n    e = a.document,\n    f = a.location,\n    g = a.navigator,\n    h = a.jQuery,\n    i = a.$,\n    j = Array.prototype.push,\n    k = Array.prototype.slice,\n    l = Array.prototype.indexOf,\n    m = Object.prototype.toString,\n    n = Object.prototype.hasOwnProperty,\n    o = String.prototype.trim,\n    p = function (a, b) {\n      return new p.fn.init(a, b, c);\n    },\n    q = /[\\-+]?(?:\\d*\\.|)\\d+(?:[eE][\\-+]?\\d+|)/.source,\n    r = /\\S/,\n    s = /\\s+/,\n    t = /^[\\s\\uFEFF\\xA0]+|[\\s\\uFEFF\\xA0]+$/g,\n    u = /^(?:[^#<]*(<[\\w\\W]+>)[^>]*$|#([\\w\\-]*)$)/,\n    v = /^<(\\w+)\\s*\\/?>(?:<\\/\\1>|)$/,\n    w = /^[\\],:{}\\s]*$/,\n    x = /(?:^|:|,)(?:\\s*\\[)+/g,\n    y = /\\\\(?:[\"\\\\\\/bfnrt]|u[\\da-fA-F]{4})/g,\n    z = /\"[^\"\\\\\\r\\n]*\"|true|false|null|-?(?:\\d\\d*\\.|)\\d+(?:[eE][\\-+]?\\d+|)/g,\n    A = /^-ms-/,\n    B = /-([\\da-z])/gi,\n    C = function (a, b) {\n      return (b + \"\").toUpperCase();\n    },\n    D = function () {\n      e.addEventListener\n        ? (e.removeEventListener(\"DOMContentLoaded\", D, !1), p.ready())\n        : e.readyState === \"complete\" &&\n          (e.detachEvent(\"onreadystatechange\", D), p.ready());\n    },\n    E = {};\n  (p.fn = p.prototype =\n    {\n      constructor: p,\n      init: function (a, c, d) {\n        var f, g, h, i;\n        if (!a) return this;\n        if (a.nodeType)\n          return (this.context = this[0] = a), (this.length = 1), this;\n        if (typeof a == \"string\") {\n          a.charAt(0) === \"<\" && a.charAt(a.length - 1) === \">\" && a.length >= 3\n            ? (f = [null, a, null])\n            : (f = u.exec(a));\n          if (f && (f[1] || !c)) {\n            if (f[1])\n              return (\n                (c = c instanceof p ? c[0] : c),\n                (i = c && c.nodeType ? c.ownerDocument || c : e),\n                (a = p.parseHTML(f[1], i, !0)),\n                v.test(f[1]) && p.isPlainObject(c) && this.attr.call(a, c, !0),\n                p.merge(this, a)\n              );\n            g = e.getElementById(f[2]);\n            if (g && g.parentNode) {\n              if (g.id !== f[2]) return d.find(a);\n              (this.length = 1), (this[0] = g);\n            }\n            return (this.context = e), (this.selector = a), this;\n          }\n          return !c || c.jquery\n            ? (c || d).find(a)\n            : this.constructor(c).find(a);\n        }\n        return p.isFunction(a)\n          ? d.ready(a)\n          : (a.selector !== b &&\n              ((this.selector = a.selector), (this.context = a.context)),\n            p.makeArray(a, this));\n      },\n      selector: \"\",\n      jquery: \"1.8.1\",\n      length: 0,\n      size: function () {\n        return this.length;\n      },\n      toArray: function () {\n        return k.call(this);\n      },\n      get: function (a) {\n        return a == null\n          ? this.toArray()\n          : a < 0\n            ? this[this.length + a]\n            : this[a];\n      },\n      pushStack: function (a, b, c) {\n        var d = p.merge(this.constructor(), a);\n        return (\n          (d.prevObject = this),\n          (d.context = this.context),\n          b === \"find\"\n            ? (d.selector = this.selector + (this.selector ? \" \" : \"\") + c)\n            : b && (d.selector = this.selector + \".\" + b + \"(\" + c + \")\"),\n          d\n        );\n      },\n      each: function (a, b) {\n        return p.each(this, a, b);\n      },\n      ready: function (a) {\n        return p.ready.promise().done(a), this;\n      },\n      eq: function (a) {\n        return (a = +a), a === -1 ? this.slice(a) : this.slice(a, a + 1);\n      },\n      first: function () {\n        return this.eq(0);\n      },\n      last: function () {\n        return this.eq(-1);\n      },\n      slice: function () {\n        return this.pushStack(\n          k.apply(this, arguments),\n          \"slice\",\n          k.call(arguments).join(\",\"),\n        );\n      },\n      map: function (a) {\n        return this.pushStack(\n          p.map(this, function (b, c) {\n            return a.call(b, c, b);\n          }),\n        );\n      },\n      end: function () {\n        return this.prevObject || this.constructor(null);\n      },\n      push: j,\n      sort: [].sort,\n      splice: [].splice,\n    }),\n    (p.fn.init.prototype = p.fn),\n    (p.extend = p.fn.extend =\n      function () {\n        var a,\n          c,\n          d,\n          e,\n          f,\n          g,\n          h = arguments[0] || {},\n          i = 1,\n          j = arguments.length,\n          k = !1;\n        typeof h == \"boolean\" && ((k = h), (h = arguments[1] || {}), (i = 2)),\n          typeof h != \"object\" && !p.isFunction(h) && (h = {}),\n          j === i && ((h = this), --i);\n        for (; i < j; i++)\n          if ((a = arguments[i]) != null)\n            for (c in a) {\n              (d = h[c]), (e = a[c]);\n              if (h === e) continue;\n              k && e && (p.isPlainObject(e) || (f = p.isArray(e)))\n                ? (f\n                    ? ((f = !1), (g = d && p.isArray(d) ? d : []))\n                    : (g = d && p.isPlainObject(d) ? d : {}),\n                  (h[c] = p.extend(k, g, e)))\n                : e !== b && (h[c] = e);\n            }\n        return h;\n      }),\n    p.extend({\n      noConflict: function (b) {\n        return a.$ === p && (a.$ = i), b && a.jQuery === p && (a.jQuery = h), p;\n      },\n      isReady: !1,\n      readyWait: 1,\n      holdReady: function (a) {\n        a ? p.readyWait++ : p.ready(!0);\n      },\n      ready: function (a) {\n        if (a === !0 ? --p.readyWait : p.isReady) return;\n        if (!e.body) return setTimeout(p.ready, 1);\n        p.isReady = !0;\n        if (a !== !0 && --p.readyWait > 0) return;\n        d.resolveWith(e, [p]),\n          p.fn.trigger && p(e).trigger(\"ready\").off(\"ready\");\n      },\n      isFunction: function (a) {\n        return p.type(a) === \"function\";\n      },\n      isArray:\n        Array.isArray ||\n        function (a) {\n          return p.type(a) === \"array\";\n        },\n      isWindow: function (a) {\n        return a != null && a == a.window;\n      },\n      isNumeric: function (a) {\n        return !isNaN(parseFloat(a)) && isFinite(a);\n      },\n      type: function (a) {\n        return a == null ? String(a) : E[m.call(a)] || \"object\";\n      },\n      isPlainObject: function (a) {\n        if (!a || p.type(a) !== \"object\" || a.nodeType || p.isWindow(a))\n          return !1;\n        try {\n          if (\n            a.constructor &&\n            !n.call(a, \"constructor\") &&\n            !n.call(a.constructor.prototype, \"isPrototypeOf\")\n          )\n            return !1;\n        } catch (c) {\n          return !1;\n        }\n        var d;\n        for (d in a);\n        return d === b || n.call(a, d);\n      },\n      isEmptyObject: function (a) {\n        var b;\n        for (b in a) return !1;\n        return !0;\n      },\n      error: function (a) {\n        throw new Error(a);\n      },\n      parseHTML: function (a, b, c) {\n        var d;\n        return !a || typeof a != \"string\"\n          ? null\n          : (typeof b == \"boolean\" && ((c = b), (b = 0)),\n            (b = b || e),\n            (d = v.exec(a))\n              ? [b.createElement(d[1])]\n              : ((d = p.buildFragment([a], b, c ? null : [])),\n                p.merge(\n                  [],\n                  (d.cacheable ? p.clone(d.fragment) : d.fragment).childNodes,\n                )));\n      },\n      parseJSON: function (b) {\n        if (!b || typeof b != \"string\") return null;\n        b = p.trim(b);\n        if (a.JSON && a.JSON.parse) return a.JSON.parse(b);\n        if (w.test(b.replace(y, \"@\").replace(z, \"]\").replace(x, \"\")))\n          return new Function(\"return \" + b)();\n        p.error(\"Invalid JSON: \" + b);\n      },\n      parseXML: function (c) {\n        var d, e;\n        if (!c || typeof c != \"string\") return null;\n        try {\n          a.DOMParser\n            ? ((e = new DOMParser()), (d = e.parseFromString(c, \"text/xml\")))\n            : ((d = new ActiveXObject(\"Microsoft.XMLDOM\")),\n              (d.async = \"false\"),\n              d.loadXML(c));\n        } catch (f) {\n          d = b;\n        }\n        return (\n          (!d ||\n            !d.documentElement ||\n            d.getElementsByTagName(\"parsererror\").length) &&\n            p.error(\"Invalid XML: \" + c),\n          d\n        );\n      },\n      noop: function () {},\n      globalEval: function (b) {\n        b &&\n          r.test(b) &&\n          (\n            a.execScript ||\n            function (b) {\n              a.eval.call(a, b);\n            }\n          )(b);\n      },\n      camelCase: function (a) {\n        return a.replace(A, \"ms-\").replace(B, C);\n      },\n      nodeName: function (a, b) {\n        return a.nodeName && a.nodeName.toUpperCase() === b.toUpperCase();\n      },\n      each: function (a, c, d) {\n        var e,\n          f = 0,\n          g = a.length,\n          h = g === b || p.isFunction(a);\n        if (d) {\n          if (h) {\n            for (e in a) if (c.apply(a[e], d) === !1) break;\n          } else for (; f < g; ) if (c.apply(a[f++], d) === !1) break;\n        } else if (h) {\n          for (e in a) if (c.call(a[e], e, a[e]) === !1) break;\n        } else for (; f < g; ) if (c.call(a[f], f, a[f++]) === !1) break;\n        return a;\n      },\n      trim:\n        o && !o.call(\"﻿ \")\n          ? function (a) {\n              return a == null ? \"\" : o.call(a);\n            }\n          : function (a) {\n              return a == null ? \"\" : a.toString().replace(t, \"\");\n            },\n      makeArray: function (a, b) {\n        var c,\n          d = b || [];\n        return (\n          a != null &&\n            ((c = p.type(a)),\n            a.length == null ||\n            c === \"string\" ||\n            c === \"function\" ||\n            c === \"regexp\" ||\n            p.isWindow(a)\n              ? j.call(d, a)\n              : p.merge(d, a)),\n          d\n        );\n      },\n      inArray: function (a, b, c) {\n        var d;\n        if (b) {\n          if (l) return l.call(b, a, c);\n          (d = b.length), (c = c ? (c < 0 ? Math.max(0, d + c) : c) : 0);\n          for (; c < d; c++) if (c in b && b[c] === a) return c;\n        }\n        return -1;\n      },\n      merge: function (a, c) {\n        var d = c.length,\n          e = a.length,\n          f = 0;\n        if (typeof d == \"number\") for (; f < d; f++) a[e++] = c[f];\n        else while (c[f] !== b) a[e++] = c[f++];\n        return (a.length = e), a;\n      },\n      grep: function (a, b, c) {\n        var d,\n          e = [],\n          f = 0,\n          g = a.length;\n        c = !!c;\n        for (; f < g; f++) (d = !!b(a[f], f)), c !== d && e.push(a[f]);\n        return e;\n      },\n      map: function (a, c, d) {\n        var e,\n          f,\n          g = [],\n          h = 0,\n          i = a.length,\n          j =\n            a instanceof p ||\n            (i !== b &&\n              typeof i == \"number\" &&\n              ((i > 0 && a[0] && a[i - 1]) || i === 0 || p.isArray(a)));\n        if (j)\n          for (; h < i; h++)\n            (e = c(a[h], h, d)), e != null && (g[g.length] = e);\n        else for (f in a) (e = c(a[f], f, d)), e != null && (g[g.length] = e);\n        return g.concat.apply([], g);\n      },\n      guid: 1,\n      proxy: function (a, c) {\n        var d, e, f;\n        return (\n          typeof c == \"string\" && ((d = a[c]), (c = a), (a = d)),\n          p.isFunction(a)\n            ? ((e = k.call(arguments, 2)),\n              (f = function () {\n                return a.apply(c, e.concat(k.call(arguments)));\n              }),\n              (f.guid = a.guid = a.guid || f.guid || p.guid++),\n              f)\n            : b\n        );\n      },\n      access: function (a, c, d, e, f, g, h) {\n        var i,\n          j = d == null,\n          k = 0,\n          l = a.length;\n        if (d && typeof d == \"object\") {\n          for (k in d) p.access(a, c, k, d[k], 1, g, e);\n          f = 1;\n        } else if (e !== b) {\n          (i = h === b && p.isFunction(e)),\n            j &&\n              (i\n                ? ((i = c),\n                  (c = function (a, b, c) {\n                    return i.call(p(a), c);\n                  }))\n                : (c.call(a, e), (c = null)));\n          if (c)\n            for (; k < l; k++)\n              c(a[k], d, i ? e.call(a[k], k, c(a[k], d)) : e, h);\n          f = 1;\n        }\n        return f ? a : j ? c.call(a) : l ? c(a[0], d) : g;\n      },\n      now: function () {\n        return new Date().getTime();\n      },\n    }),\n    (p.ready.promise = function (b) {\n      if (!d) {\n        d = p.Deferred();\n        if (e.readyState === \"complete\") setTimeout(p.ready, 1);\n        else if (e.addEventListener)\n          e.addEventListener(\"DOMContentLoaded\", D, !1),\n            a.addEventListener(\"load\", p.ready, !1);\n        else {\n          e.attachEvent(\"onreadystatechange\", D),\n            a.attachEvent(\"onload\", p.ready);\n          var c = !1;\n          try {\n            c = a.frameElement == null && e.documentElement;\n          } catch (f) {}\n          c &&\n            c.doScroll &&\n            (function g() {\n              if (!p.isReady) {\n                try {\n                  c.doScroll(\"left\");\n                } catch (a) {\n                  return setTimeout(g, 50);\n                }\n                p.ready();\n              }\n            })();\n        }\n      }\n      return d.promise(b);\n    }),\n    p.each(\n      \"Boolean Number String Function Array Date RegExp Object\".split(\" \"),\n      function (a, b) {\n        E[\"[object \" + b + \"]\"] = b.toLowerCase();\n      },\n    ),\n    (c = p(e));\n  var F = {};\n  (p.Callbacks = function (a) {\n    a = typeof a == \"string\" ? F[a] || G(a) : p.extend({}, a);\n    var c,\n      d,\n      e,\n      f,\n      g,\n      h,\n      i = [],\n      j = !a.once && [],\n      k = function (b) {\n        (c = a.memory && b),\n          (d = !0),\n          (h = f || 0),\n          (f = 0),\n          (g = i.length),\n          (e = !0);\n        for (; i && h < g; h++)\n          if (i[h].apply(b[0], b[1]) === !1 && a.stopOnFalse) {\n            c = !1;\n            break;\n          }\n        (e = !1),\n          i && (j ? j.length && k(j.shift()) : c ? (i = []) : l.disable());\n      },\n      l = {\n        add: function () {\n          if (i) {\n            var b = i.length;\n            (function d(b) {\n              p.each(b, function (b, c) {\n                var e = p.type(c);\n                e === \"function\" && (!a.unique || !l.has(c))\n                  ? i.push(c)\n                  : c && c.length && e !== \"string\" && d(c);\n              });\n            })(arguments),\n              e ? (g = i.length) : c && ((f = b), k(c));\n          }\n          return this;\n        },\n        remove: function () {\n          return (\n            i &&\n              p.each(arguments, function (a, b) {\n                var c;\n                while ((c = p.inArray(b, i, c)) > -1)\n                  i.splice(c, 1), e && (c <= g && g--, c <= h && h--);\n              }),\n            this\n          );\n        },\n        has: function (a) {\n          return p.inArray(a, i) > -1;\n        },\n        empty: function () {\n          return (i = []), this;\n        },\n        disable: function () {\n          return (i = j = c = b), this;\n        },\n        disabled: function () {\n          return !i;\n        },\n        lock: function () {\n          return (j = b), c || l.disable(), this;\n        },\n        locked: function () {\n          return !j;\n        },\n        fireWith: function (a, b) {\n          return (\n            (b = b || []),\n            (b = [a, b.slice ? b.slice() : b]),\n            i && (!d || j) && (e ? j.push(b) : k(b)),\n            this\n          );\n        },\n        fire: function () {\n          return l.fireWith(this, arguments), this;\n        },\n        fired: function () {\n          return !!d;\n        },\n      };\n    return l;\n  }),\n    p.extend({\n      Deferred: function (a) {\n        var b = [\n            [\"resolve\", \"done\", p.Callbacks(\"once memory\"), \"resolved\"],\n            [\"reject\", \"fail\", p.Callbacks(\"once memory\"), \"rejected\"],\n            [\"notify\", \"progress\", p.Callbacks(\"memory\")],\n          ],\n          c = \"pending\",\n          d = {\n            state: function () {\n              return c;\n            },\n            always: function () {\n              return e.done(arguments).fail(arguments), this;\n            },\n            then: function () {\n              var a = arguments;\n              return p\n                .Deferred(function (c) {\n                  p.each(b, function (b, d) {\n                    var f = d[0],\n                      g = a[b];\n                    e[d[1]](\n                      p.isFunction(g)\n                        ? function () {\n                            var a = g.apply(this, arguments);\n                            a && p.isFunction(a.promise)\n                              ? a\n                                  .promise()\n                                  .done(c.resolve)\n                                  .fail(c.reject)\n                                  .progress(c.notify)\n                              : c[f + \"With\"](this === e ? c : this, [a]);\n                          }\n                        : c[f],\n                    );\n                  }),\n                    (a = null);\n                })\n                .promise();\n            },\n            promise: function (a) {\n              return typeof a == \"object\" ? p.extend(a, d) : d;\n            },\n          },\n          e = {};\n        return (\n          (d.pipe = d.then),\n          p.each(b, function (a, f) {\n            var g = f[2],\n              h = f[3];\n            (d[f[1]] = g.add),\n              h &&\n                g.add(\n                  function () {\n                    c = h;\n                  },\n                  b[a ^ 1][2].disable,\n                  b[2][2].lock,\n                ),\n              (e[f[0]] = g.fire),\n              (e[f[0] + \"With\"] = g.fireWith);\n          }),\n          d.promise(e),\n          a && a.call(e, e),\n          e\n        );\n      },\n      when: function (a) {\n        var b = 0,\n          c = k.call(arguments),\n          d = c.length,\n          e = d !== 1 || (a && p.isFunction(a.promise)) ? d : 0,\n          f = e === 1 ? a : p.Deferred(),\n          g = function (a, b, c) {\n            return function (d) {\n              (b[a] = this),\n                (c[a] = arguments.length > 1 ? k.call(arguments) : d),\n                c === h ? f.notifyWith(b, c) : --e || f.resolveWith(b, c);\n            };\n          },\n          h,\n          i,\n          j;\n        if (d > 1) {\n          (h = new Array(d)), (i = new Array(d)), (j = new Array(d));\n          for (; b < d; b++)\n            c[b] && p.isFunction(c[b].promise)\n              ? c[b]\n                  .promise()\n                  .done(g(b, j, c))\n                  .fail(f.reject)\n                  .progress(g(b, i, h))\n              : --e;\n        }\n        return e || f.resolveWith(j, c), f.promise();\n      },\n    }),\n    (p.support = (function () {\n      var b,\n        c,\n        d,\n        f,\n        g,\n        h,\n        i,\n        j,\n        k,\n        l,\n        m,\n        n = e.createElement(\"div\");\n      n.setAttribute(\"className\", \"t\"),\n        (n.innerHTML =\n          \"  <link/><table></table><a href='/a'>a</a><input type='checkbox'/>\"),\n        (c = n.getElementsByTagName(\"*\")),\n        (d = n.getElementsByTagName(\"a\")[0]),\n        (d.style.cssText = \"top:1px;float:left;opacity:.5\");\n      if (!c || !c.length || !d) return {};\n      (f = e.createElement(\"select\")),\n        (g = f.appendChild(e.createElement(\"option\"))),\n        (h = n.getElementsByTagName(\"input\")[0]),\n        (b = {\n          leadingWhitespace: n.firstChild.nodeType === 3,\n          tbody: !n.getElementsByTagName(\"tbody\").length,\n          htmlSerialize: !!n.getElementsByTagName(\"link\").length,\n          style: /top/.test(d.getAttribute(\"style\")),\n          hrefNormalized: d.getAttribute(\"href\") === \"/a\",\n          opacity: /^0.5/.test(d.style.opacity),\n          cssFloat: !!d.style.cssFloat,\n          checkOn: h.value === \"on\",\n          optSelected: g.selected,\n          getSetAttribute: n.className !== \"t\",\n          enctype: !!e.createElement(\"form\").enctype,\n          html5Clone:\n            e.createElement(\"nav\").cloneNode(!0).outerHTML !== \"<:nav></:nav>\",\n          boxModel: e.compatMode === \"CSS1Compat\",\n          submitBubbles: !0,\n          changeBubbles: !0,\n          focusinBubbles: !1,\n          deleteExpando: !0,\n          noCloneEvent: !0,\n          inlineBlockNeedsLayout: !1,\n          shrinkWrapBlocks: !1,\n          reliableMarginRight: !0,\n          boxSizingReliable: !0,\n          pixelPosition: !1,\n        }),\n        (h.checked = !0),\n        (b.noCloneChecked = h.cloneNode(!0).checked),\n        (f.disabled = !0),\n        (b.optDisabled = !g.disabled);\n      try {\n        delete n.test;\n      } catch (o) {\n        b.deleteExpando = !1;\n      }\n      !n.addEventListener &&\n        n.attachEvent &&\n        n.fireEvent &&\n        (n.attachEvent(\n          \"onclick\",\n          (m = function () {\n            b.noCloneEvent = !1;\n          }),\n        ),\n        n.cloneNode(!0).fireEvent(\"onclick\"),\n        n.detachEvent(\"onclick\", m)),\n        (h = e.createElement(\"input\")),\n        (h.value = \"t\"),\n        h.setAttribute(\"type\", \"radio\"),\n        (b.radioValue = h.value === \"t\"),\n        h.setAttribute(\"checked\", \"checked\"),\n        h.setAttribute(\"name\", \"t\"),\n        n.appendChild(h),\n        (i = e.createDocumentFragment()),\n        i.appendChild(n.lastChild),\n        (b.checkClone = i.cloneNode(!0).cloneNode(!0).lastChild.checked),\n        (b.appendChecked = h.checked),\n        i.removeChild(h),\n        i.appendChild(n);\n      if (n.attachEvent)\n        for (k in { submit: !0, change: !0, focusin: !0 })\n          (j = \"on\" + k),\n            (l = j in n),\n            l ||\n              (n.setAttribute(j, \"return;\"), (l = typeof n[j] == \"function\")),\n            (b[k + \"Bubbles\"] = l);\n      return (\n        p(function () {\n          var c,\n            d,\n            f,\n            g,\n            h = \"padding:0;margin:0;border:0;display:block;overflow:hidden;\",\n            i = e.getElementsByTagName(\"body\")[0];\n          if (!i) return;\n          (c = e.createElement(\"div\")),\n            (c.style.cssText =\n              \"visibility:hidden;border:0;width:0;height:0;position:static;top:0;margin-top:1px\"),\n            i.insertBefore(c, i.firstChild),\n            (d = e.createElement(\"div\")),\n            c.appendChild(d),\n            (d.innerHTML = \"<table><tr><td></td><td>t</td></tr></table>\"),\n            (f = d.getElementsByTagName(\"td\")),\n            (f[0].style.cssText = \"padding:0;margin:0;border:0;display:none\"),\n            (l = f[0].offsetHeight === 0),\n            (f[0].style.display = \"\"),\n            (f[1].style.display = \"none\"),\n            (b.reliableHiddenOffsets = l && f[0].offsetHeight === 0),\n            (d.innerHTML = \"\"),\n            (d.style.cssText =\n              \"box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;padding:1px;border:1px;display:block;width:4px;margin-top:1%;position:absolute;top:1%;\"),\n            (b.boxSizing = d.offsetWidth === 4),\n            (b.doesNotIncludeMarginInBodyOffset = i.offsetTop !== 1),\n            a.getComputedStyle &&\n              ((b.pixelPosition =\n                (a.getComputedStyle(d, null) || {}).top !== \"1%\"),\n              (b.boxSizingReliable =\n                (a.getComputedStyle(d, null) || { width: \"4px\" }).width ===\n                \"4px\"),\n              (g = e.createElement(\"div\")),\n              (g.style.cssText = d.style.cssText = h),\n              (g.style.marginRight = g.style.width = \"0\"),\n              (d.style.width = \"1px\"),\n              d.appendChild(g),\n              (b.reliableMarginRight = !parseFloat(\n                (a.getComputedStyle(g, null) || {}).marginRight,\n              ))),\n            typeof d.style.zoom != \"undefined\" &&\n              ((d.innerHTML = \"\"),\n              (d.style.cssText =\n                h + \"width:1px;padding:1px;display:inline;zoom:1\"),\n              (b.inlineBlockNeedsLayout = d.offsetWidth === 3),\n              (d.style.display = \"block\"),\n              (d.style.overflow = \"visible\"),\n              (d.innerHTML = \"<div></div>\"),\n              (d.firstChild.style.width = \"5px\"),\n              (b.shrinkWrapBlocks = d.offsetWidth !== 3),\n              (c.style.zoom = 1)),\n            i.removeChild(c),\n            (c = d = f = g = null);\n        }),\n        i.removeChild(n),\n        (c = d = f = g = h = i = n = null),\n        b\n      );\n    })());\n  var H = /(?:\\{[\\s\\S]*\\}|\\[[\\s\\S]*\\])$/,\n    I = /([A-Z])/g;\n  p.extend({\n    cache: {},\n    deletedIds: [],\n    uuid: 0,\n    expando: \"jQuery\" + (p.fn.jquery + Math.random()).replace(/\\D/g, \"\"),\n    noData: {\n      embed: !0,\n      object: \"clsid:D27CDB6E-AE6D-11cf-96B8-444553540000\",\n      applet: !0,\n    },\n    hasData: function (a) {\n      return (\n        (a = a.nodeType ? p.cache[a[p.expando]] : a[p.expando]), !!a && !K(a)\n      );\n    },\n    data: function (a, c, d, e) {\n      if (!p.acceptData(a)) return;\n      var f,\n        g,\n        h = p.expando,\n        i = typeof c == \"string\",\n        j = a.nodeType,\n        k = j ? p.cache : a,\n        l = j ? a[h] : a[h] && h;\n      if ((!l || !k[l] || (!e && !k[l].data)) && i && d === b) return;\n      l || (j ? (a[h] = l = p.deletedIds.pop() || ++p.uuid) : (l = h)),\n        k[l] || ((k[l] = {}), j || (k[l].toJSON = p.noop));\n      if (typeof c == \"object\" || typeof c == \"function\")\n        e ? (k[l] = p.extend(k[l], c)) : (k[l].data = p.extend(k[l].data, c));\n      return (\n        (f = k[l]),\n        e || (f.data || (f.data = {}), (f = f.data)),\n        d !== b && (f[p.camelCase(c)] = d),\n        i ? ((g = f[c]), g == null && (g = f[p.camelCase(c)])) : (g = f),\n        g\n      );\n    },\n    removeData: function (a, b, c) {\n      if (!p.acceptData(a)) return;\n      var d,\n        e,\n        f,\n        g = a.nodeType,\n        h = g ? p.cache : a,\n        i = g ? a[p.expando] : p.expando;\n      if (!h[i]) return;\n      if (b) {\n        d = c ? h[i] : h[i].data;\n        if (d) {\n          p.isArray(b) ||\n            (b in d\n              ? (b = [b])\n              : ((b = p.camelCase(b)),\n                b in d ? (b = [b]) : (b = b.split(\" \"))));\n          for (e = 0, f = b.length; e < f; e++) delete d[b[e]];\n          if (!(c ? K : p.isEmptyObject)(d)) return;\n        }\n      }\n      if (!c) {\n        delete h[i].data;\n        if (!K(h[i])) return;\n      }\n      g\n        ? p.cleanData([a], !0)\n        : p.support.deleteExpando || h != h.window\n          ? delete h[i]\n          : (h[i] = null);\n    },\n    _data: function (a, b, c) {\n      return p.data(a, b, c, !0);\n    },\n    acceptData: function (a) {\n      var b = a.nodeName && p.noData[a.nodeName.toLowerCase()];\n      return !b || (b !== !0 && a.getAttribute(\"classid\") === b);\n    },\n  }),\n    p.fn.extend({\n      data: function (a, c) {\n        var d,\n          e,\n          f,\n          g,\n          h,\n          i = this[0],\n          j = 0,\n          k = null;\n        if (a === b) {\n          if (this.length) {\n            k = p.data(i);\n            if (i.nodeType === 1 && !p._data(i, \"parsedAttrs\")) {\n              f = i.attributes;\n              for (h = f.length; j < h; j++)\n                (g = f[j].name),\n                  g.indexOf(\"data-\") === 0 &&\n                    ((g = p.camelCase(g.substring(5))), J(i, g, k[g]));\n              p._data(i, \"parsedAttrs\", !0);\n            }\n          }\n          return k;\n        }\n        return typeof a == \"object\"\n          ? this.each(function () {\n              p.data(this, a);\n            })\n          : ((d = a.split(\".\", 2)),\n            (d[1] = d[1] ? \".\" + d[1] : \"\"),\n            (e = d[1] + \"!\"),\n            p.access(\n              this,\n              function (c) {\n                if (c === b)\n                  return (\n                    (k = this.triggerHandler(\"getData\" + e, [d[0]])),\n                    k === b && i && ((k = p.data(i, a)), (k = J(i, a, k))),\n                    k === b && d[1] ? this.data(d[0]) : k\n                  );\n                (d[1] = c),\n                  this.each(function () {\n                    var b = p(this);\n                    b.triggerHandler(\"setData\" + e, d),\n                      p.data(this, a, c),\n                      b.triggerHandler(\"changeData\" + e, d);\n                  });\n              },\n              null,\n              c,\n              arguments.length > 1,\n              null,\n              !1,\n            ));\n      },\n      removeData: function (a) {\n        return this.each(function () {\n          p.removeData(this, a);\n        });\n      },\n    }),\n    p.extend({\n      queue: function (a, b, c) {\n        var d;\n        if (a)\n          return (\n            (b = (b || \"fx\") + \"queue\"),\n            (d = p._data(a, b)),\n            c &&\n              (!d || p.isArray(c)\n                ? (d = p._data(a, b, p.makeArray(c)))\n                : d.push(c)),\n            d || []\n          );\n      },\n      dequeue: function (a, b) {\n        b = b || \"fx\";\n        var c = p.queue(a, b),\n          d = c.length,\n          e = c.shift(),\n          f = p._queueHooks(a, b),\n          g = function () {\n            p.dequeue(a, b);\n          };\n        e === \"inprogress\" && ((e = c.shift()), d--),\n          e &&\n            (b === \"fx\" && c.unshift(\"inprogress\"),\n            delete f.stop,\n            e.call(a, g, f)),\n          !d && f && f.empty.fire();\n      },\n      _queueHooks: function (a, b) {\n        var c = b + \"queueHooks\";\n        return (\n          p._data(a, c) ||\n          p._data(a, c, {\n            empty: p.Callbacks(\"once memory\").add(function () {\n              p.removeData(a, b + \"queue\", !0), p.removeData(a, c, !0);\n            }),\n          })\n        );\n      },\n    }),\n    p.fn.extend({\n      queue: function (a, c) {\n        var d = 2;\n        return (\n          typeof a != \"string\" && ((c = a), (a = \"fx\"), d--),\n          arguments.length < d\n            ? p.queue(this[0], a)\n            : c === b\n              ? this\n              : this.each(function () {\n                  var b = p.queue(this, a, c);\n                  p._queueHooks(this, a),\n                    a === \"fx\" && b[0] !== \"inprogress\" && p.dequeue(this, a);\n                })\n        );\n      },\n      dequeue: function (a) {\n        return this.each(function () {\n          p.dequeue(this, a);\n        });\n      },\n      delay: function (a, b) {\n        return (\n          (a = p.fx ? p.fx.speeds[a] || a : a),\n          (b = b || \"fx\"),\n          this.queue(b, function (b, c) {\n            var d = setTimeout(b, a);\n            c.stop = function () {\n              clearTimeout(d);\n            };\n          })\n        );\n      },\n      clearQueue: function (a) {\n        return this.queue(a || \"fx\", []);\n      },\n      promise: function (a, c) {\n        var d,\n          e = 1,\n          f = p.Deferred(),\n          g = this,\n          h = this.length,\n          i = function () {\n            --e || f.resolveWith(g, [g]);\n          };\n        typeof a != \"string\" && ((c = a), (a = b)), (a = a || \"fx\");\n        while (h--)\n          (d = p._data(g[h], a + \"queueHooks\")),\n            d && d.empty && (e++, d.empty.add(i));\n        return i(), f.promise(c);\n      },\n    });\n  var L,\n    M,\n    N,\n    O = /[\\t\\r\\n]/g,\n    P = /\\r/g,\n    Q = /^(?:button|input)$/i,\n    R = /^(?:button|input|object|select|textarea)$/i,\n    S = /^a(?:rea|)$/i,\n    T =\n      /^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i,\n    U = p.support.getSetAttribute;\n  p.fn.extend({\n    attr: function (a, b) {\n      return p.access(this, p.attr, a, b, arguments.length > 1);\n    },\n    removeAttr: function (a) {\n      return this.each(function () {\n        p.removeAttr(this, a);\n      });\n    },\n    prop: function (a, b) {\n      return p.access(this, p.prop, a, b, arguments.length > 1);\n    },\n    removeProp: function (a) {\n      return (\n        (a = p.propFix[a] || a),\n        this.each(function () {\n          try {\n            (this[a] = b), delete this[a];\n          } catch (c) {}\n        })\n      );\n    },\n    addClass: function (a) {\n      var b, c, d, e, f, g, h;\n      if (p.isFunction(a))\n        return this.each(function (b) {\n          p(this).addClass(a.call(this, b, this.className));\n        });\n      if (a && typeof a == \"string\") {\n        b = a.split(s);\n        for (c = 0, d = this.length; c < d; c++) {\n          e = this[c];\n          if (e.nodeType === 1)\n            if (!e.className && b.length === 1) e.className = a;\n            else {\n              f = \" \" + e.className + \" \";\n              for (g = 0, h = b.length; g < h; g++)\n                ~f.indexOf(\" \" + b[g] + \" \") || (f += b[g] + \" \");\n              e.className = p.trim(f);\n            }\n        }\n      }\n      return this;\n    },\n    removeClass: function (a) {\n      var c, d, e, f, g, h, i;\n      if (p.isFunction(a))\n        return this.each(function (b) {\n          p(this).removeClass(a.call(this, b, this.className));\n        });\n      if ((a && typeof a == \"string\") || a === b) {\n        c = (a || \"\").split(s);\n        for (h = 0, i = this.length; h < i; h++) {\n          e = this[h];\n          if (e.nodeType === 1 && e.className) {\n            d = (\" \" + e.className + \" \").replace(O, \" \");\n            for (f = 0, g = c.length; f < g; f++)\n              while (d.indexOf(\" \" + c[f] + \" \") > -1)\n                d = d.replace(\" \" + c[f] + \" \", \" \");\n            e.className = a ? p.trim(d) : \"\";\n          }\n        }\n      }\n      return this;\n    },\n    toggleClass: function (a, b) {\n      var c = typeof a,\n        d = typeof b == \"boolean\";\n      return p.isFunction(a)\n        ? this.each(function (c) {\n            p(this).toggleClass(a.call(this, c, this.className, b), b);\n          })\n        : this.each(function () {\n            if (c === \"string\") {\n              var e,\n                f = 0,\n                g = p(this),\n                h = b,\n                i = a.split(s);\n              while ((e = i[f++]))\n                (h = d ? h : !g.hasClass(e)),\n                  g[h ? \"addClass\" : \"removeClass\"](e);\n            } else if (c === \"undefined\" || c === \"boolean\")\n              this.className && p._data(this, \"__className__\", this.className),\n                (this.className =\n                  this.className || a === !1\n                    ? \"\"\n                    : p._data(this, \"__className__\") || \"\");\n          });\n    },\n    hasClass: function (a) {\n      var b = \" \" + a + \" \",\n        c = 0,\n        d = this.length;\n      for (; c < d; c++)\n        if (\n          this[c].nodeType === 1 &&\n          (\" \" + this[c].className + \" \").replace(O, \" \").indexOf(b) > -1\n        )\n          return !0;\n      return !1;\n    },\n    val: function (a) {\n      var c,\n        d,\n        e,\n        f = this[0];\n      if (!arguments.length) {\n        if (f)\n          return (\n            (c = p.valHooks[f.type] || p.valHooks[f.nodeName.toLowerCase()]),\n            c && \"get\" in c && (d = c.get(f, \"value\")) !== b\n              ? d\n              : ((d = f.value),\n                typeof d == \"string\" ? d.replace(P, \"\") : d == null ? \"\" : d)\n          );\n        return;\n      }\n      return (\n        (e = p.isFunction(a)),\n        this.each(function (d) {\n          var f,\n            g = p(this);\n          if (this.nodeType !== 1) return;\n          e ? (f = a.call(this, d, g.val())) : (f = a),\n            f == null\n              ? (f = \"\")\n              : typeof f == \"number\"\n                ? (f += \"\")\n                : p.isArray(f) &&\n                  (f = p.map(f, function (a) {\n                    return a == null ? \"\" : a + \"\";\n                  })),\n            (c =\n              p.valHooks[this.type] || p.valHooks[this.nodeName.toLowerCase()]);\n          if (!c || !(\"set\" in c) || c.set(this, f, \"value\") === b)\n            this.value = f;\n        })\n      );\n    },\n  }),\n    p.extend({\n      valHooks: {\n        option: {\n          get: function (a) {\n            var b = a.attributes.value;\n            return !b || b.specified ? a.value : a.text;\n          },\n        },\n        select: {\n          get: function (a) {\n            var b,\n              c,\n              d,\n              e,\n              f = a.selectedIndex,\n              g = [],\n              h = a.options,\n              i = a.type === \"select-one\";\n            if (f < 0) return null;\n            (c = i ? f : 0), (d = i ? f + 1 : h.length);\n            for (; c < d; c++) {\n              e = h[c];\n              if (\n                e.selected &&\n                (p.support.optDisabled\n                  ? !e.disabled\n                  : e.getAttribute(\"disabled\") === null) &&\n                (!e.parentNode.disabled ||\n                  !p.nodeName(e.parentNode, \"optgroup\"))\n              ) {\n                b = p(e).val();\n                if (i) return b;\n                g.push(b);\n              }\n            }\n            return i && !g.length && h.length ? p(h[f]).val() : g;\n          },\n          set: function (a, b) {\n            var c = p.makeArray(b);\n            return (\n              p(a)\n                .find(\"option\")\n                .each(function () {\n                  this.selected = p.inArray(p(this).val(), c) >= 0;\n                }),\n              c.length || (a.selectedIndex = -1),\n              c\n            );\n          },\n        },\n      },\n      attrFn: {},\n      attr: function (a, c, d, e) {\n        var f,\n          g,\n          h,\n          i = a.nodeType;\n        if (!a || i === 3 || i === 8 || i === 2) return;\n        if (e && p.isFunction(p.fn[c])) return p(a)[c](d);\n        if (typeof a.getAttribute == \"undefined\") return p.prop(a, c, d);\n        (h = i !== 1 || !p.isXMLDoc(a)),\n          h &&\n            ((c = c.toLowerCase()),\n            (g = p.attrHooks[c] || (T.test(c) ? M : L)));\n        if (d !== b) {\n          if (d === null) {\n            p.removeAttr(a, c);\n            return;\n          }\n          return g && \"set\" in g && h && (f = g.set(a, d, c)) !== b\n            ? f\n            : (a.setAttribute(c, \"\" + d), d);\n        }\n        return g && \"get\" in g && h && (f = g.get(a, c)) !== null\n          ? f\n          : ((f = a.getAttribute(c)), f === null ? b : f);\n      },\n      removeAttr: function (a, b) {\n        var c,\n          d,\n          e,\n          f,\n          g = 0;\n        if (b && a.nodeType === 1) {\n          d = b.split(s);\n          for (; g < d.length; g++)\n            (e = d[g]),\n              e &&\n                ((c = p.propFix[e] || e),\n                (f = T.test(e)),\n                f || p.attr(a, e, \"\"),\n                a.removeAttribute(U ? e : c),\n                f && c in a && (a[c] = !1));\n        }\n      },\n      attrHooks: {\n        type: {\n          set: function (a, b) {\n            if (Q.test(a.nodeName) && a.parentNode)\n              p.error(\"type property can't be changed\");\n            else if (\n              !p.support.radioValue &&\n              b === \"radio\" &&\n              p.nodeName(a, \"input\")\n            ) {\n              var c = a.value;\n              return a.setAttribute(\"type\", b), c && (a.value = c), b;\n            }\n          },\n        },\n        value: {\n          get: function (a, b) {\n            return L && p.nodeName(a, \"button\")\n              ? L.get(a, b)\n              : b in a\n                ? a.value\n                : null;\n          },\n          set: function (a, b, c) {\n            if (L && p.nodeName(a, \"button\")) return L.set(a, b, c);\n            a.value = b;\n          },\n        },\n      },\n      propFix: {\n        tabindex: \"tabIndex\",\n        readonly: \"readOnly\",\n        for: \"htmlFor\",\n        class: \"className\",\n        maxlength: \"maxLength\",\n        cellspacing: \"cellSpacing\",\n        cellpadding: \"cellPadding\",\n        rowspan: \"rowSpan\",\n        colspan: \"colSpan\",\n        usemap: \"useMap\",\n        frameborder: \"frameBorder\",\n        contenteditable: \"contentEditable\",\n      },\n      prop: function (a, c, d) {\n        var e,\n          f,\n          g,\n          h = a.nodeType;\n        if (!a || h === 3 || h === 8 || h === 2) return;\n        return (\n          (g = h !== 1 || !p.isXMLDoc(a)),\n          g && ((c = p.propFix[c] || c), (f = p.propHooks[c])),\n          d !== b\n            ? f && \"set\" in f && (e = f.set(a, d, c)) !== b\n              ? e\n              : (a[c] = d)\n            : f && \"get\" in f && (e = f.get(a, c)) !== null\n              ? e\n              : a[c]\n        );\n      },\n      propHooks: {\n        tabIndex: {\n          get: function (a) {\n            var c = a.getAttributeNode(\"tabindex\");\n            return c && c.specified\n              ? parseInt(c.value, 10)\n              : R.test(a.nodeName) || (S.test(a.nodeName) && a.href)\n                ? 0\n                : b;\n          },\n        },\n      },\n    }),\n    (M = {\n      get: function (a, c) {\n        var d,\n          e = p.prop(a, c);\n        return e === !0 ||\n          (typeof e != \"boolean\" &&\n            (d = a.getAttributeNode(c)) &&\n            d.nodeValue !== !1)\n          ? c.toLowerCase()\n          : b;\n      },\n      set: function (a, b, c) {\n        var d;\n        return (\n          b === !1\n            ? p.removeAttr(a, c)\n            : ((d = p.propFix[c] || c),\n              d in a && (a[d] = !0),\n              a.setAttribute(c, c.toLowerCase())),\n          c\n        );\n      },\n    }),\n    U ||\n      ((N = { name: !0, id: !0, coords: !0 }),\n      (L = p.valHooks.button =\n        {\n          get: function (a, c) {\n            var d;\n            return (\n              (d = a.getAttributeNode(c)),\n              d && (N[c] ? d.value !== \"\" : d.specified) ? d.value : b\n            );\n          },\n          set: function (a, b, c) {\n            var d = a.getAttributeNode(c);\n            return (\n              d || ((d = e.createAttribute(c)), a.setAttributeNode(d)),\n              (d.value = b + \"\")\n            );\n          },\n        }),\n      p.each([\"width\", \"height\"], function (a, b) {\n        p.attrHooks[b] = p.extend(p.attrHooks[b], {\n          set: function (a, c) {\n            if (c === \"\") return a.setAttribute(b, \"auto\"), c;\n          },\n        });\n      }),\n      (p.attrHooks.contenteditable = {\n        get: L.get,\n        set: function (a, b, c) {\n          b === \"\" && (b = \"false\"), L.set(a, b, c);\n        },\n      })),\n    p.support.hrefNormalized ||\n      p.each([\"href\", \"src\", \"width\", \"height\"], function (a, c) {\n        p.attrHooks[c] = p.extend(p.attrHooks[c], {\n          get: function (a) {\n            var d = a.getAttribute(c, 2);\n            return d === null ? b : d;\n          },\n        });\n      }),\n    p.support.style ||\n      (p.attrHooks.style = {\n        get: function (a) {\n          return a.style.cssText.toLowerCase() || b;\n        },\n        set: function (a, b) {\n          return (a.style.cssText = \"\" + b);\n        },\n      }),\n    p.support.optSelected ||\n      (p.propHooks.selected = p.extend(p.propHooks.selected, {\n        get: function (a) {\n          var b = a.parentNode;\n          return (\n            b && (b.selectedIndex, b.parentNode && b.parentNode.selectedIndex),\n            null\n          );\n        },\n      })),\n    p.support.enctype || (p.propFix.enctype = \"encoding\"),\n    p.support.checkOn ||\n      p.each([\"radio\", \"checkbox\"], function () {\n        p.valHooks[this] = {\n          get: function (a) {\n            return a.getAttribute(\"value\") === null ? \"on\" : a.value;\n          },\n        };\n      }),\n    p.each([\"radio\", \"checkbox\"], function () {\n      p.valHooks[this] = p.extend(p.valHooks[this], {\n        set: function (a, b) {\n          if (p.isArray(b)) return (a.checked = p.inArray(p(a).val(), b) >= 0);\n        },\n      });\n    });\n  var V = /^(?:textarea|input|select)$/i,\n    W = /^([^\\.]*|)(?:\\.(.+)|)$/,\n    X = /(?:^|\\s)hover(\\.\\S+|)\\b/,\n    Y = /^key/,\n    Z = /^(?:mouse|contextmenu)|click/,\n    $ = /^(?:focusinfocus|focusoutblur)$/,\n    _ = function (a) {\n      return p.event.special.hover\n        ? a\n        : a.replace(X, \"mouseenter$1 mouseleave$1\");\n    };\n  (p.event = {\n    add: function (a, c, d, e, f) {\n      var g, h, i, j, k, l, m, n, o, q, r;\n      if (a.nodeType === 3 || a.nodeType === 8 || !c || !d || !(g = p._data(a)))\n        return;\n      d.handler && ((o = d), (d = o.handler), (f = o.selector)),\n        d.guid || (d.guid = p.guid++),\n        (i = g.events),\n        i || (g.events = i = {}),\n        (h = g.handle),\n        h ||\n          ((g.handle = h =\n            function (a) {\n              return typeof p != \"undefined\" &&\n                (!a || p.event.triggered !== a.type)\n                ? p.event.dispatch.apply(h.elem, arguments)\n                : b;\n            }),\n          (h.elem = a)),\n        (c = p.trim(_(c)).split(\" \"));\n      for (j = 0; j < c.length; j++) {\n        (k = W.exec(c[j]) || []),\n          (l = k[1]),\n          (m = (k[2] || \"\").split(\".\").sort()),\n          (r = p.event.special[l] || {}),\n          (l = (f ? r.delegateType : r.bindType) || l),\n          (r = p.event.special[l] || {}),\n          (n = p.extend(\n            {\n              type: l,\n              origType: k[1],\n              data: e,\n              handler: d,\n              guid: d.guid,\n              selector: f,\n              namespace: m.join(\".\"),\n            },\n            o,\n          )),\n          (q = i[l]);\n        if (!q) {\n          (q = i[l] = []), (q.delegateCount = 0);\n          if (!r.setup || r.setup.call(a, e, m, h) === !1)\n            a.addEventListener\n              ? a.addEventListener(l, h, !1)\n              : a.attachEvent && a.attachEvent(\"on\" + l, h);\n        }\n        r.add &&\n          (r.add.call(a, n), n.handler.guid || (n.handler.guid = d.guid)),\n          f ? q.splice(q.delegateCount++, 0, n) : q.push(n),\n          (p.event.global[l] = !0);\n      }\n      a = null;\n    },\n    global: {},\n    remove: function (a, b, c, d, e) {\n      var f,\n        g,\n        h,\n        i,\n        j,\n        k,\n        l,\n        m,\n        n,\n        o,\n        q,\n        r = p.hasData(a) && p._data(a);\n      if (!r || !(m = r.events)) return;\n      b = p.trim(_(b || \"\")).split(\" \");\n      for (f = 0; f < b.length; f++) {\n        (g = W.exec(b[f]) || []), (h = i = g[1]), (j = g[2]);\n        if (!h) {\n          for (h in m) p.event.remove(a, h + b[f], c, d, !0);\n          continue;\n        }\n        (n = p.event.special[h] || {}),\n          (h = (d ? n.delegateType : n.bindType) || h),\n          (o = m[h] || []),\n          (k = o.length),\n          (j = j\n            ? new RegExp(\n                \"(^|\\\\.)\" +\n                  j.split(\".\").sort().join(\"\\\\.(?:.*\\\\.|)\") +\n                  \"(\\\\.|$)\",\n              )\n            : null);\n        for (l = 0; l < o.length; l++)\n          (q = o[l]),\n            (e || i === q.origType) &&\n              (!c || c.guid === q.guid) &&\n              (!j || j.test(q.namespace)) &&\n              (!d || d === q.selector || (d === \"**\" && q.selector)) &&\n              (o.splice(l--, 1),\n              q.selector && o.delegateCount--,\n              n.remove && n.remove.call(a, q));\n        o.length === 0 &&\n          k !== o.length &&\n          ((!n.teardown || n.teardown.call(a, j, r.handle) === !1) &&\n            p.removeEvent(a, h, r.handle),\n          delete m[h]);\n      }\n      p.isEmptyObject(m) && (delete r.handle, p.removeData(a, \"events\", !0));\n    },\n    customEvent: { getData: !0, setData: !0, changeData: !0 },\n    trigger: function (c, d, f, g) {\n      if (!f || (f.nodeType !== 3 && f.nodeType !== 8)) {\n        var h,\n          i,\n          j,\n          k,\n          l,\n          m,\n          n,\n          o,\n          q,\n          r,\n          s = c.type || c,\n          t = [];\n        if ($.test(s + p.event.triggered)) return;\n        s.indexOf(\"!\") >= 0 && ((s = s.slice(0, -1)), (i = !0)),\n          s.indexOf(\".\") >= 0 &&\n            ((t = s.split(\".\")), (s = t.shift()), t.sort());\n        if ((!f || p.event.customEvent[s]) && !p.event.global[s]) return;\n        (c =\n          typeof c == \"object\"\n            ? c[p.expando]\n              ? c\n              : new p.Event(s, c)\n            : new p.Event(s)),\n          (c.type = s),\n          (c.isTrigger = !0),\n          (c.exclusive = i),\n          (c.namespace = t.join(\".\")),\n          (c.namespace_re = c.namespace\n            ? new RegExp(\"(^|\\\\.)\" + t.join(\"\\\\.(?:.*\\\\.|)\") + \"(\\\\.|$)\")\n            : null),\n          (m = s.indexOf(\":\") < 0 ? \"on\" + s : \"\");\n        if (!f) {\n          h = p.cache;\n          for (j in h)\n            h[j].events &&\n              h[j].events[s] &&\n              p.event.trigger(c, d, h[j].handle.elem, !0);\n          return;\n        }\n        (c.result = b),\n          c.target || (c.target = f),\n          (d = d != null ? p.makeArray(d) : []),\n          d.unshift(c),\n          (n = p.event.special[s] || {});\n        if (n.trigger && n.trigger.apply(f, d) === !1) return;\n        q = [[f, n.bindType || s]];\n        if (!g && !n.noBubble && !p.isWindow(f)) {\n          (r = n.delegateType || s), (k = $.test(r + s) ? f : f.parentNode);\n          for (l = f; k; k = k.parentNode) q.push([k, r]), (l = k);\n          l === (f.ownerDocument || e) &&\n            q.push([l.defaultView || l.parentWindow || a, r]);\n        }\n        for (j = 0; j < q.length && !c.isPropagationStopped(); j++)\n          (k = q[j][0]),\n            (c.type = q[j][1]),\n            (o = (p._data(k, \"events\") || {})[c.type] && p._data(k, \"handle\")),\n            o && o.apply(k, d),\n            (o = m && k[m]),\n            o && p.acceptData(k) && o.apply(k, d) === !1 && c.preventDefault();\n        return (\n          (c.type = s),\n          !g &&\n            !c.isDefaultPrevented() &&\n            (!n._default || n._default.apply(f.ownerDocument, d) === !1) &&\n            (s !== \"click\" || !p.nodeName(f, \"a\")) &&\n            p.acceptData(f) &&\n            m &&\n            f[s] &&\n            ((s !== \"focus\" && s !== \"blur\") || c.target.offsetWidth !== 0) &&\n            !p.isWindow(f) &&\n            ((l = f[m]),\n            l && (f[m] = null),\n            (p.event.triggered = s),\n            f[s](),\n            (p.event.triggered = b),\n            l && (f[m] = l)),\n          c.result\n        );\n      }\n      return;\n    },\n    dispatch: function (c) {\n      c = p.event.fix(c || a.event);\n      var d,\n        e,\n        f,\n        g,\n        h,\n        i,\n        j,\n        k,\n        l,\n        m,\n        n = (p._data(this, \"events\") || {})[c.type] || [],\n        o = n.delegateCount,\n        q = [].slice.call(arguments),\n        r = !c.exclusive && !c.namespace,\n        s = p.event.special[c.type] || {},\n        t = [];\n      (q[0] = c), (c.delegateTarget = this);\n      if (s.preDispatch && s.preDispatch.call(this, c) === !1) return;\n      if (o && (!c.button || c.type !== \"click\"))\n        for (f = c.target; f != this; f = f.parentNode || this)\n          if (f.disabled !== !0 || c.type !== \"click\") {\n            (h = {}), (j = []);\n            for (d = 0; d < o; d++)\n              (k = n[d]),\n                (l = k.selector),\n                h[l] === b && (h[l] = p(l, this).index(f) >= 0),\n                h[l] && j.push(k);\n            j.length && t.push({ elem: f, matches: j });\n          }\n      n.length > o && t.push({ elem: this, matches: n.slice(o) });\n      for (d = 0; d < t.length && !c.isPropagationStopped(); d++) {\n        (i = t[d]), (c.currentTarget = i.elem);\n        for (\n          e = 0;\n          e < i.matches.length && !c.isImmediatePropagationStopped();\n          e++\n        ) {\n          k = i.matches[e];\n          if (\n            r ||\n            (!c.namespace && !k.namespace) ||\n            (c.namespace_re && c.namespace_re.test(k.namespace))\n          )\n            (c.data = k.data),\n              (c.handleObj = k),\n              (g = (\n                (p.event.special[k.origType] || {}).handle || k.handler\n              ).apply(i.elem, q)),\n              g !== b &&\n                ((c.result = g),\n                g === !1 && (c.preventDefault(), c.stopPropagation()));\n        }\n      }\n      return s.postDispatch && s.postDispatch.call(this, c), c.result;\n    },\n    props:\n      \"attrChange attrName relatedNode srcElement altKey bubbles cancelable ctrlKey currentTarget eventPhase metaKey relatedTarget shiftKey target timeStamp view which\".split(\n        \" \",\n      ),\n    fixHooks: {},\n    keyHooks: {\n      props: \"char charCode key keyCode\".split(\" \"),\n      filter: function (a, b) {\n        return (\n          a.which == null &&\n            (a.which = b.charCode != null ? b.charCode : b.keyCode),\n          a\n        );\n      },\n    },\n    mouseHooks: {\n      props:\n        \"button buttons clientX clientY fromElement offsetX offsetY pageX pageY screenX screenY toElement\".split(\n          \" \",\n        ),\n      filter: function (a, c) {\n        var d,\n          f,\n          g,\n          h = c.button,\n          i = c.fromElement;\n        return (\n          a.pageX == null &&\n            c.clientX != null &&\n            ((d = a.target.ownerDocument || e),\n            (f = d.documentElement),\n            (g = d.body),\n            (a.pageX =\n              c.clientX +\n              ((f && f.scrollLeft) || (g && g.scrollLeft) || 0) -\n              ((f && f.clientLeft) || (g && g.clientLeft) || 0)),\n            (a.pageY =\n              c.clientY +\n              ((f && f.scrollTop) || (g && g.scrollTop) || 0) -\n              ((f && f.clientTop) || (g && g.clientTop) || 0))),\n          !a.relatedTarget &&\n            i &&\n            (a.relatedTarget = i === a.target ? c.toElement : i),\n          !a.which &&\n            h !== b &&\n            (a.which = h & 1 ? 1 : h & 2 ? 3 : h & 4 ? 2 : 0),\n          a\n        );\n      },\n    },\n    fix: function (a) {\n      if (a[p.expando]) return a;\n      var b,\n        c,\n        d = a,\n        f = p.event.fixHooks[a.type] || {},\n        g = f.props ? this.props.concat(f.props) : this.props;\n      a = p.Event(d);\n      for (b = g.length; b; ) (c = g[--b]), (a[c] = d[c]);\n      return (\n        a.target || (a.target = d.srcElement || e),\n        a.target.nodeType === 3 && (a.target = a.target.parentNode),\n        (a.metaKey = !!a.metaKey),\n        f.filter ? f.filter(a, d) : a\n      );\n    },\n    special: {\n      load: { noBubble: !0 },\n      focus: { delegateType: \"focusin\" },\n      blur: { delegateType: \"focusout\" },\n      beforeunload: {\n        setup: function (a, b, c) {\n          p.isWindow(this) && (this.onbeforeunload = c);\n        },\n        teardown: function (a, b) {\n          this.onbeforeunload === b && (this.onbeforeunload = null);\n        },\n      },\n    },\n    simulate: function (a, b, c, d) {\n      var e = p.extend(new p.Event(), c, {\n        type: a,\n        isSimulated: !0,\n        originalEvent: {},\n      });\n      d ? p.event.trigger(e, null, b) : p.event.dispatch.call(b, e),\n        e.isDefaultPrevented() && c.preventDefault();\n    },\n  }),\n    (p.event.handle = p.event.dispatch),\n    (p.removeEvent = e.removeEventListener\n      ? function (a, b, c) {\n          a.removeEventListener && a.removeEventListener(b, c, !1);\n        }\n      : function (a, b, c) {\n          var d = \"on\" + b;\n          a.detachEvent &&\n            (typeof a[d] == \"undefined\" && (a[d] = null), a.detachEvent(d, c));\n        }),\n    (p.Event = function (a, b) {\n      if (this instanceof p.Event)\n        a && a.type\n          ? ((this.originalEvent = a),\n            (this.type = a.type),\n            (this.isDefaultPrevented =\n              a.defaultPrevented ||\n              a.returnValue === !1 ||\n              (a.getPreventDefault && a.getPreventDefault())\n                ? bb\n                : ba))\n          : (this.type = a),\n          b && p.extend(this, b),\n          (this.timeStamp = (a && a.timeStamp) || p.now()),\n          (this[p.expando] = !0);\n      else return new p.Event(a, b);\n    }),\n    (p.Event.prototype = {\n      preventDefault: function () {\n        this.isDefaultPrevented = bb;\n        var a = this.originalEvent;\n        if (!a) return;\n        a.preventDefault ? a.preventDefault() : (a.returnValue = !1);\n      },\n      stopPropagation: function () {\n        this.isPropagationStopped = bb;\n        var a = this.originalEvent;\n        if (!a) return;\n        a.stopPropagation && a.stopPropagation(), (a.cancelBubble = !0);\n      },\n      stopImmediatePropagation: function () {\n        (this.isImmediatePropagationStopped = bb), this.stopPropagation();\n      },\n      isDefaultPrevented: ba,\n      isPropagationStopped: ba,\n      isImmediatePropagationStopped: ba,\n    }),\n    p.each(\n      { mouseenter: \"mouseover\", mouseleave: \"mouseout\" },\n      function (a, b) {\n        p.event.special[a] = {\n          delegateType: b,\n          bindType: b,\n          handle: function (a) {\n            var c,\n              d = this,\n              e = a.relatedTarget,\n              f = a.handleObj,\n              g = f.selector;\n            if (!e || (e !== d && !p.contains(d, e)))\n              (a.type = f.origType),\n                (c = f.handler.apply(this, arguments)),\n                (a.type = b);\n            return c;\n          },\n        };\n      },\n    ),\n    p.support.submitBubbles ||\n      (p.event.special.submit = {\n        setup: function () {\n          if (p.nodeName(this, \"form\")) return !1;\n          p.event.add(this, \"click._submit keypress._submit\", function (a) {\n            var c = a.target,\n              d =\n                p.nodeName(c, \"input\") || p.nodeName(c, \"button\") ? c.form : b;\n            d &&\n              !p._data(d, \"_submit_attached\") &&\n              (p.event.add(d, \"submit._submit\", function (a) {\n                a._submit_bubble = !0;\n              }),\n              p._data(d, \"_submit_attached\", !0));\n          });\n        },\n        postDispatch: function (a) {\n          a._submit_bubble &&\n            (delete a._submit_bubble,\n            this.parentNode &&\n              !a.isTrigger &&\n              p.event.simulate(\"submit\", this.parentNode, a, !0));\n        },\n        teardown: function () {\n          if (p.nodeName(this, \"form\")) return !1;\n          p.event.remove(this, \"._submit\");\n        },\n      }),\n    p.support.changeBubbles ||\n      (p.event.special.change = {\n        setup: function () {\n          if (V.test(this.nodeName)) {\n            if (this.type === \"checkbox\" || this.type === \"radio\")\n              p.event.add(this, \"propertychange._change\", function (a) {\n                a.originalEvent.propertyName === \"checked\" &&\n                  (this._just_changed = !0);\n              }),\n                p.event.add(this, \"click._change\", function (a) {\n                  this._just_changed &&\n                    !a.isTrigger &&\n                    (this._just_changed = !1),\n                    p.event.simulate(\"change\", this, a, !0);\n                });\n            return !1;\n          }\n          p.event.add(this, \"beforeactivate._change\", function (a) {\n            var b = a.target;\n            V.test(b.nodeName) &&\n              !p._data(b, \"_change_attached\") &&\n              (p.event.add(b, \"change._change\", function (a) {\n                this.parentNode &&\n                  !a.isSimulated &&\n                  !a.isTrigger &&\n                  p.event.simulate(\"change\", this.parentNode, a, !0);\n              }),\n              p._data(b, \"_change_attached\", !0));\n          });\n        },\n        handle: function (a) {\n          var b = a.target;\n          if (\n            this !== b ||\n            a.isSimulated ||\n            a.isTrigger ||\n            (b.type !== \"radio\" && b.type !== \"checkbox\")\n          )\n            return a.handleObj.handler.apply(this, arguments);\n        },\n        teardown: function () {\n          return p.event.remove(this, \"._change\"), !V.test(this.nodeName);\n        },\n      }),\n    p.support.focusinBubbles ||\n      p.each({ focus: \"focusin\", blur: \"focusout\" }, function (a, b) {\n        var c = 0,\n          d = function (a) {\n            p.event.simulate(b, a.target, p.event.fix(a), !0);\n          };\n        p.event.special[b] = {\n          setup: function () {\n            c++ === 0 && e.addEventListener(a, d, !0);\n          },\n          teardown: function () {\n            --c === 0 && e.removeEventListener(a, d, !0);\n          },\n        };\n      }),\n    p.fn.extend({\n      on: function (a, c, d, e, f) {\n        var g, h;\n        if (typeof a == \"object\") {\n          typeof c != \"string\" && ((d = d || c), (c = b));\n          for (h in a) this.on(h, c, d, a[h], f);\n          return this;\n        }\n        d == null && e == null\n          ? ((e = c), (d = c = b))\n          : e == null &&\n            (typeof c == \"string\"\n              ? ((e = d), (d = b))\n              : ((e = d), (d = c), (c = b)));\n        if (e === !1) e = ba;\n        else if (!e) return this;\n        return (\n          f === 1 &&\n            ((g = e),\n            (e = function (a) {\n              return p().off(a), g.apply(this, arguments);\n            }),\n            (e.guid = g.guid || (g.guid = p.guid++))),\n          this.each(function () {\n            p.event.add(this, a, e, d, c);\n          })\n        );\n      },\n      one: function (a, b, c, d) {\n        return this.on(a, b, c, d, 1);\n      },\n      off: function (a, c, d) {\n        var e, f;\n        if (a && a.preventDefault && a.handleObj)\n          return (\n            (e = a.handleObj),\n            p(a.delegateTarget).off(\n              e.namespace ? e.origType + \".\" + e.namespace : e.origType,\n              e.selector,\n              e.handler,\n            ),\n            this\n          );\n        if (typeof a == \"object\") {\n          for (f in a) this.off(f, c, a[f]);\n          return this;\n        }\n        if (c === !1 || typeof c == \"function\") (d = c), (c = b);\n        return (\n          d === !1 && (d = ba),\n          this.each(function () {\n            p.event.remove(this, a, d, c);\n          })\n        );\n      },\n      bind: function (a, b, c) {\n        return this.on(a, null, b, c);\n      },\n      unbind: function (a, b) {\n        return this.off(a, null, b);\n      },\n      live: function (a, b, c) {\n        return p(this.context).on(a, this.selector, b, c), this;\n      },\n      die: function (a, b) {\n        return p(this.context).off(a, this.selector || \"**\", b), this;\n      },\n      delegate: function (a, b, c, d) {\n        return this.on(b, a, c, d);\n      },\n      undelegate: function (a, b, c) {\n        return arguments.length == 1\n          ? this.off(a, \"**\")\n          : this.off(b, a || \"**\", c);\n      },\n      trigger: function (a, b) {\n        return this.each(function () {\n          p.event.trigger(a, b, this);\n        });\n      },\n      triggerHandler: function (a, b) {\n        if (this[0]) return p.event.trigger(a, b, this[0], !0);\n      },\n      toggle: function (a) {\n        var b = arguments,\n          c = a.guid || p.guid++,\n          d = 0,\n          e = function (c) {\n            var e = (p._data(this, \"lastToggle\" + a.guid) || 0) % d;\n            return (\n              p._data(this, \"lastToggle\" + a.guid, e + 1),\n              c.preventDefault(),\n              b[e].apply(this, arguments) || !1\n            );\n          };\n        e.guid = c;\n        while (d < b.length) b[d++].guid = c;\n        return this.click(e);\n      },\n      hover: function (a, b) {\n        return this.mouseenter(a).mouseleave(b || a);\n      },\n    }),\n    p.each(\n      \"blur focus focusin focusout load resize scroll unload click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup error contextmenu\".split(\n        \" \",\n      ),\n      function (a, b) {\n        (p.fn[b] = function (a, c) {\n          return (\n            c == null && ((c = a), (a = null)),\n            arguments.length > 0 ? this.on(b, null, a, c) : this.trigger(b)\n          );\n        }),\n          Y.test(b) && (p.event.fixHooks[b] = p.event.keyHooks),\n          Z.test(b) && (p.event.fixHooks[b] = p.event.mouseHooks);\n      },\n    ),\n    (function (a, b) {\n      function $(a, b, c, d) {\n        (c = c || []), (b = b || q);\n        var e,\n          f,\n          g,\n          j,\n          k = b.nodeType;\n        if (k !== 1 && k !== 9) return [];\n        if (!a || typeof a != \"string\") return c;\n        g = h(b);\n        if (!g && !d)\n          if ((e = L.exec(a)))\n            if ((j = e[1])) {\n              if (k === 9) {\n                f = b.getElementById(j);\n                if (!f || !f.parentNode) return c;\n                if (f.id === j) return c.push(f), c;\n              } else if (\n                b.ownerDocument &&\n                (f = b.ownerDocument.getElementById(j)) &&\n                i(b, f) &&\n                f.id === j\n              )\n                return c.push(f), c;\n            } else {\n              if (e[2])\n                return u.apply(c, t.call(b.getElementsByTagName(a), 0)), c;\n              if ((j = e[3]) && X && b.getElementsByClassName)\n                return u.apply(c, t.call(b.getElementsByClassName(j), 0)), c;\n            }\n        return bk(a, b, c, d, g);\n      }\n      function _(a) {\n        return function (b) {\n          var c = b.nodeName.toLowerCase();\n          return c === \"input\" && b.type === a;\n        };\n      }\n      function ba(a) {\n        return function (b) {\n          var c = b.nodeName.toLowerCase();\n          return (c === \"input\" || c === \"button\") && b.type === a;\n        };\n      }\n      function bb(a, b, c) {\n        if (a === b) return c;\n        var d = a.nextSibling;\n        while (d) {\n          if (d === b) return -1;\n          d = d.nextSibling;\n        }\n        return 1;\n      }\n      function bc(a, b, c, d) {\n        var e,\n          g,\n          h,\n          i,\n          j,\n          k,\n          l,\n          m,\n          n,\n          p,\n          r = !c && b !== q,\n          s = (r ? \"<s>\" : \"\") + a.replace(H, \"$1<s>\"),\n          u = y[o][s];\n        if (u) return d ? 0 : t.call(u, 0);\n        (j = a), (k = []), (m = 0), (n = f.preFilter), (p = f.filter);\n        while (j) {\n          if (!e || (g = I.exec(j)))\n            g && ((j = j.slice(g[0].length)), (h.selector = l)),\n              k.push((h = [])),\n              (l = \"\"),\n              r && (j = \" \" + j);\n          e = !1;\n          if ((g = J.exec(j)))\n            (l += g[0]),\n              (j = j.slice(g[0].length)),\n              (e = h.push({\n                part: g.pop().replace(H, \" \"),\n                string: g[0],\n                captures: g,\n              }));\n          for (i in p)\n            (g = S[i].exec(j)) &&\n              (!n[i] || (g = n[i](g, b, c))) &&\n              ((l += g[0]),\n              (j = j.slice(g[0].length)),\n              (e = h.push({ part: i, string: g.shift(), captures: g })));\n          if (!e) break;\n        }\n        return (\n          l && (h.selector = l),\n          d ? j.length : j ? $.error(a) : t.call(y(s, k), 0)\n        );\n      }\n      function bd(a, b, e, f) {\n        var g = b.dir,\n          h = s++;\n        return (\n          a ||\n            (a = function (a) {\n              return a === e;\n            }),\n          b.first\n            ? function (b) {\n                while ((b = b[g])) if (b.nodeType === 1) return a(b) && b;\n              }\n            : f\n              ? function (b) {\n                  while ((b = b[g])) if (b.nodeType === 1 && a(b)) return b;\n                }\n              : function (b) {\n                  var e,\n                    f = h + \".\" + c,\n                    i = f + \".\" + d;\n                  while ((b = b[g]))\n                    if (b.nodeType === 1) {\n                      if ((e = b[o]) === i) return b.sizset;\n                      if (typeof e == \"string\" && e.indexOf(f) === 0) {\n                        if (b.sizset) return b;\n                      } else {\n                        b[o] = i;\n                        if (a(b)) return (b.sizset = !0), b;\n                        b.sizset = !1;\n                      }\n                    }\n                }\n        );\n      }\n      function be(a, b) {\n        return a\n          ? function (c) {\n              var d = b(c);\n              return d && a(d === !0 ? c : d);\n            }\n          : b;\n      }\n      function bf(a, b, c) {\n        var d,\n          e,\n          g = 0;\n        for (; (d = a[g]); g++)\n          f.relative[d.part]\n            ? (e = bd(e, f.relative[d.part], b, c))\n            : (e = be(\n                e,\n                f.filter[d.part].apply(null, d.captures.concat(b, c)),\n              ));\n        return e;\n      }\n      function bg(a) {\n        return function (b) {\n          var c,\n            d = 0;\n          for (; (c = a[d]); d++) if (c(b)) return !0;\n          return !1;\n        };\n      }\n      function bh(a, b, c, d) {\n        var e = 0,\n          f = b.length;\n        for (; e < f; e++) $(a, b[e], c, d);\n      }\n      function bi(a, b, c, d, e, g) {\n        var h,\n          i = f.setFilters[b.toLowerCase()];\n        return (\n          i || $.error(b),\n          (a || !(h = e)) && bh(a || \"*\", d, (h = []), e),\n          h.length > 0 ? i(h, c, g) : []\n        );\n      }\n      function bj(a, c, d, e) {\n        var f,\n          g,\n          h,\n          i,\n          j,\n          k,\n          l,\n          m,\n          n,\n          o,\n          p,\n          q,\n          r,\n          s = 0,\n          t = a.length,\n          v = S.POS,\n          w = new RegExp(\"^\" + v.source + \"(?!\" + A + \")\", \"i\"),\n          x = function () {\n            var a = 1,\n              c = arguments.length - 2;\n            for (; a < c; a++) arguments[a] === b && (n[a] = b);\n          };\n        for (; s < t; s++) {\n          (f = a[s]), (g = \"\"), (m = e);\n          for (h = 0, i = f.length; h < i; h++) {\n            (j = f[h]), (k = j.string);\n            if (j.part === \"PSEUDO\") {\n              v.exec(\"\"), (l = 0);\n              while ((n = v.exec(k))) {\n                (o = !0), (p = v.lastIndex = n.index + n[0].length);\n                if (p > l) {\n                  (g += k.slice(l, n.index)),\n                    (l = p),\n                    (q = [c]),\n                    J.test(g) && (m && (q = m), (m = e));\n                  if ((r = O.test(g)))\n                    (g = g.slice(0, -5).replace(J, \"$&*\")), l++;\n                  n.length > 1 && n[0].replace(w, x),\n                    (m = bi(g, n[1], n[2], q, m, r));\n                }\n                g = \"\";\n              }\n            }\n            o || (g += k), (o = !1);\n          }\n          g\n            ? J.test(g)\n              ? bh(g, m || [c], d, e)\n              : $(g, c, d, e ? e.concat(m) : m)\n            : u.apply(d, m);\n        }\n        return t === 1 ? d : $.uniqueSort(d);\n      }\n      function bk(a, b, e, g, h) {\n        a = a.replace(H, \"$1\");\n        var i,\n          k,\n          l,\n          m,\n          n,\n          o,\n          p,\n          q,\n          r,\n          s,\n          v = bc(a, b, h),\n          w = b.nodeType;\n        if (S.POS.test(a)) return bj(v, b, e, g);\n        if (g) i = t.call(g, 0);\n        else if (v.length === 1) {\n          if (\n            (o = t.call(v[0], 0)).length > 2 &&\n            (p = o[0]).part === \"ID\" &&\n            w === 9 &&\n            !h &&\n            f.relative[o[1].part]\n          ) {\n            b = f.find.ID(p.captures[0].replace(R, \"\"), b, h)[0];\n            if (!b) return e;\n            a = a.slice(o.shift().string.length);\n          }\n          (r = ((v = N.exec(o[0].string)) && !v.index && b.parentNode) || b),\n            (q = \"\");\n          for (n = o.length - 1; n >= 0; n--) {\n            (p = o[n]), (s = p.part), (q = p.string + q);\n            if (f.relative[s]) break;\n            if (f.order.test(s)) {\n              i = f.find[s](p.captures[0].replace(R, \"\"), r, h);\n              if (i == null) continue;\n              (a = a.slice(0, a.length - q.length) + q.replace(S[s], \"\")),\n                a || u.apply(e, t.call(i, 0));\n              break;\n            }\n          }\n        }\n        if (a) {\n          (k = j(a, b, h)),\n            (c = k.dirruns++),\n            i == null &&\n              (i = f.find.TAG(\"*\", (N.test(a) && b.parentNode) || b));\n          for (n = 0; (m = i[n]); n++) (d = k.runs++), k(m) && e.push(m);\n        }\n        return e;\n      }\n      var c,\n        d,\n        e,\n        f,\n        g,\n        h,\n        i,\n        j,\n        k,\n        l,\n        m = !0,\n        n = \"undefined\",\n        o = (\"sizcache\" + Math.random()).replace(\".\", \"\"),\n        q = a.document,\n        r = q.documentElement,\n        s = 0,\n        t = [].slice,\n        u = [].push,\n        v = function (a, b) {\n          return (a[o] = b || !0), a;\n        },\n        w = function () {\n          var a = {},\n            b = [];\n          return v(function (c, d) {\n            return b.push(c) > f.cacheLength && delete a[b.shift()], (a[c] = d);\n          }, a);\n        },\n        x = w(),\n        y = w(),\n        z = w(),\n        A = \"[\\\\x20\\\\t\\\\r\\\\n\\\\f]\",\n        B = \"(?:\\\\\\\\.|[-\\\\w]|[^\\\\x00-\\\\xa0])+\",\n        C = B.replace(\"w\", \"w#\"),\n        D = \"([*^$|!~]?=)\",\n        E =\n          \"\\\\[\" +\n          A +\n          \"*(\" +\n          B +\n          \")\" +\n          A +\n          \"*(?:\" +\n          D +\n          A +\n          \"*(?:(['\\\"])((?:\\\\\\\\.|[^\\\\\\\\])*?)\\\\3|(\" +\n          C +\n          \")|)|)\" +\n          A +\n          \"*\\\\]\",\n        F =\n          \":(\" +\n          B +\n          \")(?:\\\\((?:(['\\\"])((?:\\\\\\\\.|[^\\\\\\\\])*?)\\\\2|([^()[\\\\]]*|(?:(?:\" +\n          E +\n          \")|[^:]|\\\\\\\\.)*|.*))\\\\)|)\",\n        G =\n          \":(nth|eq|gt|lt|first|last|even|odd)(?:\\\\(((?:-\\\\d)?\\\\d*)\\\\)|)(?=[^-]|$)\",\n        H = new RegExp(\"^\" + A + \"+|((?:^|[^\\\\\\\\])(?:\\\\\\\\.)*)\" + A + \"+$\", \"g\"),\n        I = new RegExp(\"^\" + A + \"*,\" + A + \"*\"),\n        J = new RegExp(\"^\" + A + \"*([\\\\x20\\\\t\\\\r\\\\n\\\\f>+~])\" + A + \"*\"),\n        K = new RegExp(F),\n        L = /^(?:#([\\w\\-]+)|(\\w+)|\\.([\\w\\-]+))$/,\n        M = /^:not/,\n        N = /[\\x20\\t\\r\\n\\f]*[+~]/,\n        O = /:not\\($/,\n        P = /h\\d/i,\n        Q = /input|select|textarea|button/i,\n        R = /\\\\(?!\\\\)/g,\n        S = {\n          ID: new RegExp(\"^#(\" + B + \")\"),\n          CLASS: new RegExp(\"^\\\\.(\" + B + \")\"),\n          NAME: new RegExp(\"^\\\\[name=['\\\"]?(\" + B + \")['\\\"]?\\\\]\"),\n          TAG: new RegExp(\"^(\" + B.replace(\"w\", \"w*\") + \")\"),\n          ATTR: new RegExp(\"^\" + E),\n          PSEUDO: new RegExp(\"^\" + F),\n          CHILD: new RegExp(\n            \"^:(only|nth|last|first)-child(?:\\\\(\" +\n              A +\n              \"*(even|odd|(([+-]|)(\\\\d*)n|)\" +\n              A +\n              \"*(?:([+-]|)\" +\n              A +\n              \"*(\\\\d+)|))\" +\n              A +\n              \"*\\\\)|)\",\n            \"i\",\n          ),\n          POS: new RegExp(G, \"ig\"),\n          needsContext: new RegExp(\"^\" + A + \"*[>+~]|\" + G, \"i\"),\n        },\n        T = function (a) {\n          var b = q.createElement(\"div\");\n          try {\n            return a(b);\n          } catch (c) {\n            return !1;\n          } finally {\n            b = null;\n          }\n        },\n        U = T(function (a) {\n          return (\n            a.appendChild(q.createComment(\"\")),\n            !a.getElementsByTagName(\"*\").length\n          );\n        }),\n        V = T(function (a) {\n          return (\n            (a.innerHTML = \"<a href='#'></a>\"),\n            a.firstChild &&\n              typeof a.firstChild.getAttribute !== n &&\n              a.firstChild.getAttribute(\"href\") === \"#\"\n          );\n        }),\n        W = T(function (a) {\n          a.innerHTML = \"<select></select>\";\n          var b = typeof a.lastChild.getAttribute(\"multiple\");\n          return b !== \"boolean\" && b !== \"string\";\n        }),\n        X = T(function (a) {\n          return (\n            (a.innerHTML =\n              \"<div class='hidden e'></div><div class='hidden'></div>\"),\n            !a.getElementsByClassName || !a.getElementsByClassName(\"e\").length\n              ? !1\n              : ((a.lastChild.className = \"e\"),\n                a.getElementsByClassName(\"e\").length === 2)\n          );\n        }),\n        Y = T(function (a) {\n          (a.id = o + 0),\n            (a.innerHTML =\n              \"<a name='\" + o + \"'></a><div name='\" + o + \"'></div>\"),\n            r.insertBefore(a, r.firstChild);\n          var b =\n            q.getElementsByName &&\n            q.getElementsByName(o).length ===\n              2 + q.getElementsByName(o + 0).length;\n          return (e = !q.getElementById(o)), r.removeChild(a), b;\n        });\n      try {\n        t.call(r.childNodes, 0)[0].nodeType;\n      } catch (Z) {\n        t = function (a) {\n          var b,\n            c = [];\n          for (; (b = this[a]); a++) c.push(b);\n          return c;\n        };\n      }\n      ($.matches = function (a, b) {\n        return $(a, null, null, b);\n      }),\n        ($.matchesSelector = function (a, b) {\n          return $(b, null, null, [a]).length > 0;\n        }),\n        (g = $.getText =\n          function (a) {\n            var b,\n              c = \"\",\n              d = 0,\n              e = a.nodeType;\n            if (e) {\n              if (e === 1 || e === 9 || e === 11) {\n                if (typeof a.textContent == \"string\") return a.textContent;\n                for (a = a.firstChild; a; a = a.nextSibling) c += g(a);\n              } else if (e === 3 || e === 4) return a.nodeValue;\n            } else for (; (b = a[d]); d++) c += g(b);\n            return c;\n          }),\n        (h = $.isXML =\n          function (a) {\n            var b = a && (a.ownerDocument || a).documentElement;\n            return b ? b.nodeName !== \"HTML\" : !1;\n          }),\n        (i = $.contains =\n          r.contains\n            ? function (a, b) {\n                var c = a.nodeType === 9 ? a.documentElement : a,\n                  d = b && b.parentNode;\n                return (\n                  a === d ||\n                  !!(d && d.nodeType === 1 && c.contains && c.contains(d))\n                );\n              }\n            : r.compareDocumentPosition\n              ? function (a, b) {\n                  return b && !!(a.compareDocumentPosition(b) & 16);\n                }\n              : function (a, b) {\n                  while ((b = b.parentNode)) if (b === a) return !0;\n                  return !1;\n                }),\n        ($.attr = function (a, b) {\n          var c,\n            d = h(a);\n          return (\n            d || (b = b.toLowerCase()),\n            f.attrHandle[b]\n              ? f.attrHandle[b](a)\n              : W || d\n                ? a.getAttribute(b)\n                : ((c = a.getAttributeNode(b)),\n                  c\n                    ? typeof a[b] == \"boolean\"\n                      ? a[b]\n                        ? b\n                        : null\n                      : c.specified\n                        ? c.value\n                        : null\n                    : null)\n          );\n        }),\n        (f = $.selectors =\n          {\n            cacheLength: 50,\n            createPseudo: v,\n            match: S,\n            order: new RegExp(\n              \"ID|TAG\" + (Y ? \"|NAME\" : \"\") + (X ? \"|CLASS\" : \"\"),\n            ),\n            attrHandle: V\n              ? {}\n              : {\n                  href: function (a) {\n                    return a.getAttribute(\"href\", 2);\n                  },\n                  type: function (a) {\n                    return a.getAttribute(\"type\");\n                  },\n                },\n            find: {\n              ID: e\n                ? function (a, b, c) {\n                    if (typeof b.getElementById !== n && !c) {\n                      var d = b.getElementById(a);\n                      return d && d.parentNode ? [d] : [];\n                    }\n                  }\n                : function (a, c, d) {\n                    if (typeof c.getElementById !== n && !d) {\n                      var e = c.getElementById(a);\n                      return e\n                        ? e.id === a ||\n                          (typeof e.getAttributeNode !== n &&\n                            e.getAttributeNode(\"id\").value === a)\n                          ? [e]\n                          : b\n                        : [];\n                    }\n                  },\n              TAG: U\n                ? function (a, b) {\n                    if (typeof b.getElementsByTagName !== n)\n                      return b.getElementsByTagName(a);\n                  }\n                : function (a, b) {\n                    var c = b.getElementsByTagName(a);\n                    if (a === \"*\") {\n                      var d,\n                        e = [],\n                        f = 0;\n                      for (; (d = c[f]); f++) d.nodeType === 1 && e.push(d);\n                      return e;\n                    }\n                    return c;\n                  },\n              NAME: function (a, b) {\n                if (typeof b.getElementsByName !== n)\n                  return b.getElementsByName(name);\n              },\n              CLASS: function (a, b, c) {\n                if (typeof b.getElementsByClassName !== n && !c)\n                  return b.getElementsByClassName(a);\n              },\n            },\n            relative: {\n              \">\": { dir: \"parentNode\", first: !0 },\n              \" \": { dir: \"parentNode\" },\n              \"+\": { dir: \"previousSibling\", first: !0 },\n              \"~\": { dir: \"previousSibling\" },\n            },\n            preFilter: {\n              ATTR: function (a) {\n                return (\n                  (a[1] = a[1].replace(R, \"\")),\n                  (a[3] = (a[4] || a[5] || \"\").replace(R, \"\")),\n                  a[2] === \"~=\" && (a[3] = \" \" + a[3] + \" \"),\n                  a.slice(0, 4)\n                );\n              },\n              CHILD: function (a) {\n                return (\n                  (a[1] = a[1].toLowerCase()),\n                  a[1] === \"nth\"\n                    ? (a[2] || $.error(a[0]),\n                      (a[3] = +(a[3]\n                        ? a[4] + (a[5] || 1)\n                        : 2 * (a[2] === \"even\" || a[2] === \"odd\"))),\n                      (a[4] = +(a[6] + a[7] || a[2] === \"odd\")))\n                    : a[2] && $.error(a[0]),\n                  a\n                );\n              },\n              PSEUDO: function (a, b, c) {\n                var d, e;\n                if (S.CHILD.test(a[0])) return null;\n                if (a[3]) a[2] = a[3];\n                else if ((d = a[4]))\n                  K.test(d) &&\n                    (e = bc(d, b, c, !0)) &&\n                    (e = d.indexOf(\")\", d.length - e) - d.length) &&\n                    ((d = d.slice(0, e)), (a[0] = a[0].slice(0, e))),\n                    (a[2] = d);\n                return a.slice(0, 3);\n              },\n            },\n            filter: {\n              ID: e\n                ? function (a) {\n                    return (\n                      (a = a.replace(R, \"\")),\n                      function (b) {\n                        return b.getAttribute(\"id\") === a;\n                      }\n                    );\n                  }\n                : function (a) {\n                    return (\n                      (a = a.replace(R, \"\")),\n                      function (b) {\n                        var c =\n                          typeof b.getAttributeNode !== n &&\n                          b.getAttributeNode(\"id\");\n                        return c && c.value === a;\n                      }\n                    );\n                  },\n              TAG: function (a) {\n                return a === \"*\"\n                  ? function () {\n                      return !0;\n                    }\n                  : ((a = a.replace(R, \"\").toLowerCase()),\n                    function (b) {\n                      return b.nodeName && b.nodeName.toLowerCase() === a;\n                    });\n              },\n              CLASS: function (a) {\n                var b = x[o][a];\n                return (\n                  b ||\n                    (b = x(\n                      a,\n                      new RegExp(\"(^|\" + A + \")\" + a + \"(\" + A + \"|$)\"),\n                    )),\n                  function (a) {\n                    return b.test(\n                      a.className ||\n                        (typeof a.getAttribute !== n &&\n                          a.getAttribute(\"class\")) ||\n                        \"\",\n                    );\n                  }\n                );\n              },\n              ATTR: function (a, b, c) {\n                return b\n                  ? function (d) {\n                      var e = $.attr(d, a),\n                        f = e + \"\";\n                      if (e == null) return b === \"!=\";\n                      switch (b) {\n                        case \"=\":\n                          return f === c;\n                        case \"!=\":\n                          return f !== c;\n                        case \"^=\":\n                          return c && f.indexOf(c) === 0;\n                        case \"*=\":\n                          return c && f.indexOf(c) > -1;\n                        case \"$=\":\n                          return c && f.substr(f.length - c.length) === c;\n                        case \"~=\":\n                          return (\" \" + f + \" \").indexOf(c) > -1;\n                        case \"|=\":\n                          return (\n                            f === c || f.substr(0, c.length + 1) === c + \"-\"\n                          );\n                      }\n                    }\n                  : function (b) {\n                      return $.attr(b, a) != null;\n                    };\n              },\n              CHILD: function (a, b, c, d) {\n                if (a === \"nth\") {\n                  var e = s++;\n                  return function (a) {\n                    var b,\n                      f,\n                      g = 0,\n                      h = a;\n                    if (c === 1 && d === 0) return !0;\n                    b = a.parentNode;\n                    if (b && (b[o] !== e || !a.sizset)) {\n                      for (h = b.firstChild; h; h = h.nextSibling)\n                        if (h.nodeType === 1) {\n                          h.sizset = ++g;\n                          if (h === a) break;\n                        }\n                      b[o] = e;\n                    }\n                    return (\n                      (f = a.sizset - d),\n                      c === 0 ? f === 0 : f % c === 0 && f / c >= 0\n                    );\n                  };\n                }\n                return function (b) {\n                  var c = b;\n                  switch (a) {\n                    case \"only\":\n                    case \"first\":\n                      while ((c = c.previousSibling))\n                        if (c.nodeType === 1) return !1;\n                      if (a === \"first\") return !0;\n                      c = b;\n                    case \"last\":\n                      while ((c = c.nextSibling))\n                        if (c.nodeType === 1) return !1;\n                      return !0;\n                  }\n                };\n              },\n              PSEUDO: function (a, b, c, d) {\n                var e,\n                  g = f.pseudos[a] || f.pseudos[a.toLowerCase()];\n                return (\n                  g || $.error(\"unsupported pseudo: \" + a),\n                  g[o]\n                    ? g(b, c, d)\n                    : g.length > 1\n                      ? ((e = [a, a, \"\", b]),\n                        function (a) {\n                          return g(a, 0, e);\n                        })\n                      : g\n                );\n              },\n            },\n            pseudos: {\n              not: v(function (a, b, c) {\n                var d = j(a.replace(H, \"$1\"), b, c);\n                return function (a) {\n                  return !d(a);\n                };\n              }),\n              enabled: function (a) {\n                return a.disabled === !1;\n              },\n              disabled: function (a) {\n                return a.disabled === !0;\n              },\n              checked: function (a) {\n                var b = a.nodeName.toLowerCase();\n                return (\n                  (b === \"input\" && !!a.checked) ||\n                  (b === \"option\" && !!a.selected)\n                );\n              },\n              selected: function (a) {\n                return (\n                  a.parentNode && a.parentNode.selectedIndex, a.selected === !0\n                );\n              },\n              parent: function (a) {\n                return !f.pseudos.empty(a);\n              },\n              empty: function (a) {\n                var b;\n                a = a.firstChild;\n                while (a) {\n                  if (a.nodeName > \"@\" || (b = a.nodeType) === 3 || b === 4)\n                    return !1;\n                  a = a.nextSibling;\n                }\n                return !0;\n              },\n              contains: v(function (a) {\n                return function (b) {\n                  return (b.textContent || b.innerText || g(b)).indexOf(a) > -1;\n                };\n              }),\n              has: v(function (a) {\n                return function (b) {\n                  return $(a, b).length > 0;\n                };\n              }),\n              header: function (a) {\n                return P.test(a.nodeName);\n              },\n              text: function (a) {\n                var b, c;\n                return (\n                  a.nodeName.toLowerCase() === \"input\" &&\n                  (b = a.type) === \"text\" &&\n                  ((c = a.getAttribute(\"type\")) == null ||\n                    c.toLowerCase() === b)\n                );\n              },\n              radio: _(\"radio\"),\n              checkbox: _(\"checkbox\"),\n              file: _(\"file\"),\n              password: _(\"password\"),\n              image: _(\"image\"),\n              submit: ba(\"submit\"),\n              reset: ba(\"reset\"),\n              button: function (a) {\n                var b = a.nodeName.toLowerCase();\n                return (b === \"input\" && a.type === \"button\") || b === \"button\";\n              },\n              input: function (a) {\n                return Q.test(a.nodeName);\n              },\n              focus: function (a) {\n                var b = a.ownerDocument;\n                return (\n                  a === b.activeElement &&\n                  (!b.hasFocus || b.hasFocus()) &&\n                  (!!a.type || !!a.href)\n                );\n              },\n              active: function (a) {\n                return a === a.ownerDocument.activeElement;\n              },\n            },\n            setFilters: {\n              first: function (a, b, c) {\n                return c ? a.slice(1) : [a[0]];\n              },\n              last: function (a, b, c) {\n                var d = a.pop();\n                return c ? a : [d];\n              },\n              even: function (a, b, c) {\n                var d = [],\n                  e = c ? 1 : 0,\n                  f = a.length;\n                for (; e < f; e = e + 2) d.push(a[e]);\n                return d;\n              },\n              odd: function (a, b, c) {\n                var d = [],\n                  e = c ? 0 : 1,\n                  f = a.length;\n                for (; e < f; e = e + 2) d.push(a[e]);\n                return d;\n              },\n              lt: function (a, b, c) {\n                return c ? a.slice(+b) : a.slice(0, +b);\n              },\n              gt: function (a, b, c) {\n                return c ? a.slice(0, +b + 1) : a.slice(+b + 1);\n              },\n              eq: function (a, b, c) {\n                var d = a.splice(+b, 1);\n                return c ? a : d;\n              },\n            },\n          }),\n        (k = r.compareDocumentPosition\n          ? function (a, b) {\n              return a === b\n                ? ((l = !0), 0)\n                : (\n                      !a.compareDocumentPosition || !b.compareDocumentPosition\n                        ? a.compareDocumentPosition\n                        : a.compareDocumentPosition(b) & 4\n                    )\n                  ? -1\n                  : 1;\n            }\n          : function (a, b) {\n              if (a === b) return (l = !0), 0;\n              if (a.sourceIndex && b.sourceIndex)\n                return a.sourceIndex - b.sourceIndex;\n              var c,\n                d,\n                e = [],\n                f = [],\n                g = a.parentNode,\n                h = b.parentNode,\n                i = g;\n              if (g === h) return bb(a, b);\n              if (!g) return -1;\n              if (!h) return 1;\n              while (i) e.unshift(i), (i = i.parentNode);\n              i = h;\n              while (i) f.unshift(i), (i = i.parentNode);\n              (c = e.length), (d = f.length);\n              for (var j = 0; j < c && j < d; j++)\n                if (e[j] !== f[j]) return bb(e[j], f[j]);\n              return j === c ? bb(a, f[j], -1) : bb(e[j], b, 1);\n            }),\n        [0, 0].sort(k),\n        (m = !l),\n        ($.uniqueSort = function (a) {\n          var b,\n            c = 1;\n          (l = m), a.sort(k);\n          if (l) for (; (b = a[c]); c++) b === a[c - 1] && a.splice(c--, 1);\n          return a;\n        }),\n        ($.error = function (a) {\n          throw new Error(\"Syntax error, unrecognized expression: \" + a);\n        }),\n        (j = $.compile =\n          function (a, b, c) {\n            var d,\n              e,\n              f,\n              g = z[o][a];\n            if (g && g.context === b) return g;\n            d = bc(a, b, c);\n            for (e = 0, f = d.length; e < f; e++) d[e] = bf(d[e], b, c);\n            return (\n              (g = z(a, bg(d))), (g.context = b), (g.runs = g.dirruns = 0), g\n            );\n          }),\n        q.querySelectorAll &&\n          (function () {\n            var a,\n              b = bk,\n              c = /'|\\\\/g,\n              d = /\\=[\\x20\\t\\r\\n\\f]*([^'\"\\]]*)[\\x20\\t\\r\\n\\f]*\\]/g,\n              e = [],\n              f = [\":active\"],\n              g =\n                r.matchesSelector ||\n                r.mozMatchesSelector ||\n                r.webkitMatchesSelector ||\n                r.oMatchesSelector ||\n                r.msMatchesSelector;\n            T(function (a) {\n              (a.innerHTML = \"<select><option selected=''></option></select>\"),\n                a.querySelectorAll(\"[selected]\").length ||\n                  e.push(\n                    \"\\\\[\" +\n                      A +\n                      \"*(?:checked|disabled|ismap|multiple|readonly|selected|value)\",\n                  ),\n                a.querySelectorAll(\":checked\").length || e.push(\":checked\");\n            }),\n              T(function (a) {\n                (a.innerHTML = \"<p test=''></p>\"),\n                  a.querySelectorAll(\"[test^='']\").length &&\n                    e.push(\"[*^$]=\" + A + \"*(?:\\\"\\\"|'')\"),\n                  (a.innerHTML = \"<input type='hidden'/>\"),\n                  a.querySelectorAll(\":enabled\").length ||\n                    e.push(\":enabled\", \":disabled\");\n              }),\n              (e = e.length && new RegExp(e.join(\"|\"))),\n              (bk = function (a, d, f, g, h) {\n                if (!g && !h && (!e || !e.test(a)))\n                  if (d.nodeType === 9)\n                    try {\n                      return u.apply(f, t.call(d.querySelectorAll(a), 0)), f;\n                    } catch (i) {}\n                  else if (\n                    d.nodeType === 1 &&\n                    d.nodeName.toLowerCase() !== \"object\"\n                  ) {\n                    var j,\n                      k,\n                      l,\n                      m = d.getAttribute(\"id\"),\n                      n = m || o,\n                      p = (N.test(a) && d.parentNode) || d;\n                    m ? (n = n.replace(c, \"\\\\$&\")) : d.setAttribute(\"id\", n),\n                      (j = bc(a, d, h)),\n                      (n = \"[id='\" + n + \"']\");\n                    for (k = 0, l = j.length; k < l; k++)\n                      j[k] = n + j[k].selector;\n                    try {\n                      return (\n                        u.apply(f, t.call(p.querySelectorAll(j.join(\",\")), 0)),\n                        f\n                      );\n                    } catch (i) {\n                    } finally {\n                      m || d.removeAttribute(\"id\");\n                    }\n                  }\n                return b(a, d, f, g, h);\n              }),\n              g &&\n                (T(function (b) {\n                  a = g.call(b, \"div\");\n                  try {\n                    g.call(b, \"[test!='']:sizzle\"),\n                      f.push(S.PSEUDO.source, S.POS.source, \"!=\");\n                  } catch (c) {}\n                }),\n                (f = new RegExp(f.join(\"|\"))),\n                ($.matchesSelector = function (b, c) {\n                  c = c.replace(d, \"='$1']\");\n                  if (!h(b) && !f.test(c) && (!e || !e.test(c)))\n                    try {\n                      var i = g.call(b, c);\n                      if (i || a || (b.document && b.document.nodeType !== 11))\n                        return i;\n                    } catch (j) {}\n                  return $(c, null, null, [b]).length > 0;\n                }));\n          })(),\n        (f.setFilters.nth = f.setFilters.eq),\n        (f.filters = f.pseudos),\n        ($.attr = p.attr),\n        (p.find = $),\n        (p.expr = $.selectors),\n        (p.expr[\":\"] = p.expr.pseudos),\n        (p.unique = $.uniqueSort),\n        (p.text = $.getText),\n        (p.isXMLDoc = $.isXML),\n        (p.contains = $.contains);\n    })(a);\n  var bc = /Until$/,\n    bd = /^(?:parents|prev(?:Until|All))/,\n    be = /^.[^:#\\[\\.,]*$/,\n    bf = p.expr.match.needsContext,\n    bg = { children: !0, contents: !0, next: !0, prev: !0 };\n  p.fn.extend({\n    find: function (a) {\n      var b,\n        c,\n        d,\n        e,\n        f,\n        g,\n        h = this;\n      if (typeof a != \"string\")\n        return p(a).filter(function () {\n          for (b = 0, c = h.length; b < c; b++)\n            if (p.contains(h[b], this)) return !0;\n        });\n      g = this.pushStack(\"\", \"find\", a);\n      for (b = 0, c = this.length; b < c; b++) {\n        (d = g.length), p.find(a, this[b], g);\n        if (b > 0)\n          for (e = d; e < g.length; e++)\n            for (f = 0; f < d; f++)\n              if (g[f] === g[e]) {\n                g.splice(e--, 1);\n                break;\n              }\n      }\n      return g;\n    },\n    has: function (a) {\n      var b,\n        c = p(a, this),\n        d = c.length;\n      return this.filter(function () {\n        for (b = 0; b < d; b++) if (p.contains(this, c[b])) return !0;\n      });\n    },\n    not: function (a) {\n      return this.pushStack(bj(this, a, !1), \"not\", a);\n    },\n    filter: function (a) {\n      return this.pushStack(bj(this, a, !0), \"filter\", a);\n    },\n    is: function (a) {\n      return (\n        !!a &&\n        (typeof a == \"string\"\n          ? bf.test(a)\n            ? p(a, this.context).index(this[0]) >= 0\n            : p.filter(a, this).length > 0\n          : this.filter(a).length > 0)\n      );\n    },\n    closest: function (a, b) {\n      var c,\n        d = 0,\n        e = this.length,\n        f = [],\n        g = bf.test(a) || typeof a != \"string\" ? p(a, b || this.context) : 0;\n      for (; d < e; d++) {\n        c = this[d];\n        while (c && c.ownerDocument && c !== b && c.nodeType !== 11) {\n          if (g ? g.index(c) > -1 : p.find.matchesSelector(c, a)) {\n            f.push(c);\n            break;\n          }\n          c = c.parentNode;\n        }\n      }\n      return (\n        (f = f.length > 1 ? p.unique(f) : f), this.pushStack(f, \"closest\", a)\n      );\n    },\n    index: function (a) {\n      return a\n        ? typeof a == \"string\"\n          ? p.inArray(this[0], p(a))\n          : p.inArray(a.jquery ? a[0] : a, this)\n        : this[0] && this[0].parentNode\n          ? this.prevAll().length\n          : -1;\n    },\n    add: function (a, b) {\n      var c =\n          typeof a == \"string\"\n            ? p(a, b)\n            : p.makeArray(a && a.nodeType ? [a] : a),\n        d = p.merge(this.get(), c);\n      return this.pushStack(bh(c[0]) || bh(d[0]) ? d : p.unique(d));\n    },\n    addBack: function (a) {\n      return this.add(a == null ? this.prevObject : this.prevObject.filter(a));\n    },\n  }),\n    (p.fn.andSelf = p.fn.addBack),\n    p.each(\n      {\n        parent: function (a) {\n          var b = a.parentNode;\n          return b && b.nodeType !== 11 ? b : null;\n        },\n        parents: function (a) {\n          return p.dir(a, \"parentNode\");\n        },\n        parentsUntil: function (a, b, c) {\n          return p.dir(a, \"parentNode\", c);\n        },\n        next: function (a) {\n          return bi(a, \"nextSibling\");\n        },\n        prev: function (a) {\n          return bi(a, \"previousSibling\");\n        },\n        nextAll: function (a) {\n          return p.dir(a, \"nextSibling\");\n        },\n        prevAll: function (a) {\n          return p.dir(a, \"previousSibling\");\n        },\n        nextUntil: function (a, b, c) {\n          return p.dir(a, \"nextSibling\", c);\n        },\n        prevUntil: function (a, b, c) {\n          return p.dir(a, \"previousSibling\", c);\n        },\n        siblings: function (a) {\n          return p.sibling((a.parentNode || {}).firstChild, a);\n        },\n        children: function (a) {\n          return p.sibling(a.firstChild);\n        },\n        contents: function (a) {\n          return p.nodeName(a, \"iframe\")\n            ? a.contentDocument || a.contentWindow.document\n            : p.merge([], a.childNodes);\n        },\n      },\n      function (a, b) {\n        p.fn[a] = function (c, d) {\n          var e = p.map(this, b, c);\n          return (\n            bc.test(a) || (d = c),\n            d && typeof d == \"string\" && (e = p.filter(d, e)),\n            (e = this.length > 1 && !bg[a] ? p.unique(e) : e),\n            this.length > 1 && bd.test(a) && (e = e.reverse()),\n            this.pushStack(e, a, k.call(arguments).join(\",\"))\n          );\n        };\n      },\n    ),\n    p.extend({\n      filter: function (a, b, c) {\n        return (\n          c && (a = \":not(\" + a + \")\"),\n          b.length === 1\n            ? p.find.matchesSelector(b[0], a)\n              ? [b[0]]\n              : []\n            : p.find.matches(a, b)\n        );\n      },\n      dir: function (a, c, d) {\n        var e = [],\n          f = a[c];\n        while (\n          f &&\n          f.nodeType !== 9 &&\n          (d === b || f.nodeType !== 1 || !p(f).is(d))\n        )\n          f.nodeType === 1 && e.push(f), (f = f[c]);\n        return e;\n      },\n      sibling: function (a, b) {\n        var c = [];\n        for (; a; a = a.nextSibling) a.nodeType === 1 && a !== b && c.push(a);\n        return c;\n      },\n    });\n  var bl =\n      \"abbr|article|aside|audio|bdi|canvas|data|datalist|details|figcaption|figure|footer|header|hgroup|mark|meter|nav|output|progress|section|summary|time|video\",\n    bm = / jQuery\\d+=\"(?:null|\\d+)\"/g,\n    bn = /^\\s+/,\n    bo =\n      /<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\\w:]+)[^>]*)\\/>/gi,\n    bp = /<([\\w:]+)/,\n    bq = /<tbody/i,\n    br = /<|&#?\\w+;/,\n    bs = /<(?:script|style|link)/i,\n    bt = /<(?:script|object|embed|option|style)/i,\n    bu = new RegExp(\"<(?:\" + bl + \")[\\\\s/>]\", \"i\"),\n    bv = /^(?:checkbox|radio)$/,\n    bw = /checked\\s*(?:[^=]|=\\s*.checked.)/i,\n    bx = /\\/(java|ecma)script/i,\n    by = /^\\s*<!(?:\\[CDATA\\[|\\-\\-)|[\\]\\-]{2}>\\s*$/g,\n    bz = {\n      option: [1, \"<select multiple='multiple'>\", \"</select>\"],\n      legend: [1, \"<fieldset>\", \"</fieldset>\"],\n      thead: [1, \"<table>\", \"</table>\"],\n      tr: [2, \"<table><tbody>\", \"</tbody></table>\"],\n      td: [3, \"<table><tbody><tr>\", \"</tr></tbody></table>\"],\n      col: [2, \"<table><tbody></tbody><colgroup>\", \"</colgroup></table>\"],\n      area: [1, \"<map>\", \"</map>\"],\n      _default: [0, \"\", \"\"],\n    },\n    bA = bk(e),\n    bB = bA.appendChild(e.createElement(\"div\"));\n  (bz.optgroup = bz.option),\n    (bz.tbody = bz.tfoot = bz.colgroup = bz.caption = bz.thead),\n    (bz.th = bz.td),\n    p.support.htmlSerialize || (bz._default = [1, \"X<div>\", \"</div>\"]),\n    p.fn.extend({\n      text: function (a) {\n        return p.access(\n          this,\n          function (a) {\n            return a === b\n              ? p.text(this)\n              : this.empty().append(\n                  ((this[0] && this[0].ownerDocument) || e).createTextNode(a),\n                );\n          },\n          null,\n          a,\n          arguments.length,\n        );\n      },\n      wrapAll: function (a) {\n        if (p.isFunction(a))\n          return this.each(function (b) {\n            p(this).wrapAll(a.call(this, b));\n          });\n        if (this[0]) {\n          var b = p(a, this[0].ownerDocument).eq(0).clone(!0);\n          this[0].parentNode && b.insertBefore(this[0]),\n            b\n              .map(function () {\n                var a = this;\n                while (a.firstChild && a.firstChild.nodeType === 1)\n                  a = a.firstChild;\n                return a;\n              })\n              .append(this);\n        }\n        return this;\n      },\n      wrapInner: function (a) {\n        return p.isFunction(a)\n          ? this.each(function (b) {\n              p(this).wrapInner(a.call(this, b));\n            })\n          : this.each(function () {\n              var b = p(this),\n                c = b.contents();\n              c.length ? c.wrapAll(a) : b.append(a);\n            });\n      },\n      wrap: function (a) {\n        var b = p.isFunction(a);\n        return this.each(function (c) {\n          p(this).wrapAll(b ? a.call(this, c) : a);\n        });\n      },\n      unwrap: function () {\n        return this.parent()\n          .each(function () {\n            p.nodeName(this, \"body\") || p(this).replaceWith(this.childNodes);\n          })\n          .end();\n      },\n      append: function () {\n        return this.domManip(arguments, !0, function (a) {\n          (this.nodeType === 1 || this.nodeType === 11) && this.appendChild(a);\n        });\n      },\n      prepend: function () {\n        return this.domManip(arguments, !0, function (a) {\n          (this.nodeType === 1 || this.nodeType === 11) &&\n            this.insertBefore(a, this.firstChild);\n        });\n      },\n      before: function () {\n        if (!bh(this[0]))\n          return this.domManip(arguments, !1, function (a) {\n            this.parentNode.insertBefore(a, this);\n          });\n        if (arguments.length) {\n          var a = p.clean(arguments);\n          return this.pushStack(p.merge(a, this), \"before\", this.selector);\n        }\n      },\n      after: function () {\n        if (!bh(this[0]))\n          return this.domManip(arguments, !1, function (a) {\n            this.parentNode.insertBefore(a, this.nextSibling);\n          });\n        if (arguments.length) {\n          var a = p.clean(arguments);\n          return this.pushStack(p.merge(this, a), \"after\", this.selector);\n        }\n      },\n      remove: function (a, b) {\n        var c,\n          d = 0;\n        for (; (c = this[d]) != null; d++)\n          if (!a || p.filter(a, [c]).length)\n            !b &&\n              c.nodeType === 1 &&\n              (p.cleanData(c.getElementsByTagName(\"*\")), p.cleanData([c])),\n              c.parentNode && c.parentNode.removeChild(c);\n        return this;\n      },\n      empty: function () {\n        var a,\n          b = 0;\n        for (; (a = this[b]) != null; b++) {\n          a.nodeType === 1 && p.cleanData(a.getElementsByTagName(\"*\"));\n          while (a.firstChild) a.removeChild(a.firstChild);\n        }\n        return this;\n      },\n      clone: function (a, b) {\n        return (\n          (a = a == null ? !1 : a),\n          (b = b == null ? a : b),\n          this.map(function () {\n            return p.clone(this, a, b);\n          })\n        );\n      },\n      html: function (a) {\n        return p.access(\n          this,\n          function (a) {\n            var c = this[0] || {},\n              d = 0,\n              e = this.length;\n            if (a === b)\n              return c.nodeType === 1 ? c.innerHTML.replace(bm, \"\") : b;\n            if (\n              typeof a == \"string\" &&\n              !bs.test(a) &&\n              (p.support.htmlSerialize || !bu.test(a)) &&\n              (p.support.leadingWhitespace || !bn.test(a)) &&\n              !bz[(bp.exec(a) || [\"\", \"\"])[1].toLowerCase()]\n            ) {\n              a = a.replace(bo, \"<$1></$2>\");\n              try {\n                for (; d < e; d++)\n                  (c = this[d] || {}),\n                    c.nodeType === 1 &&\n                      (p.cleanData(c.getElementsByTagName(\"*\")),\n                      (c.innerHTML = a));\n                c = 0;\n              } catch (f) {}\n            }\n            c && this.empty().append(a);\n          },\n          null,\n          a,\n          arguments.length,\n        );\n      },\n      replaceWith: function (a) {\n        return bh(this[0])\n          ? this.length\n            ? this.pushStack(p(p.isFunction(a) ? a() : a), \"replaceWith\", a)\n            : this\n          : p.isFunction(a)\n            ? this.each(function (b) {\n                var c = p(this),\n                  d = c.html();\n                c.replaceWith(a.call(this, b, d));\n              })\n            : (typeof a != \"string\" && (a = p(a).detach()),\n              this.each(function () {\n                var b = this.nextSibling,\n                  c = this.parentNode;\n                p(this).remove(), b ? p(b).before(a) : p(c).append(a);\n              }));\n      },\n      detach: function (a) {\n        return this.remove(a, !0);\n      },\n      domManip: function (a, c, d) {\n        a = [].concat.apply([], a);\n        var e,\n          f,\n          g,\n          h,\n          i = 0,\n          j = a[0],\n          k = [],\n          l = this.length;\n        if (\n          !p.support.checkClone &&\n          l > 1 &&\n          typeof j == \"string\" &&\n          bw.test(j)\n        )\n          return this.each(function () {\n            p(this).domManip(a, c, d);\n          });\n        if (p.isFunction(j))\n          return this.each(function (e) {\n            var f = p(this);\n            (a[0] = j.call(this, e, c ? f.html() : b)), f.domManip(a, c, d);\n          });\n        if (this[0]) {\n          (e = p.buildFragment(a, this, k)),\n            (g = e.fragment),\n            (f = g.firstChild),\n            g.childNodes.length === 1 && (g = f);\n          if (f) {\n            c = c && p.nodeName(f, \"tr\");\n            for (h = e.cacheable || l - 1; i < l; i++)\n              d.call(\n                c && p.nodeName(this[i], \"table\")\n                  ? bC(this[i], \"tbody\")\n                  : this[i],\n                i === h ? g : p.clone(g, !0, !0),\n              );\n          }\n          (g = f = null),\n            k.length &&\n              p.each(k, function (a, b) {\n                b.src\n                  ? p.ajax\n                    ? p.ajax({\n                        url: b.src,\n                        type: \"GET\",\n                        dataType: \"script\",\n                        async: !1,\n                        global: !1,\n                        throws: !0,\n                      })\n                    : p.error(\"no ajax\")\n                  : p.globalEval(\n                      (b.text || b.textContent || b.innerHTML || \"\").replace(\n                        by,\n                        \"\",\n                      ),\n                    ),\n                  b.parentNode && b.parentNode.removeChild(b);\n              });\n        }\n        return this;\n      },\n    }),\n    (p.buildFragment = function (a, c, d) {\n      var f,\n        g,\n        h,\n        i = a[0];\n      return (\n        (c = c || e),\n        (c = (!c.nodeType && c[0]) || c),\n        (c = c.ownerDocument || c),\n        a.length === 1 &&\n          typeof i == \"string\" &&\n          i.length < 512 &&\n          c === e &&\n          i.charAt(0) === \"<\" &&\n          !bt.test(i) &&\n          (p.support.checkClone || !bw.test(i)) &&\n          (p.support.html5Clone || !bu.test(i)) &&\n          ((g = !0), (f = p.fragments[i]), (h = f !== b)),\n        f ||\n          ((f = c.createDocumentFragment()),\n          p.clean(a, c, f, d),\n          g && (p.fragments[i] = h && f)),\n        { fragment: f, cacheable: g }\n      );\n    }),\n    (p.fragments = {}),\n    p.each(\n      {\n        appendTo: \"append\",\n        prependTo: \"prepend\",\n        insertBefore: \"before\",\n        insertAfter: \"after\",\n        replaceAll: \"replaceWith\",\n      },\n      function (a, b) {\n        p.fn[a] = function (c) {\n          var d,\n            e = 0,\n            f = [],\n            g = p(c),\n            h = g.length,\n            i = this.length === 1 && this[0].parentNode;\n          if (\n            (i == null ||\n              (i && i.nodeType === 11 && i.childNodes.length === 1)) &&\n            h === 1\n          )\n            return g[b](this[0]), this;\n          for (; e < h; e++)\n            (d = (e > 0 ? this.clone(!0) : this).get()),\n              p(g[e])[b](d),\n              (f = f.concat(d));\n          return this.pushStack(f, a, g.selector);\n        };\n      },\n    ),\n    p.extend({\n      clone: function (a, b, c) {\n        var d, e, f, g;\n        p.support.html5Clone ||\n        p.isXMLDoc(a) ||\n        !bu.test(\"<\" + a.nodeName + \">\")\n          ? (g = a.cloneNode(!0))\n          : ((bB.innerHTML = a.outerHTML), bB.removeChild((g = bB.firstChild)));\n        if (\n          (!p.support.noCloneEvent || !p.support.noCloneChecked) &&\n          (a.nodeType === 1 || a.nodeType === 11) &&\n          !p.isXMLDoc(a)\n        ) {\n          bE(a, g), (d = bF(a)), (e = bF(g));\n          for (f = 0; d[f]; ++f) e[f] && bE(d[f], e[f]);\n        }\n        if (b) {\n          bD(a, g);\n          if (c) {\n            (d = bF(a)), (e = bF(g));\n            for (f = 0; d[f]; ++f) bD(d[f], e[f]);\n          }\n        }\n        return (d = e = null), g;\n      },\n      clean: function (a, b, c, d) {\n        var f,\n          g,\n          h,\n          i,\n          j,\n          k,\n          l,\n          m,\n          n,\n          o,\n          q,\n          r,\n          s = b === e && bA,\n          t = [];\n        if (!b || typeof b.createDocumentFragment == \"undefined\") b = e;\n        for (f = 0; (h = a[f]) != null; f++) {\n          typeof h == \"number\" && (h += \"\");\n          if (!h) continue;\n          if (typeof h == \"string\")\n            if (!br.test(h)) h = b.createTextNode(h);\n            else {\n              (s = s || bk(b)),\n                (l = b.createElement(\"div\")),\n                s.appendChild(l),\n                (h = h.replace(bo, \"<$1></$2>\")),\n                (i = (bp.exec(h) || [\"\", \"\"])[1].toLowerCase()),\n                (j = bz[i] || bz._default),\n                (k = j[0]),\n                (l.innerHTML = j[1] + h + j[2]);\n              while (k--) l = l.lastChild;\n              if (!p.support.tbody) {\n                (m = bq.test(h)),\n                  (n =\n                    i === \"table\" && !m\n                      ? l.firstChild && l.firstChild.childNodes\n                      : j[1] === \"<table>\" && !m\n                        ? l.childNodes\n                        : []);\n                for (g = n.length - 1; g >= 0; --g)\n                  p.nodeName(n[g], \"tbody\") &&\n                    !n[g].childNodes.length &&\n                    n[g].parentNode.removeChild(n[g]);\n              }\n              !p.support.leadingWhitespace &&\n                bn.test(h) &&\n                l.insertBefore(b.createTextNode(bn.exec(h)[0]), l.firstChild),\n                (h = l.childNodes),\n                l.parentNode.removeChild(l);\n            }\n          h.nodeType ? t.push(h) : p.merge(t, h);\n        }\n        l && (h = l = s = null);\n        if (!p.support.appendChecked)\n          for (f = 0; (h = t[f]) != null; f++)\n            p.nodeName(h, \"input\")\n              ? bG(h)\n              : typeof h.getElementsByTagName != \"undefined\" &&\n                p.grep(h.getElementsByTagName(\"input\"), bG);\n        if (c) {\n          q = function (a) {\n            if (!a.type || bx.test(a.type))\n              return d\n                ? d.push(a.parentNode ? a.parentNode.removeChild(a) : a)\n                : c.appendChild(a);\n          };\n          for (f = 0; (h = t[f]) != null; f++)\n            if (!p.nodeName(h, \"script\") || !q(h))\n              c.appendChild(h),\n                typeof h.getElementsByTagName != \"undefined\" &&\n                  ((r = p.grep(\n                    p.merge([], h.getElementsByTagName(\"script\")),\n                    q,\n                  )),\n                  t.splice.apply(t, [f + 1, 0].concat(r)),\n                  (f += r.length));\n        }\n        return t;\n      },\n      cleanData: function (a, b) {\n        var c,\n          d,\n          e,\n          f,\n          g = 0,\n          h = p.expando,\n          i = p.cache,\n          j = p.support.deleteExpando,\n          k = p.event.special;\n        for (; (e = a[g]) != null; g++)\n          if (b || p.acceptData(e)) {\n            (d = e[h]), (c = d && i[d]);\n            if (c) {\n              if (c.events)\n                for (f in c.events)\n                  k[f] ? p.event.remove(e, f) : p.removeEvent(e, f, c.handle);\n              i[d] &&\n                (delete i[d],\n                j\n                  ? delete e[h]\n                  : e.removeAttribute\n                    ? e.removeAttribute(h)\n                    : (e[h] = null),\n                p.deletedIds.push(d));\n            }\n          }\n      },\n    }),\n    (function () {\n      var a, b;\n      (p.uaMatch = function (a) {\n        a = a.toLowerCase();\n        var b =\n          /(chrome)[ \\/]([\\w.]+)/.exec(a) ||\n          /(webkit)[ \\/]([\\w.]+)/.exec(a) ||\n          /(opera)(?:.*version|)[ \\/]([\\w.]+)/.exec(a) ||\n          /(msie) ([\\w.]+)/.exec(a) ||\n          (a.indexOf(\"compatible\") < 0 &&\n            /(mozilla)(?:.*? rv:([\\w.]+)|)/.exec(a)) ||\n          [];\n        return { browser: b[1] || \"\", version: b[2] || \"0\" };\n      }),\n        (a = p.uaMatch(g.userAgent)),\n        (b = {}),\n        a.browser && ((b[a.browser] = !0), (b.version = a.version)),\n        b.chrome ? (b.webkit = !0) : b.webkit && (b.safari = !0),\n        (p.browser = b),\n        (p.sub = function () {\n          function a(b, c) {\n            return new a.fn.init(b, c);\n          }\n          p.extend(!0, a, this),\n            (a.superclass = this),\n            (a.fn = a.prototype = this()),\n            (a.fn.constructor = a),\n            (a.sub = this.sub),\n            (a.fn.init = function c(c, d) {\n              return (\n                d && d instanceof p && !(d instanceof a) && (d = a(d)),\n                p.fn.init.call(this, c, d, b)\n              );\n            }),\n            (a.fn.init.prototype = a.fn);\n          var b = a(e);\n          return a;\n        });\n    })();\n  var bH,\n    bI,\n    bJ,\n    bK = /alpha\\([^)]*\\)/i,\n    bL = /opacity=([^)]*)/,\n    bM = /^(top|right|bottom|left)$/,\n    bN = /^(none|table(?!-c[ea]).+)/,\n    bO = /^margin/,\n    bP = new RegExp(\"^(\" + q + \")(.*)$\", \"i\"),\n    bQ = new RegExp(\"^(\" + q + \")(?!px)[a-z%]+$\", \"i\"),\n    bR = new RegExp(\"^([-+])=(\" + q + \")\", \"i\"),\n    bS = {},\n    bT = { position: \"absolute\", visibility: \"hidden\", display: \"block\" },\n    bU = { letterSpacing: 0, fontWeight: 400 },\n    bV = [\"Top\", \"Right\", \"Bottom\", \"Left\"],\n    bW = [\"Webkit\", \"O\", \"Moz\", \"ms\"],\n    bX = p.fn.toggle;\n  p.fn.extend({\n    css: function (a, c) {\n      return p.access(\n        this,\n        function (a, c, d) {\n          return d !== b ? p.style(a, c, d) : p.css(a, c);\n        },\n        a,\n        c,\n        arguments.length > 1,\n      );\n    },\n    show: function () {\n      return b$(this, !0);\n    },\n    hide: function () {\n      return b$(this);\n    },\n    toggle: function (a, b) {\n      var c = typeof a == \"boolean\";\n      return p.isFunction(a) && p.isFunction(b)\n        ? bX.apply(this, arguments)\n        : this.each(function () {\n            (c ? a : bZ(this)) ? p(this).show() : p(this).hide();\n          });\n    },\n  }),\n    p.extend({\n      cssHooks: {\n        opacity: {\n          get: function (a, b) {\n            if (b) {\n              var c = bH(a, \"opacity\");\n              return c === \"\" ? \"1\" : c;\n            }\n          },\n        },\n      },\n      cssNumber: {\n        fillOpacity: !0,\n        fontWeight: !0,\n        lineHeight: !0,\n        opacity: !0,\n        orphans: !0,\n        widows: !0,\n        zIndex: !0,\n        zoom: !0,\n      },\n      cssProps: { float: p.support.cssFloat ? \"cssFloat\" : \"styleFloat\" },\n      style: function (a, c, d, e) {\n        if (!a || a.nodeType === 3 || a.nodeType === 8 || !a.style) return;\n        var f,\n          g,\n          h,\n          i = p.camelCase(c),\n          j = a.style;\n        (c = p.cssProps[i] || (p.cssProps[i] = bY(j, i))),\n          (h = p.cssHooks[c] || p.cssHooks[i]);\n        if (d === b)\n          return h && \"get\" in h && (f = h.get(a, !1, e)) !== b ? f : j[c];\n        (g = typeof d),\n          g === \"string\" &&\n            (f = bR.exec(d)) &&\n            ((d = (f[1] + 1) * f[2] + parseFloat(p.css(a, c))), (g = \"number\"));\n        if (d == null || (g === \"number\" && isNaN(d))) return;\n        g === \"number\" && !p.cssNumber[i] && (d += \"px\");\n        if (!h || !(\"set\" in h) || (d = h.set(a, d, e)) !== b)\n          try {\n            j[c] = d;\n          } catch (k) {}\n      },\n      css: function (a, c, d, e) {\n        var f,\n          g,\n          h,\n          i = p.camelCase(c);\n        return (\n          (c = p.cssProps[i] || (p.cssProps[i] = bY(a.style, i))),\n          (h = p.cssHooks[c] || p.cssHooks[i]),\n          h && \"get\" in h && (f = h.get(a, !0, e)),\n          f === b && (f = bH(a, c)),\n          f === \"normal\" && c in bU && (f = bU[c]),\n          d || e !== b\n            ? ((g = parseFloat(f)), d || p.isNumeric(g) ? g || 0 : f)\n            : f\n        );\n      },\n      swap: function (a, b, c) {\n        var d,\n          e,\n          f = {};\n        for (e in b) (f[e] = a.style[e]), (a.style[e] = b[e]);\n        d = c.call(a);\n        for (e in b) a.style[e] = f[e];\n        return d;\n      },\n    }),\n    a.getComputedStyle\n      ? (bH = function (b, c) {\n          var d,\n            e,\n            f,\n            g,\n            h = a.getComputedStyle(b, null),\n            i = b.style;\n          return (\n            h &&\n              ((d = h[c]),\n              d === \"\" &&\n                !p.contains(b.ownerDocument, b) &&\n                (d = p.style(b, c)),\n              bQ.test(d) &&\n                bO.test(c) &&\n                ((e = i.width),\n                (f = i.minWidth),\n                (g = i.maxWidth),\n                (i.minWidth = i.maxWidth = i.width = d),\n                (d = h.width),\n                (i.width = e),\n                (i.minWidth = f),\n                (i.maxWidth = g))),\n            d\n          );\n        })\n      : e.documentElement.currentStyle &&\n        (bH = function (a, b) {\n          var c,\n            d,\n            e = a.currentStyle && a.currentStyle[b],\n            f = a.style;\n          return (\n            e == null && f && f[b] && (e = f[b]),\n            bQ.test(e) &&\n              !bM.test(b) &&\n              ((c = f.left),\n              (d = a.runtimeStyle && a.runtimeStyle.left),\n              d && (a.runtimeStyle.left = a.currentStyle.left),\n              (f.left = b === \"fontSize\" ? \"1em\" : e),\n              (e = f.pixelLeft + \"px\"),\n              (f.left = c),\n              d && (a.runtimeStyle.left = d)),\n            e === \"\" ? \"auto\" : e\n          );\n        }),\n    p.each([\"height\", \"width\"], function (a, b) {\n      p.cssHooks[b] = {\n        get: function (a, c, d) {\n          if (c)\n            return a.offsetWidth === 0 && bN.test(bH(a, \"display\"))\n              ? p.swap(a, bT, function () {\n                  return cb(a, b, d);\n                })\n              : cb(a, b, d);\n        },\n        set: function (a, c, d) {\n          return b_(\n            a,\n            c,\n            d\n              ? ca(\n                  a,\n                  b,\n                  d,\n                  p.support.boxSizing && p.css(a, \"boxSizing\") === \"border-box\",\n                )\n              : 0,\n          );\n        },\n      };\n    }),\n    p.support.opacity ||\n      (p.cssHooks.opacity = {\n        get: function (a, b) {\n          return bL.test(\n            (b && a.currentStyle ? a.currentStyle.filter : a.style.filter) ||\n              \"\",\n          )\n            ? 0.01 * parseFloat(RegExp.$1) + \"\"\n            : b\n              ? \"1\"\n              : \"\";\n        },\n        set: function (a, b) {\n          var c = a.style,\n            d = a.currentStyle,\n            e = p.isNumeric(b) ? \"alpha(opacity=\" + b * 100 + \")\" : \"\",\n            f = (d && d.filter) || c.filter || \"\";\n          c.zoom = 1;\n          if (b >= 1 && p.trim(f.replace(bK, \"\")) === \"\" && c.removeAttribute) {\n            c.removeAttribute(\"filter\");\n            if (d && !d.filter) return;\n          }\n          c.filter = bK.test(f) ? f.replace(bK, e) : f + \" \" + e;\n        },\n      }),\n    p(function () {\n      p.support.reliableMarginRight ||\n        (p.cssHooks.marginRight = {\n          get: function (a, b) {\n            return p.swap(a, { display: \"inline-block\" }, function () {\n              if (b) return bH(a, \"marginRight\");\n            });\n          },\n        }),\n        !p.support.pixelPosition &&\n          p.fn.position &&\n          p.each([\"top\", \"left\"], function (a, b) {\n            p.cssHooks[b] = {\n              get: function (a, c) {\n                if (c) {\n                  var d = bH(a, b);\n                  return bQ.test(d) ? p(a).position()[b] + \"px\" : d;\n                }\n              },\n            };\n          });\n    }),\n    p.expr &&\n      p.expr.filters &&\n      ((p.expr.filters.hidden = function (a) {\n        return (\n          (a.offsetWidth === 0 && a.offsetHeight === 0) ||\n          (!p.support.reliableHiddenOffsets &&\n            ((a.style && a.style.display) || bH(a, \"display\")) === \"none\")\n        );\n      }),\n      (p.expr.filters.visible = function (a) {\n        return !p.expr.filters.hidden(a);\n      })),\n    p.each({ margin: \"\", padding: \"\", border: \"Width\" }, function (a, b) {\n      (p.cssHooks[a + b] = {\n        expand: function (c) {\n          var d,\n            e = typeof c == \"string\" ? c.split(\" \") : [c],\n            f = {};\n          for (d = 0; d < 4; d++) f[a + bV[d] + b] = e[d] || e[d - 2] || e[0];\n          return f;\n        },\n      }),\n        bO.test(a) || (p.cssHooks[a + b].set = b_);\n    });\n  var cd = /%20/g,\n    ce = /\\[\\]$/,\n    cf = /\\r?\\n/g,\n    cg =\n      /^(?:color|date|datetime|datetime-local|email|hidden|month|number|password|range|search|tel|text|time|url|week)$/i,\n    ch = /^(?:select|textarea)/i;\n  p.fn.extend({\n    serialize: function () {\n      return p.param(this.serializeArray());\n    },\n    serializeArray: function () {\n      return this.map(function () {\n        return this.elements ? p.makeArray(this.elements) : this;\n      })\n        .filter(function () {\n          return (\n            this.name &&\n            !this.disabled &&\n            (this.checked || ch.test(this.nodeName) || cg.test(this.type))\n          );\n        })\n        .map(function (a, b) {\n          var c = p(this).val();\n          return c == null\n            ? null\n            : p.isArray(c)\n              ? p.map(c, function (a, c) {\n                  return { name: b.name, value: a.replace(cf, \"\\r\\n\") };\n                })\n              : { name: b.name, value: c.replace(cf, \"\\r\\n\") };\n        })\n        .get();\n    },\n  }),\n    (p.param = function (a, c) {\n      var d,\n        e = [],\n        f = function (a, b) {\n          (b = p.isFunction(b) ? b() : b == null ? \"\" : b),\n            (e[e.length] = encodeURIComponent(a) + \"=\" + encodeURIComponent(b));\n        };\n      c === b && (c = p.ajaxSettings && p.ajaxSettings.traditional);\n      if (p.isArray(a) || (a.jquery && !p.isPlainObject(a)))\n        p.each(a, function () {\n          f(this.name, this.value);\n        });\n      else for (d in a) ci(d, a[d], c, f);\n      return e.join(\"&\").replace(cd, \"+\");\n    });\n  var cj,\n    ck,\n    cl = /#.*$/,\n    cm = /^(.*?):[ \\t]*([^\\r\\n]*)\\r?$/gm,\n    cn = /^(?:about|app|app\\-storage|.+\\-extension|file|res|widget):$/,\n    co = /^(?:GET|HEAD)$/,\n    cp = /^\\/\\//,\n    cq = /\\?/,\n    cr = /<script\\b[^<]*(?:(?!<\\/script>)<[^<]*)*<\\/script>/gi,\n    cs = /([?&])_=[^&]*/,\n    ct = /^([\\w\\+\\.\\-]+:)(?:\\/\\/([^\\/?#:]*)(?::(\\d+)|)|)/,\n    cu = p.fn.load,\n    cv = {},\n    cw = {},\n    cx = [\"*/\"] + [\"*\"];\n  try {\n    cj = f.href;\n  } catch (cy) {\n    (cj = e.createElement(\"a\")), (cj.href = \"\"), (cj = cj.href);\n  }\n  (ck = ct.exec(cj.toLowerCase()) || []),\n    (p.fn.load = function (a, c, d) {\n      if (typeof a != \"string\" && cu) return cu.apply(this, arguments);\n      if (!this.length) return this;\n      var e,\n        f,\n        g,\n        h = this,\n        i = a.indexOf(\" \");\n      return (\n        i >= 0 && ((e = a.slice(i, a.length)), (a = a.slice(0, i))),\n        p.isFunction(c)\n          ? ((d = c), (c = b))\n          : c && typeof c == \"object\" && (f = \"POST\"),\n        p\n          .ajax({\n            url: a,\n            type: f,\n            dataType: \"html\",\n            data: c,\n            complete: function (a, b) {\n              d && h.each(d, g || [a.responseText, b, a]);\n            },\n          })\n          .done(function (a) {\n            (g = arguments),\n              h.html(e ? p(\"<div>\").append(a.replace(cr, \"\")).find(e) : a);\n          }),\n        this\n      );\n    }),\n    p.each(\n      \"ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend\".split(\n        \" \",\n      ),\n      function (a, b) {\n        p.fn[b] = function (a) {\n          return this.on(b, a);\n        };\n      },\n    ),\n    p.each([\"get\", \"post\"], function (a, c) {\n      p[c] = function (a, d, e, f) {\n        return (\n          p.isFunction(d) && ((f = f || e), (e = d), (d = b)),\n          p.ajax({ type: c, url: a, data: d, success: e, dataType: f })\n        );\n      };\n    }),\n    p.extend({\n      getScript: function (a, c) {\n        return p.get(a, b, c, \"script\");\n      },\n      getJSON: function (a, b, c) {\n        return p.get(a, b, c, \"json\");\n      },\n      ajaxSetup: function (a, b) {\n        return (\n          b ? cB(a, p.ajaxSettings) : ((b = a), (a = p.ajaxSettings)),\n          cB(a, b),\n          a\n        );\n      },\n      ajaxSettings: {\n        url: cj,\n        isLocal: cn.test(ck[1]),\n        global: !0,\n        type: \"GET\",\n        contentType: \"application/x-www-form-urlencoded; charset=UTF-8\",\n        processData: !0,\n        async: !0,\n        accepts: {\n          xml: \"application/xml, text/xml\",\n          html: \"text/html\",\n          text: \"text/plain\",\n          json: \"application/json, text/javascript\",\n          \"*\": cx,\n        },\n        contents: { xml: /xml/, html: /html/, json: /json/ },\n        responseFields: { xml: \"responseXML\", text: \"responseText\" },\n        converters: {\n          \"* text\": a.String,\n          \"text html\": !0,\n          \"text json\": p.parseJSON,\n          \"text xml\": p.parseXML,\n        },\n        flatOptions: { context: !0, url: !0 },\n      },\n      ajaxPrefilter: cz(cv),\n      ajaxTransport: cz(cw),\n      ajax: function (a, c) {\n        function y(a, c, f, i) {\n          var k,\n            s,\n            t,\n            u,\n            w,\n            y = c;\n          if (v === 2) return;\n          (v = 2),\n            h && clearTimeout(h),\n            (g = b),\n            (e = i || \"\"),\n            (x.readyState = a > 0 ? 4 : 0),\n            f && (u = cC(l, x, f));\n          if ((a >= 200 && a < 300) || a === 304)\n            l.ifModified &&\n              ((w = x.getResponseHeader(\"Last-Modified\")),\n              w && (p.lastModified[d] = w),\n              (w = x.getResponseHeader(\"Etag\")),\n              w && (p.etag[d] = w)),\n              a === 304\n                ? ((y = \"notmodified\"), (k = !0))\n                : ((k = cD(l, u)),\n                  (y = k.state),\n                  (s = k.data),\n                  (t = k.error),\n                  (k = !t));\n          else {\n            t = y;\n            if (!y || a) (y = \"error\"), a < 0 && (a = 0);\n          }\n          (x.status = a),\n            (x.statusText = \"\" + (c || y)),\n            k ? o.resolveWith(m, [s, y, x]) : o.rejectWith(m, [x, y, t]),\n            x.statusCode(r),\n            (r = b),\n            j &&\n              n.trigger(\"ajax\" + (k ? \"Success\" : \"Error\"), [x, l, k ? s : t]),\n            q.fireWith(m, [x, y]),\n            j &&\n              (n.trigger(\"ajaxComplete\", [x, l]),\n              --p.active || p.event.trigger(\"ajaxStop\"));\n        }\n        typeof a == \"object\" && ((c = a), (a = b)), (c = c || {});\n        var d,\n          e,\n          f,\n          g,\n          h,\n          i,\n          j,\n          k,\n          l = p.ajaxSetup({}, c),\n          m = l.context || l,\n          n = m !== l && (m.nodeType || m instanceof p) ? p(m) : p.event,\n          o = p.Deferred(),\n          q = p.Callbacks(\"once memory\"),\n          r = l.statusCode || {},\n          t = {},\n          u = {},\n          v = 0,\n          w = \"canceled\",\n          x = {\n            readyState: 0,\n            setRequestHeader: function (a, b) {\n              if (!v) {\n                var c = a.toLowerCase();\n                (a = u[c] = u[c] || a), (t[a] = b);\n              }\n              return this;\n            },\n            getAllResponseHeaders: function () {\n              return v === 2 ? e : null;\n            },\n            getResponseHeader: function (a) {\n              var c;\n              if (v === 2) {\n                if (!f) {\n                  f = {};\n                  while ((c = cm.exec(e))) f[c[1].toLowerCase()] = c[2];\n                }\n                c = f[a.toLowerCase()];\n              }\n              return c === b ? null : c;\n            },\n            overrideMimeType: function (a) {\n              return v || (l.mimeType = a), this;\n            },\n            abort: function (a) {\n              return (a = a || w), g && g.abort(a), y(0, a), this;\n            },\n          };\n        o.promise(x),\n          (x.success = x.done),\n          (x.error = x.fail),\n          (x.complete = q.add),\n          (x.statusCode = function (a) {\n            if (a) {\n              var b;\n              if (v < 2) for (b in a) r[b] = [r[b], a[b]];\n              else (b = a[x.status]), x.always(b);\n            }\n            return this;\n          }),\n          (l.url = ((a || l.url) + \"\")\n            .replace(cl, \"\")\n            .replace(cp, ck[1] + \"//\")),\n          (l.dataTypes = p\n            .trim(l.dataType || \"*\")\n            .toLowerCase()\n            .split(s)),\n          l.crossDomain == null &&\n            ((i = ct.exec(l.url.toLowerCase())),\n            (l.crossDomain = !(\n              !i ||\n              (i[1] == ck[1] &&\n                i[2] == ck[2] &&\n                (i[3] || (i[1] === \"http:\" ? 80 : 443)) ==\n                  (ck[3] || (ck[1] === \"http:\" ? 80 : 443)))\n            ))),\n          l.data &&\n            l.processData &&\n            typeof l.data != \"string\" &&\n            (l.data = p.param(l.data, l.traditional)),\n          cA(cv, l, c, x);\n        if (v === 2) return x;\n        (j = l.global),\n          (l.type = l.type.toUpperCase()),\n          (l.hasContent = !co.test(l.type)),\n          j && p.active++ === 0 && p.event.trigger(\"ajaxStart\");\n        if (!l.hasContent) {\n          l.data &&\n            ((l.url += (cq.test(l.url) ? \"&\" : \"?\") + l.data), delete l.data),\n            (d = l.url);\n          if (l.cache === !1) {\n            var z = p.now(),\n              A = l.url.replace(cs, \"$1_=\" + z);\n            l.url =\n              A + (A === l.url ? (cq.test(l.url) ? \"&\" : \"?\") + \"_=\" + z : \"\");\n          }\n        }\n        ((l.data && l.hasContent && l.contentType !== !1) || c.contentType) &&\n          x.setRequestHeader(\"Content-Type\", l.contentType),\n          l.ifModified &&\n            ((d = d || l.url),\n            p.lastModified[d] &&\n              x.setRequestHeader(\"If-Modified-Since\", p.lastModified[d]),\n            p.etag[d] && x.setRequestHeader(\"If-None-Match\", p.etag[d])),\n          x.setRequestHeader(\n            \"Accept\",\n            l.dataTypes[0] && l.accepts[l.dataTypes[0]]\n              ? l.accepts[l.dataTypes[0]] +\n                  (l.dataTypes[0] !== \"*\" ? \", \" + cx + \"; q=0.01\" : \"\")\n              : l.accepts[\"*\"],\n          );\n        for (k in l.headers) x.setRequestHeader(k, l.headers[k]);\n        if (!l.beforeSend || (l.beforeSend.call(m, x, l) !== !1 && v !== 2)) {\n          w = \"abort\";\n          for (k in { success: 1, error: 1, complete: 1 }) x[k](l[k]);\n          g = cA(cw, l, c, x);\n          if (!g) y(-1, \"No Transport\");\n          else {\n            (x.readyState = 1),\n              j && n.trigger(\"ajaxSend\", [x, l]),\n              l.async &&\n                l.timeout > 0 &&\n                (h = setTimeout(function () {\n                  x.abort(\"timeout\");\n                }, l.timeout));\n            try {\n              (v = 1), g.send(t, y);\n            } catch (B) {\n              if (v < 2) y(-1, B);\n              else throw B;\n            }\n          }\n          return x;\n        }\n        return x.abort();\n      },\n      active: 0,\n      lastModified: {},\n      etag: {},\n    });\n  var cE = [],\n    cF = /\\?/,\n    cG = /(=)\\?(?=&|$)|\\?\\?/,\n    cH = p.now();\n  p.ajaxSetup({\n    jsonp: \"callback\",\n    jsonpCallback: function () {\n      var a = cE.pop() || p.expando + \"_\" + cH++;\n      return (this[a] = !0), a;\n    },\n  }),\n    p.ajaxPrefilter(\"json jsonp\", function (c, d, e) {\n      var f,\n        g,\n        h,\n        i = c.data,\n        j = c.url,\n        k = c.jsonp !== !1,\n        l = k && cG.test(j),\n        m =\n          k &&\n          !l &&\n          typeof i == \"string\" &&\n          !(c.contentType || \"\").indexOf(\"application/x-www-form-urlencoded\") &&\n          cG.test(i);\n      if (c.dataTypes[0] === \"jsonp\" || l || m)\n        return (\n          (f = c.jsonpCallback =\n            p.isFunction(c.jsonpCallback)\n              ? c.jsonpCallback()\n              : c.jsonpCallback),\n          (g = a[f]),\n          l\n            ? (c.url = j.replace(cG, \"$1\" + f))\n            : m\n              ? (c.data = i.replace(cG, \"$1\" + f))\n              : k && (c.url += (cF.test(j) ? \"&\" : \"?\") + c.jsonp + \"=\" + f),\n          (c.converters[\"script json\"] = function () {\n            return h || p.error(f + \" was not called\"), h[0];\n          }),\n          (c.dataTypes[0] = \"json\"),\n          (a[f] = function () {\n            h = arguments;\n          }),\n          e.always(function () {\n            (a[f] = g),\n              c[f] && ((c.jsonpCallback = d.jsonpCallback), cE.push(f)),\n              h && p.isFunction(g) && g(h[0]),\n              (h = g = b);\n          }),\n          \"script\"\n        );\n    }),\n    p.ajaxSetup({\n      accepts: {\n        script:\n          \"text/javascript, application/javascript, application/ecmascript, application/x-ecmascript\",\n      },\n      contents: { script: /javascript|ecmascript/ },\n      converters: {\n        \"text script\": function (a) {\n          return p.globalEval(a), a;\n        },\n      },\n    }),\n    p.ajaxPrefilter(\"script\", function (a) {\n      a.cache === b && (a.cache = !1),\n        a.crossDomain && ((a.type = \"GET\"), (a.global = !1));\n    }),\n    p.ajaxTransport(\"script\", function (a) {\n      if (a.crossDomain) {\n        var c,\n          d = e.head || e.getElementsByTagName(\"head\")[0] || e.documentElement;\n        return {\n          send: function (f, g) {\n            (c = e.createElement(\"script\")),\n              (c.async = \"async\"),\n              a.scriptCharset && (c.charset = a.scriptCharset),\n              (c.src = a.url),\n              (c.onload = c.onreadystatechange =\n                function (a, e) {\n                  if (\n                    e ||\n                    !c.readyState ||\n                    /loaded|complete/.test(c.readyState)\n                  )\n                    (c.onload = c.onreadystatechange = null),\n                      d && c.parentNode && d.removeChild(c),\n                      (c = b),\n                      e || g(200, \"success\");\n                }),\n              d.insertBefore(c, d.firstChild);\n          },\n          abort: function () {\n            c && c.onload(0, 1);\n          },\n        };\n      }\n    });\n  var cI,\n    cJ = a.ActiveXObject\n      ? function () {\n          for (var a in cI) cI[a](0, 1);\n        }\n      : !1,\n    cK = 0;\n  (p.ajaxSettings.xhr = a.ActiveXObject\n    ? function () {\n        return (!this.isLocal && cL()) || cM();\n      }\n    : cL),\n    (function (a) {\n      p.extend(p.support, { ajax: !!a, cors: !!a && \"withCredentials\" in a });\n    })(p.ajaxSettings.xhr()),\n    p.support.ajax &&\n      p.ajaxTransport(function (c) {\n        if (!c.crossDomain || p.support.cors) {\n          var d;\n          return {\n            send: function (e, f) {\n              var g,\n                h,\n                i = c.xhr();\n              c.username\n                ? i.open(c.type, c.url, c.async, c.username, c.password)\n                : i.open(c.type, c.url, c.async);\n              if (c.xhrFields) for (h in c.xhrFields) i[h] = c.xhrFields[h];\n              c.mimeType &&\n                i.overrideMimeType &&\n                i.overrideMimeType(c.mimeType),\n                !c.crossDomain &&\n                  !e[\"X-Requested-With\"] &&\n                  (e[\"X-Requested-With\"] = \"XMLHttpRequest\");\n              try {\n                for (h in e) i.setRequestHeader(h, e[h]);\n              } catch (j) {}\n              i.send((c.hasContent && c.data) || null),\n                (d = function (a, e) {\n                  var h, j, k, l, m;\n                  try {\n                    if (d && (e || i.readyState === 4)) {\n                      (d = b),\n                        g &&\n                          ((i.onreadystatechange = p.noop), cJ && delete cI[g]);\n                      if (e) i.readyState !== 4 && i.abort();\n                      else {\n                        (h = i.status),\n                          (k = i.getAllResponseHeaders()),\n                          (l = {}),\n                          (m = i.responseXML),\n                          m && m.documentElement && (l.xml = m);\n                        try {\n                          l.text = i.responseText;\n                        } catch (a) {}\n                        try {\n                          j = i.statusText;\n                        } catch (n) {\n                          j = \"\";\n                        }\n                        !h && c.isLocal && !c.crossDomain\n                          ? (h = l.text ? 200 : 404)\n                          : h === 1223 && (h = 204);\n                      }\n                    }\n                  } catch (o) {\n                    e || f(-1, o);\n                  }\n                  l && f(h, j, l, k);\n                }),\n                c.async\n                  ? i.readyState === 4\n                    ? setTimeout(d, 0)\n                    : ((g = ++cK),\n                      cJ && (cI || ((cI = {}), p(a).unload(cJ)), (cI[g] = d)),\n                      (i.onreadystatechange = d))\n                  : d();\n            },\n            abort: function () {\n              d && d(0, 1);\n            },\n          };\n        }\n      });\n  var cN,\n    cO,\n    cP = /^(?:toggle|show|hide)$/,\n    cQ = new RegExp(\"^(?:([-+])=|)(\" + q + \")([a-z%]*)$\", \"i\"),\n    cR = /queueHooks$/,\n    cS = [cY],\n    cT = {\n      \"*\": [\n        function (a, b) {\n          var c,\n            d,\n            e,\n            f = this.createTween(a, b),\n            g = cQ.exec(b),\n            h = f.cur(),\n            i = +h || 0,\n            j = 1;\n          if (g) {\n            (c = +g[2]), (d = g[3] || (p.cssNumber[a] ? \"\" : \"px\"));\n            if (d !== \"px\" && i) {\n              i = p.css(f.elem, a, !0) || c || 1;\n              do\n                (e = j = j || \".5\"),\n                  (i = i / j),\n                  p.style(f.elem, a, i + d),\n                  (j = f.cur() / h);\n              while (j !== 1 && j !== e);\n            }\n            (f.unit = d),\n              (f.start = i),\n              (f.end = g[1] ? i + (g[1] + 1) * c : c);\n          }\n          return f;\n        },\n      ],\n    };\n  (p.Animation = p.extend(cW, {\n    tweener: function (a, b) {\n      p.isFunction(a) ? ((b = a), (a = [\"*\"])) : (a = a.split(\" \"));\n      var c,\n        d = 0,\n        e = a.length;\n      for (; d < e; d++) (c = a[d]), (cT[c] = cT[c] || []), cT[c].unshift(b);\n    },\n    prefilter: function (a, b) {\n      b ? cS.unshift(a) : cS.push(a);\n    },\n  })),\n    (p.Tween = cZ),\n    (cZ.prototype = {\n      constructor: cZ,\n      init: function (a, b, c, d, e, f) {\n        (this.elem = a),\n          (this.prop = c),\n          (this.easing = e || \"swing\"),\n          (this.options = b),\n          (this.start = this.now = this.cur()),\n          (this.end = d),\n          (this.unit = f || (p.cssNumber[c] ? \"\" : \"px\"));\n      },\n      cur: function () {\n        var a = cZ.propHooks[this.prop];\n        return a && a.get ? a.get(this) : cZ.propHooks._default.get(this);\n      },\n      run: function (a) {\n        var b,\n          c = cZ.propHooks[this.prop];\n        return (\n          this.options.duration\n            ? (this.pos = b =\n                p.easing[this.easing](\n                  a,\n                  this.options.duration * a,\n                  0,\n                  1,\n                  this.options.duration,\n                ))\n            : (this.pos = b = a),\n          (this.now = (this.end - this.start) * b + this.start),\n          this.options.step &&\n            this.options.step.call(this.elem, this.now, this),\n          c && c.set ? c.set(this) : cZ.propHooks._default.set(this),\n          this\n        );\n      },\n    }),\n    (cZ.prototype.init.prototype = cZ.prototype),\n    (cZ.propHooks = {\n      _default: {\n        get: function (a) {\n          var b;\n          return a.elem[a.prop] == null ||\n            (!!a.elem.style && a.elem.style[a.prop] != null)\n            ? ((b = p.css(a.elem, a.prop, !1, \"\")), !b || b === \"auto\" ? 0 : b)\n            : a.elem[a.prop];\n        },\n        set: function (a) {\n          p.fx.step[a.prop]\n            ? p.fx.step[a.prop](a)\n            : a.elem.style &&\n                (a.elem.style[p.cssProps[a.prop]] != null || p.cssHooks[a.prop])\n              ? p.style(a.elem, a.prop, a.now + a.unit)\n              : (a.elem[a.prop] = a.now);\n        },\n      },\n    }),\n    (cZ.propHooks.scrollTop = cZ.propHooks.scrollLeft =\n      {\n        set: function (a) {\n          a.elem.nodeType && a.elem.parentNode && (a.elem[a.prop] = a.now);\n        },\n      }),\n    p.each([\"toggle\", \"show\", \"hide\"], function (a, b) {\n      var c = p.fn[b];\n      p.fn[b] = function (d, e, f) {\n        return d == null ||\n          typeof d == \"boolean\" ||\n          (!a && p.isFunction(d) && p.isFunction(e))\n          ? c.apply(this, arguments)\n          : this.animate(c$(b, !0), d, e, f);\n      };\n    }),\n    p.fn.extend({\n      fadeTo: function (a, b, c, d) {\n        return this.filter(bZ)\n          .css(\"opacity\", 0)\n          .show()\n          .end()\n          .animate({ opacity: b }, a, c, d);\n      },\n      animate: function (a, b, c, d) {\n        var e = p.isEmptyObject(a),\n          f = p.speed(b, c, d),\n          g = function () {\n            var b = cW(this, p.extend({}, a), f);\n            e && b.stop(!0);\n          };\n        return e || f.queue === !1 ? this.each(g) : this.queue(f.queue, g);\n      },\n      stop: function (a, c, d) {\n        var e = function (a) {\n          var b = a.stop;\n          delete a.stop, b(d);\n        };\n        return (\n          typeof a != \"string\" && ((d = c), (c = a), (a = b)),\n          c && a !== !1 && this.queue(a || \"fx\", []),\n          this.each(function () {\n            var b = !0,\n              c = a != null && a + \"queueHooks\",\n              f = p.timers,\n              g = p._data(this);\n            if (c) g[c] && g[c].stop && e(g[c]);\n            else for (c in g) g[c] && g[c].stop && cR.test(c) && e(g[c]);\n            for (c = f.length; c--; )\n              f[c].elem === this &&\n                (a == null || f[c].queue === a) &&\n                (f[c].anim.stop(d), (b = !1), f.splice(c, 1));\n            (b || !d) && p.dequeue(this, a);\n          })\n        );\n      },\n    }),\n    p.each(\n      {\n        slideDown: c$(\"show\"),\n        slideUp: c$(\"hide\"),\n        slideToggle: c$(\"toggle\"),\n        fadeIn: { opacity: \"show\" },\n        fadeOut: { opacity: \"hide\" },\n        fadeToggle: { opacity: \"toggle\" },\n      },\n      function (a, b) {\n        p.fn[a] = function (a, c, d) {\n          return this.animate(b, a, c, d);\n        };\n      },\n    ),\n    (p.speed = function (a, b, c) {\n      var d =\n        a && typeof a == \"object\"\n          ? p.extend({}, a)\n          : {\n              complete: c || (!c && b) || (p.isFunction(a) && a),\n              duration: a,\n              easing: (c && b) || (b && !p.isFunction(b) && b),\n            };\n      d.duration = p.fx.off\n        ? 0\n        : typeof d.duration == \"number\"\n          ? d.duration\n          : d.duration in p.fx.speeds\n            ? p.fx.speeds[d.duration]\n            : p.fx.speeds._default;\n      if (d.queue == null || d.queue === !0) d.queue = \"fx\";\n      return (\n        (d.old = d.complete),\n        (d.complete = function () {\n          p.isFunction(d.old) && d.old.call(this),\n            d.queue && p.dequeue(this, d.queue);\n        }),\n        d\n      );\n    }),\n    (p.easing = {\n      linear: function (a) {\n        return a;\n      },\n      swing: function (a) {\n        return 0.5 - Math.cos(a * Math.PI) / 2;\n      },\n    }),\n    (p.timers = []),\n    (p.fx = cZ.prototype.init),\n    (p.fx.tick = function () {\n      var a,\n        b = p.timers,\n        c = 0;\n      for (; c < b.length; c++)\n        (a = b[c]), !a() && b[c] === a && b.splice(c--, 1);\n      b.length || p.fx.stop();\n    }),\n    (p.fx.timer = function (a) {\n      a() &&\n        p.timers.push(a) &&\n        !cO &&\n        (cO = setInterval(p.fx.tick, p.fx.interval));\n    }),\n    (p.fx.interval = 13),\n    (p.fx.stop = function () {\n      clearInterval(cO), (cO = null);\n    }),\n    (p.fx.speeds = { slow: 600, fast: 200, _default: 400 }),\n    (p.fx.step = {}),\n    p.expr &&\n      p.expr.filters &&\n      (p.expr.filters.animated = function (a) {\n        return p.grep(p.timers, function (b) {\n          return a === b.elem;\n        }).length;\n      });\n  var c_ = /^(?:body|html)$/i;\n  (p.fn.offset = function (a) {\n    if (arguments.length)\n      return a === b\n        ? this\n        : this.each(function (b) {\n            p.offset.setOffset(this, a, b);\n          });\n    var c,\n      d,\n      e,\n      f,\n      g,\n      h,\n      i,\n      j,\n      k,\n      l,\n      m = this[0],\n      n = m && m.ownerDocument;\n    if (!n) return;\n    return (e = n.body) === m\n      ? p.offset.bodyOffset(m)\n      : ((d = n.documentElement),\n        p.contains(d, m)\n          ? ((c = m.getBoundingClientRect()),\n            (f = da(n)),\n            (g = d.clientTop || e.clientTop || 0),\n            (h = d.clientLeft || e.clientLeft || 0),\n            (i = f.pageYOffset || d.scrollTop),\n            (j = f.pageXOffset || d.scrollLeft),\n            (k = c.top + i - g),\n            (l = c.left + j - h),\n            { top: k, left: l })\n          : { top: 0, left: 0 });\n  }),\n    (p.offset = {\n      bodyOffset: function (a) {\n        var b = a.offsetTop,\n          c = a.offsetLeft;\n        return (\n          p.support.doesNotIncludeMarginInBodyOffset &&\n            ((b += parseFloat(p.css(a, \"marginTop\")) || 0),\n            (c += parseFloat(p.css(a, \"marginLeft\")) || 0)),\n          { top: b, left: c }\n        );\n      },\n      setOffset: function (a, b, c) {\n        var d = p.css(a, \"position\");\n        d === \"static\" && (a.style.position = \"relative\");\n        var e = p(a),\n          f = e.offset(),\n          g = p.css(a, \"top\"),\n          h = p.css(a, \"left\"),\n          i =\n            (d === \"absolute\" || d === \"fixed\") &&\n            p.inArray(\"auto\", [g, h]) > -1,\n          j = {},\n          k = {},\n          l,\n          m;\n        i\n          ? ((k = e.position()), (l = k.top), (m = k.left))\n          : ((l = parseFloat(g) || 0), (m = parseFloat(h) || 0)),\n          p.isFunction(b) && (b = b.call(a, c, f)),\n          b.top != null && (j.top = b.top - f.top + l),\n          b.left != null && (j.left = b.left - f.left + m),\n          \"using\" in b ? b.using.call(a, j) : e.css(j);\n      },\n    }),\n    p.fn.extend({\n      position: function () {\n        if (!this[0]) return;\n        var a = this[0],\n          b = this.offsetParent(),\n          c = this.offset(),\n          d = c_.test(b[0].nodeName) ? { top: 0, left: 0 } : b.offset();\n        return (\n          (c.top -= parseFloat(p.css(a, \"marginTop\")) || 0),\n          (c.left -= parseFloat(p.css(a, \"marginLeft\")) || 0),\n          (d.top += parseFloat(p.css(b[0], \"borderTopWidth\")) || 0),\n          (d.left += parseFloat(p.css(b[0], \"borderLeftWidth\")) || 0),\n          { top: c.top - d.top, left: c.left - d.left }\n        );\n      },\n      offsetParent: function () {\n        return this.map(function () {\n          var a = this.offsetParent || e.body;\n          while (a && !c_.test(a.nodeName) && p.css(a, \"position\") === \"static\")\n            a = a.offsetParent;\n          return a || e.body;\n        });\n      },\n    }),\n    p.each(\n      { scrollLeft: \"pageXOffset\", scrollTop: \"pageYOffset\" },\n      function (a, c) {\n        var d = /Y/.test(c);\n        p.fn[a] = function (e) {\n          return p.access(\n            this,\n            function (a, e, f) {\n              var g = da(a);\n              if (f === b)\n                return g\n                  ? c in g\n                    ? g[c]\n                    : g.document.documentElement[e]\n                  : a[e];\n              g\n                ? g.scrollTo(\n                    d ? p(g).scrollLeft() : f,\n                    d ? f : p(g).scrollTop(),\n                  )\n                : (a[e] = f);\n            },\n            a,\n            e,\n            arguments.length,\n            null,\n          );\n        };\n      },\n    ),\n    p.each({ Height: \"height\", Width: \"width\" }, function (a, c) {\n      p.each(\n        { padding: \"inner\" + a, content: c, \"\": \"outer\" + a },\n        function (d, e) {\n          p.fn[e] = function (e, f) {\n            var g = arguments.length && (d || typeof e != \"boolean\"),\n              h = d || (e === !0 || f === !0 ? \"margin\" : \"border\");\n            return p.access(\n              this,\n              function (c, d, e) {\n                var f;\n                return p.isWindow(c)\n                  ? c.document.documentElement[\"client\" + a]\n                  : c.nodeType === 9\n                    ? ((f = c.documentElement),\n                      Math.max(\n                        c.body[\"scroll\" + a],\n                        f[\"scroll\" + a],\n                        c.body[\"offset\" + a],\n                        f[\"offset\" + a],\n                        f[\"client\" + a],\n                      ))\n                    : e === b\n                      ? p.css(c, d, e, h)\n                      : p.style(c, d, e, h);\n              },\n              c,\n              g ? e : b,\n              g,\n              null,\n            );\n          };\n        },\n      );\n    }),\n    (a.jQuery = a.$ = p),\n    typeof define == \"function\" &&\n      define.amd &&\n      define.amd.jQuery &&\n      define(\"jquery\", [], function () {\n        return p;\n      });\n})(window);\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/portfolio/jquery.quicksand.js",
    "content": "/*\n\nQuicksand 1.2.2\n\nReorder and filter items with a nice shuffling animation.\n\nCopyright (c) 2010 Jacek Galanciak (razorjack.net) and agilope.com\nBig thanks for Piotr Petrus (riddle.pl) for deep code review and wonderful docs & demos.\n\nDual licensed under the MIT and GPL version 2 licenses.\nhttp://github.com/jquery/jquery/blob/master/MIT-LICENSE.txt\nhttp://github.com/jquery/jquery/blob/master/GPL-LICENSE.txt\n\nProject site: http://razorjack.net/quicksand\nGithub site: http://github.com/razorjack/quicksand\n\n*/\n\n(function ($) {\n  $.fn.quicksand = function (collection, customOptions) {\n    var options = {\n      duration: 750,\n      easing: \"swing\",\n      attribute: \"data-id\", // attribute to recognize same items within source and dest\n      adjustHeight: \"auto\", // 'dynamic' animates height during shuffling (slow), 'auto' adjusts it before or after the animation, false leaves height constant\n      useScaling: true, // disable it if you're not using scaling effect or want to improve performance\n      enhancement: function (c) {}, // Visual enhacement (eg. font replacement) function for cloned elements\n      selector: \"> *\",\n      dx: 0,\n      dy: 0,\n    };\n    $.extend(options, customOptions);\n\n    if ($.browser.msie || typeof $.fn.scale == \"undefined\") {\n      // Got IE and want scaling effect? Kiss my ass.\n      options.useScaling = false;\n    }\n\n    var callbackFunction;\n    if (typeof arguments[1] == \"function\") {\n      var callbackFunction = arguments[1];\n    } else if (typeof (arguments[2] == \"function\")) {\n      var callbackFunction = arguments[2];\n    }\n\n    return this.each(function (i) {\n      var val;\n      var animationQueue = []; // used to store all the animation params before starting the animation; solves initial animation slowdowns\n      var $collection = $(collection).clone(); // destination (target) collection\n      var $sourceParent = $(this); // source, the visible container of source collection\n      var sourceHeight = $(this).css(\"height\"); // used to keep height and document flow during the animation\n\n      var destHeight;\n      var adjustHeightOnCallback = false;\n\n      var offset = $($sourceParent).offset(); // offset of visible container, used in animation calculations\n      var offsets = []; // coordinates of every source collection item\n\n      var $source = $(this).find(options.selector); // source collection items\n\n      // Replace the collection and quit if IE6\n      if ($.browser.msie && $.browser.version.substr(0, 1) < 7) {\n        $sourceParent.html(\"\").append($collection);\n        return;\n      }\n\n      // Gets called when any animation is finished\n      var postCallbackPerformed = 0; // prevents the function from being called more than one time\n      var postCallback = function () {\n        if (!postCallbackPerformed) {\n          postCallbackPerformed = 1;\n\n          // hack:\n          // used to be: $sourceParent.html($dest.html()); // put target HTML into visible source container\n          // but new webkit builds cause flickering when replacing the collections\n          $toDelete = $sourceParent.find(\"> *\");\n          $sourceParent.prepend($dest.find(\"> *\"));\n          $toDelete.remove();\n\n          if (adjustHeightOnCallback) {\n            $sourceParent.css(\"height\", destHeight);\n          }\n          options.enhancement($sourceParent); // Perform custom visual enhancements on a newly replaced collection\n          if (typeof callbackFunction == \"function\") {\n            callbackFunction.call(this);\n          }\n        }\n      };\n\n      // Position: relative situations\n      var $correctionParent = $sourceParent.offsetParent();\n      var correctionOffset = $correctionParent.offset();\n      if ($correctionParent.css(\"position\") == \"relative\") {\n        if ($correctionParent.get(0).nodeName.toLowerCase() == \"body\") {\n        } else {\n          correctionOffset.top +=\n            parseFloat($correctionParent.css(\"border-top-width\")) || 0;\n          correctionOffset.left +=\n            parseFloat($correctionParent.css(\"border-left-width\")) || 0;\n        }\n      } else {\n        correctionOffset.top -=\n          parseFloat($correctionParent.css(\"border-top-width\")) || 0;\n        correctionOffset.left -=\n          parseFloat($correctionParent.css(\"border-left-width\")) || 0;\n        correctionOffset.top -=\n          parseFloat($correctionParent.css(\"margin-top\")) || 0;\n        correctionOffset.left -=\n          parseFloat($correctionParent.css(\"margin-left\")) || 0;\n      }\n\n      // perform custom corrections from options (use when Quicksand fails to detect proper correction)\n      if (isNaN(correctionOffset.left)) {\n        correctionOffset.left = 0;\n      }\n      if (isNaN(correctionOffset.top)) {\n        correctionOffset.top = 0;\n      }\n\n      correctionOffset.left -= options.dx;\n      correctionOffset.top -= options.dy;\n\n      // keeps nodes after source container, holding their position\n      $sourceParent.css(\"height\", $(this).height());\n\n      // get positions of source collections\n      $source.each(function (i) {\n        offsets[i] = $(this).offset();\n      });\n\n      // stops previous animations on source container\n      $(this).stop();\n      var dx = 0;\n      var dy = 0;\n      $source.each(function (i) {\n        $(this).stop(); // stop animation of collection items\n        var rawObj = $(this).get(0);\n        if (rawObj.style.position == \"absolute\") {\n          dx = -options.dx;\n          dy = -options.dy;\n        } else {\n          dx = options.dx;\n          dy = options.dy;\n        }\n\n        rawObj.style.position = \"absolute\";\n        rawObj.style.margin = \"0\";\n\n        rawObj.style.top =\n          offsets[i].top -\n          parseFloat(rawObj.style.marginTop) -\n          correctionOffset.top +\n          dy +\n          \"px\";\n        rawObj.style.left =\n          offsets[i].left -\n          parseFloat(rawObj.style.marginLeft) -\n          correctionOffset.left +\n          dx +\n          \"px\";\n      });\n\n      // create temporary container with destination collection\n      var $dest = $($sourceParent).clone();\n      var rawDest = $dest.get(0);\n      rawDest.innerHTML = \"\";\n      rawDest.setAttribute(\"id\", \"\");\n      rawDest.style.height = \"auto\";\n      rawDest.style.width = $sourceParent.width() + \"px\";\n      $dest.append($collection);\n      // insert node into HTML\n      // Note that the node is under visible source container in the exactly same position\n      // The browser render all the items without showing them (opacity: 0.0)\n      // No offset calculations are needed, the browser just extracts position from underlayered destination items\n      // and sets animation to destination positions.\n      $dest.insertBefore($sourceParent);\n      $dest.css(\"opacity\", 0.0);\n      rawDest.style.zIndex = -1;\n\n      rawDest.style.margin = \"0\";\n      rawDest.style.position = \"absolute\";\n      rawDest.style.top = offset.top - correctionOffset.top + \"px\";\n      rawDest.style.left = offset.left - correctionOffset.left + \"px\";\n\n      if (options.adjustHeight === \"dynamic\") {\n        // If destination container has different height than source container\n        // the height can be animated, adjusting it to destination height\n        $sourceParent.animate(\n          { height: $dest.height() },\n          options.duration,\n          options.easing,\n        );\n      } else if (options.adjustHeight === \"auto\") {\n        destHeight = $dest.height();\n        if (parseFloat(sourceHeight) < parseFloat(destHeight)) {\n          // Adjust the height now so that the items don't move out of the container\n          $sourceParent.css(\"height\", destHeight);\n        } else {\n          //  Adjust later, on callback\n          adjustHeightOnCallback = true;\n        }\n      }\n\n      // Now it's time to do shuffling animation\n      // First of all, we need to identify same elements within source and destination collections\n      $source.each(function (i) {\n        var destElement = [];\n        if (typeof options.attribute == \"function\") {\n          val = options.attribute($(this));\n          $collection.each(function () {\n            if (options.attribute(this) == val) {\n              destElement = $(this);\n              return false;\n            }\n          });\n        } else {\n          destElement = $collection.filter(\n            \"[\" +\n              options.attribute +\n              \"=\" +\n              $(this).attr(options.attribute) +\n              \"]\",\n          );\n        }\n        if (destElement.length) {\n          // The item is both in source and destination collections\n          // It it's under different position, let's move it\n          if (!options.useScaling) {\n            animationQueue.push({\n              element: $(this),\n              animation: {\n                top: destElement.offset().top - correctionOffset.top,\n                left: destElement.offset().left - correctionOffset.left,\n                opacity: 1.0,\n              },\n            });\n          } else {\n            animationQueue.push({\n              element: $(this),\n              animation: {\n                top: destElement.offset().top - correctionOffset.top,\n                left: destElement.offset().left - correctionOffset.left,\n                opacity: 1.0,\n                scale: \"1.0\",\n              },\n            });\n          }\n        } else {\n          // The item from source collection is not present in destination collections\n          // Let's remove it\n          if (!options.useScaling) {\n            animationQueue.push({\n              element: $(this),\n              animation: { opacity: \"0.0\" },\n            });\n          } else {\n            animationQueue.push({\n              element: $(this),\n              animation: { opacity: \"0.0\", scale: \"0.0\" },\n            });\n          }\n        }\n      });\n\n      $collection.each(function (i) {\n        // Grab all items from target collection not present in visible source collection\n\n        var sourceElement = [];\n        var destElement = [];\n        if (typeof options.attribute == \"function\") {\n          val = options.attribute($(this));\n          $source.each(function () {\n            if (options.attribute(this) == val) {\n              sourceElement = $(this);\n              return false;\n            }\n          });\n\n          $collection.each(function () {\n            if (options.attribute(this) == val) {\n              destElement = $(this);\n              return false;\n            }\n          });\n        } else {\n          sourceElement = $source.filter(\n            \"[\" +\n              options.attribute +\n              \"=\" +\n              $(this).attr(options.attribute) +\n              \"]\",\n          );\n          destElement = $collection.filter(\n            \"[\" +\n              options.attribute +\n              \"=\" +\n              $(this).attr(options.attribute) +\n              \"]\",\n          );\n        }\n\n        var animationOptions;\n        if (sourceElement.length === 0) {\n          // No such element in source collection...\n          if (!options.useScaling) {\n            animationOptions = {\n              opacity: \"1.0\",\n            };\n          } else {\n            animationOptions = {\n              opacity: \"1.0\",\n              scale: \"1.0\",\n            };\n          }\n          // Let's create it\n          d = destElement.clone();\n          var rawDestElement = d.get(0);\n          rawDestElement.style.position = \"absolute\";\n          rawDestElement.style.margin = \"0\";\n          rawDestElement.style.top =\n            destElement.offset().top - correctionOffset.top + \"px\";\n          rawDestElement.style.left =\n            destElement.offset().left - correctionOffset.left + \"px\";\n          d.css(\"opacity\", 0.0); // IE\n          if (options.useScaling) {\n            d.css(\"transform\", \"scale(0.0)\");\n          }\n          d.appendTo($sourceParent);\n\n          animationQueue.push({ element: $(d), animation: animationOptions });\n        }\n      });\n\n      $dest.remove();\n      options.enhancement($sourceParent); // Perform custom visual enhancements during the animation\n      for (i = 0; i < animationQueue.length; i++) {\n        animationQueue[i].element.animate(\n          animationQueue[i].animation,\n          options.duration,\n          options.easing,\n          postCallback,\n        );\n      }\n    });\n  };\n})(jQuery);\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/portfolio/setting.js",
    "content": "jQuery(document).ready(function ($) {\n  if (jQuery().quicksand) {\n    // Clone applications to get a second collection\n    var $data = $(\".portfolio\").clone();\n\n    //NOTE: Only filter on the main portfolio page, not on the subcategory pages\n    $(\".filter li\").click(function (e) {\n      $(\".filter li\").removeClass(\"active\");\n      // Use the last category class as the category to filter by. This means that multiple categories are not supported (yet)\n      var filterClass = $(this).attr(\"class\").split(\" \").slice(-1)[0];\n\n      if (filterClass == \"all\") {\n        var $filteredData = $data.find(\".item-thumbs\");\n      } else {\n        var $filteredData = $data.find(\n          \".item-thumbs[data-type=\" + filterClass + \"]\",\n        );\n      }\n      $(\".portfolio\").quicksand(\n        $filteredData,\n        {\n          duration: 600,\n          adjustHeight: \"auto\",\n        },\n        function () {\n          // Portfolio fancybox\n          $(\".fancybox\").fancybox({\n            padding: 0,\n            beforeShow: function () {\n              this.title = $(this.element).attr(\"title\");\n              this.title =\n                \"<h4>\" +\n                this.title +\n                \"</h4>\" +\n                \"<p>\" +\n                $(this.element).parent().find(\"img\").attr(\"alt\") +\n                \"</p>\";\n            },\n            helpers: {\n              title: { type: \"inside\" },\n            },\n          });\n        },\n      );\n      $(this).addClass(\"active\");\n      return false;\n    });\n  } //if quicksand\n});\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/quicksand/jquery.quicksand.js",
    "content": "/*\n\nQuicksand 1.2.2\n\nReorder and filter items with a nice shuffling animation.\n\nCopyright (c) 2010 Jacek Galanciak (razorjack.net) and agilope.com\nBig thanks for Piotr Petrus (riddle.pl) for deep code review and wonderful docs & demos.\n\nDual licensed under the MIT and GPL version 2 licenses.\nhttp://github.com/jquery/jquery/blob/master/MIT-LICENSE.txt\nhttp://github.com/jquery/jquery/blob/master/GPL-LICENSE.txt\n\nProject site: http://razorjack.net/quicksand\nGithub site: http://github.com/razorjack/quicksand\n\n*/\n\n(function ($) {\n  $.fn.quicksand = function (collection, customOptions) {\n    var options = {\n      duration: 750,\n      easing: \"swing\",\n      attribute: \"data-id\", // attribute to recognize same items within source and dest\n      adjustHeight: \"auto\", // 'dynamic' animates height during shuffling (slow), 'auto' adjusts it before or after the animation, false leaves height constant\n      useScaling: true, // disable it if you're not using scaling effect or want to improve performance\n      enhancement: function (c) {}, // Visual enhacement (eg. font replacement) function for cloned elements\n      selector: \"> *\",\n      dx: 0,\n      dy: 0,\n    };\n    $.extend(options, customOptions);\n\n    if ($.browser.msie || typeof $.fn.scale == \"undefined\") {\n      // Got IE and want scaling effect? Kiss my ass.\n      options.useScaling = false;\n    }\n\n    var callbackFunction;\n    if (typeof arguments[1] == \"function\") {\n      var callbackFunction = arguments[1];\n    } else if (typeof (arguments[2] == \"function\")) {\n      var callbackFunction = arguments[2];\n    }\n\n    return this.each(function (i) {\n      var val;\n      var animationQueue = []; // used to store all the animation params before starting the animation; solves initial animation slowdowns\n      var $collection = $(collection).clone(); // destination (target) collection\n      var $sourceParent = $(this); // source, the visible container of source collection\n      var sourceHeight = $(this).css(\"height\"); // used to keep height and document flow during the animation\n\n      var destHeight;\n      var adjustHeightOnCallback = false;\n\n      var offset = $($sourceParent).offset(); // offset of visible container, used in animation calculations\n      var offsets = []; // coordinates of every source collection item\n\n      var $source = $(this).find(options.selector); // source collection items\n\n      // Replace the collection and quit if IE6\n      if ($.browser.msie && $.browser.version.substr(0, 1) < 7) {\n        $sourceParent.html(\"\").append($collection);\n        return;\n      }\n\n      // Gets called when any animation is finished\n      var postCallbackPerformed = 0; // prevents the function from being called more than one time\n      var postCallback = function () {\n        if (!postCallbackPerformed) {\n          postCallbackPerformed = 1;\n\n          // hack:\n          // used to be: $sourceParent.html($dest.html()); // put target HTML into visible source container\n          // but new webkit builds cause flickering when replacing the collections\n          $toDelete = $sourceParent.find(\"> *\");\n          $sourceParent.prepend($dest.find(\"> *\"));\n          $toDelete.remove();\n\n          if (adjustHeightOnCallback) {\n            $sourceParent.css(\"height\", destHeight);\n          }\n          options.enhancement($sourceParent); // Perform custom visual enhancements on a newly replaced collection\n          if (typeof callbackFunction == \"function\") {\n            callbackFunction.call(this);\n          }\n        }\n      };\n\n      // Position: relative situations\n      var $correctionParent = $sourceParent.offsetParent();\n      var correctionOffset = $correctionParent.offset();\n      if ($correctionParent.css(\"position\") == \"relative\") {\n        if ($correctionParent.get(0).nodeName.toLowerCase() == \"body\") {\n        } else {\n          correctionOffset.top +=\n            parseFloat($correctionParent.css(\"border-top-width\")) || 0;\n          correctionOffset.left +=\n            parseFloat($correctionParent.css(\"border-left-width\")) || 0;\n        }\n      } else {\n        correctionOffset.top -=\n          parseFloat($correctionParent.css(\"border-top-width\")) || 0;\n        correctionOffset.left -=\n          parseFloat($correctionParent.css(\"border-left-width\")) || 0;\n        correctionOffset.top -=\n          parseFloat($correctionParent.css(\"margin-top\")) || 0;\n        correctionOffset.left -=\n          parseFloat($correctionParent.css(\"margin-left\")) || 0;\n      }\n\n      // perform custom corrections from options (use when Quicksand fails to detect proper correction)\n      if (isNaN(correctionOffset.left)) {\n        correctionOffset.left = 0;\n      }\n      if (isNaN(correctionOffset.top)) {\n        correctionOffset.top = 0;\n      }\n\n      correctionOffset.left -= options.dx;\n      correctionOffset.top -= options.dy;\n\n      // keeps nodes after source container, holding their position\n      $sourceParent.css(\"height\", $(this).height());\n\n      // get positions of source collections\n      $source.each(function (i) {\n        offsets[i] = $(this).offset();\n      });\n\n      // stops previous animations on source container\n      $(this).stop();\n      var dx = 0;\n      var dy = 0;\n      $source.each(function (i) {\n        $(this).stop(); // stop animation of collection items\n        var rawObj = $(this).get(0);\n        if (rawObj.style.position == \"absolute\") {\n          dx = -options.dx;\n          dy = -options.dy;\n        } else {\n          dx = options.dx;\n          dy = options.dy;\n        }\n\n        rawObj.style.position = \"absolute\";\n        rawObj.style.margin = \"0\";\n\n        rawObj.style.top =\n          offsets[i].top -\n          parseFloat(rawObj.style.marginTop) -\n          correctionOffset.top +\n          dy +\n          \"px\";\n        rawObj.style.left =\n          offsets[i].left -\n          parseFloat(rawObj.style.marginLeft) -\n          correctionOffset.left +\n          dx +\n          \"px\";\n      });\n\n      // create temporary container with destination collection\n      var $dest = $($sourceParent).clone();\n      var rawDest = $dest.get(0);\n      rawDest.innerHTML = \"\";\n      rawDest.setAttribute(\"id\", \"\");\n      rawDest.style.height = \"auto\";\n      rawDest.style.width = $sourceParent.width() + \"px\";\n      $dest.append($collection);\n      // insert node into HTML\n      // Note that the node is under visible source container in the exactly same position\n      // The browser render all the items without showing them (opacity: 0.0)\n      // No offset calculations are needed, the browser just extracts position from underlayered destination items\n      // and sets animation to destination positions.\n      $dest.insertBefore($sourceParent);\n      $dest.css(\"opacity\", 0.0);\n      rawDest.style.zIndex = -1;\n\n      rawDest.style.margin = \"0\";\n      rawDest.style.position = \"absolute\";\n      rawDest.style.top = offset.top - correctionOffset.top + \"px\";\n      rawDest.style.left = offset.left - correctionOffset.left + \"px\";\n\n      if (options.adjustHeight === \"dynamic\") {\n        // If destination container has different height than source container\n        // the height can be animated, adjusting it to destination height\n        $sourceParent.animate(\n          { height: $dest.height() },\n          options.duration,\n          options.easing,\n        );\n      } else if (options.adjustHeight === \"auto\") {\n        destHeight = $dest.height();\n        if (parseFloat(sourceHeight) < parseFloat(destHeight)) {\n          // Adjust the height now so that the items don't move out of the container\n          $sourceParent.css(\"height\", destHeight);\n        } else {\n          //  Adjust later, on callback\n          adjustHeightOnCallback = true;\n        }\n      }\n\n      // Now it's time to do shuffling animation\n      // First of all, we need to identify same elements within source and destination collections\n      $source.each(function (i) {\n        var destElement = [];\n        if (typeof options.attribute == \"function\") {\n          val = options.attribute($(this));\n          $collection.each(function () {\n            if (options.attribute(this) == val) {\n              destElement = $(this);\n              return false;\n            }\n          });\n        } else {\n          destElement = $collection.filter(\n            \"[\" +\n              options.attribute +\n              \"=\" +\n              $(this).attr(options.attribute) +\n              \"]\",\n          );\n        }\n        if (destElement.length) {\n          // The item is both in source and destination collections\n          // It it's under different position, let's move it\n          if (!options.useScaling) {\n            animationQueue.push({\n              element: $(this),\n              animation: {\n                top: destElement.offset().top - correctionOffset.top,\n                left: destElement.offset().left - correctionOffset.left,\n                opacity: 1.0,\n              },\n            });\n          } else {\n            animationQueue.push({\n              element: $(this),\n              animation: {\n                top: destElement.offset().top - correctionOffset.top,\n                left: destElement.offset().left - correctionOffset.left,\n                opacity: 1.0,\n                scale: \"1.0\",\n              },\n            });\n          }\n        } else {\n          // The item from source collection is not present in destination collections\n          // Let's remove it\n          if (!options.useScaling) {\n            animationQueue.push({\n              element: $(this),\n              animation: { opacity: \"0.0\" },\n            });\n          } else {\n            animationQueue.push({\n              element: $(this),\n              animation: { opacity: \"0.0\", scale: \"0.0\" },\n            });\n          }\n        }\n      });\n\n      $collection.each(function (i) {\n        // Grab all items from target collection not present in visible source collection\n\n        var sourceElement = [];\n        var destElement = [];\n        if (typeof options.attribute == \"function\") {\n          val = options.attribute($(this));\n          $source.each(function () {\n            if (options.attribute(this) == val) {\n              sourceElement = $(this);\n              return false;\n            }\n          });\n\n          $collection.each(function () {\n            if (options.attribute(this) == val) {\n              destElement = $(this);\n              return false;\n            }\n          });\n        } else {\n          sourceElement = $source.filter(\n            \"[\" +\n              options.attribute +\n              \"=\" +\n              $(this).attr(options.attribute) +\n              \"]\",\n          );\n          destElement = $collection.filter(\n            \"[\" +\n              options.attribute +\n              \"=\" +\n              $(this).attr(options.attribute) +\n              \"]\",\n          );\n        }\n\n        var animationOptions;\n        if (sourceElement.length === 0) {\n          // No such element in source collection...\n          if (!options.useScaling) {\n            animationOptions = {\n              opacity: \"1.0\",\n            };\n          } else {\n            animationOptions = {\n              opacity: \"1.0\",\n              scale: \"1.0\",\n            };\n          }\n          // Let's create it\n          d = destElement.clone();\n          var rawDestElement = d.get(0);\n          rawDestElement.style.position = \"absolute\";\n          rawDestElement.style.margin = \"0\";\n          rawDestElement.style.top =\n            destElement.offset().top - correctionOffset.top + \"px\";\n          rawDestElement.style.left =\n            destElement.offset().left - correctionOffset.left + \"px\";\n          d.css(\"opacity\", 0.0); // IE\n          if (options.useScaling) {\n            d.css(\"transform\", \"scale(0.0)\");\n          }\n          d.appendTo($sourceParent);\n\n          animationQueue.push({ element: $(d), animation: animationOptions });\n        }\n      });\n\n      $dest.remove();\n      options.enhancement($sourceParent); // Perform custom visual enhancements during the animation\n      for (i = 0; i < animationQueue.length; i++) {\n        animationQueue[i].element.animate(\n          animationQueue[i].animation,\n          options.duration,\n          options.easing,\n          postCallback,\n        );\n      }\n    });\n  };\n})(jQuery);\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/quicksand/setting.js",
    "content": "jQuery.noConflict();\njQuery(document).ready(function($){\n\nif (jQuery().quicksand) {\n\n \t// Clone applications to get a second collection\n\tvar $data = $(\".portfolio-area\").clone();\n\t\n\t//NOTE: Only filter on the main portfolio page, not on the subcategory pages\n\t$('.portfolio-categ li').click(function(e) {\n\t\t$(\".filter li\").removeClass(\"active\");\t\n\t\t// Use the last category class as the category to filter by. This means that multiple categories are not supported (yet)\n\t\tvar filterClass=$(this).attr('class').split(' ').slice(-1)[0];\n\t\t\n\t\tif (filterClass == 'all') {\n\t\t\tvar $filteredData = $data.find('.item-thumbs');\n\t\t} else {\n\t\t\tvar $filteredData = $data.find('.item-thumbs[data-type=' + filterClass + ']');\n\t\t}\n\t\t$(\".portfolio-area\").quicksand($filteredData, {\n\t\t\tduration: 600,\n\t\t\tadjustHeight: 'auto'\n\t\t}\t\n\t\t$(this).addClass(\"active\"); \t\t\t\n\t\treturn false;\n\t});\n\t\n}//if quicksand\n\n});"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/js/validate.js",
    "content": "/*global jQuery:false */\njQuery(document).ready(function ($) {\n  \"use strict\";\n\n  //Contact\n  $(\"form.validateform\").submit(function () {\n    var f = $(this).find(\".field\"),\n      ferror = false,\n      emailExp = /^[^\\s()<>@,;:\\/]+@\\w[\\w\\.-]+\\.[a-z]{2,}$/i;\n\n    f.children(\"input\").each(function () {\n      // run all inputs\n\n      var i = $(this); // current input\n      var rule = i.attr(\"data-rule\");\n\n      if (rule != undefined) {\n        var ierror = false; // error flag for current input\n        var pos = rule.indexOf(\":\", 0);\n        if (pos >= 0) {\n          var exp = rule.substr(pos + 1, rule.length);\n          rule = rule.substr(0, pos);\n        } else {\n          rule = rule.substr(pos + 1, rule.length);\n        }\n\n        switch (rule) {\n          case \"required\":\n            if (i.val() == \"\") {\n              ferror = ierror = true;\n            }\n            break;\n\n          case \"maxlen\":\n            if (i.val().length < parseInt(exp)) {\n              ferror = ierror = true;\n            }\n            break;\n\n          case \"email\":\n            if (!emailExp.test(i.val())) {\n              ferror = ierror = true;\n            }\n            break;\n\n          case \"checked\":\n            if (!i.attr(\"checked\")) {\n              ferror = ierror = true;\n            }\n            break;\n\n          case \"regexp\":\n            exp = new RegExp(exp);\n            if (!exp.test(i.val())) {\n              ferror = ierror = true;\n            }\n            break;\n        }\n        i.next(\".validation\")\n          .html(\n            ierror\n              ? i.attr(\"data-msg\") != undefined\n                ? i.attr(\"data-msg\")\n                : \"wrong Input\"\n              : \"\",\n          )\n          .show(\"blind\");\n      }\n    });\n    f.children(\"textarea\").each(function () {\n      // run all inputs\n\n      var i = $(this); // current input\n      var rule = i.attr(\"data-rule\");\n\n      if (rule != undefined) {\n        var ierror = false; // error flag for current input\n        var pos = rule.indexOf(\":\", 0);\n        if (pos >= 0) {\n          var exp = rule.substr(pos + 1, rule.length);\n          rule = rule.substr(0, pos);\n        } else {\n          rule = rule.substr(pos + 1, rule.length);\n        }\n\n        switch (rule) {\n          case \"required\":\n            if (i.val() == \"\") {\n              ferror = ierror = true;\n            }\n            break;\n\n          case \"maxlen\":\n            if (i.val().length < parseInt(exp)) {\n              ferror = ierror = true;\n            }\n            break;\n        }\n        i.next(\".validation\")\n          .html(\n            ierror\n              ? i.attr(\"data-msg\") != undefined\n                ? i.attr(\"data-msg\")\n                : \"wrong Input\"\n              : \"\",\n          )\n          .show(\"blind\");\n      }\n    });\n    if (ferror) return false;\n    else var str = $(this).serialize();\n\n    $.ajax({\n      type: \"POST\",\n      url: \"contact/contact.php\",\n      data: str,\n      success: function (msg) {\n        $(\"#sendmessage\").addClass(\"show\");\n        $(\"#errormessage\").ajaxComplete(function (event, request, settings) {\n          if (msg == \"OK\") {\n            $(\"#sendmessage\").addClass(\"show\");\n          } else {\n            $(\"#sendmessage\").removeClass(\"show\");\n            result = msg;\n          }\n\n          $(this).html(result);\n        });\n      },\n    });\n    return false;\n  });\n});\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/portfolio.html",
    "content": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"utf-8\" />\n    <title>Above Multi-purpose Free Bootstrap Responsive Template</title>\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <meta name=\"description\" content=\"\" />\n    <meta name=\"author\" content=\"http://webthemez.com\" />\n    <!-- css -->\n    <link href=\"css/bootstrap.min.css\" rel=\"stylesheet\" />\n    <link href=\"css/fancybox/jquery.fancybox.css\" rel=\"stylesheet\" />\n    <link href=\"css/jcarousel.css\" rel=\"stylesheet\" />\n    <link href=\"css/flexslider.css\" rel=\"stylesheet\" />\n    <link href=\"css/style.css\" rel=\"stylesheet\" />\n\n    <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->\n    <!--[if lt IE 9]>\n      <script src=\"http://html5shim.googlecode.com/svn/trunk/html5.js\"></script>\n    <![endif]-->\n  </head>\n  <body>\n    <div id=\"wrapper\">\n      <!-- start header -->\n      <header>\n        <div class=\"navbar navbar-default navbar-static-top\">\n          <div class=\"container\">\n            <div class=\"navbar-header\">\n              <button\n                type=\"button\"\n                class=\"navbar-toggle\"\n                data-toggle=\"collapse\"\n                data-target=\".navbar-collapse\"\n              >\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n              </button>\n              <a class=\"navbar-brand\" href=\"index.html\"\n                ><img src=\"img/logo.png\" alt=\"logo\"\n              /></a>\n            </div>\n            <div class=\"navbar-collapse collapse\">\n              <ul class=\"nav navbar-nav\">\n                <li><a href=\"index.html\">Home</a></li>\n                <li><a href=\"about.html\">About Us</a></li>\n                <li><a href=\"courses.html\">Courses</a></li>\n                <li class=\"active\"><a href=\"portfolio.html\">Portfolio</a></li>\n                <li><a href=\"pricing.html\">Pricing</a></li>\n                <li><a href=\"contact.html\">Contact</a></li>\n              </ul>\n            </div>\n          </div>\n        </div>\n      </header>\n      <!-- end header -->\n      <section id=\"inner-headline\">\n        <div class=\"container\">\n          <div class=\"row\">\n            <div class=\"col-lg-12\">\n              <h2 class=\"pageTitle\">Portfolio</h2>\n            </div>\n          </div>\n        </div>\n      </section>\n      <section id=\"content\">\n        <div class=\"container\">\n          <div class=\"row\">\n            <div class=\"col-lg-12\">\n              <ul class=\"portfolio-categ filter\">\n                <li class=\"all active\"><a href=\"#\">All</a></li>\n                <li class=\"web\"><a href=\"#\" title=\"\">Web design</a></li>\n                <li class=\"icon\"><a href=\"#\" title=\"\">Mobile App</a></li>\n                <li class=\"graphic\"><a href=\"#\" title=\"\">UI design</a></li>\n              </ul>\n              <div class=\"clearfix\"></div>\n              <div class=\"row\">\n                <section id=\"projects\">\n                  <ul id=\"thumbs\" class=\"portfolio\">\n                    <!-- Item Project and Filter Name -->\n                    <li\n                      class=\"item-thumbs col-lg-3 design\"\n                      data-id=\"id-0\"\n                      data-type=\"web\"\n                    >\n                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->\n                      <a\n                        class=\"hover-wrap fancybox\"\n                        data-fancybox-group=\"gallery\"\n                        title=\"Portfolio name\"\n                        href=\"img/works/1.jpg\"\n                      >\n                        <span class=\"overlay-img\"></span>\n                        <span class=\"overlay-img-thumb\"\n                          ><i class=\"icon-info-blocks fa fa-code\"></i\n                        ></span>\n                      </a>\n                      <!-- Thumb Image and Description -->\n                      <img src=\"img/works/1.jpg\" alt=\"\" />\n                    </li>\n                    <!-- End Item Project -->\n                    <!-- Item Project and Filter Name -->\n                    <li\n                      class=\"item-thumbs col-lg-3 design\"\n                      data-id=\"id-1\"\n                      data-type=\"icon\"\n                    >\n                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->\n                      <a\n                        class=\"hover-wrap fancybox\"\n                        data-fancybox-group=\"gallery\"\n                        title=\"Portfolio name\"\n                        href=\"img/works/2.jpg\"\n                      >\n                        <span class=\"overlay-img\"></span>\n                        <span class=\"overlay-img-thumb\"\n                          ><i class=\"icon-info-blocks fa fa-code\"></i\n                        ></span>\n                      </a>\n                      <!-- Thumb Image and Description -->\n                      <img src=\"img/works/2.jpg\" alt=\"\" />\n                    </li>\n                    <!-- End Item Project -->\n                    <!-- Item Project and Filter Name -->\n                    <li\n                      class=\"item-thumbs col-lg-3 photography\"\n                      data-id=\"id-2\"\n                      data-type=\"graphic\"\n                    >\n                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->\n                      <a\n                        class=\"hover-wrap fancybox\"\n                        data-fancybox-group=\"gallery\"\n                        title=\"Portfolio name\"\n                        href=\"img/works/3.jpg\"\n                      >\n                        <span class=\"overlay-img\"></span>\n                        <span class=\"overlay-img-thumb\"\n                          ><i class=\"icon-info-blocks fa fa-code\"></i\n                        ></span>\n                      </a>\n                      <!-- Thumb Image and Description -->\n                      <img src=\"img/works/3.jpg\" alt=\"\" />\n                    </li>\n                    <!-- End Item Project -->\n                    <!-- Item Project and Filter Name -->\n                    <li\n                      class=\"item-thumbs col-lg-3 design\"\n                      data-id=\"id-0\"\n                      data-type=\"web\"\n                    >\n                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->\n                      <a\n                        class=\"hover-wrap fancybox\"\n                        data-fancybox-group=\"gallery\"\n                        title=\"Portfolio name\"\n                        href=\"img/works/4.jpg\"\n                      >\n                        <span class=\"overlay-img\"></span>\n                        <span class=\"overlay-img-thumb\"\n                          ><i class=\"icon-info-blocks fa fa-code\"></i\n                        ></span>\n                      </a>\n                      <!-- Thumb Image and Description -->\n                      <img src=\"img/works/4.jpg\" alt=\"\" />\n                    </li>\n                    <!-- End Item Project -->\n                    <!-- Item Project and Filter Name -->\n                    <li\n                      class=\"item-thumbs col-lg-3 photography\"\n                      data-id=\"id-4\"\n                      data-type=\"web\"\n                    >\n                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->\n                      <a\n                        class=\"hover-wrap fancybox\"\n                        data-fancybox-group=\"gallery\"\n                        title=\"Portfolio name\"\n                        href=\"img/works/5.jpg\"\n                      >\n                        <span class=\"overlay-img\"></span>\n                        <span class=\"overlay-img-thumb\"\n                          ><i class=\"icon-info-blocks fa fa-code\"></i\n                        ></span>\n                      </a>\n                      <!-- Thumb Image and Description -->\n                      <img src=\"img/works/5.jpg\" alt=\"\" />\n                    </li>\n                    <!-- End Item Project -->\n                    <!-- Item Project and Filter Name -->\n                    <li\n                      class=\"item-thumbs col-lg-3 photography\"\n                      data-id=\"id-5\"\n                      data-type=\"icon\"\n                    >\n                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->\n                      <a\n                        class=\"hover-wrap fancybox\"\n                        data-fancybox-group=\"gallery\"\n                        title=\"Portfolio name\"\n                        href=\"img/works/6.jpg\"\n                      >\n                        <span class=\"overlay-img\"></span>\n                        <span class=\"overlay-img-thumb\"\n                          ><i class=\"icon-info-blocks fa fa-code\"></i\n                        ></span>\n                      </a>\n                      <!-- Thumb Image and Description -->\n                      <img src=\"img/works/6.jpg\" alt=\"\" />\n                    </li>\n                    <!-- End Item Project -->\n                    <li\n                      class=\"item-thumbs col-lg-3 design\"\n                      data-id=\"id-0\"\n                      data-type=\"web\"\n                    >\n                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->\n                      <a\n                        class=\"hover-wrap fancybox\"\n                        data-fancybox-group=\"gallery\"\n                        title=\"Portfolio name\"\n                        href=\"img/works/7.jpg\"\n                      >\n                        <span class=\"overlay-img\"></span>\n                        <span class=\"overlay-img-thumb\"\n                          ><i class=\"icon-info-blocks fa fa-code\"></i\n                        ></span>\n                      </a>\n                      <!-- Thumb Image and Description -->\n                      <img src=\"img/works/7.jpg\" alt=\"\" />\n                    </li>\n                    <!-- End Item Project -->\n                    <!-- Item Project and Filter Name -->\n                    <li\n                      class=\"item-thumbs col-lg-3 design\"\n                      data-id=\"id-0\"\n                      data-type=\"graphic\"\n                    >\n                      <!-- Fancybox - Gallery Enabled - Title - Full Image -->\n                      <a\n                        class=\"hover-wrap fancybox\"\n                        data-fancybox-group=\"gallery\"\n                        title=\"Portfolio name\"\n                        href=\"img/works/8.jpg\"\n                      >\n                        <span class=\"overlay-img\"></span>\n                        <span class=\"overlay-img-thumb\"\n                          ><i class=\"icon-info-blocks fa fa-code\"></i\n                        ></span>\n                      </a>\n                      <!-- Thumb Image and Description -->\n                      <img src=\"img/works/8.jpg\" alt=\"\" />\n                    </li>\n                    <!-- End Item Project -->\n                  </ul>\n                </section>\n              </div>\n            </div>\n          </div>\n        </div>\n      </section>\n      <footer>\n        <div class=\"container\">\n          <div class=\"row\">\n            <div class=\"col-lg-3\">\n              <div class=\"widget\">\n                <h5 class=\"widgetheading\">Our Contact</h5>\n                <address>\n                  <strong>Abovecompany Inc</strong><br />\n                  JC Main Road, Near Silnile tower<br />\n                  Pin-21542 NewYork US.\n                </address>\n                <p>\n                  <i class=\"icon-phone\"></i> (123) 456-789 - 1255-12584 <br />\n                  <i class=\"icon-envelope-alt\"></i> email@domainname.com\n                </p>\n              </div>\n            </div>\n            <div class=\"col-lg-3\">\n              <div class=\"widget\">\n                <h5 class=\"widgetheading\">Quick Links</h5>\n                <ul class=\"link-list\">\n                  <li><a href=\"#\">Latest Events</a></li>\n                  <li><a href=\"#\">Terms and conditions</a></li>\n                  <li><a href=\"#\">Privacy policy</a></li>\n                  <li><a href=\"#\">Career</a></li>\n                  <li><a href=\"#\">Contact us</a></li>\n                </ul>\n              </div>\n            </div>\n            <div class=\"col-lg-3\">\n              <div class=\"widget\">\n                <h5 class=\"widgetheading\">Latest posts</h5>\n                <ul class=\"link-list\">\n                  <li>\n                    <a href=\"#\"\n                      >Lorem ipsum dolor sit amet, consectetur adipiscing\n                      elit.</a\n                    >\n                  </li>\n                  <li>\n                    <a href=\"#\"\n                      >Pellentesque et pulvinar enim. Quisque at tempor\n                      ligula</a\n                    >\n                  </li>\n                  <li>\n                    <a href=\"#\"\n                      >Natus error sit voluptatem accusantium doloremque</a\n                    >\n                  </li>\n                </ul>\n              </div>\n            </div>\n            <div class=\"col-lg-3\">\n              <div class=\"widget\">\n                <h5 class=\"widgetheading\">Recent News</h5>\n                <ul class=\"link-list\">\n                  <li>\n                    <a href=\"#\"\n                      >Lorem ipsum dolor sit amet, consectetur adipiscing\n                      elit.</a\n                    >\n                  </li>\n                  <li>\n                    <a href=\"#\"\n                      >Pellentesque et pulvinar enim. Quisque at tempor\n                      ligula</a\n                    >\n                  </li>\n                  <li>\n                    <a href=\"#\"\n                      >Natus error sit voluptatem accusantium doloremque</a\n                    >\n                  </li>\n                </ul>\n              </div>\n            </div>\n          </div>\n        </div>\n        <div id=\"sub-footer\">\n          <div class=\"container\">\n            <div class=\"row\">\n              <div class=\"col-lg-6\">\n                <div class=\"copyright\">\n                  <p>\n                    <span\n                      >&copy; Above Site All right reserved. Template By </span\n                    ><a href=\"http://webthemez.com\" target=\"_blank\"\n                      >WebThemez</a\n                    >\n                  </p>\n                </div>\n              </div>\n              <div class=\"col-lg-6\">\n                <ul class=\"social-network\">\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Facebook\"\n                      ><i class=\"fa fa-facebook\"></i\n                    ></a>\n                  </li>\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Twitter\"\n                      ><i class=\"fa fa-twitter\"></i\n                    ></a>\n                  </li>\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Linkedin\"\n                      ><i class=\"fa fa-linkedin\"></i\n                    ></a>\n                  </li>\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Pinterest\"\n                      ><i class=\"fa fa-pinterest\"></i\n                    ></a>\n                  </li>\n                  <li>\n                    <a href=\"#\" data-placement=\"top\" title=\"Google plus\"\n                      ><i class=\"fa fa-google-plus\"></i\n                    ></a>\n                  </li>\n                </ul>\n              </div>\n            </div>\n          </div>\n        </div>\n      </footer>\n    </div>\n    <a href=\"#\" class=\"scrollup\"><i class=\"fa fa-angle-up active\"></i></a>\n    <!-- javascript\n    ================================================== -->\n    <!-- Placed at the end of the document so the pages load faster -->\n    <script src=\"js/jquery.js\"></script>\n    <script src=\"js/jquery.easing.1.3.js\"></script>\n    <script src=\"js/bootstrap.min.js\"></script>\n    <script src=\"js/jquery.fancybox.pack.js\"></script>\n    <script src=\"js/jquery.fancybox-media.js\"></script>\n    <script src=\"js/portfolio/jquery.quicksand.js\"></script>\n    <script src=\"js/portfolio/setting.js\"></script>\n    <script src=\"js/jquery.flexslider.js\"></script>\n    <script src=\"js/animate.js\"></script>\n    <script src=\"js/custom.js\"></script>\n  </body>\n</html>\n"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/pricing.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"utf-8\">\n<title>Above Multi-purpose Free Bootstrap Responsive Template</title>\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n<meta name=\"description\" content=\"\" />\n<meta name=\"author\" content=\"http://webthemez.com\" />\n<!-- css -->\n<link href=\"css/bootstrap.min.css\" rel=\"stylesheet\" />\n<link href=\"css/fancybox/jquery.fancybox.css\" rel=\"stylesheet\">\n<link href=\"css/jcarousel.css\" rel=\"stylesheet\" />\n<link href=\"css/flexslider.css\" rel=\"stylesheet\" />\n<link href=\"css/style.css\" rel=\"stylesheet\" />\n \n<!-- HTML5 shim, for IE6-8 support of HTML5 elements -->\n<!--[if lt IE 9]>\n      <script src=\"http://html5shim.googlecode.com/svn/trunk/html5.js\"></script>\n    <![endif]-->\n\n</head>\n<body>\n<div id=\"wrapper\">\n\t<!-- start header -->\n\t\t<header>\n        <div class=\"navbar navbar-default navbar-static-top\">\n            <div class=\"container\">\n                <div class=\"navbar-header\">\n                    <button type=\"button\" class=\"navbar-toggle\" data-toggle=\"collapse\" data-target=\".navbar-collapse\">\n                        <span class=\"icon-bar\"></span>\n                        <span class=\"icon-bar\"></span>\n                        <span class=\"icon-bar\"></span>\n                    </button>\n                    <a class=\"navbar-brand\" href=\"index.html\"><img src=\"img/logo.png\" alt=\"logo\"/></a>\n                </div>\n                <div class=\"navbar-collapse collapse \">\n                    <ul class=\"nav navbar-nav\">\n                        <li><a href=\"index.html\">Home</a></li> \n\t\t\t\t\t\t<li><a href=\"about.html\">About Us</a></li>\n\t\t\t\t\t\t<li><a href=\"courses.html\">Courses</a></li>\n                        <li><a href=\"portfolio.html\">Portfolio</a></li>\n                        <li class=\"active\"><a href=\"pricing.html\">Pricing</a></li>\n                        <li><a href=\"contact.html\">Contact</a></li>\n                    </ul>\n                </div>\n            </div>\n        </div>\n\t</header><!-- end header -->\n\t<section id=\"inner-headline\">\n\t<div class=\"container\">\n\t\t<div class=\"row\">\n\t\t\t<div class=\"col-lg-12\">\n\t\t\t\t<h2 class=\"pageTitle\">Pricing</h2>\n\t\t\t</div>\n\t\t</div>\n\t</div>\n\t<section id=\"content\">\n\t<div class=\"container\">\t \n\t\t<!-- end divider -->\n\t\t<div class=\"row\"> \n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"pricing-box-item\">\n\t\t\t\t\t<div class=\"pricing-heading\">\n\t\t\t\t\t\t<h3><strong>Basic</strong></h3>\n\t\t\t\t\t</div>\n\t\t\t\t\t<div class=\"pricing-terms\">\n\t\t\t\t\t\t<h6>&#36;15.00 / Year</h6>\n\t\t\t\t\t</div>\n\t\t\t\t\t<div class=\"pricing-container\">\n\t\t\t\t\t\t<ul>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Responsive Design</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Bootstrap Design</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Unlimited Support</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Free Trial version</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> HTML5 CSS3 jQuery</li>\n\t\t\t\t\t\t</ul>\n\t\t\t\t\t</div>\n\t\t\t\t\t<div class=\"pricing-action\">\n\t\t\t\t\t\t<a href=\"#\" class=\"btn btn-medium btn-theme\"><i class=\"icon-bolt\"></i> Get Now</a>\n\t\t\t\t\t</div>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"pricing-box-item\">\n\t\t\t\t\t<div class=\"pricing-heading\">\n\t\t\t\t\t\t<h3><strong>Standard</strong></h3>\n\t\t\t\t\t</div>\n\t\t\t\t\t<div class=\"pricing-terms\">\n\t\t\t\t\t\t<h6>&#36;20.00 / Year</h6>\n\t\t\t\t\t</div>\n\t\t\t\t\t<div class=\"pricing-container\">\n\t\t\t\t\t\t<ul>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Responsive Design</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Bootstrap Design</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Unlimited Support</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Free Trial version</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> HTML5 CSS3 jQuery</li>\n\t\t\t\t\t\t</ul>\n\t\t\t\t\t</div>\n\t\t\t\t\t<div class=\"pricing-action\">\n\t\t\t\t\t\t<a href=\"#\" class=\"btn btn-medium btn-theme\"><i class=\"icon-bolt\"></i> Get Now</a>\n\t\t\t\t\t</div>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"pricing-box-item activeItem\">\n\t\t\t\t\t<div class=\"pricing-heading\">\n\t\t\t\t\t\t<h3><strong>Advanced</strong></h3>\n\t\t\t\t\t</div>\n\t\t\t\t\t<div class=\"pricing-terms\">\n\t\t\t\t\t\t<h6>&#36;15.00 / Year</h6>\n\t\t\t\t\t</div>\n\t\t\t\t\t<div class=\"pricing-container\">\n\t\t\t\t\t\t<ul>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Responsive Design</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Bootstrap Design</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Unlimited Support</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Free Trial version</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> HTML5 CSS3 jQuery</li>\n\t\t\t\t\t\t</ul>\n\t\t\t\t\t</div>\n\t\t\t\t\t<div class=\"pricing-action\">\n\t\t\t\t\t\t<a href=\"#\" class=\"btn btn-medium btn-theme\"><i class=\"icon-bolt\"></i> Get Now</a>\n\t\t\t\t\t</div>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"pricing-box-item\">\n\t\t\t\t\t<div class=\"pricing-heading\">\n\t\t\t\t\t\t<h3><strong>Mighty</strong></h3>\n\t\t\t\t\t</div>\n\t\t\t\t\t<div class=\"pricing-terms\">\n\t\t\t\t\t\t<h6>&#36;15.00 / Year</h6>\n\t\t\t\t\t</div>\n\t\t\t\t\t<div class=\"pricing-container\">\n\t\t\t\t\t\t<ul>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Responsive Design</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Bootstrap Design</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Unlimited Support</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> Free Trial version</li>\n\t\t\t\t\t\t\t<li><i class=\"icon-ok\"></i> HTML5 CSS3 jQuery</li>\n\t\t\t\t\t\t</ul>\n\t\t\t\t\t</div>\n\t\t\t\t\t<div class=\"pricing-action\">\n\t\t\t\t\t\t<a href=\"#\" class=\"btn btn-medium btn-theme\"><i class=\"icon-bolt\"></i> Get Now</a>\n\t\t\t\t\t</div>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t</div>\n\t</div>\n\t</section>\n\t<footer>\n\t<div class=\"container\">\n\t\t<div class=\"row\">\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"widget\">\n\t\t\t\t\t<h5 class=\"widgetheading\">Our Contact</h5>\n\t\t\t\t\t<address>\n\t\t\t\t\t<strong>Abovecompany Inc</strong><br>\n\t\t\t\t\tJC Main Road, Near Silnile tower<br>\n\t\t\t\t\t Pin-21542 NewYork US.</address>\n\t\t\t\t\t<p>\n\t\t\t\t\t\t<i class=\"icon-phone\"></i> (123) 456-789 - 1255-12584 <br>\n\t\t\t\t\t\t<i class=\"icon-envelope-alt\"></i> email@domainname.com\n\t\t\t\t\t</p>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"widget\">\n\t\t\t\t\t<h5 class=\"widgetheading\">Quick Links</h5>\n\t\t\t\t\t<ul class=\"link-list\">\n\t\t\t\t\t\t<li><a href=\"#\">Latest Events</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Terms and conditions</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Privacy policy</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Career</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Contact us</a></li>\n\t\t\t\t\t</ul>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t<div class=\"widget\">\n\t\t\t\t\t<h5 class=\"widgetheading\">Latest posts</h5>\n\t\t\t\t\t<ul class=\"link-list\">\n\t\t\t\t\t\t<li><a href=\"#\">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Pellentesque et pulvinar enim. Quisque at tempor ligula</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Natus error sit voluptatem accusantium doloremque</a></li>\n\t\t\t\t\t</ul>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t\t<div class=\"col-lg-3\">\n\t\t\t\t\t<div class=\"widget\">\n\t\t\t\t\t<h5 class=\"widgetheading\">Recent News</h5>\n\t\t\t\t\t<ul class=\"link-list\">\n\t\t\t\t\t\t<li><a href=\"#\">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Pellentesque et pulvinar enim. Quisque at tempor ligula</a></li>\n\t\t\t\t\t\t<li><a href=\"#\">Natus error sit voluptatem accusantium doloremque</a></li>\n\t\t\t\t\t</ul>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t</div>\n\t</div>\n\t<div id=\"sub-footer\">\n\t\t<div class=\"container\">\n\t\t\t<div class=\"row\">\n\t\t\t\t<div class=\"col-lg-6\">\n\t\t\t\t\t<div class=\"copyright\">\n\t\t\t\t\t\t<p>\n\t\t\t\t\t\t\t<span>&copy; Above Site All right reserved. Template By </span><a href=\"http://webthemez.com\" target=\"_blank\">WebThemez</a>\n\t\t\t\t\t\t</p>\n\t\t\t\t\t</div>\n\t\t\t\t</div>\n\t\t\t\t<div class=\"col-lg-6\">\n\t\t\t\t\t<ul class=\"social-network\">\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Facebook\"><i class=\"fa fa-facebook\"></i></a></li>\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Twitter\"><i class=\"fa fa-twitter\"></i></a></li>\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Linkedin\"><i class=\"fa fa-linkedin\"></i></a></li>\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Pinterest\"><i class=\"fa fa-pinterest\"></i></a></li>\n\t\t\t\t\t\t<li><a href=\"#\" data-placement=\"top\" title=\"Google plus\"><i class=\"fa fa-google-plus\"></i></a></li>\n\t\t\t\t\t</ul>\n\t\t\t\t</div>\n\t\t\t</div>\n\t\t</div>\n\t</div>\n\t</footer>\n</div>\n<a href=\"#\" class=\"scrollup\"><i class=\"fa fa-angle-up active\"></i></a>\n<!-- javascript\n    ================================================== -->\n<!-- Placed at the end of the document so the pages load faster -->\n<script src=\"js/jquery.js\"></script>\n<script src=\"js/jquery.easing.1.3.js\"></script>\n<script src=\"js/bootstrap.min.js\"></script>\n<script src=\"js/jquery.fancybox.pack.js\"></script>\n<script src=\"js/jquery.fancybox-media.js\"></script> \n<script src=\"js/portfolio/jquery.quicksand.js\"></script>\n<script src=\"js/portfolio/setting.js\"></script>\n<script src=\"js/jquery.flexslider.js\"></script>\n<script src=\"js/animate.js\"></script>\n<script src=\"js/custom.js\"></script>\n</body>\n</html>"
  },
  {
    "path": "backend/tests/integration/tests/pruning/website/readme.txt",
    "content": "Free Responsive HTML5 Template\n\nAbove Educational Bootstrap Responsive template is a modern clean multi-purpose html5 template built with valid HTML5 & CSS3. It's build on top of latest Bootstrap framework 3.3.1 fully responsive web compatible with multi browser and devices. This template can be used for multi-purpose needs like Educational Institutes, colleges, Schools, e-Learning, Training centre, Tutors, Charity, Primary School, business, consultancy, agency, personal portfolio, profile and mobile website.\n\n\nKey features\n-------------\nTwitter Bootstrap 3.3.1\nClean & Developer-friendly HTML5 and CSS3 code\n100% Responsive Layout Design \nMulti-purpose theme\nGoogle Fonts Support\nFont Awesome \nSmooth Scrolling \nFully Customizable\nContact Form\n\n\nCredits :\n-------\n=> Design and developed: \"WebThemez\"  http://webthemez.com\n=> Photos used in template: **Unsplash** - http://unsplash.com\n=> For more free web themes: http://webthemez.com\n=> Framework : http://getbootstrap.com\n\nLicense :\n-------\n**Creative Commons Attribution 3.0** - http://creativecommons.org/licenses/by/3.0/\n\nNote:\nAll images user here is for demo purpose only, we are not responsible for any copyrights.\n"
  },
  {
    "path": "backend/tests/integration/tests/query_history/test_query_history.py",
    "content": "import csv\nimport io\nimport os\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nimport pytest\n\nfrom onyx.configs.constants import QAFeedbackType\nfrom onyx.configs.constants import SessionType\nfrom tests.integration.common_utils.managers.api_key import APIKeyManager\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.query_history import QueryHistoryManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\n@pytest.fixture\ndef setup_chat_session(reset: None) -> tuple[DATestUser, str]:  # noqa: ARG001\n    # Create admin user and required resources\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin_user)\n    api_key = APIKeyManager.create(user_performing_action=admin_user)\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    # Seed a document\n    cc_pair.documents = []\n    cc_pair.documents.append(\n        DocumentManager.seed_doc_with_content(\n            cc_pair=cc_pair,\n            content=\"The company's revenue in Q1 was $1M\",\n            api_key=api_key,\n        )\n    )\n\n    # Create chat session and send a message\n    chat_session = ChatSessionManager.create(\n        persona_id=0,\n        description=\"Test chat session\",\n        user_performing_action=admin_user,\n    )\n\n    ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"What was the Q1 revenue?\",\n        user_performing_action=admin_user,\n    )\n\n    messages = ChatSessionManager.get_chat_history(\n        chat_session=chat_session,\n        user_performing_action=admin_user,\n    )\n\n    # Add another message to the chat session\n    ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=\"What about Q2 revenue?\",\n        user_performing_action=admin_user,\n        parent_message_id=messages[-1].id,\n    )\n\n    return admin_user, str(chat_session.id)\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Chat history tests are enterprise only\",\n)\ndef test_chat_history_endpoints(\n    reset: None,  # noqa: ARG001\n    setup_chat_session: tuple[DATestUser, str],\n) -> None:\n    admin_user, first_chat_id = setup_chat_session\n\n    # Get chat history\n    history_response = QueryHistoryManager.get_query_history_page(\n        user_performing_action=admin_user\n    )\n\n    # Verify we got back the one chat session we created\n    assert len(history_response.items) == 1\n\n    # Verify the first chat session details\n    first_session = history_response.items[0]\n    assert first_session.user_email == admin_user.email\n    assert first_session.name == \"Test chat session\"\n    assert first_session.first_user_message == \"What was the Q1 revenue?\"\n    assert first_session.first_ai_message is not None\n    assert first_session.assistant_id == 0\n    assert first_session.feedback_type is None\n    assert first_session.flow_type == SessionType.CHAT\n    assert first_session.conversation_length == 4  # 2 User messages + 2 AI responses\n\n    # Test date filtering - should return no results\n    past_end = datetime.now(tz=timezone.utc) - timedelta(days=1)\n    past_start = past_end - timedelta(days=1)\n    history_response = QueryHistoryManager.get_query_history_page(\n        start_time=past_start,\n        end_time=past_end,\n        user_performing_action=admin_user,\n    )\n    assert len(history_response.items) == 0\n\n    # Test get specific chat session endpoint\n    session_details = QueryHistoryManager.get_chat_session_admin(\n        chat_session_id=first_chat_id,\n        user_performing_action=admin_user,\n    )\n\n    # Verify the session details\n    assert str(session_details.id) == first_chat_id\n    assert len(session_details.messages) > 0\n    assert session_details.flow_type == SessionType.CHAT\n\n    # Test filtering by feedback\n    history_response = QueryHistoryManager.get_query_history_page(\n        feedback_type=QAFeedbackType.LIKE,\n        user_performing_action=admin_user,\n    )\n    assert len(history_response.items) == 0\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Chat history tests are enterprise only\",\n)\ndef test_chat_history_csv_export(\n    reset: None,  # noqa: ARG001\n    setup_chat_session: tuple[DATestUser, str],\n) -> None:\n    admin_user, _ = setup_chat_session\n\n    # Test CSV export endpoint with date filtering\n    headers, csv_content = QueryHistoryManager.get_query_history_as_csv(\n        user_performing_action=admin_user,\n    )\n    assert headers[\"Content-Type\"] == \"text/csv; charset=utf-8\"\n    assert \"Content-Disposition\" in headers\n\n    # Use csv.reader to properly handle newlines inside quoted fields\n    csv_rows = list(csv.reader(io.StringIO(csv_content)))\n    assert len(csv_rows) == 3  # Header + 2 QA pairs\n    assert csv_rows[0][0] == \"chat_session_id\"\n    assert \"user_message\" in csv_rows[0]\n    assert \"ai_response\" in csv_rows[0]\n    assert \"What was the Q1 revenue?\" in csv_content\n    assert \"What about Q2 revenue?\" in csv_content\n\n    # Test CSV export with date filtering - should return no results\n    past_end = datetime.now(tz=timezone.utc) - timedelta(days=1)\n    past_start = past_end - timedelta(days=1)\n    headers, csv_content = QueryHistoryManager.get_query_history_as_csv(\n        start_time=past_start,\n        end_time=past_end,\n        user_performing_action=admin_user,\n    )\n    csv_rows = list(csv.reader(io.StringIO(csv_content)))\n    assert len(csv_rows) == 1  # Only header, no data rows\n"
  },
  {
    "path": "backend/tests/integration/tests/query_history/test_query_history_pagination.py",
    "content": "import os\nfrom datetime import datetime\n\nimport pytest\n\nfrom onyx.configs.constants import QAFeedbackType\nfrom tests.integration.common_utils.managers.query_history import QueryHistoryManager\nfrom tests.integration.common_utils.test_models import DAQueryHistoryEntry\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.tests.query_history.utils import (\n    setup_chat_sessions_with_different_feedback,\n)\n\n\ndef _verify_query_history_pagination(\n    chat_sessions: list[DAQueryHistoryEntry],\n    user_performing_action: DATestUser,\n    page_size: int = 5,\n    feedback_type: QAFeedbackType | None = None,\n    start_time: datetime | None = None,\n    end_time: datetime | None = None,\n) -> None:\n    retrieved_sessions: list[str] = []\n\n    for i in range(0, len(chat_sessions), page_size):\n        paginated_result = QueryHistoryManager.get_query_history_page(\n            page_num=i // page_size,\n            page_size=page_size,\n            feedback_type=feedback_type,\n            start_time=start_time,\n            end_time=end_time,\n            user_performing_action=user_performing_action,\n        )\n\n        # Verify that the total items is equal to the length of the chat sessions list\n        assert paginated_result.total_items == len(chat_sessions)\n        # Verify that the number of items in the page is equal to the page size\n        assert len(paginated_result.items) == min(page_size, len(chat_sessions) - i)\n        # Add the retrieved chat sessions to the list of retrieved sessions\n        retrieved_sessions.extend(\n            [str(session.id) for session in paginated_result.items]\n        )\n\n    # Create a set of all the expected chat session IDs\n    all_expected_sessions = set(str(session.id) for session in chat_sessions)\n    # Create a set of all the retrieved chat session IDs\n    all_retrieved_sessions = set(retrieved_sessions)\n\n    # Verify that the set of retrieved sessions is equal to the set of expected sessions\n    assert all_expected_sessions == all_retrieved_sessions\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Query history tests are enterprise only\",\n)\ndef test_query_history_pagination(reset: None) -> None:  # noqa: ARG001\n    (\n        admin_user,\n        chat_sessions_by_feedback_type,\n    ) = setup_chat_sessions_with_different_feedback()\n\n    all_chat_sessions = []\n    for _, chat_sessions in chat_sessions_by_feedback_type.items():\n        all_chat_sessions.extend(chat_sessions)\n\n    # Verify basic pagination with different page sizes\n    print(\"Verifying basic pagination with page size 5\")\n    _verify_query_history_pagination(\n        chat_sessions=all_chat_sessions,\n        page_size=5,\n        user_performing_action=admin_user,\n    )\n    print(\"Verifying basic pagination with page size 10\")\n    _verify_query_history_pagination(\n        chat_sessions=all_chat_sessions,\n        page_size=10,\n        user_performing_action=admin_user,\n    )\n\n    print(\"Verifying pagination with feedback type LIKE\")\n    liked_sessions = chat_sessions_by_feedback_type[QAFeedbackType.LIKE]\n    _verify_query_history_pagination(\n        chat_sessions=liked_sessions,\n        feedback_type=QAFeedbackType.LIKE,\n        user_performing_action=admin_user,\n    )\n\n    print(\"Verifying pagination with feedback type DISLIKE\")\n    disliked_sessions = chat_sessions_by_feedback_type[QAFeedbackType.DISLIKE]\n    _verify_query_history_pagination(\n        chat_sessions=disliked_sessions,\n        feedback_type=QAFeedbackType.DISLIKE,\n        user_performing_action=admin_user,\n    )\n\n    print(\"Verifying pagination with feedback type MIXED\")\n    mixed_sessions = chat_sessions_by_feedback_type[QAFeedbackType.MIXED]\n    _verify_query_history_pagination(\n        chat_sessions=mixed_sessions,\n        feedback_type=QAFeedbackType.MIXED,\n        user_performing_action=admin_user,\n    )\n\n    # Test with a small page size to verify handling of partial pages\n    print(\"Verifying pagination with page size 3\")\n    _verify_query_history_pagination(\n        chat_sessions=all_chat_sessions,\n        page_size=3,\n        user_performing_action=admin_user,\n    )\n\n    # Test with a page size larger than the total number of items\n    print(\"Verifying pagination with page size 50\")\n    _verify_query_history_pagination(\n        chat_sessions=all_chat_sessions,\n        page_size=50,\n        user_performing_action=admin_user,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/query_history/test_usage_reports.py",
    "content": "from datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nfrom ee.onyx.db.usage_export import get_all_empty_chat_message_entries\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.seeding.chat_history_seeding import seed_chat_history\n\n\ndef test_usage_reports(reset: None) -> None:  # noqa: ARG001\n    EXPECTED_SESSIONS = 2048\n    MESSAGES_PER_SESSION = 4\n\n    # divide by 2 because only messages of type USER are returned\n    EXPECTED_MESSAGES = EXPECTED_SESSIONS * MESSAGES_PER_SESSION / 2\n\n    seed_chat_history(EXPECTED_SESSIONS, MESSAGES_PER_SESSION, 90)\n\n    with get_session_with_current_tenant() as db_session:\n        # count of all entries should be exact\n        period = (\n            datetime.fromtimestamp(0, tz=timezone.utc),\n            datetime.now(tz=timezone.utc),\n        )\n\n        count = 0\n        for entry_batch in get_all_empty_chat_message_entries(db_session, period):\n            for entry in entry_batch:\n                count += 1\n\n        assert count == EXPECTED_MESSAGES\n\n        # count in a one month time range should be within a certain range statistically\n        # this can be improved if we seed the chat history data deterministically\n        period = (\n            datetime.now(tz=timezone.utc) - timedelta(days=30),\n            datetime.now(tz=timezone.utc),\n        )\n\n        count = 0\n        for entry_batch in get_all_empty_chat_message_entries(db_session, period):\n            for entry in entry_batch:\n                count += 1\n\n        lower = EXPECTED_MESSAGES // 3 - (EXPECTED_MESSAGES // (3 * 3))\n        upper = EXPECTED_MESSAGES // 3 + (EXPECTED_MESSAGES // (3 * 3))\n        assert count > lower\n        assert count < upper\n"
  },
  {
    "path": "backend/tests/integration/tests/query_history/utils.py",
    "content": "from concurrent.futures import as_completed\nfrom concurrent.futures import ThreadPoolExecutor\n\nfrom onyx.configs.constants import QAFeedbackType\nfrom tests.integration.common_utils.managers.api_key import APIKeyManager\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DAQueryHistoryEntry\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _create_chat_session_with_feedback(\n    admin_user: DATestUser,\n    i: int,\n    feedback_type: QAFeedbackType | None,\n) -> tuple[QAFeedbackType | None, DAQueryHistoryEntry]:\n    print(f\"Creating chat session {i} with feedback type {feedback_type}\")\n    # Create chat session with timestamp spread over 30 days\n    chat_session = ChatSessionManager.create(\n        persona_id=0,\n        description=f\"Test chat session {i}\",\n        user_performing_action=admin_user,\n    )\n\n    test_session = DAQueryHistoryEntry(\n        id=chat_session.id,\n        persona_id=0,\n        description=f\"Test chat session {i}\",\n        feedback_type=feedback_type,\n    )\n\n    # First message in chat\n    ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=f\"Question {i}?\",\n        user_performing_action=admin_user,\n    )\n\n    messages = ChatSessionManager.get_chat_history(\n        chat_session=chat_session,\n        user_performing_action=admin_user,\n    )\n    if feedback_type == QAFeedbackType.MIXED or feedback_type == QAFeedbackType.DISLIKE:\n        ChatSessionManager.create_chat_message_feedback(\n            message_id=messages[-1].id,\n            is_positive=False,\n            user_performing_action=admin_user,\n        )\n\n    # Second message with different feedback types\n    ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=f\"Follow up {i}?\",\n        user_performing_action=admin_user,\n        parent_message_id=messages[-1].id,\n    )\n\n    # Get updated messages to get the ID of the second message\n    messages = ChatSessionManager.get_chat_history(\n        chat_session=chat_session,\n        user_performing_action=admin_user,\n    )\n    if feedback_type == QAFeedbackType.MIXED or feedback_type == QAFeedbackType.LIKE:\n        ChatSessionManager.create_chat_message_feedback(\n            message_id=messages[-1].id,\n            is_positive=True,\n            user_performing_action=admin_user,\n        )\n\n    return feedback_type, test_session\n\n\ndef setup_chat_sessions_with_different_feedback() -> (\n    tuple[DATestUser, dict[QAFeedbackType | None, list[DAQueryHistoryEntry]]]\n):\n    # Create admin user and required resources\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin_user)\n    api_key = APIKeyManager.create(user_performing_action=admin_user)\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    # Seed a document\n    cc_pair.documents = []\n    cc_pair.documents.append(\n        DocumentManager.seed_doc_with_content(\n            cc_pair=cc_pair,\n            content=\"The company's revenue in Q1 was $1M\",\n            api_key=api_key,\n        )\n    )\n\n    chat_sessions_by_feedback_type: dict[\n        QAFeedbackType | None, list[DAQueryHistoryEntry]\n    ] = {}\n    # Use ThreadPoolExecutor to create chat sessions in parallel\n    with ThreadPoolExecutor(max_workers=5) as executor:\n        # Submit all tasks and store futures\n        j = 0\n        # Will result in 40 sessions\n        number_of_sessions = 10\n        futures = []\n        for feedback_type in [\n            QAFeedbackType.MIXED,\n            QAFeedbackType.LIKE,\n            QAFeedbackType.DISLIKE,\n            None,\n        ]:\n            futures.extend(\n                [\n                    executor.submit(\n                        _create_chat_session_with_feedback,\n                        admin_user,\n                        (j * number_of_sessions) + i,\n                        feedback_type,\n                    )\n                    for i in range(number_of_sessions)\n                ]\n            )\n            j += 1\n\n        # Collect results in order\n        for future in as_completed(futures):\n            feedback_type, chat_session = future.result()\n            chat_sessions_by_feedback_type.setdefault(feedback_type, []).append(\n                chat_session\n            )\n\n    return admin_user, chat_sessions_by_feedback_type\n"
  },
  {
    "path": "backend/tests/integration/tests/reporting/test_usage_export_api.py",
    "content": "import csv\nimport os\nimport time\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom io import BytesIO\nfrom io import StringIO\nfrom uuid import UUID\nfrom zipfile import ZipFile\n\nimport pytest\nimport requests\n\nfrom ee.onyx.db.usage_export import UsageReportMetadata\nfrom onyx.configs.constants import DEFAULT_PERSONA_ID\nfrom onyx.db.seeding.chat_history_seeding import seed_chat_history\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Usage export is an enterprise feature\",\n)\nclass TestUsageExportAPI:\n    def test_generate_usage_report(\n        self,\n        reset: None,  # noqa: ARG002\n        admin_user: DATestUser,  # noqa: ARG002\n    ) -> None:\n        # Seed some chat history data for the report\n        seed_chat_history(\n            num_sessions=10,\n            num_messages=4,\n            days=30,\n            user_id=UUID(admin_user.id),\n            persona_id=DEFAULT_PERSONA_ID,\n        )\n\n        # Get initial list of reports\n        initial_response = requests.get(\n            f\"{API_SERVER_URL}/admin/usage-report\",\n            headers=admin_user.headers,\n        )\n        assert initial_response.status_code == 200\n        initial_reports = initial_response.json()\n        initial_count = len(initial_reports)\n\n        # Test generating a report without date filters (all time)\n        response = requests.post(\n            f\"{API_SERVER_URL}/admin/usage-report\",\n            json={},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 204\n\n        # Wait for the new report to appear (with timeout)\n        max_wait_time = 60  # seconds\n        start_time = time.time()\n        current_reports = initial_reports\n\n        while time.time() - start_time < max_wait_time:\n            check_response = requests.get(\n                f\"{API_SERVER_URL}/admin/usage-report\",\n                headers=admin_user.headers,\n            )\n            assert check_response.status_code == 200\n            current_reports = check_response.json()\n\n            if len(current_reports) > initial_count:\n                # New report has been generated\n                break\n\n            time.sleep(2)\n\n        # Verify a new report was created\n        assert len(current_reports) > initial_count\n\n        # Find the new report (should be the first one since they're ordered by time)\n        new_report = current_reports[0]\n        assert \"report_name\" in new_report\n        assert new_report[\"report_name\"].endswith(\".zip\")\n\n    def test_generate_usage_report_with_date_range(\n        self,\n        reset: None,  # noqa: ARG002\n        admin_user: DATestUser,  # noqa: ARG002\n    ) -> None:\n        # Seed some chat history data\n        seed_chat_history(\n            num_sessions=20,\n            num_messages=4,\n            days=60,\n            user_id=UUID(admin_user.id),\n            persona_id=DEFAULT_PERSONA_ID,\n        )\n\n        # Get initial list of reports\n        initial_response = requests.get(\n            f\"{API_SERVER_URL}/admin/usage-report\",\n            headers=admin_user.headers,\n        )\n        assert initial_response.status_code == 200\n        initial_reports = initial_response.json()\n        initial_count = len(initial_reports)\n\n        # Generate report for the last 30 days\n        period_to = datetime.now(tz=timezone.utc)\n        period_from = period_to - timedelta(days=30)\n\n        response = requests.post(\n            f\"{API_SERVER_URL}/admin/usage-report\",\n            json={\n                \"period_from\": period_from.isoformat(),\n                \"period_to\": period_to.isoformat(),\n            },\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 204\n\n        # Wait for the new report to appear\n        max_wait_time = 60\n        start_time = time.time()\n        current_reports = initial_reports\n\n        while time.time() - start_time < max_wait_time:\n            check_response = requests.get(\n                f\"{API_SERVER_URL}/admin/usage-report\",\n                headers=admin_user.headers,\n            )\n            assert check_response.status_code == 200\n            current_reports = check_response.json()\n\n            if len(current_reports) > initial_count:\n                break\n\n            time.sleep(2)\n\n        assert len(current_reports) > initial_count\n\n        # Find the new report (the one that wasn't in initial_reports)\n        new_reports = [r for r in current_reports if r not in initial_reports]\n        assert len(new_reports) > 0\n        new_report = new_reports[0]\n\n        # Verify the new report has the expected date range\n        assert new_report[\"period_from\"] is not None\n        assert new_report[\"period_to\"] is not None\n\n    def test_generate_usage_report_invalid_dates(\n        self,\n        reset: None,  # noqa: ARG002\n        admin_user: DATestUser,  # noqa: ARG002\n    ) -> None:\n        # Test with invalid date format\n        response = requests.post(\n            f\"{API_SERVER_URL}/admin/usage-report\",\n            json={\n                \"period_from\": \"not-a-date\",\n                \"period_to\": datetime.now(tz=timezone.utc).isoformat(),\n            },\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 400\n\n    def test_fetch_usage_reports(\n        self,\n        reset: None,  # noqa: ARG002\n        admin_user: DATestUser,  # noqa: ARG002\n    ) -> None:\n        # First generate a report to ensure we have at least one\n        seed_chat_history(\n            num_sessions=5,\n            num_messages=4,\n            days=30,\n            user_id=UUID(admin_user.id),\n            persona_id=DEFAULT_PERSONA_ID,\n        )\n\n        # Get initial count\n        initial_response = requests.get(\n            f\"{API_SERVER_URL}/admin/usage-report\",\n            headers=admin_user.headers,\n        )\n        assert initial_response.status_code == 200\n        initial_count = len(initial_response.json())\n\n        # Generate a report\n        generate_response = requests.post(\n            f\"{API_SERVER_URL}/admin/usage-report\",\n            json={},\n            headers=admin_user.headers,\n        )\n        assert generate_response.status_code == 204\n\n        # Wait for the new report to appear\n        max_wait_time = 15\n        start_time = time.time()\n        reports = []\n\n        while time.time() - start_time < max_wait_time:\n            response = requests.get(\n                f\"{API_SERVER_URL}/admin/usage-report\",\n                headers=admin_user.headers,\n            )\n            assert response.status_code == 200\n            reports = response.json()\n\n            if len(reports) > initial_count:\n                break\n\n            time.sleep(2)\n\n        # Verify we have at least one report\n        assert isinstance(reports, list)\n        assert len(reports) > initial_count\n\n        # Validate the structure of the first report\n        first_report = reports[0]\n        assert \"report_name\" in first_report\n        assert \"requestor\" in first_report\n        assert \"time_created\" in first_report\n        assert \"period_from\" in first_report\n        assert \"period_to\" in first_report\n\n        # Verify it's a valid UsageReportMetadata object\n        report_metadata = UsageReportMetadata(**first_report)\n        assert report_metadata.report_name.endswith(\".zip\")\n\n    def test_read_usage_report(\n        self,\n        reset: None,  # noqa: ARG002\n        admin_user: DATestUser,  # noqa: ARG002\n    ) -> None:\n        # First generate a report\n        seed_chat_history(\n            num_sessions=5,\n            num_messages=4,\n            days=30,\n            user_id=UUID(admin_user.id),\n            persona_id=DEFAULT_PERSONA_ID,\n        )\n\n        # Get initial reports count\n        initial_response = requests.get(\n            f\"{API_SERVER_URL}/admin/usage-report\",\n            headers=admin_user.headers,\n        )\n        assert initial_response.status_code == 200\n        initial_count = len(initial_response.json())\n\n        generate_response = requests.post(\n            f\"{API_SERVER_URL}/admin/usage-report\",\n            json={},\n            headers=admin_user.headers,\n        )\n        assert generate_response.status_code == 204\n\n        # Wait for the new report to appear\n        max_wait_time = 15\n        start_time = time.time()\n        reports = []\n\n        while time.time() - start_time < max_wait_time:\n            list_response = requests.get(\n                f\"{API_SERVER_URL}/admin/usage-report\",\n                headers=admin_user.headers,\n            )\n            assert list_response.status_code == 200\n            reports = list_response.json()\n\n            if len(reports) > initial_count:\n                break\n\n            time.sleep(2)\n\n        assert len(reports) > initial_count\n\n        report_name = reports[0][\"report_name\"]\n\n        # Download the report\n        download_response = requests.get(\n            f\"{API_SERVER_URL}/admin/usage-report/{report_name}\",\n            headers=admin_user.headers,\n            stream=True,\n        )\n        assert download_response.status_code == 200\n        assert download_response.headers[\"Content-Type\"] == \"application/zip\"\n        assert \"Content-Disposition\" in download_response.headers\n        assert (\n            f\"filename={report_name}\"\n            in download_response.headers[\"Content-Disposition\"]\n        )\n\n        # Verify it's a valid zip file\n        zip_content = BytesIO(download_response.content)\n        with ZipFile(zip_content, \"r\") as zip_file:\n            # Check that the zip contains expected files\n            file_names = zip_file.namelist()\n            assert \"chat_messages.csv\" in file_names\n            assert \"users.csv\" in file_names\n\n            # Verify chat_messages.csv has the expected columns\n            with zip_file.open(\"chat_messages.csv\") as csv_file:\n                csv_content = csv_file.read().decode(\"utf-8\")\n                csv_reader = csv.DictReader(StringIO(csv_content))\n\n                # Check that all expected columns are present\n                expected_columns = {\n                    \"session_id\",\n                    \"user_id\",\n                    \"flow_type\",\n                    \"time_sent\",\n                    \"assistant_name\",\n                    \"user_email\",\n                    \"number_of_tokens\",\n                }\n                actual_columns = set(csv_reader.fieldnames or [])\n                assert (\n                    expected_columns == actual_columns\n                ), f\"Expected columns {expected_columns}, but got {actual_columns}\"\n\n                # Verify there's at least one row of data\n                rows = list(csv_reader)\n                assert len(rows) > 0, \"Expected at least one message in the report\"\n\n                # Verify the first row has non-empty values for all columns\n                first_row = rows[0]\n                for column in expected_columns:\n                    assert column in first_row, f\"Column {column} not found in row\"\n                    assert first_row[\n                        column\n                    ], f\"Column {column} has empty value in first row\"\n\n                # Verify specific new fields have appropriate values\n                assert first_row[\"assistant_name\"], \"assistant_name should not be empty\"\n                assert first_row[\"user_email\"], \"user_email should not be empty\"\n                assert first_row[\n                    \"number_of_tokens\"\n                ].isdigit(), \"number_of_tokens should be a numeric value\"\n                assert (\n                    int(first_row[\"number_of_tokens\"]) >= 0\n                ), \"number_of_tokens should be non-negative\"\n\n    def test_read_nonexistent_report(\n        self,\n        reset: None,  # noqa: ARG002\n        admin_user: DATestUser,  # noqa: ARG002\n    ) -> None:\n        # Try to download a report that doesn't exist\n        response = requests.get(\n            f\"{API_SERVER_URL}/admin/usage-report/nonexistent_report.zip\",\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 404\n\n    def test_non_admin_cannot_generate_report(\n        self,\n        reset: None,  # noqa: ARG002\n        basic_user: DATestUser,  # noqa: ARG002\n    ) -> None:\n        # Try to generate a report as non-admin\n        response = requests.post(\n            f\"{API_SERVER_URL}/admin/usage-report\",\n            json={},\n            headers=basic_user.headers,\n        )\n        assert response.status_code == 403\n\n    def test_non_admin_cannot_fetch_reports(\n        self,\n        reset: None,  # noqa: ARG002\n        basic_user: DATestUser,  # noqa: ARG002\n    ) -> None:\n        # Try to fetch reports as non-admin\n        response = requests.get(\n            f\"{API_SERVER_URL}/admin/usage-report\",\n            headers=basic_user.headers,\n        )\n        assert response.status_code == 403\n\n    def test_non_admin_cannot_download_report(\n        self,\n        reset: None,  # noqa: ARG002\n        basic_user: DATestUser,  # noqa: ARG002\n    ) -> None:\n        # Try to download a report as non-admin\n        response = requests.get(\n            f\"{API_SERVER_URL}/admin/usage-report/some_report.zip\",\n            headers=basic_user.headers,\n        )\n        assert response.status_code == 403\n\n    def test_concurrent_report_generation(\n        self,\n        reset: None,  # noqa: ARG002\n        admin_user: DATestUser,  # noqa: ARG002\n    ) -> None:\n        # Seed some data\n        seed_chat_history(\n            num_sessions=10,\n            num_messages=4,\n            days=30,\n            user_id=UUID(admin_user.id),\n            persona_id=DEFAULT_PERSONA_ID,\n        )\n\n        # Get initial count of reports\n        initial_response = requests.get(\n            f\"{API_SERVER_URL}/admin/usage-report\",\n            headers=admin_user.headers,\n        )\n        assert initial_response.status_code == 200\n        initial_count = len(initial_response.json())\n\n        # Generate multiple reports concurrently\n        num_reports = 3\n        for i in range(num_reports):\n            response = requests.post(\n                f\"{API_SERVER_URL}/admin/usage-report\",\n                json={},\n                headers=admin_user.headers,\n            )\n            assert response.status_code == 204\n\n        # Wait for all reports to be generated\n        max_wait_time = 120\n        start_time = time.time()\n        reports = []\n\n        while time.time() - start_time < max_wait_time:\n            response = requests.get(\n                f\"{API_SERVER_URL}/admin/usage-report\",\n                headers=admin_user.headers,\n            )\n            assert response.status_code == 200\n            reports = response.json()\n\n            if len(reports) >= initial_count + num_reports:\n                break\n\n            time.sleep(2)\n\n        # Verify we have at least 3 new reports\n        assert len(reports) >= initial_count + num_reports\n"
  },
  {
    "path": "backend/tests/integration/tests/scim/test_scim_groups.py",
    "content": "\"\"\"Integration tests for SCIM group provisioning endpoints.\n\nCovers the full group lifecycle as driven by an IdP (Okta / Azure AD):\n1. Create a group via POST /Groups\n2. Retrieve a group via GET /Groups/{id}\n3. List, filter, and paginate groups via GET /Groups\n4. Replace a group via PUT /Groups/{id}\n5. Patch a group (add/remove members, rename) via PATCH /Groups/{id}\n6. Delete a group via DELETE /Groups/{id}\n7. Error cases: duplicate name, not-found, invalid member IDs\n\nAll tests are parameterized across IdP request styles (Okta sends lowercase\nPATCH ops; Entra sends capitalized ops like ``\"Replace\"``). The server\nnormalizes both — these tests verify that.\n\nAuth tests live in test_scim_tokens.py.\nUser lifecycle tests live in test_scim_users.py.\n\"\"\"\n\nimport pytest\nimport requests\n\nfrom onyx.auth.schemas import UserRole\nfrom tests.integration.common_utils.constants import ADMIN_USER_NAME\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import GENERAL_HEADERS\nfrom tests.integration.common_utils.managers.scim_client import ScimClient\nfrom tests.integration.common_utils.managers.scim_token import ScimTokenManager\nfrom tests.integration.common_utils.managers.user import build_email\nfrom tests.integration.common_utils.managers.user import DEFAULT_PASSWORD\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nSCIM_GROUP_SCHEMA = \"urn:ietf:params:scim:schemas:core:2.0:Group\"\nSCIM_USER_SCHEMA = \"urn:ietf:params:scim:schemas:core:2.0:User\"\nSCIM_PATCH_SCHEMA = \"urn:ietf:params:scim:api:messages:2.0:PatchOp\"\n\n\n@pytest.fixture(scope=\"module\", params=[\"okta\", \"entra\"])\ndef idp_style(request: pytest.FixtureRequest) -> str:\n    \"\"\"Parameterized IdP style — runs every test with both Okta and Entra request formats.\"\"\"\n    return request.param\n\n\n@pytest.fixture(scope=\"module\")\ndef scim_token(idp_style: str) -> str:\n    \"\"\"Create a single SCIM token shared across all tests in this module.\n\n    Creating a new token revokes the previous one, so we create exactly once\n    per IdP-style run and reuse. Uses UserManager directly to avoid\n    fixture-scope conflicts with the function-scoped admin_user fixture.\n    \"\"\"\n    try:\n        admin = UserManager.create(name=ADMIN_USER_NAME)\n    except Exception:\n        admin = UserManager.login_as_user(\n            DATestUser(\n                id=\"\",\n                email=build_email(ADMIN_USER_NAME),\n                password=DEFAULT_PASSWORD,\n                headers=GENERAL_HEADERS,\n                role=UserRole.ADMIN,\n                is_active=True,\n            )\n        )\n\n    token = ScimTokenManager.create(\n        name=f\"scim-group-tests-{idp_style}\",\n        user_performing_action=admin,\n    ).raw_token\n    assert token is not None\n    return token\n\n\ndef _make_group_resource(\n    display_name: str,\n    external_id: str | None = None,\n    members: list[dict] | None = None,\n) -> dict:\n    \"\"\"Build a minimal SCIM GroupResource payload.\"\"\"\n    resource: dict = {\n        \"schemas\": [SCIM_GROUP_SCHEMA],\n        \"displayName\": display_name,\n    }\n    if external_id is not None:\n        resource[\"externalId\"] = external_id\n    if members is not None:\n        resource[\"members\"] = members\n    return resource\n\n\ndef _make_user_resource(email: str, external_id: str) -> dict:\n    \"\"\"Build a minimal SCIM UserResource payload for member creation.\"\"\"\n    return {\n        \"schemas\": [SCIM_USER_SCHEMA],\n        \"userName\": email,\n        \"externalId\": external_id,\n        \"name\": {\"givenName\": \"Test\", \"familyName\": \"User\"},\n        \"active\": True,\n    }\n\n\ndef _make_patch_request(operations: list[dict], idp_style: str = \"okta\") -> dict:\n    \"\"\"Build a SCIM PatchOp payload, applying IdP-specific operation casing.\n\n    Entra sends capitalized operations (e.g. ``\"Replace\"`` instead of\n    ``\"replace\"``). The server's ``normalize_operation`` validator lowercases\n    them — these tests verify that both casings are accepted.\n    \"\"\"\n    cased_operations = []\n    for operation in operations:\n        cased = dict(operation)\n        if idp_style == \"entra\":\n            cased[\"op\"] = operation[\"op\"].capitalize()\n        cased_operations.append(cased)\n    return {\n        \"schemas\": [SCIM_PATCH_SCHEMA],\n        \"Operations\": cased_operations,\n    }\n\n\ndef _create_scim_user(token: str, email: str, external_id: str) -> requests.Response:\n    return ScimClient.post(\n        \"/Users\", token, json=_make_user_resource(email, external_id)\n    )\n\n\ndef _create_scim_group(\n    token: str,\n    display_name: str,\n    external_id: str | None = None,\n    members: list[dict] | None = None,\n) -> requests.Response:\n    return ScimClient.post(\n        \"/Groups\",\n        token,\n        json=_make_group_resource(display_name, external_id, members),\n    )\n\n\n# ------------------------------------------------------------------\n# Lifecycle: create → get → list → replace → patch → delete\n# ------------------------------------------------------------------\n\n\ndef test_create_group(scim_token: str, idp_style: str) -> None:\n    \"\"\"POST /Groups creates a group and returns 201.\"\"\"\n    name = f\"Engineering {idp_style}\"\n    resp = _create_scim_group(scim_token, name, external_id=f\"ext-eng-{idp_style}\")\n    assert resp.status_code == 201\n\n    body = resp.json()\n    assert body[\"displayName\"] == name\n    assert body[\"externalId\"] == f\"ext-eng-{idp_style}\"\n    assert body[\"id\"]  # integer ID assigned by server\n    assert body[\"meta\"][\"resourceType\"] == \"Group\"\n\n\ndef test_create_group_with_members(scim_token: str, idp_style: str) -> None:\n    \"\"\"POST /Groups with members populates the member list.\"\"\"\n    user = _create_scim_user(\n        scim_token, f\"grp_member1_{idp_style}@example.com\", f\"ext-gm-{idp_style}\"\n    ).json()\n\n    resp = _create_scim_group(\n        scim_token,\n        f\"Backend Team {idp_style}\",\n        external_id=f\"ext-backend-{idp_style}\",\n        members=[{\"value\": user[\"id\"]}],\n    )\n    assert resp.status_code == 201\n\n    body = resp.json()\n    member_ids = [m[\"value\"] for m in body[\"members\"]]\n    assert user[\"id\"] in member_ids\n\n\ndef test_get_group(scim_token: str, idp_style: str) -> None:\n    \"\"\"GET /Groups/{id} returns the group resource including members.\"\"\"\n    user = _create_scim_user(\n        scim_token, f\"grp_get_m_{idp_style}@example.com\", f\"ext-ggm-{idp_style}\"\n    ).json()\n    created = _create_scim_group(\n        scim_token,\n        f\"Frontend Team {idp_style}\",\n        external_id=f\"ext-fe-{idp_style}\",\n        members=[{\"value\": user[\"id\"]}],\n    ).json()\n\n    resp = ScimClient.get(f\"/Groups/{created['id']}\", scim_token)\n    assert resp.status_code == 200\n\n    body = resp.json()\n    assert body[\"id\"] == created[\"id\"]\n    assert body[\"displayName\"] == f\"Frontend Team {idp_style}\"\n    assert body[\"externalId\"] == f\"ext-fe-{idp_style}\"\n    member_ids = [m[\"value\"] for m in body[\"members\"]]\n    assert user[\"id\"] in member_ids\n\n\ndef test_list_groups(scim_token: str, idp_style: str) -> None:\n    \"\"\"GET /Groups returns a ListResponse containing provisioned groups.\"\"\"\n    name = f\"DevOps Team {idp_style}\"\n    _create_scim_group(scim_token, name, external_id=f\"ext-devops-{idp_style}\")\n\n    resp = ScimClient.get(\"/Groups\", scim_token)\n    assert resp.status_code == 200\n\n    body = resp.json()\n    assert body[\"totalResults\"] >= 1\n    names = [r[\"displayName\"] for r in body[\"Resources\"]]\n    assert name in names\n\n\ndef test_list_groups_pagination(scim_token: str, idp_style: str) -> None:\n    \"\"\"GET /Groups with startIndex and count returns correct pagination.\"\"\"\n    _create_scim_group(\n        scim_token, f\"Page Group A {idp_style}\", external_id=f\"ext-page-a-{idp_style}\"\n    )\n    _create_scim_group(\n        scim_token, f\"Page Group B {idp_style}\", external_id=f\"ext-page-b-{idp_style}\"\n    )\n\n    resp = ScimClient.get(\"/Groups?startIndex=1&count=1\", scim_token)\n    assert resp.status_code == 200\n\n    body = resp.json()\n    assert body[\"startIndex\"] == 1\n    assert body[\"itemsPerPage\"] == 1\n    assert body[\"totalResults\"] >= 2\n    assert len(body[\"Resources\"]) == 1\n\n\ndef test_filter_groups_by_display_name(scim_token: str, idp_style: str) -> None:\n    \"\"\"GET /Groups?filter=displayName eq '...' returns only matching groups.\"\"\"\n    name = f\"Unique QA Team {idp_style}\"\n    _create_scim_group(scim_token, name, external_id=f\"ext-qa-filter-{idp_style}\")\n\n    resp = ScimClient.get(f'/Groups?filter=displayName eq \"{name}\"', scim_token)\n    assert resp.status_code == 200\n\n    body = resp.json()\n    assert body[\"totalResults\"] == 1\n    assert body[\"Resources\"][0][\"displayName\"] == name\n\n\ndef test_filter_groups_by_external_id(scim_token: str, idp_style: str) -> None:\n    \"\"\"GET /Groups?filter=externalId eq '...' returns the matching group.\"\"\"\n    ext_id = f\"ext-unique-group-id-{idp_style}\"\n    _create_scim_group(\n        scim_token, f\"ExtId Filter Group {idp_style}\", external_id=ext_id\n    )\n\n    resp = ScimClient.get(f'/Groups?filter=externalId eq \"{ext_id}\"', scim_token)\n    assert resp.status_code == 200\n\n    body = resp.json()\n    assert body[\"totalResults\"] == 1\n    assert body[\"Resources\"][0][\"externalId\"] == ext_id\n\n\ndef test_replace_group(scim_token: str, idp_style: str) -> None:\n    \"\"\"PUT /Groups/{id} replaces the group resource.\"\"\"\n    created = _create_scim_group(\n        scim_token,\n        f\"Original Name {idp_style}\",\n        external_id=f\"ext-replace-g-{idp_style}\",\n    ).json()\n\n    user = _create_scim_user(\n        scim_token, f\"grp_replace_m_{idp_style}@example.com\", f\"ext-grm-{idp_style}\"\n    ).json()\n\n    updated_resource = _make_group_resource(\n        display_name=f\"Renamed Group {idp_style}\",\n        external_id=f\"ext-replace-g-{idp_style}\",\n        members=[{\"value\": user[\"id\"]}],\n    )\n    resp = ScimClient.put(f\"/Groups/{created['id']}\", scim_token, json=updated_resource)\n    assert resp.status_code == 200\n\n    body = resp.json()\n    assert body[\"displayName\"] == f\"Renamed Group {idp_style}\"\n    member_ids = [m[\"value\"] for m in body[\"members\"]]\n    assert user[\"id\"] in member_ids\n\n\ndef test_replace_group_clears_members(scim_token: str, idp_style: str) -> None:\n    \"\"\"PUT /Groups/{id} with empty members removes all memberships.\"\"\"\n    user = _create_scim_user(\n        scim_token, f\"grp_clear_m_{idp_style}@example.com\", f\"ext-gcm-{idp_style}\"\n    ).json()\n    created = _create_scim_group(\n        scim_token,\n        f\"Clear Members Group {idp_style}\",\n        external_id=f\"ext-clear-g-{idp_style}\",\n        members=[{\"value\": user[\"id\"]}],\n    ).json()\n\n    assert len(created[\"members\"]) == 1\n\n    resp = ScimClient.put(\n        f\"/Groups/{created['id']}\",\n        scim_token,\n        json=_make_group_resource(\n            f\"Clear Members Group {idp_style}\", f\"ext-clear-g-{idp_style}\", members=[]\n        ),\n    )\n    assert resp.status_code == 200\n    assert resp.json()[\"members\"] == []\n\n\ndef test_patch_add_member(scim_token: str, idp_style: str) -> None:\n    \"\"\"PATCH /Groups/{id} with op=add adds a member.\"\"\"\n    created = _create_scim_group(\n        scim_token,\n        f\"Patch Add Group {idp_style}\",\n        external_id=f\"ext-patch-add-{idp_style}\",\n    ).json()\n    user = _create_scim_user(\n        scim_token, f\"grp_patch_add_{idp_style}@example.com\", f\"ext-gpa-{idp_style}\"\n    ).json()\n\n    resp = ScimClient.patch(\n        f\"/Groups/{created['id']}\",\n        scim_token,\n        json=_make_patch_request(\n            [{\"op\": \"add\", \"path\": \"members\", \"value\": [{\"value\": user[\"id\"]}]}],\n            idp_style,\n        ),\n    )\n    assert resp.status_code == 200\n\n    member_ids = [m[\"value\"] for m in resp.json()[\"members\"]]\n    assert user[\"id\"] in member_ids\n\n\ndef test_patch_remove_member(scim_token: str, idp_style: str) -> None:\n    \"\"\"PATCH /Groups/{id} with op=remove removes a specific member.\"\"\"\n    user = _create_scim_user(\n        scim_token, f\"grp_patch_rm_{idp_style}@example.com\", f\"ext-gpr-{idp_style}\"\n    ).json()\n    created = _create_scim_group(\n        scim_token,\n        f\"Patch Remove Group {idp_style}\",\n        external_id=f\"ext-patch-rm-{idp_style}\",\n        members=[{\"value\": user[\"id\"]}],\n    ).json()\n    assert len(created[\"members\"]) == 1\n\n    resp = ScimClient.patch(\n        f\"/Groups/{created['id']}\",\n        scim_token,\n        json=_make_patch_request(\n            [\n                {\n                    \"op\": \"remove\",\n                    \"path\": f'members[value eq \"{user[\"id\"]}\"]',\n                }\n            ],\n            idp_style,\n        ),\n    )\n    assert resp.status_code == 200\n    assert resp.json()[\"members\"] == []\n\n\ndef test_patch_replace_members(scim_token: str, idp_style: str) -> None:\n    \"\"\"PATCH /Groups/{id} with op=replace on members swaps the entire list.\"\"\"\n    user_a = _create_scim_user(\n        scim_token, f\"grp_repl_a_{idp_style}@example.com\", f\"ext-gra-{idp_style}\"\n    ).json()\n    user_b = _create_scim_user(\n        scim_token, f\"grp_repl_b_{idp_style}@example.com\", f\"ext-grb-{idp_style}\"\n    ).json()\n    created = _create_scim_group(\n        scim_token,\n        f\"Patch Replace Group {idp_style}\",\n        external_id=f\"ext-patch-repl-{idp_style}\",\n        members=[{\"value\": user_a[\"id\"]}],\n    ).json()\n\n    # Replace member list: swap A for B\n    resp = ScimClient.patch(\n        f\"/Groups/{created['id']}\",\n        scim_token,\n        json=_make_patch_request(\n            [\n                {\n                    \"op\": \"replace\",\n                    \"path\": \"members\",\n                    \"value\": [{\"value\": user_b[\"id\"]}],\n                }\n            ],\n            idp_style,\n        ),\n    )\n    assert resp.status_code == 200\n\n    member_ids = [m[\"value\"] for m in resp.json()[\"members\"]]\n    assert user_b[\"id\"] in member_ids\n    assert user_a[\"id\"] not in member_ids\n\n\ndef test_patch_rename_group(scim_token: str, idp_style: str) -> None:\n    \"\"\"PATCH /Groups/{id} with op=replace on displayName renames the group.\"\"\"\n    created = _create_scim_group(\n        scim_token,\n        f\"Old Group Name {idp_style}\",\n        external_id=f\"ext-rename-g-{idp_style}\",\n    ).json()\n\n    new_name = f\"New Group Name {idp_style}\"\n    resp = ScimClient.patch(\n        f\"/Groups/{created['id']}\",\n        scim_token,\n        json=_make_patch_request(\n            [{\"op\": \"replace\", \"path\": \"displayName\", \"value\": new_name}],\n            idp_style,\n        ),\n    )\n    assert resp.status_code == 200\n    assert resp.json()[\"displayName\"] == new_name\n\n    # Confirm via GET\n    get_resp = ScimClient.get(f\"/Groups/{created['id']}\", scim_token)\n    assert get_resp.json()[\"displayName\"] == new_name\n\n\ndef test_delete_group(scim_token: str, idp_style: str) -> None:\n    \"\"\"DELETE /Groups/{id} removes the group.\"\"\"\n    created = _create_scim_group(\n        scim_token,\n        f\"Delete Me Group {idp_style}\",\n        external_id=f\"ext-del-g-{idp_style}\",\n    ).json()\n\n    resp = ScimClient.delete(f\"/Groups/{created['id']}\", scim_token)\n    assert resp.status_code == 204\n\n    # Second DELETE returns 404 (group hard-deleted)\n    resp2 = ScimClient.delete(f\"/Groups/{created['id']}\", scim_token)\n    assert resp2.status_code == 404\n\n\ndef test_delete_group_preserves_members(scim_token: str, idp_style: str) -> None:\n    \"\"\"DELETE /Groups/{id} removes memberships but does not deactivate users.\"\"\"\n    user = _create_scim_user(\n        scim_token, f\"grp_del_member_{idp_style}@example.com\", f\"ext-gdm-{idp_style}\"\n    ).json()\n    created = _create_scim_group(\n        scim_token,\n        f\"Delete With Members {idp_style}\",\n        external_id=f\"ext-del-wm-{idp_style}\",\n        members=[{\"value\": user[\"id\"]}],\n    ).json()\n\n    resp = ScimClient.delete(f\"/Groups/{created['id']}\", scim_token)\n    assert resp.status_code == 204\n\n    # User should still be active and retrievable\n    user_resp = ScimClient.get(f\"/Users/{user['id']}\", scim_token)\n    assert user_resp.status_code == 200\n    assert user_resp.json()[\"active\"] is True\n\n\n# ------------------------------------------------------------------\n# Error cases\n# ------------------------------------------------------------------\n\n\ndef test_create_group_duplicate_name(scim_token: str, idp_style: str) -> None:\n    \"\"\"POST /Groups with an already-taken displayName returns 409.\"\"\"\n    name = f\"Dup Name Group {idp_style}\"\n    resp1 = _create_scim_group(scim_token, name, external_id=f\"ext-dup-g1-{idp_style}\")\n    assert resp1.status_code == 201\n\n    resp2 = _create_scim_group(scim_token, name, external_id=f\"ext-dup-g2-{idp_style}\")\n    assert resp2.status_code == 409\n\n\ndef test_get_nonexistent_group(scim_token: str) -> None:\n    \"\"\"GET /Groups/{bad-id} returns 404.\"\"\"\n    resp = ScimClient.get(\"/Groups/999999999\", scim_token)\n    assert resp.status_code == 404\n\n\ndef test_create_group_with_invalid_member(scim_token: str, idp_style: str) -> None:\n    \"\"\"POST /Groups with a non-existent member UUID returns 400.\"\"\"\n    resp = _create_scim_group(\n        scim_token,\n        f\"Bad Member Group {idp_style}\",\n        external_id=f\"ext-bad-m-{idp_style}\",\n        members=[{\"value\": \"00000000-0000-0000-0000-000000000000\"}],\n    )\n    assert resp.status_code == 400\n    assert \"not found\" in resp.json()[\"detail\"].lower()\n\n\ndef test_patch_add_nonexistent_member(scim_token: str, idp_style: str) -> None:\n    \"\"\"PATCH /Groups/{id} adding a non-existent member returns 400.\"\"\"\n    created = _create_scim_group(\n        scim_token,\n        f\"Patch Bad Member Group {idp_style}\",\n        external_id=f\"ext-pbm-{idp_style}\",\n    ).json()\n\n    resp = ScimClient.patch(\n        f\"/Groups/{created['id']}\",\n        scim_token,\n        json=_make_patch_request(\n            [\n                {\n                    \"op\": \"add\",\n                    \"path\": \"members\",\n                    \"value\": [{\"value\": \"00000000-0000-0000-0000-000000000000\"}],\n                }\n            ],\n            idp_style,\n        ),\n    )\n    assert resp.status_code == 400\n    assert \"not found\" in resp.json()[\"detail\"].lower()\n\n\ndef test_patch_add_duplicate_member_is_idempotent(\n    scim_token: str, idp_style: str\n) -> None:\n    \"\"\"PATCH /Groups/{id} adding an already-present member succeeds silently.\"\"\"\n    user = _create_scim_user(\n        scim_token, f\"grp_dup_add_{idp_style}@example.com\", f\"ext-gda-{idp_style}\"\n    ).json()\n    created = _create_scim_group(\n        scim_token,\n        f\"Idempotent Add Group {idp_style}\",\n        external_id=f\"ext-idem-g-{idp_style}\",\n        members=[{\"value\": user[\"id\"]}],\n    ).json()\n    assert len(created[\"members\"]) == 1\n\n    # Add same member again\n    resp = ScimClient.patch(\n        f\"/Groups/{created['id']}\",\n        scim_token,\n        json=_make_patch_request(\n            [{\"op\": \"add\", \"path\": \"members\", \"value\": [{\"value\": user[\"id\"]}]}],\n            idp_style,\n        ),\n    )\n    assert resp.status_code == 200\n    assert len(resp.json()[\"members\"]) == 1  # still just one member\n\n\ndef test_create_group_reserved_name_admin(scim_token: str) -> None:\n    \"\"\"POST /Groups with reserved name 'Admin' returns 409.\"\"\"\n    resp = _create_scim_group(scim_token, \"Admin\", external_id=\"ext-reserved-admin\")\n    assert resp.status_code == 409\n    assert \"reserved\" in resp.json()[\"detail\"].lower()\n\n\ndef test_create_group_reserved_name_basic(scim_token: str) -> None:\n    \"\"\"POST /Groups with reserved name 'Basic' returns 409.\"\"\"\n    resp = _create_scim_group(scim_token, \"Basic\", external_id=\"ext-reserved-basic\")\n    assert resp.status_code == 409\n    assert \"reserved\" in resp.json()[\"detail\"].lower()\n\n\ndef test_replace_group_cannot_rename_to_reserved(\n    scim_token: str, idp_style: str\n) -> None:\n    \"\"\"PUT /Groups/{id} renaming a group to 'Admin' returns 409.\"\"\"\n    created = _create_scim_group(\n        scim_token,\n        f\"Rename To Reserved {idp_style}\",\n        external_id=f\"ext-rtr-{idp_style}\",\n    ).json()\n\n    resp = ScimClient.put(\n        f\"/Groups/{created['id']}\",\n        scim_token,\n        json=_make_group_resource(\n            display_name=\"Admin\", external_id=f\"ext-rtr-{idp_style}\"\n        ),\n    )\n    assert resp.status_code == 409\n    assert \"reserved\" in resp.json()[\"detail\"].lower()\n\n\ndef test_patch_rename_to_reserved_name(scim_token: str, idp_style: str) -> None:\n    \"\"\"PATCH /Groups/{id} renaming a group to 'Basic' returns 409.\"\"\"\n    created = _create_scim_group(\n        scim_token,\n        f\"Patch Rename Reserved {idp_style}\",\n        external_id=f\"ext-prr-{idp_style}\",\n    ).json()\n\n    resp = ScimClient.patch(\n        f\"/Groups/{created['id']}\",\n        scim_token,\n        json=_make_patch_request(\n            [{\"op\": \"replace\", \"path\": \"displayName\", \"value\": \"Basic\"}],\n            idp_style,\n        ),\n    )\n    assert resp.status_code == 409\n    assert \"reserved\" in resp.json()[\"detail\"].lower()\n\n\ndef test_delete_reserved_group_rejected(scim_token: str) -> None:\n    \"\"\"DELETE /Groups/{id} on a reserved group ('Admin') returns 409.\"\"\"\n    # Look up the reserved 'Admin' group via SCIM filter\n    resp = ScimClient.get('/Groups?filter=displayName eq \"Admin\"', scim_token)\n    assert resp.status_code == 200\n    resources = resp.json()[\"Resources\"]\n    assert len(resources) >= 1, \"Expected reserved 'Admin' group to exist\"\n    admin_group_id = resources[0][\"id\"]\n\n    resp = ScimClient.delete(f\"/Groups/{admin_group_id}\", scim_token)\n    assert resp.status_code == 409\n    assert \"reserved\" in resp.json()[\"detail\"].lower()\n\n\ndef test_scim_created_group_has_basic_permission(\n    scim_token: str, idp_style: str\n) -> None:\n    \"\"\"POST /Groups assigns the 'basic' permission to the group itself.\"\"\"\n    # Create a SCIM group (no members needed — we check the group's permissions)\n    resp = _create_scim_group(\n        scim_token,\n        f\"Basic Perm Group {idp_style}\",\n        external_id=f\"ext-basic-perm-{idp_style}\",\n    )\n    assert resp.status_code == 201\n    group_id = resp.json()[\"id\"]\n\n    # Log in as the admin user (created by the scim_token fixture).\n    admin = DATestUser(\n        id=\"\",\n        email=build_email(ADMIN_USER_NAME),\n        password=DEFAULT_PASSWORD,\n        headers=GENERAL_HEADERS,\n        role=UserRole.ADMIN,\n        is_active=True,\n    )\n    admin = UserManager.login_as_user(admin)\n\n    # Verify the group itself was granted the basic permission\n    perms_resp = requests.get(\n        f\"{API_SERVER_URL}/manage/admin/user-group/{group_id}/permissions\",\n        headers=admin.headers,\n    )\n    perms_resp.raise_for_status()\n    perms = perms_resp.json()\n    assert \"basic\" in perms, f\"SCIM group should have 'basic' permission, got: {perms}\"\n\n\ndef test_replace_group_cannot_rename_from_reserved(scim_token: str) -> None:\n    \"\"\"PUT /Groups/{id} renaming a reserved group ('Admin') to a non-reserved name returns 409.\"\"\"\n    resp = ScimClient.get('/Groups?filter=displayName eq \"Admin\"', scim_token)\n    assert resp.status_code == 200\n    resources = resp.json()[\"Resources\"]\n    assert len(resources) >= 1, \"Expected reserved 'Admin' group to exist\"\n    admin_group_id = resources[0][\"id\"]\n\n    resp = ScimClient.put(\n        f\"/Groups/{admin_group_id}\",\n        scim_token,\n        json=_make_group_resource(\n            display_name=\"RenamedAdmin\", external_id=\"ext-rename-from-reserved\"\n        ),\n    )\n    assert resp.status_code == 409\n    assert \"reserved\" in resp.json()[\"detail\"].lower()\n\n\ndef test_patch_rename_from_reserved_name(scim_token: str, idp_style: str) -> None:\n    \"\"\"PATCH /Groups/{id} renaming a reserved group ('Admin') returns 409.\"\"\"\n    resp = ScimClient.get('/Groups?filter=displayName eq \"Admin\"', scim_token)\n    assert resp.status_code == 200\n    resources = resp.json()[\"Resources\"]\n    assert len(resources) >= 1, \"Expected reserved 'Admin' group to exist\"\n    admin_group_id = resources[0][\"id\"]\n\n    resp = ScimClient.patch(\n        f\"/Groups/{admin_group_id}\",\n        scim_token,\n        json=_make_patch_request(\n            [{\"op\": \"replace\", \"path\": \"displayName\", \"value\": \"RenamedAdmin\"}],\n            idp_style,\n        ),\n    )\n    assert resp.status_code == 409\n    assert \"reserved\" in resp.json()[\"detail\"].lower()\n"
  },
  {
    "path": "backend/tests/integration/tests/scim/test_scim_tokens.py",
    "content": "\"\"\"Integration tests for SCIM token management.\n\nCovers the admin token API and SCIM bearer-token authentication:\n1. Token lifecycle: create, retrieve metadata, use for SCIM requests\n2. Token rotation: creating a new token revokes previous tokens\n3. Revoked tokens are rejected by SCIM endpoints\n4. Non-admin users cannot manage SCIM tokens\n5. SCIM requests without a token are rejected\n6. Service discovery endpoints work without authentication\n7. last_used_at is updated after a SCIM request\n\"\"\"\n\nimport time\n\nimport requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.scim_client import ScimClient\nfrom tests.integration.common_utils.managers.scim_token import ScimTokenManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_scim_token_lifecycle(admin_user: DATestUser) -> None:\n    \"\"\"Create token → retrieve metadata → use for SCIM request.\"\"\"\n    token = ScimTokenManager.create(\n        name=\"Test SCIM Token\",\n        user_performing_action=admin_user,\n    )\n\n    assert token.raw_token is not None\n    assert token.raw_token.startswith(\"onyx_scim_\")\n    assert token.is_active is True\n    assert \"****\" in token.token_display\n\n    # GET returns the same metadata but raw_token is None because the\n    # server only reveals the raw token once at creation time (it stores\n    # only the SHA-256 hash).\n    active = ScimTokenManager.get_active(user_performing_action=admin_user)\n    assert active == token.model_copy(update={\"raw_token\": None})\n\n    # Token works for SCIM requests\n    response = ScimClient.get(\"/Users\", token.raw_token)\n    assert response.status_code == 200\n    body = response.json()\n    assert \"Resources\" in body\n    assert body[\"totalResults\"] >= 0\n\n\ndef test_scim_token_rotation_revokes_previous(admin_user: DATestUser) -> None:\n    \"\"\"Creating a new token automatically revokes the previous one.\"\"\"\n    first = ScimTokenManager.create(\n        name=\"First Token\",\n        user_performing_action=admin_user,\n    )\n    assert first.raw_token is not None\n\n    response = ScimClient.get(\"/Users\", first.raw_token)\n    assert response.status_code == 200\n\n    # Create second token — should revoke first\n    second = ScimTokenManager.create(\n        name=\"Second Token\",\n        user_performing_action=admin_user,\n    )\n    assert second.raw_token is not None\n\n    # Active token should now be the second one\n    active = ScimTokenManager.get_active(user_performing_action=admin_user)\n    assert active == second.model_copy(update={\"raw_token\": None})\n\n    # First token rejected, second works\n    assert ScimClient.get(\"/Users\", first.raw_token).status_code == 401\n    assert ScimClient.get(\"/Users\", second.raw_token).status_code == 200\n\n\ndef test_scim_request_without_token_rejected(\n    admin_user: DATestUser,  # noqa: ARG001\n) -> None:\n    \"\"\"SCIM endpoints reject requests with no Authorization header.\"\"\"\n    assert ScimClient.get_no_auth(\"/Users\").status_code == 401\n\n\ndef test_scim_request_with_bad_token_rejected(\n    admin_user: DATestUser,  # noqa: ARG001\n) -> None:\n    \"\"\"SCIM endpoints reject requests with an invalid token.\"\"\"\n    assert ScimClient.get(\"/Users\", \"onyx_scim_bogus_token_value\").status_code == 401\n\n\ndef test_non_admin_cannot_create_token(\n    admin_user: DATestUser,  # noqa: ARG001\n) -> None:\n    \"\"\"Non-admin users get 403 when trying to create a SCIM token.\"\"\"\n    basic_user = UserManager.create(name=\"scim_basic_user\")\n\n    response = requests.post(\n        f\"{API_SERVER_URL}/admin/enterprise-settings/scim/token\",\n        json={\"name\": \"Should Fail\"},\n        headers=basic_user.headers,\n        timeout=60,\n    )\n    assert response.status_code == 403\n\n\ndef test_non_admin_cannot_get_token(\n    admin_user: DATestUser,  # noqa: ARG001\n) -> None:\n    \"\"\"Non-admin users get 403 when trying to retrieve SCIM token metadata.\"\"\"\n    basic_user = UserManager.create(name=\"scim_basic_user2\")\n\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/enterprise-settings/scim/token\",\n        headers=basic_user.headers,\n        timeout=60,\n    )\n    assert response.status_code == 403\n\n\ndef test_no_active_token_returns_404(new_admin_user: DATestUser) -> None:\n    \"\"\"GET active token returns 404 when no token exists.\"\"\"\n    # new_admin_user depends on the reset fixture, ensuring a clean DB\n    # with no active SCIM tokens.\n    active = ScimTokenManager.get_active(user_performing_action=new_admin_user)\n    assert active is None\n\n    response = requests.get(\n        f\"{API_SERVER_URL}/admin/enterprise-settings/scim/token\",\n        headers=new_admin_user.headers,\n        timeout=60,\n    )\n    assert response.status_code == 404\n\n\ndef test_service_discovery_no_auth_required(\n    admin_user: DATestUser,  # noqa: ARG001\n) -> None:\n    \"\"\"Service discovery endpoints work without any authentication.\"\"\"\n    for path in [\"/ServiceProviderConfig\", \"/ResourceTypes\", \"/Schemas\"]:\n        response = ScimClient.get_no_auth(path)\n        assert response.status_code == 200, f\"{path} returned {response.status_code}\"\n\n\ndef test_last_used_at_updated_after_scim_request(\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"last_used_at timestamp is updated after using the token.\"\"\"\n    token = ScimTokenManager.create(\n        name=\"Last Used Token\",\n        user_performing_action=admin_user,\n    )\n    assert token.raw_token is not None\n\n    active = ScimTokenManager.get_active(user_performing_action=admin_user)\n    assert active is not None\n    assert active.last_used_at is None\n\n    # Make a SCIM request, then verify last_used_at is set\n    assert ScimClient.get(\"/Users\", token.raw_token).status_code == 200\n    time.sleep(0.5)\n\n    active_after = ScimTokenManager.get_active(user_performing_action=admin_user)\n    assert active_after is not None\n    assert active_after.last_used_at is not None\n"
  },
  {
    "path": "backend/tests/integration/tests/scim/test_scim_users.py",
    "content": "\"\"\"Integration tests for SCIM user provisioning endpoints.\n\nCovers the full user lifecycle as driven by an IdP (Okta / Azure AD):\n1. Create a user via POST /Users\n2. Retrieve a user via GET /Users/{id}\n3. List, filter, and paginate users via GET /Users\n4. Replace a user via PUT /Users/{id}\n5. Patch a user (deactivate/reactivate) via PATCH /Users/{id}\n6. Delete a user via DELETE /Users/{id}\n7. Error cases: missing externalId, duplicate email, not-found, seat limit\n\nAll tests are parameterized across IdP request styles:\n- **Okta**: lowercase PATCH ops, minimal payloads (core schema only).\n- **Entra**: capitalized ops (``\"Replace\"``), enterprise extension data\n  (department, manager), and structured email arrays.\n\nThe server normalizes both — these tests verify that all IdP-specific fields\nare accepted and round-tripped correctly.\n\nAuth, revoked-token, and service-discovery tests live in test_scim_tokens.py.\n\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nimport pytest\nimport redis\nimport requests\n\nfrom ee.onyx.server.license.models import LicenseMetadata\nfrom ee.onyx.server.license.models import LicenseSource\nfrom ee.onyx.server.license.models import PlanType\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.app_configs import REDIS_DB_NUMBER\nfrom onyx.configs.app_configs import REDIS_HOST\nfrom onyx.configs.app_configs import REDIS_PORT\nfrom onyx.db.enums import AccountType\nfrom onyx.server.settings.models import ApplicationStatus\nfrom tests.integration.common_utils.constants import ADMIN_USER_NAME\nfrom tests.integration.common_utils.constants import GENERAL_HEADERS\nfrom tests.integration.common_utils.managers.scim_client import ScimClient\nfrom tests.integration.common_utils.managers.scim_token import ScimTokenManager\nfrom tests.integration.common_utils.managers.user import build_email\nfrom tests.integration.common_utils.managers.user import DEFAULT_PASSWORD\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nSCIM_USER_SCHEMA = \"urn:ietf:params:scim:schemas:core:2.0:User\"\nSCIM_ENTERPRISE_USER_SCHEMA = (\n    \"urn:ietf:params:scim:schemas:extension:enterprise:2.0:User\"\n)\nSCIM_PATCH_SCHEMA = \"urn:ietf:params:scim:api:messages:2.0:PatchOp\"\n\n_LICENSE_REDIS_KEY = \"public:license:metadata\"\n\n\n@pytest.fixture(scope=\"module\", params=[\"okta\", \"entra\"])\ndef idp_style(request: pytest.FixtureRequest) -> str:\n    \"\"\"Parameterized IdP style — runs every test with both Okta and Entra request formats.\"\"\"\n    return request.param\n\n\n@pytest.fixture(scope=\"module\")\ndef scim_token(idp_style: str) -> str:\n    \"\"\"Create a single SCIM token shared across all tests in this module.\n\n    Creating a new token revokes the previous one, so we create exactly once\n    per IdP-style run and reuse. Uses UserManager directly to avoid\n    fixture-scope conflicts with the function-scoped admin_user fixture.\n    \"\"\"\n    from tests.integration.common_utils.constants import ADMIN_USER_NAME\n    from tests.integration.common_utils.constants import GENERAL_HEADERS\n    from tests.integration.common_utils.managers.user import build_email\n    from tests.integration.common_utils.managers.user import DEFAULT_PASSWORD\n    from tests.integration.common_utils.managers.user import UserManager\n    from tests.integration.common_utils.test_models import DATestUser\n\n    try:\n        admin = UserManager.create(name=ADMIN_USER_NAME)\n    except Exception:\n        admin = UserManager.login_as_user(\n            DATestUser(\n                id=\"\",\n                email=build_email(ADMIN_USER_NAME),\n                password=DEFAULT_PASSWORD,\n                headers=GENERAL_HEADERS,\n                role=UserRole.ADMIN,\n                is_active=True,\n            )\n        )\n\n    token = ScimTokenManager.create(\n        name=f\"scim-user-tests-{idp_style}\",\n        user_performing_action=admin,\n    ).raw_token\n    assert token is not None\n    return token\n\n\ndef _make_user_resource(\n    email: str,\n    external_id: str,\n    given_name: str = \"Test\",\n    family_name: str = \"User\",\n    active: bool = True,\n    idp_style: str = \"okta\",\n    department: str | None = None,\n    manager_id: str | None = None,\n) -> dict:\n    \"\"\"Build a SCIM UserResource payload appropriate for the IdP style.\n\n    Entra sends richer payloads including enterprise extension data (department,\n    manager), structured email arrays, and the enterprise schema URN. Okta sends\n    minimal payloads with just core user fields.\n    \"\"\"\n    resource: dict = {\n        \"schemas\": [SCIM_USER_SCHEMA],\n        \"userName\": email,\n        \"externalId\": external_id,\n        \"name\": {\n            \"givenName\": given_name,\n            \"familyName\": family_name,\n        },\n        \"active\": active,\n    }\n    if idp_style == \"entra\":\n        dept = department or \"Engineering\"\n        mgr = manager_id or \"mgr-ext-001\"\n        resource[\"schemas\"].append(SCIM_ENTERPRISE_USER_SCHEMA)\n        resource[SCIM_ENTERPRISE_USER_SCHEMA] = {\n            \"department\": dept,\n            \"manager\": {\"value\": mgr},\n        }\n        resource[\"emails\"] = [\n            {\"value\": email, \"type\": \"work\", \"primary\": True},\n        ]\n    return resource\n\n\ndef _make_patch_request(operations: list[dict], idp_style: str = \"okta\") -> dict:\n    \"\"\"Build a SCIM PatchOp payload, applying IdP-specific operation casing.\n\n    Entra sends capitalized operations (e.g. ``\"Replace\"`` instead of\n    ``\"replace\"``). The server's ``normalize_operation`` validator lowercases\n    them — these tests verify that both casings are accepted.\n    \"\"\"\n    cased_operations = []\n    for operation in operations:\n        cased = dict(operation)\n        if idp_style == \"entra\":\n            cased[\"op\"] = operation[\"op\"].capitalize()\n        cased_operations.append(cased)\n    return {\n        \"schemas\": [SCIM_PATCH_SCHEMA],\n        \"Operations\": cased_operations,\n    }\n\n\ndef _create_scim_user(\n    token: str,\n    email: str,\n    external_id: str,\n    idp_style: str = \"okta\",\n) -> requests.Response:\n    return ScimClient.post(\n        \"/Users\",\n        token,\n        json=_make_user_resource(email, external_id, idp_style=idp_style),\n    )\n\n\ndef _assert_entra_extension(\n    body: dict,\n    expected_department: str = \"Engineering\",\n    expected_manager: str = \"mgr-ext-001\",\n) -> None:\n    \"\"\"Assert that Entra enterprise extension fields round-tripped correctly.\"\"\"\n    assert SCIM_ENTERPRISE_USER_SCHEMA in body[\"schemas\"]\n    ext = body[SCIM_ENTERPRISE_USER_SCHEMA]\n    assert ext[\"department\"] == expected_department\n    assert ext[\"manager\"][\"value\"] == expected_manager\n\n\ndef _assert_entra_emails(body: dict, expected_email: str) -> None:\n    \"\"\"Assert that structured email metadata round-tripped correctly.\"\"\"\n    emails = body[\"emails\"]\n    assert len(emails) >= 1\n    work_email = next(e for e in emails if e.get(\"type\") == \"work\")\n    assert work_email[\"value\"] == expected_email\n    assert work_email[\"primary\"] is True\n\n\n# ------------------------------------------------------------------\n# Lifecycle: create -> get -> list -> replace -> patch -> delete\n# ------------------------------------------------------------------\n\n\ndef test_create_user(scim_token: str, idp_style: str) -> None:\n    \"\"\"POST /Users creates a provisioned user and returns 201.\"\"\"\n    email = f\"scim_create_{idp_style}@example.com\"\n    ext_id = f\"ext-create-{idp_style}\"\n    resp = _create_scim_user(scim_token, email, ext_id, idp_style)\n    assert resp.status_code == 201\n\n    body = resp.json()\n    assert body[\"userName\"] == email\n    assert body[\"externalId\"] == ext_id\n    assert body[\"active\"] is True\n    assert body[\"id\"]  # UUID assigned by server\n    assert body[\"meta\"][\"resourceType\"] == \"User\"\n    assert body[\"name\"][\"givenName\"] == \"Test\"\n    assert body[\"name\"][\"familyName\"] == \"User\"\n\n    if idp_style == \"entra\":\n        _assert_entra_extension(body)\n        _assert_entra_emails(body, email)\n\n\ndef test_create_user_default_group_and_account_type(\n    scim_token: str, idp_style: str\n) -> None:\n    \"\"\"SCIM-provisioned users get Basic default group and STANDARD account_type.\"\"\"\n    email = f\"scim_defaults_{idp_style}@example.com\"\n    ext_id = f\"ext-defaults-{idp_style}\"\n    resp = _create_scim_user(scim_token, email, ext_id, idp_style)\n    assert resp.status_code == 201\n    user_id = resp.json()[\"id\"]\n\n    # --- Verify group assignment via SCIM GET ---\n    get_resp = ScimClient.get(f\"/Users/{user_id}\", scim_token)\n    assert get_resp.status_code == 200\n    groups = get_resp.json().get(\"groups\", [])\n    group_names = {g[\"display\"] for g in groups}\n    assert \"Basic\" in group_names, f\"Expected 'Basic' in groups, got {group_names}\"\n    assert \"Admin\" not in group_names, \"SCIM user should not be in Admin group\"\n\n    # --- Verify account_type via admin API ---\n    admin = UserManager.login_as_user(\n        DATestUser(\n            id=\"\",\n            email=build_email(ADMIN_USER_NAME),\n            password=DEFAULT_PASSWORD,\n            headers=GENERAL_HEADERS,\n            role=UserRole.ADMIN,\n            is_active=True,\n        )\n    )\n    page = UserManager.get_user_page(\n        user_performing_action=admin,\n        search_query=email,\n    )\n    assert page.total_items >= 1\n    scim_user_snapshot = next((u for u in page.items if u.email == email), None)\n    assert (\n        scim_user_snapshot is not None\n    ), f\"SCIM user {email} not found in user listing\"\n    assert (\n        scim_user_snapshot.account_type == AccountType.STANDARD\n    ), f\"Expected STANDARD, got {scim_user_snapshot.account_type}\"\n\n\ndef test_get_user(scim_token: str, idp_style: str) -> None:\n    \"\"\"GET /Users/{id} returns the user resource with all stored fields.\"\"\"\n    email = f\"scim_get_{idp_style}@example.com\"\n    ext_id = f\"ext-get-{idp_style}\"\n    created = _create_scim_user(scim_token, email, ext_id, idp_style).json()\n\n    resp = ScimClient.get(f\"/Users/{created['id']}\", scim_token)\n    assert resp.status_code == 200\n\n    body = resp.json()\n    assert body[\"id\"] == created[\"id\"]\n    assert body[\"userName\"] == email\n    assert body[\"externalId\"] == ext_id\n    assert body[\"name\"][\"givenName\"] == \"Test\"\n    assert body[\"name\"][\"familyName\"] == \"User\"\n\n    if idp_style == \"entra\":\n        _assert_entra_extension(body)\n        _assert_entra_emails(body, email)\n\n\ndef test_list_users(scim_token: str, idp_style: str) -> None:\n    \"\"\"GET /Users returns a ListResponse containing provisioned users.\"\"\"\n    email = f\"scim_list_{idp_style}@example.com\"\n    _create_scim_user(scim_token, email, f\"ext-list-{idp_style}\", idp_style)\n\n    resp = ScimClient.get(\"/Users\", scim_token)\n    assert resp.status_code == 200\n\n    body = resp.json()\n    assert body[\"totalResults\"] >= 1\n    emails = [r[\"userName\"] for r in body[\"Resources\"]]\n    assert email in emails\n\n\ndef test_list_users_pagination(scim_token: str, idp_style: str) -> None:\n    \"\"\"GET /Users with startIndex and count returns correct pagination.\"\"\"\n    _create_scim_user(\n        scim_token,\n        f\"scim_page1_{idp_style}@example.com\",\n        f\"ext-page-1-{idp_style}\",\n        idp_style,\n    )\n    _create_scim_user(\n        scim_token,\n        f\"scim_page2_{idp_style}@example.com\",\n        f\"ext-page-2-{idp_style}\",\n        idp_style,\n    )\n\n    resp = ScimClient.get(\"/Users?startIndex=1&count=1\", scim_token)\n    assert resp.status_code == 200\n\n    body = resp.json()\n    assert body[\"startIndex\"] == 1\n    assert body[\"itemsPerPage\"] == 1\n    assert body[\"totalResults\"] >= 2\n    assert len(body[\"Resources\"]) == 1\n\n\ndef test_filter_users_by_username(scim_token: str, idp_style: str) -> None:\n    \"\"\"GET /Users?filter=userName eq '...' returns only matching users.\"\"\"\n    email = f\"scim_filter_{idp_style}@example.com\"\n    _create_scim_user(scim_token, email, f\"ext-filter-{idp_style}\", idp_style)\n\n    resp = ScimClient.get(f'/Users?filter=userName eq \"{email}\"', scim_token)\n    assert resp.status_code == 200\n\n    body = resp.json()\n    assert body[\"totalResults\"] == 1\n    assert body[\"Resources\"][0][\"userName\"] == email\n\n\ndef test_replace_user(scim_token: str, idp_style: str) -> None:\n    \"\"\"PUT /Users/{id} replaces the user resource including enterprise fields.\"\"\"\n    email = f\"scim_replace_{idp_style}@example.com\"\n    ext_id = f\"ext-replace-{idp_style}\"\n    created = _create_scim_user(scim_token, email, ext_id, idp_style).json()\n\n    updated_resource = _make_user_resource(\n        email=email,\n        external_id=ext_id,\n        given_name=\"Updated\",\n        family_name=\"Name\",\n        idp_style=idp_style,\n        department=\"Product\",\n    )\n    resp = ScimClient.put(f\"/Users/{created['id']}\", scim_token, json=updated_resource)\n    assert resp.status_code == 200\n\n    body = resp.json()\n    assert body[\"name\"][\"givenName\"] == \"Updated\"\n    assert body[\"name\"][\"familyName\"] == \"Name\"\n\n    if idp_style == \"entra\":\n        _assert_entra_extension(body, expected_department=\"Product\")\n        _assert_entra_emails(body, email)\n\n\ndef test_patch_deactivate_user(scim_token: str, idp_style: str) -> None:\n    \"\"\"PATCH /Users/{id} with active=false deactivates the user.\"\"\"\n    created = _create_scim_user(\n        scim_token,\n        f\"scim_deactivate_{idp_style}@example.com\",\n        f\"ext-deactivate-{idp_style}\",\n        idp_style,\n    ).json()\n    assert created[\"active\"] is True\n\n    resp = ScimClient.patch(\n        f\"/Users/{created['id']}\",\n        scim_token,\n        json=_make_patch_request(\n            [{\"op\": \"replace\", \"path\": \"active\", \"value\": False}], idp_style\n        ),\n    )\n    assert resp.status_code == 200\n    assert resp.json()[\"active\"] is False\n\n    # Confirm via GET\n    get_resp = ScimClient.get(f\"/Users/{created['id']}\", scim_token)\n    assert get_resp.json()[\"active\"] is False\n\n\ndef test_patch_reactivate_user(scim_token: str, idp_style: str) -> None:\n    \"\"\"PATCH active=true reactivates a previously deactivated user.\"\"\"\n    created = _create_scim_user(\n        scim_token,\n        f\"scim_reactivate_{idp_style}@example.com\",\n        f\"ext-reactivate-{idp_style}\",\n        idp_style,\n    ).json()\n\n    # Deactivate\n    deactivate_resp = ScimClient.patch(\n        f\"/Users/{created['id']}\",\n        scim_token,\n        json=_make_patch_request(\n            [{\"op\": \"replace\", \"path\": \"active\", \"value\": False}], idp_style\n        ),\n    )\n    assert deactivate_resp.status_code == 200\n    assert deactivate_resp.json()[\"active\"] is False\n\n    # Reactivate\n    resp = ScimClient.patch(\n        f\"/Users/{created['id']}\",\n        scim_token,\n        json=_make_patch_request(\n            [{\"op\": \"replace\", \"path\": \"active\", \"value\": True}], idp_style\n        ),\n    )\n    assert resp.status_code == 200\n    assert resp.json()[\"active\"] is True\n\n\ndef test_delete_user(scim_token: str, idp_style: str) -> None:\n    \"\"\"DELETE /Users/{id} deactivates and removes the SCIM mapping.\"\"\"\n    created = _create_scim_user(\n        scim_token,\n        f\"scim_delete_{idp_style}@example.com\",\n        f\"ext-delete-{idp_style}\",\n        idp_style,\n    ).json()\n\n    resp = ScimClient.delete(f\"/Users/{created['id']}\", scim_token)\n    assert resp.status_code == 204\n\n    # Second DELETE returns 404 per RFC 7644 §3.6 (mapping removed)\n    resp2 = ScimClient.delete(f\"/Users/{created['id']}\", scim_token)\n    assert resp2.status_code == 404\n\n\n# ------------------------------------------------------------------\n# Error cases\n# ------------------------------------------------------------------\n\n\ndef test_create_user_missing_external_id(scim_token: str, idp_style: str) -> None:\n    \"\"\"POST /Users without externalId succeeds (RFC 7643: externalId is optional).\"\"\"\n    email = f\"scim_no_extid_{idp_style}@example.com\"\n    resp = ScimClient.post(\n        \"/Users\",\n        scim_token,\n        json={\n            \"schemas\": [SCIM_USER_SCHEMA],\n            \"userName\": email,\n            \"active\": True,\n        },\n    )\n    assert resp.status_code == 201\n    body = resp.json()\n    assert body[\"userName\"] == email\n    assert body.get(\"externalId\") is None\n\n\ndef test_create_user_duplicate_email(scim_token: str, idp_style: str) -> None:\n    \"\"\"POST /Users with an already-taken email returns 409.\"\"\"\n    email = f\"scim_dup_{idp_style}@example.com\"\n    resp1 = _create_scim_user(scim_token, email, f\"ext-dup-1-{idp_style}\", idp_style)\n    assert resp1.status_code == 201\n\n    resp2 = _create_scim_user(scim_token, email, f\"ext-dup-2-{idp_style}\", idp_style)\n    assert resp2.status_code == 409\n\n\ndef test_get_nonexistent_user(scim_token: str) -> None:\n    \"\"\"GET /Users/{bad-id} returns 404.\"\"\"\n    resp = ScimClient.get(\"/Users/00000000-0000-0000-0000-000000000000\", scim_token)\n    assert resp.status_code == 404\n\n\ndef test_filter_users_by_external_id(scim_token: str, idp_style: str) -> None:\n    \"\"\"GET /Users?filter=externalId eq '...' returns the matching user.\"\"\"\n    ext_id = f\"ext-unique-filter-id-{idp_style}\"\n    _create_scim_user(\n        scim_token, f\"scim_extfilter_{idp_style}@example.com\", ext_id, idp_style\n    )\n\n    resp = ScimClient.get(f'/Users?filter=externalId eq \"{ext_id}\"', scim_token)\n    assert resp.status_code == 200\n\n    body = resp.json()\n    assert body[\"totalResults\"] == 1\n    assert body[\"Resources\"][0][\"externalId\"] == ext_id\n\n\n# ------------------------------------------------------------------\n# Seat-limit enforcement\n# ------------------------------------------------------------------\n\n\ndef _seed_license(r: redis.Redis, seats: int) -> None:\n    \"\"\"Write a LicenseMetadata entry into Redis with the given seat cap.\"\"\"\n    now = datetime.now(timezone.utc)\n    metadata = LicenseMetadata(\n        tenant_id=\"public\",\n        organization_name=\"Test Org\",\n        seats=seats,\n        used_seats=0,  # check_seat_availability recalculates from DB\n        plan_type=PlanType.ANNUAL,\n        issued_at=now,\n        expires_at=now + timedelta(days=365),\n        status=ApplicationStatus.ACTIVE,\n        source=LicenseSource.MANUAL_UPLOAD,\n    )\n    r.set(_LICENSE_REDIS_KEY, metadata.model_dump_json(), ex=300)\n\n\ndef test_create_user_seat_limit(scim_token: str, idp_style: str) -> None:\n    \"\"\"POST /Users returns 403 when the seat limit is reached.\"\"\"\n    r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB_NUMBER)\n\n    # admin_user already occupies 1 seat; cap at 1 -> full\n    _seed_license(r, seats=1)\n\n    try:\n        resp = _create_scim_user(\n            scim_token,\n            f\"scim_blocked_{idp_style}@example.com\",\n            f\"ext-blocked-{idp_style}\",\n            idp_style,\n        )\n        assert resp.status_code == 403\n        assert \"seat\" in resp.json()[\"detail\"].lower()\n    finally:\n        r.delete(_LICENSE_REDIS_KEY)\n\n\ndef test_reactivate_user_seat_limit(scim_token: str, idp_style: str) -> None:\n    \"\"\"PATCH active=true returns 403 when the seat limit is reached.\"\"\"\n    # Create and deactivate a user (before license is seeded)\n    created = _create_scim_user(\n        scim_token,\n        f\"scim_reactivate_blocked_{idp_style}@example.com\",\n        f\"ext-reactivate-blocked-{idp_style}\",\n        idp_style,\n    ).json()\n    assert created[\"active\"] is True\n\n    deactivate_resp = ScimClient.patch(\n        f\"/Users/{created['id']}\",\n        scim_token,\n        json=_make_patch_request(\n            [{\"op\": \"replace\", \"path\": \"active\", \"value\": False}], idp_style\n        ),\n    )\n    assert deactivate_resp.status_code == 200\n    assert deactivate_resp.json()[\"active\"] is False\n\n    r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB_NUMBER)\n\n    # Seed license capped at current active users -> reactivation should fail\n    _seed_license(r, seats=1)\n\n    try:\n        resp = ScimClient.patch(\n            f\"/Users/{created['id']}\",\n            scim_token,\n            json=_make_patch_request(\n                [{\"op\": \"replace\", \"path\": \"active\", \"value\": True}], idp_style\n            ),\n        )\n        assert resp.status_code == 403\n        assert \"seat\" in resp.json()[\"detail\"].lower()\n    finally:\n        r.delete(_LICENSE_REDIS_KEY)\n"
  },
  {
    "path": "backend/tests/integration/tests/search_settings/test_search_settings.py",
    "content": "import requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nSEARCH_SETTINGS_URL = f\"{API_SERVER_URL}/search-settings\"\n\n\ndef _get_current_search_settings(user: DATestUser) -> dict:\n    response = requests.get(\n        f\"{SEARCH_SETTINGS_URL}/get-current-search-settings\",\n        headers=user.headers,\n    )\n    response.raise_for_status()\n    return response.json()\n\n\ndef _get_all_search_settings(user: DATestUser) -> dict:\n    response = requests.get(\n        f\"{SEARCH_SETTINGS_URL}/get-all-search-settings\",\n        headers=user.headers,\n    )\n    response.raise_for_status()\n    return response.json()\n\n\ndef _get_secondary_search_settings(user: DATestUser) -> dict | None:\n    response = requests.get(\n        f\"{SEARCH_SETTINGS_URL}/get-secondary-search-settings\",\n        headers=user.headers,\n    )\n    response.raise_for_status()\n    return response.json()\n\n\ndef _update_inference_settings(user: DATestUser, settings: dict) -> None:\n    response = requests.post(\n        f\"{SEARCH_SETTINGS_URL}/update-inference-settings\",\n        json=settings,\n        headers=user.headers,\n    )\n    response.raise_for_status()\n\n\ndef _set_new_search_settings(\n    user: DATestUser,\n    current_settings: dict,\n    enable_contextual_rag: bool = False,\n    contextual_rag_llm_name: str | None = None,\n    contextual_rag_llm_provider: str | None = None,\n) -> requests.Response:\n    \"\"\"POST to set-new-search-settings, deriving the payload from current settings.\"\"\"\n    payload = {\n        \"model_name\": current_settings[\"model_name\"],\n        \"model_dim\": current_settings[\"model_dim\"],\n        \"normalize\": current_settings[\"normalize\"],\n        \"query_prefix\": current_settings.get(\"query_prefix\") or \"\",\n        \"passage_prefix\": current_settings.get(\"passage_prefix\") or \"\",\n        \"provider_type\": current_settings.get(\"provider_type\"),\n        \"index_name\": None,\n        \"multipass_indexing\": current_settings.get(\"multipass_indexing\", False),\n        \"embedding_precision\": current_settings[\"embedding_precision\"],\n        \"reduced_dimension\": current_settings.get(\"reduced_dimension\"),\n        \"enable_contextual_rag\": enable_contextual_rag,\n        \"contextual_rag_llm_name\": contextual_rag_llm_name,\n        \"contextual_rag_llm_provider\": contextual_rag_llm_provider,\n    }\n    return requests.post(\n        f\"{SEARCH_SETTINGS_URL}/set-new-search-settings\",\n        json=payload,\n        headers=user.headers,\n    )\n\n\ndef _cancel_new_embedding(user: DATestUser) -> None:\n    response = requests.post(\n        f\"{SEARCH_SETTINGS_URL}/cancel-new-embedding\",\n        headers=user.headers,\n    )\n    response.raise_for_status()\n\n\ndef test_get_current_search_settings(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Verify that GET current search settings returns expected fields.\"\"\"\n    settings = _get_current_search_settings(admin_user)\n\n    assert \"model_name\" in settings\n    assert \"model_dim\" in settings\n    assert \"enable_contextual_rag\" in settings\n    assert \"contextual_rag_llm_name\" in settings\n    assert \"contextual_rag_llm_provider\" in settings\n    assert \"index_name\" in settings\n    assert \"embedding_precision\" in settings\n\n\ndef test_get_all_search_settings(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Verify that GET all search settings returns current and secondary.\"\"\"\n    all_settings = _get_all_search_settings(admin_user)\n\n    assert \"current_settings\" in all_settings\n    assert \"secondary_settings\" in all_settings\n    assert all_settings[\"current_settings\"] is not None\n    assert \"model_name\" in all_settings[\"current_settings\"]\n\n\ndef test_get_secondary_search_settings_none_by_default(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Verify that no secondary search settings exist by default.\"\"\"\n    secondary = _get_secondary_search_settings(admin_user)\n    assert secondary is None\n\n\ndef test_set_contextual_rag_model(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,\n) -> None:\n    \"\"\"Set contextual RAG LLM model and verify it persists.\"\"\"\n    settings = _get_current_search_settings(admin_user)\n\n    settings[\"enable_contextual_rag\"] = True\n    settings[\"contextual_rag_llm_name\"] = llm_provider.default_model_name\n    settings[\"contextual_rag_llm_provider\"] = llm_provider.name\n    _update_inference_settings(admin_user, settings)\n\n    updated = _get_current_search_settings(admin_user)\n    assert updated[\"contextual_rag_llm_name\"] == llm_provider.default_model_name\n    assert updated[\"contextual_rag_llm_provider\"] == llm_provider.name\n\n\ndef test_unset_contextual_rag_model(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,\n) -> None:\n    \"\"\"Set a contextual RAG model, then unset it and verify it becomes None.\"\"\"\n    settings = _get_current_search_settings(admin_user)\n    settings[\"enable_contextual_rag\"] = True\n    settings[\"contextual_rag_llm_name\"] = llm_provider.default_model_name\n    settings[\"contextual_rag_llm_provider\"] = llm_provider.name\n    _update_inference_settings(admin_user, settings)\n\n    # Verify it's set\n    updated = _get_current_search_settings(admin_user)\n    assert updated[\"contextual_rag_llm_name\"] == llm_provider.default_model_name\n    assert updated[\"contextual_rag_llm_provider\"] == llm_provider.name\n\n    # Unset by disabling contextual RAG\n    updated[\"enable_contextual_rag\"] = False\n    updated[\"contextual_rag_llm_name\"] = None\n    updated[\"contextual_rag_llm_provider\"] = None\n    _update_inference_settings(admin_user, updated)\n\n    # Verify it's unset\n    final = _get_current_search_settings(admin_user)\n    assert final[\"contextual_rag_llm_name\"] is None\n    assert final[\"contextual_rag_llm_provider\"] is None\n\n\ndef test_change_contextual_rag_model(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,\n) -> None:\n    \"\"\"Change contextual RAG from one model to another and verify the switch.\"\"\"\n    second_provider = LLMProviderManager.create(\n        name=\"second-provider\",\n        default_model_name=\"gpt-4o\",\n        user_performing_action=admin_user,\n    )\n\n    settings = _get_current_search_settings(admin_user)\n    settings[\"enable_contextual_rag\"] = True\n    settings[\"contextual_rag_llm_name\"] = llm_provider.default_model_name\n    settings[\"contextual_rag_llm_provider\"] = llm_provider.name\n    _update_inference_settings(admin_user, settings)\n\n    updated = _get_current_search_settings(admin_user)\n    assert updated[\"contextual_rag_llm_name\"] == llm_provider.default_model_name\n    assert updated[\"contextual_rag_llm_provider\"] == llm_provider.name\n\n    # Switch to a different model and provider\n    updated[\"enable_contextual_rag\"] = True\n    updated[\"contextual_rag_llm_name\"] = second_provider.default_model_name\n    updated[\"contextual_rag_llm_provider\"] = second_provider.name\n    _update_inference_settings(admin_user, updated)\n\n    final = _get_current_search_settings(admin_user)\n    assert final[\"contextual_rag_llm_name\"] == second_provider.default_model_name\n    assert final[\"contextual_rag_llm_provider\"] == second_provider.name\n\n\ndef test_change_contextual_rag_provider_only(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,\n) -> None:\n    \"\"\"Change only the provider while keeping the same model name.\"\"\"\n    shared_model_name = llm_provider.default_model_name\n    second_provider = LLMProviderManager.create(\n        name=\"second-provider\",\n        default_model_name=shared_model_name,\n        user_performing_action=admin_user,\n    )\n\n    settings = _get_current_search_settings(admin_user)\n    settings[\"enable_contextual_rag\"] = True\n    settings[\"contextual_rag_llm_name\"] = shared_model_name\n    settings[\"contextual_rag_llm_provider\"] = llm_provider.name\n    _update_inference_settings(admin_user, settings)\n\n    updated = _get_current_search_settings(admin_user)\n    updated[\"enable_contextual_rag\"] = True\n    updated[\"contextual_rag_llm_provider\"] = second_provider.name\n    _update_inference_settings(admin_user, updated)\n\n    final = _get_current_search_settings(admin_user)\n    assert final[\"contextual_rag_llm_name\"] == shared_model_name\n    assert final[\"contextual_rag_llm_provider\"] == second_provider.name\n\n\ndef test_enable_contextual_rag_preserved_on_inference_update(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Verify that enable_contextual_rag cannot be toggled via update-inference-settings\n    because it is a preserved field.\"\"\"\n    settings = _get_current_search_settings(admin_user)\n    original_enable = settings[\"enable_contextual_rag\"]\n\n    # Attempt to flip the flag\n    settings[\"enable_contextual_rag\"] = not original_enable\n    settings[\"contextual_rag_llm_name\"] = None\n    settings[\"contextual_rag_llm_provider\"] = None\n    _update_inference_settings(admin_user, settings)\n\n    updated = _get_current_search_settings(admin_user)\n    assert updated[\"enable_contextual_rag\"] == original_enable\n\n\ndef test_model_name_preserved_on_inference_update(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Verify that model_name cannot be changed via update-inference-settings\n    because it is a preserved field.\"\"\"\n    settings = _get_current_search_settings(admin_user)\n    original_model_name = settings[\"model_name\"]\n\n    settings[\"model_name\"] = \"some-other-model\"\n    _update_inference_settings(admin_user, settings)\n\n    updated = _get_current_search_settings(admin_user)\n    assert updated[\"model_name\"] == original_model_name\n\n\ndef test_contextual_rag_settings_reflected_in_get_all(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,\n) -> None:\n    \"\"\"Verify that contextual RAG updates appear in get-all-search-settings.\"\"\"\n    settings = _get_current_search_settings(admin_user)\n    settings[\"enable_contextual_rag\"] = True\n    settings[\"contextual_rag_llm_name\"] = llm_provider.default_model_name\n    settings[\"contextual_rag_llm_provider\"] = llm_provider.name\n    _update_inference_settings(admin_user, settings)\n\n    all_settings = _get_all_search_settings(admin_user)\n    current = all_settings[\"current_settings\"]\n    assert current[\"contextual_rag_llm_name\"] == llm_provider.default_model_name\n    assert current[\"contextual_rag_llm_provider\"] == llm_provider.name\n\n\ndef test_update_contextual_rag_nonexistent_provider(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Updating with a provider that does not exist should return 400.\"\"\"\n    settings = _get_current_search_settings(admin_user)\n    settings[\"enable_contextual_rag\"] = True\n    settings[\"contextual_rag_llm_name\"] = \"some-model\"\n    settings[\"contextual_rag_llm_provider\"] = \"nonexistent-provider\"\n\n    response = requests.post(\n        f\"{SEARCH_SETTINGS_URL}/update-inference-settings\",\n        json=settings,\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 400\n    assert \"Provider nonexistent-provider not found\" in response.json()[\"detail\"]\n\n\ndef test_update_contextual_rag_nonexistent_model(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,\n) -> None:\n    \"\"\"Updating with a valid provider but a model not in that provider should return 400.\"\"\"\n    settings = _get_current_search_settings(admin_user)\n    settings[\"enable_contextual_rag\"] = True\n    settings[\"contextual_rag_llm_name\"] = \"nonexistent-model\"\n    settings[\"contextual_rag_llm_provider\"] = llm_provider.name\n\n    response = requests.post(\n        f\"{SEARCH_SETTINGS_URL}/update-inference-settings\",\n        json=settings,\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 400\n    assert (\n        f\"Model nonexistent-model not found in provider {llm_provider.name}\"\n        in response.json()[\"detail\"]\n    )\n\n\ndef test_update_contextual_rag_missing_provider_name(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Providing a model name without a provider name should return 400.\"\"\"\n    settings = _get_current_search_settings(admin_user)\n    settings[\"enable_contextual_rag\"] = True\n    settings[\"contextual_rag_llm_name\"] = \"some-model\"\n    settings[\"contextual_rag_llm_provider\"] = None\n\n    response = requests.post(\n        f\"{SEARCH_SETTINGS_URL}/update-inference-settings\",\n        json=settings,\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 400\n    assert \"Provider name and model name are required\" in response.json()[\"detail\"]\n\n\ndef test_update_contextual_rag_missing_model_name(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,\n) -> None:\n    \"\"\"Providing a provider name without a model name should return 400.\"\"\"\n    settings = _get_current_search_settings(admin_user)\n    settings[\"enable_contextual_rag\"] = True\n    settings[\"contextual_rag_llm_name\"] = None\n    settings[\"contextual_rag_llm_provider\"] = llm_provider.name\n\n    response = requests.post(\n        f\"{SEARCH_SETTINGS_URL}/update-inference-settings\",\n        json=settings,\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 400\n    assert \"Provider name and model name are required\" in response.json()[\"detail\"]\n\n\ndef test_set_new_search_settings_with_contextual_rag(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,\n) -> None:\n    \"\"\"Create new search settings with contextual RAG enabled and verify the\n    secondary settings contain the correct provider and model.\"\"\"\n    current = _get_current_search_settings(admin_user)\n\n    response = _set_new_search_settings(\n        user=admin_user,\n        current_settings=current,\n        enable_contextual_rag=True,\n        contextual_rag_llm_name=llm_provider.default_model_name,\n        contextual_rag_llm_provider=llm_provider.name,\n    )\n    response.raise_for_status()\n    assert \"id\" in response.json()\n\n    secondary = _get_secondary_search_settings(admin_user)\n    assert secondary is not None\n    assert secondary[\"enable_contextual_rag\"] is True\n    assert secondary[\"contextual_rag_llm_name\"] == llm_provider.default_model_name\n    assert secondary[\"contextual_rag_llm_provider\"] == llm_provider.name\n\n    _cancel_new_embedding(admin_user)\n\n\ndef test_set_new_search_settings_without_contextual_rag(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Create new search settings with contextual RAG disabled and verify\n    the secondary settings have no RAG provider.\"\"\"\n    current = _get_current_search_settings(admin_user)\n\n    response = _set_new_search_settings(\n        user=admin_user,\n        current_settings=current,\n        enable_contextual_rag=False,\n    )\n    response.raise_for_status()\n\n    secondary = _get_secondary_search_settings(admin_user)\n    assert secondary is not None\n    assert secondary[\"enable_contextual_rag\"] is False\n    assert secondary[\"contextual_rag_llm_name\"] is None\n    assert secondary[\"contextual_rag_llm_provider\"] is None\n\n    _cancel_new_embedding(admin_user)\n\n\ndef test_set_new_then_update_inference_settings(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,\n) -> None:\n    \"\"\"Create new secondary settings, then update the current (primary) settings\n    with contextual RAG and verify both are visible through get-all.\"\"\"\n    current = _get_current_search_settings(admin_user)\n\n    # Create secondary settings without contextual RAG\n    response = _set_new_search_settings(\n        user=admin_user,\n        current_settings=current,\n        enable_contextual_rag=False,\n    )\n    response.raise_for_status()\n\n    # Update the *current* (primary) settings with a contextual RAG provider\n    current[\"enable_contextual_rag\"] = True\n    current[\"contextual_rag_llm_name\"] = llm_provider.default_model_name\n    current[\"contextual_rag_llm_provider\"] = llm_provider.name\n    _update_inference_settings(admin_user, current)\n\n    all_settings = _get_all_search_settings(admin_user)\n\n    primary = all_settings[\"current_settings\"]\n    assert primary[\"contextual_rag_llm_name\"] == llm_provider.default_model_name\n    assert primary[\"contextual_rag_llm_provider\"] == llm_provider.name\n\n    secondary = all_settings[\"secondary_settings\"]\n    assert secondary is not None\n    assert secondary[\"contextual_rag_llm_name\"] is None\n    assert secondary[\"contextual_rag_llm_provider\"] is None\n\n    _cancel_new_embedding(admin_user)\n\n\ndef test_set_new_search_settings_replaces_previous_secondary(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    llm_provider: DATestLLMProvider,\n) -> None:\n    \"\"\"Calling set-new-search-settings twice should retire the first secondary\n    and replace it with the second.\"\"\"\n    current = _get_current_search_settings(admin_user)\n\n    # First: no contextual RAG\n    resp1 = _set_new_search_settings(\n        user=admin_user,\n        current_settings=current,\n        enable_contextual_rag=False,\n    )\n    resp1.raise_for_status()\n    first_id = resp1.json()[\"id\"]\n\n    # Second: with contextual RAG\n    resp2 = _set_new_search_settings(\n        user=admin_user,\n        current_settings=current,\n        enable_contextual_rag=True,\n        contextual_rag_llm_name=llm_provider.default_model_name,\n        contextual_rag_llm_provider=llm_provider.name,\n    )\n    resp2.raise_for_status()\n    second_id = resp2.json()[\"id\"]\n\n    assert second_id != first_id\n\n    secondary = _get_secondary_search_settings(admin_user)\n    assert secondary is not None\n    assert secondary[\"enable_contextual_rag\"] is True\n    assert secondary[\"contextual_rag_llm_name\"] == llm_provider.default_model_name\n    assert secondary[\"contextual_rag_llm_provider\"] == llm_provider.name\n\n    _cancel_new_embedding(admin_user)\n"
  },
  {
    "path": "backend/tests/integration/tests/streaming_endpoints/test_chat_file_attachment.py",
    "content": "import mimetypes\nfrom typing import Any\n\nimport requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.file import FileManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.test_file_utils import create_test_image\nfrom tests.integration.common_utils.test_file_utils import create_test_text_file\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_send_message_with_image_attachment(admin_user: DATestUser) -> None:\n    \"\"\"Test sending a chat message with an attached image file.\"\"\"\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    # Create a simple test image\n    image_file = create_test_image(width=100, height=100, color=\"blue\")\n\n    # Upload the image file\n    file_descriptors, error = FileManager.upload_files(\n        files=[(\"test_image.png\", image_file)],\n        user_performing_action=admin_user,\n    )\n\n    assert not error, f\"File upload should succeed, but got error: {error}\"\n    assert len(file_descriptors) == 1, \"Should have uploaded one file\"\n    assert file_descriptors[0][\"type\"] == \"image\", \"File should be identified as image\"\n\n    # Create a chat session\n    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)\n\n    # Send a message with the image attachment\n    response = ChatSessionManager.send_message(\n        chat_session_id=test_chat_session.id,\n        message=\"What color is this image?\",\n        user_performing_action=admin_user,\n        file_descriptors=file_descriptors,\n    )\n\n    # Verify that the message was processed successfully\n    assert response.error is None, \"Chat response should not have an error\"\n    assert (\n        \"blue\" in response.full_message.lower()\n    ), \"Chat response should contain the color of the image\"\n\n\ndef test_send_message_with_text_file_attachment(admin_user: DATestUser) -> None:\n    \"\"\"Test sending a chat message with an attached text file.\"\"\"\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    # Create a simple test text file\n    text_file = create_test_text_file(\n        \"This is a test document.\\nIt has multiple lines.\\nThis is the third line.\"\n    )\n\n    # Upload the text file\n    file_descriptors, error = FileManager.upload_files(\n        files=[(\"test_document.txt\", text_file)],\n        user_performing_action=admin_user,\n    )\n\n    assert not error, f\"File upload should succeed, but got error: {error}\"\n    assert len(file_descriptors) == 1, \"Should have uploaded one file\"\n    assert file_descriptors[0][\"type\"] in [\n        \"plain_text\",\n        \"document\",\n    ], \"File should be identified as text or document\"\n\n    # Create a chat session\n    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)\n\n    # Send a message with the text file attachment\n    response = ChatSessionManager.send_message(\n        chat_session_id=test_chat_session.id,\n        message=\"Repeat the contents of this file word for word.\",\n        user_performing_action=admin_user,\n        file_descriptors=file_descriptors,\n    )\n\n    # Verify that the message was processed successfully\n    assert response.error is None, \"Chat response should not have an error\"\n    assert (\n        \"third line\" in response.full_message.lower()\n    ), \"Chat response should contain the contents of the file\"\n\n\ndef _set_token_threshold(admin_user: DATestUser, threshold_k: int) -> None:\n    \"\"\"Set the file token count threshold via admin settings API.\"\"\"\n    response = requests.put(\n        f\"{API_SERVER_URL}/admin/settings\",\n        json={\"file_token_count_threshold_k\": threshold_k},\n        headers=admin_user.headers,\n    )\n    response.raise_for_status()\n\n\ndef _upload_raw(\n    filename: str,\n    content: bytes,\n    user: DATestUser,\n) -> dict[str, Any]:\n    \"\"\"Upload a file and return the full JSON response (user_files + rejected_files).\"\"\"\n    mime_type, _ = mimetypes.guess_type(filename)\n    headers = user.headers.copy()\n    headers.pop(\"Content-Type\", None)\n\n    response = requests.post(\n        f\"{API_SERVER_URL}/user/projects/file/upload\",\n        files=[(\"files\", (filename, content, mime_type or \"application/octet-stream\"))],\n        headers=headers,\n    )\n    response.raise_for_status()\n    return response.json()\n\n\ndef test_csv_over_token_threshold_uploaded_not_indexed(\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"CSV exceeding token threshold is uploaded (accepted) but skips indexing.\"\"\"\n    _set_token_threshold(admin_user, threshold_k=1)\n    try:\n        # ~2000 tokens with default tokenizer, well over 1K threshold\n        content = (\"x \" * 100 + \"\\n\") * 20\n        result = _upload_raw(\"large.csv\", content.encode(), admin_user)\n\n        assert len(result[\"user_files\"]) == 1, \"CSV should be accepted\"\n        assert len(result[\"rejected_files\"]) == 0, \"CSV should not be rejected\"\n        assert (\n            result[\"user_files\"][0][\"status\"] == \"SKIPPED\"\n        ), \"CSV over threshold should be SKIPPED (uploaded but not indexed)\"\n        assert (\n            result[\"user_files\"][0][\"chunk_count\"] is None\n        ), \"Skipped file should have no chunks\"\n    finally:\n        _set_token_threshold(admin_user, threshold_k=200)\n\n\ndef test_csv_under_token_threshold_uploaded_and_indexed(\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"CSV under token threshold is uploaded and queued for indexing.\"\"\"\n    _set_token_threshold(admin_user, threshold_k=200)\n    try:\n        content = \"col1,col2\\na,b\\n\"\n        result = _upload_raw(\"small.csv\", content.encode(), admin_user)\n\n        assert len(result[\"user_files\"]) == 1, \"CSV should be accepted\"\n        assert len(result[\"rejected_files\"]) == 0, \"CSV should not be rejected\"\n        assert (\n            result[\"user_files\"][0][\"status\"] == \"PROCESSING\"\n        ), \"CSV under threshold should be PROCESSING (queued for indexing)\"\n    finally:\n        _set_token_threshold(admin_user, threshold_k=200)\n\n\ndef test_txt_over_token_threshold_rejected(\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"Non-exempt file exceeding token threshold is rejected entirely.\"\"\"\n    _set_token_threshold(admin_user, threshold_k=1)\n    try:\n        # ~2000 tokens, well over 1K threshold. Unlike CSV, .txt is not\n        # exempt from the threshold so the file should be rejected.\n        content = (\"x \" * 100 + \"\\n\") * 20\n        result = _upload_raw(\"big.txt\", content.encode(), admin_user)\n\n        assert len(result[\"user_files\"]) == 0, \"File should not be accepted\"\n        assert len(result[\"rejected_files\"]) == 1, \"File should be rejected\"\n        assert \"token limit\" in result[\"rejected_files\"][0][\"reason\"].lower()\n    finally:\n        _set_token_threshold(admin_user, threshold_k=200)\n"
  },
  {
    "path": "backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py",
    "content": "import time\n\nfrom onyx.configs.constants import MessageType\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.conftest import DocumentBuilderType\n\nTERMINATED_RESPONSE_MESSAGE = (\n    \"Response was terminated prior to completion, try regenerating.\"\n)\n\nLOADING_RESPONSE_MESSAGE = \"Message is loading... Please refresh the page soon.\"\n\n\ndef test_send_two_messages(basic_user: DATestUser) -> None:\n    # Create a chat session\n    test_chat_session = ChatSessionManager.create(\n        persona_id=0,  # Use default persona\n        description=\"Test chat session for multiple messages\",\n        user_performing_action=basic_user,\n    )\n\n    # Send a message to create some data\n    response = ChatSessionManager.send_message(\n        chat_session_id=test_chat_session.id,\n        message=\"hello\",\n        user_performing_action=basic_user,\n    )\n    # Verify that the message was processed successfully\n    assert response.error is None, \"Chat response should not have an error\"\n    assert len(response.full_message) > 0, \"Chat response should not be empty\"\n\n    # Verify that the chat session can be retrieved before deletion\n    chat_history = ChatSessionManager.get_chat_history(\n        chat_session=test_chat_session,\n        user_performing_action=basic_user,\n    )\n    assert (\n        len(chat_history) == 3\n    ), \"Chat session should have 1 system message, 1 user message, and 1 assistant message\"\n\n    response2 = ChatSessionManager.send_message(\n        chat_session_id=test_chat_session.id,\n        message=\"hello again\",\n        user_performing_action=basic_user,\n        parent_message_id=response.assistant_message_id,\n    )\n\n    assert response2.error is None, \"Chat response should not have an error\"\n    assert len(response2.full_message) > 0, \"Chat response should not be empty\"\n\n    # Verify that the chat session can be retrieved before deletion\n    chat_history2 = ChatSessionManager.get_chat_history(\n        chat_session=test_chat_session,\n        user_performing_action=basic_user,\n    )\n    assert (\n        len(chat_history2) == 5\n    ), \"Chat session should have 1 system message, 2 user messages, and 2 assistant messages\"\n\n\ndef test_send_message_simple_with_history(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)\n\n    response = ChatSessionManager.send_message(\n        chat_session_id=test_chat_session.id,\n        message=\"this is a test message\",\n        user_performing_action=admin_user,\n    )\n\n    assert response.error is None, \"Chat response should not have an error\"\n    assert len(response.full_message) > 0\n\n\ndef test_send_message__basic_searches(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n    document_builder: DocumentBuilderType,\n) -> None:\n    MESSAGE = \"run a search for 'test'. Use the internal search tool.\"\n    SHORT_DOC_CONTENT = \"test\"\n    LONG_DOC_CONTENT = \"blah blah blah blah\" * 100\n\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    short_doc = document_builder([SHORT_DOC_CONTENT])[0]\n\n    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)\n    response = ChatSessionManager.send_message(\n        chat_session_id=test_chat_session.id,\n        message=MESSAGE,\n        user_performing_action=admin_user,\n    )\n    assert response.error is None, \"Chat response should not have an error\"\n    assert response.top_documents is not None\n    assert len(response.top_documents) == 1\n    assert response.top_documents[0].document_id == short_doc.id\n\n    # make sure this doc is really long so that it will be split into multiple chunks\n    long_doc = document_builder([LONG_DOC_CONTENT])[0]\n\n    # new chat session for simplicity\n    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)\n    response = ChatSessionManager.send_message(\n        chat_session_id=test_chat_session.id,\n        message=MESSAGE,\n        user_performing_action=admin_user,\n    )\n    assert response.error is None, \"Chat response should not have an error\"\n    assert response.top_documents is not None\n    assert len(response.top_documents) == 2\n    # short doc should be more relevant and thus first\n    assert response.top_documents[0].document_id == short_doc.id\n    assert response.top_documents[1].document_id == long_doc.id\n\n\ndef test_send_message_disconnect_and_cleanup(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    \"\"\"\n    Test that when a client disconnects mid-stream:\n    1. Client sends a message and disconnects after receiving just 1 packet\n    2. Client checks to see that their message ends up completed\n\n    Note: There is an interim period (between disconnect and checkup) where we expect\n    to see some sort of 'loading' message.\n    \"\"\"\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)\n\n    # Send a message and disconnect after receiving just 1 packet\n    ChatSessionManager.send_message_with_disconnect(\n        chat_session_id=test_chat_session.id,\n        message=\"What are some important events that happened today?\",\n        user_performing_action=admin_user,\n        disconnect_after_packets=1,\n    )\n\n    # Every 5 seconds, check if we have the latest state of the chat session up to a minute\n    increment_seconds = 1\n    max_seconds = 60\n    msg = TERMINATED_RESPONSE_MESSAGE\n\n    for _ in range(max_seconds // increment_seconds):\n        time.sleep(increment_seconds)\n\n        # Get the chat history\n        chat_history = ChatSessionManager.get_chat_history(\n            chat_session=test_chat_session,\n            user_performing_action=admin_user,\n        )\n\n        # Find the assistant message\n        assistant_message = None\n        for chat_obj in chat_history:\n            if chat_obj.message_type == MessageType.ASSISTANT:\n                assistant_message = chat_obj\n                break\n\n        assert assistant_message is not None, \"Assistant message should exist\"\n        msg = assistant_message.message\n\n        if msg != TERMINATED_RESPONSE_MESSAGE and msg != LOADING_RESPONSE_MESSAGE:\n            break\n\n    assert (\n        msg != TERMINATED_RESPONSE_MESSAGE and msg != LOADING_RESPONSE_MESSAGE\n    ), f\"Assistant message should no longer be the terminated response message after cleanup, got: {msg}\"\n"
  },
  {
    "path": "backend/tests/integration/tests/tags/test_tags.py",
    "content": "from onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import Document\nfrom onyx.db.tag import get_structured_tags_for_document\nfrom tests.integration.common_utils.managers.api_key import APIKeyManager\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_tag_creation_and_update(reset: None) -> None:  # noqa: ARG001\n    # create admin user\n    admin_user: DATestUser = UserManager.create(email=\"admin@onyx.app\")\n\n    # create a minimal file connector\n    cc_pair = CCPairManager.create_from_scratch(\n        name=\"KG-Test-FileConnector\",\n        source=DocumentSource.FILE,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={\n            \"file_locations\": [],\n            \"file_names\": [],\n            \"zip_metadata_file_id\": None,\n        },\n        user_performing_action=admin_user,\n    )\n    api_key = APIKeyManager.create(user_performing_action=admin_user)\n    api_key.headers.update(admin_user.headers)\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    # create document\n    doc1_expected_metadata: dict[str, str | list[str]] = {\n        \"value\": \"val\",\n        \"multiple_list\": [\"a\", \"b\", \"c\"],\n        \"single_list\": [\"x\"],\n    }\n    doc1_expected_tags: set[tuple[str, str, bool]] = {\n        (\"value\", \"val\", False),\n        (\"multiple_list\", \"a\", True),\n        (\"multiple_list\", \"b\", True),\n        (\"multiple_list\", \"c\", True),\n        (\"single_list\", \"x\", True),\n    }\n    doc1 = DocumentManager.seed_doc_with_content(\n        cc_pair=cc_pair,\n        content=\"Dummy content\",\n        document_id=\"doc1\",\n        metadata=doc1_expected_metadata,\n        api_key=api_key,\n    )\n\n    # these are added by the connector\n    doc1_expected_metadata[\"document_id\"] = \"doc1\"\n    doc1_expected_tags.add((\"document_id\", \"doc1\", False))\n\n    # get document from db\n    with get_session_with_current_tenant() as db_session:\n        doc1_db = db_session.query(Document).filter(Document.id == doc1.id).first()\n        assert doc1_db is not None\n        assert doc1_db.id == doc1.id\n\n        doc1_tags = doc1_db.tags\n\n    # check tags\n    doc1_tags_data: set[tuple[str, str, bool]] = {\n        (tag.tag_key, tag.tag_value, tag.is_list) for tag in doc1_tags\n    }\n    assert doc1_tags_data == doc1_expected_tags\n\n    # check structured tags\n    with get_session_with_current_tenant() as db_session:\n        doc1_metadata = get_structured_tags_for_document(doc1.id, db_session)\n    assert doc1_metadata == doc1_expected_metadata\n\n    # update metadata\n    doc1_new_expected_metadata: dict[str, str | list[str]] = {\n        \"value\": \"val2\",\n        \"multiple_list\": [\"a\", \"d\"],\n        \"new_value\": \"new_val\",\n    }\n    doc1_new_expected_tags: set[tuple[str, str, bool]] = {\n        (\"value\", \"val2\", False),\n        (\"multiple_list\", \"a\", True),\n        (\"multiple_list\", \"d\", True),\n        (\"new_value\", \"new_val\", False),\n    }\n    doc1_new = DocumentManager.seed_doc_with_content(\n        cc_pair=cc_pair,\n        content=\"Dummy content\",\n        document_id=\"doc1\",\n        metadata=doc1_new_expected_metadata,\n        api_key=api_key,\n    )\n    assert doc1_new.id == doc1.id\n\n    # these are added by the connector\n    doc1_new_expected_metadata[\"document_id\"] = \"doc1\"\n    doc1_new_expected_tags.add((\"document_id\", \"doc1\", False))\n\n    # get new document from db\n    with get_session_with_current_tenant() as db_session:\n        doc1_new_db = db_session.query(Document).filter(Document.id == doc1.id).first()\n        assert doc1_new_db is not None\n        assert doc1_new_db.id == doc1.id\n\n        doc1_new_tags = doc1_new_db.tags\n\n    # check tags\n    doc1_new_tags_data: set[tuple[str, str, bool]] = {\n        (tag.tag_key, tag.tag_value, tag.is_list) for tag in doc1_new_tags\n    }\n    assert doc1_new_tags_data == doc1_new_expected_tags\n\n    # check structured tags\n    with get_session_with_current_tenant() as db_session:\n        doc1_new_metadata = get_structured_tags_for_document(doc1.id, db_session)\n    assert doc1_new_metadata == doc1_new_expected_metadata\n\n\ndef test_tag_sharing(reset: None) -> None:  # noqa: ARG001\n    # create admin user\n    admin_user: DATestUser = UserManager.create(email=\"admin@onyx.app\")\n\n    # create a minimal file connector\n    cc_pair = CCPairManager.create_from_scratch(\n        name=\"KG-Test-FileConnector\",\n        source=DocumentSource.FILE,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={\n            \"file_locations\": [],\n            \"file_names\": [],\n            \"zip_metadata_file_id\": None,\n        },\n        user_performing_action=admin_user,\n    )\n    api_key = APIKeyManager.create(user_performing_action=admin_user)\n    api_key.headers.update(admin_user.headers)\n    LLMProviderManager.create(user_performing_action=admin_user)\n\n    # create documents\n    doc1_expected_metadata: dict[str, str | list[str]] = {\n        \"value\": \"val\",\n        \"list\": [\"a\", \"b\"],\n        \"same_key\": \"x\",\n    }\n    doc1_expected_tags: set[tuple[str, str, bool]] = {\n        (\"value\", \"val\", False),\n        (\"list\", \"a\", True),\n        (\"list\", \"b\", True),\n        (\"same_key\", \"x\", False),\n    }\n    doc1 = DocumentManager.seed_doc_with_content(\n        cc_pair=cc_pair,\n        content=\"Dummy content\",\n        document_id=\"doc1\",\n        metadata=doc1_expected_metadata,\n        api_key=api_key,\n    )\n\n    doc2_expected_metadata: dict[str, str | list[str]] = {\n        \"value\": \"val\",\n        \"list\": [\"a\", \"c\"],\n        \"same_key\": [\"x\"],\n    }\n    doc2_expected_tags: set[tuple[str, str, bool]] = {\n        (\"value\", \"val\", False),\n        (\"list\", \"a\", True),\n        (\"list\", \"c\", True),\n        (\"same_key\", \"x\", True),\n    }\n    doc2 = DocumentManager.seed_doc_with_content(\n        cc_pair=cc_pair,\n        content=\"Dummy content\",\n        document_id=\"doc2\",\n        metadata=doc2_expected_metadata,\n        api_key=api_key,\n    )\n\n    # these are added by the connector\n    doc1_expected_metadata[\"document_id\"] = \"doc1\"\n    doc1_expected_tags.add((\"document_id\", \"doc1\", False))\n    doc2_expected_metadata[\"document_id\"] = \"doc2\"\n    doc2_expected_tags.add((\"document_id\", \"doc2\", False))\n\n    # get documents from db\n    with get_session_with_current_tenant() as db_session:\n        doc1_db = db_session.query(Document).filter(Document.id == doc1.id).first()\n        doc2_db = db_session.query(Document).filter(Document.id == doc2.id).first()\n        assert doc1_db is not None\n        assert doc1_db.id == doc1.id\n        assert doc2_db is not None\n        assert doc2_db.id == doc2.id\n\n        doc1_tags = doc1_db.tags\n        doc2_tags = doc2_db.tags\n\n    # check tags\n    doc1_tags_data: set[tuple[str, str, bool]] = {\n        (tag.tag_key, tag.tag_value, tag.is_list) for tag in doc1_tags\n    }\n    assert doc1_tags_data == doc1_expected_tags\n\n    doc2_tags_data: set[tuple[str, str, bool]] = {\n        (tag.tag_key, tag.tag_value, tag.is_list) for tag in doc2_tags\n    }\n    assert doc2_tags_data == doc2_expected_tags\n\n    # check tag sharing\n    doc1_tagkv_id: dict[tuple[str, str], int] = {\n        (tag.tag_key, tag.tag_value): tag.id for tag in doc1_tags\n    }\n    doc2_tagkv_id: dict[tuple[str, str], int] = {\n        (tag.tag_key, tag.tag_value): tag.id for tag in doc2_tags\n    }\n    assert doc1_tagkv_id[(\"value\", \"val\")] == doc2_tagkv_id[(\"value\", \"val\")]\n    assert doc1_tagkv_id[(\"list\", \"a\")] == doc2_tagkv_id[(\"list\", \"a\")]\n    assert doc1_tagkv_id[(\"same_key\", \"x\")] != doc2_tagkv_id[(\"same_key\", \"x\")]\n"
  },
  {
    "path": "backend/tests/integration/tests/tools/test_force_tool_use.py",
    "content": "\"\"\"\nIntegration test for forced tool use to verify that web_search can be forced.\nThis test verifies that forcing a tool use works through the complete API flow.\n\"\"\"\n\nimport pytest\nfrom sqlalchemy import select\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.models import Tool\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.test_models import DATestImageGenerationConfig\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import ToolName\n\n\ndef test_force_tool_use(\n    basic_user: DATestUser,\n    image_generation_config: DATestImageGenerationConfig,  # noqa: ARG001\n) -> None:\n    with get_session_with_current_tenant() as db_session:\n        image_generation_tool = db_session.execute(\n            select(Tool).where(Tool.in_code_tool_id == \"ImageGenerationTool\")\n        ).scalar_one_or_none()\n        assert image_generation_tool is not None, \"ImageGenerationTool must exist\"\n        image_generation_tool_id = image_generation_tool.id\n\n    # Create a chat session\n    chat_session = ChatSessionManager.create(user_performing_action=basic_user)\n\n    # Send a simple message that wouldn't normally trigger image generation\n    # but force the image generation tool to be used\n    message = \"hi\"\n\n    analyzed_response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=message,\n        user_performing_action=basic_user,\n        forced_tool_ids=[image_generation_tool_id],\n    )\n\n    assert analyzed_response.error is None, \"Chat response should not have an error\"\n\n    image_generation_tool_used = any(\n        tool.tool_name == ToolName.IMAGE_GENERATION\n        for tool in analyzed_response.used_tools\n    )\n    assert (\n        image_generation_tool_used\n    ), \"Image generation tool should have been forced to run\"\n\n\nif __name__ == \"__main__\":\n    # Run with: python -m dotenv -f .vscode/.env run --\n    # python -m pytest backend/tests/integration/tests/tools/test_force_tool_use.py -v -s\n    pytest.main([__file__, \"-v\", \"-s\"])\n"
  },
  {
    "path": "backend/tests/integration/tests/tools/test_image_generation_streaming.py",
    "content": "\"\"\"\nIntegration test for image generation heartbeat streaming through the /send-message API.\nThis test verifies that heartbeat packets are properly streamed through the complete API flow.\n\"\"\"\n\nimport time\n\nimport pytest\n\nfrom onyx.server.query_and_chat.streaming_models import StreamingType\nfrom onyx.tools.tool_implementations.images.image_generation_tool import (\n    HEARTBEAT_INTERVAL,\n)\nfrom tests.integration.common_utils.managers.chat import ChatSessionManager\nfrom tests.integration.common_utils.test_models import DATestImageGenerationConfig\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import ToolName\n\nART_PERSONA_ID = -3\n\n\ndef test_image_generation_streaming(\n    basic_user: DATestUser,\n    llm_provider: DATestLLMProvider,  # noqa: ARG001\n    image_generation_config: DATestImageGenerationConfig,  # noqa: ARG001\n) -> None:\n    \"\"\"\n    Test image generation to verify:\n    1. The image generation tool is invoked successfully\n    2. Heartbeat packets are streamed during generation\n    3. The response contains the generated image information\n\n    This test uses the actual API without any mocking.\n    \"\"\"\n    # Create a chat session with this persona\n    chat_session = ChatSessionManager.create(user_performing_action=basic_user)\n\n    # Send a message that should trigger image generation\n    # Use explicit instructions to ensure the image generation tool is used\n    message = \"Please generate an image of a beautiful sunset over the ocean. Use the image generation tool to create this image.\"\n\n    start_time = time.monotonic()\n    analyzed_response = ChatSessionManager.send_message(\n        chat_session_id=chat_session.id,\n        message=message,\n        user_performing_action=basic_user,\n    )\n    total_time = time.monotonic() - start_time\n\n    assert analyzed_response.error is None, \"Chat response should not have an error\"\n\n    # 1. Check if image generation tool was used\n    image_gen_used = any(\n        tool.tool_name == ToolName.IMAGE_GENERATION\n        for tool in analyzed_response.used_tools\n    )\n    assert image_gen_used\n\n    # Verify we received heartbeat packets during image generation\n    # Image generation typically takes a few seconds and sends heartbeats\n    # every HEARTBEAT_INTERVAL seconds\n    expected_heartbeat_packets = max(1, int(total_time / HEARTBEAT_INTERVAL) - 1)\n    assert len(analyzed_response.heartbeat_packets) >= expected_heartbeat_packets, (\n        f\"Expected at least {expected_heartbeat_packets} heartbeats for {total_time:.2f}s execution, \"\n        f\"but got {len(analyzed_response.heartbeat_packets)}\"\n    )\n\n    # Verify the heartbeat packets have the expected structure\n    for packet in analyzed_response.heartbeat_packets:\n        assert \"obj\" in packet, \"Heartbeat packet should have 'obj' field\"\n        assert (\n            packet[\"obj\"].get(\"type\") == StreamingType.IMAGE_GENERATION_HEARTBEAT.value\n        ), f\"Expected heartbeat type to be {StreamingType.IMAGE_GENERATION_HEARTBEAT.value}, got {packet['obj'].get('type')}\"\n    # 4. Verify image generation tool delta packets with actual image data\n    image_tool_results = [\n        tool\n        for tool in analyzed_response.used_tools\n        if tool.tool_name == ToolName.IMAGE_GENERATION\n    ]\n    assert len(image_tool_results) > 0, \"Should have image generation tool results\"\n\n    image_tool = image_tool_results[0]\n    assert len(image_tool.images) > 0, \"Should have generated at least one image\"\n\n\nif __name__ == \"__main__\":\n    # Run with: python -m dotenv -f .vscode/.env run --\n    # python -m pytest tests/integration/tests/tools/test_image_generation_heartbeat.py -v -s\n    pytest.main([__file__, \"-v\", \"-s\"])\n"
  },
  {
    "path": "backend/tests/integration/tests/usergroup/test_add_users_to_group.py",
    "content": "import os\nfrom uuid import uuid4\n\nimport pytest\nimport requests\n\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import DATestUserGroup\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"User group tests are enterprise only\",\n)\ndef test_add_users_to_group(reset: None) -> None:  # noqa: ARG001\n    admin_user: DATestUser = UserManager.create(name=\"admin_for_add_user\")\n    user_to_add: DATestUser = UserManager.create(name=\"basic_user_to_add\")\n\n    user_group: DATestUserGroup = UserGroupManager.create(\n        name=\"add-user-test-group\",\n        user_ids=[admin_user.id],\n        user_performing_action=admin_user,\n    )\n\n    UserGroupManager.wait_for_sync(\n        user_performing_action=admin_user,\n        user_groups_to_check=[user_group],\n    )\n\n    updated_user_group = UserGroupManager.add_users(\n        user_group=user_group,\n        user_ids=[user_to_add.id],\n        user_performing_action=admin_user,\n    )\n\n    fetched_user_groups = UserGroupManager.get_all(user_performing_action=admin_user)\n    fetched_user_group = next(\n        group for group in fetched_user_groups if group.id == updated_user_group.id\n    )\n\n    fetched_user_ids = {user.id for user in fetched_user_group.users}\n    assert admin_user.id in fetched_user_ids\n    assert user_to_add.id in fetched_user_ids\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"User group tests are enterprise only\",\n)\ndef test_add_users_to_group_invalid_user(reset: None) -> None:  # noqa: ARG001\n    admin_user: DATestUser = UserManager.create(name=\"admin_for_add_user_invalid\")\n\n    user_group: DATestUserGroup = UserGroupManager.create(\n        name=\"add-user-invalid-test-group\",\n        user_ids=[admin_user.id],\n        user_performing_action=admin_user,\n    )\n\n    invalid_user_id = str(uuid4())\n    response = requests.post(\n        f\"{API_SERVER_URL}/manage/admin/user-group/{user_group.id}/add-users\",\n        json={\"user_ids\": [invalid_user_id]},\n        headers=admin_user.headers,\n    )\n\n    assert response.status_code == 404\n    assert \"not found\" in response.text.lower()\n"
  },
  {
    "path": "backend/tests/integration/tests/usergroup/test_group_membership_updates_user_permissions.py",
    "content": "import os\n\nimport pytest\n\nfrom onyx.db.engine.sql_engine import get_session_with_current_tenant\nfrom onyx.db.enums import Permission\nfrom onyx.db.models import PermissionGrant\nfrom onyx.db.models import UserGroup as UserGroupModel\nfrom onyx.db.permissions import recompute_permissions_for_group__no_commit\nfrom onyx.db.permissions import recompute_user_permissions__no_commit\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"User group tests are enterprise only\",\n)\ndef test_user_gets_permissions_when_added_to_group(\n    reset: None,  # noqa: ARG001\n) -> None:\n    admin_user: DATestUser = UserManager.create(name=\"admin_for_perm_test\")\n    basic_user: DATestUser = UserManager.create(name=\"basic_user_for_perm_test\")\n\n    # basic_user starts with only \"basic\" from the default group\n    initial_permissions = UserManager.get_permissions(basic_user)\n    assert \"basic\" in initial_permissions\n    assert \"add:agents\" not in initial_permissions\n\n    # Create a new group and add basic_user\n    group = UserGroupManager.create(\n        name=\"perm-test-group\",\n        user_ids=[admin_user.id, basic_user.id],\n        user_performing_action=admin_user,\n    )\n\n    # Grant a non-basic permission to the group and recompute\n    with get_session_with_current_tenant() as db_session:\n        db_group = db_session.get(UserGroupModel, group.id)\n        assert db_group is not None\n        db_session.add(\n            PermissionGrant(\n                group_id=db_group.id,\n                permission=Permission.ADD_AGENTS,\n                grant_source=\"SYSTEM\",\n            )\n        )\n        db_session.flush()\n        recompute_user_permissions__no_commit(basic_user.id, db_session)\n        db_session.commit()\n\n    # Verify the user gained the new permission (expanded includes read:agents)\n    updated_permissions = UserManager.get_permissions(basic_user)\n    assert (\n        \"add:agents\" in updated_permissions\n    ), f\"User should have 'add:agents' after group grant, got: {updated_permissions}\"\n    assert (\n        \"read:agents\" in updated_permissions\n    ), f\"User should have implied 'read:agents', got: {updated_permissions}\"\n    assert \"basic\" in updated_permissions\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"User group tests are enterprise only\",\n)\ndef test_group_permission_change_propagates_to_all_members(\n    reset: None,  # noqa: ARG001\n) -> None:\n    admin_user: DATestUser = UserManager.create(name=\"admin_propagate\")\n    user_a: DATestUser = UserManager.create(name=\"user_a_propagate\")\n    user_b: DATestUser = UserManager.create(name=\"user_b_propagate\")\n\n    group = UserGroupManager.create(\n        name=\"propagate-test-group\",\n        user_ids=[admin_user.id, user_a.id, user_b.id],\n        user_performing_action=admin_user,\n    )\n\n    # Neither user should have add:agents yet\n    for u in (user_a, user_b):\n        assert \"add:agents\" not in UserManager.get_permissions(u)\n\n    # Grant add:agents to the group, then batch-recompute\n    with get_session_with_current_tenant() as db_session:\n        grant = PermissionGrant(\n            group_id=group.id,\n            permission=Permission.ADD_AGENTS,\n            grant_source=\"SYSTEM\",\n        )\n        db_session.add(grant)\n        db_session.flush()\n        recompute_permissions_for_group__no_commit(group.id, db_session)\n        db_session.commit()\n\n    # Both users should now have the permission (plus implied read:agents)\n    for u in (user_a, user_b):\n        perms = UserManager.get_permissions(u)\n        assert \"add:agents\" in perms, f\"{u.id} missing add:agents: {perms}\"\n        assert \"read:agents\" in perms, f\"{u.id} missing implied read:agents: {perms}\"\n\n    # Soft-delete the grant and recompute — permission should be removed\n    with get_session_with_current_tenant() as db_session:\n        db_grant = (\n            db_session.query(PermissionGrant)\n            .filter_by(group_id=group.id, permission=Permission.ADD_AGENTS)\n            .first()\n        )\n        assert db_grant is not None\n        db_grant.is_deleted = True\n        db_session.flush()\n        recompute_permissions_for_group__no_commit(group.id, db_session)\n        db_session.commit()\n\n    for u in (user_a, user_b):\n        perms = UserManager.get_permissions(u)\n        assert \"add:agents\" not in perms, f\"{u.id} still has add:agents: {perms}\"\n"
  },
  {
    "path": "backend/tests/integration/tests/usergroup/test_new_group_gets_basic_permission.py",
    "content": "import os\n\nimport pytest\n\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"User group tests are enterprise only\",\n)\ndef test_new_group_gets_basic_permission(reset: None) -> None:  # noqa: ARG001\n    admin_user: DATestUser = UserManager.create(name=\"admin_for_basic_perm\")\n\n    user_group = UserGroupManager.create(\n        name=\"basic-perm-test-group\",\n        user_ids=[admin_user.id],\n        user_performing_action=admin_user,\n    )\n\n    permissions = UserGroupManager.get_permissions(\n        user_group=user_group,\n        user_performing_action=admin_user,\n    )\n\n    assert (\n        \"basic\" in permissions\n    ), f\"New group should have 'basic' permission, got: {permissions}\"\n"
  },
  {
    "path": "backend/tests/integration/tests/usergroup/test_user_group_deletion.py",
    "content": "\"\"\"\nThis tests the deletion of a user group with the following foreign key constraints:\n- connector_credential_pair\n- user\n- credential\n- llm_provider\n- document_set\n- token_rate_limit (Not Implemented)\n- persona\n\"\"\"\n\nimport os\n\nimport pytest\n\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.credential import CredentialManager\nfrom tests.integration.common_utils.managers.document_set import DocumentSetManager\nfrom tests.integration.common_utils.managers.llm_provider import LLMProviderManager\nfrom tests.integration.common_utils.managers.persona import PersonaManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestCredential\nfrom tests.integration.common_utils.test_models import DATestDocumentSet\nfrom tests.integration.common_utils.test_models import DATestLLMProvider\nfrom tests.integration.common_utils.test_models import DATestPersona\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import DATestUserGroup\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"User group tests are enterprise only\",\n)\ndef test_user_group_deletion(\n    reset: None,  # noqa: ARG001\n    vespa_client: vespa_fixture,  # noqa: ARG001\n) -> None:\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    # create connectors\n    cc_pair = CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n    # Create user group with a cc_pair and a user\n    user_group: DATestUserGroup = UserGroupManager.create(\n        user_ids=[admin_user.id],\n        cc_pair_ids=[cc_pair.id],\n        user_performing_action=admin_user,\n    )\n    cc_pair.groups = [user_group.id]\n\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group], user_performing_action=admin_user\n    )\n    UserGroupManager.verify(\n        user_group=user_group,\n        user_performing_action=admin_user,\n    )\n    CCPairManager.verify(\n        cc_pair=cc_pair,\n        user_performing_action=admin_user,\n    )\n\n    # Create other objects that are related to the user group\n    credential: DATestCredential = CredentialManager.create(\n        groups=[user_group.id],\n        user_performing_action=admin_user,\n    )\n    document_set: DATestDocumentSet = DocumentSetManager.create(\n        cc_pair_ids=[cc_pair.id],\n        groups=[user_group.id],\n        user_performing_action=admin_user,\n    )\n    llm_provider: DATestLLMProvider = LLMProviderManager.create(\n        groups=[user_group.id],\n        user_performing_action=admin_user,\n    )\n    persona: DATestPersona = PersonaManager.create(\n        groups=[user_group.id],\n        user_performing_action=admin_user,\n    )\n\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group], user_performing_action=admin_user\n    )\n    UserGroupManager.verify(\n        user_group=user_group,\n        user_performing_action=admin_user,\n    )\n\n    # Delete the user group\n    UserGroupManager.delete(\n        user_group=user_group,\n        user_performing_action=admin_user,\n    )\n\n    UserGroupManager.wait_for_deletion_completion(\n        user_groups_to_check=[user_group], user_performing_action=admin_user\n    )\n\n    # Set our expected local representations to empty\n    credential.groups = []\n    document_set.groups = []\n    llm_provider.groups = []\n    persona.groups = []\n\n    # Verify that the local representations were updated\n    CredentialManager.verify(\n        credential=credential,\n        user_performing_action=admin_user,\n    )\n\n    DocumentSetManager.verify(\n        document_set=document_set,\n        user_performing_action=admin_user,\n    )\n\n    LLMProviderManager.verify(\n        llm_provider=llm_provider,\n        user_performing_action=admin_user,\n    )\n\n    PersonaManager.verify(\n        persona=persona,\n        user_performing_action=admin_user,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/usergroup/test_usergroup_syncing.py",
    "content": "import os\n\nimport pytest\n\nfrom onyx.server.documents.models import DocumentSource\nfrom tests.integration.common_utils.constants import NUM_DOCS\nfrom tests.integration.common_utils.managers.api_key import APIKeyManager\nfrom tests.integration.common_utils.managers.cc_pair import CCPairManager\nfrom tests.integration.common_utils.managers.document import DocumentManager\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestAPIKey\nfrom tests.integration.common_utils.test_models import DATestUser\nfrom tests.integration.common_utils.test_models import DATestUserGroup\nfrom tests.integration.common_utils.vespa import vespa_fixture\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"User group tests are enterprise only\",\n)\ndef test_removing_connector(\n    reset: None,  # noqa: ARG001\n    vespa_client: vespa_fixture,\n) -> None:\n    # Creating an admin user (first user created is automatically an admin)\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n\n    # create api key\n    api_key: DATestAPIKey = APIKeyManager.create(\n        user_performing_action=admin_user,\n    )\n\n    # create connectors\n    cc_pair_1 = CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n    cc_pair_2 = CCPairManager.create_from_scratch(\n        source=DocumentSource.INGESTION_API,\n        user_performing_action=admin_user,\n    )\n\n    # seed documents\n    cc_pair_1.documents = DocumentManager.seed_dummy_docs(\n        cc_pair=cc_pair_1,\n        num_docs=NUM_DOCS,\n        api_key=api_key,\n    )\n\n    cc_pair_2.documents = DocumentManager.seed_dummy_docs(\n        cc_pair=cc_pair_2,\n        num_docs=NUM_DOCS,\n        api_key=api_key,\n    )\n\n    # Create user group\n    user_group_1: DATestUserGroup = UserGroupManager.create(\n        cc_pair_ids=[cc_pair_1.id, cc_pair_2.id],\n        user_performing_action=admin_user,\n    )\n\n    UserGroupManager.wait_for_sync(\n        user_groups_to_check=[user_group_1], user_performing_action=admin_user\n    )\n\n    UserGroupManager.verify(\n        user_group=user_group_1,\n        user_performing_action=admin_user,\n    )\n\n    # make sure cc_pair_1 docs are user_group_1 only\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_1,\n        group_names=[user_group_1.name],\n        doc_creating_user=admin_user,\n    )\n\n    # make sure cc_pair_2 docs are user_group_1 only\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_2,\n        group_names=[user_group_1.name],\n        doc_creating_user=admin_user,\n    )\n\n    # remove cc_pair_2 from document set\n    user_group_1.cc_pair_ids = [cc_pair_1.id]\n    UserGroupManager.edit(\n        user_group_1,\n        user_performing_action=admin_user,\n    )\n\n    UserGroupManager.wait_for_sync(\n        user_performing_action=admin_user,\n    )\n\n    # make sure cc_pair_1 docs are user_group_1 only\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_1,\n        group_names=[user_group_1.name],\n        doc_creating_user=admin_user,\n    )\n\n    # make sure cc_pair_2 docs have no user group\n    DocumentManager.verify(\n        vespa_client=vespa_client,\n        cc_pair=cc_pair_2,\n        group_names=[],\n        doc_creating_user=admin_user,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/users/test_default_group_assignment.py",
    "content": "\"\"\"Integration tests for default group assignment on user registration.\n\nVerifies that:\n- The first registered user is assigned to the Admin default group\n- Subsequent registered users are assigned to the Basic default group\n- account_type is set to STANDARD for email/password registrations\n\"\"\"\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.db.enums import AccountType\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef test_default_group_assignment_on_registration(reset: None) -> None:  # noqa: ARG001\n    # Register first user — should become admin\n    admin_user: DATestUser = UserManager.create(name=\"first_user\")\n    assert admin_user.role == UserRole.ADMIN\n\n    # Register second user — should become basic\n    basic_user: DATestUser = UserManager.create(name=\"second_user\")\n    assert basic_user.role == UserRole.BASIC\n\n    # Fetch all groups including default ones\n    all_groups = UserGroupManager.get_all(\n        user_performing_action=admin_user,\n        include_default=True,\n    )\n\n    # Find the default Admin and Basic groups\n    admin_group = next(\n        (g for g in all_groups if g.name == \"Admin\" and g.is_default), None\n    )\n    basic_group = next(\n        (g for g in all_groups if g.name == \"Basic\" and g.is_default), None\n    )\n    assert admin_group is not None, \"Admin default group not found\"\n    assert basic_group is not None, \"Basic default group not found\"\n\n    # Verify admin user is in Admin group and NOT in Basic group\n    admin_group_user_ids = {str(u.id) for u in admin_group.users}\n    basic_group_user_ids = {str(u.id) for u in basic_group.users}\n\n    assert (\n        admin_user.id in admin_group_user_ids\n    ), \"First user should be in Admin default group\"\n    assert (\n        admin_user.id not in basic_group_user_ids\n    ), \"First user should NOT be in Basic default group\"\n\n    # Verify basic user is in Basic group and NOT in Admin group\n    assert (\n        basic_user.id in basic_group_user_ids\n    ), \"Second user should be in Basic default group\"\n    assert (\n        basic_user.id not in admin_group_user_ids\n    ), \"Second user should NOT be in Admin default group\"\n\n    # Verify account_type is STANDARD for both users via user listing API\n    paginated_result = UserManager.get_user_page(\n        user_performing_action=admin_user,\n        page_num=0,\n        page_size=10,\n    )\n    users_by_id = {str(u.id): u for u in paginated_result.items}\n\n    admin_snapshot = users_by_id.get(admin_user.id)\n    basic_snapshot = users_by_id.get(basic_user.id)\n    assert admin_snapshot is not None, \"Admin user not found in user listing\"\n    assert basic_snapshot is not None, \"Basic user not found in user listing\"\n\n    assert (\n        admin_snapshot.account_type == AccountType.STANDARD\n    ), f\"Admin user account_type should be STANDARD, got {admin_snapshot.account_type}\"\n    assert (\n        basic_snapshot.account_type == AccountType.STANDARD\n    ), f\"Basic user account_type should be STANDARD, got {basic_snapshot.account_type}\"\n"
  },
  {
    "path": "backend/tests/integration/tests/users/test_password_signup_upgrade.py",
    "content": "\"\"\"Integration tests for password signup upgrade paths.\n\nVerifies that when a BOT or EXT_PERM_USER user signs up via email/password:\n- Their account_type is upgraded to STANDARD\n- They are assigned to the Basic default group\n- They gain the correct effective permissions\n\"\"\"\n\nimport pytest\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.db.enums import AccountType\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _get_default_group_member_emails(\n    admin_user: DATestUser,\n    group_name: str,\n) -> set[str]:\n    \"\"\"Get the set of emails of all members in a named default group.\"\"\"\n    all_groups = UserGroupManager.get_all(admin_user, include_default=True)\n    matched = [g for g in all_groups if g.is_default and g.name == group_name]\n    assert matched, f\"Default group '{group_name}' not found\"\n    return {u.email for u in matched[0].users}\n\n\n@pytest.mark.parametrize(\n    \"target_role\",\n    [UserRole.EXT_PERM_USER, UserRole.SLACK_USER],\n    ids=[\"ext_perm_user\", \"slack_user\"],\n)\ndef test_password_signup_upgrade(\n    reset: None,  # noqa: ARG001\n    target_role: UserRole,\n) -> None:\n    \"\"\"When a non-web user signs up via email/password, they should be\n    upgraded to STANDARD account_type and assigned to the Basic default group.\"\"\"\n    admin_user: DATestUser = UserManager.create(email=\"admin@example.com\")\n\n    test_email = f\"{target_role.value}_upgrade@example.com\"\n    test_user = UserManager.create(email=test_email)\n\n    test_user = UserManager.set_role(\n        user_to_set=test_user,\n        target_role=target_role,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n\n    # Verify user was removed from Basic group after downgrade\n    basic_emails = _get_default_group_member_emails(admin_user, \"Basic\")\n    assert (\n        test_email not in basic_emails\n    ), f\"{target_role.value} should not be in Basic default group\"\n\n    # Re-register with the same email — triggers the password signup upgrade\n    upgraded_user = UserManager.create(email=test_email)\n\n    assert upgraded_user.role == UserRole.BASIC\n\n    paginated = UserManager.get_user_page(\n        user_performing_action=admin_user,\n        page_num=0,\n        page_size=10,\n    )\n    user_snapshot = next(\n        (u for u in paginated.items if str(u.id) == upgraded_user.id), None\n    )\n    assert user_snapshot is not None\n    assert (\n        user_snapshot.account_type == AccountType.STANDARD\n    ), f\"Expected STANDARD, got {user_snapshot.account_type}\"\n\n    # Verify user is now in the Basic default group\n    basic_emails = _get_default_group_member_emails(admin_user, \"Basic\")\n    assert (\n        test_email in basic_emails\n    ), f\"Upgraded user '{test_email}' not found in Basic default group\"\n\n\ndef test_password_signup_upgrade_propagates_permissions(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"When an EXT_PERM_USER or SLACK_USER signs up via password, they should\n    gain the 'basic' permission through the Basic default group assignment.\"\"\"\n    admin_user: DATestUser = UserManager.create(email=\"admin@example.com\")\n\n    # --- EXT_PERM_USER path ---\n    ext_email = \"ext_perms_check@example.com\"\n    ext_user = UserManager.create(email=ext_email)\n\n    initial_perms = UserManager.get_permissions(ext_user)\n    assert \"basic\" in initial_perms\n\n    ext_user = UserManager.set_role(\n        user_to_set=ext_user,\n        target_role=UserRole.EXT_PERM_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n\n    basic_emails = _get_default_group_member_emails(admin_user, \"Basic\")\n    assert ext_email not in basic_emails\n\n    upgraded = UserManager.create(email=ext_email)\n    assert upgraded.role == UserRole.BASIC\n\n    perms = UserManager.get_permissions(upgraded)\n    assert (\n        \"basic\" in perms\n    ), f\"Upgraded EXT_PERM_USER should have 'basic' permission, got: {perms}\"\n\n    # --- SLACK_USER path ---\n    slack_email = \"slack_perms_check@example.com\"\n    slack_user = UserManager.create(email=slack_email)\n\n    slack_user = UserManager.set_role(\n        user_to_set=slack_user,\n        target_role=UserRole.SLACK_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n\n    basic_emails = _get_default_group_member_emails(admin_user, \"Basic\")\n    assert slack_email not in basic_emails\n\n    upgraded = UserManager.create(email=slack_email)\n    assert upgraded.role == UserRole.BASIC\n\n    perms = UserManager.get_permissions(upgraded)\n    assert (\n        \"basic\" in perms\n    ), f\"Upgraded SLACK_USER should have 'basic' permission, got: {perms}\"\n"
  },
  {
    "path": "backend/tests/integration/tests/users/test_reactivation_groups.py",
    "content": "\"\"\"Integration tests for default group reconciliation on user reactivation.\n\nVerifies that:\n- A deactivated user retains default group membership after reactivation\n- Reactivation via the admin API reconciles missing group membership\n\"\"\"\n\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.managers.user_group import UserGroupManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\ndef _get_default_group_member_emails(\n    admin_user: DATestUser,\n    group_name: str,\n) -> set[str]:\n    \"\"\"Get the set of emails of all members in a named default group.\"\"\"\n    all_groups = UserGroupManager.get_all(admin_user, include_default=True)\n    matched = [g for g in all_groups if g.is_default and g.name == group_name]\n    assert matched, f\"Default group '{group_name}' not found\"\n    return {u.email for u in matched[0].users}\n\n\ndef test_reactivated_user_retains_default_group(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Deactivating and reactivating a user should preserve their\n    default group membership.\"\"\"\n    admin_user: DATestUser = UserManager.create(name=\"admin_user\")\n    basic_user: DATestUser = UserManager.create(name=\"basic_user\")\n\n    # Verify user is in Basic group initially\n    basic_emails = _get_default_group_member_emails(admin_user, \"Basic\")\n    assert basic_user.email in basic_emails\n\n    # Deactivate the user\n    UserManager.set_status(\n        user_to_set=basic_user,\n        target_status=False,\n        user_performing_action=admin_user,\n    )\n\n    # Reactivate the user\n    UserManager.set_status(\n        user_to_set=basic_user,\n        target_status=True,\n        user_performing_action=admin_user,\n    )\n\n    # Verify user is still in Basic group after reactivation\n    basic_emails = _get_default_group_member_emails(admin_user, \"Basic\")\n    assert (\n        basic_user.email in basic_emails\n    ), \"Reactivated user should still be in Basic default group\"\n"
  },
  {
    "path": "backend/tests/integration/tests/users/test_seat_limit.py",
    "content": "\"\"\"Integration tests for seat limit enforcement on user creation paths.\n\nVerifies that when a license with a seat limit is active, new user\ncreation (registration, invite, reactivation) is blocked with HTTP 402.\n\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timedelta\n\nimport redis\nimport requests\n\nfrom ee.onyx.server.license.models import LicenseMetadata\nfrom ee.onyx.server.license.models import LicenseSource\nfrom ee.onyx.server.license.models import PlanType\nfrom onyx.configs.app_configs import REDIS_DB_NUMBER\nfrom onyx.configs.app_configs import REDIS_HOST\nfrom onyx.configs.app_configs import REDIS_PORT\nfrom onyx.server.settings.models import ApplicationStatus\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.constants import GENERAL_HEADERS\nfrom tests.integration.common_utils.managers.user import UserManager\n\n# TenantRedis prefixes every key with \"{tenant_id}:\".\n# Single-tenant deployments use \"public\" as the tenant id.\n_LICENSE_REDIS_KEY = \"public:license:metadata\"\n\n\ndef _seed_license(r: redis.Redis, seats: int) -> None:\n    \"\"\"Write a LicenseMetadata entry into Redis with the given seat cap.\"\"\"\n    now = datetime.utcnow()\n    metadata = LicenseMetadata(\n        tenant_id=\"public\",\n        organization_name=\"Test Org\",\n        seats=seats,\n        used_seats=0,  # check_seat_availability recalculates from DB\n        plan_type=PlanType.ANNUAL,\n        issued_at=now,\n        expires_at=now + timedelta(days=365),\n        status=ApplicationStatus.ACTIVE,\n        source=LicenseSource.MANUAL_UPLOAD,\n    )\n    r.set(_LICENSE_REDIS_KEY, metadata.model_dump_json(), ex=300)\n\n\ndef _clear_license(r: redis.Redis) -> None:\n    r.delete(_LICENSE_REDIS_KEY)\n\n\ndef _redis() -> redis.Redis:\n    return redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB_NUMBER)\n\n\n# ------------------------------------------------------------------\n# Registration\n# ------------------------------------------------------------------\n\n\ndef test_registration_blocked_when_seats_full(\n    reset: None,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"POST /auth/register returns 402 when the seat limit is reached.\"\"\"\n    r = _redis()\n\n    # First user is admin — occupies 1 seat\n    UserManager.create(name=\"admin_user\")\n\n    # License allows exactly 1 seat → already full\n    _seed_license(r, seats=1)\n\n    try:\n        response = requests.post(\n            url=f\"{API_SERVER_URL}/auth/register\",\n            json={\n                \"email\": \"blocked@example.com\",\n                \"username\": \"blocked@example.com\",\n                \"password\": \"TestPassword123!\",\n            },\n            headers=GENERAL_HEADERS,\n        )\n        assert response.status_code == 402\n    finally:\n        _clear_license(r)\n\n\n# ------------------------------------------------------------------\n# Invitation\n# ------------------------------------------------------------------\n\n\ndef test_invite_blocked_when_seats_full(reset: None) -> None:  # noqa: ARG001\n    \"\"\"PUT /manage/admin/users returns 402 when the seat limit is reached.\"\"\"\n    r = _redis()\n\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    _seed_license(r, seats=1)\n\n    try:\n        response = requests.put(\n            url=f\"{API_SERVER_URL}/manage/admin/users\",\n            json={\"emails\": [\"newuser@example.com\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 402\n    finally:\n        _clear_license(r)\n\n\n# ------------------------------------------------------------------\n# Reactivation\n# ------------------------------------------------------------------\n\n\ndef test_reactivation_blocked_when_seats_full(\n    reset: None,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"PATCH /manage/admin/activate-user returns 402 when seats are full.\"\"\"\n    r = _redis()\n\n    admin_user = UserManager.create(name=\"admin_user\")\n    basic_user = UserManager.create(name=\"basic_user\")\n\n    # Deactivate the basic user (frees a seat in the DB count)\n    UserManager.set_status(\n        basic_user, target_status=False, user_performing_action=admin_user\n    )\n\n    # Set license to 1 seat — only admin counts now\n    _seed_license(r, seats=1)\n\n    try:\n        response = requests.patch(\n            url=f\"{API_SERVER_URL}/manage/admin/activate-user\",\n            json={\"user_email\": basic_user.email},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 402\n    finally:\n        _clear_license(r)\n\n\n# ------------------------------------------------------------------\n# No license → no enforcement\n# ------------------------------------------------------------------\n\n\ndef test_registration_allowed_without_license(\n    reset: None,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"Without a license in Redis, registration is unrestricted.\"\"\"\n    r = _redis()\n\n    # Make sure there is no cached license\n    _clear_license(r)\n\n    UserManager.create(name=\"admin_user\")\n\n    # Second user should register without issue\n    second_user = UserManager.create(name=\"second_user\")\n    assert second_user is not None\n"
  },
  {
    "path": "backend/tests/integration/tests/users/test_slack_user_deactivation.py",
    "content": "\"\"\"Integration tests for Slack user deactivation and reactivation via admin endpoints.\n\nVerifies that:\n- Slack users can be deactivated by admins\n- Deactivated Slack users can be reactivated by admins\n- Reactivation is blocked when the seat limit is reached\n\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timedelta\n\nimport redis\nimport requests\n\nfrom ee.onyx.server.license.models import LicenseMetadata\nfrom ee.onyx.server.license.models import LicenseSource\nfrom ee.onyx.server.license.models import PlanType\nfrom onyx.auth.schemas import UserRole\nfrom onyx.configs.app_configs import REDIS_DB_NUMBER\nfrom onyx.configs.app_configs import REDIS_HOST\nfrom onyx.configs.app_configs import REDIS_PORT\nfrom onyx.server.settings.models import ApplicationStatus\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n_LICENSE_REDIS_KEY = \"public:license:metadata\"\n\n\ndef _seed_license(r: redis.Redis, seats: int) -> None:\n    now = datetime.utcnow()\n    metadata = LicenseMetadata(\n        tenant_id=\"public\",\n        organization_name=\"Test Org\",\n        seats=seats,\n        used_seats=0,\n        plan_type=PlanType.ANNUAL,\n        issued_at=now,\n        expires_at=now + timedelta(days=365),\n        status=ApplicationStatus.ACTIVE,\n        source=LicenseSource.MANUAL_UPLOAD,\n    )\n    r.set(_LICENSE_REDIS_KEY, metadata.model_dump_json(), ex=300)\n\n\ndef _clear_license(r: redis.Redis) -> None:\n    r.delete(_LICENSE_REDIS_KEY)\n\n\ndef _redis() -> redis.Redis:\n    return redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB_NUMBER)\n\n\ndef _get_user_is_active(email: str, admin_user: DATestUser) -> bool:\n    \"\"\"Look up a user's is_active flag via the admin users list endpoint.\"\"\"\n    result = UserManager.get_user_page(\n        user_performing_action=admin_user,\n        search_query=email,\n    )\n    matching = [u for u in result.items if u.email == email]\n    assert len(matching) == 1, f\"Expected exactly 1 user with email {email}\"\n    return matching[0].is_active\n\n\ndef test_slack_user_deactivate_and_reactivate(\n    reset: None,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    \"\"\"Admin can deactivate and then reactivate a Slack user.\"\"\"\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    slack_user = UserManager.create(name=\"slack_test_user\")\n    slack_user = UserManager.set_role(\n        user_to_set=slack_user,\n        target_role=UserRole.SLACK_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n\n    # Deactivate the Slack user\n    UserManager.set_status(\n        slack_user, target_status=False, user_performing_action=admin_user\n    )\n    assert _get_user_is_active(slack_user.email, admin_user) is False\n\n    # Reactivate the Slack user\n    UserManager.set_status(\n        slack_user, target_status=True, user_performing_action=admin_user\n    )\n    assert _get_user_is_active(slack_user.email, admin_user) is True\n\n\ndef test_slack_user_reactivation_blocked_by_seat_limit(\n    reset: None,  # noqa: ARG001\n) -> None:\n    \"\"\"Reactivating a deactivated Slack user returns 402 when seats are full.\"\"\"\n    r = _redis()\n\n    admin_user = UserManager.create(name=\"admin_user\")\n\n    slack_user = UserManager.create(name=\"slack_test_user\")\n    slack_user = UserManager.set_role(\n        user_to_set=slack_user,\n        target_role=UserRole.SLACK_USER,\n        user_performing_action=admin_user,\n        explicit_override=True,\n    )\n\n    UserManager.set_status(\n        slack_user, target_status=False, user_performing_action=admin_user\n    )\n\n    # License allows 1 seat — only admin counts\n    _seed_license(r, seats=1)\n\n    try:\n        response = requests.patch(\n            url=f\"{API_SERVER_URL}/manage/admin/activate-user\",\n            json={\"user_email\": slack_user.email},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 402\n    finally:\n        _clear_license(r)\n"
  },
  {
    "path": "backend/tests/integration/tests/users/test_user_pagination.py",
    "content": "from onyx.auth.schemas import UserRole\nfrom onyx.server.models import FullUserSnapshot\nfrom tests.integration.common_utils.managers.user import UserManager\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\n# Gets a page of users from the db that match the given parameters and then\n# compares that returned page to the list of users passed into the function\n# to verify that the pagination and filtering works as expected.\ndef _verify_user_pagination(\n    users: list[DATestUser],\n    user_performing_action: DATestUser,\n    page_size: int = 5,\n    search_query: str | None = None,\n    role_filter: list[UserRole] | None = None,\n    is_active_filter: bool | None = None,\n) -> None:\n    retrieved_users: list[FullUserSnapshot] = []\n\n    for i in range(0, len(users), page_size):\n        paginated_result = UserManager.get_user_page(\n            page_num=i // page_size,\n            page_size=page_size,\n            search_query=search_query,\n            role_filter=role_filter,\n            is_active_filter=is_active_filter,\n            user_performing_action=user_performing_action,\n        )\n\n        # Verify that the total items is equal to the length of the users list\n        assert paginated_result.total_items == len(users)\n        # Verify that the number of items in the page is equal to the page size\n        assert len(paginated_result.items) == page_size\n        # Add the retrieved users to the list of retrieved users\n        retrieved_users.extend(paginated_result.items)\n\n    # Create a set of all the expected emails\n    all_expected_emails = set([user.email for user in users])\n    # Create a set of all the retrieved emails\n    all_retrieved_emails = set([user.email for user in retrieved_users])\n\n    # Verify that the set of retrieved emails is equal to the set of expected emails\n    assert all_expected_emails == all_retrieved_emails\n\n\ndef test_user_pagination(reset: None) -> None:  # noqa: ARG001\n    # Create an admin user to perform actions\n    user_performing_action: DATestUser = UserManager.create(\n        name=\"admin_performing_action\"\n    )\n\n    # Create 9 admin users\n    admin_users: list[DATestUser] = UserManager.create_test_users(\n        user_name_prefix=\"admin\",\n        count=9,\n        role=UserRole.ADMIN,\n        user_performing_action=user_performing_action,\n    )\n\n    # Add the user_performing_action to the list of admins\n    admin_users.append(user_performing_action)\n\n    # Create 20 basic users\n    basic_users: list[DATestUser] = UserManager.create_test_users(\n        user_name_prefix=\"basic\",\n        count=10,\n        role=UserRole.BASIC,\n        user_performing_action=user_performing_action,\n    )\n\n    # Create 10 global curators\n    global_curators: list[DATestUser] = UserManager.create_test_users(\n        user_name_prefix=\"global_curator\",\n        count=10,\n        role=UserRole.GLOBAL_CURATOR,\n        user_performing_action=user_performing_action,\n    )\n\n    # Create 10 inactive admins\n    inactive_admins: list[DATestUser] = UserManager.create_test_users(\n        user_name_prefix=\"inactive_admin\",\n        count=10,\n        role=UserRole.ADMIN,\n        is_active=False,\n        user_performing_action=user_performing_action,\n    )\n\n    # Create 10 global curator users with an email containing \"search\"\n    searchable_curators: list[DATestUser] = UserManager.create_test_users(\n        user_name_prefix=\"search_curator\",\n        count=10,\n        role=UserRole.GLOBAL_CURATOR,\n        user_performing_action=user_performing_action,\n    )\n\n    # Combine all the users lists into the all_users list\n    all_users: list[DATestUser] = (\n        admin_users\n        + basic_users\n        + global_curators\n        + inactive_admins\n        + searchable_curators\n    )\n    for user in all_users:\n        # Verify that the user's role in the db matches\n        # the role in the user object\n        assert UserManager.is_role(user, user.role)\n        # Verify that the user's status in the db matches\n        # the status in the user object\n        assert UserManager.is_status(user, user.is_active)\n\n    # Verify pagination\n    _verify_user_pagination(\n        users=all_users,\n        user_performing_action=user_performing_action,\n    )\n\n    # Verify filtering by role\n    _verify_user_pagination(\n        users=admin_users + inactive_admins,\n        role_filter=[UserRole.ADMIN],\n        user_performing_action=user_performing_action,\n    )\n    # Verify filtering by status\n    _verify_user_pagination(\n        users=inactive_admins,\n        is_active_filter=False,\n        user_performing_action=user_performing_action,\n    )\n    # Verify filtering by search query\n    _verify_user_pagination(\n        users=searchable_curators,\n        search_query=\"search\",\n        user_performing_action=user_performing_action,\n    )\n\n    # Verify filtering by role and status\n    _verify_user_pagination(\n        users=inactive_admins,\n        role_filter=[UserRole.ADMIN],\n        is_active_filter=False,\n        user_performing_action=user_performing_action,\n    )\n\n    # Verify filtering by role and search query\n    _verify_user_pagination(\n        users=searchable_curators,\n        role_filter=[UserRole.GLOBAL_CURATOR],\n        search_query=\"search\",\n        user_performing_action=user_performing_action,\n    )\n\n    # Verify filtering by role and status and search query\n    _verify_user_pagination(\n        users=inactive_admins,\n        role_filter=[UserRole.ADMIN],\n        is_active_filter=False,\n        search_query=\"inactive_ad\",\n        user_performing_action=user_performing_action,\n    )\n\n    # Verify filtering by multiple roles (admin and global curator)\n    _verify_user_pagination(\n        users=admin_users + global_curators + inactive_admins + searchable_curators,\n        role_filter=[UserRole.ADMIN, UserRole.GLOBAL_CURATOR],\n        user_performing_action=user_performing_action,\n    )\n"
  },
  {
    "path": "backend/tests/integration/tests/web_search/test_web_search_api.py",
    "content": "import os\n\nimport pytest\nimport requests\n\nfrom shared_configs.enums import WebContentProviderType\nfrom shared_configs.enums import WebSearchProviderType\nfrom tests.integration.common_utils.constants import API_SERVER_URL\nfrom tests.integration.common_utils.test_models import DATestUser\n\n\nclass TestOnyxWebCrawler:\n    \"\"\"\n    Integration tests for the Onyx web crawler functionality.\n\n    These tests verify that the built-in crawler can fetch and parse\n    content from public websites correctly.\n    \"\"\"\n\n    @pytest.mark.skip(reason=\"Temporarily disabled\")\n    def test_fetches_public_url_successfully(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that the crawler can fetch content from a public URL.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\"urls\": [\"https://example.com/\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200, response.text\n        data = response.json()\n\n        assert data[\"provider_type\"] == WebContentProviderType.ONYX_WEB_CRAWLER.value\n        assert len(data[\"results\"]) == 1\n\n        result = data[\"results\"][0]\n        assert \"content\" in result\n        content = result[\"content\"]\n\n        # example.com is a static page maintained by IANA with known content\n        # Verify exact expected text from the page\n        assert \"Example Domain\" in content\n        assert \"This domain is for use in\" in content\n        assert \"documentation\" in content or \"illustrative\" in content\n\n    @pytest.mark.skip(reason=\"Temporarily disabled\")\n    def test_fetches_multiple_urls(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that the crawler can fetch multiple URLs in one request.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\n                \"urls\": [\n                    \"https://example.com/\",\n                    \"https://www.iana.org/domains/reserved\",\n                ]\n            },\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200, response.text\n        data = response.json()\n\n        assert data[\"provider_type\"] == WebContentProviderType.ONYX_WEB_CRAWLER.value\n        assert len(data[\"results\"]) == 2\n\n        for result in data[\"results\"]:\n            assert \"content\" in result\n\n    def test_handles_nonexistent_domain(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that the crawler handles non-existent domains gracefully.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\"urls\": [\"https://this-domain-definitely-does-not-exist-12345.com/\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200, response.text\n        data = response.json()\n\n        assert data[\"provider_type\"] == WebContentProviderType.ONYX_WEB_CRAWLER.value\n\n        # The API filters out docs with no title/content, so unreachable domains return no results\n        assert data[\"results\"] == []\n\n    def test_handles_404_page(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that the crawler handles 404 responses gracefully.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\"urls\": [\"https://example.com/this-page-does-not-exist-12345\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200, response.text\n        data = response.json()\n\n        assert data[\"provider_type\"] == WebContentProviderType.ONYX_WEB_CRAWLER.value\n\n        # Non-200 responses are treated as non-content and filtered out\n        assert data[\"results\"] == []\n\n    def test_https_url_with_path(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that the crawler handles HTTPS URLs with paths correctly.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\"urls\": [\"https://www.iana.org/about\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200, response.text\n        data = response.json()\n\n        assert len(data[\"results\"]) == 1\n        result = data[\"results\"][0]\n        assert \"content\" in result\n\n\nclass TestSsrfProtection:\n    \"\"\"\n    Integration tests for SSRF protection on the /open-urls endpoint.\n\n    These tests verify that the endpoint correctly blocks requests to:\n    - Internal/private IP addresses\n    - Cloud metadata endpoints\n    - Blocked hostnames (Kubernetes, cloud metadata, etc.)\n    \"\"\"\n\n    def test_blocks_localhost_ip(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that requests to localhost (127.0.0.1) are blocked.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\"urls\": [\"http://127.0.0.1/\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200\n        data = response.json()\n        # URL should be processed but return empty content (blocked by SSRF protection)\n        assert len(data[\"results\"]) == 0 or data[\"results\"][0][\"content\"] == \"\"\n\n    def test_blocks_private_ip_10_network(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that requests to 10.x.x.x private network are blocked.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\"urls\": [\"http://10.0.0.1/\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200\n        data = response.json()\n        assert len(data[\"results\"]) == 0 or data[\"results\"][0][\"content\"] == \"\"\n\n    def test_blocks_private_ip_192_168_network(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that requests to 192.168.x.x private network are blocked.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\"urls\": [\"http://192.168.1.1/\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200\n        data = response.json()\n        assert len(data[\"results\"]) == 0 or data[\"results\"][0][\"content\"] == \"\"\n\n    def test_blocks_private_ip_172_network(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that requests to 172.16-31.x.x private network are blocked.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\"urls\": [\"http://172.16.0.1/\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200\n        data = response.json()\n        assert len(data[\"results\"]) == 0 or data[\"results\"][0][\"content\"] == \"\"\n\n    def test_blocks_aws_metadata_endpoint(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that requests to AWS metadata endpoint (169.254.169.254) are blocked.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\"urls\": [\"http://169.254.169.254/latest/meta-data/\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200\n        data = response.json()\n        assert len(data[\"results\"]) == 0 or data[\"results\"][0][\"content\"] == \"\"\n\n    def test_blocks_kubernetes_metadata_hostname(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that requests to Kubernetes internal hostname are blocked.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\"urls\": [\"http://kubernetes.default.svc.cluster.local/\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200\n        data = response.json()\n        assert len(data[\"results\"]) == 0 or data[\"results\"][0][\"content\"] == \"\"\n\n    def test_blocks_google_metadata_hostname(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that requests to Google Cloud metadata hostname are blocked.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\"urls\": [\"http://metadata.google.internal/\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200\n        data = response.json()\n        assert len(data[\"results\"]) == 0 or data[\"results\"][0][\"content\"] == \"\"\n\n    def test_blocks_localhost_with_port(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that requests to localhost with custom port are blocked.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\"urls\": [\"http://127.0.0.1:8080/metrics\"]},\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200\n        data = response.json()\n        assert len(data[\"results\"]) == 0 or data[\"results\"][0][\"content\"] == \"\"\n\n    def test_multiple_urls_filters_internal(self, admin_user: DATestUser) -> None:\n        \"\"\"Test that internal URLs are filtered while external URLs are processed.\"\"\"\n        response = requests.post(\n            f\"{API_SERVER_URL}/web-search/open-urls\",\n            json={\n                \"urls\": [\n                    \"http://127.0.0.1/\",  # Should be blocked\n                    \"http://192.168.1.1/\",  # Should be blocked\n                    \"https://example.com/\",  # Should be allowed (if reachable)\n                ]\n            },\n            headers=admin_user.headers,\n        )\n        assert response.status_code == 200\n        data = response.json()\n        # Internal URLs should return empty content\n        # The exact behavior depends on whether example.com is reachable\n        # but internal URLs should definitely not return sensitive data\n        for result in data[\"results\"]:\n            # Ensure no result contains internal network data\n            content = result.get(\"content\", \"\")\n            # These patterns would indicate SSRF vulnerability\n            assert \"metrics\" not in content.lower() or \"example\" in content.lower()\n            assert \"token\" not in content.lower() or \"example\" in content.lower()\n\n\n# Mark the Exa-dependent tests to skip if no API key\npytestmark_exa = pytest.mark.skipif(\n    not os.environ.get(\"EXA_API_KEY\"),\n    reason=\"EXA_API_KEY not set; live web search tests require real credentials\",\n)\n\n\ndef _activate_exa_provider(admin_user: DATestUser) -> int:\n    response = requests.post(\n        f\"{API_SERVER_URL}/admin/web-search/search-providers\",\n        json={\n            \"id\": None,\n            \"name\": \"integration-exa-provider\",\n            \"provider_type\": WebSearchProviderType.EXA.value,\n            \"config\": {},\n            \"api_key\": os.environ[\"EXA_API_KEY\"],\n            \"api_key_changed\": True,\n            \"activate\": True,\n        },\n        headers=admin_user.headers,\n    )\n    assert response.status_code == 200, response.text\n\n    provider = response.json()\n    assert provider[\"provider_type\"] == WebSearchProviderType.EXA.value\n    assert provider[\"is_active\"] is True\n    assert provider[\"has_api_key\"] is True\n\n    return provider[\"id\"]\n\n\n@pytestmark_exa\n@pytest.mark.skip(reason=\"Temporarily disabled\")\ndef test_web_search_endpoints_with_exa(\n    reset: None,  # noqa: ARG001\n    admin_user: DATestUser,\n) -> None:\n    provider_id = _activate_exa_provider(admin_user)\n    assert isinstance(provider_id, int)\n\n    search_request = {\"queries\": [\"wikipedia python programming\"], \"max_results\": 3}\n\n    lite_response = requests.post(\n        f\"{API_SERVER_URL}/web-search/search-lite\",\n        json=search_request,\n        headers=admin_user.headers,\n    )\n    assert lite_response.status_code == 200, lite_response.text\n    lite_data = lite_response.json()\n\n    assert lite_data[\"provider_type\"] == WebSearchProviderType.EXA.value\n    assert lite_data[\"results\"], \"Expected web search results from Exa\"\n\n    urls = [result[\"url\"] for result in lite_data[\"results\"] if result.get(\"url\")][:2]\n    assert urls, \"Web search should return at least one URL\"\n\n    open_response = requests.post(\n        f\"{API_SERVER_URL}/web-search/open-urls\",\n        json={\"urls\": urls},\n        headers=admin_user.headers,\n    )\n    assert open_response.status_code == 200, open_response.text\n    open_data = open_response.json()\n\n    assert open_data[\"provider_type\"] == WebContentProviderType.ONYX_WEB_CRAWLER.value\n    assert len(open_data[\"results\"]) == len(urls)\n    assert all(\"content\" in result for result in open_data[\"results\"])\n\n    combined_response = requests.post(\n        f\"{API_SERVER_URL}/web-search/search\",\n        json=search_request,\n        headers=admin_user.headers,\n    )\n    assert combined_response.status_code == 200, combined_response.text\n    combined_data = combined_response.json()\n\n    assert combined_data[\"search_provider_type\"] == WebSearchProviderType.EXA.value\n    assert (\n        combined_data[\"content_provider_type\"]\n        == WebContentProviderType.ONYX_WEB_CRAWLER.value\n    )\n    assert combined_data[\"search_results\"]\n\n    unique_urls = list(\n        dict.fromkeys(\n            result[\"url\"]\n            for result in combined_data[\"search_results\"]\n            if result.get(\"url\")\n        )\n    )\n    assert len(combined_data[\"full_content_results\"]) == len(unique_urls)\n"
  },
  {
    "path": "backend/tests/load_env_vars.py",
    "content": "import os\n\n\ndef load_env_vars(env_file: str = \".env\") -> None:\n    current_dir = os.path.dirname(os.path.abspath(__file__))\n    env_path = os.path.join(current_dir, env_file)\n    try:\n        with open(env_path, \"r\") as f:\n            for line in f:\n                line = line.strip()\n                if line and not line.startswith(\"#\"):\n                    key, value = line.split(\"=\", 1)\n                    os.environ[key] = value.strip()\n        print(\"Successfully loaded environment variables\")\n    except FileNotFoundError:\n        print(f\"File {env_file} not found\")\n"
  },
  {
    "path": "backend/tests/regression/answer_quality/README.md",
    "content": "# Search Quality Test Script\n\nThis Python script automates the process of running search quality tests for a backend system.\n\n## Features\n\n- Loads configuration from a YAML file\n- Sets up Docker environment\n- Manages environment variables\n- Switches to specified Git branch\n- Uploads test documents\n- Runs search quality tests\n- Cleans up Docker containers (optional)\n\n## Usage\n\n1. Ensure you have the required dependencies installed.\n2. Configure the `search_test_config.yaml` file based on the `search_test_config.yaml.template` file.\n3. Configure the `.env_eval` file in `deployment/docker_compose` with the correct environment variables.\n4. Set up the PYTHONPATH permanently:\n   Add the following line to your shell configuration file (e.g., `~/.bashrc`, `~/.zshrc`, or `~/.bash_profile`):\n   ```\n   export PYTHONPATH=$PYTHONPATH:/path/to/onyx/backend\n   ```\n   Replace `/path/to/onyx` with the actual path to your Onyx repository.\n   After adding this line, restart your terminal or run `source ~/.bashrc` (or the appropriate config file) to apply the changes.\n5. Navigate to Onyx repo:\n\n```\ncd path/to/onyx\n```\n\n6. Navigate to the answer_quality folder:\n\n```\ncd backend/tests/regression/answer_quality\n```\n\n7. To launch the evaluation environment, run the launch_eval_env.py script (this step can be skipped if you are running the env outside of docker, just leave \"environment_name\" blank):\n\n```\npython launch_eval_env.py\n```\n\n8. Run the file_uploader.py script to upload the zip files located at the path \"zipped_documents_file\"\n\n```\npython file_uploader.py\n```\n\n9. Run the run_qa.py script to ask questions from the jsonl located at the path \"questions_file\". This will hit the \"query/answer-with-quote\" API endpoint.\n\n```\npython run_qa.py\n```\n\nNote: All data will be saved even after the containers are shut down. There are instructions below to re-launching docker containers using this data.\n\nIf you decide to run multiple UIs at the same time, the ports will increment upwards from 3000 (E.g. http://localhost:3001).\n\nTo see which port the desired instance is on, look at the ports on the nginx container by running `docker ps` or using docker desktop.\n\nDocker daemon must be running for this to work.\n\n## Configuration\n\nEdit `search_test_config.yaml` to set:\n\n- output_folder\n  - This is the folder where the folders for each test will go\n  - These folders will contain the postgres/vespa data as well as the results for each test\n- zipped_documents_file\n  - The path to the zip file containing the files you'd like to test against\n- questions_file\n  - The path to the yaml containing the questions you'd like to test with\n- commit_sha\n  - Set this to the SHA of the commit you want to run the test against\n  - You must clear all local changes if you want to use this option\n  - Set this to null if you want it to just use the code as is\n- clean_up_docker_containers\n  - Set this to true to automatically delete all docker containers, networks and volumes after the test\n- launch_web_ui\n  - Set this to true if you want to use the UI during/after the testing process\n- only_state\n  - Whether to only run Vespa and Postgres\n- only_retrieve_docs\n  - Set true to only retrieve documents, not LLM response\n  - This is to save on API costs\n- use_cloud_gpu\n  - Set to true or false depending on if you want to use the remote gpu\n  - Only need to set this if use_cloud_gpu is true\n- model_server_ip\n  - This is the ip of the remote model server\n  - Only need to set this if use_cloud_gpu is true\n- model_server_port\n  - This is the port of the remote model server\n  - Only need to set this if use_cloud_gpu is true\n- environment_name\n  - Use this if you would like to relaunch a previous test instance\n  - Input the env_name of the test you'd like to re-launch\n  - Leave empty to launch referencing local default network locations\n- limit\n  - Max number of questions you'd like to ask against the dataset\n  - Set to null for no limit\n- llm\n  - Fill this out according to the normal LLM seeding\n\n## Relaunching From Existing Data\n\nTo launch an existing set of containers that has already completed indexing, set the environment_name variable. This will launch the docker containers mounted on the volumes of the indicated env_name and will not automatically index any documents or run any QA.\n\nOnce these containers are launched you can run file_uploader.py or run_qa.py (assuming you have run the steps in the Usage section above).\n\n- file_uploader.py will upload and index additional zipped files located at the zipped_documents_file path.\n- run_qa.py will ask questions located at the questions_file path against the indexed documents.\n"
  },
  {
    "path": "backend/tests/regression/answer_quality/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/regression/answer_quality/api_utils.py",
    "content": "import requests\nfrom retry import retry\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import InputType\nfrom onyx.db.enums import IndexingStatus\nfrom onyx.server.documents.models import ConnectorBase\nfrom tests.regression.answer_quality.cli_utils import get_api_server_host_port\n\nGENERAL_HEADERS = {\"Content-Type\": \"application/json\"}\n\n\ndef _api_url_builder(env_name: str, api_path: str) -> str:\n    if env_name:\n        return f\"http://localhost:{get_api_server_host_port(env_name)}\" + api_path\n    else:\n        return \"http://localhost:8080\" + api_path\n\n\n@retry(tries=10, delay=10)\ndef check_indexing_status(env_name: str) -> tuple[int, bool]:\n    url = _api_url_builder(env_name, \"/manage/admin/connector/indexing-status/\")\n    try:\n        indexing_status_dict = requests.post(\n            url, headers=GENERAL_HEADERS, json={\"get_all_connectors\": True}\n        ).json()\n    except Exception as e:\n        print(\"Failed to check indexing status, API server is likely starting up:\")\n        print(f\"\\t {str(e)}\")\n        print(\"trying again\")\n        raise e\n\n    ongoing_index_attempts = False\n    doc_count = 0\n    for connectors_by_source in indexing_status_dict:\n        connectors = connectors_by_source[\"indexing_statuses\"]\n        for connector in connectors:\n            status = connector[\"last_status\"]\n            if (\n                status == IndexingStatus.IN_PROGRESS\n                or status == IndexingStatus.NOT_STARTED\n            ):\n                ongoing_index_attempts = True\n            elif status == IndexingStatus.SUCCESS:\n                doc_count += 16\n            doc_count += connector[\"docs_indexed\"]\n            doc_count -= 16\n\n    # all the +16 and -16 are to account for the fact that the indexing status\n    # is only updated every 16 documents and will tells us how many are\n    # chunked, not indexed. probably need to fix this. in the future!\n    if doc_count:\n        doc_count += 16\n    return doc_count, ongoing_index_attempts\n\n\ndef run_cc_once(env_name: str, connector_id: int, credential_id: int) -> None:\n    url = _api_url_builder(env_name, \"/manage/admin/connector/run-once/\")\n    body = {\n        \"connector_id\": connector_id,\n        \"credential_ids\": [credential_id],\n        \"from_beginning\": True,\n    }\n    print(\"body:\", body)\n    response = requests.post(url, headers=GENERAL_HEADERS, json=body)\n    if response.status_code == 200:\n        print(\"Connector created successfully:\", response.json())\n    else:\n        print(\"Failed status_code:\", response.status_code)\n        print(\"Failed text:\", response.text)\n\n\ndef create_cc_pair(env_name: str, connector_id: int, credential_id: int) -> None:\n    url = _api_url_builder(\n        env_name, f\"/manage/connector/{connector_id}/credential/{credential_id}\"\n    )\n\n    body = {\"name\": \"zip_folder_contents\", \"is_public\": True, \"groups\": []}\n    print(\"body:\", body)\n    response = requests.put(url, headers=GENERAL_HEADERS, json=body)\n    if response.status_code == 200:\n        print(\"Connector created successfully:\", response.json())\n    else:\n        print(\"Failed status_code:\", response.status_code)\n        print(\"Failed text:\", response.text)\n\n\ndef _get_existing_connector_names(env_name: str) -> list[str]:\n    url = _api_url_builder(env_name, \"/manage/connector\")\n\n    body = {\n        \"credential_json\": {},\n        \"admin_public\": True,\n    }\n    response = requests.get(url, headers=GENERAL_HEADERS, json=body)\n    if response.status_code == 200:\n        connectors = response.json()\n        return [connector[\"name\"] for connector in connectors]\n    else:\n        raise RuntimeError(response.__dict__)\n\n\ndef create_connector(env_name: str, file_paths: list[str]) -> int:\n    url = _api_url_builder(env_name, \"/manage/admin/connector\")\n    connector_name = base_connector_name = \"search_eval_connector\"\n    existing_connector_names = _get_existing_connector_names(env_name)\n\n    count = 1\n    while connector_name in existing_connector_names:\n        connector_name = base_connector_name + \"_\" + str(count)\n        count += 1\n\n    connector = ConnectorBase(\n        name=connector_name,\n        source=DocumentSource.FILE,\n        input_type=InputType.LOAD_STATE,\n        connector_specific_config={\n            \"file_locations\": file_paths,\n            \"file_names\": [],  # For regression tests, no need for file_names\n            \"zip_metadata_file_id\": None,\n        },\n        refresh_freq=None,\n        prune_freq=None,\n        indexing_start=None,\n    )\n\n    body = connector.model_dump()\n    response = requests.post(url, headers=GENERAL_HEADERS, json=body)\n    if response.status_code == 200:\n        return response.json()[\"id\"]\n    else:\n        raise RuntimeError(response.__dict__)\n\n\ndef create_credential(env_name: str) -> int:\n    url = _api_url_builder(env_name, \"/manage/credential\")\n    body = {\n        \"credential_json\": {},\n        \"admin_public\": True,\n        \"source\": DocumentSource.FILE,\n    }\n    response = requests.post(url, headers=GENERAL_HEADERS, json=body)\n    if response.status_code == 200:\n        print(\"credential created successfully:\", response.json())\n        return response.json()[\"id\"]\n    else:\n        raise RuntimeError(response.__dict__)\n\n\n@retry(tries=10, delay=2, backoff=2)\ndef upload_file(env_name: str, zip_file_path: str) -> list[str]:\n    files = [\n        (\"files\", open(zip_file_path, \"rb\")),\n    ]\n\n    api_path = _api_url_builder(env_name, \"/manage/admin/connector/file/upload\")\n    try:\n        response = requests.post(api_path, files=files)\n        response.raise_for_status()  # Raises an HTTPError for bad responses\n        print(\"file uploaded successfully:\", response.json())\n        return response.json()[\"file_paths\"]\n    except Exception as e:\n        print(\"File upload failed, waiting for API server to come up and trying again\")\n        raise e\n"
  },
  {
    "path": "backend/tests/regression/answer_quality/cli_utils.py",
    "content": "import json\nimport os\nimport socket\nimport subprocess\nimport sys\nimport time\nfrom datetime import datetime\nfrom threading import Thread\nfrom typing import IO\n\nimport yaml\nfrom retry import retry\n\n\ndef _run_command(command: str, stream_output: bool = False) -> tuple[str, str]:\n    process = subprocess.Popen(\n        command,\n        shell=True,\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        text=True,\n        bufsize=1,\n    )\n\n    stdout_lines: list[str] = []\n    stderr_lines: list[str] = []\n\n    def process_stream(stream: IO[str], lines: list[str]) -> None:\n        for line in stream:\n            lines.append(line)\n            if stream_output:\n                print(\n                    line,\n                    end=\"\",\n                    file=sys.stdout if stream == process.stdout else sys.stderr,\n                )\n\n    stdout_thread = Thread(target=process_stream, args=(process.stdout, stdout_lines))\n    stderr_thread = Thread(target=process_stream, args=(process.stderr, stderr_lines))\n\n    stdout_thread.start()\n    stderr_thread.start()\n\n    stdout_thread.join()\n    stderr_thread.join()\n\n    process.wait()\n\n    if process.returncode != 0:\n        raise RuntimeError(f\"Command failed with error: {''.join(stderr_lines)}\")\n\n    return \"\".join(stdout_lines), \"\".join(stderr_lines)\n\n\ndef get_current_commit_sha() -> str:\n    print(\"Getting current commit SHA...\")\n    stdout, _ = _run_command(\"git rev-parse HEAD\")\n    sha = stdout.strip()\n    print(f\"Current commit SHA: {sha}\")\n    return sha\n\n\ndef switch_to_commit(commit_sha: str) -> None:\n    print(f\"Switching to commit: {commit_sha}...\")\n    _run_command(f\"git checkout {commit_sha}\")\n    print(f\"Successfully switched to commit: {commit_sha}\")\n    print(\"Repository updated successfully.\")\n\n\ndef get_docker_container_env_vars(env_name: str) -> dict:\n    \"\"\"\n    Retrieves environment variables from \"background\" and \"api_server\" Docker containers.\n    \"\"\"\n    print(f\"Getting environment variables for containers with env_name: {env_name}\")\n\n    combined_env_vars = {}\n    for container_type in [\"background\", \"api_server\"]:\n        container_name = _run_command(\n            f\"docker ps -a --format '{{{{.Names}}}}' | awk '/{container_type}/ && /{env_name}/'\"\n        )[0].strip()\n        if not container_name:\n            raise RuntimeError(\n                f\"No {container_type} container found with env_name: {env_name}\"\n            )\n\n        env_vars_json = _run_command(\n            f\"docker inspect --format='{{{{json .Config.Env}}}}' {container_name}\"\n        )[0]\n        env_vars_list = json.loads(env_vars_json.strip())\n\n        for env_var in env_vars_list:\n            key, value = env_var.split(\"=\", 1)\n            combined_env_vars[key] = value\n\n    return combined_env_vars\n\n\ndef manage_data_directories(env_name: str, base_path: str, use_cloud_gpu: bool) -> None:\n    # Use the user's home directory as the base path\n    target_path = os.path.join(os.path.expanduser(base_path), env_name)\n    directories = {\n        \"DANSWER_POSTGRES_DATA_DIR\": os.path.join(target_path, \"postgres/\"),\n        \"DANSWER_VESPA_DATA_DIR\": os.path.join(target_path, \"vespa/\"),\n    }\n    if not use_cloud_gpu:\n        directories[\"DANSWER_INDEX_MODEL_CACHE_DIR\"] = os.path.join(\n            target_path, \"index_model_cache/\"\n        )\n        directories[\"DANSWER_INFERENCE_MODEL_CACHE_DIR\"] = os.path.join(\n            target_path, \"inference_model_cache/\"\n        )\n\n    # Create directories if they don't exist\n    for env_var, directory in directories.items():\n        os.makedirs(directory, exist_ok=True)\n        os.environ[env_var] = directory\n        print(f\"Set {env_var} to: {directory}\")\n    results_output_path = os.path.join(target_path, \"evaluations_output/\")\n    os.makedirs(results_output_path, exist_ok=True)\n\n\ndef set_env_variables(\n    remote_server_ip: str,\n    remote_server_port: str,\n    use_cloud_gpu: bool,\n    llm_config: dict,\n) -> None:\n    env_vars: dict = {}\n    env_vars[\"ENV_SEED_CONFIGURATION\"] = json.dumps({\"llms\": [llm_config]})\n    env_vars[\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\"] = \"true\"\n    if use_cloud_gpu:\n        env_vars[\"MODEL_SERVER_HOST\"] = remote_server_ip\n        env_vars[\"MODEL_SERVER_PORT\"] = remote_server_port\n        env_vars[\"INDEXING_MODEL_SERVER_HOST\"] = remote_server_ip\n\n    for env_var_name, env_var in env_vars.items():\n        os.environ[env_var_name] = env_var\n        print(f\"Set {env_var_name} to: {env_var}\")\n\n\ndef _is_port_in_use(port: int) -> bool:\n    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:\n        return s.connect_ex((\"localhost\", port)) == 0\n\n\ndef start_docker_compose(\n    env_name: str, launch_web_ui: bool, use_cloud_gpu: bool, only_state: bool = False\n) -> None:\n    print(\"Starting Docker Compose...\")\n    os.chdir(os.path.dirname(__file__))\n    os.chdir(\"../../../../deployment/docker_compose/\")\n    command = (\n        f\"docker compose -f docker-compose.search-testing.yml -p onyx-{env_name} up -d\"\n    )\n    command += \" --build\"\n    command += \" --force-recreate\"\n\n    if only_state:\n        command += \" index relational_db\"\n    else:\n        if use_cloud_gpu:\n            command += \" --scale indexing_model_server=0\"\n            command += \" --scale inference_model_server=0\"\n        if launch_web_ui:\n            web_ui_port = 3000\n            while _is_port_in_use(web_ui_port):\n                web_ui_port += 1\n            print(f\"UI will be launched at http://localhost:{web_ui_port}\")\n            os.environ[\"NGINX_PORT\"] = str(web_ui_port)\n        else:\n            command += \" --scale web_server=0\"\n            command += \" --scale nginx=0\"\n\n    print(\"Docker Command:\\n\", command)\n\n    _run_command(command, stream_output=True)\n    print(\"Containers have been launched\")\n\n\ndef cleanup_docker(env_name: str) -> None:\n    print(\n        f\"Deleting Docker containers, volumes, and networks for project env_name: {env_name}\"\n    )\n\n    stdout, _ = _run_command(\"docker ps -a --format '{{json .}}'\")\n\n    containers = [json.loads(line) for line in stdout.splitlines()]\n    if not env_name:\n        env_name = datetime.now().strftime(\"-%Y\")\n    project_name = f\"onyx{env_name}\"\n    containers_to_delete = [\n        c for c in containers if c[\"Names\"].startswith(project_name)\n    ]\n\n    if not containers_to_delete:\n        print(f\"No containers found for project: {project_name}\")\n    else:\n        container_ids = \" \".join([c[\"ID\"] for c in containers_to_delete])\n        _run_command(f\"docker rm -f {container_ids}\")\n\n        print(\n            f\"Successfully deleted {len(containers_to_delete)} containers for project: {project_name}\"\n        )\n\n    stdout, _ = _run_command(\"docker volume ls --format '{{.Name}}'\")\n\n    volumes = stdout.splitlines()\n\n    volumes_to_delete = [v for v in volumes if v.startswith(project_name)]\n\n    if not volumes_to_delete:\n        print(f\"No volumes found for project: {project_name}\")\n        return\n\n    # Delete filtered volumes\n    volume_names = \" \".join(volumes_to_delete)\n    _run_command(f\"docker volume rm {volume_names}\")\n\n    print(\n        f\"Successfully deleted {len(volumes_to_delete)} volumes for project: {project_name}\"\n    )\n    stdout, _ = _run_command(\"docker network ls --format '{{.Name}}'\")\n\n    networks = stdout.splitlines()\n\n    networks_to_delete = [n for n in networks if env_name in n]\n\n    if not networks_to_delete:\n        print(f\"No networks found containing env_name: {env_name}\")\n    else:\n        network_names = \" \".join(networks_to_delete)\n        _run_command(f\"docker network rm {network_names}\")\n\n        print(\n            f\"Successfully deleted {len(networks_to_delete)} networks containing env_name: {env_name}\"\n        )\n\n\n@retry(tries=5, delay=5, backoff=2)\ndef get_api_server_host_port(env_name: str) -> str:\n    \"\"\"\n    This pulls all containers with the provided env_name\n    It then grabs the JSON specific container with a name containing \"api_server\"\n    It then grabs the port info from the JSON and strips out the relevent data\n    \"\"\"\n    container_name = \"api_server\"\n\n    stdout, _ = _run_command(\"docker ps -a --format '{{json .}}'\")\n    containers = [json.loads(line) for line in stdout.splitlines()]\n    server_jsons = []\n\n    for container in containers:\n        if container_name in container[\"Names\"] and env_name in container[\"Names\"]:\n            server_jsons.append(container)\n\n    if not server_jsons:\n        raise RuntimeError(\n            f\"No container found containing: {container_name} and {env_name}\"\n        )\n    elif len(server_jsons) > 1:\n        raise RuntimeError(\n            f\"Too many containers matching {container_name} found, please indicate a env_name\"\n        )\n    server_json = server_jsons[0]\n\n    # This is in case the api_server has multiple ports\n    client_port = \"8080\"\n    ports = server_json.get(\"Ports\", \"\")\n    port_infos = ports.split(\",\") if ports else []\n    port_dict = {}\n    for port_info in port_infos:\n        port_arr = port_info.split(\":\")[-1].split(\"->\") if port_info else []\n        if len(port_arr) == 2:\n            port_dict[port_arr[1]] = port_arr[0]\n\n    # Find the host port where client_port is in the key\n    matching_ports = [value for key, value in port_dict.items() if client_port in key]\n\n    if len(matching_ports) > 1:\n        raise RuntimeError(f\"Too many ports matching {client_port} found\")\n    if not matching_ports:\n        raise RuntimeError(\n            f\"No port found containing: {client_port} for container: {container_name} and env_name: {env_name}\"\n        )\n    return matching_ports[0]\n\n\n# Added function to restart Vespa container\ndef restart_vespa_container(env_name: str) -> None:\n    print(f\"Restarting Vespa container for env_name: {env_name}\")\n\n    # Find the Vespa container\n    stdout, _ = _run_command(\n        f\"docker ps -a --format '{{{{.Names}}}}' | awk '/index-1/ && /{env_name}/'\"\n    )\n    container_name = stdout.strip()\n\n    if not container_name:\n        raise RuntimeError(f\"No Vespa container found with env_name: {env_name}\")\n\n    # Restart the container\n    _run_command(f\"docker restart {container_name}\")\n\n    print(f\"Vespa container '{container_name}' has begun restarting\")\n\n    time.sleep(30)\n    print(f\"Vespa container '{container_name}' has been restarted\")\n\n\nif __name__ == \"__main__\":\n    \"\"\"\n    Running this just cleans up the docker environment for the container indicated by environment_name\n    If no environment_name is indicated, will just clean up all onyx docker containers/volumes/networks\n    Note: vespa/postgres mounts are not deleted\n    \"\"\"\n    current_dir = os.path.dirname(os.path.abspath(__file__))\n    config_path = os.path.join(current_dir, \"search_test_config.yaml\")\n    with open(config_path, \"r\") as file:\n        config = yaml.safe_load(file)\n\n    if not isinstance(config, dict):\n        raise TypeError(\"config must be a dictionary\")\n    cleanup_docker(config[\"environment_name\"])\n"
  },
  {
    "path": "backend/tests/regression/answer_quality/file_uploader.py",
    "content": "import csv\nimport os\nimport tempfile\nimport time\nimport zipfile\nfrom pathlib import Path\nfrom types import SimpleNamespace\n\nimport yaml\n\nfrom tests.regression.answer_quality.api_utils import check_indexing_status\nfrom tests.regression.answer_quality.api_utils import create_cc_pair\nfrom tests.regression.answer_quality.api_utils import create_connector\nfrom tests.regression.answer_quality.api_utils import create_credential\nfrom tests.regression.answer_quality.api_utils import run_cc_once\nfrom tests.regression.answer_quality.api_utils import upload_file\n\n\ndef unzip_and_get_file_paths(zip_file_path: str) -> list[str]:\n    persistent_dir = tempfile.mkdtemp()\n    with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n        zip_ref.extractall(persistent_dir)\n\n    file_paths = []\n    for root, _, files in os.walk(persistent_dir):\n        for file in sorted(files):\n            file_paths.append(os.path.join(root, file))\n\n    return file_paths\n\n\ndef create_temp_zip_from_files(file_paths: list[str]) -> str:\n    persistent_dir = tempfile.mkdtemp()\n    zip_file_path = os.path.join(persistent_dir, \"temp.zip\")\n\n    with zipfile.ZipFile(zip_file_path, \"w\") as zip_file:\n        for file_path in file_paths:\n            zip_file.write(file_path, Path(file_path).name)\n\n    return zip_file_path\n\n\ndef upload_test_files(zip_file_path: str, env_name: str) -> None:\n    print(\"zip:\", zip_file_path)\n    file_paths = upload_file(env_name, zip_file_path)\n\n    conn_id = create_connector(env_name, file_paths)\n    cred_id = create_credential(env_name)\n\n    create_cc_pair(env_name, conn_id, cred_id)\n    run_cc_once(env_name, conn_id, cred_id)\n\n\ndef manage_file_upload(zip_file_path: str, env_name: str) -> None:\n    start_time = time.time()\n    unzipped_file_paths = unzip_and_get_file_paths(zip_file_path)\n    total_file_count = len(unzipped_file_paths)\n    problem_file_list: list[str] = []\n\n    while True:\n        doc_count, ongoing_index_attempts = check_indexing_status(env_name)\n\n        if ongoing_index_attempts:\n            print(\n                f\"{doc_count} docs indexed but waiting for ongoing indexing jobs to finish...\"\n            )\n        elif not doc_count:\n            print(\"No docs indexed, waiting for indexing to start\")\n            temp_zip_file_path = create_temp_zip_from_files(unzipped_file_paths)\n            upload_test_files(temp_zip_file_path, env_name)\n            os.unlink(temp_zip_file_path)\n        elif (doc_count + len(problem_file_list)) < total_file_count:\n            print(f\"No ongooing indexing attempts but only {doc_count} docs indexed\")\n            remaining_files = unzipped_file_paths[doc_count + len(problem_file_list) :]\n            problem_file_list.append(remaining_files.pop(0))\n            print(\n                f\"Removing first doc and grabbed last {len(remaining_files)} docs to try agian\"\n            )\n            temp_zip_file_path = create_temp_zip_from_files(remaining_files)\n            upload_test_files(temp_zip_file_path, env_name)\n            os.unlink(temp_zip_file_path)\n        else:\n            print(f\"Successfully uploaded {doc_count} docs!\")\n            break\n\n        time.sleep(10)\n\n    if problem_file_list:\n        problem_file_csv_path = os.path.join(current_dir, \"problem_files.csv\")\n        with open(problem_file_csv_path, \"w\", newline=\"\") as csvfile:\n            csvwriter = csv.writer(csvfile)\n            csvwriter.writerow([\"Problematic File Paths\"])\n            for problem_file in problem_file_list:\n                csvwriter.writerow([problem_file])\n\n    for file in unzipped_file_paths:\n        os.unlink(file)\n    print(f\"Total time taken: {(time.time() - start_time) / 60} minutes\")\n\n\nif __name__ == \"__main__\":\n    current_dir = os.path.dirname(os.path.abspath(__file__))\n    config_path = os.path.join(current_dir, \"search_test_config.yaml\")\n    with open(config_path, \"r\") as file:\n        config = SimpleNamespace(**yaml.safe_load(file))\n    file_location = config.zipped_documents_file\n    env_name = config.environment_name\n    manage_file_upload(file_location, env_name)\n"
  },
  {
    "path": "backend/tests/regression/answer_quality/launch_eval_env.py",
    "content": "import os\nfrom types import SimpleNamespace\n\nimport yaml\n\nfrom tests.regression.answer_quality.cli_utils import manage_data_directories\nfrom tests.regression.answer_quality.cli_utils import set_env_variables\nfrom tests.regression.answer_quality.cli_utils import start_docker_compose\nfrom tests.regression.answer_quality.cli_utils import switch_to_commit\n\n\ndef load_config(config_filename: str) -> SimpleNamespace:\n    current_dir = os.path.dirname(os.path.abspath(__file__))\n    config_path = os.path.join(current_dir, config_filename)\n    with open(config_path, \"r\") as file:\n        return SimpleNamespace(**yaml.safe_load(file))\n\n\ndef main() -> None:\n    config = load_config(\"search_test_config.yaml\")\n    if config.environment_name:\n        env_name = config.environment_name\n        print(\"launching onyx with environment name:\", env_name)\n    else:\n        print(\"No env name defined. Not launching docker.\")\n        print(\n            \"Please define a name in the config yaml to start a new env or use an existing env\"\n        )\n        return\n\n    set_env_variables(\n        config.model_server_ip,\n        config.model_server_port,\n        config.use_cloud_gpu,\n        config.llm,\n    )\n    manage_data_directories(env_name, config.output_folder, config.use_cloud_gpu)\n    if config.commit_sha:\n        switch_to_commit(config.commit_sha)\n\n    start_docker_compose(\n        env_name, config.launch_web_ui, config.use_cloud_gpu, config.only_state\n    )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "backend/tests/regression/answer_quality/search_test_config.yaml.template",
    "content": "# Copy this to search_test_config.yaml and fill in the values to run the eval pipeline\n# Don't forget to also update the .env_eval file with the correct values\n\n# Directory where test results will be saved\noutput_folder: \"~/onyx_test_results\"\n\n# Path to the zip file containing sample documents\nzipped_documents_file: \"~/sampledocs.zip\"\n\n# Path to the YAML file containing sample questions\nquestions_file: \"~/sample_questions.yaml\"\n\n# Git commit SHA to use (null means use current code as is)\ncommit_sha: null\n\n# Whether to launch a web UI for the test\nlaunch_web_ui: false\n\n# Only retrieve documents, not LLM response\nonly_retrieve_docs: false\n\n# Whether to use a cloud GPU for processing\nuse_cloud_gpu: false\n\n# IP address of the model server (placeholder)\nmodel_server_ip: \"PUT_PUBLIC_CLOUD_IP_HERE\"\n\n# Port of the model server (placeholder)\nmodel_server_port: \"PUT_PUBLIC_CLOUD_PORT_HERE\"\n\n# Name for existing testing env (empty string uses default ports)\nenvironment_name: \"\"\n\n# Limit on number of tests to run (null means no limit)\nlimit: null\n\n# LLM configuration\nllm:\n  # Name of the LLM\n  name: \"default_test_llm\"\n  \n  # Provider of the LLM (e.g., OpenAI)\n  provider: \"openai\"\n  \n  # API key\n  api_key: \"PUT_API_KEY_HERE\"\n  \n  # Default model name to use\n  default_model_name: \"gpt-4o\"\n  \n  # List of model names to use for testing\n  model_names: [\"gpt-4o\"]\n"
  },
  {
    "path": "backend/tests/regression/search_quality/README.md",
    "content": "# Search Quality Test Script\n\nThis Python script evaluates the search and answer quality for a list of queries, against a ground truth. It will use the currently ingested documents for the search, answer generation, and ground truth comparisons.\n\n## Usage\n\n1. Ensure you have the required dependencies installed and onyx running.\n\n2. Ensure you have `OPENAI_API_KEY` set if you intend to do answer evaluation (enabled by default, unless you run the script with the `-s` flag). Go to the API Keys page in the admin panel, generate a basic api token, and add it to the env file as `ONYX_API_KEY=on_...`.\n\n3. Navigate to Onyx repo, **search_quality** folder:\n\n```\ncd path/to/onyx/backend/tests/regression/search_quality\n```\n\n4. Copy `test_queries.json.template` to `test_queries.json` and add/remove test queries in it. The fields for each query are:\n\n   - `question: str` the query\n   - `ground_truth: list[GroundTruth]` an un-ranked list of expected search results with fields:\n      - `doc_source: str` document source (e.g., web, google_drive, linear), used to normalize the links in some cases\n      - `doc_link: str` link associated with document, used to find corresponding document in local index\n   - `ground_truth_response: Optional[str]` a response with clauses the ideal answer should include\n   - `categories: Optional[list[str]]` list of categories, used to aggregate evaluation results\n\n5. Run `run_search_eval.py` to evaluate the queries.  All parameters are optional and have sensible defaults:\n\n```\npython run_search_eval.py\n  -d --dataset          # Path to the test-set JSON file (default: ./test_queries.json)\n  -n --num_search       # Maximum number of documents to retrieve per search (default: 50)\n  -a --num_answer       # Maximum number of documents to use for answer evaluation (default: 25)\n  -w --max_workers      # Maximum number of concurrent search requests (0 = unlimited, default: 10).\n  -r --max_req_rate     # Maximum number of search requests per minute (0 = unlimited, default: 0).\n  -q --timeout          # Request timeout in seconds (default: 120)\n  -e --api_endpoint     # Base URL of the Onyx API server (default: http://127.0.0.1:8080)\n  -s --search_only      # Only perform search and not answer evaluation (default: false)\n  -t --tenant_id        # Tenant ID to use for the evaluation (default: None)\n```\n\nNote: If you only care about search quality, you should run with the `-s` flag for a significantly faster evaluation. Furthermore, you should set `-r` to 1 if running with federated search enabled to avoid hitting rate limits.\n\n6. After the run, an `eval-YYYY-MM-DD-HH-MM-SS` folder is created containing:\n\n   * `test_queries.json`   – the dataset used with the list of valid queries and corresponding indexed ground truth.\n   * `search_results.json` – per-query search and answer details.\n   * `results_by_category.csv` – aggregated metrics per category and for \"all\".\n   * `search_position_chart.png` – bar-chart of ground-truth ranks.\n\nYou can replace `test_queries.json` with the generated one for a slightly faster loading of the queries the next time around."
  },
  {
    "path": "backend/tests/regression/search_quality/models.py",
    "content": "from pydantic import BaseModel\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.context.search.models import SavedSearchDoc\n\n\nclass GroundTruth(BaseModel):\n    doc_source: DocumentSource\n    doc_link: str\n\n\nclass TestQuery(BaseModel):\n    question: str\n    ground_truth: list[GroundTruth] = []\n    ground_truth_response: str | None = None\n    categories: list[str] = []\n\n    # autogenerated\n    ground_truth_docids: list[str] = []\n\n\nclass EvalConfig(BaseModel):\n    max_search_results: int\n    max_answer_context: int\n    num_workers: int  # 0 = unlimited\n    max_request_rate: int  # 0 = unlimited\n    request_timeout: int\n    api_url: str\n    search_only: bool\n\n\nclass OneshotQAResult(BaseModel):\n    time_taken: float\n    top_documents: list[SavedSearchDoc]\n    answer: str | None\n\n\nclass RetrievedDocument(BaseModel):\n    document_id: str\n    chunk_id: int\n    content: str\n\n\nclass AnalysisSummary(BaseModel):\n    question: str\n    categories: list[str]\n    found: bool\n    rank: int | None\n    total_results: int\n    ground_truth_count: int\n    response_relevancy: float | None = None\n    faithfulness: float | None = None\n    factual_correctness: float | None = None\n    answer: str | None = None\n    retrieved: list[RetrievedDocument] = []\n    time_taken: float\n\n\nclass SearchMetrics(BaseModel):\n    total_queries: int\n    found_count: int\n\n    # for found results\n    best_rank: int\n    worst_rank: int\n    average_rank: float\n    top_k_accuracy: dict[int, float]\n\n\nclass AnswerMetrics(BaseModel):\n    response_relevancy: float\n    faithfulness: float\n    factual_correctness: float\n\n    # only for metric computation\n    n_response_relevancy: int\n    n_faithfulness: int\n    n_factual_correctness: int\n\n\nclass CombinedMetrics(SearchMetrics, AnswerMetrics):\n    average_time_taken: float\n"
  },
  {
    "path": "backend/tests/regression/search_quality/run_search_eval.py",
    "content": "import csv\nimport json\nimport os\nimport sys\nimport time\nfrom collections import defaultdict\nfrom concurrent.futures import as_completed\nfrom concurrent.futures import ThreadPoolExecutor\nfrom datetime import datetime\nfrom pathlib import Path\nfrom threading import Event\nfrom threading import Lock\nfrom threading import Semaphore\nfrom typing import cast\n\nimport matplotlib.pyplot as plt\nimport requests\nfrom dotenv import load_dotenv\nfrom matplotlib.patches import Patch\nfrom pydantic import ValidationError\nfrom requests.exceptions import RequestException\nfrom retry import retry\n\n# add onyx/backend to path (since this isn't done automatically when running as a script)\ncurrent_dir = Path(__file__).parent\nonyx_dir = current_dir.parent.parent.parent.parent\nsys.path.append(str(onyx_dir / \"backend\"))\n\n# load env before app_config loads (since env doesn't get loaded when running as a script)\nenv_path = onyx_dir / \".vscode\" / \".env\"\nif not env_path.exists():\n    raise RuntimeError(\n        \"Could not find .env file. Please create one in the root .vscode directory.\"\n    )\nload_dotenv(env_path)\n\n# pylint: disable=E402\n# flake8: noqa: E402\n\nfrom ee.onyx.server.query_and_chat.models import SearchFullResponse\nfrom ee.onyx.server.query_and_chat.models import SendSearchQueryRequest\nfrom onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW\nfrom onyx.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE\nfrom onyx.context.search.models import BaseFilters\nfrom onyx.context.search.models import SavedSearchDoc\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.utils.logger import setup_logger\nfrom shared_configs.configs import MULTI_TENANT\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE\nfrom tests.regression.search_quality.models import AnalysisSummary\nfrom tests.regression.search_quality.models import CombinedMetrics\nfrom tests.regression.search_quality.models import EvalConfig\nfrom tests.regression.search_quality.models import OneshotQAResult\nfrom tests.regression.search_quality.models import TestQuery\nfrom tests.regression.search_quality.utils import compute_overall_scores\nfrom tests.regression.search_quality.utils import find_document_id\nfrom tests.regression.search_quality.utils import get_federated_sources\nfrom tests.regression.search_quality.utils import LazyJsonWriter\nfrom tests.regression.search_quality.utils import ragas_evaluate\nfrom tests.regression.search_quality.utils import search_docs_to_doc_contexts\n\nlogger = setup_logger(__name__)\n\nGENERAL_HEADERS = {\"Content-Type\": \"application/json\"}\nTOP_K_LIST = [1, 3, 5, 10]\n\n\nclass SearchAnswerAnalyzer:\n    def __init__(\n        self,\n        config: EvalConfig,\n        tenant_id: str | None = None,\n    ):\n        if not MULTI_TENANT:\n            logger.info(\"Running in single-tenant mode\")\n            tenant_id = POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE\n        elif tenant_id is None:\n            raise ValueError(\"Tenant ID is required for multi-tenant\")\n\n        self.config = config\n        self.tenant_id = tenant_id\n\n        # shared analysis results\n        self._lock = Lock()\n        self._progress_counter = 0\n        self._result_writer: LazyJsonWriter | None = None\n        self.ranks: list[int | None] = []\n        self.metrics: dict[str, CombinedMetrics] = defaultdict(\n            lambda: CombinedMetrics(\n                total_queries=0,\n                found_count=0,\n                best_rank=config.max_search_results,\n                worst_rank=1,\n                average_rank=0.0,\n                top_k_accuracy={k: 0.0 for k in TOP_K_LIST},\n                response_relevancy=0.0,\n                faithfulness=0.0,\n                factual_correctness=0.0,\n                n_response_relevancy=0,\n                n_faithfulness=0,\n                n_factual_correctness=0,\n                average_time_taken=0.0,\n            )\n        )\n\n    def run_analysis(self, dataset_path: Path, export_path: Path) -> None:\n        # load and save the dataset\n        dataset = self._load_dataset(dataset_path)\n        dataset_size = len(dataset)\n        dataset_export_path = export_path / \"test_queries.json\"\n        with dataset_export_path.open(\"w\") as f:\n            dataset_serializable = [q.model_dump(mode=\"json\") for q in dataset]\n            json.dump(dataset_serializable, f, indent=4)\n\n        result_export_path = export_path / \"search_results.json\"\n        self._result_writer = LazyJsonWriter(result_export_path)\n\n        # set up rate limiting and threading primitives\n        interval = (\n            60.0 / self.config.max_request_rate\n            if self.config.max_request_rate > 0\n            else 0.0\n        )\n        available_workers = Semaphore(self.config.num_workers)\n        stop_event = Event()\n\n        def _submit_wrapper(tc: TestQuery) -> AnalysisSummary:\n            try:\n                return self._run_and_analyze_one(tc, dataset_size)\n            except Exception as e:\n                logger.error(\"Error during analysis: %s\", e)\n                stop_event.set()\n                raise\n            finally:\n                available_workers.release()\n\n        # run the analysis\n        logger.info(\"Starting analysis of %d queries\", dataset_size)\n        logger.info(\"Using %d parallel workers\", self.config.num_workers)\n        logger.info(\"Exporting search results to %s\", result_export_path)\n\n        with ThreadPoolExecutor(\n            max_workers=self.config.num_workers or None\n        ) as executor:\n            # submit requests at configured rate, break early if any error occurs\n            futures = []\n            for tc in dataset:\n                if stop_event.is_set():\n                    break\n\n                available_workers.acquire()\n                fut = executor.submit(_submit_wrapper, tc)\n                futures.append(fut)\n\n                if (\n                    len(futures) != dataset_size\n                    and interval > 0\n                    and not stop_event.is_set()\n                ):\n                    time.sleep(interval)\n\n            # ensure all tasks finish and surface any exceptions\n            for fut in as_completed(futures):\n                fut.result()\n\n        if self._result_writer:\n            self._result_writer.close()\n        self._aggregate_metrics()\n\n    def generate_detailed_report(self, export_path: Path) -> None:\n        logger.info(\"Generating detailed report...\")\n\n        csv_path = export_path / \"results_by_category.csv\"\n        with csv_path.open(\"w\", newline=\"\") as csv_file:\n            csv_writer = csv.writer(csv_file)\n            csv_writer.writerow(\n                [\n                    \"category\",\n                    \"total_queries\",\n                    \"found\",\n                    \"percent_found\",\n                    \"best_rank\",\n                    \"worst_rank\",\n                    \"avg_rank\",\n                    *[f\"top_{k}_accuracy\" for k in TOP_K_LIST],\n                    *(\n                        [\n                            \"avg_response_relevancy\",\n                            \"avg_faithfulness\",\n                            \"avg_factual_correctness\",\n                        ]\n                        if not self.config.search_only\n                        else []\n                    ),\n                    \"search_score\",\n                    *([\"answer_score\"] if not self.config.search_only else []),\n                    \"avg_time_taken\",\n                ]\n            )\n\n            for category, metrics in sorted(\n                self.metrics.items(), key=lambda c: (0 if c[0] == \"all\" else 1, c[0])\n            ):\n                found_count = metrics.found_count\n                total_count = metrics.total_queries\n                accuracy = found_count / total_count * 100 if total_count > 0 else 0\n\n                print(\n                    f\"\\n{category.upper()}:  total queries: {total_count}\\n  found: {found_count} ({accuracy:.1f}%)\"\n                )\n                best_rank = metrics.best_rank if metrics.found_count > 0 else None\n                worst_rank = metrics.worst_rank if metrics.found_count > 0 else None\n                avg_rank = metrics.average_rank if metrics.found_count > 0 else None\n                if metrics.found_count > 0:\n                    print(\n                        f\"  average rank (for found results): {avg_rank:.2f}\\n\"\n                        f\"  best rank (for found results): {best_rank:.2f}\\n\"\n                        f\"  worst rank (for found results): {worst_rank:.2f}\"\n                    )\n                    for k, acc in metrics.top_k_accuracy.items():\n                        print(f\"  top-{k} accuracy: {acc:.1f}%\")\n                if not self.config.search_only:\n                    if metrics.n_response_relevancy > 0:\n                        print(\n                            f\"  average response relevancy: {metrics.response_relevancy:.2f}\"\n                        )\n                    if metrics.n_faithfulness > 0:\n                        print(f\"  average faithfulness: {metrics.faithfulness:.2f}\")\n                    if metrics.n_factual_correctness > 0:\n                        print(\n                            f\"  average factual correctness: {metrics.factual_correctness:.2f}\"\n                        )\n                search_score, answer_score = compute_overall_scores(metrics)\n                print(f\"  search score: {search_score:.1f}\")\n                if not self.config.search_only:\n                    print(f\"  answer score: {answer_score:.1f}\")\n                print(f\"  average time taken: {metrics.average_time_taken:.2f}s\")\n\n                csv_writer.writerow(\n                    [\n                        category,\n                        total_count,\n                        found_count,\n                        f\"{accuracy:.1f}\",\n                        best_rank or \"\",\n                        worst_rank or \"\",\n                        f\"{avg_rank:.2f}\" if avg_rank is not None else \"\",\n                        *[f\"{acc:.1f}\" for acc in metrics.top_k_accuracy.values()],\n                        *(\n                            [\n                                (\n                                    f\"{metrics.response_relevancy:.2f}\"\n                                    if metrics.n_response_relevancy > 0\n                                    else \"\"\n                                ),\n                                (\n                                    f\"{metrics.faithfulness:.2f}\"\n                                    if metrics.n_faithfulness > 0\n                                    else \"\"\n                                ),\n                                (\n                                    f\"{metrics.factual_correctness:.2f}\"\n                                    if metrics.n_factual_correctness > 0\n                                    else \"\"\n                                ),\n                            ]\n                            if not self.config.search_only\n                            else []\n                        ),\n                        f\"{search_score:.1f}\",\n                        *(\n                            [f\"{answer_score:.1f}\"]\n                            if not self.config.search_only\n                            else []\n                        ),\n                        f\"{metrics.average_time_taken:.2f}\",\n                    ]\n                )\n        logger.info(\"Saved category breakdown csv to %s\", csv_path)\n\n    def generate_chart(self, export_path: Path) -> None:\n        logger.info(\"Generating search position chart...\")\n\n        if len(self.ranks) == 0:\n            logger.warning(\"No results to chart\")\n            return\n\n        found_count = 0\n        not_found_count = 0\n        rank_counts: dict[int, int] = defaultdict(int)\n        for rank in self.ranks:\n            if rank is None:\n                not_found_count += 1\n            else:\n                found_count += 1\n                rank_counts[rank] += 1\n\n        # create the data for plotting\n        if found_count:\n            max_rank = max(rank_counts.keys())\n            positions = list(range(1, max_rank + 1))\n            counts = [rank_counts.get(pos, 0) for pos in positions]\n        else:\n            positions = []\n            counts = []\n\n        # add the \"not found\" bar on the far right\n        if not_found_count:\n            # add some spacing between found positions and \"not found\"\n            not_found_position = (max(positions) + 2) if positions else 1\n            positions.append(not_found_position)\n            counts.append(not_found_count)\n\n            # create labels for x-axis\n            x_labels = [str(pos) for pos in positions[:-1]] + [\n                f\"not found\\n(>{self.config.max_search_results})\"\n            ]\n        else:\n            x_labels = [str(pos) for pos in positions]\n\n        # create the figure and bar chart\n        plt.figure(figsize=(14, 6))\n\n        # use different colors for found vs not found\n        colors = (\n            [\"#3498db\"] * (len(positions) - 1) + [\"#e74c3c\"]\n            if not_found_count > 0\n            else [\"#3498db\"] * len(positions)\n        )\n        bars = plt.bar(\n            positions, counts, color=colors, alpha=0.7, edgecolor=\"black\", linewidth=0.5\n        )\n\n        # customize the chart\n        plt.xlabel(\"Position in Search Results\", fontsize=12)\n        plt.ylabel(\"Number of Ground Truth Documents\", fontsize=12)\n        plt.title(\n            \"Ground Truth Document Positions in Search Results\",\n            fontsize=14,\n            fontweight=\"bold\",\n        )\n        plt.grid(axis=\"y\", alpha=0.3)\n\n        # add value labels on top of each bar\n        for bar, count in zip(bars, counts):\n            if count > 0:\n                plt.text(\n                    bar.get_x() + bar.get_width() / 2,\n                    bar.get_height() + 0.1,\n                    str(count),\n                    ha=\"center\",\n                    va=\"bottom\",\n                    fontweight=\"bold\",\n                )\n\n        # set x-axis labels\n        plt.xticks(positions, x_labels, rotation=45 if not_found_count > 0 else 0)\n\n        # add legend if we have both found and not found\n        if not_found_count and found_count:\n            legend_elements = [\n                Patch(facecolor=\"#3498db\", alpha=0.7, label=\"Found in Results\"),\n                Patch(facecolor=\"#e74c3c\", alpha=0.7, label=\"Not Found\"),\n            ]\n            plt.legend(handles=legend_elements, loc=\"upper right\")\n\n        # make layout tight and save\n        plt.tight_layout()\n        chart_file = export_path / \"search_position_chart.png\"\n        plt.savefig(chart_file, dpi=300, bbox_inches=\"tight\")\n        logger.info(\"Search position chart saved to: %s\", chart_file)\n        plt.show()\n\n    def _load_dataset(self, dataset_path: Path) -> list[TestQuery]:\n        \"\"\"Load the test dataset from a JSON file and validate the ground truth documents.\"\"\"\n        with dataset_path.open(\"r\") as f:\n            dataset_raw: list[dict] = json.load(f)\n\n        with get_session_with_tenant(tenant_id=self.tenant_id) as db_session:\n            federated_sources = get_federated_sources(db_session)\n\n        dataset: list[TestQuery] = []\n        for datum in dataset_raw:\n            # validate the raw datum\n            try:\n                test_query = TestQuery(**datum)\n            except ValidationError as e:\n                logger.error(\"Incorrectly formatted query %s: %s\", datum, e)\n                continue\n\n            # in case the dataset was copied from the previous run export\n            if test_query.ground_truth_docids:\n                dataset.append(test_query)\n                continue\n\n            # validate and get the ground truth documents\n            with get_session_with_tenant(tenant_id=self.tenant_id) as db_session:\n                for ground_truth in test_query.ground_truth:\n                    if (\n                        doc_id := find_document_id(\n                            ground_truth, federated_sources, db_session\n                        )\n                    ) is not None:\n                        test_query.ground_truth_docids.append(doc_id)\n\n            if len(test_query.ground_truth_docids) == 0:\n                logger.warning(\n                    \"No ground truth documents found for query: %s, skipping...\",\n                    test_query.question,\n                )\n                continue\n\n            dataset.append(test_query)\n\n        return dataset\n\n    @retry(tries=3, delay=1, backoff=2)\n    def _perform_search(self, query: str) -> OneshotQAResult:\n        \"\"\"Perform a document search query against the Onyx API and time it.\"\"\"\n        # create the search request\n        filters = BaseFilters()\n        search_request = SendSearchQueryRequest(\n            search_query=query,\n            filters=filters,\n            num_docs_fed_to_llm_selection=self.config.max_search_results,\n            run_query_expansion=False,\n            stream=False,\n        )\n\n        # send the request\n        response = None\n        try:\n            request_data = search_request.model_dump()\n            headers = GENERAL_HEADERS.copy()\n            # Add API key if present\n            if os.environ.get(\"ONYX_API_KEY\"):\n                headers[\"Authorization\"] = f\"Bearer {os.environ.get('ONYX_API_KEY')}\"\n\n            start_time = time.monotonic()\n            response = requests.post(\n                url=f\"{self.config.api_url}/search/send-search-message\",\n                json=request_data,\n                headers=headers,\n                timeout=self.config.request_timeout,\n            )\n            time_taken = time.monotonic() - start_time\n            response.raise_for_status()\n            result = SearchFullResponse.model_validate(response.json())\n\n            # extract documents from the search response\n            if result.search_docs:\n                top_documents = [\n                    SavedSearchDoc.from_search_doc(doc)\n                    for doc in result.search_docs[: self.config.max_search_results]\n                ]\n                return OneshotQAResult(\n                    time_taken=time_taken,\n                    top_documents=top_documents,\n                    answer=None,  # search endpoint doesn't generate answers\n                )\n        except RequestException as e:\n            raise RuntimeError(\n                f\"Search failed for query '{query}': {e}. Response: {response.json()}\"\n                if response\n                else \"\"\n            )\n        raise RuntimeError(f\"Search returned no documents for query {query}\")\n\n    def _run_and_analyze_one(self, test_case: TestQuery, total: int) -> AnalysisSummary:\n        result = self._perform_search(test_case.question)\n\n        # compute rank\n        rank = None\n        found = False\n        ground_truths = set(test_case.ground_truth_docids)\n        for i, doc in enumerate(result.top_documents, 1):\n            if doc.document_id in ground_truths:\n                rank = i\n                found = True\n                break\n\n        # print search progress and result\n        with self._lock:\n            self._progress_counter += 1\n            completed = self._progress_counter\n            status = \"✓ Found\" if found else \"✗ Not found\"\n            rank_info = f\" (rank {rank})\" if found else \"\"\n            question_snippet = (\n                test_case.question[:50] + \"...\"\n                if len(test_case.question) > 50\n                else test_case.question\n            )\n            print(f\"[{completed}/{total}] {status}{rank_info}: {question_snippet}\")\n\n        # get the search contents\n        retrieved = search_docs_to_doc_contexts(result.top_documents, self.tenant_id)\n\n        # do answer evaluation\n        response_relevancy: float | None = None\n        faithfulness: float | None = None\n        factual_correctness: float | None = None\n        contexts = [c.content for c in retrieved[: self.config.max_answer_context]]\n        if not self.config.search_only:\n            if result.answer is None:\n                logger.error(\n                    \"No answer found for query: %s, skipping answer evaluation\",\n                    test_case.question,\n                )\n            else:\n                try:\n                    ragas_result = ragas_evaluate(\n                        question=test_case.question,\n                        answer=result.answer,\n                        contexts=contexts,\n                        reference_answer=test_case.ground_truth_response,\n                    ).scores[0]\n                    response_relevancy = ragas_result[\"answer_relevancy\"]\n                    faithfulness = ragas_result[\"faithfulness\"]\n                    factual_correctness = ragas_result.get(\n                        \"factual_correctness(mode=recall)\"\n                    )\n                except Exception as e:\n                    logger.error(\n                        \"Error evaluating answer for query %s: %s\",\n                        test_case.question,\n                        e,\n                    )\n\n        # save results\n        analysis = AnalysisSummary(\n            question=test_case.question,\n            categories=test_case.categories,\n            found=found,\n            rank=rank,\n            total_results=len(result.top_documents),\n            ground_truth_count=len(test_case.ground_truth_docids),\n            answer=result.answer,\n            response_relevancy=response_relevancy,\n            faithfulness=faithfulness,\n            factual_correctness=factual_correctness,\n            retrieved=retrieved,\n            time_taken=result.time_taken,\n        )\n        with self._lock:\n            self.ranks.append(analysis.rank)\n            if self._result_writer:\n                self._result_writer.append(analysis.model_dump(mode=\"json\"))\n            self._update_metrics(analysis)\n\n        return analysis\n\n    def _update_metrics(self, result: AnalysisSummary) -> None:\n        for cat in result.categories + [\"all\"]:\n            self.metrics[cat].total_queries += 1\n            self.metrics[cat].average_time_taken += result.time_taken\n\n            if result.found:\n                self.metrics[cat].found_count += 1\n\n                rank = cast(int, result.rank)\n                self.metrics[cat].best_rank = min(self.metrics[cat].best_rank, rank)\n                self.metrics[cat].worst_rank = max(self.metrics[cat].worst_rank, rank)\n                self.metrics[cat].average_rank += rank\n                for k in TOP_K_LIST:\n                    self.metrics[cat].top_k_accuracy[k] += int(rank <= k)\n\n            if self.config.search_only:\n                continue\n            if result.response_relevancy is not None:\n                self.metrics[cat].response_relevancy += result.response_relevancy\n                self.metrics[cat].n_response_relevancy += 1\n            if result.faithfulness is not None:\n                self.metrics[cat].faithfulness += result.faithfulness\n                self.metrics[cat].n_faithfulness += 1\n            if result.factual_correctness is not None:\n                self.metrics[cat].factual_correctness += result.factual_correctness\n                self.metrics[cat].n_factual_correctness += 1\n\n    def _aggregate_metrics(self) -> None:\n        for cat in self.metrics:\n            total = self.metrics[cat].total_queries\n            self.metrics[cat].average_time_taken /= total\n\n            if self.metrics[cat].found_count > 0:\n                self.metrics[cat].average_rank /= self.metrics[cat].found_count\n            for k in TOP_K_LIST:\n                self.metrics[cat].top_k_accuracy[k] /= total\n                self.metrics[cat].top_k_accuracy[k] *= 100\n\n            if self.config.search_only:\n                continue\n            if (n := self.metrics[cat].n_response_relevancy) > 0:\n                self.metrics[cat].response_relevancy /= n\n            if (n := self.metrics[cat].n_faithfulness) > 0:\n                self.metrics[cat].faithfulness /= n\n            if (n := self.metrics[cat].n_factual_correctness) > 0:\n                self.metrics[cat].factual_correctness /= n\n\n\ndef run_search_eval(\n    dataset_path: Path,\n    config: EvalConfig,\n    tenant_id: str | None,\n) -> None:\n    # check openai api key is set if doing answer eval (must be called that for ragas to recognize)\n    if not config.search_only and not os.environ.get(\"OPENAI_API_KEY\"):\n        raise RuntimeError(\n            \"OPENAI_API_KEY is required for answer evaluation. Please add it to the root .vscode/.env file.\"\n        )\n\n    # check onyx api key is set (auth is always required)\n    if not os.environ.get(\"ONYX_API_KEY\"):\n        raise RuntimeError(\n            \"ONYX_API_KEY is required. Please create one in the admin panel and add it to the root .vscode/.env file.\"\n        )\n\n    # check onyx is running\n    try:\n        response = requests.get(\n            f\"{config.api_url}/health\", timeout=config.request_timeout\n        )\n        response.raise_for_status()\n    except RequestException as e:\n        raise RuntimeError(f\"Could not connect to Onyx API: {e}\")\n\n    # create the export folder\n    export_folder = current_dir / datetime.now().strftime(\"eval-%Y-%m-%d-%H-%M-%S\")\n    export_path = Path(export_folder)\n    export_path.mkdir(parents=True, exist_ok=True)\n    logger.info(\"Created export folder: %s\", export_path)\n\n    # run the search eval\n    analyzer = SearchAnswerAnalyzer(config=config, tenant_id=tenant_id)\n    analyzer.run_analysis(dataset_path, export_path)\n    analyzer.generate_detailed_report(export_path)\n    analyzer.generate_chart(export_path)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    current_dir = Path(__file__).parent\n    parser = argparse.ArgumentParser(description=\"Run search quality evaluation.\")\n    parser.add_argument(\n        \"-d\",\n        \"--dataset\",\n        type=Path,\n        default=current_dir / \"test_queries.json\",\n        help=\"Path to the test-set JSON file (default: %(default)s).\",\n    )\n    parser.add_argument(\n        \"-n\",\n        \"--num_search\",\n        type=int,\n        default=50,\n        help=\"Maximum number of documents to retrieve per search (default: %(default)s).\",\n    )\n    parser.add_argument(\n        \"-a\",\n        \"--num_answer\",\n        type=int,\n        default=25,\n        help=\"Maximum number of documents to use for answer evaluation (default: %(default)s).\",\n    )\n    parser.add_argument(\n        \"-w\",\n        \"--max_workers\",\n        type=int,\n        default=10,\n        help=\"Maximum number of concurrent search requests (0 = unlimited, default: %(default)s).\",\n    )\n    parser.add_argument(\n        \"-r\",\n        \"--max_req_rate\",\n        type=int,\n        default=0,\n        help=\"Maximum number of search requests per minute (0 = unlimited, default: %(default)s).\",\n    )\n    parser.add_argument(\n        \"-q\",\n        \"--timeout\",\n        type=int,\n        default=120,\n        help=\"Request timeout in seconds (default: %(default)s).\",\n    )\n    parser.add_argument(\n        \"-e\",\n        \"--api_endpoint\",\n        type=str,\n        default=\"http://127.0.0.1:8080\",\n        help=\"Base URL of the Onyx API server (default: %(default)s).\",\n    )\n    parser.add_argument(\n        \"-s\",\n        \"--search_only\",\n        action=\"store_true\",\n        default=False,\n        help=\"Only perform search and not answer evaluation (default: %(default)s).\",\n    )\n    parser.add_argument(\n        \"-t\",\n        \"--tenant_id\",\n        type=str,\n        default=None,\n        help=\"Tenant ID to use for the evaluation (default: %(default)s).\",\n    )\n\n    args = parser.parse_args()\n\n    SqlEngine.init_engine(\n        pool_size=POSTGRES_API_SERVER_POOL_SIZE,\n        max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW,\n    )\n\n    try:\n        run_search_eval(\n            args.dataset,\n            EvalConfig(\n                max_search_results=args.num_search,\n                max_answer_context=args.num_answer,\n                num_workers=args.max_workers,\n                max_request_rate=args.max_req_rate,\n                request_timeout=args.timeout,\n                api_url=args.api_endpoint,\n                search_only=args.search_only,\n            ),\n            args.tenant_id,\n        )\n    except Exception as e:\n        logger.error(\"Unexpected error during search evaluation: %s\", e)\n        raise\n    finally:\n        SqlEngine.reset_engine()\n"
  },
  {
    "path": "backend/tests/regression/search_quality/test_queries.json.template",
    "content": "[\n    {\n        \"question\": \"What is Onyx?\",\n        \"ground_truth\": [\n            {\n                \"doc_source\": \"web\",\n                \"doc_link\": \"https://docs.onyx.app/welcome\"\n            }\n        ],\n        \"categories\": [\n            \"keyword\",\n            \"broad\",\n            \"easy\"\n        ]\n    }\n]"
  },
  {
    "path": "backend/tests/regression/search_quality/utils.py",
    "content": "import json\nimport re\nfrom pathlib import Path\nfrom textwrap import indent\nfrom typing import Any\nfrom typing import cast\nfrom typing import TextIO\n\nfrom ragas import evaluate  # type: ignore[import-not-found,unused-ignore]\nfrom ragas import EvaluationDataset  # type: ignore[import-not-found,unused-ignore]\nfrom ragas import SingleTurnSample  # type: ignore[import-not-found,unused-ignore]\nfrom ragas.dataset_schema import EvaluationResult  # type: ignore[import-not-found,unused-ignore]\nfrom ragas.metrics import FactualCorrectness  # type: ignore[import-not-found,unused-ignore]\nfrom ragas.metrics import Faithfulness  # type: ignore[import-not-found,unused-ignore]\nfrom ragas.metrics import ResponseRelevancy  # type: ignore[import-not-found,unused-ignore]\nfrom sqlalchemy.orm import Session\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import SavedSearchDoc\nfrom onyx.db.engine.sql_engine import get_session_with_tenant\nfrom onyx.db.models import Document\nfrom onyx.db.models import FederatedConnector\nfrom onyx.db.search_settings import get_current_search_settings\nfrom onyx.document_index.factory import get_default_document_index\nfrom onyx.document_index.interfaces import VespaChunkRequest\nfrom onyx.prompts.prompt_utils import build_doc_context_str\nfrom onyx.utils.logger import setup_logger\nfrom tests.regression.search_quality.models import CombinedMetrics\nfrom tests.regression.search_quality.models import GroundTruth\nfrom tests.regression.search_quality.models import RetrievedDocument\n\nlogger = setup_logger(__name__)\n\n\ndef get_federated_sources(db_session: Session) -> set[DocumentSource]:\n    \"\"\"Get all federated sources from the database.\"\"\"\n    return {\n        source\n        for connector in db_session.query(FederatedConnector).all()\n        if (source := connector.source.to_non_federated_source()) is not None\n    }\n\n\ndef find_document_id(\n    ground_truth: GroundTruth,\n    federated_sources: set[DocumentSource],\n    db_session: Session,\n) -> str | None:\n    \"\"\"Find a document by its link and return its id if found.\"\"\"\n    # handle federated sources TODO: maybe make handler dictionary by source if this gets complex\n    if ground_truth.doc_source in federated_sources:\n        if ground_truth.doc_source == DocumentSource.SLACK:\n            groups = re.search(\n                r\"archives\\/([A-Z0-9]+)\\/p([0-9]+)\", ground_truth.doc_link\n            )\n            if groups:\n                channel_id = groups.group(1)\n                message_id = groups.group(2)\n                return f\"{channel_id}__{message_id[:-6]}.{message_id[-6:]}\"\n\n    # preprocess links\n    doc_link = ground_truth.doc_link\n    if ground_truth.doc_source == DocumentSource.GOOGLE_DRIVE:\n        if \"/edit\" in doc_link:\n            doc_link = doc_link.split(\"/edit\", 1)[0]\n        elif \"/view\" in doc_link:\n            doc_link = doc_link.split(\"/view\", 1)[0]\n    elif ground_truth.doc_source == DocumentSource.FIREFLIES:\n        doc_link = doc_link.split(\"?\", 1)[0]\n\n    docs = db_session.query(Document).filter(Document.link.ilike(f\"{doc_link}%\")).all()\n    if len(docs) == 0:\n        logger.warning(\"Could not find ground truth document: %s\", doc_link)\n        return None\n    elif len(docs) > 1:\n        logger.warning(\n            \"Found multiple ground truth documents: %s, using the first one: %s\",\n            doc_link,\n            docs[0].id,\n        )\n    return docs[0].id\n\n\ndef get_doc_contents(\n    docs: list[SavedSearchDoc], tenant_id: str\n) -> dict[tuple[str, int], str]:\n    with get_session_with_tenant(tenant_id=tenant_id) as db_session:\n        search_settings = get_current_search_settings(db_session)\n        document_index = get_default_document_index(search_settings, None, db_session)\n\n    filters = IndexFilters(access_control_list=None, tenant_id=tenant_id)\n\n    reqs: list[VespaChunkRequest] = [\n        VespaChunkRequest(\n            document_id=doc.document_id,\n            min_chunk_ind=doc.chunk_ind,\n            max_chunk_ind=doc.chunk_ind,\n        )\n        for doc in docs\n    ]\n\n    results = document_index.id_based_retrieval(chunk_requests=reqs, filters=filters)\n    return {(doc.document_id, doc.chunk_id): doc.content for doc in results}\n\n\ndef search_docs_to_doc_contexts(\n    docs: list[SavedSearchDoc], tenant_id: str\n) -> list[RetrievedDocument]:\n    try:\n        doc_contents = get_doc_contents(docs, tenant_id)\n    except Exception as e:\n        logger.error(\"Error getting doc contents: %s\", e)\n        doc_contents = {}\n\n    return [\n        RetrievedDocument(\n            document_id=doc.document_id,\n            chunk_id=doc.chunk_ind,\n            content=build_doc_context_str(\n                semantic_identifier=doc.semantic_identifier,\n                source_type=doc.source_type,\n                content=doc_contents.get(\n                    (doc.document_id, doc.chunk_ind), f\"Blurb: {doc.blurb}\"\n                ),\n                metadata_dict=doc.metadata,\n                updated_at=doc.updated_at,\n                ind=ind,\n                include_metadata=True,\n            ),\n        )\n        for ind, doc in enumerate(docs)\n    ]\n\n\ndef ragas_evaluate(\n    question: str, answer: str, contexts: list[str], reference_answer: str | None = None\n) -> EvaluationResult:\n    sample = SingleTurnSample(\n        user_input=question,\n        retrieved_contexts=contexts,\n        response=answer,\n        reference=reference_answer,\n    )\n    dataset = EvaluationDataset([sample])\n    return cast(\n        EvaluationResult,\n        evaluate(\n            dataset,\n            metrics=[\n                ResponseRelevancy(),\n                Faithfulness(),\n                *(\n                    [FactualCorrectness(mode=\"recall\")]\n                    if reference_answer is not None\n                    else []\n                ),\n            ],\n        ),\n    )\n\n\ndef compute_overall_scores(metrics: CombinedMetrics) -> tuple[float, float]:\n    \"\"\"Compute the overall search and answer quality scores.\n    The scores are subjective and may require tuning.\"\"\"\n    # search score\n    FOUND_RATIO_WEIGHT = 0.4\n    TOP_IMPORTANCE = 0.7  # 0-inf, how important is it to be no. 1 over other ranks\n\n    found_ratio = metrics.found_count / metrics.total_queries\n    sum_k = sum(1.0 / pow(k, TOP_IMPORTANCE) for k in metrics.top_k_accuracy)\n    weighted_topk = sum(\n        acc / (pow(k, TOP_IMPORTANCE) * sum_k * 100)\n        for k, acc in metrics.top_k_accuracy.items()\n    )\n    search_score = 100 * (\n        FOUND_RATIO_WEIGHT * found_ratio + (1.0 - FOUND_RATIO_WEIGHT) * weighted_topk\n    )\n\n    # answer score\n    mets = [\n        *([metrics.response_relevancy] if metrics.n_response_relevancy > 0 else []),\n        *([metrics.faithfulness] if metrics.n_faithfulness > 0 else []),\n        *([metrics.factual_correctness] if metrics.n_factual_correctness > 0 else []),\n    ]\n    answer_score = 100 * sum(mets) / len(mets) if mets else 0.0\n\n    return search_score, answer_score\n\n\nclass LazyJsonWriter:\n    def __init__(self, filepath: Path, indent: int = 4) -> None:\n        self.filepath = filepath\n        self.file: TextIO | None = None\n        self.indent = indent\n\n    def append(self, serializable_item: dict[str, Any]) -> None:\n        if not self.file:\n            self.file = open(self.filepath, \"a\")\n            self.file.write(\"[\\n\")\n        else:\n            self.file.write(\",\\n\")\n\n        data = json.dumps(serializable_item, indent=self.indent)\n        self.file.write(indent(data, \" \" * self.indent))\n\n    def close(self) -> None:\n        if not self.file:\n            return\n        self.file.write(\"\\n]\")\n        self.file.close()\n        self.file = None\n"
  },
  {
    "path": "backend/tests/unit/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/build/test_rewrite_asset_paths.py",
    "content": "\"\"\"Unit tests for webapp proxy path rewriting/injection.\"\"\"\n\nfrom types import SimpleNamespace\nfrom typing import cast\nfrom typing import Literal\nfrom uuid import UUID\n\nimport httpx\nimport pytest\nfrom fastapi import Request\nfrom sqlalchemy.orm import Session\n\nfrom onyx.server.features.build.api import api\nfrom onyx.server.features.build.api.api import _inject_hmr_fixer\nfrom onyx.server.features.build.api.api import _rewrite_asset_paths\nfrom onyx.server.features.build.api.api import _rewrite_proxy_response_headers\n\nSESSION_ID = \"aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\"\nBASE = f\"/api/build/sessions/{SESSION_ID}/webapp\"\n\n\ndef rewrite(html: str) -> str:\n    return _rewrite_asset_paths(html.encode(), SESSION_ID).decode()\n\n\ndef inject(html: str) -> str:\n    return _inject_hmr_fixer(html.encode(), SESSION_ID).decode()\n\n\nclass TestNextjsPathRewriting:\n    def test_rewrites_bare_next_script_src(self) -> None:\n        html = '<script src=\"/_next/static/chunks/main.js\">'\n        result = rewrite(html)\n        assert f'src=\"{BASE}/_next/static/chunks/main.js\"' in result\n        assert '\"/_next/' not in result\n\n    def test_rewrites_bare_next_in_single_quotes(self) -> None:\n        html = \"<link href='/_next/static/css/app.css'>\"\n        result = rewrite(html)\n        assert f\"'{BASE}/_next/static/css/app.css'\" in result\n\n    def test_rewrites_bare_next_in_url_parens(self) -> None:\n        html = \"background: url(/_next/static/media/font.woff2)\"\n        result = rewrite(html)\n        assert f\"url({BASE}/_next/static/media/font.woff2)\" in result\n\n    def test_no_double_prefix_when_already_proxied(self) -> None:\n        \"\"\"assetPrefix makes Next.js emit already-prefixed URLs — must not double-rewrite.\"\"\"\n        already_prefixed = f'<script src=\"{BASE}/_next/static/chunks/main.js\">'\n        result = rewrite(already_prefixed)\n        # Should be unchanged\n        assert result == already_prefixed\n        # Specifically, no double path\n        assert f\"{BASE}/{BASE}\" not in result\n\n    def test_rewrites_favicon(self) -> None:\n        html = '<link rel=\"icon\" href=\"/favicon.ico\">'\n        result = rewrite(html)\n        assert f'\"{BASE}/favicon.ico\"' in result\n\n    def test_rewrites_json_data_path_double_quoted(self) -> None:\n        html = 'fetch(\"/data/tickets.json\")'\n        result = rewrite(html)\n        assert f'\"{BASE}/data/tickets.json\"' in result\n\n    def test_rewrites_json_data_path_single_quoted(self) -> None:\n        html = \"fetch('/data/items.json')\"\n        result = rewrite(html)\n        assert f\"'{BASE}/data/items.json'\" in result\n\n    def test_rewrites_escaped_next_font_path_in_json_script(self) -> None:\n        \"\"\"Next dev can embed font asset paths in JSON-escaped script payloads.\"\"\"\n        html = r'{\"src\":\"\\/_next\\/static\\/media\\/font.woff2\"}'\n        result = rewrite(html)\n        assert (\n            r'{\"src\":\"\\/api\\/build\\/sessions\\/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\\/webapp\\/_next\\/static\\/media\\/font.woff2\"}'\n            in result\n        )\n\n    def test_rewrites_escaped_next_font_path_in_style_payload(self) -> None:\n        \"\"\"Keep dynamically generated next/font URLs inside the session proxy.\"\"\"\n        html = r'{\"css\":\"@font-face{src:url(\\\"\\/_next\\/static\\/media\\/font.woff2\\\")\"}'\n        result = rewrite(html)\n        assert (\n            r\"\\/api\\/build\\/sessions\\/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\\/webapp\\/_next\\/static\\/media\\/font.woff2\"\n            in result\n        )\n\n    def test_rewrites_absolute_next_font_url(self) -> None:\n        html = '<link rel=\"preload\" as=\"font\" href=\"https://craft-dev.onyx.app/_next/static/media/font.woff2\">'\n        result = rewrite(html)\n        assert f'\"{BASE}/_next/static/media/font.woff2\"' in result\n\n    def test_rewrites_root_hmr_path(self) -> None:\n        html = 'new WebSocket(\"wss://craft-dev.onyx.app/_next/webpack-hmr?id=abc\")'\n        result = rewrite(html)\n        assert '\"wss://craft-dev.onyx.app/_next/webpack-hmr?id=abc\"' not in result\n        assert '\"/_next/webpack-hmr?id=abc\"' in result\n\n    def test_rewrites_escaped_absolute_next_font_url(self) -> None:\n        html = (\n            r'{\"href\":\"https:\\/\\/craft-dev.onyx.app\\/_next\\/static\\/media\\/font.woff2\"}'\n        )\n        result = rewrite(html)\n        assert (\n            r'{\"href\":\"\\/api\\/build\\/sessions\\/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\\/webapp\\/_next\\/static\\/media\\/font.woff2\"}'\n            in result\n        )\n\n\nclass TestRuntimeFixerInjection:\n    def test_injects_websocket_rewrite_shim(self) -> None:\n        html = \"<html><head></head><body></body></html>\"\n        result = inject(html)\n        assert \"window.WebSocket = function (url, protocols)\" in result\n        assert f'var WEBAPP_BASE = \"{BASE}\"' in result\n\n    def test_injects_hmr_websocket_stub(self) -> None:\n        html = \"<html><head></head><body></body></html>\"\n        result = inject(html)\n        assert \"function MockHmrWebSocket(url)\" in result\n        assert \"return new MockHmrWebSocket(rewriteNextAssetUrl(url));\" in result\n\n    def test_injects_before_head_contents(self) -> None:\n        html = \"<html><head><title>x</title></head><body></body></html>\"\n        result = inject(html)\n        assert result.index(\n            \"window.WebSocket = function (url, protocols)\"\n        ) < result.index(\"<title>x</title>\")\n\n    def test_rewritten_hmr_url_still_matches_shim_intercept_logic(self) -> None:\n        html = '<html><head></head><body>new WebSocket(\"wss://craft-dev.onyx.app/_next/webpack-hmr?id=abc\")</body></html>'\n\n        rewritten = rewrite(html)\n        assert '\"wss://craft-dev.onyx.app/_next/webpack-hmr?id=abc\"' not in rewritten\n        assert 'new WebSocket(\"/_next/webpack-hmr?id=abc\")' in rewritten\n\n        injected = inject(rewritten)\n\n        assert 'new WebSocket(\"/_next/webpack-hmr?id=abc\")' in injected\n        assert 'parsedUrl.pathname.indexOf(\"/_next/webpack-hmr\") === 0' in injected\n\n\nclass TestProxyHeaderRewriting:\n    def test_rewrites_link_header_font_preload_paths(self) -> None:\n        headers = {\n            \"link\": (\n                '</_next/static/media/font.woff2>; rel=preload; as=\"font\"; crossorigin, '\n                '</_next/static/media/font2.woff2>; rel=preload; as=\"font\"; crossorigin'\n            )\n        }\n\n        result = _rewrite_proxy_response_headers(headers, SESSION_ID)\n\n        assert f\"<{BASE}/_next/static/media/font.woff2>\" in result[\"link\"]\n\n\nclass TestProxyRequestWiring:\n    def test_proxy_request_rewrites_link_header_on_html_response(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        html = b\"<html><head></head><body>ok</body></html>\"\n        upstream = httpx.Response(\n            200,\n            headers={\n                \"content-type\": \"text/html; charset=utf-8\",\n                \"link\": '</_next/static/media/font.woff2>; rel=preload; as=\"font\"',\n            },\n            content=html,\n        )\n\n        monkeypatch.setattr(api, \"_get_sandbox_url\", lambda *_args: \"http://sandbox\")\n\n        class FakeClient:\n            def __init__(self, *_args: object, **_kwargs: object) -> None:\n                pass\n\n            def __enter__(self) -> \"FakeClient\":\n                return self\n\n            def __exit__(self, *_args: object) -> Literal[False]:\n                return False\n\n            def get(self, _url: str, headers: dict[str, str]) -> httpx.Response:\n                assert \"host\" not in {key.lower() for key in headers}\n                return upstream\n\n        monkeypatch.setattr(api.httpx, \"Client\", FakeClient)\n\n        request = cast(Request, SimpleNamespace(headers={}, query_params=\"\"))\n\n        response = api._proxy_request(\n            \"\", request, UUID(SESSION_ID), cast(Session, SimpleNamespace())\n        )\n\n        assert response.headers[\"link\"] == (\n            f'<{BASE}/_next/static/media/font.woff2>; rel=preload; as=\"font\"'\n        )\n\n    def test_proxy_request_injects_hmr_fixer_for_html_response(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        upstream = httpx.Response(\n            200,\n            headers={\"content-type\": \"text/html; charset=utf-8\"},\n            content=b\"<html><head><title>x</title></head><body></body></html>\",\n        )\n\n        monkeypatch.setattr(api, \"_get_sandbox_url\", lambda *_args: \"http://sandbox\")\n\n        class FakeClient:\n            def __init__(self, *_args: object, **_kwargs: object) -> None:\n                pass\n\n            def __enter__(self) -> \"FakeClient\":\n                return self\n\n            def __exit__(self, *_args: object) -> Literal[False]:\n                return False\n\n            def get(self, _url: str, headers: dict[str, str]) -> httpx.Response:\n                assert \"host\" not in {key.lower() for key in headers}\n                return upstream\n\n        monkeypatch.setattr(api.httpx, \"Client\", FakeClient)\n\n        request = cast(Request, SimpleNamespace(headers={}, query_params=\"\"))\n\n        response = api._proxy_request(\n            \"\", request, UUID(SESSION_ID), cast(Session, SimpleNamespace())\n        )\n        body = cast(bytes, response.body).decode(\"utf-8\")\n\n        assert \"window.WebSocket = function (url, protocols)\" in body\n        assert body.index(\"window.WebSocket = function (url, protocols)\") < body.index(\n            \"<title>x</title>\"\n        )\n\n    def test_rewrites_absolute_link_header_font_preload_paths(self) -> None:\n        headers = {\n            \"link\": (\n                '<https://craft-dev.onyx.app/_next/static/media/font.woff2>; rel=preload; as=\"font\"; crossorigin'\n            )\n        }\n\n        result = _rewrite_proxy_response_headers(headers, SESSION_ID)\n\n        assert f\"<{BASE}/_next/static/media/font.woff2>\" in result[\"link\"]\n"
  },
  {
    "path": "backend/tests/unit/ee/conftest.py",
    "content": "\"\"\"Auto-enable EE mode for all tests under tests/unit/ee/.\"\"\"\n\nimport pytest\n\n\n@pytest.fixture(autouse=True)\ndef _enable_ee_for_directory(enable_ee: None) -> None:\n    \"\"\"Wraps the shared enable_ee fixture with autouse for this directory.\"\"\"\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/db/test_license.py",
    "content": "\"\"\"Tests for license database CRUD operations.\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom ee.onyx.db.license import check_seat_availability\nfrom ee.onyx.db.license import delete_license\nfrom ee.onyx.db.license import get_license\nfrom ee.onyx.db.license import upsert_license\nfrom ee.onyx.server.license.models import LicenseMetadata\nfrom ee.onyx.server.license.models import LicenseSource\nfrom ee.onyx.server.license.models import PlanType\nfrom onyx.db.models import License\nfrom onyx.server.settings.models import ApplicationStatus\n\n\nclass TestGetLicense:\n    \"\"\"Tests for get_license function.\"\"\"\n\n    def test_get_existing_license(self) -> None:\n        \"\"\"Test getting an existing license.\"\"\"\n        mock_session = MagicMock()\n        mock_license = License(id=1, license_data=\"test_data\")\n\n        # Mock the query chain\n        mock_session.execute.return_value.scalars.return_value.first.return_value = (\n            mock_license\n        )\n\n        result = get_license(mock_session)\n\n        assert result is not None\n        assert result.license_data == \"test_data\"\n        mock_session.execute.assert_called_once()\n\n    def test_get_no_license(self) -> None:\n        \"\"\"Test getting when no license exists.\"\"\"\n        mock_session = MagicMock()\n        mock_session.execute.return_value.scalars.return_value.first.return_value = None\n\n        result = get_license(mock_session)\n\n        assert result is None\n\n\nclass TestUpsertLicense:\n    \"\"\"Tests for upsert_license function.\"\"\"\n\n    def test_insert_new_license(self) -> None:\n        \"\"\"Test inserting a new license when none exists.\"\"\"\n        mock_session = MagicMock()\n        mock_session.execute.return_value.scalars.return_value.first.return_value = None\n\n        upsert_license(mock_session, \"new_license_data\")\n\n        # Verify add was called with a License object\n        mock_session.add.assert_called_once()\n        added_license = mock_session.add.call_args[0][0]\n        assert isinstance(added_license, License)\n        assert added_license.license_data == \"new_license_data\"\n\n        mock_session.commit.assert_called_once()\n        mock_session.refresh.assert_called_once()\n\n    def test_update_existing_license(self) -> None:\n        \"\"\"Test updating an existing license.\"\"\"\n        mock_session = MagicMock()\n        existing_license = License(id=1, license_data=\"old_data\")\n        mock_session.execute.return_value.scalars.return_value.first.return_value = (\n            existing_license\n        )\n\n        upsert_license(mock_session, \"updated_license_data\")\n\n        # Verify the existing license was updated\n        assert existing_license.license_data == \"updated_license_data\"\n        mock_session.add.assert_not_called()  # Should not add new\n        mock_session.commit.assert_called_once()\n        mock_session.refresh.assert_called_once_with(existing_license)\n\n\nclass TestDeleteLicense:\n    \"\"\"Tests for delete_license function.\"\"\"\n\n    def test_delete_existing_license(self) -> None:\n        \"\"\"Test deleting an existing license.\"\"\"\n        mock_session = MagicMock()\n        existing_license = License(id=1, license_data=\"test_data\")\n        mock_session.execute.return_value.scalars.return_value.first.return_value = (\n            existing_license\n        )\n\n        result = delete_license(mock_session)\n\n        assert result is True\n        mock_session.delete.assert_called_once_with(existing_license)\n        mock_session.commit.assert_called_once()\n\n    def test_delete_no_license(self) -> None:\n        \"\"\"Test deleting when no license exists.\"\"\"\n        mock_session = MagicMock()\n        mock_session.execute.return_value.scalars.return_value.first.return_value = None\n\n        result = delete_license(mock_session)\n\n        assert result is False\n        mock_session.delete.assert_not_called()\n        mock_session.commit.assert_not_called()\n\n\ndef _make_license_metadata(seats: int = 10) -> LicenseMetadata:\n    now = datetime.now(timezone.utc)\n    return LicenseMetadata(\n        tenant_id=\"public\",\n        seats=seats,\n        used_seats=0,\n        plan_type=PlanType.ANNUAL,\n        issued_at=now,\n        expires_at=now + timedelta(days=365),\n        status=ApplicationStatus.ACTIVE,\n        source=LicenseSource.MANUAL_UPLOAD,\n    )\n\n\nclass TestCheckSeatAvailabilitySelfHosted:\n    \"\"\"Seat checks for self-hosted (MULTI_TENANT=False).\"\"\"\n\n    @patch(\"ee.onyx.db.license.get_license_metadata\", return_value=None)\n    def test_no_license_means_unlimited(self, _mock_meta: MagicMock) -> None:\n        result = check_seat_availability(MagicMock(), seats_needed=1)\n        assert result.available is True\n\n    @patch(\"ee.onyx.db.license.get_used_seats\", return_value=5)\n    @patch(\"ee.onyx.db.license.get_license_metadata\")\n    def test_seats_available(self, mock_meta: MagicMock, _mock_used: MagicMock) -> None:\n        mock_meta.return_value = _make_license_metadata(seats=10)\n        result = check_seat_availability(MagicMock(), seats_needed=1)\n        assert result.available is True\n\n    @patch(\"ee.onyx.db.license.get_used_seats\", return_value=10)\n    @patch(\"ee.onyx.db.license.get_license_metadata\")\n    def test_seats_full_blocks_creation(\n        self, mock_meta: MagicMock, _mock_used: MagicMock\n    ) -> None:\n        mock_meta.return_value = _make_license_metadata(seats=10)\n        result = check_seat_availability(MagicMock(), seats_needed=1)\n        assert result.available is False\n        assert result.error_message is not None\n        assert \"10 of 10\" in result.error_message\n\n    @patch(\"ee.onyx.db.license.get_used_seats\", return_value=10)\n    @patch(\"ee.onyx.db.license.get_license_metadata\")\n    def test_exactly_at_capacity_allows_no_more(\n        self, mock_meta: MagicMock, _mock_used: MagicMock\n    ) -> None:\n        \"\"\"Filling to 100% is allowed; exceeding is not.\"\"\"\n        mock_meta.return_value = _make_license_metadata(seats=10)\n        result = check_seat_availability(MagicMock(), seats_needed=1)\n        assert result.available is False\n\n    @patch(\"ee.onyx.db.license.get_used_seats\", return_value=9)\n    @patch(\"ee.onyx.db.license.get_license_metadata\")\n    def test_filling_to_capacity_is_allowed(\n        self, mock_meta: MagicMock, _mock_used: MagicMock\n    ) -> None:\n        mock_meta.return_value = _make_license_metadata(seats=10)\n        result = check_seat_availability(MagicMock(), seats_needed=1)\n        assert result.available is True\n\n\nclass TestCheckSeatAvailabilityMultiTenant:\n    \"\"\"Seat checks for multi-tenant cloud (MULTI_TENANT=True).\n\n    Verifies that get_used_seats takes the MULTI_TENANT branch\n    and delegates to get_tenant_count.\n    \"\"\"\n\n    @patch(\"ee.onyx.db.license.MULTI_TENANT\", True)\n    @patch(\n        \"ee.onyx.server.tenants.user_mapping.get_tenant_count\",\n        return_value=5,\n    )\n    @patch(\"ee.onyx.db.license.get_license_metadata\")\n    def test_seats_available_multi_tenant(\n        self,\n        mock_meta: MagicMock,\n        mock_tenant_count: MagicMock,\n    ) -> None:\n        mock_meta.return_value = _make_license_metadata(seats=10)\n        result = check_seat_availability(\n            MagicMock(), seats_needed=1, tenant_id=\"tenant-abc\"\n        )\n        assert result.available is True\n        mock_tenant_count.assert_called_once_with(\"tenant-abc\")\n\n    @patch(\"ee.onyx.db.license.MULTI_TENANT\", True)\n    @patch(\n        \"ee.onyx.server.tenants.user_mapping.get_tenant_count\",\n        return_value=10,\n    )\n    @patch(\"ee.onyx.db.license.get_license_metadata\")\n    def test_seats_full_multi_tenant(\n        self,\n        mock_meta: MagicMock,\n        mock_tenant_count: MagicMock,\n    ) -> None:\n        mock_meta.return_value = _make_license_metadata(seats=10)\n        result = check_seat_availability(\n            MagicMock(), seats_needed=1, tenant_id=\"tenant-abc\"\n        )\n        assert result.available is False\n        assert result.error_message is not None\n        mock_tenant_count.assert_called_once_with(\"tenant-abc\")\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/db/test_user_group_rename.py",
    "content": "\"\"\"Tests for user group rename DB operation.\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom ee.onyx.db.user_group import rename_user_group\nfrom onyx.db.models import UserGroup\n\n\nclass TestRenameUserGroup:\n    \"\"\"Tests for rename_user_group function.\"\"\"\n\n    @patch(\"ee.onyx.db.user_group.DISABLE_VECTOR_DB\", False)\n    @patch(\n        \"ee.onyx.db.user_group._mark_user_group__cc_pair_relationships_outdated__no_commit\"\n    )\n    def test_rename_succeeds_and_triggers_sync(\n        self, mock_mark_outdated: MagicMock\n    ) -> None:\n        mock_session = MagicMock()\n        mock_group = MagicMock(spec=UserGroup)\n        mock_group.name = \"Old Name\"\n        mock_group.is_up_to_date = True\n        mock_session.scalar.return_value = mock_group\n\n        result = rename_user_group(mock_session, user_group_id=1, new_name=\"New Name\")\n\n        assert result.name == \"New Name\"\n        assert result.is_up_to_date is False\n        mock_mark_outdated.assert_called_once()\n        mock_session.commit.assert_called_once()\n\n    def test_rename_group_not_found(self) -> None:\n        mock_session = MagicMock()\n        mock_session.scalar.return_value = None\n\n        with pytest.raises(ValueError, match=\"not found\"):\n            rename_user_group(mock_session, user_group_id=999, new_name=\"New Name\")\n\n        mock_session.commit.assert_not_called()\n\n    def test_rename_group_syncing_raises(self) -> None:\n        mock_session = MagicMock()\n        mock_group = MagicMock(spec=UserGroup)\n        mock_group.is_up_to_date = False\n        mock_session.scalar.return_value = mock_group\n\n        with pytest.raises(ValueError, match=\"currently syncing\"):\n            rename_user_group(mock_session, user_group_id=1, new_name=\"New Name\")\n\n        mock_session.commit.assert_not_called()\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/external_permissions/salesforce/test_postprocessing.py",
    "content": "from datetime import datetime\n\nfrom ee.onyx.external_permissions.salesforce.postprocessing import (\n    censor_salesforce_chunks,\n)\nfrom onyx.configs.app_configs import BLURB_SIZE\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.salesforce.utils import BASE_DATA_PATH\nfrom onyx.context.search.models import InferenceChunk\n\nSQLITE_DIR = BASE_DATA_PATH\n\n\ndef create_test_chunk(\n    doc_id: str,\n    chunk_id: int,\n    content: str,\n    source_links: dict[int, str] | None,\n) -> InferenceChunk:\n    return InferenceChunk(\n        document_id=doc_id,\n        chunk_id=chunk_id,\n        blurb=content[:BLURB_SIZE],\n        content=content,\n        source_links=source_links,\n        section_continuation=False,\n        source_type=DocumentSource.SALESFORCE,\n        semantic_identifier=\"test_chunk\",\n        title=\"Test Chunk\",\n        boost=1,\n        score=None,\n        hidden=False,\n        metadata={},\n        match_highlights=[],\n        updated_at=datetime.now(),\n        image_file_id=None,\n        doc_summary=\"\",\n        chunk_context=\"\",\n    )\n\n\ndef test_validate_salesforce_access_single_object() -> None:\n    \"\"\"Test filtering when chunk has a single Salesforce object reference\"\"\"\n\n    section = \"This is a test document about a Salesforce object.\"\n    test_content = section\n    test_chunk = create_test_chunk(\n        doc_id=\"doc1\",\n        chunk_id=1,\n        content=test_content,\n        source_links={0: \"https://salesforce.com/object1\"},\n    )\n\n    # Test when user has access\n    filtered_chunks = censor_salesforce_chunks(\n        chunks=[test_chunk],\n        user_email=\"test@example.com\",\n        access_map={\"object1\": True},\n    )\n    assert len(filtered_chunks) == 1\n    assert filtered_chunks[0].content == test_content\n\n    # Test when user doesn't have access\n    filtered_chunks = censor_salesforce_chunks(\n        chunks=[test_chunk],\n        user_email=\"test@example.com\",\n        access_map={\"object1\": False},\n    )\n    assert len(filtered_chunks) == 0\n\n\ndef test_validate_salesforce_access_multiple_objects() -> None:\n    \"\"\"Test filtering when chunk has multiple Salesforce object references\"\"\"\n    section1 = \"First part about object1. \"\n    section2 = \"Second part about object2. \"\n    section3 = \"Third part about object3.\"\n\n    test_content = section1 + section2 + section3\n    section1_end = len(section1)\n    section2_end = section1_end + len(section2)\n\n    test_chunk = create_test_chunk(\n        doc_id=\"doc1\",\n        chunk_id=1,\n        content=test_content,\n        source_links={\n            0: \"https://salesforce.com/object1\",\n            section1_end: \"https://salesforce.com/object2\",\n            section2_end: \"https://salesforce.com/object3\",\n        },\n    )\n\n    # Test when user has access to all objects\n    filtered_chunks = censor_salesforce_chunks(\n        chunks=[test_chunk],\n        user_email=\"test@example.com\",\n        access_map={\n            \"object1\": True,\n            \"object2\": True,\n            \"object3\": True,\n        },\n    )\n    assert len(filtered_chunks) == 1\n    assert filtered_chunks[0].content == test_content\n\n    # Test when user has access to some objects\n    filtered_chunks = censor_salesforce_chunks(\n        chunks=[test_chunk],\n        user_email=\"test@example.com\",\n        access_map={\n            \"object1\": True,\n            \"object2\": False,\n            \"object3\": True,\n        },\n    )\n    assert len(filtered_chunks) == 1\n    assert section1 in filtered_chunks[0].content\n    assert section2 not in filtered_chunks[0].content\n    assert section3 in filtered_chunks[0].content\n\n    # Test when user has no access\n    filtered_chunks = censor_salesforce_chunks(\n        chunks=[test_chunk],\n        user_email=\"test@example.com\",\n        access_map={\n            \"object1\": False,\n            \"object2\": False,\n            \"object3\": False,\n        },\n    )\n    assert len(filtered_chunks) == 0\n\n\ndef test_validate_salesforce_access_multiple_chunks() -> None:\n    \"\"\"Test filtering when there are multiple chunks with different access patterns\"\"\"\n    section1 = \"Content about object1\"\n    section2 = \"Content about object2\"\n\n    chunk1 = create_test_chunk(\n        doc_id=\"doc1\",\n        chunk_id=1,\n        content=section1,\n        source_links={0: \"https://salesforce.com/object1\"},\n    )\n    chunk2 = create_test_chunk(\n        doc_id=\"doc1\",\n        chunk_id=2,\n        content=section2,\n        source_links={0: \"https://salesforce.com/object2\"},\n    )\n\n    # Test mixed access\n    filtered_chunks = censor_salesforce_chunks(\n        chunks=[chunk1, chunk2],\n        user_email=\"test@example.com\",\n        access_map={\n            \"object1\": True,\n            \"object2\": False,\n        },\n    )\n    assert len(filtered_chunks) == 1\n    assert filtered_chunks[0].chunk_id == 1\n    assert section1 in filtered_chunks[0].content\n\n\ndef test_validate_salesforce_access_no_source_links() -> None:\n    \"\"\"Test handling of chunks with no source links\"\"\"\n    section = \"Content with no source links\"\n    test_chunk = create_test_chunk(\n        doc_id=\"doc1\",\n        chunk_id=1,\n        content=section,\n        source_links=None,\n    )\n\n    filtered_chunks = censor_salesforce_chunks(\n        chunks=[test_chunk],\n        user_email=\"test@example.com\",\n        access_map={},\n    )\n    assert len(filtered_chunks) == 0\n\n\ndef test_validate_salesforce_access_blurb_update() -> None:\n    \"\"\"Test that blurbs are properly updated based on permitted content\"\"\"\n    section = \"First part about object1. \"\n    long_content = section * 20  # Make it longer than BLURB_SIZE\n    test_chunk = create_test_chunk(\n        doc_id=\"doc1\",\n        chunk_id=1,\n        content=long_content,\n        source_links={0: \"https://salesforce.com/object1\"},\n    )\n\n    filtered_chunks = censor_salesforce_chunks(\n        chunks=[test_chunk],\n        user_email=\"test@example.com\",\n        access_map={\"object1\": True},\n    )\n    assert len(filtered_chunks) == 1\n    assert len(filtered_chunks[0].blurb) <= BLURB_SIZE\n    assert filtered_chunks[0].blurb.startswith(section)\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/external_permissions/sharepoint/test_permission_utils.py",
    "content": "from collections.abc import Generator\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom ee.onyx.external_permissions.sharepoint.permission_utils import (\n    _enumerate_ad_groups_paginated,\n)\nfrom ee.onyx.external_permissions.sharepoint.permission_utils import (\n    _is_public_item,\n)\nfrom ee.onyx.external_permissions.sharepoint.permission_utils import (\n    _iter_graph_collection,\n)\nfrom ee.onyx.external_permissions.sharepoint.permission_utils import (\n    _normalize_email,\n)\nfrom ee.onyx.external_permissions.sharepoint.permission_utils import (\n    AD_GROUP_ENUMERATION_THRESHOLD,\n)\nfrom ee.onyx.external_permissions.sharepoint.permission_utils import (\n    get_external_access_from_sharepoint,\n)\nfrom ee.onyx.external_permissions.sharepoint.permission_utils import (\n    get_sharepoint_external_groups,\n)\nfrom ee.onyx.external_permissions.sharepoint.permission_utils import GroupsResult\n\n\nMODULE = \"ee.onyx.external_permissions.sharepoint.permission_utils\"\nGRAPH_API_BASE = \"https://graph.microsoft.com/v1.0\"\n\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef _fake_token() -> str:\n    return \"fake-token\"\n\n\ndef _make_graph_page(\n    items: list[dict[str, Any]],\n    next_link: str | None = None,\n) -> dict[str, Any]:\n    page: dict[str, Any] = {\"value\": items}\n    if next_link:\n        page[\"@odata.nextLink\"] = next_link\n    return page\n\n\n# ---------------------------------------------------------------------------\n# _normalize_email\n# ---------------------------------------------------------------------------\n\n\ndef test_normalize_email_strips_onmicrosoft() -> None:\n    assert _normalize_email(\"user@contoso.onmicrosoft.com\") == \"user@contoso.com\"\n\n\ndef test_normalize_email_noop_for_normal_domain() -> None:\n    assert _normalize_email(\"user@contoso.com\") == \"user@contoso.com\"\n\n\n# ---------------------------------------------------------------------------\n# _iter_graph_collection\n# ---------------------------------------------------------------------------\n\n\n@patch(f\"{MODULE}._graph_api_get\")\ndef test_iter_graph_collection_single_page(mock_get: MagicMock) -> None:\n    mock_get.return_value = _make_graph_page([{\"id\": \"1\"}, {\"id\": \"2\"}])\n\n    items = list(_iter_graph_collection(\"https://graph/items\", _fake_token))\n    assert items == [{\"id\": \"1\"}, {\"id\": \"2\"}]\n    mock_get.assert_called_once()\n\n\n@patch(f\"{MODULE}._graph_api_get\")\ndef test_iter_graph_collection_multi_page(mock_get: MagicMock) -> None:\n    mock_get.side_effect = [\n        _make_graph_page([{\"id\": \"1\"}], next_link=\"https://graph/items?page=2\"),\n        _make_graph_page([{\"id\": \"2\"}]),\n    ]\n\n    items = list(_iter_graph_collection(\"https://graph/items\", _fake_token))\n    assert items == [{\"id\": \"1\"}, {\"id\": \"2\"}]\n    assert mock_get.call_count == 2\n\n\n@patch(f\"{MODULE}._graph_api_get\")\ndef test_iter_graph_collection_empty(mock_get: MagicMock) -> None:\n    mock_get.return_value = _make_graph_page([])\n    assert list(_iter_graph_collection(\"https://graph/items\", _fake_token)) == []\n\n\n# ---------------------------------------------------------------------------\n# _enumerate_ad_groups_paginated\n# ---------------------------------------------------------------------------\n\n\ndef _mock_graph_get_for_enumeration(\n    groups: list[dict[str, Any]],\n    members_by_group: dict[str, list[dict[str, Any]]],\n) -> Generator[dict[str, Any], None, None]:\n    \"\"\"Return a side_effect function for _graph_api_get that serves\n    groups on the /groups URL and members on /groups/{id}/members URLs.\"\"\"\n\n    def side_effect(\n        url: str,\n        get_access_token: Any,  # noqa: ARG001\n        params: dict[str, str] | None = None,  # noqa: ARG001\n    ) -> dict[str, Any]:\n        if \"/members\" in url:\n            group_id = url.split(\"/groups/\")[1].split(\"/members\")[0]\n            return _make_graph_page(members_by_group.get(group_id, []))\n        return _make_graph_page(groups)\n\n    return side_effect  # type: ignore[return-value]\n\n\n@patch(f\"{MODULE}._graph_api_get\")\ndef test_enumerate_ad_groups_yields_groups(mock_get: MagicMock) -> None:\n    groups = [\n        {\"id\": \"g1\", \"displayName\": \"Engineering\"},\n        {\"id\": \"g2\", \"displayName\": \"Marketing\"},\n    ]\n    members = {\n        \"g1\": [{\"userPrincipalName\": \"alice@contoso.com\"}],\n        \"g2\": [{\"mail\": \"bob@contoso.onmicrosoft.com\"}],\n    }\n    mock_get.side_effect = _mock_graph_get_for_enumeration(groups, members)\n\n    results = list(\n        _enumerate_ad_groups_paginated(\n            _fake_token, already_resolved=set(), graph_api_base=GRAPH_API_BASE\n        )\n    )\n\n    assert len(results) == 2\n    eng = next(r for r in results if r.id == \"Engineering_g1\")\n    assert eng.user_emails == [\"alice@contoso.com\"]\n    mkt = next(r for r in results if r.id == \"Marketing_g2\")\n    assert mkt.user_emails == [\"bob@contoso.com\"]\n\n\n@patch(f\"{MODULE}._graph_api_get\")\ndef test_enumerate_ad_groups_skips_already_resolved(mock_get: MagicMock) -> None:\n    groups = [{\"id\": \"g1\", \"displayName\": \"Engineering\"}]\n    mock_get.side_effect = _mock_graph_get_for_enumeration(groups, {})\n\n    results = list(\n        _enumerate_ad_groups_paginated(\n            _fake_token,\n            already_resolved={\"Engineering_g1\"},\n            graph_api_base=GRAPH_API_BASE,\n        )\n    )\n    assert results == []\n\n\n@patch(f\"{MODULE}._graph_api_get\")\ndef test_enumerate_ad_groups_circuit_breaker(mock_get: MagicMock) -> None:\n    \"\"\"Enumeration stops after AD_GROUP_ENUMERATION_THRESHOLD groups.\"\"\"\n    over_limit = AD_GROUP_ENUMERATION_THRESHOLD + 5\n    groups = [{\"id\": f\"g{i}\", \"displayName\": f\"Group{i}\"} for i in range(over_limit)]\n    mock_get.side_effect = _mock_graph_get_for_enumeration(groups, {})\n\n    results = list(\n        _enumerate_ad_groups_paginated(\n            _fake_token, already_resolved=set(), graph_api_base=GRAPH_API_BASE\n        )\n    )\n    assert len(results) <= AD_GROUP_ENUMERATION_THRESHOLD\n\n\n# ---------------------------------------------------------------------------\n# get_sharepoint_external_groups\n# ---------------------------------------------------------------------------\n\n\ndef _stub_role_assignment_resolution(\n    groups_to_emails: dict[str, set[str]],\n) -> tuple[MagicMock, MagicMock]:\n    \"\"\"Return (mock_sleep_and_retry, mock_recursive) pre-configured to\n    simulate role-assignment group resolution.\"\"\"\n    mock_sleep = MagicMock()\n    mock_recursive = MagicMock(\n        return_value=GroupsResult(\n            groups_to_emails=groups_to_emails,\n            found_public_group=False,\n        )\n    )\n    return mock_sleep, mock_recursive\n\n\n@patch(f\"{MODULE}._get_groups_and_members_recursively\")\n@patch(f\"{MODULE}.sleep_and_retry\")\ndef test_default_skips_ad_enumeration(\n    mock_sleep: MagicMock,  # noqa: ARG001\n    mock_recursive: MagicMock,\n) -> None:\n    mock_recursive.return_value = GroupsResult(\n        groups_to_emails={\"SiteGroup_abc\": {\"alice@contoso.com\"}},\n        found_public_group=False,\n    )\n\n    results = get_sharepoint_external_groups(\n        client_context=MagicMock(),\n        graph_client=MagicMock(),\n        graph_api_base=GRAPH_API_BASE,\n    )\n\n    assert len(results) == 1\n    assert results[0].id == \"SiteGroup_abc\"\n    assert results[0].user_emails == [\"alice@contoso.com\"]\n\n\n@patch(f\"{MODULE}._enumerate_ad_groups_paginated\")\n@patch(f\"{MODULE}._get_groups_and_members_recursively\")\n@patch(f\"{MODULE}.sleep_and_retry\")\ndef test_enumerate_all_includes_ad_groups(\n    mock_sleep: MagicMock,  # noqa: ARG001\n    mock_recursive: MagicMock,\n    mock_enum: MagicMock,\n) -> None:\n    from ee.onyx.db.external_perm import ExternalUserGroup\n\n    mock_recursive.return_value = GroupsResult(\n        groups_to_emails={\"SiteGroup_abc\": {\"alice@contoso.com\"}},\n        found_public_group=False,\n    )\n    mock_enum.return_value = [\n        ExternalUserGroup(id=\"ADGroup_xyz\", user_emails=[\"bob@contoso.com\"]),\n    ]\n\n    results = get_sharepoint_external_groups(\n        client_context=MagicMock(),\n        graph_client=MagicMock(),\n        get_access_token=_fake_token,\n        enumerate_all_ad_groups=True,\n        graph_api_base=GRAPH_API_BASE,\n    )\n\n    assert len(results) == 2\n    ids = {r.id for r in results}\n    assert ids == {\"SiteGroup_abc\", \"ADGroup_xyz\"}\n    mock_enum.assert_called_once()\n\n\n@patch(f\"{MODULE}._enumerate_ad_groups_paginated\")\n@patch(f\"{MODULE}._get_groups_and_members_recursively\")\n@patch(f\"{MODULE}.sleep_and_retry\")\ndef test_enumerate_all_without_token_skips(\n    mock_sleep: MagicMock,  # noqa: ARG001\n    mock_recursive: MagicMock,\n    mock_enum: MagicMock,\n) -> None:\n    \"\"\"Even if enumerate_all_ad_groups=True, no token means skip.\"\"\"\n    mock_recursive.return_value = GroupsResult(\n        groups_to_emails={},\n        found_public_group=False,\n    )\n\n    results = get_sharepoint_external_groups(\n        client_context=MagicMock(),\n        graph_client=MagicMock(),\n        get_access_token=None,\n        enumerate_all_ad_groups=True,\n        graph_api_base=GRAPH_API_BASE,\n    )\n\n    assert results == []\n    mock_enum.assert_not_called()\n\n\n# ---------------------------------------------------------------------------\n# get_external_access_from_sharepoint – site page URL handling\n# ---------------------------------------------------------------------------\n\n\n@pytest.mark.parametrize(\n    \"site_base_url, web_url, expected_relative_url\",\n    [\n        (\n            \"https://tenant.sharepoint.com/sites/Evan%27sSite\",\n            \"https://tenant.sharepoint.com/sites/Evan%27sSite/SitePages/Home.aspx\",\n            \"/sites/Evan%27sSite/SitePages/Home.aspx\",\n        ),\n        (\n            \"https://tenant.sharepoint.com/sites/NormalSite\",\n            \"https://tenant.sharepoint.com/sites/NormalSite/SitePages/Page.aspx\",\n            \"/sites/NormalSite/SitePages/Page.aspx\",\n        ),\n        (\n            \"https://tenant.sharepoint.com/sites/Site%20With%20Spaces\",\n            \"https://tenant.sharepoint.com/sites/Site%20With%20Spaces/SitePages/Doc.aspx\",\n            \"/sites/Site%20With%20Spaces/SitePages/Doc.aspx\",\n        ),\n    ],\n    ids=[\"apostrophe-encoded\", \"no-special-chars\", \"space-encoded\"],\n)\n@patch(f\"{MODULE}._get_groups_and_members_recursively\")\n@patch(f\"{MODULE}.sleep_and_retry\")\ndef test_site_page_url_not_duplicated(\n    mock_sleep: MagicMock,  # noqa: ARG001\n    mock_recursive: MagicMock,\n    site_base_url: str,\n    web_url: str,\n    expected_relative_url: str,\n) -> None:\n    \"\"\"Regression: the server-relative URL passed to\n    get_file_by_server_relative_url must preserve percent-encoding so the\n    Office365 library's SPResPath.create_relative() recognises the site prefix\n    and doesn't duplicate it.\"\"\"\n    mock_recursive.return_value = GroupsResult(\n        groups_to_emails={},\n        found_public_group=False,\n    )\n\n    ctx = MagicMock()\n    ctx.base_url = site_base_url\n\n    site_page = {\"webUrl\": web_url}\n\n    get_external_access_from_sharepoint(\n        client_context=ctx,\n        graph_client=MagicMock(),\n        drive_name=None,\n        drive_item=None,\n        site_page=site_page,\n    )\n\n    ctx.web.get_file_by_server_relative_url.assert_called_once_with(\n        expected_relative_url\n    )\n\n\n# ---------------------------------------------------------------------------\n# _is_public_item – sharing link visibility\n# ---------------------------------------------------------------------------\n\n\ndef _make_permission(scope: str | None) -> MagicMock:\n    perm = MagicMock()\n    if scope is None:\n        perm.link = None\n    else:\n        perm.link = MagicMock()\n        perm.link.scope = scope\n    return perm\n\n\ndef _make_drive_item_with_permissions(\n    permissions: list[MagicMock],\n) -> MagicMock:\n    drive_item = MagicMock()\n    drive_item.id = \"item-123\"\n    drive_item.permissions.get_all.return_value = permissions\n    return drive_item\n\n\n@patch(f\"{MODULE}.sleep_and_retry\", side_effect=lambda query, _label: query)\ndef test_is_public_item_anonymous_link_when_enabled(\n    _mock_sleep: MagicMock,\n) -> None:\n    drive_item = _make_drive_item_with_permissions([_make_permission(\"anonymous\")])\n    assert _is_public_item(drive_item, treat_sharing_link_as_public=True) is True\n\n\n@patch(f\"{MODULE}.sleep_and_retry\", side_effect=lambda query, _label: query)\ndef test_is_public_item_org_link_when_enabled(\n    _mock_sleep: MagicMock,\n) -> None:\n    drive_item = _make_drive_item_with_permissions([_make_permission(\"organization\")])\n    assert _is_public_item(drive_item, treat_sharing_link_as_public=True) is True\n\n\n@patch(f\"{MODULE}.sleep_and_retry\", side_effect=lambda query, _label: query)\ndef test_is_public_item_anonymous_link_when_disabled(\n    _mock_sleep: MagicMock,\n) -> None:\n    \"\"\"When the flag is off, anonymous links do NOT make the item public.\"\"\"\n    drive_item = _make_drive_item_with_permissions([_make_permission(\"anonymous\")])\n    assert _is_public_item(drive_item, treat_sharing_link_as_public=False) is False\n\n\n@patch(f\"{MODULE}.sleep_and_retry\", side_effect=lambda query, _label: query)\ndef test_is_public_item_org_link_when_disabled(\n    _mock_sleep: MagicMock,\n) -> None:\n    \"\"\"When the flag is off, org links do NOT make the item public.\"\"\"\n    drive_item = _make_drive_item_with_permissions([_make_permission(\"organization\")])\n    assert _is_public_item(drive_item, treat_sharing_link_as_public=False) is False\n\n\n@patch(f\"{MODULE}.sleep_and_retry\", side_effect=lambda query, _label: query)\ndef test_is_public_item_no_sharing_links(\n    _mock_sleep: MagicMock,\n) -> None:\n    \"\"\"User-level permissions only — not public even when flag is on.\"\"\"\n    drive_item = _make_drive_item_with_permissions([_make_permission(None)])\n    assert _is_public_item(drive_item, treat_sharing_link_as_public=True) is False\n\n\n@patch(f\"{MODULE}.sleep_and_retry\", side_effect=lambda query, _label: query)\ndef test_is_public_item_default_is_false(\n    _mock_sleep: MagicMock,\n) -> None:\n    \"\"\"Default value of the flag is False, so sharing links are ignored.\"\"\"\n    drive_item = _make_drive_item_with_permissions([_make_permission(\"anonymous\")])\n    assert _is_public_item(drive_item) is False\n\n\ndef test_is_public_item_skips_api_call_when_disabled() -> None:\n    \"\"\"When the flag is off, the permissions API is never called.\"\"\"\n    drive_item = MagicMock()\n    _is_public_item(drive_item, treat_sharing_link_as_public=False)\n    drive_item.permissions.get_all.assert_not_called()\n\n\n# ---------------------------------------------------------------------------\n# get_external_access_from_sharepoint – sharing link integration\n# ---------------------------------------------------------------------------\n\n\n@patch(f\"{MODULE}._is_public_item\", return_value=True)\n@patch(f\"{MODULE}.sleep_and_retry\")\ndef test_drive_item_public_when_sharing_link_enabled(\n    _mock_sleep: MagicMock,\n    _mock_is_public: MagicMock,\n) -> None:\n    \"\"\"With treat_sharing_link_as_public=True, a public item returns is_public=True\n    and skips role-assignment resolution entirely.\"\"\"\n    drive_item = MagicMock()\n\n    result = get_external_access_from_sharepoint(\n        client_context=MagicMock(),\n        graph_client=MagicMock(),\n        drive_name=\"Documents\",\n        drive_item=drive_item,\n        site_page=None,\n        treat_sharing_link_as_public=True,\n    )\n\n    assert result.is_public is True\n    assert result.external_user_emails == set()\n    assert result.external_user_group_ids == set()\n\n\n@patch(f\"{MODULE}._get_groups_and_members_recursively\")\n@patch(f\"{MODULE}.sleep_and_retry\")\n@patch(f\"{MODULE}._is_public_item\", return_value=False)\ndef test_drive_item_falls_through_when_sharing_link_disabled(\n    _mock_is_public: MagicMock,\n    mock_sleep: MagicMock,  # noqa: ARG001\n    mock_recursive: MagicMock,\n) -> None:\n    \"\"\"With treat_sharing_link_as_public=False, the function falls through to\n    role-assignment-based permission resolution.\"\"\"\n    mock_recursive.return_value = GroupsResult(\n        groups_to_emails={\"SiteMembers_abc\": {\"alice@contoso.com\"}},\n        found_public_group=False,\n    )\n\n    result = get_external_access_from_sharepoint(\n        client_context=MagicMock(),\n        graph_client=MagicMock(),\n        drive_name=\"Documents\",\n        drive_item=MagicMock(),\n        site_page=None,\n        treat_sharing_link_as_public=False,\n    )\n\n    assert result.is_public is False\n    assert len(result.external_user_group_ids) > 0\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/hooks/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/ee/onyx/hooks/test_executor.py",
    "content": "\"\"\"Unit tests for the hook executor.\"\"\"\n\nimport json\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport httpx\nimport pytest\nfrom pydantic import BaseModel\n\nfrom ee.onyx.hooks.executor import _execute_hook_impl as execute_hook\nfrom onyx.db.enums import HookFailStrategy\nfrom onyx.db.enums import HookPoint\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.hooks.executor import HookSkipped\nfrom onyx.hooks.executor import HookSoftFailed\nfrom onyx.hooks.points.query_processing import QueryProcessingResponse\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n_PAYLOAD: dict[str, Any] = {\"query\": \"test\", \"user_email\": \"u@example.com\"}\n# A valid QueryProcessingResponse payload — used by success-path tests.\n_RESPONSE_PAYLOAD: dict[str, Any] = {\"query\": \"better test\"}\n\n\ndef _make_hook(\n    *,\n    is_active: bool = True,\n    endpoint_url: str | None = \"https://hook.example.com/query\",\n    api_key: MagicMock | None = None,\n    timeout_seconds: float = 5.0,\n    fail_strategy: HookFailStrategy = HookFailStrategy.SOFT,\n    hook_id: int = 1,\n    is_reachable: bool | None = None,\n    hook_point: HookPoint = HookPoint.QUERY_PROCESSING,\n) -> MagicMock:\n    hook = MagicMock()\n    hook.is_active = is_active\n    hook.endpoint_url = endpoint_url\n    hook.api_key = api_key\n    hook.timeout_seconds = timeout_seconds\n    hook.id = hook_id\n    hook.fail_strategy = fail_strategy\n    hook.is_reachable = is_reachable\n    hook.hook_point = hook_point\n    return hook\n\n\ndef _make_api_key(value: str) -> MagicMock:\n    api_key = MagicMock()\n    api_key.get_value.return_value = value\n    return api_key\n\n\ndef _make_response(\n    *,\n    status_code: int = 200,\n    json_return: Any = _RESPONSE_PAYLOAD,\n    json_side_effect: Exception | None = None,\n) -> MagicMock:\n    \"\"\"Build a response mock with controllable json() behaviour.\"\"\"\n    response = MagicMock()\n    response.status_code = status_code\n    if json_side_effect is not None:\n        response.json.side_effect = json_side_effect\n    else:\n        response.json.return_value = json_return\n    return response\n\n\ndef _setup_client(\n    mock_client_cls: MagicMock,\n    *,\n    response: MagicMock | None = None,\n    side_effect: Exception | None = None,\n) -> MagicMock:\n    \"\"\"Wire up the httpx.Client mock and return the inner client.\n\n    If side_effect is an httpx.HTTPStatusError, it is raised from\n    raise_for_status() (matching real httpx behaviour) and post() returns a\n    response mock with the matching status_code set.  All other exceptions are\n    raised directly from post().\n    \"\"\"\n    mock_client = MagicMock()\n\n    if isinstance(side_effect, httpx.HTTPStatusError):\n        error_response = MagicMock()\n        error_response.status_code = side_effect.response.status_code\n        error_response.raise_for_status.side_effect = side_effect\n        mock_client.post = MagicMock(return_value=error_response)\n    else:\n        mock_client.post = MagicMock(\n            side_effect=side_effect, return_value=response if not side_effect else None\n        )\n\n    mock_client_cls.return_value.__enter__ = MagicMock(return_value=mock_client)\n    mock_client_cls.return_value.__exit__ = MagicMock(return_value=False)\n    return mock_client\n\n\n# ---------------------------------------------------------------------------\n# Fixtures\n# ---------------------------------------------------------------------------\n\n\n@pytest.fixture()\ndef db_session() -> MagicMock:\n    return MagicMock()\n\n\n# ---------------------------------------------------------------------------\n# Early-exit guards (no HTTP call, no DB writes)\n# ---------------------------------------------------------------------------\n\n\n@pytest.mark.parametrize(\n    \"multi_tenant,hook\",\n    [\n        # MULTI_TENANT=True exits before the DB lookup — hook is irrelevant.\n        pytest.param(True, None, id=\"multi_tenant\"),\n        pytest.param(False, None, id=\"hook_not_found\"),\n        pytest.param(False, _make_hook(is_active=False), id=\"hook_inactive\"),\n        pytest.param(False, _make_hook(endpoint_url=None), id=\"no_endpoint_url\"),\n    ],\n)\ndef test_early_exit_returns_skipped_with_no_db_writes(\n    db_session: MagicMock,\n    multi_tenant: bool,\n    hook: MagicMock | None,\n) -> None:\n    with (\n        patch(\"ee.onyx.hooks.executor.MULTI_TENANT\", multi_tenant),\n        patch(\n            \"ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point\",\n            return_value=hook,\n        ),\n        patch(\"ee.onyx.hooks.executor.update_hook__no_commit\") as mock_update,\n        patch(\n            \"ee.onyx.hooks.executor.create_hook_execution_log__no_commit\"\n        ) as mock_log,\n    ):\n        result = execute_hook(\n            db_session=db_session,\n            hook_point=HookPoint.QUERY_PROCESSING,\n            payload=_PAYLOAD,\n            response_type=QueryProcessingResponse,\n        )\n\n    assert isinstance(result, HookSkipped)\n    mock_update.assert_not_called()\n    mock_log.assert_not_called()\n\n\n# ---------------------------------------------------------------------------\n# Successful HTTP call\n# ---------------------------------------------------------------------------\n\n\ndef test_success_returns_validated_model_and_sets_reachable(\n    db_session: MagicMock,\n) -> None:\n    hook = _make_hook()\n\n    with (\n        patch(\"ee.onyx.hooks.executor.MULTI_TENANT\", False),\n        patch(\n            \"ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point\",\n            return_value=hook,\n        ),\n        patch(\"ee.onyx.hooks.executor.get_session_with_current_tenant\"),\n        patch(\"ee.onyx.hooks.executor.update_hook__no_commit\") as mock_update,\n        patch(\n            \"ee.onyx.hooks.executor.create_hook_execution_log__no_commit\"\n        ) as mock_log,\n        patch(\"httpx.Client\") as mock_client_cls,\n    ):\n        _setup_client(mock_client_cls, response=_make_response())\n        result = execute_hook(\n            db_session=db_session,\n            hook_point=HookPoint.QUERY_PROCESSING,\n            payload=_PAYLOAD,\n            response_type=QueryProcessingResponse,\n        )\n\n    assert isinstance(result, QueryProcessingResponse)\n    assert result.query == _RESPONSE_PAYLOAD[\"query\"]\n    _, update_kwargs = mock_update.call_args\n    assert update_kwargs[\"is_reachable\"] is True\n    mock_log.assert_not_called()\n\n\ndef test_success_skips_reachable_write_when_already_true(db_session: MagicMock) -> None:\n    \"\"\"Deduplication guard: a hook already at is_reachable=True that succeeds\n    must not trigger a DB write.\"\"\"\n    hook = _make_hook(is_reachable=True)\n\n    with (\n        patch(\"ee.onyx.hooks.executor.MULTI_TENANT\", False),\n        patch(\n            \"ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point\",\n            return_value=hook,\n        ),\n        patch(\"ee.onyx.hooks.executor.get_session_with_current_tenant\"),\n        patch(\"ee.onyx.hooks.executor.update_hook__no_commit\") as mock_update,\n        patch(\"ee.onyx.hooks.executor.create_hook_execution_log__no_commit\"),\n        patch(\"httpx.Client\") as mock_client_cls,\n    ):\n        _setup_client(mock_client_cls, response=_make_response())\n        result = execute_hook(\n            db_session=db_session,\n            hook_point=HookPoint.QUERY_PROCESSING,\n            payload=_PAYLOAD,\n            response_type=QueryProcessingResponse,\n        )\n\n    assert isinstance(result, QueryProcessingResponse)\n    assert result.query == _RESPONSE_PAYLOAD[\"query\"]\n    mock_update.assert_not_called()\n\n\ndef test_non_dict_json_response_is_a_failure(db_session: MagicMock) -> None:\n    \"\"\"response.json() returning a non-dict (e.g. list) must be treated as failure.\n    The server responded, so is_reachable is not updated.\"\"\"\n    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)\n\n    with (\n        patch(\"ee.onyx.hooks.executor.MULTI_TENANT\", False),\n        patch(\n            \"ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point\",\n            return_value=hook,\n        ),\n        patch(\"ee.onyx.hooks.executor.get_session_with_current_tenant\"),\n        patch(\"ee.onyx.hooks.executor.update_hook__no_commit\") as mock_update,\n        patch(\n            \"ee.onyx.hooks.executor.create_hook_execution_log__no_commit\"\n        ) as mock_log,\n        patch(\"httpx.Client\") as mock_client_cls,\n    ):\n        _setup_client(\n            mock_client_cls,\n            response=_make_response(json_return=[\"unexpected\", \"list\"]),\n        )\n        result = execute_hook(\n            db_session=db_session,\n            hook_point=HookPoint.QUERY_PROCESSING,\n            payload=_PAYLOAD,\n            response_type=QueryProcessingResponse,\n        )\n\n    assert isinstance(result, HookSoftFailed)\n    _, log_kwargs = mock_log.call_args\n    assert log_kwargs[\"is_success\"] is False\n    assert \"non-dict\" in (log_kwargs[\"error_message\"] or \"\")\n    mock_update.assert_not_called()\n\n\ndef test_json_decode_failure_is_a_failure(db_session: MagicMock) -> None:\n    \"\"\"response.json() raising must be treated as failure with SOFT strategy.\n    The server responded, so is_reachable is not updated.\"\"\"\n    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)\n\n    with (\n        patch(\"ee.onyx.hooks.executor.MULTI_TENANT\", False),\n        patch(\n            \"ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point\",\n            return_value=hook,\n        ),\n        patch(\"ee.onyx.hooks.executor.get_session_with_current_tenant\"),\n        patch(\"ee.onyx.hooks.executor.update_hook__no_commit\") as mock_update,\n        patch(\n            \"ee.onyx.hooks.executor.create_hook_execution_log__no_commit\"\n        ) as mock_log,\n        patch(\"httpx.Client\") as mock_client_cls,\n    ):\n        _setup_client(\n            mock_client_cls,\n            response=_make_response(\n                json_side_effect=json.JSONDecodeError(\"not JSON\", \"\", 0)\n            ),\n        )\n        result = execute_hook(\n            db_session=db_session,\n            hook_point=HookPoint.QUERY_PROCESSING,\n            payload=_PAYLOAD,\n            response_type=QueryProcessingResponse,\n        )\n\n    assert isinstance(result, HookSoftFailed)\n    _, log_kwargs = mock_log.call_args\n    assert log_kwargs[\"is_success\"] is False\n    assert \"non-JSON\" in (log_kwargs[\"error_message\"] or \"\")\n    mock_update.assert_not_called()\n\n\n# ---------------------------------------------------------------------------\n# HTTP failure paths\n# ---------------------------------------------------------------------------\n\n\n@pytest.mark.parametrize(\n    \"exception,fail_strategy,expected_type,expected_is_reachable\",\n    [\n        # NetworkError → is_reachable=False\n        pytest.param(\n            httpx.ConnectError(\"refused\"),\n            HookFailStrategy.SOFT,\n            HookSoftFailed,\n            False,\n            id=\"connect_error_soft\",\n        ),\n        pytest.param(\n            httpx.ConnectError(\"refused\"),\n            HookFailStrategy.HARD,\n            OnyxError,\n            False,\n            id=\"connect_error_hard\",\n        ),\n        # 401/403 → is_reachable=False (api_key revoked)\n        pytest.param(\n            httpx.HTTPStatusError(\n                \"401\",\n                request=MagicMock(),\n                response=MagicMock(status_code=401, text=\"Unauthorized\"),\n            ),\n            HookFailStrategy.SOFT,\n            HookSoftFailed,\n            False,\n            id=\"auth_401_soft\",\n        ),\n        pytest.param(\n            httpx.HTTPStatusError(\n                \"403\",\n                request=MagicMock(),\n                response=MagicMock(status_code=403, text=\"Forbidden\"),\n            ),\n            HookFailStrategy.HARD,\n            OnyxError,\n            False,\n            id=\"auth_403_hard\",\n        ),\n        # TimeoutException → no is_reachable write (None)\n        pytest.param(\n            httpx.TimeoutException(\"timeout\"),\n            HookFailStrategy.SOFT,\n            HookSoftFailed,\n            None,\n            id=\"timeout_soft\",\n        ),\n        pytest.param(\n            httpx.TimeoutException(\"timeout\"),\n            HookFailStrategy.HARD,\n            OnyxError,\n            None,\n            id=\"timeout_hard\",\n        ),\n        # Other HTTP errors → no is_reachable write (None)\n        pytest.param(\n            httpx.HTTPStatusError(\n                \"500\",\n                request=MagicMock(),\n                response=MagicMock(status_code=500, text=\"error\"),\n            ),\n            HookFailStrategy.SOFT,\n            HookSoftFailed,\n            None,\n            id=\"http_status_error_soft\",\n        ),\n        pytest.param(\n            httpx.HTTPStatusError(\n                \"500\",\n                request=MagicMock(),\n                response=MagicMock(status_code=500, text=\"error\"),\n            ),\n            HookFailStrategy.HARD,\n            OnyxError,\n            None,\n            id=\"http_status_error_hard\",\n        ),\n    ],\n)\ndef test_http_failure_paths(\n    db_session: MagicMock,\n    exception: Exception,\n    fail_strategy: HookFailStrategy,\n    expected_type: type,\n    expected_is_reachable: bool | None,\n) -> None:\n    hook = _make_hook(fail_strategy=fail_strategy)\n\n    with (\n        patch(\"ee.onyx.hooks.executor.MULTI_TENANT\", False),\n        patch(\n            \"ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point\",\n            return_value=hook,\n        ),\n        patch(\"ee.onyx.hooks.executor.get_session_with_current_tenant\"),\n        patch(\"ee.onyx.hooks.executor.update_hook__no_commit\") as mock_update,\n        patch(\"ee.onyx.hooks.executor.create_hook_execution_log__no_commit\"),\n        patch(\"httpx.Client\") as mock_client_cls,\n    ):\n        _setup_client(mock_client_cls, side_effect=exception)\n\n        if expected_type is OnyxError:\n            with pytest.raises(OnyxError) as exc_info:\n                execute_hook(\n                    db_session=db_session,\n                    hook_point=HookPoint.QUERY_PROCESSING,\n                    payload=_PAYLOAD,\n                    response_type=QueryProcessingResponse,\n                )\n            assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED\n        else:\n            result = execute_hook(\n                db_session=db_session,\n                hook_point=HookPoint.QUERY_PROCESSING,\n                payload=_PAYLOAD,\n                response_type=QueryProcessingResponse,\n            )\n            assert isinstance(result, expected_type)\n\n    if expected_is_reachable is None:\n        mock_update.assert_not_called()\n    else:\n        mock_update.assert_called_once()\n        _, kwargs = mock_update.call_args\n        assert kwargs[\"is_reachable\"] is expected_is_reachable\n\n\n# ---------------------------------------------------------------------------\n# Authorization header\n# ---------------------------------------------------------------------------\n\n\n@pytest.mark.parametrize(\n    \"api_key_value,expect_auth_header\",\n    [\n        pytest.param(\"secret-token\", True, id=\"api_key_present\"),\n        pytest.param(None, False, id=\"api_key_absent\"),\n    ],\n)\ndef test_authorization_header(\n    db_session: MagicMock,\n    api_key_value: str | None,\n    expect_auth_header: bool,\n) -> None:\n    api_key = _make_api_key(api_key_value) if api_key_value else None\n    hook = _make_hook(api_key=api_key)\n\n    with (\n        patch(\"ee.onyx.hooks.executor.MULTI_TENANT\", False),\n        patch(\n            \"ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point\",\n            return_value=hook,\n        ),\n        patch(\"ee.onyx.hooks.executor.get_session_with_current_tenant\"),\n        patch(\"ee.onyx.hooks.executor.update_hook__no_commit\"),\n        patch(\"ee.onyx.hooks.executor.create_hook_execution_log__no_commit\"),\n        patch(\"httpx.Client\") as mock_client_cls,\n    ):\n        mock_client = _setup_client(mock_client_cls, response=_make_response())\n        execute_hook(\n            db_session=db_session,\n            hook_point=HookPoint.QUERY_PROCESSING,\n            payload=_PAYLOAD,\n            response_type=QueryProcessingResponse,\n        )\n\n    _, call_kwargs = mock_client.post.call_args\n    if expect_auth_header:\n        assert call_kwargs[\"headers\"][\"Authorization\"] == f\"Bearer {api_key_value}\"\n    else:\n        assert \"Authorization\" not in call_kwargs[\"headers\"]\n\n\n# ---------------------------------------------------------------------------\n# Persist session failure\n# ---------------------------------------------------------------------------\n\n\n@pytest.mark.parametrize(\n    \"http_exception,expect_onyx_error\",\n    [\n        pytest.param(None, False, id=\"success_path\"),\n        pytest.param(httpx.ConnectError(\"refused\"), True, id=\"hard_fail_path\"),\n    ],\n)\ndef test_persist_session_failure_is_swallowed(\n    db_session: MagicMock,\n    http_exception: Exception | None,\n    expect_onyx_error: bool,\n) -> None:\n    \"\"\"DB session failure in _persist_result must not mask the real return value or OnyxError.\"\"\"\n    hook = _make_hook(fail_strategy=HookFailStrategy.HARD)\n\n    with (\n        patch(\"ee.onyx.hooks.executor.MULTI_TENANT\", False),\n        patch(\n            \"ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point\",\n            return_value=hook,\n        ),\n        patch(\n            \"ee.onyx.hooks.executor.get_session_with_current_tenant\",\n            side_effect=RuntimeError(\"DB unavailable\"),\n        ),\n        patch(\"httpx.Client\") as mock_client_cls,\n    ):\n        _setup_client(\n            mock_client_cls,\n            response=_make_response() if not http_exception else None,\n            side_effect=http_exception,\n        )\n\n        if expect_onyx_error:\n            with pytest.raises(OnyxError) as exc_info:\n                execute_hook(\n                    db_session=db_session,\n                    hook_point=HookPoint.QUERY_PROCESSING,\n                    payload=_PAYLOAD,\n                    response_type=QueryProcessingResponse,\n                )\n            assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED\n        else:\n            result = execute_hook(\n                db_session=db_session,\n                hook_point=HookPoint.QUERY_PROCESSING,\n                payload=_PAYLOAD,\n                response_type=QueryProcessingResponse,\n            )\n            assert isinstance(result, QueryProcessingResponse)\n            assert result.query == _RESPONSE_PAYLOAD[\"query\"]\n\n\n# ---------------------------------------------------------------------------\n# Response model validation\n# ---------------------------------------------------------------------------\n\n\nclass _StrictResponse(BaseModel):\n    \"\"\"Strict model used to reliably trigger a ValidationError in tests.\"\"\"\n\n    required_field: str  # no default → missing key raises ValidationError\n\n\n@pytest.mark.parametrize(\n    \"fail_strategy,expected_type\",\n    [\n        pytest.param(\n            HookFailStrategy.SOFT, HookSoftFailed, id=\"validation_failure_soft\"\n        ),\n        pytest.param(HookFailStrategy.HARD, OnyxError, id=\"validation_failure_hard\"),\n    ],\n)\ndef test_response_validation_failure_respects_fail_strategy(\n    db_session: MagicMock,\n    fail_strategy: HookFailStrategy,\n    expected_type: type,\n) -> None:\n    \"\"\"A response that fails response_model validation is treated like any other\n    hook failure: logged, is_reachable left unchanged, fail_strategy respected.\"\"\"\n    hook = _make_hook(fail_strategy=fail_strategy)\n\n    with (\n        patch(\"ee.onyx.hooks.executor.MULTI_TENANT\", False),\n        patch(\n            \"ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point\",\n            return_value=hook,\n        ),\n        patch(\"ee.onyx.hooks.executor.get_session_with_current_tenant\"),\n        patch(\"ee.onyx.hooks.executor.update_hook__no_commit\") as mock_update,\n        patch(\n            \"ee.onyx.hooks.executor.create_hook_execution_log__no_commit\"\n        ) as mock_log,\n        patch(\"httpx.Client\") as mock_client_cls,\n    ):\n        # Response payload is missing required_field → ValidationError\n        _setup_client(mock_client_cls, response=_make_response(json_return={}))\n\n        if expected_type is OnyxError:\n            with pytest.raises(OnyxError) as exc_info:\n                execute_hook(\n                    db_session=db_session,\n                    hook_point=HookPoint.QUERY_PROCESSING,\n                    payload=_PAYLOAD,\n                    response_type=_StrictResponse,\n                )\n            assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED\n        else:\n            result = execute_hook(\n                db_session=db_session,\n                hook_point=HookPoint.QUERY_PROCESSING,\n                payload=_PAYLOAD,\n                response_type=_StrictResponse,\n            )\n            assert isinstance(result, HookSoftFailed)\n\n    # is_reachable must not be updated — server responded correctly\n    mock_update.assert_not_called()\n    # failure must be logged\n    mock_log.assert_called_once()\n    _, log_kwargs = mock_log.call_args\n    assert log_kwargs[\"is_success\"] is False\n    assert \"validation\" in (log_kwargs[\"error_message\"] or \"\").lower()\n\n\n# ---------------------------------------------------------------------------\n# Outer soft-fail guard in execute_hook\n# ---------------------------------------------------------------------------\n\n\n@pytest.mark.parametrize(\n    \"fail_strategy,expected_type\",\n    [\n        pytest.param(HookFailStrategy.SOFT, HookSoftFailed, id=\"unexpected_exc_soft\"),\n        pytest.param(HookFailStrategy.HARD, ValueError, id=\"unexpected_exc_hard\"),\n    ],\n)\ndef test_unexpected_exception_in_inner_respects_fail_strategy(\n    db_session: MagicMock,\n    fail_strategy: HookFailStrategy,\n    expected_type: type,\n) -> None:\n    \"\"\"An unexpected exception raised by _execute_hook_inner (not an OnyxError from\n    HARD fail — e.g. a bug or an assertion error) must be swallowed and return\n    HookSoftFailed for SOFT strategy, or re-raised for HARD strategy.\"\"\"\n    hook = _make_hook(fail_strategy=fail_strategy)\n\n    with (\n        patch(\"ee.onyx.hooks.executor.MULTI_TENANT\", False),\n        patch(\n            \"ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point\",\n            return_value=hook,\n        ),\n        patch(\n            \"ee.onyx.hooks.executor._execute_hook_inner\",\n            side_effect=ValueError(\"unexpected bug\"),\n        ),\n    ):\n        if expected_type is HookSoftFailed:\n            result = execute_hook(\n                db_session=db_session,\n                hook_point=HookPoint.QUERY_PROCESSING,\n                payload=_PAYLOAD,\n                response_type=QueryProcessingResponse,\n            )\n            assert isinstance(result, HookSoftFailed)\n        else:\n            with pytest.raises(ValueError, match=\"unexpected bug\"):\n                execute_hook(\n                    db_session=db_session,\n                    hook_point=HookPoint.QUERY_PROCESSING,\n                    payload=_PAYLOAD,\n                    response_type=QueryProcessingResponse,\n                )\n\n\ndef test_is_reachable_failure_does_not_prevent_log(db_session: MagicMock) -> None:\n    \"\"\"is_reachable update failing (e.g. concurrent hook deletion) must not\n    prevent the execution log from being written.\n\n    Simulates the production failure path: update_hook__no_commit raises\n    OnyxError(NOT_FOUND) as it would if the hook was concurrently deleted\n    between the initial lookup and the reachable update.\n    \"\"\"\n    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)\n\n    with (\n        patch(\"ee.onyx.hooks.executor.MULTI_TENANT\", False),\n        patch(\n            \"ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point\",\n            return_value=hook,\n        ),\n        patch(\"ee.onyx.hooks.executor.get_session_with_current_tenant\"),\n        patch(\n            \"ee.onyx.hooks.executor.update_hook__no_commit\",\n            side_effect=OnyxError(OnyxErrorCode.NOT_FOUND, \"hook deleted\"),\n        ),\n        patch(\n            \"ee.onyx.hooks.executor.create_hook_execution_log__no_commit\"\n        ) as mock_log,\n        patch(\"httpx.Client\") as mock_client_cls,\n    ):\n        _setup_client(mock_client_cls, side_effect=httpx.ConnectError(\"refused\"))\n        result = execute_hook(\n            db_session=db_session,\n            hook_point=HookPoint.QUERY_PROCESSING,\n            payload=_PAYLOAD,\n            response_type=QueryProcessingResponse,\n        )\n\n    assert isinstance(result, HookSoftFailed)\n    mock_log.assert_called_once()\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/billing/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/billing/conftest.py",
    "content": "\"\"\"Shared fixtures and utilities for billing tests.\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timezone\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\n\nimport pytest\n\nfrom ee.onyx.server.license.models import LicensePayload\nfrom ee.onyx.server.license.models import PlanType\n\n\n@pytest.fixture\ndef mock_license_payload() -> LicensePayload:\n    \"\"\"Create a valid LicensePayload for testing.\"\"\"\n    return make_license_payload()\n\n\n@pytest.fixture\ndef mock_expired_license_payload() -> LicensePayload:\n    \"\"\"Create an expired LicensePayload for testing.\"\"\"\n    return make_license_payload(expired=True)\n\n\ndef make_license_payload(\n    tenant_id: str = \"tenant_123\",\n    seats: int = 10,\n    expired: bool = False,\n) -> LicensePayload:\n    \"\"\"Create a LicensePayload for testing.\n\n    Args:\n        tenant_id: The tenant ID\n        seats: Number of seats\n        expired: If True, creates an expired license\n    \"\"\"\n    now = datetime.now(timezone.utc)\n    expires_at = (\n        datetime(2020, 1, 1, tzinfo=timezone.utc)\n        if expired\n        else datetime(2030, 1, 1, tzinfo=timezone.utc)\n    )\n\n    return LicensePayload(\n        version=\"1.0\",\n        tenant_id=tenant_id,\n        issued_at=now,\n        expires_at=expires_at,\n        seats=seats,\n        plan_type=PlanType.MONTHLY,\n    )\n\n\ndef make_mock_response(json_data: dict) -> MagicMock:\n    \"\"\"Create a mock httpx response.\n\n    Args:\n        json_data: The JSON data to return from response.json()\n    \"\"\"\n    mock_response = MagicMock()\n    mock_response.json.return_value = json_data\n    mock_response.raise_for_status = MagicMock()\n    return mock_response\n\n\ndef make_mock_http_client(\n    method: str = \"post\",\n    response: MagicMock | None = None,\n    side_effect: Exception | None = None,\n) -> MagicMock:\n    \"\"\"Create a mock httpx.AsyncClient context manager.\n\n    Args:\n        method: HTTP method to mock (\"get\" or \"post\")\n        response: Mock response to return\n        side_effect: Exception to raise instead of returning response\n    \"\"\"\n    mock_client = MagicMock()\n    mock_method = AsyncMock(return_value=response, side_effect=side_effect)\n    setattr(mock_client.return_value.__aenter__.return_value, method, mock_method)\n    return mock_client\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/billing/test_billing_api.py",
    "content": "\"\"\"Tests for the unified billing API endpoints.\"\"\"\n\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom ee.onyx.server.billing.models import BillingInformationResponse\nfrom ee.onyx.server.billing.models import CreateCheckoutSessionResponse\nfrom ee.onyx.server.billing.models import CreateCustomerPortalSessionResponse\nfrom ee.onyx.server.billing.models import SeatUpdateResponse\nfrom ee.onyx.server.billing.models import SubscriptionStatusResponse\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\n\n\nclass TestCreateCheckoutSession:\n    \"\"\"Tests for create_checkout_session endpoint.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.create_checkout_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_creates_checkout_session_cloud(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n    ) -> None:\n        \"\"\"Should create checkout session for cloud deployment.\"\"\"\n        from ee.onyx.server.billing.api import create_checkout_session\n        from ee.onyx.server.billing.models import CreateCheckoutSessionRequest\n\n        mock_get_license.return_value = None\n        mock_get_tenant.return_value = \"tenant_123\"\n        mock_service.return_value = CreateCheckoutSessionResponse(\n            stripe_checkout_url=\"https://checkout.stripe.com/session\"\n        )\n\n        request = CreateCheckoutSessionRequest(billing_period=\"monthly\")\n        result = await create_checkout_session(\n            request=request, _=MagicMock(), db_session=MagicMock()\n        )\n\n        assert result.stripe_checkout_url == \"https://checkout.stripe.com/session\"\n        mock_service.assert_called_once()\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.create_checkout_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_creates_checkout_session_self_hosted(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n    ) -> None:\n        \"\"\"Should create checkout session for self-hosted with license.\"\"\"\n        from ee.onyx.server.billing.api import create_checkout_session\n        from ee.onyx.server.billing.models import CreateCheckoutSessionRequest\n\n        mock_get_license.return_value = \"license_data_blob\"\n        mock_get_tenant.return_value = None\n        mock_service.return_value = CreateCheckoutSessionResponse(\n            stripe_checkout_url=\"https://checkout.stripe.com/session\"\n        )\n\n        request = CreateCheckoutSessionRequest(\n            billing_period=\"annual\", email=\"test@example.com\"\n        )\n        result = await create_checkout_session(\n            request=request, _=MagicMock(), db_session=MagicMock()\n        )\n\n        assert result.stripe_checkout_url == \"https://checkout.stripe.com/session\"\n        call_kwargs = mock_service.call_args[1]\n        assert call_kwargs[\"billing_period\"] == \"annual\"\n        assert call_kwargs[\"email\"] == \"test@example.com\"\n        assert call_kwargs[\"license_data\"] == \"license_data_blob\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.create_checkout_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_raises_on_service_error(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n    ) -> None:\n        \"\"\"Should propagate OnyxError when service fails.\"\"\"\n        from ee.onyx.server.billing.api import create_checkout_session\n\n        mock_get_license.return_value = None\n        mock_get_tenant.return_value = \"tenant_123\"\n        mock_service.side_effect = OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            \"Stripe error\",\n            status_code_override=502,\n        )\n\n        with pytest.raises(OnyxError) as exc_info:\n            await create_checkout_session(\n                request=None, _=MagicMock(), db_session=MagicMock()\n            )\n\n        assert exc_info.value.status_code == 502\n        assert exc_info.value.error_code is OnyxErrorCode.BAD_GATEWAY\n        assert exc_info.value.detail == \"Stripe error\"\n\n\nclass TestCreateCustomerPortalSession:\n    \"\"\"Tests for create_customer_portal_session endpoint.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.billing.api.create_portal_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_requires_license_for_self_hosted(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Should reject self-hosted without license.\"\"\"\n        from ee.onyx.server.billing.api import create_customer_portal_session\n\n        mock_get_license.return_value = None\n        mock_get_tenant.return_value = None\n\n        with pytest.raises(OnyxError) as exc_info:\n            await create_customer_portal_session(\n                request=None, _=MagicMock(), db_session=MagicMock()\n            )\n\n        assert exc_info.value.status_code == 400\n        assert exc_info.value.error_code is OnyxErrorCode.VALIDATION_ERROR\n        assert exc_info.value.detail == \"No license found\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.create_portal_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_creates_portal_session(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n    ) -> None:\n        \"\"\"Should create portal session with valid license.\"\"\"\n        from ee.onyx.server.billing.api import create_customer_portal_session\n\n        mock_get_license.return_value = \"license_blob\"\n        mock_get_tenant.return_value = None\n        mock_service.return_value = CreateCustomerPortalSessionResponse(\n            stripe_customer_portal_url=\"https://billing.stripe.com/portal\"\n        )\n\n        result = await create_customer_portal_session(\n            request=None, _=MagicMock(), db_session=MagicMock()\n        )\n\n        assert result.stripe_customer_portal_url == \"https://billing.stripe.com/portal\"\n\n\nclass TestGetBillingInformation:\n    \"\"\"Tests for get_billing_information endpoint.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_returns_not_subscribed_without_license(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n    ) -> None:\n        \"\"\"Should return subscribed=False for self-hosted without license.\"\"\"\n        from ee.onyx.server.billing.api import get_billing_information\n\n        mock_get_license.return_value = None\n        mock_get_tenant.return_value = None\n\n        result = await get_billing_information(_=MagicMock(), db_session=MagicMock())\n\n        assert isinstance(result, SubscriptionStatusResponse)\n        assert result.subscribed is False\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.get_billing_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_returns_billing_info(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n    ) -> None:\n        \"\"\"Should return billing information with valid license.\"\"\"\n        from ee.onyx.server.billing.api import get_billing_information\n\n        mock_get_license.return_value = \"license_blob\"\n        mock_get_tenant.return_value = None\n        mock_service.return_value = BillingInformationResponse(\n            tenant_id=\"tenant_123\",\n            status=\"active\",\n            seats=10,\n        )\n\n        result = await get_billing_information(_=MagicMock(), db_session=MagicMock())\n\n        assert isinstance(result, BillingInformationResponse)\n        assert result.tenant_id == \"tenant_123\"\n        assert result.status == \"active\"\n        assert result.seats == 10\n\n\nclass TestUpdateSeats:\n    \"\"\"Tests for update_seats endpoint.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_requires_license_for_self_hosted(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n    ) -> None:\n        \"\"\"Should reject self-hosted without license.\"\"\"\n        from ee.onyx.server.billing.api import update_seats\n        from ee.onyx.server.billing.models import SeatUpdateRequest\n\n        mock_get_license.return_value = None\n        mock_get_tenant.return_value = None\n\n        request = SeatUpdateRequest(new_seat_count=10)\n\n        with pytest.raises(OnyxError) as exc_info:\n            await update_seats(request=request, _=MagicMock(), db_session=MagicMock())\n\n        assert exc_info.value.status_code == 400\n        assert exc_info.value.error_code is OnyxErrorCode.VALIDATION_ERROR\n        assert exc_info.value.detail == \"No license found\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.get_used_seats\")\n    @patch(\"ee.onyx.server.billing.api.update_seat_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_updates_seats_successfully(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n        mock_get_used_seats: MagicMock,\n    ) -> None:\n        \"\"\"Should update seats with valid license.\"\"\"\n        from ee.onyx.server.billing.api import update_seats\n        from ee.onyx.server.billing.models import SeatUpdateRequest\n\n        mock_get_license.return_value = \"license_blob\"\n        mock_get_tenant.return_value = None\n        mock_get_used_seats.return_value = 5\n        mock_service.return_value = SeatUpdateResponse(\n            success=True,\n            current_seats=15,\n            used_seats=5,\n            message=\"Seats updated to 15\",\n        )\n\n        request = SeatUpdateRequest(new_seat_count=15)\n        result = await update_seats(\n            request=request, _=MagicMock(), db_session=MagicMock()\n        )\n\n        assert result.success is True\n        assert result.current_seats == 15\n        assert result.used_seats == 5\n        mock_service.assert_called_once_with(\n            new_seat_count=15,\n            license_data=\"license_blob\",\n            tenant_id=None,\n        )\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.get_used_seats\")\n    @patch(\"ee.onyx.server.billing.api.update_seat_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_handles_billing_service_error(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n        mock_get_used_seats: MagicMock,\n    ) -> None:\n        \"\"\"Should propagate OnyxError from service layer.\"\"\"\n        from ee.onyx.server.billing.api import update_seats\n        from ee.onyx.server.billing.models import SeatUpdateRequest\n\n        mock_get_license.return_value = \"license_blob\"\n        mock_get_tenant.return_value = None\n        mock_get_used_seats.return_value = 0\n        mock_service.side_effect = OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            \"Cannot reduce below 10 seats\",\n            status_code_override=400,\n        )\n\n        request = SeatUpdateRequest(new_seat_count=5)\n\n        with pytest.raises(OnyxError) as exc_info:\n            await update_seats(request=request, _=MagicMock(), db_session=MagicMock())\n\n        assert exc_info.value.status_code == 400\n        assert exc_info.value.error_code is OnyxErrorCode.BAD_GATEWAY\n        assert exc_info.value.detail == \"Cannot reduce below 10 seats\"\n\n\nclass TestCircuitBreaker:\n    \"\"\"Tests for the billing circuit breaker functionality.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.billing.api._is_billing_circuit_open\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_returns_503_when_circuit_open(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_circuit_open: MagicMock,\n    ) -> None:\n        \"\"\"Should return 503 when circuit breaker is open.\"\"\"\n        from ee.onyx.server.billing.api import get_billing_information\n\n        mock_get_license.return_value = \"license_blob\"\n        mock_get_tenant.return_value = None\n        mock_circuit_open.return_value = True\n\n        with pytest.raises(OnyxError) as exc_info:\n            await get_billing_information(_=MagicMock(), db_session=MagicMock())\n\n        assert exc_info.value.status_code == 503\n        assert exc_info.value.error_code is OnyxErrorCode.SERVICE_UNAVAILABLE\n        assert \"Connect to Stripe\" in exc_info.value.detail\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.billing.api._open_billing_circuit\")\n    @patch(\"ee.onyx.server.billing.api._is_billing_circuit_open\")\n    @patch(\"ee.onyx.server.billing.api.get_billing_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_opens_circuit_on_502_error(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n        mock_circuit_open_check: MagicMock,\n        mock_open_circuit: MagicMock,\n    ) -> None:\n        \"\"\"Should open circuit breaker on 502 error.\"\"\"\n        from ee.onyx.server.billing.api import get_billing_information\n\n        mock_get_license.return_value = \"license_blob\"\n        mock_get_tenant.return_value = None\n        mock_circuit_open_check.return_value = False\n        mock_service.side_effect = OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            \"Connection failed\",\n            status_code_override=502,\n        )\n\n        with pytest.raises(OnyxError) as exc_info:\n            await get_billing_information(_=MagicMock(), db_session=MagicMock())\n\n        assert exc_info.value.status_code == 502\n        mock_open_circuit.assert_called_once()\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.billing.api._open_billing_circuit\")\n    @patch(\"ee.onyx.server.billing.api._is_billing_circuit_open\")\n    @patch(\"ee.onyx.server.billing.api.get_billing_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_opens_circuit_on_503_error(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n        mock_circuit_open_check: MagicMock,\n        mock_open_circuit: MagicMock,\n    ) -> None:\n        \"\"\"Should open circuit breaker on 503 error.\"\"\"\n        from ee.onyx.server.billing.api import get_billing_information\n\n        mock_get_license.return_value = \"license_blob\"\n        mock_get_tenant.return_value = None\n        mock_circuit_open_check.return_value = False\n        mock_service.side_effect = OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            \"Service unavailable\",\n            status_code_override=503,\n        )\n\n        with pytest.raises(OnyxError) as exc_info:\n            await get_billing_information(_=MagicMock(), db_session=MagicMock())\n\n        assert exc_info.value.status_code == 503\n        mock_open_circuit.assert_called_once()\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.billing.api._open_billing_circuit\")\n    @patch(\"ee.onyx.server.billing.api._is_billing_circuit_open\")\n    @patch(\"ee.onyx.server.billing.api.get_billing_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_opens_circuit_on_504_error(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n        mock_circuit_open_check: MagicMock,\n        mock_open_circuit: MagicMock,\n    ) -> None:\n        \"\"\"Should open circuit breaker on 504 error.\"\"\"\n        from ee.onyx.server.billing.api import get_billing_information\n\n        mock_get_license.return_value = \"license_blob\"\n        mock_get_tenant.return_value = None\n        mock_circuit_open_check.return_value = False\n        mock_service.side_effect = OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            \"Gateway timeout\",\n            status_code_override=504,\n        )\n\n        with pytest.raises(OnyxError) as exc_info:\n            await get_billing_information(_=MagicMock(), db_session=MagicMock())\n\n        assert exc_info.value.status_code == 504\n        mock_open_circuit.assert_called_once()\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.billing.api._open_billing_circuit\")\n    @patch(\"ee.onyx.server.billing.api._is_billing_circuit_open\")\n    @patch(\"ee.onyx.server.billing.api.get_billing_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_does_not_open_circuit_on_400_error(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n        mock_circuit_open_check: MagicMock,\n        mock_open_circuit: MagicMock,\n    ) -> None:\n        \"\"\"Should NOT open circuit breaker on 400 error (client error).\"\"\"\n        from ee.onyx.server.billing.api import get_billing_information\n\n        mock_get_license.return_value = \"license_blob\"\n        mock_get_tenant.return_value = None\n        mock_circuit_open_check.return_value = False\n        mock_service.side_effect = OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            \"Bad request\",\n            status_code_override=400,\n        )\n\n        with pytest.raises(OnyxError) as exc_info:\n            await get_billing_information(_=MagicMock(), db_session=MagicMock())\n\n        assert exc_info.value.status_code == 400\n        mock_open_circuit.assert_not_called()\n\n\nclass TestResetConnection:\n    \"\"\"Tests for reset_stripe_connection endpoint.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.billing.api._close_billing_circuit\")\n    async def test_closes_circuit_for_self_hosted(\n        self,\n        mock_close_circuit: MagicMock,\n    ) -> None:\n        \"\"\"Should close circuit breaker for self-hosted deployment.\"\"\"\n        from ee.onyx.server.billing.api import reset_stripe_connection\n\n        result = await reset_stripe_connection(_=MagicMock())\n\n        assert result.success is True\n        assert \"re-enabled\" in result.message.lower()\n        mock_close_circuit.assert_called_once()\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.MULTI_TENANT\", True)\n    @patch(\"ee.onyx.server.billing.api._close_billing_circuit\")\n    async def test_noop_for_cloud(\n        self,\n        mock_close_circuit: MagicMock,\n    ) -> None:\n        \"\"\"Should be no-op for cloud deployment.\"\"\"\n        from ee.onyx.server.billing.api import reset_stripe_connection\n\n        result = await reset_stripe_connection(_=MagicMock())\n\n        assert result.success is True\n        assert \"not applicable\" in result.message.lower()\n        mock_close_circuit.assert_not_called()\n\n\nclass TestCheckoutSessionWithSeats:\n    \"\"\"Tests for checkout session with seats parameter.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.get_used_seats\")\n    @patch(\"ee.onyx.server.billing.api.create_checkout_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_passes_seats_parameter(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n        mock_get_used_seats: MagicMock,\n    ) -> None:\n        \"\"\"Should pass seats parameter to service.\"\"\"\n        from ee.onyx.server.billing.api import create_checkout_session\n        from ee.onyx.server.billing.models import CreateCheckoutSessionRequest\n\n        mock_get_license.return_value = None\n        mock_get_tenant.return_value = \"tenant_123\"\n        mock_get_used_seats.return_value = 5\n        mock_service.return_value = CreateCheckoutSessionResponse(\n            stripe_checkout_url=\"https://checkout.stripe.com/session\"\n        )\n\n        request = CreateCheckoutSessionRequest(billing_period=\"monthly\", seats=25)\n        await create_checkout_session(\n            request=request, _=MagicMock(), db_session=MagicMock()\n        )\n\n        call_kwargs = mock_service.call_args[1]\n        assert call_kwargs[\"seats\"] == 25\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.api.create_checkout_service\")\n    @patch(\"ee.onyx.server.billing.api._get_tenant_id\")\n    @patch(\"ee.onyx.server.billing.api._get_license_data\")\n    async def test_seats_none_when_not_provided(\n        self,\n        mock_get_license: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_service: AsyncMock,\n    ) -> None:\n        \"\"\"Should pass None for seats when not provided.\"\"\"\n        from ee.onyx.server.billing.api import create_checkout_session\n        from ee.onyx.server.billing.models import CreateCheckoutSessionRequest\n\n        mock_get_license.return_value = None\n        mock_get_tenant.return_value = \"tenant_123\"\n        mock_service.return_value = CreateCheckoutSessionResponse(\n            stripe_checkout_url=\"https://checkout.stripe.com/session\"\n        )\n\n        request = CreateCheckoutSessionRequest(billing_period=\"annual\")\n        await create_checkout_session(\n            request=request, _=MagicMock(), db_session=MagicMock()\n        )\n\n        call_kwargs = mock_service.call_args[1]\n        assert call_kwargs[\"seats\"] is None\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/billing/test_billing_service.py",
    "content": "\"\"\"Tests for the billing service layer.\"\"\"\n\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport httpx\nimport pytest\n\nfrom .conftest import make_mock_http_client\nfrom .conftest import make_mock_response\nfrom ee.onyx.server.billing.models import BillingInformationResponse\nfrom ee.onyx.server.billing.models import CreateCheckoutSessionResponse\nfrom ee.onyx.server.billing.models import CreateCustomerPortalSessionResponse\nfrom ee.onyx.server.billing.models import SeatUpdateResponse\nfrom ee.onyx.server.billing.models import SubscriptionStatusResponse\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\n\n\nclass TestMakeBillingRequest:\n    \"\"\"Tests for the _make_billing_request helper.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.service._get_headers\")\n    @patch(\"ee.onyx.server.billing.service._get_base_url\")\n    async def test_makes_post_request(\n        self,\n        mock_base_url: MagicMock,\n        mock_headers: MagicMock,\n    ) -> None:\n        \"\"\"Should make POST request with body.\"\"\"\n        from ee.onyx.server.billing.service import _make_billing_request\n\n        mock_base_url.return_value = \"https://api.example.com\"\n        mock_headers.return_value = {\"Authorization\": \"Bearer token\"}\n        mock_response = make_mock_response({\"success\": True})\n        mock_client = make_mock_http_client(\"post\", response=mock_response)\n\n        with patch(\"httpx.AsyncClient\", mock_client):\n            result = await _make_billing_request(\n                method=\"POST\",\n                path=\"/test-endpoint\",\n                body={\"key\": \"value\"},\n            )\n\n        assert result == {\"success\": True}\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.service._get_headers\")\n    @patch(\"ee.onyx.server.billing.service._get_base_url\")\n    async def test_makes_get_request(\n        self,\n        mock_base_url: MagicMock,\n        mock_headers: MagicMock,\n    ) -> None:\n        \"\"\"Should make GET request with params.\"\"\"\n        from ee.onyx.server.billing.service import _make_billing_request\n\n        mock_base_url.return_value = \"https://api.example.com\"\n        mock_headers.return_value = {\"Authorization\": \"Bearer token\"}\n        mock_response = make_mock_response({\"data\": \"test\"})\n        mock_client = make_mock_http_client(\"get\", response=mock_response)\n\n        with patch(\"httpx.AsyncClient\", mock_client):\n            result = await _make_billing_request(\n                method=\"GET\",\n                path=\"/test-endpoint\",\n                params={\"tenant_id\": \"123\"},\n            )\n\n        assert result == {\"data\": \"test\"}\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.service._get_headers\")\n    @patch(\"ee.onyx.server.billing.service._get_base_url\")\n    async def test_raises_on_http_error(\n        self,\n        mock_base_url: MagicMock,\n        mock_headers: MagicMock,\n    ) -> None:\n        \"\"\"Should raise OnyxError on HTTP error.\"\"\"\n        from ee.onyx.server.billing.service import _make_billing_request\n\n        mock_base_url.return_value = \"https://api.example.com\"\n        mock_headers.return_value = {}\n        mock_response = make_mock_response({\"detail\": \"Bad request\"})\n        mock_response.status_code = 400\n        error = httpx.HTTPStatusError(\n            \"Error\", request=MagicMock(), response=mock_response\n        )\n        mock_client = make_mock_http_client(\"post\", side_effect=error)\n\n        with patch(\"httpx.AsyncClient\", mock_client):\n            with pytest.raises(OnyxError) as exc_info:\n                await _make_billing_request(\n                    method=\"POST\",\n                    path=\"/test\",\n                    error_message=\"Test failed\",\n                )\n\n        assert exc_info.value.status_code == 400\n        assert exc_info.value.error_code is OnyxErrorCode.BAD_GATEWAY\n        assert \"Bad request\" in exc_info.value.detail\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.service._get_headers\")\n    @patch(\"ee.onyx.server.billing.service._get_base_url\")\n    async def test_follows_redirects(\n        self,\n        mock_base_url: MagicMock,\n        mock_headers: MagicMock,\n    ) -> None:\n        \"\"\"AsyncClient must be created with follow_redirects=True.\n\n        The target server (cloud data plane for self-hosted, control\n        plane for cloud) may sit behind nginx that returns 308\n        (HTTP→HTTPS). httpx does not follow redirects by default,\n        so we must explicitly opt in.\n        \"\"\"\n        from ee.onyx.server.billing.service import _make_billing_request\n\n        mock_base_url.return_value = \"http://api.example.com\"\n        mock_headers.return_value = {\"Authorization\": \"Bearer token\"}\n        mock_response = make_mock_response({\"ok\": True})\n        mock_client = make_mock_http_client(\"get\", response=mock_response)\n\n        with patch(\"httpx.AsyncClient\", mock_client):\n            await _make_billing_request(method=\"GET\", path=\"/test\")\n\n        mock_client.assert_called_once_with(timeout=30.0, follow_redirects=True)\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.service._get_headers\")\n    @patch(\"ee.onyx.server.billing.service._get_base_url\")\n    async def test_raises_on_connection_error(\n        self,\n        mock_base_url: MagicMock,\n        mock_headers: MagicMock,\n    ) -> None:\n        \"\"\"Should raise OnyxError on connection error.\"\"\"\n        from ee.onyx.server.billing.service import _make_billing_request\n\n        mock_base_url.return_value = \"https://api.example.com\"\n        mock_headers.return_value = {}\n        error = httpx.RequestError(\"Connection failed\")\n        mock_client = make_mock_http_client(\"post\", side_effect=error)\n\n        with patch(\"httpx.AsyncClient\", mock_client):\n            with pytest.raises(OnyxError) as exc_info:\n                await _make_billing_request(method=\"POST\", path=\"/test\")\n\n        assert exc_info.value.status_code == 502\n        assert exc_info.value.error_code is OnyxErrorCode.BAD_GATEWAY\n        assert \"Failed to connect\" in exc_info.value.detail\n\n\nclass TestCreateCheckoutSession:\n    \"\"\"Tests for create_checkout_session service function.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.service._make_billing_request\")\n    async def test_creates_checkout_session(\n        self,\n        mock_request: AsyncMock,\n    ) -> None:\n        \"\"\"Should create checkout session and return URL.\"\"\"\n        from ee.onyx.server.billing.service import create_checkout_session\n\n        mock_request.return_value = {\"url\": \"https://checkout.stripe.com/session\"}\n\n        result = await create_checkout_session(\n            billing_period=\"monthly\",\n            email=\"test@example.com\",\n            license_data=\"license_blob\",\n            redirect_url=\"https://app.example.com/success\",\n        )\n\n        assert isinstance(result, CreateCheckoutSessionResponse)\n        assert result.stripe_checkout_url == \"https://checkout.stripe.com/session\"\n\n        call_kwargs = mock_request.call_args[1]\n        assert call_kwargs[\"method\"] == \"POST\"\n        assert call_kwargs[\"path\"] == \"/create-checkout-session\"\n        assert call_kwargs[\"body\"][\"billing_period\"] == \"monthly\"\n        assert call_kwargs[\"body\"][\"email\"] == \"test@example.com\"\n\n\nclass TestCreateCustomerPortalSession:\n    \"\"\"Tests for create_customer_portal_session service function.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.service._make_billing_request\")\n    async def test_creates_portal_session(\n        self,\n        mock_request: AsyncMock,\n    ) -> None:\n        \"\"\"Should create portal session and return URL.\"\"\"\n        from ee.onyx.server.billing.service import create_customer_portal_session\n\n        mock_request.return_value = {\"url\": \"https://billing.stripe.com/portal\"}\n\n        result = await create_customer_portal_session(\n            license_data=\"license_blob\",\n            return_url=\"https://app.example.com/billing\",\n        )\n\n        assert isinstance(result, CreateCustomerPortalSessionResponse)\n        assert result.stripe_customer_portal_url == \"https://billing.stripe.com/portal\"\n\n\nclass TestGetBillingInformation:\n    \"\"\"Tests for get_billing_information service function.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.service._make_billing_request\")\n    async def test_returns_billing_info(\n        self,\n        mock_request: AsyncMock,\n    ) -> None:\n        \"\"\"Should return billing information.\"\"\"\n        from ee.onyx.server.billing.service import get_billing_information\n\n        mock_request.return_value = {\n            \"tenant_id\": \"tenant_123\",\n            \"status\": \"active\",\n            \"seats\": 10,\n            \"billing_period\": \"monthly\",\n        }\n\n        result = await get_billing_information(license_data=\"license_blob\")\n\n        assert isinstance(result, BillingInformationResponse)\n        assert result.tenant_id == \"tenant_123\"\n        assert result.status == \"active\"\n        assert result.seats == 10\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.service._make_billing_request\")\n    async def test_returns_not_subscribed(\n        self,\n        mock_request: AsyncMock,\n    ) -> None:\n        \"\"\"Should return SubscriptionStatusResponse when not subscribed.\"\"\"\n        from ee.onyx.server.billing.service import get_billing_information\n\n        mock_request.return_value = {\"subscribed\": False}\n\n        result = await get_billing_information(license_data=\"license_blob\")\n\n        assert isinstance(result, SubscriptionStatusResponse)\n        assert result.subscribed is False\n\n\nclass TestUpdateSeatCount:\n    \"\"\"Tests for update_seat_count service function.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.service._make_billing_request\")\n    async def test_updates_seats(\n        self,\n        mock_request: AsyncMock,\n    ) -> None:\n        \"\"\"Should update seat count and return response.\"\"\"\n        from ee.onyx.server.billing.service import update_seat_count\n\n        mock_request.return_value = {\n            \"success\": True,\n            \"current_seats\": 15,\n            \"used_seats\": 5,\n            \"message\": \"Seats updated to 15\",\n        }\n\n        result = await update_seat_count(\n            new_seat_count=15,\n            license_data=\"license_blob\",\n        )\n\n        assert isinstance(result, SeatUpdateResponse)\n        assert result.success is True\n        assert result.current_seats == 15\n        assert result.used_seats == 5\n\n        call_kwargs = mock_request.call_args[1]\n        assert call_kwargs[\"body\"][\"new_seat_count\"] == 15\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.billing.service._make_billing_request\")\n    async def test_includes_tenant_id_for_cloud(\n        self,\n        mock_request: AsyncMock,\n    ) -> None:\n        \"\"\"Should include tenant_id in body for cloud deployments.\"\"\"\n        from ee.onyx.server.billing.service import update_seat_count\n\n        mock_request.return_value = {\n            \"success\": True,\n            \"current_seats\": 10,\n            \"used_seats\": 5,\n        }\n\n        with patch(\"ee.onyx.server.billing.service.MULTI_TENANT\", True):\n            await update_seat_count(\n                new_seat_count=10,\n                tenant_id=\"tenant_123\",\n            )\n\n        call_kwargs = mock_request.call_args[1]\n        assert call_kwargs[\"body\"][\"tenant_id\"] == \"tenant_123\"\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/billing/test_proxy.py",
    "content": "\"\"\"Tests for the billing proxy endpoints.\"\"\"\n\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport httpx\nimport pytest\n\nfrom .conftest import make_license_payload\nfrom .conftest import make_mock_http_client\nfrom .conftest import make_mock_response\nfrom ee.onyx.server.license.models import LicensePayload\n\n\nclass TestProxySeatUpdate:\n    \"\"\"Tests for proxy_seat_update endpoint.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.proxy.forward_to_control_plane\")\n    async def test_proxies_seat_update(\n        self,\n        mock_forward: AsyncMock,\n    ) -> None:\n        \"\"\"Should forward seat update request to control plane.\"\"\"\n        from ee.onyx.server.billing.models import SeatUpdateRequest\n        from ee.onyx.server.tenants.proxy import proxy_seat_update\n\n        mock_forward.return_value = {\n            \"success\": True,\n            \"current_seats\": 15,\n            \"used_seats\": 5,\n            \"message\": \"Seats updated\",\n        }\n\n        license_payload = make_license_payload(tenant_id=\"tenant_123\", seats=10)\n\n        request = SeatUpdateRequest(new_seat_count=15)\n        result = await proxy_seat_update(\n            request_body=request,\n            license_payload=license_payload,\n        )\n\n        assert result.success is True\n        assert result.current_seats == 15\n        assert result.used_seats == 5\n\n        mock_forward.assert_called_once_with(\n            \"POST\",\n            \"/seats/update\",\n            body={\n                \"tenant_id\": \"tenant_123\",\n                \"new_seat_count\": 15,\n            },\n        )\n\n    @pytest.mark.asyncio\n    async def test_rejects_missing_tenant_id(self) -> None:\n        \"\"\"Should reject license without tenant_id.\"\"\"\n        from fastapi import HTTPException\n\n        from ee.onyx.server.billing.models import SeatUpdateRequest\n        from ee.onyx.server.tenants.proxy import proxy_seat_update\n\n        # Create a license payload without tenant_id by using a mock\n        license_payload = MagicMock(spec=LicensePayload)\n        license_payload.tenant_id = None\n\n        request = SeatUpdateRequest(new_seat_count=10)\n\n        with pytest.raises(HTTPException) as exc_info:\n            await proxy_seat_update(\n                request_body=request,\n                license_payload=license_payload,\n            )\n\n        assert exc_info.value.status_code == 401\n        assert \"tenant_id\" in exc_info.value.detail\n\n\nclass TestForwardToControlPlane:\n    \"\"\"Tests for forward_to_control_plane helper.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.proxy.generate_data_plane_token\")\n    @patch(\"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\", \"https://cp.test\")\n    async def test_forwards_post_request(\n        self,\n        mock_token: MagicMock,\n    ) -> None:\n        \"\"\"Should forward POST request with JWT auth.\"\"\"\n        from ee.onyx.server.tenants.proxy import forward_to_control_plane\n\n        mock_token.return_value = \"jwt_token\"\n        mock_response = make_mock_response({\"result\": \"success\"})\n        mock_client = make_mock_http_client(\"post\", response=mock_response)\n\n        with patch(\"httpx.AsyncClient\", mock_client):\n            result = await forward_to_control_plane(\n                \"POST\",\n                \"/test-path\",\n                body={\"key\": \"value\"},\n            )\n\n        assert result == {\"result\": \"success\"}\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.proxy.generate_data_plane_token\")\n    @patch(\"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\", \"https://cp.test\")\n    async def test_forwards_get_request(\n        self,\n        mock_token: MagicMock,\n    ) -> None:\n        \"\"\"Should forward GET request with params.\"\"\"\n        from ee.onyx.server.tenants.proxy import forward_to_control_plane\n\n        mock_token.return_value = \"jwt_token\"\n        mock_response = make_mock_response({\"data\": \"test\"})\n        mock_client = make_mock_http_client(\"get\", response=mock_response)\n\n        with patch(\"httpx.AsyncClient\", mock_client):\n            result = await forward_to_control_plane(\n                \"GET\",\n                \"/billing-info\",\n                params={\"tenant_id\": \"123\"},\n            )\n\n        assert result == {\"data\": \"test\"}\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.proxy.generate_data_plane_token\")\n    @patch(\"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\", \"https://cp.test\")\n    async def test_raises_on_http_error(\n        self,\n        mock_token: MagicMock,\n    ) -> None:\n        \"\"\"Should raise HTTPException on HTTP error.\"\"\"\n        from fastapi import HTTPException\n\n        from ee.onyx.server.tenants.proxy import forward_to_control_plane\n\n        mock_token.return_value = \"jwt_token\"\n        mock_response = make_mock_response({\"detail\": \"Bad request\"})\n        mock_response.status_code = 400\n        error = httpx.HTTPStatusError(\n            \"Error\", request=MagicMock(), response=mock_response\n        )\n        mock_client = make_mock_http_client(\"post\", side_effect=error)\n\n        with patch(\"httpx.AsyncClient\", mock_client):\n            with pytest.raises(HTTPException) as exc_info:\n                await forward_to_control_plane(\"POST\", \"/test\")\n\n        assert exc_info.value.status_code == 400\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.proxy.generate_data_plane_token\")\n    @patch(\"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\", \"https://cp.test\")\n    async def test_raises_on_connection_error(\n        self,\n        mock_token: MagicMock,\n    ) -> None:\n        \"\"\"Should raise HTTPException on connection error.\"\"\"\n        from fastapi import HTTPException\n\n        from ee.onyx.server.tenants.proxy import forward_to_control_plane\n\n        mock_token.return_value = \"jwt_token\"\n        error = httpx.RequestError(\"Connection failed\")\n        mock_client = make_mock_http_client(\"post\", side_effect=error)\n\n        with patch(\"httpx.AsyncClient\", mock_client):\n            with pytest.raises(HTTPException) as exc_info:\n                await forward_to_control_plane(\"POST\", \"/test\")\n\n        assert exc_info.value.status_code == 502\n        assert \"Failed to connect\" in exc_info.value.detail\n\n\nclass TestVerifyLicenseAuth:\n    \"\"\"Tests for verify_license_auth helper.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED\", True)\n    @patch(\"ee.onyx.server.tenants.proxy.verify_license_signature\")\n    @patch(\"ee.onyx.server.tenants.proxy.is_license_valid\")\n    async def test_valid_license(\n        self,\n        mock_is_valid: MagicMock,\n        mock_verify: MagicMock,\n    ) -> None:\n        \"\"\"Should return payload for valid license.\"\"\"\n        from ee.onyx.server.tenants.proxy import verify_license_auth\n\n        mock_payload = make_license_payload()\n        mock_verify.return_value = mock_payload\n        mock_is_valid.return_value = True\n\n        result = verify_license_auth(\"valid_license_blob\")\n\n        assert result == mock_payload\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED\", True)\n    @patch(\"ee.onyx.server.tenants.proxy.verify_license_signature\")\n    async def test_invalid_signature(\n        self,\n        mock_verify: MagicMock,\n    ) -> None:\n        \"\"\"Should reject invalid license signature.\"\"\"\n        from fastapi import HTTPException\n\n        from ee.onyx.server.tenants.proxy import verify_license_auth\n\n        mock_verify.side_effect = ValueError(\"Invalid signature\")\n\n        with pytest.raises(HTTPException) as exc_info:\n            verify_license_auth(\"invalid_license\")\n\n        assert exc_info.value.status_code == 401\n        assert \"Invalid license\" in exc_info.value.detail\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED\", True)\n    @patch(\"ee.onyx.server.tenants.proxy.verify_license_signature\")\n    @patch(\"ee.onyx.server.tenants.proxy.is_license_valid\")\n    async def test_expired_license_rejected(\n        self,\n        mock_is_valid: MagicMock,\n        mock_verify: MagicMock,\n    ) -> None:\n        \"\"\"Should reject expired license when allow_expired=False.\"\"\"\n        from fastapi import HTTPException\n\n        from ee.onyx.server.tenants.proxy import verify_license_auth\n\n        mock_payload = make_license_payload(expired=True)\n        mock_verify.return_value = mock_payload\n        mock_is_valid.return_value = False\n\n        with pytest.raises(HTTPException) as exc_info:\n            verify_license_auth(\"expired_license\", allow_expired=False)\n\n        assert exc_info.value.status_code == 401\n        assert \"expired\" in exc_info.value.detail\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED\", True)\n    @patch(\"ee.onyx.server.tenants.proxy.verify_license_signature\")\n    @patch(\"ee.onyx.server.tenants.proxy.is_license_valid\")\n    async def test_expired_license_allowed(\n        self,\n        mock_is_valid: MagicMock,\n        mock_verify: MagicMock,\n    ) -> None:\n        \"\"\"Should accept expired license when allow_expired=True.\"\"\"\n        from ee.onyx.server.tenants.proxy import verify_license_auth\n\n        mock_payload = make_license_payload(expired=True)\n        mock_verify.return_value = mock_payload\n        mock_is_valid.return_value = False\n\n        result = verify_license_auth(\"expired_license\", allow_expired=True)\n\n        assert result == mock_payload\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/features/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/features/hooks/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/features/hooks/test_api.py",
    "content": "\"\"\"Unit tests for ee.onyx.server.features.hooks.api helpers.\n\nCovers:\n- _check_ssrf_safety: scheme enforcement and private-IP blocklist\n- _validate_endpoint: httpx exception → HookValidateStatus mapping\n  ConnectTimeout     → timeout         (any timeout directs user to increase timeout_seconds)\n  ConnectError       → cannot_connect  (DNS / TLS failure)\n  ReadTimeout et al. → timeout         (TCP connected, server slow)\n  Any other exc      → cannot_connect\n- _raise_for_validation_failure: HookValidateStatus → OnyxError mapping\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport httpx\nimport pytest\n\nfrom ee.onyx.server.features.hooks.api import _check_ssrf_safety\nfrom ee.onyx.server.features.hooks.api import _raise_for_validation_failure\nfrom ee.onyx.server.features.hooks.api import _validate_endpoint\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.hooks.models import HookValidateResponse\nfrom onyx.hooks.models import HookValidateStatus\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n_URL = \"https://example.com/hook\"\n_API_KEY = \"secret\"\n_TIMEOUT = 5.0\n\n\ndef _mock_response(status_code: int) -> MagicMock:\n    response = MagicMock()\n    response.status_code = status_code\n    return response\n\n\n# ---------------------------------------------------------------------------\n# _check_ssrf_safety\n# ---------------------------------------------------------------------------\n\n\nclass TestCheckSsrfSafety:\n    def _call(self, url: str) -> None:\n        _check_ssrf_safety(url)\n\n    # --- scheme checks ---\n\n    def test_https_is_allowed(self) -> None:\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_dns:\n            mock_dns.return_value = [(None, None, None, None, (\"93.184.216.34\", 0))]\n            self._call(\"https://example.com/hook\")  # must not raise\n\n    @pytest.mark.parametrize(\n        \"url\", [\"http://example.com/hook\", \"ftp://example.com/hook\"]\n    )\n    def test_non_https_scheme_rejected(self, url: str) -> None:\n        with pytest.raises(OnyxError) as exc_info:\n            self._call(url)\n        assert exc_info.value.error_code == OnyxErrorCode.BAD_GATEWAY\n        assert \"https\" in (exc_info.value.detail or \"\").lower()\n\n    # --- private IP blocklist ---\n\n    @pytest.mark.parametrize(\n        \"ip\",\n        [\n            pytest.param(\"127.0.0.1\", id=\"loopback\"),\n            pytest.param(\"10.0.0.1\", id=\"RFC1918-A\"),\n            pytest.param(\"172.16.0.1\", id=\"RFC1918-B\"),\n            pytest.param(\"192.168.1.1\", id=\"RFC1918-C\"),\n            pytest.param(\"169.254.169.254\", id=\"link-local-IMDS\"),\n            pytest.param(\"100.64.0.1\", id=\"shared-address-space\"),\n            pytest.param(\"::1\", id=\"IPv6-loopback\"),\n            pytest.param(\"fc00::1\", id=\"IPv6-ULA\"),\n            pytest.param(\"fe80::1\", id=\"IPv6-link-local\"),\n        ],\n    )\n    def test_private_ip_is_blocked(self, ip: str) -> None:\n        with (\n            patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_dns,\n            pytest.raises(OnyxError) as exc_info,\n        ):\n            mock_dns.return_value = [(None, None, None, None, (ip, 0))]\n            self._call(\"https://internal.example.com/hook\")\n        assert exc_info.value.error_code == OnyxErrorCode.BAD_GATEWAY\n        assert ip in (exc_info.value.detail or \"\")\n\n    def test_public_ip_is_allowed(self) -> None:\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_dns:\n            mock_dns.return_value = [(None, None, None, None, (\"93.184.216.34\", 0))]\n            self._call(\"https://example.com/hook\")  # must not raise\n\n    def test_dns_resolution_failure_raises(self) -> None:\n        import socket\n\n        with (\n            patch(\n                \"onyx.utils.url.socket.getaddrinfo\",\n                side_effect=socket.gaierror(\"name not found\"),\n            ),\n            pytest.raises(OnyxError) as exc_info,\n        ):\n            self._call(\"https://no-such-host.example.com/hook\")\n        assert exc_info.value.error_code == OnyxErrorCode.BAD_GATEWAY\n\n\n# ---------------------------------------------------------------------------\n# _validate_endpoint\n# ---------------------------------------------------------------------------\n\n\nclass TestValidateEndpoint:\n    def _call(self, *, api_key: str | None = _API_KEY) -> HookValidateResponse:\n        # Bypass SSRF check — tested separately in TestCheckSsrfSafety.\n        with patch(\"ee.onyx.server.features.hooks.api._check_ssrf_safety\"):\n            return _validate_endpoint(\n                endpoint_url=_URL,\n                api_key=api_key,\n                timeout_seconds=_TIMEOUT,\n            )\n\n    @patch(\"ee.onyx.server.features.hooks.api.httpx.Client\")\n    def test_2xx_returns_passed(self, mock_client_cls: MagicMock) -> None:\n        mock_client_cls.return_value.__enter__.return_value.post.return_value = (\n            _mock_response(200)\n        )\n        assert self._call().status == HookValidateStatus.passed\n\n    @patch(\"ee.onyx.server.features.hooks.api.httpx.Client\")\n    def test_5xx_returns_passed(self, mock_client_cls: MagicMock) -> None:\n        mock_client_cls.return_value.__enter__.return_value.post.return_value = (\n            _mock_response(500)\n        )\n        assert self._call().status == HookValidateStatus.passed\n\n    @patch(\"ee.onyx.server.features.hooks.api.httpx.Client\")\n    @pytest.mark.parametrize(\"status_code\", [401, 403])\n    def test_401_403_returns_auth_failed(\n        self, mock_client_cls: MagicMock, status_code: int\n    ) -> None:\n        mock_client_cls.return_value.__enter__.return_value.post.return_value = (\n            _mock_response(status_code)\n        )\n        result = self._call()\n        assert result.status == HookValidateStatus.auth_failed\n        assert str(status_code) in (result.error_message or \"\")\n\n    @patch(\"ee.onyx.server.features.hooks.api.httpx.Client\")\n    def test_4xx_non_auth_returns_passed(self, mock_client_cls: MagicMock) -> None:\n        mock_client_cls.return_value.__enter__.return_value.post.return_value = (\n            _mock_response(422)\n        )\n        assert self._call().status == HookValidateStatus.passed\n\n    @patch(\"ee.onyx.server.features.hooks.api.httpx.Client\")\n    def test_connect_timeout_returns_timeout(self, mock_client_cls: MagicMock) -> None:\n        mock_client_cls.return_value.__enter__.return_value.post.side_effect = (\n            httpx.ConnectTimeout(\"timed out\")\n        )\n        assert self._call().status == HookValidateStatus.timeout\n\n    @patch(\"ee.onyx.server.features.hooks.api.httpx.Client\")\n    @pytest.mark.parametrize(\n        \"exc\",\n        [\n            httpx.ReadTimeout(\"read timeout\"),\n            httpx.WriteTimeout(\"write timeout\"),\n            httpx.PoolTimeout(\"pool timeout\"),\n        ],\n    )\n    def test_read_write_pool_timeout_returns_timeout(\n        self, mock_client_cls: MagicMock, exc: httpx.TimeoutException\n    ) -> None:\n        mock_client_cls.return_value.__enter__.return_value.post.side_effect = exc\n        assert self._call().status == HookValidateStatus.timeout\n\n    @patch(\"ee.onyx.server.features.hooks.api.httpx.Client\")\n    def test_connect_error_returns_cannot_connect(\n        self, mock_client_cls: MagicMock\n    ) -> None:\n        # Covers DNS failures, TLS errors, and other connection-level errors.\n        mock_client_cls.return_value.__enter__.return_value.post.side_effect = (\n            httpx.ConnectError(\"name resolution failed\")\n        )\n        assert self._call().status == HookValidateStatus.cannot_connect\n\n    @patch(\"ee.onyx.server.features.hooks.api.httpx.Client\")\n    def test_arbitrary_exception_returns_cannot_connect(\n        self, mock_client_cls: MagicMock\n    ) -> None:\n        mock_client_cls.return_value.__enter__.return_value.post.side_effect = (\n            ConnectionRefusedError(\"refused\")\n        )\n        assert self._call().status == HookValidateStatus.cannot_connect\n\n    @patch(\"ee.onyx.server.features.hooks.api.httpx.Client\")\n    def test_api_key_sent_as_bearer(self, mock_client_cls: MagicMock) -> None:\n        mock_post = mock_client_cls.return_value.__enter__.return_value.post\n        mock_post.return_value = _mock_response(200)\n        self._call(api_key=\"mykey\")\n        _, kwargs = mock_post.call_args\n        assert kwargs[\"headers\"][\"Authorization\"] == \"Bearer mykey\"\n\n    @patch(\"ee.onyx.server.features.hooks.api.httpx.Client\")\n    def test_no_api_key_omits_auth_header(self, mock_client_cls: MagicMock) -> None:\n        mock_post = mock_client_cls.return_value.__enter__.return_value.post\n        mock_post.return_value = _mock_response(200)\n        self._call(api_key=None)\n        _, kwargs = mock_post.call_args\n        assert \"Authorization\" not in kwargs[\"headers\"]\n\n\n# ---------------------------------------------------------------------------\n# _raise_for_validation_failure\n# ---------------------------------------------------------------------------\n\n\nclass TestRaiseForValidationFailure:\n    @pytest.mark.parametrize(\n        \"status, expected_code\",\n        [\n            (HookValidateStatus.auth_failed, OnyxErrorCode.CREDENTIAL_INVALID),\n            (HookValidateStatus.timeout, OnyxErrorCode.GATEWAY_TIMEOUT),\n            (HookValidateStatus.cannot_connect, OnyxErrorCode.BAD_GATEWAY),\n        ],\n    )\n    def test_raises_correct_error_code(\n        self, status: HookValidateStatus, expected_code: OnyxErrorCode\n    ) -> None:\n        validation = HookValidateResponse(status=status, error_message=\"some error\")\n        with pytest.raises(OnyxError) as exc_info:\n            _raise_for_validation_failure(validation)\n        assert exc_info.value.error_code == expected_code\n\n    def test_auth_failed_passes_error_message_directly(self) -> None:\n        validation = HookValidateResponse(\n            status=HookValidateStatus.auth_failed, error_message=\"bad credentials\"\n        )\n        with pytest.raises(OnyxError) as exc_info:\n            _raise_for_validation_failure(validation)\n        assert exc_info.value.detail == \"bad credentials\"\n\n    @pytest.mark.parametrize(\n        \"status\", [HookValidateStatus.timeout, HookValidateStatus.cannot_connect]\n    )\n    def test_timeout_and_cannot_connect_wrap_error_message(\n        self, status: HookValidateStatus\n    ) -> None:\n        validation = HookValidateResponse(status=status, error_message=\"raw error\")\n        with pytest.raises(OnyxError) as exc_info:\n            _raise_for_validation_failure(validation)\n        assert exc_info.value.detail == \"Endpoint validation failed: raw error\"\n\n\n# ---------------------------------------------------------------------------\n# HookValidateStatus enum string values (API contract)\n# ---------------------------------------------------------------------------\n\n\nclass TestHookValidateStatusValues:\n    @pytest.mark.parametrize(\n        \"status, expected\",\n        [\n            (HookValidateStatus.passed, \"passed\"),\n            (HookValidateStatus.auth_failed, \"auth_failed\"),\n            (HookValidateStatus.timeout, \"timeout\"),\n            (HookValidateStatus.cannot_connect, \"cannot_connect\"),\n        ],\n    )\n    def test_string_values(self, status: HookValidateStatus, expected: str) -> None:\n        assert status == expected\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/license/test_api.py",
    "content": "\"\"\"Tests for license API utilities.\"\"\"\n\nfrom ee.onyx.server.license.api import _strip_pem_delimiters\n\n\nclass TestStripPemDelimiters:\n    \"\"\"Tests for the PEM delimiter stripping function.\"\"\"\n\n    def test_strips_pem_delimiters(self) -> None:\n        \"\"\"Content wrapped in PEM delimiters is extracted correctly.\"\"\"\n        content = \"\"\"-----BEGIN ONYX LICENSE-----\neyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==\n-----END ONYX LICENSE-----\"\"\"\n\n        result = _strip_pem_delimiters(content)\n\n        assert result == \"eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==\"\n\n    def test_handles_multiline_content(self) -> None:\n        \"\"\"Multiline base64 content between delimiters is preserved.\"\"\"\n        content = \"\"\"-----BEGIN ONYX LICENSE-----\neyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjog\nIjEuMCIsICJ0ZW5hbnRfaWQiOiAidGVz\ndCJ9LCAic2lnbmF0dXJlIjogImFiYyJ9\n-----END ONYX LICENSE-----\"\"\"\n\n        result = _strip_pem_delimiters(content)\n\n        expected = \"\"\"eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjog\nIjEuMCIsICJ0ZW5hbnRfaWQiOiAidGVz\ndCJ9LCAic2lnbmF0dXJlIjogImFiYyJ9\"\"\"\n        assert result == expected\n\n    def test_returns_unchanged_without_delimiters(self) -> None:\n        \"\"\"Content without PEM delimiters is returned unchanged.\"\"\"\n        content = \"eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==\"\n\n        result = _strip_pem_delimiters(content)\n\n        assert result == content\n\n    def test_handles_whitespace(self) -> None:\n        \"\"\"Leading/trailing whitespace is handled correctly.\"\"\"\n        content = \"\"\"\n  -----BEGIN ONYX LICENSE-----\neyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==\n-----END ONYX LICENSE-----\n  \"\"\"\n\n        result = _strip_pem_delimiters(content)\n\n        assert result == \"eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==\"\n\n    def test_partial_delimiters_unchanged(self) -> None:\n        \"\"\"Content with only begin or only end delimiter is returned unchanged.\"\"\"\n        begin_only = \"\"\"-----BEGIN ONYX LICENSE-----\neyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==\"\"\"\n\n        end_only = \"\"\"eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==\n-----END ONYX LICENSE-----\"\"\"\n\n        assert _strip_pem_delimiters(begin_only) == begin_only.strip()\n        assert _strip_pem_delimiters(end_only) == end_only.strip()\n\n    def test_trailing_newlines_stripped_from_raw_input(self) -> None:\n        \"\"\"Raw license strings with trailing newlines from user paste are cleaned.\"\"\"\n        content = \"eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==\\n\\n\"\n\n        result = _strip_pem_delimiters(content)\n\n        assert result == \"eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==\"\n\n    def test_trailing_newlines_stripped_after_pem(self) -> None:\n        \"\"\"Inner content with trailing newlines after PEM stripping is cleaned.\"\"\"\n        content = \"\"\"-----BEGIN ONYX LICENSE-----\neyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==\n\n-----END ONYX LICENSE-----\"\"\"\n\n        result = _strip_pem_delimiters(content)\n\n        assert result == \"eyJwYXlsb2FkIjogeyJ2ZXJzaW9uIjogIjEuMCJ9fQ==\"\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/middleware/test_license_enforcement.py",
    "content": "\"\"\"Tests for license enforcement middleware.\"\"\"\n\nfrom collections.abc import Awaitable\nfrom collections.abc import Callable\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom starlette.requests import Request\nfrom starlette.responses import Response\n\nfrom ee.onyx.configs.license_enforcement_config import EE_ONLY_PATH_PREFIXES\nfrom ee.onyx.configs.license_enforcement_config import (\n    LICENSE_ENFORCEMENT_ALLOWED_PREFIXES,\n)\nfrom ee.onyx.server.middleware.license_enforcement import _is_ee_only_path\nfrom ee.onyx.server.middleware.license_enforcement import _is_path_allowed\nfrom onyx.server.settings.models import ApplicationStatus\n\n# Type alias for the middleware harness tuple\nMiddlewareHarness = tuple[\n    Callable[[Request, Callable[[Request], Awaitable[Response]]], Awaitable[Response]],\n    Callable[[Request], Awaitable[Response]],\n]\n\n# Paths that should be blocked (core functionality requiring license)\nBLOCKED_PATHS = [\n    \"/chat\",\n    \"/search\",\n    \"/admin/connectors\",\n    \"/connector\",\n    \"/persona\",\n]\n\n\nclass TestPathAllowlist:\n    \"\"\"Tests for the path allowlist logic.\n\n    Uses LICENSE_ENFORCEMENT_ALLOWED_PREFIXES from the constants module\n    as the source of truth to ensure tests stay in sync with production code.\n    \"\"\"\n\n    @pytest.mark.parametrize(\"path\", list(LICENSE_ENFORCEMENT_ALLOWED_PREFIXES))\n    def test_allowed_paths_are_allowed(self, path: str) -> None:\n        \"\"\"All paths in LICENSE_ENFORCEMENT_ALLOWED_PREFIXES should be allowed.\"\"\"\n        assert _is_path_allowed(path) is True\n\n    def test_allowed_path_prefix_matching(self) -> None:\n        \"\"\"Subpaths of allowed prefixes should also be allowed.\"\"\"\n        assert _is_path_allowed(\"/auth/callback/google\") is True\n        assert _is_path_allowed(\"/admin/billing/checkout\") is True\n\n    @pytest.mark.parametrize(\"path\", BLOCKED_PATHS)\n    def test_blocked_paths_are_blocked(self, path: str) -> None:\n        \"\"\"Core functionality paths should be blocked when license is gated.\"\"\"\n        assert _is_path_allowed(path) is False\n\n\nclass TestEEOnlyPaths:\n    \"\"\"Tests for EE-only path detection.\n\n    Uses EE_ONLY_PATH_PREFIXES from the constants module as the source of truth\n    to ensure tests stay in sync with production code.\n    \"\"\"\n\n    @pytest.mark.parametrize(\"path\", list(EE_ONLY_PATH_PREFIXES))\n    def test_ee_only_paths_are_detected(self, path: str) -> None:\n        \"\"\"All paths in EE_ONLY_PATH_PREFIXES should be detected as EE-only.\"\"\"\n        assert _is_ee_only_path(path) is True\n\n    @pytest.mark.parametrize(\n        \"path\",\n        [\n            \"/chat\",\n            \"/search\",\n            \"/connector\",\n            \"/persona\",\n        ],\n    )\n    def test_community_paths_are_not_ee_only(self, path: str) -> None:\n        \"\"\"Community features should not be detected as EE-only.\"\"\"\n        assert _is_ee_only_path(path) is False\n\n\nclass TestLicenseEnforcementMiddleware:\n    \"\"\"Tests for middleware behavior under different conditions.\"\"\"\n\n    @pytest.fixture\n    def middleware_harness(self) -> MiddlewareHarness:\n        \"\"\"Create a test harness for the middleware.\"\"\"\n        from ee.onyx.server.middleware.license_enforcement import (\n            add_license_enforcement_middleware,\n        )\n\n        app = MagicMock()\n        logger = MagicMock()\n        captured_middleware: Any = None\n\n        def capture_middleware(\n            middleware_type: str,  # noqa: ARG001\n        ) -> Callable[[Any], Any]:\n            def decorator(func: Any) -> Any:\n                nonlocal captured_middleware\n                captured_middleware = func\n                return func\n\n            return decorator\n\n        app.middleware = capture_middleware\n        add_license_enforcement_middleware(app, logger)\n\n        async def call_next(req: Request) -> Response:  # noqa: ARG001\n            response = MagicMock()\n            response.status_code = 200\n            return response\n\n        return captured_middleware, call_next\n\n    @pytest.mark.asyncio\n    @patch(\n        \"ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED\",\n        True,\n    )\n    @patch(\"ee.onyx.server.middleware.license_enforcement.get_current_tenant_id\")\n    @patch(\"ee.onyx.server.middleware.license_enforcement.get_cached_license_metadata\")\n    async def test_gated_access_status_gets_402(\n        self,\n        mock_get_metadata: MagicMock,\n        mock_get_tenant: MagicMock,\n        middleware_harness: MiddlewareHarness,\n    ) -> None:\n        \"\"\"GATED_ACCESS status blocks non-allowlisted paths with 402.\"\"\"\n        mock_get_tenant.return_value = \"default\"\n        mock_metadata = MagicMock()\n        mock_metadata.status = ApplicationStatus.GATED_ACCESS\n        mock_get_metadata.return_value = mock_metadata\n\n        middleware, call_next = middleware_harness\n        mock_request = MagicMock()\n        mock_request.url.path = \"/api/chat\"\n\n        response = await middleware(mock_request, call_next)\n        assert response.status_code == 402\n\n    @pytest.mark.asyncio\n    @patch(\n        \"ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED\",\n        True,\n    )\n    @patch(\"ee.onyx.server.middleware.license_enforcement.get_current_tenant_id\")\n    @patch(\"ee.onyx.server.middleware.license_enforcement.get_cached_license_metadata\")\n    async def test_grace_period_allows_access(\n        self,\n        mock_get_metadata: MagicMock,\n        mock_get_tenant: MagicMock,\n        middleware_harness: MiddlewareHarness,\n    ) -> None:\n        \"\"\"GRACE_PERIOD status allows access (for notifications only, not blocking).\"\"\"\n        mock_get_tenant.return_value = \"default\"\n        mock_metadata = MagicMock()\n        mock_metadata.status = ApplicationStatus.GRACE_PERIOD\n        mock_metadata.used_seats = 5\n        mock_metadata.seats = 10\n        mock_get_metadata.return_value = mock_metadata\n\n        middleware, call_next = middleware_harness\n        mock_request = MagicMock()\n        mock_request.url.path = \"/api/chat\"\n\n        response = await middleware(mock_request, call_next)\n        assert response.status_code == 200\n\n    @pytest.mark.asyncio\n    @patch(\n        \"ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED\",\n        True,\n    )\n    @patch(\n        \"ee.onyx.server.middleware.license_enforcement.get_session_with_current_tenant\"\n    )\n    @patch(\"ee.onyx.server.middleware.license_enforcement.refresh_license_cache\")\n    @patch(\"ee.onyx.server.middleware.license_enforcement.get_current_tenant_id\")\n    @patch(\"ee.onyx.server.middleware.license_enforcement.get_cached_license_metadata\")\n    async def test_no_license_blocks_ee_only_paths(\n        self,\n        mock_get_metadata: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_refresh: MagicMock,\n        mock_get_session: MagicMock,  # noqa: ARG002\n        middleware_harness: MiddlewareHarness,\n    ) -> None:\n        \"\"\"No license blocks EE-only paths with 402.\"\"\"\n        mock_get_tenant.return_value = \"default\"\n        mock_get_metadata.return_value = None\n        mock_refresh.return_value = None  # Still no license after DB check\n\n        middleware, call_next = middleware_harness\n        mock_request = MagicMock()\n        mock_request.url.path = \"/api/analytics\"  # EE-only path\n\n        response = await middleware(mock_request, call_next)\n        assert response.status_code == 402\n\n    @pytest.mark.asyncio\n    @patch(\n        \"ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED\",\n        True,\n    )\n    @patch(\n        \"ee.onyx.server.middleware.license_enforcement.get_session_with_current_tenant\"\n    )\n    @patch(\"ee.onyx.server.middleware.license_enforcement.refresh_license_cache\")\n    @patch(\"ee.onyx.server.middleware.license_enforcement.get_current_tenant_id\")\n    @patch(\"ee.onyx.server.middleware.license_enforcement.get_cached_license_metadata\")\n    async def test_no_license_allows_community_paths(\n        self,\n        mock_get_metadata: MagicMock,\n        mock_get_tenant: MagicMock,\n        mock_refresh: MagicMock,\n        mock_get_session: MagicMock,  # noqa: ARG002\n        middleware_harness: MiddlewareHarness,\n    ) -> None:\n        \"\"\"No license allows community features (non-EE paths).\"\"\"\n        mock_get_tenant.return_value = \"default\"\n        mock_get_metadata.return_value = None\n        mock_refresh.return_value = None  # Still no license after DB check\n\n        middleware, call_next = middleware_harness\n        mock_request = MagicMock()\n        mock_request.url.path = \"/api/chat\"  # Community path\n\n        response = await middleware(mock_request, call_next)\n        assert response.status_code == 200\n\n    @pytest.mark.asyncio\n    @patch(\n        \"ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED\",\n        True,\n    )\n    @patch(\"ee.onyx.server.middleware.license_enforcement.get_current_tenant_id\")\n    @patch(\"ee.onyx.server.middleware.license_enforcement.get_cached_license_metadata\")\n    async def test_redis_error_fails_open(\n        self,\n        mock_get_metadata: MagicMock,\n        mock_get_tenant: MagicMock,\n        middleware_harness: MiddlewareHarness,\n    ) -> None:\n        \"\"\"Redis errors should not block users - fail open to allow access.\"\"\"\n        from redis.exceptions import RedisError\n\n        mock_get_tenant.return_value = \"test_tenant\"\n        mock_get_metadata.side_effect = RedisError(\"Connection failed\")\n\n        middleware, call_next = middleware_harness\n        mock_request = MagicMock()\n        mock_request.url.path = \"/api/chat\"\n\n        response = await middleware(mock_request, call_next)\n        assert response.status_code == 200  # Fail open\n\n    @pytest.mark.asyncio\n    @patch(\n        \"ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED\",\n        False,\n    )\n    async def test_disabled_enforcement_allows_all(\n        self,\n        middleware_harness: MiddlewareHarness,\n    ) -> None:\n        \"\"\"When enforcement is disabled, all requests pass through.\"\"\"\n        middleware, call_next = middleware_harness\n        mock_request = MagicMock()\n        mock_request.url.path = \"/api/chat\"\n\n        response = await middleware(mock_request, call_next)\n        assert response.status_code == 200\n\n    @pytest.mark.asyncio\n    @patch(\n        \"ee.onyx.server.middleware.license_enforcement.LICENSE_ENFORCEMENT_ENABLED\",\n        True,\n    )\n    @patch(\"ee.onyx.server.middleware.license_enforcement.get_current_tenant_id\")\n    @patch(\"ee.onyx.server.middleware.license_enforcement.get_cached_license_metadata\")\n    async def test_seat_limit_exceeded_gets_402(\n        self,\n        mock_get_metadata: MagicMock,\n        mock_get_tenant: MagicMock,\n        middleware_harness: MiddlewareHarness,\n    ) -> None:\n        \"\"\"Seat limit exceeded returns 402.\"\"\"\n        mock_get_tenant.return_value = \"default\"\n        mock_metadata = MagicMock()\n        mock_metadata.status = ApplicationStatus.ACTIVE\n        mock_metadata.used_seats = 15\n        mock_metadata.seats = 10  # Over limit\n        mock_get_metadata.return_value = mock_metadata\n\n        middleware, call_next = middleware_harness\n        mock_request = MagicMock()\n        mock_request.url.path = \"/api/chat\"\n\n        response = await middleware(mock_request, call_next)\n        assert response.status_code == 402\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/settings/test_license_enforcement_settings.py",
    "content": "\"\"\"Tests for license enforcement in settings API.\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom redis.exceptions import RedisError\n\nfrom onyx.server.settings.models import ApplicationStatus\nfrom onyx.server.settings.models import Settings\n\n# Fields we assert on across all tests\n_ASSERT_FIELDS = {\n    \"application_status\",\n    \"ee_features_enabled\",\n    \"seat_count\",\n    \"used_seats\",\n}\n\n\ndef _pick(settings: Settings) -> dict:\n    \"\"\"Extract only the fields under test from a Settings object.\"\"\"\n    return settings.model_dump(include=_ASSERT_FIELDS)\n\n\n@pytest.fixture\ndef base_settings() -> Settings:\n    \"\"\"Create base settings for testing.\"\"\"\n    return Settings(\n        maximum_chat_retention_days=None,\n        gpu_enabled=False,\n        application_status=ApplicationStatus.ACTIVE,\n    )\n\n\nclass TestApplyLicenseStatusToSettings:\n    \"\"\"Tests for apply_license_status_to_settings function.\"\"\"\n\n    @patch(\"ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED\", False)\n    def test_enforcement_disabled_enables_ee_features(\n        self, base_settings: Settings\n    ) -> None:\n        \"\"\"When LICENSE_ENFORCEMENT_ENABLED=False, EE features are enabled.\"\"\"\n        from ee.onyx.server.settings.api import apply_license_status_to_settings\n\n        assert base_settings.ee_features_enabled is False\n        result = apply_license_status_to_settings(base_settings)\n        assert _pick(result) == {\n            \"application_status\": ApplicationStatus.ACTIVE,\n            \"ee_features_enabled\": True,\n            \"seat_count\": None,\n            \"used_seats\": None,\n        }\n\n    @patch(\"ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED\", True)\n    @patch(\"ee.onyx.server.settings.api.MULTI_TENANT\", True)\n    def test_multi_tenant_enables_ee_features(self, base_settings: Settings) -> None:\n        \"\"\"Cloud mode always enables EE features.\"\"\"\n        from ee.onyx.server.settings.api import apply_license_status_to_settings\n\n        result = apply_license_status_to_settings(base_settings)\n        assert _pick(result) == {\n            \"application_status\": ApplicationStatus.ACTIVE,\n            \"ee_features_enabled\": True,\n            \"seat_count\": None,\n            \"used_seats\": None,\n        }\n\n    @pytest.mark.parametrize(\n        \"license_status,used_seats,seats,expected\",\n        [\n            (\n                ApplicationStatus.GATED_ACCESS,\n                3,\n                10,\n                {\n                    \"application_status\": ApplicationStatus.GATED_ACCESS,\n                    \"ee_features_enabled\": False,\n                    \"seat_count\": None,\n                    \"used_seats\": None,\n                },\n            ),\n            (\n                ApplicationStatus.ACTIVE,\n                3,\n                10,\n                {\n                    \"application_status\": ApplicationStatus.ACTIVE,\n                    \"ee_features_enabled\": True,\n                    \"seat_count\": None,\n                    \"used_seats\": None,\n                },\n            ),\n            (\n                ApplicationStatus.ACTIVE,\n                10,\n                10,\n                {\n                    \"application_status\": ApplicationStatus.ACTIVE,\n                    \"ee_features_enabled\": True,\n                    \"seat_count\": None,\n                    \"used_seats\": None,\n                },\n            ),\n            (\n                ApplicationStatus.GRACE_PERIOD,\n                3,\n                10,\n                {\n                    \"application_status\": ApplicationStatus.ACTIVE,\n                    \"ee_features_enabled\": True,\n                    \"seat_count\": None,\n                    \"used_seats\": None,\n                },\n            ),\n        ],\n    )\n    @patch(\"ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED\", True)\n    @patch(\"ee.onyx.server.settings.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.settings.api.get_current_tenant_id\")\n    @patch(\"ee.onyx.server.settings.api.get_cached_license_metadata\")\n    def test_self_hosted_license_status_propagation(\n        self,\n        mock_get_metadata: MagicMock,\n        mock_get_tenant: MagicMock,\n        license_status: ApplicationStatus,\n        used_seats: int,\n        seats: int,\n        expected: dict,\n        base_settings: Settings,\n    ) -> None:\n        \"\"\"Self-hosted: license status controls both application_status and ee_features_enabled.\"\"\"\n        from ee.onyx.server.settings.api import apply_license_status_to_settings\n\n        mock_get_tenant.return_value = \"test_tenant\"\n        mock_metadata = MagicMock()\n        mock_metadata.status = license_status\n        mock_metadata.used_seats = used_seats\n        mock_metadata.seats = seats\n        mock_get_metadata.return_value = mock_metadata\n\n        result = apply_license_status_to_settings(base_settings)\n        assert _pick(result) == expected\n\n    @patch(\"ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED\", True)\n    @patch(\"ee.onyx.server.settings.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.settings.api.get_current_tenant_id\")\n    @patch(\"ee.onyx.server.settings.api.get_cached_license_metadata\")\n    def test_seat_limit_exceeded_sets_status_and_counts(\n        self,\n        mock_get_metadata: MagicMock,\n        mock_get_tenant: MagicMock,\n        base_settings: Settings,\n    ) -> None:\n        \"\"\"Seat limit exceeded sets SEAT_LIMIT_EXCEEDED with counts, keeps EE enabled.\"\"\"\n        from ee.onyx.server.settings.api import apply_license_status_to_settings\n\n        mock_get_tenant.return_value = \"test_tenant\"\n        mock_metadata = MagicMock()\n        mock_metadata.status = ApplicationStatus.ACTIVE\n        mock_metadata.used_seats = 15\n        mock_metadata.seats = 10\n        mock_get_metadata.return_value = mock_metadata\n\n        result = apply_license_status_to_settings(base_settings)\n        assert _pick(result) == {\n            \"application_status\": ApplicationStatus.SEAT_LIMIT_EXCEEDED,\n            \"ee_features_enabled\": True,\n            \"seat_count\": 10,\n            \"used_seats\": 15,\n        }\n\n    @patch(\"ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED\", True)\n    @patch(\"ee.onyx.server.settings.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.settings.api.get_current_tenant_id\")\n    @patch(\"ee.onyx.server.settings.api.get_cached_license_metadata\")\n    def test_expired_license_takes_precedence_over_seat_limit(\n        self,\n        mock_get_metadata: MagicMock,\n        mock_get_tenant: MagicMock,\n        base_settings: Settings,\n    ) -> None:\n        \"\"\"Expired license (GATED_ACCESS) takes precedence over seat limit exceeded.\"\"\"\n        from ee.onyx.server.settings.api import apply_license_status_to_settings\n\n        mock_get_tenant.return_value = \"test_tenant\"\n        mock_metadata = MagicMock()\n        mock_metadata.status = ApplicationStatus.GATED_ACCESS\n        mock_metadata.used_seats = 15\n        mock_metadata.seats = 10\n        mock_get_metadata.return_value = mock_metadata\n\n        result = apply_license_status_to_settings(base_settings)\n        assert _pick(result) == {\n            \"application_status\": ApplicationStatus.GATED_ACCESS,\n            \"ee_features_enabled\": False,\n            \"seat_count\": None,\n            \"used_seats\": None,\n        }\n\n    @patch(\"ee.onyx.server.settings.api.ENTERPRISE_EDITION_ENABLED\", True)\n    @patch(\"ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED\", True)\n    @patch(\"ee.onyx.server.settings.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.settings.api.refresh_license_cache\", return_value=None)\n    @patch(\"ee.onyx.server.settings.api.get_session_with_current_tenant\")\n    @patch(\"ee.onyx.server.settings.api.get_current_tenant_id\")\n    @patch(\"ee.onyx.server.settings.api.get_cached_license_metadata\")\n    def test_no_license_with_ee_flag_gates_access(\n        self,\n        mock_get_metadata: MagicMock,\n        mock_get_tenant: MagicMock,\n        _mock_get_session: MagicMock,\n        _mock_refresh: MagicMock,\n        base_settings: Settings,\n    ) -> None:\n        \"\"\"No license + ENTERPRISE_EDITION_ENABLED=true → GATED_ACCESS.\"\"\"\n        from ee.onyx.server.settings.api import apply_license_status_to_settings\n\n        mock_get_tenant.return_value = \"test_tenant\"\n        mock_get_metadata.return_value = None\n\n        result = apply_license_status_to_settings(base_settings)\n        assert _pick(result) == {\n            \"application_status\": ApplicationStatus.GATED_ACCESS,\n            \"ee_features_enabled\": False,\n            \"seat_count\": None,\n            \"used_seats\": None,\n        }\n\n    @patch(\"ee.onyx.server.settings.api.ENTERPRISE_EDITION_ENABLED\", False)\n    @patch(\"ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED\", True)\n    @patch(\"ee.onyx.server.settings.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.settings.api.refresh_license_cache\", return_value=None)\n    @patch(\"ee.onyx.server.settings.api.get_session_with_current_tenant\")\n    @patch(\"ee.onyx.server.settings.api.get_current_tenant_id\")\n    @patch(\"ee.onyx.server.settings.api.get_cached_license_metadata\")\n    def test_no_license_without_ee_flag_allows_community(\n        self,\n        mock_get_metadata: MagicMock,\n        mock_get_tenant: MagicMock,\n        _mock_get_session: MagicMock,\n        _mock_refresh: MagicMock,\n        base_settings: Settings,\n    ) -> None:\n        \"\"\"No license + ENTERPRISE_EDITION_ENABLED=false → community mode (no gating).\"\"\"\n        from ee.onyx.server.settings.api import apply_license_status_to_settings\n\n        mock_get_tenant.return_value = \"test_tenant\"\n        mock_get_metadata.return_value = None\n\n        result = apply_license_status_to_settings(base_settings)\n        assert _pick(result) == {\n            \"application_status\": ApplicationStatus.ACTIVE,\n            \"ee_features_enabled\": False,\n            \"seat_count\": None,\n            \"used_seats\": None,\n        }\n\n    @patch(\"ee.onyx.server.settings.api.LICENSE_ENFORCEMENT_ENABLED\", True)\n    @patch(\"ee.onyx.server.settings.api.MULTI_TENANT\", False)\n    @patch(\"ee.onyx.server.settings.api.get_current_tenant_id\")\n    @patch(\"ee.onyx.server.settings.api.get_cached_license_metadata\")\n    def test_redis_error_disables_ee_features(\n        self,\n        mock_get_metadata: MagicMock,\n        mock_get_tenant: MagicMock,\n        base_settings: Settings,\n    ) -> None:\n        \"\"\"Redis errors fail closed - disable EE features.\"\"\"\n        from ee.onyx.server.settings.api import apply_license_status_to_settings\n\n        mock_get_tenant.return_value = \"test_tenant\"\n        mock_get_metadata.side_effect = RedisError(\"Connection failed\")\n\n        result = apply_license_status_to_settings(base_settings)\n        assert _pick(result) == {\n            \"application_status\": ApplicationStatus.ACTIVE,\n            \"ee_features_enabled\": False,\n            \"seat_count\": None,\n            \"used_seats\": None,\n        }\n\n\nclass TestSettingsDefaults:\n    \"\"\"Verify Settings model defaults for CE deployments.\"\"\"\n\n    def test_default_ee_features_disabled(self) -> None:\n        \"\"\"CE default: ee_features_enabled is False.\"\"\"\n        settings = Settings()\n        assert settings.ee_features_enabled is False\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/tenants/test_billing_api.py",
    "content": "\"\"\"Tests for billing API endpoints.\"\"\"\n\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport httpx\nimport pytest\n\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\n\n\nclass TestGetStripePublishableKey:\n    \"\"\"Tests for get_stripe_publishable_key endpoint.\"\"\"\n\n    def setup_method(self) -> None:\n        \"\"\"Reset the cache before each test.\"\"\"\n        import ee.onyx.server.tenants.billing_api as billing_api\n\n        billing_api._stripe_publishable_key_cache = None\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE\", None)\n    @patch(\n        \"ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_URL\",\n        \"https://example.com/key.txt\",\n    )\n    async def test_fetches_from_s3_when_no_override(self) -> None:\n        \"\"\"Should fetch key from S3 when no env var override is set.\"\"\"\n        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key\n\n        mock_response = MagicMock()\n        mock_response.text = \"pk_live_test123\"\n        mock_response.raise_for_status = MagicMock()\n\n        with patch(\"httpx.AsyncClient\") as mock_client:\n            mock_client.return_value.__aenter__.return_value.get = AsyncMock(\n                return_value=mock_response\n            )\n            result = await get_stripe_publishable_key()\n\n        assert result.publishable_key == \"pk_live_test123\"\n\n    @pytest.mark.asyncio\n    @patch(\n        \"ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE\",\n        \"pk_test_override123\",\n    )\n    async def test_uses_env_var_override_when_set(self) -> None:\n        \"\"\"Should use env var override instead of fetching from S3.\"\"\"\n        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key\n\n        with patch(\"httpx.AsyncClient\") as mock_client:\n            result = await get_stripe_publishable_key()\n            # Should not call S3\n            mock_client.assert_not_called()\n\n        assert result.publishable_key == \"pk_test_override123\"\n\n    @pytest.mark.asyncio\n    @patch(\n        \"ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE\",\n        \"invalid_key\",\n    )\n    async def test_rejects_invalid_env_var_key_format(self) -> None:\n        \"\"\"Should reject keys that don't start with pk_.\"\"\"\n        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key\n\n        with pytest.raises(OnyxError) as exc_info:\n            await get_stripe_publishable_key()\n\n        assert exc_info.value.status_code == 500\n        assert exc_info.value.error_code is OnyxErrorCode.INTERNAL_ERROR\n        assert exc_info.value.detail == \"Invalid Stripe publishable key format\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE\", None)\n    @patch(\n        \"ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_URL\",\n        \"https://example.com/key.txt\",\n    )\n    async def test_rejects_invalid_s3_key_format(self) -> None:\n        \"\"\"Should reject keys from S3 that don't start with pk_.\"\"\"\n        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key\n\n        mock_response = MagicMock()\n        mock_response.text = \"invalid_key\"\n        mock_response.raise_for_status = MagicMock()\n\n        with patch(\"httpx.AsyncClient\") as mock_client:\n            mock_client.return_value.__aenter__.return_value.get = AsyncMock(\n                return_value=mock_response\n            )\n            with pytest.raises(OnyxError) as exc_info:\n                await get_stripe_publishable_key()\n\n        assert exc_info.value.status_code == 500\n        assert exc_info.value.error_code is OnyxErrorCode.INTERNAL_ERROR\n        assert exc_info.value.detail == \"Invalid Stripe publishable key format\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE\", None)\n    @patch(\n        \"ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_URL\",\n        \"https://example.com/key.txt\",\n    )\n    async def test_handles_s3_fetch_error(self) -> None:\n        \"\"\"Should return error when S3 fetch fails.\"\"\"\n        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key\n\n        with patch(\"httpx.AsyncClient\") as mock_client:\n            mock_client.return_value.__aenter__.return_value.get = AsyncMock(\n                side_effect=httpx.HTTPError(\"Connection failed\")\n            )\n            with pytest.raises(OnyxError) as exc_info:\n                await get_stripe_publishable_key()\n\n        assert exc_info.value.status_code == 500\n        assert exc_info.value.error_code is OnyxErrorCode.INTERNAL_ERROR\n        assert exc_info.value.detail == \"Failed to fetch Stripe publishable key\"\n\n    @pytest.mark.asyncio\n    @patch(\"ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE\", None)\n    @patch(\"ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_URL\", None)\n    async def test_error_when_no_config(self) -> None:\n        \"\"\"Should return error when neither env var nor S3 URL is configured.\"\"\"\n        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key\n\n        with pytest.raises(OnyxError) as exc_info:\n            await get_stripe_publishable_key()\n\n        assert exc_info.value.status_code == 500\n        assert exc_info.value.error_code is OnyxErrorCode.INTERNAL_ERROR\n        assert \"not configured\" in exc_info.value.detail\n\n    @pytest.mark.asyncio\n    @patch(\n        \"ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE\",\n        \"pk_test_cached\",\n    )\n    async def test_caches_key_after_first_fetch(self) -> None:\n        \"\"\"Should cache the key and return it on subsequent calls.\"\"\"\n        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key\n\n        # First call\n        result1 = await get_stripe_publishable_key()\n        assert result1.publishable_key == \"pk_test_cached\"\n\n        # Second call - should use cache even if we change the override\n        with patch(\n            \"ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE\",\n            \"pk_test_different\",\n        ):\n            result2 = await get_stripe_publishable_key()\n            # Should still return cached value\n            assert result2.publishable_key == \"pk_test_cached\"\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/tenants/test_product_gating.py",
    "content": "\"\"\"Tests for product gating functions.\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\n\nclass TestIsTenantGated:\n    \"\"\"Tests for is_tenant_gated - the O(1) Redis check used by middleware.\"\"\"\n\n    @pytest.mark.parametrize(\n        \"redis_result,expected\",\n        [\n            (True, True),\n            (False, False),\n            (1, True),  # Redis sismember can return int\n            (0, False),\n        ],\n    )\n    @patch(\"ee.onyx.server.tenants.product_gating.get_redis_replica_client\")\n    def test_tenant_gated_status(\n        self,\n        mock_get_redis: MagicMock,\n        redis_result: bool | int,\n        expected: bool,\n    ) -> None:\n        \"\"\"is_tenant_gated correctly interprets Redis sismember result.\"\"\"\n        from ee.onyx.server.tenants.product_gating import is_tenant_gated\n\n        mock_redis = MagicMock()\n        mock_redis.sismember.return_value = redis_result\n        mock_get_redis.return_value = mock_redis\n\n        assert is_tenant_gated(\"test_tenant\") is expected\n\n\nclass TestUpdateTenantGating:\n    \"\"\"Tests for update_tenant_gating - modifies Redis gated set.\"\"\"\n\n    @pytest.mark.parametrize(\n        \"status,should_add_to_set\",\n        [\n            (\"gated_access\", True),  # Only GATED_ACCESS adds to set\n            (\"active\", False),  # All other statuses remove from set\n        ],\n    )\n    @patch(\"ee.onyx.server.tenants.product_gating.get_redis_client\")\n    def test_gating_set_modification(\n        self,\n        mock_get_redis: MagicMock,\n        status: str,\n        should_add_to_set: bool,\n    ) -> None:\n        \"\"\"update_tenant_gating adds tenant to set only for GATED_ACCESS status.\"\"\"\n        from ee.onyx.server.tenants.product_gating import update_tenant_gating\n        from onyx.server.settings.models import ApplicationStatus\n\n        mock_redis = MagicMock()\n        mock_get_redis.return_value = mock_redis\n\n        update_tenant_gating(\"test_tenant\", ApplicationStatus(status))\n\n        if should_add_to_set:\n            mock_redis.sadd.assert_called_once()\n            mock_redis.srem.assert_not_called()\n        else:\n            mock_redis.srem.assert_called_once()\n            mock_redis.sadd.assert_not_called()\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/tenants/test_proxy.py",
    "content": "\"\"\"Tests for proxy endpoints for self-hosted data planes.\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport httpx\nimport pytest\nfrom fastapi import HTTPException\n\nfrom ee.onyx.server.license.models import LicensePayload\nfrom ee.onyx.server.license.models import PlanType\nfrom ee.onyx.server.tenants.proxy import _check_license_enforcement_enabled\nfrom ee.onyx.server.tenants.proxy import _extract_license_from_header\nfrom ee.onyx.server.tenants.proxy import forward_to_control_plane\nfrom ee.onyx.server.tenants.proxy import get_license_payload\nfrom ee.onyx.server.tenants.proxy import get_license_payload_allow_expired\nfrom ee.onyx.server.tenants.proxy import get_optional_license_payload\nfrom ee.onyx.server.tenants.proxy import verify_license_auth\n\n\n# All tests that use license auth need LICENSE_ENFORCEMENT_ENABLED=True\nLICENSE_ENABLED_PATCH = patch(\n    \"ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED\", True\n)\n\n\ndef make_license_payload(\n    tenant_id: str = \"tenant_123\",\n    expired: bool = False,\n) -> LicensePayload:\n    \"\"\"Helper to create a test LicensePayload.\"\"\"\n    now = datetime.now(timezone.utc)\n    if expired:\n        expires_at = now - timedelta(days=1)\n    else:\n        expires_at = now + timedelta(days=30)\n\n    return LicensePayload(\n        version=\"1.0\",\n        tenant_id=tenant_id,\n        organization_name=\"Test Org\",\n        issued_at=now - timedelta(days=1),\n        expires_at=expires_at,\n        seats=10,\n        plan_type=PlanType.MONTHLY,\n    )\n\n\nclass TestLicenseEnforcementCheck:\n    \"\"\"Tests for _check_license_enforcement_enabled function.\"\"\"\n\n    def test_raises_when_disabled(self) -> None:\n        \"\"\"Test that 501 is raised when LICENSE_ENFORCEMENT_ENABLED=False.\"\"\"\n        with patch(\"ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED\", False):\n            with pytest.raises(HTTPException) as exc_info:\n                _check_license_enforcement_enabled()\n\n            assert exc_info.value.status_code == 501\n            assert \"cloud data plane\" in str(exc_info.value.detail).lower()\n\n    def test_passes_when_enabled(self) -> None:\n        \"\"\"Test that no exception is raised when LICENSE_ENFORCEMENT_ENABLED=True.\"\"\"\n        with patch(\"ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED\", True):\n            _check_license_enforcement_enabled()  # Should not raise\n\n\nclass TestExtractLicenseFromHeader:\n    \"\"\"Tests for _extract_license_from_header helper function.\"\"\"\n\n    def test_valid_bearer_token(self) -> None:\n        \"\"\"Test extraction of valid Bearer token.\"\"\"\n        result = _extract_license_from_header(\"Bearer license_data_here\", required=True)\n        assert result == \"license_data_here\"\n\n    def test_bearer_with_spaces_in_token(self) -> None:\n        \"\"\"Test that token with spaces is handled correctly (splits on first space only).\"\"\"\n        result = _extract_license_from_header(\"Bearer token with spaces\", required=True)\n        assert result == \"token with spaces\"\n\n    def test_missing_header_required(self) -> None:\n        \"\"\"Test that missing header raises 401 when required.\"\"\"\n        with pytest.raises(HTTPException) as exc_info:\n            _extract_license_from_header(None, required=True)\n        assert exc_info.value.status_code == 401\n\n    def test_missing_header_optional(self) -> None:\n        \"\"\"Test that missing header returns None when not required.\"\"\"\n        result = _extract_license_from_header(None, required=False)\n        assert result is None\n\n    def test_non_bearer_required(self) -> None:\n        \"\"\"Test that non-Bearer auth raises 401 when required.\"\"\"\n        with pytest.raises(HTTPException) as exc_info:\n            _extract_license_from_header(\"Basic sometoken\", required=True)\n        assert exc_info.value.status_code == 401\n\n    def test_non_bearer_optional(self) -> None:\n        \"\"\"Test that non-Bearer auth returns None when not required.\"\"\"\n        result = _extract_license_from_header(\"Basic sometoken\", required=False)\n        assert result is None\n\n    def test_empty_string_required(self) -> None:\n        \"\"\"Test that empty string raises 401 when required.\"\"\"\n        with pytest.raises(HTTPException) as exc_info:\n            _extract_license_from_header(\"\", required=True)\n        assert exc_info.value.status_code == 401\n\n\nclass TestVerifyLicenseAuth:\n    \"\"\"Tests for verify_license_auth function.\"\"\"\n\n    def test_valid_license(self) -> None:\n        \"\"\"Test that a valid license passes verification.\"\"\"\n        payload = make_license_payload()\n\n        with (\n            LICENSE_ENABLED_PATCH,\n            patch(\n                \"ee.onyx.server.tenants.proxy.verify_license_signature\"\n            ) as mock_verify,\n        ):\n            mock_verify.return_value = payload\n\n            result = verify_license_auth(\"valid_license_data\", allow_expired=False)\n\n            assert result == payload\n            mock_verify.assert_called_once_with(\"valid_license_data\")\n\n    def test_invalid_signature(self) -> None:\n        \"\"\"Test that invalid signature raises 401.\"\"\"\n        with (\n            LICENSE_ENABLED_PATCH,\n            patch(\n                \"ee.onyx.server.tenants.proxy.verify_license_signature\"\n            ) as mock_verify,\n        ):\n            mock_verify.side_effect = ValueError(\"Invalid signature\")\n\n            with pytest.raises(HTTPException) as exc_info:\n                verify_license_auth(\"bad_license\", allow_expired=False)\n\n            assert exc_info.value.status_code == 401\n            assert \"Invalid license\" in str(exc_info.value.detail)\n\n    def test_expired_license_rejected(self) -> None:\n        \"\"\"Test that expired license raises 401 when not allowed.\"\"\"\n        payload = make_license_payload(expired=True)\n\n        with (\n            LICENSE_ENABLED_PATCH,\n            patch(\n                \"ee.onyx.server.tenants.proxy.verify_license_signature\"\n            ) as mock_verify,\n            patch(\"ee.onyx.server.tenants.proxy.is_license_valid\") as mock_valid,\n        ):\n            mock_verify.return_value = payload\n            mock_valid.return_value = False\n\n            with pytest.raises(HTTPException) as exc_info:\n                verify_license_auth(\"expired_license\", allow_expired=False)\n\n            assert exc_info.value.status_code == 401\n            assert \"expired\" in str(exc_info.value.detail).lower()\n\n    def test_expired_license_allowed(self) -> None:\n        \"\"\"Test that expired license is allowed when allow_expired=True.\"\"\"\n        payload = make_license_payload(expired=True)\n\n        with (\n            LICENSE_ENABLED_PATCH,\n            patch(\n                \"ee.onyx.server.tenants.proxy.verify_license_signature\"\n            ) as mock_verify,\n            patch(\"ee.onyx.server.tenants.proxy.is_license_valid\") as mock_valid,\n        ):\n            mock_verify.return_value = payload\n            mock_valid.return_value = False\n\n            result = verify_license_auth(\"expired_license\", allow_expired=True)\n\n            assert result == payload\n\n    def test_raises_501_when_enforcement_disabled(self) -> None:\n        \"\"\"Test that 501 is raised when LICENSE_ENFORCEMENT_ENABLED=False.\"\"\"\n        with patch(\"ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED\", False):\n            with pytest.raises(HTTPException) as exc_info:\n                verify_license_auth(\"any_license\", allow_expired=False)\n\n            assert exc_info.value.status_code == 501\n\n\nclass TestGetLicensePayload:\n    \"\"\"Tests for get_license_payload dependency.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_valid_license(self) -> None:\n        \"\"\"Test that valid license returns payload.\"\"\"\n        payload = make_license_payload()\n\n        with (\n            LICENSE_ENABLED_PATCH,\n            patch(\n                \"ee.onyx.server.tenants.proxy.verify_license_signature\"\n            ) as mock_verify,\n            patch(\"ee.onyx.server.tenants.proxy.is_license_valid\") as mock_valid,\n        ):\n            mock_verify.return_value = payload\n            mock_valid.return_value = True\n\n            result = await get_license_payload(\"Bearer valid_license_data\")\n\n            assert result == payload\n\n    @pytest.mark.asyncio\n    async def test_missing_auth_header(self) -> None:\n        \"\"\"Test that missing Authorization header raises 401.\"\"\"\n        with LICENSE_ENABLED_PATCH:\n            with pytest.raises(HTTPException) as exc_info:\n                await get_license_payload(None)\n\n            assert exc_info.value.status_code == 401\n            assert \"Missing or invalid authorization header\" in str(\n                exc_info.value.detail\n            )\n\n    @pytest.mark.asyncio\n    async def test_invalid_auth_format(self) -> None:\n        \"\"\"Test that non-Bearer auth raises 401.\"\"\"\n        with LICENSE_ENABLED_PATCH:\n            with pytest.raises(HTTPException) as exc_info:\n                await get_license_payload(\"Basic sometoken\")\n\n            assert exc_info.value.status_code == 401\n\n\nclass TestGetLicensePayloadAllowExpired:\n    \"\"\"Tests for get_license_payload_allow_expired dependency.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_expired_license_allowed(self) -> None:\n        \"\"\"Test that expired license is accepted.\"\"\"\n        payload = make_license_payload(expired=True)\n\n        with (\n            LICENSE_ENABLED_PATCH,\n            patch(\n                \"ee.onyx.server.tenants.proxy.verify_license_signature\"\n            ) as mock_verify,\n        ):\n            mock_verify.return_value = payload\n\n            result = await get_license_payload_allow_expired(\"Bearer expired_license\")\n\n            assert result == payload\n\n    @pytest.mark.asyncio\n    async def test_missing_auth_header(self) -> None:\n        \"\"\"Test that missing Authorization header raises 401.\"\"\"\n        with LICENSE_ENABLED_PATCH:\n            with pytest.raises(HTTPException) as exc_info:\n                await get_license_payload_allow_expired(None)\n\n            assert exc_info.value.status_code == 401\n\n\nclass TestGetOptionalLicensePayload:\n    \"\"\"Tests for get_optional_license_payload dependency.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_no_auth_returns_none(self) -> None:\n        \"\"\"Test that missing auth returns None (for new customers).\"\"\"\n        with LICENSE_ENABLED_PATCH:\n            result = await get_optional_license_payload(None)\n            assert result is None\n\n    @pytest.mark.asyncio\n    async def test_non_bearer_returns_none(self) -> None:\n        \"\"\"Test that non-Bearer auth returns None.\"\"\"\n        with LICENSE_ENABLED_PATCH:\n            result = await get_optional_license_payload(\"Basic sometoken\")\n            assert result is None\n\n    @pytest.mark.asyncio\n    async def test_valid_license_returns_payload(self) -> None:\n        \"\"\"Test that valid license returns payload.\"\"\"\n        payload = make_license_payload()\n\n        with (\n            LICENSE_ENABLED_PATCH,\n            patch(\n                \"ee.onyx.server.tenants.proxy.verify_license_signature\"\n            ) as mock_verify,\n        ):\n            mock_verify.return_value = payload\n\n            result = await get_optional_license_payload(\"Bearer valid_license\")\n\n            assert result == payload\n\n    @pytest.mark.asyncio\n    async def test_raises_501_when_enforcement_disabled(self) -> None:\n        \"\"\"Test that 501 is raised when LICENSE_ENFORCEMENT_ENABLED=False.\"\"\"\n        with patch(\"ee.onyx.server.tenants.proxy.LICENSE_ENFORCEMENT_ENABLED\", False):\n            with pytest.raises(HTTPException) as exc_info:\n                await get_optional_license_payload(None)\n\n            assert exc_info.value.status_code == 501\n\n\nclass TestForwardToControlPlane:\n    \"\"\"Tests for forward_to_control_plane function.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_successful_get_request(self) -> None:\n        \"\"\"Test successful GET request forwarding.\"\"\"\n        mock_response = MagicMock()\n        mock_response.json.return_value = {\"data\": \"test\"}\n        mock_response.raise_for_status = MagicMock()\n\n        with (\n            patch(\n                \"ee.onyx.server.tenants.proxy.generate_data_plane_token\"\n            ) as mock_token,\n            patch(\"ee.onyx.server.tenants.proxy.httpx.AsyncClient\") as mock_client,\n            patch(\n                \"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\",\n                \"https://control.example.com\",\n            ),\n        ):\n            mock_token.return_value = \"cp_token\"\n            mock_client.return_value.__aenter__.return_value.get = AsyncMock(\n                return_value=mock_response\n            )\n\n            result = await forward_to_control_plane(\n                \"GET\", \"/test-endpoint\", params={\"key\": \"value\"}\n            )\n\n            assert result == {\"data\": \"test\"}\n\n    @pytest.mark.asyncio\n    async def test_successful_post_request(self) -> None:\n        \"\"\"Test successful POST request forwarding.\"\"\"\n        mock_response = MagicMock()\n        mock_response.json.return_value = {\"url\": \"https://checkout.stripe.com\"}\n        mock_response.raise_for_status = MagicMock()\n\n        with (\n            patch(\n                \"ee.onyx.server.tenants.proxy.generate_data_plane_token\"\n            ) as mock_token,\n            patch(\"ee.onyx.server.tenants.proxy.httpx.AsyncClient\") as mock_client,\n            patch(\n                \"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\",\n                \"https://control.example.com\",\n            ),\n        ):\n            mock_token.return_value = \"cp_token\"\n            mock_client.return_value.__aenter__.return_value.post = AsyncMock(\n                return_value=mock_response\n            )\n\n            result = await forward_to_control_plane(\n                \"POST\", \"/create-checkout-session\", body={\"tenant_id\": \"t1\"}\n            )\n\n            assert result == {\"url\": \"https://checkout.stripe.com\"}\n\n    @pytest.mark.asyncio\n    async def test_http_error_with_detail(self) -> None:\n        \"\"\"Test HTTP error handling with detail from response.\"\"\"\n        mock_response = MagicMock()\n        mock_response.status_code = 404\n        mock_response.json.return_value = {\"detail\": \"Tenant not found\"}\n        mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(\n            \"Not Found\",\n            request=MagicMock(),\n            response=mock_response,\n        )\n\n        with (\n            patch(\n                \"ee.onyx.server.tenants.proxy.generate_data_plane_token\"\n            ) as mock_token,\n            patch(\"ee.onyx.server.tenants.proxy.httpx.AsyncClient\") as mock_client,\n            patch(\n                \"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\",\n                \"https://control.example.com\",\n            ),\n        ):\n            mock_token.return_value = \"cp_token\"\n            mock_client.return_value.__aenter__.return_value.get = AsyncMock(\n                return_value=mock_response\n            )\n\n            with pytest.raises(HTTPException) as exc_info:\n                await forward_to_control_plane(\"GET\", \"/billing-information\")\n\n            assert exc_info.value.status_code == 404\n            assert \"Tenant not found\" in str(exc_info.value.detail)\n\n    @pytest.mark.asyncio\n    async def test_connection_error(self) -> None:\n        \"\"\"Test connection error handling.\"\"\"\n        with (\n            patch(\n                \"ee.onyx.server.tenants.proxy.generate_data_plane_token\"\n            ) as mock_token,\n            patch(\"ee.onyx.server.tenants.proxy.httpx.AsyncClient\") as mock_client,\n            patch(\n                \"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\",\n                \"https://control.example.com\",\n            ),\n        ):\n            mock_token.return_value = \"cp_token\"\n            mock_client.return_value.__aenter__.return_value.get = AsyncMock(\n                side_effect=httpx.RequestError(\"Connection refused\")\n            )\n\n            with pytest.raises(HTTPException) as exc_info:\n                await forward_to_control_plane(\"GET\", \"/test\")\n\n            assert exc_info.value.status_code == 502\n            assert \"Failed to connect to control plane\" in str(exc_info.value.detail)\n\n    @pytest.mark.asyncio\n    async def test_follows_redirects(self) -> None:\n        \"\"\"Test that AsyncClient is created with follow_redirects=True.\n\n        The control plane may sit behind a reverse proxy that returns\n        308 (HTTP→HTTPS). httpx does not follow redirects by default,\n        so we must explicitly opt in.\n        \"\"\"\n        mock_response = MagicMock()\n        mock_response.json.return_value = {\"ok\": True}\n        mock_response.raise_for_status = MagicMock()\n\n        with (\n            patch(\n                \"ee.onyx.server.tenants.proxy.generate_data_plane_token\"\n            ) as mock_token,\n            patch(\"ee.onyx.server.tenants.proxy.httpx.AsyncClient\") as mock_client,\n            patch(\n                \"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\",\n                \"http://control.example.com\",\n            ),\n        ):\n            mock_token.return_value = \"cp_token\"\n            mock_client.return_value.__aenter__.return_value.get = AsyncMock(\n                return_value=mock_response\n            )\n\n            await forward_to_control_plane(\"GET\", \"/test\")\n\n            mock_client.assert_called_once_with(timeout=30.0, follow_redirects=True)\n\n    @pytest.mark.asyncio\n    async def test_unsupported_method(self) -> None:\n        \"\"\"Test that unsupported HTTP methods raise ValueError.\"\"\"\n        with (\n            patch(\n                \"ee.onyx.server.tenants.proxy.generate_data_plane_token\"\n            ) as mock_token,\n            patch(\"ee.onyx.server.tenants.proxy.httpx.AsyncClient\"),\n            patch(\n                \"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\",\n                \"https://control.example.com\",\n            ),\n        ):\n            mock_token.return_value = \"cp_token\"\n\n            with pytest.raises(ValueError, match=\"Unsupported HTTP method\"):\n                await forward_to_control_plane(\"DELETE\", \"/test\")\n\n\nclass TestProxyCheckoutSessionWithSeats:\n    \"\"\"Tests for proxy checkout session with seats parameter.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_includes_seats_in_body_when_provided(self) -> None:\n        \"\"\"Should include seats in request body when provided.\"\"\"\n        from ee.onyx.server.tenants.proxy import proxy_create_checkout_session\n        from ee.onyx.server.tenants.proxy import CreateCheckoutSessionRequest\n\n        mock_response = MagicMock()\n        mock_response.json.return_value = {\"url\": \"https://checkout.stripe.com/session\"}\n        mock_response.raise_for_status = MagicMock()\n\n        license_payload = make_license_payload()\n\n        with (\n            LICENSE_ENABLED_PATCH,\n            patch(\n                \"ee.onyx.server.tenants.proxy.generate_data_plane_token\"\n            ) as mock_token,\n            patch(\"ee.onyx.server.tenants.proxy.httpx.AsyncClient\") as mock_client,\n            patch(\n                \"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\",\n                \"https://control.example.com\",\n            ),\n        ):\n            mock_token.return_value = \"cp_token\"\n            mock_post = AsyncMock(return_value=mock_response)\n            mock_client.return_value.__aenter__.return_value.post = mock_post\n\n            request = CreateCheckoutSessionRequest(\n                billing_period=\"monthly\",\n                seats=25,\n                email=\"test@example.com\",\n            )\n            await proxy_create_checkout_session(\n                request_body=request,\n                license_payload=license_payload,\n            )\n\n            # Verify seats was included in the body\n            call_kwargs = mock_post.call_args[1]\n            body = call_kwargs[\"json\"]\n            assert body[\"seats\"] == 25\n            assert body[\"billing_period\"] == \"monthly\"\n            assert body[\"email\"] == \"test@example.com\"\n            assert body[\"tenant_id\"] == \"tenant_123\"\n\n    @pytest.mark.asyncio\n    async def test_excludes_seats_when_not_provided(self) -> None:\n        \"\"\"Should not include seats in request body when not provided.\"\"\"\n        from ee.onyx.server.tenants.proxy import proxy_create_checkout_session\n        from ee.onyx.server.tenants.proxy import CreateCheckoutSessionRequest\n\n        mock_response = MagicMock()\n        mock_response.json.return_value = {\"url\": \"https://checkout.stripe.com/session\"}\n        mock_response.raise_for_status = MagicMock()\n\n        license_payload = make_license_payload()\n\n        with (\n            LICENSE_ENABLED_PATCH,\n            patch(\n                \"ee.onyx.server.tenants.proxy.generate_data_plane_token\"\n            ) as mock_token,\n            patch(\"ee.onyx.server.tenants.proxy.httpx.AsyncClient\") as mock_client,\n            patch(\n                \"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\",\n                \"https://control.example.com\",\n            ),\n        ):\n            mock_token.return_value = \"cp_token\"\n            mock_post = AsyncMock(return_value=mock_response)\n            mock_client.return_value.__aenter__.return_value.post = mock_post\n\n            request = CreateCheckoutSessionRequest(billing_period=\"annual\")\n            await proxy_create_checkout_session(\n                request_body=request,\n                license_payload=license_payload,\n            )\n\n            # Verify seats was NOT included in the body\n            call_kwargs = mock_post.call_args[1]\n            body = call_kwargs[\"json\"]\n            assert \"seats\" not in body\n            assert body[\"billing_period\"] == \"annual\"\n\n    @pytest.mark.asyncio\n    async def test_includes_seats_for_new_customer(self) -> None:\n        \"\"\"Should include seats for new customer without license.\"\"\"\n        from ee.onyx.server.tenants.proxy import proxy_create_checkout_session\n        from ee.onyx.server.tenants.proxy import CreateCheckoutSessionRequest\n\n        mock_response = MagicMock()\n        mock_response.json.return_value = {\"url\": \"https://checkout.stripe.com/session\"}\n        mock_response.raise_for_status = MagicMock()\n\n        with (\n            LICENSE_ENABLED_PATCH,\n            patch(\n                \"ee.onyx.server.tenants.proxy.generate_data_plane_token\"\n            ) as mock_token,\n            patch(\"ee.onyx.server.tenants.proxy.httpx.AsyncClient\") as mock_client,\n            patch(\n                \"ee.onyx.server.tenants.proxy.CONTROL_PLANE_API_BASE_URL\",\n                \"https://control.example.com\",\n            ),\n        ):\n            mock_token.return_value = \"cp_token\"\n            mock_post = AsyncMock(return_value=mock_response)\n            mock_client.return_value.__aenter__.return_value.post = mock_post\n\n            request = CreateCheckoutSessionRequest(\n                billing_period=\"monthly\",\n                seats=10,\n            )\n            # New customer has no license\n            await proxy_create_checkout_session(\n                request_body=request,\n                license_payload=None,\n            )\n\n            # Verify seats was included but no tenant_id\n            call_kwargs = mock_post.call_args[1]\n            body = call_kwargs[\"json\"]\n            assert body[\"seats\"] == 10\n            assert \"tenant_id\" not in body\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/server/tenants/test_schema_management.py",
    "content": "\"\"\"Tests for schema management functions.\"\"\"\n\nimport pytest\n\nfrom ee.onyx.server.tenants.schema_management import drop_schema\nfrom ee.onyx.server.tenants.schema_management import validate_tenant_id\n\n\nclass TestValidateTenantId:\n    \"\"\"Tests for validate_tenant_id - validates tenant ID format for SQL safety.\"\"\"\n\n    @pytest.mark.parametrize(\n        \"tenant_id\",\n        [\n            # Standard UUID format\n            \"tenant_0aef62e7-9fbf-4bb6-8894-f1441fca6745\",\n            \"tenant_abcd1234-5678-90ab-cdef-1234567890ab\",\n            \"tenant_00000000-0000-0000-0000-000000000000\",\n            \"tenant_ffffffff-ffff-ffff-ffff-ffffffffffff\",\n            # AWS instance ID format\n            \"tenant_i-0d8d7eaa21f5f2fae\",\n            \"tenant_i-0123456789abcdef0\",\n            \"tenant_i-abc\",\n        ],\n    )\n    def test_valid_tenant_ids(self, tenant_id: str) -> None:\n        \"\"\"Valid tenant IDs should pass validation.\"\"\"\n        assert validate_tenant_id(tenant_id) is True\n\n    @pytest.mark.parametrize(\n        \"tenant_id,description\",\n        [\n            # Missing tenant_ prefix\n            (\"0aef62e7-9fbf-4bb6-8894-f1441fca6745\", \"missing prefix\"),\n            (\"public\", \"reserved schema name\"),\n            (\"pg_catalog\", \"system schema\"),\n            # Invalid formats\n            (\"tenant_abc123\", \"not UUID or instance ID format\"),\n            (\"tenant_\", \"empty after prefix\"),\n            (\"tenant_i-\", \"empty instance ID\"),\n            # SQL injection attempts\n            (\"tenant_; DROP TABLE users;--\", \"SQL injection with semicolon\"),\n            ('tenant_\" OR 1=1--', \"SQL injection with quote\"),\n            (\"tenant_abc'; DROP SCHEMA public;--\", \"SQL injection attempt\"),\n            # Other invalid inputs\n            (\"tenant_ABCD1234-5678-90AB-CDEF-1234567890AB\", \"uppercase not allowed\"),\n            (\"../../../etc/passwd\", \"path traversal\"),\n            (\"\", \"empty string\"),\n            (\"tenant_i-GHIJ\", \"invalid hex in instance ID\"),\n        ],\n    )\n    def test_invalid_tenant_ids(self, tenant_id: str, description: str) -> None:\n        \"\"\"Invalid tenant IDs should fail validation.\"\"\"\n        assert validate_tenant_id(tenant_id) is False, f\"Should reject: {description}\"\n\n    def test_uuid_must_be_complete(self) -> None:\n        \"\"\"UUID must have all sections with correct lengths.\"\"\"\n        # Too short\n        assert validate_tenant_id(\"tenant_0aef62e7-9fbf-4bb6-8894\") is False\n        # Too long\n        assert (\n            validate_tenant_id(\"tenant_0aef62e7-9fbf-4bb6-8894-f1441fca6745-extra\")\n            is False\n        )\n        # Wrong section lengths\n        assert validate_tenant_id(\"tenant_0aef62e7-9fbf-4bb6-8894-f1441fca674\") is False\n\n\nclass TestDropSchemaValidation:\n    \"\"\"Tests for drop_schema input validation (no DB required - fails before SQL).\"\"\"\n\n    @pytest.mark.parametrize(\n        \"dangerous_input,description\",\n        [\n            (\"public\", \"system schema\"),\n            (\"pg_catalog\", \"postgres catalog\"),\n            (\"tenant_; DROP TABLE users;--\", \"SQL injection with semicolon\"),\n            ('tenant_\" OR 1=1--', \"SQL injection with quote\"),\n            (\"tenant_abc123\", \"invalid format - not UUID\"),\n            (\"\", \"empty string\"),\n        ],\n    )\n    def test_drop_schema_rejects_invalid_inputs(\n        self, dangerous_input: str, description: str\n    ) -> None:\n        \"\"\"drop_schema should reject invalid tenant IDs before any SQL runs.\"\"\"\n        with pytest.raises(ValueError, match=\"Invalid tenant_id format\") as exc_info:\n            drop_schema(dangerous_input)\n        assert dangerous_input in str(\n            exc_info.value\n        ), f\"Error should include input ({description})\"\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/utils/test_encryption.py",
    "content": "\"\"\"Tests for EE AES-CBC encryption/decryption with explicit key support.\n\nWith EE mode enabled (via conftest), fetch_versioned_implementation resolves\nto the EE implementations, so no patching of the MIT layer is needed.\n\"\"\"\n\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom ee.onyx.utils.encryption import _decrypt_bytes\nfrom ee.onyx.utils.encryption import _encrypt_string\nfrom ee.onyx.utils.encryption import _get_trimmed_key\nfrom ee.onyx.utils.encryption import decrypt_bytes_to_string\nfrom ee.onyx.utils.encryption import encrypt_string_to_bytes\n\nEE_MODULE = \"ee.onyx.utils.encryption\"\n\n# Keys must be exactly 16, 24, or 32 bytes for AES\nKEY_16 = \"a\" * 16\nKEY_16_ALT = \"b\" * 16\nKEY_24 = \"d\" * 24\nKEY_32 = \"c\" * 32\n\n\n@pytest.fixture(autouse=True)\ndef _clear_key_cache() -> None:\n    _get_trimmed_key.cache_clear()\n\n\nclass TestEncryptDecryptRoundTrip:\n    def test_roundtrip_with_env_key(self) -> None:\n        with patch(f\"{EE_MODULE}.ENCRYPTION_KEY_SECRET\", KEY_16):\n            encrypted = _encrypt_string(\"hello world\")\n            assert encrypted != b\"hello world\"\n            assert _decrypt_bytes(encrypted) == \"hello world\"\n\n    def test_roundtrip_with_explicit_key(self) -> None:\n        encrypted = _encrypt_string(\"secret data\", key=KEY_32)\n        assert encrypted != b\"secret data\"\n        assert _decrypt_bytes(encrypted, key=KEY_32) == \"secret data\"\n\n    def test_roundtrip_no_key(self) -> None:\n        \"\"\"Without any key, data is raw-encoded (no encryption).\"\"\"\n        with patch(f\"{EE_MODULE}.ENCRYPTION_KEY_SECRET\", \"\"):\n            encrypted = _encrypt_string(\"plain text\")\n            assert encrypted == b\"plain text\"\n            assert _decrypt_bytes(encrypted) == \"plain text\"\n\n    def test_explicit_key_overrides_env(self) -> None:\n        with patch(f\"{EE_MODULE}.ENCRYPTION_KEY_SECRET\", KEY_16):\n            encrypted = _encrypt_string(\"data\", key=KEY_16_ALT)\n            with pytest.raises(ValueError):\n                _decrypt_bytes(encrypted, key=KEY_16)\n            assert _decrypt_bytes(encrypted, key=KEY_16_ALT) == \"data\"\n\n    def test_different_encryptions_produce_different_bytes(self) -> None:\n        \"\"\"Each encryption uses a random IV, so results differ.\"\"\"\n        a = _encrypt_string(\"same\", key=KEY_16)\n        b = _encrypt_string(\"same\", key=KEY_16)\n        assert a != b\n\n    def test_roundtrip_empty_string(self) -> None:\n        encrypted = _encrypt_string(\"\", key=KEY_16)\n        assert encrypted != b\"\"\n        assert _decrypt_bytes(encrypted, key=KEY_16) == \"\"\n\n    def test_roundtrip_unicode(self) -> None:\n        text = \"日本語テスト 🔐 émojis\"\n        encrypted = _encrypt_string(text, key=KEY_16)\n        assert _decrypt_bytes(encrypted, key=KEY_16) == text\n\n\nclass TestDecryptFallbackBehavior:\n    def test_wrong_env_key_falls_back_to_raw_decode(self) -> None:\n        \"\"\"Default key path: AES fails on non-AES data → fallback to raw decode.\"\"\"\n        raw = \"readable text\".encode()\n        with patch(f\"{EE_MODULE}.ENCRYPTION_KEY_SECRET\", KEY_16):\n            assert _decrypt_bytes(raw) == \"readable text\"\n\n    def test_explicit_wrong_key_raises(self) -> None:\n        \"\"\"Explicit key path: AES fails → raises, no fallback.\"\"\"\n        encrypted = _encrypt_string(\"secret\", key=KEY_16)\n        with pytest.raises(ValueError):\n            _decrypt_bytes(encrypted, key=KEY_16_ALT)\n\n    def test_explicit_none_key_with_no_env(self) -> None:\n        \"\"\"key=None with empty env → raw decode.\"\"\"\n        with patch(f\"{EE_MODULE}.ENCRYPTION_KEY_SECRET\", \"\"):\n            assert _decrypt_bytes(b\"hello\", key=None) == \"hello\"\n\n    def test_explicit_empty_string_key(self) -> None:\n        \"\"\"key='' means no encryption.\"\"\"\n        encrypted = _encrypt_string(\"test\", key=\"\")\n        assert encrypted == b\"test\"\n        assert _decrypt_bytes(encrypted, key=\"\") == \"test\"\n\n\nclass TestKeyValidation:\n    def test_key_too_short_raises(self) -> None:\n        with pytest.raises(RuntimeError, match=\"too short\"):\n            _encrypt_string(\"data\", key=\"short\")\n\n    def test_16_byte_key(self) -> None:\n        encrypted = _encrypt_string(\"data\", key=KEY_16)\n        assert _decrypt_bytes(encrypted, key=KEY_16) == \"data\"\n\n    def test_24_byte_key(self) -> None:\n        encrypted = _encrypt_string(\"data\", key=KEY_24)\n        assert _decrypt_bytes(encrypted, key=KEY_24) == \"data\"\n\n    def test_32_byte_key(self) -> None:\n        encrypted = _encrypt_string(\"data\", key=KEY_32)\n        assert _decrypt_bytes(encrypted, key=KEY_32) == \"data\"\n\n    def test_long_key_truncated_to_32(self) -> None:\n        \"\"\"Keys longer than 32 bytes are truncated to 32.\"\"\"\n        long_key = \"e\" * 64\n        encrypted = _encrypt_string(\"data\", key=long_key)\n        assert _decrypt_bytes(encrypted, key=long_key) == \"data\"\n\n    def test_20_byte_key_trimmed_to_16(self) -> None:\n        \"\"\"A 20-byte key is trimmed to the largest valid AES size that fits (16).\"\"\"\n        key_20 = \"f\" * 20\n        encrypted = _encrypt_string(\"data\", key=key_20)\n        assert _decrypt_bytes(encrypted, key=key_20) == \"data\"\n\n        # Verify it was trimmed to 16 by checking that the first 16 bytes\n        # of the key can also decrypt it\n        key_16_same_prefix = \"f\" * 16\n        assert _decrypt_bytes(encrypted, key=key_16_same_prefix) == \"data\"\n\n    def test_25_byte_key_trimmed_to_24(self) -> None:\n        \"\"\"A 25-byte key is trimmed to the largest valid AES size that fits (24).\"\"\"\n        key_25 = \"g\" * 25\n        encrypted = _encrypt_string(\"data\", key=key_25)\n        assert _decrypt_bytes(encrypted, key=key_25) == \"data\"\n\n        key_24_same_prefix = \"g\" * 24\n        assert _decrypt_bytes(encrypted, key=key_24_same_prefix) == \"data\"\n\n    def test_30_byte_key_trimmed_to_24(self) -> None:\n        \"\"\"A 30-byte key is trimmed to the largest valid AES size that fits (24).\"\"\"\n        key_30 = \"h\" * 30\n        encrypted = _encrypt_string(\"data\", key=key_30)\n        assert _decrypt_bytes(encrypted, key=key_30) == \"data\"\n\n        key_24_same_prefix = \"h\" * 24\n        assert _decrypt_bytes(encrypted, key=key_24_same_prefix) == \"data\"\n\n\nclass TestWrapperFunctions:\n    \"\"\"Test encrypt_string_to_bytes / decrypt_bytes_to_string pass key through.\n\n    With EE mode enabled, the wrappers resolve to EE implementations automatically.\n    \"\"\"\n\n    def test_wrapper_passes_key(self) -> None:\n        encrypted = encrypt_string_to_bytes(\"payload\", key=KEY_16)\n        assert decrypt_bytes_to_string(encrypted, key=KEY_16) == \"payload\"\n\n    def test_wrapper_no_key_uses_env(self) -> None:\n        with patch(f\"{EE_MODULE}.ENCRYPTION_KEY_SECRET\", KEY_32):\n            encrypted = encrypt_string_to_bytes(\"payload\")\n            assert decrypt_bytes_to_string(encrypted) == \"payload\"\n"
  },
  {
    "path": "backend/tests/unit/ee/onyx/utils/test_license_utils.py",
    "content": "\"\"\"Tests for license signature verification utilities.\"\"\"\n\nimport base64\nimport json\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom unittest.mock import patch\n\nimport pytest\nfrom cryptography.hazmat.primitives import hashes\nfrom cryptography.hazmat.primitives.asymmetric import padding\nfrom cryptography.hazmat.primitives.asymmetric import rsa\n\nfrom ee.onyx.server.license.models import LicensePayload\nfrom ee.onyx.server.license.models import PlanType\nfrom ee.onyx.utils.license import get_license_status\nfrom ee.onyx.utils.license import is_license_valid\nfrom ee.onyx.utils.license import verify_license_signature\nfrom onyx.server.settings.models import ApplicationStatus\n\n\ndef generate_test_key_pair() -> tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey]:\n    \"\"\"Generate a test RSA key pair.\"\"\"\n    private_key = rsa.generate_private_key(\n        public_exponent=65537,\n        key_size=2048,  # Use smaller key for faster tests\n    )\n    public_key = private_key.public_key()\n    return private_key, public_key\n\n\ndef create_signed_license(\n    private_key: rsa.RSAPrivateKey,\n    payload: LicensePayload,\n) -> str:\n    \"\"\"Create a signed license for testing.\"\"\"\n    payload_json = json.dumps(payload.model_dump(mode=\"json\"), sort_keys=True)\n    signature = private_key.sign(\n        payload_json.encode(),\n        padding.PSS(\n            mgf=padding.MGF1(hashes.SHA256()),\n            salt_length=padding.PSS.MAX_LENGTH,\n        ),\n        hashes.SHA256(),\n    )\n\n    license_data = {\n        \"payload\": payload.model_dump(mode=\"json\"),\n        \"signature\": base64.b64encode(signature).decode(),\n    }\n\n    return base64.b64encode(json.dumps(license_data).encode()).decode()\n\n\nclass TestVerifyLicenseSignature:\n    \"\"\"Tests for verify_license_signature function.\"\"\"\n\n    def test_valid_signature(self) -> None:\n        \"\"\"Test that a valid signature passes verification.\"\"\"\n        private_key, public_key = generate_test_key_pair()\n\n        payload = LicensePayload(\n            version=\"1.0\",\n            tenant_id=\"tenant_123\",\n            issued_at=datetime(2025, 1, 1, tzinfo=timezone.utc),\n            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),\n            seats=50,\n            plan_type=PlanType.MONTHLY,\n        )\n\n        license_data = create_signed_license(private_key, payload)\n\n        # Patch the _get_public_key function to return our test key\n        with patch(\"ee.onyx.utils.license._get_public_key\", return_value=public_key):\n            result = verify_license_signature(license_data)\n\n        assert result.tenant_id == \"tenant_123\"\n        assert result.seats == 50\n        assert result.plan_type == PlanType.MONTHLY\n\n    def test_invalid_signature(self) -> None:\n        \"\"\"Test that an invalid signature fails verification.\"\"\"\n        private_key, public_key = generate_test_key_pair()\n        _, different_public_key = generate_test_key_pair()\n\n        payload = LicensePayload(\n            version=\"1.0\",\n            tenant_id=\"tenant_123\",\n            issued_at=datetime(2025, 1, 1, tzinfo=timezone.utc),\n            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),\n            seats=50,\n            plan_type=PlanType.MONTHLY,\n        )\n\n        license_data = create_signed_license(private_key, payload)\n\n        # Patch _get_public_key to return a different key (signature won't match)\n        with patch(\n            \"ee.onyx.utils.license._get_public_key\",\n            return_value=different_public_key,\n        ):\n            with pytest.raises(ValueError, match=\"Invalid license signature\"):\n                verify_license_signature(license_data)\n\n    def test_tampered_payload(self) -> None:\n        \"\"\"Test that a tampered payload fails verification.\"\"\"\n        private_key, public_key = generate_test_key_pair()\n\n        payload = LicensePayload(\n            version=\"1.0\",\n            tenant_id=\"tenant_123\",\n            issued_at=datetime(2025, 1, 1, tzinfo=timezone.utc),\n            expires_at=datetime(2025, 12, 31, tzinfo=timezone.utc),\n            seats=50,\n            plan_type=PlanType.MONTHLY,\n        )\n\n        # Create valid signature\n        payload_json = json.dumps(payload.model_dump(mode=\"json\"), sort_keys=True)\n        signature = private_key.sign(\n            payload_json.encode(),\n            padding.PSS(\n                mgf=padding.MGF1(hashes.SHA256()),\n                salt_length=padding.PSS.MAX_LENGTH,\n            ),\n            hashes.SHA256(),\n        )\n\n        # Tamper with the payload (change seats)\n        tampered_payload = payload.model_dump(mode=\"json\")\n        tampered_payload[\"seats\"] = 1000  # Changed!\n\n        license_data = {\n            \"payload\": tampered_payload,\n            \"signature\": base64.b64encode(signature).decode(),\n        }\n\n        encoded_license = base64.b64encode(json.dumps(license_data).encode()).decode()\n\n        # Patch _get_public_key to return our test key\n        with patch(\"ee.onyx.utils.license._get_public_key\", return_value=public_key):\n            with pytest.raises(ValueError, match=\"Invalid license signature\"):\n                verify_license_signature(encoded_license)\n\n    def test_invalid_base64(self) -> None:\n        \"\"\"Test that invalid base64 fails.\"\"\"\n        with pytest.raises(ValueError):\n            verify_license_signature(\"not-valid-base64!!!\")\n\n    def test_invalid_json(self) -> None:\n        \"\"\"Test that invalid JSON fails.\"\"\"\n        invalid_data = base64.b64encode(b\"not json\").decode()\n        with pytest.raises(ValueError):\n            verify_license_signature(invalid_data)\n\n\nclass TestGetLicenseStatus:\n    \"\"\"Tests for get_license_status function.\"\"\"\n\n    def test_active_license(self) -> None:\n        \"\"\"Test status for an active license.\"\"\"\n        payload = LicensePayload(\n            version=\"1.0\",\n            tenant_id=\"tenant_123\",\n            issued_at=datetime.now(timezone.utc) - timedelta(days=30),\n            expires_at=datetime.now(timezone.utc) + timedelta(days=30),\n            seats=50,\n            plan_type=PlanType.MONTHLY,\n        )\n\n        status = get_license_status(payload)\n        assert status == ApplicationStatus.ACTIVE\n\n    def test_expired_license_no_grace(self) -> None:\n        \"\"\"Test status for an expired license without grace period.\"\"\"\n        payload = LicensePayload(\n            version=\"1.0\",\n            tenant_id=\"tenant_123\",\n            issued_at=datetime.now(timezone.utc) - timedelta(days=60),\n            expires_at=datetime.now(timezone.utc) - timedelta(days=1),\n            seats=50,\n            plan_type=PlanType.MONTHLY,\n        )\n\n        status = get_license_status(payload)\n        assert status == ApplicationStatus.GATED_ACCESS\n\n    def test_expired_license_within_grace(self) -> None:\n        \"\"\"Test status for an expired license within grace period.\"\"\"\n        payload = LicensePayload(\n            version=\"1.0\",\n            tenant_id=\"tenant_123\",\n            issued_at=datetime.now(timezone.utc) - timedelta(days=60),\n            expires_at=datetime.now(timezone.utc) - timedelta(days=1),\n            seats=50,\n            plan_type=PlanType.MONTHLY,\n        )\n\n        grace_end = datetime.now(timezone.utc) + timedelta(days=29)\n        status = get_license_status(payload, grace_period_end=grace_end)\n        assert status == ApplicationStatus.GRACE_PERIOD\n\n    def test_grace_period_expired(self) -> None:\n        \"\"\"Test status when grace period has expired.\"\"\"\n        payload = LicensePayload(\n            version=\"1.0\",\n            tenant_id=\"tenant_123\",\n            issued_at=datetime.now(timezone.utc) - timedelta(days=90),\n            expires_at=datetime.now(timezone.utc) - timedelta(days=31),\n            seats=50,\n            plan_type=PlanType.MONTHLY,\n        )\n\n        grace_end = datetime.now(timezone.utc) - timedelta(days=1)\n        status = get_license_status(payload, grace_period_end=grace_end)\n        assert status == ApplicationStatus.GATED_ACCESS\n\n\nclass TestIsLicenseValid:\n    \"\"\"Tests for is_license_valid function.\"\"\"\n\n    def test_valid_license(self) -> None:\n        \"\"\"Test that an unexpired license is valid.\"\"\"\n        payload = LicensePayload(\n            version=\"1.0\",\n            tenant_id=\"tenant_123\",\n            issued_at=datetime.now(timezone.utc) - timedelta(days=30),\n            expires_at=datetime.now(timezone.utc) + timedelta(days=30),\n            seats=50,\n            plan_type=PlanType.MONTHLY,\n        )\n\n        assert is_license_valid(payload) is True\n\n    def test_expired_license(self) -> None:\n        \"\"\"Test that an expired license is invalid.\"\"\"\n        payload = LicensePayload(\n            version=\"1.0\",\n            tenant_id=\"tenant_123\",\n            issued_at=datetime.now(timezone.utc) - timedelta(days=60),\n            expires_at=datetime.now(timezone.utc) - timedelta(days=1),\n            seats=50,\n            plan_type=PlanType.MONTHLY,\n        )\n\n        assert is_license_valid(payload) is False\n"
  },
  {
    "path": "backend/tests/unit/federated_connector/slack/test_slack_federated_connnector.py",
    "content": "from datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom typing import Any\nfrom unittest.mock import patch\n\nimport pytest\nfrom pydantic import ValidationError\n\nfrom onyx.federated_connectors.models import OAuthResult\nfrom onyx.federated_connectors.slack.federated_connector import SlackFederatedConnector\nfrom onyx.federated_connectors.slack.models import SlackEntities\n\n# Constants for mock Slack OAuth response\nMOCK_APP_ID = \"A093M5L7Q92\"\nMOCK_USER_ID = \"U05SAH6UGUD\"\nMOCK_SCOPE = \"search:read\"\nMOCK_ACCESS_TOKEN = (\n    \"xoxe.xoxp-1-Mi0yLTU5MTAx...MDkwN2U0YjlmZmI4YzA1NTYwZjNlMjRiZDYwNGU0ZA\"\n)\nMOCK_REFRESH_TOKEN = (\n    \"xoxe-1-My0xLTU5MTAxMz...jcyZjA3NDM3YjdhOTRhYmRhMGJmMGVlMzBjNzQ4Y2I\"\n)\nMOCK_TOKEN_TYPE = \"user\"\nMOCK_EXPIRES_IN = 31659\nMOCK_TEAM_ID = \"T05SS40AFAM\"\nMOCK_TEAM_NAME = \"Onyx Team\"\n\n\nclass TestSlackFederatedConnector:\n    \"\"\"Test suite for SlackFederatedConnector\"\"\"\n\n    @pytest.fixture\n    def test_credentials(self) -> dict[str, str]:\n        \"\"\"Test credentials for Slack connector\"\"\"\n        return {\n            \"client_id\": \"test_client_id\",\n            \"client_secret\": \"test_client_secret\",\n            \"redirect_uri\": \"https://test.com/callback\",\n        }\n\n    @pytest.fixture\n    def slack_connector(\n        self, test_credentials: dict[str, str]\n    ) -> SlackFederatedConnector:\n        \"\"\"Create a SlackFederatedConnector instance for testing\"\"\"\n        return SlackFederatedConnector(test_credentials)\n\n    @pytest.fixture\n    def mock_slack_oauth_response(self) -> dict[str, Any]:\n        \"\"\"Mock Slack OAuth response based on real example\"\"\"\n        return {\n            \"ok\": True,\n            \"app_id\": MOCK_APP_ID,\n            \"authed_user\": {\n                \"id\": MOCK_USER_ID,\n                \"scope\": MOCK_SCOPE,\n                \"access_token\": MOCK_ACCESS_TOKEN,\n                \"token_type\": MOCK_TOKEN_TYPE,\n                \"refresh_token\": MOCK_REFRESH_TOKEN,\n                \"expires_in\": MOCK_EXPIRES_IN,\n            },\n            \"team\": {\"id\": MOCK_TEAM_ID, \"name\": MOCK_TEAM_NAME},\n            \"enterprise\": None,\n            \"is_enterprise_install\": False,\n        }\n\n    def test_callback_success(\n        self,\n        slack_connector: SlackFederatedConnector,\n        mock_slack_oauth_response: dict[str, Any],\n    ) -> None:\n        \"\"\"Test successful OAuth callback handling\"\"\"\n        # Mock the token exchange method\n        with patch.object(\n            slack_connector,\n            \"_exchange_code_for_token\",\n            return_value=mock_slack_oauth_response,\n        ):\n            # Simulate callback data with authorization code\n            callback_data = {\n                \"code\": \"test_auth_code\",\n                \"state\": \"test_state\",\n            }\n            redirect_uri = \"https://test.com/callback\"\n\n            # Call the callback method\n            result = slack_connector.callback(callback_data, redirect_uri)\n\n            # Assert the result is an OAuthResult\n            assert isinstance(result, OAuthResult)\n\n            # Assert OAuth token values are correctly extracted\n            assert result.access_token == MOCK_ACCESS_TOKEN\n            assert result.refresh_token == MOCK_REFRESH_TOKEN\n            assert result.token_type == MOCK_TOKEN_TYPE\n            assert result.scope == MOCK_SCOPE\n\n            # Assert expiration time is calculated correctly\n            assert result.expires_at is not None\n            expected_expires_at = datetime.now(timezone.utc) + timedelta(\n                seconds=MOCK_EXPIRES_IN\n            )\n            # Allow for small time difference due to test execution time\n            time_diff = abs((result.expires_at - expected_expires_at).total_seconds())\n            assert time_diff < 5  # Within 5 seconds\n\n            # Assert team info is extracted correctly\n            assert result.team is not None\n            assert result.team[\"id\"] == MOCK_TEAM_ID\n            assert result.team[\"name\"] == MOCK_TEAM_NAME\n\n            # Assert user info is extracted correctly\n            assert result.user is not None\n            assert result.user[\"id\"] == MOCK_USER_ID\n            assert result.user[\"scope\"] == MOCK_SCOPE\n            assert result.user[\"token_type\"] == MOCK_TOKEN_TYPE\n\n            # Assert raw response is preserved\n            assert result.raw_response == mock_slack_oauth_response\n\n    def test_callback_oauth_error(\n        self, slack_connector: SlackFederatedConnector\n    ) -> None:\n        \"\"\"Test OAuth callback with error response\"\"\"\n        callback_data = {\n            \"error\": \"access_denied\",\n            \"error_description\": \"User denied access\",\n        }\n        redirect_uri = \"https://test.com/callback\"\n\n        with pytest.raises(RuntimeError, match=\"OAuth error received: access_denied\"):\n            slack_connector.callback(callback_data, redirect_uri)\n\n    def test_callback_missing_code(\n        self, slack_connector: SlackFederatedConnector\n    ) -> None:\n        \"\"\"Test OAuth callback without authorization code\"\"\"\n        callback_data = {\"state\": \"test_state\"}\n        redirect_uri = \"https://test.com/callback\"\n\n        with pytest.raises(ValueError, match=\"No authorization code received\"):\n            slack_connector.callback(callback_data, redirect_uri)\n\n    def test_callback_slack_api_error(\n        self, slack_connector: SlackFederatedConnector\n    ) -> None:\n        \"\"\"Test OAuth callback when Slack API returns error\"\"\"\n        # Mock failed token exchange\n        mock_error_response = {\n            \"ok\": False,\n            \"error\": \"invalid_code\",\n        }\n\n        with patch.object(\n            slack_connector,\n            \"_exchange_code_for_token\",\n            return_value=mock_error_response,\n        ):\n            callback_data = {\"code\": \"invalid_code\"}\n            redirect_uri = \"https://test.com/callback\"\n\n            with pytest.raises(\n                RuntimeError, match=\"Failed to exchange authorization code for token\"\n            ):\n                slack_connector.callback(callback_data, redirect_uri)\n\n    def test_callback_without_authed_user(\n        self, slack_connector: SlackFederatedConnector\n    ) -> None:\n        \"\"\"Test OAuth callback when authed_user is missing from response\"\"\"\n        # Mock response without authed_user\n        mock_response = {\n            \"ok\": True,\n            \"app_id\": MOCK_APP_ID,\n            \"team\": {\"id\": MOCK_TEAM_ID, \"name\": MOCK_TEAM_NAME},\n        }\n\n        with patch.object(\n            slack_connector, \"_exchange_code_for_token\", return_value=mock_response\n        ):\n            callback_data = {\"code\": \"test_code\"}\n            redirect_uri = \"https://test.com/callback\"\n\n            with pytest.raises(\n                RuntimeError, match=\"Missing authed_user in OAuth response from Slack\"\n            ):\n                slack_connector.callback(callback_data, redirect_uri)\n\n    def test_callback_with_incomplete_authed_user(\n        self, slack_connector: SlackFederatedConnector\n    ) -> None:\n        \"\"\"Test OAuth callback when authed_user is missing access_token\"\"\"\n        # Mock response with authed_user but missing access_token\n        mock_response = {\n            \"ok\": True,\n            \"app_id\": MOCK_APP_ID,\n            \"authed_user\": {\n                \"id\": MOCK_USER_ID,\n                \"scope\": MOCK_SCOPE,\n                \"token_type\": MOCK_TOKEN_TYPE,\n                # Missing access_token\n            },\n            \"team\": {\"id\": MOCK_TEAM_ID, \"name\": MOCK_TEAM_NAME},\n        }\n\n        with patch.object(\n            slack_connector, \"_exchange_code_for_token\", return_value=mock_response\n        ):\n            callback_data = {\"code\": \"test_code\"}\n            redirect_uri = \"https://test.com/callback\"\n\n            result = slack_connector.callback(callback_data, redirect_uri)\n\n            # Should handle gracefully - access_token can be None in some edge cases\n            assert result.access_token is None\n            assert result.refresh_token is None\n            assert result.token_type == MOCK_TOKEN_TYPE\n            assert result.scope == MOCK_SCOPE\n\n\nclass TestSlackEntitiesValidation:\n    \"\"\"Test suite for SlackEntities validation\"\"\"\n\n    def test_default_values(self) -> None:\n        \"\"\"Test that default values are set correctly\"\"\"\n        entities = SlackEntities()\n\n        assert entities.search_all_channels is True\n        assert entities.channels is None\n        assert entities.exclude_channels is None\n        assert entities.include_dm is True\n        assert entities.include_group_dm is True\n        assert entities.include_private_channels is True\n        assert entities.default_search_days == 30\n\n    def test_search_all_channels_true(self) -> None:\n        \"\"\"Test search_all_channels=True ignores channels list\"\"\"\n        entities = SlackEntities(\n            search_all_channels=True,\n            channels=[\"general\"],  # Should be ignored\n        )\n\n        assert entities.search_all_channels is True\n        # channels list is present but search_all_channels takes precedence\n        assert entities.channels == [\"general\"]\n\n    def test_search_all_channels_false_with_channels(self) -> None:\n        \"\"\"Test search_all_channels=False with valid channels\"\"\"\n        entities = SlackEntities(\n            search_all_channels=False, channels=[\"general\", \"engineering\"]\n        )\n\n        assert entities.search_all_channels is False\n        assert entities.channels == [\"general\", \"engineering\"]\n\n    def test_search_all_channels_false_without_channels(self) -> None:\n        \"\"\"Test search_all_channels=False without channels raises error\"\"\"\n        with pytest.raises(\n            ValidationError,\n            match=\"Must specify at least one channel when search_all_channels is False\",\n        ):\n            SlackEntities(search_all_channels=False, channels=None)\n\n        with pytest.raises(\n            ValidationError,\n            match=\"Must specify at least one channel when search_all_channels is False\",\n        ):\n            SlackEntities(search_all_channels=False, channels=[])\n\n    def test_channels_validation(self) -> None:\n        \"\"\"Test channel list validation\"\"\"\n        # Valid channels\n        entities = SlackEntities(\n            search_all_channels=False, channels=[\"general\", \"C12345\", \"random\"]\n        )\n        assert entities.channels is not None\n        assert len(entities.channels) == 3\n\n        # Empty string in channels\n        with pytest.raises(\n            ValidationError, match=\"Each channel must be a non-empty string\"\n        ):\n            SlackEntities(search_all_channels=False, channels=[\"general\", \"\"])\n\n        # Whitespace-only string\n        with pytest.raises(\n            ValidationError, match=\"Each channel must be a non-empty string\"\n        ):\n            SlackEntities(search_all_channels=False, channels=[\"general\", \"   \"])\n\n    def test_exclude_channels_validation(self) -> None:\n        \"\"\"Test exclude channel patterns validation\"\"\"\n        # Valid patterns\n        entities = SlackEntities(exclude_channels=[\"customer*\", \"test-*\", \"private-*\"])\n        assert entities.exclude_channels is not None\n        assert len(entities.exclude_channels) == 3\n\n        # Empty string in patterns\n        with pytest.raises(\n            ValidationError, match=\"Each exclude pattern must be a non-empty string\"\n        ):\n            SlackEntities(exclude_channels=[\"customer*\", \"\"])\n\n        # Whitespace-only pattern\n        with pytest.raises(\n            ValidationError, match=\"Each exclude pattern must be a non-empty string\"\n        ):\n            SlackEntities(exclude_channels=[\"customer*\", \"   \"])\n\n    def test_exclude_channels_with_specific_channels(self) -> None:\n        \"\"\"Test exclude patterns work with specific channel list\"\"\"\n        entities = SlackEntities(\n            search_all_channels=False,\n            channels=[\"general\", \"customer-X\", \"customer-Y\", \"support\"],\n            exclude_channels=[\"customer*\"],\n        )\n\n        assert entities.search_all_channels is False\n        assert entities.channels is not None\n        assert len(entities.channels) == 4\n        assert entities.exclude_channels == [\"customer*\"]\n\n    def test_direct_message_filtering(self) -> None:\n        \"\"\"Test DM filtering options\"\"\"\n        # Test disabling 1:1 DMs\n        entities_no_dm = SlackEntities(include_dm=False)\n        assert entities_no_dm.include_dm is False\n        assert entities_no_dm.include_group_dm is True  # Default is True\n\n        # Test disabling group DMs\n        entities_no_group_dm = SlackEntities(include_group_dm=False)\n        assert entities_no_group_dm.include_dm is True  # Default is True\n        assert entities_no_group_dm.include_group_dm is False\n\n        # Test both enabled (defaults)\n        entities_both = SlackEntities(include_dm=True, include_group_dm=True)\n        assert entities_both.include_dm is True\n        assert entities_both.include_group_dm is True\n\n    def test_private_channel_filtering(self) -> None:\n        \"\"\"Test private channel filtering option\"\"\"\n        entities = SlackEntities(include_private_channels=True)\n\n        assert entities.include_private_channels is True\n\n    def test_default_search_days_validation(self) -> None:\n        \"\"\"Test default_search_days validation\"\"\"\n        # Valid values\n        entities = SlackEntities(default_search_days=7)\n        assert entities.default_search_days == 7\n\n        entities = SlackEntities(default_search_days=90)\n        assert entities.default_search_days == 90\n\n        entities = SlackEntities(default_search_days=365)\n        assert entities.default_search_days == 365\n\n        # Invalid: too small\n        with pytest.raises(\n            ValidationError, match=\"default_search_days must be at least 1\"\n        ):\n            SlackEntities(default_search_days=0)\n\n        with pytest.raises(\n            ValidationError, match=\"default_search_days must be at least 1\"\n        ):\n            SlackEntities(default_search_days=-5)\n\n        # Invalid: too large\n        with pytest.raises(\n            ValidationError, match=\"default_search_days cannot exceed 365 days\"\n        ):\n            SlackEntities(default_search_days=366)\n\n        with pytest.raises(\n            ValidationError, match=\"default_search_days cannot exceed 365 days\"\n        ):\n            SlackEntities(default_search_days=1000)\n\n    def test_complex_configuration(self) -> None:\n        \"\"\"Test a complex realistic configuration\"\"\"\n        entities = SlackEntities(\n            search_all_channels=False,\n            channels=[\"general\", \"engineering\", \"support\"],\n            exclude_channels=[\"test-*\", \"dev-*\"],\n            include_dm=False,\n            include_group_dm=False,\n            include_private_channels=True,\n        )\n\n        assert entities.search_all_channels is False\n        assert entities.channels == [\"general\", \"engineering\", \"support\"]\n        assert entities.exclude_channels == [\"test-*\", \"dev-*\"]\n        assert entities.include_dm is False\n        assert entities.include_group_dm is False\n        assert entities.include_private_channels is True\n\n    def test_validate_entities_method(self) -> None:\n        \"\"\"Test the validate_entities method in SlackFederatedConnector\"\"\"\n        # Create a connector for testing\n        test_credentials = {\n            \"client_id\": \"test_client_id\",\n            \"client_secret\": \"test_client_secret\",\n        }\n        slack_connector = SlackFederatedConnector(test_credentials)\n\n        # Valid entities\n        valid_entities = {\n            \"search_all_channels\": False,\n            \"channels\": [\"general\", \"engineering\"],\n            \"include_dm\": False,\n            \"include_group_dm\": False,\n            \"include_private_channels\": True,\n        }\n        assert slack_connector.validate_entities(valid_entities) is True\n\n        # Invalid entities - channels required when search_all_channels=False\n        invalid_entities = {\n            \"search_all_channels\": False,\n            \"channels\": [],  # Empty list\n        }\n        assert slack_connector.validate_entities(invalid_entities) is False\n\n        # Invalid entities - empty string in channels\n        invalid_entities2 = {\n            \"search_all_channels\": False,\n            \"channels\": [\"general\", \"\"],\n        }\n        assert slack_connector.validate_entities(invalid_entities2) is False\n"
  },
  {
    "path": "backend/tests/unit/file_store/test_file_store.py",
    "content": "import datetime\nfrom collections.abc import Generator\nfrom io import BytesIO\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import Mock\nfrom unittest.mock import patch\n\nimport pytest\nfrom sqlalchemy import create_engine\nfrom sqlalchemy import DateTime\nfrom sqlalchemy import Enum\nfrom sqlalchemy import String\nfrom sqlalchemy.orm import DeclarativeBase\nfrom sqlalchemy.orm import Mapped\nfrom sqlalchemy.orm import mapped_column\nfrom sqlalchemy.orm import Session\nfrom sqlalchemy.orm import sessionmaker\nfrom sqlalchemy.sql import func\n\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.file_store.file_store import get_default_file_store\nfrom onyx.file_store.file_store import S3BackedFileStore\n\n\nclass DBBaseTest(DeclarativeBase):\n    pass\n\n\nclass FileRecord(DBBaseTest):\n    __tablename__: str = \"file_record\"\n\n    # Internal file ID, must be unique across all files\n    file_id: Mapped[str] = mapped_column(String, primary_key=True)\n\n    display_name: Mapped[str | None] = mapped_column(String, nullable=True)\n    file_origin: Mapped[FileOrigin] = mapped_column(\n        Enum(FileOrigin, native_enum=False), nullable=False\n    )\n    file_type: Mapped[str] = mapped_column(String, default=\"text/plain\")\n\n    # External storage support (S3, MinIO, Azure Blob, etc.)\n    bucket_name: Mapped[str] = mapped_column(String, nullable=False)\n    object_key: Mapped[str] = mapped_column(String, nullable=False)\n\n    # Timestamps for external storage\n    created_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n    updated_at: Mapped[datetime.datetime] = mapped_column(\n        DateTime(timezone=True), server_default=func.now(), nullable=False\n    )\n\n\n@pytest.fixture\ndef db_session() -> Generator[Session, None, None]:\n    \"\"\"Create an in-memory SQLite database for testing\"\"\"\n    engine = create_engine(\"sqlite:///:memory:\")\n    DBBaseTest.metadata.create_all(engine)\n    SessionLocal = sessionmaker(bind=engine)\n    session = SessionLocal()\n    yield session\n    session.close()\n\n\n@pytest.fixture\ndef sample_content() -> bytes:\n    \"\"\"Sample file content for testing\"\"\"\n    return b\"This is a test file content\"\n\n\n@pytest.fixture\ndef sample_file_io(sample_content: bytes) -> BytesIO:\n    \"\"\"Sample file IO object for testing\"\"\"\n    return BytesIO(sample_content)\n\n\nclass TestExternalStorageFileStore:\n    \"\"\"Test external storage file store functionality (S3-compatible)\"\"\"\n\n    def test_get_default_file_store_s3(self) -> None:\n        \"\"\"Test that S3 file store is returned when backend is s3\"\"\"\n        with patch(\"onyx.configs.app_configs.FILE_STORE_BACKEND\", \"s3\"):\n            file_store = get_default_file_store()\n            assert isinstance(file_store, S3BackedFileStore)\n\n    def test_s3_client_initialization_with_credentials(self) -> None:\n        \"\"\"Test S3 client initialization with explicit credentials\"\"\"\n        with patch(\"boto3.client\") as mock_boto3:\n            file_store = S3BackedFileStore(\n                bucket_name=\"test-bucket\",\n                aws_access_key_id=\"test-key\",\n                aws_secret_access_key=\"test-secret\",\n                aws_region_name=\"us-west-2\",\n                s3_endpoint_url=None,\n            )\n            file_store._get_s3_client()\n\n            # Verify boto3 client was called with the expected arguments\n            mock_boto3.assert_called_once()\n            call_kwargs: dict[str, Any] = mock_boto3.call_args[1]\n\n            assert call_kwargs[\"service_name\"] == \"s3\"\n            assert call_kwargs[\"aws_access_key_id\"] == \"test-key\"\n            assert call_kwargs[\"aws_secret_access_key\"] == \"test-secret\"\n            assert call_kwargs[\"region_name\"] == \"us-west-2\"\n\n    def test_s3_client_initialization_with_iam_role(\n        self,\n        db_session: Session,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test S3 client initialization with IAM role (no explicit credentials)\"\"\"\n        with patch(\"boto3.client\") as mock_boto3:\n            file_store = S3BackedFileStore(\n                bucket_name=\"test-bucket\",\n                aws_access_key_id=None,\n                aws_secret_access_key=None,\n                aws_region_name=\"us-west-2\",\n                s3_endpoint_url=None,\n            )\n            file_store._get_s3_client()\n\n            # Verify boto3 client was called with the expected arguments\n            mock_boto3.assert_called_once()\n            call_kwargs: dict[str, Any] = mock_boto3.call_args[1]\n\n            assert call_kwargs[\"service_name\"] == \"s3\"\n            assert call_kwargs[\"region_name\"] == \"us-west-2\"\n            # Should not have explicit credentials\n            assert \"aws_access_key_id\" not in call_kwargs\n            assert \"aws_secret_access_key\" not in call_kwargs\n\n    def test_s3_bucket_name_configuration(self) -> None:\n        \"\"\"Test S3 bucket name configuration\"\"\"\n        with patch(\n            \"onyx.file_store.file_store.S3_FILE_STORE_BUCKET_NAME\", \"my-test-bucket\"\n        ):\n            file_store = S3BackedFileStore(bucket_name=\"my-test-bucket\")\n            bucket_name: str = file_store._get_bucket_name()\n            assert bucket_name == \"my-test-bucket\"\n\n    def test_s3_key_generation_default_prefix(self) -> None:\n        \"\"\"Test S3 key generation with default prefix\"\"\"\n        with (\n            patch(\"onyx.file_store.file_store.S3_FILE_STORE_PREFIX\", \"onyx-files\"),\n            patch(\n                \"onyx.file_store.file_store.get_current_tenant_id\",\n                return_value=\"test-tenant\",\n            ),\n        ):\n            file_store = S3BackedFileStore(bucket_name=\"test-bucket\")\n            s3_key: str = file_store._get_s3_key(\"test-file.txt\")\n            assert s3_key == \"onyx-files/test-tenant/test-file.txt\"\n\n    def test_s3_key_generation_custom_prefix(self) -> None:\n        \"\"\"Test S3 key generation with custom prefix\"\"\"\n        with (\n            patch(\"onyx.file_store.file_store.S3_FILE_STORE_PREFIX\", \"custom-prefix\"),\n            patch(\n                \"onyx.file_store.file_store.get_current_tenant_id\",\n                return_value=\"test-tenant\",\n            ),\n        ):\n            file_store = S3BackedFileStore(\n                bucket_name=\"test-bucket\", s3_prefix=\"custom-prefix\"\n            )\n            s3_key: str = file_store._get_s3_key(\"test-file.txt\")\n            assert s3_key == \"custom-prefix/test-tenant/test-file.txt\"\n\n    def test_s3_key_generation_with_different_tenant_ids(self) -> None:\n        \"\"\"Test S3 key generation with different tenant IDs\"\"\"\n        with patch(\"onyx.file_store.file_store.S3_FILE_STORE_PREFIX\", \"onyx-files\"):\n            file_store = S3BackedFileStore(bucket_name=\"test-bucket\")\n\n            # Test with tenant ID \"tenant-1\"\n            with patch(\n                \"onyx.file_store.file_store.get_current_tenant_id\",\n                return_value=\"tenant-1\",\n            ):\n                s3_key = file_store._get_s3_key(\"document.pdf\")\n                assert s3_key == \"onyx-files/tenant-1/document.pdf\"\n\n            # Test with tenant ID \"tenant-2\"\n            with patch(\n                \"onyx.file_store.file_store.get_current_tenant_id\",\n                return_value=\"tenant-2\",\n            ):\n                s3_key = file_store._get_s3_key(\"document.pdf\")\n                assert s3_key == \"onyx-files/tenant-2/document.pdf\"\n\n            # Test with default tenant (public)\n            with patch(\n                \"onyx.file_store.file_store.get_current_tenant_id\",\n                return_value=\"public\",\n            ):\n                s3_key = file_store._get_s3_key(\"document.pdf\")\n                assert s3_key == \"onyx-files/public/document.pdf\"\n\n    @patch(\"boto3.client\")\n    def test_s3_save_file_mock(\n        self,\n        mock_boto3: MagicMock,\n        db_session: Session,  # noqa: ARG002\n        sample_file_io: BytesIO,\n    ) -> None:\n        \"\"\"Test S3 file saving with mocked S3 client\"\"\"\n        # Setup S3 mock\n        mock_s3_client: Mock = Mock()\n        mock_boto3.return_value = mock_s3_client\n\n        # Create a mock database session\n        mock_db_session: Mock = Mock()\n        mock_db_session.commit = Mock()\n        mock_db_session.rollback = Mock()\n\n        with (\n            patch(\n                \"onyx.file_store.file_store.S3_FILE_STORE_BUCKET_NAME\", \"test-bucket\"\n            ),\n            patch(\"onyx.file_store.file_store.S3_FILE_STORE_PREFIX\", \"onyx-files\"),\n            patch(\"onyx.file_store.file_store.S3_AWS_ACCESS_KEY_ID\", \"test-key\"),\n            patch(\"onyx.file_store.file_store.S3_AWS_SECRET_ACCESS_KEY\", \"test-secret\"),\n        ):\n            # Mock the database operation to avoid SQLAlchemy issues\n            with patch(\"onyx.db.file_record.upsert_filerecord\") as mock_upsert:\n                mock_upsert.return_value = Mock()\n\n                file_store = S3BackedFileStore(bucket_name=\"test-bucket\")\n\n                # This should not raise an exception\n                file_store.save_file(\n                    file_id=\"test-file.txt\",\n                    content=sample_file_io,\n                    display_name=\"Test File\",\n                    file_origin=FileOrigin.OTHER,\n                    file_type=\"text/plain\",\n                    db_session=mock_db_session,\n                )\n\n                # Verify S3 client was called correctly\n                mock_s3_client.put_object.assert_called_once()\n                call_args = mock_s3_client.put_object.call_args\n                assert call_args[1][\"Bucket\"] == \"test-bucket\"\n                assert call_args[1][\"Key\"] == \"onyx-files/public/test-file.txt\"\n                assert call_args[1][\"ContentType\"] == \"text/plain\"\n\n    def test_minio_client_initialization(self) -> None:\n        \"\"\"Test S3 client initialization with MinIO endpoint\"\"\"\n        with (\n            patch(\"boto3.client\") as mock_boto3,\n            patch(\"urllib3.disable_warnings\"),\n        ):\n            file_store = S3BackedFileStore(\n                bucket_name=\"test-bucket\",\n                aws_access_key_id=\"minioadmin\",\n                aws_secret_access_key=\"minioadmin\",\n                aws_region_name=\"us-east-1\",\n                s3_endpoint_url=\"http://localhost:9000\",\n                s3_verify_ssl=False,\n            )\n            file_store._get_s3_client()\n\n            # Verify boto3 client was called with MinIO-specific settings\n            mock_boto3.assert_called_once()\n            call_kwargs: dict[str, Any] = mock_boto3.call_args[1]\n\n            assert call_kwargs[\"service_name\"] == \"s3\"\n            assert call_kwargs[\"endpoint_url\"] == \"http://localhost:9000\"\n            assert call_kwargs[\"aws_access_key_id\"] == \"minioadmin\"\n            assert call_kwargs[\"aws_secret_access_key\"] == \"minioadmin\"\n            assert call_kwargs[\"region_name\"] == \"us-east-1\"\n            assert call_kwargs[\"verify\"] is False\n\n            # Verify S3 configuration for MinIO\n            config = call_kwargs[\"config\"]\n            assert config.signature_version == \"s3v4\"\n            assert config.s3[\"addressing_style\"] == \"path\"\n\n    def test_minio_ssl_verification_enabled(self) -> None:\n        \"\"\"Test MinIO with SSL verification enabled\"\"\"\n        with patch(\"boto3.client\") as mock_boto3:\n            file_store = S3BackedFileStore(\n                bucket_name=\"test-bucket\",\n                aws_access_key_id=\"test-key\",\n                aws_secret_access_key=\"test-secret\",\n                s3_endpoint_url=\"https://minio.example.com\",\n                s3_verify_ssl=True,\n            )\n            file_store._get_s3_client()\n\n            call_kwargs: dict[str, Any] = mock_boto3.call_args[1]\n            # When SSL verification is enabled, verify should not be in kwargs (defaults to True)\n            assert \"verify\" not in call_kwargs or call_kwargs.get(\"verify\") is not False\n            assert call_kwargs[\"endpoint_url\"] == \"https://minio.example.com\"\n\n    def test_aws_s3_without_endpoint_url(self) -> None:\n        \"\"\"Test that regular AWS S3 doesn't include endpoint URL or custom config\"\"\"\n        with patch(\"boto3.client\") as mock_boto3:\n            file_store = S3BackedFileStore(\n                bucket_name=\"test-bucket\",\n                aws_access_key_id=\"test-key\",\n                aws_secret_access_key=\"test-secret\",\n                aws_region_name=\"us-west-2\",\n                s3_endpoint_url=None,\n            )\n            file_store._get_s3_client()\n\n            call_kwargs: dict[str, Any] = mock_boto3.call_args[1]\n\n            # For regular AWS S3, endpoint_url should not be present\n            assert \"endpoint_url\" not in call_kwargs\n            assert call_kwargs[\"service_name\"] == \"s3\"\n            assert call_kwargs[\"region_name\"] == \"us-west-2\"\n            # config should not be present for regular AWS S3\n            assert \"config\" not in call_kwargs\n\n\nclass TestFileStoreInterface:\n    \"\"\"Test the general file store interface\"\"\"\n\n    def test_file_store_s3_when_configured(self) -> None:\n        \"\"\"Test that S3 file store is returned when configured\"\"\"\n        with patch(\"onyx.configs.app_configs.FILE_STORE_BACKEND\", \"s3\"):\n            file_store = get_default_file_store()\n            assert isinstance(file_store, S3BackedFileStore)\n\n    def test_file_store_postgres_when_configured(self) -> None:\n        \"\"\"Test that Postgres file store is returned when configured\"\"\"\n        from onyx.file_store.postgres_file_store import PostgresBackedFileStore\n\n        with patch(\"onyx.configs.app_configs.FILE_STORE_BACKEND\", \"postgres\"):\n            file_store = get_default_file_store()\n            assert isinstance(file_store, PostgresBackedFileStore)\n\n    def test_file_store_defaults_to_s3(self) -> None:\n        \"\"\"Test that the default backend is s3\"\"\"\n        file_store = get_default_file_store()\n        assert isinstance(file_store, S3BackedFileStore)\n"
  },
  {
    "path": "backend/tests/unit/file_store/test_postgres_file_store.py",
    "content": "\"\"\"Unit tests for PostgresBackedFileStore.\n\nThese tests mock the database layer (sessions, raw connections, large objects)\nso they run without any external services.\n\"\"\"\n\nfrom io import BytesIO\nfrom io import StringIO\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.file_store.postgres_file_store import POSTGRES_BUCKET_SENTINEL\nfrom onyx.file_store.postgres_file_store import PostgresBackedFileStore\n\n\n@pytest.fixture\ndef store() -> PostgresBackedFileStore:\n    return PostgresBackedFileStore()\n\n\ndef _make_session_ctx(\n    mock_session: MagicMock,\n) -> Any:\n    \"\"\"Build a context-manager mock that yields mock_session.\"\"\"\n    from contextlib import contextmanager\n\n    @contextmanager\n    def _ctx(session: Any = None):  # type: ignore\n        yield session if session is not None else mock_session\n\n    return _ctx\n\n\ndef _mock_lobject(oid: int = 42, data: bytes = b\"hello\") -> MagicMock:\n    \"\"\"Return a mock lobject factory that the raw connection exposes.\"\"\"\n    lobj = MagicMock()\n    lobj.oid = oid\n    lobj.read = MagicMock(side_effect=[data, b\"\"])\n    lobj.write = MagicMock()\n    lobj.close = MagicMock()\n    lobj.unlink = MagicMock()\n    return lobj\n\n\nclass TestInitialize:\n    def test_initialize_is_noop(self, store: PostgresBackedFileStore) -> None:\n        # Should not raise\n        store.initialize()\n\n\nclass TestSaveFile:\n    @patch(\n        \"onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none\"\n    )\n    def test_save_bytes_content(\n        self,\n        mock_get_session: MagicMock,\n        store: PostgresBackedFileStore,\n    ) -> None:\n        mock_session = MagicMock()\n        mock_get_session.return_value = _make_session_ctx(mock_session)(None)\n\n        raw_conn = MagicMock()\n        lobj = _mock_lobject(oid=99)\n        raw_conn.lobject.return_value = lobj\n        mock_session.connection.return_value.connection.dbapi_connection = raw_conn\n\n        with (\n            patch(\n                \"onyx.file_store.postgres_file_store.upsert_filerecord\"\n            ) as mock_upsert_fr,\n            patch(\n                \"onyx.file_store.postgres_file_store.upsert_file_content\"\n            ) as mock_upsert_fc,\n        ):\n            content = BytesIO(b\"test data\")\n            file_id = store.save_file(\n                content=content,\n                display_name=\"test.txt\",\n                file_origin=FileOrigin.OTHER,\n                file_type=\"text/plain\",\n                file_id=\"my-file-id\",\n                db_session=mock_session,\n            )\n\n        assert file_id == \"my-file-id\"\n        lobj.write.assert_called_once_with(b\"test data\")\n\n        mock_upsert_fr.assert_called_once()\n        fr_kwargs = mock_upsert_fr.call_args[1]\n        assert fr_kwargs[\"file_id\"] == \"my-file-id\"\n        assert fr_kwargs[\"bucket_name\"] == POSTGRES_BUCKET_SENTINEL\n        assert fr_kwargs[\"object_key\"] == \"99\"\n\n        mock_upsert_fc.assert_called_once()\n        fc_kwargs = mock_upsert_fc.call_args[1]\n        assert fc_kwargs[\"lobj_oid\"] == 99\n        assert fc_kwargs[\"file_size\"] == len(b\"test data\")\n\n    @patch(\n        \"onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none\"\n    )\n    def test_save_string_io_content(\n        self,\n        mock_get_session: MagicMock,\n        store: PostgresBackedFileStore,\n    ) -> None:\n        \"\"\"StringIO content should be encoded to UTF-8 bytes.\"\"\"\n        mock_session = MagicMock()\n        mock_get_session.return_value = _make_session_ctx(mock_session)(None)\n\n        raw_conn = MagicMock()\n        lobj = _mock_lobject(oid=50)\n        raw_conn.lobject.return_value = lobj\n        mock_session.connection.return_value.connection.dbapi_connection = raw_conn\n\n        with (\n            patch(\"onyx.file_store.postgres_file_store.upsert_filerecord\"),\n            patch(\"onyx.file_store.postgres_file_store.upsert_file_content\"),\n        ):\n            content = StringIO(\"text content\")\n            file_id = store.save_file(\n                content=content,\n                display_name=\"doc.txt\",\n                file_origin=FileOrigin.OTHER,\n                file_type=\"text/plain\",\n                db_session=mock_session,\n            )\n\n        # Should have generated a UUID file_id\n        assert file_id is not None\n        lobj.write.assert_called_once_with(b\"text content\")\n\n    @patch(\n        \"onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none\"\n    )\n    def test_save_rolls_back_on_error(\n        self,\n        mock_get_session: MagicMock,\n        store: PostgresBackedFileStore,\n    ) -> None:\n        mock_session = MagicMock()\n        mock_get_session.return_value = _make_session_ctx(mock_session)(None)\n\n        raw_conn = MagicMock()\n        raw_conn.lobject.side_effect = RuntimeError(\"pg error\")\n        mock_session.connection.return_value.connection.dbapi_connection = raw_conn\n\n        with pytest.raises(RuntimeError, match=\"pg error\"):\n            store.save_file(\n                content=BytesIO(b\"data\"),\n                display_name=\"fail.txt\",\n                file_origin=FileOrigin.OTHER,\n                file_type=\"text/plain\",\n                db_session=mock_session,\n            )\n        mock_session.rollback.assert_called_once()\n\n\nclass TestReadFile:\n    @patch(\n        \"onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none\"\n    )\n    def test_read_file_in_memory(\n        self,\n        mock_get_session: MagicMock,\n        store: PostgresBackedFileStore,\n    ) -> None:\n        mock_session = MagicMock()\n        mock_get_session.return_value = _make_session_ctx(mock_session)(None)\n\n        mock_record = MagicMock()\n        mock_record.lobj_oid = 42\n\n        raw_conn = MagicMock()\n        lobj = _mock_lobject(oid=42, data=b\"file contents\")\n        raw_conn.lobject.return_value = lobj\n        mock_session.connection.return_value.connection.dbapi_connection = raw_conn\n\n        with patch(\n            \"onyx.file_store.postgres_file_store.get_file_content_by_file_id\",\n            return_value=mock_record,\n        ):\n            result = store.read_file(\"my-file\", db_session=mock_session)\n\n        assert result.read() == b\"file contents\"\n\n\nclass TestDeleteFile:\n    @patch(\n        \"onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none\"\n    )\n    def test_delete_removes_lobject_and_records(\n        self,\n        mock_get_session: MagicMock,\n        store: PostgresBackedFileStore,\n    ) -> None:\n        mock_session = MagicMock()\n        mock_get_session.return_value = _make_session_ctx(mock_session)(None)\n\n        mock_record = MagicMock()\n        mock_record.lobj_oid = 77\n\n        raw_conn = MagicMock()\n        lobj = _mock_lobject(oid=77)\n        raw_conn.lobject.return_value = lobj\n        mock_session.connection.return_value.connection.dbapi_connection = raw_conn\n\n        with (\n            patch(\n                \"onyx.file_store.postgres_file_store.get_file_content_by_file_id\",\n                return_value=mock_record,\n            ),\n            patch(\n                \"onyx.file_store.postgres_file_store.delete_file_content_by_file_id\"\n            ) as mock_del_fc,\n            patch(\n                \"onyx.file_store.postgres_file_store.delete_filerecord_by_file_id\"\n            ) as mock_del_fr,\n        ):\n            store.delete_file(\"file-77\", db_session=mock_session)\n\n        lobj.unlink.assert_called_once()\n        mock_del_fc.assert_called_once()\n        mock_del_fr.assert_called_once()\n        mock_session.commit.assert_called_once()\n\n\nclass TestGetFileSize:\n    @patch(\n        \"onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none\"\n    )\n    def test_returns_stored_size(\n        self,\n        mock_get_session: MagicMock,\n        store: PostgresBackedFileStore,\n    ) -> None:\n        mock_session = MagicMock()\n        mock_get_session.return_value = _make_session_ctx(mock_session)(None)\n\n        mock_record = MagicMock()\n        mock_record.file_size = 1024\n\n        with patch(\n            \"onyx.file_store.postgres_file_store.get_file_content_by_file_id\",\n            return_value=mock_record,\n        ):\n            size = store.get_file_size(\"file-1\", db_session=mock_session)\n\n        assert size == 1024\n\n    @patch(\n        \"onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none\"\n    )\n    def test_returns_none_on_error(\n        self,\n        mock_get_session: MagicMock,\n        store: PostgresBackedFileStore,\n    ) -> None:\n        mock_session = MagicMock()\n        mock_get_session.return_value = _make_session_ctx(mock_session)(None)\n\n        with patch(\n            \"onyx.file_store.postgres_file_store.get_file_content_by_file_id\",\n            side_effect=RuntimeError(\"not found\"),\n        ):\n            size = store.get_file_size(\"missing\", db_session=mock_session)\n\n        assert size is None\n\n\nclass TestChangeFileId:\n    @patch(\n        \"onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none\"\n    )\n    def test_reuses_same_lobject(\n        self,\n        mock_get_session: MagicMock,\n        store: PostgresBackedFileStore,\n    ) -> None:\n        \"\"\"Changing file ID should reuse the same large object (no copy).\"\"\"\n        mock_session = MagicMock()\n        mock_get_session.return_value = _make_session_ctx(mock_session)(None)\n\n        old_fr = MagicMock()\n        old_fr.display_name = \"doc.pdf\"\n        old_fr.file_origin = FileOrigin.OTHER\n        old_fr.file_type = \"application/pdf\"\n        old_fr.file_metadata = None\n        old_fr.object_key = \"55\"\n\n        with (\n            patch(\n                \"onyx.file_store.postgres_file_store.get_filerecord_by_file_id\",\n                return_value=old_fr,\n            ),\n            patch(\n                \"onyx.file_store.postgres_file_store.upsert_filerecord\"\n            ) as mock_upsert_fr,\n            patch(\n                \"onyx.file_store.postgres_file_store.transfer_file_content_file_id\"\n            ) as mock_transfer,\n            patch(\"onyx.file_store.postgres_file_store.delete_filerecord_by_file_id\"),\n        ):\n            store.change_file_id(\"old-id\", \"new-id\", db_session=mock_session)\n\n        # file_content row should be moved in-place via transfer\n        transfer_kwargs = mock_transfer.call_args[1]\n        assert transfer_kwargs[\"old_file_id\"] == \"old-id\"\n        assert transfer_kwargs[\"new_file_id\"] == \"new-id\"\n\n        # New file_record should preserve the same object_key (LO OID)\n        fr_kwargs = mock_upsert_fr.call_args[1]\n        assert fr_kwargs[\"file_id\"] == \"new-id\"\n        assert fr_kwargs[\"object_key\"] == \"55\"\n\n\nclass TestHasFile:\n    @patch(\n        \"onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none\"\n    )\n    def test_returns_true_when_present(\n        self,\n        mock_get_session: MagicMock,\n        store: PostgresBackedFileStore,\n    ) -> None:\n        mock_session = MagicMock()\n        mock_get_session.return_value = _make_session_ctx(mock_session)(None)\n\n        record = MagicMock()\n        record.file_origin = FileOrigin.OTHER\n        record.file_type = \"text/plain\"\n\n        with patch(\n            \"onyx.file_store.postgres_file_store.get_filerecord_by_file_id_optional\",\n            return_value=record,\n        ):\n            assert store.has_file(\n                \"f1\", FileOrigin.OTHER, \"text/plain\", db_session=mock_session\n            )\n\n    @patch(\n        \"onyx.file_store.postgres_file_store.get_session_with_current_tenant_if_none\"\n    )\n    def test_returns_false_when_absent(\n        self,\n        mock_get_session: MagicMock,\n        store: PostgresBackedFileStore,\n    ) -> None:\n        mock_session = MagicMock()\n        mock_get_session.return_value = _make_session_ctx(mock_session)(None)\n\n        with patch(\n            \"onyx.file_store.postgres_file_store.get_filerecord_by_file_id_optional\",\n            return_value=None,\n        ):\n            assert not store.has_file(\n                \"missing\", FileOrigin.OTHER, \"text/plain\", db_session=mock_session\n            )\n\n\nclass TestReadContentBytes:\n    def test_bytes_passthrough(self) -> None:\n        result = PostgresBackedFileStore._read_content_bytes(BytesIO(b\"raw\"))\n        assert result == b\"raw\"\n\n    def test_string_encoded_to_utf8(self) -> None:\n        result = PostgresBackedFileStore._read_content_bytes(StringIO(\"hello\"))\n        assert result == b\"hello\"\n"
  },
  {
    "path": "backend/tests/unit/model_server/test_embedding.py",
    "content": "import asyncio\nimport time\nfrom typing import Any\nfrom typing import List\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom model_server.encoders import embed_text\nfrom model_server.encoders import process_embed_request\nfrom shared_configs.enums import EmbedTextType\nfrom shared_configs.model_server_models import EmbedRequest\n\n\n@pytest.mark.asyncio\nasync def test_embed_text_no_model_name() -> None:\n    # Test that the function raises an error when no model name is provided\n    with pytest.raises(\n        ValueError,\n        match=\"Model name must be provided to run embeddings\",\n    ):\n        await embed_text(\n            texts=[\"test1\", \"test2\"],\n            model_name=None,\n            max_context_length=512,\n            normalize_embeddings=True,\n            prefix=None,\n        )\n\n\n@pytest.mark.asyncio\nasync def test_embed_text_local_model() -> None:\n    with patch(\"model_server.encoders.get_embedding_model\") as mock_get_model:\n        mock_model = MagicMock()\n        mock_model.encode.return_value = [[0.1, 0.2], [0.3, 0.4]]\n        mock_get_model.return_value = mock_model\n\n        result = await embed_text(\n            texts=[\"test1\", \"test2\"],\n            model_name=\"fake-local-model\",\n            max_context_length=512,\n            normalize_embeddings=True,\n            prefix=None,\n        )\n\n        assert result == [[0.1, 0.2], [0.3, 0.4]]\n        mock_model.encode.assert_called_once()\n\n\n@pytest.mark.asyncio\nasync def test_concurrent_embeddings() -> None:\n    def mock_encode(\n        *args: Any, **kwargs: Any  # noqa: ARG001\n    ) -> List[List[float]]:  # noqa: ARG001\n        time.sleep(5)\n        return [[0.1, 0.2, 0.3]]\n\n    test_req = EmbedRequest(\n        texts=[\"test\"],\n        model_name=\"'nomic-ai/nomic-embed-text-v1'\",\n        deployment_name=None,\n        max_context_length=512,\n        normalize_embeddings=True,\n        api_key=None,\n        provider_type=None,\n        text_type=EmbedTextType.QUERY,\n        manual_query_prefix=None,\n        manual_passage_prefix=None,\n        api_url=None,\n        api_version=None,\n        reduced_dimension=None,\n    )\n\n    with patch(\"model_server.encoders.get_embedding_model\") as mock_get_model:\n        mock_model = MagicMock()\n        mock_model.encode = mock_encode\n        mock_get_model.return_value = mock_model\n        start_time = time.time()\n\n        tasks = [process_embed_request(test_req) for _ in range(5)]\n        await asyncio.gather(*tasks)\n\n        end_time = time.time()\n\n        # 5 * 5 seconds = 25 seconds, this test ensures that the embeddings are at least yielding the thread\n        # However, the developer may still introduce unnecessary blocking above the mock and this test will\n        # still pass as long as it's less than (7 - 5) / 5 seconds\n        assert end_time - start_time < 7\n"
  },
  {
    "path": "backend/tests/unit/onyx/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/access/test_user_file_access.py",
    "content": "\"\"\"Tests for user file ACL computation, including shared persona access.\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nfrom onyx.access.access import collect_user_file_access\nfrom onyx.access.access import get_access_for_user_files_impl\nfrom onyx.access.utils import prefix_user_email\nfrom onyx.configs.constants import PUBLIC_DOC_PAT\n\n\ndef _make_user(email: str) -> MagicMock:\n    user = MagicMock()\n    user.email = email\n    user.id = uuid4()\n    return user\n\n\ndef _make_persona(\n    *,\n    owner: MagicMock | None = None,\n    shared_users: list[MagicMock] | None = None,\n    is_public: bool = False,\n    deleted: bool = False,\n) -> MagicMock:\n    persona = MagicMock()\n    persona.deleted = deleted\n    persona.is_public = is_public\n    persona.user_id = owner.id if owner else None\n    persona.user = owner\n    persona.users = shared_users or []\n    return persona\n\n\ndef _make_user_file(\n    *,\n    owner: MagicMock,\n    assistants: list[MagicMock] | None = None,\n) -> MagicMock:\n    uf = MagicMock()\n    uf.id = uuid4()\n    uf.user = owner\n    uf.user_id = owner.id\n    uf.assistants = assistants or []\n    return uf\n\n\nclass TestCollectUserFileAccess:\n    def test_owner_only(self) -> None:\n        owner = _make_user(\"owner@test.com\")\n        uf = _make_user_file(owner=owner)\n\n        emails, is_public = collect_user_file_access(uf)\n\n        assert emails == {\"owner@test.com\"}\n        assert is_public is False\n\n    def test_shared_persona_adds_users(self) -> None:\n        owner = _make_user(\"owner@test.com\")\n        shared = _make_user(\"shared@test.com\")\n        persona = _make_persona(owner=owner, shared_users=[shared])\n        uf = _make_user_file(owner=owner, assistants=[persona])\n\n        emails, is_public = collect_user_file_access(uf)\n\n        assert emails == {\"owner@test.com\", \"shared@test.com\"}\n        assert is_public is False\n\n    def test_persona_owner_added(self) -> None:\n        \"\"\"Persona owner (different from file owner) gets access too.\"\"\"\n        file_owner = _make_user(\"file-owner@test.com\")\n        persona_owner = _make_user(\"persona-owner@test.com\")\n        persona = _make_persona(owner=persona_owner)\n        uf = _make_user_file(owner=file_owner, assistants=[persona])\n\n        emails, is_public = collect_user_file_access(uf)\n\n        assert \"file-owner@test.com\" in emails\n        assert \"persona-owner@test.com\" in emails\n\n    def test_public_persona_makes_file_public(self) -> None:\n        owner = _make_user(\"owner@test.com\")\n        persona = _make_persona(owner=owner, is_public=True)\n        uf = _make_user_file(owner=owner, assistants=[persona])\n\n        emails, is_public = collect_user_file_access(uf)\n\n        assert is_public is True\n        assert \"owner@test.com\" in emails\n\n    def test_deleted_persona_ignored(self) -> None:\n        owner = _make_user(\"owner@test.com\")\n        shared = _make_user(\"shared@test.com\")\n        persona = _make_persona(owner=owner, shared_users=[shared], deleted=True)\n        uf = _make_user_file(owner=owner, assistants=[persona])\n\n        emails, is_public = collect_user_file_access(uf)\n\n        assert emails == {\"owner@test.com\"}\n        assert is_public is False\n\n    def test_multiple_personas_combine(self) -> None:\n        owner = _make_user(\"owner@test.com\")\n        user_a = _make_user(\"a@test.com\")\n        user_b = _make_user(\"b@test.com\")\n        p1 = _make_persona(owner=owner, shared_users=[user_a])\n        p2 = _make_persona(owner=owner, shared_users=[user_b])\n        uf = _make_user_file(owner=owner, assistants=[p1, p2])\n\n        emails, is_public = collect_user_file_access(uf)\n\n        assert emails == {\"owner@test.com\", \"a@test.com\", \"b@test.com\"}\n\n    def test_deduplication(self) -> None:\n        owner = _make_user(\"owner@test.com\")\n        shared = _make_user(\"shared@test.com\")\n        p1 = _make_persona(owner=owner, shared_users=[shared])\n        p2 = _make_persona(owner=owner, shared_users=[shared])\n        uf = _make_user_file(owner=owner, assistants=[p1, p2])\n\n        emails, _ = collect_user_file_access(uf)\n\n        assert emails == {\"owner@test.com\", \"shared@test.com\"}\n\n\nclass TestGetAccessForUserFiles:\n    def test_shared_user_in_acl(self) -> None:\n        \"\"\"Shared persona users should appear in the ACL.\"\"\"\n        owner = _make_user(\"owner@test.com\")\n        shared = _make_user(\"shared@test.com\")\n        persona = _make_persona(owner=owner, shared_users=[shared])\n        uf = _make_user_file(owner=owner, assistants=[persona])\n\n        db_session = MagicMock()\n        with patch(\n            \"onyx.access.access.fetch_user_files_with_access_relationships\",\n            return_value=[uf],\n        ):\n            result = get_access_for_user_files_impl([str(uf.id)], db_session)\n\n        access = result[str(uf.id)]\n        acl = access.to_acl()\n        assert prefix_user_email(\"owner@test.com\") in acl\n        assert prefix_user_email(\"shared@test.com\") in acl\n        assert access.is_public is False\n\n    def test_public_persona_sets_public_acl(self) -> None:\n        owner = _make_user(\"owner@test.com\")\n        persona = _make_persona(owner=owner, is_public=True)\n        uf = _make_user_file(owner=owner, assistants=[persona])\n\n        db_session = MagicMock()\n        with patch(\n            \"onyx.access.access.fetch_user_files_with_access_relationships\",\n            return_value=[uf],\n        ):\n            result = get_access_for_user_files_impl([str(uf.id)], db_session)\n\n        access = result[str(uf.id)]\n        assert access.is_public is True\n        acl = access.to_acl()\n        assert PUBLIC_DOC_PAT in acl\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/conftest.py",
    "content": "from unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\n\nimport pytest\n\nfrom onyx.db.models import OAuthAccount\nfrom onyx.db.models import User\n\n\n@pytest.fixture\ndef mock_user() -> MagicMock:\n    \"\"\"Creates a mock User instance for testing.\"\"\"\n    user = MagicMock(spec=User)\n    user.email = \"test@example.com\"\n    user.id = \"test-user-id\"\n    return user\n\n\n@pytest.fixture\ndef mock_oauth_account() -> MagicMock:\n    \"\"\"Creates a mock OAuthAccount instance for testing.\"\"\"\n    oauth_account = MagicMock(spec=OAuthAccount)\n    oauth_account.oauth_name = \"google\"\n    oauth_account.refresh_token = \"test-refresh-token\"\n    oauth_account.access_token = \"test-access-token\"\n    oauth_account.expires_at = None\n    return oauth_account\n\n\n@pytest.fixture\ndef mock_user_manager() -> MagicMock:\n    \"\"\"Creates a mock user manager for testing.\"\"\"\n    user_manager = MagicMock()\n    user_manager.user_db = MagicMock()\n    user_manager.user_db.update_oauth_account = AsyncMock()\n    user_manager.user_db.update = AsyncMock()\n    return user_manager\n\n\n@pytest.fixture\ndef mock_db_session() -> MagicMock:\n    \"\"\"Creates a mock database session for testing.\"\"\"\n    return MagicMock()\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_disposable_email_validator.py",
    "content": "\"\"\"\nTests for disposable email validation.\n\"\"\"\n\nfrom onyx.auth.disposable_email_validator import DisposableEmailValidator\nfrom onyx.auth.disposable_email_validator import is_disposable_email\n\n\nclass TestDisposableEmailValidator:\n    \"\"\"Test the DisposableEmailValidator class.\"\"\"\n\n    def test_singleton_pattern(self) -> None:\n        \"\"\"Test that DisposableEmailValidator is a singleton.\"\"\"\n        validator1 = DisposableEmailValidator()\n        validator2 = DisposableEmailValidator()\n        assert validator1 is validator2\n\n    def test_fallback_domains_included(self) -> None:\n        \"\"\"Test that fallback domains are always included.\"\"\"\n        validator = DisposableEmailValidator()\n        domains = validator.get_domains()\n\n        # Check that our hardcoded fallback domains are present\n        assert \"trashlify.com\" in domains\n        assert \"10minutemail.com\" in domains\n        assert \"guerrillamail.com\" in domains\n        assert \"mailinator.com\" in domains\n        assert \"tempmail.com\" in domains\n        assert \"throwaway.email\" in domains\n        assert \"yopmail.com\" in domains\n\n    def test_is_disposable_trashlify(self) -> None:\n        \"\"\"Test that trashlify.com emails are detected as disposable.\"\"\"\n        assert is_disposable_email(\"test@trashlify.com\") is True\n        assert is_disposable_email(\"user123@trashlify.com\") is True\n        assert is_disposable_email(\"4q4k99yca1@trashlify.com\") is True\n\n    def test_is_disposable_other_known_domains(self) -> None:\n        \"\"\"Test detection of other known disposable domains.\"\"\"\n        disposable_emails = [\n            \"test@10minutemail.com\",\n            \"user@guerrillamail.com\",\n            \"temp@mailinator.com\",\n            \"fake@tempmail.com\",\n            \"throw@throwaway.email\",\n            \"yop@yopmail.com\",\n        ]\n\n        for email in disposable_emails:\n            assert is_disposable_email(email) is True, f\"{email} should be disposable\"\n\n    def test_is_not_disposable_legitimate_domains(self) -> None:\n        \"\"\"Test that legitimate email domains are not flagged.\"\"\"\n        legitimate_emails = [\n            \"user@gmail.com\",\n            \"employee@company.com\",\n            \"admin@onyx.app\",\n            \"test@outlook.com\",\n            \"person@yahoo.com\",\n            \"contact@protonmail.com\",\n        ]\n\n        for email in legitimate_emails:\n            assert (\n                is_disposable_email(email) is False\n            ), f\"{email} should not be disposable\"\n\n    def test_case_insensitive(self) -> None:\n        \"\"\"Test that domain checking is case-insensitive.\"\"\"\n        assert is_disposable_email(\"test@TRASHLIFY.COM\") is True\n        assert is_disposable_email(\"test@Trashlify.Com\") is True\n        assert is_disposable_email(\"test@TrAsHlIfY.cOm\") is True\n\n    def test_invalid_email_formats(self) -> None:\n        \"\"\"Test handling of invalid email formats.\"\"\"\n        assert is_disposable_email(\"\") is False\n        assert is_disposable_email(\"notanemail\") is False\n        assert is_disposable_email(\"@trashlify.com\") is False\n        assert is_disposable_email(\"test@\") is False\n        assert is_disposable_email(\"@\") is False\n\n    def test_email_with_subdomains(self) -> None:\n        \"\"\"Test that emails with subdomains are handled correctly.\"\"\"\n        # The domain should be the last part after @\n        assert is_disposable_email(\"user@mail.trashlify.com\") is False\n        # Only exact domain matches should trigger\n\n    def test_validator_instance_methods(self) -> None:\n        \"\"\"Test the validator instance methods directly.\"\"\"\n        validator = DisposableEmailValidator()\n\n        # Test is_disposable method\n        assert validator.is_disposable(\"test@trashlify.com\") is True\n        assert validator.is_disposable(\"test@gmail.com\") is False\n\n        # Test invalid inputs\n        assert validator.is_disposable(\"\") is False\n        assert validator.is_disposable(\"invalid\") is False\n        assert validator.is_disposable(\"@trashlify.com\") is False\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_email.py",
    "content": "import pytest\n\nfrom onyx.auth.email_utils import build_user_email_invite\nfrom onyx.auth.email_utils import send_email\nfrom onyx.configs.constants import AuthType\nfrom onyx.configs.constants import ONYX_DEFAULT_APPLICATION_NAME\nfrom onyx.db.engine.sql_engine import SqlEngine\nfrom onyx.server.runtime.onyx_runtime import OnyxRuntime\n\n\n@pytest.mark.skip(\n    reason=\"This sends real emails, so only run when you really want to test this!\"\n)\ndef test_send_user_email_invite() -> None:\n    SqlEngine.init_engine(pool_size=20, max_overflow=5)\n\n    application_name = ONYX_DEFAULT_APPLICATION_NAME\n\n    onyx_file = OnyxRuntime.get_emailable_logo()\n\n    subject = f\"Invitation to Join {application_name} Organization\"\n\n    FROM_EMAIL = \"noreply@onyx.app\"\n    TO_EMAIL = \"support@onyx.app\"\n    text_content, html_content = build_user_email_invite(\n        FROM_EMAIL, TO_EMAIL, ONYX_DEFAULT_APPLICATION_NAME, AuthType.CLOUD\n    )\n\n    send_email(\n        TO_EMAIL,\n        subject,\n        html_content,\n        text_content,\n        mail_from=FROM_EMAIL,\n        inline_png=(\"logo.png\", onyx_file.data),\n    )\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_is_same_origin.py",
    "content": "import pytest\n\nfrom onyx.auth.users import _is_same_origin\n\n\nclass TestExactMatch:\n    \"\"\"Origins that are textually identical should always match.\"\"\"\n\n    @pytest.mark.parametrize(\n        \"origin\",\n        [\n            \"http://localhost:3000\",\n            \"https://app.example.com\",\n            \"https://app.example.com:8443\",\n            \"http://127.0.0.1:8080\",\n        ],\n    )\n    def test_identical_origins(self, origin: str) -> None:\n        assert _is_same_origin(origin, origin)\n\n\nclass TestLoopbackPortRelaxation:\n    \"\"\"On loopback addresses, port differences should be ignored.\"\"\"\n\n    @pytest.mark.parametrize(\n        \"actual,expected\",\n        [\n            (\"http://localhost:3001\", \"http://localhost:3000\"),\n            (\"http://localhost:8080\", \"http://localhost:3000\"),\n            (\"http://localhost\", \"http://localhost:3000\"),\n            (\"http://127.0.0.1:3001\", \"http://127.0.0.1:3000\"),\n            (\"http://[::1]:3001\", \"http://[::1]:3000\"),\n        ],\n    )\n    def test_loopback_different_ports_accepted(\n        self, actual: str, expected: str\n    ) -> None:\n        assert _is_same_origin(actual, expected)\n\n    @pytest.mark.parametrize(\n        \"actual,expected\",\n        [\n            (\"https://localhost:3001\", \"http://localhost:3000\"),\n            (\"http://localhost:3001\", \"https://localhost:3000\"),\n        ],\n    )\n    def test_loopback_different_scheme_rejected(\n        self, actual: str, expected: str\n    ) -> None:\n        assert not _is_same_origin(actual, expected)\n\n    def test_loopback_hostname_mismatch_rejected(self) -> None:\n        assert not _is_same_origin(\"http://localhost:3001\", \"http://127.0.0.1:3000\")\n\n\nclass TestNonLoopbackStrictPort:\n    \"\"\"Non-loopback origins must match scheme, hostname, AND port.\"\"\"\n\n    def test_different_port_rejected(self) -> None:\n        assert not _is_same_origin(\n            \"https://app.example.com:8443\", \"https://app.example.com\"\n        )\n\n    def test_different_hostname_rejected(self) -> None:\n        assert not _is_same_origin(\"https://evil.com\", \"https://app.example.com\")\n\n    def test_different_scheme_rejected(self) -> None:\n        assert not _is_same_origin(\"http://app.example.com\", \"https://app.example.com\")\n\n    def test_same_port_explicit(self) -> None:\n        assert _is_same_origin(\n            \"https://app.example.com:443\", \"https://app.example.com:443\"\n        )\n\n\nclass TestDefaultPortNormalization:\n    \"\"\"Port should be normalized so that omitted default port == explicit default port.\"\"\"\n\n    def test_http_implicit_vs_explicit_80(self) -> None:\n        assert _is_same_origin(\"http://example.com\", \"http://example.com:80\")\n\n    def test_http_explicit_80_vs_implicit(self) -> None:\n        assert _is_same_origin(\"http://example.com:80\", \"http://example.com\")\n\n    def test_https_implicit_vs_explicit_443(self) -> None:\n        assert _is_same_origin(\"https://example.com\", \"https://example.com:443\")\n\n    def test_https_explicit_443_vs_implicit(self) -> None:\n        assert _is_same_origin(\"https://example.com:443\", \"https://example.com\")\n\n    def test_http_non_default_port_vs_implicit_rejected(self) -> None:\n        assert not _is_same_origin(\"http://example.com:8080\", \"http://example.com\")\n\n\nclass TestTrailingSlash:\n    \"\"\"Trailing slashes should not affect comparison.\"\"\"\n\n    def test_trailing_slash_on_actual(self) -> None:\n        assert _is_same_origin(\"https://app.example.com/\", \"https://app.example.com\")\n\n    def test_trailing_slash_on_expected(self) -> None:\n        assert _is_same_origin(\"https://app.example.com\", \"https://app.example.com/\")\n\n    def test_trailing_slash_on_both(self) -> None:\n        assert _is_same_origin(\"https://app.example.com/\", \"https://app.example.com/\")\n\n\nclass TestCSWSHScenarios:\n    \"\"\"Realistic attack scenarios that must be rejected.\"\"\"\n\n    def test_remote_attacker_rejected(self) -> None:\n        assert not _is_same_origin(\"https://evil.com\", \"http://localhost:3000\")\n\n    def test_remote_attacker_same_port_rejected(self) -> None:\n        assert not _is_same_origin(\"http://evil.com:3000\", \"http://localhost:3000\")\n\n    def test_remote_attacker_matching_hostname_different_port(self) -> None:\n        assert not _is_same_origin(\n            \"https://app.example.com:9999\", \"https://app.example.com\"\n        )\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_jwt_provisioning.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\n\nimport pytest\n\nfrom onyx.auth import users as users_module\n\n\ndef test_extract_email_requires_valid_format() -> None:\n    \"\"\"Helper should validate email format before returning value.\"\"\"\n    assert users_module._extract_email_from_jwt({\"email\": \"invalid@\"}) is None\n    result = users_module._extract_email_from_jwt(\n        {\"preferred_username\": \"ValidUser@Example.COM\"}\n    )\n    assert result == \"validuser@example.com\"\n\n\n@pytest.mark.asyncio\nasync def test_get_or_create_user_updates_expiry(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"Existing web-login users should be returned and their expiry synced.\"\"\"\n    monkeypatch.setattr(users_module, \"TRACK_EXTERNAL_IDP_EXPIRY\", True)\n    invited_checked: dict[str, str] = {}\n\n    def mark_invited(value: str) -> None:\n        invited_checked[\"email\"] = value\n\n    domain_checked: dict[str, str] = {}\n\n    def mark_domain(value: str) -> None:\n        domain_checked[\"email\"] = value\n\n    monkeypatch.setattr(users_module, \"verify_email_is_invited\", mark_invited)\n    monkeypatch.setattr(users_module, \"verify_email_domain\", mark_domain)\n    email = \"jwt-user@example.com\"\n    exp_value = 1_700_000_000\n    payload: dict[str, Any] = {\"email\": email, \"exp\": exp_value}\n\n    existing_user = MagicMock()\n    existing_user.email = email\n    existing_user.oidc_expiry = None\n    existing_user.role.is_web_login.return_value = True\n\n    manager_holder: dict[str, Any] = {}\n\n    class StubUserManager:\n        def __init__(self, _user_db: object) -> None:\n            manager_holder[\"instance\"] = self\n            self.user_db = MagicMock()\n            self.user_db.update = AsyncMock()\n\n        async def get_by_email(self, email_arg: str) -> MagicMock:\n            assert email_arg == email\n            return existing_user\n\n    monkeypatch.setattr(users_module, \"UserManager\", StubUserManager)\n    monkeypatch.setattr(\n        users_module,\n        \"SQLAlchemyUserAdminDB\",\n        lambda *args, **kwargs: MagicMock(),  # noqa: ARG005\n    )\n\n    result = await users_module._get_or_create_user_from_jwt(\n        payload, MagicMock(), MagicMock()\n    )\n\n    assert result is existing_user\n    assert invited_checked[\"email\"] == email\n    assert domain_checked[\"email\"] == email\n    expected_expiry = datetime.fromtimestamp(exp_value, tz=timezone.utc)\n    instance = manager_holder[\"instance\"]\n    instance.user_db.update.assert_awaited_once_with(\n        existing_user, {\"oidc_expiry\": expected_expiry}\n    )\n    assert existing_user.oidc_expiry == expected_expiry\n\n\n@pytest.mark.asyncio\nasync def test_get_or_create_user_skips_inactive(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"Inactive users should not be re-authenticated via JWT.\"\"\"\n    monkeypatch.setattr(users_module, \"TRACK_EXTERNAL_IDP_EXPIRY\", True)\n    monkeypatch.setattr(users_module, \"verify_email_is_invited\", lambda _: None)\n    monkeypatch.setattr(users_module, \"verify_email_domain\", lambda *_a, **_kw: None)\n\n    email = \"inactive@example.com\"\n    payload: dict[str, Any] = {\"email\": email}\n\n    existing_user = MagicMock()\n    existing_user.email = email\n    existing_user.is_active = False\n    existing_user.role.is_web_login.return_value = True\n\n    class StubUserManager:\n        def __init__(self, _user_db: object) -> None:\n            self.user_db = MagicMock()\n            self.user_db.update = AsyncMock()\n\n        async def get_by_email(self, email_arg: str) -> MagicMock:\n            assert email_arg == email\n            return existing_user\n\n    monkeypatch.setattr(users_module, \"UserManager\", StubUserManager)\n    monkeypatch.setattr(\n        users_module,\n        \"SQLAlchemyUserAdminDB\",\n        lambda *args, **kwargs: MagicMock(),  # noqa: ARG005\n    )\n\n    result = await users_module._get_or_create_user_from_jwt(\n        payload, MagicMock(), MagicMock()\n    )\n\n    assert result is None\n\n\n@pytest.mark.asyncio\nasync def test_get_or_create_user_handles_race_conditions(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"If provisioning races, newly inactive users should still be blocked.\"\"\"\n    monkeypatch.setattr(users_module, \"TRACK_EXTERNAL_IDP_EXPIRY\", True)\n    monkeypatch.setattr(users_module, \"verify_email_is_invited\", lambda _: None)\n    monkeypatch.setattr(users_module, \"verify_email_domain\", lambda *_a, **_kw: None)\n\n    email = \"race@example.com\"\n    payload: dict[str, Any] = {\"email\": email}\n\n    inactive_user = MagicMock()\n    inactive_user.email = email\n    inactive_user.is_active = False\n    inactive_user.role.is_web_login.return_value = True\n\n    class StubUserManager:\n        def __init__(self, _user_db: object) -> None:\n            self.user_db = MagicMock()\n            self.user_db.update = AsyncMock()\n            self.get_calls = 0\n\n        async def get_by_email(self, email_arg: str) -> MagicMock:\n            assert email_arg == email\n            if self.get_calls == 0:\n                self.get_calls += 1\n                raise users_module.exceptions.UserNotExists()\n            self.get_calls += 1\n            return inactive_user\n\n        async def create(self, *args: Any, **kwargs: Any) -> MagicMock:  # noqa: ARG002\n            raise users_module.exceptions.UserAlreadyExists()\n\n    monkeypatch.setattr(users_module, \"UserManager\", StubUserManager)\n    monkeypatch.setattr(\n        users_module,\n        \"SQLAlchemyUserAdminDB\",\n        lambda *args, **kwargs: MagicMock(),  # noqa: ARG005\n    )\n\n    result = await users_module._get_or_create_user_from_jwt(\n        payload, MagicMock(), MagicMock()\n    )\n\n    assert result is None\n\n\n@pytest.mark.asyncio\nasync def test_get_or_create_user_provisions_new_user(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"A brand new JWT user should be provisioned automatically.\"\"\"\n    email = \"new-user@example.com\"\n    payload = {\"email\": email}\n    created_user = MagicMock()\n    created_user.email = email\n    created_user.oidc_expiry = None\n    created_user.role.is_web_login.return_value = True\n\n    monkeypatch.setattr(users_module, \"TRACK_EXTERNAL_IDP_EXPIRY\", False)\n    monkeypatch.setattr(users_module, \"generate_password\", lambda: \"TempPass123!\")\n    monkeypatch.setattr(users_module, \"verify_email_is_invited\", lambda _: None)\n    monkeypatch.setattr(users_module, \"verify_email_domain\", lambda *_a, **_kw: None)\n\n    recorded: dict[str, Any] = {}\n\n    class StubUserManager:\n        def __init__(self, _user_db: object) -> None:\n            recorded[\"instance\"] = self\n            self.user_db = MagicMock()\n            self.user_db.update = AsyncMock()\n\n        async def get_by_email(self, _email: str) -> MagicMock:\n            raise users_module.exceptions.UserNotExists()\n\n        async def create(self, user_create, safe=False, request=None):  # type: ignore[no-untyped-def]  # noqa: ARG002\n            recorded[\"user_create\"] = user_create\n            recorded[\"request\"] = request\n            return created_user\n\n    monkeypatch.setattr(users_module, \"UserManager\", StubUserManager)\n    monkeypatch.setattr(\n        users_module,\n        \"SQLAlchemyUserAdminDB\",\n        lambda *args, **kwargs: MagicMock(),  # noqa: ARG005\n    )\n\n    request = MagicMock()\n    result = await users_module._get_or_create_user_from_jwt(\n        payload, request, MagicMock()\n    )\n\n    assert result is created_user\n    created_payload = recorded[\"user_create\"]\n    assert created_payload.email == email\n    assert created_payload.is_verified is True\n    assert recorded[\"request\"] is request\n\n\n@pytest.mark.asyncio\nasync def test_get_or_create_user_requires_email_claim() -> None:\n    \"\"\"Tokens without a usable email claim should be ignored.\"\"\"\n    result = await users_module._get_or_create_user_from_jwt(\n        {}, MagicMock(), MagicMock()\n    )\n    assert result is None\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_oauth_refresher.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom sqlalchemy.ext.asyncio import AsyncSession\n\nfrom onyx.auth.oauth_refresher import _test_expire_oauth_token\nfrom onyx.auth.oauth_refresher import check_and_refresh_oauth_tokens\nfrom onyx.auth.oauth_refresher import check_oauth_account_has_refresh_token\nfrom onyx.auth.oauth_refresher import get_oauth_accounts_requiring_refresh_token\nfrom onyx.auth.oauth_refresher import refresh_oauth_token\nfrom onyx.db.models import OAuthAccount\n\n\n@pytest.mark.asyncio\nasync def test_refresh_oauth_token_success(\n    mock_user: MagicMock,\n    mock_oauth_account: MagicMock,\n    mock_user_manager: MagicMock,\n    mock_db_session: AsyncSession,\n) -> None:\n    \"\"\"Test successful OAuth token refresh.\"\"\"\n    # Mock HTTP client and response\n    mock_response = MagicMock()\n    mock_response.status_code = 200\n    mock_response.json.return_value = {\n        \"access_token\": \"new_token\",\n        \"refresh_token\": \"new_refresh_token\",\n        \"expires_in\": 3600,\n    }\n\n    # Create async mock for the client post method\n    mock_client = AsyncMock()\n    mock_client.post.return_value = mock_response\n\n    # Use fixture values but ensure refresh token exists\n    mock_oauth_account.oauth_name = (\n        \"google\"  # Ensure it's google to match the refresh endpoint\n    )\n    mock_oauth_account.refresh_token = \"old_refresh_token\"\n\n    # Patch at the module level where it's actually being used\n    with patch(\"onyx.auth.oauth_refresher.httpx.AsyncClient\") as client_class_mock:\n        # Configure the context manager\n        client_instance = mock_client\n        client_class_mock.return_value.__aenter__.return_value = client_instance\n\n        # Call the function under test\n        result = await refresh_oauth_token(\n            mock_user, mock_oauth_account, mock_db_session, mock_user_manager\n        )\n\n    # Assertions\n    assert result is True\n    mock_client.post.assert_called_once()\n    mock_user_manager.user_db.update_oauth_account.assert_called_once()\n\n    # Verify token data was updated correctly\n    update_data = mock_user_manager.user_db.update_oauth_account.call_args[0][2]\n    assert update_data[\"access_token\"] == \"new_token\"\n    assert update_data[\"refresh_token\"] == \"new_refresh_token\"\n    assert \"expires_at\" in update_data\n\n\n@pytest.mark.asyncio\nasync def test_refresh_oauth_token_failure(\n    mock_user: MagicMock,\n    mock_oauth_account: MagicMock,\n    mock_user_manager: MagicMock,\n    mock_db_session: AsyncSession,\n) -> bool:\n    \"\"\"Test OAuth token refresh failure due to HTTP error.\"\"\"\n    # Mock HTTP client with error response\n    mock_response = MagicMock()\n    mock_response.status_code = 400  # Simulate error\n\n    # Create async mock for the client post method\n    mock_client = AsyncMock()\n    mock_client.post.return_value = mock_response\n\n    # Ensure refresh token exists and provider is supported\n    mock_oauth_account.oauth_name = \"google\"\n    mock_oauth_account.refresh_token = \"old_refresh_token\"\n\n    # Patch at the module level where it's actually being used\n    with patch(\"onyx.auth.oauth_refresher.httpx.AsyncClient\") as client_class_mock:\n        # Configure the context manager\n        client_class_mock.return_value.__aenter__.return_value = mock_client\n\n        # Call the function under test\n        result = await refresh_oauth_token(\n            mock_user, mock_oauth_account, mock_db_session, mock_user_manager\n        )\n\n    # Assertions\n    assert result is False\n    mock_client.post.assert_called_once()\n    mock_user_manager.user_db.update_oauth_account.assert_not_called()\n    return True\n\n\n@pytest.mark.asyncio\nasync def test_refresh_oauth_token_no_refresh_token(\n    mock_user: MagicMock,\n    mock_oauth_account: MagicMock,\n    mock_user_manager: MagicMock,\n    mock_db_session: AsyncSession,\n) -> None:\n    \"\"\"Test OAuth token refresh when no refresh token is available.\"\"\"\n    # Set refresh token to None\n    mock_oauth_account.refresh_token = None\n    mock_oauth_account.oauth_name = \"google\"\n\n    # No need to mock httpx since it shouldn't be called\n    result = await refresh_oauth_token(\n        mock_user, mock_oauth_account, mock_db_session, mock_user_manager\n    )\n\n    # Assertions\n    assert result is False\n\n\n@pytest.mark.asyncio\nasync def test_check_and_refresh_oauth_tokens(\n    mock_user: MagicMock,\n    mock_user_manager: MagicMock,\n    mock_db_session: AsyncSession,\n) -> None:\n    \"\"\"Test checking and refreshing multiple OAuth tokens.\"\"\"\n    # Create mock user with OAuth accounts\n    now_timestamp = datetime.now(timezone.utc).timestamp()\n\n    # Create an account that needs refreshing (expiring soon)\n    expiring_account = MagicMock(spec=OAuthAccount)\n    expiring_account.oauth_name = \"google\"\n    expiring_account.refresh_token = \"refresh_token_1\"\n    expiring_account.expires_at = now_timestamp + 60  # Expires in 1 minute\n\n    # Create an account that doesn't need refreshing (expires later)\n    valid_account = MagicMock(spec=OAuthAccount)\n    valid_account.oauth_name = \"google\"\n    valid_account.refresh_token = \"refresh_token_2\"\n    valid_account.expires_at = now_timestamp + 3600  # Expires in 1 hour\n\n    # Create an account without a refresh token\n    no_refresh_account = MagicMock(spec=OAuthAccount)\n    no_refresh_account.oauth_name = \"google\"\n    no_refresh_account.refresh_token = None\n    no_refresh_account.expires_at = (\n        now_timestamp + 60\n    )  # Expiring soon but no refresh token\n\n    # Set oauth_accounts on the mock user\n    mock_user.oauth_accounts = [expiring_account, valid_account, no_refresh_account]\n\n    # Mock refresh_oauth_token function\n    with patch(\n        \"onyx.auth.oauth_refresher.refresh_oauth_token\", AsyncMock(return_value=True)\n    ) as mock_refresh:\n        # Call the function under test\n        await check_and_refresh_oauth_tokens(\n            mock_user, mock_db_session, mock_user_manager\n        )\n\n    # Assertions\n    assert mock_refresh.call_count == 1  # Should only refresh the expiring account\n    # Check it was called with the expiring account\n    mock_refresh.assert_called_once_with(\n        mock_user, expiring_account, mock_db_session, mock_user_manager\n    )\n\n\n@pytest.mark.asyncio\nasync def test_get_oauth_accounts_requiring_refresh_token(mock_user: MagicMock) -> None:\n    \"\"\"Test identifying OAuth accounts that need refresh tokens.\"\"\"\n    # Create accounts with and without refresh tokens\n    account_with_token = MagicMock(spec=OAuthAccount)\n    account_with_token.oauth_name = \"google\"\n    account_with_token.refresh_token = \"refresh_token\"\n\n    account_without_token = MagicMock(spec=OAuthAccount)\n    account_without_token.oauth_name = \"google\"\n    account_without_token.refresh_token = None\n\n    second_account_without_token = MagicMock(spec=OAuthAccount)\n    second_account_without_token.oauth_name = \"github\"\n    second_account_without_token.refresh_token = (\n        \"\"  # Empty string should also be treated as missing\n    )\n\n    # Set accounts on user\n    mock_user.oauth_accounts = [\n        account_with_token,\n        account_without_token,\n        second_account_without_token,\n    ]\n\n    # Call the function under test\n    accounts_needing_refresh = await get_oauth_accounts_requiring_refresh_token(\n        mock_user\n    )\n\n    # Assertions\n    assert len(accounts_needing_refresh) == 2\n    assert account_without_token in accounts_needing_refresh\n    assert second_account_without_token in accounts_needing_refresh\n    assert account_with_token not in accounts_needing_refresh\n\n\n@pytest.mark.asyncio\nasync def test_check_oauth_account_has_refresh_token(\n    mock_user: MagicMock, mock_oauth_account: MagicMock\n) -> None:\n    \"\"\"Test checking if an OAuth account has a refresh token.\"\"\"\n    # Test with refresh token\n    mock_oauth_account.refresh_token = \"refresh_token\"\n    has_token = await check_oauth_account_has_refresh_token(\n        mock_user, mock_oauth_account\n    )\n    assert has_token is True\n\n    # Test with None refresh token\n    mock_oauth_account.refresh_token = None\n    has_token = await check_oauth_account_has_refresh_token(\n        mock_user, mock_oauth_account\n    )\n    assert has_token is False\n\n    # Test with empty string refresh token\n    mock_oauth_account.refresh_token = \"\"\n    has_token = await check_oauth_account_has_refresh_token(\n        mock_user, mock_oauth_account\n    )\n    assert has_token is False\n\n\n@pytest.mark.asyncio\nasync def test_expire_oauth_token(\n    mock_user: MagicMock,\n    mock_oauth_account: MagicMock,\n    mock_user_manager: MagicMock,\n    mock_db_session: AsyncSession,\n) -> None:\n    \"\"\"Tests the testing utility function for token expiration.\"\"\"\n    # Set up the mock account\n    mock_oauth_account.oauth_name = \"google\"\n    mock_oauth_account.refresh_token = \"test_refresh_token\"\n    mock_oauth_account.access_token = \"test_access_token\"\n\n    # Call the function under test\n    result = await _test_expire_oauth_token(\n        mock_user,\n        mock_oauth_account,\n        mock_db_session,\n        mock_user_manager,\n        expire_in_seconds=10,\n    )\n\n    # Assertions\n    assert result is True\n    mock_user_manager.user_db.update_oauth_account.assert_called_once()\n\n    # Verify the expiration time was set correctly\n    update_data = mock_user_manager.user_db.update_oauth_account.call_args[0][2]\n    assert \"expires_at\" in update_data\n\n    # Now should be within 10-11 seconds of the set expiration\n    now = datetime.now(timezone.utc).timestamp()\n    assert update_data[\"expires_at\"] - now >= 8.8  # Allow ~1 second for test execution\n    assert update_data[\"expires_at\"] - now <= 11.2  # Allow ~1 second for test execution\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_oidc_pkce.py",
    "content": "from typing import Any\nfrom typing import cast\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom urllib.parse import parse_qs\nfrom urllib.parse import urlparse\n\nfrom fastapi import FastAPI\nfrom fastapi import Response\nfrom fastapi.testclient import TestClient\nfrom fastapi_users.authentication import AuthenticationBackend\nfrom fastapi_users.authentication import CookieTransport\nfrom fastapi_users.jwt import generate_jwt\nfrom httpx_oauth.oauth2 import BaseOAuth2\nfrom httpx_oauth.oauth2 import GetAccessTokenError\n\nfrom onyx.auth.users import CSRF_TOKEN_COOKIE_NAME\nfrom onyx.auth.users import CSRF_TOKEN_KEY\nfrom onyx.auth.users import get_oauth_router\nfrom onyx.auth.users import get_pkce_cookie_name\nfrom onyx.auth.users import PKCE_COOKIE_NAME_PREFIX\nfrom onyx.auth.users import STATE_TOKEN_AUDIENCE\nfrom onyx.error_handling.exceptions import register_onyx_exception_handlers\n\n\nclass _StubOAuthClient:\n    def __init__(self) -> None:\n        self.name = \"openid\"\n        self.authorization_calls: list[dict[str, str | list[str] | None]] = []\n        self.access_token_calls: list[dict[str, str | None]] = []\n\n    async def get_authorization_url(\n        self,\n        redirect_uri: str,\n        state: str | None = None,\n        scope: list[str] | None = None,\n        code_challenge: str | None = None,\n        code_challenge_method: str | None = None,\n    ) -> str:\n        self.authorization_calls.append(\n            {\n                \"redirect_uri\": redirect_uri,\n                \"state\": state,\n                \"scope\": scope,\n                \"code_challenge\": code_challenge,\n                \"code_challenge_method\": code_challenge_method,\n            }\n        )\n        return f\"https://idp.example.com/authorize?state={state}\"\n\n    async def get_access_token(\n        self, code: str, redirect_uri: str, code_verifier: str | None = None\n    ) -> dict[str, str | int]:\n        self.access_token_calls.append(\n            {\n                \"code\": code,\n                \"redirect_uri\": redirect_uri,\n                \"code_verifier\": code_verifier,\n            }\n        )\n        return {\n            \"access_token\": \"oidc_access_token\",\n            \"refresh_token\": \"oidc_refresh_token\",\n            \"expires_at\": 1730000000,\n        }\n\n    async def get_id_email(self, _access_token: str) -> tuple[str, str | None]:\n        return (\"oidc_account_id\", \"oidc_user@example.com\")\n\n\ndef _build_test_client(\n    enable_pkce: bool,\n    login_status_code: int = 302,\n) -> tuple[TestClient, _StubOAuthClient, MagicMock]:\n    oauth_client = _StubOAuthClient()\n    transport = CookieTransport(cookie_name=\"testsession\")\n\n    async def get_strategy() -> MagicMock:\n        return MagicMock()\n\n    backend = AuthenticationBackend(\n        name=\"test_backend\",\n        transport=transport,\n        get_strategy=get_strategy,\n    )\n\n    login_response = Response(status_code=login_status_code)\n    if login_status_code in {301, 302, 303, 307, 308}:\n        login_response.headers[\"location\"] = \"/app\"\n    login_response.set_cookie(\"testsession\", \"session-token\")\n    backend.login = AsyncMock(return_value=login_response)  # type: ignore[method-assign]\n\n    user = MagicMock()\n    user.is_active = True\n    user_manager = MagicMock()\n    user_manager.oauth_callback = AsyncMock(return_value=user)\n    user_manager.on_after_login = AsyncMock()\n\n    async def get_user_manager() -> MagicMock:\n        return user_manager\n\n    router = get_oauth_router(\n        oauth_client=cast(BaseOAuth2[Any], oauth_client),\n        backend=backend,\n        get_user_manager=get_user_manager,\n        state_secret=\"test-secret\",\n        redirect_url=\"http://localhost/auth/oidc/callback\",\n        associate_by_email=True,\n        is_verified_by_default=True,\n        enable_pkce=enable_pkce,\n    )\n    app = FastAPI()\n    app.include_router(router, prefix=\"/auth/oidc\")\n    register_onyx_exception_handlers(app)\n\n    client = TestClient(app, raise_server_exceptions=False)\n    return client, oauth_client, user_manager\n\n\ndef _extract_state_from_authorize_response(response: Any) -> str:\n    auth_url = response.json()[\"authorization_url\"]\n    return parse_qs(urlparse(auth_url).query)[\"state\"][0]\n\n\ndef test_oidc_authorize_omits_pkce_when_flag_disabled() -> None:\n    client, oauth_client, _ = _build_test_client(enable_pkce=False)\n\n    response = client.get(\"/auth/oidc/authorize\")\n\n    assert response.status_code == 200\n    assert oauth_client.authorization_calls[0][\"code_challenge\"] is None\n    assert oauth_client.authorization_calls[0][\"code_challenge_method\"] is None\n    assert \"fastapiusersoauthcsrf\" in response.cookies.keys()\n    assert not any(\n        key.startswith(PKCE_COOKIE_NAME_PREFIX) for key in response.cookies.keys()\n    )\n\n\ndef test_oidc_authorize_adds_pkce_when_flag_enabled() -> None:\n    client, oauth_client, _ = _build_test_client(enable_pkce=True)\n\n    response = client.get(\"/auth/oidc/authorize\")\n\n    assert response.status_code == 200\n    assert oauth_client.authorization_calls[0][\"code_challenge\"] is not None\n    assert oauth_client.authorization_calls[0][\"code_challenge_method\"] == \"S256\"\n    assert any(\n        key.startswith(PKCE_COOKIE_NAME_PREFIX) for key in response.cookies.keys()\n    )\n\n\ndef test_oidc_callback_fails_when_pkce_cookie_missing() -> None:\n    client, oauth_client, _ = _build_test_client(enable_pkce=True)\n    authorize_response = client.get(\"/auth/oidc/authorize\")\n    state = _extract_state_from_authorize_response(authorize_response)\n\n    for key in list(client.cookies.keys()):\n        if key.startswith(PKCE_COOKIE_NAME_PREFIX):\n            del client.cookies[key]\n\n    response = client.get(\n        \"/auth/oidc/callback\", params={\"code\": \"abc123\", \"state\": state}\n    )\n\n    assert response.status_code == 400\n    assert response.json()[\"error_code\"] == \"VALIDATION_ERROR\"\n    assert oauth_client.access_token_calls == []\n    assert \"Max-Age=0\" in response.headers.get(\"set-cookie\", \"\")\n\n\ndef test_oidc_callback_rejects_bad_state_before_token_exchange() -> None:\n    client, oauth_client, _ = _build_test_client(enable_pkce=True)\n    client.get(\"/auth/oidc/authorize\")\n    tampered_state = \"not-a-valid-state-jwt\"\n    client.cookies.set(get_pkce_cookie_name(tampered_state), \"verifier123\")\n\n    response = client.get(\n        \"/auth/oidc/callback\", params={\"code\": \"abc123\", \"state\": tampered_state}\n    )\n\n    assert response.status_code == 400\n    assert response.json()[\"error_code\"] == \"VALIDATION_ERROR\"\n    assert oauth_client.access_token_calls == []\n    assert \"Max-Age=0\" in response.headers.get(\"set-cookie\", \"\")\n\n\ndef test_oidc_callback_rejects_wrongly_signed_state_before_token_exchange() -> None:\n    client, oauth_client, _ = _build_test_client(enable_pkce=True)\n    client.get(\"/auth/oidc/authorize\")\n    csrf_token = client.cookies.get(CSRF_TOKEN_COOKIE_NAME)\n    assert csrf_token is not None\n    tampered_state = generate_jwt(\n        {\n            \"aud\": STATE_TOKEN_AUDIENCE,\n            CSRF_TOKEN_KEY: csrf_token,\n        },\n        \"wrong-secret\",\n        3600,\n    )\n    client.cookies.set(get_pkce_cookie_name(tampered_state), \"verifier123\")\n\n    response = client.get(\n        \"/auth/oidc/callback\",\n        params={\"code\": \"abc123\", \"state\": tampered_state},\n    )\n\n    assert response.status_code == 400\n    assert response.json()[\"error_code\"] == \"VALIDATION_ERROR\"\n    assert response.json()[\"detail\"] == \"ACCESS_TOKEN_DECODE_ERROR\"\n    assert oauth_client.access_token_calls == []\n    assert \"Max-Age=0\" in response.headers.get(\"set-cookie\", \"\")\n\n\ndef test_oidc_callback_rejects_csrf_mismatch_in_pkce_path() -> None:\n    client, oauth_client, _ = _build_test_client(enable_pkce=True)\n    authorize_response = client.get(\"/auth/oidc/authorize\")\n    state = _extract_state_from_authorize_response(authorize_response)\n\n    # Keep PKCE verifier cookie intact, but invalidate CSRF match against state JWT.\n    client.cookies.set(\"fastapiusersoauthcsrf\", \"wrong-csrf-token\")\n\n    response = client.get(\n        \"/auth/oidc/callback\",\n        params={\"code\": \"abc123\", \"state\": state},\n    )\n\n    assert response.status_code == 400\n    assert response.json()[\"error_code\"] == \"VALIDATION_ERROR\"\n    assert oauth_client.access_token_calls == []\n    assert \"Max-Age=0\" in response.headers.get(\"set-cookie\", \"\")\n\n\ndef test_oidc_callback_get_access_token_error_is_400() -> None:\n    client, oauth_client, _ = _build_test_client(enable_pkce=True)\n    authorize_response = client.get(\"/auth/oidc/authorize\")\n    state = _extract_state_from_authorize_response(authorize_response)\n    with patch.object(\n        oauth_client,\n        \"get_access_token\",\n        AsyncMock(side_effect=GetAccessTokenError(\"token exchange failed\")),\n    ):\n        response = client.get(\n            \"/auth/oidc/callback\", params={\"code\": \"abc123\", \"state\": state}\n        )\n\n    assert response.status_code == 400\n    assert response.json()[\"error_code\"] == \"VALIDATION_ERROR\"\n    assert response.json()[\"detail\"] == \"Authorization code exchange failed\"\n    assert \"Max-Age=0\" in response.headers.get(\"set-cookie\", \"\")\n\n\ndef test_oidc_callback_cleans_pkce_cookie_on_idp_error_with_state() -> None:\n    client, oauth_client, _ = _build_test_client(enable_pkce=True)\n    authorize_response = client.get(\"/auth/oidc/authorize\")\n    state = _extract_state_from_authorize_response(authorize_response)\n\n    response = client.get(\n        \"/auth/oidc/callback\",\n        params={\"error\": \"access_denied\", \"state\": state},\n    )\n\n    assert response.status_code == 400\n    assert response.json()[\"error_code\"] == \"VALIDATION_ERROR\"\n    assert response.json()[\"detail\"] == \"Authorization request failed or was denied\"\n    assert oauth_client.access_token_calls == []\n    assert \"Max-Age=0\" in response.headers.get(\"set-cookie\", \"\")\n\n\ndef test_oidc_callback_cleans_pkce_cookie_on_missing_email() -> None:\n    client, oauth_client, _ = _build_test_client(enable_pkce=True)\n    authorize_response = client.get(\"/auth/oidc/authorize\")\n    state = _extract_state_from_authorize_response(authorize_response)\n\n    with patch.object(\n        oauth_client, \"get_id_email\", AsyncMock(return_value=(\"oidc_account_id\", None))\n    ):\n        response = client.get(\n            \"/auth/oidc/callback\", params={\"code\": \"abc123\", \"state\": state}\n        )\n\n    assert response.status_code == 400\n    assert response.json()[\"error_code\"] == \"VALIDATION_ERROR\"\n    assert \"Max-Age=0\" in response.headers.get(\"set-cookie\", \"\")\n\n\ndef test_oidc_callback_rejects_wrong_audience_state_before_token_exchange() -> None:\n    client, oauth_client, _ = _build_test_client(enable_pkce=True)\n    client.get(\"/auth/oidc/authorize\")\n    csrf_token = client.cookies.get(CSRF_TOKEN_COOKIE_NAME)\n    assert csrf_token is not None\n    wrong_audience_state = generate_jwt(\n        {\n            \"aud\": \"wrong-audience\",\n            CSRF_TOKEN_KEY: csrf_token,\n        },\n        \"test-secret\",\n        3600,\n    )\n    client.cookies.set(get_pkce_cookie_name(wrong_audience_state), \"verifier123\")\n\n    response = client.get(\n        \"/auth/oidc/callback\",\n        params={\"code\": \"abc123\", \"state\": wrong_audience_state},\n    )\n\n    assert response.status_code == 400\n    assert response.json()[\"error_code\"] == \"VALIDATION_ERROR\"\n    assert response.json()[\"detail\"] == \"ACCESS_TOKEN_DECODE_ERROR\"\n    assert oauth_client.access_token_calls == []\n    assert \"Max-Age=0\" in response.headers.get(\"set-cookie\", \"\")\n\n\ndef test_oidc_callback_uses_code_verifier_when_pkce_enabled() -> None:\n    client, oauth_client, user_manager = _build_test_client(enable_pkce=True)\n    authorize_response = client.get(\"/auth/oidc/authorize\")\n    state = _extract_state_from_authorize_response(authorize_response)\n\n    with patch(\n        \"onyx.auth.users.fetch_ee_implementation_or_noop\",\n        return_value=lambda _email: \"tenant_1\",\n    ):\n        response = client.get(\n            \"/auth/oidc/callback\",\n            params={\"code\": \"abc123\", \"state\": state},\n            follow_redirects=False,\n        )\n\n    assert response.status_code == 302\n    assert response.headers.get(\"location\") == \"/\"\n    assert oauth_client.access_token_calls[0][\"code_verifier\"] is not None\n    user_manager.oauth_callback.assert_awaited_once()\n    assert \"Max-Age=0\" in response.headers.get(\"set-cookie\", \"\")\n\n\ndef test_oidc_callback_works_without_pkce_when_flag_disabled() -> None:\n    client, oauth_client, user_manager = _build_test_client(enable_pkce=False)\n    authorize_response = client.get(\"/auth/oidc/authorize\")\n    state = _extract_state_from_authorize_response(authorize_response)\n\n    with patch(\n        \"onyx.auth.users.fetch_ee_implementation_or_noop\",\n        return_value=lambda _email: \"tenant_1\",\n    ):\n        response = client.get(\n            \"/auth/oidc/callback\",\n            params={\"code\": \"abc123\", \"state\": state},\n            follow_redirects=False,\n        )\n\n    assert response.status_code == 302\n    assert oauth_client.access_token_calls[0][\"code_verifier\"] is None\n    user_manager.oauth_callback.assert_awaited_once()\n\n\ndef test_oidc_callback_pkce_preserves_redirect_when_backend_login_is_non_redirect() -> (\n    None\n):\n    client, oauth_client, user_manager = _build_test_client(\n        enable_pkce=True,\n        login_status_code=200,\n    )\n    authorize_response = client.get(\"/auth/oidc/authorize\")\n    state = _extract_state_from_authorize_response(authorize_response)\n\n    with patch(\n        \"onyx.auth.users.fetch_ee_implementation_or_noop\",\n        return_value=lambda _email: \"tenant_1\",\n    ):\n        response = client.get(\n            \"/auth/oidc/callback\",\n            params={\"code\": \"abc123\", \"state\": state},\n            follow_redirects=False,\n        )\n\n    assert response.status_code == 302\n    assert response.headers.get(\"location\") == \"/\"\n    assert oauth_client.access_token_calls[0][\"code_verifier\"] is not None\n    user_manager.oauth_callback.assert_awaited_once()\n    assert \"Max-Age=0\" in response.headers.get(\"set-cookie\", \"\")\n\n\ndef test_oidc_callback_non_pkce_rejects_csrf_mismatch() -> None:\n    client, oauth_client, _ = _build_test_client(enable_pkce=False)\n    authorize_response = client.get(\"/auth/oidc/authorize\")\n    state = _extract_state_from_authorize_response(authorize_response)\n\n    client.cookies.set(CSRF_TOKEN_COOKIE_NAME, \"wrong-csrf-token\")\n\n    response = client.get(\n        \"/auth/oidc/callback\",\n        params={\"code\": \"abc123\", \"state\": state},\n    )\n\n    assert response.status_code == 400\n    assert response.json()[\"error_code\"] == \"VALIDATION_ERROR\"\n    assert response.json()[\"detail\"] == \"OAUTH_INVALID_STATE\"\n    # NOTE: In the non-PKCE path, oauth2_authorize_callback exchanges the code\n    # before route-body CSRF validation runs. This is a known ordering trade-off.\n    assert oauth_client.access_token_calls\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_permissions.py",
    "content": "\"\"\"\nUnit tests for onyx.auth.permissions — pure logic and FastAPI dependency.\n\"\"\"\n\nfrom unittest.mock import MagicMock\n\nimport pytest\n\nfrom onyx.auth.permissions import ALL_PERMISSIONS\nfrom onyx.auth.permissions import get_effective_permissions\nfrom onyx.auth.permissions import require_permission\nfrom onyx.auth.permissions import resolve_effective_permissions\nfrom onyx.db.enums import Permission\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\n\n\n# ---------------------------------------------------------------------------\n# resolve_effective_permissions\n# ---------------------------------------------------------------------------\n\n\nclass TestResolveEffectivePermissions:\n    def test_empty_set(self) -> None:\n        assert resolve_effective_permissions(set()) == set()\n\n    def test_basic_no_implications(self) -> None:\n        result = resolve_effective_permissions({\"basic\"})\n        assert result == {\"basic\"}\n\n    def test_single_implication(self) -> None:\n        result = resolve_effective_permissions({\"add:agents\"})\n        assert result == {\"add:agents\", \"read:agents\"}\n\n    def test_manage_agents_implies_add_and_read(self) -> None:\n        \"\"\"manage:agents directly maps to {add:agents, read:agents}.\"\"\"\n        result = resolve_effective_permissions({\"manage:agents\"})\n        assert result == {\"manage:agents\", \"add:agents\", \"read:agents\"}\n\n    def test_manage_connectors_chain(self) -> None:\n        result = resolve_effective_permissions({\"manage:connectors\"})\n        assert result == {\"manage:connectors\", \"add:connectors\", \"read:connectors\"}\n\n    def test_manage_document_sets(self) -> None:\n        result = resolve_effective_permissions({\"manage:document_sets\"})\n        assert result == {\n            \"manage:document_sets\",\n            \"read:document_sets\",\n            \"read:connectors\",\n        }\n\n    def test_manage_user_groups_implies_all_reads(self) -> None:\n        result = resolve_effective_permissions({\"manage:user_groups\"})\n        assert result == {\n            \"manage:user_groups\",\n            \"read:connectors\",\n            \"read:document_sets\",\n            \"read:agents\",\n            \"read:users\",\n        }\n\n    def test_admin_override(self) -> None:\n        result = resolve_effective_permissions({\"admin\"})\n        assert result == set(ALL_PERMISSIONS)\n\n    def test_admin_with_others(self) -> None:\n        result = resolve_effective_permissions({\"admin\", \"basic\"})\n        assert result == set(ALL_PERMISSIONS)\n\n    def test_multi_group_union(self) -> None:\n        result = resolve_effective_permissions(\n            {\"add:agents\", \"manage:connectors\", \"basic\"}\n        )\n        assert result == {\n            \"basic\",\n            \"add:agents\",\n            \"read:agents\",\n            \"manage:connectors\",\n            \"add:connectors\",\n            \"read:connectors\",\n        }\n\n    def test_toggle_permission_no_implications(self) -> None:\n        result = resolve_effective_permissions({\"read:agent_analytics\"})\n        assert result == {\"read:agent_analytics\"}\n\n    def test_all_permissions_for_admin(self) -> None:\n        result = resolve_effective_permissions({\"admin\"})\n        assert len(result) == len(ALL_PERMISSIONS)\n\n\n# ---------------------------------------------------------------------------\n# get_effective_permissions (expands implied at read time)\n# ---------------------------------------------------------------------------\n\n\nclass TestGetEffectivePermissions:\n    def test_expands_implied_permissions(self) -> None:\n        \"\"\"Column stores only granted; get_effective_permissions expands implied.\"\"\"\n        user = MagicMock()\n        user.effective_permissions = [\"add:agents\"]\n        result = get_effective_permissions(user)\n        assert result == {Permission.ADD_AGENTS, Permission.READ_AGENTS}\n\n    def test_admin_expands_to_all(self) -> None:\n        user = MagicMock()\n        user.effective_permissions = [\"admin\"]\n        result = get_effective_permissions(user)\n        assert result == set(Permission)\n\n    def test_basic_stays_basic(self) -> None:\n        user = MagicMock()\n        user.effective_permissions = [\"basic\"]\n        result = get_effective_permissions(user)\n        assert result == {Permission.BASIC_ACCESS}\n\n    def test_empty_column(self) -> None:\n        user = MagicMock()\n        user.effective_permissions = []\n        result = get_effective_permissions(user)\n        assert result == set()\n\n\n# ---------------------------------------------------------------------------\n# require_permission (FastAPI dependency)\n# ---------------------------------------------------------------------------\n\n\nclass TestRequirePermission:\n    @pytest.mark.asyncio\n    async def test_admin_bypass(self) -> None:\n        \"\"\"Admin stored in column should pass any permission check.\"\"\"\n        user = MagicMock()\n        user.effective_permissions = [\"admin\"]\n\n        dep = require_permission(Permission.MANAGE_CONNECTORS)\n        result = await dep(user=user)\n        assert result is user\n\n    @pytest.mark.asyncio\n    async def test_has_required_permission(self) -> None:\n        user = MagicMock()\n        user.effective_permissions = [\"manage:connectors\"]\n\n        dep = require_permission(Permission.MANAGE_CONNECTORS)\n        result = await dep(user=user)\n        assert result is user\n\n    @pytest.mark.asyncio\n    async def test_implied_permission_passes(self) -> None:\n        \"\"\"manage:connectors implies read:connectors at read time.\"\"\"\n        user = MagicMock()\n        user.effective_permissions = [\"manage:connectors\"]\n\n        dep = require_permission(Permission.READ_CONNECTORS)\n        result = await dep(user=user)\n        assert result is user\n\n    @pytest.mark.asyncio\n    async def test_missing_permission_raises(self) -> None:\n        user = MagicMock()\n        user.effective_permissions = [\"basic\"]\n\n        dep = require_permission(Permission.MANAGE_CONNECTORS)\n        with pytest.raises(OnyxError) as exc_info:\n            await dep(user=user)\n        assert exc_info.value.error_code == OnyxErrorCode.INSUFFICIENT_PERMISSIONS\n\n    @pytest.mark.asyncio\n    async def test_empty_permissions_fails(self) -> None:\n        user = MagicMock()\n        user.effective_permissions = []\n\n        dep = require_permission(Permission.BASIC_ACCESS)\n        with pytest.raises(OnyxError):\n            await dep(user=user)\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_single_tenant_jwt_strategy.py",
    "content": "import uuid\nfrom datetime import datetime\nfrom datetime import timezone\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\n\nimport jwt\nimport pytest\n\nfrom onyx.auth.users import SingleTenantJWTStrategy\n\n\n_TEST_SECRET = \"test-secret-key-for-jwt-unit-tests\"\n_TEST_LIFETIME = 3600  # 1 hour\n\n\ndef _make_strategy(\n    lifetime_seconds: int | None = _TEST_LIFETIME,\n) -> SingleTenantJWTStrategy:\n    return SingleTenantJWTStrategy(\n        secret=_TEST_SECRET,\n        lifetime_seconds=lifetime_seconds,\n    )\n\n\ndef _make_user(user_id: uuid.UUID | None = None) -> MagicMock:\n    user = MagicMock()\n    user.id = user_id or uuid.uuid4()\n    user.email = \"test@example.com\"\n    return user\n\n\ndef _make_user_manager(user: MagicMock) -> MagicMock:\n    manager = MagicMock()\n    manager.parse_id = MagicMock(return_value=user.id)\n    manager.get = AsyncMock(return_value=user)\n    return manager\n\n\n@pytest.mark.asyncio\nasync def test_write_token_produces_valid_jwt() -> None:\n    \"\"\"write_token should return a JWT whose claims contain sub and iat.\"\"\"\n    strategy = _make_strategy()\n    user = _make_user()\n\n    token = await strategy.write_token(user)\n\n    payload = jwt.decode(\n        token, _TEST_SECRET, algorithms=[\"HS256\"], audience=[\"fastapi-users:auth\"]\n    )\n    assert payload[\"sub\"] == str(user.id)\n    assert \"iat\" in payload\n    assert \"exp\" in payload\n\n\n@pytest.mark.asyncio\nasync def test_write_token_iat_is_accurate() -> None:\n    \"\"\"The iat claim should be close to the current time.\"\"\"\n    strategy = _make_strategy()\n    user = _make_user()\n    before = int(datetime.now(timezone.utc).timestamp())\n\n    token = await strategy.write_token(user)\n\n    payload = jwt.decode(\n        token, _TEST_SECRET, algorithms=[\"HS256\"], audience=[\"fastapi-users:auth\"]\n    )\n    after = int(datetime.now(timezone.utc).timestamp())\n    assert before <= payload[\"iat\"] <= after\n\n\n@pytest.mark.asyncio\nasync def test_read_token_returns_user() -> None:\n    \"\"\"read_token should decode the JWT and return the corresponding user.\"\"\"\n    strategy = _make_strategy()\n    user = _make_user()\n    manager = _make_user_manager(user)\n\n    token = await strategy.write_token(user)\n    result = await strategy.read_token(token, manager)\n\n    assert result is user\n    manager.parse_id.assert_called_once_with(str(user.id))\n    manager.get.assert_called_once_with(user.id)\n\n\n@pytest.mark.asyncio\nasync def test_read_token_returns_none_for_none() -> None:\n    \"\"\"read_token should return None when token is None.\"\"\"\n    strategy = _make_strategy()\n    manager = _make_user_manager(_make_user())\n\n    result = await strategy.read_token(None, manager)\n    assert result is None\n\n\n@pytest.mark.asyncio\nasync def test_read_token_returns_none_for_bad_signature() -> None:\n    \"\"\"read_token should return None for a token signed with a different secret.\"\"\"\n    strategy = _make_strategy()\n    user = _make_user()\n    manager = _make_user_manager(user)\n\n    bad_strategy = SingleTenantJWTStrategy(secret=\"wrong-secret\", lifetime_seconds=3600)\n    bad_token = await bad_strategy.write_token(user)\n\n    result = await strategy.read_token(bad_token, manager)\n    assert result is None\n\n\n@pytest.mark.asyncio\nasync def test_read_token_returns_none_for_expired_token() -> None:\n    \"\"\"read_token should return None when the token has expired.\"\"\"\n    # lifetime_seconds=0 doesn't set exp, so we craft a token manually\n    strategy = _make_strategy()\n    user = _make_user()\n    manager = _make_user_manager(user)\n\n    expired_payload = {\n        \"sub\": str(user.id),\n        \"aud\": [\"fastapi-users:auth\"],\n        \"iat\": 1000000000,\n        \"exp\": 1000000001,  # expired long ago\n    }\n    expired_token = jwt.encode(expired_payload, _TEST_SECRET, algorithm=\"HS256\")\n\n    result = await strategy.read_token(expired_token, manager)\n    assert result is None\n\n\n@pytest.mark.asyncio\nasync def test_destroy_token_is_noop() -> None:\n    \"\"\"destroy_token should not raise — JWTs can't be server-side invalidated.\"\"\"\n    strategy = _make_strategy()\n    user = _make_user()\n    token = await strategy.write_token(user)\n\n    # Should complete without error\n    await strategy.destroy_token(token, user)\n\n\n@pytest.mark.asyncio\nasync def test_refresh_token_returns_new_jwt() -> None:\n    \"\"\"refresh_token should issue a fresh JWT (different from the original).\"\"\"\n    strategy = _make_strategy()\n    user = _make_user()\n\n    original_token = await strategy.write_token(user)\n    refreshed_token = await strategy.refresh_token(original_token, user)\n\n    # Tokens contain different iat/exp, so the encoded strings should differ\n    # (unless generated in the same second — but we check claims to be safe)\n    refreshed_payload = jwt.decode(\n        refreshed_token,\n        _TEST_SECRET,\n        algorithms=[\"HS256\"],\n        audience=[\"fastapi-users:auth\"],\n    )\n    assert refreshed_payload[\"sub\"] == str(user.id)\n    assert \"iat\" in refreshed_payload\n    assert \"exp\" in refreshed_payload\n\n\n@pytest.mark.asyncio\nasync def test_refresh_token_with_none_creates_new() -> None:\n    \"\"\"refresh_token(None, user) should create a brand-new token.\"\"\"\n    strategy = _make_strategy()\n    user = _make_user()\n\n    token = await strategy.refresh_token(None, user)\n\n    payload = jwt.decode(\n        token, _TEST_SECRET, algorithms=[\"HS256\"], audience=[\"fastapi-users:auth\"]\n    )\n    assert payload[\"sub\"] == str(user.id)\n\n\n@pytest.mark.asyncio\nasync def test_write_token_no_lifetime_omits_exp() -> None:\n    \"\"\"When lifetime_seconds is None, the token should have no exp claim.\"\"\"\n    strategy = _make_strategy(lifetime_seconds=None)\n    user = _make_user()\n\n    token = await strategy.write_token(user)\n\n    payload = jwt.decode(\n        token,\n        _TEST_SECRET,\n        algorithms=[\"HS256\"],\n        audience=[\"fastapi-users:auth\"],\n        options={\"verify_exp\": False},\n    )\n    assert payload[\"sub\"] == str(user.id)\n    assert \"exp\" not in payload\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_user_create_schema.py",
    "content": "\"\"\"\nUnit tests for UserCreate schema dict methods.\n\nVerifies that account_type is always included in create_update_dict\nand create_update_dict_superuser.\n\"\"\"\n\nfrom onyx.auth.schemas import UserCreate\nfrom onyx.db.enums import AccountType\n\n\ndef test_create_update_dict_includes_default_account_type() -> None:\n    uc = UserCreate(email=\"a@b.com\", password=\"secret123\")\n    d = uc.create_update_dict()\n    assert d[\"account_type\"] == AccountType.STANDARD\n\n\ndef test_create_update_dict_includes_explicit_account_type() -> None:\n    uc = UserCreate(\n        email=\"a@b.com\", password=\"secret123\", account_type=AccountType.SERVICE_ACCOUNT\n    )\n    d = uc.create_update_dict()\n    assert d[\"account_type\"] == AccountType.STANDARD\n\n\ndef test_create_update_dict_superuser_includes_account_type() -> None:\n    uc = UserCreate(email=\"a@b.com\", password=\"secret123\")\n    d = uc.create_update_dict_superuser()\n    assert d[\"account_type\"] == AccountType.STANDARD\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_user_default_pins.py",
    "content": "from unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\n\nimport pytest\nfrom sqlalchemy.ext.asyncio import AsyncSession\n\nfrom onyx.auth.users import UserManager\n\n\ndef _build_db_session(return_ids: list[int]) -> MagicMock:\n    scalar_result = MagicMock()\n    scalar_result.all.return_value = return_ids\n    execute_result = MagicMock()\n    execute_result.scalars.return_value = scalar_result\n\n    db_session = MagicMock(spec=AsyncSession)\n    db_session.execute = AsyncMock(return_value=execute_result)\n    return db_session\n\n\n@pytest.mark.asyncio\nasync def test_assign_default_pinned_assistants_populates_ids(\n    mock_user: MagicMock,\n) -> None:\n    user_db = MagicMock()\n    user_db.update = AsyncMock()\n\n    user_manager = UserManager(user_db)\n\n    mock_user.pinned_assistants = None\n\n    db_session = _build_db_session([1, 5, 10])\n\n    await user_manager._assign_default_pinned_assistants(mock_user, db_session)\n\n    assert db_session.execute.await_count == 1\n    user_db.update.assert_awaited_once()\n    await_args = user_db.update.await_args\n    assert await_args\n    assert await_args.args == (mock_user, {\"pinned_assistants\": [1, 5, 10]})\n    assert mock_user.pinned_assistants == [1, 5, 10]\n\n\n@pytest.mark.asyncio\nasync def test_assign_default_pinned_assistants_skips_when_no_defaults(\n    mock_user: MagicMock,\n) -> None:\n    user_db = MagicMock()\n    user_db.update = AsyncMock()\n\n    user_manager = UserManager(user_db)\n    mock_user.pinned_assistants = None\n\n    db_session = _build_db_session([])\n\n    await user_manager._assign_default_pinned_assistants(mock_user, db_session)\n\n    assert db_session.execute.await_count == 1\n    user_db.update.assert_not_awaited()\n    assert mock_user.pinned_assistants is None\n\n\n@pytest.mark.asyncio\nasync def test_assign_default_pinned_assistants_noop_if_already_set(\n    mock_user: MagicMock,\n) -> None:\n    user_db = MagicMock()\n    user_db.update = AsyncMock()\n\n    user_manager = UserManager(user_db)\n    mock_user.pinned_assistants = [3]\n\n    db_session = _build_db_session([1, 2, 3])\n\n    await user_manager._assign_default_pinned_assistants(mock_user, db_session)\n\n    user_db.update.assert_not_awaited()\n    assert db_session.execute.await_count == 0\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_user_registration.py",
    "content": "\"\"\"\nUnit tests for the user registration workflow in UserManager.create().\n\nTests cover:\n1. Disposable email validation (before tenant provisioning)\n2. Multi-tenant vs single-tenant invite logic\n3. SAML/OIDC SSO bypass behavior\n4. Empty whitelist vs populated whitelist scenarios\n5. Case-insensitive email matching for existing user checks\n\"\"\"\n\nfrom types import TracebackType\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.auth.schemas import UserCreate\nfrom onyx.auth.users import UserManager\nfrom onyx.configs.constants import AuthType\nfrom onyx.error_handling.exceptions import OnyxError\n\n# Note: Only async test methods are marked with @pytest.mark.asyncio individually\n# to avoid warnings on synchronous tests\n\n\n@pytest.fixture\ndef mock_user_create() -> UserCreate:\n    \"\"\"Create a mock UserCreate object for testing.\"\"\"\n    return UserCreate(\n        email=\"newuser@example.com\",\n        password=\"SecurePassword123!\",\n        is_verified=False,\n    )\n\n\n@pytest.fixture\ndef mock_async_session() -> MagicMock:\n    \"\"\"Create a mock async database session.\"\"\"\n    session = MagicMock()\n    session.execute = AsyncMock()\n    session.scalar = AsyncMock()\n    session.commit = AsyncMock()\n    session.rollback = AsyncMock()\n    return session\n\n\nclass _AsyncSessionContextManager:\n    def __init__(self, session: MagicMock) -> None:\n        self._session = session\n\n    async def __aenter__(self) -> MagicMock:\n        return self._session\n\n    async def __aexit__(\n        self,\n        exc_type: type[BaseException] | None,\n        exc: BaseException | None,\n        tb: TracebackType | None,\n    ) -> bool:\n        return False\n\n\ndef _mock_user_manager_methods(user_manager: UserManager) -> None:\n    setattr(user_manager, \"validate_password\", AsyncMock())\n    setattr(user_manager, \"_assign_default_pinned_assistants\", AsyncMock())\n\n\nclass TestDisposableEmailValidation:\n    \"\"\"Test disposable email validation before tenant provisioning.\"\"\"\n\n    @pytest.mark.asyncio\n    @patch(\"onyx.auth.users.is_disposable_email\")\n    @patch(\"onyx.auth.users.fetch_ee_implementation_or_noop\")\n    @patch(\"onyx.auth.users.get_async_session_context_manager\")\n    @patch(\"onyx.auth.users.get_user_count\", new_callable=AsyncMock)\n    async def test_blocks_disposable_email_before_tenant_provision(\n        self,\n        mock_get_user_count: MagicMock,  # noqa: ARG002\n        mock_session_manager: MagicMock,  # noqa: ARG002\n        mock_fetch_ee: MagicMock,\n        mock_is_disposable: MagicMock,\n        mock_user_create: UserCreate,\n    ) -> None:\n        \"\"\"Disposable emails should be blocked before tenant provisioning.\"\"\"\n        # Setup\n        mock_is_disposable.return_value = True\n        user_manager = UserManager(MagicMock())\n\n        # Execute & Assert\n        with pytest.raises(OnyxError) as exc:\n            await user_manager.create(mock_user_create)\n\n        assert exc.value.status_code == 400\n        assert \"Disposable email\" in exc.value.detail\n        # Verify we never got to tenant provisioning\n        mock_fetch_ee.assert_not_called()\n\n    @pytest.mark.asyncio\n    @patch(\"onyx.auth.users.is_disposable_email\")\n    @patch(\"onyx.auth.users.verify_email_domain\")\n    @patch(\"onyx.auth.users.fetch_ee_implementation_or_noop\")\n    @patch(\"onyx.auth.users.get_async_session_context_manager\")\n    @patch(\"onyx.auth.users.get_user_count\", new_callable=AsyncMock)\n    @patch(\"onyx.auth.users.MULTI_TENANT\", False)\n    async def test_allows_valid_email_domain(\n        self,\n        mock_get_user_count: MagicMock,\n        mock_session_manager: MagicMock,\n        mock_fetch_ee: MagicMock,\n        mock_verify_domain: MagicMock,\n        mock_is_disposable: MagicMock,\n        mock_user_create: UserCreate,\n        mock_async_session: MagicMock,\n    ) -> None:\n        \"\"\"Valid emails should pass domain validation.\"\"\"\n        # Setup\n        mock_is_disposable.return_value = False\n        mock_verify_domain.return_value = None  # No exception = valid\n        mock_fetch_ee.return_value = AsyncMock(return_value=\"default_schema\")\n        mock_session_manager.return_value = _AsyncSessionContextManager(\n            mock_async_session\n        )\n        mock_get_user_count.return_value = 0\n\n        user_manager = UserManager(MagicMock())\n        _mock_user_manager_methods(user_manager)\n\n        # Mock the user_db to avoid actual database operations\n        mock_user_db = MagicMock()\n        mock_user_db.create = AsyncMock(return_value=MagicMock(id=\"test-id\"))\n        user_manager.user_db = mock_user_db\n\n        try:\n            await user_manager.create(mock_user_create)\n        except Exception:\n            pass  # We just want to verify domain check passed\n\n        # Verify domain validation was called\n        mock_verify_domain.assert_called_once_with(\n            mock_user_create.email, is_registration=True\n        )\n\n\nclass TestMultiTenantInviteLogic:\n    \"\"\"Test invite logic for multi-tenant environments.\"\"\"\n\n    @patch(\"onyx.auth.users.SQLAlchemyUserAdminDB\")\n    @patch(\"onyx.auth.users.is_disposable_email\", return_value=False)\n    @patch(\"onyx.auth.users.verify_email_domain\")\n    @patch(\"onyx.auth.users.fetch_ee_implementation_or_noop\")\n    @patch(\"onyx.auth.users.get_async_session_context_manager\")\n    @patch(\"onyx.auth.users.get_user_count\", new_callable=AsyncMock)\n    @patch(\"onyx.auth.users.verify_email_is_invited\")\n    @patch(\"onyx.auth.users.MULTI_TENANT\", True)\n    @patch(\"onyx.auth.users.CURRENT_TENANT_ID_CONTEXTVAR\")\n    @pytest.mark.asyncio\n    async def test_first_user_no_invite_required(\n        self,\n        mock_context_var: MagicMock,\n        mock_verify_invited: MagicMock,\n        mock_get_user_count: MagicMock,\n        mock_session_manager: MagicMock,\n        mock_fetch_ee: MagicMock,\n        mock_verify_domain: MagicMock,  # noqa: ARG002\n        mock_is_disposable: MagicMock,  # noqa: ARG002\n        mock_sql_alchemy_db: MagicMock,\n        mock_user_create: UserCreate,\n        mock_async_session: MagicMock,\n    ) -> None:\n        \"\"\"First user in tenant should not require invite.\"\"\"\n        # Setup: No existing users\n        mock_get_user_count.return_value = 0\n        mock_fetch_ee.return_value = AsyncMock(return_value=\"tenant_123\")\n        mock_session_manager.return_value = _AsyncSessionContextManager(\n            mock_async_session\n        )\n        mock_context_var.set.return_value = MagicMock()\n\n        # Mock the user_db to avoid actual database operations\n        mock_user_db = MagicMock()\n        mock_user_db.create = AsyncMock(return_value=MagicMock(id=\"test-id\"))\n        mock_sql_alchemy_db.return_value = mock_user_db\n\n        user_manager = UserManager(MagicMock())\n        _mock_user_manager_methods(user_manager)\n\n        try:\n            await user_manager.create(mock_user_create)\n        except Exception:\n            pass\n\n        # Verify invite check was NOT called (user_count = 0)\n        mock_verify_invited.assert_not_called()\n\n    @patch(\"onyx.auth.users.SQLAlchemyUserAdminDB\")\n    @patch(\"onyx.auth.users.is_disposable_email\", return_value=False)\n    @patch(\"onyx.auth.users.verify_email_domain\")\n    @patch(\"onyx.auth.users.fetch_ee_implementation_or_noop\")\n    @patch(\"onyx.auth.users.get_async_session_context_manager\")\n    @patch(\"onyx.auth.users.get_user_count\", new_callable=AsyncMock)\n    @patch(\"onyx.auth.users.verify_email_is_invited\")\n    @patch(\"onyx.auth.users.MULTI_TENANT\", True)\n    @patch(\"onyx.auth.users.CURRENT_TENANT_ID_CONTEXTVAR\")\n    @pytest.mark.asyncio\n    async def test_subsequent_user_requires_invite(\n        self,\n        mock_context_var: MagicMock,\n        mock_verify_invited: MagicMock,\n        mock_get_user_count: MagicMock,\n        mock_session_manager: MagicMock,\n        mock_fetch_ee: MagicMock,\n        mock_verify_domain: MagicMock,  # noqa: ARG002\n        mock_is_disposable: MagicMock,  # noqa: ARG002\n        mock_sql_alchemy_db: MagicMock,\n        mock_user_create: UserCreate,\n        mock_async_session: MagicMock,\n    ) -> None:\n        \"\"\"Subsequent users in existing tenant should require invite.\"\"\"\n        # Setup: Existing tenant with users\n        mock_get_user_count.return_value = 5\n        mock_fetch_ee.return_value = AsyncMock(return_value=\"tenant_123\")\n        mock_session_manager.return_value = _AsyncSessionContextManager(\n            mock_async_session\n        )\n        mock_context_var.set.return_value = MagicMock()\n\n        # Mock the user_db to avoid actual database operations\n        mock_user_db = MagicMock()\n        mock_user_db.create = AsyncMock(return_value=MagicMock(id=\"test-id\"))\n        mock_sql_alchemy_db.return_value = mock_user_db\n\n        user_manager = UserManager(MagicMock())\n        _mock_user_manager_methods(user_manager)\n\n        try:\n            await user_manager.create(mock_user_create)\n        except Exception:\n            pass\n\n        # Verify invite check WAS called (user_count > 0)\n        mock_verify_invited.assert_called_once_with(mock_user_create.email)\n\n\nclass TestSingleTenantInviteLogic:\n    \"\"\"Test invite logic for single-tenant environments.\"\"\"\n\n    @patch(\"onyx.auth.users.is_disposable_email\", return_value=False)\n    @patch(\"onyx.auth.users.verify_email_domain\")\n    @patch(\"onyx.auth.users.fetch_ee_implementation_or_noop\")\n    @patch(\"onyx.auth.users.get_async_session_context_manager\")\n    @patch(\"onyx.auth.users.get_user_count\", new_callable=AsyncMock)\n    @patch(\"onyx.auth.users.verify_email_is_invited\")\n    @patch(\"onyx.auth.users.MULTI_TENANT\", False)\n    @patch(\"onyx.auth.users.CURRENT_TENANT_ID_CONTEXTVAR\")\n    @pytest.mark.asyncio\n    async def test_always_checks_invite_list(\n        self,\n        mock_context_var: MagicMock,\n        mock_verify_invited: MagicMock,\n        mock_get_user_count: MagicMock,\n        mock_session_manager: MagicMock,\n        mock_fetch_ee: MagicMock,\n        mock_verify_domain: MagicMock,  # noqa: ARG002\n        mock_is_disposable: MagicMock,  # noqa: ARG002\n        mock_user_create: UserCreate,\n        mock_async_session: MagicMock,\n    ) -> None:\n        \"\"\"Single-tenant should always check invite list.\"\"\"\n        # Setup\n        mock_fetch_ee.return_value = AsyncMock(return_value=\"default_schema\")\n        mock_session_manager.return_value = _AsyncSessionContextManager(\n            mock_async_session\n        )\n        mock_get_user_count.return_value = 0\n        mock_context_var.set.return_value = MagicMock()\n\n        user_manager = UserManager(MagicMock())\n        _mock_user_manager_methods(user_manager)\n\n        # Mock the user_db to avoid actual database operations\n        mock_user_db = MagicMock()\n        mock_user_db.create = AsyncMock(return_value=MagicMock(id=\"test-id\"))\n        user_manager.user_db = mock_user_db\n\n        try:\n            await user_manager.create(mock_user_create)\n        except Exception:\n            pass\n\n        # Verify invite check was called\n        mock_verify_invited.assert_called_once_with(mock_user_create.email)\n\n\nclass TestSAMLOIDCBehavior:\n    \"\"\"Test SSO (SAML/OIDC) bypass of invite whitelist.\"\"\"\n\n    @pytest.mark.parametrize(\"auth_type\", [AuthType.SAML, AuthType.OIDC])\n    @patch(\"onyx.auth.users.get_invited_users\")\n    @patch(\"onyx.auth.users.workspace_invite_only_enabled\", return_value=True)\n    @patch(\"onyx.auth.users.AUTH_TYPE\")\n    def test_sso_bypasses_whitelist(\n        self,\n        mock_auth_type: MagicMock,\n        _mock_invite_only: MagicMock,\n        mock_get_invited: MagicMock,\n        auth_type: AuthType,\n    ) -> None:\n        \"\"\"SAML/OIDC should bypass invite whitelist.\"\"\"\n        from onyx.auth.users import verify_email_is_invited\n\n        # Setup\n        mock_auth_type.return_value = auth_type\n        mock_get_invited.return_value = [\"allowed@example.com\"]\n\n        # Execute - should not raise even with populated whitelist\n        with patch(\"onyx.auth.users.AUTH_TYPE\", auth_type):\n            verify_email_is_invited(\"newuser@example.com\")  # Should not raise\n\n    @patch(\"onyx.auth.users.get_invited_users\")\n    @patch(\"onyx.auth.users.workspace_invite_only_enabled\", return_value=True)\n    @patch(\"onyx.auth.users.AUTH_TYPE\", AuthType.BASIC)\n    def test_basic_auth_enforces_whitelist(\n        self,\n        mock_get_invited: MagicMock,\n        _mock_invite_only: MagicMock,\n    ) -> None:\n        \"\"\"Basic auth should enforce invite whitelist.\"\"\"\n        from onyx.auth.users import verify_email_is_invited\n\n        # Setup\n        mock_get_invited.return_value = [\"allowed@example.com\"]\n\n        # Execute & Assert\n        with pytest.raises(OnyxError) as exc:\n            verify_email_is_invited(\"newuser@example.com\")\n        assert exc.value.status_code == 403\n\n\nclass TestWhitelistBehavior:\n    \"\"\"Test invite whitelist scenarios.\"\"\"\n\n    @patch(\"onyx.auth.users.workspace_invite_only_enabled\", return_value=False)\n    @patch(\"onyx.auth.users.get_invited_users\")\n    @patch(\"onyx.auth.users.AUTH_TYPE\", AuthType.BASIC)\n    def test_empty_whitelist_allows_all(\n        self,\n        mock_get_invited: MagicMock,\n        _mock_invite_only: MagicMock,\n    ) -> None:\n        \"\"\"Empty whitelist should allow all users.\"\"\"\n        from onyx.auth.users import verify_email_is_invited\n\n        # Setup: Empty whitelist\n        mock_get_invited.return_value = []\n\n        # Execute - should not raise\n        verify_email_is_invited(\"anyone@example.com\")\n\n    @patch(\"onyx.auth.users.workspace_invite_only_enabled\", return_value=False)\n    @patch(\"onyx.auth.users.get_invited_users\")\n    @patch(\"onyx.auth.users.AUTH_TYPE\", AuthType.BASIC)\n    def test_invite_only_disabled_allows_non_invited_users(\n        self,\n        mock_get_invited: MagicMock,\n        _mock_invite_only: MagicMock,\n    ) -> None:\n        from onyx.auth.users import verify_email_is_invited\n\n        mock_get_invited.return_value = [\"allowed@example.com\"]\n\n        verify_email_is_invited(\"notallowed@example.com\")\n\n    @patch(\"onyx.auth.users.workspace_invite_only_enabled\", return_value=True)\n    @patch(\"onyx.auth.users.get_invited_users\")\n    @patch(\"onyx.auth.users.AUTH_TYPE\", AuthType.BASIC)\n    def test_whitelist_blocks_non_invited(\n        self,\n        mock_get_invited: MagicMock,\n        _mock_invite_only: MagicMock,\n    ) -> None:\n        \"\"\"Populated whitelist should block non-invited users.\"\"\"\n        from onyx.auth.users import verify_email_is_invited\n\n        # Setup\n        mock_get_invited.return_value = [\"allowed@example.com\"]\n\n        # Execute & Assert\n        with pytest.raises(OnyxError) as exc:\n            verify_email_is_invited(\"notallowed@example.com\")\n\n        assert exc.value.status_code == 403\n\n    @patch(\"onyx.auth.users.workspace_invite_only_enabled\", return_value=True)\n    @patch(\"onyx.auth.users.get_invited_users\")\n    @patch(\"onyx.auth.users.AUTH_TYPE\", AuthType.BASIC)\n    def test_whitelist_allows_invited_case_insensitive(\n        self,\n        mock_get_invited: MagicMock,\n        _mock_invite_only: MagicMock,\n    ) -> None:\n        \"\"\"Whitelist should match emails case-insensitively.\"\"\"\n        from onyx.auth.users import verify_email_is_invited\n\n        # Setup\n        mock_get_invited.return_value = [\"allowed@example.com\"]\n\n        # Execute - should not raise (case-insensitive match)\n        verify_email_is_invited(\"ALLOWED@EXAMPLE.COM\")\n        verify_email_is_invited(\"Allowed@Example.Com\")\n\n\nclass TestSeatLimitEnforcement:\n    \"\"\"Seat limits block new user creation on self-hosted deployments.\"\"\"\n\n    def test_adding_user_fails_when_seats_full(self) -> None:\n        from onyx.auth.users import enforce_seat_limit\n\n        seat_result = MagicMock(available=False, error_message=\"Seat limit reached\")\n        with patch(\n            \"onyx.auth.users.fetch_ee_implementation_or_noop\",\n            return_value=lambda *_a, **_kw: seat_result,\n        ):\n            with pytest.raises(OnyxError) as exc:\n                enforce_seat_limit(MagicMock())\n\n            assert exc.value.status_code == 402\n\n    def test_seat_limit_only_enforced_for_self_hosted(self) -> None:\n        from onyx.auth.users import enforce_seat_limit\n\n        with patch(\"onyx.auth.users.MULTI_TENANT\", True):\n            enforce_seat_limit(MagicMock())  # should not raise\n\n\nclass TestCaseInsensitiveEmailMatching:\n    \"\"\"Test case-insensitive email matching for existing user checks.\"\"\"\n\n    @patch(\"onyx.auth.users.is_disposable_email\", return_value=False)\n    @patch(\"onyx.auth.users.verify_email_domain\")\n    @patch(\"onyx.auth.users.fetch_ee_implementation_or_noop\")\n    @patch(\"onyx.auth.users.get_async_session_context_manager\")\n    @patch(\"onyx.auth.users.get_user_count\", new_callable=AsyncMock)\n    @patch(\"onyx.auth.users.SQLAlchemyUserAdminDB\")\n    @patch(\"onyx.auth.users.MULTI_TENANT\", True)\n    @patch(\"onyx.auth.users.CURRENT_TENANT_ID_CONTEXTVAR\")\n    @pytest.mark.asyncio\n    async def test_existing_user_check_case_insensitive(\n        self,\n        mock_context_var: MagicMock,\n        mock_sql_alchemy_db: MagicMock,\n        mock_get_user_count: MagicMock,\n        mock_session_manager: MagicMock,\n        mock_fetch_ee: MagicMock,\n        mock_verify_domain: MagicMock,\n        mock_is_disposable: MagicMock,  # noqa: ARG002\n        mock_async_session: MagicMock,\n    ) -> None:\n        \"\"\"Existing user check should use case-insensitive email comparison.\"\"\"\n\n        # Setup\n        mock_get_user_count.return_value = 0  # First user - no invite needed\n        mock_fetch_ee.return_value = AsyncMock(return_value=\"tenant_123\")\n        mock_session_manager.return_value = _AsyncSessionContextManager(\n            mock_async_session\n        )\n        mock_context_var.set.return_value = MagicMock()\n\n        # Create a result mock\n        result_mock = MagicMock()\n        result_mock.scalar_one_or_none.return_value = None\n        mock_async_session.execute.return_value = result_mock\n\n        user_create = UserCreate(\n            email=\"NewUser@Example.COM\",\n            password=\"SecurePassword123!\",\n            is_verified=False,\n        )\n\n        user_manager = UserManager(MagicMock())\n        _mock_user_manager_methods(user_manager)\n\n        # Mock the user_db to avoid actual database operations\n        mock_user_db = MagicMock()\n        mock_user_db.create = AsyncMock(return_value=MagicMock(id=\"test-id\"))\n        mock_sql_alchemy_db.return_value = mock_user_db\n\n        try:\n            await user_manager.create(user_create)\n        except Exception:\n            pass\n\n        # Verify flow\n        mock_verify_domain.assert_called_once_with(\n            user_create.email, is_registration=True\n        )\n\n    @patch(\"onyx.auth.users.is_disposable_email\")\n    @patch(\"onyx.auth.users.verify_email_domain\")\n    @patch(\"onyx.auth.users.fetch_ee_implementation_or_noop\")\n    @patch(\"onyx.auth.users.get_async_session_context_manager\")\n    @patch(\"onyx.auth.users.get_user_count\", new_callable=AsyncMock)\n    @patch(\"onyx.auth.users.verify_email_is_invited\")\n    @patch(\"onyx.auth.users.SQLAlchemyUserAdminDB\")\n    @patch(\"onyx.auth.users.MULTI_TENANT\", True)\n    @patch(\"onyx.auth.users.CURRENT_TENANT_ID_CONTEXTVAR\")\n    @pytest.mark.asyncio\n    async def test_full_registration_flow_existing_tenant(\n        self,\n        mock_context_var: MagicMock,\n        mock_sql_alchemy_db: MagicMock,\n        mock_verify_invited: MagicMock,\n        mock_get_user_count: MagicMock,\n        mock_session_manager: MagicMock,\n        mock_fetch_ee: MagicMock,\n        mock_verify_domain: MagicMock,\n        mock_is_disposable: MagicMock,\n        mock_user_create: UserCreate,\n        mock_async_session: MagicMock,\n    ) -> None:\n        \"\"\"Test complete flow: valid email, existing tenant, invite required.\"\"\"\n        # Setup: All validations pass, existing tenant\n        mock_is_disposable.return_value = False\n        mock_verify_domain.return_value = None\n        mock_get_user_count.return_value = 10  # Existing tenant\n        mock_fetch_ee.return_value = AsyncMock(return_value=\"existing_tenant_789\")\n        mock_session_manager.return_value = _AsyncSessionContextManager(\n            mock_async_session\n        )\n        mock_context_var.set.return_value = MagicMock()\n\n        user_manager = UserManager(MagicMock())\n        _mock_user_manager_methods(user_manager)\n\n        # Mock the user_db to avoid actual database operations\n        mock_user_db = MagicMock()\n        mock_user_db.create = AsyncMock(return_value=MagicMock(id=\"test-id\"))\n        mock_sql_alchemy_db.return_value = mock_user_db\n\n        try:\n            await user_manager.create(mock_user_create)\n        except Exception:\n            pass\n\n        # Verify flow\n        mock_verify_domain.assert_called_once_with(\n            mock_user_create.email, is_registration=True\n        )\n        mock_verify_invited.assert_called_once()  # Existing tenant = invite needed\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_verify_auth_setting.py",
    "content": "from unittest.mock import MagicMock\n\nimport pytest\n\nimport onyx.auth.users as users\nfrom onyx.auth.users import verify_auth_setting\nfrom onyx.configs.constants import AuthType\n\n\ndef test_verify_auth_setting_raises_for_cloud(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"Cloud auth type is not valid for self-hosted deployments.\"\"\"\n    monkeypatch.setenv(\"AUTH_TYPE\", \"cloud\")\n\n    with pytest.raises(ValueError, match=\"'cloud' is not a valid auth type\"):\n        verify_auth_setting()\n\n\ndef test_verify_auth_setting_warns_for_disabled(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"Disabled auth type logs a deprecation warning.\"\"\"\n    monkeypatch.setenv(\"AUTH_TYPE\", \"disabled\")\n\n    mock_logger = MagicMock()\n    monkeypatch.setattr(users, \"logger\", mock_logger)\n    monkeypatch.setattr(users, \"AUTH_TYPE\", AuthType.BASIC)\n\n    verify_auth_setting()\n\n    mock_logger.warning.assert_called_once()\n    assert \"no longer supported\" in mock_logger.warning.call_args[0][0]\n\n\n@pytest.mark.parametrize(\n    \"auth_type\",\n    [AuthType.BASIC, AuthType.GOOGLE_OAUTH, AuthType.OIDC, AuthType.SAML],\n)\ndef test_verify_auth_setting_valid_auth_types(\n    monkeypatch: pytest.MonkeyPatch,\n    auth_type: AuthType,\n) -> None:\n    \"\"\"Valid auth types work without errors or warnings.\"\"\"\n    monkeypatch.setenv(\"AUTH_TYPE\", auth_type.value)\n\n    mock_logger = MagicMock()\n    monkeypatch.setattr(users, \"logger\", mock_logger)\n    monkeypatch.setattr(users, \"AUTH_TYPE\", auth_type)\n\n    verify_auth_setting()\n\n    mock_logger.warning.assert_not_called()\n    mock_logger.notice.assert_called_once_with(f\"Using Auth Type: {auth_type.value}\")\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_verify_email_domain.py",
    "content": "import pytest\n\nimport onyx.auth.users as users\nfrom onyx.auth.users import verify_email_domain\nfrom onyx.configs.constants import AuthType\nfrom onyx.error_handling.exceptions import OnyxError\n\n\ndef test_verify_email_domain_allows_case_insensitive_match(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    # Configure whitelist to lowercase while email has uppercase domain\n    monkeypatch.setattr(users, \"VALID_EMAIL_DOMAINS\", [\"example.com\"], raising=False)\n\n    # Should not raise\n    verify_email_domain(\"User@EXAMPLE.COM\")\n\n\ndef test_verify_email_domain_rejects_non_whitelisted_domain(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(users, \"VALID_EMAIL_DOMAINS\", [\"example.com\"], raising=False)\n\n    with pytest.raises(OnyxError) as exc:\n        verify_email_domain(\"user@another.com\")\n    assert exc.value.status_code == 400\n    assert \"Email domain is not valid\" in exc.value.detail\n\n\ndef test_verify_email_domain_invalid_email_format(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(users, \"VALID_EMAIL_DOMAINS\", [\"example.com\"], raising=False)\n\n    with pytest.raises(OnyxError) as exc:\n        verify_email_domain(\"userexample.com\")  # missing '@'\n    assert exc.value.status_code == 400\n    assert \"Email is not valid\" in exc.value.detail\n\n\ndef test_verify_email_domain_rejects_plus_addressing(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(users, \"VALID_EMAIL_DOMAINS\", [], raising=False)\n    monkeypatch.setattr(users, \"AUTH_TYPE\", AuthType.CLOUD, raising=False)\n\n    with pytest.raises(OnyxError) as exc:\n        verify_email_domain(\"user+tag@gmail.com\")\n    assert exc.value.status_code == 400\n    assert \"'+'\" in exc.value.detail\n\n\ndef test_verify_email_domain_allows_plus_for_onyx_app(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(users, \"VALID_EMAIL_DOMAINS\", [], raising=False)\n    monkeypatch.setattr(users, \"AUTH_TYPE\", AuthType.CLOUD, raising=False)\n\n    # Should not raise for onyx.app domain\n    verify_email_domain(\"user+tag@onyx.app\")\n\n\ndef test_verify_email_domain_rejects_dotted_gmail_on_registration(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(users, \"VALID_EMAIL_DOMAINS\", [], raising=False)\n    monkeypatch.setattr(users, \"AUTH_TYPE\", AuthType.CLOUD, raising=False)\n\n    with pytest.raises(OnyxError) as exc:\n        verify_email_domain(\"first.last@gmail.com\", is_registration=True)\n    assert exc.value.status_code == 400\n    assert \"'.'\" in exc.value.detail\n\n\ndef test_verify_email_domain_dotted_gmail_allowed_when_not_registration(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(users, \"VALID_EMAIL_DOMAINS\", [], raising=False)\n    monkeypatch.setattr(users, \"AUTH_TYPE\", AuthType.CLOUD, raising=False)\n\n    # Existing user signing in — should not be blocked\n    verify_email_domain(\"first.last@gmail.com\", is_registration=False)\n\n\ndef test_verify_email_domain_allows_dotted_non_gmail_on_registration(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(users, \"VALID_EMAIL_DOMAINS\", [], raising=False)\n    monkeypatch.setattr(users, \"AUTH_TYPE\", AuthType.CLOUD, raising=False)\n\n    verify_email_domain(\"first.last@example.com\", is_registration=True)\n\n\ndef test_verify_email_domain_dotted_gmail_allowed_when_not_cloud(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(users, \"VALID_EMAIL_DOMAINS\", [], raising=False)\n    monkeypatch.setattr(users, \"AUTH_TYPE\", AuthType.BASIC, raising=False)\n\n    verify_email_domain(\"first.last@gmail.com\", is_registration=True)\n\n\ndef test_verify_email_domain_rejects_googlemail(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(users, \"VALID_EMAIL_DOMAINS\", [], raising=False)\n    monkeypatch.setattr(users, \"AUTH_TYPE\", AuthType.CLOUD, raising=False)\n\n    with pytest.raises(OnyxError) as exc:\n        verify_email_domain(\"user@googlemail.com\")\n    assert exc.value.status_code == 400\n    assert \"gmail.com\" in exc.value.detail\n"
  },
  {
    "path": "backend/tests/unit/onyx/auth/test_verify_email_invite.py",
    "content": "import pytest\n\nimport onyx.auth.users as users\nfrom onyx.auth.users import verify_email_is_invited\nfrom onyx.configs.constants import AuthType\nfrom onyx.error_handling.exceptions import OnyxError\n\n\n@pytest.mark.parametrize(\"auth_type\", [AuthType.SAML, AuthType.OIDC])\ndef test_verify_email_is_invited_skips_whitelist_for_sso(\n    monkeypatch: pytest.MonkeyPatch, auth_type: AuthType\n) -> None:\n    monkeypatch.setattr(users, \"AUTH_TYPE\", auth_type, raising=False)\n    monkeypatch.setattr(users, \"workspace_invite_only_enabled\", lambda: True)\n    monkeypatch.setattr(\n        users,\n        \"get_invited_users\",\n        lambda: [\"allowed@example.com\"],\n        raising=False,\n    )\n\n    # Should not raise even though whitelist is populated\n    verify_email_is_invited(\"newuser@example.com\")\n\n\ndef test_verify_email_is_invited_enforced_for_basic_auth(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(users, \"AUTH_TYPE\", AuthType.BASIC, raising=False)\n    monkeypatch.setattr(users, \"workspace_invite_only_enabled\", lambda: True)\n    monkeypatch.setattr(\n        users,\n        \"get_invited_users\",\n        lambda: [\"allowed@example.com\"],\n        raising=False,\n    )\n\n    with pytest.raises(OnyxError) as exc:\n        verify_email_is_invited(\"newuser@example.com\")\n    assert exc.value.status_code == 403\n\n\ndef test_verify_email_is_invited_skipped_when_invite_only_disabled(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(users, \"AUTH_TYPE\", AuthType.BASIC, raising=False)\n    monkeypatch.setattr(users, \"workspace_invite_only_enabled\", lambda: False)\n    monkeypatch.setattr(\n        users,\n        \"get_invited_users\",\n        lambda: [\"allowed@example.com\"],\n        raising=False,\n    )\n\n    verify_email_is_invited(\"newuser@example.com\")\n"
  },
  {
    "path": "backend/tests/unit/onyx/background/celery/tasks/tenant_provisioning/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/background/celery/tasks/tenant_provisioning/test_check_available_tenants.py",
    "content": "\"\"\"\nUnit tests for the check_available_tenants task.\n\nTests verify:\n- Provisioning loop calls pre_provision_tenant the correct number of times\n- Batch size is capped at _MAX_TENANTS_PER_RUN\n- A failure in one provisioning call does not stop subsequent calls\n- No provisioning happens when pool is already full\n- TARGET_AVAILABLE_TENANTS is respected\n\"\"\"\n\nfrom unittest.mock import MagicMock\n\nimport pytest\n\nfrom ee.onyx.background.celery.tasks.tenant_provisioning.tasks import (\n    _MAX_TENANTS_PER_RUN,\n)\nfrom ee.onyx.background.celery.tasks.tenant_provisioning.tasks import (\n    check_available_tenants,\n)\n\n# Access the underlying function directly, bypassing Celery's task wrapper\n# which injects `self` as the first argument when bind=True.\n_check_available_tenants = check_available_tenants.run\n\n\n@pytest.fixture()\ndef _enable_multi_tenant(monkeypatch: pytest.MonkeyPatch) -> None:\n    monkeypatch.setattr(\n        \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.MULTI_TENANT\",\n        True,\n    )\n\n\n@pytest.fixture()\ndef mock_redis(monkeypatch: pytest.MonkeyPatch) -> MagicMock:\n    mock_lock = MagicMock()\n    mock_lock.acquire.return_value = True\n\n    mock_client = MagicMock()\n    mock_client.lock.return_value = mock_lock\n\n    monkeypatch.setattr(\n        \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.get_redis_client\",\n        lambda tenant_id: mock_client,  # noqa: ARG005\n    )\n    return mock_client\n\n\n@pytest.fixture()\ndef mock_pre_provision(monkeypatch: pytest.MonkeyPatch) -> MagicMock:\n    mock = MagicMock(return_value=True)\n    monkeypatch.setattr(\n        \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.pre_provision_tenant\",\n        mock,\n    )\n    return mock\n\n\ndef _mock_available_count(monkeypatch: pytest.MonkeyPatch, count: int) -> None:\n    \"\"\"Set up the DB session mock to return a specific available tenant count.\"\"\"\n    mock_session = MagicMock()\n    mock_session.__enter__ = MagicMock(return_value=mock_session)\n    mock_session.__exit__ = MagicMock(return_value=False)\n    mock_session.query.return_value.count.return_value = count\n\n    monkeypatch.setattr(\n        \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.get_session_with_shared_schema\",\n        lambda: mock_session,\n    )\n\n\n@pytest.mark.usefixtures(\"_enable_multi_tenant\", \"mock_redis\")\nclass TestCheckAvailableTenants:\n    def test_provisions_all_needed_tenants(\n        self,\n        monkeypatch: pytest.MonkeyPatch,\n        mock_pre_provision: MagicMock,\n    ) -> None:\n        \"\"\"When pool has 2 and target is 5, should provision 3.\"\"\"\n        monkeypatch.setattr(\n            \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS\",\n            5,\n        )\n        _mock_available_count(monkeypatch, 2)\n\n        _check_available_tenants()\n\n        assert mock_pre_provision.call_count == 3\n\n    def test_batch_capped_at_max_per_run(\n        self,\n        monkeypatch: pytest.MonkeyPatch,\n        mock_pre_provision: MagicMock,\n    ) -> None:\n        \"\"\"When pool needs more than _MAX_TENANTS_PER_RUN, cap the batch.\"\"\"\n        monkeypatch.setattr(\n            \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS\",\n            20,\n        )\n        _mock_available_count(monkeypatch, 0)\n\n        _check_available_tenants()\n\n        assert mock_pre_provision.call_count == _MAX_TENANTS_PER_RUN\n\n    def test_no_provisioning_when_pool_full(\n        self,\n        monkeypatch: pytest.MonkeyPatch,\n        mock_pre_provision: MagicMock,\n    ) -> None:\n        \"\"\"When pool already meets target, should not provision anything.\"\"\"\n        monkeypatch.setattr(\n            \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS\",\n            5,\n        )\n        _mock_available_count(monkeypatch, 5)\n\n        _check_available_tenants()\n\n        assert mock_pre_provision.call_count == 0\n\n    def test_no_provisioning_when_pool_exceeds_target(\n        self,\n        monkeypatch: pytest.MonkeyPatch,\n        mock_pre_provision: MagicMock,\n    ) -> None:\n        \"\"\"When pool exceeds target, should not provision anything.\"\"\"\n        monkeypatch.setattr(\n            \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS\",\n            5,\n        )\n        _mock_available_count(monkeypatch, 8)\n\n        _check_available_tenants()\n\n        assert mock_pre_provision.call_count == 0\n\n    def test_failure_does_not_stop_remaining(\n        self,\n        monkeypatch: pytest.MonkeyPatch,\n        mock_pre_provision: MagicMock,\n    ) -> None:\n        \"\"\"If one provisioning fails, the rest should still be attempted.\"\"\"\n        monkeypatch.setattr(\n            \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS\",\n            5,\n        )\n        _mock_available_count(monkeypatch, 0)\n\n        # Fail on calls 2 and 4 (1-indexed)\n        call_count = 0\n\n        def side_effect() -> bool:\n            nonlocal call_count\n            call_count += 1\n            if call_count in (2, 4):\n                raise RuntimeError(\"provisioning failed\")\n            return True\n\n        mock_pre_provision.side_effect = side_effect\n\n        _check_available_tenants()\n\n        # All 5 should be attempted despite 2 failures\n        assert mock_pre_provision.call_count == 5\n\n    def test_skips_when_not_multi_tenant(\n        self,\n        monkeypatch: pytest.MonkeyPatch,\n        mock_pre_provision: MagicMock,\n    ) -> None:\n        \"\"\"Should not provision when multi-tenancy is disabled.\"\"\"\n        monkeypatch.setattr(\n            \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.MULTI_TENANT\",\n            False,\n        )\n\n        _check_available_tenants()\n\n        assert mock_pre_provision.call_count == 0\n\n    def test_skips_when_lock_not_acquired(\n        self,\n        mock_redis: MagicMock,\n        mock_pre_provision: MagicMock,\n    ) -> None:\n        \"\"\"Should skip when another instance holds the lock.\"\"\"\n        mock_redis.lock.return_value.acquire.return_value = False\n\n        _check_available_tenants()\n\n        assert mock_pre_provision.call_count == 0\n\n    def test_lock_release_failure_does_not_raise(\n        self,\n        monkeypatch: pytest.MonkeyPatch,\n        mock_redis: MagicMock,\n        mock_pre_provision: MagicMock,\n    ) -> None:\n        \"\"\"LockNotOwnedError on release should be caught, not propagated.\"\"\"\n        from redis.exceptions import LockNotOwnedError\n\n        monkeypatch.setattr(\n            \"ee.onyx.background.celery.tasks.tenant_provisioning.tasks.TARGET_AVAILABLE_TENANTS\",\n            5,\n        )\n        _mock_available_count(monkeypatch, 4)\n\n        mock_redis.lock.return_value.release.side_effect = LockNotOwnedError(\"expired\")\n\n        # Should not raise\n        _check_available_tenants()\n\n        assert mock_pre_provision.call_count == 1\n"
  },
  {
    "path": "backend/tests/unit/onyx/background/celery/tasks/test_hierarchyfetching_queue.py",
    "content": "from unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.background.celery.tasks.hierarchyfetching.tasks import (\n    _connector_supports_hierarchy_fetching,\n)\nfrom onyx.background.celery.tasks.hierarchyfetching.tasks import (\n    check_for_hierarchy_fetching,\n)\nfrom onyx.connectors.factory import ConnectorMissingException\nfrom onyx.connectors.interfaces import BaseConnector\nfrom onyx.connectors.interfaces import HierarchyConnector\nfrom onyx.connectors.interfaces import HierarchyOutput\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\n\nTASKS_MODULE = \"onyx.background.celery.tasks.hierarchyfetching.tasks\"\n\n\nclass _NonHierarchyConnector(BaseConnector):\n    def load_credentials(self, credentials: dict) -> dict | None:  # noqa: ARG002\n        return None\n\n\nclass _HierarchyCapableConnector(HierarchyConnector):\n    def load_credentials(self, credentials: dict) -> dict | None:  # noqa: ARG002\n        return None\n\n    def load_hierarchy(\n        self,\n        start: SecondsSinceUnixEpoch,  # noqa: ARG002\n        end: SecondsSinceUnixEpoch,  # noqa: ARG002\n    ) -> HierarchyOutput:\n        return\n        yield\n\n\ndef _build_cc_pair_mock() -> MagicMock:\n    cc_pair = MagicMock()\n    cc_pair.connector.source = \"mock-source\"\n    cc_pair.connector.input_type = \"mock-input-type\"\n    return cc_pair\n\n\ndef _build_redis_mock_with_lock() -> tuple[MagicMock, MagicMock]:\n    redis_client = MagicMock()\n    lock = MagicMock()\n    lock.acquire.return_value = True\n    lock.owned.return_value = True\n    redis_client.lock.return_value = lock\n    return redis_client, lock\n\n\n@patch(f\"{TASKS_MODULE}.identify_connector_class\")\ndef test_connector_supports_hierarchy_fetching_false_for_non_hierarchy_connector(\n    mock_identify_connector_class: MagicMock,\n) -> None:\n    mock_identify_connector_class.return_value = _NonHierarchyConnector\n\n    assert _connector_supports_hierarchy_fetching(_build_cc_pair_mock()) is False\n    mock_identify_connector_class.assert_called_once_with(\"mock-source\")\n\n\n@patch(f\"{TASKS_MODULE}.task_logger.warning\")\n@patch(f\"{TASKS_MODULE}.identify_connector_class\")\ndef test_connector_supports_hierarchy_fetching_false_when_class_missing(\n    mock_identify_connector_class: MagicMock,\n    mock_warning: MagicMock,\n) -> None:\n    mock_identify_connector_class.side_effect = ConnectorMissingException(\"missing\")\n\n    assert _connector_supports_hierarchy_fetching(_build_cc_pair_mock()) is False\n    mock_warning.assert_called_once()\n\n\n@patch(f\"{TASKS_MODULE}.identify_connector_class\")\ndef test_connector_supports_hierarchy_fetching_true_for_supported_connector(\n    mock_identify_connector_class: MagicMock,\n) -> None:\n    mock_identify_connector_class.return_value = _HierarchyCapableConnector\n\n    assert _connector_supports_hierarchy_fetching(_build_cc_pair_mock()) is True\n    mock_identify_connector_class.assert_called_once_with(\"mock-source\")\n\n\n@patch(f\"{TASKS_MODULE}._try_creating_hierarchy_fetching_task\")\n@patch(f\"{TASKS_MODULE}._is_hierarchy_fetching_due\")\n@patch(f\"{TASKS_MODULE}.get_connector_credential_pair_from_id\")\n@patch(f\"{TASKS_MODULE}.fetch_indexable_standard_connector_credential_pair_ids\")\n@patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n@patch(f\"{TASKS_MODULE}.get_redis_client\")\n@patch(f\"{TASKS_MODULE}._connector_supports_hierarchy_fetching\")\ndef test_check_for_hierarchy_fetching_skips_unsupported_connectors(\n    mock_supports_hierarchy_fetching: MagicMock,\n    mock_get_redis_client: MagicMock,\n    mock_get_session: MagicMock,\n    mock_fetch_cc_pair_ids: MagicMock,\n    mock_get_cc_pair: MagicMock,\n    mock_is_due: MagicMock,\n    mock_try_create_task: MagicMock,\n) -> None:\n    redis_client, lock = _build_redis_mock_with_lock()\n    mock_get_redis_client.return_value = redis_client\n    mock_get_session.return_value.__enter__.return_value = MagicMock()\n    mock_fetch_cc_pair_ids.return_value = [123]\n    mock_get_cc_pair.return_value = _build_cc_pair_mock()\n    mock_supports_hierarchy_fetching.return_value = False\n    mock_is_due.return_value = True\n\n    task_app = MagicMock()\n    with patch.object(check_for_hierarchy_fetching, \"app\", task_app):\n        result = check_for_hierarchy_fetching.run(tenant_id=\"test-tenant\")\n\n    assert result == 0\n    mock_is_due.assert_not_called()\n    mock_try_create_task.assert_not_called()\n    lock.release.assert_called_once()\n\n\n@patch(f\"{TASKS_MODULE}._try_creating_hierarchy_fetching_task\")\n@patch(f\"{TASKS_MODULE}._is_hierarchy_fetching_due\")\n@patch(f\"{TASKS_MODULE}.get_connector_credential_pair_from_id\")\n@patch(f\"{TASKS_MODULE}.fetch_indexable_standard_connector_credential_pair_ids\")\n@patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n@patch(f\"{TASKS_MODULE}.get_redis_client\")\n@patch(f\"{TASKS_MODULE}._connector_supports_hierarchy_fetching\")\ndef test_check_for_hierarchy_fetching_creates_task_for_supported_due_connector(\n    mock_supports_hierarchy_fetching: MagicMock,\n    mock_get_redis_client: MagicMock,\n    mock_get_session: MagicMock,\n    mock_fetch_cc_pair_ids: MagicMock,\n    mock_get_cc_pair: MagicMock,\n    mock_is_due: MagicMock,\n    mock_try_create_task: MagicMock,\n) -> None:\n    redis_client, lock = _build_redis_mock_with_lock()\n    cc_pair = _build_cc_pair_mock()\n    db_session = MagicMock()\n    mock_get_redis_client.return_value = redis_client\n    mock_get_session.return_value.__enter__.return_value = db_session\n    mock_fetch_cc_pair_ids.return_value = [123]\n    mock_get_cc_pair.return_value = cc_pair\n    mock_supports_hierarchy_fetching.return_value = True\n    mock_is_due.return_value = True\n    mock_try_create_task.return_value = \"task-id\"\n\n    task_app = MagicMock()\n    with patch.object(check_for_hierarchy_fetching, \"app\", task_app):\n        result = check_for_hierarchy_fetching.run(tenant_id=\"test-tenant\")\n\n    assert result == 1\n    mock_is_due.assert_called_once_with(cc_pair)\n    mock_try_create_task.assert_called_once_with(\n        celery_app=task_app,\n        cc_pair=cc_pair,\n        db_session=db_session,\n        r=redis_client,\n        tenant_id=\"test-tenant\",\n    )\n    lock.release.assert_called_once()\n\n\n@patch(f\"{TASKS_MODULE}._try_creating_hierarchy_fetching_task\")\n@patch(f\"{TASKS_MODULE}._is_hierarchy_fetching_due\")\n@patch(f\"{TASKS_MODULE}.get_connector_credential_pair_from_id\")\n@patch(f\"{TASKS_MODULE}.fetch_indexable_standard_connector_credential_pair_ids\")\n@patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n@patch(f\"{TASKS_MODULE}.get_redis_client\")\n@patch(f\"{TASKS_MODULE}._connector_supports_hierarchy_fetching\")\ndef test_check_for_hierarchy_fetching_skips_supported_connector_when_not_due(\n    mock_supports_hierarchy_fetching: MagicMock,\n    mock_get_redis_client: MagicMock,\n    mock_get_session: MagicMock,\n    mock_fetch_cc_pair_ids: MagicMock,\n    mock_get_cc_pair: MagicMock,\n    mock_is_due: MagicMock,\n    mock_try_create_task: MagicMock,\n) -> None:\n    redis_client, lock = _build_redis_mock_with_lock()\n    cc_pair = _build_cc_pair_mock()\n    mock_get_redis_client.return_value = redis_client\n    mock_get_session.return_value.__enter__.return_value = MagicMock()\n    mock_fetch_cc_pair_ids.return_value = [123]\n    mock_get_cc_pair.return_value = cc_pair\n    mock_supports_hierarchy_fetching.return_value = True\n    mock_is_due.return_value = False\n\n    task_app = MagicMock()\n    with patch.object(check_for_hierarchy_fetching, \"app\", task_app):\n        result = check_for_hierarchy_fetching.run(tenant_id=\"test-tenant\")\n\n    assert result == 0\n    mock_is_due.assert_called_once_with(cc_pair)\n    mock_try_create_task.assert_not_called()\n    lock.release.assert_called_once()\n"
  },
  {
    "path": "backend/tests/unit/onyx/background/celery/tasks/test_user_file_impl_redis_locking.py",
    "content": "\"\"\"Tests for the _impl functions' redis_locking parameter.\n\nVerifies that:\n- redis_locking=True acquires/releases Redis locks and clears queued keys\n- redis_locking=False skips all Redis operations entirely\n- Both paths execute the same business logic (DB lookup, status check)\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    delete_user_file_impl,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    process_user_file_impl,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    project_sync_user_file_impl,\n)\n\nTASKS_MODULE = \"onyx.background.celery.tasks.user_file_processing.tasks\"\n\n\ndef _mock_session_returning_none() -> MagicMock:\n    \"\"\"Return a mock session whose .get() returns None (file not found).\"\"\"\n    session = MagicMock()\n    session.get.return_value = None\n    return session\n\n\n# ------------------------------------------------------------------\n# process_user_file_impl\n# ------------------------------------------------------------------\n\n\nclass TestProcessUserFileImpl:\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TASKS_MODULE}.get_redis_client\")\n    def test_redis_locking_true_acquires_and_releases_lock(\n        self,\n        mock_get_redis: MagicMock,\n        mock_get_session: MagicMock,\n    ) -> None:\n        redis_client = MagicMock()\n        lock = MagicMock()\n        lock.acquire.return_value = True\n        lock.owned.return_value = True\n        redis_client.lock.return_value = lock\n        mock_get_redis.return_value = redis_client\n\n        session = _mock_session_returning_none()\n        mock_get_session.return_value.__enter__.return_value = session\n\n        user_file_id = str(uuid4())\n        process_user_file_impl(\n            user_file_id=user_file_id,\n            tenant_id=\"test-tenant\",\n            redis_locking=True,\n        )\n\n        mock_get_redis.assert_called_once_with(tenant_id=\"test-tenant\")\n        redis_client.delete.assert_called_once()\n        lock.acquire.assert_called_once_with(blocking=False)\n        lock.release.assert_called_once()\n\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TASKS_MODULE}.get_redis_client\")\n    def test_redis_locking_true_skips_when_lock_held(\n        self,\n        mock_get_redis: MagicMock,\n        mock_get_session: MagicMock,\n    ) -> None:\n        redis_client = MagicMock()\n        lock = MagicMock()\n        lock.acquire.return_value = False\n        redis_client.lock.return_value = lock\n        mock_get_redis.return_value = redis_client\n\n        process_user_file_impl(\n            user_file_id=str(uuid4()),\n            tenant_id=\"test-tenant\",\n            redis_locking=True,\n        )\n\n        lock.acquire.assert_called_once()\n        mock_get_session.assert_not_called()\n\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TASKS_MODULE}.get_redis_client\")\n    def test_redis_locking_false_skips_redis_entirely(\n        self,\n        mock_get_redis: MagicMock,\n        mock_get_session: MagicMock,\n    ) -> None:\n        session = _mock_session_returning_none()\n        mock_get_session.return_value.__enter__.return_value = session\n\n        process_user_file_impl(\n            user_file_id=str(uuid4()),\n            tenant_id=\"test-tenant\",\n            redis_locking=False,\n        )\n\n        mock_get_redis.assert_not_called()\n        mock_get_session.assert_called_once()\n\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TASKS_MODULE}.get_redis_client\")\n    def test_both_paths_call_db_get(\n        self,\n        mock_get_redis: MagicMock,\n        mock_get_session: MagicMock,\n    ) -> None:\n        \"\"\"Both redis_locking=True and False should call db_session.get(UserFile, ...).\"\"\"\n        redis_client = MagicMock()\n        lock = MagicMock()\n        lock.acquire.return_value = True\n        lock.owned.return_value = True\n        redis_client.lock.return_value = lock\n        mock_get_redis.return_value = redis_client\n\n        session = _mock_session_returning_none()\n        mock_get_session.return_value.__enter__.return_value = session\n\n        uid = str(uuid4())\n\n        process_user_file_impl(user_file_id=uid, tenant_id=\"t\", redis_locking=True)\n        call_count_true = session.get.call_count\n\n        session.reset_mock()\n        mock_get_session.reset_mock()\n        mock_get_session.return_value.__enter__.return_value = session\n\n        process_user_file_impl(user_file_id=uid, tenant_id=\"t\", redis_locking=False)\n        call_count_false = session.get.call_count\n\n        assert call_count_true == call_count_false == 1\n\n\n# ------------------------------------------------------------------\n# delete_user_file_impl\n# ------------------------------------------------------------------\n\n\nclass TestDeleteUserFileImpl:\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TASKS_MODULE}.get_redis_client\")\n    def test_redis_locking_true_acquires_and_releases_lock(\n        self,\n        mock_get_redis: MagicMock,\n        mock_get_session: MagicMock,\n    ) -> None:\n        redis_client = MagicMock()\n        lock = MagicMock()\n        lock.acquire.return_value = True\n        lock.owned.return_value = True\n        redis_client.lock.return_value = lock\n        mock_get_redis.return_value = redis_client\n\n        session = _mock_session_returning_none()\n        mock_get_session.return_value.__enter__.return_value = session\n\n        delete_user_file_impl(\n            user_file_id=str(uuid4()),\n            tenant_id=\"test-tenant\",\n            redis_locking=True,\n        )\n\n        mock_get_redis.assert_called_once()\n        lock.acquire.assert_called_once_with(blocking=False)\n        lock.release.assert_called_once()\n\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TASKS_MODULE}.get_redis_client\")\n    def test_redis_locking_true_skips_when_lock_held(\n        self,\n        mock_get_redis: MagicMock,\n        mock_get_session: MagicMock,\n    ) -> None:\n        redis_client = MagicMock()\n        lock = MagicMock()\n        lock.acquire.return_value = False\n        redis_client.lock.return_value = lock\n        mock_get_redis.return_value = redis_client\n\n        delete_user_file_impl(\n            user_file_id=str(uuid4()),\n            tenant_id=\"test-tenant\",\n            redis_locking=True,\n        )\n\n        lock.acquire.assert_called_once()\n        mock_get_session.assert_not_called()\n\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TASKS_MODULE}.get_redis_client\")\n    def test_redis_locking_false_skips_redis_entirely(\n        self,\n        mock_get_redis: MagicMock,\n        mock_get_session: MagicMock,\n    ) -> None:\n        session = _mock_session_returning_none()\n        mock_get_session.return_value.__enter__.return_value = session\n\n        delete_user_file_impl(\n            user_file_id=str(uuid4()),\n            tenant_id=\"test-tenant\",\n            redis_locking=False,\n        )\n\n        mock_get_redis.assert_not_called()\n        mock_get_session.assert_called_once()\n\n\n# ------------------------------------------------------------------\n# project_sync_user_file_impl\n# ------------------------------------------------------------------\n\n\n@patch(\n    f\"{TASKS_MODULE}.fetch_user_files_with_access_relationships\",\n    return_value=[],\n)\nclass TestProjectSyncUserFileImpl:\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TASKS_MODULE}.get_redis_client\")\n    def test_redis_locking_true_acquires_and_releases_lock(\n        self,\n        mock_get_redis: MagicMock,\n        mock_get_session: MagicMock,\n        _mock_fetch: MagicMock,\n    ) -> None:\n        redis_client = MagicMock()\n        lock = MagicMock()\n        lock.acquire.return_value = True\n        lock.owned.return_value = True\n        redis_client.lock.return_value = lock\n        mock_get_redis.return_value = redis_client\n\n        session = _mock_session_returning_none()\n        mock_get_session.return_value.__enter__.return_value = session\n\n        project_sync_user_file_impl(\n            user_file_id=str(uuid4()),\n            tenant_id=\"test-tenant\",\n            redis_locking=True,\n        )\n\n        mock_get_redis.assert_called_once()\n        redis_client.delete.assert_called_once()\n        lock.acquire.assert_called_once_with(blocking=False)\n        lock.release.assert_called_once()\n\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TASKS_MODULE}.get_redis_client\")\n    def test_redis_locking_true_skips_when_lock_held(\n        self,\n        mock_get_redis: MagicMock,\n        mock_get_session: MagicMock,\n        _mock_fetch: MagicMock,\n    ) -> None:\n        redis_client = MagicMock()\n        lock = MagicMock()\n        lock.acquire.return_value = False\n        redis_client.lock.return_value = lock\n        mock_get_redis.return_value = redis_client\n\n        project_sync_user_file_impl(\n            user_file_id=str(uuid4()),\n            tenant_id=\"test-tenant\",\n            redis_locking=True,\n        )\n\n        lock.acquire.assert_called_once()\n        mock_get_session.assert_not_called()\n\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TASKS_MODULE}.get_redis_client\")\n    def test_redis_locking_false_skips_redis_entirely(\n        self,\n        mock_get_redis: MagicMock,\n        mock_get_session: MagicMock,\n        _mock_fetch: MagicMock,\n    ) -> None:\n        session = _mock_session_returning_none()\n        mock_get_session.return_value.__enter__.return_value = session\n\n        project_sync_user_file_impl(\n            user_file_id=str(uuid4()),\n            tenant_id=\"test-tenant\",\n            redis_locking=False,\n        )\n\n        mock_get_redis.assert_not_called()\n        mock_get_session.assert_called_once()\n"
  },
  {
    "path": "backend/tests/unit/onyx/background/celery/tasks/test_user_file_processing_no_vectordb.py",
    "content": "\"\"\"Tests for no-vector-DB user file processing paths.\n\nVerifies that when DISABLE_VECTOR_DB is True:\n- process_user_file_impl calls _process_user_file_without_vector_db (not indexing)\n- _process_user_file_without_vector_db extracts text, counts tokens, stores plaintext,\n  sets status=COMPLETED and chunk_count=0\n- delete_user_file_impl skips vector DB chunk deletion\n- project_sync_user_file_impl skips vector DB metadata update\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    _process_user_file_without_vector_db,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    delete_user_file_impl,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    process_user_file_impl,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    project_sync_user_file_impl,\n)\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import TextSection\nfrom onyx.db.enums import UserFileStatus\n\nTASKS_MODULE = \"onyx.background.celery.tasks.user_file_processing.tasks\"\nLLM_FACTORY_MODULE = \"onyx.llm.factory\"\n\n\ndef _make_documents(texts: list[str]) -> list[Document]:\n    \"\"\"Build a list of Document objects with the given section texts.\"\"\"\n    return [\n        Document(\n            id=str(uuid4()),\n            source=DocumentSource.USER_FILE,\n            sections=[TextSection(text=t)],\n            semantic_identifier=f\"test-doc-{i}\",\n            metadata={},\n        )\n        for i, t in enumerate(texts)\n    ]\n\n\ndef _make_user_file(\n    *,\n    status: UserFileStatus = UserFileStatus.PROCESSING,\n    file_id: str = \"test-file-id\",\n    name: str = \"test.txt\",\n) -> MagicMock:\n    \"\"\"Return a MagicMock mimicking a UserFile ORM instance.\"\"\"\n    uf = MagicMock()\n    uf.id = uuid4()\n    uf.file_id = file_id\n    uf.name = name\n    uf.status = status\n    uf.token_count = None\n    uf.chunk_count = None\n    uf.last_project_sync_at = None\n    uf.projects = []\n    uf.assistants = []\n    uf.needs_project_sync = True\n    uf.needs_persona_sync = True\n    return uf\n\n\n# ------------------------------------------------------------------\n# _process_user_file_without_vector_db — direct tests\n# ------------------------------------------------------------------\n\n\nclass TestProcessUserFileWithoutVectorDb:\n    @patch(f\"{TASKS_MODULE}.store_user_file_plaintext\")\n    @patch(f\"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func\")\n    @patch(f\"{LLM_FACTORY_MODULE}.get_default_llm\")\n    def test_extracts_and_combines_text(\n        self,\n        mock_get_llm: MagicMock,  # noqa: ARG002\n        mock_get_encode: MagicMock,\n        mock_store_plaintext: MagicMock,\n    ) -> None:\n        mock_encode = MagicMock(return_value=[1, 2, 3, 4, 5])\n        mock_get_encode.return_value = mock_encode\n\n        uf = _make_user_file()\n        docs = _make_documents([\"hello world\", \"foo bar\"])\n        db_session = MagicMock()\n\n        _process_user_file_without_vector_db(uf, docs, db_session)\n\n        stored_text = mock_store_plaintext.call_args.kwargs[\"plaintext_content\"]\n        assert \"hello world\" in stored_text\n        assert \"foo bar\" in stored_text\n\n    @patch(f\"{TASKS_MODULE}.store_user_file_plaintext\")\n    @patch(f\"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func\")\n    @patch(f\"{LLM_FACTORY_MODULE}.get_default_llm\")\n    def test_computes_token_count(\n        self,\n        mock_get_llm: MagicMock,  # noqa: ARG002\n        mock_get_encode: MagicMock,\n        mock_store_plaintext: MagicMock,  # noqa: ARG002\n    ) -> None:\n        mock_encode = MagicMock(return_value=list(range(42)))\n        mock_get_encode.return_value = mock_encode\n\n        uf = _make_user_file()\n        docs = _make_documents([\"some text content\"])\n        db_session = MagicMock()\n\n        _process_user_file_without_vector_db(uf, docs, db_session)\n\n        assert uf.token_count == 42\n\n    @patch(f\"{TASKS_MODULE}.store_user_file_plaintext\")\n    @patch(f\"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func\")\n    @patch(f\"{LLM_FACTORY_MODULE}.get_default_llm\")\n    def test_token_count_falls_back_to_none_on_error(\n        self,\n        mock_get_llm: MagicMock,\n        mock_get_encode: MagicMock,  # noqa: ARG002\n        mock_store_plaintext: MagicMock,  # noqa: ARG002\n    ) -> None:\n        mock_get_llm.side_effect = RuntimeError(\"No LLM configured\")\n\n        uf = _make_user_file()\n        docs = _make_documents([\"text\"])\n        db_session = MagicMock()\n\n        _process_user_file_without_vector_db(uf, docs, db_session)\n\n        assert uf.token_count is None\n\n    @patch(f\"{TASKS_MODULE}.store_user_file_plaintext\")\n    @patch(f\"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func\")\n    @patch(f\"{LLM_FACTORY_MODULE}.get_default_llm\")\n    def test_stores_plaintext(\n        self,\n        mock_get_llm: MagicMock,  # noqa: ARG002\n        mock_get_encode: MagicMock,\n        mock_store_plaintext: MagicMock,\n    ) -> None:\n        mock_get_encode.return_value = MagicMock(return_value=[1])\n\n        uf = _make_user_file()\n        docs = _make_documents([\"content to store\"])\n        db_session = MagicMock()\n\n        _process_user_file_without_vector_db(uf, docs, db_session)\n\n        mock_store_plaintext.assert_called_once_with(\n            user_file_id=uf.id,\n            plaintext_content=\"content to store\",\n        )\n\n    @patch(f\"{TASKS_MODULE}.store_user_file_plaintext\")\n    @patch(f\"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func\")\n    @patch(f\"{LLM_FACTORY_MODULE}.get_default_llm\")\n    def test_sets_completed_status_and_zero_chunk_count(\n        self,\n        mock_get_llm: MagicMock,  # noqa: ARG002\n        mock_get_encode: MagicMock,\n        mock_store_plaintext: MagicMock,  # noqa: ARG002\n    ) -> None:\n        mock_get_encode.return_value = MagicMock(return_value=[1])\n\n        uf = _make_user_file()\n        docs = _make_documents([\"text\"])\n        db_session = MagicMock()\n\n        _process_user_file_without_vector_db(uf, docs, db_session)\n\n        assert uf.status == UserFileStatus.COMPLETED\n        assert uf.chunk_count == 0\n        assert uf.last_project_sync_at is not None\n        db_session.add.assert_called_once_with(uf)\n        db_session.commit.assert_called_once()\n\n    @patch(f\"{TASKS_MODULE}.store_user_file_plaintext\")\n    @patch(f\"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func\")\n    @patch(f\"{LLM_FACTORY_MODULE}.get_default_llm\")\n    def test_preserves_deleting_status(\n        self,\n        mock_get_llm: MagicMock,  # noqa: ARG002\n        mock_get_encode: MagicMock,\n        mock_store_plaintext: MagicMock,  # noqa: ARG002\n    ) -> None:\n        mock_get_encode.return_value = MagicMock(return_value=[1])\n\n        uf = _make_user_file(status=UserFileStatus.DELETING)\n        docs = _make_documents([\"text\"])\n        db_session = MagicMock()\n\n        _process_user_file_without_vector_db(uf, docs, db_session)\n\n        assert uf.status == UserFileStatus.DELETING\n        assert uf.chunk_count == 0\n\n\n# ------------------------------------------------------------------\n# process_user_file_impl — branching on DISABLE_VECTOR_DB\n# ------------------------------------------------------------------\n\n\nclass TestProcessImplBranching:\n    @patch(f\"{TASKS_MODULE}._process_user_file_without_vector_db\")\n    @patch(f\"{TASKS_MODULE}._process_user_file_with_indexing\")\n    @patch(f\"{TASKS_MODULE}.DISABLE_VECTOR_DB\", True)\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    def test_calls_without_vector_db_when_disabled(\n        self,\n        mock_get_session: MagicMock,\n        mock_with_indexing: MagicMock,\n        mock_without_vdb: MagicMock,\n    ) -> None:\n        uf = _make_user_file()\n        session = MagicMock()\n        session.get.return_value = uf\n        mock_get_session.return_value.__enter__.return_value = session\n\n        connector_mock = MagicMock()\n        connector_mock.load_from_state.return_value = [_make_documents([\"hello\"])]\n\n        with patch(f\"{TASKS_MODULE}.LocalFileConnector\", return_value=connector_mock):\n            process_user_file_impl(\n                user_file_id=str(uf.id),\n                tenant_id=\"test-tenant\",\n                redis_locking=False,\n            )\n\n        mock_without_vdb.assert_called_once()\n        mock_with_indexing.assert_not_called()\n\n    @patch(f\"{TASKS_MODULE}._process_user_file_without_vector_db\")\n    @patch(f\"{TASKS_MODULE}._process_user_file_with_indexing\")\n    @patch(f\"{TASKS_MODULE}.DISABLE_VECTOR_DB\", False)\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    def test_calls_with_indexing_when_vector_db_enabled(\n        self,\n        mock_get_session: MagicMock,\n        mock_with_indexing: MagicMock,\n        mock_without_vdb: MagicMock,\n    ) -> None:\n        uf = _make_user_file()\n        session = MagicMock()\n        session.get.return_value = uf\n        mock_get_session.return_value.__enter__.return_value = session\n\n        connector_mock = MagicMock()\n        connector_mock.load_from_state.return_value = [_make_documents([\"hello\"])]\n\n        with patch(f\"{TASKS_MODULE}.LocalFileConnector\", return_value=connector_mock):\n            process_user_file_impl(\n                user_file_id=str(uf.id),\n                tenant_id=\"test-tenant\",\n                redis_locking=False,\n            )\n\n        mock_with_indexing.assert_called_once()\n        mock_without_vdb.assert_not_called()\n\n    @patch(f\"{TASKS_MODULE}.run_indexing_pipeline\")\n    @patch(f\"{TASKS_MODULE}.store_user_file_plaintext\")\n    @patch(f\"{TASKS_MODULE}.DISABLE_VECTOR_DB\", True)\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    def test_indexing_pipeline_not_called_when_disabled(\n        self,\n        mock_get_session: MagicMock,\n        mock_store_plaintext: MagicMock,  # noqa: ARG002\n        mock_run_pipeline: MagicMock,\n    ) -> None:\n        \"\"\"End-to-end: verify run_indexing_pipeline is never invoked.\"\"\"\n        uf = _make_user_file()\n        session = MagicMock()\n        session.get.return_value = uf\n        mock_get_session.return_value.__enter__.return_value = session\n\n        connector_mock = MagicMock()\n        connector_mock.load_from_state.return_value = [_make_documents([\"content\"])]\n\n        with (\n            patch(f\"{TASKS_MODULE}.LocalFileConnector\", return_value=connector_mock),\n            patch(f\"{LLM_FACTORY_MODULE}.get_default_llm\"),\n            patch(\n                f\"{LLM_FACTORY_MODULE}.get_llm_tokenizer_encode_func\",\n                return_value=MagicMock(return_value=[1, 2, 3]),\n            ),\n        ):\n            process_user_file_impl(\n                user_file_id=str(uf.id),\n                tenant_id=\"test-tenant\",\n                redis_locking=False,\n            )\n\n        mock_run_pipeline.assert_not_called()\n\n\n# ------------------------------------------------------------------\n# delete_user_file_impl — vector DB skip\n# ------------------------------------------------------------------\n\n\nclass TestDeleteImplNoVectorDb:\n    @patch(f\"{TASKS_MODULE}.DISABLE_VECTOR_DB\", True)\n    @patch(f\"{TASKS_MODULE}.get_default_file_store\")\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    def test_skips_vector_db_deletion(\n        self,\n        mock_get_session: MagicMock,\n        mock_get_file_store: MagicMock,\n    ) -> None:\n        uf = _make_user_file(status=UserFileStatus.DELETING)\n        session = MagicMock()\n        session.get.return_value = uf\n        mock_get_session.return_value.__enter__.return_value = session\n        mock_get_file_store.return_value = MagicMock()\n\n        with (\n            patch(f\"{TASKS_MODULE}.get_all_document_indices\") as mock_get_indices,\n            patch(f\"{TASKS_MODULE}.get_active_search_settings\") as mock_get_ss,\n            patch(f\"{TASKS_MODULE}.httpx_init_vespa_pool\") as mock_vespa_pool,\n        ):\n            delete_user_file_impl(\n                user_file_id=str(uf.id),\n                tenant_id=\"test-tenant\",\n                redis_locking=False,\n            )\n\n            mock_get_indices.assert_not_called()\n            mock_get_ss.assert_not_called()\n            mock_vespa_pool.assert_not_called()\n\n        session.delete.assert_called_once_with(uf)\n        session.commit.assert_called_once()\n\n    @patch(f\"{TASKS_MODULE}.DISABLE_VECTOR_DB\", True)\n    @patch(f\"{TASKS_MODULE}.get_default_file_store\")\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    def test_still_deletes_file_store_and_db_record(\n        self,\n        mock_get_session: MagicMock,\n        mock_get_file_store: MagicMock,\n    ) -> None:\n        uf = _make_user_file(status=UserFileStatus.DELETING)\n        session = MagicMock()\n        session.get.return_value = uf\n        mock_get_session.return_value.__enter__.return_value = session\n\n        file_store = MagicMock()\n        mock_get_file_store.return_value = file_store\n\n        delete_user_file_impl(\n            user_file_id=str(uf.id),\n            tenant_id=\"test-tenant\",\n            redis_locking=False,\n        )\n\n        assert file_store.delete_file.call_count == 2\n        session.delete.assert_called_once_with(uf)\n        session.commit.assert_called_once()\n\n\n# ------------------------------------------------------------------\n# project_sync_user_file_impl — vector DB skip\n# ------------------------------------------------------------------\n\n\nclass TestProjectSyncImplNoVectorDb:\n    @patch(f\"{TASKS_MODULE}.DISABLE_VECTOR_DB\", True)\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    def test_skips_vector_db_update(\n        self,\n        mock_get_session: MagicMock,\n    ) -> None:\n        uf = _make_user_file(status=UserFileStatus.COMPLETED)\n        session = MagicMock()\n        mock_get_session.return_value.__enter__.return_value = session\n\n        with (\n            patch(\n                f\"{TASKS_MODULE}.fetch_user_files_with_access_relationships\",\n                return_value=[uf],\n            ),\n            patch(f\"{TASKS_MODULE}.get_all_document_indices\") as mock_get_indices,\n            patch(f\"{TASKS_MODULE}.get_active_search_settings\") as mock_get_ss,\n            patch(f\"{TASKS_MODULE}.httpx_init_vespa_pool\") as mock_vespa_pool,\n        ):\n            project_sync_user_file_impl(\n                user_file_id=str(uf.id),\n                tenant_id=\"test-tenant\",\n                redis_locking=False,\n            )\n\n            mock_get_indices.assert_not_called()\n            mock_get_ss.assert_not_called()\n            mock_vespa_pool.assert_not_called()\n\n    @patch(f\"{TASKS_MODULE}.DISABLE_VECTOR_DB\", True)\n    @patch(f\"{TASKS_MODULE}.get_session_with_current_tenant\")\n    def test_still_clears_sync_flags(\n        self,\n        mock_get_session: MagicMock,\n    ) -> None:\n        uf = _make_user_file(status=UserFileStatus.COMPLETED)\n        session = MagicMock()\n        mock_get_session.return_value.__enter__.return_value = session\n\n        with patch(\n            f\"{TASKS_MODULE}.fetch_user_files_with_access_relationships\",\n            return_value=[uf],\n        ):\n            project_sync_user_file_impl(\n                user_file_id=str(uf.id),\n                tenant_id=\"test-tenant\",\n                redis_locking=False,\n            )\n\n        assert uf.needs_project_sync is False\n        assert uf.needs_persona_sync is False\n        assert uf.last_project_sync_at is not None\n        session.add.assert_called_once_with(uf)\n        session.commit.assert_called_once()\n"
  },
  {
    "path": "backend/tests/unit/onyx/background/celery/tasks/test_user_file_project_sync_queue.py",
    "content": "from unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\n\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    _user_file_project_sync_queued_key,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    check_for_user_file_project_sync,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    enqueue_user_file_project_sync_task,\n)\nfrom onyx.background.celery.tasks.user_file_processing.tasks import (\n    process_single_user_file_project_sync,\n)\nfrom onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES\nfrom onyx.configs.constants import OnyxCeleryPriority\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH\n\n\ndef _build_redis_mock_with_lock() -> tuple[MagicMock, MagicMock]:\n    redis_client = MagicMock()\n    lock = MagicMock()\n    lock.acquire.return_value = True\n    lock.owned.return_value = True\n    redis_client.lock.return_value = lock\n    return redis_client, lock\n\n\n@patch(\n    \"onyx.background.celery.tasks.user_file_processing.tasks.get_user_file_project_sync_queue_depth\"\n)\n@patch(\"onyx.background.celery.tasks.user_file_processing.tasks.get_redis_client\")\ndef test_check_for_user_file_project_sync_applies_queue_backpressure(\n    mock_get_redis_client: MagicMock,\n    mock_get_queue_depth: MagicMock,\n) -> None:\n    redis_client, lock = _build_redis_mock_with_lock()\n    mock_get_redis_client.return_value = redis_client\n    mock_get_queue_depth.return_value = USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH + 1\n\n    task_app = MagicMock()\n    with patch.object(check_for_user_file_project_sync, \"app\", task_app):\n        check_for_user_file_project_sync.run(tenant_id=\"test-tenant\")\n\n    task_app.send_task.assert_not_called()\n    lock.release.assert_called_once()\n\n\n@patch(\n    \"onyx.background.celery.tasks.user_file_processing.tasks.enqueue_user_file_project_sync_task\"\n)\n@patch(\n    \"onyx.background.celery.tasks.user_file_processing.tasks.get_user_file_project_sync_queue_depth\"\n)\n@patch(\n    \"onyx.background.celery.tasks.user_file_processing.tasks.get_session_with_current_tenant\"\n)\n@patch(\"onyx.background.celery.tasks.user_file_processing.tasks.get_redis_client\")\ndef test_check_for_user_file_project_sync_skips_duplicates(\n    mock_get_redis_client: MagicMock,\n    mock_get_session: MagicMock,\n    mock_get_queue_depth: MagicMock,\n    mock_enqueue: MagicMock,\n) -> None:\n    redis_client, lock = _build_redis_mock_with_lock()\n    mock_get_redis_client.return_value = redis_client\n    mock_get_queue_depth.return_value = 0\n\n    user_file_id_one = uuid4()\n    user_file_id_two = uuid4()\n\n    session = MagicMock()\n    session.execute.return_value.scalars.return_value.all.return_value = [\n        user_file_id_one,\n        user_file_id_two,\n    ]\n    mock_get_session.return_value.__enter__.return_value = session\n    mock_enqueue.side_effect = [True, False]\n\n    task_app = MagicMock()\n    with patch.object(check_for_user_file_project_sync, \"app\", task_app):\n        check_for_user_file_project_sync.run(tenant_id=\"test-tenant\")\n\n    assert mock_enqueue.call_count == 2\n    lock.release.assert_called_once()\n\n\ndef test_enqueue_user_file_project_sync_task_sets_guard_and_expiry() -> None:\n    redis_client = MagicMock()\n    redis_client.set.return_value = True\n    celery_app = MagicMock()\n    user_file_id = str(uuid4())\n\n    enqueued = enqueue_user_file_project_sync_task(\n        celery_app=celery_app,\n        redis_client=redis_client,\n        user_file_id=user_file_id,\n        tenant_id=\"test-tenant\",\n        priority=OnyxCeleryPriority.HIGHEST,\n    )\n\n    assert enqueued is True\n    redis_client.set.assert_called_once_with(\n        _user_file_project_sync_queued_key(user_file_id),\n        1,\n        nx=True,\n        ex=CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES,\n    )\n    celery_app.send_task.assert_called_once_with(\n        OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,\n        kwargs={\"user_file_id\": user_file_id, \"tenant_id\": \"test-tenant\"},\n        queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,\n        priority=OnyxCeleryPriority.HIGHEST,\n        expires=CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES,\n    )\n\n\ndef test_enqueue_user_file_project_sync_task_rolls_back_guard_on_publish_failure() -> (\n    None\n):\n    redis_client = MagicMock()\n    redis_client.set.return_value = True\n    celery_app = MagicMock()\n    celery_app.send_task.side_effect = RuntimeError(\"publish failed\")\n\n    user_file_id = str(uuid4())\n    with pytest.raises(RuntimeError):\n        enqueue_user_file_project_sync_task(\n            celery_app=celery_app,\n            redis_client=redis_client,\n            user_file_id=user_file_id,\n            tenant_id=\"test-tenant\",\n        )\n\n    redis_client.delete.assert_called_once_with(\n        _user_file_project_sync_queued_key(user_file_id)\n    )\n\n\n@patch(\"onyx.background.celery.tasks.user_file_processing.tasks.get_redis_client\")\ndef test_process_single_user_file_project_sync_clears_queued_guard_on_pickup(\n    mock_get_redis_client: MagicMock,\n) -> None:\n    redis_client = MagicMock()\n    lock = MagicMock()\n    lock.acquire.return_value = False\n    redis_client.lock.return_value = lock\n    mock_get_redis_client.return_value = redis_client\n\n    user_file_id = str(uuid4())\n    process_single_user_file_project_sync.run(\n        user_file_id=user_file_id,\n        tenant_id=\"test-tenant\",\n    )\n\n    redis_client.delete.assert_called_once_with(\n        _user_file_project_sync_queued_key(user_file_id)\n    )\n"
  },
  {
    "path": "backend/tests/unit/onyx/background/celery/test_celery_redis.py",
    "content": "\"\"\"Tests for celery_get_broker_client singleton.\"\"\"\n\nfrom collections.abc import Iterator\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.background.celery import celery_redis\n\n\n@pytest.fixture(autouse=True)\ndef reset_singleton() -> Iterator[None]:\n    \"\"\"Reset the module-level singleton between tests.\"\"\"\n    celery_redis._broker_client = None\n    celery_redis._broker_url = None\n    yield\n    celery_redis._broker_client = None\n    celery_redis._broker_url = None\n\n\ndef _make_mock_app(broker_url: str = \"redis://localhost:6379/15\") -> MagicMock:\n    app = MagicMock()\n    app.conf.broker_url = broker_url\n    return app\n\n\nclass TestCeleryGetBrokerClient:\n    @patch(\"onyx.background.celery.celery_redis.Redis\")\n    def test_creates_client_on_first_call(self, mock_redis_cls: MagicMock) -> None:\n        mock_client = MagicMock()\n        mock_redis_cls.from_url.return_value = mock_client\n\n        app = _make_mock_app()\n        result = celery_redis.celery_get_broker_client(app)\n\n        assert result is mock_client\n        call_args = mock_redis_cls.from_url.call_args\n        assert call_args[0][0] == \"redis://localhost:6379/15\"\n        assert call_args[1][\"decode_responses\"] is False\n        assert call_args[1][\"socket_keepalive\"] is True\n        assert call_args[1][\"retry_on_timeout\"] is True\n\n    @patch(\"onyx.background.celery.celery_redis.Redis\")\n    def test_reuses_cached_client(self, mock_redis_cls: MagicMock) -> None:\n        mock_client = MagicMock()\n        mock_client.ping.return_value = True\n        mock_redis_cls.from_url.return_value = mock_client\n\n        app = _make_mock_app()\n        client1 = celery_redis.celery_get_broker_client(app)\n        client2 = celery_redis.celery_get_broker_client(app)\n\n        assert client1 is client2\n        # from_url called only once\n        assert mock_redis_cls.from_url.call_count == 1\n\n    @patch(\"onyx.background.celery.celery_redis.Redis\")\n    def test_reconnects_on_ping_failure(self, mock_redis_cls: MagicMock) -> None:\n        stale_client = MagicMock()\n        stale_client.ping.side_effect = ConnectionError(\"disconnected\")\n\n        fresh_client = MagicMock()\n        fresh_client.ping.return_value = True\n\n        mock_redis_cls.from_url.side_effect = [stale_client, fresh_client]\n\n        app = _make_mock_app()\n\n        # First call creates stale_client\n        client1 = celery_redis.celery_get_broker_client(app)\n        assert client1 is stale_client\n\n        # Second call: ping fails, creates fresh_client\n        client2 = celery_redis.celery_get_broker_client(app)\n        assert client2 is fresh_client\n        assert mock_redis_cls.from_url.call_count == 2\n\n    @patch(\"onyx.background.celery.celery_redis.Redis\")\n    def test_uses_broker_url_from_app_config(self, mock_redis_cls: MagicMock) -> None:\n        mock_redis_cls.from_url.return_value = MagicMock()\n\n        app = _make_mock_app(\"redis://custom-host:6380/3\")\n        celery_redis.celery_get_broker_client(app)\n\n        call_args = mock_redis_cls.from_url.call_args\n        assert call_args[0][0] == \"redis://custom-host:6380/3\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_argument_delta_streaming.py",
    "content": "from typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.chat.tool_call_args_streaming import maybe_emit_argument_delta\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import ToolCallArgumentDelta\nfrom onyx.utils.jsonriver import Parser\n\n\ndef _make_tool_call_delta(\n    index: int = 0,\n    name: str | None = None,\n    arguments: str | None = None,\n    function_is_none: bool = False,\n) -> MagicMock:\n    \"\"\"Create a mock tool_call_delta matching the LiteLLM streaming shape.\"\"\"\n    delta = MagicMock()\n    delta.index = index\n    if function_is_none:\n        delta.function = None\n    else:\n        delta.function = MagicMock()\n        delta.function.name = name\n        delta.function.arguments = arguments\n    return delta\n\n\ndef _make_placement() -> Placement:\n    return Placement(turn_index=0, tab_index=0)\n\n\ndef _mock_tool_class(emit: bool = True) -> MagicMock:\n    cls = MagicMock()\n    cls.should_emit_argument_deltas.return_value = emit\n    return cls\n\n\ndef _collect(\n    tc_map: dict[int, dict[str, Any]],\n    delta: MagicMock,\n    placement: Placement | None = None,\n    parsers: dict[int, Parser] | None = None,\n) -> list[Any]:\n    \"\"\"Run maybe_emit_argument_delta and return the yielded packets.\"\"\"\n    return list(\n        maybe_emit_argument_delta(\n            tc_map,\n            delta,\n            placement or _make_placement(),\n            parsers if parsers is not None else {},\n        )\n    )\n\n\ndef _stream_fragments(\n    fragments: list[str],\n    tc_map: dict[int, dict[str, Any]],\n    placement: Placement | None = None,\n) -> list[str]:\n    \"\"\"Feed fragments into maybe_emit_argument_delta one by one, returning\n    all emitted content values concatenated per-key as a flat list.\"\"\"\n    pl = placement or _make_placement()\n    parsers: dict[int, Parser] = {}\n    emitted: list[str] = []\n    for frag in fragments:\n        tc_map[0][\"arguments\"] += frag\n        delta = _make_tool_call_delta(arguments=frag)\n        for packet in maybe_emit_argument_delta(tc_map, delta, pl, parsers=parsers):\n            obj = packet.obj\n            assert isinstance(obj, ToolCallArgumentDelta)\n            for value in obj.argument_deltas.values():\n                emitted.append(value)\n    return emitted\n\n\nclass TestMaybeEmitArgumentDeltaGuards:\n    \"\"\"Tests for conditions that cause no packet to be emitted.\"\"\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_no_emission_when_tool_does_not_opt_in(\n        self, mock_get_tool: MagicMock\n    ) -> None:\n        \"\"\"Tools that return False from should_emit_argument_deltas emit nothing.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class(emit=False)\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": '{\"code\": \"x'}\n        }\n        assert _collect(tc_map, _make_tool_call_delta(arguments=\"x\")) == []\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_no_emission_when_tool_class_unknown(\n        self, mock_get_tool: MagicMock\n    ) -> None:\n        mock_get_tool.return_value = None\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"unknown\", \"arguments\": '{\"code\": \"x'}\n        }\n        assert _collect(tc_map, _make_tool_call_delta(arguments=\"x\")) == []\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_no_emission_when_no_argument_fragment(\n        self, mock_get_tool: MagicMock\n    ) -> None:\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": '{\"code\": \"x'}\n        }\n        assert _collect(tc_map, _make_tool_call_delta(arguments=None)) == []\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_no_emission_when_key_value_not_started(\n        self, mock_get_tool: MagicMock\n    ) -> None:\n        \"\"\"Key exists in JSON but its string value hasn't begun yet.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": '{\"code\":'}\n        }\n        assert _collect(tc_map, _make_tool_call_delta(arguments=\":\")) == []\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_no_emission_before_any_key(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"Only the opening brace has arrived — no key to stream yet.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"{\"}\n        }\n        assert _collect(tc_map, _make_tool_call_delta(arguments=\"{\")) == []\n\n\nclass TestMaybeEmitArgumentDeltaBasic:\n    \"\"\"Tests for correct packet content and incremental emission.\"\"\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_emits_packet_with_correct_fields(self, mock_get_tool: MagicMock) -> None:\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = ['{\"code\": \"', \"print(1)\", '\"}']\n\n        pl = _make_placement()\n        parsers: dict[int, Parser] = {}\n        all_packets = []\n        for frag in fragments:\n            tc_map[0][\"arguments\"] += frag\n            packets = _collect(\n                tc_map, _make_tool_call_delta(arguments=frag), pl, parsers\n            )\n            all_packets.extend(packets)\n\n        assert len(all_packets) >= 1\n        # Verify packet structure\n        obj = all_packets[0].obj\n        assert isinstance(obj, ToolCallArgumentDelta)\n        assert obj.tool_type == \"python\"\n        # All emitted content should reconstruct the value\n        full_code = \"\"\n        for p in all_packets:\n            assert isinstance(p.obj, ToolCallArgumentDelta)\n            if \"code\" in p.obj.argument_deltas:\n                full_code += p.obj.argument_deltas[\"code\"]\n        assert full_code == \"print(1)\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_emits_only_new_content_on_subsequent_call(\n        self, mock_get_tool: MagicMock\n    ) -> None:\n        \"\"\"After a first emission, subsequent calls emit only the diff.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        parsers: dict[int, Parser] = {}\n        pl = _make_placement()\n\n        # First fragment opens the string\n        tc_map[0][\"arguments\"] = '{\"code\": \"abc'\n        packets_1 = _collect(\n            tc_map, _make_tool_call_delta(arguments='{\"code\": \"abc'), pl, parsers\n        )\n        code_1 = \"\"\n        for p in packets_1:\n            assert isinstance(p.obj, ToolCallArgumentDelta)\n            code_1 += p.obj.argument_deltas.get(\"code\", \"\")\n        assert code_1 == \"abc\"\n\n        # Second fragment appends more\n        tc_map[0][\"arguments\"] = '{\"code\": \"abcdef'\n        packets_2 = _collect(\n            tc_map, _make_tool_call_delta(arguments=\"def\"), pl, parsers\n        )\n        code_2 = \"\"\n        for p in packets_2:\n            assert isinstance(p.obj, ToolCallArgumentDelta)\n            code_2 += p.obj.argument_deltas.get(\"code\", \"\")\n        assert code_2 == \"def\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_handles_multiple_keys_sequentially(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"When a second key starts, emissions switch to that key.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = [\n            '{\"code\": \"x',\n            '\", \"output\": \"hello',\n            '\"}',\n        ]\n\n        emitted = _stream_fragments(fragments, tc_map)\n        full = \"\".join(emitted)\n        assert \"x\" in full\n        assert \"hello\" in full\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_delta_spans_key_boundary(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"A single delta contains the end of one value and the start of the next key.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = [\n            '{\"code\": \"x',\n            'y\", \"lang\": \"py',\n            '\"}',\n        ]\n\n        emitted = _stream_fragments(fragments, tc_map)\n        full = \"\".join(emitted)\n        assert \"xy\" in full\n        assert \"py\" in full\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_empty_value_emits_nothing(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"An empty string value has nothing to emit.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        # Opening quote just arrived, value is empty\n        tc_map[0][\"arguments\"] = '{\"code\": \"'\n        packets = _collect(tc_map, _make_tool_call_delta(arguments='{\"code\": \"'))\n        # No string content yet, so either no packet or empty deltas\n        for p in packets:\n            assert isinstance(p.obj, ToolCallArgumentDelta)\n            assert p.obj.argument_deltas.get(\"code\", \"\") == \"\"\n\n\nclass TestMaybeEmitArgumentDeltaDecoding:\n    \"\"\"Tests verifying that JSON escape sequences are properly decoded.\"\"\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_decodes_newlines(self, mock_get_tool: MagicMock) -> None:\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = ['{\"code\": \"line1\\\\nline2\"}']\n\n        emitted = _stream_fragments(fragments, tc_map)\n        assert \"\".join(emitted) == \"line1\\nline2\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_decodes_tabs(self, mock_get_tool: MagicMock) -> None:\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = ['{\"code\": \"\\\\tindented\"}']\n\n        emitted = _stream_fragments(fragments, tc_map)\n        assert \"\".join(emitted) == \"\\tindented\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_decodes_escaped_quotes(self, mock_get_tool: MagicMock) -> None:\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = ['{\"code\": \"say \\\\\"hi\\\\\"\"}']\n\n        emitted = _stream_fragments(fragments, tc_map)\n        assert \"\".join(emitted) == 'say \"hi\"'\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_decodes_escaped_backslashes(self, mock_get_tool: MagicMock) -> None:\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = ['{\"code\": \"path\\\\\\\\dir\"}']\n\n        emitted = _stream_fragments(fragments, tc_map)\n        assert \"\".join(emitted) == \"path\\\\dir\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_decodes_unicode_escape(self, mock_get_tool: MagicMock) -> None:\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = ['{\"code\": \"\\\\u0041\"}']\n\n        emitted = _stream_fragments(fragments, tc_map)\n        assert \"\".join(emitted) == \"A\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_incomplete_escape_at_end_decoded_on_next_chunk(\n        self, mock_get_tool: MagicMock\n    ) -> None:\n        \"\"\"A trailing backslash (incomplete escape) is completed in the next chunk.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = ['{\"code\": \"hello\\\\', 'n\"}']\n\n        emitted = _stream_fragments(fragments, tc_map)\n        assert \"\".join(emitted) == \"hello\\n\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_incomplete_unicode_escape_completed_on_next_chunk(\n        self, mock_get_tool: MagicMock\n    ) -> None:\n        \"\"\"A partial \\\\uXX sequence is completed in the next chunk.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = ['{\"code\": \"hello\\\\u00', '41\"}']\n\n        emitted = _stream_fragments(fragments, tc_map)\n        assert \"\".join(emitted) == \"helloA\"\n\n\nclass TestArgumentDeltaStreamingE2E:\n    \"\"\"Simulates realistic sequences of LLM argument deltas to verify\n    the full pipeline produces correct decoded output.\"\"\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_realistic_python_code_streaming(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"Streams: {\"code\": \"print('hello')\\\\nprint('world')\"}\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = [\n            '{\"',\n            \"code\",\n            '\": \"',\n            \"print(\",\n            \"'hello')\",\n            \"\\\\n\",\n            \"print(\",\n            \"'world')\",\n            '\"}',\n        ]\n\n        full = \"\".join(_stream_fragments(fragments, tc_map))\n        assert full == \"print('hello')\\nprint('world')\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_streaming_with_tabs_and_newlines(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"Streams code with tabs and newlines.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = [\n            '{\"code\": \"',\n            \"if True:\",\n            \"\\\\n\",\n            \"\\\\t\",\n            \"pass\",\n            '\"}',\n        ]\n\n        full = \"\".join(_stream_fragments(fragments, tc_map))\n        assert full == \"if True:\\n\\tpass\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_split_escape_sequence(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"An escape sequence split across two fragments (backslash in one,\n        'n' in the next) should still decode correctly.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = [\n            '{\"code\": \"hello',\n            \"\\\\\",\n            \"n\",\n            'world\"}',\n        ]\n\n        full = \"\".join(_stream_fragments(fragments, tc_map))\n        assert full == \"hello\\nworld\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_multiple_newlines_and_indentation(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"Streams a multi-line function with multiple escape sequences.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = [\n            '{\"code\": \"',\n            \"def foo():\",\n            \"\\\\n\",\n            \"\\\\t\",\n            \"x = 1\",\n            \"\\\\n\",\n            \"\\\\t\",\n            \"return x\",\n            '\"}',\n        ]\n\n        full = \"\".join(_stream_fragments(fragments, tc_map))\n        assert full == \"def foo():\\n\\tx = 1\\n\\treturn x\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_two_keys_streamed_sequentially(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"Streams code first, then a second key (language) — both decoded.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = [\n            '{\"code\": \"',\n            \"x = 1\",\n            '\", \"language\": \"',\n            \"python\",\n            '\"}',\n        ]\n\n        emitted = _stream_fragments(fragments, tc_map)\n        # Should have emissions for both keys\n        full = \"\".join(emitted)\n        assert \"x = 1\" in full\n        assert \"python\" in full\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_code_containing_dict_literal(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"Python code like `x = {\"key\": \"val\"}` contains JSON-like patterns.\n        The escaped quotes inside the *outer* JSON value should prevent the\n        inner `\"key\":` from being mistaken for a top-level JSON key.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        # The LLM sends: {\"code\": \"x = {\\\"key\\\": \\\"val\\\"}\"}\n        # The inner quotes are escaped as \\\" in the JSON value.\n        fragments = [\n            '{\"code\": \"',\n            \"x = {\",\n            '\\\\\"key\\\\\"',\n            \": \",\n            '\\\\\"val\\\\\"',\n            \"}\",\n            '\"}',\n        ]\n\n        full = \"\".join(_stream_fragments(fragments, tc_map))\n        assert full == 'x = {\"key\": \"val\"}'\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_code_with_colon_in_value(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"Colons inside the string value should not confuse key detection.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = [\n            '{\"code\": \"',\n            \"url = \",\n            '\\\\\"https://example.com\\\\\"',\n            '\"}',\n        ]\n\n        full = \"\".join(_stream_fragments(fragments, tc_map))\n        assert full == 'url = \"https://example.com\"'\n\n\nclass TestMaybeEmitArgumentDeltaEdgeCases:\n    \"\"\"Edge cases not covered by the standard test classes.\"\"\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_no_emission_when_function_is_none(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"Some delta chunks have function=None (e.g. role-only deltas).\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": '{\"code\": \"x'}\n        }\n        delta = _make_tool_call_delta(arguments=None, function_is_none=True)\n        assert _collect(tc_map, delta) == []\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_multiple_concurrent_tool_calls(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"Two tool calls streaming at different indices in parallel.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"},\n            1: {\"id\": \"tc_2\", \"name\": \"python\", \"arguments\": \"\"},\n        }\n\n        parsers: dict[int, Parser] = {}\n        pl = _make_placement()\n\n        # Feed full JSON to index 0\n        tc_map[0][\"arguments\"] = '{\"code\": \"aaa\"}'\n        packets_0 = _collect(\n            tc_map,\n            _make_tool_call_delta(index=0, arguments='{\"code\": \"aaa\"}'),\n            pl,\n            parsers,\n        )\n        code_0 = \"\"\n        for p in packets_0:\n            assert isinstance(p.obj, ToolCallArgumentDelta)\n            code_0 += p.obj.argument_deltas.get(\"code\", \"\")\n        assert code_0 == \"aaa\"\n\n        # Feed full JSON to index 1\n        tc_map[1][\"arguments\"] = '{\"code\": \"bbb\"}'\n        packets_1 = _collect(\n            tc_map,\n            _make_tool_call_delta(index=1, arguments='{\"code\": \"bbb\"}'),\n            pl,\n            parsers,\n        )\n        code_1 = \"\"\n        for p in packets_1:\n            assert isinstance(p.obj, ToolCallArgumentDelta)\n            code_1 += p.obj.argument_deltas.get(\"code\", \"\")\n        assert code_1 == \"bbb\"\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_delta_with_four_arguments(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"A single delta contains four complete key-value pairs.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        full = '{\"a\": \"one\", \"b\": \"two\", \"c\": \"three\", \"d\": \"four\"}'\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        tc_map[0][\"arguments\"] = full\n        parsers: dict[int, Parser] = {}\n        packets = _collect(\n            tc_map, _make_tool_call_delta(arguments=full), parsers=parsers\n        )\n\n        # Collect all argument deltas across packets\n        all_deltas: dict[str, str] = {}\n        for p in packets:\n            assert isinstance(p.obj, ToolCallArgumentDelta)\n            for k, v in p.obj.argument_deltas.items():\n                all_deltas[k] = all_deltas.get(k, \"\") + v\n\n        assert all_deltas == {\n            \"a\": \"one\",\n            \"b\": \"two\",\n            \"c\": \"three\",\n            \"d\": \"four\",\n        }\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_delta_on_second_arg_after_first_complete(\n        self, mock_get_tool: MagicMock\n    ) -> None:\n        \"\"\"First argument is fully complete; delta only adds to the second.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n\n        fragments = [\n            '{\"code\": \"print(1)\", \"lang\": \"py',\n            '\"}',\n        ]\n\n        emitted = _stream_fragments(fragments, tc_map)\n        full = \"\".join(emitted)\n        assert \"print(1)\" in full\n        assert \"py\" in full\n\n    @patch(\"onyx.chat.tool_call_args_streaming._get_tool_class\")\n    def test_non_string_values_skipped(self, mock_get_tool: MagicMock) -> None:\n        \"\"\"Non-string values (numbers, booleans, null) are skipped — they are\n        available in the final tool-call kickoff packet. String arguments\n        following them are still emitted.\"\"\"\n        mock_get_tool.return_value = _mock_tool_class()\n\n        tc_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": \"tc_1\", \"name\": \"python\", \"arguments\": \"\"}\n        }\n        fragments = ['{\"timeout\": 30, \"code\": \"hello\"}']\n\n        emitted = _stream_fragments(fragments, tc_map)\n        full = \"\".join(emitted)\n        assert full == \"hello\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_chat_utils.py",
    "content": "\"\"\"Tests for chat_utils.py, specifically get_custom_agent_prompt.\"\"\"\n\nfrom unittest.mock import MagicMock\n\nfrom onyx.chat.chat_utils import _build_tool_call_response_history_message\nfrom onyx.chat.chat_utils import get_custom_agent_prompt\nfrom onyx.configs.constants import DEFAULT_PERSONA_ID\nfrom onyx.prompts.chat_prompts import TOOL_CALL_RESPONSE_CROSS_MESSAGE\n\n\nclass TestGetCustomAgentPrompt:\n    \"\"\"Tests for the get_custom_agent_prompt function.\"\"\"\n\n    def _create_mock_persona(\n        self,\n        persona_id: int = 1,\n        system_prompt: str | None = None,\n        replace_base_system_prompt: bool = False,\n    ) -> MagicMock:\n        \"\"\"Create a mock Persona with the specified attributes.\"\"\"\n        persona = MagicMock()\n        persona.id = persona_id\n        persona.system_prompt = system_prompt\n        persona.replace_base_system_prompt = replace_base_system_prompt\n        return persona\n\n    def _create_mock_chat_session(\n        self,\n        project: MagicMock | None = None,\n    ) -> MagicMock:\n        \"\"\"Create a mock ChatSession with the specified attributes.\"\"\"\n        chat_session = MagicMock()\n        chat_session.project = project\n        return chat_session\n\n    def _create_mock_project(\n        self,\n        instructions: str = \"\",\n    ) -> MagicMock:\n        \"\"\"Create a mock UserProject with the specified attributes.\"\"\"\n        project = MagicMock()\n        project.instructions = instructions\n        return project\n\n    def test_default_persona_no_project(self) -> None:\n        \"\"\"Test that default persona without a project returns None.\"\"\"\n        persona = self._create_mock_persona(persona_id=DEFAULT_PERSONA_ID)\n        chat_session = self._create_mock_chat_session(project=None)\n\n        result = get_custom_agent_prompt(persona, chat_session)\n\n        assert result is None\n\n    def test_default_persona_with_project_instructions(self) -> None:\n        \"\"\"Test that default persona in a project returns project instructions.\"\"\"\n        persona = self._create_mock_persona(persona_id=DEFAULT_PERSONA_ID)\n        project = self._create_mock_project(instructions=\"Do X and Y\")\n        chat_session = self._create_mock_chat_session(project=project)\n\n        result = get_custom_agent_prompt(persona, chat_session)\n\n        assert result == \"Do X and Y\"\n\n    def test_default_persona_with_empty_project_instructions(self) -> None:\n        \"\"\"Test that default persona in a project with empty instructions returns None.\"\"\"\n        persona = self._create_mock_persona(persona_id=DEFAULT_PERSONA_ID)\n        project = self._create_mock_project(instructions=\"\")\n        chat_session = self._create_mock_chat_session(project=project)\n\n        result = get_custom_agent_prompt(persona, chat_session)\n\n        assert result is None\n\n    def test_custom_persona_replace_base_prompt_true(self) -> None:\n        \"\"\"Test that custom persona with replace_base_system_prompt=True returns None.\"\"\"\n        persona = self._create_mock_persona(\n            persona_id=1,\n            system_prompt=\"Custom system prompt\",\n            replace_base_system_prompt=True,\n        )\n        chat_session = self._create_mock_chat_session(project=None)\n\n        result = get_custom_agent_prompt(persona, chat_session)\n\n        assert result is None\n\n    def test_custom_persona_with_system_prompt(self) -> None:\n        \"\"\"Test that custom persona with system_prompt returns the system_prompt.\"\"\"\n        persona = self._create_mock_persona(\n            persona_id=1,\n            system_prompt=\"Custom system prompt\",\n            replace_base_system_prompt=False,\n        )\n        chat_session = self._create_mock_chat_session(project=None)\n\n        result = get_custom_agent_prompt(persona, chat_session)\n\n        assert result == \"Custom system prompt\"\n\n    def test_custom_persona_empty_string_system_prompt(self) -> None:\n        \"\"\"Test that custom persona with empty string system_prompt returns None.\"\"\"\n        persona = self._create_mock_persona(\n            persona_id=1,\n            system_prompt=\"\",\n            replace_base_system_prompt=False,\n        )\n        chat_session = self._create_mock_chat_session(project=None)\n\n        result = get_custom_agent_prompt(persona, chat_session)\n\n        assert result is None\n\n    def test_custom_persona_none_system_prompt(self) -> None:\n        \"\"\"Test that custom persona with None system_prompt returns None.\"\"\"\n        persona = self._create_mock_persona(\n            persona_id=1,\n            system_prompt=None,\n            replace_base_system_prompt=False,\n        )\n        chat_session = self._create_mock_chat_session(project=None)\n\n        result = get_custom_agent_prompt(persona, chat_session)\n\n        assert result is None\n\n    def test_custom_persona_in_project_uses_persona_prompt(self) -> None:\n        \"\"\"Test that custom persona in a project uses persona's system_prompt, not project instructions.\"\"\"\n        persona = self._create_mock_persona(\n            persona_id=1,\n            system_prompt=\"Custom system prompt\",\n            replace_base_system_prompt=False,\n        )\n        project = self._create_mock_project(instructions=\"Project instructions\")\n        chat_session = self._create_mock_chat_session(project=project)\n\n        result = get_custom_agent_prompt(persona, chat_session)\n\n        # Should use persona's system_prompt, NOT project instructions\n        assert result == \"Custom system prompt\"\n\n    def test_custom_persona_replace_base_in_project(self) -> None:\n        \"\"\"Test that custom persona with replace_base_system_prompt=True in a project still returns None.\"\"\"\n        persona = self._create_mock_persona(\n            persona_id=1,\n            system_prompt=\"Custom system prompt\",\n            replace_base_system_prompt=True,\n        )\n        project = self._create_mock_project(instructions=\"Project instructions\")\n        chat_session = self._create_mock_chat_session(project=project)\n\n        result = get_custom_agent_prompt(persona, chat_session)\n\n        # Should return None because replace_base_system_prompt=True\n        assert result is None\n\n\nclass TestBuildToolCallResponseHistoryMessage:\n    def test_image_tool_uses_generated_images(self) -> None:\n        message = _build_tool_call_response_history_message(\n            tool_name=\"generate_image\",\n            generated_images=[{\"file_id\": \"img-1\", \"revised_prompt\": \"p1\"}],\n            tool_call_response=None,\n        )\n        assert message == '[{\"file_id\": \"img-1\", \"revised_prompt\": \"p1\"}]'\n\n    def test_non_image_tool_uses_placeholder(self) -> None:\n        message = _build_tool_call_response_history_message(\n            tool_name=\"web_search\",\n            generated_images=None,\n            tool_call_response='{\"raw\":\"value\"}',\n        )\n        assert message == TOOL_CALL_RESPONSE_CROSS_MESSAGE\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_citation_processor.py",
    "content": "\"\"\"\nUnit tests for DynamicCitationProcessor.\n\nThis module contains comprehensive tests for the DynamicCitationProcessor class,\nwhich processes streaming tokens from LLMs to extract citations, remove citation\nmarkers from output text, and emit CitationInfo objects.\n\nKey features tested:\n- Dynamic citation mapping updates\n- Citation extraction and formatting\n- Citation removal from output\n- CitationInfo emission and tracking\n- Edge cases (unicode, code blocks, invalid citations, etc.)\n\"\"\"\n\nfrom datetime import datetime\n\nimport pytest\n\nfrom onyx.chat.citation_processor import CitationMapping\nfrom onyx.chat.citation_processor import CitationMode\nfrom onyx.chat.citation_processor import DynamicCitationProcessor\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.context.search.models import SearchDoc\nfrom onyx.server.query_and_chat.streaming_models import CitationInfo\n\n\n# ============================================================================\n# Helper Functions and Fixtures\n# ============================================================================\n\n\ndef create_test_search_doc(\n    document_id: str = \"test-doc-1\",\n    link: str | None = \"https://example.com/doc1\",\n    chunk_ind: int = 0,\n    semantic_identifier: str = \"Test Document\",\n    blurb: str = \"Test blurb\",\n    source_type: DocumentSource = DocumentSource.WEB,\n    boost: int = 1,\n    hidden: bool = False,\n    metadata: dict | None = None,\n    score: float | None = None,\n    match_highlights: list[str] | None = None,\n) -> SearchDoc:\n    \"\"\"Create a test SearchDoc instance with default or custom values.\"\"\"\n    return SearchDoc(\n        document_id=document_id,\n        chunk_ind=chunk_ind,\n        semantic_identifier=semantic_identifier,\n        link=link,\n        blurb=blurb,\n        source_type=source_type,\n        boost=boost,\n        hidden=hidden,\n        metadata=metadata or {},\n        score=score,\n        match_highlights=match_highlights or [],\n        updated_at=datetime.now(),\n    )\n\n\ndef process_tokens(\n    processor: DynamicCitationProcessor, tokens: list[str | None]\n) -> tuple[str, list[CitationInfo]]:\n    \"\"\"\n    Process a list of tokens through the processor and collect results.\n\n    Returns:\n        Tuple of (output_text, citations) where:\n        - output_text: All string outputs concatenated\n        - citations: List of CitationInfo objects emitted\n    \"\"\"\n    output_text = \"\"\n    citations = []\n\n    for token in tokens:\n        for result in processor.process_token(token):\n            if isinstance(result, str):\n                output_text += result\n            elif isinstance(result, CitationInfo):\n                citations.append(result)\n\n    # Flush remaining segment\n    for result in processor.process_token(None):\n        if isinstance(result, str):\n            output_text += result\n        elif isinstance(result, CitationInfo):\n            citations.append(result)\n\n    return output_text, citations\n\n\n@pytest.fixture\ndef mock_search_docs() -> CitationMapping:\n    \"\"\"Create a dictionary of mock SearchDoc objects for testing.\"\"\"\n    return {\n        1: create_test_search_doc(\n            document_id=\"doc_1\",\n            link=\"https://example.com/doc1\",\n            semantic_identifier=\"Document 1\",\n        ),\n        2: create_test_search_doc(\n            document_id=\"doc_2\",\n            link=\"https://example.com/doc2\",\n            semantic_identifier=\"Document 2\",\n        ),\n        3: create_test_search_doc(\n            document_id=\"doc_3\",\n            link=None,  # No link\n            semantic_identifier=\"Document 3\",\n        ),\n        4: create_test_search_doc(\n            document_id=\"doc_4\",\n            link=\"https://example.com/doc4\",\n            semantic_identifier=\"Document 4\",\n        ),\n        5: create_test_search_doc(\n            document_id=\"doc_5\",\n            link=\"https://example.com/doc5\",\n            semantic_identifier=\"Document 5\",\n        ),\n    }\n\n\n# ============================================================================\n# Initialization Tests\n# ============================================================================\n\n\ndef test_default_initialization() -> None:\n    \"\"\"Test default initialization of DynamicCitationProcessor.\"\"\"\n    processor = DynamicCitationProcessor()\n\n    assert processor.citation_to_doc == {}\n    assert processor.llm_out == \"\"\n    assert processor.curr_segment == \"\"\n    assert processor.hold == \"\"\n    assert processor.cited_documents_in_order == []\n    assert processor.cited_document_ids == set()\n    assert processor.recent_cited_documents == set()\n    assert processor.non_citation_count == 0\n\n\ndef test_initialization_with_custom_stop_stream() -> None:\n    \"\"\"Test initialization with custom stop_stream.\"\"\"\n    stop_stream = \"STOP_TOKEN\"\n    processor = DynamicCitationProcessor(stop_stream=stop_stream)\n\n    assert processor.stop_stream == stop_stream\n    assert processor.citation_to_doc == {}\n\n\ndef test_initial_state_empty() -> None:\n    \"\"\"Test that initial state is empty and ready for use.\"\"\"\n    processor = DynamicCitationProcessor()\n\n    assert processor.get_cited_documents() == []\n    assert processor.get_cited_document_ids() == []\n    assert processor.num_cited_documents == 0\n\n\n# ============================================================================\n# Citation Mapping Tests\n# ============================================================================\n\n\ndef test_update_citation_mapping_single(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test updating citation mapping with a single mapping.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    assert len(processor.citation_to_doc) == 1\n    assert processor.citation_to_doc[1] == mock_search_docs[1]\n    assert processor.citation_to_doc[1].document_id == \"doc_1\"\n\n\ndef test_update_citation_mapping_multiple(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test updating citation mapping with multiple mappings.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping(\n        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n    )\n\n    assert len(processor.citation_to_doc) == 3\n    assert processor.citation_to_doc[1].document_id == \"doc_1\"\n    assert processor.citation_to_doc[2].document_id == \"doc_2\"\n    assert processor.citation_to_doc[3].document_id == \"doc_3\"\n\n\ndef test_update_citation_mapping_merges(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test that update_citation_mapping merges with existing mappings.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n    processor.update_citation_mapping({2: mock_search_docs[2]})\n\n    assert len(processor.citation_to_doc) == 2\n    assert processor.citation_to_doc[1] == mock_search_docs[1]\n    assert processor.citation_to_doc[2] == mock_search_docs[2]\n\n\ndef test_update_citation_mapping_ignores_duplicate_keys(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that update_citation_mapping ignores duplicate citation numbers.\n\n    This behavior is intentional to handle cases like OpenURL reusing the same\n    citation number as a Web Search result - we keep the first one registered.\n    \"\"\"\n    processor = DynamicCitationProcessor()\n    doc1 = mock_search_docs[1]\n    doc2 = create_test_search_doc(\n        document_id=\"doc_1_updated\", link=\"https://updated.com\"\n    )\n\n    processor.update_citation_mapping({1: doc1})\n    processor.update_citation_mapping({1: doc2})\n\n    # First citation should be kept, second one ignored\n    assert len(processor.citation_to_doc) == 1\n    assert processor.citation_to_doc[1].document_id == \"doc_1\"\n    assert processor.citation_to_doc[1].link == \"https://example.com/doc1\"\n\n\n# ============================================================================\n# Basic Citation Processing Tests\n# ============================================================================\n\n\ndef test_single_citation(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test processing a single citation [1].\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, citations = process_tokens(processor, [\"Text [\", \"1\", \"] here.\"])\n\n    # Raw citation pattern should be replaced with formatted version\n    assert (\n        \"Text [\" not in output\n        or \"Text [\" in output\n        and \"[[1]](https://example.com/doc1)\" in output\n    )\n    assert \"here.\" in output\n    assert len(citations) == 1\n    assert citations[0].citation_number == 1\n    assert citations[0].document_id == \"doc_1\"\n\n\ndef test_multiple_citations_comma_separated(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test processing multiple citations [1, 2, 3].\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping(\n        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n    )\n\n    output, citations = process_tokens(\n        processor, [\"Text [\", \"1\", \",\", \" \", \"2\", \",\", \"3\", \"] end.\"]\n    )\n\n    # Raw citation patterns should be replaced with formatted versions\n    assert \"[[1]](https://example.com/doc1)\" in output\n    assert \"[[2]](https://example.com/doc2)\" in output\n    assert \"[[3]]()\" in output\n    assert \"end.\" in output\n    assert len(citations) == 3\n    assert {c.document_id for c in citations} == {\"doc_1\", \"doc_2\", \"doc_3\"}\n\n\ndef test_double_bracket_citation(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test processing double bracket citation [[1]].\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, citations = process_tokens(processor, [\"Text [[\", \"1\", \"]] here.\"])\n\n    # Double bracket citation should be replaced with formatted version\n    assert \"[[1]](https://example.com/doc1)\" in output\n    assert \"here.\" in output\n    assert len(citations) == 1\n    assert citations[0].citation_number == 1\n\n\ndef test_citation_split_across_tokens(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test citation split across multiple tokens.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, citations = process_tokens(processor, [\"[\", \"1\", \"]\"])\n\n    assert \"[[1]](https://example.com/doc1)\" in output\n    assert len(citations) == 1\n\n\ndef test_citation_at_beginning(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test citation at the beginning of text.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, citations = process_tokens(processor, [\"[\", \"1\", \"] Text here.\"])\n\n    assert \"[[1]](https://example.com/doc1)\" in output\n    assert \"Text here.\" in output\n    assert len(citations) == 1\n\n\ndef test_citation_at_end(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test citation at the end of text.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, citations = process_tokens(processor, [\"Text here [\", \"1\", \"]\"])\n\n    assert \"[[1]](https://example.com/doc1)\" in output\n    assert \"Text here\" in output\n    assert len(citations) == 1\n\n\ndef test_citation_in_middle(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test citation in the middle of text.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, citations = process_tokens(processor, [\"Start [\", \"1\", \"] end.\"])\n\n    assert \"[[1]](https://example.com/doc1)\" in output\n    assert \"Start\" in output and \"end.\" in output\n    assert len(citations) == 1\n\n\n# ============================================================================\n# Citation Formatting and Output Tests\n# ============================================================================\n\n\ndef test_citation_removed_from_output(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test that citations are removed from output text.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, _ = process_tokens(processor, [\"This is text [\", \"1\", \"] with citation.\"])\n\n    # Raw citation should be replaced with formatted version\n    assert \"This is text [[1]](https://example.com/doc1) with citation.\" in output\n\n\ndef test_formatted_citation_yielded_separately(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that formatted citations are yielded separately.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    results = []\n    for token in [\"Text [\", \"1\", \"] here.\"]:\n        for result in processor.process_token(token):\n            results.append(result)\n\n    # Should have text chunks and formatted citation\n    text_results = [r for r in results if isinstance(r, str)]\n    citation_results = [r for r in results if isinstance(r, CitationInfo)]\n\n    assert len(citation_results) == 1\n    assert any(\"[[1]](https://example.com/doc1)\" in r for r in text_results)\n\n\ndef test_leading_space_with_existing_space(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test leading space handling when space already exists.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, _ = process_tokens(processor, [\"Text \", \"[\", \"1\", \"] here.\"])\n    # Should not add extra space\n    assert \"Text \" in output or \"Text [[1]](https://example.com/doc1)\" in output\n\n\ndef test_leading_space_without_existing_space(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test leading space handling when no space exists.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, _ = process_tokens(processor, [\"Text[\", \"1\", \"] here.\"])\n\n    # Should preserve order: text before citation, then citation with space added\n    assert \"Text [[1]](https://example.com/doc1) here.\" in output\n\n\ndef test_citation_with_link(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test citation formatting with link.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, _ = process_tokens(processor, [\"Text [\", \"1\", \"]\"])\n\n    assert \"Text [[1]](https://example.com/doc1)\" in output\n\n\ndef test_citation_without_link(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test citation formatting without link.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({3: mock_search_docs[3]})  # doc_3 has no link\n\n    output, _ = process_tokens(processor, [\"Text [\", \"3\", \"]\"])\n\n    assert \"Text [[3]]()\" in output\n\n\ndef test_multiple_citations_in_sequence(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test multiple citations formatted in sequence.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping(\n        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n    )\n\n    output, citations = process_tokens(\n        processor, [\"Text [\", \"1\", \"][\", \"2\", \"][\", \"3\", \"]\"]\n    )\n\n    assert (\n        \"Text [[1]](https://example.com/doc1)[[2]](https://example.com/doc2)[[3]]()\"\n        in output\n    )\n    assert len(citations) == 3\n\n\n# ============================================================================\n# CitationInfo Emission Tests\n# ============================================================================\n\n\ndef test_citation_info_emitted_for_new_citation(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that CitationInfo is emitted for new citations.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    _, citations = process_tokens(processor, [\"Text [\", \"1\", \"]\"])\n\n    assert len(citations) == 1\n    assert citations[0].citation_number == 1\n    assert citations[0].document_id == \"doc_1\"\n\n\ndef test_citation_info_contains_correct_fields(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that CitationInfo contains correct citation_number and document_id.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})\n\n    _, citations = process_tokens(processor, [\"[\", \"1\", \"][\", \"2\", \"]\"])\n\n    assert len(citations) == 2\n    citation_numbers = {c.citation_number for c in citations}\n    document_ids = {c.document_id for c in citations}\n    assert citation_numbers == {1, 2}\n    assert document_ids == {\"doc_1\", \"doc_2\"}\n\n\ndef test_citation_info_deduplication_recent(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that recent citations don't emit CitationInfo.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    _, citations1 = process_tokens(processor, [\"First [\", \"1\", \"]\"])\n    assert len(citations1) == 1\n\n    # Same citation again immediately - should not emit CitationInfo\n    _, citations2 = process_tokens(processor, [\"Second [\", \"1\", \"]\"])\n    assert len(citations2) == 0  # No new CitationInfo\n\n\ndef test_citation_info_order_matches_first_citation(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that CitationInfo order matches first citation order.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping(\n        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n    )\n\n    _, citations = process_tokens(processor, [\"[\", \"3\", \"][\", \"1\", \"][\", \"2\", \"]\"])\n\n    # Order should be 3, 1, 2 (first citation order)\n    assert len(citations) == 3\n    assert citations[0].citation_number == 3\n    assert citations[1].citation_number == 1\n    assert citations[2].citation_number == 2\n\n\n# ============================================================================\n# Citation Order Tracking Tests\n# ============================================================================\n\n\ndef test_get_cited_documents_order(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test that get_cited_documents returns documents in first citation order.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping(\n        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n    )\n\n    process_tokens(processor, [\"[\", \"3\", \"][\", \"1\", \"][\", \"2\", \"]\"])\n\n    cited_docs = processor.get_cited_documents()\n    assert len(cited_docs) == 3\n    assert cited_docs[0].document_id == \"doc_3\"\n    assert cited_docs[1].document_id == \"doc_1\"\n    assert cited_docs[2].document_id == \"doc_2\"\n\n\ndef test_get_cited_document_ids_order(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test that get_cited_document_ids returns IDs in correct order.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping(\n        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n    )\n\n    process_tokens(processor, [\"[\", \"2\", \"][\", \"1\", \"][\", \"3\", \"]\"])\n\n    doc_ids = processor.get_cited_document_ids()\n    assert doc_ids == [\"doc_2\", \"doc_1\", \"doc_3\"]\n\n\ndef test_num_cited_documents_property(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test that num_cited_documents property returns correct count.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping(\n        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n    )\n\n    assert processor.num_cited_documents == 0\n\n    process_tokens(processor, [\"[\", \"1\", \"]\"])\n    assert processor.num_cited_documents == 1\n\n    process_tokens(processor, [\"[\", \"2\", \"]\"])\n    assert processor.num_cited_documents == 2\n\n    # Same document again shouldn't increase count\n    process_tokens(processor, [\"[\", \"1\", \"]\"])\n    assert processor.num_cited_documents == 2\n\n\ndef test_multiple_citations_same_document_no_duplicate(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that multiple citations of same document don't duplicate in order.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    process_tokens(processor, [\"[\", \"1\", \"][\", \"1\", \"][\", \"1\", \"]\"])\n\n    cited_docs = processor.get_cited_documents()\n    assert len(cited_docs) == 1\n    assert cited_docs[0].document_id == \"doc_1\"\n\n\n# ============================================================================\n# Recent Citation Deduplication Tests\n# ============================================================================\n\n\ndef test_recent_citations_no_citation_info(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that recent citations don't emit CitationInfo.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    _, citations1 = process_tokens(processor, [\"First [\", \"1\", \"]\"])\n    assert len(citations1) == 1\n\n    _, citations2 = process_tokens(processor, [\"Second [\", \"1\", \"]\"])\n    assert len(citations2) == 0  # No CitationInfo for recent citation\n\n\ndef test_recent_citations_still_format_text(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that recent citations still format citation text.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output1, _ = process_tokens(processor, [\"First [\", \"1\", \"]\"])\n    assert \"[[1]](https://example.com/doc1)\" in output1\n\n    output2, _ = process_tokens(processor, [\"Second [\", \"1\", \"]\"])\n    assert \"[[1]](https://example.com/doc1)\" in output2  # Still formatted\n\n\ndef test_reset_recent_citations(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test that reset_recent_citations clears recent tracker.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    _, citations1 = process_tokens(processor, [\"First [\", \"1\", \"]\"])\n    assert len(citations1) == 1\n\n    _, citations2 = process_tokens(processor, [\"Second [\", \"1\", \"]\"])\n    assert len(citations2) == 0  # Recent citation\n\n    processor.reset_recent_citations()\n\n    _, citations3 = process_tokens(processor, [\"Third [\", \"1\", \"]\"])\n    assert len(citations3) == 0  # Still no CitationInfo (already in cited_documents)\n\n\ndef test_non_citation_count_threshold_resets_recent(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that non-citation count threshold (5) resets recent citations.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    _, citations1 = process_tokens(processor, [\"First [\", \"1\", \"]\"])\n    assert len(citations1) == 1\n\n    # Add enough non-citation text to trigger reset (>5 chars)\n    _, citations2 = process_tokens(processor, [\"Second [\", \"1\", \"]\"])\n    assert len(citations2) == 0  # Recent citation\n\n    # Add text with more than 5 non-citation characters\n    _, citations3 = process_tokens(processor, [\"Long text here [\", \"1\", \"]\"])\n    # After >5 non-citation chars, recent citations should be cleared\n    # But since doc_1 is already in cited_documents, no new CitationInfo\n    assert len(citations3) == 0\n\n\n# ============================================================================\n# Invalid Citation Handling Tests\n# ============================================================================\n\n\ndef test_citation_not_in_mapping_skipped(\n    mock_search_docs: CitationMapping, caplog: pytest.LogCaptureFixture\n) -> None:\n    \"\"\"Test that citations with numbers not in mapping are skipped.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, citations = process_tokens(processor, [\"Text [\", \"99\", \"] here.\"])\n\n    assert \"[99]\" not in output  # Citation removed but not processed\n    assert len(citations) == 0\n    assert \"Citation number 99 not found in mapping\" in caplog.text\n\n\ndef test_invalid_citation_format_skipped(\n    mock_search_docs: CitationMapping,\n    caplog: pytest.LogCaptureFixture,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that invalid citation number formats are skipped.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    # This should not match the citation pattern, so it will be left as-is\n    output, citations = process_tokens(processor, [\"Text [\", \"abc\", \"] here.\"])\n\n    assert len(citations) == 0\n    assert \"Text [abc] here.\" in output\n\n\ndef test_empty_citation_content_handled(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test that empty citation content is handled.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    # Empty citation like [,] should be handled - empty parts are skipped\n    output, citations = process_tokens(processor, [\"Text [\", \"1\", \",\", \" \", \"2\", \"]\"])\n\n    # Should process both citations, skipping empty parts\n    assert len(citations) >= 1  # At least one valid citation\n\n\ndef test_citation_with_non_integer_skipped(\n    mock_search_docs: CitationMapping,\n    caplog: pytest.LogCaptureFixture,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that citations with non-integer content are skipped.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    # This won't match the pattern, but if it did, it would be skipped\n    output, citations = process_tokens(processor, [\"Text [\", \"1.5\", \"]\"])\n\n    # The pattern requires integers, so this won't match\n    assert len(citations) == 0 or \"[1.5]\" in output\n\n\n# ============================================================================\n# Unicode Bracket Tests\n# ============================================================================\n\n\ndef test_unicode_bracket_citation(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test processing unicode bracket citation 【1】.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, citations = process_tokens(processor, [\"Text 【\", \"1\", \"】 here.\"])\n\n    assert \"【1】\" not in output\n    assert len(citations) == 1\n    assert citations[0].citation_number == 1\n\n\ndef test_unicode_bracket_variant(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test processing unicode bracket variant ［1］.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, citations = process_tokens(processor, [\"Text ［\", \"1\", \"］ here.\"])\n\n    assert \"［1］\" not in output\n    assert len(citations) == 1\n\n\ndef test_double_unicode_bracket_citation(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test processing double unicode bracket citation 【【1】】.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, citations = process_tokens(processor, [\"Text 【【\", \"1\", \"】】 here.\"])\n\n    assert \"【【1】】\" not in output\n    assert len(citations) == 1\n\n\ndef test_mixed_ascii_unicode_brackets(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test mixed ASCII and unicode brackets.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})\n\n    output, citations = process_tokens(\n        processor, [\"ASCII [\", \"1\", \"] unicode 【\", \"2\", \"】\"]\n    )\n\n    assert \"[[1]](https://example.com/doc1)\" in output\n    assert \"[[2]](https://example.com/doc2)\" in output\n    assert len(citations) == 2\n\n\ndef test_unicode_brackets_split_across_tokens(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test unicode brackets split across tokens.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, citations = process_tokens(processor, [\"【\", \"1\", \"】\"])\n\n    assert \"【1】\" not in output\n    assert len(citations) == 1\n\n\n# ============================================================================\n# Code Block Handling Tests\n# ============================================================================\n\n\ndef test_citation_inside_code_block_not_processed(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that citations inside code blocks are not processed.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    tokens: list[str | None] = [\n        \"Here's code:\\n```\\n\",\n        \"def example():\\n    print('[1]')\\n\",\n        \"```\\n\",\n        \"End.\",\n    ]\n    output, citations = process_tokens(processor, tokens)\n\n    # Citation inside code block should not be processed\n    assert len(citations) == 0\n    # Code block should have plaintext added\n    assert \"```plaintext\" in output\n\n\ndef test_code_block_plaintext_added(\n    mock_search_docs: CitationMapping,  # noqa: ARG001\n) -> None:\n    \"\"\"Test that code blocks with ``` followed by \\\\n get 'plaintext' added.\"\"\"\n    processor = DynamicCitationProcessor()\n\n    tokens: list[str | None] = [\"Code:\\n```\\n\", \"def test():\\n    pass\\n\", \"```\\n\"]\n    output, _ = process_tokens(processor, tokens)\n\n    assert \"```plaintext\" in output\n\n\ndef test_citation_outside_code_block_processed(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that citations outside code blocks are processed normally.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    tokens: list[str | None] = [\n        \"Text [\",\n        \"1\",\n        \"] before code.\\n```\\n\",\n        \"code here\\n\",\n        \"```\\n\",\n        \"Text [\",\n        \"1\",\n        \"] after code.\",\n    ]\n    output, citations = process_tokens(processor, tokens)\n\n    # Should have citations before and after code block\n    # Same document, so only one CitationInfo (first citation)\n    assert len(citations) == 1\n    # Citations outside code block should be formatted\n    assert \"[[1]](https://example.com/doc1)\" in output\n    # Citation inside code block should remain as-is\n    assert \"code here\" in output\n\n\ndef test_multiple_code_blocks(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test handling of multiple code blocks.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    tokens: list[str | None] = [\n        \"First block:\\n```\\n\",\n        \"code1\\n\",\n        \"```\\n\",\n        \"Text [\",\n        \"1\",\n        \"]\\n\",\n        \"Second block:\\n```\\n\",\n        \"code2\\n\",\n        \"```\\n\",\n    ]\n    output, citations = process_tokens(processor, tokens)\n\n    assert \"```plaintext\" in output\n    assert len(citations) == 1\n\n\n# ============================================================================\n# Stop Token Tests\n# ============================================================================\n\n\ndef test_stop_token_detection_stops_processing() -> None:\n    \"\"\"Test that stop token detection stops processing.\"\"\"\n    stop_stream = \"STOP\"\n    processor = DynamicCitationProcessor(stop_stream=stop_stream)\n\n    results = []\n    for token in [\"Text \", \"ST\", \"OP\"]:\n        for result in processor.process_token(token):\n            results.append(result)\n\n    # Try to add more text after stop token\n    for result in processor.process_token(\" more text\"):\n        results.append(result)\n\n    # Processing should stop at STOP token - no results after STOP\n    output = \"\".join(r for r in results if isinstance(r, str))\n    # The stop token itself should not appear in output\n    assert \"STOP\" not in output or output == \"\"\n\n\ndef test_partial_stop_token_held_back() -> None:\n    \"\"\"Test that partial stop token is held back.\"\"\"\n    stop_stream = \"STOP\"\n    processor = DynamicCitationProcessor(stop_stream=stop_stream)\n\n    results = []\n    for token in [\"Text \", \"ST\"]:\n        for result in processor.process_token(token):\n            results.append(result)\n\n    # Partial stop token should be held back\n    output = \"\".join(r for r in results if isinstance(r, str))\n    # Should have \"Text \" but \"ST\" should be held\n    assert \"Text \" in output or output == \"\"\n\n\ndef test_stop_token_at_different_positions() -> None:\n    \"\"\"Test stop token at different positions.\"\"\"\n    stop_stream = \"END\"\n\n    # Stop token at beginning - when detected, processing stops for that token\n    processor1 = DynamicCitationProcessor(stop_stream=stop_stream)\n    results1 = []\n    for token in [\"END\"]:\n        for result in processor1.process_token(token):\n            results1.append(result)\n    # Stop token detection returns early, so no results\n    output1 = \"\".join(r for r in results1 if isinstance(r, str))\n    assert output1 == \"\"  # Stop token detected, no output\n\n    # Stop token in middle - text before stop token is processed\n    processor2 = DynamicCitationProcessor(stop_stream=stop_stream)\n    results2 = []\n    for token in [\"Start \", \"EN\", \"D\"]:\n        for result in processor2.process_token(token):\n            results2.append(result)\n    output2 = \"\".join(r for r in results2 if isinstance(r, str))\n    # \"Start \" should be processed before stop token is detected\n    assert \"Start \" in output2\n    # Stop token \"END\" should not appear in output\n    assert \"END\" not in output2\n\n\n# ============================================================================\n# Edge Cases\n# ============================================================================\n\n\ndef test_empty_token_stream() -> None:\n    \"\"\"Test processing empty token stream.\"\"\"\n    processor = DynamicCitationProcessor()\n\n    output, citations = process_tokens(processor, [])\n\n    assert output == \"\"\n    assert len(citations) == 0\n\n\ndef test_none_token_flushes_remaining_segment(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that None token (end of stream) flushes remaining segment.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    results = []\n    for token in [\"Remaining \", \"text\"]:\n        for result in processor.process_token(token):\n            results.append(result)\n\n    # Flush with None\n    for result in processor.process_token(None):\n        results.append(result)\n\n    output = \"\".join(r for r in results if isinstance(r, str))\n    assert \"Remaining text\" in output\n\n\ndef test_very_long_citation_numbers(\n    mock_search_docs: CitationMapping,  # noqa: ARG001\n) -> None:\n    \"\"\"Test citations with very long citation numbers.\"\"\"\n    processor = DynamicCitationProcessor()\n    # Create a doc with a high citation number\n    doc_100 = create_test_search_doc(\n        document_id=\"doc_100\", link=\"https://example.com/doc100\"\n    )\n    processor.update_citation_mapping({100: doc_100})\n\n    output, citations = process_tokens(processor, [\"Text [\", \"100\", \"]\"])\n\n    assert len(citations) == 1\n    assert citations[0].citation_number == 100\n\n\ndef test_citations_with_extra_whitespace(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test citations with extra whitespace.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})\n\n    # Extra whitespace in citation should be handled (stripped)\n    output, citations = process_tokens(processor, [\"Text [\", \"1\", \",\", \" \", \"2\", \"]\"])\n\n    assert len(citations) == 2\n    assert \"[[1]](https://example.com/doc1)\" in output\n    assert \"[[2]](https://example.com/doc2)\" in output\n\n\ndef test_consecutive_citations_no_text_between(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test consecutive citations without text between.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})\n\n    output, citations = process_tokens(processor, [\"[\", \"1\", \"][\", \"2\", \"]\"])\n\n    assert \"[[1]](https://example.com/doc1)\" in output\n    assert \"[[2]](https://example.com/doc2)\" in output\n    assert len(citations) == 2\n\n\ndef test_citations_at_stream_boundaries(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test citations at stream boundaries.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    # Citation split at very beginning\n    output1, citations1 = process_tokens(processor, [\"[\", \"1\", \"] text\"])\n    assert len(citations1) == 1\n    assert \"[[1]](https://example.com/doc1) text\" in output1\n\n    # Citation split at very end\n    processor2 = DynamicCitationProcessor()\n    processor2.update_citation_mapping({1: mock_search_docs[1]})\n    output2, citations2 = process_tokens(processor2, [\"text [\", \"1\", \"]\"])\n    assert len(citations2) == 1\n    assert \"text [[1]](https://example.com/doc1)\" in output2\n\n\n# ============================================================================\n# Dynamic Mapping Updates Tests\n# ============================================================================\n\n\ndef test_process_tokens_then_update_mapping(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test processing tokens, updating mapping, then continuing.\"\"\"\n    processor = DynamicCitationProcessor()\n\n    # Process tokens before mapping is set\n    output1, citations1 = process_tokens(processor, [\"Text [\", \"1\", \"]\"])\n    assert len(citations1) == 0  # No mapping yet\n\n    # Update mapping\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    # Continue processing\n    output2, citations2 = process_tokens(processor, [\"More text [\", \"1\", \"]\"])\n    assert len(citations2) == 1  # Now has mapping\n\n\ndef test_citations_before_mapping_skipped(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that citations before mapping update are skipped.\"\"\"\n    processor = DynamicCitationProcessor()\n\n    output1, citations1 = process_tokens(processor, [\"Text [\", \"1\", \"]\"])\n    assert len(citations1) == 0\n    assert \"[1]\" not in output1  # Still removed from output\n\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output2, citations2 = process_tokens(processor, [\"More [\", \"1\", \"]\"])\n    assert len(citations2) == 1\n\n\ndef test_citations_after_mapping_processed(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that citations after mapping update are processed.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    output, citations = process_tokens(processor, [\"Text [\", \"1\", \"]\"])\n\n    assert len(citations) == 1\n    assert citations[0].document_id == \"doc_1\"\n\n\ndef test_multiple_mapping_updates_during_processing(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test multiple mapping updates during processing.\"\"\"\n    processor = DynamicCitationProcessor()\n\n    # First mapping\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n    output1, citations1 = process_tokens(processor, [\"[\", \"1\", \"]\"])\n    assert len(citations1) == 1\n    assert citations1[0].document_id == \"doc_1\"\n\n    # Second mapping\n    processor.update_citation_mapping({2: mock_search_docs[2]})\n    output2, citations2 = process_tokens(processor, [\"[\", \"2\", \"]\"])\n    assert len(citations2) == 1\n\n    # Try to update existing citation number (should be ignored due to duplicate filtering)\n    doc1_updated = create_test_search_doc(\n        document_id=\"doc_1_updated\", link=\"https://updated.com\"\n    )\n    processor.update_citation_mapping({1: doc1_updated})\n    output3, citations3 = process_tokens(processor, [\"[\", \"1\", \"]\"])\n    # No new citation because citation 1 already exists and was already cited\n    assert len(citations3) == 0\n    # Original doc_1 should still be mapped\n    assert processor.citation_to_doc[1].document_id == \"doc_1\"\n\n\n# ============================================================================\n# Integration Tests\n# ============================================================================\n\n\ndef test_full_conversation_flow(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test full conversation flow with multiple turns.\"\"\"\n    processor = DynamicCitationProcessor()\n\n    # Turn 1: Add some documents\n    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})\n    output1, citations1 = process_tokens(\n        processor, [\"This is the first response [\", \"1\", \"] with citation.\"]\n    )\n    assert len(citations1) == 1\n\n    # Turn 2: Add more documents and continue\n    processor.update_citation_mapping({3: mock_search_docs[3], 4: mock_search_docs[4]})\n    output2, citations2 = process_tokens(\n        processor, [\"This is the second response [\", \"3\", \"][\", \"4\", \"].\"]\n    )\n    assert len(citations2) == 2\n\n    # Verify order - should be doc_1, doc_3, doc_4 (first citation order)\n    cited_docs = processor.get_cited_documents()\n    assert len(cited_docs) == 3  # doc_1, doc_3, doc_4 (doc_2 was never cited)\n    assert cited_docs[0].document_id == \"doc_1\"\n    assert cited_docs[1].document_id == \"doc_3\"\n    assert cited_docs[2].document_id == \"doc_4\"\n\n\ndef test_complex_text_mixed_citations_code_blocks(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test complex text with mixed citations and code blocks.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping(\n        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n    )\n\n    tokens: list[str | None] = [\n        \"Here's some text [\",\n        \"1\",\n        \"] with a citation.\\n\",\n        \"```\\n\",\n        \"def example():\\n    print('code')\\n\",\n        \"```\\n\",\n        \"More text [\",\n        \"2\",\n        \", \",\n        \"3\",\n        \"] here.\",\n    ]\n    output, citations = process_tokens(processor, tokens)\n\n    # Citations should be formatted\n    assert \"[[1]](https://example.com/doc1)\" in output\n    assert \"[[2]](https://example.com/doc2)\" in output\n    assert \"[[3]]()\" in output\n    assert \"```plaintext\" in output\n    assert len(citations) == 3\n\n\ndef test_real_world_citation_patterns(mock_search_docs: CitationMapping) -> None:\n    \"\"\"Test real-world citation patterns.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping(\n        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n    )\n\n    # Simulate a realistic LLM response\n    tokens: list[str | None] = [\n        \"According to recent research [\",\n        \"1\",\n        \"], the findings suggest that \",\n        \"multiple studies [\",\n        \"2\",\n        \", \",\n        \"3\",\n        \"] have confirmed these results. \",\n        \"However, some researchers [\",\n        \"1\",\n        \"] have raised concerns.\",\n    ]\n    output, citations = process_tokens(processor, tokens)\n\n    # Citations should be formatted\n    assert \"[[1]](https://example.com/doc1)\" in output\n    assert \"[[2]](https://example.com/doc2)\" in output\n    assert \"[[3]]()\" in output\n    # Should have CitationInfo for doc_1, doc_2, doc_3 (doc_1 appears twice but only one CitationInfo)\n    assert len(citations) == 3\n    # Verify order\n    doc_ids = [c.document_id for c in citations]\n    assert \"doc_1\" in doc_ids\n    assert \"doc_2\" in doc_ids\n    assert \"doc_3\" in doc_ids\n\n\n# ============================================================================\n# get_next_citation_number Tests\n# ============================================================================\n\n\ndef test_get_next_citation_number_empty() -> None:\n    \"\"\"Test get_next_citation_number returns 1 when no citations exist.\"\"\"\n    processor = DynamicCitationProcessor()\n\n    assert processor.get_next_citation_number() == 1\n\n\ndef test_get_next_citation_number_with_citations(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test get_next_citation_number returns max + 1 when citations exist.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})\n\n    assert processor.get_next_citation_number() == 3\n\n\ndef test_get_next_citation_number_non_sequential(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test get_next_citation_number with non-sequential citation numbers.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping(\n        {1: mock_search_docs[1], 5: mock_search_docs[2], 10: mock_search_docs[3]}\n    )\n\n    # Should return max + 1 = 11\n    assert processor.get_next_citation_number() == 11\n\n\ndef test_project_files_then_search_tool_citations(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"\n    Test that project file citations don't conflict with search tool citations.\n\n    \"\"\"\n    processor = DynamicCitationProcessor()\n\n    # Simulate project files being added (numbered 1, 2, 3)\n    project_file_1 = create_test_search_doc(\n        document_id=\"project_file_1\",\n        link=None,\n        semantic_identifier=\"ProjectFile1.txt\",\n        source_type=DocumentSource.FILE,\n    )\n    project_file_2 = create_test_search_doc(\n        document_id=\"project_file_2\",\n        link=None,\n        semantic_identifier=\"ProjectFile2.txt\",\n        source_type=DocumentSource.FILE,\n    )\n    project_file_3 = create_test_search_doc(\n        document_id=\"project_file_3\",\n        link=None,\n        semantic_identifier=\"ProjectFile3.txt\",\n        source_type=DocumentSource.FILE,\n    )\n\n    processor.update_citation_mapping(\n        {1: project_file_1, 2: project_file_2, 3: project_file_3}\n    )\n\n    # Verify project files are registered\n    assert processor.get_next_citation_number() == 4\n    assert len(processor.citation_to_doc) == 3\n\n    # Simulate search tool results starting at the next available number (4)\n    starting_citation = processor.get_next_citation_number()\n    search_result_1 = mock_search_docs[1]  # Will be citation 4\n    search_result_2 = mock_search_docs[2]  # Will be citation 5\n\n    processor.update_citation_mapping(\n        {starting_citation: search_result_1, starting_citation + 1: search_result_2}\n    )\n\n    # Verify both project files and search results are registered\n    assert len(processor.citation_to_doc) == 5\n    assert processor.citation_to_doc[1].document_id == \"project_file_1\"\n    assert processor.citation_to_doc[2].document_id == \"project_file_2\"\n    assert processor.citation_to_doc[3].document_id == \"project_file_3\"\n    assert processor.citation_to_doc[4].document_id == \"doc_1\"\n    assert processor.citation_to_doc[5].document_id == \"doc_2\"\n\n    # Verify all citations work\n    output, citations = process_tokens(\n        processor,\n        [\n            \"Project [1], [2], [3] and search results [4], [5]\",\n        ],\n    )\n\n    assert \"[[1]]\" in output\n    assert \"[[2]]\" in output\n    assert \"[[3]]\" in output\n    assert \"[[4]](https://example.com/doc1)\" in output\n    assert \"[[5]](https://example.com/doc2)\" in output\n    assert len(citations) == 5\n\n\ndef test_adding_project_files_across_messages(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that adding more project files in subsequent messages works correctly.\n\n    Architecture note: Each message gets a fresh citation processor, so project files\n    always start from citation 1. Each message maintains its own independent citation\n    space, and old messages use their saved citation mappings for display.\n\n    This test simulates:\n    - Message 1: User has 3 project files + runs search\n    - Message 2: User adds 2 MORE project files (now 5 total) + runs search\n    Both messages should work independently without citation conflicts.\n    \"\"\"\n    # ===== MESSAGE 1: 3 project files + search =====\n    message1_processor = DynamicCitationProcessor()\n\n    # Add 3 project files (citations 1, 2, 3)\n    project_files_msg1 = {\n        1: create_test_search_doc(\n            document_id=\"project_file_1\", link=None, source_type=DocumentSource.FILE\n        ),\n        2: create_test_search_doc(\n            document_id=\"project_file_2\", link=None, source_type=DocumentSource.FILE\n        ),\n        3: create_test_search_doc(\n            document_id=\"project_file_3\", link=None, source_type=DocumentSource.FILE\n        ),\n    }\n    message1_processor.update_citation_mapping(project_files_msg1)\n\n    # Run search tool (citations 4, 5)\n    search_start_msg1 = message1_processor.get_next_citation_number()\n    assert search_start_msg1 == 4\n    message1_processor.update_citation_mapping(\n        {\n            4: mock_search_docs[1],\n            5: mock_search_docs[2],\n        }\n    )\n\n    # Verify Message 1 citations\n    assert len(message1_processor.citation_to_doc) == 5\n    assert message1_processor.citation_to_doc[1].document_id == \"project_file_1\"\n    assert message1_processor.citation_to_doc[4].document_id == \"doc_1\"\n\n    # ===== MESSAGE 2: 5 project files + search =====\n    # Fresh processor for new message (simulates new run_llm_loop() call)\n    message2_processor = DynamicCitationProcessor()\n\n    # Add 5 project files (citations 1, 2, 3, 4, 5) - includes 2 NEW files\n    project_files_msg2 = {\n        1: create_test_search_doc(\n            document_id=\"project_file_1\", link=None, source_type=DocumentSource.FILE\n        ),\n        2: create_test_search_doc(\n            document_id=\"project_file_2\", link=None, source_type=DocumentSource.FILE\n        ),\n        3: create_test_search_doc(\n            document_id=\"project_file_3\", link=None, source_type=DocumentSource.FILE\n        ),\n        4: create_test_search_doc(\n            document_id=\"project_file_4\", link=None, source_type=DocumentSource.FILE\n        ),  # NEW\n        5: create_test_search_doc(\n            document_id=\"project_file_5\", link=None, source_type=DocumentSource.FILE\n        ),  # NEW\n    }\n    message2_processor.update_citation_mapping(project_files_msg2)\n\n    # Run search tool (citations 6, 7)\n    search_start_msg2 = message2_processor.get_next_citation_number()\n    assert search_start_msg2 == 6  # Starts after 5 project files\n    message2_processor.update_citation_mapping(\n        {\n            6: mock_search_docs[3],\n            7: mock_search_docs[4],\n        }\n    )\n\n    # Verify Message 2 citations\n    assert len(message2_processor.citation_to_doc) == 7\n    assert message2_processor.citation_to_doc[1].document_id == \"project_file_1\"\n    assert message2_processor.citation_to_doc[4].document_id == \"project_file_4\"  # NEW\n    assert message2_processor.citation_to_doc[5].document_id == \"project_file_5\"  # NEW\n    assert message2_processor.citation_to_doc[6].document_id == \"doc_3\"\n\n    # Verify both messages maintain independent citation spaces\n    # Message 1: Citation 4 = search result (doc_1)\n    # Message 2: Citation 4 = project file (project_file_4)\n    # This is correct - each message has its own citation space\n    assert message1_processor.citation_to_doc[4].document_id == \"doc_1\"\n    assert message2_processor.citation_to_doc[4].document_id == \"project_file_4\"\n\n\n# ============================================================================\n# get_seen_citations Tests\n# ============================================================================\n\n\ndef test_get_seen_citations_empty() -> None:\n    \"\"\"Test get_seen_citations returns empty dict when no citations processed.\"\"\"\n    processor = DynamicCitationProcessor()\n\n    seen = processor.get_seen_citations()\n    assert seen == {}\n\n\ndef test_get_seen_citations_returns_correct_mapping(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test get_seen_citations returns correct citation number to SearchDoc mapping.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping(\n        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n    )\n\n    process_tokens(processor, [\"[\", \"1\", \"][\", \"3\", \"]\"])  # Note: skipping [2]\n\n    seen = processor.get_seen_citations()\n    assert len(seen) == 2\n    assert 1 in seen\n    assert 3 in seen\n    assert 2 not in seen  # Citation 2 was never encountered\n    assert seen[1] == mock_search_docs[1]\n    assert seen[3] == mock_search_docs[3]\n\n\ndef test_get_seen_citations_accumulates_across_calls(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test get_seen_citations accumulates citations across multiple process_token calls.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping(\n        {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n    )\n\n    # First batch\n    process_tokens(processor, [\"[\", \"1\", \"]\"])\n    seen1 = processor.get_seen_citations()\n    assert len(seen1) == 1\n    assert 1 in seen1\n\n    # Second batch\n    process_tokens(processor, [\"[\", \"2\", \"]\"])\n    seen2 = processor.get_seen_citations()\n    assert len(seen2) == 2\n    assert 1 in seen2\n    assert 2 in seen2\n\n    # Third batch\n    process_tokens(processor, [\"[\", \"3\", \"]\"])\n    seen3 = processor.get_seen_citations()\n    assert len(seen3) == 3\n    assert 1 in seen3\n    assert 2 in seen3\n    assert 3 in seen3\n\n\ndef test_get_seen_citations_same_citation_multiple_times(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test that citing the same document multiple times only adds it once to seen_citations.\"\"\"\n    processor = DynamicCitationProcessor()\n    processor.update_citation_mapping({1: mock_search_docs[1]})\n\n    # Cite [1] multiple times\n    process_tokens(processor, [\"[\", \"1\", \"][\", \"1\", \"][\", \"1\", \"]\"])\n\n    seen = processor.get_seen_citations()\n    assert len(seen) == 1\n    assert seen[1] == mock_search_docs[1]\n\n\ndef test_get_seen_citations_with_remove_mode(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test get_seen_citations works correctly with REMOVE mode.\"\"\"\n    processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})\n\n    process_tokens(processor, [\"[\", \"1\", \"][\", \"2\", \"]\"])\n\n    seen = processor.get_seen_citations()\n    assert len(seen) == 2\n    assert seen[1].document_id == \"doc_1\"\n    assert seen[2].document_id == \"doc_2\"\n\n\ndef test_seen_citations_vs_cited_documents(\n    mock_search_docs: CitationMapping,\n) -> None:\n    \"\"\"Test the difference between seen_citations and cited_documents.\n\n    seen_citations: citation number -> SearchDoc (tracks which citations were parsed)\n    cited_documents: list of SearchDocs in first-citation order (for CitationInfo emission)\n    \"\"\"\n    # With REMOVE mode, cited_documents won't be populated but seen_citations will be\n    processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n    processor.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})\n\n    process_tokens(processor, [\"[\", \"1\", \"][\", \"2\", \"]\"])\n\n    # seen_citations should have both\n    seen = processor.get_seen_citations()\n    assert len(seen) == 2\n\n    # cited_documents should be empty (because citation_mode=REMOVE)\n    cited = processor.get_cited_documents()\n    assert len(cited) == 0\n\n    # Now test with HYPERLINK mode\n    processor2 = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n    processor2.update_citation_mapping({1: mock_search_docs[1], 2: mock_search_docs[2]})\n    process_tokens(processor2, [\"[\", \"1\", \"][\", \"2\", \"]\"])\n\n    # Both should be populated\n    seen2 = processor2.get_seen_citations()\n    assert len(seen2) == 2\n    cited2 = processor2.get_cited_documents()\n    assert len(cited2) == 2\n\n\n# ============================================================================\n# CitationMode Tests\n# ============================================================================\n\n\nclass TestCitationModeRemove:\n    \"\"\"Tests for CitationMode.REMOVE - citations are completely removed from output.\"\"\"\n\n    def test_remove_mode_removes_citations_from_output(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that REMOVE mode removes citation markers from output.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(processor, [\"Text [\", \"1\", \"] here.\"])\n\n        # Citation should be completely removed\n        assert \"[1]\" not in output\n        assert \"[[1]]\" not in output\n        # Text should flow naturally\n        assert \"Text\" in output\n        assert \"here.\" in output\n        # No CitationInfo should be emitted\n        assert len(citations) == 0\n\n    def test_remove_mode_no_citation_info_emitted(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that REMOVE mode does not emit CitationInfo objects.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n        )\n\n        output, citations = process_tokens(\n            processor, [\"Text [\", \"1\", \"][\", \"2\", \"][\", \"3\", \"]\"]\n        )\n\n        # All citations should be removed\n        assert \"[1]\" not in output\n        assert \"[2]\" not in output\n        assert \"[3]\" not in output\n        # No CitationInfo should be emitted\n        assert len(citations) == 0\n\n    def test_remove_mode_tracks_seen_citations(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that REMOVE mode still tracks seen citations.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n        )\n\n        process_tokens(processor, [\"Text [\", \"1\", \"][\", \"2\", \"][\", \"3\", \"]\"])\n\n        # Seen citations should be tracked\n        seen = processor.get_seen_citations()\n        assert len(seen) == 3\n        assert 1 in seen\n        assert 2 in seen\n        assert 3 in seen\n        assert seen[1].document_id == \"doc_1\"\n\n    def test_remove_mode_handles_double_space(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that REMOVE mode handles spacing correctly (no double spaces).\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, _ = process_tokens(processor, [\"Text [\", \"1\", \"] more text.\"])\n\n        # Should not have double space\n        assert \"Text  more\" not in output\n\n    def test_remove_mode_handles_punctuation_spacing(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that REMOVE mode handles spacing before punctuation correctly.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, _ = process_tokens(processor, [\"Text [\", \"1\", \"].\"])\n\n        # Should not have space before period\n        assert \"Text .\" not in output\n\n    def test_remove_mode_with_multiple_citations_in_bracket(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode with comma-separated citations [1, 2, 3].\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n        )\n\n        output, citations = process_tokens(\n            processor, [\"Text [\", \"1\", \", \", \"2\", \", \", \"3\", \"] end.\"]\n        )\n\n        # Citation should be removed\n        assert \"[1, 2, 3]\" not in output\n        # No CitationInfo emitted\n        assert len(citations) == 0\n        # But seen citations tracked\n        seen = processor.get_seen_citations()\n        assert len(seen) == 3\n\n    def test_remove_mode_with_unicode_brackets(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode with unicode bracket citation 【1】.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(processor, [\"Text 【\", \"1\", \"】 here.\"])\n\n        # Unicode citation should be removed\n        assert \"【1】\" not in output\n        assert len(citations) == 0\n        assert len(processor.get_seen_citations()) == 1\n\n\nclass TestCitationModeKeepMarkers:\n    \"\"\"Tests for CitationMode.KEEP_MARKERS - original markers preserved unchanged.\"\"\"\n\n    def test_keep_markers_mode_preserves_original_citation(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that KEEP_MARKERS mode preserves original [1] format.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(processor, [\"Text [\", \"1\", \"] here.\"])\n\n        # Original citation format should be preserved\n        assert \"[1]\" in output\n        # Should NOT have markdown link format\n        assert \"[[1]](https://example.com/doc1)\" not in output\n        # No CitationInfo should be emitted\n        assert len(citations) == 0\n\n    def test_keep_markers_mode_no_citation_info_emitted(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that KEEP_MARKERS mode does not emit CitationInfo objects.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n        )\n\n        output, citations = process_tokens(\n            processor, [\"Text [\", \"1\", \"][\", \"2\", \"][\", \"3\", \"]\"]\n        )\n\n        # Original citations should be preserved\n        assert \"[1]\" in output\n        assert \"[2]\" in output\n        assert \"[3]\" in output\n        # No CitationInfo should be emitted\n        assert len(citations) == 0\n\n    def test_keep_markers_mode_tracks_seen_citations(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that KEEP_MARKERS mode still tracks seen citations.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n        )\n\n        process_tokens(processor, [\"Text [\", \"1\", \"][\", \"2\", \"][\", \"3\", \"]\"])\n\n        # Seen citations should be tracked\n        seen = processor.get_seen_citations()\n        assert len(seen) == 3\n        assert 1 in seen\n        assert 2 in seen\n        assert 3 in seen\n\n    def test_keep_markers_mode_with_double_brackets(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test KEEP_MARKERS mode with double bracket citation [[1]].\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(processor, [\"Text [[\", \"1\", \"]] here.\"])\n\n        # Original double bracket format should be preserved\n        assert \"[[1]]\" in output\n        # Should NOT have markdown link format\n        assert \"[[1]](https://example.com/doc1)\" not in output\n        # No CitationInfo should be emitted\n        assert len(citations) == 0\n\n    def test_keep_markers_mode_with_comma_separated_citations(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test KEEP_MARKERS mode with comma-separated citations [1, 2, 3].\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n        )\n\n        output, citations = process_tokens(\n            processor, [\"Text [\", \"1\", \", \", \"2\", \", \", \"3\", \"] end.\"]\n        )\n\n        # Original format should be preserved\n        assert \"[1, 2, 3]\" in output\n        # No CitationInfo emitted\n        assert len(citations) == 0\n        # But seen citations tracked\n        seen = processor.get_seen_citations()\n        assert len(seen) == 3\n\n    def test_keep_markers_mode_with_unicode_brackets(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test KEEP_MARKERS mode with unicode bracket citation 【1】.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(processor, [\"Text 【\", \"1\", \"】 here.\"])\n\n        # Original unicode bracket format should be preserved\n        assert \"【1】\" in output\n        assert len(citations) == 0\n        assert len(processor.get_seen_citations()) == 1\n\n    def test_keep_markers_mode_preserves_spacing(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that KEEP_MARKERS mode preserves text spacing naturally.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, _ = process_tokens(processor, [\"Text [\", \"1\", \"] more text.\"])\n\n        # Text should flow naturally with citation\n        assert \"Text [1] more text.\" in output or \"Text [1]more text.\" in output\n\n\nclass TestCitationModeHyperlink:\n    \"\"\"Tests for CitationMode.HYPERLINK - citations replaced with markdown links.\"\"\"\n\n    def test_hyperlink_mode_formats_citation_as_link(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that HYPERLINK mode formats citations as [[n]](url).\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(processor, [\"Text [\", \"1\", \"] here.\"])\n\n        # Should have markdown link format\n        assert \"[[1]](https://example.com/doc1)\" in output\n        # Original format should be replaced\n        assert \"Text [1]\" not in output or \"[[1]]\" in output\n        # CitationInfo should be emitted\n        assert len(citations) == 1\n        assert citations[0].citation_number == 1\n        assert citations[0].document_id == \"doc_1\"\n\n    def test_hyperlink_mode_emits_citation_info(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that HYPERLINK mode emits CitationInfo objects.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n        )\n\n        output, citations = process_tokens(\n            processor, [\"Text [\", \"1\", \"][\", \"2\", \"][\", \"3\", \"]\"]\n        )\n\n        # All citations should be formatted\n        assert \"[[1]](https://example.com/doc1)\" in output\n        assert \"[[2]](https://example.com/doc2)\" in output\n        assert \"[[3]]()\" in output\n        # CitationInfo should be emitted for each\n        assert len(citations) == 3\n        citation_numbers = {c.citation_number for c in citations}\n        assert citation_numbers == {1, 2, 3}\n\n    def test_hyperlink_mode_tracks_seen_citations(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that HYPERLINK mode tracks seen citations.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 2: mock_search_docs[2]}\n        )\n\n        process_tokens(processor, [\"[\", \"1\", \"][\", \"2\", \"]\"])\n\n        # Seen citations should be tracked\n        seen = processor.get_seen_citations()\n        assert len(seen) == 2\n        assert 1 in seen\n        assert 2 in seen\n\n    def test_hyperlink_mode_populates_cited_documents(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that HYPERLINK mode populates cited_documents in order.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n        )\n\n        process_tokens(processor, [\"[\", \"3\", \"][\", \"1\", \"][\", \"2\", \"]\"])\n\n        # cited_documents should be populated in first-citation order\n        cited = processor.get_cited_documents()\n        assert len(cited) == 3\n        assert cited[0].document_id == \"doc_3\"\n        assert cited[1].document_id == \"doc_1\"\n        assert cited[2].document_id == \"doc_2\"\n\n    def test_hyperlink_mode_is_default(self, mock_search_docs: CitationMapping) -> None:\n        \"\"\"Test that HYPERLINK mode is the default behavior.\"\"\"\n        processor = DynamicCitationProcessor()  # No citation_mode specified\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(processor, [\"Text [\", \"1\", \"]\"])\n\n        # Should behave like HYPERLINK mode\n        assert \"[[1]](https://example.com/doc1)\" in output\n        assert len(citations) == 1\n\n\nclass TestCitationModesWithCodeBlocks:\n    \"\"\"Tests for citation modes behavior with code blocks.\"\"\"\n\n    def test_remove_mode_ignores_citations_in_code_block(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that REMOVE mode doesn't process citations inside code blocks.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        tokens: list[str | None] = [\n            \"Here's code:\\n```\\n\",\n            \"print('[1]')\\n\",\n            \"```\\n\",\n            \"End.\",\n        ]\n        output, citations = process_tokens(processor, tokens)\n\n        # Citation inside code block should be preserved\n        assert \"[1]\" in output\n        assert len(citations) == 0\n\n    def test_keep_markers_mode_ignores_citations_in_code_block(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that KEEP_MARKERS mode doesn't process citations inside code blocks.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        tokens: list[str | None] = [\n            \"Here's code:\\n```\\n\",\n            \"print('[1]')\\n\",\n            \"```\\n\",\n            \"End.\",\n        ]\n        output, citations = process_tokens(processor, tokens)\n\n        # Citation inside code block should be preserved\n        assert \"[1]\" in output\n        assert len(citations) == 0\n\n    def test_hyperlink_mode_ignores_citations_in_code_block(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that HYPERLINK mode doesn't process citations inside code blocks.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        tokens: list[str | None] = [\n            \"Here's code:\\n```\\n\",\n            \"print('[1]')\\n\",\n            \"```\\n\",\n            \"End.\",\n        ]\n        output, citations = process_tokens(processor, tokens)\n\n        # Citation inside code block should be preserved (not replaced with link)\n        assert \"[1]\" in output\n        # No CitationInfo emitted for citation in code block\n        assert len(citations) == 0\n\n\n# ============================================================================\n# Edge Case Tests\n# ============================================================================\n\n\nclass TestCitationModeEdgeCases:\n    \"\"\"Edge case tests for citation modes.\"\"\"\n\n    def test_remove_mode_citation_at_start_of_text(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode when citation is at the very start of text.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(processor, [\"[\", \"1\", \"] starts here.\"])\n\n        assert \"[1]\" not in output\n        assert \"starts here.\" in output\n        # Note: When citation is at start, the space after the citation is preserved\n        # This is expected behavior - the spacing logic handles trailing spaces before\n        # punctuation/space, but leading spaces after removed citations remain\n        assert len(citations) == 0\n\n    def test_remove_mode_citation_at_end_of_text(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode when citation is at the very end of text.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(processor, [\"ends here [\", \"1\", \"]\"])\n\n        assert \"[1]\" not in output\n        assert \"ends here\" in output\n        assert len(citations) == 0\n\n    def test_remove_mode_multiple_consecutive_citations(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode with multiple consecutive citations.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 2: mock_search_docs[2], 3: mock_search_docs[3]}\n        )\n\n        output, citations = process_tokens(\n            processor, [\"Text [\", \"1\", \"][\", \"2\", \"][\", \"3\", \"] end.\"]\n        )\n\n        assert \"[1]\" not in output\n        assert \"[2]\" not in output\n        assert \"[3]\" not in output\n        assert \"Text\" in output\n        assert \"end.\" in output\n        # Should track all citations\n        assert len(processor.get_seen_citations()) == 3\n\n    def test_remove_mode_citation_followed_by_newline(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode when citation is followed by newline.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, _ = process_tokens(processor, [\"Text [\", \"1\", \"]\\nNew line.\"])\n\n        assert \"[1]\" not in output\n        assert \"Text\" in output\n        assert \"New line.\" in output\n\n    def test_remove_mode_only_citations_no_other_text(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode when text is only citations.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 2: mock_search_docs[2]}\n        )\n\n        output, citations = process_tokens(processor, [\"[\", \"1\", \"][\", \"2\", \"]\"])\n\n        # Should still track citations even though output is mostly empty\n        assert len(processor.get_seen_citations()) == 2\n        assert len(citations) == 0\n\n    def test_keep_markers_mode_citation_at_start(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test KEEP_MARKERS mode when citation is at the start.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(processor, [\"[\", \"1\", \"] starts here.\"])\n\n        assert \"[1]\" in output\n        assert \"starts here.\" in output\n        assert len(citations) == 0\n\n    def test_hyperlink_mode_citation_with_special_chars_in_url(\n        self,\n        mock_search_docs: CitationMapping,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test HYPERLINK mode with special characters in URL.\"\"\"\n        special_doc = create_test_search_doc(\n            document_id=\"special_doc\",\n            link=\"https://example.com/doc?param=value&other=123#section\",\n        )\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping({1: special_doc})\n\n        output, citations = process_tokens(processor, [\"Text [\", \"1\", \"] here.\"])\n\n        assert \"[[1]](https://example.com/doc?param=value&other=123#section)\" in output\n        assert len(citations) == 1\n\n    def test_hyperlink_mode_citation_with_no_url(\n        self,\n        mock_search_docs: CitationMapping,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test HYPERLINK mode when document has no URL.\"\"\"\n        no_url_doc = create_test_search_doc(\n            document_id=\"no_url_doc\",\n            link=None,\n        )\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping({1: no_url_doc})\n\n        output, citations = process_tokens(processor, [\"Text [\", \"1\", \"] here.\"])\n\n        # Should still format but with empty link\n        assert \"[[1]]()\" in output\n        assert len(citations) == 1\n\n    def test_all_modes_with_citation_in_parentheses(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test all modes with citation inside parentheses (see [1]).\"\"\"\n        for mode in [\n            CitationMode.REMOVE,\n            CitationMode.KEEP_MARKERS,\n            CitationMode.HYPERLINK,\n        ]:\n            processor = DynamicCitationProcessor(citation_mode=mode)\n            processor.update_citation_mapping({1: mock_search_docs[1]})\n\n            output, _ = process_tokens(processor, [\"(see [\", \"1\", \"])\"])\n\n            if mode == CitationMode.REMOVE:\n                assert \"[1]\" not in output\n            elif mode == CitationMode.KEEP_MARKERS:\n                assert \"[1]\" in output\n            else:  # HYPERLINK\n                assert \"[[1]]\" in output\n\n    def test_all_modes_with_citation_after_comma(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test all modes with citation after comma.\"\"\"\n        for mode in [\n            CitationMode.REMOVE,\n            CitationMode.KEEP_MARKERS,\n            CitationMode.HYPERLINK,\n        ]:\n            processor = DynamicCitationProcessor(citation_mode=mode)\n            processor.update_citation_mapping({1: mock_search_docs[1]})\n\n            output, _ = process_tokens(processor, [\"First,[\", \"1\", \"] second.\"])\n\n            if mode == CitationMode.REMOVE:\n                assert \"[1]\" not in output\n            elif mode == CitationMode.KEEP_MARKERS:\n                assert \"[1]\" in output\n            else:  # HYPERLINK\n                assert \"[[1]]\" in output\n\n    def test_remove_mode_handles_tab_character(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode handles tab character before citation.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, _ = process_tokens(processor, [\"Text\\t[\", \"1\", \"] more.\"])\n\n        assert \"[1]\" not in output\n        # Tab should be handled appropriately\n\n    def test_citation_number_zero(\n        self,\n        mock_search_docs: CitationMapping,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test handling of citation number 0.\"\"\"\n        zero_doc = create_test_search_doc(\n            document_id=\"zero_doc\", link=\"https://zero.com\"\n        )\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping({0: zero_doc})\n\n        output, citations = process_tokens(processor, [\"Text [\", \"0\", \"] here.\"])\n\n        assert \"[[0]](https://zero.com)\" in output\n        assert len(citations) == 1\n        assert citations[0].citation_number == 0\n\n    def test_large_citation_numbers(\n        self,\n        mock_search_docs: CitationMapping,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test handling of large citation numbers.\"\"\"\n        large_doc = create_test_search_doc(\n            document_id=\"large_doc\", link=\"https://large.com\"\n        )\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping({9999: large_doc})\n\n        output, citations = process_tokens(processor, [\"Text [\", \"9999\", \"] here.\"])\n\n        assert \"[[9999]](https://large.com)\" in output\n        assert len(citations) == 1\n        assert citations[0].citation_number == 9999\n\n    def test_negative_citation_number_not_processed(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that negative numbers in brackets are not processed as citations.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        # Negative numbers should not be treated as citations\n        output, citations = process_tokens(\n            processor, [\"Array index [-\", \"1\", \"] here.\"]\n        )\n\n        # Should not be processed as citation (no mapping for -1)\n        assert len(citations) == 0\n\n    def test_mixed_valid_invalid_citations_in_sequence(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test processing mix of valid and invalid citations.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 3: mock_search_docs[3]}\n        )\n\n        # Citation 2 is not in mapping\n        output, citations = process_tokens(\n            processor, [\"Text [\", \"1\", \"][\", \"2\", \"][\", \"3\", \"] end.\"]\n        )\n\n        # Should process 1 and 3, skip 2\n        assert \"[[1]]\" in output\n        assert \"[[3]]\" in output\n        assert len(citations) == 2\n        # 2 should not be in seen citations since it's not in mapping\n        seen = processor.get_seen_citations()\n        assert 1 in seen\n        assert 2 not in seen\n        assert 3 in seen\n\n    def test_empty_token_stream(self) -> None:\n        \"\"\"Test processing empty token stream.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n\n        output, citations = process_tokens(processor, [])\n\n        assert output == \"\"\n        assert len(citations) == 0\n\n    def test_only_none_token(self) -> None:\n        \"\"\"Test processing only None token (flush signal).\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n\n        output, citations = process_tokens(processor, [None])\n\n        assert output == \"\"\n        assert len(citations) == 0\n\n    def test_whitespace_only_tokens(self, mock_search_docs: CitationMapping) -> None:\n        \"\"\"Test processing whitespace-only tokens between citations.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping(\n            {1: mock_search_docs[1], 2: mock_search_docs[2]}\n        )\n\n        output, citations = process_tokens(\n            processor, [\"[\", \"1\", \"]\", \"   \", \"[\", \"2\", \"]\"]\n        )\n\n        assert \"[[1]]\" in output\n        assert \"[[2]]\" in output\n        assert len(citations) == 2\n\n    def test_unicode_text_around_citations(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test citations surrounded by unicode text.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(\n            processor, [\"日本語テキスト [\", \"1\", \"] 続きのテキスト\"]\n        )\n\n        assert \"[[1]]\" in output\n        assert \"日本語テキスト\" in output\n        assert \"続きのテキスト\" in output\n        assert len(citations) == 1\n\n    def test_emoji_around_citations(self, mock_search_docs: CitationMapping) -> None:\n        \"\"\"Test citations surrounded by emoji.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(\n            processor, [\"Great! 🎉 [\", \"1\", \"] Amazing! 🚀\"]\n        )\n\n        assert \"[[1]]\" in output\n        assert \"🎉\" in output\n        assert \"🚀\" in output\n        assert len(citations) == 1\n\n\nclass TestCitationModeWithDifferentProcessors:\n    \"\"\"Test using multiple processors with different modes.\"\"\"\n\n    def test_separate_processors_different_modes(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test using separate processors with different citation modes.\"\"\"\n        # Processor 1: HYPERLINK mode\n        processor1 = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor1.update_citation_mapping({1: mock_search_docs[1]})\n        output1, citations1 = process_tokens(processor1, [\"Text [\", \"1\", \"]\"])\n        assert \"[[1]](https://example.com/doc1)\" in output1\n        assert len(citations1) == 1\n\n        # Processor 2: KEEP_MARKERS mode\n        processor2 = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor2.update_citation_mapping({1: mock_search_docs[1]})\n        output2, citations2 = process_tokens(processor2, [\"Text [\", \"1\", \"]\"])\n        assert \"[1]\" in output2\n        assert \"[[1]]\" not in output2\n        assert len(citations2) == 0\n\n        # Processor 3: REMOVE mode\n        processor3 = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor3.update_citation_mapping({1: mock_search_docs[1]})\n        output3, citations3 = process_tokens(processor3, [\"Text [\", \"1\", \"]\"])\n        assert \"[1]\" not in output3\n        assert len(citations3) == 0\n\n        # All should track seen citations\n        assert len(processor1.get_seen_citations()) == 1\n        assert len(processor2.get_seen_citations()) == 1\n        assert len(processor3.get_seen_citations()) == 1\n\n    def test_processors_do_not_share_state(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that separate processors do not share state.\"\"\"\n        processor1 = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor1.update_citation_mapping({1: mock_search_docs[1]})\n        process_tokens(processor1, [\"[\", \"1\", \"]\"])\n\n        processor2 = DynamicCitationProcessor(citation_mode=CitationMode.HYPERLINK)\n        processor2.update_citation_mapping({2: mock_search_docs[2]})\n        process_tokens(processor2, [\"[\", \"2\", \"]\"])\n\n        # Each processor should only have its own citations\n        assert 1 in processor1.get_seen_citations()\n        assert 2 not in processor1.get_seen_citations()\n        assert 2 in processor2.get_seen_citations()\n        assert 1 not in processor2.get_seen_citations()\n\n\nclass TestRemoveModeSpacingEdgeCases:\n    \"\"\"Detailed spacing edge cases for REMOVE mode.\"\"\"\n\n    def test_remove_mode_citation_between_sentences(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode with citation between sentences.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, _ = process_tokens(\n            processor, [\"First sentence. [\", \"1\", \"] Second sentence.\"]\n        )\n\n        assert \"[1]\" not in output\n        assert \"First sentence.\" in output\n        assert \"Second sentence.\" in output\n\n    def test_remove_mode_citation_before_question_mark(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode with citation before question mark.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, _ = process_tokens(processor, [\"Is this true [\", \"1\", \"]?\"])\n\n        assert \"[1]\" not in output\n        # Should not have space before question mark\n        assert \"true ?\" not in output\n\n    def test_remove_mode_citation_before_exclamation(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode with citation before exclamation mark.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, _ = process_tokens(processor, [\"Amazing [\", \"1\", \"]!\"])\n\n        assert \"[1]\" not in output\n        # Should not have space before exclamation\n        assert \"Amazing !\" not in output\n\n    def test_remove_mode_citation_before_semicolon(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode with citation before semicolon.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, _ = process_tokens(processor, [\"First part [\", \"1\", \"]; second part.\"])\n\n        assert \"[1]\" not in output\n        # Should not have space before semicolon\n        assert \"part ;\" not in output\n\n    def test_remove_mode_citation_before_closing_paren(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode with citation before closing parenthesis.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, _ = process_tokens(processor, [\"(see this [\", \"1\", \"])\"])\n\n        assert \"[1]\" not in output\n        # Should not have space before closing paren\n        assert \"this )\" not in output\n\n    def test_remove_mode_citation_before_closing_bracket(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test REMOVE mode with citation before closing bracket.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, _ = process_tokens(processor, [\"[see this [\", \"1\", \"]]\"])\n\n        assert \"[[1]]\" not in output\n\n\nclass TestKeepMarkersEdgeCases:\n    \"\"\"Edge cases specific to KEEP_MARKERS mode.\"\"\"\n\n    def test_keep_markers_exact_text_preservation(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test that KEEP_MARKERS preserves exact original text.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        original_text = \"The result [1] shows improvement.\"\n        tokens: list[str | None] = list(\n            original_text\n        )  # Split into individual characters\n        output, _ = process_tokens(processor, tokens)\n\n        # Should preserve the exact text\n        assert \"[1]\" in output\n\n    def test_keep_markers_with_citation_not_in_mapping(\n        self, mock_search_docs: CitationMapping\n    ) -> None:\n        \"\"\"Test KEEP_MARKERS with citation number not in mapping.\"\"\"\n        processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)\n        processor.update_citation_mapping({1: mock_search_docs[1]})\n\n        output, citations = process_tokens(processor, [\"Text [\", \"99\", \"] here.\"])\n\n        # Citation 99 is not in mapping, but text should still be preserved\n        # (behavior depends on implementation - citation may be kept or removed)\n        assert len(citations) == 0\n        # Should not be in seen citations\n        assert 99 not in processor.get_seen_citations()\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_citation_utils.py",
    "content": "\"\"\"\nUnit tests for citation_utils module.\n\nThis module tests the collapse_citations function which renumbers citations\nin text to use the smallest possible numbers while respecting existing mappings.\n\"\"\"\n\nfrom datetime import datetime\n\nfrom onyx.chat.citation_processor import CitationMapping\nfrom onyx.chat.citation_utils import collapse_citations\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.context.search.models import SearchDoc\n\n\n# ============================================================================\n# Helper Functions\n# ============================================================================\n\n\ndef create_test_search_doc(\n    document_id: str = \"test-doc-1\",\n    link: str | None = \"https://example.com/doc1\",\n    chunk_ind: int = 0,\n    semantic_identifier: str = \"Test Document\",\n    blurb: str = \"Test blurb\",\n    source_type: DocumentSource = DocumentSource.WEB,\n    boost: int = 1,\n    hidden: bool = False,\n    metadata: dict | None = None,\n    score: float | None = None,\n    match_highlights: list[str] | None = None,\n) -> SearchDoc:\n    \"\"\"Create a test SearchDoc instance with default or custom values.\"\"\"\n    return SearchDoc(\n        document_id=document_id,\n        chunk_ind=chunk_ind,\n        semantic_identifier=semantic_identifier,\n        link=link,\n        blurb=blurb,\n        source_type=source_type,\n        boost=boost,\n        hidden=hidden,\n        metadata=metadata or {},\n        score=score,\n        match_highlights=match_highlights or [],\n        updated_at=datetime.now(),\n    )\n\n\n# ============================================================================\n# Basic Functionality Tests\n# ============================================================================\n\n\nclass TestCollapseCitationsBasic:\n    \"\"\"Basic functionality tests for collapse_citations.\"\"\"\n\n    def test_empty_text_and_mappings(self) -> None:\n        \"\"\"Test with empty text and empty mappings.\"\"\"\n        text, mapping = collapse_citations(\"\", {}, {})\n        assert text == \"\"\n        assert mapping == {}\n\n    def test_text_without_citations(self) -> None:\n        \"\"\"Test text without any citations remains unchanged.\"\"\"\n        input_text = \"This is some text without any citations.\"\n        text, mapping = collapse_citations(input_text, {}, {})\n        assert text == input_text\n        assert mapping == {}\n\n    def test_empty_existing_mapping_starts_from_one(self) -> None:\n        \"\"\"Test that with empty existing mapping, new citations start from 1.\"\"\"\n        doc1 = create_test_search_doc(document_id=\"doc_50\")\n        doc2 = create_test_search_doc(document_id=\"doc_60\")\n        new_mapping: CitationMapping = {50: doc1, 60: doc2}\n\n        text, mapping = collapse_citations(\"See [50] and [60].\", {}, new_mapping)\n\n        # Should start from 1 when existing mapping is empty\n        assert text == \"See [1] and [2].\"\n        assert set(mapping.keys()) == {1, 2}\n        assert mapping[1].document_id == \"doc_50\"\n        assert mapping[2].document_id == \"doc_60\"\n\n    def test_single_citation_no_existing(self) -> None:\n        \"\"\"Test collapsing a single citation with no existing mappings.\"\"\"\n        doc = create_test_search_doc(document_id=\"doc_25\")\n        new_mapping: CitationMapping = {25: doc}\n\n        text, mapping = collapse_citations(\"See [25] for details.\", {}, new_mapping)\n\n        assert text == \"See [1] for details.\"\n        assert 1 in mapping\n        assert mapping[1].document_id == \"doc_25\"\n        assert len(mapping) == 1\n\n    def test_multiple_citations_no_existing(self) -> None:\n        \"\"\"Test collapsing multiple citations with no existing mappings.\"\"\"\n        doc1 = create_test_search_doc(document_id=\"doc_100\")\n        doc2 = create_test_search_doc(document_id=\"doc_200\")\n        doc3 = create_test_search_doc(document_id=\"doc_300\")\n        new_mapping: CitationMapping = {100: doc1, 200: doc2, 300: doc3}\n\n        text, mapping = collapse_citations(\n            \"See [100], [200], and [300].\", {}, new_mapping\n        )\n\n        assert text == \"See [1], [2], and [3].\"\n        assert mapping[1].document_id == \"doc_100\"\n        assert mapping[2].document_id == \"doc_200\"\n        assert mapping[3].document_id == \"doc_300\"\n        assert len(mapping) == 3\n\n\nclass TestCollapseCitationsWithExisting:\n    \"\"\"Tests for collapse_citations with existing citation mappings.\"\"\"\n\n    def test_continues_from_existing_mapping(self) -> None:\n        \"\"\"Test that new citations start from the next available number.\"\"\"\n        existing_doc = create_test_search_doc(document_id=\"existing_doc\")\n        existing_mapping: CitationMapping = {1: existing_doc}\n\n        new_doc = create_test_search_doc(document_id=\"new_doc\")\n        new_mapping: CitationMapping = {50: new_doc}\n\n        text, mapping = collapse_citations(\n            \"See [50] for more.\", existing_mapping, new_mapping\n        )\n\n        assert text == \"See [2] for more.\"\n        assert 1 in mapping\n        assert 2 in mapping\n        assert mapping[1].document_id == \"existing_doc\"\n        assert mapping[2].document_id == \"new_doc\"\n        assert len(mapping) == 2\n\n    def test_reuses_existing_citation_for_same_document(self) -> None:\n        \"\"\"Test that citations to existing documents use the existing number.\"\"\"\n        doc = create_test_search_doc(document_id=\"shared_doc\")\n        existing_mapping: CitationMapping = {1: doc}\n\n        # Same document referenced with a different citation number\n        new_doc = create_test_search_doc(document_id=\"shared_doc\")\n        new_mapping: CitationMapping = {50: new_doc}\n\n        text, mapping = collapse_citations(\n            \"See [50] again.\", existing_mapping, new_mapping\n        )\n\n        assert text == \"See [1] again.\"\n        assert len(mapping) == 1\n        assert mapping[1].document_id == \"shared_doc\"\n\n    def test_mixed_existing_and_new_documents(self) -> None:\n        \"\"\"Test with a mix of existing and new documents.\"\"\"\n        existing_doc1 = create_test_search_doc(document_id=\"doc_a\")\n        existing_doc2 = create_test_search_doc(document_id=\"doc_b\")\n        existing_mapping: CitationMapping = {1: existing_doc1, 2: existing_doc2}\n\n        # 30 refers to existing doc_a, 31 is new, 32 refers to existing doc_b\n        new_doc_a = create_test_search_doc(document_id=\"doc_a\")\n        new_doc_c = create_test_search_doc(document_id=\"doc_c\")\n        new_doc_b = create_test_search_doc(document_id=\"doc_b\")\n        new_mapping: CitationMapping = {30: new_doc_a, 31: new_doc_c, 32: new_doc_b}\n\n        text, mapping = collapse_citations(\n            \"Refs: [30], [31], [32].\", existing_mapping, new_mapping\n        )\n\n        # [30] -> [1] (doc_a exists as 1)\n        # [31] -> [3] (doc_c is new, next available)\n        # [32] -> [2] (doc_b exists as 2)\n        assert text == \"Refs: [1], [3], [2].\"\n        assert len(mapping) == 3\n        assert mapping[1].document_id == \"doc_a\"\n        assert mapping[2].document_id == \"doc_b\"\n        assert mapping[3].document_id == \"doc_c\"\n\n    def test_existing_mapping_unchanged(self) -> None:\n        \"\"\"Test that existing mapping values are not modified.\"\"\"\n        existing_doc = create_test_search_doc(\n            document_id=\"existing\", link=\"https://existing.com\"\n        )\n        existing_mapping: CitationMapping = {5: existing_doc}\n\n        new_doc = create_test_search_doc(document_id=\"new_doc\")\n        new_mapping: CitationMapping = {100: new_doc}\n\n        text, mapping = collapse_citations(\"[100]\", existing_mapping, new_mapping)\n\n        # Existing mapping should be preserved with its original key\n        assert 5 in mapping\n        assert mapping[5].document_id == \"existing\"\n        assert mapping[5].link == \"https://existing.com\"\n        # New citation should get next available number (6)\n        assert 6 in mapping\n        assert mapping[6].document_id == \"new_doc\"\n\n\nclass TestCollapseCitationsMultipleCitations:\n    \"\"\"Tests for multiple citation formats and edge cases.\"\"\"\n\n    def test_same_citation_multiple_times(self) -> None:\n        \"\"\"Test the same citation appearing multiple times in text.\"\"\"\n        doc = create_test_search_doc(document_id=\"doc_25\")\n        new_mapping: CitationMapping = {25: doc}\n\n        text, mapping = collapse_citations(\n            \"[25] says X. Also [25] says Y.\", {}, new_mapping\n        )\n\n        assert text == \"[1] says X. Also [1] says Y.\"\n        assert len(mapping) == 1\n        assert mapping[1].document_id == \"doc_25\"\n\n    def test_comma_separated_citations(self) -> None:\n        \"\"\"Test comma-separated citations like [1, 2, 3].\"\"\"\n        doc1 = create_test_search_doc(document_id=\"doc_10\")\n        doc2 = create_test_search_doc(document_id=\"doc_20\")\n        new_mapping: CitationMapping = {10: doc1, 20: doc2}\n\n        text, mapping = collapse_citations(\"[10, 20]\", {}, new_mapping)\n\n        assert text == \"[1, 2]\"\n        assert len(mapping) == 2\n\n    def test_double_bracket_citations(self) -> None:\n        \"\"\"Test double bracket citations like [[25]].\"\"\"\n        doc = create_test_search_doc(document_id=\"doc_25\")\n        new_mapping: CitationMapping = {25: doc}\n\n        text, mapping = collapse_citations(\"See [[25]] for info.\", {}, new_mapping)\n\n        assert text == \"See [[1]] for info.\"\n        assert mapping[1].document_id == \"doc_25\"\n\n    def test_same_doc_different_old_numbers(self) -> None:\n        \"\"\"Test same document appearing with different citation numbers.\"\"\"\n        doc = create_test_search_doc(document_id=\"same_doc\")\n        # Same document with two different citation numbers\n        new_mapping: CitationMapping = {\n            50: doc,\n            60: create_test_search_doc(document_id=\"same_doc\"),\n        }\n\n        text, mapping = collapse_citations(\"[50] and [60]\", {}, new_mapping)\n\n        # Both should map to the same new number\n        assert text == \"[1] and [1]\"\n        assert len(mapping) == 1\n        assert mapping[1].document_id == \"same_doc\"\n\n\nclass TestCollapseCitationsUnicodeBrackets:\n    \"\"\"Tests for unicode bracket variants.\"\"\"\n\n    def test_unicode_brackets_chinese(self) -> None:\n        \"\"\"Test Chinese-style brackets 【】.\"\"\"\n        doc = create_test_search_doc(document_id=\"doc_25\")\n        new_mapping: CitationMapping = {25: doc}\n\n        text, mapping = collapse_citations(\"See 【25】 for details.\", {}, new_mapping)\n\n        assert text == \"See 【1】 for details.\"\n        assert mapping[1].document_id == \"doc_25\"\n\n    def test_unicode_brackets_fullwidth(self) -> None:\n        \"\"\"Test fullwidth brackets ［］.\"\"\"\n        doc = create_test_search_doc(document_id=\"doc_25\")\n        new_mapping: CitationMapping = {25: doc}\n\n        text, mapping = collapse_citations(\"See ［25］ for details.\", {}, new_mapping)\n\n        assert text == \"See ［1］ for details.\"\n        assert mapping[1].document_id == \"doc_25\"\n\n    def test_double_unicode_brackets(self) -> None:\n        \"\"\"Test double unicode brackets 【【25】】.\"\"\"\n        doc = create_test_search_doc(document_id=\"doc_25\")\n        new_mapping: CitationMapping = {25: doc}\n\n        text, mapping = collapse_citations(\"See 【【25】】 for info.\", {}, new_mapping)\n\n        assert text == \"See 【【1】】 for info.\"\n        assert mapping[1].document_id == \"doc_25\"\n\n\nclass TestCollapseCitationsEdgeCases:\n    \"\"\"Edge case tests for collapse_citations.\"\"\"\n\n    def test_citation_not_in_mapping(self) -> None:\n        \"\"\"Test citations in text that aren't in the new mapping are preserved.\"\"\"\n        doc = create_test_search_doc(document_id=\"doc_25\")\n        new_mapping: CitationMapping = {25: doc}\n\n        # [99] is not in the mapping, should remain unchanged\n        text, mapping = collapse_citations(\"[25] and [99]\", {}, new_mapping)\n\n        assert text == \"[1] and [99]\"\n        assert len(mapping) == 1\n\n    def test_non_sequential_existing_mapping(self) -> None:\n        \"\"\"Test with non-sequential existing mapping numbers.\"\"\"\n        existing_mapping: CitationMapping = {\n            5: create_test_search_doc(document_id=\"doc_5\"),\n            10: create_test_search_doc(document_id=\"doc_10\"),\n        }\n\n        new_doc = create_test_search_doc(document_id=\"new_doc\")\n        new_mapping: CitationMapping = {99: new_doc}\n\n        text, mapping = collapse_citations(\"[99]\", existing_mapping, new_mapping)\n\n        # Next available should be max(5, 10) + 1 = 11\n        assert text == \"[11]\"\n        assert 5 in mapping\n        assert 10 in mapping\n        assert 11 in mapping\n        assert len(mapping) == 3\n\n    def test_preserves_text_around_citations(self) -> None:\n        \"\"\"Test that text around citations is preserved exactly.\"\"\"\n        doc = create_test_search_doc(document_id=\"doc_1\")\n        new_mapping: CitationMapping = {100: doc}\n\n        input_text = \"According to the source [100], this is true.\\n\\nNext paragraph.\"\n        text, mapping = collapse_citations(input_text, {}, new_mapping)\n\n        assert text == \"According to the source [1], this is true.\\n\\nNext paragraph.\"\n\n    def test_citation_at_start_of_text(self) -> None:\n        \"\"\"Test citation at the very start of text.\"\"\"\n        doc = create_test_search_doc(document_id=\"doc_1\")\n        new_mapping: CitationMapping = {50: doc}\n\n        text, mapping = collapse_citations(\"[50] is the answer.\", {}, new_mapping)\n\n        assert text == \"[1] is the answer.\"\n\n    def test_citation_at_end_of_text(self) -> None:\n        \"\"\"Test citation at the very end of text.\"\"\"\n        doc = create_test_search_doc(document_id=\"doc_1\")\n        new_mapping: CitationMapping = {50: doc}\n\n        text, mapping = collapse_citations(\"The answer is [50]\", {}, new_mapping)\n\n        assert text == \"The answer is [1]\"\n\n    def test_adjacent_citations(self) -> None:\n        \"\"\"Test citations immediately adjacent to each other.\"\"\"\n        doc1 = create_test_search_doc(document_id=\"doc_1\")\n        doc2 = create_test_search_doc(document_id=\"doc_2\")\n        new_mapping: CitationMapping = {50: doc1, 60: doc2}\n\n        text, mapping = collapse_citations(\"[50][60]\", {}, new_mapping)\n\n        assert text == \"[1][2]\"\n\n    def test_empty_new_mapping_with_existing(self) -> None:\n        \"\"\"Test with existing mapping but no new citations to process.\"\"\"\n        existing_doc = create_test_search_doc(document_id=\"existing\")\n        existing_mapping: CitationMapping = {1: existing_doc}\n\n        text, mapping = collapse_citations(\"No citations here.\", existing_mapping, {})\n\n        assert text == \"No citations here.\"\n        assert mapping == existing_mapping\n\n\nclass TestCollapseCitationsOrdering:\n    \"\"\"Tests for citation ordering behavior.\"\"\"\n\n    def test_assigns_numbers_in_order_of_appearance(self) -> None:\n        \"\"\"Test that new numbers are assigned based on order in new_mapping iteration.\"\"\"\n        doc1 = create_test_search_doc(document_id=\"doc_a\")\n        doc2 = create_test_search_doc(document_id=\"doc_b\")\n        doc3 = create_test_search_doc(document_id=\"doc_c\")\n        # Note: dict order is preserved in Python 3.7+\n        new_mapping: CitationMapping = {300: doc1, 100: doc2, 200: doc3}\n\n        text, mapping = collapse_citations(\"[300] [100] [200]\", {}, new_mapping)\n\n        # The mapping iteration order determines assignment:\n        # 300 -> 1 (first in new_mapping)\n        # 100 -> 2 (second in new_mapping)\n        # 200 -> 3 (third in new_mapping)\n        assert mapping[1].document_id == \"doc_a\"\n        assert mapping[2].document_id == \"doc_b\"\n        assert mapping[3].document_id == \"doc_c\"\n        assert text == \"[1] [2] [3]\"\n\n    def test_multiple_existing_citations_preserved(self) -> None:\n        \"\"\"Test that all existing citations are preserved in output mapping.\"\"\"\n        existing_mapping: CitationMapping = {\n            1: create_test_search_doc(document_id=\"doc_1\"),\n            2: create_test_search_doc(document_id=\"doc_2\"),\n            3: create_test_search_doc(document_id=\"doc_3\"),\n        }\n\n        new_doc = create_test_search_doc(document_id=\"new_doc\")\n        new_mapping: CitationMapping = {99: new_doc}\n\n        text, mapping = collapse_citations(\"[99]\", existing_mapping, new_mapping)\n\n        assert text == \"[4]\"\n        # All existing plus the new one\n        assert len(mapping) == 4\n        assert mapping[1].document_id == \"doc_1\"\n        assert mapping[2].document_id == \"doc_2\"\n        assert mapping[3].document_id == \"doc_3\"\n        assert mapping[4].document_id == \"new_doc\"\n\n\nclass TestCollapseCitationsComplexScenarios:\n    \"\"\"Complex real-world scenario tests.\"\"\"\n\n    def test_research_agent_scenario(self) -> None:\n        \"\"\"Test a realistic research agent scenario with multiple tool calls.\"\"\"\n        # First search returned citations 1-5\n        existing_mapping: CitationMapping = {\n            1: create_test_search_doc(document_id=\"wiki_python\"),\n            2: create_test_search_doc(document_id=\"docs_typing\"),\n            3: create_test_search_doc(document_id=\"blog_best_practices\"),\n        }\n\n        # Second search returned citations starting at 100 (to avoid conflicts)\n        # Some docs are the same as before\n        new_mapping: CitationMapping = {\n            100: create_test_search_doc(document_id=\"wiki_python\"),  # Same as 1\n            101: create_test_search_doc(document_id=\"new_tutorial\"),  # New\n            102: create_test_search_doc(document_id=\"docs_typing\"),  # Same as 2\n            103: create_test_search_doc(document_id=\"another_new\"),  # New\n        }\n\n        text, mapping = collapse_citations(\n            \"According to [100] and [101], also see [102] and [103].\",\n            existing_mapping,\n            new_mapping,\n        )\n\n        # [100] -> [1] (wiki_python exists as 1)\n        # [101] -> [4] (new_tutorial is new, next after 3)\n        # [102] -> [2] (docs_typing exists as 2)\n        # [103] -> [5] (another_new is new)\n        assert text == \"According to [1] and [4], also see [2] and [5].\"\n        assert len(mapping) == 5\n        assert mapping[1].document_id == \"wiki_python\"\n        assert mapping[2].document_id == \"docs_typing\"\n        assert mapping[3].document_id == \"blog_best_practices\"\n        assert mapping[4].document_id == \"new_tutorial\"\n        assert mapping[5].document_id == \"another_new\"\n\n    def test_long_text_with_many_citations(self) -> None:\n        \"\"\"Test processing longer text with many citations.\"\"\"\n        # Create docs for citations 50-55\n        new_mapping: CitationMapping = {\n            i: create_test_search_doc(document_id=f\"doc_{i}\") for i in range(50, 56)\n        }\n\n        text = \"\"\"\n        This is a comprehensive document with multiple citations.\n\n        First, we discuss [50] which provides background information.\n        Then [51] and [52] offer contrasting viewpoints.\n\n        The middle section references [53] extensively, as seen here [53].\n\n        Finally, [54] and [55] conclude the analysis. Note that [50]\n        is referenced again for context.\n        \"\"\"\n\n        result_text, mapping = collapse_citations(text, {}, new_mapping)\n\n        # All 50-55 should be collapsed to 1-6\n        assert \"[1]\" in result_text\n        assert \"[2]\" in result_text\n        assert \"[3]\" in result_text\n        assert \"[4]\" in result_text\n        assert \"[5]\" in result_text\n        assert \"[6]\" in result_text\n        # Original numbers should not appear\n        assert \"[50]\" not in result_text\n        assert \"[51]\" not in result_text\n        assert len(mapping) == 6\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_compression.py",
    "content": "\"\"\"Unit tests for chat history compression module.\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.chat.compression import _build_llm_messages_for_summarization\nfrom onyx.chat.compression import find_summary_for_branch\nfrom onyx.chat.compression import generate_summary\nfrom onyx.chat.compression import get_compression_params\nfrom onyx.chat.compression import get_messages_to_summarize\nfrom onyx.chat.compression import SummaryContent\nfrom onyx.configs.constants import MessageType\nfrom onyx.llm.models import AssistantMessage\nfrom onyx.llm.models import SystemMessage\nfrom onyx.llm.models import UserMessage\nfrom onyx.prompts.compression_prompts import PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK\nfrom onyx.prompts.compression_prompts import PROGRESSIVE_USER_REMINDER\nfrom onyx.prompts.compression_prompts import SUMMARIZATION_CUTOFF_MARKER\nfrom onyx.prompts.compression_prompts import SUMMARIZATION_PROMPT\nfrom onyx.prompts.compression_prompts import USER_REMINDER\n\n# Base time for generating sequential timestamps\nBASE_TIME = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc)\n\n\ndef create_mock_message(\n    id: int,\n    message: str,\n    token_count: int,\n    message_type: MessageType = MessageType.USER,\n    chat_session_id: int = 1,\n    parent_message_id: int | None = None,\n    last_summarized_message_id: int | None = None,\n    tool_calls: list | None = None,\n) -> MagicMock:\n    \"\"\"Create a mock ChatMessage for testing.\"\"\"\n    mock = MagicMock()\n    mock.id = id\n    mock.message = message\n    mock.token_count = token_count\n    mock.message_type = message_type\n    mock.chat_session_id = chat_session_id\n    mock.parent_message_id = parent_message_id\n    mock.last_summarized_message_id = last_summarized_message_id\n    mock.tool_calls = tool_calls\n    # Generate time_sent based on id for chronological ordering\n    mock.time_sent = BASE_TIME + timedelta(minutes=id)\n    return mock\n\n\ndef test_no_compression_when_under_threshold() -> None:\n    \"\"\"Should not compress when history is under threshold.\"\"\"\n    result = get_compression_params(\n        max_input_tokens=10000,\n        current_history_tokens=1000,\n        reserved_tokens=2000,\n    )\n    assert result.should_compress is False\n\n\ndef test_compression_triggered_when_over_threshold() -> None:\n    \"\"\"Should compress when history exceeds threshold.\"\"\"\n    result = get_compression_params(\n        max_input_tokens=10000,\n        current_history_tokens=7000,\n        reserved_tokens=2000,\n    )\n    assert result.should_compress is True\n    assert result.tokens_for_recent > 0\n\n\ndef test_get_messages_returns_summary_content() -> None:\n    \"\"\"Should return SummaryContent with correct structure.\"\"\"\n    messages = [\n        create_mock_message(1, \"msg1\", 100),\n        create_mock_message(2, \"msg2\", 100),\n    ]\n    result = get_messages_to_summarize(\n        chat_history=messages,  # type: ignore[arg-type]\n        existing_summary=None,\n        tokens_for_recent=50,\n    )\n\n    assert isinstance(result, SummaryContent)\n    assert hasattr(result, \"older_messages\")\n    assert hasattr(result, \"recent_messages\")\n\n\ndef test_messages_after_summary_cutoff_only() -> None:\n    \"\"\"Should only include messages after existing summary cutoff.\"\"\"\n    messages = [\n        create_mock_message(1, \"already summarized\", 100),\n        create_mock_message(2, \"also summarized\", 100),\n        create_mock_message(3, \"new message\", 100),\n    ]\n    existing_summary = MagicMock()\n    existing_summary.last_summarized_message_id = 2\n\n    result = get_messages_to_summarize(\n        chat_history=messages,  # type: ignore[arg-type]\n        existing_summary=existing_summary,\n        tokens_for_recent=50,\n    )\n\n    all_ids = [m.id for m in result.older_messages + result.recent_messages]\n    assert 1 not in all_ids\n    assert 2 not in all_ids\n    assert 3 in all_ids\n\n\ndef test_no_summary_considers_all_messages() -> None:\n    \"\"\"Without existing summary, all messages should be considered.\"\"\"\n    messages = [\n        create_mock_message(1, \"msg1\", 100),\n        create_mock_message(2, \"msg2\", 100),\n        create_mock_message(3, \"msg3\", 100),\n    ]\n\n    result = get_messages_to_summarize(\n        chat_history=messages,  # type: ignore[arg-type]\n        existing_summary=None,\n        tokens_for_recent=50,\n    )\n\n    all_ids = [m.id for m in result.older_messages + result.recent_messages]\n    assert len(all_ids) == 3\n\n\ndef test_empty_messages_filtered_out() -> None:\n    \"\"\"Messages with empty content should be filtered out.\"\"\"\n    messages = [\n        create_mock_message(1, \"has content\", 100),\n        create_mock_message(2, \"\", 0),\n        create_mock_message(3, \"also has content\", 100),\n    ]\n\n    result = get_messages_to_summarize(\n        chat_history=messages,  # type: ignore[arg-type]\n        existing_summary=None,\n        tokens_for_recent=50,\n    )\n\n    all_messages = result.older_messages + result.recent_messages\n    assert len(all_messages) == 2\n\n\ndef test_empty_history_returns_empty() -> None:\n    \"\"\"Should return empty lists for empty history.\"\"\"\n    result = get_messages_to_summarize(\n        chat_history=[],\n        existing_summary=None,\n        tokens_for_recent=100,\n    )\n    assert result.older_messages == []\n    assert result.recent_messages == []\n\n\ndef test_find_summary_for_branch_returns_matching_branch() -> None:\n    \"\"\"Should return summary whose parent_message_id is in current branch.\"\"\"\n    branch_history = [\n        create_mock_message(1, \"msg1\", 100),\n        create_mock_message(2, \"msg2\", 100),\n        create_mock_message(3, \"msg3\", 100),\n    ]\n\n    matching_summary = create_mock_message(\n        id=100,\n        message=\"Summary of conversation\",\n        token_count=50,\n        parent_message_id=3,\n        last_summarized_message_id=2,\n    )\n\n    mock_db = MagicMock()\n    mock_db.query.return_value.filter.return_value.order_by.return_value.all.return_value = [\n        matching_summary\n    ]\n\n    result = find_summary_for_branch(mock_db, branch_history)  # type: ignore[arg-type]\n\n    assert result == matching_summary\n\n\ndef test_find_summary_for_branch_ignores_other_branch() -> None:\n    \"\"\"Should not return summary from a different branch.\"\"\"\n    # Branch B has messages 1, 2, 6, 7 (diverged after message 2)\n    branch_b_history = [\n        create_mock_message(1, \"msg1\", 100),\n        create_mock_message(2, \"msg2\", 100),\n        create_mock_message(6, \"branch b msg1\", 100),\n        create_mock_message(7, \"branch b msg2\", 100),\n    ]\n\n    # Summary was created on branch A (parent_message_id=5 is NOT in branch B)\n    other_branch_summary = create_mock_message(\n        id=100,\n        message=\"Summary from branch A\",\n        token_count=50,\n        parent_message_id=5,\n        last_summarized_message_id=4,\n    )\n\n    mock_db = MagicMock()\n    mock_db.query.return_value.filter.return_value.order_by.return_value.all.return_value = [\n        other_branch_summary\n    ]\n\n    result = find_summary_for_branch(mock_db, branch_b_history)  # type: ignore[arg-type]\n\n    assert result is None\n\n\ndef test_cutoff_always_before_user_message() -> None:\n    \"\"\"Cutoff should always be placed right before a user message.\n\n    If token budget would place the cutoff between tool calls or assistant messages,\n    it should be moved to right before the next user message.\n    \"\"\"\n    messages = [\n        create_mock_message(1, \"user question\", 100, MessageType.USER),\n        create_mock_message(2, \"assistant uses tool\", 100, MessageType.ASSISTANT),\n        create_mock_message(3, \"tool response\", 100, MessageType.TOOL_CALL_RESPONSE),\n        create_mock_message(4, \"assistant continues\", 100, MessageType.ASSISTANT),\n        create_mock_message(5, \"user follow up\", 100, MessageType.USER),\n        create_mock_message(6, \"final answer\", 100, MessageType.ASSISTANT),\n    ]\n\n    # Token budget that would normally cut between messages 3 and 4\n    # (keeping ~300 tokens = messages 4, 5, 6)\n    result = get_messages_to_summarize(\n        chat_history=messages,  # type: ignore[arg-type]\n        existing_summary=None,\n        tokens_for_recent=300,\n    )\n\n    # recent_messages should start with user message (5), not assistant (4)\n    assert result.recent_messages[0].message_type == MessageType.USER\n    assert result.recent_messages[0].id == 5\n\n    # Messages 1, 2, 4 should be in older_messages (to be summarized)\n    # Note: message 3 (TOOL_CALL_RESPONSE) has content so it's included\n    older_ids = [m.id for m in result.older_messages]\n    assert 1 in older_ids\n    assert 2 in older_ids\n    assert 4 in older_ids\n\n\ndef test__build_llm_messages_for_summarization_user_messages() -> None:\n    \"\"\"User messages should be converted to UserMessage objects.\"\"\"\n    messages = [\n        create_mock_message(1, \"Hello\", 10, MessageType.USER),\n        create_mock_message(2, \"How are you?\", 15, MessageType.USER),\n    ]\n\n    result = _build_llm_messages_for_summarization(messages, {})  # type: ignore[arg-type]\n\n    assert len(result) == 2\n    assert all(isinstance(m, UserMessage) for m in result)\n    assert result[0].content == \"Hello\"\n    assert result[1].content == \"How are you?\"\n\n\ndef test__build_llm_messages_for_summarization_assistant_messages() -> None:\n    \"\"\"Assistant messages should be converted to AssistantMessage objects.\"\"\"\n    messages = [\n        create_mock_message(1, \"I'm doing great!\", 20, MessageType.ASSISTANT),\n    ]\n\n    result = _build_llm_messages_for_summarization(messages, {})  # type: ignore[arg-type]\n\n    assert len(result) == 1\n    assert isinstance(result[0], AssistantMessage)\n    assert result[0].content == \"I'm doing great!\"\n\n\ndef test__build_llm_messages_for_summarization_tool_calls() -> None:\n    \"\"\"Assistant messages with tool calls should be formatted compactly.\"\"\"\n    mock_tool_call = MagicMock()\n    mock_tool_call.tool_id = 1\n    msg = create_mock_message(\n        1, \"Using tool\", 20, MessageType.ASSISTANT, tool_calls=[mock_tool_call]\n    )\n\n    tool_id_to_name = {1: \"search\"}\n\n    result = _build_llm_messages_for_summarization([msg], tool_id_to_name)\n\n    assert len(result) == 1\n    assert isinstance(result[0], AssistantMessage)\n    assert result[0].content == \"[Used tools: search]\"\n\n\ndef test__build_llm_messages_for_summarization_skips_tool_responses() -> None:\n    \"\"\"Tool response messages should be skipped.\"\"\"\n    messages = [\n        create_mock_message(1, \"User question\", 10, MessageType.USER),\n        create_mock_message(\n            2, \"Tool response data\", 50, MessageType.TOOL_CALL_RESPONSE\n        ),\n        create_mock_message(3, \"Assistant answer\", 20, MessageType.ASSISTANT),\n    ]\n\n    result = _build_llm_messages_for_summarization(messages, {})  # type: ignore[arg-type]\n\n    assert len(result) == 2\n    assert isinstance(result[0], UserMessage)\n    assert isinstance(result[1], AssistantMessage)\n\n\ndef test__build_llm_messages_for_summarization_skips_empty() -> None:\n    \"\"\"Empty messages should be skipped.\"\"\"\n    messages = [\n        create_mock_message(1, \"Has content\", 10, MessageType.USER),\n        create_mock_message(2, \"\", 0, MessageType.USER),\n        create_mock_message(3, \"Also has content\", 10, MessageType.ASSISTANT),\n    ]\n\n    result = _build_llm_messages_for_summarization(messages, {})  # type: ignore[arg-type]\n\n    assert len(result) == 2\n\n\ndef test_generate_summary_initial_system_prompt() -> None:\n    \"\"\"Initial summarization should use SUMMARIZATION_PROMPT as system prompt.\"\"\"\n    older_messages = [\n        create_mock_message(1, \"User msg\", 10, MessageType.USER),\n        create_mock_message(2, \"Assistant reply\", 10, MessageType.ASSISTANT),\n    ]\n    recent_messages = [\n        create_mock_message(3, \"Recent user msg\", 10, MessageType.USER),\n    ]\n\n    mock_llm = MagicMock()\n    mock_response = MagicMock()\n    mock_response.choice.message.content = \"Summary of conversation\"\n    mock_llm.invoke.return_value = mock_response\n\n    with patch(\"onyx.chat.compression.llm_generation_span\"):\n        result = generate_summary(\n            older_messages=older_messages,  # type: ignore[arg-type]\n            recent_messages=recent_messages,  # type: ignore[arg-type]\n            llm=mock_llm,\n            tool_id_to_name={},\n            existing_summary=None,\n        )\n\n    assert result == \"Summary of conversation\"\n\n    # Check the messages passed to the LLM\n    call_args = mock_llm.invoke.call_args[0][0]\n\n    # First message should be SystemMessage with just SUMMARIZATION_PROMPT\n    assert isinstance(call_args[0], SystemMessage)\n    assert call_args[0].content == SUMMARIZATION_PROMPT\n\n    # Should have separate user/assistant messages, not a single concatenated string\n    user_messages = [m for m in call_args if isinstance(m, UserMessage)]\n    assistant_messages = [m for m in call_args if isinstance(m, AssistantMessage)]\n\n    # Should have: older user msg, cutoff marker, recent user msg, final reminder\n    assert len(user_messages) >= 3  # At least: older user, cutoff, reminder\n    assert len(assistant_messages) >= 1  # At least: older assistant\n\n    # Final message should be the reminder\n    assert isinstance(call_args[-1], UserMessage)\n    assert call_args[-1].content == USER_REMINDER\n\n\ndef test_generate_summary_progressive_system_prompt() -> None:\n    \"\"\"Progressive summarization should append PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK to system prompt.\"\"\"\n    older_messages = [\n        create_mock_message(1, \"User msg\", 10, MessageType.USER),\n    ]\n    recent_messages = [\n        create_mock_message(2, \"Recent msg\", 10, MessageType.USER),\n    ]\n    existing_summary = \"Previous conversation summary\"\n\n    mock_llm = MagicMock()\n    mock_response = MagicMock()\n    mock_response.choice.message.content = \"Updated summary\"\n    mock_llm.invoke.return_value = mock_response\n\n    with patch(\"onyx.chat.compression.llm_generation_span\"):\n        result = generate_summary(\n            older_messages=older_messages,  # type: ignore[arg-type]\n            recent_messages=recent_messages,  # type: ignore[arg-type]\n            llm=mock_llm,\n            tool_id_to_name={},\n            existing_summary=existing_summary,\n        )\n\n    assert result == \"Updated summary\"\n\n    # Check the messages passed to the LLM\n    call_args = mock_llm.invoke.call_args[0][0]\n\n    # First message should be SystemMessage with SUMMARIZATION_PROMPT + PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK\n    assert isinstance(call_args[0], SystemMessage)\n    expected_system = (\n        SUMMARIZATION_PROMPT\n        + PROGRESSIVE_SUMMARY_SYSTEM_PROMPT_BLOCK.format(\n            previous_summary=existing_summary\n        )\n    )\n    assert call_args[0].content == expected_system\n\n    # Final message should be PROGRESSIVE_USER_REMINDER\n    assert isinstance(call_args[-1], UserMessage)\n    assert call_args[-1].content == PROGRESSIVE_USER_REMINDER\n\n\ndef test_generate_summary_cutoff_marker_as_separate_message() -> None:\n    \"\"\"Cutoff marker should be sent as a separate UserMessage.\"\"\"\n    older_messages = [\n        create_mock_message(1, \"User msg\", 10, MessageType.USER),\n    ]\n    recent_messages = [\n        create_mock_message(2, \"Recent msg\", 10, MessageType.USER),\n    ]\n\n    mock_llm = MagicMock()\n    mock_response = MagicMock()\n    mock_response.choice.message.content = \"Summary\"\n    mock_llm.invoke.return_value = mock_response\n\n    with patch(\"onyx.chat.compression.llm_generation_span\"):\n        generate_summary(\n            older_messages=older_messages,  # type: ignore[arg-type]\n            recent_messages=recent_messages,  # type: ignore[arg-type]\n            llm=mock_llm,\n            tool_id_to_name={},\n            existing_summary=None,\n        )\n\n    call_args = mock_llm.invoke.call_args[0][0]\n\n    # Find the cutoff marker message\n    cutoff_messages = [\n        m\n        for m in call_args\n        if isinstance(m, UserMessage) and SUMMARIZATION_CUTOFF_MARKER in str(m.content)\n    ]\n    assert len(cutoff_messages) == 1\n    assert cutoff_messages[0].content == SUMMARIZATION_CUTOFF_MARKER\n\n\ndef test_generate_summary_messages_are_separate() -> None:\n    \"\"\"Messages should be sent as separate objects, not concatenated into one string.\"\"\"\n    older_messages = [\n        create_mock_message(1, \"First user message\", 10, MessageType.USER),\n        create_mock_message(2, \"First assistant reply\", 10, MessageType.ASSISTANT),\n        create_mock_message(3, \"Second user message\", 10, MessageType.USER),\n    ]\n    recent_messages = [\n        create_mock_message(4, \"Recent message\", 10, MessageType.USER),\n    ]\n\n    mock_llm = MagicMock()\n    mock_response = MagicMock()\n    mock_response.choice.message.content = \"Summary\"\n    mock_llm.invoke.return_value = mock_response\n\n    with patch(\"onyx.chat.compression.llm_generation_span\"):\n        generate_summary(\n            older_messages=older_messages,  # type: ignore[arg-type]\n            recent_messages=recent_messages,  # type: ignore[arg-type]\n            llm=mock_llm,\n            tool_id_to_name={},\n            existing_summary=None,\n        )\n\n    call_args = mock_llm.invoke.call_args[0][0]\n\n    # Should have multiple messages, not just 2 (SystemMessage + single UserMessage)\n    assert len(call_args) > 2\n\n    # Count message types\n    system_count = sum(1 for m in call_args if isinstance(m, SystemMessage))\n    user_count = sum(1 for m in call_args if isinstance(m, UserMessage))\n    assistant_count = sum(1 for m in call_args if isinstance(m, AssistantMessage))\n\n    assert system_count == 1  # One system message\n    # 3 older user messages + 1 cutoff + 1 recent + 1 reminder = at least 3 user messages\n    assert user_count >= 3\n    assert assistant_count >= 1  # At least one assistant message from older_messages\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_context_files.py",
    "content": "\"\"\"Tests for the unified context file extraction logic (Phase 5).\n\nCovers:\n- resolve_context_user_files: precedence rule (custom persona supersedes project)\n- extract_context_files: all-or-nothing context window fit check\n- Search filter / search_usage determination in the caller\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import UUID\nfrom uuid import uuid4\n\nfrom onyx.chat.models import ExtractedContextFiles\nfrom onyx.chat.process_message import determine_search_params\nfrom onyx.chat.process_message import extract_context_files\nfrom onyx.chat.process_message import resolve_context_user_files\nfrom onyx.configs.constants import DEFAULT_PERSONA_ID\nfrom onyx.db.models import UserFile\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.file_store.models import InMemoryChatFile\nfrom onyx.tools.models import SearchToolUsage\n\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef _make_user_file(\n    token_count: int = 100,\n    name: str = \"file.txt\",\n    file_id: str | None = None,\n) -> UserFile:\n    file_uuid = UUID(file_id) if file_id else uuid4()\n    return UserFile(\n        id=file_uuid,\n        file_id=str(file_uuid),\n        name=name,\n        token_count=token_count,\n    )\n\n\ndef _make_persona(\n    persona_id: int,\n    user_files: list | None = None,\n) -> MagicMock:\n    persona = MagicMock()\n    persona.id = persona_id\n    persona.user_files = user_files or []\n    return persona\n\n\ndef _make_in_memory_file(\n    file_id: str,\n    content: str = \"hello world\",\n    file_type: ChatFileType = ChatFileType.PLAIN_TEXT,\n    filename: str = \"file.txt\",\n) -> InMemoryChatFile:\n    return InMemoryChatFile(\n        file_id=file_id,\n        content=content.encode(\"utf-8\"),\n        file_type=file_type,\n        filename=filename,\n    )\n\n\n# ===========================================================================\n# resolve_context_user_files\n# ===========================================================================\n\n\nclass TestResolveContextUserFiles:\n    \"\"\"Precedence rule: custom persona fully supersedes project.\"\"\"\n\n    def test_custom_persona_with_files_returns_persona_files(self) -> None:\n        persona_files = [_make_user_file(), _make_user_file()]\n        persona = _make_persona(persona_id=42, user_files=persona_files)\n        db_session = MagicMock()\n\n        result = resolve_context_user_files(\n            persona=persona, project_id=99, user_id=uuid4(), db_session=db_session\n        )\n\n        assert result == persona_files\n\n    def test_custom_persona_without_files_returns_empty(self) -> None:\n        \"\"\"Custom persona with no files should NOT fall through to project.\"\"\"\n        persona = _make_persona(persona_id=42, user_files=[])\n        db_session = MagicMock()\n\n        result = resolve_context_user_files(\n            persona=persona, project_id=99, user_id=uuid4(), db_session=db_session\n        )\n\n        assert result == []\n\n    def test_custom_persona_none_files_returns_empty(self) -> None:\n        \"\"\"Custom persona with user_files=None should NOT fall through.\"\"\"\n        persona = _make_persona(persona_id=42, user_files=None)\n        db_session = MagicMock()\n\n        result = resolve_context_user_files(\n            persona=persona, project_id=99, user_id=uuid4(), db_session=db_session\n        )\n\n        assert result == []\n\n    @patch(\"onyx.chat.process_message.get_user_files_from_project\")\n    def test_default_persona_in_project_returns_project_files(\n        self, mock_get_files: MagicMock\n    ) -> None:\n        project_files = [_make_user_file(), _make_user_file()]\n        mock_get_files.return_value = project_files\n        persona = _make_persona(persona_id=DEFAULT_PERSONA_ID)\n        user_id = uuid4()\n        db_session = MagicMock()\n\n        result = resolve_context_user_files(\n            persona=persona, project_id=99, user_id=user_id, db_session=db_session\n        )\n\n        assert result == project_files\n        mock_get_files.assert_called_once_with(\n            project_id=99, user_id=user_id, db_session=db_session\n        )\n\n    def test_default_persona_no_project_returns_empty(self) -> None:\n        persona = _make_persona(persona_id=DEFAULT_PERSONA_ID)\n        db_session = MagicMock()\n\n        result = resolve_context_user_files(\n            persona=persona, project_id=None, user_id=uuid4(), db_session=db_session\n        )\n\n        assert result == []\n\n    @patch(\"onyx.chat.process_message.get_user_files_from_project\")\n    def test_custom_persona_without_files_ignores_project(\n        self, mock_get_files: MagicMock\n    ) -> None:\n        \"\"\"Even with a project_id, custom persona means project is invisible.\"\"\"\n        persona = _make_persona(persona_id=7, user_files=[])\n        db_session = MagicMock()\n\n        result = resolve_context_user_files(\n            persona=persona, project_id=99, user_id=uuid4(), db_session=db_session\n        )\n\n        assert result == []\n        mock_get_files.assert_not_called()\n\n\n# ===========================================================================\n# extract_context_files\n# ===========================================================================\n\n\nclass TestExtractContextFiles:\n    \"\"\"All-or-nothing context window fit check.\"\"\"\n\n    def test_empty_user_files_returns_empty(self) -> None:\n        db_session = MagicMock()\n        result = extract_context_files(\n            user_files=[],\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=db_session,\n        )\n        assert result.file_texts == []\n        assert result.image_files == []\n        assert result.use_as_search_filter is False\n        assert result.uncapped_token_count is None\n\n    @patch(\"onyx.chat.process_message.load_in_memory_chat_files\")\n    def test_files_fit_in_context_are_loaded(self, mock_load: MagicMock) -> None:\n        file_id = str(uuid4())\n        uf = _make_user_file(token_count=100, file_id=file_id)\n        mock_load.return_value = [\n            _make_in_memory_file(file_id=file_id, content=\"file content\")\n        ]\n\n        result = extract_context_files(\n            user_files=[uf],\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=MagicMock(),\n        )\n\n        assert result.file_texts == [\"file content\"]\n        assert result.use_as_search_filter is False\n        assert result.total_token_count == 100\n        assert len(result.file_metadata) == 1\n        assert result.file_metadata[0].file_id == file_id\n\n    def test_files_overflow_context_not_loaded(self) -> None:\n        \"\"\"When aggregate tokens exceed 60% of available window, nothing is loaded.\"\"\"\n        uf = _make_user_file(token_count=7000)\n\n        result = extract_context_files(\n            user_files=[uf],\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=MagicMock(),\n        )\n\n        assert result.file_texts == []\n        assert result.image_files == []\n        assert result.use_as_search_filter is True\n        assert result.uncapped_token_count == 7000\n        assert result.total_token_count == 0\n\n    def test_overflow_boundary_exact(self) -> None:\n        \"\"\"Token count exactly at the 60% boundary should trigger overflow.\"\"\"\n        # Available = (10000 - 0) * 0.6 = 6000. Tokens = 6000 → >= threshold.\n        uf = _make_user_file(token_count=6000)\n\n        result = extract_context_files(\n            user_files=[uf],\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=MagicMock(),\n        )\n\n        assert result.use_as_search_filter is True\n\n    @patch(\"onyx.chat.process_message.load_in_memory_chat_files\")\n    def test_just_under_boundary_loads(self, mock_load: MagicMock) -> None:\n        \"\"\"Token count just under the 60% boundary should load files.\"\"\"\n        file_id = str(uuid4())\n        uf = _make_user_file(token_count=5999, file_id=file_id)\n        mock_load.return_value = [_make_in_memory_file(file_id=file_id, content=\"data\")]\n\n        result = extract_context_files(\n            user_files=[uf],\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=MagicMock(),\n        )\n\n        assert result.use_as_search_filter is False\n        assert result.file_texts == [\"data\"]\n\n    @patch(\"onyx.chat.process_message.load_in_memory_chat_files\")\n    def test_multiple_files_aggregate_check(self, mock_load: MagicMock) -> None:\n        \"\"\"Multiple small files that individually fit but collectively overflow.\"\"\"\n        files = [_make_user_file(token_count=2500) for _ in range(3)]\n        # 3 * 2500 = 7500 > 6000 threshold\n\n        result = extract_context_files(\n            user_files=files,\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=MagicMock(),\n        )\n\n        assert result.use_as_search_filter is True\n        assert result.file_texts == []\n        mock_load.assert_not_called()\n\n    @patch(\"onyx.chat.process_message.load_in_memory_chat_files\")\n    def test_reserved_tokens_reduce_available_space(self, mock_load: MagicMock) -> None:\n        \"\"\"Reserved tokens shrink the available window.\"\"\"\n        file_id = str(uuid4())\n        uf = _make_user_file(token_count=3000, file_id=file_id)\n        # Available = (10000 - 5000) * 0.6 = 3000. Tokens = 3000 → overflow.\n\n        result = extract_context_files(\n            user_files=[uf],\n            llm_max_context_window=10000,\n            reserved_token_count=5000,\n            db_session=MagicMock(),\n        )\n\n        assert result.use_as_search_filter is True\n        mock_load.assert_not_called()\n\n    @patch(\"onyx.chat.process_message.load_in_memory_chat_files\")\n    def test_image_files_are_extracted(self, mock_load: MagicMock) -> None:\n        file_id = str(uuid4())\n        uf = _make_user_file(token_count=50, file_id=file_id)\n        mock_load.return_value = [\n            InMemoryChatFile(\n                file_id=file_id,\n                content=b\"\\x89PNG\",\n                file_type=ChatFileType.IMAGE,\n                filename=\"photo.png\",\n            )\n        ]\n\n        result = extract_context_files(\n            user_files=[uf],\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=MagicMock(),\n        )\n\n        assert len(result.image_files) == 1\n        assert result.image_files[0].file_id == file_id\n        assert result.file_texts == []\n        assert result.total_token_count == 50\n\n    @patch(\"onyx.chat.process_message.load_in_memory_chat_files\")\n    def test_tool_metadata_file_id_matches_chat_history_file_id(\n        self, mock_load: MagicMock\n    ) -> None:\n        \"\"\"The file_id in tool metadata (from extract_context_files) and the\n        file_id in chat history messages (from build_file_context) must\n        agree, otherwise the LLM sees different IDs for the same file across\n        turns.\n\n        In production, UserFile.id (UUID PK) differs from UserFile.file_id\n        (file-store path). Both pathways should produce the same file_id\n        (UserFile.id) for FileReaderTool.\"\"\"\n        from onyx.chat.chat_utils import build_file_context\n\n        user_file_uuid = uuid4()\n        file_store_path = f\"user_files/{user_file_uuid}/data.csv\"\n\n        uf = UserFile(\n            id=user_file_uuid,\n            file_id=file_store_path,\n            name=\"data.csv\",\n            token_count=100,\n            file_type=\"text/csv\",\n        )\n\n        in_memory = InMemoryChatFile(\n            file_id=file_store_path,\n            content=b\"col1,col2\\na,b\",\n            file_type=ChatFileType.TABULAR,\n            filename=\"data.csv\",\n        )\n\n        mock_load.return_value = [in_memory]\n\n        # Pathway 1: extract_context_files (project/persona context)\n        result = extract_context_files(\n            user_files=[uf],\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=MagicMock(),\n        )\n        assert len(result.file_metadata_for_tool) == 1\n        tool_metadata_file_id = result.file_metadata_for_tool[0].file_id\n\n        # Pathway 2: build_file_context (chat history path)\n        # In convert_chat_history, tool_file_id comes from\n        # file_descriptor[\"user_file_id\"], which is str(UserFile.id)\n        ctx = build_file_context(\n            tool_file_id=str(user_file_uuid),\n            filename=\"data.csv\",\n            file_type=ChatFileType.TABULAR,\n        )\n        chat_history_file_id = ctx.tool_metadata.file_id\n\n        # Both pathways must produce the same ID for the LLM\n        assert tool_metadata_file_id == chat_history_file_id, (\n            f\"File ID mismatch: extract_context_files uses '{tool_metadata_file_id}' \"\n            f\"but build_file_context uses '{chat_history_file_id}'.\"\n        )\n\n    @patch(\"onyx.chat.process_message.DISABLE_VECTOR_DB\", True)\n    def test_overflow_with_vector_db_disabled_provides_tool_metadata(self) -> None:\n        \"\"\"When vector DB is disabled, overflow produces FileToolMetadata.\"\"\"\n        uf = _make_user_file(token_count=7000, name=\"bigfile.txt\")\n\n        result = extract_context_files(\n            user_files=[uf],\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=MagicMock(),\n        )\n\n        assert result.use_as_search_filter is False\n        assert len(result.file_metadata_for_tool) == 1\n        assert result.file_metadata_for_tool[0].filename == \"bigfile.txt\"\n\n    @patch(\"onyx.chat.process_message.load_in_memory_chat_files\")\n    def test_metadata_only_files_not_counted_in_aggregate_tokens(\n        self, mock_load: MagicMock\n    ) -> None:\n        \"\"\"Metadata-only files (TABULAR) should not count toward the token budget.\"\"\"\n        text_file_id = str(uuid4())\n        text_uf = _make_user_file(token_count=100, file_id=text_file_id)\n        # TABULAR file with large token count — should be excluded from aggregate\n        tabular_uf = _make_user_file(\n            token_count=50000, name=\"huge.xlsx\", file_id=str(uuid4())\n        )\n        tabular_uf.file_type = (\n            \"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\"\n        )\n\n        mock_load.return_value = [\n            _make_in_memory_file(file_id=text_file_id, content=\"text content\"),\n            InMemoryChatFile(\n                file_id=str(tabular_uf.id),\n                content=b\"binary xlsx\",\n                file_type=ChatFileType.TABULAR,\n                filename=\"huge.xlsx\",\n            ),\n        ]\n\n        result = extract_context_files(\n            user_files=[text_uf, tabular_uf],\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=MagicMock(),\n        )\n\n        # Text file fits (100 < 6000), so files should be loaded\n        assert result.file_texts == [\"text content\"]\n        # TABULAR file should appear as tool metadata, not in file_texts\n        assert len(result.file_metadata_for_tool) == 1\n        assert result.file_metadata_for_tool[0].filename == \"huge.xlsx\"\n\n    @patch(\"onyx.chat.process_message.load_in_memory_chat_files\")\n    def test_metadata_only_files_loaded_as_tool_metadata(\n        self, mock_load: MagicMock\n    ) -> None:\n        \"\"\"When files fit, metadata-only files appear in file_metadata_for_tool.\"\"\"\n        text_file_id = str(uuid4())\n        tabular_file_id = str(uuid4())\n        text_uf = _make_user_file(token_count=100, file_id=text_file_id)\n        tabular_uf = _make_user_file(\n            token_count=500, name=\"data.csv\", file_id=tabular_file_id\n        )\n        tabular_uf.file_type = \"text/csv\"\n\n        mock_load.return_value = [\n            _make_in_memory_file(file_id=text_file_id, content=\"hello\"),\n            InMemoryChatFile(\n                file_id=tabular_file_id,\n                content=b\"col1,col2\\na,b\",\n                file_type=ChatFileType.TABULAR,\n                filename=\"data.csv\",\n            ),\n        ]\n\n        result = extract_context_files(\n            user_files=[text_uf, tabular_uf],\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=MagicMock(),\n        )\n\n        assert result.file_texts == [\"hello\"]\n        assert len(result.file_metadata_for_tool) == 1\n        assert result.file_metadata_for_tool[0].filename == \"data.csv\"\n        # TABULAR should not appear in file_metadata (that's for citation)\n        assert all(m.filename != \"data.csv\" for m in result.file_metadata)\n\n    def test_overflow_with_vector_db_preserves_metadata_only_tool_metadata(\n        self,\n    ) -> None:\n        \"\"\"When text files overflow with vector DB enabled, metadata-only files\n        should still be exposed via file_metadata_for_tool since they aren't\n        in the vector DB and would otherwise be inaccessible.\"\"\"\n        text_uf = _make_user_file(token_count=7000, name=\"bigfile.txt\")\n        tabular_uf = _make_user_file(token_count=500, name=\"data.xlsx\")\n        tabular_uf.file_type = (\n            \"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\"\n        )\n\n        result = extract_context_files(\n            user_files=[text_uf, tabular_uf],\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=MagicMock(),\n        )\n\n        # Text files overflow → search filter enabled\n        assert result.use_as_search_filter is True\n        assert result.file_texts == []\n        # TABULAR file should still be in tool metadata\n        assert len(result.file_metadata_for_tool) == 1\n        assert result.file_metadata_for_tool[0].filename == \"data.xlsx\"\n\n    @patch(\"onyx.chat.process_message.DISABLE_VECTOR_DB\", True)\n    def test_overflow_no_vector_db_includes_all_files_in_tool_metadata(self) -> None:\n        \"\"\"When vector DB is disabled and files overflow, all files\n        (both text and metadata-only) appear in file_metadata_for_tool.\"\"\"\n        text_uf = _make_user_file(token_count=7000, name=\"bigfile.txt\")\n        tabular_uf = _make_user_file(token_count=500, name=\"data.xlsx\")\n        tabular_uf.file_type = (\n            \"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\"\n        )\n\n        result = extract_context_files(\n            user_files=[text_uf, tabular_uf],\n            llm_max_context_window=10000,\n            reserved_token_count=0,\n            db_session=MagicMock(),\n        )\n\n        assert result.use_as_search_filter is False\n        assert len(result.file_metadata_for_tool) == 2\n        filenames = {m.filename for m in result.file_metadata_for_tool}\n        assert filenames == {\"bigfile.txt\", \"data.xlsx\"}\n\n\n# ===========================================================================\n# Search filter + search_usage determination\n# ===========================================================================\n\n\nclass TestSearchFilterDetermination:\n    \"\"\"Verify that determine_search_params correctly resolves\n    project_id_filter, persona_id_filter, and search_usage based on\n    the extraction result and the precedence rule.\n    \"\"\"\n\n    @staticmethod\n    def _make_context(\n        use_as_search_filter: bool = False,\n        file_texts: list[str] | None = None,\n        uncapped_token_count: int | None = None,\n    ) -> ExtractedContextFiles:\n        return ExtractedContextFiles(\n            file_texts=file_texts or [],\n            image_files=[],\n            use_as_search_filter=use_as_search_filter,\n            total_token_count=0,\n            file_metadata=[],\n            uncapped_token_count=uncapped_token_count,\n        )\n\n    def test_custom_persona_files_fit_no_filter(self) -> None:\n        \"\"\"Custom persona, files fit → no search filter, AUTO.\"\"\"\n        result = determine_search_params(\n            persona_id=42,\n            project_id=99,\n            extracted_context_files=self._make_context(\n                file_texts=[\"content\"],\n                uncapped_token_count=100,\n            ),\n        )\n        assert result.project_id_filter is None\n        assert result.persona_id_filter is None\n        assert result.search_usage == SearchToolUsage.AUTO\n\n    def test_custom_persona_files_overflow_persona_filter(self) -> None:\n        \"\"\"Custom persona, files overflow → persona_id filter, AUTO.\"\"\"\n        result = determine_search_params(\n            persona_id=42,\n            project_id=99,\n            extracted_context_files=self._make_context(use_as_search_filter=True),\n        )\n        assert result.persona_id_filter == 42\n        assert result.project_id_filter is None\n        assert result.search_usage == SearchToolUsage.AUTO\n\n    def test_custom_persona_no_files_no_project_leak(self) -> None:\n        \"\"\"Custom persona (no files) in project → nothing leaks from project.\"\"\"\n        result = determine_search_params(\n            persona_id=42,\n            project_id=99,\n            extracted_context_files=self._make_context(),\n        )\n        assert result.project_id_filter is None\n        assert result.persona_id_filter is None\n        assert result.search_usage == SearchToolUsage.AUTO\n\n    def test_default_persona_project_files_fit_disables_search(self) -> None:\n        \"\"\"Default persona, project files fit → DISABLED.\"\"\"\n        result = determine_search_params(\n            persona_id=DEFAULT_PERSONA_ID,\n            project_id=99,\n            extracted_context_files=self._make_context(\n                file_texts=[\"content\"],\n                uncapped_token_count=100,\n            ),\n        )\n        assert result.project_id_filter is None\n        assert result.search_usage == SearchToolUsage.DISABLED\n\n    def test_default_persona_project_files_overflow_enables_search(self) -> None:\n        \"\"\"Default persona, project files overflow → ENABLED + project_id filter.\"\"\"\n        result = determine_search_params(\n            persona_id=DEFAULT_PERSONA_ID,\n            project_id=99,\n            extracted_context_files=self._make_context(\n                use_as_search_filter=True,\n                uncapped_token_count=7000,\n            ),\n        )\n        assert result.project_id_filter == 99\n        assert result.persona_id_filter is None\n        assert result.search_usage == SearchToolUsage.ENABLED\n\n    def test_default_persona_no_project_auto(self) -> None:\n        \"\"\"Default persona, no project → AUTO.\"\"\"\n        result = determine_search_params(\n            persona_id=DEFAULT_PERSONA_ID,\n            project_id=None,\n            extracted_context_files=self._make_context(),\n        )\n        assert result.project_id_filter is None\n        assert result.search_usage == SearchToolUsage.AUTO\n\n    def test_default_persona_project_no_files_disables_search(self) -> None:\n        \"\"\"Default persona in project with no files → DISABLED.\"\"\"\n        result = determine_search_params(\n            persona_id=DEFAULT_PERSONA_ID,\n            project_id=99,\n            extracted_context_files=self._make_context(),\n        )\n        assert result.search_usage == SearchToolUsage.DISABLED\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_emitter.py",
    "content": "\"\"\"Unit tests for the Emitter class.\n\nAll tests use the streaming mode (merged_queue required). Emitter has a single\ncode path — no standalone bus.\n\"\"\"\n\nimport queue\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import OverallStop\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import ReasoningStart\n\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef _placement(\n    turn_index: int = 0,\n    tab_index: int = 0,\n    sub_turn_index: int | None = None,\n) -> Placement:\n    return Placement(\n        turn_index=turn_index,\n        tab_index=tab_index,\n        sub_turn_index=sub_turn_index,\n    )\n\n\ndef _packet(\n    turn_index: int = 0,\n    tab_index: int = 0,\n    sub_turn_index: int | None = None,\n) -> Packet:\n    \"\"\"Build a minimal valid packet with an OverallStop payload.\"\"\"\n    return Packet(\n        placement=_placement(turn_index, tab_index, sub_turn_index),\n        obj=OverallStop(stop_reason=\"test\"),\n    )\n\n\ndef _make_emitter(model_idx: int = 0) -> tuple[\"Emitter\", \"queue.Queue\"]:\n    \"\"\"Return (emitter, queue) wired together.\"\"\"\n    mq: queue.Queue = queue.Queue()\n    return Emitter(merged_queue=mq, model_idx=model_idx), mq\n\n\n# ---------------------------------------------------------------------------\n# Queue routing\n# ---------------------------------------------------------------------------\n\n\nclass TestEmitterQueueRouting:\n    def test_emit_lands_on_merged_queue(self) -> None:\n        emitter, mq = _make_emitter()\n        emitter.emit(_packet())\n        assert not mq.empty()\n\n    def test_queue_item_is_tuple_of_key_and_packet(self) -> None:\n        emitter, mq = _make_emitter(model_idx=1)\n        emitter.emit(_packet())\n        item = mq.get_nowait()\n        assert isinstance(item, tuple)\n        assert len(item) == 2\n\n    def test_multiple_packets_delivered_fifo(self) -> None:\n        emitter, mq = _make_emitter()\n        p1 = _packet(turn_index=0)\n        p2 = _packet(turn_index=1)\n        emitter.emit(p1)\n        emitter.emit(p2)\n        _, t1 = mq.get_nowait()\n        _, t2 = mq.get_nowait()\n        assert t1.placement.turn_index == 0\n        assert t2.placement.turn_index == 1\n\n\n# ---------------------------------------------------------------------------\n# model_index tagging\n# ---------------------------------------------------------------------------\n\n\nclass TestEmitterModelIndexTagging:\n    def test_n1_default_model_idx_tags_model_index_zero(self) -> None:\n        \"\"\"N=1: default model_idx=0, so packet gets model_index=0.\"\"\"\n        emitter, mq = _make_emitter(model_idx=0)\n        emitter.emit(_packet())\n        _key, tagged = mq.get_nowait()\n        assert tagged.placement.model_index == 0\n\n    def test_model_idx_one_tags_packet(self) -> None:\n        emitter, mq = _make_emitter(model_idx=1)\n        emitter.emit(_packet())\n        _key, tagged = mq.get_nowait()\n        assert tagged.placement.model_index == 1\n\n    def test_model_idx_two_tags_packet(self) -> None:\n        \"\"\"Boundary: third model in a 3-model run.\"\"\"\n        emitter, mq = _make_emitter(model_idx=2)\n        emitter.emit(_packet())\n        _key, tagged = mq.get_nowait()\n        assert tagged.placement.model_index == 2\n\n\n# ---------------------------------------------------------------------------\n# Queue key\n# ---------------------------------------------------------------------------\n\n\nclass TestEmitterQueueKey:\n    def test_key_equals_model_idx(self) -> None:\n        \"\"\"Drain loop uses the key to route packets; it must match model_idx.\"\"\"\n        emitter, mq = _make_emitter(model_idx=2)\n        emitter.emit(_packet())\n        key, _ = mq.get_nowait()\n        assert key == 2\n\n    def test_n1_key_is_zero(self) -> None:\n        emitter, mq = _make_emitter(model_idx=0)\n        emitter.emit(_packet())\n        key, _ = mq.get_nowait()\n        assert key == 0\n\n\n# ---------------------------------------------------------------------------\n# Placement field preservation\n# ---------------------------------------------------------------------------\n\n\nclass TestEmitterPlacementPreservation:\n    def test_turn_index_is_preserved(self) -> None:\n        emitter, mq = _make_emitter()\n        emitter.emit(_packet(turn_index=5))\n        _, tagged = mq.get_nowait()\n        assert tagged.placement.turn_index == 5\n\n    def test_tab_index_is_preserved(self) -> None:\n        emitter, mq = _make_emitter()\n        emitter.emit(_packet(tab_index=3))\n        _, tagged = mq.get_nowait()\n        assert tagged.placement.tab_index == 3\n\n    def test_sub_turn_index_is_preserved(self) -> None:\n        emitter, mq = _make_emitter()\n        emitter.emit(_packet(sub_turn_index=2))\n        _, tagged = mq.get_nowait()\n        assert tagged.placement.sub_turn_index == 2\n\n    def test_sub_turn_index_none_is_preserved(self) -> None:\n        emitter, mq = _make_emitter()\n        emitter.emit(_packet(sub_turn_index=None))\n        _, tagged = mq.get_nowait()\n        assert tagged.placement.sub_turn_index is None\n\n    def test_packet_obj_is_not_modified(self) -> None:\n        \"\"\"The payload object must survive tagging untouched.\"\"\"\n        emitter, mq = _make_emitter()\n        original_obj = OverallStop(stop_reason=\"sentinel\")\n        pkt = Packet(placement=_placement(), obj=original_obj)\n        emitter.emit(pkt)\n        _, tagged = mq.get_nowait()\n        assert tagged.obj is original_obj\n\n    def test_different_obj_types_are_handled(self) -> None:\n        \"\"\"Any valid PacketObj type passes through correctly.\"\"\"\n        emitter, mq = _make_emitter()\n        pkt = Packet(placement=_placement(), obj=ReasoningStart())\n        emitter.emit(pkt)\n        _, tagged = mq.get_nowait()\n        assert isinstance(tagged.obj, ReasoningStart)\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_llm_loop.py",
    "content": "\"\"\"Tests for llm_loop.py, including history construction and empty-response paths.\"\"\"\n\nfrom unittest.mock import Mock\n\nimport pytest\n\nfrom onyx.chat.llm_loop import _build_empty_llm_response_error\nfrom onyx.chat.llm_loop import _try_fallback_tool_extraction\nfrom onyx.chat.llm_loop import construct_message_history\nfrom onyx.chat.llm_loop import EmptyLLMResponseError\nfrom onyx.chat.models import ChatLoadedFile\nfrom onyx.chat.models import ChatMessageSimple\nfrom onyx.chat.models import ContextFileMetadata\nfrom onyx.chat.models import ExtractedContextFiles\nfrom onyx.chat.models import FileToolMetadata\nfrom onyx.chat.models import LlmStepResult\nfrom onyx.chat.models import ToolCallSimple\nfrom onyx.configs.constants import MessageType\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.llm.interfaces import LLMConfig\nfrom onyx.llm.interfaces import ToolChoiceOptions\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.tools.models import ToolCallKickoff\n\n\ndef create_message(\n    content: str, message_type: MessageType, token_count: int | None = None\n) -> ChatMessageSimple:\n    \"\"\"Helper to create a ChatMessageSimple for testing.\"\"\"\n    if token_count is None:\n        # Simple token estimation: ~1 token per 4 characters\n        token_count = max(1, len(content) // 4)\n    return ChatMessageSimple(\n        message=content,\n        token_count=token_count,\n        message_type=message_type,\n    )\n\n\ndef create_assistant_with_tool_call(\n    tool_call_id: str, tool_name: str, token_count: int\n) -> ChatMessageSimple:\n    \"\"\"Helper to create an ASSISTANT message with tool_calls for testing.\"\"\"\n    tool_call = ToolCallSimple(\n        tool_call_id=tool_call_id,\n        tool_name=tool_name,\n        tool_arguments={},\n        token_count=token_count,\n    )\n    return ChatMessageSimple(\n        message=\"\",\n        token_count=token_count,\n        message_type=MessageType.ASSISTANT,\n        tool_calls=[tool_call],\n    )\n\n\ndef create_tool_response(\n    tool_call_id: str, content: str, token_count: int\n) -> ChatMessageSimple:\n    \"\"\"Helper to create a TOOL_CALL_RESPONSE message for testing.\"\"\"\n    return ChatMessageSimple(\n        message=content,\n        token_count=token_count,\n        message_type=MessageType.TOOL_CALL_RESPONSE,\n        tool_call_id=tool_call_id,\n    )\n\n\ndef create_context_files(\n    num_files: int = 0, num_images: int = 0, tokens_per_file: int = 100\n) -> ExtractedContextFiles:\n    \"\"\"Helper to create ExtractedContextFiles for testing.\"\"\"\n    file_texts = [f\"Project file {i} content\" for i in range(num_files)]\n    file_metadata = [\n        ContextFileMetadata(\n            file_id=f\"file_{i}\",\n            filename=f\"file_{i}.txt\",\n            file_content=f\"Project file {i} content\",\n        )\n        for i in range(num_files)\n    ]\n    image_files = [\n        ChatLoadedFile(\n            file_id=f\"image_{i}\",\n            content=b\"\",\n            file_type=ChatFileType.IMAGE,\n            filename=f\"image_{i}.png\",\n            content_text=None,\n            token_count=50,\n        )\n        for i in range(num_images)\n    ]\n    return ExtractedContextFiles(\n        file_texts=file_texts,\n        image_files=image_files,\n        use_as_search_filter=False,\n        total_token_count=num_files * tokens_per_file,\n        file_metadata=file_metadata,\n        uncapped_token_count=num_files * tokens_per_file,\n    )\n\n\nclass TestConstructMessageHistory:\n    \"\"\"Tests for the construct_message_history function.\"\"\"\n\n    def test_basic_no_truncation(self) -> None:\n        \"\"\"Test basic functionality when all messages fit within token budget.\"\"\"\n        system_prompt = create_message(\n            \"You are a helpful assistant\", MessageType.SYSTEM, 10\n        )\n        user_msg1 = create_message(\"Hello\", MessageType.USER, 5)\n        assistant_msg1 = create_message(\"Hi there!\", MessageType.ASSISTANT, 5)\n        user_msg2 = create_message(\"How are you?\", MessageType.USER, 5)\n\n        simple_chat_history = [user_msg1, assistant_msg1, user_msg2]\n        context_files = create_context_files()\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=1000,\n        )\n\n        # Should have: system, user1, assistant1, user2\n        assert len(result) == 4\n        assert result[0] == system_prompt\n        assert result[1] == user_msg1\n        assert result[2] == assistant_msg1\n        assert result[3] == user_msg2\n\n    def test_with_custom_agent_prompt(self) -> None:\n        \"\"\"Test that custom agent prompt is inserted before the last user message.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg1 = create_message(\"First message\", MessageType.USER, 5)\n        assistant_msg1 = create_message(\"Response\", MessageType.ASSISTANT, 5)\n        user_msg2 = create_message(\"Second message\", MessageType.USER, 5)\n        custom_agent = create_message(\"Custom instructions\", MessageType.USER, 10)\n\n        simple_chat_history = [user_msg1, assistant_msg1, user_msg2]\n        context_files = create_context_files()\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=custom_agent,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=1000,\n        )\n\n        # Should have: system, user1, assistant1, custom_agent, user2\n        assert len(result) == 5\n        assert result[0] == system_prompt\n        assert result[1] == user_msg1\n        assert result[2] == assistant_msg1\n        assert result[3] == custom_agent  # Before last user message\n        assert result[4] == user_msg2\n\n    def test_with_context_files(self) -> None:\n        \"\"\"Test that project files are inserted before the last user message.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg1 = create_message(\"First message\", MessageType.USER, 5)\n        user_msg2 = create_message(\"Second message\", MessageType.USER, 5)\n\n        simple_chat_history = [user_msg1, user_msg2]\n        context_files = create_context_files(num_files=2, tokens_per_file=50)\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=1000,\n        )\n\n        # Should have: system, user1, context_files_message, user2\n        assert len(result) == 4\n        assert result[0] == system_prompt\n        assert result[1] == user_msg1\n        assert (\n            result[2].message_type == MessageType.USER\n        )  # Project files as user message\n        assert \"documents\" in result[2].message  # Should contain JSON structure\n        assert result[3] == user_msg2\n\n    def test_with_reminder_message(self) -> None:\n        \"\"\"Test that reminder message is added at the very end.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg = create_message(\"Hello\", MessageType.USER, 5)\n        reminder = create_message(\"Remember to cite sources\", MessageType.USER, 10)\n\n        simple_chat_history = [user_msg]\n        context_files = create_context_files()\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=simple_chat_history,\n            reminder_message=reminder,\n            context_files=context_files,\n            available_tokens=1000,\n        )\n\n        # Should have: system, user, reminder\n        assert len(result) == 3\n        assert result[0] == system_prompt\n        assert result[1] == user_msg\n        assert result[2] == reminder  # At the end\n\n    def test_tool_calls_after_last_user_message(self) -> None:\n        \"\"\"Test that tool calls and responses after last user message are preserved.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg1 = create_message(\"First message\", MessageType.USER, 5)\n        assistant_msg1 = create_message(\"Response\", MessageType.ASSISTANT, 5)\n        user_msg2 = create_message(\"Search for X\", MessageType.USER, 5)\n        assistant_with_tool = create_assistant_with_tool_call(\"tc_1\", \"search\", 5)\n        tool_response = create_tool_response(\"tc_1\", \"Search results...\", 10)\n\n        simple_chat_history = [\n            user_msg1,\n            assistant_msg1,\n            user_msg2,\n            assistant_with_tool,\n            tool_response,\n        ]\n        context_files = create_context_files()\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=1000,\n        )\n\n        # Should have: system, user1, assistant1, user2, assistant_with_tool, tool_response\n        assert len(result) == 6\n        assert result[0] == system_prompt\n        assert result[1] == user_msg1\n        assert result[2] == assistant_msg1\n        assert result[3] == user_msg2\n        assert result[4] == assistant_with_tool\n        assert result[5] == tool_response\n\n    def test_custom_agent_and_project_before_last_user_with_tools_after(self) -> None:\n        \"\"\"Test correct ordering with custom agent, project files, and tool calls.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg1 = create_message(\"First\", MessageType.USER, 5)\n        user_msg2 = create_message(\"Second\", MessageType.USER, 5)\n        assistant_with_tool = create_assistant_with_tool_call(\"tc_1\", \"tool\", 5)\n        custom_agent = create_message(\"Custom\", MessageType.USER, 10)\n\n        simple_chat_history = [user_msg1, user_msg2, assistant_with_tool]\n        context_files = create_context_files(num_files=1, tokens_per_file=50)\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=custom_agent,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=1000,\n        )\n\n        # Should have: system, user1, custom_agent, context_files, user2, assistant_with_tool\n        assert len(result) == 6\n        assert result[0] == system_prompt\n        assert result[1] == user_msg1\n        assert result[2] == custom_agent  # Before last user message\n        assert result[3].message_type == MessageType.USER  # Project files\n        assert \"documents\" in result[3].message\n        assert result[4] == user_msg2  # Last user message\n        assert result[5] == assistant_with_tool  # After last user message\n\n    def test_project_images_attached_to_last_user_message(self) -> None:\n        \"\"\"Test that project images are attached to the last user message.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg1 = create_message(\"First\", MessageType.USER, 5)\n        user_msg2 = create_message(\"Second\", MessageType.USER, 5)\n\n        simple_chat_history = [user_msg1, user_msg2]\n        context_files = create_context_files(num_files=0, num_images=2)\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=1000,\n        )\n\n        # Last message should have the project images\n        last_message = result[-1]\n        assert last_message.message == \"Second\"\n        assert last_message.image_files is not None\n        assert len(last_message.image_files) == 2\n        assert last_message.image_files[0].file_id == \"image_0\"\n        assert last_message.image_files[1].file_id == \"image_1\"\n\n    def test_project_images_preserve_existing_images(self) -> None:\n        \"\"\"Test that project images are appended to existing images on the user message.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n\n        # Create a user message with existing images\n        existing_image = ChatLoadedFile(\n            file_id=\"existing_image\",\n            content=b\"\",\n            file_type=ChatFileType.IMAGE,\n            filename=\"existing.png\",\n            content_text=None,\n            token_count=50,\n        )\n        user_msg = ChatMessageSimple(\n            message=\"Message with image\",\n            token_count=5,\n            message_type=MessageType.USER,\n            image_files=[existing_image],\n        )\n\n        simple_chat_history = [user_msg]\n        context_files = create_context_files(num_files=0, num_images=1)\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=1000,\n        )\n\n        # Last message should have both existing and project images\n        last_message = result[-1]\n        assert last_message.image_files is not None\n        assert len(last_message.image_files) == 2\n        assert last_message.image_files[0].file_id == \"existing_image\"\n        assert last_message.image_files[1].file_id == \"image_0\"\n\n    def test_truncation_from_top(self) -> None:\n        \"\"\"Test that history is truncated from the top when token budget is exceeded.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg1 = create_message(\"First\", MessageType.USER, 20)\n        assistant_msg1 = create_message(\"Response 1\", MessageType.ASSISTANT, 20)\n        user_msg2 = create_message(\"Second\", MessageType.USER, 20)\n        assistant_msg2 = create_message(\"Response 2\", MessageType.ASSISTANT, 20)\n        user_msg3 = create_message(\"Third\", MessageType.USER, 20)\n\n        simple_chat_history = [\n            user_msg1,\n            assistant_msg1,\n            user_msg2,\n            assistant_msg2,\n            user_msg3,\n        ]\n        context_files = create_context_files()\n\n        # Budget only allows last 3 messages + system (10 + 20 + 20 + 20 = 70 tokens)\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=80,\n        )\n\n        # Should have: system, user2, assistant2, user3\n        # user1 and assistant1 should be truncated\n        assert len(result) == 4\n        assert result[0] == system_prompt\n        assert result[1] == user_msg2  # user1 truncated\n        assert result[2] == assistant_msg2\n        assert result[3] == user_msg3\n\n    def test_truncation_preserves_last_user_and_messages_after(self) -> None:\n        \"\"\"Test that truncation preserves the last user message and everything after it.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg1 = create_message(\"First\", MessageType.USER, 30)\n        user_msg2 = create_message(\"Second\", MessageType.USER, 20)\n        assistant_with_tool = create_assistant_with_tool_call(\"tc_1\", \"tool\", 20)\n        tool_response = create_tool_response(\"tc_1\", \"tool_response\", 20)\n\n        simple_chat_history = [user_msg1, user_msg2, assistant_with_tool, tool_response]\n        context_files = create_context_files()\n\n        # Budget only allows last user message and messages after + system\n        # (10 + 20 + 20 + 20 = 70 tokens)\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=80,\n        )\n\n        # Should have: system, user2, assistant_with_tool, tool_response\n        # user1 should be truncated, but user2 and everything after preserved\n        assert len(result) == 4\n        assert result[0] == system_prompt\n        assert result[1] == user_msg2  # user1 truncated\n        assert result[2] == assistant_with_tool\n        assert result[3] == tool_response\n\n    def test_truncation_drops_orphaned_tool_response(self) -> None:\n        \"\"\"If truncation drops an assistant tool call, its orphaned tool response is removed.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg1 = create_message(\"First\", MessageType.USER, 10)\n        assistant_with_tool = create_assistant_with_tool_call(\"tc_1\", \"tool\", 25)\n        tool_response = create_tool_response(\"tc_1\", \"tool_response\", 5)\n        assistant_msg1 = create_message(\"Used the tool above\", MessageType.ASSISTANT, 5)\n        user_msg2 = create_message(\"Latest question\", MessageType.USER, 10)\n\n        simple_chat_history = [\n            user_msg1,\n            assistant_with_tool,\n            tool_response,\n            assistant_msg1,\n            user_msg2,\n        ]\n        context_files = create_context_files()\n\n        # Remaining history budget is 10 tokens (30 total - 10 system - 10 last user):\n        # keeps [tool_response, assistant_msg1] from history_before_last_user,\n        # but drops assistant_with_tool, making tool_response orphaned.\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=30,\n        )\n\n        # Orphaned tool response should be removed from final history.\n        assert len(result) == 3\n        assert result[0] == system_prompt\n        assert result[1] == assistant_msg1\n        assert result[2] == user_msg2\n\n    def test_preserves_non_orphaned_tool_response(self) -> None:\n        \"\"\"Tool responses remain when their assistant tool call is present.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg1 = create_message(\"First\", MessageType.USER, 10)\n        assistant_with_tool = create_assistant_with_tool_call(\"tc_1\", \"tool\", 20)\n        tool_response = create_tool_response(\"tc_1\", \"tool_response\", 5)\n        user_msg2 = create_message(\"Latest question\", MessageType.USER, 10)\n\n        simple_chat_history = [user_msg1, assistant_with_tool, tool_response, user_msg2]\n        context_files = create_context_files()\n\n        # Remaining history budget is 25 tokens (45 total - 10 system - 10 last user):\n        # keeps both assistant_with_tool and tool_response in history_before_last_user.\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=45,\n        )\n\n        assert len(result) == 4\n        assert result[0] == system_prompt\n        assert result[1] == assistant_with_tool\n        assert result[2] == tool_response\n        assert result[3] == user_msg2\n\n    def test_empty_history(self) -> None:\n        \"\"\"Test handling of empty chat history.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        custom_agent = create_message(\"Custom\", MessageType.USER, 10)\n        reminder = create_message(\"Reminder\", MessageType.USER, 10)\n\n        simple_chat_history: list[ChatMessageSimple] = []\n        context_files = create_context_files(num_files=1, tokens_per_file=50)\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=custom_agent,\n            simple_chat_history=simple_chat_history,\n            reminder_message=reminder,\n            context_files=context_files,\n            available_tokens=1000,\n        )\n\n        # Should have: system, custom_agent, context_files, reminder\n        assert len(result) == 4\n        assert result[0] == system_prompt\n        assert result[1] == custom_agent\n        assert result[2].message_type == MessageType.USER  # Project files\n        assert result[3] == reminder\n\n    def test_no_user_message_raises_error(self) -> None:\n        \"\"\"Test that an error is raised when there's no user message in history.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        assistant_msg = create_message(\"Response\", MessageType.ASSISTANT, 5)\n        assistant_with_tool = create_assistant_with_tool_call(\"tc_1\", \"tool\", 5)\n\n        simple_chat_history = [assistant_msg, assistant_with_tool]\n        context_files = create_context_files()\n\n        with pytest.raises(ValueError, match=\"No user message found\"):\n            construct_message_history(\n                system_prompt=system_prompt,\n                custom_agent_prompt=None,\n                simple_chat_history=simple_chat_history,\n                reminder_message=None,\n                context_files=context_files,\n                available_tokens=1000,\n            )\n\n    def test_not_enough_tokens_for_required_elements(self) -> None:\n        \"\"\"Test error when there aren't enough tokens for required elements.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 50)\n        user_msg = create_message(\"Message\", MessageType.USER, 50)\n        custom_agent = create_message(\"Custom\", MessageType.USER, 50)\n\n        simple_chat_history = [user_msg]\n        context_files = create_context_files(num_files=1, tokens_per_file=100)\n\n        # Total required: 50 (system) + 50 (custom) + 100 (project) + 50 (user) = 250\n        # But only 200 available\n        with pytest.raises(ValueError, match=\"Not enough tokens\"):\n            construct_message_history(\n                system_prompt=system_prompt,\n                custom_agent_prompt=custom_agent,\n                simple_chat_history=simple_chat_history,\n                reminder_message=None,\n                context_files=context_files,\n                available_tokens=200,\n            )\n\n    def test_not_enough_tokens_for_last_user_and_messages_after(self) -> None:\n        \"\"\"Test error when last user message and messages after don't fit.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg1 = create_message(\"First\", MessageType.USER, 10)\n        user_msg2 = create_message(\"Second\", MessageType.USER, 30)\n        assistant_with_tool = create_assistant_with_tool_call(\"tc_1\", \"tool\", 30)\n\n        simple_chat_history = [user_msg1, user_msg2, assistant_with_tool]\n        context_files = create_context_files()\n\n        # Budget: 50 tokens\n        # Required: 10 (system) + 30 (user2) + 30 (assistant_with_tool) = 70 tokens\n        # After subtracting system: 40 tokens available, but need 60 for user2 + assistant_with_tool\n        with pytest.raises(\n            ValueError, match=\"Not enough tokens to include the last user message\"\n        ):\n            construct_message_history(\n                system_prompt=system_prompt,\n                custom_agent_prompt=None,\n                simple_chat_history=simple_chat_history,\n                reminder_message=None,\n                context_files=context_files,\n                available_tokens=50,\n            )\n\n    def test_complex_scenario_all_elements(self) -> None:\n        \"\"\"Test a complex scenario with all elements combined.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg1 = create_message(\"First\", MessageType.USER, 10)\n        assistant_msg1 = create_message(\"Response 1\", MessageType.ASSISTANT, 10)\n        user_msg2 = create_message(\"Second\", MessageType.USER, 10)\n        assistant_msg2 = create_message(\"Response 2\", MessageType.ASSISTANT, 10)\n        user_msg3 = create_message(\"Third\", MessageType.USER, 10)\n        assistant_with_tool = create_assistant_with_tool_call(\"tc_1\", \"search\", 10)\n        tool_response = create_tool_response(\"tc_1\", \"Results\", 10)\n        custom_agent = create_message(\"Custom instructions\", MessageType.USER, 15)\n        reminder = create_message(\"Cite sources\", MessageType.USER, 10)\n\n        simple_chat_history = [\n            user_msg1,\n            assistant_msg1,\n            user_msg2,\n            assistant_msg2,\n            user_msg3,\n            assistant_with_tool,\n            tool_response,\n        ]\n        context_files = create_context_files(num_files=2, tokens_per_file=20)\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=custom_agent,\n            simple_chat_history=simple_chat_history,\n            reminder_message=reminder,\n            context_files=context_files,\n            available_tokens=1000,\n        )\n\n        # Expected order:\n        # system, user1, assistant1, user2, assistant2,\n        # custom_agent, context_files, user3, assistant_with_tool, tool_response, reminder\n        assert len(result) == 11\n        assert result[0] == system_prompt\n        assert result[1] == user_msg1\n        assert result[2] == assistant_msg1\n        assert result[3] == user_msg2\n        assert result[4] == assistant_msg2\n        assert result[5] == custom_agent  # Before last user\n        assert (\n            result[6].message_type == MessageType.USER\n        )  # Project files before last user\n        assert \"documents\" in result[6].message\n        assert result[7] == user_msg3  # Last user message\n        assert result[8] == assistant_with_tool  # After last user\n        assert result[9] == tool_response  # After last user\n        assert result[10] == reminder  # At the very end\n\n    def test_context_files_json_format(self) -> None:\n        \"\"\"Test that project files are formatted correctly as JSON.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg = create_message(\"Hello\", MessageType.USER, 5)\n\n        simple_chat_history = [user_msg]\n        context_files = create_context_files(num_files=2, tokens_per_file=50)\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=1000,\n        )\n\n        # Find the project files message\n        project_message = result[1]  # Should be between system and user\n\n        # Verify it's formatted as JSON\n        assert \"Here are some documents provided for context\" in project_message.message\n        assert '\"documents\"' in project_message.message\n        assert '\"document\": 1' in project_message.message\n        assert '\"document\": 2' in project_message.message\n        assert '\"contents\"' in project_message.message\n        assert \"Project file 0 content\" in project_message.message\n        assert \"Project file 1 content\" in project_message.message\n\n    def test_file_metadata_for_tool_produces_message(self) -> None:\n        \"\"\"When context_files has file_metadata_for_tool, a metadata listing\n        message should be injected into the history.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg = create_message(\"Analyze the spreadsheet\", MessageType.USER, 5)\n\n        context_files = ExtractedContextFiles(\n            file_texts=[],\n            image_files=[],\n            use_as_search_filter=False,\n            total_token_count=0,\n            file_metadata=[],\n            uncapped_token_count=0,\n            file_metadata_for_tool=[\n                FileToolMetadata(\n                    file_id=\"xlsx-1\",\n                    filename=\"report.xlsx\",\n                    approx_char_count=100000,\n                ),\n            ],\n        )\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=[user_msg],\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=1000,\n            token_counter=_simple_token_counter,\n        )\n\n        # Should have: system, tool_metadata_message, user\n        assert len(result) == 3\n        metadata_msg = result[1]\n        assert metadata_msg.message_type == MessageType.USER\n        assert \"report.xlsx\" in metadata_msg.message\n        assert \"xlsx-1\" in metadata_msg.message\n\n    def test_metadata_only_and_text_files_both_present(self) -> None:\n        \"\"\"When both text content and tool metadata are present, both messages\n        should appear in the history.\"\"\"\n        system_prompt = create_message(\"System\", MessageType.SYSTEM, 10)\n        user_msg = create_message(\"Summarize everything\", MessageType.USER, 5)\n\n        context_files = ExtractedContextFiles(\n            file_texts=[\"Text file content here\"],\n            image_files=[],\n            use_as_search_filter=False,\n            total_token_count=100,\n            file_metadata=[\n                ContextFileMetadata(\n                    file_id=\"txt-1\",\n                    filename=\"notes.txt\",\n                    file_content=\"Text file content here\",\n                ),\n            ],\n            uncapped_token_count=100,\n            file_metadata_for_tool=[\n                FileToolMetadata(\n                    file_id=\"xlsx-1\",\n                    filename=\"data.xlsx\",\n                    approx_char_count=50000,\n                ),\n            ],\n        )\n\n        result = construct_message_history(\n            system_prompt=system_prompt,\n            custom_agent_prompt=None,\n            simple_chat_history=[user_msg],\n            reminder_message=None,\n            context_files=context_files,\n            available_tokens=2000,\n            token_counter=_simple_token_counter,\n        )\n\n        # Should have: system, context_files_message, tool_metadata_message, user\n        assert len(result) == 4\n        # Context files message (text content)\n        assert \"documents\" in result[1].message\n        assert \"Text file content here\" in result[1].message\n        # Tool metadata message\n        assert \"data.xlsx\" in result[2].message\n        assert result[3] == user_msg\n\n\ndef _simple_token_counter(text: str) -> int:\n    \"\"\"Approximate token counter for tests (~4 chars per token).\"\"\"\n    return max(1, len(text) // 4)\n\n\ndef _make_file_metadata(\n    file_id: str, filename: str, approx_chars: int = 50_000\n) -> FileToolMetadata:\n    return FileToolMetadata(\n        file_id=file_id, filename=filename, approx_char_count=approx_chars\n    )\n\n\nclass TestForgottenFileMetadata:\n    \"\"\"Tests for the forgotten-files mechanism in construct_message_history.\n\n    These cover the scenario where a user attaches a large file to a chat\n    message. On the first turn the file content message is in the context\n    window. On subsequent turns, it may be truncated by either:\n      a) context-window budget limits, or\n      b) summary-based truncation removing the message before\n         convert_chat_history ever runs — leaving an \"orphaned\" metadata\n         entry with no corresponding file_id-tagged ChatMessageSimple.\n\n    The forgotten-files mechanism must detect both cases and inject a\n    lightweight metadata message so the LLM knows to use read_file.\n    \"\"\"\n\n    def _build(\n        self,\n        simple_chat_history: list[ChatMessageSimple],\n        available_tokens: int = 10_000,\n        all_injected_file_metadata: dict[str, FileToolMetadata] | None = None,\n    ) -> list[ChatMessageSimple]:\n        \"\"\"Shorthand wrapper around construct_message_history.\"\"\"\n        return construct_message_history(\n            system_prompt=create_message(\"system\", MessageType.SYSTEM, 5),\n            custom_agent_prompt=None,\n            simple_chat_history=simple_chat_history,\n            reminder_message=None,\n            context_files=create_context_files(),\n            available_tokens=available_tokens,\n            token_counter=_simple_token_counter,\n            all_injected_file_metadata=all_injected_file_metadata,\n        )\n\n    @staticmethod\n    def _find_forgotten_message(\n        result: list[ChatMessageSimple],\n    ) -> ChatMessageSimple | None:\n        \"\"\"Find the forgotten-files metadata message in the result, if any.\"\"\"\n        for msg in result:\n            if \"Use the read_file tool\" in msg.message:\n                return msg\n        return None\n\n    # ------------------------------------------------------------------\n    # Case 1: file message is still in context — no forgotten-files needed\n    # ------------------------------------------------------------------\n\n    def test_file_message_present_no_forgotten_metadata(self) -> None:\n        \"\"\"When the file message fits in context, no forgotten-file message\n        should be injected.\n        \"\"\"\n        file_meta = _make_file_metadata(\"file-abc\", \"moby_dick.txt\")\n        file_msg = create_message(\"Contents of moby dick...\", MessageType.USER, 50)\n        file_msg.file_id = \"file-abc\"\n\n        history = [\n            file_msg,\n            create_message(\"Summarize this\", MessageType.ASSISTANT, 20),\n            create_message(\"What's chapter 1?\", MessageType.USER, 10),\n        ]\n        result = self._build(\n            history,\n            available_tokens=10_000,\n            all_injected_file_metadata={\"file-abc\": file_meta},\n        )\n\n        forgotten = self._find_forgotten_message(result)\n        assert (\n            forgotten is None\n        ), \"Should not inject forgotten-files when file is in context\"\n        # The file message itself should still be present\n        assert any(m.file_id == \"file-abc\" for m in result)\n\n    # ------------------------------------------------------------------\n    # Case 2: file message dropped by context-window truncation\n    # ------------------------------------------------------------------\n\n    def test_file_message_dropped_by_truncation_gets_forgotten_metadata(self) -> None:\n        \"\"\"When the context budget is too tight and the file message gets\n        truncated, a forgotten-files metadata message must appear.\n        \"\"\"\n        file_meta = _make_file_metadata(\"file-abc\", \"moby_dick.txt\")\n        file_msg = create_message(\"x\" * 2000, MessageType.USER, 500)\n        file_msg.file_id = \"file-abc\"\n\n        history = [\n            file_msg,\n            create_message(\"Got it\", MessageType.ASSISTANT, 10),\n            create_message(\"Tell me about ch1\", MessageType.USER, 10),\n        ]\n\n        # Budget is just enough for the system prompt + last messages but\n        # NOT the 500-token file message.\n        result = self._build(\n            history,\n            available_tokens=100,\n            all_injected_file_metadata={\"file-abc\": file_meta},\n        )\n\n        forgotten = self._find_forgotten_message(result)\n        assert forgotten is not None, \"Forgotten-files message should be injected\"\n        assert \"moby_dick.txt\" in forgotten.message\n        assert \"file-abc\" in forgotten.message\n\n        # The original file message should NOT be in context\n        assert not any(\n            getattr(m, \"file_id\", None) == \"file-abc\"\n            and m.message_type == MessageType.USER\n            for m in result\n            if m is not forgotten\n        )\n\n    # ------------------------------------------------------------------\n    # Case 3: file message removed by summary truncation (\"orphaned\" metadata)\n    # ------------------------------------------------------------------\n\n    def test_orphaned_metadata_triggers_forgotten_files(self) -> None:\n        \"\"\"Simulates the scenario where summary truncation in process_message\n        removed the file's original message BEFORE convert_chat_history ran,\n        so no ChatMessageSimple has the file_id tag. The metadata is still\n        passed via all_injected_file_metadata and must be treated as dropped.\n        \"\"\"\n        file_meta = _make_file_metadata(\"file-abc\", \"moby_dick.txt\")\n\n        # History has no file_id-tagged message — it was already removed by\n        # summary truncation. Only later conversation remains.\n        history = [\n            create_message(\"Summary of earlier convo\", MessageType.ASSISTANT, 20),\n            create_message(\"Now tell me about chapter 2\", MessageType.USER, 10),\n        ]\n\n        result = self._build(\n            history,\n            available_tokens=10_000,\n            all_injected_file_metadata={\"file-abc\": file_meta},\n        )\n\n        forgotten = self._find_forgotten_message(result)\n        assert (\n            forgotten is not None\n        ), \"Orphaned file metadata should trigger forgotten-files message\"\n        assert \"moby_dick.txt\" in forgotten.message\n        assert \"file-abc\" in forgotten.message\n\n    # ------------------------------------------------------------------\n    # Case 4: multiple files — one survives, one is dropped\n    # ------------------------------------------------------------------\n\n    def test_mixed_files_only_dropped_ones_appear_in_forgotten(self) -> None:\n        \"\"\"When two files exist but only one's message is truncated, only the\n        truncated file should appear in the forgotten-files metadata.\n        \"\"\"\n        meta_a = _make_file_metadata(\"file-a\", \"big_file.txt\")\n        meta_b = _make_file_metadata(\"file-b\", \"small_file.txt\")\n\n        # file-a has a huge message that will be dropped, file-b fits\n        file_msg_a = create_message(\"x\" * 2000, MessageType.USER, 500)\n        file_msg_a.file_id = \"file-a\"\n        file_msg_b = create_message(\"small content\", MessageType.USER, 5)\n        file_msg_b.file_id = \"file-b\"\n\n        history = [\n            file_msg_a,\n            create_message(\"ok\", MessageType.ASSISTANT, 3),\n            file_msg_b,\n            create_message(\"ok\", MessageType.ASSISTANT, 3),\n            create_message(\"Compare the two files\", MessageType.USER, 10),\n        ]\n\n        # Tight budget: system(5) + last-user(10) = 15 min. Give ~50 so\n        # file_msg_b(5)+assistant(3)+assistant(3) fit but file_msg_a(500) won't.\n        result = self._build(\n            history,\n            available_tokens=80,\n            all_injected_file_metadata={\"file-a\": meta_a, \"file-b\": meta_b},\n        )\n\n        forgotten = self._find_forgotten_message(result)\n        assert forgotten is not None\n        assert \"big_file.txt\" in forgotten.message\n        assert \"file-a\" in forgotten.message\n        # file-b should NOT be in the forgotten message — it's still in context\n        assert \"small_file.txt\" not in forgotten.message\n\n    # ------------------------------------------------------------------\n    # Case 5: no metadata dict → no forgotten-files message even if dropped\n    # ------------------------------------------------------------------\n\n    def test_no_metadata_dict_means_no_forgotten_message(self) -> None:\n        \"\"\"If all_injected_file_metadata is None (FileReaderTool not enabled),\n        no forgotten-files message should be emitted even if file messages\n        are dropped by truncation.\n        \"\"\"\n        file_msg = create_message(\"x\" * 2000, MessageType.USER, 500)\n        file_msg.file_id = \"file-abc\"\n\n        history = [\n            file_msg,\n            create_message(\"Got it\", MessageType.ASSISTANT, 10),\n            create_message(\"Tell me more\", MessageType.USER, 10),\n        ]\n\n        result = self._build(\n            history,\n            available_tokens=100,\n            all_injected_file_metadata=None,\n        )\n\n        forgotten = self._find_forgotten_message(result)\n        assert (\n            forgotten is None\n        ), \"No forgotten-files message when metadata dict is None\"\n\n    # ------------------------------------------------------------------\n    # Case 6: orphaned metadata with multiple files, all summarized away\n    # ------------------------------------------------------------------\n\n    def test_multiple_orphaned_files_all_appear_in_forgotten(self) -> None:\n        \"\"\"All files from summarized-away messages should be listed in the\n        forgotten-files message.\n        \"\"\"\n        meta_a = _make_file_metadata(\"file-a\", \"report.pdf\")\n        meta_b = _make_file_metadata(\"file-b\", \"data.csv\")\n\n        # Both original messages were removed by summary truncation;\n        # only post-summary messages remain.\n        history = [\n            create_message(\"Earlier discussion summarized\", MessageType.ASSISTANT, 15),\n            create_message(\"What patterns do you see?\", MessageType.USER, 10),\n        ]\n\n        result = self._build(\n            history,\n            available_tokens=10_000,\n            all_injected_file_metadata={\"file-a\": meta_a, \"file-b\": meta_b},\n        )\n\n        forgotten = self._find_forgotten_message(result)\n        assert forgotten is not None\n        assert \"report.pdf\" in forgotten.message\n        assert \"data.csv\" in forgotten.message\n\n    # ------------------------------------------------------------------\n    # Case 7: file metadata persists across many turns after truncation\n    # ------------------------------------------------------------------\n\n    def test_forgotten_metadata_persists_across_many_turns(self) -> None:\n        \"\"\"Simulates the real bug: after the file's original message is\n        summarized away, every subsequent turn should still include the\n        forgotten-files metadata — not just the first turn after truncation.\n        \"\"\"\n        file_meta = _make_file_metadata(\"file-abc\", \"moby_dick.txt\")\n\n        # Build several turns AFTER the file was already summarized away.\n        # Each turn, construct_message_history is called fresh with the\n        # same all_injected_file_metadata.\n        for turn in range(5):\n            messages = [\n                create_message(\"Summary\", MessageType.ASSISTANT, 15),\n            ]\n            # Add some back-and-forth after the summary\n            for i in range(turn):\n                messages.append(create_message(f\"Question {i}\", MessageType.USER, 5))\n                messages.append(create_message(f\"Answer {i}\", MessageType.ASSISTANT, 5))\n            messages.append(\n                create_message(f\"Latest question (turn {turn})\", MessageType.USER, 5)\n            )\n\n            result = self._build(\n                messages,\n                available_tokens=10_000,\n                all_injected_file_metadata={\"file-abc\": file_meta},\n            )\n\n            forgotten = self._find_forgotten_message(result)\n            assert (\n                forgotten is not None\n            ), f\"Turn {turn}: forgotten-files message must persist every turn\"\n            assert \"moby_dick.txt\" in forgotten.message\n\n\nclass TestFallbackToolExtraction:\n    def _tool_defs(self) -> list[dict]:\n        return [\n            {\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": \"internal_search\",\n                    \"parameters\": {\n                        \"type\": \"object\",\n                        \"properties\": {\n                            \"queries\": {\n                                \"type\": \"array\",\n                                \"items\": {\"type\": \"string\"},\n                            }\n                        },\n                        \"required\": [\"queries\"],\n                    },\n                },\n            }\n        ]\n\n    def test_noop_if_fallback_was_already_attempted(self) -> None:\n        llm_step_result = LlmStepResult(\n            reasoning=None,\n            answer='{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"alpha\"]}}',\n            tool_calls=None,\n        )\n\n        result, attempted = _try_fallback_tool_extraction(\n            llm_step_result=llm_step_result,\n            tool_choice=ToolChoiceOptions.REQUIRED,\n            fallback_extraction_attempted=True,\n            tool_defs=self._tool_defs(),\n            turn_index=0,\n        )\n\n        assert result is llm_step_result\n        assert attempted is False\n\n    def test_extracts_from_answer_when_required_and_no_tool_calls(self) -> None:\n        llm_step_result = LlmStepResult(\n            reasoning=None,\n            answer='{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"alpha\"]}}',\n            tool_calls=None,\n        )\n\n        result, attempted = _try_fallback_tool_extraction(\n            llm_step_result=llm_step_result,\n            tool_choice=ToolChoiceOptions.REQUIRED,\n            fallback_extraction_attempted=False,\n            tool_defs=self._tool_defs(),\n            turn_index=3,\n        )\n\n        assert attempted is True\n        assert result.tool_calls is not None\n        assert len(result.tool_calls) == 1\n        assert result.tool_calls[0].tool_name == \"internal_search\"\n        assert result.tool_calls[0].tool_args == {\"queries\": [\"alpha\"]}\n        assert result.tool_calls[0].placement == Placement(turn_index=3)\n\n    def test_falls_back_to_reasoning_when_answer_has_no_tool_calls(self) -> None:\n        llm_step_result = LlmStepResult(\n            reasoning='{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"beta\"]}}',\n            answer=\"I should search first.\",\n            tool_calls=None,\n        )\n\n        result, attempted = _try_fallback_tool_extraction(\n            llm_step_result=llm_step_result,\n            tool_choice=ToolChoiceOptions.REQUIRED,\n            fallback_extraction_attempted=False,\n            tool_defs=self._tool_defs(),\n            turn_index=5,\n        )\n\n        assert attempted is True\n        assert result.tool_calls is not None\n        assert len(result.tool_calls) == 1\n        assert result.tool_calls[0].tool_name == \"internal_search\"\n        assert result.tool_calls[0].tool_args == {\"queries\": [\"beta\"]}\n        assert result.tool_calls[0].placement == Placement(turn_index=5)\n\n    def test_extracts_xml_style_invoke_from_answer_when_required(self) -> None:\n        llm_step_result = LlmStepResult(\n            reasoning=None,\n            answer=(\n                '<function_calls><invoke name=\"internal_search\">'\n                '<parameter name=\"queries\" string=\"false\">'\n                '[\"Onyx documentation\", \"Onyx docs\", \"Onyx platform\"]'\n                \"</parameter></invoke></function_calls>\"\n            ),\n            tool_calls=None,\n        )\n\n        result, attempted = _try_fallback_tool_extraction(\n            llm_step_result=llm_step_result,\n            tool_choice=ToolChoiceOptions.REQUIRED,\n            fallback_extraction_attempted=False,\n            tool_defs=self._tool_defs(),\n            turn_index=7,\n        )\n\n        assert attempted is True\n        assert result.tool_calls is not None\n        assert len(result.tool_calls) == 1\n        assert result.tool_calls[0].tool_name == \"internal_search\"\n        assert result.tool_calls[0].tool_args == {\n            \"queries\": [\"Onyx documentation\", \"Onyx docs\", \"Onyx platform\"]\n        }\n        assert result.tool_calls[0].placement == Placement(turn_index=7)\n\n    def test_extracts_xml_style_invoke_from_answer_when_auto(self) -> None:\n        llm_step_result = LlmStepResult(\n            reasoning=None,\n            # Runtime-faithful shape: filtered answer is empty, raw answer has XML payload.\n            answer=None,\n            raw_answer=(\n                '<function_calls><invoke name=\"internal_search\">'\n                '<parameter name=\"queries\" string=\"false\">'\n                '[\"Onyx documentation\", \"Onyx docs\", \"Onyx internal docs\"]'\n                \"</parameter></invoke></function_calls>\"\n            ),\n            tool_calls=None,\n        )\n\n        result, attempted = _try_fallback_tool_extraction(\n            llm_step_result=llm_step_result,\n            tool_choice=ToolChoiceOptions.AUTO,\n            fallback_extraction_attempted=False,\n            tool_defs=self._tool_defs(),\n            turn_index=9,\n        )\n\n        assert attempted is True\n        assert result.tool_calls is not None\n        assert len(result.tool_calls) == 1\n        assert result.tool_calls[0].tool_name == \"internal_search\"\n        assert result.tool_calls[0].tool_args == {\n            \"queries\": [\"Onyx documentation\", \"Onyx docs\", \"Onyx internal docs\"]\n        }\n        assert result.tool_calls[0].placement == Placement(turn_index=9)\n\n    def test_extracts_from_raw_answer_when_filtered_answer_has_no_xml(self) -> None:\n        llm_step_result = LlmStepResult(\n            reasoning=None,\n            answer=\"\",\n            raw_answer=(\n                '<function_calls><invoke name=\"internal_search\">'\n                '<parameter name=\"queries\" string=\"false\">'\n                '[\"Onyx documentation\", \"Onyx docs\"]'\n                \"</parameter></invoke></function_calls>\"\n            ),\n            tool_calls=None,\n        )\n\n        result, attempted = _try_fallback_tool_extraction(\n            llm_step_result=llm_step_result,\n            tool_choice=ToolChoiceOptions.AUTO,\n            fallback_extraction_attempted=False,\n            tool_defs=self._tool_defs(),\n            turn_index=10,\n        )\n\n        assert attempted is True\n        assert result.tool_calls is not None\n        assert len(result.tool_calls) == 1\n        assert result.tool_calls[0].tool_name == \"internal_search\"\n        assert result.tool_calls[0].tool_args == {\n            \"queries\": [\"Onyx documentation\", \"Onyx docs\"]\n        }\n        assert result.tool_calls[0].placement == Placement(turn_index=10)\n\n    def test_does_not_attempt_fallback_for_auto_without_tool_call_hints(self) -> None:\n        llm_step_result = LlmStepResult(\n            reasoning=None,\n            answer=\"Here is a normal answer with no tool call payload.\",\n            tool_calls=None,\n        )\n\n        result, attempted = _try_fallback_tool_extraction(\n            llm_step_result=llm_step_result,\n            tool_choice=ToolChoiceOptions.AUTO,\n            fallback_extraction_attempted=False,\n            tool_defs=self._tool_defs(),\n            turn_index=2,\n        )\n\n        assert result is llm_step_result\n        assert attempted is False\n\n    def test_returns_unchanged_when_required_but_nothing_extractable(self) -> None:\n        llm_step_result = LlmStepResult(\n            reasoning=\"Need more info.\",\n            answer=\"Let me think.\",\n            tool_calls=None,\n        )\n\n        result, attempted = _try_fallback_tool_extraction(\n            llm_step_result=llm_step_result,\n            tool_choice=ToolChoiceOptions.REQUIRED,\n            fallback_extraction_attempted=False,\n            tool_defs=self._tool_defs(),\n            turn_index=1,\n        )\n\n        assert result is llm_step_result\n        assert attempted is True\n        assert result.tool_calls is None\n\n    def test_noop_when_tool_calls_already_present(self) -> None:\n        existing_call = ToolCallKickoff(\n            tool_call_id=\"call_existing\",\n            tool_name=\"internal_search\",\n            tool_args={\"queries\": [\"already-set\"]},\n            placement=Placement(turn_index=0),\n        )\n        llm_step_result = LlmStepResult(\n            reasoning=None,\n            answer='{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"alpha\"]}}',\n            tool_calls=[existing_call],\n        )\n\n        result, attempted = _try_fallback_tool_extraction(\n            llm_step_result=llm_step_result,\n            tool_choice=ToolChoiceOptions.REQUIRED,\n            fallback_extraction_attempted=False,\n            tool_defs=self._tool_defs(),\n            turn_index=0,\n        )\n\n        assert result is llm_step_result\n        assert attempted is False\n\n\nclass TestEmptyLlmResponseClassification:\n    def _make_llm(self, provider: str = \"openai\", model: str = \"gpt-5.2\") -> Mock:\n        llm = Mock()\n        llm.config = LLMConfig(\n            model_provider=provider,\n            model_name=model,\n            temperature=0.0,\n            max_input_tokens=4096,\n        )\n        return llm\n\n    def test_openai_empty_stream_is_classified_as_budget_exceeded(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        monkeypatch.setattr(\"onyx.chat.llm_loop.is_true_openai_model\", lambda *_: True)\n\n        err = _build_empty_llm_response_error(\n            llm=self._make_llm(),\n            llm_step_result=LlmStepResult(\n                reasoning=None,\n                answer=None,\n                tool_calls=None,\n                raw_answer=None,\n            ),\n            tool_choice=ToolChoiceOptions.AUTO,\n        )\n\n        assert isinstance(err, EmptyLLMResponseError)\n        assert err.error_code == \"BUDGET_EXCEEDED\"\n        assert err.is_retryable is False\n        assert \"quota\" in err.client_error_msg.lower()\n\n    def test_reasoning_only_response_uses_generic_empty_response_error(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        monkeypatch.setattr(\"onyx.chat.llm_loop.is_true_openai_model\", lambda *_: True)\n\n        err = _build_empty_llm_response_error(\n            llm=self._make_llm(),\n            llm_step_result=LlmStepResult(\n                reasoning=\"scratchpad only\",\n                answer=None,\n                tool_calls=None,\n                raw_answer=None,\n            ),\n            tool_choice=ToolChoiceOptions.AUTO,\n        )\n\n        assert isinstance(err, EmptyLLMResponseError)\n        assert err.error_code == \"EMPTY_LLM_RESPONSE\"\n        assert err.is_retryable is True\n        assert \"quota\" not in err.client_error_msg.lower()\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_llm_step.py",
    "content": "\"\"\"Tests for llm_step.py, specifically sanitization and argument parsing.\"\"\"\n\nfrom typing import Any\n\nimport pytest\n\nfrom onyx.chat.llm_step import _extract_tool_call_kickoffs\nfrom onyx.chat.llm_step import _increment_turns\nfrom onyx.chat.llm_step import _parse_tool_args_to_dict\nfrom onyx.chat.llm_step import _resolve_tool_arguments\nfrom onyx.chat.llm_step import _XmlToolCallContentFilter\nfrom onyx.chat.llm_step import extract_tool_calls_from_response_text\nfrom onyx.chat.llm_step import translate_history_to_llm_format\nfrom onyx.chat.models import ChatMessageSimple\nfrom onyx.chat.models import ToolCallSimple\nfrom onyx.configs.constants import MessageType\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.interfaces import LLMConfig\nfrom onyx.llm.models import AssistantMessage\nfrom onyx.llm.models import ToolMessage\nfrom onyx.llm.models import UserMessage\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.utils.postgres_sanitization import sanitize_string\n\n\nclass TestSanitizeLlmOutput:\n    \"\"\"Tests for the sanitize_string function.\"\"\"\n\n    def test_removes_null_bytes(self) -> None:\n        \"\"\"Test that NULL bytes are removed from strings.\"\"\"\n        assert sanitize_string(\"hello\\x00world\") == \"helloworld\"\n        assert sanitize_string(\"\\x00start\") == \"start\"\n        assert sanitize_string(\"end\\x00\") == \"end\"\n        assert sanitize_string(\"\\x00\\x00\\x00\") == \"\"\n\n    def test_removes_surrogates(self) -> None:\n        \"\"\"Test that UTF-16 surrogates are removed from strings.\"\"\"\n        # Low surrogate\n        assert sanitize_string(\"hello\\ud800world\") == \"helloworld\"\n        # High surrogate\n        assert sanitize_string(\"hello\\udfffworld\") == \"helloworld\"\n        # Middle of surrogate range\n        assert sanitize_string(\"test\\uda00value\") == \"testvalue\"\n\n    def test_removes_mixed_bad_characters(self) -> None:\n        \"\"\"Test removal of both NULL bytes and surrogates together.\"\"\"\n        assert sanitize_string(\"a\\x00b\\ud800c\\udfffd\") == \"abcd\"\n\n    def test_preserves_valid_unicode(self) -> None:\n        \"\"\"Test that valid Unicode characters are preserved.\"\"\"\n        # Emojis\n        assert sanitize_string(\"hello 👋 world\") == \"hello 👋 world\"\n        # Chinese characters\n        assert sanitize_string(\"你好世界\") == \"你好世界\"\n        # Mixed scripts\n        assert sanitize_string(\"Hello мир 世界\") == \"Hello мир 世界\"\n\n    def test_empty_string(self) -> None:\n        \"\"\"Test that empty strings are handled correctly.\"\"\"\n        assert sanitize_string(\"\") == \"\"\n\n    def test_normal_ascii(self) -> None:\n        \"\"\"Test that normal ASCII strings pass through unchanged.\"\"\"\n        assert sanitize_string(\"hello world\") == \"hello world\"\n        assert sanitize_string('{\"key\": \"value\"}') == '{\"key\": \"value\"}'\n\n\nclass TestParseToolArgsToDict:\n    \"\"\"Tests for the _parse_tool_args_to_dict function.\"\"\"\n\n    def test_none_input(self) -> None:\n        \"\"\"Test that None returns empty dict.\"\"\"\n        assert _parse_tool_args_to_dict(None) == {}\n\n    def test_dict_input(self) -> None:\n        \"\"\"Test that dict input is returned with parsed JSON string values.\"\"\"\n        result = _parse_tool_args_to_dict({\"key\": \"value\", \"num\": 42})\n        assert result == {\"key\": \"value\", \"num\": 42}\n\n    def test_dict_with_json_string_values(self) -> None:\n        \"\"\"Test that JSON string values in dict are parsed.\"\"\"\n        result = _parse_tool_args_to_dict({\"queries\": '[\"q1\", \"q2\"]'})\n        assert result == {\"queries\": [\"q1\", \"q2\"]}\n\n    def test_json_string_input(self) -> None:\n        \"\"\"Test that JSON string is parsed to dict.\"\"\"\n        result = _parse_tool_args_to_dict('{\"key\": \"value\"}')\n        assert result == {\"key\": \"value\"}\n\n    def test_double_encoded_json(self) -> None:\n        \"\"\"Test that double-encoded JSON string is parsed correctly.\"\"\"\n        # This is: '\"{\\\\\"key\\\\\": \\\\\"value\\\\\"}\"'\n        double_encoded = '\"\\\\\"{\\\\\\\\\\\\\"key\\\\\\\\\\\\\": \\\\\\\\\\\\\"value\\\\\\\\\\\\\"}\\\\\"'\n        # Actually let's use a simpler approach\n        import json\n\n        inner = {\"key\": \"value\"}\n        single_encoded = json.dumps(inner)  # '{\"key\": \"value\"}'\n        double_encoded = json.dumps(single_encoded)  # '\"{\\\\\"key\\\\\": \\\\\"value\\\\\"}\"'\n        result = _parse_tool_args_to_dict(double_encoded)\n        assert result == {\"key\": \"value\"}\n\n    def test_invalid_json_returns_empty_dict(self) -> None:\n        \"\"\"Test that invalid JSON returns empty dict.\"\"\"\n        assert _parse_tool_args_to_dict(\"not json\") == {}\n        assert _parse_tool_args_to_dict(\"{invalid}\") == {}\n\n    def test_non_dict_json_returns_empty_dict(self) -> None:\n        \"\"\"Test that non-dict JSON (like arrays) returns empty dict.\"\"\"\n        assert _parse_tool_args_to_dict(\"[1, 2, 3]\") == {}\n        assert _parse_tool_args_to_dict('\"just a string\"') == {}\n\n    def test_non_string_non_dict_returns_empty_dict(self) -> None:\n        \"\"\"Test that non-string, non-dict types return empty dict.\"\"\"\n        assert _parse_tool_args_to_dict(123) == {}\n        assert _parse_tool_args_to_dict([\"list\"]) == {}\n\n    # Sanitization tests\n\n    def test_dict_input_sanitizes_null_bytes(self) -> None:\n        \"\"\"Test that NULL bytes in dict values are sanitized.\"\"\"\n        result = _parse_tool_args_to_dict({\"query\": \"hello\\x00world\"})\n        assert result == {\"query\": \"helloworld\"}\n\n    def test_dict_input_sanitizes_surrogates(self) -> None:\n        \"\"\"Test that surrogates in dict values are sanitized.\"\"\"\n        result = _parse_tool_args_to_dict({\"query\": \"hello\\ud800world\"})\n        assert result == {\"query\": \"helloworld\"}\n\n    def test_json_string_sanitizes_null_bytes(self) -> None:\n        \"\"\"Test that NULL bytes in JSON string are sanitized before parsing.\"\"\"\n        # JSON with NULL byte in value\n        json_str = '{\"query\": \"hello\\x00world\"}'\n        result = _parse_tool_args_to_dict(json_str)\n        assert result == {\"query\": \"helloworld\"}\n\n    def test_json_string_sanitizes_surrogates(self) -> None:\n        \"\"\"Test that surrogates in JSON string are sanitized before parsing.\"\"\"\n        json_str = '{\"query\": \"hello\\ud800world\"}'\n        result = _parse_tool_args_to_dict(json_str)\n        assert result == {\"query\": \"helloworld\"}\n\n    def test_nested_dict_values_sanitized(self) -> None:\n        \"\"\"Test that nested JSON string values are also sanitized.\"\"\"\n        # Dict with a JSON string value that contains bad characters\n        result = _parse_tool_args_to_dict({\"queries\": '[\"q1\\x00\", \"q2\\ud800\"]'})\n        assert result == {\"queries\": [\"q1\", \"q2\"]}\n\n    def test_preserves_valid_unicode_in_dict(self) -> None:\n        \"\"\"Test that valid Unicode is preserved in dict values.\"\"\"\n        result = _parse_tool_args_to_dict({\"query\": \"hello 👋 世界\"})\n        assert result == {\"query\": \"hello 👋 世界\"}\n\n    def test_preserves_valid_unicode_in_json(self) -> None:\n        \"\"\"Test that valid Unicode is preserved in JSON string.\"\"\"\n        json_str = '{\"query\": \"hello 👋 世界\"}'\n        result = _parse_tool_args_to_dict(json_str)\n        assert result == {\"query\": \"hello 👋 世界\"}\n\n\nclass TestExtractToolCallsFromResponseText:\n    def _tool_defs(self) -> list[dict]:\n        return [\n            {\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": \"internal_search\",\n                    \"parameters\": {\n                        \"type\": \"object\",\n                        \"properties\": {\n                            \"queries\": {\n                                \"type\": \"array\",\n                                \"items\": {\"type\": \"string\"},\n                            }\n                        },\n                        \"required\": [\"queries\"],\n                    },\n                },\n            }\n        ]\n\n    def _placement(self) -> Placement:\n        return Placement(turn_index=0, tab_index=0, sub_turn_index=None)\n\n    def test_collapses_nested_arguments_duplicate(self) -> None:\n        response_text = '{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"alpha\"]}}'\n        tool_calls = extract_tool_calls_from_response_text(\n            response_text=response_text,\n            tool_definitions=self._tool_defs(),\n            placement=self._placement(),\n        )\n        assert len(tool_calls) == 1\n        assert tool_calls[0].tool_name == \"internal_search\"\n        assert tool_calls[0].tool_args == {\"queries\": [\"alpha\"]}\n\n    def test_keeps_non_duplicated_sequence(self) -> None:\n        response_text = \"\\n\".join(\n            [\n                '{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"alpha\"]}}',\n                '{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"beta\"]}}',\n            ]\n        )\n        tool_calls = extract_tool_calls_from_response_text(\n            response_text=response_text,\n            tool_definitions=self._tool_defs(),\n            placement=self._placement(),\n        )\n        assert len(tool_calls) == 2\n        assert [call.tool_args for call in tool_calls] == [\n            {\"queries\": [\"alpha\"]},\n            {\"queries\": [\"beta\"]},\n        ]\n\n    def test_keeps_intentional_duplicate_tool_calls(self) -> None:\n        response_text = \"\\n\".join(\n            [\n                '{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"alpha\"]}}',\n                '{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"alpha\"]}}',\n            ]\n        )\n        tool_calls = extract_tool_calls_from_response_text(\n            response_text=response_text,\n            tool_definitions=self._tool_defs(),\n            placement=self._placement(),\n        )\n        assert len(tool_calls) == 2\n        assert [call.tool_args for call in tool_calls] == [\n            {\"queries\": [\"alpha\"]},\n            {\"queries\": [\"alpha\"]},\n        ]\n\n    def test_extracts_xml_style_invoke_tool_call(self) -> None:\n        response_text = \"\"\"\n<function_calls>\n<invoke name=\"internal_search\">\n<parameter name=\"queries\" string=\"false\">[\"Onyx documentation\", \"Onyx docs\", \"Onyx platform\"]</parameter>\n</invoke>\n</function_calls>\n\"\"\"\n        tool_calls = extract_tool_calls_from_response_text(\n            response_text=response_text,\n            tool_definitions=self._tool_defs(),\n            placement=self._placement(),\n        )\n        assert len(tool_calls) == 1\n        assert tool_calls[0].tool_name == \"internal_search\"\n        assert tool_calls[0].tool_args == {\n            \"queries\": [\"Onyx documentation\", \"Onyx docs\", \"Onyx platform\"]\n        }\n\n    def test_ignores_unknown_tool_in_xml_style_invoke(self) -> None:\n        response_text = \"\"\"\n<function_calls>\n<invoke name=\"unknown_tool\">\n<parameter name=\"queries\" string=\"false\">[\"Onyx docs\"]</parameter>\n</invoke>\n</function_calls>\n\"\"\"\n        tool_calls = extract_tool_calls_from_response_text(\n            response_text=response_text,\n            tool_definitions=self._tool_defs(),\n            placement=self._placement(),\n        )\n        assert len(tool_calls) == 0\n\n\nclass TestExtractToolCallKickoffs:\n    \"\"\"Tests for the _extract_tool_call_kickoffs function.\"\"\"\n\n    def test_valid_tool_call(self) -> None:\n        tool_call_map = {\n            0: {\n                \"id\": \"call_123\",\n                \"name\": \"internal_search\",\n                \"arguments\": '{\"queries\": [\"test\"]}',\n            }\n        }\n        result = _extract_tool_call_kickoffs(tool_call_map, turn_index=0)\n        assert len(result) == 1\n        assert result[0].tool_name == \"internal_search\"\n        assert result[0].tool_args == {\"queries\": [\"test\"]}\n\n    def test_invalid_json_arguments_returns_empty_dict(self) -> None:\n        \"\"\"Verify that malformed JSON arguments produce an empty dict\n        rather than raising an exception. This confirms the dead try/except\n        around _parse_tool_args_to_dict was safe to remove.\"\"\"\n        tool_call_map = {\n            0: {\n                \"id\": \"call_bad\",\n                \"name\": \"internal_search\",\n                \"arguments\": \"not valid json {{{\",\n            }\n        }\n        result = _extract_tool_call_kickoffs(tool_call_map, turn_index=0)\n        assert len(result) == 1\n        assert result[0].tool_args == {}\n\n    def test_none_arguments_returns_empty_dict(self) -> None:\n        tool_call_map = {\n            0: {\n                \"id\": \"call_none\",\n                \"name\": \"internal_search\",\n                \"arguments\": None,\n            }\n        }\n        result = _extract_tool_call_kickoffs(tool_call_map, turn_index=0)\n        assert len(result) == 1\n        assert result[0].tool_args == {}\n\n    def test_skips_entries_missing_id_or_name(self) -> None:\n        tool_call_map: dict[int, dict[str, Any]] = {\n            0: {\"id\": None, \"name\": \"internal_search\", \"arguments\": \"{}\"},\n            1: {\"id\": \"call_1\", \"name\": None, \"arguments\": \"{}\"},\n            2: {\"id\": \"call_2\", \"name\": \"internal_search\", \"arguments\": \"{}\"},\n        }\n        result = _extract_tool_call_kickoffs(tool_call_map, turn_index=0)\n        assert len(result) == 1\n        assert result[0].tool_call_id == \"call_2\"\n\n    def test_tab_index_auto_increments(self) -> None:\n        tool_call_map = {\n            0: {\"id\": \"c1\", \"name\": \"tool_a\", \"arguments\": \"{}\"},\n            1: {\"id\": \"c2\", \"name\": \"tool_b\", \"arguments\": \"{}\"},\n        }\n        result = _extract_tool_call_kickoffs(tool_call_map, turn_index=0)\n        assert result[0].placement.tab_index == 0\n        assert result[1].placement.tab_index == 1\n\n    def test_tab_index_override(self) -> None:\n        tool_call_map = {\n            0: {\"id\": \"c1\", \"name\": \"tool_a\", \"arguments\": \"{}\"},\n            1: {\"id\": \"c2\", \"name\": \"tool_b\", \"arguments\": \"{}\"},\n        }\n        result = _extract_tool_call_kickoffs(tool_call_map, turn_index=0, tab_index=5)\n        assert result[0].placement.tab_index == 5\n        assert result[1].placement.tab_index == 5\n\n\nclass TestXmlToolCallContentFilter:\n    def test_strips_function_calls_block_single_chunk(self) -> None:\n        f = _XmlToolCallContentFilter()\n        output = f.process(\n            \"prefix \"\n            '<function_calls><invoke name=\"internal_search\">'\n            '<parameter name=\"queries\" string=\"false\">[\"Onyx docs\"]</parameter>'\n            \"</invoke></function_calls> suffix\"\n        )\n        output += f.flush()\n        assert output == \"prefix  suffix\"\n\n    def test_strips_function_calls_block_split_across_chunks(self) -> None:\n        f = _XmlToolCallContentFilter()\n        chunks = [\n            \"Start \",\n            \"<function_\",\n            'calls><invoke name=\"internal_search\">',\n            '<parameter name=\"queries\" string=\"false\">[\"Onyx docs\"]',\n            \"</parameter></invoke></function_calls>\",\n            \" End\",\n        ]\n        output = \"\".join(f.process(chunk) for chunk in chunks) + f.flush()\n        assert output == \"Start  End\"\n\n    def test_preserves_non_tool_call_xml(self) -> None:\n        f = _XmlToolCallContentFilter()\n        output = f.process(\"A <tag>value</tag> B\")\n        output += f.flush()\n        assert output == \"A <tag>value</tag> B\"\n\n    def test_does_not_strip_similar_tag_names(self) -> None:\n        f = _XmlToolCallContentFilter()\n        output = f.process(\n            \"A <function_calls_v2><invoke>noop</invoke></function_calls_v2> B\"\n        )\n        output += f.flush()\n        assert (\n            output == \"A <function_calls_v2><invoke>noop</invoke></function_calls_v2> B\"\n        )\n\n\nclass TestIncrementTurns:\n    \"\"\"Tests for the _increment_turns helper used by _close_reasoning_if_active.\"\"\"\n\n    def test_increments_turn_index_when_no_sub_turn(self) -> None:\n        turn, sub = _increment_turns(0, None)\n        assert turn == 1\n        assert sub is None\n\n    def test_increments_sub_turn_when_present(self) -> None:\n        turn, sub = _increment_turns(3, 0)\n        assert turn == 3\n        assert sub == 1\n\n    def test_increments_sub_turn_from_nonzero(self) -> None:\n        turn, sub = _increment_turns(5, 2)\n        assert turn == 5\n        assert sub == 3\n\n\nclass TestResolveToolArguments:\n    \"\"\"Tests for the _resolve_tool_arguments helper.\"\"\"\n\n    def test_dict_arguments(self) -> None:\n        obj = {\"arguments\": {\"queries\": [\"test\"]}}\n        assert _resolve_tool_arguments(obj) == {\"queries\": [\"test\"]}\n\n    def test_dict_parameters(self) -> None:\n        \"\"\"Falls back to 'parameters' key when 'arguments' is missing.\"\"\"\n        obj = {\"parameters\": {\"queries\": [\"test\"]}}\n        assert _resolve_tool_arguments(obj) == {\"queries\": [\"test\"]}\n\n    def test_arguments_takes_precedence_over_parameters(self) -> None:\n        obj = {\"arguments\": {\"a\": 1}, \"parameters\": {\"b\": 2}}\n        assert _resolve_tool_arguments(obj) == {\"a\": 1}\n\n    def test_json_string_arguments(self) -> None:\n        obj = {\"arguments\": '{\"queries\": [\"test\"]}'}\n        assert _resolve_tool_arguments(obj) == {\"queries\": [\"test\"]}\n\n    def test_invalid_json_string_returns_empty_dict(self) -> None:\n        obj = {\"arguments\": \"not valid json\"}\n        assert _resolve_tool_arguments(obj) == {}\n\n    def test_no_arguments_or_parameters_returns_empty_dict(self) -> None:\n        obj = {\"name\": \"some_tool\"}\n        assert _resolve_tool_arguments(obj) == {}\n\n    def test_non_dict_non_string_arguments_returns_none(self) -> None:\n        \"\"\"When arguments resolves to a list or int, returns None.\"\"\"\n        assert _resolve_tool_arguments({\"arguments\": [1, 2, 3]}) is None\n        assert _resolve_tool_arguments({\"arguments\": 42}) is None\n\n\nclass TestTranslateHistoryToLlmFormat:\n    @staticmethod\n    def _llm_config(provider: str) -> LLMConfig:\n        return LLMConfig(\n            model_provider=provider,\n            model_name=\"test-model\",\n            temperature=0,\n            max_input_tokens=8192,\n        )\n\n    @staticmethod\n    def _tool_history() -> list[ChatMessageSimple]:\n        return [\n            ChatMessageSimple(\n                message=\"\",\n                token_count=5,\n                message_type=MessageType.ASSISTANT,\n                tool_calls=[\n                    ToolCallSimple(\n                        tool_call_id=\"51381e0b0\",\n                        tool_name=\"internal_search\",\n                        tool_arguments={\"queries\": [\"alpha\"]},\n                    )\n                ],\n            ),\n            ChatMessageSimple(\n                message=\"tool result body\",\n                token_count=5,\n                message_type=MessageType.TOOL_CALL_RESPONSE,\n                tool_call_id=\"51381e0b0\",\n            ),\n        ]\n\n    def test_preserves_structured_tool_history_for_non_ollama(self) -> None:\n        translated = translate_history_to_llm_format(\n            history=self._tool_history(),\n            llm_config=self._llm_config(LlmProviderNames.OPENAI),\n        )\n        assert isinstance(translated, list)\n\n        assert isinstance(translated[0], AssistantMessage)\n        assert translated[0].tool_calls is not None\n        assert translated[0].tool_calls[0].id == \"51381e0b0\"\n        assert isinstance(translated[1], ToolMessage)\n        assert translated[1].tool_call_id == \"51381e0b0\"\n\n    def test_flattens_tool_history_for_ollama(self) -> None:\n        translated = translate_history_to_llm_format(\n            history=self._tool_history(),\n            llm_config=self._llm_config(LlmProviderNames.OLLAMA_CHAT),\n        )\n        assert isinstance(translated, list)\n\n        assert isinstance(translated[0], AssistantMessage)\n        assert translated[0].tool_calls is None\n        assert translated[0].content is not None\n        assert \"51381e0b0\" in translated[0].content\n\n        assert isinstance(translated[1], UserMessage)\n        assert \"51381e0b0\" in translated[1].content\n        assert \"tool result body\" in translated[1].content\n\n    def test_flattens_multiple_assistant_tool_calls_for_ollama(self) -> None:\n        history = [\n            ChatMessageSimple(\n                message=\"I will use tools now.\",\n                token_count=5,\n                message_type=MessageType.ASSISTANT,\n                tool_calls=[\n                    ToolCallSimple(\n                        tool_call_id=\"call-a\",\n                        tool_name=\"internal_search\",\n                        tool_arguments={\"queries\": [\"alpha\"]},\n                    ),\n                    ToolCallSimple(\n                        tool_call_id=\"call-b\",\n                        tool_name=\"internal_search\",\n                        tool_arguments={\"queries\": [\"beta\"]},\n                    ),\n                ],\n            )\n        ]\n        translated = translate_history_to_llm_format(\n            history=history,\n            llm_config=self._llm_config(LlmProviderNames.OLLAMA_CHAT),\n        )\n\n        assert isinstance(translated, list)\n        assert isinstance(translated[0], AssistantMessage)\n        assert translated[0].tool_calls is None\n        assert translated[0].content == (\n            \"I will use tools now.\\n\"\n            '[Tool Call] name=internal_search id=call-a args={\"queries\": [\"alpha\"]}\\n'\n            '[Tool Call] name=internal_search id=call-b args={\"queries\": [\"beta\"]}'\n        )\n\n    @pytest.mark.parametrize(\n        \"provider\",\n        [\n            LlmProviderNames.OPENAI,\n            LlmProviderNames.OLLAMA_CHAT,\n        ],\n    )\n    def test_tool_call_response_requires_tool_call_id(self, provider: str) -> None:\n        with pytest.raises(ValueError, match=\"tool_call_id is not available\"):\n            translate_history_to_llm_format(\n                history=[\n                    ChatMessageSimple(\n                        message=\"tool result body\",\n                        token_count=5,\n                        message_type=MessageType.TOOL_CALL_RESPONSE,\n                        tool_call_id=None,\n                    )\n                ],\n                llm_config=self._llm_config(provider),\n            )\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_multi_model_streaming.py",
    "content": "\"\"\"Unit tests for multi-model streaming validation and DB helpers.\n\nThese are pure unit tests — no real database or LLM calls required.\nThe validation logic in handle_multi_model_stream fires before any external\ncalls, so we can trigger it with lightweight mocks.\n\"\"\"\n\nimport time\nfrom collections.abc import Generator\nfrom typing import Any\nfrom typing import cast\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\n\nfrom onyx.chat.models import StreamingError\nfrom onyx.configs.constants import MessageType\nfrom onyx.db.chat import set_preferred_response\nfrom onyx.llm.override_models import LLMOverride\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.streaming_models import OverallStop\nfrom onyx.server.query_and_chat.streaming_models import Packet\nfrom onyx.server.query_and_chat.streaming_models import ReasoningStart\nfrom onyx.utils.variable_functionality import global_version\n\n\n@pytest.fixture(autouse=True)\ndef _restore_ee_version() -> Generator[None, None, None]:\n    \"\"\"Reset EE global state after each test.\n\n    Importing onyx.chat.process_message triggers set_is_ee_based_on_env_variable()\n    (via the celery client import chain).  Without this fixture, the EE flag stays\n    True for the rest of the session and breaks unrelated tests that mock Confluence\n    or other connectors and assume EE is disabled.\n    \"\"\"\n    original = global_version._is_ee\n    yield\n    global_version._is_ee = original\n\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef _make_request(**kwargs: Any) -> SendMessageRequest:\n    defaults: dict[str, Any] = {\n        \"message\": \"hello\",\n        \"chat_session_id\": uuid4(),\n    }\n    defaults.update(kwargs)\n    return SendMessageRequest(**defaults)\n\n\ndef _make_override(provider: str = \"openai\", version: str = \"gpt-4\") -> LLMOverride:\n    return LLMOverride(model_provider=provider, model_version=version)\n\n\ndef _first_from_stream(req: SendMessageRequest, overrides: list[LLMOverride]) -> Any:\n    \"\"\"Return the first item yielded by handle_multi_model_stream.\"\"\"\n    from onyx.chat.process_message import handle_multi_model_stream\n\n    user = MagicMock()\n    user.is_anonymous = False\n    user.email = \"test@example.com\"\n    db = MagicMock()\n\n    gen = handle_multi_model_stream(req, user, db, overrides)\n    return next(gen)\n\n\n# ---------------------------------------------------------------------------\n# handle_multi_model_stream — validation\n# ---------------------------------------------------------------------------\n\n\nclass TestRunMultiModelStreamValidation:\n    def test_single_override_yields_error(self) -> None:\n        \"\"\"Exactly 1 override is not multi-model — yields StreamingError.\"\"\"\n        req = _make_request()\n        result = _first_from_stream(req, [_make_override()])\n        assert isinstance(result, StreamingError)\n        assert \"2-3\" in result.error\n\n    def test_four_overrides_yields_error(self) -> None:\n        \"\"\"4 overrides exceeds maximum — yields StreamingError.\"\"\"\n        req = _make_request()\n        result = _first_from_stream(\n            req,\n            [\n                _make_override(\"openai\", \"gpt-4\"),\n                _make_override(\"anthropic\", \"claude-3\"),\n                _make_override(\"google\", \"gemini-pro\"),\n                _make_override(\"cohere\", \"command-r\"),\n            ],\n        )\n        assert isinstance(result, StreamingError)\n        assert \"2-3\" in result.error\n\n    def test_zero_overrides_yields_error(self) -> None:\n        \"\"\"Empty override list yields StreamingError.\"\"\"\n        req = _make_request()\n        result = _first_from_stream(req, [])\n        assert isinstance(result, StreamingError)\n        assert \"2-3\" in result.error\n\n    def test_deep_research_yields_error(self) -> None:\n        \"\"\"deep_research=True is incompatible with multi-model — yields StreamingError.\"\"\"\n        req = _make_request(deep_research=True)\n        result = _first_from_stream(\n            req, [_make_override(), _make_override(\"anthropic\", \"claude-3\")]\n        )\n        assert isinstance(result, StreamingError)\n        assert \"not supported\" in result.error\n\n    def test_exactly_two_overrides_is_minimum(self) -> None:\n        \"\"\"Boundary: 1 override yields error, 2 overrides passes validation.\"\"\"\n        req = _make_request()\n        # 1 override must yield a StreamingError\n        result = _first_from_stream(req, [_make_override()])\n        assert isinstance(\n            result, StreamingError\n        ), \"1 override should yield StreamingError\"\n        # 2 overrides must NOT yield a validation StreamingError (may raise later due to\n        # missing session, that's OK — validation itself passed)\n        try:\n            result2 = _first_from_stream(\n                req, [_make_override(), _make_override(\"anthropic\", \"claude-3\")]\n            )\n            if isinstance(result2, StreamingError) and \"2-3\" in result2.error:\n                pytest.fail(\n                    f\"2 overrides should pass validation, got StreamingError: {result2.error}\"\n                )\n        except Exception:\n            pass  # Any non-validation error means validation passed\n\n\n# ---------------------------------------------------------------------------\n# set_preferred_response — validation (mocked db)\n# ---------------------------------------------------------------------------\n\n\nclass TestSetPreferredResponseValidation:\n    def test_user_message_not_found(self) -> None:\n        db = MagicMock()\n        db.get.return_value = None\n\n        with pytest.raises(ValueError, match=\"not found\"):\n            set_preferred_response(\n                db, user_message_id=999, preferred_assistant_message_id=1\n            )\n\n    def test_wrong_message_type(self) -> None:\n        \"\"\"Cannot set preferred response on a non-USER message.\"\"\"\n        db = MagicMock()\n        user_msg = MagicMock()\n        user_msg.message_type = MessageType.ASSISTANT  # wrong type\n\n        db.get.return_value = user_msg\n\n        with pytest.raises(ValueError, match=\"not a user message\"):\n            set_preferred_response(\n                db, user_message_id=1, preferred_assistant_message_id=2\n            )\n\n    def test_assistant_message_not_found(self) -> None:\n        db = MagicMock()\n        user_msg = MagicMock()\n        user_msg.message_type = MessageType.USER\n\n        # First call returns user_msg, second call (for assistant) returns None\n        db.get.side_effect = [user_msg, None]\n\n        with pytest.raises(ValueError, match=\"not found\"):\n            set_preferred_response(\n                db, user_message_id=1, preferred_assistant_message_id=2\n            )\n\n    def test_assistant_not_child_of_user(self) -> None:\n        db = MagicMock()\n        user_msg = MagicMock()\n        user_msg.message_type = MessageType.USER\n\n        assistant_msg = MagicMock()\n        assistant_msg.parent_message_id = 999  # different parent\n\n        db.get.side_effect = [user_msg, assistant_msg]\n\n        with pytest.raises(ValueError, match=\"not a child\"):\n            set_preferred_response(\n                db, user_message_id=1, preferred_assistant_message_id=2\n            )\n\n    def test_valid_call_sets_preferred_response_id(self) -> None:\n        db = MagicMock()\n        user_msg = MagicMock()\n        user_msg.message_type = MessageType.USER\n\n        assistant_msg = MagicMock()\n        assistant_msg.parent_message_id = 1  # correct parent\n\n        db.get.side_effect = [user_msg, assistant_msg]\n\n        set_preferred_response(db, user_message_id=1, preferred_assistant_message_id=2)\n\n        assert user_msg.preferred_response_id == 2\n        assert user_msg.latest_child_message_id == 2\n\n\n# ---------------------------------------------------------------------------\n# LLMOverride — display_name field\n# ---------------------------------------------------------------------------\n\n\nclass TestLLMOverrideDisplayName:\n    def test_display_name_defaults_none(self) -> None:\n        override = LLMOverride(model_provider=\"openai\", model_version=\"gpt-4\")\n        assert override.display_name is None\n\n    def test_display_name_set(self) -> None:\n        override = LLMOverride(\n            model_provider=\"openai\",\n            model_version=\"gpt-4\",\n            display_name=\"GPT-4 Turbo\",\n        )\n        assert override.display_name == \"GPT-4 Turbo\"\n\n    def test_display_name_serializes(self) -> None:\n        override = LLMOverride(\n            model_provider=\"anthropic\",\n            model_version=\"claude-opus-4-6\",\n            display_name=\"Claude Opus\",\n        )\n        d = override.model_dump()\n        assert d[\"display_name\"] == \"Claude Opus\"\n\n\n# ---------------------------------------------------------------------------\n# _run_models — drain loop behaviour\n# ---------------------------------------------------------------------------\n\n\ndef _make_setup(n_models: int = 1) -> MagicMock:\n    \"\"\"Minimal ChatTurnSetup mock whose fields pass Pydantic validation in _run_model.\"\"\"\n    setup = MagicMock()\n    setup.llms = [MagicMock() for _ in range(n_models)]\n    setup.model_display_names = [f\"model-{i}\" for i in range(n_models)]\n    setup.check_is_connected = MagicMock(return_value=True)\n    setup.reserved_messages = [MagicMock() for _ in range(n_models)]\n    setup.reserved_token_count = 100\n    # Fields consumed by SearchToolConfig / CustomToolConfig / FileReaderToolConfig\n    # constructors inside _run_model — must be typed correctly for Pydantic.\n    setup.new_msg_req.deep_research = False\n    setup.new_msg_req.internal_search_filters = None\n    setup.new_msg_req.allowed_tool_ids = None\n    setup.new_msg_req.include_citations = True\n    setup.search_params.project_id_filter = None\n    setup.search_params.persona_id_filter = None\n    setup.bypass_acl = False\n    setup.slack_context = None\n    setup.available_files.user_file_ids = []\n    setup.available_files.chat_file_ids = []\n    setup.forced_tool_id = None\n    setup.simple_chat_history = []\n    setup.chat_session.id = uuid4()\n    setup.user_message.id = None\n    setup.custom_tool_additional_headers = None\n    setup.mcp_headers = None\n    return setup\n\n\ndef _run_models_collect(setup: MagicMock) -> list:\n    \"\"\"Drive _run_models to completion and return all yielded items.\"\"\"\n    from onyx.chat.process_message import _run_models\n\n    return list(_run_models(setup, MagicMock(), MagicMock()))\n\n\nclass TestRunModels:\n    \"\"\"Tests for the _run_models worker-thread drain loop.\n\n    All external dependencies (LLM, DB, tools) are patched out.  Worker threads\n    still run but return immediately since run_llm_loop is mocked.\n    \"\"\"\n\n    def test_n1_overall_stop_from_llm_loop_passes_through(self) -> None:\n        \"\"\"OverallStop emitted by run_llm_loop is passed through the drain loop unchanged.\"\"\"\n\n        def emit_stop(**kwargs: Any) -> None:\n            kwargs[\"emitter\"].emit(\n                Packet(\n                    placement=Placement(turn_index=0),\n                    obj=OverallStop(stop_reason=\"complete\"),\n                )\n            )\n\n        with (\n            patch(\"onyx.chat.process_message.run_llm_loop\", side_effect=emit_stop),\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\"onyx.chat.process_message.llm_loop_completion_handle\"),\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            packets = _run_models_collect(_make_setup(n_models=1))\n\n        stops = [\n            p\n            for p in packets\n            if isinstance(p, Packet) and isinstance(p.obj, OverallStop)\n        ]\n        assert len(stops) == 1\n        stop_obj = stops[0].obj\n        assert isinstance(stop_obj, OverallStop)\n        assert stop_obj.stop_reason == \"complete\"\n\n    def test_n1_emitted_packet_has_model_index_zero(self) -> None:\n        \"\"\"Single-model path: model_index is 0 (Emitter defaults model_idx=0).\"\"\"\n\n        def emit_one(**kwargs: Any) -> None:\n            kwargs[\"emitter\"].emit(\n                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())\n            )\n\n        with (\n            patch(\"onyx.chat.process_message.run_llm_loop\", side_effect=emit_one),\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\"onyx.chat.process_message.llm_loop_completion_handle\"),\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            packets = _run_models_collect(_make_setup(n_models=1))\n\n        reasoning = [\n            p\n            for p in packets\n            if isinstance(p, Packet) and isinstance(p.obj, ReasoningStart)\n        ]\n        assert len(reasoning) == 1\n        assert reasoning[0].placement.model_index == 0\n\n    def test_n2_each_model_packet_tagged_with_its_index(self) -> None:\n        \"\"\"Multi-model path: packets from model 0 get index=0, model 1 gets index=1.\"\"\"\n\n        def emit_one(**kwargs: Any) -> None:\n            # _model_idx is set by _run_model based on position in setup.llms\n            emitter = kwargs[\"emitter\"]\n            emitter.emit(\n                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())\n            )\n\n        with (\n            patch(\"onyx.chat.process_message.run_llm_loop\", side_effect=emit_one),\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\"onyx.chat.process_message.llm_loop_completion_handle\"),\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            packets = _run_models_collect(_make_setup(n_models=2))\n\n        reasoning = [\n            p\n            for p in packets\n            if isinstance(p, Packet) and isinstance(p.obj, ReasoningStart)\n        ]\n        assert len(reasoning) == 2\n        indices = {p.placement.model_index for p in reasoning}\n        assert indices == {0, 1}\n\n    def test_model_error_yields_streaming_error(self) -> None:\n        \"\"\"An exception inside a worker thread is surfaced as a StreamingError.\"\"\"\n\n        def always_fail(**_kwargs: Any) -> None:\n            raise RuntimeError(\"intentional test failure\")\n\n        with (\n            patch(\"onyx.chat.process_message.run_llm_loop\", side_effect=always_fail),\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\"onyx.chat.process_message.llm_loop_completion_handle\"),\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            packets = _run_models_collect(_make_setup(n_models=1))\n\n        errors = [p for p in packets if isinstance(p, StreamingError)]\n        assert len(errors) == 1\n        assert errors[0].error_code == \"MODEL_ERROR\"\n        assert \"intentional test failure\" in errors[0].error\n\n    def test_one_model_error_does_not_stop_other_models(self) -> None:\n        \"\"\"A failing model yields StreamingError; the surviving model's packets still arrive.\"\"\"\n        setup = _make_setup(n_models=2)\n\n        def fail_model_0_succeed_model_1(**kwargs: Any) -> None:\n            if kwargs[\"llm\"] is setup.llms[0]:\n                raise RuntimeError(\"model 0 failed\")\n            kwargs[\"emitter\"].emit(\n                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())\n            )\n\n        with (\n            patch(\n                \"onyx.chat.process_message.run_llm_loop\",\n                side_effect=fail_model_0_succeed_model_1,\n            ),\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\"onyx.chat.process_message.llm_loop_completion_handle\"),\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            packets = _run_models_collect(setup)\n\n        errors = [p for p in packets if isinstance(p, StreamingError)]\n        assert len(errors) == 1\n\n        reasoning = [\n            p\n            for p in packets\n            if isinstance(p, Packet) and isinstance(p.obj, ReasoningStart)\n        ]\n        assert len(reasoning) == 1\n        assert reasoning[0].placement.model_index == 1\n\n    def test_cancellation_yields_user_cancelled_stop(self) -> None:\n        \"\"\"If check_is_connected returns False, drain loop emits user_cancelled.\"\"\"\n\n        def slow_llm(**_kwargs: Any) -> None:\n            time.sleep(0.3)  # Outlasts the 50 ms queue-poll interval\n\n        setup = _make_setup(n_models=1)\n        setup.check_is_connected = MagicMock(return_value=False)\n\n        with (\n            patch(\"onyx.chat.process_message.run_llm_loop\", side_effect=slow_llm),\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\"onyx.chat.process_message.llm_loop_completion_handle\"),\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            packets = _run_models_collect(setup)\n\n        stops = [\n            p\n            for p in packets\n            if isinstance(p, Packet) and isinstance(p.obj, OverallStop)\n        ]\n        assert any(\n            isinstance(s.obj, OverallStop) and s.obj.stop_reason == \"user_cancelled\"\n            for s in stops\n        )\n\n    def test_stop_button_calls_completion_for_all_models(self) -> None:\n        \"\"\"llm_loop_completion_handle must be called for all models when the stop button fires.\n\n        Regression test for the disconnect-cleanup bug: the old\n        run_chat_loop_with_state_containers always called completion_callback in\n        its finally block (even on disconnect) so the DB message was updated from\n        the TERMINATED placeholder to a partial answer.  The new _run_models must\n        replicate this — otherwise the integration test\n        test_send_message_disconnect_and_cleanup fails because the message stays\n        as \"Response was terminated prior to completion, try regenerating.\"\n        \"\"\"\n\n        def slow_llm(**_kwargs: Any) -> None:\n            time.sleep(0.3)\n\n        setup = _make_setup(n_models=2)\n        setup.check_is_connected = MagicMock(return_value=False)\n\n        with (\n            patch(\"onyx.chat.process_message.run_llm_loop\", side_effect=slow_llm),\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\n                \"onyx.chat.process_message.llm_loop_completion_handle\"\n            ) as mock_handle,\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            _run_models_collect(setup)\n\n        # Must be called once per model, not zero times\n        assert mock_handle.call_count == 2\n\n    def test_completion_handle_called_for_each_successful_model(self) -> None:\n        \"\"\"llm_loop_completion_handle must be called once per model that succeeded.\"\"\"\n        setup = _make_setup(n_models=2)\n\n        with (\n            patch(\"onyx.chat.process_message.run_llm_loop\"),\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\n                \"onyx.chat.process_message.llm_loop_completion_handle\"\n            ) as mock_handle,\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            _run_models_collect(setup)\n\n        assert mock_handle.call_count == 2\n\n    def test_completion_handle_not_called_for_failed_model(self) -> None:\n        \"\"\"llm_loop_completion_handle must be skipped for a model that raised.\"\"\"\n\n        def always_fail(**_kwargs: Any) -> None:\n            raise RuntimeError(\"fail\")\n\n        with (\n            patch(\"onyx.chat.process_message.run_llm_loop\", side_effect=always_fail),\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\n                \"onyx.chat.process_message.llm_loop_completion_handle\"\n            ) as mock_handle,\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            _run_models_collect(_make_setup(n_models=1))\n\n        mock_handle.assert_not_called()\n\n    def test_http_disconnect_completion_via_generator_exit(self) -> None:\n        \"\"\"GeneratorExit from HTTP disconnect triggers main-thread completion.\n\n        When the HTTP client closes the connection, Starlette throws GeneratorExit\n        into the stream generator. The finally block sets drain_done (signalling\n        emitters to stop blocking), waits for workers via executor.shutdown(wait=True),\n        then calls llm_loop_completion_handle for each successful model from the main\n        thread.\n\n        This is the primary regression for test_send_message_disconnect_and_cleanup:\n        the integration test disconnects mid-stream and expects the DB message to be\n        updated from the TERMINATED placeholder to the real response.\n        \"\"\"\n        import threading\n\n        completion_called = threading.Event()\n\n        def emit_then_block_until_drain(**kwargs: Any) -> None:\n            \"\"\"Emit one packet (to give the drain loop a yield point), then block\n            until drain_done is set — simulating a mid-stream LLM call that exits\n            promptly once the emitter signals shutdown.\n            \"\"\"\n            emitter = kwargs[\"emitter\"]\n            emitter.emit(\n                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())\n            )\n            # Block until drain_done is set by gen.close(). The Emitter's _drain_done\n            # is the same Event that _run_models sets, so this unblocks promptly.\n            emitter._drain_done.wait(timeout=5)\n\n        setup = _make_setup(n_models=1)\n        # is_connected() always True — HTTP disconnect does NOT set the Redis stop fence.\n        setup.check_is_connected = MagicMock(return_value=True)\n\n        with (\n            patch(\n                \"onyx.chat.process_message.run_llm_loop\",\n                side_effect=emit_then_block_until_drain,\n            ),\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\n                \"onyx.chat.process_message.llm_loop_completion_handle\",\n                side_effect=lambda *_, **__: completion_called.set(),\n            ) as mock_handle,\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            from onyx.chat.process_message import _run_models\n\n            gen = cast(Generator, _run_models(setup, MagicMock(), MagicMock()))\n            first = next(gen)\n            assert isinstance(first, Packet)\n            # Simulate Starlette closing the stream on HTTP client disconnect.\n            # gen.close() → GeneratorExit → finally → drain_done.set() →\n            # executor.shutdown(wait=True) → main thread completes models.\n            gen.close()\n\n            assert (\n                completion_called.is_set()\n            ), \"main thread must call completion for the successful model\"\n            assert mock_handle.call_count == 1\n\n    def test_b1_race_disconnect_handler_completes_already_finished_model(self) -> None:\n        \"\"\"B1 regression: model finishes BEFORE GeneratorExit fires.\n\n        The worker exits _run_model before drain_done is set. When gen.close()\n        fires afterward, the finally block sets drain_done, waits for workers\n        (already done), then the main thread calls llm_loop_completion_handle.\n\n        Contrast with test_http_disconnect_completion_via_generator_exit, which\n        tests the opposite ordering (worker finishes AFTER disconnect).\n        \"\"\"\n        import threading\n        import time\n\n        completion_called = threading.Event()\n\n        def emit_and_return_immediately(**kwargs: Any) -> None:\n            # Emit one packet so the drain loop has something to yield, then return\n            # immediately — no blocking.  The worker will be done in microseconds.\n            kwargs[\"emitter\"].emit(\n                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())\n            )\n\n        setup = _make_setup(n_models=1)\n        setup.check_is_connected = MagicMock(return_value=True)\n\n        with (\n            patch(\n                \"onyx.chat.process_message.run_llm_loop\",\n                side_effect=emit_and_return_immediately,\n            ),\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\n                \"onyx.chat.process_message.llm_loop_completion_handle\",\n                side_effect=lambda *_, **__: completion_called.set(),\n            ) as mock_handle,\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            from onyx.chat.process_message import _run_models\n\n            gen = cast(Generator, _run_models(setup, MagicMock(), MagicMock()))\n            first = next(gen)\n            assert isinstance(first, Packet)\n\n            # Give the worker thread time to finish completely (emit + return +\n            # finally + self-completion check).  It does almost no work, so 100 ms\n            # is far more than enough while still keeping the test fast.\n            time.sleep(0.1)\n\n            # Now close — worker is already done, so else-branch handles completion.\n            gen.close()\n\n            assert completion_called.wait(\n                timeout=5\n            ), \"disconnect handler must call completion for a model that already finished\"\n            assert mock_handle.call_count == 1, \"completion must be called exactly once\"\n\n    def test_stop_button_does_not_call_completion_for_errored_model(self) -> None:\n        \"\"\"B2 regression: stop-button must NOT call completion for an errored model.\n\n        When model 0 raises an exception, its reserved ChatMessage must not be\n        saved with 'stopped by user' — that message is wrong for a model that\n        errored.  llm_loop_completion_handle must only be called for non-errored\n        models when the stop button fires.\n        \"\"\"\n\n        def fail_model_0(**kwargs: Any) -> None:\n            if kwargs[\"llm\"] is setup.llms[0]:\n                raise RuntimeError(\"model 0 errored\")\n            # Model 1: run forever (stop button fires before it finishes)\n            time.sleep(10)\n\n        setup = _make_setup(n_models=2)\n        # Return False immediately so the stop-button path fires while model 1\n        # is still sleeping (model 0 has already errored by then).\n        setup.check_is_connected = lambda: False\n\n        with (\n            patch(\"onyx.chat.process_message.run_llm_loop\", side_effect=fail_model_0),\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\n                \"onyx.chat.process_message.llm_loop_completion_handle\"\n            ) as mock_handle,\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            _run_models_collect(setup)\n\n        # Completion must NOT be called for model 0 (it errored).\n        # It MAY be called for model 1 (still in-flight when stop fired).\n        for call in mock_handle.call_args_list:\n            assert (\n                call.kwargs.get(\"llm\") is not setup.llms[0]\n            ), \"llm_loop_completion_handle must not be called for the errored model\"\n\n    def test_external_state_container_used_for_model_zero(self) -> None:\n        \"\"\"When provided, external_state_container is used as state_containers[0].\"\"\"\n        from onyx.chat.chat_state import ChatStateContainer\n        from onyx.chat.process_message import _run_models\n\n        external = ChatStateContainer()\n        setup = _make_setup(n_models=1)\n\n        with (\n            patch(\"onyx.chat.process_message.run_llm_loop\") as mock_llm,\n            patch(\"onyx.chat.process_message.run_deep_research_llm_loop\"),\n            patch(\"onyx.chat.process_message.construct_tools\", return_value={}),\n            patch(\"onyx.chat.process_message.get_session_with_current_tenant\"),\n            patch(\"onyx.chat.process_message.llm_loop_completion_handle\"),\n            patch(\n                \"onyx.chat.process_message.get_llm_token_counter\",\n                return_value=lambda _: 0,\n            ),\n        ):\n            list(\n                _run_models(\n                    setup, MagicMock(), MagicMock(), external_state_container=external\n                )\n            )\n\n        # The state_container kwarg passed to run_llm_loop must be the external one\n        call_kwargs = mock_llm.call_args.kwargs\n        assert call_kwargs[\"state_container\"] is external\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_multi_model_types.py",
    "content": "\"\"\"Unit tests for multi-model answer generation types.\n\nTests cover:\n- Placement.model_index serialization\n- MultiModelMessageResponseIDInfo round-trip\n- SendMessageRequest.llm_overrides backward compatibility\n- ChatMessageDetail new fields\n\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timezone\nfrom uuid import uuid4\n\nfrom onyx.llm.override_models import LLMOverride\nfrom onyx.server.query_and_chat.models import ChatMessageDetail\nfrom onyx.server.query_and_chat.models import ModelResponseSlot\nfrom onyx.server.query_and_chat.models import MultiModelMessageResponseIDInfo\nfrom onyx.server.query_and_chat.models import SendMessageRequest\nfrom onyx.server.query_and_chat.placement import Placement\n\n\nclass TestPlacementModelIndex:\n    def test_default_none(self) -> None:\n        p = Placement(turn_index=0)\n        assert p.model_index is None\n\n    def test_set_value(self) -> None:\n        p = Placement(turn_index=0, model_index=2)\n        assert p.model_index == 2\n\n    def test_serializes(self) -> None:\n        p = Placement(turn_index=0, tab_index=1, model_index=1)\n        d = p.model_dump()\n        assert d[\"model_index\"] == 1\n\n    def test_none_excluded_when_default(self) -> None:\n        p = Placement(turn_index=0)\n        d = p.model_dump()\n        assert d[\"model_index\"] is None\n\n\nclass TestMultiModelMessageResponseIDInfo:\n    def test_round_trip(self) -> None:\n        info = MultiModelMessageResponseIDInfo(\n            user_message_id=42,\n            responses=[\n                ModelResponseSlot(message_id=43, model_name=\"gpt-4\"),\n                ModelResponseSlot(message_id=44, model_name=\"claude-opus\"),\n                ModelResponseSlot(message_id=45, model_name=\"gemini-pro\"),\n            ],\n        )\n        d = info.model_dump()\n        restored = MultiModelMessageResponseIDInfo(**d)\n        assert restored.user_message_id == 42\n        assert [s.message_id for s in restored.responses] == [43, 44, 45]\n        assert [s.model_name for s in restored.responses] == [\n            \"gpt-4\",\n            \"claude-opus\",\n            \"gemini-pro\",\n        ]\n\n    def test_null_user_message_id(self) -> None:\n        info = MultiModelMessageResponseIDInfo(\n            user_message_id=None,\n            responses=[\n                ModelResponseSlot(message_id=1, model_name=\"a\"),\n                ModelResponseSlot(message_id=2, model_name=\"b\"),\n            ],\n        )\n        assert info.user_message_id is None\n\n\nclass TestSendMessageRequestOverrides:\n    def test_llm_overrides_default_none(self) -> None:\n        req = SendMessageRequest(\n            message=\"hello\",\n            chat_session_id=uuid4(),\n        )\n        assert req.llm_overrides is None\n\n    def test_llm_overrides_accepts_list(self) -> None:\n        overrides = [\n            LLMOverride(model_provider=\"openai\", model_version=\"gpt-4\"),\n            LLMOverride(model_provider=\"anthropic\", model_version=\"claude-opus\"),\n        ]\n        req = SendMessageRequest(\n            message=\"hello\",\n            chat_session_id=uuid4(),\n            llm_overrides=overrides,\n        )\n        assert req.llm_overrides is not None\n        assert len(req.llm_overrides) == 2\n\n    def test_backward_compat_single_override(self) -> None:\n        req = SendMessageRequest(\n            message=\"hello\",\n            chat_session_id=uuid4(),\n            llm_override=LLMOverride(model_provider=\"openai\", model_version=\"gpt-4\"),\n        )\n        assert req.llm_override is not None\n        assert req.llm_overrides is None\n\n\nclass TestChatMessageDetailMultiModel:\n    def test_defaults_none(self) -> None:\n        from onyx.configs.constants import MessageType\n\n        detail = ChatMessageDetail(\n            message_id=1,\n            message=\"hello\",\n            message_type=MessageType.ASSISTANT,\n            time_sent=datetime(2026, 3, 22, tzinfo=timezone.utc),\n            files=[],\n        )\n        assert detail.preferred_response_id is None\n        assert detail.model_display_name is None\n\n    def test_set_values(self) -> None:\n        from onyx.configs.constants import MessageType\n\n        detail = ChatMessageDetail(\n            message_id=1,\n            message=\"hello\",\n            message_type=MessageType.USER,\n            time_sent=datetime(2026, 3, 22, tzinfo=timezone.utc),\n            files=[],\n            preferred_response_id=42,\n            model_display_name=\"GPT-4\",\n        )\n        assert detail.preferred_response_id == 42\n        assert detail.model_display_name == \"GPT-4\"\n\n    def test_serializes(self) -> None:\n        from onyx.configs.constants import MessageType\n\n        detail = ChatMessageDetail(\n            message_id=1,\n            message=\"hello\",\n            message_type=MessageType.ASSISTANT,\n            time_sent=datetime(2026, 3, 22, tzinfo=timezone.utc),\n            files=[],\n            model_display_name=\"Claude Opus\",\n        )\n        d = detail.model_dump()\n        assert d[\"model_display_name\"] == \"Claude Opus\"\n        assert d[\"preferred_response_id\"] is None\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_process_message.py",
    "content": "import pytest\n\nfrom onyx.chat.process_message import _resolve_query_processing_hook_result\nfrom onyx.chat.process_message import remove_answer_citations\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.hooks.executor import HookSkipped\nfrom onyx.hooks.executor import HookSoftFailed\nfrom onyx.hooks.points.query_processing import QueryProcessingResponse\n\n\ndef test_remove_answer_citations_strips_http_markdown_citation() -> None:\n    answer = \"The answer is Paris [[1]](https://example.com/doc).\"\n\n    assert remove_answer_citations(answer) == \"The answer is Paris.\"\n\n\ndef test_remove_answer_citations_strips_empty_markdown_citation() -> None:\n    answer = \"The answer is Paris [[1]]().\"\n\n    assert remove_answer_citations(answer) == \"The answer is Paris.\"\n\n\ndef test_remove_answer_citations_strips_citation_with_parentheses_in_url() -> None:\n    answer = (\n        \"The answer is Paris \"\n        \"[[1]](https://en.wikipedia.org/wiki/Function_(mathematics)).\"\n    )\n\n    assert remove_answer_citations(answer) == \"The answer is Paris.\"\n\n\ndef test_remove_answer_citations_preserves_non_citation_markdown_links() -> None:\n    answer = (\n        \"See [reference](https://example.com/Function_(mathematics)) \"\n        \"for context [[1]](https://en.wikipedia.org/wiki/Function_(mathematics)).\"\n    )\n\n    assert (\n        remove_answer_citations(answer)\n        == \"See [reference](https://example.com/Function_(mathematics)) for context.\"\n    )\n\n\n# ---------------------------------------------------------------------------\n# Query Processing hook response handling (_resolve_query_processing_hook_result)\n# ---------------------------------------------------------------------------\n\n\ndef test_hook_skipped_leaves_message_text_unchanged() -> None:\n    result = _resolve_query_processing_hook_result(HookSkipped(), \"original query\")\n    assert result == \"original query\"\n\n\ndef test_hook_soft_failed_leaves_message_text_unchanged() -> None:\n    result = _resolve_query_processing_hook_result(HookSoftFailed(), \"original query\")\n    assert result == \"original query\"\n\n\ndef test_null_query_raises_query_rejected() -> None:\n    with pytest.raises(OnyxError) as exc_info:\n        _resolve_query_processing_hook_result(\n            QueryProcessingResponse(query=None), \"original query\"\n        )\n    assert exc_info.value.error_code is OnyxErrorCode.QUERY_REJECTED\n\n\ndef test_empty_string_query_raises_query_rejected() -> None:\n    \"\"\"Empty string is falsy — must be treated as rejection, same as None.\"\"\"\n    with pytest.raises(OnyxError) as exc_info:\n        _resolve_query_processing_hook_result(\n            QueryProcessingResponse(query=\"\"), \"original query\"\n        )\n    assert exc_info.value.error_code is OnyxErrorCode.QUERY_REJECTED\n\n\ndef test_whitespace_only_query_raises_query_rejected() -> None:\n    \"\"\"Whitespace-only string is truthy but meaningless — must be treated as rejection.\"\"\"\n    with pytest.raises(OnyxError) as exc_info:\n        _resolve_query_processing_hook_result(\n            QueryProcessingResponse(query=\"   \"), \"original query\"\n        )\n    assert exc_info.value.error_code is OnyxErrorCode.QUERY_REJECTED\n\n\ndef test_absent_query_field_raises_query_rejected() -> None:\n    \"\"\"query defaults to None when not provided.\"\"\"\n    with pytest.raises(OnyxError) as exc_info:\n        _resolve_query_processing_hook_result(\n            QueryProcessingResponse(), \"original query\"\n        )\n    assert exc_info.value.error_code is OnyxErrorCode.QUERY_REJECTED\n\n\ndef test_rejection_message_surfaced_in_error_when_provided() -> None:\n    with pytest.raises(OnyxError) as exc_info:\n        _resolve_query_processing_hook_result(\n            QueryProcessingResponse(\n                query=None, rejection_message=\"Queries about X are not allowed.\"\n            ),\n            \"original query\",\n        )\n    assert \"Queries about X are not allowed.\" in str(exc_info.value)\n\n\ndef test_fallback_rejection_message_when_none() -> None:\n    \"\"\"No rejection_message → generic fallback used in OnyxError detail.\"\"\"\n    with pytest.raises(OnyxError) as exc_info:\n        _resolve_query_processing_hook_result(\n            QueryProcessingResponse(query=None, rejection_message=None),\n            \"original query\",\n        )\n    assert \"No rejection reason was provided.\" in str(exc_info.value)\n\n\ndef test_nonempty_query_rewrites_message_text() -> None:\n    result = _resolve_query_processing_hook_result(\n        QueryProcessingResponse(query=\"rewritten query\"), \"original query\"\n    )\n    assert result == \"rewritten query\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_process_message_mock_llm.py",
    "content": "from unittest.mock import Mock\n\nimport pytest\n\nfrom onyx.chat import process_message\nfrom onyx.chat.models import AnswerStream\nfrom onyx.chat.models import StreamingError\nfrom onyx.configs import app_configs\nfrom onyx.server.query_and_chat.models import MessageResponseIDInfo\nfrom onyx.server.query_and_chat.models import SendMessageRequest\n\n\ndef test_mock_llm_response_requires_integration_mode() -> None:\n    assert (\n        app_configs.INTEGRATION_TESTS_MODE is False\n    ), \"Unit tests expect INTEGRATION_TESTS_MODE=false.\"\n    assert (\n        process_message.INTEGRATION_TESTS_MODE is False\n    ), \"process_message should reflect INTEGRATION_TESTS_MODE=false in unit tests.\"\n\n    request = SendMessageRequest(\n        message=\"test\",\n        mock_llm_response='{\"name\":\"internal_search\",\"arguments\":{\"queries\":[\"alpha\"]}}',\n    )\n    mock_user = Mock()\n    mock_user.id = \"user-id\"\n    mock_user.is_anonymous = False\n    mock_user.email = \"user@example.com\"\n\n    with pytest.raises(\n        ValueError,\n        match=\"mock_llm_response can only be used when INTEGRATION_TESTS_MODE=true\",\n    ):\n        next(\n            process_message.handle_stream_message_objects(\n                new_msg_req=request,\n                user=mock_user,\n                db_session=Mock(),\n            )\n        )\n\n\ndef test_gather_stream_returns_empty_answer_when_streaming_error_only() -> None:\n    packets: AnswerStream = iter(\n        [\n            MessageResponseIDInfo(\n                user_message_id=None,\n                reserved_assistant_message_id=42,\n            ),\n            StreamingError(\n                error=\"OpenAI quota exceeded\",\n                error_code=\"BUDGET_EXCEEDED\",\n                is_retryable=False,\n            ),\n        ]\n    )\n\n    result = process_message.gather_stream(packets)\n\n    assert result.answer == \"\"\n    assert result.answer_citationless == \"\"\n    assert result.error_msg == \"OpenAI quota exceeded\"\n    assert result.message_id == 42\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_save_chat.py",
    "content": "\"\"\"Tests for save_chat.py.\n\nCovers _extract_referenced_file_descriptors and sanitization in save_chat_turn.\n\"\"\"\n\nfrom unittest.mock import MagicMock\n\nfrom pytest import MonkeyPatch\n\nfrom onyx.chat import save_chat\nfrom onyx.chat.save_chat import _extract_referenced_file_descriptors\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.tools.models import PythonExecutionFile\nfrom onyx.tools.models import ToolCallInfo\n\n\ndef _make_tool_call_info(\n    generated_files: list[PythonExecutionFile] | None = None,\n    tool_name: str = \"python\",\n) -> ToolCallInfo:\n    return ToolCallInfo(\n        parent_tool_call_id=None,\n        turn_index=0,\n        tab_index=0,\n        tool_name=tool_name,\n        tool_call_id=\"tc_1\",\n        tool_id=1,\n        reasoning_tokens=None,\n        tool_call_arguments={\"code\": \"print('hi')\"},\n        tool_call_response=\"{}\",\n        generated_files=generated_files,\n    )\n\n\n# ---- _extract_referenced_file_descriptors tests ----\n\n\ndef test_returns_empty_when_no_generated_files() -> None:\n    tool_call = _make_tool_call_info(generated_files=None)\n    result = _extract_referenced_file_descriptors([tool_call], \"some message\")\n    assert result == []\n\n\ndef test_returns_empty_when_file_not_referenced() -> None:\n    files = [\n        PythonExecutionFile(\n            filename=\"chart.png\",\n            file_link=\"http://localhost/api/chat/file/abc-123\",\n        )\n    ]\n    tool_call = _make_tool_call_info(generated_files=files)\n    result = _extract_referenced_file_descriptors([tool_call], \"Here is your answer.\")\n    assert result == []\n\n\ndef test_extracts_referenced_file() -> None:\n    file_id = \"abc-123-def\"\n    files = [\n        PythonExecutionFile(\n            filename=\"chart.png\",\n            file_link=f\"http://localhost/api/chat/file/{file_id}\",\n        )\n    ]\n    tool_call = _make_tool_call_info(generated_files=files)\n    message = (\n        f\"Here is the chart: [chart.png](http://localhost/api/chat/file/{file_id})\"\n    )\n\n    result = _extract_referenced_file_descriptors([tool_call], message)\n\n    assert len(result) == 1\n    assert result[0][\"id\"] == file_id\n    assert result[0][\"type\"] == ChatFileType.IMAGE\n    assert result[0][\"name\"] == \"chart.png\"\n\n\ndef test_filters_unreferenced_files() -> None:\n    referenced_id = \"ref-111\"\n    unreferenced_id = \"unref-222\"\n    files = [\n        PythonExecutionFile(\n            filename=\"chart.png\",\n            file_link=f\"http://localhost/api/chat/file/{referenced_id}\",\n        ),\n        PythonExecutionFile(\n            filename=\"data.csv\",\n            file_link=f\"http://localhost/api/chat/file/{unreferenced_id}\",\n        ),\n    ]\n    tool_call = _make_tool_call_info(generated_files=files)\n    message = f\"Here is the chart: [chart.png](http://localhost/api/chat/file/{referenced_id})\"\n\n    result = _extract_referenced_file_descriptors([tool_call], message)\n\n    assert len(result) == 1\n    assert result[0][\"id\"] == referenced_id\n    assert result[0][\"name\"] == \"chart.png\"\n\n\ndef test_extracts_from_multiple_tool_calls() -> None:\n    id_1 = \"file-aaa\"\n    id_2 = \"file-bbb\"\n    tc1 = _make_tool_call_info(\n        generated_files=[\n            PythonExecutionFile(\n                filename=\"plot.png\",\n                file_link=f\"http://localhost/api/chat/file/{id_1}\",\n            )\n        ]\n    )\n    tc2 = _make_tool_call_info(\n        generated_files=[\n            PythonExecutionFile(\n                filename=\"report.csv\",\n                file_link=f\"http://localhost/api/chat/file/{id_2}\",\n            )\n        ]\n    )\n    message = f\"[plot.png](http://localhost/api/chat/file/{id_1}) and [report.csv](http://localhost/api/chat/file/{id_2})\"\n\n    result = _extract_referenced_file_descriptors([tc1, tc2], message)\n\n    assert len(result) == 2\n    ids = {d[\"id\"] for d in result}\n    assert ids == {id_1, id_2}\n\n\ndef test_csv_file_type() -> None:\n    file_id = \"csv-123\"\n    files = [\n        PythonExecutionFile(\n            filename=\"data.csv\",\n            file_link=f\"http://localhost/api/chat/file/{file_id}\",\n        )\n    ]\n    tool_call = _make_tool_call_info(generated_files=files)\n    message = f\"[data.csv](http://localhost/api/chat/file/{file_id})\"\n\n    result = _extract_referenced_file_descriptors([tool_call], message)\n\n    assert len(result) == 1\n    assert result[0][\"type\"] == ChatFileType.TABULAR\n\n\ndef test_unknown_extension_defaults_to_plain_text() -> None:\n    file_id = \"bin-456\"\n    files = [\n        PythonExecutionFile(\n            filename=\"output.xyz\",\n            file_link=f\"http://localhost/api/chat/file/{file_id}\",\n        )\n    ]\n    tool_call = _make_tool_call_info(generated_files=files)\n    message = f\"[output.xyz](http://localhost/api/chat/file/{file_id})\"\n\n    result = _extract_referenced_file_descriptors([tool_call], message)\n\n    assert len(result) == 1\n    assert result[0][\"type\"] == ChatFileType.PLAIN_TEXT\n\n\ndef test_skips_tool_calls_without_generated_files() -> None:\n    file_id = \"img-789\"\n    tc_no_files = _make_tool_call_info(generated_files=None)\n    tc_empty = _make_tool_call_info(generated_files=[])\n    tc_with_files = _make_tool_call_info(\n        generated_files=[\n            PythonExecutionFile(\n                filename=\"result.png\",\n                file_link=f\"http://localhost/api/chat/file/{file_id}\",\n            )\n        ]\n    )\n    message = f\"[result.png](http://localhost/api/chat/file/{file_id})\"\n\n    result = _extract_referenced_file_descriptors(\n        [tc_no_files, tc_empty, tc_with_files], message\n    )\n\n    assert len(result) == 1\n    assert result[0][\"id\"] == file_id\n\n\n# ---- save_chat_turn sanitization test ----\n\n\ndef test_save_chat_turn_sanitizes_message_and_reasoning(\n    monkeypatch: MonkeyPatch,\n) -> None:\n    mock_tokenizer = MagicMock()\n    mock_tokenizer.encode.return_value = [1, 2, 3]\n    monkeypatch.setattr(save_chat, \"get_tokenizer\", lambda *_a, **_kw: mock_tokenizer)\n\n    mock_msg = MagicMock()\n    mock_msg.id = 1\n    mock_msg.chat_session_id = \"test\"\n    mock_msg.files = None\n\n    mock_session = MagicMock()\n\n    save_chat.save_chat_turn(\n        message_text=\"hello\\x00world\\ud800\",\n        reasoning_tokens=\"think\\x00ing\\udfff\",\n        tool_calls=[],\n        citation_to_doc={},\n        all_search_docs={},\n        db_session=mock_session,\n        assistant_message=mock_msg,\n    )\n\n    assert mock_msg.message == \"helloworld\"\n    assert mock_msg.reasoning_tokens == \"thinking\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/chat/test_stop_signal_checker.py",
    "content": "\"\"\"Unit tests for stop_signal_checker and chat_processing_checker.\n\nThese modules are safety-critical — they control whether a chat stream\ncontinues or stops.  The tests use a simple in-memory CacheBackend stub\nso no external services are needed.\n\"\"\"\n\nfrom uuid import uuid4\n\nfrom onyx.cache.interface import CacheBackend\nfrom onyx.cache.interface import CacheLock\nfrom onyx.chat.chat_processing_checker import is_chat_session_processing\nfrom onyx.chat.chat_processing_checker import set_processing_status\nfrom onyx.chat.stop_signal_checker import FENCE_TTL\nfrom onyx.chat.stop_signal_checker import is_connected\nfrom onyx.chat.stop_signal_checker import reset_cancel_status\nfrom onyx.chat.stop_signal_checker import set_fence\n\n\nclass _MemoryCacheBackend(CacheBackend):\n    \"\"\"Minimal in-memory CacheBackend for unit tests.\"\"\"\n\n    def __init__(self) -> None:\n        self._store: dict[str, bytes] = {}\n\n    def get(self, key: str) -> bytes | None:\n        return self._store.get(key)\n\n    def set(\n        self,\n        key: str,\n        value: str | bytes | int | float,\n        ex: int | None = None,  # noqa: ARG002\n    ) -> None:\n        if isinstance(value, bytes):\n            self._store[key] = value\n        else:\n            self._store[key] = str(value).encode()\n\n    def delete(self, key: str) -> None:\n        self._store.pop(key, None)\n\n    def exists(self, key: str) -> bool:\n        return key in self._store\n\n    def expire(self, key: str, seconds: int) -> None:\n        pass\n\n    def ttl(self, key: str) -> int:\n        return -2 if key not in self._store else -1\n\n    def lock(self, name: str, timeout: float | None = None) -> CacheLock:\n        raise NotImplementedError\n\n    def rpush(self, key: str, value: str | bytes) -> None:\n        raise NotImplementedError\n\n    def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:\n        raise NotImplementedError\n\n\n# ── stop_signal_checker ──────────────────────────────────────────────\n\n\nclass TestSetFence:\n    def test_set_fence_true_creates_key(self) -> None:\n        cache = _MemoryCacheBackend()\n        sid = uuid4()\n        set_fence(sid, cache, True)\n        assert not is_connected(sid, cache)\n\n    def test_set_fence_false_removes_key(self) -> None:\n        cache = _MemoryCacheBackend()\n        sid = uuid4()\n        set_fence(sid, cache, True)\n        set_fence(sid, cache, False)\n        assert is_connected(sid, cache)\n\n    def test_set_fence_false_noop_when_absent(self) -> None:\n        cache = _MemoryCacheBackend()\n        sid = uuid4()\n        set_fence(sid, cache, False)\n        assert is_connected(sid, cache)\n\n    def test_set_fence_uses_ttl(self) -> None:\n        \"\"\"Verify set_fence passes ex=FENCE_TTL to cache.set.\"\"\"\n        calls: list[dict[str, object]] = []\n        cache = _MemoryCacheBackend()\n        original_set = cache.set\n\n        def tracking_set(\n            key: str,\n            value: str | bytes | int | float,\n            ex: int | None = None,\n        ) -> None:\n            calls.append({\"key\": key, \"ex\": ex})\n            original_set(key, value, ex=ex)\n\n        cache.set = tracking_set  # type: ignore[method-assign]\n\n        set_fence(uuid4(), cache, True)\n        assert len(calls) == 1\n        assert calls[0][\"ex\"] == FENCE_TTL\n\n\nclass TestIsConnected:\n    def test_connected_when_no_fence(self) -> None:\n        cache = _MemoryCacheBackend()\n        assert is_connected(uuid4(), cache)\n\n    def test_disconnected_when_fence_set(self) -> None:\n        cache = _MemoryCacheBackend()\n        sid = uuid4()\n        set_fence(sid, cache, True)\n        assert not is_connected(sid, cache)\n\n    def test_sessions_are_isolated(self) -> None:\n        cache = _MemoryCacheBackend()\n        sid1, sid2 = uuid4(), uuid4()\n        set_fence(sid1, cache, True)\n        assert not is_connected(sid1, cache)\n        assert is_connected(sid2, cache)\n\n\nclass TestResetCancelStatus:\n    def test_clears_fence(self) -> None:\n        cache = _MemoryCacheBackend()\n        sid = uuid4()\n        set_fence(sid, cache, True)\n        reset_cancel_status(sid, cache)\n        assert is_connected(sid, cache)\n\n    def test_noop_when_no_fence(self) -> None:\n        cache = _MemoryCacheBackend()\n        reset_cancel_status(uuid4(), cache)\n\n\n# ── chat_processing_checker ──────────────────────────────────────────\n\n\nclass TestSetProcessingStatus:\n    def test_set_true_marks_processing(self) -> None:\n        cache = _MemoryCacheBackend()\n        sid = uuid4()\n        set_processing_status(sid, cache, True)\n        assert is_chat_session_processing(sid, cache)\n\n    def test_set_false_clears_processing(self) -> None:\n        cache = _MemoryCacheBackend()\n        sid = uuid4()\n        set_processing_status(sid, cache, True)\n        set_processing_status(sid, cache, False)\n        assert not is_chat_session_processing(sid, cache)\n\n\nclass TestIsChatSessionProcessing:\n    def test_not_processing_by_default(self) -> None:\n        cache = _MemoryCacheBackend()\n        assert not is_chat_session_processing(uuid4(), cache)\n\n    def test_sessions_are_isolated(self) -> None:\n        cache = _MemoryCacheBackend()\n        sid1, sid2 = uuid4(), uuid4()\n        set_processing_status(sid1, cache, True)\n        assert is_chat_session_processing(sid1, cache)\n        assert not is_chat_session_processing(sid2, cache)\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/airtable/test_airtable_index_all.py",
    "content": "from typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.connectors.airtable.airtable_connector import AirtableConnector\nfrom onyx.connectors.airtable.airtable_connector import parse_airtable_url\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.models import Document\n\n\ndef _make_field_schema(field_id: str, name: str, field_type: str) -> MagicMock:\n    field = MagicMock()\n    field.id = field_id\n    field.name = name\n    field.type = field_type\n    return field\n\n\ndef _make_table_schema(\n    table_id: str,\n    table_name: str,\n    primary_field_id: str,\n    fields: list[MagicMock],\n) -> MagicMock:\n    schema = MagicMock()\n    schema.id = table_id\n    schema.name = table_name\n    schema.primary_field_id = primary_field_id\n    schema.fields = fields\n    schema.views = []\n    return schema\n\n\ndef _make_record(record_id: str, fields: dict[str, Any]) -> dict[str, Any]:\n    return {\"id\": record_id, \"fields\": fields}\n\n\ndef _make_base_info(base_id: str, name: str) -> MagicMock:\n    info = MagicMock()\n    info.id = base_id\n    info.name = name\n    return info\n\n\ndef _make_table_obj(table_id: str, name: str) -> MagicMock:\n    obj = MagicMock()\n    obj.id = table_id\n    obj.name = name\n    return obj\n\n\ndef _setup_mock_api(\n    bases: list[dict[str, Any]],\n) -> MagicMock:\n    \"\"\"Set up a mock AirtableApi with bases, tables, records, and schemas.\n\n    Args:\n        bases: List of dicts with keys: id, name, tables.\n               Each table is a dict with: id, name, primary_field_id, fields, records.\n               Each field is a dict with: id, name, type.\n               Each record is a dict with: id, fields.\n    \"\"\"\n    mock_api = MagicMock()\n\n    base_infos = [_make_base_info(b[\"id\"], b[\"name\"]) for b in bases]\n    mock_api.bases.return_value = base_infos\n\n    def base_side_effect(base_id: str) -> MagicMock:\n        mock_base = MagicMock()\n        base_data = next((b for b in bases if b[\"id\"] == base_id), None)\n        if not base_data:\n            raise ValueError(f\"Unknown base: {base_id}\")\n\n        table_objs = [_make_table_obj(t[\"id\"], t[\"name\"]) for t in base_data[\"tables\"]]\n        mock_base.tables.return_value = table_objs\n        return mock_base\n\n    mock_api.base.side_effect = base_side_effect\n\n    def table_side_effect(base_id: str, table_name_or_id: str) -> MagicMock:\n        base_data = next((b for b in bases if b[\"id\"] == base_id), None)\n        if not base_data:\n            raise ValueError(f\"Unknown base: {base_id}\")\n\n        table_data = next(\n            (\n                t\n                for t in base_data[\"tables\"]\n                if t[\"id\"] == table_name_or_id or t[\"name\"] == table_name_or_id\n            ),\n            None,\n        )\n        if not table_data:\n            raise ValueError(f\"Unknown table: {table_name_or_id}\")\n\n        mock_table = MagicMock()\n        mock_table.name = table_data[\"name\"]\n        mock_table.all.return_value = [\n            _make_record(r[\"id\"], r[\"fields\"]) for r in table_data[\"records\"]\n        ]\n\n        field_schemas = [\n            _make_field_schema(f[\"id\"], f[\"name\"], f[\"type\"])\n            for f in table_data[\"fields\"]\n        ]\n        schema = _make_table_schema(\n            table_data[\"id\"],\n            table_data[\"name\"],\n            table_data[\"primary_field_id\"],\n            field_schemas,\n        )\n        mock_table.schema.return_value = schema\n        return mock_table\n\n    mock_api.table.side_effect = table_side_effect\n    return mock_api\n\n\nSAMPLE_BASES = [\n    {\n        \"id\": \"appBASE1\",\n        \"name\": \"Base One\",\n        \"tables\": [\n            {\n                \"id\": \"tblTABLE1\",\n                \"name\": \"Table A\",\n                \"primary_field_id\": \"fld1\",\n                \"fields\": [\n                    {\"id\": \"fld1\", \"name\": \"Name\", \"type\": \"singleLineText\"},\n                    {\"id\": \"fld2\", \"name\": \"Notes\", \"type\": \"multilineText\"},\n                ],\n                \"records\": [\n                    {\"id\": \"recA1\", \"fields\": {\"Name\": \"Alice\", \"Notes\": \"Note A\"}},\n                    {\"id\": \"recA2\", \"fields\": {\"Name\": \"Bob\", \"Notes\": \"Note B\"}},\n                ],\n            },\n            {\n                \"id\": \"tblTABLE2\",\n                \"name\": \"Table B\",\n                \"primary_field_id\": \"fld3\",\n                \"fields\": [\n                    {\"id\": \"fld3\", \"name\": \"Title\", \"type\": \"singleLineText\"},\n                    {\"id\": \"fld4\", \"name\": \"Status\", \"type\": \"singleSelect\"},\n                ],\n                \"records\": [\n                    {\"id\": \"recB1\", \"fields\": {\"Title\": \"Task 1\", \"Status\": \"Done\"}},\n                ],\n            },\n        ],\n    },\n    {\n        \"id\": \"appBASE2\",\n        \"name\": \"Base Two\",\n        \"tables\": [\n            {\n                \"id\": \"tblTABLE3\",\n                \"name\": \"Table C\",\n                \"primary_field_id\": \"fld5\",\n                \"fields\": [\n                    {\"id\": \"fld5\", \"name\": \"Item\", \"type\": \"singleLineText\"},\n                ],\n                \"records\": [\n                    {\"id\": \"recC1\", \"fields\": {\"Item\": \"Widget\"}},\n                ],\n            },\n        ],\n    },\n]\n\n\ndef _collect_docs(connector: AirtableConnector) -> list[Document]:\n    docs: list[Document] = []\n    for batch in connector.load_from_state():\n        for item in batch:\n            if isinstance(item, Document):\n                docs.append(item)\n    return docs\n\n\nclass TestIndexAll:\n    @patch(\"time.sleep\")\n    def test_index_all_discovers_all_bases_and_tables(\n        self,\n        mock_sleep: MagicMock,  # noqa: ARG002\n    ) -> None:\n        connector = AirtableConnector()\n        mock_api = _setup_mock_api(SAMPLE_BASES)\n        connector._airtable_client = mock_api\n\n        docs = _collect_docs(connector)\n\n        # 2 records from Table A + 1 from Table B + 1 from Table C = 4\n        assert len(docs) == 4\n        doc_ids = {d.id for d in docs}\n        assert doc_ids == {\n            \"airtable__recA1\",\n            \"airtable__recA2\",\n            \"airtable__recB1\",\n            \"airtable__recC1\",\n        }\n\n    @patch(\"time.sleep\")\n    def test_index_all_semantic_id_includes_base_name(\n        self,\n        mock_sleep: MagicMock,  # noqa: ARG002\n    ) -> None:\n        connector = AirtableConnector()\n        mock_api = _setup_mock_api(SAMPLE_BASES)\n        connector._airtable_client = mock_api\n\n        docs = _collect_docs(connector)\n        docs_by_id = {d.id: d for d in docs}\n\n        assert (\n            docs_by_id[\"airtable__recA1\"].semantic_identifier\n            == \"Base One > Table A: Alice\"\n        )\n        assert (\n            docs_by_id[\"airtable__recB1\"].semantic_identifier\n            == \"Base One > Table B: Task 1\"\n        )\n        assert (\n            docs_by_id[\"airtable__recC1\"].semantic_identifier\n            == \"Base Two > Table C: Widget\"\n        )\n\n    @patch(\"time.sleep\")\n    def test_index_all_hierarchy_source_path(\n        self,\n        mock_sleep: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Verify doc_metadata hierarchy source_path is [base_name, table_name].\"\"\"\n        connector = AirtableConnector()\n        mock_api = _setup_mock_api(SAMPLE_BASES)\n        connector._airtable_client = mock_api\n\n        docs = _collect_docs(connector)\n        docs_by_id = {d.id: d for d in docs}\n\n        doc_a1 = docs_by_id[\"airtable__recA1\"]\n        assert doc_a1.doc_metadata is not None\n        assert doc_a1.doc_metadata[\"hierarchy\"][\"source_path\"] == [\n            \"Base One\",\n            \"Table A\",\n        ]\n        assert doc_a1.doc_metadata[\"hierarchy\"][\"base_name\"] == \"Base One\"\n        assert doc_a1.doc_metadata[\"hierarchy\"][\"table_name\"] == \"Table A\"\n\n        doc_c1 = docs_by_id[\"airtable__recC1\"]\n        assert doc_c1.doc_metadata is not None\n        assert doc_c1.doc_metadata[\"hierarchy\"][\"source_path\"] == [\n            \"Base Two\",\n            \"Table C\",\n        ]\n\n    @patch(\"time.sleep\")\n    def test_index_all_empty_account(\n        self,\n        mock_sleep: MagicMock,  # noqa: ARG002\n    ) -> None:\n        connector = AirtableConnector()\n        mock_api = MagicMock()\n        mock_api.bases.return_value = []\n        connector._airtable_client = mock_api\n\n        docs = _collect_docs(connector)\n        assert len(docs) == 0\n\n    @patch(\"time.sleep\")\n    def test_index_all_skips_failing_table(\n        self,\n        mock_sleep: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"If one table fails, other tables should still be indexed.\"\"\"\n        bases = [\n            {\n                \"id\": \"appBASE1\",\n                \"name\": \"Base One\",\n                \"tables\": [\n                    {\n                        \"id\": \"tblGOOD\",\n                        \"name\": \"Good Table\",\n                        \"primary_field_id\": \"fld1\",\n                        \"fields\": [\n                            {\"id\": \"fld1\", \"name\": \"Name\", \"type\": \"singleLineText\"},\n                        ],\n                        \"records\": [\n                            {\"id\": \"recOK\", \"fields\": {\"Name\": \"Works\"}},\n                        ],\n                    },\n                    {\n                        \"id\": \"tblBAD\",\n                        \"name\": \"Bad Table\",\n                        \"primary_field_id\": \"fldX\",\n                        \"fields\": [],\n                        \"records\": [],\n                    },\n                ],\n            },\n        ]\n        mock_api = _setup_mock_api(bases)\n\n        # Make the bad table raise an error when fetching records\n        original_table_side_effect = mock_api.table.side_effect\n\n        def table_with_failure(base_id: str, table_name_or_id: str) -> MagicMock:\n            if table_name_or_id == \"tblBAD\":\n                mock_table = MagicMock()\n                mock_table.all.side_effect = Exception(\"API Error\")\n                mock_table.schema.side_effect = Exception(\"API Error\")\n                return mock_table\n            return original_table_side_effect(base_id, table_name_or_id)\n\n        mock_api.table.side_effect = table_with_failure\n        connector = AirtableConnector()\n        connector._airtable_client = mock_api\n\n        docs = _collect_docs(connector)\n\n        # Only the good table's records should come through\n        assert len(docs) == 1\n        assert docs[0].id == \"airtable__recOK\"\n\n    @patch(\"time.sleep\")\n    def test_index_all_skips_failing_base(\n        self,\n        mock_sleep: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"If listing tables for a base fails, other bases should still be indexed.\"\"\"\n        bases_data = [\n            {\n                \"id\": \"appGOOD\",\n                \"name\": \"Good Base\",\n                \"tables\": [\n                    {\n                        \"id\": \"tblOK\",\n                        \"name\": \"OK Table\",\n                        \"primary_field_id\": \"fld1\",\n                        \"fields\": [\n                            {\"id\": \"fld1\", \"name\": \"Name\", \"type\": \"singleLineText\"},\n                        ],\n                        \"records\": [\n                            {\"id\": \"recOK\", \"fields\": {\"Name\": \"Works\"}},\n                        ],\n                    },\n                ],\n            },\n        ]\n        mock_api = _setup_mock_api(bases_data)\n\n        # Add a bad base that fails on tables()\n        bad_base_info = _make_base_info(\"appBAD\", \"Bad Base\")\n        mock_api.bases.return_value = [\n            bad_base_info,\n            *mock_api.bases.return_value,\n        ]\n\n        original_base_side_effect = mock_api.base.side_effect\n\n        def base_with_failure(base_id: str) -> MagicMock:\n            if base_id == \"appBAD\":\n                mock_base = MagicMock()\n                mock_base.tables.side_effect = Exception(\"Permission denied\")\n                return mock_base\n            return original_base_side_effect(base_id)\n\n        mock_api.base.side_effect = base_with_failure\n\n        connector = AirtableConnector()\n        connector._airtable_client = mock_api\n\n        docs = _collect_docs(connector)\n\n        assert len(docs) == 1\n        assert docs[0].id == \"airtable__recOK\"\n\n\nclass TestSpecificTableMode:\n    def test_specific_table_unchanged(self) -> None:\n        \"\"\"Verify the original single-table behavior still works.\"\"\"\n        bases = [\n            {\n                \"id\": \"appBASE1\",\n                \"name\": \"Base One\",\n                \"tables\": [\n                    {\n                        \"id\": \"tblTABLE1\",\n                        \"name\": \"Table A\",\n                        \"primary_field_id\": \"fld1\",\n                        \"fields\": [\n                            {\"id\": \"fld1\", \"name\": \"Name\", \"type\": \"singleLineText\"},\n                            {\"id\": \"fld2\", \"name\": \"Notes\", \"type\": \"multilineText\"},\n                        ],\n                        \"records\": [\n                            {\n                                \"id\": \"recA1\",\n                                \"fields\": {\"Name\": \"Alice\", \"Notes\": \"Note\"},\n                            },\n                        ],\n                    },\n                ],\n            },\n        ]\n        mock_api = _setup_mock_api(bases)\n\n        connector = AirtableConnector(\n            base_id=\"appBASE1\",\n            table_name_or_id=\"tblTABLE1\",\n        )\n        connector._airtable_client = mock_api\n\n        docs = _collect_docs(connector)\n\n        assert len(docs) == 1\n        assert docs[0].id == \"airtable__recA1\"\n        # No base name prefix in specific mode\n        assert docs[0].semantic_identifier == \"Table A: Alice\"\n\n    def test_specific_table_resolves_base_name_for_hierarchy(self) -> None:\n        \"\"\"In specific mode, bases() is called to resolve the base name for hierarchy.\"\"\"\n        bases = [\n            {\n                \"id\": \"appBASE1\",\n                \"name\": \"Base One\",\n                \"tables\": [\n                    {\n                        \"id\": \"tblTABLE1\",\n                        \"name\": \"Table A\",\n                        \"primary_field_id\": \"fld1\",\n                        \"fields\": [\n                            {\"id\": \"fld1\", \"name\": \"Name\", \"type\": \"singleLineText\"},\n                        ],\n                        \"records\": [\n                            {\"id\": \"recA1\", \"fields\": {\"Name\": \"Test\"}},\n                        ],\n                    },\n                ],\n            },\n        ]\n        mock_api = _setup_mock_api(bases)\n\n        connector = AirtableConnector(\n            base_id=\"appBASE1\",\n            table_name_or_id=\"tblTABLE1\",\n        )\n        connector._airtable_client = mock_api\n\n        docs = _collect_docs(connector)\n\n        # bases() is called to resolve the base name for hierarchy source_path\n        mock_api.bases.assert_called_once()\n        # But base().tables() should NOT be called (no discovery)\n        mock_api.base.assert_not_called()\n        # Semantic identifier should NOT include base name in specific mode\n        assert docs[0].semantic_identifier == \"Table A: Test\"\n        # Hierarchy should include base name for Craft file system\n        assert docs[0].doc_metadata is not None\n        assert docs[0].doc_metadata[\"hierarchy\"][\"source_path\"] == [\n            \"Base One\",\n            \"Table A\",\n        ]\n\n\nclass TestValidateConnectorSettings:\n    def test_validate_index_all_success(self) -> None:\n        connector = AirtableConnector()\n        mock_api = _setup_mock_api(SAMPLE_BASES)\n        connector._airtable_client = mock_api\n\n        # Should not raise\n        connector.validate_connector_settings()\n\n    def test_validate_index_all_no_bases(self) -> None:\n        connector = AirtableConnector()\n        mock_api = MagicMock()\n        mock_api.bases.return_value = []\n        connector._airtable_client = mock_api\n\n        with pytest.raises(ConnectorValidationError, match=\"No bases found\"):\n            connector.validate_connector_settings()\n\n    def test_validate_specific_table_success(self) -> None:\n        connector = AirtableConnector(\n            base_id=\"appBASE1\",\n            table_name_or_id=\"tblTABLE1\",\n        )\n        mock_api = _setup_mock_api(SAMPLE_BASES)\n        connector._airtable_client = mock_api\n\n        # Should not raise\n        connector.validate_connector_settings()\n\n    def test_validate_empty_fields_auto_detects_index_all(self) -> None:\n        \"\"\"Empty base_id + table_name_or_id auto-detects as index_all mode.\"\"\"\n        connector = AirtableConnector(\n            base_id=\"\",\n            table_name_or_id=\"\",\n        )\n        assert connector.index_all is True\n\n        # Validation should go through the index_all path\n        mock_api = _setup_mock_api(SAMPLE_BASES)\n        connector._airtable_client = mock_api\n        connector.validate_connector_settings()\n\n    def test_validate_specific_table_api_error(self) -> None:\n        connector = AirtableConnector(\n            base_id=\"appBAD\",\n            table_name_or_id=\"tblBAD\",\n        )\n        mock_api = MagicMock()\n        mock_table = MagicMock()\n        mock_table.schema.side_effect = Exception(\"Not found\")\n        mock_api.table.return_value = mock_table\n        connector._airtable_client = mock_api\n\n        with pytest.raises(ConnectorValidationError, match=\"Failed to access table\"):\n            connector.validate_connector_settings()\n\n\nclass TestParseAirtableUrl:\n    def test_full_url_with_view(self) -> None:\n        base_id, table_id, view_id = parse_airtable_url(\n            \"https://airtable.com/appZqBgQFQ6kWyeZK/tblc9prNLypy7olTV/viwa3yxZvqWnyXftm?blocks=hide\"\n        )\n        assert base_id == \"appZqBgQFQ6kWyeZK\"\n        assert table_id == \"tblc9prNLypy7olTV\"\n        assert view_id == \"viwa3yxZvqWnyXftm\"\n\n    def test_url_without_view(self) -> None:\n        base_id, table_id, view_id = parse_airtable_url(\n            \"https://airtable.com/appZqBgQFQ6kWyeZK/tblc9prNLypy7olTV\"\n        )\n        assert base_id == \"appZqBgQFQ6kWyeZK\"\n        assert table_id == \"tblc9prNLypy7olTV\"\n        assert view_id is None\n\n    def test_url_without_query_params(self) -> None:\n        base_id, table_id, view_id = parse_airtable_url(\n            \"https://airtable.com/appABC123/tblDEF456/viwGHI789\"\n        )\n        assert base_id == \"appABC123\"\n        assert table_id == \"tblDEF456\"\n        assert view_id == \"viwGHI789\"\n\n    def test_url_with_trailing_whitespace(self) -> None:\n        base_id, table_id, view_id = parse_airtable_url(\n            \"  https://airtable.com/appABC123/tblDEF456  \"\n        )\n        assert base_id == \"appABC123\"\n        assert table_id == \"tblDEF456\"\n\n    def test_invalid_url_raises(self) -> None:\n        with pytest.raises(ValueError, match=\"Could not parse\"):\n            parse_airtable_url(\"https://google.com/something\")\n\n    def test_missing_table_raises(self) -> None:\n        with pytest.raises(ValueError, match=\"Could not parse\"):\n            parse_airtable_url(\"https://airtable.com/appABC123\")\n\n    def test_empty_string_raises(self) -> None:\n        with pytest.raises(ValueError, match=\"Could not parse\"):\n            parse_airtable_url(\"\")\n\n\nclass TestAirtableUrlConnector:\n    def test_url_sets_base_and_table_ids(self) -> None:\n        connector = AirtableConnector(\n            airtable_url=\"https://airtable.com/appZqBgQFQ6kWyeZK/tblc9prNLypy7olTV/viwa3yxZvqWnyXftm?blocks=hide\"\n        )\n        assert connector.base_id == \"appZqBgQFQ6kWyeZK\"\n        assert connector.table_name_or_id == \"tblc9prNLypy7olTV\"\n        assert connector.view_id == \"viwa3yxZvqWnyXftm\"\n\n    def test_url_without_view_leaves_view_none(self) -> None:\n        connector = AirtableConnector(airtable_url=\"https://airtable.com/appABC/tblDEF\")\n        assert connector.base_id == \"appABC\"\n        assert connector.table_name_or_id == \"tblDEF\"\n        assert connector.view_id is None\n\n    def test_url_overrides_explicit_base_and_table(self) -> None:\n        connector = AirtableConnector(\n            base_id=\"appOLD\",\n            table_name_or_id=\"tblOLD\",\n            airtable_url=\"https://airtable.com/appNEW/tblNEW\",\n        )\n        assert connector.base_id == \"appNEW\"\n        assert connector.table_name_or_id == \"tblNEW\"\n\n    def test_url_indexes_correctly(self) -> None:\n        \"\"\"End-to-end: URL-configured connector fetches from the right table.\"\"\"\n        bases = [\n            {\n                \"id\": \"appFromUrl\",\n                \"name\": \"URL Base\",\n                \"tables\": [\n                    {\n                        \"id\": \"tblFromUrl\",\n                        \"name\": \"URL Table\",\n                        \"primary_field_id\": \"fld1\",\n                        \"fields\": [\n                            {\"id\": \"fld1\", \"name\": \"Name\", \"type\": \"singleLineText\"},\n                        ],\n                        \"records\": [\n                            {\"id\": \"recURL1\", \"fields\": {\"Name\": \"From URL\"}},\n                        ],\n                    },\n                ],\n            },\n        ]\n        mock_api = _setup_mock_api(bases)\n\n        connector = AirtableConnector(\n            airtable_url=\"https://airtable.com/appFromUrl/tblFromUrl/viwABC\"\n        )\n        connector._airtable_client = mock_api\n\n        docs = _collect_docs(connector)\n\n        assert len(docs) == 1\n        assert docs[0].id == \"airtable__recURL1\"\n        assert docs[0].semantic_identifier == \"URL Table: From URL\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/asana/test_asana_connector.py",
    "content": "\"\"\"Tests for Asana connector configuration parsing.\"\"\"\n\nimport pytest\n\nfrom onyx.connectors.asana.connector import AsanaConnector\n\n\n@pytest.mark.parametrize(\n    \"project_ids,expected\",\n    [\n        (None, None),\n        (\"\", None),\n        (\"   \", None),\n        (\" 123 \", [\"123\"]),\n        (\" 123 , , 456 , \", [\"123\", \"456\"]),\n    ],\n)\ndef test_asana_connector_project_ids_normalization(\n    project_ids: str | None, expected: list[str] | None\n) -> None:\n    connector = AsanaConnector(\n        asana_workspace_id=\" 1153293530468850 \",\n        asana_project_ids=project_ids,\n        asana_team_id=\" 1210918501948021 \",\n    )\n\n    assert connector.workspace_id == \"1153293530468850\"\n    assert connector.project_ids_to_index == expected\n    assert connector.asana_team_id == \"1210918501948021\"\n\n\n@pytest.mark.parametrize(\n    \"team_id,expected\",\n    [\n        (None, None),\n        (\"\", None),\n        (\"   \", None),\n        (\" 1210918501948021 \", \"1210918501948021\"),\n    ],\n)\ndef test_asana_connector_team_id_normalization(\n    team_id: str | None, expected: str | None\n) -> None:\n    connector = AsanaConnector(\n        asana_workspace_id=\"1153293530468850\",\n        asana_project_ids=None,\n        asana_team_id=team_id,\n    )\n\n    assert connector.asana_team_id == expected\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/canvas/test_canvas_connector.py",
    "content": "\"\"\"Tests for Canvas connector — client, credentials, conversion.\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.canvas.client import CanvasApiClient\nfrom onyx.connectors.canvas.connector import CanvasConnector\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.models import ConnectorMissingCredentialError\nfrom onyx.error_handling.exceptions import OnyxError\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\nFAKE_BASE_URL = \"https://myschool.instructure.com\"\nFAKE_TOKEN = \"fake-canvas-token\"\n\n\ndef _mock_course(\n    course_id: int = 1,\n    name: str = \"Intro to CS\",\n    course_code: str = \"CS101\",\n) -> dict[str, Any]:\n    return {\n        \"id\": course_id,\n        \"name\": name,\n        \"course_code\": course_code,\n        \"created_at\": \"2025-01-01T00:00:00Z\",\n        \"workflow_state\": \"available\",\n    }\n\n\ndef _build_connector(base_url: str = FAKE_BASE_URL) -> CanvasConnector:\n    \"\"\"Build a connector with mocked credential validation.\"\"\"\n    with patch(\"onyx.connectors.canvas.client.rl_requests\") as mock_req:\n        mock_req.get.return_value = _mock_response(json_data=[_mock_course()])\n        connector = CanvasConnector(canvas_base_url=base_url)\n        connector.load_credentials({\"canvas_access_token\": FAKE_TOKEN})\n    return connector\n\n\ndef _mock_page(\n    page_id: int = 10,\n    title: str = \"Syllabus\",\n    updated_at: str = \"2025-06-01T12:00:00Z\",\n) -> dict[str, Any]:\n    return {\n        \"page_id\": page_id,\n        \"url\": \"syllabus\",\n        \"title\": title,\n        \"body\": \"<p>Welcome to the course</p>\",\n        \"created_at\": \"2025-01-15T00:00:00Z\",\n        \"updated_at\": updated_at,\n    }\n\n\ndef _mock_assignment(\n    assignment_id: int = 20,\n    name: str = \"Homework 1\",\n    course_id: int = 1,\n    updated_at: str = \"2025-06-01T12:00:00Z\",\n) -> dict[str, Any]:\n    return {\n        \"id\": assignment_id,\n        \"name\": name,\n        \"description\": \"<p>Solve these problems</p>\",\n        \"html_url\": f\"{FAKE_BASE_URL}/courses/{course_id}/assignments/{assignment_id}\",\n        \"course_id\": course_id,\n        \"created_at\": \"2025-01-20T00:00:00Z\",\n        \"updated_at\": updated_at,\n        \"due_at\": \"2025-02-01T23:59:00Z\",\n    }\n\n\ndef _mock_announcement(\n    announcement_id: int = 30,\n    title: str = \"Class Cancelled\",\n    course_id: int = 1,\n    posted_at: str = \"2025-06-01T12:00:00Z\",\n) -> dict[str, Any]:\n    return {\n        \"id\": announcement_id,\n        \"title\": title,\n        \"message\": \"<p>No class today</p>\",\n        \"html_url\": f\"{FAKE_BASE_URL}/courses/{course_id}/discussion_topics/{announcement_id}\",\n        \"posted_at\": posted_at,\n    }\n\n\ndef _mock_response(\n    status_code: int = 200,\n    json_data: Any = None,\n    link_header: str = \"\",\n) -> MagicMock:\n    \"\"\"Create a mock HTTP response with status, json, and Link header.\"\"\"\n    resp = MagicMock()\n    resp.status_code = status_code\n    resp.reason = \"OK\" if status_code < 300 else \"Error\"\n    resp.json.return_value = json_data if json_data is not None else []\n    resp.headers = {\"Link\": link_header}\n    return resp\n\n\n# ---------------------------------------------------------------------------\n# CanvasApiClient.__init__ tests\n# ---------------------------------------------------------------------------\n\n\nclass TestCanvasApiClientInit:\n    def test_success(self) -> None:\n        client = CanvasApiClient(\n            bearer_token=FAKE_TOKEN,\n            canvas_base_url=FAKE_BASE_URL,\n        )\n\n        expected_base_url = f\"{FAKE_BASE_URL}/api/v1\"\n        expected_host = \"myschool.instructure.com\"\n\n        assert client.base_url == expected_base_url\n        assert client._expected_host == expected_host\n\n    def test_normalizes_trailing_slash(self) -> None:\n        client = CanvasApiClient(\n            bearer_token=FAKE_TOKEN,\n            canvas_base_url=f\"{FAKE_BASE_URL}/\",\n        )\n\n        expected_base_url = f\"{FAKE_BASE_URL}/api/v1\"\n\n        assert client.base_url == expected_base_url\n\n    def test_normalizes_existing_api_v1(self) -> None:\n        client = CanvasApiClient(\n            bearer_token=FAKE_TOKEN,\n            canvas_base_url=f\"{FAKE_BASE_URL}/api/v1\",\n        )\n\n        expected_base_url = f\"{FAKE_BASE_URL}/api/v1\"\n\n        assert client.base_url == expected_base_url\n\n    def test_rejects_non_https_scheme(self) -> None:\n        with pytest.raises(ValueError, match=\"must use https\"):\n            CanvasApiClient(\n                bearer_token=FAKE_TOKEN,\n                canvas_base_url=\"ftp://myschool.instructure.com\",\n            )\n\n    def test_rejects_http(self) -> None:\n        with pytest.raises(ValueError, match=\"must use https\"):\n            CanvasApiClient(\n                bearer_token=FAKE_TOKEN,\n                canvas_base_url=\"http://myschool.instructure.com\",\n            )\n\n    def test_rejects_missing_host(self) -> None:\n        with pytest.raises(ValueError, match=\"must include a valid host\"):\n            CanvasApiClient(\n                bearer_token=FAKE_TOKEN,\n                canvas_base_url=\"https://\",\n            )\n\n\n# ---------------------------------------------------------------------------\n# CanvasApiClient._build_url tests\n# ---------------------------------------------------------------------------\n\n\nclass TestBuildUrl:\n    def setup_method(self) -> None:\n        self.client = CanvasApiClient(\n            bearer_token=FAKE_TOKEN,\n            canvas_base_url=FAKE_BASE_URL,\n        )\n\n    def test_appends_endpoint(self) -> None:\n        result = self.client._build_url(\"courses\")\n        expected = f\"{FAKE_BASE_URL}/api/v1/courses\"\n\n        assert result == expected\n\n    def test_strips_leading_slash_from_endpoint(self) -> None:\n        result = self.client._build_url(\"/courses\")\n        expected = f\"{FAKE_BASE_URL}/api/v1/courses\"\n\n        assert result == expected\n\n\n# ---------------------------------------------------------------------------\n# CanvasApiClient._build_headers tests\n# ---------------------------------------------------------------------------\n\n\nclass TestBuildHeaders:\n    def setup_method(self) -> None:\n        self.client = CanvasApiClient(\n            bearer_token=FAKE_TOKEN,\n            canvas_base_url=FAKE_BASE_URL,\n        )\n\n    def test_returns_bearer_auth(self) -> None:\n        result = self.client._build_headers()\n        expected = {\"Authorization\": f\"Bearer {FAKE_TOKEN}\"}\n\n        assert result == expected\n\n\n# ---------------------------------------------------------------------------\n# CanvasApiClient.get tests\n# ---------------------------------------------------------------------------\n\n\nclass TestGet:\n    def setup_method(self) -> None:\n        self.client = CanvasApiClient(\n            bearer_token=FAKE_TOKEN,\n            canvas_base_url=FAKE_BASE_URL,\n        )\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_success_returns_json_and_next_url(self, mock_requests: MagicMock) -> None:\n        next_link = f\"<{FAKE_BASE_URL}/api/v1/courses?page=2>; \" 'rel=\"next\"'\n        mock_requests.get.return_value = _mock_response(\n            json_data=[{\"id\": 1}], link_header=next_link\n        )\n\n        data, next_url = self.client.get(\"courses\")\n\n        expected_data = [{\"id\": 1}]\n        expected_next = f\"{FAKE_BASE_URL}/api/v1/courses?page=2\"\n\n        assert data == expected_data\n        assert next_url == expected_next\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_success_no_next_page(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(json_data=[{\"id\": 1}])\n\n        data, next_url = self.client.get(\"courses\")\n\n        assert data == [{\"id\": 1}]\n        assert next_url is None\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_raises_on_error_status(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(403, {})\n\n        with pytest.raises(OnyxError) as exc_info:\n            self.client.get(\"courses\")\n\n        assert exc_info.value.status_code == 403\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_raises_on_404(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(404, {})\n\n        with pytest.raises(OnyxError) as exc_info:\n            self.client.get(\"courses\")\n\n        assert exc_info.value.status_code == 404\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_raises_on_429(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(429, {})\n\n        with pytest.raises(OnyxError) as exc_info:\n            self.client.get(\"courses\")\n\n        assert exc_info.value.status_code == 429\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_skips_params_when_using_full_url(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(json_data=[])\n        full = f\"{FAKE_BASE_URL}/api/v1/courses?page=2\"\n\n        self.client.get(params={\"per_page\": \"100\"}, full_url=full)\n\n        _, kwargs = mock_requests.get.call_args\n        assert kwargs[\"params\"] is None\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_error_extracts_message_from_error_dict(\n        self, mock_requests: MagicMock\n    ) -> None:\n        \"\"\"Shape 1: {\"error\": {\"message\": \"Not authorized\"}}\"\"\"\n        mock_requests.get.return_value = _mock_response(\n            403, {\"error\": {\"message\": \"Not authorized\"}}\n        )\n\n        with pytest.raises(OnyxError) as exc_info:\n            self.client.get(\"courses\")\n\n        result = exc_info.value.detail\n        expected = \"Not authorized\"\n\n        assert result == expected\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_error_extracts_message_from_error_string(\n        self, mock_requests: MagicMock\n    ) -> None:\n        \"\"\"Shape 2: {\"error\": \"Invalid access token\"}\"\"\"\n        mock_requests.get.return_value = _mock_response(\n            401, {\"error\": \"Invalid access token\"}\n        )\n\n        with pytest.raises(OnyxError) as exc_info:\n            self.client.get(\"courses\")\n\n        result = exc_info.value.detail\n        expected = \"Invalid access token\"\n\n        assert result == expected\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_error_extracts_message_from_errors_list(\n        self, mock_requests: MagicMock\n    ) -> None:\n        \"\"\"Shape 3: {\"errors\": [{\"message\": \"Invalid query\"}]}\"\"\"\n        mock_requests.get.return_value = _mock_response(\n            400, {\"errors\": [{\"message\": \"Invalid query\"}]}\n        )\n\n        with pytest.raises(OnyxError) as exc_info:\n            self.client.get(\"courses\")\n\n        result = exc_info.value.detail\n        expected = \"Invalid query\"\n\n        assert result == expected\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_error_dict_takes_priority_over_errors_list(\n        self, mock_requests: MagicMock\n    ) -> None:\n        \"\"\"When both error shapes are present, error dict wins.\"\"\"\n        mock_requests.get.return_value = _mock_response(\n            403, {\"error\": \"Specific error\", \"errors\": [{\"message\": \"Generic\"}]}\n        )\n\n        with pytest.raises(OnyxError) as exc_info:\n            self.client.get(\"courses\")\n\n        result = exc_info.value.detail\n        expected = \"Specific error\"\n\n        assert result == expected\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_error_falls_back_to_reason_when_no_json_message(\n        self, mock_requests: MagicMock\n    ) -> None:\n        \"\"\"Empty error body falls back to response.reason.\"\"\"\n        mock_requests.get.return_value = _mock_response(500, {})\n\n        with pytest.raises(OnyxError) as exc_info:\n            self.client.get(\"courses\")\n\n        result = exc_info.value.detail\n        expected = \"Error\"  # from _mock_response's reason for >= 300\n\n        assert result == expected\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_invalid_json_on_success_raises(self, mock_requests: MagicMock) -> None:\n        \"\"\"Invalid JSON on a 2xx response raises OnyxError.\"\"\"\n        resp = MagicMock()\n        resp.status_code = 200\n        resp.json.side_effect = ValueError(\"No JSON\")\n        resp.headers = {\"Link\": \"\"}\n        mock_requests.get.return_value = resp\n\n        with pytest.raises(OnyxError, match=\"Invalid JSON\"):\n            self.client.get(\"courses\")\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_invalid_json_on_error_falls_back_to_reason(\n        self, mock_requests: MagicMock\n    ) -> None:\n        \"\"\"Invalid JSON on a 4xx response falls back to response.reason.\"\"\"\n        resp = MagicMock()\n        resp.status_code = 500\n        resp.reason = \"Internal Server Error\"\n        resp.json.side_effect = ValueError(\"No JSON\")\n        resp.headers = {\"Link\": \"\"}\n        mock_requests.get.return_value = resp\n\n        with pytest.raises(OnyxError) as exc_info:\n            self.client.get(\"courses\")\n\n        result = exc_info.value.detail\n        expected = \"Internal Server Error\"\n\n        assert result == expected\n\n\n# ---------------------------------------------------------------------------\n# CanvasApiClient.paginate tests\n# ---------------------------------------------------------------------------\n\n\nclass TestPaginate:\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_single_page(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(\n            json_data=[{\"id\": 1}, {\"id\": 2}]\n        )\n        client = CanvasApiClient(\n            bearer_token=FAKE_TOKEN,\n            canvas_base_url=FAKE_BASE_URL,\n        )\n\n        pages = list(client.paginate(\"courses\"))\n\n        assert len(pages) == 1\n        assert pages[0] == [{\"id\": 1}, {\"id\": 2}]\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_two_pages(self, mock_requests: MagicMock) -> None:\n        next_link = f'<{FAKE_BASE_URL}/api/v1/courses?page=2>; rel=\"next\"'\n        page1 = _mock_response(json_data=[{\"id\": 1}], link_header=next_link)\n        page2 = _mock_response(json_data=[{\"id\": 2}])\n        mock_requests.get.side_effect = [page1, page2]\n        client = CanvasApiClient(\n            bearer_token=FAKE_TOKEN,\n            canvas_base_url=FAKE_BASE_URL,\n        )\n\n        pages = list(client.paginate(\"courses\"))\n\n        assert len(pages) == 2\n        assert pages[0] == [{\"id\": 1}]\n        assert pages[1] == [{\"id\": 2}]\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_empty_response(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(json_data=[])\n        client = CanvasApiClient(\n            bearer_token=FAKE_TOKEN,\n            canvas_base_url=FAKE_BASE_URL,\n        )\n\n        pages = list(client.paginate(\"courses\"))\n\n        assert pages == []\n\n\n# ---------------------------------------------------------------------------\n# CanvasApiClient._parse_next_link tests\n# ---------------------------------------------------------------------------\n\n\nclass TestParseNextLink:\n    def setup_method(self) -> None:\n        self.client = CanvasApiClient(\n            bearer_token=FAKE_TOKEN,\n            canvas_base_url=\"https://canvas.example.com\",\n        )\n\n    def test_found(self) -> None:\n        header = '<https://canvas.example.com/api/v1/courses?page=2>; rel=\"next\"'\n\n        result = self.client._parse_next_link(header)\n        expected = \"https://canvas.example.com/api/v1/courses?page=2\"\n\n        assert result == expected\n\n    def test_not_found(self) -> None:\n        header = '<https://canvas.example.com/api/v1/courses?page=1>; rel=\"current\"'\n\n        result = self.client._parse_next_link(header)\n\n        assert result is None\n\n    def test_empty(self) -> None:\n        result = self.client._parse_next_link(\"\")\n\n        assert result is None\n\n    def test_multiple_rels(self) -> None:\n        header = (\n            '<https://canvas.example.com/api/v1/courses?page=1>; rel=\"current\", '\n            '<https://canvas.example.com/api/v1/courses?page=2>; rel=\"next\"'\n        )\n\n        result = self.client._parse_next_link(header)\n        expected = \"https://canvas.example.com/api/v1/courses?page=2\"\n\n        assert result == expected\n\n    def test_rejects_host_mismatch(self) -> None:\n        header = '<https://evil.example.com/api/v1/courses?page=2>; rel=\"next\"'\n\n        with pytest.raises(OnyxError, match=\"unexpected host\"):\n            self.client._parse_next_link(header)\n\n    def test_rejects_non_https_link(self) -> None:\n        header = '<http://canvas.example.com/api/v1/courses?page=2>; rel=\"next\"'\n\n        with pytest.raises(OnyxError, match=\"must use https\"):\n            self.client._parse_next_link(header)\n\n\n# ---------------------------------------------------------------------------\n# CanvasConnector — credential loading\n# ---------------------------------------------------------------------------\n\n\nclass TestLoadCredentials:\n    def _assert_load_credentials_raises(\n        self,\n        status_code: int,\n        expected_error: type[Exception],\n        mock_requests: MagicMock,\n    ) -> None:\n        \"\"\"Helper: assert load_credentials raises expected_error for a given status.\"\"\"\n        mock_requests.get.return_value = _mock_response(status_code, {})\n        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)\n        with pytest.raises(expected_error):\n            connector.load_credentials({\"canvas_access_token\": FAKE_TOKEN})\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_load_credentials_success(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(json_data=[_mock_course()])\n        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)\n\n        result = connector.load_credentials({\"canvas_access_token\": FAKE_TOKEN})\n\n        assert result is None\n        assert connector._canvas_client is not None\n\n    def test_canvas_client_raises_without_credentials(self) -> None:\n        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)\n\n        with pytest.raises(ConnectorMissingCredentialError):\n            _ = connector.canvas_client\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_load_credentials_invalid_token(self, mock_requests: MagicMock) -> None:\n        self._assert_load_credentials_raises(401, CredentialExpiredError, mock_requests)\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_load_credentials_insufficient_permissions(\n        self, mock_requests: MagicMock\n    ) -> None:\n        self._assert_load_credentials_raises(\n            403, InsufficientPermissionsError, mock_requests\n        )\n\n\n# ---------------------------------------------------------------------------\n# CanvasConnector — URL normalization\n# ---------------------------------------------------------------------------\n\n\nclass TestConnectorUrlNormalization:\n    def test_strips_api_v1_suffix(self) -> None:\n        connector = _build_connector(base_url=f\"{FAKE_BASE_URL}/api/v1\")\n\n        result = connector.canvas_base_url\n        expected = FAKE_BASE_URL\n\n        assert result == expected\n\n    def test_strips_trailing_slash(self) -> None:\n        connector = _build_connector(base_url=f\"{FAKE_BASE_URL}/\")\n\n        result = connector.canvas_base_url\n        expected = FAKE_BASE_URL\n\n        assert result == expected\n\n    def test_no_change_for_clean_url(self) -> None:\n        connector = _build_connector(base_url=FAKE_BASE_URL)\n\n        result = connector.canvas_base_url\n        expected = FAKE_BASE_URL\n\n        assert result == expected\n\n\n# ---------------------------------------------------------------------------\n# CanvasConnector — document conversion\n# ---------------------------------------------------------------------------\n\n\nclass TestDocumentConversion:\n    def setup_method(self) -> None:\n        self.connector = _build_connector()\n\n    def test_convert_page_to_document(self) -> None:\n        from onyx.connectors.canvas.connector import CanvasPage\n\n        page = CanvasPage(\n            page_id=10,\n            url=\"syllabus\",\n            title=\"Syllabus\",\n            body=\"<p>Welcome</p>\",\n            created_at=\"2025-01-15T00:00:00Z\",\n            updated_at=\"2025-06-01T12:00:00Z\",\n            course_id=1,\n        )\n\n        doc = self.connector._convert_page_to_document(page)\n\n        expected_id = \"canvas-page-1-10\"\n        expected_metadata = {\"course_id\": \"1\", \"type\": \"page\"}\n        expected_updated_at = datetime(2025, 6, 1, 12, 0, tzinfo=timezone.utc)\n\n        assert doc.id == expected_id\n        assert doc.source == DocumentSource.CANVAS\n        assert doc.semantic_identifier == \"Syllabus\"\n        assert doc.metadata == expected_metadata\n        assert doc.sections[0].link is not None\n        assert f\"{FAKE_BASE_URL}/courses/1/pages/syllabus\" in doc.sections[0].link\n        assert doc.doc_updated_at == expected_updated_at\n\n    def test_convert_page_without_body(self) -> None:\n        from onyx.connectors.canvas.connector import CanvasPage\n\n        page = CanvasPage(\n            page_id=11,\n            url=\"empty-page\",\n            title=\"Empty Page\",\n            body=None,\n            created_at=\"2025-01-15T00:00:00Z\",\n            updated_at=\"2025-06-01T12:00:00Z\",\n            course_id=1,\n        )\n\n        doc = self.connector._convert_page_to_document(page)\n        section_text = doc.sections[0].text\n        assert section_text is not None\n\n        assert \"Empty Page\" in section_text\n        assert \"<p>\" not in section_text\n\n    def test_convert_assignment_to_document(self) -> None:\n        from onyx.connectors.canvas.connector import CanvasAssignment\n\n        assignment = CanvasAssignment(\n            id=20,\n            name=\"Homework 1\",\n            description=\"<p>Solve these</p>\",\n            html_url=f\"{FAKE_BASE_URL}/courses/1/assignments/20\",\n            course_id=1,\n            created_at=\"2025-01-20T00:00:00Z\",\n            updated_at=\"2025-06-01T12:00:00Z\",\n            due_at=\"2025-02-01T23:59:00Z\",\n        )\n\n        doc = self.connector._convert_assignment_to_document(assignment)\n\n        expected_id = \"canvas-assignment-1-20\"\n        expected_due_text = \"Due: February 01, 2025 23:59 UTC\"\n\n        assert doc.id == expected_id\n        assert doc.source == DocumentSource.CANVAS\n        assert doc.semantic_identifier == \"Homework 1\"\n        assert doc.sections[0].text is not None\n        assert expected_due_text in doc.sections[0].text\n\n    def test_convert_assignment_without_description(self) -> None:\n        from onyx.connectors.canvas.connector import CanvasAssignment\n\n        assignment = CanvasAssignment(\n            id=21,\n            name=\"Quiz 1\",\n            description=None,\n            html_url=f\"{FAKE_BASE_URL}/courses/1/assignments/21\",\n            course_id=1,\n            created_at=\"2025-01-20T00:00:00Z\",\n            updated_at=\"2025-06-01T12:00:00Z\",\n            due_at=None,\n        )\n\n        doc = self.connector._convert_assignment_to_document(assignment)\n        section_text = doc.sections[0].text\n        assert section_text is not None\n\n        assert \"Quiz 1\" in section_text\n        assert \"Due:\" not in section_text\n\n    def test_convert_announcement_to_document(self) -> None:\n        from onyx.connectors.canvas.connector import CanvasAnnouncement\n\n        announcement = CanvasAnnouncement(\n            id=30,\n            title=\"Class Cancelled\",\n            message=\"<p>No class today</p>\",\n            html_url=f\"{FAKE_BASE_URL}/courses/1/discussion_topics/30\",\n            posted_at=\"2025-06-01T12:00:00Z\",\n            course_id=1,\n        )\n\n        doc = self.connector._convert_announcement_to_document(announcement)\n\n        expected_id = \"canvas-announcement-1-30\"\n        expected_updated_at = datetime(2025, 6, 1, 12, 0, tzinfo=timezone.utc)\n\n        assert doc.id == expected_id\n        assert doc.source == DocumentSource.CANVAS\n        assert doc.semantic_identifier == \"Class Cancelled\"\n        assert doc.doc_updated_at == expected_updated_at\n\n    def test_convert_announcement_without_posted_at(self) -> None:\n        from onyx.connectors.canvas.connector import CanvasAnnouncement\n\n        announcement = CanvasAnnouncement(\n            id=31,\n            title=\"TBD Announcement\",\n            message=None,\n            html_url=f\"{FAKE_BASE_URL}/courses/1/discussion_topics/31\",\n            posted_at=None,\n            course_id=1,\n        )\n\n        doc = self.connector._convert_announcement_to_document(announcement)\n\n        assert doc.doc_updated_at is None\n\n\n# ---------------------------------------------------------------------------\n# CanvasConnector — validate_connector_settings\n# ---------------------------------------------------------------------------\n\n\nclass TestValidateConnectorSettings:\n    def _assert_validate_raises(\n        self,\n        status_code: int,\n        expected_error: type[Exception],\n        mock_requests: MagicMock,\n    ) -> None:\n        \"\"\"Helper: assert validate_connector_settings raises expected_error.\"\"\"\n        success_resp = _mock_response(json_data=[_mock_course()])\n        fail_resp = _mock_response(status_code, {})\n        mock_requests.get.side_effect = [success_resp, fail_resp]\n        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)\n        connector.load_credentials({\"canvas_access_token\": FAKE_TOKEN})\n        with pytest.raises(expected_error):\n            connector.validate_connector_settings()\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_validate_success(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(json_data=[_mock_course()])\n        connector = _build_connector()\n\n        connector.validate_connector_settings()  # should not raise\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_validate_expired_credential(self, mock_requests: MagicMock) -> None:\n        self._assert_validate_raises(401, CredentialExpiredError, mock_requests)\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_validate_insufficient_permissions(self, mock_requests: MagicMock) -> None:\n        self._assert_validate_raises(403, InsufficientPermissionsError, mock_requests)\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_validate_rate_limited(self, mock_requests: MagicMock) -> None:\n        self._assert_validate_raises(429, ConnectorValidationError, mock_requests)\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_validate_unexpected_error(self, mock_requests: MagicMock) -> None:\n        self._assert_validate_raises(500, UnexpectedValidationError, mock_requests)\n\n\n# ---------------------------------------------------------------------------\n# _list_* pagination tests\n# ---------------------------------------------------------------------------\n\n\nclass TestListCourses:\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_single_page(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(\n            json_data=[_mock_course(1), _mock_course(2, \"CS201\", \"Data Structures\")]\n        )\n        connector = _build_connector()\n\n        result = connector._list_courses()\n\n        assert len(result) == 2\n        assert result[0].id == 1\n        assert result[1].id == 2\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_empty_response(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(json_data=[])\n        connector = _build_connector()\n\n        result = connector._list_courses()\n\n        assert result == []\n\n\nclass TestListPages:\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_single_page(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(\n            json_data=[_mock_page(10), _mock_page(11, \"Notes\")]\n        )\n        connector = _build_connector()\n\n        result = connector._list_pages(course_id=1)\n\n        assert len(result) == 2\n        assert result[0].page_id == 10\n        assert result[1].page_id == 11\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_empty_response(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(json_data=[])\n        connector = _build_connector()\n\n        result = connector._list_pages(course_id=1)\n\n        assert result == []\n\n\nclass TestListAssignments:\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_single_page(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(\n            json_data=[_mock_assignment(20), _mock_assignment(21, \"Quiz 1\")]\n        )\n        connector = _build_connector()\n\n        result = connector._list_assignments(course_id=1)\n\n        assert len(result) == 2\n        assert result[0].id == 20\n        assert result[1].id == 21\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_empty_response(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(json_data=[])\n        connector = _build_connector()\n\n        result = connector._list_assignments(course_id=1)\n\n        assert result == []\n\n\nclass TestListAnnouncements:\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_single_page(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(\n            json_data=[_mock_announcement(30), _mock_announcement(31, \"Update\")]\n        )\n        connector = _build_connector()\n\n        result = connector._list_announcements(course_id=1)\n\n        assert len(result) == 2\n        assert result[0].id == 30\n        assert result[1].id == 31\n\n    @patch(\"onyx.connectors.canvas.client.rl_requests\")\n    def test_empty_response(self, mock_requests: MagicMock) -> None:\n        mock_requests.get.return_value = _mock_response(json_data=[])\n        connector = _build_connector()\n\n        result = connector._list_announcements(course_id=1)\n\n        assert result == []\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/confluence/test_confluence_checkpointing.py",
    "content": "import time\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom requests.exceptions import HTTPError\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.confluence.connector import ConfluenceCheckpoint\nfrom onyx.connectors.confluence.connector import ConfluenceConnector\nfrom onyx.connectors.confluence.onyx_confluence import OnyxConfluence\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import SlimDocument\nfrom tests.unit.onyx.connectors.utils import load_everything_from_checkpoint_connector\nfrom tests.unit.onyx.connectors.utils import (\n    load_everything_from_checkpoint_connector_from_checkpoint,\n)\n\nPAGE_SIZE = 2\n\n\n@pytest.fixture\ndef confluence_base_url() -> str:\n    return \"https://example.atlassian.net/wiki\"\n\n\n@pytest.fixture\ndef space_key() -> str:\n    return \"TEST\"\n\n\n@pytest.fixture\ndef mock_confluence_client() -> OnyxConfluence:\n    \"\"\"Create a mock Confluence client with proper typing\"\"\"\n    # Server mode just Also updates the start value\n    return OnyxConfluence(\n        is_cloud=False, url=\"test\", credentials_provider=MagicMock(), timeout=None\n    )\n\n\n@pytest.fixture\ndef confluence_connector(\n    confluence_base_url: str, space_key: str, mock_confluence_client: OnyxConfluence\n) -> Generator[ConfluenceConnector, None, None]:\n    \"\"\"Create a Confluence connector with a mock client\"\"\"\n    # NOTE: we test with is_cloud=False for all tests, which is generally fine because the behavior\n    # for the two versions is \"close enough\". If cloud-specific behavior is added, we can parametrize\n    # the connector and client fixtures to allow either.\n    connector = ConfluenceConnector(\n        wiki_base=confluence_base_url,\n        space=space_key,\n        is_cloud=False,\n        labels_to_skip=[\"secret\", \"sensitive\"],\n        timezone_offset=0.0,\n        batch_size=2,\n    )\n    # Initialize the client directly\n    connector._confluence_client = mock_confluence_client\n    connector._low_timeout_confluence_client = mock_confluence_client\n    with patch(\"onyx.connectors.confluence.connector._SLIM_DOC_BATCH_SIZE\", 2):\n        yield connector\n\n\n@pytest.fixture\ndef create_mock_page() -> Callable[..., dict[str, Any]]:\n    def _create_mock_page(\n        id: str = \"123\",\n        title: str = \"Test Page\",\n        updated: str = \"2023-01-01T12:00:00.000+0000\",\n        content: str = \"Test Content\",\n        labels: list[str] | None = None,\n    ) -> dict[str, Any]:\n        \"\"\"Helper to create a mock Confluence page object\"\"\"\n        return {\n            \"id\": id,\n            \"title\": title,\n            \"version\": {\"when\": updated},\n            \"history\": {\"lastUpdated\": {\"when\": updated}},\n            \"body\": {\"storage\": {\"value\": content}},\n            \"metadata\": {\n                \"labels\": {\"results\": [{\"name\": label} for label in (labels or [])]}\n            },\n            \"space\": {\"key\": \"TEST\"},\n            \"_links\": {\"webui\": f\"/spaces/TEST/pages/{id}\"},\n        }\n\n    return _create_mock_page\n\n\ndef test_get_cql_query_with_space(confluence_connector: ConfluenceConnector) -> None:\n    \"\"\"Test CQL query generation with space specified\"\"\"\n    start = datetime(2023, 1, 1, tzinfo=timezone.utc).timestamp()\n    end = datetime(2023, 1, 2, tzinfo=timezone.utc).timestamp()\n\n    query = confluence_connector._construct_page_cql_query(start, end)\n\n    # Check that the space part and time part are both in the query\n    assert f\"space='{confluence_connector.space}'\" in query\n    assert \"lastmodified >= '2023-01-01 00:00'\" in query\n    assert \"lastmodified <= '2023-01-02 00:00'\" in query\n    assert \" and \" in query.lower()\n\n\ndef test_get_cql_query_without_space(confluence_base_url: str) -> None:\n    \"\"\"Test CQL query generation without space specified\"\"\"\n    # Create connector without space key\n    connector = ConfluenceConnector(wiki_base=confluence_base_url, is_cloud=True)\n\n    start = datetime(2023, 1, 1, tzinfo=connector.timezone).timestamp()\n    end = datetime(2023, 1, 2, tzinfo=connector.timezone).timestamp()\n\n    query = connector._construct_page_cql_query(start, end)\n\n    # Check that only time part is in the query\n    assert \"space=\" not in query\n    assert \"lastmodified >= '2023-01-01 00:00'\" in query\n    assert \"lastmodified <= '2023-01-02 00:00'\" in query\n\n\ndef test_load_from_checkpoint_happy_path(\n    confluence_connector: ConfluenceConnector,\n    create_mock_page: Callable[..., dict[str, Any]],\n) -> None:\n    \"\"\"Test loading from checkpoint - happy path\"\"\"\n    # Set up mocked pages\n    first_updated = datetime(2023, 1, 1, 12, 0, tzinfo=timezone.utc)\n    last_updated = datetime(2023, 1, 3, 12, 0, tzinfo=timezone.utc)\n    mock_page1 = create_mock_page(\n        id=\"1\", title=\"Page 1\", updated=first_updated.isoformat()\n    )\n    mock_page2 = create_mock_page(\n        id=\"2\", title=\"Page 2\", updated=first_updated.isoformat()\n    )\n    mock_page3 = create_mock_page(\n        id=\"3\", title=\"Page 3\", updated=last_updated.isoformat()\n    )\n\n    # Mock paginated_cql_retrieval to return our mock pages\n    confluence_client = confluence_connector._confluence_client\n    assert confluence_client is not None, \"bad test setup\"\n\n    # Mock space retrieval for hierarchy nodes (called at start of first batch)\n    confluence_client.retrieve_confluence_spaces = MagicMock(  # type: ignore\n        return_value=iter([{\"key\": \"TEST\", \"name\": \"Test Space\"}])\n    )\n\n    get_mock = MagicMock()\n    confluence_client.get = get_mock  # type: ignore\n    get_mock.side_effect = [\n        # First page response\n        MagicMock(\n            json=lambda: {\n                \"results\": [mock_page1, mock_page2],\n                \"_links\": {\"next\": \"rest/api/content/search?cql=type=page&start=2\"},\n            }\n        ),\n        # links and attachemnts responses\n        MagicMock(json=lambda: {\"results\": []}),\n        MagicMock(json=lambda: {\"results\": []}),\n        MagicMock(json=lambda: {\"results\": []}),\n        MagicMock(json=lambda: {\"results\": []}),\n        # next actual page response\n        MagicMock(json=lambda: {\"results\": [mock_page3]}),\n        # more links and attachment responses\n        MagicMock(json=lambda: {\"results\": []}),\n        MagicMock(json=lambda: {\"results\": []}),\n        MagicMock(json=lambda: {\"results\": []}),\n        MagicMock(json=lambda: {\"results\": []}),\n    ]\n\n    # Call load_from_checkpoint\n    end_time = time.time()\n    outputs = load_everything_from_checkpoint_connector(\n        confluence_connector, 0, end_time\n    )\n\n    # Check that the documents were returned (hierarchy nodes are filtered out by the test utility)\n    assert len(outputs) == 2\n\n    checkpoint_output1 = outputs[0]\n    assert len(checkpoint_output1.items) == 2\n    document1 = checkpoint_output1.items[0]\n    assert isinstance(document1, Document)\n    assert document1.id == f\"{confluence_connector.wiki_base}/spaces/TEST/pages/1\"\n    document2 = checkpoint_output1.items[1]\n    assert isinstance(document2, Document)\n    assert document2.id == f\"{confluence_connector.wiki_base}/spaces/TEST/pages/2\"\n    assert checkpoint_output1.next_checkpoint == ConfluenceCheckpoint(\n        has_more=True, next_page_url=\"rest/api/content/search?cql=type%3Dpage&start=2\"\n    )\n\n    checkpoint_output2 = outputs[1]\n    assert len(checkpoint_output2.items) == 1\n    document3 = checkpoint_output2.items[0]\n    assert isinstance(document3, Document)\n    assert document3.id == f\"{confluence_connector.wiki_base}/spaces/TEST/pages/3\"\n    assert not checkpoint_output2.next_checkpoint.has_more\n\n\ndef test_load_from_checkpoint_with_page_processing_error(\n    confluence_connector: ConfluenceConnector,\n    create_mock_page: Callable[..., dict[str, Any]],\n) -> None:\n    \"\"\"Test loading from checkpoint with a mix of successful and failed page processing\"\"\"\n    # Set up mocked pages\n    mock_page1 = create_mock_page(id=\"1\", title=\"Page 1\")\n    mock_page2 = create_mock_page(id=\"2\", title=\"Page 2\")\n\n    # Mock paginated_cql_retrieval to return our mock pages\n    confluence_client = confluence_connector._confluence_client\n    assert confluence_client is not None, \"bad test setup\"\n\n    # Mock space retrieval for hierarchy nodes (called at start of first batch)\n    confluence_client.retrieve_confluence_spaces = MagicMock(  # type: ignore\n        return_value=iter([{\"key\": \"TEST\", \"name\": \"Test Space\"}])\n    )\n\n    get_mock = MagicMock()\n    confluence_client.get = get_mock  # type: ignore\n    get_mock.side_effect = [\n        # First page response\n        MagicMock(\n            json=lambda: {\n                \"results\": [mock_page1, mock_page2],\n                \"_links\": {\"next\": \"rest/api/content/search?cql=type=page&start=2\"},\n            }\n        ),\n        # Comments for page 1\n        MagicMock(json=lambda: {\"results\": []}),\n        # Attachments for page 1\n        MagicMock(json=lambda: {\"results\": []}),\n        # Comments for page 2\n        MagicMock(json=lambda: {\"results\": []}),\n        # Attachments for page 2\n        MagicMock(json=lambda: {\"results\": []}),\n        # Second page response (empty)\n        MagicMock(\n            json=lambda: {\n                \"results\": [],\n                \"_links\": {},\n            }\n        ),\n    ]\n\n    # Mock _convert_page_to_document to fail for the second page\n    def mock_convert_side_effect(page: dict[str, Any]) -> Document | ConnectorFailure:\n        if page[\"id\"] == \"1\":\n            return Document(\n                id=f\"{confluence_connector.wiki_base}/spaces/TEST/pages/1\",\n                sections=[],\n                source=DocumentSource.CONFLUENCE,\n                semantic_identifier=\"Page 1\",\n                metadata={},\n            )\n        else:\n            return ConnectorFailure(\n                failed_document=DocumentFailure(\n                    document_id=page[\"id\"],\n                    document_link=f\"{confluence_connector.wiki_base}/spaces/TEST/pages/{page['id']}\",\n                ),\n                failure_message=\"Failed to process Confluence page\",\n                exception=Exception(\"Test error\"),\n            )\n\n    with patch(\n        \"onyx.connectors.confluence.connector.ConfluenceConnector._convert_page_to_document\",\n        side_effect=mock_convert_side_effect,\n    ):\n        # Call load_from_checkpoint\n        end_time = time.time()\n        outputs = load_everything_from_checkpoint_connector(\n            confluence_connector, 0, end_time\n        )\n\n        # Hierarchy nodes are filtered out by test utility\n        assert len(outputs) == 1\n        checkpoint_output = outputs[0]\n        assert len(checkpoint_output.items) == 2\n\n        # First item should be successful\n        assert isinstance(checkpoint_output.items[0], Document)\n        assert (\n            checkpoint_output.items[0].id\n            == f\"{confluence_connector.wiki_base}/spaces/TEST/pages/1\"\n        )\n\n        # Second item should be a failure\n        assert isinstance(checkpoint_output.items[1], ConnectorFailure)\n        assert (\n            \"Failed to process Confluence page\"\n            in checkpoint_output.items[1].failure_message\n        )\n\n\ndef test_retrieve_all_slim_docs_perm_sync(\n    confluence_connector: ConfluenceConnector,\n    create_mock_page: Callable[..., dict[str, Any]],\n) -> None:\n    \"\"\"Test retrieving all slim documents including hierarchy nodes\"\"\"\n    # Set up mocked pages\n    mock_page1 = create_mock_page(id=\"1\")\n    mock_page2 = create_mock_page(id=\"2\")\n\n    # Mock paginated_cql_retrieval to return our mock pages\n    confluence_client = confluence_connector._confluence_client\n    assert confluence_client is not None, \"bad test setup\"\n\n    # Mock space retrieval for hierarchy nodes\n    confluence_client.retrieve_confluence_spaces = MagicMock(  # type: ignore\n        return_value=iter([{\"key\": \"TEST\", \"name\": \"Test Space\"}])\n    )\n\n    get_mock = MagicMock()\n    confluence_client.get = get_mock  # type: ignore\n    get_mock.side_effect = [\n        # First page response\n        MagicMock(\n            json=lambda: {\n                \"results\": [mock_page1, mock_page2],\n                \"_links\": {\"next\": \"rest/api/content/search?cql=type=page&start=2\"},\n            }\n        ),\n        # attachments for page 1\n        MagicMock(json=lambda: {\"results\": []}),\n        # attachments for page 2\n        MagicMock(json=lambda: {\"results\": []}),\n        # next page of CQL results (empty)\n        MagicMock(json=lambda: {\"results\": []}),\n    ]\n\n    # Call retrieve_all_slim_docs_perm_sync\n    batches = list(confluence_connector.retrieve_all_slim_docs_perm_sync(0, 100))\n    assert get_mock.call_count == 4\n\n    # With batch size of 2, we get:\n    # Batch 1: [HierarchyNode(space), SlimDocument(page1)]\n    # Batch 2: [SlimDocument(page2)]\n    assert len(batches) == 2\n\n    assert len(batches[0]) == 2\n    assert isinstance(batches[0][0], HierarchyNode)\n    assert batches[0][0].raw_node_id == \"TEST\"\n    assert isinstance(batches[0][1], SlimDocument)\n    assert batches[0][1].id == f\"{confluence_connector.wiki_base}/spaces/TEST/pages/1\"\n\n    assert len(batches[1]) == 1\n    assert isinstance(batches[1][0], SlimDocument)\n    assert batches[1][0].id == f\"{confluence_connector.wiki_base}/spaces/TEST/pages/2\"\n\n\n@pytest.mark.parametrize(\n    \"status_code,expected_exception,expected_message\",\n    [\n        (\n            401,\n            CredentialExpiredError,\n            \"Invalid or expired Confluence credentials\",\n        ),\n        (\n            403,\n            InsufficientPermissionsError,\n            \"Insufficient permissions to access Confluence resources\",\n        ),\n        (404, UnexpectedValidationError, \"Unexpected Confluence error\"),\n    ],\n)\ndef test_validate_connector_settings_errors(\n    confluence_connector: ConfluenceConnector,\n    status_code: int,\n    expected_exception: type[Exception],\n    expected_message: str,\n) -> None:\n    \"\"\"Test validation with various error scenarios\"\"\"\n    error = HTTPError(response=MagicMock(status_code=status_code))\n\n    with patch(\n        \"onyx.connectors.confluence.onyx_confluence.OnyxConfluence.retrieve_confluence_spaces\"\n    ) as mock_retrieve:\n        mock_retrieve.side_effect = error\n\n        with pytest.raises(expected_exception) as excinfo:\n            confluence_connector.validate_connector_settings()\n        assert expected_message in str(excinfo.value)\n\n\ndef test_validate_connector_settings_success(\n    confluence_connector: ConfluenceConnector,\n) -> None:\n    \"\"\"Test successful validation\"\"\"\n    low_client = confluence_connector.low_timeout_confluence_client\n    with (\n        patch.object(\n            low_client,\n            \"retrieve_confluence_spaces\",\n            return_value=iter([{\"key\": \"TEST\"}]),\n        ) as mock_retrieve,\n        patch.object(\n            low_client,\n            \"get_space\",\n            return_value={\"key\": \"TEST\"},\n            create=True,\n        ) as mock_get_space,\n    ):\n        confluence_connector.validate_connector_settings()\n        mock_retrieve.assert_called_once()\n        mock_get_space.assert_called_once_with(confluence_connector.space)\n\n\ndef test_checkpoint_progress(\n    confluence_connector: ConfluenceConnector,\n    create_mock_page: Callable[..., dict[str, Any]],\n) -> None:\n    \"\"\"Test that the checkpoint's last_updated field is properly updated after processing pages\n    and that processed document IDs are stored to avoid reprocessing.\"\"\"\n    # Set up mocked pages with different timestamps\n    earlier_timestamp = datetime(2023, 1, 1, 12, 0, tzinfo=timezone.utc)\n    later_timestamp = datetime(2023, 1, 2, 12, 0, tzinfo=timezone.utc)\n    latest_timestamp = datetime(2024, 1, 2, 12, 0, tzinfo=timezone.utc)\n    mock_page1 = create_mock_page(\n        id=\"1\", title=\"Page 1\", updated=earlier_timestamp.isoformat()\n    )\n    mock_page2 = create_mock_page(\n        id=\"2\", title=\"Page 2\", updated=later_timestamp.isoformat()\n    )\n    mock_page3 = create_mock_page(\n        id=\"3\", title=\"Page 3\", updated=latest_timestamp.isoformat()\n    )\n\n    # Mock paginated_cql_retrieval to return our mock pages\n    confluence_client = confluence_connector._confluence_client\n    assert confluence_client is not None, \"bad test setup\"\n\n    # Mock space retrieval for hierarchy nodes (called at start of first batch)\n    confluence_client.retrieve_confluence_spaces = MagicMock(  # type: ignore\n        return_value=iter([{\"key\": \"TEST\", \"name\": \"Test Space\"}])\n    )\n\n    get_mock = MagicMock()\n    confluence_client.get = get_mock  # type: ignore\n    get_mock.side_effect = [\n        # First page response\n        MagicMock(\n            json=lambda: {\n                \"results\": [mock_page1, mock_page2],\n                \"_links\": {\"next\": \"rest/api/content/search?cql=type=page&start=2\"},\n            }\n        ),\n        MagicMock(json=lambda: {\"results\": []}),\n        MagicMock(json=lambda: {\"results\": []}),\n        MagicMock(json=lambda: {\"results\": []}),\n        MagicMock(json=lambda: {\"results\": []}),\n        MagicMock(json=lambda: {\"results\": []}),\n    ]\n\n    # First run - process both pages\n    end_time = datetime(2023, 1, 3, tzinfo=timezone.utc).timestamp()\n    outputs = load_everything_from_checkpoint_connector(\n        confluence_connector, 0, end_time\n    )\n\n    # Hierarchy nodes are filtered out by test utility\n    first_checkpoint = outputs[0].next_checkpoint\n\n    assert (\n        first_checkpoint.next_page_url\n        == \"rest/api/content/search?cql=type%3Dpage&start=2\"\n    )\n    assert not outputs[-1].next_checkpoint.has_more\n\n    assert len(outputs[0].items) == 2\n    assert isinstance(outputs[0].items[0], Document)\n    assert outputs[0].items[0].semantic_identifier == \"Page 1\"\n    assert isinstance(outputs[0].items[1], Document)\n    assert outputs[0].items[1].semantic_identifier == \"Page 2\"\n\n    # Second run - same time range but with checkpoint from first run\n    # Reset the mock to return the same pages\n    get_mock.side_effect = [\n        # First page response\n        MagicMock(\n            json=lambda: {\n                \"results\": [mock_page3],\n                \"_links\": {\"next\": \"rest/api/content/search?cql=type=page&start=3\"},\n            }\n        ),\n        MagicMock(json=lambda: {\"results\": []}),\n        MagicMock(json=lambda: {\"results\": []}),\n        MagicMock(json=lambda: {\"results\": []}),\n    ]\n\n    # Use the checkpoint from first run\n    first_checkpoint.has_more = True\n    outputs_with_checkpoint = load_everything_from_checkpoint_connector_from_checkpoint(\n        confluence_connector, 0, end_time, first_checkpoint\n    )\n\n    # Verify only the new page was processed since the others were in last_seen_doc_ids\n    assert len(outputs_with_checkpoint) == 2\n    assert len(outputs_with_checkpoint[0].items) == 1\n    assert isinstance(outputs_with_checkpoint[0].items[0], Document)\n    assert outputs_with_checkpoint[0].items[0].semantic_identifier == \"Page 3\"\n    assert not outputs_with_checkpoint[-1].next_checkpoint.has_more\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/confluence/test_onyx_confluence.py",
    "content": "import copy\nfrom typing import Any\nfrom unittest import mock\n\nimport pytest\nimport requests\nfrom requests import HTTPError\n\nfrom onyx.connectors.confluence.onyx_confluence import (\n    _DEFAULT_PAGINATION_LIMIT,\n)\nfrom onyx.connectors.confluence.onyx_confluence import OnyxConfluence\nfrom onyx.connectors.interfaces import CredentialsProviderInterface\n\n\n# Helper to create mock responses\ndef _create_mock_response(\n    status_code: int,\n    json_data: dict[str, Any] | None = None,\n    url: str = \"\",\n) -> requests.Response:\n    response = requests.Response()\n    response.status_code = status_code\n    response.url = url\n    if json_data is not None:\n        response.json = mock.Mock(return_value=json_data)  # type: ignore\n    if status_code >= 400:\n        response.reason = \"Mock Error\"\n    return response\n\n\n# Helper to create HTTPError\ndef _create_http_error(\n    status_code: int,\n    json_data: dict[str, Any] | None = None,\n    url: str = \"\",\n) -> requests.Response:\n    response = _create_mock_response(status_code, json_data, url)\n    response.raise_for_status = mock.Mock(side_effect=HTTPError(response=response))  # type: ignore\n    return response\n\n\n@pytest.fixture\ndef mock_credentials_provider() -> mock.Mock:\n    provider = mock.Mock(spec=CredentialsProviderInterface)\n    provider.is_dynamic.return_value = False\n    provider.get_credentials.return_value = {\"confluence_access_token\": \"dummy_token\"}\n    provider.get_tenant_id.return_value = \"test_tenant\"\n    provider.get_provider_key.return_value = \"test_key\"\n    provider.__enter__ = mock.Mock(return_value=None)\n    provider.__exit__ = mock.Mock(return_value=None)\n    return provider\n\n\n@pytest.fixture\ndef confluence_server_client(mock_credentials_provider: mock.Mock) -> OnyxConfluence:\n    confluence = OnyxConfluence(\n        is_cloud=False,\n        url=\"http://fake-confluence.com\",\n        credentials_provider=mock_credentials_provider,\n        timeout=10,\n    )\n    # Mock the internal client directly for controlling 'get'\n    # We also mock the base URL used by the client internally for easier comparison\n    mock_internal_client = mock.Mock()\n    mock_internal_client.url = confluence._url\n    confluence._confluence = mock_internal_client\n    confluence._kwargs = (\n        confluence.shared_base_kwargs\n    )  # Ensure _kwargs is set for potential re-init\n    return confluence\n\n\ndef test_cql_paginate_all_expansions_handles_internal_pagination_error(\n    confluence_server_client: OnyxConfluence, caplog: pytest.LogCaptureFixture\n) -> None:\n    \"\"\"\n    Tests that cql_paginate_all_expansions correctly handles HTTP 500 errors\n    during the expansion pagination phase (_paginate_url internal logic),\n    retrying with smaller limits down to 1. It simulates successes and failures\n    at limit=1 and expects the final error to be raised.\n\n    Specifically, this test:\n\n    1. Calls the top level cql query and gets a response with 3 children.\n    2. Calls the expansion for the first child and gets a response with 2 children across 2 pages.\n    3. Tries to call the expansion for the second child, gets a 500 error, and retries\n       down to the limit of 1.\n    4. At limit=1, simulates the following sequence for page requests:\n       - Page 1 (start=0): Success\n       - Page 2 (start=1): Success\n       - Page 3 (start=2): Failure (500)\n       - Page 4 (start=3): Failure (500) <- This is the error that should be raised\n    5. Calls the expansion for the third child and gets a response with 1 child.\n    6. The overall call succeeds.\n    \"\"\"\n    caplog.set_level(\"WARNING\")  # To check logging messages\n\n    # Use constants from the client instance, but note the test logic goes below MINIMUM\n    _TEST_MINIMUM_LIMIT = 1  # The limit this test expects the retry to reach\n\n    top_level_cql = \"test_cql\"\n    top_level_expand = \"child_items\"\n    base_top_level_path = (\n        f\"rest/api/content/search?cql={top_level_cql}&expand={top_level_expand}\"\n    )\n    initial_top_level_path = f\"{base_top_level_path}&limit={_DEFAULT_PAGINATION_LIMIT}\"\n\n    # --- Mock Responses ---\n    top_level_raw_response = {\n        \"results\": [\n            {\n                \"id\": 1,\n                \"child_items\": {\n                    \"results\": [],  # Populated by _traverse_and_update\n                    \"_links\": {\n                        \"next\": f\"/rest/api/content/1/child?limit={_DEFAULT_PAGINATION_LIMIT}\"\n                    },\n                    \"size\": 0,\n                },\n            },\n            {\n                \"id\": 2,\n                \"child_items\": {\n                    \"results\": [],\n                    \"_links\": {\n                        \"next\": f\"/rest/api/content/2/child?limit={_DEFAULT_PAGINATION_LIMIT}\"\n                    },\n                    \"size\": 0,\n                },\n            },\n            {\n                \"id\": 3,\n                \"child_items\": {\n                    \"results\": [],\n                    \"_links\": {\n                        \"next\": f\"/rest/api/content/3/child?limit={_DEFAULT_PAGINATION_LIMIT}\"\n                    },\n                    \"size\": 0,\n                },\n            },\n        ],\n        \"_links\": {},\n        \"size\": 3,\n    }\n    top_level_response = _create_mock_response(\n        200,\n        top_level_raw_response,\n        url=initial_top_level_path,\n    )\n\n    # Expansion 1 - Needs 2 pages\n    exp1_page1_path = f\"rest/api/content/1/child?limit={_DEFAULT_PAGINATION_LIMIT}\"\n    # Note: _paginate_url internally calculates start for the next page\n    exp1_page2_path = (\n        f\"rest/api/content/1/child?start=1&limit={_DEFAULT_PAGINATION_LIMIT}\"\n    )\n    exp1_page1_response = _create_mock_response(\n        200,\n        {\n            \"results\": [{\"child_id\": 101}],\n            \"_links\": {\"next\": f\"/{exp1_page2_path}\"},\n            \"size\": 1,\n        },\n        url=exp1_page1_path,\n    )\n    exp1_page2_response = _create_mock_response(\n        200,\n        {\"results\": [{\"child_id\": 102}], \"_links\": {}, \"size\": 1},\n        url=exp1_page2_path,\n    )\n\n    # Problematic Expansion 2 URLs and Errors during limit reduction\n    exp2_base_path = \"rest/api/content/2/child\"\n    exp2_reduction_errors = {}\n    limit = _DEFAULT_PAGINATION_LIMIT\n    while limit > _TEST_MINIMUM_LIMIT:  # Reduce all the way to 1 for the test\n        path = f\"{exp2_base_path}?limit={limit}\"\n        exp2_reduction_errors[path] = _create_http_error(500, url=path)\n        new_limit = limit // 2\n        limit = max(new_limit, _TEST_MINIMUM_LIMIT)  # Ensure it hits 1\n\n    # Expansion 2 - Pagination at Limit = 1 (2 successes, 2 failures)\n    exp2_limit1_page1_path = f\"{exp2_base_path}?limit={_TEST_MINIMUM_LIMIT}&start=0\"\n    exp2_limit1_page2_path = f\"{exp2_base_path}?limit={_TEST_MINIMUM_LIMIT}&start=1\"\n    exp2_limit1_page3_path = f\"{exp2_base_path}?limit={_TEST_MINIMUM_LIMIT}&start=2\"\n    exp2_limit1_page4_path = (\n        f\"{exp2_base_path}?limit={_TEST_MINIMUM_LIMIT}&start=3\"  # Final failing call\n    )\n    exp2_limit1_page5_path = (\n        f\"{exp2_base_path}?limit={_TEST_MINIMUM_LIMIT}&start=4\"  # Returns nothing\n    )\n\n    exp2_limit1_page1_response = _create_mock_response(\n        200,\n        {\n            \"results\": [{\"child_id\": 201}],\n            \"_links\": {\"next\": f\"/{exp2_limit1_page2_path}\"},\n            \"size\": 1,\n        },\n        url=exp2_limit1_page1_path,\n    )\n    exp2_limit1_page2_error = _create_http_error(500, url=exp2_limit1_page2_path)\n    exp2_limit1_page3_response = _create_mock_response(\n        200,\n        {\n            \"results\": [{\"child_id\": 203}],\n            \"_links\": {\"next\": f\"/{exp2_limit1_page4_path}\"},\n            \"size\": 1,\n        },\n        url=exp2_limit1_page3_path,\n    )\n    exp2_limit1_page4_error = _create_http_error(\n        500, url=exp2_limit1_page4_path\n    )  # This is the one we expect to bubble up\n    exp2_limit1_page5_response = _create_mock_response(\n        200, {\"results\": [], \"_links\": {}, \"size\": 0}, url=exp2_limit1_page5_path\n    )\n\n    # Expansion 3\n    exp3_page1_path = f\"rest/api/content/3/child?limit={_DEFAULT_PAGINATION_LIMIT}\"\n    exp3_page1_response = _create_mock_response(\n        200,\n        {\"results\": [{\"child_id\": 301}], \"_links\": {}, \"size\": 1},\n        url=exp3_page1_path,\n    )\n\n    # --- Side Effect Logic ---\n    mock_get_call_paths: list[str] = []\n    call_counts: dict[str, int] = {}  # Track calls to specific failing paths\n\n    def get_side_effect(\n        path: str,\n        params: dict[str, Any] | None = None,  # noqa: ARG001\n        advanced_mode: bool = False,  # noqa: ARG001\n    ) -> requests.Response:\n        path = path.strip(\"/\")\n        mock_get_call_paths.append(path)\n        call_counts[path] = call_counts.get(path, 0) + 1\n        print(f\"Mock GET received path: {path} (Call #{call_counts[path]})\")\n\n        # Top Level Call\n        if path == initial_top_level_path:\n            print(f\"-> Returning top level response for {path}\")\n            return top_level_response\n\n        # Expansion 1 - Page 1\n        elif path == exp1_page1_path:\n            print(f\"-> Returning expansion 1 page 1 for {path}\")\n            return exp1_page1_response\n\n        # Expansion 1 - Page 2\n        elif path == exp1_page2_path:\n            print(f\"-> Returning expansion 1 page 2 for {path}\")\n            return exp1_page2_response\n\n        # Expansion 2 - Limit Reduction Errors\n        elif path in exp2_reduction_errors:\n            print(f\"-> Failure: Returning response which raises 500 error for {path}\")\n            return exp2_reduction_errors[path]\n\n        # Expansion 2 - Limit=1 Page 1 (Success)\n        elif path == exp2_limit1_page1_path:\n            print(f\"-> Success: Returning expansion 2 limit 1 page 1 for {path}\")\n            return exp2_limit1_page1_response\n\n        # Expansion 2 - Limit=1 Page 2 (Failure)\n        elif path == exp2_limit1_page2_path:\n            print(f\"-> Failure: Returning response which raises 500 error for {path}\")\n            return exp2_limit1_page2_error\n\n        # Expansion 2 - Limit=1 Page 3 (Success)\n        elif path == exp2_limit1_page3_path:\n            print(f\"-> Success: Returning expansion 2 limit 1 page 3 for {path}\")\n            return exp2_limit1_page3_response\n\n        # Expansion 2 - Limit=1 Page 4 (Failure)\n        elif path == exp2_limit1_page4_path:\n            print(f\"-> Failure: Returning response which raises 500 error for {path}\")\n            return exp2_limit1_page4_error\n\n        elif path == exp2_limit1_page5_path:\n            print(f\"-> Returning expansion 2 limit 1 page 5 for {path}\")\n            return exp2_limit1_page5_response\n\n        # Expansion 3 - Page 1\n        elif path == exp3_page1_path:\n            print(f\"-> Returning expansion 3 page 1 for {path}\")\n            return exp3_page1_response\n\n        # Fallback\n        print(f\"!!! Unexpected GET path in mock: {path}\")\n        raise RuntimeError(f\"Unexpected GET path in mock: {path}\")\n\n    confluence_server_client._confluence.get.side_effect = get_side_effect\n\n    # --- Execute ---\n    # Consume the iterator to trigger the calls\n    result = list(\n        confluence_server_client.cql_paginate_all_expansions(\n            cql=top_level_cql,\n            expand=top_level_expand,\n            limit=_DEFAULT_PAGINATION_LIMIT,\n        )\n    )\n\n    # Verify log for the failures during expansion 2 pagination (page 2 + 4)\n    assert f\"Error in confluence call to /{exp2_limit1_page2_path}\" in caplog.text\n    assert f\"Error in confluence call to /{exp2_limit1_page4_path}\" in caplog.text\n\n    # Verify sequence of calls to 'get'\n    # 1. Top level\n    assert mock_get_call_paths[0] == initial_top_level_path\n    # 2. Expansion 1 (page 1)\n    assert mock_get_call_paths[1] == exp1_page1_path\n    # 3. Expansion 1 (page 2)\n    assert mock_get_call_paths[2] == exp1_page2_path\n    # 4. Expansion 2 (initial attempt)\n    assert (\n        mock_get_call_paths[3] == f\"{exp2_base_path}?limit={_DEFAULT_PAGINATION_LIMIT}\"\n    )\n\n    # 5+. Expansion 2 (retries due to 500s, down to limit=1)\n    call_index = 4\n\n    # 5+N. Expansion 2 (limit=1, page 1 success)\n    assert mock_get_call_paths[call_index] == exp2_limit1_page1_path\n    call_index += 1\n    # 5+N+1. Expansion 2 (limit=1, page 2 success)\n    assert mock_get_call_paths[call_index] == exp2_limit1_page2_path\n    call_index += 1\n    # 5+N+2. Expansion 2 (limit=1, page 3 failure)\n    assert mock_get_call_paths[call_index] == exp2_limit1_page3_path\n    call_index += 1\n\n    # 5+N+3. Expansion 2 (limit=1, page 4 failure)\n    assert mock_get_call_paths[call_index] == exp2_limit1_page4_path\n    call_index += 1\n\n    # 5+N+4. Expansion 2 (limit=1, page 5 success, no results)\n    assert mock_get_call_paths[call_index] == exp2_limit1_page5_path\n    call_index += 1\n\n    # Ensure Expansion 3 is called, that we continue after the final error-raising call\n    assert mock_get_call_paths[call_index] == exp3_page1_path\n    call_index += 1\n\n    # Ensure correct number of calls\n    assert len(mock_get_call_paths) == call_index\n\n    # Ensure the result is correct\n    # NOTE: size does not get updated during _traverse_and_update\n    final_results = copy.deepcopy(top_level_raw_response)\n    final_results[\"results\"][0][\"child_items\"][\"results\"] = [{\"child_id\": 101}, {\"child_id\": 102}]  # type: ignore\n    final_results[\"results\"][1][\"child_items\"][\"results\"] = [{\"child_id\": 201}, {\"child_id\": 203}]  # type: ignore\n    final_results[\"results\"][2][\"child_items\"][\"results\"] = [{\"child_id\": 301}]  # type: ignore\n    assert result == final_results[\"results\"]\n\n\ndef test_paginated_cql_retrieval_handles_pagination_error(\n    confluence_server_client: OnyxConfluence, caplog: pytest.LogCaptureFixture\n) -> None:\n    \"\"\"\n    Tests that paginated_cql_retrieval correctly handles HTTP 500 errors\n    during pagination, retrying with smaller limits down to 1, skipping\n    the problematic item, and continuing.\n\n    NOTE: in this context, a \"page\" is a set of results NOT a confluence page.\n\n    Specifically, this test:\n    1. Makes an initial CQL call with a limit, gets page 1 successfully.\n    2. Attempts to get page 2 (based on the 'next' link), receives a 500 error.\n    3. The internal _paginate_url logic retries page 2 with limit=1.\n    4. Simulates the following sequence for page 2 retries (limit=1):\n       - Item 1 (start=original_start + 0): Success\n       - Item 2 (start=original_start + 1): Failure (500) - This item is skipped.\n       - Item 3 (start=original_start + 2): Success\n       - Item 4 (start=original_start + 3): Success, no more results in this chunk.\n    5. The function continues to the next page (page 3) successfully.\n    6. Checks that the results from page 1, items 1 & 3 from page 2 (retry),\n       and page 3 are all returned.\n    7. Verifies the error log for the skipped item (item 2).\n    \"\"\"\n    caplog.set_level(\"WARNING\")\n\n    test_cql = \"type=page\"\n    encoded_cql = \"type%3Dpage\"  # URL encoded version\n    test_limit = 4  # Smaller limit for easier testing of page boundaries\n    _TEST_MINIMUM_LIMIT = 1\n\n    base_path = f\"rest/api/content/search?cql={encoded_cql}\"  # Use encoded cql\n    page1_path = f\"{base_path}&limit={test_limit}\"\n    # Page 2 starts where page 1 left off (start=test_limit)\n    page2_initial_path = f\"{base_path}&limit={test_limit}&start={test_limit}\"\n    # Page 3 starts after the problematic page 2 is processed (start=test_limit * 2)\n    page3_path = f\"{base_path}&limit={test_limit}&start={test_limit * 2}\"\n\n    # --- Mock Responses ---\n    # Page 1: Success (4 items)\n    page1_response = _create_mock_response(\n        200,\n        {\n            \"results\": [{\"id\": 1}, {\"id\": 2}, {\"id\": 3}, {\"id\": 4}],\n            \"_links\": {\"next\": f\"/{page2_initial_path}\"},\n            \"size\": 4,\n        },\n        url=page1_path,\n    )\n\n    # Page 2: Initial attempt fails with 500\n    page2_initial_error = _create_http_error(500, url=page2_initial_path)\n\n    # Page 2: Retry attempts with limit=1\n    page2_limit1_start_offset = test_limit  # Start index for page 2 items\n    page2_limit1_item1_path = (\n        f\"{base_path}&limit={_TEST_MINIMUM_LIMIT}&start={page2_limit1_start_offset + 0}\"\n    )\n    page2_limit1_item2_path = (\n        f\"{base_path}&limit={_TEST_MINIMUM_LIMIT}&start={page2_limit1_start_offset + 1}\"\n    )\n    page2_limit1_item3_path = (\n        f\"{base_path}&limit={_TEST_MINIMUM_LIMIT}&start={page2_limit1_start_offset + 2}\"\n    )\n    page2_limit1_item4_path = (\n        f\"{base_path}&limit={_TEST_MINIMUM_LIMIT}&start={page2_limit1_start_offset + 3}\"\n    )\n\n    page2_limit1_item1_response = _create_mock_response(\n        200,\n        {\n            \"results\": [{\"id\": 5}],\n            \"_links\": {\"next\": f\"/{page2_limit1_item2_path}\"},\n            \"size\": 1,\n        },  # Note: next link might be present but we check results\n        url=page2_limit1_item1_path,\n    )\n    page2_limit1_item2_error = _create_http_error(\n        500, url=page2_limit1_item2_path\n    )  # The failure\n    page2_limit1_item3_response = _create_mock_response(\n        200,\n        {\n            \"results\": [{\"id\": 7}],\n            \"_links\": {\"next\": f\"/{page2_limit1_item4_path}\"},\n            \"size\": 1,\n        },\n        url=page2_limit1_item3_path,\n    )\n    page2_limit1_item4_response = _create_mock_response(\n        200,\n        {\n            \"results\": [{\"id\": 8}],\n            \"_links\": {\"next\": f\"/{page3_path}\"},\n            \"size\": 1,\n        },\n        url=page2_limit1_item4_path,\n    )\n\n    # Page 3: Success (2 items)\n    page3_response = _create_mock_response(\n        200,\n        {\"results\": [{\"id\": 9}, {\"id\": 10}], \"_links\": {}, \"size\": 2},  # No more pages\n        url=page3_path,\n    )\n\n    # --- Side Effect Logic ---\n    mock_get_call_paths: list[str] = []\n    call_counts: dict[str, int] = {}  # Track calls\n\n    def get_side_effect(\n        path: str,\n        params: dict[str, Any] | None = None,  # noqa: ARG001\n        advanced_mode: bool = False,  # noqa: ARG001\n    ) -> requests.Response:\n        path = path.strip(\"/\")\n        mock_get_call_paths.append(path)\n        call_counts[path] = call_counts.get(path, 0) + 1\n        print(f\"Mock GET received path: {path} (Call #{call_counts[path]})\")\n\n        # Page 1\n        if path == page1_path:\n            print(f\"-> Returning page 1 success for {path}\")\n            return page1_response\n        # Page 2 - Initial Failure\n        elif path == page2_initial_path:\n            print(f\"-> Returning page 2 initial 500 error for {path}\")\n            return page2_initial_error\n        # Page 2 - Limit 1 Retries\n        elif path == page2_limit1_item1_path:\n            print(f\"-> Returning page 2 retry item 1 success for {path}\")\n            return page2_limit1_item1_response\n        elif path == page2_limit1_item2_path:\n            print(f\"-> Returning page 2 retry item 2 500 error for {path}\")\n            return page2_limit1_item2_error\n        elif path == page2_limit1_item3_path:\n            print(f\"-> Returning page 2 retry item 3 success for {path}\")\n            return page2_limit1_item3_response\n        elif path == page2_limit1_item4_path:\n            print(f\"-> Returning page 2 retry item 4 success for {path}\")\n            return page2_limit1_item4_response\n        # Page 3\n        elif path == page3_path:\n            print(f\"-> Returning page 3 success for {path}\")\n            return page3_response\n        # Fallback\n        else:\n            print(f\"!!! Unexpected GET path in mock: {path}\")\n            raise RuntimeError(f\"Unexpected GET path in mock: {path}\")\n\n    confluence_server_client._confluence.get.side_effect = get_side_effect\n\n    # --- Execute ---\n    results = list(\n        confluence_server_client.paginated_cql_retrieval(\n            cql=test_cql,\n            limit=test_limit,\n        )\n    )\n\n    # --- Assertions ---\n    # Verify expected results (ids 1-4 from page 1, 5, 7, 8 from page 2 retry, 9-10 from page 3)\n    expected_results = [\n        # Page 1\n        {\"id\": 1},\n        {\"id\": 2},\n        {\"id\": 3},\n        {\"id\": 4},\n        # Page 2, Item 1 (retry)\n        {\"id\": 5},\n        # {\"id\": 6}, # Skipped due to error\n        {\"id\": 7},  # Page 2, Item 3 (retry)\n        {\"id\": 8},  # Page 2, Item 4 (retry)\n        # Page 3\n        {\"id\": 9},\n        {\"id\": 10},\n    ]\n    assert results == expected_results\n\n    # Verify log for the skipped item failure\n    assert f\"Error in confluence call to /{page2_limit1_item2_path}\" in caplog.text\n\n    # Verify sequence of calls\n    expected_calls = [\n        page1_path,  # Page 1 success\n        page2_initial_path,  # Page 2 initial fail (500)\n        # _paginate_url internal retry logic starts here\n        page2_limit1_item1_path,  # Page 2 retry item 1 success\n        page2_limit1_item2_path,  # Page 2 retry item 2 fail (500) -> logged & skipped\n        page2_limit1_item3_path,  # Page 2 retry item 3 success\n        page2_limit1_item4_path,  # Page 2 retry item 4 success\n        # _paginate_url continues to next calculated page (page 3)\n        page3_path,  # Page 3 success\n    ]\n    assert mock_get_call_paths == expected_calls\n\n\ndef test_paginated_cql_retrieval_skips_completely_failing_page(\n    confluence_server_client: OnyxConfluence, caplog: pytest.LogCaptureFixture\n) -> None:\n    \"\"\"\n    Tests that paginated_cql_retrieval skips an entire page if the initial\n    fetch fails and all subsequent limit=1 retries also fail. It should\n    then proceed to fetch the next page successfully.\n    \"\"\"\n    caplog.set_level(\"WARNING\")\n\n    test_cql = \"type=page\"\n    encoded_cql = \"type%3Dpage\"\n    test_limit = 3  # Small limit for testing\n    _TEST_MINIMUM_LIMIT = 1\n\n    base_path = f\"rest/api/content/search?cql={encoded_cql}\"\n    page1_path = f\"{base_path}&limit={test_limit}\"\n    # Page 2 starts where page 1 left off (start=test_limit)\n    page2_initial_path = f\"{base_path}&limit={test_limit}&start={test_limit}\"\n    # Page 3 starts after the completely failed page 2 (start=test_limit * 2)\n    page3_path = f\"{base_path}&limit={test_limit}&start={test_limit * 2}\"\n\n    # --- Mock Responses ---\n    # Page 1: Success (3 items)\n    page1_response = _create_mock_response(\n        200,\n        {\n            \"results\": [{\"id\": 1}, {\"id\": 2}, {\"id\": 3}],\n            \"_links\": {\"next\": f\"/{page2_initial_path}\"},\n            \"size\": 3,\n        },\n        url=page1_path,\n    )\n\n    # Page 2: Initial attempt fails with 500\n    page2_initial_error = _create_http_error(500, url=page2_initial_path)\n\n    # Page 2: Retry attempts with limit=1 (ALL fail)\n    page2_limit1_start_offset = test_limit\n    page2_limit1_retry_errors = {}\n    # Generate failing responses for each item expected on page 2\n    for i in range(test_limit):\n        item_path = f\"{base_path}&limit={_TEST_MINIMUM_LIMIT}&start={page2_limit1_start_offset + i}\"\n        page2_limit1_retry_errors[item_path] = _create_http_error(500, url=item_path)\n\n    # Page 3: Success (2 items)\n    page3_response = _create_mock_response(\n        200,\n        {\"results\": [{\"id\": 7}, {\"id\": 8}], \"_links\": {}, \"size\": 2},\n        url=page3_path,\n    )\n\n    # --- Side Effect Logic ---\n    mock_get_call_paths: list[str] = []\n    call_counts: dict[str, int] = {}\n\n    def get_side_effect(\n        path: str,\n        params: dict[str, Any] | None = None,  # noqa: ARG001\n        advanced_mode: bool = False,  # noqa: ARG001\n    ) -> requests.Response:\n        path = path.strip(\"/\")\n        mock_get_call_paths.append(path)\n        call_counts[path] = call_counts.get(path, 0) + 1\n        print(f\"Mock GET received path: {path} (Call #{call_counts[path]})\")\n\n        if path == page1_path:\n            print(f\"-> Returning page 1 success for {path}\")\n            return page1_response\n        elif path == page2_initial_path:\n            print(f\"-> Returning page 2 initial 500 error for {path}\")\n            return page2_initial_error\n        elif path in page2_limit1_retry_errors:\n            print(f\"-> Returning page 2 limit=1 retry 500 error for {path}\")\n            return page2_limit1_retry_errors[path]\n        elif path == page3_path:\n            print(f\"-> Returning page 3 success for {path}\")\n            return page3_response\n        else:\n            print(f\"!!! Unexpected GET path in mock: {path}\")\n            raise RuntimeError(f\"Unexpected GET path in mock: {path}\")\n\n    confluence_server_client._confluence.get.side_effect = get_side_effect\n\n    # --- Execute ---\n    results = list(\n        confluence_server_client.paginated_cql_retrieval(\n            cql=test_cql,\n            limit=test_limit,\n        )\n    )\n\n    # --- Assertions ---\n    # Verify expected results (ids 1-3 from page 1, 7-8 from page 3)\n    expected_results = [\n        {\"id\": 1},\n        {\"id\": 2},\n        {\"id\": 3},  # Page 1\n        # Page 2 completely skipped\n        {\"id\": 7},\n        {\"id\": 8},  # Page 3\n    ]\n    assert results == expected_results\n\n    # Verify logs for the failed retry attempts on page 2\n    for failed_path in page2_limit1_retry_errors:\n        assert f\"Error in confluence call to /{failed_path}\" in caplog.text\n    assert (\n        f\"Error in confluence call to {page2_initial_path}\" not in caplog.text\n    )  # Initial error triggers retry, not direct logging in _paginate_url\n\n    # Verify sequence of calls\n    expected_calls = [\n        page1_path,  # Page 1 success\n        page2_initial_path,  # Page 2 initial fail (500)\n    ]\n    # Add the failed limit=1 retry calls for page 2\n    expected_calls.extend(list(page2_limit1_retry_errors.keys()))\n    # The retry loop should make one final call to check if there are more items\n    # expected_calls.append(page2_limit1_final_empty_path)\n    # Add the call to page 3\n    expected_calls.append(page3_path)\n\n    assert mock_get_call_paths == expected_calls\n\n\ndef test_paginated_cql_retrieval_cloud_no_retry_on_error(\n    mock_credentials_provider: mock.Mock,\n) -> None:\n    \"\"\"\n    Tests that for Confluence Cloud (is_cloud=True), paginated_cql_retrieval\n    does NOT retry on pagination errors and raises HTTPError immediately.\n    \"\"\"\n    # Setup Confluence Cloud Client\n    confluence_cloud_client = OnyxConfluence(\n        is_cloud=True,  # Key difference: Cloud instance\n        url=\"https://fake-cloud.atlassian.net\",\n        credentials_provider=mock_credentials_provider,\n        timeout=10,\n    )\n    mock_internal_client = mock.Mock()\n    mock_internal_client.url = confluence_cloud_client._url\n    confluence_cloud_client._confluence = mock_internal_client\n    confluence_cloud_client._kwargs = confluence_cloud_client.shared_base_kwargs\n\n    test_cql = \"type=page\"\n    encoded_cql = \"type%3Dpage\"\n    test_limit = 50  # Use a standard limit\n\n    base_path = f\"rest/api/content/search?cql={encoded_cql}\"\n    page1_path = f\"{base_path}&limit={test_limit}\"\n    page2_path = f\"{base_path}&limit={test_limit}&start={test_limit}\"\n\n    # --- Mock Responses ---\n    # Page 1: Success\n    page1_response = _create_mock_response(\n        200,\n        {\n            \"results\": [{\"id\": i} for i in range(test_limit)],\n            \"_links\": {\"next\": f\"/{page2_path}\"},\n            \"size\": test_limit,\n        },\n        url=page1_path,\n    )\n\n    # Page 2: Failure (500)\n    page2_error = _create_http_error(500, url=page2_path)\n\n    # --- Side Effect Logic ---\n    mock_get_call_paths: list[str] = []\n\n    def get_side_effect(\n        path: str,\n        params: dict[str, Any] | None = None,  # noqa: ARG001\n        advanced_mode: bool = False,  # noqa: ARG001\n    ) -> requests.Response:\n        path = path.strip(\"/\")\n        mock_get_call_paths.append(path)\n        print(f\"Mock GET received path: {path}\")\n\n        if path == page1_path:\n            print(f\"-> Returning page 1 success for {path}\")\n            return page1_response\n        elif path == page2_path:\n            print(f\"-> Returning page 2 500 error for {path}\")\n            return page2_error\n        else:\n            # No other paths (like limit=1 retries) should be called\n            print(f\"!!! Unexpected GET path in mock for Cloud test: {path}\")\n            raise RuntimeError(f\"Unexpected GET path in mock for Cloud test: {path}\")\n\n    confluence_cloud_client._confluence.get.side_effect = get_side_effect\n\n    # --- Execute & Assert ---\n    with pytest.raises(HTTPError) as excinfo:\n        # Consume the iterator to trigger calls\n        list(\n            confluence_cloud_client.paginated_cql_retrieval(\n                cql=test_cql,\n                limit=test_limit,\n            )\n        )\n\n    # Verify the error is the one we simulated for page 2\n    assert excinfo.value.response == page2_error\n    assert excinfo.value.response.status_code == 500\n    assert page2_path in excinfo.value.response.url\n\n    # Verify only two calls were made (page 1 success, page 2 fail)\n    # Crucially, no retry attempts with different limits should exist.\n    assert mock_get_call_paths == [page1_path, page2_path]\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/confluence/test_rate_limit_handler.py",
    "content": "from unittest.mock import Mock\n\nimport pytest\nfrom requests import HTTPError\n\nfrom onyx.connectors.confluence.utils import handle_confluence_rate_limit\n\n\n@pytest.fixture\ndef mock_confluence_call() -> Mock:\n    return Mock()\n\n\n# ***** Checking call count to sleep() won't correctly reflect test correctness\n# especially since we really need to sleep multiple times and check for\n# abort signals moving forward. Disabling this test for now until we come up with\n# a better way forward.\n\n# @pytest.mark.parametrize(\n#     \"status_code,text,retry_after\",\n#     [\n#         (429, \"Rate limit exceeded\", \"5\"),\n#         (200, \"Rate limit exceeded\", None),\n#         (429, \"Some other error\", \"5\"),\n#     ],\n# )\n# def test_rate_limit_handling(\n#     mock_confluence_call: Mock, status_code: int, text: str, retry_after: str | None\n# ) -> None:\n#     with patch(\"time.sleep\") as mock_sleep:\n#         mock_confluence_call.side_effect = [\n#             HTTPError(\n#                 response=Mock(\n#                     status_code=status_code,\n#                     text=text,\n#                     headers={\"Retry-After\": retry_after} if retry_after else {},\n#                 )\n#             ),\n#         ] * 2 + [\"Success\"]\n\n#         handled_call = make_confluence_call_handle_rate_limit(mock_confluence_call)\n#         result = handled_call()\n\n#         assert result == \"Success\"\n#         assert mock_confluence_call.call_count == 3\n#         assert mock_sleep.call_count == 2\n#         if retry_after:\n#             mock_sleep.assert_called_with(int(retry_after))\n\n\n# NOTE(rkuo): This tests an older version of rate limiting that is being deprecated\n# and probably should go away soon.\ndef test_non_rate_limit_error(mock_confluence_call: Mock) -> None:\n    mock_confluence_call.side_effect = HTTPError(\n        response=Mock(status_code=500, text=\"Internal Server Error\")\n    )\n\n    handled_call = handle_confluence_rate_limit(mock_confluence_call)\n\n    with pytest.raises(HTTPError):\n        handled_call()\n\n    assert mock_confluence_call.call_count == 5\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/cross_connector_utils/test_html_utils.py",
    "content": "import pathlib\n\nfrom onyx.file_processing.html_utils import parse_html_page_basic\n\n\ndef test_parse_table() -> None:\n    dir_path = pathlib.Path(__file__).parent.resolve()\n    with open(f\"{dir_path}/test_table.html\", \"r\") as file:\n        content = file.read()\n\n    parsed = parse_html_page_basic(content)\n    expected = \"\\n\\thello\\tthere\\tgeneral\\n\\tkenobi\\ta\\tb\\n\\tc\\td\\te\"\n    assert expected in parsed\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/cross_connector_utils/test_rate_limit.py",
    "content": "import time\n\nfrom onyx.connectors.cross_connector_utils.rate_limit_wrapper import (\n    rate_limit_builder,\n)\n\n\ndef test_rate_limit_basic() -> None:\n    call_cnt = 0\n\n    @rate_limit_builder(max_calls=2, period=5)\n    def func() -> None:\n        nonlocal call_cnt\n        call_cnt += 1\n\n    start = time.time()\n\n    # Make calls that shouldn't be rate-limited\n    func()\n    func()\n    time_to_finish_non_ratelimited = time.time() - start\n\n    # Make a call which SHOULD be rate-limited\n    func()\n    time_to_finish_ratelimited = time.time() - start\n\n    assert call_cnt == 3\n    assert time_to_finish_non_ratelimited < 1\n    assert time_to_finish_ratelimited > 5\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/cross_connector_utils/test_table.html",
    "content": "<p>This page is to ensure we’re able to parse a table into a tsv</p>\n<table\n  data-table-width=\"760\"\n  data-layout=\"default\"\n  ac:local-id=\"3ad64d9f-01f1-4f78-876e-0fdf84e826a6\"\n>\n  <tbody>\n    <tr>\n      <th>\n        <p><strong>hello</strong></p>\n      </th>\n      <th>\n        <p><strong>there</strong></p>\n      </th>\n      <th>\n        <p><strong>general</strong></p>\n      </th>\n    </tr>\n    <tr>\n      <td>\n        <p>kenobi</p>\n      </td>\n      <td>\n        <p>a</p>\n      </td>\n      <td>\n        <p>b</p>\n      </td>\n    </tr>\n    <tr>\n      <td>\n        <p>c</p>\n      </td>\n      <td>\n        <p>d</p>\n      </td>\n      <td>\n        <p>e</p>\n      </td>\n    </tr>\n  </tbody>\n</table>\n<p />\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/discord/test_discord_validation.py",
    "content": "from unittest.mock import AsyncMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom discord.errors import LoginFailure\n\nfrom onyx.connectors.discord.connector import DiscordConnector\nfrom onyx.connectors.exceptions import CredentialInvalidError\n\n\ndef _build_connector(token: str = \"fake-bot-token\") -> DiscordConnector:\n    connector = DiscordConnector()\n    connector.load_credentials({\"discord_bot_token\": token})\n    return connector\n\n\n@patch(\"onyx.connectors.discord.connector.Client.close\", new_callable=AsyncMock)\n@patch(\"onyx.connectors.discord.connector.Client.login\", new_callable=AsyncMock)\ndef test_validate_success(\n    mock_login: AsyncMock,\n    mock_close: AsyncMock,\n) -> None:\n    connector = _build_connector()\n    connector.validate_connector_settings()\n\n    mock_login.assert_awaited_once_with(\"fake-bot-token\")\n    mock_close.assert_awaited_once()\n\n\n@patch(\"onyx.connectors.discord.connector.Client.close\", new_callable=AsyncMock)\n@patch(\n    \"onyx.connectors.discord.connector.Client.login\",\n    new_callable=AsyncMock,\n    side_effect=LoginFailure(\"Improper token has been passed.\"),\n)\ndef test_validate_invalid_token(\n    mock_login: AsyncMock,  # noqa: ARG001\n    mock_close: AsyncMock,\n) -> None:\n    connector = _build_connector(token=\"bad-token\")\n\n    with pytest.raises(CredentialInvalidError, match=\"Invalid Discord bot token\"):\n        connector.validate_connector_settings()\n\n    mock_close.assert_awaited_once()\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/github/test_github_checkpointing.py",
    "content": "import time\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import cast\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom github import Github\nfrom github import RateLimitExceededException\nfrom github.GithubException import GithubException\nfrom github.Issue import Issue\nfrom github.PaginatedList import PaginatedList\nfrom github.PullRequest import PullRequest\nfrom github.RateLimit import RateLimit\nfrom github.Repository import Repository\nfrom github.Requester import Requester\n\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.github.connector import GithubConnector\nfrom onyx.connectors.github.connector import GithubConnectorStage\nfrom onyx.connectors.github.models import SerializedRepository\nfrom onyx.connectors.models import Document\nfrom tests.unit.onyx.connectors.utils import load_everything_from_checkpoint_connector\nfrom tests.unit.onyx.connectors.utils import (\n    load_everything_from_checkpoint_connector_from_checkpoint,\n)\n\n\n@pytest.fixture\ndef repo_owner() -> str:\n    return \"test-org\"\n\n\n@pytest.fixture\ndef repositories() -> str:\n    return \"test-repo\"\n\n\n@pytest.fixture\ndef mock_github_client() -> MagicMock:\n    \"\"\"Create a mock GitHub client with proper typing\"\"\"\n    mock = MagicMock(spec=Github)\n    mock.get_repo = MagicMock()\n    mock.get_organization = MagicMock()\n    mock.get_user = MagicMock()\n    mock.get_rate_limit = MagicMock(return_value=MagicMock(spec=RateLimit))\n    mock._requester = MagicMock(spec=Requester)\n    return mock\n\n\n@pytest.fixture\ndef build_github_connector(\n    repo_owner: str, repositories: str, mock_github_client: MagicMock\n) -> Generator[Callable[..., GithubConnector], None, None]:\n    def _github_connector(\n        repo_owner: str = repo_owner, repositories: str = repositories\n    ) -> GithubConnector:\n        connector = GithubConnector(\n            repo_owner=repo_owner,\n            repositories=repositories,\n            include_prs=True,\n            include_issues=True,\n        )\n        connector.github_client = mock_github_client\n        return connector\n\n    yield _github_connector\n\n\n@pytest.fixture\ndef create_mock_pr() -> Callable[..., MagicMock]:\n    def _create_mock_pr(\n        number: int = 1,\n        title: str = \"Test PR\",\n        body: str = \"Test Description\",\n        state: str = \"open\",\n        merged: bool = False,\n        updated_at: datetime = datetime(2023, 1, 1, tzinfo=timezone.utc),\n        html_url: str | None = None,\n    ) -> MagicMock:\n        \"\"\"Helper to create a mock PullRequest object\"\"\"\n        mock_pr = MagicMock(spec=PullRequest)\n        mock_pr.number = number\n        mock_pr.title = title\n        mock_pr.body = body\n        mock_pr.state = state\n        mock_pr.merged = merged\n        mock_pr.updated_at = updated_at\n        mock_pr.html_url = (\n            html_url\n            if html_url is not None\n            else f\"https://github.com/test-org/test-repo/pull/{number}\"\n        )\n        mock_pr.raw_data = {}\n        mock_pr.base = MagicMock()\n        mock_pr.base.repo = MagicMock()\n        mock_pr.base.repo.full_name = \"test-org/test-repo\"\n\n        return mock_pr\n\n    return _create_mock_pr\n\n\n@pytest.fixture\ndef create_mock_issue() -> Callable[..., MagicMock]:\n    def _create_mock_issue(\n        number: int = 1,\n        title: str = \"Test Issue\",\n        body: str = \"Test Description\",\n        state: str = \"open\",\n        updated_at: datetime = datetime(2023, 1, 1, tzinfo=timezone.utc),\n    ) -> MagicMock:\n        \"\"\"Helper to create a mock Issue object\"\"\"\n        mock_issue = MagicMock(spec=Issue)\n        mock_issue.number = number\n        mock_issue.title = title\n        mock_issue.body = body\n        mock_issue.state = state\n        mock_issue.updated_at = updated_at\n        mock_issue.html_url = f\"https://github.com/test-org/test-repo/issues/{number}\"\n        mock_issue.pull_request = None  # Not a PR\n        mock_issue.raw_data = {}\n\n        # Mock the nested base.repo.full_name attribute\n        mock_issue.repository = MagicMock()\n        mock_issue.repository.full_name = \"test-org/test-repo\"\n\n        return mock_issue\n\n    return _create_mock_issue\n\n\n@pytest.fixture\ndef create_mock_repo() -> Callable[..., MagicMock]:\n    def _create_mock_repo(\n        name: str = \"test-repo\",\n        id: int = 1,\n    ) -> MagicMock:\n        mock_repo = MagicMock()\n        mock_repo.name = name\n        mock_repo.id = id\n\n        headers_dict = {\"status\": \"200 OK\", \"content-type\": \"application/json\"}\n        data_dict = {\n            \"id\": id,\n            \"name\": name,\n            \"full_name\": f\"test-org/{name}\",\n            \"private\": False,\n            \"description\": \"Test repository\",\n        }\n\n        mock_repo.configure_mock(raw_headers=headers_dict, raw_data=data_dict)\n\n        mock_repo.get_pulls = MagicMock()\n        mock_repo.get_issues = MagicMock()\n        mock_repo.get_contents = MagicMock()\n\n        return mock_repo\n\n    return _create_mock_repo\n\n\ndef test_load_from_checkpoint_happy_path(\n    build_github_connector: Callable[..., GithubConnector],\n    mock_github_client: MagicMock,\n    create_mock_repo: Callable[..., MagicMock],\n    create_mock_pr: Callable[..., MagicMock],\n    create_mock_issue: Callable[..., MagicMock],\n) -> None:\n    \"\"\"Test loading from checkpoint - happy path\"\"\"\n    # Set up mocked repo\n    github_connector = build_github_connector()\n    mock_repo = create_mock_repo()\n    github_connector.github_client = mock_github_client\n    mock_github_client.get_repo.return_value = mock_repo\n\n    # Set up mocked PRs and issues\n    mock_pr1 = create_mock_pr(number=1, title=\"PR 1\")\n    mock_pr2 = create_mock_pr(number=2, title=\"PR 2\")\n    mock_issue1 = create_mock_issue(number=1, title=\"Issue 1\")\n    mock_issue2 = create_mock_issue(number=2, title=\"Issue 2\")\n\n    # Mock get_pulls and get_issues methods\n    mock_repo.get_pulls.return_value = MagicMock()\n    mock_repo.get_pulls.return_value.get_page.side_effect = [\n        [mock_pr1, mock_pr2],\n        [],\n    ]\n    mock_repo.get_issues.return_value = MagicMock()\n    mock_repo.get_issues.return_value.get_page.side_effect = [\n        [mock_issue1, mock_issue2],\n        [],\n    ]\n\n    # Mock SerializedRepository.to_Repository to return our mock repo\n    with patch.object(SerializedRepository, \"to_Repository\", return_value=mock_repo):\n        # Call load_from_checkpoint\n        end_time = time.time()\n        outputs = load_everything_from_checkpoint_connector(\n            github_connector, 0, end_time\n        )\n\n        # Check that we got all documents and final has_more=False\n        assert len(outputs) == 4\n\n        repo_batch = outputs[0]\n        assert len(repo_batch.items) == 0\n        assert repo_batch.next_checkpoint.has_more is True\n\n        # Check first batch (PRs)\n        first_batch = outputs[1]\n        assert len(first_batch.items) == 2\n        assert isinstance(first_batch.items[0], Document)\n        assert first_batch.items[0].id == \"https://github.com/test-org/test-repo/pull/1\"\n        assert isinstance(first_batch.items[1], Document)\n        assert first_batch.items[1].id == \"https://github.com/test-org/test-repo/pull/2\"\n        assert first_batch.next_checkpoint.curr_page == 1\n\n        # Check second batch (Issues)\n        second_batch = outputs[2]\n        assert len(second_batch.items) == 2\n        assert isinstance(second_batch.items[0], Document)\n        assert (\n            second_batch.items[0].id == \"https://github.com/test-org/test-repo/issues/1\"\n        )\n        assert isinstance(second_batch.items[1], Document)\n        assert (\n            second_batch.items[1].id == \"https://github.com/test-org/test-repo/issues/2\"\n        )\n        assert second_batch.next_checkpoint.has_more\n\n        # Check third batch (finished checkpoint)\n        third_batch = outputs[3]\n        assert len(third_batch.items) == 0\n        assert third_batch.next_checkpoint.has_more is False\n\n\ndef test_load_from_checkpoint_with_rate_limit(\n    build_github_connector: Callable[..., GithubConnector],\n    mock_github_client: MagicMock,\n    create_mock_repo: Callable[..., MagicMock],\n    create_mock_pr: Callable[..., MagicMock],\n) -> None:\n    \"\"\"Test loading from checkpoint with rate limit handling\"\"\"\n    # Set up mocked repo\n    github_connector = build_github_connector()\n    mock_repo = create_mock_repo()\n    github_connector.github_client = mock_github_client\n    mock_github_client.get_repo.return_value = mock_repo\n\n    # Set up mocked PR\n    mock_pr = create_mock_pr()\n\n    # Mock get_pulls to raise RateLimitExceededException on first call\n    mock_repo.get_pulls.return_value = MagicMock()\n    mock_repo.get_pulls.return_value.get_page.side_effect = [\n        RateLimitExceededException(403, {\"message\": \"Rate limit exceeded\"}, {}),\n        [mock_pr],\n        [],\n    ]\n\n    # Mock rate limit reset time\n    mock_rate_limit = MagicMock(spec=RateLimit)\n    mock_rate_limit.core.reset = datetime.now(timezone.utc)\n    github_connector.github_client.get_rate_limit.return_value = mock_rate_limit\n\n    # Mock SerializedRepository.to_Repository to return our mock repo\n    with patch.object(SerializedRepository, \"to_Repository\", return_value=mock_repo):\n        # Call load_from_checkpoint\n        end_time = time.time()\n        with patch(\n            \"onyx.connectors.github.connector.sleep_after_rate_limit_exception\"\n        ) as mock_sleep:\n            outputs = load_everything_from_checkpoint_connector(\n                github_connector, 0, end_time\n            )\n\n            assert mock_sleep.call_count == 1\n\n        # Check that we got the document after rate limit was handled\n        assert len(outputs) >= 2\n        assert len(outputs[1].items) == 1\n        assert isinstance(outputs[1].items[0], Document)\n        assert outputs[1].items[0].id == \"https://github.com/test-org/test-repo/pull/1\"\n\n        assert outputs[-1].next_checkpoint.has_more is False\n\n\ndef test_load_from_checkpoint_with_empty_repo(\n    build_github_connector: Callable[..., GithubConnector],\n    mock_github_client: MagicMock,\n    create_mock_repo: Callable[..., MagicMock],\n) -> None:\n    \"\"\"Test loading from checkpoint with an empty repository\"\"\"\n    # Set up mocked repo\n    mock_repo = create_mock_repo()\n    github_connector = build_github_connector()\n    github_connector.github_client = mock_github_client\n    mock_github_client.get_repo.return_value = mock_repo\n\n    # Mock get_pulls and get_issues to return empty lists\n    mock_repo.get_pulls.return_value = MagicMock()\n    mock_repo.get_pulls.return_value.get_page.return_value = []\n    mock_repo.get_issues.return_value = MagicMock()\n    mock_repo.get_issues.return_value.get_page.return_value = []\n\n    # Mock SerializedRepository.to_Repository to return our mock repo\n    with patch.object(SerializedRepository, \"to_Repository\", return_value=mock_repo):\n        # Call load_from_checkpoint\n        end_time = time.time()\n        outputs = load_everything_from_checkpoint_connector(\n            github_connector, 0, end_time\n        )\n\n        # Check that we got no documents\n        assert len(outputs) == 2\n        assert len(outputs[-1].items) == 0\n        assert not outputs[-1].next_checkpoint.has_more\n\n\ndef test_load_from_checkpoint_with_prs_only(\n    build_github_connector: Callable[..., GithubConnector],\n    mock_github_client: MagicMock,\n    create_mock_repo: Callable[..., MagicMock],\n    create_mock_pr: Callable[..., MagicMock],\n) -> None:\n    \"\"\"Test loading from checkpoint with only PRs enabled\"\"\"\n    # Configure connector to only include PRs\n    github_connector = build_github_connector()\n    github_connector.include_prs = True\n    github_connector.include_issues = False\n\n    # Set up mocked repo\n    mock_repo = create_mock_repo()\n    github_connector.github_client = mock_github_client\n    mock_github_client.get_repo.return_value = mock_repo\n\n    # Set up mocked PRs\n    mock_pr1 = create_mock_pr(number=1, title=\"PR 1\")\n    mock_pr2 = create_mock_pr(number=2, title=\"PR 2\")\n\n    # Mock get_pulls method\n    mock_repo.get_pulls.return_value = MagicMock()\n    mock_repo.get_pulls.return_value.get_page.side_effect = [\n        [mock_pr1, mock_pr2],\n        [],\n    ]\n\n    # Mock SerializedRepository.to_Repository to return our mock repo\n    with patch.object(SerializedRepository, \"to_Repository\", return_value=mock_repo):\n        # Call load_from_checkpoint\n        end_time = time.time()\n        outputs = load_everything_from_checkpoint_connector(\n            github_connector, 0, end_time\n        )\n\n        # Check that we only got PRs\n        assert len(outputs) >= 2\n        assert len(outputs[1].items) == 2\n        assert all(\n            isinstance(doc, Document) and \"pull\" in doc.id for doc in outputs[0].items\n        )  # All documents should be PRs\n\n        assert outputs[-1].next_checkpoint.has_more is False\n\n\ndef test_load_from_checkpoint_with_issues_only(\n    build_github_connector: Callable[..., GithubConnector],\n    mock_github_client: MagicMock,\n    create_mock_repo: Callable[..., MagicMock],\n    create_mock_issue: Callable[..., MagicMock],\n) -> None:\n    \"\"\"Test loading from checkpoint with only issues enabled\"\"\"\n    # Configure connector to only include issues\n    github_connector = build_github_connector()\n    github_connector.include_prs = False\n    github_connector.include_issues = True\n\n    # Set up mocked repo\n    mock_repo = create_mock_repo()\n    github_connector.github_client = mock_github_client\n    mock_github_client.get_repo.return_value = mock_repo\n\n    # Set up mocked issues\n    mock_issue1 = create_mock_issue(number=1, title=\"Issue 1\")\n    mock_issue2 = create_mock_issue(number=2, title=\"Issue 2\")\n\n    # Mock get_issues method\n    mock_repo.get_issues.return_value = MagicMock()\n    mock_repo.get_issues.return_value.get_page.side_effect = [\n        [mock_issue1, mock_issue2],\n        [],\n    ]\n\n    # Mock SerializedRepository.to_Repository to return our mock repo\n    with patch.object(SerializedRepository, \"to_Repository\", return_value=mock_repo):\n        # Call load_from_checkpoint\n        end_time = time.time()\n        outputs = load_everything_from_checkpoint_connector(\n            github_connector, 0, end_time\n        )\n\n        # Check that we only got issues\n        assert len(outputs) >= 2\n        assert len(outputs[1].items) == 2\n        assert all(\n            isinstance(doc, Document) and \"issues\" in doc.id for doc in outputs[0].items\n        )  # All documents should be issues\n        assert outputs[1].next_checkpoint.has_more\n\n        assert outputs[-1].next_checkpoint.has_more is False\n\n\n@pytest.mark.parametrize(\n    \"status_code,expected_exception,expected_message\",\n    [\n        (\n            401,\n            CredentialExpiredError,\n            \"GitHub credential appears to be invalid or expired\",\n        ),\n        (\n            403,\n            InsufficientPermissionsError,\n            \"Your GitHub token does not have sufficient permissions\",\n        ),\n        (\n            404,\n            ConnectorValidationError,\n            \"GitHub repository not found\",\n        ),\n    ],\n)\ndef test_validate_connector_settings_errors(\n    build_github_connector: Callable[..., GithubConnector],\n    status_code: int,\n    expected_exception: type[Exception],\n    expected_message: str,\n) -> None:\n    \"\"\"Test validation with various error scenarios\"\"\"\n    error = GithubException(status=status_code, data={}, headers={})\n\n    github_connector = build_github_connector()\n    github_client = cast(Github, github_connector.github_client)\n    get_repo_mock = cast(MagicMock, github_client.get_repo)\n    get_repo_mock.side_effect = error\n\n    with pytest.raises(expected_exception) as excinfo:\n        github_connector.validate_connector_settings()\n    assert expected_message in str(excinfo.value)\n\n\ndef test_validate_connector_settings_success(\n    build_github_connector: Callable[..., GithubConnector],\n    mock_github_client: MagicMock,\n    create_mock_repo: Callable[..., MagicMock],\n) -> None:\n    \"\"\"Test successful validation\"\"\"\n    # Set up mocked repo\n    mock_repo = create_mock_repo()\n    github_connector = build_github_connector()\n    github_connector.github_client = mock_github_client\n    mock_github_client.get_repo.return_value = mock_repo\n\n    # Mock get_contents to simulate successful access\n    mock_repo.get_contents.return_value = MagicMock()\n\n    github_connector.validate_connector_settings()\n    github_connector.github_client.get_repo.assert_called_once_with(\n        f\"{github_connector.repo_owner}/{github_connector.repositories}\"\n    )\n\n\ndef test_load_from_checkpoint_with_cursor_fallback(\n    build_github_connector: Callable[..., GithubConnector],\n    mock_github_client: MagicMock,\n    create_mock_repo: Callable[..., MagicMock],\n    create_mock_pr: Callable[..., MagicMock],\n) -> None:\n    \"\"\"Test loading from checkpoint with fallback to cursor-based pagination\"\"\"\n    # Set up mocked repo\n    mock_repo = create_mock_repo()\n    github_connector = build_github_connector()\n    github_connector.github_client = mock_github_client\n    mock_github_client.get_repo.return_value = mock_repo\n\n    # Set up mocked PRs\n    mock_pr1 = create_mock_pr(number=1, title=\"PR 1\")\n    mock_pr2 = create_mock_pr(number=2, title=\"PR 2\")\n\n    # Create a mock paginated list that will raise the 422 error on get_page\n    mock_paginated_list = MagicMock()\n    mock_paginated_list.get_page.side_effect = [\n        GithubException(\n            422,\n            {\n                \"message\": \"Pagination with the page parameter is not supported for large datasets. Use cursor\"\n            },\n            {},\n        ),\n    ]\n\n    # Create a new mock for cursor-based pagination\n    mock_cursor_paginated_list = MagicMock()\n    mock_cursor_paginated_list.__nextUrl = (\n        \"https://api.github.com/repos/test-org/test-repo/pulls?cursor=abc123\"\n    )\n    mock_cursor_paginated_list.__iter__.return_value = iter([mock_pr1, mock_pr2])\n\n    mock_repo.get_pulls.side_effect = [\n        mock_paginated_list,\n        mock_cursor_paginated_list,\n    ]\n\n    # Mock SerializedRepository.to_Repository to return our mock repo\n    with patch.object(SerializedRepository, \"to_Repository\", return_value=mock_repo):\n        # Call load_from_checkpoint\n        end_time = time.time()\n        outputs = load_everything_from_checkpoint_connector(\n            github_connector, 0, end_time\n        )\n\n        # Check that we got the documents via cursor-based pagination\n        assert len(outputs) >= 2\n        assert len(outputs[1].items) == 2\n        assert isinstance(outputs[1].items[0], Document)\n        assert outputs[1].items[0].id == \"https://github.com/test-org/test-repo/pull/1\"\n        assert isinstance(outputs[1].items[1], Document)\n        assert outputs[1].items[1].id == \"https://github.com/test-org/test-repo/pull/2\"\n\n        # Verify cursor URL is not set in checkpoint since pagination succeeded without failures\n        assert outputs[1].next_checkpoint.cursor_url is None\n        assert outputs[1].next_checkpoint.num_retrieved == 0\n\n\ndef test_load_from_checkpoint_resume_cursor_pagination(\n    build_github_connector: Callable[..., GithubConnector],\n    mock_github_client: MagicMock,\n    create_mock_repo: Callable[..., MagicMock],\n    create_mock_pr: Callable[..., MagicMock],\n) -> None:\n    \"\"\"Test resuming from a checkpoint that was using cursor-based pagination\"\"\"\n    # Set up mocked repo\n    mock_repo = create_mock_repo()\n    github_connector = build_github_connector()\n    github_connector.github_client = mock_github_client\n    mock_github_client.get_repo.return_value = mock_repo\n\n    # Set up mocked PRs\n    mock_pr3 = create_mock_pr(number=3, title=\"PR 3\")\n    mock_pr4 = create_mock_pr(number=4, title=\"PR 4\")\n\n    # Create a checkpoint that was using cursor-based pagination\n    checkpoint = github_connector.build_dummy_checkpoint()\n    checkpoint.cursor_url = (\n        \"https://api.github.com/repos/test-org/test-repo/pulls?cursor=abc123\"\n    )\n    checkpoint.num_retrieved = 2\n\n    # Mock get_pulls to use cursor-based pagination\n    mock_paginated_list = MagicMock()\n    mock_paginated_list.__nextUrl = (\n        \"https://api.github.com/repos/test-org/test-repo/pulls?cursor=def456\"\n    )\n    mock_paginated_list.__iter__.return_value = iter([mock_pr3, mock_pr4])\n    mock_repo.get_pulls.return_value = mock_paginated_list\n\n    # Mock SerializedRepository.to_Repository to return our mock repo\n    with patch.object(SerializedRepository, \"to_Repository\", return_value=mock_repo):\n        # Call load_from_checkpoint with the checkpoint\n        end_time = time.time()\n        outputs = load_everything_from_checkpoint_connector_from_checkpoint(\n            github_connector, 0, end_time, checkpoint\n        )\n\n        # Check that we got the documents via cursor-based pagination\n        assert len(outputs) >= 2\n        assert len(outputs[1].items) == 2\n        assert isinstance(outputs[1].items[0], Document)\n        assert outputs[1].items[0].id == \"https://github.com/test-org/test-repo/pull/3\"\n        assert isinstance(outputs[1].items[1], Document)\n        assert outputs[1].items[1].id == \"https://github.com/test-org/test-repo/pull/4\"\n\n        # Verify cursor URL was stored in checkpoint\n        assert outputs[1].next_checkpoint.cursor_url is None\n        assert outputs[1].next_checkpoint.num_retrieved == 0\n\n\ndef test_load_from_checkpoint_cursor_expiration(\n    build_github_connector: Callable[..., GithubConnector],\n    mock_github_client: MagicMock,\n    create_mock_repo: Callable[..., MagicMock],\n    create_mock_pr: Callable[..., MagicMock],\n) -> None:\n    \"\"\"Test handling of cursor expiration during cursor-based pagination\"\"\"\n    # Set up mocked repo\n    mock_repo = create_mock_repo()\n    github_connector = build_github_connector()\n    github_connector.github_client = mock_github_client\n    mock_github_client.get_repo.return_value = mock_repo\n\n    # Set up mocked PRs\n    mock_pr4 = create_mock_pr(number=4, title=\"PR 4\")\n\n    # Create a checkpoint with an expired cursor\n    checkpoint = github_connector.build_dummy_checkpoint()\n    checkpoint.cursor_url = (\n        \"https://api.github.com/repos/test-org/test-repo/pulls?cursor=expired\"\n    )\n    checkpoint.num_retrieved = 3  # We've already retrieved 3 items\n\n    # Mock get_pulls to simulate cursor expiration by raising an error before any results\n    mock_paginated_list = MagicMock()\n    mock_paginated_list.__nextUrl = (\n        \"https://api.github.com/repos/test-org/test-repo/pulls?cursor=expired\"\n    )\n    mock_paginated_list.__iter__.side_effect = GithubException(\n        422, {\"message\": \"Cursor expired\"}, {}\n    )\n\n    # Create a new mock for successful retrieval after retry\n    mock_retry_paginated_list = MagicMock()\n    mock_retry_paginated_list.__nextUrl = None\n\n    # Create an iterator that will yield the remaining PR\n    def retry_iterator() -> Generator[PullRequest, None, None]:\n        yield mock_pr4\n\n    # Create a mock for the _Slice object that will be returned by pag_list[prev_num_objs:]\n    mock_slice = MagicMock()\n    mock_slice.__iter__.return_value = retry_iterator()\n\n    # Set up the slice behavior for the retry paginated list\n    mock_retry_paginated_list.__getitem__.return_value = mock_slice\n\n    # Set up the side effect for get_pulls to return our mocks\n    mock_repo.get_pulls.side_effect = [\n        mock_paginated_list,\n        mock_retry_paginated_list,\n    ]\n\n    # Mock SerializedRepository.to_Repository to return our mock repo\n    with patch.object(SerializedRepository, \"to_Repository\", return_value=mock_repo):\n        # Call load_from_checkpoint with the checkpoint\n        end_time = time.time()\n        outputs = load_everything_from_checkpoint_connector_from_checkpoint(\n            github_connector, 0, end_time, checkpoint\n        )\n\n        # Check that we got the remaining document after retrying from the beginning\n        assert len(outputs) >= 2\n        assert len(outputs[1].items) == 1\n        assert isinstance(outputs[1].items[0], Document)\n        assert outputs[1].items[0].id == \"https://github.com/test-org/test-repo/pull/4\"\n\n        # Verify cursor URL was cleared in checkpoint\n        assert outputs[1].next_checkpoint.cursor_url is None\n        assert outputs[1].next_checkpoint.num_retrieved == 0\n\n        # Verify that the slice was called with the correct argument\n        mock_retry_paginated_list.__getitem__.assert_called_once_with(slice(3, None))\n\n\ndef test_load_from_checkpoint_cursor_pagination_completion(\n    build_github_connector: Callable[..., GithubConnector],\n    mock_github_client: MagicMock,\n    create_mock_repo: Callable[..., MagicMock],\n    create_mock_pr: Callable[..., MagicMock],\n) -> None:\n    \"\"\"Test behavior when cursor-based pagination completes and moves to next repository\"\"\"\n    # Set up two repositories\n    mock_repo1 = create_mock_repo(name=\"repo1\", id=1)\n    mock_repo2 = create_mock_repo(name=\"repo2\", id=2)\n\n    # Initialize connector with no specific repositories, so _get_all_repos is used\n    github_connector = build_github_connector(repositories=\"\")\n    github_connector.github_client = mock_github_client\n    mock_pr1 = create_mock_pr(\n        number=1,\n        title=\"PR 1 Repo 1\",\n        html_url=\"https://github.com/test-org/repo1/pull/1\",\n    )\n    mock_pr2 = create_mock_pr(\n        number=2,\n        title=\"PR 2 Repo 1\",\n        html_url=\"https://github.com/test-org/repo1/pull/2\",\n    )\n    mock_pr3 = create_mock_pr(\n        number=3,\n        title=\"PR 3 Repo 2\",\n        html_url=\"https://github.com/test-org/repo2/pull/3\",\n    )\n    mock_pr4 = create_mock_pr(\n        number=4,\n        title=\"PR 4 Repo 2\",\n        html_url=\"https://github.com/test-org/repo2/pull/4\",\n    )\n    checkpoint = github_connector.build_dummy_checkpoint()\n    mock_paginated_list_repo1_prs = MagicMock(spec=PaginatedList)\n\n    def get_page_repo1_side_effect(page_num: int) -> list[PullRequest]:\n        if page_num == 0:\n            return [mock_pr1, mock_pr2]\n        else:\n            return []\n\n    mock_paginated_list_repo1_prs.get_page.side_effect = get_page_repo1_side_effect\n    mock_repo2_cursor_paginator = MagicMock(spec=PaginatedList)\n\n    def repo2_cursor_iterator() -> Generator[PullRequest, None, None]:\n        print(\"setting next url to cursor_step_2\")\n        mock_repo2_cursor_paginator.__nextUrl = \"cursor_step_2\"\n        yield mock_pr3\n        print(\"setting next url to None\")\n        mock_repo2_cursor_paginator.__nextUrl = None\n        yield mock_pr4\n\n    mock_repo2_cursor_paginator.__iter__.return_value = repo2_cursor_iterator()\n    mock_repo2_cursor_paginator.__nextUrl = None\n    pull_requests_func_invocation_count = 0\n\n    def replacement_pull_requests_func(\n        repo: Repository,\n    ) -> Callable[[], PaginatedList[PullRequest]]:\n        nonlocal pull_requests_func_invocation_count\n        pull_requests_func_invocation_count += 1\n        current_repo_name = repo.name\n        lambda_call_count_for_current_repo = 0\n\n        def git_objs_lambda() -> PaginatedList[PullRequest]:\n            nonlocal lambda_call_count_for_current_repo\n            lambda_call_count_for_current_repo += 1\n            if current_repo_name == mock_repo2.name:\n                if lambda_call_count_for_current_repo == 1:\n                    pl_for_offset_failure = MagicMock(spec=PaginatedList)\n\n                    def get_page_raises_exception(\n                        page_num: int,  # noqa: ARG001\n                    ) -> list[PullRequest]:\n                        raise GithubException(422, message=\"use cursor pagination\")\n\n                    pl_for_offset_failure.get_page.side_effect = (\n                        get_page_raises_exception\n                    )\n                    return pl_for_offset_failure\n                else:\n                    return mock_repo2_cursor_paginator\n            elif current_repo_name == mock_repo1.name:\n                return mock_paginated_list_repo1_prs\n            else:\n                raise ValueError(f\"Unexpected repo name: {current_repo_name}\")\n\n        return git_objs_lambda\n\n    mock_requester = MagicMock(spec=Requester)\n    github_connector.github_client._requester = mock_requester\n\n    def get_repo_side_effect(repo_id: int) -> MagicMock:\n        repo_to_return = None\n        headers_dict = None\n        data_dict = None\n        if repo_id == 1:\n            repo_to_return = mock_repo1\n            headers_dict = {\"status\": \"200 OK\", \"content-type\": \"application/json\"}\n            data_dict = {\n                \"id\": 1,\n                \"name\": \"repo1\",\n                \"full_name\": \"test-org/repo1\",\n                \"private\": False,\n                \"description\": \"Test repository\",\n            }\n        elif repo_id == 2:\n            repo_to_return = mock_repo2\n            headers_dict = {\"status\": \"200 OK\", \"content-type\": \"application/json\"}\n            data_dict = {\n                \"id\": 2,\n                \"name\": \"repo2\",\n                \"full_name\": \"test-org/repo2\",\n                \"private\": False,\n                \"description\": \"Test repository\",\n            }\n        else:\n            raise ValueError(f\"Unexpected repo ID: {repo_id}\")\n        if repo_to_return and headers_dict and data_dict:\n            repo_to_return.configure_mock(raw_headers=headers_dict, raw_data=data_dict)\n        return repo_to_return\n\n    mock_github_client.get_repo.side_effect = get_repo_side_effect\n\n    def to_repository_side_effect(\n        self_serialized_repo: SerializedRepository,\n        requester_arg: Requester,  # noqa: ARG001\n    ) -> Repository:\n        if self_serialized_repo.id == mock_repo1.id:\n            return mock_repo1\n        elif self_serialized_repo.id == mock_repo2.id:\n            return mock_repo2\n        raise ValueError(f\"Unexpected repo ID: {self_serialized_repo.id}\")\n\n    mock_empty_issues_list = MagicMock(spec=PaginatedList)\n    mock_empty_issues_list.get_page.return_value = []\n    mock_empty_issues_list.__iter__.return_value = iter([])\n    type(mock_empty_issues_list)._PaginatedList__nextUrl = None\n    mock_repo1.get_issues.return_value = mock_empty_issues_list\n    mock_repo2.get_issues.return_value = mock_empty_issues_list\n    with (\n        patch.object(\n            github_connector, \"get_all_repos\", return_value=[mock_repo1, mock_repo2]\n        ),\n        patch.object(\n            github_connector,\n            \"_pull_requests_func\",\n            side_effect=replacement_pull_requests_func,\n        ),\n        patch.object(\n            SerializedRepository,\n            \"to_Repository\",\n            side_effect=to_repository_side_effect,\n            autospec=True,\n        ) as mock_to_repository,\n    ):\n        end_time = time.time()\n        outputs = list(\n            load_everything_from_checkpoint_connector_from_checkpoint(\n                github_connector, 0, end_time, checkpoint\n            )\n        )\n\n    # --- Assertions ---\n    # Expected outputs: 5 based on the latest logic refinement\n    # 1. Initial cp\n    # 2. After repo2 PRs (cursor fallback) -> yields cp for repo2 issues\n    # 3. After repo2 issues (empty) -> yields cp for repo1 PRs\n    # 4. After repo1 PRs (page 0) -> yields cp for repo1 PRs page 1\n    # 5. After repo1 PRs (page 1 empty) and repo1 issues (empty) -> yields final cp\n\n    assert (\n        len(outputs) == 5\n    )  # Initial, Repo2-PRs, Repo2-Issues, Repo1-PRs-P0, Repo1-Issues(final)\n\n    # Output 0: Initial checkpoint, after _get_all_repos\n    cp0 = outputs[0].next_checkpoint\n    assert cp0.has_more\n    assert cp0.cached_repo is not None\n    assert cp0.cached_repo.id == mock_repo2.id  # mock_repo2 is popped first\n    assert cp0.cached_repo_ids == [mock_repo1.id]\n    assert cp0.stage == GithubConnectorStage.PRS\n    assert cp0.cursor_url is None\n\n    # Output 1: After processing PRs for mock_repo2 (via cursor fallback)\n    # Items should be pr3, pr4\n    assert len(outputs[1].items) == 2\n    assert all(isinstance(item, Document) for item in outputs[1].items)\n    assert {\n        item.semantic_identifier for item in cast(list[Document], outputs[1].items)\n    } == {\"3: PR 3 Repo 2\", \"4: PR 4 Repo 2\"}\n    cp1 = outputs[1].next_checkpoint\n    assert (\n        cp1.has_more\n    )  # Still have repo1 in cached_repo_ids at the time checkpoint is yielded\n    assert cp1.cached_repo is not None\n    assert cp1.cached_repo.id == mock_repo2.id\n    assert cp1.stage == GithubConnectorStage.ISSUES  # Moved to issues for repo2\n    assert cp1.cursor_url is None  # Cursor completed and reset\n    assert cp1.num_retrieved == 0  # Reset\n    assert cp1.curr_page == 0  # Reset\n\n    # Output 2: After processing Issues for mock_repo2 (empty)\n    assert len(outputs[2].items) == 0\n    cp2 = outputs[2].next_checkpoint\n    assert cp2.has_more  # Checkpoint yielded BEFORE final has_more check\n    assert cp2.cached_repo is not None\n    assert cp2.cached_repo.id == mock_repo1.id  # Moved to repo1\n    assert cp2.cached_repo_ids == []  # Popped repo1 id\n    assert cp2.stage == GithubConnectorStage.PRS  # For repo1\n    assert cp2.cursor_url is None\n\n    # Output 3: After processing PRs for mock_repo1 (via offset, page 0)\n    assert len(outputs[3].items) == 2\n    assert all(isinstance(item, Document) for item in outputs[3].items)\n    assert {\n        item.semantic_identifier for item in cast(list[Document], outputs[3].items)\n    } == {\"1: PR 1 Repo 1\", \"2: PR 2 Repo 1\"}\n    cp3 = outputs[3].next_checkpoint\n    # This checkpoint is returned early because offset had items. has_more reflects state then.\n    assert cp3.has_more  # still need to do issues\n    assert cp3.cached_repo is not None\n    assert cp3.cached_repo.id == mock_repo1.id\n    assert cp3.stage == GithubConnectorStage.PRS  # Still PRS stage\n    assert cp3.curr_page == 1  # Offset pagination incremented page for PRs\n    assert cp3.cursor_url is None\n\n    # Output 4: After processing PRs page 1 (empty) and Issues for mock_repo1 (empty) - Final checkpoint\n    assert len(outputs[4].items) == 0\n    cp4 = outputs[4].next_checkpoint\n    assert not cp4.has_more  # All done\n    assert cp4.cached_repo is not None\n    assert cp4.cached_repo.id == mock_repo1.id  # Last processed repo\n    assert (\n        cp4.stage == GithubConnectorStage.PRS\n    )  # Reset for a hypothetical next run/repo\n    assert cp4.curr_page == 0\n    assert cp4.num_retrieved == 0\n    assert cp4.cursor_url is None\n\n    # Verify to_Repository calls\n    print(mock_to_repository.call_args_list)\n    assert (\n        mock_to_repository.call_count == 4\n    )  # Twice for repo2, twice for repo1 (issues don't need it)\n    assert (\n        mock_to_repository.call_args_list[0][0][0].id == mock_repo2.id\n    )  # First call was for repo2\n    assert (\n        mock_to_repository.call_args_list[1][0][0].id == mock_repo2.id\n    )  # Second call was for repo2\n    assert (\n        mock_to_repository.call_args_list[2][0][0].id == mock_repo1.id\n    )  # Third call was for repo1\n    assert (\n        mock_to_repository.call_args_list[3][0][0].id == mock_repo1.id\n    )  # Fourth call was for repo1\n\n    # Verify _pull_requests_func was invoked for both repos' PR stages\n    assert (\n        pull_requests_func_invocation_count == 3\n    )  # twice for repo2 PRs, once for repo1 PRs\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/gmail/test_connector.py",
    "content": "import datetime\nimport json\nimport os\nfrom typing import Any\nfrom typing import cast\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.gmail.connector import _build_time_range_query\nfrom onyx.connectors.gmail.connector import GmailCheckpoint\nfrom onyx.connectors.gmail.connector import GmailConnector\nfrom onyx.connectors.gmail.connector import thread_to_document\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import TextSection\nfrom tests.unit.onyx.connectors.utils import (\n    load_everything_from_checkpoint_connector_from_checkpoint,\n)\n\n\ndef test_thread_to_document() -> None:\n    json_path = os.path.join(os.path.dirname(__file__), \"thread.json\")\n    with open(json_path, \"r\") as f:\n        full_email_thread = json.load(f)\n\n    doc = thread_to_document(full_email_thread, \"admin@onyx-test.com\")\n    assert isinstance(doc, Document)\n    assert doc.source == DocumentSource.GMAIL\n    assert doc.semantic_identifier == \"Email Chain 1\"\n    assert doc.doc_updated_at == datetime.datetime(\n        2024, 11, 2, 17, 34, 55, tzinfo=datetime.timezone.utc\n    )\n    assert len(doc.sections) == 4\n    assert doc.metadata == {}\n\n\ndef test_build_time_range_query() -> None:\n    time_range_start = 1703066296.159339\n    time_range_end = 1704984791.657404\n    query = _build_time_range_query(time_range_start, time_range_end)\n    assert query == \"after:1703066296 before:1704984791\"\n    query = _build_time_range_query(time_range_start, None)\n    assert query == \"after:1703066296\"\n    query = _build_time_range_query(None, time_range_end)\n    assert query == \"before:1704984791\"\n    query = _build_time_range_query(0.0, time_range_end)\n    assert query == \"before:1704984791\"\n    query = _build_time_range_query(None, None)\n    assert query is None\n\n\ndef test_time_str_to_utc() -> None:\n    str_to_dt = {\n        \"Tue, 5 Oct 2021 09:38:25 GMT\": datetime.datetime(\n            2021, 10, 5, 9, 38, 25, tzinfo=datetime.timezone.utc\n        ),\n        \"Sat, 24 Jul 2021 09:21:20 +0000 (UTC)\": datetime.datetime(\n            2021, 7, 24, 9, 21, 20, tzinfo=datetime.timezone.utc\n        ),\n        \"Thu, 29 Jul 2021 04:20:37 -0400 (EDT)\": datetime.datetime(\n            2021, 7, 29, 8, 20, 37, tzinfo=datetime.timezone.utc\n        ),\n        \"30 Jun 2023 18:45:01 +0300\": datetime.datetime(\n            2023, 6, 30, 15, 45, 1, tzinfo=datetime.timezone.utc\n        ),\n        \"22 Mar 2020 20:12:18 +0000 (GMT)\": datetime.datetime(\n            2020, 3, 22, 20, 12, 18, tzinfo=datetime.timezone.utc\n        ),\n        \"Date: Wed, 27 Aug 2025 11:40:00 +0200\": datetime.datetime(\n            2025, 8, 27, 9, 40, 0, tzinfo=datetime.timezone.utc\n        ),\n    }\n    for strptime, expected_datetime in str_to_dt.items():\n        assert time_str_to_utc(strptime) == expected_datetime\n\n\ndef test_gmail_checkpoint_progression() -> None:\n    connector = GmailConnector()\n    connector._creds = MagicMock()\n    connector._primary_admin_email = \"admin@example.com\"\n\n    user_emails = [\"user1@example.com\", \"user2@example.com\"]\n\n    thread_list_responses: dict[str, dict[str | None, dict[str, Any]]] = {\n        \"user1@example.com\": {\n            None: {\n                \"threads\": [{\"id\": \"t1\"}, {\"id\": \"t2\"}],\n                \"nextPageToken\": \"token-user1-page2\",\n            },\n            \"token-user1-page2\": {\n                \"threads\": [{\"id\": \"t3\"}],\n                \"nextPageToken\": None,\n            },\n        },\n        \"user2@example.com\": {\n            None: {\n                \"threads\": [{\"id\": \"t4\"}],\n                \"nextPageToken\": None,\n            }\n        },\n    }\n\n    full_thread_responses = {\n        \"user1@example.com\": {\n            \"t1\": {\"id\": \"t1\"},\n            \"t2\": {\"id\": \"t2\"},\n            \"t3\": {\"id\": \"t3\"},\n        },\n        \"user2@example.com\": {\n            \"t4\": {\"id\": \"t4\"},\n        },\n    }\n\n    class MockRequest:\n        def __init__(self, response: dict[str, Any]):\n            self._response = response\n\n        def execute(self) -> dict[str, Any]:\n            return self._response\n\n    class MockThreadsResource:\n        def __init__(self, user_email: str) -> None:\n            self._user_email = user_email\n\n        def list(\n            self,\n            *,\n            userId: str,\n            fields: str,\n            q: str | None = None,  # noqa: ARG002\n            pageToken: str | None = None,\n            **_: object,\n        ) -> MockRequest:\n            assert userId == self._user_email\n            assert \"nextPageToken\" in fields\n            responses = thread_list_responses[self._user_email]\n            key = pageToken or None\n            return MockRequest(responses[key])\n\n        def get(\n            self,\n            *,\n            userId: str,\n            id: str,\n            fields: str,\n            **_: object,\n        ) -> MockRequest:\n            assert userId == self._user_email\n            assert \"messages\" in fields or \"payload\" in fields\n            return MockRequest(full_thread_responses[self._user_email][id])\n\n    class MockUsersResource:\n        def __init__(self, user_email: str) -> None:\n            self._user_email = user_email\n\n        def threads(self) -> MockThreadsResource:\n            return MockThreadsResource(self._user_email)\n\n    class MockGmailService:\n        def __init__(self, user_email: str) -> None:\n            self._user_email = user_email\n\n        def users(self) -> MockUsersResource:\n            return MockUsersResource(self._user_email)\n\n    def fake_get_gmail_service(_: object, user_email: str) -> MockGmailService:\n        return MockGmailService(user_email)\n\n    def fake_thread_to_document(\n        full_thread: dict[str, object], user_email: str\n    ) -> Document:\n        thread_id = cast(str, full_thread[\"id\"])\n        return Document(\n            id=f\"{user_email}:{thread_id}\",\n            semantic_identifier=f\"Thread {thread_id}\",\n            sections=[TextSection(text=f\"Body {thread_id}\")],\n            source=DocumentSource.GMAIL,\n            metadata={},\n            external_access=ExternalAccess(\n                external_user_emails={user_email},\n                external_user_group_ids=set(),\n                is_public=False,\n            ),\n        )\n\n    checkpoint = connector.build_dummy_checkpoint()\n    assert isinstance(checkpoint, GmailCheckpoint)\n\n    with patch.object(GmailConnector, \"_get_all_user_emails\", return_value=user_emails):\n        with patch(\n            \"onyx.connectors.gmail.connector.get_gmail_service\",\n            side_effect=fake_get_gmail_service,\n        ):\n            with patch(\n                \"onyx.connectors.gmail.connector.thread_to_document\",\n                side_effect=fake_thread_to_document,\n            ) as mock_thread_to_document:\n                outputs = load_everything_from_checkpoint_connector_from_checkpoint(\n                    connector=connector,\n                    start=0,\n                    end=1_000,\n                    checkpoint=checkpoint,\n                )\n\n    document_ids = [\n        item.id\n        for output in outputs\n        for item in output.items\n        if isinstance(item, Document)\n    ]\n\n    assert document_ids == [\n        \"user2@example.com:t4\",\n        \"user1@example.com:t1\",\n        \"user1@example.com:t2\",\n        \"user1@example.com:t3\",\n    ]\n\n    assert mock_thread_to_document.call_count == 4\n\n    final_checkpoint = outputs[-1].next_checkpoint\n    assert isinstance(final_checkpoint, GmailCheckpoint)\n    assert final_checkpoint.has_more is False\n    assert final_checkpoint.user_emails == []\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/gmail/thread.json",
    "content": "{\n  \"id\": \"192edefb315737c3\",\n  \"messages\": [\n    {\n      \"id\": \"192edeff0dc743cf\",\n      \"payload\": {\n        \"headers\": [\n          {\n            \"name\": \"MIME-Version\",\n            \"value\": \"1.0\"\n          },\n          {\n            \"name\": \"Date\",\n            \"value\": \"Sat, 2 Nov 2024 10:32:57 -0700\"\n          },\n          {\n            \"name\": \"Message-ID\",\n            \"value\": \"<CABnEGTWbSYxvRDsxnXy1b2iQF=peGsHuOmrOcixpQFCJ9EBHHg@mail.gmail.com>\"\n          },\n          {\n            \"name\": \"Subject\",\n            \"value\": \"Email Chain 1\"\n          },\n          {\n            \"name\": \"From\",\n            \"value\": \"Test Admin Admin <admin@onyx-test.com>\"\n          },\n          {\n            \"name\": \"To\",\n            \"value\": \"test-group-1@onyx-test.com\"\n          },\n          {\n            \"name\": \"Content-Type\",\n            \"value\": \"multipart/alternative; boundary=\\\"0000000000004480480625f17117\\\"\"\n          }\n        ],\n        \"parts\": [\n          {\n            \"mimeType\": \"text/plain\",\n            \"body\": {\n              \"data\": \"VGhpcyBpcyBlbWFpbCAxIGluIGNoYWluIDENCg==\"\n            }\n          },\n          {\n            \"mimeType\": \"text/html\",\n            \"body\": {\n              \"data\": \"PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAxIGluIGNoYWluIDE8L2Rpdj4NCg==\"\n            }\n          }\n        ]\n      }\n    },\n    {\n      \"id\": \"192edf07fbcc8b2c\",\n      \"payload\": {\n        \"headers\": [\n          {\n            \"name\": \"Delivered-To\",\n            \"value\": \"admin@onyx-test.com\"\n          },\n          {\n            \"name\": \"Received\",\n            \"value\": \"by 2002:a59:b3cc:0:b0:491:1bbc:5e54 with SMTP id g12csp1873533vqt;        Sat, 2 Nov 2024 10:33:34 -0700 (PDT)\"\n          },\n          {\n            \"name\": \"X-Received\",\n            \"value\": \"by 2002:a05:6102:1284:b0:4a9:555b:fb50 with SMTP id ada2fe7eead31-4a9555bfd21mr8428882137.20.1730568814436;        Sat, 02 Nov 2024 10:33:34 -0700 (PDT)\"\n          },\n          {\n            \"name\": \"ARC-Seal\",\n            \"value\": \"i=1; a=rsa-sha256; t=1730568814; cv=none;        d=google.com; s=arc-20240605;        b=A75GBczY/LN8OhNdpZ1VM3opx5VWU3HWYnwCIL9TLBqEpNz2X74TXNkCevJkImB3VF         BkFY7gHg7d8oGdsQvUp2EEdRBXKoYT8P4PTc3ZSD2W8LYU2XCudIbA5xtGObELmI0h0f         bCXT8dE7m6hGJPTg0WPSlkvGs2bY52bmSbCbrnrA/Mx/oyxYPzwv5cMw3CLMXo/8nOLO         FAzrnMTKRqYtn/QvYjUne7PpVSYPk0Edg5261/jn9qatyyL8VePU4FriQTffjAC85Ayc         jikVA5QnsYO79aXJE0SIw4xBHwtOgmyWhU9TPw2NfuQHZWrm39JudUYlmZb8MV4VpX6p         otxw==\"\n          },\n          {\n            \"name\": \"ARC-Message-Signature\",\n            \"value\": \"i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20240605;        h=to:subject:message-id:date:from:in-reply-to:references:mime-version         :dkim-signature;        bh=9Eo5wYdnqXP3axXBCAwTODK4DvptOqG5RNct/xfBak4=;        fh=/JhVJcrFVXWWzpGRY8HXA/cCDTQzCntn8VCeyDmjzic=;        b=bkhR3iHOUD64TOG3Mqfd9BMT/2IF9gHEjHZWR/tet5J05UKFhk2d4k69wuSLNJcxlF         dB6zzgt1vvEnCbSV+XBCEG1zW76T/sN6Ldn7+5xomsGFYvTZsW4E7OJqxkedfdpFeWwc         eBlgX765wnBs4ztktDhK6gO8igWx3CaYH5wbX72DV4wqcQpDNpMqNHK7sHrlOG2YJGzV         7i3tli4dJqu1zgQK+lo1or1QQyadFzhbwX2iFdSLTNSNR3s70kqqBOT69lDMv84dfKCp         +hXE0uwjOY/9lGG9rO1/e5WWEDC2BSZ7wzjvvyBRjDG+lavBqTggUizd8W+MlRYXONAX         t7Kg==;        dara=google.com\"\n          },\n          {\n            \"name\": \"ARC-Authentication-Results\",\n            \"value\": \"i=1; mx.google.com;       dkim=pass header.i=@onyx-test-com.20230601.gappssmtp.com header.s=20230601 header.b=Z57TqzI7;       spf=none (google.com: test_user_1@onyx-test.com does not designate permitted sender hosts) smtp.mailfrom=test_user_1@onyx-test.com;       dara=pass header.i=@onyx-test.com\"\n          },\n          {\n            \"name\": \"Return-Path\",\n            \"value\": \"<test_user_1@onyx-test.com>\"\n          },\n          {\n            \"name\": \"Received\",\n            \"value\": \"from mail-sor-f41.google.com (mail-sor-f41.google.com. [209.85.220.41])        by mx.google.com with SMTPS id a1e0cc1a2514c-855dae589a1sor1192309241.6.2024.11.02.10.33.34        for <admin@onyx-test.com>        (Google Transport Security);        Sat, 02 Nov 2024 10:33:34 -0700 (PDT)\"\n          },\n          {\n            \"name\": \"Received-SPF\",\n            \"value\": \"none (google.com: test_user_1@onyx-test.com does not designate permitted sender hosts) client-ip=209.85.220.41;\"\n          },\n          {\n            \"name\": \"Authentication-Results\",\n            \"value\": \"mx.google.com;       dkim=pass header.i=@onyx-test-com.20230601.gappssmtp.com header.s=20230601 header.b=Z57TqzI7;       spf=none (google.com: test_user_1@onyx-test.com does not designate permitted sender hosts) smtp.mailfrom=test_user_1@onyx-test.com;       dara=pass header.i=@onyx-test.com\"\n          },\n          {\n            \"name\": \"DKIM-Signature\",\n            \"value\": \"v=1; a=rsa-sha256; c=relaxed/relaxed;        d=onyx-test-com.20230601.gappssmtp.com; s=20230601; t=1730568814; x=1731173614; darn=onyx-test.com;        h=to:subject:message-id:date:from:in-reply-to:references:mime-version         :from:to:cc:subject:date:message-id:reply-to;        bh=9Eo5wYdnqXP3axXBCAwTODK4DvptOqG5RNct/xfBak4=;        b=Z57TqzI7sEwwOumQx0z6YhibC1x2CHlNmBjwyQT1mNOUScZbzo6nmH8Ydo7slsTfgZ         rgwKEEYkf/CYlFWGUEzGzc22jVUCSMjNMFB0nEtfj+GPJaNjDR9FxjFLTUfSq64H/RCI         eO9+oEAJHaa5QmceX2yiSJFXNqmVEMJNT+K6CnlbN5gW6CUD2tBt46vW83PVJgxKMc76         A7/eaDxdZDLUvpjHes4SvM7x0eBM9t7w9wb/jEjGqA54HI2YHVcxM4HJxrbCChYn8UoG         7+UOpfOmHTZLdLYgMtSqYanJ3BTENEdyVp2LIOZOhlUT7Hbr9esyeVyy765XTuRAWxmo         DGPQ==\"\n          },\n          {\n            \"name\": \"X-Google-DKIM-Signature\",\n            \"value\": \"v=1; a=rsa-sha256; c=relaxed/relaxed;        d=1e100.net; s=20230601; t=1730568814; x=1731173614;        h=to:subject:message-id:date:from:in-reply-to:references:mime-version         :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to;        bh=9Eo5wYdnqXP3axXBCAwTODK4DvptOqG5RNct/xfBak4=;        b=fxuobWT2rW8kvQ14LUHbJEJOdCM4uBP+Obo7jL4w0BvwLrBNNbMPqMUc8d8u17dnS7         gczFCprOr5PZnVNmOZMQvmRTJ6poTkWOGQhsOyDOSLNI0IzuaN2wh9qjmFez6Z9nTx3f         Lo0I0uahwzNkExywHC9x0H3NOZlS4074qkyLJObgnOHa5vml8SEcChMuzOQuCSU9wNjO         t26urEoct8LArf0K/xztjxpEpDCgnf4Cr/KmZfi4/2Sjv4jwQzkLVuiwADraHIJbLv1m         UMNs92dakWYK0cBbuwOx/sYpUWWyhVmv6Q0LqXzJjtpY4Z0zsnpI2UCrkAdAOSh7geEJ         LCnw==\"\n          },\n          {\n            \"name\": \"X-Gm-Message-State\",\n            \"value\": \"AOJu0YyCYZOHIzoRHgMd7foUCpX2JYDwPS2XsTjWiMkkR364/mhFKFsQ vixTj7QM6pDecoDxn8pS0btM7b8z+cwo/8hFiYNgp26wK5L0aGymu+M8OuEk/73fuEthWVV0eko B9LvS5+qixa/oNO/HkRJpVTQmAH7OTT25KeZJj0Dd3x1JqsrfiNE=\"\n          },\n          {\n            \"name\": \"X-Google-Smtp-Source\",\n            \"value\": \"AGHT+IHCMrQhOT9sgPUOQJL1oVfxMruiLg3BZ5DXqKMdQ7PYF2puka6Ovabv3BPg08CeyS1ovKydIdwHT2uleZkkAaU=\"\n          },\n          {\n            \"name\": \"X-Received\",\n            \"value\": \"by 2002:a05:6102:5092:b0:4a3:e05e:f6a3 with SMTP id ada2fe7eead31-4a900e11589mr14462681137.3.1730568813787; Sat, 02 Nov 2024 10:33:33 -0700 (PDT)\"\n          },\n          {\n            \"name\": \"MIME-Version\",\n            \"value\": \"1.0\"\n          },\n          {\n            \"name\": \"References\",\n            \"value\": \"<CABnEGTWbSYxvRDsxnXy1b2iQF=peGsHuOmrOcixpQFCJ9EBHHg@mail.gmail.com>\"\n          },\n          {\n            \"name\": \"In-Reply-To\",\n            \"value\": \"<CABnEGTWbSYxvRDsxnXy1b2iQF=peGsHuOmrOcixpQFCJ9EBHHg@mail.gmail.com>\"\n          },\n          {\n            \"name\": \"From\",\n            \"value\": \"test_user_1 1 <test_user_1@onyx-test.com>\"\n          },\n          {\n            \"name\": \"Date\",\n            \"value\": \"Sat, 2 Nov 2024 10:33:22 -0700\"\n          },\n          {\n            \"name\": \"Message-ID\",\n            \"value\": \"<CANSSAx8n6=Kr4sQaGVYaKj63Hdb4=NCffD6OhAADYm+2fe7_dw@mail.gmail.com>\"\n          },\n          {\n            \"name\": \"Subject\",\n            \"value\": \"Re: Email Chain 1\"\n          },\n          {\n            \"name\": \"To\",\n            \"value\": \"Test Admin Admin <admin@onyx-test.com>\"\n          },\n          {\n            \"name\": \"Content-Type\",\n            \"value\": \"multipart/alternative; boundary=\\\"00000000000067dbf70625f1730f\\\"\"\n          }\n        ],\n        \"parts\": [\n          {\n            \"mimeType\": \"text/plain\",\n            \"body\": {\n              \"data\": \"VGhpcyBpcyBlbWFpbCAyIGluIGNoYWluIDENCg0KT24gU2F0LCBOb3YgMiwgMjAyNCBhdCAxMDozM-KAr0FNIFRlc3QgQWRtaW4gQWRtaW4gPGFkbWluQG9ueXgtdGVzdC5jb20-DQp3cm90ZToNCg0KPiBUaGlzIGlzIGVtYWlsIDEgaW4gY2hhaW4gMQ0KPg0K\"\n            }\n          },\n          {\n            \"mimeType\": \"text/html\",\n            \"body\": {\n              \"data\": \"PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAyIGluIGNoYWluIDE8L2Rpdj48YnI-PGRpdiBjbGFzcz0iZ21haWxfcXVvdGUiPjxkaXYgZGlyPSJsdHIiIGNsYXNzPSJnbWFpbF9hdHRyIj5PbiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0gVGVzdCBBZG1pbiBBZG1pbiAmbHQ7PGEgaHJlZj0ibWFpbHRvOmFkbWluQG9ueXgtdGVzdC5jb20iPmFkbWluQG9ueXgtdGVzdC5jb208L2E-Jmd0OyB3cm90ZTo8YnI-PC9kaXY-PGJsb2NrcXVvdGUgY2xhc3M9ImdtYWlsX3F1b3RlIiBzdHlsZT0ibWFyZ2luOjBweCAwcHggMHB4IDAuOGV4O2JvcmRlci1sZWZ0OjFweCBzb2xpZCByZ2IoMjA0LDIwNCwyMDQpO3BhZGRpbmctbGVmdDoxZXgiPjxkaXYgZGlyPSJsdHIiPlRoaXMgaXMgZW1haWwgMSBpbiBjaGFpbiAxPC9kaXY-DQo8L2Jsb2NrcXVvdGU-PC9kaXY-DQo=\"\n            }\n          }\n        ]\n      }\n    },\n    {\n      \"id\": \"192edf157175fcec\",\n      \"payload\": {\n        \"headers\": [\n          {\n            \"name\": \"MIME-Version\",\n            \"value\": \"1.0\"\n          },\n          {\n            \"name\": \"Date\",\n            \"value\": \"Sat, 2 Nov 2024 10:34:29 -0700\"\n          },\n          {\n            \"name\": \"References\",\n            \"value\": \"<CABnEGTWbSYxvRDsxnXy1b2iQF=peGsHuOmrOcixpQFCJ9EBHHg@mail.gmail.com> <CANSSAx8n6=Kr4sQaGVYaKj63Hdb4=NCffD6OhAADYm+2fe7_dw@mail.gmail.com>\"\n          },\n          {\n            \"name\": \"In-Reply-To\",\n            \"value\": \"<CANSSAx8n6=Kr4sQaGVYaKj63Hdb4=NCffD6OhAADYm+2fe7_dw@mail.gmail.com>\"\n          },\n          {\n            \"name\": \"Bcc\",\n            \"value\": \"test_user_3@onyx-test.com\"\n          },\n          {\n            \"name\": \"Message-ID\",\n            \"value\": \"<CABnEGTUEDvhfyOWTCauhTCn5mVXGp6p1=yw65RUsGu8E=c2k4g@mail.gmail.com>\"\n          },\n          {\n            \"name\": \"Subject\",\n            \"value\": \"Fwd: Email Chain 1\"\n          },\n          {\n            \"name\": \"From\",\n            \"value\": \"Test Admin Admin <admin@onyx-test.com>\"\n          },\n          {\n            \"name\": \"To\",\n            \"value\": \"test_user_2 2 <test_user_2@onyx-test.com>\"\n          },\n          {\n            \"name\": \"Content-Type\",\n            \"value\": \"multipart/alternative; boundary=\\\"000000000000bf7afd0625f1764f\\\"\"\n          }\n        ],\n        \"parts\": [\n          {\n            \"mimeType\": \"text/plain\",\n            \"body\": {\n              \"data\": \"VGhpcyBpcyBlbWFpbCAzIGluIGNoYWluIDENCg0KLS0tLS0tLS0tLSBGb3J3YXJkZWQgbWVzc2FnZSAtLS0tLS0tLS0NCkZyb206IHRlc3RfdXNlcl8xIDEgPHRlc3RfdXNlcl8xQG9ueXgtdGVzdC5jb20-DQpEYXRlOiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0NClN1YmplY3Q6IFJlOiBFbWFpbCBDaGFpbiAxDQpUbzogVGVzdCBBZG1pbiBBZG1pbiA8YWRtaW5Ab255eC10ZXN0LmNvbT4NCg0KDQpUaGlzIGlzIGVtYWlsIDIgaW4gY2hhaW4gMQ0KDQpPbiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0gVGVzdCBBZG1pbiBBZG1pbiA8YWRtaW5Ab255eC10ZXN0LmNvbT4NCndyb3RlOg0KDQo-IFRoaXMgaXMgZW1haWwgMSBpbiBjaGFpbiAxDQo-DQo=\"\n            }\n          },\n          {\n            \"mimeType\": \"text/html\",\n            \"body\": {\n              \"data\": \"PGRpdiBkaXI9Imx0ciI-PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAzIGluIGNoYWluIDE8L2Rpdj48YnI-PGRpdiBjbGFzcz0iZ21haWxfcXVvdGUiPjxkaXYgZGlyPSJsdHIiIGNsYXNzPSJnbWFpbF9hdHRyIj4tLS0tLS0tLS0tIEZvcndhcmRlZCBtZXNzYWdlIC0tLS0tLS0tLTxicj5Gcm9tOiA8c3Ryb25nIGNsYXNzPSJnbWFpbF9zZW5kZXJuYW1lIiBkaXI9ImF1dG8iPnRlc3RfdXNlcl8xIDE8L3N0cm9uZz4gPHNwYW4gZGlyPSJhdXRvIj4mbHQ7PGEgaHJlZj0ibWFpbHRvOnRlc3RfdXNlcl8xQG9ueXgtdGVzdC5jb20iPnRlc3RfdXNlcl8xQG9ueXgtdGVzdC5jb208L2E-Jmd0Ozwvc3Bhbj48YnI-RGF0ZTogU2F0LCBOb3YgMiwgMjAyNCBhdCAxMDozM-KAr0FNPGJyPlN1YmplY3Q6IFJlOiBFbWFpbCBDaGFpbiAxPGJyPlRvOiBUZXN0IEFkbWluIEFkbWluICZsdDs8YSBocmVmPSJtYWlsdG86YWRtaW5Ab255eC10ZXN0LmNvbSI-YWRtaW5Ab255eC10ZXN0LmNvbTwvYT4mZ3Q7PGJyPjwvZGl2Pjxicj48YnI-PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAyIGluIGNoYWluIDE8L2Rpdj48YnI-PGRpdiBjbGFzcz0iZ21haWxfcXVvdGUiPjxkaXYgZGlyPSJsdHIiIGNsYXNzPSJnbWFpbF9hdHRyIj5PbiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0gVGVzdCBBZG1pbiBBZG1pbiAmbHQ7PGEgaHJlZj0ibWFpbHRvOmFkbWluQG9ueXgtdGVzdC5jb20iIHRhcmdldD0iX2JsYW5rIj5hZG1pbkBvbnl4LXRlc3QuY29tPC9hPiZndDsgd3JvdGU6PGJyPjwvZGl2PjxibG9ja3F1b3RlIGNsYXNzPSJnbWFpbF9xdW90ZSIgc3R5bGU9Im1hcmdpbjowcHggMHB4IDBweCAwLjhleDtib3JkZXItbGVmdDoxcHggc29saWQgcmdiKDIwNCwyMDQsMjA0KTtwYWRkaW5nLWxlZnQ6MWV4Ij48ZGl2IGRpcj0ibHRyIj5UaGlzIGlzIGVtYWlsIDEgaW4gY2hhaW4gMTwvZGl2Pg0KPC9ibG9ja3F1b3RlPjwvZGl2Pg0KPC9kaXY-PC9kaXY-DQo=\"\n            }\n          }\n        ]\n      }\n    },\n    {\n      \"id\": \"192edf1e8f7ecbb4\",\n      \"payload\": {\n        \"headers\": [\n          {\n            \"name\": \"Delivered-To\",\n            \"value\": \"admin@onyx-test.com\"\n          },\n          {\n            \"name\": \"Received\",\n            \"value\": \"by 2002:a59:b3cc:0:b0:491:1bbc:5e54 with SMTP id g12csp1874156vqt;        Sat, 2 Nov 2024 10:35:07 -0700 (PDT)\"\n          },\n          {\n            \"name\": \"X-Received\",\n            \"value\": \"by 2002:a05:6122:319c:b0:50d:81f9:5210 with SMTP id 71dfb90a1353d-5105d128958mr15853812e0c.13.1730568906834;        Sat, 02 Nov 2024 10:35:06 -0700 (PDT)\"\n          },\n          {\n            \"name\": \"ARC-Seal\",\n            \"value\": \"i=1; a=rsa-sha256; t=1730568906; cv=none;        d=google.com; s=arc-20240605;        b=JUd7S6ql1poKM5ox92op2g2Z67AS8sEkp5f/S+Mr5+7KSichsjAwixWg/YhhRhvaY/         UcykrbdaAeWfCuGtJgSq1nr1z5hB3iAltv/D2XCdJdOXzVDpVvaV9lT/YU6266VKtsnq         gFVKfjyMe/MnNKvDITQL67A2gRvhiR3XWxwEVvrMArMpUb9bbudlF/5L3MQY4BCIvWLL         9uBv1ZnclghscsxspoG3CkULkGqHGUTKq6bPoUn/hOljiVdsVVagoOwhbDEcyMRKUDnm         2t3H7iiujhlBIDbRoLJR/6C+A6AMyNKPAFA3axM6EXrTOADMZ8a0JqFj8O4rktYpRV+d         zHxQ==\"\n          },\n          {\n            \"name\": \"ARC-Message-Signature\",\n            \"value\": \"i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20240605;        h=to:subject:message-id:date:from:in-reply-to:references:mime-version         :dkim-signature;        bh=K0g0X/4URFSC1nuXjI7ZESJA66WnWcqwgfHOUDQ/kQo=;        fh=/JhVJcrFVXWWzpGRY8HXA/cCDTQzCntn8VCeyDmjzic=;        b=IarHhl5g5tjBhlMRRXo6WwTzaFOI4Q3w4ebNunftDUHwzV7Qu1hY0y7r3SRNaBb+qD         ZncYUI6PF/Oo7eMG65IloXfu+kHUI8NJMaoERUWgEk21Tj6cOSRO4x/W6V5PSX7a4lWZ         K1cNdAlaiWI09Esv07Vel975Bgrd+XiCwoVgJAAslHOJ2bZwSYWzvwLqdkCRVrAGJQ9/         I80kvOnNVesIFdIR6SGrhdz8xNIIoe60k8PjJRzkmzy/tEeKCYBz6W+NW4xoIaAVmKUw         RvjI8JozUVkGzh+LLyx64MakPCZPWM+ft+D35JodarYh+KesF+HV/Oe7rjaw7JXZ1WoE         OdJQ==;        dara=google.com\"\n          },\n          {\n            \"name\": \"ARC-Authentication-Results\",\n            \"value\": \"i=1; mx.google.com;       dkim=pass header.i=@onyx-test-com.20230601.gappssmtp.com header.s=20230601 header.b=1U8JkCbL;       spf=none (google.com: test_user_3@onyx-test.com does not designate permitted sender hosts) smtp.mailfrom=test_user_3@onyx-test.com;       dara=pass header.i=@onyx-test.com\"\n          },\n          {\n            \"name\": \"Return-Path\",\n            \"value\": \"<test_user_3@onyx-test.com>\"\n          },\n          {\n            \"name\": \"Received\",\n            \"value\": \"from mail-sor-f41.google.com (mail-sor-f41.google.com. [209.85.220.41])        by mx.google.com with SMTPS id 71dfb90a1353d-5106f3f9037sor1051490e0c.7.2024.11.02.10.35.06        for <admin@onyx-test.com>        (Google Transport Security);        Sat, 02 Nov 2024 10:35:06 -0700 (PDT)\"\n          },\n          {\n            \"name\": \"Received-SPF\",\n            \"value\": \"none (google.com: test_user_3@onyx-test.com does not designate permitted sender hosts) client-ip=209.85.220.41;\"\n          },\n          {\n            \"name\": \"Authentication-Results\",\n            \"value\": \"mx.google.com;       dkim=pass header.i=@onyx-test-com.20230601.gappssmtp.com header.s=20230601 header.b=1U8JkCbL;       spf=none (google.com: test_user_3@onyx-test.com does not designate permitted sender hosts) smtp.mailfrom=test_user_3@onyx-test.com;       dara=pass header.i=@onyx-test.com\"\n          },\n          {\n            \"name\": \"DKIM-Signature\",\n            \"value\": \"v=1; a=rsa-sha256; c=relaxed/relaxed;        d=onyx-test-com.20230601.gappssmtp.com; s=20230601; t=1730568906; x=1731173706; darn=onyx-test.com;        h=to:subject:message-id:date:from:in-reply-to:references:mime-version         :from:to:cc:subject:date:message-id:reply-to;        bh=K0g0X/4URFSC1nuXjI7ZESJA66WnWcqwgfHOUDQ/kQo=;        b=1U8JkCbLjicGtH7otVX3QjKv/XK5fGnmOIVMTD/b9cO1w8ai2GwCuJbBo+z1IuGqto         aRuNCcEqUIaFvVFiezvhL9xg7scIwHHvLOrSpmc0h0JMSx8q4kKaUGKEJpewsYvkStmr         DYv/cUIeaPTIChSuUDV7FVMhf7jIyIaYry3i9/EIlw+on18nD30C9kXwds5yWW8XGvtR         /OUuSdgJzuoNmypUt8v9Ebqd+LP23YTs+78/G1Ag+JjugxxF+C9cm7SxmooWueukRkm8         o8nQO5QVx/y/xsCZdM2XXcKCLcZIntuY48amlfFyIqrhG1/DEM6htD64meMGctNTptQf         jHrw==\"\n          },\n          {\n            \"name\": \"X-Google-DKIM-Signature\",\n            \"value\": \"v=1; a=rsa-sha256; c=relaxed/relaxed;        d=1e100.net; s=20230601; t=1730568906; x=1731173706;        h=to:subject:message-id:date:from:in-reply-to:references:mime-version         :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to;        bh=K0g0X/4URFSC1nuXjI7ZESJA66WnWcqwgfHOUDQ/kQo=;        b=J4+ozlusGGM1Hn95EZkDeYbExgkyOlAdcY6LcV4Wx1zeI78HtEXGgvqcZ5sP7HzS1X         /A3i7WkgmjpC9bU2/zKLrfXDvYQ7udQwTJtKsKaUo4O65Al7Wtgz8e8rBDYikhqEEAZQ         GbEwqp+qa+v0T4rPhkQKd4zpIE3AUd3eh5u5iF/UEYc1NcyV35uMGWRP4jOK6F67MwS7         73MgObcGqmBH48I4K+ITYAkNEMGOBpY6fheGxCxyDpcG5gbf8swlWX2Dd0EM9H72o+Xb         jvAslOq1lZzPZUgyyZJ2wVEASxF8S7depiOLcTPKwsw+pgXIMAUBExBvu0u4PhO0qG+z         pftQ==\"\n          },\n          {\n            \"name\": \"X-Gm-Message-State\",\n            \"value\": \"AOJu0Yy2r0aT3w7HBU7t0JGla+x3AddG9WdnQT06r6T/HGZwZ9Wp9TUs Orb/HMtgvXivtYFkG14NJkMTBO4EqSynmzaxAvEheDXB1uYE2LS21XoqrvycvYQh3GUHBwUdS8L lE6BUjm4TJfXlZWAqKRxg4C0j1UFSuVdkXf6P1GCsdyKKTeS6A9eohw==\"\n          },\n          {\n            \"name\": \"X-Google-Smtp-Source\",\n            \"value\": \"AGHT+IHXTB7Ar9w/Q3G3gCT19SVELYvWl30pNGuNiTmkYZgMWFS7YUWTkG/DS4/mrjMRXpYuclOLHv8BeOmw9Jovkr4=\"\n          },\n          {\n            \"name\": \"X-Received\",\n            \"value\": \"by 2002:a05:6102:3a10:b0:4a9:49:26d2 with SMTP id ada2fe7eead31-4a90109fb68mr15589362137.29.1730568906301; Sat, 02 Nov 2024 10:35:06 -0700 (PDT)\"\n          },\n          {\n            \"name\": \"MIME-Version\",\n            \"value\": \"1.0\"\n          },\n          {\n            \"name\": \"References\",\n            \"value\": \"<CABnEGTWbSYxvRDsxnXy1b2iQF=peGsHuOmrOcixpQFCJ9EBHHg@mail.gmail.com> <CANSSAx8n6=Kr4sQaGVYaKj63Hdb4=NCffD6OhAADYm+2fe7_dw@mail.gmail.com> <CABnEGTUEDvhfyOWTCauhTCn5mVXGp6p1=yw65RUsGu8E=c2k4g@mail.gmail.com>\"\n          },\n          {\n            \"name\": \"In-Reply-To\",\n            \"value\": \"<CABnEGTUEDvhfyOWTCauhTCn5mVXGp6p1=yw65RUsGu8E=c2k4g@mail.gmail.com>\"\n          },\n          {\n            \"name\": \"From\",\n            \"value\": \"test_user_3 3 <test_user_3@onyx-test.com>\"\n          },\n          {\n            \"name\": \"Date\",\n            \"value\": \"Sat, 2 Nov 2024 10:34:55 -0700\"\n          },\n          {\n            \"name\": \"Message-ID\",\n            \"value\": \"<CACcF+8GU1V2_CcYsUFNOh0+oSkMG=oN-ioyPPXRsD+0Ghr-u-Q@mail.gmail.com>\"\n          },\n          {\n            \"name\": \"Subject\",\n            \"value\": \"Re: Email Chain 1\"\n          },\n          {\n            \"name\": \"To\",\n            \"value\": \"Test Admin Admin <admin@onyx-test.com>\"\n          },\n          {\n            \"name\": \"Content-Type\",\n            \"value\": \"multipart/alternative; boundary=\\\"000000000000eb82a70625f178cf\\\"\"\n          }\n        ],\n        \"parts\": [\n          {\n            \"mimeType\": \"text/plain\",\n            \"body\": {\n              \"data\": \"VGhpcyBpcyBlbWFpbCA0IGluIGNoYWluIDENCg0KT24gU2F0LCBOb3YgMiwgMjAyNCBhdCAxMDozNOKAr0FNIFRlc3QgQWRtaW4gQWRtaW4gPGFkbWluQG9ueXgtdGVzdC5jb20-DQp3cm90ZToNCg0KPiBUaGlzIGlzIGVtYWlsIDMgaW4gY2hhaW4gMQ0KPg0KPiAtLS0tLS0tLS0tIEZvcndhcmRlZCBtZXNzYWdlIC0tLS0tLS0tLQ0KPiBGcm9tOiB0ZXN0X3VzZXJfMSAxIDx0ZXN0X3VzZXJfMUBvbnl4LXRlc3QuY29tPg0KPiBEYXRlOiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0NCj4gU3ViamVjdDogUmU6IEVtYWlsIENoYWluIDENCj4gVG86IFRlc3QgQWRtaW4gQWRtaW4gPGFkbWluQG9ueXgtdGVzdC5jb20-DQo-DQo-DQo-IFRoaXMgaXMgZW1haWwgMiBpbiBjaGFpbiAxDQo-DQo-IE9uIFNhdCwgTm92IDIsIDIwMjQgYXQgMTA6MzPigK9BTSBUZXN0IEFkbWluIEFkbWluIDxhZG1pbkBvbnl4LXRlc3QuY29tPg0KPiB3cm90ZToNCj4NCj4-IFRoaXMgaXMgZW1haWwgMSBpbiBjaGFpbiAxDQo-Pg0KPg0K\"\n            }\n          },\n          {\n            \"mimeType\": \"text/html\",\n            \"body\": {\n              \"data\": \"PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCA0IGluIGNoYWluIDE8YnIgY2xhc3M9ImdtYWlsLUFwcGxlLWludGVyY2hhbmdlLW5ld2xpbmUiPjwvZGl2Pjxicj48ZGl2IGNsYXNzPSJnbWFpbF9xdW90ZSI-PGRpdiBkaXI9Imx0ciIgY2xhc3M9ImdtYWlsX2F0dHIiPk9uIFNhdCwgTm92IDIsIDIwMjQgYXQgMTA6MzTigK9BTSBUZXN0IEFkbWluIEFkbWluICZsdDs8YSBocmVmPSJtYWlsdG86YWRtaW5Ab255eC10ZXN0LmNvbSI-YWRtaW5Ab255eC10ZXN0LmNvbTwvYT4mZ3Q7IHdyb3RlOjxicj48L2Rpdj48YmxvY2txdW90ZSBjbGFzcz0iZ21haWxfcXVvdGUiIHN0eWxlPSJtYXJnaW46MHB4IDBweCAwcHggMC44ZXg7Ym9yZGVyLWxlZnQ6MXB4IHNvbGlkIHJnYigyMDQsMjA0LDIwNCk7cGFkZGluZy1sZWZ0OjFleCI-PGRpdiBkaXI9Imx0ciI-PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAzIGluIGNoYWluIDE8L2Rpdj48YnI-PGRpdiBjbGFzcz0iZ21haWxfcXVvdGUiPjxkaXYgZGlyPSJsdHIiIGNsYXNzPSJnbWFpbF9hdHRyIj4tLS0tLS0tLS0tIEZvcndhcmRlZCBtZXNzYWdlIC0tLS0tLS0tLTxicj5Gcm9tOiA8c3Ryb25nIGNsYXNzPSJnbWFpbF9zZW5kZXJuYW1lIiBkaXI9ImF1dG8iPnRlc3RfdXNlcl8xIDE8L3N0cm9uZz4gPHNwYW4gZGlyPSJhdXRvIj4mbHQ7PGEgaHJlZj0ibWFpbHRvOnRlc3RfdXNlcl8xQG9ueXgtdGVzdC5jb20iIHRhcmdldD0iX2JsYW5rIj50ZXN0X3VzZXJfMUBvbnl4LXRlc3QuY29tPC9hPiZndDs8L3NwYW4-PGJyPkRhdGU6IFNhdCwgTm92IDIsIDIwMjQgYXQgMTA6MzPigK9BTTxicj5TdWJqZWN0OiBSZTogRW1haWwgQ2hhaW4gMTxicj5UbzogVGVzdCBBZG1pbiBBZG1pbiAmbHQ7PGEgaHJlZj0ibWFpbHRvOmFkbWluQG9ueXgtdGVzdC5jb20iIHRhcmdldD0iX2JsYW5rIj5hZG1pbkBvbnl4LXRlc3QuY29tPC9hPiZndDs8YnI-PC9kaXY-PGJyPjxicj48ZGl2IGRpcj0ibHRyIj5UaGlzIGlzIGVtYWlsIDIgaW4gY2hhaW4gMTwvZGl2Pjxicj48ZGl2IGNsYXNzPSJnbWFpbF9xdW90ZSI-PGRpdiBkaXI9Imx0ciIgY2xhc3M9ImdtYWlsX2F0dHIiPk9uIFNhdCwgTm92IDIsIDIwMjQgYXQgMTA6MzPigK9BTSBUZXN0IEFkbWluIEFkbWluICZsdDs8YSBocmVmPSJtYWlsdG86YWRtaW5Ab255eC10ZXN0LmNvbSIgdGFyZ2V0PSJfYmxhbmsiPmFkbWluQG9ueXgtdGVzdC5jb208L2E-Jmd0OyB3cm90ZTo8YnI-PC9kaXY-PGJsb2NrcXVvdGUgY2xhc3M9ImdtYWlsX3F1b3RlIiBzdHlsZT0ibWFyZ2luOjBweCAwcHggMHB4IDAuOGV4O2JvcmRlci1sZWZ0OjFweCBzb2xpZCByZ2IoMjA0LDIwNCwyMDQpO3BhZGRpbmctbGVmdDoxZXgiPjxkaXYgZGlyPSJsdHIiPlRoaXMgaXMgZW1haWwgMSBpbiBjaGFpbiAxPC9kaXY-DQo8L2Jsb2NrcXVvdGU-PC9kaXY-DQo8L2Rpdj48L2Rpdj4NCjwvYmxvY2txdW90ZT48L2Rpdj4NCg==\"\n            }\n          }\n        ]\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/google_utils/test_rate_limit_detection.py",
    "content": "import json\n\nimport httplib2  # type: ignore[import-untyped]\nfrom googleapiclient.errors import HttpError  # type: ignore[import-untyped]\n\nfrom onyx.connectors.google_utils.google_utils import _is_rate_limit_error\n\n\ndef _make_http_error(\n    status: int,\n    reason: str = \"unknown\",\n    error_reason: str = \"\",\n) -> HttpError:\n    resp = httplib2.Response({\"status\": status})\n    if error_reason:\n        body = json.dumps(\n            {\n                \"error\": {\n                    \"message\": reason,\n                    \"errors\": [{\"reason\": error_reason, \"message\": reason}],\n                }\n            }\n        ).encode()\n    else:\n        body = json.dumps({\"error\": {\"message\": reason}}).encode()\n    return HttpError(resp, body)\n\n\ndef test_429_is_rate_limit() -> None:\n    assert _is_rate_limit_error(_make_http_error(429))\n\n\ndef test_403_user_rate_limit_exceeded() -> None:\n    err = _make_http_error(\n        403,\n        reason=\"User rate limit exceeded.\",\n        error_reason=\"userRateLimitExceeded\",\n    )\n    assert _is_rate_limit_error(err)\n\n\ndef test_403_rate_limit_exceeded() -> None:\n    err = _make_http_error(\n        403,\n        reason=\"Rate limit exceeded.\",\n        error_reason=\"rateLimitExceeded\",\n    )\n    assert _is_rate_limit_error(err)\n\n\ndef test_403_permission_denied_is_not_rate_limit() -> None:\n    err = _make_http_error(\n        403,\n        reason=\"The caller does not have permission\",\n        error_reason=\"forbidden\",\n    )\n    assert not _is_rate_limit_error(err)\n\n\ndef test_404_is_not_rate_limit() -> None:\n    assert not _is_rate_limit_error(_make_http_error(404))\n\n\ndef test_500_is_not_rate_limit() -> None:\n    assert not _is_rate_limit_error(_make_http_error(500))\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/jira/conftest.py",
    "content": "from collections.abc import Generator\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom jira import JIRA\n\nfrom onyx.connectors.jira.connector import JiraConnector\n\n\n@pytest.fixture\ndef jira_base_url() -> str:\n    return \"https://jira.example.com\"\n\n\n@pytest.fixture\ndef project_key() -> str:\n    return \"TEST\"\n\n\n@pytest.fixture\ndef user_email() -> str:\n    return \"test@example.com\"\n\n\n@pytest.fixture\ndef mock_jira_api_token() -> str:\n    return \"token123\"\n\n\n@pytest.fixture\ndef mock_jira_client() -> MagicMock:\n    \"\"\"Create a mock JIRA client with proper typing\"\"\"\n    mock = MagicMock(spec=JIRA)\n    # Add proper return typing for search_issues method\n    mock.search_issues = MagicMock()\n    # Add proper return typing for project method\n    mock.project = MagicMock()\n    # Add proper return typing for projects method\n    mock.projects = MagicMock()\n    return mock\n\n\n@pytest.fixture\ndef jira_connector(\n    jira_base_url: str, project_key: str, mock_jira_client: MagicMock\n) -> Generator[JiraConnector, None, None]:\n    connector = JiraConnector(\n        jira_base_url=jira_base_url,\n        project_key=project_key,\n        comment_email_blacklist=[\"blacklist@example.com\"],\n        labels_to_skip=[\"secret\", \"sensitive\"],\n    )\n    connector._jira_client = mock_jira_client\n    connector._jira_client.client_info.return_value = jira_base_url\n    with patch(\"onyx.connectors.jira.connector._JIRA_FULL_PAGE_SIZE\", 2):\n        yield connector\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/jira/test_jira_bulk_fetch.py",
    "content": "from typing import Any\nfrom unittest.mock import MagicMock\n\nimport pytest\nimport requests\nfrom jira import JIRA\nfrom jira.resources import Issue\n\nfrom onyx.connectors.jira.connector import bulk_fetch_issues\n\n\ndef _make_raw_issue(issue_id: str) -> dict[str, Any]:\n    return {\n        \"id\": issue_id,\n        \"key\": f\"TEST-{issue_id}\",\n        \"fields\": {\"summary\": f\"Issue {issue_id}\"},\n    }\n\n\ndef _mock_jira_client() -> MagicMock:\n    mock = MagicMock(spec=JIRA)\n    mock._options = {\"server\": \"https://jira.example.com\"}\n    mock._session = MagicMock()\n    mock._get_url = MagicMock(\n        return_value=\"https://jira.example.com/rest/api/3/issue/bulkfetch\"\n    )\n    return mock\n\n\ndef test_bulk_fetch_success() -> None:\n    \"\"\"Happy path: all issues fetched in one request.\"\"\"\n    client = _mock_jira_client()\n    raw = [_make_raw_issue(\"1\"), _make_raw_issue(\"2\"), _make_raw_issue(\"3\")]\n    resp = MagicMock()\n    resp.json.return_value = {\"issues\": raw}\n    client._session.post.return_value = resp\n\n    result = bulk_fetch_issues(client, [\"1\", \"2\", \"3\"])\n    assert len(result) == 3\n    assert all(isinstance(r, Issue) for r in result)\n    client._session.post.assert_called_once()\n\n\ndef test_bulk_fetch_splits_on_json_error() -> None:\n    \"\"\"When the full batch fails with JSONDecodeError, sub-batches succeed.\"\"\"\n    client = _mock_jira_client()\n\n    call_count = 0\n\n    def _post_side_effect(url: str, json: dict[str, Any]) -> MagicMock:  # noqa: ARG001\n        nonlocal call_count\n        call_count += 1\n        ids = json[\"issueIdsOrKeys\"]\n        if len(ids) > 2:\n            resp = MagicMock()\n            resp.json.side_effect = requests.exceptions.JSONDecodeError(\n                \"Expecting ',' delimiter\", \"doc\", 2294125\n            )\n            return resp\n\n        resp = MagicMock()\n        resp.json.return_value = {\"issues\": [_make_raw_issue(i) for i in ids]}\n        return resp\n\n    client._session.post.side_effect = _post_side_effect\n\n    result = bulk_fetch_issues(client, [\"1\", \"2\", \"3\", \"4\"])\n    assert len(result) == 4\n    returned_ids = {r.raw[\"id\"] for r in result}\n    assert returned_ids == {\"1\", \"2\", \"3\", \"4\"}\n    assert call_count > 1\n\n\ndef test_bulk_fetch_raises_on_single_unfetchable_issue() -> None:\n    \"\"\"A single issue that always fails JSON decode raises after splitting.\"\"\"\n    client = _mock_jira_client()\n\n    def _post_side_effect(url: str, json: dict[str, Any]) -> MagicMock:  # noqa: ARG001\n        ids = json[\"issueIdsOrKeys\"]\n        if \"bad\" in ids:\n            resp = MagicMock()\n            resp.json.side_effect = requests.exceptions.JSONDecodeError(\n                \"Expecting ',' delimiter\", \"doc\", 100\n            )\n            return resp\n\n        resp = MagicMock()\n        resp.json.return_value = {\"issues\": [_make_raw_issue(i) for i in ids]}\n        return resp\n\n    client._session.post.side_effect = _post_side_effect\n\n    with pytest.raises(requests.exceptions.JSONDecodeError):\n        bulk_fetch_issues(client, [\"1\", \"bad\", \"2\"])\n\n\ndef test_bulk_fetch_non_json_error_propagates() -> None:\n    \"\"\"Non-JSONDecodeError exceptions still propagate.\"\"\"\n    client = _mock_jira_client()\n\n    resp = MagicMock()\n    resp.json.side_effect = ValueError(\"something else broke\")\n    client._session.post.return_value = resp\n\n    try:\n        bulk_fetch_issues(client, [\"1\"])\n        assert False, \"Expected ValueError to propagate\"\n    except ValueError:\n        pass\n\n\ndef test_bulk_fetch_with_fields() -> None:\n    \"\"\"Fields parameter is forwarded correctly.\"\"\"\n    client = _mock_jira_client()\n    raw = [_make_raw_issue(\"1\")]\n    resp = MagicMock()\n    resp.json.return_value = {\"issues\": raw}\n    client._session.post.return_value = resp\n\n    bulk_fetch_issues(client, [\"1\"], fields=\"summary,description\")\n\n    call_payload = client._session.post.call_args[1][\"json\"]\n    assert call_payload[\"fields\"] == [\"summary\", \"description\"]\n\n\ndef test_bulk_fetch_recursive_splitting_raises_on_bad_issue() -> None:\n    \"\"\"With a 6-issue batch where one is bad, recursion isolates it and raises.\"\"\"\n    client = _mock_jira_client()\n    bad_id = \"BAD\"\n\n    def _post_side_effect(url: str, json: dict[str, Any]) -> MagicMock:  # noqa: ARG001\n        ids = json[\"issueIdsOrKeys\"]\n        if bad_id in ids:\n            resp = MagicMock()\n            resp.json.side_effect = requests.exceptions.JSONDecodeError(\n                \"truncated\", \"doc\", 999\n            )\n            return resp\n\n        resp = MagicMock()\n        resp.json.return_value = {\"issues\": [_make_raw_issue(i) for i in ids]}\n        return resp\n\n    client._session.post.side_effect = _post_side_effect\n\n    with pytest.raises(requests.exceptions.JSONDecodeError):\n        bulk_fetch_issues(client, [\"1\", \"2\", bad_id, \"3\", \"4\", \"5\"])\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/jira/test_jira_checkpointing.py",
    "content": "import time\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\nfrom typing import cast\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom jira import JIRA\nfrom jira import JIRAError\nfrom jira.resources import Issue\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.exceptions import UnexpectedValidationError\nfrom onyx.connectors.jira.connector import JiraConnector\nfrom onyx.connectors.jira.connector import JiraConnectorCheckpoint\nfrom onyx.connectors.jira.utils import JIRA_SERVER_API_VERSION\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import SlimDocument\nfrom onyx.utils.logger import setup_logger\nfrom tests.unit.onyx.connectors.utils import load_everything_from_checkpoint_connector\n\nlogger = setup_logger()\nPAGE_SIZE = 2\n\n\n@pytest.fixture\ndef jira_connector(\n    jira_base_url: str, project_key: str, mock_jira_client: MagicMock\n) -> Generator[JiraConnector, None, None]:\n    connector = JiraConnector(\n        jira_base_url=jira_base_url,\n        project_key=project_key,\n        comment_email_blacklist=[\"blacklist@example.com\"],\n        labels_to_skip=[\"secret\", \"sensitive\"],\n    )\n    connector._jira_client = mock_jira_client\n    connector._jira_client.client_info.return_value = jira_base_url\n    connector._jira_client._options = MagicMock()\n    connector._jira_client._options.return_value = {\n        \"rest_api_version\": JIRA_SERVER_API_VERSION\n    }\n    with patch(\"onyx.connectors.jira.connector._JIRA_FULL_PAGE_SIZE\", 2):\n        yield connector\n\n\n@pytest.fixture\ndef create_mock_issue() -> Callable[..., MagicMock]:\n    def _create_mock_issue(\n        key: str = \"TEST-123\",\n        summary: str = \"Test Issue\",\n        updated: str = \"2023-01-01T12:00:00.000+0000\",\n        description: str = \"Test Description\",\n        labels: list[str] | None = None,\n        project_key: str = \"TEST\",\n        project_name: str = \"Test Project\",\n        issuetype_name: str = \"Story\",\n        parent_key: str | None = None,\n        parent_issuetype_name: str | None = None,\n    ) -> MagicMock:\n        \"\"\"Helper to create a mock Issue object\"\"\"\n        mock_issue = MagicMock(spec=Issue)\n        # Create fields attribute first\n        mock_issue.fields = MagicMock()\n        mock_issue.key = key\n        mock_issue.fields.summary = summary\n        mock_issue.fields.updated = updated\n        mock_issue.fields.description = description\n        mock_issue.fields.labels = labels or []\n\n        # Set up creator and assignee for testing owner extraction\n        mock_issue.fields.reporter = MagicMock()\n        mock_issue.fields.reporter.displayName = \"Test Creator\"\n        mock_issue.fields.reporter.emailAddress = \"creator@example.com\"\n\n        mock_issue.fields.assignee = MagicMock()\n        mock_issue.fields.assignee.displayName = \"Test Assignee\"\n        mock_issue.fields.assignee.emailAddress = \"assignee@example.com\"\n\n        # Set up priority, status, and resolution\n        mock_issue.fields.priority = MagicMock()\n        mock_issue.fields.priority.name = \"High\"\n\n        mock_issue.fields.status = MagicMock()\n        mock_issue.fields.status.name = \"In Progress\"\n\n        mock_issue.fields.resolution = MagicMock()\n        mock_issue.fields.resolution.name = \"Fixed\"\n\n        # Set up project for hierarchy node generation\n        mock_issue.fields.project = MagicMock()\n        mock_issue.fields.project.key = project_key\n        mock_issue.fields.project.name = project_name\n\n        # Set up issuetype for epic detection\n        mock_issue.fields.issuetype = MagicMock()\n        mock_issue.fields.issuetype.name = issuetype_name\n\n        # Set up parent field for hierarchy\n        if parent_key:\n            mock_issue.fields.parent = MagicMock()\n            mock_issue.fields.parent.key = parent_key\n            mock_issue.fields.parent.fields = MagicMock()\n            mock_issue.fields.parent.fields.issuetype = MagicMock()\n            mock_issue.fields.parent.fields.issuetype.name = (\n                parent_issuetype_name or \"Story\"\n            )\n            mock_issue.fields.parent.fields.summary = f\"Parent {parent_key}\"\n        else:\n            mock_issue.fields.parent = None\n\n        # Add raw field for accessing through API version check\n        mock_issue.raw = {\"fields\": {\"description\": description}}\n\n        return mock_issue\n\n    return _create_mock_issue\n\n\ndef test_load_credentials(jira_connector: JiraConnector) -> None:\n    \"\"\"Test loading credentials\"\"\"\n    with patch(\"onyx.connectors.jira.connector.build_jira_client\") as mock_build_client:\n        mock_build_client.return_value = jira_connector._jira_client\n        credentials = {\n            \"jira_user_email\": \"user@example.com\",\n            \"jira_api_token\": \"token123\",\n        }\n\n        result = jira_connector.load_credentials(credentials)\n\n        mock_build_client.assert_called_once_with(\n            credentials=credentials,\n            jira_base=jira_connector.jira_base,\n            scoped_token=False,\n        )\n        assert result is None\n        assert jira_connector._jira_client == mock_build_client.return_value\n\n\ndef test_get_jql_query_with_project(jira_connector: JiraConnector) -> None:\n    \"\"\"Test JQL query generation with project specified\"\"\"\n    start = datetime(2023, 1, 1, tzinfo=timezone.utc).timestamp()\n    end = datetime(2023, 1, 2, tzinfo=timezone.utc).timestamp()\n\n    query = jira_connector._get_jql_query(start, end)\n\n    # Check that the project part and time part are both in the query\n    assert f'project = \"{jira_connector.jira_project}\"' in query\n    assert \"updated >= '2023-01-01 00:00'\" in query\n    assert \"updated <= '2023-01-02 00:00'\" in query\n    assert \" AND \" in query\n\n\ndef test_get_jql_query_without_project(jira_base_url: str) -> None:\n    \"\"\"Test JQL query generation without project specified\"\"\"\n    # Create connector without project key\n    connector = JiraConnector(jira_base_url=jira_base_url)\n\n    start = datetime(2023, 1, 1, tzinfo=timezone.utc).timestamp()\n    end = datetime(2023, 1, 2, tzinfo=timezone.utc).timestamp()\n\n    query = connector._get_jql_query(start, end)\n\n    # Check that only time part is in the query\n    assert \"project =\" not in query\n    assert \"updated >= '2023-01-01 00:00'\" in query\n    assert \"updated <= '2023-01-02 00:00'\" in query\n\n\ndef test_load_from_checkpoint_happy_path(\n    jira_connector: JiraConnector, create_mock_issue: Callable[..., MagicMock]\n) -> None:\n    \"\"\"Test loading from checkpoint - happy path\"\"\"\n    # Set up mocked issues\n    mock_issue1 = create_mock_issue(key=\"TEST-1\", summary=\"Issue 1\")\n    mock_issue2 = create_mock_issue(key=\"TEST-2\", summary=\"Issue 2\")\n    mock_issue3 = create_mock_issue(key=\"TEST-3\", summary=\"Issue 3\")\n\n    # Only mock the search_issues method\n    jira_client = cast(JIRA, jira_connector._jira_client)\n    search_issues_mock = cast(MagicMock, jira_client.search_issues)\n    search_issues_mock.side_effect = [\n        [mock_issue1, mock_issue2],\n        [mock_issue3],\n        [],\n    ]\n\n    # Call load_from_checkpoint\n    end_time = time.time()\n    outputs = load_everything_from_checkpoint_connector(jira_connector, 0, end_time)\n\n    # Check that the documents were returned\n    assert len(outputs) == 2\n\n    checkpoint_output1 = outputs[0]\n    assert len(checkpoint_output1.items) == 2\n    document1 = checkpoint_output1.items[0]\n    assert isinstance(document1, Document)\n    assert document1.id == \"https://jira.example.com/browse/TEST-1\"\n    document2 = checkpoint_output1.items[1]\n    assert isinstance(document2, Document)\n    assert document2.id == \"https://jira.example.com/browse/TEST-2\"\n    assert checkpoint_output1.next_checkpoint == JiraConnectorCheckpoint(\n        offset=2,\n        has_more=True,\n        seen_hierarchy_node_ids=[\"TEST\"],\n    )\n\n    checkpoint_output2 = outputs[1]\n    assert len(checkpoint_output2.items) == 1\n    document3 = checkpoint_output2.items[0]\n    assert isinstance(document3, Document)\n    assert document3.id == \"https://jira.example.com/browse/TEST-3\"\n    assert checkpoint_output2.next_checkpoint == JiraConnectorCheckpoint(\n        offset=3,\n        has_more=False,\n        seen_hierarchy_node_ids=[\"TEST\"],\n    )\n\n    # Check that search_issues was called with the right parameters\n    assert search_issues_mock.call_count == 2\n    args, kwargs = search_issues_mock.call_args_list[0]\n    assert kwargs[\"startAt\"] == 0\n    assert kwargs[\"maxResults\"] == PAGE_SIZE\n\n    args, kwargs = search_issues_mock.call_args_list[1]\n    assert kwargs[\"startAt\"] == 2\n    assert kwargs[\"maxResults\"] == PAGE_SIZE\n\n\ndef test_load_from_checkpoint_with_issue_processing_error(\n    jira_connector: JiraConnector, create_mock_issue: Callable[..., MagicMock]\n) -> None:\n    \"\"\"Test loading from checkpoint with a mix of successful and failed issue processing across multiple batches\"\"\"\n    # Set up mocked issues for first batch\n    mock_issue1 = create_mock_issue(key=\"TEST-1\", summary=\"Issue 1\")\n    mock_issue2 = create_mock_issue(key=\"TEST-2\", summary=\"Issue 2\")\n    # Set up mocked issues for second batch\n    mock_issue3 = create_mock_issue(key=\"TEST-3\", summary=\"Issue 3\")\n    mock_issue4 = create_mock_issue(key=\"TEST-4\", summary=\"Issue 4\")\n\n    # Mock search_issues to return our mock issues in batches\n    jira_client = cast(JIRA, jira_connector._jira_client)\n    search_issues_mock = cast(MagicMock, jira_client.search_issues)\n    search_issues_mock.side_effect = [\n        [mock_issue1, mock_issue2],  # First batch\n        [mock_issue3, mock_issue4],  # Second batch\n        [],  # Empty batch to indicate end\n    ]\n\n    # Mock process_jira_issue to succeed for some issues and fail for others\n    def mock_process_side_effect(\n        jira_base_url: str,  # noqa: ARG001\n        issue: Issue,\n        *args: Any,  # noqa: ARG001\n        **kwargs: Any,  # noqa: ARG001\n    ) -> Document | None:\n        if issue.key in [\"TEST-1\", \"TEST-3\"]:\n            return Document(\n                id=f\"https://jira.example.com/browse/{issue.key}\",\n                sections=[],\n                source=DocumentSource.JIRA,\n                semantic_identifier=f\"{issue.key}: {issue.fields.summary}\",\n                title=f\"{issue.key} {issue.fields.summary}\",\n                metadata={},\n            )\n        else:\n            raise Exception(f\"Processing error for {issue.key}\")\n\n    with patch(\"onyx.connectors.jira.connector.process_jira_issue\") as mock_process:\n        mock_process.side_effect = mock_process_side_effect\n\n        # Call load_from_checkpoint\n        end_time = time.time()\n        outputs = load_everything_from_checkpoint_connector(jira_connector, 0, end_time)\n\n        assert len(outputs) == 3\n\n        # Check first batch\n        first_batch = outputs[0]\n        assert len(first_batch.items) == 2\n        # First item should be successful\n        assert isinstance(first_batch.items[0], Document)\n        assert first_batch.items[0].id == \"https://jira.example.com/browse/TEST-1\"\n        # Second item should be a failure\n        assert isinstance(first_batch.items[1], ConnectorFailure)\n        assert first_batch.items[1].failed_document is not None\n        assert first_batch.items[1].failed_document.document_id == \"TEST-2\"\n        assert \"Failed to process Jira issue\" in first_batch.items[1].failure_message\n        # Check checkpoint indicates more items (full batch)\n        assert first_batch.next_checkpoint.has_more is True\n        assert first_batch.next_checkpoint.offset == 2\n\n        # Check second batch\n        second_batch = outputs[1]\n        assert len(second_batch.items) == 2\n        # First item should be successful\n        assert isinstance(second_batch.items[0], Document)\n        assert second_batch.items[0].id == \"https://jira.example.com/browse/TEST-3\"\n        # Second item should be a failure\n        assert isinstance(second_batch.items[1], ConnectorFailure)\n        assert second_batch.items[1].failed_document is not None\n        assert second_batch.items[1].failed_document.document_id == \"TEST-4\"\n        assert \"Failed to process Jira issue\" in second_batch.items[1].failure_message\n        # Check checkpoint indicates more items\n        assert second_batch.next_checkpoint.has_more is True\n        assert second_batch.next_checkpoint.offset == 4\n\n        # Check third, empty batch\n        third_batch = outputs[2]\n        assert len(third_batch.items) == 0\n        assert third_batch.next_checkpoint.has_more is False\n        assert third_batch.next_checkpoint.offset == 4\n\n\ndef test_load_from_checkpoint_with_skipped_issue(\n    jira_connector: JiraConnector, create_mock_issue: Callable[..., MagicMock]\n) -> None:\n    \"\"\"Test loading from checkpoint with an issue that should be skipped due to labels\"\"\"\n    LABEL_TO_SKIP = \"secret\"\n    jira_connector.labels_to_skip = {LABEL_TO_SKIP}\n\n    # Set up mocked issue with a label to skip\n    mock_issue = create_mock_issue(\n        key=\"TEST-1\", summary=\"Issue 1\", labels=[LABEL_TO_SKIP]\n    )\n\n    # Mock search_issues to return our mock issue\n    jira_client = cast(JIRA, jira_connector._jira_client)\n    search_issues_mock = cast(MagicMock, jira_client.search_issues)\n    search_issues_mock.return_value = [mock_issue]\n\n    # Call load_from_checkpoint\n    end_time = time.time()\n    outputs = load_everything_from_checkpoint_connector(jira_connector, 0, end_time)\n\n    assert len(outputs) == 1\n    checkpoint_output = outputs[0]\n    # Check that no documents were returned\n    assert len(checkpoint_output.items) == 0\n\n\ndef test_retrieve_all_slim_docs_perm_sync(\n    jira_connector: JiraConnector, create_mock_issue: Any\n) -> None:\n    \"\"\"Test retrieving all slim documents\"\"\"\n    # Set up mocked issues with proper project fields\n    mock_issue1 = create_mock_issue(key=\"TEST-1\", project_key=\"TEST\")\n    mock_issue2 = create_mock_issue(key=\"TEST-2\", project_key=\"TEST\")\n\n    # Mock search_issues to return our mock issues\n    jira_client = cast(JIRA, jira_connector._jira_client)\n    search_issues_mock = cast(MagicMock, jira_client.search_issues)\n    search_issues_mock.return_value = [mock_issue1, mock_issue2]\n\n    # Call retrieve_all_slim_docs_perm_sync\n    batches = list(jira_connector.retrieve_all_slim_docs_perm_sync(0, 100))\n\n    # Check that a batch was returned (may include hierarchy nodes + slim docs)\n    assert len(batches) == 1\n    # Filter to just slim documents for checking\n    slim_docs = [item for item in batches[0] if isinstance(item, SlimDocument)]\n    assert len(slim_docs) == 2\n    assert slim_docs[0].id == \"https://jira.example.com/browse/TEST-1\"\n    assert slim_docs[1].id == \"https://jira.example.com/browse/TEST-2\"\n\n    # Check that search_issues was called\n    search_issues_mock.assert_called_once()\n\n\n@pytest.mark.parametrize(\n    \"status_code,expected_exception,expected_message\",\n    [\n        (\n            401,\n            CredentialExpiredError,\n            \"Jira credential appears to be expired or invalid\",\n        ),\n        (\n            403,\n            InsufficientPermissionsError,\n            \"Your Jira token does not have sufficient permissions\",\n        ),\n        (\n            # This test used to check for 404 project not found, but the jira validation logic for 404\n            # now returns an UnexpectedValidationError when no error text is provided.\n            # There's no point in passing the expected message and asserting it exists in the raised error\n            # If tested in the UI, wrong project key will still produce the expected error.\n            404,\n            UnexpectedValidationError,\n            \"Unexpected Jira error during validation\",\n        ),\n        (\n            429,\n            ConnectorValidationError,\n            \"Validation failed due to Jira rate-limits being exceeded\",\n        ),\n    ],\n)\ndef test_validate_connector_settings_errors(\n    jira_connector: JiraConnector,\n    status_code: int,\n    expected_exception: type[Exception],\n    expected_message: str,\n) -> None:\n    \"\"\"Test validation with various error scenarios\"\"\"\n    error = JIRAError(status_code=status_code)\n\n    jira_client = cast(JIRA, jira_connector._jira_client)\n    project_mock = cast(MagicMock, jira_client.project)\n    project_mock.side_effect = error\n\n    with pytest.raises(expected_exception) as excinfo:\n        jira_connector.validate_connector_settings()\n    assert expected_message in str(excinfo.value)\n\n\ndef test_validate_connector_settings_with_project_success(\n    jira_connector: JiraConnector,\n) -> None:\n    \"\"\"Test successful validation with project specified\"\"\"\n    jira_client = cast(JIRA, jira_connector._jira_client)\n    project_mock = cast(MagicMock, jira_client.project)\n    project_mock.return_value = MagicMock()\n    jira_connector.validate_connector_settings()\n    project_mock.assert_called_once_with(jira_connector.jira_project)\n\n\ndef test_validate_connector_settings_without_project_success(\n    jira_base_url: str,\n) -> None:\n    \"\"\"Test successful validation without project specified\"\"\"\n    connector = JiraConnector(jira_base_url=jira_base_url)\n    connector._jira_client = MagicMock()\n    connector._jira_client.projects.return_value = [MagicMock()]\n\n    connector.validate_connector_settings()\n    connector._jira_client.projects.assert_called_once()\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/jira/test_jira_error_handling.py",
    "content": "\"\"\"Tests for Jira connector error handling during indexing.\"\"\"\n\nimport time\nfrom unittest.mock import MagicMock\n\nimport pytest\nfrom jira import JIRA\nfrom jira import JIRAError\n\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.jira.connector import JiraConnector\nfrom tests.unit.onyx.connectors.utils import load_everything_from_checkpoint_connector\n\n\n@pytest.fixture\ndef jira_connector_with_invalid_project(jira_base_url: str) -> JiraConnector:\n    \"\"\"Create a Jira connector with an invalid project key.\"\"\"\n    connector = JiraConnector(\n        jira_base_url=jira_base_url,\n        project_key=\"INVALID_PROJECT\",\n    )\n    mock_client = MagicMock(spec=JIRA)\n    mock_client._options = {\"rest_api_version\": \"2\"}\n    connector._jira_client = mock_client\n    return connector\n\n\ndef test_nonexistent_project_error_during_indexing(\n    jira_connector_with_invalid_project: JiraConnector,\n) -> None:\n    \"\"\"Test that a non-existent project error during indexing is properly handled.\"\"\"\n    # Create a JIRAError that mimics the error from the stack trace\n    error = JIRAError(\n        status_code=400,\n        text='{\"errorMessages\":[\"The value \\'INVALID_PROJECT\\' does not exist for the field \\'project\\'.\"],\"errors\":{}}',\n    )\n\n    # Mock search_issues to raise this error\n    jira_client = jira_connector_with_invalid_project._jira_client\n    assert jira_client is not None\n    jira_client.search_issues.side_effect = error  # type: ignore\n\n    # Attempt to load from checkpoint - should raise ConnectorValidationError\n    end_time = time.time()\n    with pytest.raises(ConnectorValidationError) as excinfo:\n        list(\n            load_everything_from_checkpoint_connector(\n                jira_connector_with_invalid_project, 0, end_time\n            )\n        )\n\n    # Verify the error message is user-friendly\n    error_message = str(excinfo.value)\n    assert \"does not exist\" in error_message or \"don't have access\" in error_message\n    assert \"INVALID_PROJECT\" in error_message or \"project\" in error_message.lower()\n\n\ndef test_invalid_jql_error_during_indexing(\n    jira_connector_with_invalid_project: JiraConnector,\n) -> None:\n    \"\"\"Test that an invalid JQL error during indexing is properly handled.\"\"\"\n    # Create a JIRAError for invalid JQL syntax\n    error = JIRAError(\n        status_code=400,\n        text='{\"errorMessages\":[\"Error in the JQL Query: Expecting \\')\\' before the end of the query.\"],\"errors\":{}}',\n    )\n\n    # Mock search_issues to raise this error\n    jira_client = jira_connector_with_invalid_project._jira_client\n    assert jira_client is not None\n    jira_client.search_issues.side_effect = error  # type: ignore\n\n    # Attempt to load from checkpoint - should raise ConnectorValidationError\n    end_time = time.time()\n    with pytest.raises(ConnectorValidationError) as excinfo:\n        list(\n            load_everything_from_checkpoint_connector(\n                jira_connector_with_invalid_project, 0, end_time\n            )\n        )\n\n    # Verify the error message mentions invalid JQL\n    error_message = str(excinfo.value)\n    assert \"Invalid JQL\" in error_message or \"JQL\" in error_message\n\n\ndef test_credential_expired_error_during_indexing(\n    jira_connector_with_invalid_project: JiraConnector,\n) -> None:\n    \"\"\"Test that expired credentials during indexing are properly handled.\"\"\"\n    # Create a JIRAError for expired credentials\n    error = JIRAError(status_code=401)\n\n    # Mock search_issues to raise this error\n    jira_client = jira_connector_with_invalid_project._jira_client\n    assert jira_client is not None\n    jira_client.search_issues.side_effect = error  # type: ignore\n\n    # Attempt to load from checkpoint - should raise CredentialExpiredError\n    end_time = time.time()\n    with pytest.raises(CredentialExpiredError) as excinfo:\n        list(\n            load_everything_from_checkpoint_connector(\n                jira_connector_with_invalid_project, 0, end_time\n            )\n        )\n\n    # Verify the error message mentions credentials\n    error_message = str(excinfo.value)\n    assert \"credential\" in error_message.lower() or \"401\" in error_message\n\n\ndef test_insufficient_permissions_error_during_indexing(\n    jira_connector_with_invalid_project: JiraConnector,\n) -> None:\n    \"\"\"Test that insufficient permissions during indexing are properly handled.\"\"\"\n    # Create a JIRAError for insufficient permissions\n    error = JIRAError(status_code=403)\n\n    # Mock search_issues to raise this error\n    jira_client = jira_connector_with_invalid_project._jira_client\n    assert jira_client is not None\n    jira_client.search_issues.side_effect = error  # type: ignore\n\n    # Attempt to load from checkpoint - should raise InsufficientPermissionsError\n    end_time = time.time()\n    with pytest.raises(InsufficientPermissionsError) as excinfo:\n        list(\n            load_everything_from_checkpoint_connector(\n                jira_connector_with_invalid_project, 0, end_time\n            )\n        )\n\n    # Verify the error message mentions permissions\n    error_message = str(excinfo.value)\n    assert \"permission\" in error_message.lower() or \"403\" in error_message\n\n\ndef test_cloud_nonexistent_project_error_during_indexing(\n    jira_base_url: str,\n) -> None:\n    \"\"\"Test that a non-existent project error for Jira Cloud is properly handled.\"\"\"\n    from requests.exceptions import HTTPError\n\n    # Create a cloud connector\n    connector = JiraConnector(\n        jira_base_url=jira_base_url,\n        project_key=\"INVALID_PROJECT\",\n    )\n    mock_client = MagicMock()\n    mock_client._options = {\"rest_api_version\": \"3\"}\n    connector._jira_client = mock_client\n\n    # Mock the session get method to return an error response\n    mock_response = MagicMock()\n    mock_response.status_code = 400\n    mock_response.json.return_value = {\n        \"errorMessages\": [\n            \"The value 'INVALID_PROJECT' does not exist for the field 'project'.\"\n        ],\n        \"errors\": {},\n    }\n\n    # Create a proper HTTPError with the response attached\n    http_error = HTTPError(\"400 Client Error: Bad Request\")\n    http_error.response = mock_response\n    mock_response.raise_for_status.side_effect = http_error\n\n    mock_session = MagicMock()\n    mock_session.get.return_value = mock_response\n    mock_client._session = mock_session\n    mock_client._get_url.return_value = (\n        \"https://api.atlassian.com/ex/jira/cloud-id/rest/api/3/search/jql\"\n    )\n\n    # Attempt to load from checkpoint - should raise ConnectorValidationError\n    end_time = time.time()\n    with pytest.raises(ConnectorValidationError) as excinfo:\n        list(load_everything_from_checkpoint_connector(connector, 0, end_time))\n\n    # Verify the error message is user-friendly\n    error_message = str(excinfo.value)\n    assert \"does not exist\" in error_message or \"don't have access\" in error_message\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/jira/test_jira_large_ticket_handling.py",
    "content": "from collections.abc import Generator\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom jira.resources import Issue\nfrom pytest_mock import MockFixture\n\nfrom onyx.connectors.jira.connector import _perform_jql_search\nfrom onyx.connectors.jira.connector import process_jira_issue\n\n\n@pytest.fixture\ndef mock_jira_client() -> MagicMock:\n    return MagicMock()\n\n\n@pytest.fixture\ndef mock_issue_small() -> MagicMock:\n    issue = MagicMock(spec=Issue)\n    fields = MagicMock()\n    fields.description = \"Small description\"\n    fields.comment = MagicMock()\n    fields.comment.comments = [\n        MagicMock(body=\"Small comment 1\"),\n        MagicMock(body=\"Small comment 2\"),\n    ]\n    fields.reporter = MagicMock()\n    fields.reporter.displayName = \"John Doe\"\n    fields.reporter.emailAddress = \"john@example.com\"\n    fields.assignee = MagicMock()\n    fields.assignee.displayName = \"John Doe\"\n    fields.assignee.emailAddress = \"john@example.com\"\n    fields.summary = \"Small Issue\"\n    fields.updated = \"2023-01-01T00:00:00+0000\"\n    fields.labels = []\n\n    issue.fields = fields\n    issue.key = \"SMALL-1\"\n    return issue\n\n\n@pytest.fixture\ndef mock_issue_large() -> MagicMock:\n    issue = MagicMock(spec=Issue)\n    fields = MagicMock()\n    fields.description = \"a\" * 99_000\n    fields.comment = MagicMock()\n    fields.comment.comments = [\n        MagicMock(body=\"Large comment \" * 1000),\n        MagicMock(body=\"Another large comment \" * 1000),\n    ]\n    fields.reporter = MagicMock()\n    fields.reporter.displayName = \"Jane Doe\"\n    fields.reporter.emailAddress = \"jane@example.com\"\n    fields.assignee = MagicMock()\n    fields.assignee.displayName = \"Jane Doe\"\n    fields.assignee.emailAddress = \"jane@example.com\"\n    fields.summary = \"Large Issue\"\n    fields.updated = \"2023-01-02T00:00:00+0000\"\n    fields.labels = []\n\n    issue.fields = fields\n    issue.key = \"LARGE-1\"\n    return issue\n\n\n@pytest.fixture\ndef mock_jira_api_version() -> Generator[Any, Any, Any]:\n    with patch(\"onyx.connectors.jira.utils.JIRA_CLOUD_API_VERSION\", \"3\"):\n        with patch(\"onyx.connectors.jira.utils.JIRA_SERVER_API_VERSION\", \"2\"):\n            yield\n\n\n@pytest.fixture\ndef patched_environment(\n    mock_jira_api_version: MockFixture,  # noqa: ARG001\n) -> Generator[Any, Any, Any]:\n    yield\n\n\ndef test_fetch_jira_issues_batch_small_ticket(\n    mock_jira_client: MagicMock,\n    mock_issue_small: MagicMock,\n    patched_environment: MockFixture,  # noqa: ARG001\n) -> None:\n    mock_jira_client.search_issues.return_value = [mock_issue_small]\n\n    # First get the issues via pagination\n    issues = list(_perform_jql_search(mock_jira_client, \"project = TEST\", 0, 50))\n    assert len(issues) == 1\n\n    # Then process each issue\n    docs = [process_jira_issue(\"test.com\", issue) for issue in issues]\n    docs = [doc for doc in docs if doc is not None]  # Filter out None values\n\n    assert len(docs) == 1\n    doc = docs[0]\n    assert doc is not None  # Type assertion for mypy\n    assert doc.id.endswith(\"/SMALL-1\")\n    assert doc.sections[0].text is not None\n    assert \"Small description\" in doc.sections[0].text\n    assert \"Small comment 1\" in doc.sections[0].text\n    assert \"Small comment 2\" in doc.sections[0].text\n\n\ndef test_fetch_jira_issues_batch_large_ticket(\n    mock_jira_client: MagicMock,\n    mock_issue_large: MagicMock,\n    patched_environment: MockFixture,  # noqa: ARG001\n) -> None:\n    mock_jira_client.search_issues.return_value = [mock_issue_large]\n\n    # First get the issues via pagination\n    issues = list(_perform_jql_search(mock_jira_client, \"project = TEST\", 0, 50))\n    assert len(issues) == 1\n\n    # Then process each issue\n    docs = [process_jira_issue(\"test.com\", issue) for issue in issues]\n    docs = [doc for doc in docs if doc is not None]  # Filter out None values\n\n    assert len(docs) == 0  # The large ticket should be skipped\n\n\ndef test_fetch_jira_issues_batch_mixed_tickets(\n    mock_jira_client: MagicMock,\n    mock_issue_small: MagicMock,\n    mock_issue_large: MagicMock,\n    patched_environment: MockFixture,  # noqa: ARG001\n) -> None:\n    mock_jira_client.search_issues.return_value = [mock_issue_small, mock_issue_large]\n\n    # First get the issues via pagination\n    issues = list(_perform_jql_search(mock_jira_client, \"project = TEST\", 0, 50))\n    assert len(issues) == 2\n\n    # Then process each issue\n    docs = [process_jira_issue(\"test.com\", issue) for issue in issues]\n    docs = [doc for doc in docs if doc is not None]  # Filter out None values\n\n    assert len(docs) == 1  # Only the small ticket should be included\n    doc = docs[0]\n    assert doc is not None  # Type assertion for mypy\n    assert doc.id.endswith(\"/SMALL-1\")\n\n\n@patch(\"onyx.connectors.jira.connector.JIRA_CONNECTOR_MAX_TICKET_SIZE\", 50)\ndef test_fetch_jira_issues_batch_custom_size_limit(\n    mock_jira_client: MagicMock,\n    mock_issue_small: MagicMock,\n    mock_issue_large: MagicMock,\n    patched_environment: MockFixture,  # noqa: ARG001\n) -> None:\n    mock_jira_client.search_issues.return_value = [mock_issue_small, mock_issue_large]\n\n    # First get the issues via pagination\n    issues = list(_perform_jql_search(mock_jira_client, \"project = TEST\", 0, 50))\n    assert len(issues) == 2\n\n    # Then process each issue\n    docs = [process_jira_issue(\"test.com\", issue) for issue in issues]\n    docs = [doc for doc in docs if doc is not None]  # Filter out None values\n\n    assert len(docs) == 0  # Both tickets should be skipped due to the low size limit\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/jira/test_jira_permission_sync.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom ee.onyx.external_permissions.jira.doc_sync import jira_doc_sync\nfrom onyx.connectors.jira.connector import JiraConnector\nfrom onyx.connectors.jira.utils import JIRA_SERVER_API_VERSION\nfrom onyx.db.models import ConnectorCredentialPair\nfrom onyx.utils.sensitive import make_mock_sensitive_value\n\npytestmark = pytest.mark.usefixtures(\"enable_ee\")\n\n\n@pytest.fixture\ndef mock_jira_cc_pair(\n    jira_base_url: str,\n    project_key: str,\n    user_email: str,\n    mock_jira_api_token: str,\n) -> MagicMock:\n    mock_cc_pair = MagicMock(spec=ConnectorCredentialPair)\n    mock_cc_pair.connector = MagicMock()\n    mock_cc_pair.credential.credential_json = make_mock_sensitive_value(\n        {\n            \"jira_user_email\": user_email,\n            \"jira_api_token\": mock_jira_api_token,\n        }\n    )\n    mock_cc_pair.connector.connector_specific_config = {\n        \"jira_base_url\": jira_base_url,\n        \"project_key\": project_key,\n    }\n    mock_cc_pair.connector.indexing_start = None\n\n    return mock_cc_pair\n\n\n@pytest.fixture\ndef mock_fetch_all_existing_docs_fn() -> MagicMock:\n    return MagicMock(return_value=[])\n\n\n@pytest.fixture\ndef mock_fetch_all_existing_docs_ids_fn() -> MagicMock:\n    return MagicMock(return_value=[])\n\n\ndef test_jira_permission_sync(\n    jira_connector: JiraConnector,\n    mock_jira_cc_pair: MagicMock,\n    mock_fetch_all_existing_docs_fn: MagicMock,\n    mock_fetch_all_existing_docs_ids_fn: MagicMock,\n) -> None:\n    with patch(\"onyx.connectors.jira.connector.build_jira_client\") as mock_build_client:\n        mock_build_client.return_value = jira_connector._jira_client\n        assert jira_connector._jira_client is not None\n        jira_connector._jira_client._options = MagicMock()\n        jira_connector._jira_client._options.return_value = {\n            \"rest_api_version\": JIRA_SERVER_API_VERSION\n        }\n\n        for doc in jira_doc_sync(\n            cc_pair=mock_jira_cc_pair,\n            fetch_all_existing_docs_fn=mock_fetch_all_existing_docs_fn,\n            fetch_all_existing_docs_ids_fn=mock_fetch_all_existing_docs_ids_fn,\n        ):\n            print(doc)\n\n\ndef test_jira_doc_sync_passes_indexing_start(\n    jira_connector: JiraConnector,\n    mock_jira_cc_pair: MagicMock,\n    mock_fetch_all_existing_docs_fn: MagicMock,\n    mock_fetch_all_existing_docs_ids_fn: MagicMock,\n) -> None:\n    \"\"\"Verify that generic_doc_sync derives indexing_start from cc_pair\n    and forwards it to retrieve_all_slim_docs_perm_sync.\"\"\"\n    indexing_start_dt = datetime(2025, 6, 1, tzinfo=timezone.utc)\n    mock_jira_cc_pair.connector.indexing_start = indexing_start_dt\n\n    with patch(\"onyx.connectors.jira.connector.build_jira_client\") as mock_build_client:\n        mock_build_client.return_value = jira_connector._jira_client\n        assert jira_connector._jira_client is not None\n        jira_connector._jira_client._options = MagicMock()\n        jira_connector._jira_client._options.return_value = {\n            \"rest_api_version\": JIRA_SERVER_API_VERSION\n        }\n\n        with patch.object(\n            type(jira_connector),\n            \"retrieve_all_slim_docs_perm_sync\",\n            return_value=iter([]),\n        ) as mock_retrieve:\n            list(\n                jira_doc_sync(\n                    cc_pair=mock_jira_cc_pair,\n                    fetch_all_existing_docs_fn=mock_fetch_all_existing_docs_fn,\n                    fetch_all_existing_docs_ids_fn=mock_fetch_all_existing_docs_ids_fn,\n                )\n            )\n\n            mock_retrieve.assert_called_once()\n            call_kwargs = mock_retrieve.call_args\n            assert call_kwargs.kwargs[\"start\"] == indexing_start_dt.timestamp()\n\n\ndef test_jira_doc_sync_passes_none_when_no_indexing_start(\n    jira_connector: JiraConnector,\n    mock_jira_cc_pair: MagicMock,\n    mock_fetch_all_existing_docs_fn: MagicMock,\n    mock_fetch_all_existing_docs_ids_fn: MagicMock,\n) -> None:\n    \"\"\"Verify that indexing_start is None when the connector has no indexing_start set.\"\"\"\n    mock_jira_cc_pair.connector.indexing_start = None\n\n    with patch(\"onyx.connectors.jira.connector.build_jira_client\") as mock_build_client:\n        mock_build_client.return_value = jira_connector._jira_client\n        assert jira_connector._jira_client is not None\n        jira_connector._jira_client._options = MagicMock()\n        jira_connector._jira_client._options.return_value = {\n            \"rest_api_version\": JIRA_SERVER_API_VERSION\n        }\n\n        with patch.object(\n            type(jira_connector),\n            \"retrieve_all_slim_docs_perm_sync\",\n            return_value=iter([]),\n        ) as mock_retrieve:\n            list(\n                jira_doc_sync(\n                    cc_pair=mock_jira_cc_pair,\n                    fetch_all_existing_docs_fn=mock_fetch_all_existing_docs_fn,\n                    fetch_all_existing_docs_ids_fn=mock_fetch_all_existing_docs_ids_fn,\n                )\n            )\n\n            mock_retrieve.assert_called_once()\n            call_kwargs = mock_retrieve.call_args\n            assert call_kwargs.kwargs[\"start\"] is None\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/mediawiki/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/connectors/mediawiki/test_mediawiki_family.py",
    "content": "from typing import Final\n\nimport pytest\nfrom pytest_mock import MockFixture\nfrom pywikibot.families.wikipedia_family import Family as WikipediaFamily  # type: ignore[import-untyped]\nfrom pywikibot.family import Family  # type: ignore[import-untyped]\n\nfrom onyx.connectors.mediawiki import family\n\n\n# Disabling these tests as they are flaky and rely on external wikis that are maintained by just fan communities\n\n\nNON_BUILTIN_WIKIS: Final[list[tuple[str, str]]] = [\n    (\"https://fallout.fandom.com\", \"falloutwiki\"),\n    (\"https://harrypotter.fandom.com/wiki/\", \"harrypotterwiki\"),\n    # (\"https://artofproblemsolving.com/wiki\", \"artofproblemsolving\"),  # FLAKY\n    (\"https://www.bogleheads.org/wiki/Main_Page\", \"bogleheadswiki\"),\n    (\"https://bogleheads.org/wiki/Main_Page\", \"bogleheadswiki\"),\n    (\"https://www.dandwiki.com/wiki/\", \"dungeonsanddragons\"),\n    (\"https://wiki.factorio.com/\", \"factoriowiki\"),\n]\n\n\n# TODO: Add support for more builtin family types from `pywikibot.families`.\n@pytest.mark.skip(reason=\"Temporarily skipped\")\n@pytest.mark.parametrize(\n    \"url, name, expected\",\n    [\n        (\n            \"https://en.wikipedia.org\",\n            \"wikipedia\",\n            WikipediaFamily,\n        ),  # Support urls with protocol\n        (\n            \"wikipedia.org\",\n            \"wikipedia\",\n            WikipediaFamily,\n        ),  # Support urls without subdomain\n        (\n            \"en.wikipedia.org\",\n            \"wikipedia\",\n            WikipediaFamily,\n        ),  # Support urls with subdomain\n        (\"m.wikipedia.org\", \"wikipedia\", WikipediaFamily),\n        (\"de.wikipedia.org\", \"wikipedia\", WikipediaFamily),\n    ],\n)\ndef test_family_class_dispatch_builtins(\n    url: str, name: str, expected: type[Family]\n) -> None:\n    \"\"\"Test that the family class dispatch function returns the correct family class in several scenarios.\"\"\"\n    assert family.family_class_dispatch(url, name) == expected\n\n\n@pytest.mark.skip(reason=\"Temporarily skipped\")\n@pytest.mark.parametrize(\"url, name\", NON_BUILTIN_WIKIS)\ndef test_family_class_dispatch_on_non_builtins_generates_new_class_fast(\n    url: str, name: str, mocker: MockFixture\n) -> None:\n    \"\"\"Test that using the family class dispatch function on an unknown url generates a new family class.\"\"\"\n    mock_generate_family_class = mocker.patch.object(family, \"generate_family_class\")\n    family.family_class_dispatch(url, name)\n    mock_generate_family_class.assert_called_once_with(url, name)\n\n\n@pytest.mark.skip(reason=\"Temporarily skipped\")\n@pytest.mark.slow\n@pytest.mark.parametrize(\"url, name\", NON_BUILTIN_WIKIS)\ndef test_family_class_dispatch_on_non_builtins_generates_new_class_slow(\n    url: str, name: str\n) -> None:\n    \"\"\"Test that using the family class dispatch function on an unknown url generates a new family class.\n\n    This test is slow because it actually performs the network calls to generate the family classes.\n    \"\"\"\n    generated_family_class = family.generate_family_class(url, name)\n    assert issubclass(generated_family_class, Family)\n    dispatch_family_class = family.family_class_dispatch(url, name)\n    assert dispatch_family_class == generated_family_class\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/mediawiki/test_wiki.py",
    "content": "from __future__ import annotations\n\nimport datetime\nimport tempfile\nfrom collections.abc import Iterable\n\nimport pytest\nimport pywikibot  # type: ignore[import-untyped]\nfrom pytest_mock import MockFixture\n\nfrom onyx.connectors.mediawiki import wiki\n\n# Some of these tests are disabled for now due to flakiness with wikipedia as the backend\n\npywikibot.config.base_dir = tempfile.TemporaryDirectory().name\n\n\n@pytest.fixture\ndef site() -> pywikibot.Site:\n    return pywikibot.Site(\"en\", \"wikipedia\")\n\n\ndef test_pywikibot_timestamp_to_utc_datetime() -> None:\n    timestamp_without_tzinfo = pywikibot.Timestamp(2023, 12, 27, 15, 38, 49)\n    timestamp_min_timezone = timestamp_without_tzinfo.astimezone(datetime.timezone.min)\n    timestamp_max_timezone = timestamp_without_tzinfo.astimezone(datetime.timezone.max)\n    assert timestamp_min_timezone.tzinfo == datetime.timezone.min\n    assert timestamp_max_timezone.tzinfo == datetime.timezone.max\n    for timestamp in [\n        timestamp_without_tzinfo,\n        timestamp_min_timezone,\n        timestamp_max_timezone,\n    ]:\n        dt = wiki.pywikibot_timestamp_to_utc_datetime(timestamp)\n        assert dt.tzinfo == datetime.timezone.utc\n\n\nclass MockPage(pywikibot.Page):\n    def __init__(\n        self, site: pywikibot.Site, title: str, _has_categories: bool = False\n    ) -> None:\n        super().__init__(site, title)\n        self._has_categories = _has_categories\n        self.header = \"This is a header\"\n        self._sections = [\"This is a section\", \"This is another section\"]\n\n    @property\n    def _sections_helper(self) -> list[str]:\n        return [\n            f\"== Section {i} ==\\n{section}\\n\"\n            for i, section in enumerate(self._sections)\n        ]\n\n    @property\n    def text(self) -> str:\n        text = self.header + \"\\n\"\n        for section in self._sections_helper:\n            text += section\n        return text\n\n    @property\n    def pageid(self) -> str:\n        return \"1\"\n\n    def full_url(self) -> str:\n        return \"Test URL\"\n\n    def categories(\n        self,\n        with_sort_key: bool = False,  # noqa: ARG002\n        total: int | None = None,  # noqa: ARG002\n        content: bool = False,  # noqa: ARG002\n    ) -> Iterable[pywikibot.Page]:\n        if not self._has_categories:\n            return []\n        return [\n            MockPage(self.site, \"Test Category1\"),\n            MockPage(self.site, \"Test Category2\"),\n        ]\n\n    @property\n    def latest_revision(self) -> pywikibot.page.Revision:\n        return pywikibot.page.Revision(\n            timestamp=pywikibot.Timestamp(2023, 12, 27, 15, 38, 49)\n        )\n\n\n@pytest.mark.skip(reason=\"Test disabled\")\ndef test_get_doc_from_page(site: pywikibot.Site) -> None:\n    test_page = MockPage(site, \"Test Page\", _has_categories=True)\n    doc = wiki.get_doc_from_page(test_page, site, wiki.DocumentSource.MEDIAWIKI)\n    assert doc.source == wiki.DocumentSource.MEDIAWIKI\n    assert doc.title == test_page.title()\n    assert doc.doc_updated_at == wiki.pywikibot_timestamp_to_utc_datetime(\n        test_page.latest_revision.timestamp\n    )\n    assert len(doc.sections) == 3\n    for section, expected_section in zip(\n        doc.sections, test_page._sections_helper + [test_page.header]\n    ):\n        assert (\n            section.text is not None\n            and section.text.strip() == expected_section.strip()\n        )  # Extra whitespace before/after is okay\n        assert section.link and section.link.startswith(test_page.full_url())\n    assert doc.semantic_identifier == test_page.title()\n    assert doc.metadata == {\n        \"categories\": [category.title() for category in test_page.categories()]\n    }\n    assert doc.id == f\"MEDIAWIKI_{test_page.pageid}_{test_page.full_url()}\"\n\n\n@pytest.mark.skip(reason=\"Test disabled\")\ndef test_mediawiki_connector_recurse_depth() -> None:\n    \"\"\"Test that the recurse_depth parameter is parsed correctly.\n\n    -1 should be parsed as `True` (for unbounded recursion)\n    0 or greater should be parsed as an integer\n    Negative values less than -1 should raise a ValueError\n\n    This is the specification dictated by the `pywikibot` library. We do not need to test behavior beyond this.\n    \"\"\"\n    hostname = \"wikipedia.org\"\n    categories: list[str] = []\n    pages = [\"Test Page\"]\n\n    # Recurse depth less than -1 raises ValueError\n    with pytest.raises(ValueError):\n        recurse_depth = -2\n        wiki.MediaWikiConnector(hostname, categories, pages, recurse_depth)\n\n    # Recurse depth of -1 gets parsed as `True`\n    recurse_depth = -1\n    connector = wiki.MediaWikiConnector(hostname, categories, pages, recurse_depth)\n    assert connector.recurse_depth is True\n\n    # Recurse depth of 0 or greater gets parsed as an integer\n    recurse_depth = 0\n    connector = wiki.MediaWikiConnector(hostname, categories, pages, recurse_depth)\n    assert connector.recurse_depth == recurse_depth\n\n\n@pytest.mark.skip(reason=\"Test disabled\")\ndef test_load_from_state_calls_poll_source_with_nones(mocker: MockFixture) -> None:\n    connector = wiki.MediaWikiConnector(\"wikipedia.org\", [], [], 0, \"test\")\n    poll_source = mocker.patch.object(connector, \"poll_source\")\n    connector.load_from_state()\n    poll_source.assert_called_once_with(None, None)\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/notion/test_notion_datasource.py",
    "content": "\"\"\"Unit tests for Notion connector data source API migration.\n\nTests the new data source discovery + querying flow and the\ndata_source_id -> database_id parent resolution.\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom requests.exceptions import HTTPError\n\nfrom onyx.connectors.notion.connector import NotionConnector\nfrom onyx.connectors.notion.connector import NotionDataSource\nfrom onyx.connectors.notion.connector import NotionPage\n\n\ndef _make_connector() -> NotionConnector:\n    connector = NotionConnector()\n    connector.load_credentials({\"notion_integration_token\": \"fake-token\"})\n    return connector\n\n\ndef _mock_response(json_data: dict, status_code: int = 200) -> MagicMock:\n    resp = MagicMock()\n    resp.json.return_value = json_data\n    resp.status_code = status_code\n    if status_code >= 400:\n        resp.raise_for_status.side_effect = HTTPError(\n            f\"HTTP {status_code}\", response=resp\n        )\n    else:\n        resp.raise_for_status.return_value = None\n    return resp\n\n\nclass TestFetchDataSourcesForDatabase:\n    def test_multi_source_database(self) -> None:\n        connector = _make_connector()\n        resp = _mock_response(\n            {\n                \"object\": \"database\",\n                \"id\": \"db-1\",\n                \"data_sources\": [\n                    {\"id\": \"ds-1\", \"name\": \"Source A\"},\n                    {\"id\": \"ds-2\", \"name\": \"Source B\"},\n                ],\n            }\n        )\n        with patch(\n            \"onyx.connectors.notion.connector.rl_requests.get\", return_value=resp\n        ):\n            result = connector._fetch_data_sources_for_database(\"db-1\")\n\n        assert result == [\n            NotionDataSource(id=\"ds-1\", name=\"Source A\"),\n            NotionDataSource(id=\"ds-2\", name=\"Source B\"),\n        ]\n\n    def test_single_source_database(self) -> None:\n        connector = _make_connector()\n        resp = _mock_response(\n            {\n                \"object\": \"database\",\n                \"id\": \"db-1\",\n                \"data_sources\": [{\"id\": \"ds-1\", \"name\": \"Only Source\"}],\n            }\n        )\n        with patch(\n            \"onyx.connectors.notion.connector.rl_requests.get\", return_value=resp\n        ):\n            result = connector._fetch_data_sources_for_database(\"db-1\")\n\n        assert result == [NotionDataSource(id=\"ds-1\", name=\"Only Source\")]\n\n    def test_404_returns_empty(self) -> None:\n        connector = _make_connector()\n        resp = _mock_response({\"object\": \"error\"}, status_code=404)\n        with patch(\n            \"onyx.connectors.notion.connector.rl_requests.get\", return_value=resp\n        ):\n            result = connector._fetch_data_sources_for_database(\"db-missing\")\n\n        assert result == []\n\n\nclass TestFetchDataSource:\n    def test_query_returns_pages(self) -> None:\n        connector = _make_connector()\n        resp = _mock_response(\n            {\n                \"results\": [\n                    {\n                        \"object\": \"page\",\n                        \"id\": \"page-1\",\n                        \"properties\": {\"Name\": {\"type\": \"title\", \"title\": []}},\n                    }\n                ],\n                \"next_cursor\": None,\n            }\n        )\n        with patch(\n            \"onyx.connectors.notion.connector.rl_requests.post\", return_value=resp\n        ):\n            result = connector._fetch_data_source(\"ds-1\")\n\n        assert len(result[\"results\"]) == 1\n        assert result[\"results\"][0][\"id\"] == \"page-1\"\n        assert result[\"next_cursor\"] is None\n\n    def test_404_returns_empty_results(self) -> None:\n        connector = _make_connector()\n        resp = _mock_response({\"object\": \"error\"}, status_code=404)\n        with patch(\n            \"onyx.connectors.notion.connector.rl_requests.post\", return_value=resp\n        ):\n            result = connector._fetch_data_source(\"ds-missing\")\n\n        assert result == {\"results\": [], \"next_cursor\": None}\n\n\nclass TestGetParentRawId:\n    def test_database_id_parent(self) -> None:\n        connector = _make_connector()\n        parent = {\"type\": \"database_id\", \"database_id\": \"db-1\"}\n        assert connector._get_parent_raw_id(parent) == \"db-1\"\n\n    def test_data_source_id_with_mapping(self) -> None:\n        connector = _make_connector()\n        connector._data_source_to_database_map[\"ds-1\"] = \"db-1\"\n        parent = {\"type\": \"data_source_id\", \"data_source_id\": \"ds-1\"}\n        assert connector._get_parent_raw_id(parent) == \"db-1\"\n\n    def test_data_source_id_without_mapping_falls_back(self) -> None:\n        connector = _make_connector()\n        connector.workspace_id = \"ws-1\"\n        parent = {\"type\": \"data_source_id\", \"data_source_id\": \"ds-unknown\"}\n        assert connector._get_parent_raw_id(parent) == \"ws-1\"\n\n    def test_workspace_parent(self) -> None:\n        connector = _make_connector()\n        connector.workspace_id = \"ws-1\"\n        parent = {\"type\": \"workspace\"}\n        assert connector._get_parent_raw_id(parent) == \"ws-1\"\n\n    def test_page_id_parent(self) -> None:\n        connector = _make_connector()\n        parent = {\"type\": \"page_id\", \"page_id\": \"page-1\"}\n        assert connector._get_parent_raw_id(parent) == \"page-1\"\n\n    def test_block_id_parent_with_mapping(self) -> None:\n        connector = _make_connector()\n        connector.workspace_id = \"ws-1\"\n        connector._child_page_parent_map[\"inline-page-1\"] = \"containing-page-1\"\n        parent = {\"type\": \"block_id\"}\n        assert (\n            connector._get_parent_raw_id(parent, page_id=\"inline-page-1\")\n            == \"containing-page-1\"\n        )\n\n    def test_block_id_parent_without_mapping_falls_back(self) -> None:\n        connector = _make_connector()\n        connector.workspace_id = \"ws-1\"\n        parent = {\"type\": \"block_id\"}\n        assert connector._get_parent_raw_id(parent, page_id=\"unknown-page\") == \"ws-1\"\n\n    def test_none_parent_defaults_to_workspace(self) -> None:\n        connector = _make_connector()\n        connector.workspace_id = \"ws-1\"\n        assert connector._get_parent_raw_id(None) == \"ws-1\"\n\n\nclass TestReadPagesFromDatabaseMultiSource:\n    def test_queries_all_data_sources(self) -> None:\n        connector = _make_connector()\n        connector.workspace_id = \"ws-1\"\n\n        with (\n            patch.object(\n                connector,\n                \"_fetch_data_sources_for_database\",\n                return_value=[\n                    NotionDataSource(id=\"ds-1\", name=\"Source A\"),\n                    NotionDataSource(id=\"ds-2\", name=\"Source B\"),\n                ],\n            ),\n            patch.object(\n                connector,\n                \"_fetch_data_source\",\n                return_value={\"results\": [], \"next_cursor\": None},\n            ) as mock_fetch_ds,\n        ):\n            result = connector._read_pages_from_database(\"db-1\")\n\n        assert mock_fetch_ds.call_count == 2\n        mock_fetch_ds.assert_any_call(\"ds-1\", None)\n        mock_fetch_ds.assert_any_call(\"ds-2\", None)\n\n        assert connector._data_source_to_database_map[\"ds-1\"] == \"db-1\"\n        assert connector._data_source_to_database_map[\"ds-2\"] == \"db-1\"\n\n        assert result.blocks == []\n        assert result.child_page_ids == []\n        assert len(result.hierarchy_nodes) == 1\n        assert result.hierarchy_nodes[0].raw_node_id == \"db-1\"\n\n    def test_collects_pages_from_all_sources(self) -> None:\n        connector = _make_connector()\n        connector.workspace_id = \"ws-1\"\n        connector.recursive_index_enabled = True\n\n        ds1_results = {\n            \"results\": [{\"object\": \"page\", \"id\": \"page-from-ds1\", \"properties\": {}}],\n            \"next_cursor\": None,\n        }\n        ds2_results = {\n            \"results\": [{\"object\": \"page\", \"id\": \"page-from-ds2\", \"properties\": {}}],\n            \"next_cursor\": None,\n        }\n\n        with (\n            patch.object(\n                connector,\n                \"_fetch_data_sources_for_database\",\n                return_value=[\n                    NotionDataSource(id=\"ds-1\", name=\"Source A\"),\n                    NotionDataSource(id=\"ds-2\", name=\"Source B\"),\n                ],\n            ),\n            patch.object(\n                connector,\n                \"_fetch_data_source\",\n                side_effect=[ds1_results, ds2_results],\n            ),\n        ):\n            result = connector._read_pages_from_database(\"db-1\")\n\n        assert \"page-from-ds1\" in result.child_page_ids\n        assert \"page-from-ds2\" in result.child_page_ids\n\n    def test_pagination_across_pages(self) -> None:\n        connector = _make_connector()\n        connector.workspace_id = \"ws-1\"\n        connector.recursive_index_enabled = True\n\n        page1 = {\n            \"results\": [{\"object\": \"page\", \"id\": \"page-1\", \"properties\": {}}],\n            \"next_cursor\": \"cursor-abc\",\n        }\n        page2 = {\n            \"results\": [{\"object\": \"page\", \"id\": \"page-2\", \"properties\": {}}],\n            \"next_cursor\": None,\n        }\n\n        with (\n            patch.object(\n                connector,\n                \"_fetch_data_sources_for_database\",\n                return_value=[NotionDataSource(id=\"ds-1\", name=\"Source A\")],\n            ),\n            patch.object(\n                connector,\n                \"_fetch_data_source\",\n                side_effect=[page1, page2],\n            ) as mock_fetch_ds,\n        ):\n            result = connector._read_pages_from_database(\"db-1\")\n\n        assert mock_fetch_ds.call_count == 2\n        mock_fetch_ds.assert_any_call(\"ds-1\", None)\n        mock_fetch_ds.assert_any_call(\"ds-1\", \"cursor-abc\")\n        assert \"page-1\" in result.child_page_ids\n        assert \"page-2\" in result.child_page_ids\n\n\nclass TestInTrashField:\n    def test_notion_page_accepts_in_trash(self) -> None:\n        page = NotionPage(\n            id=\"page-1\",\n            created_time=\"2026-01-01T00:00:00.000Z\",\n            last_edited_time=\"2026-01-01T00:00:00.000Z\",\n            in_trash=False,\n            properties={},\n            url=\"https://notion.so/page-1\",\n        )\n        assert page.in_trash is False\n\n    def test_notion_page_in_trash_true(self) -> None:\n        page = NotionPage(\n            id=\"page-1\",\n            created_time=\"2026-01-01T00:00:00.000Z\",\n            last_edited_time=\"2026-01-01T00:00:00.000Z\",\n            in_trash=True,\n            properties={},\n            url=\"https://notion.so/page-1\",\n        )\n        assert page.in_trash is True\n\n\nclass TestFetchDatabaseAsPage:\n    def test_handles_missing_properties(self) -> None:\n        connector = _make_connector()\n        resp = _mock_response(\n            {\n                \"object\": \"database\",\n                \"id\": \"db-1\",\n                \"created_time\": \"2026-01-01T00:00:00.000Z\",\n                \"last_edited_time\": \"2026-01-01T00:00:00.000Z\",\n                \"in_trash\": False,\n                \"url\": \"https://notion.so/db-1\",\n                \"title\": [{\"text\": {\"content\": \"My DB\"}, \"plain_text\": \"My DB\"}],\n                \"data_sources\": [{\"id\": \"ds-1\", \"name\": \"Source\"}],\n            }\n        )\n        with patch(\n            \"onyx.connectors.notion.connector.rl_requests.get\", return_value=resp\n        ):\n            page = connector._fetch_database_as_page(\"db-1\")\n\n        assert page.id == \"db-1\"\n        assert page.database_name == \"My DB\"\n        assert page.properties == {}\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/salesforce/test_salesforce_custom_config.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nTest script for the new custom query configuration functionality in SalesforceConnector.\n\nThis demonstrates how to use the new custom_query_config parameter to specify\nexactly which fields and associations (child objects) to retrieve for each object type.\n\"\"\"\n\nimport json\nfrom typing import Any\n\nfrom onyx.connectors.salesforce.connector import _validate_custom_query_config\nfrom onyx.connectors.salesforce.connector import SalesforceConnector\nfrom onyx.connectors.salesforce.utils import ACCOUNT_OBJECT_TYPE\nfrom onyx.connectors.salesforce.utils import MODIFIED_FIELD\n\n\ndef test_custom_query_config() -> None:\n    \"\"\"Test the custom query configuration functionality.\"\"\"\n\n    # Example custom query configuration\n    # This specifies exactly which fields and associations to retrieve\n    custom_config = {\n        ACCOUNT_OBJECT_TYPE: {\n            \"fields\": [\"Id\", \"Name\", \"Industry\", \"CreatedDate\", MODIFIED_FIELD],\n            \"associations\": {\n                \"Contact\": [\"Id\", \"FirstName\", \"LastName\", \"Email\"],\n                \"Opportunity\": [\"Id\", \"Name\", \"StageName\", \"Amount\", \"CloseDate\"],\n            },\n        },\n        \"Lead\": {\n            \"fields\": [\"Id\", \"FirstName\", \"LastName\", \"Company\", \"Status\"],\n            \"associations\": {},  # No associations for Lead\n        },\n    }\n\n    # Create connector with custom configuration\n    connector = SalesforceConnector(\n        batch_size=50, custom_query_config=json.dumps(custom_config)\n    )\n\n    print(\"✅ SalesforceConnector created successfully with custom query config\")\n    print(f\"Parent object list: {connector.parent_object_list}\")\n    print(f\"Custom config keys: {list(custom_config.keys())}\")\n\n    # Test that the parent object list is derived from the custom config\n    assert connector.parent_object_list == [ACCOUNT_OBJECT_TYPE, \"Lead\"]\n    assert connector.custom_query_config == custom_config\n\n    print(\"✅ Basic validation passed\")\n\n\ndef test_traditional_config() -> None:\n    \"\"\"Test that the traditional requested_objects approach still works.\"\"\"\n\n    # Traditional approach\n    connector = SalesforceConnector(\n        batch_size=50, requested_objects=[ACCOUNT_OBJECT_TYPE, \"Contact\"]\n    )\n\n    print(\"✅ SalesforceConnector created successfully with traditional config\")\n    print(f\"Parent object list: {connector.parent_object_list}\")\n\n    # Test that it still works the old way\n    assert connector.parent_object_list == [ACCOUNT_OBJECT_TYPE, \"Contact\"]\n    assert connector.custom_query_config is None\n\n    print(\"✅ Traditional config validation passed\")\n\n\ndef test_validation() -> None:\n    \"\"\"Test that invalid configurations are rejected.\"\"\"\n\n    # Test invalid config structure\n    invalid_configs: list[Any] = [\n        # Invalid fields type\n        {ACCOUNT_OBJECT_TYPE: {\"fields\": \"invalid\"}},\n        # Invalid associations type\n        {ACCOUNT_OBJECT_TYPE: {\"associations\": \"invalid\"}},\n        # Nested invalid structure\n        {ACCOUNT_OBJECT_TYPE: {\"associations\": {\"Contact\": {\"fields\": \"invalid\"}}}},\n    ]\n\n    for i, invalid_config in enumerate(invalid_configs):\n        try:\n            _validate_custom_query_config(invalid_config)\n            assert False, f\"Should have raised ValueError for invalid_config[{i}]\"\n        except ValueError:\n            print(f\"✅ Correctly rejected invalid config {i}\")\n\n\nif __name__ == \"__main__\":\n    print(\"Testing SalesforceConnector custom query configuration...\")\n    print(\"=\" * 60)\n\n    test_custom_query_config()\n    print()\n\n    test_traditional_config()\n    print()\n\n    test_validation()\n    print()\n\n    print(\"=\" * 60)\n    print(\"🎉 All tests passed! The custom query configuration is working correctly.\")\n    print()\n    print(\"Example usage:\")\n    print(\n        \"\"\"\n# Custom configuration approach\ncustom_config = {\n    ACCOUNT_OBJECT_TYPE: {\n        \"fields\": [\"Id\", \"Name\", \"Industry\"],\n        \"associations\": {\n            \"Contact\": {\n                \"fields\": [\"Id\", \"FirstName\", \"LastName\", \"Email\"],\n                \"associations\": {}\n            }\n        }\n    }\n}\n\nconnector = SalesforceConnector(custom_query_config=custom_config)\n\n# Traditional approach (still works)\nconnector = SalesforceConnector(requested_objects=[ACCOUNT_OBJECT_TYPE, \"Contact\"])\n\"\"\"\n    )\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/salesforce/test_salesforce_sqlite.py",
    "content": "import csv\nimport json\nimport os\nimport shutil\nimport tempfile\nimport time\nfrom collections import defaultdict\nfrom datetime import datetime\nfrom datetime import timezone\nfrom pathlib import Path\nfrom typing import cast\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.connectors.salesforce.doc_conversion import _extract_section\nfrom onyx.connectors.salesforce.doc_conversion import ID_PREFIX\nfrom onyx.connectors.salesforce.onyx_salesforce import OnyxSalesforce\nfrom onyx.connectors.salesforce.salesforce_calls import _bulk_retrieve_from_salesforce\nfrom onyx.connectors.salesforce.salesforce_calls import _make_time_filter_for_sf_type\nfrom onyx.connectors.salesforce.salesforce_calls import _make_time_filtered_query\nfrom onyx.connectors.salesforce.salesforce_calls import get_object_by_id_query\nfrom onyx.connectors.salesforce.sqlite_functions import OnyxSalesforceSQLite\nfrom onyx.connectors.salesforce.utils import ACCOUNT_OBJECT_TYPE\nfrom onyx.connectors.salesforce.utils import MODIFIED_FIELD\nfrom onyx.connectors.salesforce.utils import USER_OBJECT_TYPE\nfrom onyx.utils.logger import setup_logger\n\n# from onyx.connectors.salesforce.onyx_salesforce_type import OnyxSalesforceType\n# from onyx.connectors.salesforce.salesforce_calls import get_children_of_sf_type\n\nlogger = setup_logger()\n\n\n_VALID_SALESFORCE_IDS = [\n    \"001bm00000fd9Z3AAI\",\n    \"001bm00000fdYTdAAM\",\n    \"001bm00000fdYTeAAM\",\n    \"001bm00000fdYTfAAM\",\n    \"001bm00000fdYTgAAM\",\n    \"001bm00000fdYThAAM\",\n    \"001bm00000fdYTiAAM\",\n    \"001bm00000fdYTjAAM\",\n    \"001bm00000fdYTkAAM\",\n    \"001bm00000fdYTlAAM\",\n    \"001bm00000fdYTmAAM\",\n    \"001bm00000fdYTnAAM\",\n    \"001bm00000fdYToAAM\",\n    \"500bm00000XoOxtAAF\",\n    \"500bm00000XoOxuAAF\",\n    \"500bm00000XoOxvAAF\",\n    \"500bm00000XoOxwAAF\",\n    \"500bm00000XoOxxAAF\",\n    \"500bm00000XoOxyAAF\",\n    \"500bm00000XoOxzAAF\",\n    \"500bm00000XoOy0AAF\",\n    \"500bm00000XoOy1AAF\",\n    \"500bm00000XoOy2AAF\",\n    \"500bm00000XoOy3AAF\",\n    \"500bm00000XoOy4AAF\",\n    \"500bm00000XoOy5AAF\",\n    \"500bm00000XoOy6AAF\",\n    \"500bm00000XoOy7AAF\",\n    \"500bm00000XoOy8AAF\",\n    \"500bm00000XoOy9AAF\",\n    \"500bm00000XoOyAAAV\",\n    \"500bm00000XoOyBAAV\",\n    \"500bm00000XoOyCAAV\",\n    \"500bm00000XoOyDAAV\",\n    \"500bm00000XoOyEAAV\",\n    \"500bm00000XoOyFAAV\",\n    \"500bm00000XoOyGAAV\",\n    \"500bm00000XoOyHAAV\",\n    \"500bm00000XoOyIAAV\",\n    \"003bm00000EjHCjAAN\",\n    \"003bm00000EjHCkAAN\",\n    \"003bm00000EjHClAAN\",\n    \"003bm00000EjHCmAAN\",\n    \"003bm00000EjHCnAAN\",\n    \"003bm00000EjHCoAAN\",\n    \"003bm00000EjHCpAAN\",\n    \"003bm00000EjHCqAAN\",\n    \"003bm00000EjHCrAAN\",\n    \"003bm00000EjHCsAAN\",\n    \"003bm00000EjHCtAAN\",\n    \"003bm00000EjHCuAAN\",\n    \"003bm00000EjHCvAAN\",\n    \"003bm00000EjHCwAAN\",\n    \"003bm00000EjHCxAAN\",\n    \"003bm00000EjHCyAAN\",\n    \"003bm00000EjHCzAAN\",\n    \"003bm00000EjHD0AAN\",\n    \"003bm00000EjHD1AAN\",\n    \"003bm00000EjHD2AAN\",\n    \"550bm00000EXc2tAAD\",\n    \"006bm000006kyDpAAI\",\n    \"006bm000006kyDqAAI\",\n    \"006bm000006kyDrAAI\",\n    \"006bm000006kyDsAAI\",\n    \"006bm000006kyDtAAI\",\n    \"006bm000006kyDuAAI\",\n    \"006bm000006kyDvAAI\",\n    \"006bm000006kyDwAAI\",\n    \"006bm000006kyDxAAI\",\n    \"006bm000006kyDyAAI\",\n    \"006bm000006kyDzAAI\",\n    \"006bm000006kyE0AAI\",\n    \"006bm000006kyE1AAI\",\n    \"006bm000006kyE2AAI\",\n    \"006bm000006kyE3AAI\",\n    \"006bm000006kyE4AAI\",\n    \"006bm000006kyE5AAI\",\n    \"006bm000006kyE6AAI\",\n    \"006bm000006kyE7AAI\",\n    \"006bm000006kyE8AAI\",\n    \"006bm000006kyE9AAI\",\n    \"006bm000006kyEAAAY\",\n    \"006bm000006kyEBAAY\",\n    \"006bm000006kyECAAY\",\n    \"006bm000006kyEDAAY\",\n    \"006bm000006kyEEAAY\",\n    \"006bm000006kyEFAAY\",\n    \"006bm000006kyEGAAY\",\n    \"006bm000006kyEHAAY\",\n    \"006bm000006kyEIAAY\",\n    \"006bm000006kyEJAAY\",\n    \"005bm000009zy0TAAQ\",\n    \"005bm000009zy25AAA\",\n    \"005bm000009zy26AAA\",\n    \"005bm000009zy28AAA\",\n    \"005bm000009zy29AAA\",\n    \"005bm000009zy2AAAQ\",\n    \"005bm000009zy2BAAQ\",\n]\n\n\ndef _clear_sf_db(directory: str) -> None:\n    \"\"\"\n    Clears the SF DB by deleting all files in the data directory.\n    \"\"\"\n    shutil.rmtree(directory, ignore_errors=True)\n\n\ndef _create_csv_file_and_update_db(\n    sf_db: OnyxSalesforceSQLite,\n    object_type: str,\n    records: list[dict],\n    filename: str = \"test_data.csv\",\n) -> None:\n    \"\"\"\n    Creates a CSV file for the given object type and records.\n\n    Args:\n        object_type: The Salesforce object type (e.g. ACCOUNT_OBJECT_TYPE, \"Contact\")\n        records: List of dictionaries containing the record data\n        filename: Name of the CSV file to create (default: test_data.csv)\n    \"\"\"\n    if not records:\n        return\n\n    # Get all unique fields from records\n    fields: set[str] = set()\n    for record in records:\n        fields.update(record.keys())\n    fields = set(sorted(list(fields)))  # Sort for consistent order\n\n    # Create CSV file\n    with tempfile.TemporaryDirectory() as directory:\n        csv_path = os.path.join(directory, filename)\n        with open(csv_path, \"w\", newline=\"\", encoding=\"utf-8\") as f:\n            writer = csv.DictWriter(f, fieldnames=fields)\n            writer.writeheader()\n            for record in records:\n                writer.writerow(record)\n\n        # Update the database with the CSV\n        sf_db.update_from_csv(object_type, csv_path)\n\n\ndef _create_csv_with_example_data(sf_db: OnyxSalesforceSQLite) -> None:\n    \"\"\"\n    Creates CSV files with example data, organized by object type.\n    \"\"\"\n    example_data: dict[str, list[dict]] = {\n        ACCOUNT_OBJECT_TYPE: [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[0],\n                \"Name\": \"Acme Inc.\",\n                \"BillingCity\": \"New York\",\n                \"Industry\": \"Technology\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[1],\n                \"Name\": \"Globex Corp\",\n                \"BillingCity\": \"Los Angeles\",\n                \"Industry\": \"Manufacturing\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[2],\n                \"Name\": \"Initech\",\n                \"BillingCity\": \"Austin\",\n                \"Industry\": \"Software\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[3],\n                \"Name\": \"TechCorp Solutions\",\n                \"BillingCity\": \"San Francisco\",\n                \"Industry\": \"Software\",\n                \"AnnualRevenue\": 5000000,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[4],\n                \"Name\": \"BioMed Research\",\n                \"BillingCity\": \"Boston\",\n                \"Industry\": \"Healthcare\",\n                \"AnnualRevenue\": 12000000,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[5],\n                \"Name\": \"Green Energy Co\",\n                \"BillingCity\": \"Portland\",\n                \"Industry\": \"Energy\",\n                \"AnnualRevenue\": 8000000,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[6],\n                \"Name\": \"DataFlow Analytics\",\n                \"BillingCity\": \"Seattle\",\n                \"Industry\": \"Technology\",\n                \"AnnualRevenue\": 3000000,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[7],\n                \"Name\": \"Cloud Nine Services\",\n                \"BillingCity\": \"Denver\",\n                \"Industry\": \"Cloud Computing\",\n                \"AnnualRevenue\": 7000000,\n            },\n        ],\n        \"Contact\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[40],\n                \"FirstName\": \"John\",\n                \"LastName\": \"Doe\",\n                \"Email\": \"john.doe@acme.com\",\n                \"Title\": \"CEO\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[41],\n                \"FirstName\": \"Jane\",\n                \"LastName\": \"Smith\",\n                \"Email\": \"jane.smith@acme.com\",\n                \"Title\": \"CTO\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[42],\n                \"FirstName\": \"Bob\",\n                \"LastName\": \"Johnson\",\n                \"Email\": \"bob.j@globex.com\",\n                \"Title\": \"Sales Director\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[43],\n                \"FirstName\": \"Sarah\",\n                \"LastName\": \"Chen\",\n                \"Email\": \"sarah.chen@techcorp.com\",\n                \"Title\": \"Product Manager\",\n                \"Phone\": \"415-555-0101\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[44],\n                \"FirstName\": \"Michael\",\n                \"LastName\": \"Rodriguez\",\n                \"Email\": \"m.rodriguez@biomed.com\",\n                \"Title\": \"Research Director\",\n                \"Phone\": \"617-555-0202\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[45],\n                \"FirstName\": \"Emily\",\n                \"LastName\": \"Green\",\n                \"Email\": \"emily.g@greenenergy.com\",\n                \"Title\": \"Sustainability Lead\",\n                \"Phone\": \"503-555-0303\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[46],\n                \"FirstName\": \"David\",\n                \"LastName\": \"Kim\",\n                \"Email\": \"david.kim@dataflow.com\",\n                \"Title\": \"Data Scientist\",\n                \"Phone\": \"206-555-0404\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[47],\n                \"FirstName\": \"Rachel\",\n                \"LastName\": \"Taylor\",\n                \"Email\": \"r.taylor@cloudnine.com\",\n                \"Title\": \"Cloud Architect\",\n                \"Phone\": \"303-555-0505\",\n            },\n        ],\n        \"Opportunity\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[62],\n                \"Name\": \"Acme Server Upgrade\",\n                \"Amount\": 50000,\n                \"Stage\": \"Prospecting\",\n                \"CloseDate\": \"2024-06-30\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[63],\n                \"Name\": \"Globex Manufacturing Line\",\n                \"Amount\": 150000,\n                \"Stage\": \"Negotiation\",\n                \"CloseDate\": \"2024-03-15\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[64],\n                \"Name\": \"Initech Software License\",\n                \"Amount\": 75000,\n                \"Stage\": \"Closed Won\",\n                \"CloseDate\": \"2024-01-30\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[65],\n                \"Name\": \"TechCorp AI Implementation\",\n                \"Amount\": 250000,\n                \"Stage\": \"Needs Analysis\",\n                \"CloseDate\": \"2024-08-15\",\n                \"Probability\": 60,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[66],\n                \"Name\": \"BioMed Lab Equipment\",\n                \"Amount\": 500000,\n                \"Stage\": \"Value Proposition\",\n                \"CloseDate\": \"2024-09-30\",\n                \"Probability\": 75,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[67],\n                \"Name\": \"Green Energy Solar Project\",\n                \"Amount\": 750000,\n                \"Stage\": \"Proposal\",\n                \"CloseDate\": \"2024-07-15\",\n                \"Probability\": 80,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[68],\n                \"Name\": \"DataFlow Analytics Platform\",\n                \"Amount\": 180000,\n                \"Stage\": \"Negotiation\",\n                \"CloseDate\": \"2024-05-30\",\n                \"Probability\": 90,\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[69],\n                \"Name\": \"Cloud Nine Infrastructure\",\n                \"Amount\": 300000,\n                \"Stage\": \"Qualification\",\n                \"CloseDate\": \"2024-10-15\",\n                \"Probability\": 40,\n            },\n        ],\n    }\n\n    # Create CSV files for each object type\n    for object_type, records in example_data.items():\n        _create_csv_file_and_update_db(sf_db, object_type, records)\n\n\ndef _test_query(sf_db: OnyxSalesforceSQLite) -> None:\n    \"\"\"\n    Tests querying functionality by verifying:\n    1. All expected Account IDs are found\n    2. Each Account's data matches what was inserted\n    \"\"\"\n    # Expected test data for verification\n    expected_accounts: dict[str, dict[str, str | int]] = {\n        _VALID_SALESFORCE_IDS[0]: {\n            \"Name\": \"Acme Inc.\",\n            \"BillingCity\": \"New York\",\n            \"Industry\": \"Technology\",\n        },\n        _VALID_SALESFORCE_IDS[1]: {\n            \"Name\": \"Globex Corp\",\n            \"BillingCity\": \"Los Angeles\",\n            \"Industry\": \"Manufacturing\",\n        },\n        _VALID_SALESFORCE_IDS[2]: {\n            \"Name\": \"Initech\",\n            \"BillingCity\": \"Austin\",\n            \"Industry\": \"Software\",\n        },\n        _VALID_SALESFORCE_IDS[3]: {\n            \"Name\": \"TechCorp Solutions\",\n            \"BillingCity\": \"San Francisco\",\n            \"Industry\": \"Software\",\n            \"AnnualRevenue\": 5000000,\n        },\n        _VALID_SALESFORCE_IDS[4]: {\n            \"Name\": \"BioMed Research\",\n            \"BillingCity\": \"Boston\",\n            \"Industry\": \"Healthcare\",\n            \"AnnualRevenue\": 12000000,\n        },\n        _VALID_SALESFORCE_IDS[5]: {\n            \"Name\": \"Green Energy Co\",\n            \"BillingCity\": \"Portland\",\n            \"Industry\": \"Energy\",\n            \"AnnualRevenue\": 8000000,\n        },\n        _VALID_SALESFORCE_IDS[6]: {\n            \"Name\": \"DataFlow Analytics\",\n            \"BillingCity\": \"Seattle\",\n            \"Industry\": \"Technology\",\n            \"AnnualRevenue\": 3000000,\n        },\n        _VALID_SALESFORCE_IDS[7]: {\n            \"Name\": \"Cloud Nine Services\",\n            \"BillingCity\": \"Denver\",\n            \"Industry\": \"Cloud Computing\",\n            \"AnnualRevenue\": 7000000,\n        },\n    }\n\n    # Get all Account IDs\n    account_ids = sf_db.find_ids_by_type(ACCOUNT_OBJECT_TYPE)\n\n    # Verify we found all expected accounts\n    assert len(account_ids) == len(\n        expected_accounts\n    ), f\"Expected {len(expected_accounts)} accounts, found {len(account_ids)}\"\n    assert set(account_ids) == set(\n        expected_accounts.keys()\n    ), \"Found account IDs don't match expected IDs\"\n\n    # Verify each account's data\n    for acc_id in account_ids:\n        combined = sf_db.get_record(acc_id)\n        assert combined is not None, f\"Could not find account {acc_id}\"\n\n        expected = expected_accounts[acc_id]\n\n        # Verify account data matches\n        for key, value in expected.items():\n            value = str(value)\n            assert (\n                combined.data[key] == value\n            ), f\"Account {acc_id} field {key} expected {value}, got {combined.data[key]}\"\n\n    print(\"All query tests passed successfully!\")\n\n\ndef _test_upsert(sf_db: OnyxSalesforceSQLite) -> None:\n    \"\"\"\n    Tests upsert functionality by:\n    1. Updating an existing account\n    2. Creating a new account\n    3. Verifying both operations were successful\n    \"\"\"\n    # Create CSV for updating an existing account and adding a new one\n    update_data: list[dict[str, str | int]] = [\n        {\n            \"Id\": _VALID_SALESFORCE_IDS[0],\n            \"Name\": \"Acme Inc. Updated\",\n            \"BillingCity\": \"New York\",\n            \"Industry\": \"Technology\",\n            \"Description\": \"Updated company info\",\n        },\n        {\n            \"Id\": _VALID_SALESFORCE_IDS[2],\n            \"Name\": \"New Company Inc.\",\n            \"BillingCity\": \"Miami\",\n            \"Industry\": \"Finance\",\n            \"AnnualRevenue\": 1000000,\n        },\n    ]\n\n    _create_csv_file_and_update_db(\n        sf_db, ACCOUNT_OBJECT_TYPE, update_data, \"update_data.csv\"\n    )\n\n    # Verify the update worked\n    updated_record = sf_db.get_record(_VALID_SALESFORCE_IDS[0])\n    assert updated_record is not None, \"Updated record not found\"\n    assert updated_record.data[\"Name\"] == \"Acme Inc. Updated\", \"Name not updated\"\n    assert (\n        updated_record.data[\"Description\"] == \"Updated company info\"\n    ), \"Description not added\"\n\n    # Verify the new record was created\n    new_record = sf_db.get_record(_VALID_SALESFORCE_IDS[2])\n    assert new_record is not None, \"New record not found\"\n    assert new_record.data[\"Name\"] == \"New Company Inc.\", \"New record name incorrect\"\n    assert new_record.data[\"AnnualRevenue\"] == \"1000000\", \"New record revenue incorrect\"\n\n    print(\"All upsert tests passed successfully!\")\n\n\ndef _test_relationships(sf_db: OnyxSalesforceSQLite) -> None:\n    \"\"\"\n    Tests relationship shelf updates and queries by:\n    1. Creating test data with relationships\n    2. Verifying the relationships are correctly stored\n    3. Testing relationship queries\n    \"\"\"\n    # Create test data for each object type\n    test_data: dict[str, list[dict[str, str | int]]] = {\n        \"Case\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[13],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"Subject\": \"Test Case 1\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[14],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"Subject\": \"Test Case 2\",\n            },\n        ],\n        \"Contact\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[48],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"FirstName\": \"Test\",\n                \"LastName\": \"Contact\",\n            }\n        ],\n        \"Opportunity\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[62],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"Name\": \"Test Opportunity\",\n                \"Amount\": 100000,\n            }\n        ],\n    }\n\n    # Create and update CSV files for each object type\n    for object_type, records in test_data.items():\n        _create_csv_file_and_update_db(\n            sf_db, object_type, records, \"relationship_test.csv\"\n        )\n\n    # Test relationship queries\n    # All these objects should be children of Acme Inc.\n    child_ids = sf_db.get_child_ids(_VALID_SALESFORCE_IDS[0])\n    assert len(child_ids) == 4, f\"Expected 4 child objects, found {len(child_ids)}\"\n    assert _VALID_SALESFORCE_IDS[13] in child_ids, \"Case 1 not found in relationship\"\n    assert _VALID_SALESFORCE_IDS[14] in child_ids, \"Case 2 not found in relationship\"\n    assert _VALID_SALESFORCE_IDS[48] in child_ids, \"Contact not found in relationship\"\n    assert (\n        _VALID_SALESFORCE_IDS[62] in child_ids\n    ), \"Opportunity not found in relationship\"\n\n    # Test querying relationships for a different account (should be empty)\n    other_account_children = sf_db.get_child_ids(_VALID_SALESFORCE_IDS[1])\n    assert (\n        len(other_account_children) == 0\n    ), \"Expected no children for different account\"\n\n    print(\"All relationship tests passed successfully!\")\n\n\ndef _test_account_with_children(sf_db: OnyxSalesforceSQLite) -> None:\n    \"\"\"\n    Tests querying all accounts and retrieving their child objects.\n    This test verifies that:\n    1. All accounts can be retrieved\n    2. Child objects are correctly linked\n    3. Child object data is complete and accurate\n    \"\"\"\n    # First get all account IDs\n    account_ids = sf_db.find_ids_by_type(ACCOUNT_OBJECT_TYPE)\n    assert len(account_ids) > 0, \"No accounts found\"\n\n    # For each account, get its children and verify the data\n    for account_id in account_ids:\n        account = sf_db.get_record(account_id)\n        assert account is not None, f\"Could not find account {account_id}\"\n\n        # Get all child objects\n        child_ids = sf_db.get_child_ids(account_id)\n\n        # For Acme Inc., verify specific relationships\n        if account_id == _VALID_SALESFORCE_IDS[0]:  # Acme Inc.\n            assert (\n                len(child_ids) == 4\n            ), f\"Expected 4 children for Acme Inc., found {len(child_ids)}\"\n\n            # Get all child records\n            child_records = []\n            for child_id in child_ids:\n                child_record = sf_db.get_record(child_id)\n                if child_record is not None:\n                    child_records.append(child_record)\n            # Verify Cases\n            cases = [r for r in child_records if r.type == \"Case\"]\n            assert (\n                len(cases) == 2\n            ), f\"Expected 2 cases for Acme Inc., found {len(cases)}\"\n            case_subjects = {case.data[\"Subject\"] for case in cases}\n            assert \"Test Case 1\" in case_subjects, \"Test Case 1 not found\"\n            assert \"Test Case 2\" in case_subjects, \"Test Case 2 not found\"\n\n            # Verify Contacts\n            contacts = [r for r in child_records if r.type == \"Contact\"]\n            assert (\n                len(contacts) == 1\n            ), f\"Expected 1 contact for Acme Inc., found {len(contacts)}\"\n            contact = contacts[0]\n            assert contact.data[\"FirstName\"] == \"Test\", \"Contact first name mismatch\"\n            assert contact.data[\"LastName\"] == \"Contact\", \"Contact last name mismatch\"\n\n            # Verify Opportunities\n            opportunities = [r for r in child_records if r.type == \"Opportunity\"]\n            assert (\n                len(opportunities) == 1\n            ), f\"Expected 1 opportunity for Acme Inc., found {len(opportunities)}\"\n            opportunity = opportunities[0]\n            assert (\n                opportunity.data[\"Name\"] == \"Test Opportunity\"\n            ), \"Opportunity name mismatch\"\n            assert opportunity.data[\"Amount\"] == \"100000\", \"Opportunity amount mismatch\"\n\n    print(\"All account with children tests passed successfully!\")\n\n\ndef _test_relationship_updates(sf_db: OnyxSalesforceSQLite) -> None:\n    \"\"\"\n    Tests that relationships are properly updated when a child object's parent reference changes.\n    This test verifies:\n    1. Initial relationship is created correctly\n    2. When parent reference is updated, old relationship is removed\n    3. New relationship is created correctly\n    \"\"\"\n    # Create initial test data - Contact linked to Acme Inc.\n    initial_contact = [\n        {\n            \"Id\": _VALID_SALESFORCE_IDS[40],\n            \"AccountId\": _VALID_SALESFORCE_IDS[0],\n            \"FirstName\": \"Test\",\n            \"LastName\": \"Contact\",\n        }\n    ]\n    _create_csv_file_and_update_db(\n        sf_db, \"Contact\", initial_contact, \"initial_contact.csv\"\n    )\n\n    # Verify initial relationship\n    acme_children = sf_db.get_child_ids(_VALID_SALESFORCE_IDS[0])\n    assert (\n        _VALID_SALESFORCE_IDS[40] in acme_children\n    ), \"Initial relationship not created\"\n\n    # Update contact to be linked to Globex Corp instead\n    updated_contact = [\n        {\n            \"Id\": _VALID_SALESFORCE_IDS[40],\n            \"AccountId\": _VALID_SALESFORCE_IDS[1],\n            \"FirstName\": \"Test\",\n            \"LastName\": \"Contact\",\n        }\n    ]\n    _create_csv_file_and_update_db(\n        sf_db, \"Contact\", updated_contact, \"updated_contact.csv\"\n    )\n\n    # Verify old relationship is removed\n    acme_children = sf_db.get_child_ids(_VALID_SALESFORCE_IDS[0])\n    assert (\n        _VALID_SALESFORCE_IDS[40] not in acme_children\n    ), \"Old relationship not removed\"\n\n    # Verify new relationship is created\n    globex_children = sf_db.get_child_ids(_VALID_SALESFORCE_IDS[1])\n    assert _VALID_SALESFORCE_IDS[40] in globex_children, \"New relationship not created\"\n\n    print(\"All relationship update tests passed successfully!\")\n\n\ndef _test_get_affected_parent_ids(sf_db: OnyxSalesforceSQLite) -> None:\n    \"\"\"\n    Tests get_affected_parent_ids functionality by verifying:\n    1. IDs that are directly in the parent_types list are included\n    2. IDs that have children in the updated_ids list are included\n    3. IDs that are neither of the above are not included\n    \"\"\"\n    # Create test data with relationships\n    test_data = {\n        ACCOUNT_OBJECT_TYPE: [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[0],\n                \"Name\": \"Parent Account 1\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[1],\n                \"Name\": \"Parent Account 2\",\n            },\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[2],\n                \"Name\": \"Not Affected Account\",\n            },\n        ],\n        \"Contact\": [\n            {\n                \"Id\": _VALID_SALESFORCE_IDS[40],\n                \"AccountId\": _VALID_SALESFORCE_IDS[0],\n                \"FirstName\": \"Child\",\n                \"LastName\": \"Contact\",\n            }\n        ],\n    }\n\n    # Create and update CSV files for test data\n    for object_type, records in test_data.items():\n        _create_csv_file_and_update_db(sf_db, object_type, records)\n\n    # Test Case 1: Account directly in updated_ids and parent_types\n    updated_ids = [_VALID_SALESFORCE_IDS[1]]  # Parent Account 2\n    parent_types = set([ACCOUNT_OBJECT_TYPE])\n    affected_ids_by_type = defaultdict(set)\n    for parent_type, parent_id, _ in sf_db.get_changed_parent_ids_by_type(\n        updated_ids, parent_types\n    ):\n        affected_ids_by_type[parent_type].add(parent_id)\n    assert (\n        ACCOUNT_OBJECT_TYPE in affected_ids_by_type\n    ), \"Account type not in affected_ids_by_type\"\n    assert (\n        _VALID_SALESFORCE_IDS[1] in affected_ids_by_type[ACCOUNT_OBJECT_TYPE]\n    ), \"Direct parent ID not included\"\n\n    # Test Case 2: Account with child in updated_ids\n    updated_ids = [_VALID_SALESFORCE_IDS[40]]  # Child Contact\n    parent_types = set([ACCOUNT_OBJECT_TYPE])\n    affected_ids_by_type = defaultdict(set)\n    for parent_type, parent_id, _ in sf_db.get_changed_parent_ids_by_type(\n        updated_ids, parent_types\n    ):\n        affected_ids_by_type[parent_type].add(parent_id)\n    assert (\n        ACCOUNT_OBJECT_TYPE in affected_ids_by_type\n    ), \"Account type not in affected_ids_by_type\"\n    assert (\n        _VALID_SALESFORCE_IDS[0] in affected_ids_by_type[ACCOUNT_OBJECT_TYPE]\n    ), \"Parent of updated child not included\"\n\n    # Test Case 3: Both direct and indirect affects\n    updated_ids = [_VALID_SALESFORCE_IDS[1], _VALID_SALESFORCE_IDS[40]]  # Both cases\n    parent_types = set([ACCOUNT_OBJECT_TYPE])\n    affected_ids_by_type = defaultdict(set)\n    for parent_type, parent_id, _ in sf_db.get_changed_parent_ids_by_type(\n        updated_ids, parent_types\n    ):\n        affected_ids_by_type[parent_type].add(parent_id)\n    assert (\n        ACCOUNT_OBJECT_TYPE in affected_ids_by_type\n    ), \"Account type not in affected_ids_by_type\"\n    affected_ids = affected_ids_by_type[ACCOUNT_OBJECT_TYPE]\n    assert len(affected_ids) == 2, \"Expected exactly two affected parent IDs\"\n    assert _VALID_SALESFORCE_IDS[0] in affected_ids, \"Parent of child not included\"\n    assert _VALID_SALESFORCE_IDS[1] in affected_ids, \"Direct parent ID not included\"\n    assert (\n        _VALID_SALESFORCE_IDS[2] not in affected_ids\n    ), \"Unaffected ID incorrectly included\"\n\n    # Test Case 4: No matches\n    updated_ids = [_VALID_SALESFORCE_IDS[40]]  # Child Contact\n    parent_types = set([\"Opportunity\"])  # Wrong type\n    affected_ids_by_type = defaultdict(set)\n    for parent_type, parent_id, _ in sf_db.get_changed_parent_ids_by_type(\n        updated_ids, parent_types\n    ):\n        affected_ids_by_type[parent_type].add(parent_id)\n    assert len(affected_ids_by_type) == 0, \"Should return empty dict when no matches\"\n\n    print(\"All get_affected_parent_ids tests passed successfully!\")\n\n\ndef test_salesforce_sqlite() -> None:\n    with tempfile.TemporaryDirectory() as directory:\n        _clear_sf_db(directory)\n\n        filename = os.path.join(directory, \"salesforce_db.sqlite\")\n        sf_db = OnyxSalesforceSQLite(filename)\n        sf_db.connect()\n        sf_db.apply_schema()\n\n        _create_csv_with_example_data(sf_db)\n\n        _test_query(sf_db)\n\n        _test_upsert(sf_db)\n\n        _test_relationships(sf_db)\n\n        _test_account_with_children(sf_db)\n\n        _test_relationship_updates(sf_db)\n\n        _test_get_affected_parent_ids(sf_db)\n\n        sf_db.close()\n\n        _clear_sf_db(directory)\n\n\n@pytest.mark.skip(reason=\"Enable when credentials are available\")\ndef test_salesforce_bulk_retrieve() -> None:\n\n    username = os.environ[\"SF_USERNAME\"]\n    password = os.environ[\"SF_PASSWORD\"]\n    security_token = os.environ[\"SF_SECURITY_TOKEN\"]\n\n    sf_client = OnyxSalesforce(\n        username=username,\n        password=password,\n        security_token=security_token,\n        domain=None,\n    )\n\n    # onyx_sf_type = OnyxSalesforceType(\"Contact\", sf_client)\n    sf_object_name = \"Contact\"\n    queryable_fields = sf_client.get_queryable_fields_by_type(sf_object_name)\n\n    intermediate_time = datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc)\n    time_filter = _make_time_filter_for_sf_type(\n        queryable_fields, 0, intermediate_time.timestamp()\n    )\n    assert time_filter\n\n    query = _make_time_filtered_query(queryable_fields, sf_object_name, time_filter)\n\n    with tempfile.TemporaryDirectory() as temp_dir:\n        object_type, csv_paths = _bulk_retrieve_from_salesforce(\n            sf_object_name, query, temp_dir, sf_client\n        )\n\n        assert csv_paths\n\n        # Count rows in the downloaded CSV(s)\n        total_data_rows = 0\n        csv_files_found = []\n        for filename in os.listdir(temp_dir):\n            # Ensure we only process files ending with .csv and belonging to the correct object type\n            # The filename format is expected to be \"ObjectType.some_random_id.csv\"\n            if filename.endswith(\".csv\") and filename.startswith(f\"{object_type}.\"):\n                filepath = os.path.join(temp_dir, filename)\n                csv_files_found.append(filepath)\n                try:\n                    with open(filepath, \"r\", encoding=\"utf-8\") as f:\n                        reader = csv.reader(f)\n                        try:\n                            next(reader)  # Attempt to skip header\n                            # Count data rows\n                            num_data_rows = sum(1 for _ in reader)\n                            logger.info(\n                                f\"Counted {num_data_rows} data rows in {filename}\"\n                            )\n                            total_data_rows += num_data_rows\n                        except StopIteration:\n                            # Handle empty file or file with only header\n                            logger.info(\n                                f\"File {filename} is empty or contains only a header.\"\n                            )\n                except Exception as e:\n                    logger.error(f\"Error reading or counting rows in {filename}: {e}\")\n\n        logger.info(\n            f\"Found {len(csv_files_found)} CSV files for {object_type} in {temp_dir}.\"\n        )\n        logger.info(\n            f\"Total data rows across all CSVs for {object_type}: {total_data_rows}\"\n        )\n\n        assert total_data_rows > 1100 and total_data_rows < 1200\n\n\n# def test_salesforce_client_sobjects():\n\n#     username = os.environ[\"SF_USERNAME\"]\n#     password = os.environ[\"SF_PASSWORD\"]\n#     security_token = os.environ[\"SF_SECURITY_TOKEN\"]\n\n#     sf_client = Salesforce(\n#         username=username,\n#         password=password,\n#         security_token=security_token,\n#         domain=None,\n#     )\n\n#     # does exist\n#     record = sf_client.restful(\"sobjects/005bm000002bBHtAAM\")\n\n#     # does exist\n#     record = sf_client.sobjects.get(\"005bm000002bBHtAAM\")\n\n#     # doesn't exist\n#     record = sf_client.sobjects.get(\"01234567890ABCDEFG\")\n\n\ndef test_normalize_record() -> None:\n    \"\"\"Test normalize record\"\"\"\n\n    expected_str = (\n        '{\"Id\": \"001bm00000eu6n5AAA\", '\n        '\"LastModifiedDate\": \"2024-12-24T18:18:29.000Z\", '\n        '\"BillingStreet\": \"123 Nowhere Parkway\", '\n        '\"CreatedDate\": \"2024-12-24T18:18:29.000Z\", '\n        '\"IsDeleted\": \"false\", '\n        '\"SystemModstamp\": \"2024-12-24T18:18:29.000Z\", '\n        '\"Name\": \"Some Company\", '\n        '\"LastModifiedById\": \"005bm000002bBHtAAM\", '\n        '\"PhotoUrl\": \"/services/images/photo/001bm00000eu6n5AAA\", '\n        '\"BillingCity\": \"Some Town\", '\n        '\"CleanStatus\": \"Pending\"}'\n    )\n    current_dir = Path(__file__).parent\n    with open(current_dir / \"test_account.csv\", \"r\", newline=\"\", encoding=\"utf-8\") as f:\n        reader = csv.DictReader(f)\n        for row in reader:\n            assert len(row) == 64\n\n            normalized_record, parent_ids = OnyxSalesforceSQLite.normalize_record(row)\n            normalized_record_json_str = json.dumps(normalized_record)\n            assert normalized_record_json_str == expected_str\n            assert \"005bm000002bBHtAAM\" in parent_ids\n            assert len(parent_ids) == 1\n\n\ndef _get_child_records_by_id_query(\n    object_id: str,\n    sf_type: str,\n    child_relationships: list[str],\n    relationships_to_fields: dict[str, set[str]],\n) -> str:\n    \"\"\"Returns a SOQL query given the object id, type and child relationships.\n\n    When the query is executed, it comes back as result.records[0][child_relationship(s)]\n    \"\"\"\n\n    SUBQUERY_LIMIT = 10\n\n    query = \"SELECT \"\n    for child_relationship in child_relationships:\n        # TODO(rkuo): what happens if there is a very large list of child records?\n        # is that possible problem?\n\n        # NOTE: we actually have to list out the subqueries we want.\n        # We can't use the following shortcuts:\n        #   FIELDS(ALL) can include binary fields, so don't use that\n        #   FIELDS(CUSTOM) can include aggregate queries, so don't use that\n        fields = relationships_to_fields[child_relationship]\n        fields_fragment = \",\".join(fields)\n        query += f\"(SELECT {fields_fragment} FROM {child_relationship} LIMIT {SUBQUERY_LIMIT}), \"\n\n    query = query.rstrip(\", \")\n    query += f\" FROM {sf_type} WHERE Id = '{object_id}'\"\n    return query\n\n\n# TODO: move these to daily connector tests\n@pytest.mark.skip(reason=\"Enable when credentials are available\")\ndef test_salesforce_connector_single() -> None:\n    \"\"\"Test various manipulations of a single record\"\"\"\n\n    # this record has some opportunity child records\n    parent_id = \"001bm00000BXfhEAAT\"\n    parent_type = ACCOUNT_OBJECT_TYPE\n    parent_types = [parent_type]\n\n    username = os.environ[\"SF_USERNAME\"]\n    password = os.environ[\"SF_PASSWORD\"]\n    security_token = os.environ[\"SF_SECURITY_TOKEN\"]\n\n    sf_client = OnyxSalesforce(\n        username=username,\n        password=password,\n        security_token=security_token,\n        domain=None,\n    )\n\n    # onyx_parent_sf_type = OnyxSalesforceType(parent_type, sf_client)\n\n    child_types: set[str] = set()\n    parent_to_child_types: dict[str, set[str]] = {}  # map from parent to child types\n    parent_to_child_relationships: dict[str, set[str]] = (\n        {}\n    )  # map from parent to child relationships\n    child_to_parent_types: dict[str, set[str]] = (\n        {}\n    )  # reverse map from child to parent types\n    child_relationship_to_queryable_fields: dict[str, set[str]] = {}\n\n    # parent_reference_fields_by_type: dict[str, dict[str, list[str]]] = {}\n\n    # Step 1 - make a list of all the types to download (parent + direct child + USER_OBJECT_TYPE)\n    logger.info(f\"Parent object types: num={len(parent_types)} list={parent_types}\")\n    for parent_type_working in parent_types:\n        child_types_working = sf_client.get_children_of_sf_type(parent_type_working)\n        logger.debug(f\"Found {len(child_types)} child types for {parent_type_working}\")\n\n        for child_type, child_relationship in child_types_working.items():\n            # onyx_sf_type = OnyxSalesforceType(child_type, sf_client)\n\n            # map parent to child type\n            if parent_type_working not in parent_to_child_types:\n                parent_to_child_types[parent_type_working] = set()\n            parent_to_child_types[parent_type_working].add(child_type)\n\n            # map parent to child relationship\n            if parent_type_working not in parent_to_child_relationships:\n                parent_to_child_relationships[parent_type_working] = set()\n            parent_to_child_relationships[parent_type_working].add(child_relationship)\n\n            # reverse map child to parent\n            if child_relationship not in child_to_parent_types:\n                child_to_parent_types[child_type] = set()\n            child_to_parent_types[child_type].add(parent_type_working)\n\n            child_relationship_to_queryable_fields[child_relationship] = (\n                sf_client.get_queryable_fields_by_type(child_type)\n            )\n\n        child_types.update(list(child_types_working.keys()))\n        logger.info(\n            f\"Child object types: parent={parent_type_working} num={len(child_types_working)} list={child_types_working.keys()}\"\n        )\n\n    # queryable_fields_attachment = _get_all_queryable_fields_of_sf_type(sf_client, \"Attachment\")\n    # queryable_fields_contact_point_email = _get_all_queryable_fields_of_sf_type(sf_client, \"ContactPointEmail\")\n\n    # queryable_str = \",\".join(queryable_fields_contact_point_email)\n    sections: list[TextSection] = []\n\n    queryable_fields = sf_client.get_queryable_fields_by_type(parent_type)\n    query = get_object_by_id_query(parent_id, parent_type, queryable_fields)\n    result = sf_client.query(query)\n    records = result[\"records\"]\n    record = records[0]\n    assert record[\"attributes\"][\"type\"] == ACCOUNT_OBJECT_TYPE\n    parent_last_modified_date = record.get(MODIFIED_FIELD, \"\")\n    parent_semantic_identifier = record.get(\"Name\", \"Unknown Object\")\n    parent_last_modified_by_id = record.get(\"LastModifiedById\")\n\n    normalized_record, _ = OnyxSalesforceSQLite.normalize_record(record)\n    parent_text_section = _extract_section(\n        normalized_record, f\"https://{sf_client.sf_instance}/{parent_id}\"\n    )\n    sections.append(parent_text_section)\n\n    time_start = time.monotonic()\n\n    # hardcoded testing with just one parent id\n    MAX_CHILD_TYPES_IN_QUERY = 20\n    child_relationships: list[str] = list(parent_to_child_relationships[parent_type])\n\n    # relationship_status - the child object types added to this dict have been queried\n    relationship_status: dict[str, bool] = {}\n\n    child_relationships_batch = []\n    for child_relationship in child_relationships:\n        # this is binary content, skip it\n        if child_relationship == \"Attachments\":\n            continue\n\n        child_relationships_batch.append(child_relationship)\n        if len(child_relationships_batch) < MAX_CHILD_TYPES_IN_QUERY:\n            continue\n\n        query = _get_child_records_by_id_query(\n            parent_id,\n            parent_type,\n            child_relationships_batch,\n            child_relationship_to_queryable_fields,\n        )\n        print(f\"{query=}\")\n\n        # sf_type = parent_type\n        # query = (\n        #     f\"SELECT \"\n        #     f\"Id, \"\n        #     f\"(SELECT OwnerId,CreatedDate,Id,Name,BestTimeToContactStartTime,ActiveToDate,\"\n        #     f\"EmailLatestBounceReasonText,CreatedById,LastModifiedDate,LastModifiedById,\"\n        #     f\"PreferenceRank,EmailDomain,BestTimeToContactEndTime,SystemModstamp,EmailMailBox,\"\n        #     f\"LastReferencedDate,UsageType,ActiveFromDate,ParentId,LastViewedDate,IsPrimary,\"\n        #     f\"EmailAddress,EmailLatestBounceDateTime,IsDeleted,BestTimeToContactTimezone \"\n        #     f\"FROM ContactPointEmails LIMIT 10) \"\n        #     f\"FROM {sf_type} WHERE Id = '{parent_id}'\"\n        # )\n\n        # NOTE: Querying STANDARD and CUSTOM when there are no custom fields results in an\n        # non-descriptive error (only root aggregation)\n        # sf_type = parent_type\n        # query = (\n        #     f\"SELECT \"\n        #     f\"Id, \"\n        #     f\"(SELECT FIELDS(STANDARD) FROM ContactPointEmails LIMIT 10) \"\n        #     f\"FROM {sf_type} WHERE Id = '{parent_id}'\"\n        # )\n\n        # query = (\n        #     f\"SELECT \"\n        #     f\"{sf_type}.Id \"\n        #     f\"FROM {sf_type} WHERE Id = '{parent_id}'\"\n        # )\n\n        try:\n            result = sf_client.query(query)\n            print(f\"{result=}\")\n        except Exception:\n            logger.exception(f\"Query failed: {query=}\")\n            for child_relationship in child_relationships_batch:\n                relationship_status[child_relationship] = False\n        else:\n            for child_record_key, child_record in result[\"records\"][0].items():\n                if child_record_key == \"attributes\":\n                    continue\n\n                if child_record:\n                    child_text_section = _extract_section(\n                        child_record,\n                        f\"https://{sf_client.sf_instance}/{child_record_key}\",\n                    )\n                    sections.append(child_text_section)\n                    relationship_status[child_record_key] = False\n                else:\n                    relationship_status[child_record_key] = False\n        finally:\n            child_relationships_batch.clear()\n\n    if len(child_relationships_batch) > 0:\n        query = _get_child_records_by_id_query(\n            parent_id,\n            parent_types[0],\n            child_relationships_batch,\n            child_relationship_to_queryable_fields,\n        )\n        print(f\"{query=}\")\n\n        try:\n            result = sf_client.query(query)\n            print(f\"{result=}\")\n        except Exception:\n            logger.exception(f\"Query failed: {query=}\")\n            for child_relationship in child_relationships_batch:\n                relationship_status[child_relationship] = False\n        else:\n            for child_record_key, child_record in result[\"records\"][0].items():\n                if child_record_key == \"attributes\":\n                    continue\n\n                if child_record:\n                    child_text_section = _extract_section(\n                        child_record,\n                        f\"https://{sf_client.sf_instance}/{child_record_key}\",\n                    )\n                    sections.append(child_text_section)\n                    relationship_status[child_record_key] = False\n                else:\n                    relationship_status[child_record_key] = False\n        finally:\n            child_relationships_batch.clear()\n\n    # get user relationship if present\n    primary_owner_list = None\n    if parent_last_modified_by_id:\n        queryable_user_fields = sf_client.get_queryable_fields_by_type(USER_OBJECT_TYPE)\n        query = get_object_by_id_query(\n            parent_last_modified_by_id, USER_OBJECT_TYPE, queryable_user_fields\n        )\n        result = sf_client.query(query)\n        user_record = result[\"records\"][0]\n        expert_info = BasicExpertInfo(\n            first_name=user_record.get(\"FirstName\"),\n            last_name=user_record.get(\"LastName\"),\n            email=user_record.get(\"Email\"),\n            display_name=user_record.get(\"Name\"),\n        )\n\n        if (\n            expert_info.first_name\n            or expert_info.last_name\n            or expert_info.email\n            or expert_info.display_name\n        ):\n            primary_owner_list = [expert_info]\n\n    doc = Document(\n        id=ID_PREFIX + parent_id,\n        sections=cast(list[TextSection | ImageSection], sections),\n        source=DocumentSource.SALESFORCE,\n        semantic_identifier=parent_semantic_identifier,\n        doc_updated_at=time_str_to_utc(parent_last_modified_date),\n        primary_owners=primary_owner_list,\n        metadata={},\n    )\n\n    assert doc is not None\n\n    time_elapsed = time.monotonic() - time_start\n    print(f\"elapsed={time_elapsed:.2f}\")\n\n    print(f\"{relationship_status=}\")\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/salesforce/test_yield_doc_batches.py",
    "content": "\"\"\"Unit tests for _yield_doc_batches and metadata type conversion in SalesforceConnector.\"\"\"\n\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.salesforce.connector import _convert_to_metadata_value\nfrom onyx.connectors.salesforce.connector import SalesforceConnector\nfrom onyx.connectors.salesforce.utils import ID_FIELD\nfrom onyx.connectors.salesforce.utils import MODIFIED_FIELD\nfrom onyx.connectors.salesforce.utils import NAME_FIELD\nfrom onyx.connectors.salesforce.utils import SalesforceObject\n\n\nclass TestConvertToMetadataValue:\n    \"\"\"Tests for the _convert_to_metadata_value helper function.\"\"\"\n\n    def test_string_value(self) -> None:\n        \"\"\"String values should be returned as-is.\"\"\"\n        assert _convert_to_metadata_value(\"hello\") == \"hello\"\n        assert _convert_to_metadata_value(\"\") == \"\"\n\n    def test_boolean_true(self) -> None:\n        \"\"\"Boolean True should be converted to string 'True'.\"\"\"\n        assert _convert_to_metadata_value(True) == \"True\"\n\n    def test_boolean_false(self) -> None:\n        \"\"\"Boolean False should be converted to string 'False'.\"\"\"\n        assert _convert_to_metadata_value(False) == \"False\"\n\n    def test_integer_value(self) -> None:\n        \"\"\"Integer values should be converted to string.\"\"\"\n        assert _convert_to_metadata_value(42) == \"42\"\n        assert _convert_to_metadata_value(0) == \"0\"\n        assert _convert_to_metadata_value(-100) == \"-100\"\n\n    def test_float_value(self) -> None:\n        \"\"\"Float values should be converted to string.\"\"\"\n        assert _convert_to_metadata_value(3.14) == \"3.14\"\n        assert _convert_to_metadata_value(0.0) == \"0.0\"\n        assert _convert_to_metadata_value(-2.5) == \"-2.5\"\n\n    def test_list_of_strings(self) -> None:\n        \"\"\"List of strings should remain as list of strings.\"\"\"\n        result = _convert_to_metadata_value([\"a\", \"b\", \"c\"])\n        assert result == [\"a\", \"b\", \"c\"]\n\n    def test_list_of_mixed_types(self) -> None:\n        \"\"\"List with mixed types should have all items converted to strings.\"\"\"\n        result = _convert_to_metadata_value([1, True, 3.14, \"text\"])\n        assert result == [\"1\", \"True\", \"3.14\", \"text\"]\n\n    def test_empty_list(self) -> None:\n        \"\"\"Empty list should return empty list.\"\"\"\n        assert _convert_to_metadata_value([]) == []\n\n\nclass TestYieldDocBatches:\n    \"\"\"Tests for the _yield_doc_batches method of SalesforceConnector.\"\"\"\n\n    @pytest.fixture\n    def connector(self) -> SalesforceConnector:\n        \"\"\"Create a SalesforceConnector instance with mocked sf_client.\"\"\"\n        connector = SalesforceConnector(\n            batch_size=10,\n            requested_objects=[\"Opportunity\"],\n        )\n        # Mock the sf_client property\n        mock_sf_client = MagicMock()\n        mock_sf_client.sf_instance = \"test.salesforce.com\"\n        connector._sf_client = mock_sf_client\n        return connector\n\n    @pytest.fixture\n    def mock_sf_db(self) -> MagicMock:\n        \"\"\"Create a mock OnyxSalesforceSQLite object.\"\"\"\n        return MagicMock()\n\n    def _create_salesforce_object(\n        self,\n        object_id: str,\n        object_type: str,\n        data: dict[str, Any],\n    ) -> SalesforceObject:\n        \"\"\"Helper to create a SalesforceObject with required fields.\"\"\"\n        # Ensure required fields are present\n        data.setdefault(ID_FIELD, object_id)\n        data.setdefault(MODIFIED_FIELD, \"2024-01-15T10:30:00.000Z\")\n        data.setdefault(NAME_FIELD, f\"Test {object_type}\")\n        return SalesforceObject(id=object_id, type=object_type, data=data)\n\n    @patch(\"onyx.connectors.salesforce.connector.convert_sf_object_to_doc\")\n    def test_metadata_type_conversion_for_opportunity(\n        self,\n        mock_convert: MagicMock,\n        connector: SalesforceConnector,\n        mock_sf_db: MagicMock,\n    ) -> None:\n        \"\"\"Test that Opportunity metadata fields are properly type-converted.\"\"\"\n        parent_id = \"006bm000006kyDpAAI\"\n        parent_type = \"Opportunity\"\n\n        # Create a parent object with various data types in the fields\n        parent_data = {\n            ID_FIELD: parent_id,\n            NAME_FIELD: \"Test Opportunity\",\n            MODIFIED_FIELD: \"2024-01-15T10:30:00.000Z\",\n            \"Account\": \"Acme Corp\",  # string - should become \"account\" metadata\n            \"FiscalQuarter\": 2,  # int - should be converted to \"2\"\n            \"FiscalYear\": 2024,  # int - should be converted to \"2024\"\n            \"IsClosed\": False,  # bool - should be converted to \"False\"\n            \"StageName\": \"Prospecting\",  # string\n            \"Type\": \"New Business\",  # string\n            \"Amount\": 50000.50,  # float - should be converted to \"50000.50\"\n            \"CloseDate\": \"2024-06-30\",  # string\n            \"Probability\": 75,  # int - should be converted to \"75\"\n            \"CreatedDate\": \"2024-01-01T00:00:00.000Z\",  # string\n        }\n        parent_object = self._create_salesforce_object(\n            parent_id, parent_type, parent_data\n        )\n\n        # Setup mock sf_db\n        mock_sf_db.get_changed_parent_ids_by_type.return_value = iter(\n            [(parent_type, parent_id, 1)]\n        )\n        mock_sf_db.get_record.return_value = parent_object\n        mock_sf_db.file_size = 1024\n\n        # Create a mock document that convert_sf_object_to_doc will return\n        mock_doc = Document(\n            id=f\"SALESFORCE_{parent_id}\",\n            sections=[],\n            source=DocumentSource.SALESFORCE,\n            semantic_identifier=\"Test Opportunity\",\n            metadata={},\n        )\n        mock_convert.return_value = mock_doc\n\n        # Track parent changes\n        parents_changed = 0\n\n        def increment() -> None:\n            nonlocal parents_changed\n            parents_changed += 1\n\n        # Call _yield_doc_batches\n        type_to_processed: dict[str, int] = {}\n        changed_ids_to_type = {parent_id: parent_type}\n        parent_types = {parent_type}\n\n        batches = list(\n            connector._yield_doc_batches(\n                mock_sf_db,\n                type_to_processed,\n                changed_ids_to_type,\n                parent_types,\n                increment,\n            )\n        )\n\n        # Verify we got one batch with one document\n        assert len(batches) == 1\n        docs = batches[0]\n        assert len(docs) == 1\n\n        doc = docs[0]\n        assert isinstance(doc, Document)\n\n        # Verify metadata type conversions\n        # All values should be strings (or list of strings)\n        assert doc.metadata[\"object_type\"] == \"Opportunity\"\n        assert doc.metadata[\"account\"] == \"Acme Corp\"  # string stays string\n        assert doc.metadata[\"fiscal_quarter\"] == \"2\"  # int -> str\n        assert doc.metadata[\"fiscal_year\"] == \"2024\"  # int -> str\n        assert doc.metadata[\"is_closed\"] == \"False\"  # bool -> str\n        assert doc.metadata[\"stage_name\"] == \"Prospecting\"  # string stays string\n        assert doc.metadata[\"type\"] == \"New Business\"  # string stays string\n        assert (\n            doc.metadata[\"amount\"] == \"50000.5\"\n        )  # float -> str (Python drops trailing zeros)\n        assert doc.metadata[\"close_date\"] == \"2024-06-30\"  # string stays string\n        assert doc.metadata[\"probability\"] == \"75\"  # int -> str\n        assert doc.metadata[\"name\"] == \"Test Opportunity\"  # NAME_FIELD\n\n        # Verify parent was counted\n        assert parents_changed == 1\n        assert type_to_processed[parent_type] == 1\n\n    @patch(\"onyx.connectors.salesforce.connector.convert_sf_object_to_doc\")\n    def test_missing_optional_metadata_fields(\n        self,\n        mock_convert: MagicMock,\n        connector: SalesforceConnector,\n        mock_sf_db: MagicMock,\n    ) -> None:\n        \"\"\"Test that missing optional metadata fields are not added.\"\"\"\n        parent_id = \"006bm000006kyDqAAI\"\n        parent_type = \"Opportunity\"\n\n        # Create parent object with only some fields\n        parent_data = {\n            ID_FIELD: parent_id,\n            NAME_FIELD: \"Minimal Opportunity\",\n            MODIFIED_FIELD: \"2024-01-15T10:30:00.000Z\",\n            \"StageName\": \"Closed Won\",\n            # Notably missing: Amount, Probability, FiscalQuarter, etc.\n        }\n        parent_object = self._create_salesforce_object(\n            parent_id, parent_type, parent_data\n        )\n\n        mock_sf_db.get_changed_parent_ids_by_type.return_value = iter(\n            [(parent_type, parent_id, 1)]\n        )\n        mock_sf_db.get_record.return_value = parent_object\n        mock_sf_db.file_size = 1024\n\n        mock_doc = Document(\n            id=f\"SALESFORCE_{parent_id}\",\n            sections=[],\n            source=DocumentSource.SALESFORCE,\n            semantic_identifier=\"Minimal Opportunity\",\n            metadata={},\n        )\n        mock_convert.return_value = mock_doc\n\n        type_to_processed: dict[str, int] = {}\n        changed_ids_to_type = {parent_id: parent_type}\n        parent_types = {parent_type}\n\n        batches = list(\n            connector._yield_doc_batches(\n                mock_sf_db,\n                type_to_processed,\n                changed_ids_to_type,\n                parent_types,\n                lambda: None,\n            )\n        )\n\n        doc = batches[0][0]\n        assert isinstance(doc, Document)\n\n        # Only present fields should be in metadata\n        assert \"stage_name\" in doc.metadata\n        assert doc.metadata[\"stage_name\"] == \"Closed Won\"\n        assert \"name\" in doc.metadata\n        assert doc.metadata[\"name\"] == \"Minimal Opportunity\"\n\n        # Missing fields should not be in metadata\n        assert \"amount\" not in doc.metadata\n        assert \"probability\" not in doc.metadata\n        assert \"fiscal_quarter\" not in doc.metadata\n        assert \"fiscal_year\" not in doc.metadata\n        assert \"is_closed\" not in doc.metadata\n\n    @patch(\"onyx.connectors.salesforce.connector.convert_sf_object_to_doc\")\n    def test_contact_metadata_fields(\n        self,\n        mock_convert: MagicMock,\n        connector: SalesforceConnector,\n        mock_sf_db: MagicMock,\n    ) -> None:\n        \"\"\"Test metadata conversion for Contact object type.\"\"\"\n        parent_id = \"003bm00000EjHCjAAN\"\n        parent_type = \"Contact\"\n\n        parent_data = {\n            ID_FIELD: parent_id,\n            NAME_FIELD: \"John Doe\",\n            MODIFIED_FIELD: \"2024-02-20T14:00:00.000Z\",\n            \"Account\": \"Globex Corp\",\n            \"CreatedDate\": \"2024-01-01T00:00:00.000Z\",\n        }\n        parent_object = self._create_salesforce_object(\n            parent_id, parent_type, parent_data\n        )\n\n        mock_sf_db.get_changed_parent_ids_by_type.return_value = iter(\n            [(parent_type, parent_id, 1)]\n        )\n        mock_sf_db.get_record.return_value = parent_object\n        mock_sf_db.file_size = 1024\n\n        mock_doc = Document(\n            id=f\"SALESFORCE_{parent_id}\",\n            sections=[],\n            source=DocumentSource.SALESFORCE,\n            semantic_identifier=\"John Doe\",\n            metadata={},\n        )\n        mock_convert.return_value = mock_doc\n\n        type_to_processed: dict[str, int] = {}\n        changed_ids_to_type = {parent_id: parent_type}\n        parent_types = {parent_type}\n\n        batches = list(\n            connector._yield_doc_batches(\n                mock_sf_db,\n                type_to_processed,\n                changed_ids_to_type,\n                parent_types,\n                lambda: None,\n            )\n        )\n\n        doc = batches[0][0]\n        assert isinstance(doc, Document)\n\n        # Verify Contact-specific metadata\n        assert doc.metadata[\"object_type\"] == \"Contact\"\n        assert doc.metadata[\"account\"] == \"Globex Corp\"\n        assert doc.metadata[\"created_date\"] == \"2024-01-01T00:00:00.000Z\"\n        assert doc.metadata[\"last_modified_date\"] == \"2024-02-20T14:00:00.000Z\"\n\n    @patch(\"onyx.connectors.salesforce.connector.convert_sf_object_to_doc\")\n    def test_no_default_attributes_for_unknown_type(\n        self,\n        mock_convert: MagicMock,\n        connector: SalesforceConnector,\n        mock_sf_db: MagicMock,\n    ) -> None:\n        \"\"\"Test that unknown object types only get object_type metadata.\"\"\"\n        parent_id = \"001bm00000fd9Z3AAI\"\n        parent_type = \"CustomObject__c\"\n\n        parent_data = {\n            ID_FIELD: parent_id,\n            NAME_FIELD: \"Custom Record\",\n            MODIFIED_FIELD: \"2024-03-01T08:00:00.000Z\",\n            \"CustomField__c\": \"custom value\",\n            \"NumberField__c\": 123,\n        }\n        parent_object = self._create_salesforce_object(\n            parent_id, parent_type, parent_data\n        )\n\n        mock_sf_db.get_changed_parent_ids_by_type.return_value = iter(\n            [(parent_type, parent_id, 1)]\n        )\n        mock_sf_db.get_record.return_value = parent_object\n        mock_sf_db.file_size = 1024\n\n        mock_doc = Document(\n            id=f\"SALESFORCE_{parent_id}\",\n            sections=[],\n            source=DocumentSource.SALESFORCE,\n            semantic_identifier=\"Custom Record\",\n            metadata={},\n        )\n        mock_convert.return_value = mock_doc\n\n        type_to_processed: dict[str, int] = {}\n        changed_ids_to_type = {parent_id: parent_type}\n        parent_types = {parent_type}\n\n        batches = list(\n            connector._yield_doc_batches(\n                mock_sf_db,\n                type_to_processed,\n                changed_ids_to_type,\n                parent_types,\n                lambda: None,\n            )\n        )\n\n        doc = batches[0][0]\n        assert isinstance(doc, Document)\n\n        # Only object_type should be set for unknown types\n        assert doc.metadata[\"object_type\"] == \"CustomObject__c\"\n        # Custom fields should NOT be in metadata (not in _DEFAULT_ATTRIBUTES_TO_KEEP)\n        assert \"CustomField__c\" not in doc.metadata\n        assert \"NumberField__c\" not in doc.metadata\n\n    @patch(\"onyx.connectors.salesforce.connector.convert_sf_object_to_doc\")\n    def test_skips_missing_parent_objects(\n        self,\n        mock_convert: MagicMock,\n        connector: SalesforceConnector,\n        mock_sf_db: MagicMock,\n    ) -> None:\n        \"\"\"Test that missing parent objects are skipped gracefully.\"\"\"\n        parent_id = \"006bm000006kyDrAAI\"\n        parent_type = \"Opportunity\"\n\n        # get_record returns None for missing object\n        mock_sf_db.get_changed_parent_ids_by_type.return_value = iter(\n            [(parent_type, parent_id, 1)]\n        )\n        mock_sf_db.get_record.return_value = None\n        mock_sf_db.file_size = 1024\n\n        type_to_processed: dict[str, int] = {}\n        changed_ids_to_type = {parent_id: parent_type}\n        parent_types = {parent_type}\n\n        parents_changed = 0\n\n        def increment() -> None:\n            nonlocal parents_changed\n            parents_changed += 1\n\n        batches = list(\n            connector._yield_doc_batches(\n                mock_sf_db,\n                type_to_processed,\n                changed_ids_to_type,\n                parent_types,\n                increment,\n            )\n        )\n\n        # Should yield one empty batch\n        assert len(batches) == 1\n        assert len(batches[0]) == 0\n\n        # convert_sf_object_to_doc should not have been called\n        mock_convert.assert_not_called()\n\n        # Parents changed should still be 0\n        assert parents_changed == 0\n\n    @patch(\"onyx.connectors.salesforce.connector.convert_sf_object_to_doc\")\n    def test_multiple_documents_batching(\n        self,\n        mock_convert: MagicMock,\n        connector: SalesforceConnector,\n        mock_sf_db: MagicMock,\n    ) -> None:\n        \"\"\"Test that multiple documents are correctly batched.\"\"\"\n        # Create 3 parent objects\n        parent_ids = [\n            \"006bm000006kyDsAAI\",\n            \"006bm000006kyDtAAI\",\n            \"006bm000006kyDuAAI\",\n        ]\n        parent_type = \"Opportunity\"\n\n        parent_objects = [\n            self._create_salesforce_object(\n                pid,\n                parent_type,\n                {\n                    ID_FIELD: pid,\n                    NAME_FIELD: f\"Opportunity {i}\",\n                    MODIFIED_FIELD: \"2024-01-15T10:30:00.000Z\",\n                    \"IsClosed\": i % 2 == 0,  # alternating bool values\n                    \"Amount\": 1000.0 * (i + 1),\n                },\n            )\n            for i, pid in enumerate(parent_ids)\n        ]\n\n        # Setup mock to return all three\n        mock_sf_db.get_changed_parent_ids_by_type.return_value = iter(\n            [(parent_type, pid, i + 1) for i, pid in enumerate(parent_ids)]\n        )\n        mock_sf_db.get_record.side_effect = parent_objects\n        mock_sf_db.file_size = 1024\n\n        # Create mock documents\n        mock_docs = [\n            Document(\n                id=f\"SALESFORCE_{pid}\",\n                sections=[],\n                source=DocumentSource.SALESFORCE,\n                semantic_identifier=f\"Opportunity {i}\",\n                metadata={},\n            )\n            for i, pid in enumerate(parent_ids)\n        ]\n        mock_convert.side_effect = mock_docs\n\n        type_to_processed: dict[str, int] = {}\n        changed_ids_to_type = {pid: parent_type for pid in parent_ids}\n        parent_types = {parent_type}\n\n        batches = list(\n            connector._yield_doc_batches(\n                mock_sf_db,\n                type_to_processed,\n                changed_ids_to_type,\n                parent_types,\n                lambda: None,\n            )\n        )\n\n        # With batch_size=10, all 3 docs should be in one batch\n        assert len(batches) == 1\n        assert len(batches[0]) == 3\n\n        # Verify each document has correct metadata\n        for i, doc in enumerate(batches[0]):\n            assert isinstance(doc, Document)\n            assert doc.metadata[\"object_type\"] == \"Opportunity\"\n            assert doc.metadata[\"is_closed\"] == str(i % 2 == 0)\n            assert doc.metadata[\"amount\"] == str(1000.0 * (i + 1))\n\n        assert type_to_processed[parent_type] == 3\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/sharepoint/test_delta_checkpointing.py",
    "content": "\"\"\"Tests for per-page delta checkpointing in the SharePoint connector (P1-1).\n\nValidates that:\n- Delta drives process one page per _load_from_checkpoint call\n- Checkpoints persist the delta next_link for resumption\n- Crash + resume skips already-processed pages\n- BFS (folder-scoped) drives process all items in one call\n- 410 Gone triggers a full-resync URL in the checkpoint\n- Duplicate document IDs across delta pages are deduplicated\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom collections import deque\nfrom collections.abc import Generator\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nimport pytest\n\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentSource\nfrom onyx.connectors.models import TextSection\nfrom onyx.connectors.sharepoint.connector import DriveItemData\nfrom onyx.connectors.sharepoint.connector import SharepointConnector\nfrom onyx.connectors.sharepoint.connector import SharepointConnectorCheckpoint\nfrom onyx.connectors.sharepoint.connector import SiteDescriptor\n\n# ---------------------------------------------------------------------------\n# Constants\n# ---------------------------------------------------------------------------\n\nSITE_URL = \"https://example.sharepoint.com/sites/sample\"\nDRIVE_WEB_URL = f\"{SITE_URL}/Shared Documents\"\nDRIVE_ID = \"fake-drive-id\"\n\n# Use a start time in the future so delta URLs include a timestamp token\n_START_TS = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()\n_END_TS = datetime(2026, 1, 1, tzinfo=timezone.utc).timestamp()\n\n# For BFS tests we use epoch so no token is generated\n_EPOCH_START: float = 0.0\n\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef _make_item(item_id: str, name: str = \"doc.pdf\") -> DriveItemData:\n    return DriveItemData(\n        id=item_id,\n        name=name,\n        web_url=f\"{SITE_URL}/{name}\",\n        parent_reference_path=\"/drives/d1/root:\",\n        drive_id=DRIVE_ID,\n    )\n\n\ndef _make_document(item: DriveItemData) -> Document:\n    return Document(\n        id=item.id,\n        source=DocumentSource.SHAREPOINT,\n        semantic_identifier=item.name,\n        metadata={},\n        sections=[TextSection(link=item.web_url, text=\"content\")],\n    )\n\n\ndef _consume_generator(\n    gen: Generator[Any, None, SharepointConnectorCheckpoint],\n) -> tuple[list[Any], SharepointConnectorCheckpoint]:\n    \"\"\"Exhaust a _load_from_checkpoint generator.\n\n    Returns (yielded_items, returned_checkpoint).\n    \"\"\"\n    yielded: list[Any] = []\n    try:\n        while True:\n            yielded.append(next(gen))\n    except StopIteration as e:\n        return yielded, e.value\n\n\ndef _docs_from(yielded: list[Any]) -> list[Document]:\n    return [y for y in yielded if isinstance(y, Document)]\n\n\ndef _failures_from(yielded: list[Any]) -> list[ConnectorFailure]:\n    return [y for y in yielded if isinstance(y, ConnectorFailure)]\n\n\ndef _build_ready_checkpoint(\n    drive_names: list[str] | None = None,\n    folder_path: str | None = None,\n) -> SharepointConnectorCheckpoint:\n    \"\"\"Checkpoint ready for Phase 3 (sites initialised, drives queued).\"\"\"\n    cp = SharepointConnectorCheckpoint(has_more=True)\n    cp.cached_site_descriptors = deque()\n    cp.current_site_descriptor = SiteDescriptor(\n        url=SITE_URL,\n        drive_name=None,\n        folder_path=folder_path,\n    )\n    cp.cached_drive_names = deque(drive_names or [\"Documents\"])\n    cp.process_site_pages = False\n    return cp\n\n\ndef _setup_connector(monkeypatch: pytest.MonkeyPatch) -> SharepointConnector:\n    \"\"\"Create a connector with common methods mocked.\"\"\"\n    connector = SharepointConnector()\n    connector._graph_client = object()\n    connector.include_site_pages = False\n\n    def fake_resolve_drive(\n        self: SharepointConnector,  # noqa: ARG001\n        site_descriptor: SiteDescriptor,  # noqa: ARG001\n        drive_name: str,  # noqa: ARG001\n    ) -> tuple[str, str | None]:\n        return (DRIVE_ID, DRIVE_WEB_URL)\n\n    def fake_get_access_token(self: SharepointConnector) -> str:  # noqa: ARG001\n        return \"fake-access-token\"\n\n    monkeypatch.setattr(SharepointConnector, \"_resolve_drive\", fake_resolve_drive)\n    monkeypatch.setattr(\n        SharepointConnector, \"_get_graph_access_token\", fake_get_access_token\n    )\n\n    return connector\n\n\ndef _mock_convert(monkeypatch: pytest.MonkeyPatch) -> None:\n    \"\"\"Replace _convert_driveitem_to_document_with_permissions with a trivial stub.\"\"\"\n\n    def fake_convert(\n        driveitem: DriveItemData,\n        drive_name: str,  # noqa: ARG001\n        ctx: Any = None,  # noqa: ARG001\n        graph_client: Any = None,  # noqa: ARG001\n        graph_api_base: str = \"\",  # noqa: ARG001\n        include_permissions: bool = False,  # noqa: ARG001\n        parent_hierarchy_raw_node_id: str | None = None,  # noqa: ARG001\n        access_token: str | None = None,  # noqa: ARG001\n        treat_sharing_link_as_public: bool = False,  # noqa: ARG001\n    ) -> Document:\n        return _make_document(driveitem)\n\n    monkeypatch.setattr(\n        \"onyx.connectors.sharepoint.connector._convert_driveitem_to_document_with_permissions\",\n        fake_convert,\n    )\n\n\n# ---------------------------------------------------------------------------\n# Tests\n# ---------------------------------------------------------------------------\n\n\nclass TestDeltaPerPageCheckpointing:\n    \"\"\"Delta (non-folder-scoped) drives should process one API page per\n    _load_from_checkpoint call, persisting the next-link in between.\"\"\"\n\n    def test_processes_one_page_per_cycle(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        connector = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n\n        items_p1 = [_make_item(\"a\"), _make_item(\"b\")]\n        items_p2 = [_make_item(\"c\")]\n        items_p3 = [_make_item(\"d\"), _make_item(\"e\")]\n\n        call_count = 0\n\n        def fake_fetch_page(\n            self: SharepointConnector,  # noqa: ARG001\n            page_url: str,  # noqa: ARG001\n            drive_id: str,  # noqa: ARG001\n            start: datetime | None = None,  # noqa: ARG001\n            end: datetime | None = None,  # noqa: ARG001\n            page_size: int = 200,  # noqa: ARG001\n        ) -> tuple[list[DriveItemData], str | None]:\n            nonlocal call_count\n            call_count += 1\n            if call_count == 1:\n                return items_p1, \"https://graph.microsoft.com/next2\"\n            if call_count == 2:\n                return items_p2, \"https://graph.microsoft.com/next3\"\n            return items_p3, None\n\n        monkeypatch.setattr(\n            SharepointConnector, \"_fetch_one_delta_page\", fake_fetch_page\n        )\n\n        checkpoint = _build_ready_checkpoint()\n\n        # Call 1: Phase 3a inits drive, Phase 3b processes page 1\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, checkpoint = _consume_generator(gen)\n        assert len(_docs_from(yielded)) == 2\n        assert (\n            checkpoint.current_drive_delta_next_link\n            == \"https://graph.microsoft.com/next2\"\n        )\n        assert checkpoint.current_drive_id == DRIVE_ID\n        assert checkpoint.has_more is True\n\n        # Call 2: Phase 3b processes page 2\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, checkpoint = _consume_generator(gen)\n        assert len(_docs_from(yielded)) == 1\n        assert (\n            checkpoint.current_drive_delta_next_link\n            == \"https://graph.microsoft.com/next3\"\n        )\n\n        # Call 3: Phase 3b processes page 3 (last)\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, checkpoint = _consume_generator(gen)\n        assert len(_docs_from(yielded)) == 2\n        assert checkpoint.current_drive_name is None\n        assert checkpoint.current_drive_id is None\n        assert checkpoint.current_drive_delta_next_link is None\n\n    def test_resume_after_simulated_crash(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Serialise the checkpoint after page 1, create a fresh connector,\n        and verify page 2 is fetched using the saved next-link.\"\"\"\n        connector = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n\n        captured_urls: list[str] = []\n        call_count = 0\n\n        def fake_fetch_page(\n            self: SharepointConnector,  # noqa: ARG001\n            page_url: str,\n            drive_id: str,  # noqa: ARG001\n            start: datetime | None = None,  # noqa: ARG001\n            end: datetime | None = None,  # noqa: ARG001\n            page_size: int = 200,  # noqa: ARG001\n        ) -> tuple[list[DriveItemData], str | None]:\n            nonlocal call_count\n            call_count += 1\n            captured_urls.append(page_url)\n            if call_count == 1:\n                return [_make_item(\"a\")], \"https://graph.microsoft.com/next2\"\n            return [_make_item(\"b\")], None\n\n        monkeypatch.setattr(\n            SharepointConnector, \"_fetch_one_delta_page\", fake_fetch_page\n        )\n\n        # Process page 1\n        checkpoint = _build_ready_checkpoint()\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        _, checkpoint = _consume_generator(gen)\n        assert (\n            checkpoint.current_drive_delta_next_link\n            == \"https://graph.microsoft.com/next2\"\n        )\n\n        # --- Simulate crash: serialise & deserialise checkpoint ---\n        saved_json = checkpoint.model_dump_json()\n        restored = SharepointConnectorCheckpoint.model_validate_json(saved_json)\n\n        # New connector instance (as if process restarted)\n        connector2 = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n        monkeypatch.setattr(\n            SharepointConnector, \"_fetch_one_delta_page\", fake_fetch_page\n        )\n\n        # Resume — should pick up from next2\n        gen = connector2._load_from_checkpoint(\n            _START_TS, _END_TS, restored, include_permissions=False\n        )\n        yielded, final_cp = _consume_generator(gen)\n\n        docs = _docs_from(yielded)\n        assert len(docs) == 1\n        assert docs[0].id == \"b\"\n        assert captured_urls[-1] == \"https://graph.microsoft.com/next2\"\n        assert final_cp.current_drive_name is None\n        assert final_cp.current_drive_delta_next_link is None\n\n    def test_single_page_drive_completes_in_one_cycle(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"A drive with only one delta page should init + process + clear\n        in a single _load_from_checkpoint call.\"\"\"\n        connector = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n\n        def fake_fetch_page(\n            self: SharepointConnector,  # noqa: ARG001\n            page_url: str,  # noqa: ARG001\n            drive_id: str,  # noqa: ARG001\n            start: datetime | None = None,  # noqa: ARG001\n            end: datetime | None = None,  # noqa: ARG001\n            page_size: int = 200,  # noqa: ARG001\n        ) -> tuple[list[DriveItemData], str | None]:\n            return [_make_item(\"only\")], None\n\n        monkeypatch.setattr(\n            SharepointConnector, \"_fetch_one_delta_page\", fake_fetch_page\n        )\n\n        checkpoint = _build_ready_checkpoint()\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, final_cp = _consume_generator(gen)\n\n        assert len(_docs_from(yielded)) == 1\n        assert final_cp.current_drive_name is None\n        assert final_cp.current_drive_id is None\n        assert final_cp.current_drive_delta_next_link is None\n\n\nclass TestBfsPathNoCheckpointing:\n    \"\"\"Folder-scoped (BFS) drives should process all items in one call\n    because the BFS queue cannot be cheaply serialised.\"\"\"\n\n    def test_bfs_processes_all_at_once(self, monkeypatch: pytest.MonkeyPatch) -> None:\n        connector = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n\n        items = [_make_item(\"x\"), _make_item(\"y\"), _make_item(\"z\")]\n\n        def fake_iter_paged(\n            self: SharepointConnector,  # noqa: ARG001\n            drive_id: str,  # noqa: ARG001\n            folder_path: str | None = None,  # noqa: ARG001\n            start: datetime | None = None,  # noqa: ARG001\n            end: datetime | None = None,  # noqa: ARG001\n            page_size: int = 200,  # noqa: ARG001\n        ) -> Generator[DriveItemData, None, None]:\n            yield from items\n\n        monkeypatch.setattr(\n            SharepointConnector, \"_iter_drive_items_paged\", fake_iter_paged\n        )\n\n        checkpoint = _build_ready_checkpoint(folder_path=\"Engineering/Docs\")\n        gen = connector._load_from_checkpoint(\n            _EPOCH_START, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, final_cp = _consume_generator(gen)\n\n        assert len(_docs_from(yielded)) == 3\n        assert final_cp.current_drive_name is None\n        assert final_cp.current_drive_id is None\n        assert final_cp.current_drive_delta_next_link is None\n\n\nclass TestDelta410GoneResync:\n    \"\"\"On 410 Gone the checkpoint should be updated with a full-resync URL\n    and the next cycle should re-enumerate from scratch.\"\"\"\n\n    def test_410_stores_full_resync_url(self, monkeypatch: pytest.MonkeyPatch) -> None:\n        connector = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n\n        call_count = 0\n\n        def fake_fetch_page(\n            self: SharepointConnector,  # noqa: ARG001\n            page_url: str,  # noqa: ARG001\n            drive_id: str,\n            start: datetime | None = None,  # noqa: ARG001\n            end: datetime | None = None,  # noqa: ARG001\n            page_size: int = 200,\n        ) -> tuple[list[DriveItemData], str | None]:\n            nonlocal call_count\n            call_count += 1\n            if call_count == 1:\n                # Simulate the 410 handler returning a full-resync URL\n                full_url = f\"https://graph.microsoft.com/v1.0/drives/{drive_id}/root/delta?$top={page_size}\"\n                return [], full_url\n            return [_make_item(\"recovered\")], None\n\n        monkeypatch.setattr(\n            SharepointConnector, \"_fetch_one_delta_page\", fake_fetch_page\n        )\n\n        checkpoint = _build_ready_checkpoint()\n\n        # Call 1: 3a inits, 3b gets empty page + resync URL\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, checkpoint = _consume_generator(gen)\n        assert len(_docs_from(yielded)) == 0\n        assert checkpoint.current_drive_delta_next_link is not None\n        assert \"token=\" not in checkpoint.current_drive_delta_next_link\n\n        # Call 2: processes the full resync\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, checkpoint = _consume_generator(gen)\n        docs = _docs_from(yielded)\n        assert len(docs) == 1\n        assert docs[0].id == \"recovered\"\n        assert checkpoint.current_drive_name is None\n\n\nclass TestDeltaPageFetchFailure:\n    \"\"\"If _fetch_one_delta_page raises, the drive should be abandoned with a\n    ConnectorFailure and the checkpoint should be cleared for the next drive.\"\"\"\n\n    def test_page_fetch_error_yields_failure_and_clears_state(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        connector = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n\n        def fake_fetch_page(\n            self: SharepointConnector,  # noqa: ARG001\n            page_url: str,  # noqa: ARG001\n            drive_id: str,  # noqa: ARG001\n            start: datetime | None = None,  # noqa: ARG001\n            end: datetime | None = None,  # noqa: ARG001\n            page_size: int = 200,  # noqa: ARG001\n        ) -> tuple[list[DriveItemData], str | None]:\n            raise RuntimeError(\"network blip\")\n\n        monkeypatch.setattr(\n            SharepointConnector, \"_fetch_one_delta_page\", fake_fetch_page\n        )\n\n        checkpoint = _build_ready_checkpoint()\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, final_cp = _consume_generator(gen)\n\n        failures = _failures_from(yielded)\n        assert len(failures) == 1\n        assert \"network blip\" in failures[0].failure_message\n        assert final_cp.current_drive_name is None\n        assert final_cp.current_drive_id is None\n        assert final_cp.current_drive_delta_next_link is None\n\n\nclass TestDeltaDuplicateDocumentDedup:\n    \"\"\"The Microsoft Graph delta API can return the same item on multiple\n    pages.  Documents already yielded should be skipped via\n    checkpoint.seen_document_ids.\"\"\"\n\n    def test_duplicate_across_pages_is_skipped(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Item 'dup' appears on both page 1 and page 2.  It should only be\n        yielded once.\"\"\"\n        connector = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n\n        call_count = 0\n\n        def fake_fetch_page(\n            self: SharepointConnector,  # noqa: ARG001\n            page_url: str,  # noqa: ARG001\n            drive_id: str,  # noqa: ARG001\n            start: datetime | None = None,  # noqa: ARG001\n            end: datetime | None = None,  # noqa: ARG001\n            page_size: int = 200,  # noqa: ARG001\n        ) -> tuple[list[DriveItemData], str | None]:\n            nonlocal call_count\n            call_count += 1\n            if call_count == 1:\n                return [_make_item(\"a\"), _make_item(\"dup\")], \"https://next2\"\n            return [_make_item(\"dup\"), _make_item(\"b\")], None\n\n        monkeypatch.setattr(\n            SharepointConnector, \"_fetch_one_delta_page\", fake_fetch_page\n        )\n\n        checkpoint = _build_ready_checkpoint()\n\n        # Page 1: yields a, dup\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, checkpoint = _consume_generator(gen)\n        docs = _docs_from(yielded)\n        assert [d.id for d in docs] == [\"a\", \"dup\"]\n        assert \"dup\" in checkpoint.seen_document_ids\n\n        # Page 2: dup should be skipped, only b yielded\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, checkpoint = _consume_generator(gen)\n        docs = _docs_from(yielded)\n        assert [d.id for d in docs] == [\"b\"]\n\n    def test_duplicate_within_same_page_is_skipped(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"If the same item appears twice on a single delta page, only the\n        first occurrence should be yielded.\"\"\"\n        connector = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n\n        def fake_fetch_page(\n            self: SharepointConnector,  # noqa: ARG001\n            page_url: str,  # noqa: ARG001\n            drive_id: str,  # noqa: ARG001\n            start: datetime | None = None,  # noqa: ARG001\n            end: datetime | None = None,  # noqa: ARG001\n            page_size: int = 200,  # noqa: ARG001\n        ) -> tuple[list[DriveItemData], str | None]:\n            return [_make_item(\"x\"), _make_item(\"x\"), _make_item(\"y\")], None\n\n        monkeypatch.setattr(\n            SharepointConnector, \"_fetch_one_delta_page\", fake_fetch_page\n        )\n\n        checkpoint = _build_ready_checkpoint()\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, checkpoint = _consume_generator(gen)\n        docs = _docs_from(yielded)\n        assert [d.id for d in docs] == [\"x\", \"y\"]\n\n    def test_seen_ids_survive_checkpoint_serialization(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"seen_document_ids must survive JSON serialization so that\n        dedup works across crash + resume.\"\"\"\n        connector = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n\n        call_count = 0\n\n        def fake_fetch_page(\n            self: SharepointConnector,  # noqa: ARG001\n            page_url: str,  # noqa: ARG001\n            drive_id: str,  # noqa: ARG001\n            start: datetime | None = None,  # noqa: ARG001\n            end: datetime | None = None,  # noqa: ARG001\n            page_size: int = 200,  # noqa: ARG001\n        ) -> tuple[list[DriveItemData], str | None]:\n            nonlocal call_count\n            call_count += 1\n            if call_count == 1:\n                return [_make_item(\"a\")], \"https://next2\"\n            return [_make_item(\"a\"), _make_item(\"b\")], None\n\n        monkeypatch.setattr(\n            SharepointConnector, \"_fetch_one_delta_page\", fake_fetch_page\n        )\n\n        checkpoint = _build_ready_checkpoint()\n\n        # Page 1\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        _, checkpoint = _consume_generator(gen)\n        assert \"a\" in checkpoint.seen_document_ids\n\n        # Simulate crash: round-trip through JSON\n        restored = SharepointConnectorCheckpoint.model_validate_json(\n            checkpoint.model_dump_json()\n        )\n        assert \"a\" in restored.seen_document_ids\n\n        # Page 2 with restored checkpoint: 'a' should be skipped\n        connector2 = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n        monkeypatch.setattr(\n            SharepointConnector, \"_fetch_one_delta_page\", fake_fetch_page\n        )\n\n        gen = connector2._load_from_checkpoint(\n            _START_TS, _END_TS, restored, include_permissions=False\n        )\n        yielded, final_cp = _consume_generator(gen)\n        docs = _docs_from(yielded)\n        assert [d.id for d in docs] == [\"b\"]\n\n    def test_no_dedup_across_separate_indexing_runs(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"A fresh checkpoint (new indexing run) should have an empty\n        seen_document_ids, so previously-indexed docs are re-processed.\"\"\"\n        connector = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n\n        def fake_fetch_page(\n            self: SharepointConnector,  # noqa: ARG001\n            page_url: str,  # noqa: ARG001\n            drive_id: str,  # noqa: ARG001\n            start: datetime | None = None,  # noqa: ARG001\n            end: datetime | None = None,  # noqa: ARG001\n            page_size: int = 200,  # noqa: ARG001\n        ) -> tuple[list[DriveItemData], str | None]:\n            return [_make_item(\"a\")], None\n\n        monkeypatch.setattr(\n            SharepointConnector, \"_fetch_one_delta_page\", fake_fetch_page\n        )\n\n        # First run\n        cp1 = _build_ready_checkpoint()\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, cp1, include_permissions=False\n        )\n        yielded, _ = _consume_generator(gen)\n        assert len(_docs_from(yielded)) == 1\n\n        # Second run with a fresh checkpoint — same doc should appear again\n        cp2 = _build_ready_checkpoint()\n        assert len(cp2.seen_document_ids) == 0\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, cp2, include_permissions=False\n        )\n        yielded, _ = _consume_generator(gen)\n        assert len(_docs_from(yielded)) == 1\n\n    def test_same_id_across_drives_not_skipped(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Graph item IDs are only unique within a drive.  An item in drive B\n        that happens to share an ID with an item already seen in drive A must\n        NOT be skipped.\"\"\"\n        connector = _setup_connector(monkeypatch)\n        _mock_convert(monkeypatch)\n\n        def fake_fetch_page(\n            self: SharepointConnector,  # noqa: ARG001\n            page_url: str,  # noqa: ARG001\n            drive_id: str,  # noqa: ARG001\n            start: datetime | None = None,  # noqa: ARG001\n            end: datetime | None = None,  # noqa: ARG001\n            page_size: int = 200,  # noqa: ARG001\n        ) -> tuple[list[DriveItemData], str | None]:\n            return [_make_item(\"shared-id\")], None\n\n        monkeypatch.setattr(\n            SharepointConnector, \"_fetch_one_delta_page\", fake_fetch_page\n        )\n\n        checkpoint = _build_ready_checkpoint(drive_names=[\"DriveA\", \"DriveB\"])\n\n        # Drive A: yields the item\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, checkpoint = _consume_generator(gen)\n        docs = _docs_from(yielded)\n        assert len(docs) == 1\n        assert docs[0].id == \"shared-id\"\n\n        # seen_document_ids should have been cleared when drive A finished\n        assert len(checkpoint.seen_document_ids) == 0\n\n        # Drive B: same ID must be yielded again (different drive)\n        gen = connector._load_from_checkpoint(\n            _START_TS, _END_TS, checkpoint, include_permissions=False\n        )\n        yielded, checkpoint = _consume_generator(gen)\n        docs = _docs_from(yielded)\n        assert len(docs) == 1\n        assert docs[0].id == \"shared-id\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/sharepoint/test_denylist.py",
    "content": "from __future__ import annotations\n\nimport pytest\n\nfrom onyx.connectors.sharepoint.connector import _build_item_relative_path\nfrom onyx.connectors.sharepoint.connector import _is_path_excluded\nfrom onyx.connectors.sharepoint.connector import _is_site_excluded\nfrom onyx.connectors.sharepoint.connector import DriveItemData\nfrom onyx.connectors.sharepoint.connector import SharepointConnector\nfrom onyx.connectors.sharepoint.connector import SiteDescriptor\n\n\nclass TestIsSiteExcluded:\n    def test_exact_match(self) -> None:\n        assert _is_site_excluded(\n            \"https://contoso.sharepoint.com/sites/archive\",\n            [\"https://contoso.sharepoint.com/sites/archive\"],\n        )\n\n    def test_trailing_slash_mismatch(self) -> None:\n        assert _is_site_excluded(\n            \"https://contoso.sharepoint.com/sites/archive/\",\n            [\"https://contoso.sharepoint.com/sites/archive\"],\n        )\n\n    def test_glob_wildcard(self) -> None:\n        assert _is_site_excluded(\n            \"https://contoso.sharepoint.com/sites/archive-2024\",\n            [\"*/sites/archive-*\"],\n        )\n\n    def test_no_match(self) -> None:\n        assert not _is_site_excluded(\n            \"https://contoso.sharepoint.com/sites/engineering\",\n            [\"https://contoso.sharepoint.com/sites/archive\"],\n        )\n\n    def test_empty_patterns(self) -> None:\n        assert not _is_site_excluded(\n            \"https://contoso.sharepoint.com/sites/engineering\",\n            [],\n        )\n\n    def test_multiple_patterns(self) -> None:\n        patterns = [\n            \"*/sites/archive-*\",\n            \"*/sites/hr-confidential\",\n        ]\n        assert _is_site_excluded(\n            \"https://contoso.sharepoint.com/sites/hr-confidential\",\n            patterns,\n        )\n        assert not _is_site_excluded(\n            \"https://contoso.sharepoint.com/sites/engineering\",\n            patterns,\n        )\n\n\nclass TestIsPathExcluded:\n    def test_filename_glob(self) -> None:\n        assert _is_path_excluded(\"Engineering/report.tmp\", [\"*.tmp\"])\n\n    def test_filename_only(self) -> None:\n        assert _is_path_excluded(\"report.tmp\", [\"*.tmp\"])\n\n    def test_office_lock_files(self) -> None:\n        assert _is_path_excluded(\"Docs/~$document.docx\", [\"~$*\"])\n\n    def test_folder_glob(self) -> None:\n        assert _is_path_excluded(\"Archive/old/report.docx\", [\"Archive/*\"])\n\n    def test_nested_folder_glob(self) -> None:\n        assert _is_path_excluded(\"Projects/Archive/report.docx\", [\"*/Archive/*\"])\n\n    def test_no_match(self) -> None:\n        assert not _is_path_excluded(\"Engineering/report.docx\", [\"*.tmp\"])\n\n    def test_empty_patterns(self) -> None:\n        assert not _is_path_excluded(\"anything.docx\", [])\n\n    def test_multiple_patterns(self) -> None:\n        patterns = [\"*.tmp\", \"~$*\", \"Archive/*\"]\n        assert _is_path_excluded(\"test.tmp\", patterns)\n        assert _is_path_excluded(\"~$doc.docx\", patterns)\n        assert _is_path_excluded(\"Archive/old.pdf\", patterns)\n        assert not _is_path_excluded(\"Engineering/report.docx\", patterns)\n\n\nclass TestBuildItemRelativePath:\n    def test_with_folder(self) -> None:\n        assert (\n            _build_item_relative_path(\n                \"/drives/abc/root:/Engineering/API\", \"report.docx\"\n            )\n            == \"Engineering/API/report.docx\"\n        )\n\n    def test_root_level(self) -> None:\n        assert (\n            _build_item_relative_path(\"/drives/abc/root:\", \"report.docx\")\n            == \"report.docx\"\n        )\n\n    def test_none_parent(self) -> None:\n        assert _build_item_relative_path(None, \"report.docx\") == \"report.docx\"\n\n    def test_percent_encoded_folder(self) -> None:\n        assert (\n            _build_item_relative_path(\"/drives/abc/root:/My%20Documents\", \"report.docx\")\n            == \"My Documents/report.docx\"\n        )\n\n    def test_no_root_marker(self) -> None:\n        assert _build_item_relative_path(\"/drives/abc\", \"report.docx\") == \"report.docx\"\n\n\nclass TestFilterExcludedSites:\n    def test_filters_matching_sites(self) -> None:\n        connector = SharepointConnector(\n            excluded_sites=[\"*/sites/archive\"],\n        )\n        descriptors = [\n            SiteDescriptor(\n                url=\"https://t.sharepoint.com/sites/archive\",\n                drive_name=None,\n                folder_path=None,\n            ),\n            SiteDescriptor(\n                url=\"https://t.sharepoint.com/sites/engineering\",\n                drive_name=None,\n                folder_path=None,\n            ),\n        ]\n        result = connector._filter_excluded_sites(descriptors)\n        assert len(result) == 1\n        assert result[0].url == \"https://t.sharepoint.com/sites/engineering\"\n\n    def test_empty_excluded_returns_all(self) -> None:\n        connector = SharepointConnector(excluded_sites=[])\n        descriptors = [\n            SiteDescriptor(\n                url=\"https://t.sharepoint.com/sites/a\",\n                drive_name=None,\n                folder_path=None,\n            ),\n            SiteDescriptor(\n                url=\"https://t.sharepoint.com/sites/b\",\n                drive_name=None,\n                folder_path=None,\n            ),\n        ]\n        result = connector._filter_excluded_sites(descriptors)\n        assert len(result) == 2\n\n\nclass TestIsDriveitemExcluded:\n    def test_excluded_by_extension(self) -> None:\n        connector = SharepointConnector(excluded_paths=[\"*.tmp\"])\n        item = DriveItemData(\n            id=\"1\",\n            name=\"file.tmp\",\n            web_url=\"https://example.com/file.tmp\",\n            parent_reference_path=\"/drives/abc/root:/Docs\",\n        )\n        assert connector._is_driveitem_excluded(item)\n\n    def test_not_excluded(self) -> None:\n        connector = SharepointConnector(excluded_paths=[\"*.tmp\"])\n        item = DriveItemData(\n            id=\"1\",\n            name=\"file.docx\",\n            web_url=\"https://example.com/file.docx\",\n            parent_reference_path=\"/drives/abc/root:/Docs\",\n        )\n        assert not connector._is_driveitem_excluded(item)\n\n    def test_no_patterns_never_excludes(self) -> None:\n        connector = SharepointConnector(excluded_paths=[])\n        item = DriveItemData(\n            id=\"1\",\n            name=\"file.tmp\",\n            web_url=\"https://example.com/file.tmp\",\n            parent_reference_path=\"/drives/abc/root:/Docs\",\n        )\n        assert not connector._is_driveitem_excluded(item)\n\n    def test_folder_pattern(self) -> None:\n        connector = SharepointConnector(excluded_paths=[\"Archive/*\"])\n        item = DriveItemData(\n            id=\"1\",\n            name=\"old.pdf\",\n            web_url=\"https://example.com/old.pdf\",\n            parent_reference_path=\"/drives/abc/root:/Archive\",\n        )\n        assert connector._is_driveitem_excluded(item)\n\n    @pytest.mark.parametrize(\n        \"whitespace_pattern\",\n        [\"\", \"  \", \"\\t\"],\n    )\n    def test_whitespace_patterns_ignored(self, whitespace_pattern: str) -> None:\n        connector = SharepointConnector(excluded_paths=[whitespace_pattern])\n        assert connector.excluded_paths == []\n\n    def test_whitespace_padded_patterns_are_trimmed(self) -> None:\n        connector = SharepointConnector(excluded_paths=[\"  *.tmp  \", \" Archive/* \"])\n        assert connector.excluded_paths == [\"*.tmp\", \"Archive/*\"]\n\n        item = DriveItemData(\n            id=\"1\",\n            name=\"file.tmp\",\n            web_url=\"https://example.com/file.tmp\",\n            parent_reference_path=\"/drives/abc/root:/Docs\",\n        )\n        assert connector._is_driveitem_excluded(item)\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/sharepoint/test_drive_matching.py",
    "content": "from __future__ import annotations\n\nfrom collections import deque\nfrom collections.abc import Generator\nfrom collections.abc import Sequence\nfrom datetime import datetime\nfrom datetime import timezone\nfrom typing import Any\n\nimport pytest\n\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentSource\nfrom onyx.connectors.models import TextSection\nfrom onyx.connectors.sharepoint.connector import DriveItemData\nfrom onyx.connectors.sharepoint.connector import SHARED_DOCUMENTS_MAP\nfrom onyx.connectors.sharepoint.connector import SharepointConnector\nfrom onyx.connectors.sharepoint.connector import SharepointConnectorCheckpoint\nfrom onyx.connectors.sharepoint.connector import SiteDescriptor\n\n\nclass _FakeQuery:\n    def __init__(self, payload: Sequence[Any]) -> None:\n        self._payload = payload\n\n    def execute_query(self) -> Sequence[Any]:\n        return self._payload\n\n\nclass _FakeDrive:\n    def __init__(self, name: str) -> None:\n        self.name = name\n        self.id = f\"fake-drive-id-{name}\"\n        self.web_url = f\"https://example.sharepoint.com/sites/sample/{name}\"\n\n\nclass _FakeDrivesCollection:\n    def __init__(self, drives: Sequence[_FakeDrive]) -> None:\n        self._drives = drives\n\n    def get(self) -> _FakeQuery:\n        return _FakeQuery(list(self._drives))\n\n\nclass _FakeSite:\n    def __init__(self, drives: Sequence[_FakeDrive]) -> None:\n        self.drives = _FakeDrivesCollection(drives)\n\n\nclass _FakeSites:\n    def __init__(self, drives: Sequence[_FakeDrive]) -> None:\n        self._drives = drives\n\n    def get_by_url(self, _url: str) -> _FakeSite:\n        return _FakeSite(self._drives)\n\n\nclass _FakeGraphClient:\n    def __init__(self, drives: Sequence[_FakeDrive]) -> None:\n        self.sites = _FakeSites(drives)\n\n\n_SAMPLE_ITEM = DriveItemData(\n    id=\"item-1\",\n    name=\"sample.pdf\",\n    web_url=\"https://example.sharepoint.com/sites/sample/sample.pdf\",\n    parent_reference_path=None,\n    drive_id=\"fake-drive-id\",\n)\n\n\ndef _build_connector(drives: Sequence[_FakeDrive]) -> SharepointConnector:\n    connector = SharepointConnector()\n    connector._graph_client = _FakeGraphClient(drives)\n    return connector\n\n\ndef _fake_iter_drive_items_paged(\n    self: SharepointConnector,  # noqa: ARG001\n    drive_id: str,  # noqa: ARG001\n    folder_path: str | None = None,  # noqa: ARG001\n    start: datetime | None = None,  # noqa: ARG001\n    end: datetime | None = None,  # noqa: ARG001\n    page_size: int = 200,  # noqa: ARG001\n) -> Generator[DriveItemData, None, None]:\n    yield _SAMPLE_ITEM\n\n\ndef _fake_iter_drive_items_delta(\n    self: SharepointConnector,  # noqa: ARG001\n    drive_id: str,  # noqa: ARG001\n    start: datetime | None = None,  # noqa: ARG001\n    end: datetime | None = None,  # noqa: ARG001\n    page_size: int = 200,  # noqa: ARG001\n) -> Generator[DriveItemData, None, None]:\n    yield _SAMPLE_ITEM\n\n\n@pytest.mark.parametrize(\n    (\"requested_drive_name\", \"graph_drive_name\"),\n    [\n        (\"Shared Documents\", \"Documents\"),\n        (\"Freigegebene Dokumente\", \"Dokumente\"),\n        (\"Documentos compartidos\", \"Documentos\"),\n    ],\n)\ndef test_fetch_driveitems_matches_international_drive_names(\n    requested_drive_name: str,\n    graph_drive_name: str,\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    connector = _build_connector([_FakeDrive(graph_drive_name)])\n    site_descriptor = SiteDescriptor(\n        url=\"https://example.sharepoint.com/sites/sample\",\n        drive_name=requested_drive_name,\n        folder_path=None,\n    )\n\n    monkeypatch.setattr(\n        SharepointConnector,\n        \"_iter_drive_items_delta\",\n        _fake_iter_drive_items_delta,\n    )\n\n    results = list(connector._fetch_driveitems(site_descriptor=site_descriptor))\n\n    assert len(results) == 1\n    drive_item, returned_drive_name, drive_web_url = results[0]\n    assert drive_item.id == _SAMPLE_ITEM.id\n    assert returned_drive_name == requested_drive_name\n    assert drive_web_url is not None\n\n\n@pytest.mark.parametrize(\n    (\"requested_drive_name\", \"graph_drive_name\"),\n    [\n        (\"Shared Documents\", \"Documents\"),\n        (\"Freigegebene Dokumente\", \"Dokumente\"),\n        (\"Documentos compartidos\", \"Documentos\"),\n    ],\n)\ndef test_get_drive_items_for_drive_id_matches_map(\n    requested_drive_name: str,\n    graph_drive_name: str,\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    connector = _build_connector([_FakeDrive(graph_drive_name)])\n    site_descriptor = SiteDescriptor(\n        url=\"https://example.sharepoint.com/sites/sample\",\n        drive_name=requested_drive_name,\n        folder_path=None,\n    )\n\n    monkeypatch.setattr(\n        SharepointConnector,\n        \"_iter_drive_items_delta\",\n        _fake_iter_drive_items_delta,\n    )\n\n    items_iter = connector._get_drive_items_for_drive_id(\n        site_descriptor=site_descriptor,\n        drive_id=\"fake-drive-id\",\n    )\n\n    results = list(items_iter)\n    assert len(results) == 1\n    assert results[0].id == _SAMPLE_ITEM.id\n\n\ndef test_load_from_checkpoint_maps_drive_name(monkeypatch: pytest.MonkeyPatch) -> None:\n    connector = SharepointConnector()\n    connector._graph_client = object()\n    connector.include_site_pages = False\n\n    captured_drive_names: list[str] = []\n    sample_item = DriveItemData(\n        id=\"doc-1\",\n        name=\"sample.pdf\",\n        web_url=\"https://example.sharepoint.com/sites/sample/sample.pdf\",\n        parent_reference_path=None,\n        drive_id=\"fake-drive-id\",\n    )\n\n    def fake_resolve_drive(\n        self: SharepointConnector,  # noqa: ARG001\n        site_descriptor: SiteDescriptor,  # noqa: ARG001\n        drive_name: str,\n    ) -> tuple[str, str | None]:\n        assert drive_name == \"Documents\"\n        return (\n            \"fake-drive-id\",\n            \"https://example.sharepoint.com/sites/sample/Documents\",\n        )\n\n    def fake_fetch_one_delta_page(\n        self: SharepointConnector,  # noqa: ARG001\n        page_url: str,  # noqa: ARG001\n        drive_id: str,  # noqa: ARG001\n        start: datetime | None = None,  # noqa: ARG001\n        end: datetime | None = None,  # noqa: ARG001\n        page_size: int = 200,  # noqa: ARG001\n    ) -> tuple[list[DriveItemData], str | None]:\n        return [sample_item], None\n\n    def fake_convert(\n        driveitem: DriveItemData,  # noqa: ARG001\n        drive_name: str,\n        ctx: Any,  # noqa: ARG001\n        graph_client: Any,  # noqa: ARG001\n        graph_api_base: str,  # noqa: ARG001\n        include_permissions: bool,  # noqa: ARG001\n        parent_hierarchy_raw_node_id: str | None = None,  # noqa: ARG001\n        access_token: str | None = None,  # noqa: ARG001\n        treat_sharing_link_as_public: bool = False,  # noqa: ARG001\n    ) -> Document:\n        captured_drive_names.append(drive_name)\n        return Document(\n            id=\"doc-1\",\n            source=DocumentSource.SHAREPOINT,\n            semantic_identifier=\"sample.pdf\",\n            metadata={},\n            sections=[TextSection(link=\"https://example.com\", text=\"content\")],\n        )\n\n    def fake_get_access_token(self: SharepointConnector) -> str:  # noqa: ARG001\n        return \"fake-access-token\"\n\n    monkeypatch.setattr(\n        SharepointConnector,\n        \"_resolve_drive\",\n        fake_resolve_drive,\n    )\n    monkeypatch.setattr(\n        SharepointConnector,\n        \"_fetch_one_delta_page\",\n        fake_fetch_one_delta_page,\n    )\n    monkeypatch.setattr(\n        \"onyx.connectors.sharepoint.connector._convert_driveitem_to_document_with_permissions\",\n        fake_convert,\n    )\n    monkeypatch.setattr(\n        SharepointConnector,\n        \"_get_graph_access_token\",\n        fake_get_access_token,\n    )\n\n    checkpoint = SharepointConnectorCheckpoint(has_more=True)\n    checkpoint.cached_site_descriptors = deque()\n    checkpoint.current_site_descriptor = SiteDescriptor(\n        url=\"https://example.sharepoint.com/sites/sample\",\n        drive_name=SHARED_DOCUMENTS_MAP[\"Documents\"],\n        folder_path=None,\n    )\n    checkpoint.cached_drive_names = deque([\"Documents\"])\n    checkpoint.current_drive_name = None\n    checkpoint.process_site_pages = False\n\n    generator = connector._load_from_checkpoint(\n        start=0,\n        end=0,\n        checkpoint=checkpoint,\n        include_permissions=False,\n    )\n\n    all_yielded: list[Any] = []\n    try:\n        while True:\n            all_yielded.append(next(generator))\n    except StopIteration:\n        pass\n\n    from onyx.connectors.models import HierarchyNode\n\n    documents = [item for item in all_yielded if not isinstance(item, HierarchyNode)]\n    hierarchy_nodes = [item for item in all_yielded if isinstance(item, HierarchyNode)]\n\n    assert len(documents) == 1\n    assert captured_drive_names == [SHARED_DOCUMENTS_MAP[\"Documents\"]]\n    assert len(hierarchy_nodes) >= 1\n\n\ndef test_get_drive_items_uses_delta_when_no_folder_path(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"When folder_path is None, _get_drive_items_for_drive_id should use delta.\"\"\"\n    connector = _build_connector([_FakeDrive(\"Documents\")])\n    site = SiteDescriptor(\n        url=\"https://example.sharepoint.com/sites/sample\",\n        drive_name=\"Documents\",\n        folder_path=None,\n    )\n\n    called_method: list[str] = []\n\n    def fake_delta(\n        self: SharepointConnector,  # noqa: ARG001\n        drive_id: str,  # noqa: ARG001\n        start: datetime | None = None,  # noqa: ARG001\n        end: datetime | None = None,  # noqa: ARG001\n        page_size: int = 200,  # noqa: ARG001\n    ) -> Generator[DriveItemData, None, None]:\n        called_method.append(\"delta\")\n        yield _SAMPLE_ITEM\n\n    def fake_paged(\n        self: SharepointConnector,  # noqa: ARG001\n        drive_id: str,  # noqa: ARG001\n        folder_path: str | None = None,  # noqa: ARG001\n        start: datetime | None = None,  # noqa: ARG001\n        end: datetime | None = None,  # noqa: ARG001\n        page_size: int = 200,  # noqa: ARG001\n    ) -> Generator[DriveItemData, None, None]:\n        called_method.append(\"paged\")\n        yield _SAMPLE_ITEM\n\n    monkeypatch.setattr(SharepointConnector, \"_iter_drive_items_delta\", fake_delta)\n    monkeypatch.setattr(SharepointConnector, \"_iter_drive_items_paged\", fake_paged)\n\n    items = connector._get_drive_items_for_drive_id(site, \"fake-drive-id\")\n    list(items)\n\n    assert called_method == [\"delta\"]\n\n\ndef test_get_drive_items_uses_paged_when_folder_path_set(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"When folder_path is set, _get_drive_items_for_drive_id should use BFS.\"\"\"\n    connector = _build_connector([_FakeDrive(\"Documents\")])\n    site = SiteDescriptor(\n        url=\"https://example.sharepoint.com/sites/sample\",\n        drive_name=\"Documents\",\n        folder_path=\"Engineering/Docs\",\n    )\n\n    called_method: list[str] = []\n\n    def fake_delta(\n        self: SharepointConnector,  # noqa: ARG001\n        drive_id: str,  # noqa: ARG001\n        start: datetime | None = None,  # noqa: ARG001\n        end: datetime | None = None,  # noqa: ARG001\n        page_size: int = 200,  # noqa: ARG001\n    ) -> Generator[DriveItemData, None, None]:\n        called_method.append(\"delta\")\n        yield _SAMPLE_ITEM\n\n    def fake_paged(\n        self: SharepointConnector,  # noqa: ARG001\n        drive_id: str,  # noqa: ARG001\n        folder_path: str | None = None,  # noqa: ARG001\n        start: datetime | None = None,  # noqa: ARG001\n        end: datetime | None = None,  # noqa: ARG001\n        page_size: int = 200,  # noqa: ARG001\n    ) -> Generator[DriveItemData, None, None]:\n        called_method.append(\"paged\")\n        yield _SAMPLE_ITEM\n\n    monkeypatch.setattr(SharepointConnector, \"_iter_drive_items_delta\", fake_delta)\n    monkeypatch.setattr(SharepointConnector, \"_iter_drive_items_paged\", fake_paged)\n\n    items = connector._get_drive_items_for_drive_id(site, \"fake-drive-id\")\n    list(items)\n\n    assert called_method == [\"paged\"]\n\n\ndef test_iter_drive_items_delta_uses_timestamp_token(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"Delta iteration should pass the start time as a URL token for incremental sync.\"\"\"\n    connector = SharepointConnector()\n\n    captured_urls: list[str] = []\n\n    def fake_graph_api_get_json(\n        self: SharepointConnector,  # noqa: ARG001\n        url: str,\n        params: dict[str, str] | None = None,  # noqa: ARG001\n    ) -> dict[str, Any]:\n        captured_urls.append(url)\n        return {\n            \"value\": [\n                {\n                    \"id\": \"file-1\",\n                    \"name\": \"report.docx\",\n                    \"webUrl\": \"https://example.sharepoint.com/report.docx\",\n                    \"file\": {\n                        \"mimeType\": \"application/vnd.openxmlformats-officedocument.wordprocessingml.document\"\n                    },\n                    \"lastModifiedDateTime\": \"2025-06-15T12:00:00Z\",\n                    \"parentReference\": {\"path\": \"/drives/d1/root:\", \"driveId\": \"d1\"},\n                }\n            ],\n            \"@odata.deltaLink\": \"https://graph.microsoft.com/v1.0/drives/d1/root/delta?token=final\",\n        }\n\n    monkeypatch.setattr(\n        SharepointConnector, \"_graph_api_get_json\", fake_graph_api_get_json\n    )\n\n    start = datetime(2025, 6, 1, 0, 0, 0, tzinfo=timezone.utc)\n    items = list(connector._iter_drive_items_delta(\"d1\", start=start))\n\n    assert len(items) == 1\n    assert items[0].id == \"file-1\"\n    assert len(captured_urls) == 1\n    assert \"token=2025-06-01T00%3A00%3A00%2B00%3A00\" in captured_urls[0]\n\n\ndef test_iter_drive_items_delta_full_crawl_when_no_start(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"Delta iteration without a start time should do a full enumeration (no token).\"\"\"\n    connector = SharepointConnector()\n\n    captured_urls: list[str] = []\n\n    def fake_graph_api_get_json(\n        self: SharepointConnector,  # noqa: ARG001\n        url: str,\n        params: dict[str, str] | None = None,  # noqa: ARG001\n    ) -> dict[str, Any]:\n        captured_urls.append(url)\n        return {\n            \"value\": [],\n            \"@odata.deltaLink\": \"https://graph.microsoft.com/v1.0/drives/d1/root/delta?token=final\",\n        }\n\n    monkeypatch.setattr(\n        SharepointConnector, \"_graph_api_get_json\", fake_graph_api_get_json\n    )\n\n    list(connector._iter_drive_items_delta(\"d1\"))\n\n    assert len(captured_urls) == 1\n    assert \"token=\" not in captured_urls[0]\n    assert captured_urls[0].endswith(\"/drives/d1/root/delta\")\n\n\ndef test_iter_drive_items_delta_skips_folders_and_deleted(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"Delta results with folder or deleted facets should be skipped.\"\"\"\n    connector = SharepointConnector()\n\n    def fake_graph_api_get_json(\n        self: SharepointConnector,  # noqa: ARG001\n        url: str,  # noqa: ARG001\n        params: dict[str, str] | None = None,  # noqa: ARG001\n    ) -> dict[str, Any]:\n        return {\n            \"value\": [\n                {\"id\": \"folder-1\", \"name\": \"Docs\", \"folder\": {\"childCount\": 5}},\n                {\"id\": \"deleted-1\", \"name\": \"old.txt\", \"deleted\": {\"state\": \"deleted\"}},\n                {\n                    \"id\": \"file-1\",\n                    \"name\": \"keep.pdf\",\n                    \"webUrl\": \"https://example.sharepoint.com/keep.pdf\",\n                    \"file\": {\"mimeType\": \"application/pdf\"},\n                    \"lastModifiedDateTime\": \"2025-06-15T12:00:00Z\",\n                    \"parentReference\": {\"path\": \"/drives/d1/root:\", \"driveId\": \"d1\"},\n                },\n            ],\n            \"@odata.deltaLink\": \"https://graph.microsoft.com/v1.0/drives/d1/root/delta?token=final\",\n        }\n\n    monkeypatch.setattr(\n        SharepointConnector, \"_graph_api_get_json\", fake_graph_api_get_json\n    )\n\n    items = list(connector._iter_drive_items_delta(\"d1\"))\n    assert len(items) == 1\n    assert items[0].id == \"file-1\"\n\n\ndef test_iter_drive_items_delta_handles_410_gone(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"On 410 Gone, delta should fall back to full enumeration.\"\"\"\n    import requests as req\n\n    connector = SharepointConnector()\n\n    call_count = 0\n\n    def fake_graph_api_get_json(\n        self: SharepointConnector,  # noqa: ARG001\n        url: str,\n        params: dict[str, str] | None = None,  # noqa: ARG001\n    ) -> dict[str, Any]:\n        nonlocal call_count\n        call_count += 1\n\n        if call_count == 1 and \"token=\" in url:\n            response = req.Response()\n            response.status_code = 410\n            raise req.HTTPError(response=response)\n\n        return {\n            \"value\": [\n                {\n                    \"id\": \"file-1\",\n                    \"name\": \"doc.pdf\",\n                    \"webUrl\": \"https://example.sharepoint.com/doc.pdf\",\n                    \"file\": {\"mimeType\": \"application/pdf\"},\n                    \"lastModifiedDateTime\": \"2025-06-15T12:00:00Z\",\n                    \"parentReference\": {\"path\": \"/drives/d1/root:\", \"driveId\": \"d1\"},\n                }\n            ],\n            \"@odata.deltaLink\": \"https://graph.microsoft.com/v1.0/drives/d1/root/delta?token=final\",\n        }\n\n    monkeypatch.setattr(\n        SharepointConnector, \"_graph_api_get_json\", fake_graph_api_get_json\n    )\n\n    start = datetime(2025, 6, 1, 0, 0, 0, tzinfo=timezone.utc)\n    items = list(connector._iter_drive_items_delta(\"d1\", start=start))\n\n    assert len(items) == 1\n    assert items[0].id == \"file-1\"\n    assert call_count == 2\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/sharepoint/test_fetch_site_pages.py",
    "content": "\"\"\"Unit tests for SharepointConnector._fetch_site_pages error handling.\n\nCovers 404 handling (classic sites / no modern pages) and 400\ncanvasLayout fallback (corrupt pages causing $expand=canvasLayout to\nfail on the LIST endpoint).\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nfrom typing import Any\n\nimport pytest\nfrom requests import Response\nfrom requests.exceptions import HTTPError\n\nfrom onyx.connectors.sharepoint.connector import GRAPH_INVALID_REQUEST_CODE\nfrom onyx.connectors.sharepoint.connector import SharepointConnector\nfrom onyx.connectors.sharepoint.connector import SiteDescriptor\n\nSITE_URL = \"https://tenant.sharepoint.com/sites/ClassicSite\"\nFAKE_SITE_ID = \"tenant.sharepoint.com,abc123,def456\"\nPAGES_COLLECTION = f\"https://graph.microsoft.com/v1.0/sites/{FAKE_SITE_ID}/pages\"\nSITE_PAGES_BASE = f\"{PAGES_COLLECTION}/microsoft.graph.sitePage\"\n\n\ndef _site_descriptor() -> SiteDescriptor:\n    return SiteDescriptor(url=SITE_URL, drive_name=None, folder_path=None)\n\n\ndef _make_http_error(\n    status_code: int,\n    error_code: str = \"itemNotFound\",\n    message: str = \"Item not found\",\n) -> HTTPError:\n    body = {\"error\": {\"code\": error_code, \"message\": message}}\n    response = Response()\n    response.status_code = status_code\n    response._content = json.dumps(body).encode()\n    response.headers[\"Content-Type\"] = \"application/json\"\n    return HTTPError(response=response)\n\n\ndef _setup_connector(\n    monkeypatch: pytest.MonkeyPatch,  # noqa: ARG001\n) -> SharepointConnector:\n    \"\"\"Create a connector with the graph client and site resolution mocked.\"\"\"\n    connector = SharepointConnector(sites=[SITE_URL])\n    connector.graph_api_base = \"https://graph.microsoft.com/v1.0\"\n\n    mock_sites = type(\n        \"FakeSites\",\n        (),\n        {\n            \"get_by_url\": staticmethod(\n                lambda url: type(  # noqa: ARG005\n                    \"Q\",\n                    (),\n                    {\n                        \"execute_query\": lambda self: None,  # noqa: ARG005\n                        \"id\": FAKE_SITE_ID,\n                    },\n                )()\n            ),\n        },\n    )()\n    connector._graph_client = type(\"FakeGraphClient\", (), {\"sites\": mock_sites})()\n\n    return connector\n\n\ndef _patch_graph_api_get_json(\n    monkeypatch: pytest.MonkeyPatch,\n    fake_fn: Any,\n) -> None:\n    monkeypatch.setattr(SharepointConnector, \"_graph_api_get_json\", fake_fn)\n\n\nclass TestFetchSitePages404:\n    def test_404_yields_no_pages(self, monkeypatch: pytest.MonkeyPatch) -> None:\n        \"\"\"A 404 from the Pages API should result in zero yielded pages.\"\"\"\n        connector = _setup_connector(monkeypatch)\n\n        def fake_get_json(\n            self: SharepointConnector,  # noqa: ARG001\n            url: str,  # noqa: ARG001\n            params: dict[str, str] | None = None,  # noqa: ARG001\n        ) -> dict[str, Any]:\n            raise _make_http_error(404)\n\n        _patch_graph_api_get_json(monkeypatch, fake_get_json)\n\n        pages = list(connector._fetch_site_pages(_site_descriptor()))\n        assert pages == []\n\n    def test_404_does_not_raise(self, monkeypatch: pytest.MonkeyPatch) -> None:\n        \"\"\"A 404 must not propagate as an exception.\"\"\"\n        connector = _setup_connector(monkeypatch)\n\n        def fake_get_json(\n            self: SharepointConnector,  # noqa: ARG001\n            url: str,  # noqa: ARG001\n            params: dict[str, str] | None = None,  # noqa: ARG001\n        ) -> dict[str, Any]:\n            raise _make_http_error(404)\n\n        _patch_graph_api_get_json(monkeypatch, fake_get_json)\n\n        for _ in connector._fetch_site_pages(_site_descriptor()):\n            pass\n\n    def test_non_404_http_error_still_raises(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"Non-404 HTTP errors (e.g. 403) must still propagate.\"\"\"\n        connector = _setup_connector(monkeypatch)\n\n        def fake_get_json(\n            self: SharepointConnector,  # noqa: ARG001\n            url: str,  # noqa: ARG001\n            params: dict[str, str] | None = None,  # noqa: ARG001\n        ) -> dict[str, Any]:\n            raise _make_http_error(403)\n\n        _patch_graph_api_get_json(monkeypatch, fake_get_json)\n\n        with pytest.raises(HTTPError):\n            list(connector._fetch_site_pages(_site_descriptor()))\n\n    def test_successful_fetch_yields_pages(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"When the API succeeds, pages should be yielded normally.\"\"\"\n        connector = _setup_connector(monkeypatch)\n\n        fake_page = {\n            \"id\": \"page-1\",\n            \"title\": \"Hello World\",\n            \"webUrl\": f\"{SITE_URL}/SitePages/Hello.aspx\",\n            \"lastModifiedDateTime\": \"2025-06-01T00:00:00Z\",\n        }\n\n        def fake_get_json(\n            self: SharepointConnector,  # noqa: ARG001\n            url: str,  # noqa: ARG001\n            params: dict[str, str] | None = None,  # noqa: ARG001\n        ) -> dict[str, Any]:\n            return {\"value\": [fake_page]}\n\n        _patch_graph_api_get_json(monkeypatch, fake_get_json)\n\n        pages = list(connector._fetch_site_pages(_site_descriptor()))\n        assert len(pages) == 1\n        assert pages[0][\"id\"] == \"page-1\"\n\n    def test_404_on_second_page_stops_pagination(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"If the first API page succeeds but a nextLink returns 404,\n        already-yielded pages are kept and iteration stops cleanly.\"\"\"\n        connector = _setup_connector(monkeypatch)\n\n        call_count = 0\n        first_page = {\n            \"id\": \"page-1\",\n            \"title\": \"First\",\n            \"webUrl\": f\"{SITE_URL}/SitePages/First.aspx\",\n            \"lastModifiedDateTime\": \"2025-06-01T00:00:00Z\",\n        }\n\n        def fake_get_json(\n            self: SharepointConnector,  # noqa: ARG001\n            url: str,  # noqa: ARG001\n            params: dict[str, str] | None = None,  # noqa: ARG001\n        ) -> dict[str, Any]:\n            nonlocal call_count\n            call_count += 1\n            if call_count == 1:\n                return {\n                    \"value\": [first_page],\n                    \"@odata.nextLink\": \"https://graph.microsoft.com/next\",\n                }\n            raise _make_http_error(404)\n\n        _patch_graph_api_get_json(monkeypatch, fake_get_json)\n\n        pages = list(connector._fetch_site_pages(_site_descriptor()))\n        assert len(pages) == 1\n        assert pages[0][\"id\"] == \"page-1\"\n\n\nclass TestFetchSitePages400Fallback:\n    \"\"\"When $expand=canvasLayout on the LIST endpoint returns 400\n    invalidRequest, _fetch_site_pages should fall back to listing\n    without expansion, then expanding each page individually.\"\"\"\n\n    GOOD_PAGE: dict[str, Any] = {\n        \"id\": \"good-1\",\n        \"name\": \"Good.aspx\",\n        \"title\": \"Good Page\",\n        \"lastModifiedDateTime\": \"2025-06-01T00:00:00Z\",\n    }\n    BAD_PAGE: dict[str, Any] = {\n        \"id\": \"bad-1\",\n        \"name\": \"Bad.aspx\",\n        \"title\": \"Bad Page\",\n        \"lastModifiedDateTime\": \"2025-06-01T00:00:00Z\",\n    }\n    GOOD_PAGE_EXPANDED: dict[str, Any] = {\n        **GOOD_PAGE,\n        \"canvasLayout\": {\"horizontalSections\": []},\n    }\n\n    def test_fallback_expands_good_pages_individually(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"On 400 from the LIST expand, the connector should list without\n        expand, then GET each page individually with $expand=canvasLayout.\"\"\"\n        connector = _setup_connector(monkeypatch)\n        good_page = self.GOOD_PAGE\n        bad_page = self.BAD_PAGE\n        good_page_expanded = self.GOOD_PAGE_EXPANDED\n\n        def fake_get_json(\n            self: SharepointConnector,  # noqa: ARG001\n            url: str,\n            params: dict[str, str] | None = None,\n        ) -> dict[str, Any]:\n            if url == SITE_PAGES_BASE and params == {\"$expand\": \"canvasLayout\"}:\n                raise _make_http_error(\n                    400, GRAPH_INVALID_REQUEST_CODE, \"Invalid request\"\n                )\n            if url == SITE_PAGES_BASE and params is None:\n                return {\"value\": [good_page, bad_page]}\n            expand_params = {\"$expand\": \"canvasLayout\"}\n            if url == f\"{PAGES_COLLECTION}/good-1/microsoft.graph.sitePage\":\n                assert params == expand_params, f\"Expected $expand params, got {params}\"\n                return good_page_expanded\n            if url == f\"{PAGES_COLLECTION}/bad-1/microsoft.graph.sitePage\":\n                assert params == expand_params, f\"Expected $expand params, got {params}\"\n                raise _make_http_error(\n                    400, GRAPH_INVALID_REQUEST_CODE, \"Invalid request\"\n                )\n            raise AssertionError(f\"Unexpected call: {url} {params}\")\n\n        _patch_graph_api_get_json(monkeypatch, fake_get_json)\n        pages = list(connector._fetch_site_pages(_site_descriptor()))\n\n        assert len(pages) == 2\n        assert pages[0].get(\"canvasLayout\") is not None\n        assert pages[1].get(\"canvasLayout\") is None\n        assert pages[1][\"id\"] == \"bad-1\"\n\n    def test_mid_pagination_400_does_not_duplicate(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"If the first paginated batch succeeds but a later nextLink\n        returns 400, pages from the first batch must not be re-yielded\n        by the fallback.\"\"\"\n        connector = _setup_connector(monkeypatch)\n        good_page = self.GOOD_PAGE\n        good_page_expanded = self.GOOD_PAGE_EXPANDED\n        bad_page = self.BAD_PAGE\n        second_page = {\n            \"id\": \"page-2\",\n            \"name\": \"Second.aspx\",\n            \"title\": \"Second Page\",\n            \"lastModifiedDateTime\": \"2025-06-01T00:00:00Z\",\n        }\n        next_link = \"https://graph.microsoft.com/v1.0/next-page-link\"\n\n        def fake_get_json(\n            self: SharepointConnector,  # noqa: ARG001\n            url: str,\n            params: dict[str, str] | None = None,\n        ) -> dict[str, Any]:\n            if url == SITE_PAGES_BASE and params == {\"$expand\": \"canvasLayout\"}:\n                return {\n                    \"value\": [good_page],\n                    \"@odata.nextLink\": next_link,\n                }\n            if url == next_link:\n                raise _make_http_error(\n                    400, GRAPH_INVALID_REQUEST_CODE, \"Invalid request\"\n                )\n            if url == SITE_PAGES_BASE and params is None:\n                return {\"value\": [good_page, bad_page, second_page]}\n            expand_params = {\"$expand\": \"canvasLayout\"}\n            if url == f\"{PAGES_COLLECTION}/good-1/microsoft.graph.sitePage\":\n                assert params == expand_params, f\"Expected $expand params, got {params}\"\n                return good_page_expanded\n            if url == f\"{PAGES_COLLECTION}/bad-1/microsoft.graph.sitePage\":\n                assert params == expand_params, f\"Expected $expand params, got {params}\"\n                raise _make_http_error(\n                    400, GRAPH_INVALID_REQUEST_CODE, \"Invalid request\"\n                )\n            if url == f\"{PAGES_COLLECTION}/page-2/microsoft.graph.sitePage\":\n                assert params == expand_params, f\"Expected $expand params, got {params}\"\n                return {**second_page, \"canvasLayout\": {\"horizontalSections\": []}}\n            raise AssertionError(f\"Unexpected call: {url} {params}\")\n\n        _patch_graph_api_get_json(monkeypatch, fake_get_json)\n        pages = list(connector._fetch_site_pages(_site_descriptor()))\n\n        ids = [p[\"id\"] for p in pages]\n        assert ids == [\"good-1\", \"bad-1\", \"page-2\"]\n\n    def test_non_invalid_request_400_still_raises(\n        self, monkeypatch: pytest.MonkeyPatch\n    ) -> None:\n        \"\"\"A 400 with a different error code (not invalidRequest) should\n        propagate, not trigger the fallback.\"\"\"\n        connector = _setup_connector(monkeypatch)\n\n        def fake_get_json(\n            self: SharepointConnector,  # noqa: ARG001\n            url: str,  # noqa: ARG001\n            params: dict[str, str] | None = None,  # noqa: ARG001\n        ) -> dict[str, Any]:\n            raise _make_http_error(400, \"badRequest\", \"Something else went wrong\")\n\n        _patch_graph_api_get_json(monkeypatch, fake_get_json)\n\n        with pytest.raises(HTTPError):\n            list(connector._fetch_site_pages(_site_descriptor()))\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/sharepoint/test_hierarchy_helpers.py",
    "content": "\"\"\"Unit tests for SharePoint connector hierarchy helper functions.\"\"\"\n\nfrom __future__ import annotations\n\nfrom onyx.connectors.sharepoint.connector import SharepointConnector\n\n\ndef test_extract_folder_path_from_parent_reference_with_folder() -> None:\n    \"\"\"Test extracting folder path when file is in a folder.\"\"\"\n    connector = SharepointConnector()\n\n    # Standard path format: /drives/{drive_id}/root:/folder/path\n    path = \"/drives/b!abc123def456/root:/Engineering/API\"\n    result = connector._extract_folder_path_from_parent_reference(path)\n    assert result == \"Engineering/API\"\n\n\ndef test_extract_folder_path_from_parent_reference_nested_folder() -> None:\n    \"\"\"Test extracting folder path from deeply nested folders.\"\"\"\n    connector = SharepointConnector()\n\n    path = \"/drives/b!xyz789/root:/Documents/Project/2025/Q1\"\n    result = connector._extract_folder_path_from_parent_reference(path)\n    assert result == \"Documents/Project/2025/Q1\"\n\n\ndef test_extract_folder_path_from_parent_reference_at_root() -> None:\n    \"\"\"Test extracting folder path when file is at drive root.\"\"\"\n    connector = SharepointConnector()\n\n    # File at root: path ends with \"root:\" or \"root:/\"\n    path = \"/drives/b!abc123/root:\"\n    result = connector._extract_folder_path_from_parent_reference(path)\n    assert result is None\n\n\ndef test_extract_folder_path_from_parent_reference_at_root_with_slash() -> None:\n    \"\"\"Test extracting folder path when file is at drive root (with trailing slash).\"\"\"\n    connector = SharepointConnector()\n\n    path = \"/drives/b!abc123/root:/\"\n    result = connector._extract_folder_path_from_parent_reference(path)\n    assert result is None\n\n\ndef test_extract_folder_path_from_parent_reference_none() -> None:\n    \"\"\"Test extracting folder path when path is None.\"\"\"\n    connector = SharepointConnector()\n\n    result = connector._extract_folder_path_from_parent_reference(None)\n    assert result is None\n\n\ndef test_extract_folder_path_from_parent_reference_empty() -> None:\n    \"\"\"Test extracting folder path when path is empty.\"\"\"\n    connector = SharepointConnector()\n\n    result = connector._extract_folder_path_from_parent_reference(\"\")\n    assert result is None\n\n\ndef test_extract_folder_path_from_parent_reference_no_root() -> None:\n    \"\"\"Test extracting folder path when path doesn't contain root:/.\"\"\"\n    connector = SharepointConnector()\n\n    # Unusual path format without root:/\n    path = \"/drives/b!abc123/items/folder\"\n    result = connector._extract_folder_path_from_parent_reference(path)\n    assert result is None\n\n\ndef test_build_folder_url_simple() -> None:\n    \"\"\"Test building folder URL with simple folder path.\"\"\"\n    connector = SharepointConnector()\n\n    site_url = \"https://company.sharepoint.com/sites/eng\"\n    drive_name = \"Shared Documents\"\n    folder_path = \"Engineering\"\n\n    result = connector._build_folder_url(site_url, drive_name, folder_path)\n    expected = \"https://company.sharepoint.com/sites/eng/Shared Documents/Engineering\"\n    assert result == expected\n\n\ndef test_build_folder_url_nested() -> None:\n    \"\"\"Test building folder URL with nested folder path.\"\"\"\n    connector = SharepointConnector()\n\n    site_url = \"https://company.sharepoint.com/sites/eng\"\n    drive_name = \"Shared Documents\"\n    folder_path = \"Engineering/API/v2\"\n\n    result = connector._build_folder_url(site_url, drive_name, folder_path)\n    expected = (\n        \"https://company.sharepoint.com/sites/eng/Shared Documents/Engineering/API/v2\"\n    )\n    assert result == expected\n\n\ndef test_build_folder_url_with_spaces() -> None:\n    \"\"\"Test building folder URL with spaces in folder path.\"\"\"\n    connector = SharepointConnector()\n\n    site_url = \"https://company.sharepoint.com/sites/eng\"\n    drive_name = \"Shared Documents\"\n    folder_path = \"Engineering/API Docs/Version 2\"\n\n    result = connector._build_folder_url(site_url, drive_name, folder_path)\n    expected = \"https://company.sharepoint.com/sites/eng/Shared Documents/Engineering/API Docs/Version 2\"\n    assert result == expected\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/sharepoint/test_rest_client_context_caching.py",
    "content": "\"\"\"Unit tests for SharepointConnector._create_rest_client_context caching.\"\"\"\n\nfrom __future__ import annotations\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.connectors.sharepoint.connector import _REST_CTX_MAX_AGE_S\nfrom onyx.connectors.sharepoint.connector import SharepointConnector\n\nSITE_A = \"https://tenant.sharepoint.com/sites/SiteA\"\nSITE_B = \"https://tenant.sharepoint.com/sites/SiteB\"\n\nFAKE_CREDS = {\"sp_client_id\": \"x\", \"sp_directory_id\": \"y\"}\n\n\ndef _make_connector() -> SharepointConnector:\n    \"\"\"Return a SharepointConnector with minimal credentials wired up.\"\"\"\n    connector = SharepointConnector(sites=[SITE_A])\n    connector.msal_app = MagicMock()\n    connector.sp_tenant_domain = \"tenant\"\n    connector._credential_json = FAKE_CREDS\n    return connector\n\n\ndef _noop_load_credentials(connector: SharepointConnector) -> MagicMock:\n    \"\"\"Patch load_credentials to just swap in a fresh MagicMock for msal_app.\"\"\"\n\n    def _fake_load(creds: dict) -> None:  # noqa: ARG001, ARG002\n        connector.msal_app = MagicMock()\n\n    mock = MagicMock(side_effect=_fake_load)\n    connector.load_credentials = mock  # type: ignore[method-assign]\n    return mock\n\n\ndef _fresh_client_context() -> MagicMock:\n    \"\"\"Return a MagicMock for ClientContext that produces a distinct object per call.\"\"\"\n    mock_cls = MagicMock()\n    # Each ClientContext(url).with_access_token(cb) returns a unique sentinel\n    mock_cls.side_effect = lambda url: MagicMock()  # noqa: ARG005\n    return mock_cls\n\n\n@patch(\"onyx.connectors.sharepoint.connector.acquire_token_for_rest\")\n@patch(\"onyx.connectors.sharepoint.connector.ClientContext\")\ndef test_returns_cached_context_within_max_age(\n    mock_client_ctx_cls: MagicMock,\n    _mock_acquire: MagicMock,\n) -> None:\n    \"\"\"Repeated calls with the same site_url within the TTL return the same object.\"\"\"\n    mock_client_ctx_cls.side_effect = lambda url: MagicMock()  # noqa: ARG005\n    connector = _make_connector()\n    _noop_load_credentials(connector)\n\n    ctx1 = connector._create_rest_client_context(SITE_A)\n    ctx2 = connector._create_rest_client_context(SITE_A)\n\n    assert ctx1 is ctx2\n    assert mock_client_ctx_cls.call_count == 1\n\n\n@patch(\"onyx.connectors.sharepoint.connector.time\")\n@patch(\"onyx.connectors.sharepoint.connector.acquire_token_for_rest\")\n@patch(\"onyx.connectors.sharepoint.connector.ClientContext\")\ndef test_rebuilds_context_after_max_age(\n    mock_client_ctx_cls: MagicMock,\n    _mock_acquire: MagicMock,\n    mock_time: MagicMock,\n) -> None:\n    \"\"\"After _REST_CTX_MAX_AGE_S the cached context is replaced.\"\"\"\n    mock_client_ctx_cls.side_effect = lambda url: MagicMock()  # noqa: ARG005\n    connector = _make_connector()\n    _noop_load_credentials(connector)\n\n    mock_time.monotonic.return_value = 0.0\n    ctx1 = connector._create_rest_client_context(SITE_A)\n\n    # Just past the boundary — should rebuild\n    mock_time.monotonic.return_value = _REST_CTX_MAX_AGE_S + 1\n    ctx2 = connector._create_rest_client_context(SITE_A)\n\n    assert ctx1 is not ctx2\n    assert mock_client_ctx_cls.call_count == 2\n\n\n@patch(\"onyx.connectors.sharepoint.connector.acquire_token_for_rest\")\n@patch(\"onyx.connectors.sharepoint.connector.ClientContext\")\ndef test_rebuilds_context_on_site_change(\n    mock_client_ctx_cls: MagicMock,\n    _mock_acquire: MagicMock,\n) -> None:\n    \"\"\"Switching to a different site_url forces a new context.\"\"\"\n    mock_client_ctx_cls.side_effect = lambda url: MagicMock()  # noqa: ARG005\n    connector = _make_connector()\n    _noop_load_credentials(connector)\n\n    ctx_a = connector._create_rest_client_context(SITE_A)\n    ctx_b = connector._create_rest_client_context(SITE_B)\n\n    assert ctx_a is not ctx_b\n    assert mock_client_ctx_cls.call_count == 2\n\n\n@patch(\"onyx.connectors.sharepoint.connector.time\")\n@patch(\"onyx.connectors.sharepoint.connector.acquire_token_for_rest\")\n@patch(\"onyx.connectors.sharepoint.connector.ClientContext\")\ndef test_load_credentials_called_on_rebuild(\n    _mock_client_ctx_cls: MagicMock,\n    _mock_acquire: MagicMock,\n    mock_time: MagicMock,\n) -> None:\n    \"\"\"load_credentials is called every time the context is rebuilt.\"\"\"\n    _mock_client_ctx_cls.side_effect = lambda url: MagicMock()  # noqa: ARG005\n    connector = _make_connector()\n    mock_load = _noop_load_credentials(connector)\n\n    # First call — rebuild (no cache yet)\n    mock_time.monotonic.return_value = 0.0\n    connector._create_rest_client_context(SITE_A)\n    assert mock_load.call_count == 1\n\n    # Second call — cache hit, no rebuild\n    mock_time.monotonic.return_value = 100.0\n    connector._create_rest_client_context(SITE_A)\n    assert mock_load.call_count == 1\n\n    # Third call — expired, rebuild\n    mock_time.monotonic.return_value = _REST_CTX_MAX_AGE_S + 1\n    connector._create_rest_client_context(SITE_A)\n    assert mock_load.call_count == 2\n\n    # Fourth call — site change, rebuild\n    mock_time.monotonic.return_value = _REST_CTX_MAX_AGE_S + 2\n    connector._create_rest_client_context(SITE_B)\n    assert mock_load.call_count == 3\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/sharepoint/test_url_parsing.py",
    "content": "from __future__ import annotations\n\nfrom onyx.connectors.sharepoint.connector import SharepointConnector\n\n\ndef test_extract_site_and_drive_info_from_share_link() -> None:\n    url = \"https://tenant.sharepoint.com/:f:/r/sites/SampleSite/Shared%20Documents/Sample%20Folder\"\n\n    site_descriptors = SharepointConnector._extract_site_and_drive_info([url])\n\n    assert len(site_descriptors) == 1\n    descriptor = site_descriptors[0]\n    assert descriptor.url == \"https://tenant.sharepoint.com/sites/SampleSite\"\n    assert descriptor.drive_name == \"Shared Documents\"\n    assert descriptor.folder_path == \"Sample Folder\"\n\n\ndef test_extract_site_and_drive_info_standard_url() -> None:\n    url = (\n        \"https://tenant.sharepoint.com/sites/SampleSite/Shared%20Documents/Nested/Path\"\n    )\n\n    site_descriptors = SharepointConnector._extract_site_and_drive_info([url])\n\n    assert len(site_descriptors) == 1\n    descriptor = site_descriptors[0]\n    assert descriptor.url == \"https://tenant.sharepoint.com/sites/SampleSite\"\n    assert descriptor.drive_name == \"Shared Documents\"\n    assert descriptor.folder_path == \"Nested/Path\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/slab/test_slab_validation.py",
    "content": "from unittest.mock import patch\n\nimport pytest\n\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.slab.connector import SlabConnector\n\n\ndef _build_connector(base_url: str = \"https://myteam.slab.com\") -> SlabConnector:\n    connector = SlabConnector(base_url=base_url)\n    connector.load_credentials({\"slab_bot_token\": \"fake-token\"})\n    return connector\n\n\ndef test_validate_rejects_missing_scheme() -> None:\n    connector = _build_connector(base_url=\"myteam.slab.com\")\n    with pytest.raises(ConnectorValidationError, match=\"https://\"):\n        connector.validate_connector_settings()\n\n\n@patch(\"onyx.connectors.slab.connector.get_all_post_ids\", return_value=[\"id1\"])\ndef test_validate_success(mock_get_posts: object) -> None:  # noqa: ARG001\n    connector = _build_connector()\n    connector.validate_connector_settings()\n\n\n@patch(\n    \"onyx.connectors.slab.connector.get_all_post_ids\",\n    side_effect=Exception(\"401 Unauthorized\"),\n)\ndef test_validate_bad_token_raises(\n    mock_get_posts: object,  # noqa: ARG001\n) -> None:  # noqa: ARG001\n    connector = _build_connector()\n    with pytest.raises(ConnectorValidationError, match=\"Failed to fetch posts\"):\n        connector.validate_connector_settings()\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/slack/test_message_filtering.py",
    "content": "import pytest\n\nfrom onyx.connectors.slack.connector import _bot_inclusive_msg_filter\nfrom onyx.connectors.slack.connector import default_msg_filter\nfrom onyx.connectors.slack.connector import SlackConnector\nfrom onyx.connectors.slack.connector import SlackMessageFilterReason\nfrom onyx.connectors.slack.models import MessageType\n\n\n# -- default_msg_filter tests --\n\n\n@pytest.mark.parametrize(\n    \"message,expected_reason\",\n    [\n        # Regular user message: not filtered\n        (\n            {\"text\": \"hello\", \"user\": \"U123\", \"ts\": \"1.0\"},\n            None,\n        ),\n        # Bot message with bot_id: filtered as BOT\n        (\n            {\"text\": \"automated update\", \"bot_id\": \"B123\", \"ts\": \"1.0\"},\n            SlackMessageFilterReason.BOT,\n        ),\n        # App message with app_id: filtered as BOT\n        (\n            {\"text\": \"app notification\", \"app_id\": \"A123\", \"ts\": \"1.0\"},\n            SlackMessageFilterReason.BOT,\n        ),\n        # Bot message with both bot_id and app_id: filtered as BOT\n        (\n            {\"text\": \"bot+app\", \"bot_id\": \"B1\", \"app_id\": \"A1\", \"ts\": \"1.0\"},\n            SlackMessageFilterReason.BOT,\n        ),\n        # DanswerBot Testing is explicitly allowed through\n        (\n            {\n                \"text\": \"danswer test\",\n                \"bot_id\": \"B999\",\n                \"bot_profile\": {\"name\": \"DanswerBot Testing\"},\n                \"ts\": \"1.0\",\n            },\n            None,\n        ),\n        # channel_join subtype: filtered as DISALLOWED\n        (\n            {\"text\": \"joined\", \"subtype\": \"channel_join\", \"ts\": \"1.0\"},\n            SlackMessageFilterReason.DISALLOWED,\n        ),\n        # channel_leave subtype: filtered as DISALLOWED\n        (\n            {\"text\": \"left\", \"subtype\": \"channel_leave\", \"ts\": \"1.0\"},\n            SlackMessageFilterReason.DISALLOWED,\n        ),\n        # pinned_item subtype: filtered as DISALLOWED\n        (\n            {\"text\": \"pinned\", \"subtype\": \"pinned_item\", \"ts\": \"1.0\"},\n            SlackMessageFilterReason.DISALLOWED,\n        ),\n        # Empty subtype: not filtered\n        (\n            {\"text\": \"normal\", \"subtype\": \"\", \"ts\": \"1.0\"},\n            None,\n        ),\n    ],\n    ids=[\n        \"regular_user_message\",\n        \"bot_id_message\",\n        \"app_id_message\",\n        \"bot_and_app_id\",\n        \"danswerbot_testing_allowed\",\n        \"channel_join\",\n        \"channel_leave\",\n        \"pinned_item\",\n        \"empty_subtype\",\n    ],\n)\ndef test_default_msg_filter(\n    message: MessageType,\n    expected_reason: SlackMessageFilterReason | None,\n) -> None:\n    assert default_msg_filter(message) == expected_reason\n\n\n# -- _bot_inclusive_msg_filter tests --\n\n\n@pytest.mark.parametrize(\n    \"message,expected_reason\",\n    [\n        # Regular user message: not filtered\n        (\n            {\"text\": \"hello\", \"user\": \"U123\", \"ts\": \"1.0\"},\n            None,\n        ),\n        # Bot message: NOT filtered (this is the whole point)\n        (\n            {\"text\": \"automated update\", \"bot_id\": \"B123\", \"ts\": \"1.0\"},\n            None,\n        ),\n        # App message: NOT filtered\n        (\n            {\"text\": \"app notification\", \"app_id\": \"A123\", \"ts\": \"1.0\"},\n            None,\n        ),\n        # channel_join subtype: still filtered as DISALLOWED\n        (\n            {\"text\": \"joined\", \"subtype\": \"channel_join\", \"ts\": \"1.0\"},\n            SlackMessageFilterReason.DISALLOWED,\n        ),\n        # channel_leave subtype: still filtered as DISALLOWED\n        (\n            {\"text\": \"left\", \"subtype\": \"channel_leave\", \"ts\": \"1.0\"},\n            SlackMessageFilterReason.DISALLOWED,\n        ),\n    ],\n    ids=[\n        \"regular_user_message\",\n        \"bot_message_allowed\",\n        \"app_message_allowed\",\n        \"channel_join_still_filtered\",\n        \"channel_leave_still_filtered\",\n    ],\n)\ndef test_bot_inclusive_msg_filter(\n    message: MessageType,\n    expected_reason: SlackMessageFilterReason | None,\n) -> None:\n    assert _bot_inclusive_msg_filter(message) == expected_reason\n\n\n# -- SlackConnector config tests --\n\n\ndef test_default_filter_when_include_bot_messages_false() -> None:\n    \"\"\"When include_bot_messages is False (default), the default filter is used.\"\"\"\n    connector = SlackConnector(use_redis=False)\n    assert connector.msg_filter_func is default_msg_filter\n\n\ndef test_bot_inclusive_filter_when_include_bot_messages_true() -> None:\n    \"\"\"When include_bot_messages is True, the bot-inclusive filter is used.\"\"\"\n    connector = SlackConnector(include_bot_messages=True, use_redis=False)\n    assert connector.msg_filter_func is _bot_inclusive_msg_filter\n\n\ndef test_include_bot_messages_defaults_to_false() -> None:\n    \"\"\"The include_bot_messages config defaults to False for backward compatibility.\"\"\"\n    connector = SlackConnector(use_redis=False)\n    assert connector.include_bot_messages is False\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/teams/test_collect_teams.py",
    "content": "\"\"\"Test the OData filtering for MS Teams with special character handling.\"\"\"\n\nfrom unittest.mock import MagicMock\n\nfrom onyx.connectors.teams.connector import _collect_all_teams\n\n\ndef test_special_characters_in_team_names() -> None:\n    \"\"\"Test that team names with special characters use client-side filtering.\"\"\"\n    mock_graph_client = MagicMock()\n\n    # Mock team with special characters\n    mock_team = MagicMock()\n    mock_team.id = \"test-id\"\n    mock_team.display_name = \"Research & Development (R&D) Team\"\n    mock_team.properties = {}\n\n    # Mock successful responses for client-side filtering\n    mock_team_collection = MagicMock()\n    mock_team_collection.has_next = False\n    mock_team_collection.__iter__ = lambda self: iter([mock_team])  # noqa: ARG005\n\n    mock_get_query = MagicMock()\n    mock_top_query = MagicMock()\n    mock_top_query.execute_query.return_value = mock_team_collection\n    mock_get_query.top.return_value = mock_top_query\n    mock_graph_client.teams.get = MagicMock(return_value=mock_get_query)\n\n    # Test with team name containing special characters (has &, parentheses)\n    # This should use client-side filtering (get().top()) instead of OData filtering\n    result = _collect_all_teams(\n        mock_graph_client, [\"Research & Development (R&D) Team\"]\n    )\n\n    # Verify that get().top() was called for client-side filtering\n    mock_graph_client.teams.get.assert_called()\n    mock_get_query.top.assert_called_with(50)\n\n    # Verify the team was found through client-side filtering\n    assert len(result) == 1\n    assert result[0].display_name == \"Research & Development (R&D) Team\"\n\n\ndef test_single_quote_escaping() -> None:\n    \"\"\"Test that team names with single quotes use OData filtering with proper escaping.\"\"\"\n    mock_graph_client = MagicMock()\n\n    # Mock successful responses\n    mock_team_collection = MagicMock()\n    mock_team_collection.has_next = False\n    mock_team_collection.__iter__ = lambda self: iter([])  # noqa: ARG005\n\n    mock_get_query = MagicMock()\n    mock_filter_query = MagicMock()\n    mock_filter_query.before_execute = MagicMock(return_value=mock_filter_query)\n    mock_filter_query.execute_query.return_value = mock_team_collection\n    mock_get_query.filter.return_value = mock_filter_query\n    mock_graph_client.teams.get = MagicMock(return_value=mock_get_query)\n\n    # Test with a team name containing a single quote (no &, (, ) so uses OData)\n    _collect_all_teams(mock_graph_client, [\"Team's Group\"])\n\n    # Verify OData filter was used (since no special characters)\n    mock_graph_client.teams.get.assert_called()\n    mock_get_query.filter.assert_called_once()\n\n    # Verify the filter: single quote should be escaped to '' for OData syntax\n    filter_arg = mock_get_query.filter.call_args[0][0]\n    expected_filter = \"displayName eq 'Team''s Group'\"\n    assert (\n        filter_arg == expected_filter\n    ), f\"Expected: {expected_filter}, Got: {filter_arg}\"\n\n\ndef test_helper_functions() -> None:\n    \"\"\"Test the helper functions for team name processing.\"\"\"\n    from onyx.connectors.teams.connector import (\n        _escape_odata_string,\n        _has_odata_incompatible_chars,\n        _can_use_odata_filter,\n    )\n\n    # Test OData string escaping\n    assert _escape_odata_string(\"Team's Group\") == \"Team''s Group\"\n    assert _escape_odata_string(\"Normal Team\") == \"Normal Team\"\n\n    # Test special character detection\n    assert _has_odata_incompatible_chars([\"R&D Team\"])\n    assert _has_odata_incompatible_chars([\"Team (Alpha)\"])\n    assert not _has_odata_incompatible_chars([\"Normal Team\"])\n    assert not _has_odata_incompatible_chars([])\n    assert not _has_odata_incompatible_chars(None)\n\n    # Test filtering strategy determination\n    can_use, safe, problematic = _can_use_odata_filter([\"Normal Team\", \"R&D Team\"])\n    assert can_use\n    assert \"Normal Team\" in safe\n    assert \"R&D Team\" in problematic\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/test_connector_factory.py",
    "content": "\"\"\"\nUnit tests for lazy loading connector factory to validate:\n1. All connector mappings are correct\n2. Module paths and class names are valid\n3. Error handling works properly\n4. Caching functions correctly\n\"\"\"\n\nimport importlib\nfrom unittest.mock import MagicMock\nfrom unittest.mock import Mock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.factory import _connector_cache\nfrom onyx.connectors.factory import _load_connector_class\nfrom onyx.connectors.factory import ConnectorMissingException\nfrom onyx.connectors.factory import identify_connector_class\nfrom onyx.connectors.factory import instantiate_connector\nfrom onyx.connectors.interfaces import BaseConnector\nfrom onyx.connectors.models import InputType\nfrom onyx.connectors.registry import CONNECTOR_CLASS_MAP\nfrom onyx.connectors.registry import ConnectorMapping\n\n\nclass TestConnectorMappingValidation:\n    \"\"\"Test that all connector mappings are valid.\"\"\"\n\n    def test_all_connector_mappings_exist(self) -> None:\n        \"\"\"Test that all mapped modules and classes actually exist.\"\"\"\n        errors = []\n\n        for source, mapping in CONNECTOR_CLASS_MAP.items():\n            try:\n                # Try to import the module\n                module = importlib.import_module(mapping.module_path)\n\n                # Try to get the class\n                connector_class = getattr(module, mapping.class_name)\n\n                # Verify it's a subclass of BaseConnector\n                if not issubclass(connector_class, BaseConnector):\n                    errors.append(\n                        f\"{source.value}: {mapping.class_name} is not a BaseConnector subclass\"\n                    )\n\n            except ImportError as e:\n                errors.append(\n                    f\"{source.value}: Failed to import {mapping.module_path} - {e}\"\n                )\n            except AttributeError as e:\n                errors.append(\n                    f\"{source.value}: Class {mapping.class_name} not found in {mapping.module_path} - {e}\"\n                )\n\n        if errors:\n            pytest.fail(\"Connector mapping validation failed:\\n\" + \"\\n\".join(errors))\n\n    def test_no_duplicate_mappings(self) -> None:\n        \"\"\"Test that each DocumentSource only appears once in the mapping.\"\"\"\n        sources = list(CONNECTOR_CLASS_MAP.keys())\n        unique_sources = set(sources)\n\n        assert len(sources) == len(\n            unique_sources\n        ), \"Duplicate DocumentSource entries found\"\n\n    def test_blob_storage_connectors_correct(self) -> None:\n        \"\"\"Test that all blob storage sources map to the same connector.\"\"\"\n        blob_sources = [\n            DocumentSource.S3,\n            DocumentSource.R2,\n            DocumentSource.GOOGLE_CLOUD_STORAGE,\n            DocumentSource.OCI_STORAGE,\n        ]\n\n        expected_mapping = ConnectorMapping(\n            module_path=\"onyx.connectors.blob.connector\",\n            class_name=\"BlobStorageConnector\",\n        )\n\n        for source in blob_sources:\n            assert (\n                CONNECTOR_CLASS_MAP[source] == expected_mapping\n            ), f\"{source.value} should map to BlobStorageConnector\"\n\n\nclass TestConnectorClassLoading:\n    \"\"\"Test the lazy loading mechanism.\"\"\"\n\n    def setup_method(self) -> None:\n        \"\"\"Clear cache before each test.\"\"\"\n        _connector_cache.clear()\n\n    def test_load_connector_class_success(self) -> None:\n        \"\"\"Test successful connector class loading.\"\"\"\n        # Use a simple connector that should always exist\n        connector_class = _load_connector_class(DocumentSource.WEB)\n\n        assert connector_class is not None\n        assert issubclass(connector_class, BaseConnector)\n        assert connector_class.__name__ == \"WebConnector\"\n\n    def test_load_connector_class_caching(self) -> None:\n        \"\"\"Test that connector classes are cached after first load.\"\"\"\n        assert len(_connector_cache) == 0\n\n        # Load connector first time\n        connector_class1 = _load_connector_class(DocumentSource.WEB)\n        assert len(_connector_cache) == 1\n        assert DocumentSource.WEB in _connector_cache\n\n        # Load same connector second time - should use cache\n        connector_class2 = _load_connector_class(DocumentSource.WEB)\n        assert connector_class1 is connector_class2  # Same object reference\n        assert len(_connector_cache) == 1  # Cache size unchanged\n\n    @patch(\"importlib.import_module\")\n    def test_load_connector_class_import_error(self, mock_import: Mock) -> None:\n        \"\"\"Test handling of import errors.\"\"\"\n        mock_import.side_effect = ImportError(\"Module not found\")\n\n        with pytest.raises(ConnectorMissingException) as exc_info:\n            _load_connector_class(DocumentSource.WEB)\n\n        assert (\n            \"Failed to import WebConnector from onyx.connectors.web.connector\"\n            in str(exc_info.value)\n        )\n\n    @patch(\"importlib.import_module\")\n    def test_load_connector_class_attribute_error(self, mock_import: Mock) -> None:\n        \"\"\"Test handling of missing class in module.\"\"\"\n\n        # Create a custom mock that raises AttributeError for the specific class\n        class MockModule:\n            def __getattr__(self, name: str) -> MagicMock:\n                if name == \"WebConnector\":\n                    raise AttributeError(\"Class not found\")\n                return MagicMock()\n\n        mock_import.return_value = MockModule()\n\n        with pytest.raises(ConnectorMissingException) as exc_info:\n            _load_connector_class(DocumentSource.WEB)\n\n        assert (\n            \"Failed to import WebConnector from onyx.connectors.web.connector\"\n            in str(exc_info.value)\n        )\n\n\nclass TestIdentifyConnectorClass:\n    \"\"\"Test the identify_connector_class function.\"\"\"\n\n    def setup_method(self) -> None:\n        \"\"\"Clear cache before each test.\"\"\"\n        _connector_cache.clear()\n\n    def test_identify_connector_basic(self) -> None:\n        \"\"\"Test basic connector identification.\"\"\"\n        connector_class = identify_connector_class(\n            DocumentSource.GITHUB, InputType.SLIM_RETRIEVAL\n        )\n\n        assert connector_class is not None\n        assert issubclass(connector_class, BaseConnector)\n        assert connector_class.__name__ == \"GithubConnector\"\n\n    def test_identify_connector_slack_special_case(self) -> None:\n        \"\"\"Test Slack connector special handling.\"\"\"\n        # Test POLL input type\n        slack_poll = identify_connector_class(DocumentSource.SLACK, InputType.POLL)\n        assert slack_poll.__name__ == \"SlackConnector\"\n\n        # Test SLIM_RETRIEVAL input type\n        slack_slim = identify_connector_class(\n            DocumentSource.SLACK, InputType.SLIM_RETRIEVAL\n        )\n        assert slack_slim.__name__ == \"SlackConnector\"\n\n        # Should be the same class\n        assert slack_poll is slack_slim\n\n    def test_identify_connector_without_input_type(self) -> None:\n        \"\"\"Test connector identification without specifying input type.\"\"\"\n        connector_class = identify_connector_class(DocumentSource.GITHUB)\n\n        assert connector_class is not None\n        assert connector_class.__name__ == \"GithubConnector\"\n\n\nclass TestConnectorMappingIntegrity:\n    \"\"\"Test integrity of the connector mapping data.\"\"\"\n\n    def test_all_document_sources_mapped(self) -> None:\n        \"\"\"Test that all DocumentSource values have mappings (where expected).\"\"\"\n        # Get all DocumentSource enum values\n        all_sources = set(DocumentSource)\n        mapped_sources = set(CONNECTOR_CLASS_MAP.keys())\n\n        expected_unmapped = {\n            DocumentSource.INGESTION_API,  # This is handled differently\n            DocumentSource.REQUESTTRACKER,  # Not yet implemented or special case\n            DocumentSource.NOT_APPLICABLE,  # Special placeholder, no connector needed\n            DocumentSource.USER_FILE,  # Special placeholder, no connector needed\n            DocumentSource.CRAFT_FILE,  # Direct S3 upload via API, no connector needed\n            # Add other legitimately unmapped sources here if they exist\n        }\n\n        unmapped_sources = all_sources - mapped_sources - expected_unmapped\n\n        if unmapped_sources:\n            pytest.fail(\n                f\"DocumentSource values without connector mappings: {[s.value for s in unmapped_sources]}\"\n            )\n\n    def test_mapping_format_consistency(self) -> None:\n        \"\"\"Test that all mappings follow the expected format.\"\"\"\n        for source, mapping in CONNECTOR_CLASS_MAP.items():\n            assert isinstance(\n                mapping, ConnectorMapping\n            ), f\"{source.value} mapping is not a ConnectorMapping\"\n\n            assert isinstance(\n                mapping.module_path, str\n            ), f\"{source.value} module_path is not a string\"\n            assert isinstance(\n                mapping.class_name, str\n            ), f\"{source.value} class_name is not a string\"\n            assert mapping.module_path.startswith(\n                \"onyx.connectors.\"\n            ), f\"{source.value} module_path doesn't start with onyx.connectors.\"\n            assert mapping.class_name.endswith(\n                \"Connector\"\n            ), f\"{source.value} class_name doesn't end with Connector\"\n\n\nclass TestInstantiateConnectorIntegration:\n    \"\"\"Test that the lazy loading works with the main instantiate_connector function.\"\"\"\n\n    def setup_method(self) -> None:\n        \"\"\"Clear cache before each test.\"\"\"\n        _connector_cache.clear()\n\n    def test_instantiate_connector_loads_class_lazily(self) -> None:\n        \"\"\"Test that instantiate_connector triggers lazy loading.\"\"\"\n        from onyx.utils.sensitive import make_mock_sensitive_value\n\n        # Mock the database session and credential\n        mock_session = MagicMock()\n        mock_credential = MagicMock()\n        mock_credential.id = 123\n        mock_credential.credential_json = make_mock_sensitive_value({\"test\": \"data\"})\n\n        # This should trigger lazy loading but will fail on actual instantiation\n        # due to missing real configuration - that's expected\n        with pytest.raises(Exception):  # We expect some kind of error due to mock data\n            instantiate_connector(\n                mock_session,\n                DocumentSource.WEB,  # Simple connector\n                InputType.SLIM_RETRIEVAL,\n                {},  # Empty config\n                mock_credential,\n            )\n\n        # But the class should have been loaded into cache\n        assert DocumentSource.WEB in _connector_cache\n        assert _connector_cache[DocumentSource.WEB].__name__ == \"WebConnector\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/test_document_metadata_coercion.py",
    "content": "from onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentBase\nfrom onyx.connectors.models import TextSection\n\n\ndef _minimal_doc_kwargs(metadata: dict) -> dict:\n    return {\n        \"id\": \"test-doc\",\n        \"sections\": [TextSection(text=\"hello\", link=\"http://example.com\")],\n        \"source\": DocumentSource.NOT_APPLICABLE,\n        \"semantic_identifier\": \"Test Doc\",\n        \"metadata\": metadata,\n    }\n\n\ndef test_int_values_coerced_to_str() -> None:\n    doc = Document(**_minimal_doc_kwargs({\"count\": 42}))\n    assert doc.metadata == {\"count\": \"42\"}\n\n\ndef test_float_values_coerced_to_str() -> None:\n    doc = Document(**_minimal_doc_kwargs({\"score\": 3.14}))\n    assert doc.metadata == {\"score\": \"3.14\"}\n\n\ndef test_bool_values_coerced_to_str() -> None:\n    doc = Document(**_minimal_doc_kwargs({\"active\": True}))\n    assert doc.metadata == {\"active\": \"True\"}\n\n\ndef test_list_of_ints_coerced_to_list_of_str() -> None:\n    doc = Document(**_minimal_doc_kwargs({\"ids\": [1, 2, 3]}))\n    assert doc.metadata == {\"ids\": [\"1\", \"2\", \"3\"]}\n\n\ndef test_list_of_mixed_types_coerced_to_list_of_str() -> None:\n    doc = Document(**_minimal_doc_kwargs({\"tags\": [\"a\", 1, True, 2.5]}))\n    assert doc.metadata == {\"tags\": [\"a\", \"1\", \"True\", \"2.5\"]}\n\n\ndef test_list_of_dicts_coerced_to_list_of_str() -> None:\n    raw = {\"nested\": [{\"key\": \"val\"}, {\"key2\": \"val2\"}]}\n    doc = Document(**_minimal_doc_kwargs(raw))\n    assert doc.metadata == {\"nested\": [\"{'key': 'val'}\", \"{'key2': 'val2'}\"]}\n\n\ndef test_dict_value_coerced_to_str() -> None:\n    raw = {\"info\": {\"inner_key\": \"inner_val\"}}\n    doc = Document(**_minimal_doc_kwargs(raw))\n    assert doc.metadata == {\"info\": \"{'inner_key': 'inner_val'}\"}\n\n\ndef test_none_value_coerced_to_str() -> None:\n    doc = Document(**_minimal_doc_kwargs({\"empty\": None}))\n    assert doc.metadata == {\"empty\": \"None\"}\n\n\ndef test_already_valid_str_values_unchanged() -> None:\n    doc = Document(**_minimal_doc_kwargs({\"key\": \"value\"}))\n    assert doc.metadata == {\"key\": \"value\"}\n\n\ndef test_already_valid_list_of_str_unchanged() -> None:\n    doc = Document(**_minimal_doc_kwargs({\"tags\": [\"a\", \"b\", \"c\"]}))\n    assert doc.metadata == {\"tags\": [\"a\", \"b\", \"c\"]}\n\n\ndef test_empty_metadata_unchanged() -> None:\n    doc = Document(**_minimal_doc_kwargs({}))\n    assert doc.metadata == {}\n\n\ndef test_mixed_metadata_values() -> None:\n    raw = {\n        \"str_val\": \"hello\",\n        \"int_val\": 99,\n        \"list_val\": [1, \"two\", 3.0],\n        \"dict_val\": {\"nested\": True},\n    }\n    doc = Document(**_minimal_doc_kwargs(raw))\n    assert doc.metadata == {\n        \"str_val\": \"hello\",\n        \"int_val\": \"99\",\n        \"list_val\": [\"1\", \"two\", \"3.0\"],\n        \"dict_val\": \"{'nested': True}\",\n    }\n\n\ndef test_coercion_works_on_base_class() -> None:\n    kwargs = _minimal_doc_kwargs({\"count\": 42})\n    kwargs.pop(\"source\")\n    kwargs.pop(\"id\")\n    doc = DocumentBase(**kwargs)\n    assert doc.metadata == {\"count\": \"42\"}\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/test_microsoft_graph_env.py",
    "content": "import pytest\nfrom office365.graph_client import AzureEnvironment  # type: ignore[import-untyped]\n\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.microsoft_graph_env import resolve_microsoft_environment\n\n\ndef test_resolve_global_defaults() -> None:\n    env = resolve_microsoft_environment(\n        \"https://graph.microsoft.com\", \"https://login.microsoftonline.com\"\n    )\n    assert env.environment == AzureEnvironment.Global\n    assert env.sharepoint_domain_suffix == \"sharepoint.com\"\n\n\ndef test_resolve_gcc_high() -> None:\n    env = resolve_microsoft_environment(\n        \"https://graph.microsoft.us\", \"https://login.microsoftonline.us\"\n    )\n    assert env.environment == AzureEnvironment.USGovernmentHigh\n    assert env.graph_host == \"https://graph.microsoft.us\"\n    assert env.authority_host == \"https://login.microsoftonline.us\"\n    assert env.sharepoint_domain_suffix == \"sharepoint.us\"\n\n\ndef test_resolve_dod() -> None:\n    env = resolve_microsoft_environment(\n        \"https://dod-graph.microsoft.us\", \"https://login.microsoftonline.us\"\n    )\n    assert env.environment == AzureEnvironment.USGovernmentDoD\n    assert env.sharepoint_domain_suffix == \"sharepoint.us\"\n\n\ndef test_trailing_slashes_are_stripped() -> None:\n    env = resolve_microsoft_environment(\n        \"https://graph.microsoft.us/\", \"https://login.microsoftonline.us/\"\n    )\n    assert env.environment == AzureEnvironment.USGovernmentHigh\n\n\ndef test_mismatched_authority_raises() -> None:\n    with pytest.raises(ConnectorValidationError, match=\"inconsistent\"):\n        resolve_microsoft_environment(\n            \"https://graph.microsoft.us\", \"https://login.microsoftonline.com\"\n        )\n\n\ndef test_unknown_graph_host_raises() -> None:\n    with pytest.raises(ConnectorValidationError, match=\"Unsupported\"):\n        resolve_microsoft_environment(\n            \"https://graph.example.com\", \"https://login.example.com\"\n        )\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/utils.py",
    "content": "from typing import cast\nfrom typing import Generic\nfrom typing import TypeVar\n\nfrom pydantic import BaseModel\n\nfrom onyx.connectors.connector_runner import CheckpointOutputWrapper\nfrom onyx.connectors.interfaces import CheckpointedConnector\nfrom onyx.connectors.interfaces import SecondsSinceUnixEpoch\nfrom onyx.connectors.models import ConnectorCheckpoint\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\n\n_ITERATION_LIMIT = 100_000\n\n\nCT = TypeVar(\"CT\", bound=ConnectorCheckpoint)\n\n\nclass SingleConnectorCallOutput(BaseModel, Generic[CT]):\n    items: list[Document | ConnectorFailure]\n    next_checkpoint: CT\n\n\ndef load_everything_from_checkpoint_connector(\n    connector: CheckpointedConnector[CT],\n    start: SecondsSinceUnixEpoch,\n    end: SecondsSinceUnixEpoch,\n) -> list[SingleConnectorCallOutput[CT]]:\n\n    checkpoint = cast(CT, connector.build_dummy_checkpoint())\n    return load_everything_from_checkpoint_connector_from_checkpoint(\n        connector, start, end, checkpoint\n    )\n\n\ndef load_everything_from_checkpoint_connector_from_checkpoint(\n    connector: CheckpointedConnector[CT],\n    start: SecondsSinceUnixEpoch,\n    end: SecondsSinceUnixEpoch,\n    checkpoint: CT,\n) -> list[SingleConnectorCallOutput[CT]]:\n    num_iterations = 0\n    outputs: list[SingleConnectorCallOutput[CT]] = []\n    while checkpoint.has_more:\n        items: list[Document | ConnectorFailure] = []\n        doc_batch_generator = CheckpointOutputWrapper[CT]()(\n            connector.load_from_checkpoint(start, end, checkpoint)\n        )\n        for document, hierarchy_node, failure, next_checkpoint in doc_batch_generator:\n            if hierarchy_node is not None:\n                continue\n            if failure is not None:\n                items.append(failure)\n            if document is not None:\n                items.append(document)\n            if next_checkpoint is not None:\n                checkpoint = next_checkpoint\n\n        outputs.append(\n            SingleConnectorCallOutput(items=items, next_checkpoint=checkpoint)\n        )\n\n        num_iterations += 1\n        if num_iterations > _ITERATION_LIMIT:\n            raise RuntimeError(\"Too many iterations. Infinite loop?\")\n\n    return outputs\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/zendesk/test_zendesk_checkpointing.py",
    "content": "import time\nfrom collections.abc import Callable\nfrom collections.abc import Generator\nfrom typing import Any\nfrom typing import cast\nfrom unittest.mock import call\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom requests.exceptions import HTTPError\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.exceptions import ConnectorValidationError\nfrom onyx.connectors.exceptions import CredentialExpiredError\nfrom onyx.connectors.exceptions import InsufficientPermissionsError\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.zendesk.connector import ZendeskClient\nfrom onyx.connectors.zendesk.connector import ZendeskConnector\nfrom tests.unit.onyx.connectors.utils import load_everything_from_checkpoint_connector\n\n\n@pytest.fixture\ndef mock_zendesk_client() -> MagicMock:\n    \"\"\"Create a mock Zendesk client\"\"\"\n    mock = MagicMock(spec=ZendeskClient)\n    mock.base_url = \"https://test.zendesk.com/api/v2\"\n    mock.auth = (\"test@example.com/token\", \"test_token\")\n    mock.make_request = MagicMock()\n    return mock\n\n\n@pytest.fixture\ndef zendesk_connector(\n    mock_zendesk_client: MagicMock,\n) -> Generator[ZendeskConnector, None, None]:\n    \"\"\"Create a Zendesk connector with mocked client\"\"\"\n    connector = ZendeskConnector(content_type=\"articles\")\n    connector.client = mock_zendesk_client\n    yield connector\n\n\n@pytest.fixture\ndef unmocked_zendesk_connector() -> Generator[ZendeskConnector, None, None]:\n    \"\"\"Create a Zendesk connector with unmocked client\"\"\"\n    zendesk_connector = ZendeskConnector(content_type=\"articles\")\n    zendesk_connector.client = ZendeskClient(\n        \"test\", \"test@example.com/token\", \"test_token\"\n    )\n    yield zendesk_connector\n\n\n@pytest.fixture\ndef create_mock_article() -> Callable[..., dict[str, Any]]:\n    def _create_mock_article(\n        id: int = 1,\n        title: str = \"Test Article\",\n        body: str = \"Test Content\",\n        updated_at: str = \"2023-01-01T12:00:00Z\",\n        author_id: str = \"123\",\n        label_names: list[str] | None = None,\n        draft: bool = False,\n    ) -> dict[str, Any]:\n        \"\"\"Helper to create a mock article\"\"\"\n        return {\n            \"id\": id,\n            \"title\": title,\n            \"body\": body,\n            \"updated_at\": updated_at,\n            \"author_id\": author_id,\n            \"label_names\": label_names or [],\n            \"draft\": draft,\n            \"html_url\": f\"https://test.zendesk.com/hc/en-us/articles/{id}\",\n        }\n\n    return _create_mock_article\n\n\n@pytest.fixture\ndef create_mock_ticket() -> Callable[..., dict[str, Any]]:\n    def _create_mock_ticket(\n        id: int = 1,\n        subject: str = \"Test Ticket\",\n        description: str = \"Test Description\",\n        updated_at: str = \"2023-01-01T12:00:00Z\",\n        submitter_id: str = \"123\",\n        status: str = \"open\",\n        priority: str = \"normal\",\n        tags: list[str] | None = None,\n        ticket_type: str = \"question\",\n    ) -> dict[str, Any]:\n        \"\"\"Helper to create a mock ticket\"\"\"\n        return {\n            \"id\": id,\n            \"subject\": subject,\n            \"description\": description,\n            \"updated_at\": updated_at,\n            \"submitter\": submitter_id,\n            \"status\": status,\n            \"priority\": priority,\n            \"tags\": tags or [],\n            \"type\": ticket_type,\n            \"url\": f\"https://test.zendesk.com/agent/tickets/{id}\",\n        }\n\n    return _create_mock_ticket\n\n\n@pytest.fixture\ndef create_mock_author() -> Callable[..., dict[str, Any]]:\n    def _create_mock_author(\n        id: str = \"123\",\n        name: str = \"Test User\",\n        email: str = \"test@example.com\",\n    ) -> dict[str, Any]:\n        \"\"\"Helper to create a mock author\"\"\"\n        return {\n            \"user\": {\n                \"id\": id,\n                \"name\": name,\n                \"email\": email,\n            }\n        }\n\n    return _create_mock_author\n\n\ndef test_load_from_checkpoint_articles_happy_path(\n    zendesk_connector: ZendeskConnector,\n    mock_zendesk_client: MagicMock,\n    create_mock_article: Callable[..., dict[str, Any]],\n    create_mock_author: Callable[..., dict[str, Any]],\n) -> None:\n    \"\"\"Test loading articles from checkpoint - happy path\"\"\"\n    # Set up mock responses\n    mock_article1 = create_mock_article(id=1, title=\"Article 1\")\n    mock_article2 = create_mock_article(id=2, title=\"Article 2\")\n    mock_author = create_mock_author()\n\n    # Mock API responses\n    mock_zendesk_client.make_request.side_effect = [\n        # First call: content tags\n        {\"records\": []},\n        # Second call: articles page\n        {\n            \"articles\": [mock_article1, mock_article2],\n            \"meta\": {\n                \"has_more\": False,\n                \"after_cursor\": None,\n            },\n        },\n        # Third call: author info\n        mock_author,\n    ]\n\n    # Call load_from_checkpoint\n    end_time = time.time()\n    outputs = load_everything_from_checkpoint_connector(zendesk_connector, 0, end_time)\n\n    # Check that we got the documents\n    assert len(outputs) == 2\n    assert outputs[0].next_checkpoint.cached_content_tags is not None\n\n    assert len(outputs[1].items) == 2\n\n    # Check first document\n    doc1 = outputs[1].items[0]\n    assert isinstance(doc1, Document)\n    assert doc1.id == \"article:1\"\n    assert doc1.semantic_identifier == \"Article 1\"\n    assert doc1.source == DocumentSource.ZENDESK\n\n    # Check second document\n    doc2 = outputs[1].items[1]\n    assert isinstance(doc2, Document)\n    assert doc2.id == \"article:2\"\n    assert doc2.semantic_identifier == \"Article 2\"\n    assert doc2.source == DocumentSource.ZENDESK\n\n    # Check checkpoint state\n    assert not outputs[1].next_checkpoint.has_more\n\n\ndef test_load_from_checkpoint_tickets_happy_path(\n    zendesk_connector: ZendeskConnector,\n    mock_zendesk_client: MagicMock,\n    create_mock_ticket: Callable[..., dict[str, Any]],\n    create_mock_author: Callable[..., dict[str, Any]],\n) -> None:\n    \"\"\"Test loading tickets from checkpoint - happy path\"\"\"\n    # Configure connector for tickets\n    zendesk_connector.content_type = \"tickets\"\n\n    # Set up mock responses\n    mock_ticket1 = create_mock_ticket(id=1, subject=\"Ticket 1\")\n    mock_ticket2 = create_mock_ticket(id=2, subject=\"Ticket 2\")\n    mock_author = create_mock_author()\n\n    # Mock API responses\n    mock_zendesk_client.make_request.side_effect = [\n        # First call: content tags\n        {\"records\": []},\n        # Second call: tickets page\n        {\n            \"tickets\": [mock_ticket1, mock_ticket2],\n            \"end_of_stream\": True,\n            \"end_time\": int(time.time()),\n        },\n        # Third call: author info\n        mock_author,\n        # Fourth call: comments page\n        {\"comments\": []},\n        # Fifth call: comments page\n        {\"comments\": []},\n    ]\n\n    zendesk_connector.client = mock_zendesk_client\n\n    # Call load_from_checkpoint\n    end_time = time.time()\n    outputs = load_everything_from_checkpoint_connector(zendesk_connector, 0, end_time)\n\n    # Check that we got the documents\n    assert len(outputs) == 2\n    assert outputs[0].next_checkpoint.cached_content_tags is not None\n    assert len(outputs[1].items) == 2\n\n    # Check first document\n    doc1 = outputs[1].items[0]\n    print(doc1, type(doc1))\n    assert isinstance(doc1, Document)\n    assert doc1.id == \"zendesk_ticket_1\"\n    assert doc1.semantic_identifier == \"Ticket #1: Ticket 1\"\n    assert doc1.source == DocumentSource.ZENDESK\n\n    # Check second document\n    doc2 = outputs[1].items[1]\n    assert isinstance(doc2, Document)\n    assert doc2.id == \"zendesk_ticket_2\"\n    assert doc2.semantic_identifier == \"Ticket #2: Ticket 2\"\n    assert doc2.source == DocumentSource.ZENDESK\n\n    # Check checkpoint state\n    assert not outputs[1].next_checkpoint.has_more\n\n\ndef test_load_from_checkpoint_with_rate_limit(\n    unmocked_zendesk_connector: ZendeskConnector,\n    create_mock_article: Callable[..., dict[str, Any]],\n    create_mock_author: Callable[..., dict[str, Any]],\n) -> None:\n    \"\"\"Test loading from checkpoint with rate limit handling\"\"\"\n    zendesk_connector = unmocked_zendesk_connector\n    # Set up mock responses\n    mock_article = create_mock_article()\n    mock_author = create_mock_author()\n    author_response = MagicMock()\n    author_response.status_code = 200\n    author_response.json.return_value = mock_author\n\n    # Create mock responses for requests.get\n    rate_limit_response = MagicMock()\n    rate_limit_response.status_code = 429\n    rate_limit_response.headers = {\"Retry-After\": \"60\"}\n    rate_limit_response.raise_for_status.side_effect = HTTPError(\n        response=rate_limit_response\n    )\n\n    success_response = MagicMock()\n    success_response.status_code = 200\n    success_response.json.return_value = {\n        \"articles\": [mock_article],\n        \"meta\": {\n            \"has_more\": False,\n            \"after_cursor\": None,\n        },\n    }\n\n    # Mock requests.get to simulate rate limit then success\n    with patch(\"onyx.connectors.zendesk.connector.requests.get\") as mock_get:\n        mock_get.side_effect = [\n            # First call: content tags\n            MagicMock(\n                status_code=200,\n                json=lambda: {\"records\": [], \"meta\": {\"has_more\": False}},\n            ),\n            # Second call: articles page (rate limited)\n            rate_limit_response,\n            # Third call: articles page (after rate limit)\n            success_response,\n            # Fourth call: author info\n            author_response,\n        ]\n\n        # Call load_from_checkpoint\n        end_time = time.time()\n        with patch(\"onyx.connectors.zendesk.connector.time.sleep\") as mock_sleep:\n            outputs = load_everything_from_checkpoint_connector(\n                zendesk_connector, 0, end_time\n            )\n            mock_sleep.assert_has_calls([call(60), call(0.1)])\n\n        # Check that we got the document after rate limit was handled\n        assert len(outputs) == 2\n        assert outputs[0].next_checkpoint.cached_content_tags is not None\n        assert len(outputs[1].items) == 1\n        assert isinstance(outputs[1].items[0], Document)\n        assert outputs[1].items[0].id == \"article:1\"\n\n        # Verify the requests were made with correct parameters\n        assert mock_get.call_count == 4\n        # First call should be for content tags\n        args, kwargs = mock_get.call_args_list[0]\n        assert \"guide/content_tags\" in args[0]\n        # Second call should be for articles (rate limited)\n        args, kwargs = mock_get.call_args_list[1]\n        assert \"help_center/articles\" in args[0]\n        # Third call should be for articles (success)\n        args, kwargs = mock_get.call_args_list[2]\n        assert \"help_center/articles\" in args[0]\n        # Fourth call should be for author info\n        args, kwargs = mock_get.call_args_list[3]\n        assert \"users/123\" in args[0]\n\n\ndef test_load_from_checkpoint_with_empty_response(\n    zendesk_connector: ZendeskConnector,\n    mock_zendesk_client: MagicMock,\n) -> None:\n    \"\"\"Test loading from checkpoint with empty response\"\"\"\n    # Mock API responses\n    mock_zendesk_client.make_request.side_effect = [\n        # First call: content tags\n        {\"records\": []},\n        # Second call: empty articles page\n        {\n            \"articles\": [],\n            \"meta\": {\n                \"has_more\": False,\n                \"after_cursor\": None,\n            },\n        },\n    ]\n\n    # Call load_from_checkpoint\n    end_time = time.time()\n    outputs = load_everything_from_checkpoint_connector(zendesk_connector, 0, end_time)\n\n    # Check that we got no documents\n    assert len(outputs) == 2\n    assert outputs[0].next_checkpoint.cached_content_tags is not None\n    assert len(outputs[1].items) == 0\n    assert not outputs[1].next_checkpoint.has_more\n\n\ndef test_load_from_checkpoint_with_skipped_article(\n    zendesk_connector: ZendeskConnector,\n    mock_zendesk_client: MagicMock,\n    create_mock_article: Callable[..., dict[str, Any]],\n) -> None:\n    \"\"\"Test loading from checkpoint with an article that should be skipped\"\"\"\n    # Set up mock responses with a draft article\n    mock_article = create_mock_article(draft=True)\n    mock_zendesk_client.make_request.side_effect = [\n        # First call: content tags\n        {\"records\": []},\n        # Second call: articles page with draft article\n        {\n            \"articles\": [mock_article],\n            \"meta\": {\n                \"has_more\": False,\n                \"after_cursor\": None,\n            },\n        },\n    ]\n\n    # Call load_from_checkpoint\n    end_time = time.time()\n    outputs = load_everything_from_checkpoint_connector(zendesk_connector, 0, end_time)\n\n    # Check that no documents were returned\n    assert len(outputs) == 2\n    assert outputs[0].next_checkpoint.cached_content_tags is not None\n    assert len(outputs[1].items) == 0\n    assert not outputs[1].next_checkpoint.has_more\n\n\ndef test_load_from_checkpoint_with_skipped_ticket(\n    zendesk_connector: ZendeskConnector,\n    mock_zendesk_client: MagicMock,\n    create_mock_ticket: Callable[..., dict[str, Any]],\n) -> None:\n    \"\"\"Test loading from checkpoint with a deleted ticket\"\"\"\n    # Configure connector for tickets\n    zendesk_connector.content_type = \"tickets\"\n\n    # Set up mock responses with a deleted ticket\n    mock_ticket = create_mock_ticket(status=\"deleted\")\n    mock_zendesk_client.make_request.side_effect = [\n        # First call: content tags\n        {\"records\": []},\n        # Second call: tickets page with deleted ticket\n        {\n            \"tickets\": [mock_ticket],\n            \"end_of_stream\": True,\n            \"end_time\": int(time.time()),\n        },\n    ]\n\n    # Call load_from_checkpoint\n    end_time = time.time()\n    outputs = load_everything_from_checkpoint_connector(zendesk_connector, 0, end_time)\n\n    # Check that no documents were returned\n    assert len(outputs) == 2\n    assert outputs[0].next_checkpoint.cached_content_tags is not None\n    assert len(outputs[1].items) == 0\n    assert not outputs[1].next_checkpoint.has_more\n\n\n@pytest.mark.parametrize(\n    \"status_code,expected_exception,expected_message\",\n    [\n        (\n            401,\n            CredentialExpiredError,\n            \"Your Zendesk credentials appear to be invalid or expired\",\n        ),\n        (\n            403,\n            InsufficientPermissionsError,\n            \"Your Zendesk token does not have sufficient permissions\",\n        ),\n        (\n            404,\n            ConnectorValidationError,\n            \"Zendesk resource not found\",\n        ),\n    ],\n)\ndef test_validate_connector_settings_errors(\n    zendesk_connector: ZendeskConnector,\n    status_code: int,\n    expected_exception: type[Exception],\n    expected_message: str,\n) -> None:\n    \"\"\"Test validation with various error scenarios\"\"\"\n    mock_response = MagicMock()\n    mock_response.status_code = status_code\n    error = HTTPError(response=mock_response)\n\n    mock_zendesk_client = cast(MagicMock, zendesk_connector.client)\n    mock_zendesk_client.make_request.side_effect = error\n\n    with pytest.raises(expected_exception) as excinfo:\n        print(\"excinfo\", excinfo)\n        zendesk_connector.validate_connector_settings()\n\n    assert expected_message in str(excinfo.value)\n\n\ndef test_validate_connector_settings_success(\n    zendesk_connector: ZendeskConnector,\n    mock_zendesk_client: MagicMock,\n) -> None:\n    \"\"\"Test successful validation\"\"\"\n    # Mock successful API response\n    mock_zendesk_client.make_request.return_value = {\n        \"articles\": [],\n        \"meta\": {\"has_more\": False},\n    }\n\n    zendesk_connector.validate_connector_settings()\n"
  },
  {
    "path": "backend/tests/unit/onyx/connectors/zendesk/test_zendesk_rate_limit.py",
    "content": "from __future__ import annotations\n\nimport types\nfrom typing import Any\nfrom typing import Dict\n\nimport pytest\n\n\nclass _FakeTime:\n    \"\"\"A controllable time module replacement.\n\n    - monotonic(): returns an internal counter (seconds)\n    - sleep(x): advances the internal counter by x seconds\n    \"\"\"\n\n    def __init__(self) -> None:\n        self._t = 0.0\n\n    def monotonic(self) -> float:\n        return self._t\n\n    def sleep(self, seconds: float) -> None:\n        # advance time without real waiting\n        self._t += float(seconds)\n\n\nclass _FakeResponse:\n    def __init__(self, json_payload: Dict[str, Any], status_code: int = 200) -> None:\n        self._json = json_payload\n        self.status_code = status_code\n        self.headers: Dict[str, str] = {}\n\n    def json(self) -> Dict[str, Any]:\n        return self._json\n\n    def raise_for_status(self) -> None:\n        # simulate OK\n        return None\n\n\ndef test_zendesk_client_per_minute_rate_limiting(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    # Import here to allow monkeypatching modules safely\n    from onyx.connectors.zendesk.connector import ZendeskClient\n    import onyx.connectors.cross_connector_utils.rate_limit_wrapper as rlw\n    import onyx.connectors.zendesk.connector as zendesk_mod\n\n    fake_time = _FakeTime()\n\n    # Patch time in both the rate limit wrapper and the zendesk connector module\n    monkeypatch.setattr(rlw, \"time\", fake_time, raising=True)\n    monkeypatch.setattr(zendesk_mod, \"time\", fake_time, raising=True)\n\n    # Stub out requests.get to avoid network and return a minimal valid payload\n    calls: list[str] = []\n\n    def _fake_get(\n        url: str,\n        auth: Any,  # noqa: ARG001\n        params: Dict[str, Any],  # noqa: ARG001\n    ) -> _FakeResponse:\n        calls.append(url)\n        # minimal Zendesk list response (articles path)\n        return _FakeResponse({\"articles\": [], \"meta\": {\"has_more\": False}})\n\n    monkeypatch.setattr(\n        zendesk_mod, \"requests\", types.SimpleNamespace(get=_fake_get), raising=True\n    )\n\n    # Build client with a small limit: 2 calls per 60 seconds\n    client = ZendeskClient(\"subd\", \"e\", \"t\", calls_per_minute=2)\n\n    # Make three calls in quick succession. The third should be rate limited\n    client.make_request(\"help_center/articles\", {\"page[size]\": 1})\n    client.make_request(\"help_center/articles\", {\"page[size]\": 1})\n\n    # At this point we've used up the 2 allowed calls within the 60s window\n    # The next call should trigger sleeps with exponential backoff until >60s elapsed\n    client.make_request(\"help_center/articles\", {\"page[size]\": 1})\n\n    # Ensure we did not actually wait in real time but logically advanced beyond a minute\n    assert fake_time.monotonic() >= 60\n    # Ensure the HTTP function was invoked three times\n    assert len(calls) == 3\n"
  },
  {
    "path": "backend/tests/unit/onyx/context/search/federated/test_slack_query_construction.py",
    "content": "from unittest.mock import MagicMock\n\nfrom onyx.context.search.federated.slack_search_utils import (\n    build_channel_query_filter,\n)\nfrom onyx.context.search.federated.slack_search_utils import matches_exclude_pattern\nfrom onyx.onyxbot.slack.models import ChannelType\n\n\nclass TestChannelPatternMatching:\n    \"\"\"Test glob pattern matching for channel exclusion\"\"\"\n\n    def test_exact_match(self) -> None:\n        \"\"\"Test exact channel name match\"\"\"\n        assert matches_exclude_pattern(\"customer-support\", [\"customer-support\"]) is True\n        assert matches_exclude_pattern(\"engineering\", [\"customer-support\"]) is False\n\n    def test_glob_pattern_star(self) -> None:\n        \"\"\"Test glob patterns with * wildcard\"\"\"\n        # Suffix wildcard\n        assert matches_exclude_pattern(\"customer-X\", [\"customer*\"]) is True\n        assert matches_exclude_pattern(\"customer-support\", [\"customer*\"]) is True\n        assert matches_exclude_pattern(\"engineering\", [\"customer*\"]) is False\n\n        # Prefix wildcard\n        assert matches_exclude_pattern(\"test-env\", [\"*-env\"]) is True\n        assert matches_exclude_pattern(\"prod-env\", [\"*-env\"]) is True\n        assert matches_exclude_pattern(\"test-staging\", [\"*-env\"]) is False\n\n        # Infix wildcard\n        assert matches_exclude_pattern(\"customer-test-env\", [\"customer*env\"]) is True\n        assert matches_exclude_pattern(\"customer-prod-env\", [\"customer*env\"]) is True\n        assert matches_exclude_pattern(\"customer-test\", [\"customer*env\"]) is False\n\n    def test_multiple_patterns(self) -> None:\n        \"\"\"Test matching against multiple patterns\"\"\"\n        patterns = [\"test-*\", \"dev-*\", \"customer*\"]\n\n        assert matches_exclude_pattern(\"test-env\", patterns) is True\n        assert matches_exclude_pattern(\"dev-env\", patterns) is True\n        assert matches_exclude_pattern(\"customer-X\", patterns) is True\n        assert matches_exclude_pattern(\"prod-env\", patterns) is False\n\n    def test_hash_prefix_normalization(self) -> None:\n        \"\"\"Test that # prefix is handled correctly\"\"\"\n        # Pattern has #, channel name doesn't\n        assert matches_exclude_pattern(\"customer-X\", [\"#customer*\"]) is True\n\n        # Channel name has #, pattern doesn't\n        assert matches_exclude_pattern(\"#customer-X\", [\"customer*\"]) is True\n\n        # Both have #\n        assert matches_exclude_pattern(\"#customer-X\", [\"#customer*\"]) is True\n\n    def test_case_insensitive(self) -> None:\n        \"\"\"Test that matching is case insensitive\"\"\"\n        assert matches_exclude_pattern(\"Customer-Support\", [\"customer*\"]) is True\n        assert matches_exclude_pattern(\"CUSTOMER-X\", [\"customer*\"]) is True\n        assert matches_exclude_pattern(\"customer-x\", [\"CUSTOMER*\"]) is True\n\n    def test_whitespace_handling(self) -> None:\n        \"\"\"Test that whitespace is trimmed\"\"\"\n        assert matches_exclude_pattern(\" customer-X \", [\"customer*\"]) is True\n        assert matches_exclude_pattern(\"customer-X\", [\" customer* \"]) is True\n\n\nclass TestChannelQueryFilterBuilding:\n    \"\"\"Test channel query filter string construction\"\"\"\n\n    def test_specific_channels_no_exclude(self) -> None:\n        \"\"\"Test filter with specific channels, no exclusions\"\"\"\n        entities = {\n            \"search_all_channels\": False,\n            \"channels\": [\"general\", \"engineering\"],\n        }\n\n        filter_str = build_channel_query_filter(entities)\n\n        assert \"in:#general\" in filter_str\n        assert \"in:#engineering\" in filter_str\n        assert filter_str.count(\"in:#\") == 2\n\n    def test_specific_channels_with_exclude(self) -> None:\n        \"\"\"Test filter with specific channels and exclusions\"\"\"\n        entities = {\n            \"search_all_channels\": False,\n            \"channels\": [\"general\", \"customer-X\", \"customer-Y\", \"support\"],\n            \"exclude_channels\": [\"customer*\"],\n        }\n\n        filter_str = build_channel_query_filter(entities)\n\n        # Should include non-customer channels\n        assert \"in:#general\" in filter_str\n        assert \"in:#support\" in filter_str\n\n        # Should exclude customer channels\n        assert \"customer-X\" not in filter_str\n        assert \"customer-Y\" not in filter_str\n\n    def test_all_channels_no_exclude(self) -> None:\n        \"\"\"Test search all channels with no exclusions\"\"\"\n        entities = {\"search_all_channels\": True}\n\n        filter_str = build_channel_query_filter(entities)\n\n        # Should return empty string (no filter)\n        assert filter_str == \"\"\n\n    def test_all_channels_with_exclude(self) -> None:\n        \"\"\"Test search all channels with exclusions\"\"\"\n        entities = {\n            \"search_all_channels\": True,\n            \"exclude_channels\": [\"customer*\", \"test-*\"],\n        }\n        available_channels = [\n            \"general\",\n            \"customer-X\",\n            \"customer-Y\",\n            \"test-env\",\n            \"support\",\n        ]\n\n        filter_str = build_channel_query_filter(entities, available_channels)\n\n        # Should use negative filters for excluded channels\n        assert \"-in:#customer-X\" in filter_str\n        assert \"-in:#customer-Y\" in filter_str\n        assert \"-in:#test-env\" in filter_str\n\n        # Should NOT include positive filters (we're searching ALL channels, just excluding some)\n        assert \"in:#general\" not in filter_str\n        assert \"in:#support\" not in filter_str\n\n    def test_empty_channels_list(self) -> None:\n        \"\"\"Test with empty channels list\"\"\"\n        entities = {\"search_all_channels\": False, \"channels\": []}\n\n        # Should raise ValidationError during entity parsing, but if it gets through\n        # should return empty string\n        try:\n            filter_str = build_channel_query_filter(entities)\n            assert filter_str == \"\"\n        except Exception:\n            # Expected - validation should fail\n            pass\n\n    def test_channel_name_normalization(self) -> None:\n        \"\"\"Test that channel names are normalized (# removed)\"\"\"\n        entities = {\n            \"search_all_channels\": False,\n            \"channels\": [\"#general\", \"engineering\"],  # One with #, one without\n        }\n\n        filter_str = build_channel_query_filter(entities)\n\n        # Both should be included with in:# prefix\n        assert \"in:#general\" in filter_str\n        assert \"in:#engineering\" in filter_str\n\n    def test_invalid_entities(self) -> None:\n        \"\"\"Test with invalid entities\"\"\"\n        entities = {\"invalid_field\": \"value\"}\n\n        filter_str = build_channel_query_filter(entities)\n\n        # Should return empty string on validation error\n        assert filter_str == \"\"\n\n    def test_no_available_channels(self) -> None:\n        \"\"\"Test exclude patterns when channel list fetch fails\"\"\"\n        entities = {\n            \"search_all_channels\": True,\n            \"exclude_channels\": [\"customer*\"],\n        }\n        available_channels = None  # Channel fetch failed\n\n        filter_str = build_channel_query_filter(entities, available_channels)\n\n        # Should return empty string if we can't fetch channels\n        assert filter_str == \"\"\n\n\nclass TestDateExtraction:\n    \"\"\"Test date range extraction from queries\"\"\"\n\n    def test_extract_explicit_days(self) -> None:\n        \"\"\"Test extracting explicit day ranges\"\"\"\n        from onyx.context.search.federated.slack_search_utils import (\n            extract_date_range_from_query,\n        )\n\n        mock_llm = MagicMock()\n\n        # Mock LLM response for \"last 7 days\"\n        mock_llm.invoke.return_value = MagicMock()\n        mock_llm.invoke.return_value.content = '{\"days_back\": 7}'\n\n        days = extract_date_range_from_query(\n            \"show me results from last 7 days\", mock_llm, 30\n        )\n\n        assert days == 7\n\n    def test_enforce_default_search_days_limit(self) -> None:\n        \"\"\"Test that default_search_days is enforced as hard limit\"\"\"\n        from onyx.context.search.federated.slack_search_utils import (\n            extract_date_range_from_query,\n        )\n\n        mock_llm = MagicMock()\n\n        # Mock LLM response for \"last 90 days\" but limit is 30\n        mock_llm.invoke.return_value = MagicMock()\n        mock_llm.invoke.return_value.content = '{\"days_back\": 90}'\n\n        days = extract_date_range_from_query(\n            \"show me results from last 90 days\", mock_llm, 30\n        )\n\n        # Should be capped at 30\n        assert days == 30\n\n    def test_no_date_mentioned(self) -> None:\n        \"\"\"Test when no date is mentioned in query\"\"\"\n        from onyx.context.search.federated.slack_search_utils import (\n            extract_date_range_from_query,\n        )\n\n        mock_llm = MagicMock()\n\n        # Mock LLM response for no date\n        mock_llm.invoke.return_value = MagicMock()\n        mock_llm.invoke.return_value.content = '{\"days_back\": null}'\n\n        days = extract_date_range_from_query(\"show me budget reports\", mock_llm, 30)\n\n        # Should use default\n        assert days == 30\n\n    def test_llm_failure_fallback(self) -> None:\n        \"\"\"Test fallback when LLM fails\"\"\"\n        from onyx.context.search.federated.slack_search_utils import (\n            extract_date_range_from_query,\n        )\n\n        mock_llm = MagicMock()\n\n        # Mock LLM failure\n        mock_llm.invoke.side_effect = Exception(\"LLM error\")\n\n        days = extract_date_range_from_query(\"show me results\", mock_llm, 30)\n\n        # Should fall back to default\n        assert days == 30\n\n\nclass TestChannelTypeFiltering:\n    \"\"\"Test post-filtering based on channel type\"\"\"\n\n    def test_include_public_channels_always(self) -> None:\n        \"\"\"Test that public channels are always included\"\"\"\n        from onyx.context.search.federated.slack_search_utils import (\n            should_include_message,\n        )\n\n        entities = {\n            \"include_dm\": False,\n            \"include_private_channels\": False,\n        }\n\n        assert should_include_message(ChannelType.PUBLIC_CHANNEL, entities) is True\n\n    def test_filter_dm_based_on_entities(self) -> None:\n        \"\"\"Test DM filtering based on include_dm setting\"\"\"\n        from onyx.context.search.federated.slack_search_utils import (\n            should_include_message,\n        )\n\n        # DMs enabled\n        entities_with_dm = {\"include_dm\": True}\n        assert should_include_message(ChannelType.IM, entities_with_dm) is True\n\n        # DMs disabled\n        entities_no_dm = {\"include_dm\": False}\n        assert should_include_message(ChannelType.IM, entities_no_dm) is False\n\n    def test_filter_group_dm(self) -> None:\n        \"\"\"Test group DM (MPIM) filtering uses include_group_dm setting\"\"\"\n        from onyx.context.search.federated.slack_search_utils import (\n            should_include_message,\n        )\n\n        # Group DMs should follow include_group_dm setting\n        entities_with_group_dm = {\"include_group_dm\": True}\n        assert should_include_message(ChannelType.MPIM, entities_with_group_dm) is True\n\n        entities_no_group_dm = {\"include_group_dm\": False}\n        assert should_include_message(ChannelType.MPIM, entities_no_group_dm) is False\n\n    def test_filter_private_channels(self) -> None:\n        \"\"\"Test private channel filtering\"\"\"\n        from onyx.context.search.federated.slack_search_utils import (\n            should_include_message,\n        )\n\n        # Private channels enabled\n        entities_with_private = {\"include_private_channels\": True}\n        assert (\n            should_include_message(ChannelType.PRIVATE_CHANNEL, entities_with_private)\n            is True\n        )\n\n        # Private channels disabled\n        entities_no_private = {\"include_private_channels\": False}\n        assert (\n            should_include_message(ChannelType.PRIVATE_CHANNEL, entities_no_private)\n            is False\n        )\n\n    def test_invalid_entities_default_behavior(self) -> None:\n        \"\"\"Test that invalid entities default to including messages\"\"\"\n        from onyx.context.search.federated.slack_search_utils import (\n            should_include_message,\n        )\n\n        invalid_entities = {\"invalid_field\": \"value\"}\n\n        # Should default to including (safe behavior)\n        assert (\n            should_include_message(ChannelType.PUBLIC_CHANNEL, invalid_entities) is True\n        )\n"
  },
  {
    "path": "backend/tests/unit/onyx/context/search/federated/test_slack_thread_context.py",
    "content": "\"\"\"Tests for Slack thread context fetching with rate limit handling.\"\"\"\n\nfrom datetime import datetime\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom slack_sdk.errors import SlackApiError\n\nfrom onyx.context.search.federated.models import SlackMessage\nfrom onyx.context.search.federated.slack_search import _fetch_thread_context\nfrom onyx.context.search.federated.slack_search import (\n    fetch_thread_contexts_with_rate_limit_handling,\n)\nfrom onyx.context.search.federated.slack_search import SlackRateLimitError\nfrom onyx.context.search.federated.slack_search import ThreadContextResult\n\n\ndef _create_mock_message(\n    message_id: str = \"1234567890.123456\",\n    thread_id: str | None = \"1234567890.000000\",\n    text: str = \"test message\",\n    channel_id: str = \"C123456\",\n) -> SlackMessage:\n    \"\"\"Create a mock SlackMessage for testing.\"\"\"\n    return SlackMessage(\n        document_id=f\"{channel_id}_{message_id}\",\n        channel_id=channel_id,\n        message_id=message_id,\n        thread_id=thread_id,\n        link=f\"https://slack.com/archives/{channel_id}/p{message_id.replace('.', '')}\",\n        metadata={\"channel\": \"test-channel\"},\n        timestamp=datetime.now(),\n        recency_bias=1.0,\n        semantic_identifier=\"user in #test-channel: test message\",\n        text=text,\n        highlighted_texts=set(),\n        slack_score=1000.0,\n    )\n\n\nclass TestSlackRateLimitError:\n    \"\"\"Test SlackRateLimitError exception.\"\"\"\n\n    def test_exception_is_raised(self) -> None:\n        \"\"\"Test that SlackRateLimitError can be raised and caught.\"\"\"\n        with pytest.raises(SlackRateLimitError):\n            raise SlackRateLimitError(\"Rate limited\")\n\n\nclass TestThreadContextResult:\n    \"\"\"Test ThreadContextResult class.\"\"\"\n\n    def test_success_result(self) -> None:\n        \"\"\"Test creating a success result.\"\"\"\n        result = ThreadContextResult.success(\"enriched text\")\n        assert result.text == \"enriched text\"\n        assert not result.is_rate_limited\n        assert not result.is_error\n\n    def test_rate_limited_result(self) -> None:\n        \"\"\"Test creating a rate limited result.\"\"\"\n        result = ThreadContextResult.rate_limited(\"original text\")\n        assert result.text == \"original text\"\n        assert result.is_rate_limited\n        assert not result.is_error\n\n    def test_error_result(self) -> None:\n        \"\"\"Test creating an error result.\"\"\"\n        result = ThreadContextResult.error(\"original text\")\n        assert result.text == \"original text\"\n        assert not result.is_rate_limited\n        assert result.is_error\n\n\nclass TestFetchThreadContext:\n    \"\"\"Test _fetch_thread_context function.\"\"\"\n\n    def test_non_thread_message_returns_success(self) -> None:\n        \"\"\"Test that non-thread messages return success with original text.\"\"\"\n        message = _create_mock_message(thread_id=None, text=\"original text\")\n\n        result = _fetch_thread_context(message, \"xoxp-token\", \"T12345\")\n\n        assert result.text == \"original text\"\n        assert not result.is_rate_limited\n        assert not result.is_error\n\n    @patch(\"onyx.context.search.federated.slack_search.WebClient\")\n    def test_rate_limit_returns_rate_limited_result(\n        self, mock_webclient_class: MagicMock\n    ) -> None:\n        \"\"\"Test that 429 rate limit returns rate_limited result.\"\"\"\n        message = _create_mock_message(text=\"original text\")\n\n        # Create mock response with 429 status\n        mock_response = MagicMock()\n        mock_response.status_code = 429\n\n        # Create mock client that raises rate limit error\n        mock_client = MagicMock()\n        mock_client.conversations_replies.side_effect = SlackApiError(\n            \"ratelimited\", mock_response\n        )\n        mock_webclient_class.return_value = mock_client\n\n        result = _fetch_thread_context(message, \"xoxp-token\", \"T12345\")\n\n        assert result.text == \"original text\"\n        assert result.is_rate_limited\n        assert not result.is_error\n\n    @patch(\"onyx.context.search.federated.slack_search.WebClient\")\n    def test_other_api_error_returns_error_result(\n        self, mock_webclient_class: MagicMock\n    ) -> None:\n        \"\"\"Test that non-rate-limit API errors return error result.\"\"\"\n        message = _create_mock_message(text=\"original text\")\n\n        # Create mock response with non-429 error\n        mock_response = MagicMock()\n        mock_response.status_code = 500\n\n        mock_client = MagicMock()\n        mock_client.conversations_replies.side_effect = SlackApiError(\n            \"internal_error\", mock_response\n        )\n        mock_webclient_class.return_value = mock_client\n\n        result = _fetch_thread_context(message, \"xoxp-token\", \"T12345\")\n\n        assert result.text == \"original text\"\n        assert not result.is_rate_limited\n        assert result.is_error\n\n    @patch(\"onyx.context.search.federated.slack_search.WebClient\")\n    def test_unexpected_exception_returns_error_result(\n        self, mock_webclient_class: MagicMock\n    ) -> None:\n        \"\"\"Test that unexpected exceptions return error result.\"\"\"\n        message = _create_mock_message(text=\"original text\")\n\n        mock_client = MagicMock()\n        mock_client.conversations_replies.side_effect = RuntimeError(\"Network error\")\n        mock_webclient_class.return_value = mock_client\n\n        result = _fetch_thread_context(message, \"xoxp-token\", \"T12345\")\n\n        assert result.text == \"original text\"\n        assert not result.is_rate_limited\n        assert result.is_error\n\n    @patch(\"onyx.context.search.federated.slack_search.batch_get_user_profiles\")\n    @patch(\"onyx.context.search.federated.slack_search.WebClient\")\n    def test_successful_thread_fetch_returns_context(\n        self, mock_webclient_class: MagicMock, mock_batch_profiles: MagicMock\n    ) -> None:\n        \"\"\"Test that successful thread fetch returns enriched context.\"\"\"\n        message = _create_mock_message(\n            message_id=\"1234567890.123456\",\n            thread_id=\"1234567890.000000\",\n            text=\"original text\",\n        )\n\n        # Mock user profile lookup\n        mock_batch_profiles.return_value = {\n            \"U111\": \"User One\",\n            \"U222\": \"User Two\",\n            \"U333\": \"User Three\",\n        }\n\n        # Create mock response with thread messages\n        mock_response = MagicMock()\n        mock_response.get.return_value = [\n            {\n                \"text\": \"Thread starter message\",\n                \"user\": \"U111\",\n                \"ts\": \"1234567890.000000\",\n            },\n            {\"text\": \"Reply 1\", \"user\": \"U222\", \"ts\": \"1234567890.111111\"},\n            {\"text\": \"Reply 2 (matched)\", \"user\": \"U333\", \"ts\": \"1234567890.123456\"},\n        ]\n        mock_response.validate.return_value = None\n\n        mock_client = MagicMock()\n        mock_client.conversations_replies.return_value = mock_response\n        mock_webclient_class.return_value = mock_client\n\n        result = _fetch_thread_context(message, \"xoxp-token\", \"T12345\")\n\n        # Should contain thread starter and replies with resolved usernames\n        assert \"Thread starter message\" in result.text\n        assert \"Reply\" in result.text\n        assert \"User One\" in result.text\n        assert not result.is_rate_limited\n        assert not result.is_error\n\n\nclass TestFetchThreadContextsWithRateLimitHandling:\n    \"\"\"Test fetch_thread_contexts_with_rate_limit_handling function.\"\"\"\n\n    def test_empty_message_list_returns_empty(self) -> None:\n        \"\"\"Test that empty message list returns empty list.\"\"\"\n        result = fetch_thread_contexts_with_rate_limit_handling(\n            slack_messages=[],\n            access_token=\"xoxp-token\",\n            team_id=\"T12345\",\n        )\n\n        assert result == []\n\n    @patch(\"onyx.context.search.federated.slack_search._fetch_thread_context\")\n    @patch(\n        \"onyx.context.search.federated.slack_search.run_functions_tuples_in_parallel\"\n    )\n    def test_batch_processing_respects_batch_size(\n        self,\n        mock_parallel: MagicMock,\n        mock_fetch_context: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that messages are processed in batches of specified size.\"\"\"\n        messages = [\n            _create_mock_message(message_id=f\"123456789{i}.000000\") for i in range(7)\n        ]\n\n        # Mock parallel execution to return ThreadContextResult objects\n        mock_parallel.return_value = [\n            ThreadContextResult.success(\"enriched\") for _ in range(3)\n        ]\n\n        fetch_thread_contexts_with_rate_limit_handling(\n            slack_messages=messages,\n            access_token=\"xoxp-token\",\n            team_id=\"T12345\",\n            batch_size=3,\n            max_messages=None,\n        )\n\n        # Should have called parallel execution 3 times (7 messages / 3 batch = 3 batches)\n        assert mock_parallel.call_count == 3\n\n    @patch(\"onyx.context.search.federated.slack_search._fetch_thread_context\")\n    @patch(\n        \"onyx.context.search.federated.slack_search.run_functions_tuples_in_parallel\"\n    )\n    def test_rate_limit_stops_further_batches(\n        self,\n        mock_parallel: MagicMock,\n        mock_fetch_context: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that rate limiting stops processing of subsequent batches.\"\"\"\n        messages = [\n            _create_mock_message(message_id=f\"123456789{i}.000000\", text=f\"msg{i}\")\n            for i in range(6)\n        ]\n\n        # First batch succeeds, second batch has one success and one rate limit\n        mock_parallel.side_effect = [\n            [\n                ThreadContextResult.success(\"enriched0\"),\n                ThreadContextResult.success(\"enriched1\"),\n            ],\n            [\n                ThreadContextResult.success(\"enriched2\"),\n                ThreadContextResult.rate_limited(\"msg3\"),  # Rate limit hit\n            ],\n        ]\n\n        result = fetch_thread_contexts_with_rate_limit_handling(\n            slack_messages=messages,\n            access_token=\"xoxp-token\",\n            team_id=\"T12345\",\n            batch_size=2,\n            max_messages=None,\n        )\n\n        # Should have 6 results total\n        assert len(result) == 6\n        # First 2 should be enriched\n        assert result[0] == \"enriched0\"\n        assert result[1] == \"enriched1\"\n        # Second batch: first enriched (preserved!), second rate limited (original text)\n        assert result[2] == \"enriched2\"\n        assert result[3] == \"msg3\"\n        # Last 2 (skipped due to rate limit) should be original text\n        assert result[4] == \"msg4\"\n        assert result[5] == \"msg5\"\n\n        # Should only call parallel twice (stopped after rate limit detected)\n        assert mock_parallel.call_count == 2\n\n    @patch(\"onyx.context.search.federated.slack_search._fetch_thread_context\")\n    @patch(\n        \"onyx.context.search.federated.slack_search.run_functions_tuples_in_parallel\"\n    )\n    def test_other_errors_dont_stop_processing(\n        self,\n        mock_parallel: MagicMock,\n        mock_fetch_context: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that non-rate-limit errors don't stop batch processing.\"\"\"\n        messages = [\n            _create_mock_message(message_id=f\"123456789{i}.000000\", text=f\"msg{i}\")\n            for i in range(4)\n        ]\n\n        # First batch has an error (not rate limit), second batch succeeds\n        mock_parallel.side_effect = [\n            [\n                ThreadContextResult.success(\"enriched0\"),\n                ThreadContextResult.error(\"msg1\"),  # Error but NOT rate limit\n            ],\n            [\n                ThreadContextResult.success(\"enriched2\"),\n                ThreadContextResult.success(\"enriched3\"),\n            ],\n        ]\n\n        result = fetch_thread_contexts_with_rate_limit_handling(\n            slack_messages=messages,\n            access_token=\"xoxp-token\",\n            team_id=\"T12345\",\n            batch_size=2,\n            max_messages=None,\n        )\n\n        # Should have 4 results total\n        assert len(result) == 4\n        assert result[0] == \"enriched0\"\n        assert result[1] == \"msg1\"  # Error returns original text\n        assert result[2] == \"enriched2\"\n        assert result[3] == \"enriched3\"\n\n        # Should have called both batches (errors don't stop processing)\n        assert mock_parallel.call_count == 2\n\n\nclass TestMaxMessagesLimit:\n    \"\"\"Test max_messages parameter limiting thread context fetches.\"\"\"\n\n    @patch(\"onyx.context.search.federated.slack_search._fetch_thread_context\")\n    @patch(\n        \"onyx.context.search.federated.slack_search.run_functions_tuples_in_parallel\"\n    )\n    def test_max_messages_limits_context_fetches(\n        self,\n        mock_parallel: MagicMock,\n        mock_fetch_context: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that only top N messages get thread context when max_messages is set.\"\"\"\n        messages = [\n            _create_mock_message(message_id=f\"123456789{i}.000000\", text=f\"msg{i}\")\n            for i in range(10)\n        ]\n\n        # Mock parallel to return ThreadContextResult for messages that are fetched\n        mock_parallel.return_value = [\n            ThreadContextResult.success(\"enriched0\"),\n            ThreadContextResult.success(\"enriched1\"),\n            ThreadContextResult.success(\"enriched2\"),\n        ]\n\n        result = fetch_thread_contexts_with_rate_limit_handling(\n            slack_messages=messages,\n            access_token=\"xoxp-token\",\n            team_id=\"T12345\",\n            batch_size=5,\n            max_messages=3,  # Only fetch context for top 3\n        )\n\n        # Should have 10 results total\n        assert len(result) == 10\n        # First 3 should be enriched\n        assert result[0] == \"enriched0\"\n        assert result[1] == \"enriched1\"\n        assert result[2] == \"enriched2\"\n        # Remaining 7 should be original text\n        for i in range(3, 10):\n            assert result[i] == f\"msg{i}\"\n\n        # Should only call parallel once (3 messages with batch_size=5 = 1 batch)\n        assert mock_parallel.call_count == 1\n\n    @patch(\"onyx.context.search.federated.slack_search._fetch_thread_context\")\n    @patch(\n        \"onyx.context.search.federated.slack_search.run_functions_tuples_in_parallel\"\n    )\n    def test_max_messages_none_fetches_all(\n        self,\n        mock_parallel: MagicMock,\n        mock_fetch_context: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that max_messages=None fetches context for all messages.\"\"\"\n        messages = [\n            _create_mock_message(message_id=f\"123456789{i}.000000\", text=f\"msg{i}\")\n            for i in range(5)\n        ]\n\n        mock_parallel.return_value = [\n            ThreadContextResult.success(f\"enriched{i}\") for i in range(5)\n        ]\n\n        result = fetch_thread_contexts_with_rate_limit_handling(\n            slack_messages=messages,\n            access_token=\"xoxp-token\",\n            team_id=\"T12345\",\n            batch_size=10,\n            max_messages=None,  # No limit\n        )\n\n        # All 5 should be enriched\n        assert len(result) == 5\n        for i in range(5):\n            assert result[i] == f\"enriched{i}\"\n\n    @patch(\"onyx.context.search.federated.slack_search._fetch_thread_context\")\n    @patch(\n        \"onyx.context.search.federated.slack_search.run_functions_tuples_in_parallel\"\n    )\n    def test_max_messages_greater_than_total_fetches_all(\n        self,\n        mock_parallel: MagicMock,\n        mock_fetch_context: MagicMock,  # noqa: ARG002\n    ) -> None:\n        \"\"\"Test that max_messages > total messages fetches all.\"\"\"\n        messages = [\n            _create_mock_message(message_id=f\"123456789{i}.000000\", text=f\"msg{i}\")\n            for i in range(3)\n        ]\n\n        mock_parallel.return_value = [\n            ThreadContextResult.success(\"enriched0\"),\n            ThreadContextResult.success(\"enriched1\"),\n            ThreadContextResult.success(\"enriched2\"),\n        ]\n\n        result = fetch_thread_contexts_with_rate_limit_handling(\n            slack_messages=messages,\n            access_token=\"xoxp-token\",\n            team_id=\"T12345\",\n            batch_size=10,\n            max_messages=100,  # More than we have\n        )\n\n        # All 3 should be enriched\n        assert len(result) == 3\n        for i in range(3):\n            assert result[i] == f\"enriched{i}\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/db/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/db/conftest.py",
    "content": "\"\"\"Fixtures for unit-testing DAL classes with mocked sessions.\"\"\"\n\nfrom typing import Any\nfrom unittest.mock import MagicMock\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.scim import ScimDAL\n\n\ndef model_attrs(obj: object) -> dict[str, Any]:\n    \"\"\"Extract user-set attributes from a SQLAlchemy model instance.\n\n    Filters out SQLAlchemy internal state (``_sa_instance_state``).\n    Use this in tests to assert the full set of fields on a model object\n    so that adding a new field forces the test to be updated.\n    \"\"\"\n    return {k: v for k, v in vars(obj).items() if not k.startswith(\"_\")}\n\n\n@pytest.fixture\ndef mock_db_session() -> MagicMock:\n    \"\"\"A MagicMock standing in for a SQLAlchemy Session.\"\"\"\n    return MagicMock(spec=Session)\n\n\n@pytest.fixture\ndef scim_dal(mock_db_session: MagicMock) -> ScimDAL:\n    \"\"\"A ScimDAL backed by a mock session.\"\"\"\n    return ScimDAL(mock_db_session)\n"
  },
  {
    "path": "backend/tests/unit/onyx/db/test_assign_default_groups.py",
    "content": "\"\"\"\nUnit tests for assign_user_to_default_groups__no_commit in onyx.db.users.\n\nCovers:\n1. Standard/service-account users get assigned to the correct default group\n2. BOT, EXT_PERM_USER, ANONYMOUS account types are skipped\n3. Missing default group raises RuntimeError\n4. Already-in-group is a no-op\n5. IntegrityError race condition is handled gracefully\n6. The function never commits the session\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.exc import IntegrityError\n\nfrom onyx.db.enums import AccountType\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.models import UserGroup\nfrom onyx.db.users import assign_user_to_default_groups__no_commit\n\n\ndef _mock_user(\n    account_type: AccountType = AccountType.STANDARD,\n    email: str = \"test@example.com\",\n) -> MagicMock:\n    user = MagicMock()\n    user.id = uuid4()\n    user.email = email\n    user.account_type = account_type\n    return user\n\n\ndef _mock_group(name: str = \"Basic\", group_id: int = 1) -> MagicMock:\n    group = MagicMock()\n    group.id = group_id\n    group.name = name\n    group.is_default = True\n    return group\n\n\ndef _make_query_chain(first_return: object = None) -> MagicMock:\n    \"\"\"Returns a mock that supports .filter(...).filter(...).first() chaining.\"\"\"\n    chain = MagicMock()\n    chain.filter.return_value = chain\n    chain.first.return_value = first_return\n    return chain\n\n\ndef _setup_db_session(\n    group_result: object = None,\n    membership_result: object = None,\n) -> MagicMock:\n    \"\"\"Create a db_session mock that routes query(UserGroup) and query(User__UserGroup).\"\"\"\n    db_session = MagicMock()\n\n    group_chain = _make_query_chain(group_result)\n    membership_chain = _make_query_chain(membership_result)\n\n    def query_side_effect(model: type) -> MagicMock:\n        if model is UserGroup:\n            return group_chain\n        if model is User__UserGroup:\n            return membership_chain\n        return MagicMock()\n\n    db_session.query.side_effect = query_side_effect\n    return db_session\n\n\ndef test_standard_user_assigned_to_basic_group() -> None:\n    group = _mock_group(\"Basic\")\n    db_session = _setup_db_session(group_result=group, membership_result=None)\n    savepoint = MagicMock()\n    db_session.begin_nested.return_value = savepoint\n    user = _mock_user(AccountType.STANDARD)\n\n    assign_user_to_default_groups__no_commit(db_session, user, is_admin=False)\n\n    db_session.add.assert_called_once()\n    added = db_session.add.call_args[0][0]\n    assert isinstance(added, User__UserGroup)\n    assert added.user_id == user.id\n    assert added.user_group_id == group.id\n    db_session.flush.assert_called_once()\n\n\ndef test_admin_user_assigned_to_admin_group() -> None:\n    group = _mock_group(\"Admin\", group_id=2)\n    db_session = _setup_db_session(group_result=group, membership_result=None)\n    savepoint = MagicMock()\n    db_session.begin_nested.return_value = savepoint\n    user = _mock_user(AccountType.STANDARD)\n\n    assign_user_to_default_groups__no_commit(db_session, user, is_admin=True)\n\n    db_session.add.assert_called_once()\n    added = db_session.add.call_args[0][0]\n    assert isinstance(added, User__UserGroup)\n    assert added.user_group_id == group.id\n\n\n@pytest.mark.parametrize(\n    \"account_type\",\n    [AccountType.BOT, AccountType.EXT_PERM_USER, AccountType.ANONYMOUS],\n)\ndef test_excluded_account_types_skipped(account_type: AccountType) -> None:\n    db_session = MagicMock()\n    user = _mock_user(account_type)\n\n    assign_user_to_default_groups__no_commit(db_session, user)\n\n    db_session.query.assert_not_called()\n    db_session.add.assert_not_called()\n\n\ndef test_service_account_not_skipped() -> None:\n    group = _mock_group(\"Basic\")\n    db_session = _setup_db_session(group_result=group, membership_result=None)\n    savepoint = MagicMock()\n    db_session.begin_nested.return_value = savepoint\n    user = _mock_user(AccountType.SERVICE_ACCOUNT)\n\n    assign_user_to_default_groups__no_commit(db_session, user, is_admin=False)\n\n    db_session.add.assert_called_once()\n\n\ndef test_missing_default_group_raises_error() -> None:\n    db_session = _setup_db_session(group_result=None)\n    user = _mock_user()\n\n    with pytest.raises(RuntimeError, match=\"Default group .* not found\"):\n        assign_user_to_default_groups__no_commit(db_session, user)\n\n\ndef test_already_in_group_is_noop() -> None:\n    group = _mock_group(\"Basic\")\n    existing_membership = MagicMock()\n    db_session = _setup_db_session(\n        group_result=group, membership_result=existing_membership\n    )\n    user = _mock_user()\n\n    assign_user_to_default_groups__no_commit(db_session, user)\n\n    db_session.add.assert_not_called()\n    db_session.begin_nested.assert_not_called()\n\n\ndef test_integrity_error_race_condition_handled() -> None:\n    group = _mock_group(\"Basic\")\n    db_session = _setup_db_session(group_result=group, membership_result=None)\n    savepoint = MagicMock()\n    db_session.begin_nested.return_value = savepoint\n    db_session.flush.side_effect = IntegrityError(None, None, Exception(\"duplicate\"))\n    user = _mock_user()\n\n    # Should not raise\n    assign_user_to_default_groups__no_commit(db_session, user)\n\n    savepoint.rollback.assert_called_once()\n\n\ndef test_no_commit_called_on_successful_assignment() -> None:\n    group = _mock_group(\"Basic\")\n    db_session = _setup_db_session(group_result=group, membership_result=None)\n    savepoint = MagicMock()\n    db_session.begin_nested.return_value = savepoint\n    user = _mock_user()\n\n    assign_user_to_default_groups__no_commit(db_session, user)\n\n    db_session.commit.assert_not_called()\n"
  },
  {
    "path": "backend/tests/unit/onyx/db/test_chat_sessions.py",
    "content": "\"\"\"Tests for get_chat_sessions_by_user filtering behavior.\n\nVerifies that failed chat sessions (those with only SYSTEM messages) are\ncorrectly filtered out while preserving recently created sessions, matching\nthe behavior specified in PR #7233.\n\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom unittest.mock import MagicMock\nfrom uuid import UUID\nfrom uuid import uuid4\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.chat import get_chat_sessions_by_user\nfrom onyx.db.models import ChatSession\n\n\ndef _make_session(\n    user_id: UUID,\n    time_created: datetime | None = None,\n    time_updated: datetime | None = None,\n    description: str = \"\",\n) -> MagicMock:\n    \"\"\"Create a mock ChatSession with the given attributes.\"\"\"\n    session = MagicMock(spec=ChatSession)\n    session.id = uuid4()\n    session.user_id = user_id\n    session.time_created = time_created or datetime.now(timezone.utc)\n    session.time_updated = time_updated or session.time_created\n    session.description = description\n    session.deleted = False\n    session.onyxbot_flow = False\n    session.project_id = None\n    return session\n\n\n@pytest.fixture\ndef user_id() -> UUID:\n    return uuid4()\n\n\n@pytest.fixture\ndef old_time() -> datetime:\n    \"\"\"A timestamp well outside the 5-minute leeway window.\"\"\"\n    return datetime.now(timezone.utc) - timedelta(hours=1)\n\n\n@pytest.fixture\ndef recent_time() -> datetime:\n    \"\"\"A timestamp within the 5-minute leeway window.\"\"\"\n    return datetime.now(timezone.utc) - timedelta(minutes=2)\n\n\nclass TestGetChatSessionsByUser:\n    \"\"\"Tests for the failed chat filtering logic in get_chat_sessions_by_user.\"\"\"\n\n    def test_filters_out_failed_sessions(\n        self, user_id: UUID, old_time: datetime\n    ) -> None:\n        \"\"\"Sessions with only SYSTEM messages should be excluded.\"\"\"\n        valid_session = _make_session(user_id, time_created=old_time)\n        failed_session = _make_session(user_id, time_created=old_time)\n\n        db_session = MagicMock(spec=Session)\n\n        # First execute: returns all sessions\n        # Second execute: returns only the valid session's ID (has non-system msgs)\n        mock_result_1 = MagicMock()\n        mock_result_1.scalars.return_value.all.return_value = [\n            valid_session,\n            failed_session,\n        ]\n\n        mock_result_2 = MagicMock()\n        mock_result_2.scalars.return_value.all.return_value = [valid_session.id]\n\n        db_session.execute.side_effect = [mock_result_1, mock_result_2]\n\n        result = get_chat_sessions_by_user(\n            user_id=user_id,\n            deleted=False,\n            db_session=db_session,\n            include_failed_chats=False,\n        )\n\n        assert len(result) == 1\n        assert result[0].id == valid_session.id\n\n    def test_keeps_recent_sessions_without_messages(\n        self, user_id: UUID, recent_time: datetime\n    ) -> None:\n        \"\"\"Recently created sessions should be kept even without messages.\"\"\"\n        recent_session = _make_session(user_id, time_created=recent_time)\n\n        db_session = MagicMock(spec=Session)\n\n        mock_result_1 = MagicMock()\n        mock_result_1.scalars.return_value.all.return_value = [recent_session]\n\n        db_session.execute.side_effect = [mock_result_1]\n\n        result = get_chat_sessions_by_user(\n            user_id=user_id,\n            deleted=False,\n            db_session=db_session,\n            include_failed_chats=False,\n        )\n\n        assert len(result) == 1\n        assert result[0].id == recent_session.id\n        # Should only have been called once — no second query needed\n        # because the recent session is within the leeway window\n        assert db_session.execute.call_count == 1\n\n    def test_include_failed_chats_skips_filtering(\n        self, user_id: UUID, old_time: datetime\n    ) -> None:\n        \"\"\"When include_failed_chats=True, no filtering should occur.\"\"\"\n        session_a = _make_session(user_id, time_created=old_time)\n        session_b = _make_session(user_id, time_created=old_time)\n\n        db_session = MagicMock(spec=Session)\n\n        mock_result = MagicMock()\n        mock_result.scalars.return_value.all.return_value = [session_a, session_b]\n\n        db_session.execute.side_effect = [mock_result]\n\n        result = get_chat_sessions_by_user(\n            user_id=user_id,\n            deleted=False,\n            db_session=db_session,\n            include_failed_chats=True,\n        )\n\n        assert len(result) == 2\n        # Only one DB call — no second query for message validation\n        assert db_session.execute.call_count == 1\n\n    def test_limit_applied_after_filtering(\n        self, user_id: UUID, old_time: datetime\n    ) -> None:\n        \"\"\"Limit should be applied after filtering, not before.\"\"\"\n        sessions = [_make_session(user_id, time_created=old_time) for _ in range(5)]\n        valid_ids = [s.id for s in sessions[:3]]\n\n        db_session = MagicMock(spec=Session)\n\n        mock_result_1 = MagicMock()\n        mock_result_1.scalars.return_value.all.return_value = sessions\n\n        mock_result_2 = MagicMock()\n        mock_result_2.scalars.return_value.all.return_value = valid_ids\n\n        db_session.execute.side_effect = [mock_result_1, mock_result_2]\n\n        result = get_chat_sessions_by_user(\n            user_id=user_id,\n            deleted=False,\n            db_session=db_session,\n            include_failed_chats=False,\n            limit=2,\n        )\n\n        assert len(result) == 2\n        # Should be the first 2 valid sessions (order preserved)\n        assert result[0].id == sessions[0].id\n        assert result[1].id == sessions[1].id\n\n    def test_mixed_recent_and_old_sessions(\n        self, user_id: UUID, old_time: datetime, recent_time: datetime\n    ) -> None:\n        \"\"\"Mix of recent and old sessions should filter correctly.\"\"\"\n        old_valid = _make_session(user_id, time_created=old_time)\n        old_failed = _make_session(user_id, time_created=old_time)\n        recent_no_msgs = _make_session(user_id, time_created=recent_time)\n\n        db_session = MagicMock(spec=Session)\n\n        mock_result_1 = MagicMock()\n        mock_result_1.scalars.return_value.all.return_value = [\n            old_valid,\n            old_failed,\n            recent_no_msgs,\n        ]\n\n        mock_result_2 = MagicMock()\n        mock_result_2.scalars.return_value.all.return_value = [old_valid.id]\n\n        db_session.execute.side_effect = [mock_result_1, mock_result_2]\n\n        result = get_chat_sessions_by_user(\n            user_id=user_id,\n            deleted=False,\n            db_session=db_session,\n            include_failed_chats=False,\n        )\n\n        result_ids = {cs.id for cs in result}\n        assert old_valid.id in result_ids\n        assert recent_no_msgs.id in result_ids\n        assert old_failed.id not in result_ids\n\n    def test_empty_result(self, user_id: UUID) -> None:\n        \"\"\"No sessions should return empty list without errors.\"\"\"\n        db_session = MagicMock(spec=Session)\n\n        mock_result = MagicMock()\n        mock_result.scalars.return_value.all.return_value = []\n\n        db_session.execute.side_effect = [mock_result]\n\n        result = get_chat_sessions_by_user(\n            user_id=user_id,\n            deleted=False,\n            db_session=db_session,\n            include_failed_chats=False,\n        )\n\n        assert result == []\n        assert db_session.execute.call_count == 1\n"
  },
  {
    "path": "backend/tests/unit/onyx/db/test_dal.py",
    "content": "from unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.db.dal import DAL\n\n\nclass TestDALSessionDelegation:\n    \"\"\"Verify that DAL methods delegate correctly to the underlying session.\"\"\"\n\n    def test_commit(self) -> None:\n        session = MagicMock()\n        dal = DAL(session)\n        dal.commit()\n        session.commit.assert_called_once()\n\n    def test_flush(self) -> None:\n        session = MagicMock()\n        dal = DAL(session)\n        dal.flush()\n        session.flush.assert_called_once()\n\n    def test_rollback(self) -> None:\n        session = MagicMock()\n        dal = DAL(session)\n        dal.rollback()\n        session.rollback.assert_called_once()\n\n    def test_session_property_exposes_underlying_session(self) -> None:\n        session = MagicMock()\n        dal = DAL(session)\n        assert dal.session is session\n\n    def test_commit_propagates_exception(self) -> None:\n        session = MagicMock()\n        session.commit.side_effect = RuntimeError(\"db error\")\n        dal = DAL(session)\n        with pytest.raises(RuntimeError, match=\"db error\"):\n            dal.commit()\n\n\nclass TestDALFromTenant:\n    \"\"\"Verify the from_tenant context manager lifecycle.\"\"\"\n\n    @patch(\"onyx.db.dal.get_session_with_tenant\")\n    def test_yields_dal_with_tenant_session(self, mock_get_session: MagicMock) -> None:\n        mock_session = MagicMock()\n        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)\n        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)\n\n        with DAL.from_tenant(\"tenant_abc\") as dal:\n            assert isinstance(dal, DAL)\n            assert dal.session is mock_session\n\n        mock_get_session.assert_called_once_with(tenant_id=\"tenant_abc\")\n\n    @patch(\"onyx.db.dal.get_session_with_tenant\")\n    def test_session_closed_after_context_exits(\n        self, mock_get_session: MagicMock\n    ) -> None:\n        mock_session = MagicMock()\n        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)\n        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)\n\n        with DAL.from_tenant(\"tenant_abc\"):\n            pass\n\n        mock_get_session.return_value.__exit__.assert_called_once()\n\n    @patch(\"onyx.db.dal.get_session_with_tenant\")\n    def test_session_closed_on_exception(self, mock_get_session: MagicMock) -> None:\n        mock_session = MagicMock()\n        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)\n        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)\n\n        with pytest.raises(ValueError):\n            with DAL.from_tenant(\"tenant_abc\"):\n                raise ValueError(\"something broke\")\n\n        mock_get_session.return_value.__exit__.assert_called_once()\n\n    @patch(\"onyx.db.dal.get_session_with_tenant\")\n    def test_subclass_from_tenant_returns_subclass_instance(\n        self, mock_get_session: MagicMock\n    ) -> None:\n        \"\"\"from_tenant uses cls(), so subclasses should get their own type back.\"\"\"\n        mock_session = MagicMock()\n        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)\n        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)\n\n        class MyDAL(DAL):\n            pass\n\n        with MyDAL.from_tenant(\"tenant_abc\") as dal:\n            assert isinstance(dal, MyDAL)\n\n    @patch(\"onyx.db.dal.get_session_with_tenant\")\n    def test_uncommitted_changes_not_auto_committed(\n        self, mock_get_session: MagicMock\n    ) -> None:\n        \"\"\"Exiting the context manager should NOT auto-commit.\"\"\"\n        mock_session = MagicMock()\n        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)\n        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)\n\n        with DAL.from_tenant(\"tenant_abc\"):\n            pass\n\n        mock_session.commit.assert_not_called()\n"
  },
  {
    "path": "backend/tests/unit/onyx/db/test_delete_user.py",
    "content": "from typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import UUID\nfrom uuid import uuid4\n\nfrom onyx.db.models import DocumentSet\nfrom onyx.db.models import DocumentSet__User\nfrom onyx.db.models import Persona\nfrom onyx.db.models import Persona__User\nfrom onyx.db.models import SamlAccount\nfrom onyx.db.models import User__UserGroup\nfrom onyx.db.users import delete_user_from_db\n\n\ndef _mock_user(\n    user_id: UUID | None = None, email: str = \"test@example.com\"\n) -> MagicMock:\n    user = MagicMock()\n    user.id = user_id or uuid4()\n    user.email = email\n    user.oauth_accounts = []\n    return user\n\n\ndef _make_query_chain() -> MagicMock:\n    \"\"\"Returns a mock that supports .filter(...).delete() and .filter(...).update(...)\"\"\"\n    chain = MagicMock()\n    chain.filter.return_value = chain\n    return chain\n\n\n@patch(\"onyx.db.users.remove_user_from_invited_users\")\n@patch(\n    \"onyx.db.users.fetch_ee_implementation_or_noop\",\n    return_value=lambda **_kwargs: None,\n)\ndef test_delete_user_nulls_out_document_set_ownership(\n    _mock_ee: Any, _mock_remove_invited: Any\n) -> None:\n    user = _mock_user()\n    db_session = MagicMock()\n\n    query_chains: dict[type, MagicMock] = {}\n\n    def query_side_effect(model: type) -> MagicMock:\n        if model not in query_chains:\n            query_chains[model] = _make_query_chain()\n        return query_chains[model]\n\n    db_session.query.side_effect = query_side_effect\n\n    delete_user_from_db(user, db_session)\n\n    # Verify DocumentSet.user_id is nulled out (update, not delete)\n    doc_set_chain = query_chains[DocumentSet]\n    doc_set_chain.filter.assert_called()\n    doc_set_chain.filter.return_value.update.assert_called_once_with(\n        {DocumentSet.user_id: None}\n    )\n\n    # Verify Persona.user_id is nulled out (update, not delete)\n    persona_chain = query_chains[Persona]\n    persona_chain.filter.assert_called()\n    persona_chain.filter.return_value.update.assert_called_once_with(\n        {Persona.user_id: None}\n    )\n\n\n@patch(\"onyx.db.users.remove_user_from_invited_users\")\n@patch(\n    \"onyx.db.users.fetch_ee_implementation_or_noop\",\n    return_value=lambda **_kwargs: None,\n)\ndef test_delete_user_cleans_up_join_tables(\n    _mock_ee: Any, _mock_remove_invited: Any\n) -> None:\n    user = _mock_user()\n    db_session = MagicMock()\n\n    query_chains: dict[type, MagicMock] = {}\n\n    def query_side_effect(model: type) -> MagicMock:\n        if model not in query_chains:\n            query_chains[model] = _make_query_chain()\n        return query_chains[model]\n\n    db_session.query.side_effect = query_side_effect\n\n    delete_user_from_db(user, db_session)\n\n    # Join tables should be deleted (not updated)\n    for model in [DocumentSet__User, Persona__User, User__UserGroup, SamlAccount]:\n        chain = query_chains[model]\n        chain.filter.return_value.delete.assert_called_once()\n\n\n@patch(\"onyx.db.users.remove_user_from_invited_users\")\n@patch(\n    \"onyx.db.users.fetch_ee_implementation_or_noop\",\n    return_value=lambda **_kwargs: None,\n)\ndef test_delete_user_commits_and_removes_invited(\n    _mock_ee: Any, mock_remove_invited: Any\n) -> None:\n    user = _mock_user(email=\"deleted@example.com\")\n    db_session = MagicMock()\n    db_session.query.return_value = _make_query_chain()\n\n    delete_user_from_db(user, db_session)\n\n    db_session.delete.assert_called_once_with(user)\n    db_session.commit.assert_called_once()\n    mock_remove_invited.assert_called_once_with(\"deleted@example.com\")\n\n\n@patch(\"onyx.db.users.remove_user_from_invited_users\")\n@patch(\n    \"onyx.db.users.fetch_ee_implementation_or_noop\",\n    return_value=lambda **_kwargs: None,\n)\ndef test_delete_user_deletes_oauth_accounts(\n    _mock_ee: Any, _mock_remove_invited: Any\n) -> None:\n    user = _mock_user()\n    oauth1 = MagicMock()\n    oauth2 = MagicMock()\n    user.oauth_accounts = [oauth1, oauth2]\n    db_session = MagicMock()\n    db_session.query.return_value = _make_query_chain()\n\n    delete_user_from_db(user, db_session)\n\n    db_session.delete.assert_any_call(oauth1)\n    db_session.delete.assert_any_call(oauth2)\n"
  },
  {
    "path": "backend/tests/unit/onyx/db/test_llm_sync.py",
    "content": "\"\"\"Tests for LLM provider model sync functionality.\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.db.llm import sync_model_configurations\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.server.manage.llm.models import SyncModelEntry\n\n\nclass TestSyncModelConfigurations:\n    \"\"\"Tests for sync_model_configurations function.\"\"\"\n\n    def test_inserts_new_models(self) -> None:\n        \"\"\"Test that new models are inserted.\"\"\"\n        # Mock the provider with no existing models\n        mock_provider = MagicMock()\n        mock_provider.id = 1\n        mock_provider.model_configurations = []\n\n        mock_session = MagicMock()\n\n        with patch(\n            \"onyx.db.llm.fetch_existing_llm_provider\", return_value=mock_provider\n        ):\n            models = [\n                SyncModelEntry(\n                    name=\"gpt-4\",\n                    display_name=\"GPT-4\",\n                    max_input_tokens=128000,\n                    supports_image_input=True,\n                ),\n                SyncModelEntry(\n                    name=\"gpt-4o\",\n                    display_name=\"GPT-4o\",\n                    max_input_tokens=128000,\n                    supports_image_input=True,\n                ),\n            ]\n\n            result = sync_model_configurations(\n                db_session=mock_session,\n                provider_name=LlmProviderNames.OPENAI,\n                models=models,\n            )\n\n            assert result == 2  # Two new models\n            assert (\n                mock_session.execute.call_count == 2 * 3\n            )  # 2 models * (model insert + chat insert + vision insert)\n            mock_session.commit.assert_called_once()\n\n    def test_skips_existing_models(self) -> None:\n        \"\"\"Test that existing models are not overwritten.\"\"\"\n        # Mock existing model\n        mock_existing_model = MagicMock()\n        mock_existing_model.name = \"gpt-4\"\n\n        mock_provider = MagicMock()\n        mock_provider.id = 1\n        mock_provider.model_configurations = [mock_existing_model]\n\n        mock_session = MagicMock()\n\n        with patch(\n            \"onyx.db.llm.fetch_existing_llm_provider\", return_value=mock_provider\n        ):\n            models = [\n                SyncModelEntry(\n                    name=\"gpt-4\",  # Existing - should be skipped\n                    display_name=\"GPT-4\",\n                    max_input_tokens=128000,\n                    supports_image_input=True,\n                ),\n                SyncModelEntry(\n                    name=\"gpt-4o\",  # New - should be inserted\n                    display_name=\"GPT-4o\",\n                    max_input_tokens=128000,\n                    supports_image_input=True,\n                ),\n            ]\n\n            result = sync_model_configurations(\n                db_session=mock_session,\n                provider_name=LlmProviderNames.OPENAI,\n                models=models,\n            )\n\n            assert result == 1  # Only one new model\n            assert mock_session.execute.call_count == 3\n\n    def test_no_commit_when_no_new_models(self) -> None:\n        \"\"\"Test that commit is not called when no new models.\"\"\"\n        mock_existing_model = MagicMock()\n        mock_existing_model.name = \"gpt-4\"\n\n        mock_provider = MagicMock()\n        mock_provider.id = 1\n        mock_provider.model_configurations = [mock_existing_model]\n\n        mock_session = MagicMock()\n\n        with patch(\n            \"onyx.db.llm.fetch_existing_llm_provider\", return_value=mock_provider\n        ):\n            models = [\n                SyncModelEntry(\n                    name=\"gpt-4\",  # Already exists\n                    display_name=\"GPT-4\",\n                    max_input_tokens=128000,\n                    supports_image_input=True,\n                ),\n            ]\n\n            result = sync_model_configurations(\n                db_session=mock_session,\n                provider_name=LlmProviderNames.OPENAI,\n                models=models,\n            )\n\n            assert result == 0\n            mock_session.commit.assert_not_called()\n\n    def test_raises_on_missing_provider(self) -> None:\n        \"\"\"Test that ValueError is raised when provider not found.\"\"\"\n        mock_session = MagicMock()\n\n        with patch(\"onyx.db.llm.fetch_existing_llm_provider\", return_value=None):\n            with pytest.raises(ValueError, match=\"not found\"):\n                sync_model_configurations(\n                    db_session=mock_session,\n                    provider_name=\"nonexistent\",\n                    models=[SyncModelEntry(name=\"model\", display_name=\"Model\")],\n                )\n\n    def test_handles_missing_optional_fields(self) -> None:\n        \"\"\"Test that optional fields default correctly.\"\"\"\n        mock_provider = MagicMock()\n        mock_provider.id = 1\n        mock_provider.model_configurations = []\n\n        mock_session = MagicMock()\n\n        with patch(\n            \"onyx.db.llm.fetch_existing_llm_provider\", return_value=mock_provider\n        ):\n            # Model with only required fields (max_input_tokens and supports_image_input default)\n            models = [\n                SyncModelEntry(\n                    name=\"model-1\",\n                    display_name=\"Model 1\",\n                ),\n            ]\n\n            result = sync_model_configurations(\n                db_session=mock_session,\n                provider_name=\"custom\",\n                models=models,\n            )\n\n            assert result == 1\n            # Verify execute was called with correct defaults\n            call_args = mock_session.execute.call_args\n            assert call_args is not None\n"
  },
  {
    "path": "backend/tests/unit/onyx/db/test_persona_display_priority.py",
    "content": "from types import SimpleNamespace\nfrom unittest.mock import MagicMock\n\nimport pytest\n\nfrom onyx.db.persona import update_personas_display_priority\n\n\ndef _persona(persona_id: int, display_priority: int) -> SimpleNamespace:\n    return SimpleNamespace(id=persona_id, display_priority=display_priority)\n\n\ndef test_update_display_priority_updates_subset(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    # Precondition\n    persona_a = _persona(1, 5)\n    persona_b = _persona(2, 6)\n    db_session = MagicMock()\n    user = MagicMock()\n    monkeypatch.setattr(\n        \"onyx.db.persona.get_raw_personas_for_user\",\n        lambda user, db_session, **kwargs: [persona_a, persona_b],  # noqa: ARG005\n    )\n\n    # Under test\n    update_personas_display_priority(\n        {persona_a.id: 0}, db_session, user, commit_db_txn=True\n    )\n\n    # Postcondition\n    assert persona_a.display_priority == 0\n    assert persona_b.display_priority == 6\n    db_session.commit.assert_called_once_with()\n\n\ndef test_update_display_priority_invalid_ids(monkeypatch: pytest.MonkeyPatch) -> None:\n    # Precondition\n    persona_a = _persona(1, 5)\n    db_session = MagicMock()\n    user = MagicMock()\n    monkeypatch.setattr(\n        \"onyx.db.persona.get_raw_personas_for_user\",\n        lambda user, db_session, **kwargs: [persona_a],  # noqa: ARG005\n    )\n\n    # Under test\n    with pytest.raises(ValueError):\n        update_personas_display_priority(\n            {persona_a.id: 0, 99: 1},\n            db_session,\n            user,\n            commit_db_txn=True,\n        )\n\n    # Postcondition\n    db_session.commit.assert_not_called()\n"
  },
  {
    "path": "backend/tests/unit/onyx/db/test_projects_upload_task_expiry.py",
    "content": "\"\"\"\nUnit test verifying that the upload API path sends tasks with expires=.\n\nThe upload_files_to_user_files_with_indexing function must include expires=\non every send_task call to prevent phantom task accumulation if the worker\nis down or slow.\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nfrom onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES\nfrom onyx.configs.constants import OnyxCeleryQueues\nfrom onyx.configs.constants import OnyxCeleryTask\nfrom onyx.db.models import UserFile\nfrom onyx.db.projects import upload_files_to_user_files_with_indexing\n\n\ndef _make_mock_user_file() -> MagicMock:\n    uf = MagicMock(spec=UserFile)\n    uf.id = str(uuid4())\n    return uf\n\n\n@patch(\"onyx.db.projects.get_current_tenant_id\", return_value=\"test_tenant\")\n@patch(\"onyx.db.projects.create_user_files\")\n@patch(\n    \"onyx.background.celery.versioned_apps.client.app\",\n    new_callable=MagicMock,\n)\ndef test_send_task_includes_expires(\n    mock_client_app: MagicMock,\n    mock_create: MagicMock,\n    mock_tenant: MagicMock,  # noqa: ARG001\n) -> None:\n    \"\"\"Every send_task call from the upload path must include expires=.\"\"\"\n    user_files = [_make_mock_user_file(), _make_mock_user_file()]\n    mock_create.return_value = MagicMock(\n        user_files=user_files,\n        rejected_files=[],\n        id_to_temp_id={},\n        skip_indexing_filenames=set(),\n        indexable_files=user_files,\n    )\n\n    mock_user = MagicMock()\n    mock_db_session = MagicMock()\n\n    upload_files_to_user_files_with_indexing(\n        files=[],\n        project_id=None,\n        user=mock_user,\n        temp_id_map=None,\n        db_session=mock_db_session,\n    )\n\n    assert mock_client_app.send_task.call_count == len(user_files)\n\n    for call in mock_client_app.send_task.call_args_list:\n        assert call.args[0] == OnyxCeleryTask.PROCESS_SINGLE_USER_FILE\n        assert call.kwargs.get(\"queue\") == OnyxCeleryQueues.USER_FILE_PROCESSING\n        assert (\n            call.kwargs.get(\"expires\") == CELERY_USER_FILE_PROCESSING_TASK_EXPIRES\n        ), \"send_task must include expires= to prevent phantom task accumulation\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/db/test_scim_dal.py",
    "content": "import logging\nfrom unittest.mock import MagicMock\nfrom uuid import uuid4\n\nimport pytest\n\nfrom ee.onyx.db.scim import ScimDAL\nfrom onyx.db.models import ScimGroupMapping\nfrom onyx.db.models import ScimToken\nfrom onyx.db.models import ScimUserMapping\nfrom tests.unit.onyx.db.conftest import model_attrs\n\n\nclass TestScimDALTokens:\n    \"\"\"Tests for ScimDAL token operations.\"\"\"\n\n    def test_create_token_adds_to_session(\n        self, scim_dal: ScimDAL, mock_db_session: MagicMock\n    ) -> None:\n        user_id = uuid4()\n\n        scim_dal.create_token(\n            name=\"test\",\n            hashed_token=\"abc123\",\n            token_display=\"****abcd\",\n            created_by_id=user_id,\n        )\n\n        mock_db_session.add.assert_called_once()\n        mock_db_session.flush.assert_called_once()\n        added_obj = mock_db_session.add.call_args[0][0]\n        assert model_attrs(added_obj) == {\n            \"name\": \"test\",\n            \"hashed_token\": \"abc123\",\n            \"token_display\": \"****abcd\",\n            \"created_by_id\": user_id,\n        }\n\n    def test_get_token_by_hash_queries_session(\n        self, scim_dal: ScimDAL, mock_db_session: MagicMock\n    ) -> None:\n        token = ScimToken(\n            id=1,\n            name=\"test-token\",\n            hashed_token=\"a\" * 64,\n            token_display=\"onyx_scim_****abcd\",\n            is_active=True,\n            created_by_id=uuid4(),\n        )\n        mock_db_session.scalar.return_value = token\n\n        result = scim_dal.get_token_by_hash(\"a\" * 64)\n\n        assert result is token\n        mock_db_session.scalar.assert_called_once()\n\n    def test_revoke_token_sets_inactive(\n        self, scim_dal: ScimDAL, mock_db_session: MagicMock\n    ) -> None:\n        token = ScimToken(\n            id=1,\n            name=\"test-token\",\n            hashed_token=\"a\" * 64,\n            token_display=\"onyx_scim_****abcd\",\n            is_active=True,\n            created_by_id=uuid4(),\n        )\n        mock_db_session.get.return_value = token\n        expected = model_attrs(token) | {\"is_active\": False}\n\n        scim_dal.revoke_token(1)\n\n        assert model_attrs(token) == expected\n\n    def test_revoke_nonexistent_token_raises(\n        self, scim_dal: ScimDAL, mock_db_session: MagicMock\n    ) -> None:\n        mock_db_session.get.return_value = None\n\n        with pytest.raises(ValueError, match=\"not found\"):\n            scim_dal.revoke_token(999)\n\n\nclass TestScimDALUserMappings:\n    \"\"\"Tests for ScimDAL user mapping operations.\"\"\"\n\n    def test_create_user_mapping(\n        self, scim_dal: ScimDAL, mock_db_session: MagicMock\n    ) -> None:\n        user_id = uuid4()\n\n        scim_dal.create_user_mapping(external_id=\"ext-1\", user_id=user_id)\n\n        mock_db_session.add.assert_called_once()\n        mock_db_session.flush.assert_called_once()\n        added_obj = mock_db_session.add.call_args[0][0]\n        assert model_attrs(added_obj) == {\n            \"external_id\": \"ext-1\",\n            \"user_id\": user_id,\n            \"scim_username\": None,\n            \"department\": None,\n            \"manager\": None,\n            \"given_name\": None,\n            \"family_name\": None,\n            \"scim_emails_json\": None,\n        }\n\n    def test_delete_user_mapping(\n        self, scim_dal: ScimDAL, mock_db_session: MagicMock\n    ) -> None:\n        mapping = ScimUserMapping(id=1, external_id=\"ext-1\", user_id=uuid4())\n        mock_db_session.get.return_value = mapping\n\n        scim_dal.delete_user_mapping(1)\n\n        mock_db_session.delete.assert_called_once_with(mapping)\n\n    def test_delete_nonexistent_user_mapping_is_idempotent(\n        self,\n        scim_dal: ScimDAL,\n        mock_db_session: MagicMock,\n        caplog: pytest.LogCaptureFixture,\n    ) -> None:\n        mock_db_session.get.return_value = None\n\n        with caplog.at_level(logging.WARNING):\n            scim_dal.delete_user_mapping(999)\n\n        mock_db_session.delete.assert_not_called()\n        assert \"SCIM user mapping 999 not found\" in caplog.text\n\n    def test_update_user_mapping_external_id(\n        self, scim_dal: ScimDAL, mock_db_session: MagicMock\n    ) -> None:\n        mapping = ScimUserMapping(id=1, external_id=\"old-id\", user_id=uuid4())\n        mock_db_session.get.return_value = mapping\n        expected = model_attrs(mapping) | {\"external_id\": \"new-id\"}\n\n        result = scim_dal.update_user_mapping_external_id(1, \"new-id\")\n\n        assert result is mapping\n        assert model_attrs(result) == expected\n\n    def test_update_nonexistent_user_mapping_raises(\n        self, scim_dal: ScimDAL, mock_db_session: MagicMock\n    ) -> None:\n        mock_db_session.get.return_value = None\n\n        with pytest.raises(ValueError, match=\"not found\"):\n            scim_dal.update_user_mapping_external_id(999, \"new-id\")\n\n\nclass TestScimDALGroupMappings:\n    \"\"\"Tests for ScimDAL group mapping operations.\"\"\"\n\n    def test_create_group_mapping(\n        self, scim_dal: ScimDAL, mock_db_session: MagicMock\n    ) -> None:\n        scim_dal.create_group_mapping(external_id=\"ext-g1\", user_group_id=5)\n\n        mock_db_session.add.assert_called_once()\n        mock_db_session.flush.assert_called_once()\n        added_obj = mock_db_session.add.call_args[0][0]\n        assert model_attrs(added_obj) == {\n            \"external_id\": \"ext-g1\",\n            \"user_group_id\": 5,\n        }\n\n    def test_delete_group_mapping(\n        self, scim_dal: ScimDAL, mock_db_session: MagicMock\n    ) -> None:\n        mapping = ScimGroupMapping(id=1, external_id=\"ext-g1\", user_group_id=10)\n        mock_db_session.get.return_value = mapping\n\n        scim_dal.delete_group_mapping(1)\n\n        mock_db_session.delete.assert_called_once_with(mapping)\n\n    def test_delete_nonexistent_group_mapping_is_idempotent(\n        self,\n        scim_dal: ScimDAL,\n        mock_db_session: MagicMock,\n        caplog: pytest.LogCaptureFixture,\n    ) -> None:\n        mock_db_session.get.return_value = None\n\n        with caplog.at_level(logging.WARNING):\n            scim_dal.delete_group_mapping(999)\n\n        mock_db_session.delete.assert_not_called()\n        assert \"SCIM group mapping 999 not found\" in caplog.text\n"
  },
  {
    "path": "backend/tests/unit/onyx/db/test_tools.py",
    "content": "from unittest.mock import MagicMock\nfrom uuid import uuid4\n\nfrom onyx.db import tools as tools_mod\n\n\ndef test_create_tool_call_no_commit_sanitizes_fields() -> None:\n    mock_session = MagicMock()\n\n    tool_call = tools_mod.create_tool_call_no_commit(\n        chat_session_id=uuid4(),\n        parent_chat_message_id=1,\n        turn_number=0,\n        tool_id=1,\n        tool_call_id=\"tc-1\",\n        tool_call_arguments={\"task\\x00\": \"research\\ud800 topic\"},\n        tool_call_response=\"report\\x00 text\\udfff here\",\n        tool_call_tokens=10,\n        db_session=mock_session,\n        reasoning_tokens=\"reason\\x00ing\\ud800\",\n        generated_images=[{\"url\": \"img\\x00.png\\udfff\"}],\n    )\n\n    assert tool_call.tool_call_response == \"report text here\"\n    assert tool_call.reasoning_tokens == \"reasoning\"\n    assert tool_call.tool_call_arguments == {\"task\": \"research topic\"}\n    assert tool_call.generated_images == [{\"url\": \"img.png\"}]\n"
  },
  {
    "path": "backend/tests/unit/onyx/db/test_usage.py",
    "content": "\"\"\"Unit tests for tenant usage tracking and limits.\"\"\"\n\nfrom datetime import datetime\nfrom datetime import timezone\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.db.usage import check_usage_limit\nfrom onyx.db.usage import get_current_window_start\nfrom onyx.db.usage import get_or_create_tenant_usage\nfrom onyx.db.usage import get_tenant_usage_stats\nfrom onyx.db.usage import increment_usage\nfrom onyx.db.usage import TenantUsageStats\nfrom onyx.db.usage import UsageLimitExceededError\nfrom onyx.db.usage import UsageType\n\n\nclass TestGetCurrentWindowStart:\n    \"\"\"Tests for get_current_window_start function.\"\"\"\n\n    def test_weekly_window_aligns_to_monday(self) -> None:\n        \"\"\"Test that weekly windows align to Monday 00:00 UTC.\"\"\"\n        with patch(\"onyx.db.usage.USAGE_LIMIT_WINDOW_SECONDS\", 604800):  # 1 week\n            window_start = get_current_window_start()\n\n            # Window should be on a Monday\n            assert window_start.weekday() == 0  # Monday\n\n            # Window should be at midnight UTC\n            assert window_start.hour == 0\n            assert window_start.minute == 0\n            assert window_start.second == 0\n            assert window_start.microsecond == 0\n\n    def test_window_start_is_timezone_aware(self) -> None:\n        \"\"\"Test that window start is timezone-aware.\"\"\"\n        window_start = get_current_window_start()\n        assert window_start.tzinfo is not None\n\n\nclass TestGetOrCreateTenantUsage:\n    \"\"\"Tests for get_or_create_tenant_usage function.\"\"\"\n\n    def test_creates_or_gets_usage_record(self) -> None:\n        \"\"\"Test that get_or_create returns a usage record via atomic upsert.\"\"\"\n        mock_usage = MagicMock()\n        mock_usage.llm_cost_cents = 0.0\n        mock_usage.chunks_indexed = 0\n\n        mock_session = MagicMock()\n        # The new implementation uses INSERT ... ON CONFLICT with RETURNING\n        # which calls execute().scalar_one()\n        mock_session.execute.return_value.scalar_one.return_value = mock_usage\n\n        window_start = datetime(2024, 1, 1, tzinfo=timezone.utc)\n        usage = get_or_create_tenant_usage(mock_session, window_start)\n\n        # Verify execute was called (with the INSERT ... ON CONFLICT statement)\n        mock_session.execute.assert_called_once()\n        mock_session.flush.assert_called_once()\n        assert usage == mock_usage\n\n    def test_returns_usage_record_from_atomic_upsert(self) -> None:\n        \"\"\"Test that the returned usage record comes from the atomic upsert.\"\"\"\n        mock_usage = MagicMock()\n        mock_usage.llm_cost_cents = 100.0\n        mock_usage.chunks_indexed = 500\n\n        mock_session = MagicMock()\n        mock_session.execute.return_value.scalar_one.return_value = mock_usage\n\n        window_start = datetime(2024, 1, 1, tzinfo=timezone.utc)\n        usage = get_or_create_tenant_usage(mock_session, window_start)\n\n        assert usage == mock_usage\n        assert usage.llm_cost_cents == 100.0\n        assert usage.chunks_indexed == 500\n\n\nclass TestGetTenantUsageStats:\n    \"\"\"Tests for get_tenant_usage_stats function.\"\"\"\n\n    def test_returns_zero_stats_when_no_record_exists(self) -> None:\n        \"\"\"Test that zero stats are returned when no usage record exists.\"\"\"\n        mock_session = MagicMock()\n        mock_session.execute.return_value.scalar_one_or_none.return_value = None\n\n        window_start = datetime(2024, 1, 1, tzinfo=timezone.utc)\n        stats = get_tenant_usage_stats(mock_session, window_start)\n\n        assert stats.llm_cost_cents == 0.0\n        assert stats.chunks_indexed == 0\n        assert stats.api_calls == 0\n        assert stats.non_streaming_api_calls == 0\n\n    def test_returns_actual_stats_when_record_exists(self) -> None:\n        \"\"\"Test that actual stats are returned when usage record exists.\"\"\"\n        mock_usage = MagicMock()\n        mock_usage.window_start = datetime(2024, 1, 1, tzinfo=timezone.utc)\n        mock_usage.llm_cost_cents = 250.5\n        mock_usage.chunks_indexed = 1000\n        mock_usage.api_calls = 50\n        mock_usage.non_streaming_api_calls = 10\n\n        mock_session = MagicMock()\n        mock_session.execute.return_value.scalar_one_or_none.return_value = mock_usage\n\n        stats = get_tenant_usage_stats(mock_session)\n\n        assert stats.llm_cost_cents == 250.5\n        assert stats.chunks_indexed == 1000\n        assert stats.api_calls == 50\n        assert stats.non_streaming_api_calls == 10\n\n\nclass TestIncrementUsage:\n    \"\"\"Tests for increment_usage function.\"\"\"\n\n    def test_increments_llm_cost(self) -> None:\n        \"\"\"Test that LLM cost is incremented correctly.\"\"\"\n        mock_usage = MagicMock()\n        mock_usage.llm_cost_cents = 100.0\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.db.usage.get_or_create_tenant_usage\", return_value=mock_usage):\n            increment_usage(mock_session, UsageType.LLM_COST, 50.5)\n\n        assert mock_usage.llm_cost_cents == 150.5\n        mock_session.flush.assert_called_once()\n\n    def test_increments_chunks_indexed(self) -> None:\n        \"\"\"Test that chunks indexed is incremented correctly.\"\"\"\n        mock_usage = MagicMock()\n        mock_usage.chunks_indexed = 500\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.db.usage.get_or_create_tenant_usage\", return_value=mock_usage):\n            increment_usage(mock_session, UsageType.CHUNKS_INDEXED, 100)\n\n        assert mock_usage.chunks_indexed == 600\n\n    def test_increments_api_calls(self) -> None:\n        \"\"\"Test that API calls is incremented correctly.\"\"\"\n        mock_usage = MagicMock()\n        mock_usage.api_calls = 10\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.db.usage.get_or_create_tenant_usage\", return_value=mock_usage):\n            increment_usage(mock_session, UsageType.API_CALLS, 1)\n\n        assert mock_usage.api_calls == 11\n\n    def test_increments_non_streaming_calls(self) -> None:\n        \"\"\"Test that non-streaming API calls is incremented correctly.\"\"\"\n        mock_usage = MagicMock()\n        mock_usage.non_streaming_api_calls = 5\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.db.usage.get_or_create_tenant_usage\", return_value=mock_usage):\n            increment_usage(mock_session, UsageType.NON_STREAMING_API_CALLS, 1)\n\n        assert mock_usage.non_streaming_api_calls == 6\n\n\nclass TestCheckUsageLimit:\n    \"\"\"Tests for check_usage_limit function.\"\"\"\n\n    def test_passes_when_under_limit(self) -> None:\n        \"\"\"Test that check passes when usage is under the limit.\"\"\"\n        mock_session = MagicMock()\n\n        mock_stats = TenantUsageStats(\n            window_start=datetime(2024, 1, 1, tzinfo=timezone.utc),\n            llm_cost_cents=100.0,\n            chunks_indexed=500,\n            api_calls=10,\n            non_streaming_api_calls=5,\n        )\n\n        with patch(\"onyx.db.usage.get_tenant_usage_stats\", return_value=mock_stats):\n            # Should not raise\n            check_usage_limit(\n                mock_session,\n                UsageType.LLM_COST,\n                limit=500,\n                pending_amount=0,\n            )\n\n    def test_passes_when_exactly_at_limit(self) -> None:\n        \"\"\"Test that check passes when usage is exactly at the limit.\"\"\"\n        mock_session = MagicMock()\n\n        mock_stats = TenantUsageStats(\n            window_start=datetime(2024, 1, 1, tzinfo=timezone.utc),\n            llm_cost_cents=500.0,\n            chunks_indexed=500,\n            api_calls=10,\n            non_streaming_api_calls=5,\n        )\n\n        with patch(\"onyx.db.usage.get_tenant_usage_stats\", return_value=mock_stats):\n            # Should not raise - at limit but not over\n            check_usage_limit(\n                mock_session,\n                UsageType.LLM_COST,\n                limit=500,\n                pending_amount=0,\n            )\n\n    def test_fails_when_over_limit(self) -> None:\n        \"\"\"Test that check fails when usage exceeds the limit.\"\"\"\n        mock_session = MagicMock()\n\n        mock_stats = TenantUsageStats(\n            window_start=datetime(2024, 1, 1, tzinfo=timezone.utc),\n            llm_cost_cents=501.0,\n            chunks_indexed=500,\n            api_calls=10,\n            non_streaming_api_calls=5,\n        )\n\n        with patch(\"onyx.db.usage.get_tenant_usage_stats\", return_value=mock_stats):\n            with pytest.raises(UsageLimitExceededError) as exc_info:\n                check_usage_limit(\n                    mock_session,\n                    UsageType.LLM_COST,\n                    limit=500,\n                    pending_amount=0,\n                )\n\n            assert exc_info.value.usage_type == UsageType.LLM_COST\n            assert exc_info.value.current == 501.0\n            assert exc_info.value.limit == 500.0\n\n    def test_fails_when_pending_would_exceed_limit(self) -> None:\n        \"\"\"Test that check fails when pending amount would exceed the limit.\"\"\"\n        mock_session = MagicMock()\n\n        mock_stats = TenantUsageStats(\n            window_start=datetime(2024, 1, 1, tzinfo=timezone.utc),\n            llm_cost_cents=400.0,\n            chunks_indexed=500,\n            api_calls=10,\n            non_streaming_api_calls=5,\n        )\n\n        with patch(\"onyx.db.usage.get_tenant_usage_stats\", return_value=mock_stats):\n            with pytest.raises(UsageLimitExceededError) as exc_info:\n                check_usage_limit(\n                    mock_session,\n                    UsageType.LLM_COST,\n                    limit=500,\n                    pending_amount=150,  # 400 + 150 = 550 > 500\n                )\n\n            assert exc_info.value.current == 550.0  # includes pending\n\n    def test_checks_chunks_indexed_limit(self) -> None:\n        \"\"\"Test that chunk indexing limit is checked correctly.\"\"\"\n        mock_session = MagicMock()\n\n        mock_stats = TenantUsageStats(\n            window_start=datetime(2024, 1, 1, tzinfo=timezone.utc),\n            llm_cost_cents=100.0,\n            chunks_indexed=10001,\n            api_calls=10,\n            non_streaming_api_calls=5,\n        )\n\n        with patch(\"onyx.db.usage.get_tenant_usage_stats\", return_value=mock_stats):\n            with pytest.raises(UsageLimitExceededError) as exc_info:\n                check_usage_limit(\n                    mock_session,\n                    UsageType.CHUNKS_INDEXED,\n                    limit=10000,\n                    pending_amount=0,\n                )\n\n            assert exc_info.value.usage_type == UsageType.CHUNKS_INDEXED\n\n\nclass TestUsageLimitExceededError:\n    \"\"\"Tests for UsageLimitExceededError exception.\"\"\"\n\n    def test_error_message_format(self) -> None:\n        \"\"\"Test that error message is formatted correctly.\"\"\"\n        error = UsageLimitExceededError(\n            usage_type=UsageType.LLM_COST,\n            current=150.5,\n            limit=100.0,\n        )\n\n        assert \"llm_cost_cents\" in str(error)\n        assert \"150.5\" in str(error)\n        assert \"100\" in str(error)\n\n    def test_stores_values(self) -> None:\n        \"\"\"Test that error stores all values correctly.\"\"\"\n        error = UsageLimitExceededError(\n            usage_type=UsageType.API_CALLS,\n            current=1001,\n            limit=1000,\n        )\n\n        assert error.usage_type == UsageType.API_CALLS\n        assert error.current == 1001\n        assert error.limit == 1000\n\n\nclass TestWindowRollover:\n    \"\"\"Tests for window rollover behavior.\"\"\"\n\n    def test_new_window_resets_usage(self) -> None:\n        \"\"\"Test that a new window has zero usage even if previous window had usage.\"\"\"\n        mock_session = MagicMock()\n        mock_session.execute.return_value.scalar_one_or_none.return_value = None\n\n        # Get stats for a new window (no existing record)\n        with patch(\n            \"onyx.db.usage.get_current_window_start\",\n            return_value=datetime(2024, 1, 8, tzinfo=timezone.utc),\n        ):\n            stats = get_tenant_usage_stats(mock_session)\n\n        # New window should have zero usage\n        assert stats.llm_cost_cents == 0.0\n        assert stats.chunks_indexed == 0\n        assert stats.api_calls == 0\n        assert stats.non_streaming_api_calls == 0\n"
  },
  {
    "path": "backend/tests/unit/onyx/db/test_voice.py",
    "content": "\"\"\"Unit tests for onyx.db.voice module.\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom uuid import uuid4\n\nimport pytest\n\nfrom onyx.db.models import VoiceProvider\nfrom onyx.db.voice import deactivate_stt_provider\nfrom onyx.db.voice import deactivate_tts_provider\nfrom onyx.db.voice import delete_voice_provider\nfrom onyx.db.voice import fetch_default_stt_provider\nfrom onyx.db.voice import fetch_default_tts_provider\nfrom onyx.db.voice import fetch_voice_provider_by_id\nfrom onyx.db.voice import fetch_voice_provider_by_type\nfrom onyx.db.voice import fetch_voice_providers\nfrom onyx.db.voice import MAX_VOICE_PLAYBACK_SPEED\nfrom onyx.db.voice import MIN_VOICE_PLAYBACK_SPEED\nfrom onyx.db.voice import set_default_stt_provider\nfrom onyx.db.voice import set_default_tts_provider\nfrom onyx.db.voice import update_user_voice_settings\nfrom onyx.db.voice import upsert_voice_provider\nfrom onyx.error_handling.exceptions import OnyxError\n\n\ndef _make_voice_provider(\n    id: int = 1,\n    name: str = \"Test Provider\",\n    provider_type: str = \"openai\",\n    is_default_stt: bool = False,\n    is_default_tts: bool = False,\n) -> VoiceProvider:\n    \"\"\"Create a VoiceProvider instance for testing.\"\"\"\n    provider = VoiceProvider()\n    provider.id = id\n    provider.name = name\n    provider.provider_type = provider_type\n    provider.is_default_stt = is_default_stt\n    provider.is_default_tts = is_default_tts\n    provider.api_key = None\n    provider.api_base = None\n    provider.custom_config = None\n    provider.stt_model = None\n    provider.tts_model = None\n    provider.default_voice = None\n    return provider\n\n\nclass TestFetchVoiceProviders:\n    \"\"\"Tests for fetch_voice_providers.\"\"\"\n\n    def test_returns_all_providers(self, mock_db_session: MagicMock) -> None:\n        providers = [\n            _make_voice_provider(id=1, name=\"Provider A\"),\n            _make_voice_provider(id=2, name=\"Provider B\"),\n        ]\n        mock_db_session.scalars.return_value.all.return_value = providers\n\n        result = fetch_voice_providers(mock_db_session)\n\n        assert result == providers\n        mock_db_session.scalars.assert_called_once()\n\n    def test_returns_empty_list_when_no_providers(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        mock_db_session.scalars.return_value.all.return_value = []\n\n        result = fetch_voice_providers(mock_db_session)\n\n        assert result == []\n\n\nclass TestFetchVoiceProviderById:\n    \"\"\"Tests for fetch_voice_provider_by_id.\"\"\"\n\n    def test_returns_provider_when_found(self, mock_db_session: MagicMock) -> None:\n        provider = _make_voice_provider(id=1)\n        mock_db_session.scalar.return_value = provider\n\n        result = fetch_voice_provider_by_id(mock_db_session, 1)\n\n        assert result is provider\n        mock_db_session.scalar.assert_called_once()\n\n    def test_returns_none_when_not_found(self, mock_db_session: MagicMock) -> None:\n        mock_db_session.scalar.return_value = None\n\n        result = fetch_voice_provider_by_id(mock_db_session, 999)\n\n        assert result is None\n\n\nclass TestFetchDefaultProviders:\n    \"\"\"Tests for fetch_default_stt_provider and fetch_default_tts_provider.\"\"\"\n\n    def test_fetch_default_stt_provider_returns_provider(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        provider = _make_voice_provider(id=1, is_default_stt=True)\n        mock_db_session.scalar.return_value = provider\n\n        result = fetch_default_stt_provider(mock_db_session)\n\n        assert result is provider\n\n    def test_fetch_default_stt_provider_returns_none_when_no_default(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        mock_db_session.scalar.return_value = None\n\n        result = fetch_default_stt_provider(mock_db_session)\n\n        assert result is None\n\n    def test_fetch_default_tts_provider_returns_provider(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        provider = _make_voice_provider(id=1, is_default_tts=True)\n        mock_db_session.scalar.return_value = provider\n\n        result = fetch_default_tts_provider(mock_db_session)\n\n        assert result is provider\n\n    def test_fetch_default_tts_provider_returns_none_when_no_default(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        mock_db_session.scalar.return_value = None\n\n        result = fetch_default_tts_provider(mock_db_session)\n\n        assert result is None\n\n\nclass TestFetchVoiceProviderByType:\n    \"\"\"Tests for fetch_voice_provider_by_type.\"\"\"\n\n    def test_returns_provider_when_found(self, mock_db_session: MagicMock) -> None:\n        provider = _make_voice_provider(id=1, provider_type=\"openai\")\n        mock_db_session.scalar.return_value = provider\n\n        result = fetch_voice_provider_by_type(mock_db_session, \"openai\")\n\n        assert result is provider\n\n    def test_returns_none_when_not_found(self, mock_db_session: MagicMock) -> None:\n        mock_db_session.scalar.return_value = None\n\n        result = fetch_voice_provider_by_type(mock_db_session, \"nonexistent\")\n\n        assert result is None\n\n\nclass TestUpsertVoiceProvider:\n    \"\"\"Tests for upsert_voice_provider.\"\"\"\n\n    def test_creates_new_provider_when_no_id(self, mock_db_session: MagicMock) -> None:\n        mock_db_session.flush.return_value = None\n        mock_db_session.refresh.return_value = None\n\n        upsert_voice_provider(\n            db_session=mock_db_session,\n            provider_id=None,\n            name=\"New Provider\",\n            provider_type=\"openai\",\n            api_key=\"test-key\",\n            api_key_changed=True,\n        )\n\n        mock_db_session.add.assert_called_once()\n        mock_db_session.flush.assert_called()\n        added_obj = mock_db_session.add.call_args[0][0]\n        assert added_obj.name == \"New Provider\"\n        assert added_obj.provider_type == \"openai\"\n\n    def test_updates_existing_provider(self, mock_db_session: MagicMock) -> None:\n        existing_provider = _make_voice_provider(id=1, name=\"Old Name\")\n        mock_db_session.scalar.return_value = existing_provider\n        mock_db_session.flush.return_value = None\n        mock_db_session.refresh.return_value = None\n\n        upsert_voice_provider(\n            db_session=mock_db_session,\n            provider_id=1,\n            name=\"Updated Name\",\n            provider_type=\"elevenlabs\",\n            api_key=\"new-key\",\n            api_key_changed=True,\n        )\n\n        mock_db_session.add.assert_not_called()\n        assert existing_provider.name == \"Updated Name\"\n        assert existing_provider.provider_type == \"elevenlabs\"\n\n    def test_raises_when_provider_not_found(self, mock_db_session: MagicMock) -> None:\n        mock_db_session.scalar.return_value = None\n\n        with pytest.raises(OnyxError) as exc_info:\n            upsert_voice_provider(\n                db_session=mock_db_session,\n                provider_id=999,\n                name=\"Test\",\n                provider_type=\"openai\",\n                api_key=None,\n                api_key_changed=False,\n            )\n\n        assert \"No voice provider with id 999\" in str(exc_info.value)\n\n    def test_does_not_update_api_key_when_not_changed(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        existing_provider = _make_voice_provider(id=1)\n        existing_provider.api_key = \"original-key\"  # type: ignore[assignment]\n        original_api_key = existing_provider.api_key\n        mock_db_session.scalar.return_value = existing_provider\n        mock_db_session.flush.return_value = None\n        mock_db_session.refresh.return_value = None\n\n        upsert_voice_provider(\n            db_session=mock_db_session,\n            provider_id=1,\n            name=\"Test\",\n            provider_type=\"openai\",\n            api_key=\"new-key\",\n            api_key_changed=False,\n        )\n\n        # api_key should remain unchanged (same object reference)\n        assert existing_provider.api_key is original_api_key\n\n    def test_activates_stt_when_requested(self, mock_db_session: MagicMock) -> None:\n        existing_provider = _make_voice_provider(id=1)\n        mock_db_session.scalar.return_value = existing_provider\n        mock_db_session.flush.return_value = None\n        mock_db_session.refresh.return_value = None\n        mock_db_session.execute.return_value = None\n\n        upsert_voice_provider(\n            db_session=mock_db_session,\n            provider_id=1,\n            name=\"Test\",\n            provider_type=\"openai\",\n            api_key=None,\n            api_key_changed=False,\n            activate_stt=True,\n        )\n\n        assert existing_provider.is_default_stt is True\n\n    def test_activates_tts_when_requested(self, mock_db_session: MagicMock) -> None:\n        existing_provider = _make_voice_provider(id=1)\n        mock_db_session.scalar.return_value = existing_provider\n        mock_db_session.flush.return_value = None\n        mock_db_session.refresh.return_value = None\n        mock_db_session.execute.return_value = None\n\n        upsert_voice_provider(\n            db_session=mock_db_session,\n            provider_id=1,\n            name=\"Test\",\n            provider_type=\"openai\",\n            api_key=None,\n            api_key_changed=False,\n            activate_tts=True,\n        )\n\n        assert existing_provider.is_default_tts is True\n\n\nclass TestDeleteVoiceProvider:\n    \"\"\"Tests for delete_voice_provider.\"\"\"\n\n    def test_hard_deletes_provider_when_found(self, mock_db_session: MagicMock) -> None:\n        provider = _make_voice_provider(id=1)\n        mock_db_session.scalar.return_value = provider\n\n        delete_voice_provider(mock_db_session, 1)\n\n        mock_db_session.delete.assert_called_once_with(provider)\n        mock_db_session.flush.assert_called_once()\n\n    def test_does_nothing_when_provider_not_found(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        mock_db_session.scalar.return_value = None\n\n        delete_voice_provider(mock_db_session, 999)\n\n        mock_db_session.flush.assert_not_called()\n\n\nclass TestSetDefaultProviders:\n    \"\"\"Tests for set_default_stt_provider and set_default_tts_provider.\"\"\"\n\n    def test_set_default_stt_provider_deactivates_others(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        provider = _make_voice_provider(id=1)\n        mock_db_session.scalar.return_value = provider\n        mock_db_session.execute.return_value = None\n        mock_db_session.flush.return_value = None\n        mock_db_session.refresh.return_value = None\n\n        result = set_default_stt_provider(db_session=mock_db_session, provider_id=1)\n\n        mock_db_session.execute.assert_called_once()\n        assert result.is_default_stt is True\n\n    def test_set_default_stt_provider_raises_when_not_found(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        mock_db_session.scalar.return_value = None\n\n        with pytest.raises(OnyxError) as exc_info:\n            set_default_stt_provider(db_session=mock_db_session, provider_id=999)\n\n        assert \"No voice provider with id 999\" in str(exc_info.value)\n\n    def test_set_default_tts_provider_deactivates_others(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        provider = _make_voice_provider(id=1)\n        mock_db_session.scalar.return_value = provider\n        mock_db_session.execute.return_value = None\n        mock_db_session.flush.return_value = None\n        mock_db_session.refresh.return_value = None\n\n        result = set_default_tts_provider(db_session=mock_db_session, provider_id=1)\n\n        mock_db_session.execute.assert_called_once()\n        assert result.is_default_tts is True\n\n    def test_set_default_tts_provider_updates_model_when_provided(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        provider = _make_voice_provider(id=1)\n        mock_db_session.scalar.return_value = provider\n        mock_db_session.execute.return_value = None\n        mock_db_session.flush.return_value = None\n        mock_db_session.refresh.return_value = None\n\n        result = set_default_tts_provider(\n            db_session=mock_db_session, provider_id=1, tts_model=\"tts-1-hd\"\n        )\n\n        assert result.tts_model == \"tts-1-hd\"\n\n    def test_set_default_tts_provider_raises_when_not_found(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        mock_db_session.scalar.return_value = None\n\n        with pytest.raises(OnyxError) as exc_info:\n            set_default_tts_provider(db_session=mock_db_session, provider_id=999)\n\n        assert \"No voice provider with id 999\" in str(exc_info.value)\n\n\nclass TestDeactivateProviders:\n    \"\"\"Tests for deactivate_stt_provider and deactivate_tts_provider.\"\"\"\n\n    def test_deactivate_stt_provider_sets_false(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        provider = _make_voice_provider(id=1, is_default_stt=True)\n        mock_db_session.scalar.return_value = provider\n        mock_db_session.flush.return_value = None\n        mock_db_session.refresh.return_value = None\n\n        result = deactivate_stt_provider(db_session=mock_db_session, provider_id=1)\n\n        assert result.is_default_stt is False\n\n    def test_deactivate_stt_provider_raises_when_not_found(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        mock_db_session.scalar.return_value = None\n\n        with pytest.raises(OnyxError) as exc_info:\n            deactivate_stt_provider(db_session=mock_db_session, provider_id=999)\n\n        assert \"No voice provider with id 999\" in str(exc_info.value)\n\n    def test_deactivate_tts_provider_sets_false(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        provider = _make_voice_provider(id=1, is_default_tts=True)\n        mock_db_session.scalar.return_value = provider\n        mock_db_session.flush.return_value = None\n        mock_db_session.refresh.return_value = None\n\n        result = deactivate_tts_provider(db_session=mock_db_session, provider_id=1)\n\n        assert result.is_default_tts is False\n\n    def test_deactivate_tts_provider_raises_when_not_found(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        mock_db_session.scalar.return_value = None\n\n        with pytest.raises(OnyxError) as exc_info:\n            deactivate_tts_provider(db_session=mock_db_session, provider_id=999)\n\n        assert \"No voice provider with id 999\" in str(exc_info.value)\n\n\nclass TestUpdateUserVoiceSettings:\n    \"\"\"Tests for update_user_voice_settings.\"\"\"\n\n    def test_updates_auto_send(self, mock_db_session: MagicMock) -> None:\n        user_id = uuid4()\n\n        update_user_voice_settings(mock_db_session, user_id, auto_send=True)\n\n        mock_db_session.execute.assert_called_once()\n        mock_db_session.flush.assert_called_once()\n\n    def test_updates_auto_playback(self, mock_db_session: MagicMock) -> None:\n        user_id = uuid4()\n\n        update_user_voice_settings(mock_db_session, user_id, auto_playback=True)\n\n        mock_db_session.execute.assert_called_once()\n        mock_db_session.flush.assert_called_once()\n\n    def test_updates_playback_speed_within_range(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        user_id = uuid4()\n\n        update_user_voice_settings(mock_db_session, user_id, playback_speed=1.5)\n\n        mock_db_session.execute.assert_called_once()\n\n    def test_clamps_playback_speed_to_min(self, mock_db_session: MagicMock) -> None:\n        user_id = uuid4()\n\n        update_user_voice_settings(mock_db_session, user_id, playback_speed=0.1)\n\n        mock_db_session.execute.assert_called_once()\n        stmt = mock_db_session.execute.call_args[0][0]\n        compiled = stmt.compile(compile_kwargs={\"literal_binds\": True})\n        assert str(MIN_VOICE_PLAYBACK_SPEED) in str(compiled)\n\n    def test_clamps_playback_speed_to_max(self, mock_db_session: MagicMock) -> None:\n        user_id = uuid4()\n\n        update_user_voice_settings(mock_db_session, user_id, playback_speed=5.0)\n\n        mock_db_session.execute.assert_called_once()\n        stmt = mock_db_session.execute.call_args[0][0]\n        compiled = stmt.compile(compile_kwargs={\"literal_binds\": True})\n        assert str(MAX_VOICE_PLAYBACK_SPEED) in str(compiled)\n\n    def test_updates_multiple_settings(self, mock_db_session: MagicMock) -> None:\n        user_id = uuid4()\n\n        update_user_voice_settings(\n            mock_db_session,\n            user_id,\n            auto_send=True,\n            auto_playback=False,\n            playback_speed=1.25,\n        )\n\n        mock_db_session.execute.assert_called_once()\n        mock_db_session.flush.assert_called_once()\n\n    def test_does_nothing_when_no_settings_provided(\n        self, mock_db_session: MagicMock\n    ) -> None:\n        user_id = uuid4()\n\n        update_user_voice_settings(mock_db_session, user_id)\n\n        mock_db_session.execute.assert_not_called()\n        mock_db_session.flush.assert_not_called()\n\n\nclass TestSpeedClampingLogic:\n    \"\"\"Tests for the speed clamping constants and logic.\"\"\"\n\n    def test_min_speed_constant(self) -> None:\n        assert MIN_VOICE_PLAYBACK_SPEED == 0.5\n\n    def test_max_speed_constant(self) -> None:\n        assert MAX_VOICE_PLAYBACK_SPEED == 2.0\n\n    def test_clamping_formula(self) -> None:\n        \"\"\"Verify the clamping formula used in update_user_voice_settings.\"\"\"\n        test_cases = [\n            (0.1, MIN_VOICE_PLAYBACK_SPEED),\n            (0.5, 0.5),\n            (1.0, 1.0),\n            (1.5, 1.5),\n            (2.0, 2.0),\n            (3.0, MAX_VOICE_PLAYBACK_SPEED),\n        ]\n        for speed, expected in test_cases:\n            clamped = max(\n                MIN_VOICE_PLAYBACK_SPEED, min(MAX_VOICE_PLAYBACK_SPEED, speed)\n            )\n            assert (\n                clamped == expected\n            ), f\"speed={speed} expected={expected} got={clamped}\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/document_index/opensearch/test_get_doc_chunk_id.py",
    "content": "import pytest\n\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE\nfrom onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id\nfrom onyx.document_index.opensearch.string_filtering import (\n    MAX_DOCUMENT_ID_ENCODED_LENGTH,\n)\nfrom shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE\n\n\nSINGLE_TENANT_STATE = TenantState(\n    tenant_id=POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE, multitenant=False\n)\nMULTI_TENANT_STATE = TenantState(\n    tenant_id=\"tenant_abcdef12-3456-7890-abcd-ef1234567890\", multitenant=True\n)\nEXPECTED_SHORT_TENANT = \"abcdef12\"\n\n\nclass TestGetOpensearchDocChunkIdSingleTenant:\n    def test_basic(self) -> None:\n        result = get_opensearch_doc_chunk_id(\n            SINGLE_TENANT_STATE, \"my-doc-id\", chunk_index=0\n        )\n        assert result == f\"my-doc-id__{DEFAULT_MAX_CHUNK_SIZE}__0\"\n\n    def test_custom_chunk_size(self) -> None:\n        result = get_opensearch_doc_chunk_id(\n            SINGLE_TENANT_STATE, \"doc1\", chunk_index=3, max_chunk_size=1024\n        )\n        assert result == \"doc1__1024__3\"\n\n    def test_special_chars_are_stripped(self) -> None:\n        \"\"\"Tests characters not matching [A-Za-z0-9_.-~] are removed.\"\"\"\n        result = get_opensearch_doc_chunk_id(\n            SINGLE_TENANT_STATE, \"doc/with?special#chars&more%stuff\", chunk_index=0\n        )\n        assert \"/\" not in result\n        assert \"?\" not in result\n        assert \"#\" not in result\n        assert result == f\"docwithspecialcharsmorestuff__{DEFAULT_MAX_CHUNK_SIZE}__0\"\n\n    def test_short_doc_id_not_hashed(self) -> None:\n        \"\"\"\n        Tests that a short doc ID should appear directly in the result, not as a\n        hash.\n        \"\"\"\n        doc_id = \"short-id\"\n        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)\n        assert \"short-id\" in result\n\n    def test_long_doc_id_is_hashed(self) -> None:\n        \"\"\"\n        Tests that a doc ID exceeding the max length should be replaced with a\n        blake2b hash.\n        \"\"\"\n        # Create a doc ID that will exceed max length after the suffix is\n        # appended.\n        doc_id = \"a\" * MAX_DOCUMENT_ID_ENCODED_LENGTH\n        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)\n        # The original doc ID should NOT appear in the result.\n        assert doc_id not in result\n        # The suffix should still be present.\n        assert f\"__{DEFAULT_MAX_CHUNK_SIZE}__0\" in result\n\n    def test_long_doc_id_hash_is_deterministic(self) -> None:\n        doc_id = \"x\" * MAX_DOCUMENT_ID_ENCODED_LENGTH\n        result1 = get_opensearch_doc_chunk_id(\n            SINGLE_TENANT_STATE, doc_id, chunk_index=5\n        )\n        result2 = get_opensearch_doc_chunk_id(\n            SINGLE_TENANT_STATE, doc_id, chunk_index=5\n        )\n        assert result1 == result2\n\n    def test_long_doc_id_different_inputs_produce_different_hashes(self) -> None:\n        doc_id_a = \"a\" * MAX_DOCUMENT_ID_ENCODED_LENGTH\n        doc_id_b = \"b\" * MAX_DOCUMENT_ID_ENCODED_LENGTH\n        result_a = get_opensearch_doc_chunk_id(\n            SINGLE_TENANT_STATE, doc_id_a, chunk_index=0\n        )\n        result_b = get_opensearch_doc_chunk_id(\n            SINGLE_TENANT_STATE, doc_id_b, chunk_index=0\n        )\n        assert result_a != result_b\n\n    def test_result_never_exceeds_max_length(self) -> None:\n        \"\"\"\n        Tests that the final result should always be under\n        MAX_DOCUMENT_ID_ENCODED_LENGTH bytes.\n        \"\"\"\n        doc_id = \"z\" * (MAX_DOCUMENT_ID_ENCODED_LENGTH * 2)\n        result = get_opensearch_doc_chunk_id(\n            SINGLE_TENANT_STATE, doc_id, chunk_index=999, max_chunk_size=99999\n        )\n        assert len(result.encode(\"utf-8\")) < MAX_DOCUMENT_ID_ENCODED_LENGTH\n\n    def test_no_tenant_prefix_in_single_tenant(self) -> None:\n        result = get_opensearch_doc_chunk_id(\n            SINGLE_TENANT_STATE, \"mydoc\", chunk_index=0\n        )\n        assert not result.startswith(SINGLE_TENANT_STATE.tenant_id)\n\n\nclass TestGetOpensearchDocChunkIdMultiTenant:\n    def test_includes_tenant_prefix(self) -> None:\n        result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, \"mydoc\", chunk_index=0)\n        assert result.startswith(f\"{EXPECTED_SHORT_TENANT}__\")\n\n    def test_format(self) -> None:\n        result = get_opensearch_doc_chunk_id(\n            MULTI_TENANT_STATE, \"mydoc\", chunk_index=2, max_chunk_size=256\n        )\n        assert result == f\"{EXPECTED_SHORT_TENANT}__mydoc__256__2\"\n\n    def test_long_doc_id_is_hashed_multitenant(self) -> None:\n        doc_id = \"d\" * MAX_DOCUMENT_ID_ENCODED_LENGTH\n        result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, doc_id, chunk_index=0)\n        # Should still have tenant prefix.\n        assert result.startswith(f\"{EXPECTED_SHORT_TENANT}__\")\n        # The original doc ID should NOT appear in the result.\n        assert doc_id not in result\n        # The suffix should still be present.\n        assert f\"__{DEFAULT_MAX_CHUNK_SIZE}__0\" in result\n\n    def test_result_never_exceeds_max_length_multitenant(self) -> None:\n        doc_id = \"q\" * (MAX_DOCUMENT_ID_ENCODED_LENGTH * 2)\n        result = get_opensearch_doc_chunk_id(\n            MULTI_TENANT_STATE, doc_id, chunk_index=999, max_chunk_size=99999\n        )\n        assert len(result.encode(\"utf-8\")) < MAX_DOCUMENT_ID_ENCODED_LENGTH\n\n    def test_different_tenants_produce_different_ids(self) -> None:\n        tenant_a = TenantState(\n            tenant_id=\"tenant_aaaaaaaa-0000-0000-0000-000000000000\", multitenant=True\n        )\n        tenant_b = TenantState(\n            tenant_id=\"tenant_bbbbbbbb-0000-0000-0000-000000000000\", multitenant=True\n        )\n        result_a = get_opensearch_doc_chunk_id(tenant_a, \"same-doc\", chunk_index=0)\n        result_b = get_opensearch_doc_chunk_id(tenant_b, \"same-doc\", chunk_index=0)\n        assert result_a != result_b\n\n\nclass TestGetOpensearchDocChunkIdEdgeCases:\n    def test_chunk_index_zero(self) -> None:\n        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, \"doc\", chunk_index=0)\n        assert result.endswith(\"__0\")\n\n    def test_large_chunk_index(self) -> None:\n        result = get_opensearch_doc_chunk_id(\n            SINGLE_TENANT_STATE, \"doc\", chunk_index=99999\n        )\n        assert result.endswith(\"__99999\")\n\n    def test_doc_id_with_only_special_chars_raises(self) -> None:\n        \"\"\"\n        Tests that a doc ID that becomes empty after filtering should raise\n        ValueError.\n        \"\"\"\n        with pytest.raises(ValueError, match=\"empty after filtering\"):\n            get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, \"###???///\", chunk_index=0)\n\n    def test_doc_id_at_boundary_length(self) -> None:\n        \"\"\"\n        Tests that a doc ID right at the boundary should not be hashed.\n        \"\"\"\n        suffix = f\"__{DEFAULT_MAX_CHUNK_SIZE}__0\"\n        suffix_len = len(suffix.encode(\"utf-8\"))\n        # Max doc ID length that won't trigger hashing (must be <\n        # max_encoded_length).\n        max_doc_len = MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len - 1\n        doc_id = \"a\" * max_doc_len\n        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)\n        assert doc_id in result\n\n    def test_doc_id_at_boundary_length_multitenant(self) -> None:\n        \"\"\"\n        Tests that a doc ID right at the boundary should not be hashed in\n        multitenant mode.\n        \"\"\"\n        suffix = f\"__{DEFAULT_MAX_CHUNK_SIZE}__0\"\n        suffix_len = len(suffix.encode(\"utf-8\"))\n        prefix = f\"{EXPECTED_SHORT_TENANT}__\"\n        prefix_len = len(prefix.encode(\"utf-8\"))\n        # Max doc ID length that won't trigger hashing (must be <\n        # max_encoded_length).\n        max_doc_len = MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len - prefix_len - 1\n        doc_id = \"a\" * max_doc_len\n        result = get_opensearch_doc_chunk_id(MULTI_TENANT_STATE, doc_id, chunk_index=0)\n        assert doc_id in result\n\n    def test_doc_id_one_over_boundary_is_hashed(self) -> None:\n        \"\"\"\n        Tests that a doc ID one byte over the boundary should be hashed.\n        \"\"\"\n        suffix = f\"__{DEFAULT_MAX_CHUNK_SIZE}__0\"\n        suffix_len = len(suffix.encode(\"utf-8\"))\n        # This length will trigger the >= check in filter_and_validate_document_id\n        doc_id = \"a\" * (MAX_DOCUMENT_ID_ENCODED_LENGTH - suffix_len)\n        result = get_opensearch_doc_chunk_id(SINGLE_TENANT_STATE, doc_id, chunk_index=0)\n        assert doc_id not in result\n"
  },
  {
    "path": "backend/tests/unit/onyx/document_index/opensearch/test_opensearch_batch_flush.py",
    "content": "from unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.access.models import DocumentAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import TextSection\nfrom onyx.document_index.interfaces_new import IndexingMetadata\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.opensearch.opensearch_document_index import (\n    OpenSearchDocumentIndex,\n)\nfrom onyx.indexing.models import ChunkEmbedding\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\n\n\ndef _make_chunk(\n    doc_id: str,\n    chunk_id: int,\n) -> DocMetadataAwareIndexChunk:\n    \"\"\"Creates a minimal DocMetadataAwareIndexChunk for testing.\"\"\"\n    doc = Document(\n        id=doc_id,\n        sections=[TextSection(text=\"test\", link=\"http://test.com\")],\n        source=DocumentSource.FILE,\n        semantic_identifier=\"test_doc\",\n        metadata={},\n    )\n    access = DocumentAccess.build(\n        user_emails=[],\n        user_groups=[],\n        external_user_emails=[],\n        external_user_group_ids=[],\n        is_public=True,\n    )\n    return DocMetadataAwareIndexChunk(\n        chunk_id=chunk_id,\n        blurb=\"test\",\n        content=\"test content\",\n        source_links={0: \"http://test.com\"},\n        image_file_id=None,\n        section_continuation=False,\n        source_document=doc,\n        title_prefix=\"\",\n        metadata_suffix_semantic=\"\",\n        metadata_suffix_keyword=\"\",\n        mini_chunk_texts=None,\n        large_chunk_id=None,\n        doc_summary=\"\",\n        chunk_context=\"\",\n        contextual_rag_reserved_tokens=0,\n        embeddings=ChunkEmbedding(full_embedding=[0.1] * 10, mini_chunk_embeddings=[]),\n        title_embedding=[0.1] * 10,\n        tenant_id=\"test_tenant\",\n        access=access,\n        document_sets=set(),\n        user_project=[],\n        personas=[],\n        boost=0,\n        aggregated_chunk_boost_factor=1.0,\n        ancestor_hierarchy_node_ids=[],\n    )\n\n\ndef _make_index() -> tuple[OpenSearchDocumentIndex, MagicMock]:\n    \"\"\"Creates an OpenSearchDocumentIndex with a mocked client.\n    Returns the index and the mock for bulk_index_documents.\"\"\"\n    mock_client = MagicMock()\n    mock_bulk = MagicMock()\n    mock_client.bulk_index_documents = mock_bulk\n\n    tenant_state = TenantState(tenant_id=\"test_tenant\", multitenant=False)\n\n    index = OpenSearchDocumentIndex.__new__(OpenSearchDocumentIndex)\n    index._index_name = \"test_index\"\n    index._client = mock_client\n    index._tenant_state = tenant_state\n\n    return index, mock_bulk\n\n\ndef _make_metadata(doc_id: str, chunk_count: int) -> IndexingMetadata:\n    return IndexingMetadata(\n        doc_id_to_chunk_cnt_diff={\n            doc_id: IndexingMetadata.ChunkCounts(\n                old_chunk_cnt=0,\n                new_chunk_cnt=chunk_count,\n            ),\n        },\n    )\n\n\n@patch(\n    \"onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH\",\n    100,\n)\ndef test_single_doc_under_batch_limit_flushes_once() -> None:\n    \"\"\"A document with fewer chunks than MAX_CHUNKS_PER_DOC_BATCH should flush once.\"\"\"\n    index, mock_bulk = _make_index()\n    doc_id = \"doc_1\"\n    num_chunks = 50\n    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]\n    metadata = _make_metadata(doc_id, num_chunks)\n\n    with patch.object(index, \"delete\", return_value=0):\n        index.index(chunks, metadata)\n\n    assert mock_bulk.call_count == 1\n    batch_arg = mock_bulk.call_args_list[0]\n    assert len(batch_arg.kwargs[\"documents\"]) == num_chunks\n\n\n@patch(\n    \"onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH\",\n    100,\n)\ndef test_single_doc_over_batch_limit_flushes_multiple_times() -> None:\n    \"\"\"A document with more chunks than MAX_CHUNKS_PER_DOC_BATCH should flush multiple times.\"\"\"\n    index, mock_bulk = _make_index()\n    doc_id = \"doc_1\"\n    num_chunks = 250\n    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]\n    metadata = _make_metadata(doc_id, num_chunks)\n\n    with patch.object(index, \"delete\", return_value=0):\n        index.index(chunks, metadata)\n\n    # 250 chunks / 100 per batch = 3 flushes (100 + 100 + 50)\n    assert mock_bulk.call_count == 3\n    batch_sizes = [len(call.kwargs[\"documents\"]) for call in mock_bulk.call_args_list]\n    assert batch_sizes == [100, 100, 50]\n\n\n@patch(\n    \"onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH\",\n    100,\n)\ndef test_single_doc_exactly_at_batch_limit() -> None:\n    \"\"\"A document with exactly MAX_CHUNKS_PER_DOC_BATCH chunks should flush once\n    (the flush happens on the next chunk, not at the boundary).\"\"\"\n    index, mock_bulk = _make_index()\n    doc_id = \"doc_1\"\n    num_chunks = 100\n    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]\n    metadata = _make_metadata(doc_id, num_chunks)\n\n    with patch.object(index, \"delete\", return_value=0):\n        index.index(chunks, metadata)\n\n    # 100 chunks hit the >= check on chunk 101 which doesn't exist,\n    # so final flush handles all 100\n    # Actually: the elif fires when len(current_chunks) >= 100, which happens\n    # when current_chunks has 100 items and the 101st chunk arrives.\n    # With exactly 100 chunks, the 100th chunk makes len == 99, then appended -> 100.\n    # No 101st chunk arrives, so the final flush handles all 100.\n    assert mock_bulk.call_count == 1\n\n\n@patch(\n    \"onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH\",\n    100,\n)\ndef test_single_doc_one_over_batch_limit() -> None:\n    \"\"\"101 chunks for one doc: first 100 flushed when the 101st arrives, then\n    the 101st is flushed at the end.\"\"\"\n    index, mock_bulk = _make_index()\n    doc_id = \"doc_1\"\n    num_chunks = 101\n    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]\n    metadata = _make_metadata(doc_id, num_chunks)\n\n    with patch.object(index, \"delete\", return_value=0):\n        index.index(chunks, metadata)\n\n    assert mock_bulk.call_count == 2\n    batch_sizes = [len(call.kwargs[\"documents\"]) for call in mock_bulk.call_args_list]\n    assert batch_sizes == [100, 1]\n\n\n@patch(\n    \"onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH\",\n    100,\n)\ndef test_multiple_docs_each_under_limit_flush_per_doc() -> None:\n    \"\"\"Multiple documents each under the batch limit should flush once per document.\"\"\"\n    index, mock_bulk = _make_index()\n    chunks = []\n    for doc_idx in range(3):\n        doc_id = f\"doc_{doc_idx}\"\n        for chunk_idx in range(50):\n            chunks.append(_make_chunk(doc_id, chunk_idx))\n\n    metadata = IndexingMetadata(\n        doc_id_to_chunk_cnt_diff={\n            f\"doc_{i}\": IndexingMetadata.ChunkCounts(old_chunk_cnt=0, new_chunk_cnt=50)\n            for i in range(3)\n        },\n    )\n\n    with patch.object(index, \"delete\", return_value=0):\n        index.index(chunks, metadata)\n\n    # 3 documents = 3 flushes (one per doc boundary + final)\n    assert mock_bulk.call_count == 3\n\n\n@patch(\n    \"onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH\",\n    100,\n)\ndef test_delete_called_once_per_document() -> None:\n    \"\"\"Even with multiple flushes for a single document, delete should only be\n    called once per document.\"\"\"\n    index, _mock_bulk = _make_index()\n    doc_id = \"doc_1\"\n    num_chunks = 250\n    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]\n    metadata = _make_metadata(doc_id, num_chunks)\n\n    with patch.object(index, \"delete\", return_value=0) as mock_delete:\n        index.index(chunks, metadata)\n\n    mock_delete.assert_called_once_with(doc_id, None)\n"
  },
  {
    "path": "backend/tests/unit/onyx/document_index/test_disabled_document_index.py",
    "content": "\"\"\"Tests for DisabledDocumentIndex — verifies all methods raise RuntimeError.\n\nThis is the safety net for the DISABLE_VECTOR_DB feature. Every method on\nDisabledDocumentIndex must raise RuntimeError with the standard error message\nso that any accidental vector-DB call is caught immediately.\n\"\"\"\n\nimport re\n\nimport pytest\n\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import QueryExpansionType\nfrom onyx.db.enums import EmbeddingPrecision\nfrom onyx.document_index.disabled import DisabledDocumentIndex\nfrom onyx.document_index.disabled import VECTOR_DB_DISABLED_ERROR\n\nESCAPED_ERROR = re.escape(VECTOR_DB_DISABLED_ERROR)\n\n\n@pytest.fixture\ndef disabled_index() -> DisabledDocumentIndex:\n    return DisabledDocumentIndex(\n        index_name=\"test_index\",\n        secondary_index_name=\"test_secondary\",\n    )\n\n\ndef _stub_filters() -> IndexFilters:\n    return IndexFilters(access_control_list=None)\n\n\n# ------------------------------------------------------------------\n# Verifiable\n# ------------------------------------------------------------------\n\n\ndef test_ensure_indices_exist_no_raises(\n    disabled_index: DisabledDocumentIndex,\n) -> None:\n    disabled_index.ensure_indices_exist(\n        primary_embedding_dim=768,\n        primary_embedding_precision=EmbeddingPrecision.FLOAT,\n        secondary_index_embedding_dim=None,\n        secondary_index_embedding_precision=None,\n    )\n\n\ndef test_register_multitenant_indices_raises() -> None:\n    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):\n        DisabledDocumentIndex.register_multitenant_indices(\n            indices=[\"idx\"],\n            embedding_dims=[768],\n            embedding_precisions=[EmbeddingPrecision.FLOAT],\n        )\n\n\n# ------------------------------------------------------------------\n# Indexable\n# ------------------------------------------------------------------\n\n\ndef test_index_raises(disabled_index: DisabledDocumentIndex) -> None:\n    from dataclasses import dataclass, field\n\n    # We only need a stub — the method raises before inspecting arguments.\n    @dataclass\n    class _StubBatchParams:\n        doc_id_to_previous_chunk_cnt: dict[str, int] = field(default_factory=dict)\n        doc_id_to_new_chunk_cnt: dict[str, int] = field(default_factory=dict)\n        tenant_id: str = \"test\"\n        large_chunks_enabled: bool = False\n\n    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):\n        disabled_index.index(\n            chunks=[],\n            index_batch_params=_StubBatchParams(),  # type: ignore\n        )\n\n\n# ------------------------------------------------------------------\n# Deletable\n# ------------------------------------------------------------------\n\n\ndef test_delete_single_raises(disabled_index: DisabledDocumentIndex) -> None:\n    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):\n        disabled_index.delete_single(\n            doc_id=\"doc-1\",\n            tenant_id=\"test\",\n            chunk_count=None,\n        )\n\n\n# ------------------------------------------------------------------\n# Updatable\n# ------------------------------------------------------------------\n\n\ndef test_update_single_raises(disabled_index: DisabledDocumentIndex) -> None:\n    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):\n        disabled_index.update_single(\n            doc_id=\"doc-1\",\n            tenant_id=\"test\",\n            chunk_count=None,\n            fields=None,\n            user_fields=None,\n        )\n\n\n# ------------------------------------------------------------------\n# IdRetrievalCapable\n# ------------------------------------------------------------------\n\n\ndef test_id_based_retrieval_raises(\n    disabled_index: DisabledDocumentIndex,\n) -> None:\n    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):\n        disabled_index.id_based_retrieval(\n            chunk_requests=[],\n            filters=_stub_filters(),\n        )\n\n\n# ------------------------------------------------------------------\n# HybridCapable\n# ------------------------------------------------------------------\n\n\ndef test_hybrid_retrieval_raises(\n    disabled_index: DisabledDocumentIndex,\n) -> None:\n    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):\n        disabled_index.hybrid_retrieval(\n            query=\"test\",\n            query_embedding=[0.0] * 768,\n            final_keywords=None,\n            filters=_stub_filters(),\n            hybrid_alpha=0.5,\n            time_decay_multiplier=1.0,\n            num_to_retrieve=10,\n            ranking_profile_type=QueryExpansionType.KEYWORD,\n        )\n\n\n# ------------------------------------------------------------------\n# AdminCapable\n# ------------------------------------------------------------------\n\n\ndef test_admin_retrieval_raises(\n    disabled_index: DisabledDocumentIndex,\n) -> None:\n    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):\n        disabled_index.admin_retrieval(\n            query=\"test\",\n            query_embedding=[0.0] * 768,\n            filters=_stub_filters(),\n        )\n\n\n# ------------------------------------------------------------------\n# RandomCapable\n# ------------------------------------------------------------------\n\n\ndef test_random_retrieval_raises(\n    disabled_index: DisabledDocumentIndex,\n) -> None:\n    with pytest.raises(RuntimeError, match=ESCAPED_ERROR):\n        disabled_index.random_retrieval(\n            filters=_stub_filters(),\n        )\n\n\n# ------------------------------------------------------------------\n# Introspection — index_name and secondary_index_name should still work\n# ------------------------------------------------------------------\n\n\ndef test_index_names_accessible(disabled_index: DisabledDocumentIndex) -> None:\n    assert disabled_index.index_name == \"test_index\"\n    assert disabled_index.secondary_index_name == \"test_secondary\"\n\n\ndef test_default_names() -> None:\n    index = DisabledDocumentIndex()\n    assert index.index_name == \"disabled\"\n    assert index.secondary_index_name is None\n"
  },
  {
    "path": "backend/tests/unit/onyx/document_index/vespa/shared_utils/test_utils.py",
    "content": "from onyx.utils.text_processing import remove_invalid_unicode_chars\n\n\ndef test_remove_invalid_unicode_chars() -> None:\n    \"\"\"Test that invalid Unicode characters are properly removed.\"\"\"\n    # Test removal of illegal XML character 0xFDDB\n    text_with_illegal_char = \"Valid text \\ufddb more text\"\n    sanitized = remove_invalid_unicode_chars(text_with_illegal_char)\n    assert \"\\ufddb\" not in sanitized\n    assert sanitized == \"Valid text  more text\"\n\n    # Test that valid characters are preserved\n    valid_text = \"Hello, world! 你好世界\"\n    assert remove_invalid_unicode_chars(valid_text) == valid_text\n\n    # Test multiple invalid characters including 0xFDDB\n    text_with_multiple_illegal = \"\\x00Hello\\ufddb World\\ufffe!\"\n    sanitized = remove_invalid_unicode_chars(text_with_multiple_illegal)\n    assert all(c not in sanitized for c in [\"\\x00\", \"\\ufddb\", \"\\ufffe\"])\n    assert sanitized == \"Hello World!\"\n\n\ndef test_remove_surrogate_characters() -> None:\n    \"\"\"Test removal of unpaired UTF-16 surrogates that cause 'surrogates not allowed' errors.\n\n    This is the specific error seen when indexing Drive documents with Cohere:\n    'utf-8' codec can't encode character '\\\\udc00' in position X: surrogates not allowed\n    \"\"\"\n    # Test low surrogate (the exact error case from Drive indexing with Cohere)\n    text_with_low_surrogate = \"Text before \\udc00 text after\"\n    sanitized = remove_invalid_unicode_chars(text_with_low_surrogate)\n    assert \"\\udc00\" not in sanitized\n    assert sanitized == \"Text before  text after\"\n\n    # Test high surrogate\n    text_with_high_surrogate = \"Start \\ud800 end\"\n    sanitized = remove_invalid_unicode_chars(text_with_high_surrogate)\n    assert \"\\ud800\" not in sanitized\n    assert sanitized == \"Start  end\"\n\n    # Test that the sanitized text can be encoded to UTF-8 without error\n    problematic_text = \"Document content \\udc00 with \\ud800 surrogates \\udfff here\"\n    sanitized = remove_invalid_unicode_chars(problematic_text)\n    # This should not raise an exception\n    sanitized.encode(\"utf-8\")\n    assert sanitized == \"Document content  with  surrogates  here\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/document_index/vespa/test_vespa_batch_flush.py",
    "content": "\"\"\"Unit tests for VespaDocumentIndex.index().\n\nThese tests mock all external I/O (HTTP calls, thread pools) and verify\nthe streaming logic, ID cleaning/mapping, and DocumentInsertionRecord\nconstruction.\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.access.models import DocumentAccess\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import TextSection\nfrom onyx.document_index.interfaces import EnrichedDocumentIndexingInfo\nfrom onyx.document_index.interfaces_new import IndexingMetadata\nfrom onyx.document_index.interfaces_new import TenantState\nfrom onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex\nfrom onyx.indexing.models import ChunkEmbedding\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom onyx.indexing.models import IndexChunk\n\n\ndef _make_chunk(\n    doc_id: str,\n    chunk_id: int = 0,\n    content: str = \"test content\",\n) -> DocMetadataAwareIndexChunk:\n    doc = Document(\n        id=doc_id,\n        semantic_identifier=\"test_doc\",\n        sections=[TextSection(text=content, link=None)],\n        source=DocumentSource.NOT_APPLICABLE,\n        metadata={},\n    )\n    index_chunk = IndexChunk(\n        chunk_id=chunk_id,\n        blurb=content[:50],\n        content=content,\n        source_links=None,\n        image_file_id=None,\n        section_continuation=False,\n        source_document=doc,\n        title_prefix=\"\",\n        metadata_suffix_semantic=\"\",\n        metadata_suffix_keyword=\"\",\n        contextual_rag_reserved_tokens=0,\n        doc_summary=\"\",\n        chunk_context=\"\",\n        mini_chunk_texts=None,\n        large_chunk_id=None,\n        embeddings=ChunkEmbedding(\n            full_embedding=[0.1] * 10,\n            mini_chunk_embeddings=[],\n        ),\n        title_embedding=None,\n    )\n    access = DocumentAccess.build(\n        user_emails=[],\n        user_groups=[],\n        external_user_emails=[],\n        external_user_group_ids=[],\n        is_public=True,\n    )\n    return DocMetadataAwareIndexChunk.from_index_chunk(\n        index_chunk=index_chunk,\n        access=access,\n        document_sets=set(),\n        user_project=[],\n        personas=[],\n        boost=0,\n        aggregated_chunk_boost_factor=1.0,\n        tenant_id=\"test_tenant\",\n    )\n\n\ndef _make_indexing_metadata(\n    doc_ids: list[str],\n    old_counts: list[int],\n    new_counts: list[int],\n) -> IndexingMetadata:\n    return IndexingMetadata(\n        doc_id_to_chunk_cnt_diff={\n            doc_id: IndexingMetadata.ChunkCounts(\n                old_chunk_cnt=old,\n                new_chunk_cnt=new,\n            )\n            for doc_id, old, new in zip(doc_ids, old_counts, new_counts)\n        }\n    )\n\n\ndef _stub_enrich(\n    doc_id: str,\n    old_chunk_cnt: int,\n) -> EnrichedDocumentIndexingInfo:\n    \"\"\"Build an EnrichedDocumentIndexingInfo that says 'no chunks to delete'\n    when old_chunk_cnt == 0, or 'has existing chunks' otherwise.\"\"\"\n    return EnrichedDocumentIndexingInfo(\n        doc_id=doc_id,\n        chunk_start_index=0,\n        old_version=False,\n        chunk_end_index=old_chunk_cnt,\n    )\n\n\n@patch(\"onyx.document_index.vespa.vespa_document_index.batch_index_vespa_chunks\")\n@patch(\"onyx.document_index.vespa.vespa_document_index.delete_vespa_chunks\")\n@patch(\n    \"onyx.document_index.vespa.vespa_document_index.get_document_chunk_ids\",\n    return_value=[],\n)\n@patch(\"onyx.document_index.vespa.vespa_document_index._enrich_basic_chunk_info\")\n@patch(\n    \"onyx.document_index.vespa.vespa_document_index.BATCH_SIZE\",\n    3,\n)\ndef test_index_respects_batch_size(\n    mock_enrich: MagicMock,\n    mock_get_chunk_ids: MagicMock,  # noqa: ARG001\n    mock_delete: MagicMock,  # noqa: ARG001\n    mock_batch_index: MagicMock,\n) -> None:\n    \"\"\"When chunks exceed BATCH_SIZE, batch_index_vespa_chunks is called\n    multiple times with correctly sized batches.\"\"\"\n    mock_enrich.return_value = _stub_enrich(\"doc1\", old_chunk_cnt=0)\n\n    index = VespaDocumentIndex(\n        index_name=\"test_index\",\n        tenant_state=TenantState(tenant_id=\"test_tenant\", multitenant=False),\n        large_chunks_enabled=False,\n        httpx_client=MagicMock(),\n    )\n\n    chunks = [_make_chunk(\"doc1\", chunk_id=i) for i in range(7)]\n    metadata = _make_indexing_metadata([\"doc1\"], old_counts=[0], new_counts=[7])\n\n    results = index.index(chunks=chunks, indexing_metadata=metadata)\n\n    assert len(results) == 1\n\n    # With BATCH_SIZE=3 and 7 chunks: batches of 3, 3, 1\n    assert mock_batch_index.call_count == 3\n    batch_sizes = [len(c.kwargs[\"chunks\"]) for c in mock_batch_index.call_args_list]\n    assert batch_sizes == [3, 3, 1]\n\n    # Verify all chunks are accounted for and in order\n    all_indexed = [\n        chunk for c in mock_batch_index.call_args_list for chunk in c.kwargs[\"chunks\"]\n    ]\n    assert len(all_indexed) == 7\n    assert [c.chunk_id for c in all_indexed] == list(range(7))\n"
  },
  {
    "path": "backend/tests/unit/onyx/error_handling/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/error_handling/test_exceptions.py",
    "content": "\"\"\"Tests for OnyxError and the global exception handler.\"\"\"\n\nimport pytest\nfrom fastapi import FastAPI\nfrom fastapi.testclient import TestClient\n\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.error_handling.exceptions import register_onyx_exception_handlers\n\n\nclass TestOnyxError:\n    \"\"\"Unit tests for OnyxError construction and properties.\"\"\"\n\n    def test_basic_construction(self) -> None:\n        err = OnyxError(OnyxErrorCode.NOT_FOUND, \"Session not found\")\n        assert err.error_code is OnyxErrorCode.NOT_FOUND\n        assert err.detail == \"Session not found\"\n        assert err.status_code == 404\n\n    def test_message_defaults_to_code(self) -> None:\n        err = OnyxError(OnyxErrorCode.UNAUTHENTICATED)\n        assert err.detail == \"UNAUTHENTICATED\"\n        assert str(err) == \"UNAUTHENTICATED\"\n\n    def test_status_code_override(self) -> None:\n        err = OnyxError(\n            OnyxErrorCode.BAD_GATEWAY,\n            \"upstream failed\",\n            status_code_override=503,\n        )\n        assert err.status_code == 503\n        # error_code still reports its own default\n        assert err.error_code.status_code == 502\n\n    def test_no_override_uses_error_code_status(self) -> None:\n        err = OnyxError(OnyxErrorCode.RATE_LIMITED, \"slow down\")\n        assert err.status_code == 429\n\n    def test_is_exception(self) -> None:\n        err = OnyxError(OnyxErrorCode.INTERNAL_ERROR)\n        assert isinstance(err, Exception)\n\n\nclass TestExceptionHandler:\n    \"\"\"Integration test: OnyxError → JSON response via FastAPI TestClient.\"\"\"\n\n    @pytest.fixture()\n    def client(self) -> TestClient:\n        app = FastAPI()\n        register_onyx_exception_handlers(app)\n\n        @app.get(\"/boom\")\n        def _boom() -> None:\n            raise OnyxError(OnyxErrorCode.NOT_FOUND, \"Thing not found\")\n\n        @app.get(\"/boom-override\")\n        def _boom_override() -> None:\n            raise OnyxError(\n                OnyxErrorCode.BAD_GATEWAY,\n                \"upstream 503\",\n                status_code_override=503,\n            )\n\n        @app.get(\"/boom-default-msg\")\n        def _boom_default() -> None:\n            raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)\n\n        return TestClient(app, raise_server_exceptions=False)\n\n    def test_returns_correct_status_and_body(self, client: TestClient) -> None:\n        resp = client.get(\"/boom\")\n        assert resp.status_code == 404\n        body = resp.json()\n        assert body[\"error_code\"] == \"NOT_FOUND\"\n        assert body[\"detail\"] == \"Thing not found\"\n\n    def test_status_code_override_in_response(self, client: TestClient) -> None:\n        resp = client.get(\"/boom-override\")\n        assert resp.status_code == 503\n        body = resp.json()\n        assert body[\"error_code\"] == \"BAD_GATEWAY\"\n        assert body[\"detail\"] == \"upstream 503\"\n\n    def test_default_message(self, client: TestClient) -> None:\n        resp = client.get(\"/boom-default-msg\")\n        assert resp.status_code == 401\n        body = resp.json()\n        assert body[\"error_code\"] == \"UNAUTHENTICATED\"\n        assert body[\"detail\"] == \"UNAUTHENTICATED\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/federated_connectors/test_federated_connector_factory.py",
    "content": "\"\"\"\nUnit tests for federated connector lazy loading factory to validate:\n1. All federated connector mappings are correct\n2. Module paths and class names are valid\n3. Error handling works properly\n4. Caching functions correctly\n\"\"\"\n\nimport importlib\nfrom unittest.mock import MagicMock\nfrom unittest.mock import Mock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.configs.constants import FederatedConnectorSource\nfrom onyx.federated_connectors.factory import _federated_connector_cache\nfrom onyx.federated_connectors.factory import _load_federated_connector_class\nfrom onyx.federated_connectors.factory import FederatedConnectorMissingException\nfrom onyx.federated_connectors.factory import get_federated_connector_cls\nfrom onyx.federated_connectors.interfaces import FederatedConnector\nfrom onyx.federated_connectors.registry import FEDERATED_CONNECTOR_CLASS_MAP\nfrom onyx.federated_connectors.registry import FederatedConnectorMapping\n\n\nclass TestFederatedConnectorMappingValidation:\n    \"\"\"Test that all federated connector mappings are valid.\"\"\"\n\n    def test_all_federated_connector_mappings_exist(self) -> None:\n        \"\"\"Test that all mapped modules and classes actually exist.\"\"\"\n        errors = []\n\n        for source, mapping in FEDERATED_CONNECTOR_CLASS_MAP.items():\n            try:\n                # Try to import the module\n                module = importlib.import_module(mapping.module_path)\n\n                # Try to get the class\n                connector_class = getattr(module, mapping.class_name)\n\n                # Verify it's a subclass of FederatedConnector\n                if not issubclass(connector_class, FederatedConnector):\n                    errors.append(\n                        f\"{source.value}: {mapping.class_name} is not a FederatedConnector subclass\"\n                    )\n\n            except ImportError as e:\n                errors.append(\n                    f\"{source.value}: Failed to import {mapping.module_path} - {e}\"\n                )\n            except AttributeError as e:\n                errors.append(\n                    f\"{source.value}: Class {mapping.class_name} not found in {mapping.module_path} - {e}\"\n                )\n\n        if errors:\n            pytest.fail(\n                \"Federated connector mapping validation failed:\\n\" + \"\\n\".join(errors)\n            )\n\n    def test_no_duplicate_mappings(self) -> None:\n        \"\"\"Test that each FederatedConnectorSource only appears once in the mapping.\"\"\"\n        sources = list(FEDERATED_CONNECTOR_CLASS_MAP.keys())\n        unique_sources = set(sources)\n\n        assert len(sources) == len(\n            unique_sources\n        ), \"Duplicate FederatedConnectorSource entries found\"\n\n    def test_mapping_format_consistency(self) -> None:\n        \"\"\"Test that all mappings follow the expected format.\"\"\"\n        for source, mapping in FEDERATED_CONNECTOR_CLASS_MAP.items():\n            assert isinstance(\n                mapping, FederatedConnectorMapping\n            ), f\"{source.value} mapping is not a FederatedConnectorMapping\"\n\n            assert isinstance(\n                mapping.module_path, str\n            ), f\"{source.value} module_path is not a string\"\n            assert isinstance(\n                mapping.class_name, str\n            ), f\"{source.value} class_name is not a string\"\n            assert mapping.module_path.startswith(\n                \"onyx.federated_connectors.\"\n            ), f\"{source.value} module_path doesn't start with onyx.federated_connectors.\"\n            assert mapping.class_name.endswith(\n                \"FederatedConnector\"\n            ), f\"{source.value} class_name doesn't end with FederatedConnector\"\n\n\nclass TestFederatedConnectorClassLoading:\n    \"\"\"Test the lazy loading mechanism.\"\"\"\n\n    def setup_method(self) -> None:\n        \"\"\"Clear cache before each test.\"\"\"\n        _federated_connector_cache.clear()\n\n    def test_load_federated_connector_class_success(self) -> None:\n        \"\"\"Test successful federated connector class loading.\"\"\"\n        connector_class = _load_federated_connector_class(\n            FederatedConnectorSource.FEDERATED_SLACK\n        )\n\n        assert connector_class is not None\n        assert issubclass(connector_class, FederatedConnector)\n        assert connector_class.__name__ == \"SlackFederatedConnector\"\n\n    def test_load_federated_connector_class_caching(self) -> None:\n        \"\"\"Test that federated connector classes are cached after first load.\"\"\"\n        assert len(_federated_connector_cache) == 0\n\n        # Load connector first time\n        connector_class1 = _load_federated_connector_class(\n            FederatedConnectorSource.FEDERATED_SLACK\n        )\n        assert len(_federated_connector_cache) == 1\n        assert FederatedConnectorSource.FEDERATED_SLACK in _federated_connector_cache\n\n        # Load same connector second time - should use cache\n        connector_class2 = _load_federated_connector_class(\n            FederatedConnectorSource.FEDERATED_SLACK\n        )\n        assert connector_class1 is connector_class2  # Same object reference\n        assert len(_federated_connector_cache) == 1  # Cache size unchanged\n\n    @patch(\"importlib.import_module\")\n    def test_load_federated_connector_class_import_error(\n        self, mock_import: Mock\n    ) -> None:\n        \"\"\"Test handling of import errors.\"\"\"\n        mock_import.side_effect = ImportError(\"Module not found\")\n\n        with pytest.raises(FederatedConnectorMissingException) as exc_info:\n            _load_federated_connector_class(FederatedConnectorSource.FEDERATED_SLACK)\n\n        assert (\n            \"Failed to import SlackFederatedConnector from onyx.federated_connectors.slack.federated_connector\"\n            in str(exc_info.value)\n        )\n\n    @patch(\"importlib.import_module\")\n    def test_load_federated_connector_class_attribute_error(\n        self, mock_import: Mock\n    ) -> None:\n        \"\"\"Test handling of missing class in module.\"\"\"\n\n        # Create a custom mock that raises AttributeError for the specific class\n        class MockModule:\n            def __getattr__(self, name: str) -> MagicMock:\n                if name == \"SlackFederatedConnector\":\n                    raise AttributeError(\"Class not found\")\n                return MagicMock()\n\n        mock_import.return_value = MockModule()\n\n        with pytest.raises(FederatedConnectorMissingException) as exc_info:\n            _load_federated_connector_class(FederatedConnectorSource.FEDERATED_SLACK)\n\n        assert (\n            \"Failed to import SlackFederatedConnector from onyx.federated_connectors.slack.federated_connector\"\n            in str(exc_info.value)\n        )\n\n\nclass TestGetFederatedConnectorCls:\n    \"\"\"Test the get_federated_connector_cls function.\"\"\"\n\n    def setup_method(self) -> None:\n        \"\"\"Clear cache before each test.\"\"\"\n        _federated_connector_cache.clear()\n\n    def test_get_federated_connector_cls_basic(self) -> None:\n        \"\"\"Test basic federated connector class retrieval.\"\"\"\n        connector_class = get_federated_connector_cls(\n            FederatedConnectorSource.FEDERATED_SLACK\n        )\n\n        assert connector_class is not None\n        assert issubclass(connector_class, FederatedConnector)\n        assert connector_class.__name__ == \"SlackFederatedConnector\"\n\n\nclass TestFederatedConnectorMappingIntegrity:\n    \"\"\"Test integrity of the federated connector mapping data.\"\"\"\n\n    def test_all_federated_connector_sources_mapped(self) -> None:\n        \"\"\"Test that all FederatedConnectorSource values have mappings.\"\"\"\n        # Get all FederatedConnectorSource enum values\n        all_sources = set(FederatedConnectorSource)\n        mapped_sources = set(FEDERATED_CONNECTOR_CLASS_MAP.keys())\n\n        unmapped_sources = all_sources - mapped_sources\n\n        if unmapped_sources:\n            pytest.fail(\n                f\"FederatedConnectorSource values without connector mappings: {[s.value for s in unmapped_sources]}\"\n            )\n"
  },
  {
    "path": "backend/tests/unit/onyx/federated_connectors/test_oauth_utils.py",
    "content": "\"\"\"Unit tests for federated OAuth state generation and verification.\n\nUses unittest.mock to patch get_cache_backend so no external services\nare needed.  Verifies the generate -> verify round-trip, one-time-use\nsemantics, TTL propagation, and error handling.\n\"\"\"\n\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.cache.interface import CacheBackend\nfrom onyx.cache.interface import CacheLock\nfrom onyx.federated_connectors.oauth_utils import generate_oauth_state\nfrom onyx.federated_connectors.oauth_utils import OAUTH_STATE_TTL\nfrom onyx.federated_connectors.oauth_utils import OAuthSession\nfrom onyx.federated_connectors.oauth_utils import verify_oauth_state\n\n\nclass _MemoryCacheBackend(CacheBackend):\n    \"\"\"Minimal in-memory CacheBackend for unit tests.\"\"\"\n\n    def __init__(self) -> None:\n        self._store: dict[str, bytes] = {}\n        self.set_calls: list[dict[str, object]] = []\n\n    def get(self, key: str) -> bytes | None:\n        return self._store.get(key)\n\n    def set(\n        self,\n        key: str,\n        value: str | bytes | int | float,\n        ex: int | None = None,\n    ) -> None:\n        self.set_calls.append({\"key\": key, \"ex\": ex})\n        if isinstance(value, bytes):\n            self._store[key] = value\n        else:\n            self._store[key] = str(value).encode()\n\n    def delete(self, key: str) -> None:\n        self._store.pop(key, None)\n\n    def exists(self, key: str) -> bool:\n        return key in self._store\n\n    def expire(self, key: str, seconds: int) -> None:\n        pass\n\n    def ttl(self, key: str) -> int:\n        return -2 if key not in self._store else -1\n\n    def lock(self, name: str, timeout: float | None = None) -> CacheLock:\n        raise NotImplementedError\n\n    def rpush(self, key: str, value: str | bytes) -> None:\n        raise NotImplementedError\n\n    def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:\n        raise NotImplementedError\n\n\ndef _patched(cache: _MemoryCacheBackend):  # type: ignore[no-untyped-def]\n    return patch(\n        \"onyx.federated_connectors.oauth_utils.get_cache_backend\",\n        return_value=cache,\n    )\n\n\nclass TestGenerateAndVerifyRoundTrip:\n    def test_round_trip_basic(self) -> None:\n        cache = _MemoryCacheBackend()\n        with _patched(cache):\n            state = generate_oauth_state(\n                federated_connector_id=42,\n                user_id=\"user-abc\",\n            )\n            session = verify_oauth_state(state)\n\n        assert session.federated_connector_id == 42\n        assert session.user_id == \"user-abc\"\n        assert session.redirect_uri is None\n        assert session.additional_data == {}\n\n    def test_round_trip_with_all_fields(self) -> None:\n        cache = _MemoryCacheBackend()\n        with _patched(cache):\n            state = generate_oauth_state(\n                federated_connector_id=7,\n                user_id=\"user-xyz\",\n                redirect_uri=\"https://example.com/callback\",\n                additional_data={\"scope\": \"read\"},\n            )\n            session = verify_oauth_state(state)\n\n        assert session.federated_connector_id == 7\n        assert session.user_id == \"user-xyz\"\n        assert session.redirect_uri == \"https://example.com/callback\"\n        assert session.additional_data == {\"scope\": \"read\"}\n\n\nclass TestOneTimeUse:\n    def test_verify_deletes_state(self) -> None:\n        cache = _MemoryCacheBackend()\n        with _patched(cache):\n            state = generate_oauth_state(federated_connector_id=1, user_id=\"u\")\n            verify_oauth_state(state)\n\n            with pytest.raises(ValueError, match=\"OAuth state not found\"):\n                verify_oauth_state(state)\n\n\nclass TestTTLPropagation:\n    def test_default_ttl(self) -> None:\n        cache = _MemoryCacheBackend()\n        with _patched(cache):\n            generate_oauth_state(federated_connector_id=1, user_id=\"u\")\n\n        assert len(cache.set_calls) == 1\n        assert cache.set_calls[0][\"ex\"] == OAUTH_STATE_TTL\n\n    def test_custom_ttl(self) -> None:\n        cache = _MemoryCacheBackend()\n        with _patched(cache):\n            generate_oauth_state(federated_connector_id=1, user_id=\"u\", ttl=600)\n\n        assert cache.set_calls[0][\"ex\"] == 600\n\n\nclass TestVerifyInvalidState:\n    def test_missing_state_raises(self) -> None:\n        cache = _MemoryCacheBackend()\n        with _patched(cache):\n            state = generate_oauth_state(federated_connector_id=1, user_id=\"u\")\n            # Manually clear the cache to simulate expiration\n            cache._store.clear()\n\n            with pytest.raises(ValueError, match=\"OAuth state not found\"):\n                verify_oauth_state(state)\n\n\nclass TestOAuthSessionSerialization:\n    def test_to_dict_from_dict_round_trip(self) -> None:\n        session = OAuthSession(\n            federated_connector_id=5,\n            user_id=\"u-123\",\n            redirect_uri=\"https://redir.example.com\",\n            additional_data={\"key\": \"val\"},\n        )\n        d = session.to_dict()\n        restored = OAuthSession.from_dict(d)\n\n        assert restored.federated_connector_id == 5\n        assert restored.user_id == \"u-123\"\n        assert restored.redirect_uri == \"https://redir.example.com\"\n        assert restored.additional_data == {\"key\": \"val\"}\n\n    def test_from_dict_defaults(self) -> None:\n        minimal = {\"federated_connector_id\": 1, \"user_id\": \"u\"}\n        session = OAuthSession.from_dict(minimal)\n        assert session.redirect_uri is None\n        assert session.additional_data == {}\n"
  },
  {
    "path": "backend/tests/unit/onyx/file_processing/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/file_processing/test_image_summarization_errors.py",
    "content": "\"\"\"\nUnit tests for image summarization error handling.\n\nVerifies that:\n1. LLM errors produce actionable error messages (not base64 dumps)\n2. Unsupported MIME type logs include the magic bytes and size\n3. The ValueError raised on LLM failure preserves the original exception\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.file_processing.image_summarization import _summarize_image\nfrom onyx.file_processing.image_summarization import summarize_image_with_error_handling\n\n\nclass TestSummarizeImageErrorMessage:\n    \"\"\"_summarize_image must not dump base64 image data into error messages.\"\"\"\n\n    def test_error_message_contains_exception_type_not_base64(self) -> None:\n        \"\"\"The ValueError should contain the original exception info, not message payloads.\"\"\"\n        mock_llm = MagicMock()\n        mock_llm.invoke.side_effect = RuntimeError(\"Connection timeout\")\n\n        # A fake base64-encoded image string (should NOT appear in the error)\n        fake_encoded = \"data:image/png;base64,iVBORw0KGgoAAAANSUhEUg...\"\n\n        with pytest.raises(ValueError, match=\"RuntimeError: Connection timeout\"):\n            _summarize_image(fake_encoded, mock_llm, query=\"test\")\n\n    def test_error_message_does_not_contain_base64(self) -> None:\n        \"\"\"Ensure base64 data is never included in the error message.\"\"\"\n        mock_llm = MagicMock()\n        mock_llm.invoke.side_effect = RuntimeError(\"API error\")\n\n        fake_encoded = \"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA\"\n\n        with pytest.raises(ValueError) as exc_info:\n            _summarize_image(fake_encoded, mock_llm)\n\n        error_str = str(exc_info.value)\n        assert \"base64\" not in error_str\n        assert \"iVBOR\" not in error_str\n\n    def test_original_exception_is_chained(self) -> None:\n        \"\"\"The ValueError should chain the original exception via __cause__.\"\"\"\n        mock_llm = MagicMock()\n        original = RuntimeError(\"upstream failure\")\n        mock_llm.invoke.side_effect = original\n\n        with pytest.raises(ValueError) as exc_info:\n            _summarize_image(\"data:image/png;base64,abc\", mock_llm)\n\n        assert exc_info.value.__cause__ is original\n\n\nclass TestUnsupportedMimeTypeLogging:\n    \"\"\"summarize_image_with_error_handling should log useful info for unsupported formats.\"\"\"\n\n    @patch(\n        \"onyx.file_processing.image_summarization.summarize_image_pipeline\",\n        side_effect=__import__(\n            \"onyx.file_processing.image_summarization\",\n            fromlist=[\"UnsupportedImageFormatError\"],\n        ).UnsupportedImageFormatError(\"unsupported\"),\n    )\n    def test_logs_magic_bytes_and_size(\n        self, mock_pipeline: MagicMock  # noqa: ARG002\n    ) -> None:\n        \"\"\"The info log should include magic bytes hex and image size.\"\"\"\n        mock_llm = MagicMock()\n        # TIFF magic bytes (not in the supported list)\n        image_data = b\"\\x49\\x49\\x2a\\x00\" + b\"\\x00\" * 100\n\n        with patch(\"onyx.file_processing.image_summarization.logger\") as mock_logger:\n            result = summarize_image_with_error_handling(\n                llm=mock_llm,\n                image_data=image_data,\n                context_name=\"test_image.tiff\",\n            )\n\n        assert result is None\n        mock_logger.info.assert_called_once()\n        log_args = mock_logger.info.call_args\n        # Check the format string args contain magic bytes and size\n        assert \"49492a00\" in str(log_args)\n        assert \"104\" in str(log_args)  # 4 + 100 bytes\n"
  },
  {
    "path": "backend/tests/unit/onyx/file_processing/test_image_summarization_litellm_errors.py",
    "content": "\"\"\"\nUnit tests verifying that LiteLLM error details are extracted and surfaced\nin image summarization error messages.\n\nWhen the LLM call fails, the error handler should include the status_code,\nllm_provider, and model from LiteLLM exceptions so operators can diagnose\nthe root cause (rate limit, content filter, unsupported vision, etc.)\nwithout needing to dig through LiteLLM internals.\n\"\"\"\n\nfrom unittest.mock import MagicMock\n\nimport pytest\n\nfrom onyx.file_processing.image_summarization import _summarize_image\n\n\ndef _make_litellm_style_error(\n    *,\n    message: str = \"API error\",\n    status_code: int | None = None,\n    llm_provider: str | None = None,\n    model: str | None = None,\n) -> RuntimeError:\n    \"\"\"Create an exception with LiteLLM-style attributes.\"\"\"\n    exc = RuntimeError(message)\n    if status_code is not None:\n        exc.status_code = status_code  # type: ignore[attr-defined]\n    if llm_provider is not None:\n        exc.llm_provider = llm_provider  # type: ignore[attr-defined]\n    if model is not None:\n        exc.model = model  # type: ignore[attr-defined]\n    return exc\n\n\nclass TestLiteLLMErrorExtraction:\n    \"\"\"Verify that LiteLLM error attributes are included in the ValueError.\"\"\"\n\n    def test_status_code_included(self) -> None:\n        mock_llm = MagicMock()\n        mock_llm.invoke.side_effect = _make_litellm_style_error(\n            message=\"Content filter triggered\",\n            status_code=400,\n            llm_provider=\"azure\",\n            model=\"gpt-4o\",\n        )\n\n        with pytest.raises(ValueError, match=\"status_code=400\"):\n            _summarize_image(\"data:image/png;base64,abc\", mock_llm)\n\n    def test_llm_provider_included(self) -> None:\n        mock_llm = MagicMock()\n        mock_llm.invoke.side_effect = _make_litellm_style_error(\n            message=\"Bad request\",\n            status_code=400,\n            llm_provider=\"azure\",\n        )\n\n        with pytest.raises(ValueError, match=\"llm_provider=azure\"):\n            _summarize_image(\"data:image/png;base64,abc\", mock_llm)\n\n    def test_model_included(self) -> None:\n        mock_llm = MagicMock()\n        mock_llm.invoke.side_effect = _make_litellm_style_error(\n            message=\"Bad request\",\n            model=\"gpt-4o\",\n        )\n\n        with pytest.raises(ValueError, match=\"model=gpt-4o\"):\n            _summarize_image(\"data:image/png;base64,abc\", mock_llm)\n\n    def test_all_fields_in_single_message(self) -> None:\n        mock_llm = MagicMock()\n        mock_llm.invoke.side_effect = _make_litellm_style_error(\n            message=\"Rate limit exceeded\",\n            status_code=429,\n            llm_provider=\"azure\",\n            model=\"gpt-4o\",\n        )\n\n        with pytest.raises(ValueError) as exc_info:\n            _summarize_image(\"data:image/png;base64,abc\", mock_llm)\n\n        msg = str(exc_info.value)\n        assert \"status_code=429\" in msg\n        assert \"llm_provider=azure\" in msg\n        assert \"model=gpt-4o\" in msg\n        assert \"Rate limit exceeded\" in msg\n\n    def test_plain_exception_without_litellm_attrs(self) -> None:\n        \"\"\"Non-LiteLLM exceptions should still produce a useful message.\"\"\"\n        mock_llm = MagicMock()\n        mock_llm.invoke.side_effect = ConnectionError(\"Connection refused\")\n\n        with pytest.raises(ValueError) as exc_info:\n            _summarize_image(\"data:image/png;base64,abc\", mock_llm)\n\n        msg = str(exc_info.value)\n        assert \"ConnectionError\" in msg\n        assert \"Connection refused\" in msg\n        # Should not contain status_code/llm_provider/model\n        assert \"status_code\" not in msg\n        assert \"llm_provider\" not in msg\n\n    def test_no_base64_in_error(self) -> None:\n        \"\"\"Error messages must not contain the full base64 image payload.\n\n        Some LiteLLM exceptions echo the request body (including base64 images)\n        in their message.  The truncation guard ensures the bulk of such a\n        payload is stripped from the re-raised ValueError.\n        \"\"\"\n        mock_llm = MagicMock()\n        # Build a long base64-like payload that exceeds the 512-char truncation\n        fake_b64_payload = \"iVBORw0KGgo\" * 100  # ~1100 chars\n        fake_b64 = f\"data:image/png;base64,{fake_b64_payload}\"\n\n        mock_llm.invoke.side_effect = RuntimeError(\n            f\"Request failed for payload: {fake_b64}\"\n        )\n\n        with pytest.raises(ValueError) as exc_info:\n            _summarize_image(fake_b64, mock_llm)\n\n        msg = str(exc_info.value)\n        # The full payload must not appear (truncation should have kicked in)\n        assert fake_b64_payload not in msg\n        assert \"truncated\" in msg\n\n    def test_long_error_message_truncated(self) -> None:\n        \"\"\"Exception messages longer than 512 chars are truncated.\"\"\"\n        mock_llm = MagicMock()\n        long_msg = \"x\" * 1000\n        mock_llm.invoke.side_effect = RuntimeError(long_msg)\n\n        with pytest.raises(ValueError) as exc_info:\n            _summarize_image(\"data:image/png;base64,abc\", mock_llm)\n\n        msg = str(exc_info.value)\n        assert \"truncated\" in msg\n        # The full 1000-char string should not appear\n        assert long_msg not in msg\n"
  },
  {
    "path": "backend/tests/unit/onyx/file_processing/test_pdf.py",
    "content": "\"\"\"Unit tests for pypdf-dependent PDF processing functions.\n\nTests cover:\n- read_pdf_file: text extraction, metadata, encrypted PDFs, image extraction\n- pdf_to_text: convenience wrapper\n- is_pdf_protected: password protection detection\n\nFixture PDFs live in ./fixtures/ and are pre-built so the test layer has no\ndependency on pypdf internals (pypdf.generic).\n\"\"\"\n\nfrom io import BytesIO\nfrom pathlib import Path\n\nfrom onyx.file_processing.extract_file_text import pdf_to_text\nfrom onyx.file_processing.extract_file_text import read_pdf_file\nfrom onyx.file_processing.password_validation import is_pdf_protected\n\nFIXTURES = Path(__file__).parent / \"fixtures\"\n\n\ndef _load(name: str) -> BytesIO:\n    return BytesIO((FIXTURES / name).read_bytes())\n\n\n# ── read_pdf_file ────────────────────────────────────────────────────────\n\n\nclass TestReadPdfFile:\n    def test_basic_text_extraction(self) -> None:\n        text, _, images = read_pdf_file(_load(\"simple.pdf\"))\n        assert \"Hello World\" in text\n        assert images == []\n\n    def test_multi_page_text_extraction(self) -> None:\n        text, _, _ = read_pdf_file(_load(\"multipage.pdf\"))\n        assert \"Page one content\" in text\n        assert \"Page two content\" in text\n\n    def test_metadata_extraction(self) -> None:\n        _, pdf_metadata, _ = read_pdf_file(_load(\"with_metadata.pdf\"))\n        assert pdf_metadata.get(\"Title\") == \"My Title\"\n        assert pdf_metadata.get(\"Author\") == \"Jane Doe\"\n\n    def test_encrypted_pdf_with_correct_password(self) -> None:\n        text, _, _ = read_pdf_file(_load(\"encrypted.pdf\"), pdf_pass=\"pass123\")\n        assert \"Secret Content\" in text\n\n    def test_encrypted_pdf_without_password(self) -> None:\n        text, _, _ = read_pdf_file(_load(\"encrypted.pdf\"))\n        assert text == \"\"\n\n    def test_encrypted_pdf_with_wrong_password(self) -> None:\n        text, _, _ = read_pdf_file(_load(\"encrypted.pdf\"), pdf_pass=\"wrong\")\n        assert text == \"\"\n\n    def test_empty_pdf(self) -> None:\n        text, _, _ = read_pdf_file(_load(\"empty.pdf\"))\n        assert text.strip() == \"\"\n\n    def test_invalid_pdf_returns_empty(self) -> None:\n        text, _, images = read_pdf_file(BytesIO(b\"this is not a pdf\"))\n        assert text == \"\"\n        assert images == []\n\n    def test_image_extraction_disabled_by_default(self) -> None:\n        _, _, images = read_pdf_file(_load(\"with_image.pdf\"))\n        assert images == []\n\n    def test_image_extraction_collects_images(self) -> None:\n        _, _, images = read_pdf_file(_load(\"with_image.pdf\"), extract_images=True)\n        assert len(images) == 1\n        img_bytes, img_name = images[0]\n        assert len(img_bytes) > 0\n        assert img_name  # non-empty name\n\n    def test_image_callback_streams_instead_of_collecting(self) -> None:\n        \"\"\"With image_callback, images are streamed via callback and not accumulated.\"\"\"\n        collected: list[tuple[bytes, str]] = []\n\n        def callback(data: bytes, name: str) -> None:\n            collected.append((data, name))\n\n        _, _, images = read_pdf_file(\n            _load(\"with_image.pdf\"), extract_images=True, image_callback=callback\n        )\n        # Callback received the image\n        assert len(collected) == 1\n        assert len(collected[0][0]) > 0\n        # Returned list is empty when callback is used\n        assert images == []\n\n\n# ── pdf_to_text ──────────────────────────────────────────────────────────\n\n\nclass TestPdfToText:\n    def test_returns_text(self) -> None:\n        assert \"Hello World\" in pdf_to_text(_load(\"simple.pdf\"))\n\n    def test_with_password(self) -> None:\n        assert \"Secret Content\" in pdf_to_text(\n            _load(\"encrypted.pdf\"), pdf_pass=\"pass123\"\n        )\n\n    def test_encrypted_without_password_returns_empty(self) -> None:\n        assert pdf_to_text(_load(\"encrypted.pdf\")) == \"\"\n\n\n# ── is_pdf_protected ─────────────────────────────────────────────────────\n\n\nclass TestIsPdfProtected:\n    def test_unprotected_pdf(self) -> None:\n        assert is_pdf_protected(_load(\"simple.pdf\")) is False\n\n    def test_protected_pdf(self) -> None:\n        assert is_pdf_protected(_load(\"encrypted.pdf\")) is True\n\n    def test_preserves_file_position(self) -> None:\n        pdf = _load(\"simple.pdf\")\n        pdf.seek(42)\n        is_pdf_protected(pdf)\n        assert pdf.tell() == 42\n"
  },
  {
    "path": "backend/tests/unit/onyx/file_processing/test_xlsx_to_text.py",
    "content": "import io\nfrom typing import cast\n\nimport openpyxl\nfrom openpyxl.worksheet.worksheet import Worksheet\n\nfrom onyx.file_processing.extract_file_text import xlsx_to_text\n\n\ndef _make_xlsx(sheets: dict[str, list[list[str]]]) -> io.BytesIO:\n    \"\"\"Create an in-memory xlsx file from a dict of sheet_name -> matrix of strings.\"\"\"\n    wb = openpyxl.Workbook()\n    if wb.active is not None:\n        wb.remove(cast(Worksheet, wb.active))\n    for sheet_name, rows in sheets.items():\n        ws = wb.create_sheet(title=sheet_name)\n        for row in rows:\n            ws.append(row)\n    buf = io.BytesIO()\n    wb.save(buf)\n    buf.seek(0)\n    return buf\n\n\nclass TestXlsxToText:\n    def test_single_sheet_basic(self) -> None:\n        xlsx = _make_xlsx(\n            {\n                \"Sheet1\": [\n                    [\"Name\", \"Age\"],\n                    [\"Alice\", \"30\"],\n                    [\"Bob\", \"25\"],\n                ]\n            }\n        )\n        result = xlsx_to_text(xlsx)\n        lines = [line for line in result.strip().split(\"\\n\") if line.strip()]\n        assert len(lines) == 3\n        assert \"Name\" in lines[0]\n        assert \"Age\" in lines[0]\n        assert \"Alice\" in lines[1]\n        assert \"30\" in lines[1]\n        assert \"Bob\" in lines[2]\n\n    def test_multiple_sheets_separated(self) -> None:\n        xlsx = _make_xlsx(\n            {\n                \"Sheet1\": [[\"a\", \"b\"]],\n                \"Sheet2\": [[\"c\", \"d\"]],\n            }\n        )\n        result = xlsx_to_text(xlsx)\n        # TEXT_SECTION_SEPARATOR is \"\\n\\n\"\n        assert \"\\n\\n\" in result\n        parts = result.split(\"\\n\\n\")\n        assert any(\"a\" in p for p in parts)\n        assert any(\"c\" in p for p in parts)\n\n    def test_empty_cells(self) -> None:\n        xlsx = _make_xlsx(\n            {\n                \"Sheet1\": [\n                    [\"a\", \"\", \"b\"],\n                    [\"\", \"c\", \"\"],\n                ]\n            }\n        )\n        result = xlsx_to_text(xlsx)\n        lines = [line for line in result.strip().split(\"\\n\") if line.strip()]\n        assert len(lines) == 2\n\n    def test_commas_in_cells_are_quoted(self) -> None:\n        \"\"\"Cells containing commas should be quoted in CSV output.\"\"\"\n        xlsx = _make_xlsx(\n            {\n                \"Sheet1\": [\n                    [\"hello, world\", \"normal\"],\n                ]\n            }\n        )\n        result = xlsx_to_text(xlsx)\n        assert '\"hello, world\"' in result\n\n    def test_empty_workbook(self) -> None:\n        xlsx = _make_xlsx({\"Sheet1\": []})\n        result = xlsx_to_text(xlsx)\n        assert result.strip() == \"\"\n\n    def test_long_empty_row_run_capped(self) -> None:\n        \"\"\"Runs of >2 empty rows should be capped to 2.\"\"\"\n        xlsx = _make_xlsx(\n            {\n                \"Sheet1\": [\n                    [\"header\"],\n                    [\"\"],\n                    [\"\"],\n                    [\"\"],\n                    [\"\"],\n                    [\"data\"],\n                ]\n            }\n        )\n        result = xlsx_to_text(xlsx)\n        lines = [line for line in result.strip().split(\"\\n\") if line.strip()]\n        # 4 empty rows capped to 2, so: header + 2 empty + data = 4 lines\n        assert len(lines) == 4\n        assert \"header\" in lines[0]\n        assert \"data\" in lines[-1]\n\n    def test_long_empty_col_run_capped(self) -> None:\n        \"\"\"Runs of >2 empty columns should be capped to 2.\"\"\"\n        xlsx = _make_xlsx(\n            {\n                \"Sheet1\": [\n                    [\"a\", \"\", \"\", \"\", \"b\"],\n                    [\"c\", \"\", \"\", \"\", \"d\"],\n                ]\n            }\n        )\n        result = xlsx_to_text(xlsx)\n        lines = [line for line in result.strip().split(\"\\n\") if line.strip()]\n        assert len(lines) == 2\n        # Each row should have 4 fields (a + 2 empty + b), not 5\n        # csv format: a,,,b (3 commas = 4 fields)\n        first_line = lines[0].strip()\n        # Count commas to verify column reduction\n        assert first_line.count(\",\") == 3\n\n    def test_short_empty_runs_kept(self) -> None:\n        \"\"\"Runs of <=2 empty rows/cols should be preserved.\"\"\"\n        xlsx = _make_xlsx(\n            {\n                \"Sheet1\": [\n                    [\"a\", \"b\"],\n                    [\"\", \"\"],\n                    [\"\", \"\"],\n                    [\"c\", \"d\"],\n                ]\n            }\n        )\n        result = xlsx_to_text(xlsx)\n        lines = [line for line in result.strip().split(\"\\n\") if line.strip()]\n        # All 4 rows preserved (2 empty rows <= threshold)\n        assert len(lines) == 4\n\n    def test_bad_zip_file_returns_empty(self) -> None:\n        bad_file = io.BytesIO(b\"not a zip file\")\n        result = xlsx_to_text(bad_file, file_name=\"test.xlsx\")\n        assert result == \"\"\n\n    def test_bad_zip_tilde_file_returns_empty(self) -> None:\n        bad_file = io.BytesIO(b\"not a zip file\")\n        result = xlsx_to_text(bad_file, file_name=\"~$temp.xlsx\")\n        assert result == \"\"\n\n    def test_large_sparse_sheet(self) -> None:\n        \"\"\"A sheet with data, a big empty gap, and more data — gap is capped to 2.\"\"\"\n        rows: list[list[str]] = [[\"row1_data\"]]\n        rows.extend([[\"\"] for _ in range(10)])\n        rows.append([\"row2_data\"])\n        xlsx = _make_xlsx({\"Sheet1\": rows})\n        result = xlsx_to_text(xlsx)\n        lines = [line for line in result.strip().split(\"\\n\") if line.strip()]\n        # 10 empty rows capped to 2: row1_data + 2 empty + row2_data = 4\n        assert len(lines) == 4\n        assert \"row1_data\" in lines[0]\n        assert \"row2_data\" in lines[-1]\n\n    def test_quotes_in_cells(self) -> None:\n        \"\"\"Cells containing quotes should be properly escaped.\"\"\"\n        xlsx = _make_xlsx(\n            {\n                \"Sheet1\": [\n                    ['say \"hello\"', \"normal\"],\n                ]\n            }\n        )\n        result = xlsx_to_text(xlsx)\n        # csv.writer escapes quotes by doubling them\n        assert '\"\"hello\"\"' in result\n\n    def test_each_row_is_separate_line(self) -> None:\n        \"\"\"Each row should produce its own line (regression for writerow vs writerows).\"\"\"\n        xlsx = _make_xlsx(\n            {\n                \"Sheet1\": [\n                    [\"r1c1\", \"r1c2\"],\n                    [\"r2c1\", \"r2c2\"],\n                    [\"r3c1\", \"r3c2\"],\n                ]\n            }\n        )\n        result = xlsx_to_text(xlsx)\n        lines = [line for line in result.strip().split(\"\\n\") if line.strip()]\n        assert len(lines) == 3\n        assert \"r1c1\" in lines[0] and \"r1c2\" in lines[0]\n        assert \"r2c1\" in lines[1] and \"r2c2\" in lines[1]\n        assert \"r3c1\" in lines[2] and \"r3c2\" in lines[2]\n"
  },
  {
    "path": "backend/tests/unit/onyx/hooks/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/hooks/test_api_dependencies.py",
    "content": "\"\"\"Unit tests for the hooks feature gate.\"\"\"\n\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.error_handling.error_codes import OnyxErrorCode\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.hooks.api_dependencies import require_hook_enabled\n\n\nclass TestRequireHookEnabled:\n    def test_raises_when_multi_tenant(self) -> None:\n        with patch(\"onyx.hooks.api_dependencies.MULTI_TENANT\", True):\n            with pytest.raises(OnyxError) as exc_info:\n                require_hook_enabled()\n        assert exc_info.value.error_code is OnyxErrorCode.SINGLE_TENANT_ONLY\n        assert exc_info.value.status_code == 403\n        assert \"multi-tenant\" in exc_info.value.detail\n\n    def test_passes_when_single_tenant(self) -> None:\n        with patch(\"onyx.hooks.api_dependencies.MULTI_TENANT\", False):\n            require_hook_enabled()  # must not raise\n"
  },
  {
    "path": "backend/tests/unit/onyx/hooks/test_base_spec.py",
    "content": "import pytest\nfrom pydantic import BaseModel\n\nfrom onyx.db.enums import HookPoint\nfrom onyx.hooks.points.base import HookPointSpec\n\n\ndef test_init_subclass_raises_for_missing_attrs() -> None:\n    with pytest.raises(TypeError, match=\"must define class attributes\"):\n\n        class IncompleteSpec(HookPointSpec):\n            hook_point = HookPoint.QUERY_PROCESSING\n            # missing display_name, description, payload_model, response_model, etc.\n\n            class _Payload(BaseModel):\n                pass\n\n            payload_model = _Payload\n            response_model = _Payload\n"
  },
  {
    "path": "backend/tests/unit/onyx/hooks/test_models.py",
    "content": "import pytest\nfrom pydantic import ValidationError\n\nfrom onyx.db.enums import HookFailStrategy\nfrom onyx.db.enums import HookPoint\nfrom onyx.hooks.models import HookCreateRequest\nfrom onyx.hooks.models import HookUpdateRequest\n\n\ndef test_hook_update_request_rejects_empty() -> None:\n    # No fields supplied at all\n    with pytest.raises(ValidationError, match=\"At least one field must be provided\"):\n        HookUpdateRequest()\n\n\ndef test_hook_update_request_rejects_null_name_when_only_field() -> None:\n    # Explicitly setting name=None is rejected as name cannot be cleared\n    with pytest.raises(ValidationError, match=\"name cannot be cleared\"):\n        HookUpdateRequest(name=None)\n\n\ndef test_hook_update_request_accepts_single_field() -> None:\n    req = HookUpdateRequest(name=\"new name\")\n    assert req.name == \"new name\"\n\n\ndef test_hook_update_request_accepts_partial_fields() -> None:\n    req = HookUpdateRequest(fail_strategy=HookFailStrategy.SOFT, timeout_seconds=10.0)\n    assert req.fail_strategy == HookFailStrategy.SOFT\n    assert req.timeout_seconds == 10.0\n    assert req.name is None\n\n\ndef test_hook_update_request_rejects_null_name() -> None:\n    with pytest.raises(ValidationError, match=\"name cannot be cleared\"):\n        HookUpdateRequest(name=None, fail_strategy=HookFailStrategy.SOFT)\n\n\ndef test_hook_update_request_rejects_empty_name() -> None:\n    with pytest.raises(ValidationError, match=\"name cannot be cleared\"):\n        HookUpdateRequest(name=\"\", fail_strategy=HookFailStrategy.SOFT)\n\n\ndef test_hook_update_request_rejects_null_endpoint_url() -> None:\n    with pytest.raises(ValidationError, match=\"endpoint_url cannot be cleared\"):\n        HookUpdateRequest(endpoint_url=None, fail_strategy=HookFailStrategy.SOFT)\n\n\ndef test_hook_update_request_rejects_empty_endpoint_url() -> None:\n    with pytest.raises(ValidationError, match=\"endpoint_url cannot be cleared\"):\n        HookUpdateRequest(endpoint_url=\"\", fail_strategy=HookFailStrategy.SOFT)\n\n\ndef test_hook_update_request_allows_null_api_key() -> None:\n    # api_key=null is valid — means \"clear the api key\"\n    req = HookUpdateRequest(api_key=None)\n    assert req.api_key is None\n    assert \"api_key\" in req.model_fields_set\n\n\ndef test_hook_update_request_rejects_whitespace_name() -> None:\n    with pytest.raises(ValidationError, match=\"name cannot be cleared\"):\n        HookUpdateRequest(name=\"   \", fail_strategy=HookFailStrategy.SOFT)\n\n\ndef test_hook_update_request_rejects_whitespace_endpoint_url() -> None:\n    with pytest.raises(ValidationError, match=\"endpoint_url cannot be cleared\"):\n        HookUpdateRequest(endpoint_url=\"   \", fail_strategy=HookFailStrategy.SOFT)\n\n\ndef test_hook_create_request_rejects_whitespace_name() -> None:\n    with pytest.raises(ValidationError, match=\"whitespace-only\"):\n        HookCreateRequest(\n            name=\"   \",\n            hook_point=HookPoint.QUERY_PROCESSING,\n            endpoint_url=\"https://example.com/hook\",\n        )\n\n\ndef test_hook_create_request_rejects_whitespace_endpoint_url() -> None:\n    with pytest.raises(ValidationError, match=\"whitespace-only\"):\n        HookCreateRequest(\n            name=\"my hook\",\n            hook_point=HookPoint.QUERY_PROCESSING,\n            endpoint_url=\"   \",\n        )\n"
  },
  {
    "path": "backend/tests/unit/onyx/hooks/test_query_processing_spec.py",
    "content": "from onyx.db.enums import HookFailStrategy\nfrom onyx.db.enums import HookPoint\nfrom onyx.hooks.points.query_processing import QueryProcessingSpec\n\n\ndef test_hook_point_is_query_processing() -> None:\n    assert QueryProcessingSpec().hook_point == HookPoint.QUERY_PROCESSING\n\n\ndef test_default_fail_strategy_is_hard() -> None:\n    assert QueryProcessingSpec().default_fail_strategy == HookFailStrategy.HARD\n\n\ndef test_default_timeout_seconds() -> None:\n    # User is actively waiting — 5s is the documented contract for this hook point\n    assert QueryProcessingSpec().default_timeout_seconds == 5.0\n\n\ndef test_input_schema_required_fields() -> None:\n    schema = QueryProcessingSpec().input_schema\n    assert schema[\"type\"] == \"object\"\n    required = schema[\"required\"]\n    assert \"query\" in required\n    assert \"user_email\" in required\n    assert \"chat_session_id\" in required\n\n\ndef test_input_schema_chat_session_id_is_string() -> None:\n    props = QueryProcessingSpec().input_schema[\"properties\"]\n    assert props[\"chat_session_id\"][\"type\"] == \"string\"\n\n\ndef test_input_schema_query_is_string() -> None:\n    props = QueryProcessingSpec().input_schema[\"properties\"]\n    assert props[\"query\"][\"type\"] == \"string\"\n\n\ndef test_input_schema_user_email_is_nullable() -> None:\n    props = QueryProcessingSpec().input_schema[\"properties\"]\n    # Pydantic v2 emits anyOf for nullable fields\n    assert any(s.get(\"type\") == \"null\" for s in props[\"user_email\"][\"anyOf\"])\n\n\ndef test_output_schema_query_is_optional() -> None:\n    # query defaults to None (absent = reject); not required in the schema\n    schema = QueryProcessingSpec().output_schema\n    assert \"query\" not in schema.get(\"required\", [])\n\n\ndef test_output_schema_query_is_nullable() -> None:\n    # null means \"reject the query\"; Pydantic v2 emits anyOf for nullable fields\n    props = QueryProcessingSpec().output_schema[\"properties\"]\n    assert any(s.get(\"type\") == \"null\" for s in props[\"query\"][\"anyOf\"])\n\n\ndef test_output_schema_rejection_message_is_optional() -> None:\n    schema = QueryProcessingSpec().output_schema\n    assert \"rejection_message\" not in schema.get(\"required\", [])\n\n\ndef test_input_schema_no_additional_properties() -> None:\n    assert QueryProcessingSpec().input_schema.get(\"additionalProperties\") is False\n"
  },
  {
    "path": "backend/tests/unit/onyx/hooks/test_registry.py",
    "content": "import pytest\n\nfrom onyx.db.enums import HookPoint\nfrom onyx.hooks import registry as registry_module\nfrom onyx.hooks.registry import get_all_specs\nfrom onyx.hooks.registry import get_hook_point_spec\nfrom onyx.hooks.registry import validate_registry\n\n\ndef test_registry_covers_all_hook_points() -> None:\n    \"\"\"Every HookPoint enum member must have a registered spec.\"\"\"\n    assert {s.hook_point for s in get_all_specs()} == set(\n        HookPoint\n    ), f\"Missing specs for: {set(HookPoint) - {s.hook_point for s in get_all_specs()}}\"\n\n\ndef test_get_hook_point_spec_returns_correct_spec() -> None:\n    for hook_point in HookPoint:\n        spec = get_hook_point_spec(hook_point)\n        assert spec.hook_point == hook_point\n\n\ndef test_get_all_specs_returns_all() -> None:\n    specs = get_all_specs()\n    assert len(specs) == len(HookPoint)\n    assert {s.hook_point for s in specs} == set(HookPoint)\n\n\ndef test_get_hook_point_spec_raises_for_unregistered(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"get_hook_point_spec raises ValueError when a hook point has no spec.\"\"\"\n    monkeypatch.setattr(registry_module, \"_REGISTRY\", {})\n    with pytest.raises(ValueError, match=\"No spec registered for hook point\"):\n        get_hook_point_spec(HookPoint.QUERY_PROCESSING)\n\n\ndef test_validate_registry_passes() -> None:\n    validate_registry()  # should not raise with the real registry\n\n\ndef test_validate_registry_raises_for_incomplete(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(registry_module, \"_REGISTRY\", {})\n    with pytest.raises(RuntimeError, match=\"Hook point\\\\(s\\\\) have no registered spec\"):\n        validate_registry()\n"
  },
  {
    "path": "backend/tests/unit/onyx/image_gen/test_provider_building.py",
    "content": "import json\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.image_gen.exceptions import ImageProviderCredentialsError\nfrom onyx.image_gen.factory import get_image_generation_provider\nfrom onyx.image_gen.interfaces import ImageGenerationProviderCredentials\nfrom onyx.image_gen.interfaces import ReferenceImage\nfrom onyx.image_gen.providers.azure_img_gen import AzureImageGenerationProvider\nfrom onyx.image_gen.providers.openai_img_gen import OpenAIImageGenerationProvider\nfrom onyx.image_gen.providers.vertex_img_gen import VertexImageGenerationProvider\n\nOPENAI_PROVIDER = \"openai\"\nAZURE_PROVIDER = \"azure\"\nVERTEX_PROVIDER = \"vertex_ai\"\n\n\ndef _get_default_image_gen_creds() -> ImageGenerationProviderCredentials:\n    return ImageGenerationProviderCredentials(\n        api_key=None,\n        api_base=None,\n        api_version=None,\n        deployment_name=None,\n        custom_config=None,\n    )\n\n\ndef test_request_provider_that_no_exist() -> None:\n    provider = \"nonexistent\"\n    credentials = _get_default_image_gen_creds()\n\n    with pytest.raises(ValueError):\n        get_image_generation_provider(provider, credentials)\n\n\ndef test_build_openai_provider_from_api_key_and_base() -> None:\n    credentials = _get_default_image_gen_creds()\n\n    credentials.api_key = \"test\"\n    credentials.api_base = \"test\"\n\n    provider = OPENAI_PROVIDER\n\n    image_gen_provider = get_image_generation_provider(provider, credentials)\n\n    assert isinstance(image_gen_provider, OpenAIImageGenerationProvider)\n    assert image_gen_provider._api_key == \"test\"\n    assert image_gen_provider._api_base == \"test\"\n    assert image_gen_provider.supports_reference_images is True\n    assert image_gen_provider.max_reference_images == 16\n\n\ndef test_build_openai_provider_fails_no_api_key() -> None:\n    credentials = _get_default_image_gen_creds()\n\n    credentials.api_base = \"test\"\n\n    provider = OPENAI_PROVIDER\n\n    with pytest.raises(ImageProviderCredentialsError):\n        get_image_generation_provider(provider, credentials)\n\n\ndef test_build_azure_provider_from_api_key_and_base_and_version() -> None:\n    credentials = _get_default_image_gen_creds()\n\n    credentials.api_key = \"test\"\n    credentials.api_base = \"test\"\n    credentials.api_version = \"test\"\n\n    provider = AZURE_PROVIDER\n\n    image_gen_provider = get_image_generation_provider(provider, credentials)\n\n    assert isinstance(image_gen_provider, AzureImageGenerationProvider)\n    assert image_gen_provider._api_key == \"test\"\n    assert image_gen_provider._api_base == \"test\"\n    assert image_gen_provider._api_version == \"test\"\n    assert image_gen_provider.supports_reference_images is True\n    assert image_gen_provider.max_reference_images == 16\n\n\ndef test_build_azure_provider_fails_missing_credential() -> None:\n    azure_required = [\n        \"api_key\",\n        \"api_base\",\n        \"api_version\",\n    ]\n\n    default_creds = _get_default_image_gen_creds()\n    default_creds.api_key = \"test\"\n    default_creds.api_base = \"test\"\n    default_creds.api_version = \"test\"\n\n    for attribute in azure_required:\n        credentials = default_creds.model_copy()\n        setattr(credentials, attribute, None)\n\n        with pytest.raises(ImageProviderCredentialsError):\n            get_image_generation_provider(AZURE_PROVIDER, credentials)\n\n\ndef test_build_vertex_provider_from_credentials() -> None:\n    credentials = _get_default_image_gen_creds()\n\n    vertex_credentials = {\n        \"project_id\": \"demo_project_1\",\n        \"private_key_id\": \"test\",\n    }\n\n    vertex_json = json.dumps(vertex_credentials)\n    credentials.custom_config = {\n        \"vertex_credentials\": vertex_json,\n        \"vertex_location\": \"global\",\n    }\n    provider = VERTEX_PROVIDER\n\n    image_gen_provider = get_image_generation_provider(provider, credentials)\n\n    assert isinstance(image_gen_provider, VertexImageGenerationProvider)\n    assert image_gen_provider._vertex_credentials == vertex_json\n    assert image_gen_provider._vertex_location == \"global\"\n    assert image_gen_provider._vertex_project == \"demo_project_1\"\n\n\ndef test_build_vertex_provider_with_missing_project_id() -> None:\n    credentials = _get_default_image_gen_creds()\n\n    vertex_credentials = {\n        \"private_key_id\": \"test\",\n    }\n\n    vertex_json = json.dumps(vertex_credentials)\n    credentials.custom_config = {\n        \"vertex_credentials\": vertex_json,\n        \"vertex_location\": \"global\",\n    }\n\n    with pytest.raises(ImageProviderCredentialsError):\n        get_image_generation_provider(\"vertex_ai\", credentials)\n\n\ndef test_openai_provider_uses_image_generation_without_reference_images() -> None:\n    provider = OpenAIImageGenerationProvider(\n        api_key=\"test-key\",\n        api_base=\"test-base\",\n    )\n    expected_response = object()\n\n    with (\n        patch(\"litellm.image_generation\", return_value=expected_response) as mock_gen,\n        patch(\"litellm.image_edit\") as mock_edit,\n    ):\n        response = provider.generate_image(\n            prompt=\"draw a mountain\",\n            model=\"gpt-image-1\",\n            size=\"1024x1024\",\n            n=1,\n            quality=\"high\",\n        )\n\n    assert response is expected_response\n    mock_gen.assert_called_once()\n    mock_edit.assert_not_called()\n\n\ndef test_openai_provider_uses_image_edit_with_reference_images() -> None:\n    provider = OpenAIImageGenerationProvider(\n        api_key=\"test-key\",\n        api_base=\"test-base\",\n    )\n    reference_images = [\n        ReferenceImage(data=b\"image-1-bytes\", mime_type=\"image/png\"),\n        ReferenceImage(data=b\"image-2-bytes\", mime_type=\"image/jpeg\"),\n    ]\n    expected_response = object()\n\n    with (\n        patch(\"litellm.image_generation\") as mock_gen,\n        patch(\"litellm.image_edit\", return_value=expected_response) as mock_edit,\n    ):\n        response = provider.generate_image(\n            prompt=\"make this look watercolor\",\n            model=\"gpt-image-1\",\n            size=\"1024x1024\",\n            n=1,\n            quality=\"high\",\n            reference_images=reference_images,\n        )\n\n    assert response is expected_response\n    mock_gen.assert_not_called()\n    mock_edit.assert_called_once()\n    assert mock_edit.call_args.kwargs[\"image\"] == [\n        b\"image-1-bytes\",\n        b\"image-2-bytes\",\n    ]\n\n\ndef test_openai_provider_rejects_reference_images_for_unsupported_model() -> None:\n    provider = OpenAIImageGenerationProvider(api_key=\"test-key\")\n\n    with pytest.raises(ValueError):\n        provider.generate_image(\n            prompt=\"edit this image\",\n            model=\"dall-e-3\",\n            size=\"1024x1024\",\n            n=1,\n            reference_images=[ReferenceImage(data=b\"image-1\", mime_type=\"image/png\")],\n        )\n\n\ndef test_openai_provider_rejects_multiple_reference_images_for_dalle3() -> None:\n    provider = OpenAIImageGenerationProvider(api_key=\"test-key\")\n\n    with pytest.raises(\n        ValueError,\n        match=\"does not support image edits with reference images\",\n    ):\n        provider.generate_image(\n            prompt=\"edit this image\",\n            model=\"dall-e-3\",\n            size=\"1024x1024\",\n            n=1,\n            reference_images=[\n                ReferenceImage(data=b\"image-1\", mime_type=\"image/png\"),\n                ReferenceImage(data=b\"image-2\", mime_type=\"image/png\"),\n            ],\n        )\n\n\ndef test_azure_provider_uses_image_generation_without_reference_images() -> None:\n    provider = AzureImageGenerationProvider(\n        api_key=\"test-key\",\n        api_base=\"https://azure.example.com\",\n        api_version=\"2024-05-01-preview\",\n        deployment_name=\"img-deployment\",\n    )\n    expected_response = object()\n\n    with (\n        patch(\"litellm.image_generation\", return_value=expected_response) as mock_gen,\n        patch(\"litellm.image_edit\") as mock_edit,\n    ):\n        response = provider.generate_image(\n            prompt=\"draw a skyline\",\n            model=\"gpt-image-1\",\n            size=\"1024x1024\",\n            n=1,\n            quality=\"high\",\n        )\n\n    assert response is expected_response\n    mock_gen.assert_called_once()\n    mock_edit.assert_not_called()\n    assert mock_gen.call_args.kwargs[\"model\"] == \"azure/img-deployment\"\n\n\ndef test_azure_provider_uses_image_edit_with_reference_images() -> None:\n    provider = AzureImageGenerationProvider(\n        api_key=\"test-key\",\n        api_base=\"https://azure.example.com\",\n        api_version=\"2024-05-01-preview\",\n        deployment_name=\"img-deployment\",\n    )\n    reference_images = [\n        ReferenceImage(data=b\"image-1-bytes\", mime_type=\"image/png\"),\n        ReferenceImage(data=b\"image-2-bytes\", mime_type=\"image/jpeg\"),\n    ]\n    expected_response = object()\n\n    with (\n        patch(\"litellm.image_generation\") as mock_gen,\n        patch(\"litellm.image_edit\", return_value=expected_response) as mock_edit,\n    ):\n        response = provider.generate_image(\n            prompt=\"make this noir style\",\n            model=\"gpt-image-1\",\n            size=\"1024x1024\",\n            n=1,\n            quality=\"high\",\n            reference_images=reference_images,\n        )\n\n    assert response is expected_response\n    mock_gen.assert_not_called()\n    mock_edit.assert_called_once()\n    assert mock_edit.call_args.kwargs[\"model\"] == \"azure/img-deployment\"\n    assert mock_edit.call_args.kwargs[\"image\"] == [\n        b\"image-1-bytes\",\n        b\"image-2-bytes\",\n    ]\n\n\ndef test_azure_provider_rejects_reference_images_for_unsupported_model() -> None:\n    provider = AzureImageGenerationProvider(\n        api_key=\"test-key\",\n        api_base=\"https://azure.example.com\",\n        api_version=\"2024-05-01-preview\",\n    )\n\n    with pytest.raises(ValueError):\n        provider.generate_image(\n            prompt=\"edit this image\",\n            model=\"dall-e-3\",\n            size=\"1024x1024\",\n            n=1,\n            reference_images=[ReferenceImage(data=b\"image-1\", mime_type=\"image/png\")],\n        )\n\n\ndef test_azure_provider_rejects_multiple_reference_images_for_dalle3() -> None:\n    provider = AzureImageGenerationProvider(\n        api_key=\"test-key\",\n        api_base=\"https://azure.example.com\",\n        api_version=\"2024-05-01-preview\",\n    )\n\n    with pytest.raises(\n        ValueError,\n        match=\"does not support image edits with reference images\",\n    ):\n        provider.generate_image(\n            prompt=\"edit this image\",\n            model=\"dall-e-3\",\n            size=\"1024x1024\",\n            n=1,\n            reference_images=[\n                ReferenceImage(data=b\"image-1\", mime_type=\"image/png\"),\n                ReferenceImage(data=b\"image-2\", mime_type=\"image/png\"),\n            ],\n        )\n"
  },
  {
    "path": "backend/tests/unit/onyx/indexing/conftest.py",
    "content": "import pytest\n\nfrom onyx.indexing.embedder import DefaultIndexingEmbedder\nfrom onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface\n\n\nclass MockHeartbeat(IndexingHeartbeatInterface):\n    def __init__(self) -> None:\n        self.call_count = 0\n\n    def should_stop(self) -> bool:\n        return False\n\n    def progress(self, tag: str, amount: int) -> None:  # noqa: ARG002\n        self.call_count += 1\n\n\n@pytest.fixture\ndef mock_heartbeat() -> MockHeartbeat:\n    return MockHeartbeat()\n\n\n@pytest.fixture\ndef embedder() -> DefaultIndexingEmbedder:\n    return DefaultIndexingEmbedder(\n        model_name=\"intfloat/e5-base-v2\",\n        normalize=True,\n        query_prefix=None,\n        passage_prefix=None,\n    )\n"
  },
  {
    "path": "backend/tests/unit/onyx/indexing/test_censoring.py",
    "content": "import os\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.context.search.models import InferenceChunk\nfrom onyx.db.models import User\nfrom onyx.utils.variable_functionality import fetch_ee_implementation_or_noop\n\n_post_query_chunk_censoring = fetch_ee_implementation_or_noop(\n    \"onyx.external_permissions.post_query_censoring\", \"_post_query_chunk_censoring\"\n)\n\n\n@pytest.mark.skipif(\n    os.environ.get(\"ENABLE_PAID_ENTERPRISE_EDITION_FEATURES\", \"\").lower() != \"true\",\n    reason=\"Permissions tests are enterprise only\",\n)\nclass TestPostQueryChunkCensoring:\n    @pytest.fixture(autouse=True)\n    def setUp(self) -> None:\n        self.mock_user = User(id=1, email=\"test@example.com\")\n        self.mock_chunk_1 = InferenceChunk(\n            document_id=\"doc1\",\n            chunk_id=1,\n            content=\"chunk1 content\",\n            source_type=DocumentSource.SALESFORCE,\n            semantic_identifier=\"doc1_1\",\n            title=\"doc1\",\n            boost=1,\n            score=0.9,\n            hidden=False,\n            metadata={},\n            match_highlights=[],\n            doc_summary=\"doc1 summary\",\n            chunk_context=\"doc1 context\",\n            updated_at=None,\n            image_file_id=None,\n            source_links={},\n            section_continuation=False,\n            blurb=\"chunk1\",\n        )\n        self.mock_chunk_2 = InferenceChunk(\n            document_id=\"doc2\",\n            chunk_id=2,\n            content=\"chunk2 content\",\n            source_type=DocumentSource.SLACK,\n            semantic_identifier=\"doc2_2\",\n            title=\"doc2\",\n            boost=1,\n            score=0.8,\n            hidden=False,\n            metadata={},\n            match_highlights=[],\n            doc_summary=\"doc2 summary\",\n            chunk_context=\"doc2 context\",\n            updated_at=None,\n            image_file_id=None,\n            source_links={},\n            section_continuation=False,\n            blurb=\"chunk2\",\n        )\n        self.mock_chunk_3 = InferenceChunk(\n            document_id=\"doc3\",\n            chunk_id=3,\n            content=\"chunk3 content\",\n            source_type=DocumentSource.SALESFORCE,\n            semantic_identifier=\"doc3_3\",\n            title=\"doc3\",\n            boost=1,\n            score=0.7,\n            hidden=False,\n            metadata={},\n            match_highlights=[],\n            doc_summary=\"doc3 summary\",\n            chunk_context=\"doc3 context\",\n            updated_at=None,\n            image_file_id=None,\n            source_links={},\n            section_continuation=False,\n            blurb=\"chunk3\",\n        )\n        self.mock_chunk_4 = InferenceChunk(\n            document_id=\"doc4\",\n            chunk_id=4,\n            content=\"chunk4 content\",\n            source_type=DocumentSource.SALESFORCE,\n            semantic_identifier=\"doc4_4\",\n            title=\"doc4\",\n            boost=1,\n            score=0.6,\n            hidden=False,\n            metadata={},\n            match_highlights=[],\n            doc_summary=\"doc4 summary\",\n            chunk_context=\"doc4 context\",\n            updated_at=None,\n            image_file_id=None,\n            source_links={},\n            section_continuation=False,\n            blurb=\"chunk4\",\n        )\n\n    @patch(\n        \"ee.onyx.external_permissions.post_query_censoring._get_all_censoring_enabled_sources\"\n    )\n    def test_post_query_chunk_censoring_no_user(\n        self, mock_get_sources: MagicMock\n    ) -> None:\n        mock_get_sources.return_value = {DocumentSource.SALESFORCE}\n        chunks = [self.mock_chunk_1, self.mock_chunk_2]\n        result = _post_query_chunk_censoring(chunks, None)\n        assert result == chunks\n\n    @patch(\n        \"ee.onyx.external_permissions.post_query_censoring._get_all_censoring_enabled_sources\"\n    )\n    @patch(\n        \"ee.onyx.external_permissions.post_query_censoring.DOC_SOURCE_TO_CHUNK_CENSORING_FUNCTION\"\n    )\n    def test_post_query_chunk_censoring_salesforce_censored(\n        self, mock_censor_func: MagicMock, mock_get_sources: MagicMock\n    ) -> None:\n        mock_get_sources.return_value = {DocumentSource.SALESFORCE}\n        mock_censor_func_impl = MagicMock(\n            return_value=[self.mock_chunk_1]\n        )  # Only return chunk 1\n        mock_censor_func.__getitem__.return_value = mock_censor_func_impl\n\n        chunks = [self.mock_chunk_1, self.mock_chunk_2, self.mock_chunk_3]\n        result = _post_query_chunk_censoring(chunks, self.mock_user)\n        assert len(result) == 2\n        assert self.mock_chunk_1 in result\n        assert self.mock_chunk_2 in result\n        assert self.mock_chunk_3 not in result\n        mock_censor_func_impl.assert_called_once()\n\n    @patch(\n        \"ee.onyx.external_permissions.post_query_censoring._get_all_censoring_enabled_sources\"\n    )\n    @patch(\n        \"ee.onyx.external_permissions.post_query_censoring.DOC_SOURCE_TO_CHUNK_CENSORING_FUNCTION\"\n    )\n    def test_post_query_chunk_censoring_salesforce_error(\n        self, mock_censor_func: MagicMock, mock_get_sources: MagicMock\n    ) -> None:\n        mock_get_sources.return_value = {DocumentSource.SALESFORCE}\n        mock_censor_func_impl = MagicMock(side_effect=Exception(\"Censoring error\"))\n        mock_censor_func.__getitem__.return_value = mock_censor_func_impl\n\n        chunks = [self.mock_chunk_1, self.mock_chunk_2, self.mock_chunk_3]\n        result = _post_query_chunk_censoring(chunks, self.mock_user)\n        assert len(result) == 1\n        assert self.mock_chunk_2 in result\n        mock_censor_func_impl.assert_called_once()\n\n    @patch(\n        \"ee.onyx.external_permissions.post_query_censoring._get_all_censoring_enabled_sources\"\n    )\n    @patch(\n        \"ee.onyx.external_permissions.post_query_censoring.DOC_SOURCE_TO_CHUNK_CENSORING_FUNCTION\"\n    )\n    def test_post_query_chunk_censoring_no_censoring(\n        self, mock_censor_func: MagicMock, mock_get_sources: MagicMock\n    ) -> None:\n        mock_get_sources.return_value = set()  # No sources to censor\n        mock_censor_func_impl = MagicMock()\n        mock_censor_func.__getitem__.return_value = mock_censor_func_impl\n\n        chunks = [self.mock_chunk_1, self.mock_chunk_2, self.mock_chunk_3]\n        result = _post_query_chunk_censoring(chunks, self.mock_user)\n        assert result == chunks\n        mock_censor_func_impl.assert_not_called()\n\n    @patch(\n        \"ee.onyx.external_permissions.post_query_censoring._get_all_censoring_enabled_sources\"\n    )\n    @patch(\n        \"ee.onyx.external_permissions.post_query_censoring.DOC_SOURCE_TO_CHUNK_CENSORING_FUNCTION\"\n    )\n    def test_post_query_chunk_censoring_order_maintained(\n        self, mock_censor_func: MagicMock, mock_get_sources: MagicMock\n    ) -> None:\n        mock_get_sources.return_value = {DocumentSource.SALESFORCE}\n        mock_censor_func_impl = MagicMock(\n            return_value=[self.mock_chunk_3, self.mock_chunk_1]\n        )  # Return chunk 3 and 1\n        mock_censor_func.__getitem__.return_value = mock_censor_func_impl\n\n        chunks = [\n            self.mock_chunk_1,\n            self.mock_chunk_2,\n            self.mock_chunk_3,\n            self.mock_chunk_4,\n        ]\n        result = _post_query_chunk_censoring(chunks, self.mock_user)\n        assert len(result) == 3\n        assert result[0] == self.mock_chunk_1\n        assert result[1] == self.mock_chunk_2\n        assert result[2] == self.mock_chunk_3\n        assert self.mock_chunk_4 not in result\n        mock_censor_func_impl.assert_called_once()\n"
  },
  {
    "path": "backend/tests/unit/onyx/indexing/test_chunker.py",
    "content": "from typing import Any\nfrom unittest.mock import Mock\n\nimport pytest\n\nfrom onyx.configs.app_configs import USE_CHUNK_SUMMARY\nfrom onyx.configs.app_configs import USE_DOCUMENT_SUMMARY\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import TextSection\nfrom onyx.indexing.chunker import Chunker\nfrom onyx.indexing.embedder import DefaultIndexingEmbedder\nfrom onyx.indexing.indexing_pipeline import process_image_sections\nfrom onyx.llm.utils import MAX_CONTEXT_TOKENS\nfrom tests.unit.onyx.indexing.conftest import MockHeartbeat\n\n\n@pytest.mark.parametrize(\"enable_contextual_rag\", [True, False])\ndef test_chunk_document(\n    embedder: DefaultIndexingEmbedder, enable_contextual_rag: bool\n) -> None:\n    short_section_1 = \"This is a short section.\"\n    long_section = (\n        \"This is a long section that should be split into multiple chunks. \" * 100\n    )\n    short_section_2 = \"This is another short section.\"\n    short_section_3 = \"This is another short section again.\"\n    short_section_4 = \"Final short section.\"\n    semantic_identifier = \"Test Document\"\n\n    document = Document(\n        id=\"test_doc\",\n        source=DocumentSource.WEB,\n        semantic_identifier=semantic_identifier,\n        metadata={\"tags\": [\"tag1\", \"tag2\"]},\n        doc_updated_at=None,\n        sections=[\n            TextSection(text=short_section_1, link=\"link1\"),\n            TextSection(text=short_section_2, link=\"link2\"),\n            TextSection(text=long_section, link=\"link3\"),\n            TextSection(text=short_section_3, link=\"link4\"),\n            TextSection(text=short_section_4, link=\"link5\"),\n        ],\n    )\n    indexing_documents = process_image_sections([document])\n\n    mock_llm_invoke_count = 0\n\n    def mock_llm_invoke(\n        self: Any, *args: Any, **kwargs: Any  # noqa: ARG001\n    ) -> Mock:  # noqa: ARG001\n        nonlocal mock_llm_invoke_count\n        mock_llm_invoke_count += 1\n        m = Mock()\n        m.content = f\"Test{mock_llm_invoke_count}\"\n        return m\n\n    mock_llm = Mock()\n    mock_llm.invoke = mock_llm_invoke\n\n    chunker = Chunker(\n        tokenizer=embedder.embedding_model.tokenizer,\n        enable_multipass=False,\n        enable_contextual_rag=enable_contextual_rag,\n    )\n    chunks = chunker.chunk(indexing_documents)\n\n    assert len(chunks) == 5\n    assert short_section_1 in chunks[0].content\n    assert short_section_3 in chunks[-1].content\n    assert short_section_4 in chunks[-1].content\n    assert \"tag1\" in chunks[0].metadata_suffix_keyword\n    assert \"tag2\" in chunks[0].metadata_suffix_semantic\n\n    rag_tokens = MAX_CONTEXT_TOKENS * (\n        int(USE_DOCUMENT_SUMMARY) + int(USE_CHUNK_SUMMARY)\n    )\n    for chunk in chunks:\n        assert chunk.contextual_rag_reserved_tokens == (\n            rag_tokens if enable_contextual_rag else 0\n        )\n\n\ndef test_chunker_heartbeat(\n    embedder: DefaultIndexingEmbedder, mock_heartbeat: MockHeartbeat\n) -> None:\n    document = Document(\n        id=\"test_doc\",\n        source=DocumentSource.WEB,\n        semantic_identifier=\"Test Document\",\n        metadata={\"tags\": [\"tag1\", \"tag2\"]},\n        doc_updated_at=None,\n        sections=[\n            TextSection(text=\"This is a short section.\", link=\"link1\"),\n        ],\n    )\n    indexing_documents = process_image_sections([document])\n\n    chunker = Chunker(\n        tokenizer=embedder.embedding_model.tokenizer,\n        enable_multipass=False,\n        callback=mock_heartbeat,\n        enable_contextual_rag=False,\n    )\n\n    chunks = chunker.chunk(indexing_documents)\n\n    assert mock_heartbeat.call_count == 1\n    assert len(chunks) > 0\n"
  },
  {
    "path": "backend/tests/unit/onyx/indexing/test_embed_chunks_in_batches.py",
    "content": "\"\"\"Unit tests for _embed_chunks_to_store.\n\nTests cover:\n  - Single batch, no failures\n  - Multiple batches, no failures\n  - Failure in a single batch\n  - Cross-batch document failure scrubbing\n  - Later batches skip already-failed docs\n  - Empty input\n  - All chunks fail\n\"\"\"\n\nfrom collections.abc import Callable\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.connectors.models import ConnectorFailure\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentFailure\nfrom onyx.connectors.models import DocumentSource\nfrom onyx.connectors.models import TextSection\nfrom onyx.indexing.chunk_batch_store import ChunkBatchStore\nfrom onyx.indexing.indexing_pipeline import _embed_chunks_to_store\nfrom onyx.indexing.models import ChunkEmbedding\nfrom onyx.indexing.models import DocAwareChunk\nfrom onyx.indexing.models import IndexChunk\n\n\ndef _make_doc(doc_id: str) -> Document:\n    return Document(\n        id=doc_id,\n        semantic_identifier=\"test\",\n        source=DocumentSource.FILE,\n        sections=[TextSection(text=\"test\", link=None)],\n        metadata={},\n    )\n\n\ndef _make_chunk(doc_id: str, chunk_id: int) -> DocAwareChunk:\n    return DocAwareChunk(\n        chunk_id=chunk_id,\n        blurb=\"test\",\n        content=\"test content\",\n        source_links=None,\n        image_file_id=None,\n        section_continuation=False,\n        source_document=_make_doc(doc_id),\n        title_prefix=\"\",\n        metadata_suffix_semantic=\"\",\n        metadata_suffix_keyword=\"\",\n        mini_chunk_texts=None,\n        large_chunk_id=None,\n        doc_summary=\"\",\n        chunk_context=\"\",\n        contextual_rag_reserved_tokens=0,\n    )\n\n\ndef _make_index_chunk(doc_id: str, chunk_id: int) -> IndexChunk:\n    \"\"\"Create an IndexChunk (a DocAwareChunk with embeddings).\"\"\"\n    return IndexChunk(\n        chunk_id=chunk_id,\n        blurb=\"test\",\n        content=\"test content\",\n        source_links=None,\n        image_file_id=None,\n        section_continuation=False,\n        source_document=_make_doc(doc_id),\n        title_prefix=\"\",\n        metadata_suffix_semantic=\"\",\n        metadata_suffix_keyword=\"\",\n        mini_chunk_texts=None,\n        large_chunk_id=None,\n        doc_summary=\"\",\n        chunk_context=\"\",\n        contextual_rag_reserved_tokens=0,\n        embeddings=ChunkEmbedding(\n            full_embedding=[0.1] * 10,\n            mini_chunk_embeddings=[],\n        ),\n        title_embedding=None,\n    )\n\n\ndef _make_failure(doc_id: str) -> ConnectorFailure:\n    return ConnectorFailure(\n        failed_document=DocumentFailure(document_id=doc_id, document_link=None),\n        failure_message=\"embedding failed\",\n        exception=RuntimeError(\"embedding failed\"),\n    )\n\n\ndef _mock_embed_success(\n    chunks: list[DocAwareChunk], **_kwargs: object\n) -> tuple[list[IndexChunk], list[ConnectorFailure]]:\n    \"\"\"Simulate successful embedding of all chunks.\"\"\"\n    return (\n        [_make_index_chunk(c.source_document.id, c.chunk_id) for c in chunks],\n        [],\n    )\n\n\ndef _mock_embed_fail_doc(\n    fail_doc_id: str,\n) -> Callable[..., tuple[list[IndexChunk], list[ConnectorFailure]]]:\n    \"\"\"Return an embed mock that fails all chunks for a specific doc.\"\"\"\n\n    def _embed(\n        chunks: list[DocAwareChunk], **_kwargs: object\n    ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:\n        successes = [\n            _make_index_chunk(c.source_document.id, c.chunk_id)\n            for c in chunks\n            if c.source_document.id != fail_doc_id\n        ]\n        failures = (\n            [_make_failure(fail_doc_id)]\n            if any(c.source_document.id == fail_doc_id for c in chunks)\n            else []\n        )\n        return successes, failures\n\n    return _embed\n\n\nclass TestEmbedChunksInBatches:\n    @patch(\n        \"onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling\",\n    )\n    @patch(\"onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH\", 100)\n    def test_single_batch_no_failures(self, mock_embed: MagicMock) -> None:\n        \"\"\"All chunks fit in one batch and embed successfully.\"\"\"\n        mock_embed.side_effect = _mock_embed_success\n\n        with ChunkBatchStore() as store:\n            chunks = [_make_chunk(\"doc1\", i) for i in range(3)]\n            result = _embed_chunks_to_store(\n                chunks=chunks,\n                embedder=MagicMock(),\n                tenant_id=\"test\",\n                request_id=None,\n                store=store,\n            )\n\n            assert len(result.successful_chunk_ids) == 3\n            assert len(result.connector_failures) == 0\n\n            # Verify stored contents\n            assert len(store._batch_files()) == 1\n            stored = list(store.stream())\n            assert len(stored) == 3\n\n    @patch(\n        \"onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling\",\n    )\n    @patch(\"onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH\", 3)\n    def test_multiple_batches_no_failures(self, mock_embed: MagicMock) -> None:\n        \"\"\"Chunks are split across multiple batches, all succeed.\"\"\"\n        mock_embed.side_effect = _mock_embed_success\n\n        with ChunkBatchStore() as store:\n            chunks = [_make_chunk(\"doc1\", i) for i in range(7)]\n            result = _embed_chunks_to_store(\n                chunks=chunks,\n                embedder=MagicMock(),\n                tenant_id=\"test\",\n                request_id=None,\n                store=store,\n            )\n\n            assert len(result.successful_chunk_ids) == 7\n            assert len(result.connector_failures) == 0\n            assert len(store._batch_files()) == 3  # 3 + 3 + 1\n\n    @patch(\n        \"onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling\",\n    )\n    @patch(\"onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH\", 100)\n    def test_single_batch_with_failure(self, mock_embed: MagicMock) -> None:\n        \"\"\"One doc fails embedding, its chunks are excluded from results.\"\"\"\n        mock_embed.side_effect = _mock_embed_fail_doc(\"doc2\")\n\n        with ChunkBatchStore() as store:\n            chunks = [\n                _make_chunk(\"doc1\", 0),\n                _make_chunk(\"doc2\", 1),\n                _make_chunk(\"doc1\", 2),\n            ]\n            result = _embed_chunks_to_store(\n                chunks=chunks,\n                embedder=MagicMock(),\n                tenant_id=\"test\",\n                request_id=None,\n                store=store,\n            )\n\n            assert len(result.connector_failures) == 1\n            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}\n            assert \"doc2\" not in successful_doc_ids\n            assert \"doc1\" in successful_doc_ids\n\n    @patch(\n        \"onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling\",\n    )\n    @patch(\"onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH\", 3)\n    def test_cross_batch_failure_scrubs_earlier_batch(\n        self, mock_embed: MagicMock\n    ) -> None:\n        \"\"\"Doc A spans batches 0 and 1.  It succeeds in batch 0 but fails in\n        batch 1.  Its chunks should be scrubbed from batch 0's batch file.\"\"\"\n        call_count = 0\n\n        def _embed(\n            chunks: list[DocAwareChunk], **_kwargs: object\n        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:\n            nonlocal call_count\n            call_count += 1\n            if call_count == 1:\n                return _mock_embed_success(chunks)\n            else:\n                return _mock_embed_fail_doc(\"docA\")(chunks)\n\n        mock_embed.side_effect = _embed\n\n        with ChunkBatchStore() as store:\n            chunks = [\n                _make_chunk(\"docA\", 0),\n                _make_chunk(\"docA\", 1),\n                _make_chunk(\"docA\", 2),\n                _make_chunk(\"docA\", 3),\n                _make_chunk(\"docB\", 0),\n                _make_chunk(\"docB\", 1),\n            ]\n            result = _embed_chunks_to_store(\n                chunks=chunks,\n                embedder=MagicMock(),\n                tenant_id=\"test\",\n                request_id=None,\n                store=store,\n            )\n\n            # docA should be fully excluded from results\n            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}\n            assert \"docA\" not in successful_doc_ids\n            assert \"docB\" in successful_doc_ids\n            assert len(result.connector_failures) == 1\n\n            # Verify batch 0 was scrubbed of docA chunks\n            all_stored = list(store.stream())\n            stored_doc_ids = {c.source_document.id for c in all_stored}\n            assert \"docA\" not in stored_doc_ids\n            assert \"docB\" in stored_doc_ids\n\n    @patch(\n        \"onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling\",\n    )\n    @patch(\"onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH\", 3)\n    def test_later_batch_skips_already_failed_doc(self, mock_embed: MagicMock) -> None:\n        \"\"\"If docA fails in batch 0, its chunks in batch 1 are skipped\n        entirely (never sent to the embedder).\"\"\"\n        embedded_doc_ids: list[str] = []\n\n        def _embed(\n            chunks: list[DocAwareChunk], **_kwargs: object\n        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:\n            for c in chunks:\n                embedded_doc_ids.append(c.source_document.id)\n            return _mock_embed_fail_doc(\"docA\")(chunks)\n\n        mock_embed.side_effect = _embed\n\n        with ChunkBatchStore() as store:\n            chunks = [\n                _make_chunk(\"docA\", 0),\n                _make_chunk(\"docA\", 1),\n                _make_chunk(\"docA\", 2),\n                _make_chunk(\"docA\", 3),\n                _make_chunk(\"docB\", 0),\n                _make_chunk(\"docB\", 1),\n            ]\n            _embed_chunks_to_store(\n                chunks=chunks,\n                embedder=MagicMock(),\n                tenant_id=\"test\",\n                request_id=None,\n                store=store,\n            )\n\n        # docA should only appear in batch 0, not batch 1\n        batch_1_doc_ids = embedded_doc_ids[3:]\n        assert \"docA\" not in batch_1_doc_ids\n\n    @patch(\n        \"onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling\",\n    )\n    @patch(\"onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH\", 3)\n    def test_failed_doc_skipped_in_later_batch_while_other_doc_succeeds(\n        self, mock_embed: MagicMock\n    ) -> None:\n        \"\"\"doc1 spans batches 0 and 1, doc2 only in batch 1.  Batch 0 fails\n        doc1.  In batch 1, doc1 chunks should be skipped but doc2 chunks\n        should still be embedded successfully.\"\"\"\n        embedded_chunks: list[list[str]] = []\n\n        def _embed(\n            chunks: list[DocAwareChunk], **_kwargs: object\n        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:\n            embedded_chunks.append([c.source_document.id for c in chunks])\n            return _mock_embed_fail_doc(\"doc1\")(chunks)\n\n        mock_embed.side_effect = _embed\n\n        with ChunkBatchStore() as store:\n            chunks = [\n                _make_chunk(\"doc1\", 0),\n                _make_chunk(\"doc1\", 1),\n                _make_chunk(\"doc1\", 2),\n                _make_chunk(\"doc1\", 3),\n                _make_chunk(\"doc2\", 0),\n                _make_chunk(\"doc2\", 1),\n            ]\n            result = _embed_chunks_to_store(\n                chunks=chunks,\n                embedder=MagicMock(),\n                tenant_id=\"test\",\n                request_id=None,\n                store=store,\n            )\n\n            # doc1 should be fully excluded, doc2 fully included\n            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}\n            assert \"doc1\" not in successful_doc_ids\n            assert \"doc2\" in successful_doc_ids\n            assert len(result.successful_chunk_ids) == 2  # doc2's 2 chunks\n\n            # Batch 1 should only contain doc2 (doc1 was filtered before embedding)\n            assert len(embedded_chunks) == 2\n            assert \"doc1\" not in embedded_chunks[1]\n            assert embedded_chunks[1] == [\"doc2\", \"doc2\"]\n\n            # Verify on-disk state has no doc1 chunks\n            all_stored = list(store.stream())\n            assert all(c.source_document.id == \"doc2\" for c in all_stored)\n\n    @patch(\n        \"onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling\",\n    )\n    def test_empty_input(self, mock_embed: MagicMock) -> None:\n        \"\"\"Empty chunk list produces empty results.\"\"\"\n        mock_embed.side_effect = _mock_embed_success\n\n        with ChunkBatchStore() as store:\n            result = _embed_chunks_to_store(\n                chunks=[],\n                embedder=MagicMock(),\n                tenant_id=\"test\",\n                request_id=None,\n                store=store,\n            )\n\n            assert len(result.successful_chunk_ids) == 0\n            assert len(result.connector_failures) == 0\n            mock_embed.assert_not_called()\n\n    @patch(\n        \"onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling\",\n    )\n    @patch(\"onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH\", 100)\n    def test_all_chunks_fail(self, mock_embed: MagicMock) -> None:\n        \"\"\"When all documents fail, results have no successful chunks.\"\"\"\n\n        def _fail_all(\n            chunks: list[DocAwareChunk], **_kwargs: object\n        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:\n            doc_ids = {c.source_document.id for c in chunks}\n            return [], [_make_failure(doc_id) for doc_id in doc_ids]\n\n        mock_embed.side_effect = _fail_all\n\n        with ChunkBatchStore() as store:\n            chunks = [_make_chunk(\"doc1\", 0), _make_chunk(\"doc2\", 1)]\n            result = _embed_chunks_to_store(\n                chunks=chunks,\n                embedder=MagicMock(),\n                tenant_id=\"test\",\n                request_id=None,\n                store=store,\n            )\n\n            assert len(result.successful_chunk_ids) == 0\n            assert len(result.connector_failures) == 2\n"
  },
  {
    "path": "backend/tests/unit/onyx/indexing/test_embedder.py",
    "content": "from collections.abc import Generator\nfrom unittest.mock import Mock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import TextSection\nfrom onyx.indexing.embedder import DefaultIndexingEmbedder\nfrom onyx.indexing.models import ChunkEmbedding\nfrom onyx.indexing.models import DocAwareChunk\nfrom onyx.indexing.models import IndexChunk\nfrom shared_configs.enums import EmbeddingProvider\nfrom shared_configs.enums import EmbedTextType\n\n\n@pytest.fixture\ndef mock_embedding_model() -> Generator[Mock, None, None]:\n    with patch(\"onyx.indexing.embedder.EmbeddingModel\") as mock:\n        yield mock\n\n\n@pytest.mark.parametrize(\n    \"chunk_context, doc_summary\",\n    [(\"Test chunk context\", \"Test document summary\"), (\"\", \"\")],\n)\ndef test_default_indexing_embedder_embed_chunks(\n    mock_embedding_model: Mock, chunk_context: str, doc_summary: str\n) -> None:\n    # Setup\n    embedder = DefaultIndexingEmbedder(\n        model_name=\"test-model\",\n        normalize=True,\n        query_prefix=None,\n        passage_prefix=None,\n        provider_type=EmbeddingProvider.OPENAI,\n    )\n\n    # Mock the encode method of the embedding model\n    mock_embedding_model.return_value.encode.side_effect = [\n        [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],  # Main chunk embeddings\n        [[7.0, 8.0, 9.0]],  # Title embedding\n    ]\n\n    # Create test input\n    source_doc = Document(\n        id=\"test_doc\",\n        source=DocumentSource.WEB,\n        semantic_identifier=\"Test Document\",\n        metadata={\"tags\": [\"tag1\", \"tag2\"]},\n        doc_updated_at=None,\n        sections=[\n            TextSection(text=\"This is a short section.\", link=\"link1\"),\n        ],\n    )\n    chunks: list[DocAwareChunk] = [\n        DocAwareChunk(\n            chunk_id=1,\n            blurb=\"This is a short section.\",\n            content=\"Test chunk\",\n            source_links={0: \"link1\"},\n            section_continuation=False,\n            source_document=source_doc,\n            title_prefix=\"Title: \",\n            metadata_suffix_semantic=\"\",\n            metadata_suffix_keyword=\"\",\n            mini_chunk_texts=None,\n            large_chunk_reference_ids=[],\n            large_chunk_id=None,\n            image_file_id=None,\n            chunk_context=chunk_context,\n            doc_summary=doc_summary,\n            contextual_rag_reserved_tokens=200,\n        )\n    ]\n\n    # Execute\n    result: list[IndexChunk] = embedder.embed_chunks(chunks)\n\n    # Assert\n    assert len(result) == 1\n    assert isinstance(result[0], IndexChunk)\n    assert result[0].content == \"Test chunk\"\n    assert result[0].embeddings == ChunkEmbedding(\n        full_embedding=[1.0, 2.0, 3.0],\n        mini_chunk_embeddings=[],\n    )\n    assert result[0].title_embedding == [7.0, 8.0, 9.0]\n\n    # Verify the embedding model was called exactly as follows\n    mock_embedding_model.return_value.encode.assert_any_call(\n        texts=[f\"Title: {doc_summary}Test chunk{chunk_context}\"],\n        text_type=EmbedTextType.PASSAGE,\n        large_chunks_present=False,\n        tenant_id=None,\n        request_id=None,\n    )\n    # Same for title only embedding call\n    mock_embedding_model.return_value.encode.assert_any_call(\n        [\"Test Document\"],\n        text_type=EmbedTextType.PASSAGE,\n        tenant_id=None,\n        request_id=None,\n    )\n"
  },
  {
    "path": "backend/tests/unit/onyx/indexing/test_indexing_pipeline.py",
    "content": "import threading\nfrom typing import Any\nfrom typing import cast\nfrom typing import List\nfrom unittest.mock import MagicMock\nfrom unittest.mock import Mock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.configs.app_configs import MAX_DOCUMENT_CHARS\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentSource\nfrom onyx.connectors.models import ImageSection\nfrom onyx.connectors.models import TextSection\nfrom onyx.hooks.executor import HookSkipped\nfrom onyx.hooks.executor import HookSoftFailed\nfrom onyx.hooks.points.document_ingestion import DocumentIngestionResponse\nfrom onyx.hooks.points.document_ingestion import DocumentIngestionSection\nfrom onyx.indexing.chunker import Chunker\nfrom onyx.indexing.embedder import DefaultIndexingEmbedder\nfrom onyx.indexing.indexing_pipeline import _apply_document_ingestion_hook\nfrom onyx.indexing.indexing_pipeline import add_contextual_summaries\nfrom onyx.indexing.indexing_pipeline import filter_documents\nfrom onyx.indexing.indexing_pipeline import process_image_sections\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.model_response import Choice\nfrom onyx.llm.model_response import Message\nfrom onyx.llm.model_response import ModelResponse\nfrom onyx.llm.utils import get_max_input_tokens\n\n\ndef create_test_document(\n    doc_id: str = \"test_id\",\n    title: str | None = \"Test Title\",\n    semantic_id: str = \"test_semantic_id\",\n    sections: List[TextSection] | None = None,\n) -> Document:\n    if sections is None:\n        sections = [TextSection(text=\"Test content\", link=\"test_link\")]\n    return Document(\n        id=doc_id,\n        title=title,\n        semantic_identifier=semantic_id,\n        sections=cast(list[TextSection | ImageSection], sections),\n        source=DocumentSource.FILE,\n        metadata={},\n    )\n\n\ndef test_filter_documents_empty_title_and_content() -> None:\n    doc = create_test_document(\n        title=\"\", semantic_id=\"\", sections=[TextSection(text=\"\", link=\"test_link\")]\n    )\n    result = filter_documents([doc])\n    assert len(result) == 0\n\n\ndef test_filter_documents_empty_title_with_content() -> None:\n    doc = create_test_document(\n        title=\"\", sections=[TextSection(text=\"Valid content\", link=\"test_link\")]\n    )\n    result = filter_documents([doc])\n    assert len(result) == 1\n    assert result[0].id == \"test_id\"\n\n\ndef test_filter_documents_empty_content_with_title() -> None:\n    doc = create_test_document(\n        title=\"Valid Title\", sections=[TextSection(text=\"\", link=\"test_link\")]\n    )\n    result = filter_documents([doc])\n    assert len(result) == 1\n    assert result[0].id == \"test_id\"\n\n\ndef test_filter_documents_exceeding_max_chars() -> None:\n    if not MAX_DOCUMENT_CHARS:  # Skip if no max chars configured\n        return\n    long_text = \"a\" * (MAX_DOCUMENT_CHARS + 1)\n    doc = create_test_document(sections=[TextSection(text=long_text, link=\"test_link\")])\n    result = filter_documents([doc])\n    assert len(result) == 0\n\n\ndef test_filter_documents_valid_document() -> None:\n    doc = create_test_document(\n        title=\"Valid Title\",\n        sections=[TextSection(text=\"Valid content\", link=\"test_link\")],\n    )\n    result = filter_documents([doc])\n    assert len(result) == 1\n    assert result[0].id == \"test_id\"\n    assert result[0].title == \"Valid Title\"\n\n\ndef test_filter_documents_whitespace_only() -> None:\n    doc = create_test_document(\n        title=\"   \",\n        semantic_id=\"  \",\n        sections=[TextSection(text=\"   \", link=\"test_link\")],\n    )\n    result = filter_documents([doc])\n    assert len(result) == 0\n\n\ndef test_filter_documents_semantic_id_no_title() -> None:\n    doc = create_test_document(\n        title=None,\n        semantic_id=\"Valid Semantic ID\",\n        sections=[TextSection(text=\"Valid content\", link=\"test_link\")],\n    )\n    result = filter_documents([doc])\n    assert len(result) == 1\n    assert result[0].semantic_identifier == \"Valid Semantic ID\"\n\n\ndef test_filter_documents_multiple_sections() -> None:\n    doc = create_test_document(\n        sections=[\n            TextSection(text=\"Content 1\", link=\"test_link\"),\n            TextSection(text=\"Content 2\", link=\"test_link\"),\n            TextSection(text=\"Content 3\", link=\"test_link\"),\n        ]\n    )\n    result = filter_documents([doc])\n    assert len(result) == 1\n    assert len(result[0].sections) == 3\n\n\ndef test_filter_documents_multiple_documents() -> None:\n    docs = [\n        create_test_document(doc_id=\"1\", title=\"Title 1\"),\n        create_test_document(\n            doc_id=\"2\", title=\"\", sections=[TextSection(text=\"\", link=\"test_link\")]\n        ),  # Should be filtered\n        create_test_document(doc_id=\"3\", title=\"Title 3\"),\n    ]\n    result = filter_documents(docs)\n    assert len(result) == 2\n    assert {doc.id for doc in result} == {\"1\", \"3\"}\n\n\ndef test_filter_documents_empty_batch() -> None:\n    result = filter_documents([])\n    assert len(result) == 0\n\n\n@patch(\"onyx.llm.utils.GEN_AI_MAX_TOKENS\", 4096)\n@pytest.mark.parametrize(\"enable_contextual_rag\", [True, False])\ndef test_contextual_rag(\n    embedder: DefaultIndexingEmbedder, enable_contextual_rag: bool\n) -> None:\n    short_section_1 = \"This is a short section.\"\n    long_section = (\n        \"This is a long section that should be split into multiple chunks. \" * 100\n    )\n    short_section_2 = \"This is another short section.\"\n    short_section_3 = \"This is another short section again.\"\n    short_section_4 = \"Final short section.\"\n    semantic_identifier = \"Test Document\"\n\n    document = Document(\n        id=\"test_doc\",\n        source=DocumentSource.WEB,\n        semantic_identifier=semantic_identifier,\n        metadata={\"tags\": [\"tag1\", \"tag2\"]},\n        doc_updated_at=None,\n        sections=[\n            TextSection(text=short_section_1, link=\"link1\"),\n            TextSection(text=short_section_2, link=\"link2\"),\n            TextSection(text=long_section, link=\"link3\"),\n            TextSection(text=short_section_3, link=\"link4\"),\n            TextSection(text=short_section_4, link=\"link5\"),\n        ],\n    )\n    indexing_documents = process_image_sections([document])\n\n    mock_llm_invoke_count = 0\n    counter_lock = threading.Lock()\n\n    def mock_llm_invoke(\n        *args: Any, **kwargs: Any  # noqa: ARG001\n    ) -> ModelResponse:  # noqa: ARG001\n        nonlocal mock_llm_invoke_count\n        with counter_lock:\n            mock_llm_invoke_count += 1\n        return ModelResponse(\n            id=f\"test-{mock_llm_invoke_count}\",\n            created=\"2024-01-01T00:00:00Z\",\n            choice=Choice(message=Message(content=f\"Test{mock_llm_invoke_count}\")),\n        )\n\n    llm_tokenizer = embedder.embedding_model.tokenizer\n\n    mock_llm = Mock()\n    mock_llm.config.max_input_tokens = get_max_input_tokens(\n        model_provider=LlmProviderNames.OPENAI, model_name=\"gpt-4o\"\n    )\n    mock_llm.invoke = mock_llm_invoke\n\n    chunker = Chunker(\n        tokenizer=embedder.embedding_model.tokenizer,\n        enable_multipass=False,\n        enable_contextual_rag=enable_contextual_rag,\n    )\n    chunks = chunker.chunk(indexing_documents)\n\n    chunks = add_contextual_summaries(\n        chunks=chunks,\n        llm=mock_llm,\n        tokenizer=llm_tokenizer,\n        chunk_token_limit=chunker.chunk_token_limit * 2,\n    )\n\n    assert len(chunks) == 5\n    assert short_section_1 in chunks[0].content\n    assert short_section_3 in chunks[-1].content\n    assert short_section_4 in chunks[-1].content\n    assert \"tag1\" in chunks[0].metadata_suffix_keyword\n    assert \"tag2\" in chunks[0].metadata_suffix_semantic\n\n    doc_summary = \"Test1\" if enable_contextual_rag else \"\"\n    chunk_context = \"\"\n    count = 2\n    for chunk in chunks:\n        if enable_contextual_rag:\n            chunk_context = f\"Test{count}\"\n            count += 1\n        assert chunk.doc_summary == doc_summary\n        assert chunk.chunk_context == chunk_context\n\n\n# ---------------------------------------------------------------------------\n# _apply_document_ingestion_hook\n# ---------------------------------------------------------------------------\n\n_PATCH_EXECUTE_HOOK = \"onyx.indexing.indexing_pipeline.execute_hook\"\n\n\ndef _make_doc(\n    doc_id: str = \"doc1\",\n    sections: list[TextSection | ImageSection] | None = None,\n) -> Document:\n    if sections is None:\n        sections = [TextSection(text=\"Hello\", link=\"http://example.com\")]\n    return Document(\n        id=doc_id,\n        title=\"Test Doc\",\n        semantic_identifier=\"test-doc\",\n        sections=cast(list[TextSection | ImageSection], sections),\n        source=DocumentSource.FILE,\n        metadata={},\n    )\n\n\ndef test_document_ingestion_hook_skipped_passes_through() -> None:\n    doc = _make_doc()\n    with patch(_PATCH_EXECUTE_HOOK, return_value=HookSkipped()):\n        result = _apply_document_ingestion_hook([doc], MagicMock())\n    assert result == [doc]\n\n\ndef test_document_ingestion_hook_soft_failed_passes_through() -> None:\n    doc = _make_doc()\n    with patch(_PATCH_EXECUTE_HOOK, return_value=HookSoftFailed()):\n        result = _apply_document_ingestion_hook([doc], MagicMock())\n    assert result == [doc]\n\n\ndef test_document_ingestion_hook_none_sections_drops_document() -> None:\n    doc = _make_doc()\n    with patch(\n        _PATCH_EXECUTE_HOOK,\n        return_value=DocumentIngestionResponse(\n            sections=None, rejection_reason=\"PII detected\"\n        ),\n    ):\n        result = _apply_document_ingestion_hook([doc], MagicMock())\n    assert result == []\n\n\ndef test_document_ingestion_hook_all_invalid_sections_drops_document() -> None:\n    \"\"\"A non-empty list where every section has neither text nor image_file_id drops the doc.\"\"\"\n    doc = _make_doc()\n    with patch(\n        _PATCH_EXECUTE_HOOK,\n        return_value=DocumentIngestionResponse(sections=[DocumentIngestionSection()]),\n    ):\n        result = _apply_document_ingestion_hook([doc], MagicMock())\n    assert result == []\n\n\ndef test_document_ingestion_hook_empty_sections_drops_document() -> None:\n    doc = _make_doc()\n    with patch(\n        _PATCH_EXECUTE_HOOK,\n        return_value=DocumentIngestionResponse(sections=[]),\n    ):\n        result = _apply_document_ingestion_hook([doc], MagicMock())\n    assert result == []\n\n\ndef test_document_ingestion_hook_rewrites_text_sections() -> None:\n    doc = _make_doc(sections=[TextSection(text=\"original\", link=\"http://a.com\")])\n    with patch(\n        _PATCH_EXECUTE_HOOK,\n        return_value=DocumentIngestionResponse(\n            sections=[DocumentIngestionSection(text=\"rewritten\", link=\"http://b.com\")]\n        ),\n    ):\n        result = _apply_document_ingestion_hook([doc], MagicMock())\n    assert len(result) == 1\n    assert len(result[0].sections) == 1\n    section = result[0].sections[0]\n    assert isinstance(section, TextSection)\n    assert section.text == \"rewritten\"\n    assert section.link == \"http://b.com\"\n\n\ndef test_document_ingestion_hook_preserves_image_section_order() -> None:\n    \"\"\"Hook receives all sections including images and controls final ordering.\"\"\"\n    image = ImageSection(image_file_id=\"img-1\", link=None)\n    doc = _make_doc(\n        sections=cast(\n            list[TextSection | ImageSection],\n            [TextSection(text=\"original\", link=None), image],\n        )\n    )\n    # Hook moves the image before the text section\n    with patch(\n        _PATCH_EXECUTE_HOOK,\n        return_value=DocumentIngestionResponse(\n            sections=[\n                DocumentIngestionSection(image_file_id=\"img-1\", link=None),\n                DocumentIngestionSection(text=\"rewritten\", link=None),\n            ]\n        ),\n    ):\n        result = _apply_document_ingestion_hook([doc], MagicMock())\n    assert len(result) == 1\n    sections = result[0].sections\n    assert len(sections) == 2\n    assert (\n        isinstance(sections[0], ImageSection) and sections[0].image_file_id == \"img-1\"\n    )\n    assert isinstance(sections[1], TextSection) and sections[1].text == \"rewritten\"\n\n\ndef test_document_ingestion_hook_mixed_batch() -> None:\n    \"\"\"Drop one doc, rewrite another, pass through a third.\"\"\"\n    doc_drop = _make_doc(doc_id=\"drop\")\n    doc_rewrite = _make_doc(doc_id=\"rewrite\")\n    doc_skip = _make_doc(doc_id=\"skip\")\n\n    def _side_effect(**kwargs: Any) -> Any:\n        doc_id = kwargs[\"payload\"][\"document_id\"]\n        if doc_id == \"drop\":\n            return DocumentIngestionResponse(sections=None)\n        if doc_id == \"rewrite\":\n            return DocumentIngestionResponse(\n                sections=[DocumentIngestionSection(text=\"new text\", link=None)]\n            )\n        return HookSkipped()\n\n    with patch(_PATCH_EXECUTE_HOOK, side_effect=_side_effect):\n        result = _apply_document_ingestion_hook(\n            [doc_drop, doc_rewrite, doc_skip], MagicMock()\n        )\n\n    assert len(result) == 2\n    ids = {d.id for d in result}\n    assert ids == {\"rewrite\", \"skip\"}\n    rewritten = next(d for d in result if d.id == \"rewrite\")\n    assert isinstance(rewritten.sections[0], TextSection)\n    assert rewritten.sections[0].text == \"new text\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/indexing/test_personas_in_chunks.py",
    "content": "\"\"\"Tests that persona IDs are correctly propagated through the indexing pipeline.\n\nCovers Phase 1 (schema plumbing) and Phase 2 (write at index time) of the\nunify-assistant-project-files plan.\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nfrom onyx.access.models import DocumentAccess\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentSource\nfrom onyx.connectors.models import TextSection\nfrom onyx.indexing.models import ChunkEmbedding\nfrom onyx.indexing.models import DocMetadataAwareIndexChunk\nfrom onyx.indexing.models import IndexChunk\n\n\ndef _make_index_chunk(\n    doc_id: str = \"test-file-id\",\n    content: str = \"test content\",\n) -> IndexChunk:\n    embedding = [0.1] * 10\n    doc = Document(\n        id=doc_id,\n        semantic_identifier=\"test_file.txt\",\n        sections=[TextSection(text=content, link=None)],\n        source=DocumentSource.USER_FILE,\n        metadata={},\n    )\n    return IndexChunk(\n        chunk_id=0,\n        blurb=content[:50],\n        content=content,\n        source_links=None,\n        image_file_id=None,\n        section_continuation=False,\n        source_document=doc,\n        title_prefix=\"\",\n        metadata_suffix_semantic=\"\",\n        metadata_suffix_keyword=\"\",\n        contextual_rag_reserved_tokens=0,\n        doc_summary=\"\",\n        chunk_context=\"\",\n        mini_chunk_texts=None,\n        large_chunk_id=None,\n        embeddings=ChunkEmbedding(\n            full_embedding=embedding,\n            mini_chunk_embeddings=[],\n        ),\n        title_embedding=None,\n    )\n\n\ndef _make_access() -> DocumentAccess:\n    return DocumentAccess.build(\n        user_emails=[\"user@example.com\"],\n        user_groups=[],\n        external_user_emails=[],\n        external_user_group_ids=[],\n        is_public=False,\n    )\n\n\ndef test_from_index_chunk_propagates_personas() -> None:\n    \"\"\"Personas list passed to from_index_chunk appears on the result.\"\"\"\n    chunk = _make_index_chunk()\n    persona_ids = [10, 20, 30]\n\n    aware_chunk = DocMetadataAwareIndexChunk.from_index_chunk(\n        index_chunk=chunk,\n        access=_make_access(),\n        document_sets=set(),\n        user_project=[1],\n        personas=persona_ids,\n        boost=0,\n        aggregated_chunk_boost_factor=1.0,\n        tenant_id=\"test_tenant\",\n    )\n\n    assert aware_chunk.personas == persona_ids\n    assert aware_chunk.user_project == [1]\n\n\ndef test_from_index_chunk_empty_personas() -> None:\n    \"\"\"An empty personas list is preserved (not turned into None or omitted).\"\"\"\n    chunk = _make_index_chunk()\n\n    aware_chunk = DocMetadataAwareIndexChunk.from_index_chunk(\n        index_chunk=chunk,\n        access=_make_access(),\n        document_sets=set(),\n        user_project=[],\n        personas=[],\n        boost=0,\n        aggregated_chunk_boost_factor=1.0,\n        tenant_id=\"test_tenant\",\n    )\n\n    assert aware_chunk.personas == []\n\n\ndef _make_document(doc_id: str) -> Document:\n    return Document(\n        id=doc_id,\n        semantic_identifier=\"test_file.txt\",\n        sections=[TextSection(text=\"test content\", link=None)],\n        source=DocumentSource.USER_FILE,\n        metadata={},\n    )\n\n\ndef _run_adapter_build(\n    file_id: str,\n    project_ids_map: dict[str, list[int]],\n    persona_ids_map: dict[str, list[int]],\n) -> list[DocMetadataAwareIndexChunk]:\n    \"\"\"Helper that runs UserFileIndexingAdapter.prepare_enrichment + enrich_chunk\n    with all external dependencies mocked.\"\"\"\n    from onyx.indexing.adapters.user_file_indexing_adapter import (\n        UserFileIndexingAdapter,\n    )\n    from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext\n\n    chunk = _make_index_chunk(doc_id=file_id)\n    doc = _make_document(doc_id=file_id)\n\n    context = DocumentBatchPrepareContext(\n        updatable_docs=[doc],\n        id_to_boost_map={},\n    )\n\n    adapter = UserFileIndexingAdapter(tenant_id=\"test_tenant\", db_session=MagicMock())\n\n    with (\n        patch(\n            \"onyx.indexing.adapters.user_file_indexing_adapter.fetch_user_project_ids_for_user_files\",\n            return_value=project_ids_map,\n        ),\n        patch(\n            \"onyx.indexing.adapters.user_file_indexing_adapter.fetch_persona_ids_for_user_files\",\n            return_value=persona_ids_map,\n        ),\n        patch(\n            \"onyx.indexing.adapters.user_file_indexing_adapter.get_access_for_user_files\",\n            return_value={file_id: _make_access()},\n        ),\n        patch(\n            \"onyx.indexing.adapters.user_file_indexing_adapter.fetch_chunk_counts_for_user_files\",\n            return_value=[(file_id, 0)],\n        ),\n        patch(\n            \"onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm\",\n            side_effect=Exception(\"no LLM in tests\"),\n        ),\n    ):\n        enricher = adapter.prepare_enrichment(\n            context=context,\n            tenant_id=\"test_tenant\",\n            chunks=[chunk],\n        )\n        return [enricher.enrich_chunk(chunk, 1.0)]\n\n\ndef test_prepare_enrichment_includes_persona_ids() -> None:\n    \"\"\"UserFileIndexingAdapter.prepare_enrichment writes persona IDs\n    fetched from the DB into each chunk's metadata.\"\"\"\n    file_id = str(uuid4())\n    persona_ids = [5, 12]\n    project_ids = [3]\n\n    chunks = _run_adapter_build(\n        file_id=file_id,\n        project_ids_map={file_id: project_ids},\n        persona_ids_map={file_id: persona_ids},\n    )\n\n    assert len(chunks) == 1\n    assert chunks[0].personas == persona_ids\n    assert chunks[0].user_project == project_ids\n\n\ndef test_prepare_enrichment_missing_file_defaults_to_empty() -> None:\n    \"\"\"When a file has no persona or project associations in the DB, the\n    adapter should default to empty lists (not KeyError or None).\"\"\"\n    file_id = str(uuid4())\n\n    chunks = _run_adapter_build(\n        file_id=file_id,\n        project_ids_map={},\n        persona_ids_map={},\n    )\n\n    assert len(chunks) == 1\n    assert chunks[0].personas == []\n    assert chunks[0].user_project == []\n"
  },
  {
    "path": "backend/tests/unit/onyx/indexing/test_vespa.py",
    "content": "from http import HTTPStatus\nfrom typing import Any\n\nimport httpx\nimport pytest\nfrom sqlalchemy.orm import Session\n\nfrom onyx.db.engine.sql_engine import get_sqlalchemy_engine\nfrom onyx.document_index.document_index_utils import get_both_index_properties\nfrom onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT\n\n\n@pytest.mark.skip()\ndef test_vespa_update() -> None:\n    \"\"\"This Test exercises some ambiguous Vespa behavior and\n    shows exactly what happens.\n    \"\"\"\n\n    doc_id = \"test-vespa-update\"\n\n    with Session(get_sqlalchemy_engine()) as db_session:\n        primary_index_name, _, _, _ = get_both_index_properties(db_session)\n        endpoint = (\n            f\"{DOCUMENT_ID_ENDPOINT.format(index_name=primary_index_name)}/{doc_id}\"\n        )\n        with httpx.Client(http2=True) as http_client:\n            payload: dict[str, Any] = {}\n\n            # always delete to set up the test, should always be OK\n            res = http_client.delete(endpoint)\n            assert HTTPStatus.OK == res.status_code\n\n            # Verify the document is not found\n            res = http_client.get(endpoint)\n            assert HTTPStatus.NOT_FOUND == res.status_code\n\n            # Attempt to update a nonexistent test document. Should return OK\n            payload[\"fields\"] = {}\n            payload[\"fields\"][\"title\"] = {\"assign\": \"Best of Bob Dylan\"}\n\n            res = http_client.put(\n                endpoint,\n                headers={\"Content-Type\": \"application/json\"},\n                json=payload,\n            )\n            assert HTTPStatus.OK == res.status_code\n\n            # when we look for it, should be NOT_FOUND\n            res = http_client.get(endpoint)\n            assert HTTPStatus.NOT_FOUND == res.status_code\n\n            # POST/Put new document\n            payload = {}\n            payload[\"fields\"] = {}\n            payload[\"fields\"][\"document_id\"] = doc_id\n            payload[\"fields\"][\"title\"] = \"A Head Full of Dreams\"\n\n            res = http_client.post(\n                endpoint,\n                headers={\"Content-Type\": \"application/json\"},\n                json=payload,\n            )\n            assert HTTPStatus.OK == res.status_code\n\n            # when we look for it, now we should find it\n            res = http_client.get(endpoint)\n            assert HTTPStatus.OK == res.status_code\n            d = res.json()\n\n            assert payload[\"fields\"][\"title\"] == d[\"fields\"][\"title\"]\n\n            # Attempt to update the document that we know exists. Should return OK\n            payload[\"fields\"] = {}\n            payload[\"fields\"][\"title\"] = {\"assign\": \"Remember The Name\"}\n\n            res = http_client.put(\n                endpoint,\n                headers={\"Content-Type\": \"application/json\"},\n                json=payload,\n            )\n            assert HTTPStatus.OK == res.status_code\n\n            # verify the change\n            res = http_client.get(endpoint)\n            assert HTTPStatus.OK == res.status_code\n            d = res.json()\n            assert payload[\"fields\"][\"title\"][\"assign\"] == d[\"fields\"][\"title\"]\n\n            # always delete to clean up the test, should always be OK\n            res = http_client.delete(endpoint)\n            assert HTTPStatus.OK == res.status_code\n\n            # Verify the document is not found\n            res = http_client.get(endpoint)\n            assert HTTPStatus.NOT_FOUND == res.status_code\n"
  },
  {
    "path": "backend/tests/unit/onyx/lazy_handling/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/llm/conftest.py",
    "content": "\"\"\"\nTest configuration for LLM tests.\n\nThis module loads model metadata enrichments before running tests\nso that the model_name_parser has access to the enriched data.\n\"\"\"\n\nfrom collections.abc import Generator\n\nimport pytest\n\nfrom onyx.llm.litellm_singleton.config import load_model_metadata_enrichments\nfrom onyx.llm.model_name_parser import parse_litellm_model_name\n\n\n@pytest.fixture(scope=\"session\", autouse=True)\ndef load_enrichments() -> Generator[None, None, None]:\n    \"\"\"Load model metadata enrichments before any tests run.\"\"\"\n    load_model_metadata_enrichments()\n    # Clear parser cache to ensure fresh lookups\n    parse_litellm_model_name.cache_clear()\n    yield\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_bedrock_token_limit.py",
    "content": "\"\"\"Tests for get_bedrock_token_limit function.\"\"\"\n\nfrom unittest.mock import patch\n\nfrom onyx.llm.utils import get_bedrock_token_limit\n\n\nclass TestGetBedrockTokenLimit:\n    \"\"\"Tests for Bedrock token limit lookup.\"\"\"\n\n    def test_parse_from_model_id_suffix_200k(self) -> None:\n        \"\"\"Test parsing :200k suffix.\"\"\"\n        result = get_bedrock_token_limit(\"anthropic.claude-3-5-sonnet:200k\")\n        assert result == 200000\n\n    def test_parse_from_model_id_suffix_128k(self) -> None:\n        \"\"\"Test parsing :128k suffix.\"\"\"\n        result = get_bedrock_token_limit(\"meta.llama3-70b:128k\")\n        assert result == 128000\n\n    def test_parse_from_model_id_suffix_4k(self) -> None:\n        \"\"\"Test parsing :4k suffix.\"\"\"\n        result = get_bedrock_token_limit(\"some-model:4k\")\n        assert result == 4000\n\n    def test_parse_from_model_id_suffix_1000k(self) -> None:\n        \"\"\"Test parsing :1000k suffix (1M context).\"\"\"\n        result = get_bedrock_token_limit(\"amazon.nova-pro:1000k\")\n        assert result == 1000000\n\n    def test_litellm_lookup_with_bedrock_prefix(self) -> None:\n        \"\"\"Test LiteLLM lookup works with bedrock/ prefix.\"\"\"\n        mock_model_map = {\n            \"bedrock/anthropic.claude-3-5-sonnet\": {\"max_input_tokens\": 200000}\n        }\n        with patch(\"onyx.llm.utils.get_model_map\", return_value=mock_model_map):\n            result = get_bedrock_token_limit(\"anthropic.claude-3-5-sonnet\")\n            assert result == 200000\n\n    def test_litellm_lookup_without_prefix(self) -> None:\n        \"\"\"Test LiteLLM lookup works without bedrock/ prefix.\"\"\"\n        mock_model_map = {\"anthropic.claude-3-sonnet\": {\"max_input_tokens\": 200000}}\n        with patch(\"onyx.llm.utils.get_model_map\", return_value=mock_model_map):\n            result = get_bedrock_token_limit(\"anthropic.claude-3-sonnet\")\n            assert result == 200000\n\n    def test_litellm_max_tokens_fallback(self) -> None:\n        \"\"\"Test fallback to max_tokens when max_input_tokens not present.\"\"\"\n        mock_model_map = {\"bedrock/some-model\": {\"max_tokens\": 32000}}\n        with patch(\"onyx.llm.utils.get_model_map\", return_value=mock_model_map):\n            result = get_bedrock_token_limit(\"some-model\")\n            assert result == 32000\n\n    def test_hardcoded_mapping_claude_3_5(self) -> None:\n        \"\"\"Test hardcoded mapping for Claude 3.5 models.\"\"\"\n        # Mock empty LiteLLM to force mapping lookup\n        with patch(\"onyx.llm.utils.get_model_map\", return_value={}):\n            result = get_bedrock_token_limit(\n                \"anthropic.claude-3-5-sonnet-20241022-v2:0\"\n            )\n            assert result == 200000\n\n    def test_hardcoded_mapping_llama3_3(self) -> None:\n        \"\"\"Test hardcoded mapping for Llama 3.3 models (128K context).\"\"\"\n        with patch(\"onyx.llm.utils.get_model_map\", return_value={}):\n            result = get_bedrock_token_limit(\"meta.llama3-3-70b-instruct-v1:0\")\n            assert result == 128000\n\n    def test_hardcoded_mapping_llama3_70b(self) -> None:\n        \"\"\"Test hardcoded mapping for Llama 3 70B (8K context).\"\"\"\n        with patch(\"onyx.llm.utils.get_model_map\", return_value={}):\n            result = get_bedrock_token_limit(\"meta.llama3-70b-instruct-v1:0\")\n            assert result == 8000\n\n    def test_hardcoded_mapping_nova_pro(self) -> None:\n        \"\"\"Test hardcoded mapping for Nova Pro.\"\"\"\n        with patch(\"onyx.llm.utils.get_model_map\", return_value={}):\n            result = get_bedrock_token_limit(\"amazon.nova-pro-v1:0\")\n            assert result == 300000\n\n    def test_hardcoded_mapping_mistral_large(self) -> None:\n        \"\"\"Test hardcoded mapping for Mistral Large.\"\"\"\n        with patch(\"onyx.llm.utils.get_model_map\", return_value={}):\n            result = get_bedrock_token_limit(\"mistral.mistral-large-2407-v1:0\")\n            assert result == 128000\n\n    def test_default_fallback_unknown_model(self) -> None:\n        \"\"\"Test default fallback for unknown models.\"\"\"\n        with patch(\"onyx.llm.utils.get_model_map\", return_value={}):\n            result = get_bedrock_token_limit(\"unknown.model-v1:0\")\n            # Should fall back to GEN_AI_MODEL_FALLBACK_MAX_TOKENS (32000)\n            assert result == 32000\n\n    def test_cross_region_model_id(self) -> None:\n        \"\"\"Test cross-region model ID (us.anthropic.claude-...).\"\"\"\n        with patch(\"onyx.llm.utils.get_model_map\", return_value={}):\n            result = get_bedrock_token_limit(\n                \"us.anthropic.claude-3-5-sonnet-20241022-v2:0\"\n            )\n            assert result == 200000\n\n    def test_case_insensitive_matching(self) -> None:\n        \"\"\"Test that matching is case-insensitive.\"\"\"\n        with patch(\"onyx.llm.utils.get_model_map\", return_value={}):\n            result = get_bedrock_token_limit(\"ANTHROPIC.CLAUDE-3-5-SONNET\")\n            assert result == 200000\n\n    def test_suffix_takes_priority_over_litellm(self) -> None:\n        \"\"\"Test that :NNNk suffix takes priority over LiteLLM.\"\"\"\n        mock_model_map = {\"bedrock/model\": {\"max_input_tokens\": 50000}}\n        with patch(\"onyx.llm.utils.get_model_map\", return_value=mock_model_map):\n            # The :100k suffix should be used, not the LiteLLM value\n            result = get_bedrock_token_limit(\"model:100k\")\n            assert result == 100000\n\n    def test_litellm_exception_falls_through(self) -> None:\n        \"\"\"Test that LiteLLM exceptions fall through to mapping.\"\"\"\n        with patch(\n            \"onyx.llm.utils.get_model_map\", side_effect=Exception(\"LiteLLM error\")\n        ):\n            # Should still work via hardcoded mapping\n            result = get_bedrock_token_limit(\"anthropic.claude-3-5-sonnet\")\n            assert result == 200000\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_factory.py",
    "content": "from unittest.mock import patch\n\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.factory import _build_provider_extra_headers\nfrom onyx.llm.factory import get_llm\nfrom onyx.llm.factory import llm_from_provider\nfrom onyx.llm.well_known_providers.constants import OLLAMA_API_KEY_CONFIG_KEY\nfrom onyx.server.manage.llm.models import LLMProviderView\nfrom onyx.server.manage.llm.models import ModelConfigurationView\n\n\ndef test_build_provider_extra_headers_adds_bearer_for_ollama_api_key() -> None:\n    headers = _build_provider_extra_headers(\n        LlmProviderNames.OLLAMA_CHAT,\n        {OLLAMA_API_KEY_CONFIG_KEY: \"  test-key  \"},\n    )\n\n    assert headers == {\"Authorization\": \"Bearer test-key\"}\n\n\ndef test_build_provider_extra_headers_keeps_existing_bearer_prefix() -> None:\n    headers = _build_provider_extra_headers(\n        LlmProviderNames.OLLAMA_CHAT,\n        {OLLAMA_API_KEY_CONFIG_KEY: \"bearer test-key\"},\n    )\n\n    assert headers == {\"Authorization\": \"bearer test-key\"}\n\n\ndef test_build_provider_extra_headers_ignores_empty_ollama_api_key() -> None:\n    headers = _build_provider_extra_headers(\n        LlmProviderNames.OLLAMA_CHAT,\n        {OLLAMA_API_KEY_CONFIG_KEY: \"   \"},\n    )\n\n    assert headers == {}\n\n\ndef _build_provider_view(\n    provider: str,\n    max_input_tokens: int | None,\n) -> LLMProviderView:\n    return LLMProviderView(\n        id=1,\n        name=\"test-provider\",\n        provider=provider,\n        model_configurations=[\n            ModelConfigurationView(\n                name=\"test-model\",\n                is_visible=True,\n                max_input_tokens=max_input_tokens,\n                supports_image_input=False,\n            )\n        ],\n        api_key=None,\n        api_base=\"http://localhost:11434\",\n        api_version=None,\n        custom_config=None,\n        is_public=True,\n        is_auto_mode=False,\n        groups=[],\n        personas=[],\n        deployment_name=None,\n    )\n\n\ndef test_get_llm_sets_ollama_num_ctx_model_kwarg() -> None:\n    with patch(\"onyx.llm.factory.LitellmLLM\") as mock_litellm_llm:\n        get_llm(\n            provider=LlmProviderNames.OLLAMA_CHAT,\n            model=\"test-model\",\n            deployment_name=None,\n            max_input_tokens=4096,\n            model_kwargs={\"num_ctx\": 8192},\n        )\n\n        kwargs = mock_litellm_llm.call_args.kwargs\n        assert kwargs[\"model_kwargs\"] == {\"num_ctx\": 8192}\n\n\ndef test_get_llm_does_not_set_ollama_num_ctx_for_non_ollama_provider() -> None:\n    with patch(\"onyx.llm.factory.LitellmLLM\") as mock_litellm_llm:\n        get_llm(\n            provider=LlmProviderNames.OPENAI,\n            model=\"gpt-4o-mini\",\n            deployment_name=None,\n            max_input_tokens=4096,\n        )\n\n        kwargs = mock_litellm_llm.call_args.kwargs\n        assert kwargs[\"model_kwargs\"] == {}\n\n\ndef test_llm_from_provider_passes_configured_ollama_num_ctx() -> None:\n    provider = _build_provider_view(\n        provider=LlmProviderNames.OLLAMA_CHAT,\n        max_input_tokens=16384,\n    )\n\n    with patch(\"onyx.llm.factory.get_llm\") as mock_get_llm:\n        llm_from_provider(\n            model_name=\"test-model\",\n            llm_provider=provider,\n        )\n\n        kwargs = mock_get_llm.call_args.kwargs\n        assert kwargs[\"max_input_tokens\"] == 16384\n        assert kwargs[\"model_kwargs\"] == {\"num_ctx\": 16384}\n\n\ndef test_llm_from_provider_omits_ollama_num_ctx_when_model_context_unknown() -> None:\n    provider = _build_provider_view(\n        provider=LlmProviderNames.OLLAMA_CHAT,\n        max_input_tokens=None,\n    )\n\n    with (\n        patch(\n            \"onyx.llm.factory.get_max_input_tokens_from_llm_provider\",\n            return_value=32000,\n        ),\n        patch(\"onyx.llm.factory.get_llm\") as mock_get_llm,\n    ):\n        llm_from_provider(\n            model_name=\"test-model\",\n            llm_provider=provider,\n        )\n\n        kwargs = mock_get_llm.call_args.kwargs\n        assert kwargs[\"max_input_tokens\"] == 32000\n        assert kwargs[\"model_kwargs\"] == {}\n\n\ndef test_llm_from_provider_never_sets_ollama_num_ctx_for_non_ollama_provider() -> None:\n    provider = _build_provider_view(\n        provider=LlmProviderNames.OPENAI,\n        max_input_tokens=16384,\n    )\n\n    with patch(\"onyx.llm.factory.get_llm\") as mock_get_llm:\n        llm_from_provider(\n            model_name=\"test-model\",\n            llm_provider=provider,\n        )\n\n        kwargs = mock_get_llm.call_args.kwargs\n        assert kwargs[\"max_input_tokens\"] == 16384\n        assert kwargs[\"model_kwargs\"] == {}\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_formatting_reenabled.py",
    "content": "from onyx.llm.utils import model_needs_formatting_reenabled\n\n\ndef test_gpt_5_exact_match() -> None:\n    \"\"\"Test that gpt-5 model name exactly matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"gpt-5\") is True\n\n\ndef test_o3_exact_match() -> None:\n    \"\"\"Test that o3 model name exactly matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"o3\") is True\n\n\ndef test_o1_exact_match() -> None:\n    \"\"\"Test that o1 model name exactly matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"o1\") is True\n\n\ndef test_gpt_5_with_provider_prefix() -> None:\n    \"\"\"Test that gpt-5 with provider prefix matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"openai/gpt-5\") is True\n\n\ndef test_o3_with_provider_prefix() -> None:\n    \"\"\"Test that o3 with provider prefix matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"openai/o3\") is True\n\n\ndef test_o1_with_provider_prefix() -> None:\n    \"\"\"Test that o1 with provider prefix matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"openai/o1\") is True\n\n\ndef test_gpt_5_with_suffix() -> None:\n    \"\"\"Test that gpt-5 with suffix matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"gpt-5-preview\") is True\n    assert model_needs_formatting_reenabled(\"gpt-5-mini\") is True\n    assert model_needs_formatting_reenabled(\"gpt-5-turbo\") is True\n\n\ndef test_o3_with_suffix() -> None:\n    \"\"\"Test that o3 with suffix matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"o3-mini\") is True\n    assert model_needs_formatting_reenabled(\"o3-preview\") is True\n    assert model_needs_formatting_reenabled(\"o3-max\") is True\n\n\ndef test_o1_with_suffix() -> None:\n    \"\"\"Test that o1 with suffix matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"o1-preview\") is True\n    assert model_needs_formatting_reenabled(\"o1-mini\") is True\n    assert model_needs_formatting_reenabled(\"o1-max\") is True\n\n\ndef test_gpt_5_with_provider_and_suffix() -> None:\n    \"\"\"Test that gpt-5 with provider prefix and suffix matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"openai/gpt-5-preview\") is True\n    assert model_needs_formatting_reenabled(\"openai/gpt-5-mini\") is True\n\n\ndef test_o3_with_provider_and_suffix() -> None:\n    \"\"\"Test that o3 with provider prefix and suffix matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"openai/o3-mini\") is True\n    assert model_needs_formatting_reenabled(\"openai/o3-preview\") is True\n\n\ndef test_o1_with_provider_and_suffix() -> None:\n    \"\"\"Test that o1 with provider prefix and suffix matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"openai/o1-preview\") is True\n    assert model_needs_formatting_reenabled(\"openai/o1-mini\") is True\n\n\ndef test_gpt_5_with_space_boundary() -> None:\n    \"\"\"Test that gpt-5 with space boundary matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"openai gpt-5\") is True\n    assert model_needs_formatting_reenabled(\"gpt-5 preview\") is True\n\n\ndef test_o3_with_space_boundary() -> None:\n    \"\"\"Test that o3 with space boundary matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"openai o3\") is True\n    assert model_needs_formatting_reenabled(\"o3 mini\") is True\n\n\ndef test_o1_with_space_boundary() -> None:\n    \"\"\"Test that o1 with space boundary matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"openai o1\") is True\n    assert model_needs_formatting_reenabled(\"o1 preview\") is True\n\n\ndef test_gpt_5_with_slash_boundary() -> None:\n    \"\"\"Test that gpt-5 with slash boundary matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"provider/gpt-5\") is True\n    assert model_needs_formatting_reenabled(\"gpt-5/version\") is True\n\n\ndef test_o3_with_slash_boundary() -> None:\n    \"\"\"Test that o3 with slash boundary matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"provider/o3\") is True\n    assert model_needs_formatting_reenabled(\"o3/version\") is True\n\n\ndef test_o1_with_slash_boundary() -> None:\n    \"\"\"Test that o1 with slash boundary matches.\"\"\"\n    assert model_needs_formatting_reenabled(\"provider/o1\") is True\n    assert model_needs_formatting_reenabled(\"o1/version\") is True\n\n\ndef test_gpt_4_does_not_match() -> None:\n    \"\"\"Test that gpt-4 does not match.\"\"\"\n    assert model_needs_formatting_reenabled(\"gpt-4\") is False\n    assert model_needs_formatting_reenabled(\"gpt-4-turbo\") is False\n    assert model_needs_formatting_reenabled(\"gpt-4o\") is False\n    assert model_needs_formatting_reenabled(\"openai/gpt-4\") is False\n\n\ndef test_gpt_3_5_does_not_match() -> None:\n    \"\"\"Test that gpt-3.5-turbo does not match.\"\"\"\n    assert model_needs_formatting_reenabled(\"gpt-3.5-turbo\") is False\n    assert model_needs_formatting_reenabled(\"openai/gpt-3.5-turbo\") is False\n\n\ndef test_o2_does_not_match() -> None:\n    \"\"\"Test that o2 does not match.\"\"\"\n    assert model_needs_formatting_reenabled(\"o2\") is False\n    assert model_needs_formatting_reenabled(\"o2-preview\") is False\n    assert model_needs_formatting_reenabled(\"openai/o2\") is False\n\n\ndef test_o4_does_not_match() -> None:\n    \"\"\"Test that o4 does not match.\"\"\"\n    assert model_needs_formatting_reenabled(\"o4\") is False\n    assert model_needs_formatting_reenabled(\"o4-mini\") is False\n    assert model_needs_formatting_reenabled(\"openai/o4\") is False\n\n\ndef test_other_models_do_not_match() -> None:\n    \"\"\"Test that other common models do not match.\"\"\"\n    assert model_needs_formatting_reenabled(\"claude-3-5-sonnet-20241022\") is False\n    assert model_needs_formatting_reenabled(\"gemini-1.5-pro\") is False\n    assert model_needs_formatting_reenabled(\"llama3.1\") is False\n    assert model_needs_formatting_reenabled(\"mistral-large\") is False\n\n\ndef test_case_sensitivity() -> None:\n    \"\"\"Test that model names are case-sensitive.\"\"\"\n    assert model_needs_formatting_reenabled(\"GPT-5\") is False\n    assert model_needs_formatting_reenabled(\"O3\") is False\n    assert model_needs_formatting_reenabled(\"O1\") is False\n    assert model_needs_formatting_reenabled(\"Gpt-5\") is False\n\n\ndef test_models_with_gpt_5_in_middle() -> None:\n    \"\"\"Test that models containing gpt-5 in the middle match.\"\"\"\n    assert model_needs_formatting_reenabled(\"something-gpt-5-suffix\") is True\n    assert model_needs_formatting_reenabled(\"prefix/gpt-5/suffix\") is True\n\n\ndef test_models_with_o3_in_middle() -> None:\n    \"\"\"Test that models containing o3 in the middle match.\"\"\"\n    assert model_needs_formatting_reenabled(\"something-o3-suffix\") is True\n    assert model_needs_formatting_reenabled(\"prefix/o3/suffix\") is True\n\n\ndef test_models_with_o1_in_middle() -> None:\n    \"\"\"Test that models containing o1 in the middle match.\"\"\"\n    assert model_needs_formatting_reenabled(\"something-o1-suffix\") is True\n    assert model_needs_formatting_reenabled(\"prefix/o1/suffix\") is True\n\n\ndef test_models_that_contain_but_not_match() -> None:\n    \"\"\"Test that models containing the strings but not matching word boundaries do not match.\"\"\"\n    # These should not match because they don't have proper word boundaries\n    assert (\n        model_needs_formatting_reenabled(\"gpt-50\") is False\n    )  # gpt-5 is part of gpt-50\n    assert model_needs_formatting_reenabled(\"o30\") is False  # o3 is part of o30\n    assert model_needs_formatting_reenabled(\"o10\") is False  # o1 is part of o10\n    assert model_needs_formatting_reenabled(\"gpt-51\") is False\n    assert (\n        model_needs_formatting_reenabled(\"somethingo3\") is False\n    )  # no boundary before o3\n    assert (\n        model_needs_formatting_reenabled(\"o3something\") is False\n    )  # no boundary after o3\n\n\ndef test_empty_string() -> None:\n    \"\"\"Test that empty string does not match.\"\"\"\n    assert model_needs_formatting_reenabled(\"\") is False\n\n\ndef test_real_litellm_model_names() -> None:\n    \"\"\"Test with real model names that might appear in litellm.\"\"\"\n    # Based on common patterns from models.litellm.ai\n    assert model_needs_formatting_reenabled(\"openai/gpt-5\") is True\n    assert model_needs_formatting_reenabled(\"openai/o3-mini\") is True\n    assert model_needs_formatting_reenabled(\"openai/o1-preview\") is True\n\n    # These should not match\n    assert model_needs_formatting_reenabled(\"openai/gpt-4o\") is False\n    assert model_needs_formatting_reenabled(\"openai/gpt-4-turbo\") is False\n    assert (\n        model_needs_formatting_reenabled(\"anthropic/claude-3-5-sonnet-20241022\")\n        is False\n    )\n    assert model_needs_formatting_reenabled(\"google/gemini-1.5-pro\") is False\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_litellm_monkey_patches.py",
    "content": "from typing import Any\n\nfrom litellm.llms.ollama.chat.transformation import OllamaChatCompletionResponseIterator\n\nfrom onyx.llm.litellm_singleton.monkey_patches import apply_monkey_patches\n\n_UNSET = object()\n\n\ndef _create_iterator() -> OllamaChatCompletionResponseIterator:\n    apply_monkey_patches()\n    return OllamaChatCompletionResponseIterator(\n        streaming_response=iter(()),\n        sync_stream=True,\n    )\n\n\ndef _build_chunk(\n    *,\n    thinking: object = _UNSET,\n    content: object = _UNSET,\n) -> dict[str, Any]:\n    message: dict[str, Any] = {\"role\": \"assistant\"}\n    if thinking is not _UNSET:\n        message[\"thinking\"] = thinking\n    if content is not _UNSET:\n        message[\"content\"] = content\n\n    return {\n        \"model\": \"llama3.1\",\n        \"message\": message,\n        \"done\": False,\n        \"prompt_eval_count\": 0,\n        \"eval_count\": 0,\n    }\n\n\ndef test_ollama_chunk_parser_transitions_from_native_thinking_to_content() -> None:\n    iterator = _create_iterator()\n\n    thinking_chunk = _build_chunk(thinking=\"Let me think\")\n    content_chunk = _build_chunk(thinking=\"\", content=\"Final answer\")\n\n    thinking_response = iterator.chunk_parser(thinking_chunk)\n    content_response = iterator.chunk_parser(content_chunk)\n\n    assert thinking_response.choices[0].delta.reasoning_content == \"Let me think\"\n    assert thinking_response.choices[0].delta.content is None\n\n    assert getattr(content_response.choices[0].delta, \"reasoning_content\", None) is None\n    assert content_response.choices[0].delta.content == \"Final answer\"\n    assert iterator.finished_reasoning_content is True\n\n\ndef test_ollama_chunk_parser_keeps_tagged_thinking_until_close_tag() -> None:\n    iterator = _create_iterator()\n\n    start_chunk = _build_chunk(content=\"<think>step 1\")\n    middle_chunk = _build_chunk(content=\"step 2\")\n    close_chunk = _build_chunk(content=\"final</think>\")\n\n    start_response = iterator.chunk_parser(start_chunk)\n    middle_response = iterator.chunk_parser(middle_chunk)\n    close_response = iterator.chunk_parser(close_chunk)\n\n    assert start_response.choices[0].delta.reasoning_content == \"step 1\"\n    assert start_response.choices[0].delta.content is None\n\n    assert middle_response.choices[0].delta.reasoning_content == \"step 2\"\n    assert middle_response.choices[0].delta.content is None\n\n    assert getattr(close_response.choices[0].delta, \"reasoning_content\", None) is None\n    assert close_response.choices[0].delta.content == \"final\"\n    assert iterator.finished_reasoning_content is True\n\n\ndef test_ollama_chunk_parser_handles_think_tag_after_native_thinking() -> None:\n    iterator = _create_iterator()\n\n    native_thinking_chunk = _build_chunk(thinking=\"native reasoning\")\n    tagged_thinking_chunk = _build_chunk(content=\"<think>tagged reasoning\")\n\n    iterator.chunk_parser(native_thinking_chunk)\n    tagged_response = iterator.chunk_parser(tagged_thinking_chunk)\n\n    assert tagged_response.choices[0].delta.reasoning_content == \"tagged reasoning\"\n    assert tagged_response.choices[0].delta.content is None\n\n\ndef test_ollama_chunk_parser_preserves_content_when_thinking_and_content_coexist() -> (\n    None\n):\n    iterator = _create_iterator()\n\n    combined_chunk = _build_chunk(\n        thinking=\"Need one thought\",\n        content=\"Visible answer token\",\n    )\n\n    response = iterator.chunk_parser(combined_chunk)\n\n    assert response.choices[0].delta.reasoning_content == \"Need one thought\"\n    assert response.choices[0].delta.content == \"Visible answer token\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_llm_provider_options.py",
    "content": "from datetime import datetime\nfrom datetime import timezone\n\nimport pytest\n\nfrom onyx.llm.well_known_providers.auto_update_models import (\n    LLMProviderRecommendation,\n)\nfrom onyx.llm.well_known_providers.auto_update_models import LLMRecommendations\nfrom onyx.llm.well_known_providers.constants import OPENAI_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.constants import VERTEXAI_PROVIDER_NAME\nfrom onyx.llm.well_known_providers.llm_provider_options import (\n    model_configurations_for_provider,\n)\nfrom onyx.llm.well_known_providers.models import SimpleKnownModel\n\n\ndef _build_recommendations(\n    provider_name: str, visible_model_names: list[str]\n) -> LLMRecommendations:\n    return LLMRecommendations(\n        version=\"test\",\n        updated_at=datetime.now(timezone.utc),\n        providers={\n            provider_name: LLMProviderRecommendation(\n                default_model=SimpleKnownModel(name=visible_model_names[0]),\n                additional_visible_models=[\n                    SimpleKnownModel(name=model_name)\n                    for model_name in visible_model_names[1:]\n                ],\n            )\n        },\n    )\n\n\ndef test_model_configurations_vertex_are_sorted_by_name(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(\n        \"onyx.llm.well_known_providers.llm_provider_options.fetch_models_for_provider\",\n        lambda _provider_name: [\"zeta-model\", \"alpha-model\", \"Beta-model\"],\n    )\n    monkeypatch.setattr(\n        \"onyx.llm.well_known_providers.llm_provider_options.get_max_input_tokens\",\n        lambda _model_name, _provider_name: None,\n    )\n    monkeypatch.setattr(\n        \"onyx.llm.well_known_providers.llm_provider_options.model_supports_image_input\",\n        lambda _model_name, _provider_name: False,\n    )\n\n    recommendations = _build_recommendations(\n        VERTEXAI_PROVIDER_NAME, [\"gamma-model\", \"alpha-model\"]\n    )\n\n    model_configurations = model_configurations_for_provider(\n        VERTEXAI_PROVIDER_NAME, recommendations\n    )\n\n    assert [model.name for model in model_configurations] == [\n        \"alpha-model\",\n        \"Beta-model\",\n        \"gamma-model\",\n        \"zeta-model\",\n    ]\n    assert [model.is_visible for model in model_configurations] == [\n        True,\n        False,\n        True,\n        False,\n    ]\n\n\ndef test_model_configurations_non_vertex_preserve_provider_order(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    monkeypatch.setattr(\n        \"onyx.llm.well_known_providers.llm_provider_options.fetch_models_for_provider\",\n        lambda _provider_name: [\"model-b\", \"model-a\"],\n    )\n    monkeypatch.setattr(\n        \"onyx.llm.well_known_providers.llm_provider_options.get_max_input_tokens\",\n        lambda _model_name, _provider_name: None,\n    )\n    monkeypatch.setattr(\n        \"onyx.llm.well_known_providers.llm_provider_options.model_supports_image_input\",\n        lambda _model_name, _provider_name: False,\n    )\n\n    recommendations = _build_recommendations(\n        OPENAI_PROVIDER_NAME, [\"model-c\", \"model-a\"]\n    )\n\n    model_configurations = model_configurations_for_provider(\n        OPENAI_PROVIDER_NAME, recommendations\n    )\n\n    assert [model.name for model in model_configurations] == [\n        \"model-b\",\n        \"model-a\",\n        \"model-c\",\n    ]\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_model_is_reasoning.py",
    "content": "from onyx.llm.utils import model_is_reasoning_model\n\n\ndef test_model_is_reasoning_model() -> None:\n    \"\"\"Test that reasoning models are correctly identified and non-reasoning models are not\"\"\"\n\n    # Models that should be identified as reasoning models\n    reasoning_models = [\n        (\"o3\", \"openai\"),\n        (\"o3-mini\", \"openai\"),\n        (\"o4-mini\", \"openai\"),\n        (\"deepseek-reasoner\", \"deepseek\"),\n        (\"deepseek-r1\", \"openrouter/deepseek\"),\n        (\"claude-sonnet-4-20250514\", \"anthropic\"),\n    ]\n\n    # Models that should NOT be identified as reasoning models\n    non_reasoning_models = [\n        (\"gpt-4o\", \"openai\"),\n        (\"claude-3-5-sonnet-20240620\", \"anthropic\"),\n    ]\n\n    # Test reasoning models\n    for model_name, provider in reasoning_models:\n        assert (\n            model_is_reasoning_model(model_name, provider) is True\n        ), f\"Expected {provider}/{model_name} to be identified as a reasoning model\"\n\n    # Test non-reasoning models\n    for model_name, provider in non_reasoning_models:\n        assert (\n            model_is_reasoning_model(model_name, provider) is False\n        ), f\"Expected {provider}/{model_name} to NOT be identified as a reasoning model\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_model_map.py",
    "content": "from unittest.mock import patch\n\nimport litellm\n\nfrom onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.utils import find_model_obj\nfrom onyx.llm.utils import get_model_map\n\n\ndef test_partial_match_in_model_map() -> None:\n    \"\"\"\n    We should handle adding/not adding the provider prefix to the model name.\n    \"\"\"\n    get_model_map.cache_clear()\n\n    model_map = get_model_map()\n\n    _EXPECTED_FIELDS = {\n        \"input_cost_per_audio_per_second\": 0,\n        \"input_cost_per_audio_per_second_above_128k_tokens\": 0,\n        \"input_cost_per_character\": 0,\n        \"input_cost_per_character_above_128k_tokens\": 0,\n        \"input_cost_per_image\": 0,\n        \"input_cost_per_image_above_128k_tokens\": 0,\n        \"input_cost_per_token\": 0,\n        \"input_cost_per_token_above_128k_tokens\": 0,\n        \"input_cost_per_video_per_second\": 0,\n        \"input_cost_per_video_per_second_above_128k_tokens\": 0,\n        \"max_input_tokens\": 131072,\n        \"max_output_tokens\": 8192,\n        \"max_tokens\": 8192,\n        \"output_cost_per_character\": 0,\n        \"output_cost_per_character_above_128k_tokens\": 0,\n        \"output_cost_per_token\": 0,\n        \"output_cost_per_token_above_128k_tokens\": 0,\n        \"source\": \"https://aistudio.google.com\",\n        \"supports_audio_output\": False,\n        \"supports_function_calling\": True,\n        \"supports_response_schema\": True,\n        \"supports_system_messages\": False,\n        \"supports_tool_choice\": True,\n        \"supports_vision\": True,\n    }\n\n    result1 = find_model_obj(\n        model_map, LlmProviderNames.OPENAI, \"gemini/gemma-3-27b-it\"\n    )\n    assert result1 is not None\n    for key, value in _EXPECTED_FIELDS.items():\n        assert key in result1\n        assert result1[key] == value, \"Unexpected value for key: {}\".format(key)\n\n    result2 = find_model_obj(model_map, LlmProviderNames.OPENAI, \"gemma-3-27b-it\")\n    assert result2 is not None\n    for key, value in _EXPECTED_FIELDS.items():\n        assert key in result2\n        assert result2[key] == value, \"Unexpected value for key: {}\".format(key)\n\n    get_model_map.cache_clear()\n\n\ndef test_no_overwrite_in_model_map() -> None:\n    \"\"\"Make sure we use the original entry if it exists.\"\"\"\n    # Create a mock model_cost dict with multiple entries for \"onyx-llm\"\n    mock_original_model_cost = {\n        \"gpt-4o\": {\n            \"is_correct\": True,\n        },\n        \"provider/gpt-4o\": {\n            \"is_correct\": False,\n        },\n    }\n\n    with patch.object(litellm, \"model_cost\", mock_original_model_cost):\n        get_model_map.cache_clear()  # Clear the LRU cache to use the patched data\n\n        model_map = get_model_map()\n        result = find_model_obj(model_map, LlmProviderNames.OPENAI, \"gpt-4o\")\n        assert result is not None\n        assert result[\"is_correct\"] is True\n\n    get_model_map.cache_clear()\n\n\ndef test_twelvelabs_pegasus_override_present() -> None:\n    get_model_map.cache_clear()\n    try:\n        model_map = get_model_map()\n        model_obj = find_model_obj(\n            model_map,\n            \"twelvelabs\",\n            \"us.twelvelabs.pegasus-1-2-v1:0\",\n        )\n        assert model_obj is not None\n        assert model_obj[\"max_input_tokens\"] == GEN_AI_MODEL_FALLBACK_MAX_TOKENS\n        assert model_obj[\"max_tokens\"] == GEN_AI_MODEL_FALLBACK_MAX_TOKENS\n        assert model_obj[\"supports_reasoning\"] is False\n    finally:\n        get_model_map.cache_clear()\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_model_name_parser.py",
    "content": "\"\"\"\nUnit tests for LiteLLM model name parser.\n\nTests verify that enrichment data is correctly returned from the parser.\n\"\"\"\n\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.model_name_parser import parse_litellm_model_name\n\n\ndef test_bedrock_model_with_enrichment() -> None:\n    \"\"\"Test parsing a Bedrock model - provider extracted, metadata from enrichment.\"\"\"\n    result = parse_litellm_model_name(\n        \"bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0\"\n    )\n\n    assert result.raw_name == \"bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0\"\n    assert result.provider == LlmProviderNames.BEDROCK\n    assert result.vendor == LlmProviderNames.ANTHROPIC\n    assert result.display_name == \"Claude Sonnet 3.5\"\n    assert result.provider_display_name == \"Claude (Bedrock - Anthropic)\"\n\n\ndef test_region_extraction() -> None:\n    \"\"\"Test that region prefix is extracted from model key.\"\"\"\n    result = parse_litellm_model_name(\n        \"bedrock/eu.anthropic.claude-3-5-sonnet-20241022-v2:0\"\n    )\n\n    assert result.region == \"eu\"\n    assert result.provider == LlmProviderNames.BEDROCK\n\n\ndef test_direct_provider_inference() -> None:\n    \"\"\"Test that provider is inferred from litellm.model_cost for unprefixed models.\"\"\"\n    result = parse_litellm_model_name(\"gpt-4o\")\n\n    assert result.provider == LlmProviderNames.OPENAI\n    assert result.display_name == \"GPT-4o\"\n    assert result.provider_display_name == \"GPT (OpenAI)\"\n\n\ndef test_unknown_model_fallback() -> None:\n    \"\"\"Test that unknown models get a cleaned-up display name.\"\"\"\n    result = parse_litellm_model_name(\"some-unknown-model-xyz\")\n\n    assert result.raw_name == \"some-unknown-model-xyz\"\n    # Unknown models get title-cased display names\n    assert result.display_name == \"Some Unknown Model Xyz\"\n    assert result.vendor is None\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_model_response.py",
    "content": "from __future__ import annotations\n\nfrom typing import cast\nfrom typing import TYPE_CHECKING\n\nimport pytest\n\nfrom onyx.llm.model_response import ChatCompletionDeltaToolCall\nfrom onyx.llm.model_response import from_litellm_model_response\nfrom onyx.llm.model_response import from_litellm_model_response_stream\nfrom onyx.llm.model_response import FunctionCall\nfrom onyx.llm.model_response import ModelResponse\nfrom onyx.llm.model_response import ModelResponseStream\n\nif TYPE_CHECKING:\n    from litellm.types.utils import (\n        ModelResponse as LiteLLMModelResponse,\n        ModelResponseStream as LiteLLMModelResponseStream,\n    )\n\n\nclass _LiteLLMStreamDouble:\n    \"\"\"\n    Lightweight double that mimics the LiteLLM ``ModelResponseStream`` interface\n    used by ``from_litellm_model_response_stream``.\n    \"\"\"\n\n    def __init__(self, payload: dict) -> None:\n        self._payload = payload\n\n    def model_dump(self) -> dict:\n        return self._payload\n\n\nclass _LiteLLMResponseDouble:\n    \"\"\"\n    Lightweight double that mimics the LiteLLM ``ModelResponse`` interface\n    used by ``from_litellm_model_response``.\n    \"\"\"\n\n    def __init__(self, payload: dict) -> None:\n        self._payload = payload\n\n    def model_dump(self) -> dict:\n        return self._payload\n\n\ndef _make_stream_double(payload: dict) -> \"LiteLLMModelResponseStream\":\n    \"\"\"Create a test double for LiteLLM ModelResponseStream.\"\"\"\n    return cast(\"LiteLLMModelResponseStream\", _LiteLLMStreamDouble(payload))\n\n\ndef _make_response_double(payload: dict) -> \"LiteLLMModelResponse\":\n    \"\"\"Create a test double for LiteLLM ModelResponse.\"\"\"\n    return cast(\"LiteLLMModelResponse\", _LiteLLMResponseDouble(payload))\n\n\ndef _build_tool_call_payload() -> dict:\n    return {\n        \"id\": \"chatcmpl-f739f09c-7c9b-4dd6-aea7-cf41d4fd2196\",\n        \"created\": 1762544538,\n        \"model\": \"gpt-5\",\n        \"object\": \"chat.completion.chunk\",\n        \"choices\": [\n            {\n                \"finish_reason\": None,\n                \"index\": 0,\n                \"delta\": {\n                    \"content\": \"\",\n                    \"tool_calls\": [\n                        {\n                            \"id\": None,\n                            \"index\": 0,\n                            \"type\": \"function\",\n                            \"function\": {\n                                \"arguments\": '{\"',\n                                \"name\": None,\n                            },\n                        }\n                    ],\n                },\n            }\n        ],\n    }\n\n\ndef _build_reasoning_payload() -> dict:\n    return {\n        \"id\": \"chatcmpl-c2a25682-5715-4ca2-84a9-061498f79626\",\n        \"created\": 1762544538,\n        \"model\": \"gpt-5\",\n        \"object\": \"chat.completion.chunk\",\n        \"choices\": [\n            {\n                \"finish_reason\": None,\n                \"index\": 0,\n                \"delta\": {\n                    \"reasoning_content\": \" variations\",\n                },\n            }\n        ],\n    }\n\n\ndef _build_finish_reason_payload() -> tuple[dict, dict]:\n    base_chunk = {\n        \"id\": \"chatcmpl-2b136068-c6fb-4af1-97d5-d2c9d84cd52b\",\n        \"created\": 1762544448,\n        \"object\": \"chat.completion.chunk\",\n    }\n\n    content_chunk = base_chunk | {\n        \"choices\": [\n            {\n                \"finish_reason\": None,\n                \"index\": 0,\n                \"delta\": {\n                    \"content\": \"?\",\n                },\n            }\n        ],\n    }\n\n    final_chunk = base_chunk | {\n        \"choices\": [\n            {\n                \"finish_reason\": \"stop\",\n                \"index\": 0,\n                \"delta\": {},\n            }\n        ],\n    }\n\n    return content_chunk, final_chunk\n\n\ndef _build_multiple_tool_calls_payload() -> dict:\n    return {\n        \"id\": \"Yn4SaajROLXEnvgP5JTN-AQ\",\n        \"created\": 1762819684,\n        \"model\": \"gemini-2.5-flash\",\n        \"object\": \"chat.completion.chunk\",\n        \"choices\": [\n            {\n                \"finish_reason\": None,\n                \"index\": 0,\n                \"delta\": {\n                    \"content\": None,\n                    \"tool_calls\": [\n                        {\n                            \"id\": \"call_130bec4755e544ea95f4b1bafd81\",\n                            \"function\": {\n                                \"arguments\": '{\"queries\": [\"new agent framework\"]}',\n                                \"name\": \"internal_search\",\n                            },\n                            \"type\": \"function\",\n                            \"index\": 0,\n                        },\n                        {\n                            \"id\": \"call_42273e8ee5ac4c0a97237d6d25a6\",\n                            \"function\": {\n                                \"arguments\": '{\"queries\": [\"cheese\"]}',\n                                \"name\": \"web_search\",\n                            },\n                            \"type\": \"function\",\n                            \"index\": 1,\n                        },\n                    ],\n                },\n            }\n        ],\n    }\n\n\ndef _build_non_streaming_response_payload() -> dict:\n    return {\n        \"id\": \"chatcmpl-abc123\",\n        \"created\": 1234567890,\n        \"model\": \"gpt-4\",\n        \"object\": \"chat.completion\",\n        \"choices\": [\n            {\n                \"finish_reason\": \"stop\",\n                \"index\": 0,\n                \"message\": {\n                    \"content\": \"Hello, world!\",\n                    \"role\": \"assistant\",\n                },\n            }\n        ],\n    }\n\n\ndef _build_non_streaming_tool_call_payload() -> dict:\n    return {\n        \"id\": \"chatcmpl-xyz789\",\n        \"created\": 9876543210,\n        \"model\": \"gpt-4\",\n        \"object\": \"chat.completion\",\n        \"choices\": [\n            {\n                \"finish_reason\": \"tool_calls\",\n                \"index\": 0,\n                \"message\": {\n                    \"content\": None,\n                    \"role\": \"assistant\",\n                    \"tool_calls\": [\n                        {\n                            \"id\": \"call_abc123\",\n                            \"type\": \"function\",\n                            \"function\": {\n                                \"name\": \"search_documents\",\n                                \"arguments\": '{\"query\": \"test\"}',\n                            },\n                        }\n                    ],\n                },\n            }\n        ],\n    }\n\n\ndef test_from_litellm_model_response_stream_parses_tool_calls() -> None:\n    response = from_litellm_model_response_stream(\n        _make_stream_double(_build_tool_call_payload())\n    )\n\n    assert isinstance(response, ModelResponseStream)\n    assert response.id == \"chatcmpl-f739f09c-7c9b-4dd6-aea7-cf41d4fd2196\"\n    assert response.created == \"1762544538\"\n\n    tool_calls = response.choice.delta.tool_calls\n    assert len(tool_calls) == 1\n    assert tool_calls[0] == ChatCompletionDeltaToolCall(\n        id=None,\n        index=0,\n        type=\"function\",\n        function=FunctionCall(arguments='{\"', name=None),\n    )\n\n\ndef test_from_litellm_model_response_stream_preserves_reasoning_content() -> None:\n    response = from_litellm_model_response_stream(\n        _make_stream_double(_build_reasoning_payload())\n    )\n\n    assert response.choice.delta.content is None\n    assert response.choice.delta.reasoning_content == \" variations\"\n    assert response.choice.finish_reason is None\n\n\n@pytest.mark.parametrize(\"payload\", _build_finish_reason_payload())\ndef test_from_litellm_model_response_stream_handles_content_and_finish_reason(\n    payload: dict,\n) -> None:\n    response = from_litellm_model_response_stream(_make_stream_double(payload))\n\n    assert response.id == \"chatcmpl-2b136068-c6fb-4af1-97d5-d2c9d84cd52b\"\n    assert response.created == \"1762544448\"\n    assert response.choice.index == 0\n    if payload[\"choices\"][0][\"finish_reason\"] == \"stop\":\n        assert response.choice.finish_reason == \"stop\"\n        assert response.choice.delta.content is None\n    else:\n        assert response.choice.finish_reason is None\n        assert response.choice.delta.content == \"?\"\n\n\ndef test_from_litellm_model_response_stream_parses_multiple_tool_calls() -> None:\n    response = from_litellm_model_response_stream(\n        _make_stream_double(_build_multiple_tool_calls_payload())\n    )\n\n    tool_calls = response.choice.delta.tool_calls\n    assert response.id == \"Yn4SaajROLXEnvgP5JTN-AQ\"\n    assert response.created == \"1762819684\"\n    assert response.choice.finish_reason is None\n    assert response.choice.delta.content is None\n    assert len(tool_calls) == 2\n    assert tool_calls[0] == ChatCompletionDeltaToolCall(\n        id=\"call_130bec4755e544ea95f4b1bafd81\",\n        index=0,\n        type=\"function\",\n        function=FunctionCall(\n            arguments='{\"queries\": [\"new agent framework\"]}',\n            name=\"internal_search\",\n        ),\n    )\n    assert tool_calls[1] == ChatCompletionDeltaToolCall(\n        id=\"call_42273e8ee5ac4c0a97237d6d25a6\",\n        index=1,\n        type=\"function\",\n        function=FunctionCall(\n            arguments='{\"queries\": [\"cheese\"]}',\n            name=\"web_search\",\n        ),\n    )\n\n\ndef test_from_litellm_model_response_parses_basic_message() -> None:\n    response = from_litellm_model_response(\n        _make_response_double(_build_non_streaming_response_payload())\n    )\n\n    assert isinstance(response, ModelResponse)\n    assert response.id == \"chatcmpl-abc123\"\n    assert response.created == \"1234567890\"\n    assert response.choice.finish_reason == \"stop\"\n    assert response.choice.message.content == \"Hello, world!\"\n    assert response.choice.message.role == \"assistant\"\n    assert response.choice.message.tool_calls is None\n\n\ndef test_from_litellm_model_response_parses_tool_calls() -> None:\n    response = from_litellm_model_response(\n        _make_response_double(_build_non_streaming_tool_call_payload())\n    )\n\n    assert isinstance(response, ModelResponse)\n    assert response.id == \"chatcmpl-xyz789\"\n    assert response.created == \"9876543210\"\n    assert response.choice.finish_reason == \"tool_calls\"\n    assert response.choice.message.content is None\n    assert response.choice.message.role == \"assistant\"\n    assert response.choice.message.tool_calls is not None\n    assert len(response.choice.message.tool_calls) == 1\n\n    tool_call = response.choice.message.tool_calls[0]\n    assert tool_call.id == \"call_abc123\"\n    assert tool_call.type == \"function\"\n    assert tool_call.function.name == \"search_documents\"\n    assert tool_call.function.arguments == '{\"query\": \"test\"}'\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_multi_llm.py",
    "content": "import os\nimport threading\nimport time\nfrom typing import Any\nfrom unittest.mock import ANY\nfrom unittest.mock import patch\n\nimport litellm\nimport pytest\nfrom litellm.types.utils import ChatCompletionDeltaToolCall\nfrom litellm.types.utils import Delta\nfrom litellm.types.utils import Function as LiteLLMFunction\n\nimport onyx.llm.models\nfrom onyx.configs.app_configs import MOCK_LLM_RESPONSE\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.interfaces import LLMUserIdentity\nfrom onyx.llm.model_response import ModelResponse\nfrom onyx.llm.model_response import ModelResponseStream\nfrom onyx.llm.models import AssistantMessage\nfrom onyx.llm.models import FunctionCall\nfrom onyx.llm.models import LanguageModelInput\nfrom onyx.llm.models import ReasoningEffort\nfrom onyx.llm.models import ToolCall\nfrom onyx.llm.models import UserMessage\nfrom onyx.llm.multi_llm import LitellmLLM\nfrom onyx.llm.utils import get_max_input_tokens\n\nVERTEX_OPUS_MODELS_REJECTING_OUTPUT_CONFIG = [\n    \"claude-opus-4-5@20251101\",\n    \"claude-opus-4-6\",\n]\n\n\ndef _create_delta(\n    role: str | None = None,\n    content: str | None = None,\n    tool_calls: list[ChatCompletionDeltaToolCall] | None = None,\n) -> Delta:\n    delta = Delta(role=role, content=content)\n    # NOTE: for some reason, if you pass tool_calls to the constructor, it doesn't actually\n    # get set, so we have to do it this way\n    delta.tool_calls = tool_calls\n    return delta\n\n\ndef _model_response_to_assistant_message(response: ModelResponse) -> AssistantMessage:\n    \"\"\"Convert a ModelResponse to an AssistantMessage for testing.\"\"\"\n    message = response.choice.message\n    tool_calls = None\n    if message.tool_calls:\n        tool_calls = [\n            ToolCall(\n                id=tc.id,\n                function=FunctionCall(\n                    name=tc.function.name or \"\",\n                    arguments=tc.function.arguments or \"\",\n                ),\n            )\n            for tc in message.tool_calls\n        ]\n    return AssistantMessage(\n        role=\"assistant\",\n        content=message.content,\n        tool_calls=tool_calls,\n    )\n\n\ndef _accumulate_stream_to_assistant_message(\n    stream_chunks: list[ModelResponseStream],\n) -> AssistantMessage:\n    \"\"\"Accumulate streaming deltas into a final AssistantMessage for testing.\"\"\"\n    accumulated_content = \"\"\n    tool_calls_map: dict[int, dict[str, str]] = {}\n\n    for chunk in stream_chunks:\n        delta = chunk.choice.delta\n\n        # Accumulate content\n        if delta.content:\n            accumulated_content += delta.content\n\n        # Accumulate tool calls\n        if delta.tool_calls:\n            for tool_call_delta in delta.tool_calls:\n                index = tool_call_delta.index\n\n                if index not in tool_calls_map:\n                    tool_calls_map[index] = {\n                        \"id\": \"\",\n                        \"name\": \"\",\n                        \"arguments\": \"\",\n                    }\n\n                if tool_call_delta.id:\n                    tool_calls_map[index][\"id\"] = tool_call_delta.id\n\n                if tool_call_delta.function:\n                    if tool_call_delta.function.name:\n                        tool_calls_map[index][\"name\"] = tool_call_delta.function.name\n                    if tool_call_delta.function.arguments:\n                        tool_calls_map[index][\n                            \"arguments\"\n                        ] += tool_call_delta.function.arguments\n\n    # Convert accumulated tool calls to ToolCall list, sorted by index\n    tool_calls = None\n    if tool_calls_map:\n        tool_calls = [\n            ToolCall(\n                type=\"function\",\n                id=tc_data[\"id\"],\n                function=FunctionCall(\n                    name=tc_data[\"name\"],\n                    arguments=tc_data[\"arguments\"],\n                ),\n            )\n            for index in sorted(tool_calls_map.keys())\n            for tc_data in [tool_calls_map[index]]\n            if tc_data[\"id\"] and tc_data[\"name\"]\n        ]\n\n    return AssistantMessage(\n        role=\"assistant\",\n        content=accumulated_content if accumulated_content else None,\n        tool_calls=tool_calls,\n    )\n\n\n@pytest.fixture\ndef default_multi_llm() -> LitellmLLM:\n    model_provider = LlmProviderNames.OPENAI\n    model_name = \"gpt-3.5-turbo\"\n\n    return LitellmLLM(\n        api_key=\"test_key\",\n        timeout=30,\n        model_provider=model_provider,\n        model_name=model_name,\n        max_input_tokens=get_max_input_tokens(\n            model_provider=model_provider,\n            model_name=model_name,\n        ),\n    )\n\n\ndef test_multiple_tool_calls(default_multi_llm: LitellmLLM) -> None:\n    # Mock the litellm.completion function\n    with patch(\"litellm.completion\") as mock_completion:\n        # invoke() internally uses stream=True and reassembles via\n        # stream_chunk_builder, so the mock must return stream chunks.\n        mock_stream_chunks = [\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(\n                            role=\"assistant\",\n                            tool_calls=[\n                                ChatCompletionDeltaToolCall(\n                                    id=\"call_1\",\n                                    function=LiteLLMFunction(\n                                        name=\"get_weather\",\n                                        arguments='{\"location\": \"New York\"}',\n                                    ),\n                                    type=\"function\",\n                                    index=0,\n                                ),\n                                ChatCompletionDeltaToolCall(\n                                    id=\"call_2\",\n                                    function=LiteLLMFunction(\n                                        name=\"get_time\",\n                                        arguments='{\"timezone\": \"EST\"}',\n                                    ),\n                                    type=\"function\",\n                                    index=1,\n                                ),\n                            ],\n                        ),\n                        finish_reason=\"tool_calls\",\n                        index=0,\n                    )\n                ],\n                model=\"gpt-3.5-turbo\",\n            ),\n        ]\n        mock_completion.return_value = mock_stream_chunks\n\n        # Define input messages\n        messages: LanguageModelInput = [\n            UserMessage(content=\"What's the weather and time in New York?\")\n        ]\n\n        # Define available tools\n        tools = [\n            {\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": \"get_weather\",\n                    \"description\": \"Get the current weather for a location\",\n                    \"parameters\": {\n                        \"type\": \"object\",\n                        \"properties\": {\"location\": {\"type\": \"string\"}},\n                        \"required\": [\"location\"],\n                    },\n                },\n            },\n            {\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": \"get_time\",\n                    \"description\": \"Get the current time for a timezone\",\n                    \"parameters\": {\n                        \"type\": \"object\",\n                        \"properties\": {\"timezone\": {\"type\": \"string\"}},\n                        \"required\": [\"timezone\"],\n                    },\n                },\n            },\n        ]\n\n        result = default_multi_llm.invoke(messages, tools)\n\n        # Assert that the result is a ModelResponse\n        assert isinstance(result, ModelResponse)\n\n        # Convert to AssistantMessage for easier assertion\n        assistant_msg = _model_response_to_assistant_message(result)\n\n        # Assert that the content is None (as per the mock response)\n        assert assistant_msg.content is None or assistant_msg.content == \"\"\n\n        # Assert that there are two tool calls\n        assert assistant_msg.tool_calls is not None\n        assert len(assistant_msg.tool_calls) == 2\n\n        # Assert the details of the first tool call\n        assert assistant_msg.tool_calls[0].id == \"call_1\"\n        assert assistant_msg.tool_calls[0].function.name == \"get_weather\"\n        assert (\n            assistant_msg.tool_calls[0].function.arguments == '{\"location\": \"New York\"}'\n        )\n\n        # Assert the details of the second tool call\n        assert assistant_msg.tool_calls[1].id == \"call_2\"\n        assert assistant_msg.tool_calls[1].function.name == \"get_time\"\n        assert assistant_msg.tool_calls[1].function.arguments == '{\"timezone\": \"EST\"}'\n\n        # Verify that litellm.completion was called with the correct arguments\n        mock_completion.assert_called_once_with(\n            model=\"openai/responses/gpt-3.5-turbo\",\n            api_key=\"test_key\",\n            base_url=None,\n            api_version=None,\n            custom_llm_provider=None,\n            messages=[\n                {\"role\": \"user\", \"content\": \"What's the weather and time in New York?\"}\n            ],\n            tools=tools,\n            stream=True,\n            temperature=0.0,  # Default value from GEN_AI_TEMPERATURE\n            timeout=30,\n            max_tokens=None,\n            client=ANY,  # HTTPHandler instance created per-request\n            stream_options={\"include_usage\": True},\n            parallel_tool_calls=True,\n            mock_response=MOCK_LLM_RESPONSE,\n            allowed_openai_params=[\"tool_choice\"],\n        )\n\n\ndef test_multiple_tool_calls_streaming(default_multi_llm: LitellmLLM) -> None:\n    # Mock the litellm.completion function\n    with patch(\"litellm.completion\") as mock_completion:\n        # Create a mock response with multiple tool calls using litellm objects\n        mock_response = [\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(\n                            role=\"assistant\",\n                            tool_calls=[\n                                ChatCompletionDeltaToolCall(\n                                    id=\"call_1\",\n                                    function=LiteLLMFunction(\n                                        name=\"get_weather\", arguments='{\"location\": '\n                                    ),\n                                    type=\"function\",\n                                    index=0,\n                                )\n                            ],\n                        ),\n                        finish_reason=None,\n                        index=0,\n                    )\n                ],\n                model=\"gpt-3.5-turbo\",\n            ),\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(\n                            tool_calls=[\n                                ChatCompletionDeltaToolCall(\n                                    id=\"\",\n                                    function=LiteLLMFunction(arguments='\"New York\"}'),\n                                    type=\"function\",\n                                    index=0,\n                                )\n                            ]\n                        ),\n                        finish_reason=None,\n                        index=0,\n                    )\n                ],\n                model=\"gpt-3.5-turbo\",\n            ),\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(\n                            tool_calls=[\n                                ChatCompletionDeltaToolCall(\n                                    id=\"call_2\",\n                                    function=LiteLLMFunction(\n                                        name=\"get_time\", arguments='{\"timezone\": \"EST\"}'\n                                    ),\n                                    type=\"function\",\n                                    index=1,\n                                )\n                            ]\n                        ),\n                        finish_reason=\"tool_calls\",\n                        index=0,\n                    )\n                ],\n                model=\"gpt-3.5-turbo\",\n            ),\n        ]\n        mock_completion.return_value = mock_response\n\n        # Define input messages and tools (same as in the non-streaming test)\n        messages: LanguageModelInput = [\n            UserMessage(content=\"What's the weather and time in New York?\")\n        ]\n\n        tools = [\n            {\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": \"get_weather\",\n                    \"description\": \"Get the current weather for a location\",\n                    \"parameters\": {\n                        \"type\": \"object\",\n                        \"properties\": {\"location\": {\"type\": \"string\"}},\n                        \"required\": [\"location\"],\n                    },\n                },\n            },\n            {\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": \"get_time\",\n                    \"description\": \"Get the current time for a timezone\",\n                    \"parameters\": {\n                        \"type\": \"object\",\n                        \"properties\": {\"timezone\": {\"type\": \"string\"}},\n                        \"required\": [\"timezone\"],\n                    },\n                },\n            },\n        ]\n\n        # Call the stream method\n        stream_result = list(default_multi_llm.stream(messages, tools))\n\n        # Assert that we received the correct number of chunks\n        assert len(stream_result) == 3\n\n        # Assert that each chunk is a ModelResponseStream\n        for chunk in stream_result:\n            assert isinstance(chunk, ModelResponseStream)\n\n        # Accumulate the stream chunks into a final AssistantMessage\n        final_result = _accumulate_stream_to_assistant_message(stream_result)\n\n        # Assert that the final result matches our expectations\n        assert isinstance(final_result, AssistantMessage)\n        assert final_result.content is None or final_result.content == \"\"\n        assert final_result.tool_calls is not None\n        assert len(final_result.tool_calls) == 2\n        assert final_result.tool_calls[0].id == \"call_1\"\n        assert final_result.tool_calls[0].function.name == \"get_weather\"\n        assert (\n            final_result.tool_calls[0].function.arguments == '{\"location\": \"New York\"}'\n        )\n        assert final_result.tool_calls[1].id == \"call_2\"\n        assert final_result.tool_calls[1].function.name == \"get_time\"\n        assert final_result.tool_calls[1].function.arguments == '{\"timezone\": \"EST\"}'\n\n        # Verify that litellm.completion was called with the correct arguments\n        mock_completion.assert_called_once_with(\n            model=\"openai/responses/gpt-3.5-turbo\",\n            api_key=\"test_key\",\n            base_url=None,\n            api_version=None,\n            custom_llm_provider=None,\n            messages=[\n                {\"role\": \"user\", \"content\": \"What's the weather and time in New York?\"}\n            ],\n            tools=tools,\n            stream=True,\n            temperature=0.0,  # Default value from GEN_AI_TEMPERATURE\n            timeout=30,\n            max_tokens=None,\n            client=ANY,  # HTTPHandler instance created per-stream\n            stream_options={\"include_usage\": True},\n            parallel_tool_calls=True,\n            mock_response=MOCK_LLM_RESPONSE,\n            allowed_openai_params=[\"tool_choice\"],\n        )\n\n\n@pytest.mark.parametrize(\"model_name\", VERTEX_OPUS_MODELS_REJECTING_OUTPUT_CONFIG)\ndef test_vertex_stream_omits_stream_options(model_name: str) -> None:\n    llm = LitellmLLM(\n        api_key=\"test_key\",\n        timeout=30,\n        model_provider=LlmProviderNames.VERTEX_AI,\n        model_name=model_name,\n        max_input_tokens=get_max_input_tokens(\n            model_provider=LlmProviderNames.VERTEX_AI,\n            model_name=model_name,\n        ),\n    )\n\n    with patch(\"litellm.completion\") as mock_completion:\n        mock_completion.return_value = []\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        list(llm.stream(messages))\n\n        kwargs = mock_completion.call_args.kwargs\n        assert \"stream_options\" not in kwargs\n\n\ndef test_openai_auto_reasoning_effort_maps_to_medium() -> None:\n    llm = LitellmLLM(\n        api_key=\"test_key\",\n        timeout=30,\n        model_provider=LlmProviderNames.OPENAI,\n        model_name=\"gpt-5.2\",\n        max_input_tokens=get_max_input_tokens(\n            model_provider=LlmProviderNames.OPENAI,\n            model_name=\"gpt-5.2\",\n        ),\n    )\n\n    with (\n        patch(\"litellm.completion\") as mock_completion,\n        patch(\"onyx.llm.multi_llm.model_is_reasoning_model\", return_value=True),\n        patch(\"onyx.llm.multi_llm.is_true_openai_model\", return_value=True),\n    ):\n        mock_completion.return_value = []\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        list(llm.stream(messages, reasoning_effort=ReasoningEffort.AUTO))\n\n        kwargs = mock_completion.call_args.kwargs\n        assert kwargs[\"reasoning\"][\"effort\"] == \"medium\"\n\n\n@pytest.mark.parametrize(\"model_name\", VERTEX_OPUS_MODELS_REJECTING_OUTPUT_CONFIG)\ndef test_vertex_opus_omits_reasoning_effort(model_name: str) -> None:\n    llm = LitellmLLM(\n        api_key=\"test_key\",\n        timeout=30,\n        model_provider=LlmProviderNames.VERTEX_AI,\n        model_name=model_name,\n        max_input_tokens=get_max_input_tokens(\n            model_provider=LlmProviderNames.VERTEX_AI,\n            model_name=model_name,\n        ),\n    )\n\n    with (\n        patch(\"litellm.completion\") as mock_completion,\n        patch(\"onyx.llm.multi_llm.model_is_reasoning_model\", return_value=True),\n    ):\n        mock_completion.return_value = []\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        list(llm.stream(messages))\n\n        kwargs = mock_completion.call_args.kwargs\n        assert \"reasoning_effort\" not in kwargs\n\n\ndef test_openai_chat_omits_reasoning_params() -> None:\n    llm = LitellmLLM(\n        api_key=\"test_key\",\n        timeout=30,\n        model_provider=LlmProviderNames.OPENAI,\n        model_name=\"gpt-5-chat\",\n        max_input_tokens=get_max_input_tokens(\n            model_provider=LlmProviderNames.OPENAI,\n            model_name=\"gpt-5-chat\",\n        ),\n    )\n\n    with (\n        patch(\"litellm.completion\") as mock_completion,\n        patch(\n            \"onyx.llm.multi_llm.model_is_reasoning_model\", return_value=True\n        ) as mock_is_reasoning,\n        patch(\n            \"onyx.llm.multi_llm.is_true_openai_model\", return_value=True\n        ) as mock_is_openai,\n    ):\n        mock_stream_chunks = [\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(role=\"assistant\", content=\"Hello\"),\n                        finish_reason=\"stop\",\n                        index=0,\n                    )\n                ],\n                model=\"gpt-5-chat\",\n            ),\n        ]\n        mock_completion.return_value = mock_stream_chunks\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        llm.invoke(messages)\n\n        kwargs = mock_completion.call_args.kwargs\n        assert kwargs[\"model\"] == \"openai/responses/gpt-5-chat\"\n        assert \"reasoning\" not in kwargs\n        assert \"reasoning_effort\" not in kwargs\n        assert mock_is_reasoning.called\n        assert mock_is_openai.called\n\n\ndef test_user_identity_metadata_enabled(default_multi_llm: LitellmLLM) -> None:\n    with (\n        patch(\"litellm.completion\") as mock_completion,\n        patch(\"onyx.llm.utils.SEND_USER_METADATA_TO_LLM_PROVIDER\", True),\n    ):\n        mock_stream_chunks = [\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(role=\"assistant\", content=\"Hello\"),\n                        finish_reason=\"stop\",\n                        index=0,\n                    )\n                ],\n                model=\"gpt-3.5-turbo\",\n            ),\n        ]\n        mock_completion.return_value = mock_stream_chunks\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        identity = LLMUserIdentity(user_id=\"user_123\", session_id=\"session_abc\")\n\n        default_multi_llm.invoke(messages, user_identity=identity)\n\n        mock_completion.assert_called_once()\n        kwargs = mock_completion.call_args.kwargs\n        assert kwargs[\"user\"] == \"user_123\"\n        assert kwargs[\"metadata\"][\"session_id\"] == \"session_abc\"\n\n\ndef test_user_identity_user_id_truncated_to_64_chars(\n    default_multi_llm: LitellmLLM,\n) -> None:\n    with (\n        patch(\"litellm.completion\") as mock_completion,\n        patch(\"onyx.llm.utils.SEND_USER_METADATA_TO_LLM_PROVIDER\", True),\n    ):\n        mock_stream_chunks = [\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(role=\"assistant\", content=\"Hello\"),\n                        finish_reason=\"stop\",\n                        index=0,\n                    )\n                ],\n                model=\"gpt-3.5-turbo\",\n            ),\n        ]\n        mock_completion.return_value = mock_stream_chunks\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        long_user_id = \"u\" * 82\n        identity = LLMUserIdentity(user_id=long_user_id, session_id=\"session_abc\")\n\n        default_multi_llm.invoke(messages, user_identity=identity)\n\n        mock_completion.assert_called_once()\n        kwargs = mock_completion.call_args.kwargs\n        assert kwargs[\"user\"] == long_user_id[:64]\n\n\ndef test_user_identity_metadata_disabled_omits_identity(\n    default_multi_llm: LitellmLLM,\n) -> None:\n    with (\n        patch(\"litellm.completion\") as mock_completion,\n        patch(\"onyx.llm.utils.SEND_USER_METADATA_TO_LLM_PROVIDER\", False),\n    ):\n        mock_stream_chunks = [\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(role=\"assistant\", content=\"Hello\"),\n                        finish_reason=\"stop\",\n                        index=0,\n                    )\n                ],\n                model=\"gpt-3.5-turbo\",\n            ),\n        ]\n        mock_completion.return_value = mock_stream_chunks\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        identity = LLMUserIdentity(user_id=\"user_123\", session_id=\"session_abc\")\n\n        default_multi_llm.invoke(messages, user_identity=identity)\n\n        mock_completion.assert_called_once()\n        kwargs = mock_completion.call_args.kwargs\n        assert \"user\" not in kwargs\n        assert \"metadata\" not in kwargs\n\n\ndef test_existing_metadata_pass_through_when_identity_disabled() -> None:\n    model_provider = LlmProviderNames.OPENAI\n    model_name = \"gpt-3.5-turbo\"\n\n    llm = LitellmLLM(\n        api_key=\"test_key\",\n        timeout=30,\n        model_provider=model_provider,\n        model_name=model_name,\n        max_input_tokens=get_max_input_tokens(\n            model_provider=model_provider,\n            model_name=model_name,\n        ),\n        model_kwargs={\"metadata\": {\"foo\": \"bar\"}},\n    )\n\n    with (\n        patch(\"litellm.completion\") as mock_completion,\n        patch(\"onyx.llm.utils.SEND_USER_METADATA_TO_LLM_PROVIDER\", False),\n    ):\n        mock_stream_chunks = [\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(role=\"assistant\", content=\"Hello\"),\n                        finish_reason=\"stop\",\n                        index=0,\n                    )\n                ],\n                model=\"gpt-3.5-turbo\",\n            ),\n        ]\n        mock_completion.return_value = mock_stream_chunks\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        identity = LLMUserIdentity(user_id=\"user_123\", session_id=\"session_abc\")\n\n        llm.invoke(messages, user_identity=identity)\n\n        mock_completion.assert_called_once()\n        kwargs = mock_completion.call_args.kwargs\n        assert \"user\" not in kwargs\n        assert kwargs[\"metadata\"][\"foo\"] == \"bar\"\n\n\ndef test_openai_model_invoke_uses_httphandler_client(\n    default_multi_llm: LitellmLLM,\n) -> None:\n    \"\"\"Test that OpenAI models get an HTTPHandler client passed for invoke().\"\"\"\n    from litellm import HTTPHandler\n\n    with patch(\"litellm.completion\") as mock_completion:\n        mock_stream_chunks = [\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(role=\"assistant\", content=\"Hello\"),\n                        finish_reason=\"stop\",\n                        index=0,\n                    )\n                ],\n                model=\"gpt-3.5-turbo\",\n            ),\n        ]\n        mock_completion.return_value = mock_stream_chunks\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        default_multi_llm.invoke(messages)\n\n        mock_completion.assert_called_once()\n        kwargs = mock_completion.call_args.kwargs\n        assert isinstance(kwargs[\"client\"], HTTPHandler)\n\n\ndef test_openai_model_stream_uses_httphandler_client(\n    default_multi_llm: LitellmLLM,\n) -> None:\n    \"\"\"Test that OpenAI models get an HTTPHandler client passed for stream().\"\"\"\n    from litellm import HTTPHandler\n\n    with patch(\"litellm.completion\") as mock_completion:\n        mock_completion.return_value = []\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        list(default_multi_llm.stream(messages))\n\n        mock_completion.assert_called_once()\n        kwargs = mock_completion.call_args.kwargs\n        assert isinstance(kwargs[\"client\"], HTTPHandler)\n\n\ndef test_anthropic_model_passes_no_client() -> None:\n    \"\"\"Test that non-OpenAI models (Anthropic) don't get a client passed.\"\"\"\n    llm = LitellmLLM(\n        api_key=\"test_key\",\n        timeout=30,\n        model_provider=LlmProviderNames.ANTHROPIC,\n        model_name=\"claude-3-opus-20240229\",\n        max_input_tokens=200000,\n    )\n\n    with patch(\"litellm.completion\") as mock_completion:\n        mock_stream_chunks = [\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(role=\"assistant\", content=\"Hello\"),\n                        finish_reason=\"stop\",\n                        index=0,\n                    )\n                ],\n                model=\"claude-3-opus-20240229\",\n            ),\n        ]\n        mock_completion.return_value = mock_stream_chunks\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        llm.invoke(messages)\n\n        mock_completion.assert_called_once()\n        kwargs = mock_completion.call_args.kwargs\n        assert kwargs[\"client\"] is None\n\n\ndef test_bedrock_model_passes_no_client() -> None:\n    \"\"\"Test that Bedrock models don't get a client passed.\"\"\"\n    llm = LitellmLLM(\n        api_key=None,\n        timeout=30,\n        model_provider=LlmProviderNames.BEDROCK,\n        model_name=\"anthropic.claude-3-sonnet-20240229-v1:0\",\n        max_input_tokens=200000,\n    )\n\n    with patch(\"litellm.completion\") as mock_completion:\n        mock_stream_chunks = [\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(role=\"assistant\", content=\"Hello\"),\n                        finish_reason=\"stop\",\n                        index=0,\n                    )\n                ],\n                model=\"anthropic.claude-3-sonnet-20240229-v1:0\",\n            ),\n        ]\n        mock_completion.return_value = mock_stream_chunks\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        llm.invoke(messages)\n\n        mock_completion.assert_called_once()\n        kwargs = mock_completion.call_args.kwargs\n        assert kwargs[\"client\"] is None\n\n\ndef test_azure_openai_model_uses_httphandler_client() -> None:\n    \"\"\"Test that Azure OpenAI models get an HTTPHandler client passed.\n\n    Azure OpenAI uses the same responses API as OpenAI, so it needs\n    the same HTTPHandler isolation to avoid connection pool conflicts.\n    \"\"\"\n    from litellm import HTTPHandler\n\n    llm = LitellmLLM(\n        api_key=\"test_key\",\n        timeout=30,\n        model_provider=LlmProviderNames.AZURE,\n        model_name=\"gpt-4o\",\n        api_base=\"https://my-resource.openai.azure.com\",\n        api_version=\"2024-02-15-preview\",\n        max_input_tokens=128000,\n    )\n\n    with patch(\"litellm.completion\") as mock_completion:\n        mock_stream_chunks = [\n            litellm.ModelResponse(\n                id=\"chatcmpl-123\",\n                choices=[\n                    litellm.Choices(\n                        delta=_create_delta(role=\"assistant\", content=\"Hello\"),\n                        finish_reason=\"stop\",\n                        index=0,\n                    )\n                ],\n                model=\"gpt-4o\",\n            ),\n        ]\n        mock_completion.return_value = mock_stream_chunks\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        llm.invoke(messages)\n\n        mock_completion.assert_called_once()\n        kwargs = mock_completion.call_args.kwargs\n        assert isinstance(kwargs[\"client\"], HTTPHandler)\n\n\ndef test_temporary_env_cleanup(monkeypatch: pytest.MonkeyPatch) -> None:\n    # Assign some environment variables\n    EXPECTED_ENV_VARS = {\n        \"TEST_ENV_VAR\": \"test_value\",\n        \"ANOTHER_ONE\": \"1\",\n        \"THIRD_ONE\": \"2\",\n    }\n\n    CUSTOM_CONFIG = {\n        \"TEST_ENV_VAR\": \"fdsfsdf\",\n        \"ANOTHER_ONE\": \"3\",\n        \"THIS_IS_RANDOM\": \"123213\",\n    }\n\n    for env_var, value in EXPECTED_ENV_VARS.items():\n        monkeypatch.setenv(env_var, value)\n\n    model_provider = LlmProviderNames.OPENAI\n    model_name = \"gpt-3.5-turbo\"\n\n    llm = LitellmLLM(\n        api_key=\"test_key\",\n        timeout=30,\n        model_provider=model_provider,\n        model_name=model_name,\n        max_input_tokens=get_max_input_tokens(\n            model_provider=model_provider,\n            model_name=model_name,\n        ),\n        model_kwargs={\"metadata\": {\"foo\": \"bar\"}},\n        custom_config=CUSTOM_CONFIG,\n    )\n\n    # When custom_config is set, invoke() internally uses stream=True and\n    # reassembles via stream_chunk_builder, so the mock must return stream chunks.\n    mock_stream_chunks = [\n        litellm.ModelResponse(\n            id=\"chatcmpl-123\",\n            choices=[\n                litellm.Choices(\n                    delta=_create_delta(role=\"assistant\", content=\"Hello\"),\n                    finish_reason=\"stop\",\n                    index=0,\n                )\n            ],\n            model=\"gpt-3.5-turbo\",\n        ),\n    ]\n\n    def on_litellm_completion(\n        **kwargs: dict[str, Any],  # noqa: ARG001\n    ) -> list[litellm.ModelResponse]:\n        # Validate that the environment variables are those in custom config\n        for env_var, value in CUSTOM_CONFIG.items():\n            assert env_var in os.environ\n            assert os.environ[env_var] == value\n\n        return mock_stream_chunks\n\n    with (\n        patch(\"litellm.completion\") as mock_completion,\n        patch(\"onyx.llm.utils.SEND_USER_METADATA_TO_LLM_PROVIDER\", False),\n    ):\n        mock_completion.side_effect = on_litellm_completion\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        identity = LLMUserIdentity(user_id=\"user_123\", session_id=\"session_abc\")\n\n        llm.invoke(messages, user_identity=identity)\n\n        mock_completion.assert_called_once()\n        kwargs = mock_completion.call_args.kwargs\n        assert kwargs[\"stream\"] is True\n        assert \"user\" not in kwargs\n        assert kwargs[\"metadata\"][\"foo\"] == \"bar\"\n\n        # Check that the environment variables are back to the original values\n        for env_var, value in EXPECTED_ENV_VARS.items():\n            assert env_var in os.environ\n            assert os.environ[env_var] == value\n\n        # Check that temporary env var from CUSTOM_CONFIG is no longer set\n        assert \"THIS_IS_RANDOM\" not in os.environ\n\n\ndef test_temporary_env_cleanup_on_exception(monkeypatch: pytest.MonkeyPatch) -> None:\n    \"\"\"Verify env vars are restored even when an exception occurs during LLM invocation.\"\"\"\n    # Assign some environment variables\n    EXPECTED_ENV_VARS = {\n        \"TEST_ENV_VAR\": \"test_value\",\n        \"ANOTHER_ONE\": \"1\",\n        \"THIRD_ONE\": \"2\",\n    }\n\n    CUSTOM_CONFIG = {\n        \"TEST_ENV_VAR\": \"fdsfsdf\",\n        \"ANOTHER_ONE\": \"3\",\n        \"THIS_IS_RANDOM\": \"123213\",\n    }\n\n    for env_var, value in EXPECTED_ENV_VARS.items():\n        monkeypatch.setenv(env_var, value)\n\n    model_provider = LlmProviderNames.OPENAI\n    model_name = \"gpt-3.5-turbo\"\n\n    llm = LitellmLLM(\n        api_key=\"test_key\",\n        timeout=30,\n        model_provider=model_provider,\n        model_name=model_name,\n        max_input_tokens=get_max_input_tokens(\n            model_provider=model_provider,\n            model_name=model_name,\n        ),\n        model_kwargs={\"metadata\": {\"foo\": \"bar\"}},\n        custom_config=CUSTOM_CONFIG,\n    )\n\n    def on_litellm_completion_raises(\n        **kwargs: dict[str, Any],  # noqa: ARG001\n    ) -> None:  # noqa: ARG001\n        # Validate that the environment variables are those in custom config\n        for env_var, value in CUSTOM_CONFIG.items():\n            assert env_var in os.environ\n            assert os.environ[env_var] == value\n\n        # Simulate an error during LLM call\n        raise RuntimeError(\"Simulated LLM API failure\")\n\n    with (\n        patch(\"litellm.completion\") as mock_completion,\n        patch(\"onyx.llm.utils.SEND_USER_METADATA_TO_LLM_PROVIDER\", False),\n    ):\n        mock_completion.side_effect = on_litellm_completion_raises\n\n        messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n        identity = LLMUserIdentity(user_id=\"user_123\", session_id=\"session_abc\")\n\n        with pytest.raises(RuntimeError, match=\"Simulated LLM API failure\"):\n            llm.invoke(messages, user_identity=identity)\n\n        mock_completion.assert_called_once()\n\n        # Check that the environment variables are back to the original values\n        for env_var, value in EXPECTED_ENV_VARS.items():\n            assert env_var in os.environ\n            assert os.environ[env_var] == value\n\n        # Check that temporary env var from CUSTOM_CONFIG is no longer set\n        assert \"THIS_IS_RANDOM\" not in os.environ\n\n\n@pytest.mark.parametrize(\"use_stream\", [False, True], ids=[\"invoke\", \"stream\"])\ndef test_multithreaded_custom_config_isolation(\n    monkeypatch: pytest.MonkeyPatch,\n    use_stream: bool,\n) -> None:\n    \"\"\"Verify the env lock prevents concurrent LLM calls from seeing each other's custom_config.\n\n    Two LitellmLLM instances with different custom_config dicts call invoke/stream\n    concurrently. The _env_lock in temporary_env_and_lock serializes their access so\n    each call only ever sees its own env vars—never the other's.\n    \"\"\"\n    # Ensure these keys start unset\n    monkeypatch.delenv(\"SHARED_KEY\", raising=False)\n    monkeypatch.delenv(\"LLM_A_ONLY\", raising=False)\n    monkeypatch.delenv(\"LLM_B_ONLY\", raising=False)\n\n    CONFIG_A = {\n        \"SHARED_KEY\": \"value_from_A\",\n        \"LLM_A_ONLY\": \"a_secret\",\n    }\n    CONFIG_B = {\n        \"SHARED_KEY\": \"value_from_B\",\n        \"LLM_B_ONLY\": \"b_secret\",\n    }\n\n    all_env_keys = list(set(list(CONFIG_A.keys()) + list(CONFIG_B.keys())))\n\n    model_provider = LlmProviderNames.OPENAI\n    model_name = \"gpt-3.5-turbo\"\n\n    llm_a = LitellmLLM(\n        api_key=\"key_a\",\n        timeout=30,\n        model_provider=model_provider,\n        model_name=model_name,\n        max_input_tokens=get_max_input_tokens(\n            model_provider=model_provider,\n            model_name=model_name,\n        ),\n        custom_config=CONFIG_A,\n    )\n    llm_b = LitellmLLM(\n        api_key=\"key_b\",\n        timeout=30,\n        model_provider=model_provider,\n        model_name=model_name,\n        max_input_tokens=get_max_input_tokens(\n            model_provider=model_provider,\n            model_name=model_name,\n        ),\n        custom_config=CONFIG_B,\n    )\n\n    # Both invoke (with custom_config) and stream use stream=True at the\n    # litellm level, so the mock must return stream chunks.\n    mock_stream_chunks = [\n        litellm.ModelResponse(\n            id=\"chatcmpl-123\",\n            choices=[\n                litellm.Choices(\n                    delta=_create_delta(role=\"assistant\", content=\"Hi\"),\n                    finish_reason=\"stop\",\n                    index=0,\n                )\n            ],\n            model=model_name,\n        ),\n    ]\n\n    # Track what each call observed inside litellm.completion.\n    # Keyed by api_key so we can identify which LLM instance made the call.\n    observed_envs: dict[str, dict[str, str | None]] = {}\n\n    def fake_completion(**kwargs: Any) -> list[litellm.ModelResponse]:\n        time.sleep(0.1)  # We expect someone to get caught on the lock\n        api_key = kwargs.get(\"api_key\", \"\")\n        label = \"A\" if api_key == \"key_a\" else \"B\"\n\n        snapshot: dict[str, str | None] = {}\n        for key in all_env_keys:\n            snapshot[key] = os.environ.get(key)\n        observed_envs[label] = snapshot\n\n        return mock_stream_chunks\n\n    errors: list[Exception] = []\n\n    def run_llm(llm: LitellmLLM) -> None:\n        try:\n            messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n            if use_stream:\n                list(llm.stream(messages))\n            else:\n                llm.invoke(messages)\n        except Exception as e:\n            errors.append(e)\n\n    with patch(\"litellm.completion\", side_effect=fake_completion):\n        t_a = threading.Thread(target=run_llm, args=(llm_a,))\n        t_b = threading.Thread(target=run_llm, args=(llm_b,))\n\n        t_a.start()\n        t_b.start()\n        t_a.join(timeout=10)\n        t_b.join(timeout=10)\n\n    assert not errors, f\"Thread errors: {errors}\"\n    assert \"A\" in observed_envs and \"B\" in observed_envs\n\n    # Thread A must have seen its own config for SHARED_KEY, not B's\n    assert observed_envs[\"A\"][\"SHARED_KEY\"] == \"value_from_A\"\n    assert observed_envs[\"A\"][\"LLM_A_ONLY\"] == \"a_secret\"\n    # A must NOT see B's exclusive key\n    assert observed_envs[\"A\"][\"LLM_B_ONLY\"] is None\n\n    # Thread B must have seen its own config for SHARED_KEY, not A's\n    assert observed_envs[\"B\"][\"SHARED_KEY\"] == \"value_from_B\"\n    assert observed_envs[\"B\"][\"LLM_B_ONLY\"] == \"b_secret\"\n    # B must NOT see A's exclusive key\n    assert observed_envs[\"B\"][\"LLM_A_ONLY\"] is None\n\n    # After both calls, env should be clean\n    assert os.environ.get(\"SHARED_KEY\") is None\n    assert os.environ.get(\"LLM_A_ONLY\") is None\n    assert os.environ.get(\"LLM_B_ONLY\") is None\n\n\ndef test_multithreaded_invoke_without_custom_config_skips_env_lock() -> None:\n    \"\"\"Verify that invoke() without custom_config does not acquire the env lock.\n\n    Two LitellmLLM instances without custom_config call invoke concurrently.\n    Both should run with stream=False, never touch the env lock, and complete\n    without blocking each other.\n    \"\"\"\n    from onyx.llm import multi_llm as multi_llm_module\n\n    model_provider = LlmProviderNames.OPENAI\n    model_name = \"gpt-3.5-turbo\"\n\n    llm_a = LitellmLLM(\n        api_key=\"key_a\",\n        timeout=30,\n        model_provider=model_provider,\n        model_name=model_name,\n        max_input_tokens=get_max_input_tokens(\n            model_provider=model_provider,\n            model_name=model_name,\n        ),\n    )\n    llm_b = LitellmLLM(\n        api_key=\"key_b\",\n        timeout=30,\n        model_provider=model_provider,\n        model_name=model_name,\n        max_input_tokens=get_max_input_tokens(\n            model_provider=model_provider,\n            model_name=model_name,\n        ),\n    )\n\n    mock_stream_chunks = [\n        litellm.ModelResponse(\n            id=\"chatcmpl-123\",\n            choices=[\n                litellm.Choices(\n                    delta=_create_delta(role=\"assistant\", content=\"Hi\"),\n                    finish_reason=\"stop\",\n                    index=0,\n                )\n            ],\n            model=model_name,\n        ),\n    ]\n\n    call_kwargs: dict[str, dict[str, Any]] = {}\n\n    def fake_completion(**kwargs: Any) -> list[litellm.ModelResponse]:\n        api_key = kwargs.get(\"api_key\", \"\")\n        label = \"A\" if api_key == \"key_a\" else \"B\"\n        call_kwargs[label] = kwargs\n        return mock_stream_chunks\n\n    errors: list[Exception] = []\n\n    def run_llm(llm: LitellmLLM) -> None:\n        try:\n            messages: LanguageModelInput = [UserMessage(content=\"Hi\")]\n            llm.invoke(messages)\n        except Exception as e:\n            errors.append(e)\n\n    with (\n        patch(\"litellm.completion\", side_effect=fake_completion),\n        patch.object(\n            multi_llm_module,\n            \"temporary_env_and_lock\",\n            wraps=multi_llm_module.temporary_env_and_lock,\n        ) as mock_env_lock,\n    ):\n        t_a = threading.Thread(target=run_llm, args=(llm_a,))\n        t_b = threading.Thread(target=run_llm, args=(llm_b,))\n\n        t_a.start()\n        t_b.start()\n        t_a.join(timeout=10)\n        t_b.join(timeout=10)\n\n    assert not errors, f\"Thread errors: {errors}\"\n    assert \"A\" in call_kwargs and \"B\" in call_kwargs\n\n    # invoke() always uses stream=True internally (reassembles via stream_chunk_builder)\n    assert call_kwargs[\"A\"][\"stream\"] is True\n    assert call_kwargs[\"B\"][\"stream\"] is True\n\n    # The env lock context manager should never have been called\n    mock_env_lock.assert_not_called()\n\n\n# ---- Tests for Bedrock tool content stripping ----\n\n\ndef test_messages_contain_tool_content_with_tool_role() -> None:\n    from onyx.llm.multi_llm import _messages_contain_tool_content\n\n    messages: list[dict[str, Any]] = [\n        {\"role\": \"user\", \"content\": \"Hello\"},\n        {\"role\": \"assistant\", \"content\": \"I'll search for that.\"},\n        {\"role\": \"tool\", \"content\": \"search results\", \"tool_call_id\": \"tc_1\"},\n    ]\n    assert _messages_contain_tool_content(messages) is True\n\n\ndef test_messages_contain_tool_content_with_tool_calls() -> None:\n    from onyx.llm.multi_llm import _messages_contain_tool_content\n\n    messages: list[dict[str, Any]] = [\n        {\"role\": \"user\", \"content\": \"Hello\"},\n        {\n            \"role\": \"assistant\",\n            \"content\": None,\n            \"tool_calls\": [\n                {\n                    \"id\": \"tc_1\",\n                    \"type\": \"function\",\n                    \"function\": {\"name\": \"search\", \"arguments\": \"{}\"},\n                }\n            ],\n        },\n    ]\n    assert _messages_contain_tool_content(messages) is True\n\n\ndef test_messages_contain_tool_content_without_tools() -> None:\n    from onyx.llm.multi_llm import _messages_contain_tool_content\n\n    messages: list[dict[str, Any]] = [\n        {\"role\": \"user\", \"content\": \"Hello\"},\n        {\"role\": \"assistant\", \"content\": \"Hi there!\"},\n    ]\n    assert _messages_contain_tool_content(messages) is False\n\n\ndef test_strip_tool_content_converts_assistant_tool_calls_to_text() -> None:\n    from onyx.llm.multi_llm import _strip_tool_content_from_messages\n\n    messages: list[dict[str, Any]] = [\n        {\"role\": \"user\", \"content\": \"Search for cats\"},\n        {\n            \"role\": \"assistant\",\n            \"content\": \"Let me search.\",\n            \"tool_calls\": [\n                {\n                    \"id\": \"tc_1\",\n                    \"type\": \"function\",\n                    \"function\": {\n                        \"name\": \"search\",\n                        \"arguments\": '{\"query\": \"cats\"}',\n                    },\n                }\n            ],\n        },\n        {\n            \"role\": \"tool\",\n            \"content\": \"Found 3 results about cats.\",\n            \"tool_call_id\": \"tc_1\",\n        },\n        {\"role\": \"assistant\", \"content\": \"Here are the results.\"},\n    ]\n\n    result = _strip_tool_content_from_messages(messages)\n\n    assert len(result) == 4\n\n    # First message unchanged\n    assert result[0] == {\"role\": \"user\", \"content\": \"Search for cats\"}\n\n    # Assistant with tool calls → plain text\n    assert result[1][\"role\"] == \"assistant\"\n    assert \"tool_calls\" not in result[1]\n    assert \"Let me search.\" in result[1][\"content\"]\n    assert \"[Tool Call]\" in result[1][\"content\"]\n    assert \"search\" in result[1][\"content\"]\n    assert \"tc_1\" in result[1][\"content\"]\n\n    # Tool response → user message\n    assert result[2][\"role\"] == \"user\"\n    assert \"[Tool Result]\" in result[2][\"content\"]\n    assert \"tc_1\" in result[2][\"content\"]\n    assert \"Found 3 results about cats.\" in result[2][\"content\"]\n\n    # Final assistant message unchanged\n    assert result[3] == {\"role\": \"assistant\", \"content\": \"Here are the results.\"}\n\n\ndef test_strip_tool_content_handles_assistant_with_no_text_content() -> None:\n    from onyx.llm.multi_llm import _strip_tool_content_from_messages\n\n    messages: list[dict[str, Any]] = [\n        {\n            \"role\": \"assistant\",\n            \"content\": None,\n            \"tool_calls\": [\n                {\n                    \"id\": \"tc_1\",\n                    \"type\": \"function\",\n                    \"function\": {\"name\": \"search\", \"arguments\": \"{}\"},\n                }\n            ],\n        },\n    ]\n\n    result = _strip_tool_content_from_messages(messages)\n    assert result[0][\"role\"] == \"assistant\"\n    assert \"[Tool Call]\" in result[0][\"content\"]\n    assert \"tool_calls\" not in result[0]\n\n\ndef test_strip_tool_content_passes_through_non_tool_messages() -> None:\n    from onyx.llm.multi_llm import _strip_tool_content_from_messages\n\n    messages: list[dict[str, Any]] = [\n        {\"role\": \"system\", \"content\": \"You are helpful.\"},\n        {\"role\": \"user\", \"content\": \"Hello\"},\n        {\"role\": \"assistant\", \"content\": \"Hi!\"},\n    ]\n\n    result = _strip_tool_content_from_messages(messages)\n    assert result == messages\n\n\ndef test_strip_tool_content_handles_list_content_blocks() -> None:\n    from onyx.llm.multi_llm import _strip_tool_content_from_messages\n\n    messages: list[dict[str, Any]] = [\n        {\n            \"role\": \"assistant\",\n            \"content\": [{\"type\": \"text\", \"text\": \"Searching now.\"}],\n            \"tool_calls\": [\n                {\n                    \"id\": \"tc_1\",\n                    \"type\": \"function\",\n                    \"function\": {\"name\": \"search\", \"arguments\": \"{}\"},\n                }\n            ],\n        },\n        {\n            \"role\": \"tool\",\n            \"content\": [\n                {\"type\": \"text\", \"text\": \"result A\"},\n                {\"type\": \"text\", \"text\": \"result B\"},\n            ],\n            \"tool_call_id\": \"tc_1\",\n        },\n    ]\n\n    result = _strip_tool_content_from_messages(messages)\n\n    # Assistant: list content flattened + tool call appended\n    assert result[0][\"role\"] == \"assistant\"\n    assert \"Searching now.\" in result[0][\"content\"]\n    assert \"[Tool Call]\" in result[0][\"content\"]\n    assert isinstance(result[0][\"content\"], str)\n\n    # Tool: list content flattened into user message\n    assert result[1][\"role\"] == \"user\"\n    assert \"result A\" in result[1][\"content\"]\n    assert \"result B\" in result[1][\"content\"]\n    assert isinstance(result[1][\"content\"], str)\n\n\ndef test_strip_tool_content_merges_consecutive_tool_results() -> None:\n    \"\"\"Bedrock requires strict user/assistant alternation. Multiple parallel\n    tool results must be merged into a single user message.\"\"\"\n    from onyx.llm.multi_llm import _strip_tool_content_from_messages\n\n    messages: list[dict[str, Any]] = [\n        {\"role\": \"user\", \"content\": \"weather and news?\"},\n        {\n            \"role\": \"assistant\",\n            \"content\": None,\n            \"tool_calls\": [\n                {\n                    \"id\": \"tc_1\",\n                    \"type\": \"function\",\n                    \"function\": {\"name\": \"search_weather\", \"arguments\": \"{}\"},\n                },\n                {\n                    \"id\": \"tc_2\",\n                    \"type\": \"function\",\n                    \"function\": {\"name\": \"search_news\", \"arguments\": \"{}\"},\n                },\n            ],\n        },\n        {\"role\": \"tool\", \"content\": \"sunny 72F\", \"tool_call_id\": \"tc_1\"},\n        {\"role\": \"tool\", \"content\": \"headline news\", \"tool_call_id\": \"tc_2\"},\n        {\"role\": \"assistant\", \"content\": \"Here are the results.\"},\n    ]\n\n    result = _strip_tool_content_from_messages(messages)\n\n    # user, assistant (flattened), user (merged tool results), assistant\n    assert len(result) == 4\n    roles = [m[\"role\"] for m in result]\n    assert roles == [\"user\", \"assistant\", \"user\", \"assistant\"]\n\n    # Both tool results merged into one user message\n    merged = result[2][\"content\"]\n    assert \"tc_1\" in merged\n    assert \"sunny 72F\" in merged\n    assert \"tc_2\" in merged\n    assert \"headline news\" in merged\n\n\ndef test_no_tool_choice_sent_when_no_tools(default_multi_llm: LitellmLLM) -> None:\n    \"\"\"Regression test for providers (e.g. Fireworks) that reject tool_choice=null.\n\n    When no tools are provided, tool_choice must not be forwarded to\n    litellm.completion() at all — not even as None.\n    \"\"\"\n    messages: LanguageModelInput = [UserMessage(content=\"Hello!\")]\n\n    mock_stream_chunks = [\n        litellm.ModelResponse(\n            id=\"chatcmpl-123\",\n            choices=[\n                litellm.Choices(\n                    delta=_create_delta(role=\"assistant\", content=\"Hello!\"),\n                    finish_reason=\"stop\",\n                    index=0,\n                )\n            ],\n            model=\"gpt-3.5-turbo\",\n        ),\n    ]\n\n    with patch(\"litellm.completion\") as mock_completion:\n        mock_completion.return_value = mock_stream_chunks\n\n        default_multi_llm.invoke(messages, tools=None)\n\n        _, kwargs = mock_completion.call_args\n        assert (\n            \"tool_choice\" not in kwargs\n        ), \"tool_choice must not be sent to providers when no tools are provided\"\n\n\ndef test_bifrost_normalizes_api_base_in_model_kwargs() -> None:\n    llm = LitellmLLM(\n        api_key=\"test_key\",\n        api_base=\"https://bifrost.example.com/\",\n        timeout=30,\n        model_provider=LlmProviderNames.BIFROST,\n        model_name=\"anthropic/claude-sonnet-4-6\",\n        max_input_tokens=32000,\n    )\n\n    assert llm._custom_llm_provider == \"openai\"\n    assert llm._api_base == \"https://bifrost.example.com/v1\"\n    assert llm._model_kwargs[\"api_base\"] == \"https://bifrost.example.com/v1\"\n\n\ndef test_prompt_contains_tool_call_history_true() -> None:\n    from onyx.llm.multi_llm import _prompt_contains_tool_call_history\n\n    messages: LanguageModelInput = [\n        UserMessage(content=\"What's the weather?\"),\n        AssistantMessage(\n            content=None,\n            tool_calls=[\n                ToolCall(\n                    id=\"tc_1\",\n                    function=FunctionCall(name=\"get_weather\", arguments=\"{}\"),\n                )\n            ],\n        ),\n    ]\n    assert _prompt_contains_tool_call_history(messages) is True\n\n\ndef test_prompt_contains_tool_call_history_false_no_tools() -> None:\n    from onyx.llm.multi_llm import _prompt_contains_tool_call_history\n\n    messages: LanguageModelInput = [\n        UserMessage(content=\"Hello\"),\n        AssistantMessage(content=\"Hi there!\"),\n    ]\n    assert _prompt_contains_tool_call_history(messages) is False\n\n\ndef test_prompt_contains_tool_call_history_false_user_only() -> None:\n    from onyx.llm.multi_llm import _prompt_contains_tool_call_history\n\n    messages: LanguageModelInput = [UserMessage(content=\"Hello\")]\n    assert _prompt_contains_tool_call_history(messages) is False\n\n\ndef test_bedrock_claude_drops_thinking_when_thinking_blocks_missing() -> None:\n    \"\"\"When thinking is enabled but assistant messages with tool_calls lack\n    thinking_blocks, the thinking param must be dropped to avoid the Bedrock\n    BadRequestError about missing thinking blocks.\"\"\"\n    llm = LitellmLLM(\n        api_key=None,\n        timeout=30,\n        model_provider=LlmProviderNames.BEDROCK,\n        model_name=\"anthropic.claude-sonnet-4-20250514-v1:0\",\n        max_input_tokens=200000,\n    )\n\n    messages: LanguageModelInput = [\n        UserMessage(content=\"What's the weather?\"),\n        AssistantMessage(\n            content=None,\n            tool_calls=[\n                ToolCall(\n                    id=\"tc_1\",\n                    function=FunctionCall(\n                        name=\"get_weather\",\n                        arguments='{\"city\": \"Paris\"}',\n                    ),\n                )\n            ],\n        ),\n        onyx.llm.models.ToolMessage(\n            content=\"22°C sunny\",\n            tool_call_id=\"tc_1\",\n        ),\n    ]\n\n    tools = [\n        {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": \"get_weather\",\n                \"description\": \"Get the weather\",\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\"city\": {\"type\": \"string\"}},\n                },\n            },\n        }\n    ]\n\n    with (\n        patch(\"litellm.completion\") as mock_completion,\n        patch(\"onyx.llm.multi_llm.model_is_reasoning_model\", return_value=True),\n    ):\n        mock_completion.return_value = []\n\n        list(llm.stream(messages, tools=tools, reasoning_effort=ReasoningEffort.HIGH))\n\n        kwargs = mock_completion.call_args.kwargs\n        assert \"thinking\" not in kwargs, (\n            \"thinking param should be dropped when thinking_blocks are missing \"\n            \"from assistant messages with tool_calls\"\n        )\n\n\ndef test_bedrock_claude_keeps_thinking_when_no_tool_history() -> None:\n    \"\"\"When thinking is enabled and there are no historical assistant messages\n    with tool_calls, the thinking param should be preserved.\"\"\"\n    llm = LitellmLLM(\n        api_key=None,\n        timeout=30,\n        model_provider=LlmProviderNames.BEDROCK,\n        model_name=\"anthropic.claude-sonnet-4-20250514-v1:0\",\n        max_input_tokens=200000,\n    )\n\n    messages: LanguageModelInput = [\n        UserMessage(content=\"What's the weather?\"),\n    ]\n\n    tools = [\n        {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": \"get_weather\",\n                \"description\": \"Get the weather\",\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\"city\": {\"type\": \"string\"}},\n                },\n            },\n        }\n    ]\n\n    with (\n        patch(\"litellm.completion\") as mock_completion,\n        patch(\"onyx.llm.multi_llm.model_is_reasoning_model\", return_value=True),\n    ):\n        mock_completion.return_value = []\n\n        list(llm.stream(messages, tools=tools, reasoning_effort=ReasoningEffort.HIGH))\n\n        kwargs = mock_completion.call_args.kwargs\n        assert \"thinking\" in kwargs, (\n            \"thinking param should be preserved when no assistant messages \"\n            \"with tool_calls exist in history\"\n        )\n        assert kwargs[\"thinking\"][\"type\"] == \"enabled\"\n\n\ndef test_bifrost_claude_includes_allowed_openai_params() -> None:\n    llm = LitellmLLM(\n        api_key=\"test_key\",\n        api_base=\"https://bifrost.example.com\",\n        timeout=30,\n        model_provider=LlmProviderNames.BIFROST,\n        model_name=\"anthropic/claude-sonnet-4-6\",\n        max_input_tokens=32000,\n    )\n\n    messages: LanguageModelInput = [UserMessage(content=\"Use a tool if needed\")]\n    tools = [\n        {\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": \"lookup\",\n                \"description\": \"Look up data\",\n                \"parameters\": {\n                    \"type\": \"object\",\n                    \"properties\": {\"query\": {\"type\": \"string\"}},\n                    \"required\": [\"query\"],\n                },\n            },\n        }\n    ]\n    mock_stream_chunks = [\n        litellm.ModelResponse(\n            id=\"chatcmpl-123\",\n            choices=[\n                litellm.Choices(\n                    delta=_create_delta(role=\"assistant\", content=\"Done\"),\n                    finish_reason=\"stop\",\n                    index=0,\n                )\n            ],\n            model=\"anthropic/claude-sonnet-4-6\",\n        ),\n    ]\n\n    with patch(\"litellm.completion\") as mock_completion:\n        mock_completion.return_value = mock_stream_chunks\n\n        llm.invoke(messages, tools=tools)\n\n        kwargs = mock_completion.call_args.kwargs\n        assert kwargs[\"model\"] == \"anthropic/claude-sonnet-4-6\"\n        assert kwargs[\"base_url\"] == \"https://bifrost.example.com/v1\"\n        assert kwargs[\"custom_llm_provider\"] == \"openai\"\n        assert kwargs[\"allowed_openai_params\"] == [\"tool_choice\"]\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_reasoning_effort_mapping.py",
    "content": "from onyx.llm.models import OPENAI_REASONING_EFFORT\nfrom onyx.llm.models import ReasoningEffort\n\n\n# Valid OpenAI reasoning effort values per the API documentation\n# https://platform.openai.com/docs/api-reference/responses\nVALID_OPENAI_REASONING_EFFORT_VALUES = frozenset(\n    {\"none\", \"minimal\", \"low\", \"medium\", \"high\", \"xhigh\"}\n)\n\n\ndef test_openai_reasoning_effort_mapping_has_valid_values() -> None:\n    \"\"\"Test that all OPENAI_REASONING_EFFORT mapping values are valid OpenAI API values.\n\n    This test prevents regressions where invalid values like \"auto\" are passed to the\n    OpenAI API, which would result in a 400 Bad Request error.\n\n    The OpenAI API only accepts: 'none', 'minimal', 'low', 'medium', 'high', 'xhigh'\n    \"\"\"\n    for effort_level, openai_value in OPENAI_REASONING_EFFORT.items():\n        assert openai_value in VALID_OPENAI_REASONING_EFFORT_VALUES, (\n            f\"OPENAI_REASONING_EFFORT[{effort_level}] = '{openai_value}' is not a valid \"\n            f\"OpenAI reasoning effort value. Valid values are: {sorted(VALID_OPENAI_REASONING_EFFORT_VALUES)}\"\n        )\n\n\ndef test_openai_reasoning_effort_mapping_covers_all_effort_levels() -> None:\n    \"\"\"Test that OPENAI_REASONING_EFFORT has mappings for all ReasoningEffort values.\n\n    This ensures we don't accidentally forget to add a mapping when new effort levels are added.\n    Note: ReasoningEffort.OFF maps to \"none\" in the OpenAI API.\n    \"\"\"\n    # These are the effort levels that should have OpenAI mappings\n    expected_effort_levels = {\n        ReasoningEffort.AUTO,\n        ReasoningEffort.OFF,\n        ReasoningEffort.LOW,\n        ReasoningEffort.MEDIUM,\n        ReasoningEffort.HIGH,\n    }\n\n    mapped_effort_levels = set(OPENAI_REASONING_EFFORT.keys())\n\n    assert mapped_effort_levels == expected_effort_levels, (\n        f\"OPENAI_REASONING_EFFORT mapping is missing or has extra effort levels. \"\n        f\"Expected: {expected_effort_levels}, Got: {mapped_effort_levels}\"\n    )\n\n\ndef test_reasoning_effort_auto_does_not_map_to_auto() -> None:\n    \"\"\"Explicitly test that ReasoningEffort.AUTO does not map to the string 'auto'.\n\n    OpenAI's API does not accept 'auto' as a value for reasoning.effort.\n    This test exists as a specific guard against the bug that caused this test file\n    to be created in the first place.\n    \"\"\"\n    assert OPENAI_REASONING_EFFORT[ReasoningEffort.AUTO] != \"auto\", (\n        \"ReasoningEffort.AUTO must not map to 'auto' - OpenAI API rejects this value. \"\n        \"Use a valid default like 'medium' or 'low' instead.\"\n    )\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_request_context.py",
    "content": "import contextvars\n\nfrom onyx.llm.request_context import get_llm_mock_response\nfrom onyx.llm.request_context import reset_llm_mock_response\nfrom onyx.llm.request_context import set_llm_mock_response\n\n\ndef test_reset_llm_mock_response_same_context() -> None:\n    token = set_llm_mock_response(\"mock-response\")\n    assert get_llm_mock_response() == \"mock-response\"\n\n    reset_llm_mock_response(token)\n    assert get_llm_mock_response() is None\n\n\ndef test_reset_llm_mock_response_different_context() -> None:\n    foreign_context = contextvars.copy_context()\n    foreign_token = foreign_context.run(set_llm_mock_response, \"foreign-response\")\n\n    set_llm_mock_response(\"current-response\")\n    assert get_llm_mock_response() == \"current-response\"\n\n    # Should not raise even when token came from another context.\n    reset_llm_mock_response(foreign_token)\n    assert get_llm_mock_response() is None\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_true_openai_model.py",
    "content": "from onyx.llm.constants import LlmProviderNames\nfrom onyx.llm.utils import get_model_map\nfrom onyx.llm.utils import is_true_openai_model\n\n\nclass TestIsTrueOpenAIModel:\n    \"\"\"Tests for the is_true_openai_model function using real LiteLLM model registry.\"\"\"\n\n    def test_real_openai_gpt4(self) -> None:\n        \"\"\"Test that real OpenAI GPT-4 model is correctly identified.\"\"\"\n        assert is_true_openai_model(LlmProviderNames.OPENAI, \"gpt-4\") is True\n\n    def test_real_openai_gpt4_turbo(self) -> None:\n        \"\"\"Test that real OpenAI GPT-4-turbo model is correctly identified.\"\"\"\n        assert is_true_openai_model(LlmProviderNames.OPENAI, \"gpt-4-turbo\") is True\n\n    def test_real_openai_gpt35_turbo(self) -> None:\n        \"\"\"Test that real OpenAI GPT-3.5-turbo model is correctly identified.\"\"\"\n        assert is_true_openai_model(LlmProviderNames.OPENAI, \"gpt-3.5-turbo\") is True\n\n    def test_real_openai_gpt4o(self) -> None:\n        \"\"\"Test that real OpenAI GPT-4o model is correctly identified.\"\"\"\n        assert is_true_openai_model(LlmProviderNames.OPENAI, \"gpt-4o\") is True\n\n    def test_real_openai_gpt4o_mini(self) -> None:\n        \"\"\"Test that real OpenAI GPT-4o-mini model is correctly identified.\"\"\"\n        assert is_true_openai_model(LlmProviderNames.OPENAI, \"gpt-4o-mini\") is True\n\n    def test_openai_with_provider_prefix(self) -> None:\n        \"\"\"Test that OpenAI model with provider prefix is correctly identified.\"\"\"\n        assert is_true_openai_model(LlmProviderNames.OPENAI, \"openai/gpt-4\") is False\n\n    def test_real_openai_with_date_version(self) -> None:\n        \"\"\"Test that OpenAI model with date version is correctly identified.\"\"\"\n        # Check if this specific dated version exists in the registry\n        model_map = get_model_map()\n        if \"openai/gpt-4-0613\" in model_map:\n            assert is_true_openai_model(LlmProviderNames.OPENAI, \"gpt-4-0613\") is True\n\n    def test_non_openai_provider_anthropic(self) -> None:\n        \"\"\"Test that non-OpenAI provider (Anthropic) returns False.\"\"\"\n        assert (\n            is_true_openai_model(\n                LlmProviderNames.ANTHROPIC, \"claude-3-5-sonnet-20241022\"\n            )\n            is False\n        )\n\n    def test_non_openai_provider_gemini(self) -> None:\n        \"\"\"Test that non-OpenAI provider returns False.\"\"\"\n        assert (\n            is_true_openai_model(LlmProviderNames.VERTEX_AI, \"gemini-1.5-pro\") is False\n        )\n\n    def test_non_openai_provider_ollama(self) -> None:\n        \"\"\"Test that Ollama provider returns False.\"\"\"\n        assert is_true_openai_model(LlmProviderNames.OLLAMA_CHAT, \"llama3.1\") is False\n\n    def test_openai_compatible_not_in_registry(self) -> None:\n        \"\"\"Test that OpenAI-compatible model not in registry returns False.\"\"\"\n        # Custom model served via vLLM or LiteLLM proxy\n        assert (\n            is_true_openai_model(LlmProviderNames.OPENAI, \"custom-llama-model\") is False\n        )\n\n    def test_openai_compatible_starts_with_o_not_in_registry(self) -> None:\n        \"\"\"Test that model starting with 'o' but not in registry returns False.\"\"\"\n        # This would have returned True with the old implementation\n        assert is_true_openai_model(LlmProviderNames.OPENAI, \"ollama-model\") is False\n\n    def test_empty_model_name(self) -> None:\n        \"\"\"Test that empty model name returns False.\"\"\"\n        assert is_true_openai_model(LlmProviderNames.OPENAI, \"\") is False\n\n    def test_empty_provider(self) -> None:\n        \"\"\"Test that empty provider returns False.\"\"\"\n        assert is_true_openai_model(\"\", \"gpt-4\") is False\n\n    def test_case_sensitivity(self) -> None:\n        \"\"\"Test that model names are case-sensitive.\"\"\"\n        # Model names should be case-sensitive\n        assert is_true_openai_model(LlmProviderNames.OPENAI, \"GPT-4\") is False\n\n    def test_none_values_handled(self) -> None:\n        \"\"\"Test that None values are handled gracefully.\"\"\"\n        # Should not crash with None values\n        assert is_true_openai_model(LlmProviderNames.OPENAI, None) is False  # type: ignore\n\n    def test_litellm_proxy_custom_model(self) -> None:\n        \"\"\"Test that custom models via LiteLLM proxy return False.\"\"\"\n        # Custom model name not in OpenAI registry\n        assert is_true_openai_model(LlmProviderNames.OPENAI, \"my-custom-gpt\") is False\n\n    def test_vllm_hosted_model(self) -> None:\n        \"\"\"Test that vLLM-hosted models with OpenAI-compatible API return False.\"\"\"\n        # vLLM hosting a custom model with OpenAI-compatible API\n        assert (\n            is_true_openai_model(LlmProviderNames.OPENAI, \"TheBloke/Llama-2-7B-GPTQ\")\n            is False\n        )\n\n    def test_openrouter_openai_model(self) -> None:\n        \"\"\"Test that OpenRouter proxied OpenAI models return False.\"\"\"\n        # OpenRouter is a proxy service, not true OpenAI\n        assert (\n            is_true_openai_model(LlmProviderNames.OPENROUTER, \"openai/gpt-4\") is False\n        )\n\n    def test_together_ai_model(self) -> None:\n        \"\"\"Test that Together AI models return False.\"\"\"\n        assert is_true_openai_model(\"together_ai\", \"mistralai/Mixtral-8x7B\") is False\n\n    def test_model_with_custom_suffix(self) -> None:\n        \"\"\"Test that models with custom suffixes not in registry return False.\"\"\"\n        # Custom deployment with suffix\n        assert (\n            is_true_openai_model(LlmProviderNames.OPENAI, \"gpt-4-my-deployment\")\n            is False\n        )\n\n    def test_real_openai_text_embedding_models(self) -> None:\n        \"\"\"Test that real OpenAI text-embedding models are correctly identified.\"\"\"\n        # Check if embedding models are in the registry\n        model_map = get_model_map()\n        if \"openai/text-embedding-ada-002\" in model_map:\n            assert (\n                is_true_openai_model(LlmProviderNames.OPENAI, \"text-embedding-ada-002\")\n                is True\n            )\n        if \"openai/text-embedding-3-small\" in model_map:\n            assert (\n                is_true_openai_model(LlmProviderNames.OPENAI, \"text-embedding-3-small\")\n                is True\n            )\n\n    def test_deprecated_openai_models(self) -> None:\n        \"\"\"Test that deprecated but real OpenAI models are still identified correctly.\"\"\"\n        # Check for older models that might still be in registry\n        model_map = get_model_map()\n        if \"openai/gpt-3.5-turbo-instruct\" in model_map:\n            assert (\n                is_true_openai_model(LlmProviderNames.OPENAI, \"gpt-3.5-turbo-instruct\")\n                is True\n            )\n\n    def test_azure_openai_model_through_litellm_proxy(self) -> None:\n        \"\"\"Test that Azure OpenAI models are correctly identified.\"\"\"\n        assert is_true_openai_model(LlmProviderNames.LITELLM_PROXY, \"gpt-4\") is True\n        assert is_true_openai_model(LlmProviderNames.LITELLM_PROXY, \"gpt-5\") is True\n        assert is_true_openai_model(LlmProviderNames.LITELLM_PROXY, \"gpt-5.1\") is True\n\n        assert (\n            is_true_openai_model(LlmProviderNames.LITELLM_PROXY, \"azure/gpt-4\") is True\n        )\n        assert (\n            is_true_openai_model(LlmProviderNames.LITELLM_PROXY, \"azure/gpt-5\") is True\n        )\n        assert (\n            is_true_openai_model(LlmProviderNames.LITELLM_PROXY, \"azure/gpt-5.1\")\n            is True\n        )\n"
  },
  {
    "path": "backend/tests/unit/onyx/llm/test_vision_model_selection_logging.py",
    "content": "\"\"\"\nUnit tests for vision model selection logging in get_default_llm_with_vision.\n\nVerifies that operators get clear feedback about:\n1. Which vision model was selected and why\n2. When the default vision model doesn't support image input\n3. When no vision-capable model exists at all\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.llm.factory import get_default_llm_with_vision\n\n\n_FACTORY = \"onyx.llm.factory\"\n\n\ndef _make_mock_model(\n    *,\n    name: str = \"gpt-4o\",\n    provider: str = \"openai\",\n    provider_id: int = 1,\n    flow_types: list[str] | None = None,\n) -> MagicMock:\n    model = MagicMock()\n    model.name = name\n    model.llm_provider_id = provider_id\n    model.llm_provider.provider = provider\n    model.llm_model_flow_types = flow_types or []\n    return model\n\n\n@patch(f\"{_FACTORY}.get_session_with_current_tenant\")\n@patch(f\"{_FACTORY}.fetch_default_vision_model\")\n@patch(f\"{_FACTORY}.model_supports_image_input\", return_value=True)\n@patch(f\"{_FACTORY}.llm_from_provider\")\n@patch(f\"{_FACTORY}.LLMProviderView\")\n@patch(f\"{_FACTORY}.logger\")\ndef test_logs_when_using_default_vision_model(\n    mock_logger: MagicMock,\n    mock_provider_view: MagicMock,  # noqa: ARG001\n    mock_llm_from: MagicMock,  # noqa: ARG001\n    mock_supports: MagicMock,  # noqa: ARG001\n    mock_fetch_default: MagicMock,\n    mock_session: MagicMock,  # noqa: ARG001\n) -> None:\n    mock_fetch_default.return_value = _make_mock_model(name=\"gpt-4o\", provider=\"azure\")\n\n    get_default_llm_with_vision()\n\n    mock_logger.info.assert_called_once()\n    log_msg = mock_logger.info.call_args[0][0]\n    assert \"default vision model\" in log_msg.lower()\n\n\n@patch(f\"{_FACTORY}.get_session_with_current_tenant\")\n@patch(f\"{_FACTORY}.fetch_default_vision_model\")\n@patch(f\"{_FACTORY}.model_supports_image_input\", return_value=False)\n@patch(f\"{_FACTORY}.fetch_existing_models\", return_value=[])\n@patch(f\"{_FACTORY}.logger\")\ndef test_warns_when_default_model_lacks_vision(\n    mock_logger: MagicMock,\n    mock_fetch_models: MagicMock,  # noqa: ARG001\n    mock_supports: MagicMock,  # noqa: ARG001\n    mock_fetch_default: MagicMock,\n    mock_session: MagicMock,  # noqa: ARG001\n) -> None:\n    mock_fetch_default.return_value = _make_mock_model(\n        name=\"text-only-model\", provider=\"azure\"\n    )\n\n    result = get_default_llm_with_vision()\n\n    assert result is None\n    # Should have warned about the default model not supporting vision\n    warning_calls = [\n        call\n        for call in mock_logger.warning.call_args_list\n        if \"does not support\" in str(call)\n    ]\n    assert len(warning_calls) >= 1\n\n\n@patch(f\"{_FACTORY}.get_session_with_current_tenant\")\n@patch(f\"{_FACTORY}.fetch_default_vision_model\", return_value=None)\n@patch(f\"{_FACTORY}.fetch_existing_models\", return_value=[])\n@patch(f\"{_FACTORY}.logger\")\ndef test_warns_when_no_models_exist(\n    mock_logger: MagicMock,\n    mock_fetch_models: MagicMock,  # noqa: ARG001\n    mock_fetch_default: MagicMock,  # noqa: ARG001\n    mock_session: MagicMock,  # noqa: ARG001\n) -> None:\n    result = get_default_llm_with_vision()\n\n    assert result is None\n    mock_logger.warning.assert_called_once()\n    log_msg = mock_logger.warning.call_args[0][0]\n    assert \"no llm models\" in log_msg.lower()\n\n\n@patch(f\"{_FACTORY}.get_session_with_current_tenant\")\n@patch(f\"{_FACTORY}.fetch_default_vision_model\", return_value=None)\n@patch(f\"{_FACTORY}.fetch_existing_models\")\n@patch(f\"{_FACTORY}.model_supports_image_input\", return_value=False)\n@patch(f\"{_FACTORY}.LLMProviderView\")\n@patch(f\"{_FACTORY}.logger\")\ndef test_warns_when_no_model_supports_vision(\n    mock_logger: MagicMock,\n    mock_provider_view: MagicMock,  # noqa: ARG001\n    mock_supports: MagicMock,  # noqa: ARG001\n    mock_fetch_models: MagicMock,\n    mock_fetch_default: MagicMock,  # noqa: ARG001\n    mock_session: MagicMock,  # noqa: ARG001\n) -> None:\n    mock_fetch_models.return_value = [\n        _make_mock_model(name=\"text-model-1\", provider=\"openai\"),\n        _make_mock_model(name=\"text-model-2\", provider=\"azure\", provider_id=2),\n    ]\n\n    result = get_default_llm_with_vision()\n\n    assert result is None\n    warning_calls = [\n        call\n        for call in mock_logger.warning.call_args_list\n        if \"no vision-capable model\" in str(call).lower()\n    ]\n    assert len(warning_calls) == 1\n"
  },
  {
    "path": "backend/tests/unit/onyx/natural_language_processing/test_search_nlp_models.py",
    "content": "from collections.abc import AsyncGenerator\nfrom typing import List\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom httpx import AsyncClient\nfrom litellm.exceptions import RateLimitError\n\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.natural_language_processing.search_nlp_models import CloudEmbedding\nfrom shared_configs.enums import EmbeddingProvider\nfrom shared_configs.enums import EmbedTextType\n\n\n@pytest.fixture\nasync def mock_http_client() -> AsyncGenerator[AsyncMock, None]:\n    with patch(\"httpx.AsyncClient\") as mock:\n        client = AsyncMock(spec=AsyncClient)\n        mock.return_value = client\n        client.post = AsyncMock()\n        async with client as c:\n            yield c\n\n\n@pytest.fixture\ndef sample_embeddings() -> List[List[float]]:\n    return [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]\n\n\n@pytest.mark.asyncio\nasync def test_cloud_embedding_context_manager() -> None:\n    async with CloudEmbedding(\"fake-key\", EmbeddingProvider.OPENAI) as embedding:\n        assert not embedding._closed\n    assert embedding._closed\n\n\n@pytest.mark.asyncio\nasync def test_cloud_embedding_explicit_close() -> None:\n    embedding = CloudEmbedding(\"fake-key\", EmbeddingProvider.OPENAI)\n    assert not embedding._closed\n    await embedding.aclose()\n    assert embedding._closed\n\n\n@pytest.mark.asyncio\nasync def test_openai_embedding(\n    mock_http_client: AsyncMock,  # noqa: ARG001\n    sample_embeddings: List[List[float]],\n) -> None:\n    with patch(\"openai.AsyncOpenAI\") as mock_openai:\n        mock_client = AsyncMock()\n        mock_openai.return_value = mock_client\n\n        mock_response = MagicMock()\n        mock_response.data = [MagicMock(embedding=emb) for emb in sample_embeddings]\n        mock_client.embeddings.create = AsyncMock(return_value=mock_response)\n\n        embedding = CloudEmbedding(\"fake-key\", EmbeddingProvider.OPENAI)\n        result = await embedding._embed_openai(\n            [\"test1\", \"test2\"], \"text-embedding-ada-002\", None\n        )\n\n        assert result == sample_embeddings\n        mock_client.embeddings.create.assert_called_once()\n\n\n@pytest.mark.asyncio\nasync def test_rate_limit_handling() -> None:\n    with patch(\n        \"onyx.natural_language_processing.search_nlp_models.CloudEmbedding.embed\"\n    ) as mock_embed:\n        mock_embed.side_effect = RateLimitError(\n            \"Rate limit exceeded\",\n            llm_provider=LlmProviderNames.OPENAI,\n            model=\"fake-model\",\n        )\n\n        embedding = CloudEmbedding(\"fake-key\", EmbeddingProvider.OPENAI)\n\n        with pytest.raises(RateLimitError):\n            await embedding.embed(\n                texts=[\"test\"],\n                model_name=\"fake-model\",\n                text_type=EmbedTextType.QUERY,\n            )\n"
  },
  {
    "path": "backend/tests/unit/onyx/onyxbot/discord/conftest.py",
    "content": "\"\"\"Fixtures for Discord bot unit tests.\"\"\"\n\nimport random\nfrom collections.abc import Callable\nfrom typing import Any\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\n\nimport discord\nimport pytest\n\n\nclass AsyncIteratorMock:\n    \"\"\"Helper class to mock async iterators like channel.history().\"\"\"\n\n    def __init__(self, items: list[Any]) -> None:\n        self.items = items\n        self.index = 0\n\n    def __aiter__(self) -> \"AsyncIteratorMock\":\n        return self\n\n    async def __anext__(self) -> Any:\n        if self.index >= len(self.items):\n            raise StopAsyncIteration\n        item = self.items[self.index]\n        self.index += 1\n        return item\n\n\ndef mock_message(\n    content: str = \"Test message\",\n    author_bot: bool = False,\n    message_type: discord.MessageType = discord.MessageType.default,\n    reference: MagicMock | None = None,\n    message_id: int | None = None,\n    author_id: int | None = None,\n    author_display_name: str | None = None,\n) -> MagicMock:\n    \"\"\"Helper to create mock Discord messages.\"\"\"\n    msg = MagicMock(spec=discord.Message)\n    msg.id = message_id or random.randint(100000, 999999)\n    msg.content = content\n    msg.author = MagicMock()\n    msg.author.id = author_id or random.randint(100000, 999999)\n    msg.author.bot = author_bot\n    msg.author.display_name = author_display_name or (\"Bot\" if author_bot else \"User\")\n    msg.type = message_type\n    msg.reference = reference\n    msg.mentions = []\n    msg.role_mentions = []\n    msg.channel_mentions = []\n    return msg\n\n\n@pytest.fixture\ndef mock_bot_user() -> MagicMock:\n    \"\"\"Mock Discord bot user.\"\"\"\n    user = MagicMock(spec=discord.ClientUser)\n    user.id = 123456789\n    user.display_name = \"OnyxBot\"\n    user.bot = True\n    return user\n\n\n@pytest.fixture\ndef mock_discord_guild() -> MagicMock:\n    \"\"\"Mock Discord guild with channels.\"\"\"\n    guild = MagicMock(spec=discord.Guild)\n    guild.id = 987654321\n    guild.name = \"Test Server\"\n    guild.default_role = MagicMock()\n\n    # Create some mock channels\n    text_channel = MagicMock(spec=discord.TextChannel)\n    text_channel.id = 111111111\n    text_channel.name = \"general\"\n    text_channel.type = discord.ChannelType.text\n    perms = MagicMock()\n    perms.view_channel = True\n    text_channel.permissions_for.return_value = perms\n\n    forum_channel = MagicMock(spec=discord.ForumChannel)\n    forum_channel.id = 222222222\n    forum_channel.name = \"forum\"\n    forum_channel.type = discord.ChannelType.forum\n    forum_channel.permissions_for.return_value = perms\n\n    guild.channels = [text_channel, forum_channel]\n    guild.text_channels = [text_channel]\n    guild.forum_channels = [forum_channel]\n\n    return guild\n\n\n@pytest.fixture\ndef mock_discord_message(mock_bot_user: MagicMock) -> MagicMock:  # noqa: ARG001\n    \"\"\"Mock Discord message for testing.\"\"\"\n    msg = MagicMock(spec=discord.Message)\n    msg.id = 555555555\n    msg.author = MagicMock()\n    msg.author.id = 444444444\n    msg.author.bot = False\n    msg.author.display_name = \"TestUser\"\n    msg.content = \"Hello bot\"\n    msg.guild = MagicMock()\n    msg.guild.id = 987654321\n    msg.guild.name = \"Test Server\"\n    msg.channel = MagicMock()\n    msg.channel.id = 111111111\n    msg.channel.name = \"general\"\n    msg.type = discord.MessageType.default\n    msg.mentions = []\n    msg.role_mentions = []\n    msg.channel_mentions = []\n    msg.reference = None\n    return msg\n\n\n@pytest.fixture\ndef mock_thread_with_messages(mock_bot_user: MagicMock) -> MagicMock:\n    \"\"\"Mock Discord thread with message history.\"\"\"\n    thread = MagicMock(spec=discord.Thread)\n    thread.id = 666666666\n    thread.name = \"Test Thread\"\n    thread.owner_id = mock_bot_user.id\n    thread.parent = MagicMock(spec=discord.TextChannel)\n    thread.parent.id = 111111111\n\n    # Mock starter message\n    starter = mock_message(\n        content=\"Thread starter message\",\n        author_bot=False,\n        message_id=thread.id,\n    )\n\n    messages = [\n        mock_message(author_bot=False, content=\"User msg 1\", message_id=100),\n        mock_message(author_bot=True, content=\"Bot response\", message_id=101),\n        mock_message(author_bot=False, content=\"User msg 2\", message_id=102),\n    ]\n\n    # Setup async iterator for history\n    def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001\n        return AsyncIteratorMock(messages)\n\n    thread.history = history\n\n    # Mock parent.fetch_message\n    async def fetch_starter(msg_id: int) -> MagicMock:\n        if msg_id == thread.id:\n            return starter\n        raise discord.NotFound(MagicMock(), \"Not found\")\n\n    thread.parent.fetch_message = AsyncMock(side_effect=fetch_starter)\n\n    return thread\n\n\n@pytest.fixture\ndef mock_thread_forum_parent() -> MagicMock:\n    \"\"\"Mock thread with ForumChannel parent (special case).\"\"\"\n    thread = MagicMock(spec=discord.Thread)\n    thread.id = 777777777\n    thread.name = \"Forum Post\"\n    thread.parent = MagicMock(spec=discord.ForumChannel)\n    thread.parent.id = 222222222\n    return thread\n\n\n@pytest.fixture\ndef mock_reply_chain() -> MagicMock:\n    \"\"\"Mock message with reply chain.\"\"\"\n    # Build chain backwards: msg3 -> msg2 -> msg1\n    ref3 = MagicMock()\n    ref3.message_id = 1003\n\n    ref2 = MagicMock()\n    ref2.message_id = 1002\n\n    msg3 = mock_message(content=\"Third message\", reference=None, message_id=1003)\n    msg2 = mock_message(content=\"Second message\", reference=ref3, message_id=1002)\n    msg1 = mock_message(content=\"First message\", reference=ref2, message_id=1001)\n\n    # Store messages for lookup\n    msg1._chain = {1002: msg2, 1003: msg3}\n    msg2._chain = {1003: msg3}\n\n    return msg1\n\n\n@pytest.fixture\ndef mock_guild_config_enabled() -> MagicMock:\n    \"\"\"Guild config that is enabled.\"\"\"\n    config = MagicMock()\n    config.id = 1\n    config.guild_id = 987654321\n    config.enabled = True\n    config.default_persona_id = 1\n    return config\n\n\n@pytest.fixture\ndef mock_guild_config_disabled() -> MagicMock:\n    \"\"\"Guild config that is disabled.\"\"\"\n    config = MagicMock()\n    config.id = 2\n    config.guild_id = 987654321\n    config.enabled = False\n    config.default_persona_id = None\n    return config\n\n\n@pytest.fixture\ndef mock_channel_config_factory() -> Callable[..., MagicMock]:\n    \"\"\"Factory fixture for creating channel configs with various settings.\"\"\"\n\n    def _make_config(\n        enabled: bool = True,\n        require_bot_invocation: bool = True,\n        thread_only_mode: bool = False,\n        persona_override_id: int | None = None,\n    ) -> MagicMock:\n        config = MagicMock()\n        config.id = random.randint(1, 1000)\n        config.channel_id = 111111111\n        config.enabled = enabled\n        config.require_bot_invocation = require_bot_invocation\n        config.thread_only_mode = thread_only_mode\n        config.persona_override_id = persona_override_id\n        return config\n\n    return _make_config\n\n\n@pytest.fixture\ndef mock_message_with_bot_mention(mock_bot_user: MagicMock) -> MagicMock:\n    \"\"\"Message that mentions the bot.\"\"\"\n    msg = MagicMock(spec=discord.Message)\n    msg.id = 888888888\n    msg.mentions = [mock_bot_user]\n    msg.author = MagicMock()\n    msg.author.id = 444444444\n    msg.author.bot = False\n    msg.author.display_name = \"TestUser\"\n    msg.type = discord.MessageType.default\n    msg.content = f\"<@{mock_bot_user.id}> hello\"\n    msg.reference = None\n    msg.guild = MagicMock()\n    msg.guild.id = 987654321\n    msg.channel = MagicMock()\n    msg.channel.id = 111111111\n    msg.role_mentions = []\n    msg.channel_mentions = []\n    return msg\n\n\n@pytest.fixture\ndef mock_guild_with_members() -> MagicMock:\n    \"\"\"Mock guild for mention resolution.\"\"\"\n    guild = MagicMock(spec=discord.Guild)\n\n    def get_member(member_id: int) -> MagicMock:\n        member = MagicMock()\n        member.display_name = f\"User{member_id}\"\n        return member\n\n    def get_role(role_id: int) -> MagicMock:\n        role = MagicMock()\n        role.name = f\"Role{role_id}\"\n        return role\n\n    def get_channel(channel_id: int) -> MagicMock:\n        channel = MagicMock()\n        channel.name = f\"channel{channel_id}\"\n        return channel\n\n    guild.get_member = get_member\n    guild.get_role = get_role\n    guild.get_channel = get_channel\n    return guild\n"
  },
  {
    "path": "backend/tests/unit/onyx/onyxbot/discord/test_api_client.py",
    "content": "\"\"\"Unit tests for Discord bot API client.\n\nTests for OnyxAPIClient class functionality.\n\"\"\"\n\nfrom typing import Any\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport aiohttp\nimport pytest\n\nfrom onyx.chat.models import ChatFullResponse\nfrom onyx.onyxbot.discord.api_client import OnyxAPIClient\nfrom onyx.onyxbot.discord.constants import API_REQUEST_TIMEOUT\nfrom onyx.onyxbot.discord.exceptions import APIConnectionError\nfrom onyx.onyxbot.discord.exceptions import APIResponseError\nfrom onyx.onyxbot.discord.exceptions import APITimeoutError\n\n\nclass MockAsyncContextManager:\n    \"\"\"Helper class to create proper async context managers for testing.\"\"\"\n\n    def __init__(\n        self, return_value: Any = None, enter_side_effect: Exception | None = None\n    ) -> None:\n        self.return_value = return_value\n        self.enter_side_effect = enter_side_effect\n\n    async def __aenter__(self) -> Any:\n        if self.enter_side_effect:\n            raise self.enter_side_effect\n        return self.return_value\n\n    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:\n        pass\n\n\nclass TestClientLifecycle:\n    \"\"\"Tests for API client lifecycle management.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_initialize_creates_session(self) -> None:\n        \"\"\"initialize() creates aiohttp session.\"\"\"\n        client = OnyxAPIClient()\n        assert client._session is None\n\n        with patch(\"aiohttp.ClientSession\") as mock_session_class:\n            mock_session = MagicMock()\n            mock_session_class.return_value = mock_session\n\n            await client.initialize()\n\n        assert client._session is not None\n        mock_session_class.assert_called_once()\n\n    def test_is_initialized_before_init(self) -> None:\n        \"\"\"is_initialized returns False before initialize().\"\"\"\n        client = OnyxAPIClient()\n        assert client.is_initialized is False\n\n    @pytest.mark.asyncio\n    async def test_is_initialized_after_init(self) -> None:\n        \"\"\"is_initialized returns True after initialize().\"\"\"\n        client = OnyxAPIClient()\n\n        with patch(\"aiohttp.ClientSession\"):\n            await client.initialize()\n\n        assert client.is_initialized is True\n\n    @pytest.mark.asyncio\n    async def test_close_closes_session(self) -> None:\n        \"\"\"close() closes session and resets is_initialized.\"\"\"\n        client = OnyxAPIClient()\n\n        mock_session = AsyncMock()\n        with patch(\"aiohttp.ClientSession\", return_value=mock_session):\n            await client.initialize()\n            assert client.is_initialized is True\n\n            await client.close()\n\n        assert client.is_initialized is False\n        mock_session.close.assert_called_once()\n\n    @pytest.mark.asyncio\n    async def test_send_message_not_initialized(self) -> None:\n        \"\"\"send_chat_message() before initialize() raises APIConnectionError.\"\"\"\n        client = OnyxAPIClient()\n\n        with pytest.raises(APIConnectionError) as exc_info:\n            await client.send_chat_message(\"test\", \"api_key\")\n\n        assert \"not initialized\" in str(exc_info.value)\n\n\nclass TestSendChatMessage:\n    \"\"\"Tests for send_chat_message functionality.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_send_message_success(self) -> None:\n        \"\"\"Valid request returns ChatFullResponse.\"\"\"\n        client = OnyxAPIClient()\n\n        response_data = {\n            \"answer\": \"Test response\",\n            \"citations\": [],\n            \"error_msg\": None,\n        }\n\n        mock_response = MagicMock()\n        mock_response.status = 200\n        mock_response.json = AsyncMock(return_value=response_data)\n\n        mock_session = MagicMock()\n        mock_session.post = MagicMock(\n            return_value=MockAsyncContextManager(return_value=mock_response)\n        )\n\n        client._session = mock_session\n\n        with patch.object(\n            ChatFullResponse,\n            \"model_validate\",\n            return_value=MagicMock(answer=\"Test response\", error_msg=None),\n        ):\n            result = await client.send_chat_message(\"Hello\", \"api_key_123\")\n\n        assert result is not None\n\n    @pytest.mark.asyncio\n    async def test_send_message_with_persona(self) -> None:\n        \"\"\"persona_id is passed to API.\"\"\"\n        client = OnyxAPIClient()\n\n        response_data = {\"answer\": \"Response\", \"citations\": [], \"error_msg\": None}\n\n        mock_response = MagicMock()\n        mock_response.status = 200\n        mock_response.json = AsyncMock(return_value=response_data)\n\n        mock_session = MagicMock()\n        mock_post = MagicMock(\n            return_value=MockAsyncContextManager(return_value=mock_response)\n        )\n        mock_session.post = mock_post\n\n        client._session = mock_session\n\n        with patch.object(\n            ChatFullResponse,\n            \"model_validate\",\n            return_value=MagicMock(answer=\"Response\", error_msg=None),\n        ):\n            await client.send_chat_message(\"Hello\", \"api_key\", persona_id=5)\n\n        # Verify persona was included in request\n        call_args = mock_post.call_args\n        json_data = call_args.kwargs.get(\"json\") or call_args[1].get(\"json\")\n        assert json_data is not None\n\n    @pytest.mark.asyncio\n    async def test_send_message_401_error(self) -> None:\n        \"\"\"Invalid API key returns APIResponseError with 401.\"\"\"\n        client = OnyxAPIClient()\n\n        mock_response = MagicMock()\n        mock_response.status = 401\n\n        mock_session = MagicMock()\n        mock_session.post = MagicMock(\n            return_value=MockAsyncContextManager(return_value=mock_response)\n        )\n\n        client._session = mock_session\n\n        with pytest.raises(APIResponseError) as exc_info:\n            await client.send_chat_message(\"Hello\", \"bad_key\")\n\n        assert exc_info.value.status_code == 401\n\n    @pytest.mark.asyncio\n    async def test_send_message_403_error(self) -> None:\n        \"\"\"Persona not accessible returns APIResponseError with 403.\"\"\"\n        client = OnyxAPIClient()\n\n        mock_response = MagicMock()\n        mock_response.status = 403\n\n        mock_session = MagicMock()\n        mock_session.post = MagicMock(\n            return_value=MockAsyncContextManager(return_value=mock_response)\n        )\n\n        client._session = mock_session\n\n        with pytest.raises(APIResponseError) as exc_info:\n            await client.send_chat_message(\"Hello\", \"api_key\", persona_id=999)\n\n        assert exc_info.value.status_code == 403\n\n    @pytest.mark.asyncio\n    async def test_send_message_timeout(self) -> None:\n        \"\"\"Request timeout raises APITimeoutError.\"\"\"\n        client = OnyxAPIClient()\n\n        mock_session = MagicMock()\n        mock_session.post = MagicMock(\n            return_value=MockAsyncContextManager(\n                enter_side_effect=TimeoutError(\"Timeout\")\n            )\n        )\n\n        client._session = mock_session\n\n        with pytest.raises(APITimeoutError):\n            await client.send_chat_message(\"Hello\", \"api_key\")\n\n    @pytest.mark.asyncio\n    async def test_send_message_connection_error(self) -> None:\n        \"\"\"Network failure raises APIConnectionError.\"\"\"\n        client = OnyxAPIClient()\n\n        mock_session = MagicMock()\n        mock_session.post = MagicMock(\n            return_value=MockAsyncContextManager(\n                enter_side_effect=aiohttp.ClientConnectorError(\n                    MagicMock(), OSError(\"Connection refused\")\n                )\n            )\n        )\n\n        client._session = mock_session\n\n        with pytest.raises(APIConnectionError):\n            await client.send_chat_message(\"Hello\", \"api_key\")\n\n    @pytest.mark.asyncio\n    async def test_send_message_server_error(self) -> None:\n        \"\"\"500 response raises APIResponseError with 500.\"\"\"\n        client = OnyxAPIClient()\n\n        mock_response = MagicMock()\n        mock_response.status = 500\n        mock_response.text = AsyncMock(return_value=\"Internal Server Error\")\n\n        mock_session = MagicMock()\n        mock_session.post = MagicMock(\n            return_value=MockAsyncContextManager(return_value=mock_response)\n        )\n\n        client._session = mock_session\n\n        with pytest.raises(APIResponseError) as exc_info:\n            await client.send_chat_message(\"Hello\", \"api_key\")\n\n        assert exc_info.value.status_code == 500\n\n\nclass TestHealthCheck:\n    \"\"\"Tests for health_check functionality.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_health_check_success(self) -> None:\n        \"\"\"Server healthy returns True.\"\"\"\n        client = OnyxAPIClient()\n\n        mock_response = MagicMock()\n        mock_response.status = 200\n\n        mock_session = MagicMock()\n        mock_session.get = MagicMock(\n            return_value=MockAsyncContextManager(return_value=mock_response)\n        )\n\n        client._session = mock_session\n\n        result = await client.health_check()\n        assert result is True\n\n    @pytest.mark.asyncio\n    async def test_health_check_failure(self) -> None:\n        \"\"\"Server unhealthy returns False.\"\"\"\n        client = OnyxAPIClient()\n\n        mock_response = MagicMock()\n        mock_response.status = 503\n\n        mock_session = MagicMock()\n        mock_session.get = MagicMock(\n            return_value=MockAsyncContextManager(return_value=mock_response)\n        )\n\n        client._session = mock_session\n\n        result = await client.health_check()\n        assert result is False\n\n    @pytest.mark.asyncio\n    async def test_health_check_timeout(self) -> None:\n        \"\"\"Request times out returns False.\"\"\"\n        client = OnyxAPIClient()\n\n        mock_session = MagicMock()\n        mock_session.get = MagicMock(\n            return_value=MockAsyncContextManager(\n                enter_side_effect=TimeoutError(\"Timeout\")\n            )\n        )\n\n        client._session = mock_session\n\n        result = await client.health_check()\n        assert result is False\n\n    @pytest.mark.asyncio\n    async def test_health_check_not_initialized(self) -> None:\n        \"\"\"Health check before initialize returns False.\"\"\"\n        client = OnyxAPIClient()\n\n        result = await client.health_check()\n        assert result is False\n\n\nclass TestResponseParsing:\n    \"\"\"Tests for API response parsing.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_response_malformed_json(self) -> None:\n        \"\"\"API returns invalid JSON raises exception.\"\"\"\n        client = OnyxAPIClient()\n\n        mock_response = MagicMock()\n        mock_response.status = 200\n        mock_response.json = AsyncMock(side_effect=ValueError(\"Invalid JSON\"))\n\n        mock_session = MagicMock()\n        mock_session.post = MagicMock(\n            return_value=MockAsyncContextManager(return_value=mock_response)\n        )\n\n        client._session = mock_session\n\n        with pytest.raises(ValueError):\n            await client.send_chat_message(\"Hello\", \"api_key\")\n\n    @pytest.mark.asyncio\n    async def test_response_with_error_msg(self) -> None:\n        \"\"\"200 status but error_msg present - warning logged, response returned.\"\"\"\n        client = OnyxAPIClient()\n\n        response_data = {\n            \"answer\": \"Partial response\",\n            \"citations\": [],\n            \"error_msg\": \"Some warning\",\n        }\n\n        mock_response = MagicMock()\n        mock_response.status = 200\n        mock_response.json = AsyncMock(return_value=response_data)\n\n        mock_session = MagicMock()\n        mock_session.post = MagicMock(\n            return_value=MockAsyncContextManager(return_value=mock_response)\n        )\n\n        client._session = mock_session\n\n        mock_result = MagicMock()\n        mock_result.answer = \"Partial response\"\n        mock_result.error_msg = \"Some warning\"\n\n        with patch.object(ChatFullResponse, \"model_validate\", return_value=mock_result):\n            result = await client.send_chat_message(\"Hello\", \"api_key\")\n\n        # Should still return response\n        assert result is not None\n\n    @pytest.mark.asyncio\n    async def test_response_empty_answer(self) -> None:\n        \"\"\"answer field is empty string - handled gracefully.\"\"\"\n        client = OnyxAPIClient()\n\n        response_data = {\n            \"answer\": \"\",\n            \"citations\": [],\n            \"error_msg\": None,\n        }\n\n        mock_response = MagicMock()\n        mock_response.status = 200\n        mock_response.json = AsyncMock(return_value=response_data)\n\n        mock_session = MagicMock()\n        mock_session.post = MagicMock(\n            return_value=MockAsyncContextManager(return_value=mock_response)\n        )\n\n        client._session = mock_session\n\n        mock_result = MagicMock()\n        mock_result.answer = \"\"\n        mock_result.error_msg = None\n\n        with patch.object(ChatFullResponse, \"model_validate\", return_value=mock_result):\n            result = await client.send_chat_message(\"Hello\", \"api_key\")\n\n        # Should return response even with empty answer\n        assert result is not None\n\n\nclass TestClientConfiguration:\n    \"\"\"Tests for client configuration.\"\"\"\n\n    def test_default_timeout(self) -> None:\n        \"\"\"Client uses API_REQUEST_TIMEOUT by default.\"\"\"\n        client = OnyxAPIClient()\n        assert client._timeout == API_REQUEST_TIMEOUT\n\n    def test_custom_timeout(self) -> None:\n        \"\"\"Client accepts custom timeout.\"\"\"\n        client = OnyxAPIClient(timeout=60)\n        assert client._timeout == 60\n\n    @pytest.mark.asyncio\n    async def test_double_initialize_warning(self) -> None:\n        \"\"\"Calling initialize() twice logs warning but doesn't error.\"\"\"\n        client = OnyxAPIClient()\n\n        with patch(\"aiohttp.ClientSession\") as mock_session_class:\n            mock_session = MagicMock()\n            mock_session_class.return_value = mock_session\n\n            await client.initialize()\n            # Second call should be safe\n            await client.initialize()\n\n        # Should only create one session\n        assert mock_session_class.call_count == 1\n"
  },
  {
    "path": "backend/tests/unit/onyx/onyxbot/discord/test_cache_manager.py",
    "content": "\"\"\"Unit tests for Discord bot cache manager.\n\nTests for DiscordCacheManager class functionality.\n\"\"\"\n\nimport asyncio\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.onyxbot.discord.cache import DiscordCacheManager\n\n\nclass TestCacheInitialization:\n    \"\"\"Tests for cache initialization.\"\"\"\n\n    def test_cache_starts_empty(self) -> None:\n        \"\"\"New cache manager has empty caches.\"\"\"\n        cache = DiscordCacheManager()\n        assert cache._guild_tenants == {}\n        assert cache._api_keys == {}\n        assert cache.is_initialized is False\n\n    @pytest.mark.asyncio\n    async def test_cache_refresh_all_loads_guilds(self) -> None:\n        \"\"\"refresh_all() loads all active guilds.\"\"\"\n        cache = DiscordCacheManager()\n\n        mock_config1 = MagicMock()\n        mock_config1.guild_id = 111111\n        mock_config1.enabled = True\n\n        mock_config2 = MagicMock()\n        mock_config2.guild_id = 222222\n        mock_config2.enabled = True\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.cache.get_all_tenant_ids\",\n                return_value=[\"tenant1\"],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop\",\n                return_value=lambda: set(),\n            ),\n            patch(\"onyx.onyxbot.discord.cache.get_session_with_tenant\") as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.cache.get_guild_configs\",\n                return_value=[mock_config1, mock_config2],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key\",\n                return_value=\"test_api_key\",\n            ),\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            await cache.refresh_all()\n\n        assert cache.is_initialized is True\n        assert 111111 in cache._guild_tenants\n        assert 222222 in cache._guild_tenants\n        assert cache._guild_tenants[111111] == \"tenant1\"\n        assert cache._guild_tenants[222222] == \"tenant1\"\n\n    @pytest.mark.asyncio\n    async def test_cache_refresh_provisions_api_key(self) -> None:\n        \"\"\"Refresh for tenant without key creates API key.\"\"\"\n        cache = DiscordCacheManager()\n\n        mock_config = MagicMock()\n        mock_config.guild_id = 111111\n        mock_config.enabled = True\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.cache.get_all_tenant_ids\",\n                return_value=[\"tenant1\"],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop\",\n                return_value=lambda: set(),\n            ),\n            patch(\"onyx.onyxbot.discord.cache.get_session_with_tenant\") as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.cache.get_guild_configs\",\n                return_value=[mock_config],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key\",\n                return_value=\"new_api_key\",\n            ) as mock_provision,\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            await cache.refresh_all()\n\n        assert cache._api_keys.get(\"tenant1\") == \"new_api_key\"\n        mock_provision.assert_called()\n\n\nclass TestCacheLookups:\n    \"\"\"Tests for cache lookup operations.\"\"\"\n\n    def test_get_tenant_returns_correct(self) -> None:\n        \"\"\"Lookup registered guild returns correct tenant ID.\"\"\"\n        cache = DiscordCacheManager()\n        cache._guild_tenants[123456] = \"tenant1\"\n\n        result = cache.get_tenant(123456)\n        assert result == \"tenant1\"\n\n    def test_get_tenant_returns_none_unknown(self) -> None:\n        \"\"\"Lookup unregistered guild returns None.\"\"\"\n        cache = DiscordCacheManager()\n\n        result = cache.get_tenant(999999)\n        assert result is None\n\n    def test_get_api_key_returns_correct(self) -> None:\n        \"\"\"Lookup tenant's API key returns valid key.\"\"\"\n        cache = DiscordCacheManager()\n        cache._api_keys[\"tenant1\"] = \"api_key_123\"\n\n        result = cache.get_api_key(\"tenant1\")\n        assert result == \"api_key_123\"\n\n    def test_get_api_key_returns_none_unknown(self) -> None:\n        \"\"\"Lookup unknown tenant returns None.\"\"\"\n        cache = DiscordCacheManager()\n\n        result = cache.get_api_key(\"unknown_tenant\")\n        assert result is None\n\n    def test_get_all_guild_ids(self) -> None:\n        \"\"\"After loading returns all cached guild IDs.\"\"\"\n        cache = DiscordCacheManager()\n        cache._guild_tenants = {111: \"t1\", 222: \"t2\", 333: \"t1\"}\n\n        result = cache.get_all_guild_ids()\n        assert set(result) == {111, 222, 333}\n\n\nclass TestCacheUpdates:\n    \"\"\"Tests for cache update operations.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_refresh_guild_adds_new(self) -> None:\n        \"\"\"refresh_guild() for new guild adds it to cache.\"\"\"\n        cache = DiscordCacheManager()\n\n        mock_config = MagicMock()\n        mock_config.guild_id = 111111\n        mock_config.enabled = True\n\n        with (\n            patch(\"onyx.onyxbot.discord.cache.get_session_with_tenant\") as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.cache.get_guild_configs\",\n                return_value=[mock_config],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key\",\n                return_value=\"api_key\",\n            ),\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            await cache.refresh_guild(111111, \"tenant1\")\n\n        assert cache.get_tenant(111111) == \"tenant1\"\n\n    @pytest.mark.asyncio\n    async def test_refresh_guild_verifies_active(self) -> None:\n        \"\"\"refresh_guild() for disabled guild doesn't add it.\"\"\"\n        cache = DiscordCacheManager()\n\n        mock_config = MagicMock()\n        mock_config.guild_id = 111111\n        mock_config.enabled = False  # Disabled!\n\n        with (\n            patch(\"onyx.onyxbot.discord.cache.get_session_with_tenant\") as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.cache.get_guild_configs\",\n                return_value=[mock_config],\n            ),\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            await cache.refresh_guild(111111, \"tenant1\")\n\n        # Should not be added because it's disabled\n        assert cache.get_tenant(111111) is None\n\n    def test_remove_guild(self) -> None:\n        \"\"\"remove_guild() removes guild from cache.\"\"\"\n        cache = DiscordCacheManager()\n        cache._guild_tenants[111111] = \"tenant1\"\n\n        cache.remove_guild(111111)\n\n        assert cache.get_tenant(111111) is None\n\n    def test_clear_removes_all(self) -> None:\n        \"\"\"clear() empties all caches.\"\"\"\n        cache = DiscordCacheManager()\n        cache._guild_tenants = {111: \"t1\", 222: \"t2\"}\n        cache._api_keys = {\"t1\": \"key1\", \"t2\": \"key2\"}\n        cache._initialized = True\n\n        cache.clear()\n\n        assert cache._guild_tenants == {}\n        assert cache._api_keys == {}\n        assert cache.is_initialized is False\n\n\nclass TestThreadSafety:\n    \"\"\"Tests for thread/async safety.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_concurrent_refresh_no_race(self) -> None:\n        \"\"\"Multiple concurrent refresh_all() calls don't corrupt data.\"\"\"\n        cache = DiscordCacheManager()\n\n        mock_config = MagicMock()\n        mock_config.guild_id = 111111\n        mock_config.enabled = True\n\n        call_count = 0\n\n        async def slow_refresh() -> tuple[list[int], str]:\n            nonlocal call_count\n            call_count += 1\n            # Simulate slow operation\n            await asyncio.sleep(0.01)\n            return ([111111], \"api_key\")\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.cache.get_all_tenant_ids\",\n                return_value=[\"tenant1\"],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop\",\n                return_value=lambda: set(),\n            ),\n            patch.object(cache, \"_load_tenant_data\", side_effect=slow_refresh),\n        ):\n            # Run multiple concurrent refreshes\n            await asyncio.gather(\n                cache.refresh_all(),\n                cache.refresh_all(),\n                cache.refresh_all(),\n            )\n\n        # Each refresh should complete without error\n        assert cache.is_initialized is True\n\n    @pytest.mark.asyncio\n    async def test_concurrent_read_write(self) -> None:\n        \"\"\"Read during refresh doesn't cause exceptions.\"\"\"\n        cache = DiscordCacheManager()\n        cache._guild_tenants[111111] = \"tenant1\"\n\n        async def read_loop() -> None:\n            for _ in range(10):\n                cache.get_tenant(111111)\n                await asyncio.sleep(0.001)\n\n        async def write_loop() -> None:\n            for i in range(10):\n                cache._guild_tenants[200000 + i] = f\"tenant{i}\"\n                await asyncio.sleep(0.001)\n\n        # Should not raise any exceptions\n        await asyncio.gather(read_loop(), write_loop())\n\n\nclass TestAPIKeyProvisioning:\n    \"\"\"Tests for API key provisioning via cache refresh.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_api_key_created_on_first_refresh(self) -> None:\n        \"\"\"Cache refresh with no existing key creates new API key.\"\"\"\n        cache = DiscordCacheManager()\n\n        mock_config = MagicMock()\n        mock_config.guild_id = 111111\n        mock_config.enabled = True\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.cache.get_all_tenant_ids\",\n                return_value=[\"tenant1\"],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop\",\n                return_value=lambda: set(),\n            ),\n            patch(\"onyx.onyxbot.discord.cache.get_session_with_tenant\") as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.cache.get_guild_configs\",\n                return_value=[mock_config],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key\",\n                return_value=\"new_api_key_123\",\n            ) as mock_create,\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            await cache.refresh_all()\n\n        mock_create.assert_called_once()\n        assert cache.get_api_key(\"tenant1\") == \"new_api_key_123\"\n\n    @pytest.mark.asyncio\n    async def test_api_key_cached_after_creation(self) -> None:\n        \"\"\"Subsequent lookups after creation use cached key.\"\"\"\n        cache = DiscordCacheManager()\n        cache._api_keys[\"tenant1\"] = \"cached_key\"\n\n        mock_config = MagicMock()\n        mock_config.guild_id = 111111\n        mock_config.enabled = True\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.cache.get_all_tenant_ids\",\n                return_value=[\"tenant1\"],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop\",\n                return_value=lambda: set(),\n            ),\n            patch(\"onyx.onyxbot.discord.cache.get_session_with_tenant\") as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.cache.get_guild_configs\",\n                return_value=[mock_config],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key\",\n            ) as mock_create,\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            await cache.refresh_all()\n\n        # Should NOT call create because key is already cached\n        mock_create.assert_not_called()\n        # Cached key should be preserved after refresh\n        assert cache.get_api_key(\"tenant1\") == \"cached_key\"\n\n\nclass TestGatedTenantHandling:\n    \"\"\"Tests for gated tenant filtering.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_refresh_skips_gated_tenants(self) -> None:\n        \"\"\"Gated tenant's guilds are not loaded.\"\"\"\n        cache = DiscordCacheManager()\n\n        # tenant2 is gated\n        gated_tenants = {\"tenant2\"}\n\n        mock_config_t1 = MagicMock()\n        mock_config_t1.guild_id = 111111\n        mock_config_t1.enabled = True\n\n        mock_config_t2 = MagicMock()\n        mock_config_t2.guild_id = 222222\n        mock_config_t2.enabled = True\n\n        def mock_get_configs(db: MagicMock) -> list[MagicMock]:  # noqa: ARG001\n            # Track which tenant this was called for\n            return [mock_config_t1]  # Always return same for simplicity\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.cache.get_all_tenant_ids\",\n                return_value=[\"tenant1\", \"tenant2\"],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop\",\n                return_value=lambda: gated_tenants,\n            ),\n            patch(\"onyx.onyxbot.discord.cache.get_session_with_tenant\") as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.cache.get_guild_configs\",\n                side_effect=mock_get_configs,\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key\",\n                return_value=\"api_key\",\n            ),\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            await cache.refresh_all()\n\n        # Only tenant1 should be loaded (tenant2 is gated)\n        assert \"tenant1\" in cache._api_keys and 111111 in cache._guild_tenants\n        # tenant2's guilds should NOT be in cache\n        assert \"tenant2\" not in cache._api_keys and 222222 not in cache._guild_tenants\n\n    @pytest.mark.asyncio\n    async def test_gated_check_calls_ee_function(self) -> None:\n        \"\"\"Refresh all tenants calls fetch_ee_implementation_or_noop.\"\"\"\n        cache = DiscordCacheManager()\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.cache.get_all_tenant_ids\",\n                return_value=[\"tenant1\"],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop\",\n                return_value=lambda: set(),\n            ) as mock_ee,\n            patch(\"onyx.onyxbot.discord.cache.get_session_with_tenant\") as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.cache.get_guild_configs\",\n                return_value=[],\n            ),\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            await cache.refresh_all()\n\n        mock_ee.assert_called_once()\n\n    @pytest.mark.asyncio\n    async def test_ungated_tenant_included(self) -> None:\n        \"\"\"Regular (ungated) tenant has guilds loaded normally.\"\"\"\n        cache = DiscordCacheManager()\n\n        mock_config = MagicMock()\n        mock_config.guild_id = 111111\n        mock_config.enabled = True\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.cache.get_all_tenant_ids\",\n                return_value=[\"tenant1\"],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop\",\n                return_value=lambda: set(),  # No gated tenants\n            ),\n            patch(\"onyx.onyxbot.discord.cache.get_session_with_tenant\") as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.cache.get_guild_configs\",\n                return_value=[mock_config],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.get_or_create_discord_service_api_key\",\n                return_value=\"api_key\",\n            ),\n        ):\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            await cache.refresh_all()\n\n        assert cache.get_tenant(111111) == \"tenant1\"\n\n\nclass TestCacheErrorHandling:\n    \"\"\"Tests for error handling in cache operations.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_refresh_all_handles_tenant_error(self) -> None:\n        \"\"\"Error loading one tenant doesn't stop others.\"\"\"\n        cache = DiscordCacheManager()\n\n        call_count = 0\n\n        async def mock_load(tenant_id: str) -> tuple[list[int], str]:\n            nonlocal call_count\n            call_count += 1\n            if tenant_id == \"tenant1\":\n                raise Exception(\"Tenant 1 error\")\n            return ([222222], \"api_key\")\n\n        with (\n            patch(\n                \"onyx.onyxbot.discord.cache.get_all_tenant_ids\",\n                return_value=[\"tenant1\", \"tenant2\"],\n            ),\n            patch(\n                \"onyx.onyxbot.discord.cache.fetch_ee_implementation_or_noop\",\n                return_value=lambda: set(),\n            ),\n            patch.object(cache, \"_load_tenant_data\", side_effect=mock_load),\n        ):\n            await cache.refresh_all()\n\n        # Should still complete and load tenant2\n        assert call_count == 2  # Both tenants attempted\n        assert cache.get_tenant(222222) == \"tenant2\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/onyxbot/discord/test_context_builders.py",
    "content": "\"\"\"Unit tests for Discord bot context builders.\n\nTests the thread and reply context building logic with mocked Discord API.\n\"\"\"\n\nfrom typing import Any\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\n\nimport discord\nimport pytest\n\nfrom onyx.onyxbot.discord.constants import MAX_CONTEXT_MESSAGES\nfrom onyx.onyxbot.discord.handle_message import _build_conversation_context\nfrom onyx.onyxbot.discord.handle_message import _build_reply_chain_context\nfrom onyx.onyxbot.discord.handle_message import _build_thread_context\nfrom onyx.onyxbot.discord.handle_message import _format_messages_as_context\nfrom onyx.onyxbot.discord.handle_message import format_message_content\nfrom tests.unit.onyx.onyxbot.discord.conftest import AsyncIteratorMock\nfrom tests.unit.onyx.onyxbot.discord.conftest import mock_message\n\n\nclass TestThreadContextBuilder:\n    \"\"\"Tests for _build_thread_context function.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_build_thread_context_basic(\n        self, mock_thread_with_messages: MagicMock, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Thread with messages returns context in order.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 999  # Current message ID\n        msg.channel = mock_thread_with_messages\n\n        result = await _build_thread_context(msg, mock_bot_user)\n\n        assert result is not None\n        assert \"Conversation history\" in result\n        # Should contain message content\n        assert \"User msg\" in result or \"Bot response\" in result\n\n    @pytest.mark.asyncio\n    async def test_build_thread_context_max_limit(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Thread with 20 messages returns only MAX_CONTEXT_MESSAGES.\"\"\"\n        # Create 20 messages\n        messages = [\n            mock_message(content=f\"Message {i}\", message_id=i) for i in range(20)\n        ]\n\n        thread = MagicMock(spec=discord.Thread)\n        thread.id = 666666\n        thread.parent = MagicMock(spec=discord.TextChannel)\n\n        def history(**kwargs: Any) -> AsyncIteratorMock:\n            limit = kwargs.get(\"limit\", MAX_CONTEXT_MESSAGES)\n            return AsyncIteratorMock(messages[:limit])\n\n        thread.history = history\n        thread.parent.fetch_message = AsyncMock(\n            side_effect=discord.NotFound(MagicMock(), \"\")\n        )\n\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 999\n        msg.channel = thread\n\n        result = await _build_thread_context(msg, mock_bot_user)\n\n        assert result is not None\n        # Should only have MAX_CONTEXT_MESSAGES worth of content\n\n    @pytest.mark.asyncio\n    async def test_build_thread_context_includes_starter(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Thread with starter message includes it at beginning.\"\"\"\n        starter = mock_message(\n            content=\"This is the thread starter\",\n            message_id=666666,\n        )\n\n        thread = MagicMock(spec=discord.Thread)\n        thread.id = 666666\n        thread.parent = MagicMock(spec=discord.TextChannel)\n        thread.parent.fetch_message = AsyncMock(return_value=starter)\n\n        messages = [\n            mock_message(content=\"Reply 1\", message_id=1),\n            mock_message(content=\"Reply 2\", message_id=2),\n        ]\n\n        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001\n            return AsyncIteratorMock(messages)\n\n        thread.history = history\n\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 999\n        msg.channel = thread\n\n        result = await _build_thread_context(msg, mock_bot_user)\n\n        assert result is not None\n        assert \"thread starter\" in result\n\n    @pytest.mark.asyncio\n    async def test_build_thread_context_filters_system_messages(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Thread with system messages only includes content messages.\"\"\"\n        messages = [\n            mock_message(\n                content=\"Normal message\", message_type=discord.MessageType.default\n            ),\n            mock_message(\n                content=\"\", message_type=discord.MessageType.pins_add\n            ),  # System\n            mock_message(\n                content=\"Another normal\", message_type=discord.MessageType.reply\n            ),\n        ]\n\n        thread = MagicMock(spec=discord.Thread)\n        thread.id = 666666\n        thread.parent = MagicMock(spec=discord.TextChannel)\n        thread.parent.fetch_message = AsyncMock(\n            side_effect=discord.NotFound(MagicMock(), \"\")\n        )\n\n        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001\n            return AsyncIteratorMock(messages)\n\n        thread.history = history\n\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 999\n        msg.channel = thread\n\n        result = await _build_thread_context(msg, mock_bot_user)\n\n        # Should not include system message type\n        assert result is not None\n\n    @pytest.mark.asyncio\n    async def test_build_thread_context_includes_bot_messages(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Bot messages in thread are included for context.\"\"\"\n        messages = [\n            mock_message(content=\"User question\", author_bot=False),\n            mock_message(\n                content=\"Bot response\",\n                author_bot=True,\n                author_id=mock_bot_user.id,\n                author_display_name=\"OnyxBot\",\n            ),\n        ]\n\n        thread = MagicMock(spec=discord.Thread)\n        thread.id = 666666\n        thread.parent = MagicMock(spec=discord.TextChannel)\n        thread.parent.fetch_message = AsyncMock(\n            side_effect=discord.NotFound(MagicMock(), \"\")\n        )\n\n        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001\n            return AsyncIteratorMock(messages)\n\n        thread.history = history\n\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 999\n        msg.channel = thread\n\n        result = await _build_thread_context(msg, mock_bot_user)\n\n        assert result is not None\n        assert \"Bot response\" in result\n\n    @pytest.mark.asyncio\n    async def test_build_thread_context_empty_thread(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Thread with only system messages returns None.\"\"\"\n        messages = [\n            mock_message(content=\"\", message_type=discord.MessageType.pins_add),\n        ]\n\n        thread = MagicMock(spec=discord.Thread)\n        thread.id = 666666\n        thread.parent = MagicMock(spec=discord.TextChannel)\n        thread.parent.fetch_message = AsyncMock(\n            side_effect=discord.NotFound(MagicMock(), \"\")\n        )\n\n        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001\n            return AsyncIteratorMock(messages)\n\n        thread.history = history\n\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 999\n        msg.channel = thread\n\n        await _build_thread_context(msg, mock_bot_user)\n        # Should return None for empty context\n        # (depends on implementation - may return None or empty string)\n\n    @pytest.mark.asyncio\n    async def test_build_thread_context_forum_channel(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Thread parent is ForumChannel - does NOT fetch starter message.\"\"\"\n        messages = [\n            mock_message(content=\"Forum reply\", message_id=1),\n        ]\n\n        thread = MagicMock(spec=discord.Thread)\n        thread.id = 666666\n        thread.parent = MagicMock(spec=discord.ForumChannel)  # Forum!\n        # Set up mock before calling function so we can verify it wasn't called\n        thread.parent.fetch_message = AsyncMock()\n\n        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001\n            return AsyncIteratorMock(messages)\n\n        thread.history = history\n\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 999\n        msg.channel = thread\n\n        await _build_thread_context(msg, mock_bot_user)\n\n        # Should not try to fetch starter message for forum channels\n        thread.parent.fetch_message.assert_not_called()\n\n    @pytest.mark.asyncio\n    async def test_build_thread_context_starter_fetch_fails(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Starter message fetch raises NotFound - continues without starter.\"\"\"\n        messages = [\n            mock_message(content=\"Reply message\", message_id=1),\n        ]\n\n        thread = MagicMock(spec=discord.Thread)\n        thread.id = 666666\n        thread.parent = MagicMock(spec=discord.TextChannel)\n        thread.parent.fetch_message = AsyncMock(\n            side_effect=discord.NotFound(MagicMock(), \"Not found\")\n        )\n\n        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001\n            return AsyncIteratorMock(messages)\n\n        thread.history = history\n\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 999\n        msg.channel = thread\n\n        result = await _build_thread_context(msg, mock_bot_user)\n\n        # Should still return context without starter\n        assert result is not None\n\n    @pytest.mark.asyncio\n    async def test_build_thread_context_deduplicates_starter(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Starter also in recent history is not duplicated.\"\"\"\n        starter = mock_message(content=\"Thread starter\", message_id=666666)\n\n        messages = [\n            starter,  # Starter in history\n            mock_message(content=\"Reply\", message_id=1),\n        ]\n\n        thread = MagicMock(spec=discord.Thread)\n        thread.id = 666666\n        thread.parent = MagicMock(spec=discord.TextChannel)\n        thread.parent.fetch_message = AsyncMock(return_value=starter)\n\n        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001\n            return AsyncIteratorMock(messages)\n\n        thread.history = history\n\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 999\n        msg.channel = thread\n\n        result = await _build_thread_context(msg, mock_bot_user)\n\n        # Should only have starter once\n        if result:\n            assert (\n                result.count(\"Thread starter\") <= 2\n            )  # At most once in formatted output\n\n\nclass TestReplyChainContextBuilder:\n    \"\"\"Tests for _build_reply_chain_context function.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_build_reply_chain_single_reply(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Message replies to one message returns 1 message in chain.\"\"\"\n        parent = mock_message(content=\"Parent message\", message_id=100)\n        parent.reference = None\n\n        child = MagicMock(spec=discord.Message)\n        child.id = 200\n        child.reference = MagicMock()\n        child.reference.message_id = 100\n        child.channel = MagicMock()\n        child.channel.fetch_message = AsyncMock(return_value=parent)\n        child.channel.name = \"general\"\n\n        result = await _build_reply_chain_context(child, mock_bot_user)\n\n        assert result is not None\n        assert \"Parent message\" in result\n\n    @pytest.mark.asyncio\n    async def test_build_reply_chain_deep_chain(self, mock_bot_user: MagicMock) -> None:\n        \"\"\"A → B → C → D reply chain returns full chain in chronological order.\"\"\"\n        msg_d = mock_message(content=\"Message D\", message_id=4)\n        msg_d.reference = None\n\n        msg_c = mock_message(content=\"Message C\", message_id=3)\n        ref_c = MagicMock()\n        ref_c.message_id = 4\n        msg_c.reference = ref_c\n\n        msg_b = mock_message(content=\"Message B\", message_id=2)\n        ref_b = MagicMock()\n        ref_b.message_id = 3\n        msg_b.reference = ref_b\n\n        # Current message replying to B\n        ref_a = MagicMock()\n        ref_a.message_id = 2\n\n        msg_a = MagicMock(spec=discord.Message)\n        msg_a.id = 1\n        msg_a.reference = ref_a\n        msg_a.channel = MagicMock()\n        msg_a.channel.name = \"general\"\n\n        # Mock fetch to return the chain\n        message_map = {2: msg_b, 3: msg_c, 4: msg_d}\n\n        async def fetch_message(msg_id: int) -> MagicMock:\n            if msg_id in message_map:\n                return message_map[msg_id]\n            raise discord.NotFound(MagicMock(), \"Not found\")\n\n        msg_a.channel.fetch_message = AsyncMock(side_effect=fetch_message)\n\n        result = await _build_reply_chain_context(msg_a, mock_bot_user)\n\n        assert result is not None\n        # Should have all messages from the chain\n\n    @pytest.mark.asyncio\n    async def test_build_reply_chain_max_depth(self, mock_bot_user: MagicMock) -> None:\n        \"\"\"Chain depth > MAX_CONTEXT_MESSAGES stops at limit.\"\"\"\n        # Create a chain longer than MAX_CONTEXT_MESSAGES\n        messages = {}\n        for i in range(MAX_CONTEXT_MESSAGES + 5, 0, -1):\n            msg = mock_message(content=f\"Message {i}\", message_id=i)\n            if i < MAX_CONTEXT_MESSAGES + 5:\n                ref = MagicMock()\n                ref.message_id = i + 1\n                msg.reference = ref\n            else:\n                msg.reference = None\n            messages[i] = msg\n\n        # Start from message 1\n        start = MagicMock(spec=discord.Message)\n        start.id = 0\n        start.reference = MagicMock()\n        start.reference.message_id = 1\n        start.channel = MagicMock()\n        start.channel.name = \"general\"\n\n        async def fetch_message(msg_id: int) -> MagicMock:\n            if msg_id in messages:\n                return messages[msg_id]\n            raise discord.NotFound(MagicMock(), \"Not found\")\n\n        start.channel.fetch_message = AsyncMock(side_effect=fetch_message)\n\n        result = await _build_reply_chain_context(start, mock_bot_user)\n\n        # Should have at most MAX_CONTEXT_MESSAGES\n        assert result is not None\n\n    @pytest.mark.asyncio\n    async def test_build_reply_chain_no_reply(self, mock_bot_user: MagicMock) -> None:\n        \"\"\"Message is not a reply returns None.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.reference = None\n\n        result = await _build_reply_chain_context(msg, mock_bot_user)\n        assert result is None\n\n    @pytest.mark.asyncio\n    async def test_build_reply_chain_deleted_message(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Reply to deleted message handles gracefully with partial chain.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 200\n        msg.reference = MagicMock()\n        msg.reference.message_id = 100\n        msg.channel = MagicMock()\n        msg.channel.fetch_message = AsyncMock(\n            side_effect=discord.NotFound(MagicMock(), \"Not found\")\n        )\n        msg.channel.name = \"general\"\n\n        await _build_reply_chain_context(msg, mock_bot_user)\n        # Should handle gracefully - may return None or partial context\n        # Either is acceptable\n\n    @pytest.mark.asyncio\n    async def test_build_reply_chain_missing_reference_data(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"message.reference.message_id is None returns None.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.reference = MagicMock()\n        msg.reference.message_id = None\n\n        result = await _build_reply_chain_context(msg, mock_bot_user)\n        assert result is None\n\n    @pytest.mark.asyncio\n    async def test_build_reply_chain_http_exception(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"discord.HTTPException on fetch stops chain.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 200\n        msg.reference = MagicMock()\n        msg.reference.message_id = 100\n        msg.channel = MagicMock()\n        msg.channel.fetch_message = AsyncMock(\n            side_effect=discord.HTTPException(MagicMock(), \"HTTP error\")\n        )\n        msg.channel.name = \"general\"\n\n        await _build_reply_chain_context(msg, mock_bot_user)\n        # Should handle gracefully\n\n\nclass TestCombinedContext:\n    \"\"\"Tests for combined thread + reply context.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_combined_context_thread_with_reply(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Reply inside thread includes both contexts.\"\"\"\n        # Create a thread with messages\n        thread = MagicMock(spec=discord.Thread)\n        thread.id = 666666\n        thread.parent = MagicMock(spec=discord.TextChannel)\n        thread.parent.fetch_message = AsyncMock(\n            side_effect=discord.NotFound(MagicMock(), \"\")\n        )\n\n        # Thread history\n        thread_messages = [\n            mock_message(content=\"Thread msg 1\", message_id=1),\n            mock_message(content=\"Thread msg 2\", message_id=2),\n        ]\n\n        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001\n            return AsyncIteratorMock(thread_messages)\n\n        thread.history = history\n\n        # Message is a reply to another message in the thread\n        parent_msg = mock_message(content=\"Parent message\", message_id=2)\n        parent_msg.reference = None\n\n        ref = MagicMock()\n        ref.message_id = 2\n\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 999\n        msg.channel = thread\n        msg.reference = ref\n        msg.channel.fetch_message = AsyncMock(return_value=parent_msg)\n        msg.channel.name = \"test-thread\"\n\n        result = await _build_conversation_context(msg, mock_bot_user)\n\n        # Should have context from the thread\n        assert result is not None\n        assert \"Conversation history\" in result\n\n    @pytest.mark.asyncio\n    async def test_build_conversation_context_routes_to_thread(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Message in thread routes to _build_thread_context.\"\"\"\n        thread = MagicMock(spec=discord.Thread)\n        thread.id = 666666\n        thread.parent = MagicMock(spec=discord.TextChannel)\n        thread.parent.fetch_message = AsyncMock(\n            side_effect=discord.NotFound(MagicMock(), \"\")\n        )\n\n        messages = [mock_message(content=\"Thread msg\")]\n\n        def history(**kwargs: Any) -> AsyncIteratorMock:  # noqa: ARG001\n            return AsyncIteratorMock(messages)\n\n        thread.history = history\n\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 999\n        msg.channel = thread\n        msg.reference = None\n\n        result = await _build_conversation_context(msg, mock_bot_user)\n        assert result is not None\n\n    @pytest.mark.asyncio\n    async def test_build_conversation_context_routes_to_reply(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Message with reference routes to _build_reply_chain_context.\"\"\"\n        parent = mock_message(content=\"Parent\", message_id=100)\n        parent.reference = None\n\n        msg = MagicMock(spec=discord.Message)\n        msg.id = 200\n        msg.channel = MagicMock(spec=discord.TextChannel)  # Not a thread\n        msg.reference = MagicMock()\n        msg.reference.message_id = 100\n        msg.channel.fetch_message = AsyncMock(return_value=parent)\n        msg.channel.name = \"general\"\n\n        result = await _build_conversation_context(msg, mock_bot_user)\n        assert result is not None\n\n\nclass TestContextFormatting:\n    \"\"\"Tests for context formatting.\"\"\"\n\n    def test_format_message_content_mentions(self) -> None:\n        \"\"\"Messages with <@123> mentions are converted to @username.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.content = \"Hello <@123456789> how are you?\"\n\n        user = MagicMock()\n        user.id = 123456789\n        user.display_name = \"TestUser\"\n        msg.mentions = [user]\n        msg.role_mentions = []\n        msg.channel_mentions = []\n\n        result = format_message_content(msg)\n        assert \"@TestUser\" in result\n        assert \"<@123456789>\" not in result\n\n    def test_format_message_content_roles(self) -> None:\n        \"\"\"Messages with <@&456> roles are converted to @rolename.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.content = \"Attention <@&456789> members\"\n\n        role = MagicMock()\n        role.id = 456789\n        role.name = \"Moderators\"\n        msg.mentions = []\n        msg.role_mentions = [role]\n        msg.channel_mentions = []\n\n        result = format_message_content(msg)\n        assert \"@Moderators\" in result\n        assert \"<@&456789>\" not in result\n\n    def test_format_message_content_channels(self) -> None:\n        \"\"\"Messages with <#789> channels are converted to #channelname.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.content = \"Check out <#789012>\"\n\n        channel = MagicMock()\n        channel.id = 789012\n        channel.name = \"announcements\"\n        msg.mentions = []\n        msg.role_mentions = []\n        msg.channel_mentions = [channel]\n\n        result = format_message_content(msg)\n        assert \"#announcements\" in result\n        assert \"<#789012>\" not in result\n\n    def test_context_format_output(self, mock_bot_user: MagicMock) -> None:\n        \"\"\"Build full context has expected format.\"\"\"\n        messages: list[Any] = [\n            mock_message(content=\"Hello bot\", author_bot=False),\n        ]\n        messages[0].type = discord.MessageType.default\n\n        result = _format_messages_as_context(messages, mock_bot_user)\n\n        assert result is not None\n        assert \"Conversation history\" in result\n        assert \"---\" in result\n\n    def test_context_format_with_username(self, mock_bot_user: MagicMock) -> None:\n        \"\"\"Messages from users include @username: prefix.\"\"\"\n        msg = mock_message(content=\"User message\", author_bot=False)\n        msg.author.display_name = \"TestUser\"\n        msg.type = discord.MessageType.default\n\n        result = _format_messages_as_context([msg], mock_bot_user)\n\n        assert result is not None\n        assert \"@TestUser:\" in result\n\n    def test_context_format_bot_marker(self, mock_bot_user: MagicMock) -> None:\n        \"\"\"Bot messages in context are marked as OnyxBot:.\"\"\"\n        msg = mock_message(\n            content=\"Bot response\",\n            author_bot=True,\n            author_id=mock_bot_user.id,\n        )\n        msg.type = discord.MessageType.default\n\n        result = _format_messages_as_context([msg], mock_bot_user)\n\n        assert result is not None\n        assert \"OnyxBot:\" in result\n"
  },
  {
    "path": "backend/tests/unit/onyx/onyxbot/discord/test_discord_utils.py",
    "content": "\"\"\"Unit tests for Discord bot utilities.\n\nTests for:\n- Token management (get_bot_token)\n- Registration key parsing (parse_discord_registration_key, generate_discord_registration_key)\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.onyxbot.discord.utils import get_bot_token\nfrom onyx.server.manage.discord_bot.utils import generate_discord_registration_key\nfrom onyx.server.manage.discord_bot.utils import parse_discord_registration_key\nfrom onyx.server.manage.discord_bot.utils import REGISTRATION_KEY_PREFIX\n\n\nclass TestGetBotToken:\n    \"\"\"Tests for get_bot_token function.\"\"\"\n\n    def test_get_token_from_env(self) -> None:\n        \"\"\"When env var is set, returns env var.\"\"\"\n        with patch(\"onyx.onyxbot.discord.utils.DISCORD_BOT_TOKEN\", \"env_token_123\"):\n            result = get_bot_token()\n            assert result == \"env_token_123\"\n\n    def test_get_token_from_db(self) -> None:\n        \"\"\"When no env var and DB config exists, returns DB token.\"\"\"\n        mock_config = MagicMock()\n        mock_config.bot_token = \"db_token_456\"\n\n        with (\n            patch(\"onyx.onyxbot.discord.utils.DISCORD_BOT_TOKEN\", None),\n            patch(\"onyx.onyxbot.discord.utils.AUTH_TYPE\", \"basic\"),  # Not CLOUD\n            patch(\"onyx.onyxbot.discord.utils.get_session_with_tenant\") as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.utils.get_discord_bot_config\",\n                return_value=mock_config,\n            ),\n        ):\n            mock_session.return_value.__enter__ = MagicMock()\n            mock_session.return_value.__exit__ = MagicMock()\n            result = get_bot_token()\n            assert result == \"db_token_456\"\n\n    def test_get_token_none(self) -> None:\n        \"\"\"When no env var and no DB config, returns None.\"\"\"\n        with (\n            patch(\"onyx.onyxbot.discord.utils.DISCORD_BOT_TOKEN\", None),\n            patch(\"onyx.onyxbot.discord.utils.AUTH_TYPE\", \"basic\"),  # Not CLOUD\n            patch(\"onyx.onyxbot.discord.utils.get_session_with_tenant\") as mock_session,\n            patch(\n                \"onyx.onyxbot.discord.utils.get_discord_bot_config\",\n                return_value=None,\n            ),\n        ):\n            mock_session.return_value.__enter__ = MagicMock()\n            mock_session.return_value.__exit__ = MagicMock()\n            result = get_bot_token()\n            assert result is None\n\n    def test_get_token_env_priority(self) -> None:\n        \"\"\"When both env var and DB exist, env var takes priority.\"\"\"\n        mock_config = MagicMock()\n        mock_config.bot_token = \"db_token_456\"\n\n        with (\n            patch(\"onyx.onyxbot.discord.utils.DISCORD_BOT_TOKEN\", \"env_token_123\"),\n            patch(\n                \"onyx.onyxbot.discord.utils.get_discord_bot_config\",\n                return_value=mock_config,\n            ),\n        ):\n            result = get_bot_token()\n            # Should return env var, not DB token\n            assert result == \"env_token_123\"\n\n\nclass TestParseRegistrationKey:\n    \"\"\"Tests for parse_discord_registration_key function.\"\"\"\n\n    def test_parse_registration_key_valid(self) -> None:\n        \"\"\"Valid key format returns tenant_id.\"\"\"\n        key = \"discord_tenant123.randomtoken\"\n        result = parse_discord_registration_key(key)\n        assert result == \"tenant123\"\n\n    def test_parse_registration_key_invalid(self) -> None:\n        \"\"\"Malformed key returns None.\"\"\"\n        result = parse_discord_registration_key(\"malformed_key\")\n        assert result is None\n\n    def test_parse_registration_key_missing_prefix(self) -> None:\n        \"\"\"Key without 'discord_' prefix returns None.\"\"\"\n        key = \"tenant123.randomtoken\"\n        result = parse_discord_registration_key(key)\n        assert result is None\n\n    def test_parse_registration_key_missing_dot(self) -> None:\n        \"\"\"Key without separator '.' returns None.\"\"\"\n        key = \"discord_tenant123randomtoken\"\n        result = parse_discord_registration_key(key)\n        assert result is None\n\n    def test_parse_registration_key_empty_token(self) -> None:\n        \"\"\"Key with empty token part returns None.\"\"\"\n        # This test verifies behavior with empty token after dot\n        key = \"discord_tenant123.\"\n        result = parse_discord_registration_key(key)\n        # Current implementation allows empty token, but returns tenant\n        # If this should be invalid, update the implementation\n        assert result == \"tenant123\" or result is None\n\n    def test_parse_registration_key_url_encoded_tenant(self) -> None:\n        \"\"\"Tenant ID with URL encoding is decoded correctly.\"\"\"\n        # URL encoded \"my tenant\" -> \"my%20tenant\"\n        key = \"discord_my%20tenant.randomtoken\"\n        result = parse_discord_registration_key(key)\n        assert result == \"my tenant\"\n\n    def test_parse_registration_key_special_chars(self) -> None:\n        \"\"\"Key with special characters in tenant ID.\"\"\"\n        # Tenant with slashes (URL encoded)\n        key = \"discord_tenant%2Fwith%2Fslashes.randomtoken\"\n        result = parse_discord_registration_key(key)\n        assert result == \"tenant/with/slashes\"\n\n\nclass TestGenerateRegistrationKey:\n    \"\"\"Tests for generate_discord_registration_key function.\"\"\"\n\n    def test_generate_registration_key(self) -> None:\n        \"\"\"Generated key has correct format.\"\"\"\n        key = generate_discord_registration_key(\"tenant123\")\n\n        assert key.startswith(REGISTRATION_KEY_PREFIX)\n        assert \"tenant123\" in key\n        assert \".\" in key\n\n        # Parse it back to verify round-trip\n        parsed = parse_discord_registration_key(key)\n        assert parsed == \"tenant123\"\n\n    def test_generate_registration_key_unique(self) -> None:\n        \"\"\"Each generated key is unique.\"\"\"\n        keys = [generate_discord_registration_key(\"tenant123\") for _ in range(10)]\n        assert len(set(keys)) == 10  # All unique\n\n    def test_generate_registration_key_special_tenant(self) -> None:\n        \"\"\"Key generation handles special characters in tenant ID.\"\"\"\n        key = generate_discord_registration_key(\"my tenant/id\")\n\n        # Should be URL encoded\n        assert \"%20\" in key or \"%2F\" in key\n\n        # Parse it back\n        parsed = parse_discord_registration_key(key)\n        assert parsed == \"my tenant/id\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/onyxbot/discord/test_message_utils.py",
    "content": "\"\"\"Unit tests for Discord bot message utilities.\n\nTests for:\n- Message splitting (_split_message)\n- Citation formatting (_append_citations)\n\"\"\"\n\nfrom unittest.mock import MagicMock\n\nfrom onyx.chat.models import ChatFullResponse\nfrom onyx.onyxbot.discord.constants import MAX_MESSAGE_LENGTH\nfrom onyx.onyxbot.discord.handle_message import _append_citations\nfrom onyx.onyxbot.discord.handle_message import _split_message\n\n\nclass TestSplitMessage:\n    \"\"\"Tests for _split_message function.\"\"\"\n\n    def test_split_message_under_limit(self) -> None:\n        \"\"\"Message under 2000 chars returns single chunk.\"\"\"\n        content = \"x\" * 1999\n        chunks = _split_message(content)\n        assert len(chunks) == 1\n        assert chunks[0] == content\n\n    def test_split_message_at_limit(self) -> None:\n        \"\"\"Message exactly at 2000 chars returns single chunk.\"\"\"\n        content = \"x\" * MAX_MESSAGE_LENGTH\n        chunks = _split_message(content)\n        assert len(chunks) == 1\n        assert chunks[0] == content\n\n    def test_split_message_over_limit(self) -> None:\n        \"\"\"Message over 2000 chars splits into multiple chunks.\"\"\"\n        content = \"x\" * 2001\n        chunks = _split_message(content)\n        assert len(chunks) == 2\n        # All chunks should be <= MAX_MESSAGE_LENGTH\n        for chunk in chunks:\n            assert len(chunk) <= MAX_MESSAGE_LENGTH\n\n    def test_split_at_double_newline(self) -> None:\n        \"\"\"Prefers splitting at double newline.\"\"\"\n        # Create content with double newline near the end but before limit\n        first_part = \"x\" * 1500\n        second_part = \"y\" * 1000\n        content = f\"{first_part}\\n\\n{second_part}\"\n\n        chunks = _split_message(content)\n        assert len(chunks) == 2\n        # First chunk should end with or right after the double newline\n        assert chunks[0].endswith(\"\\n\\n\") or first_part in chunks[0]\n\n    def test_split_at_single_newline(self) -> None:\n        \"\"\"When no double newline, splits at single newline.\"\"\"\n        first_part = \"x\" * 1500\n        second_part = \"y\" * 1000\n        content = f\"{first_part}\\n{second_part}\"\n\n        chunks = _split_message(content)\n        assert len(chunks) == 2\n\n    def test_split_at_period_space(self) -> None:\n        \"\"\"When no newlines, splits at '. ' (period + space).\"\"\"\n        first_part = \"x\" * 1500\n        second_part = \"y\" * 1000\n        content = f\"{first_part}. {second_part}\"\n\n        chunks = _split_message(content)\n        assert len(chunks) == 2\n        # First chunk should include the period\n        assert chunks[0].endswith(\". \") or chunks[0].endswith(\".\")\n\n    def test_split_at_space(self) -> None:\n        \"\"\"When no better breakpoints, splits at space.\"\"\"\n        first_part = \"x\" * 1500\n        second_part = \"y\" * 1000\n        content = f\"{first_part} {second_part}\"\n\n        chunks = _split_message(content)\n        assert len(chunks) == 2\n\n    def test_split_no_breakpoint(self) -> None:\n        \"\"\"Handles gracefully when no breakpoints available (hard split).\"\"\"\n        # 2001 chars with no spaces or newlines\n        content = \"x\" * 2001\n        chunks = _split_message(content)\n        assert len(chunks) == 2\n        # Content should be preserved\n        assert \"\".join(chunks) == content\n\n    def test_split_threshold_50_percent(self) -> None:\n        \"\"\"Breakpoint at less than 50% of limit is skipped.\"\"\"\n        # Put a breakpoint early (at 40% = 800 chars)\n        # and another late (at 80% = 1600 chars)\n        early_part = \"x\" * 800\n        middle_part = \"m\" * 800  # Total: 1600\n        late_part = \"y\" * 600  # Total: 2200\n        content = f\"{early_part}\\n\\n{middle_part}\\n\\n{late_part}\"\n\n        chunks = _split_message(content)\n        # Should prefer the later breakpoint over the 40% one\n        assert len(chunks) == 2\n        # First chunk should be longer than 800 chars\n        assert len(chunks[0]) > 800\n\n    def test_split_multiple_chunks(self) -> None:\n        \"\"\"5000 char message splits into 3 chunks.\"\"\"\n        content = \"x\" * 5000\n        chunks = _split_message(content)\n        assert len(chunks) == 3\n        # Each chunk should be <= MAX_MESSAGE_LENGTH\n        for chunk in chunks:\n            assert len(chunk) <= MAX_MESSAGE_LENGTH\n\n    def test_split_preserves_content(self) -> None:\n        \"\"\"Concatenated chunks equal original content.\"\"\"\n        content = \"Hello world! \" * 200  # About 2600 chars\n        chunks = _split_message(content)\n        assert \"\".join(chunks) == content\n\n    def test_split_with_unicode(self) -> None:\n        \"\"\"Handles unicode characters correctly.\"\"\"\n        # Mix of ASCII and unicode\n        content = \"Hello \" + \"🎉\" * 500 + \" World \" + \"x\" * 1500\n        chunks = _split_message(content)\n        # Should not break in the middle of emoji\n        assert \"\".join(chunks) == content\n\n\nclass TestAppendCitations:\n    \"\"\"Tests for _append_citations function.\"\"\"\n\n    def _make_response(\n        self,\n        answer: str,\n        citations: list[dict] | None = None,\n        documents: list[dict] | None = None,\n    ) -> ChatFullResponse:\n        \"\"\"Helper to create ChatFullResponse with citations.\"\"\"\n        response = MagicMock(spec=ChatFullResponse)\n        response.answer = answer\n\n        if citations:\n            citation_mocks = []\n            for c in citations:\n                cm = MagicMock()\n                cm.citation_number = c.get(\"num\", 1)\n                cm.document_id = c.get(\"doc_id\", \"doc1\")\n                citation_mocks.append(cm)\n            response.citation_info = citation_mocks\n        else:\n            response.citation_info = None\n\n        if documents:\n            doc_mocks = []\n            for d in documents:\n                dm = MagicMock()\n                dm.document_id = d.get(\"doc_id\", \"doc1\")\n                dm.semantic_identifier = d.get(\"name\", \"Source\")\n                dm.link = d.get(\"link\")\n                doc_mocks.append(dm)\n            response.top_documents = doc_mocks\n        else:\n            response.top_documents = None\n\n        return response\n\n    def test_format_citations_empty_list(self) -> None:\n        \"\"\"No citations returns answer unchanged.\"\"\"\n        response = self._make_response(\"Test answer\")\n        result = _append_citations(\"Test answer\", response)\n        assert result == \"Test answer\"\n        assert \"Sources:\" not in result\n\n    def test_format_citations_single(self) -> None:\n        \"\"\"Single citation is formatted correctly.\"\"\"\n        response = self._make_response(\n            \"Test answer\",\n            citations=[{\"num\": 1, \"doc_id\": \"doc1\"}],\n            documents=[\n                {\n                    \"doc_id\": \"doc1\",\n                    \"name\": \"Document One\",\n                    \"link\": \"https://example.com\",\n                }\n            ],\n        )\n        result = _append_citations(\"Test answer\", response)\n        assert \"**Sources:**\" in result\n        assert \"[Document One](<https://example.com>)\" in result\n\n    def test_format_citations_multiple(self) -> None:\n        \"\"\"Multiple citations are all formatted and numbered.\"\"\"\n        response = self._make_response(\n            \"Test answer\",\n            citations=[\n                {\"num\": 1, \"doc_id\": \"doc1\"},\n                {\"num\": 2, \"doc_id\": \"doc2\"},\n                {\"num\": 3, \"doc_id\": \"doc3\"},\n            ],\n            documents=[\n                {\"doc_id\": \"doc1\", \"name\": \"Doc 1\", \"link\": \"https://example.com/1\"},\n                {\"doc_id\": \"doc2\", \"name\": \"Doc 2\", \"link\": \"https://example.com/2\"},\n                {\"doc_id\": \"doc3\", \"name\": \"Doc 3\", \"link\": \"https://example.com/3\"},\n            ],\n        )\n        result = _append_citations(\"Test answer\", response)\n        assert \"1. [Doc 1]\" in result\n        assert \"2. [Doc 2]\" in result\n        assert \"3. [Doc 3]\" in result\n\n    def test_format_citations_max_five(self) -> None:\n        \"\"\"Only first 5 citations are included.\"\"\"\n        citations = [{\"num\": i, \"doc_id\": f\"doc{i}\"} for i in range(1, 11)]\n        documents = [\n            {\n                \"doc_id\": f\"doc{i}\",\n                \"name\": f\"Doc {i}\",\n                \"link\": f\"https://example.com/{i}\",\n            }\n            for i in range(1, 11)\n        ]\n        response = self._make_response(\n            \"Test answer\", citations=citations, documents=documents\n        )\n        result = _append_citations(\"Test answer\", response)\n\n        # Should have 5 citations\n        assert \"1. [Doc 1]\" in result\n        assert \"5. [Doc 5]\" in result\n        # Should NOT have 6th citation\n        assert \"6. [Doc 6]\" not in result\n\n    def test_format_citation_no_link(self) -> None:\n        \"\"\"Citation without link formats as plain text (no markdown).\"\"\"\n        response = self._make_response(\n            \"Test answer\",\n            citations=[{\"num\": 1, \"doc_id\": \"doc1\"}],\n            documents=[{\"doc_id\": \"doc1\", \"name\": \"No Link Doc\", \"link\": None}],\n        )\n        result = _append_citations(\"Test answer\", response)\n        assert \"1. No Link Doc\" in result\n        # Should not have markdown link syntax\n        assert \"[No Link Doc](<\" not in result\n\n    def test_format_citation_empty_name(self) -> None:\n        \"\"\"Empty semantic_identifier defaults to 'Source'.\"\"\"\n        response = self._make_response(\n            \"Test answer\",\n            citations=[{\"num\": 1, \"doc_id\": \"doc1\"}],\n            documents=[{\"doc_id\": \"doc1\", \"name\": \"\", \"link\": \"https://example.com\"}],\n        )\n        result = _append_citations(\"Test answer\", response)\n        # Should use fallback \"Source\" name\n        assert \"[Source]\" in result or \"Source\" in result\n\n    def test_format_citation_link_with_brackets(self) -> None:\n        \"\"\"Link with special characters is wrapped with angle brackets.\"\"\"\n        response = self._make_response(\n            \"Test answer\",\n            citations=[{\"num\": 1, \"doc_id\": \"doc1\"}],\n            documents=[\n                {\n                    \"doc_id\": \"doc1\",\n                    \"name\": \"Special Doc\",\n                    \"link\": \"https://example.com/path?query=value&other=123\",\n                }\n            ],\n        )\n        result = _append_citations(\"Test answer\", response)\n        # Discord markdown uses <link> to prevent embed\n        assert \"(<https://example.com\" in result\n\n    def test_format_citations_sorted_by_number(self) -> None:\n        \"\"\"Citations are sorted by citation number.\"\"\"\n        # Add in reverse order\n        response = self._make_response(\n            \"Test answer\",\n            citations=[\n                {\"num\": 3, \"doc_id\": \"doc3\"},\n                {\"num\": 1, \"doc_id\": \"doc1\"},\n                {\"num\": 2, \"doc_id\": \"doc2\"},\n            ],\n            documents=[\n                {\"doc_id\": \"doc1\", \"name\": \"Doc 1\", \"link\": \"https://example.com/1\"},\n                {\"doc_id\": \"doc2\", \"name\": \"Doc 2\", \"link\": \"https://example.com/2\"},\n                {\"doc_id\": \"doc3\", \"name\": \"Doc 3\", \"link\": \"https://example.com/3\"},\n            ],\n        )\n        result = _append_citations(\"Test answer\", response)\n\n        # Find positions\n        pos1 = result.find(\"1. [Doc 1]\")\n        pos2 = result.find(\"2. [Doc 2]\")\n        pos3 = result.find(\"3. [Doc 3]\")\n\n        # Should be in order\n        assert pos1 < pos2 < pos3\n\n    def test_format_citations_with_missing_document(self) -> None:\n        \"\"\"Citation referencing non-existent document is skipped.\"\"\"\n        response = self._make_response(\n            \"Test answer\",\n            citations=[\n                {\"num\": 1, \"doc_id\": \"doc1\"},\n                {\"num\": 2, \"doc_id\": \"doc_missing\"},  # No matching document\n            ],\n            documents=[\n                {\"doc_id\": \"doc1\", \"name\": \"Doc 1\", \"link\": \"https://example.com/1\"},\n            ],\n        )\n        result = _append_citations(\"Test answer\", response)\n        assert \"Doc 1\" in result\n        # Missing doc should not appear\n        assert \"doc_missing\" not in result.lower()\n"
  },
  {
    "path": "backend/tests/unit/onyx/onyxbot/discord/test_should_respond.py",
    "content": "\"\"\"Unit tests for Discord bot should_respond logic.\n\nTests the decision tree for when the bot should respond to messages.\n\"\"\"\n\nfrom unittest.mock import AsyncMock\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport discord\nimport pytest\n\nfrom onyx.onyxbot.discord.handle_message import check_implicit_invocation\nfrom onyx.onyxbot.discord.handle_message import should_respond\n\n\nclass TestBasicShouldRespond:\n    \"\"\"Tests for basic should_respond decision logic.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_should_respond_guild_disabled(\n        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Guild config enabled=false returns False.\"\"\"\n        mock_guild_config = MagicMock()\n        mock_guild_config.enabled = False\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_message.get_session_with_tenant\"\n        ) as mock_session:\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            with patch(\n                \"onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id\",\n                return_value=mock_guild_config,\n            ):\n                result = await should_respond(\n                    mock_discord_message, \"tenant1\", mock_bot_user\n                )\n\n        assert result.should_respond is False\n\n    @pytest.mark.asyncio\n    async def test_should_respond_guild_enabled(\n        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Guild config enabled=true proceeds to channel check.\"\"\"\n        mock_guild_config = MagicMock()\n        mock_guild_config.enabled = True\n        mock_guild_config.default_persona_id = 1\n\n        mock_channel_config = MagicMock()\n        mock_channel_config.enabled = True\n        mock_channel_config.require_bot_invocation = False\n        mock_channel_config.thread_only_mode = False\n        mock_channel_config.persona_override_id = None\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_message.get_session_with_tenant\"\n        ) as mock_session:\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            with (\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id\",\n                    return_value=mock_guild_config,\n                ),\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids\",\n                    return_value=mock_channel_config,\n                ),\n            ):\n                result = await should_respond(\n                    mock_discord_message, \"tenant1\", mock_bot_user\n                )\n\n        assert result.should_respond is True\n\n    @pytest.mark.asyncio\n    async def test_should_respond_channel_disabled(\n        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Channel config enabled=false returns False.\"\"\"\n        mock_guild_config = MagicMock()\n        mock_guild_config.enabled = True\n\n        mock_channel_config = MagicMock()\n        mock_channel_config.enabled = False\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_message.get_session_with_tenant\"\n        ) as mock_session:\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            with (\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id\",\n                    return_value=mock_guild_config,\n                ),\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids\",\n                    return_value=mock_channel_config,\n                ),\n            ):\n                result = await should_respond(\n                    mock_discord_message, \"tenant1\", mock_bot_user\n                )\n\n        assert result.should_respond is False\n\n    @pytest.mark.asyncio\n    async def test_should_respond_channel_enabled(\n        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Channel config enabled=true proceeds to mention check.\"\"\"\n        mock_guild_config = MagicMock()\n        mock_guild_config.enabled = True\n        mock_guild_config.default_persona_id = 2\n\n        mock_channel_config = MagicMock()\n        mock_channel_config.enabled = True\n        mock_channel_config.require_bot_invocation = False\n        mock_channel_config.thread_only_mode = False\n        mock_channel_config.persona_override_id = None\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_message.get_session_with_tenant\"\n        ) as mock_session:\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            with (\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id\",\n                    return_value=mock_guild_config,\n                ),\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids\",\n                    return_value=mock_channel_config,\n                ),\n            ):\n                result = await should_respond(\n                    mock_discord_message, \"tenant1\", mock_bot_user\n                )\n\n        assert result.should_respond is True\n        assert result.persona_id == 2\n\n    @pytest.mark.asyncio\n    async def test_should_respond_channel_not_found(\n        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"No channel config returns False (not whitelisted).\"\"\"\n        mock_guild_config = MagicMock()\n        mock_guild_config.enabled = True\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_message.get_session_with_tenant\"\n        ) as mock_session:\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            with (\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id\",\n                    return_value=mock_guild_config,\n                ),\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids\",\n                    return_value=None,  # No config\n                ),\n            ):\n                result = await should_respond(\n                    mock_discord_message, \"tenant1\", mock_bot_user\n                )\n\n        assert result.should_respond is False\n\n    @pytest.mark.asyncio\n    async def test_should_respond_require_mention_true_no_mention(\n        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"require_bot_invocation=true with no @mention returns False.\"\"\"\n        mock_guild_config = MagicMock()\n        mock_guild_config.enabled = True\n        mock_guild_config.default_persona_id = 1\n\n        mock_channel_config = MagicMock()\n        mock_channel_config.enabled = True\n        mock_channel_config.require_bot_invocation = True\n        mock_channel_config.thread_only_mode = False\n        mock_channel_config.persona_override_id = None\n\n        # No bot mention\n        mock_discord_message.mentions = []\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_message.get_session_with_tenant\"\n        ) as mock_session:\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            with (\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id\",\n                    return_value=mock_guild_config,\n                ),\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids\",\n                    return_value=mock_channel_config,\n                ),\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.check_implicit_invocation\",\n                    return_value=False,\n                ),\n            ):\n                result = await should_respond(\n                    mock_discord_message, \"tenant1\", mock_bot_user\n                )\n\n        assert result.should_respond is False\n\n    @pytest.mark.asyncio\n    async def test_should_respond_require_mention_true_with_mention(\n        self, mock_message_with_bot_mention: MagicMock, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"require_bot_invocation=true with @mention returns True.\"\"\"\n        mock_guild_config = MagicMock()\n        mock_guild_config.enabled = True\n        mock_guild_config.default_persona_id = 1\n\n        mock_channel_config = MagicMock()\n        mock_channel_config.enabled = True\n        mock_channel_config.require_bot_invocation = True\n        mock_channel_config.thread_only_mode = False\n        mock_channel_config.persona_override_id = None\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_message.get_session_with_tenant\"\n        ) as mock_session:\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            with (\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id\",\n                    return_value=mock_guild_config,\n                ),\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids\",\n                    return_value=mock_channel_config,\n                ),\n            ):\n                result = await should_respond(\n                    mock_message_with_bot_mention, \"tenant1\", mock_bot_user\n                )\n\n        assert result.should_respond is True\n\n    @pytest.mark.asyncio\n    async def test_should_respond_require_mention_false_no_mention(\n        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"require_bot_invocation=false with no @mention returns True.\"\"\"\n        mock_guild_config = MagicMock()\n        mock_guild_config.enabled = True\n        mock_guild_config.default_persona_id = 1\n\n        mock_channel_config = MagicMock()\n        mock_channel_config.enabled = True\n        mock_channel_config.require_bot_invocation = False\n        mock_channel_config.thread_only_mode = False\n        mock_channel_config.persona_override_id = None\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_message.get_session_with_tenant\"\n        ) as mock_session:\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            with (\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id\",\n                    return_value=mock_guild_config,\n                ),\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids\",\n                    return_value=mock_channel_config,\n                ),\n            ):\n                result = await should_respond(\n                    mock_discord_message, \"tenant1\", mock_bot_user\n                )\n\n        assert result.should_respond is True\n\n\nclass TestImplicitShouldRespond:\n    \"\"\"Tests for implicit invocation (no @mention required in certain contexts).\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_implicit_respond_reply_to_bot_message(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"User replies to a bot message returns True.\"\"\"\n        # Create a message that replies to the bot\n        msg = MagicMock(spec=discord.Message)\n        msg.reference = MagicMock()\n        msg.reference.message_id = 12345\n\n        # Mock the referenced message as a bot message\n        referenced_msg = MagicMock()\n        referenced_msg.author.id = mock_bot_user.id\n\n        msg.channel = MagicMock()\n        msg.channel.fetch_message = AsyncMock(return_value=referenced_msg)\n\n        result = await check_implicit_invocation(msg, mock_bot_user)\n        assert result is True\n\n    @pytest.mark.asyncio\n    async def test_implicit_respond_reply_to_user_message(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"User replies to another user's message returns False.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.reference = MagicMock()\n        msg.reference.message_id = 12345\n\n        # Mock the referenced message as a user message\n        referenced_msg = MagicMock()\n        referenced_msg.author.id = 999999  # Different from bot\n\n        msg.channel = MagicMock()\n        msg.channel.fetch_message = AsyncMock(return_value=referenced_msg)\n\n        result = await check_implicit_invocation(msg, mock_bot_user)\n        assert result is False\n\n    @pytest.mark.asyncio\n    async def test_implicit_respond_in_bot_owned_thread(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Message in thread owned by bot returns True.\"\"\"\n        thread = MagicMock(spec=discord.Thread)\n        thread.owner_id = mock_bot_user.id  # Bot owns the thread\n        thread.parent = MagicMock(spec=discord.TextChannel)\n\n        msg = MagicMock(spec=discord.Message)\n        msg.reference = None\n        msg.channel = thread\n\n        result = await check_implicit_invocation(msg, mock_bot_user)\n        assert result is True\n\n    @pytest.mark.asyncio\n    async def test_implicit_respond_in_user_owned_thread(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Message in thread owned by user returns False.\"\"\"\n        thread = MagicMock(spec=discord.Thread)\n        thread.owner_id = 999999  # User owns the thread\n        thread.parent = MagicMock(spec=discord.TextChannel)\n\n        msg = MagicMock(spec=discord.Message)\n        msg.reference = None\n        msg.channel = thread\n\n        result = await check_implicit_invocation(msg, mock_bot_user)\n        assert result is False\n\n    @pytest.mark.asyncio\n    async def test_implicit_respond_reply_in_bot_thread(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Reply to user in bot-owned thread returns True (thread context).\"\"\"\n        thread = MagicMock(spec=discord.Thread)\n        thread.owner_id = mock_bot_user.id\n        thread.parent = MagicMock(spec=discord.TextChannel)\n\n        # User replying to another user in bot's thread\n        referenced_msg = MagicMock()\n        referenced_msg.author.id = 888888  # Another user\n\n        msg = MagicMock(spec=discord.Message)\n        msg.reference = MagicMock()\n        msg.reference.message_id = 12345\n        msg.channel = thread\n        msg.channel.fetch_message = AsyncMock(return_value=referenced_msg)\n\n        result = await check_implicit_invocation(msg, mock_bot_user)\n        # Should return True because it's in bot's thread\n        assert result is True\n\n    @pytest.mark.asyncio\n    async def test_implicit_respond_thread_from_bot_message(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Thread created from bot message (non-forum) returns True.\"\"\"\n        thread = MagicMock(spec=discord.Thread)\n        thread.id = 777777\n        thread.owner_id = 999999  # User owns thread but...\n        thread.parent = MagicMock(spec=discord.TextChannel)\n\n        # The starter message is from the bot\n        starter_msg = MagicMock()\n        starter_msg.author.id = mock_bot_user.id\n        thread.parent.fetch_message = AsyncMock(return_value=starter_msg)\n\n        msg = MagicMock(spec=discord.Message)\n        msg.reference = None\n        msg.channel = thread\n\n        result = await check_implicit_invocation(msg, mock_bot_user)\n        assert result is True\n\n    @pytest.mark.asyncio\n    async def test_implicit_respond_forum_channel_excluded(\n        self, mock_bot_user: MagicMock, mock_thread_forum_parent: MagicMock\n    ) -> None:\n        \"\"\"Thread parent is ForumChannel - does NOT check starter message.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.reference = None\n        msg.channel = mock_thread_forum_parent\n        mock_thread_forum_parent.owner_id = 999999  # Not bot\n\n        result = await check_implicit_invocation(msg, mock_bot_user)\n        # Should be False - forum threads don't use starter message check\n        assert result is False\n\n    @pytest.mark.asyncio\n    async def test_implicit_respond_combined_with_mention(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Has @mention AND is implicit - should return True (either works).\"\"\"\n        thread = MagicMock(spec=discord.Thread)\n        thread.owner_id = mock_bot_user.id\n        thread.parent = MagicMock(spec=discord.TextChannel)\n\n        msg = MagicMock(spec=discord.Message)\n        msg.reference = None\n        msg.channel = thread\n        msg.mentions = [mock_bot_user]\n\n        result = await check_implicit_invocation(msg, mock_bot_user)\n        assert result is True\n\n    @pytest.mark.asyncio\n    async def test_implicit_respond_reference_fetch_fails(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"discord.NotFound when fetching reply reference returns False.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.reference = MagicMock()\n        msg.reference.message_id = 12345\n        msg.channel = MagicMock()\n        msg.channel.fetch_message = AsyncMock(\n            side_effect=discord.NotFound(MagicMock(), \"Not found\")\n        )\n\n        result = await check_implicit_invocation(msg, mock_bot_user)\n        assert result is False\n\n    @pytest.mark.asyncio\n    async def test_implicit_respond_http_exception(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"discord.HTTPException during check returns False.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.reference = MagicMock()\n        msg.reference.message_id = 12345\n        msg.channel = MagicMock()\n        msg.channel.fetch_message = AsyncMock(\n            side_effect=discord.HTTPException(MagicMock(), \"HTTP error\")\n        )\n\n        result = await check_implicit_invocation(msg, mock_bot_user)\n        assert result is False\n\n\nclass TestThreadOnlyMode:\n    \"\"\"Tests for thread_only_mode behavior.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_thread_only_mode_message_in_thread(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"thread_only_mode=true, message in thread returns True.\"\"\"\n        mock_guild_config = MagicMock()\n        mock_guild_config.enabled = True\n        mock_guild_config.default_persona_id = 1\n\n        mock_channel_config = MagicMock()\n        mock_channel_config.enabled = True\n        mock_channel_config.require_bot_invocation = False\n        mock_channel_config.thread_only_mode = True\n        mock_channel_config.persona_override_id = None\n\n        # Create thread message\n        thread = MagicMock(spec=discord.Thread)\n        thread.parent = MagicMock(spec=discord.TextChannel)\n        thread.parent.id = 111111111\n\n        msg = MagicMock(spec=discord.Message)\n        msg.guild = MagicMock()\n        msg.guild.id = 987654321\n        msg.channel = thread\n        msg.mentions = []\n        msg.reference = None\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_message.get_session_with_tenant\"\n        ) as mock_session:\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            with (\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id\",\n                    return_value=mock_guild_config,\n                ),\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids\",\n                    return_value=mock_channel_config,\n                ),\n            ):\n                result = await should_respond(msg, \"tenant1\", mock_bot_user)\n\n        assert result.should_respond is True\n        assert result.thread_only_mode is True\n\n    @pytest.mark.asyncio\n    async def test_thread_only_mode_false_message_in_channel(\n        self, mock_discord_message: MagicMock, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"thread_only_mode=false, message in channel returns True.\"\"\"\n        mock_guild_config = MagicMock()\n        mock_guild_config.enabled = True\n        mock_guild_config.default_persona_id = 1\n\n        mock_channel_config = MagicMock()\n        mock_channel_config.enabled = True\n        mock_channel_config.require_bot_invocation = False\n        mock_channel_config.thread_only_mode = False\n        mock_channel_config.persona_override_id = None\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_message.get_session_with_tenant\"\n        ) as mock_session:\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            with (\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id\",\n                    return_value=mock_guild_config,\n                ),\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids\",\n                    return_value=mock_channel_config,\n                ),\n            ):\n                result = await should_respond(\n                    mock_discord_message, \"tenant1\", mock_bot_user\n                )\n\n        assert result.should_respond is True\n        assert result.thread_only_mode is False\n\n\nclass TestEdgeCases:\n    \"\"\"Edge case tests for should_respond.\"\"\"\n\n    @pytest.mark.asyncio\n    async def test_should_respond_no_guild(self, mock_bot_user: MagicMock) -> None:\n        \"\"\"Message without guild (DM) returns False.\"\"\"\n        msg = MagicMock(spec=discord.Message)\n        msg.guild = None\n\n        result = await should_respond(msg, \"tenant1\", mock_bot_user)\n        assert result.should_respond is False\n\n    @pytest.mark.asyncio\n    async def test_should_respond_thread_uses_parent_channel_config(\n        self, mock_bot_user: MagicMock\n    ) -> None:\n        \"\"\"Thread under channel uses parent channel's config.\"\"\"\n        mock_guild_config = MagicMock()\n        mock_guild_config.enabled = True\n        mock_guild_config.default_persona_id = 1\n\n        mock_channel_config = MagicMock()\n        mock_channel_config.enabled = True\n        mock_channel_config.require_bot_invocation = False\n        mock_channel_config.thread_only_mode = False\n        mock_channel_config.persona_override_id = 5  # Specific persona\n\n        # Create thread message\n        thread = MagicMock(spec=discord.Thread)\n        thread.id = 666666\n        thread.parent = MagicMock(spec=discord.TextChannel)\n        thread.parent.id = 111111111  # Parent channel ID\n\n        msg = MagicMock(spec=discord.Message)\n        msg.guild = MagicMock()\n        msg.guild.id = 987654321\n        msg.channel = thread\n        msg.mentions = []\n        msg.reference = None\n\n        with patch(\n            \"onyx.onyxbot.discord.handle_message.get_session_with_tenant\"\n        ) as mock_session:\n            mock_db = MagicMock()\n            mock_session.return_value.__enter__ = MagicMock(return_value=mock_db)\n            mock_session.return_value.__exit__ = MagicMock()\n\n            with (\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_guild_config_by_discord_id\",\n                    return_value=mock_guild_config,\n                ),\n                patch(\n                    \"onyx.onyxbot.discord.handle_message.get_channel_config_by_discord_ids\",\n                    return_value=mock_channel_config,\n                ),\n            ):\n                result = await should_respond(msg, \"tenant1\", mock_bot_user)\n\n        assert result.should_respond is True\n        # Should use parent's persona override\n        assert result.persona_id == 5\n"
  },
  {
    "path": "backend/tests/unit/onyx/onyxbot/test_handle_regular_answer.py",
    "content": "\"\"\"Tests for Slack channel reference resolution and tag filtering\nin handle_regular_answer.py.\"\"\"\n\nfrom unittest.mock import MagicMock\n\nfrom slack_sdk.errors import SlackApiError\n\nfrom onyx.context.search.models import Tag\nfrom onyx.onyxbot.slack.constants import SLACK_CHANNEL_REF_PATTERN\nfrom onyx.onyxbot.slack.handlers.handle_regular_answer import resolve_channel_references\n\n\n# ---------------------------------------------------------------------------\n# Helpers\n# ---------------------------------------------------------------------------\n\n\ndef _mock_client_with_channels(\n    channel_map: dict[str, str],\n) -> MagicMock:\n    \"\"\"Return a mock WebClient where conversations_info resolves IDs to names.\"\"\"\n    client = MagicMock()\n\n    def _conversations_info(channel: str) -> MagicMock:\n        if channel in channel_map:\n            resp = MagicMock()\n            resp.validate = MagicMock()\n            resp.__getitem__ = lambda _self, key: {\n                \"channel\": {\n                    \"name\": channel_map[channel],\n                    \"is_im\": False,\n                    \"is_mpim\": False,\n                }\n            }[key]\n            return resp\n        raise SlackApiError(\"channel_not_found\", response=MagicMock())\n\n    client.conversations_info = _conversations_info\n    return client\n\n\ndef _mock_logger() -> MagicMock:\n    return MagicMock()\n\n\n# ---------------------------------------------------------------------------\n# SLACK_CHANNEL_REF_PATTERN regex tests\n# ---------------------------------------------------------------------------\n\n\nclass TestSlackChannelRefPattern:\n    def test_matches_bare_channel_id(self) -> None:\n        matches = SLACK_CHANNEL_REF_PATTERN.findall(\"<#C097NBWMY8Y>\")\n        assert matches == [(\"C097NBWMY8Y\", \"\")]\n\n    def test_matches_channel_id_with_name(self) -> None:\n        matches = SLACK_CHANNEL_REF_PATTERN.findall(\"<#C097NBWMY8Y|eng-infra>\")\n        assert matches == [(\"C097NBWMY8Y\", \"eng-infra\")]\n\n    def test_matches_multiple_channels(self) -> None:\n        msg = \"compare <#C111AAA> and <#C222BBB|general>\"\n        matches = SLACK_CHANNEL_REF_PATTERN.findall(msg)\n        assert len(matches) == 2\n        assert (\"C111AAA\", \"\") in matches\n        assert (\"C222BBB\", \"general\") in matches\n\n    def test_no_match_on_plain_text(self) -> None:\n        matches = SLACK_CHANNEL_REF_PATTERN.findall(\"no channels here\")\n        assert matches == []\n\n    def test_no_match_on_user_mention(self) -> None:\n        matches = SLACK_CHANNEL_REF_PATTERN.findall(\"<@U12345>\")\n        assert matches == []\n\n\n# ---------------------------------------------------------------------------\n# resolve_channel_references tests\n# ---------------------------------------------------------------------------\n\n\nclass TestResolveChannelReferences:\n    def test_resolves_bare_channel_id_via_api(self) -> None:\n        client = _mock_client_with_channels({\"C097NBWMY8Y\": \"eng-infra\"})\n        logger = _mock_logger()\n\n        message, tags = resolve_channel_references(\n            message=\"summary of <#C097NBWMY8Y> this week\",\n            client=client,\n            logger=logger,\n        )\n\n        assert message == \"summary of #eng-infra this week\"\n        assert len(tags) == 1\n        assert tags[0] == Tag(tag_key=\"Channel\", tag_value=\"eng-infra\")\n\n    def test_uses_name_from_pipe_format_without_api_call(self) -> None:\n        client = MagicMock()\n        logger = _mock_logger()\n\n        message, tags = resolve_channel_references(\n            message=\"check <#C097NBWMY8Y|eng-infra> for updates\",\n            client=client,\n            logger=logger,\n        )\n\n        assert message == \"check #eng-infra for updates\"\n        assert tags == [Tag(tag_key=\"Channel\", tag_value=\"eng-infra\")]\n        # Should NOT have called the API since name was in the markup\n        client.conversations_info.assert_not_called()\n\n    def test_multiple_channels(self) -> None:\n        client = _mock_client_with_channels(\n            {\n                \"C111AAA\": \"eng-infra\",\n                \"C222BBB\": \"eng-general\",\n            }\n        )\n        logger = _mock_logger()\n\n        message, tags = resolve_channel_references(\n            message=\"compare <#C111AAA> and <#C222BBB>\",\n            client=client,\n            logger=logger,\n        )\n\n        assert \"#eng-infra\" in message\n        assert \"#eng-general\" in message\n        assert \"<#\" not in message\n        assert len(tags) == 2\n        tag_values = {t.tag_value for t in tags}\n        assert tag_values == {\"eng-infra\", \"eng-general\"}\n\n    def test_no_channel_references_returns_unchanged(self) -> None:\n        client = MagicMock()\n        logger = _mock_logger()\n\n        message, tags = resolve_channel_references(\n            message=\"just a normal message with no channels\",\n            client=client,\n            logger=logger,\n        )\n\n        assert message == \"just a normal message with no channels\"\n        assert tags == []\n\n    def test_api_failure_skips_channel_gracefully(self) -> None:\n        # Client that fails for all channel lookups\n        client = _mock_client_with_channels({})\n        logger = _mock_logger()\n\n        message, tags = resolve_channel_references(\n            message=\"check <#CBADID123>\",\n            client=client,\n            logger=logger,\n        )\n\n        # Message should remain unchanged for the failed channel\n        assert \"<#CBADID123>\" in message\n        assert tags == []\n        logger.warning.assert_called_once()\n\n    def test_partial_failure_resolves_what_it_can(self) -> None:\n        # Only one of two channels resolves\n        client = _mock_client_with_channels({\"C111AAA\": \"eng-infra\"})\n        logger = _mock_logger()\n\n        message, tags = resolve_channel_references(\n            message=\"compare <#C111AAA> and <#CBADID123>\",\n            client=client,\n            logger=logger,\n        )\n\n        assert \"#eng-infra\" in message\n        assert \"<#CBADID123>\" in message  # failed one stays raw\n        assert len(tags) == 1\n        assert tags[0].tag_value == \"eng-infra\"\n\n    def test_duplicate_channel_produces_single_tag(self) -> None:\n        client = _mock_client_with_channels({\"C111AAA\": \"eng-infra\"})\n        logger = _mock_logger()\n\n        message, tags = resolve_channel_references(\n            message=\"summarize <#C111AAA> and compare with <#C111AAA>\",\n            client=client,\n            logger=logger,\n        )\n\n        assert message == \"summarize #eng-infra and compare with #eng-infra\"\n        assert len(tags) == 1\n        assert tags[0].tag_value == \"eng-infra\"\n\n    def test_mixed_pipe_and_bare_formats(self) -> None:\n        client = _mock_client_with_channels({\"C222BBB\": \"random\"})\n        logger = _mock_logger()\n\n        message, tags = resolve_channel_references(\n            message=\"see <#C111AAA|eng-infra> and <#C222BBB>\",\n            client=client,\n            logger=logger,\n        )\n\n        assert \"#eng-infra\" in message\n        assert \"#random\" in message\n        assert len(tags) == 2\n"
  },
  {
    "path": "backend/tests/unit/onyx/onyxbot/test_slack_blocks.py",
    "content": "from datetime import datetime\n\nimport pytest\nimport pytz\nimport timeago  # type: ignore\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.context.search.models import SavedSearchDoc\nfrom onyx.onyxbot.slack.blocks import _build_documents_blocks\n\n\ndef _make_saved_doc(updated_at: datetime | None) -> SavedSearchDoc:\n    return SavedSearchDoc(\n        db_doc_id=1,\n        document_id=\"doc-1\",\n        chunk_ind=0,\n        semantic_identifier=\"Example Doc\",\n        link=\"https://example.com\",\n        blurb=\"Some blurb\",\n        source_type=DocumentSource.FILE,\n        boost=0,\n        hidden=False,\n        metadata={},\n        score=0.0,\n        match_highlights=[],\n        updated_at=updated_at,\n        primary_owners=[\"user@example.com\"],\n        secondary_owners=None,\n        is_relevant=None,\n        relevance_explanation=None,\n        is_internet=False,\n    )\n\n\ndef test_build_documents_blocks_formats_naive_timestamp(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    naive_timestamp: datetime = datetime(2024, 1, 1, 12, 0, 0)\n    captured: dict[str, datetime] = {}\n\n    # Save the original timeago.format so we can call it inside the fake\n    original_timeago_format = timeago.format\n\n    def fake_timeago_format(doc_dt: datetime, now: datetime) -> str:\n        captured[\"doc\"] = doc_dt\n        result = original_timeago_format(doc_dt, now)\n        captured[\"result\"] = result\n        return result\n\n    monkeypatch.setattr(\n        \"onyx.onyxbot.slack.blocks.timeago.format\",\n        fake_timeago_format,\n    )\n\n    blocks = _build_documents_blocks(\n        documents=[_make_saved_doc(updated_at=naive_timestamp)],\n        message_id=42,\n    )\n\n    assert len(blocks) >= 2\n    section_block = blocks[1].to_dict()\n    assert \"result\" in captured\n    expected_text = (\n        f\"<https://example.com|Example Doc>\\n_Updated {captured['result']}_\\n>\"\n    )\n    assert section_block[\"text\"][\"text\"] == expected_text\n\n    assert \"doc\" in captured\n    formatted_timestamp: datetime = captured[\"doc\"]\n    expected_timestamp: datetime = naive_timestamp.replace(tzinfo=pytz.utc)\n    assert formatted_timestamp == expected_timestamp\n"
  },
  {
    "path": "backend/tests/unit/onyx/onyxbot/test_slack_channel_config.py",
    "content": "from unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.db.slack_channel_config import create_slack_channel_persona\n\n\ndef test_create_slack_channel_persona_reuses_existing_persona() -> None:\n    db_session = MagicMock()\n    existing_persona = MagicMock()\n    existing_persona.id = 42\n    db_session.scalar.return_value = existing_persona\n\n    fake_tool = MagicMock()\n    fake_tool.id = 7\n\n    with (\n        patch(\n            \"onyx.db.slack_channel_config.get_builtin_tool\",\n            return_value=fake_tool,\n        ),\n        patch(\"onyx.db.slack_channel_config.upsert_persona\") as mock_upsert,\n    ):\n        mock_upsert.return_value = MagicMock()\n\n        create_slack_channel_persona(\n            db_session=db_session,\n            channel_name=\"general\",\n            document_set_ids=[1],\n        )\n\n    mock_upsert.assert_called_once()\n    assert mock_upsert.call_args.kwargs[\"persona_id\"] == existing_persona.id\n"
  },
  {
    "path": "backend/tests/unit/onyx/onyxbot/test_slack_formatting.py",
    "content": "from onyx.onyxbot.slack.formatting import _convert_slack_links_to_markdown\nfrom onyx.onyxbot.slack.formatting import _normalize_link_destinations\nfrom onyx.onyxbot.slack.formatting import _sanitize_html\nfrom onyx.onyxbot.slack.formatting import _transform_outside_code_blocks\nfrom onyx.onyxbot.slack.formatting import format_slack_message\nfrom onyx.onyxbot.slack.utils import remove_slack_text_interactions\nfrom onyx.utils.text_processing import decode_escapes\n\n\ndef test_normalize_citation_link_wraps_url_with_parentheses() -> None:\n    message = (\n        \"See [[1]](https://example.com/Access%20ID%20Card(s)%20Guide.pdf) for details.\"\n    )\n\n    normalized = _normalize_link_destinations(message)\n\n    assert (\n        \"See [[1]](<https://example.com/Access%20ID%20Card(s)%20Guide.pdf>) for details.\"\n        == normalized\n    )\n\n\ndef test_normalize_citation_link_keeps_existing_angle_brackets() -> None:\n    message = \"[[1]](<https://example.com/Access%20ID%20Card(s)%20Guide.pdf>)\"\n\n    normalized = _normalize_link_destinations(message)\n\n    assert message == normalized\n\n\ndef test_normalize_citation_link_handles_multiple_links() -> None:\n    message = \"[[1]](https://example.com/(USA)%20Guide.pdf) [[2]](https://example.com/Plan(s)%20Overview.pdf)\"\n\n    normalized = _normalize_link_destinations(message)\n\n    assert \"[[1]](<https://example.com/(USA)%20Guide.pdf>)\" in normalized\n    assert \"[[2]](<https://example.com/Plan(s)%20Overview.pdf>)\" in normalized\n\n\ndef test_format_slack_message_keeps_parenthesized_citation_links_intact() -> None:\n    message = (\n        \"Download [[1]](https://example.com/(USA)%20Access%20ID%20Card(s)%20Guide.pdf)\"\n    )\n\n    formatted = format_slack_message(message)\n    rendered = decode_escapes(remove_slack_text_interactions(formatted))\n\n    assert (\n        \"<https://example.com/(USA)%20Access%20ID%20Card(s)%20Guide.pdf|[1]>\"\n        in rendered\n    )\n    assert \"|[1]>%20Access%20ID%20Card\" not in rendered\n\n\ndef test_slack_style_links_converted_to_clickable_links() -> None:\n    message = \"Visit <https://example.com/page|Example Page> for details.\"\n\n    formatted = format_slack_message(message)\n\n    assert \"<https://example.com/page|Example Page>\" in formatted\n    assert \"&lt;\" not in formatted\n\n\ndef test_slack_style_links_preserved_inside_code_blocks() -> None:\n    message = \"```\\n<https://example.com|click>\\n```\"\n\n    converted = _convert_slack_links_to_markdown(message)\n\n    assert \"<https://example.com|click>\" in converted\n\n\ndef test_html_tags_stripped_outside_code_blocks() -> None:\n    message = \"Hello<br/>world ```<div>code</div>``` after\"\n\n    sanitized = _transform_outside_code_blocks(message, _sanitize_html)\n\n    assert \"<br\" not in sanitized\n    assert \"<div>code</div>\" in sanitized\n\n\ndef test_format_slack_message_block_spacing() -> None:\n    message = \"Paragraph one.\\n\\nParagraph two.\"\n\n    formatted = format_slack_message(message)\n\n    assert \"Paragraph one.\\n\\nParagraph two.\" == formatted\n\n\ndef test_format_slack_message_code_block_no_trailing_blank_line() -> None:\n    message = \"```python\\nprint('hi')\\n```\"\n\n    formatted = format_slack_message(message)\n\n    assert formatted.endswith(\"print('hi')\\n```\")\n\n\ndef test_format_slack_message_ampersand_not_double_escaped() -> None:\n    message = 'She said \"hello\" & goodbye.'\n\n    formatted = format_slack_message(message)\n\n    assert \"&amp;\" in formatted\n    assert \"&quot;\" not in formatted\n\n\n# -- Table rendering tests --\n\n\ndef test_table_renders_as_vertical_cards() -> None:\n    message = \"| Feature | Status | Owner |\\n|---------|--------|-------|\\n| Auth | Done | Alice |\\n| Search | In Progress | Bob |\\n\"\n\n    formatted = format_slack_message(message)\n\n    assert \"*Auth*\\n  • Status: Done\\n  • Owner: Alice\" in formatted\n    assert \"*Search*\\n  • Status: In Progress\\n  • Owner: Bob\" in formatted\n    # Cards separated by blank line\n    assert \"Owner: Alice\\n\\n*Search*\" in formatted\n    # No raw pipe-and-dash table syntax\n    assert \"---|\" not in formatted\n\n\ndef test_table_single_column() -> None:\n    message = \"| Name |\\n|------|\\n| Alice |\\n| Bob |\\n\"\n\n    formatted = format_slack_message(message)\n\n    assert \"*Alice*\" in formatted\n    assert \"*Bob*\" in formatted\n\n\ndef test_table_embedded_in_text() -> None:\n    message = \"Here are the results:\\n\\n| Item | Count |\\n|------|-------|\\n| Apples | 5 |\\n\\nThat's all.\"\n\n    formatted = format_slack_message(message)\n\n    assert \"Here are the results:\" in formatted\n    assert \"*Apples*\\n  • Count: 5\" in formatted\n    assert \"That's all.\" in formatted\n\n\ndef test_table_with_formatted_cells() -> None:\n    message = \"| Name | Link |\\n|------|------|\\n| **Alice** | [profile](https://example.com) |\\n\"\n\n    formatted = format_slack_message(message)\n\n    # Bold cell should not double-wrap: *Alice* not **Alice**\n    assert \"*Alice*\" in formatted\n    assert \"**Alice**\" not in formatted\n    assert \"<https://example.com|profile>\" in formatted\n\n\ndef test_table_with_alignment_specifiers() -> None:\n    message = \"| Left | Center | Right |\\n|:-----|:------:|------:|\\n| a | b | c |\\n\"\n\n    formatted = format_slack_message(message)\n\n    assert \"*a*\\n  • Center: b\\n  • Right: c\" in formatted\n\n\ndef test_two_tables_in_same_message_use_independent_headers() -> None:\n    message = \"| A | B |\\n|---|---|\\n| 1 | 2 |\\n\\n| X | Y | Z |\\n|---|---|---|\\n| p | q | r |\\n\"\n\n    formatted = format_slack_message(message)\n\n    assert \"*1*\\n  • B: 2\" in formatted\n    assert \"*p*\\n  • Y: q\\n  • Z: r\" in formatted\n\n\ndef test_table_empty_first_column_no_bare_asterisks() -> None:\n    message = \"| Name | Status |\\n|------|--------|\\n| | Done |\\n\"\n\n    formatted = format_slack_message(message)\n\n    # Empty title should not produce \"**\" (bare asterisks)\n    assert \"**\" not in formatted\n    assert \"  • Status: Done\" in formatted\n"
  },
  {
    "path": "backend/tests/unit/onyx/onyxbot/test_slack_gating.py",
    "content": "\"\"\"Tests for Slack bot gating and seat limit enforcement.\"\"\"\n\nfrom collections.abc import Generator\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.server.settings.models import ApplicationStatus\n\n# ---------------------------------------------------------------------------\n# Shared helpers\n# ---------------------------------------------------------------------------\n\n_HANDLE_MSG = \"onyx.onyxbot.slack.handlers.handle_message\"\n_LISTENER = \"onyx.onyxbot.slack.listener\"\n\n\ndef _make_socket_request(\n    req_type: str = \"events_api\",\n    event: dict | None = None,\n) -> MagicMock:\n    \"\"\"Create a mock SocketModeRequest.\"\"\"\n    req = MagicMock()\n    req.type = req_type\n    if req_type == \"events_api\":\n        req.payload = {\n            \"event\": event or {\"type\": \"message\", \"channel\": \"C123\", \"ts\": \"1234.5678\"}\n        }\n    elif req_type == \"slash_commands\":\n        req.payload = {\"channel_id\": \"C123\"}\n    else:\n        req.payload = {}\n    return req\n\n\ndef _make_license_metadata(\n    status: ApplicationStatus = ApplicationStatus.ACTIVE,\n) -> MagicMock:\n    \"\"\"Create a mock LicenseMetadata.\"\"\"\n    metadata = MagicMock()\n    metadata.status = status\n    return metadata\n\n\ndef _ee_side_effect(\n    is_gated: bool = False,\n    metadata: Any = None,\n) -> list:\n    \"\"\"Build fetch_ee_implementation_or_noop side_effect for gating tests.\n\n    Returns callables for: [is_tenant_gated, get_cached_license_metadata].\n    \"\"\"\n    return [\n        lambda *_a, **_kw: is_gated,\n        lambda *_a, **_kw: metadata,\n    ]\n\n\ndef _make_message_info(email: str = \"user@test.com\") -> MagicMock:\n    \"\"\"Create a mock SlackMessageInfo for handle_message tests.\"\"\"\n    info = MagicMock()\n    info.channel_to_respond = \"C123\"\n    info.thread_messages = [MagicMock(message=\"test?\")]\n    info.sender_id = \"U123\"\n    info.bypass_filters = False\n    info.is_slash_command = False\n    info.is_bot_dm = False\n    info.email = email\n    info.msg_to_respond = \"1234.5678\"\n    return info\n\n\ndef _make_channel_config() -> MagicMock:\n    \"\"\"Create a mock SlackChannelConfig.\"\"\"\n    config = MagicMock()\n    config.persona = None\n    config.channel_config = {}\n    return config\n\n\n# ---------------------------------------------------------------------------\n# _check_tenant_gated\n# ---------------------------------------------------------------------------\n\n\nclass TestCheckTenantGated:\n    \"\"\"Tests for _check_tenant_gated function.\"\"\"\n\n    @pytest.fixture(autouse=True)\n    def _patch_tenant_id(self) -> Any:\n        with patch(f\"{_LISTENER}.get_current_tenant_id\", return_value=\"public\"):\n            yield\n\n    def _call(\n        self,\n        _mock_fetch_ee: MagicMock,\n        event: dict | None = None,\n    ) -> tuple[bool, MagicMock]:\n        \"\"\"Call _check_tenant_gated with a fresh client + request.\"\"\"\n        from onyx.onyxbot.slack.listener import _check_tenant_gated\n\n        client = MagicMock()\n        client.web_client = MagicMock()\n        req = _make_socket_request(event=event)\n        result = _check_tenant_gated(client, req)\n        return result, client\n\n    @patch(f\"{_LISTENER}.fetch_ee_implementation_or_noop\")\n    def test_active_license_not_gated(self, mock_fetch_ee: MagicMock) -> None:\n        metadata = _make_license_metadata()\n        mock_fetch_ee.side_effect = _ee_side_effect(metadata=metadata)\n\n        result, _ = self._call(mock_fetch_ee)\n        assert result is False\n\n    @patch(f\"{_LISTENER}.respond_in_thread_or_channel\")\n    @patch(f\"{_LISTENER}.fetch_ee_implementation_or_noop\")\n    def test_multi_tenant_gated_blocks_and_responds(\n        self, mock_fetch_ee: MagicMock, mock_respond: MagicMock\n    ) -> None:\n        mock_fetch_ee.side_effect = _ee_side_effect(is_gated=True)\n\n        result, _ = self._call(mock_fetch_ee)\n\n        assert result is True\n        mock_respond.assert_called_once()\n        assert \"subscription has expired\" in mock_respond.call_args[1][\"text\"]\n\n    @patch(f\"{_LISTENER}.respond_in_thread_or_channel\")\n    @patch(f\"{_LISTENER}.fetch_ee_implementation_or_noop\")\n    def test_gated_access_status_blocks(\n        self, mock_fetch_ee: MagicMock, mock_respond: MagicMock\n    ) -> None:\n        metadata = _make_license_metadata(status=ApplicationStatus.GATED_ACCESS)\n        mock_fetch_ee.side_effect = _ee_side_effect(metadata=metadata)\n\n        result, _ = self._call(mock_fetch_ee)\n\n        assert result is True\n        mock_respond.assert_called_once()\n\n    @pytest.mark.parametrize(\n        \"event\",\n        [\n            {\"type\": \"message\", \"channel\": \"C123\", \"bot_id\": \"B456\", \"ts\": \"1\"},\n            {\n                \"type\": \"message\",\n                \"channel\": \"C123\",\n                \"bot_profile\": {\"id\": \"B456\"},\n                \"ts\": \"1\",\n            },\n            {\"type\": \"message\", \"channel\": \"C123\", \"subtype\": \"bot_message\", \"ts\": \"1\"},\n        ],\n        ids=[\"bot_id\", \"bot_profile\", \"subtype_bot_message\"],\n    )\n    @patch(f\"{_LISTENER}.respond_in_thread_or_channel\")\n    @patch(f\"{_LISTENER}.fetch_ee_implementation_or_noop\")\n    def test_bot_message_no_response_sent(\n        self, mock_fetch_ee: MagicMock, mock_respond: MagicMock, event: dict\n    ) -> None:\n        \"\"\"Bot messages are blocked but no response is sent (prevents loop).\"\"\"\n        mock_fetch_ee.side_effect = _ee_side_effect(is_gated=True)\n\n        result, _ = self._call(mock_fetch_ee, event=event)\n\n        assert result is True\n        mock_respond.assert_not_called()\n\n    @patch(f\"{_LISTENER}.respond_in_thread_or_channel\")\n    @patch(f\"{_LISTENER}.fetch_ee_implementation_or_noop\")\n    def test_app_mention_no_response_sent(\n        self, mock_fetch_ee: MagicMock, mock_respond: MagicMock\n    ) -> None:\n        \"\"\"app_mention events are blocked silently (dedup with message event).\"\"\"\n        mock_fetch_ee.side_effect = _ee_side_effect(is_gated=True)\n\n        result, _ = self._call(\n            mock_fetch_ee,\n            event={\"type\": \"app_mention\", \"channel\": \"C123\", \"ts\": \"1\"},\n        )\n\n        assert result is True\n        mock_respond.assert_not_called()\n\n    @patch(f\"{_LISTENER}.fetch_ee_implementation_or_noop\")\n    def test_no_license_metadata_not_gated(self, mock_fetch_ee: MagicMock) -> None:\n        \"\"\"No license metadata (CE mode) means not gated.\"\"\"\n        mock_fetch_ee.side_effect = _ee_side_effect(metadata=None)\n\n        result, _ = self._call(mock_fetch_ee)\n        assert result is False\n\n    @patch(f\"{_LISTENER}.respond_in_thread_or_channel\")\n    @patch(f\"{_LISTENER}.fetch_ee_implementation_or_noop\")\n    def test_response_uses_thread_ts(\n        self, mock_fetch_ee: MagicMock, mock_respond: MagicMock\n    ) -> None:\n        mock_fetch_ee.side_effect = _ee_side_effect(is_gated=True)\n\n        self._call(\n            mock_fetch_ee,\n            event={\n                \"type\": \"message\",\n                \"channel\": \"C123\",\n                \"thread_ts\": \"1111.0000\",\n                \"ts\": \"2222.0000\",\n            },\n        )\n\n        assert mock_respond.call_args[1][\"thread_ts\"] == \"1111.0000\"\n\n\n# ---------------------------------------------------------------------------\n# _extract_channel_from_request\n# ---------------------------------------------------------------------------\n\n\nclass TestExtractChannelFromRequest:\n    \"\"\"Tests for _extract_channel_from_request function.\"\"\"\n\n    @pytest.mark.parametrize(\n        \"req_type, payload, expected\",\n        [\n            (\"events_api\", {\"event\": {\"channel\": \"C123\"}}, \"C123\"),\n            (\"slash_commands\", {\"channel_id\": \"C456\"}, \"C456\"),\n            (\"interactive\", {\"container\": {\"channel_id\": \"C789\"}}, \"C789\"),\n            (\"unknown\", {}, None),\n        ],\n    )\n    def test_channel_extraction(\n        self, req_type: str, payload: dict, expected: str | None\n    ) -> None:\n        from onyx.onyxbot.slack.listener import _extract_channel_from_request\n\n        req = MagicMock()\n        req.type = req_type\n        req.payload = payload\n        assert _extract_channel_from_request(req) == expected\n\n\n# ---------------------------------------------------------------------------\n# handle_message seat check\n# ---------------------------------------------------------------------------\n\n\nclass TestHandleMessageSeatCheck:\n    \"\"\"Tests for seat limit enforcement in handle_message.\"\"\"\n\n    @pytest.fixture(autouse=True)\n    def _common_patches(self) -> Any:\n        \"\"\"Patch side-effect-only dependencies that every test needs.\"\"\"\n        with (\n            patch(f\"{_HANDLE_MSG}.slack_usage_report\"),\n            patch(f\"{_HANDLE_MSG}.send_msg_ack_to_user\"),\n        ):\n            yield\n\n    @pytest.fixture\n    def db_session(self) -> Generator[MagicMock, None, None]:\n        with patch(f\"{_HANDLE_MSG}.get_session_with_current_tenant\") as mock:\n            session = MagicMock()\n            mock.return_value.__enter__ = MagicMock(return_value=session)\n            mock.return_value.__exit__ = MagicMock(return_value=False)\n            yield session\n\n    def _call_handle_message(\n        self, client: MagicMock | None = None, email: str = \"user@test.com\"\n    ) -> bool:\n        from onyx.onyxbot.slack.handlers.handle_message import handle_message\n\n        return handle_message(\n            message_info=_make_message_info(email),\n            slack_channel_config=_make_channel_config(),\n            client=client or MagicMock(),\n            feedback_reminder_id=None,\n        )\n\n    @pytest.mark.usefixtures(\"db_session\")\n    @patch(f\"{_HANDLE_MSG}.respond_in_thread_or_channel\")\n    @patch(f\"{_HANDLE_MSG}.fetch_ee_implementation_or_noop\")\n    @patch(f\"{_HANDLE_MSG}.get_user_by_email\", return_value=None)\n    def test_new_user_blocked_when_seats_exceeded(\n        self,\n        _mock_get_user: MagicMock,\n        mock_fetch_ee: MagicMock,\n        mock_respond: MagicMock,\n    ) -> None:\n        seat_result = MagicMock(available=False, error_message=\"Seat limit exceeded\")\n        mock_fetch_ee.return_value = lambda **_kw: seat_result\n\n        result = self._call_handle_message()\n\n        assert result is False\n        assert \"seat limit\" in mock_respond.call_args[1][\"text\"]\n        assert \"Onyx administrator\" in mock_respond.call_args[1][\"text\"]\n\n    @pytest.mark.usefixtures(\"db_session\")\n    @patch(f\"{_HANDLE_MSG}.handle_regular_answer\", return_value=False)\n    @patch(f\"{_HANDLE_MSG}.handle_standard_answers\", return_value=False)\n    @patch(f\"{_HANDLE_MSG}.add_slack_user_if_not_exists\")\n    @patch(f\"{_HANDLE_MSG}.fetch_ee_implementation_or_noop\")\n    @patch(f\"{_HANDLE_MSG}.get_user_by_email\")\n    def test_existing_user_bypasses_seat_check(\n        self,\n        mock_get_user: MagicMock,\n        mock_fetch_ee: MagicMock,\n        _mock_add_user: MagicMock,\n        _mock_standard: MagicMock,\n        _mock_regular: MagicMock,\n    ) -> None:\n        mock_get_user.return_value = MagicMock()  # User exists\n\n        self._call_handle_message()\n\n        mock_fetch_ee.assert_not_called()\n\n    @patch(f\"{_HANDLE_MSG}.handle_regular_answer\", return_value=False)\n    @patch(f\"{_HANDLE_MSG}.handle_standard_answers\", return_value=False)\n    @patch(f\"{_HANDLE_MSG}.add_slack_user_if_not_exists\")\n    @patch(f\"{_HANDLE_MSG}.fetch_ee_implementation_or_noop\")\n    @patch(f\"{_HANDLE_MSG}.get_user_by_email\", return_value=None)\n    def test_new_user_allowed_when_seats_available(\n        self,\n        _mock_get_user: MagicMock,\n        mock_fetch_ee: MagicMock,\n        mock_add_user: MagicMock,\n        _mock_standard: MagicMock,\n        _mock_regular: MagicMock,\n        db_session: MagicMock,\n    ) -> None:\n        mock_fetch_ee.return_value = lambda **_kw: MagicMock(available=True)\n\n        self._call_handle_message(email=\"new@test.com\")\n\n        mock_add_user.assert_called_once_with(db_session, \"new@test.com\")\n\n    @patch(f\"{_HANDLE_MSG}.handle_regular_answer\", return_value=False)\n    @patch(f\"{_HANDLE_MSG}.handle_standard_answers\", return_value=False)\n    @patch(f\"{_HANDLE_MSG}.add_slack_user_if_not_exists\")\n    @patch(f\"{_HANDLE_MSG}.fetch_ee_implementation_or_noop\")\n    @patch(f\"{_HANDLE_MSG}.get_user_by_email\", return_value=None)\n    def test_noop_seat_check_allows_new_user(\n        self,\n        _mock_get_user: MagicMock,\n        mock_fetch_ee: MagicMock,\n        mock_add_user: MagicMock,\n        _mock_standard: MagicMock,\n        _mock_regular: MagicMock,\n        db_session: MagicMock,\n    ) -> None:\n        \"\"\"CE mode: noop returns None, user is allowed.\"\"\"\n        mock_fetch_ee.return_value = lambda **_kw: None\n\n        self._call_handle_message(email=\"new@test.com\")\n\n        mock_add_user.assert_called_once_with(db_session, \"new@test.com\")\n\n\n# ---------------------------------------------------------------------------\n# check_seat_availability\n# ---------------------------------------------------------------------------\n\n\nclass TestCheckSeatAvailability:\n    \"\"\"Tests for check_seat_availability function.\"\"\"\n\n    def _check(self, used: int, total: int) -> Any:\n        from ee.onyx.db.license import check_seat_availability\n\n        metadata = MagicMock(seats=total)\n        with (\n            patch(\"ee.onyx.db.license.get_used_seats\", return_value=used),\n            patch(\"ee.onyx.db.license.get_license_metadata\", return_value=metadata),\n        ):\n            return check_seat_availability(MagicMock())\n\n    def test_seats_available(self) -> None:\n        result = self._check(used=5, total=10)\n        assert result.available is True\n\n    def test_seats_exceeded(self) -> None:\n        result = self._check(used=10, total=10)\n        assert result.available is False\n        assert \"Seat limit\" in result.error_message\n\n    def test_at_capacity_allows_fill(self) -> None:\n        \"\"\"Filling to exactly 100% is allowed (uses > not >=).\"\"\"\n        result = self._check(used=9, total=10)\n        assert result.available is True\n\n    def test_no_license_allows_unlimited(self) -> None:\n        from ee.onyx.db.license import check_seat_availability\n\n        with patch(\"ee.onyx.db.license.get_license_metadata\", return_value=None):\n            result = check_seat_availability(MagicMock())\n            assert result.available is True\n\n\n# ---------------------------------------------------------------------------\n# get_used_seats\n# ---------------------------------------------------------------------------\n\n\nclass TestGetUsedSeats:\n    \"\"\"Tests for get_used_seats — anonymous user exclusion.\"\"\"\n\n    @patch(\"ee.onyx.db.license.MULTI_TENANT\", False)\n    @patch(\"onyx.db.engine.sql_engine.get_session_with_current_tenant\")\n    def test_excludes_anonymous_user(self, mock_get_session: MagicMock) -> None:\n        from ee.onyx.db.license import get_used_seats\n\n        mock_session = MagicMock()\n        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)\n        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)\n        mock_session.execute.return_value.scalar.return_value = 3\n\n        assert get_used_seats() == 3\n        mock_session.execute.assert_called_once()\n"
  },
  {
    "path": "backend/tests/unit/onyx/prompts/test_prompt_utils.py",
    "content": "from onyx.prompts.constants import REMINDER_TAG_DESCRIPTION\nfrom onyx.prompts.prompt_utils import replace_reminder_tag\n\n\ndef test_replace_reminder_tag_pattern() -> None:\n    prompt = \"Some text {{REMINDER_TAG_DESCRIPTION}} more text\"\n    result = replace_reminder_tag(prompt)\n    assert \"{{REMINDER_TAG_DESCRIPTION}}\" not in result\n    assert REMINDER_TAG_DESCRIPTION in result\n\n\ndef test_replace_reminder_tag_no_pattern() -> None:\n    prompt = \"Some text without any pattern\"\n    result = replace_reminder_tag(prompt)\n    assert result == prompt\n"
  },
  {
    "path": "backend/tests/unit/onyx/redis_ca.pem",
    "content": "-----BEGIN CERTIFICATE-----\nMIIDXzCCAkegAwIBAgILBAAAAAABIVhTCKIwDQYJKoZIhvcNAQELBQAwTDEgMB4G\nA1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjMxEzARBgNVBAoTCkdsb2JhbFNp\nZ24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMDkwMzE4MTAwMDAwWhcNMjkwMzE4\nMTAwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSMzETMBEG\nA1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCASIwDQYJKoZI\nhvcNAQEBBQADggEPADCCAQoCggEBAMwldpB5BngiFvXAg7aEyiie/QV2EcWtiHL8\nRgJDx7KKnQRfJMsuS+FggkbhUqsMgUdwbN1k0ev1LKMPgj0MK66X17YUhhB5uzsT\ngHeMCOFJ0mpiLx9e+pZo34knlTifBtc+ycsmWQ1z3rDI6SYOgxXG71uL0gRgykmm\nKPZpO/bLyCiR5Z2KYVc3rHQU3HTgOu5yLy6c+9C7v/U9AOEGM+iCK65TpjoWc4zd\nQQ4gOsC0p6Hpsk+QLjJg6VfLuQSSaGjlOCZgdbKfd/+RFO+uIEn8rUAVSNECMWEZ\nXriX7613t2Saer9fwRPvm2L7DWzgVGkWqQPabumDk3F2xmmFghcCAwEAAaNCMEAw\nDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFI/wS3+o\nLkUkrk1Q+mOai97i3Ru8MA0GCSqGSIb3DQEBCwUAA4IBAQBLQNvAUKr+yAzv95ZU\nRUm7lgAJQayzE4aGKAczymvmdLm6AC2upArT9fHxD4q/c2dKg8dEe3jgr25sbwMp\njjM5RcOO5LlXbKr8EpbsU8Yt5CRsuZRj+9xTaGdWPoO4zzUhw8lo/s7awlOqzJCK\n6fBdRoyV3XpYKBovHd7NADdBj+1EbddTKJd+82cEHhXXipa0095MJ6RMG3NzdvQX\nmcIfeg7jLQitChws/zyrVQ4PkX4268NXSb7hLi18YIvDQVETI53O9zJrlAGomecs\nMx86OyXShkDOOyyGeMlhLxS67ttVb9+E7gUJTb0o2HLO02JQZR7rkpeDMdmztcpH\nWD9f\n-----END CERTIFICATE-----\n-----BEGIN CERTIFICATE-----\nMIIGMTCCBBmgAwIBAgICEAAwDQYJKoZIhvcNAQELBQAwajELMAkGA1UEBhMCVVMx\nCzAJBgNVBAgMAkNBMQswCQYDVQQHDAJDQTESMBAGA1UECgwJUmVkaXNMYWJzMS0w\nKwYDVQQDDCRSZWRpc0xhYnMgUm9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwHhcN\nMTgwMjI1MTUzNzM3WhcNMjgwMjIzMTUzNzM3WjBfMQswCQYDVQQGEwJVUzELMAkG\nA1UECAwCQ0ExEjAQBgNVBAoMCVJlZGlzTGFiczEvMC0GA1UEAwwmUkNQIEludGVy\nbWVkaWF0ZSBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwggIiMA0GCSqGSIb3DQEBAQUA\nA4ICDwAwggIKAoICAQDf9dqbxc8Bq7Ctq9rWcxrGNKKHivqLAFpPq02yLPx6fsOv\nTq7GsDChAYBBc4v7Y2Ap9RD5Vs3dIhEANcnolf27QwrG9RMnnvzk8pCvp1o6zSU4\nVuOE1W66/O1/7e2rVxyrnTcP7UgK43zNIXu7+tiAqWsO92uSnuMoGPGpeaUm1jym\nhjWKtkAwDFSqvHY+XL5qDVBEjeUe+WHkYUg40cAXjusAqgm2hZt29c2wnVrxW25W\nP0meNlzHGFdA2AC5z54iRiqj57dTfBTkHoBczQxcyw6hhzxZQ4e5I5zOKjXXEhZN\nr0tA3YC14CTabKRus/JmZieyZzRgEy2oti64tmLYTqSlAD78pRL40VNoaSYetXLw\nhhNsXCHgWaY6d5bLOc/aIQMAV5oLvZQKvuXAF1IDmhPA+bZbpWipp0zagf1P1H3s\nUzsMdn2KM0ejzgotbtNlj5TcrVwpmvE3ktvUAuA+hi3FkVx1US+2Gsp5x4YOzJ7u\nP1WPk6ShF0JgnJH2ILdj6kttTWwFzH17keSFICWDfH/+kM+k7Y1v3EXMQXE7y0T9\nMjvJskz6d/nv+sQhY04xt64xFMGTnZjlJMzfQNi7zWFLTZnDD0lPowq7l3YiPoTT\nt5Xky83lu0KZsZBo0WlWaDG00gLVdtRgVbcuSWxpi5BdLb1kRab66JptWjxwXQID\nAQABo4HrMIHoMDoGA1UdHwQzMDEwL6AtoCuGKWh0dHBzOi8vcmwtY2Etc2VydmVy\nLnJlZGlzbGFicy5jb20vdjEvY3JsMEYGCCsGAQUFBwEBBDowODA2BggrBgEFBQcw\nAYYqaHR0cHM6Ly9ybC1jYS1zZXJ2ZXIucmVkaXNsYWJzLmNvbS92MS9vY3NwMB0G\nA1UdDgQWBBQHar5OKvQUpP2qWt6mckzToeCOHDAfBgNVHSMEGDAWgBQi42wH6hM4\nL2sujEvLM0/u8lRXTzASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIB\nhjANBgkqhkiG9w0BAQsFAAOCAgEAirEn/iTsAKyhd+pu2W3Z5NjCko4NPU0EYUbr\nAP7+POK2rzjIrJO3nFYQ/LLuC7KCXG+2qwan2SAOGmqWst13Y+WHp44Kae0kaChW\nvcYLXXSoGQGC8QuFSNUdaeg3RbMDYFT04dOkqufeWVccoHVxyTSg9eD8LZuHn5jw\n7QDLiEECBmIJHk5Eeo2TAZrx4Yx6ufSUX5HeVjlAzqwtAqdt99uCJ/EL8bgpWbe+\nXoSpvUv0SEC1I1dCAhCKAvRlIOA6VBcmzg5Am12KzkqTul12/VEFIgzqu0Zy2Jbc\nAUPrYVu/+tOGXQaijy7YgwH8P8n3s7ZeUa1VABJHcxrxYduDDJBLZi+MjheUDaZ1\njQRHYevI2tlqeSBqdPKG4zBY5lS0GiAlmuze5oENt0P3XboHoZPHiqcK3VECgTVh\n/BkJcuudETSJcZDmQ8YfoKfBzRQNg2sv/hwvUv73Ss51Sco8GEt2lD8uEdib1Q6z\nzDT5lXJowSzOD5ZA9OGDjnSRL+2riNtKWKEqvtEG3VBJoBzu9GoxbAc7wIZLxmli\niF5a/Zf5X+UXD3s4TMmy6C4QZJpAA2egsSQCnraWO2ULhh7iXMysSkF/nzVfZn43\niqpaB8++9a37hWq14ZmOv0TJIDz//b2+KC4VFXWQ5W5QC6whsjT+OlG4p5ZYG0jo\n616pxqo=\n-----END CERTIFICATE-----\n-----BEGIN CERTIFICATE-----\nMIIFujCCA6KgAwIBAgIJAJ1aTT1lu2ScMA0GCSqGSIb3DQEBCwUAMGoxCzAJBgNV\nBAYTAlVTMQswCQYDVQQIDAJDQTELMAkGA1UEBwwCQ0ExEjAQBgNVBAoMCVJlZGlz\nTGFiczEtMCsGA1UEAwwkUmVkaXNMYWJzIFJvb3QgQ2VydGlmaWNhdGUgQXV0aG9y\naXR5MB4XDTE4MDIyNTE1MjA0MloXDTM4MDIyMDE1MjA0MlowajELMAkGA1UEBhMC\nVVMxCzAJBgNVBAgMAkNBMQswCQYDVQQHDAJDQTESMBAGA1UECgwJUmVkaXNMYWJz\nMS0wKwYDVQQDDCRSZWRpc0xhYnMgUm9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkw\nggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDLEjXy7YrbN5Waau5cd6g1\nG5C2tMmeTpZ0duFAPxNU4oE3RHS5gGiok346fUXuUxbZ6QkuzeN2/2Z+RmRcJhQY\nDm0ZgdG4x59An1TJfnzKKoWj8ISmoHS/TGNBdFzXV7FYNLBuqZouqePI6ReC6Qhl\npp45huV32Q3a6IDrrvx7Wo5ZczEQeFNbCeCOQYNDdTmCyEkHqc2AGo8eoIlSTutT\nULOC7R5gzJVTS0e1hesQ7jmqHjbO+VQS1NAL4/5K6cuTEqUl+XhVhPdLWBXJQ5ag\n54qhX4v+ojLzeU1R/Vc6NjMvVtptWY6JihpgplprN0Yh2556ewcXMeturcKgXfGJ\nxeYzsjzXerEjrVocX5V8BNrg64NlifzTMKNOOv4fVZszq1SIHR8F9ROrqiOdh8iC\nJpUbLpXH9hWCSEO6VRMB2xJoKu3cgl63kF30s77x7wLFMEHiwsQRKxooE1UhgS9K\n2sO4TlQ1eWUvFvHSTVDQDlGQ6zu4qjbOpb3Q8bQwoK+ai2alkXVR4Ltxe9QlgYK3\nStsnPhruzZGA0wbXdpw0bnM+YdlEm5ffSTpNIfgHeaa7Dtb801FtA71ZlH7A6TaI\nSIQuUST9EKmv7xrJyx0W1pGoPOLw5T029aTjnICSLdtV9bLwysrLhIYG5bnPq78B\ncS+jZHFGzD7PUVGQD01nOQIDAQABo2MwYTAdBgNVHQ4EFgQUIuNsB+oTOC9rLoxL\nyzNP7vJUV08wHwYDVR0jBBgwFoAUIuNsB+oTOC9rLoxLyzNP7vJUV08wDwYDVR0T\nAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQADggIBAHfg\nz5pMNUAKdMzK1aS1EDdK9yKz4qicILz5czSLj1mC7HKDRy8cVADUxEICis++CsCu\nrYOvyCVergHQLREcxPq4rc5Nq1uj6J6649NEeh4WazOOjL4ZfQ1jVznMbGy+fJm3\n3Hoelv6jWRG9iqeJZja7/1s6YC6bWymI/OY1e4wUKeNHAo+Vger7MlHV+RuabaX+\nhSJ8bJAM59NCM7AgMTQpJCncrcdLeceYniGy5Q/qt2b5mJkQVkIdy4TPGGB+AXDJ\nD0q3I/JDRkDUFNFdeW0js7fHdsvCR7O3tJy5zIgEV/o/BCkmJVtuwPYOrw/yOlKj\nTY/U7ATAx9VFF6/vYEOMYSmrZlFX+98L6nJtwDqfLB5VTltqZ4H/KBxGE3IRSt9l\nFXy40U+LnXzhhW+7VBAvyYX8GEXhHkKU8Gqk1xitrqfBXY74xKgyUSTolFSfFVgj\nmcM/X4K45bka+qpkj7Kfv/8D4j6aZekwhN2ly6hhC1SmQ8qjMjpG/mrWOSSHZFmf\nybu9iD2AYHeIOkshIl6xYIa++Q/00/vs46IzAbQyriOi0XxlSMMVtPx0Q3isp+ji\nn8Mq9eOuxYOEQ4of8twUkUDd528iwGtEdwf0Q01UyT84S62N8AySl1ZBKXJz6W4F\nUhWfa/HQYOAPDdEjNgnVwLI23b8t0TozyCWw7q8h\n-----END CERTIFICATE-----\n\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/server/features/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/server/features/hierarchy/test_user_access_info.py",
    "content": "\"\"\"Unit tests for _get_user_access_info helper function.\n\nThese tests mock all database operations and don't require a real database.\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom sqlalchemy.orm import Session\n\nfrom onyx.server.features.hierarchy.api import _get_user_access_info\n\n\ndef test_get_user_access_info_returns_email_and_groups() -> None:\n    \"\"\"_get_user_access_info returns the user's email and external group IDs.\"\"\"\n    mock_user = MagicMock()\n    mock_user.email = \"test@example.com\"\n    mock_db_session = MagicMock(spec=Session)\n\n    with patch(\n        \"onyx.server.features.hierarchy.api.get_user_external_group_ids\",\n        return_value=[\"group1\", \"group2\"],\n    ):\n        email, groups = _get_user_access_info(mock_user, mock_db_session)\n\n    assert email == \"test@example.com\"\n    assert groups == [\"group1\", \"group2\"]\n\n\ndef test_get_user_access_info_with_no_groups() -> None:\n    \"\"\"User with no external groups returns empty list.\"\"\"\n    mock_user = MagicMock()\n    mock_user.email = \"solo@example.com\"\n    mock_db_session = MagicMock(spec=Session)\n\n    with patch(\n        \"onyx.server.features.hierarchy.api.get_user_external_group_ids\",\n        return_value=[],\n    ):\n        email, groups = _get_user_access_info(mock_user, mock_db_session)\n\n    assert email == \"solo@example.com\"\n    assert groups == []\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/features/hooks/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/server/manage/embedding/test_embedding_api.py",
    "content": "from types import SimpleNamespace\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.db.models import SearchSettings\nfrom onyx.server.manage.embedding.api import list_embedding_models\nfrom onyx.server.manage.embedding.api import list_embedding_providers\nfrom onyx.utils.encryption import decrypt_bytes_to_string\nfrom onyx.utils.encryption import encrypt_string_to_bytes\nfrom onyx.utils.encryption import mask_string\nfrom onyx.utils.sensitive import SensitiveValue\nfrom shared_configs.enums import EmbeddingProvider\n\n\ndef _build_sensitive_value(raw_value: str) -> SensitiveValue[str]:\n    return SensitiveValue[str](\n        encrypted_bytes=encrypt_string_to_bytes(raw_value),\n        decrypt_fn=decrypt_bytes_to_string,\n    )\n\n\ndef _build_search_settings(raw_api_key: str) -> SimpleNamespace:\n    return SimpleNamespace(\n        id=7,\n        model_name=\"gemini-embedding-001\",\n        normalize=False,\n        query_prefix=\"\",\n        passage_prefix=\"\",\n        provider_type=EmbeddingProvider.GOOGLE,\n        cloud_provider=SimpleNamespace(\n            api_key=_build_sensitive_value(raw_api_key),\n            api_url=\"\",\n            api_version=None,\n            deployment_name=None,\n        ),\n        api_url=\"\",\n    )\n\n\ndef test_list_embedding_models_masks_api_key() -> None:\n    raw_api_key = \"sk-abcdefghijklmnopqrstuvwxyz1234567890\"\n    search_settings = _build_search_settings(raw_api_key)\n\n    with patch(\n        \"onyx.server.manage.embedding.api.get_all_search_settings\",\n        return_value=[search_settings],\n    ):\n        response = list_embedding_models(_=MagicMock(), db_session=MagicMock())\n\n    assert len(response) == 1\n    assert response[0].api_key == mask_string(raw_api_key)\n    assert response[0].api_key != raw_api_key\n\n\ndef test_list_embedding_models_returns_none_for_local_model_api_key() -> None:\n    local_search_settings = SimpleNamespace(\n        id=1,\n        model_name=\"thenlper/gte-small\",\n        normalize=False,\n        query_prefix=\"\",\n        passage_prefix=\"\",\n        provider_type=None,\n        cloud_provider=None,\n        api_url=None,\n    )\n\n    with patch(\n        \"onyx.server.manage.embedding.api.get_all_search_settings\",\n        return_value=[local_search_settings],\n    ):\n        response = list_embedding_models(_=MagicMock(), db_session=MagicMock())\n\n    assert len(response) == 1\n    assert response[0].api_key is None\n\n\ndef test_list_embedding_providers_uses_sensitive_value_masking_once() -> None:\n    raw_api_key = \"sk-abcdefghijklmnopqrstuvwxyz1234567890\"\n    provider_model = SimpleNamespace(\n        provider_type=EmbeddingProvider.GOOGLE,\n        api_key=_build_sensitive_value(raw_api_key),\n        api_url=\"\",\n        api_version=None,\n        deployment_name=None,\n    )\n\n    with patch(\n        \"onyx.server.manage.embedding.api.fetch_existing_embedding_providers\",\n        return_value=[provider_model],\n    ):\n        response = list_embedding_providers(_=MagicMock(), db_session=MagicMock())\n\n    assert len(response) == 1\n    assert response[0].api_key == mask_string(raw_api_key)\n    assert response[0].api_key != mask_string(mask_string(raw_api_key))\n\n\ndef test_search_settings_api_key_property_returns_raw_value_for_runtime_use() -> None:\n    raw_api_key = \"sk-runtime-should-use-unmasked-value-1234567890\"\n    fake_search_settings = SimpleNamespace(\n        cloud_provider=SimpleNamespace(api_key=_build_sensitive_value(raw_api_key))\n    )\n\n    api_key_property = SearchSettings.__dict__[\"api_key\"]\n    assert api_key_property.fget(fake_search_settings) == raw_api_key\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/manage/llm/test_fetch_models_api.py",
    "content": "\"\"\"Tests for LLM model fetch endpoints.\n\nThese tests verify the full request/response flow for fetching models\nfrom dynamic providers (Ollama, OpenRouter, Litellm), including the\nsync-to-DB behavior when provider_name is specified.\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport httpx\nimport pytest\n\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.server.manage.llm.models import BifrostFinalModelResponse\nfrom onyx.server.manage.llm.models import BifrostModelsRequest\nfrom onyx.server.manage.llm.models import LitellmFinalModelResponse\nfrom onyx.server.manage.llm.models import LitellmModelsRequest\nfrom onyx.server.manage.llm.models import LMStudioFinalModelResponse\nfrom onyx.server.manage.llm.models import LMStudioModelsRequest\nfrom onyx.server.manage.llm.models import OllamaFinalModelResponse\nfrom onyx.server.manage.llm.models import OllamaModelsRequest\nfrom onyx.server.manage.llm.models import OpenRouterFinalModelResponse\nfrom onyx.server.manage.llm.models import OpenRouterModelsRequest\n\n\nclass TestGetOllamaAvailableModels:\n    \"\"\"Tests for the Ollama model fetch endpoint.\"\"\"\n\n    @pytest.fixture\n    def mock_ollama_tags_response(self) -> dict:\n        \"\"\"Mock response from Ollama /api/tags endpoint.\"\"\"\n        return {\n            \"models\": [\n                {\"name\": \"llama3:latest\"},\n                {\"name\": \"mistral:7b\"},\n                {\"name\": \"qwen2.5:14b\"},\n            ]\n        }\n\n    @pytest.fixture\n    def mock_ollama_show_response(self) -> dict:\n        \"\"\"Mock response from Ollama /api/show endpoint.\"\"\"\n        return {\n            \"details\": {\"family\": \"llama\", \"families\": [\"llama\"]},\n            \"model_info\": {\n                \"general.architecture\": \"llama\",\n                \"llama.context_length\": 8192,\n            },\n            \"capabilities\": [\n                \"completion\"\n            ],  # Required to pass supports_completion() check\n        }\n\n    def test_returns_model_list(\n        self, mock_ollama_tags_response: dict, mock_ollama_show_response: dict\n    ) -> None:\n        \"\"\"Test that endpoint returns properly formatted model list.\"\"\"\n        from onyx.server.manage.llm.api import get_ollama_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx\") as mock_httpx:\n            # Mock GET for /api/tags\n            mock_get_response = MagicMock()\n            mock_get_response.json.return_value = mock_ollama_tags_response\n            mock_get_response.raise_for_status = MagicMock()\n            mock_httpx.get.return_value = mock_get_response\n\n            # Mock POST for /api/show (called for each model)\n            mock_post_response = MagicMock()\n            mock_post_response.json.return_value = mock_ollama_show_response\n            mock_post_response.raise_for_status = MagicMock()\n            mock_httpx.post.return_value = mock_post_response\n\n            request = OllamaModelsRequest(api_base=\"http://localhost:11434\")\n            results = get_ollama_available_models(request, MagicMock(), mock_session)\n\n            assert len(results) == 3\n            assert all(isinstance(r, OllamaFinalModelResponse) for r in results)\n            # Check display names are generated\n            assert any(\"Llama\" in r.display_name for r in results)\n            assert any(\"Mistral\" in r.display_name for r in results)\n            # Results should be alphabetically sorted by model name\n            assert [r.name for r in results] == sorted(\n                [r.name for r in results], key=str.lower\n            )\n\n    def test_syncs_to_db_when_provider_name_specified(\n        self, mock_ollama_tags_response: dict, mock_ollama_show_response: dict\n    ) -> None:\n        \"\"\"Test that models are synced to DB when provider_name is given.\"\"\"\n        from onyx.server.manage.llm.api import get_ollama_available_models\n\n        mock_session = MagicMock()\n        mock_provider = MagicMock()\n        mock_provider.id = 1\n        mock_provider.model_configurations = []\n\n        with (\n            patch(\"onyx.server.manage.llm.api.httpx\") as mock_httpx,\n            patch(\n                \"onyx.db.llm.fetch_existing_llm_provider\", return_value=mock_provider\n            ),\n        ):\n            mock_get_response = MagicMock()\n            mock_get_response.json.return_value = mock_ollama_tags_response\n            mock_get_response.raise_for_status = MagicMock()\n            mock_httpx.get.return_value = mock_get_response\n\n            mock_post_response = MagicMock()\n            mock_post_response.json.return_value = mock_ollama_show_response\n            mock_post_response.raise_for_status = MagicMock()\n            mock_httpx.post.return_value = mock_post_response\n\n            request = OllamaModelsRequest(\n                api_base=\"http://localhost:11434\",\n                provider_name=\"my-ollama\",\n            )\n            get_ollama_available_models(request, MagicMock(), mock_session)\n\n            # Verify DB operations were called\n            assert mock_session.execute.call_count == 6\n            mock_session.commit.assert_called_once()\n\n    def test_no_sync_when_provider_name_not_specified(\n        self, mock_ollama_tags_response: dict, mock_ollama_show_response: dict\n    ) -> None:\n        \"\"\"Test that models are NOT synced when provider_name is None.\"\"\"\n        from onyx.server.manage.llm.api import get_ollama_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx\") as mock_httpx:\n            mock_get_response = MagicMock()\n            mock_get_response.json.return_value = mock_ollama_tags_response\n            mock_get_response.raise_for_status = MagicMock()\n            mock_httpx.get.return_value = mock_get_response\n\n            mock_post_response = MagicMock()\n            mock_post_response.json.return_value = mock_ollama_show_response\n            mock_post_response.raise_for_status = MagicMock()\n            mock_httpx.post.return_value = mock_post_response\n\n            request = OllamaModelsRequest(api_base=\"http://localhost:11434\")\n            get_ollama_available_models(request, MagicMock(), mock_session)\n\n            # No DB operations should happen\n            mock_session.execute.assert_not_called()\n            mock_session.commit.assert_not_called()\n\n\nclass TestGetOpenRouterAvailableModels:\n    \"\"\"Tests for the OpenRouter model fetch endpoint.\"\"\"\n\n    @pytest.fixture\n    def mock_openrouter_response(self) -> dict:\n        \"\"\"Mock response from OpenRouter API.\"\"\"\n        return {\n            \"data\": [\n                {\n                    \"id\": \"anthropic/claude-3.5-sonnet\",\n                    \"name\": \"Claude 3.5 Sonnet\",\n                    \"context_length\": 200000,\n                    \"architecture\": {\"input_modalities\": [\"text\", \"image\"]},\n                },\n                {\n                    \"id\": \"openai/gpt-4o\",\n                    \"name\": \"GPT-4o\",\n                    \"context_length\": 128000,\n                    \"architecture\": {\"input_modalities\": [\"text\", \"image\"]},\n                },\n                {\n                    \"id\": \"meta-llama/llama-3.1-70b\",\n                    \"name\": \"Llama 3.1 70B\",\n                    \"context_length\": 131072,\n                    \"architecture\": {\"input_modalities\": [\"text\"]},\n                },\n            ]\n        }\n\n    def test_returns_model_list(self, mock_openrouter_response: dict) -> None:\n        \"\"\"Test that endpoint returns properly formatted model list.\"\"\"\n        from onyx.server.manage.llm.api import get_openrouter_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_openrouter_response\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = OpenRouterModelsRequest(\n                api_base=\"https://openrouter.ai/api/v1\",\n                api_key=\"test-key\",\n            )\n            results = get_openrouter_available_models(\n                request, MagicMock(), mock_session\n            )\n\n            assert len(results) == 3\n            assert all(isinstance(r, OpenRouterFinalModelResponse) for r in results)\n            # Check that models have correct context lengths\n            claude = next(r for r in results if \"claude\" in r.name.lower())\n            assert claude.max_input_tokens == 200000\n\n    def test_infers_vision_support(self, mock_openrouter_response: dict) -> None:\n        \"\"\"Test that vision support is correctly inferred from modality.\"\"\"\n        from onyx.server.manage.llm.api import get_openrouter_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_openrouter_response\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = OpenRouterModelsRequest(\n                api_base=\"https://openrouter.ai/api/v1\",\n                api_key=\"test-key\",\n            )\n            results = get_openrouter_available_models(\n                request, MagicMock(), mock_session\n            )\n\n            # Models with \"image\" in modality should have vision support\n            claude = next(r for r in results if \"claude\" in r.name.lower())\n            llama = next(r for r in results if \"llama\" in r.name.lower())\n\n            assert claude.supports_image_input is True\n            assert llama.supports_image_input is False\n\n    def test_syncs_to_db_when_provider_name_specified(\n        self, mock_openrouter_response: dict\n    ) -> None:\n        \"\"\"Test that models are synced to DB when provider_name is given.\"\"\"\n        from onyx.server.manage.llm.api import get_openrouter_available_models\n\n        mock_session = MagicMock()\n        mock_provider = MagicMock()\n        mock_provider.id = 1\n        mock_provider.model_configurations = []\n\n        with (\n            patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get,\n            patch(\n                \"onyx.db.llm.fetch_existing_llm_provider\", return_value=mock_provider\n            ),\n        ):\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_openrouter_response\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = OpenRouterModelsRequest(\n                api_base=\"https://openrouter.ai/api/v1\",\n                api_key=\"test-key\",\n                provider_name=\"my-openrouter\",\n            )\n            get_openrouter_available_models(request, MagicMock(), mock_session)\n\n            # Verify DB operations were called\n            assert mock_session.execute.call_count == 8\n            mock_session.commit.assert_called_once()\n\n    def test_preserves_existing_models_on_sync(\n        self, mock_openrouter_response: dict\n    ) -> None:\n        \"\"\"Test that existing models are not overwritten during sync.\"\"\"\n        from onyx.server.manage.llm.api import get_openrouter_available_models\n\n        mock_session = MagicMock()\n\n        # Provider already has claude model\n        existing_model = MagicMock()\n        existing_model.name = \"anthropic/claude-3.5-sonnet\"\n\n        mock_provider = MagicMock()\n        mock_provider.id = 1\n        mock_provider.model_configurations = [existing_model]\n\n        with (\n            patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get,\n            patch(\n                \"onyx.db.llm.fetch_existing_llm_provider\", return_value=mock_provider\n            ),\n        ):\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_openrouter_response\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = OpenRouterModelsRequest(\n                api_base=\"https://openrouter.ai/api/v1\",\n                api_key=\"test-key\",\n                provider_name=\"my-openrouter\",\n            )\n            get_openrouter_available_models(request, MagicMock(), mock_session)\n\n            # Only 2 new models should be inserted (claude already exists)\n            assert mock_session.execute.call_count == 5\n\n    def test_no_sync_when_provider_name_not_specified(\n        self, mock_openrouter_response: dict\n    ) -> None:\n        \"\"\"Test that models are NOT synced when provider_name is None.\"\"\"\n        from onyx.server.manage.llm.api import get_openrouter_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_openrouter_response\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = OpenRouterModelsRequest(\n                api_base=\"https://openrouter.ai/api/v1\",\n                api_key=\"test-key\",\n            )\n            get_openrouter_available_models(request, MagicMock(), mock_session)\n\n            # No DB operations should happen\n            mock_session.execute.assert_not_called()\n            mock_session.commit.assert_not_called()\n\n\nclass TestGetLMStudioAvailableModels:\n    \"\"\"Tests for the LM Studio model fetch endpoint.\"\"\"\n\n    @pytest.fixture\n    def mock_lm_studio_response(self) -> dict:\n        \"\"\"Mock response from LM Studio /api/v1/models endpoint.\"\"\"\n        return {\n            \"models\": [\n                {\n                    \"key\": \"lmstudio-community/Meta-Llama-3-8B\",\n                    \"type\": \"llm\",\n                    \"display_name\": \"Meta Llama 3 8B\",\n                    \"max_context_length\": 8192,\n                    \"capabilities\": {\"vision\": False},\n                },\n                {\n                    \"key\": \"lmstudio-community/Qwen2.5-VL-7B\",\n                    \"type\": \"llm\",\n                    \"display_name\": \"Qwen 2.5 VL 7B\",\n                    \"max_context_length\": 32768,\n                    \"capabilities\": {\"vision\": True},\n                },\n                {\n                    \"key\": \"text-embedding-nomic-embed-text-v1.5\",\n                    \"type\": \"embedding\",\n                    \"display_name\": \"Nomic Embed Text v1.5\",\n                    \"max_context_length\": 2048,\n                    \"capabilities\": {},\n                },\n                {\n                    \"key\": \"lmstudio-community/DeepSeek-R1-8B\",\n                    \"type\": \"llm\",\n                    \"display_name\": \"DeepSeek R1 8B\",\n                    \"max_context_length\": 65536,\n                    \"capabilities\": {\"vision\": False},\n                },\n            ]\n        }\n\n    def test_returns_model_list(self, mock_lm_studio_response: dict) -> None:\n        \"\"\"Test that endpoint returns properly formatted LLM-only model list.\"\"\"\n        from onyx.server.manage.llm.api import get_lm_studio_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx\") as mock_httpx:\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_lm_studio_response\n            mock_response.raise_for_status = MagicMock()\n            mock_httpx.get.return_value = mock_response\n\n            request = LMStudioModelsRequest(api_base=\"http://localhost:1234\")\n            results = get_lm_studio_available_models(request, MagicMock(), mock_session)\n\n            # Only LLM-type models should be returned (embedding filtered out)\n            assert len(results) == 3\n            assert all(isinstance(r, LMStudioFinalModelResponse) for r in results)\n            names = [r.name for r in results]\n            assert \"text-embedding-nomic-embed-text-v1.5\" not in names\n            # Results should be alphabetically sorted by model name\n            assert names == sorted(names, key=str.lower)\n\n    def test_infers_vision_support(self, mock_lm_studio_response: dict) -> None:\n        \"\"\"Test that vision support is correctly read from capabilities.\"\"\"\n        from onyx.server.manage.llm.api import get_lm_studio_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx\") as mock_httpx:\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_lm_studio_response\n            mock_response.raise_for_status = MagicMock()\n            mock_httpx.get.return_value = mock_response\n\n            request = LMStudioModelsRequest(api_base=\"http://localhost:1234\")\n            results = get_lm_studio_available_models(request, MagicMock(), mock_session)\n\n            qwen = next(r for r in results if \"Qwen\" in r.display_name)\n            llama = next(r for r in results if \"Llama\" in r.display_name)\n\n            assert qwen.supports_image_input is True\n            assert llama.supports_image_input is False\n\n    def test_infers_reasoning_from_model_name(self) -> None:\n        \"\"\"Test that reasoning is inferred from model name when not in capabilities.\"\"\"\n        from onyx.server.manage.llm.api import get_lm_studio_available_models\n\n        mock_session = MagicMock()\n        response = {\n            \"models\": [\n                {\n                    \"key\": \"lmstudio-community/DeepSeek-R1-8B\",\n                    \"type\": \"llm\",\n                    \"display_name\": \"DeepSeek R1 8B\",\n                    \"max_context_length\": 65536,\n                    \"capabilities\": {},\n                },\n                {\n                    \"key\": \"lmstudio-community/Meta-Llama-3-8B\",\n                    \"type\": \"llm\",\n                    \"display_name\": \"Meta Llama 3 8B\",\n                    \"max_context_length\": 8192,\n                    \"capabilities\": {},\n                },\n            ]\n        }\n\n        with patch(\"onyx.server.manage.llm.api.httpx\") as mock_httpx:\n            mock_response = MagicMock()\n            mock_response.json.return_value = response\n            mock_response.raise_for_status = MagicMock()\n            mock_httpx.get.return_value = mock_response\n\n            request = LMStudioModelsRequest(api_base=\"http://localhost:1234\")\n            results = get_lm_studio_available_models(request, MagicMock(), mock_session)\n\n            deepseek = next(r for r in results if \"DeepSeek\" in r.display_name)\n            llama = next(r for r in results if \"Llama\" in r.display_name)\n\n            assert deepseek.supports_reasoning is True\n            assert llama.supports_reasoning is False\n\n    def test_uses_display_name_from_api(self, mock_lm_studio_response: dict) -> None:\n        \"\"\"Test that display_name from the API is used directly.\"\"\"\n        from onyx.server.manage.llm.api import get_lm_studio_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx\") as mock_httpx:\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_lm_studio_response\n            mock_response.raise_for_status = MagicMock()\n            mock_httpx.get.return_value = mock_response\n\n            request = LMStudioModelsRequest(api_base=\"http://localhost:1234\")\n            results = get_lm_studio_available_models(request, MagicMock(), mock_session)\n\n            llama = next(r for r in results if \"Llama\" in r.name)\n            assert llama.display_name == \"Meta Llama 3 8B\"\n            assert llama.max_input_tokens == 8192\n\n    def test_strips_trailing_v1_from_api_base(self) -> None:\n        \"\"\"Test that /v1 suffix is stripped before building the native API URL.\"\"\"\n        from onyx.server.manage.llm.api import get_lm_studio_available_models\n\n        mock_session = MagicMock()\n        response = {\n            \"models\": [\n                {\n                    \"key\": \"test-model\",\n                    \"type\": \"llm\",\n                    \"display_name\": \"Test\",\n                    \"max_context_length\": 4096,\n                    \"capabilities\": {},\n                },\n            ]\n        }\n\n        with patch(\"onyx.server.manage.llm.api.httpx\") as mock_httpx:\n            mock_response = MagicMock()\n            mock_response.json.return_value = response\n            mock_response.raise_for_status = MagicMock()\n            mock_httpx.get.return_value = mock_response\n\n            request = LMStudioModelsRequest(api_base=\"http://localhost:1234/v1\")\n            get_lm_studio_available_models(request, MagicMock(), mock_session)\n\n            # Should hit /api/v1/models, not /v1/api/v1/models\n            mock_httpx.get.assert_called_once()\n            called_url = mock_httpx.get.call_args[0][0]\n            assert called_url == \"http://localhost:1234/api/v1/models\"\n\n    def test_falls_back_to_stored_api_key(self) -> None:\n        \"\"\"Test that stored API key is used when api_key_changed is False.\"\"\"\n        from onyx.server.manage.llm.api import get_lm_studio_available_models\n\n        mock_session = MagicMock()\n        mock_provider = MagicMock()\n        mock_provider.custom_config = {\"LM_STUDIO_API_KEY\": \"stored-secret\"}\n\n        response = {\n            \"models\": [\n                {\n                    \"key\": \"test-model\",\n                    \"type\": \"llm\",\n                    \"display_name\": \"Test\",\n                    \"max_context_length\": 4096,\n                    \"capabilities\": {},\n                },\n            ]\n        }\n\n        with (\n            patch(\"onyx.server.manage.llm.api.httpx\") as mock_httpx,\n            patch(\n                \"onyx.server.manage.llm.api.fetch_existing_llm_provider\",\n                return_value=mock_provider,\n            ),\n        ):\n            mock_response = MagicMock()\n            mock_response.json.return_value = response\n            mock_response.raise_for_status = MagicMock()\n            mock_httpx.get.return_value = mock_response\n\n            request = LMStudioModelsRequest(\n                api_base=\"http://localhost:1234\",\n                api_key=\"masked-value\",\n                api_key_changed=False,\n                provider_name=\"my-lm-studio\",\n            )\n            get_lm_studio_available_models(request, MagicMock(), mock_session)\n\n            headers = mock_httpx.get.call_args[1][\"headers\"]\n            assert headers[\"Authorization\"] == \"Bearer stored-secret\"\n\n    def test_uses_submitted_api_key_when_changed(self) -> None:\n        \"\"\"Test that submitted API key is used when api_key_changed is True.\"\"\"\n        from onyx.server.manage.llm.api import get_lm_studio_available_models\n\n        mock_session = MagicMock()\n        response = {\n            \"models\": [\n                {\n                    \"key\": \"test-model\",\n                    \"type\": \"llm\",\n                    \"display_name\": \"Test\",\n                    \"max_context_length\": 4096,\n                    \"capabilities\": {},\n                },\n            ]\n        }\n\n        with patch(\"onyx.server.manage.llm.api.httpx\") as mock_httpx:\n            mock_response = MagicMock()\n            mock_response.json.return_value = response\n            mock_response.raise_for_status = MagicMock()\n            mock_httpx.get.return_value = mock_response\n\n            request = LMStudioModelsRequest(\n                api_base=\"http://localhost:1234\",\n                api_key=\"new-secret\",\n                api_key_changed=True,\n                provider_name=\"my-lm-studio\",\n            )\n            get_lm_studio_available_models(request, MagicMock(), mock_session)\n\n            headers = mock_httpx.get.call_args[1][\"headers\"]\n            assert headers[\"Authorization\"] == \"Bearer new-secret\"\n\n    def test_raises_on_empty_models(self) -> None:\n        \"\"\"Test that an error is raised when no models are returned.\"\"\"\n        from onyx.error_handling.exceptions import OnyxError\n        from onyx.server.manage.llm.api import get_lm_studio_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx\") as mock_httpx:\n            mock_response = MagicMock()\n            mock_response.json.return_value = {\"models\": []}\n            mock_response.raise_for_status = MagicMock()\n            mock_httpx.get.return_value = mock_response\n\n            request = LMStudioModelsRequest(api_base=\"http://localhost:1234\")\n            with pytest.raises(OnyxError):\n                get_lm_studio_available_models(request, MagicMock(), mock_session)\n\n    def test_raises_on_only_non_llm_models(self) -> None:\n        \"\"\"Test that an error is raised when all models are non-LLM type.\"\"\"\n        from onyx.error_handling.exceptions import OnyxError\n        from onyx.server.manage.llm.api import get_lm_studio_available_models\n\n        mock_session = MagicMock()\n        response = {\n            \"models\": [\n                {\n                    \"key\": \"embedding-model\",\n                    \"type\": \"embedding\",\n                    \"display_name\": \"Embedding\",\n                    \"max_context_length\": 2048,\n                    \"capabilities\": {},\n                },\n            ]\n        }\n\n        with patch(\"onyx.server.manage.llm.api.httpx\") as mock_httpx:\n            mock_response = MagicMock()\n            mock_response.json.return_value = response\n            mock_response.raise_for_status = MagicMock()\n            mock_httpx.get.return_value = mock_response\n\n            request = LMStudioModelsRequest(api_base=\"http://localhost:1234\")\n            with pytest.raises(OnyxError):\n                get_lm_studio_available_models(request, MagicMock(), mock_session)\n\n\nclass TestGetLitellmAvailableModels:\n    \"\"\"Tests for the Litellm proxy model fetch endpoint.\"\"\"\n\n    @pytest.fixture\n    def mock_litellm_response(self) -> dict:\n        \"\"\"Mock response from Litellm /v1/models endpoint.\"\"\"\n        return {\n            \"data\": [\n                {\n                    \"id\": \"gpt-4o\",\n                    \"object\": \"model\",\n                    \"created\": 1700000000,\n                    \"owned_by\": \"openai\",\n                },\n                {\n                    \"id\": \"claude-3-5-sonnet\",\n                    \"object\": \"model\",\n                    \"created\": 1700000001,\n                    \"owned_by\": \"anthropic\",\n                },\n                {\n                    \"id\": \"gemini-pro\",\n                    \"object\": \"model\",\n                    \"created\": 1700000002,\n                    \"owned_by\": \"google\",\n                },\n            ]\n        }\n\n    def test_returns_model_list(self, mock_litellm_response: dict) -> None:\n        \"\"\"Test that endpoint returns properly formatted model list.\"\"\"\n        from onyx.server.manage.llm.api import get_litellm_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_litellm_response\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = LitellmModelsRequest(\n                api_base=\"http://localhost:4000\",\n                api_key=\"test-key\",\n            )\n            results = get_litellm_available_models(request, MagicMock(), mock_session)\n\n            assert len(results) == 3\n            assert all(isinstance(r, LitellmFinalModelResponse) for r in results)\n\n    def test_model_fields_parsed_correctly(self, mock_litellm_response: dict) -> None:\n        \"\"\"Test that provider_name and model_name are correctly extracted.\"\"\"\n        from onyx.server.manage.llm.api import get_litellm_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_litellm_response\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = LitellmModelsRequest(\n                api_base=\"http://localhost:4000\",\n                api_key=\"test-key\",\n            )\n            results = get_litellm_available_models(request, MagicMock(), mock_session)\n\n            gpt = next(r for r in results if r.model_name == \"gpt-4o\")\n            assert gpt.provider_name == \"openai\"\n\n            claude = next(r for r in results if r.model_name == \"claude-3-5-sonnet\")\n            assert claude.provider_name == \"anthropic\"\n\n    def test_results_sorted_by_model_name(self, mock_litellm_response: dict) -> None:\n        \"\"\"Test that results are alphabetically sorted by model_name.\"\"\"\n        from onyx.server.manage.llm.api import get_litellm_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_litellm_response\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = LitellmModelsRequest(\n                api_base=\"http://localhost:4000\",\n                api_key=\"test-key\",\n            )\n            results = get_litellm_available_models(request, MagicMock(), mock_session)\n\n            model_names = [r.model_name for r in results]\n            assert model_names == sorted(model_names, key=str.lower)\n\n    def test_empty_data_raises_onyx_error(self) -> None:\n        \"\"\"Test that empty model list raises OnyxError.\"\"\"\n        from onyx.server.manage.llm.api import get_litellm_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = {\"data\": []}\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = LitellmModelsRequest(\n                api_base=\"http://localhost:4000\",\n                api_key=\"test-key\",\n            )\n            with pytest.raises(OnyxError, match=\"No models found\"):\n                get_litellm_available_models(request, MagicMock(), mock_session)\n\n    def test_missing_data_key_raises_onyx_error(self) -> None:\n        \"\"\"Test that response without 'data' key raises OnyxError.\"\"\"\n        from onyx.server.manage.llm.api import get_litellm_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = {}\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = LitellmModelsRequest(\n                api_base=\"http://localhost:4000\",\n                api_key=\"test-key\",\n            )\n            with pytest.raises(OnyxError):\n                get_litellm_available_models(request, MagicMock(), mock_session)\n\n    def test_skips_unparseable_entries(self) -> None:\n        \"\"\"Test that malformed model entries are skipped without failing.\"\"\"\n        from onyx.server.manage.llm.api import get_litellm_available_models\n\n        mock_session = MagicMock()\n        response_with_bad_entry = {\n            \"data\": [\n                {\n                    \"id\": \"gpt-4o\",\n                    \"object\": \"model\",\n                    \"created\": 1700000000,\n                    \"owned_by\": \"openai\",\n                },\n                # Missing required fields\n                {\"bad_field\": \"bad_value\"},\n            ]\n        }\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = response_with_bad_entry\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = LitellmModelsRequest(\n                api_base=\"http://localhost:4000\",\n                api_key=\"test-key\",\n            )\n            results = get_litellm_available_models(request, MagicMock(), mock_session)\n\n            assert len(results) == 1\n            assert results[0].model_name == \"gpt-4o\"\n\n    def test_all_entries_unparseable_raises_onyx_error(self) -> None:\n        \"\"\"Test that OnyxError is raised when all entries fail to parse.\"\"\"\n        from onyx.server.manage.llm.api import get_litellm_available_models\n\n        mock_session = MagicMock()\n        response_all_bad = {\n            \"data\": [\n                {\"bad_field\": \"bad_value\"},\n                {\"another_bad\": 123},\n            ]\n        }\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = response_all_bad\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = LitellmModelsRequest(\n                api_base=\"http://localhost:4000\",\n                api_key=\"test-key\",\n            )\n            with pytest.raises(OnyxError, match=\"No compatible models\"):\n                get_litellm_available_models(request, MagicMock(), mock_session)\n\n    def test_api_base_trailing_slash_handled(self) -> None:\n        \"\"\"Test that trailing slashes in api_base are handled correctly.\"\"\"\n        from onyx.server.manage.llm.api import get_litellm_available_models\n\n        mock_session = MagicMock()\n        mock_litellm_response = {\n            \"data\": [\n                {\n                    \"id\": \"gpt-4o\",\n                    \"object\": \"model\",\n                    \"created\": 1700000000,\n                    \"owned_by\": \"openai\",\n                },\n            ]\n        }\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_litellm_response\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = LitellmModelsRequest(\n                api_base=\"http://localhost:4000/\",\n                api_key=\"test-key\",\n            )\n            get_litellm_available_models(request, MagicMock(), mock_session)\n\n            # Should call /v1/models without double slashes\n            call_args = mock_get.call_args\n            assert call_args[0][0] == \"http://localhost:4000/v1/models\"\n\n    def test_connection_failure_raises_onyx_error(self) -> None:\n        \"\"\"Test that connection failures are wrapped in OnyxError.\"\"\"\n        from onyx.server.manage.llm.api import get_litellm_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_get.side_effect = httpx.ConnectError(\n                \"Connection refused\", request=MagicMock()\n            )\n\n            request = LitellmModelsRequest(\n                api_base=\"http://localhost:4000\",\n                api_key=\"test-key\",\n            )\n            with pytest.raises(OnyxError, match=\"Failed to fetch LiteLLM proxy models\"):\n                get_litellm_available_models(request, MagicMock(), mock_session)\n\n    def test_401_raises_authentication_error(self) -> None:\n        \"\"\"Test that a 401 response raises OnyxError with authentication message.\"\"\"\n        from onyx.server.manage.llm.api import get_litellm_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.status_code = 401\n            mock_get.side_effect = httpx.HTTPStatusError(\n                \"Unauthorized\", request=MagicMock(), response=mock_response\n            )\n\n            request = LitellmModelsRequest(\n                api_base=\"http://localhost:4000\",\n                api_key=\"bad-key\",\n            )\n            with pytest.raises(OnyxError, match=\"Authentication failed\"):\n                get_litellm_available_models(request, MagicMock(), mock_session)\n\n    def test_404_raises_not_found_error(self) -> None:\n        \"\"\"Test that a 404 response raises OnyxError with endpoint not found message.\"\"\"\n        from onyx.server.manage.llm.api import get_litellm_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.status_code = 404\n            mock_get.side_effect = httpx.HTTPStatusError(\n                \"Not Found\", request=MagicMock(), response=mock_response\n            )\n\n            request = LitellmModelsRequest(\n                api_base=\"http://localhost:4000\",\n                api_key=\"test-key\",\n            )\n            with pytest.raises(OnyxError, match=\"endpoint not found\"):\n                get_litellm_available_models(request, MagicMock(), mock_session)\n\n\nclass TestGetBifrostAvailableModels:\n    \"\"\"Tests for the Bifrost model fetch endpoint.\"\"\"\n\n    @pytest.fixture\n    def mock_bifrost_response(self) -> dict:\n        \"\"\"Mock response from Bifrost /v1/models endpoint.\"\"\"\n        return {\n            \"data\": [\n                {\n                    \"id\": \"anthropic/claude-3-5-sonnet\",\n                    \"name\": \"Claude 3.5 Sonnet\",\n                    \"context_length\": 200000,\n                },\n                {\n                    \"id\": \"openai/gpt-4o\",\n                    \"name\": \"GPT-4o\",\n                    \"context_length\": 128000,\n                },\n                {\n                    \"id\": \"deepseek/deepseek-r1\",\n                    \"name\": \"DeepSeek R1\",\n                    \"context_length\": 64000,\n                },\n            ]\n        }\n\n    def test_returns_model_list(self, mock_bifrost_response: dict) -> None:\n        \"\"\"Test that endpoint returns properly formatted non-embedding models.\"\"\"\n        from onyx.server.manage.llm.api import get_bifrost_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_bifrost_response\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = BifrostModelsRequest(api_base=\"https://bifrost.example.com\")\n            results = get_bifrost_available_models(request, MagicMock(), mock_session)\n\n            assert len(results) == 3\n            assert all(isinstance(r, BifrostFinalModelResponse) for r in results)\n            assert [r.name for r in results] == sorted(\n                [r.name for r in results], key=str.lower\n            )\n\n    def test_infers_vision_support(self, mock_bifrost_response: dict) -> None:\n        \"\"\"Test that vision support is inferred from provider/model IDs.\"\"\"\n        from onyx.server.manage.llm.api import get_bifrost_available_models\n\n        mock_session = MagicMock()\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = mock_bifrost_response\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = BifrostModelsRequest(api_base=\"https://bifrost.example.com\")\n            results = get_bifrost_available_models(request, MagicMock(), mock_session)\n\n            claude = next(r for r in results if r.name == \"anthropic/claude-3-5-sonnet\")\n            gpt4o = next(r for r in results if r.name == \"openai/gpt-4o\")\n            deepseek = next(r for r in results if r.name == \"deepseek/deepseek-r1\")\n\n            assert claude.supports_image_input is True\n            assert gpt4o.supports_image_input is True\n            assert deepseek.supports_image_input is False\n\n    def test_existing_v1_suffix_is_not_duplicated(self) -> None:\n        \"\"\"Test that an existing /v1 suffix still hits a single /v1/models endpoint.\"\"\"\n        from onyx.server.manage.llm.api import get_bifrost_available_models\n\n        mock_session = MagicMock()\n        response = {\"data\": [{\"id\": \"openai/gpt-4o\", \"name\": \"GPT-4o\"}]}\n\n        with patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get:\n            mock_response = MagicMock()\n            mock_response.json.return_value = response\n            mock_response.raise_for_status = MagicMock()\n            mock_get.return_value = mock_response\n\n            request = BifrostModelsRequest(api_base=\"https://bifrost.example.com/v1\")\n            get_bifrost_available_models(request, MagicMock(), mock_session)\n\n            called_url = mock_get.call_args[0][0]\n            assert called_url == \"https://bifrost.example.com/v1/models\"\n\n    def test_request_failure_is_logged_and_wrapped(self) -> None:\n        \"\"\"Test that request-layer failures are logged before raising OnyxError.\"\"\"\n        from onyx.server.manage.llm.api import get_bifrost_available_models\n\n        mock_session = MagicMock()\n\n        with (\n            patch(\"onyx.server.manage.llm.api.httpx.get\") as mock_get,\n            patch(\"onyx.server.manage.llm.api.logger.warning\") as mock_warning,\n        ):\n            mock_get.side_effect = httpx.ConnectError(\n                \"Connection refused\", request=MagicMock()\n            )\n\n            request = BifrostModelsRequest(api_base=\"https://bifrost.example.com\")\n            with pytest.raises(OnyxError, match=\"Failed to fetch Bifrost models\"):\n                get_bifrost_available_models(request, MagicMock(), mock_session)\n\n            mock_warning.assert_called_once()\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/manage/llm/test_llm_provider_utils.py",
    "content": "\"\"\"Tests for LLM provider utilities.\"\"\"\n\nfrom onyx.server.manage.llm.utils import generate_bedrock_display_name\nfrom onyx.server.manage.llm.utils import generate_ollama_display_name\nfrom onyx.server.manage.llm.utils import infer_vision_support\nfrom onyx.server.manage.llm.utils import is_embedding_model\nfrom onyx.server.manage.llm.utils import is_reasoning_model\nfrom onyx.server.manage.llm.utils import is_valid_bedrock_model\nfrom onyx.server.manage.llm.utils import strip_openrouter_vendor_prefix\n\n\nclass TestGenerateBedrockDisplayName:\n    \"\"\"Tests for Bedrock display name generation.\"\"\"\n\n    def test_claude_model_basic(self) -> None:\n        \"\"\"Test basic Claude model name.\"\"\"\n        result = generate_bedrock_display_name(\n            \"anthropic.claude-3-5-sonnet-20241022-v2:0\"\n        )\n        assert \"Claude\" in result\n        assert \"3.5\" in result\n        assert \"Sonnet\" in result\n\n    def test_claude_model_with_region_prefix(self) -> None:\n        \"\"\"Test Claude model with region prefix (cross-region inference).\"\"\"\n        result = generate_bedrock_display_name(\n            \"us.anthropic.claude-3-5-sonnet-20241022-v2:0\"\n        )\n        assert \"Claude\" in result\n        assert \"(us)\" in result\n\n    def test_llama_model(self) -> None:\n        \"\"\"Test Llama model name.\"\"\"\n        result = generate_bedrock_display_name(\"meta.llama3-70b-instruct-v1:0\")\n        assert \"Llama\" in result\n        assert \"70B\" in result or \"70b\" in result.lower()\n\n    def test_nova_model(self) -> None:\n        \"\"\"Test Amazon Nova model name.\"\"\"\n        result = generate_bedrock_display_name(\"amazon.nova-pro-v1:0\")\n        assert \"Nova\" in result\n        assert \"Pro\" in result\n\n    def test_mistral_model(self) -> None:\n        \"\"\"Test Mistral model name.\"\"\"\n        result = generate_bedrock_display_name(\"mistral.mistral-large-2407-v1:0\")\n        assert \"Mistral\" in result\n\n    def test_removes_version_suffix(self) -> None:\n        \"\"\"Test that version suffixes like :0 are removed.\"\"\"\n        result = generate_bedrock_display_name(\"anthropic.claude-3-opus:0\")\n        assert \":0\" not in result\n\n    def test_removes_date_stamps(self) -> None:\n        \"\"\"Test that date stamps like -20241022-v2 are removed.\"\"\"\n        result = generate_bedrock_display_name(\n            \"anthropic.claude-3-5-sonnet-20241022-v2:0\"\n        )\n        assert \"20241022\" not in result\n\n\nclass TestGenerateOllamaDisplayName:\n    \"\"\"Tests for Ollama display name generation.\"\"\"\n\n    def test_llama_basic(self) -> None:\n        \"\"\"Test basic Llama model.\"\"\"\n        result = generate_ollama_display_name(\"llama3:latest\")\n        assert \"Llama\" in result\n\n    def test_llama_with_size(self) -> None:\n        \"\"\"Test Llama with size tag.\"\"\"\n        result = generate_ollama_display_name(\"llama3:70b\")\n        assert \"Llama\" in result\n        assert \"70B\" in result\n\n    def test_qwen_model(self) -> None:\n        \"\"\"Test Qwen model.\"\"\"\n        result = generate_ollama_display_name(\"qwen2.5:7b\")\n        assert \"Qwen\" in result\n        assert \"7B\" in result\n\n    def test_mistral_model(self) -> None:\n        \"\"\"Test Mistral model.\"\"\"\n        result = generate_ollama_display_name(\"mistral:latest\")\n        assert \"Mistral\" in result\n\n    def test_deepseek_model(self) -> None:\n        \"\"\"Test DeepSeek model.\"\"\"\n        result = generate_ollama_display_name(\"deepseek-r1:14b\")\n        assert \"DeepSeek\" in result\n        assert \"14B\" in result\n\n    def test_skips_latest_tag(self) -> None:\n        \"\"\"Test that 'latest' tag is not shown.\"\"\"\n        result = generate_ollama_display_name(\"llama3:latest\")\n        assert \"latest\" not in result.lower()\n\n    def test_version_number_preserved(self) -> None:\n        \"\"\"Test that version numbers like 3.3 are preserved.\"\"\"\n        result = generate_ollama_display_name(\"llama3.3:70b\")\n        assert \"3.3\" in result or \"3 3\" in result  # Either format is acceptable\n\n\nclass TestStripOpenrouterVendorPrefix:\n    \"\"\"Tests for OpenRouter vendor prefix stripping.\"\"\"\n\n    def test_strips_matching_prefix(self) -> None:\n        \"\"\"Test stripping matching vendor prefix.\"\"\"\n        result = strip_openrouter_vendor_prefix(\"Microsoft: Phi 4\", \"microsoft/phi-4\")\n        assert result == \"Phi 4\"\n\n    def test_strips_mistral_prefix(self) -> None:\n        \"\"\"Test stripping Mistral prefix.\"\"\"\n        result = strip_openrouter_vendor_prefix(\n            \"Mistral: Mixtral 8x7B Instruct\", \"mistralai/mixtral-8x7b\"\n        )\n        assert result == \"Mixtral 8x7B Instruct\"\n\n    def test_preserves_when_no_prefix(self) -> None:\n        \"\"\"Test preserving name when no prefix pattern.\"\"\"\n        result = strip_openrouter_vendor_prefix(\n            \"Claude 3.5 Sonnet\", \"anthropic/claude-3.5-sonnet\"\n        )\n        assert result == \"Claude 3.5 Sonnet\"\n\n    def test_preserves_when_no_slash_in_id(self) -> None:\n        \"\"\"Test preserving name when no slash in model ID.\"\"\"\n        result = strip_openrouter_vendor_prefix(\"Some Model\", \"some-model\")\n        assert result == \"Some Model\"\n\n    def test_handles_partial_vendor_match(self) -> None:\n        \"\"\"Test handling partial vendor name matches.\"\"\"\n        # \"Mistral\" should match \"mistralai\"\n        result = strip_openrouter_vendor_prefix(\n            \"Mistral: Some Model\", \"mistralai/some-model\"\n        )\n        assert result == \"Some Model\"\n\n\nclass TestIsValidBedrockModel:\n    \"\"\"Tests for Bedrock model validation.\"\"\"\n\n    def test_valid_claude_model(self) -> None:\n        \"\"\"Test valid Claude model.\"\"\"\n        assert is_valid_bedrock_model(\"anthropic.claude-3-5-sonnet\", True) is True\n\n    def test_invalid_embedding_model(self) -> None:\n        \"\"\"Test that embedding models are filtered.\"\"\"\n        assert is_valid_bedrock_model(\"amazon.titan-embed-text-v1\", True) is False\n\n    def test_invalid_image_model(self) -> None:\n        \"\"\"Test that image generation models are filtered.\"\"\"\n        assert is_valid_bedrock_model(\"stability.stable-diffusion-xl\", True) is False\n\n    def test_invalid_non_streaming(self) -> None:\n        \"\"\"Test that non-streaming models are filtered.\"\"\"\n        assert is_valid_bedrock_model(\"anthropic.claude-3-sonnet\", False) is False\n\n    def test_empty_model_id(self) -> None:\n        \"\"\"Test that empty model ID is invalid.\"\"\"\n        assert is_valid_bedrock_model(\"\", True) is False\n\n\nclass TestInferVisionSupport:\n    \"\"\"Tests for vision support inference.\"\"\"\n\n    def test_claude_3_has_vision(self) -> None:\n        \"\"\"Test Claude 3 models have vision.\"\"\"\n        assert infer_vision_support(\"anthropic.claude-3-5-sonnet\") is True\n\n    def test_claude_4_has_vision(self) -> None:\n        \"\"\"Test Claude 4 models have vision.\"\"\"\n        assert infer_vision_support(\"anthropic.claude-4-opus\") is True\n\n    def test_nova_pro_has_vision(self) -> None:\n        \"\"\"Test Nova Pro has vision.\"\"\"\n        assert infer_vision_support(\"amazon.nova-pro-v1\") is True\n\n    def test_bifrost_claude_has_vision(self) -> None:\n        \"\"\"Test Bifrost Claude models are recognized as vision-capable.\"\"\"\n        assert infer_vision_support(\"anthropic/claude-3-5-sonnet\") is True\n\n    def test_bifrost_gpt4o_has_vision(self) -> None:\n        \"\"\"Test Bifrost GPT-4o models are recognized as vision-capable.\"\"\"\n        assert infer_vision_support(\"openai/gpt-4o\") is True\n\n    def test_mistral_no_vision(self) -> None:\n        \"\"\"Test Mistral doesn't have vision (not in known list).\"\"\"\n        assert infer_vision_support(\"mistral.mistral-large\") is False\n\n\nclass TestIsReasoningModel:\n    \"\"\"Tests for reasoning model detection.\"\"\"\n\n    def test_o1_is_reasoning(self) -> None:\n        \"\"\"Test o1 models are detected as reasoning.\"\"\"\n        assert is_reasoning_model(\"openai/o1-preview\", \"O1 Preview\") is True\n\n    def test_o3_is_reasoning(self) -> None:\n        \"\"\"Test o3 models are detected as reasoning.\"\"\"\n        assert is_reasoning_model(\"openai/o3-mini\", \"O3 Mini\") is True\n\n    def test_deepseek_r1_is_reasoning(self) -> None:\n        \"\"\"Test DeepSeek R1 is detected as reasoning.\"\"\"\n        assert is_reasoning_model(\"deepseek/deepseek-r1\", \"DeepSeek R1\") is True\n\n    def test_qwq_is_reasoning(self) -> None:\n        \"\"\"Test QwQ is detected as reasoning.\"\"\"\n        assert is_reasoning_model(\"qwen/qwq-32b\", \"QwQ 32B\") is True\n\n    def test_gpt_4_not_reasoning(self) -> None:\n        \"\"\"Test GPT-4 is not detected as reasoning.\"\"\"\n        assert is_reasoning_model(\"openai/gpt-4\", \"GPT-4\") is False\n\n    def test_claude_not_reasoning(self) -> None:\n        \"\"\"Test Claude is not detected as reasoning.\"\"\"\n        assert (\n            is_reasoning_model(\"anthropic/claude-3-5-sonnet\", \"Claude 3.5 Sonnet\")\n            is False\n        )\n\n\nclass TestIsEmbeddingModel:\n    \"\"\"Tests for embedding model detection.\"\"\"\n\n    def test_openai_embedding_ada(self) -> None:\n        assert is_embedding_model(\"text-embedding-ada-002\") is True\n\n    def test_openai_embedding_3_small(self) -> None:\n        assert is_embedding_model(\"text-embedding-3-small\") is True\n\n    def test_openai_embedding_3_large(self) -> None:\n        assert is_embedding_model(\"text-embedding-3-large\") is True\n\n    def test_cohere_embed_model(self) -> None:\n        assert is_embedding_model(\"embed-english-v3.0\") is True\n\n    def test_bedrock_titan_embed(self) -> None:\n        assert is_embedding_model(\"amazon.titan-embed-text-v1\") is True\n\n    def test_gpt4o_not_embedding(self) -> None:\n        assert is_embedding_model(\"gpt-4o\") is False\n\n    def test_gpt4_not_embedding(self) -> None:\n        assert is_embedding_model(\"gpt-4\") is False\n\n    def test_dall_e_not_embedding(self) -> None:\n        assert is_embedding_model(\"dall-e-3\") is False\n\n    def test_unknown_custom_model_not_embedding(self) -> None:\n        \"\"\"Custom/local models not in litellm's model DB should default to False.\"\"\"\n        assert is_embedding_model(\"my-custom-local-model-v1\") is False\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/manage/test_bulk_invite_limit.py",
    "content": "\"\"\"Test bulk invite limit for free trial tenants.\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom fastapi import HTTPException\n\nfrom onyx.server.manage.models import EmailInviteStatus\nfrom onyx.server.manage.users import bulk_invite_users\n\n\n@patch(\"onyx.server.manage.users.MULTI_TENANT\", True)\n@patch(\"onyx.server.manage.users.is_tenant_on_trial_fn\", return_value=True)\n@patch(\"onyx.server.manage.users.get_current_tenant_id\", return_value=\"test_tenant\")\n@patch(\"onyx.server.manage.users.get_invited_users\", return_value=[])\n@patch(\"onyx.server.manage.users.get_all_users\", return_value=[])\n@patch(\"onyx.server.manage.users.NUM_FREE_TRIAL_USER_INVITES\", 5)\ndef test_trial_tenant_cannot_exceed_invite_limit(*_mocks: None) -> None:\n    \"\"\"Trial tenants cannot invite more users than the configured limit.\"\"\"\n    emails = [f\"user{i}@example.com\" for i in range(6)]\n\n    with pytest.raises(HTTPException) as exc_info:\n        bulk_invite_users(emails=emails)\n\n    assert exc_info.value.status_code == 403\n    assert \"invite limit\" in exc_info.value.detail.lower()\n\n\n@patch(\"onyx.server.manage.users.MULTI_TENANT\", True)\n@patch(\"onyx.server.manage.users.DEV_MODE\", True)\n@patch(\"onyx.server.manage.users.ENABLE_EMAIL_INVITES\", False)\n@patch(\"onyx.server.manage.users.is_tenant_on_trial_fn\", return_value=True)\n@patch(\"onyx.server.manage.users.get_current_tenant_id\", return_value=\"test_tenant\")\n@patch(\"onyx.server.manage.users.get_invited_users\", return_value=[])\n@patch(\"onyx.server.manage.users.get_all_users\", return_value=[])\n@patch(\"onyx.server.manage.users.write_invited_users\", return_value=3)\n@patch(\"onyx.server.manage.users.enforce_seat_limit\")\n@patch(\"onyx.server.manage.users.NUM_FREE_TRIAL_USER_INVITES\", 5)\n@patch(\n    \"onyx.server.manage.users.fetch_ee_implementation_or_noop\",\n    return_value=lambda *_args: None,\n)\ndef test_trial_tenant_can_invite_within_limit(*_mocks: None) -> None:\n    \"\"\"Trial tenants can invite users when under the limit.\"\"\"\n    emails = [\"user1@example.com\", \"user2@example.com\", \"user3@example.com\"]\n\n    result = bulk_invite_users(emails=emails)\n\n    assert result.invited_count == 3\n    assert result.email_invite_status == EmailInviteStatus.DISABLED\n\n\n# --- email_invite_status tests ---\n\n_COMMON_PATCHES = [\n    patch(\"onyx.server.manage.users.MULTI_TENANT\", False),\n    patch(\"onyx.server.manage.users.get_current_tenant_id\", return_value=\"test_tenant\"),\n    patch(\"onyx.server.manage.users.get_invited_users\", return_value=[]),\n    patch(\"onyx.server.manage.users.get_all_users\", return_value=[]),\n    patch(\"onyx.server.manage.users.write_invited_users\", return_value=1),\n    patch(\"onyx.server.manage.users.enforce_seat_limit\"),\n]\n\n\ndef _with_common_patches(fn: object) -> object:\n    for p in reversed(_COMMON_PATCHES):\n        fn = p(fn)  # type: ignore\n    return fn\n\n\n@_with_common_patches\n@patch(\"onyx.server.manage.users.ENABLE_EMAIL_INVITES\", False)\ndef test_email_invite_status_disabled(*_mocks: None) -> None:\n    \"\"\"When email invites are disabled, status is disabled.\"\"\"\n    result = bulk_invite_users(emails=[\"user@example.com\"])\n\n    assert result.email_invite_status == EmailInviteStatus.DISABLED\n\n\n@_with_common_patches\n@patch(\"onyx.server.manage.users.ENABLE_EMAIL_INVITES\", True)\n@patch(\"onyx.server.manage.users.EMAIL_CONFIGURED\", False)\ndef test_email_invite_status_not_configured(*_mocks: None) -> None:\n    \"\"\"When email invites are enabled but no server is configured, status is not_configured.\"\"\"\n    result = bulk_invite_users(emails=[\"user@example.com\"])\n\n    assert result.email_invite_status == EmailInviteStatus.NOT_CONFIGURED\n\n\n@_with_common_patches\n@patch(\"onyx.server.manage.users.ENABLE_EMAIL_INVITES\", True)\n@patch(\"onyx.server.manage.users.EMAIL_CONFIGURED\", True)\n@patch(\"onyx.server.manage.users.send_user_email_invite\")\ndef test_email_invite_status_sent(mock_send: MagicMock, *_mocks: None) -> None:\n    \"\"\"When email invites are enabled and configured, status is sent.\"\"\"\n    result = bulk_invite_users(emails=[\"user@example.com\"])\n\n    mock_send.assert_called_once()\n    assert result.email_invite_status == EmailInviteStatus.SENT\n\n\n@_with_common_patches\n@patch(\"onyx.server.manage.users.ENABLE_EMAIL_INVITES\", True)\n@patch(\"onyx.server.manage.users.EMAIL_CONFIGURED\", True)\n@patch(\n    \"onyx.server.manage.users.send_user_email_invite\",\n    side_effect=Exception(\"SMTP auth failed\"),\n)\ndef test_email_invite_status_send_failed(*_mocks: None) -> None:\n    \"\"\"When email sending throws, status is send_failed and invite is still saved.\"\"\"\n    result = bulk_invite_users(emails=[\"user@example.com\"])\n\n    assert result.email_invite_status == EmailInviteStatus.SEND_FAILED\n    assert result.invited_count == 1\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/manage/voice/test_voice_api_validation.py",
    "content": "import pytest\n\nfrom onyx.error_handling.exceptions import OnyxError\nfrom onyx.server.manage.voice.api import _validate_voice_api_base\n\n\ndef test_validate_voice_api_base_blocks_private_for_non_azure() -> None:\n    with pytest.raises(OnyxError, match=\"Invalid target URI\"):\n        _validate_voice_api_base(\"openai\", \"http://127.0.0.1:11434\")\n\n\ndef test_validate_voice_api_base_allows_private_for_azure() -> None:\n    validated = _validate_voice_api_base(\"azure\", \"http://127.0.0.1:5000\")\n    assert validated == \"http://127.0.0.1:5000\"\n\n\ndef test_validate_voice_api_base_blocks_metadata_for_azure() -> None:\n    with pytest.raises(OnyxError, match=\"Invalid target URI\"):\n        _validate_voice_api_base(\"azure\", \"http://metadata.google.internal/\")\n\n\ndef test_validate_voice_api_base_returns_none_for_none() -> None:\n    assert _validate_voice_api_base(\"openai\", None) is None\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/scim/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/server/scim/conftest.py",
    "content": "\"\"\"Shared fixtures for SCIM endpoint unit tests.\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nfrom collections.abc import Generator\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom fastapi.responses import JSONResponse\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.server.scim.api import ScimJSONResponse\nfrom ee.onyx.server.scim.models import ScimGroupResource\nfrom ee.onyx.server.scim.models import ScimListResponse\nfrom ee.onyx.server.scim.models import ScimName\nfrom ee.onyx.server.scim.models import ScimUserResource\nfrom ee.onyx.server.scim.providers.base import ScimProvider\nfrom ee.onyx.server.scim.providers.entra import EntraProvider\nfrom ee.onyx.server.scim.providers.okta import OktaProvider\nfrom onyx.db.models import ScimToken\nfrom onyx.db.models import ScimUserMapping\nfrom onyx.db.models import User\nfrom onyx.db.models import UserGroup\nfrom onyx.db.models import UserRole\n\n# Every supported SCIM provider must appear here so that all endpoint tests\n# run against it.  When adding a new provider, add its class to this list.\nSCIM_PROVIDERS: list[type[ScimProvider]] = [OktaProvider, EntraProvider]\n\n\n@pytest.fixture\ndef mock_db_session() -> MagicMock:\n    \"\"\"A MagicMock standing in for a SQLAlchemy Session.\"\"\"\n    return MagicMock(spec=Session)\n\n\n@pytest.fixture\ndef mock_token() -> MagicMock:\n    \"\"\"A MagicMock standing in for a verified ScimToken.\"\"\"\n    token = MagicMock(spec=ScimToken)\n    token.id = 1\n    return token\n\n\n@pytest.fixture(params=SCIM_PROVIDERS, ids=[p.__name__ for p in SCIM_PROVIDERS])\ndef provider(request: pytest.FixtureRequest) -> ScimProvider:\n    \"\"\"Parameterized provider — runs each test with every provider in SCIM_PROVIDERS.\"\"\"\n    return request.param()\n\n\n@pytest.fixture\ndef mock_dal() -> Generator[MagicMock, None, None]:\n    \"\"\"Patch ScimDAL construction in api module and yield the mock instance.\"\"\"\n    with patch(\"ee.onyx.server.scim.api.ScimDAL\") as cls:\n        dal = cls.return_value\n        # User defaults\n        dal.get_user.return_value = None\n        dal.get_user_by_email.return_value = None\n        dal.get_user_mapping_by_user_id.return_value = None\n        dal.get_user_mapping_by_external_id.return_value = None\n        dal.list_users.return_value = ([], 0)\n        # Group defaults\n        dal.get_group.return_value = None\n        dal.get_group_by_name.return_value = None\n        dal.get_group_mapping_by_group_id.return_value = None\n        dal.get_group_mapping_by_external_id.return_value = None\n        dal.get_group_members.return_value = []\n        dal.list_groups.return_value = ([], 0)\n        # User-group relationship defaults\n        dal.get_user_groups.return_value = []\n        dal.get_users_groups_batch.return_value = {}\n        yield dal\n\n\ndef make_scim_user(**kwargs: Any) -> ScimUserResource:\n    \"\"\"Build a ScimUserResource with sensible defaults.\"\"\"\n    defaults: dict[str, Any] = {\n        \"userName\": \"test@example.com\",\n        \"externalId\": \"ext-default\",\n        \"active\": True,\n        \"name\": ScimName(givenName=\"Test\", familyName=\"User\"),\n    }\n    defaults.update(kwargs)\n    return ScimUserResource(**defaults)\n\n\ndef make_scim_group(**kwargs: Any) -> ScimGroupResource:\n    \"\"\"Build a ScimGroupResource with sensible defaults.\"\"\"\n    defaults: dict[str, Any] = {\"displayName\": \"Engineering\"}\n    defaults.update(kwargs)\n    return ScimGroupResource(**defaults)\n\n\ndef make_db_user(**kwargs: Any) -> MagicMock:\n    \"\"\"Build a mock User ORM object with configurable attributes.\"\"\"\n    user = MagicMock(spec=User)\n    user.id = kwargs.get(\"id\", uuid4())\n    user.email = kwargs.get(\"email\", \"test@example.com\")\n    user.is_active = kwargs.get(\"is_active\", True)\n    user.personal_name = kwargs.get(\"personal_name\", \"Test User\")\n    user.role = kwargs.get(\"role\", UserRole.BASIC)\n    return user\n\n\ndef make_db_group(**kwargs: Any) -> MagicMock:\n    \"\"\"Build a mock UserGroup ORM object with configurable attributes.\"\"\"\n    group = MagicMock(spec=UserGroup)\n    group.id = kwargs.get(\"id\", 1)\n    group.name = kwargs.get(\"name\", \"Engineering\")\n    group.is_up_for_deletion = kwargs.get(\"is_up_for_deletion\", False)\n    group.is_up_to_date = kwargs.get(\"is_up_to_date\", True)\n    group.is_default = kwargs.get(\"is_default\", False)\n    return group\n\n\ndef make_user_mapping(**kwargs: Any) -> MagicMock:\n    \"\"\"Build a mock ScimUserMapping ORM object with configurable attributes.\"\"\"\n    mapping = MagicMock(spec=ScimUserMapping)\n    mapping.id = kwargs.get(\"id\", 1)\n    mapping.external_id = kwargs.get(\"external_id\", \"ext-default\")\n    mapping.user_id = kwargs.get(\"user_id\", uuid4())\n    mapping.scim_username = kwargs.get(\"scim_username\", None)\n    mapping.department = kwargs.get(\"department\", None)\n    mapping.manager = kwargs.get(\"manager\", None)\n    mapping.given_name = kwargs.get(\"given_name\", None)\n    mapping.family_name = kwargs.get(\"family_name\", None)\n    mapping.scim_emails_json = kwargs.get(\"scim_emails_json\", None)\n    return mapping\n\n\ndef assert_scim_error(result: object, expected_status: int) -> None:\n    \"\"\"Assert *result* is a JSONResponse with the given status code.\"\"\"\n    assert isinstance(result, JSONResponse)\n    assert result.status_code == expected_status\n\n\n# ---------------------------------------------------------------------------\n# Response parsing helpers\n# ---------------------------------------------------------------------------\n\n\ndef parse_scim_user(result: object, *, status: int = 200) -> ScimUserResource:\n    \"\"\"Assert *result* is a ScimJSONResponse and parse as ScimUserResource.\"\"\"\n    assert isinstance(\n        result, ScimJSONResponse\n    ), f\"Expected ScimJSONResponse, got {type(result).__name__}\"\n    assert result.status_code == status\n    return ScimUserResource.model_validate(json.loads(result.body))\n\n\ndef parse_scim_group(result: object, *, status: int = 200) -> ScimGroupResource:\n    \"\"\"Assert *result* is a ScimJSONResponse and parse as ScimGroupResource.\"\"\"\n    assert isinstance(\n        result, ScimJSONResponse\n    ), f\"Expected ScimJSONResponse, got {type(result).__name__}\"\n    assert result.status_code == status\n    return ScimGroupResource.model_validate(json.loads(result.body))\n\n\ndef parse_scim_list(result: object) -> ScimListResponse:\n    \"\"\"Assert *result* is a ScimJSONResponse and parse as ScimListResponse.\"\"\"\n    assert isinstance(\n        result, ScimJSONResponse\n    ), f\"Expected ScimJSONResponse, got {type(result).__name__}\"\n    assert result.status_code == 200\n    return ScimListResponse.model_validate(json.loads(result.body))\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/scim/test_admin.py",
    "content": "\"\"\"Tests for SCIM admin token management endpoints.\"\"\"\n\nfrom datetime import datetime\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom fastapi import HTTPException\nfrom sqlalchemy.orm import Session\n\nfrom ee.onyx.db.scim import ScimDAL\nfrom ee.onyx.server.enterprise_settings.api import create_scim_token\nfrom ee.onyx.server.enterprise_settings.api import get_active_scim_token\nfrom ee.onyx.server.scim.models import ScimTokenCreate\nfrom onyx.db.models import ScimToken\nfrom onyx.db.models import User\n\n\n@pytest.fixture\ndef mock_db_session() -> MagicMock:\n    return MagicMock(spec=Session)\n\n\n@pytest.fixture\ndef scim_dal(mock_db_session: MagicMock) -> ScimDAL:\n    return ScimDAL(mock_db_session)\n\n\n@pytest.fixture\ndef admin_user() -> User:\n    user = User(id=uuid4(), email=\"admin@test.com\")\n    user.is_active = True\n    return user\n\n\ndef _make_token(token_id: int, name: str, *, is_active: bool = True) -> ScimToken:\n    return ScimToken(\n        id=token_id,\n        name=name,\n        hashed_token=\"h\" * 64,\n        token_display=\"onyx_scim_****abcd\",\n        is_active=is_active,\n        created_by_id=uuid4(),\n        created_at=datetime(2026, 1, 1),\n        last_used_at=None,\n    )\n\n\nclass TestGetActiveToken:\n    def test_returns_token_metadata(self, scim_dal: ScimDAL, admin_user: User) -> None:\n        token = _make_token(1, \"prod-token\")\n        scim_dal._session.scalar.return_value = token  # type: ignore[attr-defined]\n\n        result = get_active_scim_token(_=admin_user, dal=scim_dal)\n\n        assert result.id == 1\n        assert result.name == \"prod-token\"\n        assert result.is_active is True\n\n    def test_raises_404_when_no_active_token(\n        self, scim_dal: ScimDAL, admin_user: User\n    ) -> None:\n        scim_dal._session.scalar.return_value = None  # type: ignore[attr-defined]\n\n        with pytest.raises(HTTPException) as exc_info:\n            get_active_scim_token(_=admin_user, dal=scim_dal)\n\n        assert exc_info.value.status_code == 404\n\n\nclass TestCreateToken:\n    @patch(\"ee.onyx.server.enterprise_settings.api.generate_scim_token\")\n    def test_creates_token_and_revokes_previous(\n        self,\n        mock_generate: MagicMock,\n        scim_dal: ScimDAL,\n        admin_user: User,\n    ) -> None:\n        mock_generate.return_value = (\"raw_token_val\", \"hashed_val\", \"****abcd\")\n\n        # Simulate one existing active token that should get revoked\n        existing = _make_token(1, \"old-token\", is_active=True)\n        scim_dal._session.scalars.return_value.all.return_value = [existing]  # type: ignore[attr-defined]\n\n        # Simulate DB defaults that would be set on INSERT/flush\n        def fake_add(obj: ScimToken) -> None:\n            obj.id = 2\n            obj.is_active = True\n            obj.created_at = datetime(2026, 2, 1)\n\n        scim_dal._session.add.side_effect = fake_add  # type: ignore[attr-defined]\n\n        body = ScimTokenCreate(name=\"new-token\")\n        result = create_scim_token(body=body, user=admin_user, dal=scim_dal)\n\n        # Previous token was revoked (by create_token's internal revocation)\n        assert existing.is_active is False\n\n        # New token returned with raw value\n        assert result.raw_token == \"raw_token_val\"\n        assert result.name == \"new-token\"\n        assert result.is_active is True\n\n        # Session was committed\n        scim_dal._session.commit.assert_called_once()  # type: ignore[attr-defined]\n\n    @patch(\"ee.onyx.server.enterprise_settings.api.generate_scim_token\")\n    def test_creates_first_token_when_none_exist(\n        self,\n        mock_generate: MagicMock,\n        scim_dal: ScimDAL,\n        admin_user: User,\n    ) -> None:\n        mock_generate.return_value = (\"raw_token_val\", \"hashed_val\", \"****abcd\")\n\n        # No existing tokens\n        scim_dal._session.scalars.return_value.all.return_value = []  # type: ignore[attr-defined]\n\n        def fake_add(obj: ScimToken) -> None:\n            obj.id = 1\n            obj.is_active = True\n            obj.created_at = datetime(2026, 2, 1)\n\n        scim_dal._session.add.side_effect = fake_add  # type: ignore[attr-defined]\n\n        body = ScimTokenCreate(name=\"first-token\")\n        result = create_scim_token(body=body, user=admin_user, dal=scim_dal)\n\n        assert result.raw_token == \"raw_token_val\"\n        assert result.name == \"first-token\"\n        assert result.is_active is True\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/scim/test_auth.py",
    "content": "from unittest.mock import MagicMock\n\nimport pytest\n\nfrom ee.onyx.server.scim.auth import _hash_scim_token\nfrom ee.onyx.server.scim.auth import generate_scim_token\nfrom ee.onyx.server.scim.auth import SCIM_TOKEN_PREFIX\nfrom ee.onyx.server.scim.auth import ScimAuthError\nfrom ee.onyx.server.scim.auth import verify_scim_token\n\n\nclass TestGenerateScimToken:\n    def test_returns_three_strings(self) -> None:\n        raw, hashed, display = generate_scim_token()\n        assert isinstance(raw, str)\n        assert isinstance(hashed, str)\n        assert isinstance(display, str)\n\n    def test_raw_token_has_prefix(self) -> None:\n        raw, _, _ = generate_scim_token()\n        assert raw.startswith(SCIM_TOKEN_PREFIX)\n\n    def test_hash_is_sha256_hex(self) -> None:\n        raw, hashed, _ = generate_scim_token()\n        assert len(hashed) == 64\n        assert hashed == _hash_scim_token(raw)\n\n    def test_display_shows_last_four_chars(self) -> None:\n        raw, _, display = generate_scim_token()\n        assert display.endswith(raw[-4:])\n        assert \"****\" in display\n\n    def test_tokens_are_unique(self) -> None:\n        tokens = {generate_scim_token()[0] for _ in range(10)}\n        assert len(tokens) == 10\n\n\nclass TestHashScimToken:\n    def test_deterministic(self) -> None:\n        assert _hash_scim_token(\"test\") == _hash_scim_token(\"test\")\n\n    def test_different_inputs_different_hashes(self) -> None:\n        assert _hash_scim_token(\"a\") != _hash_scim_token(\"b\")\n\n\nclass TestVerifyScimToken:\n    def _make_request(self, auth_header: str | None = None) -> MagicMock:\n        request = MagicMock()\n        headers: dict[str, str] = {}\n        if auth_header is not None:\n            headers[\"Authorization\"] = auth_header\n        request.headers = headers\n        return request\n\n    def _make_dal(self, token: MagicMock | None = None) -> MagicMock:\n        dal = MagicMock()\n        dal.get_token_by_hash.return_value = token\n        return dal\n\n    def test_missing_header_raises_401(self) -> None:\n        request = self._make_request(None)\n        dal = self._make_dal()\n        with pytest.raises(ScimAuthError) as exc_info:\n            verify_scim_token(request, dal)\n        assert exc_info.value.status_code == 401\n        assert \"Missing\" in str(exc_info.value.detail)\n\n    def test_wrong_prefix_raises_401(self) -> None:\n        request = self._make_request(\"Bearer on_some_api_key\")\n        dal = self._make_dal()\n        with pytest.raises(ScimAuthError) as exc_info:\n            verify_scim_token(request, dal)\n        assert exc_info.value.status_code == 401\n\n    def test_token_not_in_db_raises_401(self) -> None:\n        raw, _, _ = generate_scim_token()\n        request = self._make_request(f\"Bearer {raw}\")\n        dal = self._make_dal(token=None)\n        with pytest.raises(ScimAuthError) as exc_info:\n            verify_scim_token(request, dal)\n        assert exc_info.value.status_code == 401\n        assert \"Invalid\" in str(exc_info.value.detail)\n\n    def test_inactive_token_raises_401(self) -> None:\n        raw, _, _ = generate_scim_token()\n        request = self._make_request(f\"Bearer {raw}\")\n        mock_token = MagicMock()\n        mock_token.is_active = False\n        dal = self._make_dal(token=mock_token)\n        with pytest.raises(ScimAuthError) as exc_info:\n            verify_scim_token(request, dal)\n        assert exc_info.value.status_code == 401\n        assert \"revoked\" in str(exc_info.value.detail)\n\n    def test_valid_token_returns_token(self) -> None:\n        raw, _, _ = generate_scim_token()\n        request = self._make_request(f\"Bearer {raw}\")\n        mock_token = MagicMock()\n        mock_token.is_active = True\n        dal = self._make_dal(token=mock_token)\n        result = verify_scim_token(request, dal)\n        assert result is mock_token\n        dal.get_token_by_hash.assert_called_once()\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/scim/test_entra.py",
    "content": "\"\"\"Comprehensive Entra ID (Azure AD) SCIM compatibility tests.\n\nCovers the full Entra provisioning lifecycle: service discovery, user CRUD\nwith enterprise extension schema, group CRUD with excludedAttributes, and\nall Entra-specific behavioral quirks (PascalCase ops, enterprise URN in\nPATCH value dicts).\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\nfrom fastapi import Response\n\nfrom ee.onyx.server.scim.api import create_user\nfrom ee.onyx.server.scim.api import delete_user\nfrom ee.onyx.server.scim.api import get_group\nfrom ee.onyx.server.scim.api import get_resource_types\nfrom ee.onyx.server.scim.api import get_schemas\nfrom ee.onyx.server.scim.api import get_service_provider_config\nfrom ee.onyx.server.scim.api import get_user\nfrom ee.onyx.server.scim.api import list_groups\nfrom ee.onyx.server.scim.api import list_users\nfrom ee.onyx.server.scim.api import patch_group\nfrom ee.onyx.server.scim.api import patch_user\nfrom ee.onyx.server.scim.api import replace_user\nfrom ee.onyx.server.scim.api import ScimJSONResponse\nfrom ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA\nfrom ee.onyx.server.scim.models import SCIM_USER_SCHEMA\nfrom ee.onyx.server.scim.models import ScimEnterpriseExtension\nfrom ee.onyx.server.scim.models import ScimGroupMember\nfrom ee.onyx.server.scim.models import ScimGroupResource\nfrom ee.onyx.server.scim.models import ScimManagerRef\nfrom ee.onyx.server.scim.models import ScimMappingFields\nfrom ee.onyx.server.scim.models import ScimName\nfrom ee.onyx.server.scim.models import ScimPatchOperation\nfrom ee.onyx.server.scim.models import ScimPatchOperationType\nfrom ee.onyx.server.scim.models import ScimPatchRequest\nfrom ee.onyx.server.scim.models import ScimPatchResourceValue\nfrom ee.onyx.server.scim.models import ScimUserResource\nfrom ee.onyx.server.scim.providers.base import ScimProvider\nfrom ee.onyx.server.scim.providers.entra import EntraProvider\nfrom tests.unit.onyx.server.scim.conftest import make_db_group\nfrom tests.unit.onyx.server.scim.conftest import make_db_user\nfrom tests.unit.onyx.server.scim.conftest import make_scim_user\nfrom tests.unit.onyx.server.scim.conftest import make_user_mapping\nfrom tests.unit.onyx.server.scim.conftest import parse_scim_group\nfrom tests.unit.onyx.server.scim.conftest import parse_scim_list\nfrom tests.unit.onyx.server.scim.conftest import parse_scim_user\n\n\n@pytest.fixture\ndef entra_provider() -> ScimProvider:\n    \"\"\"An EntraProvider instance for Entra-specific endpoint tests.\"\"\"\n    return EntraProvider()\n\n\n# ---------------------------------------------------------------------------\n# Service Discovery\n# ---------------------------------------------------------------------------\n\n\nclass TestEntraServiceDiscovery:\n    \"\"\"Entra expects enterprise extension in discovery endpoints.\"\"\"\n\n    def test_service_provider_config_advertises_patch(self) -> None:\n        config = get_service_provider_config()\n        assert config.patch.supported is True\n\n    def test_resource_types_include_enterprise_extension(self) -> None:\n        result = get_resource_types()\n        assert isinstance(result, ScimJSONResponse)\n        parsed = json.loads(result.body)\n        assert \"Resources\" in parsed\n        user_type = next(rt for rt in parsed[\"Resources\"] if rt[\"id\"] == \"User\")\n        extension_schemas = [ext[\"schema\"] for ext in user_type[\"schemaExtensions\"]]\n        assert SCIM_ENTERPRISE_USER_SCHEMA in extension_schemas\n\n    def test_schemas_include_enterprise_user(self) -> None:\n        result = get_schemas()\n        assert isinstance(result, ScimJSONResponse)\n        parsed = json.loads(result.body)\n        schema_ids = [s[\"id\"] for s in parsed[\"Resources\"]]\n        assert SCIM_ENTERPRISE_USER_SCHEMA in schema_ids\n\n    def test_enterprise_schema_has_expected_attributes(self) -> None:\n        result = get_schemas()\n        assert isinstance(result, ScimJSONResponse)\n        parsed = json.loads(result.body)\n        enterprise = next(\n            s for s in parsed[\"Resources\"] if s[\"id\"] == SCIM_ENTERPRISE_USER_SCHEMA\n        )\n        attr_names = {a[\"name\"] for a in enterprise[\"attributes\"]}\n        assert \"department\" in attr_names\n        assert \"manager\" in attr_names\n\n    def test_service_discovery_content_type(self) -> None:\n        \"\"\"SCIM responses must use application/scim+json content type.\"\"\"\n        result = get_resource_types()\n        assert isinstance(result, ScimJSONResponse)\n        assert result.media_type == \"application/scim+json\"\n\n\n# ---------------------------------------------------------------------------\n# User Lifecycle (Entra-specific)\n# ---------------------------------------------------------------------------\n\n\nclass TestEntraUserLifecycle:\n    \"\"\"Test user CRUD through Entra's lens: enterprise schemas, PascalCase ops.\"\"\"\n\n    @patch(\"ee.onyx.server.scim.api._check_seat_availability\", return_value=None)\n    def test_create_user_includes_enterprise_schema(\n        self,\n        mock_seats: MagicMock,  # noqa: ARG002\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_user_by_email.return_value = None\n        resource = make_scim_user(userName=\"alice@contoso.com\")\n\n        result = create_user(\n            user_resource=resource,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_user(result, status=201)\n        assert SCIM_ENTERPRISE_USER_SCHEMA in resource.schemas\n        assert SCIM_USER_SCHEMA in resource.schemas\n\n    @patch(\"ee.onyx.server.scim.api._check_seat_availability\", return_value=None)\n    def test_create_user_with_enterprise_extension(\n        self,\n        mock_seats: MagicMock,  # noqa: ARG002\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"Enterprise extension department/manager should round-trip on create.\"\"\"\n        mock_dal.get_user_by_email.return_value = None\n        resource = make_scim_user(\n            userName=\"alice@contoso.com\",\n            enterprise_extension=ScimEnterpriseExtension(\n                department=\"Engineering\",\n                manager=ScimManagerRef(value=\"mgr-uuid-123\"),\n            ),\n        )\n\n        result = create_user(\n            user_resource=resource,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_user(result, status=201)\n        assert resource.enterprise_extension is not None\n        assert resource.enterprise_extension.department == \"Engineering\"\n        assert resource.enterprise_extension.manager is not None\n        assert resource.enterprise_extension.manager.value == \"mgr-uuid-123\"\n\n        # Verify DAL received the enterprise fields\n        mock_dal.create_user_mapping.assert_called_once()\n        call_kwargs = mock_dal.create_user_mapping.call_args[1]\n        assert call_kwargs[\"fields\"] == ScimMappingFields(\n            department=\"Engineering\",\n            manager=\"mgr-uuid-123\",\n            given_name=\"Test\",\n            family_name=\"User\",\n        )\n\n    def test_get_user_includes_enterprise_schema(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        user = make_db_user(email=\"alice@contoso.com\")\n        mock_dal.get_user.return_value = user\n\n        result = get_user(\n            user_id=str(user.id),\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_user(result)\n        assert SCIM_ENTERPRISE_USER_SCHEMA in resource.schemas\n\n    def test_get_user_returns_enterprise_extension_data(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"GET should return stored enterprise extension data.\"\"\"\n        user = make_db_user(email=\"alice@contoso.com\")\n        mock_dal.get_user.return_value = user\n        mapping = make_user_mapping(user_id=user.id)\n        mapping.department = \"Sales\"\n        mapping.manager = \"mgr-456\"\n        mock_dal.get_user_mapping_by_user_id.return_value = mapping\n\n        result = get_user(\n            user_id=str(user.id),\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_user(result)\n        assert resource.enterprise_extension is not None\n        assert resource.enterprise_extension.department == \"Sales\"\n        assert resource.enterprise_extension.manager is not None\n        assert resource.enterprise_extension.manager.value == \"mgr-456\"\n\n    def test_list_users_includes_enterprise_schema(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        user = make_db_user(email=\"alice@contoso.com\")\n        mapping = make_user_mapping(external_id=\"entra-ext-1\", user_id=user.id)\n        mock_dal.list_users.return_value = ([(user, mapping)], 1)\n\n        result = list_users(\n            filter=None,\n            startIndex=1,\n            count=100,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        parsed = parse_scim_list(result)\n        resource = parsed.Resources[0]\n        assert isinstance(resource, ScimUserResource)\n        assert SCIM_ENTERPRISE_USER_SCHEMA in resource.schemas\n\n    def test_patch_user_deactivate_with_pascal_case_replace(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"Entra sends ``\"Replace\"`` (PascalCase) instead of ``\"replace\"``.\"\"\"\n        user = make_db_user(is_active=True)\n        mock_dal.get_user.return_value = user\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=\"Replace\",  # type: ignore[arg-type]\n                    path=\"active\",\n                    value=False,\n                )\n            ]\n        )\n\n        result = patch_user(\n            user_id=str(user.id),\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_user(result)\n        # Mock doesn't propagate the change, so verify via the DAL call\n        mock_dal.update_user.assert_called_once()\n        call_kwargs = mock_dal.update_user.call_args\n        assert call_kwargs[1][\"is_active\"] is False\n\n    def test_patch_user_add_external_id_with_pascal_case(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"Entra sends ``\"Add\"`` (PascalCase) instead of ``\"add\"``.\"\"\"\n        user = make_db_user()\n        mock_dal.get_user.return_value = user\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=\"Add\",  # type: ignore[arg-type]\n                    path=\"externalId\",\n                    value=\"entra-ext-999\",\n                )\n            ]\n        )\n\n        result = patch_user(\n            user_id=str(user.id),\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_user(result)\n        # Verify the patched externalId was synced to the DAL\n        mock_dal.sync_user_external_id.assert_called_once()\n        call_args = mock_dal.sync_user_external_id.call_args\n        assert call_args[0][1] == \"entra-ext-999\"\n\n    def test_patch_user_enterprise_extension_in_value_dict(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"Entra sends enterprise extension URN as key in path-less PATCH value\n        dicts — enterprise data should be stored, not ignored.\"\"\"\n        user = make_db_user()\n        mock_dal.get_user.return_value = user\n\n        value = ScimPatchResourceValue(active=False)\n        assert value.__pydantic_extra__ is not None\n        value.__pydantic_extra__[\n            \"urn:ietf:params:scim:schemas:extension:enterprise:2.0:User\"\n        ] = {\"department\": \"Engineering\"}\n\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.REPLACE,\n                    path=None,\n                    value=value,\n                )\n            ]\n        )\n\n        result = patch_user(\n            user_id=str(user.id),\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_user(result)\n        # Verify active=False was applied\n        mock_dal.update_user.assert_called_once()\n        call_kwargs = mock_dal.update_user.call_args\n        assert call_kwargs[1][\"is_active\"] is False\n        # Verify enterprise data was passed to DAL\n        mock_dal.sync_user_external_id.assert_called_once()\n        sync_kwargs = mock_dal.sync_user_external_id.call_args[1]\n        assert sync_kwargs[\"fields\"] == ScimMappingFields(\n            department=\"Engineering\",\n            given_name=\"Test\",\n            family_name=\"User\",\n            scim_emails_json='[{\"value\": \"test@example.com\", \"type\": \"work\", \"primary\": true}]',\n        )\n\n    def test_patch_user_remove_external_id(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"PATCH remove op should clear the target field.\"\"\"\n        user = make_db_user()\n        mock_dal.get_user.return_value = user\n        mapping = make_user_mapping(user_id=user.id)\n        mapping.external_id = \"ext-to-remove\"\n        mock_dal.get_user_mapping_by_user_id.return_value = mapping\n\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.REMOVE,\n                    path=\"externalId\",\n                )\n            ]\n        )\n\n        result = patch_user(\n            user_id=str(user.id),\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_user(result)\n        # externalId should be cleared (None)\n        mock_dal.sync_user_external_id.assert_called_once()\n        call_args = mock_dal.sync_user_external_id.call_args\n        assert call_args[0][1] is None\n\n    def test_patch_user_emails_primary_eq_true_value(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"PATCH with path emails[primary eq true].value should update\n        the primary email entry, not userName.\"\"\"\n        user = make_db_user(email=\"old@contoso.com\")\n        mock_dal.get_user.return_value = user\n\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.REPLACE,\n                    path=\"emails[primary eq true].value\",\n                    value=\"new@contoso.com\",\n                )\n            ]\n        )\n\n        result = patch_user(\n            user_id=str(user.id),\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_user(result)\n        # userName should remain unchanged — emails and userName are separate\n        assert resource.userName == \"old@contoso.com\"\n        # Primary email should be updated\n        primary_emails = [e for e in resource.emails if e.primary]\n        assert len(primary_emails) == 1\n        assert primary_emails[0].value == \"new@contoso.com\"\n\n    def test_patch_user_enterprise_urn_department_path(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"PATCH with dotted enterprise URN path should store department.\"\"\"\n        user = make_db_user()\n        mock_dal.get_user.return_value = user\n\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.REPLACE,\n                    path=\"urn:ietf:params:scim:schemas:extension:enterprise:2.0:User:department\",\n                    value=\"Marketing\",\n                )\n            ]\n        )\n\n        result = patch_user(\n            user_id=str(user.id),\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_user(result)\n        mock_dal.sync_user_external_id.assert_called_once()\n        sync_kwargs = mock_dal.sync_user_external_id.call_args[1]\n        assert sync_kwargs[\"fields\"] == ScimMappingFields(\n            department=\"Marketing\",\n            given_name=\"Test\",\n            family_name=\"User\",\n            scim_emails_json='[{\"value\": \"test@example.com\", \"type\": \"work\", \"primary\": true}]',\n        )\n\n    def test_replace_user_includes_enterprise_schema(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        user = make_db_user(email=\"old@contoso.com\")\n        mock_dal.get_user.return_value = user\n        resource = make_scim_user(\n            userName=\"new@contoso.com\",\n            name=ScimName(givenName=\"New\", familyName=\"Name\"),\n        )\n\n        result = replace_user(\n            user_id=str(user.id),\n            user_resource=resource,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_user(result)\n        assert SCIM_ENTERPRISE_USER_SCHEMA in resource.schemas\n\n    def test_replace_user_with_enterprise_extension(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"PUT with enterprise extension should store the fields.\"\"\"\n        user = make_db_user(email=\"alice@contoso.com\")\n        mock_dal.get_user.return_value = user\n        resource = make_scim_user(\n            userName=\"alice@contoso.com\",\n            enterprise_extension=ScimEnterpriseExtension(\n                department=\"HR\",\n                manager=ScimManagerRef(value=\"boss-id\"),\n            ),\n        )\n\n        result = replace_user(\n            user_id=str(user.id),\n            user_resource=resource,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_user(result)\n        mock_dal.sync_user_external_id.assert_called_once()\n        sync_kwargs = mock_dal.sync_user_external_id.call_args[1]\n        assert sync_kwargs[\"fields\"] == ScimMappingFields(\n            department=\"HR\",\n            manager=\"boss-id\",\n            given_name=\"Test\",\n            family_name=\"User\",\n        )\n\n    def test_delete_user_returns_204(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n    ) -> None:\n        user = make_db_user()\n        mock_dal.get_user.return_value = user\n        mock_dal.get_user_mapping_by_user_id.return_value = MagicMock(id=1)\n\n        result = delete_user(\n            user_id=str(user.id),\n            _token=mock_token,\n            db_session=mock_db_session,\n        )\n\n        assert isinstance(result, Response)\n        assert result.status_code == 204\n\n    def test_double_delete_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n    ) -> None:\n        \"\"\"Second DELETE should return 404 — the SCIM mapping is gone.\"\"\"\n        user = make_db_user()\n        mock_dal.get_user.return_value = user\n        # No mapping — user was already deleted from SCIM's perspective\n        mock_dal.get_user_mapping_by_user_id.return_value = None\n\n        result = delete_user(\n            user_id=str(user.id),\n            _token=mock_token,\n            db_session=mock_db_session,\n        )\n\n        assert isinstance(result, ScimJSONResponse)\n        assert result.status_code == 404\n\n    def test_name_formatted_preserved_on_create(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"When name.formatted is provided, it should be used as personal_name.\"\"\"\n        mock_dal.get_user_by_email.return_value = None\n        resource = make_scim_user(\n            userName=\"alice@contoso.com\",\n            name=ScimName(\n                givenName=\"Alice\",\n                familyName=\"Smith\",\n                formatted=\"Dr. Alice Smith\",\n            ),\n        )\n\n        with patch(\n            \"ee.onyx.server.scim.api._check_seat_availability\", return_value=None\n        ):\n            result = create_user(\n                user_resource=resource,\n                _token=mock_token,\n                provider=entra_provider,\n                db_session=mock_db_session,\n            )\n\n        parse_scim_user(result, status=201)\n        # The User constructor should have received the formatted name\n        mock_dal.add_user.assert_called_once()\n        created_user = mock_dal.add_user.call_args[0][0]\n        assert created_user.personal_name == \"Dr. Alice Smith\"\n\n\n# ---------------------------------------------------------------------------\n# Group Lifecycle (Entra-specific)\n# ---------------------------------------------------------------------------\n\n\nclass TestEntraGroupLifecycle:\n    \"\"\"Test group CRUD with Entra-specific behaviors.\"\"\"\n\n    def test_get_group_standard_response(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=10, name=\"Contoso Engineering\")\n        mock_dal.get_group.return_value = group\n        uid = uuid4()\n        mock_dal.get_group_members.return_value = [(uid, \"alice@contoso.com\")]\n\n        result = get_group(\n            group_id=\"10\",\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_group(result)\n        assert resource.displayName == \"Contoso Engineering\"\n        assert len(resource.members) == 1\n\n    def test_list_groups_with_excluded_attributes_members(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"Entra sends ?excludedAttributes=members on group list queries.\"\"\"\n        group = make_db_group(id=10, name=\"Engineering\")\n        uid = uuid4()\n        mock_dal.list_groups.return_value = ([(group, \"ext-g-1\")], 1)\n        mock_dal.get_group_members.return_value = [(uid, \"alice@contoso.com\")]\n\n        result = list_groups(\n            filter=None,\n            excludedAttributes=\"members\",\n            startIndex=1,\n            count=100,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        assert isinstance(result, ScimJSONResponse)\n        parsed = json.loads(result.body)\n        assert parsed[\"totalResults\"] == 1\n        resource = parsed[\"Resources\"][0]\n        assert \"members\" not in resource\n        assert resource[\"displayName\"] == \"Engineering\"\n\n    def test_get_group_with_excluded_attributes_members(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"Entra sends ?excludedAttributes=members on single group GET.\"\"\"\n        group = make_db_group(id=10, name=\"Engineering\")\n        uid = uuid4()\n        mock_dal.get_group.return_value = group\n        mock_dal.get_group_members.return_value = [(uid, \"alice@contoso.com\")]\n\n        result = get_group(\n            group_id=\"10\",\n            excludedAttributes=\"members\",\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        assert isinstance(result, ScimJSONResponse)\n        parsed = json.loads(result.body)\n        assert \"members\" not in parsed\n        assert parsed[\"displayName\"] == \"Engineering\"\n\n    @patch(\"ee.onyx.server.scim.api.apply_group_patch\")\n    def test_patch_group_add_members_with_pascal_case(\n        self,\n        mock_apply: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"Entra sends ``\"Add\"`` (PascalCase) for group member additions.\"\"\"\n        group = make_db_group(id=10)\n        mock_dal.get_group.return_value = group\n        mock_dal.get_group_members.return_value = []\n        mock_dal.validate_member_ids.return_value = []\n\n        uid = str(uuid4())\n        patched = ScimGroupResource(\n            id=\"10\",\n            displayName=\"Engineering\",\n            members=[ScimGroupMember(value=uid)],\n        )\n        mock_apply.return_value = (patched, [uid], [])\n\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=\"Add\",  # type: ignore[arg-type]\n                    path=\"members\",\n                    value=[ScimGroupMember(value=uid)],\n                )\n            ]\n        )\n\n        result = patch_group(\n            group_id=\"10\",\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_group(result)\n        mock_dal.upsert_group_members.assert_called_once()\n\n    @patch(\"ee.onyx.server.scim.api.apply_group_patch\")\n    def test_patch_group_remove_member_with_pascal_case(\n        self,\n        mock_apply: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"Entra sends ``\"Remove\"`` (PascalCase) for group member removals.\"\"\"\n        group = make_db_group(id=10)\n        mock_dal.get_group.return_value = group\n        mock_dal.get_group_members.return_value = []\n\n        uid = str(uuid4())\n        patched = ScimGroupResource(id=\"10\", displayName=\"Engineering\", members=[])\n        mock_apply.return_value = (patched, [], [uid])\n\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=\"Remove\",  # type: ignore[arg-type]\n                    path=f'members[value eq \"{uid}\"]',\n                )\n            ]\n        )\n\n        result = patch_group(\n            group_id=\"10\",\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_group(result)\n        mock_dal.remove_group_members.assert_called_once()\n\n\n# ---------------------------------------------------------------------------\n# excludedAttributes (RFC 7644 §3.4.2.5)\n# ---------------------------------------------------------------------------\n\n\nclass TestExcludedAttributes:\n    \"\"\"Test excludedAttributes query parameter on GET endpoints.\"\"\"\n\n    def test_list_groups_excludes_members(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=1, name=\"Team\")\n        uid = uuid4()\n        mock_dal.list_groups.return_value = ([(group, None)], 1)\n        mock_dal.get_group_members.return_value = [(uid, \"user@example.com\")]\n\n        result = list_groups(\n            filter=None,\n            excludedAttributes=\"members\",\n            startIndex=1,\n            count=100,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        assert isinstance(result, ScimJSONResponse)\n        parsed = json.loads(result.body)\n        resource = parsed[\"Resources\"][0]\n        assert \"members\" not in resource\n        assert \"displayName\" in resource\n\n    def test_get_group_excludes_members(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=1, name=\"Team\")\n        uid = uuid4()\n        mock_dal.get_group.return_value = group\n        mock_dal.get_group_members.return_value = [(uid, \"user@example.com\")]\n\n        result = get_group(\n            group_id=\"1\",\n            excludedAttributes=\"members\",\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        assert isinstance(result, ScimJSONResponse)\n        parsed = json.loads(result.body)\n        assert \"members\" not in parsed\n        assert \"displayName\" in parsed\n\n    def test_list_users_excludes_groups(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        user = make_db_user()\n        mapping = make_user_mapping(user_id=user.id)\n        mock_dal.list_users.return_value = ([(user, mapping)], 1)\n        mock_dal.get_users_groups_batch.return_value = {user.id: [(1, \"Engineering\")]}\n\n        result = list_users(\n            filter=None,\n            excludedAttributes=\"groups\",\n            startIndex=1,\n            count=100,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        assert isinstance(result, ScimJSONResponse)\n        parsed = json.loads(result.body)\n        resource = parsed[\"Resources\"][0]\n        assert \"groups\" not in resource\n        assert \"userName\" in resource\n\n    def test_get_user_excludes_groups(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        user = make_db_user()\n        mock_dal.get_user.return_value = user\n        mock_dal.get_user_groups.return_value = [(1, \"Engineering\")]\n\n        result = get_user(\n            user_id=str(user.id),\n            excludedAttributes=\"groups\",\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        assert isinstance(result, ScimJSONResponse)\n        parsed = json.loads(result.body)\n        assert \"groups\" not in parsed\n        assert \"userName\" in parsed\n\n    def test_multiple_excluded_attributes(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=1, name=\"Team\")\n        mock_dal.get_group.return_value = group\n        mock_dal.get_group_members.return_value = []\n\n        result = get_group(\n            group_id=\"1\",\n            excludedAttributes=\"members,externalId\",\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        assert isinstance(result, ScimJSONResponse)\n        parsed = json.loads(result.body)\n        assert \"members\" not in parsed\n        assert \"externalId\" not in parsed\n        assert \"displayName\" in parsed\n\n    def test_no_excluded_attributes_returns_full_response(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=1, name=\"Team\")\n        uid = uuid4()\n        mock_dal.get_group.return_value = group\n        mock_dal.get_group_members.return_value = [(uid, \"user@example.com\")]\n\n        result = get_group(\n            group_id=\"1\",\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_group(result)\n        assert len(resource.members) == 1\n\n\n# ---------------------------------------------------------------------------\n# Entra Connection Probe\n# ---------------------------------------------------------------------------\n\n\nclass TestEntraConnectionProbe:\n    \"\"\"Entra sends a probe request during initial SCIM setup.\"\"\"\n\n    def test_filter_for_nonexistent_user_returns_empty_list(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        entra_provider: ScimProvider,\n    ) -> None:\n        \"\"\"Entra probes with: GET /Users?filter=userName eq \"non-existent\"&count=1\"\"\"\n        mock_dal.list_users.return_value = ([], 0)\n\n        result = list_users(\n            filter='userName eq \"non-existent@contoso.com\"',\n            startIndex=1,\n            count=1,\n            _token=mock_token,\n            provider=entra_provider,\n            db_session=mock_db_session,\n        )\n\n        parsed = parse_scim_list(result)\n        assert parsed.totalResults == 0\n        assert parsed.Resources == []\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/scim/test_filtering.py",
    "content": "import pytest\n\nfrom ee.onyx.server.scim.filtering import parse_scim_filter\nfrom ee.onyx.server.scim.filtering import ScimFilter\nfrom ee.onyx.server.scim.filtering import ScimFilterOperator\n\n\nclass TestParseScimFilter:\n    \"\"\"Tests for SCIM filter expression parsing.\"\"\"\n\n    def test_eq_filter_double_quoted(self) -> None:\n        result = parse_scim_filter('userName eq \"john@example.com\"')\n        assert result == ScimFilter(\n            attribute=\"userName\",\n            operator=ScimFilterOperator.EQUAL,\n            value=\"john@example.com\",\n        )\n\n    def test_eq_filter_single_quoted(self) -> None:\n        result = parse_scim_filter(\"userName eq 'john@example.com'\")\n        assert result == ScimFilter(\n            attribute=\"userName\",\n            operator=ScimFilterOperator.EQUAL,\n            value=\"john@example.com\",\n        )\n\n    def test_co_filter(self) -> None:\n        result = parse_scim_filter('displayName co \"Engineering\"')\n        assert result == ScimFilter(\n            attribute=\"displayName\",\n            operator=ScimFilterOperator.CONTAINS,\n            value=\"Engineering\",\n        )\n\n    def test_sw_filter(self) -> None:\n        result = parse_scim_filter('userName sw \"admin\"')\n        assert result == ScimFilter(\n            attribute=\"userName\",\n            operator=ScimFilterOperator.STARTS_WITH,\n            value=\"admin\",\n        )\n\n    def test_case_insensitive_operator(self) -> None:\n        result = parse_scim_filter('userName EQ \"test@example.com\"')\n        assert result is not None\n        assert result.operator == ScimFilterOperator.EQUAL\n\n    def test_external_id_filter(self) -> None:\n        result = parse_scim_filter('externalId eq \"abc-123\"')\n        assert result == ScimFilter(\n            attribute=\"externalId\",\n            operator=ScimFilterOperator.EQUAL,\n            value=\"abc-123\",\n        )\n\n    def test_empty_value(self) -> None:\n        result = parse_scim_filter('userName eq \"\"')\n        assert result == ScimFilter(\n            attribute=\"userName\",\n            operator=ScimFilterOperator.EQUAL,\n            value=\"\",\n        )\n\n    def test_whitespace_trimming(self) -> None:\n        result = parse_scim_filter('  userName eq \"test\"  ')\n        assert result is not None\n        assert result.value == \"test\"\n\n    @pytest.mark.parametrize(\n        \"filter_string\",\n        [\n            None,\n            \"\",\n            \"   \",\n        ],\n    )\n    def test_empty_input_returns_none(self, filter_string: str | None) -> None:\n        assert parse_scim_filter(filter_string) is None\n\n    @pytest.mark.parametrize(\n        \"filter_string\",\n        [\n            \"userName\",  # missing operator and value\n            \"userName eq\",  # missing value\n            'userName gt \"5\"',  # unsupported operator\n            'userName ne \"test\"',  # unsupported operator\n            \"userName eq unquoted\",  # unquoted value\n            'a eq \"x\" and b eq \"y\"',  # compound filter not supported\n        ],\n    )\n    def test_malformed_input_raises_value_error(self, filter_string: str) -> None:\n        with pytest.raises(ValueError, match=\"Unsupported or malformed\"):\n            parse_scim_filter(filter_string)\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/scim/test_group_endpoints.py",
    "content": "\"\"\"Unit tests for SCIM Group CRUD endpoints.\"\"\"\n\nfrom __future__ import annotations\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nfrom fastapi import Response\n\nfrom ee.onyx.server.scim.api import create_group\nfrom ee.onyx.server.scim.api import delete_group\nfrom ee.onyx.server.scim.api import get_group\nfrom ee.onyx.server.scim.api import list_groups\nfrom ee.onyx.server.scim.api import patch_group\nfrom ee.onyx.server.scim.api import replace_group\nfrom ee.onyx.server.scim.models import ScimGroupMember\nfrom ee.onyx.server.scim.models import ScimGroupResource\nfrom ee.onyx.server.scim.models import ScimPatchOperation\nfrom ee.onyx.server.scim.models import ScimPatchOperationType\nfrom ee.onyx.server.scim.models import ScimPatchRequest\nfrom ee.onyx.server.scim.patch import ScimPatchError\nfrom ee.onyx.server.scim.providers.base import ScimProvider\nfrom tests.unit.onyx.server.scim.conftest import assert_scim_error\nfrom tests.unit.onyx.server.scim.conftest import make_db_group\nfrom tests.unit.onyx.server.scim.conftest import make_scim_group\nfrom tests.unit.onyx.server.scim.conftest import parse_scim_group\nfrom tests.unit.onyx.server.scim.conftest import parse_scim_list\n\n\nclass TestListGroups:\n    \"\"\"Tests for GET /scim/v2/Groups.\"\"\"\n\n    def test_empty_result(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.list_groups.return_value = ([], 0)\n\n        result = list_groups(\n            filter=None,\n            startIndex=1,\n            count=100,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parsed = parse_scim_list(result)\n        assert parsed.totalResults == 0\n        assert parsed.Resources == []\n\n    def test_unsupported_filter_returns_400(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.list_groups.side_effect = ValueError(\n            \"Unsupported filter attribute: userName\"\n        )\n\n        result = list_groups(\n            filter='userName eq \"x\"',\n            startIndex=1,\n            count=100,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 400)\n\n    def test_returns_groups_with_members(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=5, name=\"Engineering\")\n        uid = uuid4()\n        mock_dal.list_groups.return_value = ([(group, \"ext-g-1\")], 1)\n        mock_dal.get_group_members.return_value = [(uid, \"alice@example.com\")]\n\n        result = list_groups(\n            filter=None,\n            startIndex=1,\n            count=100,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parsed = parse_scim_list(result)\n        assert parsed.totalResults == 1\n        resource = parsed.Resources[0]\n        assert isinstance(resource, ScimGroupResource)\n        assert resource.displayName == \"Engineering\"\n        assert resource.externalId == \"ext-g-1\"\n        assert len(resource.members) == 1\n        assert resource.members[0].display == \"alice@example.com\"\n\n\nclass TestGetGroup:\n    \"\"\"Tests for GET /scim/v2/Groups/{group_id}.\"\"\"\n\n    def test_returns_scim_resource(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=5, name=\"Engineering\")\n        mock_dal.get_group.return_value = group\n        mock_dal.get_group_members.return_value = []\n\n        result = get_group(\n            group_id=\"5\",\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_group(result)\n        assert resource.displayName == \"Engineering\"\n        assert resource.id == \"5\"\n\n    def test_non_integer_id_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,  # noqa: ARG002\n        provider: ScimProvider,\n    ) -> None:\n        result = get_group(\n            group_id=\"not-a-number\",\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 404)\n\n    def test_not_found_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_group.return_value = None\n\n        result = get_group(\n            group_id=\"999\",\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 404)\n\n\nclass TestCreateGroup:\n    \"\"\"Tests for POST /scim/v2/Groups.\"\"\"\n\n    @patch(\"ee.onyx.server.scim.api._validate_and_parse_members\")\n    def test_success(\n        self,\n        mock_validate: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_group_by_name.return_value = None\n        mock_validate.return_value = ([], None)\n        mock_dal.get_group_members.return_value = []\n\n        resource = make_scim_group(displayName=\"New Group\")\n\n        result = create_group(\n            group_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_group(result, status=201)\n        assert resource.displayName == \"New Group\"\n        mock_dal.add_group.assert_called_once()\n        mock_dal.commit.assert_called_once()\n\n    def test_duplicate_name_returns_409(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_group_by_name.return_value = make_db_group()\n        resource = make_scim_group()\n\n        result = create_group(\n            group_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 409)\n\n    @patch(\"ee.onyx.server.scim.api._validate_and_parse_members\")\n    def test_invalid_member_returns_400(\n        self,\n        mock_validate: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_group_by_name.return_value = None\n        mock_validate.return_value = ([], \"Invalid member ID: bad-uuid\")\n\n        resource = make_scim_group(members=[ScimGroupMember(value=\"bad-uuid\")])\n\n        result = create_group(\n            group_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 400)\n\n    @patch(\"ee.onyx.server.scim.api._validate_and_parse_members\")\n    def test_nonexistent_member_returns_400(\n        self,\n        mock_validate: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_group_by_name.return_value = None\n        uid = uuid4()\n        mock_validate.return_value = ([], f\"Member(s) not found: {uid}\")\n\n        resource = make_scim_group(members=[ScimGroupMember(value=str(uid))])\n\n        result = create_group(\n            group_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 400)\n\n    @patch(\"ee.onyx.server.scim.api._validate_and_parse_members\")\n    def test_creates_external_id_mapping(\n        self,\n        mock_validate: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_group_by_name.return_value = None\n        mock_validate.return_value = ([], None)\n        mock_dal.get_group_members.return_value = []\n\n        resource = make_scim_group(externalId=\"ext-g-123\")\n\n        result = create_group(\n            group_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_group(result, status=201)\n        mock_dal.create_group_mapping.assert_called_once()\n\n\nclass TestReplaceGroup:\n    \"\"\"Tests for PUT /scim/v2/Groups/{group_id}.\"\"\"\n\n    @patch(\"ee.onyx.server.scim.api._validate_and_parse_members\")\n    def test_success(\n        self,\n        mock_validate: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=5, name=\"Old Name\")\n        mock_dal.get_group.return_value = group\n        mock_validate.return_value = ([], None)\n        mock_dal.get_group_members.return_value = []\n\n        resource = make_scim_group(displayName=\"New Name\")\n\n        result = replace_group(\n            group_id=\"5\",\n            group_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_group(result)\n        mock_dal.update_group.assert_called_once_with(group, name=\"New Name\")\n        mock_dal.replace_group_members.assert_called_once()\n        mock_dal.commit.assert_called_once()\n\n    def test_not_found_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_group.return_value = None\n\n        result = replace_group(\n            group_id=\"999\",\n            group_resource=make_scim_group(),\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 404)\n\n    @patch(\"ee.onyx.server.scim.api._validate_and_parse_members\")\n    def test_invalid_member_returns_400(\n        self,\n        mock_validate: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=5)\n        mock_dal.get_group.return_value = group\n        mock_validate.return_value = ([], \"Invalid member ID: bad\")\n\n        resource = make_scim_group(members=[ScimGroupMember(value=\"bad\")])\n\n        result = replace_group(\n            group_id=\"5\",\n            group_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 400)\n\n    @patch(\"ee.onyx.server.scim.api._validate_and_parse_members\")\n    def test_syncs_external_id(\n        self,\n        mock_validate: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=5)\n        mock_dal.get_group.return_value = group\n        mock_validate.return_value = ([], None)\n        mock_dal.get_group_members.return_value = []\n\n        resource = make_scim_group(externalId=\"new-ext\")\n\n        replace_group(\n            group_id=\"5\",\n            group_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        mock_dal.sync_group_external_id.assert_called_once_with(5, \"new-ext\")\n\n\nclass TestPatchGroup:\n    \"\"\"Tests for PATCH /scim/v2/Groups/{group_id}.\"\"\"\n\n    @patch(\"ee.onyx.server.scim.api.apply_group_patch\")\n    def test_rename(\n        self,\n        mock_apply: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=5, name=\"Old Name\")\n        mock_dal.get_group.return_value = group\n        mock_dal.get_group_members.return_value = []\n\n        patched = ScimGroupResource(id=\"5\", displayName=\"New Name\", members=[])\n        mock_apply.return_value = (patched, [], [])\n\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.REPLACE,\n                    path=\"displayName\",\n                    value=\"New Name\",\n                )\n            ]\n        )\n\n        result = patch_group(\n            group_id=\"5\",\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_group(result)\n        mock_dal.update_group.assert_called_once_with(group, name=\"New Name\")\n\n    def test_not_found_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_group.return_value = None\n\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.REPLACE,\n                    path=\"displayName\",\n                    value=\"X\",\n                )\n            ]\n        )\n\n        result = patch_group(\n            group_id=\"999\",\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 404)\n\n    @patch(\"ee.onyx.server.scim.api.apply_group_patch\")\n    def test_patch_error_returns_error_response(\n        self,\n        mock_apply: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=5)\n        mock_dal.get_group.return_value = group\n        mock_dal.get_group_members.return_value = []\n\n        mock_apply.side_effect = ScimPatchError(\"Unsupported path\", 400)\n\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.REPLACE,\n                    path=\"badPath\",\n                    value=\"x\",\n                )\n            ]\n        )\n\n        result = patch_group(\n            group_id=\"5\",\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 400)\n\n    @patch(\"ee.onyx.server.scim.api.apply_group_patch\")\n    def test_add_members(\n        self,\n        mock_apply: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=5)\n        mock_dal.get_group.return_value = group\n        mock_dal.get_group_members.return_value = []\n        mock_dal.validate_member_ids.return_value = []\n\n        uid = str(uuid4())\n        patched = ScimGroupResource(\n            id=\"5\",\n            displayName=\"Engineering\",\n            members=[ScimGroupMember(value=uid)],\n        )\n        mock_apply.return_value = (patched, [uid], [])\n\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.ADD,\n                    path=\"members\",\n                    value=[ScimGroupMember(value=uid)],\n                )\n            ]\n        )\n\n        result = patch_group(\n            group_id=\"5\",\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_group(result)\n        mock_dal.validate_member_ids.assert_called_once()\n        mock_dal.upsert_group_members.assert_called_once()\n\n    @patch(\"ee.onyx.server.scim.api.apply_group_patch\")\n    def test_add_nonexistent_member_returns_400(\n        self,\n        mock_apply: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=5)\n        mock_dal.get_group.return_value = group\n        mock_dal.get_group_members.return_value = []\n\n        uid = uuid4()\n        patched = ScimGroupResource(\n            id=\"5\",\n            displayName=\"Engineering\",\n            members=[ScimGroupMember(value=str(uid))],\n        )\n        mock_apply.return_value = (patched, [str(uid)], [])\n        mock_dal.validate_member_ids.return_value = [uid]\n\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.ADD,\n                    path=\"members\",\n                    value=[ScimGroupMember(value=str(uid))],\n                )\n            ]\n        )\n\n        result = patch_group(\n            group_id=\"5\",\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 400)\n\n    @patch(\"ee.onyx.server.scim.api.apply_group_patch\")\n    def test_remove_members(\n        self,\n        mock_apply: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        group = make_db_group(id=5)\n        mock_dal.get_group.return_value = group\n        mock_dal.get_group_members.return_value = []\n\n        uid = str(uuid4())\n        patched = ScimGroupResource(id=\"5\", displayName=\"Engineering\", members=[])\n        mock_apply.return_value = (patched, [], [uid])\n\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.REMOVE,\n                    path=f'members[value eq \"{uid}\"]',\n                )\n            ]\n        )\n\n        result = patch_group(\n            group_id=\"5\",\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_group(result)\n        mock_dal.remove_group_members.assert_called_once()\n\n\nclass TestDeleteGroup:\n    \"\"\"Tests for DELETE /scim/v2/Groups/{group_id}.\"\"\"\n\n    def test_success(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n    ) -> None:\n        group = make_db_group(id=5)\n        mock_dal.get_group.return_value = group\n        mapping = MagicMock()\n        mapping.id = 1\n        mock_dal.get_group_mapping_by_group_id.return_value = mapping\n\n        result = delete_group(\n            group_id=\"5\",\n            _token=mock_token,\n            db_session=mock_db_session,\n        )\n\n        assert isinstance(result, Response)\n        assert result.status_code == 204\n        mock_dal.delete_group_mapping.assert_called_once_with(1)\n        mock_dal.delete_group_with_members.assert_called_once_with(group)\n        mock_dal.commit.assert_called_once()\n\n    def test_not_found_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n    ) -> None:\n        mock_dal.get_group.return_value = None\n\n        result = delete_group(\n            group_id=\"999\",\n            _token=mock_token,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 404)\n\n    def test_non_integer_id_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,  # noqa: ARG002\n    ) -> None:\n        result = delete_group(\n            group_id=\"abc\",\n            _token=mock_token,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 404)\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/scim/test_patch.py",
    "content": "import pytest\n\nfrom ee.onyx.server.scim.models import ScimEmail\nfrom ee.onyx.server.scim.models import ScimGroupMember\nfrom ee.onyx.server.scim.models import ScimGroupResource\nfrom ee.onyx.server.scim.models import ScimMeta\nfrom ee.onyx.server.scim.models import ScimName\nfrom ee.onyx.server.scim.models import ScimPatchOperation\nfrom ee.onyx.server.scim.models import ScimPatchOperationType\nfrom ee.onyx.server.scim.models import ScimPatchResourceValue\nfrom ee.onyx.server.scim.models import ScimPatchValue\nfrom ee.onyx.server.scim.models import ScimUserResource\nfrom ee.onyx.server.scim.patch import apply_group_patch\nfrom ee.onyx.server.scim.patch import apply_user_patch\nfrom ee.onyx.server.scim.patch import ScimPatchError\nfrom ee.onyx.server.scim.providers.entra import EntraProvider\nfrom ee.onyx.server.scim.providers.okta import OktaProvider\n\n_OKTA_IGNORED = OktaProvider().ignored_patch_paths\n_ENTRA_IGNORED = EntraProvider().ignored_patch_paths\n\n\ndef _make_user(**kwargs: object) -> ScimUserResource:\n    defaults: dict = {\n        \"userName\": \"test@example.com\",\n        \"active\": True,\n        \"name\": ScimName(givenName=\"Test\", familyName=\"User\"),\n    }\n    defaults.update(kwargs)\n    return ScimUserResource(**defaults)\n\n\ndef _make_group(**kwargs: object) -> ScimGroupResource:\n    defaults: dict = {\"displayName\": \"Engineering\"}\n    defaults.update(kwargs)\n    return ScimGroupResource(**defaults)\n\n\ndef _replace_op(\n    path: str | None = None,\n    value: ScimPatchValue = None,\n) -> ScimPatchOperation:\n    return ScimPatchOperation(op=ScimPatchOperationType.REPLACE, path=path, value=value)\n\n\ndef _add_op(\n    path: str | None = None,\n    value: ScimPatchValue = None,\n) -> ScimPatchOperation:\n    return ScimPatchOperation(op=ScimPatchOperationType.ADD, path=path, value=value)\n\n\ndef _remove_op(path: str) -> ScimPatchOperation:\n    return ScimPatchOperation(op=ScimPatchOperationType.REMOVE, path=path)\n\n\nclass TestApplyUserPatch:\n    \"\"\"Tests for SCIM user PATCH operations.\"\"\"\n\n    def test_deactivate_user(self) -> None:\n        user = _make_user()\n        result, _ = apply_user_patch([_replace_op(\"active\", False)], user)\n        assert result.active is False\n        assert result.userName == \"test@example.com\"\n\n    def test_activate_user(self) -> None:\n        user = _make_user(active=False)\n        result, _ = apply_user_patch([_replace_op(\"active\", True)], user)\n        assert result.active is True\n\n    def test_replace_given_name(self) -> None:\n        user = _make_user()\n        result, _ = apply_user_patch([_replace_op(\"name.givenName\", \"NewFirst\")], user)\n        assert result.name is not None\n        assert result.name.givenName == \"NewFirst\"\n        assert result.name.familyName == \"User\"\n\n    def test_replace_family_name(self) -> None:\n        user = _make_user()\n        result, _ = apply_user_patch([_replace_op(\"name.familyName\", \"NewLast\")], user)\n        assert result.name is not None\n        assert result.name.familyName == \"NewLast\"\n\n    def test_replace_username(self) -> None:\n        user = _make_user()\n        result, _ = apply_user_patch([_replace_op(\"userName\", \"new@example.com\")], user)\n        assert result.userName == \"new@example.com\"\n\n    def test_replace_without_path_uses_dict(self) -> None:\n        user = _make_user()\n        result, _ = apply_user_patch(\n            [\n                _replace_op(\n                    None,\n                    ScimPatchResourceValue(active=False, userName=\"new@example.com\"),\n                )\n            ],\n            user,\n        )\n        assert result.active is False\n        assert result.userName == \"new@example.com\"\n\n    def test_multiple_operations(self) -> None:\n        user = _make_user()\n        result, _ = apply_user_patch(\n            [\n                _replace_op(\"active\", False),\n                _replace_op(\"name.givenName\", \"Updated\"),\n            ],\n            user,\n        )\n        assert result.active is False\n        assert result.name is not None\n        assert result.name.givenName == \"Updated\"\n\n    def test_case_insensitive_path(self) -> None:\n        user = _make_user()\n        result, _ = apply_user_patch([_replace_op(\"Active\", False)], user)\n        assert result.active is False\n\n    def test_original_not_mutated(self) -> None:\n        user = _make_user()\n        apply_user_patch([_replace_op(\"active\", False)], user)\n        assert user.active is True\n\n    def test_unsupported_path_raises(self) -> None:\n        user = _make_user()\n        with pytest.raises(ScimPatchError, match=\"Unsupported path\"):\n            apply_user_patch([_replace_op(\"unknownField\", \"value\")], user)\n\n    def test_remove_op_clears_field(self) -> None:\n        \"\"\"Remove op should clear the target field (not raise).\"\"\"\n        user = _make_user(externalId=\"ext-123\")\n        result, _ = apply_user_patch([_remove_op(\"externalId\")], user)\n        assert result.externalId is None\n\n    def test_remove_unsupported_path_raises(self) -> None:\n        \"\"\"Remove op on unsupported path (e.g. 'active') should raise.\"\"\"\n        user = _make_user()\n        with pytest.raises(ScimPatchError, match=\"Unsupported remove path\"):\n            apply_user_patch([_remove_op(\"active\")], user)\n\n    def test_replace_without_path_ignores_id(self) -> None:\n        \"\"\"Okta sends 'id' alongside actual changes — it should be silently ignored.\"\"\"\n        user = _make_user()\n        result, _ = apply_user_patch(\n            [_replace_op(None, ScimPatchResourceValue(active=False, id=\"some-uuid\"))],\n            user,\n            ignored_paths=_OKTA_IGNORED,\n        )\n        assert result.active is False\n\n    def test_replace_without_path_ignores_schemas(self) -> None:\n        \"\"\"The 'schemas' key in a value dict should be silently ignored.\"\"\"\n        user = _make_user()\n        result, _ = apply_user_patch(\n            [\n                _replace_op(\n                    None,\n                    ScimPatchResourceValue(\n                        active=False,\n                        schemas=[\"urn:ietf:params:scim:schemas:core:2.0:User\"],\n                    ),\n                )\n            ],\n            user,\n            ignored_paths=_OKTA_IGNORED,\n        )\n        assert result.active is False\n\n    def test_okta_deactivation_payload(self) -> None:\n        \"\"\"Exact Okta deactivation payload: path-less replace with id + active.\"\"\"\n        user = _make_user()\n        result, _ = apply_user_patch(\n            [\n                _replace_op(\n                    None,\n                    ScimPatchResourceValue(id=\"abc-123\", active=False),\n                )\n            ],\n            user,\n            ignored_paths=_OKTA_IGNORED,\n        )\n        assert result.active is False\n        assert result.userName == \"test@example.com\"\n\n    def test_replace_displayname(self) -> None:\n        user = _make_user()\n        result, _ = apply_user_patch(\n            [_replace_op(\"displayName\", \"New Display Name\")], user\n        )\n        assert result.displayName == \"New Display Name\"\n        assert result.name is not None\n        assert result.name.formatted == \"New Display Name\"\n\n    def test_replace_without_path_complex_value_dict(self) -> None:\n        \"\"\"Okta sends id/schemas/meta alongside actual changes — complex types\n        (lists, nested dicts) must not cause Pydantic validation errors.\"\"\"\n        user = _make_user()\n        result, _ = apply_user_patch(\n            [\n                _replace_op(\n                    None,\n                    ScimPatchResourceValue(\n                        active=False,\n                        id=\"some-uuid\",\n                        schemas=[\"urn:ietf:params:scim:schemas:core:2.0:User\"],\n                        meta=ScimMeta(resourceType=\"User\"),\n                    ),\n                )\n            ],\n            user,\n            ignored_paths=_OKTA_IGNORED,\n        )\n        assert result.active is False\n        assert result.userName == \"test@example.com\"\n\n    def test_add_operation_works_like_replace(self) -> None:\n        user = _make_user()\n        result, _ = apply_user_patch([_add_op(\"externalId\", \"ext-456\")], user)\n        assert result.externalId == \"ext-456\"\n\n    def test_entra_capitalized_replace_op(self) -> None:\n        \"\"\"Entra ID sends ``\"Replace\"`` instead of ``\"replace\"``.\"\"\"\n        user = _make_user()\n        op = ScimPatchOperation(op=\"Replace\", path=\"active\", value=False)  # type: ignore[arg-type]\n        result, _ = apply_user_patch([op], user)\n        assert result.active is False\n\n    def test_entra_capitalized_add_op(self) -> None:\n        \"\"\"Entra ID sends ``\"Add\"`` instead of ``\"add\"``.\"\"\"\n        user = _make_user()\n        op = ScimPatchOperation(op=\"Add\", path=\"externalId\", value=\"ext-999\")  # type: ignore[arg-type]\n        result, _ = apply_user_patch([op], user)\n        assert result.externalId == \"ext-999\"\n\n    def test_entra_enterprise_extension_handled(self) -> None:\n        \"\"\"Entra sends the enterprise extension URN as a key in path-less\n        PATCH value dicts — enterprise data should be captured in ent_data.\"\"\"\n        user = _make_user()\n        value = ScimPatchResourceValue(active=False)\n        # Simulate Entra including the enterprise extension URN as extra data\n        assert value.__pydantic_extra__ is not None\n        value.__pydantic_extra__[\n            \"urn:ietf:params:scim:schemas:extension:enterprise:2.0:User\"\n        ] = {\"department\": \"Engineering\"}\n        result, ent_data = apply_user_patch(\n            [_replace_op(None, value)],\n            user,\n            ignored_paths=_ENTRA_IGNORED,\n        )\n        assert result.active is False\n        assert result.userName == \"test@example.com\"\n        assert ent_data[\"department\"] == \"Engineering\"\n\n    def test_okta_handles_enterprise_extension_urn(self) -> None:\n        \"\"\"Enterprise extension URN paths are handled universally, even\n        for Okta — the data is captured in the enterprise data dict.\"\"\"\n        user = _make_user()\n        value = ScimPatchResourceValue(active=False)\n        assert value.__pydantic_extra__ is not None\n        value.__pydantic_extra__[\n            \"urn:ietf:params:scim:schemas:extension:enterprise:2.0:User\"\n        ] = {\"department\": \"Engineering\"}\n        result, ent_data = apply_user_patch(\n            [_replace_op(None, value)],\n            user,\n            ignored_paths=_OKTA_IGNORED,\n        )\n        assert result.active is False\n        assert ent_data[\"department\"] == \"Engineering\"\n\n    def test_emails_primary_eq_true_value(self) -> None:\n        \"\"\"emails[primary eq true].value should update the primary email entry.\"\"\"\n        user = _make_user(\n            emails=[ScimEmail(value=\"old@example.com\", type=\"work\", primary=True)]\n        )\n        result, _ = apply_user_patch(\n            [_replace_op(\"emails[primary eq true].value\", \"new@example.com\")], user\n        )\n        # userName should remain unchanged — emails and userName are separate\n        assert result.userName == \"test@example.com\"\n        assert len(result.emails) == 1\n        assert result.emails[0].value == \"new@example.com\"\n        assert result.emails[0].primary is True\n\n    def test_enterprise_urn_department_path(self) -> None:\n        \"\"\"Dotted enterprise URN path should set department in ent_data.\"\"\"\n        user = _make_user()\n        _, ent_data = apply_user_patch(\n            [\n                _replace_op(\n                    \"urn:ietf:params:scim:schemas:extension:enterprise:2.0:User:department\",\n                    \"Marketing\",\n                )\n            ],\n            user,\n        )\n        assert ent_data[\"department\"] == \"Marketing\"\n\n    def test_enterprise_urn_manager_path(self) -> None:\n        \"\"\"Dotted enterprise URN path for manager should set manager.\"\"\"\n        user = _make_user()\n        _, ent_data = apply_user_patch(\n            [\n                _replace_op(\n                    \"urn:ietf:params:scim:schemas:extension:enterprise:2.0:User:manager\",\n                    ScimPatchResourceValue.model_validate({\"value\": \"boss-id\"}),\n                )\n            ],\n            user,\n        )\n        assert ent_data[\"manager\"] == \"boss-id\"\n\n\nclass TestApplyGroupPatch:\n    \"\"\"Tests for SCIM group PATCH operations.\"\"\"\n\n    def test_replace_display_name(self) -> None:\n        group = _make_group()\n        result, added, removed = apply_group_patch(\n            [_replace_op(\"displayName\", \"New Name\")], group\n        )\n        assert result.displayName == \"New Name\"\n        assert added == []\n        assert removed == []\n\n    def test_add_members(self) -> None:\n        group = _make_group()\n        result, added, removed = apply_group_patch(\n            [\n                _add_op(\n                    \"members\",\n                    [ScimGroupMember(value=\"user-1\"), ScimGroupMember(value=\"user-2\")],\n                )\n            ],\n            group,\n        )\n        assert len(result.members) == 2\n        assert added == [\"user-1\", \"user-2\"]\n        assert removed == []\n\n    def test_add_members_without_path(self) -> None:\n        group = _make_group()\n        result, added, _ = apply_group_patch(\n            [_add_op(None, [ScimGroupMember(value=\"user-1\")])],\n            group,\n        )\n        assert len(result.members) == 1\n        assert added == [\"user-1\"]\n\n    def test_add_duplicate_member_skipped(self) -> None:\n        group = _make_group(members=[ScimGroupMember(value=\"user-1\")])\n        result, added, _ = apply_group_patch(\n            [\n                _add_op(\n                    \"members\",\n                    [ScimGroupMember(value=\"user-1\"), ScimGroupMember(value=\"user-2\")],\n                )\n            ],\n            group,\n        )\n        assert len(result.members) == 2\n        assert added == [\"user-2\"]\n\n    def test_remove_member(self) -> None:\n        group = _make_group(\n            members=[\n                ScimGroupMember(value=\"user-1\"),\n                ScimGroupMember(value=\"user-2\"),\n            ]\n        )\n        result, added, removed = apply_group_patch(\n            [_remove_op('members[value eq \"user-1\"]')],\n            group,\n        )\n        assert len(result.members) == 1\n        assert result.members[0].value == \"user-2\"\n        assert removed == [\"user-1\"]\n        assert added == []\n\n    def test_remove_nonexistent_member(self) -> None:\n        group = _make_group(members=[ScimGroupMember(value=\"user-1\")])\n        result, _, removed = apply_group_patch(\n            [_remove_op('members[value eq \"user-999\"]')],\n            group,\n        )\n        assert len(result.members) == 1\n        assert removed == []\n\n    def test_mixed_operations(self) -> None:\n        group = _make_group(members=[ScimGroupMember(value=\"user-1\")])\n        result, added, removed = apply_group_patch(\n            [\n                _replace_op(\"displayName\", \"Renamed\"),\n                _add_op(\"members\", [ScimGroupMember(value=\"user-2\")]),\n                _remove_op('members[value eq \"user-1\"]'),\n            ],\n            group,\n        )\n        assert result.displayName == \"Renamed\"\n        assert added == [\"user-2\"]\n        assert removed == [\"user-1\"]\n        assert len(result.members) == 1\n\n    def test_remove_without_path_raises(self) -> None:\n        group = _make_group()\n        with pytest.raises(ScimPatchError, match=\"requires a path\"):\n            apply_group_patch(\n                [ScimPatchOperation(op=ScimPatchOperationType.REMOVE, path=None)],\n                group,\n            )\n\n    def test_remove_invalid_path_raises(self) -> None:\n        group = _make_group()\n        with pytest.raises(ScimPatchError, match=\"Unsupported remove path\"):\n            apply_group_patch([_remove_op(\"displayName\")], group)\n\n    def test_replace_members_with_path(self) -> None:\n        group = _make_group(\n            members=[\n                ScimGroupMember(value=\"user-1\"),\n                ScimGroupMember(value=\"user-2\"),\n            ]\n        )\n        result, added, removed = apply_group_patch(\n            [\n                _replace_op(\n                    \"members\",\n                    [ScimGroupMember(value=\"user-2\"), ScimGroupMember(value=\"user-3\")],\n                )\n            ],\n            group,\n        )\n        assert len(result.members) == 2\n        member_ids = {m.value for m in result.members}\n        assert member_ids == {\"user-2\", \"user-3\"}\n        assert \"user-3\" in added\n        assert \"user-1\" in removed\n        assert \"user-2\" not in added\n        assert \"user-2\" not in removed\n\n    def test_replace_members_empty_list_clears(self) -> None:\n        group = _make_group(\n            members=[\n                ScimGroupMember(value=\"user-1\"),\n                ScimGroupMember(value=\"user-2\"),\n            ]\n        )\n        result, added, removed = apply_group_patch(\n            [_replace_op(\"members\", [])],\n            group,\n        )\n        assert len(result.members) == 0\n        assert added == []\n        assert set(removed) == {\"user-1\", \"user-2\"}\n\n    def test_unsupported_replace_path_raises(self) -> None:\n        group = _make_group()\n        with pytest.raises(ScimPatchError, match=\"Unsupported path\"):\n            apply_group_patch([_replace_op(\"unknownField\", \"val\")], group)\n\n    def test_original_not_mutated(self) -> None:\n        group = _make_group()\n        apply_group_patch([_replace_op(\"displayName\", \"Changed\")], group)\n        assert group.displayName == \"Engineering\"\n\n    def test_replace_without_path_ignores_id(self) -> None:\n        \"\"\"Group replace with 'id' in value dict should be silently ignored.\"\"\"\n        group = _make_group()\n        result, _, _ = apply_group_patch(\n            [\n                _replace_op(\n                    None, ScimPatchResourceValue(displayName=\"Updated\", id=\"some-id\")\n                )\n            ],\n            group,\n            ignored_paths=_OKTA_IGNORED,\n        )\n        assert result.displayName == \"Updated\"\n\n    def test_replace_without_path_ignores_schemas(self) -> None:\n        group = _make_group()\n        result, _, _ = apply_group_patch(\n            [\n                _replace_op(\n                    None,\n                    ScimPatchResourceValue(\n                        displayName=\"Updated\",\n                        schemas=[\"urn:ietf:params:scim:schemas:core:2.0:Group\"],\n                    ),\n                )\n            ],\n            group,\n            ignored_paths=_OKTA_IGNORED,\n        )\n        assert result.displayName == \"Updated\"\n\n    def test_replace_without_path_complex_value_dict(self) -> None:\n        \"\"\"Group PATCH with complex types in value dict (lists, nested dicts)\n        must not cause Pydantic validation errors.\"\"\"\n        group = _make_group()\n        result, _, _ = apply_group_patch(\n            [\n                _replace_op(\n                    None,\n                    ScimPatchResourceValue(\n                        displayName=\"Updated\",\n                        id=\"123\",\n                        schemas=[\"urn:ietf:params:scim:schemas:core:2.0:Group\"],\n                        meta=ScimMeta(resourceType=\"Group\"),\n                    ),\n                )\n            ],\n            group,\n            ignored_paths=_OKTA_IGNORED,\n        )\n        assert result.displayName == \"Updated\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/scim/test_providers.py",
    "content": "from unittest.mock import MagicMock\nfrom uuid import UUID\nfrom uuid import uuid4\n\nfrom ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA\nfrom ee.onyx.server.scim.models import SCIM_USER_SCHEMA\nfrom ee.onyx.server.scim.models import ScimEmail\nfrom ee.onyx.server.scim.models import ScimGroupMember\nfrom ee.onyx.server.scim.models import ScimGroupResource\nfrom ee.onyx.server.scim.models import ScimMeta\nfrom ee.onyx.server.scim.models import ScimName\nfrom ee.onyx.server.scim.models import ScimUserGroupRef\nfrom ee.onyx.server.scim.models import ScimUserResource\nfrom ee.onyx.server.scim.providers.base import COMMON_IGNORED_PATCH_PATHS\nfrom ee.onyx.server.scim.providers.base import get_default_provider\nfrom ee.onyx.server.scim.providers.entra import _ENTRA_IGNORED_PATCH_PATHS\nfrom ee.onyx.server.scim.providers.entra import EntraProvider\nfrom ee.onyx.server.scim.providers.okta import OktaProvider\n\n\ndef _make_mock_user(\n    user_id: UUID | None = None,\n    email: str = \"test@example.com\",\n    personal_name: str | None = \"Test User\",\n    is_active: bool = True,\n) -> MagicMock:\n    user = MagicMock()\n    user.id = user_id or uuid4()\n    user.email = email\n    user.personal_name = personal_name\n    user.is_active = is_active\n    return user\n\n\ndef _make_mock_group(group_id: int = 42, name: str = \"Engineering\") -> MagicMock:\n    group = MagicMock()\n    group.id = group_id\n    group.name = name\n    return group\n\n\nclass TestOktaProvider:\n    def test_name(self) -> None:\n        assert OktaProvider().name == \"okta\"\n\n    def test_ignored_patch_paths(self) -> None:\n        assert OktaProvider().ignored_patch_paths == COMMON_IGNORED_PATCH_PATHS\n\n    def test_build_user_resource_basic(self) -> None:\n        provider = OktaProvider()\n        user = _make_mock_user()\n        result = provider.build_user_resource(user, \"ext-123\")\n\n        assert result == ScimUserResource(\n            id=str(user.id),\n            externalId=\"ext-123\",\n            userName=\"test@example.com\",\n            name=ScimName(givenName=\"Test\", familyName=\"User\", formatted=\"Test User\"),\n            displayName=\"Test User\",\n            emails=[ScimEmail(value=\"test@example.com\", type=\"work\", primary=True)],\n            active=True,\n            groups=[],\n            meta=ScimMeta(resourceType=\"User\"),\n        )\n\n    def test_build_user_resource_has_core_schema_only(self) -> None:\n        provider = OktaProvider()\n        user = _make_mock_user()\n        result = provider.build_user_resource(user, \"ext-123\")\n        assert result.schemas == [SCIM_USER_SCHEMA]\n\n    def test_build_user_resource_with_groups(self) -> None:\n        provider = OktaProvider()\n        user = _make_mock_user()\n        groups = [(1, \"Engineering\"), (2, \"Design\")]\n        result = provider.build_user_resource(user, \"ext-123\", groups=groups)\n\n        assert result.groups == [\n            ScimUserGroupRef(value=\"1\", display=\"Engineering\"),\n            ScimUserGroupRef(value=\"2\", display=\"Design\"),\n        ]\n\n    def test_build_user_resource_empty_groups(self) -> None:\n        provider = OktaProvider()\n        user = _make_mock_user()\n        result = provider.build_user_resource(user, \"ext-123\", groups=[])\n\n        assert result.groups == []\n\n    def test_build_user_resource_no_groups(self) -> None:\n        provider = OktaProvider()\n        user = _make_mock_user()\n        result = provider.build_user_resource(user, \"ext-123\")\n\n        assert result.groups == []\n\n    def test_build_user_resource_name_parsing(self) -> None:\n        provider = OktaProvider()\n        user = _make_mock_user(personal_name=\"Jane Doe\")\n        result = provider.build_user_resource(user, None)\n\n        assert result.name == ScimName(\n            givenName=\"Jane\", familyName=\"Doe\", formatted=\"Jane Doe\"\n        )\n\n    def test_build_user_resource_single_name(self) -> None:\n        provider = OktaProvider()\n        user = _make_mock_user(personal_name=\"Madonna\")\n        result = provider.build_user_resource(user, None)\n\n        assert result.name == ScimName(\n            givenName=\"Madonna\", familyName=\"\", formatted=\"Madonna\"\n        )\n\n    def test_build_user_resource_no_name(self) -> None:\n        provider = OktaProvider()\n        user = _make_mock_user(personal_name=None)\n        result = provider.build_user_resource(user, None)\n\n        # Falls back to deriving name from email local part\n        assert result.name == ScimName(\n            givenName=\"test\", familyName=\"\", formatted=\"test\"\n        )\n        assert result.displayName is None\n\n    def test_build_user_resource_scim_username_preserves_case(self) -> None:\n        \"\"\"When scim_username is set, userName and emails use original case.\"\"\"\n        provider = OktaProvider()\n        user = _make_mock_user(email=\"alice@example.com\")\n        result = provider.build_user_resource(\n            user, \"ext-1\", scim_username=\"Alice@Example.com\"\n        )\n\n        assert result.userName == \"Alice@Example.com\"\n        assert result.emails[0].value == \"Alice@Example.com\"\n\n    def test_build_user_resource_scim_username_none_falls_back(self) -> None:\n        \"\"\"When scim_username is None, userName falls back to user.email.\"\"\"\n        provider = OktaProvider()\n        user = _make_mock_user(email=\"alice@example.com\")\n        result = provider.build_user_resource(user, \"ext-1\", scim_username=None)\n\n        assert result.userName == \"alice@example.com\"\n        assert result.emails[0].value == \"alice@example.com\"\n\n    def test_build_group_resource(self) -> None:\n        provider = OktaProvider()\n        group = _make_mock_group()\n        uid1, uid2 = uuid4(), uuid4()\n        members: list[tuple[UUID, str | None]] = [\n            (uid1, \"alice@example.com\"),\n            (uid2, \"bob@example.com\"),\n        ]\n\n        result = provider.build_group_resource(group, members, \"ext-g-1\")\n\n        assert result == ScimGroupResource(\n            id=\"42\",\n            externalId=\"ext-g-1\",\n            displayName=\"Engineering\",\n            members=[\n                ScimGroupMember(value=str(uid1), display=\"alice@example.com\"),\n                ScimGroupMember(value=str(uid2), display=\"bob@example.com\"),\n            ],\n            meta=ScimMeta(resourceType=\"Group\"),\n        )\n\n    def test_build_group_resource_empty_members(self) -> None:\n        provider = OktaProvider()\n        group = _make_mock_group()\n        result = provider.build_group_resource(group, [])\n\n        assert result.members == []\n\n\nclass TestEntraProvider:\n    def test_name(self) -> None:\n        assert EntraProvider().name == \"entra\"\n\n    def test_ignored_patch_paths(self) -> None:\n        paths = EntraProvider().ignored_patch_paths\n        assert paths == _ENTRA_IGNORED_PATCH_PATHS\n        # Enterprise extension URN is now handled (not ignored)\n        assert paths >= COMMON_IGNORED_PATCH_PATHS\n\n    def test_build_user_resource_includes_enterprise_schema(self) -> None:\n        provider = EntraProvider()\n        user = _make_mock_user()\n        result = provider.build_user_resource(user, \"ext-entra-1\")\n\n        assert result.schemas == [SCIM_USER_SCHEMA, SCIM_ENTERPRISE_USER_SCHEMA]\n\n    def test_build_user_resource_basic(self) -> None:\n        provider = EntraProvider()\n        user = _make_mock_user()\n        result = provider.build_user_resource(user, \"ext-entra-1\")\n\n        assert result == ScimUserResource(\n            schemas=[SCIM_USER_SCHEMA, SCIM_ENTERPRISE_USER_SCHEMA],\n            id=str(user.id),\n            externalId=\"ext-entra-1\",\n            userName=\"test@example.com\",\n            name=ScimName(givenName=\"Test\", familyName=\"User\", formatted=\"Test User\"),\n            displayName=\"Test User\",\n            emails=[ScimEmail(value=\"test@example.com\", type=\"work\", primary=True)],\n            active=True,\n            groups=[],\n            meta=ScimMeta(resourceType=\"User\"),\n        )\n\n\nclass TestGetDefaultProvider:\n    def test_returns_okta(self) -> None:\n        provider = get_default_provider()\n        assert isinstance(provider, OktaProvider)\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/scim/test_user_endpoints.py",
    "content": "\"\"\"Unit tests for SCIM User CRUD endpoints.\"\"\"\n\nfrom __future__ import annotations\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nfrom fastapi import Response\nfrom sqlalchemy.exc import IntegrityError\n\nfrom ee.onyx.server.scim.api import _scim_name_to_str\nfrom ee.onyx.server.scim.api import create_user\nfrom ee.onyx.server.scim.api import delete_user\nfrom ee.onyx.server.scim.api import get_user\nfrom ee.onyx.server.scim.api import list_users\nfrom ee.onyx.server.scim.api import patch_user\nfrom ee.onyx.server.scim.api import replace_user\nfrom ee.onyx.server.scim.models import ScimMappingFields\nfrom ee.onyx.server.scim.models import ScimName\nfrom ee.onyx.server.scim.models import ScimPatchOperation\nfrom ee.onyx.server.scim.models import ScimPatchOperationType\nfrom ee.onyx.server.scim.models import ScimPatchRequest\nfrom ee.onyx.server.scim.models import ScimUserResource\nfrom ee.onyx.server.scim.patch import ScimPatchError\nfrom ee.onyx.server.scim.providers.base import ScimProvider\nfrom tests.unit.onyx.server.scim.conftest import assert_scim_error\nfrom tests.unit.onyx.server.scim.conftest import make_db_user\nfrom tests.unit.onyx.server.scim.conftest import make_scim_user\nfrom tests.unit.onyx.server.scim.conftest import make_user_mapping\nfrom tests.unit.onyx.server.scim.conftest import parse_scim_list\nfrom tests.unit.onyx.server.scim.conftest import parse_scim_user\n\n\nclass TestListUsers:\n    \"\"\"Tests for GET /scim/v2/Users.\"\"\"\n\n    def test_empty_result(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.list_users.return_value = ([], 0)\n\n        result = list_users(\n            filter=None,\n            startIndex=1,\n            count=100,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parsed = parse_scim_list(result)\n        assert parsed.totalResults == 0\n        assert parsed.Resources == []\n\n    def test_returns_users_with_scim_shape(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        user = make_db_user(email=\"alice@example.com\", personal_name=\"Alice Smith\")\n        mapping = make_user_mapping(\n            external_id=\"ext-abc\", user_id=user.id, scim_username=\"Alice@example.com\"\n        )\n        mock_dal.list_users.return_value = ([(user, mapping)], 1)\n\n        result = list_users(\n            filter=None,\n            startIndex=1,\n            count=100,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parsed = parse_scim_list(result)\n        assert parsed.totalResults == 1\n        assert len(parsed.Resources) == 1\n        resource = parsed.Resources[0]\n        assert isinstance(resource, ScimUserResource)\n        assert resource.userName == \"Alice@example.com\"\n        assert resource.externalId == \"ext-abc\"\n\n    def test_unsupported_filter_attribute_returns_400(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.list_users.side_effect = ValueError(\n            \"Unsupported filter attribute: emails\"\n        )\n\n        result = list_users(\n            filter='emails eq \"x@y.com\"',\n            startIndex=1,\n            count=100,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 400)\n\n    def test_invalid_filter_syntax_returns_400(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,  # noqa: ARG002\n        provider: ScimProvider,\n    ) -> None:\n        result = list_users(\n            filter=\"not a valid filter\",\n            startIndex=1,\n            count=100,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 400)\n\n\nclass TestGetUser:\n    \"\"\"Tests for GET /scim/v2/Users/{user_id}.\"\"\"\n\n    def test_returns_scim_resource(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        user = make_db_user(email=\"alice@example.com\")\n        mock_dal.get_user.return_value = user\n\n        result = get_user(\n            user_id=str(user.id),\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_user(result)\n        assert resource.userName == \"alice@example.com\"\n        assert resource.id == str(user.id)\n\n    def test_invalid_uuid_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,  # noqa: ARG002\n        provider: ScimProvider,\n    ) -> None:\n        result = get_user(\n            user_id=\"not-a-uuid\",\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 404)\n\n    def test_user_not_found_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_user.return_value = None\n\n        result = get_user(\n            user_id=str(uuid4()),\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 404)\n\n\nclass TestCreateUser:\n    \"\"\"Tests for POST /scim/v2/Users.\"\"\"\n\n    @patch(\"ee.onyx.server.scim.api._check_seat_availability\", return_value=None)\n    def test_success(\n        self,\n        mock_seats: MagicMock,  # noqa: ARG002\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_user_by_email.return_value = None\n        resource = make_scim_user(userName=\"new@example.com\")\n\n        result = create_user(\n            user_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_user(result, status=201)\n        assert resource.userName == \"new@example.com\"\n        mock_dal.add_user.assert_called_once()\n        mock_dal.commit.assert_called_once()\n\n    @patch(\"ee.onyx.server.scim.api._check_seat_availability\", return_value=None)\n    def test_missing_external_id_still_creates_mapping(\n        self,\n        mock_seats: MagicMock,  # noqa: ARG002\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        \"\"\"Mapping is always created to mark user as SCIM-managed.\"\"\"\n        mock_dal.get_user_by_email.return_value = None\n        resource = make_scim_user(externalId=None)\n\n        result = create_user(\n            user_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parsed = parse_scim_user(result, status=201)\n        assert parsed.userName is not None\n        mock_dal.add_user.assert_called_once()\n        mock_dal.create_user_mapping.assert_called_once()\n        mock_dal.commit.assert_called_once()\n\n    @patch(\"ee.onyx.server.scim.api._check_seat_availability\", return_value=None)\n    def test_duplicate_scim_managed_email_returns_409(\n        self,\n        mock_seats: MagicMock,  # noqa: ARG002\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        \"\"\"409 only when the existing user already has a SCIM mapping.\"\"\"\n        existing = make_db_user()\n        mock_dal.get_user_by_email.return_value = existing\n        mock_dal.get_user_mapping_by_user_id.return_value = make_user_mapping(\n            user_id=existing.id\n        )\n        resource = make_scim_user()\n\n        result = create_user(\n            user_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 409)\n\n    @patch(\"ee.onyx.server.scim.api._check_seat_availability\", return_value=None)\n    def test_existing_user_without_mapping_gets_linked(\n        self,\n        mock_seats: MagicMock,  # noqa: ARG002\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        \"\"\"Pre-existing user without SCIM mapping gets adopted (linked).\"\"\"\n        existing = make_db_user(email=\"admin@example.com\", personal_name=None)\n        mock_dal.get_user_by_email.return_value = existing\n        mock_dal.get_user_mapping_by_user_id.return_value = None\n        resource = make_scim_user(userName=\"admin@example.com\", externalId=\"ext-admin\")\n\n        result = create_user(\n            user_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parsed = parse_scim_user(result, status=201)\n        assert parsed.userName == \"admin@example.com\"\n        # Should NOT create a new user — reuse existing\n        mock_dal.add_user.assert_not_called()\n        # Should sync is_active and personal_name from the SCIM request\n        mock_dal.update_user.assert_called_once_with(\n            existing, is_active=True, personal_name=\"Test User\"\n        )\n        # Should create a SCIM mapping for the existing user\n        mock_dal.create_user_mapping.assert_called_once()\n        mock_dal.commit.assert_called_once()\n\n    @patch(\"ee.onyx.server.scim.api._check_seat_availability\", return_value=None)\n    def test_integrity_error_returns_409(\n        self,\n        mock_seats: MagicMock,  # noqa: ARG002\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_user_by_email.return_value = None\n        mock_dal.add_user.side_effect = IntegrityError(\"dup\", {}, Exception())\n        resource = make_scim_user()\n\n        result = create_user(\n            user_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 409)\n        mock_dal.rollback.assert_called_once()\n\n    @patch(\"ee.onyx.server.scim.api._check_seat_availability\")\n    def test_seat_limit_returns_403(\n        self,\n        mock_seats: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,  # noqa: ARG002\n        provider: ScimProvider,\n    ) -> None:\n        mock_seats.return_value = \"Seat limit reached\"\n        resource = make_scim_user()\n\n        result = create_user(\n            user_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 403)\n\n    @patch(\"ee.onyx.server.scim.api._check_seat_availability\", return_value=None)\n    def test_creates_external_id_mapping(\n        self,\n        mock_seats: MagicMock,  # noqa: ARG002\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_user_by_email.return_value = None\n        resource = make_scim_user(externalId=\"ext-123\")\n\n        result = create_user(\n            user_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_user(result, status=201)\n        assert resource.externalId == \"ext-123\"\n        mock_dal.create_user_mapping.assert_called_once()\n\n\nclass TestReplaceUser:\n    \"\"\"Tests for PUT /scim/v2/Users/{user_id}.\"\"\"\n\n    def test_success(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        user = make_db_user(email=\"old@example.com\")\n        mock_dal.get_user.return_value = user\n        resource = make_scim_user(\n            userName=\"new@example.com\",\n            name=ScimName(givenName=\"New\", familyName=\"Name\"),\n        )\n\n        result = replace_user(\n            user_id=str(user.id),\n            user_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_user(result)\n        mock_dal.update_user.assert_called_once()\n        mock_dal.commit.assert_called_once()\n\n    def test_not_found_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_user.return_value = None\n\n        result = replace_user(\n            user_id=str(uuid4()),\n            user_resource=make_scim_user(),\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 404)\n\n    @patch(\"ee.onyx.server.scim.api._check_seat_availability\")\n    def test_reactivation_checks_seats(\n        self,\n        mock_seats: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        user = make_db_user(is_active=False)\n        mock_dal.get_user.return_value = user\n        mock_seats.return_value = \"No seats\"\n        resource = make_scim_user(active=True)\n\n        result = replace_user(\n            user_id=str(user.id),\n            user_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 403)\n        mock_seats.assert_called_once()\n\n    def test_syncs_external_id(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        user = make_db_user()\n        mock_dal.get_user.return_value = user\n\n        resource = make_scim_user(externalId=None)\n\n        result = replace_user(\n            user_id=str(user.id),\n            user_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_user(result)\n        mock_dal.sync_user_external_id.assert_called_once_with(\n            user.id,\n            None,\n            scim_username=\"test@example.com\",\n            fields=ScimMappingFields(\n                given_name=\"Test\",\n                family_name=\"User\",\n            ),\n        )\n\n\nclass TestPatchUser:\n    \"\"\"Tests for PATCH /scim/v2/Users/{user_id}.\"\"\"\n\n    def test_deactivate(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        user = make_db_user(is_active=True)\n        mock_dal.get_user.return_value = user\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.REPLACE,\n                    path=\"active\",\n                    value=False,\n                )\n            ]\n        )\n\n        result = patch_user(\n            user_id=str(user.id),\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_user(result)\n        mock_dal.update_user.assert_called_once()\n\n    def test_not_found_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        mock_dal.get_user.return_value = None\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.REPLACE,\n                    path=\"active\",\n                    value=False,\n                )\n            ]\n        )\n\n        result = patch_user(\n            user_id=str(uuid4()),\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 404)\n\n    def test_patch_displayname_persists(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        \"\"\"PATCH displayName should update personal_name in the DB.\"\"\"\n        user = make_db_user(personal_name=\"Old Name\")\n        mock_dal.get_user.return_value = user\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.REPLACE,\n                    path=\"displayName\",\n                    value=\"New Display Name\",\n                )\n            ]\n        )\n\n        result = patch_user(\n            user_id=str(user.id),\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        parse_scim_user(result)\n        # Verify the update_user call received the new display name\n        call_kwargs = mock_dal.update_user.call_args\n        assert call_kwargs[1][\"personal_name\"] == \"New Display Name\"\n\n    @patch(\"ee.onyx.server.scim.api.apply_user_patch\")\n    def test_patch_error_returns_error_response(\n        self,\n        mock_apply: MagicMock,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        user = make_db_user()\n        mock_dal.get_user.return_value = user\n        mock_apply.side_effect = ScimPatchError(\"Bad op\", 400)\n        patch_req = ScimPatchRequest(\n            Operations=[\n                ScimPatchOperation(\n                    op=ScimPatchOperationType.REMOVE,\n                    path=\"userName\",\n                )\n            ]\n        )\n\n        result = patch_user(\n            user_id=str(user.id),\n            patch_request=patch_req,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 400)\n\n\nclass TestDeleteUser:\n    \"\"\"Tests for DELETE /scim/v2/Users/{user_id}.\"\"\"\n\n    def test_success(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n    ) -> None:\n        user = make_db_user(is_active=True)\n        mock_dal.get_user.return_value = user\n        mapping = MagicMock()\n        mapping.id = 1\n        mock_dal.get_user_mapping_by_user_id.return_value = mapping\n\n        result = delete_user(\n            user_id=str(user.id),\n            _token=mock_token,\n            db_session=mock_db_session,\n        )\n\n        assert isinstance(result, Response)\n        assert result.status_code == 204\n        mock_dal.deactivate_user.assert_called_once_with(user)\n        mock_dal.delete_user_mapping.assert_called_once_with(1)\n        mock_dal.commit.assert_called_once()\n\n    def test_not_found_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n    ) -> None:\n        mock_dal.get_user.return_value = None\n\n        result = delete_user(\n            user_id=str(uuid4()),\n            _token=mock_token,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 404)\n\n    def test_invalid_uuid_returns_404(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,  # noqa: ARG002\n    ) -> None:\n        result = delete_user(\n            user_id=\"not-a-uuid\",\n            _token=mock_token,\n            db_session=mock_db_session,\n        )\n\n        assert_scim_error(result, 404)\n\n\nclass TestScimNameToStr:\n    \"\"\"Tests for _scim_name_to_str helper.\"\"\"\n\n    def test_prefers_formatted_over_components(self) -> None:\n        \"\"\"When client provides formatted, use it — the client knows what it wants.\"\"\"\n        name = ScimName(\n            givenName=\"Jane\", familyName=\"Smith\", formatted=\"Dr. Jane Smith\"\n        )\n        assert _scim_name_to_str(name) == \"Dr. Jane Smith\"\n\n    def test_given_name_only(self) -> None:\n        name = ScimName(givenName=\"Jane\")\n        assert _scim_name_to_str(name) == \"Jane\"\n\n    def test_family_name_only(self) -> None:\n        name = ScimName(familyName=\"Smith\")\n        assert _scim_name_to_str(name) == \"Smith\"\n\n    def test_falls_back_to_formatted(self) -> None:\n        name = ScimName(formatted=\"Display Name\")\n        assert _scim_name_to_str(name) == \"Display Name\"\n\n    def test_none_returns_none(self) -> None:\n        assert _scim_name_to_str(None) is None\n\n    def test_empty_name_returns_none(self) -> None:\n        name = ScimName()\n        assert _scim_name_to_str(name) is None\n\n\nclass TestEmailCasePreservation:\n    \"\"\"Tests verifying email case is preserved through SCIM endpoints.\"\"\"\n\n    @patch(\"ee.onyx.server.scim.api._check_seat_availability\", return_value=None)\n    def test_create_preserves_username_case(\n        self,\n        mock_seats: MagicMock,  # noqa: ARG002\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        \"\"\"POST /Users with mixed-case userName returns the original case.\"\"\"\n        mock_dal.get_user_by_email.return_value = None\n        resource = make_scim_user(userName=\"Alice@Example.COM\")\n\n        result = create_user(\n            user_resource=resource,\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_user(result, status=201)\n        assert resource.userName == \"Alice@Example.COM\"\n        assert resource.emails[0].value == \"Alice@Example.COM\"\n\n    def test_get_preserves_username_case(\n        self,\n        mock_db_session: MagicMock,\n        mock_token: MagicMock,\n        mock_dal: MagicMock,\n        provider: ScimProvider,\n    ) -> None:\n        \"\"\"GET /Users/{id} returns the original-case userName from mapping.\"\"\"\n        user = make_db_user(email=\"alice@example.com\")\n        mock_dal.get_user.return_value = user\n        mapping = make_user_mapping(\n            external_id=\"ext-1\",\n            user_id=user.id,\n            scim_username=\"Alice@Example.COM\",\n        )\n        mock_dal.get_user_mapping_by_user_id.return_value = mapping\n\n        result = get_user(\n            user_id=str(user.id),\n            _token=mock_token,\n            provider=provider,\n            db_session=mock_db_session,\n        )\n\n        resource = parse_scim_user(result)\n        assert resource.userName == \"Alice@Example.COM\"\n        assert resource.emails[0].value == \"Alice@Example.COM\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/test_full_user_snapshot.py",
    "content": "import datetime\nfrom unittest.mock import MagicMock\nfrom uuid import uuid4\n\nfrom onyx.auth.schemas import UserRole\nfrom onyx.db.enums import AccountType\nfrom onyx.server.models import FullUserSnapshot\nfrom onyx.server.models import UserGroupInfo\n\n\ndef _mock_user(\n    personal_name: str | None = \"Test User\",\n    created_at: datetime.datetime | None = None,\n    updated_at: datetime.datetime | None = None,\n) -> MagicMock:\n    user = MagicMock()\n    user.id = uuid4()\n    user.email = \"test@example.com\"\n    user.role = UserRole.BASIC\n    user.is_active = True\n    user.password_configured = True\n    user.personal_name = personal_name\n    user.created_at = created_at or datetime.datetime(\n        2025, 1, 1, tzinfo=datetime.timezone.utc\n    )\n    user.updated_at = updated_at or datetime.datetime(\n        2025, 6, 15, tzinfo=datetime.timezone.utc\n    )\n    user.account_type = AccountType.STANDARD\n    return user\n\n\ndef test_from_user_model_includes_new_fields() -> None:\n    user = _mock_user(personal_name=\"Alice\")\n    groups = [UserGroupInfo(id=1, name=\"Engineering\")]\n\n    snapshot = FullUserSnapshot.from_user_model(user, groups=groups)\n\n    assert snapshot.personal_name == \"Alice\"\n    assert snapshot.created_at == user.created_at\n    assert snapshot.updated_at == user.updated_at\n    assert snapshot.groups == groups\n\n\ndef test_from_user_model_defaults_groups_to_empty() -> None:\n    user = _mock_user()\n    snapshot = FullUserSnapshot.from_user_model(user)\n\n    assert snapshot.groups == []\n\n\ndef test_from_user_model_personal_name_none() -> None:\n    user = _mock_user(personal_name=None)\n    snapshot = FullUserSnapshot.from_user_model(user)\n\n    assert snapshot.personal_name is None\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/test_pool_metrics.py",
    "content": "\"\"\"Unit tests for SQLAlchemy connection pool Prometheus metrics.\"\"\"\n\nimport time\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom fastapi import FastAPI\nfrom sqlalchemy.pool import NullPool\n\nfrom onyx.server.metrics.postgres_connection_pool import _register_pool_events\nfrom onyx.server.metrics.postgres_connection_pool import PoolStateCollector\nfrom onyx.server.metrics.postgres_connection_pool import (\n    setup_postgres_connection_pool_metrics,\n)\nfrom onyx.utils.middleware import _build_route_map\nfrom onyx.utils.middleware import _match_route\n\n\n# --- PoolStateCollector tests ---\n\n\ndef test_pool_state_collector_reports_pool_stats() -> None:\n    \"\"\"Verify the custom collector reads pool.checkedout/checkedin/overflow/size.\"\"\"\n    mock_pool = MagicMock()\n    mock_pool.checkedout.return_value = 5\n    mock_pool.checkedin.return_value = 35\n    mock_pool.overflow.return_value = 2\n    mock_pool.size.return_value = 40\n\n    collector = PoolStateCollector()\n    collector.add_pool(\"sync\", mock_pool)\n\n    families = collector.collect()\n    # 4 GaugeMetricFamilies: checked_out, checked_in, overflow, size\n    assert len(families) == 4\n\n    # Convert to dict for easier assertions\n    metrics: dict[str, float] = {}\n    for family in families:\n        for sample in family.samples:\n            metrics[f\"{sample.name}:{sample.labels['engine']}\"] = sample.value\n\n    assert metrics[\"onyx_db_pool_checked_out:sync\"] == 5\n    assert metrics[\"onyx_db_pool_checked_in:sync\"] == 35\n    assert metrics[\"onyx_db_pool_overflow:sync\"] == 2\n    assert metrics[\"onyx_db_pool_size:sync\"] == 40\n\n\ndef test_pool_state_collector_handles_multiple_engines() -> None:\n    \"\"\"Verify the collector reports metrics for multiple engines.\"\"\"\n    sync_pool = MagicMock()\n    sync_pool.checkedout.return_value = 10\n    sync_pool.checkedin.return_value = 30\n    sync_pool.overflow.return_value = 0\n    sync_pool.size.return_value = 40\n\n    readonly_pool = MagicMock()\n    readonly_pool.checkedout.return_value = 3\n    readonly_pool.checkedin.return_value = 7\n    readonly_pool.overflow.return_value = 1\n    readonly_pool.size.return_value = 10\n\n    collector = PoolStateCollector()\n    collector.add_pool(\"sync\", sync_pool)\n    collector.add_pool(\"readonly\", readonly_pool)\n\n    families = collector.collect()\n    # Each family should have 2 samples (sync + readonly)\n    for family in families:\n        assert len(list(family.samples)) == 2\n\n\n# --- Pool event listener tests ---\n\n\ndef _make_conn_record() -> MagicMock:\n    \"\"\"Create a mock connection record with an info dict.\"\"\"\n    record = MagicMock()\n    record.info = {}\n    return record\n\n\ndef test_checkout_event_stores_endpoint_and_increments_gauge() -> None:\n    \"\"\"Verify checkout event stores handler on conn_record and increments metrics.\"\"\"\n    engine = MagicMock()\n    engine.pool = MagicMock()\n    listeners: dict[str, Any] = {}\n\n    # Capture event listeners\n    with patch(\"onyx.server.metrics.postgres_connection_pool.event\") as mock_event:\n\n        def capture_listener(target: Any, event_name: str) -> Any:  # noqa: ARG001\n            def decorator(fn: Any) -> Any:\n                listeners[event_name] = fn\n                return fn\n\n            return decorator\n\n        mock_event.listens_for.side_effect = capture_listener\n        _register_pool_events(engine, \"sync\")\n\n    conn_record = _make_conn_record()\n\n    with (\n        patch(\n            \"onyx.server.metrics.postgres_connection_pool.CURRENT_ENDPOINT_CONTEXTVAR\"\n        ) as mock_ctx,\n        patch(\n            \"onyx.server.metrics.postgres_connection_pool.CURRENT_TENANT_ID_CONTEXTVAR\"\n        ) as mock_tenant_ctx,\n        patch(\n            \"onyx.server.metrics.postgres_connection_pool._connections_held\"\n        ) as mock_gauge,\n        patch(\"onyx.server.metrics.postgres_connection_pool._checkout_total\"),\n    ):\n        mock_labels = MagicMock()\n        mock_gauge.labels.return_value = mock_labels\n        mock_ctx.get.return_value = \"/api/chat/send-message\"\n        mock_tenant_ctx.get.return_value = \"tenant_xyz\"\n        listeners[\"checkout\"](None, conn_record, None)\n\n    assert conn_record.info[\"_metrics_endpoint\"] == \"/api/chat/send-message\"\n    assert conn_record.info[\"_metrics_tenant_id\"] == \"tenant_xyz\"\n    assert \"_metrics_checkout_time\" in conn_record.info\n    mock_gauge.labels.assert_called_with(\n        handler=\"/api/chat/send-message\", engine=\"sync\", tenant_id=\"tenant_xyz\"\n    )\n    mock_labels.inc.assert_called_once()\n\n\ndef test_checkin_event_observes_hold_duration() -> None:\n    \"\"\"Verify checkin event reads endpoint from conn_record and observes hold time.\"\"\"\n    engine = MagicMock()\n    engine.pool = MagicMock()\n    listeners: dict[str, Any] = {}\n\n    with patch(\"onyx.server.metrics.postgres_connection_pool.event\") as mock_event:\n\n        def capture_listener(target: Any, event_name: str) -> Any:  # noqa: ARG001\n            def decorator(fn: Any) -> Any:\n                listeners[event_name] = fn\n                return fn\n\n            return decorator\n\n        mock_event.listens_for.side_effect = capture_listener\n        _register_pool_events(engine, \"sync\")\n\n    conn_record = _make_conn_record()\n    conn_record.info[\"_metrics_endpoint\"] = \"/api/search\"\n    conn_record.info[\"_metrics_tenant_id\"] = \"tenant_abc\"\n    conn_record.info[\"_metrics_checkout_time\"] = time.monotonic() - 0.5\n\n    with (\n        patch(\n            \"onyx.server.metrics.postgres_connection_pool._connections_held\"\n        ) as mock_gauge,\n        patch(\n            \"onyx.server.metrics.postgres_connection_pool._hold_seconds\"\n        ) as mock_hist,\n        patch(\"onyx.server.metrics.postgres_connection_pool._checkin_total\"),\n    ):\n        mock_labels = MagicMock()\n        mock_gauge.labels.return_value = mock_labels\n        mock_hist_labels = MagicMock()\n        mock_hist.labels.return_value = mock_hist_labels\n\n        listeners[\"checkin\"](None, conn_record)\n\n        mock_gauge.labels.assert_called_with(\n            handler=\"/api/search\", engine=\"sync\", tenant_id=\"tenant_abc\"\n        )\n        mock_labels.dec.assert_called_once()\n        mock_hist.labels.assert_called_with(handler=\"/api/search\", engine=\"sync\")\n        mock_hist_labels.observe.assert_called_once()\n        # Verify the observed duration is roughly 0.5s\n        observed = mock_hist_labels.observe.call_args[0][0]\n        assert 0.4 < observed < 1.0\n\n    # conn_record.info should be cleaned up\n    assert \"_metrics_endpoint\" not in conn_record.info\n    assert \"_metrics_tenant_id\" not in conn_record.info\n    assert \"_metrics_checkout_time\" not in conn_record.info\n\n\ndef test_checkin_with_missing_endpoint_uses_unknown() -> None:\n    \"\"\"Verify checkin gracefully handles missing endpoint and tenant info.\"\"\"\n    engine = MagicMock()\n    engine.pool = MagicMock()\n    listeners: dict[str, Any] = {}\n\n    with patch(\"onyx.server.metrics.postgres_connection_pool.event\") as mock_event:\n\n        def capture_listener(target: Any, event_name: str) -> Any:  # noqa: ARG001\n            def decorator(fn: Any) -> Any:\n                listeners[event_name] = fn\n                return fn\n\n            return decorator\n\n        mock_event.listens_for.side_effect = capture_listener\n        _register_pool_events(engine, \"sync\")\n\n    conn_record = _make_conn_record()\n\n    with (\n        patch(\n            \"onyx.server.metrics.postgres_connection_pool._connections_held\"\n        ) as mock_gauge,\n        patch(\"onyx.server.metrics.postgres_connection_pool._hold_seconds\"),\n        patch(\"onyx.server.metrics.postgres_connection_pool._checkin_total\"),\n    ):\n        mock_labels = MagicMock()\n        mock_gauge.labels.return_value = mock_labels\n\n        listeners[\"checkin\"](None, conn_record)\n\n        mock_gauge.labels.assert_called_with(\n            handler=\"unknown\", engine=\"sync\", tenant_id=\"unknown\"\n        )\n\n\n# --- setup_postgres_connection_pool_metrics tests ---\n\n\ndef test_setup_skips_null_pool_engines() -> None:\n    \"\"\"Verify setup_postgres_connection_pool_metrics skips engines with NullPool.\"\"\"\n    with (\n        patch(\"onyx.server.metrics.postgres_connection_pool.REGISTRY\"),\n        patch(\n            \"onyx.server.metrics.postgres_connection_pool._register_pool_events\"\n        ) as mock_register,\n    ):\n        null_engine = MagicMock()\n        null_engine.pool = MagicMock(spec=NullPool)\n\n        setup_postgres_connection_pool_metrics({\"null\": null_engine})\n        mock_register.assert_not_called()\n\n\n# --- Route matching tests ---\n\n\ndef test_build_route_map_extracts_api_routes() -> None:\n    \"\"\"Verify _build_route_map extracts APIRoute path regexes.\"\"\"\n    app = FastAPI()\n\n    @app.get(\"/api/test\")\n    def test_endpoint() -> dict:\n        return {}\n\n    @app.get(\"/api/items/{item_id}\")\n    def get_item(item_id: str) -> dict:  # noqa: ARG001\n        return {}\n\n    route_map = _build_route_map(app)\n    # Should have at least the 2 routes we defined\n    templates = [template for _, template in route_map]\n    assert \"/api/test\" in templates\n    assert \"/api/items/{item_id}\" in templates\n\n\ndef test_match_route_resolves_parameterized_paths() -> None:\n    \"\"\"Verify _match_route resolves /api/items/abc-123 to /api/items/{item_id}.\"\"\"\n    app = FastAPI()\n\n    @app.get(\"/api/items/{item_id}\")\n    def get_item(item_id: str) -> dict:  # noqa: ARG001\n        return {}\n\n    route_map = _build_route_map(app)\n    result = _match_route(route_map, \"/api/items/abc-123\")\n    assert result == \"/api/items/{item_id}\"\n\n\ndef test_match_route_returns_none_for_unknown_paths() -> None:\n    \"\"\"Verify _match_route returns None for paths not in the route map.\"\"\"\n    app = FastAPI()\n\n    @app.get(\"/api/test\")\n    def test_endpoint() -> dict:\n        return {}\n\n    route_map = _build_route_map(app)\n    result = _match_route(route_map, \"/api/nonexistent\")\n    assert result is None\n\n\ndef test_match_route_exact_paths() -> None:\n    \"\"\"Verify _match_route handles exact (non-parameterized) paths.\"\"\"\n    app = FastAPI()\n\n    @app.get(\"/api/health\")\n    def health() -> dict:\n        return {}\n\n    route_map = _build_route_map(app)\n    result = _match_route(route_map, \"/api/health\")\n    assert result == \"/api/health\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/test_projects_file_utils.py",
    "content": "from io import BytesIO\nfrom unittest.mock import MagicMock\n\nimport pytest\nfrom fastapi import UploadFile\n\nfrom onyx.natural_language_processing import utils as nlp_utils\nfrom onyx.natural_language_processing.utils import BaseTokenizer\nfrom onyx.natural_language_processing.utils import count_tokens\nfrom onyx.server.features.projects import projects_file_utils as utils\nfrom onyx.server.settings.models import Settings\n\n\nclass _Tokenizer(BaseTokenizer):\n    def encode(self, text: str) -> list[int]:\n        return [1] * len(text)\n\n    def tokenize(self, text: str) -> list[str]:\n        return list(text)\n\n    def decode(self, _tokens: list[int]) -> str:\n        return \"\"\n\n\nclass _NonSeekableFile(BytesIO):\n    def tell(self) -> int:\n        raise OSError(\"tell not supported\")\n\n    def seek(self, *_args: object, **_kwargs: object) -> int:\n        raise OSError(\"seek not supported\")\n\n\ndef _make_upload(filename: str, size: int, content: bytes | None = None) -> UploadFile:\n    payload = content if content is not None else (b\"x\" * size)\n    return UploadFile(filename=filename, file=BytesIO(payload), size=size)\n\n\ndef _make_upload_no_size(filename: str, content: bytes) -> UploadFile:\n    return UploadFile(filename=filename, file=BytesIO(content), size=None)\n\n\ndef _make_settings(upload_size_mb: int = 1, token_threshold_k: int = 100) -> Settings:\n    return Settings(\n        user_file_max_upload_size_mb=upload_size_mb,\n        file_token_count_threshold_k=token_threshold_k,\n    )\n\n\ndef _patch_common_dependencies(\n    monkeypatch: pytest.MonkeyPatch,\n    upload_size_mb: int = 1,\n    token_threshold_k: int = 100,\n) -> None:\n    monkeypatch.setattr(utils, \"fetch_default_llm_model\", lambda _db: None)\n    monkeypatch.setattr(utils, \"get_tokenizer\", lambda **_kwargs: _Tokenizer())\n    monkeypatch.setattr(utils, \"is_file_password_protected\", lambda **_kwargs: False)\n    monkeypatch.setattr(\n        utils,\n        \"load_settings\",\n        lambda: _make_settings(upload_size_mb, token_threshold_k),\n    )\n\n\ndef test_get_upload_size_bytes_falls_back_to_stream_size() -> None:\n    upload = UploadFile(filename=\"example.txt\", file=BytesIO(b\"abcdef\"), size=None)\n    upload.file.seek(2)\n\n    size = utils.get_upload_size_bytes(upload)\n\n    assert size == 6\n    assert upload.file.tell() == 2\n\n\ndef test_get_upload_size_bytes_logs_warning_when_stream_size_unavailable(\n    caplog: pytest.LogCaptureFixture,\n) -> None:\n    upload = UploadFile(filename=\"non_seekable.txt\", file=_NonSeekableFile(), size=None)\n\n    caplog.set_level(\"WARNING\")\n    size = utils.get_upload_size_bytes(upload)\n\n    assert size is None\n    assert \"Could not determine upload size via stream seek\" in caplog.text\n    assert \"non_seekable.txt\" in caplog.text\n\n\ndef test_is_upload_too_large_logs_warning_when_size_unknown(\n    monkeypatch: pytest.MonkeyPatch,\n    caplog: pytest.LogCaptureFixture,\n) -> None:\n    upload = _make_upload(\"size_unknown.txt\", size=1)\n    monkeypatch.setattr(utils, \"get_upload_size_bytes\", lambda _upload: None)\n\n    caplog.set_level(\"WARNING\")\n    is_too_large = utils.is_upload_too_large(upload, max_bytes=100)\n\n    assert is_too_large is False\n    assert \"Could not determine upload size; skipping size-limit check\" in caplog.text\n    assert \"size_unknown.txt\" in caplog.text\n\n\ndef test_categorize_uploaded_files_accepts_size_under_limit(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    # upload_size_mb=1 → max_bytes = 1*1024*1024; file size 99 is well under\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1)\n    monkeypatch.setattr(utils, \"estimate_image_tokens_for_upload\", lambda _upload: 10)\n\n    upload = _make_upload(\"small.png\", size=99)\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.acceptable) == 1\n    assert len(result.rejected) == 0\n\n\ndef test_categorize_uploaded_files_uses_seek_fallback_when_upload_size_missing(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1)\n    monkeypatch.setattr(utils, \"estimate_image_tokens_for_upload\", lambda _upload: 10)\n\n    upload = _make_upload_no_size(\"small.png\", content=b\"x\" * 99)\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.acceptable) == 1\n    assert len(result.rejected) == 0\n\n\ndef test_categorize_uploaded_files_accepts_size_at_limit(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1)\n    monkeypatch.setattr(utils, \"estimate_image_tokens_for_upload\", lambda _upload: 10)\n\n    # 1 MB = 1048576 bytes; file at exactly that boundary should be accepted\n    upload = _make_upload(\"edge.png\", size=1048576)\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.acceptable) == 1\n    assert len(result.rejected) == 0\n\n\ndef test_categorize_uploaded_files_rejects_size_over_limit_with_reason(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1)\n    monkeypatch.setattr(utils, \"estimate_image_tokens_for_upload\", lambda _upload: 10)\n\n    upload = _make_upload(\"large.png\", size=1048577)  # 1 byte over 1 MB\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.acceptable) == 0\n    assert len(result.rejected) == 1\n    assert result.rejected[0].reason == \"Exceeds 1 MB file size limit\"\n\n\ndef test_categorize_uploaded_files_mixed_batch_keeps_valid_and_rejects_oversized(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1)\n    monkeypatch.setattr(utils, \"estimate_image_tokens_for_upload\", lambda _upload: 10)\n\n    small = _make_upload(\"small.png\", size=50)\n    large = _make_upload(\"large.png\", size=1048577)\n\n    result = utils.categorize_uploaded_files([small, large], MagicMock())\n\n    assert [file.filename for file in result.acceptable] == [\"small.png\"]\n    assert len(result.rejected) == 1\n    assert result.rejected[0].filename == \"large.png\"\n    assert result.rejected[0].reason == \"Exceeds 1 MB file size limit\"\n\n\ndef test_categorize_uploaded_files_enforces_size_limit_always(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1)\n\n    upload = _make_upload(\"oversized.pdf\", size=1048577)\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.acceptable) == 0\n    assert len(result.rejected) == 1\n    assert result.rejected[0].reason == \"Exceeds 1 MB file size limit\"\n\n\ndef test_categorize_uploaded_files_checks_size_before_text_extraction(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1)\n\n    extract_mock = MagicMock(return_value=\"this should not run\")\n    monkeypatch.setattr(utils, \"extract_file_text\", extract_mock)\n\n    oversized_doc = _make_upload(\"oversized.pdf\", size=1048577)\n    result = utils.categorize_uploaded_files([oversized_doc], MagicMock())\n\n    extract_mock.assert_not_called()\n    assert len(result.acceptable) == 0\n    assert len(result.rejected) == 1\n    assert result.rejected[0].reason == \"Exceeds 1 MB file size limit\"\n\n\ndef test_categorize_enforces_size_limit_when_upload_size_mb_is_positive(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"A positive upload_size_mb is always enforced.\"\"\"\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1)\n    monkeypatch.setattr(utils, \"estimate_image_tokens_for_upload\", lambda _upload: 10)\n\n    upload = _make_upload(\"huge.png\", size=1048577, content=b\"x\")\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.acceptable) == 0\n    assert len(result.rejected) == 1\n\n\ndef test_categorize_enforces_token_limit_when_threshold_k_is_positive(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"A positive token_threshold_k is always enforced.\"\"\"\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=5)\n    monkeypatch.setattr(utils, \"estimate_image_tokens_for_upload\", lambda _upload: 6000)\n\n    upload = _make_upload(\"big_image.png\", size=100)\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.acceptable) == 0\n    assert len(result.rejected) == 1\n\n\ndef test_categorize_no_token_limit_when_threshold_k_is_zero(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"token_threshold_k=0 means no token limit; high-token files are accepted.\"\"\"\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=0)\n    monkeypatch.setattr(\n        utils, \"estimate_image_tokens_for_upload\", lambda _upload: 999_999\n    )\n\n    upload = _make_upload(\"huge_image.png\", size=100)\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.rejected) == 0\n    assert len(result.acceptable) == 1\n\n\ndef test_categorize_both_limits_enforced(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"Both positive limits are enforced; file exceeding token limit is rejected.\"\"\"\n    _patch_common_dependencies(monkeypatch, upload_size_mb=10, token_threshold_k=5)\n    monkeypatch.setattr(utils, \"estimate_image_tokens_for_upload\", lambda _upload: 6000)\n\n    upload = _make_upload(\"over_tokens.png\", size=100)\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.acceptable) == 0\n    assert len(result.rejected) == 1\n    assert result.rejected[0].reason == \"Exceeds 5K token limit\"\n\n\ndef test_categorize_rejection_reason_contains_dynamic_values(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"Rejection reasons reflect the admin-configured limits, not hardcoded values.\"\"\"\n    _patch_common_dependencies(monkeypatch, upload_size_mb=42, token_threshold_k=7)\n    monkeypatch.setattr(utils, \"estimate_image_tokens_for_upload\", lambda _upload: 8000)\n\n    # File within size limit but over token limit\n    upload = _make_upload(\"tokens.png\", size=100)\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert result.rejected[0].reason == \"Exceeds 7K token limit\"\n\n    # File over size limit\n    _patch_common_dependencies(monkeypatch, upload_size_mb=42, token_threshold_k=7)\n    oversized = _make_upload(\"big.png\", size=42 * 1024 * 1024 + 1)\n    result2 = utils.categorize_uploaded_files([oversized], MagicMock())\n\n    assert result2.rejected[0].reason == \"Exceeds 42 MB file size limit\"\n\n\n# --- count_tokens tests ---\n\n\ndef test_count_tokens_small_text() -> None:\n    \"\"\"Small text should be encoded in a single call and return correct count.\"\"\"\n    tokenizer = _Tokenizer()\n    text = \"hello world\"\n    assert count_tokens(text, tokenizer) == len(tokenizer.encode(text))\n\n\ndef test_count_tokens_chunked_matches_single_call() -> None:\n    \"\"\"Chunked encoding should produce the same result as single-call for small text.\"\"\"\n    tokenizer = _Tokenizer()\n    text = \"a\" * 1000\n    assert count_tokens(text, tokenizer) == len(tokenizer.encode(text))\n\n\ndef test_count_tokens_large_text_is_chunked(monkeypatch: pytest.MonkeyPatch) -> None:\n    \"\"\"Text exceeding _ENCODE_CHUNK_SIZE should be split into multiple encode calls.\"\"\"\n    monkeypatch.setattr(nlp_utils, \"_ENCODE_CHUNK_SIZE\", 100)\n    tokenizer = _Tokenizer()\n    text = \"a\" * 250\n    # _Tokenizer returns 1 token per char, so total should be 250\n    assert count_tokens(text, tokenizer) == 250\n\n\ndef test_count_tokens_with_token_limit_exits_early(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"When token_limit is set and exceeded, count_tokens should stop early.\"\"\"\n    monkeypatch.setattr(nlp_utils, \"_ENCODE_CHUNK_SIZE\", 100)\n\n    encode_call_count = 0\n    original_tokenizer = _Tokenizer()\n\n    class _CountingTokenizer(BaseTokenizer):\n        def encode(self, text: str) -> list[int]:\n            nonlocal encode_call_count\n            encode_call_count += 1\n            return original_tokenizer.encode(text)\n\n        def tokenize(self, text: str) -> list[str]:\n            return list(text)\n\n        def decode(self, _tokens: list[int]) -> str:\n            return \"\"\n\n    tokenizer = _CountingTokenizer()\n    # 500 chars → 5 chunks of 100; limit=150 → should stop after 2 chunks\n    text = \"a\" * 500\n    result = count_tokens(text, tokenizer, token_limit=150)\n\n    assert result == 200  # 2 chunks × 100 tokens each\n    assert encode_call_count == 2, \"Should have stopped after 2 chunks\"\n\n\ndef test_count_tokens_with_token_limit_not_exceeded(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"When token_limit is set but not exceeded, all chunks are encoded.\"\"\"\n    monkeypatch.setattr(nlp_utils, \"_ENCODE_CHUNK_SIZE\", 100)\n    tokenizer = _Tokenizer()\n    text = \"a\" * 250\n    result = count_tokens(text, tokenizer, token_limit=1000)\n    assert result == 250\n\n\ndef test_count_tokens_no_limit_encodes_all_chunks(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"Without token_limit, all chunks are encoded regardless of count.\"\"\"\n    monkeypatch.setattr(nlp_utils, \"_ENCODE_CHUNK_SIZE\", 100)\n    tokenizer = _Tokenizer()\n    text = \"a\" * 500\n    result = count_tokens(text, tokenizer)\n    assert result == 500\n\n\n# --- early exit via token_limit in categorize tests ---\n\n\ndef test_categorize_early_exits_tokenization_for_large_text(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"Large text files should be rejected via early-exit tokenization\n    without encoding all chunks.\"\"\"\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=1)\n    # token_threshold = 1000; _ENCODE_CHUNK_SIZE = 100 → text of 500 chars = 5 chunks\n    # Should stop after 2nd chunk (200 tokens > 1000? No... need 1 token per char)\n    # With _Tokenizer: 1 token per char. threshold=1000, chunk=100 → need 11 chunks\n    # Let's use a bigger text\n    monkeypatch.setattr(nlp_utils, \"_ENCODE_CHUNK_SIZE\", 100)\n    large_text = \"x\" * 5000  # 5000 tokens, threshold 1000\n    monkeypatch.setattr(utils, \"extract_file_text\", lambda **_kwargs: large_text)\n\n    encode_call_count = 0\n    original_tokenizer = _Tokenizer()\n\n    class _CountingTokenizer(BaseTokenizer):\n        def encode(self, text: str) -> list[int]:\n            nonlocal encode_call_count\n            encode_call_count += 1\n            return original_tokenizer.encode(text)\n\n        def tokenize(self, text: str) -> list[str]:\n            return list(text)\n\n        def decode(self, _tokens: list[int]) -> str:\n            return \"\"\n\n    monkeypatch.setattr(utils, \"get_tokenizer\", lambda **_kwargs: _CountingTokenizer())\n\n    upload = _make_upload(\"big.txt\", size=5000, content=large_text.encode())\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.rejected) == 1\n    assert \"token limit\" in result.rejected[0].reason\n    # 5000 chars / 100 chunk_size = 50 chunks total; should stop well before all 50\n    assert (\n        encode_call_count < 50\n    ), f\"Expected early exit but encoded {encode_call_count} chunks out of 50\"\n\n\ndef test_categorize_text_under_token_limit_accepted(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"Text files under the token threshold should be accepted with exact count.\"\"\"\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=1)\n    small_text = \"x\" * 500  # 500 tokens < 1000 threshold\n    monkeypatch.setattr(utils, \"extract_file_text\", lambda **_kwargs: small_text)\n\n    upload = _make_upload(\"ok.txt\", size=500, content=small_text.encode())\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.acceptable) == 1\n    assert result.acceptable_file_to_token_count[\"ok.txt\"] == 500\n\n\n# --- skip-indexing vs rejection by file type ---\n\n\ndef test_csv_over_token_threshold_accepted_skip_indexing(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"CSV exceeding token threshold is uploaded but flagged to skip indexing.\"\"\"\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=1)\n    text = \"x\" * 2000  # 2000 tokens > 1000 threshold\n    monkeypatch.setattr(utils, \"extract_file_text\", lambda **_kwargs: text)\n\n    upload = _make_upload(\"large.csv\", size=2000, content=text.encode())\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.acceptable) == 1\n    assert result.acceptable[0].filename == \"large.csv\"\n    assert \"large.csv\" in result.skip_indexing\n    assert len(result.rejected) == 0\n\n\ndef test_csv_under_token_threshold_accepted_and_indexed(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"CSV under token threshold is uploaded and indexed normally.\"\"\"\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=1)\n    text = \"x\" * 500  # 500 tokens < 1000 threshold\n    monkeypatch.setattr(utils, \"extract_file_text\", lambda **_kwargs: text)\n\n    upload = _make_upload(\"small.csv\", size=500, content=text.encode())\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.acceptable) == 1\n    assert result.acceptable[0].filename == \"small.csv\"\n    assert \"small.csv\" not in result.skip_indexing\n    assert len(result.rejected) == 0\n\n\ndef test_pdf_over_token_threshold_rejected(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"PDF exceeding token threshold is rejected entirely (not uploaded).\"\"\"\n    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=1)\n    text = \"x\" * 2000  # 2000 tokens > 1000 threshold\n    monkeypatch.setattr(utils, \"extract_file_text\", lambda **_kwargs: text)\n\n    upload = _make_upload(\"big.pdf\", size=2000, content=text.encode())\n    result = utils.categorize_uploaded_files([upload], MagicMock())\n\n    assert len(result.rejected) == 1\n    assert result.rejected[0].filename == \"big.pdf\"\n    assert \"1K token limit\" in result.rejected[0].reason\n    assert len(result.acceptable) == 0\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/test_prometheus_instrumentation.py",
    "content": "\"\"\"Unit tests for Prometheus instrumentation module.\"\"\"\n\nimport threading\nfrom typing import Any\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom fastapi import FastAPI\nfrom fastapi.testclient import TestClient\nfrom prometheus_client import CollectorRegistry\nfrom prometheus_client import Gauge\n\nfrom onyx.server.metrics.per_tenant import per_tenant_request_callback\nfrom onyx.server.metrics.prometheus_setup import setup_prometheus_metrics\nfrom onyx.server.metrics.slow_requests import slow_request_callback\n\n\ndef _make_info(\n    duration: float,\n    method: str = \"GET\",\n    handler: str = \"/api/test\",\n    status: str = \"200\",\n) -> Any:\n    \"\"\"Build a fake metrics Info object matching the instrumentator's Info shape.\"\"\"\n    return MagicMock(\n        modified_duration=duration,\n        method=method,\n        modified_handler=handler,\n        modified_status=status,\n    )\n\n\ndef test_slow_request_callback_increments_above_threshold() -> None:\n    with patch(\"onyx.server.metrics.slow_requests._slow_requests\") as mock_counter:\n        mock_labels = MagicMock()\n        mock_counter.labels.return_value = mock_labels\n\n        info = _make_info(\n            duration=2.0, method=\"POST\", handler=\"/api/chat\", status=\"200\"\n        )\n        slow_request_callback(info)\n\n        mock_counter.labels.assert_called_once_with(\n            method=\"POST\", handler=\"/api/chat\", status=\"200\"\n        )\n        mock_labels.inc.assert_called_once()\n\n\ndef test_slow_request_callback_skips_below_threshold() -> None:\n    with patch(\"onyx.server.metrics.slow_requests._slow_requests\") as mock_counter:\n        info = _make_info(duration=0.5)\n        slow_request_callback(info)\n\n        mock_counter.labels.assert_not_called()\n\n\ndef test_slow_request_callback_skips_at_exact_threshold() -> None:\n    with (\n        patch(\"onyx.server.metrics.slow_requests.SLOW_REQUEST_THRESHOLD_SECONDS\", 1.0),\n        patch(\"onyx.server.metrics.slow_requests._slow_requests\") as mock_counter,\n    ):\n        info = _make_info(duration=1.0)\n        slow_request_callback(info)\n\n        mock_counter.labels.assert_not_called()\n\n\ndef test_setup_attaches_instrumentator_to_app() -> None:\n    with patch(\"onyx.server.metrics.prometheus_setup.Instrumentator\") as mock_cls:\n        mock_instance = MagicMock()\n        mock_instance.instrument.return_value = mock_instance\n        mock_cls.return_value = mock_instance\n\n        app = FastAPI()\n        setup_prometheus_metrics(app)\n\n        mock_cls.assert_called_once_with(\n            should_group_status_codes=False,\n            should_ignore_untemplated=False,\n            should_group_untemplated=True,\n            should_instrument_requests_inprogress=True,\n            inprogress_labels=True,\n            excluded_handlers=[\"/health\", \"/metrics\", \"/openapi.json\"],\n        )\n        assert mock_instance.add.call_count == 3\n        mock_instance.instrument.assert_called_once_with(\n            app,\n            latency_lowr_buckets=(\n                0.01,\n                0.025,\n                0.05,\n                0.1,\n                0.25,\n                0.5,\n                1.0,\n                2.5,\n                5.0,\n                10.0,\n            ),\n        )\n        mock_instance.expose.assert_called_once_with(app)\n\n\ndef test_per_tenant_callback_increments_with_tenant_id() -> None:\n    \"\"\"Verify per-tenant callback reads tenant from contextvar and increments.\"\"\"\n    with (\n        patch(\n            \"onyx.server.metrics.per_tenant.CURRENT_TENANT_ID_CONTEXTVAR\"\n        ) as mock_ctx,\n        patch(\"onyx.server.metrics.per_tenant._requests_by_tenant\") as mock_counter,\n    ):\n        mock_labels = MagicMock()\n        mock_counter.labels.return_value = mock_labels\n        mock_ctx.get.return_value = \"tenant_abc\"\n\n        info = _make_info(\n            duration=0.1, method=\"POST\", handler=\"/api/chat\", status=\"200\"\n        )\n        per_tenant_request_callback(info)\n\n        mock_counter.labels.assert_called_once_with(\n            tenant_id=\"tenant_abc\",\n            method=\"POST\",\n            handler=\"/api/chat\",\n            status=\"200\",\n        )\n        mock_labels.inc.assert_called_once()\n\n\ndef test_per_tenant_callback_falls_back_to_unknown() -> None:\n    \"\"\"Verify per-tenant callback uses 'unknown' when contextvar is None.\"\"\"\n    with (\n        patch(\n            \"onyx.server.metrics.per_tenant.CURRENT_TENANT_ID_CONTEXTVAR\"\n        ) as mock_ctx,\n        patch(\"onyx.server.metrics.per_tenant._requests_by_tenant\") as mock_counter,\n    ):\n        mock_labels = MagicMock()\n        mock_counter.labels.return_value = mock_labels\n        mock_ctx.get.return_value = None\n\n        info = _make_info(duration=0.1)\n        per_tenant_request_callback(info)\n\n        mock_counter.labels.assert_called_once_with(\n            tenant_id=\"unknown\",\n            method=\"GET\",\n            handler=\"/api/test\",\n            status=\"200\",\n        )\n        mock_labels.inc.assert_called_once()\n\n\ndef test_inprogress_gauge_increments_during_request() -> None:\n    \"\"\"Verify the in-progress gauge goes up while a request is in flight.\"\"\"\n    registry = CollectorRegistry()\n    gauge = Gauge(\n        \"http_requests_inprogress_test\",\n        \"In-progress requests\",\n        [\"method\", \"handler\"],\n        registry=registry,\n    )\n\n    request_started = threading.Event()\n    request_release = threading.Event()\n\n    app = FastAPI()\n\n    @app.get(\"/slow\")\n    def slow_endpoint() -> dict:\n        gauge.labels(method=\"GET\", handler=\"/slow\").inc()\n        request_started.set()\n        request_release.wait(timeout=5)\n        gauge.labels(method=\"GET\", handler=\"/slow\").dec()\n        return {\"status\": \"done\"}\n\n    client = TestClient(app, raise_server_exceptions=False)\n\n    def make_request() -> None:\n        client.get(\"/slow\")\n\n    thread = threading.Thread(target=make_request)\n    thread.start()\n\n    request_started.wait(timeout=5)\n    assert gauge.labels(method=\"GET\", handler=\"/slow\")._value.get() == 1.0\n\n    request_release.set()\n    thread.join(timeout=5)\n    assert gauge.labels(method=\"GET\", handler=\"/slow\")._value.get() == 0.0\n\n\ndef test_inprogress_gauge_tracks_concurrent_requests() -> None:\n    \"\"\"Verify the gauge correctly counts multiple concurrent in-flight requests.\"\"\"\n    registry = CollectorRegistry()\n    gauge = Gauge(\n        \"http_requests_inprogress_concurrent_test\",\n        \"In-progress requests\",\n        [\"method\", \"handler\"],\n        registry=registry,\n    )\n\n    # 3 parties: 2 request threads + main thread\n    barrier = threading.Barrier(3)\n    release = threading.Event()\n\n    app = FastAPI()\n\n    @app.get(\"/concurrent\")\n    def concurrent_endpoint() -> dict:\n        gauge.labels(method=\"GET\", handler=\"/concurrent\").inc()\n        barrier.wait(timeout=5)\n        release.wait(timeout=5)\n        gauge.labels(method=\"GET\", handler=\"/concurrent\").dec()\n        return {\"status\": \"done\"}\n\n    client = TestClient(app, raise_server_exceptions=False)\n\n    def make_request() -> None:\n        client.get(\"/concurrent\")\n\n    t1 = threading.Thread(target=make_request)\n    t2 = threading.Thread(target=make_request)\n    t1.start()\n    t2.start()\n\n    # All 3 threads meet here — both requests are in-flight\n    barrier.wait(timeout=5)\n    assert gauge.labels(method=\"GET\", handler=\"/concurrent\")._value.get() == 2.0\n\n    release.set()\n    t1.join(timeout=5)\n    t2.join(timeout=5)\n    assert gauge.labels(method=\"GET\", handler=\"/concurrent\")._value.get() == 0.0\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/test_settings_store.py",
    "content": "import pytest\n\nfrom onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB\nfrom onyx.key_value_store.interface import KvKeyNotFoundError\nfrom onyx.server.settings import store as settings_store\nfrom onyx.server.settings.models import (\n    DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB,\n)\nfrom onyx.server.settings.models import DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB\nfrom onyx.server.settings.models import Settings\n\n\nclass _FakeKvStore:\n    def __init__(self, data: dict | None = None) -> None:\n        self._data = data\n\n    def load(self, _key: str) -> dict:\n        if self._data is None:\n            raise KvKeyNotFoundError()\n        return self._data\n\n\nclass _FakeCache:\n    def __init__(self) -> None:\n        self._vals: dict[str, bytes] = {}\n\n    def get(self, key: str) -> bytes | None:\n        return self._vals.get(key)\n\n    def set(self, key: str, value: str, ex: int | None = None) -> None:  # noqa: ARG002\n        self._vals[key] = value.encode(\"utf-8\")\n\n\ndef test_load_settings_uses_model_defaults_when_no_stored_value(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"When no settings are stored (vector DB enabled), load_settings() should\n    resolve the default token threshold to 200.\"\"\"\n    monkeypatch.setattr(settings_store, \"get_kv_store\", lambda: _FakeKvStore())\n    monkeypatch.setattr(settings_store, \"get_cache_backend\", lambda: _FakeCache())\n    monkeypatch.setattr(settings_store, \"DISABLE_VECTOR_DB\", False)\n\n    settings = settings_store.load_settings()\n\n    assert settings.user_file_max_upload_size_mb == DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB\n    assert (\n        settings.file_token_count_threshold_k\n        == DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB\n    )\n\n\ndef test_load_settings_uses_high_token_default_when_vector_db_disabled(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"When vector DB is disabled and no settings are stored, the token\n    threshold should default to 10000 (10M tokens).\"\"\"\n    monkeypatch.setattr(settings_store, \"get_kv_store\", lambda: _FakeKvStore())\n    monkeypatch.setattr(settings_store, \"get_cache_backend\", lambda: _FakeCache())\n    monkeypatch.setattr(settings_store, \"DISABLE_VECTOR_DB\", True)\n\n    settings = settings_store.load_settings()\n\n    assert settings.user_file_max_upload_size_mb == DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB\n    assert (\n        settings.file_token_count_threshold_k\n        == DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB\n    )\n\n\ndef test_load_settings_preserves_explicit_value_when_vector_db_disabled(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"When vector DB is disabled but admin explicitly set a token threshold,\n    that value should be preserved (not overridden by the 10000 default).\"\"\"\n    stored = Settings(file_token_count_threshold_k=500).model_dump()\n    monkeypatch.setattr(settings_store, \"get_kv_store\", lambda: _FakeKvStore(stored))\n    monkeypatch.setattr(settings_store, \"get_cache_backend\", lambda: _FakeCache())\n    monkeypatch.setattr(settings_store, \"DISABLE_VECTOR_DB\", True)\n\n    settings = settings_store.load_settings()\n\n    assert settings.file_token_count_threshold_k == 500\n\n\ndef test_load_settings_preserves_zero_token_threshold(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"A value of 0 means 'no limit' and should be preserved.\"\"\"\n    stored = Settings(file_token_count_threshold_k=0).model_dump()\n    monkeypatch.setattr(settings_store, \"get_kv_store\", lambda: _FakeKvStore(stored))\n    monkeypatch.setattr(settings_store, \"get_cache_backend\", lambda: _FakeCache())\n    monkeypatch.setattr(settings_store, \"DISABLE_VECTOR_DB\", True)\n\n    settings = settings_store.load_settings()\n\n    assert settings.file_token_count_threshold_k == 0\n\n\ndef test_load_settings_resolves_zero_upload_size_to_default(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"A value of 0 should be treated as unset and resolved to the default.\"\"\"\n    stored = Settings(user_file_max_upload_size_mb=0).model_dump()\n    monkeypatch.setattr(settings_store, \"get_kv_store\", lambda: _FakeKvStore(stored))\n    monkeypatch.setattr(settings_store, \"get_cache_backend\", lambda: _FakeCache())\n\n    settings = settings_store.load_settings()\n\n    assert settings.user_file_max_upload_size_mb == DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB\n\n\ndef test_load_settings_clamps_upload_size_to_env_max(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"When the stored upload size exceeds MAX_ALLOWED_UPLOAD_SIZE_MB, it should\n    be clamped to the env-configured maximum.\"\"\"\n    stored = Settings(user_file_max_upload_size_mb=500).model_dump()\n    monkeypatch.setattr(settings_store, \"get_kv_store\", lambda: _FakeKvStore(stored))\n    monkeypatch.setattr(settings_store, \"get_cache_backend\", lambda: _FakeCache())\n    monkeypatch.setattr(settings_store, \"MAX_ALLOWED_UPLOAD_SIZE_MB\", 250)\n\n    settings = settings_store.load_settings()\n\n    assert settings.user_file_max_upload_size_mb == 250\n\n\ndef test_load_settings_preserves_upload_size_within_max(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"When the stored upload size is within MAX_ALLOWED_UPLOAD_SIZE_MB, it should\n    be preserved unchanged.\"\"\"\n    stored = Settings(user_file_max_upload_size_mb=150).model_dump()\n    monkeypatch.setattr(settings_store, \"get_kv_store\", lambda: _FakeKvStore(stored))\n    monkeypatch.setattr(settings_store, \"get_cache_backend\", lambda: _FakeCache())\n    monkeypatch.setattr(settings_store, \"MAX_ALLOWED_UPLOAD_SIZE_MB\", 250)\n\n    settings = settings_store.load_settings()\n\n    assert settings.user_file_max_upload_size_mb == 150\n\n\ndef test_load_settings_zero_upload_size_resolves_to_default(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"A value of 0 should be treated as unset and resolved to the default,\n    clamped to MAX_ALLOWED_UPLOAD_SIZE_MB.\"\"\"\n    stored = Settings(user_file_max_upload_size_mb=0).model_dump()\n    monkeypatch.setattr(settings_store, \"get_kv_store\", lambda: _FakeKvStore(stored))\n    monkeypatch.setattr(settings_store, \"get_cache_backend\", lambda: _FakeCache())\n    monkeypatch.setattr(settings_store, \"MAX_ALLOWED_UPLOAD_SIZE_MB\", 100)\n    monkeypatch.setattr(settings_store, \"DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB\", 100)\n\n    settings = settings_store.load_settings()\n\n    assert settings.user_file_max_upload_size_mb == 100\n\n\ndef test_load_settings_default_clamped_to_max(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    \"\"\"When DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB exceeds MAX_ALLOWED_UPLOAD_SIZE_MB,\n    the effective default should be min(DEFAULT, MAX).\"\"\"\n    monkeypatch.setattr(settings_store, \"get_kv_store\", lambda: _FakeKvStore())\n    monkeypatch.setattr(settings_store, \"get_cache_backend\", lambda: _FakeCache())\n    monkeypatch.setattr(settings_store, \"DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB\", 100)\n    monkeypatch.setattr(settings_store, \"MAX_ALLOWED_UPLOAD_SIZE_MB\", 50)\n\n    settings = settings_store.load_settings()\n\n    assert settings.user_file_max_upload_size_mb == 50\n"
  },
  {
    "path": "backend/tests/unit/onyx/server/test_upload_files.py",
    "content": "import io\nimport zipfile\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom zipfile import BadZipFile\n\nimport pytest\nfrom fastapi import UploadFile\nfrom starlette.datastructures import Headers\n\nfrom onyx.configs.constants import FileOrigin\nfrom onyx.server.documents.connector import upload_files\n\n\ndef _create_test_zip() -> bytes:\n    \"\"\"Create a simple in-memory zip file containing two text files.\"\"\"\n    buf = io.BytesIO()\n    with zipfile.ZipFile(buf, \"w\") as zf:\n        zf.writestr(\"file1.txt\", \"hello\")\n        zf.writestr(\"file2.txt\", \"world\")\n    return buf.getvalue()\n\n\ndef _make_upload_file(content: bytes, filename: str, content_type: str) -> UploadFile:\n    return UploadFile(\n        file=io.BytesIO(content),\n        filename=filename,\n        headers=Headers({\"content-type\": content_type}),\n    )\n\n\n@patch(\"onyx.server.documents.connector.get_default_file_store\")\ndef test_upload_zip_with_unzip_true_extracts_files(\n    mock_get_store: MagicMock,\n) -> None:\n    \"\"\"When unzip=True (default), a zip upload is extracted into individual files.\"\"\"\n    mock_store = MagicMock()\n    mock_store.save_file.side_effect = lambda **kwargs: f\"id-{kwargs['display_name']}\"\n    mock_get_store.return_value = mock_store\n\n    zip_bytes = _create_test_zip()\n    upload = _make_upload_file(zip_bytes, \"test.zip\", \"application/zip\")\n\n    result = upload_files([upload], FileOrigin.CONNECTOR)\n\n    # Should have extracted the two individual files, not stored the zip itself\n    assert len(result.file_paths) == 2\n    assert \"id-file1.txt\" in result.file_paths\n    assert \"id-file2.txt\" in result.file_paths\n    assert \"file1.txt\" in result.file_names\n    assert \"file2.txt\" in result.file_names\n\n\n@patch(\"onyx.server.documents.connector.get_default_file_store\")\ndef test_upload_zip_with_unzip_false_stores_zip_as_is(\n    mock_get_store: MagicMock,\n) -> None:\n    \"\"\"When unzip=False, the zip file is stored as-is without extraction.\"\"\"\n    mock_store = MagicMock()\n    mock_store.save_file.return_value = \"zip-file-id\"\n    mock_get_store.return_value = mock_store\n\n    zip_bytes = _create_test_zip()\n    upload = _make_upload_file(zip_bytes, \"site_export.zip\", \"application/zip\")\n\n    result = upload_files([upload], FileOrigin.CONNECTOR, unzip=False)\n\n    # Should store exactly one file (the zip itself)\n    assert len(result.file_paths) == 1\n    assert result.file_paths[0] == \"zip-file-id\"\n    assert result.file_names == [\"site_export.zip\"]\n    # No zip metadata should be created\n    assert result.zip_metadata_file_id is None\n\n    # Verify the stored content is a valid zip\n    saved_content: io.BytesIO = mock_store.save_file.call_args[1][\"content\"]\n    saved_content.seek(0)\n    with zipfile.ZipFile(saved_content, \"r\") as zf:\n        assert set(zf.namelist()) == {\"file1.txt\", \"file2.txt\"}\n\n\n@patch(\"onyx.server.documents.connector.get_default_file_store\")\ndef test_upload_invalid_zip_with_unzip_false_raises(\n    mock_get_store: MagicMock,\n) -> None:\n    \"\"\"An invalid zip is rejected even when unzip=False (validation still runs).\"\"\"\n    mock_get_store.return_value = MagicMock()\n\n    bad_zip = _make_upload_file(b\"not a zip\", \"bad.zip\", \"application/zip\")\n\n    with pytest.raises(BadZipFile):\n        upload_files([bad_zip], FileOrigin.CONNECTOR, unzip=False)\n\n\n@patch(\"onyx.server.documents.connector.get_default_file_store\")\ndef test_upload_multiple_zips_rejected_when_unzip_false(\n    mock_get_store: MagicMock,\n) -> None:\n    \"\"\"The seen_zip guard rejects a second zip even when unzip=False.\"\"\"\n    mock_store = MagicMock()\n    mock_store.save_file.return_value = \"zip-id\"\n    mock_get_store.return_value = mock_store\n\n    zip_bytes = _create_test_zip()\n    zip1 = _make_upload_file(zip_bytes, \"a.zip\", \"application/zip\")\n    zip2 = _make_upload_file(zip_bytes, \"b.zip\", \"application/zip\")\n\n    with pytest.raises(Exception, match=\"Only one zip file\"):\n        upload_files([zip1, zip2], FileOrigin.CONNECTOR, unzip=False)\n"
  },
  {
    "path": "backend/tests/unit/onyx/test_redis.py",
    "content": "import os\n\nimport pytest\nimport redis\n\nfrom onyx.redis.redis_pool import RedisPool\nfrom onyx.utils.logger import setup_logger\n\nlogger = setup_logger()\n\n\n@pytest.mark.skipif(\n    os.getenv(\"REDIS_CLOUD_PYTEST_PASSWORD\", \"\") == \"\",\n    reason=\"Environment variable REDIS_CLOUD_PYTEST_PASSWORD is not set\",\n)\ndef test_redis_ssl() -> None:\n    REDIS_PASSWORD = os.environ.get(\"REDIS_CLOUD_PYTEST_PASSWORD\")\n    REDIS_HOST = \"redis-15414.c267.us-east-1-4.ec2.redns.redis-cloud.com\"\n    REDIS_PORT = 15414\n    REDIS_SSL_CERT_REQS = \"required\"\n\n    assert REDIS_PASSWORD\n\n    # Construct the path to the CA certificate for the redis ssl test instance\n    # it contains no secret data, so it's OK to have checked in!\n    current_dir = os.path.dirname(__file__)\n    REDIS_SSL_CA_CERTS = os.path.join(current_dir, \"redis_ca.pem\")\n\n    pool = RedisPool.create_pool(\n        host=REDIS_HOST,\n        port=REDIS_PORT,\n        password=REDIS_PASSWORD,\n        ssl=True,\n        ssl_cert_reqs=REDIS_SSL_CERT_REQS,\n        ssl_ca_certs=REDIS_SSL_CA_CERTS,\n    )\n\n    r = redis.Redis(connection_pool=pool)\n    assert r.ping()\n"
  },
  {
    "path": "backend/tests/unit/onyx/test_startup_validation.py",
    "content": "\"\"\"Tests for startup validation in no-vector-DB mode.\n\nVerifies that DISABLE_VECTOR_DB raises RuntimeError when combined with\nincompatible settings (MULTI_TENANT, ENABLE_CRAFT).\n\"\"\"\n\nfrom unittest.mock import patch\n\nimport pytest\n\n\nclass TestValidateNoVectorDbSettings:\n    @patch(\"onyx.main.DISABLE_VECTOR_DB\", False)\n    def test_no_error_when_vector_db_enabled(self) -> None:\n        from onyx.main import validate_no_vector_db_settings\n\n        validate_no_vector_db_settings()\n\n    @patch(\"onyx.main.DISABLE_VECTOR_DB\", True)\n    @patch(\"onyx.main.MULTI_TENANT\", False)\n    @patch(\"onyx.server.features.build.configs.ENABLE_CRAFT\", False)\n    def test_no_error_when_no_conflicts(self) -> None:\n        from onyx.main import validate_no_vector_db_settings\n\n        validate_no_vector_db_settings()\n\n    @patch(\"onyx.main.DISABLE_VECTOR_DB\", True)\n    @patch(\"onyx.main.MULTI_TENANT\", True)\n    def test_raises_on_multi_tenant(self) -> None:\n        from onyx.main import validate_no_vector_db_settings\n\n        with pytest.raises(RuntimeError, match=\"MULTI_TENANT\"):\n            validate_no_vector_db_settings()\n\n    @patch(\"onyx.main.DISABLE_VECTOR_DB\", True)\n    @patch(\"onyx.main.MULTI_TENANT\", False)\n    @patch(\"onyx.server.features.build.configs.ENABLE_CRAFT\", True)\n    def test_raises_on_enable_craft(self) -> None:\n        from onyx.main import validate_no_vector_db_settings\n\n        with pytest.raises(RuntimeError, match=\"ENABLE_CRAFT\"):\n            validate_no_vector_db_settings()\n\n    @patch(\"onyx.main.DISABLE_VECTOR_DB\", True)\n    @patch(\"onyx.main.MULTI_TENANT\", True)\n    @patch(\"onyx.server.features.build.configs.ENABLE_CRAFT\", True)\n    def test_multi_tenant_checked_before_craft(self) -> None:\n        \"\"\"MULTI_TENANT is checked first, so it should be the error raised.\"\"\"\n        from onyx.main import validate_no_vector_db_settings\n\n        with pytest.raises(RuntimeError, match=\"MULTI_TENANT\"):\n            validate_no_vector_db_settings()\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/tools/custom/test_custom_tools.py",
    "content": "import unittest\nimport uuid\nfrom typing import Any\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.tools.models import DynamicSchemaInfo\nfrom onyx.tools.models import ToolResponse\nfrom onyx.tools.tool_implementations.custom.custom_tool import (\n    build_custom_tools_from_openapi_schema_and_headers,\n)\nfrom onyx.tools.tool_implementations.custom.custom_tool import CustomToolCallSummary\nfrom onyx.tools.tool_implementations.custom.custom_tool import (\n    validate_openapi_schema,\n)\nfrom onyx.utils.headers import HeaderItemDict\n\n\nclass TestCustomTool(unittest.TestCase):\n    \"\"\"\n    Test suite for CustomTool functionality.\n    This class tests the creation, running, and result handling of custom tools\n    based on OpenAPI schemas.\n    \"\"\"\n\n    def setUp(self) -> None:\n        \"\"\"\n        Set up the test environment before each test method.\n        Initializes an OpenAPI schema and DynamicSchemaInfo for testing.\n        \"\"\"\n        self.openapi_schema: dict[str, Any] = {\n            \"openapi\": \"3.0.0\",\n            \"info\": {\n                \"version\": \"1.0.0\",\n                \"title\": \"Assistants API\",\n                \"description\": \"An API for managing assistants\",\n            },\n            \"servers\": [\n                {\"url\": \"http://localhost:8080/CHAT_SESSION_ID/test/MESSAGE_ID\"},\n            ],\n            \"paths\": {\n                \"/assistant/{assistant_id}\": {\n                    \"GET\": {\n                        \"summary\": \"Get a specific Assistant\",\n                        \"operationId\": \"getAssistant\",\n                        \"parameters\": [\n                            {\n                                \"name\": \"assistant_id\",\n                                \"in\": \"path\",\n                                \"required\": True,\n                                \"schema\": {\"type\": \"string\"},\n                            }\n                        ],\n                    },\n                    \"POST\": {\n                        \"summary\": \"Create a new Assistant\",\n                        \"operationId\": \"createAssistant\",\n                        \"parameters\": [\n                            {\n                                \"name\": \"assistant_id\",\n                                \"in\": \"path\",\n                                \"required\": True,\n                                \"schema\": {\"type\": \"string\"},\n                            }\n                        ],\n                        \"requestBody\": {\n                            \"required\": True,\n                            \"content\": {\n                                \"application/json\": {\"schema\": {\"type\": \"object\"}}\n                            },\n                        },\n                    },\n                }\n            },\n        }\n        validate_openapi_schema(self.openapi_schema)\n        self.dynamic_schema_info: DynamicSchemaInfo = DynamicSchemaInfo(\n            chat_session_id=uuid.uuid4(), message_id=20\n        )\n\n    @patch(\"onyx.tools.tool_implementations.custom.custom_tool.requests.request\")\n    def test_custom_tool_run_get(self, mock_request: unittest.mock.MagicMock) -> None:\n        \"\"\"\n        Test the GET method of a custom tool.\n        Verifies that the tool correctly constructs the URL and makes the GET request.\n        \"\"\"\n        # Mock the response object\n        mock_response = unittest.mock.MagicMock()\n        mock_response.headers = {\"Content-Type\": \"application/json\"}\n        mock_response.json.return_value = {\"id\": \"123\", \"name\": \"Test Assistant\"}\n        mock_request.return_value = mock_response\n\n        tools = build_custom_tools_from_openapi_schema_and_headers(\n            tool_id=-1,  # dummy tool id\n            openapi_schema=self.openapi_schema,\n            dynamic_schema_info=self.dynamic_schema_info,\n        )\n\n        result = tools[0].run(\n            placement=Placement(turn_index=0, tab_index=0),\n            override_kwargs=None,\n            assistant_id=\"123\",\n        )\n        expected_url = f\"http://localhost:8080/{self.dynamic_schema_info.chat_session_id}/test/{self.dynamic_schema_info.message_id}/assistant/123\"\n        mock_request.assert_called_once_with(\"GET\", expected_url, json=None, headers={})\n\n        self.assertIsNotNone(result, \"Expected a result from the tool run\")\n        self.assertIsNotNone(\n            result.rich_response,\n            \"Expected rich_response to be set\",\n        )\n        assert isinstance(result.rich_response, CustomToolCallSummary)\n        self.assertEqual(\n            result.rich_response.tool_name,\n            \"getAssistant\",\n            \"Tool name in response does not match expected value\",\n        )\n\n    @patch(\"onyx.tools.tool_implementations.custom.custom_tool.requests.request\")\n    def test_custom_tool_run_post(self, mock_request: unittest.mock.MagicMock) -> None:\n        \"\"\"\n        Test the POST method of a custom tool.\n        Verifies that the tool correctly constructs the URL and makes the POST request with the given body.\n        \"\"\"\n        # Mock the response object\n        mock_response = unittest.mock.MagicMock()\n        mock_response.headers = {\"Content-Type\": \"application/json\"}\n        mock_response.json.return_value = {\"id\": \"456\", \"name\": \"Created Assistant\"}\n        mock_request.return_value = mock_response\n\n        tools = build_custom_tools_from_openapi_schema_and_headers(\n            tool_id=-1,  # dummy tool id\n            openapi_schema=self.openapi_schema,\n            dynamic_schema_info=self.dynamic_schema_info,\n        )\n\n        result = tools[1].run(\n            placement=Placement(turn_index=0, tab_index=0),\n            override_kwargs=None,\n            assistant_id=\"456\",\n        )\n        expected_url = f\"http://localhost:8080/{self.dynamic_schema_info.chat_session_id}/test/{self.dynamic_schema_info.message_id}/assistant/456\"\n        mock_request.assert_called_once_with(\n            \"POST\", expected_url, json=None, headers={}\n        )\n\n        self.assertIsNotNone(result, \"Expected a result from the tool run\")\n        self.assertIsNotNone(\n            result.rich_response,\n            \"Expected rich_response to be set\",\n        )\n        assert isinstance(result.rich_response, CustomToolCallSummary)\n        self.assertEqual(\n            result.rich_response.tool_name,\n            \"createAssistant\",\n            \"Tool name in response does not match expected value\",\n        )\n\n    @patch(\"onyx.tools.tool_implementations.custom.custom_tool.requests.request\")\n    def test_custom_tool_with_headers(\n        self, mock_request: unittest.mock.MagicMock\n    ) -> None:\n        \"\"\"\n        Test the custom tool with custom headers.\n        Verifies that the tool correctly includes the custom headers in the request.\n        \"\"\"\n        # Mock the response object\n        mock_response = unittest.mock.MagicMock()\n        mock_response.headers = {\"Content-Type\": \"application/json\"}\n        mock_response.json.return_value = {\"id\": \"123\"}\n        mock_request.return_value = mock_response\n\n        custom_headers: list[HeaderItemDict] = [\n            {\"key\": \"Authorization\", \"value\": \"Bearer token123\"},\n            {\"key\": \"Custom-Header\", \"value\": \"CustomValue\"},\n        ]\n        tools = build_custom_tools_from_openapi_schema_and_headers(\n            tool_id=-1,  # dummy tool id\n            openapi_schema=self.openapi_schema,\n            custom_headers=custom_headers,\n            dynamic_schema_info=self.dynamic_schema_info,\n        )\n\n        tools[0].run(\n            placement=Placement(turn_index=0, tab_index=0),\n            override_kwargs=None,\n            assistant_id=\"123\",\n        )\n        expected_url = f\"http://localhost:8080/{self.dynamic_schema_info.chat_session_id}/test/{self.dynamic_schema_info.message_id}/assistant/123\"\n        expected_headers = {\n            \"Authorization\": \"Bearer token123\",\n            \"Custom-Header\": \"CustomValue\",\n        }\n        mock_request.assert_called_once_with(\n            \"GET\", expected_url, json=None, headers=expected_headers\n        )\n\n    @patch(\"onyx.tools.tool_implementations.custom.custom_tool.requests.request\")\n    def test_custom_tool_with_empty_headers(\n        self, mock_request: unittest.mock.MagicMock\n    ) -> None:\n        \"\"\"\n        Test the custom tool with an empty list of custom headers.\n        Verifies that the tool correctly handles an empty list of headers.\n        \"\"\"\n        # Mock the response object\n        mock_response = unittest.mock.MagicMock()\n        mock_response.headers = {\"Content-Type\": \"application/json\"}\n        mock_response.json.return_value = {\"id\": \"123\"}\n        mock_request.return_value = mock_response\n\n        custom_headers: list[HeaderItemDict] = []\n        tools = build_custom_tools_from_openapi_schema_and_headers(\n            tool_id=-1,  # dummy tool id\n            openapi_schema=self.openapi_schema,\n            custom_headers=custom_headers,\n            dynamic_schema_info=self.dynamic_schema_info,\n        )\n\n        tools[0].run(\n            placement=Placement(turn_index=0, tab_index=0),\n            override_kwargs=None,\n            assistant_id=\"123\",\n        )\n        expected_url = f\"http://localhost:8080/{self.dynamic_schema_info.chat_session_id}/test/{self.dynamic_schema_info.message_id}/assistant/123\"\n        mock_request.assert_called_once_with(\"GET\", expected_url, json=None, headers={})\n\n    def test_invalid_openapi_schema(self) -> None:\n        \"\"\"\n        Test that an invalid OpenAPI schema raises a ValueError.\n        \"\"\"\n        invalid_schema: dict[str, Any] = {\n            \"openapi\": \"3.0.0\",\n            \"info\": {\n                \"version\": \"1.0.0\",\n                \"title\": \"Invalid API\",\n            },\n            # Missing required 'paths' key\n        }\n\n        with self.assertRaises(ValueError) as _:\n            validate_openapi_schema(invalid_schema)\n\n    def test_custom_tool_final_result(self) -> None:\n        \"\"\"\n        Test extracting the final result from a custom tool response.\n        Verifies that the tool result can be correctly extracted from the ToolResponse.\n        \"\"\"\n        mock_response = ToolResponse(\n            rich_response=CustomToolCallSummary(\n                response_type=\"json\",\n                tool_name=\"getAssistant\",\n                tool_result={\"id\": \"789\", \"name\": \"Final Assistant\"},\n            ),\n            llm_facing_response='{\"id\": \"789\", \"name\": \"Final Assistant\"}',\n        )\n\n        # Extract the final result from the rich_response\n        assert isinstance(mock_response.rich_response, CustomToolCallSummary)\n        final_result = mock_response.rich_response.tool_result\n        self.assertEqual(\n            final_result,\n            {\"id\": \"789\", \"name\": \"Final Assistant\"},\n            \"Final result does not match expected output\",\n        )\n\n\nif __name__ == \"__main__\":\n    pytest.main([__file__])\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/test_construct_tools_no_vectordb.py",
    "content": "\"\"\"Tests for tool construction when DISABLE_VECTOR_DB is True.\n\nVerifies that:\n- SearchTool.is_available() returns False when vector DB is disabled\n- OpenURLTool.is_available() returns False when vector DB is disabled\n- The force-add SearchTool block is suppressed when DISABLE_VECTOR_DB\n- FileReaderTool.is_available() returns True when vector DB is disabled\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.tools.tool_implementations.file_reader.file_reader_tool import FileReaderTool\n\nAPP_CONFIGS_MODULE = \"onyx.configs.app_configs\"\nFILE_READER_MODULE = \"onyx.tools.tool_implementations.file_reader.file_reader_tool\"\n\n\n# ------------------------------------------------------------------\n# SearchTool.is_available()\n# ------------------------------------------------------------------\n\n\nclass TestSearchToolAvailability:\n    @patch(f\"{APP_CONFIGS_MODULE}.DISABLE_VECTOR_DB\", True)\n    def test_unavailable_when_vector_db_disabled(self) -> None:\n        from onyx.tools.tool_implementations.search.search_tool import SearchTool\n\n        assert SearchTool.is_available(MagicMock()) is False\n\n    @patch(\"onyx.db.connector.check_user_files_exist\", return_value=True)\n    @patch(\n        \"onyx.tools.tool_implementations.search.search_tool.check_federated_connectors_exist\",\n        return_value=False,\n    )\n    @patch(\n        \"onyx.tools.tool_implementations.search.search_tool.check_connectors_exist\",\n        return_value=False,\n    )\n    @patch(f\"{APP_CONFIGS_MODULE}.DISABLE_VECTOR_DB\", False)\n    def test_available_when_vector_db_enabled_and_files_exist(\n        self,\n        mock_connectors: MagicMock,  # noqa: ARG002\n        mock_federated: MagicMock,  # noqa: ARG002\n        mock_user_files: MagicMock,  # noqa: ARG002\n    ) -> None:\n        from onyx.tools.tool_implementations.search.search_tool import SearchTool\n\n        assert SearchTool.is_available(MagicMock()) is True\n\n\n# ------------------------------------------------------------------\n# OpenURLTool.is_available()\n# ------------------------------------------------------------------\n\n\nclass TestOpenURLToolAvailability:\n    @patch(f\"{APP_CONFIGS_MODULE}.DISABLE_VECTOR_DB\", True)\n    def test_unavailable_when_vector_db_disabled(self) -> None:\n        from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool\n\n        assert OpenURLTool.is_available(MagicMock()) is False\n\n    @patch(f\"{APP_CONFIGS_MODULE}.DISABLE_VECTOR_DB\", False)\n    def test_available_when_vector_db_enabled(self) -> None:\n        from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool\n\n        assert OpenURLTool.is_available(MagicMock()) is True\n\n\n# ------------------------------------------------------------------\n# FileReaderTool.is_available()\n# ------------------------------------------------------------------\n\n\nclass TestFileReaderToolAvailability:\n    @patch(f\"{FILE_READER_MODULE}.DISABLE_VECTOR_DB\", True)\n    def test_available_when_vector_db_disabled(self) -> None:\n        assert FileReaderTool.is_available(MagicMock()) is True\n\n    @patch(f\"{FILE_READER_MODULE}.DISABLE_VECTOR_DB\", False)\n    def test_unavailable_when_vector_db_enabled(self) -> None:\n        assert FileReaderTool.is_available(MagicMock()) is False\n\n\n# ------------------------------------------------------------------\n# Force-add SearchTool suppression\n# ------------------------------------------------------------------\n\n\nclass TestForceAddSearchToolGuard:\n    def test_force_add_block_checks_disable_vector_db(self) -> None:\n        \"\"\"The force-add SearchTool block in construct_tools should include\n        `not DISABLE_VECTOR_DB` so that forced search is also suppressed\n        without a vector DB.\"\"\"\n        import inspect\n\n        from onyx.tools.tool_constructor import construct_tools\n\n        source = inspect.getsource(construct_tools)\n        assert (\n            \"DISABLE_VECTOR_DB\" in source\n        ), \"construct_tools should reference DISABLE_VECTOR_DB to suppress force-adding SearchTool\"\n\n\n# ------------------------------------------------------------------\n# Persona API — _validate_vector_db_knowledge\n# ------------------------------------------------------------------\n\n\nclass TestValidateVectorDbKnowledge:\n    @patch(\n        \"onyx.server.features.persona.api.DISABLE_VECTOR_DB\",\n        True,\n    )\n    def test_rejects_document_set_ids(self) -> None:\n        from fastapi import HTTPException\n\n        from onyx.server.features.persona.api import _validate_vector_db_knowledge\n\n        request = MagicMock()\n        request.document_set_ids = [1]\n        request.hierarchy_node_ids = []\n        request.document_ids = []\n\n        with __import__(\"pytest\").raises(HTTPException) as exc_info:\n            _validate_vector_db_knowledge(request)\n        assert exc_info.value.status_code == 400\n        assert \"document sets\" in exc_info.value.detail\n\n    @patch(\n        \"onyx.server.features.persona.api.DISABLE_VECTOR_DB\",\n        True,\n    )\n    def test_rejects_hierarchy_node_ids(self) -> None:\n        from fastapi import HTTPException\n\n        from onyx.server.features.persona.api import _validate_vector_db_knowledge\n\n        request = MagicMock()\n        request.document_set_ids = []\n        request.hierarchy_node_ids = [1]\n        request.document_ids = []\n\n        with __import__(\"pytest\").raises(HTTPException) as exc_info:\n            _validate_vector_db_knowledge(request)\n        assert exc_info.value.status_code == 400\n        assert \"hierarchy nodes\" in exc_info.value.detail\n\n    @patch(\n        \"onyx.server.features.persona.api.DISABLE_VECTOR_DB\",\n        True,\n    )\n    def test_rejects_document_ids(self) -> None:\n        from fastapi import HTTPException\n\n        from onyx.server.features.persona.api import _validate_vector_db_knowledge\n\n        request = MagicMock()\n        request.document_set_ids = []\n        request.hierarchy_node_ids = []\n        request.document_ids = [\"doc-abc\"]\n\n        with __import__(\"pytest\").raises(HTTPException) as exc_info:\n            _validate_vector_db_knowledge(request)\n        assert exc_info.value.status_code == 400\n        assert \"documents\" in exc_info.value.detail\n\n    @patch(\n        \"onyx.server.features.persona.api.DISABLE_VECTOR_DB\",\n        True,\n    )\n    def test_allows_user_files_only(self) -> None:\n        from onyx.server.features.persona.api import _validate_vector_db_knowledge\n\n        request = MagicMock()\n        request.document_set_ids = []\n        request.hierarchy_node_ids = []\n        request.document_ids = []\n\n        _validate_vector_db_knowledge(request)\n\n    @patch(\n        \"onyx.server.features.persona.api.DISABLE_VECTOR_DB\",\n        False,\n    )\n    def test_allows_everything_when_vector_db_enabled(self) -> None:\n        from onyx.server.features.persona.api import _validate_vector_db_knowledge\n\n        request = MagicMock()\n        request.document_set_ids = [1, 2]\n        request.hierarchy_node_ids = [3]\n        request.document_ids = [\"doc-x\"]\n\n        _validate_vector_db_knowledge(request)\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/test_file_reader_tool.py",
    "content": "\"\"\"Tests for the FileReaderTool.\n\nVerifies:\n- Tool definition schema is well-formed\n- File ID validation (allowlist, UUID format)\n- Character range extraction and clamping\n- Error handling for missing parameters and non-text files\n- is_available() reflects DISABLE_VECTOR_DB\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nimport pytest\n\nfrom onyx.file_store.models import ChatFileType\nfrom onyx.file_store.models import InMemoryChatFile\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.tools.models import ToolCallException\nfrom onyx.tools.tool_implementations.file_reader.file_reader_tool import FILE_ID_FIELD\nfrom onyx.tools.tool_implementations.file_reader.file_reader_tool import FileReaderTool\nfrom onyx.tools.tool_implementations.file_reader.file_reader_tool import MAX_NUM_CHARS\nfrom onyx.tools.tool_implementations.file_reader.file_reader_tool import NUM_CHARS_FIELD\nfrom onyx.tools.tool_implementations.file_reader.file_reader_tool import (\n    START_CHAR_FIELD,\n)\n\nTOOL_MODULE = \"onyx.tools.tool_implementations.file_reader.file_reader_tool\"\n_PLACEMENT = Placement(turn_index=0)\n\n\ndef _make_tool(\n    user_file_ids: list | None = None,\n    chat_file_ids: list | None = None,\n) -> FileReaderTool:\n    emitter = MagicMock()\n    return FileReaderTool(\n        tool_id=99,\n        emitter=emitter,\n        user_file_ids=user_file_ids or [],\n        chat_file_ids=chat_file_ids or [],\n    )\n\n\ndef _text_file(content: str, filename: str = \"test.txt\") -> InMemoryChatFile:\n    return InMemoryChatFile(\n        file_id=\"some-file-id\",\n        content=content.encode(\"utf-8\"),\n        file_type=ChatFileType.PLAIN_TEXT,\n        filename=filename,\n    )\n\n\n# ------------------------------------------------------------------\n# Tool metadata\n# ------------------------------------------------------------------\n\n\nclass TestToolMetadata:\n    def test_tool_name(self) -> None:\n        tool = _make_tool()\n        assert tool.name == \"read_file\"\n\n    def test_tool_definition_schema(self) -> None:\n        tool = _make_tool()\n        defn = tool.tool_definition()\n        assert defn[\"type\"] == \"function\"\n        func = defn[\"function\"]\n        assert func[\"name\"] == \"read_file\"\n        props = func[\"parameters\"][\"properties\"]\n        assert FILE_ID_FIELD in props\n        assert START_CHAR_FIELD in props\n        assert NUM_CHARS_FIELD in props\n        assert func[\"parameters\"][\"required\"] == [FILE_ID_FIELD]\n\n\n# ------------------------------------------------------------------\n# File ID validation\n# ------------------------------------------------------------------\n\n\nclass TestFileIdValidation:\n    def test_rejects_invalid_uuid(self) -> None:\n        tool = _make_tool()\n        with pytest.raises(ToolCallException, match=\"Invalid file_id\"):\n            tool._validate_file_id(\"not-a-uuid\")\n\n    def test_rejects_file_not_in_allowlist(self) -> None:\n        tool = _make_tool(user_file_ids=[uuid4()])\n        other_id = uuid4()\n        with pytest.raises(ToolCallException, match=\"not in available files\"):\n            tool._validate_file_id(str(other_id))\n\n    def test_accepts_user_file_id(self) -> None:\n        uid = uuid4()\n        tool = _make_tool(user_file_ids=[uid])\n        assert tool._validate_file_id(str(uid)) == uid\n\n    def test_accepts_chat_file_id(self) -> None:\n        cid = uuid4()\n        tool = _make_tool(chat_file_ids=[cid])\n        assert tool._validate_file_id(str(cid)) == cid\n\n\n# ------------------------------------------------------------------\n# run() — character range extraction\n# ------------------------------------------------------------------\n\n\nclass TestRun:\n    @patch(f\"{TOOL_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TOOL_MODULE}.load_user_file\")\n    def test_returns_full_content_by_default(\n        self,\n        mock_load_user_file: MagicMock,\n        mock_get_session: MagicMock,\n    ) -> None:\n        uid = uuid4()\n        content = \"Hello, world!\"\n        mock_load_user_file.return_value = _text_file(content)\n        mock_get_session.return_value.__enter__.return_value = MagicMock()\n\n        tool = _make_tool(user_file_ids=[uid])\n        resp = tool.run(\n            placement=_PLACEMENT,\n            override_kwargs=MagicMock(),\n            **{FILE_ID_FIELD: str(uid)},\n        )\n        assert content in resp.llm_facing_response\n\n    @patch(f\"{TOOL_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TOOL_MODULE}.load_user_file\")\n    def test_respects_start_char_and_num_chars(\n        self,\n        mock_load_user_file: MagicMock,\n        mock_get_session: MagicMock,\n    ) -> None:\n        uid = uuid4()\n        content = \"abcdefghijklmnop\"\n        mock_load_user_file.return_value = _text_file(content)\n        mock_get_session.return_value.__enter__.return_value = MagicMock()\n\n        tool = _make_tool(user_file_ids=[uid])\n        resp = tool.run(\n            placement=_PLACEMENT,\n            override_kwargs=MagicMock(),\n            **{FILE_ID_FIELD: str(uid), START_CHAR_FIELD: 4, NUM_CHARS_FIELD: 6},\n        )\n        assert \"efghij\" in resp.llm_facing_response\n\n    @patch(f\"{TOOL_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TOOL_MODULE}.load_user_file\")\n    def test_clamps_num_chars_to_max(\n        self,\n        mock_load_user_file: MagicMock,\n        mock_get_session: MagicMock,\n    ) -> None:\n        uid = uuid4()\n        content = \"x\" * (MAX_NUM_CHARS + 500)\n        mock_load_user_file.return_value = _text_file(content)\n        mock_get_session.return_value.__enter__.return_value = MagicMock()\n\n        tool = _make_tool(user_file_ids=[uid])\n        resp = tool.run(\n            placement=_PLACEMENT,\n            override_kwargs=MagicMock(),\n            **{FILE_ID_FIELD: str(uid), NUM_CHARS_FIELD: MAX_NUM_CHARS + 9999},\n        )\n        assert f\"Characters 0-{MAX_NUM_CHARS}\" in resp.llm_facing_response\n\n    @patch(f\"{TOOL_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TOOL_MODULE}.load_user_file\")\n    def test_includes_continuation_hint(\n        self,\n        mock_load_user_file: MagicMock,\n        mock_get_session: MagicMock,\n    ) -> None:\n        uid = uuid4()\n        content = \"x\" * 100\n        mock_load_user_file.return_value = _text_file(content)\n        mock_get_session.return_value.__enter__.return_value = MagicMock()\n\n        tool = _make_tool(user_file_ids=[uid])\n        resp = tool.run(\n            placement=_PLACEMENT,\n            override_kwargs=MagicMock(),\n            **{FILE_ID_FIELD: str(uid), NUM_CHARS_FIELD: 10},\n        )\n        assert \"use start_char=10 to continue reading\" in resp.llm_facing_response\n\n    def test_raises_on_missing_file_id(self) -> None:\n        tool = _make_tool()\n        with pytest.raises(ToolCallException, match=\"Missing required\"):\n            tool.run(\n                placement=_PLACEMENT,\n                override_kwargs=MagicMock(),\n            )\n\n    @patch(f\"{TOOL_MODULE}.get_session_with_current_tenant\")\n    @patch(f\"{TOOL_MODULE}.load_user_file\")\n    def test_raises_on_non_text_file(\n        self,\n        mock_load_user_file: MagicMock,\n        mock_get_session: MagicMock,\n    ) -> None:\n        uid = uuid4()\n        mock_load_user_file.return_value = InMemoryChatFile(\n            file_id=\"img\",\n            content=b\"\\x89PNG\",\n            file_type=ChatFileType.IMAGE,\n            filename=\"photo.png\",\n        )\n        mock_get_session.return_value.__enter__.return_value = MagicMock()\n\n        tool = _make_tool(user_file_ids=[uid])\n        with pytest.raises(ToolCallException, match=\"not a text file\"):\n            tool.run(\n                placement=_PLACEMENT,\n                override_kwargs=MagicMock(),\n                **{FILE_ID_FIELD: str(uid)},\n            )\n\n\n# ------------------------------------------------------------------\n# is_available()\n# ------------------------------------------------------------------\n\n\nclass TestIsAvailable:\n    @patch(f\"{TOOL_MODULE}.DISABLE_VECTOR_DB\", True)\n    def test_available_when_vector_db_disabled(self) -> None:\n        assert FileReaderTool.is_available(MagicMock()) is True\n\n    @patch(f\"{TOOL_MODULE}.DISABLE_VECTOR_DB\", False)\n    def test_unavailable_when_vector_db_enabled(self) -> None:\n        assert FileReaderTool.is_available(MagicMock()) is False\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/test_no_vectordb.py",
    "content": "\"\"\"Tests for tool availability when DISABLE_VECTOR_DB is True.\n\nVerifies that SearchTool and OpenURLTool report themselves as unavailable\nwhen the vector DB is disabled, and that FileReaderTool remains available.\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\n\n# ------------------------------------------------------------------\n# SearchTool\n# ------------------------------------------------------------------\n\n\n@patch(\"onyx.configs.app_configs.DISABLE_VECTOR_DB\", True)\ndef test_search_tool_unavailable_when_vector_db_disabled() -> None:\n    from onyx.tools.tool_implementations.search.search_tool import SearchTool\n\n    db_session = MagicMock(spec=Session)\n    assert SearchTool.is_available(db_session) is False\n\n\n@patch(\"onyx.configs.app_configs.DISABLE_VECTOR_DB\", False)\n@patch(\n    \"onyx.tools.tool_implementations.search.search_tool.check_connectors_exist\",\n    return_value=True,\n)\ndef test_search_tool_available_when_vector_db_enabled(\n    _mock_connectors: MagicMock,\n) -> None:\n    from onyx.tools.tool_implementations.search.search_tool import SearchTool\n\n    db_session = MagicMock(spec=Session)\n    assert SearchTool.is_available(db_session) is True\n\n\n# ------------------------------------------------------------------\n# OpenURLTool\n# ------------------------------------------------------------------\n\n\n@patch(\"onyx.configs.app_configs.DISABLE_VECTOR_DB\", True)\ndef test_open_url_tool_unavailable_when_vector_db_disabled() -> None:\n    from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool\n\n    db_session = MagicMock(spec=Session)\n    assert OpenURLTool.is_available(db_session) is False\n\n\n# ------------------------------------------------------------------\n# FileReaderTool — available when vector DB is disabled (for now)\n# ------------------------------------------------------------------\n\n\n@pytest.mark.parametrize(\"vector_db_disabled\", [True, False])\ndef test_file_reader_tool_available(vector_db_disabled: bool) -> None:\n    # Patch where it's *used*, not where it's defined — the module has its own\n    # local reference after `from onyx.configs.app_configs import DISABLE_VECTOR_DB`.\n    with patch(\n        \"onyx.tools.tool_implementations.file_reader.file_reader_tool.DISABLE_VECTOR_DB\",\n        vector_db_disabled,\n    ):\n        from onyx.tools.tool_implementations.file_reader.file_reader_tool import (\n            FileReaderTool,\n        )\n\n        db_session = MagicMock(spec=Session)\n        assert FileReaderTool.is_available(db_session) is vector_db_disabled\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/test_python_tool_availability.py",
    "content": "\"\"\"Tests for PythonTool availability based on server_enabled flag and health check.\n\nVerifies that PythonTool reports itself as unavailable when either:\n- CODE_INTERPRETER_BASE_URL is not set, or\n- CodeInterpreterServer.server_enabled is False in the database, or\n- The Code Interpreter service health check fails.\n\nAlso verifies that the health check result is cached with a TTL.\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom sqlalchemy.orm import Session\n\nTOOL_MODULE = \"onyx.tools.tool_implementations.python.python_tool\"\nCLIENT_MODULE = \"onyx.tools.tool_implementations.python.code_interpreter_client\"\n\n\n@pytest.fixture(autouse=True)\ndef _clear_health_cache() -> None:\n    \"\"\"Reset the health check cache before every test.\"\"\"\n    import onyx.tools.tool_implementations.python.code_interpreter_client as mod\n\n    mod._health_cache = {}\n\n\n# ------------------------------------------------------------------\n# Unavailable when CODE_INTERPRETER_BASE_URL is not set\n# ------------------------------------------------------------------\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", None)\ndef test_python_tool_unavailable_without_base_url() -> None:\n    from onyx.tools.tool_implementations.python.python_tool import PythonTool\n\n    db_session = MagicMock(spec=Session)\n    assert PythonTool.is_available(db_session) is False\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", \"\")\ndef test_python_tool_unavailable_with_empty_base_url() -> None:\n    from onyx.tools.tool_implementations.python.python_tool import PythonTool\n\n    db_session = MagicMock(spec=Session)\n    assert PythonTool.is_available(db_session) is False\n\n\n# ------------------------------------------------------------------\n# Unavailable when server_enabled is False\n# ------------------------------------------------------------------\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", \"http://localhost:8000\")\n@patch(f\"{TOOL_MODULE}.fetch_code_interpreter_server\")\ndef test_python_tool_unavailable_when_server_disabled(\n    mock_fetch: MagicMock,\n) -> None:\n    from onyx.tools.tool_implementations.python.python_tool import PythonTool\n\n    mock_server = MagicMock()\n    mock_server.server_enabled = False\n    mock_fetch.return_value = mock_server\n\n    db_session = MagicMock(spec=Session)\n    assert PythonTool.is_available(db_session) is False\n\n\n# ------------------------------------------------------------------\n# Health check determines availability when URL + server are OK\n# ------------------------------------------------------------------\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", \"http://localhost:8000\")\n@patch(f\"{TOOL_MODULE}.fetch_code_interpreter_server\")\n@patch(f\"{TOOL_MODULE}.CodeInterpreterClient\")\ndef test_python_tool_available_when_health_check_passes(\n    mock_client_cls: MagicMock,\n    mock_fetch: MagicMock,\n) -> None:\n    from onyx.tools.tool_implementations.python.python_tool import PythonTool\n\n    mock_server = MagicMock()\n    mock_server.server_enabled = True\n    mock_fetch.return_value = mock_server\n\n    mock_client = MagicMock()\n    mock_client.health.return_value = True\n    mock_client_cls.return_value.__enter__ = MagicMock(return_value=mock_client)\n    mock_client_cls.return_value.__exit__ = MagicMock(return_value=False)\n\n    db_session = MagicMock(spec=Session)\n    assert PythonTool.is_available(db_session) is True\n    mock_client.health.assert_called_once_with(use_cache=True)\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", \"http://localhost:8000\")\n@patch(f\"{TOOL_MODULE}.fetch_code_interpreter_server\")\n@patch(f\"{TOOL_MODULE}.CodeInterpreterClient\")\ndef test_python_tool_unavailable_when_health_check_fails(\n    mock_client_cls: MagicMock,\n    mock_fetch: MagicMock,\n) -> None:\n    from onyx.tools.tool_implementations.python.python_tool import PythonTool\n\n    mock_server = MagicMock()\n    mock_server.server_enabled = True\n    mock_fetch.return_value = mock_server\n\n    mock_client = MagicMock()\n    mock_client.health.return_value = False\n    mock_client_cls.return_value.__enter__ = MagicMock(return_value=mock_client)\n    mock_client_cls.return_value.__exit__ = MagicMock(return_value=False)\n\n    db_session = MagicMock(spec=Session)\n    assert PythonTool.is_available(db_session) is False\n    mock_client.health.assert_called_once_with(use_cache=True)\n\n\n# ------------------------------------------------------------------\n# Health check is NOT reached when preconditions fail\n# ------------------------------------------------------------------\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", \"http://localhost:8000\")\n@patch(f\"{TOOL_MODULE}.fetch_code_interpreter_server\")\n@patch(f\"{TOOL_MODULE}.CodeInterpreterClient\")\ndef test_health_check_not_called_when_server_disabled(\n    mock_client_cls: MagicMock,\n    mock_fetch: MagicMock,\n) -> None:\n    from onyx.tools.tool_implementations.python.python_tool import PythonTool\n\n    mock_server = MagicMock()\n    mock_server.server_enabled = False\n    mock_fetch.return_value = mock_server\n\n    db_session = MagicMock(spec=Session)\n    assert PythonTool.is_available(db_session) is False\n    mock_client_cls.assert_not_called()\n\n\n# ------------------------------------------------------------------\n# Health check caching (tested at the client level)\n# ------------------------------------------------------------------\n\n\ndef test_health_check_cached_on_second_call() -> None:\n    from onyx.tools.tool_implementations.python.code_interpreter_client import (\n        CodeInterpreterClient,\n    )\n\n    client = CodeInterpreterClient(base_url=\"http://fake:9000\")\n    mock_response = MagicMock()\n    mock_response.json.return_value = {\"status\": \"ok\"}\n\n    with patch.object(client.session, \"get\", return_value=mock_response) as mock_get:\n        assert client.health(use_cache=True) is True\n        assert client.health(use_cache=True) is True\n        # Only one HTTP call — the second used the cache\n        mock_get.assert_called_once()\n\n\n@patch(f\"{CLIENT_MODULE}.time\")\ndef test_health_check_refreshed_after_ttl_expires(mock_time: MagicMock) -> None:\n    from onyx.tools.tool_implementations.python.code_interpreter_client import (\n        CodeInterpreterClient,\n        _HEALTH_CACHE_TTL_SECONDS,\n    )\n\n    client = CodeInterpreterClient(base_url=\"http://fake:9000\")\n    mock_response = MagicMock()\n    mock_response.json.return_value = {\"status\": \"ok\"}\n\n    with patch.object(client.session, \"get\", return_value=mock_response) as mock_get:\n        # First call at t=0 — cache miss\n        mock_time.monotonic.return_value = 0.0\n        assert client.health(use_cache=True) is True\n        assert mock_get.call_count == 1\n\n        # Second call within TTL — cache hit\n        mock_time.monotonic.return_value = float(_HEALTH_CACHE_TTL_SECONDS - 1)\n        assert client.health(use_cache=True) is True\n        assert mock_get.call_count == 1\n\n        # Third call after TTL — cache miss, fresh request\n        mock_time.monotonic.return_value = float(_HEALTH_CACHE_TTL_SECONDS + 1)\n        assert client.health(use_cache=True) is True\n        assert mock_get.call_count == 2\n\n\ndef test_health_check_no_cache_by_default() -> None:\n    from onyx.tools.tool_implementations.python.code_interpreter_client import (\n        CodeInterpreterClient,\n    )\n\n    client = CodeInterpreterClient(base_url=\"http://fake:9000\")\n    mock_response = MagicMock()\n    mock_response.json.return_value = {\"status\": \"ok\"}\n\n    with patch.object(client.session, \"get\", return_value=mock_response) as mock_get:\n        assert client.health() is True\n        assert client.health() is True\n        # Both calls hit the network when use_cache=False (default)\n        assert mock_get.call_count == 2\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/test_search_utils.py",
    "content": "\"\"\"Unit tests for search utility functions.\"\"\"\n\nfrom typing import NamedTuple\n\nimport pytest\n\nfrom onyx.tools.tool_implementations.search.search_tool import deduplicate_queries\nfrom onyx.tools.tool_implementations.search.search_utils import (\n    weighted_reciprocal_rank_fusion,\n)\n\n\n# =============================================================================\n# Test Data Structures\n# =============================================================================\n\n\nclass MockDocument(NamedTuple):\n    \"\"\"Mock document for testing RRF.\"\"\"\n\n    document_id: str\n    content: str\n\n\n# =============================================================================\n# Tests for weighted_reciprocal_rank_fusion\n# =============================================================================\n\n\nclass TestWeightedReciprocalRankFusion:\n    \"\"\"Test suite for weighted_reciprocal_rank_fusion function.\"\"\"\n\n    def test_single_result_list(self) -> None:\n        \"\"\"Test RRF with a single result list.\"\"\"\n        doc_a = MockDocument(\"doc_a\", \"Content A\")\n        doc_b = MockDocument(\"doc_b\", \"Content B\")\n        doc_c = MockDocument(\"doc_c\", \"Content C\")\n\n        ranked_results = [[doc_a, doc_b, doc_c]]\n        weights = [1.0]\n\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n        )\n\n        # With a single list, order should be preserved\n        assert len(result) == 3\n        assert result[0].document_id == \"doc_a\"\n        assert result[1].document_id == \"doc_b\"\n        assert result[2].document_id == \"doc_c\"\n\n    def test_two_identical_lists_equal_weights(self) -> None:\n        \"\"\"Test RRF with two identical lists and equal weights.\"\"\"\n        doc_a = MockDocument(\"doc_a\", \"Content A\")\n        doc_b = MockDocument(\"doc_b\", \"Content B\")\n        doc_c = MockDocument(\"doc_c\", \"Content C\")\n\n        ranked_results = [\n            [doc_a, doc_b, doc_c],\n            [doc_a, doc_b, doc_c],\n        ]\n        weights = [1.0, 1.0]\n\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n        )\n\n        # Order should be preserved, but items appear only once\n        assert len(result) == 3\n        assert result[0].document_id == \"doc_a\"\n        assert result[1].document_id == \"doc_b\"\n        assert result[2].document_id == \"doc_c\"\n\n    def test_two_different_lists_equal_weights(self) -> None:\n        \"\"\"Test RRF with different result lists and equal weights.\"\"\"\n        doc_a = MockDocument(\"doc_a\", \"Content A\")\n        doc_b = MockDocument(\"doc_b\", \"Content B\")\n        doc_c = MockDocument(\"doc_c\", \"Content C\")\n        doc_d = MockDocument(\"doc_d\", \"Content D\")\n\n        ranked_results = [\n            [doc_a, doc_b, doc_c],\n            [doc_c, doc_a, doc_d],\n        ]\n        weights = [1.0, 1.0]\n\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n        )\n\n        # doc_a and doc_c should rank highest (appear in both lists)\n        assert len(result) == 4\n        # doc_a appears at rank 1 and 2 in the two lists\n        # doc_c appears at rank 3 and 1 in the two lists\n        # Both should be at top, exact order depends on tiebreaking\n        top_two_ids = {result[0].document_id, result[1].document_id}\n        assert top_two_ids == {\"doc_a\", \"doc_c\"}\n\n    def test_weighted_lists_higher_weight_dominates(self) -> None:\n        \"\"\"Test that higher weighted list influences ranking more.\"\"\"\n        doc_a = MockDocument(\"doc_a\", \"Content A\")\n        doc_b = MockDocument(\"doc_b\", \"Content B\")\n        doc_c = MockDocument(\"doc_c\", \"Content C\")\n\n        # First list has higher weight\n        ranked_results = [\n            [doc_a, doc_b],  # weight 2.0\n            [doc_c, doc_a],  # weight 1.0\n        ]\n        weights = [2.0, 1.0]\n\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n        )\n\n        # doc_a should be first (rank 1 in list 1 with weight 2.0, rank 2 in list 2 with weight 1.0)\n        # RRF score for doc_a: 2.0/(50+1) + 1.0/(50+2) = 2.0/51 + 1.0/52 = 0.0392 + 0.0192 = 0.0584\n        # RRF score for doc_b: 2.0/(50+2) = 2.0/52 = 0.0385\n        # RRF score for doc_c: 1.0/(50+1) = 1.0/51 = 0.0196\n        assert len(result) == 3\n        assert result[0].document_id == \"doc_a\"\n        assert result[1].document_id == \"doc_b\"\n        assert result[2].document_id == \"doc_c\"\n\n    def test_empty_result_list(self) -> None:\n        \"\"\"Test RRF with empty result list.\"\"\"\n        ranked_results: list[list[MockDocument]] = [[]]\n        weights = [1.0]\n\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n        )\n\n        assert len(result) == 0\n\n    def test_multiple_empty_lists(self) -> None:\n        \"\"\"Test RRF with multiple empty result lists.\"\"\"\n        ranked_results: list[list[MockDocument]] = [[], [], []]\n        weights = [1.0, 1.0, 1.0]\n\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n        )\n\n        assert len(result) == 0\n\n    def test_mixed_empty_and_non_empty_lists(self) -> None:\n        \"\"\"Test RRF with mix of empty and non-empty lists.\"\"\"\n        doc_a = MockDocument(\"doc_a\", \"Content A\")\n        doc_b = MockDocument(\"doc_b\", \"Content B\")\n\n        ranked_results = [\n            [],\n            [doc_a, doc_b],\n            [],\n        ]\n        weights = [1.0, 1.0, 1.0]\n\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n        )\n\n        assert len(result) == 2\n        assert result[0].document_id == \"doc_a\"\n        assert result[1].document_id == \"doc_b\"\n\n    def test_mismatched_weights_raises_error(self) -> None:\n        \"\"\"Test that mismatched weights and results raises ValueError.\"\"\"\n        doc_a = MockDocument(\"doc_a\", \"Content A\")\n\n        ranked_results = [[doc_a]]\n        weights = [1.0, 2.0]  # Too many weights\n\n        with pytest.raises(ValueError, match=\"must match\"):\n            weighted_reciprocal_rank_fusion(\n                ranked_results=ranked_results,\n                weights=weights,\n                id_extractor=lambda doc: doc.document_id,\n            )\n\n    def test_custom_k_value(self) -> None:\n        \"\"\"Test RRF with custom k value.\"\"\"\n        doc_a = MockDocument(\"doc_a\", \"Content A\")\n        doc_b = MockDocument(\"doc_b\", \"Content B\")\n\n        ranked_results = [[doc_a, doc_b]]\n        weights = [1.0]\n\n        # With k=10, scores should be: 1/(10+1)=0.091, 1/(10+2)=0.083\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n            k=10,\n        )\n\n        assert len(result) == 2\n        assert result[0].document_id == \"doc_a\"\n        assert result[1].document_id == \"doc_b\"\n\n    def test_deduplication_preserves_first_occurrence(self) -> None:\n        \"\"\"Test that when same document appears in multiple lists, first occurrence is used.\"\"\"\n        doc_a1 = MockDocument(\"doc_a\", \"Content A - First\")\n        doc_a2 = MockDocument(\"doc_a\", \"Content A - Second\")\n        doc_b = MockDocument(\"doc_b\", \"Content B\")\n\n        ranked_results = [\n            [doc_a1, doc_b],\n            [doc_a2],  # Same ID as doc_a1\n        ]\n        weights = [1.0, 1.0]\n\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n        )\n\n        # Should use first occurrence of doc_a\n        assert len(result) == 2\n        doc_a_result = next(doc for doc in result if doc.document_id == \"doc_a\")\n        assert doc_a_result.content == \"Content A - First\"\n\n    def test_realistic_semantic_vs_keyword_search_scenario(self) -> None:\n        \"\"\"Test realistic scenario: semantic search vs keyword search with different weights.\"\"\"\n        # Semantic search results\n        doc_a = MockDocument(\"doc_a\", \"Semantic Result A\")\n        doc_b = MockDocument(\"doc_b\", \"Semantic Result B\")\n        doc_c = MockDocument(\"doc_c\", \"Semantic Result C\")\n\n        # Keyword search results (doc_c ranks first, doc_a also appears)\n        doc_d = MockDocument(\"doc_d\", \"Keyword Result D\")\n\n        ranked_results = [\n            [doc_a, doc_b, doc_c],  # Semantic: weight 1.2\n            [doc_c, doc_a, doc_d],  # Keyword: weight 1.0\n        ]\n        weights = [1.2, 1.0]\n\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n        )\n\n        # doc_a and doc_c appear in both lists and should rank highest\n        assert len(result) == 4\n        top_two_ids = {result[0].document_id, result[1].document_id}\n        assert top_two_ids == {\"doc_a\", \"doc_c\"}\n\n    def test_many_lists_with_varying_weights(self) -> None:\n        \"\"\"Test RRF with multiple lists and varying weights.\"\"\"\n        doc_a = MockDocument(\"doc_a\", \"Content A\")\n        doc_b = MockDocument(\"doc_b\", \"Content B\")\n        doc_c = MockDocument(\"doc_c\", \"Content C\")\n        doc_d = MockDocument(\"doc_d\", \"Content D\")\n\n        ranked_results = [\n            [doc_a, doc_b],  # weight 1.3\n            [doc_c, doc_a],  # weight 1.0\n            [doc_a, doc_d],  # weight 0.7\n            [doc_b, doc_a],  # weight 0.5\n        ]\n        weights = [1.3, 1.0, 0.7, 0.5]\n\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n        )\n\n        # doc_a appears in all 4 lists, should rank first\n        assert len(result) == 4\n        assert result[0].document_id == \"doc_a\"\n\n    def test_zero_weight(self) -> None:\n        \"\"\"Test RRF with zero weight for one list.\"\"\"\n        doc_a = MockDocument(\"doc_a\", \"Content A\")\n        doc_b = MockDocument(\"doc_b\", \"Content B\")\n        doc_c = MockDocument(\"doc_c\", \"Content C\")\n\n        ranked_results = [\n            [doc_a, doc_b],  # weight 1.0\n            [doc_c],  # weight 0.0 (ignored)\n        ]\n        weights = [1.0, 0.0]\n\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n        )\n\n        # doc_c should rank last due to zero weight\n        assert len(result) == 3\n        assert result[0].document_id == \"doc_a\"\n        assert result[1].document_id == \"doc_b\"\n        assert result[2].document_id == \"doc_c\"\n\n    def test_negative_weight(self) -> None:\n        \"\"\"Test RRF with negative weight (should still work mathematically).\"\"\"\n        doc_a = MockDocument(\"doc_a\", \"Content A\")\n        doc_b = MockDocument(\"doc_b\", \"Content B\")\n\n        ranked_results = [\n            [doc_a, doc_b],  # weight 1.0\n            [doc_b, doc_a],  # weight -0.5 (penalizes)\n        ]\n        weights = [1.0, -0.5]\n\n        result = weighted_reciprocal_rank_fusion(\n            ranked_results=ranked_results,\n            weights=weights,\n            id_extractor=lambda doc: doc.document_id,\n        )\n\n        # doc_a should rank higher (benefits from positive weight more)\n        # doc_a: 1.0/(50+1) + (-0.5)/(50+2) = 0.0196 - 0.0096 = 0.0100\n        # doc_b: 1.0/(50+2) + (-0.5)/(50+1) = 0.0192 - 0.0098 = 0.0094\n        assert len(result) == 2\n        assert result[0].document_id == \"doc_a\"\n        assert result[1].document_id == \"doc_b\"\n\n\n# =============================================================================\n# Tests for deduplicate_queries\n# =============================================================================\n\n\nclass TestDeduplicateQueries:\n    \"\"\"Test suite for deduplicate_queries function.\"\"\"\n\n    def test_no_duplicates(self) -> None:\n        \"\"\"Test deduplication with no duplicate queries.\"\"\"\n        queries_with_weights = [\n            (\"first query\", 1.0),\n            (\"second query\", 2.0),\n            (\"third query\", 1.5),\n        ]\n\n        result = deduplicate_queries(queries_with_weights)\n\n        assert len(result) == 3\n        assert (\"first query\", 1.0) in result\n        assert (\"second query\", 2.0) in result\n        assert (\"third query\", 1.5) in result\n\n    def test_exact_duplicates(self) -> None:\n        \"\"\"Test deduplication with exact duplicate queries.\"\"\"\n        queries_with_weights = [\n            (\"same query\", 1.0),\n            (\"same query\", 2.0),\n            (\"same query\", 1.5),\n        ]\n\n        result = deduplicate_queries(queries_with_weights)\n\n        # Should have one entry with summed weights\n        assert len(result) == 1\n        assert result[0][0] == \"same query\"\n        assert result[0][1] == 4.5  # 1.0 + 2.0 + 1.5\n\n    def test_case_insensitive_duplicates(self) -> None:\n        \"\"\"Test that deduplication is case-insensitive.\"\"\"\n        queries_with_weights = [\n            (\"Search Query\", 1.0),\n            (\"search query\", 2.0),\n            (\"SEARCH QUERY\", 1.5),\n        ]\n\n        result = deduplicate_queries(queries_with_weights)\n\n        # Should have one entry with summed weights\n        assert len(result) == 1\n        # Should preserve the casing of first occurrence\n        assert result[0][0] == \"Search Query\"\n        assert result[0][1] == 4.5  # 1.0 + 2.0 + 1.5\n\n    def test_mixed_duplicates_and_unique(self) -> None:\n        \"\"\"Test deduplication with mix of duplicates and unique queries.\"\"\"\n        queries_with_weights = [\n            (\"unique query\", 1.0),\n            (\"duplicate query\", 2.0),\n            (\"DUPLICATE QUERY\", 1.5),\n            (\"another unique\", 3.0),\n        ]\n\n        result = deduplicate_queries(queries_with_weights)\n\n        assert len(result) == 3\n\n        # Check for unique queries\n        unique_queries = [q for q, w in result if q == \"unique query\"]\n        assert len(unique_queries) == 1\n        unique_weight = [w for q, w in result if q == \"unique query\"][0]\n        assert unique_weight == 1.0\n\n        another_unique_queries = [q for q, w in result if q == \"another unique\"]\n        assert len(another_unique_queries) == 1\n        another_weight = [w for q, w in result if q == \"another unique\"][0]\n        assert another_weight == 3.0\n\n        # Check for deduplicated query\n        dup_queries = [q for q, w in result if q.lower() == \"duplicate query\"]\n        assert len(dup_queries) == 1\n        dup_weight = [w for q, w in result if q.lower() == \"duplicate query\"][0]\n        assert dup_weight == 3.5  # 2.0 + 1.5\n\n    def test_empty_list(self) -> None:\n        \"\"\"Test deduplication with empty list.\"\"\"\n        queries_with_weights: list[tuple[str, float]] = []\n\n        result = deduplicate_queries(queries_with_weights)\n\n        assert len(result) == 0\n\n    def test_single_query(self) -> None:\n        \"\"\"Test deduplication with single query.\"\"\"\n        queries_with_weights = [(\"single query\", 1.5)]\n\n        result = deduplicate_queries(queries_with_weights)\n\n        assert len(result) == 1\n        assert result[0] == (\"single query\", 1.5)\n\n    def test_preserves_first_occurrence_casing(self) -> None:\n        \"\"\"Test that the first occurrence's casing is preserved.\"\"\"\n        queries_with_weights = [\n            (\"First Version\", 1.0),\n            (\"first version\", 2.0),\n            (\"FIRST VERSION\", 3.0),\n        ]\n\n        result = deduplicate_queries(queries_with_weights)\n\n        assert len(result) == 1\n        # First occurrence casing should be preserved\n        assert result[0][0] == \"First Version\"\n        assert result[0][1] == 6.0\n\n    def test_whitespace_differences(self) -> None:\n        \"\"\"Test that queries with different whitespace are treated as different.\"\"\"\n        queries_with_weights = [\n            (\"query with spaces\", 1.0),\n            (\"query  with  spaces\", 2.0),  # Different spacing\n            (\"query with spaces\", 3.0),\n        ]\n\n        result = deduplicate_queries(queries_with_weights)\n\n        # Should have two entries (one for single space, one for double)\n        assert len(result) == 2\n\n        # Find the summed weight for single-space version\n        single_space_weight = [w for q, w in result if q == \"query with spaces\"][0]\n        assert single_space_weight == 4.0  # 1.0 + 3.0\n\n        # Find the weight for double-space version\n        double_space_weight = [w for q, w in result if q == \"query  with  spaces\"][0]\n        assert double_space_weight == 2.0\n\n    def test_zero_weights(self) -> None:\n        \"\"\"Test deduplication with zero weights.\"\"\"\n        queries_with_weights = [\n            (\"query\", 0.0),\n            (\"query\", 0.0),\n            (\"other query\", 1.0),\n        ]\n\n        result = deduplicate_queries(queries_with_weights)\n\n        assert len(result) == 2\n        query_weight = [w for q, w in result if q == \"query\"][0]\n        assert query_weight == 0.0\n        other_weight = [w for q, w in result if q == \"other query\"][0]\n        assert other_weight == 1.0\n\n    def test_negative_weights(self) -> None:\n        \"\"\"Test deduplication with negative weights.\"\"\"\n        queries_with_weights = [\n            (\"query\", 2.0),\n            (\"query\", -1.0),\n        ]\n\n        result = deduplicate_queries(queries_with_weights)\n\n        assert len(result) == 1\n        assert result[0][0] == \"query\"\n        assert result[0][1] == 1.0  # 2.0 + (-1.0)\n\n    def test_realistic_scenario_semantic_and_keyword_queries(self) -> None:\n        \"\"\"Test realistic scenario with semantic and keyword query deduplication.\"\"\"\n        queries_with_weights = [\n            (\"What is machine learning?\", 1.3),  # Semantic query\n            (\"what is machine learning?\", 1.0),  # LLM non-custom query\n            (\"machine learning definition\", 1.0),  # Keyword expansion\n            (\"machine learning basics\", 1.0),  # Keyword expansion\n            (\"MACHINE LEARNING DEFINITION\", 1.0),  # Duplicate keyword (different case)\n        ]\n\n        result = deduplicate_queries(queries_with_weights)\n\n        # Should have 3 unique queries after deduplication\n        assert len(result) == 3\n\n        # Check that \"What is machine learning?\" variants were deduplicated\n        ml_queries = [\n            (q, w) for q, w in result if q.lower() == \"what is machine learning?\"\n        ]\n        assert len(ml_queries) == 1\n        assert (\n            ml_queries[0][0] == \"What is machine learning?\"\n        )  # First occurrence casing\n        assert ml_queries[0][1] == 2.3  # 1.3 + 1.0\n\n        # Check that \"machine learning definition\" variants were deduplicated\n        def_queries = [\n            (q, w) for q, w in result if q.lower() == \"machine learning definition\"\n        ]\n        assert len(def_queries) == 1\n        assert (\n            def_queries[0][0] == \"machine learning definition\"\n        )  # First occurrence casing\n        assert def_queries[0][1] == 2.0  # 1.0 + 1.0\n\n        # Check that \"machine learning basics\" is present with its original weight\n        basics_queries = [\n            (q, w) for q, w in result if q.lower() == \"machine learning basics\"\n        ]\n        assert len(basics_queries) == 1\n        assert basics_queries[0][1] == 1.0\n\n    def test_special_characters_and_punctuation(self) -> None:\n        \"\"\"Test deduplication with special characters and punctuation.\"\"\"\n        queries_with_weights = [\n            (\"What's the weather?\", 1.0),\n            (\"what's the weather?\", 2.0),\n            (\"WHAT'S THE WEATHER?\", 1.5),\n        ]\n\n        result = deduplicate_queries(queries_with_weights)\n\n        assert len(result) == 1\n        assert result[0][0] == \"What's the weather?\"\n        assert result[0][1] == 4.5\n\n    def test_unicode_characters(self) -> None:\n        \"\"\"Test deduplication with unicode characters.\"\"\"\n        queries_with_weights = [\n            (\"Café\", 1.0),\n            (\"café\", 2.0),\n            (\"CAFÉ\", 1.5),\n        ]\n\n        result = deduplicate_queries(queries_with_weights)\n\n        assert len(result) == 1\n        assert result[0][0] == \"Café\"\n        assert result[0][1] == 4.5\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/test_tool_runner.py",
    "content": "from onyx.chat.models import ChatMessageSimple\nfrom onyx.chat.models import ToolCallSimple\nfrom onyx.configs.constants import MessageType\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.tools.models import ToolCallKickoff\nfrom onyx.tools.tool_runner import _extract_image_file_ids_from_tool_response_message\nfrom onyx.tools.tool_runner import _extract_recent_generated_image_file_ids\nfrom onyx.tools.tool_runner import _merge_tool_calls\n\n\ndef _make_tool_call(\n    tool_name: str,\n    tool_args: dict,\n    tool_call_id: str = \"call_1\",\n    turn_index: int = 0,\n    tab_index: int = 0,\n) -> ToolCallKickoff:\n    \"\"\"Helper to create a ToolCallKickoff for testing.\"\"\"\n    return ToolCallKickoff(\n        tool_call_id=tool_call_id,\n        tool_name=tool_name,\n        tool_args=tool_args,\n        placement=Placement(turn_index=turn_index, tab_index=tab_index),\n    )\n\n\nclass TestMergeToolCalls:\n    \"\"\"Tests for _merge_tool_calls function.\"\"\"\n\n    def test_empty_list(self) -> None:\n        \"\"\"Empty input returns empty output.\"\"\"\n        result = _merge_tool_calls([])\n        assert result == []\n\n    def test_single_search_tool_call_not_merged(self) -> None:\n        \"\"\"A single SearchTool call is returned as-is (no merging needed).\"\"\"\n        call = _make_tool_call(\n            tool_name=\"internal_search\",\n            tool_args={\"queries\": [\"query1\"]},\n            tool_call_id=\"call_1\",\n        )\n        result = _merge_tool_calls([call])\n\n        assert len(result) == 1\n        assert result[0].tool_name == \"internal_search\"\n        assert result[0].tool_args == {\"queries\": [\"query1\"]}\n        assert result[0].tool_call_id == \"call_1\"\n\n    def test_single_web_search_tool_call_not_merged(self) -> None:\n        \"\"\"A single WebSearchTool call is returned as-is.\"\"\"\n        call = _make_tool_call(\n            tool_name=\"web_search\",\n            tool_args={\"queries\": [\"web query\"]},\n        )\n        result = _merge_tool_calls([call])\n\n        assert len(result) == 1\n        assert result[0].tool_name == \"web_search\"\n        assert result[0].tool_args == {\"queries\": [\"web query\"]}\n\n    def test_single_open_url_tool_call_not_merged(self) -> None:\n        \"\"\"A single OpenURLTool call is returned as-is.\"\"\"\n        call = _make_tool_call(\n            tool_name=\"open_url\",\n            tool_args={\"urls\": [\"https://example.com\"]},\n        )\n        result = _merge_tool_calls([call])\n\n        assert len(result) == 1\n        assert result[0].tool_name == \"open_url\"\n        assert result[0].tool_args == {\"urls\": [\"https://example.com\"]}\n\n    def test_multiple_search_tool_calls_merged(self) -> None:\n        \"\"\"Multiple SearchTool calls have their queries merged into one call.\"\"\"\n        calls = [\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"query1\", \"query2\"]},\n                tool_call_id=\"call_1\",\n            ),\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"query3\"]},\n                tool_call_id=\"call_2\",\n            ),\n        ]\n        result = _merge_tool_calls(calls)\n\n        assert len(result) == 1\n        assert result[0].tool_name == \"internal_search\"\n        assert result[0].tool_args[\"queries\"] == [\"query1\", \"query2\", \"query3\"]\n        # Uses first call's ID\n        assert result[0].tool_call_id == \"call_1\"\n\n    def test_multiple_web_search_tool_calls_merged(self) -> None:\n        \"\"\"Multiple WebSearchTool calls have their queries merged.\"\"\"\n        calls = [\n            _make_tool_call(\n                tool_name=\"web_search\",\n                tool_args={\"queries\": [\"web1\"]},\n                tool_call_id=\"call_1\",\n            ),\n            _make_tool_call(\n                tool_name=\"web_search\",\n                tool_args={\"queries\": [\"web2\", \"web3\"]},\n                tool_call_id=\"call_2\",\n            ),\n        ]\n        result = _merge_tool_calls(calls)\n\n        assert len(result) == 1\n        assert result[0].tool_name == \"web_search\"\n        assert result[0].tool_args[\"queries\"] == [\"web1\", \"web2\", \"web3\"]\n\n    def test_multiple_open_url_tool_calls_merged(self) -> None:\n        \"\"\"Multiple OpenURLTool calls have their urls merged.\"\"\"\n        calls = [\n            _make_tool_call(\n                tool_name=\"open_url\",\n                tool_args={\"urls\": [\"https://a.com\"]},\n                tool_call_id=\"call_1\",\n            ),\n            _make_tool_call(\n                tool_name=\"open_url\",\n                tool_args={\"urls\": [\"https://b.com\", \"https://c.com\"]},\n                tool_call_id=\"call_2\",\n            ),\n        ]\n        result = _merge_tool_calls(calls)\n\n        assert len(result) == 1\n        assert result[0].tool_name == \"open_url\"\n        assert result[0].tool_args[\"urls\"] == [\n            \"https://a.com\",\n            \"https://b.com\",\n            \"https://c.com\",\n        ]\n\n    def test_non_mergeable_tool_not_merged(self) -> None:\n        \"\"\"Non-mergeable tools (e.g., python) are returned as separate calls.\"\"\"\n        calls = [\n            _make_tool_call(\n                tool_name=\"python\",\n                tool_args={\"code\": \"print(1)\"},\n                tool_call_id=\"call_1\",\n            ),\n            _make_tool_call(\n                tool_name=\"python\",\n                tool_args={\"code\": \"print(2)\"},\n                tool_call_id=\"call_2\",\n            ),\n        ]\n        result = _merge_tool_calls(calls)\n\n        assert len(result) == 2\n        assert result[0].tool_args[\"code\"] == \"print(1)\"\n        assert result[1].tool_args[\"code\"] == \"print(2)\"\n\n    def test_mixed_mergeable_and_non_mergeable(self) -> None:\n        \"\"\"Mix of mergeable and non-mergeable tools handles correctly.\"\"\"\n        calls = [\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"q1\"]},\n                tool_call_id=\"search_1\",\n            ),\n            _make_tool_call(\n                tool_name=\"python\",\n                tool_args={\"code\": \"x = 1\"},\n                tool_call_id=\"python_1\",\n            ),\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"q2\"]},\n                tool_call_id=\"search_2\",\n            ),\n        ]\n        result = _merge_tool_calls(calls)\n\n        # Should have 2 calls: merged search + python\n        assert len(result) == 2\n\n        tool_names = {r.tool_name for r in result}\n        assert tool_names == {\"internal_search\", \"python\"}\n\n        search_result = next(r for r in result if r.tool_name == \"internal_search\")\n        assert search_result.tool_args[\"queries\"] == [\"q1\", \"q2\"]\n\n        python_result = next(r for r in result if r.tool_name == \"python\")\n        assert python_result.tool_args[\"code\"] == \"x = 1\"\n\n    def test_multiple_different_mergeable_tools(self) -> None:\n        \"\"\"Multiple different mergeable tools each get merged separately.\"\"\"\n        calls = [\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"search1\"]},\n            ),\n            _make_tool_call(\n                tool_name=\"web_search\",\n                tool_args={\"queries\": [\"web1\"]},\n            ),\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"search2\"]},\n            ),\n            _make_tool_call(\n                tool_name=\"web_search\",\n                tool_args={\"queries\": [\"web2\"]},\n            ),\n        ]\n        result = _merge_tool_calls(calls)\n\n        # Should have 2 merged calls\n        assert len(result) == 2\n\n        search_result = next(r for r in result if r.tool_name == \"internal_search\")\n        assert search_result.tool_args[\"queries\"] == [\"search1\", \"search2\"]\n\n        web_result = next(r for r in result if r.tool_name == \"web_search\")\n        assert web_result.tool_args[\"queries\"] == [\"web1\", \"web2\"]\n\n    def test_preserves_first_call_placement(self) -> None:\n        \"\"\"Merged call uses the placement from the first call.\"\"\"\n        calls = [\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"q1\"]},\n                turn_index=1,\n                tab_index=2,\n            ),\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"q2\"]},\n                turn_index=3,\n                tab_index=4,\n            ),\n        ]\n        result = _merge_tool_calls(calls)\n\n        assert len(result) == 1\n        assert result[0].placement.turn_index == 1\n        assert result[0].placement.tab_index == 2\n\n    def test_preserves_other_args_from_first_call(self) -> None:\n        \"\"\"Merged call preserves non-merge-field args from the first call.\"\"\"\n        calls = [\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"q1\"], \"other_param\": \"value1\"},\n            ),\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"q2\"], \"other_param\": \"value2\"},\n            ),\n        ]\n        result = _merge_tool_calls(calls)\n\n        assert len(result) == 1\n        assert result[0].tool_args[\"queries\"] == [\"q1\", \"q2\"]\n        # Other params from first call are preserved\n        assert result[0].tool_args[\"other_param\"] == \"value1\"\n\n    def test_handles_empty_queries_list(self) -> None:\n        \"\"\"Handles calls with empty queries lists.\"\"\"\n        calls = [\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": []},\n            ),\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"q1\"]},\n            ),\n        ]\n        result = _merge_tool_calls(calls)\n\n        assert len(result) == 1\n        assert result[0].tool_args[\"queries\"] == [\"q1\"]\n\n    def test_handles_missing_merge_field(self) -> None:\n        \"\"\"Handles calls where the merge field is missing entirely.\"\"\"\n        calls = [\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={},  # No queries field\n            ),\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"q1\"]},\n            ),\n        ]\n        result = _merge_tool_calls(calls)\n\n        assert len(result) == 1\n        assert result[0].tool_args[\"queries\"] == [\"q1\"]\n\n    def test_handles_string_value_instead_of_list(self) -> None:\n        \"\"\"Handles edge case where merge field is a string instead of list.\"\"\"\n        calls = [\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": \"single_query\"},  # String instead of list\n            ),\n            _make_tool_call(\n                tool_name=\"internal_search\",\n                tool_args={\"queries\": [\"q2\"]},\n            ),\n        ]\n        result = _merge_tool_calls(calls)\n\n        assert len(result) == 1\n        # String should be converted to list item\n        assert result[0].tool_args[\"queries\"] == [\"single_query\", \"q2\"]\n\n\nclass TestImageHistoryExtraction:\n    def test_extracts_image_file_ids_from_json_response(self) -> None:\n        msg = '[{\"file_id\":\"img-1\",\"revised_prompt\":\"v1\"},{\"file_id\":\"img-2\",\"revised_prompt\":\"v2\"}]'\n        assert _extract_image_file_ids_from_tool_response_message(msg) == [\n            \"img-1\",\n            \"img-2\",\n        ]\n\n    def test_extracts_recent_generated_image_ids_from_history(self) -> None:\n        history = [\n            ChatMessageSimple(\n                message=\"\",\n                token_count=1,\n                message_type=MessageType.ASSISTANT,\n                tool_calls=[\n                    ToolCallSimple(\n                        tool_call_id=\"call_1\",\n                        tool_name=\"generate_image\",\n                        tool_arguments={\"prompt\": \"test\"},\n                        token_count=1,\n                    )\n                ],\n            ),\n            ChatMessageSimple(\n                message='[{\"file_id\":\"img-1\",\"revised_prompt\":\"r1\"}]',\n                token_count=1,\n                message_type=MessageType.TOOL_CALL_RESPONSE,\n                tool_call_id=\"call_1\",\n            ),\n        ]\n\n        assert _extract_recent_generated_image_file_ids(history) == [\"img-1\"]\n\n    def test_ignores_non_image_tool_responses(self) -> None:\n        history = [\n            ChatMessageSimple(\n                message=\"\",\n                token_count=1,\n                message_type=MessageType.ASSISTANT,\n                tool_calls=[\n                    ToolCallSimple(\n                        tool_call_id=\"call_1\",\n                        tool_name=\"web_search\",\n                        tool_arguments={\"queries\": [\"q\"]},\n                        token_count=1,\n                    )\n                ],\n            ),\n            ChatMessageSimple(\n                message='[{\"file_id\":\"img-1\",\"revised_prompt\":\"r1\"}]',\n                token_count=1,\n                message_type=MessageType.TOOL_CALL_RESPONSE,\n                tool_call_id=\"call_1\",\n            ),\n        ]\n\n        assert _extract_recent_generated_image_file_ids(history) == []\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/test_tool_runner_chat_files.py",
    "content": "\"\"\"\nUnit tests for chat_files handling in tool_runner.py.\n\nThese tests verify that chat files are properly passed to PythonTool\nthrough the PythonToolOverrideKwargs mechanism.\n\"\"\"\n\nimport pytest\n\nfrom onyx.tools.models import ChatFile\nfrom onyx.tools.models import PythonToolOverrideKwargs\n\n\nclass TestChatFilesPassingToPythonTool:\n    \"\"\"Tests for passing chat_files to PythonTool.\"\"\"\n\n    @pytest.fixture\n    def sample_chat_files(self) -> list[ChatFile]:\n        \"\"\"Create sample chat files for testing.\"\"\"\n        return [\n            ChatFile(filename=\"test.xlsx\", content=b\"excel content\"),\n            ChatFile(filename=\"data.csv\", content=b\"col1,col2\\n1,2\\n3,4\"),\n        ]\n\n    def test_chat_files_passed_to_python_tool_override_kwargs(\n        self,\n        sample_chat_files: list[ChatFile],\n    ) -> None:\n        \"\"\"Test that PythonToolOverrideKwargs correctly stores chat_files.\"\"\"\n        # Verify the override_kwargs structure stores chat_files correctly\n        override_kwargs = PythonToolOverrideKwargs(chat_files=sample_chat_files)\n\n        assert override_kwargs.chat_files == sample_chat_files\n        assert len(override_kwargs.chat_files) == 2\n        assert override_kwargs.chat_files[0].filename == \"test.xlsx\"\n        assert override_kwargs.chat_files[0].content == b\"excel content\"\n        assert override_kwargs.chat_files[1].filename == \"data.csv\"\n\n    def test_empty_chat_files_defaults_to_empty_list(self) -> None:\n        \"\"\"Test that empty chat_files defaults to empty list.\"\"\"\n        override_kwargs = PythonToolOverrideKwargs()\n        assert override_kwargs.chat_files == []\n\n    def test_none_chat_files_handled_in_tool_runner(self) -> None:\n        \"\"\"Test that None chat_files are handled gracefully in the tool_runner code path.\n\n        The tool_runner.py uses `chat_files or []` pattern when creating\n        PythonToolOverrideKwargs, so we verify this pattern works correctly.\n        \"\"\"\n        # Simulate the pattern used in tool_runner.py:\n        # override_kwargs = PythonToolOverrideKwargs(chat_files=chat_files or [])\n        chat_files_param: list[ChatFile] | None = None\n\n        # This is the exact pattern used in tool_runner.py\n        override_kwargs = PythonToolOverrideKwargs(\n            chat_files=chat_files_param or [],\n        )\n\n        assert override_kwargs.chat_files == []\n        assert isinstance(override_kwargs.chat_files, list)\n\n\nclass TestChatFileConversion:\n    \"\"\"Tests for ChatLoadedFile to ChatFile conversion.\"\"\"\n\n    def test_convert_loaded_files_to_chat_files(self) -> None:\n        \"\"\"Test conversion of ChatLoadedFile to ChatFile.\"\"\"\n        from onyx.chat.models import ChatLoadedFile\n        from onyx.chat.process_message import _convert_loaded_files_to_chat_files\n        from onyx.file_store.models import ChatFileType\n\n        # Create sample ChatLoadedFile objects\n        loaded_files = [\n            ChatLoadedFile(\n                file_id=\"file-1\",\n                content=b\"test content 1\",\n                file_type=ChatFileType.DOC,\n                filename=\"document.pdf\",\n                content_text=\"parsed text\",\n                token_count=10,\n            ),\n            ChatLoadedFile(\n                file_id=\"file-2\",\n                content=b\"csv,data\\n1,2\",\n                file_type=ChatFileType.TABULAR,\n                filename=\"data.csv\",\n                content_text=\"csv,data\\n1,2\",\n                token_count=5,\n            ),\n        ]\n\n        # Convert to ChatFile\n        chat_files = _convert_loaded_files_to_chat_files(loaded_files)\n\n        assert len(chat_files) == 2\n        assert chat_files[0].filename == \"document.pdf\"\n        assert chat_files[0].content == b\"test content 1\"\n        assert chat_files[1].filename == \"data.csv\"\n        assert chat_files[1].content == b\"csv,data\\n1,2\"\n\n    def test_convert_files_with_none_content_skipped(self) -> None:\n        \"\"\"Test that files with None content are skipped.\"\"\"\n        from onyx.chat.models import ChatLoadedFile\n        from onyx.chat.process_message import _convert_loaded_files_to_chat_files\n        from onyx.file_store.models import ChatFileType\n\n        loaded_files = [\n            ChatLoadedFile(\n                file_id=\"file-1\",\n                content=b\"valid content\",\n                file_type=ChatFileType.DOC,\n                filename=\"valid.pdf\",\n                content_text=\"text\",\n                token_count=10,\n            ),\n            ChatLoadedFile(\n                file_id=\"file-2\",\n                content=b\"\",\n                file_type=ChatFileType.DOC,\n                filename=\"invalid.pdf\",\n                content_text=None,\n                token_count=0,\n            ),\n        ]\n\n        chat_files = _convert_loaded_files_to_chat_files(loaded_files)\n\n        # Only the file with valid content should be included\n        assert len(chat_files) == 1\n        assert chat_files[0].filename == \"valid.pdf\"\n\n    def test_convert_files_with_missing_filename_uses_fallback(self) -> None:\n        \"\"\"Test that files without filename use file_id as fallback.\"\"\"\n        from onyx.chat.models import ChatLoadedFile\n        from onyx.chat.process_message import _convert_loaded_files_to_chat_files\n        from onyx.file_store.models import ChatFileType\n\n        loaded_files = [\n            ChatLoadedFile(\n                file_id=\"abc123\",\n                content=b\"content\",\n                file_type=ChatFileType.DOC,\n                filename=None,\n                content_text=\"text\",\n                token_count=5,\n            ),\n        ]\n\n        chat_files = _convert_loaded_files_to_chat_files(loaded_files)\n\n        assert len(chat_files) == 1\n        assert chat_files[0].filename == \"file_abc123\"\n\n    def test_convert_empty_list_returns_empty(self) -> None:\n        \"\"\"Test that empty input returns empty output.\"\"\"\n        from onyx.chat.process_message import _convert_loaded_files_to_chat_files\n\n        chat_files = _convert_loaded_files_to_chat_files([])\n        assert chat_files == []\n\n\nclass TestChatFileModel:\n    \"\"\"Tests for the ChatFile model itself.\"\"\"\n\n    def test_chat_file_creation(self) -> None:\n        \"\"\"Test ChatFile model creation.\"\"\"\n        chat_file = ChatFile(\n            filename=\"test.xlsx\",\n            content=b\"binary content\",\n        )\n\n        assert chat_file.filename == \"test.xlsx\"\n        assert chat_file.content == b\"binary content\"\n\n    def test_chat_file_with_unicode_filename(self) -> None:\n        \"\"\"Test ChatFile with unicode filename.\"\"\"\n        chat_file = ChatFile(\n            filename=\"报告.xlsx\",\n            content=b\"content\",\n        )\n\n        assert chat_file.filename == \"报告.xlsx\"\n\n    def test_chat_file_with_spaces_in_filename(self) -> None:\n        \"\"\"Test ChatFile with spaces in filename.\"\"\"\n        chat_file = ChatFile(\n            filename=\"my file name.xlsx\",\n            content=b\"content\",\n        )\n\n        assert chat_file.filename == \"my file name.xlsx\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/test_tool_utils.py",
    "content": "import pytest\n\nfrom onyx.llm.constants import LlmProviderNames\nfrom onyx.tools.utils import explicit_tool_calling_supported\n\n\n@pytest.mark.parametrize(\n    \"model_provider, model_name, expected_result\",\n    [\n        (LlmProviderNames.ANTHROPIC, \"claude-4-sonnet-20250514\", True),\n        (\n            \"another-provider\",\n            \"claude-haiku-4-5-20251001\",\n            True,\n        ),\n        (\n            LlmProviderNames.ANTHROPIC,\n            \"claude-3-sonnet-20240229\",\n            False,\n        ),\n        (\n            LlmProviderNames.BEDROCK,\n            \"amazon.titan-text-express-v1\",\n            False,\n        ),\n        (LlmProviderNames.OPENAI, \"gpt-4o\", True),\n        (LlmProviderNames.OPENAI, \"gpt-3.5-turbo-instruct\", False),\n    ],\n)\ndef test_explicit_tool_calling_supported(\n    model_provider: str,\n    model_name: str,\n    expected_result: bool,\n) -> None:\n    \"\"\"\n    Anthropic models support tool calling, but\n    a) will raise an error if you provide any tool messages and don't provide a list of tools.\n    b) will send text before and after generating tool calls.\n    We don't want to provide that list of tools because our UI doesn't support sequential\n    tool calling yet for (a) and just looks bad for (b), so for now we just treat anthropic\n    models as non-tool-calling.\n\n    Additionally, for Bedrock provider, any model containing an anthropic model name as a\n    substring should also return False for the same reasons.\n    \"\"\"\n    actual_result = explicit_tool_calling_supported(model_provider, model_name)\n    assert actual_result == expected_result\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/tool_implementations/open_url/data/test_snippet_finding_data.json",
    "content": "{\n\t\"categories\": [\n\t\t{\n\t\t\t\"category\": \"find_snippet_simple\",\n\t\t\t\"tests\": [\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"exact_match\",\n\t\t\t\t\t\"content\": \"The quick brown fox jumps over the lazy dog.\",\n\t\t\t\t\t\"snippet\": \"The quick brown fox jumps over the lazy dog.\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 0,\n\t\t\t\t\t\t\"expected_end_idx\": 43\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"match_at_start\",\n\t\t\t\t\t\"content\": \"The weather in Sydney is sunny today.\",\n\t\t\t\t\t\"snippet\": \"weather in Sydney\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 4,\n\t\t\t\t\t\t\"expected_end_idx\": 20\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"match_at_end\",\n\t\t\t\t\t\"content\": \"There are multiple things to consider about AI models including how they are trained. That can impact the fine-tuning results.\",\n\t\t\t\t\t\"snippet\": \"impact the fine-tuning results.\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 95,\n\t\t\t\t\t\t\"expected_end_idx\": 125\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"match_in_middle\",\n\t\t\t\t\t\"content\": \"the total fertility rate was highest in countries in Africa and central Asia, where most countries had a total fertility rate between 3 to 7 births per woman, and lowest in countries in East Asia, where most countries had a total\",\n\t\t\t\t\t\"snippet\": \"total fertility rate was highest in countries in Africa and central Asia\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 4,\n\t\t\t\t\t\t\"expected_end_idx\": 75\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"We expect the first match to be returned\",\n\t\t\t\t\t\"name\": \"multiple_matches_in_content\",\n\t\t\t\t\t\"content\": \"What's the go? Hey there mate. How are you doing? Hey there mate\",\n\t\t\t\t\t\"snippet\": \"Hey there mate\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 15,\n\t\t\t\t\t\t\"expected_end_idx\": 28\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t}\n\t\t\t]\n\t\t},\n\t\t{\n\t\t\t\"category\": \"find_snippet_normalized\",\n\t\t\t\"tests\": [\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"normalized_exact_match\",\n\t\t\t\t\t\"content\": \"The quick  brown  fox, jumps  over the lazy   dog!\",\n\t\t\t\t\t\"snippet\": \"The quick brown fox jumps over the lazy dog\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 0,\n\t\t\t\t\t\t\"expected_end_idx\": 49\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"normalized_match_with_html_entities\",\n\t\t\t\t\t\"content\": \"You&apos;re our first priority.\\nEvery time.\\nWe believe everyone should be able to make financial decisions with\\nconfidence.\",\n\t\t\t\t\t\"snippet\": \"everyone should be able to make financial decisions\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 55,\n\t\t\t\t\t\t\"expected_end_idx\": 105\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"multiple_html_entities\",\n\t\t\t\t\t\"content\": \"Guess&apos;what&hellip;&#39;is up?\",\n\t\t\t\t\t\"snippet\": \"Guess'what...'is up?\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 0,\n\t\t\t\t\t\t\"expected_end_idx\": 33\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"html_entity_in_snippet\",\n\t\t\t\t\t\"content\":\"Guess'what...'is up?\",\n\t\t\t\t\t\"snippet\": \"Guess&apos;what&hellip;&#39;is up?\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 0,\n\t\t\t\t\t\t\"expected_end_idx\": 19\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"multiple_whitespace\",\n\t\t\t\t\t\"content\": \"Hello  there\",\n\t\t\t\t\t\"snippet\": \"Hello there\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 0,\n\t\t\t\t\t\t\"expected_end_idx\": 11\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"lots_of_punctuation\",\n\t\t\t\t\t\"content\": \"Like OMG!!! this, is, crazy!!! right? he said 'i dont even know' but  I do know that...\",\n\t\t\t\t\t\"snippet\": \"this is crazy\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 12,\n\t\t\t\t\t\t\"expected_end_idx\": 26\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"lots_of_punctuation_again\",\n\t\t\t\t\t\"content\": \"Like OMG!!! this, is, crazy!!! right? he said 'i don't even know' but  I do know that...\",\n\t\t\t\t\t\"snippet\": \"i don't even know\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 47,\n\t\t\t\t\t\t\"expected_end_idx\": 63\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"case_insensitive_match\",\n\t\t\t\t\t\"content\": \"HelLo There\",\n\t\t\t\t\t\"snippet\": \"hello\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 0,\n\t\t\t\t\t\t\"expected_end_idx\": 4\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"curly_apostrophe_normalization\",\n\t\t\t\t\t\"content\": \"It’s a test\",\n\t\t\t\t\t\"snippet\": \"it's a test\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t  \"snippet_located\": true,\n\t\t\t\t\t  \"expected_start_idx\": 0,\n\t\t\t\t\t  \"expected_end_idx\": 10\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"unicode_dash_normalization\",\n\t\t\t\t\t\"content\": \"pages 3–5 are included\",\n\t\t\t\t\t\"snippet\": \"3-5\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t  \"snippet_located\": true,\n\t\t\t\t\t  \"expected_start_idx\": 6,\n\t\t\t\t\t  \"expected_end_idx\": 8\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"nbsp_and_tab_whitespace\",\n\t\t\t\t\t\"content\": \"A\\u00A0B\\tC\",\n\t\t\t\t\t\"snippet\": \"A B C\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t  \"snippet_located\": true,\n\t\t\t\t\t  \"expected_start_idx\": 0,\n\t\t\t\t\t  \"expected_end_idx\": 4\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"zero_width_space_inside_word\",\n\t\t\t\t\t\"content\": \"he\\u200Bllo world\",\n\t\t\t\t\t\"snippet\": \"hello\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t  \"snippet_located\": true,\n\t\t\t\t\t  \"expected_start_idx\": 0,\n\t\t\t\t\t  \"expected_end_idx\": 5\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"unicode_combining_accent_normalization\",\n\t\t\t\t\t\"content\": \"Cafe\\u0301 is open\",\n\t\t\t\t\t\"snippet\": \"Café\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t  \"snippet_located\": true,\n\t\t\t\t\t  \"expected_start_idx\": 0,\n\t\t\t\t\t  \"expected_end_idx\": 4\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"amp_and_nbsp_entities\",\n\t\t\t\t\t\"content\": \"Tom &amp; Jerry&nbsp;Show\",\n\t\t\t\t\t\"snippet\": \"Tom & Jerry Show\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t  \"snippet_located\": true,\n\t\t\t\t\t  \"expected_start_idx\": 0,\n\t\t\t\t\t  \"expected_end_idx\": 24\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"We expect to pick the first normalised occurrence\",\n\t\t\t\t\t\"name\": \"multiple_occurrences_pick_first\",\n\t\t\t\t\t\"content\": \"foo  bar... foo bar\",\n\t\t\t\t\t\"snippet\": \"foo bar\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t  \"snippet_located\": true,\n\t\t\t\t\t  \"expected_start_idx\": 0,\n\t\t\t\t\t  \"expected_end_idx\": 7\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"match_on_unicode_character\",\n\t\t\t\t\t\"content\": \"Sunrise\\nSunrise\\nSunset\\nSunset\\n## Hourly Weather\\n[Next 48 Hours] \\n## Don&#x27;t Miss\\n[\\n![] \\n## A Rare Southern Snowstorm: What You Need To Know\\n] [\\n![] \\n## Multiple Systems To Bring Snow To Great Lakes, Northeast\",\n\t\t\t\t\t\"snippet\": \"Sunset\\nSunset\\n## Hourly Weather\\n[Next 48 Hours] \\n## Don't Miss\\n[\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 16,\n\t\t\t\t\t\t\"expected_end_idx\": 84\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"complex_normalisation\",\n\t\t\t\t\t\"content\": [\n\t\t\t\t\t\t\"* [Press Releases] \",\n\t\t\t\t\t\t\"* [Careers] \",\n\t\t\t\t\t\t\"* [Solutions] \",\n\t\t\t\t\t\t\"* * [WeatherBug] \",\n\t\t\t\t\t\t\"* [Consumer] \",\n\t\t\t\t\t\t\"* [Corporate] \",\n\t\t\t\t\t\t\"* [WeatherBug] \",\n\t\t\t\t\t\t\"* [Consumer] \",\n\t\t\t\t\t\t\"* [Corporate] \",\n\t\t\t\t\t\t\"[![]![]] [![Responsive menu icon]] \",\n\t\t\t\t\t\t\"* [Now] \",\n\t\t\t\t\t\t\"* [Hourly] \",\n\t\t\t\t\t\t\"* [10 Day] \",\n\t\t\t\t\t\t\"* * # Today&#x27;s Weather - Sydney, AUS\",\n\t\t\t\t\t\t\"December 8, 2025\",\n\t\t\t\t\t\t\"1:01 AM\",\n\t\t\t\t\t\t\"SYDNEY INTL AIRP\",\n\t\t\t\t\t\t\"68&#xB0;\",\n\t\t\t\t\t\t\"Feels Like68&#xB0;\",\n\t\t\t\t\t\t\"Hi--Lo65&#xB0;F\",\n\t\t\t\t\t\t\"![30% Chance of Light Rain] \",\n\t\t\t\t\t\t\"30% Chance of Light Rain\",\n\t\t\t\t\t\t\"* [\",\n\t\t\t\t\t\t\"Live Radar\",\n\t\t\t\t\t\t\"] \",\n\t\t\t\t\t\t\"* * Weather Details\",\n\t\t\t\t\t\t\"* Windchill68&#xBA;FDaily Rain0&quot;\",\n\t\t\t\t\t\t\"* Dewpoint54&#xBA;FMonthly Rain--\",\n\t\t\t\t\t\t\"* Humidity60%Avg. WindENE 9mph\",\n\t\t\t\t\t\t\"* Pressure29.97&quot;Wind Gust9mph\",\n\t\t\t\t\t\t\"* Sunrise5:37 AMMoonWaning Gibbous\",\n\t\t\t\t\t\t\"* \"\n\t\t\t\t\t],\n\t\t\t\t\t\"snippet\": [\n\t\t\t\t\t\t\"[![]![]] [![Responsive menu icon]] \",\n\t\t\t\t\t\t\"* [Now] \",\n\t\t\t\t\t\t\"* [Hourly] \",\n\t\t\t\t\t\t\"* [10 Day] \",\n\t\t\t\t\t\t\"* * # Today's Weather - Sydney, AUS\",\n\t\t\t\t\t\t\"December 8, 2025\",\n\t\t\t\t\t\t\"1:01 AM\",\n\t\t\t\t\t\t\"SYDNEY INTL AIRP\",\n\t\t\t\t\t\t\"68°\",\n\t\t\t\t\t\t\"Feels Like68°\",\n\t\t\t\t\t\t\"Hi--Lo65°F\",\n\t\t\t\t\t\t\"![30% Chance of Light Rain]\"\n\t\t\t\t\t],\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 140,\n\t\t\t\t\t\t\"expected_end_idx\": 362\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"bad_ampersand\",\n\t\t\t\t\t\"content\": [\n\t\t\t\t\t\t\"duals focus only on the price returns of the index, dividends play an important role in overall investment returns.\",\n\t\t\t\t\t\t\"# S&amp;P 500 Total Returns by Year\",\n\t\t\t\t\t\t\"|Year|Total Return|\",\n\t\t\t\t\t\t\"2026|1.23%|\",\n\t\t\t\t\t\t\"2025|17.88%|\",\n\t\t\t\t\t\t\"2024|25.02%|\",\n\t\t\t\t\t\t\"2023|26.29%|\",\n\t\t\t\t\t\t\"2022|-18.11%\",\n\t\t\t\t\t\t\"2021|28.71%|\",\n\t\t\t\t\t\t\"2020|18.40%|\",\n\t\t\t\t\t\t\"2019|31.49%|\",\n\t\t\t\t\t\t\"2018|-4.38%|\",\n\t\t\t\t\t\t\"2017|21.83%|\",\n\t\t\t\t\t\t\"2016|11.96%|\",\n\t\t\t\t\t\t\"2015|1.38%|\",\n\t\t\t\t\t\t\"2014|13.69%|\",\n\t\t\t\t\t\t\"2013|32.39%|\",\n\t\t\t\t\t\t\"2012|16.00%|\",\n\t\t\t\t\t\t\"2011|2.11%|\",\n\t\t\t\t\t\t\"2010|15.06%|\",\n\t\t\t\t\t\t\"2009|26.46%|\",\n\t\t\t\t\t\t\"2008|-37.00%\",\n\t\t\t\t\t\t\"2007|5.49%|\",\n\t\t\t\t\t\t\"2006|15.79%|\",\n\t\t\t\t\t\t\"2005|4.91%|\",\n\t\t\t\t\t\t\"2004|10.88%|\",\n\t\t\t\t\t\t\"2003|28.68%|\",\n\t\t\t\t\t\t\"2002|-22.10%\",\n\t\t\t\t\t\t\"2001|-11.89%\",\n\t\t\t\t\t\t\"2000|-9.10%|\",\n\t\t\t\t\t\t\"1999|21.04%|\",\n\t\t\t\t\t\t\"1998|28.58%|\",\n\t\t\t\t\t\t\"1997|33.36%|\",\n\t\t\t\t\t\t\"1996|22.96%|\",\n\t\t\t\t\t\t\"1995|37.58%|\",\n\t\t\t\t\t\t\"1994|1.32%|\",\n\t\t\t\t\t\t\"1993|10.08%|\",\n\t\t\t\t\t\t\"1992|7.62%|\",\n\t\t\t\t\t\t\"1991|30.\"\n\t\t\t\t\t],\n\t\t\t\t\t\"snippet\": [\n\t\t\t\t\t\t\"# S&P 500 Total Returns by Year\",\n\t\t\t\t\t\t\"|Year|Total Return|\",\n\t\t\t\t\t\t\"2026|1.23%|\",\n\t\t\t\t\t\t\"2025|17.88%|\",\n\t\t\t\t\t\t\"2024|25.02%|\",\n\t\t\t\t\t\t\"2023|26.29%|\",\n\t\t\t\t\t\t\"2022|-18.11%|\",\n\t\t\t\t\t\t\"2021|28.71%|\"\n\t\t\t\t\t],\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 116,\n\t\t\t\t\t\t\"expected_end_idx\": 247\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"more_random_code_entities\",\n\t\t\t\t\t\"content\": [\n\t\t\t\t\t\t\"* [See more FAQs] \",\n\t\t\t\t\t\t\"* [Find routing and account numbers] \",\n\t\t\t\t\t\t\"## Popular FAQs\",\n\t\t\t\t\t\t\"* [How do I find my routing and account numbers?] \",\n\t\t\t\t\t\t\"* [Is there a fee for Zelle®?] \",\n\t\t\t\t\t\t\"* [How do I report suspected fraud?] \",\n\t\t\t\t\t\t\"* [See more FAQs] \",\n\t\t\t\t\t\t\"# Current mortgage and refinance rates\",\n\t\t\t\t\t\t\"&zwj;\",\n\t\t\t\t\t\t\"## Mortgage interest rates today\",\n\t\t\t\t\t\t\"&#160;\",\n\t\t\t\t\t\t\"![] \",\n\t\t\t\t\t\t\"## Get a customized rate and payment\",\n\t\t\t\t\t\t\"See how much you could qualify to borrow and what your estimated rate and payment would be. It takes just a few minutes and won’t affect your credit score.\",\n\t\t\t\t\t\t\"[Estimate your rate] \",\n\t\t\t\t\t\t\"&zwj;\",\n\t\t\t\t\t\t\"## Common question\"\n\t\t\t\t\t],\n\t\t\t\t\t\"snippet\": [\n\t\t\t\t\t\t\"* [See more FAQs] \",\n\t\t\t\t\t\t\"# Current mortgage and refinance rates\",\n\t\t\t\t\t\t\"‍\",\n\t\t\t\t\t\t\"## Mortgage interest rates today\",\n\t\t\t\t\t\t\" \",\n\t\t\t\t\t\t\"![] \",\n\t\t\t\t\t\t\"## Get a customized rate and payment\"\n\t\t\t\t\t],\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 194,\n\t\t\t\t\t\t\"expected_end_idx\": 338\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": false\n\t\t\t\t}\n\t\t\t]\n\t\t},\n\t\t{\n\t\t\t\"category\": \"token_matching\",\n\t\t\t\"tests\": [\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"match_on_different_numbers\",\n\t\t\t\t\t\"content\": [\n\t\t\t\t\t\t\"°C\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"28Sun.\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"21°13 °C\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"29Mon.\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"20°12 °C\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"30Tue.\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"20°13 °C\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"### SeptemberWeather Overview\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"| | |\",\n\t\t\t\t\t\t\"| --- | --- |\",\n\t\t\t\t\t\t\"| Sunny | 13 |\",\n\t\t\t\t\t\t\"| Cloudy | 3 |\",\n\t\t\t\t\t\t\"| Rainy | 13 |\",\n\t\t\t\t\t\t\"| Snowy | 0 |\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"#### No. of days:\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Sunny\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"13\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Cloudy\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"3\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Rainy\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"13\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Snowy\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"0\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"The monthly weather averages in Sydney consist of 13 sunny days, 3 cloudy days, 13 rainy days, and 0 snowy days.\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"## Sydney's Locations Weather Conditions\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Locations\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Temp\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Condition\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Humi.\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"UV\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Wind Speed & Direction\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"[Cook And Phillip Sydney East] \",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"13 °C\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Moderate rain\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"100%\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"0\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"27.7 kmph /  NW\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"[Day Street] \",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"13 °C\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Moderate rain\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"100%\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"0\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"22.3 kmph /  NW\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"[Earlwood Sydney East\"\n\t\t\t\t\t],\n\t\t\t\t\t\"snippet\": [\n\t\t\t\t\t\t\"The monthly weather averages in Sydney consist of 9 sunny days, 1 cloudy days, 20 rainy days, and 0 snowy days.\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"## Sydney - Weather Conditions australia\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Locations\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Temp\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Condition\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Humi.\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"UV\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Wind Speed & Direction\",\n\t\t\t\t\t\t\"\"\n\t\t\t\t\t],\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 235,\n\t\t\t\t\t\t\"expected_end_idx\": 448\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"token_match_boundary_but_different_length\",\n\t\t\t\t\t\"name\": \"critical_negation_difference\",\n\t\t\t\t\t\"content\": \"The FDA has approved the new vaccine for emergency use in adults over 18 years old.\",\n\t\t\t\t\t\"snippet\": \"The FDA has not approved the new vaccine for emergency use in adults over 18 years old.\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": true,\n\t\t\t\t\t\t\"expected_start_idx\": 0,\n\t\t\t\t\t\t\"expected_end_idx\": 82\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t}\n\t\t\t]\n\t\t},\n\t\t{\n\t\t\t\"category\": \"no_match\",\n\t\t\t\"tests\": [\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"chicago_difference\",\n\t\t\t\t\t\"snippet\": \"In Chicago, how much snow falls each winter can vary dramatically. According to the National Weather Service, seasonal snowfall totals from 1900 to today have ranged from a low of **9.8 inches (1920–1921)** to a high of **89.7 inches\",\n\t\t\t\t\t\"content\": [\n\t\t\t\t\t\t\"While Chicago’s known as the [Windy City], that lake breeze isn’t the only weather phenomenon that might take your breath away when you’re visiting our city. Does it snow in Chicago? Indeed, it does! Although the first measurable snowfall comes later than usual in some years, it definitely does snow in Chicago. If you’re visiting in the winter, take a moment to brush up on when it will snow in Chicago and how much snow will fall each month.\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"[Upcoming Events] \",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"[Plan Your Visit] \",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"## When Will it Snow in Chicago?\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"If you live here, you already know that in Chicago, how much snow falls each month can vary widely from year to year! Sometimes the first snow arrives in October, while other years see hardly a flurry through December. So how do meteorologists answer the question of when does it snow in Chicago? Generally, they predict snowfall off and on from November through March, with April getting the occasional dusting. Whenever it does snow in Chicago, be sure to take advantage of the Chicago snow on the area’s many great sledding hills!\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"## Chicago Snow Totals by Year\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"In Chicago, how much snow falls now compared to previous years? With records going back to 1900, the National Weather Service has tracked how much snow falls in Chicago. Seasonally in the years from 1900-present, the average inches of snow in Chicago have annually ranged from 9.8 inches in 1920 – 1921 to 89.7 inches in 1978 – 1979. That’s quite a range of inches of snow in Chicago! Winter 2020 – 2021 got a total of 48.8, with the year prior getting quite a bit less, just 34.8 inches. Take a look at the annual inches of snow in Chicago over the last decade:\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"- 2021 – 2022—32.8 in\",\n\t\t\t\t\t\t\"- 2020 – 2021—48.8 in\",\n\t\t\t\t\t\t\"- 2019 – 2020—34.8 in\",\n\t\t\t\t\t\t\"- 2018 – 2019—49.5 in\",\n\t\t\t\t\t\t\"- 2017 – 2018—36. in\",\n\t\t\t\t\t\t\"- 2016 – 2017—26.1 in\",\n\t\t\t\t\t\t\"- 2015 – 2016—31.2 in\",\n\t\t\t\t\t\t\"- 2014 – 2015—50.7 in\",\n\t\t\t\t\t\t\"- 2013 – 2014—82.0 in\",\n\t\t\t\t\t\t\"- 2012 – 2013—30.1 in\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Here are the winter monthly snowfall ranges for winter 2020-2021:\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"- November—0.7 inches of snowfall (normal = 1.8 inches)\",\n\t\t\t\t\t\t\"- December—2.8 inches of snowfall (normal = 7.6 inches)\",\n\t\t\t\t\t\t\"- January—21.9 inches of snowfall (normal = 11.3 inches)\",\n\t\t\t\t\t\t\"- February—21.6 inches of snowfall (normal = 10.7 inches)\",\n\t\t\t\t\t\t\"- March—1.8 inches of snowfall (normal = 5.5 inches)\",\n\t\t\t\t\t\t\"- April—none (normal = 1.3 inches)\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"## Enjoy Winter Weather in Chicago!\",\n\t\t\t\t\t\t\"\",\n\t\t\t\t\t\t\"Now that you know about the typical Chicago snow totals and when it will snow in Chicago, you can pack your parka and boots and prepare to witness the magic of Chicagoland in wintertime! Schedule a [visit to the Skydeck] this winter to see Chicago blanketed in snow from the [most unique vantage point] available. We’re here to help you plan your visit—contact us for details about [dining on The Ledge], [Skydeck engagement parties], and other special event options!\"\n\t\t\t\t\t],\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"sport-game-change\",\n\t\t\t\t\t\"snippet\": [\n\t\t\t\t\t\t\"## [MiamiHeatvsBostonCeltics] \",\n\t\t\t\t\t\t\"LIVE[Dec 19, 2025] ·7:00 PM EST·TD GardenBoston, MA\",\n\t\t\t\t\t\t\"```\",\n\t\t\t\t\t\t\"10:49 [91-98] [MIA] MISS S. Fontecchio 8' driving Layup\",\n\t\t\t\t\t\t\"10:48 [91-98] [BOS] H. González REBOUND (Off:1 Def:5)\"\n\t\t\t\t\t],\n\t\t\t\t\t\"content\": [\n\t\t\t\t\t\t\"Spoiler-Free NBA Scores, Recaps & Game Ratings\",\n\t\t\t\t\t\t\"[Wikihoops] \",\n\t\t\t\t\t\t\"# Spoiler-Free NBA Scores, Recaps & Game Ratings\",\n\t\t\t\t\t\t\"[Open Menu] [Close menu] \",\n\t\t\t\t\t\t\"[![Logo]] \",\n\t\t\t\t\t\t\"[] [] \",\n\t\t\t\t\t\t\"## NBA ·2025-26 Regular Season\",\n\t\t\t\t\t\t\"### 5 Games on Thursday[December 4, 2025] \",\n\t\t\t\t\t\t\"## [BostonCelticsvsWashingtonWizards] \",\n\t\t\t\t\t\t\"FINAL[Dec 4, 2025] ·7:00 PM EST·Capital One ArenaWashington, DC\",\n\t\t\t\t\t\t\"```\",\n\t\t\t\t\t\t\"No play-by-play available\",\n\t\t\t\t\t\t\"```\",\n\t\t\t\t\t\t\"1/10\",\n\t\t\t\t\t\t\"Rating-99%[Vote up] 1[Vote down] 10\",\n\t\t\t\t\t\t\"* [Show Ratings] \",\n\t\t\t\t\t\t\"* [Add bookmark] [Edit bookmark] \",\n\t\t\t\t\t\t\"* [Save game] [Remove from saved games] \",\n\t\t\t\t\t\t\"## [Golden StateWarriorsvsPhiladelphiaSixers] \",\n\t\t\t\t\t\t\"FINAL[Dec 4, 2025] ·7:00 PM EST·Xfinity Mobile ArenaPhiladelphia, PANBA TV\",\n\t\t\t\t\t\t\"```\",\n\t\t\t\t\t\t\"No play-by-play available\",\n\t\t\t\t\t\t\"```\",\n\t\t\t\t\t\t\"8/10\",\n\t\t\t\t\t\t\"Rating2493%[Vote up] 26[Vote down] 2\",\n\t\t\t\t\t\t\"* [Show Ratings] \",\n\t\t\t\t\t\t\"* [Add bookmark] [Edit bookmark] \",\n\t\t\t\t\t\t\"* [Save game] [Remove from saved games] \",\n\t\t\t\t\t\t\"## [Los AngelesLakersvsTorontoRaptors] \",\n\t\t\t\t\t\t\"FINAL[Dec 4, 2025] ·7:30 PM EST·Scotiabank ArenaToronto, ON\",\n\t\t\t\t\t\t\"```\",\n\t\t\t\t\t\t\"No play-by-play available\",\n\t\t\t\t\t\t\"```\",\n\t\t\t\t\t\t\"9/10\",\n\t\t\t\t\t\t\"Rating2894%[Vote up] 30[Vote down] 2\",\n\t\t\t\t\t\t\"* [Show Ratings] \",\n\t\t\t\t\t\t\"* [Add bookmark] [Edit bookmark] \",\n\t\t\t\t\t\t\"* [Save game] [Remove from saved games] \",\n\t\t\t\t\t\t\"## [UtahJazzvsBrooklynNets] \",\n\t\t\t\t\t\t\"FINAL[Dec 4, 2025] ·7:30 PM EST·Barclays CenterBrooklyn, NY\",\n\t\t\t\t\t\t\"```\",\n\t\t\t\t\t\t\"No play-by-play available\",\n\t\t\t\t\t\t\"```\",\n\t\t\t\t\t\t\"4/10\",\n\t\t\t\t\t\t\"Rating-333%[Vote up] 3[Vote down] 6\",\n\t\t\t\t\t\t\"* [Show Ratings] \",\n\t\t\t\t\t\t\"* [Add bookmark] [Edit bookmark] \",\n\t\t\t\t\t\t\"* [Save game] [Remove from saved games] \",\n\t\t\t\t\t\t\"## [MinnesotaWolvesvsNew OrleansPelicans] \",\n\t\t\t\t\t\t\"FINAL[Dec 4, 2025] ·8:00 PM EST·Smoothie King CenterNew Orleans, LA\",\n\t\t\t\t\t\t\"```\",\n\t\t\t\t\t\t\"No play-by-play available\",\n\t\t\t\t\t\t\"```\",\n\t\t\t\t\t\t\"6/10\",\n\t\t\t\t\t\t\"Rating157%[Vote up] 4[Vote down] 3\",\n\t\t\t\t\t\t\"* [Show Ratings] \",\n\t\t\t\t\t\t\"* [Add bookmark] [Edit bookmark] \",\n\t\t\t\t\t\t\"* [Save game] [Remove from saved games] \",\n\t\t\t\t\t\t\"### Homeis where the![W] is\",\n\t\t\t\t\t\t\"Add the Wikihoops web app to your home screen. No app store download required.\",\n\t\t\t\t\t\t\"Install Web AppHow to install Progressive Web Apps (PWAs)\",\n\t\t\t\t\t\t\"* **On Android**, Firefox, Chrome, Edge, Opera, and Samsung Internet Browser all support installing PWAs\",\n\t\t\t\t\t\t\"* **On iOS**, PWAs may or[may not be supported] \",\n\t\t\t\t\t\t\"* Chrome and Edge support installing PWAs on**Linux, Windows, macOS, and Chromebooks**\",\n\t\t\t\t\t\t\"* **[Google Chrome Help on PWAs] **\",\n\t\t\t\t\t\t\"* **[Use Web Apps with Firefox for Android] **\",\n\t\t\t\t\t\t\"### Spoiler Alert\",\n\t\t\t\t\t\t\"Close\",\n\t\t\t\t\t\t\"This action may reveal spoilers such as scores and season records.\",\n\t\t\t\t\t\t\"ContinueCancel\"\n\t\t\t\t\t],\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"name\": \"facebook-premier-league\",\n\t\t\t\t\t\"snippet\": \"Meta © 2025\",\n\t\t\t\t\t\"content\": [\n\t\t\t\t\t\t\"Fabrizio Romano - 🥹🇵🇹Cristiano's 950 career goals in...\",\n\t\t\t\t\t\t\"**\",\n\t\t\t\t\t\t\"Facebook\",\n\t\t\t\t\t\t\"[\",\n\t\t\t\t\t\t\"Log In\",\n\t\t\t\t\t\t\"] \",\n\t\t\t\t\t\t\"## Fabrizio Romano&#x27;s Post\",\n\t\t\t\t\t\t\"[\",\n\t\t\t\t\t\t\"] \",\n\t\t\t\t\t\t\"### [**Fabrizio Romano**] Verified account\",\n\t\t\t\t\t\t\"[October 25, 2025] ·Shared with Public\",\n\t\t\t\t\t\t\"![🥹]![🇵🇹] Cristiano's 950 career goals in details…\",\n\t\t\t\t\t\t\"Real Madrid –450 goals\",\n\t\t\t\t\t\t\"Manchester United –145 goals\",\n\t\t\t\t\t\t\"Portugal –143 goals\",\n\t\t\t\t\t\t\"Al-Nassr –106 goals\",\n\t\t\t\t\t\t\"Juventus –101 goals\",\n\t\t\t\t\t\t\"Sporting CP –5 goals\",\n\t\t\t\t\t\t\"Greatest![🐐] \",\n\t\t\t\t\t\t\"[\",\n\t\t\t\t\t\t\"![May be an image of soccer, football and text that says &#x27;NUNASSK ማGEAB KAFD FD κAF&#x27;] \",\n\t\t\t\t\t\t\"] \",\n\t\t\t\t\t\t\"All reactions:\",\n\t\t\t\t\t\t\"91K\",\n\t\t\t\t\t\t\"1.7K comments\",\n\t\t\t\t\t\t\"494 shares\",\n\t\t\t\t\t\t\"**\",\n\t\t\t\t\t\t\"Like\",\n\t\t\t\t\t\t\"**\",\n\t\t\t\t\t\t\"Comment\",\n\t\t\t\t\t\t\"Most relevant\",\n\t\t\t\t\t\t\"**\",\n\t\t\t\t\t\t\"![] Top fan\",\n\t\t\t\t\t\t\"Debajyoti Nag\",\n\t\t\t\t\t\t\"I noticed one thing that Fabrizio never added a![🐐] emonji when telling about Ronaldo and messi . But today he just written &quot;Greatest![🐐] &quot; . He knows who&#x27;s the goat![🐐] \",\n\t\t\t\t\t\t\"* [11w] \",\n\t\t\t\t\t\t\"50\",\n\t\t\t\t\t\t\"View all 6 replies\",\n\t\t\t\t\t\t\"Edel Queen\",\n\t\t\t\t\t\t\"Bring his assists as well you calculating his goals too, Man is with over 300+ assists\",\n\t\t\t\t\t\t\"* [11w] \",\n\t\t\t\t\t\t\"9\",\n\t\t\t\t\t\t\"View all 6 replies\",\n\t\t\t\t\t\t\"[\",\n\t\t\t\t\t\t\"] \",\n\t\t\t\t\t\t\"![] Top fan\",\n\t\t\t\t\t\t\"[Gunna Thèé Gēé] \",\n\t\t\t\t\t\t\"\\u201cIf Chelsea had won I would have won some money, But I'm happy they lost. Because\",\n\t\t\t\t\t\t\"money can&#x27;t buy happiness\",\n\t\t\t\t\t\t\"![😜]![😜]![😜] \",\n\t\t\t\t\t\t\"* [11w] \",\n\t\t\t\t\t\t\"29\",\n\t\t\t\t\t\t\"View all 2 replies\",\n\t\t\t\t\t\t\"[\",\n\t\t\t\t\t\t\"] \",\n\t\t\t\t\t\t\"![] Top fan\",\n\t\t\t\t\t\t\"[Shafi Mazid] \",\n\t\t\t\t\t\t\"Higher than the highest Sky Scrapper. Beyond the debate.![🦿] machine![🦾] \",\n\t\t\t\t\t\t\"* [11w] \",\n\t\t\t\t\t\t\"6\",\n\t\t\t\t\t\t\"View 1 reply\",\n\t\t\t\t\t\t\"[\",\n\t\t\t\t\t\t\"] \",\n\t\t\t\t\t\t\"![] Top fan\",\n\t\t\t\t\t\t\"[Sicelo Thabethe] \",\n\t\t\t\t\t\t\"Road to 200 penalties![🙌🏿]![🙌🏿] \",\n\t\t\t\t\t\t\"* [11w] \",\n\t\t\t\t\t\t\"3\",\n\t\t\t\t\t\t\"View all 9 replies\",\n\t\t\t\t\t\t\"[\",\n\t\t\t\t\t\t\"] \",\n\t\t\t\t\t\t\"[Samtex Ventures] \",\n\t\t\t\t\t\t\"Chelsea ordered Garnacho while United gave them Gachagua![🤣]![🤣] \",\n\t\t\t\t\t\t\"* [11w] \",\n\t\t\t\t\t\t\"17\",\n\t\t\t\t\t\t\"View all 3 replies\",\n\t\t\t\t\t\t\"[\",\n\t\t\t\t\t\t\"] \",\n\t\t\t\t\t\t\"![] Top fan\",\n\t\t\t\t\t\t\"[Anawo Destiny] \",\n\t\t\t\t\t\t\"Spanish league conquered\",\n\t\t\t\t\t\t\"EPL conquered\",\n\t\t\t\t\t\t\"Own country conquered\",\n\t\t\t\t\t\t\"Camels league conquered\",\n\t\t\t\t\t\t\"Seria A conquered\",\n\t\t\t\t\t\t\"Portugal's league. Started from there!!\",\n\t\t\t\t\t\t\"6 leagues felt the greatness of the football goat.\",\n\t\t\t\t\t\t\"But messi went to ligue 1 and was chased out![😂]![😂] \",\n\t\t\t\t\t\t\"Went to Hollywood fashion league and has been fighting Tyler Perry and Jason statam on the pitch and was still beating![😂]![😂] \",\n\t\t\t\t\t\t\"Know your goat![🐐] \",\n\t\t\t\t\t\t\"* [11w] \",\n\t\t\t\t\t\t\"3\",\n\t\t\t\t\t\t\"View 1 reply\",\n\t\t\t\t\t\t\"![] Top fan\",\n\t\t\t\t\t\t\"Sïrr Ñicølas\",\n\t\t\t\t\t\t\"Maturing is realizing tht Ronaldo should have had 990 goals if it wasn&#x27;t because of wht happened when he was 37![😭]![😭] \",\n\t\t\t\t\t\t\"* [11w] \",\n\t\t\t\t\t\t\"[\",\n\t\t\t\t\t\t\"] \",\n\t\t\t\t\t\t\"[Ziya&#x27;ulhaq Sa&#x27;adan El-dia] \",\n\t\t\t\t\t\t\"Did Fabrizio say greatest? He also acknowledges the![🐐] \",\n\t\t\t\t\t\t\"* [11w] \",\n\t\t\t\t\t\t\"[\",\n\t\t\t\t\t\t\"] \",\n\t\t\t\t\t\t\"[Jona Lalremsanga] \",\n\t\t\t\t\t\t\"Now, People who never score Ronaldo&#x27;s Goals for sporting CP (5 goals) will comment and Judge him![🤣] \",\n\t\t\t\t\t\t\"* [11w]\",\n\t\t\t\t\t\t\"https://www.census.gov/quickfacts/fact/table/newyorkcitynewyork/PST045224\"\n\t\t\t\t\t],\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"Token matcher boundary: Similar sentence structure but key words substituted\",\n\t\t\t\t\t\"name\": \"word_substitution_boundary\",\n\t\t\t\t\t\"content\": \"The quick brown fox jumps over the lazy dog in the sunny meadow today.\",\n\t\t\t\t\t\"snippet\": \"The slow grey wolf leaps over the tired cat in the rainy forest today.\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"Token matcher boundary: Same words but completely different order\",\n\t\t\t\t\t\"name\": \"word_order_scrambled\",\n\t\t\t\t\t\"content\": \"Machine learning models require large datasets for training and validation.\",\n\t\t\t\t\t\"snippet\": \"Training and validation require large models for machine learning datasets.\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"Token matcher boundary: Similar numbers in statistics context but different subject\",\n\t\t\t\t\t\"name\": \"similar_statistics_different_values\",\n\t\t\t\t\t\"content\": \"In 2024, the corporation disclosed earnings of $45.2 million with 12,500 staff across 28 regions.\",\n\t\t\t\t\t\"snippet\": \"In 2019, the startup announced losses of $8.7 billion with 350 contractors across 3 cities.\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"Token matcher boundary: Similar product names but different\",\n\t\t\t\t\t\"name\": \"similar_product_names\",\n\t\t\t\t\t\"content\": \"The new iPhone 15 Pro Max features an A17 Bionic chip with enhanced neural engine.\",\n\t\t\t\t\t\"snippet\": \"The new Galaxy S24 Ultra features a Snapdragon 8 Gen 3 chip with enhanced AI engine.\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"Token matcher boundary: Overlapping topic but different phrasing\",\n\t\t\t\t\t\"name\": \"overlapping_phrases_different_context\",\n\t\t\t\t\t\"content\": \"Meteorologists predict heavy rain will drench northern areas beginning at dawn tomorrow.\",\n\t\t\t\t\t\"snippet\": \"The traffic report indicates severe congestion on southern highways clearing by midnight tonight.\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"Token matcher boundary: Similar location-based content\",\n\t\t\t\t\t\"name\": \"similar_locations_different_details\",\n\t\t\t\t\t\"content\": [\n\t\t\t\t\t\t\"## Sydney Weather Report\",\n\t\t\t\t\t\t\"Temperature: 28°C\",\n\t\t\t\t\t\t\"Humidity: 65%\",\n\t\t\t\t\t\t\"Wind: NE 15 km/h\",\n\t\t\t\t\t\t\"Conditions: Partly cloudy with afternoon thunderstorms expected\"\n\t\t\t\t\t],\n\t\t\t\t\t\"snippet\": [\n\t\t\t\t\t\t\"## Melbourne Weather Report\",\n\t\t\t\t\t\t\"Temperature: 22°C\",\n\t\t\t\t\t\t\"Humidity: 78%\",\n\t\t\t\t\t\t\"Wind: SW 20 km/h\",\n\t\t\t\t\t\t\"Conditions: Mostly overcast with morning showers expected\"\n\t\t\t\t\t],\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"Token matcher boundary: Similar topic with different wording and entities\",\n\t\t\t\t\t\"name\": \"antonym_substitution\",\n\t\t\t\t\t\"content\": \"Wall Street rallied sharply as traders demonstrated renewed optimism about semiconductor companies.\",\n\t\t\t\t\t\"snippet\": \"Bond markets tumbled dramatically while analysts expressed growing pessimism regarding retail corporations.\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"Token matcher boundary: Similar list items but different specifics\",\n\t\t\t\t\t\"name\": \"similar_list_different_items\",\n\t\t\t\t\t\"content\": [\n\t\t\t\t\t\t\"Top 5 Programming Languages in 2025:\",\n\t\t\t\t\t\t\"1. Python - 28.5%\",\n\t\t\t\t\t\t\"2. JavaScript - 22.1%\",\n\t\t\t\t\t\t\"3. Java - 15.3%\",\n\t\t\t\t\t\t\"4. TypeScript - 12.8%\",\n\t\t\t\t\t\t\"5. C++ - 9.4%\"\n\t\t\t\t\t],\n\t\t\t\t\t\"snippet\": [\n\t\t\t\t\t\t\"Top 5 Programming Languages in 2024:\",\n\t\t\t\t\t\t\"1. JavaScript - 31.2%\",\n\t\t\t\t\t\t\"2. Python - 24.7%\",\n\t\t\t\t\t\t\"3. TypeScript - 18.6%\",\n\t\t\t\t\t\t\"4. Java - 11.2%\",\n\t\t\t\t\t\t\"5. Rust - 7.8%\"\n\t\t\t\t\t],\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"Token matcher boundary: Same template different entities\",\n\t\t\t\t\t\"name\": \"same_template_different_entities\",\n\t\t\t\t\t\"content\": \"John Smith, CEO of TechCorp Inc., announced the acquisition of DataFlow Systems for $2.3 billion.\",\n\t\t\t\t\t\"snippet\": \"Jane Doe, CFO of InnovateCo Ltd., announced the merger with CloudSync Solutions for $1.8 billion.\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"Token matcher boundary: Sports score with similar format but different teams and scores\",\n\t\t\t\t\t\"name\": \"similar_sports_scores\",\n\t\t\t\t\t\"content\": [\n\t\t\t\t\t\t\"NBA Finals Game 5 Results:\",\n\t\t\t\t\t\t\"Los Angeles Lakers 112 - Boston Celtics 108\",\n\t\t\t\t\t\t\"Top Scorer: LeBron James (34 pts)\",\n\t\t\t\t\t\t\"MVP: Anthony Davis\"\n\t\t\t\t\t],\n\t\t\t\t\t\"snippet\": [\n\t\t\t\t\t\t\"NBA Finals Game 7 Results:\",\n\t\t\t\t\t\t\"Golden State Warriors 118 - Miami Heat 115\",\n\t\t\t\t\t\t\"Top Scorer: Stephen Curry (42 pts)\",\n\t\t\t\t\t\t\"MVP: Draymond Green\"\n\t\t\t\t\t],\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"_comment\": \"Token matcher boundary: Completely different recipe with similar structure\",\n\t\t\t\t\t\"name\": \"similar_recipe_different_ingredients\",\n\t\t\t\t\t\"content\": \"Combine 2 cups flour, 1 cup sugar, 3 eggs, and 1/2 cup butter in a bowl. Bake at 350°F for 25 minutes.\",\n\t\t\t\t\t\"snippet\": \"Blend 4 bananas, 2 avocados, 1 cup honey, and a splash of almond milk until smooth. Chill for 2 hours.\",\n\t\t\t\t\t\"expected_result\": {\n\t\t\t\t\t\t\"snippet_located\": false\n\t\t\t\t\t},\n\t\t\t\t\t\"allow_buffer\": true\n\t\t\t\t}\n\t\t\t]\n\t\t}\n\t]\n}"
  },
  {
    "path": "backend/tests/unit/onyx/tools/tool_implementations/open_url/test_onyx_web_crawler.py",
    "content": "from __future__ import annotations\n\nimport time\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\nfrom pydantic import BaseModel\n\nimport onyx.tools.tool_implementations.open_url.onyx_web_crawler as crawler_module\nfrom onyx.tools.tool_implementations.open_url.onyx_web_crawler import (\n    DEFAULT_CONNECT_TIMEOUT_SECONDS,\n)\nfrom onyx.tools.tool_implementations.open_url.onyx_web_crawler import (\n    DEFAULT_READ_TIMEOUT_SECONDS,\n)\nfrom onyx.tools.tool_implementations.open_url.onyx_web_crawler import OnyxWebCrawler\n\n\nclass FakeResponse(BaseModel):\n    status_code: int\n    headers: dict[str, str]\n    content: bytes\n    text: str = \"\"\n    apparent_encoding: str | None = None\n    encoding: str | None = None\n\n\ndef test_fetch_url_pdf_with_content_type(monkeypatch: pytest.MonkeyPatch) -> None:\n    crawler = OnyxWebCrawler()\n    response = FakeResponse(\n        status_code=200,\n        headers={\"Content-Type\": \"application/pdf\"},\n        content=b\"%PDF-1.4 mock\",\n    )\n\n    monkeypatch.setattr(\n        crawler_module,\n        \"ssrf_safe_get\",\n        lambda *args, **kwargs: response,  # noqa: ARG005\n    )\n    monkeypatch.setattr(\n        crawler_module,\n        \"extract_pdf_text\",\n        lambda *args, **kwargs: (\"pdf text\", {\"Title\": \"Doc Title\"}),  # noqa: ARG005\n    )\n\n    result = crawler._fetch_url(\"https://example.com/report.pdf\")\n\n    assert result.full_content == \"pdf text\"\n    assert result.title == \"Doc Title\"\n    assert result.scrape_successful is True\n\n\ndef test_fetch_url_pdf_with_signature(monkeypatch: pytest.MonkeyPatch) -> None:\n    crawler = OnyxWebCrawler()\n    response = FakeResponse(\n        status_code=200,\n        headers={\"Content-Type\": \"application/octet-stream\"},\n        content=b\"%PDF-1.7 mock\",\n    )\n\n    monkeypatch.setattr(\n        crawler_module,\n        \"ssrf_safe_get\",\n        lambda *args, **kwargs: response,  # noqa: ARG005\n    )\n    monkeypatch.setattr(\n        crawler_module,\n        \"extract_pdf_text\",\n        lambda *args, **kwargs: (\"pdf text\", {}),  # noqa: ARG005\n    )\n\n    result = crawler._fetch_url(\"https://example.com/files/file.pdf\")\n\n    assert result.full_content == \"pdf text\"\n    assert result.title == \"file.pdf\"\n    assert result.scrape_successful is True\n\n\ndef test_fetch_url_decodes_html_bytes(monkeypatch: pytest.MonkeyPatch) -> None:\n    crawler = OnyxWebCrawler()\n    html_bytes = b\"<html><body>caf\\xe9</body></html>\"\n    response = FakeResponse(\n        status_code=200,\n        headers={\"Content-Type\": \"text/html; charset=iso-8859-1\"},\n        content=html_bytes,\n        text=\"caf\\u00ef\\u00bf\\u00bd\",\n    )\n\n    monkeypatch.setattr(\n        crawler_module,\n        \"ssrf_safe_get\",\n        lambda *args, **kwargs: response,  # noqa: ARG005\n    )\n\n    result = crawler._fetch_url(\"https://example.com/page.html\")\n\n    assert \"caf\\u00e9\" in result.full_content\n    assert result.scrape_successful is True\n\n\ndef test_fetch_url_pdf_exceeds_size_limit(monkeypatch: pytest.MonkeyPatch) -> None:\n    \"\"\"PDF content exceeding max_pdf_size_bytes should be rejected.\"\"\"\n    crawler = OnyxWebCrawler(max_pdf_size_bytes=100)\n    response = FakeResponse(\n        status_code=200,\n        headers={\"Content-Type\": \"application/pdf\"},\n        content=b\"%PDF-1.4 \" + b\"x\" * 200,  # 209 bytes, exceeds 100 limit\n    )\n\n    monkeypatch.setattr(\n        crawler_module,\n        \"ssrf_safe_get\",\n        lambda *args, **kwargs: response,  # noqa: ARG005\n    )\n\n    result = crawler._fetch_url(\"https://example.com/large.pdf\")\n\n    assert result.full_content == \"\"\n    assert result.scrape_successful is False\n    assert result.link == \"https://example.com/large.pdf\"\n\n\ndef test_fetch_url_pdf_within_size_limit(monkeypatch: pytest.MonkeyPatch) -> None:\n    \"\"\"PDF content within max_pdf_size_bytes should be processed normally.\"\"\"\n    crawler = OnyxWebCrawler(max_pdf_size_bytes=500)\n    response = FakeResponse(\n        status_code=200,\n        headers={\"Content-Type\": \"application/pdf\"},\n        content=b\"%PDF-1.4 mock\",  # Small content\n    )\n\n    monkeypatch.setattr(\n        crawler_module,\n        \"ssrf_safe_get\",\n        lambda *args, **kwargs: response,  # noqa: ARG005\n    )\n    monkeypatch.setattr(\n        crawler_module,\n        \"extract_pdf_text\",\n        lambda *args, **kwargs: (\"pdf text\", {\"Title\": \"Doc Title\"}),  # noqa: ARG005\n    )\n\n    result = crawler._fetch_url(\"https://example.com/small.pdf\")\n\n    assert result.full_content == \"pdf text\"\n    assert result.scrape_successful is True\n\n\ndef test_fetch_url_html_exceeds_size_limit(monkeypatch: pytest.MonkeyPatch) -> None:\n    \"\"\"HTML content exceeding max_html_size_bytes should be rejected.\"\"\"\n    crawler = OnyxWebCrawler(max_html_size_bytes=50)\n    html_bytes = b\"<html><body>\" + b\"x\" * 100 + b\"</body></html>\"  # Exceeds 50 limit\n    response = FakeResponse(\n        status_code=200,\n        headers={\"Content-Type\": \"text/html\"},\n        content=html_bytes,\n    )\n\n    monkeypatch.setattr(\n        crawler_module,\n        \"ssrf_safe_get\",\n        lambda *args, **kwargs: response,  # noqa: ARG005\n    )\n\n    result = crawler._fetch_url(\"https://example.com/large.html\")\n\n    assert result.full_content == \"\"\n    assert result.scrape_successful is False\n    assert result.link == \"https://example.com/large.html\"\n\n\ndef test_fetch_url_html_within_size_limit(monkeypatch: pytest.MonkeyPatch) -> None:\n    \"\"\"HTML content within max_html_size_bytes should be processed normally.\"\"\"\n    crawler = OnyxWebCrawler(max_html_size_bytes=500)\n    html_bytes = b\"<html><body>hello world</body></html>\"\n    response = FakeResponse(\n        status_code=200,\n        headers={\"Content-Type\": \"text/html\"},\n        content=html_bytes,\n    )\n\n    monkeypatch.setattr(\n        crawler_module,\n        \"ssrf_safe_get\",\n        lambda *args, **kwargs: response,  # noqa: ARG005\n    )\n\n    result = crawler._fetch_url(\"https://example.com/small.html\")\n\n    assert \"hello world\" in result.full_content\n    assert result.scrape_successful is True\n\n\n# ---------------------------------------------------------------------------\n# Helpers for parallel / failure-isolation / timeout tests\n# ---------------------------------------------------------------------------\n\n\ndef _make_mock_response(\n    *,\n    status_code: int = 200,\n    content: bytes = b\"<html><body>Hello</body></html>\",\n    content_type: str = \"text/html\",\n    delay: float = 0.0,\n) -> MagicMock:\n    \"\"\"Create a mock response that behaves like a requests.Response.\"\"\"\n    resp = MagicMock()\n    resp.status_code = status_code\n    resp.headers = {\"Content-Type\": content_type}\n\n    if delay:\n        original_content = content\n\n        @property  # type: ignore[misc]\n        def _delayed_content(_self: object) -> bytes:\n            time.sleep(delay)\n            return original_content\n\n        type(resp).content = _delayed_content\n    else:\n        resp.content = content\n\n    resp.apparent_encoding = None\n    resp.encoding = None\n\n    return resp\n\n\nclass TestParallelExecution:\n    \"\"\"Verify that contents() fetches URLs in parallel.\"\"\"\n\n    @patch(\"onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get\")\n    def test_multiple_urls_fetched_concurrently(self, mock_get: MagicMock) -> None:\n        \"\"\"With a per-URL delay, parallel execution should be much faster than sequential.\"\"\"\n        per_url_delay = 0.3\n        num_urls = 5\n        urls = [f\"http://example.com/page{i}\" for i in range(num_urls)]\n\n        mock_get.return_value = _make_mock_response(delay=per_url_delay)\n\n        crawler = OnyxWebCrawler()\n        start = time.monotonic()\n        results = crawler.contents(urls)\n        elapsed = time.monotonic() - start\n\n        # Sequential would take ~1.5s; parallel should be well under that\n        assert elapsed < per_url_delay * num_urls * 0.7\n        assert len(results) == num_urls\n        assert all(r.scrape_successful for r in results)\n\n    @patch(\"onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get\")\n    def test_empty_urls_returns_empty(self, mock_get: MagicMock) -> None:\n        crawler = OnyxWebCrawler()\n        results = crawler.contents([])\n        assert results == []\n        mock_get.assert_not_called()\n\n    @patch(\"onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get\")\n    def test_single_url(self, mock_get: MagicMock) -> None:\n        mock_get.return_value = _make_mock_response()\n        crawler = OnyxWebCrawler()\n        results = crawler.contents([\"http://example.com\"])\n        assert len(results) == 1\n        assert results[0].scrape_successful\n\n\nclass TestFailureIsolation:\n    \"\"\"Verify that one URL failure doesn't affect others in the batch.\"\"\"\n\n    @patch(\"onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get\")\n    def test_one_failure_doesnt_kill_batch(self, mock_get: MagicMock) -> None:\n        good_resp = _make_mock_response()\n        bad_resp = _make_mock_response(status_code=500)\n\n        # First and third URLs succeed, second fails\n        mock_get.side_effect = [good_resp, bad_resp, good_resp]\n\n        crawler = OnyxWebCrawler()\n        results = crawler.contents([\"http://a.com\", \"http://b.com\", \"http://c.com\"])\n\n        assert len(results) == 3\n        assert results[0].scrape_successful\n        assert not results[1].scrape_successful\n        assert results[2].scrape_successful\n\n    @patch(\"onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get\")\n    def test_exception_doesnt_kill_batch(self, mock_get: MagicMock) -> None:\n        good_resp = _make_mock_response()\n\n        # Second URL raises an exception\n        mock_get.side_effect = [\n            good_resp,\n            RuntimeError(\"connection reset\"),\n            _make_mock_response(),\n        ]\n\n        crawler = OnyxWebCrawler()\n        results = crawler.contents([\"http://a.com\", \"http://b.com\", \"http://c.com\"])\n\n        assert len(results) == 3\n        assert results[0].scrape_successful\n        assert not results[1].scrape_successful\n        assert results[2].scrape_successful\n\n    @patch(\"onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get\")\n    def test_ssrf_exception_doesnt_kill_batch(self, mock_get: MagicMock) -> None:\n        from onyx.utils.url import SSRFException\n\n        good_resp = _make_mock_response()\n        mock_get.side_effect = [\n            good_resp,\n            SSRFException(\"blocked\"),\n            _make_mock_response(),\n        ]\n\n        crawler = OnyxWebCrawler()\n        results = crawler.contents(\n            [\"http://a.com\", \"http://internal.local\", \"http://c.com\"]\n        )\n\n        assert len(results) == 3\n        assert results[0].scrape_successful\n        assert not results[1].scrape_successful\n        assert results[2].scrape_successful\n\n\nclass TestTupleTimeout:\n    \"\"\"Verify that separate connect and read timeouts are passed correctly.\"\"\"\n\n    @patch(\"onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get\")\n    def test_default_tuple_timeout(self, mock_get: MagicMock) -> None:\n        mock_get.return_value = _make_mock_response()\n\n        crawler = OnyxWebCrawler()\n        crawler.contents([\"http://example.com\"])\n\n        call_kwargs = mock_get.call_args\n        assert call_kwargs.kwargs[\"timeout\"] == (\n            DEFAULT_CONNECT_TIMEOUT_SECONDS,\n            DEFAULT_READ_TIMEOUT_SECONDS,\n        )\n\n    @patch(\"onyx.tools.tool_implementations.open_url.onyx_web_crawler.ssrf_safe_get\")\n    def test_custom_tuple_timeout(self, mock_get: MagicMock) -> None:\n        mock_get.return_value = _make_mock_response()\n\n        crawler = OnyxWebCrawler(timeout_seconds=30, connect_timeout_seconds=3)\n        crawler.contents([\"http://example.com\"])\n\n        call_kwargs = mock_get.call_args\n        assert call_kwargs.kwargs[\"timeout\"] == (3, 30)\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/tool_implementations/open_url/test_snippet_matcher.py",
    "content": "from __future__ import annotations\n\nimport json\nimport unicodedata  # used to verify NFC expansion test preconditions\nfrom pathlib import Path\n\nimport pytest\nfrom pydantic import BaseModel\nfrom pydantic import field_validator\n\nfrom onyx.tools.tool_implementations.open_url.snippet_matcher import (\n    find_snippet_in_content,\n)\n\n\"\"\"\nWe want to store tests in the json file in the following format:\n{\n    \"categories\": [\n        {\n            \"category\":  \"...\",\n            \"tests\": [\n                {\n                    \"name\": \"...\",\n                    \"content\": \"... or [\"...\", \"...\"] where each item is a new line\",\n                    \"snippet\": \"...\" or [\"...\", \"...\"] where each item is a new line,\n                    \"expected_result\": {\n                        \"snippet_located\": true,\n                        \"expected_start_idx\": 0,\n                        \"expected_end_idx\": 10\n                    },\n                    \"allow_buffer\": false (Optional,  default: true)\n                },\n                ...\n            ]\n        },\n        ...\n    ]\n}\n\"\"\"\n\nTEST_DATA_FILE_PATH = Path(__file__).parent / \"data\" / \"test_snippet_finding_data.json\"\n\n\nclass TestSchemaResult(BaseModel):\n    \"\"\"\n    Expected results from the snippet matcher.\n    \"\"\"\n\n    snippet_located: bool\n\n    # Don't include if snippet_located is False\n    expected_start_idx: int = -1\n    expected_end_idx: int = -1\n\n\nclass TestSchema(BaseModel):\n    \"\"\"\n    A test takes in some content and a snippet.\n\n    Expected result is what we expect the output to be.\n    \"\"\"\n\n    name: str\n    content: str\n    snippet: str\n\n    expected_result: TestSchemaResult\n    allow_buffer: bool = True\n\n    @field_validator(\"content\", \"snippet\", mode=\"before\")\n    @classmethod\n    def convert_list_to_string(cls, v: str | list[str]) -> str:\n        \"\"\"\n        We want to be able to handle strings or list of strings for content and snippet.\n        The client should only see strings though, so we do some parsing here.\n        \"\"\"\n        if isinstance(v, list):\n            return \"\\n\".join(v)\n        return v\n\n\nclass TestCategory(BaseModel):\n    \"\"\"\n    A category of tests.\n    \"\"\"\n\n    category: str\n    tests: list[TestSchema]\n\n\nclass TestDataFile(BaseModel):\n    \"\"\"\n    The root structure of the test data JSON file.\n    \"\"\"\n\n    categories: list[TestCategory]\n\n\ndef load_all_tests() -> list[tuple[str, TestSchema]]:\n    \"\"\"\n    Loads all tests from the JSON file and returns them as a list of tuples.\n\n    Each tuple contains (test_id, test_data) where test_id is \"{category}_{name}\".\n    \"\"\"\n    with open(TEST_DATA_FILE_PATH, \"r\") as file:\n        data = json.load(file)\n\n    # Validate the entire file structure using Pydantic\n    test_data = TestDataFile.model_validate(data)\n\n    # Collect all tests with their category-prefixed names\n    all_tests: list[tuple[str, TestSchema]] = []\n    for category in test_data.categories:\n        for test in category.tests:\n            test_id = f\"{category.category}_{test.name}\"\n            all_tests.append((test_id, test))\n\n    return all_tests\n\n\n# Load tests at module level for parametrization\n_ALL_TESTS = load_all_tests()\n\n\n@pytest.mark.parametrize(\n    \"test_data\",\n    [test for _, test in _ALL_TESTS],\n    ids=[test_id for test_id, _ in _ALL_TESTS],\n)\ndef test_snippet_finding(test_data: TestSchema) -> None:\n    \"\"\"\n    Tests the snippet matching functionality.\n\n    Each test case is defined in the JSON file and named {category}_{name}.\n    \"\"\"\n    result = find_snippet_in_content(test_data.content, test_data.snippet)\n\n    assert (\n        result.snippet_located == test_data.expected_result.snippet_located\n    ), f\"snippet_located mismatch: expected {test_data.expected_result.snippet_located}, got {result.snippet_located}\"\n\n    # If buffer is allowed, we let the start and end indices be within 10 characters of where we expect\n    BUFFER_SIZE = 10 if test_data.allow_buffer else 0\n\n    assert (\n        test_data.expected_result.expected_start_idx - BUFFER_SIZE\n        <= result.start_idx\n        <= test_data.expected_result.expected_start_idx + BUFFER_SIZE\n    ), f\"start_idx mismatch: expected {test_data.expected_result.expected_start_idx}, got {result.start_idx}\"\n    assert (\n        test_data.expected_result.expected_end_idx - BUFFER_SIZE\n        <= result.end_idx\n        <= test_data.expected_result.expected_end_idx + BUFFER_SIZE\n    ), f\"end_idx mismatch: expected {test_data.expected_result.expected_end_idx}, got {result.end_idx}\"\n\n\n# Characters confirmed to expand from 1 → 2 codepoints under NFC\nNFC_EXPANDING_CHARS = [\n    (\"\\u0958\", \"Devanagari letter qa\"),\n    (\"\\u0959\", \"Devanagari letter khha\"),\n    (\"\\u095a\", \"Devanagari letter ghha\"),\n]\n\n\n@pytest.mark.parametrize(\n    \"char,description\",\n    NFC_EXPANDING_CHARS,\n)\ndef test_nfc_expanding_char_snippet_match(char: str, description: str) -> None:\n    \"\"\"Snippet matching should produce valid indices for content\n    containing characters that expand under NFC normalization.\"\"\"\n    nfc = unicodedata.normalize(\"NFC\", char)\n    if len(nfc) <= 1:\n        pytest.skip(f\"{description} does not expand under NFC on this platform\")\n\n    content = f\"before {char} after\"\n    snippet = f\"{char} after\"\n\n    result = find_snippet_in_content(content, snippet)\n\n    assert result.snippet_located, f\"[{description}] Snippet should be found in content\"\n    assert (\n        0 <= result.start_idx < len(content)\n    ), f\"[{description}] start_idx {result.start_idx} out of bounds\"\n    assert (\n        0 <= result.end_idx < len(content)\n    ), f\"[{description}] end_idx {result.end_idx} out of bounds\"\n    assert (\n        result.start_idx <= result.end_idx\n    ), f\"[{description}] start_idx {result.start_idx} > end_idx {result.end_idx}\"\n\n    matched = content[result.start_idx : result.end_idx + 1]\n    matched_nfc = unicodedata.normalize(\"NFC\", matched)\n    snippet_nfc = unicodedata.normalize(\"NFC\", snippet)\n    assert (\n        snippet_nfc in matched_nfc or matched_nfc in snippet_nfc\n    ), f\"[{description}] Matched span '{matched}' does not overlap with expected snippet '{snippet}'\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/tool_implementations/open_url/test_url_normalization.py",
    "content": "\"\"\"Unit tests for URL normalization module.\"\"\"\n\nimport pytest\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.tools.tool_implementations.open_url.open_url_tool import _url_lookup_variants\nfrom onyx.tools.tool_implementations.open_url.url_normalization import (\n    _detect_source_type,\n)\nfrom onyx.tools.tool_implementations.open_url.url_normalization import normalize_url\n\n\n@pytest.mark.parametrize(\n    \"url,expected\",\n    [\n        (\n            \"https://docs.google.com/document/d/1ABC123/edit?tab=t.0\",\n            \"https://docs.google.com/document/d/1ABC123\",\n        ),\n        (\n            \"https://docs.google.com/document/d/1ABC123/view\",\n            \"https://docs.google.com/document/d/1ABC123\",\n        ),\n        (\n            \"https://docs.google.com/document/d/1ABC123\",\n            \"https://docs.google.com/document/d/1ABC123\",\n        ),\n        (\n            \"https://drive.google.com/file/d/1ABC123/view?usp=sharing\",\n            \"https://drive.google.com/file/d/1ABC123\",\n        ),\n        (\n            \"https://drive.google.com/open?id=1ABC123\",\n            \"https://drive.google.com/file/d/1ABC123\",\n        ),\n        (\n            \"https://docs.google.com/document/d/1TVE04FYWmyP9j-OJFYcG3tnaLeqBbZ1pauCvmYkNq7c/edit?tab=t.0\",\n            \"https://docs.google.com/document/d/1TVE04FYWmyP9j-OJFYcG3tnaLeqBbZ1pauCvmYkNq7c\",\n        ),\n    ],\n)\ndef test_google_drive_normalization(url: str, expected: str) -> None:\n    \"\"\"Test Google Drive URL normalization.\"\"\"\n    assert normalize_url(url, source_type=DocumentSource.GOOGLE_DRIVE) == expected\n\n\n@pytest.mark.parametrize(\n    \"url,expected\",\n    [\n        (\n            \"https://notion.so/Page-1234567890abcdef1234567890abcdef\",\n            \"12345678-90ab-cdef-1234-567890abcdef\",\n        ),\n        (\n            \"https://notion.so/page?p=1234567890abcdef1234567890abcdef\",\n            \"12345678-90ab-cdef-1234-567890abcdef\",\n        ),\n        # Edge case: URL with title prefix but valid UUID\n        (\n            \"https://www.notion.so/My-Page-abc123def456ghi789jkl012mno345pq\",\n            None,  # May not extract correctly if UUID is incomplete\n        ),\n    ],\n)\ndef test_notion_normalization(url: str, expected: str | None) -> None:\n    \"\"\"Test Notion URL normalization (extracts page ID as UUID).\"\"\"\n    result = normalize_url(url, source_type=DocumentSource.NOTION)\n    assert result == expected\n\n\n@pytest.mark.parametrize(\n    \"url,expected\",\n    [\n        (\n            \"https://workspace.slack.com/archives/C1234567890/p1234567890123456\",\n            \"C1234567890__1234567890.123456\",\n        ),\n        (\n            \"https://workspace.slack.com/archives/C1234567890/p1234567890123456?thread_ts=1234567890.123456\",\n            \"C1234567890__1234567890.123456\",\n        ),\n    ],\n)\ndef test_slack_normalization(url: str, expected: str) -> None:\n    \"\"\"Test Slack URL normalization (extracts channel_id__thread_ts format).\"\"\"\n    assert normalize_url(url, source_type=DocumentSource.SLACK) == expected\n\n\n@pytest.mark.parametrize(\n    \"url,expected\",\n    [\n        (\n            \"https://example.atlassian.net/wiki/spaces/SPACE/pages/12345?query=param#section\",\n            \"https://example.atlassian.net/wiki/spaces/SPACE/pages/12345\",\n        ),\n        (\n            \"https://example.atlassian.net/wiki/spaces/SPACE/pages/12345\",\n            \"https://example.atlassian.net/wiki/spaces/SPACE/pages/12345\",\n        ),\n    ],\n)\ndef test_confluence_normalization(url: str, expected: str) -> None:\n    \"\"\"Test Confluence URL normalization (uses default normalizer).\"\"\"\n    assert normalize_url(url, source_type=DocumentSource.CONFLUENCE) == expected\n\n\n@pytest.mark.parametrize(\n    \"url,expected\",\n    [\n        (\n            \"https://example.atlassian.net/jira/browse/PROJ-123?query=param#section\",\n            \"https://example.atlassian.net/jira/browse/PROJ-123\",\n        ),\n        (\n            \"https://example.atlassian.net/jira/software/projects/PROJ/issues/PROJ-123\",\n            \"https://example.atlassian.net/jira/software/projects/PROJ/issues/PROJ-123\",\n        ),\n    ],\n)\ndef test_jira_normalization(url: str, expected: str) -> None:\n    \"\"\"Test Jira URL normalization (uses default normalizer).\"\"\"\n    assert normalize_url(url, source_type=DocumentSource.JIRA) == expected\n\n\n@pytest.mark.parametrize(\n    \"url,expected\",\n    [\n        (\n            \"https://github.com/owner/repo/blob/main/file.py?query=param#section\",\n            \"https://github.com/owner/repo/blob/main/file.py\",\n        ),\n        (\n            \"https://github.com/owner/repo/blob/main/file.py\",\n            \"https://github.com/owner/repo/blob/main/file.py\",\n        ),\n    ],\n)\ndef test_github_normalization(url: str, expected: str) -> None:\n    \"\"\"Test GitHub URL normalization (uses default normalizer).\"\"\"\n    assert normalize_url(url, source_type=DocumentSource.GITHUB) == expected\n\n\n@pytest.mark.parametrize(\n    \"url,expected\",\n    [\n        (\n            \"https://gitlab.com/owner/repo/-/blob/main/file.py?query=param#section\",\n            \"https://gitlab.com/owner/repo/-/blob/main/file.py\",\n        ),\n    ],\n)\ndef test_gitlab_normalization(url: str, expected: str) -> None:\n    \"\"\"Test GitLab URL normalization (uses default normalizer).\"\"\"\n    assert normalize_url(url, source_type=DocumentSource.GITLAB) == expected\n\n\n@pytest.mark.parametrize(\n    \"url,expected\",\n    [\n        (\n            \"https://example.sharepoint.com/sites/Site/Doc.aspx?query=param#section\",\n            \"https://example.sharepoint.com/sites/Site/Doc.aspx\",\n        ),\n    ],\n)\ndef test_sharepoint_normalization(url: str, expected: str) -> None:\n    \"\"\"Test SharePoint URL normalization (uses default normalizer).\"\"\"\n    assert normalize_url(url, source_type=DocumentSource.SHAREPOINT) == expected\n\n\n@pytest.mark.parametrize(\n    \"url,expected_source\",\n    [\n        (\n            \"https://docs.google.com/document/d/1ABC123/edit\",\n            DocumentSource.GOOGLE_DRIVE,\n        ),\n        (\"https://drive.google.com/file/d/123\", DocumentSource.GOOGLE_DRIVE),\n        (\"https://www.notion.so/Page-abc123def456\", DocumentSource.NOTION),\n        (\"https://notion.site/page\", DocumentSource.NOTION),\n        (\n            \"https://example.atlassian.net/wiki/spaces/SPACE/pages/123\",\n            DocumentSource.CONFLUENCE,\n        ),\n        (\"https://example.atlassian.net/jira/browse/PROJ-123\", DocumentSource.JIRA),\n        (\"https://github.com/owner/repo/blob/main/file.py\", DocumentSource.GITHUB),\n        (\"https://gitlab.com/owner/repo\", DocumentSource.GITLAB),\n        (\"https://example.sharepoint.com/sites/Site\", DocumentSource.SHAREPOINT),\n        (\"https://workspace.slack.com/archives/C123/p456\", DocumentSource.SLACK),\n        (\"https://example.com/doc\", None),  # Unknown source\n    ],\n)\ndef test_detect_source_type(url: str, expected_source: DocumentSource | None) -> None:\n    \"\"\"Test source type detection from URL patterns.\"\"\"\n    assert _detect_source_type(url) == expected_source\n\n\n@pytest.mark.parametrize(\n    \"url,expected_source,expected_normalized\",\n    [\n        (\n            \"https://docs.google.com/document/d/1ABC123/edit\",\n            DocumentSource.GOOGLE_DRIVE,\n            \"https://docs.google.com/document/d/1ABC123\",\n        ),\n        (\n            \"https://www.notion.so/Page-1234567890abcdef1234567890abcdef\",\n            DocumentSource.NOTION,\n            \"12345678-90ab-cdef-1234-567890abcdef\",\n        ),\n        (\n            \"https://example.atlassian.net/wiki/spaces/SPACE/pages/123\",\n            DocumentSource.CONFLUENCE,\n            \"https://example.atlassian.net/wiki/spaces/SPACE/pages/123\",\n        ),\n    ],\n)\ndef test_normalize_url_with_auto_detection(\n    url: str, expected_source: DocumentSource, expected_normalized: str\n) -> None:\n    \"\"\"Test normalize_url auto-detects source type when source_type not provided.\"\"\"\n    detected = _detect_source_type(url)\n    assert detected == expected_source\n\n    normalized = normalize_url(url)  # No source_type provided\n    assert normalized == expected_normalized\n\n\n@pytest.mark.parametrize(\n    \"url,expected\",\n    [\n        (\n            \"https://example.com/doc?query=param#section\",\n            \"https://example.com/doc\",\n        ),\n        (\n            \"https://example.com/doc/\",\n            \"https://example.com/doc\",\n        ),\n        (\n            \"http://example.com/doc\",\n            \"http://example.com/doc\",  # Default normalizer preserves scheme\n        ),\n    ],\n)\ndef test_default_normalizer(url: str, expected: str) -> None:\n    \"\"\"Test default normalizer for connectors without custom normalizers.\"\"\"\n    # Use a source type that doesn't have a custom normalizer\n    result = normalize_url(url, source_type=DocumentSource.WEB)\n    assert result == expected\n\n\ndef test_normalize_url_returns_none_for_invalid_url() -> None:\n    \"\"\"Test that normalize_url returns None for invalid URLs.\"\"\"\n    assert normalize_url(\"not-a-url\") is None\n    assert normalize_url(\"\") is None\n\n\ndef test_normalize_url_with_unknown_source_type() -> None:\n    \"\"\"Test that normalize_url falls back to default for unknown source types.\"\"\"\n    url = \"https://example.com/doc?query=param\"\n    # Use a source type that doesn't have a custom normalizer\n    result = normalize_url(url, source_type=DocumentSource.WEB)\n    assert result == \"https://example.com/doc\"\n\n\ndef test_url_lookup_variants_includes_trailing_slash_versions() -> None:\n    \"\"\"Test that variants include both with and without trailing slash.\"\"\"\n    variants = _url_lookup_variants(\"https://example.com/path\")\n    assert \"https://example.com/path\" in variants\n    assert \"https://example.com/path/\" in variants\n    assert len(variants) == 2\n\n\ndef test_url_lookup_variants_strips_query_and_fragment() -> None:\n    \"\"\"Test that variants strip query parameters and fragments.\"\"\"\n    variants = _url_lookup_variants(\"https://example.com/path?a=1#section\")\n    assert \"https://example.com/path\" in variants\n    assert \"https://example.com/path/\" in variants\n    # Should not include query/fragment variants\n    assert \"https://example.com/path?a=1\" not in variants\n    assert \"https://example.com/path#section\" not in variants\n\n\ndef test_url_lookup_variants_handles_normalized_urls() -> None:\n    \"\"\"Test that variants work correctly with already-normalized URLs.\"\"\"\n    # Test with a Google Drive URL that's already normalized\n    variants = _url_lookup_variants(\"https://docs.google.com/document/d/abc123def456\")\n    assert \"https://docs.google.com/document/d/abc123def456\" in variants\n    assert \"https://docs.google.com/document/d/abc123def456/\" in variants\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/tool_implementations/python/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/tools/tool_implementations/python/test_code_interpreter_client.py",
    "content": "\"\"\"Unit tests for CodeInterpreterClient streaming-to-batch fallback.\n\nWhen the streaming endpoint (/v1/execute/stream) returns 404 — e.g. because the\ncode-interpreter service is an older version that doesn't support streaming — the\nclient should transparently fall back to the batch endpoint (/v1/execute) and\nconvert the batch response into the same stream-event interface.\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.tools.tool_implementations.python.code_interpreter_client import (\n    CodeInterpreterClient,\n)\nfrom onyx.tools.tool_implementations.python.code_interpreter_client import FileInput\nfrom onyx.tools.tool_implementations.python.code_interpreter_client import (\n    StreamOutputEvent,\n)\nfrom onyx.tools.tool_implementations.python.code_interpreter_client import (\n    StreamResultEvent,\n)\n\n\ndef _make_batch_response(\n    stdout: str = \"\",\n    stderr: str = \"\",\n    exit_code: int = 0,\n    timed_out: bool = False,\n    duration_ms: int = 50,\n) -> MagicMock:\n    \"\"\"Build a mock ``requests.Response`` for the batch /v1/execute endpoint.\"\"\"\n    resp = MagicMock()\n    resp.status_code = 200\n    resp.raise_for_status = MagicMock()\n    resp.json.return_value = {\n        \"stdout\": stdout,\n        \"stderr\": stderr,\n        \"exit_code\": exit_code,\n        \"timed_out\": timed_out,\n        \"duration_ms\": duration_ms,\n        \"files\": [],\n    }\n    return resp\n\n\ndef _make_404_response() -> MagicMock:\n    \"\"\"Build a mock ``requests.Response`` that returns 404 (streaming not found).\"\"\"\n    resp = MagicMock()\n    resp.status_code = 404\n    return resp\n\n\ndef test_execute_streaming_fallback_to_batch_on_404() -> None:\n    \"\"\"When /v1/execute/stream returns 404, the client should fall back to\n    /v1/execute and yield equivalent StreamEvent objects.\"\"\"\n\n    client = CodeInterpreterClient(base_url=\"http://fake:9000\")\n\n    stream_resp = _make_404_response()\n    batch_resp = _make_batch_response(\n        stdout=\"hello world\\n\",\n        stderr=\"a warning\\n\",\n    )\n\n    urls_called: list[str] = []\n\n    def mock_post(url: str, **_kwargs: object) -> MagicMock:\n        urls_called.append(url)\n        if url.endswith(\"/v1/execute/stream\"):\n            return stream_resp\n        if url.endswith(\"/v1/execute\"):\n            return batch_resp\n        raise AssertionError(f\"Unexpected URL: {url}\")\n\n    with patch.object(client.session, \"post\", side_effect=mock_post):\n        events = list(client.execute_streaming(code=\"print('hello world')\"))\n\n    # Streaming endpoint was attempted first, then batch\n    assert len(urls_called) == 2\n    assert urls_called[0].endswith(\"/v1/execute/stream\")\n    assert urls_called[1].endswith(\"/v1/execute\")\n\n    # The 404 response must be closed before making the batch call\n    stream_resp.close.assert_called_once()\n\n    # _batch_as_stream yields: stdout event, stderr event, result event\n    assert len(events) == 3\n\n    assert isinstance(events[0], StreamOutputEvent)\n    assert events[0].stream == \"stdout\"\n    assert events[0].data == \"hello world\\n\"\n\n    assert isinstance(events[1], StreamOutputEvent)\n    assert events[1].stream == \"stderr\"\n    assert events[1].data == \"a warning\\n\"\n\n    assert isinstance(events[2], StreamResultEvent)\n    assert events[2].exit_code == 0\n    assert not events[2].timed_out\n    assert events[2].duration_ms == 50\n    assert events[2].files == []\n\n\ndef test_execute_streaming_fallback_stdout_only() -> None:\n    \"\"\"Fallback with only stdout (no stderr) should yield two events:\n    one StreamOutputEvent for stdout and one StreamResultEvent.\"\"\"\n\n    client = CodeInterpreterClient(base_url=\"http://fake:9000\")\n\n    stream_resp = _make_404_response()\n    batch_resp = _make_batch_response(stdout=\"result: 42\\n\")\n\n    def mock_post(url: str, **_kwargs: object) -> MagicMock:\n        if url.endswith(\"/v1/execute/stream\"):\n            return stream_resp\n        if url.endswith(\"/v1/execute\"):\n            return batch_resp\n        raise AssertionError(f\"Unexpected URL: {url}\")\n\n    with patch.object(client.session, \"post\", side_effect=mock_post):\n        events = list(client.execute_streaming(code=\"print(42)\"))\n\n    # No stderr → only stdout + result\n    assert len(events) == 2\n\n    assert isinstance(events[0], StreamOutputEvent)\n    assert events[0].stream == \"stdout\"\n    assert events[0].data == \"result: 42\\n\"\n\n    assert isinstance(events[1], StreamResultEvent)\n    assert events[1].exit_code == 0\n\n\ndef test_execute_streaming_fallback_preserves_files_param() -> None:\n    \"\"\"When falling back, the files parameter must be forwarded to the\n    batch endpoint so staged files are still available for execution.\"\"\"\n\n    client = CodeInterpreterClient(base_url=\"http://fake:9000\")\n\n    stream_resp = _make_404_response()\n    batch_resp = _make_batch_response(stdout=\"ok\\n\")\n\n    captured_payloads: list[dict] = []\n\n    def mock_post(url: str, **kwargs: object) -> MagicMock:\n        if \"json\" in kwargs:\n            captured_payloads.append(kwargs[\"json\"])  # type: ignore[arg-type]\n        if url.endswith(\"/v1/execute/stream\"):\n            return stream_resp\n        if url.endswith(\"/v1/execute\"):\n            return batch_resp\n        raise AssertionError(f\"Unexpected URL: {url}\")\n\n    files_input: list[FileInput] = [{\"path\": \"data.csv\", \"file_id\": \"file-abc123\"}]\n\n    with patch.object(client.session, \"post\", side_effect=mock_post):\n        events = list(\n            client.execute_streaming(\n                code=\"import pandas\",\n                files=files_input,\n            )\n        )\n\n    # Both the streaming attempt and the batch fallback should include files\n    assert len(captured_payloads) == 2\n    for payload in captured_payloads:\n        assert payload[\"files\"] == files_input\n        assert payload[\"code\"] == \"import pandas\"\n\n    # Should still yield valid events\n    assert any(isinstance(e, StreamResultEvent) for e in events)\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/tool_implementations/python/test_python_tool_upload_cache.py",
    "content": "\"\"\"Unit tests for PythonTool file-upload caching.\n\nVerifies that PythonTool reuses code-interpreter file IDs across multiple\nrun() calls within the same session instead of re-uploading identical content\non every agent loop iteration.\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nfrom onyx.tools.models import ChatFile\nfrom onyx.tools.models import PythonToolOverrideKwargs\nfrom onyx.tools.tool_implementations.python.code_interpreter_client import (\n    StreamResultEvent,\n)\nfrom onyx.tools.tool_implementations.python.python_tool import PythonTool\n\nTOOL_MODULE = \"onyx.tools.tool_implementations.python.python_tool\"\n\n\ndef _make_stream_result() -> StreamResultEvent:\n    return StreamResultEvent(\n        exit_code=0,\n        timed_out=False,\n        duration_ms=10,\n        files=[],\n    )\n\n\ndef _make_tool() -> PythonTool:\n    emitter = MagicMock()\n    return PythonTool(tool_id=1, emitter=emitter)\n\n\ndef _make_override(files: list[ChatFile]) -> PythonToolOverrideKwargs:\n    return PythonToolOverrideKwargs(chat_files=files)\n\n\ndef _run_tool(tool: PythonTool, mock_client: MagicMock, files: list[ChatFile]) -> None:\n    \"\"\"Call tool.run() with a mocked CodeInterpreterClient context manager.\"\"\"\n    from onyx.server.query_and_chat.placement import Placement\n\n    mock_client.execute_streaming.return_value = iter([_make_stream_result()])\n\n    ctx = MagicMock()\n    ctx.__enter__ = MagicMock(return_value=mock_client)\n    ctx.__exit__ = MagicMock(return_value=False)\n\n    placement = Placement(turn_index=0, tab_index=0)\n    override = _make_override(files)\n\n    with patch(f\"{TOOL_MODULE}.CodeInterpreterClient\", return_value=ctx):\n        tool.run(placement=placement, override_kwargs=override, code=\"print('hi')\")\n\n\n# ---------------------------------------------------------------------------\n# Cache hit: same content uploaded in a second call reuses the file_id\n# ---------------------------------------------------------------------------\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", \"http://fake:8000\")\ndef test_same_file_uploaded_only_once_across_two_runs() -> None:\n    tool = _make_tool()\n    client = MagicMock()\n    client.upload_file.return_value = \"file-id-abc\"\n\n    pptx_content = b\"fake pptx bytes\"\n    files = [ChatFile(filename=\"report.pptx\", content=pptx_content)]\n\n    _run_tool(tool, client, files)\n    _run_tool(tool, client, files)\n\n    # upload_file should only have been called once across both runs\n    client.upload_file.assert_called_once_with(pptx_content, \"report.pptx\")\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", \"http://fake:8000\")\ndef test_cached_file_id_is_staged_on_second_run() -> None:\n    tool = _make_tool()\n    client = MagicMock()\n    client.upload_file.return_value = \"file-id-abc\"\n\n    files = [ChatFile(filename=\"data.pptx\", content=b\"content\")]\n\n    _run_tool(tool, client, files)\n\n    # On the second run, execute_streaming should still receive the file\n    client.execute_streaming.return_value = iter([_make_stream_result()])\n    ctx = MagicMock()\n    ctx.__enter__ = MagicMock(return_value=client)\n    ctx.__exit__ = MagicMock(return_value=False)\n\n    from onyx.server.query_and_chat.placement import Placement\n\n    placement = Placement(turn_index=1, tab_index=0)\n    with patch(f\"{TOOL_MODULE}.CodeInterpreterClient\", return_value=ctx):\n        tool.run(\n            placement=placement,\n            override_kwargs=_make_override(files),\n            code=\"print('hi')\",\n        )\n\n    # The second execute_streaming call should include the file\n    _, kwargs = client.execute_streaming.call_args\n    staged_files = kwargs.get(\"files\") or []\n    assert any(f[\"file_id\"] == \"file-id-abc\" for f in staged_files)\n\n\n# ---------------------------------------------------------------------------\n# Cache miss: different content triggers a new upload\n# ---------------------------------------------------------------------------\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", \"http://fake:8000\")\ndef test_different_file_content_uploaded_separately() -> None:\n    tool = _make_tool()\n    client = MagicMock()\n    client.upload_file.side_effect = [\"file-id-v1\", \"file-id-v2\"]\n\n    file_v1 = ChatFile(filename=\"report.pptx\", content=b\"version 1\")\n    file_v2 = ChatFile(filename=\"report.pptx\", content=b\"version 2\")\n\n    _run_tool(tool, client, [file_v1])\n    _run_tool(tool, client, [file_v2])\n\n    assert client.upload_file.call_count == 2\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", \"http://fake:8000\")\ndef test_multiple_distinct_files_each_uploaded_once() -> None:\n    tool = _make_tool()\n    client = MagicMock()\n    client.upload_file.side_effect = [\"id-a\", \"id-b\"]\n\n    files = [\n        ChatFile(filename=\"a.pptx\", content=b\"aaa\"),\n        ChatFile(filename=\"b.xlsx\", content=b\"bbb\"),\n    ]\n\n    _run_tool(tool, client, files)\n    _run_tool(tool, client, files)\n\n    # Two distinct files — each uploaded exactly once\n    assert client.upload_file.call_count == 2\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", \"http://fake:8000\")\ndef test_same_content_different_filename_uploaded_separately() -> None:\n    # Identical bytes but different names must each get their own upload slot\n    # so both files appear under their respective paths in the workspace.\n    tool = _make_tool()\n    client = MagicMock()\n    client.upload_file.side_effect = [\"id-v1\", \"id-v2\"]\n\n    same_bytes = b\"shared content\"\n    files = [\n        ChatFile(filename=\"report_v1.csv\", content=same_bytes),\n        ChatFile(filename=\"report_v2.csv\", content=same_bytes),\n    ]\n\n    _run_tool(tool, client, files)\n\n    assert client.upload_file.call_count == 2\n\n\n# ---------------------------------------------------------------------------\n# No cross-instance sharing: a fresh PythonTool re-uploads everything\n# ---------------------------------------------------------------------------\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", \"http://fake:8000\")\ndef test_new_tool_instance_re_uploads_file() -> None:\n    client = MagicMock()\n    client.upload_file.side_effect = [\"id-session-1\", \"id-session-2\"]\n\n    files = [ChatFile(filename=\"deck.pptx\", content=b\"slide data\")]\n\n    tool_session_1 = _make_tool()\n    _run_tool(tool_session_1, client, files)\n\n    tool_session_2 = _make_tool()\n    _run_tool(tool_session_2, client, files)\n\n    # Different instances — each uploads independently\n    assert client.upload_file.call_count == 2\n\n\n# ---------------------------------------------------------------------------\n# Upload failure: failed upload is not cached, retried next run\n# ---------------------------------------------------------------------------\n\n\n@patch(f\"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL\", \"http://fake:8000\")\ndef test_upload_failure_not_cached() -> None:\n    tool = _make_tool()\n    client = MagicMock()\n    # First call raises, second succeeds\n    client.upload_file.side_effect = [Exception(\"network error\"), \"file-id-ok\"]\n\n    files = [ChatFile(filename=\"slides.pptx\", content=b\"data\")]\n\n    # First run — upload fails, file is skipped but not cached\n    _run_tool(tool, client, files)\n\n    # Second run — should attempt upload again\n    _run_tool(tool, client, files)\n\n    assert client.upload_file.call_count == 2\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/tool_implementations/websearch/data/tartan.txt",
    "content": "Three tartans; the left and right are made with the \"modern\" dye palette; the middle is made with \"muted\" colours.Tartans come in a wide variety of colours and patterns.1970s [Missoni] tartan knit jumper (sweater) and skirt set\n\n**Tartan** ( [Scottish Gaelic]: _breacan_[\\[ˈpɾʲɛxkən\\]]), also known as **plaid** ( especialy in American English, is a patterned cloth consisting of crossing horizontal and vertical bands in multiple colours, forming repeating symmetrical patterns known as _setts_. Tartan patterns vary in complexity, from simple two-colour designs to intricate motifs with over twenty hues. Originating in woven wool, tartan is most strongly associated with [Scotland], where it has been used for centuries in traditional clothing such as the [kilt]. Specific tartans are linked to [Scottish clans], families, or regions, with patterns and colours derived historically from local natural dyes (now supplanted by artificial ones). Tartans also serve institutional roles, including [military uniforms] and organisational branding.\n\nTartan became a symbol of [Scottish identity], especially from the 17th century onward, despite a ban under the [Dress Act 1746] lasting about two generations following the [Jacobite rising of 1745]. The 19th-century [Highland Revival] popularized tartan globally, associating it with [Highland dress] and the [Scottish diaspora]. Today, tartan is used worldwide in clothing, accessories, and design, transcending its traditional roots. Modern tartans are registered for organisations, individuals, and commemorative purposes, with thousands of designs in the [Scottish Register of Tartans].\n\nWhile often linked to Scottish heritage, tartans exist in other cultures, such as Africa, East and South Asia, and Eastern Europe. The earliest surviving samples of tartan-style cloth are around 3,000 years old and were discovered in [Xinjiang], China.\n\n## Etymology and terminology\n\n\\[ [edit] \\]\n\nThe [English] and [Scots] word _tartan_ is possibly derived from [French] _tiretaine_ meaning ' [linsey-woolsey] cloth'.[\\[1\\]] [\\[2\\]] [\\[3\\]] Other hypotheses are that it derives from [Scottish Gaelic] _tarsainn_ or _tarsuinn_, meaning 'across' or 'crossing over';[\\[2\\]] [\\[3\\]] or from French _tartarin_ or _tartaryn_ (occurring in 1454 spelled _tartyn_)[\\[4\\]] meaning ' [Tartar cloth] '.[\\[1\\]] It is unrelated to the superficially similar word _[tarlatan] _, which refers to a very open-weave [muslin] similar to [cheesecloth]. _Tartan_ is both a [mass noun] (\"12 metres of tartan\") and a [count noun] (\"12 different tartans\").\n\nToday, _tartan_ refers to coloured patterns, though originally did not have to be made up of a pattern at all, as it referred to the type of weave; as late as the 1820s, some tartan cloth was described as \"plain coloured ... without pattern\".[\\[5\\]] [\\[6\\]] Patterned cloth from the [Gaelic] -speaking [Scottish Highlands] was called _breacan_, meaning 'many colours'. Over time, the meanings of _tartan_ and _breacan_ were combined to describe a certain type of pattern on a certain type of cloth.[\\[6\\]] \n\nThe pattern of a particular tartan is called its _sett_. The sett is made up of a series of lines at specific widths which cross at [right angles] and blend into each other;[\\[6\\]] the longer term _setting_ is occasionally used.[\\[7\\]] _Sett_ can refer to either the minimal visual presentation of the complete tartan pattern or to a textual representation of it (in a _thread count_).[\\[6\\]] \n\nToday _tartan_ is used more generally to describe the pattern, not limited to textiles, appearing on media such as paper, plastics, packaging, and wall coverings.[\\[8\\]] [\\[6\\]] [\\[9\\]] [\\[10\\]] \n\nIn North America, the term _plaid_ is commonly used to refer to tartan.[\\[11\\]] [\\[12\\]] [\\[13\\]] [\\[a\\]] _Plaid_, derived from the Scottish Gaelic _plaide_ meaning 'blanket',[\\[16\\]] [\\[b\\]] was first used of any rectangular garment, sometimes made up of tartan,[\\[c\\]] which could be worn several ways: the [belted plaid] (_breacan féile_) or \"great kilt\" which preceded the modern [kilt]; the [arisaid] (_earasaid_), a large shawl that could be wrapped into a dress; and several types of shoulder cape, such as the [full plaid] and [fly plaid]. In time, _plaid_ was used to describe blankets themselves.[\\[12\\]] In former times, the term _plaiding_[\\[20\\]] or _pladding_[\\[21\\]] was sometimes used to refer to tartan cloth.\n\n### Weaving construction\n\n\\[ [edit] \\]\n\nVisualisation of 2/2 twill weave: the black weft threads go two over then two under the orange warp threads, staggered by one thread each pass (resulting in a diagonal pattern). In the actual cloth, the white gaps would be closed.\n\nThe [Scottish Register of Tartans] provides the following summary definition of tartan:[\\[22\\]] \n\n> Tartan (the design) is a pattern that comprises two or more different solid-coloured stripes that can be of similar but are usually of differing proportions that repeat in a defined sequence. The sequence of the warp colours (long-ways threads) is repeated in same order and size in the weft (cross-ways threads). The majority of such patterns (or setts) are symmetrical, i.e. the pattern repeats in the same colour order and proportions in every direction from the two pivot points. In the less common asymmetric patterns, the colour sequence repeats in blocks as opposed to around alternating pivots but the size and colour sequence of warp and weft remain the same.\n\nClose-up view of traditional tartan cloth, showing pattern of diagonal \"ribs\" of colour; this is a five-colour tartan, in scarlet red, black, yellow, azure blue, and crimson red.\n\nIn more detail, traditional tartan cloth is a tight, staggered 2/2 [twill] weave of [worsted] wool: the horizontal _[weft] _ (also _woof_ or _fill_) is woven in a simple arrangement of two-over-two-under the fixed, vertical _[warp] _, advancing one thread at each pass.[\\[15\\]] As each thread in the weft crosses threads in the warp, the staggering by one means that each warp thread will also cross two weft threads. The result, when the material is examined closely, is a characteristic 45-degree diagonal pattern of \"ribs\" where different colours cross.[\\[6\\]] [\\[23\\]] Where a thread in the weft crosses threads of the same colour in the warp, this produces a solid colour on the tartan, while a weft thread crossing warp threads of a different colour produces an equal admixture of the two colours alternating, producing the appearance of a third colour – a halftone _blend_ or _mixture_ – when viewed from further back.[\\[24\\]] [\\[6\\]] [\\[22\\]] (The effect is similar to multicolour [halftone] printing, or [cross-hatching] in coloured-pencil art.)[\\[6\\]] Thus, a set of two base colours produces three different colours including one blend, increasing [quadratically] with the number of base colours; so a set of six base colours produces fifteen blends and a total of twenty-one different perceived colours.[\\[6\\]] [\\[25\\]] [\\[d\\]] This means that the more stripes and colours used, the more blurred and subdued the tartan's pattern becomes.[\\[6\\]] [\\[24\\]] Unlike in simple _[checker (chequer)] _ or _dicing_ patterns (like a chessboard), no solid colour in a tartan appears next to another solid colour, only a blend[\\[6\\]] (solid colours may touch at their corners).[\\[26\\]] \n\nJames D. Scarlett (2008) offered a definition of a usual tartan pattern (some types of tartan deviate from the particulars of this definition – see below):[\\[6\\]] \n\n> The unit of tartan pattern, the _sett_, is a square, composed of a number of rectangles, square and oblong, arranged symmetrically around a central square. Each of these elements occurs four times, at intervals of ninety degrees, and each is rotated ninety degrees in relation to its fellows. The proportions of the elements are determined by the relative widths of the stripes that form them.\n\nThe sequence of thread colours in the _sett_ (the minimal design of the tartan, to be duplicated[\\[6\\]]  – \"the DNA of a tartan\"),[\\[27\\]] starts at an edge and either reverses or (rarely) repeats on what are called _pivot points_ or _pivots_.[\\[28\\]] In diagram A, the sett begins at the first pivot, reverses at the second pivot, continues, then reverses again at the next pivot, and will carry on in this manner horizontally. In diagram B, the sett proceeds in the same way as in the warp but vertically. The diagrams illustrate the construction of a typical _symmetric_[\\[29\\]] (also _symmetrical_,[\\[27\\]] _reflective_,[\\[27\\]] _reversing_,[\\[30\\]] or _mirroring_)[\\[31\\]] [\\[e\\]] tartan. However, on a rare _asymmetric_[\\[33\\]] ( _asymmetrical_,[\\[27\\]] or _non-reversing_)[\\[33\\]] [\\[f\\]] tartan, the sett does not reverse at the pivots, it just repeats at them.[\\[g\\]] An old term for the latter type is _cheek_ or _cheeck_ pattern.[\\[35\\]] Also, some tartans (very few among traditional Scottish tartans) do not have exactly the same sett for the warp and weft. This means the warp and weft will have differing thread counts (see below).[\\[h\\]] Asymmetric and differing-warp-and-weft patterns are more common in madras cloth (see [§ Indian madras], below) and some other weaving traditions than in Scottish tartan.\n\n- Diagram A, the warp\n\n- Diagram B, the weft\n\n- Diagram C, the tartan. The combining of the warp and weft.\n\n\nA tartan is recorded by counting the threads of each colour that appear in the sett.[\\[i\\]] The _[thread count] _ (or _threadcount_, _thread-count_) not only describes the width of the stripes on a sett, but also the colours used (typically abbreviated).[\\[27\\]] Usually every number in a thread count is an even number[\\[42\\]] to assist in manufacture. The first and last threads of the thread count are the pivots.[\\[28\\]] A thread count combined with exact colour information and other weaving details is referred to as a _ticket stamp_[\\[43\\]] or simply _ticket_.[\\[44\\]] \n\nTartan weaving in [Lochcarron], [Scottish Highlands] \n\nThere is no universally standardised way to write a thread count,[\\[37\\]] but the different systems are easy to distinguish. As a simple example:\n\n- The thread count \"/K4 R24 K24 Y4/\" corresponds to a mirroring pattern of 4 black threads, 24 red threads, 24 black threads, 4 yellow threads, in which the beginning black and ending yellow pivots are _not_ repeated (after Y4/, the colours are reversed, first K24 then R24); this is a \" _full-count_ at the pivots\" thread count.[\\[28\\]] \n - An equivalent notation is boldfacing the pivot abbreviations: **K** 4 R24 K24 **Y** 4.\n- The same tartan could also be represented as \"K/2 R24 K24 Y/2\", in markup that indicates that the leading black and trailing yellow _are_ duplicated before continuing from these pivot points (after Y/2, the colours are reversed as Y/2 again, then K24, then R24); this is a \" _half-count_ at the pivots\" thread count.[\\[27\\]] \n- In the older and potentially ambiguous style of thread-counting, without the \"/\" (or bold) notation, a thread count like \"K4 R24 K24 Y4\" is assumed to be full-count at the pivots, unless the author clearly indicates otherwise.[\\[28\\]] [\\[37\\]] [\\[j\\]] \n\nIn all of these cases, the result is a _half-sett_ thread count, which represents the threading before the pattern mirrors and completes; a full-sett thread count for a mirroring (symmetric) tartan is redundant.[\\[37\\]] A \"/\" can also be used between two colour codes (e.g. \"W/Y24\" for \"white/yellow 24\") to create even more of a shorthand threadcount for simple tartans in which half of the half-sett pattern is different from the other only in the way of a colour swap;[\\[45\\]] but this is not a common style of thread-counting.\n\n- An asymmetric tartan, one that does not mirror, would be represented in a _full-sett_ thread count with \"...\" markup, as \"...K4 R24 K24 Y4...\" (after Y4, the entire pattern would begin again from K4).[\\[27\\]] \n\nVarious writers and tartan databases do not use a consistent set of colour names (see [§ Colour, palettes, and meaning], below) and abbreviations,[\\[46\\]] so a thread count may not be universally understandable without a colour [key/legend]. Some recorders prefer to begin a thread count at the pivot with the colour name (or abbreviation) that is first in alphabetical order (e.g. if there is a white pivot and a blue one, begin with blue),[\\[27\\]] but this is actually arbitrary.\n\nThough thread counts are quite specific, they can be modified depending on the desired size of the tartan. For example, the sett of a tartan (e.g., 6 inches square – a typical size for kilts)[\\[27\\]] may be too large to fit upon the face of a [necktie]. In this case, the thread count would be reduced _in proportion_ (e.g. to 3 inches to a side).[\\[37\\]] In some works, a thread count is reduced to the smallest even number of threads (often down to 2) required to accurately reproduce the design;[\\[28\\]] in such a case, it is often necessary to up-scale the thread count proportionally for typical use in kilts and plaids.\n\nBefore the 19th century, tartan was often woven with thread for the weft that was up to 1/3 thicker than the fine thread used for the warp,[\\[6\\]] which would result in a rectangular rather than square pattern; the solution was to adjust the weft thread count to return the pattern to square,[\\[23\\]] or make it non-square on purpose, as is still done in a handful of traditional tartans.[\\[h\\]] Uneven warp-and-weft thread thickness could also contribute to a striped rather than checked appearance in some tartan samples.[\\[47\\]] \n\nThe predominant colours of a tartan (the widest bands) are called the _under-check_ (or _under check_, _undercheck_, _under-cheque_);[\\[48\\]] sometimes the terms _ground_,[\\[k\\]] _background_,[\\[50\\]] or _base_[\\[50\\]] are used instead, especially if there is only one dominant colour. Thin, contrasting lines are referred to as the _over-check_[\\[51\\]] [\\[50\\]] (also _over-stripe_ or _overstripe_).[\\[52\\]] Over-checks in pairs are sometimes referred to as _[tram] lines_, _tramlines_, or _tram tracks_.[\\[53\\]] Bright over-checks are sometimes bordered on either side (usually both), for extra contrast, by additional thin lines, often black, called _guard lines_ or _guards_.[\\[53\\]] Historically, the weaver William Wilson & Son of Bannockburn sometimes wove bright over-checks in silk, to give some added shine[\\[54\\]] [\\[55\\]] (commercially around 1820–30, but in regimental officers' plaids back to at least 1794).[\\[56\\]] [\\[l\\]] Tartan used for plaids (not the belted plaid) often have a [purled] fringe.[\\[57\\]] \n\nZoom-in on a bagpiper's [full plaid] ( [royal Stuart tartan]), showing the purled fringe style typical for such garments\n\nAn old-time practice, to the 18th century, was to add an accent on plaids or sometimes kilts in the form of a _[selvedge] _ in [herringbone weave] at the edge, 1–3 inches (2.5–7.6 cm) wide, but still fitting into the colour pattern of the sett;[\\[57\\]] [\\[58\\]] a few modern weavers will still produce some tartan in this style. Sometimes more decorative selvedges were used: _Selvedge marks_ were borders (usually on one side only) formed by repeating a colour from the sett in a broad band (often in herringbone), sometimes further bordered by a thin strip of another colour from the sett or decorated in mid-selvedge with two thin strips; these were typically used for the bottoms of belted plaids and kilts,[\\[57\\]] [\\[59\\]] and were usually black in military tartans, but could be more colourful in civilian ones.[\\[60\\]] The more elaborate _selvedge patterns_ were a wider series of narrow stripes using some or all of the colours of the sett; these were almost exclusively used on household tartans (blankets, curtains, etc.), and on two opposing sides of the fabric.[\\[60\\]] [\\[57\\]] The very rare _total border_ is an all-four-sides selvedge of a completely different sett; described by Peter Eslea MacDonald (2019) as \"an extraordinarily difficult feature to weave and can be regarded as the zenith of the tartan weaver's art\",[\\[57\\]] it only survives in Scottish-style tartan as a handful of 18th-century samples (in Scotland[\\[61\\]] and [Nova Scotia], Canada, but probably all originally from Scotland).[\\[62\\]] The style has also been used [in Estonia in the weaving of _suurrätt_ shawls/plaids].\n\n- 18th-century tartan with a herringbone _selvedge_ at the bottom\n\n\n\n( [detail])\n\n- Black Watch tartan with a _selvedge mark_ at the bottom (also herringbone)\n\n\n\n( [detail])\n\n- Wilsons 1819 blanket tartan with a _selvedge pattern_ on the right\n\n- Bottom-right corner of blanket with _total border_ selvedge; approximation based on photo of real blanket discovered in Nova Scotia, but probably Scottish, c. 1780s\n\n\nTartan is usually woven _balanced-warp_ (or just _balanced_), repeating evenly from a pivot point at the centre outwards and with a complete sett finishing at the outer selvedge;[\\[7\\]] [\\[63\\]] [\\[64\\]] e.g. a piece of tartan for a plaid might be 24 setts long and 4 wide. An _offset_, _off-set_, or _unbalanced_ weave is one in which the pattern finishes at the edge in the middle of a pivot colour; this was typically done with pieces intended to be joined (e.g. for a belted plaid or a blanket) to make larger spans of cloth with the pattern continuing across the seam;[\\[7\\]] [\\[64\\]] if the tartan had a selvedge mark or selvedge pattern, it was at the other side of the warp.[\\[65\\]] \n\nThe term _hard tartan_ refers to a version of the cloth woven with very tightly wound, non-fuzzy thread, producing a comparatively rougher and denser (though also thinner) material than is now typical for kilts.[\\[66\\]] [\\[67\\]] It was in common use up until the 1830s.[\\[47\\]] There are extant but uncommon samples of hard tartan from the early 18th century that use the more intricate herringbone instead of twill weave throughout the entire cloth.[\\[68\\]] \n\nWhile modern tartan is primarily a commercial enterprise on large [power looms], tartan was originally the product of rural weavers of the pre-industrial age, and can be produced by a dedicated hobbyist with a strong, stable [hand loom].[\\[69\\]] [\\[70\\]] [\\[71\\]] Since around 1808, the traditional size of the warp [reed] for tartan is 37 inches (94 cm), the length of the Scottish [ell] (previous sizes were sometimes 34 and 40 inches).[\\[72\\]] Telfer Dunbar (1979) describes the setup thus:[\\[72\\]] \n\n> The reed varies in thickness according to the texture of the material to be woven. A thirty-Porter (which contains 20 splits of the reed) or 600-reed, is divided into 600 openings in the breadth of 37 inches. Twenty of these openings are called a Porter and into each opening are put two threads, making 1,200 threads of warp and as many of weft in a square yard of tartan through a 30-Porter reed.\n\n_Splits_ are also referred to as _dents_, and _Porters_ are also called _gangs_.[\\[73\\]] \n\n### Styles and design principles\n\n\\[ [edit] \\]\n\nTraditional tartan patterns can be divided into several style classes. The most basic is a simple two-colour check of thick bands (with or without thin over-checks of one or more other colours). A variant on this splits one or more of the bands, to form squares of smaller squares instead of just big, solid squares; a style heavily favoured in _[Vestiarium Scoticum] _. A complexity step up is the superimposed check, in which a third colour is placed centrally \"on top of\" or \"inside\" (surrounded by) one of the base under-check colours, providing a pattern of nested squares, which might then also have thin, bright and/or black over-checks added. Another group is multiple checks, typically of two broad bands of colour on a single dominant \"background\" (e.g. red, blue, red, green, red – again possibly with contrasting narrow over-checks). The aforementioned types can be combined into more complex tartans. In any of these styles, an over-check is sometimes not a new colour but one of the under-check colours \"on top of\" the other under-check. A rare style, traditionally used for [arisaid] (_earasaid_) tartans but no longer in much if any Scottish use, is a pattern consisting entirely of thin over-checks, sometimes grouped, \"on\" a single ground colour, usually white.[\\[74\\]] M. Martin (1703) reported that the line colours were typically blue, black, and red.[\\[75\\]] Examples of this style do not survive,[\\[76\\]] at least not in the tartan databases (there may be preserved museum pieces with such patterns).[\\[m\\]] Some tartan patterns are more abstract and do not fit into any of these styles,[\\[78\\]] especially in madras cloth (see [§ Indian madras], below).\n\n- **Most basic check** – [MacGregor red-and-black] (Rob Roy), as simple as it gets: equal proportions of two colours.\n\n- **Basic check modified** – [Wallace] red/dress, black on a slightly larger ground of red, laced with yellow and black over-checks.\n\n- **Split check** – [MacGregor] red-and-green with a wide green band split into three to form a \"square of squares\", then laced with a white over-check.\n\n- **Superimposed check** – [Ruthven], a red ground with a big green stripe \"inside\" a bigger blue one, then white and green over-checks.\n\n- **Multiple checks** – [Davidson], a green ground with equal blue and black bands, then with red, blue, and black over-checks.\n\n- **Complex example** – [Ross], combines split-check and multiple-check styles, with one blue and two green split checks on red, with blue and green over-checks.\n\n\nThere are no codified rules or principles of tartan design, but a few writers have offered some considered opinions. Banks & de La Chapelle (2007) summarized, with a view to broad, general tartan use, including for fashion: \"Color – and how it is worked – is pivotal to tartan design.... Thus, tartans should be composed of clear, bright colors, but ones sufficiently soft to blend well and thereby create new shades.\" James D. Scarlett (2008) noted: \"the more colours to begin with, the more subdued the final effect\",[\\[50\\]] or put more precisely, \"the more stripes to the sett and the more colours used, the more diffuse and 'blurred' the pattern\".[\\[6\\]] That does not necessarily translate into subtlety; a tartan of many colours and stripes can seem \"busy\".[\\[79\\]] \n\nScarlett (2008), after extensive research into historical Highland patterns (which were dominated by rich red and medium green in about equal weight with dark blue as a blending accent – not accounting for common black lines), suggested that for a balanced and _traditional_ style:[\\[6\\]] \n\n> any basic tartan type of design should have for its background, a \"high impact\" colour and two others, of which one should be the [complement] to the first and the other a darker and more neutral shade; other colours, introduced to break up the pattern or as accents, should be a matter of taste. It is important that no colour should be so strong as to \"swamp\" another; otherwise, the blending of colours at the crossing will be adversely affected. ... Tartan is a complex abstract art-form with a strong mathematical undertone, far removed from a simple check with a few lines of contrasting colours scattered over it.\n\nScarlett (1990) provided a more general explanation, traditional styles aside:[\\[80\\]] \n\n> Colours for tartan work require to be clear and unambiguous and bright but soft, to give good contrast of both colour and brightness and to mix well so as to give distinctly new shades where two colours cross without any one swamping another.\n\nFurther, Scarlett (1990) held that \"background checks will show a firm but not harsh contrast and the overchecks will be such as to show clearly\" on the under-check (or \"background\") colours.[\\[50\\]] He summed up the desired total result as \"a harmonious blend of colour and pattern worthy to be looked upon as an art form in its own right\".[\\[81\\]] \n\nOmitting traditional black lines has a strong softening effect, as in the 1970s Missoni fashion ensemble (top right) and in many madras patterns (see [§ Indian madras], below). A Scottish black-less design (now the [Mar] dress tartan) dates to the 18th century;[\\[82\\]] another is Ruthven (1842, above), and many of the Ross tartans (e.g. 1886, above), as well as several of the Victorian–Edwardian [MacDougal\\[l\\]] designs,[\\[83\\]] are further examples. Various modern tartans also use this effect, e.g. Canadian Maple Leaf (1964, at [§ Regional], below). Clever use of black or another dark colour can produce a [visual perception] of depth.[\\[84\\]] \n\n### Colour, palettes, and meaning\n\n\\[ [edit] \\]\n\nThe brighter of the [MacLeod] tartans, known affectionately as the \"loud MacLeod\", in the saturated _modern_ palette.\n\nThere is no set of exact colour standards for tartan hues; thread colour varies from weaver to weaver even for \"the same\" colour.[\\[85\\]] A certain range of general colours, however, are traditional in Scottish tartan. These include blue (dark), crimson (rose or dark red), green (medium-dark), black, grey (medium-dark), purple, red (scarlet or bright), tan/brown, white (actually natural undyed wool, called _lachdann_ in Gaelic),[\\[86\\]] [\\[n\\]] and yellow.[\\[45\\]] [\\[6\\]] [\\[o\\]] Some additional colours that have been used more rarely are azure (light or sky blue), maroon, and vert (bright or grass green),[\\[45\\]] plus light grey (as seen in Balmoral tartan, though it is sometimes given as lavender).[\\[89\\]] Since the opening of the tartan databases to registration of newly designed tartans, including many for organisational and fashion purposes, a wider range of colours have been involved, such as orange[\\[90\\]] and pink,[\\[91\\]] which were not often used (as distinct colours rather than as renditions of red) in old traditional tartans.[\\[p\\]] The [Scottish Register of Tartans] uses a long list of colours keyed to hexadecimal \" [Web colours] \", sorting groups of hues into a constrained set of basic codes (but expanded upon the above traditional list, with additional options like dark orange, dark yellow, light purple, etc.).[\\[92\\]] This helps designers fit their creative tartan into a coding scheme while allowing weavers to produce an approximation of that design from readily stocked yarn supplies.\n\nIn the mid-19th century, the [natural dyes] that had been [traditionally used in the Highlands] [\\[24\\]] [\\[93\\]] [\\[94\\]] [\\[q\\]] (like various [lichens], [alder] bark, [bilberry], [cochineal], [heather], [indigo], [woad], and [yellow bedstraw]) began to be replaced by [artificial dyes], which were easier to use and were more economic for the booming tartan industry,[\\[95\\]] though also less subtle.[\\[96\\]] Although [William Morris] in the late-19th-century [Arts and Crafts movement] tried to revive use of British natural dyes, most were so low-yield and so inconsistent from locality to locality (part of the reason for the historical tartan differentiation by area) that they proved to have little mass-production potential, despite some purple dye ( [cudbear]) commercialisation efforts in Glasgow in the 18th century.[\\[95\\]] The hard-wound, fine wool used in tartan weaving was rather resistant to natural dyes, and some dye baths required days or even weeks.[\\[95\\]] The dyeing also required [mordants] to fix the colours permanently, usually metallic salts like [alum]; there are records from 1491 of alum being imported to [Leith], though not necessarily all for tartan production in particular.[\\[97\\]] Some colours of dye were usually imported, especially red cochineal and to some extent blue indigo (both expensive and used to deepen native dyes), from the [Low Countries], with which Scotland had extensive trade since the 15th century.[\\[98\\]] Aged human urine (called _fual_ or _graith_) was also used, as a colour-deepener, a dye [solubility] agent, a lichen [fermenter], and a final colour-fastness treatment.[\\[99\\]] All commercially manufactured tartan today is coloured using artificial not natural dyes, even in the less saturated colour palettes.[\\[100\\]] [\\[101\\]] \n\nThe hues of colours in any established tartan can be altered to produce variations of the same tartan. Such varying of the hues to taste dates to at least the 1788 pattern book of manufacturer William Wilson & Son of Bannockburn.[\\[102\\]] Today, the semi-standardised colour schemes or _palettes_ (what marketers might call \" [colourways] \")[\\[103\\]] are divided generally into _modern_, _ancient_, _muted_, and _weathered_ (sometimes with other names, depending on weaver). These terms only refer to relative [dye] [\"colourfulness\" saturation] levels and do not represent distinct tartans.[\\[104\\]] [\\[105\\]] \n\nModernAlso known as _ordinary_; refers to darker tartan, with fully saturated colours.[\\[101\\]] [\\[105\\]] In a _modern_ palette, setts made up of blue, black, and green tend to be obscured because of the darkness of the colours in this scheme.[\\[101\\]] AncientAlso known as _old colours_ (OC); refers to a lighter palette of tartan. These hues are ostensibly meant to represent the colours that would result from natural-dyed fabric aging over time. However, the results are not accurate (e.g., in real examples of very old tartan, black often fades toward khaki[\\[101\\]] or green[\\[106\\]] while blue remains dark;[\\[101\\]] and natural dyes are capable of producing some very vibrant colours in the first place, though not very consistently).[\\[105\\]] [\\[80\\]] [\\[107\\]] This style originated in the first half of the 20th century.[\\[108\\]] [\\[105\\]] This _ancient_ is not to be confused with the same word in a few names of tartans such as \"ancient Campbell\".WeatheredAlso called _faded_; refers to tartan that is even lighter (less saturated) than _ancient_, as if exposed for a very long time.[\\[105\\]] This style was invented in the late 1940s.[\\[108\\]] MutedRefers to tartan which is between _modern_ and _ancient_ in vibrancy. Although this type of colouring is very recent, dating only from the early 1970s, these hues are thought to be the closest match to the colours attained by natural dyes used before the mid-19th century.[\\[105\\]] \n\nSome particular tartan mills have introduced other colour schemes that are unique to that weaver and only available in certain tartans. Two examples are Lochcarron's _antique_,[\\[105\\]] between _modern_ and _ancient_; and D. C. Dalgliesh's _reproduction_, a slight variation on _weathered_,[\\[104\\]] dating to the 1940s and claimed to be based on 18th-century samples.[\\[109\\]] \n\nA general observation about _ancient/old_, _weathered/faded_, and _muted_ are that they rather uniformly reduce the saturation of all colours, while actual natural-dyed tartan samples show that the historical practice was usually to pair one or more saturated colours with one or more pale ones, for greater clarity and depth, a \"harmonious balance\".[\\[110\\]] [\\[105\\]] [\\[104\\]] According to Scarlett (1990): \"The colours were clear, bright and soft, altogether unlike the eye-searing brilliance or washed-out dullness of modern tartans\".[\\[81\\]] \n\nThe same tartan in the same palette from two manufacturers will not precisely match; there is considerable artistic license involved in exactly how saturated to make a hue.[\\[101\\]] \n\nTartan-generation software can approximate the appearance of a tartan in any of these palettes. The examples below are all the \"Prince Charles Edward Stuart\" tartan:[\\[111\\]] \n\n- _Modern_ palette\n\n- _Ancient_ or _old colours_ palette\n\n- _Weathered_ or _faded_ palette\n\n- _Muted_ palette\n\n- Lochcarron-style _antique_ palette\n\n- D. C. Dalgliesh-style _reproduction_ palette\n\n\nScottish tartans that use two or more hues of the same basic colour are fairly rare. The best known is the British royal family's Balmoral[\\[112\\]] (1853, two greys, both as under-check – see illustration at [§ Family and individual], below). Others include: [Akins] [\\[113\\]] (1850, two reds, one as over-check and sometimes rendered purple), [MacBean] [\\[114\\]] (1872, two reds, one as over-check and sometimes rendered purple), Childers Universal regimental[\\[115\\]] (1907, two greens, both under-check), [Gordon] red[\\[116\\]] (recorded 1930–1950 but probably considerably older; two blues and two reds, one of each used more or less as over-checks), [Galloway] district hunting/green[\\[117\\]] [\\[118\\]] (1939/1950s, two greens, both under-check), [US Air Force Reserve] Pipe Band[\\[119\\]] (1988, two blues, both under-check), [McCandlish] [\\[120\\]] [\\[121\\]] [\\[122\\]] (1992, three variants, all under-check), [Isle of Skye] district[\\[123\\]] (1992, three greens, all arguably under-check, nested within each other), and [Chisholm] Colonial[\\[124\\]] (2008, two blues, one an over-check, the other nearly blended into green). The practice is more common in very recent commercial tartans that have no association with Scottish families or districts, such as the [Loverboy] fashion label tartan[\\[125\\]] (2018, three blues, one an over-check).\n\nThe idea that the various colours used in tartan have a specific meaning is purely a modern one,[\\[126\\]] notwithstanding a legend that red tartans were \"battle tartans\", designed so they would not show blood. It is only recently created tartans, such as [Canadian provincial and territorial tartans] (beginning 1950s) and [US state tartans] (beginning 1980s), that are stated to be designed with certain symbolic meaning for the colours used. For example, green sometimes represents [prairies] or forests, blue can represent lakes and rivers, and yellow might stand for various crops.[\\[127\\]] In the _Scottish Register of Tartans_ (and the databases before it), colour inspiration notes are often recorded by a tartan's designer. However, there is no common set of tartan colour or pattern \"motifs\" with [allusive] meanings that is shared among designers.[\\[r\\]] \n\nMore abstractly, from an [art criticism] perspective, design historian [Richard Martin] (1988) wrote of tartans as designs and tartan as a textile class having no truly endemic or objectified meanings, but being an art that \"has the property of being a vessel or container of meaning, a design form that exists not only in history but through history\", capable of conveying radically different, even contradictory, contextual meanings \"ever changing and evolving\" through socio-cultural transmutation of the fabric's use. Thus tartan could veer from symbol of anti-union and Jacobite Highland rebellion to emblem of pan-British loyalty to empire in the space of two generations, or serve different fashion markets in the same recent decades as both a sartorial status symbol of traditional values and a punk and grunge rebel banner.[\\[130\\]] \n\n### Pre-medieval origins\n\n\\[ [edit] \\]\n\nToday, tartan is mostly associated with Scotland; however, the oldest tartan-patterned twill cloth[\\[131\\]] ever discovered dates to a heterogenous culture of the [Tarim Basin], c. 2100 BC through the first centuries AD[\\[131\\]] [\\[132\\]] [\\[133\\]] in what today is [Xinjiang], China, southeast of Kazakhstan. The tartan fabric (along with other types of simple and patterned cloth) was recovered, in excavations beginning in 1978, with other grave goods of the [Tarim or Ürümqi mummies] [\\[134\\]]  – a group of often [Caucasoid] (light-haired, round-eyed)[\\[135\\]] [\\[136\\]] bodies naturally preserved by the arid desert rather than intentionally [mummified]. The most publicised of them is the [Chärchän Man], buried around 1,000 BC with tartan-like leggings in the [Taklamakan Desert].[\\[137\\]] [\\[134\\]] Other twill tartan samples (with differing warp and weft) were recovered in the region from the site of Qizilchoqa in 1979, dating to around 1,200 BC; the material was woven with up to six colours and required a sophisticated loom[\\[131\\]] [\\[138\\]] (of a type that seems to have originated in the West).[\\[134\\]] [\\[s\\]] [Victor H. Mair], an archaeologist and linguist involved in the excavations wrote: \"The ancient inhabitants ... undoubtedly had vibrant interactions with peoples of West Central Asia and even further west, since their magnificent textiles possess motifs, dyes, and weaves that are characteristic of cultures that lie in that direction.\"[\\[131\\]] \n\nTextile analysis of that fabric has shown it to be similar to that of ancient Europe.[\\[139\\]] According to textile historian [Elizabeth J. Wayland Barber], the late [Bronze Age] to early [Iron Age] people of Central Europe, the [Hallstatt culture], which is linked with ancient [Celtic] populations and flourished between the 8th and 6th centuries BC, produced tartan-like textiles. Some of them were discovered in 2004, remarkably preserved, in the Hallstatt salt mines near [Salzburg], Austria; they feature a mix of natural-coloured and dyed wool.[\\[2\\]] [\\[t\\]] Some date as early as 1200 BC, and Wayland Barber says of them that: \"The overall similarities between Hallstatt plaid twills and recent Scottish ones, right down to the typical weight of the cloth, strongly indicate continuity of tradition. The chief difference is that the Hallstatt plaids contain no more than two colors\".[\\[140\\]] Similar finds have been made elsewhere in Central Europe and Scandinavia.[\\[6\\]] \n\nClassical Roman writers made various references to the continental [Gauls], south of Britain, wearing striped or variegated clothing; Latin seems to have lacked an exact word for 'checked'. For example, [Virgil] in the [Aeneid] (29–19 BC, book VIII, line 660) described the Gauls as _virgatis lucent sagulis_ (or _sagalis_) meaning something like 'they shine in striped cloaks' or 'their cloaks are striped brightly'.[\\[141\\]] [\\[142\\]] [\\[143\\]] Other writers used words such as _pictae_ and _virgatae_[\\[144\\]] with translations like 'marled', 'variegated', 'particoloured', etc. Scarlett (1990) warns: \"What is not reasonable is the ready assumption by many modern authors that every time one of these words, or something like it, was used, tartan was intended.\"[\\[145\\]] It might have been intended sometimes, or the writer might have just meant linear stripes like [seersucker] cloth. Both Scarlett and Thompson (1992) decry the unsustainable assumption by a few earlier modern writers (e.g. James Grant, 1886) that Gauls must have been running around in clan tartans.[\\[145\\]] [\\[141\\]] The Romans particularly wrote of Gauls as wearing striped _[braccae] _ (trousers). E. G. Cody in remarks in his 1885 edition of [John Lesley] 's _Historie of Scotland_ hypothesized that this was actually a Gaulish [loanword] and was [cognate] with Gaelic _breacan_.[\\[144\\]] This is one of many \"tartan legends\" that is not well accepted; rather, _braccae_ is considered by modern [linguists] a cognate of English _breeches_, Gaelic _briogais_ ('trousers'), etc.[\\[146\\]] \n\nThe earliest documented tartan-like cloth in Britain, known as the \"Falkirk tartan\",[\\[147\\]] dates from the 3rd century AD.[\\[148\\]] It was uncovered at [Falkirk] in Stirlingshire, Scotland, near the [Antonine Wall]. The fragment, held in the [National Museum of Scotland], was stuffed into the mouth of an earthenware pot containing almost 2,000 [Roman] coins.[\\[149\\]] The Falkirk tartan has a simple [\"Border check\"] design, of undyed light and dark wool.[\\[u\\]] Other evidence from this period is the surviving fragment of a statue of [Roman Emperor] [Caracalla], once part of the [triumphal arch] of [Volubilis] completed in 217 AD. It depicts a [Caledonian] [Pictish] prisoner wearing tartan [trews] (represented by carving a checked design then [inlaying] it with bronze and silver [alloys] to give a variegated appearance).[\\[150\\]] [\\[v\\]] Based on such evidence, tartan researcher James D. Scarlett (1990) believes Scottish tartan to be \"of Pictish or earlier origin\",[\\[151\\]] though Brown (2012) notes there is no way to prove or disprove this.[\\[152\\]] \n\nEarly forms of tartan like this are thought to have been invented in pre-Roman times, and would have been popular among the inhabitants of the northern [Roman provinces] [\\[153\\]] [\\[154\\]] as well as in other parts of [Northern Europe] such as [Jutland], where the same pattern was prevalent,[\\[155\\]] [\\[156\\]] [\\[157\\]] and [Bronze Age] [Sweden].[\\[158\\]] \n\nThat [twill] weave was selected, even in ancient times, is probably no accident; \"plain (2/2) twill for a given gauge of yarn, yields a cloth 50% heavier \\[denser\\] – and hence more weather-proof – than the [simple 1/1 weave].\"[\\[6\\]] According to Scarlett (2008):[\\[6\\]] \n\n> \\[T\\]here are sound reasons why such a type of pattern-textile should have developed almost automatically in isolated, self-sufficient ... communities. Such communities are unlikely to possess large dye-vats, and so cannot piece-dye woven cloth; such processes as [batik] and [tie-dye] are unavailable. ... Stripes are the practical solution, since they use small quantities of a colour at a time and are interspersed with other colours, but the scope is limited ...; stripes across both brighten the colours and add many mixtures. From there on it is really only a matter of getting organised; the now-geometric pattern reduces to a small unit, easier to remember and to follow in a world where little was written down; it is further simplified by being split into two equal halves and, with weft as warp, the weft pattern can be followed from the warp.\n\nDetail of Spanish altarpiece by the \"Master of [Estamariu] \", late 14th century, showing a particoloured [cotehardie] with a three-colour, complex tartan\n\nThere is little written or pictorial evidence about tartan (much less surviving tartan cloth) from the [medieval era]. Tartan use in Britain between the 3rd-century Falkirk tartan and 16th-century samples, writings, and art is unclear.[\\[159\\]] [\\[160\\]] [Cosmo Innes] (1860) wrote that, according to medieval [hagiographies], Scots of the 7th–8th centuries \"used cloaks of variegated colour, apparently of home manufacture\".[\\[161\\]] Based on similarities of tartans used by various clans, including the [Murrays], [Sutherlands], and [Gordons], and the history of their family interactions over the centuries, [Thomas Innes of Learney] estimated that a regional \"parent\" pattern, of a more general style, might date to the 12th or 13th century,[\\[162\\]] but this is quite speculative. The [cartularies] of [Aberdeen] in the 13th century barred clergymen from wearing \"striped\" clothing, which could have referred to tartan.[\\[163\\]] \n\nIn 1333, Italian [Gothic artists] [Simone Martini] and [Lippo Memmi] produced the _[Annunciation with St. Margaret and St. Ansanus] _, a wood-panel painting in [tempera] and [gold leaf]. It [features the archangel Gabriel in a tartan-patterned mantle], with light highlights where the darker stripes meet, perhaps representing jewels, [embroidery], or [supplementary weaving]. Art historians consider it an example of [\"Tartar\" (Mongol) textile influence]; it likely has no relation to Scottish tartan.[\\[w\\]] \"Tartar\" cloth came in a great array of patterns, many more complex than tartan (such as the fine detail in Gabriel's robe in the same painting); patterns of this sort were influential especially on Italian art in the 14th century.\n\nThere are several other continental European paintings of tartan-like garments from around this era (even back to the 13th century), but most of them show very simple two-colour [basic check] patterns, or (like the Martini and Memmi _Annunciation_ example) broad squares made by thin lines of one colour on a background of another. Any of them could represent embroidery or [patchwork] rather than woven tartan. There seems to be no indication in surviving records of tartan material being imported from Scotland in this period. In the second half of the 14th century, the artist known only as the \"Master of [Estamariu] \" (in Catalonia, Spain) painted an altarpiece of St Vincent, [one of the details of which] is a man in a [cotehardie] that is red on one half and a complex three-colour tartan on the other, which is very similar to later-attested Scottish tartans.\n\nSir [Francis James Grant], mid-20th-century [Lord Lyon King of Arms], noted that records showed the wearing of tartan in Scotland to date as far back as 1440.[\\[164\\]] However, it is unclear to which records he was referring, and other, later researchers have not matched this early date.\n\nThe Glen Affric tartan (c. 1500–1600 AD), discovered in a peat bog in the 1980s_Éscossois sauvage_ ('Savage Scotsman') by [Lucas de Heere], c. 1567–80\n\nThe oldest surviving sample of complex, dyed-wool tartan (not just a simple check pattern) in Scotland has been shown through [radiocarbon dating] to be from the 16th century; known as the \"Glen Affric tartan\", it was discovered in the early 1980s in a [peat bog] near [Glen Affric] in the Scottish Highlands; its faded colours include green, brown, red, and yellow. On loan from the [Scottish Tartans Authority], the 55 cm × 42 cm (22 in × 17 in) artefact went on display at the [V&A Dundee] museum in April 2023.[\\[148\\]] [\\[165\\]] [\\[166\\]] [\\[167\\]] [\\[x\\]] \n\nThe earliest certain written reference to tartan by name is in the 1532–33 accounts of the [Treasurer of Scotland]: \"Ane uthir tartane galcoit gevin to the king be the Maister Forbes\" ('Another tartan coat given to the king by the Master Forbes'),[\\[4\\]] followed not long after by a 1538 record of clothing made by [Thomas Arthur] for King [James V of Scotland], which includes \"heland tertane to be hoiss\" ('Highland tartan to be [hose] ').[\\[168\\]] [\\[169\\]] [\\[170\\]] [\\[y\\]] Plaids were featured a bit earlier; poet [William Dunbar] (c. 1459 – c. 1530) mentions \"Five thousand [ellis] ... Of Hieland pladdis\".[\\[171\\]] The earliest surviving [image of a Highlander in what was probably meant to represent tartan] is a 1567–80 [watercolour] by [Lucas de Heere], showing a man in a belted, pleated yellow tunic with a thin-lined checked pattern, a light-red cloak, and tight blue shorts (of a type also seen in period Irish art), with [claymore] and [dirk].[\\[172\\]] It looks much like medieval illustrations of \"Tartar\" cloth and thus cannot be certain to represent true tartan. By the late 16th century, there are numerous references to striped or checked plaids. Supposedly, the earliest pattern that is still produced today (though not in continual use) is the Lennox district tartan,[\\[173\\]] (also adopted as the clan tartan of [Lennox])[\\[174\\]] said to have been reproduced by D. W. Stewart in 1893 from a portrait of [Margaret Douglas], Countess of Lennox, dating to around 1575.[\\[175\\]] However, this seems to be legend, as no modern tartan researchers or art historians have identified such a portrait, and the earliest known realistic one of a woman in tartan dates much later, to c. 1700.[\\[176\\]] Extant portraits of Margaret show her in [velvet] and [brocade].[\\[177\\]] \n\nTartan and Highland dress in the [Elizabethan era] have been said to have become essentially [classless] [\\[z\\]]  – worn in the Highlands by everyone from high-born [lairds] to common [crofters],[\\[183\\]] at least by the late 16th century. The historian [John Major] wrote in 1521 that it was the upper class, including warriors, who wore plaids while the common among them wore linen, suggesting that woollen cloth was something of a luxury.[\\[184\\]] But by 1578, Bishop [John Lesley] of Ross wrote that the [belted plaid] was the general Highland costume of both rich and poor, with the nobility simply able to afford larger plaids with more colours.[\\[181\\]] (Later, Burt (1726) also wrote of gentlemen having larger plaids than commoners.)[\\[20\\]] If colours conveyed distinction, it was of social class not clan.[\\[185\\]] D. W. Stewart (1893) attributed the change, away from linen, to broader manufacture of woollen cloth and \"the increased prosperity of the people\".[\\[181\\]] \n\nMany writers of the period drew parallels between Irish and Highland dress, especially the wearing of a long yellow-dyed shirt called the _[léine] _ or saffron shirt (though probably not actually dyed with expensive imported [saffron]),[\\[186\\]] worn with a mantle (cloak) over it, and sometimes with [trews].[\\[187\\]] It is not entirely certain when these mantles were first made of tartan in the Highlands, but the distinctive cloth seems to get its recorded mentions first in the 16th century, starting with Major (1521). In 1556, [Jean de Beaugué], a French witness of Scottish troops at the 1548 [Siege of Haddington], distinguished Lowlanders from Highland \"savages\", and wrote of the latter as wearing dyed shirts \"and a certain light covering made of wool of various colours\".[\\[188\\]] [\\[189\\]] [\\[190\\]] [George Buchanan] in 1582 wrote that \"plaids of many colours\" had a long tradition but that the Highland fashion by his era had mostly shifted to a plainer look, especially brown tones, as a practical matter of camouflage.[\\[191\\]] [\\[aa\\]] [Fynes Moryson] wrote in 1598 (published 1617) of common Highland women wearing \"plodan\", \"a course stuffe, of two or three colours in Checker worke\".[\\[194\\]] \n\nHighland man and woman in tartan, c. 1603–1616, by [Hieronymus Tielsch]. The crude attempt to represent tartan shows a blue and green pattern with red over-check, but did not blend the colours.[\\[ab\\]] \n\nIts dense weave requiring specialised skills and equipment, tartan was not generally one individual's work but something of an early [cottage industry] in the Highlands – an often communal activity called _calanas_, including some associated [folk singing traditions]  – with several related occupational specialties (wool comber, dyer, [waulker], [warp] -winder, weaver) among people in a village, part-time or full-time,[\\[196\\]] especially women.[\\[197\\]] [\\[ac\\]] The [spinning wheel] was a late technological arrival in the Highlands, and tartan in this era was woven from fine (but fairly inconsistent) hard-spun yarn that was spun by hand on [drop spindles].[\\[6\\]] The era's commerce in tartans was centred on [Inverness], the early business records of which are filled with many references to tartan goods.[\\[200\\]] Tartan patterns were loosely associated with the weavers of particular areas, owing in part to differences in availability of natural dyes,[\\[95\\]] [\\[201\\]] [\\[93\\]] [\\[202\\]] and it was common for Highlanders to wear whatever was available to them,[\\[9\\]] often a number of different tartans at the same time.[\\[203\\]] [\\[ad\\]] The early tartans found in east-coastal Scotland used red more often, probably because of easier continental-European trade in the red dye [cochineal], while western tartans were more often in blues and greens, owing to the locally available dyes.[\\[175\\]] (See also [§ Colour, palettes, and meaning].) The greater expense of red dye may have also made it a [status symbol].[\\[205\\]] Tartan spread at least somewhat out of the Highlands, but was not universally well received. The General Assembly of the [Kirk of Scotland] in 1575 prohibited the ministers and readers of the church (and their wives) from wearing tartan plaids and other \" [sumptuous] \" clothing,[\\[206\\]] [\\[207\\]] while the council of Aberdeen, \"a district by no means Highland\", in 1576 banned the wearing of plaids (probably meaning belted plaids).[\\[208\\]] \n\nA 1594 Irish account by [Lughaidh Ó Cléirigh] of Scottish [gallowglass mercenaries] in Ireland clearly describes the belted plaid, \"a mottled garment with numerous colours hanging in folds to the calf of the leg, with a girdle round the loins over the garment\".[\\[209\\]] The privately organised early \" [plantations] \" (colonies) and later governmental [Plantation of Ulster] brought tartan weaving to Northern Ireland in the late 16th to early 17th centuries.[\\[210\\]] Many of the new settlers were Scots, and they joined the population already well-established there by centuries of gallowglass and other immigrants. In 1956, the earliest surviving piece of Irish tartan cloth was discovered in peaty loam just outside [Dungiven] in [Northern Ireland], in the form of tartan [trews], along with other non-tartan clothing items.[\\[211\\]] It was dubbed the \"Dungiven tartan\" or \"Ulster tartan\".[\\[212\\]] The sample was dated using [palynology] to c. 1590–1650[\\[213\\]] [\\[214\\]] (the soil that surrounded the cloth was saturated with pollen from [Scots pine], a species imported to Ulster from Scotland by plantationers).[\\[215\\]] [\\[19\\]] According to archaeological textile expert [Audrey Henshall], the cloth was probably woven in [County Donegal], Ireland, but the trews tailored in the Scottish Highlands[\\[215\\]] [\\[216\\]] at some expense, suggesting someone of rank,[\\[217\\]] possibly a gallowglass.[\\[213\\]] Henshall reproduced the tartan for a 1958 exhibit;[\\[215\\]] [\\[19\\]] it became popular (and heavily promoted) as a district tartan for Ulster[\\[19\\]] (both in a faded form, like it was found,[\\[218\\]] and a bright palette that attempted to reproduce what it may have originally looked like),[\\[219\\]] and seems to have inspired the later creation of more Irish district tartans.[\\[19\\]] [\\[220\\]] (see [§ Regional], below). There is nearly nothing in period source material to suggest that the Irish also habitually wore tartan; one of the only sources that can possibly be interpreted in support of the idea is [William Camden], who wrote in his _Britannia_ (since at least the 1607 edition) that \"Highlandmen ... wear after the Irish fashion striped mantles\".[\\[221\\]] [\\[222\\]] [\\[ae\\]] \n\nThe earliest image of Scottish soldiers wearing tartan belted plaids and trews; 1631 German engraving by [Georg Köler].\n\nThe earliest unambiguous surviving [image of Highlanders in an approximation of tartan] is a watercolour, dating to c. 1603–1616 and rediscovered in the late 20th century, by [Hieronymus Tielsch] or Tielssch. It shows a man's belted plaid, and a woman's plaid (arisaid, _earasaid_) worn as a shawl or cloak over a dress, and also depicts diced short hose and a [blue bonnet].[\\[195\\]] [\\[223\\]] [\\[ab\\]] Clans had for a long time independently raised [militias], and starting in 1603, the British government itself mustered [irregular] militia units in the Highlands, known as the [Independent Highland Companies] (IHCs).[\\[224\\]] Being Highlanders, they were probably wearing tartan (1631 Highland mercenaries certainly were, and the ICHs were in tartan in 1709[\\[224\\]] and actual uniforms of tartan by 1725).[\\[225\\]] [\\[226\\]] [\\[227\\]] Tartan was used [as a furnishing fabric], including [bed hangings] at [Ardstinchar Castle] in 1605.[\\[228\\]] After mention of Highlanders' \"striped mantles\" in Camden's _Britannia_ of 1607,[\\[221\\]] poet [John Taylor] wrote in 1618 in _The Pennyless Pilgrimage_ of \"tartane\" Highland garb in detail (in terms that generally match what was described and illustrated even two centuries later); he noted that it was worn not just by locals but also by visiting British gentlemen.[\\[af\\]] [\\[ag\\]] The council of Aberdeen again cracked down on plaids in 1621, this time against their use as women's head-wear,[\\[208\\]] and the kirk in [Glasgow] had previously, in 1604, forbidden their wear during services;[\\[230\\]] similar kirk session rulings appeared in [Elgin] in 1624, in [Kinghorn] in 1642 and 1644, and [Monifieth] in 1643, with women's plaids more literarily censured in Edinburgh in 1633 by [William Lithgow].[\\[231\\]] In 1622, the [Baron Courts] of [Breadalbane] set fixed prices for different complexities of tartan and plain cloth.[\\[232\\]] \n\nIn 1627, a tartan-dressed body of Highland archers served under the [Earl of Morton].[\\[233\\]] More independent companies were raised in 1667.[\\[224\\]] The [earliest image of Scottish soldiers in tartan] is a 1631 [copperplate engraving] by [Georg Köler] (1600–1638); it features Highland mercenaries of the [Thirty Years' War] in the forces of [Gustavus Adolphus] of Sweden.[\\[234\\]] [\\[235\\]] Not long after, James Gordon, Parson of [Rothiemay], wrote in _A History of Scots Affairs from 1637 to 1641_ of the belted plaid as \"a loose Cloke of several [Ells], striped and party colour'd, which they gird breadthwise with a Leathern Belt ....\" He also described the short hose and trews (\"trowzes\").[\\[236\\]] A 1653 map, [_Scotia Antiqua_] by [Joan Blaeu], features a cartouche that depicts men in trews and belted plaid; the tartan is crudely represented as just thin lines on a plain background,[\\[237\\]] and various existing copies are hand-coloured differently. [Daniel Defoe], in _Memoirs of a Cavalier_ (c. 1720) wrote, using materials that probably dated to the [English Civil War], of Highlanders invading Northern England back in 1639 that they had worn \"doublet, breeches and stockings, of a stuff they called plaid, striped across red and yellow, with short cloaks of the same\".[\\[238\\]] \n\nBesides the formerly often-chastised wearing of head-plaids in church, women's dress was not often described (except in earlier times as being similar to men's).[\\[ah\\]] The Highland and island women's equivalent of the belted plaid was the [arisaid] (_earasaid_), a plaid that could be worn as a large shawl or be wrapped into a dress. Sir [William Brereton] had written in 1634–35 (published 1844) of Lowland women in Edinburgh that: \"Many wear (especially of the meaner sort) plaids ... which is \\[ _[sic] _\\] cast over their heads and covers their faces on both sides, and would reach almost to the ground, but that they pluck them up, and wear them cast under their arms.\" He also reported that women there wore \"six or seven several habits and fashions, some for distinction of widows, wives and maids\", including gowns, capes/cloaks, bonnets with [bongrace] veils, and collar ruffs, though he did not address tartan patterns in particular in such garments.[\\[240\\]] \n\nWhile tartan was still made in the Highlands as cottage industry, by 1655 production had centred on [Aberdeen], made there \"in greater plenty than \\[in\\] any other place of the nation whatsoever\",[\\[21\\]] though it was also manufactured in [Glasgow], [Montrose], and [Dundee], much of it for export.[\\[21\\]] In Glasgow at least, some of the trade was in tartan manufactured in the Highlands and the Hebrides and brought there for sale along with hides and other goods.[\\[21\\]] Impressed by the trade in Glasgow, [Richard Franck] in his _Northern Memoirs_ of 1658 wrote that the cloth was \"the staple of this country\".[\\[241\\]] In 1662, the naturalist [John Ray] wrote of the \"party coloured blanket which \\[Scots\\] call a plad, over their heads and shoulders\", and commented that a Scotsman even of the lower class was \"clad like a gentleman\" because the habit in this time was to spend extraordinarily on clothing,[\\[242\\]] a habit that seems to have gone back to the late 16th century.[\\[243\\]] A Thomas Kirk of Yorkshire commented on trews, plaids, and possibly kilts of \"plaid colour\" in 1677;[\\[244\\]] more material by Kirk was printed in the 1891 _Early Travellers in Scotland_ edited by [Peter Hume Brown], recording \"plad wear\" in the form of belted plaids, trews, and hose.[\\[245\\]] A poem by [William Cleland] in 1678 had Scottish officers in trews and shoulder plaids, and soldiers in belted plaids.[\\[246\\]] In 1689, Thomas Morer, an English clergyman to Scottish regiments, described Lowland women as frequently wearing plaids despite otherwise dressing mostly like the English.[\\[247\\]] \n\nMungo Murray, c. 1683, by [John Michael Wright] ( [Scottish National Portrait Gallery] version), featuring a very complex tartan\n\nThe [earliest known realistic portrait in tartan Highland dress] is a piece (which exists in three versions) by [John Michael Wright], showing a very complicated tartan of brown, black, and two hues of red;[\\[248\\]] it is dated to c. 1683 and is of Mungo Murray, son of [John Murray, Marquess of Atholl].[\\[249\\]] [\\[ai\\]] \n\nIn 1688, William Sacheverell, [lieutenant governor of the Isle of Man], wrote of the tartan plaids of the women of [Mull] in the [Inner Hebrides] as \"much finer, the colours more lively, and the squares larger than the men's .... This serves them for a veil, and covers both head and body.\"[\\[251\\]] In the 1691 poem _The Grameid_,[\\[252\\]] James Philip of Almerieclose described the 1689 [Battle of Killiecrankie] in terms that seem to suggest that some clan militias had uniform tartan [liveries], and some historians have interpreted it thus.[\\[253\\]] [\\[254\\]] \n\nIt is not until the early 18th century that regional uniformity in tartan, sufficient to identify the area of origin, is reported to have occurred.[\\[159\\]] [Martin Martin], in _A Description of the Western Islands of Scotland_, published in 1703, wrote, after describing trews and belted plaids \"of divers Colours ... agreeable to the nicest Fancy\", that tartans could be used to distinguish the inhabitants of different places.[\\[aj\\]] Martin did not mention anything like the use of a special pattern by each family.\n\nIn 1709, the [Independent Highland Companies] were wearing everyday Highland dress, not uniforms of a particular tartan, to better blend in with civilians and detect [Jacobite] treachery.[\\[224\\]] In 1713, the [Royal Company of Archers] (a royal bodyguard unit first formed in 1676),[\\[257\\]] became the first unit in service to the British crown who adopted a particular tartan as a part of their formal uniform. The militiamen of [Clan Grant] may have been all in green-and-red tartan (details unspecified) as early as 1703–04[\\[258\\]] [\\[175\\]] and wearing a uniform tartan livery by 1715.[\\[259\\]] It is not a surviving pattern, and modern Grant tartans are of much later date.[\\[260\\]] (For details on early uniform tartans, see [Regimental tartan § Pre-regiment military use].)\n\nAn account of the Highland men in 1711 had it that they all, including \"those of the better sort\", wore the belted plaid.[\\[261\\]] A 1723 account suggested that gentlemen, at least when commingling with the English, were more likely to wear tartan trews and hose with their attendants in the belted plaid,[\\[261\\]] which Burt also observed;[\\[262\\]] trews were also more practical for horseback riding.[\\[263\\]] Also around 1723, short tartan jackets, called in Gaelic _còta-goirid_, sometimes with slashed sleeves and worn with a matching waistcoat, made their first appearance and began supplanting, in Highland dress, the plain-coloured [doublets] that were common throughout European dress of the era; the _còta-goirid_ was often worn with matching trews and a shoulder plaid that might or might not match, but could also be worn with a belted plaid.[\\[264\\]] [\\[ak\\]] \n\nRachel Gordon of Abergeldie, c. 1700 – the earliest known formal portrait of a woman in tartan\n\nM. Martin (1703) wrote that the \"vulgar\" [Hebridean] women still wore the [arisaid] wrap/dress,[\\[265\\]] describing it as \"a white Plad, having a few small Stripes of black, blue, and red; it reach'd from the Neck to the Heels, and was tied before on the Breast with a Buckle of Silver, or Brass\", some very ornate. He said they also wore a decorated belt, scarlet sleeves, and head kerchiefs of linen.[\\[266\\]] Martin was not the only period source to suggest it was primarily the wear of the common women, with upper-class Highland ladies in the 18th century more likely to wear tailored gowns, dresses, and [riding habits], often of imported material, as did Lowland and English women.[\\[176\\]] [\\[267\\]] Highland women's dress was also sometimes simply in linear stripes rather than tartan, a cloth called _iomairt_ ( [drugget]).[\\[176\\]] From the late 18th century, as the arisaid was increasingly set aside for contemporary womenswear, while Highland men continued wearing the belted plaid.,[\\[268\\]] the ladies' plaids were reduced to smaller \"screens\" – fringed shawls used as headdresses and as dress accessories,[\\[267\\]] \"a gentrification of the arisaid\".[\\[176\\]] (Wilsons continued producing these in the first half of the 19th century.)[\\[176\\]] [John Macky] in _A Journey Through Scotland_ (1723) wrote of Scottish women wearing, when about, such tartan plaids over their heads and bodies, over English-style dress, and likened the practice to continental women wearing black wraps for church, market, and other functions.[\\[247\\]] [Edmund Burt], an Englishman who spent years in and around Inverness, wrote in 1727–1737 (published 1754) that the women there also wore such plaids, made of fine [worsted] wool or even of [silk], that they were sometimes used to cover the head, and that they were worn long, to the ankle, on one side. He added that in Edinburgh (far to the southeast) they were also worn, with ladies indicating their [Whig] or [Tory] political stance by which side they wore long (though he did not remember which side was which).[\\[269\\]] In Edinburgh, perennial disapproval of the \"barbarous habitte\" of women wearing plaids over their heads returned in 1753 writings of [William Maitland]. Women first appear in known painted portraits with tartan c. 1700, with that of Rachel Gordon of [Abergeldie]; more early examples are found in 1742 and 1749 paintings by William Mosman,. They show plaids (in tartans that do not survive as modern patterns) worn loosely around the shoulders by sitters in typical European-fashion dresses.[\\[270\\]] Some entire dresses of tartan feature in mid-18th-century portraits, but they are uncommon.[\\[176\\]] In the Jacobite period, tartan was sometimes also used as trim, e.g. on hats. Plaids were worn also as part of wedding outfits. The monied sometimes had entire wedding dresses of tartan, some in silk, and even devised custom tartans for weddings, typically based on existing patterns with colours changed.[\\[267\\]] \n\nHighland soldier and family, the woman in an [arisaid]; by [Martin Engelbrecht] c. 1717–1754[\\[al\\]] \n\nPortraits became more popular among the Highland elite starting in the early 18th century.[\\[272\\]] Similar cloth to that in the c. 1683 Mungo Murray portrait appears in [the 1708 portrait] of the young [John Campbell of Glenorchy], attributed to [Charles Jervas]; and [the c. 1712 portrait] of [Kenneth Sutherland, Lord Duffus], by [Richard Waitt].[\\[273\\]] This style of very \"busy\" but brown-dominated tartan seems to have been fairly common through the early 18th century, and is quite different from later patterns.[\\[274\\]] As the century wore on, bolder setts came to dominate, judging from later portraits and surviving cloth and clothing samples. By the early 18th century, tartan manufacture (and weaving in general) were centred in [Bannockburn], Stirling; this is where the eventually dominant tartan weaver William Wilson & Son, founded c. 1765, were based.[\\[275\\]] [\\[am\\]] \n\nJudging from rare surviving samples, the predominant civilian tartan colours of this period, in addition to white (undyed wool) and black, were rich reds and greens and rather dark blues, not consistent from area to area; where a good black was available, dark blue was less used.[\\[6\\]] The sett of a typical Highland pattern of the era as shown in portraits was red with broad bands of green and/or blue, sometimes with fine-line over-checks.[\\[6\\]] [\\[an\\]] [Oil portraiture] was the province of the privileged, and \" [Sunday best] \" tartans with red grounds were commonly worn in them as a [status symbol], from the early 18th century, the dye typically being made from expensive imported [cochineal].[\\[176\\]] [\\[277\\]] Green and blue more generally predominated owing to their relative ease of production with locally available dyes, with more difficult yellow[\\[ao\\]] and red dyes commonly being saved for thin over-check lines[\\[279\\]] (a practice that continued, e.g. in military and consequently many clan tartans, through to the 19th century – see [Regimental tartans]). However, even local-dyestuff blues were often over-dyed with some amount of imported [indigo] for a richer colour.[\\[49\\]] \n\n#### Union protest and Jacobite rebellion\n\n\\[ [edit] \\]\n\nThe [Treaty] and [Acts of Union] in 1706–07, which did away with the separate [Parliament of Scotland], led to [Scottish Lowlanders] adopting tartan in large numbers for the first time, as a symbol of protest against the union.[\\[280\\]] [\\[281\\]] It was worn not just by men (regardless of social class),[\\[282\\]] but even influential Edinburgh ladies,[\\[280\\]] [\\[283\\]] well into the 1790s.[\\[284\\]] By the beginning of the 18th century, there was also some demand for tartan in England, to be used for curtains, bedding, nightgowns, etc., and weavers in [Norwich], Norfolk, and some other English cities were attempting to duplicate Scottish product, but were considered the lower-quality option.[\\[261\\]] \n\n[Charles Edward Stuart], \"Bonnie Prince Charlie\", in tartan and blue bonnet with Jacobite white cockade; portrait by [William Mosman] c. 1750\n\nThe most effective fighters for [Jacobitism] were the supporting Scottish clans, leading to an association of tartan and [Highland dress] with the Jacobite cause to restore the Catholic [Stuart dynasty] to the throne of England, Scotland, and Ireland. This included [great kilts], and [trews] (trousers) with great coats, all typically of tartan cloth, as well as the [blue bonnet]. The British parliament had considered banning the belted plaid after the [Jacobite rising of 1715], but did not.[\\[285\\]] Highland garb came to form something of a Jacobite uniform,[\\[284\\]] [\\[286\\]] even worn by Prince [Charles Edward Stuart] (\"Bonnie Prince Charlie\") himself by the mid-18th century,[\\[287\\]] [\\[ap\\]] mostly in propaganda portraits (with inconsistent tartans) but also by eyewitness account at [Culloden].[\\[293\\]] By this period, sometimes a belted plaid was worn over tartan trews and jacket (in patterns that need not match).[\\[294\\]] \n\nA pattern from a coat (probably Jacobite) known to date to the period of the 1745 uprising\n\nBurt had concurred c. 1728, as did his 1818 editor [Robert Jamieson], with Buchanan's much earlier 1582 observation that tartans were often in colours intended to blend into heather and other natural surroundings.[\\[295\\]] This may just represent prejudices of English writers of the period, however, at least by the mid-18th century. Extant samples of Culloden-era cloth are sometimes quite colourful. One example is [a pattern found on a coat] (probably Jacobite) known to date to around the 1745 uprising; while it has faded to olive and navy tones, the sett is a bold one of green, blue, black, red, yellow, white, and light blue (in diminishing proportions). While an approximation of the pattern was first published in D. W. Stewart (1893), the colours and proportions were wrong; the original coat was rediscovered and re-examined in 2007.[\\[296\\]] [\\[297\\]] Another surviving Culloden sample, predominantly red with broad bands of blue, green, and black, and some thin over-check lines, consists of a largely intact entire plaid that belonged on one John Moir; it was donated to the National Museum of Scotland in 2019.[\\[298\\]] \n\nThere is a legend that a particular still-extant tartan was used by the Jacobites as an identifier even prior to \" [the '15] \". This story can be traced to W. & A. Smith (1850) in _Authenticated Tartans of the Clans and Families of Scotland_, in which they claimed that a pattern they published was received from an unnamed woman then still living who in turn claimed a family tradition that the tartan dated to 1712, long before her birth, but for which there is no evidence.[\\[32\\]] This hearsay tale was later repeated as if known fact by other books, e.g., Adam Frank's _What Is My Tartan?_ in 1896,[\\[299\\]] and Margaret MacDougall's 1974 revision of Robert Bain's 1938 _Clans and Tartans of Scotland_.[\\[aq\\]] Even the often credulous Innes of Learney (1938) did not believe it.[\\[302\\]] The pattern in question does date to at least c. 1815–26, because it was collected by the [Highland Society of London] during that span.[\\[32\\]] But there is no substantiated evidence of Jacobites using a consistent tartan, much less one surviving to the present.\n\nIndependent Highland Companies were re-raised from Scottish clans loyal to the Hanoverian monarchy during 1725–29.[\\[303\\]] [\\[ar\\]] [\\[304\\]] This time they wore uniform tartans of blue, black, and green, presumably with differencing over-check lines.[\\[305\\]] [\\[304\\]] [\\[225\\]] They were all normalised to one tartan during 1725–33[\\[225\\]] [\\[227\\]] [\\[226\\]] [\\[306\\]] (a pattern which probably does not survive to the present day).[\\[175\\]] The uniform tartan appears to have changed into a new tartan, known today as Black Watch or Government, when the companies amalgamated to become the [42nd (Black Watch)] regiment in 1739. (See [Regimental tartan].)\n\n#### Proscription and its aftermath\n\n\\[ [edit] \\]\n\nAfter the failure of the [Jacobite rising of 1745], efforts to pacify the Highlands and weaken the cultural and political power of the clans[\\[307\\]] [\\[308\\]] led to the [Dress Act 1746], part of the [Act of Proscription] to disarm the Highlanders. Because tartan Highland dress was so strongly symbolically linked to the militant Jacobite cause,[\\[309\\]] the act – a highly political throwback to the long-abandoned [sumptuary laws] [\\[309\\]]  – banned the wearing of Highland dress by men and boys in Scotland north of the [River Forth] (i.e. in the Highlands),[\\[as\\]] except for the landed gentry[\\[at\\]] and the Highland regiments of the British Army.[\\[311\\]] The law was based on 16th century bans against the wearing of traditional [Irish clothing] in the [Kingdom of Ireland] by the [Dublin Castle administration].[\\[312\\]] Sir [Walter Scott] wrote of the Dress Act: \"There was knowledge of mankind in the prohibition, since it divested the Highlanders of a dress which was closely in association with their habits of Clanship and of war.\"[\\[313\\]] \n\nTartans recorded shortly after the act (thus probably being patterns in use in the period before proscription) show that a general pattern was used in a wide area, with minor changes being made by individual weavers to taste.[\\[234\\]] E.g., the tartan today used as the main (red) [Mackintosh] clan tartan,[\\[314\\]] recorded by the [Highland Society of London] around 1815, was found in variants from [Perthshire] and [Badenoch] along the [Great Glen] to [Loch Moy].[\\[234\\]] Other such groups can be found, e.g. a [Huntly] -centred [Murray] / [Sutherland] / [Gordon] cluster analysed as clearly related by Innes of Learney (1938)[\\[162\\]]  – distinguished from a different Huntly/ [MacRae] / [Ross] / [Grant] group identified by [Scottish Register of Tartans] and tartan researcher Peter Eslea MacDonald of [Scottish Tartans Authority].[\\[315\\]] [\\[316\\]] But Scarlett (1990) says that \"the old patterns available are too few in number to permit a detailed study of such pattern distributions\" throughout the Highlands.[\\[234\\]] Portraits of the era also show that tartan was increasingly made with identical or near-identical [warp and weft] patterns, which had not always been the case earlier, and that the tartan cloth used was of the fine twill, with even-warp-and-weft thickness, still used today for kilts.[\\[254\\]] [\\[272\\]] \n\nAlthough the Dress Act, contrary to popular later belief, did not ban all tartan[\\[317\\]] (or bagpipes, or Gaelic), and women, noblemen, and soldiers continued to wear tartan,[\\[318\\]] it nevertheless effectively severed the everyday tradition of Highlanders wearing primarily tartan, as it imposed the wearing of non-Highland clothing common in the rest of Europe for two generations.[\\[311\\]] [\\[319\\]] (While some Highlanders defied the act,[\\[320\\]] [\\[321\\]] there were stiff criminal penalties.)[\\[322\\]] It had a demoralising effect,[\\[au\\]] and the goal of this and related measures to integrate the Highlanders into Lowland and broader British society[\\[312\\]] was largely successful.[\\[309\\]] [\\[324\\]] By the 1770s, Highland dress seemed all but extinct.[\\[325\\]] However, the act may also ironically have helped to \"galvanize clan consciousness\" under that suppression;[\\[326\\]] Scottish clans, in romanticised form, were to come roaring back in the \"clan tartans\" run of the [Regency] (late [Georgian]) to [Victorian] period.\n\nJacobite women continued wearing tartan during the proscription (1749 portrait of [Flora MacDonald] by [Allan Ramsay] and [Joseph van Aken]; the tartan is a [Tullibardine] area pattern, later the [Murray] of Tullibardine clan tartan).[\\[327\\]] \n\nIn the interim, Jacobite women continued using tartan profusely, for clothing (from dresses to shoes), curtains, and everyday items.[\\[328\\]] [\\[318\\]] While [Classicism] -infused portraiture of 18th-century clan nobles (often painted outside Scotland) typically showed them in tartan and \"Highland\" dress, much of it was loyalist regimental military stylings, the antithesis of Jacobite messaging;[\\[329\\]] it foreshadowed a major shift in the politics of tartan (see [§ Late Georgian], below). Nevertheless, this profuse application of tartan could be seen as rebellious to some extent, with the [reified] Highlander becoming \"a heroic and classical figure, the [legatee] of primitive virtues.\"[\\[330\\]] And by the 1760s, tartan had become increasingly associated with Scotland in general, not just the Highlands, especially in the English mind.[\\[331\\]] \n\nHelen Murray of Ochtertyre, daughter and eldest child of Sir Patrick Murray of Ochtertyre, 4th Bt; c. 1750, artist uncertain. The tartans of the bodice and skirt do not match exactly, and are not surviving patterns.[\\[332\\]] \n\nAfter much outcry (as the ban applied to Jacobites and loyalists alike), the Dress Act was repealed in 1782, primarily through efforts of the Highland Society of London;[\\[333\\]] the repeal bill was introduced by [James Graham, Marquis of Graham] (later Duke of Montrose).[\\[334\\]] Some Highlanders resumed their traditional dress,[\\[335\\]] but overall it had been abandoned by its former peasant wearers, taken up instead by the upper and middle classes, as a fashion.[\\[336\\]] Tartan had been \"culturally relocated as a picturesque ensemble or as the clothing of a hardy and effective fighting force\" for the crown, not a symbol of direct rebellion.[\\[337\\]] R. Martin (1988) calls this transmutation \"the great [bifurcation] in tartan dress\",[\\[338\\]] the cloth being largely (forcibly) abandoned by the original Highland provincials then taken up by the military and consequently by non-Highlander civilians. During the prohibition, traditional Highland techniques of wool spinning and dyeing, and the weaving of tartan, had sharply declined.[\\[95\\]] [\\[310\\]] [\\[104\\]] Commercial production of tartan was to become re-centred in the Lowlands, in factory villages along the fringe of the Highlands,[\\[339\\]] among companies like Wilsons of [Bannockburn] (then the dominant manufacturer),[\\[340\\]] with the rise of demand for [tartan for military regimental dress].[\\[341\\]] Some tartan weaving continued in the Highlands,[\\[342\\]] [\\[343\\]] and would even see a boost in the late Georgian period.[\\[342\\]] Tartan by this era had also become popular in Lowland areas including [Fife] and [Lothian] and the urban centres of [Edinburgh] and [Stirling].[\\[317\\]] From 1797 to 1830,[\\[275\\]] Wilsons were exporting large quantities of tartan (for both men's and women's clothing), first to the British colonies in [Grenada] and [Jamaica] (where the affordable, durable, and bright material was popular for clothing [enslaved people]),[\\[339\\]] and had clients in England, Northern and Central Europe, and a bit later in North and South America and the Mediterranean.[\\[344\\]] [\\[345\\]] However, by the end of the 18th century, Wilsons had \"stiff competition\" (in civilian tartan) from English weavers in [Norwich].[\\[346\\]] \n\nBecause the Dress Act had not applied to the military or gentry, tartan gradually had become associated with the affluent, rather than \" [noble savage] \" Highlanders,[\\[347\\]] [\\[348\\]] [\\[349\\]] from the late 18th century and into the 19th,[\\[350\\]] along with patriotic military-influenced clothing styles in general;[\\[351\\]] tartan and militarised Highland dress were being revived among the fashion-conscious across Britain, even among women with military relatives.[\\[352\\]] The clans, Jacobitism, and anti-unionism (none of them any longer an actual threat of civil unrest) were increasingly viewed with a sense of nostalgia,[\\[183\\]] [\\[353\\]] [\\[354\\]] [\\[349\\]] especially after the death of Prince Charles Edward Stuart in 1788,[\\[355\\]] even as Highland regiments proved their loyalty and worth.[\\[349\\]] Adopting the airs of a [Tory] sort of tartaned \" [Highlandism] \"[\\[356\\]] provided a post-union and resigned sense of national (and militarily elite) distinction from the rest of Britain, without threatening [empire].[\\[357\\]] Even the future [George IV] donned Highland regalia for a [masquerade ball] in 1789.[\\[358\\]] By the 1790s, some of the gentry were helping design tartans for their own personal use, according to surviving records from Wilsons.[\\[183\\]] [Jane (Maxwell) Gordon, Duchess of Gordon], was said to have \"introduced tartan to [\\[royal\\] court]  ... wearing a plaid of the Black Watch, to which her son had just been appointed\", in 1792; she triggered a fashion of wearing tartan in London and Paris, though was not immune to caricature by the disapproving.[\\[359\\]] \n\nR. Martin (1988) wrote, from a [historiographical] perspective, that after the Dress Act:[\\[338\\]] \n\n> the idea of Highland dress was stored in the collective historical attic; when it was revived in the years leading up to 1822, it had been forgotten by some two or three generations in civilian dress and could be remembered, however deceptively, however naively, to have been the ancient dress of the Highlands, not that so recently worn as the standard peasant dress before 1746. The ban on tartan was hugely successful, but so inimical to a natural historical process, that it promoted the violent re-assertion of the tartan, sanctioned by a spurious sense of history, in the next century.\n\nThe tumultuous events of 18th-century Scotland led to not just broader public use of tartan cloth, but two particular enduring tartan categories: regimental tartans and eventually clan tartans.\n\nSoldiers from a Highland regiment c. 1744 wearing tartan [belted plaids] (great kilts).\n\nAfter the period of the early clan militias and the [Independent Highland Companies] (IHCs), over 100 battalions of line, fencible, militia, and volunteer regiments were raised, between c. 1739 and the end of the Napoleonic Wars in 1815, in or predominantly in the Highlands,[\\[360\\]] a substantial proportion of them in Highland dress. Of these units, only some had distinct uniform tartans, and of those, only a small number were recorded to the present day.\n\n_The [Sword Dance] _ by David Cunliffe, 1853, depicting men of the 42nd and 93rd. The dancer in the centre wears the 42nd's red band tartan.\n\nThe IHCs were amalgamated in 1739 to become the [43rd (later 42nd) Regiment of Foot],[\\[361\\]] called the Black Watch.[\\[362\\]] It was the first proper governmental [Highland regiment], part of the [British Army], and they wore the [belted plaid] (\"great kilt\") for dress, and the tailored [small kilt] for undress uniform.[\\[363\\]] [\\[304\\]] [\\[364\\]] For the former garment,[\\[365\\]] they used a distinctive tartan, which was designed for the unit.[\\[366\\]] It was originally called the \"42nd tartan\",[\\[366\\]] so it probably was not adopted until after the unit was renumbered the 42nd in 1749.[\\[225\\]] It seems likely that the tartan was based on those used by the IHCs earlier, but with double black \"tram line\" over-checks added.[\\[225\\]] [\\[367\\]] The Black Watch pattern was used by various other regiments, and it has been estimated that to clothe them all, some 30–40 miles (48–64 km) of the tartan had to be woven before 1750 alone.[\\[59\\]] It became the basis of various later regimental (and eventually clan) tartans.[\\[225\\]] It remains popular in general-public use under the names \"Black Watch\", \"Government\", and any of \"old [Campbell] \", \"hunting [Grant] \", or \"hunting [Munro] \",[\\[225\\]] but today officially called \"Government No. 1\" by the military. (See illustration in [§ Popular designs], below.) The 42nd had separate tratans for its small kilt until c. 1814[\\[365\\]] [\\[368\\]] (also used for [grenadiers] ' belted plaids),[\\[365\\]] for [pipers],[\\[369\\]] [\\[370\\]] and for drummers.[\\[371\\]] \n\nAfter the [Jacobite uprisings], raising a regiment in service to the king was, for many Scottish [lairds], a way of rehabilitating the family name, assuring new-found loyalty to the [Hanoverian crown], and currying royal favour (even regaining forfeited estates).[\\[372\\]] Exempt from the [Dress Act], men in these Highland regiments of the [empire] were given Highland dress, and the \"kilts and pipes that were once considered barbaric were now seen as ‘safe’ nationalism\" within the army.[\\[373\\]] From c. 1770 onward into the 19th century, virtually all the regimental tartan was produced by the company William Wilson & Son of [Bannockburn], the dominant tartan weaver.[\\[340\\]] Regimental uniforms, including tartans, were left – usually within the general Black Watch-based colour scheme of black, blue, and green – to their commanders.[\\[374\\]] [\\[375\\]] \n\n[72nd Duke of Albany's Own Highlanders] during a trews-wearing period, c. 1844, in the tartan named for Prince Charles Edward Stuart\n\nSome surviving early regimental tartans include:\n\n- [Loudoun's Highlanders] (64th, raised in 1745), used a tartan similar to Black Watch, but with over-checks of red and yellow, and lacking the two black \"tram lines\" of Black Watch.[\\[376\\]] [\\[377\\]] \n- The [78th (Highlanders)] or Ross-shire Buffs (raised 1793), [MacLeod's Highlanders] (73rd, later 71st, raised 1777–78), and the original [Seaforth Highland Regiment] (78th, later 72nd, raised 1778)[\\[av\\]] first used Black Watch, then in 1787 adopted a variant of it with thin over-checks of red and white.[\\[378\\]] [\\[379\\]] [\\[380\\]] It eventually became the [Clan Mackenzie] tartan,[\\[379\\]] and it remains used as an official British military tartan, designated \"Government No. 5A\". A slight variation, with yellow in place of white, became one of the [Clan MacLeod] tartans.[\\[381\\]] \n- The [74th (Highland) Regiment of Foot] (raised 1787)[\\[aw\\]] used another variant of the Black Watch tartan with a black-guarded white over-check.[\\[382\\]] Also in 1787, the [75th (Highland) Regiment], later 75th (Stirlingshire), probably used a more distinct tartan, not based on Black Watch, of purple and black on a green ground, with thin white and black over-checks; it was later called \"No. 64 or Abercromby\" by Wilsons, and though it did not become adopted as an [Abercromby/Abercrombie clan] tartan, variants of it became two unrelated clan patterns.[\\[383\\]] \n- The [Gordon Highlanders] (100th, later 92nd, raised 1794) also wore [an altered Black Watch], this time with a thin yellow over-check.[\\[384\\]] [\\[385\\]] In a rare show of competition to Wilsons, the pattern was designed in 1793 and supplied by weaver William Forsyth of [Huntly], Aberdeen.[\\[386\\]] This pattern became the main tartan of [Clan Gordon].[\\[387\\]] [\\[388\\]] Something nearly identical (perhaps with the yellow over-check in a different width) was also used by the [8th (Rothesay and Caithness) Fencibles].[\\[389\\]] \n- The [Cameronian Volunteers] (79th, later Queen's Own Cameron Highlanders, raised 1793) used [a comparatively distinct tartan], later the family tartan of the [Cameron of Erracht] branch of [Clan Cameron].[\\[56\\]] [\\[390\\]] [\\[391\\]] It is structurally much like Black Watch, but without black over-checks and with a number of yellow and red over-checks. It has been said to have been designed by the unit leaderor a family member.[\\[56\\]] [\\[392\\]] \n- The [Fraser Fencibles] (raised 1794–95) used a tartan with a red ground and green and blue bands, unrelated to the Black Watch style.[\\[393\\]] [\\[394\\]] \n- The [Sutherland Highlanders (93rd)] raised 1799, and later the [Argyll and Sutherland Highlanders] (Princess Louise's, formed 1881 by amalgamation of the 93rd with the [91st Argyllshire Highlanders]), may have worn a lightened version of Black Watch,[\\[395\\]] [\\[396\\]] now sometimes used as one of the [Clan Sutherland] tartans;[\\[397\\]] [\\[398\\]] it is also still militarily used as sett \"Government No. 1A\".\n- The Loyal Clan Donnachie Volunteers (raised in 1803) had its own uniform tartan, which was later adopted as the hunting [Robertson/Donnachie/Duncan] clan tartan.[\\[399\\]] [\\[400\\]] \n- The [Duke of Albany's Own Highlanders] (formerly Seaforth's 72nd), during a [trews] -wearing period of 1823–1881, wore a [tartan called Prince Charles Edward Stuart], similar to [royal Stewart],[\\[111\\]] as [shown in a period painting]. Identified in surviving cloth samples from the mid-18th century[\\[63\\]] (before the regiment) it is one of the oldest setts in continuous production.[\\[63\\]] \n\nFor more detail, and an image gallery of these setts, see [Regimental tartan].\n\nAn Italian woman inspects the kilts of two pipe majors in Rome, 1944, toward the end of kilts as undress uniform in Highland regiments\n\nBy the turn of the 18th and 19th centuries, women in Scotland were especially \"desirous to dress in the uniform plaids of their husbands\", in particularly fine-quality cloth, according to records of Wilsons of Bannockburn.[\\[359\\]] After the Highland regiments proved themselves fearless and effective in various military campaigns, the glory associated with them did much to keep alive, initially among the gentry and later the general public, an interest in tartan and kilts, which might have otherwise slipped into obscurity due to the Dress Act's prohibition.[\\[401\\]] The belted plaid was abandoned in favour of the small kilt, around 1814.[\\[348\\]] [\\[365\\]] After the \"clan tartanry\" rush of the early to mid-19th century (see below), various later Highland regiments adopted some of the recently minted clan tartans for their uniforms (reversing the original regimental-into-clan-tartan flow). Some of these adoptions remain in regimental use today.\n\nThe [Lowland regiments] (dating in some form to 1633 and never before dressed in Highland garb but in a variant of regular army uniform) were outfitted in tartan trews in 1881. This both linked them with and distinguished them from the tartan-kilted Highland regiments.[\\[402\\]] Typically the \"Government\" (Black Watch) tartan was used, though some units later diversified. Several Highland regiments were again assigned new tartans that were clan tartans rather than unit-specific ones, into the early 20th century.[\\[303\\]] \n\nToday, about a dozen tartans are officially used (and half a dozen more unofficially) between all of the surviving historical [Scottish regiments], which have largely been amalgamated since 2006 as battalions into the [Royal Regiment of Scotland], part of the [Scottish, Welsh and Irish Division].[\\[403\\]] These tartans are only worn in dress and pipe-band uniforms, after the practical uniform changes introduced in the early part of World War II, which did away with tartan kilts and trews in undress uniforms. (For further information on these tartans and the modern units using them, see [List of tartans § UK military or government tartans].) Some military units in other countries also have their own tartans. In all, there are at least 38 documented tartans that have at one time or another been associated with regiments, though many of them also with clans.[\\[ax\\]] \n\nWith an exception dating to 1618[\\[405\\]] and another to c. 1703–1715[\\[259\\]] (neither of which appear to have survived), it is generally regarded that tartans associated by name with [Scottish clans] mostly date to the early-to-mid 19th century,[\\[9\\]] [\\[406\\]] [\\[407\\]] [\\[408\\]] [\\[141\\]] [\\[409\\]] [\\[410\\]] some few to the late 18th at the earliest,[\\[93\\]] [\\[411\\]] [\\[349\\]] depending on how one defines \"clan tartan\". The belief that the clan tartans are an \"ancient\" system of symbolic family differentiation is pervasive, even passionate, but lacks substantive evidence even as it is overwhelmed by counter-evidence. It is what J. C. Thompson (1992) called \"the Great Tartan Myth\",[\\[141\\]] and James D. Scarlett (1990) \"the Tartan Cult\".[\\[256\\]] Lt.-Col. M. M. Haldane (1931) called it an assumption, which \"has acquired such a formidable weight from mere reiteration\" without \"critical examination of evidence\".[\\[412\\]] Barnes & Allen (1956) observed:[\\[93\\]] \n\n> There is no doubt that many 'setts' had been traditional to certain districts for centuries, but the theory that they were a sort of Clan uniform seems now to have been quite discredited.\n\nResponding to the claim that clan tartans have \"an ancient political significance\", [Richard Martin], curator of the [Fashion Institute of Technology] museum and later the Costume Institute at the [Metropolitan Museum of Art], wrote (1988): \"\\[This\\] assertion about history is wrong and can be demonstrated to be [perniciously] wrong\".[\\[413\\]] According to [National Galleries of Scotland] curator [A. E. Haswell Miller] (1956):[\\[409\\]] \n\n> To sum up, the presumed heraldic or \"family badge\" significance of the tartan has no documentary support, and the establishment of the myth can be accounted for by a happy coincidence of the desire of the potential customers, the manufacturer and the salesman. Although the antiquity of the \"clan tartans\" is exaggerated, what might be termed their unofficial registration took place during the nineteenth century, and if we are prepared to accept some hundred and fifty years as sufficient to create \"tradition\", it may be excusable to accept the _fait accompli_ as a pleasant – and perhaps not entirely useless – national vanity.\n\n[Highland-dress] researcher and curator John Telfer Dunbar added:[\\[414\\]] \n\n> The desire to give to relics of all kinds greater antiquity than they truly possess is manifold. It is a pity that tradition should be degraded in this way and the acceptance of such claims by later students has been a constant obstacle to research. The more difficult task of searching back to original sources has often been avoided in favour of easy acceptance.\n\nJust that sort of research was performed by Peter Eslea MacDonald of the [Scottish Tartans Authority], who – using every available surviving company record and sample – reconstructed and traced the history of tartan patterns from the leading weaver of the late Georgian through Edwardian eras, a company instrumental in the actual design, spread, and acceptance of clan tartans. His conclusion:[\\[410\\]] \n\n> Today, books and shops dealing with [Highland dress] will be mainly, if not exclusively, concerned with clan tartans. They may seek to suggest that these are the actual patterns worn by the Scottish clans throughout history, up to and including the [Battle of Culloden] in 1746. This is not the case. The majority of the pre-1850 patterns bearing clan names can only be traced back to the early 19th century and to the famous weaving firm of William Wilson & Son of Bannockburn, near Stirling.\n\nThe notion of clan tartans has been called \"an astonishingly successful marketing story\"[\\[406\\]] and an example of an [invented tradition],[\\[415\\]] though one that became very well-accepted by the clans to whom it pertained and by the weaving industry starting in 1815, as well as by the general public from around 1822 – \"adopted enthusiastically by both wearer and seller alike\".[\\[416\\]] \n\nPrecursors of clan tartans were regionally distinctive tartans (since at least the early 18th century, perhaps even the 16th), regimental uniform tartans (from 1725 onward), and personal tartans of nobles (dating to perhaps the mid-18th century if not earlier).\n\nToday, clan tartans are an important aspect of Scottish clans, and every clan has at least one tartan attributed to its name (some officially, some not, and in a few cases one tartan is shared between multiple clans). Clan tartans may not have actually been traditional, but they became conventional.\n\n### Long-running debate\n\n\\[ [edit] \\]\n\nJohn Campbell of the Bank, 1749, by [William Mosman]. The present official [Clan Campbell] tartans are predominantly blue, green and black.[\\[417\\]] \n\nVarious writers on tartans have supported or opposed the idea of clans long using distinctive tartans as an identifying badge, interpreting the scarce evidence as suited their viewpoint.[\\[ay\\]] Where one saw a militia uniform, or an individual noble's plaid, another saw a clan identifier. The 19th-century Celtic scholar [John Francis Campbell] of Islay was certain that while tartans in general were quite old, \"uniform clan tartans are no older than clan regiments\", a view backed by Haldane (1931) in a series of articles in _[The Scots Magazine] _,[\\[419\\]] followed by many tartan writers later.\n\nThe earliest evidence summarised below could have been more a matter of militia uniform than clan-wide dress; a distinction in that era is difficult to be certain of today, because troops then were led by landed gentry and a unit was raised largely on its commander's land from his clansmen.[\\[az\\]] Such definitional uncertainty could also apply to the 1691 _Grameid_ poem;[\\[252\\]] describing what appear to be some soldierly uniform tartans,[\\[ba\\]] it could be reinterpreted as supporting an early notion of clan tartans, if one wanted to define that as 'what most of the men of a clan were wearing into battle'; Scarlett (1990) confirms that there has been \"fiery argument\" in favour of a clan tartans interpretation.[\\[254\\]] However, [Robert Jamieson] (1818) reported that the \"field dress\" plaids of [Highland] men, for war and hunting, were different from their everyday dress – made of coarser material and using patterns intended to blend into natural surroundings, the _cath dath_ or _cath da'_ ('war colour').[\\[94\\]] [\\[bb\\]] This casts some doubt on interpretation of militia tartans as general clan tartans. Most of the later regimental uniform tartans (which did not become adopted as clan tartans until around the early 19th century or the late 18th in a few cases, when they did at all) were variations on the dark, green-based Black Watch tartan, as detailed above.\n\nJ. C. Thompson (1992) noted \"a typical Victorian inclination to cite previous authors with little or no attempt to evaluate their statements .... Modern analysis cannot afford to be so uncritical.\"[\\[421\\]] Scarlett (1990) relatedly observed:[\\[422\\]] \n\n> Wishful proofs are found in profusion in the literature of tartan, early and late, and consist of stating an opinion as a fact and adding some more or less relevant historical reference in support, either implying or stating that this proves the point. That it proves nothing at all is neither here nor there, so long as the manner of the presentation is sufficiently authoritative; given this treatment the wildest theory will be accepted, copied from one book to the next and so enter tartan lore. It is almost axiomatic that the wilder the theory the more acceptable it will be ....\n\nEven D. W. Stewart (1893), who had sometimes been sympathetic toward the idea of clan tartans existing before the 19th century, wrote:[\\[423\\]] \n\n> Some ... assure us that the antiquity of the so-called clan patterns is very great, and many writers allege in general terms that these designs were used as a clan distinction from the earliest period. ... The halo of romance surrounding the [Jacobite] struggle inclined many, and still induces others, to accept as authentic and reliable, statements which in different circumstances would be more closely sifted. Thus it is that the tartans ... have won much favour, and those who find one represented as bearing their name accept it as their ancient clan pattern without the inconvenience of investigation, or of posing any awkward questions.\n\nThe Victorians also engaged in some imaginative invention. Aside from the outright forgery of the \" [Sobieski Stuarts] \" (see [§ 19th century broad adoption], below), another extreme case is [Charles Rogers], who in his _Social Life in Scotland_ (1884–86) fantastically claimed that the ancient [Picts] ' figural designs – which were painted or tattooed on their bodies, and they went into battle nude [\\[424\\]] – must have been \"denoting the families or [septs] to which they belonged\" and thus \"This practice originated the tartan of Celtic clans.\"[\\[425\\]] Another asserted that tartan was invented around a thousand years ago by [Saint Margaret of Scotland].[\\[425\\]] \n\nAside from the unreliability of early writers (and later copiers of them), part of the confusion and debate about clan tartans comes down to definitions. Sir [Thomas Innes of Learney], writing in 1938 and described as \"immensely keen on \\[tartan\\] codification and the importance of it\",[\\[426\\]] was one of the firmest proponents of the idea of very old clan tartans (in the particular sense of 'patterns consistently used for a period by certain clans', not 'patterns named for certain clans and claimed by them to the present').[\\[bc\\]] He held that some setts gradually became associated with particular families (clans and septs thereof) over time;[\\[427\\]] clan territories had mostly become stable by the 16th century.[\\[428\\]] D. W. Stewart's 1893 reference shows various cases of old district tartans later sometimes being identified for a time with specific families before 19th-century adoption of their own (usually different) clan tartans.[\\[bd\\]] Innes of Learney wrote of clan tartans that (notwithstanding the unusual 1618 case covered below) \"the tendency was rather to insist upon a similarity of general hue than on similarity of detail\",[\\[431\\]] a vague sense that is not what \"clan tartan\" usually refers to. He also reasoned that \"it was not until about the 18th century that the clan tartans became _conscious_ and _acknowledged_ badges of identification\".[\\[432\\]] However, the surviving period source material lacks this \"acknowledgement\" and does not actually suggest broad adoption of formal clan tartans (with clan names, particularity of detail, and a symbolic, identifying intent) until the early 19th century.\n\nThe \" [Sobieski Stuarts] \" (1842) and later D. W. Stewart (1893) made much of some changes to the [feu duty] paid in woven cloth by locals of Noraboll on the island of [Islay] to their lords. In 1587, under the [Macleans], the cloth was to be white, black, and green; in 1617, under the [Mackenzies], the demanded cloth-rent changed to white, black, and grey. These writers were sure, without any further evidence, that this represented a change of clan tartans.[\\[433\\]] [\\[be\\]] \n\nThe only clear instance of a clan-based and specific [livery] tartan to an early date, rather than simply regional and later regimental uniformity, is found in a 1618 letter from [Sir Robert Gordon of Gordonstoun] (in the employ of the [Earl of Sutherland]) to [Murray of Pulrossie], chieftain of the [Murray] branch in [Sutherland] but subordinate to the Earl of Sutherland, chief of [Clan Sutherland] (in turn recently become subordinate to the [Gordon earls]). The letter (rediscovered in 1909) requested Pulrossie \"to remove the red and white lines from the plaides of his men so as to bring their dress into harmony with that of the other septs\" of Sutherland.[\\[405\\]] The letter does not specify the tartan to which to conform; there have been sharply conflicting interpretations, and it is not even certain that it was a tartan that survived to the present.[\\[bf\\]] \n\nThis 1714 portrait, by [Richard Waitt], of the piper to the chief of [Clan Grant] does show a broad green-ish and red tartan, but it does not match any modern Grant pattern.[\\[439\\]] [\\[bg\\]] \n\nA case of general colour-matching: In 1703–04, the chief of [Clan Grant] ordered that his \" [fencible] \" men obtain clothing in red and green tartan[\\[258\\]] (vaguely described as \"broad springed\"[\\[258\\]] but not specified in detail).[\\[175\\]] The material seems not to have been provided by Grant for them in a centralised way, but left to each man to furnish by his own means (on penalty of a fine).[\\[258\\]] \"He did not order them to wear the 'Clan Grant Tartan', as one would expect if such a tartan existed at that time.\"[\\[441\\]] Some of the modern Grant tartans also use red and green; one was designed by Wilsons of Bannockburn in 1819 as \"New Bruce\" and shortly adopted by both Grant of [Redcastle] [\\[442\\]] and [Clan Drummond];[\\[443\\]] one was reconstructed from an 1838 portrait;[\\[444\\]] another first appeared in the dubious _Vestiarium Scoticum_ of 1842[\\[445\\]] [\\[446\\]] (see below); and so on – none with pre-19th-century history. Nevertheless, D. W. Stewart (1893) proclaimed on this thin material that here was \"a complete chain of evidence ... of the existence of a uniform clan pattern at the very start of the eighteenth century\" – despite his own observation that portraits of leading members of the Grant family in this era do not show them wearing consistent tartans,[\\[258\\]] [\\[254\\]] much less ones that agree with modern \"official\" Grant tartans.[\\[407\\]] [\\[447\\]] [\\[bh\\]] Scarlett (1990), though thinking this presaged \"the Clan Tartan Idea\", notes that \"had the men of [Strathspey] been accustomed to wearing uniform tartans it would not have been necessary to order them to do so\"[\\[254\\]] (twice over). He also observes that the lairds of Grant in this period were unusually bent on uniformity, one of them even issuing moustache regulations for clansmen;[\\[254\\]] the Grant red-and-green order cannot be taken as typical of everyday Highland practice. Telfer Dunbar (1979) notes that Highland military discipline hardly existed: \"To these independent Highland chieftains restraint of any kind was irksome and unbearable, and to impose any rigid military discipline on their followers ... \\[was\\] found to be impossible.\"[\\[448\\]] Nevertheless, Mackay (1924) corroborates Grant militia wearing a livery tartan in 1715.[\\[259\\]] \n\nIn 1718, [Allan Ramsay] (the writer, father of the artist by the same name) published the poem _Tartana_, which combined colours with Latinised family names: \"... If shining red [Campbella] 's cheeks adorn .... If lin'd with green [Stuarta] 's Plaid we view ... Or thine [Ramseia], edg'd around with blue ....\" This has sometimes been taken as evidence of early clan tartans, despite possibly just referring to the edging and lining of garments (coloured facings were common on jackets of the time).[\\[449\\]] [\\[bi\\]] Worse for this hypothesis, the Campbell tartans are predominantly green, Stuart/Stewart red, and Ramsay red and green. The extant red Campbell tartans are all modern reconstructions of patterns (that are unlike each other) from portraits;[\\[451\\]] [\\[452\\]] [\\[453\\]] Stewart/Stuart tartans with significant green date to the early 19th century[\\[454\\]] [\\[455\\]] [\\[456\\]] [\\[457\\]] [\\[458\\]] or much later;[\\[459\\]] [\\[460\\]] [\\[461\\]] and the Ramsay blue hunting sett dates to 1950.[\\[462\\]] \n\nA Victorian volume, _Old and New Edinburgh_ (1884) by [James Grant],[\\[463\\]] stated that one Rev. Joseph Robertson MacGregor \"attired himself in a full suit of the MacGregor tartan\" in 1782, upon repeal of the Dress Act. But it misquoted the original source (and contained other errors). The original, _A Series of Original Portraits and Caricature Etchings_ (1842) by [John Kay], read: \"dressed himself in the Highland costume peculiar to his clan\", and says nothing of tartan, much less a suit of clan tartan.[\\[464\\]] While 1782 is within the late-18th-century range accepted by some researchers for some informal early clan tartans, this is not clear evidence of one.\n\n### Lack of further evidence of early adoption\n\n\\[ [edit] \\]\n\n[John Lesley], bishop of Ross, in 1578 wrote a great deal about Highland customs, including dress, but did not include clan tartans (despite later being claimed to have been the original keeper of the _Vestiarium Scoticum_ clan-tartans manuscript, now known to be a 19th-century forgery).[\\[465\\]] \nIn 1688, William Sacheverell, a [Manx] politician, described Hebrideans of the [Isle of Mull] all wearing plaids, but the women in a different style of colour and pattern – not a consistent \"clan\" tartan.[\\[251\\]] Rev. Thomas Morer in 1689 described Highland garb in some detail, including tartan plaids and hose (made from the same cloth), but mentions no clan patterns.[\\[466\\]] [Daniel Defoe] (c. 1720) wrote also in considerable detail of Highland warriors of the prior century, and noted that the men were organised into \"companies, all of a name\", each led by \"one of their own clan or family\", yet he never mentions any distinction between tartans of these different groups, instead describing them all as wearing tartan with red and yellow over-checks,[\\[238\\]] strongly implying a regional style. This pattern of 17th- through 18th-century writings providing specifics of tartan and Highland dress, but nothing about clan tartans, is consistent.\n\nContemporary portraits show that although tartan is of an early date, the pattern worn depended not on the wearer's clan, but rather regional style and personal taste. They frequently depict subjects wearing multiple tartans at once.[\\[432\\]] Nor do the tartans shown match current clan tartans.[\\[467\\]] [\\[468\\]] For example, the famous painting [_The MacDonald Boys Playing Golf_] (1740s), attributed usually to [William Mosman] but sometimes to [Jeremiah Davison], shows them wearing five different tartans, and they are not surviving patterns (except as later reconstructions from the painting).[\\[bj\\]] Period tartans were also often of differing [warp and weft] (giving more of a striped than checked appearance), unlike modern symmetrical patterns.[\\[470\\]] Sometimes the portraits were copied, but with tartans that do not match, as if the designs were up to artistic whim.[\\[471\\]] As Scarlett (1990) put it:[\\[472\\]] \n\n> \"\\[T\\]hese portraits have one thing in common: in no case does the tartan shown bear any close resemblance to the modern 'Clan' tartan. ... There is a great lack of evidence to show that the pattern of a tartan had any important significance in the early eighteenth century\n\nD. W. Stewart (1893) had also noted this, about both portrait tartans and \"examples of tartan fabrics which can be proved to date from the risings of 1715 and 1745\".[\\[423\\]] Many of the portraits by [Allan Ramsay] the younger show the same shoulder plaid but with colours changed, suggesting it was the artist's own studio prop and used for modelling purposes by his clients who apparently did not care about the tartan pattern.[\\[26\\]] [\\[176\\]] [\\[327\\]] According to [Scottish National Portrait Gallery] keeper A. E. Haswell Miller (1956):[\\[409\\]] \n\n> Authentic documentation of the tartan previous to the 19th century is limited to a comparatively small number of contemporary portraits, and is negative so far as it provides any suggestion of [heraldic] significance or \"clan badge\" intention.\n\n[David Morier] 's _[An Incident in the Rebellion of 1745] _. The tartans shown generally do not resemble modern ones.\n\nAccording to Trevor-Roper (1983):[\\[473\\]] \n\n> contemporary evidence concerning the [rebellion of 1745]  – whether pictorial, [sartorial], or literary – shows no\n> differentiation of clans, no continuity of setts.... Tartans were a matter of private taste, or necessity, only.\n\n[David Morier] 's well-known mid-18th-century [painting] of the Highland charge at the 1745 [Battle of Culloden] shows eight Highlanders wearing over twenty different tartans which have been analysed in detail;[\\[474\\]] very few of the setts painted resemble today's clan tartans,[\\[305\\]] [\\[313\\]] though they are similar to existing samples of tartan cloth from the era.[\\[475\\]] [\\[bk\\]] The method of identifying Highlander friend from foe was not through tartans but by the colour of the [bonnet] 's [cockade] or ribbon, or perhaps by the different plant sprigs worn in the cockade of the bonnet.[\\[185\\]] [\\[141\\]] [\\[bl\\]] [\\[bm\\]] In particular, the government Highland militia forces wore a badge in the form of a black cockade with red [saltire]; according to Mackay Scobie (1946), \"each individual wore his own Highland dress with varied tartans, with the only uniform part being the ' [Hanoverian] ' cockade and large coloured cross on the bonnet.\"[\\[482\\]] A 1745 letter on the Jacobite troops at Culloden describes \"all ye Forces as well Horse as foot were in Highland Dress except ye body Guards wh. wore Blue bound wth Red\"; i.e., only the bodyguards were wearing a uniform, and it was not of Highland dress.[\\[483\\]] \n\nOne of many tartan legends has it that the Highland-dress ban of the [Dress Act] was enacted because tartans were used as clan-identifying symbols or uniforms, but not a trace of this idea can be found in period sources. To the contrary, Burt (1727–37) was explicit that English objection to Highland dress (since perhaps 1703–04)[\\[339\\]] was _general_, because the garb served to distinguish the Highlanders as a people apart from the [Lowlanders] and other British (not distinguish Highlander from Highlander).[\\[484\\]] [\\[bn\\]] Defoe (c. 1720) likewise mocked Highland dress as what he saw as a clownish costume that set Highlanders apart from everyone else, not each other.[\\[238\\]] Similarly, in an account of Jacobite trials, it was asked whether defendants had worn \"Highland cloaths\" in general, with no mention of clan-identifying patterns.[\\[485\\]] Extant [MacDonald] tartan fragments from the Battle of Culloden do not match each other or any current clan tartan named MacDonald.[\\[481\\]] [Lord President] [Duncan Forbes of Culloden], keen on punishing the Jacobites with disarmament and other penalties, wrote a detailed letter laying out _pro_ and _con_ points (mostly _con_) regarding the proposed Highland-dress ban before Parliament passed it, yet never indicated anything like clan tartans, something that would have been a key argument to address.[\\[486\\]] \n\nC. C. P. Lawson (1967) raised a point of logic: \"Remembering the continuous clan feuds and the consequent state of more or less perpetual hostilities, a recognisable clan plaid would have been a positive danger to the wearer outside his own territory.\"[\\[487\\]] This may explain why the handful of early apparent examples of groups of men in similar tartan seem to have the nature of militia uniforms and are mentioned in the context of \" [fencible] \" bodies or outright battle (possibly aside from the 1618 case).[\\[bo\\]] Lawson also states: \" [The '45] supplies no evidence that tartans were used as clan insignia .... Relics of those tartans which were worn at Culloden or of the pre-1745 period bear no resemblance to any known modern tartan.\"[\\[487\\]] The [Lord Lyon King of Arms] in 1948, Sir [Francis James Grant], wrote that pre-1745 tartans were qualitatively different from those of the 19th century and later.[\\[488\\]] [Scottish United Services Museum] curator Dunbar (1979) notes this as well.[\\[489\\]] \n\nThe Jacobite poets wrote much about the rousing appeal of Highland clans and Highland dress, even tartan specifically, but never mentioned clan tartans.[\\[490\\]] Similarly, multiple large volumes of traditional Highland folklore were collected and published by [John Francis Campbell] in 1860–62 (revised 1890–93), and [Alexander Carmichael] (who also collected tartan samples) in 1900, but the period materials in them are devoid of any recorded references to clan tartans[\\[490\\]] (despite post-dating the popularisation of the notion among city-dwellers and the upper class).\n\nThe idea of groups of men wearing the exact same tartan as an identifier is thought to originate (aside, again, from the odd 1618 case) from [Highland regiment] units in the 18th century, starting with the Black Watch in 1739/1749.[\\[305\\]] [\\[225\\]] According to Trevor-Roper (1983):[\\[491\\]] \n\n> \\[I\\]t was probably their use of it which gave birth to the idea of differentiating tartan by clans; for as the Highland regiments were multiplied ... so their tartan uniforms were differentiated; and when the wearing of tartan by civilians was resumed, and the romantic movement encouraged the cult of the clan, the same principle of differentiation was easily transferred from regiment to clan.\n\nParticular regiments were often dominated by men raised from the same clan lands, and this may have blurred the line between regimental uniform and clan-identifying tartan. (And several tartans of extinct regiments survive today as clan tartans.) Newsome (2006) writes: \"the practice of clans wearing these regimental tartans may have in fact been the inspiration for the 'clan tartan' system as we now know it.\"[\\[225\\]] Telfer Dunbar (1979), on the idea of the early [Independent Highland Companies] using distinct uniform tartans: \"I feel sure that here we have much of the 'clan tartan' origin.\"[\\[492\\]] The end of the 18th and beginning of the 19th centuries brought an unprecedented level of influence of military clothing styles, including Highland regimental, on civilian attire (even for women), especially among the social elite connected to regiments.[\\[351\\]] Some regimental tartans appear to have been named after their commanding officers, and this may be how they came to be associated with family/clan names over time.[\\[493\\]] [\\[379\\]] Banks & de La Chapelle (2007):[\\[334\\]] \n\n> the notion of differentiation of tartan by clans might have evolved from this desire to distinguish on Highland regiment uniform from another. Certainly, its classification for military use laid the groundwork for many subsequent designs and the movement toward uniformity.\n\nScarlett (1990) also observed the connection to regional or \"district\" tartans:[\\[256\\]] \n\n> \\[B\\]asic patterns prevailed over wide areas and were modified by local weavers for their own ends. It can easily be seen that a local pattern of this kind, made for a captive clientele, might have become identified with the people of that locality who were themselves predominantly of one Clan or family group and its adherents and, when the belief grew up that Clan tartans had been worn since the beginning of time, have become, by retrospection, the Clan tartan of that group. There is no evidence that the Highlanders themselves looked on tartan in that light, however ....\n\nUnknown Jacobite lady in Tullabardine tartan, c. 1740–1750, attributed to [Cosmo Alexander] \n\nHaswell Miller (1956) similarly noted: \"We can ... readily accept that certain dyes would prevail in different regions and that traditional types of pattern might be followed in various parts.\"[\\[409\\]] [Martin Martin] in 1703 had described tartans as being identifiably specific to particular regions, but not clans.[\\[255\\]] There are numerous cases of tartans loosely associated with districts later becoming clan tartans. The best-documented case[\\[7\\]] is the [Tullibardine] pattern, one of the few modern clan tartans that can be traced (at all, not as a clan tartan) to the pre-proscription period.[\\[327\\]] [\\[7\\]] It was long associated with [Perthshire], and later adopted as the [Murray] of Tullibardine clan tartan, but sold by Wilsons as simply \"Tullibardine\" as late as c. 1830–40, and it was found for sale in a market by W. & A. Smith around 1850, who also said it was worn then by [Charles Murray, Earl of Dunmore];[\\[327\\]] the first record of the pattern as \"Murray of Tullibardine\" is in their 1850 book.[\\[7\\]] It appears in at least five early portraits; four date to c. 1740–1750, the first of an unknown female sitter attributed to [Cosmo Alexander],[\\[bp\\]] and three by [Allan Ramsay] (with the cloth painting completed by [Joseph van Aken]) which are not of any known Murrays (but of [a Campbell], [a MacLeod], and [a MacDonald]).[\\[327\\]] It is not until 1770 that a known Murray is painted wearing it ( [John Murray, Earl of Dunmore], by [Joshua Reynolds]),[\\[327\\]] which still does not necessarily make it a \"clan tartan\" at that early a date (\"evidence for its historic use by that branch \\[of Clan Murray\\] is circumstantial at best\").[\\[7\\]] The oldest version of it differs slightly as to colours and sett from the modern clan version.[\\[7\\]] [\\[bq\\]] \n\nSimilarly, according to the [Scottish Register of Tartans], [the district tartan] for [Huntly],[\\[315\\]] originating in more complex form as [the personal tartan] of a Marchioness of Huntly[\\[495\\]] (probably [Henrietta Mordaunt]), was in use as a regional tartan since at least \"the '45\", and worn at Culloden by clansmen of [Brodie], [Forbes], [Gordon], [MacRae], [Munro], and [Ross], \"which gives a strong indication of the greater antiquity of the 'District' setts compared to the Clan tartans.\"[\\[495\\]] \n\nSome surviving early records of tartan manufacture are those of the Orphan Hospital Manufactory and Paul's Work, in Edinburgh, for the period 1734–37 and 1751–52; tartans were not named but given numeric designations such as \"No. 2nd\".[\\[169\\]] In 1745, the _[Caledonian Mercury] _ of Edinburgh carried an advertisement for a \"Great Choice of Tartans, the newest Patterns\" – not clan or even district tartans, but newly devised ones, suggesting a fashion market driven by novelty not supposed \"heraldic\" traditions. Even clan-tartans booster D. W. Stewart (1893) conceded: \"This advertisement, it may be urged, is a stumbling-block in the way of those who argue for the antiquity of clan patterns; for it seems peculiar that, when the city was filled with Highlanders of all ranks and many clans, they should be offered not their ancient setts ....\"[\\[496\\]] Other advertisements for tartan from 1745 to the early 19th century did not mention clans, or focus on the patterns at all, but rather on the forms in which the cloth could be ordered.[\\[497\\]] Even immediately after the repeal of the Dress Act in 1782, the demand was for \"latest patterns and bright colours\",[\\[498\\]] with no hint of a family heraldry aspect.\n\nWilliam Wilson & Son of Bannockburn, just south of the dividing line between the Highlands and Lowlands,[\\[499\\]] were the first large-scale commercial tartan producers;[\\[105\\]] founded c. 1765,[\\[9\\]] they had become the foremost supplier of tartan to the military by around 1770, and the dominant tartan weaver in general.[\\[340\\]] It was an endeavor that required the introduction of tartan recording, of standardisation of setts and dyes, and of consistency and quality control.[\\[9\\]] [\\[500\\]] [\\[499\\]] Wilsons corresponded with their agents (especially the son, James Wilson)[\\[102\\]] in the Highlands to get information and samples of cloth from the various districts to enable them to reproduce \"perfectly genuine patterns\". Wilsons recorded over 200 setts in addition to ones they designed in-house, collected in their 1819 _Key Pattern Book_ of around 250 setts[\\[9\\]] (among earlier in-house volumes to the 1770s). These tartans were numbered, named after places, or given fanciful names such as \" [Rob Roy] \", later sometimes family names (after prominent members), sometimes foreign names like \"Coburg\", but usually not those of clans,[\\[275\\]] [\\[399\\]] [\\[501\\]] [\\[141\\]] nor, when they did, often matching present clan patterns.[\\[502\\]] [\\[br\\]] A large proportion of the modern clan tartans, however, _can_ be traced to this work – just often originally with numbers or unrelated names.[\\[399\\]] The evidence of direct adoption from Wilsons happening frequently completely overwhelms \"ancient clan tartans\" sentiment.[\\[bs\\]] \n\nThe _Scottish National Dictionary_, in providing an unusually discursive definition of _tartan_, includes: \"\\[T\\]owards the end of the 18th century and largely through the enterprise of Messrs Wilson, weavers in Bannockburn, a series of tartans, each ascribed to a certain clan, was devised and is now accepted as authoritative, though almost entirely unhistorical.\"[\\[515\\]] Analysing the direct and strong influence of Wilsons' _Key Pattern Book_ ( _KPB_) on the later adoption of clan tartans (see next section), Eslea MacDonald (2012) concluded:[\\[516\\]] \n\n> Some of the 1819 _KPB_ setts no longer retain their original names, others were altered or were the basis for a number of variations which were named or simply numbered .... Whatever their origins, these patterns gave rise to the idea of clan tartans as we know them today. In a very few cases a pattern's origins may have indeed been a lot older than the 1819 _KPB_ but their contemporary names were almost always the work of Wilsons or subsequent writers.\n\nThe Cockburn Collection of 56 tartan samples (some of them duplicates) was put together between 1810 and c. 1825 (most likely 1816–25)[\\[517\\]] [\\[399\\]] by Lt.-Gen. Sir William Cockburn, and is now in the [Mitchell Library] in [Glasgow].[\\[399\\]] [\\[518\\]] This collection does ascribe particular family names to many of these setts (probably naming them after prominent individuals),[\\[517\\]] but only sometimes corresponding to current clan tartan associations (indeed, some patterns that are today associated with particular clans were given multiple different names in the Cockburn Collection).[\\[bt\\]] There are many conflicts in name-to-pattern associations between this collection and that of the [Highland Society of London] around the same time.[\\[517\\]] \n\nEven [David Stewart of Garth], who was to become one of the chief proponents of the idea of clan tartans, observed in 1814 only that various heads of families seemed to have selected personal tartans and that there were also district tartans.[\\[520\\]] When Garth and his Highland Society of London solicited clan tartans from chiefs in 1815 (see below), Col. Alexander Robertson of Struan, Chief of [Clan Robertson/Donnachaidh/Duncan], wrote back:[\\[521\\]] \n\n> It does not appear to be appertained, either by tradition or by authentick history, that the different Clans in the Highlands of Scotland, wore any distinctive pattern or tartan. It is well known that they all had particular [Colours, or Standards], emblematical of some of their most honourable attachments, but as far as I have been able to discover, they wore no uniform Garb.\n\nAt the beginning of the 19th century, a letter from an Inverness tailor to Wilsons of Bannockburn requested fine tartan cloth to be used for women's clothing, because the fashion was to wear husbands' regimental tartans (not clan tartans).[\\[359\\]] In 1829, responding negatively to the idea of Lowland and Borders \"clans\" wearing their own tartans, Sir [Walter Scott]  – who was instrumental in helping start the clan-tartans fervour in the first place – wrote \"where had slept this universal custom that nowhere, unless in this MS. \\[the draft _Vestiarium Scoticum_, published ultimately in 1842\\] is it even heard of? ... I would rather suppose that the author had been some tartan-weaver zealous for his craft, who wished to extend the use of tartan over the whole kingdom.\"[\\[522\\]] Also in the same year, he wrote: \"The idea of distinguishing the clans by their tartans is but a fashion of modern date in the Highlands themselves\".[\\[523\\]] \n\nAnother of the tartan legends has it that [Alexander Gordon, 4th Duke of Gordon], commissioned the design of a clan tartan based on Black Watch in 1793, kept one of three designs, then passed the other two on to [cadet branches] of the family.[\\[524\\]] This tale can be traced in unembellished form to 1793 records of weaver William Forsyth of Huntly which do not say this at all, only that Forsyth provided three potential designs for a _regiment_ tartan, with yellow over-checks in various configurations, of which the Duke selected no. 2 for the unit, the [92nd Gordon Highlanders].[\\[386\\]] \n\nScarlett (1990) surmises that there must have been _some_ informal clan tartans – a confluence of district tartans that had become associated with particular families, and adoptions of regimental uniform tartans by them – by the late 18th century, otherwise there is no explanation for where Stewart of Garth got the idea.[\\[525\\]] Scottish United Services Museum curator Maj. I. H. Mackay Scobie (1942),[\\[411\\]] Haswell Miller (1947),[\\[526\\]] and Barnes & Allen (1956),[\\[93\\]] also zeroed in on this timeframe. Eslea MacDonald (2010-11) observes, for example, the Murrays using the common Tullibardine regional pattern in portraits and in bed hangings at their clan seat, [Blair Castle], 1770 – c. 1780 and possibly earlier.[\\[327\\]] Telfer Dunbar (1979), considering the 1703–04 Grant proclamation and the early regiments, suggests that \"any uniformity of tartan was only to be found in an organised body of troops, or the 'tail' or following of a chief.\"[\\[441\\]] These possible comparatively early, informal clan tartans of the late-18th-century simply cannot usually be identified (when they survived) until the early 19th century.\n\n### 19th century broad adoption\n\n\\[ [edit] \\]\n\nIt has been suggested by a modern chief of [Clan Campbell] and another of the clan executives that the clan had informally adopted what is now known as [old Campbell or Black Watch] tartan by the early 19th century, because so many of their men were already wearing it as part of regimental uniform[\\[417\\]] (three of the Independent Highland Companies that amalgamated into the [Black Watch] regiment in 1739–1751 were Campbell units).[\\[492\\]] Some time in or after 1806, when he became clan chief, the city-dwelling politician [George Campbell, 6th Duke of Argyll], created his own personal tartan, of Black Watch with a thin over-check of white and yellow added,[\\[527\\]] \"to differentiate himself from the rest of the Campbells\", i.e. because they were already so often wearing Black Watch.[\\[417\\]] This essentially may have been one of the earliest attested surviving clan tartans (and the duke's variant was an early declared personal tartan of a noble).[\\[bu\\]] \n\nMaj.-Gen. [David Stewart of Garth], c. 1820, in [royal Stewart tartan] \n\nThe idea arose among Scottish expatriates (especially in the [Celtic societies], which encouraged members to wear \"appropriate\" tartans),[\\[528\\]] eager to \"preserve\" Highland culture,[\\[9\\]] [\\[528\\]] [\\[529\\]] that tartans had traditionally been named and that the names represented clan affiliations.[\\[9\\]] Among them was Maj.-Gen. [David Stewart of Garth], a Black Watch veteran and vice-president of the [Highland Society of London] [\\[530\\]] (founded 1778).[\\[399\\]] He and fellow members Sir [John Sinclair] and [Andrew Robertson] were among the first proponents of the idea of clans being identified by tartans, despite the lack of evidence.[\\[530\\]] [\\[531\\]] [\\[532\\]] [\\[517\\]] [\\[bv\\]] The society also counted among its members the [Prince of Wales] [\\[534\\]] (the future [George IV], who was to become instrumental to clan \"tartanry\" in 1822) and two [dukes], among various itinerant actual Scots[\\[535\\]]  – including [James Macpherson] of \" [Ossian] \" fame (or infamy).[\\[536\\]] \n\n[Elizabeth Gordon (_née_ Brodie), Duchess of Gordon], c. 1813–1814 by [Alfred Edward Chalon]; she appears to be wearing Black Watch (42nd regiment) tartan, as it lacks the yellow over-check of 92nd Regiment, which became the Gordon clan tartan. This was only about a year before the Highland Society solicited clan patterns.\n\nOn 8 April 1815, the society resolved that the clan chiefs each \"be respectfully solicited to furnish the Society with as much of the Tartan of his Lordship's Clan as will serve to Show the Pattern and to Authenticate the Same by Attaching Thereunto a Card bearing the Impression of his Lordship's Arms.\"[\\[537\\]] Many had no idea of what their tartan might be or whether they had one,[\\[538\\]] some provided only a vague description, and some claimed they had none.[\\[534\\]] But plenty were keen to comply and to provide authentic signed and [sealed] samples;[\\[537\\]] [\\[534\\]] [\\[539\\]] many (possibly most) turned to Wilsons of Bannockburn for a design,[\\[528\\]] [\\[504\\]] while some directly adopted a regimental tartan as their own,[\\[399\\]] [\\[9\\]] [\\[bw\\]] and still others adapted designs from old portraits of clan nobles.[\\[534\\]] [\\[bx\\]] [Alexander Wentworth Macdonald], Baron [Macdonald], wrote back to the society: \"Being really ignorant of what is exactly The Macdonald Tartan, I request you will have the goodness to exert every Means in your power to Obtain a perfectly genuine Pattern, Such as Will Warrant me in Authenticating it with my Arms.\"[\\[537\\]] Finding no agreement within his clan on a pattern, Robertson of Struan ended up adopting the regimental tartan of the Loyal Clan Donnachie (Robertson) Volunteers; being based on the Black Watch pattern, it could not pre-date the late 18th century.[\\[542\\]] On the other hand, [Sir John Macgregor Murray] of [Clan Gregor], who had spent most of his life in England and India, was writing instructions on the use of his clan's tartan by December 1818.[\\[543\\]] In 1819, Wilsons were engaged in correspondence to \"send ... specimens of all coloured Tartans used by these Clans ...said to exceed thirty in number\", to a writer in Italy preparing a book on clan tartans;[\\[544\\]] the same year, they also produced their _Key Pattern Book_ of over 200 tartans (representing only a fraction of their total tartan output, presumably the most marketable designs, and not always under the same names as found in contemporary collections of Wilsons' tartan samples such as the Cockburn collection and that of the Highland Society).[\\[545\\]] \n\nAccording to Trevor-Roper (1983), Wilsons were in a direct \"alliance\" with the Highland Society of London by 1819; the former saw a great marketing opportunity, and the latter provided a veneer of respectability as Wilsons helped the society pin tartans to clan names.[\\[546\\]] [\\[547\\]] Banks & de La Chapelle (2007) concur: \"The Wilson firm worked in tandem with the Highland Society, preparing tartan samples for the latter to certify as belonging to one clan or another.\"[\\[502\\]] Clan nobles (who sometimes contradicted each other, within the same clan, on what their tartan was or should be)[\\[548\\]] were apparently also \"ready to adopt changes at the mere dictation of fancy\" to improve designs.[\\[549\\]] From the \"authentications\" they received 1815–26, the society built up a clan-tartan collection (now in the [National Museum of Scotland]), with 34 authenticated specimens and about 40 others.[\\[534\\]] [\\[550\\]] [\\[by\\]] Other such societies generated more interest, belief, and demand.[\\[551\\]] According to the analysis by Eslea MacDonald (2012), \"Most of the pieces sealed \\[by clan chiefs\\] and deposited with the Society at that time were patterns woven, and in the majority of cases appear to have been designed, by Wilsons. This obviously means they could not have existed prior to c1765 when William Wilson started his business.\"[\\[542\\]] So many of Wilsons' stock tartans from their _Key Pattern Book_ of 1819 were being renamed for clans that J. C. Thompson (1992) wrote: \"Clearly the naming of tartans was just getting started in 1819\",[\\[141\\]] and: \"There was nothing people wanted more than an ancient clan tartan system, and they were determined to have one.\"[\\[552\\]] By 1821, advertisements for tartan cloth had shifted to include language like \"true\", \"warranted\", and \"original\", and began to stress antiquity and family connections.[\\[553\\]] \n\nThe 1822 [visit of George IV to Scotland], in Highland garb and with a great deal of tartan-festooned public ceremony (arranged by Stewart of Garth and [romanticist] writer Sir [Walter Scott] of the [Celtic Society of Edinburgh]), had a profound tartan-boosting effect, including the invention of new clan-specific tartans to suit[\\[554\\]] [\\[399\\]] [\\[555\\]] (or renaming of old tartans to have clan names),[\\[9\\]] [\\[556\\]] as clan chiefs had been asked to attend in clan tartans.[\\[9\\]] It caused a boom in the tartan-weaving business,[\\[503\\]] and a broader public notion that tartans should be named for families.[\\[399\\]] [\\[9\\]] \"When these two \\[Scott and Stewart of Garth\\] stage-managed the King's visit ... they fixed the Clan Tartan idea in the public mind.\"[\\[556\\]] Wilsons' pattern book in 1822 had expanded significantly with tartans named for clans, in addition to all their numbered setts.[\\[514\\]] According to R. Martin (1988), Wilsons and other weavers were made aware of the king's planned visit three or four years in advance, and had all that time to pad their catalogues with additional designs and to assign clan names to patterns often \"probably picked entirely out of the air.\"[\\[557\\]] He added that \"anyone looking at the tartan pattern books of 1819 to 1822 would have realized the cacophony of different names for the same \\[pattern\\], the chaos of clan attributions, and the complete capriciousness of that association.\"[\\[558\\]] A telling letter from a tailor, archived among the Wilsons papers, to the company in 1822 asked: \"Please send me a piece of [Rose] tartan, and if there isn't one, please send me a different pattern and call it Rose.\"[\\[505\\]] \n\nBy 1824, an invitation to the [Atholl Gathering], one of the earliest of the modern [Highland games] festivals, made it clear that participants should arrive \"in the plaids or Tartans of their Clans\".[\\[559\\]] In 1829, Sir [Thomas Dick Lauder] complained to Walter Scott about all the \"uncouth, spurious, modern \\[tartans\\] which are every day manufactured, christened after particular names, and worn as genuine\",[\\[560\\]] and also of \"clans ... at this moment ignorantly disputing for the right to the same tartans which in fact belong to none of them but are merely modern inventions for clothing Regimental Highlanders\".[\\[561\\]] Scott himself was backpedalling away from what he had helped create, and was suspicious of the recent claims about \"ancient\" clan tartans: \"it has been the bane of Scottish literature and disgrace of her antiquities, that we have manifested an eager propensity to believe without inquiry and propagate the errors which we adopt too hastily ourselves.\"[\\[562\\]] \n\nThe [Scott] tartan invented by the \"Sobieski Stuarts\" around 1829, eventually published in the 1842 _Vestiarium_. Based on the c. 1819 [MacGregor], the tartan was rejected (along with other Lowland family tartans) by [Walter Scott], but remains the most popular Scott tartan.[\\[563\\]] \n\nA wave of highly dubious books were published, all purporting to reveal true clan histories and tartans; they presented little in the way of evidence, but they caused enthusiastic adoption of clan tartans. The first of these, in 1831, was _The Scottish Gaël or Celtic Manners, as Preserved Among the Highlanders_ by [James Logan], containing 54 tartans (based on Wilsons' collection, that of the Highland Society of London, and other sources he alleged but did not name, plus some he collected or devised himself); the author ignored advice from Wilsons on which were actually old tartans, and included some erroneous, fictitious, and incomplete setts.[\\[564\\]] [\\[565\\]] [\\[bz\\]] He also included untenable assertions about the designs' antiquity; \"Logan took the line that everything Highland was rooted impossibly far in the past\", and was mocked in _[The Pall Mall Gazette] _ for it.[\\[556\\]] Meanwhile, Wilsons and other weavers simply adopted some patterns from his book due to demand,[\\[566\\]] [\\[503\\]] [\\[567\\]] and also took to inventing all-new \"clan tartans\" to keep up with the growing market for patterns associated with names.[\\[568\\]] The archived correspondence of Wilsons in the 1830s shows that the company was frequently pressured by merchants for the \"truest\" and \"real\" clan patterns.[\\[569\\]] Logan, despite himself being involved in sham clan tartanry, observed that \"fanciful varieties of tartan ... were being passed off as genuine\" by Wilsons and other weavers.[\\[505\\]] \n\nLogan was followed in 1842 by _[Vestiarium Scoticum] _ by the so-called [Sobieski Stuarts], purporting to contain 75 centuries-old clan tartans, illustrated in great detail but from vague textual descriptions.[\\[570\\]] Although it is now known to have been largely a forgery,[\\[571\\]] [\\[572\\]] [\\[ca\\]] many of the visual tartan designs in this \"final – and fantastic – codification\"[\\[572\\]] of clan tartans were nevertheless adopted and still survive as accepted tartans of clans,[\\[576\\]] [\\[577\\]] especially for Lowland clan names (which had hitherto never been associated with tartan or Highland garb at all).[\\[399\\]] [\\[183\\]] [\\[578\\]] [\\[579\\]] Starting in 1822, [Borders] families had been redefining themselves as clans, and the book encouraged more of them to take on clan tartans and open clan societies.[\\[580\\]] Modern critics have even praised the lasting socio-cultural accomplishement of the Sobieski-Stuarts' works in helping establish a systemic clan-tartans legend while recognizing the bogus nature of their material.[\\[cb\\]] \n\n(The socio-political background of these events and their overall impact on tartan in general are presented at [§ Late Georgian] and [§ Victorian], below.)\n\nTrevor-Roper (1983) believed that the Sobieski Stuarts had been in direct communication with manufacturers like Wilsons, and were advising clan chiefs on which tartans to choose, from as early as 1819;[\\[582\\]] J. C. Thompson (1992) agreed.[\\[552\\]] Dick Lauder certainly said they were doing so by 1829, and that Wilsons were already weaving many Sobieski Stuart samples by that year;[\\[583\\]] the company's own records the same year confirm orders for designs from the Sobieski Stuarts.[\\[584\\]] _Vestiarium_ was followed soon after by _The Costume of the Clans_ published by the Sobieski Stuarts in 1845;[\\[566\\]] the illustrations it provided, allegedly based on portraits, have proven to be largely a mixture of error and invention.[\\[585\\]] By 1849, John Sobieski Stuart was in discussion with a publisher to produce a new, cheaper edition of _Vestiarium_, in a series of small volumes \"so that it might be rendered as available as possible to manufacturers and the trades in general concerned in Tartan ... and it was for the\\[ir\\] advantage and use ... that I consented to the publication.\" The same letter also proposed binding the manufacturers by contract to produce tartans that conformed exactly to the Sobieski Stuarts' specifications.[\\[586\\]] \n\nWeavers like Wilsons were complicit, not passive, in the tartan boom. They had lost much of their military and export markets after major wars ended and colonies in the Americas and elsewhere had become more self-sufficient.[\\[509\\]] \"The concept of differentiated clan tartans, newly popularized, was codified and developed by canny manufacturers .... Since the repeal of the \\[Dress Act\\], these tartan makers saw the prospect of a vast new market.\"[\\[276\\]] According to [Alastair Campbell of Airds]:[\\[416\\]] \n\n> One factor which has been decisive throughout the history of the development of the modern system \\[of clan tartans\\] has been the influence of the tartan manufacturers .... As with any marketing organisation it was important to maintain a steady flow of \"new products\", and every year new patterns were produced .... The idea of individual tartans providing a clan or family identity was a most attractive one, which was adopted enthusiastically by both wearer and seller alike.\n\n\" [Maclachlan] \", a romanticised Highland warrior image from Logan and McIan's _The Clans of the Scottish Highlands_, 1843\n\nThis heavy promotion for decades of the clan-tartans idea has been described as \"inciting a rush to lay claim to the tartan to which one's family was 'entitled'\".[\\[587\\]] Other 19th-century clan-tartan works followed.[\\[576\\]] Logan (by then president of the Highland Society of London)[\\[582\\]] returned, with illustrator [Robert Ranald McIan], with _The Clans of the Scottish Highlands_ in several volumes 1843–1849, which had inconsistently hand-coloured portraits of chiefs in clan tartans, which he stated were \"acknowledged by the present chiefs and clans\".[\\[399\\]] _The Clans of the Highlands of Scotland_ in 1850 by Thomas Smibert drew heavily on Wilsons' patterns and on Logan.[\\[588\\]] In the same year, _Authenticated Tartans of the Clans and Families of Scotland_ by William & Andrew Smith was based on trade sources such as Wilsons, competing mill Romanes & Paterson of Edinburgh, and army clothier George Hunter's pre-1822 collection of setts (and some consultation with historian [W. F. Skene]).[\\[588\\]] [\\[421\\]] Also in 1850, Gen. James Browne published _History of the Highlands and the Highland Clans_, another _Vestiarium_ knock-off.[\\[582\\]] \n\nIn 1871, Gaelic folklorist and Highland dress fancier [John Francis Campbell] of Islay wrote in _Clan Tartans_:[\\[589\\]] \n\n> I have come to the conclusion that Sir Walter Scott and my friends the Editors of the _Vestiarium Scoticum_ and Scotch Manufacturers of tartans are together responsible for the present flourishing and luxuriant crop of brilliant clan tartans .... I do not believe that the distinctions which are now made as to Clan Tartans ever prevailed at all, till Tartan became an important manufacture in Scotland in the reign of George the 4th\n\nJ. Claude produced the tartan pattern sample book _Clans Originaux_ in Paris c. 1880, and some tartans were adopted from it,[\\[cc\\]] though its 185 samples were mostly of already-known tartans.[\\[19\\]] A second edition of _The Costume of the Clans_ was published in 1892.[\\[592\\]] Another influential book was Donald W. Stewart's _Old & Rare Scottish Tartans_ (1893), which included swatches of fabric; several accepted clan tartans date to this work.[\\[593\\]] \n\nBooks of this era also introduced lists of alleged clan [septs], families of different surnames (often of English, [Norman], or other non-Gaelic derivation) supposedly linked to particular clans as \"extended family\". It was a means of greatly increasing tartan sales by attaching many more names to extant tartan designs, but not well-grounded in any historical reality.[\\[594\\]] [\\[595\\]] [\\[412\\]] Two such works, both published by W. & A. K. Johnston were: _Tartans of the Clans and Septs of Scotland_ by James Grant in 1886, revised by Henry Whyte in 1906 in more of a picture-book format (three tartans make their first appearance in the 1886 edition,[\\[596\\]] and various more in the 1906 version, with no provenance);[\\[597\\]] and _What Is My Tartan? or, The Clans of Scotland, with Their Septs and Dependents_ by Adam Frank in 1896.[\\[399\\]] \n\nThe romanticised notion of clan tartans had become deeply embedded in the Scottish imagination and further afield.[\\[183\\]] [\\[598\\]] \"\\[I\\]t all got mixed up in the public mind and the myth of tartan as a kind of [heraldry] became established, not only in the eyes of outsiders, even the Clansfolk believed it\".[\\[599\\]] On the cusp of the [Scottish Renaissance] and [Gaelic Revival], most clans (including major Lowland families) had been assigned and had generally accepted one or more tartans by the late 19th century.\n\n### 20th century consolidation\n\n\\[ [edit] \\]\n\nCharles E. N. Leith Hay, 1905 portrait by [John Ernest Breun], in Edwardian daywear Highland dress, kilt in a dark rendition of the [Hay and Leith] tartan. Most clan tartans were settled by the turn of the 19th and 20th centuries.\n\nThe first [Edwardian] book on the subject (aside from a larger 1906 \"library edition\" of Whyte as _The Scottish Clans and Their Tartans with Notes_),[\\[597\\]] was Frank Adam's 1908 _The Clans, Septs & Regiments of the Scottish Highlands_, which remains in print today (though in drastically edited form, by Sir [Thomas Innes of Learney]).[\\[399\\]] [\\[600\\]] A variety of books, with colour plates, had been affordably and widely published about clan tartans by the mid-20th century. Three popular ones were _The Clans and Tartans of Scotland_ by Robert Bain, 1938 (the first to use photographic [halftone] prints; revised and updated many times through 1983);[\\[601\\]] _The Tartans of the Clans and Families of Scotland_ by Innes of Learney (later to become the [Lord Lyon King of Arms] as well as a founder of the [Scottish Tartans Society]),[\\[602\\]] 1938, advancing some clan-tartanry ideas his Lord Lyon predecessor Sir [Francis James Grant] considered \"humbug\";[\\[600\\]] and _The Scottish Clans & Their Tartans_ published by W. & A. K. Johnston, 1945 (later editions re-titled _The Scottish Tartans with Historical Sketches_, edited by Innes of Learney), and based on previous works by Grant and Whyte. Many others followed in successive decades.[\\[399\\]] \n\n400 clan and district tartan samples at the headquarters of the weaver Lochcarron of Scotland\n\nThe mass-market books (some with over 200 tartans illustrated) did much to cement the idea of clan tartans in the public imagination, as well as to consistently anchor particular tartans to particular clans. And the works were in more general agreement with one another than had been the Victorian \"authorities\".[\\[cd\\]] They also simultaneously increased the number of clans with their own assigned tartans, and reduced the number of tartans claimed to be those of certain clans to a more manageable number, probably after consultation with clan chiefs and clan society officers. They did, however, typically include sept lists, which today are widely regarded as bogus[\\[594\\]] (though many present-day clan associations still use them, as a means of attracting larger membership).\n\nAlmost every extant clan (with or without a chief) had at least one tartan associated with it by this era. Many clans have several well-accepted tartans. Sometimes they represent different branches of the family; e.g., there are separate tartans for [Campbell] of Breadalbane, Campbell of Cawdor, and Campbell of Loudoun, in addition to the general \"old\" Campbell tartan. In other cases, they are (at least ostensibly) for specific purposes such as hunting, mourning, formal dress occasions, or Highland dance competition; e.g., the [MacFarlane] dress[\\[604\\]] and hunting tartans[\\[605\\]] are different. (See [§ Tartans for specific purposes], below.)\n\nAn important, more scholarly work was 1950's _The Setts of the Scottish Tartans_ by Donald C. Stewart[\\[606\\]] [\\[ce\\]] (son of the aforementioned D. W. Stewart).[\\[399\\]] The younger Stewart has been hailed as \"the founder of serious tartan research\"; originated now-standard methods for indexing tartans; and would go on to help expose the _Vestiarium Scoticum_ as a fraud, in _Scotland's Forged Tartans_, co-authored with J. Charles Thompson in 1980.[\\[399\\]] [\\[608\\]] \n\nIn the late 20th century to present, clan and other tartans also have been catalogued in [databases]. (See [§ Registration], below.) A small number of new official clan tartans (mostly specific-purpose \"side\" tartans, like dance tartans) were registered in tartan databases in the 21st century.[\\[cf\\]] \n\nRegarding modern misrepresentations of clan tartans on historical figures in films and even museums, Scarlett (1990) wrote: \"so widely have the tartan myths been spread that any script- or guide-book writer will, in complete ignorance, write the most arrant nonsense and never think that it might not be true. ... Once false information has been disseminated by a supposedly authoritative body it is virtually impossible to correct it.\"[\\[610\\]] \n\n### Recognition by clan chiefs\n\n\\[ [edit] \\]\n\nThe \"officialness\" of clan tartans has varied widely, and still does today. Although it is possible for anyone to create a tartan and assign it any name they wish, the only person with the authority to make a clan's tartan \"official\" is the chief.[\\[399\\]] \n\nSome clans [have had no chiefs] for some time, while only a majority subset of those with living chiefs in the modern era made direct proclamations as to their clan tartans and registered them with the [Lord Lyon].[\\[cg\\]] Some time after the launch of the [Scottish Register of Tartans] (SRT) in 2009, the Lord Lyon stopped recording clan tartans, deferring to SRT for this purpose. (See [§ Registration], below.) Some of the clan tartans were simply adopted by custom,[\\[ch\\]] and have remained rather consistent into the 21st century. A clan booth at a [Highland games] event is likely to proudly display at least their best-known clan tartan, regardless whether a chief has declared it official.\n\nHowever, some chiefs have been quite adamant about what their clan's legitimate tartans are. Some time prior to 1890, [George Campbell, 8th Duke of Argyll], chief of [Clan Campbell], is said to have specified the main Campbell tartan, as distinct from that of the [Campbell of Cawdor] sett, after a portrait had depicted him in the latter and also supposedly at the prompting of the War Office, perhaps with regard to [Argyll and Sutherland Highlanders] uniforms.[\\[613\\]] [Ian Campbell, 12th Duke of Argyll], Clan Campbell chief in the late 20th century, excoriated attempts to claim there were other than the four aforementioned particular Campbell tartans (and specifically rejected the personal-variant tartan of the 6th Duke).[\\[417\\]] Similarly, [Sir Malcolm MacGregor], chief of [Clan Gregor], wrote in 2012 that only four MacGregor tartans (plus a newer dance tartan) are legitimate, out of 10 or more alleged ones found in a tartan database, which he blamed on \"indiscriminate commercialisation ... disingenuous and lead\\[ing\\] to confusion\".[\\[614\\]] \n\nIn at least one instance, a clan tartan appears in the [coat of arms] of a clan chief and is considered by the Lord Lyon as the \"proper\" tartan of the clan: The crest of the chief of [Clan MacLennan] is _A demi-piper all [Proper], garbed in the proper tartan of the Clan Maclennan_.[\\[615\\]] [\\[ci\\]] \n\nSome chief-authenticated clan tartans are quite late arrivals. In 1961, the [Clan Davidson] main tartan was replaced (and registered with the Lord Lyon) by one of multiple disputed chiefs, Sir David Davidson of Allt Dinnie, with a design dating to 1893, in place of an older white-striped version.[\\[616\\]] Chief [Charles Shaw of Tordarroch] in 1971 replaced the old [Shaw] tartan (a Black Watch variant based on a misprinted image in Logan & McIan (1847))[\\[617\\]] with a new pair (dress[\\[618\\]] and hunting)[\\[619\\]] designed in 1969 by D. C. Stewart based on more historical sources.[\\[620\\]] [Clan Mar] had no approved tartan until Chief [Margaret of Mar] registered one in 1978 (from a design that may pre-date 1850);[\\[621\\]] their dress/red tartan was not adopted until 1992 (from a design dating to the 18th century).[\\[82\\]] The [MacLeod] red tartan was approved by Chief [John MacLeod of MacLeod] in 1982, to join much longer-standing yellow and blue tartans of the clan; it was based loosely on what appears in a 1748 portrait of Chief [Norman MacLeod] by [Allan Ramsay] and [Joseph van Aken].[\\[622\\]] Baron [David Lumsden of Cushnie-Lumsden] in 1996 approved the [Clan Lumsden] hunting sett by Peter Eslea MacDonald[\\[623\\]] (though technically the baron was just the chieftain of the Cushnie-Lumsden branch). In 1998, Chief [Dugald MacTavish of Dunardry] approved a 1958 design as the [MacTavish] dress tartan.[\\[624\\]] In 2005, Chief Gillem Lumsden of that Ilk registered a new main [Lumsden] tartan with the Lord Lyon,[\\[625\\]] based closely on that of a c. 1790 Lumsden family waistcoat.[\\[626\\]] Also in 2005, a pattern for [Duncan of Sketraw] was approved by Chieftain John Duncan of Sketraw, based on a 1930s design.[\\[627\\]] In 2007, Chief [Fergus D. H. Macdowall of Garthland] designed the [Clan MacDowall] tartan (the clan previously used [MacDougall] or [Galloway] district); he registered it with the Lord Lyon and [Scottish Tartans Authority] in 2008.[\\[628\\]] [\\[629\\]] The [Cochrane] hunting tartan was designed personally by Chief [Iain A. D. B. Cochrane, Earl of Dundonald], in 2008.[\\[630\\]] The [Clan Carruthers] tartan was approved by Chief Simon Peter Carruthers of Holmains in 2017.[\\[631\\]] \n\nAside from regimental and clan usage, tartan has seen broad (and sometimes highly politicised) use by the general public in the modern era. By the 19th century, the Highland [romantic revival], inspired by [James Macpherson] 's \" [Ossian] \" poems and the writings of Sir [Walter Scott], led to wider interest in tartan and other things felt to be [Gaelic] and [Celtic]. Clubs like the [Celtic societies] welcomed [Lowlanders], and tartan was rapidly [appropriated] [\\[557\\]] as part of the [Scottish national identity] [\\[632\\]] [\\[633\\]] (and part of broader British dress as a familiar [exoticism]).[\\[634\\]] [\\[635\\]] \n\n\"The New Fashion, or The Scotsman in Paris\", from a series of Parisian fashion prints, 1815\n\nThe period of widened public interest in tartan and Highland dress after the repeal of the Dress Act in 1782 has been called the **Highland Revival**.[\\[32\\]] [\\[cj\\]] While tartan had already seen more nationwide use from 1707, as a [Scottish nationalism] symbol against [union with England],[\\[281\\]] it was turned on its ear to become a romanticised symbol of union loyalism in the early 19th century,[\\[183\\]] [\\[637\\]] an era in which prominent conflicts caused a patriotic influence of military (including Highland) style on civilian clothing,[\\[ck\\]] even among women[\\[351\\]] [\\[640\\]] despite its overtly masculine focus.[\\[641\\]] [\\[642\\]] [\\[176\\]] First among the northern gentry and later among the common people more broadly, there was a renewed interest in tartan and Highland dress, despite the long period of prohibition – largely due to the glory associated with the Highland regiments' exemplary service in various military campaigns.[\\[401\\]] \"Highlandism\"[\\[643\\]] became a romantic, mythologised (even fictionalised) and colourful [escapism] [\\[644\\]] [\\[349\\]] even as Lowland Scotland itself was becoming one of the most [industrialised] places on earth, and the entire nation was undergoing the social upheavals of union and [empire], of large-scale warfare, of urbanisation, and of modernisation during the [Scottish Enlightenment].[\\[645\\]] The bloody [French Revolution] of 1789–1799 had also helped inspire a British setting aside of old [Stuart] and [Hanoverian] rivalry.[\\[646\\]] \n\nBefore the clan tartans rush began in 1815, tartan was already being aggressively marketed to the general public as \"fancy\" cloth with names that commemorated famous events and people, even fictional characters from books and songs, e.g. \" [Waterloo] \", \" [Flora MacDonald] \", \"Sir Walter Scott\", \" [Wellington] \", \" [Maggie Lauder] \", and \" [Meg Merrilees] \". This inspired a novel perception that tartans should be named.[\\[647\\]] Some of the designs by leading weaver Wilsons of Bannockburn by this period were considered recognisable on sight.[\\[648\\]] \n\nIn 1822, Maj.-Gen. [David Stewart of Garth], who was with both the [Highland Society of London] and the [Celtic Society of Edinburgh],[\\[649\\]] [\\[532\\]] published _Sketches of the Character, Manners, and Present State of the Highlanders of Scotland_, the first of a number of 19th-century books lionising the Highlanders, the clans, and the tartaned regiments.[\\[641\\]] [\\[539\\]] The various Celtic/Highland societies throughout Britain had already been driving a rise in tartan demand since the late 18th century.[\\[650\\]] [\\[375\\]] The societies liked wearing Highland dress – in their own assimilated, urban idiom,[\\[cl\\]] such as tartan [frock coats] [\\[355\\]]  – and devising new tartans; it has been suggested that they were engaging in a sort of \"internal colonisation\", imposing what they wanted to see rather than simply recording what was traditionally Highland.[\\[651\\]] Aside from tartan fabric's increasing use in non-Highland styles of clothing, Highland dress itself had already become highly stylised, quite removed from the simplicity of its peasant origins;[\\[652\\]] this was a trend that would continue throughout the later Victorian period.\n\n#### The King's jaunt in tartan\n\n\\[ [edit] \\]\n\n_[George IV in Highland Dress] _. [David Wilkie] 's idealised depiction of [the king], in full [Highland regalia], during his visit to Scotland in 1822[\\[cm\\]] \n\nThe popularity of tartan was greatly increased by the royal [visit of King George IV] of the United Kingdom to Edinburgh in 1822, with other nobles including [Lord Mayor of London] Sir [William Curtis],[\\[654\\]] in Highland garb. George was the first reigning monarch to visit Scotland in 171 years.[\\[554\\]] The pageantry invented for the event, which was nicknamed \"the King's Jaunt\", brought a sudden consumer-driven demand for tartan cloth[\\[503\\]] and made it the [national dress] of the whole of Scotland.[\\[554\\]] [\\[655\\]] [\\[632\\]] [\\[656\\]] The 21 days of festivities were organised by the Jacobitism-romanticising but staunchly unionist[\\[183\\]] Walter Scott, who was another co-founder of the Celtic Society of Edinburgh, and military officer [David Stewart of Garth].[\\[530\\]] They urged Scots (most of whom were Lowlanders) to attend \"all plaided and plumed in their tartan array\"[\\[657\\]] in \"complete national costume\".[\\[655\\]] One contemporary writer sarcastically described the pomp that surrounded the celebrations as \"Sir Walter's Celtified Pageantry\",[\\[657\\]] [\\[658\\]] and another as a \"plaided panorama\".[\\[655\\]] Clan chiefs, expected to be kilted, had little choice but to take the event seriously, and arrived to show their loyalty in something of a panic, with tartaned retinues of half a dozen[\\[276\\]] to up to 50 per clan[\\[659\\]] (equipped at great expense, and with only about a month's official notice), in a city overflowing with Highlanders, Lowlanders, and English spectators decked in tartan,[\\[660\\]] [\\[504\\]] a sight that Scott's own son-in-law and biographer [John Gibson Lockhart] called a \"Celtic hallucination\".[\\[502\\]] Thousands of spectators attended the many events arranged for the visit.[\\[276\\]] The formal ball, reserved for the gentry, required Highland dress for admittance, and some 300 tailors were employed to supply it.[\\[659\\]] \n\nThe royal endorsement of tartan and Highland-wear did much to erase any lingering association of them with the servile peasant class of the Highlands[\\[661\\]] (or the region's bands of mountain bandits, for that matter).[\\[659\\]] Because Scott had become \"the acknowledged preserver of Scotland's past\" through his historical novels, the legend he helped create of tartan and Highland dress as a Scotland-wide tradition rooted in antiquity was widely and quickly accepted, despite its ignoring and erasing of cultural diversity within the country[\\[652\\]] (of Gaels, [Norse–Gaels], [Scoto-Normans], and Lowlanders of largely [Anglo-Saxon] extraction). \"A bogus tartan caricature of \\[Scotland\\] had been drawn and accepted, even by those who mocked it, and it would develop in perspective and colour.\"[\\[662\\]] George IV's visit – which was not just theatrical but thoroughly political, in marrying Hanoverian power and loyalty to Stuart ideology and pride[\\[663\\]]  – has been described in by [Angus Calder] (1994) as the catalyst by which \"a Union of practical convenience became a Union of irrational love and fears, sublimated in militarism, tartanry, royalism and, eventually imperialism\".[\\[664\\]] R. Martin (1988) added: \"it would seem that this visit presages the acts of orchestrated political propaganda that we have come to know very well in the 20th century.\"[\\[557\\]] \n\nPortrait of [John Crichton-Stuart, 2nd Marquess of Bute], by [Henry Raeburn], c. 1829, showing adaptation of tartan to [Regency-era] clothing styles, like this red-lined cloak\n\nFollowing the royal visit, the tartan industry boomed,[\\[665\\]] and the number of available tartans increased tenfold;[\\[666\\]] in 1822, Wilsons' pattern book had numbered setts in the hundreds, and introduced many more with proper names.[\\[514\\]] Scarlett (1990) writes that \"Tartan was no longer the dress of northern barbarians or political dissidents; it had become respectable and the garb of loyal subjects.\"[\\[667\\]] Books which documented tartans began to appear and added to the \" [tartanry] \" craze. [James Logan] 's romanticised work[\\[567\\]] _The Scottish Gaël_ (1831) was the first such publication, and led the weaving industry to adopt new patterns, even Logan's invented or erroneous ones.[\\[566\\]] \n\nThe result of these flurries of attention has been described as an \"astonishing frenzy of excitement into which \\[patronage of tartanry\\] threw the citizens of Edinburgh and much of the rest of Scotland\".[\\[668\\]] \n\nFrom the 1820s, Georgian and then Victorian portraiture of clan nobles continued the earlier theme of regimentally re-styled Highland dress, with jewels, gold, and other symbols of aristocracy – a \"synthetic Gaelicism\".[\\[669\\]] The funerals of [Sir John Macgregor Murray] and [Alasdair Ranaldson Macdonell of Glengarry], in 1822 and 1823 respectively, were marked by tartan, [bagpipes], and \"wailing\" of clansmen – \"a feudal sight in an increasingly industrial age\".[\\[670\\]] A large public tartan affair was the 1824 [Atholl Gathering] [\\[559\\]] (an annual event that, after a period of abeyance, continues to the present). From the end of proscription through the Georgian promotion, \"distrust of the Highlands became fascination\",[\\[671\\]] and tartan and Highland garb \"moved from the periphery to the very center, accompanied by all the processes of forgetting and imaginative re-creation\".[\\[672\\]] Tartan, no longer the everyday traditional dress of Highland \"barbarians\", had become, in altered form, all the rage among the Scottish upper and even middle classes as formal attire.[\\[673\\]] This popularisation of tartan increased its marketability in the Lowlands, in England, and in the colonies, and provided a boost to the Scottish textile industry.[\\[342\\]] \n\nFrench tartan fashions from _Costumes Parisiens_, 1826\n\nTartan had begun making appearances in civilian Georgian fashion throughout Britain and into continental Europe, as illustrated in publications such as London's _[Gallery of Fashion] _ (1787) and _[La Belle Assemblée] _ (1808), and (after Paris was famously occupied by Highland regiments during the [Waterloo campaign] and the fall of [Napoleon] in 1815)[\\[674\\]] [\\[641\\]] [\\[675\\]] in the French periodicals _Le Prétexte_ (1815)[\\[676\\]] and _Costumes Parisiens_ (1826); tartan was in vogue in Paris in particular in this period,[\\[677\\]] [\\[678\\]] and approximations of Highland soldiers even appeared in Parisian plays at the time.[\\[679\\]] Tartans associated with family names became popular, but there was also a brisk trade in new tartans commissioned for societies, to commemorate events, in honour of famous persons, and designed simply to personal aesthetic taste.[\\[680\\]] Manufacturers struggled to keep up with demand.[\\[681\\]] [\\[632\\]] By 1819, dominant tartan weaver Wilsons of Bannockburn[\\[340\\]] (also a carpet and ribbon weaver)[\\[682\\]] was keenly interested in exploiting the civilian market, due to a reduction in regimental demand, and introduced many more patterns, providing cloth in various grades.[\\[683\\]] By 1820, the company had access to 132 [looms];[\\[684\\]] they experienced a four-fold increase in output in 1821, leading up to George IV's visit,[\\[655\\]] after which they acquired 40 more looms[\\[505\\]] [\\[666\\]] in an add-on building,[\\[681\\]] named the Royal George after the king,[\\[505\\]] and expanded into a new mill in 1822, [mechanising] more and more to keep up with demand.[\\[684\\]] They stopped weaving [muslin] to focus on tartan,[\\[681\\]] and produced it in a range of qualities from finest [merino] wool to cheap [linsey-woolsey] blends, demonstrating that whatever high-class associations tartan had taken on, there was significant working-class demand.[\\[685\\]] In 1829, a merchant wrote to Wilsons that \"We are like to be torn to pieces for tartan; the demand is so great we cannot supply our customers\", and there was great demand for the newest patterns.[\\[569\\]] \n\nIllustration of Victorian women weaving at power looms in a textile factory (this one in Denmark, but the scene in Wilsons of Bannockburn at its peak would have been very similar).\n\nGeorgian and later Victorian entrepreneurs not only created new tartans, but new tartan objects called _tartanware_, starting as far back as the proscription period in the form of wine glasses decorated with tartan and enamel Jacobite portraits.[\\[313\\]] Tartan decorated an assortment of common household objects, such as [snuffboxes], jewellery cases, tableware, sewing accessories, desk items, and even doorknobs and furniture – a tartan knick-knack market for tourists that continues through the present in the Highlands.[\\[686\\]] Visitors to the Highlands went home with tartanware, and Scotland-based businesses sent tartanware out as gifts to customers. Some of the more popular tartans used were the [Stewart], [MacDonald], [MacGregor], [MacDuff], [MacBeth], and one fancifully named \"Prince Charlie\".[\\[687\\]] [\\[688\\]] [\\[689\\]] Today, tartanware is widely collected in England and Scotland.[\\[690\\]] There was a symbiotic relationship between tartanware production and interest in tartans generated by books on the subject: a tartanware manufacturer from 1820 onward was W. & A. Smith, of [Mauchline], also incidentally the publishers of _Authenticated Tartans of the Clans and Families of Scotland_ (1850).;[\\[691\\]] tartanware was sometimes more specifically called Mauchlinware.[\\[692\\]] \n\nLeading up to the beginning of [Queen Victoria] 's reign in 1837, tartan was a brisk trade in London, Manchester, and other English cities and towns.[\\[693\\]] In 1839, the [Eglinton Tournament], a [medieval re-enactment] featuring [jousting] and a [ball], was organised in [North Ayrshire] by [Archibald Montgomerie, Earl of Eglinton]; it drew some 100,000 spectators, who had been asked to attend in plaids, and included [George Murray, Duke of Atholl], arriving with an entire regiment in tartan, his newly re-formed [Atholl Highlanders] [\\[694\\]] (which still exists as Europe's last remaining private military force).\n\n_Scene in the Highlands with Portraits of the Duchess of Bedford and Duke of Gordon_ (in various tartans), by [Edwin Landseer], 1825\\. The [Highlands were being cleared] of native people, for deer hunting preserves and sheep pastures\n\n#### _Vestiarium Scoticum_\n\n\\[ [edit] \\]\n\nThe first publication showing colour plates of an array of tartans was the _[Vestiarium Scoticum] _ (meaning 'wardrobe of the Scots'), published in 1842,[\\[576\\]] and it included a first: tartans for Lowland families. It was the work of two brothers: John Carter Allen and Charles Manning Allen, from [Surrey], England, who used a variety of assumed names. The two implied they were grandsons of Prince [Charles Edward Stuart] and [Princess Louise of Stolberg-Gedern], and consequently later became known as the \" [Sobieski Stuarts] \". They claimed further that the _Vestiarium_ was based on a 1571 manuscript on clan tartans – a manuscript which they never managed to produce. It was not known at the time, but many of the tartans were simply invented by the brothers, and others were taken from early-19th-century sources like the Cockburn and Wilson collections.[\\[695\\]] [\\[399\\]] The brothers heavily favoured basic checks, or crudely divided checks, with thin over-checks added; they had an identifiable style of tartans, assessment of which has varied from \"few can be called inspired\"[\\[50\\]] to \"quite novel and singularly gorgeous\".[\\[696\\]] The _Vestiarium_ was followed by their equally dubious _The Costume of the Clans_ in 1845.[\\[566\\]] The books, which \"added mystery, romance and some spurious historical documentation to the subject\",[\\[572\\]] triggered another wave of interest in tartans, and the enthusiasm generated by these publications led the way for numerous tartan books in the later 19th century.[\\[657\\]] [\\[576\\]] [\\[cn\\]] \n\nThe sudden availability (and almost unquestioning acceptance) of Lowland tartans helped spread tartan further in popularity. \"The \\[tartan\\] cult was gathering strength and tartan was no longer 'Highland', it had become 'Scottish'.\"[\\[599\\]] \n\n#### The Queen and \"Balmorality\"\n\n\\[ [edit] \\]\n\nA silk and velvet late-Victorian young woman's tartan dress, 1878, probably made in England\n\nTwenty years after her uncle's royal visit to Scotland, Victoria and her husband [Prince Albert] made their first trip to the Scottish Highlands in 1842; she was the first monarch to set foot in the Highlands since [Mary, Queen of Scots], in the 16th century.[\\[697\\]] The visit involved her large royal party being met with several theatrical tartan-kilted welcomes by Highland nobility and their retinues, with much sycophantic newspaper fanfare (while the common people were experiencing considerable misery); the Queen wrote: \"It seemed as if a great chieftain in olden feudal times was receiving his sovereign\".[\\[698\\]] The monarch's early trips to Scotland were seen as a royal endorsement and had a transformative effect on the image of the country, as a now-loyal land of tartan, [pipers], and kilted martial display.[\\[699\\]] \n\nVictoria and Albert leased [Balmoral Castle], in [Aberdeenshire], in 1848 (and bought it in 1852)[\\[700\\]] as a private royal [demesne] and hired a local architect to re-model the estate in feudalised [Scots baronial] style, starting a \"sham-castles\" trend.[\\[701\\]] Prince Albert personally took care of the interior design, where he made great use of tartan. He used the royal Stewart (red) and the hunting Stewart (green) tartans for carpets, while using the dress Stewart (red and white) for curtains and upholstery.[\\[577\\]] Prince Albert (who often wore the kilt at Balmoral) is said to have created the Balmoral tartan, still used as a royal tartan today.[\\[702\\]] (See illustration at [§ Family and individual], below.) They even decorated their [carriage] with tartan.[\\[703\\]] [\\[704\\]] Their adoption of a showy form of Highland dress inspired adoption by subject \"who would have previously left Highland dress to the festivals of the Scots.\"[\\[692\\]] \n\nThe royal couple spent a considerable amount of time at their Scottish estate (nearly 7 years in total),[\\[705\\]] [\\[co\\]] and in doing so hosted \"Highland\" activities. Victoria was attended by pipers,[\\[707\\]] and her children were attired in Highland dress.[\\[708\\]] Prince Albert himself loved watching the [Highland games] [\\[709\\]] and the pair became patrons of the [Braemar Gathering].[\\[710\\]] (Support from and attendance by various nobles may have helped preserve such events to the present, but it also \"tartanised\" them permanently, all the way into the 21st century.)[\\[711\\]] The royal enthusiasm for and patronage of Highland things generated more [early tourism] to the Highlands,[\\[587\\]] [\\[712\\]] [\\[cp\\]] and a boost to business in the region as far as [Perth] and [Edinburgh].[\\[714\\]] It also spread tartan-wearing to other northern British lords and ladies, who began to invent complicated etiquette rules of dress for Highland garb, which had the effect of increasing the sense that it was upper-class attire.[\\[183\\]] [\\[715\\]] [\\[599\\]] (See [§ Etiquette], below.) Adoption of tartan continued to spread into England; [Thomas Osborne, Duke of Leeds], in [West Yorkshire], devised a [livery] tartan for his men in 1848.[\\[716\\]] Tartan, though a \"pseudo- [Caledonian] masquerade\",[\\[717\\]] had become \"the stuff of loyalty to the crown\",[\\[718\\]] with \"a spurious royal and aristocratic cachet\".[\\[719\\]] This royal promotion was also noted abroad, with the effect that tartan became one of the widest-recognised [cultural-identity] symbols for the entire British country.[\\[720\\]] \n\nDespite their considerable devotion to charity (up to 20% of their [Privy Purse] income),[\\[721\\]] Victoria and Albert, along with their friends in the northern gentry, have been accused of using their \"Balmorality\" – a term coined by [George Scott-Moncrieff] (1932) to refer to upper-class appropriation of Highland cultural trappings, marked by \"hypocrisy\" and \"false sentiment\" – to trivialise and even fictionalise history.[\\[722\\]] According to Fiona K. Armstrong (2017), they engaged in long-term, tartan-blanketed escapism from the uncertainties of modernising, industrialised society and from pressing British societal problems, while worsening those problems in the actual Highlands.[\\[722\\]] The queen's Balmoral residency also had another detrimental effect on the Scottish Highlands; inspired by her residency, aristocrats who lived outside the Highlands began purchasing estates in the region, resulting in land-ownership disparities that persist into the present day.[\\[723\\]] The Highlands during Victoria's reign also became more accessible by road, rail, and boat.[\\[709\\]] \n\nA late Victorian style, this two-piece tartan suit dates to about 1875–1880\n\nAs the tartan and \"romantic Highlands\" craze swept over Scotland, the real Highland population suffered grievously from the [Hungry Forties] as well as the [Highland Clearances], when thousands of Gaelic-speaking Scots from the Highlands and Isles were evicted by landlords (often the very men who would have been their clan chiefs) to make way for sheep[\\[657\\]] [\\[724\\]] and for expansive deer-hunting preserves.[\\[725\\]] Scots were also largely disenfranchised from voting, and the Highlands were running out of young men, in great regimental demand to fight and die in foreign wars for the empire, and many emigrating otherwise,[\\[726\\]] with Victoria and Albert directly patronising [emigration societies].[\\[727\\]] Nearly 2 million Scots moved to non-European destinations during the Victorian era (more than half the native-born Scottish people of the period), and took a measure of Highlandism with them[\\[728\\]]  – \"many of the generally understood images of the Highlands were held to be 'real' by people at the time\".[\\[729\\]] This would have strong tartan-promoting results among the [Scottish diaspora] later;[\\[730\\]] Scarlett (1990) calls it a \"tartan hunger that has been abroad from late Victorian times to the present day\".[\\[731\\]] \n\n[Thomas Babington Macaulay] wrote in 1848 of the romantic reinvention of Highland customs as somehow _generally_ Scottish: \"Soon the vulgar imagination was so completely occupied by [plaids], [targets], and [claymores], that, by most Englishmen, _Scotchman_ and _Highlander_ were regarded as synonymous words.\"[\\[732\\]] In 1849, Sir [John Graham Dalyell] asserted that \"forty years ago no reputable gentleman would have appeared in a kilt in the streets of Edinburgh.\"[\\[733\\]] Scott-Moncrieff (1932) likewise wrote of tartans being \"misconceived\" and worn all over Scotland (and even England) in the Victorian era as a part of the Queen's influence.[\\[734\\]] Increasingly-urban Scotland was putting on a \"rural face\"[\\[735\\]] (a trend that would continue with \" [kailyard] \" literature). Tartanry and Highlandism were popular in part as a counter to a sense (especially among the aristocracy) that Scotland was losing its separate national identity in the Georgian to Victorian era, being ever more [Anglicised] as just \"North Britain\" amid empire-wide modernisation.[\\[736\\]] \n\n[Kenneth MacLeay] 's 1866 portrait of a [MacLachlan], a [Graham], a [MacFarlane], and a [Colquhoun], for Victoria's _Highlanders of Scotland_ book project.\n\nIn an 1849 letter to a publisher about a planned second edition of _Vestiarium Scoticum_, John Sobieski Stuart noted that tartan had become \"extensively worn and manufactured\" on the continent, as far away as France, Germany, [Bohemia], and Hungary; he also expressed an interest in working directly with tartanware and tartan book makers W. & A. Smith of Mauchline.[\\[737\\]] The same year, the Duke and [Duchess of Atholl] (whose entire estate was prescribed tartan livery)[\\[738\\]] hosted a Highland-dress affair in London, the [Royal Caledonian Ball], the first known charity ball (still a sold-out annual event today).[\\[739\\]] The 1859 opening of the massive [Loch Katrine] waterworks (to pump fresh water to [Glasgow], running out of [well] water) was attended by Queen Victoria, with the Atholl Highlanders (cannon in tow), the Celtic Society of Glasgow, and an [honour-guard] unit called the Glasgow Volunteers putting on a tartan- and piper-laden display for the newspapers; it was a confluence of modern engineering and romantic–patriotic tartanry.[\\[740\\]] When the Prince Consort died in 1861, Victoria commissioned a tartan-kilted statue of Albert at Balmoral by [William Theed].[\\[741\\]] \n\nAccording to Jonathan Faiers (2008), Victoria had actually intentionally made tartan more popular for the benefit of the British textile industry.[\\[742\\]] By the 1860s, tartan was not only as popular in London as in Scotland,[\\[743\\]] leading weaver Wilsons of Bannockburn produced £80,000 of product per year, and employed 500–600 people. (It amalgamated with another of the family businesses, a carpet-weaving operation, in 1867, which continued to 1924.)[\\[275\\]] Around 1860, new [synthetic aniline dyes] allowed for production of tartans in vivid colours at more affordable prices, and their lower cost translated into more consumption of tartan by the middle class.[\\[707\\]] \n\nThe first permanent colour photograph, by [Thomas Sutton] in 1861, was of a tartan ribbon.\n\nAs modernisation marched on, the world's first permanent [colour photograph], taken by [Thomas Sutton] (using the three-colour process developed by Scottish physicist [James Clerk Maxwell]) in 1861, was of a tartan ribbon.[\\[744\\]] It was created by using red, blue, and yellow filters to create three photographs which were then combined into a composite. R. Martin (1988) notes that there was a confluence of unrelated technological \"junctions and serendipities\" in the mid-19th century that together broadly promoted tartan, including photography, consistently bright and more economical artificial dyes, affordable [colour book printing], mass-production of soft but durable fine textiles, and applicability of printed patterns to middle-class products like tartanware – all \"far-removed from the true peasant history of tartan.\"[\\[745\\]] Ian Brown (2012), a professor with a focus on Scottish literature and culture, has written that while George IV and Victoria (not to mention business interests in their wake, like the Wilsons of Bannockburn and the Smiths of Mauchline) seemed to have been \"the winner taking over the loser's tokens\", the renewed public interest in tartan within and beyond Scotland was not entirely owing to them, especially given the international interest in Highland-romantic works of Walter Scott and \"Ossian\". The acceptance of and even enthusiasm for tartan among the post-proscription upper class can be seen as a necessary attempt at reconciliation within a culturally diverse country, and the influence ran both ways, with old Scottish nationalism transmuting into a new unionism that demanded recognition of Scottish interests and institutions. \"In short, it is an open question whether George IV in a kilt and Victoria and Albert at Balmoral are appropriating and subverting a set of values, or whether they are being appropriated and subverted.\"[\\[746\\]] Even the 1822 \"King's Jaunt\" had been stage-managed by two Scots with a keen interest in romanticising and promoting Gaelic and broader Scottish culture (historico-traditional accuracy notwithstanding),[\\[652\\]] and the Atholls' deep and tartan-arrayed involvement in Victoria's activities in the north can be viewed in the same light.[\\[747\\]] Both George IV[\\[748\\]] [\\[749\\]] [\\[750\\]] and Victoria,[\\[751\\]] primarily of German [House of Hanover] stock, came to identify strongly with their quite thin Scottish [House of Stuart] genealogy.\n\n[Prince Arthur] dressed up as [Bonnie Prince Charlie] for the 1871 Waverley Ball\n\nThe 1863 funeral of the Duke of Atholl was another anachronistically feudal, tartan-and-pipers pageant.[\\[752\\]] In 1866–1870, Victoria and the Duchess of Atholl commissioned artist [Kenneth MacLeay] in Edinburgh to produce a series of [watercolours] of statuesque men in tartan Highland gear, representing common people from [ghillies] to shepherds and fishermen, \"as they _now_ are\". Prints were published in 1870 as _Highlanders of Scotland: Portraits Illustrative of the Principal Clans and Followings, and the Retainers of the Royal Household at Balmoral_, with text by Amelia (Emily) Murray MacGregor, an attendant of Victoria as well as a [Clan Gregor] historian and the first female Gaelic lecturer. A tartanistical fantasy, as well as another exercise in \"Highlander as [noble savage] \", the art book necessitated canvassing Scottish aristocrats for outfits and suitable models (\"specimens\"), as the everyday people did not look the hyper-masculine part, were not able to afford such Highland-dress extravagances as were to be illustrated, and were more likely to be wearing trousers than kilts.[\\[753\\]] The resulting book is the most detailed record of the \"proper\", codified Victorian-era Highland dress and accessories, which \"removed tartan from its blustery nonchalance to an ordered set of adornments\"[\\[743\\]]  – most of which survive to the present, Highland dress being remarkably resistant to further major stylistic changes, Victorian styles having become \"traditional\". Tartan had also become more established throughout the 1850s and 1860s as a textile for European-fashionable rather than Highland women's clothing, from [bodices] and dresses to [sashes] and shawls (the never-extinguished ladies' plaids).[\\[743\\]] The tartan sash in particular was a favourite of the Queen,[\\[743\\]] and remains a common womenswear option, worn several different ways in modern Highland dress,[\\[754\\]] though it has little to do with original Highland clothing before the 19th century; it is an adaptation of the plaid to a style of the European nobility.\n\nIn 1871, at the Waverley Ball, a [fancy dress] affair in London, the Prince of Wales (the future King [Edward VII]) and his brother [Prince Arthur], long accustomed to Highland dress, arrived tartaned out as an old-time [Lord of the Isles] and as [Bonnie Prince Charlie], respectively.[\\[755\\]] In 1872, [ethnologist] Jacob Falke wrote that \"In Scotland indeed the plaid has still some importance, but it is an object of manufacture, and ... its motives have long ago become the common property of fashion, and indeed have become so permeated by it that what is genuine and old in it is scarcely to be recognised\".[\\[756\\]] Since its 1880 re-opening, the Gaelic Society of Perth in the Lowlands held festivities that involved much piping and tartan-wear, into the early 20th century, despite the language-preservation organisation having nothing to do with Highland dress or _[pibroch] _; being swathed in tartan had somehow become vital to such events.[\\[757\\]] By 1883, Highland dress as proper [courtly] attire had become highly regulated, aristocratic, and formal, but \"inclusive\" in one sense – the tartan-wear was permitted at court for essentially anyone claiming Highland origins or land-ownership (even if natively English), not just the gentles of the well-established clans.[\\[758\\]] \n\nIn the Victorian era, tartan garments for women as well as men continued to be featured in fashion catalogues, in styles not derived from Highland costume, such as everyday suits and dresses.[\\[587\\]] Tartan had also become popular for children's clothing in continental Europe,[\\[759\\]] inspired by the royal children of Victoria.[\\[760\\]] In the United States, tartan was similarly worked into school uniforms, especially [at Catholic schools].[\\[761\\]] The late 19th century saw tartan (sometimes in silk) in fashion throughout Europe, including in France (e.g. Paris, Lyon, and Alsace) and Italy,[\\[762\\]] and as far from Britain as Russia.[\\[763\\]] (See [c. 1855 French master weaver's illustration] of complex tartan-making.) Founded in 1898, [Walker's Shortbread] has long been sold in [royal Stewart tartan] packaging around the world (especially for [Christmas] and [Hogmanay]).[\\[764\\]] \n\n### 20th century to present\n\n\\[ [edit] \\]\n\n[Edward, Duke of Windsor], in a tartan necktie, 1945\n\nIn the [Edwardian era], tartan had become less a component of men's clothing (with the decline in kilt-wearing) but more an important part of women's fashion,[\\[76\\]] including fanciful _[haute couture] _ designs [from Paris] that had no connection to Highland style,[\\[587\\]] and many accessories such as [petticoats], [stockings], and [blouses]; masculine accessories included [braces (suspenders)], [neckties], [cummerbunds], and socks.[\\[76\\]] \n\n[Edward VII] himself had grown up wearing Highland dress frequently.[\\[708\\]] There was also in this period into the 1920s a market for Highland-dress etiquette booklets, which tied into the era's \"dress sense\" of decorum and class[\\[426\\]] (see also [§ Etiquette], below). Because of its associations with the British aristocracy, Scottish clans, and Highland military, tartan had developed an air of dignity and exclusivity.[\\[765\\]] Because of this, tartan was to make periodic resurgences in the world of fashion. The tartan uniforms of the [Scottish Regiments] were an important recruiting tool during World War I; as [Archibald Primrose, Lord Rosebery], put it: \"there is nothing so magnificent in our army as the swing of a kilted regiment\".[\\[766\\]] Tartan's Georgian re-orientation as a symbol representing unionism and empire continued well into the first half of the 20th century,[\\[767\\]] though outright tartanry and Highlandism on the part of the upper class waned, especially after about 1920.[\\[768\\]] Nevertheless, [Edward VIII], later Duke of Windsor, was a life-long devotee of tartan, often wearing more than one at a time.[\\[587\\]] \n\n[Catholic school uniform] skirts, using a wide variety of tartans\n\nTartan patterns (often simple, unnamed ones) remained commonly used for skirts and [pinafore dresses] (jumper dresses) in Catholic and other private [school uniform] codes in North America and also in public and private schools in New Zealand. The style spread to many other places, including South America, [Japan] [\\[685\\]] (which sometimes imports tartan directly from Scotland),[\\[769\\]] and Hong Kong.\n\n[Harry Lauder] in one of his Highland outfits, 1922\n\n[Harry Lauder] (properly Sir Henry – he was knighted for his [war-effort] fundraising during World War I) became world-famous in the 1910s and 1920s, on a [dance hall] and [vaudeville] entertainment platform of tartan Highland dress, a thick [Scots] accent, and folksy songs about an idealised, rural Scotland, like his hit \" [Roamin' in the Gloamin'] \". At one point, he was the highest-paid performer in the world, and toured the United States, Australia, South Africa, and of course the UK to sold-out audiences. A Lowlander himself, Lauder has been credited with (and blamed for) keeping alive a tartanry-and-Highlandism image of Scotland, with critics calling him a \"kilted clown\" who promoted the idea of Scotsmen \"clothed like the chieftain of Clan McCrazy\".[\\[770\\]] \n\n#### Diaspora and globalisation\n\n\\[ [edit] \\]\n\nBy the mid-20th century,[\\[771\\]] annual [Highland games] events, modelled on the traditional events in Scotland, had been established not just in Scotland but throughout the United States, Canada, Australia, New Zealand, and South Africa, among other places with a notable [Scottish diaspora], which totals about 50 million people worldwide.[\\[772\\]] There are dozens of such events in Scotland,[\\[773\\]] and at least 260 annual Highland games events worldwide as of 2000,[\\[774\\]] more than 100 of them in the US alone, and dozens more in Canada.[\\[773\\]] They are closely intertwined with [bagpipe] band competitions (which date to 1781), a lasting source of tartan imagery in their regiment-inspired Highland uniforms.[\\[775\\]] \n\n[Massed bands] at the [Glengarry Highland Games], Maxville, Ontario, Canada, 2006\n\nThe games' rather flamboyantly[\\[776\\]] tartaned [subculture] is sustained outside Scotland primarily by multi-generational Scottish descendants rather than by direct Scottish expatriates.[\\[777\\]] [\\[778\\]] \n\nMystic Highland Pipe Band at [Tartan Day] parade, New York City, 2002\n\n[Tartan Day], an annual [symbolic ethnicity] holiday among the Scottish diaspora, is a growing affair celebrated on 6 April, the date on which the [Declaration of Arbroath] was signed in 1320. Tartan Day was first declared in [Nova Scotia] in 1987, and was essentially nation-wide in Canada by the 1990s. It has since spread to Australia (with varying levels of official recognition, 1989–1996), the US (1998), and other places including New Zealand,[\\[779\\]] and even Argentina[\\[780\\]] and Paris, France.[\\[781\\]] In [New York City], it has turned into an entire Tartan Week since 1999, with honorary \"grand marshals\" that are usually Scottish celebrities.[\\[782\\]] \n\nThe term _tartanism_ (as distinct from _tartanry_) has been coined by Ian Brown (2012) for this international tokenisation of tartan as an [ethnic-identity] symbol, evolving to some degree independently to suit diasporic cultural needs and unrestrained by the views of the originating Scottish \"home\" culture.[\\[783\\]] According to Ian Maitland Hume (2001), tartan and the kilt are powerful symbols that \"encapsulate many facets of a heritage which people aspire to access ... a part-mythical family origin for those seeking roots\".[\\[784\\]] \n\nThe [Scottish Tartans Museum and Heritage Center] was opened by the [Scottish Tartans Society] in 1988 in [Highlands, North Carolina]; in 1994, it moved to nearby [Franklin]. The museum, which runs independently of STS, features over 600 tartans on display, including specimens dating to c. 1725, and Highland dress examples to ca. 1800.[\\[785\\]] (STS also operated a Scottish Tartans Museum in Edinburgh,[\\[786\\]] but it closed when STS did in 2000.) A major exhibition on tartan was produced by the [Fashion Institute of Technology] in New York 1988–89, and another was created for the [Edinburgh Festival] in 1989.[\\[787\\]] Others followed in Italy in 2003, and Japan in 2018.[\\[788\\]] In April 2023, the Victoria and Albert Museum of Dundee ( [V&A Dundee]) opened a design exhibit (running until January 2024) about tartan and its \"shifting context\", with goals of \"challenging preconceptions of what tartan is, whether that be from a historical sense or fashion sense\".[\\[789\\]] [\\[790\\]] \n\nD. Gordon Teall of Teallach, of the Scottish Tartans Society, observed in 1994:[\\[791\\]] \n\n> Tartans have always formed part of Scotland's historic heritage and it is a compliment to their country that they have become so widespread throughout the English and Gaelic speaking world. They are probably more popular now than they have ever been because they have come to symbolise the spirit of families, clans and districts and, more recently, corporate institutions.\n\nEven as tartan has been bent to the cultural needs of the diaspora, as \"the most straightforward and outward sign of ... affinity with Scottishness\", and bent to the commercial intents of fashion, tourism, entertainment, and other industries, tartan's reception by native Scots in Scotland has been less favourable for decades, even the last century or so. Reasons include a feeling that it is not really a symbol of broad Scottish national identity because of its specifically Gaelic and Highland origin; the \"Highlandist\" and imperialist foisting of it on the entire country as national costume in the late Georgian through Victorian eras; distorted views of Scottish people promulgated by Lauder and other tartaned entertainers of a century ago; an academic view of tartary and Lowland [kailyard literature] as two halves of a low-brow, romanticising vulgarity (reinforced in recent decades by the \" [Tartan Army] \" fandom of the Scotland national football team reinvigorating a working-class attachment to kilts and tartan); and historically inaccurate portrayal of Scotland by tartan-heavy Hollywood productions like [_Brigadoon_] (1954) and _[Braveheart] _ (1995).[\\[792\\]] Brancaz (2016) argues that \"looking at tartan through the lens of the intelligentsia fails to account for its enduring appeal and resilience. ... \\[T\\]he wearing of kilts and tartans at weddings, funerals, and _[cèilidhs] _ in Scotland has increasingly been interpreted as a form of cultural reappropriation.\"[\\[793\\]] \n\n#### Industry and politics\n\n\\[ [edit] \\]\n\nIn 2006, the [British Ministry of Defence] sparked controversy when it allowed foreign woollen mills to bid for the government contracts to provide the tartans used by the Scottish troops (newly amalgamated as battalions into the [Royal Regiment of Scotland]), and lowered the formerly very high standards for the cloth.[\\[225\\]] \n\nFollowing a [bill] submitted in the [Scottish Parliament] in February 2007,[\\[794\\]] Scotland's [enterprise minister] announced in July 2007 that the [National Archives of Scotland] would set up a national register of tartans.[\\[795\\]] The announcement stated that \"Tartan's importance to Scotland cannot be overestimated. It is deeply embedded in Scottish culture and is an internationally recognised symbol of Scotland.\"[\\[795\\]] This was later reiterated in 2013 through the BBC.[\\[796\\]] The ministry cited an industry report indicating that \"the tartan industry is a significant contributor to the overall [Scottish economy]; and larger ... than suggested by previous industry estimates\", and is the basis for some 200 businesses, 4,000 jobs, and £350 million in annual [GDP] in Scotland.[\\[795\\]] The bill passed in October 2008, and the [Scottish Register of Tartans] launched in February 2009.[\\[794\\]] (See [§ Registration], below.)\n\nGeneral tartan-pattern clothing shot up in popularity again starting around 2010\n\n_[The Observer] _ reported in 2010 that tartan clothing had become more popular than ever before, crossing subcultural, social-class, and age-group lines, and showing in that year a 540% sales increase in Britain from only two years earlier.[\\[797\\]] Around the same time, there began a resurgence in tartan kilt wearing among Scottish young people \"as a mark of a vibrant, modern Scotland\".[\\[798\\]] [\\[799\\]] [\\[800\\]] [\\[801\\]] This has interrupted a generations-long trend of native Scottish disaffection toward tartan as stereotyping [kitsch].[\\[802\\]] [\\[803\\]] An online survey by [BBC] in 2012 found that 52% of respondents strongly or very strongly disagreed with the premise \"Walter Scott's re-branding of all Scots as tartan-wearing Highlanders has been a hindrance to Scotland's cultural development\", and only a third agreed.[\\[804\\]] Tartan in mainstream, international fashion experienced another resurgence starting in 2019.[\\[805\\]] \n\nContemporary [Scottish nationalism] has been said to be \"fed, in part, by tartan and Jacobite nostalgia\".[\\[806\\]] After avoidance of tartan since the 1970s (especially by Scottish [liberals]),[\\[807\\]] the cloth has been politicised again as a nationalist symbol (as it was in the early 18th century), especially during the [2014 Scottish independence referendum] and in the [Scottish National Party] 's 2015 campaign.[\\[800\\]] (Perhaps owing to this messaging shift, the VisitScotland agency around the same time changed its tourism advertising to minimise, though not eliminate, tartan imagery.)[\\[808\\]] [Murray Pittock] (2002) writes that the [neo-Jacobitism] is \"both irritating kitsch and a language of identity\" for modern Scots.[\\[809\\]] After several decades of intellectual hostility toward tartan (e.g. in [Tom Nairn] 's 1977 _The Break-up of Britain: Crisis and Neo-nationalism_, and [Hugh Trevor-Roper] 's posthumous 2008 _The Invention of Scotland_), an \"academic re-assessment of tartan\" began in the early 21st century, relying on a wider range of early and modern source material,[\\[810\\]] in [historiographical], multidisciplinary [edited volumes] including _Scottish History: The Power of the Past_ (eds. [Edward J. Cowan] and [Richard J. Finlay], 2002) and _From Tartan to Tartany_ (ed. Ian Brown, 2010).\n\nMajor commercial weavers (tartan mills) of traditional tartan cloth that are operating today include Lochcarron of Scotland[\\[811\\]] [\\[812\\]] in [Lochcarron] and [Selkirk]; Ingles Buchan in [Glasgow];[\\[399\\]] [\\[812\\]] House of Edgar (also a Highland dress vendor, and a subsidiary of Macnaughton Holdings) in [Perth];[\\[399\\]] Johnstons of [Elgin] (also a wool clothing maker),[\\[399\\]] Strathmore Woollen in [Forfar],[\\[399\\]] and D. C. Dalgliesh in Selkirk,[\\[105\\]] all three of which are now part of the Edinburgh-based Scotweb, under the [trade name] Clan;[\\[813\\]] Prickly Thistle (also a women's clothing maker) in [Evanton] and [Edinburgh];[\\[814\\]] The Tartan Weaving Mill (also a weaving museum, and a subsidiary of Gold Brothers) in Edinburgh;[\\[815\\]] Andrew Elliot Ltd in [Selkirk]; Stevens & Graham (specialising mostly in tartan rugs and carpet) in [Rutherglen]; Marton Mills in [West Yorkshire], England; [Cambrian Woollen Mill], in Powys, Wales; West Coast Woollen Mills in [Vancouver], British Columbia, Canada;[\\[816\\]] GK Textiles in [Port Moody], BC (formerly Fraser & Kirkbright, Vancouver);[\\[817\\]] and [Pendleton Woolen Mills] in Portland, Oregon, US.[\\[216\\]] The modern trade in wool tartan fabric has three principal markets: Highland dress, high fashion (with significant business from France and Italy), and furnishing.[\\[818\\]] [\\[19\\]] \n\nPopular tartans (including for kilts and other Highland dress, as well as for school uniforms) have increasingly been manufactured, primarily in the UK, in [poly-viscose] (PV),[\\[819\\]] a blend of the artificial materials [polyester] and [viscose] (rayon), typically in a 65% polyester to 35% viscose ratio.[\\[820\\]] [\\[821\\]] PV is promoted as washable, durable, crease-resistant but heat-settable for permanent pleating, shrinkage-resistant, stain-resistant, colour-fast, low- [pilling], hypoallergenic, not attractive to [clothes moths], more \"breatheable\" than polyester (thus good for athletics), lower cost than wool, and lighter weight than wool, but said to have a wool-like texture.[\\[822\\]] [\\[823\\]] [\\[824\\]] [\\[825\\]] [\\[826\\]] It also does not rely on animal industry, so it appeals to [vegans].[\\[822\\]] [\\[823\\]] Large-scale global manufacturers of tartan-patterned cloth in a variety of cotton, polyester, viscose, [nylon], etc., materials and blends include Başkan Tekstil in [Istanbul] and [Bursa], Turkey; and Jeen Wei Enterprises in [Taichung], Taiwan; while a leading maker of tartan [ribbon] is Satab in [Saint-Just-Malmont], France.[\\[827\\]] Tartan designs have long been produced in low-cost [cotton] in large quantities in China.[\\[816\\]] \n\nCarol Craig (2003) writes: \"Like it or not, tartan is a very sophisticated branding and marketing tool for Scotland.\"[\\[828\\]] In a tartan-as-marketing analysis, Paterson (2001) observed that continued internationalisation of tartan manufacture, design, and consumption has diluted the associative \"Scottishness\" of tartan and its value as a national identifier. He blames this in part on Scottish weavers' failure to adapt to market demands for a wider range of fabric applications, as well as the businesses' own complicity in broadening tartan's perceived cultural identity, e.g. in creating tartans for non-Scottish families, places, and organisations.[\\[829\\]] \n\n(For particular 20th-century to present-day tartans, see also [§ Corporate and commercial] and [§ Fashion], below.)\n\nScene from 1954 _[Brigadoon] _ film, with kilts and tartan trews\n\nIn 1947, the tartan-laden Broadway musical _[Brigadoon] _ (followed by [a film version] in 1954 and a television adaptation in 1966) renewed an excessively romanticised notion of the Highlands and Highland dress. A critical review called it a \"whimsical dream-world\" that was \"overloaded with Hollywood-Scottish trappings\".[\\[830\\]] (The production is generally not well received by actual Scots.)[\\[831\\]] [\\[832\\]] \n\nTartan suits were popular in the [mod subculture] of Great Britain of the early to mid-1960s and its late 1970s [revival].\n\n\" [Tartan Army] \" Scottish football fans at a match in Milan, Italy, in 2005\n\nSince the 1970s, the fandom of the [Scotland men's national football (soccer) team] have been collectively referred to by the nickname \" [Tartan Army] \", with fans often sporting tartan clothing (including kilts) at matches.\n\nThe [Bay City Rollers] in the Netherlands in 1976, sporting some tartan shirts and a tartan-trimmed jacket\n\nPopular in the mid-1970s, Scottish teeny-bopper band the [Bay City Rollers] were described by the _[British Hit Singles & Albums] _ reference book as \"tartan teen sensations from [Edinburgh] \".[\\[833\\]] \n\nA German [punk] wearing a piece of the [royal Stewart tartan], 1984\n\nTartan became a common element of [punk subculture] starting in the late 1970s. [Punk music] was a way for youth in the British Isles to voice their discontent with the [ruling class] and with modern society. The unorthodox use of tartan (especially the [royal Stewart]), which had long been associated with authority and [gentility], was then seen as an expression of that discontent. In this way, tartan – worn unconventionally – became an [anti-establishment] symbol. This was entirely on purpose according to [Vivienne Westwood], a designer deeply involved in early punk fashion;[\\[765\\]] [\\[834\\]] the idea was \"to seize the very fabric of the Establishment in order to reverse its meaning and perhaps to challenge society's design.\"[\\[835\\]] American punks often wore tartan skirts, a \"subversion\" of the Catholic school-girl uniform, and kilts have also been worn in the punk scene since the late 1970s, especially in the UK.[\\[801\\]] Baggy tartan pants later proved popular among [pop-punks] and [skate punks], and tartan-lined jackets among [ska punks]. (For further information, see [Punk fashion].) From the late 1990s, kilts (mostly modernised \" [utility kilts] \" but sometimes traditional ones) have become relatively popular even in North American post-punk subculture (e.g. the [goth] – [industrial], [emo], and [steampunk] scenes), though often in black rather than tartan.\n\nAfter the 1970s, Westwood, who continued to work extenstively with tartan, was joined by other big-name _couturiers_. These included [Ralph Lauren] and [Laura Ashley], whose designs promoted tartan as a mainstream modern clothing option \"with traditional grace and style\" for both women and men;[\\[811\\]] [\\[835\\]] [Stephen Sprouse],[\\[835\\]] credited with a 1980s combination of \"uptown sophistication in clothing with a downtown punk and pop sensibility\";[\\[836\\]] and later [Alexander McQueen],[\\[837\\]] who was \"consciously repoliticising the cloth\".[\\[319\\]] Others have included [Jean Paul Gaultier], [Tommy Hilfiger] (who made tartan central to his fall 2000 collection), [Christian Lacroix], [Yves Saint Laurent], [Giorgio Armani], and [Gianfranco Ferré].[\\[838\\]] [\\[835\\]] A tartan outfit designed by Westwood featured on a [commemorative UK postage stamp] issued by the [Royal Mail] in 2012 celebrating \"Great British Fashion\".[\\[839\\]] \n\nTartan/plaid [flannel] shirts, emblematic of the working class, re-entered mainstream fashion through a series of [subcultural] adoptions, originating primarily in the western United States. First, the style became a staple of [cholo] style in and around Los Angeles, from the 1970s. From there, the style later became adopted by [hip hop fashion] in the 1990s, especially the [West Coast hip hop] lifestyle.[\\[840\\]] Tartan flannel shirts also became quintessentially part of (and androgynous within) the [grunge] scene (starting in Seattle) of the late 1980s to 2000s.[\\[841\\]] There was fashion cross-pollination between these youth-culture movements,[\\[842\\]] and the fashion industry has found this confluence very marketable.[\\[843\\]] \n\nA resurgence of interest in tartan and kilts (and even Scottish tourism)[\\[832\\]] [\\[844\\]] has been generated in recent times by major Hollywood productions[\\[845\\]] like the [_Highlander_ franchise] (1986–2007),[\\[846\\]] [\\[845\\]] _[Four Weddings and a Funeral] _ (1994),[\\[847\\]] _[Braveheart] _ (1995),[\\[848\\]] [\\[849\\]] [\\[850\\]] _[Rob Roy] _ (1995),[\\[848\\]] [\\[850\\]] [\\[851\\]] _[Brave] _ (2012),[\\[852\\]] and the television series _[Outlander] _ (2014–, with a follow-on [travelogue documentary] series, _[Men in Kilts] _).[\\[853\\]] Many of these featured custom-designed tartans.[\\[846\\]] [\\[854\\]] \n\nTartan clothing has appeared frequently in _[Doctor Who] _. The [Fourth Doctor] ( [Tom Baker]) wore a [Wallace] tartan scarf on _[Terror of the Zygons] _,[\\[855\\]] and his robot-dog companion [K9] had a tartan collar.[\\[854\\]] The [Sixth Doctor] ( [Colin Baker]) had a signature patchwork frock coat that included segments in three different tartans, and also typically wore a tartan waistcoat in a fourth sett under it.[\\[856\\]] The [Seventh Doctor] ( [Sylvester McCoy]) wore a crimson and black tartan scarf on _[Time and the Rani] _. [Clara Oswald] ( [Jenna Coleman]), the companion of the [Eleventh Doctor] ( [Matt Smith]) and the [Twelfth Doctor] ( [Peter Capaldi]), wore a [Campbell] tartan dress on \" [The Name of the Doctor] \" and a Wallace skirt on \" [The Time of the Doctor] \" and \" [Deep Breath] \".[\\[857\\]] [Annabel Scholey] as Claire Brown, in the [Thirteenth Doctor] ( [Jodie Whittaker]) serial _[Flux] _, wears a 1960s-style muted tartan dress.[\\[858\\]] The [Fourteenth Doctor] ( [David Tennant]) wore a brown tartan suit in the [60th anniversary specials].[\\[859\\]] \n\n- 1980s _Doctor Who_ patchwork costume of the [Sixth Doctor], with at least three tartans involved\n\n- Royal Stewart again, as a mod/ska-punk jacket lining, 2007\n\n- [Rita Ora] performing in Glasgow in 2018, wearing a tartan trench coat made of at least five different setts\n\n- [Grunge fashion] still alive and well in 2019, featuring a lot of tartan/plaid shirts\n\n- A rather impractical tartan gown by [Christopher John Rogers], 2020–21, on display at the [Metropolitan Museum of Art] Costume Institute's exhibit _In America: A Lexicon of Fashion_\n\n\nOne of the most popular tartans is the [royal Stewart], ostensibly the personal tartan of the [British monarch], since George IV declared it his own (though it was probably designed by the [Sobieski Stuarts],[\\[700\\]] albeit based on mid-18th-century pattern called \"Prince Charles Edward Stuart\").[\\[111\\]] [\\[63\\]] The \"royal\" sett was first published in 1831 in the book _The Scottish Gaël_ by James Logan. In addition to its use in clothing, such as skirts and scarves, royal Stewart tartan has also appeared on biscuit tins for Scottish [shortbread],[\\[860\\]] and it has also long been favoured by the British [punk] scene.\n\nAnother tartan in very common use by the general public is [Black Watch] (also known as old [Campbell], [Grant] hunting, and Government).[\\[127\\]] This tartan, a dark variant (and ancestor) of the main Clan Campbell tartan, has long been used by military units in the [British Army] and other [Commonwealth] forces.\n\nEarly manufacturer Wilsons of Bannockburn made many \"fashion\", \"fancy\", or \"national\" tartans with catalogue numbers or fanciful names, without any association with particular families, districts, or organisations; two popular ones still in use are both usually called \"Caledonia\". Wilsons No. 3 is found in their 1819 _Key Pattern Book_ and is comparatively simple,[\\[861\\]] while No. 144 is more complex, though of a similar colour scheme, and seems to date to the late 18th century.[\\[862\\]] (The numbering suggests the other does as well.) Some other tartans in this \"Caledonia\" group were later claimed by clans; e.g. Caledonia No. 43 or \"Kidd\" became one of the [MacPherson] tartans.[\\[863\\]] [\\[861\\]] \n\n- Royal Stewart tartan\n\n- Black Watch tartan\n\n- Wilsons' No. 3 tartan, named Caledonia\n\n- Wilsons' No. 155, also often called Caledonia\n\n\nIn the general fashion industry, various patterns are technically tartan but are not treated as tartan _s_ in the clan or district sense. The very basic red-and-black Rob Roy or Robert Roy MacGregor pattern, the oldest of the [Clan Gregor] setts (though named after [Rob Roy] in the Victorian period),[\\[864\\]] is also in broad use (often with changed colours) as one of the most common patterns used in [flannel] cloth for clothing and bedding; in the US, it is often called \"buffalo plaid\",[\\[865\\]] a term of uncertain derivation.[\\[866\\]] When the Rob Roy sett is changed to a white ground with any other colour this forms the most common [gingham] cloth style. Gingham is often given a wider setting, to form a lattice appearance (sometimes called \"windowpane plaid\" or \"windowpane check\").[\\[867\\]] When that pattern is given one or more additional over-check colours, the result is the pattern known as [tattersall].[\\[867\\]] \n\n- One of the most common [flannel] patterns, \"buffalo plaid\" is just Rob Roy MacGregor tartan (originally red and black) rendered in any of various colours\n\n- Rob Roy changed to white and any other colour becomes [gingham] \n\n- Windowpane gingham\n\n- Windowpane gingham with two or more over-checks becomes [tattersall] \n\n\n## Tartans for specific purposes\n\n\\[ [edit] \\]\n\n\"Tartan of Pride\", designed in 2008;[\\[868\\]] one of over a dozen [LGBT] -themed modern \"fashion\" tartans\n\nIn addition to clan tartans, many tartan patterns have been developed for individuals, families, districts and towns, institutions, corporations, and events.[\\[9\\]] They have even been created for particular religious and [ethnic groups],[\\[cq\\]] and for sociological groups like the [LGBT] community.[\\[799\\]] [\\[873\\]] Tartan has had a long history with the military, and today some military units – particularly those within the [Commonwealth]  – have tartan dress uniforms.[\\[874\\]] (See [List of tartans § UK military or government tartans].)\n\nMany districts, cities, and towns in Scotland have their own tartans, mostly dating to the 20th century (though some few district tartans are quite old),[\\[cr\\]] and not always official; many were just created for marketing to tourists,[\\[875\\]] and some are copyrighted works tied to specific vendors.[\\[876\\]] They are intended primarily for those to whom a clan tartan does not seem to apply (see [§ Etiquette], below). At least two [local government councils in Scotland] have official tartans.[\\[877\\]] \n\nThe Maple Leaf tartan, designed in 1964,[\\[878\\]] has been an official symbol of Canada since 2011.[\\[879\\]] \n\nIn addition to the traditional district and modern geographic tartans of Scotland, new designs have been created for places in other countries. Only some regional tartans are officially recognised by the government bodies of the places the designs represent.\n\nThe [pan-Celticism] movement has inspired the creation of \"national\" (in the sense of [Celtic nations]) and sometimes regional tartans \"to emphasise the ... bonds with other Celtic countries\"[\\[880\\]] outside of Scotland; none of these appear to have any official recognition. There are [tartans of Cornwall], long a part of Devonshire in England (the designs date from 1963 to the 1980s);[\\[cs\\]] [Wales] (from 1967 onward[\\[ct\\]]  – sometimes with false claims of antiquity by marketers);[\\[891\\]] the [Isle of Man] (from 1946, many by D. G. Teall of the Scottish Tartans Society, and several asymmetric);[\\[cu\\]] [Brittany] in France (from 2002);[\\[cv\\]] [Galicia] in Spain (from 1990);[\\[cw\\]] and especially [Ireland] (from 1956).\n\nAfter the discovery of the \"Dungiven tartan\" (see [§ 16th century], above) and its marketing as a district tartan for Ulster, Scottish weavers (and in two cases English, and in another American) decided to tap an Irish and especially Irish-American market by introducing a profusion of national, province, and county tartans for Ireland and [Northern Ireland], generally based on established Scottish tartans with some colour changes.[\\[19\\]] [\\[216\\]] These geographical tartans, which (aside from the Dungiven/Ulster reconstruction of 1956) date to 1970 and later,[\\[220\\]] do not have any official recognition, and are purely a product of the industry.[\\[19\\]] [\\[214\\]] One weaver even introduced a competing set of Irish national and county tartans in 1996, different from the previous offerings.[\\[214\\]] \"The influence of native Irish people, either as suppliers or consumers of Irish tartans, would appear to be minimal.\"[\\[19\\]] \n\nFurther afield, all but two [Canadian provinces and territories] have [official tartans], with the first dating from 1956. Neither [Quebec] nor [Nunavut], Canada's newest territory, have formally adopted patterns. [Alberta], meanwhile, has two official tartans, including a dress one. All but Quebec's were registered with the [Court of the Lord Lyon] in Scotland.[\\[908\\]] Canada has an official national tartan that was originally designed to commemorate the introduction of its new maple leaf flag, and was made an official national emblem in 2011.[\\[879\\]] Various Canadian regions (like [Labrador] and [Cape Breton Island]), counties, municipalities, and institutions also have official tartans.[\\[cx\\]] \n\n[Tartans have been created for Australia]; its capital city, [Canberra]; each of its [states]; and some of its [local government areas]; but only some of those tartans have been officially adopted or recognised by the relevant governments in Australia. [US states] have [official tartans], with the first dating from 1988.\n\n### Hunting, mourning, dress, and dance\n\n\\[ [edit] \\]\n\n[Highland dancing], at a 2008 [Highland games] event, in [Aboyne dresses] with dance tartans that feature a lot of white\n\nA tartan is sometimes differentiated from another with the same name by a label: _hunting_, _mourning_, _dress_, or _dance_. The first three of these ideas are the result of Victorian fondness for dress etiquette and show[\\[911\\]] (and weaver marketing);[\\[66\\]] the last is more recent.\n\n**Hunting tartans** tend to be made up of subdued colours, such as dark blues, greens, and browns.[\\[912\\]] Although there is some evidence of early tartans with camouflage colours going back to the 16th century, hunting tartans, despite the name, have very little to do with actual hunting.[\\[12\\]] \n\n**Mourning tartans**, though quite rare, are associated with death and funerals. They are usually designed using combinations of black and white, or by replacing bright colours such as reds and yellows in a traditional tartan with black, white, or grey.[\\[913\\]] \n\n**Dress tartans** are usually special tartans for [formal-dress] occasions[\\[914\\]] (e.g. dress Stewart[\\[915\\]] is distinct from both the main [royal Stewart tartan] and the hunting Stewart,[\\[916\\]] among several other tartans attributed to [Stewart/Stuart]). In a few cases, a dress tartan is simply the main tartan of the clan.[\\[cy\\]] Dress tartans that do differ from main clan tartans are sometimes entirely different (e.g. [MacMillan] [\\[36\\]] and MacMillan dress[\\[921\\]] are unrelated designs), while in most cases they are based on the main tartan but with colour differences (e.g. Stewart). Some dress tartans are very modern,[\\[82\\]] [\\[624\\]] but some date back to the era of the _Vestiarium Scoticum_.[\\[922\\]] \n\n**Dance tartans**, intended for [Highland dance] outfits, for either sex, are inspired (like most dress tartans before them) by the [arisaid] (_earasaid_ tartans thought to have been worn by Highland women in the 17th and 18th centuries, which often featured white as a major colour, as do typical dance tartans today (most or all of which date to the 20th century or later). Some dance tartans are named \"arisaid\" rather than \"dance\", e.g. [Fraser] arisaid.[\\[923\\]] [\\[cz\\]] \n\nThere has been some confusion between dress and dance tartans, especially since the idea of the latter developed from the former.[\\[da\\]] Most dress tartans, including some of the oldest, also have white in them, and have been used for dance competition in lieu of a dance-specific tartan, so are easy to mistake for dance tartans, which almost invariably have white in them.[\\[926\\]] [\\[db\\]] \n\n### Family and individual\n\n\\[ [edit] \\]\n\nA large proportion of non-clan tartans in all of the modern tartan databases have always been family tartans, promulgated mostly from the late 20th century for family names that are not clans or listed as [septs] of clans. These are usually Scottish surnames, but the _[Scottish Register of Tartans] _ ( _SRT_) database increasingly includes new family tartans for names that are not Scottish or even British. Most family tartans have no copyright claim, since they are intended for use by anyone with the surname or an extended-family connection. The _SRT_ classifies them together with clan tartans in a \"clan/family\" category if they have history that pre-dates _SRT_ or if they are newer and are approved by a legally recognized clan chief or family head, but in a \"name\" category if they are newer and lack such imprimatur.\n\nThe [British royal family] 's own Balmoral tartan (designed c. 1852). It is incidentally one of the few long-established tartans with multiple hues of the same colour (two greys, in this case).\n\nA few non-clan family tartans have an older pedigree. The best known is Balmoral tartan, reserved for the [British royal family] and personal [pipers] thereof, since its creation by [Prince Albert] c. 1852.[\\[dc\\]] (See also further discussion under [§ Etiquette], below.) Some clans recognise tartans for specific family branches and septs that are not themselves generally regarded as clans. For example, [Clan Robertson/Donnachaidh/Duncan] acknowledges separate, established tartans (some of them quite old) for Inches, MacGlashan, MacInroy, MacLagan, MacPhee, MacWilliam, Reid, and Robinson,[\\[928\\]] and they are all registered in the _SRT_.\n\nSince the late 1960s, various weavers have marketed (primarily to Irish Americans) some tartans with Irish family names, without any involvement by family members.[\\[216\\]] There had also been a legend that the rare _Clans Originaux_ (1880) contained Irish family tartans, but this was finally disproven in 2003.[\\[19\\]] [\\[dd\\]] There is one case of a formal [Irish clan] /family tartan, however: The [Clan Cian] Society commissioned a tartan for Cian of [Ely], and registered it with the [Chief Herald of Ireland] in 1983.[\\[19\\]] [\\[216\\]] (Even this has an Irish-American connection, as the chief resided in California, and the society is US-headquartered.)[\\[930\\]] Similarly, a commercial operation in [Cardiff] named Wales Tartan Centre (supplied by [Cambrian Woollen Mill]) has since the early 2000s promoted a long series of tartans named for common or prominent Welsh family names; they are unusual in often having odd-numbered thread counts, and having a different [warp and weft] (producing rectangular rather than square patterns), probably to distinguish them from the Scottish style.[\\[891\\]] [\\[931\\]] \n\nFor the [much narrower sense of _family_], the _SRT_ registers also as \"name\" tartans those that are created by individuals for only themselves and their immediate-family members, often for weddings; these usually have a copyright claim. One of the earliest tartans named for a specific person[\\[de\\]] is the \"Janet Wilson sett\", entered into the late 1770s records of Wilsons of Bannockburn and believed to refer to the company founder's wife or daughter-in-law, though made as one of their publicly available patterns.[\\[317\\]] [\\[df\\]] \n\n### Corporate and commercial\n\n\\[ [edit] \\]\n\nNumerous Scottish brands use tartan, and some have unique tartans. Various not-for-profit organisations also have corporate tartans. Probably the earliest case was that of the Ancient Caledonian Society of London (founded in 1786 and defunct since 1837), which used what is believed to have been a consistent tartan[\\[934\\]] for its members' [frock coats] (which, unusually, featured [brocade] woven into the tartan, of [Jacobite] white roses – it may be what 1767 advertisements called \"flowered tartan\"); only one known example of the coat survives.[\\[355\\]] [\\[935\\]] \n\nScottish airline [Loganair] in its tartan livery\n\nAs an example of a modern commercial tartan, [Irn-Bru] (introduced in 1901), the best-selling [soft drink] in Scotland,[\\[936\\]] has its own tartan.[\\[937\\]] Scottish regional airline [Loganair] uses tartan livery, including on the tails of its planes, and has two registered corporate tartans.[\\[938\\]] \"Racing Stewart\"[\\[939\\]] is a pattern created in 1995 for the [Jackie Stewart] Formula One car-racing team.[\\[940\\]] \n\n\"DunBroch\", a tartan devised by Disney/Pixar for fictional characters in the animated film _[Brave] _\n\nThe \"corporate\" category is one of the fastest-growing in the official _[Scottish Register of Tartans] _ ( _SRT_) database, with a large number of Scottish (and American and other) companies and societies registering organisational tartans. These are generally protected by [copyright] and sometimes [trademark] law. These tartans vary in purpose from general corporate livery, to special event tartans, to tartans for fictional characters.\n\nTwo examples of the latter are [Sanrio] 's 2004 creation of a predominantly pink tartan for [Hello Kitty];[\\[91\\]] and the 2011 creation by [Disney] / [Pixar] of the DunBroch tartan for the family of the main character, Mérida, of the animated Highland fantasy/adventure film _[Brave] _.[\\[941\\]] \n\nHandbag in [Burberry] check\n\nAn early example of a tartan created by and for the fashion industry, and surely the most famous, is \"Burberry check\". It was introduced in the 1920s for the lining of trench coats made by [Burberry] of London, but has been used for all manner of clothing and accessories since 1967[\\[942\\]] (with another major marketing push in 2001) and is emblematic of the company and its upscale product line.[\\[943\\]] (For additional information, including a legal dispute, see [§ Legal protection], below.)\n\nA fast-growing category in the _SRT_ is that of \"fashion\" tartans, created by companies and individual designers simply for aesthetic reasons, without any association with a particular clan, family, region, etc. Like organisational tartans, most of these have a copyright claim attached to them.\n\nA prominent example: In 2017, Scottish fashion designer [Charles Jeffrey] designed a signature tartan for his Loverboy label, registering it in the _SRT_.[\\[125\\]] \n\nManufacture and use of tartan (at least in the Scottish context) is regulated, formally and informally, in three ways: _registration_ (recording of a tartan and its association, if any, with a particular family, organisation, person, event, etc.); _legal protection_ of a tartan as intellectual property (trademark, copyright); and _etiquette_ (socio-cultural norms regarding the use of tartan and Highland dress).\n\nCoat of arms of the [Scottish Register of Tartans] \n\nThe naming and registration of \"official\" clan tartans began in 1815, when the [Highland Society of London] solicited clan tartans from clan chiefs.\n\nFollowing recognition by a clan chief of a tartan as a clan tartan, the chief was formerly able to petition the [Lord Lyon King of Arms], the Scottish [heraldic] authority, to register it as a formal clan tartan.[\\[dg\\]] Once approved by the Lord Lyon, after recommendation by the Advisory Committee on Tartan, the clan tartan was then recorded in the Lyon Court Books.[\\[305\\]] However, leading up to the launch of the [Scottish Register of Tartans] in 2009 (see below for details), the office of the Lord Lyon stopped providing this tartan-recording process (though its statutory authority was not changed by the Tartans Bill).\n\nModern-day tartans can be created and registered by anyone, with the Scottish Register of Tartans. Modern registered tartans include ones for Scottish and other districts, cities, and towns; for Irish counties (devised since the 1990s)[\\[105\\]] and families (for example, the surname [Fitzpatrick] has two registered tartans[\\[944\\]]); for organisations and companies; and even for specific events or individuals. Tartans are also being created in record numbers among the [Scottish diaspora] in the United States, Canada, Australia, New Zealand, etc., especially for places, military divisions, pipe bands, and individuals and their immediate families.\n\nUntil the late 20th century, instead of a central official tartan registry, independent organisations located in Scotland, Canada, and the United States documented and recorded tartans.[\\[945\\]] In 1963, an organisation called the [Scottish Tartans Society] (now defunct, and originally named Scottish Tartans Information Centre)[\\[602\\]] was created to record and preserve every known tartan design.[\\[946\\]] The society's _Register of All Publicly Known Tartans_ ( _RAPKT_) contained about 2,700 different designs of tartan.[\\[947\\]] Registration of new designs was not free of charge. The society, however, ran into financial troubles in 2000, and folded.[\\[948\\]] [\\[399\\]] \n\nFormer members of that society formed two new Scotland-based entities – the [Scottish Tartans Authority] (STA, 1996 – before STS closed) and the [Scottish Tartans World Register] (STWR, 2000 – the [trade name] of a private company, Tartan Registration Ltd).[\\[399\\]] Both of these organisations initially based their databases on the _RAPKT_. STA's database, the _International Tartan Index_ ( _ITI_) consisted of about 3,500 different tartans (with over 7,000, counting variants) as of 2004.[\\[947\\]] The online _ITI_ was later rebranded _The Tartan Ferret_. STWR's self-titled _Scottish Tartans World Register_ database was made up of about 3,000 different designs as of 2004.[\\[947\\]] Both organisations were registered as Scottish [charities] and recorded new tartans (free in the case of STA and for a fee in the case of STWR) on request.[\\[949\\]] [\\[950\\]] \n\nIn the interim, a jointly Scotland- and US-based organisation, International Association of Tartan Studies and Tartan Educational & Cultural Association (IATS/TECA) emerged in 1984[\\[399\\]] and published its own _TartanArt_ database in the early 1990s as Microsoft Windows software which was much used in the North American kilt-making trade. IATS/TECA was absorbed by STA by 2005.[\\[399\\]] \n\nThe [Scottish Register of Tartans] (SRT) is Scotland's official tartan register, and was established in 2009.[\\[951\\]] SRT is maintained and administered by the [National Archives of Scotland] (NAS), a statutory body based in [Edinburgh].[\\[952\\]] The aim of the register is to provide a definitive and accessible resource to promote and preserve tartans. It is also intended to be the definitive source for the registration of new tartans (if they pass criteria for inclusion and a registration fee is paid). The database itself – also named simply _Scottish Register of Tartans_, and sometimes called _TartanRegister_ from its domain name – is made up of the pre-existing registers of STA and STWR as they were at the time of SRT's launch (preserving the STA's and STWR's registration numbers, dates, and other details in the SRT data), plus new registrations from 5 February 2009 onward. On the register's website, users can register new tartans, search for existing tartans and request their thread counts, and receive notifications of newly registered tartans.[\\[951\\]] [\\[953\\]] \n\nSTWR became defunct some time after 2008. STA later closed the _ITI/Tartan Ferret_ to new registrations, and in late 2022 removed the search feature from the STA website (pending a site redesign), deferring to the Scottish Register of Tartans, which now appears to be the only operating tartan registry. STA continues offline work on the _ITI_ database, correcting errors, importing new _SRT_ additions, and recording historical patterns newly discovered in museum holdings, etc.\n\nSome modern tartans are protected by [trademark] law, and the trademark proprietor can, in certain circumstances, prevent others from selling that tartan.[\\[127\\]] An example is the \" [Burberry] check\" of the English fashion house, an instantly recognisable tartan that is very well known around the world.[\\[954\\]] [\\[dh\\]] \n\nUnlike [trademark registration] and [copyright registration], the Scottish Register of Tartans (SRT) and its authorising Tartans Bill do not create any new or enhanced [intellectual property] rights through the act of registration (nor provide any enforcement mechanism other than removal of infringing entries from the registry).[\\[958\\]] \n\nSRT, however, permits registrants optionally to assert and record copyright and/or trademark claims over their new tartans, for designs that are eligible for such protection under other established law[\\[959\\]] (such as the [Copyright, Designs and Patents Act 1988]; and the [Scotland Act 1998], which took over copyright and trademark registration and enforcement in Scotland)[\\[960\\]] and lists such tartans as restricted. An SRT registration \"provides evidence of the existence and date of \\[the\\] design\",[\\[961\\]] which helps establish the copyright date under the [Berne Copyright Convention]. Such legal protections apply only to comparatively recently created tartans; old clan, regimental, and district tartans are outside the protection periods of such intellectual property laws.[\\[43\\]] \n\nSRT also permits the listing of intended _use and manufacture_ restriction preferences, but has no enforcement capability,[\\[962\\]] and also includes a statement that \"No other rights can be conferred.\"[\\[961\\]] British tartan weavers, such as Lochcarron and D. C. Dalgliesh, generally will not produce material in an SRT \"restricted\" tartan without written evidence of permission from the copyright/trademark claimant. In additional furtherance of intellectual property concerns, the SRT also refuses to register a new tartan that is confusingly similar to any existing one (as determined by an SRT review process).[\\[963\\]] \n\nThe application of copyright law to tartans is not well tested. The leading British legal case on textile copyright, concerned with designs printed on fabric, is _[Designer Guild Ltd v Russell Williams (Textiles) Ltd] _ (2000), finding for fairly broad copyright protection in textile works that involve creative originality.[\\[964\\]] In 2008, two tartan pattern copyright holders, Rosemary Nicolson Samios and weaver Lochcarron of Scotland, took legal action for infringement of an [Isle of Skye] district sett (designed 1993) and the [Princess Diana] Memorial sett (designed 1997), respectively, against the Gold Brothers firm of Surinder, Galab, Malap, and Dildar Singh, who operate dozens of stores in Scotland and online that sell primarily Chinese-made tartan objects or \"tartan- [tat] \", including cheap Highland-dress outfits, for the tourist market.[\\[876\\]] The Isle of Skye tartan was considerably profitable for Samios, after the pattern was popularised by Queen [Elizabeth II] wearing it in 1999. The Princess Diana sett was designed by Alistair Buchan of Lochcanrron and of the [Scottish Tartans Authority] as a charity fundraiser. A British court on 2 July 2008 issued an [interim interdict] (preliminary injunction) against Gold Brothers' sale of Isle of Skye goods, after a police search found hundreds of metres of the pattern in Chinese-made cloth in the company's warehouse.[\\[876\\]] [\\[965\\]] [\\[966\\]] [\\[967\\]] Both cases may have been [settled out-of-court] because published news regarding them ceases in 2008. A more recent case, _Abraham Moon & Sons Ltd v. Thornber & Others_ (2012), actually involved tartan. It held that the textual _ticket stamp_ (a detailed set of weaving instructions, i.e. a thread count with additional information on precise colours, etc.) used to produce a tartan designed in-house by the claimant had been infringed, was protected as a literary work, and _also_ constituted a \"recording\" of the graphical work of the tartan and thus was independently protected as a work of artistic craftsmanship.[\\[43\\]] [\\[968\\]] As of 2020, the decision was being appealed, as it conflicted with previous caselaw, e.g. _Hensher v Restawile_ (1976), holding such instructions to be uncopyrightable.[\\[969\\]] [\\[43\\]] \n\nWhile tartan arguably could be classified as a form of [intangible cultural heritage],[\\[970\\]] and its value to identifying Scottish products both in Scotland and internationally has been recognised and exploited for a long time,[\\[940\\]] tartan is not protected by either [geographical indication (protected designation of origin)] law, nor _[sui generis] _ legislation specific to that kind of product.[\\[971\\]] [Harris tweed], another textile associated more narrowly with Scotland, does have such protection. In 1998, Keith Lumsden, research officer of the [Scottish Tartans Society], proposed that the word _tartan_ be prohibited for use to market a textile, unless the design was accepted in an official governmental tartan registry (which did not then exist).[\\[940\\]] When the Scottish Parliament finally authorised the Scottish Register of Tartans in 2008, it did not include anything like this sort of trade protection. According to Michael B. Paterson (2001): \"No mechanism exists to protect \\[traditional Scottish\\] tartan from 'misuse' by interests having nothing to do with Scotland or Scotland's interests\", though the tartan registries \"play an important, if weak, role in asserting Scotland's [cultural rights] in relation to tartan.\"[\\[972\\]] \n\nScottish actor [Sean Connery] at a [Tartan Day] celebration in Washington DC. When [knighted] by Queen Elizabeth II in 2000, he wore this green-and-black hunting-tartan kilt of his mother's [Clan Maclean].\n\nSince the [Victorian era], authorities on tartan have claimed that there is an [etiquette] to wearing tartan, specifically tartan attributed to clans or families. In the same line of opinion, some tartans attributed to the [British royal family] have been claimed to be \"off limits\" to non-royalty.[\\[973\\]] [\\[974\\]] Even so, there are no laws or universally accepted rules on who can or cannot wear a particular tartan. (Some writers have nevertheless asserted their existence anyway, e.g. Alexander Campbell in 1890, regarding different Campbell tartans.)[\\[975\\]] The concept of the entitlement to certain tartans has led to the term _universal tartan_, or _free tartan_, which describes tartan which can be worn by anyone without controversy. Traditional examples of such are the Black Watch, Caledonia, hunting Stewart, and Jacobite tartans, [shepherds' check], and district tartans.[\\[976\\]] [\\[799\\]] [\\[977\\]] The published marketing of tartans for simple fashion purposes without any association to a place or body dates back to at least 1745,[\\[496\\]] and much of Wilsons' output through the 19th century consisted of \"fancy\" patterns for the general public.[\\[978\\]] Some recently created designs intended for everyone (though some are exclusive to particular weavers or Highland-dress outfitters) have names including Braveheart, Clansman, European Union, Highlander, Independence, Pride of Scotland, Rainbow, Scotland 2000, Scotland the Brave, Scottish National, Scottish Parliament, Spirit of Scotland, Stone of Destiny, and Twenty First Century.[\\[979\\]] \n\nBooks on Scottish clans list guidelines,[\\[127\\]] but are not always in agreement. One such opinion is that people not bearing a clan surname, or surname claimed as a sept of a clan, should not wear the tartan of their mother's clan.[\\[980\\]] This opinion is reinforced by the fact that in the Scottish clan system, the Lord Lyon states that membership to a clan technically passes through the surname. This means that children who bear their father's surname belong to the father's clan (if any), and that children who bear their mother's surname (her [maiden name]) belong to their mother's clan (if any).[\\[981\\]] Also, the Lord Lyon states that a clan tartan should only be worn by those who profess allegiance to that clan's chief.[\\[982\\]] \n\nSome clan societies even claim that certain tartans are the personal property of a chief or chieftain, and in some cases they allow or deny their clansfolk \"permission\" to wear that tartan.[\\[di\\]] According to the [Scottish Tartans Authority] – which is an establishment of the Scottish tartan industry – the Balmoral tartan should not be worn by anyone who is not part of the British royal family. Even so, some weavers outside of the United Kingdom ignore the \"longstanding convention\" of the British royal family's \"right\" to this tartan. The society also claims that non-royals who wear this tartan are treated with \"great disdain\" by the Scottish tartan industry.[\\[984\\]] [\\[dj\\]] \n\nGenerally, a more liberal attitude had been taken by those in the business of selling tartan, holding that anyone may wear any tartan they like. Under the liberal view, claimed \"rules\" are mere conventions (some of which are recent creations), with different levels of importance depending on the symbolic meaning of the tartan on some particular occasion.\n\nThe [Standing Council of Scottish Chiefs] has also taken a fairly flexible position (organisationally; some specific individual chiefs may have a narrower or looser take, and not all chiefs are members). Aside from opposing the creation of a new tartan using a clan's name without the chief's permission, their website states (adopting more loosely some ideas from the Lord Lyon view):[\\[987\\]] \n\n> There are no strict rules on who has the right to wear a particular tartan. People normally wear only the tartan (if any) of their surname, or a \"district tartan\" connected with where they live or where their family come from. Wearing a particular clan tartan indicates that the wearer bears an allegiance to the chief of that clan.\n\nSome Highland-dress historians have taken a dim view of regulatory intents and proclamations with regard to tartans; Scottish National Portrait Gallery curator A. E. Haswell Miller wrote that \"to claim special entitlement to a tartan in the same manner as heraldic arms is certainly absurd\", because evidence suggests that the idea was just invented by writers of the late 18th to mid-19th centuries.[\\[988\\]] Sir [Thomas Dick Lauder] expressed similar views as far back as 1829, right in the middle of the \"clan tartanry\" rush, dismissing both the then-new adoption of \"official\" clan tartans and attempts by clans to claim regimental ones.[\\[561\\]] \n\nWhile tartan has been most closely associated with Scotland, and dating back to the Roman period was perhaps associated with Northwestern Europe in general, it is likely that the idea of using patterns of rectangles and lines has independently occurred many times, in any cultures with weaving.[\\[989\\]] Basic tartan \"is almost as primitive a weave as it is possible to make ... probably the earliest form of patterened fabric anywhere.\"[\\[2\\]] Surviving pre-modern historical examples seem sparse, however.\n\nModern tartan-style cloth in a wide variety of materials and patterns from simple to complex is available and used today around the world, often simply as a style of cloth and without any association with Scotland.\n\nMaasai men c. 1906–1918, one wearing a tartan _shúkà_; photo by [Walther Dobbertin] \n\nAmong the [Maasai people] of Kenya and Tanzania, the _shúkà_ is a [cotton] blanket-like garment (what Scots would call a plaid) worn as a wrap, and very commonly in a tartan pattern, though sometimes linearly striped or of one colour.[\\[990\\]] _Shúkà_ are predominantly red, though sometimes seen in blue and other colours.\n\n[Maasai] men in _shúkà_; [Narok County], Kenya, 2018\n\n_Shúkà_ were originally of painted (typically red) leather, but Maasai have had access to [plain-weave] cotton fabric for some time, imported to the region by Americans since the 1860s.[\\[990\\]] [Joseph Thomas Last], a British [missionary], in 1883 described the Maasai as particularly fond of red and white cloth, to be worn by higher-status men (though he did not mention tartan in particular);[\\[991\\]] a 1903 report also had them typically wearing red blanket-like garments, after a time of favouring blue.[\\[990\\]] The Maasai were loosely allied with the British, 1895–1904,[\\[992\\]] and the latter made heavy use of [Scottish regiments] in African conflicts, bringing tartan with them. However, \" [Guinea] cloth\" (mostly produced in India), sometimes red and blue checked, was a common commodity in 18th-century western Africa, pre-dating [British West Africa]; whether it relates at all to _shúkà_ is unknown.[\\[993\\]] _Shúkà_ patterns usually lack the thin black lines common in Scottish tartans.\n\nA nomadic [cattle-pastoralist] culture, without their own weaving tradition, the Maasai have been described as unusually culturally conservative and resistant to modernisation.[\\[994\\]] Nevertheless, they have always engaged in trade to get goods they do not make themselves,[\\[991\\]] and have made local traditional use of modern materials.[\\[995\\]] The Maasai approach has been to resist yet assimilate [colonial] and post-colonial influences.[\\[996\\]] \n\nAlthough there is evidence of tartan usage among the Maasai to at least the period 1906–1918, when [Walther Dobbertin] photographed a tartan _shúkà_ in what was then [German East Africa], the current bright tartan and striped style of _shúkà_ appears to have been adopted primarily in the 1960s[\\[993\\]] [\\[997\\]] (partly in response to national-level clothing modernisation pressure), supplanting leather but keeping the same form-factor.[\\[996\\]] The shift in outward form without affecting function led one writer to quip that Maasai dress \"has undergone dramatic changes while not changing at all\".[\\[998\\]] Tartan-patterned cloth is not typically used for other Maasai garments besides _shúkà_.\n\nThe _shúkà_ has become so emblematic of the Maasai that there is some discussion (driven by the Maasai themselves) at the national and regional level about protecting it as a form of [cultural property].[\\[999\\]] While it has been claimed that _shúkà_ patterns, at least at one time, conveyed particular meanings,[\\[dk\\]] and there historically have long been weaving operations in various African areas,[\\[1000\\]] most _shúkà_ today that are not mass-manufactured in [Dar es Salaam] or [Mombasa] actually come from China, not Africa.[\\[993\\]] \n\n### East and South Asia\n\n\\[ [edit] \\]\n\nThe earliest-discovered tartan fabric in the world was discovered in Western China, in the context of the Tarim mummies, dated to c. 2100 BC through the first centuries BC (See [§ Pre-medieval origins], above). Today, tartan is still woven in China, both as a traditional fabric and in large commercial quantities for export.[\\[816\\]] \n\n- Chinese man in traditional hat of silk tartan with wool pompons, 2008\n\n- Historical [brocade] of the [Zhuang people] in [Yunnan], China (photo 2011). It is often very complex material, but sometimes simple tartan like this.\n\n- A tartan [cheongsam] (qipao) at a [Hong Kong] clothier in 2021\n\n- Tartan and other textiles for sale in bulk at [Yen Chow Street Hawker Bazaar], Hong Kong, 2022\n\n- A simple three-colour tartan pattern being woven on a hand loom in [Pilikula] heritage village, India, 2016\n\n- Indian [sari] in a two-colour tartan pattern with highlights at the crossings of the black lines, which may be [embroidery] or [supplementary weaving] \n\n- A modern, elaborate _kōshijima_ dress from Japan's [lolita fashion] subculture, 2018\n\n\nFour Bhutanese men, 2012, in _gho_ robes, with four different _mathra_ patterns, from vary narrow to quite broad\n\nIn [Bhutan], traditional men's robes (_gho_)[\\[1001\\]] and knee-stockings (_omso_, similar to [argyle] socks),[\\[1002\\]] and women's dresses (_kira_)[\\[1003\\]] are traditional [national costume] styles that are largely mandatory for public dress since 1963.[\\[1004\\]] [\\[dl\\]] Tartan (generally called _mathra_ or, after the district of its primary production, _[Bumthang] mathra_,[\\[1006\\]] [\\[dm\\]] among other names for specific patterns) is among the many common textile styles for these garments, some much more elaborate (generally called _yathra_)[\\[1008\\]] than tartan. The tartan cloths are woven traditionally in [yak] and [sheep] wool, but today also in [cotton] and [raw silk].[\\[1009\\]] \n\n_Gira_ dress featuring \"X\" patterns where the white stripes meet, produced by supplementary weaving\n\n_Mathra_ is woven primarily with a red ground. Some specific tartan/plaid styles of Bhutan are: broad-checked _thra bom_; narrow-checked _thra charuru_; _sethra_ ('golden pattern'), an orange or rust ground with yellow and sometimes black checks (with black, it is more specifically called _sethra dokhana_, and without, _dalapgi sethra_); red, blue, and black patterns on a white ground, in at least four varieties called _pangtsi_ (specifically red and black on white),[\\[1010\\]] _[Decheling] kamtham_, and other names;[\\[1011\\]] and another style is named _burai mathra_.[\\[1012\\]] Some of these fabrics feature [supplementary weft] decorative patterns (flowers, etc.) added to the tartan, with an [embroidered] or [brocaded] appearance, generally called _pesar_ ('new pattern'); one such style is more specifically called _sethra metho chen_, the yellow-orange pattern with flowers added. There are also patterns of simple linear stripes that do not cross each other (generally called _adha\\[ng\\] mathra_ or _aikapur_), with various names for specific styles.[\\[1013\\]] \n\nSamples of tartan madras cloth, showing its muted look\n\nMadras is a patterened, light-weight, breatheable, cotton cloth named for the Madras (now [Chennai]) area of India.[\\[1014\\]] Traditional madras is hand-woven from lumpy, [carded] -cotton thread, and coloured with natural dyes which may bleed together upon washing to create a more muted pattern than typical tartan, as well as a rougher texture.[\\[1015\\]] Madras also has a \"softer\" look because it typically lacks the black lines found in most Scottish tartans. Madras cloth dates to at least the 16th century, produced in a variety of patterns, including religious designs and floral prints.[\\[1015\\]] It is unclear if tartan patterns were among the original designs, though they became very popular later. Weaving, primarily for export, in Madras/Chennai became a large-scale commercial enterprise after the British [East India Company] came to control the area in the mid-17th century.[\\[1016\\]] Major production of this style of cloth also took place in [Cambay State] (present-day [Gujarat]).[\\[1017\\]] \n\nMadras, ideal for warm-weather wear, became popular in the Philippines (where it is known as _cambaya_)[\\[1017\\]] and the Caribbean;[\\[1015\\]] mainly in undyed form, it was also exported to Europe.[\\[1015\\]] Tartan madras reached America by 1718, and appeared in the 1897 [Sears] catalogue.[\\[1015\\]] It was popular in the United States in the 1930s and again in the 1960s, often associated with [preppy] style.[\\[1015\\]] Substantial export of the cloth to South Africa began in 1958.[\\[1015\\]] \n\nModern madras cloth is commonly in tartan patterns, but also simply striped ( [seersucker]). Unlike Scottish-style tartan, madras is not woven in 2/2 [twill] pattern, but is a [muslin] of [plain weave];[\\[1015\\]] it thus, when viewed up close, features a \"pepper and salt\" colour mixture where colours cross[\\[6\\]] (a [dot matrix], technically), not staggered diagonal lines (see [detail image]). It also usually lacks black lines.\n\n[Woodcut] image of Japanese _[kabuki] _ actor Iwai Hanshiro IV dressed in _kōshi_, 1780s\n\nIn Japan, tartan patterns called _kōshi_格子 (also _koushi_ or _goushi_, literally\n'lattice') or _kōshijima_格子縞 date back to at least the 18th century,[\\[409\\]] possibly the 17th[\\[1018\\]] in the [Edo period] (1603–1867), and were popular for _[kabuki] _ theatrical costuming, which inspired general public use by both sexes, for the _[kosode] _ (precursor of the _[kimono] _),\nthe _[obi] _, and other garments.[\\[1019\\]] The name is a reference to the details of _[shoji] _ room dividers, the grid pattern said to stand for strength, with larger stripes representing more power.[\\[1019\\]] _Kōshi_ range from simple checked patterns to complex multi-colour weaves. [Ikat] thread-dyeing techniques were sometimes employed before the weaving, such that a colour in the pattern was mottled,[\\[1019\\]] and parts of the design may sometimes have been [embroidered], [supplementary-woven], or dyed-over for additional highlight or contrast.[\\[1019\\]] Some styles have particular names, such as _misuji-kōshi_ ('three-striped lattice')[\\[1019\\]] and _futasuji-kōshi_ ('forked lattice').[\\[1020\\]] A pattern with larger squares is more generally called _ogoshi_ or with smaller squares _kogoshi_.[\\[1021\\]] \n\nIt is unclear whether there was a Scottish tartan influence on the development of _kōshi_. The Edo period pre-dates the [Perry Expedition] of 1853–1854 and its opening of Japan to general [Western] trade, but mostly post-dates early European contact from 1543 to the closure of Japan to outsiders in 1639 under the _[sakoku] _ isolationist policy.\n\nNothing suggests that particular patterns have been associated with specific families or [Japanese clans].\n\nToday, _kōshijima_ is the general Japanese word for 'tartan/plaid, checked pattern'.[\\[1022\\]] Tartan is popular in present-day Japan, both for high fashion and for streetwear,[\\[319\\]] as well as [school uniforms].[\\[685\\]] Since the 1960s, the Japanese department store chain [Isetan] has used an emblematic tartan as a marketing tool (e.g. on all its shopping bags); the pattern is based on some [MacMillan] tartans.[\\[1023\\]] Japan hosted a major museum exhibit about tartan in 2018.[\\[1024\\]] \n\n### Eastern Europe to Western Asia\n\n\\[ [edit] \\]\n\nTartan-style patterns are common throughout Southeastern Europe.\n\n[John Francis Campbell] (1862) described the native weaving of the [Sámi] (Lapps) of northern Europe as being hand-loom tartan.[\\[989\\]] \n\nConsiderably to the southeast, the [Tatars] [\\[dn\\]] and [Chuvash], [Turkic] peoples of [Tatarstan] and [Chuvashia], respectively, in the Russian Federation, have worn tartan, striped, and other patterns since at least the 19th century.\n\n- Detail of [Serbian] tartan folk dress, densely [pleated], 2017\n\n- Tartan patterns used in a [Bulgarian folk] costume\n\n- Simple [shepherd's check] tartan being woven by [Pomaks] in Greece, 2007\n\n- Example of 1920s tartan cloth from [Belarus], in a complex non-twill [damask] weave\n\n- Silk tartan cloth of white, grey, and golden thread from [Lithuania] \n\n- [Estonian] woman wearing a tartan _suurrätt_ (plaid/shawl)\n\n- Another Estonian _suurrätt_, with a total-border pattern of more complexity than the simple central pattern\n\n- [Tatars] in [Kazan] in 1870, wearing tartan, stripes, and other patterns\n\n- [Chuvashian] example, c. 1870\n\n\n[Robert Jamieson], writing in 1818 as editor of [Edmund Burt] 's 1727–37 _Letters of a Gentleman in the North of Scotland_, said that in his era, married women of the north-western provinces of [Russia] wore tartan plaids \"of massy silk, richly varied, with broad cross-bars of gold and silver tissue\".[\\[94\\]] This seems quite distinct from Scottish-style construction.\n\n[Alexander Pushkin] wearing a tartan cape; by [Orest Kiprensky], 1827\n\nThe Russian poet [Alexander Pushkin] (1799–1837), who was influenced by the romantic-Highlands writings of [Walter Scott],[\\[1025\\]] [\\[1026\\]] posed for one of the most famous paintings in Russia, the [1827 portrait] by [Orest Kiprensky]. Pushkin wears what looks at first like a Scottish-style tartan [shoulder plaid], but is more probably a sleeveless \"Almaviva\" cape/cloak, a style in fashion at the time and known to have been worn by Pushkin.[\\[763\\]] \n\nTartan was commented on in the _Moscow Telegraph_ in 1826 as being in broad fashion in the city for all sorts of garments (often as a decorative accent).[\\[763\\]] Scottish-style plaids apparently did come into some fashion in Russia as women's wear for a space during the mid-to-late 19th century, a style picked up from stage productions; some 19th century Russian [paintings illustrate] use of plaids as [shawls].[\\[763\\]] Tartan (and plain-striped) shawls were also common among the [Volga Germans] and [Bessarabia Germans] in Russia; a mixture of hand-woven (originally as bedclothes and other household goods) and mass-produced in Russia, the shawls became emblematic of the German-from-Russia [diaspora] in North and South America from the nineteenth century to the mid-20th.[\\[1027\\]] [\\[1028\\]] \n\nAround the end of the 19th century, the Russian equivalent of Regency and Victorian British tartanware objects, such as decorative [Fedoskino] boxes with tartan accents in a style called _Shotlandka_Шотландка (literally 'Scotlandish'), were produced by companies like the Lukutin Manufactory on the outskirts of [Moscow].[\\[1029\\]] \n\nToday, _shotlandka_ or _shotlandki_шотландки are simply Russian words for 'tartan/plaid' generally.[\\[1030\\]] \n\n- 1839 portrait of Maria Arkadievna Bek by [Pimen Orlov] may illustrate one of the Russian plaids with silver thread\n\n- Posthumous portrait of [Alexander Pushkin] by [Carl Peter Mazer], 1839, shows him in a red and green tartan dressing gown.[\\[763\\]] \n\n- Tatyana Petrovna Musina-Pushkina, Princess Kropotkina (1800–1865), portrait c. 1840s by unknown artist\n\n\n#### Adoption by the Māori\n\n\\[ [edit] \\]\n\n[Pōtatau Te Wherowhero], the first [Māori] [king], adopted a particular house tartan with design elements symbolizing his ancestry, such as inner stripes representing [migration canoes] that first arrived in Aotearoa New Zealand; this tartan was presented by his descendant [Tūheitia Paki] to [Charles III] in the former's visit to Buckingham Palace in May 2023.[\\[1031\\]] His following [tribes] concentrated around [Northland] have also adopted green tartans.[\\[1032\\]] \n\n- [Drugget], a coarse and often linearly striped cloth that was common in the Scottish Western Isles\n- [Flannel], a type of fuzzy cloth often produced in a tartan pattern\n- [Hodden], a non-tartan cloth of undyed wool, sometimes also used for kilts, especially for non-Scottish pipe bands\n- [List of tartans] \n- [Mackinaw cloth], a dense woollen cloth often produced in tartan patterns\n- [Madras (cloth)], cotton cloth of India often woven in tartan patterns\n- [Tartan Day], a day of celebration, in Canada, Australia, the US, and some other countries, recognising the influence of Scottish immigration\n- [Tartanry] \n\n- [Argyle (pattern)] \n- [Battenburg markings], a check (dicing) pattern used on UK emergency vehicles\n- [Border tartan] \n- [Check (pattern)] or chequer\n- [Gingham] (Vichy check)\n- [Glen plaid] \n- [Harlequin print] \n- [Herringbone (cloth)] \n- [Houndstooth] \n- [Sillitoe tartan], a check (dicing, not actually tartan) pattern commonly used on police headgear\n- [Tattersall (cloth)] \n\n001. **[^] **The use of _plaid_ to mean 'tartan' has not been _exclusively_ North American; in 1808, the London publication _[La Belle Assemblée] _ referred to \"plaid scarfs\".[\\[14\\]] Also, it has sometimes been claimed that _plaid_ refers to all such patterns generally, and _tartan_ only to patterns of Scottish clans,[\\[15\\]] but there is no support for this idea in works of tartan scholarship.\n002. **[^] **[MacBain (1911)], p. 277. [Cognate] words in other languages are the [Luwian] _pldtmn_ and later [Latin] _paludamentum_ for 'cloak'. The _paludamentum_ was a cloak put on by Roman officers in time of war.[\\[17\\]] [\\[18\\]] \n003. **[^] **Solid-colour, non-tartan kilts were often thought to be an Irish invention of the late 19th century, but an example of a belted plaid or \"great kilt\" from Scotland was found in a 1635 portrait of Sir Duncan Campbell of [Loch Awe],[\\[19\\]] among other Scottish examples.\n004. **[^] **The two Scarlett sources provide two exact formulas which seem at first to be contradictory, but one is for number of blends and the other for number of colours total.\n005. **[^] **The term _mirroring_ can be ambiguous, because the longer phrase _mirror pattern_ may refer to \"one in which ... two alternating ground motifs are the same size and arrangement but in different colours.\"[\\[32\\]] \n006. **[^] **The term _repeating_[\\[27\\]] has also been used, as distinct from _mirroring_, but is so ambiguous that sometimes the same patterns are referred to as _non-repeating_.[\\[33\\]] Neither term will be used further in this article.\n007. **[^] **A well-known example is the main [Buchanan] tartan.[\\[34\\]] \n008. ^ [_**a**_] [_**b**_] An example is the most popular [MacMillan] tartan, in which the warp and weft are different, though similar; the largest blocks of colour are green rectangles instead of squares.[\\[36\\]] \n009. **[^] **Early collectors of tartan, like Logan in 1831, recorded setts by measuring the width of each stripe in eighths of an inch.[\\[37\\]] [\\[38\\]] A persistent legend that tartans were originally recorded on little \"pattern sticks\" has been dispelled as a \" [telephone game] \"-style progressive, willful misunderstanding of an early description of the warp as wrapped on a warp beam/roller for the loom. It was poorly described by [Martin Martin] in 1703 as \"an exact Pattern of the Plad on a piece of Wood\", which Logan (1831) misunderstood as a small stick used as a perpetual \"record\" of the tartan pattern on it, after which the \" [Sobieski Stuarts] \" in 1842 blatantly falsified a supposed 16th-century description of \"pattern sticks\", and Archibald Campbell (1890) repeated the story again as factual. No such artefact has ever been found by modern researchers, and the idea has been described as impractical because the threads would not stay put indefinitely, and it would make much more sense to simply write or draw the pattern on paper, or keep a strip of the woven material.[\\[39\\]] [\\[40\\]] Mackay (1924) claimed he had seen some examples and appeared to describe warp beams, but then claimed they were used as a long-term record of \"clan tartans\" of the area.[\\[41\\]] Eslea MacDonald (2015) points out that Mackay had a tendency toward \"manipulating the evidence\" when advancing his ideas about very old clan tartans, and that he made up a fake-Gaelic name for the alleged pattern sticks.[\\[40\\]] \n010. **[^] **For example, [Stewart, D. C. (1974)], [Scarlett (1990)], and Scottish Register of Tartans (2009–) all use full-count-at-pivots \"bare\" thread counts, without slash or bold notation, while [Eslea MacDonald (2012)] uses them to represent half-count-at-pivots, but states this explicitly.\n011. **[^] **_Ground_ in this sense dates to at least 1895. [Telfer Dunbar (1979)], pp. 112–113, quoting 1895 letter: \"... dress Stuart tartan on a white instead of on a red ground .... the 'Stuart hunting-tartan' on a green ground\". However, _ground_ has a different meaning at the thread-dyeing stage, where it refers to a first layer of colour which is then over-dyed with another, either to deepen the hue or make a new ones, e.g. purple from blue over red.[\\[49\\]] \n012. **[^] **Wilsons, the near-exclusive producer of Georgian through Victorian regimental tartan, produced different grades of cloth for officers, sergeants, and enlisted.[\\[56\\]] \n013. **[^] **Scarlett (1990) provided [a reconstruction of what 17th- to early 18th-century arisaid tartans probably basically looked like], based on the appearance of later wider-banded \"bar blanket\" tartans which evolved from the arisaid setts. His sample is modernised in being simplified, symmetrical, mirroring, and not having a decorative selvedge.[\\[77\\]] \n014. **[^] **The French term _écru_ has also been applied,[\\[87\\]] but is ambiguous, as it technically refers to the colour of undyed [linen] not wool, and has been taken to indicate a richer, sandy range of hues in English usage than in French.[\\[88\\]] \n015. **[^] **The [Lord Lyon] 's colour-coding system actually had three reds: \"gules/scarlet\", \"red\" (a dull red), and \"crimson\".[\\[45\\]] But it is not entirely clear what the difference between them is.\n016. **[^] **Multiple hues of pink appear in Wilsons' colour lists around the early 19th century, which included colours for wool and other weaving, but orange does not.[\\[87\\]] \n017. **[^] **For lists of such natural dye materials and their preparation, see: Kok, Annette (1979) \\[1962\\]. \"Appendix: Early Scottish Highland Dyes\"\". In Dunbar, John Telfer (ed.). _History of Highland Dress_. London: B. T. Batsford Ltd. pp. 222–240. Some additional such information is available in: [Mackay (1924)], pp. 59–64; and [Eslea MacDonald (2012)], pp. 76–77. See also: [Campbell, J. F. (1893)], p. 335.\n018. **[^] **A romantic legend about such a thing goes back quite a way, however. According to Innes of Learney (1971): \"The late J. G. Mackay, like Lord Archibald Campbell, claimed that clan tartans were not only deliberately arranged, but formed an elaborate system of identification by dress, as technical as [armorial bearings]  .... \\[T\\]artans were never intended to, and did not, have the precise distinctions and ready recognisability of armorial bearings. Mr Mackay gives much interesting information ...; he does not, however, succeed in adducing evidence that there was a scientific system of arrangement, and circumstances are against the existence of a _science_.\"[\\[128\\]] In summary, Mackay believed that lines of various colours formed a [heraldic] system of [cadency (differencing)] between related family branches.[\\[129\\]] The argument depends on the Victorian-era clan tartans having been used in the 17th–18th centuries, but all modern tartan scholarship shows this idea to be broadly false.\n019. **[^] **For a photograph of one of the Tarim cloth fragments and a reproduction of what the fabric may have originally looked like, see: Spada (2019).[\\[138\\]] \n020. **[^] **For a photograph of one of the Salzburg cloth fragments, see: Belfrage, Anna (30 April 2016). [\"Of mummies in tartan\"]. _AnnaBelfrage.com_. Retrieved 10 June 2023.\n021. **[^] **For a photograph of the Falkirk cloth fragment, see: [\"Record: Cloth (Fragment) – found at Falkirk Stirlingshire\"]. _NMS.Scran.ac.uk_. [National Museums Scotland]. 2015. Retrieved 3 June 2023.\n022. **[^] **For a photograph of the Caracalla statue fragment, see (about 1/3 down the page): Lamley, Hamish (21 February 2022). [\"Pictish Fashion\"]. _PictaviaLeather.co.uk_. Retrieved 10 June 2023.\n023. **[^] **Nor any relation to the modern [Tatar people] (see [§ Russian shotlandka]).\n024. **[^] **This historic Glen Affric tartan is not to be confused with various competing modern district tartans named \"Glen Affic\" available from some vendors such as Clan/Scotweb, Stevens & Graham, and Spoonflower.\n025. **[^] **There are possible mentions earlier, to the 14th century, in both [Early Scots] and [Middle English] using the French-borrowed terms _tiretain_ and _tartarin_ in various spellings, but they do not clearly refer to tartan, either the cloth or the pattern, but rather seem to refer to valued cloth in general.[\\[4\\]] \n026. **[^] **There is a legend that during some period there was a \"caste\" system by which chiefs were entitled to up to seven colours in a tartan, fewer colours were allowed for clansmen according to position in the social hierarchy, and just single-coloured cloth for servants. Barnes & Allen (1956)[\\[93\\]] attributed the idea to Frank Adam (1908). He did indeed write that \"it is said\" there was such a system, but cited no evidence.[\\[143\\]] The fancy is from Logan (1831), who cites nothing but an ancient \" [Achy Edgathach] \" of Ireland.[\\[178\\]] That was a legendary ancient Irish king in the _[Lebor Gabála Érenn] _, said to have passed a [sumptuary law] limiting clothing colours by social status, during his very short reign of only four years (some time between 1537 and 1155 BC).[\\[179\\]] It is old Irish folklore and nothing to do with history of Scottish tartan. Scarlett (1990): \"it is difficult to allow such a tale any credibility\".[\\[145\\]] No modern Highland-dress scholars repeat it seriously (the last one to do so seems to have been Mackay (1924) who said it applied to \" [Druidical] times\"),[\\[180\\]] and the idea is contradicted by existence of old regional tartans of complexity, and by chiefs adopting tartans of marked simplicity. Practically, the extra dye and weaving-labour expenses of complicated tartans meant that they cost more and so were more often worn by monied persons,[\\[145\\]] as clearly reported by [John Lesley] (1578)[\\[181\\]] and [Robert Heron] (1799).[\\[182\\]] \n027. **[^] **Buchanan (1582): \"They delight in marbled cloths, especially that have stripes of sundrie colours; they love chiefly purple and blue; their predecessors used short mantles or plaids of divers colours, sundrie ways divided, and among some the same custom is observed to this day, but for the most part they are brown, most near to the colour of the hadder \\[ [heather] \\], to the effect, when they lie among the hadders, the bright colour of their plaids shall not [bewray] them.\"[\\[192\\]] (Buchanan's wording was recycled in 1603, in the anonymous _Certayn Mattere Concerning Scotland_.)[\\[193\\]] \n028. ^ [_**a**_] [_**b**_] The attempt to depict tartan is fairly crude, done as divided stripes, instead of a staggered pattern of blending rectangles, though it is possible it represents a weave with differing [warp and weft], which could produce more of a striped pattern. The exact details shown in the image are open to other question, because the artist illustrated an imaginative sword that is a combination of a Scottish [claymore] hilt with the blade and quillions of a German [Landsknecht] sword of a type more familiar to the German ( [Silesian]) artist. Also, Telfer Dunbar (1979) called the colours yellow, blue, and red (not green, blue, and red), so the palette accuracy of the photo could be in doubt.[\\[195\\]] The original art is in [Huntington Library] MSS: HM 25863, f. 28r.\n029. **[^] **There is a recurrent legend running through Victorian works on tartan that the tartan cloth for each Highland man was usually made at home singly by his wife or mother,[\\[198\\]] but this proves to be an impractical idea,[\\[199\\]] for which there is no evidence, and considerable evidence against, including rich folk tradition of (mostly women's) group labour.\n030. **[^] **Innes of Learney (1938/1971) believed that Highlanders wore multiple tartans because some were personal (perhaps inherited), some geographical, and some clan-specific,[\\[204\\]] but presented no real evidence for this hypothesis. The idea can be traced to Lord Archibald Campbell (1890), who asserted (with no evidence at all) that a Highlander wearing multiple tartans at once could be explained by him donning the pattern of his commander, his own paternal clan, and maternal clan.[\\[198\\]] \n031. **[^] **Even this, however, is ambiguous, and could mean that the Highlanders wore striped mantles, and worse their mantles in the same fashion that the Irish wore their own mantles, striped or not. And \"striped\" does not necessarily mean tartan. The Camden material is also contemporaneous with the Plantation of Ulster.\n032. **[^] **[Taylor]: \" ... all and every man in generall in one habit .... For once in the yeere, ... many of the nobility and gentry of the kingdome (for their pleasure) doe come into these Highland countries to hunt, where they doe conforme themselves to the habite of the Highlandmen, who for the most part speake nothing but Irish \\[i.e. Gaelic\\] .... Their habite is shooes, with but one sole apiece; stockings (which they call short hose) made of a warm stuff of divers colours, which they call tartane; as for breeches, many of them, nor their forefathers, never wore any, but a jerkin of the same stuff that their hose is of; their garters being bands, or wreathes of hay or straw; with a plaed about their shoulders, which is a mantle of divers colours, much finer and lighter stuffe than their hose; with blue flat caps on their heads ....\"[\\[229\\]] \n033. **[^] **Adam (1908/1970) makes the surprising claim that in Taylor's time, \"any one who assumed the tartan of the clan was considered as being under the special protection of that clan\" and implies that Taylor said this.[\\[190\\]] Adam invented it, as nothing like this is in Taylor's original material. Taylor simply said that visitors wearing Highland dress would be \"conquered with kindnesse, and the sport will be plentifull\".\n034. **[^] **E.g. in the revised 1707 edition of Rev. [James Brome] 's _Travels over England, Scotland and Wales_, is material partly adapted from Buchanan (1582): \"They go habited in Mantles striped, or streaked with divers colours about the Shoulders, which they call Plodden, with a Coat girt close to their Bodies, and commonly are naked up their Legs, but wear Sandals upon the Soles of their Feet, and their Women go clad much after the same Fashion.\"[\\[239\\]] This suggests a span of at least 1582–1707 of Highland fashion being rather consistent and unisex.\n035. **[^] **The same artist earlier painted [a three-in-one portrait of actor-playwright John Lacy], in 1675, which featured trews and belted plaid,[\\[250\\]] but the tartan there is very casually represented as simple red and blue lines on white.\n036. **[^] **[Martin Martin] (1703) wrote: \"each Isle differs from the other in their fancy of making Plaids, as to the Stripes in Breadth and Colours. This Humour is as different thro the main Land of the Highlands, in-so-far that they who have seen these Places are able, at the first view of a Man's Plaid to guess the Place of his Residence ....\"[\\[255\\]] Scarlett (1990) says some earlier writers used this to just assume \"a fully organised system of District tartans at that time\" though Martin said nothing of the sort.[\\[256\\]] Scarlett considered Martin's account to have \"a rather sweeping style that suggests some exaggeration\", but generally plausible on other evidence of particular patterns, with minor variations, being common across wide areas.[\\[256\\]] \n037. **[^] **These tartan jackets are not to be confused with the later short [regimental Highland doublet] styles, borrowed directly from the military [Highland regiments] starting in the late 18th century; these are also of plain colour, not tartan.\n038. **[^] **Sources conflict sharply on the date. Telfer Dunbar (1979), relying on Mackay Scobie, says 1717-1739;[\\[271\\]] while Eslea MacDonald (2016), relying on R. W. Munro's _Highland Clans & Tartans_ (1977), says 1754.[\\[176\\]] \n039. **[^] **Banks & de La Chapelle (2007) give an implausible 1724 date for the founding of Wilsons,[\\[276\\]] which does not agree with other scholarship, and they cite no source for the assertion.\n040. **[^] **Scarlett (2008): \"Red, blue and green have been recorded as the first colours to appear in all primitive art, so there may be some deep physiological or psychological reason for the predominance of these colours.\"[\\[6\\]] \n041. **[^] **Britain has many native plants that can produce at least a thin yellow, but they seem not to have been favoured, except as a ground-colour for over-dyeing with blue to create green.[\\[278\\]] \n042. **[^] **A small piece of tartan believed to be from a plaid of Bonnie Prince Charlie, given in 1746 to Lady [Anne Mackintosh] of [Clan Farquharson], survives in the [National Records of Scotland].[\\[169\\]] The prince apparently had a habit of giving out plaids as thanks for hospitality, and several recorded (but quite different) tartans are said to have come from these plaids, e.g. SRT 4220,[\\[288\\]] 4421,[\\[289\\]] 4422,[\\[290\\]] and 4423.[\\[291\\]] According to Telfer Dunbar (1979), various museums and other collections hold at least 40 pieces of tartan claimed to have been worn by \"the Young Pretender\", eight at the Battle of Culloden, and they cannot all be genuine.[\\[292\\]] One sample in particular is more likely than the others to be legitimate.[\\[64\\]] \n043. **[^] **There are several other tartans called \"Jacobite\". One dates to c. 1850,[\\[67\\]] or might be a bit older and is probably a Wilsons design,[\\[32\\]] and the others are more recent commercial inventions of c. 1930[\\[300\\]] and the late 20th century.[\\[301\\]] \n044. **[^] **Telfer Dunbar (1979) says the correct year is 1725 and that 1729 was an error introduced by Stewart of Garth (1822) and copied by later authors.[\\[224\\]] \n045. **[^] **Specifically, as defined in an earlier act of Parliament, north of the \"Highland line\" running from Perth in the east to Dumbarton in the west.[\\[310\\]] \n046. **[^] **The Dress Act _per se_ did not enumerate exceptions for the nobility, but the enclosing Act of Proscription did.\n047. **[^] **[Lt.-Col. Sir John MacGregor Murray], newly chief of [Clan Gregor] and later vice-president of the Highland Society of London, wrote of the difficulty of raising a new regiment, in 1803: \"It will require much to rekindle the martial spirit of our ancestors, which has, unfortunately, been systematically broken down – we were so long degraded by the privation of our arms and dress, and so much unmanned by being converted into manufacturers\".[\\[323\\]] \n048. **[^] **Not to be confused with the second Seaforth's Highlanders, also raised as the 78th, in 1793. The original Seaforth's Highlanders were amalgamated with other units under the [Childers Reforms] to become the 1881 [Seaforth Highlanders].\n049. **[^] **Not to be confused with the earlier [74th Regiment of (Highland) Foot], raised 1777.\n050. **[^] **The commercial tartan weaver D. C. Dalgliesh provides a list of those that they supply, and it includes a mix of obscure tartans from defunct regiments, ones still used today for surviving regiments, tartans of overseas units that were \"Highland\" only in name, some that are now only associated with clans, and a number that are/were reserved for military pipe-band use and were not used in regular dress or undress uniforms.[\\[404\\]] \n051. **[^] **As one example, in _[The Lockhart Papers] _, first published in 1714, is a passage describing how opposing battatlions of [MacDonalds] from different places could only tell each other apart by colour of bonnet cockade. D. W. Stewart (1893) leapt to the conclusion they must have worn the same tartan, despite the material saying nothing of the sort[\\[418\\]] (they could have been wearing whatever tartans they happened to have, not uniforms, making tartan meaningless for distinguishing units of men).[\\[141\\]] \n052. **[^] **D. W. Stewart (1893) sometimes leaned toward the uniform interpretation: \"It appears from the regulations issued to the retainers of the [Clan Grant] [anent] the wearing of a uniform tartan that distinctive patterns were in use, at least for military purpose, or on occasion of great gatherings\".[\\[261\\]] The Grant case is covered in detail later.\n053. **[^] **D. W. Stewart (1893) again came down on the \"uniform\" side, despite otherwise being a booster of the idea of early clan tartans;[\\[253\\]] so did Scarlett (1990).[\\[254\\]] \n054. **[^] **Adam (1908/1970) confirmed that there were two different grades of tartan worn,[\\[190\\]] as did Logan (1831), but both are sources of dubious quality. Scarlett (1990, 2008) also observes that there were once at least two kinds of tartan weave, a coarse, dense sort in which the weft threads were thicker than the warp,[\\[6\\]] and a finer equal-twill weave, seen often in portraits, that is more like the kilt cloth produced today.[\\[272\\]] (But he does not describe one as being specially intended for war.) Such a fineness split seems to have continued for a long time; Wilsons of Bannockburn manufactured regimental tartan in both coarse (\"hard tartan\") and fine qualities as late as 1819,[\\[420\\]] perhaps as an undress and dress distinction, or enlisted and officer.\n055. **[^] **Innes of Learney's [motte-and-bailey tactic] when it comes to what \"clan tartan\" means is exemplified by his supposition that similar tartans used in lands of [Murray], Murray [of Athol], and [Sutherland] must mean they went back to a common tribal tartan \"from the twelfth century\" (which is not attested), and that: \"It was no doubt 'the Murrays' tartan' without being ' _The_ Murray tartan'\".[\\[162\\]] \n056. **[^] **E.g., [the district tartan] of [Huntly] [\\[315\\]] was sometimes called [Brodie], sometimes associated instead with [Forbes] or [Gordon], while Forbes did not have a distinct clan tartan until the key date of 1822, nor Brodie until the beginning of the 19th century.[\\[429\\]] The several tartans named Gordon all date to 1798 or later (and that earliest one was adopted from a 1793 regimental tartan).[\\[430\\]] \n057. **[^] **However, not only is it not certain that a single cloth of mixed colours was intended, rather than three cloths of distinct colours, Stewart contradicted himself: When the lands in question were restored to the MacLeans in 1630, the grey did not revert to green but remained gras, i.e. grey. Nevertheless, Stewart asserted: \"The explanation is simple enough. White and black and green are the only colours in the oldest authenticated Mac Lean tartan.\"[\\[434\\]] But that design dates only to the fraudulent 1842 _Vestiarium Scoticum_ and is not \"authenticated\" by anything;[\\[435\\]] several other (red-based) MacLean tartans date to at least 1819 ( _STR_ reference nos. 2603, 2605, and 2606).\n058. **[^] **The Scottish Tartans Society seemed to think it was something very similar to [Black Watch], with the red-and-white-striped Murray of Pulrossie version somehow, despite its 1618 prohibition, eventually becoming the primary Sutherland tartan.[\\[436\\]] Innes of Learney also supported the interpretation that it was a dark Black Watch-style tartan, related to others used in the region.[\\[405\\]] On the other hand, House of Gordon USA, a clan society, proclaims: \"It was a Red Gordon!\",[\\[437\\]] referring to a primarily red and teal tartan, also known as [old Huntly], recorded in 1819,[\\[438\\]] and appearing in a \"stripey\" variant, with differing warp and weft, in the [1766 painting of William Gordon]. The society does not publish any basis for their assertion.\n059. **[^] **The piper's name was William Cumming. Telfer Dunbar (1979) describes this tartan, and that of a companion portrait of Alastair Grant Mòr \"the Champion\", as also showing thin yellow over-checks which are not really visible in this photo. He also describes the green as \"grey\".[\\[440\\]] \n060. **[^] **This problem of no consistent tartans in old family portraits recurs in other clans, such as [Murray] and [MacDonald], going back to the 18th century.[\\[407\\]] Trevor-Roper (1983) also notes this inconsistency among Highland portraits,[\\[185\\]] as does Haswell Miller (1956).[\\[409\\]] \n061. **[^] **Willie Scobie, in 2012, railed against \"an influential and determined body of opinion set against the idea of clan tartans having existed prior to the late 18th century\", analysed the _Tartana_ lines in light of known clan tartans, found no correspondences aside from the Royal Company of Archers (supposedly using a Stuart tartan, which in reality they did not,[\\[257\\]] and not being a clan anyway) having green edging on their jackets, and nevertheless decided: \"we have in this piece of literature strong (one is almost tempted to say irrefutable) evidence of the existence of clan tartans in the year 1718.\"[\\[450\\]] \n062. **[^] **Thompson (1992)[\\[141\\]] said none of them survive; but the coat of the older boy is in what is now known as \"MacDonald, Lord of the Isles\", though the sett was reconstructed from the painting.[\\[469\\]] \n063. **[^] **A legend started by Lord Archibald Campbell (1890), who was working from a copy of the painting not the original, is that in his words: \"No more conclusive proof of distinctive clan colours has been exhibited and it silences all dispute on the question at once and for all time.\"[\\[476\\]] Modern researchers do not take this seriously. E.g., J. Telfer Dunbar's evaluation: \"This is an extraordinary claim as the tartans are clearly and accurately shown and not one of them agree with any clan tartans as known when Lord Archibald was writing or even to-day.\"[\\[198\\]] Scarlett (1990) pointed out that Campbell himself claimed, later in 1899, that the models for the painting were Jacobite prisoners.[\\[477\\]] If that were the case, they could be wearing whatever they were told to put on, even material supplied by the painter; i.e. the tartans depicted would be accurate representations of the cloth of the period but could not signify anything, even if they did match. The Jacobite Relics and Rare Scottish Antiquities Exhibition of 1946 agreed that the models were prisoners.[\\[478\\]] \n064. **[^] **James Ray, who served in the government forces at the Battle of Culloden, wrote in 1752: \"In their flight I came up with a pretty young Highlander, who called out to me, Hold your Hand, I'm a Cambell. On which I asked him, Where's your Bonnet? He reply'd, Somebody have snatched it off my Head. I only mention this to shew how we distinguished our loyal Clans from the Rebels; they being dress'd and equip'd all in one Way, except the Bonnet; ours having a red or yellow Cross of Cloath or Ribbon; theirs a white Cockade\".[\\[479\\]] Telfer Dunbar (1979): \"If it had been possible to distinguish a Campbell by a 'Clan Campbell' tartan, either Ray would have done so or else remarked on the fact that the man was not wearing an identifiable tartan.\"[\\[480\\]] \n065. **[^] **_A Journal of the Expedition of Prince Charles Edward in 1745, by a Highland Officer_ provides this account: \"We M'Donalds were much preplex'd, in the event of ane ingagement, how to distinguish ourselves from our bretheren and nighbours the M'Donalds of Sky, seeing we were both Highlanders and both wore heather in our bonnets, only our white cockades made some distinction\".[\\[418\\]] Telfer Dunbar (1979): \"If all the MacDonalds wore the same tartan, surely the writer would have mentioned this rather than the heather which they wore in their bonnets. A common tartan would have been much more confusing than a sprig of heather.\"[\\[481\\]] Also, this particular case does not demonstrate that _all_ the clans had different emblematic plants. Clan plants, like clan tartans, were solicited from chiefs in the early 19th century by the Highland Society of London, and there is no evidence of widespread assignment before then – only this single-clan mention in one period source.\n066. **[^] **And because the belted plaid in particular, as very practical for outdoor wear but not as work clothing, was believed to be conducive to a life of idle shirking and outright banditry.[\\[484\\]] \n067. **[^] **Even the Sutherland/Pulrossie letter of 1618 referred specifically to \"the plaides of his men\",[\\[405\\]] which is suggestive of his militia, not his entire clan.\n068. **[^] **Sometimes said to be [Jean \"Jenny\" Cameron], without conclusive evidence; there are five other identity candidates.[\\[494\\]] \n069. **[^] **Another legend, started by James Grant (1886), has it that the tartan goes back to \" [Charles, first Earl of Dunmore], second son of the first Marquis of Tullibardine\", but this was just a bad mis-reading of the Smith brothers (1850), from whom Grant plagiarised, referring to the _then-current_ Earl of Dunmore.[\\[327\\]] [\\[7\\]] \n070. **[^] **E.g., Telfer Dunbar (1979) provides this example: Wilsons' popular \"Gordon\" was green, purple, and black with over-checks of seven colours, and \"unlike the present clan pattern\".[\\[442\\]] \n071. **[^] **One example is today's [Macpherson], adopted in 1817, which was originally \"Caledonia\" then \"No. 43\", \"No. 155\", or \"Kidd\" in Wilsons' pattern books.[\\[399\\]] [\\[503\\]] (There is no \"Clan Kidd\"; the Kidd in question was a bulk orderer who used the tartan to clothe slaves in the West Indies.[\\[504\\]] Confusion seems to have arisen when Wilson also assigned the pattern the name \"Macpherson\" after another West Indies customer by that name.[\\[505\\]] Another is [Campbell of Cawdor], originally \"No. 230\" or \" [Argyll] \", after the county.[\\[506\\]] [\\[507\\]] A complex example is the case of [Abercrombie] or [Abercromby].[\\[508\\]] Logan (1831) first published the tartan usually used for this name, but he modified it[\\[509\\]] from an 1805 Wilsons tartan record for \"No. 64\" or \"Abercrombie\", named for Sir [Ralph Abercrombie] not an entire family.[\\[510\\]] The design first popular for \"Abercrombie\" in the early 18th century changed names somehow[\\[509\\]] to [Graham] then later became today's Graham of [Montrose] tartan.[\\[511\\]] Wilsons' \"Abercromby with yellow\"[\\[509\\]] is today's [Campbell of Breadalbane] [\\[512\\]] after also being used by a fencible regiment.[\\[513\\]] The main [Buchanan] tartan, famous for being asymmetric, originated as a Wilsons fashion tartan around 1800 and was not adopted as a clan tartan until the 1830s.[\\[34\\]] \"Logan\" was invented by Wilsons, named after a merchant, and changed several times until it sold well.[\\[514\\]] \"Drummond\" was originally Wilsons' \"Perth\".[\\[514\\]] Wilson's 1819 pattern \"Regent\" turned into the [MacLaren] clan tartan by 1830, with a shift from purple to blue.[\\[96\\]] Scarlett (1990) and the Scottish Register of Tartans provide numerous other examples of modern \"clan\" tartans actually just being renamed generic/fashion/fancy, regimental, and famous-individual tartans from Wilsons, when they were not taken from the later forgery _Vestiarium Scoticum_.\n072. **[^] **A prime example is the Black Watch tartan, which Cockburn collected four times and assigned the names \"Campbell Argyll\", \"Grant\", \"Munro\" and \"Sutherland\".[\\[519\\]] [\\[517\\]] \n073. **[^] **According to a documentary, [Clan Gregor], the [Gordons], and a [MacDonald] branch might also have had early informal clan tartans around this period.[\\[183\\]] However, the chief of the MacDonalds indicated not knowing of a clan tartan in 1815, and the tartan that was the subject of the 1618 Gordon/Murray/Sutherland letter is uncertain.\n074. **[^] **Stewart of Garth may have had financial motivations for promoting an aristocratic [\"tartanry\" or \"Highlandism\"] and attaching his name to it prominently – like many other Scottish [lairds], he was in dire fiscal shape.[\\[533\\]] \n075. **[^] **At least six at once claimed the Black Watch regimental tartan,[\\[517\\]] [\\[534\\]] and \"Several chiefs were asked to resubmit a different tartan in order to be seen to be different and thus support the idea of historical clan tartans.\"[\\[517\\]] In some cases, minor alterations were made, e.g. [Forbes] was devised in 1822 by adding a white over-check to Black Watch.[\\[540\\]] \n076. **[^] **There are numerous examples, but a prominent case is that two of the [Lord of the Isles] tartan variants were taken from portraits dating to the third quarter of the 18th century.[\\[541\\]] This practice, incidentally, has contributed to confusion about the age of clan tartans; a tartan adopted officially by a clan in 1850 from a painting dating to 1750 might misleadingly be said to be \"a clan tartan dating to 1750\".\n077. **[^] **The authenticated samples bore seals of clan chiefs, while submissions received without such authentications were sealed by society secretary George Wedderburn.[\\[56\\]] The society collected tartans in general as well, and amassed 586 by 1987.[\\[534\\]] \n078. **[^] **Some faulty (according to Wilsons) clan patterns included in Logan (1831) were those for [Abercrombie], [Douglas], and [Graham], but there were more.[\\[564\\]] \n079. **[^] **In fairness, only most of the tartans in _Vestiarium_ were made up; almost a dozen had previously appeared in collections like those of Cockburn and Wilson.[\\[573\\]] Telfer Dunbar (1979) also considered that the Sobieski Stuarts' more general material on the history and then-present of Highland dress was of considerable value, at least when its sources could be traced.[\\[574\\]] Of the tartans material, [Walter Scott] fairly charitably wrote that the brothers had \"an exaggerating imagination, which possibly deceives even themselves\".[\\[575\\]] \n080. **[^] **R. Martin (1988): \"I would like to excuse the prevarications of the Sobieski-Stuart brothers with a nod to [Baudrillard]; they lied and they cheated, but they did something quite extraordinary in ascribing a meaning to textile design that has more or less stuck: false as it is, the Sobieski-Stuarts fostered a myth of textile identification and implication that has served a continuing and compelling social need for well over a hundred years. They may have been factually wrong, but culturally very right.\"[\\[581\\]] \n081. **[^] **E.g., the usual tartan of [Clan Home] dates to _Clans Originaux_.[\\[590\\]] Another is [Brodie] hunting;[\\[591\\]] it was also later included in _Old & Rare Scottish Tartans_. A third is MacBean.[\\[114\\]] \n082. **[^] **See Scarlett (1990), chapter \"The Setts of the Tartans\", for numerous examples of names with 5 or even 10 \"clan tartans\", most of them traceable to Wilsons, Logan, or the Sobieski Stewarts.[\\[603\\]] For a quick visual example of conflicting claimed clan tartans, many of them dating to the Victorian to Edwardian periods, see the \"MacDougal\" search results in the _Scottish Register of Tartans_;[\\[83\\]] the list for that name is not much polluted by recent individual and \"fashion\" entries.\n083. **[^] **Revised in 1974, D. C. Stewart's _The Setts of the Scottish Tartans_ has been further updated and expanded by James D. Scarlett in 1990 as _Tartan: The Highland Textile_,[\\[607\\]] perhaps the most definitive work on tartan published so far (though by no means the largest in terms of number of tartans illustrated; it is a book of research not of pictures).\n084. **[^] **E.g. the red variant of the 1975 MacGregor dance tartan dates to 2005.[\\[609\\]] \n085. **[^] **_Electric Scotland_ published an annotated list of clans and their tartans' Lord Lyon registration status. The list is much shorter than some other clan lists, because it omits clans that have not applied to the Lord Lyon for tartan registry at all; it lists only those with Lyon-recorded tartans or those then in process of such registration.[\\[611\\]] \n086. **[^] **Example: The [Clan Watson] tartan dates to c. 1932 and appears to have been created by one of two ministers (sources disagree), based on the MacRae hunting and Gordon tartans.[\\[612\\]] \n087. **[^] **The Highland [MacLennans] use the same tartan as the Lowland [Logans]. Clan Logan is [without a chief].\n088. **[^] **Eslea MacDonald (2022) defines this \"Highland Revival\" period as the 1782 end of the Dress Act to the beginning of Victoria's reign in 1837.[\\[32\\]] The utility and accuracy of this term when constratined to Victoria's accession is questionable, because revivalism of Highland cultural trappings did not abate during her reign but actually intensified markedly. Also, the term _tartan revival_ has been used, with essentially the same meaning, though without closely prescribed dates.[\\[636\\]] \n089. **[^] **In this era, soldiering, especially as an officer, was the \"aristocratic profession _par excellence_\",[\\[638\\]] and this had a strong effect on fashion. In Highland dress of the period, sometimes civilian and military styles were commingled.[\\[639\\]] \n090. **[^] **Not to universal approval. The chief of [Clan MacDonell of Glengarry] wrote of a Celtic Society of Edinburgh gathering: \"I never saw so much tartan before in my life, with so little Highland material ... they have no right to burlesque the national character or dress of the Highlands.\"[\\[539\\]] \n091. **[^] **[David Wilkie] 's portrait of [George IV] depicts the king as being much slimmer than he actually was. Wilkie covered up the fact that the king's kilt was too short – sitting well above the knees – and also left out the pink tights the king wore to hide his bare legs.[\\[653\\]] \n092. **[^] **A detailed summary of the 19th-century tartan books can be found in [D. W. Stewart (1893)], pp. 57–61.\n093. **[^] **Queen Victoria wrote of her time in Scotland: \"... I feel a sort of reverence in going over these scenes in this most beautiful country, which I am proud to call my own, where there was such devoted loyalty to the family of my ancestors – for [Stuart blood] is in my veins, and I am now their representative, and the people are as devoted and loyal to me as they were to that unhappy race\".[\\[706\\]] \n094. **[^] **There were \"tartanitis\"-infused travel books of the era to go along with the tourism, e.g. _A Tour in Tartan-land_ by [Rev. Edward \"Cuthbert Bede\" Bradley] (1863).[\\[713\\]] \n095. **[^] **As examples, modern tartans have been created for [Chinese], [Jewish],[\\[869\\]] [Muslim],[\\[870\\]] and [Sikh] [\\[871\\]] communities, as well as Italian Scots.[\\[872\\]] \n096. **[^] **Wilsons of Bannockburn created several of the comparatively old ones – [Aberdeen], [Crieff], [Dundee], [Glasgow], and [Perth]  – simply by naming patterns after the places in which they were the most popular.[\\[442\\]] \n097. **[^] **Cornish \"national\" examples:[\\[881\\]] [\\[882\\]] [\\[883\\]] [\\[884\\]] [\\[885\\]] [\\[886\\]] \n098. **[^] **Welsh national examples:[\\[880\\]] [\\[887\\]] [\\[888\\]] [\\[889\\]] [\\[890\\]] \n099. **[^] **Manx national examples:[\\[892\\]] [\\[893\\]] [\\[894\\]] [\\[895\\]] [\\[896\\]] [\\[897\\]] [\\[898\\]] [\\[899\\]] [\\[900\\]] [\\[901\\]] [\\[902\\]] The last of these is inexplicably assigned a date of 1863 in _SRT_, but with a note that seems to indicate it was designed by D. G. Teall of STS in 1981.\n100. **[^] **Breton \"national\" examples:[\\[781\\]] [\\[903\\]] [\\[904\\]] \n101. **[^] **Galician \"national\" examples:,[\\[905\\]] [\\[906\\]] [\\[907\\]] \n102. **[^] **For example, [Bruce County] has an official tartan.[\\[909\\]] An example of a Canadian municipality with an official tartan is [Beauport, Quebec City].[\\[910\\]] \n103. **[^] **E.g., [Matheson] dress[\\[917\\]] is also known simply as Matheson, and is distinguished from a Matheson hunting tartan.[\\[918\\]] As with many Scottish names, there are an accumulation of other fashion and individual tartan designs named \"Matheson\",[\\[919\\]] but the only two recognised by the Clan Matheson Society are Matheson \\[dress\\] and Matheson hunting.[\\[920\\]] Similarly, [Shaw of Tordarroch] dress[\\[618\\]] is the main tartan, and is distinguished from a hunting variant,[\\[619\\]] with the old, erroneous \"Shaw\" tartan being retained only as a memorial tartan for a particular family figure.[\\[617\\]] \n104. **[^] **A photo in Adam (1908/1970) confirms that tartans with white stripes were used for Highland dance outfits at least as far back as the Edwardian period, though the style of [female dance-competition dress] has notably changed toward kilt-length instead of mid-calf skirts since then.[\\[924\\]] \n105. **[^] **Some writers have confused them as late as the 1980s (which suggests that dance tartans as a _conventional_ category unto themselves may date to the 1990s and later, though some specific dance tartans date to at least the mid-1970s).[\\[925\\]] E.g., J. C. Thompson (1989) conflates dance and dress tartans and treats all dress tartans as if they were white-bearing,[\\[101\\]] despite the clear fact that some dress tartans of considerable age do not have white in them, e.g. [Matheson] dress from c. 1850.[\\[917\\]] \n106. **[^] **The white-heavy MacGregor dance tartan (in three colour variants dating to 1975–2005) is confusingly listed in the _Scottish Register of Tartans_ as both dance and dress,[\\[925\\]] but the chief of [Clan Gregor] insists it is for dancers only,[\\[614\\]] so it is demonstrably not a general dress-wear tartan. Several other dance tartans are listed also as dress tartans in the _SRT_, but most appear to be \"fashion\" inventions by individuals or by woollen mills and are not associated with clans or districts.[\\[926\\]] \n107. **[^] **Possibly as early as 1850, and based on the Hay Stewart tartan or on royal Stewart, both probably by the Sobieski Stuarts.[\\[700\\]] It is often misdated to 1853.[\\[927\\]] \n108. **[^] **An example of a writer uncritically perpetuating the story can be found in M. B. Paterson (2001).[\\[929\\]] \n109. **[^] **As noted above, an early regimental tartan of 1787 was for a while called \"Mackenzie–MacLeod\" after two commanders, but this was a troop uniform tartan, not one for the named individuals.\n110. **[^] **The sett actually survives in two variants in the _SRT_, created for an 1880 wedding; they are now sometimes used as Wilson family tartans.[\\[932\\]] [\\[933\\]] \n111. **[^] **The Lord Lyon would only accept formal clan tartan registrations from clan chiefs; this excluded chiefless [armigerous clans] from tartan registration with the Lord Lyon, whether or not they had latter-day clan associations/societies. However, many now-armigerous clans _were_ able to register tartans with the Lord Lyon before they became chiefless, and these registrations remain in the Lyon Court Books. The Lord Lyon seemed to consider a clan that _has had_ a chief to remain a clan and not just a family/surname (the Lord Lyon did not do any registration of family tartans, i.e. those for non-clan surnames), though a statement by the Lord Lyon on this matter in 2002 is not as clearly worded as it could have been.[\\[611\\]] \n112. **[^] **In 2003, [Burberry] demanded members of the tartan industry to stop trading a certain Thomson Camel tartan.[\\[955\\]] Burberry claimed this tartan was confusingly similar to their Burberry check and that it thus infringed their registered trademark.[\\[956\\]] Burberry took legal action again in 2013 to protect its tartan trademark in China.[\\[957\\]] \n113. **[^] **For example, the Clan Cameron Association website states that the Cameron of Lochiel tartan \"is the personal tartan of the Chief and his immediate family; as a rule it should not be worn by clansfolk\".[\\[983\\]] \n114. **[^] **Since 1937, the only non-royals permitted by the British royal family to wear the Balmoral tartan are the monarch's own personal piper and pipers at the royal Balmoral estate. Even royal family members only wear it with the permission of the monarch.[\\[700\\]] The official website of the [monarchy of the United Kingdom] claims the tartan is not available for purchase.[\\[985\\]] [\\[986\\]] \n115. **[^] **Oyange-Ngando (2018): \"the intentional and specific arrangement of colour where each bears a certain meaning, for example a colour arrangement could represent age, clan or marital status of an individual\". Oyange-Ngando's paper cites many sources, but cites none at all for this claim. Modern photos of Maasai show members of the same tribe/clan wearing a wide variety of _shúkà_ patterns, seemingly to taste.\n116. **[^] **They are prescribed dress in at least in the more populous places. Remote areas, inhabited largely by ethnic minorities, still exhibit local traditional dress norms that differ from area to area.[\\[1005\\]] \n117. **[^] **Just _Bumthang_ by itself is a term for a type of woolen cloth, regardless of pattern.[\\[1007\\]] \n118. **[^] **Not to be confused with the [Mongols], who were called \"Ta\\[r\\]tars\" by medieval Europeans, and supplied patterned cloth among other trade goods (see [§ Medieval], above).\n\n0001. ^ [_**a**_] [_**b**_] Harper, Douglas. [\"tartan (n.)\"]. _Online Etymology Dictionary_. [Archived] from the original on 5 August 2017. Retrieved 4 March 2018.\n0002. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Banks & de La Chapelle (2007)], p. 57.\n0003. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 3.\n0004. ^ [_**a**_] [_**b**_] [_**c**_] [\"Tartan(e), Tertan(e), n. Also: (tartain)\"]. _A Dictionary of the Older Scottish Tongue (up to 1700)_. Dictionaries of the Scots Language SCIO / University of Glasgow. 2001. [Archived] from the original on 15 July 2023. Retrieved 14 July 2023.\n0005. **[^] **[Scarlett (1990)], p. 11\n0006. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] [_**k**_] [_**l**_] [_**m**_] [_**n**_] [_**o**_] [_**p**_] [_**q**_] [_**r**_] [_**s**_] [_**t**_] [_**u**_] [_**v**_] [_**w**_] [_**x**_] [_**y**_] [_**z**_] Scarlett, James D. (2008). [\"Submission from James D. Scarlett\"] (PDF). [Scottish Parliament]. Archived from [the original] (PDF) on 19 December 2008. Retrieved 12 October 2008.\n0007. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] Eslea MacDonald, Peter (November 2010). [\"The Murray of Tullibardine Tartan – A Re-appraisal\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 25 June 2023. Retrieved 25 June 2023.\n0008. **[^] **[Tuckett (2016)], p. 2, citing: Scarlett, James D. (1997). \"Tartan: The Highland cloth and Highland art form\". In Butt, John; Ponting, Kenneth (eds.). _Scottish Textile History_. Aberdeen University Press. p. 71.\n0009. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] [_**k**_] [_**l**_] [_**m**_] Newsome, Matthew Allan C. (1994). [\"Introduction to Tartan\"]. Franklin, North Carolina: Scottish Tartans Museum. Archived from [the original] on 10 February 2006. Retrieved 31 May 2010.\n0010. **[^] **[Cheape (2012)], pp. 15–16.\n0011. **[^] **Griest, Terry L. (1986). _Scottish Tartans and Family Names_. Harp & Lion Press. p. 2. The words tartan and plaid have come to be used synonymously, particularly in North America. This usage is incorrect when referring to Scottish tartan\n0012. ^ [_**a**_] [_**b**_] [_**c**_] [\"Frequently Asked Questions\"]. _ScottishTartans.org_. Archived from [the original] on 17 April 2000. Retrieved 16 October 2008.\n0013. **[^] **[Cheape (2012)], p. 15.\n0014. **[^] **[Tuckett (2016)], p. 10.\n0015. ^ [_**a**_] [_**b**_] [Black (1959)], p. 3.\n0016. **[^] **[Telfer Dunbar (1979)], p. 2.\n0017. **[^] **[Ramsay, William] (1875). [\"Paludamentum\"]. In [Smith, William] (ed.). _A Dictionary of Greek and Roman Antiquities_. London: John Murray. pp. 853–854. Retrieved 28 May 2023 – via University of Chicago.\n0018. **[^] **\"plaid\". [_Merriam-Webster's Collegiate Dictionary_] (11th ed.). [Merriam-Webster]. 2003\\. p. 947. [ISBN] [0877798095].\n0019. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] [_**k**_] [_**l**_] Newsome, Matthew; Wilkinson, Todd (2010). [\"Hibernean Dress, Caledonian Custom: A brief history of Irish kilts and tartan\"]. _ScottishTartans.org_. Scottish Tartans Museum. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023.\n0020. ^ [_**a**_] [_**b**_] See, e.g., [Mackay (1924)], p. 94, quoting: Burt (1726).\n0021. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Telfer Dunbar (1979)], p. 40, quoting: Tucker, Thomas (1824) \\[1655\\]. [Murray, John Archibald] (ed.). [_Report upon the Settlement of the Revenues of Excise and Customs in Scotland_]. Bannatyne Club Press. [Archived] from the original on 15 July 2023. Retrieved 8 July 2023 – via Google Books.\n0022. ^ [_**a**_] [_**b**_] [\"Frequently Asked Questions\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2017. [Archived] from the original on 1 July 2023. Retrieved 20 June 2023.\n0023. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 55.\n0024. ^ [_**a**_] [_**b**_] [_**c**_] [Banks & de La Chapelle (2007)], p. 61.\n0025. **[^] **[Scarlett (1990)], p. 46.\n0026. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 23.\n0027. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] [\"Threadcount\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2020. [Archived] from the original on 3 April 2019. Retrieved 10 June 2023.\n0028. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Scarlett (1990)], p. 62\n0029. **[^] **[\"Berwick-upon-Tweed (symmetric)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 21 June 2023. Retrieved 20 June 2023.\n0030. **[^] **[\"Tartan Details - Campbell of Lochnell Dress)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 21 June 2023. Retrieved 20 June 2023.\n0031. **[^] **[\"Tartan Details - Unnamed C18th - Cf 4445\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 21 June 2023. Retrieved 20 June 2023.\n0032. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] Eslea MacDonald, Peter (April 2022). [\"The Jacobite Tartan\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 23 June 2023. Retrieved 23 April 2023.\n0033. ^ [_**a**_] [_**b**_] [_**c**_] Eslea MacDonald, Peter (October 2018). [\"Tartan from Isabella Fraser's Wedding Dress 1785\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 30 November 2023. Retrieved 20 June 2023.\n0034. ^ [_**a**_] [_**b**_] [\"Tartan Details - Buchanan – 1800\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.\n0035. **[^] **[Scarlett (1990)], p. 53.\n0036. ^ [_**a**_] [_**b**_] [\"Tartan Details - MacMillan Anc (Clans Originaux)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.\n0037. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [\"What's a Threadcount\"]. _TartansAuthority.com_. Scottish Tartans Authority. Archived from [the original] on 4 June 2004. Retrieved 16 October 2008.\n0038. **[^] **[Scarlett (1990)], p. 17.\n0039. **[^] **[Scarlett (1990)], pp. 6–7.\n0040. ^ [_**a**_] [_**b**_] Eslea MacDonald, Peter (2015). [\"Pattern Sticks – Fact or Fiction?\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 24 June 2023. Retrieved 23 June 2023.\n0041. **[^] **[Mackay (1924)], p. 46.\n0042. **[^] **[Scarlett (1990)], p. 51.\n0043. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Blakely (2015)], p. 13.\n0044. **[^] **Urquhart, Blair; Cruickshank, Kris (2006). _Textile32_ (Windows software) (v3.2 ed.). Comrie, Perthshire: Tartan Software / [Scottish Tartans Authority] International Tartan Index. \"Ticket\" menu.\n0045. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Scarlett (1990)], p. 63.\n0046. **[^] **See \"complaint\" about this in: [Scarlett (1990)], pp. 55–56.\n0047. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 88.\n0048. **[^] **See usage at, e.g.: [\"Tartan Details - Edmonton Scottish Society\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2021. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.\n0049. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 227.\n0050. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [Scarlett (1990)], p. 47.\n0051. **[^] **[Banks & de La Chapelle (2007)], p. 65.\n0052. **[^] **Usage example: [Scarlett (1990)], p. 33, footnote 6.\n0053. ^ [_**a**_] [_**b**_] See usage at, e.g.: [\"Tartan Details - Rankin (Dalgleish) #2\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.\n0054. **[^] **[Scarlett (1990)], p. 52.\n0055. **[^] **[Telfer Dunbar (1979)], pp. 145, 151.\n0056. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] Eslea MacDonald, Peter (January 2012). [\"The Original Cameron of Erracht Cloth?\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 25 June 2023. Retrieved 24 June 2023.\n0057. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] Eslea MacDonald, Peter (February 2019). [\"Traditional selvedge decoration on tartan cloth\"] (PDF). _ScottishTartans.co.uk_. Retrieved 23 June 2023.\n0058. **[^] **[Scarlett (1990)], pp. 52–53.\n0059. ^ [_**a**_] [_**b**_] Eslea MacDonald, Peter (April 2020). [\"The Use of a Selvedge Mark on Early Military Tartan\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 3 July 2023. Retrieved 3 July 2023.\n0060. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], p. 18.\n0061. **[^] **Eslea MacDonald, Peter (September 2018). [\"An 18th Century Plaid belonging to the Maclaines of Lochbuie\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 19 January 2023. Retrieved 23 June 2023.\n0062. **[^] **Eslea MacDonald, Peter (2004). [\"Two Tartan Plaids from Antigonish County, Nova Scotia\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 19 January 2023. Retrieved 23 June 2023.\n0063. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] Eslea MacDonald, Peter (January 2016). [\"A portion of joined plaiding at Glamis Castle – Prince Charles Edward tartan\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 29 June 2023. Retrieved 28 June 2023.\n0064. ^ [_**a**_] [_**b**_] [_**c**_] Eslea MacDonald, Peter (November 2014). [\"An Unnamed 18th Century Jacobite Era Plaid – Carlisle Museum\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 30 November 2023. Retrieved 28 June 2023.\n0065. **[^] **[Eslea MacDonald (2012)], p. 17.\n0066. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 42.\n0067. ^ [_**a**_] [_**b**_] See usage at, e.g.: [\"Tartan Details - Jacobite, Old\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 30 May 2023. Retrieved 10 June 2023.\n0068. **[^] **[Telfer Dunbar (1979)], pp. 49–50.\n0069. **[^] **[Scarlett (1990)], pp. 49–53, 181–183.\n0070. **[^] **Tilson Davis, Linda (2018). _Weaving Tartans: A Guide for Contemporary Handweavers_. Amazon Digital Services LLC - Kdp. [ISBN] [9781723818028].\n0071. **[^] **[Black (1959)].\n0072. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], pp. 146–147.\n0073. **[^] **[Eslea MacDonald (2012)], pp. 16–17.\n0074. **[^] **[Scarlett (1990)], pp. 46–48.\n0075. **[^] **[Mackay (1924)], p. 49.\n0076. ^ [_**a**_] [_**b**_] [_**c**_] [Scarlett (1990)], p. 40.\n0077. **[^] **[Scarlett (1990)], p. 40, plate 5(b).\n0078. **[^] **[Scarlett (1990)], p. 185.\n0079. **[^] **For example: [\"Tartan Details - Dundee Wallace\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 12 June 2023. Retrieved 11 June 2023.\n0080. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 45.\n0081. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 7.\n0082. ^ [_**a**_] [_**b**_] [_**c**_] [\"Tartan Details - Mar Dress\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.\n0083. ^ [_**a**_] [_**b**_] [\"Search Results \\[MacDougal\\]\"]. _TartanRegister.gov.uk_. 2023. Retrieved 13 June 2023.\n0084. **[^] **[\"Tartan Details - Innes of Learney Hunting (Personal)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.\n0085. **[^] **[Scarlett (1990)], p. viii.\n0086. **[^] **[\"Women's Dress\"]. _TartansAuthority.com_. [Scottish Tartans Authority]. 2010\\. Archived from [the original] on 6 July 2022. Retrieved 9 July 2023. Quoting: [Logan, James]; [McIan, Robert Ronald] (1845–1847). _Clans of the Scottish Highlands_. London: Ackermann & Co.\n0087. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], p. 10.\n0088. **[^] **Maerz, Aloys John; Paul, Morris Rea (1930). [_A Dictionary of Color_]. New York: [McGraw-Hill]. p. 149. [LCCN] [30016563]. [OCLC] [1150631] – via Internet Archive. There is a newer 1950 2nd edition, but both versions are collector's items that are difficult to find except via [inter-library loan].\n0089. **[^] **[Scarlett (1990)], pp. 67–68.\n0090. **[^] **[\"Tartan Details - Prince of Orange\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 9 June 2023. Retrieved 9 June 2023.\n0091. ^ [_**a**_] [_**b**_] [\"Tartan Details - Hello Kitty\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 9 June 2023.\n0092. **[^] **[\"Tartan Register colours\"] (PDF). _TartanRegister.gov.uk_. Scottish Register of Tartans. 2022. Retrieved 9 June 2023.\n0093. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [Barnes & Allen (1956)]: p. 266.\n0094. ^ [_**a**_] [_**b**_] [_**c**_] [Stewart, D. W. (1893)], p. 33.\n0095. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Telfer Dunbar (1979)], pp. 222–224.\n0096. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], p. 9.\n0097. **[^] **[Telfer Dunbar (1979)], p. 225.\n0098. **[^] **[Telfer Dunbar (1979)], pp. 227–229.\n0099. **[^] **[Telfer Dunbar (1979)], pp. 226, 228, 231, 239–240.\n0100. **[^] **Eslea MacDonald, Peter. [\"The Use of Colour in Tartan\"]. _TartansAuthority.com_. Scottish Tartans Authority. Archived from [the original] on 4 June 2004. Retrieved 22 October 2008.\n0101. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] Thompson, J. Charles (1989). \"Color Schemes\". _So You're Going to Wear the Kilt_ (3rd revised ed.). Arlington, Virginia: Heraldic Art. pp. 34–37\\. [ISBN] [0862280176].\n0102. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 145.\n0103. **[^] **Actually, this term has been used in this specific context: Urquhart, Blair; Cruickshank, Kris (2006). _Textile32_ (Windows software) (v3.2 ed.). Comrie, Perthshire: Tartan Software / [Scottish Tartans Authority] International Tartan Index. \"Select Colours for Pattern\" menu. Select a colourway...\n0104. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] Eslea MacDonald, Peter (2014). [\"The Use of Colour in Tartan\"]. _ScottishTartans.co.uk_. Retrieved 16 May 2023.\n0105. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] [_**k**_] Newsome, Matthew Allan C. (2006). [\"Tartan Colors — A Photo Essay\"]. _Albanach_. Retrieved 16 May 2023.\n0106. **[^] **[Telfer Dunbar (1979)], p. 238.\n0107. **[^] **[Telfer Dunbar (1979)], p. 50.\n0108. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 43. Scarlett says \"toward the end of the Great War\" (WWI), while Newsome (2006) says \"some time after World War II\".\n0109. **[^] **_The Story of Reproduction Tartans_ (brochure). Selkirk, Scotland: D. C. Dalgliesh Ltd. c. 1990s.\\[ _[self-published source] _\\]\n0110. **[^] **[Eslea MacDonald (2012)], p. 8.\n0111. ^ [_**a**_] [_**b**_] [_**c**_] [\"Tartan Details - Stewart, Prince Charles Edward\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 25 June 2023. SRT's entry on this tartan attempts to illustrate it in the original regimental version with azure and olive green, but mis-renders it in tones that are nearly grey and do not agree with SRT's own colour codes.\n0112. **[^] **[\"Tartan Details - Balmoral (Original)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 1 April 2017. Retrieved 8 June 2023.\n0113. **[^] **[\"Tartan Details - Akins Clan (Personal)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 12 June 2023. Retrieved 11 June 2023.\n0114. ^ [_**a**_] [_**b**_] [\"Tartan Details - MacBean (Clan)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 8 June 2023.\n0115. **[^] **Illustrated, but with an incorrect \"Childers (Gurkha Rifles)\" name, here: [\"Tartan Details - Childers (Gurkha Rifles)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 20 June 2023. Retrieved 14 June 2023. On the naming confusion, see: [Scarlett (1990)], pp. 32–33.\n0116. **[^] **[\"Tartan Details - Gordon Red\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 13 June 2023. Retrieved 19 June 2023.\n0117. **[^] **[\"Tartan Details - Galloway Hunting\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 13 June 2023. Retrieved 8 June 2023.\n0118. **[^] **[\"Tartan Details - Galloway Green (yellow line)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 13 June 2023. Retrieved 8 June 2023.\n0119. **[^] **[\"Tartan Details - US Air Force Reserve Pipe Band\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 13 June 2023. Retrieved 8 June 2023.\n0120. **[^] **[\"Tartan Details - McCandlish Red\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 15 May 2023. Retrieved 9 June 2023.\n0121. **[^] **[\"Tartan Details - Hunting Green\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 9 June 2023.\n0122. **[^] **[\"Tartan Details - McCandlish Dress Grey\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 9 June 2023.\n0123. **[^] **[\"Tartan Details - Isle of Skye\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 13 June 2023. Retrieved 8 June 2023.\n0124. **[^] **[\"Tartan Details - Chisholm Colonial\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 13 June 2023. Retrieved 8 June 2023.\n0125. ^ [_**a**_] [_**b**_] [\"Tartan details - LOVERBOY\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2018. [Archived] from the original on 30 December 2021. Retrieved 1 January 2022.\n0126. **[^] **[Scarlett (1990)], p. 48.\n0127. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [MacDonald, M. (1995)] p. 48.\n0128. **[^] **[Innes of Learney (1971)], p. 6. Citing: [Mackay (1924)]; and: [Campbell, A. (1890)].\n0129. **[^] **[Mackay (1924)], pp. 37–38, 40–41, 45–46.\n0130. **[^] **[Martin, R. (1988)], pp. 60–61 and throughout.\n0131. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] Mair, Victor H. (2016). [\"Ancient Mummies of the Tarim Basin: Discovering Early Inhabitants of Eastern Central Asia\"]. _Expedition Magazine_. Vol. 58, no. 2. Philadelphia: [Penn Museum]. [Archived] from the original on 28 September 2023. Retrieved 10 June 2023.\n0132. **[^] **[Mallory, J. P.]; [Mair, Victor H.] (2000). _The Tarim Mummies: Ancient China and the Mystery of the Earliest Peoples from the West_. London: Thames & Hudson. p. 237. [ISBN] [9780500051016].\n0133. **[^] **[\"The genomic origins of the Bronze Age Tarim Basin mummies\"]. _European Nucleotide Archive_. School of Life Sciences, [Jilin University]. 20 August 2021. [Archived] from the original on 16 February 2022. Retrieved 10 June 2023.\n0134. ^ [_**a**_] [_**b**_] [_**c**_] Seenan, Gerard (24 January 1999). [\"Preserved with the mummies, clues to an ancient mystery: Tattered fabric that could hold the key to early history\"]. _[The Guardian] _. Retrieved 10 June 2023.\n0135. **[^] **[Mallory & Mair (2000)], p. 191.\n0136. **[^] **Shuicheng, Li (2003). [\"Ancient Interactions in Eurasia and Northwest China: Revisiting J. G. Andersson's Legacy\"]. _Bulletin of the Museum of Far Eastern Antiquities_. **75**. Stockholm: Fälth & Hässler: 13.\n0137. **[^] **Coonan, Clifford (28 August 2006). [\"A meeting of civilisations: The mystery of China's Celtic mummies\"]. _[The Independent] _. Archived from [the original] on 3 April 2008. Retrieved 11 October 2008.\n0138. ^ [_**a**_] [_**b**_] Spada, Gianfranco (31 October 2019). [\"Qizilchoqa Tartan Tissue – Anonymous\"]. _Geometricae_. Valencia / London: Center for International Research on Concrete Art. [ISSN] [2605-5309]. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.\n0139. **[^] **Fortson, Benjamin W. (2004). _Indo-European Language and Culture: An Introduction_. Blackwell Publishing. p. 352. [ISBN] [1405103167].\n0140. **[^] **Newsome, Matthew Allan C. (2005). [\"Who Says Tartan Is Just for Scots?\"]. _Albanach.org_. Retrieved 14 July 2023. Quoting: [Wayland Barber, Elizabeth J.] (2000) \\[1999\\]. _The Mummies of Ürümchi_. London: W. W. Norton & Co. [ISBN] [9780393320190].\n0141. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [Thompson (1992)], p. iv.\n0142. **[^] **[Scarlett (1990)], pp. 9–10.\n0143. ^ [_**a**_] [_**b**_] [Adam (1908/1970)], p. 385.\n0144. ^ [_**a**_] [_**b**_] [Stewart (1893)], p. 8, citing: [Lesley, John] (1885) \\[1571\\]. Cody, E. G. (ed.). _The Historie of Scotland_. Vol. 1. Edinburgh: Scottish Text Society.\n0145. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Scarlett (1990)], p. 10.\n0146. **[^] **Delamarre, Xavier (2008). _Dictionnaire de la langue gauloise: Une approche linguistique du vieux-celtique continental_ (in French). Errance. [ISBN] [9782877723695].\n0147. **[^] **[\"Tartan Details - Falkirk\"]. _TartaRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 1 February 2024. Retrieved 2 February 2024.\n0148. ^ [_**a**_] [_**b**_] Chen, Min (7 April 2023). [\"A Tattered Scrap of Fabric, Unearthed From a Peat Bog in the Scottish Highlands, Is the World's Oldest Piece of Tartan\"]. _Artnet News_. [Archived] from the original on 18 May 2023. Retrieved 18 May 2023.\n0149. **[^] **[\"Who wore Scotland's oldest piece of tartan?\"]. _[The Scotsman] _. Retrieved 11 February 2020.\n0150. **[^] **[\"Earliest depiction of Scottish tartan discovered on Roman statue\"]. _[The Scotsman] _. 4 December 2012. [Archived] from the original on 8 June 2023. Retrieved 8 June 2023. A newswire story, essentially the same article is repeated [at _The Herald_] [Archived] 8 June 2023 at the [Wayback Machine] and in shorter form [at _BBC News_].\n0151. **[^] **[Scarlett (1990)], pp. ix–x.\n0152. **[^] **[Brown (2012)], p. 2.\n0153. **[^] **[\"Tartan – Shepherd / Falkirk\"]. Scottish Tartans World Register. Archived from [the original] on 4 October 2011. Retrieved 8 October 2008.\n0154. **[^] **[\"Falkirk tartan\"]. Search Results. [National Museums Scotland]. [Archived] from the original on 23 August 2017. Retrieved 8 October 2008.\n0155. **[^] **Wild, J. P. (2002). \"The Textile Industries of Roman Britain\". _Britannia_. **33**: 1–42\\. [doi]: [10.2307/1558851]. [JSTOR] [1558851].\n0156. **[^] **Wild, J. P. (1964). \"The Textile Term _Scutulatus_\". _The Classical Quarterly_. New Series. **14** (2): 263–266\\. [doi]: [10.1017/S0009838800023818]. [JSTOR] [637730]. [S2CID] [170603077].\n0157. **[^] **Harrison, Mark (1993). _Anglo-Saxon Thegn, 449–1066 A.D_. Osprey Publishing. p. 17. [ISBN] [1855323494].\n0158. **[^] **[Telfer Dunbar (1979)], p. 48.\n0159. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)], pp. 66, 68.\n0160. **[^] **[Stewart, D. W. (1893)], p. 1 \\[A\\].\n0161. **[^] **[Innes, Cosmo] (1860). [_Scotland in the Middle Ages: Sketches of Early Scottish History and Social Progress_]. Edinburgh: Edmonston and Douglas. p. 227 – via Google Books. Also cited in: [Campbell, J. F. (1893)], p. 335.\n0162. ^ [_**a**_] [_**b**_] [_**c**_] [Innes of Learney (1971)], p. 10.\n0163. **[^] **[Stewart, D. W. (1893)], p. 2.\n0164. **[^] **[Telfer Dunbar (1979)], p. 14; quoting an article in _[The Scotsman] _, 7 December 1948, summarizing a Grant presentation the night before at the Celtic Union of Edinburgh.\n0165. **[^] **Richardson, Alan (27 March 2023). [\"Oldest tartan found to date back to 16th Century\"]. _BBC_. Retrieved 28 March 2023.\n0166. **[^] **[\"Scotland's oldest tartan discovered by Scottish Tartans Authority\"]. [V&A Dundee]. [Archived] from the original on 23 May 2024. Retrieved 22 May 2023.\n0167. **[^] **Killgrove, Kristina (1 April 2023). [\"Oldest Scottish tartan ever found was preserved in a bog for over 400 years\"]. _Live Science_. Future US. [Archived] from the original on 22 May 2023. Retrieved 22 May 2023.\n0168. **[^] **[Maria Hayward], _Stuart Style: Monarchy, Dress and the Scottish Male Elite_ (Yale, 2020), p. 39 citing [National Records of Scotland] E21/34 f.63v: [Rosalind K. Marshall], \"To be the Kingis Grace ane Dowblett: The Costume of James V, King of Scots\", Costume, 28:1 (2014), p. 16: [James Balfour Paul], _Accounts of the Treasurer_, 6 (Edinburgh, 1905), pp. 79–80, 436–437.\n0169. ^ [_**a**_] [_**b**_] [_**c**_] [\"Sources in the National Records of Scotland\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2020. [Archived] from the original on 19 May 2023. Retrieved 28 May 2023.\n0170. **[^] **[Stewart, D. W. (1893)], pp. 4–5.\n0171. **[^] **[Dunbar, William] (1834). Laing, David (ed.). [_The Poems of William Dunbar_] (PDF). Vol. II. Edinburgh: Laing & Forbes. p. 38. [Archived] (PDF) from the original on 6 June 2023. Retrieved 6 June 2023. Also quoted in: [Mackay (1924)], p. 53.\n0172. **[^] **[Telfer Dunbar (1979)], pp. 51–52, and plate 7.\n0173. **[^] **[\"Tartan Details - Lennox\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023.\n0174. **[^] **[\"Tartan & Symbols\"]. _ClanLennox.org_. Clan Lennox Council of Commissioners. 2019. [Archived] from the original on 22 June 2023. Retrieved 22 June 2023.\n0175. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] Smith, Philip D. Jr. (2020). [\"History of Tartan\"]. _ClanChiefs.org.uk_. [Standing Council of Scottish Chiefs]. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023. (Article first published on _PanAlba_.)\n0176. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] Eslea MacDonald, Peter (2016). [\"Musings on the Arisaid and Other Female Dress\"] (PDF). _ScottishTartans.org.uk_. [Archived] (PDF) from the original on 22 June 2023. Retrieved 21 June 2023.\n0177. **[^] **See gallery at Douglas Archives, which also does not repeat the 1575 tartan story: [\"Margaret, Countess of Lennox\"]. _The Douglas Archives: A collection of historical and genealogical research_. Clan Douglas Society. 30 September 2021. [Archived] from the original on 22 June 2023. Retrieved 22 June 2023.\n0178. **[^] **[Logan, James] (1831). [_The Scottish Gaël; Or, Celtic Manners, as Preserved Among the Highlanders: Being an Historical and Descriptive Account of the Inhabitants, Antiquities and National Peculiarities of Scotland_]. Cornhill, Aberdeen: Smith, Elder & Co. p. 231. [Archived] from the original on 8 September 2023. Retrieved 19 August 2023 – via Google Books.\n0179. **[^] **[Macalister, R. A. Stewart]; Murphy, Michael, eds. (2008). [_Lebor Gabála Érenn: The Book of the Taking of Ireland_] (PDF). Vol. Part VI: Index D–F. University College Cork. \"Eochu Édgathach\" entry. [Archived] (PDF) from the original on 11 June 2023. Retrieved 10 June 2023 – via CELT: [Corpus of Electronic Texts].\n0180. **[^] **[Mackay (1924)], p. 35.\n0181. ^ [_**a**_] [_**b**_] [_**c**_] [Stewart, D. W. (1893)], p. 7.\n0182. **[^] **[Adam (1908/1970)], p. 385, citing: [Heron, Robert] (1799). _History of Scotland_. Edinburgh/London: T. Cadell Jun. & W. Davies.\n0183. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] Neil, Tim (dir.) (2013). [_Spinning a Yarn: The Dubious History of Scottish Tartan_] (Television production). [BBC Television]. [Archived] from the original on 18 May 2023. Retrieved 17 May 2023 – via YouTube.\n0184. **[^] **[Stewart, D. W. (1893)], pp. 3–4.\n0185. ^ [_**a**_] [_**b**_] [_**c**_] [Trevor-Roper (1983)], p. 23.\n0186. **[^] **[Telfer Dunbar (1979)], p. 234.\n0187. **[^] **[Telfer Dunbar (1979)], pp. 27–33, 35; quotes numerous period sources.\n0188. **[^] **Caldwell, David; Oleksy, Vicky; Rhodes, Bess (2023). _The Battle of Pinkie, 1547: The Last Battle Between the Independent Kingdoms of Scotland and England_. Oxford: [Oxbow Books]. p. 85. [ISBN] [9781789259735].\n0189. **[^] **[Telfer Dunbar (1979)], p. 27.\n0190. ^ [_**a**_] [_**b**_] [_**c**_] [Adam (1908/1970)], p. 387.\n0191. **[^] **[Stewart, D. W. (1893)], p. 9.\n0192. **[^] **[Stewart, D. W. (1893)], p. 9. It also appears in [Banks & de La Chapelle (2007)] p. 68, citing: Grant, I. F.; Cheape, Hugh (1997). _Periods in Highland History_. New York: Barnes & Noble. p. 8. [ISBN] [9780760717158].; and (in the original Early Modern English) in [Mackay (1923)], p. 67.\n0193. **[^] **[Campbell, J. F. (1893)], p. 336.\n0194. **[^] **[Telfer Dunbar (1979)], pp. 32–33, 92. This excerpt was left out of later republications, and is found only in Dunbar, among the later writers; he tracked down the original 1617 book.\n0195. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 53, and plate 8.\n0196. **[^] **[Scarlett (1990)], pp. 4, 6–7.\n0197. **[^] **[Telfer Dunbar (1979)], pp. 224, 229, 239.\n0198. ^ [_**a**_] [_**b**_] [_**c**_] [Telfer Dunbar (1979)], p. 72, quoting: [Campbell, A. (1890)].\n0199. **[^] **[Scarlett (1990)], pp. 5, 239.\n0200. **[^] **[Banks & de La Chapelle (2007)], pp. 68, 70.\n0201. **[^] **[Banks & de La Chapelle (2007)], pp. 65–66.\n0202. **[^] **[Campbell, J. F. (1893)], p. 336: \"tartan was anciently worn, and ... particular patterns were worn in certain districts.\" See also p. 335, on natural dyes.\n0203. **[^] **[Hinderks (2014)], p. 2, citing: Nicholson, Robin (November 2005). \"From Ramsay's _Flora MacDonald_ to Raeburn's _MacNab_: The Use of Tartan as a Symbol of Identity\". _Textile History_. **36** (2): 149. [doi]: [10.1179/004049605x61546]. [S2CID] [192109063].\n0204. **[^] **[Innes of Learney (1971)], pp. 8–9.\n0205. **[^] **[Hinderks (2014)], p. 3.\n0206. **[^] **[Stewart, D. W. (1893)], pp. 9–10.\n0207. **[^] **[Telfer Dunbar (1979)], p. 91.\n0208. ^ [_**a**_] [_**b**_] [Stewart, D. W. (1893)], pp. 10–11.\n0209. **[^] **[Stewart, D. W. (1893)], p. 14. Also quoted with a slightly different translation in [Telfer Dunbar (1979)], p. 32.\n0210. **[^] **[Stewart, D. W. (1893)], p. 15.\n0211. **[^] **[Henshall, Audrey S.]; Seaby, Wilfred A. (1961–1962). \"The Dungiven Costume\". _Ulster Journal of Archaeology_. 3rd series. 24–25\\. Ulster Archaeological Society: 119–142\\. [JSTOR] [20627382].\n0212. **[^] **[\"Irish Tartan and the Irish Kilt\"]. _Donaldsons.scot_. Donaldsons of Scotland. 2023. \"The Origins of Irish Tartans\" section. [Archived] from the original on 21 May 2023. Retrieved 30 May 2023.\n0213. ^ [_**a**_] [_**b**_] Wilton, Brian (1 August 2019). [\"The history of district tartans\"]. _History Scotland_. Warners Group Publications. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023.\n0214. ^ [_**a**_] [_**b**_] [_**c**_] [Paterson, M. B. (2001)], p. 182.\n0215. ^ [_**a**_] [_**b**_] [_**c**_] Smith, Clifford (2004). [\"Tartan and Kilts\"]. _UlsterScotsAgency.com_. [Ulster-Scots Agency]. Archived from [the original] on 14 August 2009. Retrieved 30 May 2023.\n0216. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] Newsome, Matthew (18 November 2005). [\"Irish Tartans: Scottish tartans in disguise?\"]. _Albanach_. [Archived] from the original on 23 May 2024. Retrieved 30 May 2023.\n0217. **[^] **Dickson, Leslie (1995). [\"Ulster Tartan\"]. _Ullans: The Magazine for Ulster-Scots_ (3). Ulster-Scots Academy. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023.\n0218. **[^] **[\"Tartan Details - Ulster (Original)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023.\n0219. **[^] **[\"Tartan Details - Ulster (Red (Reconstruction))\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 30 May 2023. Retrieved 30 May 2023.\n0220. ^ [_**a**_] [_**b**_] Newsome, Matthew (16 March 2008). [\"Rethinking Irish Tartans\"]. _Albanach_. [Archived] from the original on 23 May 2024. Retrieved 30 May 2023.\n0221. ^ [_**a**_] [_**b**_] [Stewart, D. W. (1893)], p. 15. Stewart was reading the 1607 Latin edition.\n0222. **[^] **[Telfer Dunbar (1979)], p. 33. Dunbar was reading the abridged English edition of 1617.\n0223. **[^] **Working, Laura (25 October 2016). [\"Humanism in the Desert: Transculturality at the Huntington Library\"]. _TIDE: Travel, Transculturality, and Identity in England, c. 1500–1700_. University of Oxford. [Archived] from the original on 17 June 2023. Retrieved 16 June 2023.\n0224. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Telfer Dunbar (1979)], p. 155.\n0225. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] Newsome, Matthew Allan C. (17 July 2016). [\"The Original Military Tartan – the Black Watch\"]. _Albanach_. [Archived] from the original on 23 May 2024. Retrieved 10 June 2023. Citing: Scarlett, James D. (2003). _The Origins and Development of Military Tartans: A Re-Appraisal_. Partizan Press. [ISBN] [1858185009].\n0226. ^ [_**a**_] [_**b**_] Anderson, Donald (Spring 1939). \"The Earliest Appearance of the Black Watch\". _Journal of the Society for Army Historical Research_. **18** (69): 16–20\\. [JSTOR] [44219779].\n0227. ^ [_**a**_] [_**b**_] [Scarlett (1990)], pp. 26–27. The period material is also quoted at length in: [Telfer Dunbar (1979)], p. 157.\n0228. **[^] **Paterson, James (1847). _History of the County of Ayr_. Vol. 1. pp. 380–382.\n0229. **[^] **[Telfer Dunbar (1979)], pp. 33–34. Also quoted in: [Banks & de La Chapelle (2007)], p. 70. And: [Stewart, D. W. (1893)], pp. 15–16.\n0230. **[^] **[Telfer Dunbar (1979)], p. 93.\n0231. **[^] **[Telfer Dunbar (1979)], pp. 94–95.\n0232. **[^] **[Telfer Dunbar (1979)], p. 37.\n0233. **[^] **[Campbell, J. F. (1893)], pp. 369–370.\n0234. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Scarlett (1990)], p. 12.\n0235. **[^] **[Banks & de La Chapelle (2007)], p. 63.\n0236. **[^] **[Telfer Dunbar (1979)], p. 35.\n0237. **[^] **[Telfer Dunbar (1979)], p. 53 and title page. Dunbar incorrectly dates the map to 1643, though it is clearly marked 1653 in Roman numerals.\n0238. ^ [_**a**_] [_**b**_] [_**c**_] [Telfer Dunbar (1979)], pp. 38–39.\n0239. **[^] **[Telfer Dunbar (1979)], pp. 39–40.\n0240. **[^] **[Telfer Dunbar (1979)], pp. 37–38.\n0241. **[^] **[Telfer Dunbar (1979)], p. 41, quoting: [Franck, Richard] (1821) \\[1658\\]. _Northern Memoirs_. Edinburgh/London: Archibald Constable & Co. / Hurst, Robinson & Co. Dunbar gives a date of 1656, but the book was written and first published in 1658.\n0242. **[^] **[Telfer Dunbar (1979)], p. 41, quoting: [Ray, John] (1846) \\[1662\\]. _Memorials of John Ray_. London: Ray Society.\n0243. **[^] **[Telfer Dunbar (1979)], pp. 91–92.\n0244. **[^] **[Telfer Dunbar (1979)], pp. 41–42.\n0245. **[^] **[Telfer Dunbar (1979)], p. 42, quoting: [Hume Brown, Peter], ed. (1891). [_Early Travellers in Scotland, 1295–1689_]. Edinburgh: David Douglas – via Internet Archive.\n0246. **[^] **[Telfer Dunbar (1979)], pp. 43–44.\n0247. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 97.\n0248. **[^] **[Telfer Dunbar (1979)], pp. 55–56, and plate 9. When Dunbar was writing, the portrait was thought to date to c. 1660 and to depict either an unknown Highland chieftain or the actor-playwright [John Lacy].\n0249. **[^] **[Wright, John Michael]. [\"Lord Mungo Murray \\[Am Morair Mungo Moireach\\], 1668–1700. Son of 1st Marquess of Atholl\"]. _NationalGalleries.org_. National Galleries of Scotland. [Archived] from the original on 29 January 2023. Retrieved 16 June 2023.\n0250. **[^] **[Telfer Dunbar (1979)], p. 56.\n0251. ^ [_**a**_] [_**b**_] [Stewart, D. W. (1893)], p. 24. Also quoted in: [Telfer Dunbar (1979)], pp. 44, 96.\n0252. ^ [_**a**_] [_**b**_] Philip of Almerieclose, James (1888) \\[1691\\]. [_The Grameid: An Heroic Poem Descriptive of the Campaign of Viscount Dundee in 1689_]. Translated by Murdoch, Alexander D. Edinburgh: Scottish Historical Society. Retrieved 8 June 2023 – via Internet Archive.\n0253. ^ [_**a**_] [_**b**_] [Stewart, D. W. (1893)], pp. 22–23: \" _The Grameid_, written in 1691, contains many references to the clothing and uniforms of the Highland army serving under [Viscount Dundee].\"\n0254. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [Scarlett (1990)], p. 13.\n0255. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 12. Also quoted in: [Stewart, D. W. (1893)], p. 25; and: [Telfer Dunbar (1979)], p. 45.\n0256. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Scarlett (1990)], p. 34.\n0257. ^ [_**a**_] [_**b**_] Eslea MacDonald, Peter (19 January 2012). [\"Tartans of the Royal Company of Archers\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 24 June 2023. Retrieved 23 June 2023.\n0258. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Stewart, D. W. (1893)], pp. 26–28.\n0259. ^ [_**a**_] [_**b**_] [_**c**_] [Mackay (1924)], p. 50, at footnote.\n0260. **[^] **See sources cited in the [§ Clan tartans] section.\n0261. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Stewart, D. W. (1893)], p. 29.\n0262. **[^] **[Stewart, D. W. (1893)], pp. 31–32.\n0263. **[^] **[Campbell, J. F. (1893)], p. 347.\n0264. **[^] **[Telfer Dunbar (1979)], pp. 62–66, plates 16–19, 23–24, 28–29, 30, 33–35, 44.\n0265. **[^] **[MacBain (1911)], p. 151. Also quoted in: [Telfer Dunbar (1979)], pp. 92–93.\n0266. **[^] **[Stewart, D. W. (1893)], p. 25–26.\n0267. ^ [_**a**_] [_**b**_] [_**c**_] [Scarlett (1990)], pp. 39–40.\n0268. **[^] **[Brown (2012)], p. 6; citing: Stiùbhart, Domnhall Uilleam (2009). \"Highland rogues and roots of Highland romanticism\". In MacLachlan, Christopher (ed.). _Crossing the Highland Line_. Glasgow: Association for Scottish Literary Studies.\n0269. **[^] **[Stewart, D. W. (1893)], pp. 30–31. Also quoted in: [Telfer Dunbar (1979)], p. 99.\n0270. **[^] **[Telfer Dunbar (1979)], p. 100.\n0271. **[^] **[Telfer Dunbar (1979)], p. 172.\n0272. ^ [_**a**_] [_**b**_] [_**c**_] [Scarlett (1990)], p. 22\n0273. **[^] **[Telfer Dunbar (1979)], pp. 57–58.\n0274. **[^] **[Telfer Dunbar (1979)], pp. 55–58.\n0275. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Telfer Dunbar (1979)], p. 144.\n0276. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Banks & de La Chapelle (2007)], p. 100.\n0277. **[^] **[Telfer Dunbar (1979)], p. 229.\n0278. **[^] **[Telfer Dunbar (1979)], pp. 231–234.\n0279. **[^] **[Telfer Dunbar (1979)], pp. 39, 49.\n0280. ^ [_**a**_] [_**b**_] [Innes of Learney (1971)], pp. 10–11.\n0281. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)] p. 75.\n0282. **[^] **[Stewart, D. W. (1893)], p. 35.\n0283. **[^] **[Banks & de La Chapelle (2007)] pp. 17, 24.\n0284. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 17.\n0285. **[^] **[Trevor-Roper (1983)], pp. 20–21.\n0286. **[^] **[Hinderks (2014)], p. 5, citing (among others): MacInnes, Allan (2007). \"Jacobitism in Scotland: Episodic Cause of National Movement?\". _The Scottish Historical Review_. **86** (2): 229–251\\. [doi]: [10.3366/shr.2007.86.2.225]. [S2CID] [154561509].\n0287. **[^] **[Banks & de La Chapelle (2007)] pp. 24, 78.\n0288. **[^] **[\"Tartan Details - Unnamed C18th - Prince Charles Edward\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 28 May 2023. Retrieved 28 May 2023.\n0289. **[^] **[\"Tartan Details - Unnamed C18th - Prince Charles Edward #2\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 28 May 2023. Retrieved 28 May 2023.\n0290. **[^] **[\"Tartan Details - Prince Charles Edward (Edinburgh)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 28 May 2023. Retrieved 28 May 2023.\n0291. **[^] **[\"Tartan Details - Unnamed C18th - Prince Charles Edward #4\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 28 May 2023. Retrieved 28 May 2023.\n0292. **[^] **[Telfer Dunbar (1979)], pp. 87–89.\n0293. **[^] **[Telfer Dunbar (1979)], pp. 82–86, 90.\n0294. **[^] **[Telfer Dunbar (1979)], p. 76, plates 25, 27.\n0295. **[^] **[Stewart, D. W. (1893)], pp. 32–33.\n0296. **[^] **Eslea MacDonald, Peter (September 2021). [\"Culloden Tartan\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 19 January 2023. Retrieved 26 June 2023.\n0297. **[^] **[\"Tartan Details - Culloden 1746 - Original\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 27 June 2023. Retrieved 26 June 2023.\n0298. **[^] **Drysdale, Neil (13 November 2019). [\"Valuable Culloden plaid donated to the National Museum of Scotland\"]. _[The Press and Journal] _. [Archived] from the original on 15 July 2023. Retrieved 15 July 2023.\n0299. **[^] **[\"Tartan Details - Jacobite - 1850\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 30 May 2023. Retrieved 10 June 2023.\n0300. **[^] **[\"Tartan Details - Jacobite #2\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 23 May 2024. Retrieved 10 June 2023.\n0301. **[^] **[\"Tartan Details - Jacobite Dress #1\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 10 June 2023. Retrieved 10 June 2023.\n0302. **[^] **[Innes of Learney (1971)], pp. 10–11: \"No doubt the 'Jacobite' (political) tartan and a number of older Lowland tartans were invented at this time \\[1840s\\]\".\n0303. ^ [_**a**_] [_**b**_] [Cowan, Paul] (2021). [\"Quick Guide to the Scottish Regiments\"]. _Scottish Military Disasters_. [Archived] from the original on 23 May 2024. Retrieved 18 May 2023. This is the updated website version of the book: Cowan, Paul (2008). _Scottish Military Disasters_. Neil Wilson Publishing.\n0304. ^ [_**a**_] [_**b**_] [_**c**_] [Groves (1893)]: p. 2.\n0305. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Campbell of Airds, Alastair] (2000). _A History of Clan Campbell; Volume 1, From Origins to the Battle of Flodden_. Edinburgh: [Edinburgh University Press]. pp. 259–261\\. [ISBN] [1902930177].\n0306. **[^] **[Telfer Dunbar (1979)], p. 158.\n0307. **[^] **[Armstrong (2017)], p. 196.\n0308. **[^] **[Hinderks (2014)], p. 5.\n0309. ^ [_**a**_] [_**b**_] [_**c**_] [Hinderks (2014)], pp. 5–7, citing (among others): Cheape, Hugh (2012) \\[2010\\]. \"Gheibhte breacain charnaid\". In Brown, Ian (ed.). _From Tartan to Tartanry: Scottish Culture, History and Myth_. Edinburgh University Press. [ISBN] [9780748664641].\n0310. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], p. 3.\n0311. ^ [_**a**_] [_**b**_] Eslea MacDonald, Peter (March 2021). [\"Act of Proscription 1746: The Tartan Ban – Fact or Myth?\"] (PDF). _ScottishTartans.co.uk_. [Archived] (PDF) from the original on 23 May 2024. Retrieved 13 May 2023.\n0312. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 3.\n0313. ^ [_**a**_] [_**b**_] [_**c**_] [Banks & de La Chapelle (2007)] p. 84.\n0314. **[^] **[\"Tartan Details - MacKintosh\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 11 June 2023. Retrieved 11 June 2023.\n0315. ^ [_**a**_] [_**b**_] [_**c**_] [\"Tartan Details - Marchioness of Huntly's\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 19 June 2023. Retrieved 20 June 2023. SRT's record is mistitled \"Marchioness of Huntly's\" instead of \"Huntly\". At the separate [record for the Marchioness tartan] [Archived] 19 June 2023 at the [Wayback Machine] they correctly identify it as such. SRT also incorrectly states this tartan was published in Wilsons' _Key Pattern Book_ of 1819; that again applies to the Marchioness entry.\n0316. **[^] **[\"Tartan Details - Gordon, Red (1819)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. [Archived] from the original on 3 July 2023. Retrieved 19 June 2023.\n0317. ^ [_**a**_] [_**b**_] [_**c**_] [Tuckett (2016)], p. 6.\n0318. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)] pp. 84–85.\n0319. ^ [_**a**_] [_**b**_] [_**c**_] Gardiner, Karen (29 March 2023). [\"The real history of tartan, from the Scottish Highlands to the streets of Tokyo\"]. _[National Geographic] _. Archived from [the original] on 29 March 2023.\n0320. **[^] **[Scarlett (1990)], p. 15, footnote 9.\n0321. **[^] **[Hinderks (2014)], p. 10, citing: Cheape, Hugh (1991). _Tartan: The Highland Habit_. Edinburgh: National Museums Scotland. p. 49.\n0322. **[^] **[Hinderks (2014)], pp. 6–7, citing: Faiers, Jonathan (2008). _Tartan_. Oxford: Berg / Victoria & Albert Museum. pp. 107–108.\n0323. **[^] **[Armstrong (2017)], pp. 34, 36–37.\n0324. **[^] **[Telfer Dunbar (1979)], p. 9.\n0325. **[^] **[Trevor-Roper (1983)], p. 24.\n0326. **[^] **[Banks & de La Chapelle (2007)], p. 24.\n0327. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] Eslea MacDonald, Peter (November 2010). [\"The early use of the Murray of Tullibardine Tartan\"] (PDF). _ScottishTartans.co.uk_. Retrieved 24 June 2023.\n0328. **[^] **[Hinderks (2014)], p. 9, citing: Tuckett, Sally J. S. (2009). [\"National Dress, Gender and Scotland: 1745–1822\"]. _Textile History_. **40** (2: Researching the Garment Industry): 22. [doi]: [10.1179/004049609x12504376351308]. [S2CID] [161283151].\n0329. **[^] **[Hinderks (2014)], p. 9, citing: Coltman, Viccy (2010). [\"Party-coloured Plaid? Portraits of Eighteenth-century Scots in Tartan\"]. _Textile History_. **41** (2: Researching the Garment Industry): 189. [doi]: [10.1179/174329510X12798919710635]. [S2CID] [154382977].\n0330. **[^] **[Banks & de La Chapelle (2007)], p. 85; quoting: Cheape, Hugh (2005). _The Changing Image of the Highlands After 1745_. Benjamin West in Focus. National Gallieries of Scotland.\n0331. **[^] **[Brown (2012)], p. 3.\n0332. **[^] **Eslea MacDonald, Peter (May 2014). [\"Murray of Ochtertyre\"] (PDF). _ScottishTartans.co.uk_. Retrieved 21 June 2023.\n0333. **[^] **[Armstrong (2017)], p. 22, citing _Collins Encyclopaedia_.\n0334. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)], p. 87.\n0335. **[^] **[Stewart, D. W. (1893)], pp. 40, 42.\n0336. **[^] **[Trevor-Roper (1983)], pp. 24–25.\n0337. **[^] **[Hinderks (2014)], p. 9, quoting: [Nicholson (2005)], p. 158.\n0338. ^ [_**a**_] [_**b**_] [Martin, R. (1988)], p. 53.\n0339. ^ [_**a**_] [_**b**_] [_**c**_] [Scarlett (1990)], p. 14.\n0340. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Tuckett (2016)], p. 19.\n0341. **[^] **[Tuckett (2016)], pp. 4–5, 7.\n0342. ^ [_**a**_] [_**b**_] [_**c**_] [Hinderks (2014)], p. 10, citing: [Dziennik (2012)], pp. 127–129.\n0343. **[^] **[Campbell, J. F. (1893)], p. 172.\n0344. **[^] **[Telfer Dunbar (1979)], pp. 149–150.\n0345. **[^] **[Tuckett (2016)], pp. 6–7, 12–13.\n0346. **[^] **[Telfer Dunbar (1979)], pp. 145–146.\n0347. **[^] **[Armstrong (2017)], pp. 5, 19, 24.\n0348. ^ [_**a**_] [_**b**_] [Trevor-Roper (1983)], p. 25.\n0349. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Scarlett (1990)], p. 35.\n0350. **[^] **[Banks & de La Chapelle (2007)] p. 85.\n0351. ^ [_**a**_] [_**b**_] [_**c**_] [Tuckett (2016)], pp. 10–11.\n0352. **[^] **[Telfer Dunbar (1979)], pp. 8–10.\n0353. **[^] **[Tuckett (2016)], pp. 18–19.\n0354. **[^] **[Armstrong (2017)], pp. 10, 14, 17, 55–56.\n0355. ^ [_**a**_] [_**b**_] [_**c**_] Eslea MacDonald, Peter (October 2020). [\"An 18th century Tartan Dress Coat of the Ancient Caledonian Society\"] (PDF). _ScottishTartans.co.uk_. Retrieved 30 June 2023.\n0356. **[^] **[Armstrong (2017)], p. 32; credits the term \"Tory Highlandism\" to: Cookson, J. E. (1999). \"The Napoleonic Wars, military Scotland and Tory Highlandism in the early nineteenth century\". _Scottish Historical Review_. **78** (1): 60–75\\. [doi]: [10.3366/shr.1999.78.1.60].\n0357. **[^] **[Armstrong (2017)], pp. 14, 18, 44, 55–56, 196–197.\n0358. **[^] **[Brown (2012)], p. 5; citing: [Pittock, Murray] (2009). \"To see ourselves as other see us\". _European Journal of English Studies_. **13** (3): 298..\n0359. ^ [_**a**_] [_**b**_] [_**c**_] [Telfer Dunbar (1979)], p. 102.\n0360. **[^] **[Telfer Dunbar (1979)], p. 10.\n0361. **[^] **Simpson, Peter (1996). _The Independent Highland Companies, 1603–1760_. Edinburgh: J. Donald. pp. 116–117\\. [ISBN] [9780859764322].\n0362. **[^] **[\"Empire: 1815–1915\"]. _TheBlackWatch.co.uk_. Archived from [the original] on 17 May 2016. Retrieved 8 May 2016.\n0363. **[^] **[Telfer Dunbar (1979)], pp. 180–181, citing: Sumner, Percy (1948). \"\\[title unspecified by source\\]\". _Journal of the Society for Army Historical Research_. **XXVI** (106). Citing in turn the regiment's own order books, originally reproduced in _The Red Hackle_ in October 1935.\n0364. **[^] **[Campbell, J. F. (1893)], p. 343.\n0365. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Telfer Dunbar (1979)], p. 178, relying on Mackay Scobie.\n0366. ^ [_**a**_] [_**b**_] [Groves (1893)]: p. 3.\n0367. **[^] **[Scarlett (1990)], pp. 29–30.\n0368. **[^] **[Scarlett (1990)], pp. 26–28.\n0369. **[^] **Eslea MacDonald, Peter (October 2015). [\"42nd Regiment Band or Musicians' Tartan\"] (PDF). _ScottishTartans.co.uk_. Retrieved 24 June 2023.\n0370. **[^] **[\"Tartan Details - 42nd Regiment (Musicians)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 8 June 2023.\n0371. **[^] **[\"Tartan Details - 42nd Regt - Drummers' Plaid\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 26 June 2023.\n0372. **[^] **[Armstrong (2017)], p. 112.\n0373. **[^] **[Armstrong (2017)], p. 20.\n0374. **[^] **[Tuckett (2016)], pp. 9–10.\n0375. ^ [_**a**_] [_**b**_] [Hinderks (2014)], p. 8, citing: Dziennik, Matthew P. (2012). [\"Whig Tartan: Material Culture and Its Use in the Scottish Highlands, 1746–1815\"]. _Past & Present_ (217): 125, 136. [doi]: [10.1093/pastj/gts025].\n0376. **[^] **[\"Tartan Details - Loudoun's Highlanders\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 20 June 2023.\n0377. **[^] **Described and illustrated in: [Scarlett (1990)], pp. 27, 29, plate 2(a).\n0378. **[^] **[Telfer Dunbar (1979)], pp. 159, 184. Telfer Dunbar refers to them by their amalgamated, post-Childers Reforms names, but they are the same original regiments, 71st MacLeod's and 72nd Seaforth's.\n0379. ^ [_**a**_] [_**b**_] [_**c**_] [Barnes & Allen (1956)]: pp. 84–86.\n0380. **[^] **[\"Tartan Details - 78th Highlanders Regiment\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 20 June 2023.\n0381. **[^] **[\"Tartan Details - MacLeod, Green\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 6 August 2023.\n0382. **[^] **[\"Tartan Details - 74th Regiment of Foot\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 May 2023.\n0383. **[^] **[Eslea MacDonald (2012)], p. 20.\n0384. **[^] **[Barnes & Allen (1956)]: pp. 86–87.\n0385. **[^] **[\"Tartan Details - 92nd Regiment (Gordon)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 June 2023.\n0386. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], pp. 159–160.\n0387. **[^] **[\"Tartan Details - Gordon Clan\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 June 2023.\n0388. **[^] **[\"Tartans\"]. _HouseOfGordonUSA.org_. House of Gordon USA. 2020. Retrieved 24 June 2023.\n0389. **[^] **[Telfer Dunbar (1979)], pp. 185–186.\n0390. **[^] **[\"Tartan Details - Cameron of Erracht\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 7 June 2023.\n0391. **[^] **[\"Tartan Details - 79th Regiment\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 21 June 2023. This version is a slightly different setting as to hues, going a bit darker, but is clearly the same tartan as Cameron of Earracht.\n0392. **[^] **[Barnes & Allen (1956)]: p. 86.\n0393. **[^] **Browne, James (1854). [_History of the Highlands and of the Highland Clans: With an Extensive Selection from the Hitherto Inedited Stuart Papers_]. Vol. 4. A. Fullarton & Co. p. 377 – via Google Books.\n0394. **[^] **[\"Tartan Details - Inverness Fencibles\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 1 July 2023. This source incorrectly lists this as the Inverness Fencibles tartan and conflates the two units; the [Inverness-shire Fencibles], as they were properly named, was a completely different unit, raised the same year, and their tartan is unknown.\n0395. **[^] **[\"Tartan Details - 42nd Regiment\"]. Scottish Register of Tartans. Retrieved 8 June 2023.\n0396. **[^] **[Scarlett (1990)], p. 31, says that Robert Bain's _The Clans and Tartans of Scotland_ (1953 ed.) confirms this lightened Black Watch for the 93rd.\n0397. **[^] **[\"Tartan Details - Sutherland #2\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. Retrieved 8 June 2023.\n0398. **[^] **[\"Tartan Details - Sutherland 42nd\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. Retrieved 22 June 2023.\n0399. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] [_**i**_] [_**j**_] [_**k**_] [_**l**_] [_**m**_] [_**n**_] [_**o**_] [_**p**_] [_**q**_] [_**r**_] [_**s**_] [_**t**_] [_**u**_] [_**v**_] [_**w**_] [_**x**_] [_**y**_] [_**z**_] [_**aa**_] Newsome, Matthew Allan C. (2005). [\"Sources of the Tartans\"]. _Albanach_. Retrieved 16 May 2023.\n0400. **[^] **Moncreiffe of That Ilk, Iain (1962) \\[1954\\]. _The Robertsons (Clan Donnachaidh of Atholl)_. Edinburgh: W. & A. K. Johnston & G. W. Bacon Ltd. p. 9 (fig. opposite).\n0401. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)], p. 87; citing: [Stewart, D. C. (1974)], p. 2.\n0402. **[^] **Carman, W. Y. (1957). _British Military Uniforms from Contemporary Pictures_. London: Leonard Hill. pp. 146, 152.\n0403. **[^] **[\"Militaria Dictionary and Beginner's Guide\"]. _KellyBadge.co.uk_. Ellesmere, Shropshire: Ian G. Kelly (Militaria). 2000. \"Tartan Numbering System\" section. Retrieved 8 June 2023.\n0404. **[^] **[\"Regimental Tartans\"]. _DCDalgliesh.co.uk_. D. C. Dalgliesh Ltd. 2023. Retrieved 19 May 2023.\n0405. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Innes of Learney (1971)], pp. 9–10.\n0406. ^ [_**a**_] [_**b**_] [Paterson, M. B. (2001)], p. 155.\n0407. ^ [_**a**_] [_**b**_] [_**c**_] Ward, Philip; Edwards, Julia (2012) \\[1978\\]. [_The Book of Common Fallacies_]. Skyhorse Publishing. p. 422. [ISBN] [9781616083366]. Retrieved 29 May 2023.\n0408. **[^] **[Trevor-Roper (1983)], pp. 28–30.\n0409. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [Haswell Miller, A. E.] (1956). [Donaldson, Gordon] (ed.). _Common Errors in Scottish History_. London: Historical Association / George Philip & Son. Quoted at length in: [Telfer Dunbar (1979)], pp. 17–18; also quoted in: McGann, Kass (2003). [\"The Question of Clan Tartans\"]. _ReconstructingHistory.com_. \"The Evolution of the Kilt\" series. Archived from [the original] on 22 April 2008. Retrieved 10 June 2023.\n0410. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], p. 2.\n0411. ^ [_**a**_] [_**b**_] Mackay Scobie, Iain Hamilton (June 1942). \"Tartan and Clan Tartan\". _Chambers Journal_. Quoted in: [Telfer Dunbar (1979)], pp. 14–15; and [McGann (2003)].\n0412. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], pp. 15–16, quoting: Haldane, M. M. (1931). \"The Great Clan Tartan Myth\". _[The Scots Magazine] _. **16** (1): 44–51. (Haldane is also cited, as a debate opponent, in Innes of Learney (1971).)\n0413. **[^] **[Martin, R. (1988)], p. 51; responding to claims in: Lurie, Alison (1981). _The Language of Clothes_. Random House. [ISBN] [9780394513027].\n0414. **[^] **[Telfer Dunbar (1979)], p. 57.\n0415. **[^] **[Trevor-Roper (1983)], pp. 28 _ff._\n0416. ^ [_**a**_] [_**b**_] [Paterson, M. B. (2001)], p. 161, quoting: [Campbell of Airds, Alastair] (1998) \\[1994\\]. \"Tartan and the Highland dress\". In [Way of Plean, George]; Squire, Romily (eds.). [_Collins Scottish Clan & Family Encyclopedia_]. HarperCollins / Standing Council of Scottish Chiefs. pp. 37–38\\. [ISBN] [9780760711200] – via Internet Archive.\n0417. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [\"Official Position on Clan Campbell Tartans\"]. _CCSNA.org_. Clan Campbell Society (North America). 2018. Retrieved 13 May 2023. Quoting letter of Chief Ian Campbell in considerable detail.\n0418. ^ [_**a**_] [_**b**_] [Stewart, D. W. (1893)], p. 36, quoting: [Lockhart, George] (1817). \"A Journal of the Expedition of Prince Charles Edward in 1745, by a Highland Officer\". In [Aufrère, Anthony] (ed.). _Lockhart Papers_. Vol. II. p. 505.\n0419. **[^] **[Armstrong (2017)], p. 23, citing the following, source of both the Campbell of Islay and Haldane statements: [Haldane (1931)].\n0420. **[^] **[Scarlett (1990)], pp. 27–28.\n0421. ^ [_**a**_] [_**b**_] [Thompson (1992)], p. iii.\n0422. **[^] **[Scarlett (1990), p. 9] \n0423. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 57, quoting: Stewart, Donald William (August 1892). \"Tartans in Family Portraits\". _The Scottish Antiquary, or, Northern Notes and Queries_. note 455.\n0424. **[^] **Wagner, Paul; Reynolds, Wayne (2002). _Pictish Warrior: AD 297–841_. \"Warrior\" series. Vol. 50. Osprey. p. 28.\n0425. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 23.\n0426. ^ [_**a**_] [_**b**_] [Paterson, M. B. (2001)], p. 162, quoting Hugh Cheape, from a 1999 interview.\n0427. **[^] **[Innes of Learney (1971)], pp. 8, 10.\n0428. **[^] **[Innes of Learney (1971)], p. 8, citing: Fraser of Reelig, Charles Ian (1930). _Some Notes on Highland Tartans_. Inverness: The Northern Chronicle Office.\n0429. **[^] **[Stewart, D. W. (1893)], at \"Brodie\" and \"Huntley\".\n0430. **[^] **[\"Search Results \\[Gordon\\]\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2023. Retrieved 24 May 2023.\n0431. **[^] **[Innes of Learney (1971)], p. 8.\n0432. ^ [_**a**_] [_**b**_] [Innes of Learney (1971)], p. 9.\n0433. **[^] **[Stewart, D. W. (1893)], pp. 12–13.\n0434. **[^] **[Stewart, D. W. (1893)], p. 13.\n0435. **[^] **[\"Tartan Details - MacLean of Duart Hunting\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 11 June 2023.\n0436. **[^] **[\"Tartan Details - Sutherland\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 20 May 2023.\n0437. **[^] **[\"Tartans\"]. _HouseOfGordonUSA.org_. House of Gordon USA. 2020. Retrieved 20 May 2023.\n0438. **[^] **[\"Tartan Details - Gordon, Red (1819)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 20 May 2023.\n0439. **[^] **[Telfer Dunbar (1979)], plate 13.\n0440. **[^] **[Telfer Dunbar (1979)], pp. 58–60.\n0441. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 62.\n0442. ^ [_**a**_] [_**b**_] [_**c**_] [Telfer Dunbar (1979)], p. 146.\n0443. **[^] **[\"Tartan Details - Grant (1819 #1)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 22 May 2023.\n0444. **[^] **[\"Tartan Details - Grant (1838)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 22 May 2023.\n0445. **[^] **[Zaczek & Phillips (2013)], p. 153.\n0446. **[^] **[\"Tartan Details - Grant (Vestiarium Scoticum)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 22 May 2023.\n0447. **[^] **[Telfer Dunbar (1979)], pp. 58–62.\n0448. **[^] **[Telfer Dunbar (1979)], pp. 191–192; quoting: Drummond-Norie, William (1898). _Loyal Lochaber and Its Associations Historical, Genealogical, and Traditionary_. Glasgow: Morison Bros.\n0449. **[^] **See, e.g.: [Telfer Dunbar (1979)], p. 173.\n0450. **[^] **Scobie, Willie (2012). [\"A Case for Clan Tartans\"]. _TartansAuthority.com_. Scottish Tartans Authority. Archived from [the original] on 23 June 2021. Retrieved 24 May 2023.\n0451. **[^] **[\"Tartan Details - Campbell of Armaddie\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.\n0452. **[^] **[\"Tartan Details - Campbell of Lochlane\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.\n0453. **[^] **[\"Tartan Details - Campbell, Red\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.\n0454. **[^] **[\"Tartan Details - Stewart of Ardshiel\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.\n0455. **[^] **[\"Tartan Details - Stewart of Atholl\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.\n0456. **[^] **[\"Tartan Details - Stewart, Hunting #1\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.\n0457. **[^] **[\"Tartan Details - Stewart, Old\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.\n0458. **[^] **[\"Tartan Details -Stewart of Appin Hunting\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.\n0459. **[^] **[\"Tartan Details - Stewart, Hunting #1\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.\n0460. **[^] **[\"Tartan Details - Stewart, Hunting #3\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.\n0461. **[^] **[\"Tartan Details - Stewart, Green\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.\n0462. **[^] **[\"Tartan Details - Ramsay Blue Hunting\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 5 July 2023.\n0463. **[^] **[Grant, James] (1884). [_Cassell's Old and New Edinburgh_]. Vol. II. London: Cassell & Co. p. 235 – via Google Books.\n0464. **[^] **[Kay, John]; Paton, Hugh (1842). [\"No. LXIV: The Rev. Joseph Robertson MacGregor, First Minister of the Edinburgh Gaelic Chapel\"]. _A Series of Original Portraits and Caricature Etchings_. Vol. I, part I. Edinburgh: Hugh Paton. pp. 152–153. Retrieved 29 May 2023 – via Google Books.\n0465. **[^] **[Telfer Dunbar (1979)], p. 130.\n0466. **[^] **[Telfer Dunbar (1979)], p. 44.\n0467. **[^] **[Stewart, D. W. (1893)], pp. 28–29.\n0468. **[^] **[Telfer Dunbar (1979)], p. 17.\n0469. **[^] **[\"Tartan Details - MacDonald, Lord of The Isles\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 24 June 2023.\n0470. **[^] **[Telfer Dunbar (1979)], pp. 61, 64, 71, 75, 88 plates 11–12, 35. \"The tartan is of a typical pre-clan tartan style.\"\n0471. **[^] **[Telfer Dunbar (1979)], pp. 57–60, 69, 71.\n0472. **[^] **[Scarlett (1990)], pp. 13, 22.\n0473. **[^] **[Trevor-Roper (1983)], p. 23, citing also: McClintock, H. F. (1943). _Old Highland Dress and Tartans_ (2nd ed.). Dundalk: Dundalgan Press. And: [Telfer Dunbar (1979)].\n0474. **[^] **[Telfer Dunbar (1979)], pp. 73–75.\n0475. **[^] **[Telfer Dunbar (1979)], p. 75.\n0476. **[^] **[Campbell, A. (1890)], p. vi.\n0477. **[^] **[Scarlett (1990)], p. 23, quoting: Campbell, Archibald (1899). [_Highland Dress, Arms and Ornament_]. Westminster: Constable & Co. – via Internet Archive.\n0478. **[^] **[Telfer Dunbar (1979)], p. 73.\n0479. **[^] **Ray, James (1752). [_Compleat History of the Rebellion, From Its First Rise, in 1745, to Its Total Suppression at the Glorious Battle of Culloden, in April 1746_]. p. 344 – via Google Books.\n0480. **[^] **[Telfer Dunbar (1979)], p. 19.\n0481. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 20.\n0482. **[^] **[Telfer Dunbar (1979)], pp. 19, 163–164, quoting two articles (untitled by Dunbar) by I. H. Mackay Scobie in the _Journal of the Society for Army Historical Research_, 1941 and 1946.\n0483. **[^] **[Telfer Dunbar (1979)], p. 90.\n0484. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 4; Quotes the entire passage from Burt (1727–37, published 1754).\n0485. **[^] **[Telfer Dunbar (1979)], pp. 19–20.\n0486. **[^] **[Telfer Dunbar (1979)], pp. 4–6; quotes the entire letter from Forbes of Culloden to the Lord Lyon (1746).\n0487. ^ [_**a**_] [_**b**_] Lawson, Cecil C. P. (1967) \\[1941\\]. _A History of the Uniforms of the British Army_. Vol. II. London: Norman Military Publications. p. 61. Quoted in: [Telfer Dunbar (1979)], p. 15.\n0488. **[^] **[Telfer Dunbar (1979)], p. 15.\n0489. **[^] **[Telfer Dunbar (1979)], pp. 47, 50: \"fine specimens of pre-nineteenth-century tartans with their lovely colour combinations and interesting weaves are far removed from the 'clan' tartans of later times.  ... \\[A\\] number of old hard-spun splaid genuinely pre-1745 \\[are\\] unlike any modern 'clan' tartan patterns.\"\n0490. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], pp. 16–17.\n0491. **[^] **[Trevor-Roper (1983)], pp. 25–26.\n0492. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 159.\n0493. **[^] **[Hinderks (2014)], p. 8, citing: Bolton, Andrew (2003). _Bravehearts: Men in Skirts_. Victoria & Albert Museum. p. 99. [ISBN] [9780810965584].\n0494. **[^] **Eslea MacDonald, Peter (November 2010). [\"A Jacobite Lady Reveals Her True Colours\"] (PDF). _ScottishTartans.co.uk_. Retrieved 25 June 2023.\n0495. ^ [_**a**_] [_**b**_] [\"Tartan Details - Huntly\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 June 2023. Citing previously unpublished research of James D. Scarlett (2005). The SRT record is confusingly named \"Huntly\" (which can refer to multiple tartans) rather than the more precise name \"Marchioness of Huntly's\", which SRT [misapplied to a different tartan].\n0496. ^ [_**a**_] [_**b**_] [Stewart, D. W. (1893)], p. 36. Also \\[mis-\\]quoted in: [Trevor-Roper (1983)], p. 23.\n0497. **[^] **[Tuckett (2016)], p. 16.\n0498. **[^] **[Telfer Dunbar (1979)], p. 14.\n0499. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], pp. 3–4.\n0500. **[^] **[Tuckett (2016)], p. 9.\n0501. **[^] **Mills, N. J.; Carswell, A. L. (1998). \"Wilson of Bannockburn and the Clothing of the Highland Regiments\". _Journal of the Society for Army Historical Research_. **76** (307): 177. [JSTOR] [44230132].\n0502. ^ [_**a**_] [_**b**_] [_**c**_] [Banks & de La Chapelle (2007)] p. 104.\n0503. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Tuckett (2016)], p. 17.\n0504. ^ [_**a**_] [_**b**_] [_**c**_] [Trevor-Roper (1983)], p. 30.\n0505. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Eslea MacDonald (2012)], p. 5.\n0506. **[^] **[Tuckett (2016)], p. 29, footnote 92.\n0507. **[^] **[\"Tartan Details - Campbell of Cawdor\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. Retrieved 6 June 2023.\n0508. **[^] **[\"Tartan Details - Abercrombie (Wilsons' No.2/64)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 9 June 2023.\n0509. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Scarlett (1990)], p. 64.\n0510. **[^] **[\"Tartan Details - Abercrombie\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 9 June 2023.\n0511. **[^] **[\"Tartan Details - Graham of Montrose #2\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 9 June 2023.\n0512. **[^] **[\"Tartan Details - Campbell of Breadalbane #2\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 9 June 2023.\n0513. **[^] **[\"Tartan Details - Campbell of Breadalbane\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 9 June 2023.\n0514. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Telfer Dunbar (1979)], p. 149.\n0515. **[^] **[\"Tartan\"]. _Scottish National Dictionary (1700–)_. Dictionaries of the Scots Language SCIO / University of Glasgow. 2005 \\[1974\\]. Retrieved 10 July 2023.\n0516. **[^] **[Eslea MacDonald (2012)], pp. 5–6.\n0517. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] Eslea MacDonald, Peter (February 2023). [\"The Cockburn Collection\"] (PDF). _ScottishTartans.co.uk_. Retrieved 24 June 2023.\n0518. **[^] **Cockburn, William (c. 1820). _A collection of old hard tartans made by William Cockburn of Cockburn, Bart. between the years 1810–1820_.\n0519. **[^] **[\"Tartan Details - Black Watch (Government)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 May 2023.\n0520. **[^] **[Paterson, M. B. (2001)], p. 157, quoting Stewart of Garth letter to [Andrew Robertson], from biography: Robert, James Ervine (1998). _The First Highlander: Major-General David Stewart of Garth CB, 1768–1829_. East Linton: Tuckwell. [ISBN] [1862320500].\n0521. **[^] **[Paterson, M. B. (2001)], p. 158, quoting from letters in J. I. Robertson's _The First Highlander_ (1998) again.\n0522. **[^] **[Telfer Dunbar (1979)], p. 119.\n0523. **[^] **[Telfer Dunbar (1979)], p. 124.\n0524. **[^] **This is reported as fact by the US-based House of Gordon society, which also makes other unsupportable assertions: [\"Tartans\"]. _HouseOfGordonUSA.org_. House of Gordon USA. 2020. Retrieved 11 July 2023.\n0525. **[^] **[Scarlett (1990)], p. 35. For \"confluence of district ... and ... regimental\", see entire chapters running pp. 9–36; wherein the arguments are made in stages.\n0526. **[^] **[Haswell Miller, A. E.] (November 1947). \"\\[title not given in source\\]\". _[Scotland's Magazine] _. Cited in: [Telfer Dunbar (1979)], p. 17.\n0527. **[^] **[\"Tartan Details - Campbell of Argyll\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 May 2023.\n0528. ^ [_**a**_] [_**b**_] [_**c**_] [Tuckett (2016)], p. 15.\n0529. **[^] **[Armstrong (2017)], pp. 36, 283–284.\n0530. ^ [_**a**_] [_**b**_] [_**c**_] [\"Major-General David Stewart of Garth\"]. _TheBlackWatch.org_. Perth: Black Watch Museum / Scottish Tourist Board. 27 November 2019. Retrieved 21 May 2023.\n0531. **[^] **[Tuckett (2016)], p. 28, footnote 88.\n0532. ^ [_**a**_] [_**b**_] [Trevor-Roper (1983)], pp. 28–29.\n0533. **[^] **[Armstrong (2017)], pp. 107, 172.\n0534. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [Paterson, M. B. (2001)], pp. 158–159.\n0535. **[^] **[Armstrong (2017)], p. 21.\n0536. **[^] **[Trevor-Roper (1983)], p. 26.\n0537. ^ [_**a**_] [_**b**_] [_**c**_] [Urquhart (1994)], p. 18.\n0538. **[^] **[Scarlett (1990)], p. 19: \"the Clan Chiefs of the time appear to have been singularly lacking in knowledge of the tartans that they and their forbears should have been wearing since the Celtic mists parted – at least in what was fast becoming the popular imagination\", as late as the 1850s.\n0539. ^ [_**a**_] [_**b**_] [_**c**_] [Armstrong (2017)], p. 37.\n0540. **[^] **[Telfer Dunbar (1979)], p. 160.\n0541. **[^] **Eslea MacDonald, Peter (March 2020). [\"The Lord of the Isles Tartans\"] (PDF). _ScottishTartans.co.uk_. Retrieved 24 May 2023.\n0542. ^ [_**a**_] [_**b**_] [Eslea MacDonald (2012)], p. 4.\n0543. **[^] **[Armstrong (2017)], p. 40.\n0544. **[^] **[Telfer Dunbar (1979)], p. 139.\n0545. **[^] **[Eslea MacDonald (2012)], p. 7.\n0546. **[^] **[Trevor-Roper (1983)], pp. 30, 32.\n0547. **[^] **[Banks & de La Chapelle (2007)] p. 104; citing: [Prebble (2000)], p. 105.\n0548. **[^] **[Scarlett (1990)], pp. 19–20, citing archived correspondence between Stewart of Garth and Robertson of Struan.\n0549. **[^] **[Scarlett (1990)], p. 19, quoting: Smibert, Thomas (1850). _The Clans of the Highlands of Scotland_. J. Hogg.\n0550. **[^] **[\"Tartan Details - Murray of Atholl\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 May 2023.\n0551. **[^] **[Tuckett (2016)], pp. 15–17.\n0552. ^ [_**a**_] [_**b**_] [Thompson (1992)], p. v.\n0553. **[^] **[Tuckett (2016)], pp. 16–17.\n0554. ^ [_**a**_] [_**b**_] [_**c**_] [Moncreiffe of that Ilk 1967]: p. 24.\n0555. **[^] **[Armstrong (2017)], p. 57.\n0556. ^ [_**a**_] [_**b**_] [_**c**_] [Scarlett (1990)], p. 36.\n0557. ^ [_**a**_] [_**b**_] [_**c**_] [Martin, R. (1988)], p. 54.\n0558. **[^] **[Martin, R. (1988)], p. 55.\n0559. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 148, citing quotation from: Webster, David (2011). _The World History of Highland Games_. Edinburgh: Luath. p. 65.\n0560. **[^] **[Trevor-Roper (1983)], p. 33. Also quoted with different punctuation in: [Telfer Dunbar (1979)], p. 116.\n0561. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 116.\n0562. **[^] **[Telfer Dunbar (1979)], p. 118.\n0563. **[^] **[\"Tartan Details - Scott\"]. _TartanRegister.gov.uk_. The Scottish Register of Tartans. 16 April 2010. Retrieved 7 June 2023.\n0564. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 153.\n0565. **[^] **[Scarlett (1990)], pp. 64, 188–193.\n0566. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Banks & de La Chapelle (2007)] pp. 106–108. They actually get publication date of _The Costume of the Clans_ off by one year; it was 1845 as confirmed in all other sources, like Telfer Dunbar (1979).\n0567. ^ [_**a**_] [_**b**_] [Armstrong (2017)], pp. 60–61.\n0568. **[^] **[Banks & de La Chapelle (2007)] p. 107.\n0569. ^ [_**a**_] [_**b**_] [Telfer Dunbar (1979)], p. 151.\n0570. **[^] **[Telfer Dunbar (1979)], p. 142.\n0571. **[^] **Stewart, Donald Calder; Thompson, J. Charles (1980). Scarlett, James (ed.). _Scotland's Forged Tartans: An Analytical Study of the Vestiarium Scoticum_. Edinburgh: Paul Harris Publishing. [ISBN] [0904505677].\\[ _[page needed] _\\]\n0572. ^ [_**a**_] [_**b**_] [_**c**_] [Armstrong (2017)], p. 61.\n0573. **[^] **[Scarlett (1990)], p. 195, quoting: [Stewart, D. C. (1974)].\n0574. **[^] **[Telfer Dunbar (1979)], pp. 103, 107, 111.\n0575. **[^] **[Telfer Dunbar (1979)], p. 131.\n0576. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] Eslea MacDonald, Peter. [\"A Short History of Tartan\"]. _ScottishTartans.co.uk_. Retrieved 7 October 2008.\n0577. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)] p. 108.\n0578. **[^] **[Armstrong (2017)], pp. 11, 61–62.\n0579. **[^] **[Scarlett (1990)], p. 18.\n0580. **[^] **[Armstrong (2017)], pp. 4, 11, 49, 57, 157, 236.\n0581. **[^] **[Martin, R. (1988)], p. 56.\n0582. ^ [_**a**_] [_**b**_] [_**c**_] [Trevor-Roper (1983)], p. 39.\n0583. **[^] **[Telfer Dunbar (1979)], pp. 116–117.\n0584. **[^] **[Telfer Dunbar (1979)], pp. 138–139.\n0585. **[^] **[Scarlett (1990)], pp. 195–196, quoting: [Stewart, D. C. (1974)].\n0586. **[^] **[Telfer Dunbar (1979)], pp. 140–141.\n0587. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [Banks & de La Chapelle (2007)] p. 26.\n0588. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 196, quoting: [Stewart, D. C. (1974)].\n0589. **[^] **[Telfer Dunbar (1979)], p. 129.\n0590. **[^] **[\"Tartan Details - Home (Clans Originaux)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 May 2023.\n0591. **[^] **[\"Tartan Details - Brodie Hunting\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.\n0592. **[^] **[Telfer Dunbar (1979)], p. 103.\n0593. **[^] **[Scarlett (1990)], p. 197, quoting: [Stewart, D. C. (1974)].\n0594. ^ [_**a**_] [_**b**_] Duncan of Sketraw, John A. (4 April 2009). [\"The Romantic Myth of Scottish Clan Septs\"]. _ScotsHistoryOnline.co.uk_. Archived from [the original] on 12 March 2022. Retrieved 13 May 2023.\n0595. **[^] **[Scarlett (1990)], pp. 37–38.\n0596. **[^] **[Scarlett (1990)], pp. 196–197, quoting: [Stewart, D. C. (1974)].\n0597. ^ [_**a**_] [_**b**_] [Scarlett (1990)], pp. 20, 197–198, quoting: [Stewart, D. C. (1974)].\n0598. **[^] **[Scarlett (1990)], p. 19: \"what was fast becoming the popular imagination\" by about 1850.\n0599. ^ [_**a**_] [_**b**_] [_**c**_] [Scarlett (1990)], p. 37.\n0600. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. 198, quoting: [Stewart, D. C. (1974)].\n0601. **[^] **[Scarlett (1990)], pp. 198–199, quoting: [Stewart, D. C. (1974)].\n0602. ^ [_**a**_] [_**b**_] [Scarlett (1990)], p. ix.\n0603. **[^] **[Scarlett (1990)], pp. 64–180.\n0604. **[^] **[\"Tartan Details - MacFarlane Dress\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.\n0605. **[^] **[\"Tartan Details - MacFarlane Hunting\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.\n0606. **[^] **[Stewart, D. C. (1974)].\n0607. **[^] **[Scarlett (1990)].\n0608. **[^] **[Scarlett (1990)], p. 21.\n0609. **[^] **[\"Tartan Details - MacGregor Dress Red (Dance)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2005. Retrieved 13 May 2023.\n0610. **[^] **[Scarlett (1990)], p. 24.\n0611. ^ [_**a**_] [_**b**_] McIntyre, Alastair, ed. (2023) \\[2002\\]. [\"Official Scottish Clans and Families\"]. _ElectricScotland.com_. Retrieved 15 May 2023. This list appears to be regularly maintained, at least as of 2023.\n0612. **[^] **[\"Tartan Details - Watson\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 June 2023.\n0613. **[^] **[Campbell, A. (1890)], p. 55.\n0614. ^ [_**a**_] [_**b**_] MacGregor of MacGregor, Malcolm (28 April 2012). [\"Our Tartan\"]. _ACGSUS.org_. American Clan Gregor Society. Retrieved 13 May 2023.\n0615. **[^] **[Way of Plean; Squire (2000)], p. 214.\n0616. **[^] **[\"Tartan Details - Davidson\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 18 June 2023.\n0617. ^ [_**a**_] [_**b**_] [\"Tartan Details - Shaw\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 11 June 2023.\n0618. ^ [_**a**_] [_**b**_] [\"Tartan Details - Shaw of Tordarroch Red (Dress)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 11 June 2023.\n0619. ^ [_**a**_] [_**b**_] [\"Tartan Details - Shaw of Tordarroch Green (Hunting)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 11 June 2023.\n0620. **[^] **[Scarlett (1990)], p. 19.\n0621. **[^] **[\"Tartan Details - Mar Tribe\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.\n0622. **[^] **[\"Tartan Details - MacLeod Red\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 30 May 2023.\n0623. **[^] **[\"Tartan Details: Lumsden Hunting\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 3 October 2023.\n0624. ^ [_**a**_] [_**b**_] [\"Tartan Details - MacTavish Dress\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.\n0625. **[^] **[\"Tartan Details - Lumsden\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2013. Retrieved 19 June 2023.\n0626. **[^] **[\"Tartan Details - Lumsden (Waistcoat)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 19 June 2023.\n0627. **[^] **[\"Tartan Details: Duncan of Sketraw\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 2 October 2023.\n0628. **[^] **[\"Tartan Details - MacDowall\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2008. Retrieved 13 June 2023.\n0629. **[^] **[\"Clan MacDowall Tartans\"]. _MacDowall.wixsite.com_. Clan MacDowall Society. 2013. Retrieved 13 June 2023.\n0630. **[^] **[\"Tartan Details - Cochrane Hunting\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2015. Retrieved 10 June 2023.\n0631. **[^] **[\"Tartan Details: Carruthers\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2017. Retrieved 2 October 2023.\n0632. ^ [_**a**_] [_**b**_] [_**c**_] Milne, N. C. (2010). [_Scottish Culture and Traditions_]. Paragon Publishing. p. 138. [ISBN] [9781899820795]. Retrieved 28 May 2023 – via Google Books.\n0633. **[^] **[Armstrong (2017)], pp. 15–16.\n0634. **[^] **[Tuckett (2016)], p. 8.\n0635. **[^] **[Armstrong (2017)], p. 32.\n0636. **[^] **[Banks & de La Chapelle (2007)], p. 92.\n0637. **[^] **[Armstrong (2017)], p. 147.\n0638. **[^] **[Armstrong (2017)], p. 31, quoting: [Burke, John Bernard] (1914). _A Genealogical and Heraldic Dictionary of the Peerage and Baronetage of the British Empire_ (106th ed.). London: Harrison. p. 1803.\n0639. **[^] **[Telfer Dunbar (1979)], p. 80.\n0640. **[^] **[Armstrong (2017)], pp. 14, 20.\n0641. ^ [_**a**_] [_**b**_] [_**c**_] McNeil, Kenneth (2007). \"Britain's 'imperial man': Walter Scott, David Stewart, and Highland Masculinity\". [_Scotland, Britain, Empire_]. Ohio State University Press. pp. 83–84. Retrieved 29 May 2023.\n0642. **[^] **[Armstrong (2017)], p. 117, 155.\n0643. **[^] **[Armstrong (2017)], pp. 1, 3–5, _ff._\n0644. **[^] **[Armstrong (2017)], p. 56.\n0645. **[^] **[Armstrong (2017)], pp. 5, 14–16, 18–19, 24–26, 56.\n0646. **[^] **[Armstrong (2017)], pp. 110–111, 150, 197.\n0647. **[^] **[Scarlett (1990)], pp. 36–37.\n0648. **[^] **[Telfer Dunbar (1979)], p. 147.\n0649. **[^] **[Armstrong (2017)], pp. 36–37.\n0650. **[^] **[Paterson, M. B. (2001)], p. 160.\n0651. **[^] **[Hinderks (2014)], pp. 8–9, citing: [Dziennik (2012)], p. 136. And: [Nicholson (2005)], p. 160. And: [Harvie, Christopher] (1977). _Scotland and Nationalism: Scottish Society and Politics 1707 to the Present_. London: Routledge. pp. 13–14.\n0652. ^ [_**a**_] [_**b**_] [_**c**_] [Banks & de La Chapelle (2007)], pp. 99–100; quoting: Clyde, Robert (1995). _From Rebel to Hero: The Changing Image of the Highlander, 1745–1830_. Tuckwell Press. p. 129. [ISBN] [9781862320277].\n0653. **[^] **[\"An incident during the visit of George IV to Edinburgh, 1822\"]. [National Galleries Scotland]. Retrieved 9 January 2017.\n0654. **[^] **[Telfer Dunbar (1979)], pp. 12, 18.\n0655. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Tuckett (2016)], p. 18.\n0656. **[^] **[Telfer Dunbar (1979)], pp. 3, 9.\n0657. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Magnusson, Magnus] (2003). _Scotland: The Story of a Nation_. Grove Press. pp. 653–654\\. [ISBN] [0802139329].\n0658. **[^] **Duncan, Ian (2007). [_Scott's Shadow: The Novel in Romantic Edinburgh_]. [Princeton University Press]. pp. 7–8\\. [ISBN] [9780691043838].\n0659. ^ [_**a**_] [_**b**_] [_**c**_] Brown, Angie (13 August 2022). [\"How the king's visit saw kilts become Scotland's national dress\"]. _[BBC News] _. Retrieved 7 July 2023.\n0660. **[^] **[Armstrong (2017)], pp. 48–52.\n0661. **[^] **[Banks & de La Chapelle (2007)] p. 105.\n0662. **[^] **[Armstrong (2017)], p. 54, quoting: Prebble, John (1988). _The King's Jaunt: George IV in Scotland, August 1822, \"One and Twenty Daft Days\"_. London: Collins. p. 364.\n0663. **[^] **[Armstrong (2017)], pp. 52–53, 57, 283.\n0664. **[^] **Calder, Angus (1994). _Revolving Culture: Notes from the Scottish Republic_. London & New York: I.B. Tauris. p. 103. Quoted in: [Porter (1998)], p. 2.\n0665. **[^] **[Trevor-Roper (1983)], p. 31.\n0666. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 54.\n0667. **[^] **[Scarlett (1990)], p. 15.\n0668. **[^] **[Paterson, M. B. (2001)], p. 159, footnote 13.\n0669. **[^] **[Armstrong (2017)], pp. 43–44. The term \"synthetic Gaelicism\" is attributed to: Morton, H. V. (1937) \\[1929\\]. _In Search of Scotland_. New York: Dodd, Mead and Company. p. 132.\n0670. **[^] **[Armstrong (2017)], p. 42.\n0671. **[^] **[Hinderks (2014)], p. 9, quoting: [Cheape (1991)], p. 49.\n0672. **[^] **[Hinderks (2014)], p. 9, quoting: [Bolton (2003)], p. 100.\n0673. **[^] **[Hinderks (2014)], p. 10. Citing: Hobsbawm & Ranger (1983), p. 24. And: Thorburn, W. A. (1976). [\"Military Origins of Scottish National Dress\"]. _Costume_. **10** (1): 29, 33. [doi]: [10.1179/cos.1976.10.1.29].\n0674. **[^] **[Paterson, M. B. (2001)], pp. 78, 160–161\n0675. **[^] **[Trevor-Roper (1983)], p. 28.\n0676. **[^] **[Tuckett (2016)], p. 11.\n0677. **[^] **[Armstrong (2017)], p. 126.\n0678. **[^] **[Telfer Dunbar (1979)], p. 11.\n0679. **[^] **[Telfer Dunbar (1979)], p. 175.\n0680. **[^] **[Tuckett (2016)], pp. 17–18.\n0681. ^ [_**a**_] [_**b**_] [_**c**_] [Telfer Dunbar (1979)], p. 150.\n0682. **[^] **[Telfer Dunbar (1979)], p. 117.\n0683. **[^] **[Telfer Dunbar (1979)], pp. 147–149.\n0684. ^ [_**a**_] [_**b**_] [Tuckett (2016)], pp. 7–9.\n0685. ^ [_**a**_] [_**b**_] [_**c**_] [Tuckett (2016)], p. 20.\n0686. **[^] **[Banks & de La Chapelle (2007)] p. 109; citing: [Zacek & Phillips (2013)], p. 74.\n0687. **[^] **See extensive treatment in: [von Fürstenberg, Princess Ira]; Nicolls, Andrew (1996). _Tartanware: Souvenirs from Scotland_. Trafalgar Square Press. [ISBN] [9781857935141].\n0688. **[^] **[Banks & de La Chapelle (2007)] pp. 21–22.\n0689. **[^] **[Armstrong (2017)], p. 84.\n0690. **[^] **[\"19th-century Scottish kitch is today's collectible\"]. _CoastalAntiques.com_. Collecting tartanware. Archived from [the original] on 16 September 2004. Retrieved 25 October 2008.\n0691. **[^] **[Paterson, M. B. (2001)], p. 168, footnote 30.\n0692. ^ [_**a**_] [_**b**_] [Martin, R. (1988)], p. 57.\n0693. **[^] **[Armstrong (2017)], pp. 83–84.\n0694. **[^] **[Armstrong (2017)], pp. 112–113.\n0695. **[^] **Wilton, Brian. [\"History of Tartan\"]. _TartansAuthority.com_. Crieff, Scotland: [Scottish Tartans Authority]. Archived from [the original] on 22 March 2004. Retrieved 6 October 2008.\n0696. **[^] **[Telfer Dunbar (1979)], p. 127, quoting an 1847 review.\n0697. **[^] **[Armstrong (2017)], pp. 67, 198.\n0698. **[^] **[Armstrong (2017)], pp. 68–70, 88–89.\n0699. **[^] **[Armstrong (2017)], pp. 74, 98, 102.\n0700. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] Eslea MacDonald, Peter (June 2019). [\"The Balmoral Tartan\"] (PDF). _ScottishTartans.co.uk_. Retrieved 30 June 2023.\n0701. **[^] **[Armstrong (2017)], pp. 74–76, 85, 95.\n0702. **[^] **[\"Tartan in Royal Dress\"]. Royal Collection Trust. Retrieved 3 February 2020.\n0703. **[^] **[Banks & de La Chapelle (2007)], pp. 34, 108.\n0704. **[^] **[Armstrong (2017)], pp. 83–84, 99.\n0705. **[^] **[Armstrong (2017)], p. 62.\n0706. **[^] **[Queen Victoria] (1885). [_More leaves from the journal of a life in the Highlands, from 1862 to 1882_] (New ed.). London: Smith, Elder & Co. p. 173.\n0707. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 207.\n0708. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 101.\n0709. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)], pp. 108–109.\n0710. **[^] **[Armstrong (2017)], pp. 93, 205.\n0711. **[^] **[Armstrong (2017)], pp. 93–94, 104, 107, 148–150, 236.\n0712. **[^] **[Armstrong (2017)], pp. 19, 59, 65, 87–88, 192.\n0713. **[^] **[Paterson, M. B. (2001)], p. 161.\n0714. **[^] **[Armstrong (2017)], pp. 6, 57, 87.\n0715. **[^] **[Armstrong (2017)], pp. 12, 272.\n0716. **[^] **[Armstrong (2017)], p. 93.\n0717. **[^] **[Armstrong (2017)], p. 87, quoting: [Brown, Ivor J. C.] (1955). _Balmoral: The History of a Home_. London: Collins. p. 15.\n0718. **[^] **[Armstrong (2017)], p. 84, quoting: [Monod, Paul]; [Pittock, Murray G. H.]; [Szechi, Daniel], eds. (2010). _Loyalty and Identity: Jacobites at Home and Abroad_. Basingstoke: Palgrave Macmillan. p. 43.\n0719. **[^] **[Armstrong (2017)], p. 84, quoting: [Thompson, Dorothy] (1990). _Queen Victoria: The Woman, the Monarch and the People_. London: Virago. p. 54.\n0720. **[^] **[Armstrong (2017)], p. 88, citing: [Devine, T. M.] (2000). _The Scottish Nation, 1700–2000_. London: Allen Lane. p. 231.\n0721. **[^] **[Armstrong (2017)], pp. 82, 87, 91.\n0722. ^ [_**a**_] [_**b**_] [Armstrong (2017)], pp. 6, 16, 58, 72, 77–79, 81, 92, 150, 167, 204–206, 271–272. The term \"Balmorality\" is attributed to: [Scott-Moncrieff, George] (1932). \"Balmorality\". In [Thomson, David Cleghorn] (ed.). _Scotland in Quest of Her Youth: A Scrutiny_. London: Oliver & Boyd. pp. 69–86.\n0723. **[^] **[Armstrong (2017)], pp. 58, 65, 85, 95, 99–100, 103, 109, 237, 262.\n0724. **[^] **[Armstrong (2017)], pp. 6, 11, 59, 78, 204, 241.\n0725. **[^] **[Armstrong (2017)], pp. 99–100, 119–121, 237, 262.\n0726. **[^] **[Armstrong (2017)], pp. 95–96, 103, 167, 176, 212–213, 245.\n0727. **[^] **[Armstrong (2017)], pp. 6, 59, 78, 103, 285.\n0728. **[^] **[Armstrong (2017)], pp. 167, 254.\n0729. **[^] **[Armstrong (2017)], p. 259, quoting: Withers, C. W. J. (1992). \"The historical creation of the Scottish Highlands\". In Donnachie, Ian; [Whatley, Christopher] (eds.). _The Manufacture of Scottish History_. Edinburgh: Polygon. p. 155.\n0730. **[^] **[Armstrong (2017)], pp. 254, 259.\n0731. **[^] **[Scarlett (1990)], p. 16.\n0732. **[^] **Macaulay, Thomas Babington (1848). \"Chapter XIII\". [_The History of England from the Accession of James II:_]. § 284–285.\n0733. **[^] **[Armstrong (2017)], p. 272.\n0734. **[^] **[Armstrong (2017)], pp. 251–252, quoting: [Scott-Moncrieff (1932)], p. 75.\n0735. **[^] **[Armstrong (2017)], p. 81, quoting: [Devine (2000)], p. 231.\n0736. **[^] **[Armstrong (2017)], pp. 108–109, 125, 275.\n0737. **[^] **[Telfer Dunbar (1979)], p. 141.\n0738. **[^] **[Armstrong (2017)], p. 243.\n0739. **[^] **[Armstrong (2017)], pp. 124–125, 237, 254, 262.\n0740. **[^] **[Armstrong (2017)], pp. 178–184.\n0741. **[^] **[Martin, R. (1988)], pp. 57–58.\n0742. **[^] **[Armstrong (2017)], p. 84, citing: [Faiers (2008)], p. 193.\n0743. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Martin, R. (1988)], p. 58.\n0744. **[^] **Jacobson, Ralph E; Ray, Sidney F.; Attridge, Geoffrey G.; Axford, Norman R. (2000). [_The Manual of Photography: Photographic and Digital Imaging_]. Focal Press. p. 228. [ISBN] [0240515749].\n0745. **[^] **[Martin, R. (1988)], pp. 56–57.\n0746. **[^] **[Brown (2012)], pp. 6–7.\n0747. **[^] **[Armstrong (2017)], _passim_; much of this work is an anaysis of the \"symbiotic\" Highlandism relationship between Victoria and the Atholls.\n0748. **[^] **Dorrian, Mark (2006). [\"The King and the City: On the Iconology of George IV in Edinburgh\"] (PDF). _Edinburgh Architecture Research_. **30**: 32\\. [ISSN] [0140-5039]. Retrieved 10 July 2023.\n0749. **[^] **[\"King George IV\"]. _Undiscovered Scotland_. 2007. Retrieved 10 July 2023.\n0750. **[^] **Campbell, Jeannie (17 June 2022). [\"Royal visit to Edinburgh 1822 – The Background, Part One\"]. _Bagpipe News_. Retrieved 10 July 2023.\n0751. **[^] **[Armstrong (2017)], p. 63; see especially footnote 33.\n0752. **[^] **[Armstrong (2017)], p. 185.\n0753. **[^] **[Armstrong (2017)], pp. 209–221. The \"as they _now_ are\" quote is on p. 219.\n0754. **[^] **[\"Wearing of Sashes by Ladies in Evening Dress\"] (PDF). [Court of the Lord Lyon]. 2009 – via Society of Scottish Armigers. SSA indicates this was originally published by the Lord Lyon, and the text seems to indicate this, but the LL website no longer provides such a document.\n0755. **[^] **[Armstrong (2017)], p. 198.\n0756. **[^] **Falke, Jacob (1872). [\"National Domestic Industry\"] (PDF). _The Workshop_. **5** (3): 33–36\\. [doi]: [10.2307/25586655]. [JSTOR] [25586655]. Retrieved 13 July 2023.\n0757. **[^] **[Armstrong (2017)], pp. 239, 242, 279.\n0758. **[^] **[Armstrong (2017)], pp. 190–191.\n0759. **[^] **[Armstrong (2017)], p. 125.\n0760. **[^] **[Martin, R. (1988)], pp. 58, 60.\n0761. **[^] **See detailed treatment in: Dwyer-McNulty, Sally (2014). _Common Threads: A Cultural History of Clothing in American Catholicism_. University of North Carolina Press. [ISBN] [9781469614106].\n0762. **[^] **[Paterson, M. B. (2001)], pp. 174–175.\n0763. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] Kirsanova, Raisa (September 2016). [\"Scottish tartans and Russian Romanticism\"]. _Clothing Cultures_. **3** (3): 237–245\\. [doi]: [10.1386/cc.3.3.237\\_1]. Retrieved 26 May 2023.\n0764. **[^] **[\"All you need to know about Walkers Shortbread\"]. _The Scotsman_. Retrieved 18 October 2021.\n0765. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)], pp. 26–27.\n0766. **[^] **[Mackay (1924)], p. 21.\n0767. **[^] **[Paterson, M. B. (2001)], pp. 81–82, 130.\n0768. **[^] **[Armstrong (2017)], pp. 236, 256.\n0769. **[^] **[Paterson, M. B. (2001)], p. 27, footnote 24.\n0770. **[^] **[Armstrong (2017)], pp. 3, 277.\n0771. **[^] **[Armstrong (2017)], p. 256.\n0772. **[^] **[Armstrong (2017)], p. 283.\n0773. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 264.\n0774. **[^] **[Paterson, M. B. (2001)], p. 190.\n0775. **[^] **[Armstrong (2017)], pp. 260, 273.\n0776. **[^] **[Paterson, M. B. (2001)], p. 218.\n0777. **[^] **[Paterson, M. B. (2001)], pp. 195, 218.\n0778. **[^] **[Armstrong (2017)], pp. 253–254, 280.\n0779. **[^] **[\"National Tartan Day\"]. _Channel 39 Southern Television_. 1 July 2008. Retrieved 9 April 2023.\n0780. **[^] **Gilchrist, Jim (15 December 2008). \"Stories of Homecoming: We're on the march with Argentina's Scots\". _[The Scotsman] _. p. 18.\n0781. ^ [_**a**_] [_**b**_] [\"Tartan Details - Brittany National\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0782. **[^] **[\"About Us\"]. _NYCTartanWeek.org_. National Tartan Day New York Committee. 2023. Retrieved 3 June 2023.\n0783. **[^] **[Brown (2012)], p. 7.\n0784. **[^] **[Armstrong (2017)], p. 256, quoting: Maitland Hume, Ian (2001). _The contemporary role of the kilt and tartan in the construction and expression of Scottish American identity_ (PhD). University of Edinburgh.\n0785. **[^] **[\"About Us\"]. _ScottishTartansMuseum.org_. Franklin, North Carolina: Scottish Tartans Museum and Heritage Center. 2020. Retrieved 28 May 2023.\n0786. **[^] **[Paterson, M. B. (2001)], p. 180.\n0787. **[^] **[Paterson, M. B. (2001)], p. 131.\n0788. **[^] **Wylie, James (2022). [\"The People's Tartan: Ba part of reinventing tartan\"]. _VAM.ac.uk_. [V&A Dundee]. Retrieved 29 May 2023.\n0789. **[^] **[\"Exhibition: Tartan – on until Sunday 14 January 2024\"]. _VAM.ac.uk_. [V&A Dundee]. 2023. Retrieved 26 May 2023.\n0790. **[^] **Bamford, Abbey (3 April 2023). [\"Plaid weaves grid concept into V&A Dundee Tartan exhibition space\"]. _[Design Week] _. Retrieved 26 May 2023.\n0791. **[^] **[Paterson, M. B. (2001)], p. 154, quoting Teall of Teallach's foreword in Blair Urquhart's _Identifying Tartans_.\n0792. **[^] **[Brancaz (2016)], paras. 9–10, 12, 14–15. Citing: Ray, Celeste (May 2010). _Ancestral Clanscapes and Transatlantic Tartaneers_. Symposium on Return Migration. Edinburgh: Scottish Centre for Diaspora Studies. pp. 6–7. Republished as: Ray, Celeste (2012). \"Ancestral clanscapes and transatlantic tartaneers\". In Varricchio, Mario (ed.). _Back to Caledonia: Scottish Homecomings from the Seventeenth Century to the Present_. Birlinn. pp. 168–188\\. [ISBN] [9781906566449]. Also citing: McArthur, Colin (2003). _Brigadoon, Braveheart and the Scots: Distortions of Scotland in Hollywood Cinema_. \"Cinema and Society\" series. London: I. B. Tauris. [ISBN] [9781860649271].\n0793. **[^] **[Brancaz (2016)], para. 15.\n0794. ^ [_**a**_] [_**b**_] [\"What's New\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Archived from [the original] on 12 April 2009. Retrieved 13 March 2020.\n0795. ^ [_**a**_] [_**b**_] [_**c**_] [Mather, Jim]; MacKenzie, George (9 July 2007). [\"National Tartan Register to be set up\"]. _Gov.scot_. [Government of Scotland]. Archived from [the original] on 16 November 2020. Retrieved 27 May 2023.\n0796. **[^] **[Armstrong (2017)], p. 2275.\n0797. **[^] **Fisher, Alice (10 April 2010). [\"Why the world has gone mad for plaid\"]. \"Fashion\" department. _[The Observer] _. Retrieved 28 May 2020.\n0798. **[^] **Pittock, M. G. H. (2013) \\[2008\\]. _The Road to Independence? Scotland in the Balance_ (2nd ed.). London: Reaktion Books. p. 150.\n0799. ^ [_**a**_] [_**b**_] [_**c**_] [Paterson, M. B. (2001)], p. 172.\n0800. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 274.\n0801. ^ [_**a**_] [_**b**_] [Cheape (2012)], p. 14.\n0802. **[^] **This is much of the theme of [Paterson, M. B. (2001)], \"Chapter 5: Tartan (Case Study 1)\", pp. 152–188.\n0803. **[^] **[Armstrong (2017)], pp. 238, 268.\n0804. **[^] **[Armstrong (2017)], p. 268.\n0805. **[^] **Watson, Nicola (8 March 2019). [\"Tartan Is the Next Big Thing: 5 Standout Fall/Winter 2019 Trends That You Need to Know of\"]. _[Her World] _. [SPH Magazines]. Retrieved 10 July 2023.\n0806. **[^] **[Armstrong (2017)], pp. 12, 280–281.\n0807. **[^] **[Armstrong (2017)], pp. 12, 269, 291.\n0808. **[^] **[Armstrong (2017)], p. 276.\n0809. **[^] **[Armstrong (2017)], p. 278, quoting: [Pittock, Murray G. H.] (2002). \"The Jacobite Cult\". In [Cowan, Edward J.]; [Finlay, Richard J.] (eds.). _Scottish History: The Power of the Past_. Edinburgh University Press. p. 208.\n0810. **[^] **[Brancaz (2016)], paras. 17–21.\n0811. ^ [_**a**_] [_**b**_] [Banks & de La Chapelle (2007)], p. 33.\n0812. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 259.\n0813. **[^] **[\"Our Brands\"]. _Clan.com_. Scotweb Marketing Ltd. 2023. Retrieved 15 July 2023.\\[ _[self-published source] _\\]\n0814. **[^] **Johnston, Kevin (dir.) (4 April 2021). \"Clans and Tartans\". [_Men in Kilts: A Roadtrip with Sam and Graham_]. Season 1. Episode 7. Starz.\n0815. **[^] **[\"Tartan Weaving Mill & Exhibition\"]. _Gazetteer for Scotland_. University of Edinburgh / Royal Scottish Geographical Society. 2022. Retrieved 12 July 2023.\n0816. ^ [_**a**_] [_**b**_] [_**c**_] [Paterson, M. B. (2001)], p. 175.\n0817. **[^] **[\"Tartans\"]. _MacDougall.org_. Clan MacDougall Society of North America. 2021. Retrieved 13 June 2023.\n0818. **[^] **[Paterson, M. B. (2001)], p. 153.\n0819. **[^] **[Paterson, M. B. (2001)], p. 174. (PV was imprecisely referred to as \"vinyl viscose\" here.)\n0820. **[^] **Example manufacturer specs: [\"Polyviscose Plaid Fabric Made to Order in 91 Tartans\"]. _ScotlandShop.com_. 2021. Retrieved 30 May 2023.\n0821. **[^] **Example manufacturer specs: [\"Poly Viscose Tartan Swatches\"]. _Kilts-n-Stuff.com_. Celtic Croft. Retrieved 30 May 2023.\n0822. ^ [_**a**_] [_**b**_] Example marketing: Moloney, Eve (22 December 2021). [\"What Is Polyviscose Fabric?\"]. _TheScotlandKiltCompany.co.uk_. Retrieved 30 May 2023.\n0823. ^ [_**a**_] [_**b**_] Example marketing: Fiddes, Nick (2020). [\"Spotlight on Poly-viscose fabric – its uses, nature, benefits and drawbacks\"]. _Clan.com_. Retrieved 30 May 2023.\n0824. **[^] **Example marketing: [\"Everything You Need to Know About Poly-Viscose Fabric\"]. _Kils-n-Stuff.com_. Celtic Croft. 20 October 2020. Retrieved 30 May 2023.\n0825. **[^] **Example marketing: [\"11 – 12 oz. Poly Viscose Tartan Cloth\"]. _USAKilts.com_. 2020. Retrieved 30 May 2023.\n0826. **[^] **Example marketing: [\"Poly-Viscose\"]. _AtlantaKilts.com_. 2022. Retrieved 30 May 2023.\n0827. **[^] **[Paterson, M. B. (2001)], pp. 176–177.\n0828. **[^] **[Armstrong (2017)], p. 269, quoting: Craig, Carol (2003). _The Scots' Crisis of Confidence_. Edinburgh: Big Thinking. p. 27.\n0829. **[^] **[Paterson, M. B. (2001)], pp. 184–186.\n0830. **[^] **\"Brigadoon\". _[The Monthly Film Bulletin] _. **22** (258): 99. July 1955.\n0831. **[^] **Beatty, John (Summer 2003). [\"Scotland and Its Image: The Brigadoon Syndrome\"]. _The Scotia News_. **2** (3 \\[total issue no. 7\\]). Saltire Society of New York. Retrieved 29 May 2023 – via City University of New York.\n0832. ^ [_**a**_] [_**b**_] [\"Kilt movies pay off Hollywood lift for Scottish tourism\"]. _[The Herald] _. Glasgow. 25 August 1996. Retrieved 29 May 2023.\n0833. **[^] **Roberts, David (2006). _British Hit Singles & Albums_ (19th ed.). London: Guinness World Records. p. 45. [ISBN] [1904994105].\n0834. **[^] **Ash, Juliet; Wright, Lee (1988). _Components of Dress: Design, Manufacturing, and Image-making in the Fashion Industry_. Routledge. p. 63. [ISBN] [0415006473].\n0835. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [Martin, R. (1988)], p. 59.\n0836. **[^] **Norwich, William (5 March 2004). [\"Stephen Sprouse, Design Pioneer, Dies at 50\"]. _[The New York Times] _. Retrieved 6 July 2023.\n0837. **[^] **Several examples of McQueen's bold tartan designs are illustrated about half-way through this long article: Truong, Alain (3 May 2011). [\"Alexander McQueen's Iconic Designs in Costume Institute Retrospective at Metropolitan Museum\"]. _Eloge de l'Art_. Retrieved 23 June 2023.\n0838. **[^] **[Paterson, M. B. (2001)], pp. 152–153.\n0839. **[^] **Milligans, Lauren (15 May 2012). [\"Designer Stamps - Great British Fashion Stamps\"]. _British Vogue_. Retrieved 24 September 2022.\n0840. **[^] **[\"Checkerboard and Tartan: How Two Patterns Changed Connotation Through Centuries\"]. _One Block Down_. 2021. Retrieved 12 July 2023.\n0841. **[^] **Komar, Marlen (30 March 2016). [\"The Evolution of Androgynous Fashion Throughout the 20th Century\"]. _[Bustle] _. Retrieved 28 May 2023.\n0842. **[^] **Elan, Priya (13 April 2016). [\"How A$AP Rocky, Rihanna and Kanye West reinvented grunge style\"]. _[The Guardian] _. Retrieved 28 May 2023.\n0843. **[^] **Kobel, Peter (2 April 1993). [\"Smells Like Big Bucks\"]. _[Entertainment Weekly] _. [Archived] from the original on 14 October 2007. Retrieved 28 May 2023.\n0844. **[^] **[Paterson, M. B. (2001)], pp. 285, 289.\n0845. ^ [_**a**_] [_**b**_] [Armstrong (2017)], p. 262.\n0846. ^ [_**a**_] [_**b**_] Doran, Tom (May 2015). [\"Tartans Made for the Movies\"]. _The Scotia News_. **9** (5 \\[total issue 50\\]). Saltire Society of New York. Archived from [the original] on 25 January 2022. Retrieved 29 May 2023 – via City University of New York.\n0847. **[^] **[Paterson, M. B. (2001)], pp. 64, 287.\n0848. ^ [_**a**_] [_**b**_] [Paterson, M. B. (2001)], pp. 285–287, 304.\n0849. **[^] **Greig, Finlay (4 September 2020). [\"10 Braveheart inaccuracies: historical blunders in the Mel Gibson film about the Wars of Scottish Independence\"]. _[The Scotsman] _. Retrieved 29 May 2023.\n0850. ^ [_**a**_] [_**b**_] Shattuck, Kathryn (21 May 1995). [\"If That's His Wallace Kilt, Who Stole The Yellow Stripes?\"]. \"Film\" section. _[The New York Times] _. Retrieved 29 May 2023.\n0851. **[^] **[Armstrong (2017)], p. 30.\n0852. **[^] **Butson, Mackenzi (6 May 2020). [\"Disney/Pixar's Brave: 10 Aspects of Scottish Culture Explored in the Film\"]. _[Screen Rant] _. Retrieved 29 May 2023.\n0853. **[^] **[Armstrong (2017)], pp. 11, 237, 259, 262, 280, 290.\n0854. ^ [_**a**_] [_**b**_] Martin, Carol A. L. (2020). [\"Film & Television: When authenticity is important, some costumers go as far as designing a special tartan\"]. _CuriousAndUnusualTartans.com_. Retrieved 29 May 2023.\n0855. **[^] **Nicol, Danny (2018). _Doctor Who: A British Alien?_. Springer. p. 93.\n0856. **[^] **Delgado, Gabriela (11 September 2021). [\"Doctor Who: Every Doctor's Signature Outfit, Ranked from Worst to Best\"]. _[Comic Book Resources] _. Retrieved 10 July 2023.\n0857. **[^] **[Behind The Lens - The Time of the Doctor - Doctor Who: Christmas Special 2013 - BBC]  – [YouTube], official BBC channel, 26 December 2013. Received 14 July 2018.\n0858. **[^] **[\"Claire Brown\"]. _Doctor Who World_. 2021. Retrieved 18 June 2023.\n0859. **[^] **[\"Doctor Who: Jodie Whittaker's regeneration reveals a new Doctor\"]. _BBC News_. 23 October 2022. Retrieved 26 October 2022.\n0860. **[^] **[\"The stories behind 7 of Scotland's most popular tartans\"]. _[The Scotsman] _. 8 February 2018. Retrieved 8 June 2023.\n0861. ^ [_**a**_] [_**b**_] [\"Tartan Details - Caledonia No 3\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 3 July 2023.\n0862. **[^] **[\"Tartan Details - Caledonia\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 3 July 2023.\n0863. **[^] **[\"Tartan Details - MacPherson #5\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 16 July 2023.\n0864. **[^] **[\"Tartan Details - Rob Roy Macgregor\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 3 July 2023.\n0865. **[^] **[\" **buffalo plaid** in American English\"]. _CollinsDictionary.com_. HarperCollins. 2023. Retrieved 12 July 2023.\n0866. **[^] **Garton, Christine (16 December 2019). [\"A Brief History of Buffalo Plaid\"]. _PieceWork Magazine_. Long Thread Media. Retrieved 12 July 2023.\n0867. ^ [_**a**_] [_**b**_] Schlueter, Preston (12 January 2020). [\"Grids, Plaids, and Windowpanes: Checked Patterns in Menswear and How to Wear Them\"]. _Gentleman's Gazette_.\n0868. **[^] **[\"Tartan Details - Pride, The Tartan of\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 30 May 2023.\n0869. **[^] **Schwartzapfel, Beth (17 July 2008). [\"Scots design Jewish tartan\"]. _[The Jewish Daily Forward] _. Sound the Bagpipes. Retrieved 10 May 2009.\n0870. **[^] **[\"Tartan Details - Scottish Islamic\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2012. Retrieved 13 November 2023.\n0871. **[^] **[\"Tartan Details - Sikh\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 November 2023.\n0872. **[^] **[Paterson, M. B. (2001)], p 127.\n0873. **[^] **[\"Search Results \\[rainbow\\]\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2023. Retrieved 18 June 2023. More can be found with keyword searches \"LGBT\" and \"pride\" (though most of the latter are false-positives).\n0874. **[^] **Hutcheson, Colin W. [\"Regimental Tartans\"]. _TartansAuthority.com_. Scottish Tartans Authority. Retrieved 1 May 2010.\n0875. **[^] **[Scarlett (1990)], pp. 43–44.\n0876. ^ [_**a**_] [_**b**_] [_**c**_] Newsome, Matthew Allan C. (2008). [\"Purveyors of 'Tartan Tat' Taken to Task\"]. _Albanach.org_. Retrieved 14 July 2023. Originally published in _The Scottish Banner_, September 2008.\n0877. **[^] **[\"Check out our new tartan\"]. _[The Scotsman] _. 17 September 2008. Retrieved 24 September 2008.\n0878. **[^] **[\"Tartan Details - Maple Leaf\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 7 June 2023.\n0879. ^ [_**a**_] [_**b**_] [\"Tartans\"]. [Department of Canadian Heritage]. Archived from [the original] on 16 August 2002. Retrieved 24 September 2008.\n0880. ^ [_**a**_] [_**b**_] [\"Tartan Details - Welsh National\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0881. **[^] **[\"Tartan Details - Cornish National\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0882. **[^] **[\"Tartan Details - Cornish National #2\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0883. **[^] **[\"Tartan Details - Cornish Hunting\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0884. **[^] **[\"Tartan Details - Cornish National Day\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0885. **[^] **[\"Tartan Details - St. Piran Cornish Flag\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0886. **[^] **[\"Tartan Details - St. Piran Dress\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0887. **[^] **[\"Tartan Details - Welsh National #2\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0888. **[^] **[\"Tartan Details - Welsh National #3\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0889. **[^] **[\"Tartan Details - Welsh Assembly\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0890. **[^] **[\"Tartan Details - Spirit of Wales\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0891. ^ [_**a**_] [_**b**_] Newsome, Matthew Allan C. (15 April 2005). [\"Welsh Tartans\"]. _Albanach.org_. Retrieved 13 June 2023.\n0892. **[^] **[\"Tartan Details - Manx National\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.\n0893. **[^] **[\"Tartan Details - Manx National #2\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.\n0894. **[^] **[\"Tartan Details - Manx Centenary\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.\n0895. **[^] **[\"Tartan Details - Ellan Vannin\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.\n0896. **[^] **[\"Tartan Details - Ellan Vannin (1958)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.\n0897. **[^] **[\"Tartan Details - Manx Dress\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.\n0898. **[^] **[\"Tartan Details - Manx Ellan Vannin\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.\n0899. **[^] **[\"Tartan Details - Manx Heritage\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.\n0900. **[^] **[\"Tartan Details - Manx Hunting\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.\n0901. **[^] **[\"Tartan Details - Isle of Man\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.\n0902. **[^] **[\"Tartan Details - Manx Mannin Plaid\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 June 2023.\n0903. **[^] **[\"Tartan Details - Brittany National Walking\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0904. **[^] **[\"Tartan Details - Grey Breton\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0905. **[^] **[\"Tartan Details - Galicia\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0906. **[^] **[\"Tartan Details - Gallaecia - Galicia National\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0907. **[^] **[\"Tartan Details - Gallaecia (Unofficial)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0908. **[^] **[\"The Government of Canada Invites Canadians to Celebrate Tartan Day\"]. [Department of Canadian Heritage]. 5 April 2008. Archived from [the original] on 8 June 2011. Retrieved 24 September 2008.\n0909. **[^] **[\"Tartan Details – Bruce County\"]. _The Scottish Register of Tartans_. Retrieved 9 January 2017.\n0910. **[^] **[\"Tartan Details – Ville de Beauport\"]. _The Scottish Register of Tartans_. Retrieved 9 January 2017.\n0911. **[^] **[Scarlett (1990)], p. 38.\n0912. **[^] **[\"Hunting Tartans\"]. _Tartans.Scotland.net_. Archived from [the original] on 17 April 2020. Retrieved 12 June 2023.\n0913. **[^] **[\"Mourning Tartans\"]. _Tartans.Scotland.net_. Archived from [the original] on 7 August 2020. Retrieved 20 October 2008.\n0914. **[^] **[Innes of Learney (1971)], p. 69.\n0915. **[^] **[\"Tartan Details - Stewart, Dress #1\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 May 2023. Stuart dress exists in two other variants in the same database.\n0916. **[^] **[\"Hunting Stewart\"]. _StewartSociety.org_. Stewart Society. 2016. Retrieved 13 May 2023.\n0917. ^ [_**a**_] [_**b**_] [\"Tartan Details - Matheson Dress\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.\n0918. **[^] **[\"Tartan Details - Matheson Hunting\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.\n0919. **[^] **[\"Search Results \\[Matheson\\]\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2023. Retrieved 10 June 2023.\n0920. **[^] **[\"Information\"]. _ClanMatheson.org_. Clan Matheson Society. 2022. \"Heraldry\" pop-up. Retrieved 10 June 2023.\n0921. **[^] **[\"Tartan Details - MacMillan Dress\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.\n0922. **[^] **E.g.: [\"Tartan Details - Wallace Dress\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.\n0923. **[^] **[\"Tartan Details - Fraser Arisaid\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 10 June 2023.\n0924. **[^] **[Adam (1908/1970)], plate XIV, after p. 384.\n0925. ^ [_**a**_] [_**b**_] [\"Tartan Details - MacGregor Dress Burgundy (Dance)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 May 2023.\n\n [\"Tartan Details - MacGregor Dress Green (Dance)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 May 2023.\n\n [\"Tartan Details - MacGregor Dress Red (Dance)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 May 2023.\n0926. ^ [_**a**_] [_**b**_] See: [\"Search Results \\[dance\\]\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2023. Retrieved 13 May 2023. Nearly all dance tartans that have a clan or district association (i.e. are not recent creations by individuals or commercial weavers for fashion purposes) use white as a major colour.\n0927. **[^] **[\"The Queen's Tartans\"]. _MacGregorAndMacDuff.co.uk_. 13 September 2022. Retrieved 14 May 2023.\n0928. **[^] **[\"Tartans of Clan Donnachaidh\"]. _Donnachaidh.com_. Pitlochry, Scotland: Clan Donnachaidh Society. 2018. Archived from [the original] on 20 May 2023. Retrieved 19 May 2023.\n0929. **[^] **[Paterson, M. B. (2001)], pp. 182–183.\n0930. **[^] **[\"Tartan Details - Cian of Ely\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 30 May 2023.\n0931. **[^] **[\"Tartan Details - Thomas of Wales\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 13 June 2023.\n0932. **[^] **[\"Tartan Details - Wilson (Janet)\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 22 May 2023.\n0933. **[^] **[\"Tartan Details - Wilson (Janet) #2\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 22 May 2023.\n0934. **[^] **[\"Tartan Details - Ancient Caledonian Society\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 14 May 2023.\n0935. **[^] **[\"Ancient Caledonian Society Dress Coat (Circa 1786)\"] (PDF). _HighlandSocietyOfLondon.org_. [Highland Society of London]. 2020. Retrieved 14 May 2023.\n0936. **[^] **[\"How this brand has outsold Coke in Scotland for over a century\"]. _CBC.ca_. Canadian Broadcasting Company. Retrieved 18 October 2021.\n0937. **[^] **[\"Tartan Details - Irn Bru\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 30 June 2023.\n0938. **[^] **[\"Search Results \\[Loganair\\]\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2017. Retrieved 14 May 2023.\n0939. **[^] **[\"Tartan Details - Racing Stewart\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 30 June 2023.\n0940. ^ [_**a**_] [_**b**_] [_**c**_] [Paterson, M. B. (2001)], p. 168.\n0941. **[^] **Martin, Carol A. L. (2012). [\"Jun 22: The Premiere of the Film _Brave_\"]. _CuriousAndUnusualTartans.com_. Retrieved 14 May 2023.\n0942. **[^] **[\"Burberry Check\"]. _BurberryPLC.com_. [Burberry]. 2022\\. Archived from [the original] on 14 May 2023. Retrieved 14 May 2023.\n0943. **[^] **[Banks & de La Chapelle (2007)], pp. 33–34.\n0944. **[^] **[\"Heraldry & Tartan – The Fitzpatrick – Mac Giolla Phádraig Clan Society\"]. _FitzpatrickSociety.com_. Retrieved 9 November 2021.\n0945. **[^] **[\"Consultation on the Creation of a Register of Tartan\"] (PDF). _Scottish-Parliament.uk_. [Scottish Parliament]. Archived from [the original] (PDF) on 27 October 2005. Retrieved 9 September 2008.\n0946. **[^] **[\"Scottish Tartans Society\"]. _Scottish Tartans World Register_. Archived from [the original] on 4 October 2011. Retrieved 15 July 2023.\n0947. ^ [_**a**_] [_**b**_] [_**c**_] Newsome, Matthew A. C. (2004). [\"What's the 'Official' Word on Tartans?\"]. _Albanach.org_. Retrieved 15 July 2023. Originally published in _The Scottish Banner_, December 2004.\n0948. **[^] **[\"Scottish Register of Tartans Bill\"] (PDF). _Scottish-Parliament.uk_. [Scottish Parliament]. Archived from [the original] (PDF) on 19 December 2008. Retrieved 8 September 2008.\n0949. **[^] **[\"About us\"]. _TartansAuthority.com_. Scottish Tartans Authority. Archived from [the original] on 24 March 2004. Retrieved 12 September 2008.\n0950. **[^] **[\"About the Scottish Tartan World Register\"]. Scottish Tartans World Register. Retrieved 7 September 2008.\n0951. ^ [_**a**_] [_**b**_] [\"\\[Homepage\\]\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 4 November 2020.\n0952. **[^] **[\"About Us\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. Retrieved 8 February 2009.\n0953. **[^] **[\"Guidance\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 8 February 2009.\n0954. **[^] **Haig, Matt (2004). [_Brand Royalty: How the World's Top 100 Brands Thrive and Survive_]. Kogan Page Publishers. p. 143. [ISBN] [0749442573].\n0955. **[^] **[\"Tartan Details - Thomson Camel\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2009. Retrieved 31 May 2023.\n0956. **[^] **McDougall, Liam (18 May 2003). [\"Fashion giant Burberry tries to kill off traditional tartan rival\"]. _[Sunday Herald] _. Archived from [the original] on 10 March 2007. Retrieved 7 May 2009 – via FindArticles.com.\n0957. **[^] **[\"Burberry moves to protect iconic tartan in China\"]. _[Reuters].com_. 28 November 2013. Retrieved 13 July 2023.\n0958. **[^] **[Blakely (2015)], pp. 9, 11.\n0959. **[^] **[Blakely (2015)], pp. 12–13.\n0960. **[^] **[\"Scotland Act 1998 – Section C4: Intellectual Property\"]. _Legislation.gov.uk_. [The National Archives (United Kingdom)]. 1998. Retrieved 28 May 2023.\n0961. ^ [_**a**_] [_**b**_] [\"Guidance – Copyright and design right\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2020. Retrieved 28 May 2023.\n0962. **[^] **[\"Guidance – Restrictions\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2020. Retrieved 28 May 2023.\n0963. **[^] **[\"Guidance – Unique and 'sufficiently different' designs\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2020. Retrieved 15 May 2023.\n0964. **[^] **[\"United Kingdom – House of Lords Gives Expansive View of Copyright Protection\"]. _Ladas.com_. Ladas & Parry LLP. February 2002. Archived from [the original] on 16 June 2013. Retrieved 28 May 2023.\n0965. **[^] **MacQueen, H. (5 July 2008). [\"Pass off in the Royal Mile?\"]. _Scots Law News_. University of Edinburgh School of Law.\n0966. **[^] **Boag-Thomson, Jonanna; Carlyle, Paul (13 August 2008). [\"Tartan possibly from over the sea by certain not from Skye ends up in court\"]. _Lexology.com_. Retrieved 15 July 2023.\n0967. **[^] **Robertson, John (3 July 2008). [\"Ban on 'Queen's tartan' sales over breach of copyright law\"]. _[The Scotsman] _ – via TapaTalk Kilt Forum.\n0968. **[^] **Swan, Charles (20 November 2012). [\"Fabric Ticket Stamps are Copyright Artistic Works: _Abraham Moon & Sons Ltd v Thornber and Others_\"]. _SwanTurton.com_. London: Swan Turton LLP. Retrieved 28 May 2023.\n0969. **[^] **Clark, Simon; Sefton, Sara; Linsner, Marc (November 2020). [\"A Fabric Design Has Been Found to be a Work of Artistic Craftsmanship: Will Response Clothing Cause a Shift in How UK Copyright is Assessed?\"] (PDF). _European Intellectual Property Review_ (9). Reuters: 617–618. Retrieved 28 May 2023.\n0970. **[^] **[Blakely (2015)], pp. 1–3, 13.\n0971. **[^] **[Blakely (2015)], pp. 13–16.\n0972. **[^] **[Paterson, M. B. (2001)], p. 186.\n0973. **[^] **[\"Royal Tartans\"]. _Tartans.Scotland.net_. Archived from [the original] on 7 August 2020. Retrieved 18 October 2008.\n0974. **[^] **Hutcheson, Colin W. [\"Royal Tartans\"]. _TartansAuthority.com_. Scottish Tartans Authority. Retrieved 9 January 2017.\n0975. **[^] **[Campbell, A. (1890)], p. 55: \"No \\[Campbell of\\] Argyll has any right to the red stripe or the light blue as worn by this \\[Campbell of Cawdor\\] branch of the Campbell Clan.\"\n0976. **[^] **[\"Universal Tartans\"]. _Tartans.Scotland.net_. Archived from [the original] on 4 January 2018. Retrieved 18 October 2008.\n0977. **[^] **[Scarlett 1990], pp. 38, 44\n0978. **[^] **[Telfer Dunbar (1979)], pp. 144–154.\n0979. **[^] **[Paterson, M. B. (2001)], pp. 172–173.\n0980. **[^] **[_The Scottish Clans and Their Tartans_ (2005)] p. 14.\n0981. **[^] **[\"Information Leaflet No.2 – Scottish Crest Badges\"] (PDF). _ScotArmigers.net_. [Court of the Lord Lyon]. Retrieved 27 December 2011.\n0982. **[^] **[\"Tartans\"]. [Court of the Lord Lyon]. Archived from [the original] on 14 January 2008. Retrieved 16 October 2008.\n0983. **[^] **[\"The 'Basics' of Clan Cameron\"]. _Clan-Cameron.org_. Retrieved 18 October 2008.\n0984. **[^] **[\"Tartan – FAQ\"]. _TartansAuthority.com_. Scottish Tartans Authority. Archived from [the original] on 4 June 2004. Retrieved 18 October 2008.\n0985. **[^] **[\"Piping in the Balmoral Tartan\"]. _Royal.gov.uk_. Archived from [the original] on 4 December 2008. Retrieved 20 October 2008.\n0986. **[^] **[\"Royals 'banned' public from wearing official Balmoral tartan\"]. _[The Scotsman] _. 25 August 2016. Retrieved 15 July 2023.\n0987. **[^] **[\"Tartans\"]. _ClanChiefs.org_. [Standing Council of Scottish Chiefs]. 2016. Retrieved 30 May 2023.\n0988. **[^] **[Telfer Dunbar (1979)], pp. 17–18, quoting Haswell Miller's \"The Truth About Tartan\", _Scotland's Magazine_ (November 1947), and summarising his material in _Common Errors in Scottish History_ (1956).\n0989. ^ [_**a**_] [_**b**_] [Campbell, J. F. (1893)], pp. 333–334.\n0990. ^ [_**a**_] [_**b**_] [_**c**_] [Oyange-Ngando (2018)], p. 5\n0991. ^ [_**a**_] [_**b**_] [Last, Joseph Thomas] (1883). [\"A visit to the Masai people living beyond the borders of the Nguru country\"]. _[Proceedings of the Royal Geographical Society] _. **5** (9): 530–531. Retrieved 27 May 2023.\n0992. **[^] **Waller, Richard (October 1976). [\"The Maasai and the British 1895–1905: The Origins of an Alliance\"]. _The Journal of African History_. **17** (4). Cambridge University Press: 529–553\\. [doi]: [10.1017/S002185370001505X]. [S2CID] [154867998]. Retrieved 27 May 2023.\n0993. ^ [_**a**_] [_**b**_] [_**c**_] Huang, Nellie (8 September 2016). [\"Cultural Fabric: The Maasai's Shuka\"]. _GAdventures.com_. Retrieved 27 May 2023.\n0994. **[^] **[Oyange-Ngando (2018)], p. 4\n0995. **[^] **[Oyange-Ngando (2018)], p. 12.\n0996. ^ [_**a**_] [_**b**_] Mastamet-Mason, Anne; Müller, Karla; van der Merwe, Nicolette (2017). [_History of African indigenous costumes and textiles: Towards decolonising a fashion design curriculum_] (PDF). 14th National Design Education Conference. [Tshwane University of Technology]: Design Education Forum of South Africa. p. 147. Retrieved 27 May 2023.\n0997. **[^] **[\"Maasai\"]. _Junior Worldmark Encyclopedia of World Cultures_. 1999\\. Archived from [the original] on 8 July 2012.\n0998. **[^] **Klumpp, Donna Rey (18 July 2013). [\"An Historical Overview of Maasai Dress\"]. _The Journal of the Costume Society of America_. **7** (1): 95. [doi]: [10.1179/036121181803657846]. Retrieved 27 May 2023.\n0999. **[^] **[Oyange-Ngando (2018)], pp. 1, 2, 4, 7 _ff._\n1000. **[^] **Spring, Chris (2020). \"Textiles of Eastern and Southern Africa\". In Harris, Jennifer (ed.). [_A Companion to Textile Culture_]. Wiley. pp. 145–163\\. [doi]: [10.1002/9781118768730.ch8]. [ISBN] [9781118768907]. [S2CID] [225574795]. Retrieved 27 May 2023.\n1001. **[^] **[Altmann (2015)], pp. 21–22, 27–28.\n1002. **[^] **[Altmann (2015)], pp. 22–23.\n1003. **[^] **[Altmann (2015)], pp. 21–22, 29–30, 35–36.\n1004. **[^] **[Altmann (2015)], p. 19.\n1005. **[^] **[Altmann (2015)], pp. 47–83 _ff._\n1006. **[^] **[Altmann (2015)], pp. 63, 67, 257 _ff._\n1007. **[^] **[Altmann (2015)], p. 63.\n1008. **[^] **[Altmann (2015)], p. 332.\n1009. **[^] **[Altmann (2015)], p. 257.\n1010. **[^] **[Altmann (2015)], pp. 258, 380.\n1011. **[^] **[Altmann (2015)], pp. 257–258, 337, 380.\n1012. **[^] **[Altmann (2015)], p. 349.\n1013. **[^] **[Altmann (2015)], pp. 139, 257–258, 337, 349.\n1014. **[^] **Lynch, Anette; Mitchell D., Strauss (2014). _Ethnic Dress in the United States: A Cultural Encyclopedia_. Rowman and Littlefield. p. 189. [ISBN] [9780759121508].\n1015. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [_**f**_] [_**g**_] [_**h**_] German, Deb (9 June 2015). [\"Checkered Past: A Brief History of the Madras Plaid Shirt\"]. _Orvis News_. Archived from [the original] on 3 December 2020. Retrieved 26 May 2023.\n1016. **[^] **Schneider, Sven Raphael (21 June 2019). [\"Madras Guide – How the Shirt, Pants & Jackets Became Popular\"]. _Gentlemans Gazette_. [Archived] from the original on 2 June 2017. Retrieved 26 May 2022.\n1017. ^ [_**a**_] [_**b**_] Coo, Stéphanie Marie R. (3 October 2014). [_Clothing and the colonial culture of appearances in nineteenth century Spanish Philippines (1820-1896)_] (PhD). Université Nice Sophia Antipolis. [Archived] from the original on 26 October 2022. Retrieved 26 May 2023.\n1018. **[^] **[Paterson, M. B. (2001)], p. 171, footnote 36.\n1019. ^ [_**a**_] [_**b**_] [_**c**_] [_**d**_] [_**e**_] [\"Utagawa Kunisada (Toyokuni III)\"]. Scholten Japanese Art. 2022. Cites \"Waseda University Theatre Museum, ref. no. 002-1384\" as its own source.\n1020. **[^] **[\"Japanese checkered fabric by the yard, Futasuji-koushi (forked grid)\"]. _Bansyo-Fabric.com_. 2023. Retrieved 15 May 2023.\n1021. **[^] **[\"Koshi (Plaid)\"]. _Japanese Futon Glossary_. Futon Tokyo. 2019. Retrieved 15 May 2023.\n1022. **[^] **[\"Meaning of 格子縞 in Japanese\"]. _Japanese Dictionary_. Nihongo Master. 2023. Retrieved 15 May 2023.\n1023. **[^] **[\"Tartan Details - MacMillan/Isetan\"]. _TartanRegister.gov.uk_. Scottish Register of Tartans. 2012. Retrieved 7 June 2023.\n1024. **[^] **Wylie, James (2022). [\"The People's Tartan: Be part of reinventing tartan\"]. _VAM.ac.uk_. [V&A Dundee]. Retrieved 29 May 2023.\n1025. **[^] **Greene, Militsa (July 1965). \"Pushkin and Sir Walter Scott\". _Forum for Modern Language Studies_. **I** (3): 207–215\\. [doi]: [10.1093/fmls/I.3.207].\n1026. **[^] **Hoisington, Sona Stephan (1981). \"Pushkin's Belkin and the Mystifications of Sir Walter Scott\". _Comparative Literature_. **33** (4): 342–57\\. [doi]: [10.2307/1770468]. [JSTOR] [1770468].\n1027. **[^] **Braaten, Ann Wiley; DeLong, Marilyn R. (3 December 2013). [\"Shawls of the Germans from Russia\"]. _Textile History_. **44** (2): 197–213\\. [doi]: [10.1179/0040496913Z.00000000028]. [S2CID] [161759277].\n1028. **[^] **Braaten, Ann Wiley (November 2005). [_German from Russia Immigrants' Shawls Brought to the U.S.A.: A Material Culture Study_] (PhD). University of Minnesota. Retrieved 13 July 2023. This URL provides a partial preview without subscription, which contains the text for which it is cited here.\n1029. **[^] **Example: [\"233: An Exceptional Russian Red Tartan Ground Lacquer Casket after Geftler's _Sphinxes on the Neva Embankment_\"]. _FreemansAuction.com_. Philadelphia: [Freeman's Auctioneers & Appraisers]. 29 November 2018. Retrieved 26 May 2023.\n1030. **[^] **Seitova, F. Z. (2022). [\"Peculiarities of Teaching the Terminology of the Textile Industry in English\"]. In Muldagalieva, A. A.; Kulzhanbekova, G. K.; Baymuratova, I. A.; Tleulinova, M. B. (eds.). _Materials of Scientific and Methodological Articles: Current Trends in Interdisciplinary Issues_. Almaty: Al-Farabi Kazakh National University. p. 364. [ISBN] [9786010459557]. Retrieved 13 July 2023.\n1031. **[^] **Symons, Todd (6 May 2023). [\"Māori King presents King Charles III with coronation gifts, will join royals in royal box for coronation concert\"]. _[Newshub] _. Archived from [the original] on 6 May 2023. Retrieved 11 December 2023.\n1032. **[^] **Muru-Lanning, Charlotte (9 December 2020). [\"From fedoras to fascinators: A history of Māori and hats\"]. _[The Spinoff] _. Retrieved 3 September 2023.\n\n- _The Scottish Clans and Their Tartans_. Kessinger Publishing. 2005. [ISBN] [141796815X]. (Originally published by W. & A. K. Johnston & G. W. Bacon Ltd, Edinburgh and London, 1939.)\n- Adam, Frank (1970) \\[1908\\]. [Innes of Learney, Thomas] (ed.). [_The Clans, Septs, and Regiments of the Scottish Highlands_] (8th ed.). Clearfield. [ISBN] [9780717945009] – via Internet Archive.\n- Altmann, Karin (2015). [_Fabric of Life: Textile Arts in Bhutan – Culture, Tradition and Transformation_]. Berlin / Boston: De Gruyter. [ISBN] [9783110428612] – via Google Books.\n- Armstrong, Fiona Kathryne (31 August 2017). [_Highlandism: Its value to Scotland and how a queen and two aristocratic women promoted the phenomenon in the Victorian age_] (PhD). University of Strathclyde. [doi]: [10.48730/2m47-md74]. Retrieved 28 May 2023.\n- Banks, Jeffrey; de La Chapelle, Doria (2007). [_Tartan: Romancing the Plaid_]. New York: Rizzoli. [ISBN] [9780847829828]. Retrieved 4 June 2023 – via Internet Archive.\n- Barnes, R. Money; Allen, C. Kennedy (1956). _The Uniforms & History of the Scottish Regiments_. Seeley, Service & Co.\n- Black, Mary E. (1959). \"Tartan Study\". [_The Sett and Weaving of Tartans_] (PDF). Shelby, North Carolina: Shelby Mills. Retrieved 14 May 2023. This source appears to be reliable on weaving technique, but not on history of tartan.\n- Blakely, Megan Rae (November 2015). [\"Pattern Recognition: Governmental Regulation of Tartan and Commodification of Culture\"] (PDF). _International Journal of Cultural Property_. **22** (4): 487–504\\. [doi]: [10.1017/s0940739115000284]. [S2CID] [152102254]. Retrieved 28 May 2023. (URL is to full-text pre-print copy; page numbers cited refer to this copy.) For a more detailed treatment of ICH and trade-regulation issues, see also: Blakely, Megan Rae (2018). [_Intellectual property and intangible cultural heritage in Celtic-derived countries_] (PDF) (PhD). [University of Glasgow]. pp. 3, 78–103. Retrieved 28 May 2023.\n- Brancaz, Lauren Ann-Killian (2016). [\"The Homecoming of Tartan: How Scotland and North America Collaborate in Shaping Tartan\"]. _Études écossaises_ (18): 69–87\\. [doi]: [10.4000/etudesecossaises.1074]. [S2CID] [131473903].\n- Brown, Ian (2012). \"Introduction: Tartan, Tartanry and Hybridity\". _From Tartan to Tartany: Scottish Culture, History and Myth_. Edinburgh University Press. [ISBN] [9780748664641].\n- Campbell, Archibald (1890). [_The Children of the Mist: Or, the Scottish Clansmen in Peace and War_]. Edinburgh/London: W. & A. K. Johnston – via Internet Archive.\n- [Campbell, John Francis] (1893) \\[1862\\]. [_Popular Tales of the West Highlands_]. Vol. IV (new \\[2nd\\] ed.). Paisley / London: Alexander Gardner – via Internet Archive.\n- Cheape, Hugh (2012). \" _Gheibhte Breacain Charnaid_ ('Scarlet Tartans Would Be Got ...'): The Re-invention of Tradition\". In Brown, Ian (ed.). _From Tartan to Tartanry: Scottish Culture, History and Myth_. Edinburgh University Press. [ISBN] [9780748664641].\n- Cheape, Hugh (2006) \\[1991\\]. _Tartan: The Highland Habit_ (3rd ed.). Edinburgh: National Museums of Scotland. [ISBN] [9781905267026].\n- Eslea MacDonald, Peter, ed. (2012) \\[1997\\]. _The 1819 Key Pattern Book: One Hundred Original Tartans_ (2nd ed.). J. J. Munro Trust. [ISBN] [9780957186507].\n- [Groves, John Percy] (1893). [_History of the 42nd Royal Highlanders – \"The Black Watch\" now the first battalion \"The Black Watch\" (Royal Highlanders) 1729–1893_]. Edinburgh: [W. & A. K. Johnston] – via Internet Archive.\n- Hinderks, Victoria (2014). [\"The Politicization of Scottish Dress: A Study of Highland Garb\"]. _Constellations_. **5** (2). University of Alberta. [doi]: [10.29173/cons22033]. Retrieved 3 June 2023.\n- [Innes of Learney, Thomas] (1971) \\[1938\\]. [_The Tartans of the Clans and Families of Scotland_] (8th ed.). Edinburgh: [Johnston and Bacon]. [ISBN] [9780717945016] – via Internet Archive.\n- MacBain, Alexander (1911). [_An Etymological Dictionary of the Gaelic language_]. Stirling: Eneas Mackay – via Internet Archive.\n- MacDonald, Micheil (1995). _The Clans of Scotland: The History and Landscape of the Scottish Clans_. London: Grange Books. [ISBN] [1856277496].\n- Mackay, J. G. (1924). [_The Romantic Story of the Highland Garb and the Tartan_]. Stirling: Eneas Mackay – via Internet Archive.\n- [Martin, Richard] (16–18 September 1988). [\"Transmutations of the Tartan: Attributed Meanings to Tartan Design\"]. _Textiles as Primary Sources: Proceedings_. First Textile Society of America Symposium. [Textile Society of America] / [Minneapolis Institute of Art]. No. 646. [Archived] from the original on 19 May 2022. Retrieved 6 July 2023.\n- [Moncreiffe of that Ilk, Iain] (1967). _The Highland Clans_. London: Barrie & Rocklif.\n- Oyange-Ngando, Elizabeth (2018). [\"Fashion as property in traditional culture: A Maasai case study\"]. _Journal of Intellectual Property Law & Practice_. **13** (11): 878–883\\. [doi]: [10.1093/jiplp/jpy119]. Retrieved 27 May 2023. The PDF available here has different page numbering (starting 1, not 878); page numbers cited here refer to this PDF version.\n- Paterson, Michael Bennis (2001). [_Selling Scotland: Towards an intercultural approach to export marketing involving differentiation on the basis of 'Scottishness'_] (PDF) (PhD). [University of Glasgow]. Retrieved 29 May 2023.\n- Porter, James (1998). [\"The Folklore of Northern Scotland: Five Discourses on Cultural Representation\"]. _[Folklore] _. **109** (1–2). Taylor & Francis: 1–14\\. [doi]: [10.1080/0015587X.1998.9715956]. Archived from [the original] on 29 May 2023.\n- Scarlett, James D. (1990). _Tartan: The Highland Textile_. London: Shepheard-Walwyn. [ISBN] [9780856831201].\n- Stewart, Donald C. (1974) \\[1950\\]. _The Setts of the Scottish Tartans with Descriptive and Historical Notes_ (revised ed.). London: Shepheard-Walwyn. [ISBN] [9780442278854].\n- Stewart, Donald William (1893). [_Old & Rare Scottish Tartans_]. Edinburgh: George P. Johnson.\n- Telfer Dunbar, John (1979) \\[1962\\]. _History of Highland Dress_ (2nd ed.). London: B. T. Batsford. [ISBN] [071341894X]. The original edition is actually superior; it has the same text but more colour plates; it is available online: Telfer Dunbar, John (1962). [_History of Highland Dress_] (1st ed.). Philadelphia: Doufour Editions – via Internet Archive.\n- Thompson, J. Charles (1992). \"Introduction\". [_Scottish Tartans in Full Color_]. New York: Dover. [ISBN] [9780486270463] – via Google Books. A collection of illustrations from James Grant's _The Tartans of the Clans of Scotland_ (1886), without Grant's dubious text.\n- [Trevor-Roper, Hugh] (1983). \"The Highland Tradition of Scotland\". In [Hobsbawm, Eric]; Ranger, Terence (eds.). [_The Invention of Tradition_]. Cambridge University Press. [ISBN] [0521246458] – via Internet Archive.\n- Tuckett, Sally J. S. (2016). [\"Reassessing the romance: Tartan as a popular commodity, c. 1770–1830\"] (PDF). _Scottish Historical Review_. **95** (2): 182–202\\. [doi]: [10.3366/shr.2016.0295]. (URL is to full-text pre-print copy; page numbers cited refer to this copy.)\n- Urquhart, Blair, ed. (1994). [_Identifying Tartans_]. London: The Apple Press/Book Sales. [ISBN] [1850764999] – via Internet Archive. (Also known by the title _Tartans: The New Compact Study Guide and Identifier_.)\n- [Way of Plean, George]; Squire, Romilly (2000) \\[1995\\]. [_Clans & Tartans_]. Collins Pocket Reference. Glasgow: HarperCollins. [ISBN] [0004725018] – via Internet Archive.\n- Zaczek, Iain; Phillips, Charles (2013) \\[2004\\]. _The Illustrated Encyclopedia of Tartan_. Wigston, Leicestershire: Southwater/Anness. [ISBN] [9781780192758].\n\nWikimedia Commons has media related to [Tartans].\n\nLook up _**[tartan] **_ in Wiktionary, the free dictionary.\n\n- [McIan, Robert Ranald] (2013) \\[1843\\]. [\" _Clans of the Scottish Highlands_ Fashion Plates\"]. _Metropolitan Museum of Art Libraries_ – via [OCLC].org. A collection of illustrations from James Logan's _The Clans of the Scottish Highlands_ (1843) without Logan's dubious text.\n- [\"The Scottish Register of Tartans\"]. _TartanRegister.gov.uk_. – the Scottish government's official tartan registry"
  },
  {
    "path": "backend/tests/unit/onyx/tools/tool_implementations/websearch/test_brave_client.py",
    "content": "from __future__ import annotations\n\nfrom typing import Any\nfrom typing import cast\n\nimport pytest\nimport requests\nfrom fastapi import HTTPException\n\nimport onyx.tools.tool_implementations.web_search.clients.brave_client as brave_module\nfrom onyx.tools.tool_implementations.web_search.clients.brave_client import (\n    BraveClient,\n)\n\n\nclass DummyResponse:\n    def __init__(\n        self,\n        *,\n        status_code: int,\n        payload: dict[str, Any] | None = None,\n        text: str = \"\",\n    ) -> None:\n        self.status_code = status_code\n        self._payload = payload\n        self.text = text\n\n    def raise_for_status(self) -> None:\n        if self.status_code >= 400:\n            http_error = requests.HTTPError(f\"{self.status_code} Client Error\")\n            http_error.response = cast(requests.Response, self)\n            raise http_error\n\n    def json(self) -> dict[str, Any]:\n        if self._payload is None:\n            raise ValueError(\"No JSON payload\")\n        return self._payload\n\n\ndef test_search_maps_brave_response(monkeypatch: pytest.MonkeyPatch) -> None:\n    client = BraveClient(api_key=\"test-key\", num_results=5)\n\n    def _mock_get(*args: Any, **kwargs: Any) -> DummyResponse:  # noqa: ARG001\n        return DummyResponse(\n            status_code=200,\n            payload={\n                \"web\": {\n                    \"results\": [\n                        {\n                            \"title\": \"Result 1\",\n                            \"url\": \"https://example.com/one\",\n                            \"description\": \"Snippet 1\",\n                        },\n                        {\n                            \"title\": \"Result without URL\",\n                            \"description\": \"Should be skipped\",\n                        },\n                    ]\n                }\n            },\n        )\n\n    monkeypatch.setattr(brave_module.requests, \"get\", _mock_get)\n\n    results = client.search(\"onyx\")\n\n    assert len(results) == 1\n    assert results[0].title == \"Result 1\"\n    assert results[0].link == \"https://example.com/one\"\n    assert results[0].snippet == \"Snippet 1\"\n\n\ndef test_search_caps_count_to_brave_max(monkeypatch: pytest.MonkeyPatch) -> None:\n    client = BraveClient(api_key=\"test-key\", num_results=100)\n    captured_count: str | None = None\n\n    def _mock_get(*args: Any, **kwargs: Any) -> DummyResponse:  # noqa: ARG001\n        nonlocal captured_count\n        captured_count = kwargs[\"params\"][\"count\"]\n        return DummyResponse(status_code=200, payload={\"web\": {\"results\": []}})\n\n    monkeypatch.setattr(brave_module.requests, \"get\", _mock_get)\n\n    client.search(\"onyx\")\n\n    assert captured_count == \"20\"\n\n\ndef test_search_includes_optional_params(monkeypatch: pytest.MonkeyPatch) -> None:\n    client = BraveClient(\n        api_key=\"test-key\",\n        num_results=5,\n        country=\"us\",\n        search_lang=\"en\",\n        ui_lang=\"en-US\",\n        safesearch=\"moderate\",\n        freshness=\"pw\",\n    )\n    captured_params: dict[str, str] | None = None\n\n    def _mock_get(*args: Any, **kwargs: Any) -> DummyResponse:  # noqa: ARG001\n        nonlocal captured_params\n        captured_params = kwargs[\"params\"]\n        return DummyResponse(status_code=200, payload={\"web\": {\"results\": []}})\n\n    monkeypatch.setattr(brave_module.requests, \"get\", _mock_get)\n\n    client.search(\"onyx\")\n\n    assert captured_params is not None\n    assert captured_params[\"country\"] == \"US\"\n    assert captured_params[\"search_lang\"] == \"en\"\n    assert captured_params[\"ui_lang\"] == \"en-US\"\n    assert captured_params[\"safesearch\"] == \"moderate\"\n    assert captured_params[\"freshness\"] == \"pw\"\n\n\ndef test_search_raises_descriptive_error_on_http_failure(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    client = BraveClient(api_key=\"test-key\", num_results=5)\n\n    def _mock_get(*args: Any, **kwargs: Any) -> DummyResponse:  # noqa: ARG001\n        return DummyResponse(\n            status_code=401,\n            payload={\"error\": {\"message\": \"Unauthorized\"}},\n        )\n\n    monkeypatch.setattr(brave_module.requests, \"get\", _mock_get)\n\n    with pytest.raises(ValueError, match=\"status 401\"):\n        client.search(\"onyx\")\n\n\ndef test_search_does_not_retry_non_retryable_http_errors(\n    monkeypatch: pytest.MonkeyPatch,\n) -> None:\n    client = BraveClient(api_key=\"test-key\", num_results=5)\n    calls = 0\n\n    def _mock_get(*args: Any, **kwargs: Any) -> DummyResponse:  # noqa: ARG001\n        nonlocal calls\n        calls += 1\n        return DummyResponse(\n            status_code=401,\n            payload={\"error\": {\"message\": \"Unauthorized\"}},\n        )\n\n    monkeypatch.setattr(brave_module.requests, \"get\", _mock_get)\n\n    with pytest.raises(ValueError, match=\"status 401\"):\n        client.search(\"onyx\")\n    assert calls == 1\n\n\n@pytest.mark.parametrize(\n    (\"kwargs\", \"expected_error\"),\n    [\n        ({\"country\": \"USA\"}, \"country\"),\n        ({\"safesearch\": \"invalid\"}, \"safesearch\"),\n        ({\"freshness\": \"invalid\"}, \"freshness\"),\n        ({\"timeout_seconds\": 0}, \"timeout_seconds\"),\n    ],\n)\ndef test_constructor_rejects_invalid_config_values(\n    kwargs: dict[str, Any],\n    expected_error: str,\n) -> None:\n    with pytest.raises(ValueError, match=expected_error):\n        BraveClient(api_key=\"test-key\", **kwargs)\n\n\ndef test_test_connection_maps_invalid_key_errors() -> None:\n    client = BraveClient(api_key=\"test-key\")\n\n    def _mock_search(query: str) -> list[Any]:  # noqa: ARG001\n        raise ValueError(\"Brave search failed (status 401): Unauthorized\")\n\n    client.search = _mock_search  # type: ignore[method-assign]\n\n    with pytest.raises(HTTPException, match=\"Invalid Brave API key\"):\n        client.test_connection()\n\n\ndef test_test_connection_maps_rate_limit_errors() -> None:\n    client = BraveClient(api_key=\"test-key\")\n\n    def _mock_search(query: str) -> list[Any]:  # noqa: ARG001\n        raise ValueError(\"Brave search failed (status 429): Too many requests\")\n\n    client.search = _mock_search  # type: ignore[method-assign]\n\n    with pytest.raises(HTTPException, match=\"rate limit exceeded\"):\n        client.test_connection()\n\n\ndef test_test_connection_propagates_unexpected_errors() -> None:\n    client = BraveClient(api_key=\"test-key\")\n\n    def _mock_search(query: str) -> list[Any]:  # noqa: ARG001\n        raise RuntimeError(\"unexpected parsing bug\")\n\n    client.search = _mock_search  # type: ignore[method-assign]\n\n    with pytest.raises(RuntimeError, match=\"unexpected parsing bug\"):\n        client.test_connection()\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/tool_implementations/websearch/test_web_search_providers.py",
    "content": "import pytest\n\nfrom onyx.tools.tool_implementations.web_search.clients.brave_client import (\n    BraveClient,\n)\nfrom onyx.tools.tool_implementations.web_search.providers import (\n    build_search_provider_from_config,\n)\nfrom onyx.tools.tool_implementations.web_search.providers import (\n    provider_requires_api_key,\n)\nfrom shared_configs.enums import WebSearchProviderType\n\n\ndef test_provider_requires_api_key() -> None:\n    \"\"\"Test that provider_requires_api_key correctly identifies which providers need API keys.\"\"\"\n    assert provider_requires_api_key(WebSearchProviderType.EXA) is True\n    assert provider_requires_api_key(WebSearchProviderType.BRAVE) is True\n    assert provider_requires_api_key(WebSearchProviderType.SERPER) is True\n    assert provider_requires_api_key(WebSearchProviderType.GOOGLE_PSE) is True\n    assert provider_requires_api_key(WebSearchProviderType.SEARXNG) is False\n\n\ndef test_build_searxng_provider_without_api_key() -> None:\n    \"\"\"Test that SearXNG provider can be built without an API key.\"\"\"\n    provider = build_search_provider_from_config(\n        provider_type=WebSearchProviderType.SEARXNG,\n        api_key=None,\n        config={\"searxng_base_url\": \"http://localhost:8080\"},\n    )\n    assert provider is not None\n\n\ndef test_build_searxng_provider_requires_base_url() -> None:\n    \"\"\"Test that SearXNG provider requires a base URL.\"\"\"\n    with pytest.raises(ValueError, match=\"Please provide a URL\"):\n        build_search_provider_from_config(\n            provider_type=WebSearchProviderType.SEARXNG,\n            api_key=None,\n            config={},\n        )\n\n\ndef test_build_exa_provider_requires_api_key() -> None:\n    \"\"\"Test that Exa provider requires an API key.\"\"\"\n    with pytest.raises(ValueError, match=\"API key is required\"):\n        build_search_provider_from_config(\n            provider_type=WebSearchProviderType.EXA,\n            api_key=None,\n            config={},\n        )\n\n\ndef test_build_brave_provider_requires_api_key() -> None:\n    \"\"\"Test that Brave provider requires an API key.\"\"\"\n    with pytest.raises(ValueError, match=\"API key is required\"):\n        build_search_provider_from_config(\n            provider_type=WebSearchProviderType.BRAVE,\n            api_key=None,\n            config={},\n        )\n\n\ndef test_build_brave_provider_with_optional_config() -> None:\n    provider = build_search_provider_from_config(\n        provider_type=WebSearchProviderType.BRAVE,\n        api_key=\"test-api-key\",\n        config={\n            \"country\": \"us\",\n            \"search_lang\": \"en\",\n            \"ui_lang\": \"en-US\",\n            \"safesearch\": \"strict\",\n            \"freshness\": \"pm\",\n            \"timeout_seconds\": \"12\",\n        },\n    )\n    assert isinstance(provider, BraveClient)\n    assert provider._country == \"US\"  # noqa: SLF001\n    assert provider._search_lang == \"en\"  # noqa: SLF001\n    assert provider._ui_lang == \"en-US\"  # noqa: SLF001\n    assert provider._safesearch == \"strict\"  # noqa: SLF001\n    assert provider._freshness == \"pm\"  # noqa: SLF001\n    assert provider._timeout_seconds == 12  # noqa: SLF001\n\n\ndef test_build_brave_provider_rejects_invalid_timeout() -> None:\n    with pytest.raises(ValueError, match=\"timeout_seconds\"):\n        build_search_provider_from_config(\n            provider_type=WebSearchProviderType.BRAVE,\n            api_key=\"test-api-key\",\n            config={\"timeout_seconds\": \"not-an-int\"},\n        )\n\n\ndef test_build_serper_provider_requires_api_key() -> None:\n    \"\"\"Test that Serper provider requires an API key.\"\"\"\n    with pytest.raises(ValueError, match=\"API key is required\"):\n        build_search_provider_from_config(\n            provider_type=WebSearchProviderType.SERPER,\n            api_key=None,\n            config={},\n        )\n\n\ndef test_build_google_pse_provider_requires_api_key() -> None:\n    \"\"\"Test that Google PSE provider requires an API key.\"\"\"\n    with pytest.raises(ValueError, match=\"API key is required\"):\n        build_search_provider_from_config(\n            provider_type=WebSearchProviderType.GOOGLE_PSE,\n            api_key=None,\n            config={\"search_engine_id\": \"test-cx\"},\n        )\n\n\ndef test_build_google_pse_provider_requires_search_engine_id() -> None:\n    \"\"\"Test that Google PSE provider requires a search engine ID.\"\"\"\n    with pytest.raises(ValueError, match=\"search engine id\"):\n        build_search_provider_from_config(\n            provider_type=WebSearchProviderType.GOOGLE_PSE,\n            api_key=\"test-api-key\",\n            config={},\n        )\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/tool_implementations/websearch/test_web_search_tool_run.py",
    "content": "from __future__ import annotations\n\nfrom typing import Any\nfrom typing import cast\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.tools.models import ToolCallException\nfrom onyx.tools.models import WebSearchToolOverrideKwargs\nfrom onyx.tools.tool_implementations.web_search.models import WebSearchResult\nfrom onyx.tools.tool_implementations.web_search.web_search_tool import (\n    _normalize_queries_input,\n)\nfrom onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool\n\n\ndef _make_result(\n    title: str = \"Title\", link: str = \"https://example.com\"\n) -> WebSearchResult:\n    return WebSearchResult(title=title, link=link, snippet=\"snippet\")\n\n\ndef _make_tool(mock_provider: Any) -> WebSearchTool:\n    \"\"\"Instantiate WebSearchTool with all DB/provider deps mocked out.\"\"\"\n    provider_model = MagicMock()\n    provider_model.provider_type = \"brave\"\n    provider_model.api_key = MagicMock()\n    provider_model.api_key.get_value.return_value = \"fake-key\"\n    provider_model.config = {}\n\n    with (\n        patch(\n            \"onyx.tools.tool_implementations.web_search.web_search_tool.get_session_with_current_tenant\"\n        ) as mock_session_ctx,\n        patch(\n            \"onyx.tools.tool_implementations.web_search.web_search_tool.fetch_active_web_search_provider\",\n            return_value=provider_model,\n        ),\n        patch(\n            \"onyx.tools.tool_implementations.web_search.web_search_tool.build_search_provider_from_config\",\n            return_value=mock_provider,\n        ),\n    ):\n        mock_session_ctx.return_value.__enter__ = MagicMock(return_value=MagicMock())\n        mock_session_ctx.return_value.__exit__ = MagicMock(return_value=False)\n        tool = WebSearchTool(tool_id=1, emitter=MagicMock())\n\n    return tool\n\n\ndef _run(tool: WebSearchTool, queries: Any) -> list[str]:\n    \"\"\"Call tool.run() and return the list of query strings passed to provider.search.\"\"\"\n    placement = Placement(turn_index=0, tab_index=0)\n    override_kwargs = WebSearchToolOverrideKwargs(starting_citation_num=1)\n    tool.run(placement=placement, override_kwargs=override_kwargs, queries=queries)\n    search_mock = cast(MagicMock, tool._provider.search)  # noqa: SLF001\n    return [call.args[0] for call in search_mock.call_args_list]\n\n\nclass TestNormalizeQueriesInput:\n    \"\"\"Unit tests for _normalize_queries_input (coercion + sanitization).\"\"\"\n\n    def test_bare_string_returns_single_element_list(self) -> None:\n        assert _normalize_queries_input(\"hello\") == [\"hello\"]\n\n    def test_bare_string_stripped_and_sanitized(self) -> None:\n        assert _normalize_queries_input(\"  hello  \") == [\"hello\"]\n        # Control chars (e.g. null) removed; no space inserted\n        assert _normalize_queries_input(\"hello\\x00world\") == [\"helloworld\"]\n\n    def test_empty_string_returns_empty_list(self) -> None:\n        assert _normalize_queries_input(\"\") == []\n        assert _normalize_queries_input(\"   \") == []\n\n    def test_list_of_strings_returned_sanitized(self) -> None:\n        assert _normalize_queries_input([\"a\", \"b\"]) == [\"a\", \"b\"]\n        # Leading/trailing space stripped; control chars (e.g. tab) removed\n        assert _normalize_queries_input([\"  a  \", \"b\\tb\"]) == [\"a\", \"bb\"]\n\n    def test_list_none_skipped(self) -> None:\n        assert _normalize_queries_input([\"a\", None, \"b\"]) == [\"a\", \"b\"]\n\n    def test_list_non_string_coerced(self) -> None:\n        assert _normalize_queries_input([1, \"two\"]) == [\"1\", \"two\"]\n\n    def test_list_whitespace_only_dropped(self) -> None:\n        assert _normalize_queries_input([\"a\", \"\", \"  \", \"b\"]) == [\"a\", \"b\"]\n\n    def test_non_list_non_string_returns_empty_list(self) -> None:\n        assert _normalize_queries_input(42) == []\n        assert _normalize_queries_input({}) == []\n\n\nclass TestWebSearchToolRunQueryCoercion:\n    def test_list_of_strings_dispatches_each_query(self) -> None:\n        \"\"\"Normal case: list of queries → one search call per query.\"\"\"\n        mock_provider = MagicMock()\n        mock_provider.search.return_value = [_make_result()]\n        mock_provider.supports_site_filter = False\n        tool = _make_tool(mock_provider)\n\n        dispatched = _run(tool, [\"python decorators\", \"python generators\"])\n\n        # run_functions_tuples_in_parallel uses a thread pool; call_args_list order is non-deterministic.\n        assert sorted(dispatched) == [\"python decorators\", \"python generators\"]\n\n    def test_bare_string_dispatches_as_single_query(self) -> None:\n        \"\"\"LLM returns a bare string instead of an array — must NOT be split char-by-char.\"\"\"\n        mock_provider = MagicMock()\n        mock_provider.search.return_value = [_make_result()]\n        mock_provider.supports_site_filter = False\n        tool = _make_tool(mock_provider)\n\n        dispatched = _run(tool, \"what is the capital of France\")\n\n        assert len(dispatched) == 1\n        assert dispatched[0] == \"what is the capital of France\"\n\n    def test_bare_string_does_not_search_individual_characters(self) -> None:\n        \"\"\"Regression: single-char searches must not occur.\"\"\"\n        mock_provider = MagicMock()\n        mock_provider.search.return_value = [_make_result()]\n        mock_provider.supports_site_filter = False\n        tool = _make_tool(mock_provider)\n\n        dispatched = _run(tool, \"hi\")\n        for query_arg in dispatched:\n            assert (\n                len(query_arg) > 1\n            ), f\"Single-character query dispatched: {query_arg!r}\"\n\n    def test_control_characters_sanitized_before_dispatch(self) -> None:\n        \"\"\"Queries with control chars have those chars removed before dispatch.\"\"\"\n        mock_provider = MagicMock()\n        mock_provider.search.return_value = [_make_result()]\n        mock_provider.supports_site_filter = False\n        tool = _make_tool(mock_provider)\n\n        dispatched = _run(tool, [\"foo\\x00bar\", \"baz\\tbaz\"])\n\n        # run_functions_tuples_in_parallel uses a thread pool; call_args_list is in\n        # execution order, not submission order, so compare in sorted order.\n        assert sorted(dispatched) == [\"bazbaz\", \"foobar\"]\n\n    def test_all_empty_or_whitespace_raises_tool_call_exception(self) -> None:\n        \"\"\"When normalization yields no valid queries, run() raises ToolCallException.\"\"\"\n        mock_provider = MagicMock()\n        mock_provider.supports_site_filter = False\n        tool = _make_tool(mock_provider)\n        placement = Placement(turn_index=0, tab_index=0)\n        override_kwargs = WebSearchToolOverrideKwargs(starting_citation_num=1)\n\n        with pytest.raises(ToolCallException) as exc_info:\n            tool.run(\n                placement=placement,\n                override_kwargs=override_kwargs,\n                queries=\"   \",\n            )\n\n        assert \"No valid\" in str(exc_info.value)\n        cast(MagicMock, mock_provider.search).assert_not_called()\n"
  },
  {
    "path": "backend/tests/unit/onyx/tools/tool_implementations/websearch/test_websearch_utils.py",
    "content": "from pathlib import Path\n\nfrom onyx.tools.tool_implementations.open_url.models import WebContent\nfrom onyx.tools.tool_implementations.web_search.utils import (\n    inference_section_from_internet_page_scrape,\n)\n\nCONTENT_FILE = Path(__file__).parent / \"data\" / \"tartan.txt\"\n\n# inference_section_from_internet_page_scrape will cull the content to 15000 characters\nMAX_NUM_CHARS_WEB_CONTENT = 15000\nTRUNCATED_CONTENT_SUFFIX = \" [...truncated]\"\nTRUNCATED_CONTENT_PREFIX = \"[...truncated] \"\n\n\ndef get_text_from_file(file_path: Path) -> str:\n    with open(file_path, \"r\") as file:\n        return file.read()\n\n\ndef get_tartan_text() -> str:\n    return get_text_from_file(CONTENT_FILE)\n\n\ndef create_web_content_object(text: str) -> WebContent:\n    return WebContent(\n        full_content=text,\n        title=\"Tartan\",\n        link=\"https://en.wikipedia.org/wiki/Tartan\",\n        published_date=None,\n        scrape_successful=True,\n    )\n\n\ndef test_no_snippet_provided() -> None:\n    tartan_text = get_tartan_text()\n    web_content = create_web_content_object(tartan_text)\n\n    section = inference_section_from_internet_page_scrape(web_content, \"\")\n\n    # Section will be of length min(MAX_NUM_CHARS_WEB_CONTENT, len(tartan_text))\n    assert len(section.combined_content) == MAX_NUM_CHARS_WEB_CONTENT + len(\n        TRUNCATED_CONTENT_SUFFIX\n    )\n\n    # Get the combined_content without the truncated suffix\n    combined_content_without_suffix = section.combined_content[\n        :MAX_NUM_CHARS_WEB_CONTENT\n    ]\n\n    # Check that we have the first 15000 characters of the tartan text\n    assert combined_content_without_suffix == tartan_text[:MAX_NUM_CHARS_WEB_CONTENT]\n    assert (\n        section.combined_content\n        == tartan_text[:MAX_NUM_CHARS_WEB_CONTENT] + TRUNCATED_CONTENT_SUFFIX\n    )\n\n\ndef test_snippet_lower_bound_() -> None:\n    tartan_text = get_tartan_text()\n    web_content = create_web_content_object(tartan_text)\n\n    snippet = (\n        'Close-up view of traditional tartan cloth, showing pattern of diagonal \"ribs\" of colour; '\n        \"this is a five-colour tartan, in scarlet red, black, yellow...\"\n    )\n\n    section = inference_section_from_internet_page_scrape(web_content, snippet)\n\n    assert len(section.combined_content) == MAX_NUM_CHARS_WEB_CONTENT + len(\n        TRUNCATED_CONTENT_SUFFIX\n    )\n\n    no_suffix = section.combined_content[:MAX_NUM_CHARS_WEB_CONTENT]\n\n    assert no_suffix == tartan_text[:MAX_NUM_CHARS_WEB_CONTENT]\n    assert section.combined_content == no_suffix + TRUNCATED_CONTENT_SUFFIX\n\n\ndef test_snippet_provided_after_limit() -> None:\n    tartan_text = get_tartan_text()\n    web_content = create_web_content_object(tartan_text)\n\n    snippet = (\n        'Transmutations of the Tartan: Attributed Meanings to Tartan Design\"]. '\n        \"_Textiles as Primary Sources: Proceedings_. First Textile Society of America Symposium.\"\n    )\n\n    section = inference_section_from_internet_page_scrape(web_content, snippet)\n\n    assert (\n        len(section.combined_content)\n        == len(TRUNCATED_CONTENT_PREFIX) + MAX_NUM_CHARS_WEB_CONTENT\n    )\n\n    no_prefix = section.combined_content[len(TRUNCATED_CONTENT_PREFIX) :]\n    # We should get the last 15000 characters of the tartan text\n    index = len(tartan_text) - MAX_NUM_CHARS_WEB_CONTENT\n\n    assert no_prefix == tartan_text[index:]\n    assert section.combined_content == TRUNCATED_CONTENT_PREFIX + no_prefix\n\n\ndef test_snippet_provided_in_middle() -> None:\n    tartan_text = get_tartan_text()\n    web_content = create_web_content_object(tartan_text)\n\n    snippet = \"marketing as a district tartan for Ulster, Scottish weavers (and in two cases English, and in another American)\"\n\n    SNIPPET_START_LOCATION_IN_TEXT = 215398\n\n    section = inference_section_from_internet_page_scrape(web_content, snippet)\n\n    assert len(section.combined_content) == len(\n        TRUNCATED_CONTENT_PREFIX\n    ) + MAX_NUM_CHARS_WEB_CONTENT + len(TRUNCATED_CONTENT_SUFFIX)\n\n    no_prefix = section.combined_content[len(TRUNCATED_CONTENT_PREFIX) :]\n    no_affix = no_prefix[:MAX_NUM_CHARS_WEB_CONTENT]\n\n    # expected start index of the snippet\n    expected_start_idx = SNIPPET_START_LOCATION_IN_TEXT\n    expected_end_idx = expected_start_idx + len(snippet) - 1\n\n    top_padding = (MAX_NUM_CHARS_WEB_CONTENT - len(snippet)) // 2\n    bottom_padding = MAX_NUM_CHARS_WEB_CONTENT - len(snippet) - top_padding\n\n    assert (\n        no_affix\n        == tartan_text[\n            expected_start_idx - top_padding : expected_end_idx + bottom_padding + 1\n        ]\n    )\n\n    assert section.combined_content == (\n        TRUNCATED_CONTENT_PREFIX\n        + tartan_text[\n            expected_start_idx - top_padding : expected_end_idx + bottom_padding + 1\n        ]\n        + TRUNCATED_CONTENT_SUFFIX\n    )\n\n\ndef test_bad_snippet() -> None:\n    tartan_text = get_tartan_text()\n    web_content = create_web_content_object(tartan_text)\n\n    snippet = \"This is a bad snippet\"\n    # We expect the fallback (from top) to occur\n    section = inference_section_from_internet_page_scrape(web_content, snippet)\n\n    # Section will be of length min(MAX_NUM_CHARS_WEB_CONTENT, len(tartan_text))\n    assert len(section.combined_content) == MAX_NUM_CHARS_WEB_CONTENT + len(\n        TRUNCATED_CONTENT_SUFFIX\n    )\n\n    # Get the combined_content without the truncated suffix\n    combined_content_without_suffix = section.combined_content[\n        :MAX_NUM_CHARS_WEB_CONTENT\n    ]\n\n    # Check that we have the first 15000 characters of the tartan text\n    assert combined_content_without_suffix == tartan_text[:MAX_NUM_CHARS_WEB_CONTENT]\n    assert (\n        section.combined_content\n        == tartan_text[:MAX_NUM_CHARS_WEB_CONTENT] + TRUNCATED_CONTENT_SUFFIX\n    )\n\n\ndef test_similar_snippet_in_middle_fuzzy_match() -> None:\n    tartan_text = get_tartan_text()\n    web_content = create_web_content_object(tartan_text)\n\n    # In the actual text, the word \"English\" is used instead of \"British\"\n    # This is very similar though, so we expect a fuzzy match to occur\n    snippet = \"marketing as a district tartan for Ulster, Scottish weavers (and in two cases British, and in another American)\"\n\n    SNIPPET_START_LOCATION_IN_TEXT = 215398\n\n    section = inference_section_from_internet_page_scrape(web_content, snippet)\n\n    assert len(section.combined_content) == len(\n        TRUNCATED_CONTENT_PREFIX\n    ) + MAX_NUM_CHARS_WEB_CONTENT + len(TRUNCATED_CONTENT_SUFFIX)\n\n    no_prefix = section.combined_content[len(TRUNCATED_CONTENT_PREFIX) :]\n    no_affix = no_prefix[:MAX_NUM_CHARS_WEB_CONTENT]\n\n    # expected start index of the snippet\n    expected_start_idx = SNIPPET_START_LOCATION_IN_TEXT\n    expected_end_idx = expected_start_idx + len(snippet) - 1\n\n    top_padding = (MAX_NUM_CHARS_WEB_CONTENT - len(snippet)) // 2\n    bottom_padding = MAX_NUM_CHARS_WEB_CONTENT - len(snippet) - top_padding\n\n    assert (\n        no_affix\n        == tartan_text[\n            expected_start_idx - top_padding : expected_end_idx + bottom_padding + 1\n        ]\n    )\n\n    assert section.combined_content == (\n        TRUNCATED_CONTENT_PREFIX\n        + tartan_text[\n            expected_start_idx - top_padding : expected_end_idx + bottom_padding + 1\n        ]\n        + TRUNCATED_CONTENT_SUFFIX\n    )\n"
  },
  {
    "path": "backend/tests/unit/onyx/tracing/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/onyx/tracing/test_tracing_setup.py",
    "content": "\"\"\"Unit tests for tracing setup functions.\"\"\"\n\nimport importlib\nimport os\nfrom unittest.mock import patch\n\nfrom onyx.configs import app_configs\nfrom onyx.tracing import setup as tracing_setup\n\n\ndef test_setup_tracing_with_no_creds() -> None:\n    \"\"\"Test that setup_tracing returns empty list when no credentials are configured.\"\"\"\n    # Ensure no tracing credentials are set\n    os.environ.pop(\"BRAINTRUST_API_KEY\", None)\n    os.environ.pop(\"LANGFUSE_SECRET_KEY\", None)\n    os.environ.pop(\"LANGFUSE_PUBLIC_KEY\", None)\n\n    # Reload modules to pick up environment changes\n    importlib.reload(app_configs)\n    importlib.reload(tracing_setup)\n\n    # Reset the initialized flag\n    tracing_setup._initialized = False\n\n    # Call the function - should return empty list\n    result = tracing_setup.setup_tracing()\n    assert result == []\n\n\ndef test_setup_tracing_is_idempotent() -> None:\n    \"\"\"Test that setup_tracing only initializes once.\"\"\"\n    # Ensure no tracing credentials are set\n    os.environ.pop(\"BRAINTRUST_API_KEY\", None)\n    os.environ.pop(\"LANGFUSE_SECRET_KEY\", None)\n    os.environ.pop(\"LANGFUSE_PUBLIC_KEY\", None)\n\n    # Reload modules\n    importlib.reload(app_configs)\n    importlib.reload(tracing_setup)\n\n    # Reset the initialized flag\n    tracing_setup._initialized = False\n\n    # First call\n    tracing_setup.setup_tracing()\n\n    # Second call should return empty (already initialized)\n    result2 = tracing_setup.setup_tracing()\n    assert result2 == []\n\n    # Clean up\n    tracing_setup._initialized = False\n\n\ndef test_setup_tracing_with_braintrust_creds() -> None:\n    \"\"\"Test that setup_tracing initializes Braintrust when credentials are available.\"\"\"\n    # Set Braintrust credentials\n    os.environ[\"BRAINTRUST_API_KEY\"] = \"test-api-key\"\n    os.environ[\"BRAINTRUST_PROJECT\"] = \"test-project\"\n    os.environ.pop(\"LANGFUSE_SECRET_KEY\", None)\n    os.environ.pop(\"LANGFUSE_PUBLIC_KEY\", None)\n\n    # Reload modules to pick up new environment variables\n    importlib.reload(app_configs)\n    importlib.reload(tracing_setup)\n\n    # Reset the initialized flag\n    tracing_setup._initialized = False\n\n    # Mock the _setup_braintrust function to avoid actual initialization\n    with patch.object(tracing_setup, \"_setup_braintrust\") as mock_setup:\n        result = tracing_setup.setup_tracing()\n        mock_setup.assert_called_once()\n        assert \"braintrust\" in result\n\n    # Clean up\n    os.environ.pop(\"BRAINTRUST_API_KEY\", None)\n    os.environ.pop(\"BRAINTRUST_PROJECT\", None)\n    tracing_setup._initialized = False\n    importlib.reload(app_configs)\n\n\ndef test_setup_tracing_with_langfuse_creds() -> None:\n    \"\"\"Test that setup_tracing initializes Langfuse when credentials are available.\"\"\"\n    # Set Langfuse credentials\n    os.environ[\"LANGFUSE_SECRET_KEY\"] = \"test-secret-key\"\n    os.environ[\"LANGFUSE_PUBLIC_KEY\"] = \"test-public-key\"\n    os.environ.pop(\"BRAINTRUST_API_KEY\", None)\n\n    # Reload modules to pick up new environment variables\n    importlib.reload(app_configs)\n    importlib.reload(tracing_setup)\n\n    # Reset the initialized flag\n    tracing_setup._initialized = False\n\n    # Mock the _setup_langfuse function to avoid actual initialization\n    with patch.object(tracing_setup, \"_setup_langfuse\") as mock_setup:\n        result = tracing_setup.setup_tracing()\n        mock_setup.assert_called_once()\n        assert \"langfuse\" in result\n\n    # Clean up\n    os.environ.pop(\"LANGFUSE_SECRET_KEY\", None)\n    os.environ.pop(\"LANGFUSE_PUBLIC_KEY\", None)\n    tracing_setup._initialized = False\n    importlib.reload(app_configs)\n\n\ndef test_setup_tracing_with_both_providers() -> None:\n    \"\"\"Test that setup_tracing initializes both providers when both credentials are available.\"\"\"\n    # Set both credentials\n    os.environ[\"BRAINTRUST_API_KEY\"] = \"test-api-key\"\n    os.environ[\"BRAINTRUST_PROJECT\"] = \"test-project\"\n    os.environ[\"LANGFUSE_SECRET_KEY\"] = \"test-secret-key\"\n    os.environ[\"LANGFUSE_PUBLIC_KEY\"] = \"test-public-key\"\n\n    # Reload modules to pick up new environment variables\n    importlib.reload(app_configs)\n    importlib.reload(tracing_setup)\n\n    # Reset the initialized flag\n    tracing_setup._initialized = False\n\n    # Mock both setup functions to avoid actual initialization\n    with (\n        patch.object(tracing_setup, \"_setup_braintrust\") as mock_bt,\n        patch.object(tracing_setup, \"_setup_langfuse\") as mock_lf,\n    ):\n        result = tracing_setup.setup_tracing()\n        mock_bt.assert_called_once()\n        mock_lf.assert_called_once()\n        assert \"braintrust\" in result\n        assert \"langfuse\" in result\n\n    # Clean up\n    os.environ.pop(\"BRAINTRUST_API_KEY\", None)\n    os.environ.pop(\"BRAINTRUST_PROJECT\", None)\n    os.environ.pop(\"LANGFUSE_SECRET_KEY\", None)\n    os.environ.pop(\"LANGFUSE_PUBLIC_KEY\", None)\n    tracing_setup._initialized = False\n    importlib.reload(app_configs)\n"
  },
  {
    "path": "backend/tests/unit/onyx/utils/test_gpu_utils.py",
    "content": "\"\"\"\nTest cases for onyx/utils/gpu_utils.py with DISABLE_MODEL_SERVER environment variable\n\"\"\"\n\nimport os\nfrom unittest import TestCase\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport requests\n\nfrom onyx.utils.gpu_utils import _get_gpu_status_from_model_server\n\n\nclass TestGPUUtils(TestCase):\n    \"\"\"Test cases for GPU utilities with DISABLE_MODEL_SERVER support\"\"\"\n\n    @patch.dict(os.environ, {\"DISABLE_MODEL_SERVER\": \"true\"})\n    def test_disable_model_server_true(self) -> None:\n        \"\"\"Test that GPU status returns False when DISABLE_MODEL_SERVER is true\"\"\"\n        result = _get_gpu_status_from_model_server(indexing=False)\n        assert result is False\n\n    @patch.dict(os.environ, {\"DISABLE_MODEL_SERVER\": \"True\"})\n    def test_disable_model_server_capital_true(self) -> None:\n        \"\"\"Test that GPU status returns False when DISABLE_MODEL_SERVER is True (capital)\"\"\"\n        # \"True\" WILL trigger disable because .lower() is called\n        result = _get_gpu_status_from_model_server(indexing=False)\n        assert result is False\n\n    @patch.dict(os.environ, {\"DISABLE_MODEL_SERVER\": \"1\"})\n    @patch(\"requests.get\")\n    def test_disable_model_server_one(self, mock_get: MagicMock) -> None:\n        \"\"\"Test that GPU status makes request when DISABLE_MODEL_SERVER is 1\"\"\"\n        # \"1\" should NOT trigger disable (only \"true\" should)\n        mock_response = MagicMock()\n        mock_response.json.return_value = {\"gpu_available\": True}\n        mock_get.return_value = mock_response\n\n        result = _get_gpu_status_from_model_server(indexing=False)\n        assert result is True\n        mock_get.assert_called_once()\n\n    @patch.dict(os.environ, {\"DISABLE_MODEL_SERVER\": \"yes\"})\n    @patch(\"requests.get\")\n    def test_disable_model_server_yes(self, mock_get: MagicMock) -> None:\n        \"\"\"Test that GPU status makes request when DISABLE_MODEL_SERVER is yes\"\"\"\n        # \"yes\" should NOT trigger disable (only \"true\" should)\n        mock_response = MagicMock()\n        mock_response.json.return_value = {\"gpu_available\": False}\n        mock_get.return_value = mock_response\n\n        result = _get_gpu_status_from_model_server(indexing=True)\n        assert result is False\n        mock_get.assert_called_once()\n\n    @patch.dict(os.environ, {\"DISABLE_MODEL_SERVER\": \"false\"})\n    @patch(\"requests.get\")\n    def test_disable_model_server_false(self, mock_get: MagicMock) -> None:\n        \"\"\"Test that GPU status makes request when DISABLE_MODEL_SERVER is false\"\"\"\n        mock_response = MagicMock()\n        mock_response.json.return_value = {\"gpu_available\": True}\n        mock_get.return_value = mock_response\n\n        result = _get_gpu_status_from_model_server(indexing=True)\n        assert result is True\n        mock_get.assert_called_once()\n\n    @patch.dict(os.environ, {}, clear=True)\n    @patch(\"requests.get\")\n    def test_disable_model_server_not_set(self, mock_get: MagicMock) -> None:\n        \"\"\"Test that GPU status makes request when DISABLE_MODEL_SERVER is not set\"\"\"\n        mock_response = MagicMock()\n        mock_response.json.return_value = {\"gpu_available\": False}\n        mock_get.return_value = mock_response\n\n        result = _get_gpu_status_from_model_server(indexing=False)\n        assert result is False\n        mock_get.assert_called_once()\n\n    @patch.dict(os.environ, {\"DISABLE_MODEL_SERVER\": \"true\"})\n    def test_disabled_host_fallback(self) -> None:\n        \"\"\"Test that disabled host is handled correctly via environment variable\"\"\"\n        result = _get_gpu_status_from_model_server(indexing=True)\n        assert result is False\n\n    @patch.dict(os.environ, {\"DISABLE_MODEL_SERVER\": \"false\"})\n    @patch(\"requests.get\")\n    def test_request_exception_handling(self, mock_get: MagicMock) -> None:\n        \"\"\"Test that exceptions are properly raised when GPU status request fails\"\"\"\n        mock_get.side_effect = requests.RequestException(\"Connection error\")\n\n        with self.assertRaises(requests.RequestException):\n            _get_gpu_status_from_model_server(indexing=False)\n\n    @patch.dict(os.environ, {\"DISABLE_MODEL_SERVER\": \"true\"})\n    @patch(\"requests.get\")\n    def test_gpu_status_request_with_disable(self, mock_get: MagicMock) -> None:\n        \"\"\"Test that no request is made when DISABLE_MODEL_SERVER is true\"\"\"\n        result = _get_gpu_status_from_model_server(indexing=True)\n        assert result is False\n        # Verify that no HTTP request was made\n        mock_get.assert_not_called()\n"
  },
  {
    "path": "backend/tests/unit/onyx/utils/test_json_river.py",
    "content": "\"\"\"Tests for the jsonriver incremental JSON parser.\"\"\"\n\nimport json\n\nimport pytest\n\nfrom onyx.utils.jsonriver import JsonValue\nfrom onyx.utils.jsonriver import Parser\n\n\ndef _all_deltas(chunks: list[str]) -> list[JsonValue]:\n    \"\"\"Feed chunks one at a time and collect all emitted deltas.\"\"\"\n    parser = Parser()\n    deltas: list[JsonValue] = []\n    for chunk in chunks:\n        deltas.extend(parser.feed(chunk))\n    deltas.extend(parser.finish())\n    return deltas\n\n\nclass TestParseComplete:\n    \"\"\"Parsing complete JSON in a single chunk.\"\"\"\n\n    def test_simple_object(self) -> None:\n        deltas = _all_deltas(['{\"a\": 1}'])\n        assert any(r == {\"a\": 1.0} or r == {\"a\": 1} for r in deltas)\n\n    def test_simple_array(self) -> None:\n        deltas = _all_deltas([\"[1, 2, 3]\"])\n        assert any(isinstance(r, list) for r in deltas)\n\n    def test_simple_string(self) -> None:\n        deltas = _all_deltas(['\"hello\"'])\n        assert \"hello\" in deltas or any(\"hello\" in str(r) for r in deltas)\n\n    def test_null(self) -> None:\n        deltas = _all_deltas([\"null\"])\n        assert None in deltas\n\n    def test_boolean_true(self) -> None:\n        deltas = _all_deltas([\"true\"])\n        assert True in deltas\n\n    def test_boolean_false(self) -> None:\n        deltas = _all_deltas([\"false\"])\n        assert any(r is False for r in deltas)\n\n    def test_number(self) -> None:\n        deltas = _all_deltas([\"42\"])\n        assert 42.0 in deltas\n\n    def test_negative_number(self) -> None:\n        deltas = _all_deltas([\"-3.14\"])\n        assert any(abs(r - (-3.14)) < 1e-10 for r in deltas if isinstance(r, float))\n\n    def test_empty_object(self) -> None:\n        deltas = _all_deltas([\"{}\"])\n        assert {} in deltas\n\n    def test_empty_array(self) -> None:\n        deltas = _all_deltas([\"[]\"])\n        assert [] in deltas\n\n\nclass TestStreamingDeltas:\n    \"\"\"Incremental feeding produces correct deltas.\"\"\"\n\n    def test_object_string_value_streamed_char_by_char(self) -> None:\n        chunks = list('{\"code\": \"abc\"}')\n        deltas = _all_deltas(chunks)\n        str_parts = []\n        for d in deltas:\n            if isinstance(d, dict) and \"code\" in d:\n                val = d[\"code\"]\n                if isinstance(val, str):\n                    str_parts.append(val)\n        assert \"\".join(str_parts) == \"abc\"\n\n    def test_object_streamed_in_two_halves(self) -> None:\n        deltas = _all_deltas(['{\"name\": \"Al', 'ice\"}'])\n        str_parts = []\n        for d in deltas:\n            if isinstance(d, dict) and \"name\" in d:\n                val = d[\"name\"]\n                if isinstance(val, str):\n                    str_parts.append(val)\n        assert \"\".join(str_parts) == \"Alice\"\n\n    def test_multiple_keys_streamed(self) -> None:\n        deltas = _all_deltas(['{\"a\": \"x', '\", \"b\": \"y\"}'])\n        a_parts: list[str] = []\n        b_parts: list[str] = []\n        for d in deltas:\n            if isinstance(d, dict):\n                if \"a\" in d and isinstance(d[\"a\"], str):\n                    a_parts.append(d[\"a\"])\n                if \"b\" in d and isinstance(d[\"b\"], str):\n                    b_parts.append(d[\"b\"])\n        assert \"\".join(a_parts) == \"x\"\n        assert \"\".join(b_parts) == \"y\"\n\n    def test_deltas_only_contain_new_string_content(self) -> None:\n        parser = Parser()\n        d1 = parser.feed('{\"msg\": \"hel')\n        d2 = parser.feed('lo\"}')\n        parser.finish()\n\n        msg_parts = []\n        for d in d1 + d2:\n            if isinstance(d, dict) and \"msg\" in d:\n                val = d[\"msg\"]\n                if isinstance(val, str):\n                    msg_parts.append(val)\n        assert \"\".join(msg_parts) == \"hello\"\n\n        # Each delta should only contain new chars, not repeat previous ones\n        if len(msg_parts) == 2:\n            assert msg_parts[0] == \"hel\"\n            assert msg_parts[1] == \"lo\"\n\n\nclass TestEscapeSequences:\n    \"\"\"JSON escape sequences are decoded correctly, even across chunk boundaries.\"\"\"\n\n    def test_newline_escape(self) -> None:\n        deltas = _all_deltas(['{\"text\": \"line1\\\\nline2\"}'])\n        text_parts = []\n        for d in deltas:\n            if isinstance(d, dict) and \"text\" in d and isinstance(d[\"text\"], str):\n                text_parts.append(d[\"text\"])\n        assert \"\".join(text_parts) == \"line1\\nline2\"\n\n    def test_tab_escape(self) -> None:\n        deltas = _all_deltas(['{\"t\": \"a\\\\tb\"}'])\n        parts = []\n        for d in deltas:\n            if isinstance(d, dict) and \"t\" in d and isinstance(d[\"t\"], str):\n                parts.append(d[\"t\"])\n        assert \"\".join(parts) == \"a\\tb\"\n\n    def test_escaped_quote(self) -> None:\n        deltas = _all_deltas(['{\"q\": \"say \\\\\"hi\\\\\"\"}'])\n        parts = []\n        for d in deltas:\n            if isinstance(d, dict) and \"q\" in d and isinstance(d[\"q\"], str):\n                parts.append(d[\"q\"])\n        assert \"\".join(parts) == 'say \"hi\"'\n\n    def test_unicode_escape(self) -> None:\n        deltas = _all_deltas(['{\"u\": \"\\\\u0041\\\\u0042\"}'])\n        parts = []\n        for d in deltas:\n            if isinstance(d, dict) and \"u\" in d and isinstance(d[\"u\"], str):\n                parts.append(d[\"u\"])\n        assert \"\".join(parts) == \"AB\"\n\n    def test_escape_split_across_chunks(self) -> None:\n        deltas = _all_deltas(['{\"x\": \"a\\\\', 'nb\"}'])\n        parts = []\n        for d in deltas:\n            if isinstance(d, dict) and \"x\" in d and isinstance(d[\"x\"], str):\n                parts.append(d[\"x\"])\n        assert \"\".join(parts) == \"a\\nb\"\n\n    def test_unicode_escape_split_across_chunks(self) -> None:\n        deltas = _all_deltas(['{\"u\": \"\\\\u00', '41\"}'])\n        parts = []\n        for d in deltas:\n            if isinstance(d, dict) and \"u\" in d and isinstance(d[\"u\"], str):\n                parts.append(d[\"u\"])\n        assert \"\".join(parts) == \"A\"\n\n    def test_backslash_escape(self) -> None:\n        deltas = _all_deltas(['{\"p\": \"c:\\\\\\\\dir\"}'])\n        parts = []\n        for d in deltas:\n            if isinstance(d, dict) and \"p\" in d and isinstance(d[\"p\"], str):\n                parts.append(d[\"p\"])\n        assert \"\".join(parts) == \"c:\\\\dir\"\n\n\nclass TestNestedStructures:\n    \"\"\"Nested objects and arrays produce correct deltas.\"\"\"\n\n    def test_nested_object(self) -> None:\n        deltas = _all_deltas(['{\"outer\": {\"inner\": \"val\"}}'])\n        found = False\n        for d in deltas:\n            if isinstance(d, dict) and \"outer\" in d:\n                outer = d[\"outer\"]\n                if isinstance(outer, dict) and \"inner\" in outer:\n                    found = True\n        assert found\n\n    def test_array_of_strings(self) -> None:\n        deltas = _all_deltas(['[\"a', '\", \"b\"]'])\n        all_items: list[str] = []\n        for d in deltas:\n            if isinstance(d, list):\n                for item in d:\n                    if isinstance(item, str):\n                        all_items.append(item)\n            elif isinstance(d, str):\n                all_items.append(d)\n        joined = \"\".join(all_items)\n        assert \"a\" in joined\n        assert \"b\" in joined\n\n    def test_object_with_number_and_bool(self) -> None:\n        deltas = _all_deltas(['{\"count\": 42, \"active\": true}'])\n        has_count = False\n        has_active = False\n        for d in deltas:\n            if isinstance(d, dict):\n                if \"count\" in d and d[\"count\"] == 42.0:\n                    has_count = True\n                if \"active\" in d and d[\"active\"] is True:\n                    has_active = True\n        assert has_count\n        assert has_active\n\n    def test_object_with_null_value(self) -> None:\n        deltas = _all_deltas(['{\"key\": null}'])\n        found = False\n        for d in deltas:\n            if isinstance(d, dict) and \"key\" in d and d[\"key\"] is None:\n                found = True\n        assert found\n\n\nclass TestComputeDelta:\n    \"\"\"Direct tests for the _compute_delta static method.\"\"\"\n\n    def test_none_prev_returns_current(self) -> None:\n        assert Parser._compute_delta(None, {\"a\": \"b\"}) == {\"a\": \"b\"}\n\n    def test_string_delta(self) -> None:\n        assert Parser._compute_delta(\"hel\", \"hello\") == \"lo\"\n\n    def test_string_no_change(self) -> None:\n        assert Parser._compute_delta(\"same\", \"same\") is None\n\n    def test_dict_new_key(self) -> None:\n        assert Parser._compute_delta({\"a\": \"x\"}, {\"a\": \"x\", \"b\": \"y\"}) == {\"b\": \"y\"}\n\n    def test_dict_string_append(self) -> None:\n        assert Parser._compute_delta({\"code\": \"def\"}, {\"code\": \"def hello()\"}) == {\n            \"code\": \" hello()\"\n        }\n\n    def test_dict_no_change(self) -> None:\n        assert Parser._compute_delta({\"a\": 1}, {\"a\": 1}) is None\n\n    def test_list_new_items(self) -> None:\n        assert Parser._compute_delta([1, 2], [1, 2, 3]) == [3]\n\n    def test_list_last_item_updated(self) -> None:\n        assert Parser._compute_delta([\"a\"], [\"ab\"]) == [\"ab\"]\n\n    def test_list_no_change(self) -> None:\n        assert Parser._compute_delta([1, 2], [1, 2]) is None\n\n    def test_primitive_change(self) -> None:\n        assert Parser._compute_delta(1, 2) == 2\n\n    def test_primitive_no_change(self) -> None:\n        assert Parser._compute_delta(42, 42) is None\n\n\nclass TestParserLifecycle:\n    \"\"\"Edge cases around parser state and lifecycle.\"\"\"\n\n    def test_feed_after_finish_returns_empty(self) -> None:\n        parser = Parser()\n        parser.feed('{\"a\": 1}')\n        parser.finish()\n        assert parser.feed(\"more\") == []\n\n    def test_empty_feed_returns_empty(self) -> None:\n        parser = Parser()\n        assert parser.feed(\"\") == []\n\n    def test_whitespace_only_returns_empty(self) -> None:\n        parser = Parser()\n        assert parser.feed(\"   \") == []\n\n    def test_finish_with_trailing_whitespace(self) -> None:\n        parser = Parser()\n        # Trailing whitespace terminates the number, so feed() emits it\n        deltas = parser.feed(\"42  \")\n        assert 42.0 in deltas\n        parser.finish()  # Should not raise\n\n    def test_finish_with_trailing_content_raises(self) -> None:\n        parser = Parser()\n        # Feed a complete JSON value followed by non-whitespace in one chunk\n        parser.feed('{\"a\": 1} extra')\n        with pytest.raises(ValueError, match=\"Unexpected trailing\"):\n            parser.finish()\n\n    def test_finish_flushes_pending_number(self) -> None:\n        parser = Parser()\n        deltas = parser.feed(\"42\")\n        # Number has no terminator, so feed() can't emit it yet\n        assert deltas == []\n        final = parser.finish()\n        assert 42.0 in final\n\n\nclass TestToolCallSimulation:\n    \"\"\"Simulate the LLM tool-call streaming use case.\"\"\"\n\n    def test_python_tool_call_streaming(self) -> None:\n        full_json = json.dumps({\"code\": \"print('hello world')\"})\n        chunk_size = 5\n        chunks = [\n            full_json[i : i + chunk_size] for i in range(0, len(full_json), chunk_size)\n        ]\n\n        parser = Parser()\n        code_parts: list[str] = []\n        for chunk in chunks:\n            for delta in parser.feed(chunk):\n                if isinstance(delta, dict) and \"code\" in delta:\n                    val = delta[\"code\"]\n                    if isinstance(val, str):\n                        code_parts.append(val)\n        for delta in parser.finish():\n            if isinstance(delta, dict) and \"code\" in delta:\n                val = delta[\"code\"]\n                if isinstance(val, str):\n                    code_parts.append(val)\n        assert \"\".join(code_parts) == \"print('hello world')\"\n\n    def test_multi_arg_tool_call(self) -> None:\n        full = '{\"query\": \"search term\", \"num_results\": 5}'\n        chunks = [full[:15], full[15:30], full[30:]]\n\n        parser = Parser()\n        query_parts: list[str] = []\n        has_num_results = False\n        for chunk in chunks:\n            for delta in parser.feed(chunk):\n                if isinstance(delta, dict):\n                    if \"query\" in delta and isinstance(delta[\"query\"], str):\n                        query_parts.append(delta[\"query\"])\n                    if \"num_results\" in delta:\n                        has_num_results = True\n        for delta in parser.finish():\n            if isinstance(delta, dict):\n                if \"query\" in delta and isinstance(delta[\"query\"], str):\n                    query_parts.append(delta[\"query\"])\n                if \"num_results\" in delta:\n                    has_num_results = True\n        assert \"\".join(query_parts) == \"search term\"\n        assert has_num_results\n\n    def test_code_with_newlines_and_escapes(self) -> None:\n        code = 'def greet(name):\\n    print(f\"Hello, {name}!\")\\n    return True'\n        full = json.dumps({\"code\": code})\n        chunk_size = 8\n        chunks = [full[i : i + chunk_size] for i in range(0, len(full), chunk_size)]\n\n        parser = Parser()\n        code_parts: list[str] = []\n        for chunk in chunks:\n            for delta in parser.feed(chunk):\n                if isinstance(delta, dict) and \"code\" in delta:\n                    val = delta[\"code\"]\n                    if isinstance(val, str):\n                        code_parts.append(val)\n        for delta in parser.finish():\n            if isinstance(delta, dict) and \"code\" in delta:\n                val = delta[\"code\"]\n                if isinstance(val, str):\n                    code_parts.append(val)\n        assert \"\".join(code_parts) == code\n\n    def test_single_char_streaming(self) -> None:\n        full = '{\"key\": \"value\"}'\n        parser = Parser()\n        key_parts: list[str] = []\n        for ch in full:\n            for delta in parser.feed(ch):\n                if isinstance(delta, dict) and \"key\" in delta:\n                    val = delta[\"key\"]\n                    if isinstance(val, str):\n                        key_parts.append(val)\n        for delta in parser.finish():\n            if isinstance(delta, dict) and \"key\" in delta:\n                val = delta[\"key\"]\n                if isinstance(val, str):\n                    key_parts.append(val)\n        assert \"\".join(key_parts) == \"value\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/utils/test_postgres_sanitization.py",
    "content": "from pytest import MonkeyPatch\n\nfrom onyx.access.models import ExternalAccess\nfrom onyx.connectors.models import BasicExpertInfo\nfrom onyx.connectors.models import Document\nfrom onyx.connectors.models import DocumentSource\nfrom onyx.connectors.models import HierarchyNode\nfrom onyx.connectors.models import IndexAttemptMetadata\nfrom onyx.connectors.models import TextSection\nfrom onyx.db.enums import HierarchyNodeType\nfrom onyx.indexing import indexing_pipeline\nfrom onyx.utils.postgres_sanitization import sanitize_document_for_postgres\nfrom onyx.utils.postgres_sanitization import sanitize_hierarchy_node_for_postgres\nfrom onyx.utils.postgres_sanitization import sanitize_json_like\nfrom onyx.utils.postgres_sanitization import sanitize_string\n\n\n# ---- sanitize_string tests ----\n\n\ndef test_sanitize_string_strips_nul_bytes() -> None:\n    assert sanitize_string(\"hello\\x00world\") == \"helloworld\"\n    assert sanitize_string(\"\\x00\\x00\\x00\") == \"\"\n    assert sanitize_string(\"clean\") == \"clean\"\n\n\ndef test_sanitize_string_strips_high_surrogates() -> None:\n    assert sanitize_string(\"before\\ud800after\") == \"beforeafter\"\n    assert sanitize_string(\"a\\udbffb\") == \"ab\"\n\n\ndef test_sanitize_string_strips_low_surrogates() -> None:\n    assert sanitize_string(\"before\\udc00after\") == \"beforeafter\"\n    assert sanitize_string(\"a\\udfffb\") == \"ab\"\n\n\ndef test_sanitize_string_strips_nul_and_surrogates_together() -> None:\n    assert sanitize_string(\"he\\x00llo\\ud800 wo\\udfffrld\\x00\") == \"hello world\"\n\n\ndef test_sanitize_string_preserves_valid_unicode() -> None:\n    assert sanitize_string(\"café ☕ 日本語 😀\") == \"café ☕ 日本語 😀\"\n\n\ndef test_sanitize_string_empty_input() -> None:\n    assert sanitize_string(\"\") == \"\"\n\n\n# ---- sanitize_json_like tests ----\n\n\ndef test_sanitize_json_like_handles_plain_string() -> None:\n    assert sanitize_json_like(\"he\\x00llo\\ud800\") == \"hello\"\n\n\ndef test_sanitize_json_like_handles_nested_dict() -> None:\n    dirty = {\n        \"ke\\x00y\": \"va\\ud800lue\",\n        \"nested\": {\"inne\\x00r\": \"de\\udfffep\"},\n    }\n    assert sanitize_json_like(dirty) == {\n        \"key\": \"value\",\n        \"nested\": {\"inner\": \"deep\"},\n    }\n\n\ndef test_sanitize_json_like_handles_list_with_surrogates() -> None:\n    dirty = [\"a\\x00\", \"b\\ud800\", {\"c\\udc00\": \"d\\udfff\"}]\n    assert sanitize_json_like(dirty) == [\"a\", \"b\", {\"c\": \"d\"}]\n\n\ndef test_sanitize_json_like_handles_tuple() -> None:\n    dirty = (\"a\\x00\", \"b\\ud800\")\n    assert sanitize_json_like(dirty) == (\"a\", \"b\")\n\n\ndef test_sanitize_json_like_passes_through_non_strings() -> None:\n    assert sanitize_json_like(42) == 42\n    assert sanitize_json_like(3.14) == 3.14\n    assert sanitize_json_like(True) is True\n    assert sanitize_json_like(None) is None\n\n\n# ---- sanitize_document_for_postgres tests ----\n\n\ndef test_sanitize_document_for_postgres_removes_nul_bytes() -> None:\n    document = Document(\n        id=\"doc\\x00-id\",\n        source=DocumentSource.FILE,\n        semantic_identifier=\"sem\\x00-id\",\n        title=\"ti\\x00tle\",\n        parent_hierarchy_raw_node_id=\"parent\\x00-id\",\n        sections=[TextSection(link=\"lin\\x00k\", text=\"te\\x00xt\")],\n        metadata={\"ke\\x00y\": \"va\\x00lue\", \"list\\x00key\": [\"a\\x00\", \"b\"]},\n        doc_metadata={\n            \"j\\x00son\": {\n                \"in\\x00ner\": \"va\\x00l\",\n                \"arr\": [\"x\\x00\", {\"dee\\x00p\": \"y\\x00\"}],\n            }\n        },\n        primary_owners=[BasicExpertInfo(display_name=\"Ali\\x00ce\", email=\"a\\x00@x.com\")],\n        secondary_owners=[BasicExpertInfo(first_name=\"Bo\\x00b\", last_name=\"Sm\\x00ith\")],\n        external_access=ExternalAccess(\n            external_user_emails={\"user\\x00@example.com\"},\n            external_user_group_ids={\"gro\\x00up-1\"},\n            is_public=False,\n        ),\n    )\n\n    sanitized = sanitize_document_for_postgres(document)\n\n    assert sanitized.id == \"doc-id\"\n    assert sanitized.semantic_identifier == \"sem-id\"\n    assert sanitized.title == \"title\"\n    assert sanitized.parent_hierarchy_raw_node_id == \"parent-id\"\n    assert sanitized.sections[0].link == \"link\"\n    assert sanitized.sections[0].text == \"text\"\n    assert sanitized.metadata == {\"key\": \"value\", \"listkey\": [\"a\", \"b\"]}\n    assert sanitized.doc_metadata == {\n        \"json\": {\"inner\": \"val\", \"arr\": [\"x\", {\"deep\": \"y\"}]}\n    }\n    assert sanitized.primary_owners is not None\n    assert sanitized.primary_owners[0].display_name == \"Alice\"\n    assert sanitized.primary_owners[0].email == \"a@x.com\"\n    assert sanitized.secondary_owners is not None\n    assert sanitized.secondary_owners[0].first_name == \"Bob\"\n    assert sanitized.secondary_owners[0].last_name == \"Smith\"\n    assert sanitized.external_access is not None\n    assert sanitized.external_access.external_user_emails == {\"user@example.com\"}\n    assert sanitized.external_access.external_user_group_ids == {\"group-1\"}\n\n    # Ensure original document is not mutated\n    assert document.id == \"doc\\x00-id\"\n    assert document.metadata == {\"ke\\x00y\": \"va\\x00lue\", \"list\\x00key\": [\"a\\x00\", \"b\"]}\n\n\ndef test_sanitize_hierarchy_node_for_postgres_removes_nul_bytes() -> None:\n    node = HierarchyNode(\n        raw_node_id=\"raw\\x00-id\",\n        raw_parent_id=\"paren\\x00t-id\",\n        display_name=\"fol\\x00der\",\n        link=\"https://exa\\x00mple.com\",\n        node_type=HierarchyNodeType.FOLDER,\n        external_access=ExternalAccess(\n            external_user_emails={\"a\\x00@example.com\"},\n            external_user_group_ids={\"g\\x00-1\"},\n            is_public=True,\n        ),\n    )\n\n    sanitized = sanitize_hierarchy_node_for_postgres(node)\n\n    assert sanitized.raw_node_id == \"raw-id\"\n    assert sanitized.raw_parent_id == \"parent-id\"\n    assert sanitized.display_name == \"folder\"\n    assert sanitized.link == \"https://example.com\"\n    assert sanitized.external_access is not None\n    assert sanitized.external_access.external_user_emails == {\"a@example.com\"}\n    assert sanitized.external_access.external_user_group_ids == {\"g-1\"}\n\n\ndef test_index_doc_batch_prepare_sanitizes_before_db_ops(\n    monkeypatch: MonkeyPatch,\n) -> None:\n    document = Document(\n        id=\"doc\\x00id\",\n        source=DocumentSource.FILE,\n        semantic_identifier=\"sem\\x00id\",\n        sections=[TextSection(text=\"content\", link=\"li\\x00nk\")],\n        metadata={\"ke\\x00y\": \"va\\x00lue\"},\n    )\n\n    captured: dict[str, object] = {}\n\n    def _get_documents_by_ids(db_session: object, document_ids: list[str]) -> list:\n        _ = db_session, document_ids\n        return []\n\n    monkeypatch.setattr(\n        indexing_pipeline, \"get_documents_by_ids\", _get_documents_by_ids\n    )\n\n    def _capture_upsert_documents_in_db(**kwargs: object) -> None:\n        captured[\"upsert_documents\"] = kwargs[\"documents\"]\n\n    monkeypatch.setattr(\n        indexing_pipeline, \"_upsert_documents_in_db\", _capture_upsert_documents_in_db\n    )\n\n    def _capture_doc_cc_pair(*args: object) -> None:\n        captured[\"cc_pair_doc_ids\"] = args[3]\n\n    monkeypatch.setattr(\n        indexing_pipeline,\n        \"upsert_document_by_connector_credential_pair\",\n        _capture_doc_cc_pair,\n    )\n\n    def _noop_link_hierarchy_nodes_to_documents(\n        db_session: object,\n        document_ids: list[str],\n        source: DocumentSource,\n        commit: bool,\n    ) -> int:\n        _ = db_session, document_ids, source, commit\n        return 0\n\n    monkeypatch.setattr(\n        indexing_pipeline,\n        \"link_hierarchy_nodes_to_documents\",\n        _noop_link_hierarchy_nodes_to_documents,\n    )\n\n    context = indexing_pipeline.index_doc_batch_prepare(\n        documents=[document],\n        index_attempt_metadata=IndexAttemptMetadata(connector_id=1, credential_id=2),\n        db_session=object(),  # type: ignore[arg-type]\n        ignore_time_skip=True,\n    )\n\n    assert context is not None\n    assert context.updatable_docs[0].id == \"docid\"\n    assert context.updatable_docs[0].semantic_identifier == \"semid\"\n    assert context.updatable_docs[0].metadata == {\"key\": \"value\"}\n    assert captured[\"cc_pair_doc_ids\"] == [\"docid\"]\n\n    upsert_documents = captured[\"upsert_documents\"]\n    assert isinstance(upsert_documents, list)\n    assert upsert_documents[0].id == \"docid\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/utils/test_sensitive.py",
    "content": "\"\"\"Tests for SensitiveValue wrapper class.\"\"\"\n\nimport json\nfrom typing import Any\n\nimport pytest\n\nfrom onyx.utils.sensitive import SensitiveAccessError\nfrom onyx.utils.sensitive import SensitiveValue\n\n\ndef _encrypt_string(value: str) -> bytes:\n    \"\"\"Simple mock encryption (just encoding for tests).\"\"\"\n    return value.encode(\"utf-8\")\n\n\ndef _decrypt_string(value: bytes) -> str:\n    \"\"\"Simple mock decryption (just decoding for tests).\"\"\"\n    return value.decode(\"utf-8\")\n\n\nclass TestSensitiveValueString:\n    \"\"\"Tests for SensitiveValue with string values.\"\"\"\n\n    def test_get_value_raw(self) -> None:\n        \"\"\"Test getting raw unmasked value.\"\"\"\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"my-secret-token\"),\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        assert sensitive.get_value(apply_mask=False) == \"my-secret-token\"\n\n    def test_get_value_masked(self) -> None:\n        \"\"\"Test getting masked value with default masking.\"\"\"\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"my-very-long-secret-token-here\"),\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        result = sensitive.get_value(apply_mask=True)\n        # Default mask_string shows first 4 and last 4 chars\n        assert result == \"my-v...here\"\n\n    def test_get_value_masked_short_string(self) -> None:\n        \"\"\"Test that short strings are fully masked.\"\"\"\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"short\"),\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        result = sensitive.get_value(apply_mask=True)\n        # Short strings get fully masked\n        assert result == \"••••••••••••\"\n\n    def test_get_value_custom_mask_fn(self) -> None:\n        \"\"\"Test using a custom masking function.\"\"\"\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"secret\"),\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        result = sensitive.get_value(\n            apply_mask=True,\n            mask_fn=lambda x: \"REDACTED\",  # noqa: ARG005\n        )\n        assert result == \"REDACTED\"\n\n    def test_str_raises_error(self) -> None:\n        \"\"\"Test that str() raises SensitiveAccessError.\"\"\"\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"secret\"),\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        with pytest.raises(SensitiveAccessError):\n            str(sensitive)\n\n    def test_repr_is_safe(self) -> None:\n        \"\"\"Test that repr() doesn't expose the value.\"\"\"\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"secret\"),\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        result = repr(sensitive)\n        assert \"secret\" not in result\n        assert \"SensitiveValue\" in result\n        assert \"get_value\" in result\n\n    def test_iter_raises_error(self) -> None:\n        \"\"\"Test that iteration raises SensitiveAccessError.\"\"\"\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"secret\"),\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        with pytest.raises(SensitiveAccessError):\n            for _ in sensitive:  # type: ignore[attr-defined]\n                pass\n\n    def test_getitem_raises_error(self) -> None:\n        \"\"\"Test that subscript access raises SensitiveAccessError.\"\"\"\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"secret\"),\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        with pytest.raises(SensitiveAccessError):\n            _ = sensitive[0]\n\n    def test_bool_returns_true(self) -> None:\n        \"\"\"Test that bool() works for truthiness checks.\"\"\"\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"secret\"),\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        assert bool(sensitive) is True\n\n    def test_equality_with_same_value(self) -> None:\n        \"\"\"Test equality comparison between SensitiveValues with same encrypted bytes.\"\"\"\n        encrypted = _encrypt_string(\"secret\")\n        sensitive1 = SensitiveValue(\n            encrypted_bytes=encrypted,\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        sensitive2 = SensitiveValue(\n            encrypted_bytes=encrypted,\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        assert sensitive1 == sensitive2\n\n    def test_equality_with_different_value(self) -> None:\n        \"\"\"Test equality comparison between SensitiveValues with different encrypted bytes.\"\"\"\n        sensitive1 = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"secret1\"),\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        sensitive2 = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"secret2\"),\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        assert sensitive1 != sensitive2\n\n    def test_equality_with_non_sensitive_returns_not_equal(self) -> None:\n        \"\"\"Test that comparing with non-SensitiveValue is always not-equal.\n\n        Returns NotImplemented so Python falls back to identity comparison.\n        This is required for compatibility with SQLAlchemy's attribute tracking.\n        \"\"\"\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"secret\"),\n            decrypt_fn=_decrypt_string,\n            is_json=False,\n        )\n        assert not (sensitive == \"secret\")\n\n\nclass TestSensitiveValueJson:\n    \"\"\"Tests for SensitiveValue with JSON/dict values.\"\"\"\n\n    def test_get_value_raw_dict(self) -> None:\n        \"\"\"Test getting raw unmasked dict value.\"\"\"\n        data: dict[str, Any] = {\"api_key\": \"secret-key\", \"username\": \"user123\"}\n        sensitive: SensitiveValue[dict[str, Any]] = SensitiveValue(\n            encrypted_bytes=_encrypt_string(json.dumps(data)),\n            decrypt_fn=_decrypt_string,\n            is_json=True,\n        )\n        result = sensitive.get_value(apply_mask=False)\n        assert result == data\n\n    def test_get_value_masked_dict(self) -> None:\n        \"\"\"Test getting masked dict value with default masking.\"\"\"\n        data = {\"api_key\": \"my-very-long-api-key-value\", \"username\": \"user123456789\"}\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(json.dumps(data)),\n            decrypt_fn=_decrypt_string,\n            is_json=True,\n        )\n        result = sensitive.get_value(apply_mask=True)\n        # Values should be masked\n        assert \"my-very-long-api-key-value\" not in str(result)\n        assert \"user123456789\" not in str(result)\n\n    def test_getitem_raises_error_for_dict(self) -> None:\n        \"\"\"Test that subscript access raises SensitiveAccessError for dict.\"\"\"\n        data = {\"api_key\": \"secret\"}\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(json.dumps(data)),\n            decrypt_fn=_decrypt_string,\n            is_json=True,\n        )\n        with pytest.raises(SensitiveAccessError):\n            _ = sensitive[\"api_key\"]\n\n    def test_iter_raises_error_for_dict(self) -> None:\n        \"\"\"Test that iteration raises SensitiveAccessError for dict.\"\"\"\n        data = {\"api_key\": \"secret\"}\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(json.dumps(data)),\n            decrypt_fn=_decrypt_string,\n            is_json=True,\n        )\n        with pytest.raises(SensitiveAccessError):\n            for _ in sensitive:  # type: ignore[attr-defined]\n                pass\n\n\nclass TestSensitiveValueCaching:\n    \"\"\"Tests for lazy decryption caching.\"\"\"\n\n    def test_decryption_is_cached(self) -> None:\n        \"\"\"Test that decryption result is cached.\"\"\"\n        decrypt_count = [0]\n\n        def counting_decrypt(value: bytes) -> str:\n            decrypt_count[0] += 1\n            return value.decode(\"utf-8\")\n\n        sensitive = SensitiveValue(\n            encrypted_bytes=_encrypt_string(\"secret\"),\n            decrypt_fn=counting_decrypt,\n            is_json=False,\n        )\n\n        # First access\n        sensitive.get_value(apply_mask=False)\n        assert decrypt_count[0] == 1\n\n        # Second access should use cached value\n        sensitive.get_value(apply_mask=False)\n        assert decrypt_count[0] == 1\n\n        # Masked access should also use cached value\n        sensitive.get_value(apply_mask=True)\n        assert decrypt_count[0] == 1\n"
  },
  {
    "path": "backend/tests/unit/onyx/utils/test_sensitive_typing.py",
    "content": "\"\"\"\nTests demonstrating static type checking for SensitiveValue.\n\nRun with: mypy tests/unit/onyx/utils/test_sensitive_typing.py --ignore-missing-imports\n\nThese tests show what mypy will catch when SensitiveValue is misused.\n\"\"\"\n\nfrom typing import Any\n\n# This file demonstrates what mypy will catch.\n# The commented-out code below would produce type errors.\n\n\ndef demonstrate_correct_usage() -> None:\n    \"\"\"Shows correct patterns that pass type checking.\"\"\"\n    from onyx.utils.sensitive import SensitiveValue\n    from onyx.utils.encryption import encrypt_string_to_bytes, decrypt_bytes_to_string\n\n    # Create a SensitiveValue\n    encrypted = encrypt_string_to_bytes('{\"api_key\": \"secret\"}')\n    sensitive: SensitiveValue[dict[str, Any]] = SensitiveValue(\n        encrypted_bytes=encrypted,\n        decrypt_fn=decrypt_bytes_to_string,\n        is_json=True,\n    )\n\n    # CORRECT: Using get_value() to access the value\n    raw_dict: dict[str, Any] = sensitive.get_value(apply_mask=False)\n    assert raw_dict[\"api_key\"] == \"secret\"\n\n    masked_dict: dict[str, Any] = sensitive.get_value(apply_mask=True)\n    assert \"secret\" not in str(masked_dict)\n\n    # CORRECT: Using bool for truthiness\n    if sensitive:\n        print(\"Value exists\")\n\n\n# The code below demonstrates what mypy would catch.\n# Uncomment to see the type errors.\n\"\"\"\ndef demonstrate_incorrect_usage() -> None:\n    '''Shows patterns that mypy will flag as errors.'''\n    from onyx.utils.sensitive import SensitiveValue\n    from onyx.utils.encryption import encrypt_string_to_bytes, decrypt_bytes_to_string\n\n    encrypted = encrypt_string_to_bytes('{\"api_key\": \"secret\"}')\n    sensitive: SensitiveValue[dict[str, Any]] = SensitiveValue(\n        encrypted_bytes=encrypted,\n        decrypt_fn=decrypt_bytes_to_string,\n        is_json=True,\n    )\n\n    # ERROR: SensitiveValue doesn't support subscript access\n    # mypy error: Value of type \"SensitiveValue[dict[str, Any]]\" is not indexable\n    api_key = sensitive[\"api_key\"]\n\n    # ERROR: SensitiveValue doesn't support iteration\n    # mypy error: \"SensitiveValue[dict[str, Any]]\" has no attribute \"__iter__\"\n    for key in sensitive:\n        print(key)\n\n    # ERROR: Can't pass SensitiveValue where dict is expected\n    # mypy error: Argument 1 has incompatible type \"SensitiveValue[dict[str, Any]]\"; expected \"dict[str, Any]\"\n    def process_dict(d: dict[str, Any]) -> None:\n        pass\n    process_dict(sensitive)\n\n    # ERROR: Can't use .get() on SensitiveValue\n    # mypy error: \"SensitiveValue[dict[str, Any]]\" has no attribute \"get\"\n    value = sensitive.get(\"api_key\")\n\"\"\"\n\n\ndef test_correct_usage_passes() -> None:\n    \"\"\"This test runs the correct usage demonstration.\"\"\"\n    demonstrate_correct_usage()\n"
  },
  {
    "path": "backend/tests/unit/onyx/utils/test_telemetry.py",
    "content": "from typing import Any\nfrom unittest.mock import Mock\n\nfrom onyx.configs.constants import MilestoneRecordType\nfrom onyx.utils import telemetry as telemetry_utils\n\n\ndef test_mt_cloud_telemetry_noop_when_not_multi_tenant(monkeypatch: Any) -> None:\n    fetch_impl = Mock()\n    monkeypatch.setattr(\n        telemetry_utils,\n        \"fetch_versioned_implementation_with_fallback\",\n        fetch_impl,\n    )\n    # mt_cloud_telemetry reads the module-local imported symbol, so patch this path.\n    monkeypatch.setattr(\"onyx.utils.telemetry.MULTI_TENANT\", False)\n\n    telemetry_utils.mt_cloud_telemetry(\n        tenant_id=\"tenant-1\",\n        distinct_id=\"12345678-1234-1234-1234-123456789abc\",\n        event=MilestoneRecordType.USER_MESSAGE_SENT,\n        properties={\"origin\": \"web\"},\n    )\n\n    fetch_impl.assert_not_called()\n\n\ndef test_mt_cloud_telemetry_calls_event_telemetry_when_multi_tenant(\n    monkeypatch: Any,\n) -> None:\n    event_telemetry = Mock()\n    fetch_impl = Mock(return_value=event_telemetry)\n    monkeypatch.setattr(\n        telemetry_utils,\n        \"fetch_versioned_implementation_with_fallback\",\n        fetch_impl,\n    )\n    # mt_cloud_telemetry reads the module-local imported symbol, so patch this path.\n    monkeypatch.setattr(\"onyx.utils.telemetry.MULTI_TENANT\", True)\n\n    telemetry_utils.mt_cloud_telemetry(\n        tenant_id=\"tenant-1\",\n        distinct_id=\"12345678-1234-1234-1234-123456789abc\",\n        event=MilestoneRecordType.USER_MESSAGE_SENT,\n        properties={\"origin\": \"web\"},\n    )\n\n    fetch_impl.assert_called_once_with(\n        module=\"onyx.utils.telemetry\",\n        attribute=\"event_telemetry\",\n        fallback=telemetry_utils.noop_fallback,\n    )\n    event_telemetry.assert_called_once_with(\n        \"12345678-1234-1234-1234-123456789abc\",\n        MilestoneRecordType.USER_MESSAGE_SENT,\n        {\"origin\": \"web\", \"tenant_id\": \"tenant-1\"},\n    )\n\n\ndef test_mt_cloud_identify_noop_when_not_multi_tenant(monkeypatch: Any) -> None:\n    fetch_impl = Mock()\n    monkeypatch.setattr(\n        telemetry_utils,\n        \"fetch_versioned_implementation_with_fallback\",\n        fetch_impl,\n    )\n    monkeypatch.setattr(\"onyx.utils.telemetry.MULTI_TENANT\", False)\n\n    telemetry_utils.mt_cloud_identify(\n        distinct_id=\"12345678-1234-1234-1234-123456789abc\",\n        properties={\"email\": \"user@example.com\"},\n    )\n\n    fetch_impl.assert_not_called()\n\n\ndef test_mt_cloud_identify_calls_identify_user_when_multi_tenant(\n    monkeypatch: Any,\n) -> None:\n    identify_user = Mock()\n    fetch_impl = Mock(return_value=identify_user)\n    monkeypatch.setattr(\n        telemetry_utils,\n        \"fetch_versioned_implementation_with_fallback\",\n        fetch_impl,\n    )\n    monkeypatch.setattr(\"onyx.utils.telemetry.MULTI_TENANT\", True)\n\n    telemetry_utils.mt_cloud_identify(\n        distinct_id=\"12345678-1234-1234-1234-123456789abc\",\n        properties={\"email\": \"user@example.com\"},\n    )\n\n    fetch_impl.assert_called_once_with(\n        module=\"onyx.utils.telemetry\",\n        attribute=\"identify_user\",\n        fallback=telemetry_utils.noop_fallback,\n    )\n    identify_user.assert_called_once_with(\n        \"12345678-1234-1234-1234-123456789abc\",\n        {\"email\": \"user@example.com\"},\n    )\n"
  },
  {
    "path": "backend/tests/unit/onyx/utils/test_threadpool_concurrency.py",
    "content": "import contextvars\nimport threading\nimport time\nfrom collections.abc import Generator\nfrom collections.abc import Iterator\nfrom concurrent.futures import ThreadPoolExecutor\n\nimport pytest\n\nfrom onyx.utils.threadpool_concurrency import parallel_yield\nfrom onyx.utils.threadpool_concurrency import run_in_background\nfrom onyx.utils.threadpool_concurrency import run_with_timeout\nfrom onyx.utils.threadpool_concurrency import ThreadSafeDict\nfrom onyx.utils.threadpool_concurrency import wait_on_background\n\n# Create a context variable for testing\ntest_context_var = contextvars.ContextVar(\"test_var\", default=\"default\")\n\n\ndef test_run_with_timeout_completes() -> None:\n    \"\"\"Test that a function that completes within timeout works correctly\"\"\"\n\n    def quick_function(x: int) -> int:\n        return x * 2\n\n    result = run_with_timeout(1.0, quick_function, x=21)\n    assert result == 42\n\n\n@pytest.mark.parametrize(\"slow,timeout\", [(1, 0.1), (0.3, 0.2)])\ndef test_run_with_timeout_raises_on_timeout(slow: float, timeout: float) -> None:\n    \"\"\"Test that a function that exceeds timeout raises TimeoutError\"\"\"\n\n    def slow_function() -> None:\n        time.sleep(slow)\n\n    start = time.monotonic()\n    with pytest.raises(TimeoutError) as exc_info:\n        run_with_timeout(timeout, slow_function)\n    elapsed = time.monotonic() - start\n\n    assert f\"timed out after {timeout} seconds\" in str(exc_info.value)\n    assert elapsed >= timeout\n    # Should return around the timeout duration, not the full sleep duration\n    assert elapsed == pytest.approx(timeout, abs=0.8)\n\n\n@pytest.mark.filterwarnings(\"ignore::pytest.PytestUnhandledThreadExceptionWarning\")\ndef test_run_with_timeout_propagates_exceptions() -> None:\n    \"\"\"Test that other exceptions from the function are propagated properly\"\"\"\n\n    def error_function() -> None:\n        raise ValueError(\"Test error\")\n\n    with pytest.raises(ValueError) as exc_info:\n        run_with_timeout(1.0, error_function)\n\n    assert \"Test error\" in str(exc_info.value)\n\n\ndef test_run_with_timeout_with_args_and_kwargs() -> None:\n    \"\"\"Test that args and kwargs are properly passed to the function\"\"\"\n\n    def complex_function(x: int, y: int, multiply: bool = False) -> int:\n        if multiply:\n            return x * y\n        return x + y\n\n    # Test with just positional args\n    result1 = run_with_timeout(1.0, complex_function, x=5, y=3)\n    assert result1 == 8\n\n    # Test with positional and keyword args\n    result2 = run_with_timeout(1.0, complex_function, x=5, y=3, multiply=True)\n    assert result2 == 15\n\n\ndef test_run_in_background_and_wait_success() -> None:\n    \"\"\"Test that run_in_background and wait_on_background work correctly for successful execution\"\"\"\n\n    def background_function(x: int) -> int:\n        time.sleep(0.1)  # Small delay to ensure it's actually running in background\n        return x * 2\n\n    # Start the background task\n    task = run_in_background(background_function, 21)\n\n    # Verify we can do other work while task is running\n    start_time = time.time()\n    result = wait_on_background(task)\n    elapsed = time.time() - start_time\n\n    assert result == 42\n    # sometimes slightly flaky\n    assert elapsed >= 0.095  # Verify we actually waited for the sleep\n\n\n@pytest.mark.filterwarnings(\"ignore::pytest.PytestUnhandledThreadExceptionWarning\")\ndef test_run_in_background_propagates_exceptions() -> None:\n    \"\"\"Test that exceptions in background tasks are properly propagated\"\"\"\n\n    def error_function() -> None:\n        time.sleep(0.1)  # Small delay to ensure it's actually running in background\n        raise ValueError(\"Test background error\")\n\n    task = run_in_background(error_function)\n\n    with pytest.raises(ValueError) as exc_info:\n        wait_on_background(task)\n\n    assert \"Test background error\" in str(exc_info.value)\n\n\ndef test_run_in_background_with_args_and_kwargs() -> None:\n    \"\"\"Test that args and kwargs are properly passed to the background function\"\"\"\n\n    def complex_function(x: int, y: int, multiply: bool = False) -> int:\n        time.sleep(0.1)  # Small delay to ensure it's actually running in background\n        if multiply:\n            return x * y\n        return x + y\n\n    # Test with args\n    task1 = run_in_background(complex_function, 5, 3)\n    result1 = wait_on_background(task1)\n    assert result1 == 8\n\n    # Test with args and kwargs\n    task2 = run_in_background(complex_function, 5, 3, multiply=True)\n    result2 = wait_on_background(task2)\n    assert result2 == 15\n\n\ndef test_multiple_background_tasks() -> None:\n    \"\"\"Test running multiple background tasks concurrently\"\"\"\n\n    def slow_add(x: int, y: int) -> int:\n        time.sleep(0.2)  # Make each task take some time\n        return x + y\n\n    # Start multiple tasks\n    start_time = time.time()\n    task1 = run_in_background(slow_add, 1, 2)\n    task2 = run_in_background(slow_add, 3, 4)\n    task3 = run_in_background(slow_add, 5, 6)\n\n    # Wait for all results\n    result1 = wait_on_background(task1)\n    result2 = wait_on_background(task2)\n    result3 = wait_on_background(task3)\n    elapsed = time.time() - start_time\n\n    # Verify results\n    assert result1 == 3\n    assert result2 == 7\n    assert result3 == 11\n\n    # Verify tasks ran in parallel (total time should be ~0.2s, not ~0.6s)\n    assert 0.2 <= elapsed < 0.4  # Allow some buffer for test environment variations\n\n\ndef test_thread_safe_dict_basic_operations() -> None:\n    \"\"\"Test basic operations of ThreadSafeDict\"\"\"\n    d = ThreadSafeDict[str, int]()\n\n    # Test setting and getting\n    d[\"a\"] = 1\n    assert d[\"a\"] == 1\n\n    # Test get with default\n    assert d.get(\"a\", None) == 1\n    assert d.get(\"b\", 2) == 2\n\n    # Test deletion\n    del d[\"a\"]\n    assert \"a\" not in d\n\n    # Test length\n    d[\"x\"] = 10\n    d[\"y\"] = 20\n    assert len(d) == 2\n\n    # Test iteration\n    keys = sorted(d.keys())\n    assert keys == [\"x\", \"y\"]\n\n    # Test items and values\n    assert dict(d.items()) == {\"x\": 10, \"y\": 20}\n    assert sorted(d.values()) == [10, 20]\n\n\ndef test_thread_safe_dict_concurrent_access() -> None:\n    \"\"\"Test ThreadSafeDict with concurrent access from multiple threads\"\"\"\n    d = ThreadSafeDict[str, int]()\n    num_threads = 10\n    iterations = 1000\n\n    def increment_values() -> None:\n        for i in range(iterations):\n            key = str(i % 5)  # Use 5 different keys\n            # Get current value or 0 if not exists, increment, then store\n            d.atomic_get_set(key, lambda x: x + 1, 0)\n\n    # Create and start threads\n    threads = []\n    for _ in range(num_threads):\n        t = threading.Thread(target=increment_values)\n        threads.append(t)\n        t.start()\n\n    # Wait for all threads to complete\n    for t in threads:\n        t.join()\n\n    # Verify results\n    # Each key should have been incremented (num_threads * iterations) / 5 times\n    expected_value = (num_threads * iterations) // 5\n    for i in range(5):\n        assert d[str(i)] == expected_value\n\n\ndef test_thread_safe_dict_bulk_operations() -> None:\n    \"\"\"Test bulk operations of ThreadSafeDict\"\"\"\n    d = ThreadSafeDict[str, int]()\n\n    # Test update with dict\n    d.update({\"a\": 1, \"b\": 2})\n    assert dict(d.items()) == {\"a\": 1, \"b\": 2}\n\n    # Test update with kwargs\n    d.update(c=3, d=4)\n    assert dict(d.items()) == {\"a\": 1, \"b\": 2, \"c\": 3, \"d\": 4}\n\n    # Test clear\n    d.clear()\n    assert len(d) == 0\n\n\ndef test_thread_safe_dict_concurrent_bulk_operations() -> None:\n    \"\"\"Test ThreadSafeDict with concurrent bulk operations\"\"\"\n    d = ThreadSafeDict[str, int]()\n    num_threads = 5\n\n    def bulk_update(start: int) -> None:\n        # Each thread updates with its own range of numbers\n        updates = {str(i): i for i in range(start, start + 20)}\n        d.update(updates)\n        time.sleep(0.01)  # Add some delay to increase chance of thread overlap\n\n    # Run updates concurrently\n    with ThreadPoolExecutor(max_workers=num_threads) as executor:\n        futures = [executor.submit(bulk_update, i * 20) for i in range(num_threads)]\n        for future in futures:\n            future.result()\n\n    # Verify results\n    assert len(d) == num_threads * 20\n    # Verify all numbers from 0 to (num_threads * 20) are present\n    for i in range(num_threads * 20):\n        assert d[str(i)] == i\n\n\ndef test_thread_safe_dict_atomic_operations() -> None:\n    \"\"\"Test atomic operations with ThreadSafeDict's lock\"\"\"\n    d = ThreadSafeDict[str, list[int]]()\n    d[\"numbers\"] = []\n\n    def append_numbers(start: int) -> None:\n        numbers = d[\"numbers\"]\n        with d.lock:\n            for i in range(start, start + 5):\n                numbers.append(i)\n                time.sleep(0.001)  # Add delay to increase chance of thread overlap\n        d[\"numbers\"] = numbers\n\n    # Run concurrent append operations\n    threads = []\n    for i in range(4):  # 4 threads, each adding 5 numbers\n        t = threading.Thread(target=append_numbers, args=(i * 5,))\n        threads.append(t)\n        t.start()\n\n    for t in threads:\n        t.join()\n\n    # Verify results\n    numbers = d[\"numbers\"]\n    assert len(numbers) == 20  # 4 threads * 5 numbers each\n    assert sorted(numbers) == list(range(20))  # All numbers 0-19 should be present\n\n\ndef test_parallel_yield_basic() -> None:\n    \"\"\"Test that parallel_yield correctly yields values from multiple generators.\"\"\"\n\n    def make_gen(values: list[int], delay: float) -> Generator[int, None, None]:\n        for v in values:\n            time.sleep(delay)\n            yield v\n\n    # Create generators with different delays\n    gen1 = make_gen([1, 4, 7], 0.1)  # Slower generator\n    gen2 = make_gen([2, 5, 8], 0.05)  # Faster generator\n    gen3 = make_gen([3, 6, 9], 0.15)  # Slowest generator\n\n    # Collect results with timestamps\n    results: list[tuple[float, int]] = []\n    start_time = time.time()\n\n    for value in parallel_yield([gen1, gen2, gen3]):\n        results.append((time.time() - start_time, value))\n\n    # Verify all values were yielded\n    assert sorted(v for _, v in results) == list(range(1, 10))\n\n    # Verify that faster generators yielded earlier\n    # Group results by generator (values 1,4,7 are gen1, 2,5,8 are gen2, 3,6,9 are gen3)\n    gen1_times = [t for t, v in results if v in (1, 4, 7)]\n    gen2_times = [t for t, v in results if v in (2, 5, 8)]\n    gen3_times = [t for t, v in results if v in (3, 6, 9)]\n\n    # Average times for each generator\n    avg_gen1 = sum(gen1_times) / len(gen1_times)\n    avg_gen2 = sum(gen2_times) / len(gen2_times)\n    avg_gen3 = sum(gen3_times) / len(gen3_times)\n\n    # Verify gen2 (fastest) has lowest average time\n    assert avg_gen2 < avg_gen1\n    assert avg_gen2 < avg_gen3\n\n\ndef test_parallel_yield_empty_generators() -> None:\n    \"\"\"Test parallel_yield with empty generators.\"\"\"\n\n    def empty_gen() -> Iterator[int]:\n        if False:\n            yield 0  # Makes this a generator function\n\n    gens = [empty_gen() for _ in range(3)]\n    results = list(parallel_yield(gens))\n    assert len(results) == 0\n\n\ndef test_parallel_yield_different_lengths() -> None:\n    \"\"\"Test parallel_yield with generators of different lengths.\"\"\"\n\n    def make_gen(count: int) -> Iterator[int]:\n        for i in range(count):\n            yield i\n            time.sleep(0.01)  # Small delay to ensure concurrent execution\n\n    gens = [\n        make_gen(1),  # Yields: [0]\n        make_gen(3),  # Yields: [0, 1, 2]\n        make_gen(2),  # Yields: [0, 1]\n    ]\n\n    results = list(parallel_yield(gens))\n    assert len(results) == 6  # Total number of items from all generators\n    assert sorted(results) == [0, 0, 0, 1, 1, 2]\n\n\ndef test_parallel_yield_exception_handling() -> None:\n    \"\"\"Test parallel_yield handles exceptions in generators properly.\"\"\"\n\n    def failing_gen() -> Iterator[int]:\n        yield 1\n        raise ValueError(\"Generator failure\")\n\n    def normal_gen() -> Iterator[int]:\n        yield 2\n        yield 3\n\n    gens = [failing_gen(), normal_gen()]\n\n    with pytest.raises(ValueError, match=\"Generator failure\"):\n        list(parallel_yield(gens))\n\n\ndef test_parallel_yield_non_blocking() -> None:\n    \"\"\"Test parallel_yield with non-blocking generators (simple ranges).\"\"\"\n\n    def range_gen(start: int, end: int) -> Iterator[int]:\n        for i in range(start, end):\n            yield i\n\n    # Create three overlapping ranges\n    gens = [range_gen(0, 100), range_gen(100, 200), range_gen(200, 300)]\n\n    results = list(parallel_yield(gens))\n\n    # Verify no values are missing\n    assert len(results) == 300  # Should have all values from 0 to 299\n    assert sorted(results) == list(range(300))\n"
  },
  {
    "path": "backend/tests/unit/onyx/utils/test_threadpool_contextvars.py",
    "content": "import contextvars\nimport time\n\nfrom onyx.utils.threadpool_concurrency import FunctionCall\nfrom onyx.utils.threadpool_concurrency import run_functions_in_parallel\nfrom onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel\nfrom onyx.utils.threadpool_concurrency import run_in_background\nfrom onyx.utils.threadpool_concurrency import run_with_timeout\nfrom onyx.utils.threadpool_concurrency import wait_on_background\n\n# Create a test contextvar\ntest_var = contextvars.ContextVar(\"test_var\", default=\"default\")\n\n\ndef get_contextvar_value() -> str:\n    \"\"\"Helper function that runs in a thread and returns the contextvar value\"\"\"\n    # Add a small sleep to ensure we're actually running in a different thread\n    time.sleep(0.1)\n    return test_var.get()\n\n\ndef test_run_with_timeout_preserves_contextvar() -> None:\n    \"\"\"Test that run_with_timeout preserves contextvar values\"\"\"\n    # Set a value in the main thread\n    test_var.set(\"test_value\")\n\n    # Run function with timeout and verify the value is preserved\n    result = run_with_timeout(1.0, get_contextvar_value)\n    assert result == \"test_value\"\n\n\ndef test_run_functions_in_parallel_preserves_contextvar() -> None:\n    \"\"\"Test that run_functions_in_parallel preserves contextvar values\"\"\"\n    # Set a value in the main thread\n    test_var.set(\"parallel_test\")\n\n    # Create multiple function calls\n    function_calls = [\n        FunctionCall(get_contextvar_value),\n        FunctionCall(get_contextvar_value),\n    ]\n\n    # Run in parallel and verify all results have the correct value\n    results = run_functions_in_parallel(function_calls)\n\n    for result_id, value in results.items():\n        assert value == \"parallel_test\"\n\n\ndef test_run_functions_tuples_preserves_contextvar() -> None:\n    \"\"\"Test that run_functions_tuples_in_parallel preserves contextvar values\"\"\"\n    # Set a value in the main thread\n    test_var.set(\"tuple_test\")\n\n    # Create list of function tuples\n    functions_with_args = [\n        (get_contextvar_value, ()),\n        (get_contextvar_value, ()),\n    ]\n\n    # Run in parallel and verify all results have the correct value\n    results = run_functions_tuples_in_parallel(functions_with_args)\n\n    for result in results:\n        assert result == \"tuple_test\"\n\n\ndef test_nested_contextvar_modifications() -> None:\n    \"\"\"Test that modifications to contextvars in threads don't affect other threads\"\"\"\n\n    def modify_and_return_contextvar(new_value: str) -> tuple[str, str]:\n        \"\"\"Helper that modifies the contextvar and returns both values\"\"\"\n        original = test_var.get()\n        test_var.set(new_value)\n        time.sleep(0.1)  # Ensure threads overlap\n        return original, test_var.get()\n\n    # Set initial value\n    test_var.set(\"initial\")\n\n    # Run multiple functions that modify the contextvar\n    functions_with_args = [\n        (modify_and_return_contextvar, (\"thread1\",)),\n        (modify_and_return_contextvar, (\"thread2\",)),\n    ]\n\n    results = run_functions_tuples_in_parallel(functions_with_args)\n\n    # Verify each thread saw the initial value and its own modification\n    for original, modified in results:\n        assert original == \"initial\"  # Each thread should see the initial value\n        assert modified in [\n            \"thread1\",\n            \"thread2\",\n        ]  # Each thread should see its own modification\n\n    # Verify the main thread's value wasn't affected\n    assert test_var.get() == \"initial\"\n\n\ndef test_contextvar_isolation_between_runs() -> None:\n    \"\"\"Test that contextvar changes don't leak between separate parallel runs\"\"\"\n\n    def set_and_return_contextvar(value: str) -> str:\n        test_var.set(value)\n        return test_var.get()\n\n    # First run\n    test_var.set(\"first_run\")\n    first_results = run_functions_tuples_in_parallel(\n        [\n            (set_and_return_contextvar, (\"thread1\",)),\n            (set_and_return_contextvar, (\"thread2\",)),\n        ]\n    )\n\n    # Verify first run results\n    assert all(result in [\"thread1\", \"thread2\"] for result in first_results)\n\n    # Second run should still see the main thread's value\n    assert test_var.get() == \"first_run\"\n\n    # Second run with different value\n    test_var.set(\"second_run\")\n    second_results = run_functions_tuples_in_parallel(\n        [\n            (set_and_return_contextvar, (\"thread3\",)),\n            (set_and_return_contextvar, (\"thread4\",)),\n        ]\n    )\n\n    # Verify second run results\n    assert all(result in [\"thread3\", \"thread4\"] for result in second_results)\n\n\ndef test_run_in_background_preserves_contextvar() -> None:\n    \"\"\"Test that run_in_background preserves contextvar values and modifications are isolated\"\"\"\n\n    def modify_and_sleep() -> tuple[str, str]:\n        \"\"\"Modifies contextvar, sleeps, and returns original, modified, and final values\"\"\"\n        original = test_var.get()\n        test_var.set(\"modified_in_background\")\n        time.sleep(0.1)  # Ensure we can check main thread during execution\n        final = test_var.get()\n        return original, final\n\n    # Set initial value in main thread\n    token = test_var.set(\"initial_value\")\n    try:\n        # Start background task\n        task = run_in_background(modify_and_sleep)\n\n        # Verify main thread value remains unchanged while task runs\n        assert test_var.get() == \"initial_value\"\n\n        # Get results from background thread\n        original, modified = wait_on_background(task)\n\n        # Verify the background thread:\n        # 1. Saw the initial value\n        assert original == \"initial_value\"\n        # 2. Successfully modified its own copy\n        assert modified == \"modified_in_background\"\n\n        # Verify main thread value is still unchanged after task completion\n        assert test_var.get() == \"initial_value\"\n    finally:\n        # Clean up\n        test_var.reset(token)\n"
  },
  {
    "path": "backend/tests/unit/onyx/utils/test_url_ssrf.py",
    "content": "\"\"\"\nUnit tests for SSRF protection in URL validation utilities.\n\nThese tests verify that the SSRF protection correctly blocks\nrequests to internal/private IP addresses and other potentially dangerous destinations.\n\"\"\"\n\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.utils.url import _is_ip_private_or_reserved\nfrom onyx.utils.url import _validate_and_resolve_url\nfrom onyx.utils.url import ssrf_safe_get\nfrom onyx.utils.url import SSRFException\nfrom onyx.utils.url import validate_outbound_http_url\n\n\nclass TestIsIpPrivateOrReserved:\n    \"\"\"Tests for the _is_ip_private_or_reserved helper function.\"\"\"\n\n    def test_loopback_ipv4(self) -> None:\n        \"\"\"Test that IPv4 loopback addresses are detected as private.\"\"\"\n        assert _is_ip_private_or_reserved(\"127.0.0.1\") is True\n        assert _is_ip_private_or_reserved(\"127.0.0.2\") is True\n        assert _is_ip_private_or_reserved(\"127.255.255.255\") is True\n\n    def test_loopback_ipv6(self) -> None:\n        \"\"\"Test that IPv6 loopback addresses are detected as private.\"\"\"\n        assert _is_ip_private_or_reserved(\"::1\") is True\n\n    def test_private_class_a(self) -> None:\n        \"\"\"Test that private Class A addresses (10.x.x.x) are detected.\"\"\"\n        assert _is_ip_private_or_reserved(\"10.0.0.1\") is True\n        assert _is_ip_private_or_reserved(\"10.255.255.255\") is True\n\n    def test_private_class_b(self) -> None:\n        \"\"\"Test that private Class B addresses (172.16-31.x.x) are detected.\"\"\"\n        assert _is_ip_private_or_reserved(\"172.16.0.1\") is True\n        assert _is_ip_private_or_reserved(\"172.31.255.255\") is True\n\n    def test_private_class_c(self) -> None:\n        \"\"\"Test that private Class C addresses (192.168.x.x) are detected.\"\"\"\n        assert _is_ip_private_or_reserved(\"192.168.0.1\") is True\n        assert _is_ip_private_or_reserved(\"192.168.255.255\") is True\n\n    def test_link_local(self) -> None:\n        \"\"\"Test that link-local addresses are detected as private.\"\"\"\n        assert _is_ip_private_or_reserved(\"169.254.0.1\") is True\n        assert _is_ip_private_or_reserved(\"169.254.255.255\") is True\n\n    def test_cloud_metadata_ips(self) -> None:\n        \"\"\"Test that cloud metadata service IPs are detected.\"\"\"\n        assert _is_ip_private_or_reserved(\"169.254.169.254\") is True  # AWS/GCP/Azure\n        assert _is_ip_private_or_reserved(\"169.254.170.2\") is True  # AWS ECS\n\n    def test_multicast(self) -> None:\n        \"\"\"Test that multicast addresses are detected.\"\"\"\n        assert _is_ip_private_or_reserved(\"224.0.0.1\") is True\n        assert _is_ip_private_or_reserved(\"239.255.255.255\") is True\n\n    def test_unspecified(self) -> None:\n        \"\"\"Test that unspecified addresses are detected.\"\"\"\n        assert _is_ip_private_or_reserved(\"0.0.0.0\") is True\n        assert _is_ip_private_or_reserved(\"::\") is True\n\n    def test_public_ips(self) -> None:\n        \"\"\"Test that public IP addresses are not flagged as private.\"\"\"\n        assert _is_ip_private_or_reserved(\"8.8.8.8\") is False  # Google DNS\n        assert _is_ip_private_or_reserved(\"1.1.1.1\") is False  # Cloudflare DNS\n        assert _is_ip_private_or_reserved(\"104.16.0.1\") is False  # Cloudflare\n        assert _is_ip_private_or_reserved(\"142.250.80.46\") is False  # Google\n\n    def test_invalid_ip(self) -> None:\n        \"\"\"Test that invalid IPs are treated as potentially unsafe.\"\"\"\n        assert _is_ip_private_or_reserved(\"not-an-ip\") is True\n        assert _is_ip_private_or_reserved(\"\") is True\n\n\nclass TestValidateAndResolveUrl:\n    \"\"\"Tests for the _validate_and_resolve_url function.\"\"\"\n\n    def test_empty_url(self) -> None:\n        \"\"\"Test that empty URLs raise ValueError.\"\"\"\n        with pytest.raises(ValueError, match=\"URL cannot be empty\"):\n            _validate_and_resolve_url(\"\")\n\n    def test_invalid_scheme_ftp(self) -> None:\n        \"\"\"Test that non-HTTP schemes are rejected.\"\"\"\n        with pytest.raises(SSRFException, match=\"Invalid URL scheme\"):\n            _validate_and_resolve_url(\"ftp://example.com/file.txt\")\n\n    def test_invalid_scheme_file(self) -> None:\n        \"\"\"Test that file:// scheme is rejected.\"\"\"\n        with pytest.raises(SSRFException, match=\"Invalid URL scheme\"):\n            _validate_and_resolve_url(\"file:///etc/passwd\")\n\n    def test_invalid_scheme_gopher(self) -> None:\n        \"\"\"Test that gopher:// scheme is rejected.\"\"\"\n        with pytest.raises(SSRFException, match=\"Invalid URL scheme\"):\n            _validate_and_resolve_url(\"gopher://localhost:70/\")\n\n    def test_valid_http_scheme(self) -> None:\n        \"\"\"Test that http scheme is accepted for public URLs.\"\"\"\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_getaddrinfo:\n            mock_getaddrinfo.return_value = [\n                (2, 1, 6, \"\", (\"93.184.216.34\", 80))  # example.com's IP\n            ]\n            ip, hostname, port = _validate_and_resolve_url(\"http://example.com/\")\n            assert ip == \"93.184.216.34\"\n            assert hostname == \"example.com\"\n            assert port == 80\n\n    def test_valid_https_scheme(self) -> None:\n        \"\"\"Test that https scheme is accepted for public URLs.\"\"\"\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_getaddrinfo:\n            mock_getaddrinfo.return_value = [(2, 1, 6, \"\", (\"93.184.216.34\", 443))]\n            ip, hostname, port = _validate_and_resolve_url(\"https://example.com/\")\n            assert ip == \"93.184.216.34\"\n            assert hostname == \"example.com\"\n            assert port == 443\n\n    def test_localhost_ipv4(self) -> None:\n        \"\"\"Test that localhost (127.0.0.1) is blocked.\"\"\"\n        with pytest.raises(SSRFException, match=\"internal/private IP\"):\n            _validate_and_resolve_url(\"http://127.0.0.1/\")\n\n    def test_localhost_hostname(self) -> None:\n        \"\"\"Test that 'localhost' hostname is blocked.\"\"\"\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_getaddrinfo:\n            mock_getaddrinfo.return_value = [(2, 1, 6, \"\", (\"127.0.0.1\", 80))]\n            with pytest.raises(\n                SSRFException, match=\"Access to hostname 'localhost' is not allowed.\"\n            ):\n                _validate_and_resolve_url(\"http://localhost/\")\n\n    def test_private_ip_10_network(self) -> None:\n        \"\"\"Test that 10.x.x.x addresses are blocked.\"\"\"\n        with pytest.raises(SSRFException, match=\"internal/private IP\"):\n            _validate_and_resolve_url(\"http://10.0.0.1/\")\n\n    def test_private_ip_172_network(self) -> None:\n        \"\"\"Test that 172.16-31.x.x addresses are blocked.\"\"\"\n        with pytest.raises(SSRFException, match=\"internal/private IP\"):\n            _validate_and_resolve_url(\"http://172.16.0.1/\")\n\n    def test_private_ip_192_168_network(self) -> None:\n        \"\"\"Test that 192.168.x.x addresses are blocked.\"\"\"\n        with pytest.raises(SSRFException, match=\"internal/private IP\"):\n            _validate_and_resolve_url(\"http://192.168.1.1/\")\n\n    def test_aws_metadata_endpoint(self) -> None:\n        \"\"\"Test that AWS metadata endpoint is blocked.\"\"\"\n        with pytest.raises(\n            SSRFException, match=\"Access to hostname '169.254.169.254' is not allowed.\"\n        ):\n            _validate_and_resolve_url(\"http://169.254.169.254/latest/meta-data/\")\n\n    def test_blocked_hostname_kubernetes(self) -> None:\n        \"\"\"Test that Kubernetes internal hostnames are blocked.\"\"\"\n        with pytest.raises(SSRFException, match=\"not allowed\"):\n            _validate_and_resolve_url(\"http://kubernetes.default.svc.cluster.local/\")\n\n    def test_blocked_hostname_metadata_google(self) -> None:\n        \"\"\"Test that Google metadata hostname is blocked.\"\"\"\n        with pytest.raises(SSRFException, match=\"not allowed\"):\n            _validate_and_resolve_url(\"http://metadata.google.internal/\")\n\n    def test_url_with_credentials(self) -> None:\n        \"\"\"Test that URLs with embedded credentials are blocked.\"\"\"\n        with pytest.raises(SSRFException, match=\"embedded credentials\"):\n            _validate_and_resolve_url(\"http://user:pass@example.com/\")\n\n    def test_url_with_port(self) -> None:\n        \"\"\"Test that URLs with ports are handled correctly.\"\"\"\n        # Internal IP with custom port should be blocked\n        with pytest.raises(SSRFException, match=\"internal/private IP\"):\n            _validate_and_resolve_url(\"http://127.0.0.1:8080/metrics\")\n\n        # Public IP with custom port should be allowed\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_getaddrinfo:\n            mock_getaddrinfo.return_value = [(2, 1, 6, \"\", (\"93.184.216.34\", 8080))]\n            ip, hostname, port = _validate_and_resolve_url(\"http://example.com:8080/\")\n            assert ip == \"93.184.216.34\"\n            assert port == 8080\n\n    def test_hostname_resolving_to_private_ip(self) -> None:\n        \"\"\"Test that hostnames resolving to private IPs are blocked.\"\"\"\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_getaddrinfo:\n            mock_getaddrinfo.return_value = [(2, 1, 6, \"\", (\"192.168.1.100\", 80))]\n            with pytest.raises(SSRFException, match=\"internal/private IP\"):\n                _validate_and_resolve_url(\"http://internal-service.company.com/\")\n\n    def test_multiple_dns_records_one_private(self) -> None:\n        \"\"\"Test that a hostname with mixed public/private IPs is blocked.\"\"\"\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_getaddrinfo:\n            mock_getaddrinfo.return_value = [\n                (2, 1, 6, \"\", (\"93.184.216.34\", 80)),  # Public\n                (2, 1, 6, \"\", (\"10.0.0.1\", 80)),  # Private\n            ]\n            with pytest.raises(SSRFException, match=\"internal/private IP\"):\n                _validate_and_resolve_url(\"http://dual-stack.example.com/\")\n\n    def test_dns_resolution_failure(self) -> None:\n        \"\"\"Test that DNS resolution failures are handled safely.\"\"\"\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_getaddrinfo:\n            import socket\n\n            mock_getaddrinfo.side_effect = socket.gaierror(\"Name resolution failed\")\n            with pytest.raises(SSRFException, match=\"Could not resolve hostname\"):\n                _validate_and_resolve_url(\"http://nonexistent-domain-12345.invalid/\")\n\n\nclass TestSsrfSafeGet:\n    \"\"\"Tests for the ssrf_safe_get function.\"\"\"\n\n    def test_blocks_private_ip(self) -> None:\n        \"\"\"Test that requests to private IPs are blocked.\"\"\"\n        with pytest.raises(SSRFException, match=\"internal/private IP\"):\n            ssrf_safe_get(\"http://192.168.1.1/\")\n\n    def test_blocks_localhost(self) -> None:\n        \"\"\"Test that requests to localhost are blocked.\"\"\"\n        with pytest.raises(SSRFException, match=\"internal/private IP\"):\n            ssrf_safe_get(\"http://127.0.0.1/\")\n\n    def test_blocks_metadata_endpoint(self) -> None:\n        \"\"\"Test that requests to cloud metadata endpoints are blocked.\"\"\"\n        with pytest.raises(\n            SSRFException, match=\"Access to hostname '169.254.169.254' is not allowed.\"\n        ):\n            ssrf_safe_get(\"http://169.254.169.254/\")\n\n    def test_makes_request_to_validated_ip_http(self) -> None:\n        \"\"\"Test that HTTP requests are made to the validated IP.\"\"\"\n        mock_response = MagicMock()\n        mock_response.status_code = 200\n        mock_response.is_redirect = False\n\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_getaddrinfo:\n            mock_getaddrinfo.return_value = [(2, 1, 6, \"\", (\"93.184.216.34\", 80))]\n\n            with patch(\"onyx.utils.url.requests.get\") as mock_get:\n                mock_get.return_value = mock_response\n\n                response = ssrf_safe_get(\"http://example.com/path\")\n\n                # Verify the request was made to the IP, not the hostname\n                mock_get.assert_called_once()\n                call_args = mock_get.call_args\n                assert \"93.184.216.34\" in call_args[0][0]\n                # Verify Host header is set\n                assert call_args[1][\"headers\"][\"Host\"] == \"example.com\"\n                assert response == mock_response\n\n    def test_makes_request_with_original_url_https(self) -> None:\n        \"\"\"Test that HTTPS requests use original URL for TLS.\"\"\"\n        mock_response = MagicMock()\n        mock_response.status_code = 200\n        mock_response.is_redirect = False\n\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_getaddrinfo:\n            mock_getaddrinfo.return_value = [(2, 1, 6, \"\", (\"93.184.216.34\", 443))]\n\n            with patch(\"onyx.utils.url.requests.get\") as mock_get:\n                mock_get.return_value = mock_response\n\n                response = ssrf_safe_get(\"https://example.com/path\")\n\n                # For HTTPS, we use original URL for TLS\n                mock_get.assert_called_once()\n                call_args = mock_get.call_args\n                assert call_args[0][0] == \"https://example.com/path\"\n                assert response == mock_response\n\n    def test_passes_custom_headers(self) -> None:\n        \"\"\"Test that custom headers are passed through.\"\"\"\n        mock_response = MagicMock()\n        mock_response.is_redirect = False\n\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_getaddrinfo:\n            mock_getaddrinfo.return_value = [(2, 1, 6, \"\", (\"93.184.216.34\", 80))]\n\n            with patch(\"onyx.utils.url.requests.get\") as mock_get:\n                mock_get.return_value = mock_response\n\n                custom_headers = {\"User-Agent\": \"TestBot/1.0\"}\n                ssrf_safe_get(\"http://example.com/\", headers=custom_headers)\n\n                call_args = mock_get.call_args\n                assert call_args[1][\"headers\"][\"User-Agent\"] == \"TestBot/1.0\"\n\n    def test_passes_timeout(self) -> None:\n        \"\"\"Test that timeout is passed through, including tuple form.\"\"\"\n        mock_response = MagicMock()\n        mock_response.is_redirect = False\n\n        with patch(\"onyx.utils.url.socket.getaddrinfo\") as mock_getaddrinfo:\n            mock_getaddrinfo.return_value = [(2, 1, 6, \"\", (\"93.184.216.34\", 80))]\n\n            with patch(\"onyx.utils.url.requests.get\") as mock_get:\n                mock_get.return_value = mock_response\n\n                ssrf_safe_get(\"http://example.com/\", timeout=(5, 15))\n\n                call_args = mock_get.call_args\n                assert call_args[1][\"timeout\"] == (5, 15)\n\n\nclass TestValidateOutboundHttpUrl:\n    def test_rejects_private_ip_by_default(self) -> None:\n        with pytest.raises(SSRFException, match=\"internal/private IP\"):\n            validate_outbound_http_url(\"http://127.0.0.1:8000\")\n\n    def test_allows_private_ip_when_explicitly_enabled(self) -> None:\n        validated_url = validate_outbound_http_url(\n            \"http://127.0.0.1:8000\", allow_private_network=True\n        )\n        assert validated_url == \"http://127.0.0.1:8000\"\n\n    def test_blocks_metadata_hostname_when_private_is_enabled(self) -> None:\n        with pytest.raises(SSRFException, match=\"not allowed\"):\n            validate_outbound_http_url(\n                \"http://metadata.google.internal/latest\",\n                allow_private_network=True,\n            )\n"
  },
  {
    "path": "backend/tests/unit/onyx/utils/test_vespa_query.py",
    "content": "from datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nfrom onyx.configs.constants import DocumentSource\nfrom onyx.configs.constants import INDEX_SEPARATOR\nfrom onyx.context.search.models import IndexFilters\nfrom onyx.context.search.models import Tag\nfrom onyx.document_index.vespa.shared_utils.vespa_request_builders import (\n    build_vespa_filters,\n)\nfrom onyx.document_index.vespa_constants import DOC_UPDATED_AT\nfrom onyx.document_index.vespa_constants import DOCUMENT_SETS\nfrom onyx.document_index.vespa_constants import HIDDEN\nfrom onyx.document_index.vespa_constants import METADATA_LIST\nfrom onyx.document_index.vespa_constants import PERSONAS\nfrom onyx.document_index.vespa_constants import SOURCE_TYPE\nfrom onyx.document_index.vespa_constants import TENANT_ID\nfrom onyx.document_index.vespa_constants import USER_PROJECT\nfrom shared_configs.configs import MULTI_TENANT\n\n\nclass TestBuildVespaFilters:\n    def test_empty_filters(self) -> None:\n        \"\"\"Test with empty filters object.\"\"\"\n        filters = IndexFilters(access_control_list=[])\n        result = build_vespa_filters(filters)\n        assert result == f\"!({HIDDEN}=true) and \"\n\n        # With trailing AND removed\n        result = build_vespa_filters(filters, remove_trailing_and=True)\n        assert result == f\"!({HIDDEN}=true)\"\n\n    def test_include_hidden(self) -> None:\n        \"\"\"Test with include_hidden flag.\"\"\"\n        filters = IndexFilters(access_control_list=[])\n        result = build_vespa_filters(filters, include_hidden=True)\n        assert result == \"\"  # No filters applied when including hidden\n\n        # With some other filter to ensure proper AND chaining\n        filters = IndexFilters(access_control_list=[], source_type=[DocumentSource.WEB])\n        result = build_vespa_filters(filters, include_hidden=True)\n        assert result == f'({SOURCE_TYPE} contains \"web\") and '\n\n    def test_acl(self) -> None:\n        \"\"\"Test with acls — uses weightedSet operator for efficient matching.\"\"\"\n        # Single ACL\n        filters = IndexFilters(access_control_list=[\"user1\"])\n        result = build_vespa_filters(filters)\n        assert (\n            result\n            == f'!({HIDDEN}=true) and weightedSet(access_control_list, {{\"user1\":1}}) and '\n        )\n\n        # Multiple ACL's\n        filters = IndexFilters(access_control_list=[\"user2\", \"group2\"])\n        result = build_vespa_filters(filters)\n        assert (\n            result\n            == f'!({HIDDEN}=true) and weightedSet(access_control_list, {{\"user2\":1, \"group2\":1}}) and '\n        )\n\n    def test_tenant_filter(self) -> None:\n        \"\"\"Test tenant ID filtering.\"\"\"\n        # With tenant ID\n        if MULTI_TENANT:\n            filters = IndexFilters(access_control_list=[], tenant_id=\"tenant1\")\n            result = build_vespa_filters(filters)\n            assert (\n                f'!({HIDDEN}=true) and ({TENANT_ID} contains \"tenant1\") and ' == result\n            )\n\n        # No tenant ID\n        filters = IndexFilters(access_control_list=[], tenant_id=None)\n        result = build_vespa_filters(filters)\n        assert f\"!({HIDDEN}=true) and \" == result\n\n    def test_source_type_filter(self) -> None:\n        \"\"\"Test source type filtering.\"\"\"\n        # Single source type\n        filters = IndexFilters(access_control_list=[], source_type=[DocumentSource.WEB])\n        result = build_vespa_filters(filters)\n        assert f'!({HIDDEN}=true) and ({SOURCE_TYPE} contains \"web\") and ' == result\n\n        # Multiple source types\n        filters = IndexFilters(\n            access_control_list=[],\n            source_type=[DocumentSource.WEB, DocumentSource.JIRA],\n        )\n        result = build_vespa_filters(filters)\n        assert (\n            f'!({HIDDEN}=true) and ({SOURCE_TYPE} contains \"web\" or {SOURCE_TYPE} contains \"jira\") and '\n            == result\n        )\n\n        # Empty source type list\n        filters = IndexFilters(access_control_list=[], source_type=[])\n        result = build_vespa_filters(filters)\n        assert f\"!({HIDDEN}=true) and \" == result\n\n    def test_tag_filters(self) -> None:\n        \"\"\"Test tag filtering.\"\"\"\n        # Single tag\n        filters = IndexFilters(\n            access_control_list=[], tags=[Tag(tag_key=\"color\", tag_value=\"red\")]\n        )\n        result = build_vespa_filters(filters)\n        assert (\n            f'!({HIDDEN}=true) and ({METADATA_LIST} contains \"color{INDEX_SEPARATOR}red\") and '\n            == result\n        )\n\n        # Multiple tags\n        filters = IndexFilters(\n            access_control_list=[],\n            tags=[\n                Tag(tag_key=\"color\", tag_value=\"red\"),\n                Tag(tag_key=\"size\", tag_value=\"large\"),\n            ],\n        )\n        result = build_vespa_filters(filters)\n        expected = (\n            f'!({HIDDEN}=true) and ({METADATA_LIST} contains \"color{INDEX_SEPARATOR}red\" '\n            f'or {METADATA_LIST} contains \"size{INDEX_SEPARATOR}large\") and '\n        )\n        assert expected == result\n\n        # Empty tags list\n        filters = IndexFilters(access_control_list=[], tags=[])\n        result = build_vespa_filters(filters)\n        assert f\"!({HIDDEN}=true) and \" == result\n\n    def test_document_sets_filter(self) -> None:\n        \"\"\"Test document sets filtering.\"\"\"\n        # Single document set\n        filters = IndexFilters(access_control_list=[], document_set=[\"set1\"])\n        result = build_vespa_filters(filters)\n        assert f'!({HIDDEN}=true) and ({DOCUMENT_SETS} contains \"set1\") and ' == result\n\n        # Multiple document sets\n        filters = IndexFilters(access_control_list=[], document_set=[\"set1\", \"set2\"])\n        result = build_vespa_filters(filters)\n        assert (\n            f'!({HIDDEN}=true) and ({DOCUMENT_SETS} contains \"set1\" or {DOCUMENT_SETS} contains \"set2\") and '\n            == result\n        )\n\n        # Empty document sets\n        filters = IndexFilters(access_control_list=[], document_set=[])\n        result = build_vespa_filters(filters)\n        assert f\"!({HIDDEN}=true) and \" == result\n\n    def test_user_project_filter(self) -> None:\n        \"\"\"Test user project filtering.\n\n        project_id_filter alone does NOT trigger a knowledge scope restriction\n        (an agent with no explicit knowledge should search everything).\n        It only participates when explicit knowledge filters are present.\n        \"\"\"\n        # project_id_filter alone → no restriction\n        filters = IndexFilters(access_control_list=[], project_id_filter=789)\n        result = build_vespa_filters(filters)\n        assert f\"!({HIDDEN}=true) and \" == result\n\n        # project_id_filter with document_set → both OR'd\n        filters = IndexFilters(\n            access_control_list=[], project_id_filter=789, document_set=[\"set1\"]\n        )\n        result = build_vespa_filters(filters)\n        assert (\n            f'!({HIDDEN}=true) and (({DOCUMENT_SETS} contains \"set1\") or ({USER_PROJECT} contains \"789\")) and '\n            == result\n        )\n\n        # No project id filter\n        filters = IndexFilters(access_control_list=[], project_id_filter=None)\n        result = build_vespa_filters(filters)\n        assert f\"!({HIDDEN}=true) and \" == result\n\n    def test_time_cutoff_filter(self) -> None:\n        \"\"\"Test time cutoff filtering.\"\"\"\n        # With cutoff time\n        cutoff_time = datetime(2023, 1, 1, tzinfo=timezone.utc)\n        filters = IndexFilters(access_control_list=[], time_cutoff=cutoff_time)\n        result = build_vespa_filters(filters)\n        cutoff_secs = int(cutoff_time.timestamp())\n        assert (\n            f\"!({HIDDEN}=true) and !({DOC_UPDATED_AT} < {cutoff_secs}) and \" == result\n        )\n\n        # No cutoff time\n        filters = IndexFilters(access_control_list=[], time_cutoff=None)\n        result = build_vespa_filters(filters)\n        assert f\"!({HIDDEN}=true) and \" == result\n\n        # Test untimed logic (when cutoff is old enough)\n        old_cutoff = datetime.now(timezone.utc) - timedelta(days=100)\n        filters = IndexFilters(access_control_list=[], time_cutoff=old_cutoff)\n        result = build_vespa_filters(filters)\n        old_cutoff_secs = int(old_cutoff.timestamp())\n        assert (\n            f\"!({HIDDEN}=true) and !({DOC_UPDATED_AT} < {old_cutoff_secs}) and \"\n            == result\n        )\n\n    def test_combined_filters(self) -> None:\n        \"\"\"Test combining multiple filter types.\n\n        Knowledge-scope filters (document_set, project_id_filter, persona_id_filter)\n        are OR'd together, while all other filters are AND'd.\n        \"\"\"\n        filters = IndexFilters(\n            access_control_list=[\"user1\", \"group1\"],\n            source_type=[DocumentSource.WEB],\n            tags=[Tag(tag_key=\"color\", tag_value=\"red\")],\n            document_set=[\"set1\"],\n            project_id_filter=789,\n            persona_id_filter=42,\n            time_cutoff=datetime(2023, 1, 1, tzinfo=timezone.utc),\n        )\n\n        result = build_vespa_filters(filters)\n\n        expected = f\"!({HIDDEN}=true) and \"\n        expected += 'weightedSet(access_control_list, {\"user1\":1, \"group1\":1}) and '\n        expected += f'({SOURCE_TYPE} contains \"web\") and '\n        expected += f'({METADATA_LIST} contains \"color{INDEX_SEPARATOR}red\") and '\n        # Knowledge scope filters are OR'd together\n        # (persona_id_filter is primary, project_id_filter is additive — order reflects this)\n        expected += (\n            f'(({DOCUMENT_SETS} contains \"set1\")'\n            f' or ({PERSONAS} contains \"42\")'\n            f' or ({USER_PROJECT} contains \"789\")'\n            f\") and \"\n        )\n        cutoff_secs = int(datetime(2023, 1, 1, tzinfo=timezone.utc).timestamp())\n        expected += f\"!({DOC_UPDATED_AT} < {cutoff_secs}) and \"\n\n        assert expected == result\n\n        # With trailing AND removed\n        result_no_trailing = build_vespa_filters(filters, remove_trailing_and=True)\n        assert expected[:-5] == result_no_trailing  # Remove trailing \" and \"\n\n    def test_knowledge_scope_single_filter_not_wrapped(self) -> None:\n        \"\"\"When only one knowledge-scope filter is present it should not\n        be wrapped in an extra OR group.\"\"\"\n        filters = IndexFilters(access_control_list=[], document_set=[\"set1\"])\n        result = build_vespa_filters(filters)\n        assert f'!({HIDDEN}=true) and ({DOCUMENT_SETS} contains \"set1\") and ' == result\n\n    def test_persona_id_filter_is_primary_knowledge_scope(self) -> None:\n        \"\"\"persona_id_filter alone should trigger a knowledge scope restriction\n        (a persona with user files IS explicit knowledge).\"\"\"\n        filters = IndexFilters(access_control_list=[], persona_id_filter=42)\n        result = build_vespa_filters(filters)\n        assert f'!({HIDDEN}=true) and ({PERSONAS} contains \"42\") and ' == result\n\n    def test_persona_id_filter_with_project_id_filter(self) -> None:\n        \"\"\"When persona_id_filter triggers the scope, project_id_filter should be\n        OR'd in additively.\"\"\"\n        filters = IndexFilters(\n            access_control_list=[], persona_id_filter=42, project_id_filter=789\n        )\n        result = build_vespa_filters(filters)\n        expected = (\n            f\"!({HIDDEN}=true) and \"\n            f'(({PERSONAS} contains \"42\") or ({USER_PROJECT} contains \"789\")) and '\n        )\n        assert expected == result\n\n    def test_knowledge_scope_document_set_and_persona_filter_ored(self) -> None:\n        \"\"\"Document set filter and persona_id_filter must be OR'd so that\n        connector documents (in the set) and persona user files can\n        both be found.\"\"\"\n        filters = IndexFilters(\n            access_control_list=[],\n            document_set=[\"engineering\"],\n            persona_id_filter=42,\n        )\n        result = build_vespa_filters(filters)\n        expected = f'!({HIDDEN}=true) and (({DOCUMENT_SETS} contains \"engineering\") or ({PERSONAS} contains \"42\")) and '\n        assert expected == result\n\n    def test_acl_large_list_uses_weighted_set(self) -> None:\n        \"\"\"Verify that large ACL lists produce a weightedSet clause\n        instead of OR-chained contains — this is what prevents Vespa\n        HTTP 400 errors for users with thousands of permission groups.\"\"\"\n        acl = [f\"external_group:google_drive_{i}\" for i in range(10_000)]\n        acl += [\"user_email:user@example.com\", \"__PUBLIC__\"]\n        filters = IndexFilters(access_control_list=acl)\n        result = build_vespa_filters(filters)\n\n        assert \"weightedSet(access_control_list, {\" in result\n        # Must NOT contain OR-chained contains clauses\n        assert \"access_control_list contains\" not in result\n        # All entries should be present\n        assert '\"external_group:google_drive_0\":1' in result\n        assert '\"external_group:google_drive_9999\":1' in result\n        assert '\"user_email:user@example.com\":1' in result\n        assert '\"__PUBLIC__\":1' in result\n\n    def test_acl_empty_strings_filtered(self) -> None:\n        \"\"\"Empty strings in the ACL list should be filtered out.\"\"\"\n        filters = IndexFilters(access_control_list=[\"user1\", \"\", \"group1\"])\n        result = build_vespa_filters(filters)\n        assert (\n            result\n            == f'!({HIDDEN}=true) and weightedSet(access_control_list, {{\"user1\":1, \"group1\":1}}) and '\n        )\n\n        # All empty\n        filters = IndexFilters(access_control_list=[\"\", \"\"])\n        result = build_vespa_filters(filters)\n        assert result == f\"!({HIDDEN}=true) and \"\n\n    def test_empty_or_none_values(self) -> None:\n        \"\"\"Test with empty or None values in filter lists.\"\"\"\n        # Empty strings in document set\n        filters = IndexFilters(\n            access_control_list=[], document_set=[\"set1\", \"\", \"set2\"]\n        )\n        result = build_vespa_filters(filters)\n        assert (\n            f'!({HIDDEN}=true) and ({DOCUMENT_SETS} contains \"set1\" or {DOCUMENT_SETS} contains \"set2\") and '\n            == result\n        )\n\n        # All empty strings in document set\n        filters = IndexFilters(access_control_list=[], document_set=[\"\", \"\"])\n        result = build_vespa_filters(filters)\n        assert f\"!({HIDDEN}=true) and \" == result\n"
  },
  {
    "path": "backend/tests/unit/onyx/utils/test_vespa_tasks.py",
    "content": "from types import SimpleNamespace\nfrom typing import Any\n\nfrom onyx.background.celery.tasks.vespa import tasks as vespa_tasks\n\n\nclass _StubRedisDocumentSet:\n    \"\"\"Lightweight stand-in for RedisDocumentSet used by monitor tests.\"\"\"\n\n    reset_called = False\n\n    @staticmethod\n    def get_id_from_fence_key(key: str) -> str | None:\n        parts = key.split(\"_\")\n        return parts[-1] if len(parts) == 3 else None\n\n    def __init__(self, tenant_id: str, object_id: str) -> None:  # noqa: ARG002\n        self.taskset_key = f\"documentset_taskset_{object_id}\"\n        self._payload = 0\n\n    @property\n    def fenced(self) -> bool:\n        return True\n\n    @property\n    def payload(self) -> int:\n        return self._payload\n\n    def reset(self) -> None:\n        self.__class__.reset_called = True\n\n\ndef _setup_common_patches(monkeypatch: Any, document_set: Any) -> dict[str, bool]:\n    calls: dict[str, bool] = {\"deleted\": False, \"synced\": False}\n\n    monkeypatch.setattr(vespa_tasks, \"RedisDocumentSet\", _StubRedisDocumentSet)\n\n    monkeypatch.setattr(\n        vespa_tasks,\n        \"get_document_set_by_id\",\n        lambda db_session, document_set_id: document_set,  # noqa: ARG005\n    )\n\n    def _delete(document_set_row: Any, db_session: Any) -> None:  # noqa: ARG001\n        calls[\"deleted\"] = True\n\n    monkeypatch.setattr(vespa_tasks, \"delete_document_set\", _delete)\n\n    def _mark(document_set_id: Any, db_session: Any) -> None:  # noqa: ARG001\n        calls[\"synced\"] = True\n\n    monkeypatch.setattr(vespa_tasks, \"mark_document_set_as_synced\", _mark)\n\n    monkeypatch.setattr(\n        vespa_tasks,\n        \"update_sync_record_status\",\n        lambda db_session, entity_id, sync_type, sync_status, num_docs_synced: None,  # noqa: ARG005\n    )\n\n    return calls\n\n\ndef test_monitor_preserves_federated_only_document_set(monkeypatch: Any) -> None:\n    document_set = SimpleNamespace(\n        connector_credential_pairs=[],\n        federated_connectors=[object()],\n    )\n\n    calls = _setup_common_patches(monkeypatch, document_set)\n\n    vespa_tasks.monitor_document_set_taskset(\n        tenant_id=\"tenant\",\n        key_bytes=b\"documentset_fence_1\",\n        r=SimpleNamespace(scard=lambda key: 0),  # type: ignore[arg-type]  # noqa: ARG005\n        db_session=SimpleNamespace(),  # type: ignore[arg-type]\n    )\n\n    assert calls[\"synced\"] is True\n    assert calls[\"deleted\"] is False\n\n\ndef test_monitor_deletes_document_set_with_no_connectors(monkeypatch: Any) -> None:\n    document_set = SimpleNamespace(\n        connector_credential_pairs=[],\n        federated_connectors=[],\n    )\n\n    calls = _setup_common_patches(monkeypatch, document_set)\n\n    vespa_tasks.monitor_document_set_taskset(\n        tenant_id=\"tenant\",\n        key_bytes=b\"documentset_fence_2\",\n        r=SimpleNamespace(scard=lambda key: 0),  # type: ignore[arg-type]  # noqa: ARG005\n        db_session=SimpleNamespace(),  # type: ignore[arg-type]\n    )\n\n    assert calls[\"deleted\"] is True\n    assert calls[\"synced\"] is False\n"
  },
  {
    "path": "backend/tests/unit/onyx/voice/providers/test_azure_provider.py",
    "content": "import pytest\n\nfrom onyx.voice.providers.azure import AzureVoiceProvider\n\n\ndef test_azure_provider_extracts_region_from_target_uri() -> None:\n    provider = AzureVoiceProvider(\n        api_key=\"key\",\n        api_base=\"https://westus.api.cognitive.microsoft.com/\",\n        custom_config={},\n    )\n    assert provider.speech_region == \"westus\"\n\n\ndef test_azure_provider_normalizes_uppercase_region() -> None:\n    provider = AzureVoiceProvider(\n        api_key=\"key\",\n        api_base=None,\n        custom_config={\"speech_region\": \"WestUS2\"},\n    )\n    assert provider.speech_region == \"westus2\"\n\n\ndef test_azure_provider_rejects_invalid_speech_region() -> None:\n    with pytest.raises(ValueError, match=\"Invalid Azure speech_region\"):\n        AzureVoiceProvider(\n            api_key=\"key\",\n            api_base=None,\n            custom_config={\"speech_region\": \"westus/../../etc\"},\n        )\n"
  },
  {
    "path": "backend/tests/unit/onyx/voice/providers/test_azure_ssml.py",
    "content": "import io\nimport struct\nimport wave\n\nimport pytest\n\nfrom onyx.voice.providers.azure import AzureVoiceProvider\n\n\n# --- _is_azure_cloud_url ---\n\n\ndef test_is_azure_cloud_url_speech_microsoft() -> None:\n    assert AzureVoiceProvider._is_azure_cloud_url(\n        \"https://eastus.tts.speech.microsoft.com/cognitiveservices/v1\"\n    )\n\n\ndef test_is_azure_cloud_url_cognitive_microsoft() -> None:\n    assert AzureVoiceProvider._is_azure_cloud_url(\n        \"https://westus.api.cognitive.microsoft.com/\"\n    )\n\n\ndef test_is_azure_cloud_url_rejects_custom_host() -> None:\n    assert not AzureVoiceProvider._is_azure_cloud_url(\"https://my-custom-host.com/\")\n\n\ndef test_is_azure_cloud_url_rejects_none() -> None:\n    assert not AzureVoiceProvider._is_azure_cloud_url(None)\n\n\n# --- _extract_speech_region_from_uri ---\n\n\ndef test_extract_region_from_tts_url() -> None:\n    assert (\n        AzureVoiceProvider._extract_speech_region_from_uri(\n            \"https://eastus.tts.speech.microsoft.com/cognitiveservices/v1\"\n        )\n        == \"eastus\"\n    )\n\n\ndef test_extract_region_from_cognitive_api_url() -> None:\n    assert (\n        AzureVoiceProvider._extract_speech_region_from_uri(\n            \"https://eastus.api.cognitive.microsoft.com/\"\n        )\n        == \"eastus\"\n    )\n\n\ndef test_extract_region_returns_none_for_custom_domain() -> None:\n    \"\"\"Custom domains use resource name, not region — must use speech_region config.\"\"\"\n    assert (\n        AzureVoiceProvider._extract_speech_region_from_uri(\n            \"https://myresource.cognitiveservices.azure.com/\"\n        )\n        is None\n    )\n\n\ndef test_extract_region_returns_none_for_none() -> None:\n    assert AzureVoiceProvider._extract_speech_region_from_uri(None) is None\n\n\n# --- _validate_speech_region ---\n\n\ndef test_validate_region_normalizes_to_lowercase() -> None:\n    assert AzureVoiceProvider._validate_speech_region(\"WestUS2\") == \"westus2\"\n\n\ndef test_validate_region_accepts_hyphens() -> None:\n    assert AzureVoiceProvider._validate_speech_region(\"us-east-1\") == \"us-east-1\"\n\n\ndef test_validate_region_rejects_path_traversal() -> None:\n    with pytest.raises(ValueError, match=\"Invalid Azure speech_region\"):\n        AzureVoiceProvider._validate_speech_region(\"westus/../../etc\")\n\n\ndef test_validate_region_rejects_dots() -> None:\n    with pytest.raises(ValueError, match=\"Invalid Azure speech_region\"):\n        AzureVoiceProvider._validate_speech_region(\"west.us\")\n\n\n# --- _pcm16_to_wav ---\n\n\ndef test_pcm16_to_wav_produces_valid_wav() -> None:\n    samples = [32767, -32768, 0, 1234]\n    pcm_data = struct.pack(f\"<{len(samples)}h\", *samples)\n    wav_bytes = AzureVoiceProvider._pcm16_to_wav(pcm_data, sample_rate=16000)\n\n    with wave.open(io.BytesIO(wav_bytes), \"rb\") as wav_file:\n        assert wav_file.getnchannels() == 1\n        assert wav_file.getsampwidth() == 2\n        assert wav_file.getframerate() == 16000\n        frames = wav_file.readframes(4)\n        recovered = struct.unpack(f\"<{len(samples)}h\", frames)\n        assert list(recovered) == samples\n\n\n# --- URL Construction ---\n\n\ndef test_get_tts_url_cloud() -> None:\n    provider = AzureVoiceProvider(\n        api_key=\"key\", api_base=None, custom_config={\"speech_region\": \"eastus\"}\n    )\n    assert (\n        provider._get_tts_url()\n        == \"https://eastus.tts.speech.microsoft.com/cognitiveservices/v1\"\n    )\n\n\ndef test_get_stt_url_cloud() -> None:\n    provider = AzureVoiceProvider(\n        api_key=\"key\", api_base=None, custom_config={\"speech_region\": \"westus2\"}\n    )\n    assert \"westus2.stt.speech.microsoft.com\" in provider._get_stt_url()\n\n\ndef test_get_tts_url_self_hosted() -> None:\n    provider = AzureVoiceProvider(\n        api_key=\"key\", api_base=\"http://localhost:5000\", custom_config={}\n    )\n    assert provider._get_tts_url() == \"http://localhost:5000/cognitiveservices/v1\"\n\n\ndef test_get_tts_url_self_hosted_strips_trailing_slash() -> None:\n    provider = AzureVoiceProvider(\n        api_key=\"key\", api_base=\"http://localhost:5000/\", custom_config={}\n    )\n    assert provider._get_tts_url() == \"http://localhost:5000/cognitiveservices/v1\"\n\n\n# --- _is_self_hosted ---\n\n\ndef test_is_self_hosted_true_for_custom_endpoint() -> None:\n    provider = AzureVoiceProvider(\n        api_key=\"key\", api_base=\"http://localhost:5000\", custom_config={}\n    )\n    assert provider._is_self_hosted() is True\n\n\ndef test_is_self_hosted_false_for_azure_cloud() -> None:\n    provider = AzureVoiceProvider(\n        api_key=\"key\",\n        api_base=\"https://eastus.api.cognitive.microsoft.com/\",\n        custom_config={},\n    )\n    assert provider._is_self_hosted() is False\n\n\n# --- Resampling ---\n\n\ndef test_resample_pcm16_passthrough() -> None:\n    from onyx.voice.providers.azure import AzureStreamingTranscriber\n\n    t = AzureStreamingTranscriber.__new__(AzureStreamingTranscriber)\n    t.input_sample_rate = 16000\n    t.target_sample_rate = 16000\n\n    data = struct.pack(\"<4h\", 100, 200, 300, 400)\n    assert t._resample_pcm16(data) == data\n\n\ndef test_resample_pcm16_downsamples() -> None:\n    from onyx.voice.providers.azure import AzureStreamingTranscriber\n\n    t = AzureStreamingTranscriber.__new__(AzureStreamingTranscriber)\n    t.input_sample_rate = 24000\n    t.target_sample_rate = 16000\n\n    input_samples = [1000, 2000, 3000, 4000, 5000, 6000]\n    data = struct.pack(f\"<{len(input_samples)}h\", *input_samples)\n\n    result = t._resample_pcm16(data)\n    assert len(result) // 2 == 4\n\n\ndef test_resample_pcm16_empty_data() -> None:\n    from onyx.voice.providers.azure import AzureStreamingTranscriber\n\n    t = AzureStreamingTranscriber.__new__(AzureStreamingTranscriber)\n    t.input_sample_rate = 24000\n    t.target_sample_rate = 16000\n\n    assert t._resample_pcm16(b\"\") == b\"\"\n"
  },
  {
    "path": "backend/tests/unit/onyx/voice/providers/test_elevenlabs_provider.py",
    "content": "import struct\n\nfrom onyx.voice.providers.elevenlabs import _http_to_ws_url\nfrom onyx.voice.providers.elevenlabs import DEFAULT_ELEVENLABS_API_BASE\nfrom onyx.voice.providers.elevenlabs import ElevenLabsSTTMessageType\nfrom onyx.voice.providers.elevenlabs import ElevenLabsVoiceProvider\n\n\n# --- _http_to_ws_url ---\n\n\ndef test_http_to_ws_url_converts_https_to_wss() -> None:\n    assert _http_to_ws_url(\"https://api.elevenlabs.io\") == \"wss://api.elevenlabs.io\"\n\n\ndef test_http_to_ws_url_converts_http_to_ws() -> None:\n    assert _http_to_ws_url(\"http://localhost:8080\") == \"ws://localhost:8080\"\n\n\ndef test_http_to_ws_url_passes_through_other_schemes() -> None:\n    assert _http_to_ws_url(\"wss://already.ws\") == \"wss://already.ws\"\n\n\ndef test_http_to_ws_url_preserves_path() -> None:\n    assert (\n        _http_to_ws_url(\"https://api.elevenlabs.io/v1/tts\")\n        == \"wss://api.elevenlabs.io/v1/tts\"\n    )\n\n\n# --- StrEnum comparison ---\n\n\ndef test_stt_message_type_compares_as_string() -> None:\n    \"\"\"StrEnum members should work in string comparisons (e.g. from JSON).\"\"\"\n    assert str(ElevenLabsSTTMessageType.COMMITTED_TRANSCRIPT) == \"committed_transcript\"\n    assert isinstance(ElevenLabsSTTMessageType.ERROR, str)\n\n\n# --- Resampling ---\n\n\ndef test_resample_pcm16_passthrough_when_same_rate() -> None:\n    from onyx.voice.providers.elevenlabs import ElevenLabsStreamingTranscriber\n\n    t = ElevenLabsStreamingTranscriber.__new__(ElevenLabsStreamingTranscriber)\n    t.input_sample_rate = 16000\n    t.target_sample_rate = 16000\n\n    data = struct.pack(\"<4h\", 100, 200, 300, 400)\n    assert t._resample_pcm16(data) == data\n\n\ndef test_resample_pcm16_downsamples() -> None:\n    \"\"\"24kHz -> 16kHz should produce fewer samples (ratio 3:2).\"\"\"\n    from onyx.voice.providers.elevenlabs import ElevenLabsStreamingTranscriber\n\n    t = ElevenLabsStreamingTranscriber.__new__(ElevenLabsStreamingTranscriber)\n    t.input_sample_rate = 24000\n    t.target_sample_rate = 16000\n\n    input_samples = [1000, 2000, 3000, 4000, 5000, 6000]\n    data = struct.pack(f\"<{len(input_samples)}h\", *input_samples)\n\n    result = t._resample_pcm16(data)\n    output_samples = struct.unpack(f\"<{len(result) // 2}h\", result)\n\n    assert len(output_samples) == 4\n\n\ndef test_resample_pcm16_clamps_to_int16_range() -> None:\n    from onyx.voice.providers.elevenlabs import ElevenLabsStreamingTranscriber\n\n    t = ElevenLabsStreamingTranscriber.__new__(ElevenLabsStreamingTranscriber)\n    t.input_sample_rate = 24000\n    t.target_sample_rate = 16000\n\n    input_samples = [32767, -32768, 32767, -32768, 32767, -32768]\n    data = struct.pack(f\"<{len(input_samples)}h\", *input_samples)\n\n    result = t._resample_pcm16(data)\n    output_samples = struct.unpack(f\"<{len(result) // 2}h\", result)\n    for s in output_samples:\n        assert -32768 <= s <= 32767\n\n\n# --- Provider Model Defaulting ---\n\n\ndef test_provider_defaults_invalid_stt_model() -> None:\n    provider = ElevenLabsVoiceProvider(api_key=\"test\", stt_model=\"invalid_model\")\n    assert provider.stt_model == \"scribe_v1\"\n\n\ndef test_provider_defaults_invalid_tts_model() -> None:\n    provider = ElevenLabsVoiceProvider(api_key=\"test\", tts_model=\"invalid_model\")\n    assert provider.tts_model == \"eleven_multilingual_v2\"\n\n\ndef test_provider_accepts_valid_models() -> None:\n    provider = ElevenLabsVoiceProvider(\n        api_key=\"test\", stt_model=\"scribe_v2_realtime\", tts_model=\"eleven_turbo_v2_5\"\n    )\n    assert provider.stt_model == \"scribe_v2_realtime\"\n    assert provider.tts_model == \"eleven_turbo_v2_5\"\n\n\ndef test_provider_defaults_api_base() -> None:\n    provider = ElevenLabsVoiceProvider(api_key=\"test\")\n    assert provider.api_base == DEFAULT_ELEVENLABS_API_BASE\n\n\ndef test_provider_get_available_voices_returns_copy() -> None:\n    provider = ElevenLabsVoiceProvider(api_key=\"test\")\n    voices = provider.get_available_voices()\n    voices.clear()\n    assert len(provider.get_available_voices()) > 0\n"
  },
  {
    "path": "backend/tests/unit/onyx/voice/providers/test_openai_provider.py",
    "content": "import io\nimport struct\nimport wave\n\nfrom onyx.voice.providers.openai import _create_wav_header\nfrom onyx.voice.providers.openai import _http_to_ws_url\nfrom onyx.voice.providers.openai import OpenAIRealtimeMessageType\nfrom onyx.voice.providers.openai import OpenAIVoiceProvider\n\n\n# --- _http_to_ws_url ---\n\n\ndef test_http_to_ws_url_converts_https_to_wss() -> None:\n    assert _http_to_ws_url(\"https://api.openai.com\") == \"wss://api.openai.com\"\n\n\ndef test_http_to_ws_url_converts_http_to_ws() -> None:\n    assert _http_to_ws_url(\"http://localhost:9090\") == \"ws://localhost:9090\"\n\n\ndef test_http_to_ws_url_passes_through_ws() -> None:\n    assert _http_to_ws_url(\"wss://already.ws\") == \"wss://already.ws\"\n\n\n# --- StrEnum comparison ---\n\n\ndef test_realtime_message_type_compares_as_string() -> None:\n    assert str(OpenAIRealtimeMessageType.ERROR) == \"error\"\n    assert (\n        str(OpenAIRealtimeMessageType.TRANSCRIPTION_DELTA)\n        == \"conversation.item.input_audio_transcription.delta\"\n    )\n    assert isinstance(OpenAIRealtimeMessageType.ERROR, str)\n\n\n# --- _create_wav_header ---\n\n\ndef test_wav_header_is_44_bytes() -> None:\n    assert len(_create_wav_header(1000)) == 44\n\n\ndef test_wav_header_chunk_size_matches_data_length() -> None:\n    data_length = 2000\n    header = _create_wav_header(data_length)\n    chunk_size = struct.unpack_from(\"<I\", header, 4)[0]\n    assert chunk_size == 36 + data_length\n\n\ndef test_wav_header_byte_rate() -> None:\n    header = _create_wav_header(100, sample_rate=24000, channels=1, bits_per_sample=16)\n    byte_rate = struct.unpack_from(\"<I\", header, 28)[0]\n    assert byte_rate == 24000 * 1 * 16 // 8\n\n\ndef test_wav_header_produces_valid_wav() -> None:\n    \"\"\"Header + PCM data should parse as valid WAV.\"\"\"\n    data_length = 100\n    pcm_data = b\"\\x00\" * data_length\n    header = _create_wav_header(data_length, sample_rate=24000)\n\n    with wave.open(io.BytesIO(header + pcm_data), \"rb\") as wav_file:\n        assert wav_file.getnchannels() == 1\n        assert wav_file.getsampwidth() == 2\n        assert wav_file.getframerate() == 24000\n        assert wav_file.getnframes() == data_length // 2\n\n\n# --- Provider Defaults ---\n\n\ndef test_provider_default_models() -> None:\n    provider = OpenAIVoiceProvider(api_key=\"test\")\n    assert provider.stt_model == \"whisper-1\"\n    assert provider.tts_model == \"tts-1\"\n    assert provider.default_voice == \"alloy\"\n\n\ndef test_provider_custom_models() -> None:\n    provider = OpenAIVoiceProvider(\n        api_key=\"test\",\n        stt_model=\"gpt-4o-transcribe\",\n        tts_model=\"tts-1-hd\",\n        default_voice=\"nova\",\n    )\n    assert provider.stt_model == \"gpt-4o-transcribe\"\n    assert provider.tts_model == \"tts-1-hd\"\n    assert provider.default_voice == \"nova\"\n\n\ndef test_provider_get_available_voices_returns_copy() -> None:\n    provider = OpenAIVoiceProvider(api_key=\"test\")\n    voices = provider.get_available_voices()\n    voices.clear()\n    assert len(provider.get_available_voices()) > 0\n"
  },
  {
    "path": "backend/tests/unit/scripts/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/server/metrics/test_celery_task_metrics.py",
    "content": "\"\"\"Tests for generic Celery task lifecycle Prometheus metrics.\"\"\"\n\nfrom collections.abc import Iterator\nfrom unittest.mock import MagicMock\n\nimport pytest\n\nfrom onyx.server.metrics.celery_task_metrics import _task_start_times\nfrom onyx.server.metrics.celery_task_metrics import on_celery_task_postrun\nfrom onyx.server.metrics.celery_task_metrics import on_celery_task_prerun\nfrom onyx.server.metrics.celery_task_metrics import TASK_COMPLETED\nfrom onyx.server.metrics.celery_task_metrics import TASK_DURATION\nfrom onyx.server.metrics.celery_task_metrics import TASK_STARTED\nfrom onyx.server.metrics.celery_task_metrics import TASKS_ACTIVE\n\n\n@pytest.fixture(autouse=True)\ndef reset_metrics() -> Iterator[None]:\n    \"\"\"Clear metric state between tests.\"\"\"\n    _task_start_times.clear()\n    yield\n    _task_start_times.clear()\n\n\ndef _make_task(name: str = \"test_task\", queue: str = \"test_queue\") -> MagicMock:\n    task = MagicMock()\n    task.name = name\n    task.request = MagicMock()\n    task.request.delivery_info = {\"routing_key\": queue}\n    return task\n\n\nclass TestCeleryTaskPrerun:\n    def test_increments_started_and_active(self) -> None:\n        task = _make_task()\n        before_started = TASK_STARTED.labels(\n            task_name=\"test_task\", queue=\"test_queue\"\n        )._value.get()\n        before_active = TASKS_ACTIVE.labels(\n            task_name=\"test_task\", queue=\"test_queue\"\n        )._value.get()\n\n        on_celery_task_prerun(\"task-1\", task)\n\n        after_started = TASK_STARTED.labels(\n            task_name=\"test_task\", queue=\"test_queue\"\n        )._value.get()\n        after_active = TASKS_ACTIVE.labels(\n            task_name=\"test_task\", queue=\"test_queue\"\n        )._value.get()\n\n        assert after_started == before_started + 1\n        assert after_active == before_active + 1\n\n    def test_records_start_time(self) -> None:\n        task = _make_task()\n        on_celery_task_prerun(\"task-1\", task)\n        assert \"task-1\" in _task_start_times\n\n    def test_noop_when_task_is_none(self) -> None:\n        on_celery_task_prerun(\"task-1\", None)\n        assert \"task-1\" not in _task_start_times\n\n    def test_noop_when_task_id_is_none(self) -> None:\n        task = _make_task()\n        on_celery_task_prerun(None, task)\n        # Should not crash\n\n    def test_handles_missing_delivery_info(self) -> None:\n        task = _make_task()\n        task.request.delivery_info = None\n        on_celery_task_prerun(\"task-1\", task)\n        assert \"task-1\" in _task_start_times\n\n\nclass TestCeleryTaskPostrun:\n    def test_increments_completed_success(self) -> None:\n        task = _make_task()\n        on_celery_task_prerun(\"task-1\", task)\n\n        before = TASK_COMPLETED.labels(\n            task_name=\"test_task\", queue=\"test_queue\", outcome=\"success\"\n        )._value.get()\n\n        on_celery_task_postrun(\"task-1\", task, \"SUCCESS\")\n\n        after = TASK_COMPLETED.labels(\n            task_name=\"test_task\", queue=\"test_queue\", outcome=\"success\"\n        )._value.get()\n        assert after == before + 1\n\n    def test_increments_completed_failure(self) -> None:\n        task = _make_task()\n        on_celery_task_prerun(\"task-1\", task)\n\n        before = TASK_COMPLETED.labels(\n            task_name=\"test_task\", queue=\"test_queue\", outcome=\"failure\"\n        )._value.get()\n\n        on_celery_task_postrun(\"task-1\", task, \"FAILURE\")\n\n        after = TASK_COMPLETED.labels(\n            task_name=\"test_task\", queue=\"test_queue\", outcome=\"failure\"\n        )._value.get()\n        assert after == before + 1\n\n    def test_decrements_active(self) -> None:\n        task = _make_task()\n        on_celery_task_prerun(\"task-1\", task)\n\n        active_before = TASKS_ACTIVE.labels(\n            task_name=\"test_task\", queue=\"test_queue\"\n        )._value.get()\n\n        on_celery_task_postrun(\"task-1\", task, \"SUCCESS\")\n\n        active_after = TASKS_ACTIVE.labels(\n            task_name=\"test_task\", queue=\"test_queue\"\n        )._value.get()\n        assert active_after == active_before - 1\n\n    def test_observes_duration(self) -> None:\n        task = _make_task()\n        on_celery_task_prerun(\"task-1\", task)\n\n        before_count = TASK_DURATION.labels(\n            task_name=\"test_task\", queue=\"test_queue\"\n        )._sum.get()\n\n        on_celery_task_postrun(\"task-1\", task, \"SUCCESS\")\n\n        after_count = TASK_DURATION.labels(\n            task_name=\"test_task\", queue=\"test_queue\"\n        )._sum.get()\n        # Duration should have increased (at least slightly)\n        assert after_count > before_count\n\n    def test_cleans_up_start_time(self) -> None:\n        task = _make_task()\n        on_celery_task_prerun(\"task-1\", task)\n        assert \"task-1\" in _task_start_times\n\n        on_celery_task_postrun(\"task-1\", task, \"SUCCESS\")\n        assert \"task-1\" not in _task_start_times\n\n    def test_noop_when_task_is_none(self) -> None:\n        on_celery_task_postrun(\"task-1\", None, \"SUCCESS\")\n\n    def test_handles_missing_start_time(self) -> None:\n        \"\"\"Postrun without prerun should not crash.\"\"\"\n        task = _make_task()\n        on_celery_task_postrun(\"task-1\", task, \"SUCCESS\")\n        # Should not raise\n"
  },
  {
    "path": "backend/tests/unit/server/metrics/test_indexing_pipeline_collectors.py",
    "content": "\"\"\"Tests for indexing pipeline Prometheus collectors.\"\"\"\n\nfrom collections.abc import Iterator\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector\nfrom onyx.server.metrics.indexing_pipeline import IndexAttemptCollector\nfrom onyx.server.metrics.indexing_pipeline import QueueDepthCollector\n\n\n@pytest.fixture(autouse=True)\ndef _mock_broker_client() -> Iterator[None]:\n    \"\"\"Patch celery_get_broker_client for all collector tests.\"\"\"\n    with patch(\n        \"onyx.background.celery.celery_redis.celery_get_broker_client\",\n        return_value=MagicMock(),\n    ):\n        yield\n\n\nclass TestQueueDepthCollector:\n    def test_returns_empty_when_factory_not_set(self) -> None:\n        collector = QueueDepthCollector()\n        assert collector.collect() == []\n\n    def test_returns_empty_describe(self) -> None:\n        collector = QueueDepthCollector()\n        assert collector.describe() == []\n\n    def test_collects_queue_depths(self) -> None:\n        collector = QueueDepthCollector(cache_ttl=0)\n        collector.set_celery_app(MagicMock())\n\n        with (\n            patch(\n                \"onyx.server.metrics.indexing_pipeline.celery_get_queue_length\",\n                return_value=5,\n            ),\n            patch(\n                \"onyx.server.metrics.indexing_pipeline.celery_get_unacked_task_ids\",\n                return_value={\"task-1\", \"task-2\"},\n            ),\n        ):\n            families = collector.collect()\n\n        assert len(families) == 3\n        depth_family = families[0]\n        unacked_family = families[1]\n        age_family = families[2]\n\n        assert depth_family.name == \"onyx_queue_depth\"\n        assert len(depth_family.samples) > 0\n        for sample in depth_family.samples:\n            assert sample.value == 5\n\n        assert unacked_family.name == \"onyx_queue_unacked\"\n        unacked_labels = {s.labels[\"queue\"] for s in unacked_family.samples}\n        assert \"docfetching\" in unacked_labels\n        assert \"docprocessing\" in unacked_labels\n\n        assert age_family.name == \"onyx_queue_oldest_task_age_seconds\"\n        for sample in unacked_family.samples:\n            assert sample.value == 2\n\n    def test_handles_redis_error_gracefully(self) -> None:\n        collector = QueueDepthCollector(cache_ttl=0)\n        MagicMock()\n        collector.set_celery_app(MagicMock())\n\n        with patch(\n            \"onyx.server.metrics.indexing_pipeline.celery_get_queue_length\",\n            side_effect=Exception(\"connection lost\"),\n        ):\n            families = collector.collect()\n\n        # Returns stale cache (empty on first call)\n        assert families == []\n\n    def test_caching_returns_stale_within_ttl(self) -> None:\n        collector = QueueDepthCollector(cache_ttl=60)\n        MagicMock()\n        collector.set_celery_app(MagicMock())\n\n        with (\n            patch(\n                \"onyx.server.metrics.indexing_pipeline.celery_get_queue_length\",\n                return_value=5,\n            ),\n            patch(\n                \"onyx.server.metrics.indexing_pipeline.celery_get_unacked_task_ids\",\n                return_value=set(),\n            ),\n        ):\n            first = collector.collect()\n\n        # Second call within TTL should return cached result without calling Redis\n        with patch(\n            \"onyx.server.metrics.indexing_pipeline.celery_get_queue_length\",\n            side_effect=Exception(\"should not be called\"),\n        ):\n            second = collector.collect()\n\n        assert first is second  # Same object, from cache\n\n    def test_error_returns_stale_cache(self) -> None:\n        collector = QueueDepthCollector(cache_ttl=0)\n        MagicMock()\n        collector.set_celery_app(MagicMock())\n\n        # First call succeeds\n        with (\n            patch(\n                \"onyx.server.metrics.indexing_pipeline.celery_get_queue_length\",\n                return_value=10,\n            ),\n            patch(\n                \"onyx.server.metrics.indexing_pipeline.celery_get_unacked_task_ids\",\n                return_value=set(),\n            ),\n        ):\n            good_result = collector.collect()\n\n        assert len(good_result) == 3\n        assert good_result[0].samples[0].value == 10\n\n        # Second call fails — should return stale cache, not empty\n        with patch(\n            \"onyx.server.metrics.indexing_pipeline.celery_get_queue_length\",\n            side_effect=Exception(\"Redis down\"),\n        ):\n            stale_result = collector.collect()\n\n        assert stale_result is good_result\n\n\nclass TestIndexAttemptCollector:\n    def test_returns_empty_when_not_configured(self) -> None:\n        collector = IndexAttemptCollector()\n        assert collector.collect() == []\n\n    def test_returns_empty_describe(self) -> None:\n        collector = IndexAttemptCollector()\n        assert collector.describe() == []\n\n    @patch(\"onyx.db.engine.tenant_utils.get_all_tenant_ids\")\n    @patch(\"onyx.db.engine.sql_engine.get_session_with_current_tenant\")\n    def test_collects_index_attempts(\n        self,\n        mock_get_session: MagicMock,\n        mock_get_tenants: MagicMock,\n    ) -> None:\n        collector = IndexAttemptCollector(cache_ttl=0)\n        collector.configure()\n\n        mock_get_tenants.return_value = [\"public\"]\n\n        mock_session = MagicMock()\n        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)\n        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)\n\n        from onyx.db.enums import IndexingStatus\n\n        mock_row = (\n            IndexingStatus.IN_PROGRESS,\n            MagicMock(value=\"web\"),\n            81,\n            \"Table Tennis Blade Guide\",\n            2,\n        )\n        mock_session.query.return_value.join.return_value.join.return_value.filter.return_value.group_by.return_value.all.return_value = [\n            mock_row\n        ]\n\n        families = collector.collect()\n        assert len(families) == 1\n        assert families[0].name == \"onyx_index_attempts_active\"\n        assert len(families[0].samples) == 1\n        sample = families[0].samples[0]\n        assert sample.labels == {\n            \"status\": \"in_progress\",\n            \"source\": \"web\",\n            \"tenant_id\": \"public\",\n            \"connector_name\": \"Table Tennis Blade Guide\",\n            \"cc_pair_id\": \"81\",\n        }\n        assert sample.value == 2\n\n    @patch(\"onyx.db.engine.tenant_utils.get_all_tenant_ids\")\n    def test_handles_db_error_gracefully(\n        self,\n        mock_get_tenants: MagicMock,\n    ) -> None:\n        collector = IndexAttemptCollector(cache_ttl=0)\n        collector.configure()\n\n        mock_get_tenants.side_effect = Exception(\"DB down\")\n        families = collector.collect()\n        # No stale cache, so returns empty\n        assert families == []\n\n    @patch(\"onyx.db.engine.tenant_utils.get_all_tenant_ids\")\n    def test_skips_none_tenant_ids(\n        self,\n        mock_get_tenants: MagicMock,\n    ) -> None:\n        collector = IndexAttemptCollector(cache_ttl=0)\n        collector.configure()\n\n        mock_get_tenants.return_value = [None]\n        families = collector.collect()\n        assert len(families) == 1  # Returns the gauge family, just with no samples\n        assert len(families[0].samples) == 0\n\n\nclass TestConnectorHealthCollector:\n    def test_returns_empty_when_not_configured(self) -> None:\n        collector = ConnectorHealthCollector()\n        assert collector.collect() == []\n\n    def test_returns_empty_describe(self) -> None:\n        collector = ConnectorHealthCollector()\n        assert collector.describe() == []\n\n    @patch(\"onyx.db.engine.tenant_utils.get_all_tenant_ids\")\n    @patch(\"onyx.db.engine.sql_engine.get_session_with_current_tenant\")\n    def test_collects_connector_health(\n        self,\n        mock_get_session: MagicMock,\n        mock_get_tenants: MagicMock,\n    ) -> None:\n        collector = ConnectorHealthCollector(cache_ttl=0)\n        collector.configure()\n\n        mock_get_tenants.return_value = [\"public\"]\n\n        mock_session = MagicMock()\n        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)\n        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)\n\n        now = datetime.now(tz=timezone.utc)\n        last_success = now - timedelta(hours=2)\n\n        mock_status = MagicMock(value=\"ACTIVE\")\n        mock_source = MagicMock(value=\"google_drive\")\n        # Row: (id, status, in_error, last_success, name, source)\n        mock_row = (\n            42,\n            mock_status,\n            True,  # in_repeated_error_state\n            last_success,\n            \"My GDrive Connector\",\n            mock_source,\n        )\n        mock_session.query.return_value.join.return_value.all.return_value = [mock_row]\n\n        # Mock the index attempt queries (error counts + docs counts)\n        mock_session.query.return_value.filter.return_value.group_by.return_value.all.return_value = (\n            []\n        )\n\n        families = collector.collect()\n\n        assert len(families) == 6\n        names = {f.name for f in families}\n        assert names == {\n            \"onyx_connector_last_success_age_seconds\",\n            \"onyx_connector_in_error_state\",\n            \"onyx_connectors_by_status\",\n            \"onyx_connectors_in_error_total\",\n            \"onyx_connector_docs_indexed\",\n            \"onyx_connector_error_count\",\n        }\n\n        staleness = next(\n            f for f in families if f.name == \"onyx_connector_last_success_age_seconds\"\n        )\n        assert len(staleness.samples) == 1\n        assert staleness.samples[0].value == pytest.approx(7200, abs=5)\n\n        error_state = next(\n            f for f in families if f.name == \"onyx_connector_in_error_state\"\n        )\n        assert error_state.samples[0].value == 1.0\n\n        by_status = next(f for f in families if f.name == \"onyx_connectors_by_status\")\n        assert by_status.samples[0].labels == {\n            \"tenant_id\": \"public\",\n            \"status\": \"ACTIVE\",\n        }\n        assert by_status.samples[0].value == 1\n\n        error_total = next(\n            f for f in families if f.name == \"onyx_connectors_in_error_total\"\n        )\n        assert error_total.samples[0].value == 1\n\n    @patch(\"onyx.db.engine.tenant_utils.get_all_tenant_ids\")\n    @patch(\"onyx.db.engine.sql_engine.get_session_with_current_tenant\")\n    def test_skips_staleness_when_no_last_success(\n        self,\n        mock_get_session: MagicMock,\n        mock_get_tenants: MagicMock,\n    ) -> None:\n        collector = ConnectorHealthCollector(cache_ttl=0)\n        collector.configure()\n\n        mock_get_tenants.return_value = [\"public\"]\n\n        mock_session = MagicMock()\n        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)\n        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)\n\n        mock_status = MagicMock(value=\"INITIAL_INDEXING\")\n        mock_source = MagicMock(value=\"slack\")\n        mock_row = (\n            10,\n            mock_status,\n            False,\n            None,  # no last_successful_index_time\n            0,\n            mock_source,\n        )\n        mock_session.query.return_value.join.return_value.all.return_value = [mock_row]\n\n        families = collector.collect()\n\n        staleness = next(\n            f for f in families if f.name == \"onyx_connector_last_success_age_seconds\"\n        )\n        assert len(staleness.samples) == 0\n\n    @patch(\"onyx.db.engine.tenant_utils.get_all_tenant_ids\")\n    def test_handles_db_error_gracefully(\n        self,\n        mock_get_tenants: MagicMock,\n    ) -> None:\n        collector = ConnectorHealthCollector(cache_ttl=0)\n        collector.configure()\n\n        mock_get_tenants.side_effect = Exception(\"DB down\")\n        families = collector.collect()\n        assert families == []\n"
  },
  {
    "path": "backend/tests/unit/server/metrics/test_indexing_pipeline_setup.py",
    "content": "\"\"\"Tests for indexing pipeline setup.\"\"\"\n\nfrom unittest.mock import MagicMock\n\nfrom onyx.server.metrics.indexing_pipeline import QueueDepthCollector\nfrom onyx.server.metrics.indexing_pipeline import RedisHealthCollector\n\n\nclass TestCollectorCeleryAppSetup:\n    def test_queue_depth_collector_uses_celery_app(self) -> None:\n        \"\"\"QueueDepthCollector.set_celery_app stores the app for broker access.\"\"\"\n        collector = QueueDepthCollector()\n        mock_app = MagicMock()\n        collector.set_celery_app(mock_app)\n        assert collector._celery_app is mock_app\n\n    def test_redis_health_collector_uses_celery_app(self) -> None:\n        \"\"\"RedisHealthCollector.set_celery_app stores the app for broker access.\"\"\"\n        collector = RedisHealthCollector()\n        mock_app = MagicMock()\n        collector.set_celery_app(mock_app)\n        assert collector._celery_app is mock_app\n"
  },
  {
    "path": "backend/tests/unit/server/metrics/test_indexing_task_metrics.py",
    "content": "\"\"\"Tests for per-connector indexing task Prometheus metrics.\"\"\"\n\nfrom collections.abc import Iterator\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.server.metrics.indexing_task_metrics import _connector_cache\nfrom onyx.server.metrics.indexing_task_metrics import _indexing_start_times\nfrom onyx.server.metrics.indexing_task_metrics import ConnectorInfo\nfrom onyx.server.metrics.indexing_task_metrics import INDEXING_TASK_COMPLETED\nfrom onyx.server.metrics.indexing_task_metrics import INDEXING_TASK_DURATION\nfrom onyx.server.metrics.indexing_task_metrics import INDEXING_TASK_STARTED\nfrom onyx.server.metrics.indexing_task_metrics import on_indexing_task_postrun\nfrom onyx.server.metrics.indexing_task_metrics import on_indexing_task_prerun\n\n\n@pytest.fixture(autouse=True)\ndef reset_state() -> Iterator[None]:\n    \"\"\"Clear caches and state between tests.\n\n    Sets CURRENT_TENANT_ID_CONTEXTVAR to a realistic value so cache keys\n    are never keyed on an empty string.\n    \"\"\"\n    from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n    token = CURRENT_TENANT_ID_CONTEXTVAR.set(\"test_tenant\")\n    _connector_cache.clear()\n    _indexing_start_times.clear()\n    yield\n    _connector_cache.clear()\n    _indexing_start_times.clear()\n    CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n\ndef _make_task(name: str) -> MagicMock:\n    task = MagicMock()\n    task.name = name\n    return task\n\n\ndef _mock_db_lookup(\n    source: str = \"google_drive\", name: str = \"My Google Drive\"\n) -> tuple:\n    \"\"\"Return (session_patch, cc_pair_patch) context managers for DB mocking.\"\"\"\n    mock_cc_pair = MagicMock()\n    mock_cc_pair.name = name\n    mock_cc_pair.connector.source.value = source\n\n    session_patch = patch(\"onyx.db.engine.sql_engine.get_session_with_current_tenant\")\n    cc_pair_patch = patch(\n        \"onyx.db.connector_credential_pair.get_connector_credential_pair_from_id\",\n        return_value=mock_cc_pair,\n    )\n    return session_patch, cc_pair_patch\n\n\nclass TestIndexingTaskPrerun:\n    def test_skips_non_indexing_task(self) -> None:\n        task = _make_task(\"some_other_task\")\n        kwargs = {\"cc_pair_id\": 1, \"tenant_id\": \"public\"}\n        on_indexing_task_prerun(\"task-1\", task, kwargs)\n        assert \"task-1\" not in _indexing_start_times\n\n    def test_emits_started_for_docfetching(self) -> None:\n        # Pre-populate cache to avoid DB lookup (tenant-scoped key)\n        _connector_cache[(\"test_tenant\", 42)] = ConnectorInfo(\n            source=\"google_drive\", name=\"My Google Drive\"\n        )\n\n        task = _make_task(\"connector_doc_fetching_task\")\n        kwargs = {\"cc_pair_id\": 42, \"tenant_id\": \"tenant-1\"}\n\n        before = INDEXING_TASK_STARTED.labels(\n            task_name=\"connector_doc_fetching_task\",\n            source=\"google_drive\",\n            tenant_id=\"tenant-1\",\n            cc_pair_id=\"42\",\n        )._value.get()\n\n        on_indexing_task_prerun(\"task-1\", task, kwargs)\n\n        after = INDEXING_TASK_STARTED.labels(\n            task_name=\"connector_doc_fetching_task\",\n            source=\"google_drive\",\n            tenant_id=\"tenant-1\",\n            cc_pair_id=\"42\",\n        )._value.get()\n\n        assert after == before + 1\n        assert \"task-1\" in _indexing_start_times\n\n    def test_emits_started_for_docprocessing(self) -> None:\n        _connector_cache[(\"test_tenant\", 10)] = ConnectorInfo(\n            source=\"slack\", name=\"Slack Connector\"\n        )\n\n        task = _make_task(\"docprocessing_task\")\n        kwargs = {\"cc_pair_id\": 10, \"tenant_id\": \"public\"}\n\n        on_indexing_task_prerun(\"task-2\", task, kwargs)\n        assert \"task-2\" in _indexing_start_times\n\n    def test_cache_hit_avoids_db_call(self) -> None:\n        _connector_cache[(\"test_tenant\", 42)] = ConnectorInfo(\n            source=\"confluence\", name=\"Engineering Confluence\"\n        )\n\n        task = _make_task(\"connector_doc_fetching_task\")\n        kwargs = {\"cc_pair_id\": 42, \"tenant_id\": \"public\"}\n\n        # No DB patches needed — cache should be used\n        on_indexing_task_prerun(\"task-1\", task, kwargs)\n        assert \"task-1\" in _indexing_start_times\n\n    def test_db_lookup_on_cache_miss(self) -> None:\n        \"\"\"On first encounter of a cc_pair_id, does a DB lookup and caches.\"\"\"\n        mock_cc_pair = MagicMock()\n        mock_cc_pair.name = \"Notion Workspace\"\n        mock_cc_pair.connector.source.value = \"notion\"\n\n        mock_session = MagicMock()\n        mock_session.__enter__ = MagicMock(return_value=MagicMock())\n        mock_session.__exit__ = MagicMock(return_value=False)\n\n        with (\n            patch(\n                \"onyx.server.metrics.indexing_task_metrics._resolve_connector\"\n            ) as mock_resolve,\n        ):\n            mock_resolve.return_value = ConnectorInfo(\n                source=\"notion\", name=\"Notion Workspace\"\n            )\n\n            task = _make_task(\"connector_doc_fetching_task\")\n            kwargs = {\"cc_pair_id\": 77, \"tenant_id\": \"public\"}\n\n            on_indexing_task_prerun(\"task-1\", task, kwargs)\n            mock_resolve.assert_called_once_with(77)\n\n    def test_missing_cc_pair_returns_unknown(self) -> None:\n        \"\"\"When _resolve_connector can't find the cc_pair, uses 'unknown'.\"\"\"\n        with patch(\n            \"onyx.server.metrics.indexing_task_metrics._resolve_connector\"\n        ) as mock_resolve:\n            mock_resolve.return_value = ConnectorInfo(source=\"unknown\", name=\"unknown\")\n\n            task = _make_task(\"connector_doc_fetching_task\")\n            kwargs = {\"cc_pair_id\": 999, \"tenant_id\": \"public\"}\n\n            on_indexing_task_prerun(\"task-1\", task, kwargs)\n            assert \"task-1\" in _indexing_start_times\n\n    def test_skips_when_cc_pair_id_missing(self) -> None:\n        task = _make_task(\"connector_doc_fetching_task\")\n        kwargs = {\"tenant_id\": \"public\"}\n        on_indexing_task_prerun(\"task-1\", task, kwargs)\n        assert \"task-1\" not in _indexing_start_times\n\n    def test_db_error_does_not_crash(self) -> None:\n        with patch(\n            \"onyx.server.metrics.indexing_task_metrics._resolve_connector\",\n            side_effect=Exception(\"DB down\"),\n        ):\n            task = _make_task(\"connector_doc_fetching_task\")\n            kwargs = {\"cc_pair_id\": 1, \"tenant_id\": \"public\"}\n            # Should not raise\n            on_indexing_task_prerun(\"task-1\", task, kwargs)\n\n\nclass TestIndexingTaskPostrun:\n    def test_skips_non_indexing_task(self) -> None:\n        task = _make_task(\"some_other_task\")\n        kwargs = {\"cc_pair_id\": 1, \"tenant_id\": \"public\"}\n        on_indexing_task_postrun(\"task-1\", task, kwargs, \"SUCCESS\")\n        # Should not raise\n\n    def test_emits_completed_and_duration(self) -> None:\n        _connector_cache[(\"test_tenant\", 42)] = ConnectorInfo(\n            source=\"google_drive\", name=\"Marketing Drive\"\n        )\n\n        task = _make_task(\"docprocessing_task\")\n        kwargs = {\"cc_pair_id\": 42, \"tenant_id\": \"public\"}\n\n        # Simulate prerun\n        on_indexing_task_prerun(\"task-1\", task, kwargs)\n\n        before_completed = INDEXING_TASK_COMPLETED.labels(\n            task_name=\"docprocessing_task\",\n            source=\"google_drive\",\n            tenant_id=\"public\",\n            cc_pair_id=\"42\",\n            outcome=\"success\",\n        )._value.get()\n\n        before_duration = INDEXING_TASK_DURATION.labels(\n            task_name=\"docprocessing_task\",\n            source=\"google_drive\",\n            tenant_id=\"public\",\n        )._sum.get()\n\n        on_indexing_task_postrun(\"task-1\", task, kwargs, \"SUCCESS\")\n\n        after_completed = INDEXING_TASK_COMPLETED.labels(\n            task_name=\"docprocessing_task\",\n            source=\"google_drive\",\n            tenant_id=\"public\",\n            cc_pair_id=\"42\",\n            outcome=\"success\",\n        )._value.get()\n\n        after_duration = INDEXING_TASK_DURATION.labels(\n            task_name=\"docprocessing_task\",\n            source=\"google_drive\",\n            tenant_id=\"public\",\n        )._sum.get()\n\n        assert after_completed == before_completed + 1\n        assert after_duration > before_duration\n\n    def test_failure_outcome(self) -> None:\n        _connector_cache[(\"test_tenant\", 42)] = ConnectorInfo(\n            source=\"slack\", name=\"Slack\"\n        )\n\n        task = _make_task(\"connector_doc_fetching_task\")\n        kwargs = {\"cc_pair_id\": 42, \"tenant_id\": \"public\"}\n\n        on_indexing_task_prerun(\"task-1\", task, kwargs)\n\n        before = INDEXING_TASK_COMPLETED.labels(\n            task_name=\"connector_doc_fetching_task\",\n            source=\"slack\",\n            tenant_id=\"public\",\n            cc_pair_id=\"42\",\n            outcome=\"failure\",\n        )._value.get()\n\n        on_indexing_task_postrun(\"task-1\", task, kwargs, \"FAILURE\")\n\n        after = INDEXING_TASK_COMPLETED.labels(\n            task_name=\"connector_doc_fetching_task\",\n            source=\"slack\",\n            tenant_id=\"public\",\n            cc_pair_id=\"42\",\n            outcome=\"failure\",\n        )._value.get()\n\n        assert after == before + 1\n\n    def test_handles_postrun_without_prerun(self) -> None:\n        \"\"\"Postrun for an indexing task without a matching prerun should not crash.\"\"\"\n        _connector_cache[(\"test_tenant\", 42)] = ConnectorInfo(\n            source=\"slack\", name=\"Slack\"\n        )\n\n        task = _make_task(\"docprocessing_task\")\n        kwargs = {\"cc_pair_id\": 42, \"tenant_id\": \"public\"}\n\n        # No prerun — should still emit completed counter, just skip duration\n        on_indexing_task_postrun(\"task-1\", task, kwargs, \"SUCCESS\")\n\n\nclass TestResolveConnector:\n    def test_failed_lookup_not_cached(self) -> None:\n        \"\"\"When DB lookup returns None, result should NOT be cached.\"\"\"\n        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n        token = CURRENT_TENANT_ID_CONTEXTVAR.set(\"test-tenant\")\n        try:\n            with (\n                patch(\"onyx.db.engine.sql_engine.get_session_with_current_tenant\"),\n                patch(\n                    \"onyx.db.connector_credential_pair\"\n                    \".get_connector_credential_pair_from_id\",\n                    return_value=None,\n                ),\n            ):\n                from onyx.server.metrics.indexing_task_metrics import _resolve_connector\n\n                result = _resolve_connector(999)\n                assert result.source == \"unknown\"\n                # Should NOT be cached so subsequent calls can retry\n                assert (\"test-tenant\", 999) not in _connector_cache\n        finally:\n            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n    def test_exception_not_cached(self) -> None:\n        \"\"\"When DB lookup raises, result should NOT be cached.\"\"\"\n        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n        token = CURRENT_TENANT_ID_CONTEXTVAR.set(\"test-tenant\")\n        try:\n            with (\n                patch(\n                    \"onyx.db.engine.sql_engine.get_session_with_current_tenant\",\n                    side_effect=Exception(\"DB down\"),\n                ),\n            ):\n                from onyx.server.metrics.indexing_task_metrics import _resolve_connector\n\n                result = _resolve_connector(888)\n                assert result.source == \"unknown\"\n                assert (\"test-tenant\", 888) not in _connector_cache\n        finally:\n            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n\n    def test_successful_lookup_is_cached(self) -> None:\n        \"\"\"When DB lookup succeeds, result should be cached.\"\"\"\n        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR\n\n        token = CURRENT_TENANT_ID_CONTEXTVAR.set(\"test-tenant\")\n        try:\n            mock_cc_pair = MagicMock()\n            mock_cc_pair.name = \"My Drive\"\n            mock_cc_pair.connector.source.value = \"google_drive\"\n\n            with (\n                patch(\"onyx.db.engine.sql_engine.get_session_with_current_tenant\"),\n                patch(\n                    \"onyx.db.connector_credential_pair\"\n                    \".get_connector_credential_pair_from_id\",\n                    return_value=mock_cc_pair,\n                ),\n            ):\n                from onyx.server.metrics.indexing_task_metrics import _resolve_connector\n\n                result = _resolve_connector(777)\n                assert result.source == \"google_drive\"\n                assert result.name == \"My Drive\"\n                assert (\"test-tenant\", 777) in _connector_cache\n        finally:\n            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)\n"
  },
  {
    "path": "backend/tests/unit/server/metrics/test_metrics_server.py",
    "content": "\"\"\"Tests for the Prometheus metrics server module.\"\"\"\n\nfrom collections.abc import Iterator\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.server.metrics.metrics_server import _DEFAULT_PORTS\nfrom onyx.server.metrics.metrics_server import start_metrics_server\n\n\n@pytest.fixture(autouse=True)\ndef reset_server_state() -> Iterator[None]:\n    \"\"\"Reset the global _server_started between tests.\"\"\"\n    import onyx.server.metrics.metrics_server as mod\n\n    mod._server_started = False\n    yield\n    mod._server_started = False\n\n\nclass TestStartMetricsServer:\n    @patch(\"onyx.server.metrics.metrics_server.start_http_server\")\n    def test_uses_default_port_for_known_worker(self, mock_start: MagicMock) -> None:\n        port = start_metrics_server(\"monitoring\")\n        assert port == _DEFAULT_PORTS[\"monitoring\"]\n        mock_start.assert_called_once_with(_DEFAULT_PORTS[\"monitoring\"])\n\n    @patch(\"onyx.server.metrics.metrics_server.start_http_server\")\n    @patch.dict(\"os.environ\", {\"PROMETHEUS_METRICS_PORT\": \"9999\"})\n    def test_env_var_overrides_default(self, mock_start: MagicMock) -> None:\n        port = start_metrics_server(\"monitoring\")\n        assert port == 9999\n        mock_start.assert_called_once_with(9999)\n\n    @patch(\"onyx.server.metrics.metrics_server.start_http_server\")\n    @patch.dict(\"os.environ\", {\"PROMETHEUS_METRICS_ENABLED\": \"false\"})\n    def test_disabled_via_env_var(self, mock_start: MagicMock) -> None:\n        port = start_metrics_server(\"monitoring\")\n        assert port is None\n        mock_start.assert_not_called()\n\n    @patch(\"onyx.server.metrics.metrics_server.start_http_server\")\n    def test_unknown_worker_type_no_env_var(self, mock_start: MagicMock) -> None:\n        port = start_metrics_server(\"unknown_worker\")\n        assert port is None\n        mock_start.assert_not_called()\n\n    @patch(\"onyx.server.metrics.metrics_server.start_http_server\")\n    def test_idempotent(self, mock_start: MagicMock) -> None:\n        port1 = start_metrics_server(\"monitoring\")\n        port2 = start_metrics_server(\"monitoring\")\n        assert port1 == _DEFAULT_PORTS[\"monitoring\"]\n        assert port2 is None\n        mock_start.assert_called_once()\n\n    @patch(\"onyx.server.metrics.metrics_server.start_http_server\")\n    def test_handles_os_error(self, mock_start: MagicMock) -> None:\n        mock_start.side_effect = OSError(\"Address already in use\")\n        port = start_metrics_server(\"monitoring\")\n        assert port is None\n\n    @patch(\"onyx.server.metrics.metrics_server.start_http_server\")\n    @patch.dict(\"os.environ\", {\"PROMETHEUS_METRICS_PORT\": \"not_a_number\"})\n    def test_invalid_port_env_var_returns_none(self, mock_start: MagicMock) -> None:\n        port = start_metrics_server(\"monitoring\")\n        assert port is None\n        mock_start.assert_not_called()\n"
  },
  {
    "path": "backend/tests/unit/server/metrics/test_opensearch_search_metrics.py",
    "content": "\"\"\"Tests for OpenSearch search Prometheus metrics.\"\"\"\n\nfrom unittest.mock import patch\n\nfrom onyx.document_index.opensearch.constants import OpenSearchSearchType\nfrom onyx.server.metrics.opensearch_search import _client_duration\nfrom onyx.server.metrics.opensearch_search import _search_total\nfrom onyx.server.metrics.opensearch_search import _searches_in_progress\nfrom onyx.server.metrics.opensearch_search import _server_duration\nfrom onyx.server.metrics.opensearch_search import observe_opensearch_search\nfrom onyx.server.metrics.opensearch_search import track_opensearch_search_in_progress\n\n\nclass TestObserveOpenSearchSearch:\n    def test_increments_counter(self) -> None:\n        search_type = OpenSearchSearchType.HYBRID\n        before = _search_total.labels(search_type=search_type.value)._value.get()\n        observe_opensearch_search(search_type, 0.1, 50)\n        after = _search_total.labels(search_type=search_type.value)._value.get()\n        assert after == before + 1\n\n    def test_observes_client_duration(self) -> None:\n        search_type = OpenSearchSearchType.KEYWORD\n        before_sum = _client_duration.labels(search_type=search_type.value)._sum.get()\n        observe_opensearch_search(search_type, 0.25, 100)\n        after_sum = _client_duration.labels(search_type=search_type.value)._sum.get()\n        assert after_sum == before_sum + 0.25\n\n    def test_observes_server_duration(self) -> None:\n        search_type = OpenSearchSearchType.SEMANTIC\n        before_sum = _server_duration.labels(search_type=search_type.value)._sum.get()\n        observe_opensearch_search(search_type, 0.3, 200)\n        after_sum = _server_duration.labels(search_type=search_type.value)._sum.get()\n        # 200ms should be recorded as 0.2s.\n        assert after_sum == before_sum + 0.2\n\n    def test_server_took_none_skips_server_histogram(self) -> None:\n        search_type = OpenSearchSearchType.UNKNOWN\n        before_server = _server_duration.labels(\n            search_type=search_type.value\n        )._sum.get()\n        before_client = _client_duration.labels(\n            search_type=search_type.value\n        )._sum.get()\n        before_total = _search_total.labels(search_type=search_type.value)._value.get()\n\n        observe_opensearch_search(search_type, 0.1, None)\n\n        # Server histogram should NOT be observed.\n        after_server = _server_duration.labels(search_type=search_type.value)._sum.get()\n        assert after_server == before_server\n\n        # Client histogram and counter should still work.\n        after_client = _client_duration.labels(search_type=search_type.value)._sum.get()\n        after_total = _search_total.labels(search_type=search_type.value)._value.get()\n        assert after_client == before_client + 0.1\n        assert after_total == before_total + 1\n\n    def test_exceptions_do_not_propagate(self) -> None:\n        search_type = OpenSearchSearchType.RANDOM\n        with patch.object(\n            _search_total.labels(search_type=search_type.value),\n            \"inc\",\n            side_effect=RuntimeError(\"boom\"),\n        ):\n            # Should not raise.\n            observe_opensearch_search(search_type, 0.1, 50)\n\n\nclass TestTrackOpenSearchSearchInProgress:\n    def test_gauge_increments_and_decrements(self) -> None:\n        search_type = OpenSearchSearchType.HYBRID\n        before = _searches_in_progress.labels(\n            search_type=search_type.value\n        )._value.get()\n\n        with track_opensearch_search_in_progress(search_type):\n            during = _searches_in_progress.labels(\n                search_type=search_type.value\n            )._value.get()\n            assert during == before + 1\n\n        after = _searches_in_progress.labels(search_type=search_type.value)._value.get()\n        assert after == before\n\n    def test_gauge_decrements_on_exception(self) -> None:\n        search_type = OpenSearchSearchType.SEMANTIC\n        before = _searches_in_progress.labels(\n            search_type=search_type.value\n        )._value.get()\n\n        raised = False\n        try:\n            with track_opensearch_search_in_progress(search_type):\n                raise ValueError(\"simulated search failure\")\n        except ValueError:\n            raised = True\n        assert raised\n\n        after = _searches_in_progress.labels(search_type=search_type.value)._value.get()\n        assert after == before\n\n    def test_inc_exception_does_not_break_search(self) -> None:\n        search_type = OpenSearchSearchType.KEYWORD\n        before = _searches_in_progress.labels(\n            search_type=search_type.value\n        )._value.get()\n\n        with patch.object(\n            _searches_in_progress.labels(search_type=search_type.value),\n            \"inc\",\n            side_effect=RuntimeError(\"boom\"),\n        ):\n            # Context manager should still yield without decrementing.\n            with track_opensearch_search_in_progress(search_type):\n                # Search logic would execute here.\n                during = _searches_in_progress.labels(\n                    search_type=search_type.value\n                )._value.get()\n                assert during == before\n\n        after = _searches_in_progress.labels(search_type=search_type.value)._value.get()\n        assert after == before\n"
  },
  {
    "path": "backend/tests/unit/server/metrics/test_worker_health.py",
    "content": "\"\"\"Tests for WorkerHeartbeatMonitor and WorkerHealthCollector.\"\"\"\n\nimport time\nfrom unittest.mock import MagicMock\n\nfrom onyx.server.metrics.indexing_pipeline import WorkerHealthCollector\nfrom onyx.server.metrics.indexing_pipeline import WorkerHeartbeatMonitor\n\n\nclass TestWorkerHeartbeatMonitor:\n    def test_heartbeat_registers_worker(self) -> None:\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n        monitor._on_heartbeat({\"hostname\": \"primary@host1\"})\n\n        status = monitor.get_worker_status()\n        assert \"primary@host1\" in status\n        assert status[\"primary@host1\"] is True\n\n    def test_multiple_workers(self) -> None:\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n        monitor._on_heartbeat({\"hostname\": \"primary@host1\"})\n        monitor._on_heartbeat({\"hostname\": \"docfetching@host1\"})\n        monitor._on_heartbeat({\"hostname\": \"monitoring@host1\"})\n\n        status = monitor.get_worker_status()\n        assert len(status) == 3\n        assert all(alive for alive in status.values())\n\n    def test_offline_removes_worker(self) -> None:\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n        monitor._on_heartbeat({\"hostname\": \"primary@host1\"})\n        monitor._on_offline({\"hostname\": \"primary@host1\"})\n\n        status = monitor.get_worker_status()\n        assert \"primary@host1\" not in status\n\n    def test_stale_heartbeat_marks_worker_down(self) -> None:\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n        with monitor._lock:\n            monitor._worker_last_seen[\"primary@host1\"] = (\n                time.monotonic() - monitor._HEARTBEAT_TIMEOUT_SECONDS - 10\n            )\n\n        status = monitor.get_worker_status()\n        assert status[\"primary@host1\"] is False\n\n    def test_very_stale_worker_is_pruned(self) -> None:\n        \"\"\"Workers dead for 2x the timeout are pruned from the dict.\"\"\"\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n        with monitor._lock:\n            monitor._worker_last_seen[\"gone@host1\"] = (\n                time.monotonic() - monitor._HEARTBEAT_TIMEOUT_SECONDS * 2 - 10\n            )\n\n        status = monitor.get_worker_status()\n        assert \"gone@host1\" not in status\n        assert monitor.get_worker_status() == {}\n\n    def test_heartbeat_refreshes_stale_worker(self) -> None:\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n        with monitor._lock:\n            monitor._worker_last_seen[\"primary@host1\"] = (\n                time.monotonic() - monitor._HEARTBEAT_TIMEOUT_SECONDS - 10\n            )\n        assert monitor.get_worker_status()[\"primary@host1\"] is False\n\n        monitor._on_heartbeat({\"hostname\": \"primary@host1\"})\n        assert monitor.get_worker_status()[\"primary@host1\"] is True\n\n    def test_ignores_empty_hostname(self) -> None:\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n        monitor._on_heartbeat({})\n        monitor._on_heartbeat({\"hostname\": \"\"})\n        monitor._on_offline({})\n\n        assert monitor.get_worker_status() == {}\n\n    def test_returns_full_hostname_as_key(self) -> None:\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n        monitor._on_heartbeat({\"hostname\": \"docprocessing@my-long-host.local\"})\n\n        status = monitor.get_worker_status()\n        assert \"docprocessing@my-long-host.local\" in status\n\n    def test_start_is_idempotent(self) -> None:\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n        # Mock the thread so we don't actually start one\n        mock_thread = MagicMock()\n        mock_thread.is_alive.return_value = True\n        monitor._thread = mock_thread\n        monitor._running = True\n\n        # Second start should be a no-op\n        monitor.start()\n        # Thread constructor should not have been called again\n        assert monitor._thread is mock_thread\n\n    def test_thread_safety(self) -> None:\n        \"\"\"get_worker_status should not raise even if heartbeats arrive concurrently.\"\"\"\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n        monitor._on_heartbeat({\"hostname\": \"primary@host1\"})\n        status = monitor.get_worker_status()\n        monitor._on_heartbeat({\"hostname\": \"primary@host1\"})\n        status2 = monitor.get_worker_status()\n        assert status == status2\n\n\nclass TestWorkerHealthCollector:\n    def test_returns_empty_when_no_monitor(self) -> None:\n        collector = WorkerHealthCollector(cache_ttl=0)\n        assert collector.collect() == []\n\n    def test_collects_active_workers(self) -> None:\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n        monitor._on_heartbeat({\"hostname\": \"primary@host1\"})\n        monitor._on_heartbeat({\"hostname\": \"docfetching@host1\"})\n        monitor._on_heartbeat({\"hostname\": \"monitoring@host1\"})\n\n        collector = WorkerHealthCollector(cache_ttl=0)\n        collector.set_monitor(monitor)\n\n        families = collector.collect()\n        assert len(families) == 2\n\n        active = families[0]\n        assert active.name == \"onyx_celery_active_worker_count\"\n        assert active.samples[0].value == 3\n\n        up = families[1]\n        assert up.name == \"onyx_celery_worker_up\"\n        assert len(up.samples) == 3\n        # Labels use short names (before @)\n        labels = {s.labels[\"worker\"] for s in up.samples}\n        assert labels == {\"primary\", \"docfetching\", \"monitoring\"}\n        for sample in up.samples:\n            assert sample.value == 1\n\n    def test_reports_dead_worker(self) -> None:\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n        monitor._on_heartbeat({\"hostname\": \"primary@host1\"})\n        with monitor._lock:\n            monitor._worker_last_seen[\"monitoring@host1\"] = (\n                time.monotonic() - monitor._HEARTBEAT_TIMEOUT_SECONDS - 10\n            )\n\n        collector = WorkerHealthCollector(cache_ttl=0)\n        collector.set_monitor(monitor)\n\n        families = collector.collect()\n        active = families[0]\n        assert active.samples[0].value == 1\n\n        up = families[1]\n        samples_by_name = {s.labels[\"worker\"]: s.value for s in up.samples}\n        assert samples_by_name[\"primary\"] == 1\n        assert samples_by_name[\"monitoring\"] == 0\n\n    def test_empty_monitor_returns_zero(self) -> None:\n        monitor = WorkerHeartbeatMonitor(MagicMock())\n\n        collector = WorkerHealthCollector(cache_ttl=0)\n        collector.set_monitor(monitor)\n\n        families = collector.collect()\n        assert len(families) == 2\n        active = families[0]\n        assert active.samples[0].value == 0\n        up = families[1]\n        assert up.name == \"onyx_celery_worker_up\"\n        assert len(up.samples) == 0\n"
  },
  {
    "path": "backend/tests/unit/tools/__init__.py",
    "content": ""
  },
  {
    "path": "backend/tests/unit/tools/test_memory_tool_packets.py",
    "content": "\"\"\"Tests for memory tool streaming packet emissions.\"\"\"\n\nimport queue\nfrom unittest.mock import MagicMock\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom onyx.chat.emitter import Emitter\nfrom onyx.server.query_and_chat.placement import Placement\nfrom onyx.server.query_and_chat.session_loading import create_memory_packets\nfrom onyx.server.query_and_chat.streaming_models import MemoryToolDelta\nfrom onyx.server.query_and_chat.streaming_models import MemoryToolStart\nfrom onyx.server.query_and_chat.streaming_models import SectionEnd\nfrom onyx.tools.tool_implementations.memory.memory_tool import MemoryTool\nfrom onyx.tools.tool_implementations.memory.memory_tool import MemoryToolOverrideKwargs\nfrom onyx.tools.tool_implementations.memory.models import MemoryToolResponse\n\n\n@pytest.fixture\ndef emitter_queue() -> queue.Queue:\n    return queue.Queue()\n\n\n@pytest.fixture\ndef emitter(emitter_queue: queue.Queue) -> Emitter:\n    return Emitter(merged_queue=emitter_queue)\n\n\n@pytest.fixture\ndef mock_llm() -> MagicMock:\n    return MagicMock()\n\n\n@pytest.fixture\ndef memory_tool(emitter: Emitter, mock_llm: MagicMock) -> MemoryTool:\n    return MemoryTool(tool_id=1, emitter=emitter, llm=mock_llm)\n\n\n@pytest.fixture\ndef placement() -> Placement:\n    return Placement(turn_index=0, tab_index=0)\n\n\n@pytest.fixture\ndef override_kwargs() -> MemoryToolOverrideKwargs:\n    return MemoryToolOverrideKwargs(\n        user_name=\"Test User\",\n        user_email=\"test@example.com\",\n        user_role=None,\n        existing_memories=[\"User likes dark mode\"],\n        chat_history=[],\n    )\n\n\nclass TestMemoryToolEmitStart:\n    def test_emit_start_emits_memory_tool_start_packet(\n        self,\n        memory_tool: MemoryTool,\n        emitter_queue: queue.Queue,\n        placement: Placement,\n    ) -> None:\n        memory_tool.emit_start(placement)\n\n        _key, packet = emitter_queue.get_nowait()\n        assert isinstance(packet.obj, MemoryToolStart)\n        assert packet.placement is not None\n        assert packet.placement.turn_index == placement.turn_index\n        assert packet.placement.tab_index == placement.tab_index\n        assert packet.placement.model_index == 0  # emitter stamps model_index=0\n\n    def test_emit_start_with_different_placement(\n        self,\n        memory_tool: MemoryTool,\n        emitter_queue: queue.Queue,\n    ) -> None:\n        placement = Placement(turn_index=2, tab_index=1)\n        memory_tool.emit_start(placement)\n\n        _key, packet = emitter_queue.get_nowait()\n        assert packet.placement.turn_index == 2\n        assert packet.placement.tab_index == 1\n\n\nclass TestMemoryToolRun:\n    @patch(\"onyx.tools.tool_implementations.memory.memory_tool.process_memory_update\")\n    def test_run_emits_delta_for_add_operation(\n        self,\n        mock_process: MagicMock,\n        memory_tool: MemoryTool,\n        emitter_queue: queue.Queue,\n        placement: Placement,\n        override_kwargs: MemoryToolOverrideKwargs,\n    ) -> None:\n        mock_process.return_value = (\"User prefers Python\", None)\n\n        memory_tool.run(\n            placement=placement,\n            override_kwargs=override_kwargs,\n            memory=\"User prefers Python\",\n        )\n\n        _key, packet = emitter_queue.get_nowait()\n        assert isinstance(packet.obj, MemoryToolDelta)\n        assert packet.obj.memory_text == \"User prefers Python\"\n        assert packet.obj.operation == \"add\"\n        assert packet.obj.memory_id is None\n        assert packet.obj.index is None\n\n    @patch(\"onyx.tools.tool_implementations.memory.memory_tool.process_memory_update\")\n    def test_run_emits_delta_for_update_operation(\n        self,\n        mock_process: MagicMock,\n        memory_tool: MemoryTool,\n        emitter_queue: queue.Queue,\n        placement: Placement,\n        override_kwargs: MemoryToolOverrideKwargs,\n    ) -> None:\n        mock_process.return_value = (\"User prefers light mode\", 0)\n\n        memory_tool.run(\n            placement=placement,\n            override_kwargs=override_kwargs,\n            memory=\"User prefers light mode\",\n        )\n\n        _key, packet = emitter_queue.get_nowait()\n        assert isinstance(packet.obj, MemoryToolDelta)\n        assert packet.obj.memory_text == \"User prefers light mode\"\n        assert packet.obj.operation == \"update\"\n        assert packet.obj.memory_id is None\n        assert packet.obj.index == 0\n\n    @patch(\"onyx.tools.tool_implementations.memory.memory_tool.process_memory_update\")\n    def test_run_returns_tool_response_with_rich_response(\n        self,\n        mock_process: MagicMock,\n        memory_tool: MemoryTool,\n        placement: Placement,\n        override_kwargs: MemoryToolOverrideKwargs,\n    ) -> None:\n        mock_process.return_value = (\"User prefers Python\", None)\n\n        result = memory_tool.run(\n            placement=placement,\n            override_kwargs=override_kwargs,\n            memory=\"User prefers Python\",\n        )\n\n        assert isinstance(result.rich_response, MemoryToolResponse)\n        assert result.rich_response.memory_text == \"User prefers Python\"\n        assert result.rich_response.index_to_replace is None\n        assert \"User prefers Python\" in result.llm_facing_response\n\n\nclass TestCreateMemoryPackets:\n    def test_produces_start_delta_end_for_add(self) -> None:\n        packets = create_memory_packets(\n            memory_text=\"User likes Python\",\n            operation=\"add\",\n            memory_id=None,\n            turn_index=1,\n            tab_index=0,\n        )\n\n        assert len(packets) == 3\n        assert isinstance(packets[0].obj, MemoryToolStart)\n        assert isinstance(packets[1].obj, MemoryToolDelta)\n        assert isinstance(packets[2].obj, SectionEnd)\n\n        delta = packets[1].obj\n        assert isinstance(delta, MemoryToolDelta)\n        assert delta.memory_text == \"User likes Python\"\n        assert delta.operation == \"add\"\n        assert delta.memory_id is None\n        assert delta.index is None\n\n    def test_produces_start_delta_end_for_update(self) -> None:\n        packets = create_memory_packets(\n            memory_text=\"User prefers light mode\",\n            operation=\"update\",\n            memory_id=42,\n            turn_index=3,\n            tab_index=1,\n            index=5,\n        )\n\n        assert len(packets) == 3\n        assert isinstance(packets[0].obj, MemoryToolStart)\n        assert isinstance(packets[1].obj, MemoryToolDelta)\n        assert isinstance(packets[2].obj, SectionEnd)\n\n        delta = packets[1].obj\n        assert isinstance(delta, MemoryToolDelta)\n        assert delta.memory_text == \"User prefers light mode\"\n        assert delta.operation == \"update\"\n        assert delta.memory_id == 42\n        assert delta.index == 5\n\n    def test_placement_is_set_correctly(self) -> None:\n        packets = create_memory_packets(\n            memory_text=\"test\",\n            operation=\"add\",\n            memory_id=None,\n            turn_index=5,\n            tab_index=2,\n        )\n\n        for packet in packets:\n            assert packet.placement.turn_index == 5\n            assert packet.placement.tab_index == 2\n"
  },
  {
    "path": "contributor_ip_assignment/EE_Contributor_IP_Assignment_Agreement.md",
    "content": "# Enterprise Edition Contribution IP Assignment Agreement (DanswerAI, Inc.)\n\n**Effective Date:** ______________________\n\nThis Enterprise Edition Contribution IP Assignment Agreement (the “**Agreement**”) is entered into by and between:\n\n- **DanswerAI, Inc.** (“**Company**”), the maintainer of the Onyx product, and  \n- **Contributor:** ______________________ (“**Contributor**”)\n\nCompany and Contributor may be referred to individually as a “**Party**” and collectively as the “**Parties**.”\n\n## 1. Purpose and scope\n\nOnyx’s repository is primarily licensed under the MIT License, but includes **proprietary-licensed Enterprise Edition components** (as defined below). This Agreement applies **only** to Contributions made to the Enterprise Edition components and is intended to ensure Company owns all rights necessary to license, distribute, and commercialize Enterprise Edition features.\n\n## 2. Definitions\n\n2.1 **“Enterprise Edition” or “EE”** means (a) any source code, documentation, configuration, assets, tests, build scripts, or other materials located in or under **any directory named `ee`** anywhere in the repository (including nested paths), and (b) any other files or directories that are explicitly marked as proprietary or Enterprise Edition in repository documentation, file headers, or license notices, and (c) any derivative works, modifications, or additions to the foregoing.\n\n2.2 **“Contribution(s)”** means any work of authorship (including code, documentation, or other materials) that Contributor submits to Company for inclusion in EE, including via pull request, patch, commit, issue attachment, email, or any other submission method accepted by Company, and any modifications to existing EE materials.\n\n2.3 **“Intellectual Property Rights”** means all rights worldwide in and to copyrights, moral rights, neighboring rights, trade secrets, mask work rights, design rights, database rights, patent rights, and any other proprietary rights, whether registered or unregistered.\n\n## 3. Assignment of rights\n\n3.1 **Assignment.** To the maximum extent permitted by law, Contributor hereby **assigns and transfers to Company**, and agrees to assign and transfer to Company, **all right, title, and interest** in and to all Contributions and all associated Intellectual Property Rights, including all rights to reproduce, prepare derivative works, distribute, publicly perform, publicly display, and otherwise exploit the Contributions in any manner.\n\n3.2 **Future rights and further assurances.** Contributor agrees to execute and deliver (including electronically) any documents and take any actions reasonably requested by Company to perfect, record, or enforce Company’s rights in the Contributions. If Contributor fails to do so after reasonable request, Contributor appoints Company as Contributor’s attorney-in-fact solely to execute such documents on Contributor’s behalf.\n\n3.3 **Work made for hire (where applicable).** To the extent any Contribution qualifies as a “work made for hire” under applicable law, it shall be deemed a work made for hire for Company. If not, it is assigned under Section 3.1.\n\n## 4. Moral rights waiver\n\nTo the extent permitted by law, Contributor **waives and agrees not to assert** any moral rights (including rights of attribution and integrity) or similar rights in the Contributions against Company or Company’s licensees, successors, or assigns.\n\n## 5. Patent rights (assignment / license)\n\n5.1 **Patent assignment.** To the maximum extent permitted by law, Contributor hereby assigns to Company all right, title, and interest in any patent rights that are **necessarily infringed** by making, using, selling, offering for sale, importing, or otherwise exploiting the Contributions or EE as incorporated with the Contributions.\n\n5.2 **Fallback patent license.** If any patent rights cannot be assigned as a matter of law, Contributor grants Company a **perpetual, irrevocable, worldwide, transferable, sublicensable, royalty-free** license under such patent rights to make, have made, use, sell, offer for sale, import, and otherwise exploit the Contributions and EE.\n\n## 6. Contributor representations\n\nContributor represents and warrants that:\n\n6.1 **Authority.** Contributor has the legal right and authority to enter into this Agreement and to make the assignments and grants herein.\n\n6.2 **Originality / rights clearance.** Each Contribution is original to Contributor or Contributor has secured all necessary rights and permissions to submit it and to assign the rights described in this Agreement.\n\n6.3 **No third-party restrictions.** Contributions are not subject to any employment, contractor, academic, or other agreement that would conflict with this Agreement or restrict assignment to Company. Contributor has not included any code or materials that require disclosure of source code or impose “copyleft” or similar reciprocal obligations on EE (including but not limited to GPL, AGPL, LGPL (in a way that would impose reciprocity on EE), or other licenses that would require EE to be distributed under different terms), unless Company has expressly agreed in writing.\n\n6.4 **No confidential information.** Contributor will not submit any confidential or proprietary information of any third party (including an employer) as part of a Contribution.\n\n## 7. Relationship to MIT-licensed portions of the repo\n\nThis Agreement applies **only** to Contributions to EE as defined in Section 2.1. Contributions made solely to MIT-licensed portions of the repository remain governed by the repository’s applicable open-source licensing and contribution terms, unless a separate written agreement states otherwise.\n\n## 8. No obligation; consideration\n\n8.1 **No obligation to accept.** Company has no obligation to accept, merge, or distribute any Contribution.\n\n8.2 **Consideration.** Contributor agrees that the opportunity to contribute to EE and Company’s potential acceptance and use of the Contributions are adequate consideration for the assignments and grants in this Agreement.\n\n## 9. Limitation of liability\n\nTo the maximum extent permitted by law, **neither Party** will be liable to the other for any indirect, incidental, special, consequential, or punitive damages arising out of this Agreement.\n\n## 10. Governing law; venue\n\nThis Agreement is governed by the laws of the **State of California**, excluding conflict-of-laws rules. The Parties agree to exclusive jurisdiction and venue in the state or federal courts located in **California**, unless prohibited by applicable law.\n\n## 11. Miscellaneous\n\n11.1 **Entire agreement.** This Agreement is the entire agreement between the Parties regarding EE Contributions and supersedes all prior or contemporaneous understandings on that subject.\n\n11.2 **Amendment.** Any amendment must be in writing and signed by both Parties.\n\n11.3 **Severability.** If any provision is held unenforceable, the remaining provisions remain in full force and effect.\n\n11.4 **Counterparts; electronic signatures.** This Agreement may be executed in counterparts, including via electronic signature, each of which is deemed an original.\n\n---\n\n## Signatures\n\n**COMPANY:** DanswerAI, Inc.  \nBy: ____________________________________  \nName: __________________________________  \nTitle: ___________________________________  \nDate: ___________________________________\n\n**CONTRIBUTOR:**  \nSignature: _______________________________  \nName: ___________________________________  \nEmail: ___________________________________  \nDate: ___________________________________"
  },
  {
    "path": "ct.yaml",
    "content": "# See https://github.com/helm/chart-testing#configuration\n\n# still have to specify this on the command line for list-changed\nchart-dirs:\n  - deployment/helm/charts\n\n# must be kept in sync with Chart.yaml\nchart-repos:\n  - vespa=https://onyx-dot-app.github.io/vespa-helm-charts\n  - opensearch=https://opensearch-project.github.io/helm-charts\n  - ingress-nginx=https://kubernetes.github.io/ingress-nginx\n  - postgresql=https://cloudnative-pg.github.io/charts\n  - redis=https://ot-container-kit.github.io/helm-charts\n  - minio=https://charts.min.io/\n  - code-interpreter=https://onyx-dot-app.github.io/python-sandbox/\n  \n# have seen postgres take 10 min to pull ... so 15 min seems like a good timeout?\nhelm-extra-args: --debug --timeout 900s\n\n# nginx appears to not work on kind, likely due to lack of loadbalancer support\n# helm-extra-set-args also only works on the command line, not in this yaml\n# helm-extra-set-args: --set=nginx.enabled=false\n\nvalidate-maintainers: false\n"
  },
  {
    "path": "cubic.yaml",
    "content": "# yaml-language-server: $schema=https://cubic.dev/schema/cubic-repository-config.schema.json\nversion: 1\n\nreviews:\n  enabled: true\n  sensitivity: medium\n  incremental_commits: true\n  check_drafts: false\n\n  custom_instructions: |\n    Use explicit type annotations for variables to enhance code clarity,\n    especially when moving type hints around in the code.\n\n    Use `contributing_guides/best_practices.md` as core review context.\n    Prefer consistency with existing patterns, fix issues in code you touch,\n    avoid tacking new features onto muddy interfaces, fail loudly instead of\n    silently swallowing errors, keep code strictly typed, preserve clear state\n    boundaries, remove duplicate or dead logic, break up overly long functions,\n    avoid hidden import-time side effects, respect module boundaries, and favor\n    correctness-by-construction over relying on callers to use an API correctly.\n\n    Reference these files for additional context:\n    - `contributing_guides/best_practices.md` — Best practices for contributing to the codebase\n    - `CLAUDE.md` — Project instructions and coding standards\n    - `backend/alembic/README.md` — Migration guidance, including multi-tenant migration behavior\n    - `deployment/helm/charts/onyx/values-lite.yaml` — Lite deployment Helm values and service assumptions\n    - `deployment/docker_compose/docker-compose.onyx-lite.yml` — Lite deployment Docker Compose overlay and disabled service behavior\n\n  ignore:\n    files:\n      - greptile.json\n      - cubic.yaml\n\n  custom_rules:\n    - name: TODO format\n      description: >\n        Whenever a TODO is added, there must always be an associated name or\n        ticket in the style of TODO(name): ... or TODO(1234): ...\n\n    - name: Frontend standards\n      description: >\n        For frontend changes, enforce all standards described in the\n        web/AGENTS.md file.\n      include:\n        - web/**\n        - desktop/**\n\n    - name: No debugging code\n      description: >\n        Remove temporary debugging code before merging to production,\n        especially tenant-specific debugging logs.\n\n    - name: No hardcoded booleans\n      description: >\n        When hardcoding a boolean variable to a constant value, remove the\n        variable entirely and clean up all places where it's used rather than\n        just setting it to a constant.\n\n    - name: Multi-tenant awareness\n      description: >\n        Code changes must consider both multi-tenant and single-tenant\n        deployments. In multi-tenant mode, preserve tenant isolation, ensure\n        tenant context is propagated correctly, and avoid assumptions that only\n        hold for a single shared schema or globally shared state. In\n        single-tenant mode, avoid introducing unnecessary tenant-specific\n        requirements or cloud-only control-plane dependencies.\n\n    - name: Onyx lite compatibility\n      description: >\n        Code changes must consider both regular Onyx deployments and Onyx lite\n        deployments. Lite deployments disable the vector DB, Redis, model\n        servers, and background workers by default, use PostgreSQL-backed\n        cache/auth/file storage, and rely on the API server to handle\n        background work. Do not assume those services are available unless the\n        code path is explicitly limited to full deployments.\n\n    - name: OnyxError over HTTPException\n      description: >\n        Never raise HTTPException directly in business code. Use\n        `raise OnyxError(OnyxErrorCode.XXX, \"message\")` from\n        `onyx.error_handling.exceptions`. A global FastAPI exception handler\n        converts OnyxError into structured JSON responses with\n        {\"error_code\": \"...\", \"detail\": \"...\"}. Error codes are defined in\n        `onyx.error_handling.error_codes.OnyxErrorCode`. For upstream errors\n        with dynamic HTTP status codes, use `status_code_override`:\n        `raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)`.\n      include:\n        - backend/**/*.py\n\nissues:\n  fix_with_cubic_buttons: true\n  pr_comment_fixes: true\n  fix_commits_to_pr: true\n"
  },
  {
    "path": "deployment/.gitignore",
    "content": ".env*\nsecrets.yaml\n"
  },
  {
    "path": "deployment/README.md",
    "content": "Documentation for how to deploy Onyx can be found in our official docs:\nhttps://docs.onyx.app/deployment/overview\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/README.md",
    "content": "# Onyx AWS ECS Fargate CloudFormation Deployment\n\nThis directory contains CloudFormation templates and scripts to deploy Onyx on AWS ECS Fargate.\n\n## Configuration\n\nAll configuration parameters are stored in a single JSON file: `onyx_config.json`. This file contains all the parameters needed for the different CloudFormation stacks.\n\nExample:\n```json\n{\n  \"OnyxNamespace\": \"onyx\",\n  \"Environment\": \"production\",\n  \"EFSName\": \"onyx-efs\",\n  \"AWSRegion\": \"us-east-2\",\n  \"VpcID\": \"YOUR_VPC_ID\",\n  \"SubnetIDs\": \"YOUR_SUBNET_ID1,YOUR_SUBNET_ID2\",\n  \"DomainName\": \"YOUR_DOMAIN e.g ecs.onyx.app\",\n  \"ValidationMethod\": \"DNS\",\n  \"HostedZoneId\": \"\"\n}\n```\n\n### Required Parameters\n\n- `Environment`: Used to prefix all stack names during deployment. This is required.\n- `OnyxNamespace`: Namespace for the Onyx deployment.\n- `EFSName`: Name for the Elastic File System.\n- `AWSRegion`: AWS region where resources will be deployed.\n- `VpcID`: ID of the VPC where Onyx will be deployed.\n- `SubnetIDs`: Comma-separated list of subnet IDs for deployment.\n- `DomainName`: Domain name for the Onyx deployment.\n- `ValidationMethod`: Method for domain validation (typically \"DNS\").\n- [optional] `HostedZoneId`: Route 53 hosted zone ID (only if using Route 53 for DNS).\n\nThe deployment script automatically extracts the needed parameters for each CloudFormation template based on the parameter names defined in the templates.\n\n## Deployment Order\n\nThe deployment follows this order:\n\n1. Infrastructure stacks:\n   - EFS\n   - Cluster\n   - ACM\n\n2. Service stacks:\n   - Postgres\n   - Redis\n   - Vespa Engine\n   - Model Server (Indexing)\n   - Model Server (Inference)\n   - Backend API Server\n   - Backend Background Server\n   - Web Server\n   - Nginx\n\n## Usage\n\nTo deploy:\n```bash\n./deploy.sh\n```\n\nTo uninstall:\n```bash\n./uninstall.sh\n```\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/deploy.sh",
    "content": "#!/bin/bash\n\n# Function to remove comments from JSON and output valid JSON\nremove_comments() {\n    sed 's/\\/\\/.*$//' \"$1\" | grep -v '^[[:space:]]*$'\n}\n\n# Variables\nTEMPLATE_DIR=\"$(pwd)\"\nSERVICE_DIR=\"$TEMPLATE_DIR/services\"\n\n# Unified config file\nCONFIG_FILE=\"onyx_config.jsonl\"\n\n# Try to get AWS_REGION from config, fallback to default if not found\nAWS_REGION_FROM_CONFIG=$(remove_comments \"$CONFIG_FILE\" | jq -r '.AWSRegion // empty')\nif [ -n \"$AWS_REGION_FROM_CONFIG\" ]; then\n    AWS_REGION=\"$AWS_REGION_FROM_CONFIG\"\nelse\n    AWS_REGION=\"${AWS_REGION:-us-east-2}\"\nfi\n\n# Get environment from config file\nENVIRONMENT=$(remove_comments \"$CONFIG_FILE\" | jq -r '.Environment')\nif [ -z \"$ENVIRONMENT\" ] || [ \"$ENVIRONMENT\" == \"null\" ]; then\n    echo \"Missing Environment in $CONFIG_FILE. Please add the Environment field.\"\n    exit 1\nfi\n\n# Try to get S3_BUCKET from config, fallback to default if not found\nS3_BUCKET_FROM_CONFIG=$(remove_comments \"$CONFIG_FILE\" | jq -r '.S3Bucket // empty')\nif [ -n \"$S3_BUCKET_FROM_CONFIG\" ]; then\n    S3_BUCKET=\"$S3_BUCKET_FROM_CONFIG\"\nelse\n    S3_BUCKET=\"${S3_BUCKET:-onyx-ecs-fargate-configs}\"\nfi\n\nINFRA_ORDER=(\n  \"onyx_efs_template.yaml\"\n  \"onyx_cluster_template.yaml\"\n  \"onyx_acm_template.yaml\"\n)\n\n# Deployment order for services\nSERVICE_ORDER=(\n  \"onyx_postgres_service_template.yaml\"\n  \"onyx_redis_service_template.yaml\"\n  \"onyx_vespaengine_service_template.yaml\"\n  \"onyx_model_server_indexing_service_template.yaml\"\n  \"onyx_model_server_inference_service_template.yaml\"\n  \"onyx_backend_api_server_service_template.yaml\"\n  \"onyx_backend_background_server_service_template.yaml\"\n  \"onyx_web_server_service_template.yaml\"\n  \"onyx_nginx_service_template.yaml\"\n)\n\n# Function to validate a CloudFormation template\nvalidate_template() {\n  local template_file=$1\n  echo \"Validating template: $template_file...\"\n  aws cloudformation validate-template --template-body file://\"$template_file\" --region \"$AWS_REGION\" > /dev/null\n  if [ $? -ne 0 ]; then\n    echo \"Error: Validation failed for $template_file. Exiting.\"\n    exit 1\n  fi\n  echo \"Validation succeeded for $template_file.\"\n}\n\n# Function to create CloudFormation parameters from JSON\ncreate_parameters_from_json() {\n  local template_file=$1\n  local temp_params_file=\"${template_file%.yaml}_parameters.json\"\n  \n  # Convert the config file contents to CloudFormation parameter format\n  echo \"[\" > \"$temp_params_file\"\n  \n  # Process all key-value pairs from the config file\n  local first=true\n  remove_comments \"$CONFIG_FILE\" | jq -r 'to_entries[] | select(.value != null and .value != \"\") | \"\\(.key)|\\(.value)\"' | while IFS='|' read -r key value; do\n    if [ \"$first\" = true ]; then\n      first=false\n    else\n      echo \",\" >> \"$temp_params_file\"\n    fi\n    echo \"    {\\\"ParameterKey\\\": \\\"$key\\\", \\\"ParameterValue\\\": \\\"$value\\\"}\" >> \"$temp_params_file\"\n  done\n  \n  echo \"]\" >> \"$temp_params_file\"\n  \n  # Debug output - display the created parameters file\n  echo \"Generated parameters file: $temp_params_file\" >&2\n  echo \"Contents:\" >&2\n  cat \"$temp_params_file\" >&2\n  \n  # Return just the filename\n  echo \"$temp_params_file\"\n}\n\n# Function to deploy a CloudFormation stack\ndeploy_stack() {\n  local stack_name=$1\n  local template_file=$2\n\n  echo \"Checking if stack $stack_name exists...\"\n  if aws cloudformation describe-stacks --stack-name \"$stack_name\" --region \"$AWS_REGION\" > /dev/null 2>&1; then\n    echo \"Stack $stack_name already exists. Skipping deployment.\"\n    return 0\n  fi\n  \n  # Create temporary parameters file for this template\n  local temp_params_file=$(create_parameters_from_json \"$template_file\")\n  \n  # Special handling for SubnetIDs parameter if needed\n  if grep -q \"SubnetIDs\" \"$template_file\"; then\n    echo \"Template uses SubnetIDs parameter, ensuring it's properly formatted...\"\n    # Make sure we're passing SubnetIDs as a comma-separated list\n    local subnet_ids=$(remove_comments \"$CONFIG_FILE\" | jq -r '.SubnetIDs // empty')\n    if [ -n \"$subnet_ids\" ]; then\n      echo \"Using SubnetIDs from config: $subnet_ids\"\n    else\n      echo \"Warning: SubnetIDs not found in config but template requires it.\"\n    fi\n  fi\n  \n  echo \"Deploying stack: $stack_name with template: $template_file and generated config from: $CONFIG_FILE...\"\n  aws cloudformation deploy \\\n    --stack-name \"$stack_name\" \\\n    --template-file \"$template_file\" \\\n    --parameter-overrides file://\"$temp_params_file\" \\\n    --capabilities CAPABILITY_IAM CAPABILITY_NAMED_IAM CAPABILITY_AUTO_EXPAND \\\n    --region \"$AWS_REGION\" \\\n    --no-cli-auto-prompt > /dev/null\n\n  if [ $? -ne 0 ]; then\n    echo \"Error: Deployment failed for $stack_name. Exiting.\"\n    exit 1\n  fi\n  \n  # Clean up temporary parameter file\n  rm \"$temp_params_file\"\n  \n  echo \"Stack deployed successfully: $stack_name.\"\n}\n\nconvert_underscores_to_hyphens() {\n  local input_string=\"$1\"\n  local converted_string=\"${input_string//_/-}\"\n  echo \"$converted_string\"\n}\n\ndeploy_infra_stacks() {\n    for template_name in \"${INFRA_ORDER[@]}\"; do\n      # Skip ACM template if HostedZoneId is not set\n      if [[ \"$template_name\" == \"onyx_acm_template.yaml\" ]]; then\n        HOSTED_ZONE_ID=$(remove_comments \"$CONFIG_FILE\" | jq -r '.HostedZoneId')\n        if [ -z \"$HOSTED_ZONE_ID\" ] || [ \"$HOSTED_ZONE_ID\" == \"\" ] || [ \"$HOSTED_ZONE_ID\" == \"null\" ]; then\n          echo \"Skipping ACM template deployment because HostedZoneId is not set in $CONFIG_FILE\"\n          continue\n        fi\n      fi\n\n      template_file=\"$template_name\"\n      stack_name=\"$ENVIRONMENT-$(basename \"$template_name\" _template.yaml)\"\n      stack_name=$(convert_underscores_to_hyphens \"$stack_name\")\n\n      if [ -f \"$template_file\" ]; then\n        validate_template \"$template_file\"\n        deploy_stack \"$stack_name\" \"$template_file\"\n      else\n        echo \"Warning: Template file $template_file not found. Skipping.\"\n      fi\n    done\n}\n\ndeploy_services_stacks() { \n    for template_name in \"${SERVICE_ORDER[@]}\"; do\n      template_file=\"$SERVICE_DIR/$template_name\"\n      stack_name=\"$ENVIRONMENT-$(basename \"$template_name\" _template.yaml)\"\n      stack_name=$(convert_underscores_to_hyphens \"$stack_name\")\n\n      if [ -f \"$template_file\" ]; then\n        validate_template \"$template_file\"\n        deploy_stack \"$stack_name\" \"$template_file\"\n      else\n        echo \"Warning: Template file $template_file not found. Skipping.\"\n      fi\n    done\n}\n\necho \"Starting deployment of Onyx to ECS Fargate Cluster...\"\ndeploy_infra_stacks\ndeploy_services_stacks\n\necho \"All templates validated and deployed successfully.\"\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/onyx_acm_template.yaml",
    "content": "AWSTemplateFormatVersion: '2010-09-09'\nDescription: CloudFormation template to create an ACM Certificate.\n\nParameters:\n  DomainName:\n    Type: String\n    Description: The primary domain name for the certificate (e.g., example.com).\n    Default: example.com\n  Environment:\n    Type: String\n    Default: production\n  ValidationMethod:\n    Type: String\n    Default: DNS\n\nResources:\n  Certificate:\n    Type: AWS::CertificateManager::Certificate\n    Properties:\n      DomainName: !Ref DomainName\n      ValidationMethod: !Ref ValidationMethod\n      Tags:\n        - Key: env\n          Value: !Ref Environment\n\nOutputs:\n  OutputAcm:\n    Description: ACM Cert Id\n    Value: !Ref Certificate\n    Export:\n      Name: !Sub ${AWS::StackName}-OnyxCertificate\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/onyx_cluster_template.yaml",
    "content": "AWSTemplateFormatVersion: \"2010-09-09\"\nDescription: The template used to create an ECS Cluster from the ECS Console.\n\nParameters:\n  Environment:\n    Type: String\n    Description: The environment that is used in the name of the cluster as well.\n  OnyxNamespace:\n    Type: String\n    Default: onyx\n  VpcID:\n    Type: String\n    Default: vpc-098cfa79d637dabff\n\nResources:\n  ECSCluster:\n    Type: AWS::ECS::Cluster\n    Properties:\n      ClusterName: !Sub ${Environment}-onyx-cluster\n      CapacityProviders:\n        - FARGATE\n        - FARGATE_SPOT\n      ClusterSettings:\n        - Name: containerInsights\n          Value: enhanced\n      ServiceConnectDefaults:\n        Namespace: !Sub ${Environment}-onyx-cluster\n      Tags:\n        - Key: env\n          Value: !Ref Environment\n        - Key: app\n          Value: onyx\n\n  S3Bucket:\n    Type: AWS::S3::Bucket\n    Properties:\n      BucketName: !Sub ${Environment}-onyx-ecs-fargate-configs\n      AccessControl: Private\n      BucketEncryption:\n        ServerSideEncryptionConfiguration:\n          - ServerSideEncryptionByDefault:\n              SSEAlgorithm: AES256\n      PublicAccessBlockConfiguration:\n        BlockPublicAcls: true\n        BlockPublicPolicy: true\n        IgnorePublicAcls: true\n        RestrictPublicBuckets: true\n\n  PrivateDnsNamespace:\n    Type: AWS::ServiceDiscovery::PrivateDnsNamespace\n    Properties:\n      Description: AWS Cloud Map private DNS namespace for resources for onyx website.\n      Vpc: !Ref VpcID\n      Name: !Ref OnyxNamespace\n      Properties:\n        DnsProperties:\n          SOA:\n            TTL: 50\n\n  ECSTaskRole:\n    Type: AWS::IAM::Role\n    Properties:\n      RoleName: !Sub ${Environment}-OnyxEcsTaskRole\n      AssumeRolePolicyDocument:\n        Version: \"2012-10-17\"\n        Statement:\n          - Effect: Allow\n            Principal:\n              Service: ecs-tasks.amazonaws.com\n            Action: sts:AssumeRole\n      Policies:\n        - PolicyName: \"EFSPolicy\"\n          PolicyDocument:\n            Version: \"2012-10-17\"\n            Statement:\n              - Sid: \"VisualEditor0\"\n                Effect: Allow\n                Action:\n                  - \"elasticfilesystem:*\"\n                Resource:\n                  - !Sub \"arn:aws:elasticfilesystem:*:${AWS::AccountId}:access-point/*\"\n                  - !Sub \"arn:aws:elasticfilesystem:*:${AWS::AccountId}:file-system/*\"\n              - Sid: \"VisualEditor1\"\n                Effect: Allow\n                Action: \"elasticfilesystem:*\"\n                Resource: \"*\"\n        - PolicyName: \"S3Policy\"\n          PolicyDocument:\n            Version: \"2012-10-17\"\n            Statement:\n              - Sid: \"VisualEditor0\"\n                Effect: Allow\n                Action:\n                  - \"s3:GetObject\"\n                  - \"s3:ListBucket\"\n                Resource:\n                  - !Sub \"arn:aws:s3:::${Environment}-onyx-ecs-fargate-configs/*\"\n                  - !Sub \"arn:aws:s3:::${Environment}-onyx-ecs-fargate-configs\"\n\n  ECSTaskExecutionRole:\n    Type: AWS::IAM::Role\n    Properties:\n      RoleName: !Sub ${Environment}-OnyxECSTaskExecutionRole\n      AssumeRolePolicyDocument:\n        Version: \"2012-10-17\"\n        Statement:\n          - Effect: Allow\n            Principal:\n              Service: ecs-tasks.amazonaws.com\n            Action: sts:AssumeRole\n      ManagedPolicyArns:\n        - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy\n      Policies:\n        - PolicyName: \"CloudWatchLogsPolicy\"\n          PolicyDocument:\n            Version: \"2012-10-17\"\n            Statement:\n              - Sid: \"VisualEditor0\"\n                Effect: Allow\n                Action: \"logs:CreateLogGroup\"\n                Resource: !Sub \"arn:aws:logs:*:${AWS::AccountId}:log-group:*\"\n        - PolicyName: \"SecretsManagerPolicy\"\n          PolicyDocument:\n            Version: \"2012-10-17\"\n            Statement:\n              - Effect: Allow\n                Action:\n                  - secretsmanager:GetSecretValue\n                Resource:\n                  - !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/postgres/user/password-*\n                  - !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/onyx/user-auth-secret-*\n\nOutputs:\n  OutputEcsCluster:\n    Description: Onyx ECS Cluster\n    Value: !Ref ECSCluster\n    Export:\n      Name: !Sub ${AWS::StackName}-ECSClusterName\n  OutputECSTaskRole:\n    Description: Onyx ECS Task Role\n    Value: !Ref ECSTaskRole\n    Export:\n      Name: !Sub ${AWS::StackName}-ECSTaskRole\n  OutputECSTaskExecutionRole:\n    Description: Onyx ECS TaskExecutionRole\n    Value: !Ref ECSTaskExecutionRole\n    Export:\n      Name: !Sub ${AWS::StackName}-ECSTaskExecutionRole\n  OutputOnyxNamespace:\n    Description: Onyx CloudMap namespace ID for ECS service discvoery.\n    Value: !Ref PrivateDnsNamespace\n    Export:\n      Name: !Sub ${AWS::StackName}-OnyxNamespace\n  OutputOnyxNamespaceName:\n    Description: Onyx CloudMap namespace domain name for ECS service discvoery.\n    Value: !Ref OnyxNamespace\n    Export:\n      Name: !Sub ${AWS::StackName}-OnyxNamespaceName\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/onyx_config.jsonl",
    "content": "{\n  // Naming, likely doesn't need to be changed\n  \"OnyxNamespace\": \"onyx\",\n  \"Environment\": \"production\",\n  \"EFSName\": \"onyx-efs\",\n\n  // Region and VPC Stuff\n  \"AWSRegion\": \"us-east-2\",\n  \"VpcID\": \"YOUR_VPC_ID\",\n  \"SubnetIDs\": \"YOUR_SUBNET_ID1,YOUR_SUBNET_ID2\",\n\n  // Domain and ACM Stuff\n  \"DomainName\": \"YOUR_DOMAIN e.g ecs.onyx.app\",\n  \"ValidationMethod\": \"DNS\",\n  \"HostedZoneId\": \"\"  // Only specify if using Route 53 for DNS\n} "
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/onyx_efs_template.yaml",
    "content": "Parameters:\n\n  EFSName:\n    Type: String\n    Default: onyx-efs\n  Environment:\n    Type: String\n    Default: production\n  VpcID:\n    Type: String\n    Default: vpc-0f230ca52bb04c722 \n  SubnetIDs:\n    Type: CommaDelimitedList\n    Description: \"Comma-delimited list of at least two subnet IDs in different Availability Zones\"\n\nResources:\n\n  OnyxEfs:\n    Type: AWS::EFS::FileSystem\n    Properties:\n      BackupPolicy: \n        Status: ENABLED\n      Encrypted: True\n      PerformanceMode: generalPurpose\n      FileSystemTags:\n        - Key: Name\n          Value: !Sub ${Environment}-${EFSName}-${AWS::Region}-${AWS::AccountId}\n      FileSystemProtection:\n        ReplicationOverwriteProtection: ENABLED\n      ThroughputMode: elastic\n\n  VespaEngineTmpEfsAccessPoint:\n    Type: AWS::EFS::AccessPoint\n    Properties:\n      AccessPointTags: \n        - Key: Name\n          Value: vespaengine-tmp\n      FileSystemId: !Ref OnyxEfs\n      RootDirectory: \n        CreationInfo:\n          OwnerGid: \"1000\"\n          OwnerUid: \"1000\"\n          Permissions: \"0755\"\n        Path: /var/tmp\n\n  VespaEngineDataEfsAccessPoint:\n    Type: AWS::EFS::AccessPoint\n    Properties:\n      AccessPointTags: \n        - Key: Name\n          Value: vespaengine-data\n      FileSystemId: !Ref OnyxEfs\n      RootDirectory: \n        CreationInfo:\n          OwnerGid: \"1000\"\n          OwnerUid: \"1000\"\n          Permissions: \"0755\"\n        Path: /opt/vespa/var\n\n  PostgresDataEfsAccessPoint:\n    Type: AWS::EFS::AccessPoint\n    Properties:\n      AccessPointTags: \n        - Key: Name\n          Value: postgres-data\n      FileSystemId: !Ref OnyxEfs\n      RootDirectory: \n        CreationInfo:\n          OwnerGid: \"1000\"\n          OwnerUid: \"1000\"\n          Permissions: \"0755\"\n        Path: /var/lib/postgresql/data\n\n  EFSMountTarget1:\n    DependsOn: OnyxEfs\n    Type: AWS::EFS::MountTarget\n    Properties:\n      FileSystemId: !Ref OnyxEfs\n      SubnetId: !Select [0, !Ref SubnetIDs]\n      SecurityGroups:\n        - !Ref EFSSecurityGroupMountTargets\n\n  EFSMountTarget2:\n    DependsOn: OnyxEfs\n    Type: AWS::EFS::MountTarget\n    Properties:\n      FileSystemId: !Ref OnyxEfs\n      SubnetId: !Select [1, !Ref SubnetIDs]\n      SecurityGroups:\n        - !Ref EFSSecurityGroupMountTargets\n\n  EFSSecurityGroupMountTargets:\n    Type: AWS::EC2::SecurityGroup\n    Properties:\n      GroupDescription: Security Group for EFS Mount Targets\n      VpcId: !Ref VpcID\n      SecurityGroupIngress:\n        - IpProtocol: tcp\n          FromPort: 2049\n          ToPort: 2049\n          CidrIp: 0.0.0.0/0\n\nOutputs:\n  OutputOnyxEfsId:\n    Description: Onyx Filesystem Id\n    Value: !Ref OnyxEfs\n    Export:\n      Name: !Sub ${AWS::StackName}-OnyxEfsId\n  OutputVespaEngineTmpEfsAccessPoint:\n    Description: VespaEngine Tmp AP\n    Value: !Ref VespaEngineTmpEfsAccessPoint\n    Export:\n      Name: !Sub ${AWS::StackName}-VespaEngineTmpEfsAccessPoint\n  OutputVespaEngineDataEfsAccessPoint:\n    Description: VespaEngine Data Ap\n    Value: !Ref VespaEngineDataEfsAccessPoint\n    Export:\n      Name: !Sub ${AWS::StackName}-VespaEngineDataEfsAccessPoint\n  OutputPostgresDataEfsAccessPoint:\n    Description: Postgres Data AP\n    Value: !Ref PostgresDataEfsAccessPoint\n    Export:\n      Name: !Sub ${AWS::StackName}-PostgresDataEfsAccessPoint\n  OutputEFSSecurityGroupMountTargets:\n    Description: EFS Security Group\n    Value: !Ref EFSSecurityGroupMountTargets\n    Export:\n      Name: !Sub ${AWS::StackName}-EFSSecurityGroupMountTargets\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/services/onyx_backend_api_server_service_template.yaml",
    "content": "AWSTemplateFormatVersion: \"2010-09-09\"\nDescription: CloudFormation template for Onyx Backend Api Server TaskDefinition\nParameters:\n  Environment:\n    Type: String\n  SubnetIDs:\n    Type: CommaDelimitedList\n    Description: \"Comma-delimited list of at least two subnet IDs in different Availability Zones\"\n  VpcID:\n    Type: String\n    Default: vpc-098cfa79d637dabff\n  ServiceName:\n    Type: String\n    Default: onyx-backend-api-server\n  TaskCpu:\n    Type: String\n    Default: \"2048\"\n  TaskMemory:\n    Type: String\n    Default: \"4096\"\n  TaskDesiredCount:\n    Type: Number\n    Default: 1\n\nResources:\n\n  ECSService:\n    Type: AWS::ECS::Service\n    Properties:\n      Cluster:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSClusterName\"\n      CapacityProviderStrategy:\n        - CapacityProvider: FARGATE\n          Base: 0\n          Weight: 1\n      TaskDefinition: !Ref TaskDefinition\n      ServiceName: !Sub ${Environment}-${ServiceName}-service\n      SchedulingStrategy: REPLICA\n      DesiredCount: !Ref TaskDesiredCount\n      AvailabilityZoneRebalancing: ENABLED\n      NetworkConfiguration:\n        AwsvpcConfiguration:\n          AssignPublicIp: ENABLED\n          SecurityGroups:\n            - Ref: SecurityGroup\n          Subnets: !Ref SubnetIDs\n      PlatformVersion: LATEST\n      DeploymentConfiguration:\n        MaximumPercent: 200\n        MinimumHealthyPercent: 100\n        DeploymentCircuitBreaker:\n          Enable: true\n          Rollback: true\n      DeploymentController:\n        Type: ECS\n      ServiceConnectConfiguration:\n        Enabled: false\n      ServiceRegistries:\n        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn\n      Tags:\n        - Key: app\n          Value: onyx\n        - Key: service\n          Value: !Ref ServiceName\n        - Key: env\n          Value: !Ref Environment\n      EnableECSManagedTags: true\n\n  SecurityGroup:\n    Type: AWS::EC2::SecurityGroup\n    Properties:\n      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.\n      GroupName: !Sub ${Environment}-ecs-${ServiceName}\n      VpcId: !Ref VpcID\n      SecurityGroupIngress:\n        - FromPort: 8080\n          ToPort: 8080\n          IpProtocol: tcp\n          CidrIp: 0.0.0.0/0\n        - FromPort: 8080\n          ToPort: 8080\n          IpProtocol: tcp\n          CidrIpv6: \"::/0\"\n\n  ServiceDiscoveryService:\n    Type: \"AWS::ServiceDiscovery::Service\"\n    Properties:\n      Name: !Sub ${Environment}-${ServiceName}-service\n      DnsConfig:\n        DnsRecords:\n          - Type: \"A\"\n            TTL: 15\n      NamespaceId:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespace\"\n      HealthCheckCustomConfig:\n        FailureThreshold: 1\n  \n  TaskDefinition:\n    Type: AWS::ECS::TaskDefinition\n    Properties:\n      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition\n      TaskRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskRole\"\n      ExecutionRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskExecutionRole\"\n      NetworkMode: awsvpc\n      RequiresCompatibilities:\n        - FARGATE\n      Cpu: !Ref TaskCpu\n      Memory: !Ref TaskMemory\n      RuntimePlatform:\n        CpuArchitecture: ARM64\n        OperatingSystemFamily: LINUX\n      ContainerDefinitions:\n        - Name: onyx-backend\n          Image: onyxdotapp/onyx-backend:latest\n          Cpu: 0\n          Essential: true\n          Command:\n            - \"/bin/sh\"\n            - \"-c\"\n            - |\n              alembic upgrade head && echo \"Starting Onyx Api Server\" && uvicorn onyx.main:app --host 0.0.0.0 --port 8080\n          PortMappings:\n            - Name: backend\n              ContainerPort: 8080\n              HostPort: 8080\n              Protocol: tcp\n              AppProtocol: http\n          LogConfiguration:\n            LogDriver: awslogs\n            Options:\n              awslogs-group: !Sub /ecs/${Environment}-${ServiceName}\n              mode: non-blocking\n              awslogs-create-group: \"true\"\n              max-buffer-size: \"25m\"\n              awslogs-region: !Ref AWS::Region\n              awslogs-stream-prefix: ecs\n          Environment:\n            - Name: REDIS_HOST\n              Value: !Sub \n                - \"${Environment}-onyx-redis-service.${ImportedNamespace}\"\n                - ImportedNamespace: !ImportValue\n                    Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespaceName\"\n            - Name: MODEL_SERVER_HOST\n              Value: !Sub \n                - \"${Environment}-onyx-model-server-inference-service.${ImportedNamespace}\"\n                - ImportedNamespace: !ImportValue\n                    Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespaceName\"\n            - Name: VESPA_HOST\n              Value: !Sub \n                - \"${Environment}-onyx-vespaengine-service.${ImportedNamespace}\"\n                - ImportedNamespace: !ImportValue\n                    Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespaceName\"\n            - Name: POSTGRES_HOST\n              Value: !Sub \n                - \"${Environment}-onyx-postgres-service.${ImportedNamespace}\"\n                - ImportedNamespace: !ImportValue\n                    Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespaceName\"\n            - Name: INDEXING_MODEL_SERVER_HOST\n              Value: !Sub \n                - \"${Environment}-onyx-model-server-indexing-service.${ImportedNamespace}\"\n                - ImportedNamespace: !ImportValue\n                    Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespaceName\"\n            - Name: AUTH_TYPE\n              Value: basic\n          Secrets:\n            - Name: POSTGRES_PASSWORD\n              ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/postgres/user/password\n            - Name: USER_AUTH_SECRET\n              ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/onyx/user-auth-secret\n          VolumesFrom: []\n          SystemControls: []\n\n  ECSAutoScalingTarget:\n    Type: AWS::ApplicationAutoScaling::ScalableTarget\n    DependsOn: ECSService\n    Properties:\n      MaxCapacity: 5\n      MinCapacity: 1\n      ResourceId: !Sub\n        - \"service/${ImportedCluster}/${Environment}-${ServiceName}-service\"\n        - ImportedCluster: !ImportValue\n            'Fn::Sub': \"${Environment}-onyx-cluster-ECSClusterName\"\n          ServiceName: !Ref ServiceName\n          Environment: !Ref Environment\n      ScalableDimension: ecs:service:DesiredCount\n      ServiceNamespace: ecs\n\n  ECSAutoScalingPolicy:\n    Type: AWS::ApplicationAutoScaling::ScalingPolicy\n    Properties:\n      PolicyName: !Sub ${Environment}-${ServiceName}-service-cpu-scaleout\n      ScalingTargetId: !Ref ECSAutoScalingTarget\n      PolicyType: TargetTrackingScaling\n      TargetTrackingScalingPolicyConfiguration:\n        TargetValue: 75\n        PredefinedMetricSpecification:\n          PredefinedMetricType: ECSServiceAverageCPUUtilization\n        ScaleOutCooldown: 60\n        ScaleInCooldown: 60\n\n  ECSAutoScalingPolicyMemory:\n    Type: AWS::ApplicationAutoScaling::ScalingPolicy\n    Properties:\n      PolicyName: !Sub ${Environment}-${ServiceName}-service-mem-scaleout\n      ScalingTargetId: !Ref ECSAutoScalingTarget\n      PolicyType: TargetTrackingScaling\n      TargetTrackingScalingPolicyConfiguration:\n        TargetValue: 80\n        PredefinedMetricSpecification:\n          PredefinedMetricType: ECSServiceAverageMemoryUtilization\n        ScaleOutCooldown: 60\n        ScaleInCooldown: 60\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/services/onyx_backend_background_server_service_template.yaml",
    "content": "AWSTemplateFormatVersion: \"2010-09-09\"\nDescription: CloudFormation template for Onyx Backend Background Server TaskDefinition\nParameters:\n  Environment:\n    Type: String\n  SubnetIDs:\n    Type: CommaDelimitedList\n    Description: \"Comma-delimited list of at least two subnet IDs in different Availability Zones\"\n  VpcID:\n    Type: String\n    Default: vpc-098cfa79d637dabff\n  ServiceName:\n    Type: String\n    Default: onyx-backend-background-server\n  TaskCpu:\n    Type: String\n    Default: \"2048\"\n  TaskMemory:\n    Type: String\n    Default: \"4096\"\n  TaskDesiredCount:\n    Type: Number\n    Default: 1\n\nResources:\n\n  ECSService:\n    Type: AWS::ECS::Service\n    Properties:\n      Cluster:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSClusterName\"\n      CapacityProviderStrategy:\n        - CapacityProvider: FARGATE\n          Base: 0\n          Weight: 1\n      TaskDefinition: !Ref TaskDefinition\n      ServiceName: !Sub ${Environment}-${ServiceName}-service\n      SchedulingStrategy: REPLICA\n      DesiredCount: !Ref TaskDesiredCount\n      AvailabilityZoneRebalancing: ENABLED\n      NetworkConfiguration:\n        AwsvpcConfiguration:\n          AssignPublicIp: ENABLED\n          SecurityGroups:\n            - Ref: SecurityGroup\n          Subnets: !Ref SubnetIDs\n      PlatformVersion: LATEST\n      DeploymentConfiguration:\n        MaximumPercent: 200\n        MinimumHealthyPercent: 100\n        DeploymentCircuitBreaker:\n          Enable: true\n          Rollback: true\n      DeploymentController:\n        Type: ECS\n      ServiceConnectConfiguration:\n        Enabled: false\n      ServiceRegistries:\n        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn\n      Tags:\n        - Key: app\n          Value: onyx\n        - Key: service\n          Value: !Ref ServiceName\n        - Key: env\n          Value: !Ref Environment\n      EnableECSManagedTags: true\n\n  SecurityGroup:\n    Type: AWS::EC2::SecurityGroup\n    Properties:\n      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.\n      GroupName: !Sub ${Environment}-ecs-${ServiceName}\n      VpcId: !Ref VpcID\n      SecurityGroupIngress:\n        - FromPort: 8080\n          ToPort: 8080\n          IpProtocol: tcp\n          CidrIp: 0.0.0.0/0\n        - FromPort: 8080\n          ToPort: 8080\n          IpProtocol: tcp\n          CidrIpv6: \"::/0\"\n\n  ServiceDiscoveryService:\n    Type: \"AWS::ServiceDiscovery::Service\"\n    Properties:\n      Name: !Sub ${Environment}-${ServiceName}-service\n      DnsConfig:\n        DnsRecords:\n          - Type: \"A\"\n            TTL: 15\n      NamespaceId:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespace\"\n      HealthCheckCustomConfig:\n        FailureThreshold: 1\n  \n  TaskDefinition:\n    Type: AWS::ECS::TaskDefinition\n    Properties:\n      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition\n      TaskRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskRole\"\n      ExecutionRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskExecutionRole\"\n      NetworkMode: awsvpc\n      RequiresCompatibilities:\n        - FARGATE\n      Cpu: !Ref TaskCpu\n      Memory: !Ref TaskMemory\n      RuntimePlatform:\n        CpuArchitecture: ARM64\n        OperatingSystemFamily: LINUX\n      ContainerDefinitions:\n        - Name: onyx-backend-background\n          Image: onyxdotapp/onyx-backend:latest\n          Cpu: 0\n          Essential: true\n          Command:\n            - \"/usr/bin/supervisord\"\n            - \"-c\"\n            - \"/etc/supervisor/conf.d/supervisord.conf\"\n          PortMappings:\n            - Name: backend\n              ContainerPort: 8080\n              HostPort: 8080\n              Protocol: tcp\n              AppProtocol: http\n          LogConfiguration:\n            LogDriver: awslogs\n            Options:\n              awslogs-group: !Sub /ecs/${Environment}-${ServiceName}\n              mode: non-blocking\n              awslogs-create-group: \"true\"\n              max-buffer-size: \"25m\"\n              awslogs-region: !Ref AWS::Region\n              awslogs-stream-prefix: ecs\n          Environment:\n            - Name: REDIS_HOST\n              Value: !Sub \n                - \"${Environment}-onyx-redis-service.${ImportedNamespace}\"\n                - ImportedNamespace: !ImportValue\n                    Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespaceName\"\n            - Name: MODEL_SERVER_HOST\n              Value: !Sub \n                - \"${Environment}-onyx-model-server-inference-service.${ImportedNamespace}\"\n                - ImportedNamespace: !ImportValue\n                    Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespaceName\"\n            - Name: VESPA_HOST\n              Value: !Sub \n                - \"${Environment}-onyx-vespaengine-service.${ImportedNamespace}\"\n                - ImportedNamespace: !ImportValue\n                    Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespaceName\"\n            - Name: POSTGRES_HOST\n              Value: !Sub \n                - \"${Environment}-onyx-postgres-service.${ImportedNamespace}\"\n                - ImportedNamespace: !ImportValue\n                    Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespaceName\"\n            - Name: INDEXING_MODEL_SERVER_HOST\n              Value: !Sub \n                - \"${Environment}-onyx-model-server-indexing-service.${ImportedNamespace}\"\n                - ImportedNamespace: !ImportValue\n                    Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespaceName\"\n            - Name: AUTH_TYPE\n              Value: basic\n          Secrets:\n            - Name: POSTGRES_PASSWORD\n              ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/postgres/user/password\n            - Name: USER_AUTH_SECRET\n              ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/onyx/user-auth-secret\n          VolumesFrom: []\n          SystemControls: []\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/services/onyx_model_server_indexing_service_template.yaml",
    "content": "AWSTemplateFormatVersion: \"2010-09-09\"\nDescription: CloudFormation template for Onyx Model Server Indexing TaskDefinition\nParameters:\n  Environment:\n    Type: String\n  SubnetIDs:\n    Type: CommaDelimitedList\n    Description: \"Comma-delimited list of at least two subnet IDs in different Availability Zones\"\n  VpcID:\n    Type: String\n    Default: vpc-098cfa79d637dabff\n  ServiceName:\n    Type: String\n    Default: onyx-model-server-indexing\n  TaskCpu:\n    Type: String\n    Default: \"2048\"\n  TaskMemory:\n    Type: String\n    Default: \"4096\"\n  TaskDesiredCount:\n    Type: Number\n    Default: 1\n\nResources:\n\n  ECSService:\n    Type: AWS::ECS::Service\n    Properties:\n      Cluster:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSClusterName\"\n      CapacityProviderStrategy:\n        - CapacityProvider: FARGATE\n          Base: 0\n          Weight: 1\n      TaskDefinition: !Ref TaskDefinition\n      ServiceName: !Sub ${Environment}-${ServiceName}-service\n      SchedulingStrategy: REPLICA\n      DesiredCount: !Ref TaskDesiredCount\n      AvailabilityZoneRebalancing: ENABLED\n      NetworkConfiguration:\n        AwsvpcConfiguration:\n          AssignPublicIp: ENABLED\n          SecurityGroups:\n            - Ref: SecurityGroup\n          Subnets: !Ref SubnetIDs\n      PlatformVersion: LATEST\n      DeploymentConfiguration:\n        MaximumPercent: 200\n        MinimumHealthyPercent: 100\n        DeploymentCircuitBreaker:\n          Enable: true\n          Rollback: true\n      DeploymentController:\n        Type: ECS\n      ServiceConnectConfiguration:\n        Enabled: false\n      ServiceRegistries:\n        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn\n      Tags:\n        - Key: app\n          Value: onyx\n        - Key: service\n          Value: !Ref ServiceName\n        - Key: env\n          Value: !Ref Environment\n      EnableECSManagedTags: true\n\n  SecurityGroup:\n    Type: AWS::EC2::SecurityGroup\n    Properties:\n      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.\n      GroupName: !Sub ${Environment}-ecs-${ServiceName}\n      VpcId: !Ref VpcID\n      SecurityGroupIngress:\n        - FromPort: 9000\n          ToPort: 9000\n          IpProtocol: tcp\n          CidrIp: 0.0.0.0/0\n        - FromPort: 9000\n          ToPort: 9000\n          IpProtocol: tcp\n          CidrIpv6: \"::/0\"\n\n  ServiceDiscoveryService:\n    Type: \"AWS::ServiceDiscovery::Service\"\n    Properties:\n      Name: !Sub ${Environment}-${ServiceName}-service\n      DnsConfig:\n        DnsRecords:\n          - Type: \"A\"\n            TTL: 15\n      NamespaceId:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespace\"\n      HealthCheckCustomConfig:\n        FailureThreshold: 1\n  \n  TaskDefinition:\n    Type: AWS::ECS::TaskDefinition\n    Properties:\n      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition\n      TaskRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskRole\"\n      ExecutionRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskExecutionRole\"\n      NetworkMode: awsvpc\n      RequiresCompatibilities:\n        - FARGATE\n      Cpu: !Ref TaskCpu\n      Memory: !Ref TaskMemory\n      RuntimePlatform:\n        CpuArchitecture: ARM64\n        OperatingSystemFamily: LINUX\n      ContainerDefinitions:\n        - Name: onyx-model-server-indexing\n          Image: onyxdotapp/onyx-model-server:latest\n          Cpu: 0\n          Essential: true\n          Command:\n            - \"/bin/sh\"\n            - \"-c\"\n            - >\n              if [ \"${DISABLE_MODEL_SERVER}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER}\" = \"true\" ]; then echo 'Skipping service...';\n              exit 0; else exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; fi\n          PortMappings:\n            - Name: model-server\n              ContainerPort: 9000\n              HostPort: 9000\n              Protocol: tcp\n              AppProtocol: http\n          Environment:\n            - Name: LOG_LEVEL\n              Value: info\n            - Name: INDEXING_ONLY\n              Value: True\n            - Name: VESPA_SEARCHER_THREADS\n              Value: \"1\"\n          MountPoints:\n            - SourceVolume: efs-volume\n              ContainerPath: /app/.cache/huggingface/\n              ReadOnly: false\n          VolumesFrom: []\n          LogConfiguration:\n            LogDriver: awslogs\n            Options:\n              awslogs-group: !Sub /ecs/${Environment}-${ServiceName}\n              mode: non-blocking\n              awslogs-create-group: \"true\"\n              max-buffer-size: \"25m\"\n              awslogs-region: !Ref AWS::Region\n              awslogs-stream-prefix: \"ecs\"\n          SystemControls: []\n      Volumes:\n        - Name: efs-volume\n          EFSVolumeConfiguration:\n            FilesystemId:\n              Fn::ImportValue:\n                Fn::Sub: \"${Environment}-onyx-efs-OnyxEfsId\" \n            RootDirectory: \"/\"\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/services/onyx_model_server_inference_service_template.yaml",
    "content": "AWSTemplateFormatVersion: \"2010-09-09\"\nDescription: CloudFormation template for Onyx Model Server Inference TaskDefinition\nParameters:\n  Environment:\n    Type: String\n  SubnetIDs:\n    Type: CommaDelimitedList\n    Description: \"Comma-delimited list of at least two subnet IDs in different Availability Zones\"\n  VpcID:\n    Type: String\n    Default: vpc-098cfa79d637dabff\n  ServiceName:\n    Type: String\n    Default: onyx-model-server-inference\n  TaskCpu:\n    Type: String\n    Default: \"2048\"\n  TaskMemory:\n    Type: String\n    Default: \"4096\"\n  TaskDesiredCount:\n    Type: Number\n    Default: 1\n\nResources:\n\n  ECSService:\n    Type: AWS::ECS::Service\n    Properties:\n      Cluster:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSClusterName\"\n      CapacityProviderStrategy:\n        - CapacityProvider: FARGATE\n          Base: 0\n          Weight: 1\n      TaskDefinition: !Ref TaskDefinition\n      ServiceName: !Sub ${Environment}-${ServiceName}-service\n      SchedulingStrategy: REPLICA\n      DesiredCount: !Ref TaskDesiredCount\n      AvailabilityZoneRebalancing: ENABLED\n      NetworkConfiguration:\n        AwsvpcConfiguration:\n          AssignPublicIp: ENABLED\n          SecurityGroups:\n            - Ref: SecurityGroup\n          Subnets: !Ref SubnetIDs\n      PlatformVersion: LATEST\n      DeploymentConfiguration:\n        MaximumPercent: 200\n        MinimumHealthyPercent: 100\n        DeploymentCircuitBreaker:\n          Enable: true\n          Rollback: true\n      DeploymentController:\n        Type: ECS\n      ServiceConnectConfiguration:\n        Enabled: false\n      ServiceRegistries:\n        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn\n      Tags:\n        - Key: app\n          Value: onyx\n        - Key: service\n          Value: !Ref ServiceName\n        - Key: env\n          Value: !Ref Environment\n      EnableECSManagedTags: true\n\n  SecurityGroup:\n    Type: AWS::EC2::SecurityGroup\n    Properties:\n      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.\n      GroupName: !Sub ${Environment}-ecs-${ServiceName}\n      VpcId: !Ref VpcID\n      SecurityGroupIngress:\n        - FromPort: 9000\n          ToPort: 9000\n          IpProtocol: tcp\n          CidrIp: 0.0.0.0/0\n        - FromPort: 9000\n          ToPort: 9000\n          IpProtocol: tcp\n          CidrIpv6: \"::/0\"\n\n  ServiceDiscoveryService:\n    Type: \"AWS::ServiceDiscovery::Service\"\n    Properties:\n      Name: !Sub ${Environment}-${ServiceName}-service\n      DnsConfig:\n        DnsRecords:\n          - Type: \"A\"\n            TTL: 15\n      NamespaceId:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespace\"\n      HealthCheckCustomConfig:\n        FailureThreshold: 1\n  \n  TaskDefinition:\n    Type: AWS::ECS::TaskDefinition\n    Properties:\n      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition\n      TaskRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskRole\"\n      ExecutionRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskExecutionRole\"\n      NetworkMode: awsvpc\n      RequiresCompatibilities:\n        - FARGATE\n      Cpu: !Ref TaskCpu\n      Memory: !Ref TaskMemory\n      RuntimePlatform:\n        CpuArchitecture: ARM64\n        OperatingSystemFamily: LINUX\n      ContainerDefinitions:\n        - Name: onyx-model-server-inference\n          Image: onyxdotapp/onyx-model-server:latest\n          Cpu: 0\n          Essential: true\n          Command:\n            - \"/bin/sh\"\n            - \"-c\"\n            - >\n              if [ \"${DISABLE_MODEL_SERVER}\" = \"True\" ] || [ \"${DISABLE_MODEL_SERVER}\" = \"true\" ]; then echo 'Skipping service...';\n              exit 0; else exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; fi\n          PortMappings:\n            - Name: model-server\n              ContainerPort: 9000\n              HostPort: 9000\n              Protocol: tcp\n              AppProtocol: http\n          Environment:\n            - Name: LOG_LEVEL\n              Value: info\n          MountPoints:\n            - SourceVolume: efs-volume\n              ContainerPath: /app/.cache/huggingface/\n              ReadOnly: false\n          VolumesFrom: []\n          LogConfiguration:\n            LogDriver: awslogs\n            Options:\n              awslogs-group: !Sub /ecs/${Environment}-${ServiceName}\n              mode: non-blocking\n              awslogs-create-group: \"true\"\n              max-buffer-size: \"25m\"\n              awslogs-region: !Ref AWS::Region\n              awslogs-stream-prefix: \"ecs\"\n          SystemControls: []\n      Volumes:\n        - Name: efs-volume\n          EFSVolumeConfiguration:\n            FilesystemId:\n              Fn::ImportValue:\n                Fn::Sub: \"${Environment}-onyx-efs-OnyxEfsId\" \n            RootDirectory: \"/\"\n\n  ECSAutoScalingTarget:\n    Type: AWS::ApplicationAutoScaling::ScalableTarget\n    DependsOn: ECSService\n    Properties:\n      MaxCapacity: 5\n      MinCapacity: 1\n      ResourceId: !Sub\n        - \"service/${ImportedCluster}/${Environment}-${ServiceName}-service\"\n        - ImportedCluster: !ImportValue\n            'Fn::Sub': \"${Environment}-onyx-cluster-ECSClusterName\"\n          ServiceName: !Ref ServiceName\n          Environment: !Ref Environment\n      ScalableDimension: ecs:service:DesiredCount\n      ServiceNamespace: ecs\n\n  ECSAutoScalingPolicy:\n    Type: AWS::ApplicationAutoScaling::ScalingPolicy\n    Properties:\n      PolicyName: !Sub ${Environment}-${ServiceName}-service-cpu-scaleout\n      ScalingTargetId: !Ref ECSAutoScalingTarget\n      PolicyType: TargetTrackingScaling\n      TargetTrackingScalingPolicyConfiguration:\n        TargetValue: 75\n        PredefinedMetricSpecification:\n          PredefinedMetricType: ECSServiceAverageCPUUtilization\n        ScaleOutCooldown: 60\n        ScaleInCooldown: 60\n\n  ECSAutoScalingPolicyMemory:\n    Type: AWS::ApplicationAutoScaling::ScalingPolicy\n    Properties:\n      PolicyName: !Sub ${Environment}-${ServiceName}-service-memory-scaleout\n      ScalingTargetId: !Ref ECSAutoScalingTarget\n      PolicyType: TargetTrackingScaling\n      TargetTrackingScalingPolicyConfiguration:\n        TargetValue: 80\n        PredefinedMetricSpecification:\n          PredefinedMetricType: ECSServiceAverageMemoryUtilization\n        ScaleOutCooldown: 60\n        ScaleInCooldown: 60\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/services/onyx_nginx_service_template.yaml",
    "content": "AWSTemplateFormatVersion: \"2010-09-09\"\nDescription: \"The template used to create an ECS Service from the ECS Console.\"\n\nParameters:\n  SubnetIDs:\n      Type: CommaDelimitedList\n      Description: \"Comma-delimited list of at least two subnet IDs in different Availability Zones\"\n  VpcID:\n      Type: String\n      Default: vpc-098cfa79d637dabff\n  HostedZoneId:\n      Type: String\n      Default: ''\n  DomainName:\n      Type: String\n      Default: demo.danswer.ai\n  Environment:\n    Type: String\n  ServiceName:\n    Type: String\n    Default: onyx-nginx\n  OnyxNamespace:\n    Type: String\n    Default: onyx\n  OnyxBackendApiServiceName:\n    Type: String\n    Default: onyx-backend-api-server-service\n  OnyxWebServerServiceName:\n    Type: String\n    Default: onyx-web-server-service\n  TaskCpu:\n    Type: String\n    Default: \"512\"\n  TaskMemory:\n    Type: String\n    Default: \"1024\"\n  TaskDesiredCount:\n    Type: Number\n    Default: 1\n  GitHubConfigUrl:\n    Type: String\n    Default: \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/data/nginx/app.conf.template\"\n    Description: \"URL to the nginx configuration file on GitHub\"\n  GitHubRunScriptUrl:\n    Type: String\n    Default: \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/data/nginx/run-nginx.sh\"\n    Description: \"URL to the nginx run script on GitHub\"\n\nConditions:\n  CreateRoute53: !Not \n    - !Equals \n      - !Ref HostedZoneId\n      - ''\n\nResources:\n  ECSService:\n    Type: \"AWS::ECS::Service\"\n    DependsOn: LoadBalancer\n    Properties:\n      Cluster:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSClusterName\"\n      CapacityProviderStrategy:\n        - CapacityProvider: \"FARGATE\"\n          Base: 0\n          Weight: 1\n      TaskDefinition: !Ref TaskDefinition\n      ServiceName: !Sub ${Environment}-${ServiceName}\n      SchedulingStrategy: \"REPLICA\"\n      DesiredCount: !Ref TaskDesiredCount\n      AvailabilityZoneRebalancing: \"ENABLED\"\n      NetworkConfiguration:\n        AwsvpcConfiguration:\n          AssignPublicIp: \"ENABLED\"\n          SecurityGroups: \n            - !Ref SecurityGroup\n          Subnets: !Ref SubnetIDs\n      PlatformVersion: \"LATEST\"\n      DeploymentConfiguration:\n        MaximumPercent: 200\n        MinimumHealthyPercent: 100\n        DeploymentCircuitBreaker:\n          Enable: true\n          Rollback: true\n      DeploymentController:\n        Type: \"ECS\"\n      ServiceConnectConfiguration:\n        Enabled: false\n      ServiceRegistries:\n        - RegistryArn: !GetAtt\n            - \"ServiceDiscoveryService\"\n            - \"Arn\"\n      Tags:\n        - Key: app\n          Value: onyx\n        - Key: service\n          Value: !Ref ServiceName\n        - Key: env\n          Value: !Ref Environment\n      EnableECSManagedTags: true\n      LoadBalancers:\n        - ContainerName: nginx\n          ContainerPort: 80\n          TargetGroupArn: !Ref TargetGroup\n\n  TaskDefinition:\n    Type: AWS::ECS::TaskDefinition\n    Properties:\n      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition\n      ContainerDefinitions:\n        - Name: nginx\n          Image: nginx:1.25.5-alpine\n          Cpu: 0\n          PortMappings:\n            - Name: nginx-80-tcp\n              ContainerPort: 80\n              HostPort: 80\n              Protocol: tcp\n          Essential: true\n          Command:\n            - /bin/sh\n            - -c\n            - dos2unix /etc/nginx/conf.d/run-nginx.sh && /etc/nginx/conf.d/run-nginx.sh app.conf.template\n          Environment:\n            - Name: EMAIL\n              Value: \"\"\n            - Name: DOMAIN\n              Value: !Ref DomainName\n            - Name: ONYX_BACKEND_API_HOST\n              Value: !Sub ${Environment}-${OnyxBackendApiServiceName}.${OnyxNamespace}\n            - Name: ONYX_WEB_SERVER_HOST\n              Value: !Sub ${Environment}-${OnyxWebServerServiceName}.${OnyxNamespace}\n          MountPoints:\n            - SourceVolume: efs-volume\n              ContainerPath: /etc/nginx/conf.d\n          VolumesFrom: []\n          DependsOn:\n            - ContainerName: github-sync-container\n              Condition: SUCCESS\n          LogConfiguration:\n            LogDriver: awslogs\n            Options:\n              awslogs-group: !Sub /ecs/${Environment}-OnyxNginxTaskDefinition\n              mode: non-blocking\n              awslogs-create-group: \"true\"\n              max-buffer-size: 25m\n              awslogs-region: !Ref AWS::Region\n              awslogs-stream-prefix: ecs\n          SystemControls: []\n        - Name: github-sync-container\n          Image: curlimages/curl:latest\n          Cpu: 128\n          MemoryReservation: 256\n          PortMappings: []\n          Essential: false\n          Command:\n            - sh\n            - -c\n            - !Sub |\n              curl -L ${GitHubConfigUrl} -o /etc/nginx/conf.d/app.conf.template && \n              curl -L ${GitHubRunScriptUrl} -o /etc/nginx/conf.d/run-nginx.sh && \n              chmod 644 /etc/nginx/conf.d/app.conf.template && \n              chmod 755 /etc/nginx/conf.d/run-nginx.sh && \n              exit 0 || exit 1\n          MountPoints:\n            - SourceVolume: efs-volume\n              ContainerPath: /etc/nginx/conf.d\n          VolumesFrom: []\n          LogConfiguration:\n            LogDriver: awslogs\n            Options:\n              awslogs-group: !Sub /ecs/${Environment}-github-sync-configs-TaskDefinition\n              mode: non-blocking\n              awslogs-create-group: \"true\"\n              max-buffer-size: 25m\n              awslogs-region: !Ref AWS::Region\n              awslogs-stream-prefix: ecs\n          SystemControls: []\n      TaskRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskRole\"\n      ExecutionRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskExecutionRole\" \n      NetworkMode: awsvpc\n      Volumes:\n        - Name: efs-volume\n          EFSVolumeConfiguration:\n            FilesystemId:\n              Fn::ImportValue:\n                Fn::Sub: \"${Environment}-onyx-efs-OnyxEfsId\"\n            RootDirectory: /\n      PlacementConstraints: []\n      RequiresCompatibilities:\n        - FARGATE\n      Cpu: !Ref TaskCpu\n      Memory: !Ref TaskMemory\n      EnableFaultInjection: false \n\n  SecurityGroup:\n    Type: \"AWS::EC2::SecurityGroup\"\n    Properties:\n      GroupDescription: !Sub \"Security group for ${ServiceName}\"\n      GroupName: !Sub ${Environment}-ecs-${ServiceName}\n      VpcId: !Ref VpcID\n      SecurityGroupIngress:\n        - FromPort: 80\n          ToPort: 80\n          IpProtocol: \"tcp\"\n          CidrIp: \"0.0.0.0/0\"\n        - FromPort: 80\n          ToPort: 80\n          IpProtocol: \"tcp\"\n          CidrIpv6: \"::/0\"\n\n  ServiceDiscoveryService:\n    Type: \"AWS::ServiceDiscovery::Service\"\n    Properties:\n      Name: !Ref ServiceName\n      DnsConfig:\n        DnsRecords:\n          - Type: \"A\"\n            TTL: 15\n      NamespaceId:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespace\"\n      HealthCheckCustomConfig:\n        FailureThreshold: 1\n\n  LoadBalancer:\n    Type: AWS::ElasticLoadBalancingV2::LoadBalancer\n    DependsOn: SecurityGroup\n    Properties:\n      Type: application\n      Scheme: internet-facing\n      Subnets: !Ref SubnetIDs\n      SecurityGroups: \n        - !Ref SecurityGroup\n\n  LoadBalancerListener:\n    Type: AWS::ElasticLoadBalancingV2::Listener\n    Properties:\n      LoadBalancerArn: !Ref LoadBalancer\n      Port: 80\n      Protocol: HTTP\n      DefaultActions:\n        - Type: forward\n          TargetGroupArn: !Ref TargetGroup\n\n  TargetGroup:\n    Type: AWS::ElasticLoadBalancingV2::TargetGroup\n    Properties:\n      HealthCheckEnabled: True\n      HealthCheckIntervalSeconds: 30\n      HealthCheckPort: 80\n      HealthCheckPath: /api/health\n      HealthCheckProtocol: HTTP\n      HealthCheckTimeoutSeconds: 20\n      HealthyThresholdCount: 3\n      Port: 80\n      Protocol: HTTP\n      ProtocolVersion: HTTP1\n      VpcId: !Ref VpcID\n      TargetType: ip\n\n  Route53Record:\n    Type: AWS::Route53::RecordSet\n    Condition: CreateRoute53\n    Properties:\n      HostedZoneId: !Ref HostedZoneId\n      Name: !Ref DomainName\n      Type: A\n      AliasTarget:\n        DNSName: !GetAtt LoadBalancer.DNSName\n        HostedZoneId: !GetAtt LoadBalancer.CanonicalHostedZoneID\n        EvaluateTargetHealth: false\n\nOutputs:\n  ECSService:\n    Description: \"The created service.\"\n    Value: !Ref \"ECSService\"\n  ServiceDiscoveryService:\n    Value: !Ref \"ServiceDiscoveryService\"\n  OutputOnyxLoadBalancerDNSName:\n    Description: LoadBalancer DNSName\n    Value: !GetAtt LoadBalancer.DNSName\n    Export:\n      Name: !Sub ${AWS::StackName}-OnyxLoadBalancerDNSName\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/services/onyx_postgres_service_template.yaml",
    "content": "AWSTemplateFormatVersion: '2010-09-09'\nParameters:\n  Environment:\n    Type: String\n    Default: production\n  SubnetIDs:\n    Type: CommaDelimitedList\n    Description: \"Comma-delimited list of at least two subnet IDs in different Availability Zones\"\n  VpcID:\n    Type: String\n    Default: vpc-098cfa79d637dabff\n  ServiceName:\n    Type: String\n    Default: onyx-postgres\n  TaskCpu:\n    Type: String\n    Default: \"1024\"\n  TaskMemory:\n    Type: String\n    Default: \"2048\"\n  TaskDesiredCount:\n    Type: Number\n    Default: 1\n\nResources:\n\n  ECSService:\n    Type: AWS::ECS::Service\n    Properties:\n      Cluster:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSClusterName\"\n      CapacityProviderStrategy:\n        - CapacityProvider: FARGATE\n          Base: 0\n          Weight: 1\n      TaskDefinition: !Ref TaskDefinition\n      ServiceName: !Sub ${Environment}-${ServiceName}-service\n      SchedulingStrategy: REPLICA\n      DesiredCount: !Ref TaskDesiredCount\n      AvailabilityZoneRebalancing: DISABLED\n      NetworkConfiguration:\n        AwsvpcConfiguration:\n          AssignPublicIp: ENABLED\n          SecurityGroups:\n            - !Ref SecurityGroup\n          Subnets: !Ref SubnetIDs\n      PlatformVersion: LATEST\n      DeploymentConfiguration:\n        MaximumPercent: 100\n        MinimumHealthyPercent: 0\n        DeploymentCircuitBreaker:\n          Enable: true\n          Rollback: true\n      DeploymentController:\n        Type: ECS\n      ServiceConnectConfiguration:\n        Enabled: false\n      ServiceRegistries:\n        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn\n      Tags:\n        - Key: app\n          Value: onyx\n        - Key: service\n          Value: !Ref ServiceName\n        - Key: env\n          Value: !Ref Environment\n      EnableECSManagedTags: true\n\n  SecurityGroup:\n    Type: AWS::EC2::SecurityGroup\n    Properties:\n      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.\n      GroupName: !Sub ${Environment}-${ServiceName}\n      VpcId: !Ref VpcID\n      SecurityGroupIngress:\n        - FromPort: 5432\n          ToPort: 5432\n          IpProtocol: tcp\n          CidrIp: 0.0.0.0/0\n        - FromPort: 5432\n          ToPort: 5432\n          IpProtocol: tcp\n          CidrIpv6: \"::/0\"\n        - FromPort: 2049\n          ToPort: 2049\n          IpProtocol: tcp\n          SourceSecurityGroupId:\n            Fn::ImportValue:\n              Fn::Sub: \"${Environment}-onyx-efs-EFSSecurityGroupMountTargets\"\n\n  ServiceDiscoveryService:\n    Type: \"AWS::ServiceDiscovery::Service\"\n    Properties:\n      Name: !Sub ${Environment}-${ServiceName}-service\n      DnsConfig:\n        DnsRecords:\n          - Type: \"A\"\n            TTL: 15\n      NamespaceId:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespace\"\n      HealthCheckCustomConfig:\n        FailureThreshold: 1\n\n  TaskDefinition:\n    Type: AWS::ECS::TaskDefinition\n    Properties:\n      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition\n      TaskRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskRole\"\n      ExecutionRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskExecutionRole\"\n      NetworkMode: awsvpc\n      RequiresCompatibilities:\n        - FARGATE\n      Cpu: !Ref TaskCpu\n      Memory: !Ref TaskMemory\n      RuntimePlatform:\n        CpuArchitecture: ARM64\n        OperatingSystemFamily: LINUX\n      Volumes:\n        - Name: efs-volume-data\n          EFSVolumeConfiguration:\n            FilesystemId:\n              Fn::ImportValue:\n                Fn::Sub: \"${Environment}-onyx-efs-OnyxEfsId\"\n            RootDirectory: \"/\"\n            TransitEncryption: ENABLED\n            AuthorizationConfig:\n              AccessPointId:\n                Fn::ImportValue:\n                  Fn::Sub: \"${Environment}-onyx-efs-PostgresDataEfsAccessPoint\"\n      ContainerDefinitions:\n        - Name: !Ref ServiceName\n          Image: postgres:15.2-alpine\n          Cpu: 0\n          Essential: true\n          StopTimeout: 30\n          Command:\n            - \"-c\"\n            - \"max_connections=250\"\n          PortMappings:\n            - Name: postgres\n              ContainerPort: 5432\n              HostPort: 5432\n              Protocol: tcp\n              AppProtocol: http\n          Environment:\n            - Name: POSTGRES_USER\n              Value: postgres\n            - Name: PGSSLMODE\n              Value: require\n            - Name: POSTGRES_DB\n              Value: postgres\n          Secrets:\n            - Name: POSTGRES_PASSWORD\n              ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/postgres/user/password\n          MountPoints:\n            - SourceVolume: efs-volume-data\n              ContainerPath: /var/lib/postgresql/data\n              ReadOnly: false\n            - SourceVolume: efs-volume-data\n              ContainerPath: /var/lib/postgresql\n              ReadOnly: false\n          User: \"1000\"\n          LogConfiguration:\n            LogDriver: awslogs\n            Options:\n              awslogs-group: /ecs/OnyxPostgresTaskDefinition\n              mode: non-blocking\n              awslogs-create-group: \"true\"\n              max-buffer-size: \"25m\"\n              awslogs-region: !Ref AWS::Region\n              awslogs-stream-prefix: ecs\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/services/onyx_redis_service_template.yaml",
    "content": "AWSTemplateFormatVersion: \"2010-09-09\"\nDescription: CloudFormation template for Onyx Redis TaskDefinition\nParameters:\n  Environment:\n    Type: String\n  SubnetIDs:\n    Type: CommaDelimitedList\n    Description: \"Comma-delimited list of at least two subnet IDs in different Availability Zones\"\n  VpcID:\n    Type: String\n    Default: vpc-098cfa79d637dabff\n  ServiceName:\n    Type: String\n    Default: onyx-redis\n  TaskCpu:\n    Type: String\n    Default: \"1024\"\n  TaskMemory:\n    Type: String\n    Default: \"2048\"\n  TaskDesiredCount:\n    Type: Number\n    Default: 1\n\nResources:\n\n  ECSService:\n    Type: AWS::ECS::Service\n    Properties:\n      Cluster:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSClusterName\"\n      CapacityProviderStrategy:\n        - CapacityProvider: FARGATE\n          Base: 0\n          Weight: 1\n      TaskDefinition: !Ref TaskDefinition\n      ServiceName: !Sub ${Environment}-${ServiceName}-service\n      SchedulingStrategy: REPLICA\n      DesiredCount: !Ref TaskDesiredCount\n      AvailabilityZoneRebalancing: ENABLED\n      NetworkConfiguration:\n        AwsvpcConfiguration:\n          AssignPublicIp: ENABLED\n          SecurityGroups:\n            - Ref: SecurityGroup\n          Subnets: !Ref SubnetIDs\n      PlatformVersion: LATEST\n      DeploymentConfiguration:\n        MaximumPercent: 200\n        MinimumHealthyPercent: 100\n        DeploymentCircuitBreaker:\n          Enable: true\n          Rollback: true\n      DeploymentController:\n        Type: ECS\n      ServiceConnectConfiguration:\n        Enabled: false\n      ServiceRegistries:\n        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn\n      Tags:\n        - Key: app\n          Value: onyx\n        - Key: service\n          Value: !Ref ServiceName\n        - Key: env\n          Value: !Ref Environment\n      EnableECSManagedTags: true\n\n  SecurityGroup:\n    Type: AWS::EC2::SecurityGroup\n    Properties:\n      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.\n      GroupName: !Sub ${Environment}-ecs-${ServiceName}\n      VpcId: !Ref VpcID\n      SecurityGroupIngress:\n        - FromPort: 6379\n          ToPort: 6379\n          IpProtocol: tcp\n          CidrIp: 0.0.0.0/0\n        - FromPort: 6379\n          ToPort: 6379\n          IpProtocol: tcp\n          CidrIpv6: \"::/0\"\n\n  ServiceDiscoveryService:\n    Type: \"AWS::ServiceDiscovery::Service\"\n    Properties:\n      Name: !Sub ${Environment}-${ServiceName}-service\n      DnsConfig:\n        DnsRecords:\n          - Type: \"A\"\n            TTL: 15\n      NamespaceId:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespace\"\n      HealthCheckCustomConfig:\n        FailureThreshold: 1\n  \n  TaskDefinition:\n    Type: AWS::ECS::TaskDefinition\n    Properties:\n      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition\n      TaskRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskRole\"\n      ExecutionRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskExecutionRole\"\n      NetworkMode: awsvpc\n      RequiresCompatibilities:\n        - FARGATE\n      Cpu: !Ref TaskCpu\n      Memory: !Ref TaskMemory\n      RuntimePlatform:\n        CpuArchitecture: ARM64\n        OperatingSystemFamily: LINUX\n      ContainerDefinitions:\n        - Name: redis \n          Image: redis:7.4-alpine \n          Cpu: 0\n          Essential: true\n          Command:\n            - \"redis-server\"\n            - \"--save\"\n            - \"\\\"\\\"\"\n            - \"--appendonly\"\n            - \"no\"\n          PortMappings:\n            - Name: redis_port\n              ContainerPort: 6379\n              HostPort: 6379\n              Protocol: tcp\n              AppProtocol: http\n          LogConfiguration:\n            LogDriver: awslogs\n            Options:\n              awslogs-group: !Sub /ecs/${Environment}-${ServiceName}\n              mode: non-blocking\n              awslogs-create-group: \"true\"\n              max-buffer-size: \"25m\"\n              awslogs-region: !Ref AWS::Region\n              awslogs-stream-prefix: ecs\n          Environment: []\n          VolumesFrom: []\n          SystemControls: []\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/services/onyx_vespaengine_service_template.yaml",
    "content": "AWSTemplateFormatVersion: \"2010-09-09\"\nDescription: CloudFormation template for Onyx Vespa Engine TaskDefinition\nParameters:\n  Environment:\n    Type: String\n  SubnetIDs:\n    Type: CommaDelimitedList\n    Description: \"Comma-delimited list of at least two subnet IDs in different Availability Zones\"\n  VpcID:\n    Type: String\n    Default: vpc-098cfa79d637dabff\n  ServiceName:\n    Type: String\n    Default: onyx-vespaengine\n  TaskCpu:\n    Type: String\n    Default: \"4096\"\n  TaskMemory:\n    Type: String\n    Default: \"16384\"\n  TaskDesiredCount:\n    Type: Number\n    Default: 1\n\nResources:\n\n  ECSService:\n    Type: AWS::ECS::Service\n    Properties:\n      Cluster:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSClusterName\"\n      CapacityProviderStrategy:\n        - CapacityProvider: FARGATE\n          Base: 0\n          Weight: 1\n      TaskDefinition: !Ref TaskDefinition\n      ServiceName: !Sub ${Environment}-${ServiceName}-service\n      SchedulingStrategy: REPLICA\n      DesiredCount: !Ref TaskDesiredCount\n      AvailabilityZoneRebalancing: ENABLED\n      NetworkConfiguration:\n        AwsvpcConfiguration:\n          AssignPublicIp: ENABLED\n          SecurityGroups:\n            - Ref: SecurityGroup\n          Subnets: !Ref SubnetIDs\n      PlatformVersion: LATEST\n      DeploymentConfiguration:\n        MaximumPercent: 200\n        MinimumHealthyPercent: 100\n        DeploymentCircuitBreaker:\n          Enable: true\n          Rollback: true\n      DeploymentController:\n        Type: ECS\n      ServiceConnectConfiguration:\n        Enabled: false\n      ServiceRegistries:\n        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn\n      Tags:\n        - Key: app\n          Value: onyx\n        - Key: service\n          Value: !Ref ServiceName\n        - Key: env\n          Value: !Ref Environment\n      EnableECSManagedTags: true\n\n  SecurityGroup:\n    Type: AWS::EC2::SecurityGroup\n    Properties:\n      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.\n      GroupName: !Sub ${Environment}-ecs-${ServiceName}\n      VpcId: !Ref VpcID\n      SecurityGroupIngress:\n        - FromPort: 19071\n          ToPort: 19071\n          IpProtocol: tcp\n          CidrIp: 0.0.0.0/0\n        - FromPort: 19071\n          ToPort: 19071\n          IpProtocol: tcp\n          CidrIpv6: \"::/0\"\n        - FromPort: 8081\n          ToPort: 8081\n          IpProtocol: tcp\n          CidrIp: 0.0.0.0/0\n        - FromPort: 8081\n          ToPort: 8081\n          IpProtocol: tcp\n          CidrIpv6: \"::/0\"\n        - FromPort: 2049\n          ToPort: 2049\n          IpProtocol: tcp\n          SourceSecurityGroupId:\n            Fn::ImportValue:\n              Fn::Sub: \"${Environment}-onyx-efs-EFSSecurityGroupMountTargets\"\n\n  ServiceDiscoveryService:\n    Type: \"AWS::ServiceDiscovery::Service\"\n    Properties:\n      Name: !Sub ${Environment}-${ServiceName}-service\n      DnsConfig:\n        DnsRecords:\n          - Type: \"A\"\n            TTL: 15\n      NamespaceId:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespace\"\n      HealthCheckCustomConfig:\n        FailureThreshold: 1\n  \n  TaskDefinition:\n    Type: AWS::ECS::TaskDefinition\n    Properties:\n      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition\n      TaskRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskRole\"\n      ExecutionRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskExecutionRole\"\n      NetworkMode: awsvpc\n      RequiresCompatibilities:\n        - FARGATE\n      Cpu: !Ref TaskCpu\n      Memory: !Ref TaskMemory\n      RuntimePlatform:\n        CpuArchitecture: ARM64\n        OperatingSystemFamily: LINUX\n      ContainerDefinitions:\n        - Name: vespaengine\n          Image: vespaengine/vespa:8.609.39\n          Cpu: 0\n          Essential: true\n          PortMappings:\n            - Name: vespaengine_port\n              ContainerPort: 19071\n              HostPort: 19071\n              Protocol: tcp\n              AppProtocol: http\n            - Name: vespaengine_port2\n              ContainerPort: 8081\n              HostPort: 8081\n              Protocol: tcp\n              AppProtocol: http\n          MountPoints:\n            - SourceVolume: efs-volume-data\n              ContainerPath: /opt/vespa/var\n              ReadOnly: false\n            - SourceVolume: efs-volume-tmp\n              ContainerPath: /var/tmp\n              ReadOnly: false\n          LogConfiguration:\n            LogDriver: awslogs\n            Options:\n              awslogs-group: /ecs/OnyxVespaEngineTaskDefinition\n              mode: non-blocking\n              awslogs-create-group: \"true\"\n              max-buffer-size: \"25m\"\n              awslogs-region: !Ref AWS::Region\n              awslogs-stream-prefix: ecs\n          User: \"1000\"\n          Environment:\n            - Name: VESPA_SKIP_UPGRADE_CHECK\n              Value: \"true\"\n          VolumesFrom: []\n          SystemControls: []\n      Volumes:\n        - Name: efs-volume-tmp\n          EFSVolumeConfiguration:\n            FilesystemId:\n              Fn::ImportValue:\n                Fn::Sub: \"${Environment}-onyx-efs-OnyxEfsId\"\n            RootDirectory: \"/\"\n            TransitEncryption: ENABLED\n            AuthorizationConfig:\n              AccessPointId:\n                Fn::ImportValue:\n                  Fn::Sub: \"${Environment}-onyx-efs-VespaEngineTmpEfsAccessPoint\"\n        - Name: efs-volume-data\n          EFSVolumeConfiguration:\n            FilesystemId:\n              Fn::ImportValue:\n                Fn::Sub: \"${Environment}-onyx-efs-OnyxEfsId\"\n            RootDirectory: \"/\"\n            TransitEncryption: ENABLED\n            AuthorizationConfig:\n              AccessPointId:\n                Fn::ImportValue:\n                  Fn::Sub: \"${Environment}-onyx-efs-VespaEngineDataEfsAccessPoint\"\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/services/onyx_web_server_service_template.yaml",
    "content": "AWSTemplateFormatVersion: \"2010-09-09\"\nDescription: CloudFormation template for Onyx Web Server TaskDefinition\nParameters:\n  Environment:\n    Type: String\n  SubnetIDs:\n    Type: CommaDelimitedList\n    Description: \"Comma-delimited list of at least two subnet IDs in different Availability Zones\"\n  VpcID:\n    Type: String\n    Default: vpc-098cfa79d637dabff\n  ServiceName:\n    Type: String\n    Default: onyx-web-server\n  TaskCpu:\n    Type: String\n    Default: \"1024\"\n  TaskMemory:\n    Type: String\n    Default: \"2048\"\n  TaskDesiredCount:\n    Type: Number\n    Default: 1\n\nResources:\n\n  ECSService:\n    Type: AWS::ECS::Service\n    Properties:\n      Cluster:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSClusterName\"\n      CapacityProviderStrategy:\n        - CapacityProvider: FARGATE\n          Base: 0\n          Weight: 1\n      TaskDefinition: !Ref TaskDefinition\n      ServiceName: !Sub ${Environment}-${ServiceName}-service\n      SchedulingStrategy: REPLICA\n      DesiredCount: !Ref TaskDesiredCount\n      AvailabilityZoneRebalancing: ENABLED\n      NetworkConfiguration:\n        AwsvpcConfiguration:\n          AssignPublicIp: ENABLED\n          SecurityGroups:\n            - Ref: SecurityGroup\n          Subnets: !Ref SubnetIDs\n      PlatformVersion: LATEST\n      DeploymentConfiguration:\n        MaximumPercent: 200\n        MinimumHealthyPercent: 100\n        DeploymentCircuitBreaker:\n          Enable: true\n          Rollback: true\n      DeploymentController:\n        Type: ECS\n      ServiceConnectConfiguration:\n        Enabled: false\n      ServiceRegistries:\n        - RegistryArn: !GetAtt ServiceDiscoveryService.Arn\n      Tags:\n        - Key: app\n          Value: onyx\n        - Key: service\n          Value: !Ref ServiceName\n        - Key: env\n          Value: !Ref Environment\n      EnableECSManagedTags: true\n\n  SecurityGroup:\n    Type: AWS::EC2::SecurityGroup\n    Properties:\n      GroupDescription: !Sub Onyx SecurityGroup access to EFS mount and ${ServiceName}.\n      GroupName: !Sub ${Environment}-ecs-${ServiceName}\n      VpcId: !Ref VpcID\n      SecurityGroupIngress:\n        - FromPort: 3000\n          ToPort: 3000\n          IpProtocol: tcp\n          CidrIp: 0.0.0.0/0\n        - FromPort: 3000\n          ToPort: 3000\n          IpProtocol: tcp\n          CidrIpv6: \"::/0\"\n\n  ServiceDiscoveryService:\n    Type: \"AWS::ServiceDiscovery::Service\"\n    Properties:\n      Name: !Sub ${Environment}-${ServiceName}-service\n      DnsConfig:\n        DnsRecords:\n          - Type: \"A\"\n            TTL: 15\n      NamespaceId:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespace\"\n      HealthCheckCustomConfig:\n        FailureThreshold: 1\n  \n  TaskDefinition:\n    Type: AWS::ECS::TaskDefinition\n    Properties:\n      Family: !Sub ${Environment}-${ServiceName}-TaskDefinition\n      TaskRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskRole\"\n      ExecutionRoleArn:\n        Fn::ImportValue:\n          Fn::Sub: \"${Environment}-onyx-cluster-ECSTaskExecutionRole\"\n      NetworkMode: awsvpc\n      RequiresCompatibilities:\n        - FARGATE\n      Cpu: !Ref TaskCpu\n      Memory: !Ref TaskMemory\n      RuntimePlatform:\n        CpuArchitecture: ARM64\n        OperatingSystemFamily: LINUX\n      ContainerDefinitions:\n        - Name: onyx-webserver\n          Image: onyxdotapp/onyx-web-server:latest\n          Cpu: 0\n          Essential: true\n          PortMappings:\n            - Name: webserver\n              ContainerPort: 3000\n              HostPort: 3000\n              Protocol: tcp\n          Environment:\n            - Name: INTERNAL_URL\n              Value: !Sub\n                - \"http://${Environment}-onyx-backend-api-server-service.${ImportedNamespace}:8080\"\n                - ImportedNamespace: !ImportValue\n                    Fn::Sub: \"${Environment}-onyx-cluster-OnyxNamespaceName\"\n          LogConfiguration:\n            LogDriver: awslogs\n            Options:\n              awslogs-group: !Sub /ecs/${Environment}-${ServiceName}\n              mode: non-blocking\n              awslogs-create-group: \"true\"\n              max-buffer-size: \"25m\"\n              awslogs-region: !Ref AWS::Region\n              awslogs-stream-prefix: ecs\n          User: \"1000\"\n          VolumesFrom: []\n          SystemControls: []\n\n  ECSAutoScalingTarget:\n    Type: AWS::ApplicationAutoScaling::ScalableTarget\n    DependsOn: ECSService\n    Properties:\n      MaxCapacity: 5\n      MinCapacity: 1\n      ResourceId: !Sub\n        - \"service/${ImportedCluster}/${Environment}-${ServiceName}-service\"\n        - ImportedCluster: !ImportValue\n            'Fn::Sub': \"${Environment}-onyx-cluster-ECSClusterName\"\n          ServiceName: !Ref ServiceName\n          Environment: !Ref Environment\n      ScalableDimension: ecs:service:DesiredCount\n      ServiceNamespace: ecs\n\n  ECSAutoScalingPolicy:\n    Type: AWS::ApplicationAutoScaling::ScalingPolicy\n    Properties:\n      PolicyName: !Sub ${Environment}-${ServiceName}-service-cpu-scaleout\n      ScalingTargetId: !Ref ECSAutoScalingTarget\n      PolicyType: TargetTrackingScaling\n      TargetTrackingScalingPolicyConfiguration:\n        TargetValue: 75\n        PredefinedMetricSpecification:\n          PredefinedMetricType: ECSServiceAverageCPUUtilization\n        ScaleOutCooldown: 60\n        ScaleInCooldown: 60\n\n  ECSAutoScalingPolicyMemory:\n    Type: AWS::ApplicationAutoScaling::ScalingPolicy\n    Properties:\n      PolicyName: !Sub ${Environment}-${ServiceName}-service-memory-scaleout\n      ScalingTargetId: !Ref ECSAutoScalingTarget\n      PolicyType: TargetTrackingScaling\n      TargetTrackingScalingPolicyConfiguration:\n        TargetValue: 80\n        PredefinedMetricSpecification:\n          PredefinedMetricType: ECSServiceAverageMemoryUtilization\n        ScaleOutCooldown: 60\n        ScaleInCooldown: 60\n"
  },
  {
    "path": "deployment/aws_ecs_fargate/cloudformation/uninstall.sh",
    "content": "#!/bin/bash\n\nAWS_REGION=\"${AWS_REGION:-us-west-1}\"\n\n# Reference to consolidated config\nCONFIG_FILE=\"onyx_config.json\"\n\n# Get environment from config file\nENVIRONMENT=$(jq -r '.Environment' \"$CONFIG_FILE\")\nif [ -z \"$ENVIRONMENT\" ] || [ \"$ENVIRONMENT\" == \"null\" ]; then\n    echo \"Missing Environment in $CONFIG_FILE. Please add the Environment field.\"\n    exit 1\nfi\n\n# Try to get S3_BUCKET from config, fallback to default if not found\nS3_BUCKET_FROM_CONFIG=$(jq -r '.S3Bucket // empty' \"$CONFIG_FILE\")\nif [ -n \"$S3_BUCKET_FROM_CONFIG\" ]; then\n    S3_BUCKET=\"$S3_BUCKET_FROM_CONFIG\"\nelse\n    S3_BUCKET=\"${S3_BUCKET:-onyx-ecs-fargate-configs}\"\nfi\n\nSTACK_NAMES=(\n  \"${ENVIRONMENT}-onyx-nginx-service\"\n  \"${ENVIRONMENT}-onyx-web-server-service\"\n  \"${ENVIRONMENT}-onyx-backend-background-server-service\"\n  \"${ENVIRONMENT}-onyx-backend-api-server-service\"\n  \"${ENVIRONMENT}-onyx-model-server-inference-service\"\n  \"${ENVIRONMENT}-onyx-model-server-indexing-service\"\n  \"${ENVIRONMENT}-onyx-vespaengine-service\"\n  \"${ENVIRONMENT}-onyx-redis-service\"\n  \"${ENVIRONMENT}-onyx-postgres-service\"\n  \"${ENVIRONMENT}-onyx-cluster\"\n  \"${ENVIRONMENT}-onyx-acm\"\n  \"${ENVIRONMENT}-onyx-efs\"\n  )\n\ndelete_stack() {\n  local stack_name=$1\n\n  if [ \"$stack_name\" == \"${ENVIRONMENT}-onyx-cluster\" ]; then\n      echo \"Removing all objects and directories from the onyx config s3 bucket.\"\n      aws s3 rm \"s3://${ENVIRONMENT}-${S3_BUCKET}\" --recursive\n      sleep 5\n  fi\n\n  echo \"Checking if stack $stack_name exists...\"\n  if aws cloudformation describe-stacks --stack-name \"$stack_name\" --region \"$AWS_REGION\" > /dev/null 2>&1; then\n  \techo \"Deleting stack: $stack_name...\"\n  \taws cloudformation delete-stack \\\n\t\t--stack-name \"$stack_name\" \\\n\t\t--region \"$AWS_REGION\"\n\t\n\techo \"Waiting for stack $stack_name to be deleted...\"\n\taws cloudformation wait stack-delete-complete \\\n\t\t--stack-name \"$stack_name\" \\\n\t\t--region \"$AWS_REGION\"\n\n\tif [ $? -eq 0 ]; then\n\t\techo \"Stack $stack_name deleted successfully.\"\n\t\tsleep 10\n\telse\n\t\techo \"Failed to delete stack $stack_name. Exiting.\"\n\t\texit 1\n\tfi\n  else\n\techo \"Stack $stack_name does not exist, skipping.\"\n\treturn 0\n  fi\t\n}\n\nfor stack_name in \"${STACK_NAMES[@]}\"; do\n  delete_stack \"$stack_name\"\ndone\n\necho \"All stacks deleted successfully.\"\n"
  },
  {
    "path": "deployment/data/nginx/app.conf.template",
    "content": "# Log format to include request latency\nlog_format custom_main '$remote_addr - $remote_user [$time_local] \"$request\" '\n                '$status $body_bytes_sent \"$http_referer\" '\n                '\"$http_user_agent\" \"$http_x_forwarded_for\" '\n                'rt=$request_time';\n\nupstream api_server {\n    # fail_timeout=0 means we always retry an upstream even if it failed\n    # to return a good HTTP response\n\n    # for UNIX domain socket setups\n    #server unix:/tmp/gunicorn.sock fail_timeout=0;\n\n    # for a TCP configuration\n    # TODO: use gunicorn to manage multiple processes\n    server ${ONYX_BACKEND_API_HOST}:8080 fail_timeout=0;\n}\n\nupstream web_server {\n    server ${ONYX_WEB_SERVER_HOST}:3000 fail_timeout=0;\n}\n\n# Conditionally include MCP upstream configuration\ninclude /etc/nginx/conf.d/mcp_upstream.conf.inc;\n\n# WebSocket support: only set Connection \"upgrade\" for actual upgrade requests\nmap $http_upgrade $connection_upgrade {\n    default upgrade;\n    ''      close;\n}\n\nserver {\n    listen 80 default_server;\n\n    client_max_body_size 5G;    # Maximum upload size\n\n    access_log /var/log/nginx/access.log custom_main;\n\n    # Conditionally include MCP location configuration\n    include /etc/nginx/conf.d/mcp.conf.inc;\n\n    location ~ ^/scim(/.*)?$ {\n        proxy_set_header X-Real-IP $remote_addr;\n        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n        proxy_set_header X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host $host;\n        proxy_set_header X-Forwarded-Port $server_port;\n        proxy_set_header Host $host;\n        proxy_http_version 1.1;\n        proxy_buffering off;\n        proxy_redirect off;\n        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;\n        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;\n        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;\n        proxy_pass http://api_server;\n    }\n\n    # Match both /api/* and /openapi.json in a single rule\n    location ~ ^/(api|openapi.json)(/.*)?$ {\n        # Rewrite /api prefixed matched paths\n        rewrite ^/api(/.*)$ $1 break;\n\n        # misc headers\n        proxy_set_header X-Real-IP $remote_addr;\n        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n        proxy_set_header X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host $host;\n        proxy_set_header X-Forwarded-Port $server_port;\n        proxy_set_header Host $host;\n\n        # need to use 1.1 to support chunked transfers and WebSocket\n        proxy_http_version 1.1;\n        proxy_set_header Upgrade $http_upgrade;\n        proxy_set_header Connection $connection_upgrade;\n        proxy_buffering off;\n\n        # timeout settings\n        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;\n        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;\n        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;\n\n        # we don't want nginx trying to do something clever with\n        # redirects, we set the Host: header above already.\n        proxy_redirect off;\n        proxy_pass http://api_server;\n    }\n\n    location / {\n        # misc headers\n        proxy_set_header X-Real-IP $remote_addr;\n        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n        proxy_set_header X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host $host;\n        proxy_set_header X-Forwarded-Port $server_port;\n        proxy_set_header Host $host;\n\n        proxy_http_version 1.1;\n\n        # timeout settings\n        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;\n        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;\n        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;\n\n        # we don't want nginx trying to do something clever with\n        # redirects, we set the Host: header above already.\n        proxy_redirect off;\n        proxy_pass http://web_server;\n    }\n\n}\n"
  },
  {
    "path": "deployment/data/nginx/app.conf.template.no-letsencrypt",
    "content": "# Log format to include request latency\nlog_format custom_main '$remote_addr - $remote_user [$time_local] \"$request\" '\n                '$status $body_bytes_sent \"$http_referer\" '\n                '\"$http_user_agent\" \"$http_x_forwarded_for\" '\n                'rt=$request_time';\n\nupstream api_server {\n    # fail_timeout=0 means we always retry an upstream even if it failed\n    # to return a good HTTP response\n\n    # for UNIX domain socket setups\n    #server unix:/tmp/gunicorn.sock fail_timeout=0;\n\n    # for a TCP configuration\n    # TODO: use gunicorn to manage multiple processes\n    server api_server:8080 fail_timeout=0;\n}\n\nupstream web_server {\n    server web_server:3000 fail_timeout=0;\n}\n\n# Conditionally include MCP upstream configuration\ninclude /etc/nginx/conf.d/mcp_upstream.conf.inc;\n\n# WebSocket support: only set Connection \"upgrade\" for actual upgrade requests\nmap $http_upgrade $connection_upgrade {\n    default upgrade;\n    ''      close;\n}\n\nserver {\n    listen 80 default_server;\n\n    client_max_body_size 5G;    # Maximum upload size\n\n    access_log /var/log/nginx/access.log custom_main;\n\n    # Conditionally include MCP location configuration\n    include /etc/nginx/conf.d/mcp.conf.inc;\n\n    location ~ ^/scim(/.*)?$ {\n        proxy_set_header X-Real-IP $remote_addr;\n        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n        # don't trust client-supplied X-Forwarded-* headers — use nginx's own values\n        proxy_set_header X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host $host;\n        proxy_set_header X-Forwarded-Port $server_port;\n        proxy_set_header Host $host;\n        proxy_http_version 1.1;\n        proxy_buffering off;\n        proxy_redirect off;\n        proxy_pass http://api_server;\n    }\n\n    # Match both /api/* and /openapi.json in a single rule\n    location ~ ^/(api|openapi.json)(/.*)?$ {\n        # Rewrite /api prefixed matched paths\n        rewrite ^/api(/.*)$ $1 break;\n\n        # misc headers\n        proxy_set_header X-Real-IP $remote_addr;\n        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n        # don't trust client-supplied X-Forwarded-* headers — use nginx's own values\n        proxy_set_header X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host $host;\n        proxy_set_header X-Forwarded-Port $server_port;\n        proxy_set_header Host $host;\n\n        # need to use 1.1 to support chunked transfers and WebSocket\n        proxy_http_version 1.1;\n        proxy_set_header Upgrade $http_upgrade;\n        proxy_set_header Connection $connection_upgrade;\n        proxy_buffering off;\n\n        # we don't want nginx trying to do something clever with\n        # redirects, we set the Host: header above already.\n        proxy_redirect off;\n        proxy_pass http://api_server;\n    }\n\n    location / {\n        # misc headers\n        proxy_set_header X-Real-IP $remote_addr;\n        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n        # don't trust client-supplied X-Forwarded-* headers — use nginx's own values\n        proxy_set_header X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host $host;\n        proxy_set_header X-Forwarded-Port $server_port;\n        proxy_set_header Host $host;\n\n        proxy_http_version 1.1;\n\n        # we don't want nginx trying to do something clever with\n        # redirects, we set the Host: header above already.\n        proxy_redirect off;\n        proxy_pass http://web_server;\n    }\n}\n\nserver {\n    listen 443 ssl default_server;\n\n    client_max_body_size 5G;    # Maximum upload size\n    \n    location / {\n        # misc headers\n        proxy_set_header X-Real-IP $remote_addr;\n        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n        # don't use forwarded schema, host, or port here - this is the entry point\n        proxy_set_header X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host $host; \n        proxy_set_header X-Forwarded-Port $server_port;\n        proxy_set_header Host $host;\n\n        proxy_http_version 1.1;\n        proxy_set_header Upgrade $http_upgrade;\n        proxy_set_header Connection $connection_upgrade;\n        proxy_buffering off;\n        # we don't want nginx trying to do something clever with\n        # redirects, we set the Host: header above already.\n        proxy_redirect off;\n        proxy_pass http://localhost:80;\n    }\n\n    ssl_certificate /etc/nginx/sslcerts/${SSL_CERT_FILE_NAME};\n    ssl_certificate_key /etc/nginx/sslcerts/${SSL_CERT_KEY_FILE_NAME};\n}\n"
  },
  {
    "path": "deployment/data/nginx/app.conf.template.prod",
    "content": "# Log format to include request latency\nlog_format custom_main '$remote_addr - $remote_user [$time_local] \"$request\" '\n                '$status $body_bytes_sent \"$http_referer\" '\n                '\"$http_user_agent\" \"$http_x_forwarded_for\" '\n                'rt=$request_time';\n\nupstream api_server {\n    # fail_timeout=0 means we always retry an upstream even if it failed\n    # to return a good HTTP response\n\n    # for UNIX domain socket setups\n    #server unix:/tmp/gunicorn.sock fail_timeout=0;\n\n    # for a TCP configuration\n    # TODO: use gunicorn to manage multiple processes\n    server ${ONYX_BACKEND_API_HOST}:8080 fail_timeout=0;\n}\n\nupstream web_server {\n    server ${ONYX_WEB_SERVER_HOST}:3000 fail_timeout=0;\n}\n\n# Conditionally include MCP upstream configuration\ninclude /etc/nginx/conf.d/mcp_upstream.conf.inc;\n\n# WebSocket support: only set Connection \"upgrade\" for actual upgrade requests\nmap $http_upgrade $connection_upgrade {\n    default upgrade;\n    ''      close;\n}\n\nserver {\n    listen 80 default_server;\n\n    client_max_body_size 5G;    # Maximum upload size\n\n    access_log /var/log/nginx/access.log custom_main;\n\n    # Conditionally include MCP location configuration \n    include /etc/nginx/conf.d/mcp.conf.inc;\n\n    location ~ ^/scim(/.*)?$ {\n        proxy_set_header X-Real-IP $remote_addr;\n        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n        # don't trust client-supplied X-Forwarded-* headers — use nginx's own values\n        proxy_set_header X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host $host;\n        proxy_set_header X-Forwarded-Port $server_port;\n        proxy_set_header Host $host;\n        proxy_http_version 1.1;\n        proxy_buffering off;\n        proxy_redirect off;\n        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;\n        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;\n        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;\n        proxy_pass http://api_server;\n    }\n\n    # Match both /api/* and /openapi.json in a single rule\n    location ~ ^/(api|openapi.json)(/.*)?$ {\n        # Rewrite /api prefixed matched paths\n        rewrite ^/api(/.*)$ $1 break;\n\n        # misc headers\n        proxy_set_header X-Real-IP $remote_addr;\n        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n        # don't trust client-supplied X-Forwarded-* headers — use nginx's own values\n        proxy_set_header X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host $host;\n        proxy_set_header X-Forwarded-Port $server_port;\n        proxy_set_header Host $host;\n\n        # need to use 1.1 to support chunked transfers and WebSocket\n        proxy_http_version 1.1;\n        proxy_set_header Upgrade $http_upgrade;\n        proxy_set_header Connection $connection_upgrade;\n        proxy_buffering off;\n\n        # timeout settings\n        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;\n        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;\n        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;\n\n        # we don't want nginx trying to do something clever with\n        # redirects, we set the Host: header above already.\n        proxy_redirect off;\n        proxy_pass http://api_server;\n    }\n\n    location / {\n        # misc headers\n        proxy_set_header X-Real-IP $remote_addr;\n        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n        # don't trust client-supplied X-Forwarded-* headers — use nginx's own values\n        proxy_set_header X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host $host;\n        proxy_set_header X-Forwarded-Port $server_port;\n        proxy_set_header Host $host;\n\n        proxy_http_version 1.1;\n\n        # timeout settings\n        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;\n        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;\n        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;\n\n        # we don't want nginx trying to do something clever with\n        # redirects, we set the Host: header above already.\n        proxy_redirect off;\n        proxy_pass http://web_server;\n    }\n\n    location /.well-known/acme-challenge/ {\n        root /var/www/certbot;\n    }\n}\n\nserver {\n    listen 443 ssl default_server;\n\n    client_max_body_size 5G;    # Maximum upload size\n\n    location / {\n        # misc headers\n        proxy_set_header X-Real-IP $remote_addr;\n        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n        # don't use forwarded schema, host, or port here - this is the entry point\n        proxy_set_header X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host $host;\n        proxy_set_header X-Forwarded-Port $server_port;\n        proxy_set_header Host $host;\n\n        proxy_http_version 1.1;\n        proxy_set_header Upgrade $http_upgrade;\n        proxy_set_header Connection $connection_upgrade;\n        proxy_buffering off;\n\n        # timeout settings\n        proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;\n        proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;\n        proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;\n\n        # we don't want nginx trying to do something clever with\n        # redirects, we set the Host: header above already.\n        proxy_redirect off;\n        proxy_pass http://localhost:80;\n    }\n\n    ssl_certificate /etc/letsencrypt/live/${DOMAIN}/fullchain.pem;\n    ssl_certificate_key /etc/letsencrypt/live/${DOMAIN}/privkey.pem;\n    include /etc/letsencrypt/options-ssl-nginx.conf;\n    ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;\n}\n"
  },
  {
    "path": "deployment/data/nginx/mcp.conf.inc.template",
    "content": "# MCP Server - Model Context Protocol for LLM integrations\n# Match /mcp, /mcp/, or /mcp/* but NOT /mcpserver, /mcpapi, etc.\nlocation ~ ^/mcp(/.*)?$ {\n    # misc headers\n    proxy_set_header X-Real-IP $remote_addr;\n    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n    proxy_set_header X-Forwarded-Proto $scheme;\n    proxy_set_header X-Forwarded-Host $host;\n    proxy_set_header X-Forwarded-Port $server_port;\n    proxy_set_header Host $host;\n\n    # Standard HTTP 1.1\n    proxy_http_version 1.1;\n\n    # Timeouts for MCP requests\n    proxy_connect_timeout 30s;\n    proxy_send_timeout 300s;\n    proxy_read_timeout 300s;\n\n    proxy_redirect off;\n    rewrite ^/mcp(/.*)$ $1 break;\n    rewrite ^/mcp/?$ / break;\n    proxy_pass http://mcp_server;\n}\n"
  },
  {
    "path": "deployment/data/nginx/mcp_upstream.conf.inc.template",
    "content": "upstream mcp_server {\n    server ${ONYX_MCP_SERVER_HOST}:8090 fail_timeout=0;\n}\n"
  },
  {
    "path": "deployment/data/nginx/run-nginx.sh",
    "content": "# fill in the template\nexport ONYX_BACKEND_API_HOST=\"${ONYX_BACKEND_API_HOST:-api_server}\"\nexport ONYX_WEB_SERVER_HOST=\"${ONYX_WEB_SERVER_HOST:-web_server}\"\nexport ONYX_MCP_SERVER_HOST=\"${ONYX_MCP_SERVER_HOST:-mcp_server}\"\n\nexport SSL_CERT_FILE_NAME=\"${SSL_CERT_FILE_NAME:-ssl.crt}\"\nexport SSL_CERT_KEY_FILE_NAME=\"${SSL_CERT_KEY_FILE_NAME:-ssl.key}\"\n\n# Nginx timeout settings (in seconds)\nexport NGINX_PROXY_CONNECT_TIMEOUT=\"${NGINX_PROXY_CONNECT_TIMEOUT:-300}\"\nexport NGINX_PROXY_SEND_TIMEOUT=\"${NGINX_PROXY_SEND_TIMEOUT:-300}\"\nexport NGINX_PROXY_READ_TIMEOUT=\"${NGINX_PROXY_READ_TIMEOUT:-300}\"\n\necho \"Using API server host: $ONYX_BACKEND_API_HOST\"\necho \"Using web server host: $ONYX_WEB_SERVER_HOST\"\necho \"Using MCP server host: $ONYX_MCP_SERVER_HOST\"\necho \"Using nginx proxy timeouts - connect: ${NGINX_PROXY_CONNECT_TIMEOUT}s, send: ${NGINX_PROXY_SEND_TIMEOUT}s, read: ${NGINX_PROXY_READ_TIMEOUT}s\"\n\nenvsubst '$DOMAIN $SSL_CERT_FILE_NAME $SSL_CERT_KEY_FILE_NAME $ONYX_BACKEND_API_HOST $ONYX_WEB_SERVER_HOST $ONYX_MCP_SERVER_HOST $NGINX_PROXY_CONNECT_TIMEOUT $NGINX_PROXY_SEND_TIMEOUT $NGINX_PROXY_READ_TIMEOUT' < \"/etc/nginx/conf.d/$1\" > /etc/nginx/conf.d/app.conf\n\n# Conditionally create MCP server configuration\nif [ \"${MCP_SERVER_ENABLED}\" = \"True\" ] || [ \"${MCP_SERVER_ENABLED}\" = \"true\" ]; then\n  echo \"MCP server is enabled, creating MCP configuration...\"\n  envsubst '$ONYX_MCP_SERVER_HOST' < \"/etc/nginx/conf.d/mcp_upstream.conf.inc.template\" > /etc/nginx/conf.d/mcp_upstream.conf.inc\n  envsubst '$ONYX_MCP_SERVER_HOST' < \"/etc/nginx/conf.d/mcp.conf.inc.template\" > /etc/nginx/conf.d/mcp.conf.inc\nelse\n  echo \"MCP server is disabled, removing MCP configuration...\"\n  # Leave empty placeholder files so nginx includes do not fail\n  # These files are empty because MCP server is disabled\n  echo \"# Empty file - MCP server is disabled\" > /etc/nginx/conf.d/mcp_upstream.conf.inc\n  echo \"# Empty file - MCP server is disabled\" > /etc/nginx/conf.d/mcp.conf.inc\nfi\n\n# wait for the api_server to be ready\necho \"Waiting for API server to boot up; this may take a minute or two...\"\necho \"If this takes more than ~5 minutes, check the logs of the API server container for errors with the following command:\"\necho\necho \"docker logs onyx-api_server-1\"\necho\n\nwhile true; do\n  # Use curl to send a request and capture the HTTP status code\n  status_code=$(curl -o /dev/null -s -w \"%{http_code}\\n\" \"http://${ONYX_BACKEND_API_HOST}:8080/health\")\n  \n  # Check if the status code is 200\n  if [ \"$status_code\" -eq 200 ]; then\n    echo \"API server responded with 200, starting nginx...\"\n    break  # Exit the loop\n  else\n    echo \"API server responded with $status_code, retrying in 5 seconds...\"\n    sleep 5  # Sleep for 5 seconds before retrying\n  fi\ndone\n\n# Start nginx and reload every 6 hours\nwhile :; do sleep 6h & wait; nginx -s reload; done & nginx -g \"daemon off;\"\n"
  },
  {
    "path": "deployment/docker_compose/README.md",
    "content": "# Welcome to Onyx\n\nTo set up Onyx there are several options, Onyx supports the following for deployment:\n1. Quick guided install via the install.sh script\n2. Pulling the repo and running `docker compose up -d` from the deployment/docker_compose directory\n  - Note, it is recommended to copy over the env.template file to .env and edit the necessary values\n3. For large scale deployments leveraging Kubernetes, there are two options, Helm or Terraform.\n\nThis README focuses on the easiest guided deployment which is via install.sh.\n\n**For more detailed guides, please refer to the documentation: https://docs.onyx.app/deployment/overview**\n\n## install.sh script\n\n```\ncurl -fsSL https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.sh > install.sh && chmod +x install.sh && ./install.sh\n```\n\nThis provides a guided installation of Onyx via Docker Compose. It will deploy the latest version of Onyx\nand set up the volumes to ensure data is persisted across deployments or upgrades.\n\nThe script will create an onyx_data directory, all necessary files for the deployment will be stored in\nthere. Note that no application critical data is stored in that directory so even if you delete it, the\ndata needed to restore the app will not be destroyed.\n\nThe data about chats, users, etc. are instead stored as named Docker Volumes. This is managed by Docker\nand where it is stored will depend on your Docker setup. You can always delete these as well by running\nthe install.sh script with --delete-data.\n\nTo shut down the deployment without deleting, use install.sh --shutdown.\n\n### Upgrading the deployment\nOnyx maintains backwards compatibility across all minor versions following SemVer. If following the install.sh script (or through Docker Compose), you can\nupgrade it by first bringing down the containers. To do this, use `install.sh --shutdown`\n(or `docker compose down` from the directory with the docker-compose.yml file).\n\nAfter the containers are stopped, you can safely upgrade by either re-running the `install.sh` script (if you left the values as default which is latest,\nthen it will automatically update to latest each time the script is run). If you are more comfortable running docker compose commands, you can also run\ncommands directly from the directory with the docker-compose.yml file. First verify the version you want in the environment file (see below),\n(if using `latest` tag, be sure to run `docker compose pull`) and run `docker compose up` to restart the services on the latest version\n\n### Environment variables\nThe Docker Compose files try to look for a .env file in the same directory. The `install.sh` script sets it up from a file called env.template which is\ndownloaded during the initial setup. Feel free to edit the .env file to customize your deployment. The most important / common changed values are\nlocated near the top of the file.\n\nIMAGE_TAG is the version of Onyx to run. It is recommended to leave it as latest to get all updates with each redeployment.\n"
  },
  {
    "path": "deployment/docker_compose/docker-compose.dev.yml",
    "content": "# Docker Compose Override for Development/Testing\n# This file exposes service ports for development and testing purposes\n#\n# Usage:\n#   docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait\n#\n# Or set COMPOSE_FILE environment variable:\n#   export COMPOSE_FILE=docker-compose.yml:docker-compose.dev.yml\n#   docker compose up -d --wait\n\nservices:\n  api_server:\n    ports:\n      - \"8080:8080\"\n    deploy:\n      resources:\n        limits:\n          cpus: \"${API_SERVER_CPU_LIMIT:-0}\"\n          memory: \"${API_SERVER_MEM_LIMIT:-0}\"\n\n  # Uncomment the block below to enable the MCP server for Onyx.\n  # mcp_server:\n  #   ports:\n  #     - \"8090:8090\"\n\n  relational_db:\n    ports:\n      - \"5432:5432\"\n\n  index:\n    ports:\n      - \"19071:19071\"\n      - \"8081:8081\"\n    healthcheck:\n      # We use 19071 because the application server at 8010 is not ready until\n      # it is activated/configured by the application.\n      test: [\"CMD-SHELL\", \"curl -sf http://localhost:19071/state/v1/health\"]\n      interval: 30s\n      timeout: 10s\n      retries: 3\n      start_period: 30s\n\n  opensearch:\n    ports:\n      - \"9200:9200\"\n    # Rootless Docker can reject the base OpenSearch ulimit settings, so clear\n    # the inherited block entirely in the dev override.\n    ulimits: !reset null\n    environment:\n      - bootstrap.memory_lock=false\n\n  inference_model_server:\n    ports:\n      - \"9000:9000\"\n\n  cache:\n    ports:\n      - \"6379:6379\"\n\n  minio:\n    # use different ports to avoid conflicts with model servers\n    ports:\n      - \"9004:9000\"\n      - \"9005:9001\"\n\n  code-interpreter:\n    ports:\n      - \"8000:8000\"\n"
  },
  {
    "path": "deployment/docker_compose/docker-compose.mcp-api-key-test.yml",
    "content": "name: onyx\n\nservices:\n  mcp_api_key_server:\n    image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:latest}\n    restart: on-failure\n    working_dir: /workspace\n    environment:\n      - MCP_API_KEY_TEST_PORT=${MCP_API_KEY_TEST_PORT:-8005}\n      - MCP_API_KEY=${MCP_API_KEY:-test-api-key-12345}\n      - MCP_SERVER_HOST=${MCP_API_KEY_SERVER_HOST:-0.0.0.0}\n      - MCP_SERVER_PUBLIC_HOST=${MCP_API_KEY_SERVER_PUBLIC_HOST:-host.docker.internal}\n    command: >\n      /bin/sh -c \"\n      python backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_api_key.py ${MCP_API_KEY:-test-api-key-12345} ${MCP_API_KEY_TEST_PORT:-8005}\n      \"\n    ports:\n      - \"${MCP_API_KEY_TEST_PORT:-8005}:${MCP_API_KEY_TEST_PORT:-8005}\"\n    volumes:\n      - ../..:/workspace:ro\n\n"
  },
  {
    "path": "deployment/docker_compose/docker-compose.mcp-oauth-test.yml",
    "content": "name: onyx\n\nservices:\n  mcp_oauth_server:\n    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}\n    restart: on-failure\n    working_dir: /workspace\n    environment:\n      - MCP_OAUTH_CLIENT_ID=${MCP_OAUTH_CLIENT_ID:-}\n      - MCP_OAUTH_CLIENT_SECRET=${MCP_OAUTH_CLIENT_SECRET:-}\n      - MCP_OAUTH_ISSUER=${MCP_OAUTH_ISSUER:-}\n      - MCP_OAUTH_JWKS_URI=${MCP_OAUTH_JWKS_URI:-}\n      - MCP_OAUTH_USERNAME=${MCP_OAUTH_USERNAME:-}\n      - MCP_OAUTH_PASSWORD=${MCP_OAUTH_PASSWORD:-}\n      - MCP_OAUTH_REQUIRED_SCOPES=${MCP_OAUTH_REQUIRED_SCOPES:-mcp:use}\n      - MCP_TEST_SERVER_PORT=${MCP_TEST_SERVER_PORT:-8004}\n      - MCP_SERVER_PORT=${MCP_TEST_SERVER_PORT:-8004}\n      - MCP_SERVER_HOST=${MCP_SERVER_HOST:-0.0.0.0}\n      - MCP_SERVER_PUBLIC_HOST=${MCP_SERVER_PUBLIC_HOST:-host.docker.internal}\n      - MCP_SERVER_PUBLIC_URL=${MCP_SERVER_PUBLIC_URL:-}\n    command: >\n      /bin/sh -c \"\n      python backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_oauth.py ${MCP_TEST_SERVER_PORT:-8004}\n      \"\n    ports:\n      - \"${MCP_TEST_SERVER_PORT:-8004}:${MCP_TEST_SERVER_PORT:-8004}\"\n    volumes:\n      - ../..:/workspace:ro\n\n"
  },
  {
    "path": "deployment/docker_compose/docker-compose.multitenant-dev.yml",
    "content": "name: onyx\n\nservices:\n  api_server:\n    image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile\n    command: >\n      /bin/sh -c \"\n      alembic -n schema_private upgrade head &&\n      echo \\\"Starting Onyx Api Server\\\" &&\n      uvicorn onyx.main:app --host 0.0.0.0 --port 8080\"\n    depends_on:\n      - relational_db\n      - index\n      - cache\n      - inference_model_server\n      - minio\n    restart: unless-stopped\n    ports:\n      - \"8080:8080\"\n    environment:\n      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true\n      - MULTI_TENANT=true\n      - LOG_LEVEL=DEBUG\n      - AUTH_TYPE=cloud\n      - REQUIRE_EMAIL_VERIFICATION=false\n      - DISABLE_TELEMETRY=true\n      - IMAGE_TAG=test\n      - DEV_MODE=true\n      # Auth Settings\n      - SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-}\n      - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}\n      - VALID_EMAIL_DOMAINS=${VALID_EMAIL_DOMAINS:-}\n      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-}\n      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-}\n      - SMTP_SERVER=${SMTP_SERVER:-}\n      - SMTP_PORT=${SMTP_PORT:-587}\n      - SMTP_USER=${SMTP_USER:-}\n      - SMTP_PASS=${SMTP_PASS:-}\n      - ENABLE_EMAIL_INVITES=${ENABLE_EMAIL_INVITES:-}\n      - EMAIL_FROM=${EMAIL_FROM:-}\n      - OAUTH_CLIENT_ID=${OAUTH_CLIENT_ID:-}\n      - OAUTH_CLIENT_SECRET=${OAUTH_CLIENT_SECRET:-}\n      - OPENID_CONFIG_URL=${OPENID_CONFIG_URL:-}\n      - TRACK_EXTERNAL_IDP_EXPIRY=${TRACK_EXTERNAL_IDP_EXPIRY:-}\n      - CORS_ALLOWED_ORIGIN=${CORS_ALLOWED_ORIGIN:-}\n      # Gen AI Settings\n      - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}\n      - LLM_SOCKET_READ_TIMEOUT=${LLM_SOCKET_READ_TIMEOUT:-}\n      - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}\n      - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}\n      - LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}\n      - GEN_AI_API_KEY=${GEN_AI_API_KEY:-}\n      # Query Options\n      - DOC_TIME_DECAY=${DOC_TIME_DECAY:-}\n      - HYBRID_ALPHA=${HYBRID_ALPHA:-}\n      - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}\n      # Other services\n      - POSTGRES_HOST=relational_db\n      - POSTGRES_DEFAULT_SCHEMA=${POSTGRES_DEFAULT_SCHEMA:-}\n      - VESPA_HOST=index\n      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}\n      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}\n      - REDIS_HOST=cache\n      - WEB_DOMAIN=${WEB_DOMAIN:-}\n      # MinIO configuration\n      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}\n      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}\n      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}\n      - S3_FILE_STORE_BUCKET_NAME=${S3_FILE_STORE_BUCKET_NAME:-}\n      # Don't change the NLP model configs unless you know what you're doing\n      - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-}\n      - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}\n      - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}\n      - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}\n      - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-}\n      - DISABLE_RERANK_FOR_STREAMING=${DISABLE_RERANK_FOR_STREAMING:-}\n      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}\n      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}\n      - CODE_INTERPRETER_BASE_URL=${CODE_INTERPRETER_BASE_URL:-http://code-interpreter:8000}\n      - LOG_ONYX_MODEL_INTERACTIONS=${LOG_ONYX_MODEL_INTERACTIONS:-}\n      - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}\n      - LOG_ENDPOINT_LATENCY=${LOG_ENDPOINT_LATENCY:-}\n      - LOG_POSTGRES_LATENCY=${LOG_POSTGRES_LATENCY:-}\n      - LOG_POSTGRES_CONN_COUNTS=${LOG_POSTGRES_CONN_COUNTS:-}\n      - CELERY_BROKER_POOL_LIMIT=${CELERY_BROKER_POOL_LIMIT:-}\n      - LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS=${LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS:-}\n      # Egnyte OAuth Configs\n      - EGNYTE_CLIENT_ID=${EGNYTE_CLIENT_ID:-}\n      - EGNYTE_CLIENT_SECRET=${EGNYTE_CLIENT_SECRET:-}\n      - EGNYTE_LOCALHOST_OVERRIDE=${EGNYTE_LOCALHOST_OVERRIDE:-}\n      # Linear OAuth Configs\n      - LINEAR_CLIENT_ID=${LINEAR_CLIENT_ID:-}\n      - LINEAR_CLIENT_SECRET=${LINEAR_CLIENT_SECRET:-}\n      # Analytics Configs\n      - SENTRY_DSN=${SENTRY_DSN:-}\n      # Chat Configs\n      - HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-}\n\n      # Show extra/uncommon connectors\n      - SHOW_EXTRA_CONNECTORS=${SHOW_EXTRA_CONNECTORS:-true}\n\n      # Enables the use of bedrock models or IAM Auth\n      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-}\n      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-}\n      - AWS_REGION_NAME=${AWS_REGION_NAME:-}\n      - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}\n      # Seeding configuration\n      - USE_IAM_AUTH=${USE_IAM_AUTH:-}\n      - OPENAI_DEFAULT_API_KEY=${OPENAI_DEFAULT_API_KEY:-}\n\n      # Vespa Language Forcing\n      # See: https://docs.vespa.ai/en/linguistics.html\n      - VESPA_LANGUAGE_OVERRIDE=${VESPA_LANGUAGE_OVERRIDE:-}\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  background:\n    image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile\n    command: >\n      /bin/sh -c \"\n      if [ -f /etc/ssl/certs/custom-ca.crt ]; then\n        update-ca-certificates;\n      fi &&\n      /app/scripts/supervisord_entrypoint.sh\"\n    depends_on:\n      - relational_db\n      - index\n      - cache\n      - inference_model_server\n      - indexing_model_server\n    restart: unless-stopped\n    environment:\n      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true\n      - MULTI_TENANT=true\n      - LOG_LEVEL=DEBUG\n      - AUTH_TYPE=cloud\n      - REQUIRE_EMAIL_VERIFICATION=false\n      - DISABLE_TELEMETRY=true\n      - IMAGE_TAG=test\n      - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}\n      - JWT_PUBLIC_KEY_URL=${JWT_PUBLIC_KEY_URL:-}\n      # Gen AI Settings (Needed by OnyxBot)\n      - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}\n      - LLM_SOCKET_READ_TIMEOUT=${LLM_SOCKET_READ_TIMEOUT:-}\n      - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}\n      - GENERATIVE_MODEL_ACCESS_CHECK_FREQ=${GENERATIVE_MODEL_ACCESS_CHECK_FREQ:-}\n      - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}\n      - LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}\n      - GEN_AI_API_KEY=${GEN_AI_API_KEY:-}\n      # Query Options\n      - DOC_TIME_DECAY=${DOC_TIME_DECAY:-}\n      - HYBRID_ALPHA=${HYBRID_ALPHA:-}\n      - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}\n      # Other Services\n      - POSTGRES_HOST=relational_db\n      - POSTGRES_USER=${POSTGRES_USER:-}\n      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-}\n      - DB_READONLY_USER=${DB_READONLY_USER:-}\n      - DB_READONLY_PASSWORD=${DB_READONLY_PASSWORD:-}\n      - POSTGRES_DB=${POSTGRES_DB:-}\n      - POSTGRES_DEFAULT_SCHEMA=${POSTGRES_DEFAULT_SCHEMA:-}\n      - VESPA_HOST=index\n      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}\n      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}\n      - REDIS_HOST=cache\n      - WEB_DOMAIN=${WEB_DOMAIN:-}\n      # MinIO configuration\n      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}\n      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}\n      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}\n      - S3_FILE_STORE_BUCKET_NAME=${S3_FILE_STORE_BUCKET_NAME:-}\n      # Don't change the NLP model configs unless you know what you're doing\n      - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}\n      - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}\n      - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}\n      - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-}\n      - ASYM_PASSAGE_PREFIX=${ASYM_PASSAGE_PREFIX:-}\n      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}\n      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}\n      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}\n      # Indexing Configs\n      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-}\n      - ENABLED_CONNECTOR_TYPES=${ENABLED_CONNECTOR_TYPES:-}\n      - DISABLE_INDEX_UPDATE_ON_SWAP=${DISABLE_INDEX_UPDATE_ON_SWAP:-}\n      - CONTINUE_ON_CONNECTOR_FAILURE=${CONTINUE_ON_CONNECTOR_FAILURE:-}\n      - CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=${CONFLUENCE_CONNECTOR_LABELS_TO_SKIP:-}\n      - JIRA_CONNECTOR_LABELS_TO_SKIP=${JIRA_CONNECTOR_LABELS_TO_SKIP:-}\n      - WEB_CONNECTOR_VALIDATE_URLS=${WEB_CONNECTOR_VALIDATE_URLS:-}\n      - JIRA_SERVER_API_VERSION=${JIRA_SERVER_API_VERSION:-}\n      - JIRA_CLOUD_API_VERSION=${JIRA_CLOUD_API_VERSION:-}\n      - GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-}\n      - NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-}\n      - GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-}\n      - MAX_DOCUMENT_CHARS=${MAX_DOCUMENT_CHARS:-}\n      - MAX_FILE_SIZE_BYTES=${MAX_FILE_SIZE_BYTES:-}\n      # Egnyte OAuth Configs\n      - EGNYTE_CLIENT_ID=${EGNYTE_CLIENT_ID:-}\n      - EGNYTE_CLIENT_SECRET=${EGNYTE_CLIENT_SECRET:-}\n      - EGNYTE_LOCALHOST_OVERRIDE=${EGNYTE_LOCALHOST_OVERRIDE:-}\n      # Lienar OAuth Configs\n      - LINEAR_CLIENT_ID=${LINEAR_CLIENT_ID:-}\n      - LINEAR_CLIENT_SECRET=${LINEAR_CLIENT_SECRET:-}\n      # Celery Configs (defaults are set in the supervisord.conf file.\n      # prefer doing that to have one source of defaults)\n      - CELERY_WORKER_DOCFETCHING_CONCURRENCY=${CELERY_WORKER_DOCFETCHING_CONCURRENCY:-}\n      - CELERY_WORKER_DOCPROCESSING_CONCURRENCY=${CELERY_WORKER_DOCPROCESSING_CONCURRENCY:-}\n      - CELERY_WORKER_LIGHT_CONCURRENCY=${CELERY_WORKER_LIGHT_CONCURRENCY:-}\n      - CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER=${CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER:-}\n\n      # Onyx SlackBot Configs\n      - ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER=${ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER:-}\n      - ONYX_BOT_FEEDBACK_VISIBILITY=${ONYX_BOT_FEEDBACK_VISIBILITY:-}\n      - ONYX_BOT_DISPLAY_ERROR_MSGS=${ONYX_BOT_DISPLAY_ERROR_MSGS:-}\n      - NOTIFY_SLACKBOT_NO_ANSWER=${NOTIFY_SLACKBOT_NO_ANSWER:-}\n      - ONYX_BOT_MAX_QPM=${ONYX_BOT_MAX_QPM:-}\n      - ONYX_BOT_MAX_WAIT_TIME=${ONYX_BOT_MAX_WAIT_TIME:-}\n      # Discord Bot Configuration (runs via supervisord, requires DISCORD_BOT_TOKEN to be set)\n      # IMPORTANT: Only one Discord bot instance can run per token - do not scale background workers\n      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:-}\n      - DISCORD_BOT_INVOKE_CHAR=${DISCORD_BOT_INVOKE_CHAR:-!}\n      # API Server connection for Discord bot message processing\n      - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}\n      - API_SERVER_HOST=${API_SERVER_HOST:-api_server}\n      # Logging\n      # Leave this on pretty please? Nothing sensitive is collected!\n      - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}\n      - LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs\n      # Log all of Onyx prompts and interactions with the LLM\n      - LOG_ONYX_MODEL_INTERACTIONS=${LOG_ONYX_MODEL_INTERACTIONS:-}\n      - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}\n\n      # Analytics Configs\n      - SENTRY_DSN=${SENTRY_DSN:-}\n\n      # Enterprise Edition stuff\n      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}\n      - USE_IAM_AUTH=${USE_IAM_AUTH:-}\n      - AWS_REGION_NAME=${AWS_REGION_NAME:-}\n      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}\n      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}\n      # Seeding configuration\n      - OPENAI_DEFAULT_API_KEY=${OPENAI_DEFAULT_API_KEY:-}\n    # Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres\n    # volumes:\n    #   - ./bundle.pem:/app/bundle.pem:ro\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    # Uncomment the following lines if you need to include a custom CA certificate\n    # This section enables the use of a custom CA certificate\n    # If present, the custom CA certificate is mounted as a volume\n    # The container checks for its existence and updates the system's CA certificates\n    # This allows for secure communication with services using custom SSL certificates\n    # Optional volume mount for CA certificate\n    # volumes:\n    #   # Maps to the CA_CERT_PATH environment variable in the Dockerfile\n    #   - ${CA_CERT_PATH:-./custom-ca.crt}:/etc/ssl/certs/custom-ca.crt:ro\n\n  web_server:\n    image: ${ONYX_WEB_SERVER_IMAGE:-onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}}\n    build:\n      context: ../../web\n      dockerfile: Dockerfile\n      args:\n        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}\n        - NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED:-}\n        # Enterprise Edition only\n        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}\n        # DO NOT TURN ON unless you have EXPLICIT PERMISSION from Onyx.\n        - NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED:-false}\n    depends_on:\n      - api_server\n    restart: unless-stopped\n    environment:\n      - INTERNAL_URL=http://api_server:8080\n      - WEB_DOMAIN=${WEB_DOMAIN:-}\n      - THEME_IS_DARK=${THEME_IS_DARK:-}\n\n      # Enterprise Edition only\n      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}\n      - NEXT_PUBLIC_CUSTOM_REFRESH_URL=${NEXT_PUBLIC_CUSTOM_REFRESH_URL:-}\n\n  # Uncomment the block below to enable the MCP server for Onyx.\n  # mcp_server:\n  #   image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}}\n  #   build:\n  #     context: ../../backend\n  #     dockerfile: Dockerfile\n  #   command: >\n  #     /bin/sh -c \"if [ \\\"${MCP_SERVER_ENABLED:-}\\\" != \\\"True\\\" ] && [ \\\"${MCP_SERVER_ENABLED:-}\\\" != \\\"true\\\" ]; then\n  #       echo 'MCP server is disabled (MCP_SERVER_ENABLED=false), skipping...';\n  #       exit 0;\n  #     else\n  #       exec python -m onyx.mcp_server_main;\n  #     fi\"\n  #   ports:\n  #     - \"8090:8090\"\n  #   env_file:\n  #     - path: .env\n  #       required: false\n  #   depends_on:\n  #     - relational_db\n  #     - cache\n  #   restart: \"no\"\n  #   environment:\n  #     - POSTGRES_HOST=relational_db\n  #     - REDIS_HOST=cache\n  #     # MCP Server Configuration\n  #     - MCP_SERVER_ENABLED=${MCP_SERVER_ENABLED:-false}\n  #     - MCP_SERVER_PORT=${MCP_SERVER_PORT:-8090}\n  #     - MCP_SERVER_CORS_ORIGINS=${MCP_SERVER_CORS_ORIGINS:-}\n  #     - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}\n  #     - API_SERVER_HOST=api_server\n  #   extra_hosts:\n  #     - \"host.docker.internal:host-gateway\"\n  #   logging:\n  #     driver: json-file\n  #     options:\n  #       max-size: \"50m\"\n  #       max-file: \"6\"\n  #   volumes:\n  #     - mcp_server_logs:/var/log/onyx\n\n  inference_model_server:\n    image: ${ONYX_MODEL_SERVER_IMAGE:-onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.model_server\n    command: >\n      /bin/sh -c \"if [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"True\\\" ] || [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"true\\\" ]; then\n        echo 'Skipping service...';\n        exit 0;\n      else\n        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;\n      fi\"\n    restart: on-failure\n    environment:\n      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}\n      # Set to debug to get more fine-grained logs\n      - LOG_LEVEL=${LOG_LEVEL:-info}\n\n      # Analytics Configs\n      - SENTRY_DSN=${SENTRY_DSN:-}\n    volumes:\n      # Not necessary, this is just to reduce download time during startup\n      - model_cache_huggingface:/app/.cache/huggingface/\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  indexing_model_server:\n    image: ${ONYX_MODEL_SERVER_IMAGE:-onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.model_server\n    command: >\n      /bin/sh -c \"if [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"True\\\" ] || [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"true\\\" ]; then\n        echo 'Skipping service...';\n        exit 0;\n      else\n        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;\n      fi\"\n    restart: on-failure\n    environment:\n      - INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}\n      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}\n      - INDEXING_ONLY=True\n      # Set to debug to get more fine-grained logs\n      - LOG_LEVEL=${LOG_LEVEL:-info}\n      - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}\n\n      # Analytics Configs\n      - SENTRY_DSN=${SENTRY_DSN:-}\n    volumes:\n      # Not necessary, this is just to reduce download time during startup\n      - indexing_huggingface_model_cache:/app/.cache/huggingface/\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  relational_db:\n    image: postgres:15.2-alpine\n    shm_size: 1g\n    command: -c 'max_connections=250'\n    restart: unless-stopped\n    environment:\n      - POSTGRES_USER=${POSTGRES_USER:-postgres}\n      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}\n      - DB_READONLY_USER=${DB_READONLY_USER:-}\n      - DB_READONLY_PASSWORD=${DB_READONLY_PASSWORD:-}\n    ports:\n      - \"5432:5432\"\n    volumes:\n      - db_volume:/var/lib/postgresql/data\n\n  # This container name cannot have an underscore in it due to Vespa expectations of the URL\n  index:\n    image: vespaengine/vespa:8.609.39\n    restart: unless-stopped\n    environment:\n      - VESPA_SKIP_UPGRADE_CHECK=true\n    ports:\n      - \"19071:19071\"\n      - \"8081:8081\"\n    volumes:\n      - vespa_volume:/opt/vespa/var\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  opensearch:\n    image: opensearchproject/opensearch:3.4.0\n    restart: unless-stopped\n    # Controls whether this service runs. In order to enable it, add\n    # opensearch-enabled to COMPOSE_PROFILES in the environment for this\n    # docker-compose.\n    # NOTE: Now enabled on by default. To explicitly disable this service,\n    # uncomment this profile and ensure COMPOSE_PROFILES in your env does not\n    # list the profile, or when running docker compose, include all desired\n    # service names but this one. Additionally set\n    # OPENSEARCH_FOR_ONYX_ENABLED=false in your env.\n    # profiles: [\"opensearch-enabled\"]\n    environment:\n      # We need discovery.type=single-node so that OpenSearch doesn't try\n      # forming a cluster and waiting for other nodes to become live.\n      - discovery.type=single-node\n      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      # This and the JVM config below come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/\n      # We do this to avoid unstable performance from page swaps.\n      - bootstrap.memory_lock=true # Disable JVM heap memory swapping.\n      # Java heap should be ~50% of memory limit. For now we assume a limit of\n      # 4g although in practice the container can request more than this.\n      # See https://opster.com/guides/opensearch/opensearch-basics/opensearch-heap-size-usage-and-jvm-garbage-collection/\n      # Xms is the starting size, Xmx is the maximum size. These should be the\n      # same.\n      - \"OPENSEARCH_JAVA_OPTS=-Xms2g -Xmx2g\"\n    volumes:\n      - opensearch-data:/usr/share/opensearch/data\n    # These come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/\n    ulimits:\n      # Similarly to bootstrap.memory_lock, we don't want to impose limits on\n      # how much memory a process can lock from being swapped.\n      memlock:\n        soft: -1 # Set memlock to unlimited (no soft or hard limit).\n        hard: -1\n      nofile:\n        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536.\n        hard: 65536\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  nginx:\n    image: nginx:1.25.5-alpine\n    restart: unless-stopped\n    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`\n    # if api_server / web_server are not up\n    depends_on:\n      - api_server\n      - web_server\n    environment:\n      - DOMAIN=localhost\n    ports:\n      - \"${HOST_PORT_80:-80}:80\"\n      - \"${HOST_PORT:-3000}:80\" # allow for localhost:3000 usage, since that is the norm\n    volumes:\n      - ../data/nginx:/nginx-templates:ro\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    command: >\n      /bin/sh -c \"rm -f /etc/nginx/conf.d/default.conf\n      && cp -a /nginx-templates/. /etc/nginx/conf.d/\n      && sed 's/\\r$//' /etc/nginx/conf.d/run-nginx.sh > /tmp/run-nginx.sh\n      && chmod +x /tmp/run-nginx.sh\n      && /tmp/run-nginx.sh app.conf.template\"\n\n  minio:\n    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1\n    restart: unless-stopped\n    ports:\n      - \"9004:9000\"\n      - \"9005:9001\"\n    environment:\n      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}\n      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}\n      MINIO_DEFAULT_BUCKETS: ${S3_FILE_STORE_BUCKET_NAME:-onyx-file-store-bucket}\n    volumes:\n      - minio_data:/data\n    command: server /data --console-address \":9001\"\n    healthcheck:\n      test: [\"CMD\", \"mc\", \"ready\", \"local\"]\n      interval: 30s\n      timeout: 20s\n      retries: 3\n\n  cache:\n    image: redis:7.4-alpine\n    restart: unless-stopped\n    ports:\n      - \"6379:6379\"\n    # docker silently mounts /data even without an explicit volume mount, which enables\n    # persistence. explicitly setting save and appendonly forces ephemeral behavior.\n    command: redis-server --save \"\" --appendonly no\n    # Use tmpfs to prevent creation of anonymous volumes for /data\n    tmpfs:\n      - /data\n\n  code-interpreter:\n    image: onyxdotapp/code-interpreter:${CODE_INTERPRETER_IMAGE_TAG:-latest}\n    command: [\"bash\", \"./entrypoint.sh\", \"code-interpreter-api\"]\n    restart: unless-stopped\n    env_file:\n      - path: .env\n        required: false\n\n    # Below is needed for the `docker-out-of-docker` execution mode\n    user: root\n    volumes:\n      - /var/run/docker.sock:/var/run/docker.sock\n\n    # uncomment below + comment out the above to use the `docker-in-docker` execution mode\n    # privileged: true\n\nvolumes:\n  db_volume:\n  vespa_volume: # Created by the container itself\n  minio_data:\n\n  model_cache_huggingface:\n  indexing_huggingface_model_cache:\n  # mcp_server_logs:\n  # Persistent data for OpenSearch.\n  opensearch-data:\n"
  },
  {
    "path": "deployment/docker_compose/docker-compose.onyx-lite.yml",
    "content": "# =============================================================================\n# ONYX LITE — MINIMAL DEPLOYMENT OVERLAY\n# =============================================================================\n# Overlay to run Onyx in a minimal configuration: no vector database (Vespa),\n# no Redis, no model servers, and no background workers. Only PostgreSQL is\n# required. In this mode, connectors and RAG search are disabled, but the core\n# chat experience (LLM conversations, tools, user file uploads, Projects,\n# Agent knowledge, code interpreter) still works.\n#\n# Usage:\n#   docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml up -d\n#\n# With dev ports:\n#   docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml \\\n#                  -f docker-compose.dev.yml up -d --wait\n#\n# This overlay:\n#   - Moves Vespa (index), both model servers, OpenSearch, MinIO,\n#     Redis (cache), and the background worker to profiles so they do\n#     not start by default\n#   - Makes depends_on references to removed services optional\n#   - Sets DISABLE_VECTOR_DB=true on the api_server\n#   - Uses PostgreSQL for caching and auth instead of Redis\n#   - Uses PostgreSQL for file storage instead of S3/MinIO\n#\n# To selectively bring services back:\n#   --profile vectordb          Vespa + indexing model server\n#   --profile inference         Inference model server\n#   --profile background        Background worker (Celery) — also needs redis\n#   --profile redis             Redis cache\n#   --profile opensearch        OpenSearch\n#   --profile s3-filestore      MinIO (S3-compatible file store)\n# =============================================================================\n\nname: onyx\n\nservices:\n  api_server:\n    depends_on:\n      index:\n        condition: service_started\n        required: false\n      opensearch:\n        condition: service_started\n        required: false\n      cache:\n        condition: service_started\n        required: false\n      inference_model_server:\n        condition: service_started\n        required: false\n      minio:\n        condition: service_started\n        required: false\n    environment:\n      - DISABLE_VECTOR_DB=true\n      - FILE_STORE_BACKEND=postgres\n      - CACHE_BACKEND=postgres\n      - AUTH_BACKEND=postgres\n\n  # Move the background worker to a profile so it does not start by default.\n  # The API server handles all background work in lite mode.\n  background:\n    profiles: [\"background\"]\n    depends_on:\n      index:\n        condition: service_started\n        required: false\n      inference_model_server:\n        condition: service_started\n        required: false\n      indexing_model_server:\n        condition: service_started\n        required: false\n\n  # Move Redis to a profile so it does not start by default.\n  # The Postgres cache backend replaces Redis in lite mode.\n  cache:\n    profiles: [\"redis\"]\n\n  # Move Vespa and indexing model server to a profile so they do not start.\n  index:\n    profiles: [\"vectordb\"]\n\n  indexing_model_server:\n    profiles: [\"vectordb\"]\n\n  # Inference model server is only needed for local embeddings, not for LLM chat.\n  inference_model_server:\n    profiles: [\"inference\"]\n\n  # OpenSearch is not needed in lite mode (no indexing).\n  opensearch:\n    profiles: [\"opensearch\"]\n\n  # MinIO is not needed in lite mode (Postgres handles file storage).\n  minio:\n    profiles: [\"s3-filestore\"]\n"
  },
  {
    "path": "deployment/docker_compose/docker-compose.prod-cloud.yml",
    "content": "name: onyx\n\nservices:\n  api_server:\n    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.cloud\n    command: >\n      /bin/sh -c \"alembic -n schema_private upgrade head &&\n      echo \\\"Starting Onyx Api Server\\\" &&\n      uvicorn onyx.main:app --host 0.0.0.0 --port 8080\"\n    depends_on:\n      - relational_db\n      - index\n      - cache\n      - inference_model_server\n      - minio\n    restart: unless-stopped\n    environment:\n      - AUTH_TYPE=${AUTH_TYPE:-oidc}\n      - POSTGRES_HOST=relational_db\n      - VESPA_HOST=index\n      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}\n      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}\n      - REDIS_HOST=cache\n      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}\n      # MinIO configuration\n      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}\n      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}\n      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}\n    env_file:\n      - path: .env\n        required: false\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  background:\n    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile\n    command: /app/scripts/supervisord_entrypoint.sh\n    depends_on:\n      - relational_db\n      - index\n      - cache\n      - inference_model_server\n      - indexing_model_server\n    restart: unless-stopped\n    environment:\n      - AUTH_TYPE=${AUTH_TYPE:-oidc}\n      - POSTGRES_HOST=relational_db\n      - VESPA_HOST=index\n      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}\n      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}\n      - REDIS_HOST=cache\n      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}\n      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}\n      # MinIO configuration\n      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}\n      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}\n      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}\n      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:-}\n      - DISCORD_BOT_INVOKE_CHAR=${DISCORD_BOT_INVOKE_CHAR:-!}\n      # API Server connection for Discord bot message processing\n      - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}\n      - API_SERVER_HOST=${API_SERVER_HOST:-api_server}\n    env_file:\n      - path: .env\n        required: false\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  web_server:\n    image: onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}\n    build:\n      context: ../../web\n      dockerfile: Dockerfile\n      args:\n        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}\n        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}\n        - NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED:-}\n    depends_on:\n      - api_server\n    restart: unless-stopped\n    environment:\n      - INTERNAL_URL=http://api_server:8080\n    env_file:\n      - path: .env\n        required: false\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  # Uncomment the block below to enable the MCP server for Onyx.\n  # mcp_server:\n  #   image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}\n  #   build:\n  #     context: ../../backend\n  #     dockerfile: Dockerfile\n  #   command: >\n  #     /bin/sh -c \"if [ \\\"${MCP_SERVER_ENABLED:-}\\\" != \\\"True\\\" ] && [ \\\"${MCP_SERVER_ENABLED:-}\\\" != \\\"true\\\" ]; then\n  #       echo 'MCP server is disabled (MCP_SERVER_ENABLED=false), skipping...';\n  #       exit 0;\n  #     else\n  #       exec python -m onyx.mcp_server_main;\n  #     fi\"\n  #   env_file:\n  #     - path: .env\n  #       required: false\n  #   depends_on:\n  #     - relational_db\n  #     - cache\n  #   restart: \"no\"\n  #   environment:\n  #     - POSTGRES_HOST=relational_db\n  #     - REDIS_HOST=cache\n  #     # MCP Server Configuration\n  #     - MCP_SERVER_ENABLED=${MCP_SERVER_ENABLED:-false}\n  #     - MCP_SERVER_PORT=${MCP_SERVER_PORT:-8090}\n  #     - MCP_SERVER_CORS_ORIGINS=${MCP_SERVER_CORS_ORIGINS:-}\n  #     - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}\n  #     - API_SERVER_HOST=${API_SERVER_HOST:-api_server}\n  #   extra_hosts:\n  #     - \"host.docker.internal:host-gateway\"\n  #   logging:\n  #     driver: json-file\n  #     options:\n  #       max-size: \"50m\"\n  #       max-file: \"6\"\n  #   volumes:\n  #     - mcp_server_logs:/var/log/onyx\n\n  relational_db:\n    image: postgres:15.2-alpine\n    shm_size: 1g\n    command: -c 'max_connections=250'\n    restart: unless-stopped\n    # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file\n    env_file:\n      - path: .env\n        required: false\n    volumes:\n      - db_volume:/var/lib/postgresql/data\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  inference_model_server:\n    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.model_server\n    command: >\n      /bin/sh -c \"if [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"True\\\" ] || [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"true\\\" ]; then\n        echo 'Skipping service...';\n        exit 0;\n      else\n        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;\n      fi\"\n    restart: on-failure\n    environment:\n      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}\n      # Set to debug to get more fine-grained logs\n      - LOG_LEVEL=${LOG_LEVEL:-info}\n    volumes:\n      # Not necessary, this is just to reduce download time during startup\n      - model_cache_huggingface:/app/.cache/huggingface/\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  indexing_model_server:\n    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.model_server\n    command: >\n      /bin/sh -c \"if [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"True\\\" ] || [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"true\\\" ]; then\n        echo 'Skipping service...';\n        exit 0;\n      else\n        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;\n      fi\"\n    restart: on-failure\n    environment:\n      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}\n      - INDEXING_ONLY=True\n      # Set to debug to get more fine-grained logs\n      - LOG_LEVEL=${LOG_LEVEL:-info}\n      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}\n    volumes:\n      # Not necessary, this is just to reduce download time during startup\n      - indexing_huggingface_model_cache:/app/.cache/huggingface/\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  # This container name cannot have an underscore in it due to Vespa expectations of the URL\n  index:\n    image: vespaengine/vespa:8.609.39\n    restart: unless-stopped\n    environment:\n      - VESPA_SKIP_UPGRADE_CHECK=true\n    ports:\n      - \"19071:19071\"\n      - \"8081:8081\"\n    volumes:\n      - vespa_volume:/opt/vespa/var\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  opensearch:\n    image: opensearchproject/opensearch:3.4.0\n    restart: unless-stopped\n    # Controls whether this service runs. In order to enable it, add\n    # opensearch-enabled to COMPOSE_PROFILES in the environment for this\n    # docker-compose.\n    # NOTE: Now enabled on by default. To explicitly disable this service,\n    # uncomment this profile and ensure COMPOSE_PROFILES in your env does not\n    # list the profile, or when running docker compose, include all desired\n    # service names but this one. Additionally set\n    # OPENSEARCH_FOR_ONYX_ENABLED=false in your env.\n    # profiles: [\"opensearch-enabled\"]\n    environment:\n      # We need discovery.type=single-node so that OpenSearch doesn't try\n      # forming a cluster and waiting for other nodes to become live.\n      - discovery.type=single-node\n      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      # This and the JVM config below come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/\n      # We do this to avoid unstable performance from page swaps.\n      - bootstrap.memory_lock=true # Disable JVM heap memory swapping.\n      # Java heap should be ~50% of memory limit. For now we assume a limit of\n      # 4g although in practice the container can request more than this.\n      # See https://opster.com/guides/opensearch/opensearch-basics/opensearch-heap-size-usage-and-jvm-garbage-collection/\n      # Xms is the starting size, Xmx is the maximum size. These should be the\n      # same.\n      - \"OPENSEARCH_JAVA_OPTS=-Xms2g -Xmx2g\"\n    volumes:\n      - opensearch-data:/usr/share/opensearch/data\n    # These come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/\n    ulimits:\n      # Similarly to bootstrap.memory_lock, we don't want to impose limits on\n      # how much memory a process can lock from being swapped.\n      memlock:\n        soft: -1 # Set memlock to unlimited (no soft or hard limit).\n        hard: -1\n      nofile:\n        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536.\n        hard: 65536\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  nginx:\n    image: nginx:1.25.5-alpine\n    restart: unless-stopped\n    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`\n    # if api_server / web_server are not up\n    depends_on:\n      - api_server\n      - web_server\n    ports:\n      - \"80:80\"\n      - \"443:443\"\n    volumes:\n      - ../data/nginx:/nginx-templates:ro\n      - ../data/certbot/conf:/etc/letsencrypt\n      - ../data/certbot/www:/var/www/certbot\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    command: >\n      /bin/sh -c \"rm -f /etc/nginx/conf.d/default.conf\n      && cp -a /nginx-templates/. /etc/nginx/conf.d/\n      && sed 's/\\r$//' /etc/nginx/conf.d/run-nginx.sh > /tmp/run-nginx.sh\n      && chmod +x /tmp/run-nginx.sh\n      && /tmp/run-nginx.sh app.conf.template.prod\"\n    env_file:\n      - .env.nginx\n    environment:\n      # Nginx proxy timeout settings (in seconds)\n      - NGINX_PROXY_CONNECT_TIMEOUT=${NGINX_PROXY_CONNECT_TIMEOUT:-300}\n      - NGINX_PROXY_SEND_TIMEOUT=${NGINX_PROXY_SEND_TIMEOUT:-300}\n      - NGINX_PROXY_READ_TIMEOUT=${NGINX_PROXY_READ_TIMEOUT:-300}\n\n  # follows https://pentacent.medium.com/nginx-and-lets-encrypt-with-docker-in-less-than-5-minutes-b4b8a60d3a71\n  certbot:\n    image: certbot/certbot\n    restart: unless-stopped\n    volumes:\n      - ../data/certbot/conf:/etc/letsencrypt\n      - ../data/certbot/www:/var/www/certbot\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    entrypoint: \"/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'\"\n\n  minio:\n    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1\n    restart: unless-stopped\n    environment:\n      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}\n      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}\n      MINIO_DEFAULT_BUCKETS: ${S3_FILE_STORE_BUCKET_NAME:-onyx-file-store-bucket}\n    volumes:\n      - minio_data:/data\n    command: server /data --console-address \":9001\"\n    healthcheck:\n      test: [\"CMD\", \"mc\", \"ready\", \"local\"]\n      interval: 30s\n      timeout: 20s\n      retries: 3\n\n  cache:\n    image: redis:7.4-alpine\n    restart: unless-stopped\n    # docker silently mounts /data even without an explicit volume mount, which enables\n    # persistence. explicitly setting save and appendonly forces ephemeral behavior.\n    command: redis-server --save \"\" --appendonly no\n    # Use tmpfs to prevent creation of anonymous volumes for /data\n    tmpfs:\n      - /data\n\nvolumes:\n  db_volume:\n  vespa_volume:\n  minio_data:\n  # Created by the container itself\n  model_cache_huggingface:\n  indexing_huggingface_model_cache:\n  # mcp_server_logs:\n  # Persistent data for OpenSearch.\n  opensearch-data:\n"
  },
  {
    "path": "deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml",
    "content": "name: onyx\n\nservices:\n  api_server:\n    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile\n    command: >\n      /bin/sh -c \"alembic upgrade head &&\n      echo \\\"Starting Onyx Api Server\\\" &&\n      uvicorn onyx.main:app --host 0.0.0.0 --port 8080\"\n    depends_on:\n      - relational_db\n      - index\n      - cache\n      - inference_model_server\n      - minio\n    restart: unless-stopped\n    environment:\n      - AUTH_TYPE=${AUTH_TYPE:-oidc}\n      - POSTGRES_HOST=relational_db\n      - VESPA_HOST=index\n      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}\n      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}\n      - REDIS_HOST=cache\n      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}\n      - CODE_INTERPRETER_BASE_URL=${CODE_INTERPRETER_BASE_URL:-http://code-interpreter:8000}\n      - USE_IAM_AUTH=${USE_IAM_AUTH}\n      - AWS_REGION_NAME=${AWS_REGION_NAME-}\n      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}\n      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}\n      # MinIO configuration\n      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}\n      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}\n      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}\n      - PERSISTENT_DOCUMENT_STORAGE_PATH=${PERSISTENT_DOCUMENT_STORAGE_PATH:-/app/file-system}\n    env_file:\n      - path: .env\n        required: false\n    # Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres\n    # volumes:\n    #   - ./bundle.pem:/app/bundle.pem:ro\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    volumes:\n      # optional, only for debugging purposes\n      - api_server_logs:/var/log/onyx\n      # Shared volume for persistent document storage (Craft file-system mode)\n      - file-system:/app/file-system\n\n  background:\n    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile\n    command: /app/scripts/supervisord_entrypoint.sh\n    depends_on:\n      - relational_db\n      - index\n      - cache\n      - inference_model_server\n      - indexing_model_server\n    restart: unless-stopped\n    environment:\n      - AUTH_TYPE=${AUTH_TYPE:-oidc}\n      - POSTGRES_HOST=relational_db\n      - VESPA_HOST=index\n      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}\n      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}\n      - REDIS_HOST=cache\n      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}\n      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}\n      - USE_IAM_AUTH=${USE_IAM_AUTH}\n      - AWS_REGION_NAME=${AWS_REGION_NAME-}\n      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}\n      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}\n      # MinIO configuration\n      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}\n      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}\n      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}\n      - PERSISTENT_DOCUMENT_STORAGE_PATH=${PERSISTENT_DOCUMENT_STORAGE_PATH:-/app/file-system}\n    env_file:\n      - path: .env\n        required: false\n    # Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres\n    # volumes:\n    #   - ./bundle.pem:/app/bundle.pem:ro\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    volumes:\n      - background_logs:/var/log/onyx\n      # Shared volume for persistent document storage (Craft file-system mode)\n      - file-system:/app/file-system\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  web_server:\n    image: onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}\n    build:\n      context: ../../web\n      dockerfile: Dockerfile\n      args:\n        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}\n        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}\n    depends_on:\n      - api_server\n    restart: unless-stopped\n    environment:\n      - INTERNAL_URL=http://api_server:8080\n    env_file:\n      - path: .env\n        required: false\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  # Uncomment the block below to enable the MCP server for Onyx.\n  # mcp_server:\n  #   image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}\n  #   build:\n  #     context: ../../backend\n  #     dockerfile: Dockerfile\n  #   command: >\n  #     /bin/sh -c \"if [ \\\"${MCP_SERVER_ENABLED:-}\\\" != \\\"True\\\" ] && [ \\\"${MCP_SERVER_ENABLED:-}\\\" != \\\"true\\\" ]; then\n  #       echo 'MCP server is disabled (MCP_SERVER_ENABLED=false), skipping...';\n  #       exit 0;\n  #     else\n  #       exec python -m onyx.mcp_server_main;\n  #     fi\"\n  #   env_file:\n  #     - path: .env\n  #       required: false\n  #   depends_on:\n  #     - relational_db\n  #     - cache\n  #   restart: \"no\"\n  #   environment:\n  #     - POSTGRES_HOST=relational_db\n  #     - REDIS_HOST=cache\n  #     # MCP Server Configuration\n  #     - MCP_SERVER_ENABLED=${MCP_SERVER_ENABLED:-false}\n  #     - MCP_SERVER_PORT=${MCP_SERVER_PORT:-8090}\n  #     - MCP_SERVER_CORS_ORIGINS=${MCP_SERVER_CORS_ORIGINS:-}\n  #     - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}\n  #     - API_SERVER_HOST=${API_SERVER_HOST:-api_server}\n  #   extra_hosts:\n  #     - \"host.docker.internal:host-gateway\"\n  #   logging:\n  #     driver: json-file\n  #     options:\n  #       max-size: \"50m\"\n  #       max-file: \"6\"\n  #   volumes:\n  #     - mcp_server_logs:/var/log/onyx\n\n  inference_model_server:\n    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.model_server\n    command: >\n      /bin/sh -c \"if [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"True\\\" ] || [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"true\\\" ]; then\n        echo 'Skipping service...';\n        exit 0;\n      else\n        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;\n      fi\"\n    restart: on-failure\n    environment:\n      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}\n      # Set to debug to get more fine-grained logs\n      - LOG_LEVEL=${LOG_LEVEL:-info}\n    volumes:\n      # Not necessary, this is just to reduce download time during startup\n      - model_cache_huggingface:/app/.cache/huggingface/\n      # optional, only for debugging purposes\n      - inference_model_server_logs:/var/log/onyx\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  indexing_model_server:\n    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.model_server\n    command: >\n      /bin/sh -c \"if [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"True\\\" ] || [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"true\\\" ]; then\n        echo 'Skipping service...';\n        exit 0;\n      else\n        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;\n      fi\"\n    restart: on-failure\n    environment:\n      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}\n      - INDEXING_ONLY=True\n      # Set to debug to get more fine-grained logs\n      - LOG_LEVEL=${LOG_LEVEL:-info}\n      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}\n    volumes:\n      # Not necessary, this is just to reduce download time during startup\n      - indexing_huggingface_model_cache:/app/.cache/huggingface/\n      # optional, only for debugging purposes\n      - indexing_model_server_logs:/var/log/onyx\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  relational_db:\n    image: postgres:15.2-alpine\n    shm_size: 1g\n    command: -c 'max_connections=250'\n    restart: unless-stopped\n    # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file\n    env_file:\n      - path: .env\n        required: false\n    volumes:\n      - db_volume:/var/lib/postgresql/data\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  # This container name cannot have an underscore in it due to Vespa expectations of the URL\n  index:\n    image: vespaengine/vespa:8.609.39\n    restart: unless-stopped\n    environment:\n      - VESPA_SKIP_UPGRADE_CHECK=true\n    ports:\n      - \"19071:19071\"\n      - \"8081:8081\"\n    volumes:\n      - vespa_volume:/opt/vespa/var\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  opensearch:\n    image: opensearchproject/opensearch:3.4.0\n    restart: unless-stopped\n    # Controls whether this service runs. In order to enable it, add\n    # opensearch-enabled to COMPOSE_PROFILES in the environment for this\n    # docker-compose.\n    # NOTE: Now enabled on by default. To explicitly disable this service,\n    # uncomment this profile and ensure COMPOSE_PROFILES in your env does not\n    # list the profile, or when running docker compose, include all desired\n    # service names but this one. Additionally set\n    # OPENSEARCH_FOR_ONYX_ENABLED=false in your env.\n    # profiles: [\"opensearch-enabled\"]\n    environment:\n      # We need discovery.type=single-node so that OpenSearch doesn't try\n      # forming a cluster and waiting for other nodes to become live.\n      - discovery.type=single-node\n      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      # This and the JVM config below come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/\n      # We do this to avoid unstable performance from page swaps.\n      - bootstrap.memory_lock=true # Disable JVM heap memory swapping.\n      # Java heap should be ~50% of memory limit. For now we assume a limit of\n      # 4g although in practice the container can request more than this.\n      # See https://opster.com/guides/opensearch/opensearch-basics/opensearch-heap-size-usage-and-jvm-garbage-collection/\n      # Xms is the starting size, Xmx is the maximum size. These should be the\n      # same.\n      - \"OPENSEARCH_JAVA_OPTS=-Xms2g -Xmx2g\"\n    volumes:\n      - opensearch-data:/usr/share/opensearch/data\n    # These come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/\n    ulimits:\n      # Similarly to bootstrap.memory_lock, we don't want to impose limits on\n      # how much memory a process can lock from being swapped.\n      memlock:\n        soft: -1 # Set memlock to unlimited (no soft or hard limit).\n        hard: -1\n      nofile:\n        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536.\n        hard: 65536\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  nginx:\n    image: nginx:1.25.5-alpine\n    restart: unless-stopped\n    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`\n    # if api_server / web_server are not up\n    depends_on:\n      - api_server\n      - web_server\n    ports:\n      - \"80:80\"\n      - \"443:443\"\n    volumes:\n      - ../data/nginx:/nginx-templates:ro\n      - ../data/sslcerts:/etc/nginx/sslcerts\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    command: >\n      /bin/sh -c \"rm -f /etc/nginx/conf.d/default.conf\n      && cp -a /nginx-templates/. /etc/nginx/conf.d/\n      && sed 's/\\r$//' /etc/nginx/conf.d/run-nginx.sh > /tmp/run-nginx.sh\n      && chmod +x /tmp/run-nginx.sh\n      && /tmp/run-nginx.sh app.conf.template.prod.no-letsencrypt\"\n    env_file:\n      - .env.nginx\n    environment:\n      # Nginx proxy timeout settings (in seconds)\n      - NGINX_PROXY_CONNECT_TIMEOUT=${NGINX_PROXY_CONNECT_TIMEOUT:-300}\n      - NGINX_PROXY_SEND_TIMEOUT=${NGINX_PROXY_SEND_TIMEOUT:-300}\n      - NGINX_PROXY_READ_TIMEOUT=${NGINX_PROXY_READ_TIMEOUT:-300}\n\n  minio:\n    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1\n    restart: unless-stopped\n    environment:\n      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}\n      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}\n      MINIO_DEFAULT_BUCKETS: ${S3_FILE_STORE_BUCKET_NAME:-onyx-file-store-bucket}\n    volumes:\n      - minio_data:/data\n    command: server /data --console-address \":9001\"\n    healthcheck:\n      test: [\"CMD\", \"mc\", \"ready\", \"local\"]\n      interval: 30s\n      timeout: 20s\n      retries: 3\n\n  cache:\n    image: redis:7.4-alpine\n    restart: unless-stopped\n    # docker silently mounts /data even without an explicit volume mount, which enables\n    # persistence. explicitly setting save and appendonly forces ephemeral behavior.\n    command: redis-server --save \"\" --appendonly no\n    # Use tmpfs to prevent creation of anonymous volumes for /data\n    tmpfs:\n      - /data\n\n  code-interpreter:\n    image: onyxdotapp/code-interpreter:${CODE_INTERPRETER_IMAGE_TAG:-latest}\n    command: [\"bash\", \"./entrypoint.sh\", \"code-interpreter-api\"]\n    restart: unless-stopped\n    env_file:\n      - path: .env\n        required: false\n\n    # Below is needed for the `docker-out-of-docker` execution mode\n    user: root\n    volumes:\n      - /var/run/docker.sock:/var/run/docker.sock\n\n    # uncomment below + comment out the above to use the `docker-in-docker` execution mode\n    # privileged: true\n\nvolumes:\n  db_volume:\n  vespa_volume:\n  minio_data:\n  # Created by the container itself\n  model_cache_huggingface:\n  indexing_huggingface_model_cache:\n  # for logs that we don't want to lose on container restarts\n  api_server_logs:\n  background_logs:\n  inference_model_server_logs:\n  indexing_model_server_logs:\n  # mcp_server_logs:\n  # Shared volume for persistent document storage (Craft file-system mode)\n  file-system:\n  # Persistent data for OpenSearch.\n  opensearch-data:\n"
  },
  {
    "path": "deployment/docker_compose/docker-compose.prod.yml",
    "content": "name: onyx\n\nservices:\n  api_server:\n    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile\n    command: >\n      /bin/sh -c \"\n      alembic upgrade head &&\n      echo \\\"Starting Onyx Api Server\\\" &&\n      uvicorn onyx.main:app --host 0.0.0.0 --port 8080\"\n    depends_on:\n      - relational_db\n      - index\n      - cache\n      - minio\n      - inference_model_server\n    restart: unless-stopped\n    environment:\n      - AUTH_TYPE=${AUTH_TYPE:-oidc}\n      - POSTGRES_HOST=relational_db\n      - VESPA_HOST=index\n      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}\n      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}\n      - REDIS_HOST=cache\n      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}\n      - CODE_INTERPRETER_BASE_URL=${CODE_INTERPRETER_BASE_URL:-http://code-interpreter:8000}\n      - USE_IAM_AUTH=${USE_IAM_AUTH}\n      - AWS_REGION_NAME=${AWS_REGION_NAME-}\n      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}\n      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}\n      # MinIO configuration\n      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}\n      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}\n      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}\n      - PERSISTENT_DOCUMENT_STORAGE_PATH=${PERSISTENT_DOCUMENT_STORAGE_PATH:-/app/file-system}\n    env_file:\n      - path: .env\n        required: false\n    # Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres\n    # volumes:\n    #   - ./bundle.pem:/app/bundle.pem:ro\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    volumes:\n      - api_server_logs:/var/log/onyx\n      # Shared volume for persistent document storage (Craft file-system mode)\n      - file-system:/app/file-system\n\n  background:\n    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile\n    command: >\n      /bin/sh -c \"\n      if [ -f /etc/ssl/certs/custom-ca.crt ]; then\n        update-ca-certificates;\n      fi &&\n      /app/scripts/supervisord_entrypoint.sh\"\n    depends_on:\n      - relational_db\n      - index\n      - cache\n      - inference_model_server\n      - indexing_model_server\n    restart: unless-stopped\n    environment:\n      - AUTH_TYPE=${AUTH_TYPE:-oidc}\n      - POSTGRES_HOST=relational_db\n      - VESPA_HOST=index\n      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}\n      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}\n      - REDIS_HOST=cache\n      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}\n      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}\n      - USE_IAM_AUTH=${USE_IAM_AUTH}\n      - AWS_REGION_NAME=${AWS_REGION_NAME-}\n      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}\n      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}\n      # MinIO configuration\n      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}\n      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}\n      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}\n      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:-}\n      - DISCORD_BOT_INVOKE_CHAR=${DISCORD_BOT_INVOKE_CHAR:-!}\n      # API Server connection for Discord bot message processing\n      - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}\n      - API_SERVER_HOST=${API_SERVER_HOST:-api_server}\n      - PERSISTENT_DOCUMENT_STORAGE_PATH=${PERSISTENT_DOCUMENT_STORAGE_PATH:-/app/file-system}\n    env_file:\n      - path: .env\n        required: false\n    # Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres\n    # volumes:\n    #   - ./bundle.pem:/app/bundle.pem:ro\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    volumes:\n      - background_logs:/var/log/onyx\n      # Shared volume for persistent document storage (Craft file-system mode)\n      - file-system:/app/file-system\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    # Uncomment the following lines if you need to include a custom CA certificate\n    # This section enables the use of a custom CA certificate\n    # If present, the custom CA certificate is mounted as a volume\n    # The container checks for its existence and updates the system's CA certificates\n    # This allows for secure communication with services using custom SSL certificates\n    # volumes:\n    #   # Maps to the CA_CERT_PATH environment variable in the Dockerfile\n    #   - ${CA_CERT_PATH:-./custom-ca.crt}:/etc/ssl/certs/custom-ca.crt:ro\n\n  web_server:\n    image: onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}\n    build:\n      context: ../../web\n      dockerfile: Dockerfile\n      args:\n        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}\n        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}\n        - NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED:-}\n    depends_on:\n      - api_server\n    restart: unless-stopped\n    environment:\n      - INTERNAL_URL=http://api_server:8080\n    env_file:\n      - path: .env\n        required: false\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  # Uncomment the block below to enable the MCP server for Onyx.\n  # mcp_server:\n  #   image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}\n  #   build:\n  #     context: ../../backend\n  #     dockerfile: Dockerfile\n  #   command: >\n  #     /bin/sh -c \"if [ \\\"${MCP_SERVER_ENABLED:-}\\\" != \\\"True\\\" ] && [ \\\"${MCP_SERVER_ENABLED:-}\\\" != \\\"true\\\" ]; then\n  #       echo 'MCP server is disabled (MCP_SERVER_ENABLED=false), skipping...';\n  #       exit 0;\n  #     else\n  #       exec python -m onyx.mcp_server_main;\n  #     fi\"\n  #   env_file:\n  #     - path: .env\n  #       required: false\n  #   depends_on:\n  #     - relational_db\n  #     - cache\n  #   restart: \"no\"\n  #   environment:\n  #     - POSTGRES_HOST=relational_db\n  #     - REDIS_HOST=cache\n  #     # MCP Server Configuration\n  #     - MCP_SERVER_ENABLED=${MCP_SERVER_ENABLED:-false}\n  #     - MCP_SERVER_PORT=${MCP_SERVER_PORT:-8090}\n  #     - MCP_SERVER_CORS_ORIGINS=${MCP_SERVER_CORS_ORIGINS:-}\n  #     - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}\n  #     - API_SERVER_HOST=${API_SERVER_HOST:-api_server}\n  #   extra_hosts:\n  #     - \"host.docker.internal:host-gateway\"\n  #   logging:\n  #     driver: json-file\n  #     options:\n  #       max-size: \"50m\"\n  #       max-file: \"6\"\n  #   volumes:\n  #     - mcp_server_logs:/var/log/onyx\n\n  relational_db:\n    image: postgres:15.2-alpine\n    shm_size: 1g\n    command: -c 'max_connections=250'\n    restart: unless-stopped\n    # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file\n    env_file:\n      - path: .env\n        required: false\n    volumes:\n      - db_volume:/var/lib/postgresql/data\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  inference_model_server:\n    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.model_server\n    command: >\n      /bin/sh -c \"if [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"True\\\" ] || [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"true\\\" ]; then\n        echo 'Skipping service...';\n        exit 0;\n      else\n        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;\n      fi\"\n    restart: unless-stopped\n    environment:\n      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}\n      # Set to debug to get more fine-grained logs\n      - LOG_LEVEL=${LOG_LEVEL:-info}\n    volumes:\n      # Not necessary, this is just to reduce download time during startup\n      - model_cache_huggingface:/app/.cache/huggingface/\n      # optional, only for debugging purposes\n      - inference_model_server_logs:/var/log/onyx\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  indexing_model_server:\n    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.model_server\n    command: >\n      /bin/sh -c \"if [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"True\\\" ] || [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"true\\\" ]; then\n        echo 'Skipping service...';\n        exit 0;\n      else\n        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;\n      fi\"\n    restart: unless-stopped\n    environment:\n      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}\n      - INDEXING_ONLY=True\n      # Set to debug to get more fine-grained logs\n      - LOG_LEVEL=${LOG_LEVEL:-info}\n      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}\n    volumes:\n      # Not necessary, this is just to reduce download time during startup\n      - indexing_huggingface_model_cache:/app/.cache/huggingface/\n      # optional, only for debugging purposes\n      - indexing_model_server_logs:/var/log/onyx\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  # This container name cannot have an underscore in it due to Vespa expectations of the URL\n  index:\n    image: vespaengine/vespa:8.609.39\n    restart: unless-stopped\n    environment:\n      - VESPA_SKIP_UPGRADE_CHECK=true\n    ports:\n      - \"19071:19071\"\n      - \"8081:8081\"\n    volumes:\n      - vespa_volume:/opt/vespa/var\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  opensearch:\n    image: opensearchproject/opensearch:3.4.0\n    restart: unless-stopped\n    # Controls whether this service runs. In order to enable it, add\n    # opensearch-enabled to COMPOSE_PROFILES in the environment for this\n    # docker-compose.\n    # NOTE: Now enabled on by default. To explicitly disable this service,\n    # uncomment this profile and ensure COMPOSE_PROFILES in your env does not\n    # list the profile, or when running docker compose, include all desired\n    # service names but this one. Additionally set\n    # OPENSEARCH_FOR_ONYX_ENABLED=false in your env.\n    # profiles: [\"opensearch-enabled\"]\n    environment:\n      # We need discovery.type=single-node so that OpenSearch doesn't try\n      # forming a cluster and waiting for other nodes to become live.\n      - discovery.type=single-node\n      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      # This and the JVM config below come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/\n      # We do this to avoid unstable performance from page swaps.\n      - bootstrap.memory_lock=true # Disable JVM heap memory swapping.\n      # Java heap should be ~50% of memory limit. For now we assume a limit of\n      # 4g although in practice the container can request more than this.\n      # See https://opster.com/guides/opensearch/opensearch-basics/opensearch-heap-size-usage-and-jvm-garbage-collection/\n      # Xms is the starting size, Xmx is the maximum size. These should be the\n      # same.\n      - \"OPENSEARCH_JAVA_OPTS=-Xms2g -Xmx2g\"\n    volumes:\n      - opensearch-data:/usr/share/opensearch/data\n    # These come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/\n    ulimits:\n      # Similarly to bootstrap.memory_lock, we don't want to impose limits on\n      # how much memory a process can lock from being swapped.\n      memlock:\n        soft: -1 # Set memlock to unlimited (no soft or hard limit).\n        hard: -1\n      nofile:\n        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536.\n        hard: 65536\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  nginx:\n    image: nginx:1.25.5-alpine\n    restart: unless-stopped\n    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`\n    # if api_server / web_server are not up\n    depends_on:\n      - api_server\n      - web_server\n    ports:\n      - \"80:80\"\n      - \"443:443\"\n    volumes:\n      - ../data/nginx:/nginx-templates:ro\n      - ../data/certbot/conf:/etc/letsencrypt\n      - ../data/certbot/www:/var/www/certbot\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    command: >\n      /bin/sh -c \"rm -f /etc/nginx/conf.d/default.conf\n      && cp -a /nginx-templates/. /etc/nginx/conf.d/\n      && sed 's/\\r$//' /etc/nginx/conf.d/run-nginx.sh > /tmp/run-nginx.sh\n      && chmod +x /tmp/run-nginx.sh\n      && /tmp/run-nginx.sh app.conf.template.prod\"\n    env_file:\n      - .env.nginx\n    environment:\n      # Nginx proxy timeout settings (in seconds)\n      - NGINX_PROXY_CONNECT_TIMEOUT=${NGINX_PROXY_CONNECT_TIMEOUT:-300}\n      - NGINX_PROXY_SEND_TIMEOUT=${NGINX_PROXY_SEND_TIMEOUT:-300}\n      - NGINX_PROXY_READ_TIMEOUT=${NGINX_PROXY_READ_TIMEOUT:-300}\n\n  # follows https://pentacent.medium.com/nginx-and-lets-encrypt-with-docker-in-less-than-5-minutes-b4b8a60d3a71\n  certbot:\n    image: certbot/certbot\n    restart: unless-stopped\n    volumes:\n      - ../data/certbot/conf:/etc/letsencrypt\n      - ../data/certbot/www:/var/www/certbot\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    entrypoint: \"/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'\"\n\n  minio:\n    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1\n    restart: unless-stopped\n    environment:\n      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}\n      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}\n      MINIO_DEFAULT_BUCKETS: ${S3_FILE_STORE_BUCKET_NAME:-onyx-file-store-bucket}\n    volumes:\n      - minio_data:/data\n    command: server /data --console-address \":9001\"\n    healthcheck:\n      test: [\"CMD\", \"mc\", \"ready\", \"local\"]\n      interval: 30s\n      timeout: 20s\n      retries: 3\n\n  cache:\n    image: redis:7.4-alpine\n    restart: unless-stopped\n    # docker silently mounts /data even without an explicit volume mount, which enables\n    # persistence. explicitly setting save and appendonly forces ephemeral behavior.\n    command: redis-server --save \"\" --appendonly no\n    # Use tmpfs to prevent creation of anonymous volumes for /data\n    tmpfs:\n      - /data\n\n  code-interpreter:\n    image: onyxdotapp/code-interpreter:${CODE_INTERPRETER_IMAGE_TAG:-latest}\n    command: [\"bash\", \"./entrypoint.sh\", \"code-interpreter-api\"]\n    restart: unless-stopped\n    env_file:\n      - path: .env\n        required: false\n\n    # Below is needed for the `docker-out-of-docker` execution mode\n    user: root\n    volumes:\n      - /var/run/docker.sock:/var/run/docker.sock\n\n    # uncomment below + comment out the above to use the `docker-in-docker` execution mode\n    # privileged: true\n\nvolumes:\n  db_volume:\n  vespa_volume:\n  minio_data:\n  # Created by the container itself\n  model_cache_huggingface:\n  indexing_huggingface_model_cache:\n  # for logs that we don't want to lose on container restarts\n  api_server_logs:\n  background_logs:\n  inference_model_server_logs:\n  indexing_model_server_logs:\n  # mcp_server_logs:\n  # Shared volume for persistent document storage (Craft file-system mode)\n  file-system:\n  # Persistent data for OpenSearch.\n  opensearch-data:\n"
  },
  {
    "path": "deployment/docker_compose/docker-compose.resources.yml",
    "content": "# Docker service resource limits. Most are commented out by default.\n# 'background' service has preset (override-able) limits due to variable resource needs.\n# Uncomment and set env vars for specific service limits.\n# See: https://docs.danswer.dev/deployment/resource-sizing for details.\n\nservices:\n  background:\n    deploy:\n      resources:\n        limits:\n          # 6 CPUs, 10GB of memory. Very generous, primarily to prevent OOM crashing the host machine.\n          cpus: ${BACKGROUND_CPU_LIMIT:-6}\n          memory: ${BACKGROUND_MEM_LIMIT:-10g}\n  #       reservations:\n  #         cpus: ${BACKGROUND_CPU_RESERVATION}\n  #         memory: ${BACKGROUND_MEM_RESERVATION}\n  #\n  nginx:\n    deploy:\n      resources:\n        limits:\n          cpus: ${NGINX_CPU_LIMIT:-1}\n          memory: ${NGINX_MEM_LIMIT:-1g}\n  #      reservations:\n  #        cpus: ${NGINX_CPU_RESERVATION:}\n  #        memory: ${NGINX_MEM_RESERVATION}\n  #\n  api_server:\n    deploy:\n      resources:\n        limits:\n          cpus: ${API_SERVER_CPU_LIMIT:-2}\n          memory: ${API_SERVER_MEM_LIMIT:-4g}\n  #      reservations:\n  #        cpus: ${API_SERVER_CPU_RESERVATION}\n  #        memory: ${API_SERVER_MEM_RESERVATION}\n  #\n  #   index:\n  #     deploy:\n  #       resources:\n  #         limits:\n  #           cpus: ${VESPA_CPU_LIMIT}\n  #           memory: ${VESPA_MEM_LIMIT}\n  #         reservations:\n  #           cpus: ${VESPA_CPU_RESERVATION}\n  #           memory: ${VESPA_MEM_RESERVATION}\n  #\n  inference_model_server:\n    deploy:\n      resources:\n        limits:\n          # cpus: ${INFERENCE_CPU_LIMIT}\n          memory: ${INFERENCE_MEM_LIMIT:-5g}\n  #       reservations:\n  #         cpus: ${INFERENCE_CPU_RESERVATION}\n  #         memory: ${INFERENCE_MEM_RESERVATION}\n  #\n  indexing_model_server:\n    deploy:\n      resources:\n        limits:\n          # cpus: ${INDEXING_CPU_LIMIT}\n          memory: ${INDEXING_MEM_LIMIT:-5g}\n  #       reservations:\n  #         cpus: ${INDEXING_CPU_RESERVATION}\n  #         memory: ${INDEXING_MEM_RESERVATION}\n  #\n  relational_db:\n    deploy:\n      resources:\n        limits:\n          cpus: ${POSTGRES_CPU_LIMIT:-2}\n          memory: ${POSTGRES_MEM_LIMIT:-4g}\n  #         reservations:\n  #           cpus: ${POSTGRES_CPU_RESERVATION}\n  #           memory: ${POSTGRES_MEM_RESERVATION}\n\n  # minio:\n  #   deploy:\n  #     resources:\n  #       limits:\n  #         cpus: ${MINIO_CPU_LIMIT:-1}\n  #         memory: ${MINIO_MEM_LIMIT:-1g}\n  #       reservations:\n  #         cpus: ${MINIO_CPU_RESERVATION}\n  #         memory: ${MINIO_MEM_RESERVATION}\n"
  },
  {
    "path": "deployment/docker_compose/docker-compose.search-testing.yml",
    "content": "name: onyx\n\nservices:\n  api_server:\n    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile\n    command: >\n      /bin/sh -c \"alembic upgrade head &&\n      echo \\\"Starting Onyx Api Server\\\" &&\n      uvicorn onyx.main:app --host 0.0.0.0 --port 8080\"\n    depends_on:\n      - relational_db\n      - index\n      - cache\n      - minio\n    restart: unless-stopped\n    ports:\n      - \"8080\"\n    env_file:\n      - .env_eval\n    environment:\n      - AUTH_TYPE=basic\n      - POSTGRES_HOST=relational_db\n      - VESPA_HOST=index\n      - REDIS_HOST=cache\n      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}\n      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}\n      - ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}\n      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=True\n      # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license\n      - LICENSE_ENFORCEMENT_ENABLED=false\n      # MinIO configuration\n      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}\n      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}\n      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}\n      - S3_FILE_STORE_BUCKET_NAME=${S3_FILE_STORE_BUCKET_NAME:-}\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  background:\n    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile\n    command: /app/scripts/supervisord_entrypoint.sh\n    depends_on:\n      - relational_db\n      - index\n      - cache\n    restart: unless-stopped\n    env_file:\n      - .env_eval\n    environment:\n      - AUTH_TYPE=basic\n      - POSTGRES_HOST=relational_db\n      - VESPA_HOST=index\n      - REDIS_HOST=cache\n      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}\n      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}\n      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}\n      - ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}\n      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=True\n      # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license\n      - LICENSE_ENFORCEMENT_ENABLED=false\n      # MinIO configuration\n      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}\n      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}\n      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}\n      - S3_FILE_STORE_BUCKET_NAME=${S3_FILE_STORE_BUCKET_NAME:-}\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    volumes:\n      - log_store:/var/log/onyx\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  web_server:\n    image: onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}\n    build:\n      context: ../../web\n      dockerfile: Dockerfile\n      args:\n        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}\n\n        # Enterprise Edition only\n        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}\n        # DO NOT TURN ON unless you have EXPLICIT PERMISSION from Onyx.\n        - NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED:-false}\n    depends_on:\n      - api_server\n    restart: unless-stopped\n    environment:\n      - INTERNAL_URL=http://api_server:8080\n      - WEB_DOMAIN=${WEB_DOMAIN:-}\n      - THEME_IS_DARK=${THEME_IS_DARK:-}\n\n      # Enterprise Edition only\n      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}\n\n  inference_model_server:\n    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.model_server\n    command: >\n      /bin/sh -c \"if [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"True\\\" ] || [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"true\\\" ]; then\n        echo 'Skipping service...';\n        exit 0;\n      else\n        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;\n      fi\"\n    restart: on-failure\n    environment:\n      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}\n      - LOG_LEVEL=${LOG_LEVEL:-debug}\n    volumes:\n      - inference_model_cache_huggingface:/app/.cache/huggingface/\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  indexing_model_server:\n    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.model_server\n    command: >\n      /bin/sh -c \"if [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"True\\\" ] || [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"true\\\" ]; then\n        echo 'Skipping service...';\n        exit 0;\n      else\n        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;\n      fi\"\n    restart: on-failure\n    environment:\n      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}\n      - INDEXING_ONLY=True\n      - LOG_LEVEL=${LOG_LEVEL:-debug}\n      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}\n    volumes:\n      - inference_model_cache_huggingface:/app/.cache/huggingface/\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  relational_db:\n    image: postgres:15.2-alpine\n    shm_size: 1g\n    command: -c 'max_connections=250'\n    restart: unless-stopped\n    environment:\n      - POSTGRES_USER=${POSTGRES_USER:-postgres}\n      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}\n      - DB_READONLY_USER=${DB_READONLY_USER:-}\n      - DB_READONLY_PASSWORD=${DB_READONLY_PASSWORD:-}\n    ports:\n      - \"5432\"\n    volumes:\n      - db_volume:/var/lib/postgresql/data\n\n  # This container name cannot have an underscore in it due to Vespa expectations of the URL\n  index:\n    image: vespaengine/vespa:8.609.39\n    restart: unless-stopped\n    environment:\n      - VESPA_SKIP_UPGRADE_CHECK=true\n    ports:\n      - \"19071:19071\"\n      - \"8081:8081\"\n    volumes:\n      - vespa_volume:/opt/vespa/var\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  nginx:\n    image: nginx:1.25.5-alpine\n    restart: unless-stopped\n    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`\n    # if api_server / web_server are not up\n    depends_on:\n      - api_server\n      - web_server\n    environment:\n      - DOMAIN=localhost\n    ports:\n      - \"${NGINX_PORT:-3000}:80\" # allow for localhost:3000 usage, since that is the norm\n    volumes:\n      - ../data/nginx:/nginx-templates:ro\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    command: >\n      /bin/sh -c \"rm -f /etc/nginx/conf.d/default.conf\n      && cp -a /nginx-templates/. /etc/nginx/conf.d/\n      && sed 's/\\r$//' /etc/nginx/conf.d/run-nginx.sh > /tmp/run-nginx.sh\n      && chmod +x /tmp/run-nginx.sh\n      && /tmp/run-nginx.sh app.conf.template\"\n\n  minio:\n    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1\n    restart: unless-stopped\n    ports:\n      - \"9004:9000\"\n      - \"9005:9001\"\n    environment:\n      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}\n      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}\n      MINIO_DEFAULT_BUCKETS: ${S3_FILE_STORE_BUCKET_NAME:-onyx-file-store-bucket}\n    volumes:\n      - minio_data:/data\n    command: server /data --console-address \":9001\"\n    healthcheck:\n      test: [\"CMD\", \"mc\", \"ready\", \"local\"]\n      interval: 30s\n      timeout: 20s\n      retries: 3\n\n  cache:\n    image: redis:7.4-alpine\n    restart: unless-stopped\n    ports:\n      - \"6379:6379\"\n    # docker silently mounts /data even without an explicit volume mount, which enables\n    # persistence. explicitly setting save and appendonly forces ephemeral behavior.\n    command: redis-server --save \"\" --appendonly no\n    # Use tmpfs to prevent creation of anonymous volumes for /data\n    tmpfs:\n      - /data\n\nvolumes:\n  inference_model_cache_huggingface:\n  db_volume:\n    driver: local\n    driver_opts:\n      type: none\n      o: bind\n      device: ${DANSWER_POSTGRES_DATA_DIR:-./postgres_data}\n  vespa_volume:\n    driver: local\n    driver_opts:\n      type: none\n      o: bind\n      device: ${DANSWER_VESPA_DATA_DIR:-./vespa_data}\n  log_store: # for logs that we don't want to lose on container restarts\n  minio_data:\n"
  },
  {
    "path": "deployment/docker_compose/docker-compose.yml",
    "content": "# =============================================================================\n# ONYX DOCKER COMPOSE\n# =============================================================================\n# This is the default configuration for Onyx. This file is fairly configurable,\n# also see env.template for possible settings.\n#\n# PRODUCTION DEPLOYMENT CHECKLIST:\n# To convert this setup to a production deployment following best practices,\n# follow the checklist below. Note that there are other ways to secure the Onyx\n# deployment so these are not strictly necessary for all teams.\n#\n# 1. SECURITY HARDENING:\n#    - Remove all port exposures except nginx (80/443)\n#    - Comment out ports for: api_server, relational_db, index, cache, minio\n#\n# 2. SSL/TLS SETUP:\n#    - Uncomment the certbot service (see below)\n#    - Add SSL certificate volumes to nginx service\n#    - Change nginx command from app.conf.template to app.conf.template.prod\n#\n# 3. ENVIRONMENT CONFIGURATION:\n#    - Replace env_file with explicit environment variables\n#\n# 4. AUTHENTICATION:\n#    - Select an authentication method like Basic, Google OAuth, OIDC, or SAML\n#\n# 5. CA CERTIFICATES:\n#    - Uncomment custom CA certificate volumes if needed\n#\n# 6. DOMAIN CONFIGURATION:\n#    - Set proper DOMAIN environment variable for nginx\n#    - Configure DNS and SSL certificates\n#\n# For a complete production setup, refer to docker-compose.prod.yml\n# =============================================================================\n\nname: onyx\n\nservices:\n  api_server:\n    image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile\n      args:\n        - ENABLE_CRAFT=${ENABLE_CRAFT:-false}\n    command: >\n      /bin/sh -c \"alembic upgrade head &&\n      echo \\\"Starting Onyx Api Server\\\" &&\n      uvicorn onyx.main:app --host 0.0.0.0 --port 8080\"\n    # Check env.template and copy to .env for env vars\n    env_file:\n      - path: .env\n        required: false\n    depends_on:\n      relational_db:\n        condition: service_started\n      index:\n        condition: service_started\n      opensearch:\n        condition: service_started\n        required: false\n      cache:\n        condition: service_started\n      inference_model_server:\n        condition: service_started\n      minio:\n        condition: service_started\n        required: false\n    restart: unless-stopped\n    # DEV: To expose ports, either:\n    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait\n    # 2. Uncomment the ports below\n    # ports:\n    #   - \"8080:8080\"\n    environment:\n      # Auth Settings\n      - AUTH_TYPE=${AUTH_TYPE:-basic}\n      - FILE_STORE_BACKEND=${FILE_STORE_BACKEND:-s3}\n      - POSTGRES_HOST=${POSTGRES_HOST:-relational_db}\n      - VESPA_HOST=${VESPA_HOST:-index}\n      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}\n      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}\n      - REDIS_HOST=${REDIS_HOST:-cache}\n      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}\n      - CODE_INTERPRETER_BASE_URL=${CODE_INTERPRETER_BASE_URL:-http://code-interpreter:8000}\n      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}\n      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}\n      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}\n      # Onyx Craft configuration (disabled by default, set ENABLE_CRAFT=true in .env to enable)\n      # Use --include-craft with install script, or manually set in .env file\n      - ENABLE_CRAFT=${ENABLE_CRAFT:-false}\n      - OUTPUTS_TEMPLATE_PATH=${OUTPUTS_TEMPLATE_PATH:-/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs}\n      - VENV_TEMPLATE_PATH=${VENV_TEMPLATE_PATH:-/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/venv}\n      - WEB_TEMPLATE_PATH=${WEB_TEMPLATE_PATH:-/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web}\n      - PERSISTENT_DOCUMENT_STORAGE_PATH=${PERSISTENT_DOCUMENT_STORAGE_PATH:-/app/file-system}\n    # PRODUCTION: Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres\n    # volumes:\n    #   - ./bundle.pem:/app/bundle.pem:ro\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    healthcheck:\n      test:\n        [\n          \"CMD\",\n          \"python\",\n          \"-c\",\n          \"import urllib.request; urllib.request.urlopen('http://localhost:8080/health')\",\n        ]\n      interval: 30s\n      timeout: 20s\n      retries: 3\n      start_period: 25s\n    # Optional, only for debugging purposes\n    volumes:\n      - api_server_logs:/var/log/onyx\n      # Shared volume for persistent document storage (Craft file-system mode)\n      - file-system:/app/file-system\n\n  background:\n    image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile\n      args:\n        - ENABLE_CRAFT=${ENABLE_CRAFT:-false}\n    command: >\n      /bin/sh -c \"\n      if [ -f /app/scripts/setup_craft_templates.sh ]; then\n        /app/scripts/setup_craft_templates.sh;\n      fi &&\n      if [ -f /etc/ssl/certs/custom-ca.crt ]; then\n        update-ca-certificates;\n      fi &&\n      /app/scripts/supervisord_entrypoint.sh\"\n    env_file:\n      - path: .env\n        required: false\n    depends_on:\n      relational_db:\n        condition: service_started\n      index:\n        condition: service_started\n      opensearch:\n        condition: service_started\n        required: false\n      cache:\n        condition: service_started\n      inference_model_server:\n        condition: service_started\n      indexing_model_server:\n        condition: service_started\n    restart: unless-stopped\n    environment:\n      - FILE_STORE_BACKEND=${FILE_STORE_BACKEND:-s3}\n      - POSTGRES_HOST=${POSTGRES_HOST:-relational_db}\n      - VESPA_HOST=${VESPA_HOST:-index}\n      - OPENSEARCH_HOST=${OPENSEARCH_HOST:-opensearch}\n      - OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      - ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=${OPENSEARCH_FOR_ONYX_ENABLED:-true}\n      - REDIS_HOST=${REDIS_HOST:-cache}\n      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}\n      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}\n      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-http://minio:9000}\n      - S3_AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID:-minioadmin}\n      - S3_AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY:-minioadmin}\n      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:-}\n      - DISCORD_BOT_INVOKE_CHAR=${DISCORD_BOT_INVOKE_CHAR:-!}\n      # API Server connection for Discord bot message processing\n      - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}\n      - API_SERVER_HOST=${API_SERVER_HOST:-api_server}\n      # Onyx Craft configuration (set up automatically on container startup)\n      - ENABLE_CRAFT=${ENABLE_CRAFT:-false}\n      - OUTPUTS_TEMPLATE_PATH=${OUTPUTS_TEMPLATE_PATH:-/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs}\n      - VENV_TEMPLATE_PATH=${VENV_TEMPLATE_PATH:-/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/venv}\n      - WEB_TEMPLATE_PATH=${WEB_TEMPLATE_PATH:-/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web}\n      - PERSISTENT_DOCUMENT_STORAGE_PATH=${PERSISTENT_DOCUMENT_STORAGE_PATH:-/app/file-system}\n    # PRODUCTION: Uncomment the line below to use if IAM_AUTH is true and you are using iam auth for postgres\n    # volumes:\n    #   - ./bundle.pem:/app/bundle.pem:ro\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    # Optional, only for debugging purposes\n    volumes:\n      - background_logs:/var/log/onyx\n      # Shared volume for persistent document storage (Craft file-system mode)\n      - file-system:/app/file-system\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    # PRODUCTION: Uncomment the following lines if you need to include a custom CA certificate\n    # This section enables the use of a custom CA certificate\n    # If present, the custom CA certificate is mounted as a volume\n    # The container checks for its existence and updates the system's CA certificates\n    # This allows for secure communication with services using custom SSL certificates\n    # Optional volume mount for CA certificate\n    # volumes:\n    #   # Maps to the CA_CERT_PATH environment variable in the Dockerfile\n    #   - ${CA_CERT_PATH:-./custom-ca.crt}:/etc/ssl/certs/custom-ca.crt:ro\n\n  web_server:\n    image: ${ONYX_WEB_SERVER_IMAGE:-onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}}\n    build:\n      context: ../../web\n      dockerfile: Dockerfile\n      args:\n        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}\n        - NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED:-}\n        # Enterprise Edition only\n        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}\n        # DO NOT TURN ON unless you have EXPLICIT PERMISSION from Onyx.\n        - NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED:-false}\n        - NODE_OPTIONS=${NODE_OPTIONS:-\"--max-old-space-size=4096\"}\n    env_file:\n      - path: .env\n        required: false\n    depends_on:\n      - api_server\n    restart: unless-stopped\n    environment:\n      - INTERNAL_URL=${INTERNAL_URL:-http://api_server:8080}\n\n  # Uncomment the block below to enable the MCP server for Onyx.\n  # mcp_server:\n  #   image: ${ONYX_BACKEND_IMAGE:-onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}}\n  #   build:\n  #     context: ../../backend\n  #     dockerfile: Dockerfile\n  #   command: >\n  #     /bin/sh -c \"if [ \\\"${MCP_SERVER_ENABLED:-}\\\" != \\\"True\\\" ] && [ \\\"${MCP_SERVER_ENABLED:-}\\\" != \\\"true\\\" ]; then\n  #       echo 'MCP server is disabled (MCP_SERVER_ENABLED=false), skipping...';\n  #       exit 0;\n  #     else\n  #       exec python -m onyx.mcp_server_main;\n  #     fi\"\n  #   env_file:\n  #     - path: .env\n  #       required: false\n  #   depends_on:\n  #     - relational_db\n  #     - cache\n  #   restart: \"no\"\n  #   environment:\n  #     - POSTGRES_HOST=${POSTGRES_HOST:-relational_db}\n  #     - REDIS_HOST=${REDIS_HOST:-cache}\n  #     # MCP Server Configuration\n  #     - MCP_SERVER_ENABLED=${MCP_SERVER_ENABLED:-false}\n  #     - MCP_SERVER_PORT=${MCP_SERVER_PORT:-8090}\n  #     - MCP_SERVER_CORS_ORIGINS=${MCP_SERVER_CORS_ORIGINS:-}\n  #     - API_SERVER_PROTOCOL=${API_SERVER_PROTOCOL:-http}\n  #     - API_SERVER_HOST=${API_SERVER_HOST:-api_server}\n  #   extra_hosts:\n  #     - \"host.docker.internal:host-gateway\"\n  #   logging:\n  #     driver: json-file\n  #     options:\n  #       max-size: \"50m\"\n  #       max-file: \"6\"\n  #   # Optional, only for debugging purposes\n  #   volumes:\n  #     - mcp_server_logs:/var/log/onyx\n\n  inference_model_server:\n    image: ${ONYX_MODEL_SERVER_IMAGE:-onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.model_server\n    # GPU Support: Uncomment the following lines to enable GPU support\n    # Requires nvidia-container-toolkit to be installed on the host\n    # deploy:\n    #   resources:\n    #     reservations:\n    #       devices:\n    #         - driver: nvidia\n    #           count: all\n    #           capabilities: [gpu]\n    command: >\n      /bin/sh -c \"if [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"True\\\" ] || [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"true\\\" ]; then\n        echo 'Skipping service...';\n        exit 0;\n      else\n        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;\n      fi\"\n    env_file:\n      - path: .env\n        required: false\n    restart: unless-stopped\n    volumes:\n      # Not necessary, this is just to reduce download time during startup\n      - model_cache_huggingface:/app/.cache/huggingface/\n      # Optional, only for debugging purposes\n      - inference_model_server_logs:/var/log/onyx\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    healthcheck:\n      test:\n        [\n          \"CMD\",\n          \"python\",\n          \"-c\",\n          \"import urllib.request; urllib.request.urlopen('http://localhost:9000/api/health')\",\n        ]\n      interval: 20s\n      timeout: 5s\n      retries: 3\n\n  indexing_model_server:\n    image: ${ONYX_MODEL_SERVER_IMAGE:-onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}}\n    build:\n      context: ../../backend\n      dockerfile: Dockerfile.model_server\n    # GPU Support: Uncomment the following lines to enable GPU support\n    # Requires nvidia-container-toolkit to be installed on the host\n    # deploy:\n    #   resources:\n    #     reservations:\n    #       devices:\n    #         - driver: nvidia\n    #           count: all\n    #           capabilities: [gpu]\n    command: >\n      /bin/sh -c \"if [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"True\\\" ] || [ \\\"${DISABLE_MODEL_SERVER:-}\\\" = \\\"true\\\" ]; then\n        echo 'Skipping service...';\n        exit 0;\n      else\n        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;\n      fi\"\n    env_file:\n      - path: .env\n        required: false\n    restart: unless-stopped\n    environment:\n      - INDEXING_ONLY=True\n    volumes:\n      # Not necessary, this is just to reduce download time during startup\n      - indexing_huggingface_model_cache:/app/.cache/huggingface/\n      # Optional, only for debugging purposes\n      - indexing_model_server_logs:/var/log/onyx\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    healthcheck:\n      test:\n        [\n          \"CMD\",\n          \"python\",\n          \"-c\",\n          \"import urllib.request; urllib.request.urlopen('http://localhost:9000/api/health')\",\n        ]\n      interval: 20s\n      timeout: 5s\n      retries: 3\n\n  relational_db:\n    image: postgres:15.2-alpine\n    shm_size: 1g\n    command: -c 'max_connections=250'\n    env_file:\n      - path: .env\n        required: false\n    restart: unless-stopped\n    # PRODUCTION: Override the defaults by passing in the environment variables\n    environment:\n      - POSTGRES_USER=${POSTGRES_USER:-postgres}\n      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}\n    # DEV: To expose ports, either:\n    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait\n    # 2. Uncomment the ports below\n    # ports:\n    #   - \"5432:5432\"\n    healthcheck:\n      test: [\"CMD-SHELL\", \"pg_isready -U ${POSTGRES_USER:-postgres}\"]\n      interval: 10s\n      timeout: 5s\n      retries: 5\n    volumes:\n      - db_volume:/var/lib/postgresql/data\n\n  # This container name cannot have an underscore in it due to Vespa expectations of the URL\n  index:\n    image: vespaengine/vespa:8.609.39\n    restart: unless-stopped\n    env_file:\n      - path: .env\n        required: false\n    environment:\n      - VESPA_SKIP_UPGRADE_CHECK=${VESPA_SKIP_UPGRADE_CHECK:-true}\n    # DEV: To expose ports, either:\n    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait\n    # 2. Uncomment the ports below\n    # ports:\n    #   - \"19071:19071\"\n    #   - \"8081:8081\"\n    volumes:\n      - vespa_volume:/opt/vespa/var\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  opensearch:\n    image: opensearchproject/opensearch:3.4.0\n    restart: unless-stopped\n    # Controls whether this service runs. In order to enable it, add\n    # opensearch-enabled to COMPOSE_PROFILES in the environment for this\n    # docker-compose.\n    # NOTE: Now enabled on by default. To explicitly disable this service,\n    # uncomment this profile and ensure COMPOSE_PROFILES in your env does not\n    # list the profile, or when running docker compose, include all desired\n    # service names but this one. Additionally set\n    # OPENSEARCH_FOR_ONYX_ENABLED=false in your env.\n    # profiles: [\"opensearch-enabled\"]\n    environment:\n      # We need discovery.type=single-node so that OpenSearch doesn't try\n      # forming a cluster and waiting for other nodes to become live.\n      - discovery.type=single-node\n      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD:-StrongPassword123!}\n      # This and the JVM config below come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/\n      # We do this to avoid unstable performance from page swaps.\n      - bootstrap.memory_lock=true # Disable JVM heap memory swapping.\n      # Java heap should be ~50% of memory limit. For now we assume a limit of\n      # 4g although in practice the container can request more than this.\n      # See https://opster.com/guides/opensearch/opensearch-basics/opensearch-heap-size-usage-and-jvm-garbage-collection/\n      # Xms is the starting size, Xmx is the maximum size. These should be the\n      # same.\n      - \"OPENSEARCH_JAVA_OPTS=-Xms2g -Xmx2g\"\n    volumes:\n      - opensearch-data:/usr/share/opensearch/data\n    # These come from the example in https://docs.opensearch.org/latest/install-and-configure/install-opensearch/docker/\n    ulimits:\n      # Similarly to bootstrap.memory_lock, we don't want to impose limits on\n      # how much memory a process can lock from being swapped.\n      memlock:\n        soft: -1 # Set memlock to unlimited (no soft or hard limit).\n        hard: -1\n      nofile:\n        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536.\n        hard: 65536\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n\n  nginx:\n    image: nginx:1.25.5-alpine\n    restart: unless-stopped\n    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`\n    # if api_server / web_server are not up\n    depends_on:\n      - api_server\n      - web_server\n    env_file:\n      - path: .env\n        required: false\n    environment:\n      - DOMAIN=localhost\n      # Nginx proxy timeout settings (in seconds)\n      - NGINX_PROXY_CONNECT_TIMEOUT=${NGINX_PROXY_CONNECT_TIMEOUT:-300}\n      - NGINX_PROXY_SEND_TIMEOUT=${NGINX_PROXY_SEND_TIMEOUT:-300}\n      - NGINX_PROXY_READ_TIMEOUT=${NGINX_PROXY_READ_TIMEOUT:-300}\n    ports:\n      - \"${HOST_PORT_80:-80}:80\"\n      - \"${HOST_PORT:-3000}:80\" # allow for localhost:3000 usage, since that is the norm\n    volumes:\n      # Mount templates read-only; the startup command copies them into\n      # the writable /etc/nginx/conf.d/ inside the container.  This avoids\n      # \"Permission denied\" errors on Windows Docker bind mounts.\n      - ../data/nginx:/nginx-templates:ro\n    # PRODUCTION: Add SSL certificate volumes for HTTPS support:\n    #   - ../data/certbot/conf:/etc/letsencrypt\n    #   - ../data/certbot/www:/var/www/certbot\n    logging:\n      driver: json-file\n      options:\n        max-size: \"50m\"\n        max-file: \"6\"\n    # The specified script waits for the api_server to start up.\n    # Without this we've seen issues where nginx shows no error logs but\n    # does not receive any traffic\n    # PRODUCTION: Change to app.conf.template.prod for production nginx config\n    command: >\n      /bin/sh -c \"rm -f /etc/nginx/conf.d/default.conf\n      && cp -a /nginx-templates/. /etc/nginx/conf.d/\n      && sed 's/\\r$//' /etc/nginx/conf.d/run-nginx.sh > /tmp/run-nginx.sh\n      && chmod +x /tmp/run-nginx.sh\n      && /tmp/run-nginx.sh app.conf.template\"\n\n  cache:\n    image: redis:7.4-alpine\n    restart: unless-stopped\n    # DEV: To expose ports, either:\n    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait\n    # 2. Uncomment the ports below\n    # ports:\n    #   - \"6379:6379\"\n    # docker silently mounts /data even without an explicit volume mount, which enables\n    # persistence. explicitly setting save and appendonly forces ephemeral behavior.\n    command: redis-server --save \"\" --appendonly no\n    env_file:\n      - path: .env\n        required: false\n    # Use tmpfs to prevent creation of anonymous volumes for /data\n    tmpfs:\n      - /data\n\n  minio:\n    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1\n    profiles: [\"s3-filestore\"]\n    restart: unless-stopped\n    # DEV: To expose ports, either:\n    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait\n    # 2. Uncomment the ports below\n    # ports:\n    #   - \"9004:9000\"\n    #   - \"9005:9001\"\n    env_file:\n      - path: .env\n        required: false\n    environment:\n      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}\n      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}\n      # Note: we've seen the default bucket creation logic not work in some cases\n      MINIO_DEFAULT_BUCKETS: ${S3_FILE_STORE_BUCKET_NAME:-onyx-file-store-bucket}\n    volumes:\n      - minio_data:/data\n    command: server /data --console-address \":9001\"\n    healthcheck:\n      test: [\"CMD\", \"mc\", \"ready\", \"local\"]\n      interval: 30s\n      timeout: 20s\n      retries: 3\n\n  code-interpreter:\n    image: onyxdotapp/code-interpreter:${CODE_INTERPRETER_IMAGE_TAG:-latest}\n    command: [\"bash\", \"./entrypoint.sh\", \"code-interpreter-api\"]\n    restart: unless-stopped\n    env_file:\n      - path: .env\n        required: false\n\n    # Below is needed for the `docker-out-of-docker` execution mode\n    # For Linux rootless Docker, set DOCKER_SOCK_PATH=${XDG_RUNTIME_DIR}/docker.sock\n    user: root\n    volumes:\n      - ${DOCKER_SOCK_PATH:-/var/run/docker.sock}:/var/run/docker.sock\n\n    # uncomment below + comment out the above to use the `docker-in-docker` execution mode\n    # privileged: true\n\n  # PRODUCTION: Uncomment the following certbot service for SSL certificate management\n  # certbot:\n  #   image: certbot/certbot\n  #   restart: unless-stopped\n  #   volumes:\n  #     - ../data/certbot/conf:/etc/letsencrypt\n  #     - ../data/certbot/www:/var/www/certbot\n  #   logging:\n  #     driver: json-file\n  #     options:\n  #       max-size: \"50m\"\n  #       max-file: \"6\"\n  #   entrypoint: \"/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'\"\n\nvolumes:\n  # Necessary for persisting data for use\n  db_volume:\n  vespa_volume: # Created by the container itself\n  minio_data:\n  # Caches to prevent re-downloading models, not strictly necessary\n  model_cache_huggingface:\n  indexing_huggingface_model_cache:\n  # Logs preserved across container restarts\n  api_server_logs:\n  background_logs:\n  # mcp_server_logs:\n  inference_model_server_logs:\n  indexing_model_server_logs:\n  # Shared volume for persistent document storage (Craft file-system mode)\n  file-system:\n  # Persistent data for OpenSearch.\n  opensearch-data:\n"
  },
  {
    "path": "deployment/docker_compose/env.nginx.template",
    "content": "# DOMAIN is necessary for https setup, EMAIL is optional\nDOMAIN=\nEMAIL=\n\n# If using the `no-letsencrypt` setup, the below are required.\n# They specify the path within /onyx/deployment/data/sslcerts directory\n# where the certificate / certificate key can be found. You can either\n# name your certificate / certificate key files to follow the convention\n# below or adjust these to match your naming conventions.\nSSL_CERT_FILE_NAME=ssl.cert\nSSL_CERT_KEY_FILE_NAME=ssl.key\n"
  },
  {
    "path": "deployment/docker_compose/env.prod.template",
    "content": "# Fill in the values and copy the contents of this file to .env in the deployment directory.\n# Some valid default values are provided where applicable, delete the variables which you don't set values for.\n# This is only necessary when using the docker-compose.prod.yml compose file.\n\n\n# Could be something like onyx.companyname.com\nWEB_DOMAIN=http://localhost:3000\n\n# The following are for configuring User Authentication, supported flows are:\n# disabled\n# basic (standard username / password)\n# google_oauth (login with google/gmail account)\n# oidc\n# saml\nAUTH_TYPE=google_oauth\n\n# Set the values below to use with Google OAuth\nGOOGLE_OAUTH_CLIENT_ID=\nGOOGLE_OAUTH_CLIENT_SECRET=\nSECRET=\n\n# if using basic auth and you want to require email verification, \n# then uncomment / set the following\n#REQUIRE_EMAIL_VERIFICATION=true\n#SMTP_USER=your-email@company.com\n#SMTP_PASS=your-gmail-password\n\n# The below are only needed if you aren't using gmail as your SMTP\n#SMTP_SERVER=  \n#SMTP_PORT=\n# When missing SMTP_USER, this is used instead\n#EMAIL_FROM=\n\n# OpenID Connect (OIDC)\n#OPENID_CONFIG_URL=\n#OIDC_PKCE_ENABLED=\n\n# SAML config directory for OneLogin compatible setups\n#SAML_CONF_DIR=\n\n\n# How long before user needs to reauthenticate, default to 7 days. (cookie expiration time)\nSESSION_EXPIRE_TIME_SECONDS=604800\n\n\n# Use the below to specify a list of allowed user domains, only checked if user Auth is turned on\n# e.g. `VALID_EMAIL_DOMAINS=example.com,example.org` will only allow users\n# with an @example.com or an @example.org email\n#VALID_EMAIL_DOMAINS=\n\n\n# Default values here are what Postgres uses by default, feel free to change.\nPOSTGRES_USER=postgres\nPOSTGRES_PASSWORD=password\n\n\n# Default values here for the read-only user for the knowledge graph and other future read-only purposes. \n# Please change password!\nDB_READONLY_USER=db_readonly_user\nDB_READONLY_PASSWORD=password\n\n# If setting the vespa language is required, set this ('en', 'de', etc.).\n# See: https://docs.vespa.ai/en/linguistics.html \n#VESPA_LANGUAGE_OVERRIDE=\n\n# Show extra/uncommon connectors\n# See https://docs.onyx.app/admins/connectors/overview for a full list of connectors\nSHOW_EXTRA_CONNECTORS=False\n"
  },
  {
    "path": "deployment/docker_compose/env.template",
    "content": "# Copy this file to .env so it's picked up by the docker compose yaml files\n# Uncomment the values you would like to set\n# No edits necessary, works out of the box\n\n\n################################################################################\n## COMMONLY MODIFIED CONFIGURATIONS\n################################################################################\n## Version of Onyx to deploy, default is latest (main built nightly)\n## For Craft support, use: IMAGE_TAG=craft-latest\nIMAGE_TAG=latest\n\n## Onyx Craft Configuration\n## Craft enables AI-powered web app building within Onyx (disabled by default)\n## To enable Craft, uncomment the lines below (and comment out the above)\n## or use --include-craft with the install script\n## This adds Node.js 20 and opencode CLI to the image at build time\n# ENABLE_CRAFT=true\n# IMAGE_TAG=craft-latest\n\n## Auth Settings\n### https://docs.onyx.app/deployment/authentication\nAUTH_TYPE=basic\n# SESSION_EXPIRE_TIME_SECONDS=\n### Recommended for basic auth - used for signing password reset and verification tokens\n### If using install.sh, this will be auto-generated\n### If setting manually, run: openssl rand -hex 32\nUSER_AUTH_SECRET=\"\"\n### Recommend to set this for security\n# ENCRYPTION_KEY_SECRET=\n### Optional\n# API_KEY_HASH_ROUNDS=\n### You can add a comma separated list of domains like onyx.app, only those domains will be allowed to signup/log in\n# VALID_EMAIL_DOMAINS=\n\n## Chat Configuration\n# HARD_DELETE_CHATS=\n# MAX_ALLOWED_UPLOAD_SIZE_MB=250\n# Default per-user upload size limit (MB) when no admin value is set.\n# Automatically clamped to MAX_ALLOWED_UPLOAD_SIZE_MB at runtime.\n# DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB=100\n\n## Base URL for redirects\n# WEB_DOMAIN=\n\n## Enterprise Features, requires a paid plan and licenses\nENABLE_PAID_ENTERPRISE_EDITION_FEATURES=false\n\n\n################################################################################\n## SERVICES CONFIGURATIONS\n################################################################################\n## Database Configuration\nPOSTGRES_USER=postgres\nPOSTGRES_PASSWORD=password\n# POSTGRES_DB=\n# POSTGRES_DEFAULT_SCHEMA=\n# POSTGRES_USE_NULL_POOL=\n# POSTGRES_API_SERVER_POOL_SIZE=\n# POSTGRES_API_SERVER_POOL_OVERFLOW\n# POSTGRES_IDLE_SESSIONS_TIMEOUT=\n# POSTGRES_POOL_RECYCLE=\n# DB_READONLY_USER=\n# DB_READONLY_PASSWORD=\n\n## File Store Backend: \"s3\" (default, uses MinIO) or \"postgres\" (no extra services needed)\n## COMPOSE_PROFILES activates the MinIO service. To use PostgreSQL file storage instead,\n## remove s3-filestore from COMPOSE_PROFILES and set FILE_STORE_BACKEND=postgres.\nCOMPOSE_PROFILES=s3-filestore\nFILE_STORE_BACKEND=s3\n## Setting for enabling OpenSearch.\nOPENSEARCH_FOR_ONYX_ENABLED=true\n\n## MinIO/S3 Configuration (only needed when FILE_STORE_BACKEND=s3)\nS3_ENDPOINT_URL=http://minio:9000\nS3_AWS_ACCESS_KEY_ID=minioadmin\nS3_AWS_SECRET_ACCESS_KEY=minioadmin\nS3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket\nMINIO_ROOT_USER=minioadmin\nMINIO_ROOT_PASSWORD=minioadmin\n\n## Nginx Proxy Timeout Configuration (in seconds)\n## These settings control how long nginx waits for upstream servers (api_server/web_server)\n## Increase these values if you experience timeout errors with long-running requests\n# NGINX_PROXY_CONNECT_TIMEOUT=300\n# NGINX_PROXY_SEND_TIMEOUT=300\n# NGINX_PROXY_READ_TIMEOUT=300\n\n## MCP Server Configuration\n## The MCP (Model Context Protocol) server allows external MCP clients to interact with Onyx\n## Set to true to enable the MCP server (disabled by default)\n# MCP_SERVER_ENABLED=false\n## Port for the MCP server (defaults to 8090)\n# MCP_SERVER_PORT=8090\n## CORS origins for MCP clients (comma-separated list)\n# MCP_SERVER_CORS_ORIGINS=\n\n## Discord Bot Configuration\n## The Discord bot allows users to interact with Onyx from Discord servers\n## Bot token from Discord Developer Portal (required to enable the bot)\n# DISCORD_BOT_TOKEN=\n## Command prefix for bot commands (default: \"!\")\n# DISCORD_BOT_INVOKE_CHAR=!\n\n## Celery Configuration\n# CELERY_BROKER_POOL_LIMIT=\n# CELERY_WORKER_DOCFETCHING_CONCURRENCY=\n# CELERY_WORKER_DOCPROCESSING_CONCURRENCY=\n# CELERY_WORKER_LIGHT_CONCURRENCY=\n# CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER=\n\n## AWS Configuration\n# AWS_ACCESS_KEY_ID=\n# AWS_SECRET_ACCESS_KEY=\n# AWS_REGION_NAME=\n# Set to true when using IAM authentication for Postgres connections.\nUSE_IAM_AUTH=false\n\n\n################################################################################\n## DEVELOPER, DEBUGGING, AND LOGGING\n################################################################################\n## Logging and Telemetry\nLOG_LEVEL=info\nLOG_ONYX_MODEL_INTERACTIONS=False\n# LOG_VESPA_TIMING_INFORMATION=\n# LOG_ENDPOINT_LATENCY=\n# LOG_POSTGRES_LATENCY=\n# LOG_POSTGRES_CONN_COUNTS=\n# DISABLE_TELEMETRY=\n\n## Feature Flags\n# SHOW_EXTRA_CONNECTORS=true\n# DISABLE_MODEL_SERVER=false\n\n## Analytics\n# SENTRY_DSN=\n\n## Demo/Testing\n# MOCK_CONNECTOR_FILE_PATH=\n\n\n################################################################################\n## ADVANCED CONFIGURATIONS\n################################################################################\n## SlackBot Configuration\n# ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER=\n# ONYX_BOT_FEEDBACK_VISIBILITY=\n# ONYX_BOT_DISPLAY_ERROR_MSGS=\n# NOTIFY_SLACKBOT_NO_ANSWER=\n# ONYX_BOT_MAX_QPM=\n# ONYX_BOT_MAX_WAIT_TIME=\n\n## Advanced Auth Settings\n# GOOGLE_OAUTH_CLIENT_ID=\n# GOOGLE_OAUTH_CLIENT_SECRET=\n# REQUIRE_EMAIL_VERIFICATION=\n# SMTP_SERVER=\n# SMTP_PORT=\n# SMTP_USER=\n# SMTP_PASS=\n# ENABLE_EMAIL_INVITES=\n# EMAIL_FROM=\n# OAUTH_CLIENT_ID=\n# OAUTH_CLIENT_SECRET=\n# OPENID_CONFIG_URL=\n# OIDC_PKCE_ENABLED=\n# TRACK_EXTERNAL_IDP_EXPIRY=\n# CORS_ALLOWED_ORIGIN=\n# INTEGRATION_TESTS_MODE=\n# JWT_PUBLIC_KEY_URL=\n\n## Gen AI Settings\n# GEN_AI_MAX_TOKENS=\n# LLM_SOCKET_READ_TIMEOUT=\n# MAX_CHUNKS_FED_TO_CHAT=\n# DISABLE_LITELLM_STREAMING=\n# LITELLM_EXTRA_HEADERS=\n# GEN_AI_API_KEY=\n# GENERATIVE_MODEL_ACCESS_CHECK_FREQ=\n# LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS=\n\n## Query Options\n# DOC_TIME_DECAY=\n# HYBRID_ALPHA=\n# EDIT_KEYWORD_QUERY=\n# USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH=\n\n## Model Configuration\n# EMBEDDING_BATCH_SIZE=\n# DOCUMENT_ENCODER_MODEL=\n# DOC_EMBEDDING_DIM=\n# NORMALIZE_EMBEDDINGS=\n# ASYM_QUERY_PREFIX=\n# ASYM_PASSAGE_PREFIX=\n# DISABLE_RERANK_FOR_STREAMING=\n# MODEL_SERVER_PORT=\n# INDEX_BATCH_SIZE=\n# MIN_THREADS_ML_MODELS=\n# CLIENT_EMBEDDING_TIMEOUT=\n\n## Indexing Configuration\n# VESPA_SEARCHER_THREADS=\n# ENABLED_CONNECTOR_TYPES=\n# DISABLE_INDEX_UPDATE_ON_SWAP=\n# CONTINUE_ON_CONNECTOR_FAILURE=\n# CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=\n# JIRA_CONNECTOR_LABELS_TO_SKIP=\n# WEB_CONNECTOR_VALIDATE_URLS=\n# JIRA_SERVER_API_VERSION=\n# JIRA_CLOUD_API_VERSION=\n# GONG_CONNECTOR_START_TIME=\n# NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=\n# GITHUB_CONNECTOR_BASE_URL=\n# MAX_DOCUMENT_CHARS=\n# MAX_FILE_SIZE_BYTES=\n\n## OAuth Connector Configs\n# EGNYTE_CLIENT_ID=\n# EGNYTE_CLIENT_SECRET=\n# EGNYTE_LOCALHOST_OVERRIDE=\n# LINEAR_CLIENT_ID=\n# LINEAR_CLIENT_SECRET=\n\n## Miscellaneous\n# ONYX_QUERY_HISTORY_TYPE=\n# CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=\n# VESPA_LANGUAGE_OVERRIDE=\n\n## Frontend Configs\n# THEME_IS_DARK=\n# NEXT_PUBLIC_DISABLE_LOGOUT=\n# NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=\n# NEXT_PUBLIC_THEME=\n# NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=\n# NEXT_PUBLIC_CUSTOM_REFRESH_URL=\n\n## Pointer to services\nPOSTGRES_HOST=relational_db\nVESPA_HOST=index\nREDIS_HOST=cache\nMODEL_SERVER_HOST=inference_model_server\nINDEXING_MODEL_SERVER_HOST=indexing_model_server\nINTERNAL_URL=http://api_server:8080\n"
  },
  {
    "path": "deployment/docker_compose/init-letsencrypt.sh",
    "content": "#!/bin/bash\n\n# .env.nginx file must be present in the same directory as this script and\n# must set DOMAIN (and optionally EMAIL)\nset -o allexport\nsource .env.nginx\nset +o allexport\n\n# Function to determine correct docker compose command\ndocker_compose_cmd() {\n  if command -v docker-compose >/dev/null 2>&1; then\n    echo \"docker-compose\"\n  elif command -v docker compose >/dev/null 2>&1; then\n    echo \"docker compose\"\n  else\n    echo 'Error: docker-compose or docker compose is not installed.' >&2\n    exit 1\n  fi\n}\n\n# Assign appropriate Docker Compose command\nCOMPOSE_CMD=$(docker_compose_cmd)\n\n# Only add www to domain list if domain wasn't explicitly set as a subdomain\nif [[ ! $DOMAIN == www.* ]]; then\n    domains=(\"$DOMAIN\" \"www.$DOMAIN\")\nelse\n    domains=(\"$DOMAIN\")\nfi\n\nrsa_key_size=4096\ndata_path=\"../data/certbot\"\nemail=\"$EMAIL\" # Adding a valid address is strongly recommended\nstaging=0 # Set to 1 if you're testing your setup to avoid hitting request limits\n\nif [ -d \"$data_path\" ]; then\n  read -p \"Existing data found for $domains. Continue and replace existing certificate? (y/N) \" decision\n  if [ \"$decision\" != \"Y\" ] && [ \"$decision\" != \"y\" ]; then\n    exit\n  fi\nfi\n\n\nif [ ! -e \"$data_path/conf/options-ssl-nginx.conf\" ] || [ ! -e \"$data_path/conf/ssl-dhparams.pem\" ]; then\n  echo \"### Downloading recommended TLS parameters ...\"\n  mkdir -p \"$data_path/conf\"\n  curl -s https://raw.githubusercontent.com/certbot/certbot/master/certbot-nginx/certbot_nginx/_internal/tls_configs/options-ssl-nginx.conf > \"$data_path/conf/options-ssl-nginx.conf\"\n  curl -s https://raw.githubusercontent.com/certbot/certbot/master/certbot/certbot/ssl-dhparams.pem > \"$data_path/conf/ssl-dhparams.pem\"\n  echo\nfi\n\necho \"### Creating dummy certificate for $domains ...\"\npath=\"/etc/letsencrypt/live/$domains\"\nmkdir -p \"$data_path/conf/live/$domains\"\n$COMPOSE_CMD -f docker-compose.prod.yml run  --name onyx --rm --entrypoint \"\\\n  openssl req -x509 -nodes -newkey rsa:$rsa_key_size -days 1\\\n    -keyout '$path/privkey.pem' \\\n    -out '$path/fullchain.pem' \\\n    -subj '/CN=localhost'\" certbot\necho\n\n\necho \"### Starting nginx ...\"\n$COMPOSE_CMD -f docker-compose.prod.yml up --force-recreate -d nginx\necho\n\necho \"Waiting for nginx to be ready, this may take a minute...\"\nwhile true; do\n  # Use curl to send a request and capture the HTTP status code\n  status_code=$(curl -o /dev/null -s -w \"%{http_code}\\n\" \"http://localhost/api/health\")\n  \n  # Check if the status code is 200\n  if [ \"$status_code\" -eq 200 ]; then\n    break  # Exit the loop\n  else\n    echo \"Nginx is not ready yet, retrying in 5 seconds...\"\n    sleep 5  # Sleep for 5 seconds before retrying\n  fi\ndone\n\necho \"### Deleting dummy certificate for $domains ...\"\n$COMPOSE_CMD -f docker-compose.prod.yml run  --name onyx --rm --entrypoint \"\\\n  rm -Rf /etc/letsencrypt/live/$domains && \\\n  rm -Rf /etc/letsencrypt/archive/$domains && \\\n  rm -Rf /etc/letsencrypt/renewal/$domains.conf\" certbot\necho\n\n\necho \"### Requesting Let's Encrypt certificate for $domains ...\"\n#Join $domains to -d args\ndomain_args=\"\"\nfor domain in \"${domains[@]}\"; do\n  domain_args=\"$domain_args -d $domain\"\ndone\n\n# Select appropriate email arg\ncase \"$email\" in\n  \"\") email_arg=\"--register-unsafely-without-email\" ;;\n  *) email_arg=\"--email $email\" ;;\nesac\n\n# Enable staging mode if needed\nif [ $staging != \"0\" ]; then staging_arg=\"--staging\"; fi\n\n$COMPOSE_CMD -f docker-compose.prod.yml run --name onyx --rm --entrypoint \"\\\n  certbot certonly --webroot -w /var/www/certbot \\\n    $staging_arg \\\n    $email_arg \\\n    $domain_args \\\n    --rsa-key-size $rsa_key_size \\\n    --agree-tos \\\n    --force-renewal\" certbot\necho\n\necho \"### Renaming certificate directory if needed ...\"\n$COMPOSE_CMD -f docker-compose.prod.yml run --name onyx --rm --entrypoint \"\\\n  sh -c 'for domain in $domains; do \\\n    numbered_dir=\\$(find /etc/letsencrypt/live -maxdepth 1 -type d -name \\\"\\$domain-00*\\\" | sort -r | head -n1); \\\n    if [ -n \\\"\\$numbered_dir\\\" ]; then \\\n      mv \\\"\\$numbered_dir\\\" /etc/letsencrypt/live/\\$domain; \\\n    fi; \\\n  done'\" certbot\n\necho \"### Reloading nginx ...\"\n$COMPOSE_CMD -f docker-compose.prod.yml up --force-recreate -d\n"
  },
  {
    "path": "deployment/docker_compose/install.ps1",
    "content": "# Onyx Installer for Windows\n# Usage: .\\install.ps1 [OPTIONS]\n# Remote (with params):\n#   & ([scriptblock]::Create((irm https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.ps1))) -Lite -NoPrompt\n# Remote (defaults only, configure via interaction during script):\n#   irm https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.ps1 | iex\n\nparam(\n    [switch]$Shutdown,\n    [switch]$DeleteData,\n    [switch]$IncludeCraft,\n    [switch]$Lite,\n    [switch]$Local,\n    [switch]$NoPrompt,\n    [switch]$DryRun,\n    [switch]$ShowVerbose,\n    [switch]$Help\n)\n\n$ErrorActionPreference = \"Stop\"\n\n# Runs a native command with stderr silenced and ErrorActionPreference=Continue.\nfunction Invoke-NativeQuiet {\n    param([scriptblock]$Command, [switch]$PassThru)\n    $prev = $ErrorActionPreference\n    $ErrorActionPreference = \"Continue\"\n    try {\n        if ($PassThru) { & $Command 2>$null }\n        else           { $null = & $Command 2>$null }\n    } finally { $ErrorActionPreference = $prev }\n}\n\n$script:ExpectedDockerRamGB = 10\n$script:ExpectedDiskGB = 32\n$script:InstallRoot = if ($env:INSTALL_PREFIX) { $env:INSTALL_PREFIX } else { \"onyx_data\" }\n$script:LiteComposeFile = \"docker-compose.onyx-lite.yml\"\n$script:GitHubRawUrl = \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose\"\n$script:NginxBaseUrl = \"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/data/nginx\"\n$script:CurrentStep = 0\n$script:TotalSteps = 10\n$script:ComposeCmdType = $null\n$script:LiteMode = $Lite.IsPresent\n$script:IncludeCraftMode = $IncludeCraft.IsPresent\n$script:IsWindowsServer = (Get-CimInstance Win32_OperatingSystem).ProductType -ne 1\n\n# ── Output Helpers ───────────────────────────────────────────────────────────\n\nfunction Print-Success  { param([string]$Message) Write-Host \"[OK] $Message\" -ForegroundColor Green }\nfunction Print-OnyxError{ param([string]$Message) Write-Host \"[X]  $Message\" -ForegroundColor Red }\nfunction Print-Info     { param([string]$Message) Write-Host \"[i]  $Message\" -ForegroundColor Yellow }\nfunction Print-Warning  { param([string]$Message) Write-Host \"[!]  $Message\" -ForegroundColor Yellow }\n\nfunction Print-Step {\n    param([string]$Title)\n    $script:CurrentStep++\n    Write-Host \"`n=== $Title - Step $($script:CurrentStep)/$($script:TotalSteps) ===`n\" -ForegroundColor Cyan\n}\n\nfunction Test-Interactive {\n    return -not $NoPrompt\n}\n\nfunction Prompt-OrDefault {\n    param([string]$PromptText, [string]$DefaultValue)\n    if (-not (Test-Interactive)) { return $DefaultValue }\n    $reply = Read-Host $PromptText\n    if ([string]::IsNullOrWhiteSpace($reply)) { return $DefaultValue }\n    return $reply\n}\n\nfunction Confirm-Action {\n    param([string]$Description)\n    $reply = (Prompt-OrDefault \"Install $Description? (Y/n) [default: Y]\" \"Y\").Trim().ToLower()\n    if ($reply -match '^n') {\n        Print-Warning \"Skipping: $Description\"\n        return $false\n    }\n    return $true\n}\n\nfunction Prompt-VersionTag {\n    Print-Info \"Which tag would you like to deploy?\"\n    if ($script:IncludeCraftMode) {\n        Write-Host \"  - Press Enter for craft-latest (recommended for Craft)\"\n        Write-Host \"  - Type a specific tag (e.g., craft-v1.0.0)\"\n        $version = Prompt-OrDefault \"Enter tag [default: craft-latest]\" \"craft-latest\"\n    } else {\n        Write-Host \"  - Press Enter for edge (recommended)\"\n        Write-Host \"  - Type a specific tag (e.g., v0.1.0)\"\n        $version = Prompt-OrDefault \"Enter tag [default: edge]\" \"edge\"\n    }\n    if     ($script:IncludeCraftMode -and $version -eq \"craft-latest\") { Print-Info \"Selected: craft-latest (Craft enabled)\" }\n    elseif ($version -eq \"edge\") { Print-Info \"Selected: edge (latest nightly)\" }\n    else   { Print-Info \"Selected: $version\" }\n    return $version\n}\n\nfunction Prompt-DeploymentMode {\n    param([string]$LiteOverlayPath)\n    if ($script:LiteMode) { Print-Info \"Deployment mode: Lite (set via -Lite flag)\"; return }\n    Print-Info \"Which deployment mode would you like?\"\n    Write-Host \"  1) Lite      - Minimal deployment (no Vespa, Redis, or model servers)\"\n    Write-Host \"                  LLM chat, tools, file uploads, and Projects still work\"\n    Write-Host \"  2) Standard  - Full deployment with search, connectors, and RAG\"\n    $modeChoice = Prompt-OrDefault \"Choose a mode (1 or 2) [default: 1]\" \"1\"\n    if ($modeChoice -eq \"2\") {\n        Print-Info \"Selected: Standard mode\"\n    } else {\n        $script:LiteMode = $true\n        Print-Info \"Selected: Lite mode\"\n        if (-not (Ensure-OnyxFile $LiteOverlayPath \"$($script:GitHubRawUrl)/$($script:LiteComposeFile)\" $script:LiteComposeFile)) { exit 1 }\n    }\n}\n\nfunction Assert-NotCraftLite {\n    param([string]$Tag)\n    if (-not ($script:LiteMode -and $Tag -match '^craft-')) { return }\n    Print-OnyxError \"Cannot use a craft image tag ($Tag) with Lite mode.\"\n    Print-Info \"Craft requires services (Vespa, Redis, background workers) that lite mode disables.\"\n    exit 1\n}\n\nfunction Refresh-PathFromRegistry {\n    $env:Path = [System.Environment]::GetEnvironmentVariable(\"Path\", \"Machine\") + \";\" + [System.Environment]::GetEnvironmentVariable(\"Path\", \"User\")\n}\n\nfunction Get-NativeVersionString {\n    param([scriptblock]$Command)\n    $output = Invoke-NativeQuiet -PassThru $Command\n    $match = [regex]::Match(($output -join \"\"), '(\\d+\\.\\d+\\.\\d+)')\n    if ($match.Success) { return $match.Value }\n    return \"unknown\"\n}\n\n# ── Download Helpers ─────────────────────────────────────────────────────────\n\nfunction Download-OnyxFile {\n    param([string]$Url, [string]$Output)\n    for ($attempt = 1; $attempt -le 3; $attempt++) {\n        try {\n            [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12\n            Invoke-WebRequest -Uri $Url -OutFile $Output -UseBasicParsing -ErrorAction Stop\n            return\n        } catch {\n            if ($attempt -eq 3) { throw }\n            Start-Sleep -Seconds 2\n        }\n    }\n}\n\nfunction Ensure-OnyxFile {\n    param([string]$Path, [string]$Url, [string]$Description)\n    if ($Local) {\n        if (Test-Path $Path) { Print-Success \"Using existing $Description\"; return $true }\n        Print-OnyxError \"Required file missing: $Description ($Path)\"\n        return $false\n    }\n    Print-Info \"Downloading $Description...\"\n    try {\n        Download-OnyxFile -Url $Url -Output $Path\n        Print-Success \"$Description downloaded\"\n        return $true\n    } catch {\n        Print-OnyxError \"Failed to download $Description\"\n        return $false\n    }\n}\n\n# ── .env File Helpers ────────────────────────────────────────────────────────\n\nfunction Set-EnvFileValue {\n    param([string]$Path, [string]$Key, [string]$Value, [switch]$Uncomment)\n    $lines = Get-Content $Path\n    $found = $false\n    $result = @()\n    foreach ($line in $lines) {\n        if ($Uncomment -and $line -match \"^\\s*#\\s*${Key}=\") {\n            $result += \"${Key}=${Value}\"; $found = $true\n        } elseif ($line -match \"^${Key}=\") {\n            $result += \"${Key}=${Value}\"; $found = $true\n        } else { $result += $line }\n    }\n    if (-not $found) { $result += \"${Key}=${Value}\" }\n    Write-Utf8NoBom -Path $Path -Content (($result -join \"`n\") + \"`n\")\n}\n\nfunction Get-EnvFileValue {\n    param([string]$Path, [string]$Key)\n    $match = Select-String -Path $Path -Pattern \"^${Key}=(.*)\" | Select-Object -First 1\n    if ($match) { return $match.Matches.Groups[1].Value.Trim().Trim('\"', \"'\") }\n    return $null\n}\n\nfunction New-SecureSecret {\n    $bytes = New-Object byte[] 32\n    $rng = [System.Security.Cryptography.RandomNumberGenerator]::Create()\n    $rng.GetBytes($bytes); $rng.Dispose()\n    return ($bytes | ForEach-Object { $_.ToString(\"x2\") }) -join ''\n}\n\n# ── Docker Compose ───────────────────────────────────────────────────────────\n\nfunction Get-ComposeFileArgs {\n    param([switch]$AutoDetect)\n    $fileArgs = @(\"-f\", \"docker-compose.yml\")\n    $litePath = Join-Path $script:InstallRoot \"deployment\\$($script:LiteComposeFile)\"\n    if ($script:LiteMode -or ($AutoDetect -and (Test-Path $litePath))) {\n        $fileArgs += @(\"-f\", $script:LiteComposeFile)\n    }\n    return $fileArgs\n}\n\nfunction Invoke-Compose {\n    param([switch]$AutoDetect, [Parameter(ValueFromRemainingArguments)][string[]]$Arguments)\n    $deployDir = Join-Path $script:InstallRoot \"deployment\"\n    $fileArgs = Get-ComposeFileArgs -AutoDetect:$AutoDetect\n    $prev = $ErrorActionPreference; $ErrorActionPreference = \"Continue\"\n    Push-Location $deployDir\n    try {\n        if ($script:ComposeCmdType -eq \"plugin\") { & docker @(@(\"compose\") + $fileArgs + $Arguments) }\n        else { & docker-compose @($fileArgs + $Arguments) }\n        return $LASTEXITCODE\n    } finally { Pop-Location; $ErrorActionPreference = $prev }\n}\n\nfunction Initialize-ComposeCommand {\n    Invoke-NativeQuiet { docker compose version }\n    if ($LASTEXITCODE -eq 0) { $script:ComposeCmdType = \"plugin\"; return $true }\n    if (Get-Command docker-compose -ErrorAction SilentlyContinue) { $script:ComposeCmdType = \"standalone\"; return $true }\n    $script:ComposeCmdType = $null; return $false\n}\n\n# ── Utilities ────────────────────────────────────────────────────────────────\n\nfunction Compare-SemVer {\n    param([string]$Version1, [string]$Version2)\n    $parts1 = ($Version1 -split '\\.') + @(\"0\",\"0\",\"0\")\n    $parts2 = ($Version2 -split '\\.') + @(\"0\",\"0\",\"0\")\n    for ($i = 0; $i -lt 3; $i++) {\n        $v1 = 0; $v2 = 0\n        [void][int]::TryParse($parts1[$i], [ref]$v1)\n        [void][int]::TryParse($parts2[$i], [ref]$v2)\n        if ($v1 -lt $v2) { return -1 }\n        if ($v1 -gt $v2) { return 1 }\n    }\n    return 0\n}\n\nfunction Test-PortAvailable {\n    param([int]$Port)\n    try { $tcp = New-Object System.Net.Sockets.TcpClient; $tcp.Connect(\"127.0.0.1\", $Port); $tcp.Close(); return $false }\n    catch { return $true }\n}\n\nfunction Find-AvailablePort {\n    param([int]$StartPort = 3000)\n    for ($port = $StartPort; $port -le 65535; $port++) {\n        if (Test-PortAvailable $port) { return $port }\n    }\n    return $StartPort\n}\n\nfunction Get-DockerMemoryMB {\n    foreach ($p in @((Join-Path $env:APPDATA \"Docker\\settings.json\"), (Join-Path $env:LOCALAPPDATA \"Docker\\settings.json\"))) {\n        if (-not (Test-Path $p)) { continue }\n        try {\n            $s = Get-Content $p -Raw | ConvertFrom-Json\n            if ($s.memoryMiB -and $s.memoryMiB -gt 0) { return [int]$s.memoryMiB }\n        } catch { }\n    }\n    try {\n        $info = Invoke-NativeQuiet -PassThru { docker system info }\n        $mem = $info | Where-Object { $_ -match \"Total Memory\" } | Select-Object -First 1\n        if ($mem -match '(\\d+\\.?\\d*)\\s*GiB') { return [int]([double]$Matches[1] * 1024) }\n    } catch { }\n    return 0\n}\n\nfunction Test-OnyxHealth {\n    param([int]$Port)\n    Print-Info \"Checking Onyx service health...\"\n    Write-Host \"Containers are healthy, waiting for database migrations and service initialization to finish.\"\n    for ($attempt = 1; $attempt -le 600; $attempt++) {\n        try {\n            $r = Invoke-WebRequest -Uri \"http://localhost:$Port\" -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop\n            if ($r.StatusCode -in @(200, 301, 302, 303, 307, 308)) { return $true }\n        } catch { }\n        $m = [math]::Floor($attempt / 60); $s = $attempt % 60\n        $dots = \".\" * (($attempt % 3) + 1); $pad = \" \" * (3 - $dots.Length)\n        Write-Host -NoNewline \"`rChecking Onyx service${dots}${pad} (${m}m ${s}s elapsed)\"\n        Start-Sleep -Seconds 1\n    }\n    Write-Host \"\"; return $false\n}\n\nfunction Test-IsAdmin {\n    $id = [Security.Principal.WindowsIdentity]::GetCurrent()\n    (New-Object Security.Principal.WindowsPrincipal($id)).IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)\n}\n\nfunction Invoke-ElevatedRelaunch {\n    if (Test-IsAdmin) { return $false }\n    Print-Info \"Administrator privileges required. Relaunching as Administrator...\"\n    if (-not $PSCommandPath) { Print-Warning \"Cannot determine script path. Please re-run as Administrator.\"; return $false }\n    $argList = @(\"-NoProfile\", \"-ExecutionPolicy\", \"Bypass\", \"-File\", \"`\"$PSCommandPath`\"\")\n    if ($Shutdown)     { $argList += \"-Shutdown\" }\n    if ($DeleteData)   { $argList += \"-DeleteData\" }\n    if ($IncludeCraft) { $argList += \"-IncludeCraft\" }\n    if ($Lite)         { $argList += \"-Lite\" }\n    if ($Local)        { $argList += \"-Local\" }\n    if ($NoPrompt)     { $argList += \"-NoPrompt\" }\n    if ($DryRun)       { $argList += \"-DryRun\" }\n    if ($ShowVerbose)  { $argList += \"-ShowVerbose\" }\n    try { $proc = Start-Process powershell -ArgumentList $argList -Verb RunAs -Wait -PassThru; exit $proc.ExitCode }\n    catch { Print-Warning \"UAC elevation was declined or failed.\"; return $false }\n}\n\nfunction Write-Utf8NoBom {\n    param([string]$Path, [string]$Content)\n    [System.IO.File]::WriteAllText($Path, $Content, (New-Object System.Text.UTF8Encoding($false)))\n}\n\n# ── Help / Shutdown / Delete ─────────────────────────────────────────────────\n\nfunction Show-OnyxHelp {\n    $help = \"Onyx Installation Script for Windows`n\"\n    $help += \"`nUsage: .\\install.ps1 [OPTIONS]`n\"\n    $help += \"`nOptions:\"\n    $help += \"`n  -IncludeCraft  Enable Onyx Craft (AI-powered web app building)\"\n    $help += \"`n  -Lite          Deploy Onyx Lite (no Vespa, Redis, or model servers)\"\n    $help += \"`n  -Local         Use existing config files instead of downloading from GitHub\"\n    $help += \"`n  -Shutdown      Stop (pause) Onyx containers\"\n    $help += \"`n  -DeleteData    Remove all Onyx data (containers, volumes, and files)\"\n    $help += \"`n  -NoPrompt      Run non-interactively with defaults (for CI/automation)\"\n    $help += \"`n  -DryRun        Show what would be done without making changes\"\n    $help += \"`n  -ShowVerbose   Show detailed output for debugging\"\n    $help += \"`n  -Help          Show this help message\"\n    $help += \"`n`nExamples:\"\n    $help += \"`n  .\\install.ps1                    # Install Onyx\"\n    $help += \"`n  .\\install.ps1 -Lite              # Install Onyx Lite\"\n    $help += \"`n  .\\install.ps1 -IncludeCraft      # Install with Craft enabled\"\n    $help += \"`n  .\\install.ps1 -Shutdown          # Pause Onyx services\"\n    $help += \"`n  .\\install.ps1 -DeleteData        # Completely remove Onyx\"\n    $help += \"`n  .\\install.ps1 -Local             # Re-run using existing config\"\n    $help += \"`n  .\\install.ps1 -NoPrompt          # Non-interactive install\"\n    Write-Host $help\n}\n\nfunction Invoke-OnyxShutdown {\n    Write-Host \"`n=== Shutting down Onyx ===`n\" -ForegroundColor Cyan\n    $deployDir = Join-Path $script:InstallRoot \"deployment\"\n    if (-not (Test-Path (Join-Path $deployDir \"docker-compose.yml\"))) {\n        Print-Warning \"Onyx deployment not found. Nothing to shutdown.\"\n        return\n    }\n    if (-not (Initialize-ComposeCommand)) { Print-OnyxError \"Docker Compose not found.\"; exit 1 }\n    $stopArgs = @(\"stop\")\n    $result = Invoke-Compose -AutoDetect @stopArgs\n    if ($result -ne 0) { Print-OnyxError \"Failed to stop containers\"; exit 1 }\n    Print-Success \"Onyx containers stopped (paused)\"\n}\n\nfunction Invoke-OnyxDeleteData {\n    Write-Host \"`n=== WARNING: This will permanently delete all Onyx data ===`n\" -ForegroundColor Red\n    Print-Warning \"This action will remove all Onyx containers, volumes, files, and user data.\"\n    if (Test-Interactive) {\n        $confirm = Prompt-OrDefault \"Type 'DELETE' to confirm\" \"\"\n        if ($confirm -ne \"DELETE\") { Print-Info \"Operation cancelled.\"; return }\n    } else {\n        Print-OnyxError \"Cannot confirm destructive operation in non-interactive mode.\"\n        exit 1\n    }\n    $deployDir = Join-Path $script:InstallRoot \"deployment\"\n    if ((Test-Path (Join-Path $deployDir \"docker-compose.yml\")) -and (Initialize-ComposeCommand)) {\n        $downArgs = @(\"down\", \"-v\")\n        $result = Invoke-Compose -AutoDetect @downArgs\n        if ($result -eq 0) { Print-Success \"Containers and volumes removed\" }\n        else { Print-OnyxError \"Failed to remove containers\" }\n    }\n    if (Test-Path $script:InstallRoot) {\n        Remove-Item -Recurse -Force $script:InstallRoot\n        Print-Success \"Data directories removed\"\n    }\n    Print-Success \"All Onyx data has been permanently deleted!\"\n}\n\n# ── Docker Daemon ────────────────────────────────────────────────────────────\n\nfunction Wait-ForDockerDaemon {\n    param([int]$MaxWait = 60)\n    Print-Info \"Waiting for Docker daemon to become ready (up to ${MaxWait} seconds)...\"\n    $waited = 0; $lastError = \"\"; $unchangedErrorCount = 0\n    while ($waited -lt $MaxWait) {\n        Start-Sleep -Seconds 3; $waited += 3\n        $prevEAP = $ErrorActionPreference; $ErrorActionPreference = \"Continue\"\n        $dockerOutput = & docker info 2>&1\n        $ErrorActionPreference = $prevEAP\n        $errRecords = @($dockerOutput | Where-Object { $_ -is [System.Management.Automation.ErrorRecord] })\n        $currentError = if ($errRecords.Count -gt 0) { $errRecords[0].ToString() } else { \"\" }\n        if ($LASTEXITCODE -eq 0) { Write-Host \"\"; Print-Success \"Docker daemon is running\"; return $true }\n        if ($currentError) {\n            if ($currentError -eq $lastError) { $unchangedErrorCount++ } else { $unchangedErrorCount = 0; $lastError = $currentError }\n            if ($unchangedErrorCount -ge 5) {\n                Write-Host \"\"; Print-OnyxError \"Docker daemon is not starting. Persistent error after ${waited}s:\"\n                Write-Host \"    $lastError\" -ForegroundColor Red; return $false\n            }\n        }\n        $dots = \".\" * (($waited / 3 % 3) + 1); $pad = \" \" * (3 - $dots.Length)\n        Write-Host -NoNewline \"`rWaiting for Docker daemon${dots}${pad} (${waited}s elapsed)\"\n    }\n    Write-Host \"\"; Print-OnyxError \"Docker daemon did not respond within ${MaxWait} seconds.\"\n    if ($lastError) { Print-Info \"Last error: $lastError\" }\n    return $false\n}\n\nfunction Fix-DockerCredStore {\n    $configFile = Join-Path $env:USERPROFILE \".docker\\config.json\"\n    if (-not (Test-Path $configFile)) { return }\n    try {\n        $rawBytes = [System.IO.File]::ReadAllBytes($configFile)\n        $hasBom = $rawBytes.Length -ge 3 -and $rawBytes[0] -eq 0xEF -and $rawBytes[1] -eq 0xBB -and $rawBytes[2] -eq 0xBF\n        $raw = [System.IO.File]::ReadAllText($configFile).TrimStart([char]0xFEFF)\n        $config = $raw | ConvertFrom-Json\n        $needsRewrite = $hasBom\n        # Check property existence (not truthiness -- \"\" is falsy in PS)\n        if ($null -ne $config.PSObject.Properties['credsStore']) {\n            Print-Info \"Removing credsStore='$($config.credsStore)' from Docker config...\"\n            $config.PSObject.Properties.Remove('credsStore')\n            $needsRewrite = $true\n        }\n        if ($null -ne $config.PSObject.Properties['credHelpers']) {\n            Print-Info \"Removing credHelpers from Docker config...\"\n            $config.PSObject.Properties.Remove('credHelpers')\n            $needsRewrite = $true\n        }\n        if ($needsRewrite) {\n            Write-Utf8NoBom -Path $configFile -Content ($config | ConvertTo-Json -Depth 10)\n            Print-Success \"Docker credential config cleaned\"\n        }\n    } catch {\n        Print-Warning \"Could not update Docker config: $_\"\n        try { Write-Utf8NoBom -Path $configFile -Content '{}'; Print-Success \"Docker config reset\" }\n        catch { Print-Warning \"Could not reset Docker config: $_\" }\n    }\n\n}\n\nfunction Register-DockerService {\n    if (Get-Service docker -ErrorAction SilentlyContinue) { return $true }\n    Print-Info \"Docker service not registered. Looking for dockerd.exe...\"\n    $candidates = @(\n        (Join-Path $env:ProgramFiles \"Docker\\Docker\\resources\\dockerd.exe\"),\n        (Join-Path $env:ProgramFiles \"Docker\\dockerd.exe\"),\n        (Join-Path $env:ProgramFiles \"Docker\\Docker\\dockerd.exe\")\n    )\n    $dockerExe = Get-Command docker -ErrorAction SilentlyContinue\n    if ($dockerExe) { $candidates = @((Join-Path (Split-Path $dockerExe.Source) \"dockerd.exe\")) + $candidates }\n    $dockerdPath = $null\n    foreach ($c in $candidates) { if (Test-Path $c) { $dockerdPath = $c; break } }\n    if (-not $dockerdPath) {\n        Print-OnyxError \"Could not find dockerd.exe to register as a service.\"\n        return $false\n    }\n    Print-Info \"Found dockerd at: $dockerdPath\"\n    Invoke-NativeQuiet { & $dockerdPath --register-service }\n    if ($LASTEXITCODE -ne 0) {\n        Print-Warning \"dockerd --register-service failed (code $LASTEXITCODE), trying sc.exe...\"\n        Invoke-NativeQuiet { sc.exe create docker binPath= \"`\"$dockerdPath`\" --run-service\" start= auto }\n    }\n    if (-not (Get-Service docker -ErrorAction SilentlyContinue)) {\n        Print-OnyxError \"Failed to register Docker as a Windows service.\"\n        return $false\n    }\n    Print-Success \"Docker service registered\"\n    return $true\n}\n\nfunction Start-DockerDaemon {\n    Invoke-NativeQuiet { docker info }\n    if ($LASTEXITCODE -eq 0) { return $true }\n\n    if ($script:IsWindowsServer) {\n        Print-Info \"Windows Server detected - starting Docker...\"\n        # Prefer Docker Desktop if installed (provides Linux containers);\n        # native dockerd on Windows Server only supports Windows containers.\n        $ddExe = \"${env:ProgramFiles}\\Docker\\Docker\\Docker Desktop.exe\"\n        if (Test-Path $ddExe) {\n            Print-Info \"Docker Desktop is installed - using it for Linux container support.\"\n            # Stop native Docker service if running to avoid pipe conflicts\n            $svc = Get-Service docker -ErrorAction SilentlyContinue\n            if ($svc -and $svc.Status -eq 'Running') {\n                Print-Info \"Stopping native Docker Engine service to avoid conflicts...\"\n                Stop-Service docker -Force -ErrorAction SilentlyContinue\n                Start-Sleep -Seconds 3\n            }\n            Fix-DockerCredStore\n            Start-Process $ddExe\n            if (Wait-ForDockerDaemon -MaxWait 120) { return $true }\n            Print-Warning \"Docker Desktop did not start. Falling back to Docker Engine service...\"\n        }\n        # Fallback: native dockerd service (Windows containers only)\n        if (-not (Register-DockerService)) { return $false }\n        Fix-DockerCredStore\n        try { Start-Service docker -ErrorAction Stop; Print-Success \"Docker service started\" }\n        catch { Print-Warning \"Failed to start Docker service: $_\"; return $false }\n        return (Wait-ForDockerDaemon -MaxWait 60)\n    }\n\n    # Windows Desktop - start Docker Desktop\n    Print-Info \"Starting Docker Desktop...\"\n    $launchPath = $null\n    foreach ($path in @(\n        \"${env:ProgramFiles}\\Docker\\Docker\\Docker Desktop.exe\",\n        \"${env:ProgramFiles(x86)}\\Docker\\Docker\\Docker Desktop.exe\",\n        \"${env:LOCALAPPDATA}\\Docker\\Docker Desktop.exe\"\n    )) {\n        if (Test-Path $path) { Start-Process $path; $launchPath = $path; break }\n    }\n    if (-not $launchPath) {\n        try { Start-Process \"Docker Desktop\" -ErrorAction Stop }\n        catch { Print-Warning \"Could not find Docker Desktop executable.\"; return $false }\n    }\n    if (-not (Wait-ForDockerDaemon -MaxWait 120)) {\n        $proc = Get-Process \"Docker Desktop\" -ErrorAction SilentlyContinue\n        if ($proc) { Print-Info \"Docker Desktop IS running (PID: $($proc.Id)), but the daemon is not responding.\" }\n        else { Print-Warning \"Docker Desktop process is NOT running - it may have crashed.\" }\n        Print-Info \"Try starting Docker Desktop manually, check WSL2 status, or restart your computer.\"\n        return $false\n    }\n    Print-Info \"Waiting 15 seconds for Docker Desktop to fully stabilize...\"\n    Start-Sleep -Seconds 15\n    return $true\n}\n\n# ── Docker Install ───────────────────────────────────────────────────────────\n\nfunction Install-DockerEngine {\n    Print-Info \"Windows Server detected - Docker Engine is required.\"\n    if (-not (Confirm-Action \"Docker Engine (Windows Server)\")) { exit 1 }\n    if (-not (Test-IsAdmin)) { Invoke-ElevatedRelaunch }\n\n    try {\n        $feature = Get-WindowsFeature -Name Containers -ErrorAction Stop\n        if ($feature.InstallState -ne 'Installed') {\n            Print-Info \"Installing Windows Containers feature...\"\n            $result = Install-WindowsFeature -Name Containers -ErrorAction Stop\n            if ($result.RestartNeeded -eq 'Yes') {\n                Print-Warning \"A reboot is required. Please restart and re-run this script.\"\n                exit 0\n            }\n            Print-Success \"Containers feature installed\"\n        }\n    } catch { Print-Warning \"Could not check/install Containers feature: $_\" }\n\n    $installed = $false\n\n    if (-not $installed) {\n        Print-Info \"Attempting Docker install via DockerMsftProvider...\"\n        try {\n            if (-not (Get-PackageProvider -Name NuGet -ErrorAction SilentlyContinue)) {\n                Install-PackageProvider -Name NuGet -MinimumVersion 2.8.5.201 -Force | Out-Null\n            }\n            if (-not (Get-Module DockerMsftProvider -ListAvailable -ErrorAction SilentlyContinue)) {\n                Install-Module -Name DockerMsftProvider -Repository PSGallery -Force\n            }\n            Install-Package -Name docker -ProviderName DockerMsftProvider -Force | Out-Null\n            $installed = $true\n            Print-Success \"Docker installed via DockerMsftProvider\"\n        } catch { Print-Warning \"DockerMsftProvider failed: $_\" }\n    }\n\n    if (-not $installed) {\n        Print-Info \"Downloading Docker binaries directly...\"\n        try {\n            $page = Invoke-WebRequest -Uri \"https://download.docker.com/win/static/stable/x86_64/\" -UseBasicParsing -ErrorAction Stop\n            $latestZip = $page.Links | Where-Object { $_.href -match '^docker-\\d+.*\\.zip$' } |\n                Sort-Object href -Descending | Select-Object -First 1\n            if (-not $latestZip) { throw \"Could not find Docker zip\" }\n            $zipPath = Join-Path $env:TEMP \"docker-ce.zip\"\n            Download-OnyxFile -Url \"https://download.docker.com/win/static/stable/x86_64/$($latestZip.href)\" -Output $zipPath\n            Expand-Archive -Path $zipPath -DestinationPath $env:ProgramFiles -Force\n            Remove-Item -Force $zipPath -ErrorAction SilentlyContinue\n            $dockerPath = Join-Path $env:ProgramFiles \"docker\"\n            $machinePath = [System.Environment]::GetEnvironmentVariable(\"Path\", \"Machine\")\n            if ($machinePath -notlike \"*$dockerPath*\") {\n                [System.Environment]::SetEnvironmentVariable(\"Path\", \"$machinePath;$dockerPath\", \"Machine\")\n            }\n            Refresh-PathFromRegistry\n            & \"$dockerPath\\dockerd.exe\" --register-service\n            $installed = $true\n            Print-Success \"Docker installed and registered as service\"\n        } catch { Print-Warning \"Direct binary install failed: $_\" }\n    }\n\n    if (-not $installed) {\n        Print-OnyxError \"Could not install Docker Engine on Windows Server.\"\n        Print-Info \"Install manually: https://docs.docker.com/engine/install/binaries/#install-server-and-client-binaries-on-windows\"\n        exit 1\n    }\n\n    try { Start-Service docker -ErrorAction Stop; Print-Success \"Docker service started\" }\n    catch { Print-OnyxError \"Failed to start Docker service: $_\"; exit 1 }\n    Install-ComposePlugin\n    if (-not (Wait-ForDockerDaemon -MaxWait 30)) { Print-OnyxError \"Docker installed but daemon not responding.\"; exit 1 }\n    Print-Success \"Docker Engine installed and running on Windows Server\"\n}\n\nfunction Install-ComposePlugin {\n    Invoke-NativeQuiet { docker compose version }\n    if ($LASTEXITCODE -eq 0) { return }\n    if (-not (Confirm-Action \"Docker Compose plugin\")) { return }\n    Print-Info \"Installing Docker Compose plugin...\"\n    $dest = Join-Path $env:ProgramFiles \"docker\\cli-plugins\"\n    New-Item -ItemType Directory -Force -Path $dest | Out-Null\n    try {\n        Download-OnyxFile -Url \"https://github.com/docker/compose/releases/latest/download/docker-compose-windows-x86_64.exe\" -Output (Join-Path $dest \"docker-compose.exe\")\n        Print-Success \"Docker Compose plugin installed\"\n    } catch {\n        Print-Warning \"Failed to install Docker Compose plugin: $_\"\n    }\n}\n\nfunction Install-Wsl {\n    Invoke-NativeQuiet { wsl --status }\n    if ($LASTEXITCODE -eq 0) { Print-Success \"WSL2 is available\"; return $true }\n    if (-not (Confirm-Action \"WSL2 (required for Docker)\")) { return $false }\n    Print-Info \"Installing WSL2...\"\n    try {\n        $proc = Start-Process wsl -ArgumentList \"--install\", \"--no-distribution\" -Wait -PassThru -NoNewWindow\n        if ($proc.ExitCode -eq 0) { Print-Success \"WSL2 installed\"; return $true }\n        Print-Warning \"WSL2 install exited with code $($proc.ExitCode). A reboot may be required.\"\n        return $false\n    } catch { Print-Warning \"Failed to install WSL2: $_\"; return $false }\n}\n\nfunction Install-DockerDesktop {\n    Print-Info \"Docker Desktop is required but not installed.\"\n    if (-not (Confirm-Action \"Docker Desktop\")) { exit 1 }\n    if (-not (Test-IsAdmin)) { Invoke-ElevatedRelaunch }\n    $wslReady = Install-Wsl\n    $installed = $false\n\n    if (Get-Command winget -ErrorAction SilentlyContinue) {\n        Print-Info \"Installing Docker Desktop via winget...\"\n        winget install Docker.DockerDesktop --accept-package-agreements --accept-source-agreements\n        if ($LASTEXITCODE -eq 0) { Print-Success \"Docker Desktop installed via winget\"; $installed = $true }\n    }\n\n    if (-not $installed -and (Get-Command choco -ErrorAction SilentlyContinue)) {\n        Print-Info \"Installing Docker Desktop via Chocolatey...\"\n        choco install docker-desktop -y\n        if ($LASTEXITCODE -eq 0) { Print-Success \"Docker Desktop installed via Chocolatey\"; $installed = $true }\n    }\n\n    if (-not $installed) {\n        Print-Info \"Downloading Docker Desktop installer directly...\"\n        $installerPath = Join-Path $env:TEMP \"DockerDesktopInstaller_$([System.IO.Path]::GetRandomFileName().Split('.')[0]).exe\"\n        try {\n            Download-OnyxFile -Url \"https://desktop.docker.com/win/main/amd64/Docker%20Desktop%20Installer.exe\" -Output $installerPath\n            $proc = Start-Process -FilePath $installerPath -ArgumentList \"install\", \"--quiet\", \"--accept-license\" -Wait -PassThru -NoNewWindow\n            if ($proc.ExitCode -eq 0) {\n                Print-Success \"Docker Desktop installed via direct download\"; $installed = $true\n            } elseif ($proc.ExitCode -eq 3) {\n                Print-Warning \"Prerequisites not met.\"\n                if (-not $wslReady) { Print-OnyxError \"WSL2 is required. Run: wsl --install --no-distribution, then reboot.\" }\n                else { Print-Info \"A reboot may be needed. Restart and re-run this script.\" }\n            } else {\n                Print-Warning \"Installer exited with code $($proc.ExitCode).\"\n                if (-not (Test-IsAdmin)) { Print-Info \"Try re-running as Administrator.\" }\n            }\n        } catch { Print-Warning \"Direct download failed: $_\" }\n        finally { Remove-Item -Force $installerPath -ErrorAction SilentlyContinue }\n    }\n\n    if (-not $installed) {\n        Print-OnyxError \"Could not install Docker Desktop automatically.\"\n        Print-Info \"Install manually: https://docs.docker.com/desktop/install/windows-install/\"\n        exit 1\n    }\n\n    Refresh-PathFromRegistry\n    if (-not (Get-Command docker -ErrorAction SilentlyContinue)) {\n        Print-OnyxError \"Docker installed but 'docker' command not available. Restart your terminal and re-run.\"\n        exit 1\n    }\n    if (-not (Start-DockerDaemon)) {\n        Print-OnyxError \"Docker Desktop installed but could not be started. Launch it from the Start Menu and re-run.\"\n        exit 1\n    }\n    Print-Success \"Docker Desktop installed and running\"\n}\n\nfunction Invoke-WslInstall {\n    Print-Info \"Native Docker on Windows Server only supports Windows containers.\"\n    Print-Info \"Onyx will be installed via WSL2 (Windows Subsystem for Linux).\"\n    if (-not (Confirm-Action \"Onyx via WSL2 (installs WSL2 + Ubuntu + Docker inside Linux)\")) { exit 1 }\n    if (-not (Test-IsAdmin)) { Invoke-ElevatedRelaunch }\n\n    # Free memory by stopping the Windows Docker service (not needed once we use WSL2)\n    $svc = Get-Service docker -ErrorAction SilentlyContinue\n    if ($svc -and $svc.Status -eq 'Running') {\n        Print-Info \"Stopping Windows Docker service to free memory for WSL2...\"\n        Stop-Service docker -Force -ErrorAction SilentlyContinue\n        Start-Sleep -Seconds 3\n    }\n\n    # Check available memory before proceeding\n    try {\n        $os = Get-CimInstance Win32_OperatingSystem\n        $freeGB = [math]::Round($os.FreePhysicalMemory / 1MB, 1)\n        $totalGB = [math]::Round($os.TotalVisibleMemorySize / 1MB, 1)\n        Print-Info \"System memory: ${totalGB}GB total, ${freeGB}GB free\"\n        if ($totalGB -lt 4) {\n            Print-OnyxError \"Onyx requires at least 4GB RAM (Lite mode) or 10GB RAM (Standard mode).\"\n            Print-Info \"This machine has ${totalGB}GB total. Consider using a larger instance.\"\n            exit 1\n        }\n    } catch {}\n\n    # Ensure WSL2 is available\n    Invoke-NativeQuiet { wsl --status }\n    if ($LASTEXITCODE -ne 0) {\n        if (-not (Confirm-Action \"WSL2 (Windows Subsystem for Linux)\")) { exit 1 }\n        Print-Info \"Installing WSL2...\"\n        try {\n            $proc = Start-Process wsl -ArgumentList \"--install\", \"--no-distribution\" -Wait -PassThru -NoNewWindow\n            if ($proc.ExitCode -ne 0) {\n                Print-OnyxError \"WSL2 installation failed (code $($proc.ExitCode)). A reboot may be needed.\"\n                Print-Info \"After rebooting, re-run this script.\"\n                exit 1\n            }\n        } catch {\n            Print-OnyxError \"Could not install WSL2: $_\"\n            exit 1\n        }\n    }\n    Print-Success \"WSL2 is available\"\n\n    # Ensure Ubuntu is installed in WSL\n    $distros = (Invoke-NativeQuiet -PassThru { wsl -l -q }) -join \"`n\"\n    if ($distros -notmatch \"Ubuntu\") {\n        Print-Info \"Installing Ubuntu in WSL2...\"\n        $proc = Start-Process wsl -ArgumentList \"--install\", \"-d\", \"Ubuntu\" -Wait -PassThru -NoNewWindow\n        if ($proc.ExitCode -ne 0) {\n            Print-OnyxError \"Ubuntu installation failed. Try manually: wsl --install -d Ubuntu\"\n            Print-Info \"If this is a memory error, this machine needs at least 4GB RAM.\"\n            exit 1\n        }\n    }\n    Print-Success \"Ubuntu is available in WSL2\"\n\n    # Build the install.sh invocation to run inside WSL2\n    Print-Info \"Handing off to the Linux install script inside WSL2...\"\n    $bashArgs = @()\n    if ($script:LiteMode) { $bashArgs += \"--lite\" }\n    if ($script:IncludeCraftMode) { $bashArgs += \"--include-craft\" }\n    if ($NoPrompt) { $bashArgs += \"--no-prompt\" }\n    if ($ShowVerbose) { $bashArgs += \"--verbose\" }\n\n    $installUrl = \"$($script:GitHubRawUrl)/install.sh\"\n    $bashCmd = \"curl -fsSL '$installUrl' | bash -s -- $($bashArgs -join ' ')\"\n    Print-Info \"Running: $bashCmd\"\n    wsl -d Ubuntu -- bash -c $bashCmd\n    $wslExit = $LASTEXITCODE\n\n    if ($wslExit -eq 0) {\n        Print-Success \"Onyx installation complete (via WSL2)\"\n        # Determine the port Onyx is running on inside WSL\n        Print-Info \"Onyx should be accessible at http://localhost:3000\"\n        Print-Info \"WSL2 automatically forwards ports to the Windows host.\"\n    } else {\n        Print-OnyxError \"Installation inside WSL2 exited with code $wslExit\"\n        Print-Info \"You can debug by running: wsl -d Ubuntu\"\n    }\n    exit $wslExit\n}\n\nfunction Install-Docker {\n    if ($script:IsWindowsServer) { Install-DockerEngine } else { Install-DockerDesktop }\n}\n\n# ── Main Installation Flow ───────────────────────────────────────────────────\n\nfunction Main {\n    if ($Help) { Show-OnyxHelp; return }\n    if ($PSVersionTable.PSVersion.Major -lt 5) { Print-OnyxError \"PowerShell 5+ required (found $($PSVersionTable.PSVersion))\"; exit 1 }\n    if ($script:LiteMode -and $script:IncludeCraftMode) {\n        Print-OnyxError \"-Lite and -IncludeCraft cannot be used together.\"\n        exit 1\n    }\n    if ($script:LiteMode) { $script:ExpectedDockerRamGB = 4; $script:ExpectedDiskGB = 16 }\n    if ($Shutdown)   { Invoke-OnyxShutdown; return }\n    if ($DeleteData) { Invoke-OnyxDeleteData; return }\n\n    if (-not (Get-Command docker -ErrorAction SilentlyContinue)) { Install-Docker }\n\n    # Banner\n    $edition = if ($script:IsWindowsServer) { \"Windows Server\" } else { \"Windows Desktop\" }\n    Write-Host \"`n   ____`n  / __ \\`n | |  | |_ __  _   ___  __`n | |  | | '_ \\| | | \\ \\/ /`n | |__| | | | | |_| |>  < `n  \\____/|_| |_|\\__, /_/\\_\\`n                __/ |`n               |___/\" -ForegroundColor Cyan\n    Write-Host \"Welcome to Onyx Installation Script (Windows)\"\n    Write-Host \"==============================================\"\n    Print-Success \"$edition detected\"\n    Write-Host \"This script will:\" -ForegroundColor Yellow\n    Write-Host \"1. Download deployment files for Onyx into a new '$($script:InstallRoot)' directory\"\n    Write-Host \"2. Check your system resources (Docker, memory, disk space)\"\n    Write-Host \"3. Guide you through deployment options (version, authentication)\"\n\n    if (Test-Interactive) {\n        Write-Host \"`nPlease acknowledge and press Enter to continue...\" -ForegroundColor Yellow\n        $null = Prompt-OrDefault \"\" \"\"\n    } else {\n        Write-Host \"`nRunning in non-interactive mode - proceeding automatically...\" -ForegroundColor Yellow\n    }\n\n    if ($DryRun) {\n        Print-Info \"Dry run mode - showing what would happen:\"\n        Write-Host \"  - Install root: $($script:InstallRoot)  Lite: $($script:LiteMode)  Craft: $($script:IncludeCraftMode)\"\n        Write-Host \"  - OS: Windows $([System.Environment]::OSVersion.Version)  PS: $($PSVersionTable.PSVersion)\"\n        Print-Success \"Dry run complete (no changes made)\"\n        return\n    }\n    if ($ShowVerbose) { Print-Info \"Verbose mode enabled\" }\n\n    # ── Step 1: Verify Docker ─────────────────────────────────────────────\n    Print-Step \"Verifying Docker installation\"\n    if (-not (Get-Command docker -ErrorAction SilentlyContinue)) { Install-Docker }\n    $dockerVersion = Get-NativeVersionString { docker --version }\n    Print-Success \"Docker $dockerVersion is installed\"\n\n    if (-not (Initialize-ComposeCommand)) {\n        if ($script:IsWindowsServer) {\n            Install-ComposePlugin\n            if (-not (Initialize-ComposeCommand)) { Print-OnyxError \"Docker Compose could not be installed.\"; exit 1 }\n        } else {\n            Print-OnyxError \"Docker Compose is not installed. Docker Desktop includes it.\"\n            Print-Info \"Visit: https://docs.docker.com/desktop/install/windows-install/\"\n            exit 1\n        }\n    }\n    $composeVersion = Get-NativeVersionString { if ($script:ComposeCmdType -eq \"plugin\") { docker compose version } else { docker-compose --version } }\n    Print-Success \"Docker Compose $composeVersion is installed ($($script:ComposeCmdType))\"\n\n    Invoke-NativeQuiet { docker info }\n    if ($LASTEXITCODE -ne 0) {\n        $label = if ($script:IsWindowsServer) { \"Docker service\" } else { \"Docker Desktop\" }\n        Print-Info \"Docker daemon is not running. Starting $label...\"\n        if (-not (Start-DockerDaemon)) { Print-OnyxError \"Could not start Docker. Start it manually and re-run.\"; exit 1 }\n    }\n    Print-Success \"Docker daemon is running\"\n    if ($script:IsWindowsServer) { Fix-DockerCredStore }\n\n    # Verify Docker is running Linux containers (Onyx images are Linux-based)\n    $osType = ((Invoke-NativeQuiet -PassThru { docker info --format '{{.OSType}}' }) -join \"\").Trim()\n    if ($osType -eq \"windows\") {\n        Print-Warning \"Docker is running in Windows containers mode, but Onyx requires Linux containers.\"\n        $switchCli = Join-Path $env:ProgramFiles \"Docker\\Docker\\DockerCli.exe\"\n        $switched = $false\n        if (Test-Path $switchCli) {\n            Print-Info \"Attempting to switch to Linux containers via DockerCli...\"\n            try { & $switchCli -SwitchLinuxEngine 2>$null } catch {}\n            Start-Sleep -Seconds 15\n            for ($w = 0; $w -lt 12; $w++) {\n                Invoke-NativeQuiet { docker info }\n                if ($LASTEXITCODE -eq 0) { break }\n                Start-Sleep -Seconds 5\n            }\n            $osType2 = ((Invoke-NativeQuiet -PassThru { docker info --format '{{.OSType}}' }) -join \"\").Trim()\n            $switched = ($osType2 -eq \"linux\")\n        }\n        if ($switched) {\n            Print-Success \"Switched to Linux containers\"\n        } else {\n            Print-Info \"Native Docker on Windows Server only supports Windows containers.\"\n            Print-Info \"Switching to WSL2 approach for Linux container support...\"\n            Invoke-WslInstall\n        }\n    }\n\n    # ── Step 2: Verify Resources ──────────────────────────────────────────\n    Print-Step \"Verifying Docker resources\"\n    $memoryMB = Get-DockerMemoryMB\n    if ($memoryMB -gt 0) {\n        $memoryGB = [math]::Round($memoryMB / 1024, 1)\n        $memoryDisplay = if ($memoryGB -ge 1) { \"~${memoryGB}GB\" } else { \"${memoryMB}MB\" }\n        Print-Info \"Docker memory allocation: $memoryDisplay\"\n    } else {\n        Print-Warning \"Could not determine memory allocation\"\n        $memoryDisplay = \"unknown\"\n    }\n\n    $diskAvailableGB = [math]::Floor((Get-PSDrive -Name (Get-Location).Drive.Name).Free / 1GB)\n    Print-Info \"Available disk space: ${diskAvailableGB}GB\"\n\n    $resourceWarning = $false\n    if ($memoryMB -gt 0 -and $memoryMB -lt ($script:ExpectedDockerRamGB * 1024)) {\n        Print-Warning \"Less than $($script:ExpectedDockerRamGB)GB RAM available (found: $memoryDisplay)\"\n        $resourceWarning = $true\n    }\n    if ($diskAvailableGB -lt $script:ExpectedDiskGB) {\n        Print-Warning \"Less than $($script:ExpectedDiskGB)GB disk space available (found: ${diskAvailableGB}GB)\"\n        $resourceWarning = $true\n    }\n    if ($resourceWarning) {\n        Print-Warning \"Onyx recommends at least $($script:ExpectedDockerRamGB)GB RAM and $($script:ExpectedDiskGB)GB disk for standard mode.\"\n        Print-Warning \"Lite mode requires less (1-4GB RAM, 8-16GB disk) but has no vector database.\"\n        $reply = (Prompt-OrDefault \"Do you want to continue anyway? (Y/n)\" \"y\").Trim().ToLower()\n        if ($reply -notmatch '^y') { Print-Info \"Installation cancelled.\"; exit 1 }\n        Print-Info \"Proceeding despite resource limitations...\"\n    }\n\n    # ── Step 3: Create Directories ────────────────────────────────────────\n    Print-Step \"Creating directory structure\"\n    if (Test-Path $script:InstallRoot) { Print-Info \"Using existing $($script:InstallRoot) directory\" }\n    $deploymentDir = Join-Path $script:InstallRoot \"deployment\"\n    New-Item -ItemType Directory -Force -Path $deploymentDir | Out-Null\n    New-Item -ItemType Directory -Force -Path (Join-Path $script:InstallRoot \"data\\nginx\\local\") | Out-Null\n    Print-Success \"Directory structure created\"\n\n    # ── Step 4: Download Config Files ─────────────────────────────────────\n    if ($Local) { Print-Step \"Verifying existing configuration files\" }\n    else { Print-Step \"Downloading Onyx configuration files\" }\n\n    $composeDest = Join-Path $deploymentDir \"docker-compose.yml\"\n    if (-not (Ensure-OnyxFile $composeDest \"$($script:GitHubRawUrl)/docker-compose.yml\" \"docker-compose.yml\")) { exit 1 }\n\n    if ($composeVersion -ne \"unknown\" -and (Compare-SemVer $composeVersion \"2.24.0\") -lt 0) {\n        Print-Warning \"Docker Compose $composeVersion is older than 2.24.0 (required for env_file format).\"\n        Print-Info \"Update Docker Desktop or install a newer Docker Compose. Installation may fail.\"\n        $reply = (Prompt-OrDefault \"Continue anyway? (Y/n)\" \"y\").Trim().ToLower()\n        if ($reply -notmatch '^y') { exit 1 }\n    }\n\n    $liteOverlayPath = Join-Path $deploymentDir $script:LiteComposeFile\n    if ($script:LiteMode) {\n        if (-not (Ensure-OnyxFile $liteOverlayPath \"$($script:GitHubRawUrl)/$($script:LiteComposeFile)\" $script:LiteComposeFile)) { exit 1 }\n    }\n\n    $envTemplateDest = Join-Path $deploymentDir \"env.template\"\n    if (-not (Ensure-OnyxFile $envTemplateDest \"$($script:GitHubRawUrl)/env.template\" \"env.template\")) { exit 1 }\n    if (-not (Ensure-OnyxFile (Join-Path $script:InstallRoot \"data\\nginx\\app.conf.template\") \"$($script:NginxBaseUrl)/app.conf.template\" \"nginx/app.conf.template\")) { exit 1 }\n    if (-not (Ensure-OnyxFile (Join-Path $script:InstallRoot \"data\\nginx\\run-nginx.sh\") \"$($script:NginxBaseUrl)/run-nginx.sh\" \"nginx/run-nginx.sh\")) { exit 1 }\n    if (-not (Ensure-OnyxFile (Join-Path $script:InstallRoot \"README.md\") \"$($script:GitHubRawUrl)/README.md\" \"README.md\")) { exit 1 }\n\n    $gitkeep = Join-Path $script:InstallRoot \"data\\nginx\\local\\.gitkeep\"\n    if (-not (Test-Path $gitkeep)) { New-Item -ItemType File -Force -Path $gitkeep | Out-Null }\n    Print-Success \"All configuration files ready\"\n\n    # ── Step 5: Deployment Config ─────────────────────────────────────────\n    Print-Step \"Setting up deployment configs\"\n    $envFile = Join-Path $deploymentDir \".env\"\n\n    # Check if services are already running\n    if ((Test-Path $composeDest) -and (Initialize-ComposeCommand)) {\n        $running = @()\n        $psArgs = @(\"ps\", \"-q\")\n        try { $running = @(Invoke-Compose -AutoDetect @psArgs 2>$null | Where-Object { $_ }) } catch { }\n        if ($running.Count -gt 0) {\n            Print-OnyxError \"Onyx services are currently running!\"\n            Print-Info \"Run '.\\install.ps1 -Shutdown' first, then re-run this script.\"\n            exit 1\n        }\n    }\n\n    $version = \"latest\"\n\n    if (Test-Path $envFile) {\n        Print-Info \"Existing .env file found. What would you like to do?\"\n        Write-Host \"  - Press Enter to restart with current configuration\"\n        Write-Host \"  - Type 'update' to update to a newer version\"\n        $reply = Prompt-OrDefault \"Choose an option [default: restart]\" \"\"\n\n        Prompt-DeploymentMode -LiteOverlayPath $liteOverlayPath\n\n        if ($reply -eq \"update\") {\n            $version = Prompt-VersionTag\n            Assert-NotCraftLite $version\n            Set-EnvFileValue -Path $envFile -Key \"IMAGE_TAG\" -Value $version\n            Print-Success \"Updated IMAGE_TAG to $version\"\n            if ($version -match '^craft-') {\n                Set-EnvFileValue -Path $envFile -Key \"ENABLE_CRAFT\" -Value \"true\" -Uncomment\n            }\n        } else {\n            Assert-NotCraftLite (Get-EnvFileValue -Path $envFile -Key \"IMAGE_TAG\")\n            Print-Info \"Keeping existing configuration\"\n        }\n        if ($script:LiteMode) {\n            $profiles = Get-EnvFileValue -Path $envFile -Key \"COMPOSE_PROFILES\"\n            if ($profiles -and $profiles -match 's3-filestore') {\n                Set-EnvFileValue -Path $envFile -Key \"COMPOSE_PROFILES\" -Value \"\"\n            }\n        }\n    } else {\n        Print-Info \"No existing .env file found. Setting up new deployment...\"\n        Prompt-DeploymentMode -LiteOverlayPath $liteOverlayPath\n        if ($script:LiteMode -and $script:IncludeCraftMode) {\n            Print-OnyxError \"-IncludeCraft cannot be used with Lite mode.\"\n            exit 1\n        }\n        if ($script:LiteMode) { $script:ExpectedDockerRamGB = 4; $script:ExpectedDiskGB = 16 }\n\n        $version = Prompt-VersionTag\n        Assert-NotCraftLite $version\n\n        Copy-Item -Path $envTemplateDest -Destination $envFile -Force\n        Set-EnvFileValue -Path $envFile -Key \"IMAGE_TAG\" -Value $version\n        Print-Success \"IMAGE_TAG set to $version\"\n        if ($script:LiteMode) { Set-EnvFileValue -Path $envFile -Key \"COMPOSE_PROFILES\" -Value \"\" }\n        Set-EnvFileValue -Path $envFile -Key \"AUTH_TYPE\" -Value \"basic\"\n        Print-Success \"Basic authentication enabled\"\n        Set-EnvFileValue -Path $envFile -Key \"USER_AUTH_SECRET\" -Value \"`\"$(New-SecureSecret)`\"\"\n        Print-Success \"Generated secure USER_AUTH_SECRET\"\n        if ($script:IncludeCraftMode -or $version -match '^craft-') {\n            Set-EnvFileValue -Path $envFile -Key \"ENABLE_CRAFT\" -Value \"true\" -Uncomment\n            Print-Success \"Onyx Craft enabled\"\n        } else {\n            Print-Info \"Onyx Craft disabled (use -IncludeCraft to enable)\"\n        }\n        Print-Success \".env file created\"\n        Print-Info \"You can customize .env later for OAuth/SAML, AI models, domain settings, and Craft.\"\n    }\n\n    # Clean up stale lite overlay if standard mode was selected\n    if (-not $script:LiteMode -and (Test-Path $liteOverlayPath)) {\n        Remove-Item -Force $liteOverlayPath\n        Print-Info \"Removed previous lite overlay (switching to standard mode)\"\n    }\n\n    # ── Step 6: Check Ports ───────────────────────────────────────────────\n    Print-Step \"Checking for available ports\"\n    $availablePort = Find-AvailablePort 3000\n    if ($availablePort -ne 3000) { Print-Info \"Port 3000 in use, using port $availablePort\" }\n    else { Print-Info \"Port 3000 is available\" }\n    $env:HOST_PORT = $availablePort\n    Print-Success \"Using port $availablePort for nginx\"\n\n    $currentImageTag = Get-EnvFileValue -Path $envFile -Key \"IMAGE_TAG\"\n    $useLatest = ($currentImageTag -eq \"edge\" -or $currentImageTag -eq \"latest\" -or $currentImageTag -match '^craft-')\n    if ($useLatest) { Print-Info \"Using '$currentImageTag' tag - will force pull and recreate containers\" }\n\n    # For pinned version tags, re-download config files from that tag so the\n    # compose file matches the images being pulled (the initial download used main).\n    if (-not $useLatest -and -not $Local) {\n        $pinnedBase = \"https://raw.githubusercontent.com/onyx-dot-app/onyx/$currentImageTag/deployment\"\n        Print-Info \"Fetching config files matching tag $currentImageTag...\"\n        try {\n            Download-OnyxFile \"$pinnedBase/docker_compose/docker-compose.yml\" $composeDest\n            try { Download-OnyxFile \"$pinnedBase/data/nginx/app.conf.template\" (Join-Path $script:InstallRoot \"data\\nginx\\app.conf.template\") } catch {}\n            try { Download-OnyxFile \"$pinnedBase/data/nginx/run-nginx.sh\" (Join-Path $script:InstallRoot \"data\\nginx\\run-nginx.sh\") } catch {}\n            if ($script:LiteMode) {\n                try { Download-OnyxFile \"$pinnedBase/docker_compose/$($script:LiteComposeFile)\" $liteOverlayPath } catch {}\n            }\n            Print-Success \"Config files updated to match $currentImageTag\"\n        } catch {\n            Print-Warning \"Tag $currentImageTag not found on GitHub - using main branch configs\"\n        }\n    }\n\n    # ── Step 7: Pull Images ───────────────────────────────────────────────\n    Print-Step \"Pulling Docker images\"\n    Print-Info \"This may take several minutes depending on your internet connection...\"\n\n    $pullArgs = @(\"pull\"); if (-not $ShowVerbose) { $pullArgs += \"--quiet\" }\n    if ((Invoke-Compose @pullArgs) -ne 0) { Print-OnyxError \"Failed to download Docker images\"; exit 1 }\n    Print-Success \"Docker images downloaded successfully\"\n\n    # ── Step 8: Start Services ────────────────────────────────────────────\n    Print-Step \"Starting Onyx services\"\n    Print-Info \"Launching containers...\"\n    $upArgs = @(\"up\", \"-d\")\n    if ($useLatest) { $upArgs += @(\"--pull\", \"always\", \"--force-recreate\") }\n    $upResult = Invoke-Compose @upArgs\n    if ($upResult -ne 0) { Print-OnyxError \"Failed to start Onyx services\"; exit 1 }\n\n    # ── Step 9: Container Health ──────────────────────────────────────────\n    Print-Step \"Verifying container health\"\n    Start-Sleep -Seconds 10\n    $restartIssues = $false\n    $containerIds = @()\n    $psArgs = @(\"ps\", \"-q\")\n    try { $containerIds = @(Invoke-Compose @psArgs 2>$null | Where-Object { $_ }) } catch { }\n\n    foreach ($cid in $containerIds) {\n        if ([string]::IsNullOrWhiteSpace($cid)) { continue }\n        $name = (& docker inspect --format '{{.Name}}' $cid 2>$null).TrimStart('/')\n        $restarts = 0; try { $restarts = [int](& docker inspect --format '{{.RestartCount}}' $cid 2>$null) } catch { }\n        $status = & docker inspect --format '{{.State.Status}}' $cid 2>$null\n        if ($status -eq \"running\" -and $restarts -gt 2) {\n            Print-OnyxError \"$name is in a restart loop (restarted $restarts times)\"; $restartIssues = $true\n        } elseif ($status -eq \"running\") { Print-Success \"$name is healthy\" }\n        elseif ($status -eq \"restarting\") { Print-OnyxError \"$name is stuck restarting\"; $restartIssues = $true }\n        else { Print-Warning \"$name status: $status\" }\n    }\n\n    if ($restartIssues) {\n        Print-OnyxError \"Some containers are experiencing issues!\"\n        $cmd = if ($script:ComposeCmdType -eq \"plugin\") { \"docker compose\" } else { \"docker-compose\" }\n        Print-Info \"Check logs: cd `\"$(Join-Path $script:InstallRoot 'deployment')`\" && $cmd $((Get-ComposeFileArgs) -join ' ') logs\"\n        Print-Info \"For help, contact: founders@onyx.app\"\n        exit 1\n    }\n\n    # ── Step 10: Complete ─────────────────────────────────────────────────\n    Print-Step \"Installation Complete!\"\n    Print-Success \"All containers are running successfully!\"\n    $port = if ($env:HOST_PORT) { $env:HOST_PORT } else { 3000 }\n\n    if (Test-OnyxHealth -Port $port) {\n        Write-Host \"============================================\" -ForegroundColor Green\n        Write-Host \"   Onyx service is ready!                   \" -ForegroundColor Green\n        Write-Host \"============================================\" -ForegroundColor Green\n    } else {\n        Print-Warning \"Health check timed out after 10 minutes\"\n        Print-Info \"Containers are running, but the web service may still be initializing.\"\n        Write-Host \"============================================\" -ForegroundColor Yellow\n        Write-Host \"   Onyx containers are running              \" -ForegroundColor Yellow\n        Write-Host \"============================================\" -ForegroundColor Yellow\n    }\n\n    Print-Info \"Access Onyx at: http://localhost:$port\"\n    Print-Info \"Visit http://localhost:$port/auth/signup to create your admin account\"\n    Print-Info \"The first user created will automatically have admin privileges\"\n\n    if ($script:LiteMode) {\n        Print-Info \"Running in Lite mode - Vespa, Redis, model servers, and background workers are NOT started.\"\n        Print-Info \"Connectors and RAG search are disabled. LLM chat, tools, Projects still work.\"\n    }\n\n    Print-Info \"See the README in $($script:InstallRoot) for more information.\"\n    Print-Info \"For help or issues, contact: founders@onyx.app\"\n}\n\nMain\n"
  },
  {
    "path": "deployment/docker_compose/install.sh",
    "content": "#!/bin/bash\n\nset -euo pipefail\n\n# Expected resource requirements (overridden below if --lite)\nEXPECTED_DOCKER_RAM_GB=10\nEXPECTED_DISK_GB=32\n\n# Parse command line arguments\nSHUTDOWN_MODE=false\nDELETE_DATA_MODE=false\nINCLUDE_CRAFT=false  # Disabled by default, use --include-craft to enable\nLITE_MODE=false       # Disabled by default, use --lite to enable\nUSE_LOCAL_FILES=false # Disabled by default, use --local to skip downloading config files\nNO_PROMPT=false\nDRY_RUN=false\nVERBOSE=false\n\nwhile [[ $# -gt 0 ]]; do\n    case $1 in\n        --shutdown)\n            SHUTDOWN_MODE=true\n            shift\n            ;;\n        --delete-data)\n            DELETE_DATA_MODE=true\n            shift\n            ;;\n        --include-craft)\n            INCLUDE_CRAFT=true\n            shift\n            ;;\n        --lite)\n            LITE_MODE=true\n            shift\n            ;;\n        --local)\n            USE_LOCAL_FILES=true\n            shift\n            ;;\n        --no-prompt)\n            NO_PROMPT=true\n            shift\n            ;;\n        --dry-run)\n            DRY_RUN=true\n            shift\n            ;;\n        --verbose)\n            VERBOSE=true\n            shift\n            ;;\n        --help|-h)\n            echo \"Onyx Installation Script\"\n            echo \"\"\n            echo \"Usage: $0 [OPTIONS]\"\n            echo \"\"\n            echo \"Options:\"\n            echo \"  --include-craft  Enable Onyx Craft (AI-powered web app building)\"\n            echo \"  --lite           Deploy Onyx Lite (no Vespa, Redis, or model servers)\"\n            echo \"  --local          Use existing config files instead of downloading from GitHub\"\n            echo \"  --shutdown       Stop (pause) Onyx containers\"\n            echo \"  --delete-data    Remove all Onyx data (containers, volumes, and files)\"\n            echo \"  --no-prompt      Run non-interactively with defaults (for CI/automation)\"\n            echo \"  --dry-run        Show what would be done without making changes\"\n            echo \"  --verbose        Show detailed output for debugging\"\n            echo \"  --help, -h       Show this help message\"\n            echo \"\"\n            echo \"Examples:\"\n            echo \"  $0                    # Install Onyx\"\n            echo \"  $0 --lite             # Install Onyx Lite (minimal deployment)\"\n            echo \"  $0 --include-craft    # Install Onyx with Craft enabled\"\n            echo \"  $0 --shutdown         # Pause Onyx services\"\n            echo \"  $0 --delete-data      # Completely remove Onyx and all data\"\n            echo \"  $0 --local            # Re-run using existing config files on disk\"\n            echo \"  $0 --no-prompt        # Non-interactive install with defaults\"\n            exit 0\n            ;;\n        *)\n            echo \"Unknown option: $1\"\n            echo \"Use --help for usage information\"\n            exit 1\n            ;;\n    esac\ndone\n\nif [[ \"$VERBOSE\" = true ]]; then\n    set -x\nfi\n\nif [[ \"$LITE_MODE\" = true ]] && [[ \"$INCLUDE_CRAFT\" = true ]]; then\n    echo \"ERROR: --lite and --include-craft cannot be used together.\"\n    echo \"Craft requires services (Vespa, Redis, background workers) that lite mode disables.\"\n    exit 1\nfi\n\n# When --lite is passed as a flag, lower resource thresholds early (before the\n# resource check). When lite is chosen interactively, the thresholds are adjusted\n# after the resource check has already passed with the standard thresholds —\n# which is the safer direction.\nif [[ \"$LITE_MODE\" = true ]]; then\n    EXPECTED_DOCKER_RAM_GB=4\n    EXPECTED_DISK_GB=16\nfi\n\nINSTALL_ROOT=\"${INSTALL_PREFIX:-onyx_data}\"\n\nLITE_COMPOSE_FILE=\"docker-compose.onyx-lite.yml\"\n\n# Build the -f flags for docker compose.\n# Pass \"true\" as $1 to auto-detect a previously-downloaded lite overlay\n# (used by shutdown/delete-data so users don't need to remember --lite).\ncompose_file_args() {\n    local auto_detect=\"${1:-false}\"\n    local args=\"-f docker-compose.yml\"\n    if [[ \"$LITE_MODE\" = true ]] || { [[ \"$auto_detect\" = true ]] && [[ -f \"${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}\" ]]; }; then\n        args=\"$args -f ${LITE_COMPOSE_FILE}\"\n    fi\n    echo \"$args\"\n}\n\n# --- Downloader detection (curl with wget fallback) ---\nDOWNLOADER=\"\"\ndetect_downloader() {\n    if command -v curl &> /dev/null; then\n        DOWNLOADER=\"curl\"\n        return 0\n    fi\n    if command -v wget &> /dev/null; then\n        DOWNLOADER=\"wget\"\n        return 0\n    fi\n    echo \"ERROR: Neither curl nor wget found. Please install one and retry.\"\n    exit 1\n}\ndetect_downloader\n\ndownload_file() {\n    local url=\"$1\"\n    local output=\"$2\"\n    if [[ \"$DOWNLOADER\" == \"curl\" ]]; then\n        curl -fsSL --retry 3 --retry-delay 2 --retry-connrefused -o \"$output\" \"$url\"\n    else\n        wget -q --tries=3 --timeout=20 -O \"$output\" \"$url\"\n    fi\n}\n\n# Ensures a required file is present. With --local, verifies the file exists on\n# disk. Otherwise, downloads it from the given URL. Returns 0 on success, 1 on\n# failure (caller should handle the exit).\nensure_file() {\n    local path=\"$1\"\n    local url=\"$2\"\n    local desc=\"$3\"\n\n    if [[ \"$USE_LOCAL_FILES\" = true ]]; then\n        if [[ -f \"$path\" ]]; then\n            print_success \"Using existing ${desc}\"\n            return 0\n        fi\n        print_error \"Required file missing: ${desc} (${path})\"\n        return 1\n    fi\n\n    print_info \"Downloading ${desc}...\"\n    if download_file \"$url\" \"$path\" 2>/dev/null; then\n        print_success \"${desc} downloaded\"\n        return 0\n    fi\n    print_error \"Failed to download ${desc}\"\n    print_info \"Please ensure you have internet connection and try again\"\n    return 1\n}\n\n# --- Interactive prompt helpers ---\nis_interactive() {\n    [[ \"$NO_PROMPT\" = false ]] && [[ -r /dev/tty ]] && [[ -w /dev/tty ]]\n}\n\nread_prompt_line() {\n    local prompt_text=\"$1\"\n    if ! is_interactive; then\n        REPLY=\"\"\n        return\n    fi\n    [[ -n \"$prompt_text\" ]] && printf \"%s\" \"$prompt_text\" > /dev/tty\n    IFS= read -r REPLY < /dev/tty || REPLY=\"\"\n}\n\nread_prompt_char() {\n    local prompt_text=\"$1\"\n    if ! is_interactive; then\n        REPLY=\"\"\n        return\n    fi\n    [[ -n \"$prompt_text\" ]] && printf \"%s\" \"$prompt_text\" > /dev/tty\n    IFS= read -r -n 1 REPLY < /dev/tty || REPLY=\"\"\n    printf \"\\n\" > /dev/tty\n}\n\nprompt_or_default() {\n    local prompt_text=\"$1\"\n    local default_value=\"$2\"\n    read_prompt_line \"$prompt_text\"\n    [[ -z \"$REPLY\" ]] && REPLY=\"$default_value\"\n    return 0\n}\n\nprompt_yn_or_default() {\n    local prompt_text=\"$1\"\n    local default_value=\"$2\"\n    read_prompt_char \"$prompt_text\"\n    [[ -z \"$REPLY\" ]] && REPLY=\"$default_value\"\n    return 0\n}\n\nconfirm_action() {\n    local description=\"$1\"\n    prompt_yn_or_default \"Install ${description}? (Y/n) [default: Y] \" \"Y\"\n    if [[ \"$REPLY\" =~ ^[Nn] ]]; then\n        print_warning \"Skipping: ${description}\"\n        return 1\n    fi\n    return 0\n}\n\n# Colors for output\nRED='\\033[0;31m'\nGREEN='\\033[0;32m'\nYELLOW='\\033[1;33m'\nBLUE='\\033[0;34m'\nBOLD='\\033[1m'\nNC='\\033[0m' # No Color\n\n# Step counter variables\nCURRENT_STEP=0\nTOTAL_STEPS=10\n\n# Print colored output\nprint_success() {\n    echo -e \"${GREEN}✓${NC} $1\"\n}\n\nprint_error() {\n    echo -e \"${RED}✗${NC} $1\"\n}\n\nprint_info() {\n    echo -e \"${YELLOW}ℹ${NC} $1\"\n}\n\nprint_step() {\n    CURRENT_STEP=$((CURRENT_STEP + 1))\n    echo \"\"\n    echo -e \"${BLUE}${BOLD}=== $1 - Step ${CURRENT_STEP}/${TOTAL_STEPS} ===${NC}\"\n    echo \"\"\n}\n\nprint_warning() {\n    echo -e \"${YELLOW}⚠${NC}  $1\"\n}\n\n# Handle shutdown mode\nif [ \"$SHUTDOWN_MODE\" = true ]; then\n    echo \"\"\n    echo -e \"${BLUE}${BOLD}=== Shutting down Onyx ===${NC}\"\n    echo \"\"\n    \n    if [ -d \"${INSTALL_ROOT}/deployment\" ]; then\n        print_info \"Stopping Onyx containers...\"\n\n        # Check if docker-compose.yml exists\n        if [ -f \"${INSTALL_ROOT}/deployment/docker-compose.yml\" ]; then\n            # Determine compose command\n            if docker compose version &> /dev/null; then\n                COMPOSE_CMD=\"docker compose\"\n            elif command -v docker-compose &> /dev/null; then\n                COMPOSE_CMD=\"docker-compose\"\n            else\n                print_error \"Docker Compose not found. Cannot stop containers.\"\n                exit 1\n            fi\n\n            # Stop containers (without removing them)\n            (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args true) stop)\n            if [ $? -eq 0 ]; then\n                print_success \"Onyx containers stopped (paused)\"\n            else\n                print_error \"Failed to stop containers\"\n                exit 1\n            fi\n        else\n            print_warning \"docker-compose.yml not found in ${INSTALL_ROOT}/deployment\"\n        fi\n    else\n        print_warning \"Onyx data directory not found. Nothing to shutdown.\"\n    fi\n\n    echo \"\"\n    print_success \"Onyx shutdown complete!\"\n    exit 0\nfi\n\n# Handle delete data mode\nif [ \"$DELETE_DATA_MODE\" = true ]; then\n    echo \"\"\n    echo -e \"${RED}${BOLD}=== WARNING: This will permanently delete all Onyx data ===${NC}\"\n    echo \"\"\n    print_warning \"This action will remove:\"\n    echo \"  • All Onyx containers and volumes\"\n    echo \"  • All downloaded files and configurations\"\n    echo \"  • All user data and documents\"\n    echo \"\"\n    if is_interactive; then\n        prompt_or_default \"Are you sure you want to continue? Type 'DELETE' to confirm: \" \"\"\n        echo \"\" > /dev/tty\n        if [ \"$REPLY\" != \"DELETE\" ]; then\n            print_info \"Operation cancelled.\"\n            exit 0\n        fi\n    else\n        print_error \"Cannot confirm destructive operation in non-interactive mode.\"\n        print_info \"Run interactively or remove the ${INSTALL_ROOT} directory manually.\"\n        exit 1\n    fi\n\n    print_info \"Removing Onyx containers and volumes...\"\n\n    if [ -d \"${INSTALL_ROOT}/deployment\" ]; then\n        # Check if docker-compose.yml exists\n        if [ -f \"${INSTALL_ROOT}/deployment/docker-compose.yml\" ]; then\n            # Determine compose command\n            if docker compose version &> /dev/null; then\n                COMPOSE_CMD=\"docker compose\"\n            elif command -v docker-compose &> /dev/null; then\n                COMPOSE_CMD=\"docker-compose\"\n            else\n                print_error \"Docker Compose not found. Cannot remove containers.\"\n                exit 1\n            fi\n\n            # Stop and remove containers with volumes\n            (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args true) down -v)\n            if [ $? -eq 0 ]; then\n                print_success \"Onyx containers and volumes removed\"\n            else\n                print_error \"Failed to remove containers and volumes\"\n            fi\n        fi\n    fi\n\n    print_info \"Removing data directories...\"\n    if [ -d \"${INSTALL_ROOT}\" ]; then\n        rm -rf \"${INSTALL_ROOT}\"\n        print_success \"Data directories removed\"\n    else\n        print_warning \"No ${INSTALL_ROOT} directory found\"\n    fi\n\n    echo \"\"\n    print_success \"All Onyx data has been permanently deleted!\"\n    exit 0\nfi\n\n# --- Auto-install Docker (Linux only) ---\n# Runs before the banner so a group-based re-exec doesn't repeat it.\ninstall_docker_linux() {\n    local distro_id=\"\"\n    if [[ -f /etc/os-release ]]; then\n        distro_id=\"$(. /etc/os-release && echo \"${ID:-}\")\"\n    fi\n\n    case \"$distro_id\" in\n        amzn)\n            print_info \"Detected Amazon Linux — installing Docker via package manager...\"\n            if command -v dnf &> /dev/null; then\n                sudo dnf install -y docker\n            else\n                sudo yum install -y docker\n            fi\n            ;;\n        *)\n            print_info \"Installing Docker via get.docker.com...\"\n            download_file \"https://get.docker.com\" /tmp/get-docker.sh\n            sudo sh /tmp/get-docker.sh\n            rm -f /tmp/get-docker.sh\n            ;;\n    esac\n\n    sudo systemctl start docker 2>/dev/null || sudo service docker start 2>/dev/null || true\n    sudo systemctl enable docker 2>/dev/null || true\n}\n\n# Detect OS (including WSL)\nIS_WSL=false\nif [[ -n \"${WSL_DISTRO_NAME:-}\" ]] || grep -qi microsoft /proc/version 2>/dev/null; then\n    IS_WSL=true\nfi\n\n# Dry-run: show plan and exit\nif [[ \"$DRY_RUN\" = true ]]; then\n    print_info \"Dry run mode — showing what would happen:\"\n    echo \"  • Install root: ${INSTALL_ROOT}\"\n    echo \"  • Lite mode: ${LITE_MODE}\"\n    echo \"  • Include Craft: ${INCLUDE_CRAFT}\"\n    echo \"  • OS type: ${OSTYPE:-unknown} (WSL: ${IS_WSL})\"\n    echo \"  • Downloader: ${DOWNLOADER}\"\n    echo \"\"\n    print_success \"Dry run complete (no changes made)\"\n    exit 0\nfi\n\nif ! command -v docker &> /dev/null; then\n    if [[ \"$OSTYPE\" == \"linux-gnu\"* ]] || [[ -n \"${WSL_DISTRO_NAME:-}\" ]]; then\n        print_info \"Docker is required but not installed.\"\n        if ! confirm_action \"Docker Engine\"; then\n            print_error \"Docker is required to run Onyx.\"\n            exit 1\n        fi\n        install_docker_linux\n        if ! command -v docker &> /dev/null; then\n            print_error \"Docker installation failed.\"\n            echo \"  Visit: https://docs.docker.com/get-docker/\"\n            exit 1\n        fi\n        print_success \"Docker installed successfully\"\n    fi\nfi\n\n# --- Auto-install Docker Compose plugin (Linux only) ---\nif command -v docker &> /dev/null \\\n    && ! docker compose version &> /dev/null \\\n    && ! command -v docker-compose &> /dev/null \\\n    && { [[ \"$OSTYPE\" == \"linux-gnu\"* ]] || [[ -n \"${WSL_DISTRO_NAME:-}\" ]]; }; then\n\n    print_info \"Docker Compose is required but not installed.\"\n    if ! confirm_action \"Docker Compose plugin\"; then\n        print_error \"Docker Compose is required to run Onyx.\"\n        exit 1\n    fi\n    COMPOSE_ARCH=\"$(uname -m)\"\n    COMPOSE_URL=\"https://github.com/docker/compose/releases/latest/download/docker-compose-linux-${COMPOSE_ARCH}\"\n    COMPOSE_DIR=\"/usr/local/lib/docker/cli-plugins\"\n    COMPOSE_TMP=\"$(mktemp)\"\n    sudo mkdir -p \"$COMPOSE_DIR\"\n    if download_file \"$COMPOSE_URL\" \"$COMPOSE_TMP\"; then\n        sudo mv \"$COMPOSE_TMP\" \"$COMPOSE_DIR/docker-compose\"\n        sudo chmod +x \"$COMPOSE_DIR/docker-compose\"\n        if docker compose version &> /dev/null; then\n            print_success \"Docker Compose plugin installed\"\n        else\n            print_error \"Docker Compose plugin installed but not detected.\"\n            echo \"  Visit: https://docs.docker.com/compose/install/\"\n            exit 1\n        fi\n    else\n        rm -f \"$COMPOSE_TMP\"\n        print_error \"Failed to download Docker Compose plugin.\"\n        echo \"  Visit: https://docs.docker.com/compose/install/\"\n        exit 1\n    fi\nfi\n\n# On Linux, ensure the current user can talk to the Docker daemon without\n# sudo.  If necessary, add them to the \"docker\" group and re-exec the\n# script under that group so the rest of the install proceeds normally.\nif command -v docker &> /dev/null \\\n    && { [[ \"$OSTYPE\" == \"linux-gnu\"* ]] || [[ -n \"${WSL_DISTRO_NAME:-}\" ]]; } \\\n    && [[ \"$(id -u)\" -ne 0 ]] \\\n    && ! docker info &> /dev/null; then\n    if [[ \"${_ONYX_REEXEC:-}\" = \"1\" ]]; then\n        print_error \"Cannot connect to Docker after group re-exec.\"\n        print_info \"Log out and back in, then run the script again.\"\n        exit 1\n    fi\n    if ! getent group docker &> /dev/null; then\n        sudo groupadd docker\n    fi\n    print_info \"Adding $USER to the docker group...\"\n    sudo usermod -aG docker \"$USER\"\n    print_info \"Re-launching with docker group active...\"\n    exec sg docker -c \"_ONYX_REEXEC=1 bash $(printf '%q ' \"$0\" \"$@\")\"\nfi\n\n# ASCII Art Banner\necho \"\"\necho -e \"${BLUE}${BOLD}\"\necho \"  ____                    \"\necho \" / __ \\                   \"\necho \"| |  | |_ __  _   ___  __ \"\necho \"| |  | | '_ \\| | | \\ \\/ / \"\necho \"| |__| | | | | |_| |>  <  \"\necho \" \\____/|_| |_|\\__, /_/\\_\\ \"\necho \"               __/ |      \"\necho \"              |___/       \"\necho -e \"${NC}\"\necho \"Welcome to Onyx Installation Script\"\necho \"====================================\"\necho \"\"\n\n# User acknowledgment section\necho -e \"${YELLOW}${BOLD}This script will:${NC}\"\necho \"1. Download deployment files for Onyx into a new '${INSTALL_ROOT}' directory\"\necho \"2. Check your system resources (Docker, memory, disk space)\"\necho \"3. Guide you through deployment options (version, authentication)\"\necho \"\"\n\nif is_interactive; then\n    echo -e \"${YELLOW}${BOLD}Please acknowledge and press Enter to continue...${NC}\"\n    read_prompt_line \"\"\n    echo \"\"\nelse\n    echo -e \"${YELLOW}${BOLD}Running in non-interactive mode - proceeding automatically...${NC}\"\n    echo \"\"\nfi\n\n# GitHub repo base URL - using main branch\nGITHUB_RAW_URL=\"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose\"\n\n# Check system requirements\nprint_step \"Verifying Docker installation\"\n\n# Check Docker\nif ! command -v docker &> /dev/null; then\n    print_error \"Docker is not installed. Please install Docker first.\"\n    echo \"Visit: https://docs.docker.com/get-docker/\"\n    exit 1\nfi\nDOCKER_VERSION=$(docker --version | grep -oE '[0-9]+\\.[0-9]+\\.[0-9]+' | head -1)\nprint_success \"Docker $DOCKER_VERSION is installed\"\n\n# Check Docker Compose\nif docker compose version &> /dev/null; then\n    COMPOSE_VERSION=$(docker compose version | grep -oE '[0-9]+\\.[0-9]+\\.[0-9]+' | head -1)\n    COMPOSE_CMD=\"docker compose\"\n    if [ -z \"$COMPOSE_VERSION\" ]; then\n        # Handle non-standard versions like \"dev\" - assume recent enough\n        COMPOSE_VERSION=\"dev\"\n        print_success \"Docker Compose (dev build) is installed (plugin)\"\n    else\n        print_success \"Docker Compose $COMPOSE_VERSION is installed (plugin)\"\n    fi\nelif command -v docker-compose &> /dev/null; then\n    COMPOSE_VERSION=$(docker-compose --version | grep -oE '[0-9]+\\.[0-9]+\\.[0-9]+' | head -1)\n    COMPOSE_CMD=\"docker-compose\"\n    if [ -z \"$COMPOSE_VERSION\" ]; then\n        COMPOSE_VERSION=\"dev\"\n        print_success \"Docker Compose (dev build) is installed (standalone)\"\n    else\n        print_success \"Docker Compose $COMPOSE_VERSION is installed (standalone)\"\n    fi\nelse\n    print_error \"Docker Compose is not installed. Please install Docker Compose first.\"\n    echo \"Visit: https://docs.docker.com/compose/install/\"\n    exit 1\nfi\n\n# Returns 0 if $1 <= $2, 1 if $1 > $2\n# Handles missing or non-numeric parts gracefully (treats them as 0)\nversion_compare() {\n    local version1=\"${1:-0.0.0}\"\n    local version2=\"${2:-0.0.0}\"\n\n    local v1_major v1_minor v1_patch v2_major v2_minor v2_patch\n    v1_major=$(echo \"$version1\" | cut -d. -f1)\n    v1_minor=$(echo \"$version1\" | cut -d. -f2)\n    v1_patch=$(echo \"$version1\" | cut -d. -f3)\n    v2_major=$(echo \"$version2\" | cut -d. -f1)\n    v2_minor=$(echo \"$version2\" | cut -d. -f2)\n    v2_patch=$(echo \"$version2\" | cut -d. -f3)\n\n    # Default non-numeric or empty parts to 0\n    [[ \"$v1_major\" =~ ^[0-9]+$ ]] || v1_major=0\n    [[ \"$v1_minor\" =~ ^[0-9]+$ ]] || v1_minor=0\n    [[ \"$v1_patch\" =~ ^[0-9]+$ ]] || v1_patch=0\n    [[ \"$v2_major\" =~ ^[0-9]+$ ]] || v2_major=0\n    [[ \"$v2_minor\" =~ ^[0-9]+$ ]] || v2_minor=0\n    [[ \"$v2_patch\" =~ ^[0-9]+$ ]] || v2_patch=0\n\n    if [ \"$v1_major\" -lt \"$v2_major\" ]; then return 0\n    elif [ \"$v1_major\" -gt \"$v2_major\" ]; then return 1; fi\n\n    if [ \"$v1_minor\" -lt \"$v2_minor\" ]; then return 0\n    elif [ \"$v1_minor\" -gt \"$v2_minor\" ]; then return 1; fi\n\n    [ \"$v1_patch\" -le \"$v2_patch\" ]\n}\n\n# Check Docker daemon\nif ! docker info &> /dev/null; then\n    if [[ \"$OSTYPE\" == \"darwin\"* ]]; then\n        print_info \"Docker daemon is not running. Starting Docker Desktop...\"\n        open -a Docker\n        # Wait up to 120 seconds for Docker to be ready\n        DOCKER_WAIT=0\n        DOCKER_MAX_WAIT=120\n        while ! docker info &> /dev/null; do\n            if [ $DOCKER_WAIT -ge $DOCKER_MAX_WAIT ]; then\n                print_error \"Docker Desktop did not start within ${DOCKER_MAX_WAIT} seconds.\"\n                print_info \"Please start Docker Desktop manually and re-run this script.\"\n                exit 1\n            fi\n            printf \"\\r\\033[KWaiting for Docker Desktop to start... (%ds)\" \"$DOCKER_WAIT\"\n            sleep 2\n            DOCKER_WAIT=$((DOCKER_WAIT + 2))\n        done\n        echo \"\"\n        print_success \"Docker Desktop is now running\"\n    else\n        print_error \"Docker daemon is not running. Please start Docker.\"\n        exit 1\n    fi\nelse\n    print_success \"Docker daemon is running\"\nfi\n\n# Check Docker resources\nprint_step \"Verifying Docker resources\"\n\n# Get Docker system info\nDOCKER_INFO=$(docker system info 2>/dev/null)\n\n# Try to get memory allocation (method varies by platform)\nif [[ \"$OSTYPE\" == \"darwin\"* ]]; then\n    # macOS - Docker Desktop\n    if command -v jq &> /dev/null && [ -f ~/Library/Group\\ Containers/group.com.docker/settings.json ]; then\n        MEMORY_MB=$(cat ~/Library/Group\\ Containers/group.com.docker/settings.json 2>/dev/null | jq '.memoryMiB // 0' 2>/dev/null || echo \"0\")\n    else\n        # Try to get from docker system info\n        MEMORY_BYTES=$(docker system info 2>/dev/null | grep -i \"total memory\" | grep -oE '[0-9]+\\.[0-9]+' | head -1)\n        if [ -n \"$MEMORY_BYTES\" ]; then\n            # Convert from GiB to MB (multiply by 1024)\n            MEMORY_MB=$(echo \"$MEMORY_BYTES * 1024\" | bc 2>/dev/null | cut -d. -f1)\n            if [ -z \"$MEMORY_MB\" ]; then\n                MEMORY_MB=\"0\"\n            fi\n        else\n            MEMORY_MB=\"0\"\n        fi\n    fi\nelse\n    # Linux - Native Docker\n    MEMORY_KB=$(grep MemTotal /proc/meminfo | grep -oE '[0-9]+' || echo \"0\")\n    MEMORY_MB=$((MEMORY_KB / 1024))\nfi\n\n# Convert to GB for display\nif [ \"$MEMORY_MB\" -gt 0 ]; then\n    MEMORY_GB=$(awk \"BEGIN {printf \\\"%.1f\\\", $MEMORY_MB / 1024}\")\n    if [ \"$(awk \"BEGIN {print ($MEMORY_MB >= 1024)}\")\" = \"1\" ]; then\n        MEMORY_DISPLAY=\"~${MEMORY_GB}GB\"\n    else\n        MEMORY_DISPLAY=\"${MEMORY_MB}MB\"\n    fi\n    if [[ \"$OSTYPE\" == \"darwin\"* ]]; then\n        print_info \"Docker memory allocation: ${MEMORY_DISPLAY}\"\n    else\n        print_info \"System memory: ${MEMORY_DISPLAY} (Docker uses host memory directly)\"\n    fi\nelse\n    print_warning \"Could not determine memory allocation\"\n    MEMORY_DISPLAY=\"unknown\"\n    MEMORY_MB=0\nfi\n\n# Check disk space (different commands for macOS vs Linux)\nif [[ \"$OSTYPE\" == \"darwin\"* ]]; then\n    # macOS uses -g for GB\n    DISK_AVAILABLE=$(df -g . | awk 'NR==2 {print $4}')\nelse\n    # Linux uses -BG for GB\n    DISK_AVAILABLE=$(df -BG . | awk 'NR==2 {print $4}' | sed 's/G//')\nfi\nprint_info \"Available disk space: ${DISK_AVAILABLE}GB\"\n\n# Resource requirements check\nRESOURCE_WARNING=false\nEXPECTED_RAM_MB=$((EXPECTED_DOCKER_RAM_GB * 1024))\n\nif [ \"$MEMORY_MB\" -gt 0 ] && [ \"$MEMORY_MB\" -lt \"$EXPECTED_RAM_MB\" ]; then\n    print_warning \"Less than ${EXPECTED_DOCKER_RAM_GB}GB RAM available (found: ${MEMORY_DISPLAY})\"\n    RESOURCE_WARNING=true\nfi\n\nif [ \"$DISK_AVAILABLE\" -lt \"$EXPECTED_DISK_GB\" ]; then\n    print_warning \"Less than ${EXPECTED_DISK_GB}GB disk space available (found: ${DISK_AVAILABLE}GB)\"\n    RESOURCE_WARNING=true\nfi\n\nif [ \"$RESOURCE_WARNING\" = true ]; then\n    echo \"\"\n    print_warning \"Onyx recommends at least ${EXPECTED_DOCKER_RAM_GB}GB RAM and ${EXPECTED_DISK_GB}GB disk space for optimal performance in standard mode.\"\n    print_warning \"Lite mode requires less resources (1-4GB RAM, 8-16GB disk depending on usage), but does not include a vector database.\"\n    echo \"\"\n    prompt_yn_or_default \"Do you want to continue anyway? (Y/n): \" \"y\"\n    if [[ ! $REPLY =~ ^[Yy]$ ]]; then\n        print_info \"Installation cancelled. Please allocate more resources and try again.\"\n        exit 1\n    fi\n    print_info \"Proceeding with installation despite resource limitations...\"\nfi\n\n# Create directory structure\nprint_step \"Creating directory structure\"\nif [ -d \"${INSTALL_ROOT}\" ]; then\n    print_info \"Directory structure already exists\"\n    print_success \"Using existing ${INSTALL_ROOT} directory\"\nfi\nmkdir -p \"${INSTALL_ROOT}/deployment\"\nmkdir -p \"${INSTALL_ROOT}/data/nginx/local\"\nprint_success \"Directory structure created\"\n\n# Ensure all required configuration files are present\nNGINX_BASE_URL=\"https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/data/nginx\"\n\nif [[ \"$USE_LOCAL_FILES\" = true ]]; then\n    print_step \"Verifying existing configuration files\"\nelse\n    print_step \"Downloading Onyx configuration files\"\n    print_info \"This step downloads all necessary configuration files from GitHub...\"\nfi\n\nensure_file \"${INSTALL_ROOT}/deployment/docker-compose.yml\" \\\n    \"${GITHUB_RAW_URL}/docker-compose.yml\" \"docker-compose.yml\" || exit 1\n\n# Check Docker Compose version compatibility after obtaining docker-compose.yml\nif [ \"$COMPOSE_VERSION\" != \"dev\" ] && version_compare \"$COMPOSE_VERSION\" \"2.24.0\"; then\n    print_warning \"Docker Compose version $COMPOSE_VERSION is older than 2.24.0\"\n    echo \"\"\n    print_warning \"The docker-compose.yml file uses the newer env_file format that requires Docker Compose 2.24.0 or later.\"\n    echo \"\"\n    print_info \"To use this configuration with your current Docker Compose version, you have two options:\"\n    echo \"\"\n    echo \"1. Upgrade Docker Compose to version 2.24.0 or later (recommended)\"\n    echo \"   Visit: https://docs.docker.com/compose/install/\"\n    echo \"\"\n    echo \"2. Manually replace all env_file sections in docker-compose.yml\"\n    echo \"   Change from:\"\n    echo \"     env_file:\"\n    echo \"       - path: .env\"\n    echo \"         required: false\"\n    echo \"   To:\"\n    echo \"     env_file: .env\"\n    echo \"\"\n    print_warning \"The installation will continue, but may fail if Docker Compose cannot parse the file.\"\n    echo \"\"\n    prompt_yn_or_default \"Do you want to continue anyway? (Y/n): \" \"y\"\n    if [[ ! $REPLY =~ ^[Yy]$ ]]; then\n        print_info \"Installation cancelled. Please upgrade Docker Compose or manually edit the docker-compose.yml file.\"\n        exit 1\n    fi\n    print_info \"Proceeding with installation despite Docker Compose version compatibility issues...\"\nfi\n\n# Ask for deployment mode (standard vs lite) unless already set via --lite flag\nif [[ \"$LITE_MODE\" = false ]]; then\n    print_info \"Which deployment mode would you like?\"\n    echo \"\"\n    echo \"  1) Lite      - Minimal deployment (no Vespa, Redis, or model servers)\"\n    echo \"                  LLM chat, tools, file uploads, and Projects still work\"\n    echo \"  2) Standard  - Full deployment with search, connectors, and RAG\"\n    echo \"\"\n    prompt_or_default \"Choose a mode (1 or 2) [default: 1]: \" \"1\"\n    echo \"\"\n\n    case \"$REPLY\" in\n        2)\n            print_info \"Selected: Standard mode\"\n            ;;\n        *)\n            LITE_MODE=true\n            print_info \"Selected: Lite mode\"\n            ;;\n    esac\nelse\n    print_info \"Deployment mode: Lite (set via --lite flag)\"\nfi\n\nif [[ \"$LITE_MODE\" = true ]] && [[ \"$INCLUDE_CRAFT\" = true ]]; then\n    print_error \"--include-craft cannot be used with Lite mode.\"\n    print_info \"Craft requires services (Vespa, Redis, background workers) that lite mode disables.\"\n    exit 1\nfi\n\nif [[ \"$LITE_MODE\" = true ]]; then\n    EXPECTED_DOCKER_RAM_GB=4\n    EXPECTED_DISK_GB=16\nfi\n\n# Handle lite overlay file based on selected mode\nif [[ \"$LITE_MODE\" = true ]]; then\n    ensure_file \"${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}\" \\\n        \"${GITHUB_RAW_URL}/${LITE_COMPOSE_FILE}\" \"${LITE_COMPOSE_FILE}\" || exit 1\nelif [[ -f \"${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}\" ]]; then\n    rm -f \"${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}\"\n    print_info \"Removed previous lite overlay (switching to standard mode)\"\nfi\n\nensure_file \"${INSTALL_ROOT}/deployment/env.template\" \\\n    \"${GITHUB_RAW_URL}/env.template\" \"env.template\" || exit 1\n\nensure_file \"${INSTALL_ROOT}/data/nginx/app.conf.template\" \\\n    \"$NGINX_BASE_URL/app.conf.template\" \"nginx/app.conf.template\" || exit 1\n\nensure_file \"${INSTALL_ROOT}/data/nginx/run-nginx.sh\" \\\n    \"$NGINX_BASE_URL/run-nginx.sh\" \"nginx/run-nginx.sh\" || exit 1\nchmod +x \"${INSTALL_ROOT}/data/nginx/run-nginx.sh\"\n\nensure_file \"${INSTALL_ROOT}/README.md\" \\\n    \"${GITHUB_RAW_URL}/README.md\" \"README.md\" || exit 1\n\ntouch \"${INSTALL_ROOT}/data/nginx/local/.gitkeep\"\nprint_success \"All configuration files ready\"\n\n# Set up deployment configuration\nprint_step \"Setting up deployment configs\"\nENV_FILE=\"${INSTALL_ROOT}/deployment/.env\"\nENV_TEMPLATE=\"${INSTALL_ROOT}/deployment/env.template\"\n# Check if services are already running\nif [ -d \"${INSTALL_ROOT}/deployment\" ] && [ -f \"${INSTALL_ROOT}/deployment/docker-compose.yml\" ]; then\n    # Determine compose command\n    if docker compose version &> /dev/null; then\n        COMPOSE_CMD=\"docker compose\"\n    elif command -v docker-compose &> /dev/null; then\n        COMPOSE_CMD=\"docker-compose\"\n    else\n        COMPOSE_CMD=\"\"\n    fi\n\n    if [ -n \"$COMPOSE_CMD\" ]; then\n        # Check if any containers are running\n        RUNNING_CONTAINERS=$(cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args true) ps -q 2>/dev/null | wc -l)\n        if [ \"$RUNNING_CONTAINERS\" -gt 0 ]; then\n            print_error \"Onyx services are currently running!\"\n            echo \"\"\n            print_info \"To make configuration changes, you must first shut down the services.\"\n            echo \"\"\n            print_info \"Please run the following command to shut down Onyx:\"\n            echo -e \"   ${BOLD}./install.sh --shutdown${NC}\"\n            echo \"\"\n            print_info \"Then run this script again to make your changes.\"\n            exit 1\n        fi\n    fi\nfi\n\nif [ -f \"$ENV_FILE\" ]; then\n    print_info \"Existing .env file found. What would you like to do?\"\n    echo \"\"\n    echo \"• Press Enter to restart with current configuration\"\n    echo \"• Type 'update' to update to a newer version\"\n    echo \"\"\n    prompt_or_default \"Choose an option [default: restart]: \" \"\"\n    echo \"\"\n\n    if [ \"$REPLY\" = \"update\" ]; then\n        print_info \"Update selected. Which tag would you like to deploy?\"\n        echo \"\"\n        echo \"• Press Enter for edge (recommended)\"\n        echo \"• Type a specific tag (e.g., v0.1.0)\"\n        echo \"\"\n        if [ \"$INCLUDE_CRAFT\" = true ]; then\n            prompt_or_default \"Enter tag [default: craft-latest]: \" \"craft-latest\"\n            VERSION=\"$REPLY\"\n        else\n            prompt_or_default \"Enter tag [default: edge]: \" \"edge\"\n            VERSION=\"$REPLY\"\n        fi\n        echo \"\"\n\n        if [ \"$INCLUDE_CRAFT\" = true ] && [ \"$VERSION\" = \"craft-latest\" ]; then\n            print_info \"Selected: craft-latest (Craft enabled)\"\n        elif [ \"$VERSION\" = \"edge\" ]; then\n            print_info \"Selected: edge (latest nightly)\"\n        else\n            print_info \"Selected: $VERSION\"\n        fi\n\n        # Reject craft image tags when running in lite mode\n        if [[ \"$LITE_MODE\" = true ]] && [[ \"${VERSION:-}\" == craft-* ]]; then\n            print_error \"Cannot use a craft image tag (${VERSION}) with --lite.\"\n            print_info \"Craft requires services (Vespa, Redis, background workers) that lite mode disables.\"\n            exit 1\n        fi\n\n        # Update .env file with new version\n        print_info \"Updating configuration for version $VERSION...\"\n        if grep -q \"^IMAGE_TAG=\" \"$ENV_FILE\"; then\n            # Update existing IMAGE_TAG line\n            sed -i.bak \"s/^IMAGE_TAG=.*/IMAGE_TAG=$VERSION/\" \"$ENV_FILE\"\n        else\n            # Add IMAGE_TAG line if it doesn't exist\n            echo \"IMAGE_TAG=$VERSION\" >> \"$ENV_FILE\"\n        fi\n        print_success \"Updated IMAGE_TAG to $VERSION in .env file\"\n\n        # If using craft image, also enable ENABLE_CRAFT\n        if [[ \"$VERSION\" == craft-* ]]; then\n            sed -i.bak 's/^#* *ENABLE_CRAFT=.*/ENABLE_CRAFT=true/' \"$ENV_FILE\" 2>/dev/null || true\n            print_success \"ENABLE_CRAFT set to true\"\n        fi\n        print_success \"Configuration updated for upgrade\"\n    else\n        # Reject restarting a craft deployment in lite mode\n        EXISTING_TAG=$(grep \"^IMAGE_TAG=\" \"$ENV_FILE\" | head -1 | cut -d'=' -f2 | tr -d ' \"'\"'\"'')\n        if [[ \"$LITE_MODE\" = true ]] && [[ \"${EXISTING_TAG:-}\" == craft-* ]]; then\n            print_error \"Cannot restart a craft deployment (${EXISTING_TAG}) with --lite.\"\n            print_info \"Craft requires services (Vespa, Redis, background workers) that lite mode disables.\"\n            exit 1\n        fi\n\n        print_info \"Keeping existing configuration...\"\n        print_success \"Will restart with current settings\"\n    fi\n\n    # Ensure COMPOSE_PROFILES is cleared when running in lite mode on an\n    # existing .env (the template ships with s3-filestore enabled).\n    if [[ \"$LITE_MODE\" = true ]] && grep -q \"^COMPOSE_PROFILES=.*s3-filestore\" \"$ENV_FILE\" 2>/dev/null; then\n        sed -i.bak 's/^COMPOSE_PROFILES=.*/COMPOSE_PROFILES=/' \"$ENV_FILE\" 2>/dev/null || true\n        print_success \"Cleared COMPOSE_PROFILES for lite mode\"\n    fi\nelse\n    print_info \"No existing .env file found. Setting up new deployment...\"\n    echo \"\"\n\n    # Ask for version\n    print_info \"Which tag would you like to deploy?\"\n    echo \"\"\n    if [ \"$INCLUDE_CRAFT\" = true ]; then\n        echo \"• Press Enter for craft-latest (recommended for Craft)\"\n        echo \"• Type a specific tag (e.g., craft-v1.0.0)\"\n        echo \"\"\n        prompt_or_default \"Enter tag [default: craft-latest]: \" \"craft-latest\"\n        VERSION=\"$REPLY\"\n    else\n        echo \"• Press Enter for edge (recommended)\"\n        echo \"• Type a specific tag (e.g., v0.1.0)\"\n        echo \"\"\n        prompt_or_default \"Enter tag [default: edge]: \" \"edge\"\n        VERSION=\"$REPLY\"\n    fi\n    echo \"\"\n\n    if [ \"$INCLUDE_CRAFT\" = true ] && [ \"$VERSION\" = \"craft-latest\" ]; then\n        print_info \"Selected: craft-latest (Craft enabled)\"\n    elif [ \"$VERSION\" = \"edge\" ]; then\n        print_info \"Selected: edge (latest nightly)\"\n    else\n        print_info \"Selected: $VERSION\"\n    fi\n\n    # Ask for authentication schema\n    # echo \"\"\n    # print_info \"Which authentication schema would you like to set up?\"\n    # echo \"\"\n    # echo \"1) Basic - Username/password authentication\"\n    # echo \"2) No Auth - Open access (development/testing)\"\n    # echo \"\"\n    # read -p \"Choose an option (1) [default 1]: \" -r AUTH_CHOICE\n    # echo \"\"\n\n    # case \"${AUTH_CHOICE:-1}\" in\n    #     1)\n    #         AUTH_SCHEMA=\"basic\"\n    #         print_info \"Selected: Basic authentication\"\n    #         ;;\n    #     # 2)\n    #     #     AUTH_SCHEMA=\"disabled\"\n    #     #     print_info \"Selected: No authentication\"\n    #     #     ;;\n    #     *)\n    #         AUTH_SCHEMA=\"basic\"\n    #         print_info \"Invalid choice, using basic authentication\"\n    #         ;;\n    # esac\n\n    # TODO (jessica): Uncomment this once no auth users still have an account\n    # Use basic auth by default\n    AUTH_SCHEMA=\"basic\"\n\n    # Reject craft image tags when running in lite mode (must check before writing .env)\n    if [[ \"$LITE_MODE\" = true ]] && [[ \"${VERSION:-}\" == craft-* ]]; then\n        print_error \"Cannot use a craft image tag (${VERSION}) with --lite.\"\n        print_info \"Craft requires services (Vespa, Redis, background workers) that lite mode disables.\"\n        exit 1\n    fi\n\n    # Create .env file from template\n    print_info \"Creating .env file with your selections...\"\n    cp \"$ENV_TEMPLATE\" \"$ENV_FILE\"\n\n    # Update IMAGE_TAG with selected version\n    print_info \"Setting IMAGE_TAG to $VERSION...\"\n    sed -i.bak \"s/^IMAGE_TAG=.*/IMAGE_TAG=$VERSION/\" \"$ENV_FILE\"\n    print_success \"IMAGE_TAG set to $VERSION\"\n\n    # In lite mode, clear COMPOSE_PROFILES so profiled services (MinIO, etc.)\n    # stay disabled — the template ships with s3-filestore enabled by default.\n    if [[ \"$LITE_MODE\" = true ]]; then\n        sed -i.bak 's/^COMPOSE_PROFILES=.*/COMPOSE_PROFILES=/' \"$ENV_FILE\" 2>/dev/null || true\n        print_success \"Cleared COMPOSE_PROFILES for lite mode\"\n    fi\n\n    # Configure basic authentication (default)\n    sed -i.bak 's/^AUTH_TYPE=.*/AUTH_TYPE=basic/' \"$ENV_FILE\" 2>/dev/null || true\n    print_success \"Basic authentication enabled in configuration\"\n\n    # Check if openssl is available\n    if ! command -v openssl &> /dev/null; then\n        print_error \"openssl is required to generate secure secrets but was not found.\"\n        exit 1\n    fi\n\n    # Generate a secure USER_AUTH_SECRET\n    USER_AUTH_SECRET=$(openssl rand -hex 32)\n    sed -i.bak \"s/^USER_AUTH_SECRET=.*/USER_AUTH_SECRET=\\\"$USER_AUTH_SECRET\\\"/\" \"$ENV_FILE\" 2>/dev/null || true\n\n    # Configure Craft based on flag or if using a craft-* image tag\n    # By default, env.template has Craft commented out (disabled)\n    if [ \"$INCLUDE_CRAFT\" = true ] || [[ \"$VERSION\" == craft-* ]]; then\n        # Set ENABLE_CRAFT=true for runtime configuration (handles commented and uncommented lines)\n        sed -i.bak 's/^#* *ENABLE_CRAFT=.*/ENABLE_CRAFT=true/' \"$ENV_FILE\" 2>/dev/null || true\n        print_success \"Onyx Craft enabled (ENABLE_CRAFT=true)\"\n    else\n        print_info \"Onyx Craft disabled (use --include-craft to enable)\"\n    fi\n\n    print_success \".env file created with your preferences\"\n    echo \"\"\n    print_info \"IMPORTANT: The .env file has been configured with your selections.\"\n    print_info \"You can customize it later for:\"\n    echo \"  • Advanced authentication (OAuth, SAML, etc.)\"\n    echo \"  • AI model configuration\"\n    echo \"  • Domain settings (for production)\"\n    echo \"  • Onyx Craft (set ENABLE_CRAFT=true)\"\n    echo \"\"\nfi\n\n# Function to check if a port is available\nis_port_available() {\n    local port=$1\n\n    # Try netcat first if available\n    if command -v nc &> /dev/null; then\n        # Try to connect to the port, if it fails, the port is available\n        ! nc -z localhost \"$port\" 2>/dev/null\n    # Fallback using curl/telnet approach\n    elif command -v curl &> /dev/null; then\n        # Try to connect with curl, if it fails, the port might be available\n        ! curl -s --max-time 1 --connect-timeout 1 \"http://localhost:$port\" >/dev/null 2>&1\n    # Final fallback using lsof if available\n    elif command -v lsof &> /dev/null; then\n        # Check if any process is listening on the port\n        ! lsof -i \":$port\" >/dev/null 2>&1\n    else\n        # No port checking tools available, assume port is available\n        print_warning \"No port checking tools available (nc, curl, lsof). Assuming port $port is available.\"\n        return 0\n    fi\n}\n\n# Function to find the first available port starting from a given port\nfind_available_port() {\n    local start_port=${1:-3000}\n    local port=$start_port\n\n    while [ $port -le 65535 ]; do\n        if is_port_available \"$port\"; then\n            echo \"$port\"\n            return 0\n        fi\n        port=$((port + 1))\n    done\n\n    # If no port found, return the original port as fallback\n    echo \"$start_port\"\n    return 1\n}\n\n# Check for port checking tools availability\nPORT_CHECK_AVAILABLE=false\nif command -v nc &> /dev/null || command -v curl &> /dev/null || command -v lsof &> /dev/null; then\n    PORT_CHECK_AVAILABLE=true\nfi\n\nif [ \"$PORT_CHECK_AVAILABLE\" = false ]; then\n    print_warning \"No port checking tools found (nc, curl, lsof). Port detection may not work properly.\"\n    print_info \"Consider installing one of these tools for reliable automatic port detection.\"\nfi\n\n# Find available port for nginx\nprint_step \"Checking for available ports\"\nAVAILABLE_PORT=$(find_available_port 3000)\n\nif [ \"$AVAILABLE_PORT\" != \"3000\" ]; then\n    print_info \"Port 3000 is in use, found available port: $AVAILABLE_PORT\"\nelse\n    print_info \"Port 3000 is available\"\nfi\n\n# Export HOST_PORT for docker-compose\nexport HOST_PORT=$AVAILABLE_PORT\nprint_success \"Using port $AVAILABLE_PORT for nginx\"\n\n# Determine if we're using a floating tag (edge, latest, craft-*) that should force pull\n# Read IMAGE_TAG from .env file and remove any quotes or whitespace\nCURRENT_IMAGE_TAG=$(grep \"^IMAGE_TAG=\" \"$ENV_FILE\" | head -1 | cut -d'=' -f2 | tr -d ' \"'\"'\"'')\nif [ \"$CURRENT_IMAGE_TAG\" = \"edge\" ] || [ \"$CURRENT_IMAGE_TAG\" = \"latest\" ] || [[ \"$CURRENT_IMAGE_TAG\" == craft-* ]]; then\n    USE_LATEST=true\n    if [[ \"$CURRENT_IMAGE_TAG\" == craft-* ]]; then\n        print_info \"Using craft tag '$CURRENT_IMAGE_TAG' - will force pull and recreate containers\"\n    else\n        print_info \"Using '$CURRENT_IMAGE_TAG' tag - will force pull and recreate containers\"\n    fi\nelse\n    USE_LATEST=false\nfi\n\n# For pinned version tags, re-download config files from that tag so the\n# compose file matches the images being pulled (the initial download used main).\nif [[ \"$USE_LATEST\" = false ]] && [[ \"$USE_LOCAL_FILES\" = false ]]; then\n    PINNED_BASE=\"https://raw.githubusercontent.com/onyx-dot-app/onyx/${CURRENT_IMAGE_TAG}/deployment\"\n    print_info \"Fetching config files matching tag ${CURRENT_IMAGE_TAG}...\"\n    if download_file \"${PINNED_BASE}/docker_compose/docker-compose.yml\" \"${INSTALL_ROOT}/deployment/docker-compose.yml\" 2>/dev/null; then\n        download_file \"${PINNED_BASE}/data/nginx/app.conf.template\" \"${INSTALL_ROOT}/data/nginx/app.conf.template\" 2>/dev/null || true\n        download_file \"${PINNED_BASE}/data/nginx/run-nginx.sh\" \"${INSTALL_ROOT}/data/nginx/run-nginx.sh\" 2>/dev/null || true\n        chmod +x \"${INSTALL_ROOT}/data/nginx/run-nginx.sh\"\n        if [[ \"$LITE_MODE\" = true ]]; then\n            download_file \"${PINNED_BASE}/docker_compose/${LITE_COMPOSE_FILE}\" \\\n                \"${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}\" 2>/dev/null || true\n        fi\n        print_success \"Config files updated to match ${CURRENT_IMAGE_TAG}\"\n    else\n        print_warning \"Tag ${CURRENT_IMAGE_TAG} not found on GitHub — using main branch configs\"\n    fi\nfi\n\n# Pull Docker images with reduced output\nprint_step \"Pulling Docker images\"\nprint_info \"This may take several minutes depending on your internet connection...\"\necho \"\"\nprint_info \"Downloading Docker images (this may take a while)...\"\n(cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args) pull --quiet)\nif [ $? -eq 0 ]; then\n    print_success \"Docker images downloaded successfully\"\nelse\n    print_error \"Failed to download Docker images\"\n    exit 1\nfi\n\n# Start services\nprint_step \"Starting Onyx services\"\nprint_info \"Launching containers...\"\necho \"\"\nif [ \"$USE_LATEST\" = true ]; then\n    print_info \"Force pulling latest images and recreating containers...\"\n    (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args) up -d --pull always --force-recreate)\nelse\n    (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args) up -d)\nfi\nif [ $? -ne 0 ]; then\n    print_error \"Failed to start Onyx services\"\n    exit 1\nfi\n\n# Monitor container startup\nprint_step \"Verifying container health\"\nprint_info \"Waiting for containers to initialize (10 seconds)...\"\n\n# Progress bar for waiting\nfor i in {1..10}; do\n    printf \"\\r[%-10s] %d%%\" $(printf '#%.0s' $(seq 1 $((i*10/10)))) $((i*100/10))\n    sleep 1\ndone\necho \"\"\necho \"\"\n\n# Check for restart loops\nprint_info \"Checking container health status...\"\nRESTART_ISSUES=false\nCONTAINERS=$(cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args) ps -q 2>/dev/null)\n\nfor CONTAINER in $CONTAINERS; do\n    PROJECT_NAME=\"$(basename \"$INSTALL_ROOT\")_deployment_\"\n    CONTAINER_NAME=$(docker inspect --format '{{.Name}}' \"$CONTAINER\" | sed \"s/^\\/\\|^${PROJECT_NAME}//g\")\n    RESTART_COUNT=$(docker inspect --format '{{.RestartCount}}' \"$CONTAINER\")\n    STATUS=$(docker inspect --format '{{.State.Status}}' \"$CONTAINER\")\n\n    if [ \"$STATUS\" = \"running\" ]; then\n        if [ \"$RESTART_COUNT\" -gt 2 ]; then\n            print_error \"$CONTAINER_NAME is in a restart loop (restarted $RESTART_COUNT times)\"\n            RESTART_ISSUES=true\n        else\n            print_success \"$CONTAINER_NAME is healthy\"\n        fi\n    elif [ \"$STATUS\" = \"restarting\" ]; then\n        print_error \"$CONTAINER_NAME is stuck restarting\"\n        RESTART_ISSUES=true\n    else\n        print_warning \"$CONTAINER_NAME status: $STATUS\"\n    fi\ndone\n\necho \"\"\n\nif [ \"$RESTART_ISSUES\" = true ]; then\n    print_error \"Some containers are experiencing issues!\"\n    echo \"\"\n    print_info \"Please check the logs for more information:\"\n    echo \"  (cd \\\"${INSTALL_ROOT}/deployment\\\" && $COMPOSE_CMD $(compose_file_args) logs)\"\n\n    echo \"\"\n    print_info \"If the issue persists, please contact: founders@onyx.app\"\n    echo \"Include the output of the logs command in your message.\"\n    exit 1\nfi\n\n# Health check function\ncheck_onyx_health() {\n    local max_attempts=600  # 10 minutes * 60 attempts per minute (every 1 second)\n    local attempt=1\n    local port=${HOST_PORT:-3000}\n\n    print_info \"Checking Onyx service health...\"\n    echo \"Containers are healthy, waiting for database migrations and service initialization to finish.\"\n    echo \"\"\n\n    while [ $attempt -le $max_attempts ]; do\n        local http_code=\"\"\n        if [[ \"$DOWNLOADER\" == \"curl\" ]]; then\n            http_code=$(curl -s -o /dev/null -w \"%{http_code}\" \"http://localhost:$port\" 2>/dev/null || echo \"000\")\n        else\n            http_code=$(wget -q --spider -S \"http://localhost:$port\" 2>&1 | grep \"HTTP/\" | tail -1 | awk '{print $2}' || echo \"000\")\n        fi\n        if echo \"$http_code\" | grep -qE \"^(200|301|302|303|307|308)$\"; then\n            return 0\n        fi\n\n        # Show animated progress with time elapsed\n        local elapsed=$((attempt))\n        local minutes=$((elapsed / 60))\n        local seconds=$((elapsed % 60))\n\n        # Create animated dots with fixed spacing (cycle through 1-3 dots)\n        local dots=\"\"\n        case $((attempt % 3)) in\n            0) dots=\".  \" ;;\n            1) dots=\".. \" ;;\n            2) dots=\"...\" ;;\n        esac\n\n        # Clear line and show progress with fixed spacing\n        printf \"\\r\\033[KChecking Onyx service%s (%dm %ds elapsed)\" \"$dots\" \"$minutes\" \"$seconds\"\n\n        sleep 1\n        attempt=$((attempt + 1))\n    done\n\n    echo \"\"  # New line after the progress line\n    return 1\n}\n\n# Success message\nprint_step \"Installation Complete!\"\nprint_success \"All containers are running successfully!\"\necho \"\"\n\n# Run health check\nif check_onyx_health; then\n    echo \"\"\n    echo -e \"${GREEN}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\"\n    echo -e \"${GREEN}${BOLD}   🎉 Onyx service is ready! 🎉${NC}\"\n    echo -e \"${GREEN}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\"\nelse\n    print_warning \"Health check timed out after 10 minutes\"\n    print_info \"Containers are running, but the web service may still be initializing (or something went wrong)\"\n    echo \"\"\n    echo -e \"${YELLOW}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\"\n    echo -e \"${YELLOW}${BOLD}   ⚠️  Onyx containers are running ⚠️${NC}\"\n    echo -e \"${YELLOW}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\"\nfi\necho \"\"\nprint_info \"Access Onyx at:\"\necho -e \"   ${BOLD}http://localhost:${HOST_PORT}${NC}\"\necho \"\"\nprint_info \"If authentication is enabled, you can create your admin account here:\"\necho \"   • Visit http://localhost:${HOST_PORT}/auth/signup to create your admin account\"\necho \"   • The first user created will automatically have admin privileges\"\necho \"\"\nif [[ \"$LITE_MODE\" = true ]]; then\n    echo \"\"\n    print_info \"Running in Lite mode — the following services are NOT started:\"\n    echo \"  • Vespa (vector database)\"\n    echo \"  • Redis (cache)\"\n    echo \"  • Model servers (embedding/inference)\"\n    echo \"  • Background workers (Celery)\"\n    echo \"\"\n    print_info \"Connectors and RAG search are disabled. LLM chat, tools, user file\"\n    print_info \"uploads, Projects, Agent knowledge, and code interpreter still work.\"\nfi\necho \"\"\nprint_info \"Refer to the README in the ${INSTALL_ROOT} directory for more information.\"\necho \"\"\nprint_info \"For help or issues, contact: founders@onyx.app\"\necho \"\"\n\n# --- GitHub star prompt (inspired by oh-my-codex) ---\n# Only prompt in interactive mode and only if gh CLI is available.\n# Uses the GitHub API directly (PUT /user/starred) like oh-my-codex.\nif is_interactive && command -v gh &>/dev/null; then\n    prompt_yn_or_default \"Enjoying Onyx? Star the repo on GitHub? [Y/n] \" \"Y\"\n    if [[ ! \"$REPLY\" =~ ^[Nn] ]]; then\n        if GH_PAGER= gh api -X PUT /user/starred/onyx-dot-app/onyx < /dev/null >/dev/null 2>&1; then\n            print_success \"Thanks for the star!\"\n        else\n            print_info \"Star us at: https://github.com/onyx-dot-app/onyx\"\n        fi\n    fi\nfi\n"
  },
  {
    "path": "deployment/helm/README.md",
    "content": "# Dependency updates (when subchart versions are bumped)\n* If updating subcharts, you need to run this before committing!\n* cd charts/onyx\n* helm dependency update .\n\n# Local testing\n\n## One time setup\n* brew install kind\n* Ensure you have no config at ~/.kube/config\n* kind create cluster\n* mv ~/.kube/config ~/.kube/kind-config\n\n## Automated install and test with ct\n* export KUBECONFIG=~/.kube/kind-config\n* kubectl config use-context kind-kind\n* from source root run the following. This does a very basic test against the web server\n  * ct install --all --helm-extra-set-args=\"--set=nginx.enabled=false\" --debug --config ct.yaml\n\n## Output template to file and inspect\n* cd charts/onyx\n* helm template test-output . --set auth.opensearch.values.opensearch_admin_password='StrongPassword123!' > test-output.yaml\n\n## Test the entire cluster manually\n* cd charts/onyx\n* helm install onyx . -n onyx --set postgresql.primary.persistence.enabled=false --set auth.opensearch.values.opensearch_admin_password='StrongPassword123!'\n  * the postgres flag is to keep the storage ephemeral for testing. You probably don't want to set that in prod.\n  * the OpenSearch admin password must be set on first install unless you are supplying `auth.opensearch.existingSecret`.\n  * no flag for ephemeral vespa storage yet, might be good for testing\n* kubectl -n onyx port-forward service/onyx-nginx 8080:80\n  * this will forward the local port 8080 to the installed chart for you to run tests, etc.\n* When you are finished\n  * helm uninstall onyx -n onyx\n  * Vespa leaves behind a PVC. Delete it if you are completely done.\n    * k -n onyx get pvc\n    * k -n onyx delete pvc vespa-storage-da-vespa-0\n  * If you didn't disable Postgres persistence earlier, you may want to delete that PVC too.\n\n## Run as non-root user\nBy default, some onyx containers run as root. If you'd like to explicitly run the onyx containers as a non-root user, update the values.yaml file for the following components:\n  * `celery_shared`, `api`, `webserver`, `indexCapability`, `inferenceCapability`\n    ```yaml\n    securityContext:\n      runAsNonRoot: true\n      runAsUser: 1001\n    ```\n  * `vespa`\n    ```yaml\n    podSecurityContext:\n      fsGroup: 1000\n    securityContext:\n      privileged: false\n      runAsUser: 1000\n    ```\n\n## Resourcing\nIn the helm charts, we have resource suggestions for all Onyx-owned components. \nThese are simply initial suggestions, and may need to be tuned for your specific use case.\n\nPlease talk to us in Slack if you have any questions!\n\n## Autoscaling options\nThe chart renders Kubernetes HorizontalPodAutoscalers by default. To keep this behavior, leave\n`autoscaling.engine` as `hpa` and adjust the per-component `autoscaling.*` values as needed.\n\nIf you would like to use KEDA ScaledObjects instead:\n\n1. Install and manage the KEDA operator in your cluster yourself (for example via the official KEDA Helm chart). KEDA is no longer packaged as a dependency of the Onyx chart.\n2. Set `autoscaling.engine: keda` in your `values.yaml` and enable autoscaling for the components you want to scale.\n\nWhen `autoscaling.engine` is set to `keda`, the chart will render the existing ScaledObject templates; otherwise HPAs will be rendered.\n"
  },
  {
    "path": "deployment/helm/charts/onyx/.gitignore",
    "content": "### Helm ###\n# Chart dependencies\n**/charts/*.tgz\n"
  },
  {
    "path": "deployment/helm/charts/onyx/.helmignore",
    "content": "# Patterns to ignore when building packages.\n# This supports shell glob matching, relative path matching, and\n# negation (prefixed with !). Only one pattern per line.\n.DS_Store\n# Common VCS dirs\n.git/\n.gitignore\n.bzr/\n.bzrignore\n.hg/\n.hgignore\n.svn/\n# Common backup files\n*.swp\n*.bak\n*.tmp\n*.orig\n*~\n# Various IDEs\n.project\n.idea/\n*.tmproj\n.vscode/\n"
  },
  {
    "path": "deployment/helm/charts/onyx/Chart.yaml",
    "content": "apiVersion: v2\nname: onyx\ndescription: A Helm chart for Kubernetes\nhome: https://www.onyx.app/\nsources:\n  - \"https://github.com/onyx-dot-app/onyx\"\ntype: application\nversion: 0.4.39\nappVersion: latest\nannotations:\n  category: Productivity\n  licenses: MIT\n  images: |\n    - name: webserver\n      image: docker.io/onyxdotapp/onyx-web-server:latest\n    - name: background\n      image: docker.io/onyxdotapp/onyx-backend:latest\n    - name: vespa\n      image: vespaengine/vespa:8.609.39\ndependencies:\n  - name: cloudnative-pg\n    version: 0.26.0\n    repository: https://cloudnative-pg.github.io/charts\n    condition: postgresql.enabled\n    alias: postgresql\n  - name: vespa\n    version: 0.2.25\n    repository: https://onyx-dot-app.github.io/vespa-helm-charts\n    condition: vespa.enabled\n  - name: opensearch\n    version: 3.4.0\n    repository: https://opensearch-project.github.io/helm-charts\n    condition: opensearch.enabled\n  - name: ingress-nginx\n    version: 4.13.3\n    repository: https://kubernetes.github.io/ingress-nginx\n    condition: nginx.enabled\n    alias: nginx\n  - name: redis\n    version: 0.16.6\n    repository: https://ot-container-kit.github.io/helm-charts\n    condition: redis.enabled\n  - name: minio\n    version: 5.4.0\n    repository: https://charts.min.io/\n    condition: minio.enabled\n  - name: code-interpreter\n    version: 0.3.1\n    repository: https://onyx-dot-app.github.io/python-sandbox/\n    condition: codeInterpreter.enabled\n"
  },
  {
    "path": "deployment/helm/charts/onyx/ci/ct-values.yaml",
    "content": "# Values for chart-testing (ct lint/install)\n# This file is automatically used by ct when running lint and install commands\nauth:\n  opensearch:\n    values:\n      opensearch_admin_password: \"placeholder-OpenSearch1!\"\n  userauth:\n    values:\n      user_auth_secret: \"placeholder-for-ci-testing\"\n"
  },
  {
    "path": "deployment/helm/charts/onyx/dashboards/indexing-pipeline.json",
    "content": "{\n  \"annotations\": {\n    \"list\": [\n      {\n        \"builtIn\": 1,\n        \"datasource\": {\n          \"type\": \"grafana\",\n          \"uid\": \"-- Grafana --\"\n        },\n        \"enable\": true,\n        \"hide\": true,\n        \"iconColor\": \"rgba(0, 211, 255, 1)\",\n        \"name\": \"Annotations & Alerts\",\n        \"type\": \"dashboard\"\n      }\n    ]\n  },\n  \"editable\": true,\n  \"fiscalYearStartMonth\": 0,\n  \"graphTooltip\": 1,\n  \"id\": null,\n  \"links\": [],\n  \"panels\": [\n    {\n      \"collapsed\": false,\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 0\n      },\n      \"id\": 100,\n      \"panels\": [],\n      \"title\": \"At a glance\",\n      \"type\": \"row\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Total number of Celery workers responding to heartbeat ping. Checked every 60 seconds from the monitoring worker.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"red\",\n                \"value\": null\n              },\n              {\n                \"color\": \"yellow\",\n                \"value\": 1\n              },\n              {\n                \"color\": \"green\",\n                \"value\": 3\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 3,\n        \"w\": 4,\n        \"x\": 0,\n        \"y\": 1\n      },\n      \"id\": 23,\n      \"options\": {\n        \"colorMode\": \"background\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"center\",\n        \"orientation\": \"auto\",\n        \"percentChangeColorMode\": \"standard\",\n        \"reduceOptions\": {\n          \"calcs\": [\"lastNotNull\"],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showPercentChange\": false,\n        \"textMode\": \"value_and_name\",\n        \"wideLayout\": true\n      },\n      \"pluginVersion\": \"10.4.1\",\n      \"targets\": [\n        {\n          \"expr\": \"onyx_celery_active_worker_count\",\n          \"legendFormat\": \"Active workers\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Active workers\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Total count of connectors currently in a repeated error state. Yellow at 1+, red at 5+.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"yellow\",\n                \"value\": 1\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 5\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 3,\n        \"w\": 4,\n        \"x\": 4,\n        \"y\": 1\n      },\n      \"id\": 13,\n      \"options\": {\n        \"colorMode\": \"background\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"center\",\n        \"orientation\": \"auto\",\n        \"percentChangeColorMode\": \"standard\",\n        \"reduceOptions\": {\n          \"calcs\": [\"lastNotNull\"],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showPercentChange\": false,\n        \"textMode\": \"value_and_name\",\n        \"wideLayout\": true\n      },\n      \"pluginVersion\": \"10.4.1\",\n      \"targets\": [\n        {\n          \"expr\": \"sum(onyx_connectors_in_error_total{tenant_id=~\\\"$tenant_id\\\"})\",\n          \"legendFormat\": \"Connectors in error\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Connectors in error\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Total tasks waiting across all Celery queues right now.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"yellow\",\n                \"value\": 50\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 200\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 3,\n        \"w\": 4,\n        \"x\": 8,\n        \"y\": 1\n      },\n      \"id\": 101,\n      \"options\": {\n        \"colorMode\": \"background\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"center\",\n        \"orientation\": \"auto\",\n        \"percentChangeColorMode\": \"standard\",\n        \"reduceOptions\": {\n          \"calcs\": [\"lastNotNull\"],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showPercentChange\": false,\n        \"textMode\": \"value_and_name\",\n        \"wideLayout\": true\n      },\n      \"pluginVersion\": \"10.4.1\",\n      \"targets\": [\n        {\n          \"expr\": \"sum(onyx_queue_depth)\",\n          \"legendFormat\": \"Queue depth\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Total queue depth\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"p95 indexing task duration across all sources in the last 5 minutes.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"yellow\",\n                \"value\": 30\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 90\n              }\n            ]\n          },\n          \"unit\": \"s\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 3,\n        \"w\": 4,\n        \"x\": 12,\n        \"y\": 1\n      },\n      \"id\": 102,\n      \"options\": {\n        \"colorMode\": \"background\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"center\",\n        \"orientation\": \"auto\",\n        \"percentChangeColorMode\": \"standard\",\n        \"reduceOptions\": {\n          \"calcs\": [\"lastNotNull\"],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showPercentChange\": false,\n        \"textMode\": \"value_and_name\",\n        \"wideLayout\": true\n      },\n      \"pluginVersion\": \"10.4.1\",\n      \"targets\": [\n        {\n          \"expr\": \"histogram_quantile(0.95, sum by (le) (rate(onyx_indexing_task_duration_seconds_bucket{tenant_id=~\\\"$tenant_id\\\"}[5m])))\",\n          \"legendFormat\": \"p95 duration\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"p95 task duration\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Percentage of indexing tasks that failed in the last 5 minutes.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"yellow\",\n                \"value\": 1\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 5\n              }\n            ]\n          },\n          \"unit\": \"percent\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 3,\n        \"w\": 4,\n        \"x\": 16,\n        \"y\": 1\n      },\n      \"id\": 103,\n      \"options\": {\n        \"colorMode\": \"background\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"center\",\n        \"orientation\": \"auto\",\n        \"percentChangeColorMode\": \"standard\",\n        \"reduceOptions\": {\n          \"calcs\": [\"lastNotNull\"],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showPercentChange\": false,\n        \"textMode\": \"value_and_name\",\n        \"wideLayout\": true\n      },\n      \"pluginVersion\": \"10.4.1\",\n      \"targets\": [\n        {\n          \"expr\": \"100 * sum(rate(onyx_indexing_task_completed_total{outcome=\\\"failure\\\", tenant_id=~\\\"$tenant_id\\\"}[5m])) / sum(rate(onyx_indexing_task_completed_total{tenant_id=~\\\"$tenant_id\\\"}[5m]))\",\n          \"legendFormat\": \"Error rate\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Task error rate\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Per-worker heartbeat status. Green = responding to ping. Red = not responding. Workers removed after 10 consecutive missed pings.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [\n            {\n              \"options\": {\n                \"0\": {\n                  \"color\": \"red\",\n                  \"text\": \"DOWN\"\n                }\n              },\n              \"type\": \"value\"\n            },\n            {\n              \"options\": {\n                \"1\": {\n                  \"color\": \"green\",\n                  \"text\": \"UP\"\n                }\n              },\n              \"type\": \"value\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"red\",\n                \"value\": null\n              },\n              {\n                \"color\": \"green\",\n                \"value\": 1\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 3,\n        \"w\": 4,\n        \"x\": 20,\n        \"y\": 1\n      },\n      \"id\": 24,\n      \"options\": {\n        \"colorMode\": \"background\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"auto\",\n        \"percentChangeColorMode\": \"standard\",\n        \"reduceOptions\": {\n          \"calcs\": [\"lastNotNull\"],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showPercentChange\": false,\n        \"textMode\": \"value_and_name\",\n        \"wideLayout\": true\n      },\n      \"pluginVersion\": \"10.4.1\",\n      \"targets\": [\n        {\n          \"expr\": \"onyx_celery_worker_up\",\n          \"legendFormat\": \"{{worker}}\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Worker heartbeats\",\n      \"type\": \"stat\"\n    },\n    {\n      \"collapsed\": false,\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 4\n      },\n      \"id\": 25,\n      \"panels\": [],\n      \"title\": \"Connector Health\",\n      \"type\": \"row\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Current indexing attempts by status and connector.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"drawStyle\": \"bars\",\n            \"fillOpacity\": 50,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"normal\"\n            }\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 5\n      },\n      \"id\": 3,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\"last\"],\n          \"displayMode\": \"table\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"hideZeros\": false,\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"onyx_index_attempts_active{tenant_id=~\\\"$tenant_id\\\", source=~\\\"$source\\\", connector_name=~\\\"$connector_name\\\"}\",\n          \"legendFormat\": \"{{status}} / {{source}} / {{connector_name}}\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Active Index Attempts\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Distribution of all connectors by their current status.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"mappings\": []\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 6,\n        \"x\": 12,\n        \"y\": 5\n      },\n      \"id\": 11,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\"last\"],\n          \"displayMode\": \"table\",\n          \"placement\": \"right\",\n          \"showLegend\": true\n        },\n        \"pieType\": \"pie\",\n        \"reduceOptions\": {\n          \"calcs\": [\"lastNotNull\"],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"tooltip\": {\n          \"hideZeros\": false,\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"sum by (status) (onyx_connectors_by_status{tenant_id=~\\\"$tenant_id\\\"})\",\n          \"legendFormat\": \"{{status}}\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Connectors by Status\",\n      \"type\": \"piechart\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Time since each connector last completed a successful index. Sorted by staleness.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"custom\": {\n            \"align\": \"auto\",\n            \"cellOptions\": {\n              \"type\": \"auto\"\n            },\n            \"inspect\": false\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          }\n        },\n        \"overrides\": [\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Age\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"unit\",\n                \"value\": \"s\"\n              },\n              {\n                \"id\": \"custom.cellOptions\",\n                \"value\": {\n                  \"type\": \"color-background\"\n                }\n              },\n              {\n                \"id\": \"thresholds\",\n                \"value\": {\n                  \"mode\": \"absolute\",\n                  \"steps\": [\n                    {\n                      \"color\": \"green\",\n                      \"value\": null\n                    },\n                    {\n                      \"color\": \"yellow\",\n                      \"value\": 3600\n                    },\n                    {\n                      \"color\": \"red\",\n                      \"value\": 86400\n                    }\n                  ]\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Failed attempts\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"custom.cellOptions\",\n                \"value\": {\n                  \"type\": \"color-background\"\n                }\n              },\n              {\n                \"id\": \"thresholds\",\n                \"value\": {\n                  \"mode\": \"absolute\",\n                  \"steps\": [\n                    {\n                      \"color\": \"green\",\n                      \"value\": null\n                    },\n                    {\n                      \"color\": \"yellow\",\n                      \"value\": 1\n                    },\n                    {\n                      \"color\": \"red\",\n                      \"value\": 5\n                    }\n                  ]\n                }\n              }\n            ]\n          }\n        ]\n      },\n      \"gridPos\": {\n        \"h\": 12,\n        \"w\": 16,\n        \"x\": 0,\n        \"y\": 13\n      },\n      \"id\": 9,\n      \"options\": {\n        \"cellHeight\": \"sm\",\n        \"footer\": {\n          \"countRows\": false,\n          \"fields\": \"\",\n          \"reducer\": [\"sum\"],\n          \"show\": false\n        },\n        \"showHeader\": true\n      },\n      \"targets\": [\n        {\n          \"expr\": \"topk(20, onyx_connector_last_success_age_seconds{tenant_id=~\\\"$tenant_id\\\", source=~\\\"$source\\\", connector_name=~\\\"$connector_name\\\"})\",\n          \"format\": \"table\",\n          \"instant\": true,\n          \"refId\": \"A\"\n        },\n        {\n          \"expr\": \"onyx_connector_error_count{tenant_id=~\\\"$tenant_id\\\", source=~\\\"$source\\\", connector_name=~\\\"$connector_name\\\"}\",\n          \"format\": \"table\",\n          \"instant\": true,\n          \"refId\": \"B\"\n        },\n        {\n          \"expr\": \"onyx_connector_docs_indexed{tenant_id=~\\\"$tenant_id\\\", source=~\\\"$source\\\", connector_name=~\\\"$connector_name\\\"}\",\n          \"format\": \"table\",\n          \"instant\": true,\n          \"refId\": \"C\"\n        }\n      ],\n      \"title\": \"Connector staleness \\u2014 age since last success\",\n      \"transformations\": [\n        {\n          \"id\": \"merge\",\n          \"options\": {}\n        },\n        {\n          \"id\": \"organize\",\n          \"options\": {\n            \"excludeByName\": {\n              \"Time\": true,\n              \"__name__\": true,\n              \"cc_pair_id\": true,\n              \"instance\": true,\n              \"job\": true\n            },\n            \"renameByName\": {\n              \"Value #A\": \"Age\",\n              \"Value #B\": \"Failed attempts\",\n              \"Value #C\": \"Docs indexed\",\n              \"connector_name\": \"Connector\",\n              \"source\": \"Source\",\n              \"tenant_id\": \"Tenant\"\n            }\n          }\n        },\n        {\n          \"id\": \"sortBy\",\n          \"options\": {\n            \"fields\": {},\n            \"sort\": [\n              {\n                \"desc\": true,\n                \"field\": \"Age\"\n              }\n            ]\n          }\n        }\n      ],\n      \"type\": \"table\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Connectors that have failed repeatedly and entered an error state.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"custom\": {\n            \"align\": \"auto\",\n            \"cellOptions\": {\n              \"type\": \"auto\"\n            },\n            \"inspect\": false\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 12,\n        \"w\": 8,\n        \"x\": 16,\n        \"y\": 13\n      },\n      \"id\": 10,\n      \"options\": {\n        \"cellHeight\": \"sm\",\n        \"footer\": {\n          \"countRows\": false,\n          \"fields\": \"\",\n          \"reducer\": [\"sum\"],\n          \"show\": false\n        },\n        \"showHeader\": true\n      },\n      \"targets\": [\n        {\n          \"expr\": \"onyx_connector_in_error_state{tenant_id=~\\\"$tenant_id\\\", source=~\\\"$source\\\", connector_name=~\\\"$connector_name\\\"} == 1\",\n          \"format\": \"table\",\n          \"instant\": true,\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Connectors in error state\",\n      \"transformations\": [\n        {\n          \"id\": \"organize\",\n          \"options\": {\n            \"excludeByName\": {\n              \"Time\": true,\n              \"Value\": true,\n              \"__name__\": true,\n              \"cc_pair_id\": true,\n              \"instance\": true,\n              \"job\": true\n            },\n            \"renameByName\": {\n              \"connector_name\": \"Connector\",\n              \"source\": \"Source\",\n              \"tenant_id\": \"Tenant\"\n            }\n          }\n        }\n      ],\n      \"type\": \"table\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Total documents indexed and total failed index attempts per connector.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"custom\": {\n            \"align\": \"auto\",\n            \"cellOptions\": {\n              \"type\": \"auto\"\n            },\n            \"footer\": {\n              \"reducers\": []\n            },\n            \"inspect\": false\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          }\n        },\n        \"overrides\": [\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Failed Attempts\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"thresholds\",\n                \"value\": {\n                  \"mode\": \"absolute\",\n                  \"steps\": [\n                    {\n                      \"color\": \"green\",\n                      \"value\": 0\n                    },\n                    {\n                      \"color\": \"yellow\",\n                      \"value\": 1\n                    },\n                    {\n                      \"color\": \"red\",\n                      \"value\": 5\n                    }\n                  ]\n                }\n              },\n              {\n                \"id\": \"custom.cellOptions\",\n                \"value\": {\n                  \"type\": \"color-background\"\n                }\n              }\n            ]\n          }\n        ]\n      },\n      \"gridPos\": {\n        \"h\": 10,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 25\n      },\n      \"id\": 15,\n      \"options\": {\n        \"cellHeight\": \"sm\",\n        \"showHeader\": true\n      },\n      \"targets\": [\n        {\n          \"expr\": \"onyx_connector_docs_indexed{tenant_id=~\\\"$tenant_id\\\", source=~\\\"$source\\\", connector_name=~\\\"$connector_name\\\"}\",\n          \"format\": \"table\",\n          \"instant\": true,\n          \"refId\": \"A\"\n        },\n        {\n          \"expr\": \"onyx_connector_error_count{tenant_id=~\\\"$tenant_id\\\", source=~\\\"$source\\\", connector_name=~\\\"$connector_name\\\"}\",\n          \"format\": \"table\",\n          \"instant\": true,\n          \"refId\": \"B\"\n        }\n      ],\n      \"title\": \"Docs Indexed & Failed Attempts per Connector\",\n      \"transformations\": [\n        {\n          \"id\": \"merge\",\n          \"options\": {}\n        },\n        {\n          \"id\": \"organize\",\n          \"options\": {\n            \"excludeByName\": {\n              \"Time\": true,\n              \"__name__\": true,\n              \"cc_pair_id\": true,\n              \"instance\": true,\n              \"job\": true,\n              \"tenant_id\": true\n            },\n            \"renameByName\": {\n              \"Value #A\": \"Docs Indexed\",\n              \"Value #B\": \"Failed Attempts\",\n              \"connector_name\": \"Connector\",\n              \"source\": \"Source\"\n            }\n          }\n        },\n        {\n          \"id\": \"sortBy\",\n          \"options\": {\n            \"fields\": {},\n            \"sort\": [\n              {\n                \"desc\": true,\n                \"field\": \"Failed Attempts\"\n              }\n            ]\n          }\n        }\n      ],\n      \"type\": \"table\"\n    },\n    {\n      \"collapsed\": false,\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 35\n      },\n      \"id\": 26,\n      \"panels\": [],\n      \"title\": \"Indexing Pipeline\",\n      \"type\": \"row\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Rate of completed indexing tasks per minute stacked by source. Successes and failures are separate series.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"drawStyle\": \"bars\",\n            \"fillOpacity\": 80,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"normal\"\n            }\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": [\n          {\n            \"matcher\": {\n              \"id\": \"byFrameRefID\",\n              \"options\": \"B\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"red\",\n                  \"mode\": \"fixed\"\n                }\n              },\n              {\n                \"id\": \"custom.fillOpacity\",\n                \"value\": 70\n              }\n            ]\n          }\n        ]\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 14,\n        \"x\": 0,\n        \"y\": 36\n      },\n      \"id\": 5,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\"sum\"],\n          \"displayMode\": \"table\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"hideZeros\": true,\n          \"mode\": \"multi\",\n          \"sort\": \"desc\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"sum by (source) (rate(onyx_indexing_task_completed_total{outcome=\\\"success\\\", source=~\\\"$source\\\", tenant_id=~\\\"$tenant_id\\\"}[5m])) * 60\",\n          \"legendFormat\": \"{{source}} (success)\",\n          \"refId\": \"A\"\n        },\n        {\n          \"expr\": \"sum by (source) (rate(onyx_indexing_task_completed_total{outcome=\\\"failure\\\", source=~\\\"$source\\\", tenant_id=~\\\"$tenant_id\\\"}[5m])) * 60\",\n          \"legendFormat\": \"{{source}} (failure)\",\n          \"refId\": \"B\"\n        }\n      ],\n      \"title\": \"Indexing throughput \\u2014 success + failures stacked\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"p95 as solid line, p50 as dashed line per source.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 8,\n            \"lineWidth\": 2\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"s\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 10,\n        \"x\": 14,\n        \"y\": 36\n      },\n      \"id\": 6,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\"mean\", \"max\"],\n          \"displayMode\": \"table\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"hideZeros\": false,\n          \"mode\": \"multi\",\n          \"sort\": \"desc\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"histogram_quantile(0.95, sum by (source, le) (rate(onyx_indexing_task_duration_seconds_bucket{source=~\\\"$source\\\", tenant_id=~\\\"$tenant_id\\\"}[5m])))\",\n          \"legendFormat\": \"p95 {{source}}\",\n          \"refId\": \"A\"\n        },\n        {\n          \"expr\": \"histogram_quantile(0.50, sum by (source, le) (rate(onyx_indexing_task_duration_seconds_bucket{source=~\\\"$source\\\", tenant_id=~\\\"$tenant_id\\\"}[5m])))\",\n          \"legendFormat\": \"p50 {{source}}\",\n          \"refId\": \"B\"\n        }\n      ],\n      \"title\": \"Task latency \\u2014 p95 (solid) + p50 (dashed) per source\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Completed indexing tasks per minute broken down by individual connector and outcome.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 10,\n            \"lineWidth\": 2\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 14,\n        \"x\": 0,\n        \"y\": 44\n      },\n      \"id\": 7,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\"last\"],\n          \"displayMode\": \"table\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"hideZeros\": false,\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"sum by (source, connector_name, outcome) (rate(onyx_indexing_task_completed_total{source=~\\\"$source\\\", tenant_id=~\\\"$tenant_id\\\", connector_name=~\\\"$connector_name\\\"}[5m])) * 60\",\n          \"legendFormat\": \"{{source}} / {{connector_name}} ({{outcome}})\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Throughput by connector\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"95th percentile indexing task duration per individual connector.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 5,\n            \"lineWidth\": 2\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"s\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 10,\n        \"x\": 14,\n        \"y\": 44\n      },\n      \"id\": 8,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\"last\"],\n          \"displayMode\": \"table\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"hideZeros\": false,\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"histogram_quantile(0.95, sum by (source, connector_name, le) (rate(onyx_indexing_task_duration_seconds_bucket{source=~\\\"$source\\\", tenant_id=~\\\"$tenant_id\\\", connector_name=~\\\"$connector_name\\\"}[5m])))\",\n          \"legendFormat\": \"p95 {{source}} / {{connector_name}}\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"p95 duration by connector\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Distribution of task execution times.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"custom\": {\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"scaleDistribution\": {\n              \"log\": 2,\n              \"type\": \"log\"\n            }\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 14,\n        \"x\": 0,\n        \"y\": 52\n      },\n      \"id\": 22,\n      \"options\": {\n        \"calculate\": false,\n        \"cellGap\": 1,\n        \"color\": {\n          \"exponent\": 0.5,\n          \"fill\": \"dark-orange\",\n          \"mode\": \"scheme\",\n          \"reverse\": false,\n          \"scale\": \"exponential\",\n          \"scheme\": \"Oranges\",\n          \"steps\": 64\n        },\n        \"filterValues\": {\n          \"le\": 1e-9\n        },\n        \"legend\": {\n          \"show\": true\n        },\n        \"rowsFrame\": {\n          \"layout\": \"auto\"\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"show\": true,\n          \"showColorScale\": false,\n          \"yHistogram\": true\n        },\n        \"yAxis\": {\n          \"axisPlacement\": \"left\",\n          \"reverse\": false,\n          \"unit\": \"s\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"sum(increase(onyx_indexing_task_duration_seconds_bucket{source=~\\\"$source\\\", tenant_id=~\\\"$tenant_id\\\"}[5m])) by (le)\",\n          \"format\": \"heatmap\",\n          \"legendFormat\": \"{{le}}\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Task runtime heatmap\",\n      \"type\": \"heatmap\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Rate of task retries, revocations, and rejections per minute. Should be near zero.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 10,\n            \"lineWidth\": 2\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 10,\n        \"x\": 14,\n        \"y\": 52\n      },\n      \"id\": 21,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\"last\"],\n          \"displayMode\": \"table\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"noValue\": \"No events (healthy)\",\n        \"tooltip\": {\n          \"hideZeros\": false,\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"rate(onyx_celery_task_retried_total[5m]) * 60\",\n          \"legendFormat\": \"retry: {{task_name}}\",\n          \"refId\": \"A\"\n        },\n        {\n          \"expr\": \"rate(onyx_celery_task_revoked_total[5m]) * 60\",\n          \"legendFormat\": \"revoked: {{task_name}}\",\n          \"refId\": \"B\"\n        },\n        {\n          \"expr\": \"rate(onyx_celery_task_rejected_total[5m]) * 60\",\n          \"legendFormat\": \"rejected: {{task_name}}\",\n          \"refId\": \"C\"\n        }\n      ],\n      \"title\": \"Error events \\u2014 retries / revocations / rejections\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"collapsed\": false,\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 60\n      },\n      \"id\": 27,\n      \"panels\": [],\n      \"title\": \"Queue Infrastructure\",\n      \"type\": \"row\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Number of tasks waiting in each Celery queue.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 10,\n            \"lineWidth\": 2\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 14,\n        \"x\": 0,\n        \"y\": 61\n      },\n      \"id\": 1,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\"last\", \"max\"],\n          \"displayMode\": \"table\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"hideZeros\": true,\n          \"mode\": \"multi\",\n          \"sort\": \"desc\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"onyx_queue_depth\",\n          \"legendFormat\": \"{{queue}}\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Queue depth\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Rate of change of queue depth per minute.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 10,\n            \"lineWidth\": 2\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"green\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"yellow\",\n                \"value\": 10\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 100\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 10,\n        \"x\": 14,\n        \"y\": 61\n      },\n      \"id\": 16,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\"last\"],\n          \"displayMode\": \"table\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"hideZeros\": false,\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"deriv(onyx_queue_depth[5m]) * 60\",\n          \"legendFormat\": \"{{queue}}\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Queue growth rate (tasks/min)\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Age of the oldest waiting task in each non-empty queue.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 10,\n            \"lineWidth\": 2\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"yellow\",\n                \"value\": 60\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 300\n              }\n            ]\n          },\n          \"unit\": \"s\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 6,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 69\n      },\n      \"id\": 17,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\"last\"],\n          \"displayMode\": \"table\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"noValue\": \"No queued tasks (healthy)\",\n        \"tooltip\": {\n          \"hideZeros\": false,\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"onyx_queue_oldest_task_age_seconds > 0\",\n          \"legendFormat\": \"{{queue}}\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Oldest task age per queue\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"collapsed\": false,\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 75\n      },\n      \"id\": 28,\n      \"panels\": [],\n      \"title\": \"Redis & Workers\",\n      \"type\": \"row\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Redis memory consumption over time. Peak shown as a dashed line.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 10,\n            \"lineWidth\": 2\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"bytes\"\n        },\n        \"overrides\": [\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Peak\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"custom.lineStyle\",\n                \"value\": {\n                  \"dash\": [4, 4],\n                  \"fill\": \"dash\"\n                }\n              },\n              {\n                \"id\": \"custom.lineWidth\",\n                \"value\": 1\n              },\n              {\n                \"id\": \"custom.fillOpacity\",\n                \"value\": 0\n              }\n            ]\n          }\n        ]\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 8,\n        \"x\": 0,\n        \"y\": 76\n      },\n      \"id\": 18,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\"last\"],\n          \"displayMode\": \"table\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"hideZeros\": false,\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"onyx_redis_memory_used_bytes\",\n          \"legendFormat\": \"Used\",\n          \"refId\": \"A\"\n        },\n        {\n          \"expr\": \"onyx_redis_memory_peak_bytes\",\n          \"legendFormat\": \"Peak\",\n          \"refId\": \"B\"\n        }\n      ],\n      \"title\": \"Redis memory \\u2014 used vs peak\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Ratio of OS-allocated memory to Redis-used memory. >1.5 = significant fragmentation. <1.0 = Redis is swapping to disk (critical).\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [],\n          \"max\": 3,\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"red\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"green\",\n                \"value\": 1\n              },\n              {\n                \"color\": \"yellow\",\n                \"value\": 1.5\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 2\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 4,\n        \"x\": 8,\n        \"y\": 76\n      },\n      \"id\": 19,\n      \"options\": {\n        \"minVizHeight\": 75,\n        \"minVizWidth\": 75,\n        \"orientation\": \"auto\",\n        \"reduceOptions\": {\n          \"calcs\": [\"lastNotNull\"],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showThresholdLabels\": true,\n        \"showThresholdMarkers\": true,\n        \"sizing\": \"auto\"\n      },\n      \"targets\": [\n        {\n          \"expr\": \"max(last_over_time(onyx_redis_memory_fragmentation_ratio[2m]))\",\n          \"legendFormat\": \"Fragmentation\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Redis fragmentation ratio\",\n      \"type\": \"gauge\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Number of active Redis client connections. A steadily rising count indicates a connection leak.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 10,\n            \"lineWidth\": 2\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 6,\n        \"x\": 12,\n        \"y\": 76\n      },\n      \"id\": 20,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\"last\"],\n          \"displayMode\": \"table\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"hideZeros\": false,\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"targets\": [\n        {\n          \"expr\": \"onyx_redis_connected_clients\",\n          \"legendFormat\": \"Clients\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Redis connected clients\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"Health of each Prometheus scrape target. Green = UP. Red = DOWN.\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [\n            {\n              \"options\": {\n                \"0\": {\n                  \"color\": \"red\",\n                  \"text\": \"DOWN\"\n                }\n              },\n              \"type\": \"value\"\n            },\n            {\n              \"options\": {\n                \"1\": {\n                  \"color\": \"green\",\n                  \"text\": \"UP\"\n                }\n              },\n              \"type\": \"value\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"red\",\n                \"value\": 0\n              },\n              {\n                \"color\": \"green\",\n                \"value\": 1\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 6,\n        \"x\": 18,\n        \"y\": 76\n      },\n      \"id\": 14,\n      \"options\": {\n        \"colorMode\": \"background\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"auto\",\n        \"percentChangeColorMode\": \"standard\",\n        \"reduceOptions\": {\n          \"calcs\": [\"lastNotNull\"],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showPercentChange\": false,\n        \"textMode\": \"value_and_name\",\n        \"wideLayout\": true\n      },\n      \"targets\": [\n        {\n          \"expr\": \"up{job=~\\\"onyx-.*\\\"}\",\n          \"legendFormat\": \"{{job}}\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Scrape target health\",\n      \"type\": \"stat\"\n    }\n  ],\n  \"refresh\": \"10s\",\n  \"schemaVersion\": 39,\n  \"tags\": [\"onyx\", \"indexing\", \"prometheus\"],\n  \"templating\": {\n    \"list\": [\n      {\n        \"hide\": 0,\n        \"includeAll\": false,\n        \"multi\": false,\n        \"name\": \"DS_PROMETHEUS\",\n        \"options\": [],\n        \"query\": \"prometheus\",\n        \"refresh\": 1,\n        \"type\": \"datasource\"\n      },\n      {\n        \"allValue\": \".*\",\n        \"datasource\": {\n          \"type\": \"prometheus\",\n          \"uid\": \"${DS_PROMETHEUS}\"\n        },\n        \"definition\": \"label_values(onyx_connector_last_success_age_seconds, source)\",\n        \"hide\": 0,\n        \"includeAll\": true,\n        \"multi\": true,\n        \"name\": \"source\",\n        \"options\": [],\n        \"query\": \"label_values(onyx_connector_last_success_age_seconds, source)\",\n        \"refresh\": 1,\n        \"type\": \"query\"\n      },\n      {\n        \"allValue\": \".*\",\n        \"datasource\": {\n          \"type\": \"prometheus\",\n          \"uid\": \"${DS_PROMETHEUS}\"\n        },\n        \"definition\": \"label_values(onyx_connector_last_success_age_seconds, connector_name)\",\n        \"hide\": 0,\n        \"includeAll\": true,\n        \"multi\": true,\n        \"name\": \"connector_name\",\n        \"options\": [],\n        \"query\": \"label_values(onyx_connector_last_success_age_seconds, connector_name)\",\n        \"refresh\": 1,\n        \"type\": \"query\"\n      },\n      {\n        \"allValue\": \".*\",\n        \"datasource\": {\n          \"type\": \"prometheus\",\n          \"uid\": \"${DS_PROMETHEUS}\"\n        },\n        \"definition\": \"label_values(onyx_connector_last_success_age_seconds, tenant_id)\",\n        \"hide\": 0,\n        \"includeAll\": true,\n        \"multi\": true,\n        \"name\": \"tenant_id\",\n        \"options\": [],\n        \"query\": \"label_values(onyx_connector_last_success_age_seconds, tenant_id)\",\n        \"refresh\": 1,\n        \"type\": \"query\"\n      }\n    ]\n  },\n  \"time\": {\n    \"from\": \"now-3h\",\n    \"to\": \"now\"\n  },\n  \"timepicker\": {},\n  \"timezone\": \"browser\",\n  \"title\": \"Onyx Indexing Pipeline\",\n  \"uid\": \"onyx-indexing-pipeline\",\n  \"version\": 1,\n  \"weekStart\": \"\"\n}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/_helpers.tpl",
    "content": "{{/*\nExpand the name of the chart.\n*/}}\n{{- define \"onyx.name\" -}}\n{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix \"-\" }}\n{{- end }}\n\n{{/*\nCreate a default fully qualified app name.\nWe truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).\nIf release name contains chart name it will be used as a full name.\n*/}}\n{{- define \"onyx.fullname\" -}}\n{{- if .Values.fullnameOverride }}\n{{- .Values.fullnameOverride | trunc 63 | trimSuffix \"-\" }}\n{{- else }}\n{{- $name := default .Chart.Name .Values.nameOverride }}\n{{- if contains $name .Release.Name }}\n{{- .Release.Name | trunc 63 | trimSuffix \"-\" }}\n{{- else }}\n{{- printf \"%s-%s\" .Release.Name $name | trunc 63 | trimSuffix \"-\" }}\n{{- end }}\n{{- end }}\n{{- end }}\n\n{{/*\nCreate chart name and version as used by the chart label.\n*/}}\n{{- define \"onyx.chart\" -}}\n{{- printf \"%s-%s\" .Chart.Name .Chart.Version | replace \"+\" \"_\" | trunc 63 | trimSuffix \"-\" }}\n{{- end }}\n\n{{/*\nCommon labels\n*/}}\n{{- define \"onyx.labels\" -}}\nhelm.sh/chart: {{ include \"onyx.chart\" . }}\n{{ include \"onyx.selectorLabels\" . }}\n{{- if .Chart.AppVersion }}\napp.kubernetes.io/version: {{ .Chart.AppVersion | quote }}\n{{- end }}\napp.kubernetes.io/managed-by: {{ .Release.Service }}\n{{- end }}\n\n{{/*\nSelector labels\n*/}}\n{{- define \"onyx.selectorLabels\" -}}\napp.kubernetes.io/name: {{ include \"onyx.name\" . }}\napp.kubernetes.io/instance: {{ .Release.Name }}\n{{- end }}\n\n{{/*\nCreate the name of the service account to use\n*/}}\n{{- define \"onyx.serviceAccountName\" -}}\n{{- if .Values.serviceAccount.create }}\n{{- default (include \"onyx.fullname\" .) .Values.serviceAccount.name }}\n{{- else }}\n{{- default \"default\" .Values.serviceAccount.name }}\n{{- end }}\n{{- end }}\n\n{{/*\nSet secret name\n*/}}\n{{- define \"onyx.secretName\" -}}\n{{- default .secretName .existingSecret }}\n{{- end }}\n\n{{/*\nCreate env vars from secrets\n*/}}\n{{- define \"onyx.envSecrets\" -}}\n    {{- range $secretSuffix, $secretContent := .Values.auth }}\n    {{- if and (ne $secretContent.enabled false) ($secretContent.secretKeys) }}\n    {{- range $name, $key := $secretContent.secretKeys }}\n- name: {{ $name | upper | replace \"-\" \"_\" | quote }}\n  valueFrom:\n    secretKeyRef:\n      name: {{ include \"onyx.secretName\" $secretContent }}\n      key: {{ default $name $key }}\n    {{- end }}\n    {{- end }}\n    {{- end }}\n{{- end }}\n\n{{/*\nHelpers for mounting a psql convenience script into pods.\n*/}}\n{{- define \"onyx.pgInto.enabled\" -}}\n{{- if and .Values.tooling .Values.tooling.pgInto .Values.tooling.pgInto.enabled }}true{{- end }}\n{{- end }}\n\n{{- define \"onyx.pgInto.configMapName\" -}}\n{{- printf \"%s-pginto\" (include \"onyx.fullname\" .) -}}\n{{- end }}\n\n{{- define \"onyx.pgInto.checksumAnnotation\" -}}\n{{- if (include \"onyx.pgInto.enabled\" .) }}\nchecksum/pginto: {{ include (print $.Template.BasePath \"/tooling-pginto-configmap.yaml\") . | sha256sum }}\n{{- end }}\n{{- end }}\n\n{{- define \"onyx.pgInto.volumeMount\" -}}\n{{- if (include \"onyx.pgInto.enabled\" .) }}\n- name: pginto-script\n  mountPath: {{ default \"/usr/local/bin/pginto\" .Values.tooling.pgInto.mountPath }}\n  subPath: pginto\n  readOnly: true\n{{- end }}\n{{- end }}\n\n{{- define \"onyx.pgInto.volume\" -}}\n{{- if (include \"onyx.pgInto.enabled\" .) }}\n- name: pginto-script\n  configMap:\n    name: {{ include \"onyx.pgInto.configMapName\" . }}\n    defaultMode: 0755\n{{- end }}\n{{- end }}\n\n{{- define \"onyx.renderVolumeMounts\" -}}\n{{- $pginto := include \"onyx.pgInto.volumeMount\" .ctx -}}\n{{- $existing := .volumeMounts -}}\n{{- if or $pginto $existing -}}\nvolumeMounts:\n{{- if $pginto }}\n{{ $pginto | nindent 2 }}\n{{- end }}\n{{- if $existing }}\n{{ toYaml $existing | nindent 2 }}\n{{- end }}\n{{- end -}}\n{{- end }}\n\n{{- define \"onyx.renderVolumes\" -}}\n{{- $pginto := include \"onyx.pgInto.volume\" .ctx -}}\n{{- $existing := .volumes -}}\n{{- if or $pginto $existing -}}\nvolumes:\n{{- if $pginto }}\n{{ $pginto | nindent 2 }}\n{{- end }}\n{{- if $existing }}\n{{ toYaml $existing | nindent 2 }}\n{{- end }}\n{{- end -}}\n{{- end }}\n\n{{/*\nReturn the configured autoscaling engine; defaults to HPA when unset.\n*/}}\n{{- define \"onyx.autoscaling.engine\" -}}\n{{- $engine := default \"hpa\" .Values.autoscaling.engine -}}\n{{- $engine | lower -}}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/api-deployment.yaml",
    "content": "{{- if gt (int .Values.api.replicaCount) 0 }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-api-server\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.api.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  {{- if not .Values.api.autoscaling.enabled }}\n  replicas: {{ .Values.api.replicaCount }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.api.deploymentLabels }}\n      {{- toYaml .Values.api.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n        {{- $pgIntoChecksum := include \"onyx.pgInto.checksumAnnotation\" . }}\n        {{- if $pgIntoChecksum }}\n        {{- $pgIntoChecksum | nindent 8 }}\n        {{- end }}\n      {{- with .Values.api.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.api.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.api.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.api.podSecurityContext | nindent 8 }}\n      {{- with .Values.api.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.api.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.api.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: api-server\n          securityContext:\n            {{- toYaml .Values.api.securityContext | nindent 12 }}\n          image: \"{{ .Values.api.image.repository }}:{{ .Values.api.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          command:\n            - \"/bin/sh\"\n            - \"-c\"\n            - |\n              alembic upgrade head &&\n              echo \"Starting Onyx Api Server\" &&\n              uvicorn onyx.main:app --host {{ .Values.global.host }} --port {{ .Values.api.containerPorts.server }}\n          ports:\n            - name: api-server-port\n              containerPort: {{ .Values.api.containerPorts.server }}\n              protocol: TCP\n          resources:\n            {{- toYaml .Values.api.resources | nindent 12 }}\n          {{- with .Values.api.startupProbe }}\n          startupProbe:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          {{- with .Values.api.readinessProbe }}\n          readinessProbe:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          {{- with .Values.api.livenessProbe }}\n          livenessProbe:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n          {{- $apiVolumeMounts := include \"onyx.renderVolumeMounts\" (dict \"ctx\" . \"volumeMounts\" .Values.api.volumeMounts) }}\n          {{- if $apiVolumeMounts }}\n          {{- $apiVolumeMounts | nindent 10 }}\n          {{- end }}\n      {{- $apiVolumes := include \"onyx.renderVolumes\" (dict \"ctx\" . \"volumes\" .Values.api.volumes) }}\n      {{- if $apiVolumes }}\n      {{- $apiVolumes | nindent 6 }}\n      {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/api-hpa.yaml",
    "content": "{{- if and (.Values.api.autoscaling.enabled) (ne (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-api\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}\n  minReplicas: {{ .Values.api.autoscaling.minReplicas }}\n  maxReplicas: {{ .Values.api.autoscaling.maxReplicas }}\n  metrics:\n    {{- if .Values.api.autoscaling.targetCPUUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: cpu\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.api.autoscaling.targetCPUUtilizationPercentage }}\n    {{- end }}\n    {{- if .Values.api.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: memory\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.api.autoscaling.targetMemoryUtilizationPercentage }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/api-scaledobject.yaml",
    "content": "{{- if and (.Values.api.autoscaling.enabled) (eq (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: keda.sh/v1alpha1\nkind: ScaledObject\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-api\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-api-server\n  minReplicaCount: {{ .Values.api.autoscaling.minReplicas | default 1 }}\n  maxReplicaCount: {{ .Values.api.autoscaling.maxReplicas | default 10 }}\n  pollingInterval: {{ .Values.api.autoscaling.pollingInterval | default 30 }}\n  cooldownPeriod: {{ .Values.api.autoscaling.cooldownPeriod | default 300 }}\n  {{- if hasKey .Values.api.autoscaling \"idleReplicaCount\" }}\n  idleReplicaCount: {{ .Values.api.autoscaling.idleReplicaCount }}\n  {{- end }}\n  {{- if .Values.api.autoscaling.customTriggers }}\n  fallback:\n    failureThreshold: {{ .Values.api.autoscaling.failureThreshold | default 3 }}\n    replicas: {{ .Values.api.autoscaling.fallbackReplicas | default 1 }}\n  {{- end }}\n  triggers:\n    {{- if .Values.api.autoscaling.targetCPUUtilizationPercentage }}\n    - type: cpu\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.api.autoscaling.targetCPUUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.api.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: memory\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.api.autoscaling.targetMemoryUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.api.autoscaling.customTriggers }}\n    {{- toYaml .Values.api.autoscaling.customTriggers | nindent 4 }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/api-service.yaml",
    "content": "apiVersion: v1\nkind: Service\nmetadata:\n  # INTERNAL_URL env variable depends on this, don't change without changing INTERNAL_URL\n  name: {{ include \"onyx.fullname\" . }}-api-service\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- if .Values.api.deploymentLabels }}\n    {{- toYaml .Values.api.deploymentLabels | nindent 4 }}\n    {{- end }}\nspec:\n  type: {{ .Values.api.service.type }}\n  ports:\n    - port: {{ .Values.api.service.servicePort }}\n      targetPort: {{ .Values.api.service.targetPort }}\n      protocol: TCP\n      name: {{ .Values.api.service.portName }}\n  selector:\n    {{- include \"onyx.selectorLabels\" . | nindent 4 }}\n    {{- if .Values.api.deploymentLabels }}\n    {{- toYaml .Values.api.deploymentLabels | nindent 4 }}\n    {{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/auth-secrets.yaml",
    "content": "{{- if hasKey .Values.auth \"secretKeys\" }}\n{{- fail \"ERROR: Secrets handling has been refactored under 'auth' and must be updated before upgrading to this chart version.\" }}\n{{- end }}\n{{- range $secretKey, $secretContent := .Values.auth }}\n{{- if and (empty $secretContent.existingSecret) (or (not (hasKey $secretContent \"enabled\")) $secretContent.enabled) }}\n{{- $secretName := include \"onyx.secretName\" $secretContent }}\n{{- $existingSecret := lookup \"v1\" \"Secret\" $.Release.Namespace $secretName }}\n{{- /* Pre-validate: fail before emitting YAML if any required value is missing */ -}}\n{{- range $name, $value := $secretContent.values }}\n{{- if and (empty $value) (not (and $existingSecret (hasKey $existingSecret.data $name))) }}\n{{- fail (printf \"Secret value for '%s' is required but not set and no existing secret found. Please set auth.%s.values.%s in values.yaml\" $name $secretKey $name) }}\n{{- end }}\n{{- end }}\n---\napiVersion: v1\nkind: Secret\nmetadata:\n  name: {{ $secretName }}\ntype: Opaque\nstringData:\n{{- range $name, $value := $secretContent.values }}\n{{- if not (empty $value) }}\n  {{ $name }}: {{ $value | quote }}\n{{- else if and $existingSecret (hasKey $existingSecret.data $name) }}\n  {{ $name }}: {{ index $existingSecret.data $name | b64dec | quote }}\n{{- end }}\n{{- end }}\n{{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-beat.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_beat.replicaCount) 0) }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-beat\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.celery_beat.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  replicas: {{ .Values.celery_beat.replicaCount }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.celery_beat.deploymentLabels }}\n      {{- toYaml .Values.celery_beat.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.celery_beat.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.celery_beat.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.celery_beat.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}\n      {{- with .Values.celery_beat.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_beat.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_beat.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: celery-beat\n          securityContext:\n            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}\n          image: \"{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          command:\n            [\n              \"celery\",\n              \"-A\",\n              \"onyx.background.celery.versioned_apps.beat\",\n              \"beat\",\n              {{ printf \"--loglevel=%s\" .Values.celery_beat.logLevel | quote }},\n            ]\n          resources:\n            {{- toYaml .Values.celery_beat.resources | nindent 12 }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n          {{- with .Values.celery_beat.volumeMounts }}\n          volumeMounts:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          startupProbe:\n            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}\n          readinessProbe:\n            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe readiness\n                    --filename /tmp/onyx_k8s_beat_readiness.txt\n          livenessProbe:\n            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe liveness\n                    --filename /tmp/onyx_k8s_beat_liveness.txt\n      {{- with .Values.celery_beat.volumes }}\n      volumes:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-docfetching-hpa.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_docfetching.autoscaling.enabled) (ne (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-docfetching\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-docfetching\n  minReplicas: {{ .Values.celery_worker_docfetching.autoscaling.minReplicas }}\n  maxReplicas: {{ .Values.celery_worker_docfetching.autoscaling.maxReplicas }}\n  metrics:\n    {{- if .Values.celery_worker_docfetching.autoscaling.targetCPUUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: cpu\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_docfetching.autoscaling.targetCPUUtilizationPercentage }}\n    {{- end }}\n    {{- if .Values.celery_worker_docfetching.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: memory\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_docfetching.autoscaling.targetMemoryUtilizationPercentage }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-docfetching-metrics-service.yaml",
    "content": "{{- /* Metrics port must match the default in metrics_server.py (_DEFAULT_PORTS).\n       Do NOT use PROMETHEUS_METRICS_PORT env var in Helm — each worker needs its own port. */ -}}\n{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_docfetching.replicaCount) 0) }}\napiVersion: v1\nkind: Service\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-docfetching-metrics\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- if .Values.celery_worker_docfetching.deploymentLabels }}\n    {{- toYaml .Values.celery_worker_docfetching.deploymentLabels | nindent 4 }}\n    {{- end }}\n    metrics: \"true\"\nspec:\n  type: ClusterIP\n  ports:\n    - port: 9092\n      targetPort: metrics\n      protocol: TCP\n      name: metrics\n  selector:\n    {{- include \"onyx.selectorLabels\" . | nindent 4 }}\n    {{- if .Values.celery_worker_docfetching.deploymentLabels }}\n    {{- toYaml .Values.celery_worker_docfetching.deploymentLabels | nindent 4 }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-docfetching-scaledobject.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_docfetching.autoscaling.enabled) (eq (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: keda.sh/v1alpha1\nkind: ScaledObject\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-docfetching\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-docfetching\n  minReplicaCount: {{ .Values.celery_worker_docfetching.autoscaling.minReplicas | default 1 }}\n  maxReplicaCount: {{ .Values.celery_worker_docfetching.autoscaling.maxReplicas | default 20 }}\n  pollingInterval: {{ .Values.celery_worker_docfetching.autoscaling.pollingInterval | default 30 }}\n  cooldownPeriod: {{ .Values.celery_worker_docfetching.autoscaling.cooldownPeriod | default 300 }}\n  {{- if hasKey .Values.celery_worker_docfetching.autoscaling \"idleReplicaCount\" }}\n  idleReplicaCount: {{ .Values.celery_worker_docfetching.autoscaling.idleReplicaCount }}\n  {{- end }}\n  {{- if .Values.celery_worker_docfetching.autoscaling.customTriggers }}\n  fallback:\n    failureThreshold: {{ .Values.celery_worker_docfetching.autoscaling.failureThreshold | default 3 }}\n    replicas: {{ .Values.celery_worker_docfetching.autoscaling.fallbackReplicas | default 1 }}\n  {{- end }}\n  triggers:\n    {{- if .Values.celery_worker_docfetching.autoscaling.targetCPUUtilizationPercentage }}\n    - type: cpu\n      metadata:\n        type: Utilization\n        value: \"{{ .Values.celery_worker_docfetching.autoscaling.targetCPUUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_docfetching.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: memory\n      metadata:\n        type: Utilization\n        value: \"{{ .Values.celery_worker_docfetching.autoscaling.targetMemoryUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_docfetching.autoscaling.customTriggers }}\n    {{- toYaml .Values.celery_worker_docfetching.autoscaling.customTriggers | nindent 4 }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-docfetching.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_docfetching.replicaCount) 0) }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-docfetching\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.celery_worker_docfetching.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  {{- if not .Values.celery_worker_docfetching.autoscaling.enabled }}\n  replicas: {{ .Values.celery_worker_docfetching.replicaCount }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.celery_worker_docfetching.deploymentLabels }}\n      {{- toYaml .Values.celery_worker_docfetching.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.celery_worker_docfetching.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.celery_worker_docfetching.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.celery_worker_docfetching.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}\n      {{- with .Values.celery_worker_docfetching.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_docfetching.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_docfetching.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: celery-worker-docfetching\n          securityContext:\n            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}\n          image: \"{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          command:\n            [\n              \"celery\",\n              \"-A\",\n              \"onyx.background.celery.versioned_apps.docfetching\",\n              \"worker\",\n              \"--pool=threads\",\n              \"--concurrency=2\",\n              \"--prefetch-multiplier=1\",\n              {{ printf \"--loglevel=%s\" .Values.celery_worker_docfetching.logLevel | quote }},\n              \"--hostname=docfetching@%n\",\n              \"-Q\",\n              \"connector_doc_fetching\",\n            ]\n          ports:\n            - name: metrics\n              containerPort: 9092\n              protocol: TCP\n          resources:\n            {{- toYaml .Values.celery_worker_docfetching.resources | nindent 12 }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n          {{- with .Values.celery_worker_docfetching.volumeMounts }}\n          volumeMounts:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          startupProbe:\n            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}\n          readinessProbe:\n            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe readiness\n                    --filename /tmp/onyx_k8s_docfetching_readiness.txt\n          livenessProbe:\n            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe liveness\n                    --filename /tmp/onyx_k8s_docfetching_liveness.txt\n      {{- with .Values.celery_worker_docfetching.volumes }}\n      volumes:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-docprocessing-hpa.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_docprocessing.autoscaling.enabled) (ne (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-docprocessing\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-docprocessing\n  minReplicas: {{ .Values.celery_worker_docprocessing.autoscaling.minReplicas }}\n  maxReplicas: {{ .Values.celery_worker_docprocessing.autoscaling.maxReplicas }}\n  metrics:\n    {{- if .Values.celery_worker_docprocessing.autoscaling.targetCPUUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: cpu\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_docprocessing.autoscaling.targetCPUUtilizationPercentage }}\n    {{- end }}\n    {{- if .Values.celery_worker_docprocessing.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: memory\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_docprocessing.autoscaling.targetMemoryUtilizationPercentage }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-docprocessing-metrics-service.yaml",
    "content": "{{- /* Metrics port must match the default in metrics_server.py (_DEFAULT_PORTS).\n       Do NOT use PROMETHEUS_METRICS_PORT env var in Helm — each worker needs its own port. */ -}}\n{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_docprocessing.replicaCount) 0) }}\napiVersion: v1\nkind: Service\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-docprocessing-metrics\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- if .Values.celery_worker_docprocessing.deploymentLabels }}\n    {{- toYaml .Values.celery_worker_docprocessing.deploymentLabels | nindent 4 }}\n    {{- end }}\n    metrics: \"true\"\nspec:\n  type: ClusterIP\n  ports:\n    - port: 9093\n      targetPort: metrics\n      protocol: TCP\n      name: metrics\n  selector:\n    {{- include \"onyx.selectorLabels\" . | nindent 4 }}\n    {{- if .Values.celery_worker_docprocessing.deploymentLabels }}\n    {{- toYaml .Values.celery_worker_docprocessing.deploymentLabels | nindent 4 }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-docprocessing-scaledobject.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_docprocessing.autoscaling.enabled) (eq (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: keda.sh/v1alpha1\nkind: ScaledObject\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-docprocessing\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-docprocessing\n  minReplicaCount: {{ .Values.celery_worker_docprocessing.autoscaling.minReplicas | default 1 }}\n  maxReplicaCount: {{ .Values.celery_worker_docprocessing.autoscaling.maxReplicas | default 10 }}\n  pollingInterval: {{ .Values.celery_worker_docprocessing.autoscaling.pollingInterval | default 30 }}\n  cooldownPeriod: {{ .Values.celery_worker_docprocessing.autoscaling.cooldownPeriod | default 300 }}\n  {{- if hasKey .Values.celery_worker_docprocessing.autoscaling \"idleReplicaCount\" }}\n  idleReplicaCount: {{ .Values.celery_worker_docprocessing.autoscaling.idleReplicaCount }}\n  {{- end }}\n  {{- if .Values.celery_worker_docprocessing.autoscaling.customTriggers }}\n  fallback:\n    failureThreshold: {{ .Values.celery_worker_docprocessing.autoscaling.failureThreshold | default 3 }}\n    replicas: {{ .Values.celery_worker_docprocessing.autoscaling.fallbackReplicas | default 1 }}\n  {{- end }}\n  triggers:\n    {{- if .Values.celery_worker_docprocessing.autoscaling.targetCPUUtilizationPercentage }}\n    - type: cpu\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.celery_worker_docprocessing.autoscaling.targetCPUUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_docprocessing.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: memory\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.celery_worker_docprocessing.autoscaling.targetMemoryUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_docprocessing.autoscaling.customTriggers }}\n    {{- toYaml .Values.celery_worker_docprocessing.autoscaling.customTriggers | nindent 4 }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-docprocessing.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_docprocessing.replicaCount) 0) }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-docprocessing\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.celery_worker_docprocessing.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  {{- if not .Values.celery_worker_docprocessing.autoscaling.enabled }}\n  replicas: {{ .Values.celery_worker_docprocessing.replicaCount }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.celery_worker_docprocessing.deploymentLabels }}\n      {{- toYaml .Values.celery_worker_docprocessing.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.celery_worker_docprocessing.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.celery_worker_docprocessing.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.celery_worker_docprocessing.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}\n      {{- with .Values.celery_worker_docprocessing.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_docprocessing.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_docprocessing.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: celery-worker-docprocessing\n          securityContext:\n            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}\n          image: \"{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          command:\n            [\n              \"celery\",\n              \"-A\",\n              \"onyx.background.celery.versioned_apps.docprocessing\",\n              \"worker\",\n              \"--pool=threads\",\n              \"--concurrency=6\",\n              \"--prefetch-multiplier=1\",\n              {{ printf \"--loglevel=%s\" .Values.celery_worker_docprocessing.logLevel | quote }},\n              \"--hostname=docprocessing@%n\",\n              \"-Q\",\n              \"docprocessing\",\n            ]\n          ports:\n            - name: metrics\n              containerPort: 9093\n              protocol: TCP\n          resources:\n            {{- toYaml .Values.celery_worker_docprocessing.resources | nindent 12 }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            - name: ENABLE_MULTIPASS_INDEXING\n              value: \"{{ .Values.celery_worker_docprocessing.enableMiniChunk }}\"\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n          {{- with .Values.celery_worker_docprocessing.volumeMounts }}\n          volumeMounts:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          startupProbe:\n            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}\n          readinessProbe:\n            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe readiness\n                    --filename /tmp/onyx_k8s_docprocessing_readiness.txt\n          livenessProbe:\n            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe liveness\n                    --filename /tmp/onyx_k8s_docprocessing_liveness.txt\n      {{- with .Values.celery_worker_docprocessing.volumes }}\n      volumes:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-heavy-hpa.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_heavy.autoscaling.enabled) (ne (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-heavy\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-heavy\n  minReplicas: {{ .Values.celery_worker_heavy.autoscaling.minReplicas }}\n  maxReplicas: {{ .Values.celery_worker_heavy.autoscaling.maxReplicas }}\n  metrics:\n    {{- if .Values.celery_worker_heavy.autoscaling.targetCPUUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: cpu\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_heavy.autoscaling.targetCPUUtilizationPercentage }}\n    {{- end }}\n    {{- if .Values.celery_worker_heavy.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: memory\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_heavy.autoscaling.targetMemoryUtilizationPercentage }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-heavy-scaledobject.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_heavy.autoscaling.enabled) (eq (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: keda.sh/v1alpha1\nkind: ScaledObject\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-heavy\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-heavy\n  minReplicaCount: {{ .Values.celery_worker_heavy.autoscaling.minReplicas | default 1 }}\n  maxReplicaCount: {{ .Values.celery_worker_heavy.autoscaling.maxReplicas | default 10 }}\n  pollingInterval: {{ .Values.celery_worker_heavy.autoscaling.pollingInterval | default 30 }}\n  cooldownPeriod: {{ .Values.celery_worker_heavy.autoscaling.cooldownPeriod | default 300 }}\n  {{- if hasKey .Values.celery_worker_heavy.autoscaling \"idleReplicaCount\" }}\n  idleReplicaCount: {{ .Values.celery_worker_heavy.autoscaling.idleReplicaCount }}\n  {{- end }}\n  {{- if .Values.celery_worker_heavy.autoscaling.customTriggers }}\n  fallback:\n    failureThreshold: {{ .Values.celery_worker_heavy.autoscaling.failureThreshold | default 3 }}\n    replicas: {{ .Values.celery_worker_heavy.autoscaling.fallbackReplicas | default 1 }}\n  {{- end }}\n  triggers:\n    {{- if .Values.celery_worker_heavy.autoscaling.targetCPUUtilizationPercentage }}\n    - type: cpu\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.celery_worker_heavy.autoscaling.targetCPUUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_heavy.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: memory\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.celery_worker_heavy.autoscaling.targetMemoryUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_heavy.autoscaling.customTriggers }}\n    {{- toYaml .Values.celery_worker_heavy.autoscaling.customTriggers | nindent 4 }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-heavy.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_heavy.replicaCount) 0) }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-heavy\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.celery_worker_heavy.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  {{- if not .Values.celery_worker_heavy.autoscaling.enabled }}\n  replicas: {{ .Values.celery_worker_heavy.replicaCount }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.celery_worker_heavy.deploymentLabels }}\n      {{- toYaml .Values.celery_worker_heavy.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.celery_worker_heavy.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.celery_worker_heavy.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.celery_worker_heavy.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}\n      {{- with .Values.celery_worker_heavy.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_heavy.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_heavy.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: celery-worker-heavy\n          securityContext:\n            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}\n          image: \"{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          command:\n            [\n              \"celery\",\n              \"-A\",\n              \"onyx.background.celery.versioned_apps.heavy\",\n              \"worker\",\n              {{ printf \"--loglevel=%s\" .Values.celery_worker_heavy.logLevel | quote }},\n              \"--hostname=heavy@%n\",\n              \"-Q\",\n              \"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,sandbox\",\n            ]\n          resources:\n            {{- toYaml .Values.celery_worker_heavy.resources | nindent 12 }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n          {{- with .Values.celery_worker_heavy.volumeMounts }}\n          volumeMounts:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          startupProbe:\n            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}\n          readinessProbe:\n            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe readiness\n                    --filename /tmp/onyx_k8s_heavy_readiness.txt\n          livenessProbe:\n            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe liveness\n                    --filename /tmp/onyx_k8s_heavy_liveness.txt\n      {{- with .Values.celery_worker_heavy.volumes }}\n      volumes:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-light-hpa.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_light.autoscaling.enabled) (ne (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-light\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-light\n  minReplicas: {{ .Values.celery_worker_light.autoscaling.minReplicas }}\n  maxReplicas: {{ .Values.celery_worker_light.autoscaling.maxReplicas }}\n  metrics:\n    {{- if .Values.celery_worker_light.autoscaling.targetCPUUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: cpu\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_light.autoscaling.targetCPUUtilizationPercentage }}\n    {{- end }}\n    {{- if .Values.celery_worker_light.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: memory\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_light.autoscaling.targetMemoryUtilizationPercentage }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-light-scaledobject.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_light.autoscaling.enabled) (eq (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: keda.sh/v1alpha1\nkind: ScaledObject\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-light\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-light\n  minReplicaCount: {{ .Values.celery_worker_light.autoscaling.minReplicas | default 1 }}\n  maxReplicaCount: {{ .Values.celery_worker_light.autoscaling.maxReplicas | default 10 }}\n  pollingInterval: {{ .Values.celery_worker_light.autoscaling.pollingInterval | default 30 }}\n  cooldownPeriod: {{ .Values.celery_worker_light.autoscaling.cooldownPeriod | default 300 }}\n  {{- if hasKey .Values.celery_worker_light.autoscaling \"idleReplicaCount\" }}\n  idleReplicaCount: {{ .Values.celery_worker_light.autoscaling.idleReplicaCount }}\n  {{- end }}\n  {{- if .Values.celery_worker_light.autoscaling.customTriggers }}\n  fallback:\n    failureThreshold: {{ .Values.celery_worker_light.autoscaling.failureThreshold | default 3 }}\n    replicas: {{ .Values.celery_worker_light.autoscaling.fallbackReplicas | default 1 }}\n  {{- end }}\n  triggers:\n    {{- if .Values.celery_worker_light.autoscaling.targetCPUUtilizationPercentage }}\n    - type: cpu\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.celery_worker_light.autoscaling.targetCPUUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_light.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: memory\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.celery_worker_light.autoscaling.targetMemoryUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_light.autoscaling.customTriggers }}\n    {{- toYaml .Values.celery_worker_light.autoscaling.customTriggers | nindent 4 }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-light.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_light.replicaCount) 0) }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-light\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.celery_worker_light.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  {{- if not .Values.celery_worker_light.autoscaling.enabled }}\n  replicas: {{ .Values.celery_worker_light.replicaCount }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.celery_worker_light.deploymentLabels }}\n      {{- toYaml .Values.celery_worker_light.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.celery_worker_light.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.celery_worker_light.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.celery_worker_light.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}\n      {{- with .Values.celery_worker_light.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_light.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_light.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: celery-worker-light\n          securityContext:\n            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}\n          image: \"{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          command:\n            [\n              \"celery\",\n              \"-A\",\n              \"onyx.background.celery.versioned_apps.light\",\n              \"worker\",\n              {{ printf \"--loglevel=%s\" .Values.celery_worker_light.logLevel | quote }},\n              \"--hostname=light@%n\",\n              \"-Q\",\n              \"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup,index_attempt_cleanup,opensearch_migration\",\n            ]\n          resources:\n            {{- toYaml .Values.celery_worker_light.resources | nindent 12 }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n          {{- with .Values.celery_worker_light.volumeMounts }}\n          volumeMounts:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          startupProbe:\n            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}\n          readinessProbe:\n            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe readiness\n                    --filename /tmp/onyx_k8s_light_readiness.txt\n          livenessProbe:\n            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe liveness\n                    --filename /tmp/onyx_k8s_light_liveness.txt\n      {{- with .Values.celery_worker_light.volumes }}\n      volumes:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-monitoring-hpa.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_monitoring.autoscaling.enabled) (ne (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-monitoring\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-monitoring\n  minReplicas: {{ .Values.celery_worker_monitoring.autoscaling.minReplicas }}\n  maxReplicas: {{ .Values.celery_worker_monitoring.autoscaling.maxReplicas }}\n  metrics:\n    {{- if .Values.celery_worker_monitoring.autoscaling.targetCPUUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: cpu\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_monitoring.autoscaling.targetCPUUtilizationPercentage }}\n    {{- end }}\n    {{- if .Values.celery_worker_monitoring.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: memory\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_monitoring.autoscaling.targetMemoryUtilizationPercentage }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-monitoring-metrics-service.yaml",
    "content": "{{- /* Metrics port must match the default in metrics_server.py (_DEFAULT_PORTS).\n       Do NOT use PROMETHEUS_METRICS_PORT env var in Helm — each worker needs its own port. */ -}}\n{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_monitoring.replicaCount) 0) }}\napiVersion: v1\nkind: Service\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-monitoring-metrics\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- if .Values.celery_worker_monitoring.deploymentLabels }}\n    {{- toYaml .Values.celery_worker_monitoring.deploymentLabels | nindent 4 }}\n    {{- end }}\n    metrics: \"true\"\nspec:\n  type: ClusterIP\n  ports:\n    - port: 9096\n      targetPort: metrics\n      protocol: TCP\n      name: metrics\n  selector:\n    {{- include \"onyx.selectorLabels\" . | nindent 4 }}\n    {{- if .Values.celery_worker_monitoring.deploymentLabels }}\n    {{- toYaml .Values.celery_worker_monitoring.deploymentLabels | nindent 4 }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-monitoring-scaledobject.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_monitoring.autoscaling.enabled) (eq (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: keda.sh/v1alpha1\nkind: ScaledObject\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-monitoring\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-monitoring\n  minReplicaCount: {{ .Values.celery_worker_monitoring.autoscaling.minReplicas | default 1 }}\n  maxReplicaCount: {{ .Values.celery_worker_monitoring.autoscaling.maxReplicas | default 10 }}\n  pollingInterval: {{ .Values.celery_worker_monitoring.autoscaling.pollingInterval | default 30 }}\n  cooldownPeriod: {{ .Values.celery_worker_monitoring.autoscaling.cooldownPeriod | default 300 }}\n  {{- if hasKey .Values.celery_worker_monitoring.autoscaling \"idleReplicaCount\" }}\n  idleReplicaCount: {{ .Values.celery_worker_monitoring.autoscaling.idleReplicaCount }}\n  {{- end }}\n  {{- if .Values.celery_worker_monitoring.autoscaling.customTriggers }}\n  fallback:\n    failureThreshold: {{ .Values.celery_worker_monitoring.autoscaling.failureThreshold | default 3 }}\n    replicas: {{ .Values.celery_worker_monitoring.autoscaling.fallbackReplicas | default 1 }}\n  {{- end }}\n  triggers:\n    {{- if .Values.celery_worker_monitoring.autoscaling.targetCPUUtilizationPercentage }}\n    - type: cpu\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.celery_worker_monitoring.autoscaling.targetCPUUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_monitoring.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: memory\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.celery_worker_monitoring.autoscaling.targetMemoryUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_monitoring.autoscaling.customTriggers }}\n    {{- toYaml .Values.celery_worker_monitoring.autoscaling.customTriggers | nindent 4 }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-monitoring.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_monitoring.replicaCount) 0) }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-monitoring\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.celery_worker_monitoring.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  {{- if not .Values.celery_worker_monitoring.autoscaling.enabled }}\n  replicas: {{ .Values.celery_worker_monitoring.replicaCount }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.celery_worker_monitoring.deploymentLabels }}\n      {{- toYaml .Values.celery_worker_monitoring.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.celery_worker_monitoring.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.celery_worker_monitoring.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.celery_worker_monitoring.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}\n      {{- with .Values.celery_worker_monitoring.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_monitoring.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_monitoring.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: celery-worker-monitoring\n          securityContext:\n            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}\n          image: \"{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          command:\n            [\n              \"celery\",\n              \"-A\",\n              \"onyx.background.celery.versioned_apps.monitoring\",\n              \"worker\",\n              {{ printf \"--loglevel=%s\" .Values.celery_worker_monitoring.logLevel | quote }},\n              \"--hostname=monitoring@%n\",\n              \"-Q\",\n              \"monitoring\",\n            ]\n          ports:\n            - name: metrics\n              containerPort: 9096\n              protocol: TCP\n          resources:\n            {{- toYaml .Values.celery_worker_monitoring.resources | nindent 12 }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n          {{- with .Values.celery_worker_monitoring.volumeMounts }}\n          volumeMounts:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          startupProbe:\n            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}\n          readinessProbe:\n            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe readiness\n                    --filename /tmp/onyx_k8s_monitoring_readiness.txt\n          livenessProbe:\n            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe liveness\n                    --filename /tmp/onyx_k8s_monitoring_liveness.txt\n      {{- with .Values.celery_worker_monitoring.volumes }}\n      volumes:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-primary-hpa.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_primary.autoscaling.enabled) (ne (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-primary\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-primary\n  minReplicas: {{ .Values.celery_worker_primary.autoscaling.minReplicas }}\n  maxReplicas: {{ .Values.celery_worker_primary.autoscaling.maxReplicas }}\n  metrics:\n    {{- if .Values.celery_worker_primary.autoscaling.targetCPUUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: cpu\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_primary.autoscaling.targetCPUUtilizationPercentage }}\n    {{- end }}\n    {{- if .Values.celery_worker_primary.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: memory\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_primary.autoscaling.targetMemoryUtilizationPercentage }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-primary-scaledobject.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_primary.autoscaling.enabled) (eq (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: keda.sh/v1alpha1\nkind: ScaledObject\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-primary\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-primary\n  minReplicaCount: {{ .Values.celery_worker_primary.autoscaling.minReplicas | default 1 }}\n  maxReplicaCount: {{ .Values.celery_worker_primary.autoscaling.maxReplicas | default 10 }}\n  pollingInterval: {{ .Values.celery_worker_primary.autoscaling.pollingInterval | default 30 }}\n  cooldownPeriod: {{ .Values.celery_worker_primary.autoscaling.cooldownPeriod | default 300 }}\n  {{- if hasKey .Values.celery_worker_primary.autoscaling \"idleReplicaCount\" }}\n  idleReplicaCount: {{ .Values.celery_worker_primary.autoscaling.idleReplicaCount }}\n  {{- end }}\n  {{- if .Values.celery_worker_primary.autoscaling.customTriggers }}\n  fallback:\n    failureThreshold: {{ .Values.celery_worker_primary.autoscaling.failureThreshold | default 3 }}\n    replicas: {{ .Values.celery_worker_primary.autoscaling.fallbackReplicas | default 1 }}\n  {{- end }}\n  triggers:\n    {{- if .Values.celery_worker_primary.autoscaling.targetCPUUtilizationPercentage }}\n    - type: cpu\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.celery_worker_primary.autoscaling.targetCPUUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_primary.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: memory\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.celery_worker_primary.autoscaling.targetMemoryUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_primary.autoscaling.customTriggers }}\n    {{- toYaml .Values.celery_worker_primary.autoscaling.customTriggers | nindent 4 }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-primary.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_primary.replicaCount) 0) }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-primary\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.celery_worker_primary.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  {{- if not .Values.celery_worker_primary.autoscaling.enabled }}\n  replicas: {{ .Values.celery_worker_primary.replicaCount }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.celery_worker_primary.deploymentLabels }}\n      {{- toYaml .Values.celery_worker_primary.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.celery_worker_primary.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.celery_worker_primary.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.celery_worker_primary.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}\n      {{- with .Values.celery_worker_primary.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_primary.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_primary.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: celery-worker-primary\n          securityContext:\n            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}\n          image: \"{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          command:\n            [\n              \"celery\",\n              \"-A\",\n              \"onyx.background.celery.versioned_apps.primary\",\n              \"worker\",\n              {{ printf \"--loglevel=%s\" .Values.celery_worker_primary.logLevel | quote }},\n              \"--hostname=primary@%n\",\n              \"-Q\",\n              \"celery,periodic_tasks\",\n            ]\n          resources:\n            {{- toYaml .Values.celery_worker_primary.resources | nindent 12 }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n          {{- with .Values.celery_worker_primary.volumeMounts }}\n          volumeMounts:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          startupProbe:\n            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}\n          readinessProbe:\n            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe readiness\n                    --filename /tmp/onyx_k8s_primary_readiness.txt\n          livenessProbe:\n            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe liveness\n                    --filename /tmp/onyx_k8s_primary_liveness.txt\n      {{- with .Values.celery_worker_primary.volumes }}\n      volumes:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-servicemonitors.yaml",
    "content": "{{- if and .Values.monitoring.serviceMonitors.enabled .Values.vectorDB.enabled }}\n{{- if gt (int .Values.celery_worker_monitoring.replicaCount) 0 }}\n---\napiVersion: monitoring.coreos.com/v1\nkind: ServiceMonitor\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-monitoring\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.monitoring.serviceMonitors.labels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  namespaceSelector:\n    matchNames:\n      - {{ .Release.Namespace }}\n  selector:\n    matchLabels:\n      app: {{ .Values.celery_worker_monitoring.deploymentLabels.app }}\n      metrics: \"true\"\n  endpoints:\n    - port: metrics\n      path: /metrics\n      interval: 30s\n      scrapeTimeout: 10s\n{{- end }}\n{{- if gt (int .Values.celery_worker_docfetching.replicaCount) 0 }}\n---\napiVersion: monitoring.coreos.com/v1\nkind: ServiceMonitor\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-docfetching\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.monitoring.serviceMonitors.labels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  namespaceSelector:\n    matchNames:\n      - {{ .Release.Namespace }}\n  selector:\n    matchLabels:\n      app: {{ .Values.celery_worker_docfetching.deploymentLabels.app }}\n      metrics: \"true\"\n  endpoints:\n    - port: metrics\n      path: /metrics\n      interval: 30s\n      scrapeTimeout: 10s\n{{- end }}\n{{- if gt (int .Values.celery_worker_docprocessing.replicaCount) 0 }}\n---\napiVersion: monitoring.coreos.com/v1\nkind: ServiceMonitor\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-docprocessing\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.monitoring.serviceMonitors.labels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  namespaceSelector:\n    matchNames:\n      - {{ .Release.Namespace }}\n  selector:\n    matchLabels:\n      app: {{ .Values.celery_worker_docprocessing.deploymentLabels.app }}\n      metrics: \"true\"\n  endpoints:\n    - port: metrics\n      path: /metrics\n      interval: 30s\n      scrapeTimeout: 10s\n{{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-user-file-processing-hpa.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_user_file_processing.autoscaling.enabled) (ne (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-user-file-processing\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-user-file-processing\n  minReplicas: {{ .Values.celery_worker_user_file_processing.autoscaling.minReplicas }}\n  maxReplicas: {{ .Values.celery_worker_user_file_processing.autoscaling.maxReplicas }}\n  metrics:\n    {{- if .Values.celery_worker_user_file_processing.autoscaling.targetCPUUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: cpu\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_user_file_processing.autoscaling.targetCPUUtilizationPercentage }}\n    {{- end }}\n    {{- if .Values.celery_worker_user_file_processing.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: memory\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.celery_worker_user_file_processing.autoscaling.targetMemoryUtilizationPercentage }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-user-file-processing-scaledobject.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (.Values.celery_worker_user_file_processing.autoscaling.enabled) (eq (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: keda.sh/v1alpha1\nkind: ScaledObject\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-user-file-processing\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-celery-worker-user-file-processing\n  minReplicaCount: {{ .Values.celery_worker_user_file_processing.autoscaling.minReplicas | default 1 }}\n  maxReplicaCount: {{ .Values.celery_worker_user_file_processing.autoscaling.maxReplicas | default 10 }}\n  pollingInterval: {{ .Values.celery_worker_user_file_processing.autoscaling.pollingInterval | default 30 }}\n  cooldownPeriod: {{ .Values.celery_worker_user_file_processing.autoscaling.cooldownPeriod | default 300 }}\n  {{- if hasKey .Values.celery_worker_user_file_processing.autoscaling \"idleReplicaCount\" }}\n  idleReplicaCount: {{ .Values.celery_worker_user_file_processing.autoscaling.idleReplicaCount }}\n  {{- end }}\n  {{- if .Values.celery_worker_user_file_processing.autoscaling.customTriggers }}\n  fallback:\n    failureThreshold: {{ .Values.celery_worker_user_file_processing.autoscaling.failureThreshold | default 3 }}\n    replicas: {{ .Values.celery_worker_user_file_processing.autoscaling.fallbackReplicas | default 1 }}\n  {{- end }}\n  triggers:\n    {{- if .Values.celery_worker_user_file_processing.autoscaling.targetCPUUtilizationPercentage }}\n    - type: cpu\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.celery_worker_user_file_processing.autoscaling.targetCPUUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_user_file_processing.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: memory\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.celery_worker_user_file_processing.autoscaling.targetMemoryUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.celery_worker_user_file_processing.autoscaling.customTriggers }}\n    {{- toYaml .Values.celery_worker_user_file_processing.autoscaling.customTriggers | nindent 4 }}\n    {{- end }}\n{{- end }}\n\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/celery-worker-user-file-processing.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (gt (int .Values.celery_worker_user_file_processing.replicaCount) 0) }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-celery-worker-user-file-processing\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.celery_worker_user_file_processing.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  {{- if not .Values.celery_worker_user_file_processing.autoscaling.enabled }}\n  replicas: {{ .Values.celery_worker_user_file_processing.replicaCount }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.celery_worker_user_file_processing.deploymentLabels }}\n      {{- toYaml .Values.celery_worker_user_file_processing.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.celery_worker_user_file_processing.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.celery_worker_user_file_processing.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.celery_worker_user_file_processing.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.celery_shared.podSecurityContext | nindent 8 }}\n      {{- with .Values.celery_worker_user_file_processing.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_user_file_processing.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.celery_worker_user_file_processing.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: celery-worker-user-file-processing\n          securityContext:\n            {{- toYaml .Values.celery_shared.securityContext | nindent 12 }}\n          image: \"{{ .Values.celery_shared.image.repository }}:{{ .Values.celery_shared.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          command:\n            [\n              \"celery\",\n              \"-A\",\n              \"onyx.background.celery.versioned_apps.user_file_processing\",\n              \"worker\",\n              {{ printf \"--loglevel=%s\" .Values.celery_worker_user_file_processing.logLevel | quote }},\n              \"--hostname=user-file-processing@%n\",\n              \"-Q\",\n              \"user_file_processing,user_file_project_sync,user_file_delete\",\n            ]\n          resources:\n            {{- toYaml .Values.celery_worker_user_file_processing.resources | nindent 12 }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n          {{- with .Values.celery_worker_user_file_processing.volumeMounts }}\n          volumeMounts:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          startupProbe:\n            {{ .Values.celery_shared.startupProbe | toYaml | nindent 12}}\n          readinessProbe:\n            {{ .Values.celery_shared.readinessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe readiness\n                    --filename /tmp/onyx_k8s_userfileprocessing_readiness.txt\n          livenessProbe:\n            {{ .Values.celery_shared.livenessProbe | toYaml | nindent 12}}\n            exec:\n              command:\n                - /bin/bash\n                - -c\n                - >\n                    python onyx/background/celery/celery_k8s_probe.py\n                    --probe liveness\n                    --filename /tmp/onyx_k8s_userfileprocessing_liveness.txt\n      {{- with .Values.celery_worker_user_file_processing.volumes }}\n      volumes:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/configmap.yaml",
    "content": "apiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: {{ .Values.config.envConfigMapName }}\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\ndata:\n  INTERNAL_URL: \"http://{{ include \"onyx.fullname\" . }}-api-service:{{ .Values.api.service.servicePort | default 8080 }}\"\n  {{- if .Values.postgresql.enabled }}\n  POSTGRES_HOST: {{ .Release.Name }}-{{ default \"postgresql\" .Values.postgresql.nameOverride }}-rw\n  {{- end }}\n  {{- if .Values.vespa.enabled }}\n  VESPA_HOST: {{ .Values.vespa.name }}.{{ .Values.vespa.service.name }}.{{ .Release.Namespace }}.svc.cluster.local\n  {{- end }}\n  {{- if .Values.opensearch.enabled }}\n  OPENSEARCH_HOST: {{ .Values.opensearch.clusterName }}-{{ .Values.opensearch.nodeGroup }}.{{ .Release.Namespace }}.svc.cluster.local\n  OPENSEARCH_REST_API_PORT: \"9200\"\n  ENABLE_OPENSEARCH_INDEXING_FOR_ONYX: \"true\"\n  ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX: \"false\"\n  {{- end }}\n  {{- if .Values.redis.enabled }}\n  REDIS_HOST: {{ .Values.redis.redisStandalone.name | default .Release.Name }}-master\n  {{- end }}\n  MODEL_SERVER_HOST: \"{{ include \"onyx.fullname\" . }}-inference-model-service\"\n  {{- if .Values.vectorDB.enabled }}\n  INDEXING_MODEL_SERVER_HOST: \"{{ include \"onyx.fullname\" . }}-indexing-model-service\"\n  DISABLE_VECTOR_DB: \"false\"\n  {{- else }}\n  DISABLE_VECTOR_DB: \"true\"\n  {{- end }}\n{{- range $key, $value := .Values.configMap }}\n{{- if not (empty $value) }}\n  {{ $key }}: \"{{ $value }}\"\n{{- end }}\n{{- end }}\n  {{- if .Values.minio.enabled }}\n  S3_ENDPOINT_URL: \"http://{{ .Release.Name }}-minio:{{ default 9000 .Values.minio.service.port }}\"\n  {{- end }}\n  {{- if .Values.codeInterpreter.enabled }}\n  CODE_INTERPRETER_BASE_URL: \"http://{{ .Release.Name }}-code-interpreter:{{ .Values.codeInterpreter.service.port }}\"\n  {{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/discordbot.yaml",
    "content": "{{- if .Values.discordbot.enabled }}\n# Discord bot MUST run as a single replica - Discord only allows one client connection per bot token.\n# Do NOT enable HPA or increase replicas. Message processing is offloaded to scalable API pods via HTTP.\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-discordbot\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.discordbot.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  # CRITICAL: Discord bots cannot be horizontally scaled - only one WebSocket connection per token is allowed\n  replicas: 1\n  strategy:\n    type: Recreate  # Ensure old pod is terminated before new one starts to avoid duplicate connections\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.discordbot.deploymentLabels }}\n      {{- toYaml .Values.discordbot.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.discordbot.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.discordbot.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.discordbot.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.discordbot.podSecurityContext | nindent 8 }}\n      {{- with .Values.discordbot.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.discordbot.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.discordbot.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: discordbot\n          securityContext:\n            {{- toYaml .Values.discordbot.securityContext | nindent 12 }}\n          image: \"{{ .Values.discordbot.image.repository }}:{{ .Values.discordbot.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          command: [\"python\", \"onyx/onyxbot/discord/client.py\"]\n          resources:\n            {{- toYaml .Values.discordbot.resources | nindent 12 }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n            # Discord bot token - required for bot to connect\n            {{- if .Values.discordbot.botToken }}\n            - name: DISCORD_BOT_TOKEN\n              value: {{ .Values.discordbot.botToken | quote }}\n            {{- end }}\n            {{- if .Values.discordbot.botTokenSecretName }}\n            - name: DISCORD_BOT_TOKEN\n              valueFrom:\n                secretKeyRef:\n                  name: {{ .Values.discordbot.botTokenSecretName }}\n                  key: {{ .Values.discordbot.botTokenSecretKey | default \"token\" }}\n            {{- end }}\n            # Command prefix for bot commands (default: \"!\")\n            {{- if .Values.discordbot.invokeChar }}\n            - name: DISCORD_BOT_INVOKE_CHAR\n              value: {{ .Values.discordbot.invokeChar | quote }}\n            {{- end }}\n          {{- with .Values.discordbot.volumeMounts }}\n          volumeMounts:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n      {{- with .Values.discordbot.volumes }}\n      volumes:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/grafana-dashboards.yaml",
    "content": "{{- if .Values.monitoring.grafana.dashboards.enabled }}\n---\napiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-indexing-pipeline-dashboard\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    grafana_dashboard: \"1\"\n  annotations:\n    grafana_folder: \"Onyx\"\ndata:\n  onyx-indexing-pipeline.json: |\n    {{- .Files.Get \"dashboards/indexing-pipeline.json\" | nindent 4 }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/indexing-model-deployment.yaml",
    "content": "{{- if and .Values.vectorDB.enabled (gt (int .Values.indexCapability.replicaCount) 0) }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-indexing-model\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.indexCapability.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  replicas: {{ .Values.indexCapability.replicaCount }}\n  {{- with .Values.indexCapability.strategy }}\n  strategy:\n    {{- toYaml . | nindent 4 }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.indexCapability.deploymentLabels }}\n      {{- toYaml .Values.indexCapability.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.indexCapability.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.indexCapability.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.indexCapability.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- if .Values.indexCapability.podSecurityContext }}\n      securityContext:\n        {{- toYaml .Values.indexCapability.podSecurityContext | nindent 8 }}\n      {{- end }}\n      {{- with .Values.indexCapability.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.indexCapability.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.indexCapability.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n      - name: {{ .Values.indexCapability.name }}\n        image: \"{{ .Values.indexCapability.image.repository }}:{{ .Values.indexCapability.image.tag | default .Values.global.version }}\"\n        imagePullPolicy: {{ .Values.global.pullPolicy }}\n        command: [ \"uvicorn\", \"model_server.main:app\", \"--host\", \"{{ .Values.global.host }}\", \"--port\", \"{{ .Values.indexCapability.containerPorts.server }}\", \"--limit-concurrency\", \"{{ .Values.indexCapability.limitConcurrency }}\" ]\n        ports:\n        - name: model-server\n          containerPort: {{ .Values.indexCapability.containerPorts.server }}\n          protocol: TCP\n        envFrom:\n          - configMapRef:\n              name: {{ .Values.config.envConfigMapName }}\n        env:\n          - name: INDEXING_ONLY\n            value: \"{{ default \"True\" .Values.indexCapability.indexingOnly }}\"\n          {{- include \"onyx.envSecrets\" . | nindent 10}}\n        {{- if .Values.indexCapability.securityContext }}\n        securityContext:\n          {{- toYaml .Values.indexCapability.securityContext | nindent 10 }}\n        {{- end }}\n        {{- if .Values.indexCapability.resources }}\n        resources:\n          {{- toYaml .Values.indexCapability.resources | nindent 10 }}\n        {{- end }}\n        {{- with .Values.indexCapability.startupProbe }}\n        startupProbe:\n          {{- toYaml . | nindent 10 }}\n        {{- end }}\n        {{- with .Values.indexCapability.readinessProbe }}\n        readinessProbe:\n          {{- toYaml . | nindent 10 }}\n        {{- end }}\n        {{- with .Values.indexCapability.livenessProbe }}\n        livenessProbe:\n          {{- toYaml . | nindent 10 }}\n        {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/indexing-model-service.yaml",
    "content": "{{- if .Values.vectorDB.enabled }}\napiVersion: v1\nkind: Service\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-indexing-model-service\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  selector:\n    {{- include \"onyx.selectorLabels\" . | nindent 4 }}\n    {{- if .Values.indexCapability.deploymentLabels }}\n    {{- toYaml .Values.indexCapability.deploymentLabels | nindent 4 }}\n    {{- end }}\n  ports:\n    - name: {{ .Values.indexCapability.service.portName }}\n      protocol: TCP\n      port: {{ .Values.indexCapability.service.servicePort  }}\n      targetPort: {{ .Values.indexCapability.service.targetPort }}\n  type: {{ .Values.indexCapability.service.type }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/inference-model-deployment.yaml",
    "content": "{{- if gt (int .Values.inferenceCapability.replicaCount) 0 }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-inference-model\n  labels:\n    {{- range .Values.inferenceCapability.labels }}\n    {{ .key }}: {{ .value }}\n    {{- end }}\nspec:\n  replicas: {{ .Values.inferenceCapability.replicaCount }}\n  {{- with .Values.inferenceCapability.strategy }}\n  strategy:\n    {{- toYaml . | nindent 4 }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- range .Values.inferenceCapability.labels }}\n      {{ .key }}: {{ .value }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      labels:\n        {{- range .Values.inferenceCapability.podLabels }}\n        {{ .key }}: {{ .value }}\n        {{- end }}\n    spec:\n      {{- if .Values.inferenceCapability.podSecurityContext }}\n      securityContext:\n        {{- toYaml .Values.inferenceCapability.podSecurityContext | nindent 8 }}\n      {{- end }}\n      {{- with .Values.inferenceCapability.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.inferenceCapability.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.inferenceCapability.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n      - name: model-server-inference\n        image: \"{{ .Values.inferenceCapability.image.repository }}:{{ .Values.inferenceCapability.image.tag | default .Values.global.version }}\"\n        imagePullPolicy: {{ .Values.global.pullPolicy }}\n        command: [ \"uvicorn\", \"model_server.main:app\", \"--host\", \"{{ .Values.global.host }}\", \"--port\", \"{{ .Values.inferenceCapability.containerPorts.server }}\" ]\n        ports:\n        - name: model-server\n          containerPort: {{ .Values.inferenceCapability.containerPorts.server }}\n          protocol: TCP\n        envFrom:\n        - configMapRef:\n            name: {{ .Values.config.envConfigMapName }}\n        env:\n          {{- include \"onyx.envSecrets\" . | nindent 12}}\n        {{- if .Values.inferenceCapability.securityContext }}\n        securityContext:\n          {{- toYaml .Values.inferenceCapability.securityContext | nindent 10 }}\n        {{- end }}\n        {{- if .Values.inferenceCapability.resources }}\n        resources:\n          {{- toYaml .Values.inferenceCapability.resources | nindent 10 }}\n        {{- end }}\n        {{- with .Values.inferenceCapability.startupProbe }}\n        startupProbe:\n          {{- toYaml . | nindent 10 }}\n        {{- end }}\n        {{- with .Values.inferenceCapability.readinessProbe }}\n        readinessProbe:\n          {{- toYaml . | nindent 10 }}\n        {{- end }}\n        {{- with .Values.inferenceCapability.livenessProbe }}\n        livenessProbe:\n          {{- toYaml . | nindent 10 }}\n        {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/inference-model-service.yaml",
    "content": "apiVersion: v1\nkind: Service\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-inference-model-service\nspec:\n  type: {{ .Values.inferenceCapability.service.type }}\n  ports:\n    - port: {{ .Values.inferenceCapability.service.servicePort}}\n      targetPort: {{ .Values.inferenceCapability.service.targetPort }}\n      protocol: TCP\n      name: {{ .Values.inferenceCapability.service.portName }}\n  selector:\n    {{- range .Values.inferenceCapability.labels }}\n    {{ .key }}: {{ .value }}\n    {{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/ingress-api.yaml",
    "content": "{{- if .Values.ingress.enabled -}}\napiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-ingress-api\n  annotations:\n    {{- if not .Values.ingress.className }}\n    kubernetes.io/ingress.class: nginx\n    {{- end }}\n    nginx.ingress.kubernetes.io/rewrite-target: /$2\n    nginx.ingress.kubernetes.io/use-regex: \"true\"\n    cert-manager.io/cluster-issuer: {{ include \"onyx.fullname\" . }}-letsencrypt\nspec:\n  {{- if .Values.ingress.className }}\n  ingressClassName: {{ .Values.ingress.className }}\n  {{- end }}\n  rules:\n    - host: {{ .Values.ingress.api.host }}\n      http:\n        paths:\n          - path: /api(/|$)(.*)\n            pathType: ImplementationSpecific\n            backend:\n              service:\n                name: {{ include \"onyx.fullname\" . }}-api-service\n                port:\n                  number: {{ .Values.api.service.servicePort }}\n  tls:\n    - hosts:\n        - {{ .Values.ingress.api.host }}\n      secretName: {{ include \"onyx.fullname\" . }}-ingress-api-tls\n{{- end }}"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/ingress-mcp.yaml",
    "content": "{{- if and .Values.ingress.enabled .Values.mcpServer.enabled -}}\napiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-ingress-mcp\n  annotations:\n    {{- if not .Values.ingress.className }}\n    kubernetes.io/ingress.class: nginx\n    {{- end }}\n    nginx.ingress.kubernetes.io/rewrite-target: /$2\n    nginx.ingress.kubernetes.io/use-regex: \"true\"\n    cert-manager.io/cluster-issuer: {{ include \"onyx.fullname\" . }}-letsencrypt\nspec:\n  {{- if .Values.ingress.className }}\n  ingressClassName: {{ .Values.ingress.className }}\n  {{- end }}\n  rules:\n    - host: {{ .Values.ingress.api.host }}\n      http:\n        paths:\n          - path: /mcp(/|$)(.*)\n            pathType: ImplementationSpecific\n            backend:\n              service:\n                name: {{ include \"onyx.fullname\" . }}-mcp-server-service\n                port:\n                  number: {{ .Values.mcpServer.service.servicePort }}\n  tls:\n    - hosts:\n        - {{ .Values.ingress.api.host }}\n      secretName: {{ include \"onyx.fullname\" . }}-ingress-mcp-tls\n{{- end }}\n\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/ingress-webserver.yaml",
    "content": "{{- if .Values.ingress.enabled -}}\napiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-ingress-webserver\n  annotations:\n    {{- if not .Values.ingress.className }}\n    kubernetes.io/ingress.class: nginx\n    {{- end }}\n    cert-manager.io/cluster-issuer: {{ include \"onyx.fullname\" . }}-letsencrypt\n    kubernetes.io/tls-acme: \"true\"\nspec:\n  {{- if .Values.ingress.className }}\n  ingressClassName: {{ .Values.ingress.className }}\n  {{- end }}\n  rules:\n    - host: {{ .Values.ingress.webserver.host }}\n      http:\n        paths:\n          - path: /\n            pathType: Prefix\n            backend:\n              service:\n                name: {{ include \"onyx.fullname\" . }}-webserver\n                port:\n                  number: {{ .Values.webserver.service.servicePort }}\n  tls:\n    - hosts:\n        - {{ .Values.ingress.webserver.host }}\n      secretName: {{ include \"onyx.fullname\" . }}-ingress-webserver-tls\n{{- end }}"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/lets-encrypt.yaml",
    "content": "{{- if .Values.letsencrypt.enabled -}}\napiVersion: cert-manager.io/v1\nkind: ClusterIssuer\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-letsencrypt\nspec:\n  acme:\n    # The ACME server URL\n    server: https://acme-v02.api.letsencrypt.org/directory\n    # Email address used for ACME registration\n    email: {{ .Values.letsencrypt.email }}\n    # Name of a secret used to store the ACME account private key\n    privateKeySecretRef:\n      name: {{ include \"onyx.fullname\" . }}-letsencrypt\n    # Enable the HTTP-01 challenge provider\n    solvers:\n      - http01:\n          ingress:\n            class: nginx\n{{- end }}"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/mcp-server-deployment.yaml",
    "content": "{{- if .Values.mcpServer.enabled }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-mcp-server\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.mcpServer.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  replicas: {{ .Values.mcpServer.replicaCount }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.mcpServer.deploymentLabels }}\n      {{- toYaml .Values.mcpServer.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.mcpServer.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.mcpServer.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.mcpServer.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.mcpServer.podSecurityContext | nindent 8 }}\n      {{- with .Values.mcpServer.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.mcpServer.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.mcpServer.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: mcp-server\n          securityContext:\n            {{- toYaml .Values.mcpServer.securityContext | nindent 12 }}\n          image: \"{{ .Values.mcpServer.image.repository }}:{{ .Values.mcpServer.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          command: [\"python\", \"onyx/mcp_server_main.py\"]\n          ports:\n            - name: mcp-server-port\n              containerPort: {{ .Values.mcpServer.containerPorts.server }}\n              protocol: TCP\n          livenessProbe:\n            httpGet:\n              path: /health\n              port: mcp-server-port\n            initialDelaySeconds: {{ .Values.mcpServer.livenessProbe.initialDelaySeconds }}\n            periodSeconds: {{ .Values.mcpServer.livenessProbe.periodSeconds }}\n            timeoutSeconds: {{ .Values.mcpServer.livenessProbe.timeoutSeconds }}\n            failureThreshold: {{ .Values.mcpServer.livenessProbe.failureThreshold }}\n          readinessProbe:\n            httpGet:\n              path: /health\n              port: mcp-server-port\n            initialDelaySeconds: {{ .Values.mcpServer.readinessProbe.initialDelaySeconds }}\n            periodSeconds: {{ .Values.mcpServer.readinessProbe.periodSeconds }}\n            timeoutSeconds: {{ .Values.mcpServer.readinessProbe.timeoutSeconds }}\n            failureThreshold: {{ .Values.mcpServer.readinessProbe.failureThreshold }}\n          resources:\n            {{- toYaml .Values.mcpServer.resources | nindent 12 }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            - name: MCP_SERVER_ENABLED\n              value: \"true\"\n            - name: MCP_SERVER_PORT\n              value: \"{{ .Values.mcpServer.containerPorts.server }}\"\n            - name: MCP_SERVER_HOST\n              value: \"{{ .Values.global.host }}\"\n            {{- if .Values.mcpServer.corsOrigins }}\n            - name: MCP_SERVER_CORS_ORIGINS\n              value: \"{{ .Values.mcpServer.corsOrigins }}\"\n            {{- end }}\n            # API server connection for authentication and proxying\n            # Uses full override variable to set the port instead of using default 8080\n            - name: API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS\n              value: \"http://{{ include \"onyx.fullname\" . }}-api-service:{{ .Values.api.service.servicePort }}\"\n            {{- include \"onyx.envSecrets\" . | nindent 12 }}\n          {{- with .Values.mcpServer.volumeMounts }}\n          volumeMounts:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n      {{- with .Values.mcpServer.volumes }}\n      volumes:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n{{- end }}\n\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/mcp-server-service.yaml",
    "content": "{{- if .Values.mcpServer.enabled }}\napiVersion: v1\nkind: Service\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-mcp-server-service\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- if .Values.mcpServer.deploymentLabels }}\n    {{- toYaml .Values.mcpServer.deploymentLabels | nindent 4 }}\n    {{- end }}\nspec:\n  type: {{ .Values.mcpServer.service.type }}\n  ports:\n    - port: {{ .Values.mcpServer.service.servicePort }}\n      targetPort: {{ .Values.mcpServer.service.targetPort }}\n      protocol: TCP\n      name: {{ .Values.mcpServer.service.portName }}\n  selector:\n    {{- include \"onyx.selectorLabels\" . | nindent 4 }}\n    {{- if .Values.mcpServer.deploymentLabels }}\n    {{- toYaml .Values.mcpServer.deploymentLabels | nindent 4 }}\n    {{- end }}\n{{- end }}\n\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/nginx-conf.yaml",
    "content": "###############################################################################\n# NOTE: If you make changes to this file, increment the following in values.yaml\n# before running `helm upgrade` to trigger an automatic nginx restart:\n#\n#   nginx.controller.podAnnotations:\n#     onyx.app/nginx-config-version: \"<new_version>\"\n#\n# Otherwise, changes won't apply until you manually restart the nginx pods.\n###############################################################################\n\napiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: onyx-nginx-conf\ndata:\n  upstreams.conf: |\n    upstream api_server {\n        server {{ include \"onyx.fullname\" . }}-api-service:{{ .Values.api.service.servicePort }} fail_timeout=0;\n    }\n\n    upstream web_server {\n        server {{ include \"onyx.fullname\" . }}-webserver:{{ .Values.webserver.service.servicePort }} fail_timeout=0;\n    }\n    {{- if .Values.mcpServer.enabled }}\n\n    upstream mcp_server {\n        server {{ include \"onyx.fullname\" . }}-mcp-server-service:{{ .Values.mcpServer.service.servicePort }} fail_timeout=0;\n    }\n    {{- end }}\n\n    # WebSocket support: only set Connection \"upgrade\" for actual upgrade requests\n    map $http_upgrade $connection_upgrade {\n        default upgrade;\n        ''      close;\n    }\n\n  server.conf: |\n    server {\n        listen 1024;\n        server_name $$DOMAIN;\n\n        client_max_body_size 5G;\n        {{- if .Values.mcpServer.enabled }}\n\n        # MCP Server - Model Context Protocol for LLM integrations\n        # Match /mcp, /mcp/, or /mcp/* but NOT /mcpserver, /mcpapi, etc.\n        location ~ ^/mcp(/.*)?$ {\n            rewrite ^/mcp(/.*)$ $1 break;\n            rewrite ^/mcp/?$ / break;\n            proxy_set_header X-Real-IP $remote_addr;\n            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n            proxy_set_header X-Forwarded-Proto $scheme;\n            proxy_set_header X-Forwarded-Host $host;\n            proxy_set_header Host $host;\n            proxy_http_version 1.1;\n            proxy_buffering off;\n            proxy_redirect off;\n            # timeout settings\n            proxy_connect_timeout {{ .Values.nginx.timeouts.connect }}s;\n            proxy_send_timeout {{ .Values.nginx.timeouts.send }}s;\n            proxy_read_timeout {{ .Values.nginx.timeouts.read }}s;\n            proxy_pass http://mcp_server;\n        }\n        {{- end }}\n\n        location ~ ^/scim(/.*)?$ {\n            proxy_set_header X-Real-IP $remote_addr;\n            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n            proxy_set_header X-Forwarded-Proto $scheme;\n            proxy_set_header X-Forwarded-Host $host;\n            proxy_set_header Host $host;\n            proxy_http_version 1.1;\n            proxy_buffering off;\n            proxy_redirect off;\n            # timeout settings\n            proxy_connect_timeout {{ .Values.nginx.timeouts.connect }}s;\n            proxy_send_timeout {{ .Values.nginx.timeouts.send }}s;\n            proxy_read_timeout {{ .Values.nginx.timeouts.read }}s;\n            proxy_pass http://api_server;\n        }\n\n        location ~ ^/(api|openapi\\.json)(/.*)?$ {\n            rewrite ^/api(/.*)$ $1 break;\n            proxy_set_header X-Real-IP $remote_addr;\n            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n            proxy_set_header X-Forwarded-Proto $scheme;\n            proxy_set_header X-Forwarded-Host $host;\n            proxy_set_header Host $host;\n            proxy_http_version 1.1;\n            proxy_set_header Upgrade $http_upgrade;\n            proxy_set_header Connection $connection_upgrade;\n            proxy_buffering off;\n            proxy_redirect off;\n            # timeout settings\n            proxy_connect_timeout {{ .Values.nginx.timeouts.connect }}s;\n            proxy_send_timeout {{ .Values.nginx.timeouts.send }}s;\n            proxy_read_timeout {{ .Values.nginx.timeouts.read }}s;\n            proxy_pass http://api_server;\n        }\n\n        location / {\n            proxy_set_header X-Real-IP $remote_addr;\n            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n            proxy_set_header X-Forwarded-Proto $scheme;\n            proxy_set_header X-Forwarded-Host $host;\n            proxy_set_header Host $host;\n            proxy_http_version 1.1;\n            proxy_redirect off;\n            # timeout settings\n            proxy_connect_timeout {{ .Values.nginx.timeouts.connect }}s;\n            proxy_send_timeout {{ .Values.nginx.timeouts.send }}s;\n            proxy_read_timeout {{ .Values.nginx.timeouts.read }}s;\n            proxy_pass http://web_server;\n        }\n    }\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/serviceaccount.yaml",
    "content": "{{- if .Values.serviceAccount.create -}}\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: {{ include \"onyx.serviceAccountName\" . }}\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n  {{- with .Values.serviceAccount.annotations }}\n  annotations:\n    {{- toYaml . | nindent 4 }}\n  {{- end }}\nautomountServiceAccountToken: {{ .Values.serviceAccount.automount }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/slackbot.yaml",
    "content": "{{- if .Values.slackbot.enabled }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-slackbot\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.slackbot.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.slackbot.deploymentLabels }}\n      {{- toYaml .Values.slackbot.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.slackbot.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.slackbot.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.slackbot.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.slackbot.podSecurityContext | nindent 8 }}\n      {{- with .Values.slackbot.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.slackbot.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.slackbot.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: slackbot\n          securityContext:\n            {{- toYaml .Values.slackbot.securityContext | nindent 12 }}\n          image: \"{{ .Values.slackbot.image.repository }}:{{ .Values.slackbot.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          command: [\"python\", \"onyx/onyxbot/slack/listener.py\"]\n          resources:\n            {{- toYaml .Values.slackbot.resources | nindent 12 }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n          {{- with .Values.slackbot.volumeMounts }}\n          volumeMounts:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n      {{- with .Values.slackbot.volumes }}\n      volumes:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/tests/test-connection.yaml",
    "content": "{{- if gt (int .Values.webserver.replicaCount) 0 }}\napiVersion: v1\nkind: Pod\nmetadata:\n  name: \"{{ include \"onyx.fullname\" . }}-test-connection\"\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n  annotations:\n    \"helm.sh/hook\": test\n    \"helm.sh/hook-delete-policy\": hook-succeeded,before-hook-creation\nspec:\n  containers:\n    - name: curl\n      image: curlimages/curl:8.10.1\n      command:\n        - /bin/sh\n        - -c\n      args:\n        - |\n          SVC=\"{{ include \"onyx.fullname\" . }}-webserver\"\n          PORT=\"{{ .Values.webserver.service.servicePort }}\"\n          URL=\"http://${SVC}:${PORT}/\"\n          for i in $(seq 1 40); do\n            echo \"Attempt $i: curl ${URL}\"\n            # Treat any successful TCP/HTTP response as success (even 5xx).\n            # curl exits 0 on HTTP 4xx/5xx if -f is not used; non-zero indicates connection error.\n            if curl --connect-timeout 3 --max-time 5 -sS -o /dev/null \"$${URL}\"; then\n              echo \"Connection succeeded\"\n              exit 0\n            fi\n            sleep 10\n          done\n          echo \"Service not reachable after 40 attempts\"\n          exit 1\n  restartPolicy: Never\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/tooling-pginto-configmap.yaml",
    "content": "{{- if (include \"onyx.pgInto.enabled\" .) }}\napiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: {{ include \"onyx.pgInto.configMapName\" . }}\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\ndata:\n  pginto: |\n    #!/usr/bin/env sh\n    set -eu\n\n    HOST=\"${PGINTO_HOST:-${POSTGRES_HOST:-localhost}}\"\n    PORT=\"${POSTGRES_PORT:-5432}\"\n    USER=\"${POSTGRES_USER:-postgres}\"\n    DB=\"${POSTGRES_DB:-postgres}\"\n    PSQL_BIN=\"${PGINTO_PSQL_BIN:-{{ default \"psql\" .Values.tooling.pgInto.psqlBinary }}}\"\n    USE_IAM=\"$(printf '%s' \"${USE_IAM_AUTH:-false}\" | tr '[:upper:]' '[:lower:]')\"\n\n    if ! command -v \"${PSQL_BIN}\" >/dev/null 2>&1; then\n      echo \"psql client '${PSQL_BIN}' not found in PATH\" >&2\n      exit 1\n    fi\n\n    if [ \"${USE_IAM}\" = \"true\" ]; then\n      REGION=\"${AWS_REGION:-${AWS_DEFAULT_REGION:-${AWS_REGION_NAME:-}}}\"\n      if [ -z \"${REGION}\" ]; then\n        REGION=\"$(printf \"%s\\n\" \"${HOST}\" | sed -n 's/.*\\.\\([a-z0-9-]*\\)\\.rds\\.amazonaws\\.com.*/\\1/p')\"\n      fi\n      if [ -z \"${REGION}\" ]; then\n        echo \"USE_IAM_AUTH is true but AWS region is not set (AWS_REGION/AWS_DEFAULT_REGION/AWS_REGION_NAME)\" >&2\n        exit 1\n      fi\n      PY_BIN=\"$(command -v python3 || command -v python || true)\"\n      if [ -z \"${PY_BIN}\" ]; then\n        echo \"python is required to generate RDS IAM auth token\" >&2\n        exit 1\n      fi\n      if [ -z \"${PGSSLMODE:-}\" ]; then\n        export PGSSLMODE=require\n      fi\n      PGPASSWORD=\"$(\"${PY_BIN}\" -c 'import sys,boto3; host,port,user,region=sys.argv[1:]; port_int=int(port); token=boto3.client(\"rds\", region_name=region).generate_db_auth_token(DBHostname=host, Port=port_int, DBUsername=user); sys.stdout.write(token)' \"${HOST}\" \"${PORT}\" \"${USER}\" \"${REGION}\")\"\n      if [ -z \"${PGPASSWORD}\" ]; then\n        echo \"failed to generate IAM auth token\" >&2\n        exit 1\n      fi\n      export PGPASSWORD\n    else\n      if [ -z \"${PGPASSWORD:-}\" ] && [ -n \"${POSTGRES_PASSWORD:-}\" ]; then\n        export PGPASSWORD=\"${POSTGRES_PASSWORD}\"\n      fi\n\n      if [ -z \"${PGPASSWORD:-}\" ]; then\n        printf \"Postgres password: \" >&2\n        if command -v stty >/dev/null 2>&1; then\n          stty -echo || true\n        fi\n        if ! read -r PGPASSWORD; then\n          echo \"failed to read password\" >&2\n          exit 1\n        fi\n        if command -v stty >/dev/null 2>&1; then\n          stty echo || true\n        fi\n        printf \"\\n\" >&2\n        export PGPASSWORD\n      fi\n    fi\n\n    if [ -n \"${POSTGRES_SSLMODE:-}\" ] && [ -z \"${PGSSLMODE:-}\" ]; then\n      export PGSSLMODE=\"${POSTGRES_SSLMODE}\"\n    fi\n\n    echo \"Connecting to ${DB} on ${HOST}:${PORT} as ${USER}\"\n    exec \"${PSQL_BIN}\" -h \"${HOST}\" -p \"${PORT}\" -U \"${USER}\" \"${DB}\" \"$@\"\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/webserver-deployment.yaml",
    "content": "{{- if gt (int .Values.webserver.replicaCount) 0 }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-web-server\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- with .Values.webserver.deploymentLabels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\nspec:\n  {{- if not .Values.webserver.autoscaling.enabled }}\n  replicas: {{ .Values.webserver.replicaCount }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.webserver.deploymentLabels }}\n      {{- toYaml .Values.webserver.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      annotations:\n        checksum/config: {{ include (print $.Template.BasePath \"/configmap.yaml\") . | sha256sum }}\n      {{- with .Values.webserver.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.webserver.deploymentLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n        {{- with .Values.webserver.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.webserver.podSecurityContext | nindent 8 }}\n      {{- with .Values.webserver.nodeSelector }}\n      nodeSelector:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.webserver.affinity }}\n      affinity:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      {{- with .Values.webserver.tolerations }}\n      tolerations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      containers:\n        - name: web-server\n          securityContext:\n            {{- toYaml .Values.webserver.securityContext | nindent 12 }}\n          image: \"{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Values.global.version }}\"\n          imagePullPolicy: {{ .Values.global.pullPolicy }}\n          ports:\n            - name: http\n              containerPort: {{ .Values.webserver.containerPorts.server }}\n              protocol: TCP\n          resources:\n            {{- toYaml .Values.webserver.resources | nindent 12 }}\n          {{- with .Values.webserver.startupProbe }}\n          startupProbe:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          {{- with .Values.webserver.readinessProbe }}\n          readinessProbe:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          {{- with .Values.webserver.livenessProbe }}\n          livenessProbe:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n          {{- with .Values.webserver.volumeMounts }}\n          volumeMounts:\n            {{- toYaml . | nindent 12 }}\n          {{- end }}\n      {{- with .Values.webserver.volumes }}\n      volumes:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/webserver-hpa.yaml",
    "content": "{{- if and (.Values.webserver.autoscaling.enabled) (ne (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-webserver\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}\n  minReplicas: {{ .Values.webserver.autoscaling.minReplicas }}\n  maxReplicas: {{ .Values.webserver.autoscaling.maxReplicas }}\n  metrics:\n    {{- if .Values.webserver.autoscaling.targetCPUUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: cpu\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.webserver.autoscaling.targetCPUUtilizationPercentage }}\n    {{- end }}\n    {{- if .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: memory\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/webserver-scaledobject.yaml",
    "content": "{{- if and (.Values.webserver.autoscaling.enabled) (eq (include \"onyx.autoscaling.engine\" .) \"keda\") }}\napiVersion: keda.sh/v1alpha1\nkind: ScaledObject\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-web-server\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}-web-server\n  minReplicaCount: {{ .Values.webserver.autoscaling.minReplicas }}\n  maxReplicaCount: {{ .Values.webserver.autoscaling.maxReplicas }}\n  pollingInterval: {{ .Values.webserver.autoscaling.pollingInterval | default 30 }}\n  cooldownPeriod: {{ .Values.webserver.autoscaling.cooldownPeriod | default 300 }}\n  {{- if hasKey .Values.webserver.autoscaling \"idleReplicaCount\" }}\n  idleReplicaCount: {{ .Values.webserver.autoscaling.idleReplicaCount }}\n  {{- end }}\n  {{- if .Values.webserver.autoscaling.customTriggers }}\n  fallback:\n    failureThreshold: {{ .Values.webserver.autoscaling.failureThreshold | default 3 }}\n    replicas: {{ .Values.webserver.autoscaling.fallbackReplicas | default 1 }}\n  {{- end }}\n  triggers:\n    {{- if .Values.webserver.autoscaling.targetCPUUtilizationPercentage }}\n    - type: cpu\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.webserver.autoscaling.targetCPUUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: memory\n      metricType: Utilization\n      metadata:\n        value: \"{{ .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }}\"\n    {{- end }}\n    {{- if .Values.webserver.autoscaling.customTriggers }}\n    {{- toYaml .Values.webserver.autoscaling.customTriggers | nindent 4 }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates/webserver-service.yaml",
    "content": "apiVersion: v1\nkind: Service\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-webserver\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\n    {{- if .Values.webserver.deploymentLabels }}\n    {{- toYaml .Values.webserver.deploymentLabels | nindent 4 }}\n    {{- end }}\nspec:\n  type: {{ .Values.webserver.service.type }}\n  ports:\n    - port: {{ .Values.webserver.service.servicePort }}\n      targetPort: {{ .Values.webserver.service.targetPort }}\n      protocol: TCP\n      name: http\n  selector:\n    {{- include \"onyx.selectorLabels\" . | nindent 4 }}\n    {{- if .Values.webserver.deploymentLabels }}\n    {{- toYaml .Values.webserver.deploymentLabels | nindent 4 }}\n    {{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates_disabled/background-deployment.yaml",
    "content": "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-background\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  {{- if not .Values.background.autoscaling.enabled }}\n  replicas: {{ .Values.background.replicaCount }}\n  {{- end }}\n  selector:\n    matchLabels:\n      {{- include \"onyx.selectorLabels\" . | nindent 6 }}\n      {{- if .Values.background.deploymentLabels }}\n      {{- toYaml .Values.background.deploymentLabels | nindent 6 }}\n      {{- end }}\n  template:\n    metadata:\n      {{- with .Values.background.podAnnotations }}\n      annotations:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      labels:\n        {{- include \"onyx.labels\" . | nindent 8 }}\n        {{- with .Values.background.podLabels }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- with .Values.imagePullSecrets }}\n      imagePullSecrets:\n        {{- toYaml . | nindent 8 }}\n      {{- end }}\n      serviceAccountName: {{ include \"onyx.serviceAccountName\" . }}\n      securityContext:\n        {{- toYaml .Values.background.podSecurityContext | nindent 8 }}\n      containers:\n        - name: background\n          securityContext:\n            {{- toYaml .Values.background.securityContext | nindent 12 }}\n          image: \"{{ .Values.background.image.repository }}:{{ .Values.background.image.tag | default .Chart.AppVersion }}\"\n          imagePullPolicy: {{ .Values.background.image.pullPolicy }}\n          command: [\"/usr/bin/supervisord\"]\n          resources:\n            {{- toYaml .Values.background.resources | nindent 12 }}\n          envFrom:\n            - configMapRef:\n                name: {{ .Values.config.envConfigMapName }}\n          env:\n            - name: ENABLE_MULTIPASS_INDEXING\n              value: \"{{ .Values.background.enableMiniChunk }}\"\n            {{- include \"onyx.envSecrets\" . | nindent 12}}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates_disabled/background-hpa.yaml",
    "content": "{{- if .Values.background.autoscaling.enabled }}\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: {{ include \"onyx.fullname\" . }}-background\n  labels:\n    {{- include \"onyx.labels\" . | nindent 4 }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: {{ include \"onyx.fullname\" . }}\n  minReplicas: {{ .Values.background.autoscaling.minReplicas }}\n  maxReplicas: {{ .Values.background.autoscaling.maxReplicas }}\n  metrics:\n    {{- if .Values.background.autoscaling.targetCPUUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: cpu\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.background.autoscaling.targetCPUUtilizationPercentage }}\n    {{- end }}\n    {{- if .Values.background.autoscaling.targetMemoryUtilizationPercentage }}\n    - type: Resource\n      resource:\n        name: memory\n        target:\n          type: Utilization\n          averageUtilization: {{ .Values.background.autoscaling.targetMemoryUtilizationPercentage }}\n    {{- end }}\n{{- end }}\n"
  },
  {
    "path": "deployment/helm/charts/onyx/templates_disabled/onyx-secret.yaml",
    "content": "{{- if not .Values.auth.existingSecret -}}\napiVersion: v1\nkind: Secret\nmetadata:\n  name: {{ include \"onyx.secretName\" . }}\ntype: Opaque\nstringData:\n  {{- range $name, $value := .Values.auth.secrets }}\n  {{ $name }}: {{ $value | quote }}\n  {{- end }}\n{{- end }}"
  },
  {
    "path": "deployment/helm/charts/onyx/values-lite.yaml",
    "content": "# =============================================================================\n# ONYX LITE — MINIMAL DEPLOYMENT VALUES\n# =============================================================================\n# Minimal Onyx deployment: no vector database, no Redis, no model servers.\n# Only PostgreSQL is required. Connectors and RAG search are disabled, but the\n# core chat experience (LLM conversations, tools, user file uploads, Projects,\n# Agent knowledge) still works.\n#\n# Usage:\n#   helm install onyx ./deployment/helm/charts/onyx \\\n#     -f ./deployment/helm/charts/onyx/values-lite.yaml\n#\n# Or merged with your own overrides:\n#   helm install onyx ./deployment/helm/charts/onyx \\\n#     -f ./deployment/helm/charts/onyx/values-lite.yaml \\\n#     -f my-overrides.yaml\n# =============================================================================\n\nvectorDB:\n  enabled: false\n\nvespa:\n  enabled: false\n\nredis:\n  enabled: false\n\nconfigMap:\n  CACHE_BACKEND: \"postgres\"\n  AUTH_BACKEND: \"postgres\"\n  FILE_STORE_BACKEND: \"postgres\"\n"
  },
  {
    "path": "deployment/helm/charts/onyx/values.yaml",
    "content": "# Default values for onyx.\n# This is a YAML-formatted file.\n# Declare variables to be passed into your templates.\n\nglobal:\n  # Global version for all Onyx components (overrides .Chart.AppVersion)\n  version: \"latest\"\n  # Global pull policy for all Onyx component images\n  pullPolicy: \"IfNotPresent\"\n  # Host for all Onyx components\n  host: \"0.0.0.0\"\n\npostgresql:\n  enabled: true\n  # IMPORTANT: This nameOverride is required for the CloudNativePG operator to find itself.\n  # The operator looks for a deployment with label app.kubernetes.io/name=cloudnative-pg,\n  # but since the subchart is aliased as \"postgresql\", Helm defaults to that name.\n  nameOverride: cloudnative-pg\n  cluster:\n    instances: 1\n    storage:\n      storageClass: \"\"\n      size: 10Gi\n    enableSuperuserAccess: true\n    superuserSecret:\n      name: onyx-postgresql  # keep in sync with auth.postgresql\n\n# -- Master toggle for vector database support. When false:\n#   - Sets DISABLE_VECTOR_DB=true on all backend pods\n#   - Skips the indexing model server deployment (embeddings not needed)\n#   - Skips ALL celery worker deployments (beat, primary, light, heavy,\n#     monitoring, user-file-processing, docprocessing, docfetching) — the\n#     API server handles background work via FastAPI BackgroundTasks\n#   - You should also set vespa.enabled=false and opensearch.enabled=false\n#     to prevent those subcharts from deploying\nvectorDB:\n  enabled: true\n\nvespa:\n  name: da-vespa-0\n  service:\n    name: vespa-service\n  volumeClaimTemplates:\n    - metadata:\n        name: vespa-storage\n      spec:\n        accessModes:\n          - ReadWriteOnce\n        resources:\n          requests:\n            storage: 30Gi\n        storageClassName: \"\"\n  enabled: true\n  replicaCount: 1\n  image:\n    repository: vespa\n    tag: \"8.609.39\"\n  podAnnotations: {}\n  podLabels:\n    app: vespa\n    app.kubernetes.io/instance: onyx\n    app.kubernetes.io/name: vespa\n  securityContext:\n    privileged: true\n    runAsUser: 0\n  resources:\n    # The Vespa Helm chart specifies default resources, which are quite modest. We override\n    # them here to increase chances of the chart running successfully. If you plan to index at\n    # scale, you will likely need to increase these limits further.\n    # At large scale, it is recommended to use a dedicated Vespa cluster / Vespa cloud.\n    requests:\n      cpu: 4000m\n      memory: 8000Mi\n    limits:\n      cpu: 8000m\n      memory: 32000Mi\n\nopensearch:\n  # Enabled by default. Override to false and set the appropriate env vars in\n  # the instance-specific values yaml if using AWS-managed OpenSearch, or simply\n  # override to false to entirely disable.\n  enabled: true\n  # These values are passed to the opensearch subchart.\n  # See https://github.com/opensearch-project/helm-charts/blob/main/charts/opensearch/values.yaml\n\n  singleNode: true  # Forces replicas=1, sets discovery.type=single-node\n\n  # Determines service DNS: onyx-opensearch-master.<namespace>.svc.cluster.local\n  clusterName: \"onyx-opensearch\"\n  nodeGroup: \"master\"\n  masterService: \"onyx-opensearch-master\"\n\n  replicas: 1\n\n  image:\n    repository: \"opensearchproject/opensearch\"\n    tag: \"\"  # Empty uses chart's appVersion (3.4.0).\n\n  # The security plugin requires OPENSEARCH_INITIAL_ADMIN_PASSWORD for\n  # OpenSearch 2.12+.\n  # See https://docs.opensearch.org/latest/install-and-configure/install-opensearch/helm/#prerequisites\n  extraEnvs:\n    - name: OPENSEARCH_INITIAL_ADMIN_PASSWORD\n      valueFrom:\n        secretKeyRef:\n          name: onyx-opensearch  # Must match auth.opensearch.secretName or auth.opensearch.existingSecret if defined.\n          key: opensearch_admin_password  # Must match auth.opensearch.secretKeys value.\n\n  resources:\n    requests:\n      cpu: 2000m\n      memory: 4Gi\n    limits:\n      cpu: 4000m\n      memory: 8Gi\n\n  persistence:\n    enabled: true\n    size: 30Gi\n    storageClass: \"\"\n\n  # Java heap should be ~50% of memory limit.\n  # See https://opster.com/guides/opensearch/opensearch-basics/opensearch-heap-size-usage-and-jvm-garbage-collection/\n  # Xms is the starting size, Xmx is the maximum size. These should be the same.\n  opensearchJavaOpts: \"-Xmx4g -Xms4g\"\n\npersistent:\n  storageClassName: \"\"\n\nimagePullSecrets: []\nnameOverride: \"\"\nfullnameOverride: \"\"\n\nautoscaling:\n  # Valid options: 'hpa' (default) or 'keda'.\n  # Set to 'keda' to render KEDA ScaledObjects for components that have autoscaling enabled.\n  # When using KEDA you must install and manage the KEDA operator separately; it is not bundled with this chart.\n  engine: hpa\n\ninferenceCapability:\n  service:\n    portName: modelserver\n    type: ClusterIP\n    servicePort: 9000\n    targetPort: 9000\n  name: inference-model-server\n  replicaCount: 1\n  labels:\n    - key: app\n      value: inference-model-server\n  image:\n    repository: onyxdotapp/onyx-model-server\n    # Overrides the image tag whose default is the chart appVersion.\n    tag: \"\"\n  containerPorts:\n    server: 9000\n  podLabels:\n    - key: app\n      value: inference-model-server\n  resources:\n    requests:\n      cpu: 2000m\n      memory: 3Gi\n    limits:\n      cpu: 4000m\n      memory: 10Gi\n  # Optional health probes\n  # Example:\n  # readinessProbe:\n  #   httpGet:\n  #     path: /health\n  #     port: model-server\n  startupProbe: {}\n  readinessProbe: {}\n  livenessProbe: {}\n  podSecurityContext: {}\n  securityContext:\n    privileged: true\n    runAsUser: 0\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n  # Deployment strategy - use Recreate or RollingUpdate with maxSurge: 0 to terminate old pod first\n  # This prevents pending pods when cluster resources are constrained\n  strategy: {}\n  # Example for RollingUpdate that terminates old pod first:\n  # strategy:\n  #   type: RollingUpdate\n  #   rollingUpdate:\n  #     maxSurge: 0\n  #     maxUnavailable: 1\n\n\nindexCapability:\n  service:\n    portName: modelserver\n    type: ClusterIP\n    servicePort: 9000\n    targetPort: 9000\n  replicaCount: 1\n  name: indexing-model-server\n  deploymentLabels:\n    app: indexing-model-server\n  podLabels:\n    scope: onyx-backend\n  indexingOnly: \"True\"\n  podAnnotations: {}\n  containerPorts:\n    server: 9000\n  image:\n    repository: onyxdotapp/onyx-model-server\n    # Overrides the image tag whose default is the chart appVersion.\n    tag: \"\"\n  limitConcurrency: 10\n  resources:\n    requests:\n      cpu: 4000m\n      memory: 3Gi\n    limits:\n      cpu: 6000m\n      memory: 6Gi\n  # Optional health probes\n  # Example:\n  # readinessProbe:\n  #   httpGet:\n  #     path: /health\n  #     port: model-server\n  startupProbe: {}\n  readinessProbe: {}\n  livenessProbe: {}\n  podSecurityContext: {}\n  securityContext:\n    privileged: true\n    runAsUser: 0\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n  # Deployment strategy - use Recreate or RollingUpdate with maxSurge: 0 to terminate old pod first\n  # This prevents pending pods when cluster resources are constrained\n  strategy: {}\n  # Example for RollingUpdate that terminates old pod first:\n  # strategy:\n  #   type: RollingUpdate\n  #   rollingUpdate:\n  #     maxSurge: 0\n  #     maxUnavailable: 1\nconfig:\n  envConfigMapName: env-configmap\n\ntooling:\n  pgInto:\n    # -- Mounts a small helper script into app pods that opens psql using the pod's POSTGRES_* env vars.\n    enabled: false\n    # -- Where to place the helper inside the container.\n    mountPath: /usr/local/bin/pginto\n    # -- Which client binary to call; change if your image uses a non-default path.\n    psqlBinary: psql\n\nmonitoring:\n  grafana:\n    dashboards:\n      # -- Set to true to deploy Grafana dashboard ConfigMaps for the Onyx indexing pipeline.\n      # Requires kube-prometheus-stack (or equivalent) with the Grafana sidecar enabled and watching this namespace.\n      # The sidecar must be configured with label selector: grafana_dashboard=1\n      enabled: false\n  serviceMonitors:\n    # -- Set to true to deploy ServiceMonitor resources for Celery worker metrics endpoints.\n    # Requires the Prometheus Operator CRDs (included in kube-prometheus-stack).\n    # Use `labels` to match your Prometheus CR's serviceMonitorSelector (e.g. release: onyx-monitoring).\n    enabled: false\n    labels: {}\n\nserviceAccount:\n  # Specifies whether a service account should be created\n  create: false\n  # Automatically mount a ServiceAccount's API credentials?\n  automount: true\n  # Annotations to add to the service account\n  annotations: {}\n  # The name of the service account to use.\n  # If not set and create is true, a name is generated using the fullname template\n  name: \"\"\n\nnginx:\n  enabled: true\n  # Nginx proxy timeout settings (in seconds)\n  timeouts:\n    connect: 300  # Time to establish connection with upstream server\n    send: 300     # Time to send request to upstream server\n    read: 300     # Time to read response from upstream server\n  controller:\n    containerPort:\n      http: 1024\n\n    # NOTE: When onyx-nginx-conf changes, nginx pods need to restart.\n    # The ingress-nginx subchart doesn't auto-detect our custom ConfigMap changes.\n    # Workaround: Helm upgrade will restart if the following annotation value changes.\n    podAnnotations:\n      onyx.app/nginx-config-version: \"3\"\n\n    # Propagate DOMAIN into nginx so server_name continues to use the same env var\n    extraEnvs:\n      - name: DOMAIN\n        value: localhost\n\n    config:\n      # Expose DOMAIN to the nginx config and pull in our custom snippets\n      main-snippet: |\n        env DOMAIN;\n      http-snippet: |\n        include /etc/nginx/custom-snippets/upstreams.conf;\n        include /etc/nginx/custom-snippets/server.conf;\n\n    # Mount the existing nginx ConfigMap that holds the upstream and server snippets\n    extraVolumes:\n      - name: nginx-config\n        configMap:\n          name: onyx-nginx-conf\n    extraVolumeMounts:\n      - name: nginx-config\n        mountPath: /etc/nginx/custom-snippets\n        readOnly: true\n\n    service:\n      type: LoadBalancer\n      ports:\n        http: 80\n      targetPorts:\n        http: http\n\nwebserver:\n  replicaCount: 1\n  image:\n    repository: onyxdotapp/onyx-web-server\n    # Overrides the image tag whose default is the chart appVersion.\n    tag: \"\"\n  deploymentLabels:\n    app: web-server\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-frontend\n  podSecurityContext:\n    {}\n    # fsGroup: 2000\n\n  securityContext:\n    {}\n    # capabilities:\n    #   drop:\n    #   - ALL\n    # readOnlyRootFilesystem: true\n    # runAsNonRoot: true\n    # runAsUser: 1000\n\n  containerPorts:\n    server: 3000\n\n  service:\n    type: ClusterIP\n    servicePort: 3000\n    targetPort: http\n\n  resources:\n    requests:\n      cpu: 200m\n      memory: 512Mi\n    limits:\n      cpu: 1000m\n      memory: 1Gi\n\n  # Optional health probes\n  # Example:\n  # readinessProbe:\n  #   httpGet:\n  #     path: /api/health\n  #     port: http\n  startupProbe: {}\n  readinessProbe: {}\n  livenessProbe: {}\n\n  autoscaling:\n    enabled: false\n    minReplicas: 1\n    maxReplicas: 100\n    targetCPUUtilizationPercentage: 80\n    targetMemoryUtilizationPercentage: 80\n    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')\n    pollingInterval: 30  # seconds\n    cooldownPeriod: 300  # seconds\n    failureThreshold: 3  # number of failures before fallback\n    fallbackReplicas: 1  # replicas to maintain on failure\n    # Custom triggers for advanced KEDA configurations\n    # Example: customTriggers: []\n    #   - type: prometheus\n    #     metadata:\n    #       serverAddress: http://prometheus:9090\n    #       metricName: http_requests_per_second\n    #       threshold: '100'\n    customTriggers: []\n\n  # Additional volumes on the output Deployment definition.\n  volumes: []\n  # - name: foo\n  #   secret:\n  #     secretName: mysecret\n  #     optional: false\n\n  # Additional volumeMounts on the output Deployment definition.\n  volumeMounts: []\n  # - name: foo\n  #   mountPath: \"/etc/foo\"\n  #   readOnly: true\n\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\napi:\n  replicaCount: 1\n  image:\n    repository: onyxdotapp/onyx-backend\n    # Overrides the image tag whose default is the chart appVersion.\n    tag: \"\"\n  deploymentLabels:\n    app: api-server\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-backend\n\n  containerPorts:\n    server: 8080\n\n  podSecurityContext:\n    {}\n    # fsGroup: 2000\n\n  securityContext:\n    {}\n    # capabilities:\n    #   drop:\n    #   - ALL\n    # readOnlyRootFilesystem: true\n    # runAsNonRoot: true\n    # runAsUser: 1000\n\n  service:\n    type: ClusterIP\n    servicePort: 8080\n    targetPort: api-server-port\n    portName: api-server-port\n\n  resources:\n    requests:\n      cpu: 500m\n      memory: 1Gi\n    limits:\n      cpu: 1000m\n      memory: 3Gi\n\n  # Optional health probes\n  # Example:\n  # readinessProbe:\n  #   httpGet:\n  #     path: /health\n  #     port: api-server-port\n  startupProbe: {}\n  readinessProbe: {}\n  livenessProbe: {}\n\n  autoscaling:\n    enabled: false\n    minReplicas: 1\n    maxReplicas: 100\n    targetCPUUtilizationPercentage: 80\n    targetMemoryUtilizationPercentage: 80\n    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')\n    pollingInterval: 30  # seconds\n    cooldownPeriod: 300  # seconds\n    failureThreshold: 3  # number of failures before fallback\n    fallbackReplicas: 1  # replicas to maintain on failure\n    # Custom triggers for advanced KEDA configurations\n    # Example: customTriggers: []\n    #   - type: prometheus\n    #     metadata:\n    #       serverAddress: http://prometheus:9090\n    #       metricName: http_requests_per_second\n    #       threshold: '100'\n    customTriggers: []\n\n  # Additional volumes on the output Deployment definition.\n  volumes: []\n  # - name: foo\n  #   secret:\n  #     secretName: mysecret\n  #     optional: false\n\n  # Additional volumeMounts on the output Deployment definition.\n  volumeMounts: []\n  # - name: foo\n  #   mountPath: \"/etc/foo\"\n  #   readOnly: true\n\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\n\n######################################################################\n#\n# Background workers\n#\n######################################################################\n\ncelery_shared:\n  image:\n    repository: onyxdotapp/onyx-backend\n    tag: \"\"  # Overrides the image tag whose default is the chart appVersion.\n  startupProbe:\n    # startupProbe fails after 2m\n    exec:\n      command: [\"test\", \"-f\", \"/app/onyx/main.py\"]\n    failureThreshold: 24\n    periodSeconds: 5\n    timeoutSeconds: 3\n  readinessProbe:\n    # readinessProbe fails after 15s + 2m of inactivity\n    # it's ok to see the readinessProbe fail transiently while the container starts\n    initialDelaySeconds: 15\n    periodSeconds: 5\n    failureThreshold: 24\n    timeoutSeconds: 3\n  livenessProbe:\n    # livenessProbe fails after 5m of inactivity\n    initialDelaySeconds: 60\n    periodSeconds: 60\n    failureThreshold: 5\n    timeoutSeconds: 3\n  podSecurityContext: {}\n  securityContext:\n    privileged: true\n    runAsUser: 0\n\ncelery_beat:\n  replicaCount: 1\n  logLevel: INFO\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-backend-celery\n  deploymentLabels:\n    app: celery-beat\n  resources:\n    requests:\n      cpu: 500m\n      memory: 512Mi\n    limits:\n      cpu: 1000m\n      memory: 1Gi\n  volumes: []  # Additional volumes on the output Deployment definition.\n  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\ncelery_worker_heavy:\n  replicaCount: 1\n  logLevel: INFO\n  autoscaling:\n    enabled: false\n    minReplicas: 1\n    maxReplicas: 10\n    targetCPUUtilizationPercentage: 80\n    targetMemoryUtilizationPercentage: 80\n    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')\n    pollingInterval: 30  # seconds\n    cooldownPeriod: 300  # seconds\n    failureThreshold: 3  # number of failures before fallback\n    fallbackReplicas: 1  # replicas to maintain on failure\n    # Custom triggers for advanced KEDA configurations\n    customTriggers: []\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-backend-celery\n  deploymentLabels:\n    app: celery-worker-heavy\n  resources:\n    requests:\n      cpu: 500m\n      memory: 512Mi\n    limits:\n      cpu: 1000m\n      memory: 2Gi\n  volumes: []  # Additional volumes on the output Deployment definition.\n  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\ncelery_worker_docprocessing:\n  replicaCount: 1\n  logLevel: INFO\n  autoscaling:\n    enabled: false\n    minReplicas: 1\n    maxReplicas: 20\n    targetCPUUtilizationPercentage: 80\n    targetMemoryUtilizationPercentage: 80\n    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')\n    pollingInterval: 30  # seconds\n    cooldownPeriod: 300  # seconds\n    failureThreshold: 3  # number of failures before fallback\n    fallbackReplicas: 1  # replicas to maintain on failure\n    # Custom triggers for advanced KEDA configurations\n    customTriggers: []\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-backend-celery\n  deploymentLabels:\n    app: celery-worker-docprocessing\n  resources:\n    requests:\n      cpu: 500m\n      memory: 2Gi\n    limits:\n      cpu: 1000m\n      memory: 12Gi\n  volumes: []  # Additional volumes on the output Deployment definition.\n  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\ncelery_worker_light:\n  replicaCount: 1\n  logLevel: INFO\n  autoscaling:\n    enabled: false\n    minReplicas: 1\n    maxReplicas: 10\n    targetCPUUtilizationPercentage: 80\n    targetMemoryUtilizationPercentage: 80\n    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')\n    pollingInterval: 30  # seconds\n    cooldownPeriod: 300  # seconds\n    failureThreshold: 3  # number of failures before fallback\n    fallbackReplicas: 1  # replicas to maintain on failure\n    # Custom triggers for advanced KEDA configurations\n    customTriggers: []\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-backend-celery\n  deploymentLabels:\n    app: celery-worker-light\n  resources:\n    requests:\n      cpu: 250m\n      memory: 512Mi\n    limits:\n      cpu: 2000m\n      memory: 4Gi\n  volumes: []  # Additional volumes on the output Deployment definition.\n  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\ncelery_worker_monitoring:\n  replicaCount: 1\n  logLevel: INFO\n  autoscaling:\n    enabled: false\n    minReplicas: 1\n    maxReplicas: 10\n    targetCPUUtilizationPercentage: 80\n    targetMemoryUtilizationPercentage: 80\n    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')\n    pollingInterval: 30  # seconds\n    cooldownPeriod: 300  # seconds\n    failureThreshold: 3  # number of failures before fallback\n    fallbackReplicas: 1  # replicas to maintain on failure\n    # Custom triggers for advanced KEDA configurations\n    customTriggers: []\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-backend-celery\n  deploymentLabels:\n    app: celery-worker-monitoring\n  resources:\n    requests:\n      cpu: 500m\n      memory: 512Mi\n    limits:\n      cpu: 1000m\n      memory: 4Gi\n  volumes: []  # Additional volumes on the output Deployment definition.\n  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\ncelery_worker_primary:\n  replicaCount: 1\n  logLevel: INFO\n  autoscaling:\n    enabled: false\n    minReplicas: 1\n    maxReplicas: 10\n    targetCPUUtilizationPercentage: 80\n    targetMemoryUtilizationPercentage: 80\n    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')\n    pollingInterval: 30  # seconds\n    cooldownPeriod: 300  # seconds\n    failureThreshold: 3  # number of failures before fallback\n    fallbackReplicas: 1  # replicas to maintain on failure\n    # Custom triggers for advanced KEDA configurations\n    customTriggers: []\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-backend-celery\n  deploymentLabels:\n    app: celery-worker-primary\n  resources:\n    requests:\n      cpu: 500m\n      memory: 2Gi\n    limits:\n      cpu: 1000m\n      memory: 4Gi\n  volumes: []  # Additional volumes on the output Deployment definition.\n  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\ncelery_worker_user_file_processing:\n  replicaCount: 1\n  logLevel: INFO\n  autoscaling:\n    enabled: false\n    minReplicas: 1\n    maxReplicas: 10\n    targetCPUUtilizationPercentage: 80\n    targetMemoryUtilizationPercentage: 80\n    # KEDA specific configurations\n    pollingInterval: 30  # seconds\n    cooldownPeriod: 300  # seconds\n    failureThreshold: 3  # number of failures before fallback\n    fallbackReplicas: 1  # replicas to maintain on failure\n    # Custom triggers for advanced KEDA configurations\n    customTriggers: []\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-backend-celery\n  deploymentLabels:\n    app: celery-worker-user-file-processing\n  resources:\n    requests:\n      cpu: 500m\n      memory: 512Mi\n    limits:\n      cpu: 2000m\n      memory: 2Gi\n  volumes: []  # Additional volumes on the output Deployment definition.\n  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\n# Discord bot for Onyx\n# The bot offloads message processing to scalable API pods via HTTP requests.\ndiscordbot:\n  enabled: false  # Disabled by default - requires bot token configuration\n  # Bot token can be provided directly or via a Kubernetes secret\n  # Option 1: Direct token (not recommended for production)\n  botToken: \"\"\n  # Option 2: Reference a Kubernetes secret (recommended)\n  botTokenSecretName: \"\"  # Name of the secret containing the bot token\n  botTokenSecretKey: \"token\"  # Key within the secret (default: \"token\")\n  # Command prefix for bot commands (default: \"!\")\n  invokeChar: \"!\"\n  image:\n    repository: onyxdotapp/onyx-backend\n    tag: \"\"  # Overrides the image tag whose default is the chart appVersion.\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-backend\n  deploymentLabels:\n    app: discord-bot\n  podSecurityContext:\n    {}\n  securityContext:\n    {}\n  resources:\n    requests:\n      cpu: \"500m\"\n      memory: \"512Mi\"\n    limits:\n      cpu: \"1000m\"\n      memory: \"2000Mi\"\n  volumes: []\n  volumeMounts: []\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\nslackbot:\n  enabled: true\n  replicaCount: 1\n  image:\n    repository: onyxdotapp/onyx-backend\n    tag: \"\"  # Overrides the image tag whose default is the chart appVersion.\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-backend\n  deploymentLabels:\n    app: slack-bot\n  podSecurityContext:\n    {}\n  securityContext:\n    {}\n  resources:\n    requests:\n      cpu: \"500m\"\n      memory: \"512Mi\"\n    limits:\n      cpu: \"1000m\"\n      memory: \"2000Mi\"\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\n# Onyx Model Context Protocol (MCP) Server\n# Allows LLMs to use Onyx like invoking tools or accessing resources\nmcpServer:\n  enabled: false  # Disabled by default\n  replicaCount: 1\n  image:\n    repository: onyxdotapp/onyx-backend\n    tag: \"\"  # Overrides the image tag whose default is the chart appVersion.\n  # CORS origins for MCP clients (comma-separated)\n  # Example: \"https://claude.ai,https://app.cursor.sh\"\n  corsOrigins: \"\"\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-backend\n  deploymentLabels:\n    app: mcp-server\n  containerPorts:\n    server: 8090\n  service:\n    type: ClusterIP\n    servicePort: 8090\n    targetPort: mcp-server-port\n    portName: mcp-server-port\n  podSecurityContext: {}\n  securityContext: {}\n  resources:\n    requests:\n      cpu: \"250m\"\n      memory: \"256Mi\"\n    limits:\n      cpu: \"500m\"\n      memory: \"512Mi\"\n  livenessProbe:\n    initialDelaySeconds: 10\n    periodSeconds: 30\n    timeoutSeconds: 5\n    failureThreshold: 3\n  readinessProbe:\n    initialDelaySeconds: 5\n    periodSeconds: 10\n    timeoutSeconds: 5\n    failureThreshold: 3\n  volumes: []\n  volumeMounts: []\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\ncelery_worker_docfetching:\n  replicaCount: 1\n  logLevel: INFO\n  autoscaling:\n    enabled: false\n    minReplicas: 1\n    maxReplicas: 20\n    targetCPUUtilizationPercentage: 80\n    targetMemoryUtilizationPercentage: 80\n    # KEDA specific configurations (only used when autoscaling.engine is set to 'keda')\n    pollingInterval: 30  # seconds\n    cooldownPeriod: 300  # seconds\n    failureThreshold: 3  # number of failures before fallback\n    fallbackReplicas: 1  # replicas to maintain on failure\n    # Custom triggers for advanced KEDA configurations\n    customTriggers: []\n  podAnnotations: {}\n  podLabels:\n    scope: onyx-backend-celery\n  deploymentLabels:\n    app: celery-worker-docfetching\n  resources:\n    requests:\n      cpu: 500m\n      memory: 2Gi\n    limits:\n      cpu: 1000m\n      memory: 16Gi\n  volumes: []  # Additional volumes on the output Deployment definition.\n  volumeMounts: []  # Additional volumeMounts on the output Deployment definition.\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\n######################################################################\n#\n# End background workers section\n#\n######################################################################\n\nredis:\n  enabled: true\n  redisStandalone:\n    image: quay.io/opstree/redis\n    tag: v7.0.15\n    imagePullPolicy: IfNotPresent\n    serviceType: ClusterIP\n    resources:\n      requests:\n        cpu: 100m\n        memory: 128Mi\n      limits:\n        cpu: 500m\n        memory: 512Mi\n    # Use existing secret for Redis password\n    redisSecret:\n      secretName: onyx-redis\n      secretKey: redis_password\n  # Redis configuration\n  externalConfig:\n    enabled: true\n    data: |\n      appendonly no\n      save \"\"\n      maxmemory 400mb\n      maxmemory-policy allkeys-lru\n      timeout 0\n      tcp-keepalive 300\n  storageSpec:\n    volumeClaimTemplate:\n      spec:\n        accessModes: [\"ReadWriteOnce\"]\n        resources:\n          requests:\n            storage: 1Gi\n\nminio:\n  enabled: true\n  mode: standalone\n  replicas: 1\n  drivesPerNode: 1\n  existingSecret: onyx-objectstorage\n  buckets:\n    - name: onyx-file-store-bucket\n  persistence:\n    enabled: true\n    size: 30Gi\n    storageClass: \"\"\n  service:\n    type: ClusterIP\n    port: 9000\n  consoleService:\n    type: ClusterIP\n    port: 9001\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\n# Code Interpreter - Python code execution service (beta feature)\ncodeInterpreter:\n  enabled: true\n\n  replicaCount: 1\n\n  image:\n    repository: onyxdotapp/code-interpreter\n    pullPolicy: Always\n    tag: \"\"  # Empty uses chart appVersion\n\n  # Service configuration\n  service:\n    type: ClusterIP\n    port: 8000\n    targetPort: 8000\n\n  # Execution limits\n  codeInterpreter:\n    maxExecTimeoutMs: 60000\n    maxOutputBytes: \"1000000\"\n    cpuTimeLimitSec: 5\n    memoryLimitMb: 256\n    host: \"0.0.0.0\"\n    port: 8000\n\n    # Kubernetes executor configuration (creates pods for code execution)\n    kubernetesExecutor:\n      namespace: \"\"  # Empty = same namespace as release\n      image: \"\"  # Empty = default sandbox image\n      serviceAccount: \"\"\n      podResources:\n        limits:\n          cpu: \"1\"\n          memory: \"256Mi\"\n        requests:\n          cpu: \"100m\"\n          memory: \"64Mi\"\n\n  # API container resources\n  resources:\n    requests:\n      cpu: 100m\n      memory: 128Mi\n    limits:\n      cpu: 1000m\n      memory: 512Mi\n\n  # RBAC for pod management (required for kubernetes executor)\n  rbac:\n    create: true\n\n  # Security context\n  podSecurityContext:\n    runAsNonRoot: true\n    runAsUser: 1000\n    fsGroup: 1000\n\n  securityContext:\n    capabilities:\n      drop:\n        - ALL\n    readOnlyRootFilesystem: false\n    runAsNonRoot: true\n    runAsUser: 1000\n    allowPrivilegeEscalation: false\n\n  # Health probes\n  livenessProbe:\n    httpGet:\n      path: /health\n      port: http\n    initialDelaySeconds: 10\n    periodSeconds: 10\n\n  readinessProbe:\n    httpGet:\n      path: /health\n      port: http\n    initialDelaySeconds: 5\n    periodSeconds: 5\n\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\n  # Optional features\n  ingress:\n    enabled: false\n  networkPolicy:\n    enabled: false\n  serviceAccount:\n    create: true\n    automount: true\n\ningress:\n  enabled: false\n  className: \"\"\n  api:\n    host: onyx.local\n  webserver:\n    host: onyx.local\n\nletsencrypt:\n  enabled: false\n  email: \"abc@abc.com\"\n\n# -- Governs all Secrets created or used by this chart. Values set by this chart will be base64 encoded in the k8s cluster.\nauth:\n  postgresql:\n    # -- Enable or disable this secret entirely. Will remove from env var configurations and remove any created secrets.\n    enabled: true\n    # -- Overwrite the default secret name, ignored if existingSecret is defined\n    secretName: 'onyx-postgresql'\n    # -- Use a secret specified elsewhere\n    existingSecret: \"\"\n    # -- This defines the env var to secret map, key is always upper-cased as an env var\n    secretKeys:\n      # CloudNativePG requires `username` and `password` keys for the superuser secret.\n      POSTGRES_USER: username\n      POSTGRES_PASSWORD: password\n    # -- Secrets values IF existingSecret is empty. Key here must match the value in secretKeys to be used. Values will be base64 encoded in the k8s cluster.\n    values:\n      username: \"postgres\"\n      password: \"postgres\"\n  redis:\n    # -- Enable or disable this secret entirely. Will remove from env var configurations and remove any created secrets.\n    enabled: true\n    # -- Overwrite the default secret name, ignored if existingSecret is defined\n    secretName: 'onyx-redis'\n    # -- Use a secret specified elsewhere\n    existingSecret: \"\"\n    # -- This defines the env var to secret map, key is always upper-cased as an env var\n    secretKeys:\n      REDIS_PASSWORD: redis_password\n    # -- Secrets values IF existingSecret is empty. Key here must match the value in secretKeys to be used. Values will be base64 encoded in the k8s cluster.\n    values:\n      redis_password: \"password\"\n  objectstorage:\n    # -- Enable or disable this secret entirely. Will remove from env var configurations and remove any created secrets.\n    enabled: true\n    # -- Overwrite the default secret name, ignored if existingSecret is defined\n    secretName: 'onyx-objectstorage'\n    # -- Use a secret specified elsewhere\n    existingSecret: \"\"\n    # -- This defines the env var to secret map, key is always upper-cased as an env var\n    secretKeys:\n      S3_AWS_ACCESS_KEY_ID: s3_aws_access_key_id\n      S3_AWS_SECRET_ACCESS_KEY: s3_aws_secret_access_key\n    # -- Secrets values IF existingSecret is empty. Key here must match the value in secretKeys to be used. Values will be base64 encoded in the k8s cluster.\n    values:\n      s3_aws_access_key_id: \"minioadmin\"\n      s3_aws_secret_access_key: \"minioadmin\"\n      rootUser: \"minioadmin\"\n      rootPassword: \"minioadmin\"\n  oauth:\n    # -- Enable or disable this secret entirely. Will remove from env var configurations and remove any created secrets.\n    enabled: false\n    # -- Overwrite the default secret name, ignored if existingSecret is defined\n    secretName: 'onyx-oauth'\n    # -- Use a secret specified elsewhere\n    existingSecret: \"\"\n    # -- This defines the env var to secret map, key is always upper-cased as an env var\n    secretKeys:\n      OAUTH_CLIENT_ID: \"oauth_client_id\"\n      OAUTH_CLIENT_SECRET: \"oauth_client_secret\"\n    # -- Secrets values IF existingSecret is empty. Key here must match the value in secretKeys to be used. Values will be base64 encoded in the k8s cluster.\n    values:\n      oauth_client_id: \"\"\n      oauth_client_secret: \"\"\n  smtp:\n    # -- Enable or disable this secret entirely. Will remove from env var configurations and remove any created secrets.\n    enabled: false\n    # -- Overwrite the default secret name, ignored if existingSecret is defined\n    secretName: 'onyx-smtp'\n    # -- Use a secret specified elsewhere\n    existingSecret: \"\"\n    # -- This defines the env var to secret map, key is always upper-cased as an env var\n    secretKeys:\n      SMTP_PASS: \"smtp_pass\"\n    # -- Secrets values IF existingSecret is empty. Key here must match the value in secretKeys to be used. Values will be base64 encoded in the k8s cluster.\n    values:\n      smtp_pass: \"\"\n  dbreadonly:\n    # -- Enable or disable this secret entirely. Will remove from env var configurations and remove any created secrets.\n    enabled: false\n    # -- Overwrite the default secret name, ignored if existingSecret is defined\n    secretName: 'onyx-dbreadonly'\n    # -- Use a secret specified elsewhere\n    existingSecret: \"\"\n    # -- This defines the env var to secret map, key is always upper-cased as an env var\n    secretKeys:\n      DB_READONLY_USER: db_readonly_user\n      DB_READONLY_PASSWORD: db_readonly_password\n    # -- Secrets values IF existingSecret is empty. Key here must match the value in secretKeys to be used. Values will be base64 encoded in the k8s cluster.\n    values:\n      db_readonly_user: \"\"\n      db_readonly_password: \"\"\n  opensearch:\n    # Enable or disable this secret entirely. Will remove from env var\n    # configurations and remove any created secrets.\n    # Enabled by default. Override to false and set the appropriate env vars in\n    # the instance-specific values yaml if using AWS-managed OpenSearch, or\n    # simply override to false to entirely disable.\n    enabled: true\n    # Overwrite the default secret name, ignored if existingSecret is defined.\n    secretName: 'onyx-opensearch'\n    # Use a secret specified elsewhere.\n    existingSecret: \"\"\n    # This defines the env var to secret map, key is always upper-cased as an\n    # env var.\n    secretKeys:\n      OPENSEARCH_ADMIN_USERNAME: opensearch_admin_username\n      OPENSEARCH_ADMIN_PASSWORD: opensearch_admin_password\n    # Secrets values IF existingSecret is empty. Key here must match the value\n    # in secretKeys to be used. Values will be base64 encoded in the k8s\n    # cluster.\n    # For the bundled OpenSearch chart, the admin password is consumed during\n    # initial cluster setup. Changing this value later will update Onyx's\n    # client credentials, but will not rotate the OpenSearch admin password.\n    # Set this before first install or use existingSecret to preserve the\n    # current secret on upgrade.\n    # Password must meet OpenSearch complexity requirements:\n    # min 8 chars, uppercase, lowercase, digit, and special character.\n    # Required when auth.opensearch.enabled=true and no existing secret exists.\n    values:\n      opensearch_admin_username: \"admin\"\n      opensearch_admin_password: \"\"\n  userauth:\n    # -- Used for password reset / verification tokens and OAuth/OIDC state signing.\n    # Disabled by default to preserve upgrade compatibility for existing Helm customers.\n    enabled: false\n    # -- Overwrite the default secret name, ignored if existingSecret is defined\n    secretName: 'onyx-userauth'\n    # -- Use a secret specified elsewhere\n    existingSecret: \"\"\n    # -- This defines the env var to secret map\n    secretKeys:\n      USER_AUTH_SECRET: user_auth_secret\n    # -- Secret value. Required when this secret is enabled - generate with: openssl rand -hex 32\n    # If not set, helm install/upgrade will fail when auth.userauth.enabled=true.\n    values:\n      user_auth_secret: \"\"\n\nconfigMap:\n  # Auth type: \"basic\" (default), \"google_oauth\", \"oidc\", or \"saml\"\n  # UPGRADE NOTE: Default changed from \"disabled\" to \"basic\" in 0.4.34.\n  # Set auth.userauth.enabled=true and provide auth.userauth.values.user_auth_secret\n  # before enabling flows that require it.\n  AUTH_TYPE: \"basic\"\n  # Enable PKCE for OIDC login flow. Leave empty/false for backward compatibility.\n  OIDC_PKCE_ENABLED: \"\"\n  # 1 Day Default\n  SESSION_EXPIRE_TIME_SECONDS: \"86400\"\n  # Can be something like onyx.app, as an extra double-check\n  VALID_EMAIL_DOMAINS: \"\"\n  # For sending verification emails, true or false\n  REQUIRE_EMAIL_VERIFICATION: \"\"\n  # If unspecified then defaults to 'smtp.gmail.com'\n  SMTP_SERVER: \"\"\n  # For sending verification emails, if unspecified then defaults to '587'\n  SMTP_PORT: \"\"\n# 'your-email@company.com'\n  SMTP_USER: \"\"\n  # 'your-gmail-password'\n  # SMTP_PASS: \"\"\n  # 'your-email@company.com' SMTP_USER missing used instead\n  EMAIL_FROM: \"\"\n  # MinIO/S3 Configuration override\n  S3_ENDPOINT_URL: \"\"  # only used if minio is not enabled\n  S3_FILE_STORE_BUCKET_NAME: \"\"\n  # Gen AI Settings\n  GEN_AI_MAX_TOKENS: \"\"\n  LLM_SOCKET_READ_TIMEOUT: \"60\"\n  MAX_CHUNKS_FED_TO_CHAT: \"\"\n  # Query Options\n  DOC_TIME_DECAY: \"\"\n  HYBRID_ALPHA: \"\"\n  EDIT_KEYWORD_QUERY: \"\"\n  # Don't change the NLP models unless you know what you're doing\n  EMBEDDING_BATCH_SIZE: \"\"\n  DOCUMENT_ENCODER_MODEL: \"\"\n  NORMALIZE_EMBEDDINGS: \"\"\n  ASYM_QUERY_PREFIX: \"\"\n  ASYM_PASSAGE_PREFIX: \"\"\n  DISABLE_RERANK_FOR_STREAMING: \"\"\n  MODEL_SERVER_PORT: \"\"\n  MIN_THREADS_ML_MODELS: \"\"\n  # Indexing Configs\n  VESPA_SEARCHER_THREADS: \"\"\n  NUM_INDEXING_WORKERS: \"\"\n  DISABLE_INDEX_UPDATE_ON_SWAP: \"\"\n  DASK_JOB_CLIENT_ENABLED: \"\"\n  CONTINUE_ON_CONNECTOR_FAILURE: \"\"\n  EXPERIMENTAL_CHECKPOINTING_ENABLED: \"\"\n  CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: \"\"\n  JIRA_CLOUD_API_VERSION: \"\"\n  JIRA_SERVER_API_VERSION: \"\"\n  GONG_CONNECTOR_START_TIME: \"\"\n  NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: \"\"\n  # Worker Parallelism\n  CELERY_WORKER_DOCPROCESSING_CONCURRENCY: \"\"\n  CELERY_WORKER_LIGHT_CONCURRENCY: \"\"\n  CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER: \"\"\n  CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY: \"\"\n  # OnyxBot SlackBot Configs\n  ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER: \"\"\n  ONYX_BOT_DISPLAY_ERROR_MSGS: \"\"\n  ONYX_BOT_RESPOND_EVERY_CHANNEL: \"\"\n  NOTIFY_SLACKBOT_NO_ANSWER: \"\"\n  DISCORD_BOT_TOKEN: \"\"\n  DISCORD_BOT_INVOKE_CHAR: \"\"\n  # Logging\n  # Optional Telemetry, please keep it on (nothing sensitive is collected)? <3\n  DISABLE_TELEMETRY: \"\"\n  LOG_LEVEL: \"\"\n  LOG_ALL_MODEL_INTERACTIONS: \"\"\n  LOG_ONYX_MODEL_INTERACTIONS: \"\"\n  LOG_VESPA_TIMING_INFORMATION: \"\"\n  # Shared or Non-backend Related\n  WEB_DOMAIN: \"http://localhost:3000\"\n  # DOMAIN used by nginx\n  DOMAIN: \"localhost\"\n  # Chat Configs\n  HARD_DELETE_CHATS: \"\"\n  MAX_ALLOWED_UPLOAD_SIZE_MB: \"\"\n  DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB: \"\"\n"
  },
  {
    "path": "deployment/terraform/modules/aws/README.md",
    "content": "# Onyx AWS modules\n\n## Overview\nThis directory contains Terraform modules to provision the core AWS infrastructure for Onyx:\n\n- `vpc`: Creates a VPC with public/private subnets sized for EKS\n- `eks`: Provisions an Amazon EKS cluster, essential addons (EBS CSI, metrics server, cluster autoscaler), and optional IRSA for S3 access\n- `postgres`: Creates an Amazon RDS for PostgreSQL instance and returns a connection URL\n- `redis`: Creates an ElastiCache for Redis replication group\n- `s3`: Creates an S3 bucket and locks access to a provided S3 VPC endpoint\n- `opensearch`: Creates an Amazon OpenSearch domain for managed search workloads\n- `onyx`: A higher-level composition that wires the above modules together for a complete, opinionated stack\n\nUse the `onyx` module if you want a working EKS + Postgres + Redis + S3 stack with sane defaults. Use the individual modules if you need more granular control.\n\n## Quickstart (copy/paste)\nThe snippet below shows a minimal working example that:\n- Sets up providers\n- Waits for EKS to be ready\n- Configures `kubernetes` and `helm` providers against the created cluster\n- Provisions the full Onyx AWS stack via the `onyx` module\n\n```hcl\nlocals {\n  region = \"us-west-2\"\n}\n\nprovider \"aws\" {\n  region = local.region\n}\n\nmodule \"onyx\" {\n  # If your root module is next to this modules/ directory:\n  # source = \"./modules/aws/onyx\"\n  # If referencing from this repo as a template, adjust the path accordingly.\n  source = \"./modules/aws/onyx\"\n\n  region            = local.region\n  name              = \"onyx\"            # used as a prefix and workspace-aware\n  postgres_username = \"pgusername\"\n  postgres_password = \"your-postgres-password\"\n  # create_vpc    = true  # default true; set to false to use an existing VPC (see below)\n}\n\nresource \"null_resource\" \"wait_for_cluster\" {\n  provisioner \"local-exec\" {\n    command = \"aws eks wait cluster-active --name ${module.onyx.cluster_name} --region ${local.region}\"\n  }\n}\n\ndata \"aws_eks_cluster\" \"eks\" {\n  name       = module.onyx.cluster_name\n  depends_on = [null_resource.wait_for_cluster]\n}\n\ndata \"aws_eks_cluster_auth\" \"eks\" {\n  name       = module.onyx.cluster_name\n  depends_on = [null_resource.wait_for_cluster]\n}\n\nprovider \"kubernetes\" {\n  host                   = data.aws_eks_cluster.eks.endpoint\n  cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks.certificate_authority[0].data)\n  token                  = data.aws_eks_cluster_auth.eks.token\n}\n\nprovider \"helm\" {\n  kubernetes {\n    host                   = data.aws_eks_cluster.eks.endpoint\n    cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks.certificate_authority[0].data)\n    token                  = data.aws_eks_cluster_auth.eks.token\n  }\n}\n\n# Optional: expose handy outputs at the root module level\noutput \"cluster_name\" {\n  value = module.onyx.cluster_name\n}\noutput \"postgres_connection_url\" {\n  value     = module.onyx.postgres_connection_url\n  sensitive = true\n}\noutput \"redis_connection_url\" {\n  value     = module.onyx.redis_connection_url\n  sensitive = true\n}\n```\n\nApply with:\n\n```bash\nterraform init\nterraform apply\n```\n\n### Using an existing VPC\nIf you already have a VPC and subnets, disable VPC creation and provide IDs, CIDR, and the ID of the existing S3 gateway endpoint in that VPC:\n\n```hcl\nmodule \"onyx\" {\n  source = \"./modules/aws/onyx\"\n\n  region            = local.region\n  name              = \"onyx\"\n  postgres_username = \"pgusername\"\n  postgres_password = \"your-postgres-password\"\n\n  create_vpc       = false\n  vpc_id           = \"vpc-xxxxxxxx\"\n  private_subnets  = [\"subnet-aaaa\", \"subnet-bbbb\", \"subnet-cccc\"]\n  public_subnets   = [\"subnet-dddd\", \"subnet-eeee\", \"subnet-ffff\"]\n  vpc_cidr_block   = \"10.0.0.0/16\"\n  s3_vpc_endpoint_id = \"vpce-xxxxxxxxxxxxxxxxx\"\n}\n```\n\n## What each module does\n\n### `onyx`\n- Orchestrates `vpc`, `eks`, `postgres`, `redis`, and `s3`\n- Names resources using `name` and the current Terraform workspace\n- Exposes convenient outputs:\n  - `cluster_name`: EKS cluster name\n  - `postgres_connection_url` (sensitive): `postgres://...`\n  - `redis_connection_url` (sensitive): hostname:port\n\nInputs (common):\n- `name` (default `onyx`), `region` (default `us-west-2`), `tags`\n- `postgres_username`, `postgres_password`\n- `create_vpc` (default true) or existing VPC details and `s3_vpc_endpoint_id`\n- WAF controls such as `waf_allowed_ip_cidrs`, `waf_common_rule_set_count_rules`, rate limits, geo restrictions, and logging retention\n- Optional OpenSearch controls such as `enable_opensearch`, sizing, credentials, and log retention\n\n### `vpc`\n- Builds a VPC sized for EKS with multiple private and public subnets\n- Outputs: `vpc_id`, `private_subnets`, `public_subnets`, `vpc_cidr_block`, `s3_vpc_endpoint_id`\n\n### `eks`\n- Creates the EKS cluster and node groups\n- Enables addons: EBS CSI driver, metrics server, cluster autoscaler\n- Optionally configures IRSA for S3 access to specified buckets\n- Outputs: `cluster_name`, `cluster_endpoint`, `cluster_certificate_authority_data`, `s3_access_role_arn` (if created)\n\nKey inputs include:\n- `cluster_name`, `cluster_version` (default `1.33`)\n- `vpc_id`, `subnet_ids`\n- `public_cluster_enabled` (default true), `private_cluster_enabled` (default false)\n- `cluster_endpoint_public_access_cidrs` (optional)\n- `eks_managed_node_groups` (defaults include a main and a vespa-dedicated group with GP3 volumes)\n- `s3_bucket_names` (optional list). If set, creates an IRSA role and Kubernetes service account for S3 access\n\n### `postgres`\n- Amazon RDS for PostgreSQL with parameterized instance size, storage, version\n- Accepts VPC/subnets and ingress CIDRs; returns a ready-to-use connection URL\n\n### `redis`\n- ElastiCache for Redis (transit encryption enabled by default)\n- Supports optional `auth_token` and instance sizing\n- Outputs endpoint, port, and whether SSL is enabled\n\n### `s3`\n- Creates an S3 bucket for file storage and scopes access to the provided S3 gateway VPC endpoint\n\n### `opensearch`\n- Creates an Amazon OpenSearch domain inside the VPC\n- Supports custom subnets, security groups, fine-grained access control, encryption, and CloudWatch log publishing\n- Outputs domain endpoints, ARN, and the managed security group ID when it creates one\n\n## Installing the Onyx Helm chart (after Terraform)\nOnce the cluster is active, deploy application workloads via Helm. You can use the chart in `deployment/helm/charts/onyx`.\n\n```bash\n# Set kubeconfig to your new cluster (if you’re not using the TF providers for kubernetes/helm)\naws eks update-kubeconfig --name $(terraform output -raw cluster_name) --region ${AWS_REGION:-us-west-2}\n\nkubectl create namespace onyx --dry-run=client -o yaml | kubectl apply -f -\n\n# If using AWS S3 via IRSA created by the EKS module, consider disabling MinIO\n# Replace the path below with the absolute or correct relative path to the onyx Helm chart\nhelm upgrade --install onyx /path/to/onyx/deployment/helm/charts/onyx \\\n  --namespace onyx \\\n  --set minio.enabled=false \\\n  --set serviceAccount.create=false \\\n  --set serviceAccount.name=onyx-s3-access\n```\n\nNotes:\n- The EKS module can create an IRSA role plus a Kubernetes `ServiceAccount` named `onyx-s3-access` (by default in namespace `onyx`) when `s3_bucket_names` is provided. Use that service account in the Helm chart to avoid static S3 credentials.\n- If you prefer MinIO inside the cluster, leave `minio.enabled=true` (default) and skip IRSA.\n\n## Workflow tips\n- First apply can be infra-only; once EKS is active, install the Helm chart.\n- Use Terraform workspaces to create isolated environments; the `onyx` module automatically includes the workspace in resource names.\n\n## Security\n- Database and Redis connection outputs are marked sensitive. Handle them carefully.\n- When using IRSA, avoid storing long-lived S3 credentials in secrets.\n"
  },
  {
    "path": "deployment/terraform/modules/aws/eks/main.tf",
    "content": "locals {\n  s3_bucket_arns = [for name in var.s3_bucket_names : {\n    bucket_arn     = \"arn:aws:s3:::${name}\"\n    bucket_objects = \"arn:aws:s3:::${name}/*\"\n  }]\n}\n\nmodule \"eks\" {\n  source  = \"terraform-aws-modules/eks/aws\"\n  version = \"~> 20.0\"\n\n  cluster_name    = var.cluster_name\n  cluster_version = var.cluster_version\n\n  vpc_id                                   = var.vpc_id\n  subnet_ids                               = var.subnet_ids\n  cluster_endpoint_public_access           = var.public_cluster_enabled\n  cluster_endpoint_private_access          = var.private_cluster_enabled\n  cluster_endpoint_public_access_cidrs     = var.cluster_endpoint_public_access_cidrs\n  enable_cluster_creator_admin_permissions = true\n\n  # Control plane logging\n  cluster_enabled_log_types              = var.cluster_enabled_log_types\n  cloudwatch_log_group_retention_in_days = var.cloudwatch_log_group_retention_in_days\n\n  eks_managed_node_group_defaults = {\n    ami_type = \"AL2023_x86_64_STANDARD\"\n  }\n\n  eks_managed_node_groups = {\n    for k, v in var.eks_managed_node_groups : k => merge(v,\n      {\n        instance_types = v.instance_types != null ? v.instance_types : (\n          k == \"main\" ? var.main_node_instance_types :\n          k == \"vespa\" ? var.vespa_node_instance_types :\n          v.instance_types\n        )\n      },\n      # Only add subnet_ids override for vespa node group if specified\n      k == \"vespa\" && length(var.vespa_node_subnet_ids) > 0 ? {\n        subnet_ids = var.vespa_node_subnet_ids\n      } : {}\n    )\n  }\n\n  tags = var.tags\n}\n\n# https://aws.amazon.com/blogs/containers/amazon-ebs-csi-driver-is-now-generally-available-in-amazon-eks-add-ons/\ndata \"aws_iam_policy\" \"ebs_csi_policy\" {\n  arn = \"arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy\"\n}\n\nmodule \"irsa-ebs-csi\" {\n  source  = \"terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc\"\n  version = \"4.7.0\"\n\n  create_role                   = true\n  role_name                     = \"AmazonEKSTFEBSCSIRole-${module.eks.cluster_name}\"\n  provider_url                  = module.eks.oidc_provider\n  role_policy_arns              = [data.aws_iam_policy.ebs_csi_policy.arn]\n  oidc_fully_qualified_subjects = [\"system:serviceaccount:kube-system:ebs-csi-controller-sa\"]\n\n  depends_on = [module.eks]\n}\n\n# Create the EBS CSI Driver addon for volume provisioning.\nresource \"aws_eks_addon\" \"ebs-csi\" {\n  cluster_name             = module.eks.cluster_name\n  addon_name               = \"aws-ebs-csi-driver\"\n  service_account_role_arn = module.irsa-ebs-csi.iam_role_arn\n  tags                     = var.tags\n\n  depends_on = [module.eks]\n}\n\n# Create GP3 storage class for EBS volumes\nresource \"kubernetes_storage_class\" \"gp3_default\" {\n  count = var.create_gp3_storage_class ? 1 : 0\n  metadata {\n    name = \"gp3\"\n    annotations = {\n      \"storageclass.kubernetes.io/is-default-class\" = \"true\"\n    }\n  }\n\n  storage_provisioner    = \"ebs.csi.aws.com\"\n  reclaim_policy         = \"Delete\"\n  volume_binding_mode    = \"WaitForFirstConsumer\"\n  allow_volume_expansion = true\n\n  parameters = {\n    type = \"gp3\"\n  }\n\n  depends_on = [aws_eks_addon.ebs-csi]\n}\n\n# Create some important addons for the EKS cluster.\nmodule \"eks_blueprints_addons\" {\n  source  = \"aws-ia/eks-blueprints-addons/aws\"\n  version = \"1.16.3\"\n\n  cluster_name      = module.eks.cluster_name\n  cluster_endpoint  = module.eks.cluster_endpoint\n  cluster_version   = module.eks.cluster_version\n  oidc_provider_arn = module.eks.oidc_provider_arn\n\n  enable_aws_load_balancer_controller = true\n  enable_karpenter                    = false\n  enable_metrics_server               = true\n  enable_cluster_autoscaler           = true\n\n  depends_on = [module.eks]\n}\n\n# Create IAM policy for S3 access (optional)\nresource \"aws_iam_policy\" \"s3_access_policy\" {\n  count       = length(var.s3_bucket_names) == 0 ? 0 : 1\n  name        = \"${module.eks.cluster_name}-s3-access-policy\"\n  description = \"Policy for S3 access from EKS cluster\"\n\n  policy = jsonencode({\n    Version = \"2012-10-17\"\n    Statement = [\n      {\n        Effect = \"Allow\"\n        Action = [\n          \"s3:GetObject\",\n          \"s3:PutObject\",\n          \"s3:DeleteObject\",\n          \"s3:ListBucket\"\n        ]\n        Resource = flatten([\n          for a in local.s3_bucket_arns : [a.bucket_arn, a.bucket_objects]\n        ])\n      }\n    ]\n  })\n}\n\n# Create IAM role for workload access using IRSA (S3 + RDS)\nmodule \"irsa-workload-access\" {\n  count   = length(var.s3_bucket_names) == 0 ? 0 : 1\n  source  = \"terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc\"\n  version = \"4.7.0\"\n\n  create_role                   = true\n  role_name                     = \"AmazonEKSTFWorkloadAccessRole-${module.eks.cluster_name}\"\n  provider_url                  = module.eks.oidc_provider\n  role_policy_arns              = [aws_iam_policy.s3_access_policy[0].arn]\n  oidc_fully_qualified_subjects = [\"system:serviceaccount:${var.irsa_service_account_namespace}:${var.irsa_service_account_name}\"]\n\n  depends_on = [module.eks]\n}\n\n# Create Kubernetes service account for S3 access (optional)\nresource \"kubernetes_service_account\" \"s3_access\" {\n  count = length(var.s3_bucket_names) == 0 ? 0 : 1\n  metadata {\n    name      = var.irsa_service_account_name\n    namespace = var.irsa_service_account_namespace\n    annotations = {\n      \"eks.amazonaws.com/role-arn\" = module.irsa-workload-access[0].iam_role_arn\n    }\n  }\n}\n\n# If RDS IAM auth is enabled, create a policy to allow the workload IRSA role to connect to RDS using IAM auth\nresource \"aws_iam_policy\" \"rds_iam_connect_policy\" {\n  count       = var.enable_rds_iam_for_service_account && var.rds_db_connect_arn != null ? 1 : 0\n  name        = \"${module.eks.cluster_name}-rds-iam-connect-policy\"\n  description = \"Allow EKS service account to connect to RDS using IAM auth\"\n\n  policy = jsonencode({\n    Version = \"2012-10-17\",\n    Statement = [\n      {\n        Effect = \"Allow\",\n        Action = [\n          \"rds-db:connect\"\n        ],\n        Resource = [\n          var.rds_db_connect_arn\n        ]\n      }\n    ]\n  })\n}\n\nresource \"aws_iam_role_policy_attachment\" \"attach_rds_connect_to_workload_role\" {\n  count      = var.enable_rds_iam_for_service_account && var.rds_db_connect_arn != null ? 1 : 0\n  role       = module.irsa-workload-access[0].iam_role_name\n  policy_arn = aws_iam_policy.rds_iam_connect_policy[0].arn\n\n  depends_on = [module.irsa-workload-access]\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/eks/outputs.tf",
    "content": "output \"cluster_name\" {\n  value = module.eks.cluster_name\n}\n\noutput \"cluster_endpoint\" {\n  value = module.eks.cluster_endpoint\n}\n\noutput \"cluster_certificate_authority_data\" {\n  value     = module.eks.cluster_certificate_authority_data\n  sensitive = true\n}\n\noutput \"workload_irsa_role_arn\" {\n  description = \"ARN of the IAM role for workloads (S3 + optional RDS)\"\n  value       = length(module.irsa-workload-access) > 0 ? module.irsa-workload-access[0].iam_role_arn : null\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/eks/variables.tf",
    "content": "variable \"cluster_name\" {\n  type        = string\n  description = \"The name of the cluster\"\n}\n\nvariable \"cluster_version\" {\n  type        = string\n  description = \"The EKS version of the cluster\"\n  default     = \"1.33\"\n}\n\nvariable \"vpc_id\" {\n  type        = string\n  description = \"The ID of the VPC\"\n}\n\nvariable \"subnet_ids\" {\n  type        = list(string)\n  description = \"The IDs of the subnets\"\n}\n\nvariable \"public_cluster_enabled\" {\n  type        = bool\n  description = \"Whether to enable public cluster access\"\n  default     = true\n}\n\nvariable \"private_cluster_enabled\" {\n  type        = bool\n  description = \"Whether to enable private cluster access\"\n  default     = false\n}\n\nvariable \"cluster_endpoint_public_access_cidrs\" {\n  type        = list(string)\n  description = \"List of CIDR blocks allowed to access the public EKS API endpoint\"\n  default     = []\n}\n\nvariable \"main_node_instance_types\" {\n  type        = list(string)\n  description = \"Instance types for the main node group\"\n  default     = [\"m7i.4xlarge\"]\n}\n\nvariable \"vespa_node_instance_types\" {\n  type        = list(string)\n  description = \"Instance types for the Vespa node group\"\n  default     = [\"m6i.2xlarge\"]\n}\n\nvariable \"vespa_node_subnet_ids\" {\n  type        = list(string)\n  description = \"Subnet IDs for the Vespa node group (must be in same AZ as Vespa PV). If not specified, uses all cluster subnets.\"\n  default     = []\n}\n\nvariable \"eks_managed_node_groups\" {\n  type        = map(any)\n  description = \"EKS managed node groups with EBS volume configuration\"\n  default = {\n    # Main node group for all pods except Vespa\n    main = {\n      name           = \"main-node-group\"\n      instance_types = null # Will be set from var.main_node_instance_types\n      min_size       = 1\n      max_size       = 5\n      # EBS volume configuration\n      block_device_mappings = {\n        xvda = {\n          device_name = \"/dev/xvda\"\n          ebs = {\n            volume_size           = 50\n            volume_type           = \"gp3\"\n            encrypted             = true\n            delete_on_termination = true\n            iops                  = 3000\n            throughput            = 125\n          }\n        }\n      }\n      # No taints for main node group\n      taints = []\n    }\n    # Vespa dedicated node group\n    vespa = {\n      name           = \"vespa-node-group\"\n      instance_types = null # Will be set from var.vespa_node_instance_types\n      min_size       = 1\n      max_size       = 1\n      # Larger EBS volume for Vespa storage\n      block_device_mappings = {\n        xvda = {\n          device_name = \"/dev/xvda\"\n          ebs = {\n            volume_size           = 100\n            volume_type           = \"gp3\"\n            encrypted             = true\n            delete_on_termination = true\n            iops                  = 3000\n            throughput            = 125\n          }\n        }\n      }\n      # Taint to ensure only Vespa pods can schedule here\n      taints = [\n        {\n          key    = \"vespa-dedicated\"\n          value  = \"true\"\n          effect = \"NO_SCHEDULE\"\n        }\n      ]\n    }\n  }\n}\n\nvariable \"tags\" {\n  type        = map(string)\n  description = \"Tags to apply to the resources\"\n  default     = {}\n}\n\nvariable \"create_gp3_storage_class\" {\n  type        = bool\n  description = \"Whether to create the gp3 storage class. The gp3 storage class will be patched to make it default and allow volume expansion.\"\n  default     = true\n}\n\nvariable \"s3_bucket_names\" {\n  type        = list(string)\n  description = \"List of S3 bucket names that workloads in this cluster are allowed to access via IRSA. If empty, no S3 access role/policy/service account will be created.\"\n  default     = []\n}\n\nvariable \"irsa_service_account_namespace\" {\n  type        = string\n  description = \"Namespace for IRSA-enabled Kubernetes service accounts (used by S3 and RDS)\"\n  default     = \"onyx\"\n}\n\nvariable \"irsa_service_account_name\" {\n  type        = string\n  description = \"Name of the IRSA-enabled Kubernetes service account for workload access (S3 + optional RDS)\"\n  default     = \"onyx-workload-access\"\n}\n\nvariable \"enable_rds_iam_for_service_account\" {\n  type        = bool\n  description = \"Whether to create a dedicated RDS IRSA role and service account (grants rds-db:connect)\"\n  default     = false\n}\n\nvariable \"rds_db_username\" {\n  type        = string\n  description = \"Database username to allow via rds-db:connect\"\n  default     = null\n}\n\nvariable \"rds_db_connect_arn\" {\n  type        = string\n  description = \"Full rds-db:connect ARN to allow (required when enable_rds_iam_for_service_account is true)\"\n  default     = null\n}\n\nvariable \"cluster_enabled_log_types\" {\n  type        = list(string)\n  description = \"EKS control plane log types to enable (valid: api, audit, authenticator, controllerManager, scheduler)\"\n  default     = [\"api\", \"audit\", \"authenticator\", \"controllerManager\", \"scheduler\"]\n\n  validation {\n    condition     = alltrue([for t in var.cluster_enabled_log_types : contains([\"api\", \"audit\", \"authenticator\", \"controllerManager\", \"scheduler\"], t)])\n    error_message = \"Each entry must be one of: api, audit, authenticator, controllerManager, scheduler.\"\n  }\n}\n\nvariable \"cloudwatch_log_group_retention_in_days\" {\n  type        = number\n  description = \"Number of days to retain EKS control plane logs in CloudWatch (0 = never expire)\"\n  default     = 30\n\n  validation {\n    condition     = contains([0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653], var.cloudwatch_log_group_retention_in_days)\n    error_message = \"Must be a valid CloudWatch retention value (0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653).\"\n  }\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/onyx/main.tf",
    "content": "locals {\n  workspace       = terraform.workspace\n  name            = var.name\n  merged_tags     = merge(var.tags, { tenant = local.name, environment = local.workspace })\n  vpc_name        = \"${var.name}-vpc-${local.workspace}\"\n  cluster_name    = \"${var.name}-${local.workspace}\"\n  bucket_name     = \"${var.name}-file-store-${local.workspace}\"\n  redis_name      = \"${var.name}-redis-${local.workspace}\"\n  postgres_name   = \"${var.name}-postgres-${local.workspace}\"\n  opensearch_name = var.opensearch_domain_name != null ? var.opensearch_domain_name : \"${var.name}-opensearch-${local.workspace}\"\n\n  vpc_id          = var.create_vpc ? module.vpc[0].vpc_id : var.vpc_id\n  private_subnets = var.create_vpc ? module.vpc[0].private_subnets : var.private_subnets\n  public_subnets  = var.create_vpc ? module.vpc[0].public_subnets : var.public_subnets\n  vpc_cidr_block  = var.create_vpc ? module.vpc[0].vpc_cidr_block : var.vpc_cidr_block\n}\n\nprovider \"aws\" {\n  region = var.region\n  default_tags {\n    tags = local.merged_tags\n  }\n}\n\nmodule \"vpc\" {\n  source = \"../vpc\"\n\n  count    = var.create_vpc ? 1 : 0\n  vpc_name = local.vpc_name\n  tags     = local.merged_tags\n}\n\nmodule \"redis\" {\n  source        = \"../redis\"\n  name          = local.redis_name\n  vpc_id        = local.vpc_id\n  subnet_ids    = local.private_subnets\n  instance_type = \"cache.m6g.xlarge\"\n  ingress_cidrs = [local.vpc_cidr_block]\n  tags          = local.merged_tags\n\n  # Pass Redis authentication token as a sensitive input variable\n  auth_token = var.redis_auth_token\n}\n\nmodule \"postgres\" {\n  source        = \"../postgres\"\n  identifier    = local.postgres_name\n  vpc_id        = local.vpc_id\n  subnet_ids    = local.private_subnets\n  ingress_cidrs = [local.vpc_cidr_block]\n\n  username            = var.postgres_username\n  password            = var.postgres_password\n  tags                = local.merged_tags\n  enable_rds_iam_auth = var.enable_iam_auth\n\n  backup_retention_period = var.postgres_backup_retention_period\n  backup_window           = var.postgres_backup_window\n}\n\nmodule \"s3\" {\n  source             = \"../s3\"\n  bucket_name        = local.bucket_name\n  tags               = local.merged_tags\n  s3_vpc_endpoint_id = var.create_vpc ? module.vpc[0].s3_vpc_endpoint_id : var.s3_vpc_endpoint_id\n}\n\nmodule \"eks\" {\n  source          = \"../eks\"\n  cluster_name    = local.cluster_name\n  vpc_id          = local.vpc_id\n  subnet_ids      = concat(local.private_subnets, local.public_subnets)\n  tags            = local.merged_tags\n  s3_bucket_names = [local.bucket_name]\n\n  # Wire RDS IAM connection for the same IRSA service account used by apps\n  enable_rds_iam_for_service_account = var.enable_iam_auth\n  rds_db_username                    = var.postgres_username\n  rds_db_connect_arn                 = var.rds_db_connect_arn\n\n  # These variables must be defined in variables.tf or passed in via parent module\n  public_cluster_enabled               = var.public_cluster_enabled\n  private_cluster_enabled              = var.private_cluster_enabled\n  cluster_endpoint_public_access_cidrs = var.cluster_endpoint_public_access_cidrs\n\n  # Control plane logging\n  cluster_enabled_log_types              = var.eks_cluster_enabled_log_types\n  cloudwatch_log_group_retention_in_days = var.eks_cloudwatch_log_group_retention_in_days\n}\n\nmodule \"waf\" {\n  source = \"../waf\"\n\n  name = local.name\n  tags = local.merged_tags\n\n  # WAF configuration with sensible defaults\n  allowed_ip_cidrs                      = var.waf_allowed_ip_cidrs\n  common_rule_set_count_rules           = var.waf_common_rule_set_count_rules\n  rate_limit_requests_per_5_minutes     = var.waf_rate_limit_requests_per_5_minutes\n  api_rate_limit_requests_per_5_minutes = var.waf_api_rate_limit_requests_per_5_minutes\n  geo_restriction_countries             = var.waf_geo_restriction_countries\n  enable_logging                        = var.waf_enable_logging\n  log_retention_days                    = var.waf_log_retention_days\n}\n\nmodule \"opensearch\" {\n  source = \"../opensearch\"\n  count  = var.enable_opensearch ? 1 : 0\n\n  name   = local.opensearch_name\n  vpc_id = local.vpc_id\n  # Prefer setting subnet_ids explicitly if the state of private_subnets is\n  # unclear.\n  subnet_ids    = length(var.opensearch_subnet_ids) > 0 ? var.opensearch_subnet_ids : slice(local.private_subnets, 0, 3)\n  ingress_cidrs = [local.vpc_cidr_block]\n  tags          = local.merged_tags\n\n  # Reuse EKS security groups\n  security_group_ids = [module.eks.node_security_group_id, module.eks.cluster_security_group_id]\n\n  # Configuration\n  engine_version                = var.opensearch_engine_version\n  instance_type                 = var.opensearch_instance_type\n  instance_count                = var.opensearch_instance_count\n  dedicated_master_enabled      = var.opensearch_dedicated_master_enabled\n  dedicated_master_type         = var.opensearch_dedicated_master_type\n  multi_az_with_standby_enabled = var.opensearch_multi_az_with_standby_enabled\n  ebs_volume_size               = var.opensearch_ebs_volume_size\n  ebs_throughput                = var.opensearch_ebs_throughput\n\n  # Authentication\n  internal_user_database_enabled = var.opensearch_internal_user_database_enabled\n  master_user_name               = var.opensearch_master_user_name\n  master_user_password           = var.opensearch_master_user_password\n\n  # Logging\n  enable_logging     = var.opensearch_enable_logging\n  log_retention_days = var.opensearch_log_retention_days\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/onyx/outputs.tf",
    "content": "output \"redis_connection_url\" {\n  value     = module.redis.redis_endpoint\n  sensitive = true\n}\n\noutput \"cluster_name\" {\n  value = module.eks.cluster_name\n}\n\noutput \"postgres_endpoint\" {\n  description = \"RDS endpoint hostname\"\n  value       = module.postgres.endpoint\n}\n\noutput \"postgres_port\" {\n  description = \"RDS port\"\n  value       = module.postgres.port\n}\n\noutput \"postgres_db_name\" {\n  description = \"RDS database name\"\n  value       = module.postgres.db_name\n}\n\noutput \"postgres_username\" {\n  description = \"RDS master username\"\n  value       = module.postgres.username\n  sensitive   = true\n}\n\noutput \"postgres_dbi_resource_id\" {\n  description = \"RDS DB instance resource id\"\n  value       = module.postgres.dbi_resource_id\n}\n\noutput \"opensearch_endpoint\" {\n  description = \"OpenSearch domain endpoint\"\n  value       = var.enable_opensearch ? module.opensearch[0].domain_endpoint : null\n}\n\noutput \"opensearch_dashboard_endpoint\" {\n  description = \"OpenSearch Dashboards endpoint\"\n  value       = var.enable_opensearch ? module.opensearch[0].kibana_endpoint : null\n}\n\noutput \"opensearch_domain_arn\" {\n  description = \"OpenSearch domain ARN\"\n  value       = var.enable_opensearch ? module.opensearch[0].domain_arn : null\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/onyx/variables.tf",
    "content": "variable \"name\" {\n  type        = string\n  description = \"Name of the Onyx resources. Example: 'onyx'\"\n  default     = \"onyx\"\n}\n\nvariable \"region\" {\n  type        = string\n  description = \"AWS region for all resources\"\n  default     = \"us-west-2\"\n}\n\nvariable \"create_vpc\" {\n  type        = bool\n  description = \"Whether to create a new VPC\"\n  default     = true\n}\n\nvariable \"vpc_id\" {\n  type        = string\n  description = \"ID of the VPC. Required if create_vpc is false.\"\n  default     = null\n}\n\nvariable \"private_subnets\" {\n  type        = list(string)\n  description = \"Private subnets. Required if create_vpc is false.\"\n  default     = [] # This will default to 0.0.0.0/0 if not provided\n}\n\nvariable \"public_subnets\" {\n  type        = list(string)\n  description = \"Public subnets. Required if create_vpc is false.\"\n  default     = []\n}\n\nvariable \"vpc_cidr_block\" {\n  type        = string\n  description = \"VPC CIDR block. Required if create_vpc is false.\"\n  default     = null\n}\n\nvariable \"s3_vpc_endpoint_id\" {\n  type        = string\n  description = \"ID of an existing S3 gateway VPC endpoint when reusing an existing VPC\"\n  default     = null\n\n  validation {\n    condition     = var.create_vpc || var.s3_vpc_endpoint_id != null\n    error_message = \"s3_vpc_endpoint_id must be provided when create_vpc is false.\"\n  }\n}\n\nvariable \"tags\" {\n  type        = map(string)\n  description = \"Base tags applied to all AWS resources\"\n  default = {\n    \"project\" = \"onyx\"\n  }\n}\n\nvariable \"postgres_username\" {\n  type        = string\n  description = \"Username for the postgres database\"\n  default     = \"postgres\"\n  sensitive   = true\n}\n\nvariable \"postgres_password\" {\n  type        = string\n  description = \"Password for the postgres database\"\n  default     = null\n  sensitive   = true\n}\n\nvariable \"public_cluster_enabled\" {\n  type        = bool\n  description = \"Whether to enable public cluster access\"\n  default     = true\n}\n\nvariable \"private_cluster_enabled\" {\n  type        = bool\n  description = \"Whether to enable private cluster access\"\n  default     = false # Should be true for production, false for dev/staging\n}\n\nvariable \"cluster_endpoint_public_access_cidrs\" {\n  type        = list(string)\n  description = \"CIDR blocks allowed to access the public EKS API endpoint\"\n  default     = []\n}\n\nvariable \"redis_auth_token\" {\n  type        = string\n  description = \"Authentication token for the Redis cluster\"\n  default     = null\n  sensitive   = true\n}\n\nvariable \"enable_iam_auth\" {\n  type        = bool\n  description = \"Enable AWS IAM authentication for the RDS Postgres instance and wire IRSA policies\"\n  default     = false\n}\n\nvariable \"rds_db_connect_arn\" {\n  type        = string\n  description = \"Full rds-db:connect ARN to pass to the EKS module. Required when enable_rds_iam_auth is true.\"\n  default     = null\n}\n\n# WAF Configuration Variables\nvariable \"waf_rate_limit_requests_per_5_minutes\" {\n  type        = number\n  description = \"Rate limit for requests per 5 minutes per IP address\"\n  default     = 2000\n}\n\nvariable \"waf_allowed_ip_cidrs\" {\n  type        = list(string)\n  description = \"Optional IPv4 CIDR ranges allowed through the WAF. Leave empty to disable IP allowlisting.\"\n  default     = []\n}\n\nvariable \"waf_common_rule_set_count_rules\" {\n  type        = list(string)\n  description = \"Subrules within AWSManagedRulesCommonRuleSet to override to COUNT instead of BLOCK.\"\n  default     = []\n}\n\nvariable \"waf_api_rate_limit_requests_per_5_minutes\" {\n  type        = number\n  description = \"Rate limit for API requests per 5 minutes per IP address\"\n  default     = 1000\n}\n\nvariable \"waf_geo_restriction_countries\" {\n  type        = list(string)\n  description = \"List of country codes to block. Leave empty to disable geo restrictions\"\n  default     = []\n}\n\nvariable \"waf_enable_logging\" {\n  type        = bool\n  description = \"Enable WAF logging to CloudWatch\"\n  default     = true\n}\n\nvariable \"waf_log_retention_days\" {\n  type        = number\n  description = \"Number of days to retain WAF logs\"\n  default     = 90\n}\n\n# OpenSearch Configuration Variables\nvariable \"enable_opensearch\" {\n  type        = bool\n  description = \"Whether to create an OpenSearch domain\"\n  default     = false\n}\n\nvariable \"opensearch_engine_version\" {\n  type        = string\n  description = \"OpenSearch engine version\"\n  default     = \"3.3\"\n}\n\nvariable \"opensearch_instance_type\" {\n  type        = string\n  description = \"Instance type for OpenSearch data nodes\"\n  default     = \"r8g.large.search\"\n}\n\nvariable \"opensearch_instance_count\" {\n  type        = number\n  description = \"Number of OpenSearch data nodes\"\n  default     = 3\n}\n\nvariable \"opensearch_dedicated_master_enabled\" {\n  type        = bool\n  description = \"Whether to enable dedicated master nodes for OpenSearch\"\n  default     = true\n}\n\nvariable \"opensearch_dedicated_master_type\" {\n  type        = string\n  description = \"Instance type for dedicated master nodes\"\n  default     = \"m7g.large.search\"\n}\n\nvariable \"opensearch_multi_az_with_standby_enabled\" {\n  type        = bool\n  description = \"Whether to enable Multi-AZ with Standby deployment\"\n  default     = true\n}\n\nvariable \"opensearch_ebs_volume_size\" {\n  type        = number\n  description = \"EBS volume size in GiB per OpenSearch node\"\n  default     = 512\n}\n\nvariable \"opensearch_ebs_throughput\" {\n  type        = number\n  description = \"Throughput in MiB/s for gp3 volumes\"\n  default     = 256\n}\n\nvariable \"opensearch_internal_user_database_enabled\" {\n  type        = bool\n  description = \"Whether to enable the internal user database for fine-grained access control\"\n  default     = true\n}\n\nvariable \"opensearch_master_user_name\" {\n  type        = string\n  description = \"Master user name for OpenSearch internal user database\"\n  default     = null\n  sensitive   = true\n}\n\nvariable \"opensearch_master_user_password\" {\n  type        = string\n  description = \"Master user password for OpenSearch internal user database\"\n  default     = null\n  sensitive   = true\n}\n\nvariable \"opensearch_domain_name\" {\n  type        = string\n  description = \"Override the OpenSearch domain name. If null, defaults to {name}-opensearch-{workspace}.\"\n  default     = null\n}\n\nvariable \"opensearch_enable_logging\" {\n  type    = bool\n  default = false\n}\n\nvariable \"opensearch_log_retention_days\" {\n  type        = number\n  description = \"Number of days to retain OpenSearch CloudWatch logs (0 = never expire)\"\n  default     = 0\n}\n\nvariable \"opensearch_subnet_ids\" {\n  type        = list(string)\n  description = \"Subnet IDs for OpenSearch. If empty, uses first 3 private subnets.\"\n  default     = []\n}\n\n# RDS Backup Configuration\nvariable \"postgres_backup_retention_period\" {\n  type        = number\n  description = \"Number of days to retain automated RDS backups (0 to disable)\"\n  default     = 7\n}\n\nvariable \"postgres_backup_window\" {\n  type        = string\n  description = \"Preferred UTC time window for automated RDS backups (hh24:mi-hh24:mi)\"\n  default     = \"03:00-04:00\"\n}\n\n# EKS Control Plane Logging\nvariable \"eks_cluster_enabled_log_types\" {\n  type        = list(string)\n  description = \"EKS control plane log types to enable (valid: api, audit, authenticator, controllerManager, scheduler)\"\n  default     = [\"api\", \"audit\", \"authenticator\", \"controllerManager\", \"scheduler\"]\n}\n\nvariable \"eks_cloudwatch_log_group_retention_in_days\" {\n  type        = number\n  description = \"Number of days to retain EKS control plane logs in CloudWatch (0 = never expire)\"\n  default     = 30\n\n  validation {\n    condition     = contains([0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653], var.eks_cloudwatch_log_group_retention_in_days)\n    error_message = \"Must be a valid CloudWatch retention value (0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653).\"\n  }\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/onyx/versions.tf",
    "content": "terraform {\n  required_version = \">= 1.12.0\"\n\n  required_providers {\n    aws = {\n      source  = \"hashicorp/aws\"\n      version = \"~> 5.100\"\n    }\n    helm = {\n      source  = \"hashicorp/helm\"\n      version = \"~> 2.16\"\n    }\n    kubernetes = {\n      source  = \"hashicorp/kubernetes\"\n      version = \"~> 2.37\"\n    }\n  }\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/opensearch/main.tf",
    "content": "# OpenSearch domain security group\nresource \"aws_security_group\" \"opensearch_sg\" {\n  count       = length(var.security_group_ids) > 0 ? 0 : 1\n  name        = \"${var.name}-sg\"\n  description = \"Allow inbound traffic to OpenSearch from VPC\"\n  vpc_id      = var.vpc_id\n  tags        = var.tags\n\n  ingress {\n    from_port   = 443\n    to_port     = 443\n    protocol    = \"tcp\"\n    cidr_blocks = var.ingress_cidrs\n  }\n\n  egress {\n    from_port   = 0\n    to_port     = 0\n    protocol    = \"-1\"\n    cidr_blocks = [\"0.0.0.0/0\"]\n  }\n}\n\n# Service-linked role for OpenSearch (required for VPC deployment)\n# This may already exist in your account - if so, import it or set create_service_linked_role = false\nresource \"aws_iam_service_linked_role\" \"opensearch\" {\n  count            = var.create_service_linked_role ? 1 : 0\n  aws_service_name = \"opensearchservice.amazonaws.com\"\n}\n\n# IAM policy for OpenSearch access\ndata \"aws_caller_identity\" \"current\" {}\ndata \"aws_region\" \"current\" {}\n\n# KMS key lookup for encryption at rest\ndata \"aws_kms_key\" \"opensearch\" {\n  key_id = \"alias/aws/es\"\n}\n\n# Access policy - allows all principals within the VPC (secured by VPC + security groups)\ndata \"aws_iam_policy_document\" \"opensearch_access\" {\n  statement {\n    effect = \"Allow\"\n\n    principals {\n      type        = \"AWS\"\n      identifiers = [\"*\"]\n    }\n\n    actions = [\"es:*\"]\n\n    resources = [\n      \"arn:aws:es:${data.aws_region.current.id}:${data.aws_caller_identity.current.account_id}:domain/${var.name}/*\"\n    ]\n  }\n}\n\n# OpenSearch domain\nresource \"aws_opensearch_domain\" \"main\" {\n  domain_name    = var.name\n  engine_version = \"OpenSearch_${var.engine_version}\"\n\n  cluster_config {\n    instance_type                 = var.instance_type\n    instance_count                = var.instance_count\n    zone_awareness_enabled        = var.zone_awareness_enabled\n    dedicated_master_enabled      = var.dedicated_master_enabled\n    dedicated_master_type         = var.dedicated_master_enabled ? var.dedicated_master_type : null\n    dedicated_master_count        = var.dedicated_master_enabled ? var.dedicated_master_count : null\n    multi_az_with_standby_enabled = var.multi_az_with_standby_enabled\n    warm_enabled                  = var.warm_enabled\n    warm_type                     = var.warm_enabled ? var.warm_type : null\n    warm_count                    = var.warm_enabled ? var.warm_count : null\n\n    dynamic \"zone_awareness_config\" {\n      for_each = var.zone_awareness_enabled ? [1] : []\n      content {\n        availability_zone_count = var.availability_zone_count\n      }\n    }\n\n    dynamic \"cold_storage_options\" {\n      for_each = var.cold_storage_enabled ? [1] : []\n      content {\n        enabled = true\n      }\n    }\n  }\n\n  ebs_options {\n    ebs_enabled = true\n    volume_type = var.ebs_volume_type\n    volume_size = var.ebs_volume_size\n    iops        = var.ebs_volume_type == \"gp3\" || var.ebs_volume_type == \"io1\" ? var.ebs_iops : null\n    throughput  = var.ebs_volume_type == \"gp3\" ? var.ebs_throughput : null\n  }\n\n  vpc_options {\n    subnet_ids         = var.subnet_ids\n    security_group_ids = length(var.security_group_ids) > 0 ? var.security_group_ids : [aws_security_group.opensearch_sg[0].id]\n  }\n\n  encrypt_at_rest {\n    enabled    = true\n    kms_key_id = var.kms_key_id != null ? var.kms_key_id : data.aws_kms_key.opensearch.arn\n  }\n\n  node_to_node_encryption {\n    enabled = true\n  }\n\n  domain_endpoint_options {\n    enforce_https       = true\n    tls_security_policy = var.tls_security_policy\n  }\n\n  advanced_security_options {\n    enabled                        = true\n    anonymous_auth_enabled         = false\n    internal_user_database_enabled = var.internal_user_database_enabled\n\n    dynamic \"master_user_options\" {\n      for_each = var.internal_user_database_enabled ? [1] : []\n      content {\n        master_user_name     = var.master_user_name\n        master_user_password = var.master_user_password\n      }\n    }\n\n    dynamic \"master_user_options\" {\n      for_each = var.internal_user_database_enabled ? [] : [1]\n      content {\n        master_user_arn = var.master_user_arn\n      }\n    }\n  }\n\n  advanced_options = var.advanced_options\n\n  access_policies = data.aws_iam_policy_document.opensearch_access.json\n\n  auto_tune_options {\n    desired_state       = var.auto_tune_enabled ? \"ENABLED\" : \"DISABLED\"\n    rollback_on_disable = var.auto_tune_rollback_on_disable\n  }\n\n  off_peak_window_options {\n    enabled = var.off_peak_window_enabled\n\n    dynamic \"off_peak_window\" {\n      for_each = var.off_peak_window_enabled ? [1] : []\n      content {\n        window_start_time {\n          hours   = var.off_peak_window_start_hours\n          minutes = var.off_peak_window_start_minutes\n        }\n      }\n    }\n  }\n\n  software_update_options {\n    auto_software_update_enabled = var.auto_software_update_enabled\n  }\n\n  dynamic \"log_publishing_options\" {\n    for_each = var.enable_logging ? [\"INDEX_SLOW_LOGS\", \"SEARCH_SLOW_LOGS\", \"ES_APPLICATION_LOGS\"] : []\n    content {\n      cloudwatch_log_group_arn = \"arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:${local.log_group_name}\"\n      log_type                 = log_publishing_options.value\n    }\n  }\n\n  tags = var.tags\n\n  depends_on = [\n    aws_iam_service_linked_role.opensearch,\n    aws_cloudwatch_log_resource_policy.opensearch\n  ]\n\n  lifecycle {\n    precondition {\n      condition     = !var.internal_user_database_enabled || var.master_user_name != null\n      error_message = \"master_user_name is required when internal_user_database_enabled is true.\"\n    }\n    precondition {\n      condition     = !var.internal_user_database_enabled || var.master_user_password != null\n      error_message = \"master_user_password is required when internal_user_database_enabled is true.\"\n    }\n  }\n}\n\n# CloudWatch log group for OpenSearch\nlocals {\n  log_group_name = var.log_group_name != null ? var.log_group_name : \"/aws/OpenSearchService/domains/${var.name}/search-logs\"\n}\n\nresource \"aws_cloudwatch_log_group\" \"opensearch\" {\n  count             = var.enable_logging ? 1 : 0\n  name              = local.log_group_name\n  retention_in_days = var.log_retention_days\n  tags              = var.tags\n}\n\n# CloudWatch log resource policy for OpenSearch\ndata \"aws_iam_policy_document\" \"opensearch_log_policy\" {\n  count = var.enable_logging ? 1 : 0\n\n  statement {\n    effect = \"Allow\"\n\n    principals {\n      type        = \"Service\"\n      identifiers = [\"es.amazonaws.com\"]\n    }\n\n    actions = [\n      \"logs:PutLogEvents\",\n      \"logs:CreateLogStream\",\n    ]\n\n    resources = [\"arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:${local.log_group_name}:*\"]\n  }\n}\n\nresource \"aws_cloudwatch_log_resource_policy\" \"opensearch\" {\n  count           = var.enable_logging ? 1 : 0\n  policy_name     = \"OpenSearchService-${var.name}-Search-logs\"\n  policy_document = data.aws_iam_policy_document.opensearch_log_policy[0].json\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/opensearch/outputs.tf",
    "content": "output \"domain_endpoint\" {\n  description = \"The endpoint of the OpenSearch domain\"\n  value       = aws_opensearch_domain.main.endpoint\n}\n\noutput \"domain_arn\" {\n  description = \"The ARN of the OpenSearch domain\"\n  value       = aws_opensearch_domain.main.arn\n}\n\noutput \"domain_id\" {\n  description = \"The unique identifier for the OpenSearch domain\"\n  value       = aws_opensearch_domain.main.domain_id\n}\n\noutput \"domain_name\" {\n  description = \"The name of the OpenSearch domain\"\n  value       = aws_opensearch_domain.main.domain_name\n}\n\noutput \"kibana_endpoint\" {\n  description = \"The OpenSearch Dashboards endpoint\"\n  value       = aws_opensearch_domain.main.dashboard_endpoint\n}\n\noutput \"security_group_id\" {\n  description = \"The ID of the OpenSearch security group\"\n  value       = length(aws_security_group.opensearch_sg) > 0 ? aws_security_group.opensearch_sg[0].id : null\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/opensearch/variables.tf",
    "content": "variable \"name\" {\n  description = \"Name of the OpenSearch domain\"\n  type        = string\n}\n\nvariable \"vpc_id\" {\n  description = \"ID of the VPC to deploy the OpenSearch domain into\"\n  type        = string\n}\n\nvariable \"subnet_ids\" {\n  description = \"List of subnet IDs for the OpenSearch domain\"\n  type        = list(string)\n}\n\nvariable \"ingress_cidrs\" {\n  description = \"CIDR blocks allowed to access OpenSearch\"\n  type        = list(string)\n}\n\nvariable \"engine_version\" {\n  description = \"OpenSearch engine version (e.g., 2.17, 3.3)\"\n  type        = string\n  default     = \"3.3\"\n}\n\nvariable \"instance_type\" {\n  description = \"Instance type for data nodes\"\n  type        = string\n  default     = \"r8g.large.search\"\n}\n\nvariable \"instance_count\" {\n  description = \"Number of data nodes\"\n  type        = number\n  default     = 3\n}\n\nvariable \"zone_awareness_enabled\" {\n  description = \"Whether to enable zone awareness for the cluster\"\n  type        = bool\n  default     = true\n}\n\nvariable \"availability_zone_count\" {\n  description = \"Number of availability zones (2 or 3)\"\n  type        = number\n  default     = 3\n}\n\nvariable \"dedicated_master_enabled\" {\n  description = \"Whether to enable dedicated master nodes\"\n  type        = bool\n  default     = true\n}\n\nvariable \"dedicated_master_type\" {\n  description = \"Instance type for dedicated master nodes\"\n  type        = string\n  default     = \"m7g.large.search\"\n}\n\nvariable \"dedicated_master_count\" {\n  description = \"Number of dedicated master nodes (must be 3 or 5)\"\n  type        = number\n  default     = 3\n}\n\nvariable \"multi_az_with_standby_enabled\" {\n  description = \"Whether to enable Multi-AZ with Standby deployment\"\n  type        = bool\n  default     = true\n}\n\nvariable \"warm_enabled\" {\n  description = \"Whether to enable warm storage\"\n  type        = bool\n  default     = false\n}\n\nvariable \"warm_type\" {\n  description = \"Instance type for warm nodes\"\n  type        = string\n  default     = \"ultrawarm1.medium.search\"\n}\n\nvariable \"warm_count\" {\n  description = \"Number of warm nodes\"\n  type        = number\n  default     = 2\n}\n\nvariable \"cold_storage_enabled\" {\n  description = \"Whether to enable cold storage\"\n  type        = bool\n  default     = false\n}\n\nvariable \"ebs_volume_type\" {\n  description = \"EBS volume type (gp3, gp2, io1)\"\n  type        = string\n  default     = \"gp3\"\n}\n\nvariable \"ebs_volume_size\" {\n  description = \"EBS volume size in GB per node\"\n  type        = number\n  default     = 512\n}\n\nvariable \"ebs_iops\" {\n  description = \"IOPS for gp3/io1 volumes\"\n  type        = number\n  default     = 3000\n}\n\nvariable \"ebs_throughput\" {\n  description = \"Throughput in MiB/s for gp3 volumes\"\n  type        = number\n  default     = 256\n}\n\nvariable \"kms_key_id\" {\n  description = \"KMS key ID for encryption at rest (uses AWS managed key if not specified)\"\n  type        = string\n  default     = null\n}\n\nvariable \"tls_security_policy\" {\n  description = \"TLS security policy for HTTPS endpoints\"\n  type        = string\n  default     = \"Policy-Min-TLS-1-2-2019-07\"\n}\n\nvariable \"internal_user_database_enabled\" {\n  description = \"Whether to enable the internal user database for fine-grained access control\"\n  type        = bool\n  default     = true\n}\n\nvariable \"master_user_name\" {\n  description = \"Master user name for internal user database\"\n  type        = string\n  default     = null\n  sensitive   = true\n}\n\nvariable \"master_user_password\" {\n  description = \"Master user password for internal user database\"\n  type        = string\n  default     = null\n  sensitive   = true\n}\n\nvariable \"master_user_arn\" {\n  description = \"IAM ARN for the master user (used when internal_user_database_enabled is false)\"\n  type        = string\n  default     = null\n}\n\nvariable \"advanced_options\" {\n  description = \"Advanced options for OpenSearch\"\n  type        = map(string)\n  default = {\n    \"indices.fielddata.cache.size\"           = \"20\"\n    \"indices.query.bool.max_clause_count\"    = \"1024\"\n    \"override_main_response_version\"         = \"false\"\n    \"rest.action.multi.allow_explicit_index\" = \"true\"\n  }\n}\n\nvariable \"auto_tune_enabled\" {\n  description = \"Whether to enable Auto-Tune\"\n  type        = bool\n  default     = true\n}\n\nvariable \"auto_tune_rollback_on_disable\" {\n  description = \"Whether to roll back Auto-Tune changes when disabled\"\n  type        = string\n  default     = \"NO_ROLLBACK\"\n}\n\nvariable \"off_peak_window_enabled\" {\n  description = \"Whether to enable off-peak window for maintenance\"\n  type        = bool\n  default     = true\n}\n\nvariable \"off_peak_window_start_hours\" {\n  description = \"Hour (UTC) when off-peak window starts (0-23)\"\n  type        = number\n  default     = 6\n}\n\nvariable \"off_peak_window_start_minutes\" {\n  description = \"Minutes when off-peak window starts (0-59)\"\n  type        = number\n  default     = 0\n}\n\nvariable \"auto_software_update_enabled\" {\n  description = \"Whether to enable automatic software updates\"\n  type        = bool\n  default     = false\n}\n\nvariable \"enable_logging\" {\n  description = \"Whether to enable CloudWatch logging\"\n  type        = bool\n  default     = false\n}\n\nvariable \"create_service_linked_role\" {\n  description = \"Whether to create the OpenSearch service-linked role (set to false if it already exists)\"\n  type        = bool\n  default     = false\n}\n\nvariable \"log_retention_days\" {\n  description = \"Number of days to retain CloudWatch logs\"\n  type        = number\n  default     = 30\n}\n\nvariable \"security_group_ids\" {\n  description = \"Existing security group IDs to attach. If empty, a new SG is created.\"\n  type        = list(string)\n  default     = []\n}\n\nvariable \"log_group_name\" {\n  description = \"CloudWatch log group name. Defaults to AWS console convention.\"\n  type        = string\n  default     = null\n}\n\nvariable \"tags\" {\n  description = \"Tags to apply to OpenSearch resources\"\n  type        = map(string)\n  default     = {}\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/postgres/main.tf",
    "content": "resource \"aws_db_subnet_group\" \"this\" {\n  name       = \"${var.identifier}-subnet-group\"\n  subnet_ids = var.subnet_ids\n  tags       = var.tags\n}\n\nresource \"aws_security_group\" \"this\" {\n  name        = \"${var.identifier}-sg\"\n  description = \"Allow PostgreSQL access\"\n  vpc_id      = var.vpc_id\n  tags        = var.tags\n\n  ingress {\n    description = \"Postgres ingress\"\n    from_port   = 5432\n    to_port     = 5432\n    protocol    = \"tcp\"\n    cidr_blocks = var.ingress_cidrs\n  }\n\n  egress {\n    from_port   = 0\n    to_port     = 0\n    protocol    = \"-1\"\n    cidr_blocks = [\"0.0.0.0/0\"]\n  }\n}\n\nresource \"aws_db_instance\" \"this\" {\n  identifier        = var.identifier\n  db_name           = var.db_name\n  engine            = \"postgres\"\n  engine_version    = var.engine_version\n  instance_class    = var.instance_type\n  allocated_storage = var.storage_gb\n  username          = var.username\n  password          = var.password\n\n  # Enable IAM authentication for the RDS instance\n  iam_database_authentication_enabled = var.enable_rds_iam_auth\n\n  db_subnet_group_name   = aws_db_subnet_group.this.name\n  vpc_security_group_ids = [aws_security_group.this.id]\n  publicly_accessible    = false\n  deletion_protection    = true\n  storage_encrypted      = true\n\n  # Automated backups\n  backup_retention_period = var.backup_retention_period\n  backup_window           = var.backup_window\n\n  tags = var.tags\n}\n\n# CloudWatch alarm for CPU utilization monitoring\nresource \"aws_cloudwatch_metric_alarm\" \"cpu_utilization\" {\n  alarm_name          = \"${var.identifier}-cpu-utilization\"\n  alarm_description   = \"RDS CPU utilization for ${var.identifier}\"\n  comparison_operator = \"GreaterThanThreshold\"\n  evaluation_periods  = var.cpu_alarm_evaluation_periods\n  metric_name         = \"CPUUtilization\"\n  namespace           = \"AWS/RDS\"\n  period              = var.cpu_alarm_period\n  statistic           = \"Average\"\n  threshold           = var.cpu_alarm_threshold\n  treat_missing_data  = \"missing\"\n\n  alarm_actions = var.alarm_actions\n  ok_actions    = var.alarm_actions\n\n  dimensions = {\n    DBInstanceIdentifier = aws_db_instance.this.identifier\n  }\n\n  tags = var.tags\n}\n\n# CloudWatch alarm for disk IO monitoring\nresource \"aws_cloudwatch_metric_alarm\" \"read_iops\" {\n  alarm_name          = \"${var.identifier}-read-iops\"\n  alarm_description   = \"RDS ReadIOPS for ${var.identifier}\"\n  comparison_operator = \"GreaterThanThreshold\"\n  evaluation_periods  = var.iops_alarm_evaluation_periods\n  metric_name         = \"ReadIOPS\"\n  namespace           = \"AWS/RDS\"\n  period              = var.iops_alarm_period\n  statistic           = \"Average\"\n  threshold           = var.read_iops_alarm_threshold\n  treat_missing_data  = \"missing\"\n\n  alarm_actions = var.alarm_actions\n  ok_actions    = var.alarm_actions\n\n  dimensions = {\n    DBInstanceIdentifier = aws_db_instance.this.identifier\n  }\n\n  tags = var.tags\n}\n\n# CloudWatch alarm for freeable memory monitoring\nresource \"aws_cloudwatch_metric_alarm\" \"freeable_memory\" {\n  alarm_name          = \"${var.identifier}-freeable-memory\"\n  alarm_description   = \"RDS freeable memory for ${var.identifier}\"\n  comparison_operator = \"LessThanThreshold\"\n  evaluation_periods  = var.memory_alarm_evaluation_periods\n  metric_name         = \"FreeableMemory\"\n  namespace           = \"AWS/RDS\"\n  period              = var.memory_alarm_period\n  statistic           = \"Average\"\n  threshold           = var.memory_alarm_threshold\n  treat_missing_data  = \"missing\"\n\n  alarm_actions = var.alarm_actions\n  ok_actions    = var.alarm_actions\n\n  dimensions = {\n    DBInstanceIdentifier = aws_db_instance.this.identifier\n  }\n\n  tags = var.tags\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/postgres/outputs.tf",
    "content": "output \"endpoint\" {\n  description = \"RDS endpoint hostname\"\n  value       = aws_db_instance.this.endpoint\n}\n\noutput \"port\" {\n  description = \"RDS port\"\n  value       = aws_db_instance.this.port\n}\n\noutput \"db_name\" {\n  description = \"Database name\"\n  value       = aws_db_instance.this.db_name\n}\n\noutput \"username\" {\n  description = \"Master username\"\n  value       = aws_db_instance.this.username\n  sensitive   = true\n}\n\noutput \"dbi_resource_id\" {\n  description = \"DB instance resource ID used for IAM auth resource ARNs\"\n  value       = aws_db_instance.this.resource_id\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/postgres/variables.tf",
    "content": "variable \"identifier\" {\n  type        = string\n  description = \"Identifier for the database and related resources\"\n}\n\nvariable \"db_name\" {\n  type        = string\n  description = \"Name of the database\"\n  default     = \"postgres\"\n}\n\nvariable \"instance_type\" {\n  type        = string\n  description = \"Instance type\"\n  default     = \"db.t4g.large\" # 2 vCPU and 8 GB of memory\n}\n\nvariable \"storage_gb\" {\n  type        = number\n  description = \"Storage size in GB\"\n  default     = 20\n}\n\nvariable \"engine_version\" {\n  type        = string\n  description = \"Engine version\"\n  default     = \"17\"\n}\n\nvariable \"vpc_id\" {\n  type        = string\n  description = \"VPC ID\"\n}\n\nvariable \"subnet_ids\" {\n  type        = list(string)\n  description = \"Subnet IDs\"\n}\n\nvariable \"ingress_cidrs\" {\n  type        = list(string)\n  description = \"Ingress CIDR blocks\"\n}\n\nvariable \"username\" {\n  type        = string\n  description = \"Username for the database\"\n  default     = \"postgres\"\n  sensitive   = true\n}\n\nvariable \"password\" {\n  type        = string\n  description = \"Password for the database\"\n  default     = null\n  sensitive   = true\n}\n\nvariable \"tags\" {\n  type        = map(string)\n  description = \"Tags to apply to RDS resources\"\n  default     = {}\n}\n\nvariable \"enable_rds_iam_auth\" {\n  type        = bool\n  description = \"Enable AWS IAM database authentication for this RDS instance\"\n  default     = false\n}\n\nvariable \"backup_retention_period\" {\n  type        = number\n  description = \"Number of days to retain automated backups (0 to disable)\"\n  default     = 7\n\n  validation {\n    condition     = var.backup_retention_period >= 0 && var.backup_retention_period <= 35\n    error_message = \"backup_retention_period must be between 0 and 35 (AWS RDS limit).\"\n  }\n}\n\nvariable \"backup_window\" {\n  type        = string\n  description = \"Preferred UTC time window for automated backups (hh24:mi-hh24:mi)\"\n  default     = \"03:00-04:00\"\n\n  validation {\n    condition     = can(regex(\"^([01]\\\\d|2[0-3]):[0-5]\\\\d-([01]\\\\d|2[0-3]):[0-5]\\\\d$\", var.backup_window))\n    error_message = \"backup_window must be in hh24:mi-hh24:mi format (e.g. \\\"03:00-04:00\\\").\"\n  }\n}\n\n# CloudWatch CPU alarm configuration\nvariable \"cpu_alarm_threshold\" {\n  type        = number\n  description = \"CPU utilization percentage threshold for the CloudWatch alarm\"\n  default     = 80\n\n  validation {\n    condition     = var.cpu_alarm_threshold >= 0 && var.cpu_alarm_threshold <= 100\n    error_message = \"cpu_alarm_threshold must be between 0 and 100 (percentage).\"\n  }\n}\n\nvariable \"cpu_alarm_evaluation_periods\" {\n  type        = number\n  description = \"Number of consecutive periods the threshold must be breached before alarming\"\n  default     = 3\n\n  validation {\n    condition     = var.cpu_alarm_evaluation_periods >= 1\n    error_message = \"cpu_alarm_evaluation_periods must be at least 1.\"\n  }\n}\n\nvariable \"cpu_alarm_period\" {\n  type        = number\n  description = \"Period in seconds over which the CPU metric is evaluated\"\n  default     = 300\n\n  validation {\n    condition     = var.cpu_alarm_period >= 60 && var.cpu_alarm_period % 60 == 0\n    error_message = \"cpu_alarm_period must be a multiple of 60 seconds and at least 60 (CloudWatch requirement).\"\n  }\n}\n\nvariable \"memory_alarm_threshold\" {\n  type        = number\n  description = \"Freeable memory threshold in bytes. Alarm fires when memory drops below this value.\"\n  default     = 256000000 # 256 MB\n\n  validation {\n    condition     = var.memory_alarm_threshold > 0\n    error_message = \"memory_alarm_threshold must be greater than 0.\"\n  }\n}\n\nvariable \"memory_alarm_evaluation_periods\" {\n  type        = number\n  description = \"Number of consecutive periods the threshold must be breached before alarming\"\n  default     = 3\n\n  validation {\n    condition     = var.memory_alarm_evaluation_periods >= 1\n    error_message = \"memory_alarm_evaluation_periods must be at least 1.\"\n  }\n}\n\nvariable \"memory_alarm_period\" {\n  type        = number\n  description = \"Period in seconds over which the freeable memory metric is evaluated\"\n  default     = 300\n\n  validation {\n    condition     = var.memory_alarm_period >= 60 && var.memory_alarm_period % 60 == 0\n    error_message = \"memory_alarm_period must be a multiple of 60 seconds and at least 60 (CloudWatch requirement).\"\n  }\n}\n\nvariable \"read_iops_alarm_threshold\" {\n  type        = number\n  description = \"ReadIOPS threshold. Alarm fires when IOPS exceeds this value.\"\n  default     = 3000\n\n  validation {\n    condition     = var.read_iops_alarm_threshold > 0\n    error_message = \"read_iops_alarm_threshold must be greater than 0.\"\n  }\n}\n\nvariable \"iops_alarm_evaluation_periods\" {\n  type        = number\n  description = \"Number of consecutive periods the IOPS threshold must be breached before alarming\"\n  default     = 3\n\n  validation {\n    condition     = var.iops_alarm_evaluation_periods >= 1\n    error_message = \"iops_alarm_evaluation_periods must be at least 1.\"\n  }\n}\n\nvariable \"iops_alarm_period\" {\n  type        = number\n  description = \"Period in seconds over which the IOPS metric is evaluated\"\n  default     = 300\n\n  validation {\n    condition     = var.iops_alarm_period >= 60 && var.iops_alarm_period % 60 == 0\n    error_message = \"iops_alarm_period must be a multiple of 60 seconds and at least 60 (CloudWatch requirement).\"\n  }\n}\n\nvariable \"alarm_actions\" {\n  type        = list(string)\n  description = \"List of ARNs to notify when the alarm transitions state (e.g. SNS topic ARNs)\"\n  default     = []\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/redis/main.tf",
    "content": "# Define the Redis security group\nresource \"aws_security_group\" \"redis_sg\" {\n  name        = \"${var.name}-sg\"\n  description = \"Allow inbound traffic from EKS to Redis\"\n  vpc_id      = var.vpc_id\n  tags        = var.tags\n\n  # Standard Redis port\n  ingress {\n    from_port   = 6379\n    to_port     = 6379\n    protocol    = \"tcp\"\n    cidr_blocks = var.ingress_cidrs\n  }\n\n  egress {\n    from_port   = 0\n    to_port     = 0\n    protocol    = \"-1\"\n    cidr_blocks = [\"0.0.0.0/0\"]\n  }\n}\n\nresource \"aws_elasticache_subnet_group\" \"elasticache_subnet_group\" {\n  name       = \"${var.name}-subnet-group\"\n  subnet_ids = var.subnet_ids\n  tags       = var.tags\n}\n\n# The actual Redis instance\nresource \"aws_elasticache_replication_group\" \"redis\" {\n  replication_group_id = var.name\n  description          = \"Redis cluster for ${var.name}\"\n  engine               = \"redis\"\n  node_type            = var.instance_type\n  num_cache_clusters   = 1\n  parameter_group_name = \"default.redis7\"\n  engine_version       = \"7.0\"\n  port                 = 6379\n  security_group_ids   = [aws_security_group.redis_sg.id]\n  subnet_group_name    = aws_elasticache_subnet_group.elasticache_subnet_group.name\n\n  # Enable transit encryption (SSL/TLS)\n  transit_encryption_enabled = var.transit_encryption_enabled\n\n  # Enable encryption at rest\n  at_rest_encryption_enabled = true\n\n  # Enable authentication if auth_token is provided\n  # If transit_encryption_enabled is true, AWS requires an auth_token to be set.\n  auth_token = var.auth_token\n  tags       = var.tags\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/redis/outputs.tf",
    "content": "output \"redis_endpoint\" {\n  description = \"The endpoint of the Redis cluster\"\n  value       = aws_elasticache_replication_group.redis.primary_endpoint_address\n}\n\noutput \"redis_port\" {\n  description = \"The port of the Redis cluster\"\n  value       = aws_elasticache_replication_group.redis.port\n}\n\noutput \"redis_ssl_enabled\" {\n  description = \"Whether SSL/TLS is enabled for Redis\"\n  value       = var.transit_encryption_enabled\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/redis/variables.tf",
    "content": "variable \"name\" {\n  description = \"The name of the redis instance\"\n  type        = string\n}\n\nvariable \"vpc_id\" {\n  description = \"The ID of the vpc to deploy the redis instance into\"\n  type        = string\n}\n\nvariable \"subnet_ids\" {\n  description = \"The subnets of the vpc to deploy into\"\n  type        = list(string)\n}\n\nvariable \"ingress_cidrs\" {\n  description = \"CIDR block to allow ingress from\"\n  type        = list(string)\n}\n\nvariable \"instance_type\" {\n  description = \"The instance type of the redis instance\"\n  type        = string\n  default     = \"cache.m5.large\" # 2 vCPU and 6 GB of memory\n}\n\nvariable \"transit_encryption_enabled\" {\n  description = \"Enable transit encryption (SSL/TLS) for Redis\"\n  type        = bool\n  default     = true\n}\n\nvariable \"auth_token\" {\n  description = \"The password used to access a password protected server\"\n  type        = string\n  default     = null\n  sensitive   = true\n}\n\nvariable \"tags\" {\n  description = \"Tags to apply to ElastiCache resources\"\n  type        = map(string)\n  default     = {}\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/s3/main.tf",
    "content": "resource \"aws_s3_bucket\" \"bucket\" {\n  bucket = var.bucket_name\n  tags   = var.tags\n}\n\nresource \"aws_s3_bucket_policy\" \"bucket_policy\" {\n  bucket = aws_s3_bucket.bucket.id\n\n  policy = jsonencode({\n    Version = \"2012-10-17\",\n    Statement = [\n      {\n        Sid       = \"AllowAccessViaVPCE\",\n        Effect    = \"Allow\",\n        Principal = \"*\", # Update this to be the specific IAM roles, users, or service principals as needed\n        Action = [\n          \"s3:GetObject\",\n          \"s3:ListBucket\"\n        ],\n        Resource = [\n          aws_s3_bucket.bucket.arn,\n          \"${aws_s3_bucket.bucket.arn}/*\"\n        ],\n        Condition = {\n          StringEquals = {\n            \"aws:SourceVpce\" = var.s3_vpc_endpoint_id\n          }\n        }\n      }\n    ]\n  })\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/s3/variables.tf",
    "content": "variable \"bucket_name\" {\n  type        = string\n  description = \"Name of the S3 bucket\"\n}\n\nvariable \"tags\" {\n  type        = map(string)\n  description = \"Tags to apply to S3 resources\"\n  default     = {}\n}\n\nvariable \"s3_vpc_endpoint_id\" {\n  type        = string\n  description = \"ID of the S3 gateway VPC endpoint allowed to access this bucket\"\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/vpc/main.tf",
    "content": "# Get the availability zones for the region without requiring opt-in\ndata \"aws_availability_zones\" \"available\" {\n  filter {\n    name   = \"opt-in-status\"\n    values = [\"opt-in-not-required\"]\n  }\n}\n\ndata \"aws_region\" \"current\" {}\n\nmodule \"vpc\" {\n  source  = \"terraform-aws-modules/vpc/aws\"\n  version = \"5.0.0\"\n\n  name = var.vpc_name\n\n  cidr = var.cidr_block\n  azs  = slice(data.aws_availability_zones.available.names, 0, 3)\n\n  private_subnets         = var.private_subnets\n  public_subnets          = var.public_subnets\n  map_public_ip_on_launch = true\n\n  enable_nat_gateway   = true\n  single_nat_gateway   = false\n  enable_dns_hostnames = true\n\n  public_subnet_tags = {\n    \"kubernetes.io/role/elb\" = \"1\"\n  }\n\n  private_subnet_tags = {\n    \"kubernetes.io/role/internal-elb\" = \"1\"\n  }\n\n  tags = var.tags\n}\n\ndata \"aws_route_tables\" \"this\" {\n  filter {\n    name   = \"vpc-id\"\n    values = [module.vpc.vpc_id]\n  }\n\n  depends_on = [module.vpc]\n}\n\nresource \"aws_vpc_endpoint\" \"s3\" {\n  vpc_id            = module.vpc.vpc_id\n  service_name      = \"com.amazonaws.${data.aws_region.current.name}.s3\"\n  vpc_endpoint_type = \"Gateway\"\n  route_table_ids   = data.aws_route_tables.this.ids\n  tags              = var.tags\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/vpc/outputs.tf",
    "content": "output \"vpc_id\" {\n  value = module.vpc.vpc_id\n}\n\noutput \"private_subnets\" {\n  value = module.vpc.private_subnets\n}\n\noutput \"public_subnets\" {\n  value = module.vpc.public_subnets\n}\n\noutput \"vpc_cidr_block\" {\n  value = module.vpc.vpc_cidr_block\n}\n\noutput \"s3_vpc_endpoint_id\" {\n  description = \"ID of the S3 gateway VPC endpoint created for this VPC\"\n  value       = aws_vpc_endpoint.s3.id\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/vpc/variables.tf",
    "content": "variable \"vpc_name\" {\n  type        = string\n  description = \"The name of the VPC\"\n  default     = \"onyx-vpc\"\n}\n\nvariable \"cidr_block\" {\n  type        = string\n  description = \"The CIDR block for the VPC\"\n  default     = \"10.0.0.0/16\"\n}\n\nvariable \"private_subnets\" {\n  type        = list(string)\n  description = \"The private subnets for the VPC\"\n  default     = [\"10.0.0.0/21\", \"10.0.8.0/21\", \"10.0.16.0/21\", \"10.0.24.0/21\", \"10.0.32.0/21\"]\n}\n\nvariable \"public_subnets\" {\n  type        = list(string)\n  description = \"The public subnets for the VPC\"\n  default     = [\"10.0.40.0/21\", \"10.0.48.0/21\", \"10.0.56.0/21\"]\n}\n\nvariable \"tags\" {\n  type        = map(string)\n  description = \"Tags to apply to all VPC-related resources\"\n  default     = {}\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/waf/main.tf",
    "content": "locals {\n  name                  = var.name\n  tags                  = var.tags\n  ip_allowlist_enabled  = length(var.allowed_ip_cidrs) > 0\n  managed_rule_priority = local.ip_allowlist_enabled ? 1 : 0\n}\n\nresource \"aws_wafv2_ip_set\" \"allowed_ips\" {\n  count = local.ip_allowlist_enabled ? 1 : 0\n\n  name               = \"${local.name}-allowed-ips\"\n  description        = \"IP allowlist for ${local.name}\"\n  scope              = \"REGIONAL\"\n  ip_address_version = \"IPV4\"\n  addresses          = var.allowed_ip_cidrs\n\n  tags = local.tags\n}\n\n# AWS WAFv2 Web ACL\nresource \"aws_wafv2_web_acl\" \"main\" {\n  name        = \"${local.name}-web-acl\"\n  description = \"WAF Web ACL for ${local.name}\"\n  scope       = \"REGIONAL\"\n\n  default_action {\n    allow {}\n  }\n\n  dynamic \"rule\" {\n    for_each = local.ip_allowlist_enabled ? [1] : []\n    content {\n      name     = \"BlockRequestsOutsideAllowedIPs\"\n      priority = 1\n\n      action {\n        block {}\n      }\n\n      statement {\n        not_statement {\n          statement {\n            ip_set_reference_statement {\n              arn = aws_wafv2_ip_set.allowed_ips[0].arn\n            }\n          }\n        }\n      }\n\n      visibility_config {\n        cloudwatch_metrics_enabled = true\n        metric_name                = \"BlockRequestsOutsideAllowedIPsMetric\"\n        sampled_requests_enabled   = true\n      }\n    }\n  }\n\n  # AWS Managed Rules - Core Rule Set\n  rule {\n    name     = \"AWSManagedRulesCommonRuleSet\"\n    priority = 1 + local.managed_rule_priority\n\n    override_action {\n      none {}\n    }\n\n    statement {\n      managed_rule_group_statement {\n        name        = \"AWSManagedRulesCommonRuleSet\"\n        vendor_name = \"AWS\"\n\n        dynamic \"rule_action_override\" {\n          for_each = var.common_rule_set_count_rules\n          content {\n            name = rule_action_override.value\n            action_to_use {\n              count {}\n            }\n          }\n        }\n      }\n    }\n\n    visibility_config {\n      cloudwatch_metrics_enabled = true\n      metric_name                = \"AWSManagedRulesCommonRuleSetMetric\"\n      sampled_requests_enabled   = true\n    }\n  }\n\n  # AWS Managed Rules - Known Bad Inputs\n  rule {\n    name     = \"AWSManagedRulesKnownBadInputsRuleSet\"\n    priority = 2 + local.managed_rule_priority\n\n    override_action {\n      none {}\n    }\n\n    statement {\n      managed_rule_group_statement {\n        name        = \"AWSManagedRulesKnownBadInputsRuleSet\"\n        vendor_name = \"AWS\"\n      }\n    }\n\n    visibility_config {\n      cloudwatch_metrics_enabled = true\n      metric_name                = \"AWSManagedRulesKnownBadInputsRuleSetMetric\"\n      sampled_requests_enabled   = true\n    }\n  }\n\n  # Rate Limiting Rule\n  rule {\n    name     = \"RateLimitRule\"\n    priority = 3 + local.managed_rule_priority\n\n    action {\n      block {}\n    }\n\n    statement {\n      rate_based_statement {\n        limit              = var.rate_limit_requests_per_5_minutes\n        aggregate_key_type = \"IP\"\n      }\n    }\n\n    visibility_config {\n      cloudwatch_metrics_enabled = true\n      metric_name                = \"RateLimitRuleMetric\"\n      sampled_requests_enabled   = true\n    }\n  }\n\n  # Geo Restriction (if enabled)\n  dynamic \"rule\" {\n    for_each = length(var.geo_restriction_countries) > 0 ? [1] : []\n    content {\n      name     = \"GeoRestrictionRule\"\n      priority = 4 + local.managed_rule_priority\n\n      action {\n        block {}\n      }\n\n      statement {\n        geo_match_statement {\n          country_codes = var.geo_restriction_countries\n        }\n      }\n\n      visibility_config {\n        cloudwatch_metrics_enabled = true\n        metric_name                = \"GeoRestrictionRuleMetric\"\n        sampled_requests_enabled   = true\n      }\n    }\n  }\n\n  # IP Rate Limiting\n  rule {\n    name     = \"APIRateLimitRule\"\n    priority = 5 + local.managed_rule_priority\n\n    action {\n      block {}\n    }\n\n    statement {\n      rate_based_statement {\n        limit              = var.api_rate_limit_requests_per_5_minutes\n        aggregate_key_type = \"IP\"\n      }\n    }\n\n    visibility_config {\n      cloudwatch_metrics_enabled = true\n      metric_name                = \"APIRateLimitRuleMetric\"\n      sampled_requests_enabled   = true\n    }\n  }\n\n  # SQL Injection Protection\n  rule {\n    name     = \"AWSManagedRulesSQLiRuleSet\"\n    priority = 6 + local.managed_rule_priority\n\n    override_action {\n      none {}\n    }\n\n    statement {\n      managed_rule_group_statement {\n        name        = \"AWSManagedRulesSQLiRuleSet\"\n        vendor_name = \"AWS\"\n      }\n    }\n\n    visibility_config {\n      cloudwatch_metrics_enabled = true\n      metric_name                = \"AWSManagedRulesSQLiRuleSetMetric\"\n      sampled_requests_enabled   = true\n    }\n  }\n\n  # Anonymous IP Protection\n  rule {\n    name     = \"AWSManagedRulesAnonymousIpList\"\n    priority = 7 + local.managed_rule_priority\n\n    override_action {\n      none {}\n    }\n\n    statement {\n      managed_rule_group_statement {\n        name        = \"AWSManagedRulesAnonymousIpList\"\n        vendor_name = \"AWS\"\n      }\n    }\n\n    visibility_config {\n      cloudwatch_metrics_enabled = true\n      metric_name                = \"AWSManagedRulesAnonymousIpListMetric\"\n      sampled_requests_enabled   = true\n    }\n  }\n\n  visibility_config {\n    cloudwatch_metrics_enabled = true\n    metric_name                = \"${local.name}WebACLMetric\"\n    sampled_requests_enabled   = true\n  }\n\n  tags = local.tags\n}\n\n# WAF Logging Configuration (simplified - just CloudWatch)\nresource \"aws_cloudwatch_log_group\" \"waf_logs\" {\n  count             = var.enable_logging ? 1 : 0\n  name              = \"/aws/waf/${local.name}\"\n  retention_in_days = var.log_retention_days\n\n  tags = local.tags\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/waf/outputs.tf",
    "content": "output \"web_acl_arn\" {\n  description = \"ARN of the WAF Web ACL\"\n  value       = aws_wafv2_web_acl.main.arn\n}\n\noutput \"web_acl_id\" {\n  description = \"ID of the WAF Web ACL\"\n  value       = aws_wafv2_web_acl.main.id\n}\n\noutput \"web_acl_name\" {\n  description = \"Name of the WAF Web ACL\"\n  value       = aws_wafv2_web_acl.main.name\n}\n\noutput \"log_group_name\" {\n  description = \"Name of the CloudWatch log group for WAF logs\"\n  value       = var.enable_logging ? aws_cloudwatch_log_group.waf_logs[0].name : null\n}\n"
  },
  {
    "path": "deployment/terraform/modules/aws/waf/variables.tf",
    "content": "variable \"name\" {\n  type        = string\n  description = \"Name prefix for WAF resources\"\n}\n\nvariable \"tags\" {\n  type        = map(string)\n  description = \"Tags to apply to all WAF resources\"\n  default     = {}\n}\n\nvariable \"allowed_ip_cidrs\" {\n  type        = list(string)\n  description = \"Optional IPv4 CIDR ranges allowed to reach the application. Leave empty to disable IP allowlisting.\"\n  default     = []\n}\n\nvariable \"common_rule_set_count_rules\" {\n  type        = list(string)\n  description = \"Subrules within AWSManagedRulesCommonRuleSet to override to COUNT instead of BLOCK.\"\n  default     = []\n}\n\nvariable \"rate_limit_requests_per_5_minutes\" {\n  type        = number\n  description = \"Rate limit for requests per 5 minutes per IP address\"\n  default     = 2000\n}\n\nvariable \"api_rate_limit_requests_per_5_minutes\" {\n  type        = number\n  description = \"Rate limit for API requests per 5 minutes per IP address\"\n  default     = 1000\n}\n\nvariable \"geo_restriction_countries\" {\n  type        = list(string)\n  description = \"List of country codes to block. Leave empty to disable geo restrictions\"\n  default     = []\n}\n\nvariable \"enable_logging\" {\n  type        = bool\n  description = \"Enable WAF logging to S3\"\n  default     = true\n}\n\nvariable \"log_retention_days\" {\n  type        = number\n  description = \"Number of days to retain WAF logs\"\n  default     = 90\n}\n"
  },
  {
    "path": "desktop/.gitignore",
    "content": "# Dependencies\nnode_modules/\n\n# Build outputs\ndist/\nsrc-tauri/target/\n\n# IDE\n.vscode/\n.idea/\n*.swp\n*.swo\n\n# OS\n.DS_Store\nThumbs.db\n\n# Logs\n*.log\nnpm-debug.log*\n\n# Local env files\n.env\n.env.local\n\n# Generated files\nsrc-tauri/gen/schemas/acl-manifests.json\n"
  },
  {
    "path": "desktop/README.md",
    "content": "# Onyx Desktop\n\nA lightweight macOS desktop application for [Onyx Cloud](https://cloud.onyx.app).\n\nBuilt with [Tauri](https://tauri.app) for minimal bundle size (~10MB vs Electron's 150MB+).\n\n## Features\n\n- 🪶 **Lightweight** - Native macOS WebKit, no bundled Chromium\n- ⌨️ **Keyboard Shortcuts** - Quick navigation and actions\n- 🪟 **Native Feel** - macOS-style title bar with traffic lights\n- 💾 **Window State** - Remembers size/position between sessions\n- 🔗 **Multi-window** - Open multiple Onyx windows\n\n## Keyboard Shortcuts\n\n| Shortcut | Action           |\n| -------- | ---------------- |\n| `⌘ N`    | New Chat         |\n| `⌘ ⇧ N`  | New Window       |\n| `⌘ R`    | Reload           |\n| `⌘ [`    | Go Back          |\n| `⌘ ]`    | Go Forward       |\n| `⌘ ,`    | Open Config File |\n| `⌘ W`    | Close Window     |\n| `⌘ Q`    | Quit             |\n\n## Prerequisites\n\n1. **Rust** (latest stable)\n\n   ```bash\n   curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh\n   source $HOME/.cargo/env\n   ```\n\n2. **Node.js** (18+)\n\n   ```bash\n   # Using homebrew\n   brew install node\n\n   # Or using nvm\n   nvm install 18\n   ```\n\n3. **Xcode Command Line Tools**\n   ```bash\n   xcode-select --install\n   ```\n\n## Development\n\n```bash\n# Install dependencies\nnpm install\n\n# Run in development mode\nnpm run dev\n\n# Run in debug mode\nnpm run debug\n```\n\n## Building\n\n### Build for current architecture\n\n```bash\nnpm run build\n```\n\n### Build Universal Binary (Intel + Apple Silicon)\n\n```bash\n# First, add the targets\nrustup target add x86_64-apple-darwin\nrustup target add aarch64-apple-darwin\n\n# Build universal binary\nnpm run build:dmg\n```\n\nThe built `.dmg` will be in `src-tauri/target/release/bundle/dmg/`.\n\n## Project Structure\n\n```\nonyx-desktop/\n├── package.json          # Node dependencies & scripts\n├── src/\n│   └── index.html        # Fallback/loading page\n└── src-tauri/\n    ├── Cargo.toml        # Rust dependencies\n    ├── tauri.conf.json   # Tauri configuration\n    ├── build.rs          # Build script\n    ├── icons/            # App icons\n    └── src/\n        └── main.rs       # Rust backend code\n```\n\n## Icons\n\nBefore building, add your app icons to `src-tauri/icons/`:\n\n- `32x32.png`\n- `128x128.png`\n- `128x128@2x.png`\n- `icon.icns` (macOS)\n- `icon.ico` (Windows, optional)\n\nYou can generate these from a 1024x1024 source image using:\n\n```bash\n# Using tauri's icon generator\nnpm run tauri icon path/to/your-icon.png\n```\n\n## Customization\n\n### Self-Hosted / Custom Server URL\n\nThe app defaults to `https://cloud.onyx.app` but supports any Onyx instance.\n\n**Config file location:**\n\n- macOS: `~/Library/Application Support/app.onyx.desktop/config.json`\n- Linux: `~/.config/app.onyx.desktop/config.json`\n- Windows: `%APPDATA%/app.onyx.desktop/config.json`\n\n**To use a self-hosted instance:**\n\n1. Launch the app once (creates default config)\n2. Press `⌘ ,` to open the config file, or edit it manually\n3. Change the `server_url`:\n\n```json\n{\n  \"server_url\": \"https://your-onyx-instance.company.com\",\n  \"window_title\": \"Onyx\"\n}\n```\n\n4. Restart the app\n\n**Quick edit via terminal:**\n\n```bash\n# macOS\nopen -t ~/Library/Application\\ Support/app.onyx.desktop/config.json\n\n# Or use any editor\ncode ~/Library/Application\\ Support/app.onyx.desktop/config.json\n```\n\n### Change the default URL in build\n\nEdit `src-tauri/tauri.conf.json`:\n\n```json\n{\n  \"app\": {\n    \"windows\": [\n      {\n        \"url\": \"https://your-onyx-instance.com\"\n      }\n    ]\n  }\n}\n```\n\n### Add more shortcuts\n\nEdit `src-tauri/src/main.rs` in the `setup_shortcuts` function.\n\n### Window appearance\n\nModify the window configuration in `src-tauri/tauri.conf.json`:\n\n- `titleBarStyle`: `\"Overlay\"` (macOS native) or `\"Visible\"`\n- `decorations`: Window chrome\n- `transparent`: For custom backgrounds\n\n## Troubleshooting\n\n### \"Unable to resolve host\"\n\nMake sure you have an internet connection. The app loads content from `cloud.onyx.app`.\n\n### Build fails on M1/M2 Mac\n\n```bash\n# Ensure you have the right target\nrustup target add aarch64-apple-darwin\n```\n\n### Code signing for distribution\n\nFor distributing outside the App Store, you'll need to:\n\n1. Get an Apple Developer certificate\n2. Sign the app: `codesign --deep --force --sign \"Developer ID\" target/release/bundle/macos/Onyx.app`\n3. Notarize with Apple\n\n## License\n\nMIT\n"
  },
  {
    "path": "desktop/package.json",
    "content": "{\n  \"name\": \"onyx-desktop\",\n  \"version\": \"0.0.0-dev\",\n  \"description\": \"Lightweight desktop app for Onyx Cloud\",\n  \"scripts\": {\n    \"dev\": \"tauri dev\",\n    \"debug\": \"tauri dev -- -- --debug\",\n    \"build\": \"tauri build\",\n    \"build:dmg\": \"tauri build --target universal-apple-darwin\",\n    \"build:linux\": \"tauri build --bundles deb,rpm\"\n  },\n  \"dependencies\": {\n    \"@tauri-apps/api\": \"^2.10.1\"\n  },\n  \"devDependencies\": {\n    \"@tauri-apps/cli\": \"^2.10.1\"\n  }\n}\n"
  },
  {
    "path": "desktop/scripts/generate-icons.sh",
    "content": "#!/bin/bash\n# Icon generation script for Onyx Desktop\n# Requires: ImageMagick (brew install imagemagick)\n\nset -e\n\nICON_DIR=\"src-tauri/icons\"\nSOURCE_SVG=\"$ICON_DIR/icon.svg\"\n\n# Check if ImageMagick is installed\nif ! command -v magick &> /dev/null; then\n    echo \"ImageMagick not found. Install with: brew install imagemagick\"\n    exit 1\nfi\n\necho \"Generating icons from $SOURCE_SVG...\"\n\n# Generate PNG icons\nmagick -background none \"$SOURCE_SVG\" -resize 32x32 \"$ICON_DIR/32x32.png\"\nmagick -background none \"$SOURCE_SVG\" -resize 128x128 \"$ICON_DIR/128x128.png\"\nmagick -background none \"$SOURCE_SVG\" -resize 256x256 \"$ICON_DIR/128x128@2x.png\"\n\n# Generate macOS .icns\n# Create iconset directory\nICONSET=\"$ICON_DIR/icon.iconset\"\nmkdir -p \"$ICONSET\"\n\nmagick -background none \"$SOURCE_SVG\" -resize 16x16 \"$ICONSET/icon_16x16.png\"\nmagick -background none \"$SOURCE_SVG\" -resize 32x32 \"$ICONSET/icon_16x16@2x.png\"\nmagick -background none \"$SOURCE_SVG\" -resize 32x32 \"$ICONSET/icon_32x32.png\"\nmagick -background none \"$SOURCE_SVG\" -resize 64x64 \"$ICONSET/icon_32x32@2x.png\"\nmagick -background none \"$SOURCE_SVG\" -resize 128x128 \"$ICONSET/icon_128x128.png\"\nmagick -background none \"$SOURCE_SVG\" -resize 256x256 \"$ICONSET/icon_128x128@2x.png\"\nmagick -background none \"$SOURCE_SVG\" -resize 256x256 \"$ICONSET/icon_256x256.png\"\nmagick -background none \"$SOURCE_SVG\" -resize 512x512 \"$ICONSET/icon_256x256@2x.png\"\nmagick -background none \"$SOURCE_SVG\" -resize 512x512 \"$ICONSET/icon_512x512.png\"\nmagick -background none \"$SOURCE_SVG\" -resize 1024x1024 \"$ICONSET/icon_512x512@2x.png\"\n\n# Convert to icns (macOS only)\nif command -v iconutil &> /dev/null; then\n    iconutil -c icns \"$ICONSET\" -o \"$ICON_DIR/icon.icns\"\n    rm -rf \"$ICONSET\"\n    echo \"Generated icon.icns\"\nelse\n    echo \"iconutil not found (not on macOS?), skipping .icns generation\"\nfi\n\n# Generate Windows .ico\nmagick \"$ICON_DIR/32x32.png\" \"$ICON_DIR/128x128.png\" \"$ICON_DIR/icon.ico\"\n\necho \"Done! Icons generated in $ICON_DIR/\"\nls -la \"$ICON_DIR/\"\n"
  },
  {
    "path": "desktop/src/index.html",
    "content": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"UTF-8\" />\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <title>Onyx</title>\n    <link\n      href=\"https://fonts.googleapis.com/css2?family=Hanken+Grotesk:wght@400;500;600;700&display=swap\"\n      rel=\"stylesheet\"\n    />\n    <style>\n      :root {\n        --background-900: #f5f5f5;\n        --background-800: #ffffff;\n        --text-light-05: rgba(0, 0, 0, 0.95);\n        --text-light-03: rgba(0, 0, 0, 0.6);\n        --white-10: rgba(0, 0, 0, 0.1);\n        --white-15: rgba(0, 0, 0, 0.15);\n        --white-20: rgba(0, 0, 0, 0.2);\n        --white-30: rgba(0, 0, 0, 0.3);\n        --font-hanken-grotesk: \"Hanken Grotesk\", -apple-system,\n          BlinkMacSystemFont, \"Segoe UI\", Roboto, sans-serif;\n      }\n\n      .dark {\n        --background-900: #1a1a1a;\n        --background-800: #262626;\n        --text-light-05: rgba(255, 255, 255, 0.95);\n        --text-light-03: rgba(255, 255, 255, 0.6);\n        --white-10: rgba(255, 255, 255, 0.08);\n        --white-15: rgba(255, 255, 255, 0.12);\n        --white-20: rgba(255, 255, 255, 0.15);\n        --white-30: rgba(255, 255, 255, 0.25);\n      }\n\n      * {\n        box-sizing: border-box;\n        margin: 0;\n        padding: 0;\n      }\n\n      body {\n        font-family: var(--font-hanken-grotesk);\n        background: linear-gradient(\n          135deg,\n          var(--background-900) 0%,\n          var(--background-800) 100%\n        );\n        min-height: 100vh;\n        color: var(--text-light-05);\n        display: flex;\n        align-items: center;\n        justify-content: center;\n        padding: 20px;\n        -webkit-user-select: none;\n        user-select: none;\n        transition:\n          background 0.3s ease,\n          color 0.3s ease;\n      }\n\n      .titlebar {\n        position: fixed;\n        top: 0;\n        left: 0;\n        right: 0;\n        height: 28px;\n        -webkit-app-region: drag;\n        z-index: 10000;\n      }\n\n      .settings-container {\n        max-width: 500px;\n        width: 100%;\n        opacity: 0;\n        transform: translateY(8px);\n        pointer-events: none;\n        transition:\n          opacity 0.18s ease,\n          transform 0.18s ease;\n      }\n\n      body.show-settings .settings-container {\n        opacity: 1;\n        transform: translateY(0);\n        pointer-events: auto;\n      }\n\n      .settings-panel {\n        background: var(--background-800);\n        backdrop-filter: blur(24px);\n        border-radius: 16px;\n        border: 1px solid var(--white-10);\n        overflow: hidden;\n        box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);\n        transition:\n          background 0.3s ease,\n          border 0.3s ease;\n      }\n\n      .dark .settings-panel {\n        box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);\n      }\n\n      .settings-header {\n        padding: 24px;\n        border-bottom: 1px solid var(--white-10);\n        display: flex;\n        align-items: center;\n        gap: 12px;\n      }\n\n      .settings-icon {\n        width: 40px;\n        height: 40px;\n        border-radius: 12px;\n        background: var(--background-900);\n        display: flex;\n        align-items: center;\n        justify-content: center;\n        overflow: hidden;\n        transition: background 0.3s ease;\n      }\n\n      .settings-icon svg {\n        width: 24px;\n        height: 24px;\n        color: var(--text-light-05);\n        transition: color 0.3s ease;\n      }\n\n      .settings-title {\n        font-size: 20px;\n        font-weight: 600;\n        color: var(--text-light-05);\n      }\n\n      .settings-content {\n        padding: 24px;\n      }\n\n      .settings-section {\n        margin-bottom: 32px;\n      }\n\n      .settings-section:last-child {\n        margin-bottom: 0;\n      }\n\n      .section-title {\n        font-size: 11px;\n        font-weight: 600;\n        text-transform: uppercase;\n        letter-spacing: 0.05em;\n        color: var(--text-light-03);\n        margin-bottom: 12px;\n      }\n\n      .settings-group {\n        background: var(--background-900);\n        border-radius: 16px;\n        padding: 4px;\n        transition: background 0.3s ease;\n      }\n\n      .setting-row {\n        display: flex;\n        justify-content: space-between;\n        align-items: center;\n        padding: 12px;\n      }\n\n      .setting-row-content {\n        display: flex;\n        flex-direction: column;\n        gap: 4px;\n        flex: 1;\n      }\n\n      .setting-label {\n        font-size: 14px;\n        font-weight: 400;\n        color: var(--text-light-05);\n      }\n\n      .setting-description {\n        font-size: 12px;\n        color: var(--text-light-03);\n      }\n\n      .setting-divider {\n        height: 1px;\n        background: var(--white-10);\n        margin: 0 4px;\n      }\n\n      .input-field {\n        width: 100%;\n        padding: 10px 12px;\n        border: 1px solid var(--white-10);\n        border-radius: 8px;\n        font-size: 14px;\n        background: var(--background-800);\n        color: var(--text-light-05);\n        font-family: var(--font-hanken-grotesk);\n        transition: all 0.2s;\n        -webkit-app-region: no-drag;\n      }\n\n      .input-field:focus {\n        outline: none;\n        border-color: var(--white-30);\n        background: var(--background-900);\n        box-shadow: 0 0 0 2px var(--white-10);\n      }\n\n      .input-field::placeholder {\n        color: var(--text-light-03);\n      }\n\n      .input-field.error {\n        border-color: #ef4444;\n      }\n\n      .error-message {\n        color: #ef4444;\n        font-size: 12px;\n        margin-top: 4px;\n        padding-left: 12px;\n        display: none;\n      }\n\n      .error-message.visible {\n        display: block;\n      }\n\n      .toggle-switch {\n        position: relative;\n        display: inline-block;\n        width: 44px;\n        height: 24px;\n        flex-shrink: 0;\n      }\n\n      .toggle-switch input {\n        opacity: 0;\n        width: 0;\n        height: 0;\n      }\n\n      .toggle-slider {\n        position: absolute;\n        cursor: pointer;\n        top: 0;\n        left: 0;\n        right: 0;\n        bottom: 0;\n        background-color: var(--white-15);\n        transition: 0.3s;\n        border-radius: 24px;\n      }\n\n      .toggle-slider:before {\n        position: absolute;\n        content: \"\";\n        height: 18px;\n        width: 18px;\n        left: 3px;\n        bottom: 3px;\n        background-color: var(--background-800);\n        box-shadow: 0 1px 3px rgba(0, 0, 0, 0.2);\n        transition: 0.3s;\n        border-radius: 50%;\n      }\n\n      .dark .toggle-slider:before {\n        box-shadow: 0 1px 3px rgba(0, 0, 0, 0.5);\n      }\n\n      input:checked + .toggle-slider {\n        background-color: var(--white-30);\n      }\n\n      input:checked + .toggle-slider:before {\n        transform: translateX(20px);\n      }\n\n      .button {\n        padding: 12px 24px;\n        border-radius: 8px;\n        border: none;\n        cursor: pointer;\n        font-size: 14px;\n        font-weight: 600;\n        transition: all 0.2s;\n        font-family: var(--font-hanken-grotesk);\n        width: 100%;\n        margin-top: 24px;\n        -webkit-app-region: no-drag;\n      }\n\n      .button.primary {\n        background: #286df8;\n        color: white;\n      }\n\n      .button.primary:hover {\n        background: #1e5cd6;\n        box-shadow: 0 4px 12px rgba(40, 109, 248, 0.3);\n      }\n\n      .button.primary:disabled {\n        opacity: 0.5;\n        cursor: not-allowed;\n        box-shadow: none;\n      }\n\n      kbd {\n        background: var(--white-10);\n        border: 1px solid var(--white-15);\n        border-radius: 4px;\n        padding: 2px 6px;\n        font-family: monospace;\n        font-weight: 500;\n        color: var(--text-light-05);\n        font-size: 11px;\n        transition: all 0.3s ease;\n      }\n    </style>\n  </head>\n  <body>\n    <div class=\"titlebar\"></div>\n\n    <div class=\"settings-container\">\n      <div class=\"settings-panel\">\n        <div class=\"settings-header\">\n          <div class=\"settings-icon\">\n            <svg\n              viewBox=\"0 0 56 56\"\n              xmlns=\"http://www.w3.org/2000/svg\"\n              fill=\"currentColor\"\n            >\n              <path\n                fill-rule=\"evenodd\"\n                clip-rule=\"evenodd\"\n                d=\"M28 0 10.869 7.77 28 15.539l17.131-7.77L28 0Zm0 40.461-17.131 7.77L28 56l17.131-7.77L28 40.461Zm20.231-29.592L56 28.001l-7.769 17.131L40.462 28l7.769-17.131ZM15.538 28 7.77 10.869 0 28l7.769 17.131L15.538 28Z\"\n              />\n            </svg>\n          </div>\n          <h1 class=\"settings-title\">Settings</h1>\n        </div>\n\n        <div class=\"settings-content\">\n          <section class=\"settings-section\">\n            <div class=\"section-title\">GENERAL</div>\n            <div class=\"settings-group\">\n              <div class=\"setting-row\">\n                <div class=\"setting-row-content\">\n                  <label class=\"setting-label\" for=\"onyxDomain\"\n                    >Root Domain</label\n                  >\n                  <div class=\"setting-description\">\n                    The root URL for your Onyx instance\n                  </div>\n                </div>\n              </div>\n              <div class=\"setting-divider\"></div>\n              <div class=\"setting-row\" style=\"padding: 12px\">\n                <input\n                  type=\"text\"\n                  id=\"onyxDomain\"\n                  class=\"input-field\"\n                  placeholder=\"https://cloud.onyx.app\"\n                  autocomplete=\"off\"\n                  autocorrect=\"off\"\n                  autocapitalize=\"off\"\n                  spellcheck=\"false\"\n                />\n              </div>\n              <div class=\"error-message\" id=\"errorMessage\">\n                Please enter a valid URL starting with http:// or https://\n              </div>\n            </div>\n          </section>\n\n          <button class=\"button primary\" id=\"saveBtn\">Save & Connect</button>\n        </div>\n      </div>\n    </div>\n\n    <script>\n      // Import Tauri API\n      const { invoke } = window.__TAURI__.core;\n\n      // Configuration\n      const DEFAULT_DOMAIN = \"https://cloud.onyx.app\";\n      let currentServerUrl = \"\";\n\n      // DOM elements\n      const domainInput = document.getElementById(\"onyxDomain\");\n      const errorMessage = document.getElementById(\"errorMessage\");\n      const saveBtn = document.getElementById(\"saveBtn\");\n\n      // Theme detection based on system preferences\n      function applySystemTheme() {\n        const darkModeQuery = window.matchMedia(\"(prefers-color-scheme: dark)\");\n\n        function updateTheme(e) {\n          if (e.matches) {\n            document.documentElement.classList.add(\"dark\");\n            document.body.classList.add(\"dark\");\n          } else {\n            document.documentElement.classList.remove(\"dark\");\n            document.body.classList.remove(\"dark\");\n          }\n        }\n\n        // Apply initial theme\n        updateTheme(darkModeQuery);\n\n        // Listen for changes\n        darkModeQuery.addEventListener(\"change\", updateTheme);\n      }\n\n      function showSettings() {\n        document.body.classList.add(\"show-settings\");\n      }\n\n      // Apply system theme immediately\n      applySystemTheme();\n\n      // Initialize the app\n      async function init() {\n        try {\n          const bootstrap = await invoke(\"get_bootstrap_state\");\n          currentServerUrl = bootstrap.server_url;\n\n          // Set the input value\n          domainInput.value = currentServerUrl || DEFAULT_DOMAIN;\n\n          // Check if user came here explicitly (via Settings menu/shortcut)\n          const urlParams = new URLSearchParams(window.location.search);\n          const isExplicitSettings =\n            window.location.hash === \"#settings\" ||\n            urlParams.get(\"settings\") === \"true\";\n\n          // If user explicitly opened settings, show modal\n          if (isExplicitSettings) {\n            // Modal is already visible, user can edit and save\n            showSettings();\n            return;\n          }\n\n          // Otherwise, check if this is first launch\n          // First launch = config doesn't exist\n          if (!bootstrap.config_exists || !currentServerUrl) {\n            // First launch - show modal, require user to configure\n            showSettings();\n            return;\n          }\n\n          // Not first launch and not explicit settings\n          // Auto-redirect to configured domain\n          window.location.href = currentServerUrl;\n        } catch (error) {\n          // On error, default to cloud\n          domainInput.value = DEFAULT_DOMAIN;\n          showSettings();\n        }\n      }\n\n      // Validate URL\n      function validateUrl(url) {\n        const trimmedUrl = url.trim();\n        if (!trimmedUrl) {\n          return { valid: false, error: \"URL cannot be empty\" };\n        }\n        if (\n          !trimmedUrl.startsWith(\"http://\") &&\n          !trimmedUrl.startsWith(\"https://\")\n        ) {\n          return {\n            valid: false,\n            error: \"URL must start with http:// or https://\",\n          };\n        }\n        try {\n          new URL(trimmedUrl);\n          return { valid: true, url: trimmedUrl };\n        } catch {\n          return { valid: false, error: \"Please enter a valid URL\" };\n        }\n      }\n\n      // Show error\n      function showError(message) {\n        domainInput.classList.add(\"error\");\n        errorMessage.textContent = message;\n        errorMessage.classList.add(\"visible\");\n      }\n\n      // Clear error\n      function clearError() {\n        domainInput.classList.remove(\"error\");\n        errorMessage.classList.remove(\"visible\");\n      }\n\n      // Save configuration\n      async function saveConfiguration() {\n        clearError();\n\n        const validation = validateUrl(domainInput.value);\n        if (!validation.valid) {\n          showError(validation.error);\n          return;\n        }\n\n        try {\n          saveBtn.disabled = true;\n          saveBtn.textContent = \"Saving...\";\n\n          // Call Tauri command to save the URL\n          await invoke(\"set_server_url\", { url: validation.url });\n\n          // Success - redirect to the new URL (login page)\n          window.location.href = validation.url;\n        } catch (error) {\n          showError(error || \"Failed to save configuration\");\n          saveBtn.disabled = false;\n          saveBtn.textContent = \"Save & Connect\";\n        }\n      }\n\n      // Event listeners\n      domainInput.addEventListener(\"input\", clearError);\n      domainInput.addEventListener(\"keypress\", (e) => {\n        if (e.key === \"Enter\") {\n          saveConfiguration();\n        }\n      });\n      saveBtn.addEventListener(\"click\", saveConfiguration);\n\n      // Initialize when DOM is ready\n      if (document.readyState === \"loading\") {\n        document.addEventListener(\"DOMContentLoaded\", init);\n      } else {\n        init();\n      }\n    </script>\n  </body>\n</html>\n"
  },
  {
    "path": "desktop/src/titlebar.js",
    "content": "// Custom title bar for Onyx Desktop\n// This script injects a draggable title bar that matches Onyx design system\n\n(function () {\n  const TITLEBAR_ID = \"onyx-desktop-titlebar\";\n  const TITLEBAR_HEIGHT = 36;\n  const STYLE_ID = \"onyx-desktop-titlebar-style\";\n  const VIEWPORT_VAR = \"--onyx-desktop-viewport-height\";\n\n  // Wait for DOM to be ready\n  if (document.readyState === \"loading\") {\n    document.addEventListener(\"DOMContentLoaded\", init);\n  } else {\n    init();\n  }\n\n  function getInvoke() {\n    if (window.__TAURI__?.core?.invoke) return window.__TAURI__.core.invoke;\n    if (window.__TAURI__?.invoke) return window.__TAURI__.invoke;\n    if (window.__TAURI_INTERNALS__?.invoke)\n      return window.__TAURI_INTERNALS__.invoke;\n    return null;\n  }\n\n  async function startWindowDrag() {\n    const invoke = getInvoke();\n\n    if (invoke) {\n      try {\n        await invoke(\"start_drag_window\");\n        return;\n      } catch (err) {}\n    }\n\n    const appWindow =\n      window.__TAURI__?.window?.getCurrent?.() ??\n      window.__TAURI__?.window?.appWindow;\n\n    if (appWindow?.startDragging) {\n      try {\n        await appWindow.startDragging();\n      } catch (err) {}\n    }\n  }\n\n  function injectStyles() {\n    if (document.getElementById(STYLE_ID)) return;\n    const style = document.createElement(\"style\");\n    style.id = STYLE_ID;\n    style.textContent = `\n      :root {\n        --onyx-desktop-titlebar-height: ${TITLEBAR_HEIGHT}px;\n        --onyx-desktop-viewport-height: 100dvh;\n        --onyx-desktop-safe-height: calc(var(--onyx-desktop-viewport-height) - var(--onyx-desktop-titlebar-height));\n      }\n\n      @supports not (height: 100dvh) {\n        :root {\n          --onyx-desktop-viewport-height: 100vh;\n        }\n      }\n\n      html,\n      body {\n        height: var(--onyx-desktop-viewport-height);\n        min-height: var(--onyx-desktop-viewport-height);\n        margin: 0;\n        padding: 0;\n        overflow: hidden;\n      }\n\n      body {\n        padding-top: var(--onyx-desktop-titlebar-height) !important;\n        box-sizing: border-box;\n      }\n\n      body > div#__next,\n      body > div#root,\n      body > main {\n        height: var(--onyx-desktop-safe-height);\n        min-height: var(--onyx-desktop-safe-height);\n        overflow: auto;\n      }\n\n      /* Override common Tailwind viewport helpers so content fits under the titlebar */\n      .h-screen {\n        height: var(--onyx-desktop-safe-height) !important;\n      }\n\n      .min-h-screen {\n        min-height: var(--onyx-desktop-safe-height) !important;\n      }\n\n      .max-h-screen {\n        max-height: var(--onyx-desktop-safe-height) !important;\n      }\n\n      #${TITLEBAR_ID} {\n        cursor: default !important;\n        -webkit-user-select: none !important;\n        user-select: none !important;\n        -webkit-app-region: drag;\n        background: rgba(255, 255, 255, 0.85);\n        height: var(--onyx-desktop-titlebar-height);\n      }\n\n      /* Dark mode support */\n      .dark #${TITLEBAR_ID} {\n        background: linear-gradient(180deg, rgba(18, 18, 18, 0.82) 0%, rgba(18, 18, 18, 0.72) 100%);\n        border-bottom-color: rgba(255, 255, 255, 0.08);\n      }\n    `;\n    document.head.appendChild(style);\n  }\n\n  function updateTitleBarTheme(isDark) {\n    const titleBar = document.getElementById(TITLEBAR_ID);\n    if (!titleBar) return;\n\n    if (isDark) {\n      titleBar.style.background =\n        \"linear-gradient(180deg, rgba(18, 18, 18, 0.82) 0%, rgba(18, 18, 18, 0.72) 100%)\";\n      titleBar.style.borderBottom = \"1px solid rgba(255, 255, 255, 0.08)\";\n      titleBar.style.boxShadow = \"0 8px 28px rgba(0, 0, 0, 0.2)\";\n    } else {\n      titleBar.style.background =\n        \"linear-gradient(180deg, rgba(255, 255, 255, 0.94) 0%, rgba(255, 255, 255, 0.78) 100%)\";\n      titleBar.style.borderBottom = \"1px solid rgba(0, 0, 0, 0.06)\";\n      titleBar.style.boxShadow = \"0 8px 28px rgba(0, 0, 0, 0.04)\";\n    }\n  }\n\n  function buildTitleBar() {\n    const titleBar = document.createElement(\"div\");\n    titleBar.id = TITLEBAR_ID;\n    titleBar.setAttribute(\"data-tauri-drag-region\", \"\");\n\n    titleBar.addEventListener(\"mousedown\", (e) => {\n      // Only start drag on left click and not on buttons/inputs\n      const nonDraggable = [\n        \"BUTTON\",\n        \"INPUT\",\n        \"TEXTAREA\",\n        \"A\",\n        \"SELECT\",\n        \"OPTION\",\n      ];\n      if (e.button === 0 && !nonDraggable.includes(e.target.tagName)) {\n        e.preventDefault();\n        startWindowDrag();\n      }\n    });\n\n    // Apply initial styles matching current theme\n    const htmlHasDark = document.documentElement.classList.contains(\"dark\");\n    const bodyHasDark = document.body?.classList.contains(\"dark\");\n    const isDark = htmlHasDark || bodyHasDark;\n\n    // Apply styles matching Onyx design system with translucent glass effect\n    titleBar.style.cssText = `\n      position: fixed;\n      top: 0;\n      left: 0;\n      right: 0;\n      height: ${TITLEBAR_HEIGHT}px;\n      background: linear-gradient(180deg, rgba(255, 255, 255, 0.94) 0%, rgba(255, 255, 255, 0.78) 100%);\n      border-bottom: 1px solid rgba(0, 0, 0, 0.06);\n      box-shadow: 0 8px 28px rgba(0, 0, 0, 0.04);\n      z-index: 999999;\n      display: flex;\n      align-items: center;\n      justify-content: center;\n      cursor: default;\n      user-select: none;\n      -webkit-user-select: none;\n      font-family: 'Hanken Grotesk', -apple-system, BlinkMacSystemFont, sans-serif;\n      backdrop-filter: blur(18px) saturate(180%);\n      -webkit-backdrop-filter: blur(18px) saturate(180%);\n      -webkit-app-region: drag;\n      padding: 0 12px;\n      transition: background 0.3s ease, border-bottom 0.3s ease, box-shadow 0.3s ease;\n    `;\n\n    // Apply correct theme\n    updateTitleBarTheme(isDark);\n\n    return titleBar;\n  }\n\n  function mountTitleBar() {\n    if (!document.body) {\n      return;\n    }\n\n    const existing = document.getElementById(TITLEBAR_ID);\n    if (existing?.parentElement === document.body) {\n      // Update theme on existing titlebar\n      const htmlHasDark = document.documentElement.classList.contains(\"dark\");\n      const bodyHasDark = document.body?.classList.contains(\"dark\");\n      const isDark = htmlHasDark || bodyHasDark;\n      updateTitleBarTheme(isDark);\n      return;\n    }\n\n    if (existing) {\n      existing.remove();\n    }\n\n    const titleBar = buildTitleBar();\n    document.body.insertBefore(titleBar, document.body.firstChild);\n    injectStyles();\n\n    // Ensure theme is applied immediately after mount\n    setTimeout(() => {\n      const htmlHasDark = document.documentElement.classList.contains(\"dark\");\n      const bodyHasDark = document.body?.classList.contains(\"dark\");\n      const isDark = htmlHasDark || bodyHasDark;\n      updateTitleBarTheme(isDark);\n    }, 0);\n  }\n\n  function syncViewportHeight() {\n    const viewportHeight =\n      window.visualViewport?.height ??\n      document.documentElement?.clientHeight ??\n      window.innerHeight;\n\n    if (viewportHeight) {\n      document.documentElement.style.setProperty(\n        VIEWPORT_VAR,\n        `${viewportHeight}px`,\n      );\n    }\n  }\n\n  function observeThemeChanges() {\n    let lastKnownTheme = null;\n\n    function checkAndUpdateTheme() {\n      // Check both html and body for dark class (some apps use body)\n      const htmlHasDark = document.documentElement.classList.contains(\"dark\");\n      const bodyHasDark = document.body?.classList.contains(\"dark\");\n      const isDark = htmlHasDark || bodyHasDark;\n\n      if (lastKnownTheme !== isDark) {\n        lastKnownTheme = isDark;\n        updateTitleBarTheme(isDark);\n      }\n    }\n\n    // Immediate check on setup\n    checkAndUpdateTheme();\n\n    // Watch for theme changes on the HTML element\n    const themeObserver = new MutationObserver(() => {\n      checkAndUpdateTheme();\n    });\n\n    themeObserver.observe(document.documentElement, {\n      attributes: true,\n      attributeFilter: [\"class\"],\n    });\n\n    // Also observe body if it exists\n    if (document.body) {\n      const bodyObserver = new MutationObserver(() => {\n        checkAndUpdateTheme();\n      });\n      bodyObserver.observe(document.body, {\n        attributes: true,\n        attributeFilter: [\"class\"],\n      });\n    }\n\n    // Also check periodically in case classList is manipulated directly\n    // or the theme loads asynchronously after page load\n    const intervalId = setInterval(() => {\n      checkAndUpdateTheme();\n    }, 300);\n\n    // Clean up after 30 seconds once theme should be stable\n    setTimeout(() => {\n      clearInterval(intervalId);\n      // But keep checking every 2 seconds for manual theme changes\n      setInterval(() => {\n        checkAndUpdateTheme();\n      }, 2000);\n    }, 30000);\n  }\n\n  function init() {\n    mountTitleBar();\n    syncViewportHeight();\n    observeThemeChanges();\n\n    window.addEventListener(\"resize\", syncViewportHeight, { passive: true });\n    window.visualViewport?.addEventListener(\"resize\", syncViewportHeight, {\n      passive: true,\n    });\n\n    // Keep it around even if the app DOM re-renders\n    const observer = new MutationObserver(() => {\n      if (!document.getElementById(TITLEBAR_ID)) {\n        mountTitleBar();\n      }\n    });\n\n    observer.observe(document.documentElement, {\n      childList: true,\n      subtree: true,\n    });\n\n    // Fallback keep-alive check\n    setInterval(() => {\n      if (!document.getElementById(TITLEBAR_ID)) {\n        mountTitleBar();\n      }\n    }, 1500);\n  }\n})();\n"
  },
  {
    "path": "desktop/src-tauri/Cargo.toml",
    "content": "[package]\nname = \"onyx\"\nversion = \"0.0.0-dev\"\ndescription = \"Lightweight desktop app for Onyx Cloud\"\nauthors = [\"you\"]\nedition = \"2021\"\n\n[build-dependencies]\ntauri-build = { version = \"2.5\", features = [] }\n\n[dependencies]\ntauri = { version = \"2.10\", features = [\"macos-private-api\", \"tray-icon\", \"image-png\"] }\ntauri-plugin-shell = \"2.3.5\"\ntauri-plugin-window-state = \"2.4.1\"\nserde = { version = \"1.0\", features = [\"derive\"] }\nserde_json = \"1.0\"\nuuid = { version = \"1.0\", features = [\"v4\"] }\ndirectories = \"5.0\"\ntokio = { version = \"1\", features = [\"time\"] }\nwindow-vibrancy = \"0.7.1\"\nurl = \"2.5\"\n\n[features]\ndefault = [\"custom-protocol\"]\ncustom-protocol = [\"tauri/custom-protocol\"]\ndevtools = [\"tauri/devtools\"]\n"
  },
  {
    "path": "desktop/src-tauri/build.rs",
    "content": "fn main() {\n    tauri_build::build()\n}\n"
  },
  {
    "path": "desktop/src-tauri/gen/schemas/acl-manifests.json",
    "content": "{\"core\":{\"default_permission\":{\"identifier\":\"default\",\"description\":\"Default core plugins set.\",\"permissions\":[\"core:path:default\",\"core:event:default\",\"core:window:default\",\"core:webview:default\",\"core:app:default\",\"core:image:default\",\"core:resources:default\",\"core:menu:default\",\"core:tray:default\"]},\"permissions\":{},\"permission_sets\":{},\"global_scope_schema\":null},\"core:app\":{\"default_permission\":{\"identifier\":\"default\",\"description\":\"Default permissions for the plugin.\",\"permissions\":[\"allow-version\",\"allow-name\",\"allow-tauri-version\",\"allow-identifier\",\"allow-bundle-type\",\"allow-register-listener\",\"allow-remove-listener\"]},\"permissions\":{\"allow-app-hide\":{\"identifier\":\"allow-app-hide\",\"description\":\"Enables the app_hide command without any pre-configured scope.\",\"commands\":{\"allow\":[\"app_hide\"],\"deny\":[]}},\"allow-app-show\":{\"identifier\":\"allow-app-show\",\"description\":\"Enables the app_show command without any pre-configured scope.\",\"commands\":{\"allow\":[\"app_show\"],\"deny\":[]}},\"allow-bundle-type\":{\"identifier\":\"allow-bundle-type\",\"description\":\"Enables the bundle_type command without any pre-configured scope.\",\"commands\":{\"allow\":[\"bundle_type\"],\"deny\":[]}},\"allow-default-window-icon\":{\"identifier\":\"allow-default-window-icon\",\"description\":\"Enables the default_window_icon command without any pre-configured scope.\",\"commands\":{\"allow\":[\"default_window_icon\"],\"deny\":[]}},\"allow-fetch-data-store-identifiers\":{\"identifier\":\"allow-fetch-data-store-identifiers\",\"description\":\"Enables the fetch_data_store_identifiers command without any pre-configured scope.\",\"commands\":{\"allow\":[\"fetch_data_store_identifiers\"],\"deny\":[]}},\"allow-identifier\":{\"identifier\":\"allow-identifier\",\"description\":\"Enables the identifier command without any pre-configured scope.\",\"commands\":{\"allow\":[\"identifier\"],\"deny\":[]}},\"allow-name\":{\"identifier\":\"allow-name\",\"description\":\"Enables the name command without any pre-configured scope.\",\"commands\":{\"allow\":[\"name\"],\"deny\":[]}},\"allow-register-listener\":{\"identifier\":\"allow-register-listener\",\"description\":\"Enables the register_listener command without any pre-configured scope.\",\"commands\":{\"allow\":[\"register_listener\"],\"deny\":[]}},\"allow-remove-data-store\":{\"identifier\":\"allow-remove-data-store\",\"description\":\"Enables the remove_data_store command without any pre-configured scope.\",\"commands\":{\"allow\":[\"remove_data_store\"],\"deny\":[]}},\"allow-remove-listener\":{\"identifier\":\"allow-remove-listener\",\"description\":\"Enables the remove_listener command without any pre-configured scope.\",\"commands\":{\"allow\":[\"remove_listener\"],\"deny\":[]}},\"allow-set-app-theme\":{\"identifier\":\"allow-set-app-theme\",\"description\":\"Enables the set_app_theme command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_app_theme\"],\"deny\":[]}},\"allow-set-dock-visibility\":{\"identifier\":\"allow-set-dock-visibility\",\"description\":\"Enables the set_dock_visibility command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_dock_visibility\"],\"deny\":[]}},\"allow-tauri-version\":{\"identifier\":\"allow-tauri-version\",\"description\":\"Enables the tauri_version command without any pre-configured scope.\",\"commands\":{\"allow\":[\"tauri_version\"],\"deny\":[]}},\"allow-version\":{\"identifier\":\"allow-version\",\"description\":\"Enables the version command without any pre-configured scope.\",\"commands\":{\"allow\":[\"version\"],\"deny\":[]}},\"deny-app-hide\":{\"identifier\":\"deny-app-hide\",\"description\":\"Denies the app_hide command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"app_hide\"]}},\"deny-app-show\":{\"identifier\":\"deny-app-show\",\"description\":\"Denies the app_show command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"app_show\"]}},\"deny-bundle-type\":{\"identifier\":\"deny-bundle-type\",\"description\":\"Denies the bundle_type command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"bundle_type\"]}},\"deny-default-window-icon\":{\"identifier\":\"deny-default-window-icon\",\"description\":\"Denies the default_window_icon command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"default_window_icon\"]}},\"deny-fetch-data-store-identifiers\":{\"identifier\":\"deny-fetch-data-store-identifiers\",\"description\":\"Denies the fetch_data_store_identifiers command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"fetch_data_store_identifiers\"]}},\"deny-identifier\":{\"identifier\":\"deny-identifier\",\"description\":\"Denies the identifier command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"identifier\"]}},\"deny-name\":{\"identifier\":\"deny-name\",\"description\":\"Denies the name command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"name\"]}},\"deny-register-listener\":{\"identifier\":\"deny-register-listener\",\"description\":\"Denies the register_listener command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"register_listener\"]}},\"deny-remove-data-store\":{\"identifier\":\"deny-remove-data-store\",\"description\":\"Denies the remove_data_store command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"remove_data_store\"]}},\"deny-remove-listener\":{\"identifier\":\"deny-remove-listener\",\"description\":\"Denies the remove_listener command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"remove_listener\"]}},\"deny-set-app-theme\":{\"identifier\":\"deny-set-app-theme\",\"description\":\"Denies the set_app_theme command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_app_theme\"]}},\"deny-set-dock-visibility\":{\"identifier\":\"deny-set-dock-visibility\",\"description\":\"Denies the set_dock_visibility command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_dock_visibility\"]}},\"deny-tauri-version\":{\"identifier\":\"deny-tauri-version\",\"description\":\"Denies the tauri_version command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"tauri_version\"]}},\"deny-version\":{\"identifier\":\"deny-version\",\"description\":\"Denies the version command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"version\"]}}},\"permission_sets\":{},\"global_scope_schema\":null},\"core:event\":{\"default_permission\":{\"identifier\":\"default\",\"description\":\"Default permissions for the plugin, which enables all commands.\",\"permissions\":[\"allow-listen\",\"allow-unlisten\",\"allow-emit\",\"allow-emit-to\"]},\"permissions\":{\"allow-emit\":{\"identifier\":\"allow-emit\",\"description\":\"Enables the emit command without any pre-configured scope.\",\"commands\":{\"allow\":[\"emit\"],\"deny\":[]}},\"allow-emit-to\":{\"identifier\":\"allow-emit-to\",\"description\":\"Enables the emit_to command without any pre-configured scope.\",\"commands\":{\"allow\":[\"emit_to\"],\"deny\":[]}},\"allow-listen\":{\"identifier\":\"allow-listen\",\"description\":\"Enables the listen command without any pre-configured scope.\",\"commands\":{\"allow\":[\"listen\"],\"deny\":[]}},\"allow-unlisten\":{\"identifier\":\"allow-unlisten\",\"description\":\"Enables the unlisten command without any pre-configured scope.\",\"commands\":{\"allow\":[\"unlisten\"],\"deny\":[]}},\"deny-emit\":{\"identifier\":\"deny-emit\",\"description\":\"Denies the emit command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"emit\"]}},\"deny-emit-to\":{\"identifier\":\"deny-emit-to\",\"description\":\"Denies the emit_to command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"emit_to\"]}},\"deny-listen\":{\"identifier\":\"deny-listen\",\"description\":\"Denies the listen command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"listen\"]}},\"deny-unlisten\":{\"identifier\":\"deny-unlisten\",\"description\":\"Denies the unlisten command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"unlisten\"]}}},\"permission_sets\":{},\"global_scope_schema\":null},\"core:image\":{\"default_permission\":{\"identifier\":\"default\",\"description\":\"Default permissions for the plugin, which enables all commands.\",\"permissions\":[\"allow-new\",\"allow-from-bytes\",\"allow-from-path\",\"allow-rgba\",\"allow-size\"]},\"permissions\":{\"allow-from-bytes\":{\"identifier\":\"allow-from-bytes\",\"description\":\"Enables the from_bytes command without any pre-configured scope.\",\"commands\":{\"allow\":[\"from_bytes\"],\"deny\":[]}},\"allow-from-path\":{\"identifier\":\"allow-from-path\",\"description\":\"Enables the from_path command without any pre-configured scope.\",\"commands\":{\"allow\":[\"from_path\"],\"deny\":[]}},\"allow-new\":{\"identifier\":\"allow-new\",\"description\":\"Enables the new command without any pre-configured scope.\",\"commands\":{\"allow\":[\"new\"],\"deny\":[]}},\"allow-rgba\":{\"identifier\":\"allow-rgba\",\"description\":\"Enables the rgba command without any pre-configured scope.\",\"commands\":{\"allow\":[\"rgba\"],\"deny\":[]}},\"allow-size\":{\"identifier\":\"allow-size\",\"description\":\"Enables the size command without any pre-configured scope.\",\"commands\":{\"allow\":[\"size\"],\"deny\":[]}},\"deny-from-bytes\":{\"identifier\":\"deny-from-bytes\",\"description\":\"Denies the from_bytes command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"from_bytes\"]}},\"deny-from-path\":{\"identifier\":\"deny-from-path\",\"description\":\"Denies the from_path command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"from_path\"]}},\"deny-new\":{\"identifier\":\"deny-new\",\"description\":\"Denies the new command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"new\"]}},\"deny-rgba\":{\"identifier\":\"deny-rgba\",\"description\":\"Denies the rgba command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"rgba\"]}},\"deny-size\":{\"identifier\":\"deny-size\",\"description\":\"Denies the size command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"size\"]}}},\"permission_sets\":{},\"global_scope_schema\":null},\"core:menu\":{\"default_permission\":{\"identifier\":\"default\",\"description\":\"Default permissions for the plugin, which enables all commands.\",\"permissions\":[\"allow-new\",\"allow-append\",\"allow-prepend\",\"allow-insert\",\"allow-remove\",\"allow-remove-at\",\"allow-items\",\"allow-get\",\"allow-popup\",\"allow-create-default\",\"allow-set-as-app-menu\",\"allow-set-as-window-menu\",\"allow-text\",\"allow-set-text\",\"allow-is-enabled\",\"allow-set-enabled\",\"allow-set-accelerator\",\"allow-set-as-windows-menu-for-nsapp\",\"allow-set-as-help-menu-for-nsapp\",\"allow-is-checked\",\"allow-set-checked\",\"allow-set-icon\"]},\"permissions\":{\"allow-append\":{\"identifier\":\"allow-append\",\"description\":\"Enables the append command without any pre-configured scope.\",\"commands\":{\"allow\":[\"append\"],\"deny\":[]}},\"allow-create-default\":{\"identifier\":\"allow-create-default\",\"description\":\"Enables the create_default command without any pre-configured scope.\",\"commands\":{\"allow\":[\"create_default\"],\"deny\":[]}},\"allow-get\":{\"identifier\":\"allow-get\",\"description\":\"Enables the get command without any pre-configured scope.\",\"commands\":{\"allow\":[\"get\"],\"deny\":[]}},\"allow-insert\":{\"identifier\":\"allow-insert\",\"description\":\"Enables the insert command without any pre-configured scope.\",\"commands\":{\"allow\":[\"insert\"],\"deny\":[]}},\"allow-is-checked\":{\"identifier\":\"allow-is-checked\",\"description\":\"Enables the is_checked command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_checked\"],\"deny\":[]}},\"allow-is-enabled\":{\"identifier\":\"allow-is-enabled\",\"description\":\"Enables the is_enabled command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_enabled\"],\"deny\":[]}},\"allow-items\":{\"identifier\":\"allow-items\",\"description\":\"Enables the items command without any pre-configured scope.\",\"commands\":{\"allow\":[\"items\"],\"deny\":[]}},\"allow-new\":{\"identifier\":\"allow-new\",\"description\":\"Enables the new command without any pre-configured scope.\",\"commands\":{\"allow\":[\"new\"],\"deny\":[]}},\"allow-popup\":{\"identifier\":\"allow-popup\",\"description\":\"Enables the popup command without any pre-configured scope.\",\"commands\":{\"allow\":[\"popup\"],\"deny\":[]}},\"allow-prepend\":{\"identifier\":\"allow-prepend\",\"description\":\"Enables the prepend command without any pre-configured scope.\",\"commands\":{\"allow\":[\"prepend\"],\"deny\":[]}},\"allow-remove\":{\"identifier\":\"allow-remove\",\"description\":\"Enables the remove command without any pre-configured scope.\",\"commands\":{\"allow\":[\"remove\"],\"deny\":[]}},\"allow-remove-at\":{\"identifier\":\"allow-remove-at\",\"description\":\"Enables the remove_at command without any pre-configured scope.\",\"commands\":{\"allow\":[\"remove_at\"],\"deny\":[]}},\"allow-set-accelerator\":{\"identifier\":\"allow-set-accelerator\",\"description\":\"Enables the set_accelerator command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_accelerator\"],\"deny\":[]}},\"allow-set-as-app-menu\":{\"identifier\":\"allow-set-as-app-menu\",\"description\":\"Enables the set_as_app_menu command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_as_app_menu\"],\"deny\":[]}},\"allow-set-as-help-menu-for-nsapp\":{\"identifier\":\"allow-set-as-help-menu-for-nsapp\",\"description\":\"Enables the set_as_help_menu_for_nsapp command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_as_help_menu_for_nsapp\"],\"deny\":[]}},\"allow-set-as-window-menu\":{\"identifier\":\"allow-set-as-window-menu\",\"description\":\"Enables the set_as_window_menu command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_as_window_menu\"],\"deny\":[]}},\"allow-set-as-windows-menu-for-nsapp\":{\"identifier\":\"allow-set-as-windows-menu-for-nsapp\",\"description\":\"Enables the set_as_windows_menu_for_nsapp command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_as_windows_menu_for_nsapp\"],\"deny\":[]}},\"allow-set-checked\":{\"identifier\":\"allow-set-checked\",\"description\":\"Enables the set_checked command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_checked\"],\"deny\":[]}},\"allow-set-enabled\":{\"identifier\":\"allow-set-enabled\",\"description\":\"Enables the set_enabled command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_enabled\"],\"deny\":[]}},\"allow-set-icon\":{\"identifier\":\"allow-set-icon\",\"description\":\"Enables the set_icon command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_icon\"],\"deny\":[]}},\"allow-set-text\":{\"identifier\":\"allow-set-text\",\"description\":\"Enables the set_text command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_text\"],\"deny\":[]}},\"allow-text\":{\"identifier\":\"allow-text\",\"description\":\"Enables the text command without any pre-configured scope.\",\"commands\":{\"allow\":[\"text\"],\"deny\":[]}},\"deny-append\":{\"identifier\":\"deny-append\",\"description\":\"Denies the append command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"append\"]}},\"deny-create-default\":{\"identifier\":\"deny-create-default\",\"description\":\"Denies the create_default command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"create_default\"]}},\"deny-get\":{\"identifier\":\"deny-get\",\"description\":\"Denies the get command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"get\"]}},\"deny-insert\":{\"identifier\":\"deny-insert\",\"description\":\"Denies the insert command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"insert\"]}},\"deny-is-checked\":{\"identifier\":\"deny-is-checked\",\"description\":\"Denies the is_checked command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_checked\"]}},\"deny-is-enabled\":{\"identifier\":\"deny-is-enabled\",\"description\":\"Denies the is_enabled command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_enabled\"]}},\"deny-items\":{\"identifier\":\"deny-items\",\"description\":\"Denies the items command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"items\"]}},\"deny-new\":{\"identifier\":\"deny-new\",\"description\":\"Denies the new command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"new\"]}},\"deny-popup\":{\"identifier\":\"deny-popup\",\"description\":\"Denies the popup command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"popup\"]}},\"deny-prepend\":{\"identifier\":\"deny-prepend\",\"description\":\"Denies the prepend command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"prepend\"]}},\"deny-remove\":{\"identifier\":\"deny-remove\",\"description\":\"Denies the remove command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"remove\"]}},\"deny-remove-at\":{\"identifier\":\"deny-remove-at\",\"description\":\"Denies the remove_at command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"remove_at\"]}},\"deny-set-accelerator\":{\"identifier\":\"deny-set-accelerator\",\"description\":\"Denies the set_accelerator command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_accelerator\"]}},\"deny-set-as-app-menu\":{\"identifier\":\"deny-set-as-app-menu\",\"description\":\"Denies the set_as_app_menu command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_as_app_menu\"]}},\"deny-set-as-help-menu-for-nsapp\":{\"identifier\":\"deny-set-as-help-menu-for-nsapp\",\"description\":\"Denies the set_as_help_menu_for_nsapp command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_as_help_menu_for_nsapp\"]}},\"deny-set-as-window-menu\":{\"identifier\":\"deny-set-as-window-menu\",\"description\":\"Denies the set_as_window_menu command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_as_window_menu\"]}},\"deny-set-as-windows-menu-for-nsapp\":{\"identifier\":\"deny-set-as-windows-menu-for-nsapp\",\"description\":\"Denies the set_as_windows_menu_for_nsapp command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_as_windows_menu_for_nsapp\"]}},\"deny-set-checked\":{\"identifier\":\"deny-set-checked\",\"description\":\"Denies the set_checked command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_checked\"]}},\"deny-set-enabled\":{\"identifier\":\"deny-set-enabled\",\"description\":\"Denies the set_enabled command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_enabled\"]}},\"deny-set-icon\":{\"identifier\":\"deny-set-icon\",\"description\":\"Denies the set_icon command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_icon\"]}},\"deny-set-text\":{\"identifier\":\"deny-set-text\",\"description\":\"Denies the set_text command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_text\"]}},\"deny-text\":{\"identifier\":\"deny-text\",\"description\":\"Denies the text command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"text\"]}}},\"permission_sets\":{},\"global_scope_schema\":null},\"core:path\":{\"default_permission\":{\"identifier\":\"default\",\"description\":\"Default permissions for the plugin, which enables all commands.\",\"permissions\":[\"allow-resolve-directory\",\"allow-resolve\",\"allow-normalize\",\"allow-join\",\"allow-dirname\",\"allow-extname\",\"allow-basename\",\"allow-is-absolute\"]},\"permissions\":{\"allow-basename\":{\"identifier\":\"allow-basename\",\"description\":\"Enables the basename command without any pre-configured scope.\",\"commands\":{\"allow\":[\"basename\"],\"deny\":[]}},\"allow-dirname\":{\"identifier\":\"allow-dirname\",\"description\":\"Enables the dirname command without any pre-configured scope.\",\"commands\":{\"allow\":[\"dirname\"],\"deny\":[]}},\"allow-extname\":{\"identifier\":\"allow-extname\",\"description\":\"Enables the extname command without any pre-configured scope.\",\"commands\":{\"allow\":[\"extname\"],\"deny\":[]}},\"allow-is-absolute\":{\"identifier\":\"allow-is-absolute\",\"description\":\"Enables the is_absolute command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_absolute\"],\"deny\":[]}},\"allow-join\":{\"identifier\":\"allow-join\",\"description\":\"Enables the join command without any pre-configured scope.\",\"commands\":{\"allow\":[\"join\"],\"deny\":[]}},\"allow-normalize\":{\"identifier\":\"allow-normalize\",\"description\":\"Enables the normalize command without any pre-configured scope.\",\"commands\":{\"allow\":[\"normalize\"],\"deny\":[]}},\"allow-resolve\":{\"identifier\":\"allow-resolve\",\"description\":\"Enables the resolve command without any pre-configured scope.\",\"commands\":{\"allow\":[\"resolve\"],\"deny\":[]}},\"allow-resolve-directory\":{\"identifier\":\"allow-resolve-directory\",\"description\":\"Enables the resolve_directory command without any pre-configured scope.\",\"commands\":{\"allow\":[\"resolve_directory\"],\"deny\":[]}},\"deny-basename\":{\"identifier\":\"deny-basename\",\"description\":\"Denies the basename command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"basename\"]}},\"deny-dirname\":{\"identifier\":\"deny-dirname\",\"description\":\"Denies the dirname command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"dirname\"]}},\"deny-extname\":{\"identifier\":\"deny-extname\",\"description\":\"Denies the extname command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"extname\"]}},\"deny-is-absolute\":{\"identifier\":\"deny-is-absolute\",\"description\":\"Denies the is_absolute command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_absolute\"]}},\"deny-join\":{\"identifier\":\"deny-join\",\"description\":\"Denies the join command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"join\"]}},\"deny-normalize\":{\"identifier\":\"deny-normalize\",\"description\":\"Denies the normalize command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"normalize\"]}},\"deny-resolve\":{\"identifier\":\"deny-resolve\",\"description\":\"Denies the resolve command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"resolve\"]}},\"deny-resolve-directory\":{\"identifier\":\"deny-resolve-directory\",\"description\":\"Denies the resolve_directory command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"resolve_directory\"]}}},\"permission_sets\":{},\"global_scope_schema\":null},\"core:resources\":{\"default_permission\":{\"identifier\":\"default\",\"description\":\"Default permissions for the plugin, which enables all commands.\",\"permissions\":[\"allow-close\"]},\"permissions\":{\"allow-close\":{\"identifier\":\"allow-close\",\"description\":\"Enables the close command without any pre-configured scope.\",\"commands\":{\"allow\":[\"close\"],\"deny\":[]}},\"deny-close\":{\"identifier\":\"deny-close\",\"description\":\"Denies the close command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"close\"]}}},\"permission_sets\":{},\"global_scope_schema\":null},\"core:tray\":{\"default_permission\":{\"identifier\":\"default\",\"description\":\"Default permissions for the plugin, which enables all commands.\",\"permissions\":[\"allow-new\",\"allow-get-by-id\",\"allow-remove-by-id\",\"allow-set-icon\",\"allow-set-menu\",\"allow-set-tooltip\",\"allow-set-title\",\"allow-set-visible\",\"allow-set-temp-dir-path\",\"allow-set-icon-as-template\",\"allow-set-show-menu-on-left-click\"]},\"permissions\":{\"allow-get-by-id\":{\"identifier\":\"allow-get-by-id\",\"description\":\"Enables the get_by_id command without any pre-configured scope.\",\"commands\":{\"allow\":[\"get_by_id\"],\"deny\":[]}},\"allow-new\":{\"identifier\":\"allow-new\",\"description\":\"Enables the new command without any pre-configured scope.\",\"commands\":{\"allow\":[\"new\"],\"deny\":[]}},\"allow-remove-by-id\":{\"identifier\":\"allow-remove-by-id\",\"description\":\"Enables the remove_by_id command without any pre-configured scope.\",\"commands\":{\"allow\":[\"remove_by_id\"],\"deny\":[]}},\"allow-set-icon\":{\"identifier\":\"allow-set-icon\",\"description\":\"Enables the set_icon command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_icon\"],\"deny\":[]}},\"allow-set-icon-as-template\":{\"identifier\":\"allow-set-icon-as-template\",\"description\":\"Enables the set_icon_as_template command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_icon_as_template\"],\"deny\":[]}},\"allow-set-menu\":{\"identifier\":\"allow-set-menu\",\"description\":\"Enables the set_menu command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_menu\"],\"deny\":[]}},\"allow-set-show-menu-on-left-click\":{\"identifier\":\"allow-set-show-menu-on-left-click\",\"description\":\"Enables the set_show_menu_on_left_click command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_show_menu_on_left_click\"],\"deny\":[]}},\"allow-set-temp-dir-path\":{\"identifier\":\"allow-set-temp-dir-path\",\"description\":\"Enables the set_temp_dir_path command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_temp_dir_path\"],\"deny\":[]}},\"allow-set-title\":{\"identifier\":\"allow-set-title\",\"description\":\"Enables the set_title command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_title\"],\"deny\":[]}},\"allow-set-tooltip\":{\"identifier\":\"allow-set-tooltip\",\"description\":\"Enables the set_tooltip command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_tooltip\"],\"deny\":[]}},\"allow-set-visible\":{\"identifier\":\"allow-set-visible\",\"description\":\"Enables the set_visible command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_visible\"],\"deny\":[]}},\"deny-get-by-id\":{\"identifier\":\"deny-get-by-id\",\"description\":\"Denies the get_by_id command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"get_by_id\"]}},\"deny-new\":{\"identifier\":\"deny-new\",\"description\":\"Denies the new command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"new\"]}},\"deny-remove-by-id\":{\"identifier\":\"deny-remove-by-id\",\"description\":\"Denies the remove_by_id command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"remove_by_id\"]}},\"deny-set-icon\":{\"identifier\":\"deny-set-icon\",\"description\":\"Denies the set_icon command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_icon\"]}},\"deny-set-icon-as-template\":{\"identifier\":\"deny-set-icon-as-template\",\"description\":\"Denies the set_icon_as_template command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_icon_as_template\"]}},\"deny-set-menu\":{\"identifier\":\"deny-set-menu\",\"description\":\"Denies the set_menu command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_menu\"]}},\"deny-set-show-menu-on-left-click\":{\"identifier\":\"deny-set-show-menu-on-left-click\",\"description\":\"Denies the set_show_menu_on_left_click command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_show_menu_on_left_click\"]}},\"deny-set-temp-dir-path\":{\"identifier\":\"deny-set-temp-dir-path\",\"description\":\"Denies the set_temp_dir_path command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_temp_dir_path\"]}},\"deny-set-title\":{\"identifier\":\"deny-set-title\",\"description\":\"Denies the set_title command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_title\"]}},\"deny-set-tooltip\":{\"identifier\":\"deny-set-tooltip\",\"description\":\"Denies the set_tooltip command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_tooltip\"]}},\"deny-set-visible\":{\"identifier\":\"deny-set-visible\",\"description\":\"Denies the set_visible command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_visible\"]}}},\"permission_sets\":{},\"global_scope_schema\":null},\"core:webview\":{\"default_permission\":{\"identifier\":\"default\",\"description\":\"Default permissions for the plugin.\",\"permissions\":[\"allow-get-all-webviews\",\"allow-webview-position\",\"allow-webview-size\",\"allow-internal-toggle-devtools\"]},\"permissions\":{\"allow-clear-all-browsing-data\":{\"identifier\":\"allow-clear-all-browsing-data\",\"description\":\"Enables the clear_all_browsing_data command without any pre-configured scope.\",\"commands\":{\"allow\":[\"clear_all_browsing_data\"],\"deny\":[]}},\"allow-create-webview\":{\"identifier\":\"allow-create-webview\",\"description\":\"Enables the create_webview command without any pre-configured scope.\",\"commands\":{\"allow\":[\"create_webview\"],\"deny\":[]}},\"allow-create-webview-window\":{\"identifier\":\"allow-create-webview-window\",\"description\":\"Enables the create_webview_window command without any pre-configured scope.\",\"commands\":{\"allow\":[\"create_webview_window\"],\"deny\":[]}},\"allow-get-all-webviews\":{\"identifier\":\"allow-get-all-webviews\",\"description\":\"Enables the get_all_webviews command without any pre-configured scope.\",\"commands\":{\"allow\":[\"get_all_webviews\"],\"deny\":[]}},\"allow-internal-toggle-devtools\":{\"identifier\":\"allow-internal-toggle-devtools\",\"description\":\"Enables the internal_toggle_devtools command without any pre-configured scope.\",\"commands\":{\"allow\":[\"internal_toggle_devtools\"],\"deny\":[]}},\"allow-print\":{\"identifier\":\"allow-print\",\"description\":\"Enables the print command without any pre-configured scope.\",\"commands\":{\"allow\":[\"print\"],\"deny\":[]}},\"allow-reparent\":{\"identifier\":\"allow-reparent\",\"description\":\"Enables the reparent command without any pre-configured scope.\",\"commands\":{\"allow\":[\"reparent\"],\"deny\":[]}},\"allow-set-webview-auto-resize\":{\"identifier\":\"allow-set-webview-auto-resize\",\"description\":\"Enables the set_webview_auto_resize command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_webview_auto_resize\"],\"deny\":[]}},\"allow-set-webview-background-color\":{\"identifier\":\"allow-set-webview-background-color\",\"description\":\"Enables the set_webview_background_color command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_webview_background_color\"],\"deny\":[]}},\"allow-set-webview-focus\":{\"identifier\":\"allow-set-webview-focus\",\"description\":\"Enables the set_webview_focus command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_webview_focus\"],\"deny\":[]}},\"allow-set-webview-position\":{\"identifier\":\"allow-set-webview-position\",\"description\":\"Enables the set_webview_position command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_webview_position\"],\"deny\":[]}},\"allow-set-webview-size\":{\"identifier\":\"allow-set-webview-size\",\"description\":\"Enables the set_webview_size command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_webview_size\"],\"deny\":[]}},\"allow-set-webview-zoom\":{\"identifier\":\"allow-set-webview-zoom\",\"description\":\"Enables the set_webview_zoom command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_webview_zoom\"],\"deny\":[]}},\"allow-webview-close\":{\"identifier\":\"allow-webview-close\",\"description\":\"Enables the webview_close command without any pre-configured scope.\",\"commands\":{\"allow\":[\"webview_close\"],\"deny\":[]}},\"allow-webview-hide\":{\"identifier\":\"allow-webview-hide\",\"description\":\"Enables the webview_hide command without any pre-configured scope.\",\"commands\":{\"allow\":[\"webview_hide\"],\"deny\":[]}},\"allow-webview-position\":{\"identifier\":\"allow-webview-position\",\"description\":\"Enables the webview_position command without any pre-configured scope.\",\"commands\":{\"allow\":[\"webview_position\"],\"deny\":[]}},\"allow-webview-show\":{\"identifier\":\"allow-webview-show\",\"description\":\"Enables the webview_show command without any pre-configured scope.\",\"commands\":{\"allow\":[\"webview_show\"],\"deny\":[]}},\"allow-webview-size\":{\"identifier\":\"allow-webview-size\",\"description\":\"Enables the webview_size command without any pre-configured scope.\",\"commands\":{\"allow\":[\"webview_size\"],\"deny\":[]}},\"deny-clear-all-browsing-data\":{\"identifier\":\"deny-clear-all-browsing-data\",\"description\":\"Denies the clear_all_browsing_data command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"clear_all_browsing_data\"]}},\"deny-create-webview\":{\"identifier\":\"deny-create-webview\",\"description\":\"Denies the create_webview command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"create_webview\"]}},\"deny-create-webview-window\":{\"identifier\":\"deny-create-webview-window\",\"description\":\"Denies the create_webview_window command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"create_webview_window\"]}},\"deny-get-all-webviews\":{\"identifier\":\"deny-get-all-webviews\",\"description\":\"Denies the get_all_webviews command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"get_all_webviews\"]}},\"deny-internal-toggle-devtools\":{\"identifier\":\"deny-internal-toggle-devtools\",\"description\":\"Denies the internal_toggle_devtools command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"internal_toggle_devtools\"]}},\"deny-print\":{\"identifier\":\"deny-print\",\"description\":\"Denies the print command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"print\"]}},\"deny-reparent\":{\"identifier\":\"deny-reparent\",\"description\":\"Denies the reparent command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"reparent\"]}},\"deny-set-webview-auto-resize\":{\"identifier\":\"deny-set-webview-auto-resize\",\"description\":\"Denies the set_webview_auto_resize command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_webview_auto_resize\"]}},\"deny-set-webview-background-color\":{\"identifier\":\"deny-set-webview-background-color\",\"description\":\"Denies the set_webview_background_color command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_webview_background_color\"]}},\"deny-set-webview-focus\":{\"identifier\":\"deny-set-webview-focus\",\"description\":\"Denies the set_webview_focus command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_webview_focus\"]}},\"deny-set-webview-position\":{\"identifier\":\"deny-set-webview-position\",\"description\":\"Denies the set_webview_position command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_webview_position\"]}},\"deny-set-webview-size\":{\"identifier\":\"deny-set-webview-size\",\"description\":\"Denies the set_webview_size command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_webview_size\"]}},\"deny-set-webview-zoom\":{\"identifier\":\"deny-set-webview-zoom\",\"description\":\"Denies the set_webview_zoom command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_webview_zoom\"]}},\"deny-webview-close\":{\"identifier\":\"deny-webview-close\",\"description\":\"Denies the webview_close command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"webview_close\"]}},\"deny-webview-hide\":{\"identifier\":\"deny-webview-hide\",\"description\":\"Denies the webview_hide command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"webview_hide\"]}},\"deny-webview-position\":{\"identifier\":\"deny-webview-position\",\"description\":\"Denies the webview_position command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"webview_position\"]}},\"deny-webview-show\":{\"identifier\":\"deny-webview-show\",\"description\":\"Denies the webview_show command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"webview_show\"]}},\"deny-webview-size\":{\"identifier\":\"deny-webview-size\",\"description\":\"Denies the webview_size command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"webview_size\"]}}},\"permission_sets\":{},\"global_scope_schema\":null},\"core:window\":{\"default_permission\":{\"identifier\":\"default\",\"description\":\"Default permissions for the plugin.\",\"permissions\":[\"allow-get-all-windows\",\"allow-scale-factor\",\"allow-inner-position\",\"allow-outer-position\",\"allow-inner-size\",\"allow-outer-size\",\"allow-is-fullscreen\",\"allow-is-minimized\",\"allow-is-maximized\",\"allow-is-focused\",\"allow-is-decorated\",\"allow-is-resizable\",\"allow-is-maximizable\",\"allow-is-minimizable\",\"allow-is-closable\",\"allow-is-visible\",\"allow-is-enabled\",\"allow-title\",\"allow-current-monitor\",\"allow-primary-monitor\",\"allow-monitor-from-point\",\"allow-available-monitors\",\"allow-cursor-position\",\"allow-theme\",\"allow-is-always-on-top\",\"allow-internal-toggle-maximize\"]},\"permissions\":{\"allow-available-monitors\":{\"identifier\":\"allow-available-monitors\",\"description\":\"Enables the available_monitors command without any pre-configured scope.\",\"commands\":{\"allow\":[\"available_monitors\"],\"deny\":[]}},\"allow-center\":{\"identifier\":\"allow-center\",\"description\":\"Enables the center command without any pre-configured scope.\",\"commands\":{\"allow\":[\"center\"],\"deny\":[]}},\"allow-close\":{\"identifier\":\"allow-close\",\"description\":\"Enables the close command without any pre-configured scope.\",\"commands\":{\"allow\":[\"close\"],\"deny\":[]}},\"allow-create\":{\"identifier\":\"allow-create\",\"description\":\"Enables the create command without any pre-configured scope.\",\"commands\":{\"allow\":[\"create\"],\"deny\":[]}},\"allow-current-monitor\":{\"identifier\":\"allow-current-monitor\",\"description\":\"Enables the current_monitor command without any pre-configured scope.\",\"commands\":{\"allow\":[\"current_monitor\"],\"deny\":[]}},\"allow-cursor-position\":{\"identifier\":\"allow-cursor-position\",\"description\":\"Enables the cursor_position command without any pre-configured scope.\",\"commands\":{\"allow\":[\"cursor_position\"],\"deny\":[]}},\"allow-destroy\":{\"identifier\":\"allow-destroy\",\"description\":\"Enables the destroy command without any pre-configured scope.\",\"commands\":{\"allow\":[\"destroy\"],\"deny\":[]}},\"allow-get-all-windows\":{\"identifier\":\"allow-get-all-windows\",\"description\":\"Enables the get_all_windows command without any pre-configured scope.\",\"commands\":{\"allow\":[\"get_all_windows\"],\"deny\":[]}},\"allow-hide\":{\"identifier\":\"allow-hide\",\"description\":\"Enables the hide command without any pre-configured scope.\",\"commands\":{\"allow\":[\"hide\"],\"deny\":[]}},\"allow-inner-position\":{\"identifier\":\"allow-inner-position\",\"description\":\"Enables the inner_position command without any pre-configured scope.\",\"commands\":{\"allow\":[\"inner_position\"],\"deny\":[]}},\"allow-inner-size\":{\"identifier\":\"allow-inner-size\",\"description\":\"Enables the inner_size command without any pre-configured scope.\",\"commands\":{\"allow\":[\"inner_size\"],\"deny\":[]}},\"allow-internal-toggle-maximize\":{\"identifier\":\"allow-internal-toggle-maximize\",\"description\":\"Enables the internal_toggle_maximize command without any pre-configured scope.\",\"commands\":{\"allow\":[\"internal_toggle_maximize\"],\"deny\":[]}},\"allow-is-always-on-top\":{\"identifier\":\"allow-is-always-on-top\",\"description\":\"Enables the is_always_on_top command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_always_on_top\"],\"deny\":[]}},\"allow-is-closable\":{\"identifier\":\"allow-is-closable\",\"description\":\"Enables the is_closable command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_closable\"],\"deny\":[]}},\"allow-is-decorated\":{\"identifier\":\"allow-is-decorated\",\"description\":\"Enables the is_decorated command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_decorated\"],\"deny\":[]}},\"allow-is-enabled\":{\"identifier\":\"allow-is-enabled\",\"description\":\"Enables the is_enabled command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_enabled\"],\"deny\":[]}},\"allow-is-focused\":{\"identifier\":\"allow-is-focused\",\"description\":\"Enables the is_focused command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_focused\"],\"deny\":[]}},\"allow-is-fullscreen\":{\"identifier\":\"allow-is-fullscreen\",\"description\":\"Enables the is_fullscreen command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_fullscreen\"],\"deny\":[]}},\"allow-is-maximizable\":{\"identifier\":\"allow-is-maximizable\",\"description\":\"Enables the is_maximizable command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_maximizable\"],\"deny\":[]}},\"allow-is-maximized\":{\"identifier\":\"allow-is-maximized\",\"description\":\"Enables the is_maximized command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_maximized\"],\"deny\":[]}},\"allow-is-minimizable\":{\"identifier\":\"allow-is-minimizable\",\"description\":\"Enables the is_minimizable command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_minimizable\"],\"deny\":[]}},\"allow-is-minimized\":{\"identifier\":\"allow-is-minimized\",\"description\":\"Enables the is_minimized command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_minimized\"],\"deny\":[]}},\"allow-is-resizable\":{\"identifier\":\"allow-is-resizable\",\"description\":\"Enables the is_resizable command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_resizable\"],\"deny\":[]}},\"allow-is-visible\":{\"identifier\":\"allow-is-visible\",\"description\":\"Enables the is_visible command without any pre-configured scope.\",\"commands\":{\"allow\":[\"is_visible\"],\"deny\":[]}},\"allow-maximize\":{\"identifier\":\"allow-maximize\",\"description\":\"Enables the maximize command without any pre-configured scope.\",\"commands\":{\"allow\":[\"maximize\"],\"deny\":[]}},\"allow-minimize\":{\"identifier\":\"allow-minimize\",\"description\":\"Enables the minimize command without any pre-configured scope.\",\"commands\":{\"allow\":[\"minimize\"],\"deny\":[]}},\"allow-monitor-from-point\":{\"identifier\":\"allow-monitor-from-point\",\"description\":\"Enables the monitor_from_point command without any pre-configured scope.\",\"commands\":{\"allow\":[\"monitor_from_point\"],\"deny\":[]}},\"allow-outer-position\":{\"identifier\":\"allow-outer-position\",\"description\":\"Enables the outer_position command without any pre-configured scope.\",\"commands\":{\"allow\":[\"outer_position\"],\"deny\":[]}},\"allow-outer-size\":{\"identifier\":\"allow-outer-size\",\"description\":\"Enables the outer_size command without any pre-configured scope.\",\"commands\":{\"allow\":[\"outer_size\"],\"deny\":[]}},\"allow-primary-monitor\":{\"identifier\":\"allow-primary-monitor\",\"description\":\"Enables the primary_monitor command without any pre-configured scope.\",\"commands\":{\"allow\":[\"primary_monitor\"],\"deny\":[]}},\"allow-request-user-attention\":{\"identifier\":\"allow-request-user-attention\",\"description\":\"Enables the request_user_attention command without any pre-configured scope.\",\"commands\":{\"allow\":[\"request_user_attention\"],\"deny\":[]}},\"allow-scale-factor\":{\"identifier\":\"allow-scale-factor\",\"description\":\"Enables the scale_factor command without any pre-configured scope.\",\"commands\":{\"allow\":[\"scale_factor\"],\"deny\":[]}},\"allow-set-always-on-bottom\":{\"identifier\":\"allow-set-always-on-bottom\",\"description\":\"Enables the set_always_on_bottom command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_always_on_bottom\"],\"deny\":[]}},\"allow-set-always-on-top\":{\"identifier\":\"allow-set-always-on-top\",\"description\":\"Enables the set_always_on_top command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_always_on_top\"],\"deny\":[]}},\"allow-set-background-color\":{\"identifier\":\"allow-set-background-color\",\"description\":\"Enables the set_background_color command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_background_color\"],\"deny\":[]}},\"allow-set-badge-count\":{\"identifier\":\"allow-set-badge-count\",\"description\":\"Enables the set_badge_count command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_badge_count\"],\"deny\":[]}},\"allow-set-badge-label\":{\"identifier\":\"allow-set-badge-label\",\"description\":\"Enables the set_badge_label command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_badge_label\"],\"deny\":[]}},\"allow-set-closable\":{\"identifier\":\"allow-set-closable\",\"description\":\"Enables the set_closable command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_closable\"],\"deny\":[]}},\"allow-set-content-protected\":{\"identifier\":\"allow-set-content-protected\",\"description\":\"Enables the set_content_protected command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_content_protected\"],\"deny\":[]}},\"allow-set-cursor-grab\":{\"identifier\":\"allow-set-cursor-grab\",\"description\":\"Enables the set_cursor_grab command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_cursor_grab\"],\"deny\":[]}},\"allow-set-cursor-icon\":{\"identifier\":\"allow-set-cursor-icon\",\"description\":\"Enables the set_cursor_icon command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_cursor_icon\"],\"deny\":[]}},\"allow-set-cursor-position\":{\"identifier\":\"allow-set-cursor-position\",\"description\":\"Enables the set_cursor_position command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_cursor_position\"],\"deny\":[]}},\"allow-set-cursor-visible\":{\"identifier\":\"allow-set-cursor-visible\",\"description\":\"Enables the set_cursor_visible command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_cursor_visible\"],\"deny\":[]}},\"allow-set-decorations\":{\"identifier\":\"allow-set-decorations\",\"description\":\"Enables the set_decorations command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_decorations\"],\"deny\":[]}},\"allow-set-effects\":{\"identifier\":\"allow-set-effects\",\"description\":\"Enables the set_effects command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_effects\"],\"deny\":[]}},\"allow-set-enabled\":{\"identifier\":\"allow-set-enabled\",\"description\":\"Enables the set_enabled command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_enabled\"],\"deny\":[]}},\"allow-set-focus\":{\"identifier\":\"allow-set-focus\",\"description\":\"Enables the set_focus command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_focus\"],\"deny\":[]}},\"allow-set-focusable\":{\"identifier\":\"allow-set-focusable\",\"description\":\"Enables the set_focusable command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_focusable\"],\"deny\":[]}},\"allow-set-fullscreen\":{\"identifier\":\"allow-set-fullscreen\",\"description\":\"Enables the set_fullscreen command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_fullscreen\"],\"deny\":[]}},\"allow-set-icon\":{\"identifier\":\"allow-set-icon\",\"description\":\"Enables the set_icon command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_icon\"],\"deny\":[]}},\"allow-set-ignore-cursor-events\":{\"identifier\":\"allow-set-ignore-cursor-events\",\"description\":\"Enables the set_ignore_cursor_events command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_ignore_cursor_events\"],\"deny\":[]}},\"allow-set-max-size\":{\"identifier\":\"allow-set-max-size\",\"description\":\"Enables the set_max_size command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_max_size\"],\"deny\":[]}},\"allow-set-maximizable\":{\"identifier\":\"allow-set-maximizable\",\"description\":\"Enables the set_maximizable command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_maximizable\"],\"deny\":[]}},\"allow-set-min-size\":{\"identifier\":\"allow-set-min-size\",\"description\":\"Enables the set_min_size command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_min_size\"],\"deny\":[]}},\"allow-set-minimizable\":{\"identifier\":\"allow-set-minimizable\",\"description\":\"Enables the set_minimizable command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_minimizable\"],\"deny\":[]}},\"allow-set-overlay-icon\":{\"identifier\":\"allow-set-overlay-icon\",\"description\":\"Enables the set_overlay_icon command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_overlay_icon\"],\"deny\":[]}},\"allow-set-position\":{\"identifier\":\"allow-set-position\",\"description\":\"Enables the set_position command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_position\"],\"deny\":[]}},\"allow-set-progress-bar\":{\"identifier\":\"allow-set-progress-bar\",\"description\":\"Enables the set_progress_bar command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_progress_bar\"],\"deny\":[]}},\"allow-set-resizable\":{\"identifier\":\"allow-set-resizable\",\"description\":\"Enables the set_resizable command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_resizable\"],\"deny\":[]}},\"allow-set-shadow\":{\"identifier\":\"allow-set-shadow\",\"description\":\"Enables the set_shadow command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_shadow\"],\"deny\":[]}},\"allow-set-simple-fullscreen\":{\"identifier\":\"allow-set-simple-fullscreen\",\"description\":\"Enables the set_simple_fullscreen command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_simple_fullscreen\"],\"deny\":[]}},\"allow-set-size\":{\"identifier\":\"allow-set-size\",\"description\":\"Enables the set_size command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_size\"],\"deny\":[]}},\"allow-set-size-constraints\":{\"identifier\":\"allow-set-size-constraints\",\"description\":\"Enables the set_size_constraints command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_size_constraints\"],\"deny\":[]}},\"allow-set-skip-taskbar\":{\"identifier\":\"allow-set-skip-taskbar\",\"description\":\"Enables the set_skip_taskbar command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_skip_taskbar\"],\"deny\":[]}},\"allow-set-theme\":{\"identifier\":\"allow-set-theme\",\"description\":\"Enables the set_theme command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_theme\"],\"deny\":[]}},\"allow-set-title\":{\"identifier\":\"allow-set-title\",\"description\":\"Enables the set_title command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_title\"],\"deny\":[]}},\"allow-set-title-bar-style\":{\"identifier\":\"allow-set-title-bar-style\",\"description\":\"Enables the set_title_bar_style command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_title_bar_style\"],\"deny\":[]}},\"allow-set-visible-on-all-workspaces\":{\"identifier\":\"allow-set-visible-on-all-workspaces\",\"description\":\"Enables the set_visible_on_all_workspaces command without any pre-configured scope.\",\"commands\":{\"allow\":[\"set_visible_on_all_workspaces\"],\"deny\":[]}},\"allow-show\":{\"identifier\":\"allow-show\",\"description\":\"Enables the show command without any pre-configured scope.\",\"commands\":{\"allow\":[\"show\"],\"deny\":[]}},\"allow-start-dragging\":{\"identifier\":\"allow-start-dragging\",\"description\":\"Enables the start_dragging command without any pre-configured scope.\",\"commands\":{\"allow\":[\"start_dragging\"],\"deny\":[]}},\"allow-start-resize-dragging\":{\"identifier\":\"allow-start-resize-dragging\",\"description\":\"Enables the start_resize_dragging command without any pre-configured scope.\",\"commands\":{\"allow\":[\"start_resize_dragging\"],\"deny\":[]}},\"allow-theme\":{\"identifier\":\"allow-theme\",\"description\":\"Enables the theme command without any pre-configured scope.\",\"commands\":{\"allow\":[\"theme\"],\"deny\":[]}},\"allow-title\":{\"identifier\":\"allow-title\",\"description\":\"Enables the title command without any pre-configured scope.\",\"commands\":{\"allow\":[\"title\"],\"deny\":[]}},\"allow-toggle-maximize\":{\"identifier\":\"allow-toggle-maximize\",\"description\":\"Enables the toggle_maximize command without any pre-configured scope.\",\"commands\":{\"allow\":[\"toggle_maximize\"],\"deny\":[]}},\"allow-unmaximize\":{\"identifier\":\"allow-unmaximize\",\"description\":\"Enables the unmaximize command without any pre-configured scope.\",\"commands\":{\"allow\":[\"unmaximize\"],\"deny\":[]}},\"allow-unminimize\":{\"identifier\":\"allow-unminimize\",\"description\":\"Enables the unminimize command without any pre-configured scope.\",\"commands\":{\"allow\":[\"unminimize\"],\"deny\":[]}},\"deny-available-monitors\":{\"identifier\":\"deny-available-monitors\",\"description\":\"Denies the available_monitors command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"available_monitors\"]}},\"deny-center\":{\"identifier\":\"deny-center\",\"description\":\"Denies the center command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"center\"]}},\"deny-close\":{\"identifier\":\"deny-close\",\"description\":\"Denies the close command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"close\"]}},\"deny-create\":{\"identifier\":\"deny-create\",\"description\":\"Denies the create command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"create\"]}},\"deny-current-monitor\":{\"identifier\":\"deny-current-monitor\",\"description\":\"Denies the current_monitor command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"current_monitor\"]}},\"deny-cursor-position\":{\"identifier\":\"deny-cursor-position\",\"description\":\"Denies the cursor_position command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"cursor_position\"]}},\"deny-destroy\":{\"identifier\":\"deny-destroy\",\"description\":\"Denies the destroy command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"destroy\"]}},\"deny-get-all-windows\":{\"identifier\":\"deny-get-all-windows\",\"description\":\"Denies the get_all_windows command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"get_all_windows\"]}},\"deny-hide\":{\"identifier\":\"deny-hide\",\"description\":\"Denies the hide command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"hide\"]}},\"deny-inner-position\":{\"identifier\":\"deny-inner-position\",\"description\":\"Denies the inner_position command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"inner_position\"]}},\"deny-inner-size\":{\"identifier\":\"deny-inner-size\",\"description\":\"Denies the inner_size command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"inner_size\"]}},\"deny-internal-toggle-maximize\":{\"identifier\":\"deny-internal-toggle-maximize\",\"description\":\"Denies the internal_toggle_maximize command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"internal_toggle_maximize\"]}},\"deny-is-always-on-top\":{\"identifier\":\"deny-is-always-on-top\",\"description\":\"Denies the is_always_on_top command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_always_on_top\"]}},\"deny-is-closable\":{\"identifier\":\"deny-is-closable\",\"description\":\"Denies the is_closable command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_closable\"]}},\"deny-is-decorated\":{\"identifier\":\"deny-is-decorated\",\"description\":\"Denies the is_decorated command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_decorated\"]}},\"deny-is-enabled\":{\"identifier\":\"deny-is-enabled\",\"description\":\"Denies the is_enabled command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_enabled\"]}},\"deny-is-focused\":{\"identifier\":\"deny-is-focused\",\"description\":\"Denies the is_focused command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_focused\"]}},\"deny-is-fullscreen\":{\"identifier\":\"deny-is-fullscreen\",\"description\":\"Denies the is_fullscreen command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_fullscreen\"]}},\"deny-is-maximizable\":{\"identifier\":\"deny-is-maximizable\",\"description\":\"Denies the is_maximizable command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_maximizable\"]}},\"deny-is-maximized\":{\"identifier\":\"deny-is-maximized\",\"description\":\"Denies the is_maximized command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_maximized\"]}},\"deny-is-minimizable\":{\"identifier\":\"deny-is-minimizable\",\"description\":\"Denies the is_minimizable command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_minimizable\"]}},\"deny-is-minimized\":{\"identifier\":\"deny-is-minimized\",\"description\":\"Denies the is_minimized command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_minimized\"]}},\"deny-is-resizable\":{\"identifier\":\"deny-is-resizable\",\"description\":\"Denies the is_resizable command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_resizable\"]}},\"deny-is-visible\":{\"identifier\":\"deny-is-visible\",\"description\":\"Denies the is_visible command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"is_visible\"]}},\"deny-maximize\":{\"identifier\":\"deny-maximize\",\"description\":\"Denies the maximize command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"maximize\"]}},\"deny-minimize\":{\"identifier\":\"deny-minimize\",\"description\":\"Denies the minimize command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"minimize\"]}},\"deny-monitor-from-point\":{\"identifier\":\"deny-monitor-from-point\",\"description\":\"Denies the monitor_from_point command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"monitor_from_point\"]}},\"deny-outer-position\":{\"identifier\":\"deny-outer-position\",\"description\":\"Denies the outer_position command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"outer_position\"]}},\"deny-outer-size\":{\"identifier\":\"deny-outer-size\",\"description\":\"Denies the outer_size command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"outer_size\"]}},\"deny-primary-monitor\":{\"identifier\":\"deny-primary-monitor\",\"description\":\"Denies the primary_monitor command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"primary_monitor\"]}},\"deny-request-user-attention\":{\"identifier\":\"deny-request-user-attention\",\"description\":\"Denies the request_user_attention command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"request_user_attention\"]}},\"deny-scale-factor\":{\"identifier\":\"deny-scale-factor\",\"description\":\"Denies the scale_factor command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"scale_factor\"]}},\"deny-set-always-on-bottom\":{\"identifier\":\"deny-set-always-on-bottom\",\"description\":\"Denies the set_always_on_bottom command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_always_on_bottom\"]}},\"deny-set-always-on-top\":{\"identifier\":\"deny-set-always-on-top\",\"description\":\"Denies the set_always_on_top command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_always_on_top\"]}},\"deny-set-background-color\":{\"identifier\":\"deny-set-background-color\",\"description\":\"Denies the set_background_color command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_background_color\"]}},\"deny-set-badge-count\":{\"identifier\":\"deny-set-badge-count\",\"description\":\"Denies the set_badge_count command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_badge_count\"]}},\"deny-set-badge-label\":{\"identifier\":\"deny-set-badge-label\",\"description\":\"Denies the set_badge_label command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_badge_label\"]}},\"deny-set-closable\":{\"identifier\":\"deny-set-closable\",\"description\":\"Denies the set_closable command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_closable\"]}},\"deny-set-content-protected\":{\"identifier\":\"deny-set-content-protected\",\"description\":\"Denies the set_content_protected command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_content_protected\"]}},\"deny-set-cursor-grab\":{\"identifier\":\"deny-set-cursor-grab\",\"description\":\"Denies the set_cursor_grab command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_cursor_grab\"]}},\"deny-set-cursor-icon\":{\"identifier\":\"deny-set-cursor-icon\",\"description\":\"Denies the set_cursor_icon command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_cursor_icon\"]}},\"deny-set-cursor-position\":{\"identifier\":\"deny-set-cursor-position\",\"description\":\"Denies the set_cursor_position command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_cursor_position\"]}},\"deny-set-cursor-visible\":{\"identifier\":\"deny-set-cursor-visible\",\"description\":\"Denies the set_cursor_visible command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_cursor_visible\"]}},\"deny-set-decorations\":{\"identifier\":\"deny-set-decorations\",\"description\":\"Denies the set_decorations command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_decorations\"]}},\"deny-set-effects\":{\"identifier\":\"deny-set-effects\",\"description\":\"Denies the set_effects command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_effects\"]}},\"deny-set-enabled\":{\"identifier\":\"deny-set-enabled\",\"description\":\"Denies the set_enabled command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_enabled\"]}},\"deny-set-focus\":{\"identifier\":\"deny-set-focus\",\"description\":\"Denies the set_focus command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_focus\"]}},\"deny-set-focusable\":{\"identifier\":\"deny-set-focusable\",\"description\":\"Denies the set_focusable command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_focusable\"]}},\"deny-set-fullscreen\":{\"identifier\":\"deny-set-fullscreen\",\"description\":\"Denies the set_fullscreen command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_fullscreen\"]}},\"deny-set-icon\":{\"identifier\":\"deny-set-icon\",\"description\":\"Denies the set_icon command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_icon\"]}},\"deny-set-ignore-cursor-events\":{\"identifier\":\"deny-set-ignore-cursor-events\",\"description\":\"Denies the set_ignore_cursor_events command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_ignore_cursor_events\"]}},\"deny-set-max-size\":{\"identifier\":\"deny-set-max-size\",\"description\":\"Denies the set_max_size command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_max_size\"]}},\"deny-set-maximizable\":{\"identifier\":\"deny-set-maximizable\",\"description\":\"Denies the set_maximizable command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_maximizable\"]}},\"deny-set-min-size\":{\"identifier\":\"deny-set-min-size\",\"description\":\"Denies the set_min_size command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_min_size\"]}},\"deny-set-minimizable\":{\"identifier\":\"deny-set-minimizable\",\"description\":\"Denies the set_minimizable command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_minimizable\"]}},\"deny-set-overlay-icon\":{\"identifier\":\"deny-set-overlay-icon\",\"description\":\"Denies the set_overlay_icon command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_overlay_icon\"]}},\"deny-set-position\":{\"identifier\":\"deny-set-position\",\"description\":\"Denies the set_position command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_position\"]}},\"deny-set-progress-bar\":{\"identifier\":\"deny-set-progress-bar\",\"description\":\"Denies the set_progress_bar command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_progress_bar\"]}},\"deny-set-resizable\":{\"identifier\":\"deny-set-resizable\",\"description\":\"Denies the set_resizable command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_resizable\"]}},\"deny-set-shadow\":{\"identifier\":\"deny-set-shadow\",\"description\":\"Denies the set_shadow command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_shadow\"]}},\"deny-set-simple-fullscreen\":{\"identifier\":\"deny-set-simple-fullscreen\",\"description\":\"Denies the set_simple_fullscreen command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_simple_fullscreen\"]}},\"deny-set-size\":{\"identifier\":\"deny-set-size\",\"description\":\"Denies the set_size command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_size\"]}},\"deny-set-size-constraints\":{\"identifier\":\"deny-set-size-constraints\",\"description\":\"Denies the set_size_constraints command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_size_constraints\"]}},\"deny-set-skip-taskbar\":{\"identifier\":\"deny-set-skip-taskbar\",\"description\":\"Denies the set_skip_taskbar command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_skip_taskbar\"]}},\"deny-set-theme\":{\"identifier\":\"deny-set-theme\",\"description\":\"Denies the set_theme command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_theme\"]}},\"deny-set-title\":{\"identifier\":\"deny-set-title\",\"description\":\"Denies the set_title command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_title\"]}},\"deny-set-title-bar-style\":{\"identifier\":\"deny-set-title-bar-style\",\"description\":\"Denies the set_title_bar_style command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_title_bar_style\"]}},\"deny-set-visible-on-all-workspaces\":{\"identifier\":\"deny-set-visible-on-all-workspaces\",\"description\":\"Denies the set_visible_on_all_workspaces command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"set_visible_on_all_workspaces\"]}},\"deny-show\":{\"identifier\":\"deny-show\",\"description\":\"Denies the show command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"show\"]}},\"deny-start-dragging\":{\"identifier\":\"deny-start-dragging\",\"description\":\"Denies the start_dragging command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"start_dragging\"]}},\"deny-start-resize-dragging\":{\"identifier\":\"deny-start-resize-dragging\",\"description\":\"Denies the start_resize_dragging command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"start_resize_dragging\"]}},\"deny-theme\":{\"identifier\":\"deny-theme\",\"description\":\"Denies the theme command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"theme\"]}},\"deny-title\":{\"identifier\":\"deny-title\",\"description\":\"Denies the title command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"title\"]}},\"deny-toggle-maximize\":{\"identifier\":\"deny-toggle-maximize\",\"description\":\"Denies the toggle_maximize command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"toggle_maximize\"]}},\"deny-unmaximize\":{\"identifier\":\"deny-unmaximize\",\"description\":\"Denies the unmaximize command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"unmaximize\"]}},\"deny-unminimize\":{\"identifier\":\"deny-unminimize\",\"description\":\"Denies the unminimize command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"unminimize\"]}}},\"permission_sets\":{},\"global_scope_schema\":null},\"shell\":{\"default_permission\":{\"identifier\":\"default\",\"description\":\"This permission set configures which\\nshell functionality is exposed by default.\\n\\n#### Granted Permissions\\n\\nIt allows to use the `open` functionality with a reasonable\\nscope pre-configured. It will allow opening `http(s)://`,\\n`tel:` and `mailto:` links.\\n\",\"permissions\":[\"allow-open\"]},\"permissions\":{\"allow-execute\":{\"identifier\":\"allow-execute\",\"description\":\"Enables the execute command without any pre-configured scope.\",\"commands\":{\"allow\":[\"execute\"],\"deny\":[]}},\"allow-kill\":{\"identifier\":\"allow-kill\",\"description\":\"Enables the kill command without any pre-configured scope.\",\"commands\":{\"allow\":[\"kill\"],\"deny\":[]}},\"allow-open\":{\"identifier\":\"allow-open\",\"description\":\"Enables the open command without any pre-configured scope.\",\"commands\":{\"allow\":[\"open\"],\"deny\":[]}},\"allow-spawn\":{\"identifier\":\"allow-spawn\",\"description\":\"Enables the spawn command without any pre-configured scope.\",\"commands\":{\"allow\":[\"spawn\"],\"deny\":[]}},\"allow-stdin-write\":{\"identifier\":\"allow-stdin-write\",\"description\":\"Enables the stdin_write command without any pre-configured scope.\",\"commands\":{\"allow\":[\"stdin_write\"],\"deny\":[]}},\"deny-execute\":{\"identifier\":\"deny-execute\",\"description\":\"Denies the execute command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"execute\"]}},\"deny-kill\":{\"identifier\":\"deny-kill\",\"description\":\"Denies the kill command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"kill\"]}},\"deny-open\":{\"identifier\":\"deny-open\",\"description\":\"Denies the open command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"open\"]}},\"deny-spawn\":{\"identifier\":\"deny-spawn\",\"description\":\"Denies the spawn command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"spawn\"]}},\"deny-stdin-write\":{\"identifier\":\"deny-stdin-write\",\"description\":\"Denies the stdin_write command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"stdin_write\"]}}},\"permission_sets\":{},\"global_scope_schema\":{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"anyOf\":[{\"additionalProperties\":false,\"properties\":{\"args\":{\"allOf\":[{\"$ref\":\"#/definitions/ShellScopeEntryAllowedArgs\"}],\"description\":\"The allowed arguments for the command execution.\"},\"cmd\":{\"description\":\"The command name. It can start with a variable that resolves to a system base directory. The variables are: `$AUDIO`, `$CACHE`, `$CONFIG`, `$DATA`, `$LOCALDATA`, `$DESKTOP`, `$DOCUMENT`, `$DOWNLOAD`, `$EXE`, `$FONT`, `$HOME`, `$PICTURE`, `$PUBLIC`, `$RUNTIME`, `$TEMPLATE`, `$VIDEO`, `$RESOURCE`, `$LOG`, `$TEMP`, `$APPCONFIG`, `$APPDATA`, `$APPLOCALDATA`, `$APPCACHE`, `$APPLOG`.\",\"type\":\"string\"},\"name\":{\"description\":\"The name for this allowed shell command configuration.\\n\\nThis name will be used inside of the webview API to call this command along with any specified arguments.\",\"type\":\"string\"}},\"required\":[\"cmd\",\"name\"],\"type\":\"object\"},{\"additionalProperties\":false,\"properties\":{\"args\":{\"allOf\":[{\"$ref\":\"#/definitions/ShellScopeEntryAllowedArgs\"}],\"description\":\"The allowed arguments for the command execution.\"},\"name\":{\"description\":\"The name for this allowed shell command configuration.\\n\\nThis name will be used inside of the webview API to call this command along with any specified arguments.\",\"type\":\"string\"},\"sidecar\":{\"description\":\"If this command is a sidecar command.\",\"type\":\"boolean\"}},\"required\":[\"name\",\"sidecar\"],\"type\":\"object\"}],\"definitions\":{\"ShellScopeEntryAllowedArg\":{\"anyOf\":[{\"description\":\"A non-configurable argument that is passed to the command in the order it was specified.\",\"type\":\"string\"},{\"additionalProperties\":false,\"description\":\"A variable that is set while calling the command from the webview API.\",\"properties\":{\"raw\":{\"default\":false,\"description\":\"Marks the validator as a raw regex, meaning the plugin should not make any modification at runtime.\\n\\nThis means the regex will not match on the entire string by default, which might be exploited if your regex allow unexpected input to be considered valid. When using this option, make sure your regex is correct.\",\"type\":\"boolean\"},\"validator\":{\"description\":\"[regex] validator to require passed values to conform to an expected input.\\n\\nThis will require the argument value passed to this variable to match the `validator` regex before it will be executed.\\n\\nThe regex string is by default surrounded by `^...$` to match the full string. For example the `https?://\\\\w+` regex would be registered as `^https?://\\\\w+$`.\\n\\n[regex]: <https://docs.rs/regex/latest/regex/#syntax>\",\"type\":\"string\"}},\"required\":[\"validator\"],\"type\":\"object\"}],\"description\":\"A command argument allowed to be executed by the webview API.\"},\"ShellScopeEntryAllowedArgs\":{\"anyOf\":[{\"description\":\"Use a simple boolean to allow all or disable all arguments to this command configuration.\",\"type\":\"boolean\"},{\"description\":\"A specific set of [`ShellScopeEntryAllowedArg`] that are valid to call for the command configuration.\",\"items\":{\"$ref\":\"#/definitions/ShellScopeEntryAllowedArg\"},\"type\":\"array\"}],\"description\":\"A set of command arguments allowed to be executed by the webview API.\\n\\nA value of `true` will allow any arguments to be passed to the command. `false` will disable all arguments. A list of [`ShellScopeEntryAllowedArg`] will set those arguments as the only valid arguments to be passed to the attached command configuration.\"}},\"description\":\"Shell scope entry.\",\"title\":\"ShellScopeEntry\"}},\"window-state\":{\"default_permission\":{\"identifier\":\"default\",\"description\":\"This permission set configures what kind of\\noperations are available from the window state plugin.\\n\\n#### Granted Permissions\\n\\nAll operations are enabled by default.\\n\\n\",\"permissions\":[\"allow-filename\",\"allow-restore-state\",\"allow-save-window-state\"]},\"permissions\":{\"allow-filename\":{\"identifier\":\"allow-filename\",\"description\":\"Enables the filename command without any pre-configured scope.\",\"commands\":{\"allow\":[\"filename\"],\"deny\":[]}},\"allow-restore-state\":{\"identifier\":\"allow-restore-state\",\"description\":\"Enables the restore_state command without any pre-configured scope.\",\"commands\":{\"allow\":[\"restore_state\"],\"deny\":[]}},\"allow-save-window-state\":{\"identifier\":\"allow-save-window-state\",\"description\":\"Enables the save_window_state command without any pre-configured scope.\",\"commands\":{\"allow\":[\"save_window_state\"],\"deny\":[]}},\"deny-filename\":{\"identifier\":\"deny-filename\",\"description\":\"Denies the filename command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"filename\"]}},\"deny-restore-state\":{\"identifier\":\"deny-restore-state\",\"description\":\"Denies the restore_state command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"restore_state\"]}},\"deny-save-window-state\":{\"identifier\":\"deny-save-window-state\",\"description\":\"Denies the save_window_state command without any pre-configured scope.\",\"commands\":{\"allow\":[],\"deny\":[\"save_window_state\"]}}},\"permission_sets\":{},\"global_scope_schema\":null}}"
  },
  {
    "path": "desktop/src-tauri/gen/schemas/capabilities.json",
    "content": "{}"
  },
  {
    "path": "desktop/src-tauri/gen/schemas/desktop-schema.json",
    "content": "{\n  \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"title\": \"CapabilityFile\",\n  \"description\": \"Capability formats accepted in a capability file.\",\n  \"anyOf\": [\n    {\n      \"description\": \"A single capability.\",\n      \"allOf\": [\n        {\n          \"$ref\": \"#/definitions/Capability\"\n        }\n      ]\n    },\n    {\n      \"description\": \"A list of capabilities.\",\n      \"type\": \"array\",\n      \"items\": {\n        \"$ref\": \"#/definitions/Capability\"\n      }\n    },\n    {\n      \"description\": \"A list of capabilities.\",\n      \"type\": \"object\",\n      \"required\": [\n        \"capabilities\"\n      ],\n      \"properties\": {\n        \"capabilities\": {\n          \"description\": \"The list of capabilities.\",\n          \"type\": \"array\",\n          \"items\": {\n            \"$ref\": \"#/definitions/Capability\"\n          }\n        }\n      }\n    }\n  ],\n  \"definitions\": {\n    \"Capability\": {\n      \"description\": \"A grouping and boundary mechanism developers can use to isolate access to the IPC layer.\\n\\nIt controls application windows' and webviews' fine grained access to the Tauri core, application, or plugin commands. If a webview or its window is not matching any capability then it has no access to the IPC layer at all.\\n\\nThis can be done to create groups of windows, based on their required system access, which can reduce impact of frontend vulnerabilities in less privileged windows. Windows can be added to a capability by exact name (e.g. `main-window`) or glob patterns like `*` or `admin-*`. A Window can have none, one, or multiple associated capabilities.\\n\\n## Example\\n\\n```json { \\\"identifier\\\": \\\"main-user-files-write\\\", \\\"description\\\": \\\"This capability allows the `main` window on macOS and Windows access to `filesystem` write related commands and `dialog` commands to enable programmatic access to files selected by the user.\\\", \\\"windows\\\": [ \\\"main\\\" ], \\\"permissions\\\": [ \\\"core:default\\\", \\\"dialog:open\\\", { \\\"identifier\\\": \\\"fs:allow-write-text-file\\\", \\\"allow\\\": [{ \\\"path\\\": \\\"$HOME/test.txt\\\" }] }, ], \\\"platforms\\\": [\\\"macOS\\\",\\\"windows\\\"] } ```\",\n      \"type\": \"object\",\n      \"required\": [\n        \"identifier\",\n        \"permissions\"\n      ],\n      \"properties\": {\n        \"identifier\": {\n          \"description\": \"Identifier of the capability.\\n\\n## Example\\n\\n`main-user-files-write`\",\n          \"type\": \"string\"\n        },\n        \"description\": {\n          \"description\": \"Description of what the capability is intended to allow on associated windows.\\n\\nIt should contain a description of what the grouped permissions should allow.\\n\\n## Example\\n\\nThis capability allows the `main` window access to `filesystem` write related commands and `dialog` commands to enable programmatic access to files selected by the user.\",\n          \"default\": \"\",\n          \"type\": \"string\"\n        },\n        \"remote\": {\n          \"description\": \"Configure remote URLs that can use the capability permissions.\\n\\nThis setting is optional and defaults to not being set, as our default use case is that the content is served from our local application.\\n\\n:::caution Make sure you understand the security implications of providing remote sources with local system access. :::\\n\\n## Example\\n\\n```json { \\\"urls\\\": [\\\"https://*.mydomain.dev\\\"] } ```\",\n          \"anyOf\": [\n            {\n              \"$ref\": \"#/definitions/CapabilityRemote\"\n            },\n            {\n              \"type\": \"null\"\n            }\n          ]\n        },\n        \"local\": {\n          \"description\": \"Whether this capability is enabled for local app URLs or not. Defaults to `true`.\",\n          \"default\": true,\n          \"type\": \"boolean\"\n        },\n        \"windows\": {\n          \"description\": \"List of windows that are affected by this capability. Can be a glob pattern.\\n\\nIf a window label matches any of the patterns in this list, the capability will be enabled on all the webviews of that window, regardless of the value of [`Self::webviews`].\\n\\nOn multiwebview windows, prefer specifying [`Self::webviews`] and omitting [`Self::windows`] for a fine grained access control.\\n\\n## Example\\n\\n`[\\\"main\\\"]`\",\n          \"type\": \"array\",\n          \"items\": {\n            \"type\": \"string\"\n          }\n        },\n        \"webviews\": {\n          \"description\": \"List of webviews that are affected by this capability. Can be a glob pattern.\\n\\nThe capability will be enabled on all the webviews whose label matches any of the patterns in this list, regardless of whether the webview's window label matches a pattern in [`Self::windows`].\\n\\n## Example\\n\\n`[\\\"sub-webview-one\\\", \\\"sub-webview-two\\\"]`\",\n          \"type\": \"array\",\n          \"items\": {\n            \"type\": \"string\"\n          }\n        },\n        \"permissions\": {\n          \"description\": \"List of permissions attached to this capability.\\n\\nMust include the plugin name as prefix in the form of `${plugin-name}:${permission-name}`. For commands directly implemented in the application itself only `${permission-name}` is required.\\n\\n## Example\\n\\n```json [ \\\"core:default\\\", \\\"shell:allow-open\\\", \\\"dialog:open\\\", { \\\"identifier\\\": \\\"fs:allow-write-text-file\\\", \\\"allow\\\": [{ \\\"path\\\": \\\"$HOME/test.txt\\\" }] } ] ```\",\n          \"type\": \"array\",\n          \"items\": {\n            \"$ref\": \"#/definitions/PermissionEntry\"\n          },\n          \"uniqueItems\": true\n        },\n        \"platforms\": {\n          \"description\": \"Limit which target platforms this capability applies to.\\n\\nBy default all platforms are targeted.\\n\\n## Example\\n\\n`[\\\"macOS\\\",\\\"windows\\\"]`\",\n          \"type\": [\n            \"array\",\n            \"null\"\n          ],\n          \"items\": {\n            \"$ref\": \"#/definitions/Target\"\n          }\n        }\n      }\n    },\n    \"CapabilityRemote\": {\n      \"description\": \"Configuration for remote URLs that are associated with the capability.\",\n      \"type\": \"object\",\n      \"required\": [\n        \"urls\"\n      ],\n      \"properties\": {\n        \"urls\": {\n          \"description\": \"Remote domains this capability refers to using the [URLPattern standard](https://urlpattern.spec.whatwg.org/).\\n\\n## Examples\\n\\n- \\\"https://*.mydomain.dev\\\": allows subdomains of mydomain.dev - \\\"https://mydomain.dev/api/*\\\": allows any subpath of mydomain.dev/api\",\n          \"type\": \"array\",\n          \"items\": {\n            \"type\": \"string\"\n          }\n        }\n      }\n    },\n    \"PermissionEntry\": {\n      \"description\": \"An entry for a permission value in a [`Capability`] can be either a raw permission [`Identifier`] or an object that references a permission and extends its scope.\",\n      \"anyOf\": [\n        {\n          \"description\": \"Reference a permission or permission set by identifier.\",\n          \"allOf\": [\n            {\n              \"$ref\": \"#/definitions/Identifier\"\n            }\n          ]\n        },\n        {\n          \"description\": \"Reference a permission or permission set by identifier and extends its scope.\",\n          \"type\": \"object\",\n          \"allOf\": [\n            {\n              \"if\": {\n                \"properties\": {\n                  \"identifier\": {\n                    \"anyOf\": [\n                      {\n                        \"description\": \"This permission set configures which\\nshell functionality is exposed by default.\\n\\n#### Granted Permissions\\n\\nIt allows to use the `open` functionality with a reasonable\\nscope pre-configured. It will allow opening `http(s)://`,\\n`tel:` and `mailto:` links.\\n\\n#### This default permission set includes:\\n\\n- `allow-open`\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:default\",\n                        \"markdownDescription\": \"This permission set configures which\\nshell functionality is exposed by default.\\n\\n#### Granted Permissions\\n\\nIt allows to use the `open` functionality with a reasonable\\nscope pre-configured. It will allow opening `http(s)://`,\\n`tel:` and `mailto:` links.\\n\\n#### This default permission set includes:\\n\\n- `allow-open`\"\n                      },\n                      {\n                        \"description\": \"Enables the execute command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:allow-execute\",\n                        \"markdownDescription\": \"Enables the execute command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Enables the kill command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:allow-kill\",\n                        \"markdownDescription\": \"Enables the kill command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Enables the open command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:allow-open\",\n                        \"markdownDescription\": \"Enables the open command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Enables the spawn command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:allow-spawn\",\n                        \"markdownDescription\": \"Enables the spawn command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Enables the stdin_write command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:allow-stdin-write\",\n                        \"markdownDescription\": \"Enables the stdin_write command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Denies the execute command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:deny-execute\",\n                        \"markdownDescription\": \"Denies the execute command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Denies the kill command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:deny-kill\",\n                        \"markdownDescription\": \"Denies the kill command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Denies the open command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:deny-open\",\n                        \"markdownDescription\": \"Denies the open command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Denies the spawn command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:deny-spawn\",\n                        \"markdownDescription\": \"Denies the spawn command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Denies the stdin_write command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:deny-stdin-write\",\n                        \"markdownDescription\": \"Denies the stdin_write command without any pre-configured scope.\"\n                      }\n                    ]\n                  }\n                }\n              },\n              \"then\": {\n                \"properties\": {\n                  \"allow\": {\n                    \"items\": {\n                      \"title\": \"ShellScopeEntry\",\n                      \"description\": \"Shell scope entry.\",\n                      \"anyOf\": [\n                        {\n                          \"type\": \"object\",\n                          \"required\": [\n                            \"cmd\",\n                            \"name\"\n                          ],\n                          \"properties\": {\n                            \"args\": {\n                              \"description\": \"The allowed arguments for the command execution.\",\n                              \"allOf\": [\n                                {\n                                  \"$ref\": \"#/definitions/ShellScopeEntryAllowedArgs\"\n                                }\n                              ]\n                            },\n                            \"cmd\": {\n                              \"description\": \"The command name. It can start with a variable that resolves to a system base directory. The variables are: `$AUDIO`, `$CACHE`, `$CONFIG`, `$DATA`, `$LOCALDATA`, `$DESKTOP`, `$DOCUMENT`, `$DOWNLOAD`, `$EXE`, `$FONT`, `$HOME`, `$PICTURE`, `$PUBLIC`, `$RUNTIME`, `$TEMPLATE`, `$VIDEO`, `$RESOURCE`, `$LOG`, `$TEMP`, `$APPCONFIG`, `$APPDATA`, `$APPLOCALDATA`, `$APPCACHE`, `$APPLOG`.\",\n                              \"type\": \"string\"\n                            },\n                            \"name\": {\n                              \"description\": \"The name for this allowed shell command configuration.\\n\\nThis name will be used inside of the webview API to call this command along with any specified arguments.\",\n                              \"type\": \"string\"\n                            }\n                          },\n                          \"additionalProperties\": false\n                        },\n                        {\n                          \"type\": \"object\",\n                          \"required\": [\n                            \"name\",\n                            \"sidecar\"\n                          ],\n                          \"properties\": {\n                            \"args\": {\n                              \"description\": \"The allowed arguments for the command execution.\",\n                              \"allOf\": [\n                                {\n                                  \"$ref\": \"#/definitions/ShellScopeEntryAllowedArgs\"\n                                }\n                              ]\n                            },\n                            \"name\": {\n                              \"description\": \"The name for this allowed shell command configuration.\\n\\nThis name will be used inside of the webview API to call this command along with any specified arguments.\",\n                              \"type\": \"string\"\n                            },\n                            \"sidecar\": {\n                              \"description\": \"If this command is a sidecar command.\",\n                              \"type\": \"boolean\"\n                            }\n                          },\n                          \"additionalProperties\": false\n                        }\n                      ]\n                    }\n                  },\n                  \"deny\": {\n                    \"items\": {\n                      \"title\": \"ShellScopeEntry\",\n                      \"description\": \"Shell scope entry.\",\n                      \"anyOf\": [\n                        {\n                          \"type\": \"object\",\n                          \"required\": [\n                            \"cmd\",\n                            \"name\"\n                          ],\n                          \"properties\": {\n                            \"args\": {\n                              \"description\": \"The allowed arguments for the command execution.\",\n                              \"allOf\": [\n                                {\n                                  \"$ref\": \"#/definitions/ShellScopeEntryAllowedArgs\"\n                                }\n                              ]\n                            },\n                            \"cmd\": {\n                              \"description\": \"The command name. It can start with a variable that resolves to a system base directory. The variables are: `$AUDIO`, `$CACHE`, `$CONFIG`, `$DATA`, `$LOCALDATA`, `$DESKTOP`, `$DOCUMENT`, `$DOWNLOAD`, `$EXE`, `$FONT`, `$HOME`, `$PICTURE`, `$PUBLIC`, `$RUNTIME`, `$TEMPLATE`, `$VIDEO`, `$RESOURCE`, `$LOG`, `$TEMP`, `$APPCONFIG`, `$APPDATA`, `$APPLOCALDATA`, `$APPCACHE`, `$APPLOG`.\",\n                              \"type\": \"string\"\n                            },\n                            \"name\": {\n                              \"description\": \"The name for this allowed shell command configuration.\\n\\nThis name will be used inside of the webview API to call this command along with any specified arguments.\",\n                              \"type\": \"string\"\n                            }\n                          },\n                          \"additionalProperties\": false\n                        },\n                        {\n                          \"type\": \"object\",\n                          \"required\": [\n                            \"name\",\n                            \"sidecar\"\n                          ],\n                          \"properties\": {\n                            \"args\": {\n                              \"description\": \"The allowed arguments for the command execution.\",\n                              \"allOf\": [\n                                {\n                                  \"$ref\": \"#/definitions/ShellScopeEntryAllowedArgs\"\n                                }\n                              ]\n                            },\n                            \"name\": {\n                              \"description\": \"The name for this allowed shell command configuration.\\n\\nThis name will be used inside of the webview API to call this command along with any specified arguments.\",\n                              \"type\": \"string\"\n                            },\n                            \"sidecar\": {\n                              \"description\": \"If this command is a sidecar command.\",\n                              \"type\": \"boolean\"\n                            }\n                          },\n                          \"additionalProperties\": false\n                        }\n                      ]\n                    }\n                  }\n                }\n              },\n              \"properties\": {\n                \"identifier\": {\n                  \"description\": \"Identifier of the permission or permission set.\",\n                  \"allOf\": [\n                    {\n                      \"$ref\": \"#/definitions/Identifier\"\n                    }\n                  ]\n                }\n              }\n            },\n            {\n              \"properties\": {\n                \"identifier\": {\n                  \"description\": \"Identifier of the permission or permission set.\",\n                  \"allOf\": [\n                    {\n                      \"$ref\": \"#/definitions/Identifier\"\n                    }\n                  ]\n                },\n                \"allow\": {\n                  \"description\": \"Data that defines what is allowed by the scope.\",\n                  \"type\": [\n                    \"array\",\n                    \"null\"\n                  ],\n                  \"items\": {\n                    \"$ref\": \"#/definitions/Value\"\n                  }\n                },\n                \"deny\": {\n                  \"description\": \"Data that defines what is denied by the scope. This should be prioritized by validation logic.\",\n                  \"type\": [\n                    \"array\",\n                    \"null\"\n                  ],\n                  \"items\": {\n                    \"$ref\": \"#/definitions/Value\"\n                  }\n                }\n              }\n            }\n          ],\n          \"required\": [\n            \"identifier\"\n          ]\n        }\n      ]\n    },\n    \"Identifier\": {\n      \"description\": \"Permission identifier\",\n      \"oneOf\": [\n        {\n          \"description\": \"Default core plugins set.\\n#### This default permission set includes:\\n\\n- `core:path:default`\\n- `core:event:default`\\n- `core:window:default`\\n- `core:webview:default`\\n- `core:app:default`\\n- `core:image:default`\\n- `core:resources:default`\\n- `core:menu:default`\\n- `core:tray:default`\",\n          \"type\": \"string\",\n          \"const\": \"core:default\",\n          \"markdownDescription\": \"Default core plugins set.\\n#### This default permission set includes:\\n\\n- `core:path:default`\\n- `core:event:default`\\n- `core:window:default`\\n- `core:webview:default`\\n- `core:app:default`\\n- `core:image:default`\\n- `core:resources:default`\\n- `core:menu:default`\\n- `core:tray:default`\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin.\\n#### This default permission set includes:\\n\\n- `allow-version`\\n- `allow-name`\\n- `allow-tauri-version`\\n- `allow-identifier`\\n- `allow-bundle-type`\\n- `allow-register-listener`\\n- `allow-remove-listener`\",\n          \"type\": \"string\",\n          \"const\": \"core:app:default\",\n          \"markdownDescription\": \"Default permissions for the plugin.\\n#### This default permission set includes:\\n\\n- `allow-version`\\n- `allow-name`\\n- `allow-tauri-version`\\n- `allow-identifier`\\n- `allow-bundle-type`\\n- `allow-register-listener`\\n- `allow-remove-listener`\"\n        },\n        {\n          \"description\": \"Enables the app_hide command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-app-hide\",\n          \"markdownDescription\": \"Enables the app_hide command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the app_show command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-app-show\",\n          \"markdownDescription\": \"Enables the app_show command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the bundle_type command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-bundle-type\",\n          \"markdownDescription\": \"Enables the bundle_type command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the default_window_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-default-window-icon\",\n          \"markdownDescription\": \"Enables the default_window_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the fetch_data_store_identifiers command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-fetch-data-store-identifiers\",\n          \"markdownDescription\": \"Enables the fetch_data_store_identifiers command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the identifier command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-identifier\",\n          \"markdownDescription\": \"Enables the identifier command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the name command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-name\",\n          \"markdownDescription\": \"Enables the name command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the register_listener command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-register-listener\",\n          \"markdownDescription\": \"Enables the register_listener command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the remove_data_store command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-remove-data-store\",\n          \"markdownDescription\": \"Enables the remove_data_store command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the remove_listener command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-remove-listener\",\n          \"markdownDescription\": \"Enables the remove_listener command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_app_theme command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-set-app-theme\",\n          \"markdownDescription\": \"Enables the set_app_theme command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_dock_visibility command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-set-dock-visibility\",\n          \"markdownDescription\": \"Enables the set_dock_visibility command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the tauri_version command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-tauri-version\",\n          \"markdownDescription\": \"Enables the tauri_version command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the version command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-version\",\n          \"markdownDescription\": \"Enables the version command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the app_hide command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-app-hide\",\n          \"markdownDescription\": \"Denies the app_hide command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the app_show command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-app-show\",\n          \"markdownDescription\": \"Denies the app_show command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the bundle_type command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-bundle-type\",\n          \"markdownDescription\": \"Denies the bundle_type command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the default_window_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-default-window-icon\",\n          \"markdownDescription\": \"Denies the default_window_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the fetch_data_store_identifiers command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-fetch-data-store-identifiers\",\n          \"markdownDescription\": \"Denies the fetch_data_store_identifiers command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the identifier command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-identifier\",\n          \"markdownDescription\": \"Denies the identifier command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the name command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-name\",\n          \"markdownDescription\": \"Denies the name command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the register_listener command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-register-listener\",\n          \"markdownDescription\": \"Denies the register_listener command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the remove_data_store command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-remove-data-store\",\n          \"markdownDescription\": \"Denies the remove_data_store command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the remove_listener command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-remove-listener\",\n          \"markdownDescription\": \"Denies the remove_listener command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_app_theme command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-set-app-theme\",\n          \"markdownDescription\": \"Denies the set_app_theme command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_dock_visibility command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-set-dock-visibility\",\n          \"markdownDescription\": \"Denies the set_dock_visibility command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the tauri_version command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-tauri-version\",\n          \"markdownDescription\": \"Denies the tauri_version command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the version command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-version\",\n          \"markdownDescription\": \"Denies the version command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-listen`\\n- `allow-unlisten`\\n- `allow-emit`\\n- `allow-emit-to`\",\n          \"type\": \"string\",\n          \"const\": \"core:event:default\",\n          \"markdownDescription\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-listen`\\n- `allow-unlisten`\\n- `allow-emit`\\n- `allow-emit-to`\"\n        },\n        {\n          \"description\": \"Enables the emit command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:allow-emit\",\n          \"markdownDescription\": \"Enables the emit command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the emit_to command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:allow-emit-to\",\n          \"markdownDescription\": \"Enables the emit_to command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the listen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:allow-listen\",\n          \"markdownDescription\": \"Enables the listen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the unlisten command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:allow-unlisten\",\n          \"markdownDescription\": \"Enables the unlisten command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the emit command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:deny-emit\",\n          \"markdownDescription\": \"Denies the emit command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the emit_to command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:deny-emit-to\",\n          \"markdownDescription\": \"Denies the emit_to command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the listen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:deny-listen\",\n          \"markdownDescription\": \"Denies the listen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the unlisten command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:deny-unlisten\",\n          \"markdownDescription\": \"Denies the unlisten command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-new`\\n- `allow-from-bytes`\\n- `allow-from-path`\\n- `allow-rgba`\\n- `allow-size`\",\n          \"type\": \"string\",\n          \"const\": \"core:image:default\",\n          \"markdownDescription\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-new`\\n- `allow-from-bytes`\\n- `allow-from-path`\\n- `allow-rgba`\\n- `allow-size`\"\n        },\n        {\n          \"description\": \"Enables the from_bytes command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:allow-from-bytes\",\n          \"markdownDescription\": \"Enables the from_bytes command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the from_path command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:allow-from-path\",\n          \"markdownDescription\": \"Enables the from_path command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the new command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:allow-new\",\n          \"markdownDescription\": \"Enables the new command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the rgba command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:allow-rgba\",\n          \"markdownDescription\": \"Enables the rgba command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:allow-size\",\n          \"markdownDescription\": \"Enables the size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the from_bytes command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:deny-from-bytes\",\n          \"markdownDescription\": \"Denies the from_bytes command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the from_path command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:deny-from-path\",\n          \"markdownDescription\": \"Denies the from_path command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the new command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:deny-new\",\n          \"markdownDescription\": \"Denies the new command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the rgba command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:deny-rgba\",\n          \"markdownDescription\": \"Denies the rgba command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:deny-size\",\n          \"markdownDescription\": \"Denies the size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-new`\\n- `allow-append`\\n- `allow-prepend`\\n- `allow-insert`\\n- `allow-remove`\\n- `allow-remove-at`\\n- `allow-items`\\n- `allow-get`\\n- `allow-popup`\\n- `allow-create-default`\\n- `allow-set-as-app-menu`\\n- `allow-set-as-window-menu`\\n- `allow-text`\\n- `allow-set-text`\\n- `allow-is-enabled`\\n- `allow-set-enabled`\\n- `allow-set-accelerator`\\n- `allow-set-as-windows-menu-for-nsapp`\\n- `allow-set-as-help-menu-for-nsapp`\\n- `allow-is-checked`\\n- `allow-set-checked`\\n- `allow-set-icon`\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:default\",\n          \"markdownDescription\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-new`\\n- `allow-append`\\n- `allow-prepend`\\n- `allow-insert`\\n- `allow-remove`\\n- `allow-remove-at`\\n- `allow-items`\\n- `allow-get`\\n- `allow-popup`\\n- `allow-create-default`\\n- `allow-set-as-app-menu`\\n- `allow-set-as-window-menu`\\n- `allow-text`\\n- `allow-set-text`\\n- `allow-is-enabled`\\n- `allow-set-enabled`\\n- `allow-set-accelerator`\\n- `allow-set-as-windows-menu-for-nsapp`\\n- `allow-set-as-help-menu-for-nsapp`\\n- `allow-is-checked`\\n- `allow-set-checked`\\n- `allow-set-icon`\"\n        },\n        {\n          \"description\": \"Enables the append command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-append\",\n          \"markdownDescription\": \"Enables the append command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the create_default command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-create-default\",\n          \"markdownDescription\": \"Enables the create_default command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the get command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-get\",\n          \"markdownDescription\": \"Enables the get command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the insert command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-insert\",\n          \"markdownDescription\": \"Enables the insert command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_checked command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-is-checked\",\n          \"markdownDescription\": \"Enables the is_checked command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-is-enabled\",\n          \"markdownDescription\": \"Enables the is_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the items command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-items\",\n          \"markdownDescription\": \"Enables the items command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the new command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-new\",\n          \"markdownDescription\": \"Enables the new command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the popup command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-popup\",\n          \"markdownDescription\": \"Enables the popup command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the prepend command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-prepend\",\n          \"markdownDescription\": \"Enables the prepend command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the remove command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-remove\",\n          \"markdownDescription\": \"Enables the remove command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the remove_at command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-remove-at\",\n          \"markdownDescription\": \"Enables the remove_at command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_accelerator command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-accelerator\",\n          \"markdownDescription\": \"Enables the set_accelerator command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_as_app_menu command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-as-app-menu\",\n          \"markdownDescription\": \"Enables the set_as_app_menu command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_as_help_menu_for_nsapp command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-as-help-menu-for-nsapp\",\n          \"markdownDescription\": \"Enables the set_as_help_menu_for_nsapp command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_as_window_menu command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-as-window-menu\",\n          \"markdownDescription\": \"Enables the set_as_window_menu command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_as_windows_menu_for_nsapp command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-as-windows-menu-for-nsapp\",\n          \"markdownDescription\": \"Enables the set_as_windows_menu_for_nsapp command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_checked command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-checked\",\n          \"markdownDescription\": \"Enables the set_checked command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-enabled\",\n          \"markdownDescription\": \"Enables the set_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-icon\",\n          \"markdownDescription\": \"Enables the set_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_text command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-text\",\n          \"markdownDescription\": \"Enables the set_text command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the text command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-text\",\n          \"markdownDescription\": \"Enables the text command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the append command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-append\",\n          \"markdownDescription\": \"Denies the append command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the create_default command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-create-default\",\n          \"markdownDescription\": \"Denies the create_default command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the get command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-get\",\n          \"markdownDescription\": \"Denies the get command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the insert command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-insert\",\n          \"markdownDescription\": \"Denies the insert command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_checked command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-is-checked\",\n          \"markdownDescription\": \"Denies the is_checked command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-is-enabled\",\n          \"markdownDescription\": \"Denies the is_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the items command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-items\",\n          \"markdownDescription\": \"Denies the items command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the new command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-new\",\n          \"markdownDescription\": \"Denies the new command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the popup command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-popup\",\n          \"markdownDescription\": \"Denies the popup command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the prepend command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-prepend\",\n          \"markdownDescription\": \"Denies the prepend command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the remove command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-remove\",\n          \"markdownDescription\": \"Denies the remove command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the remove_at command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-remove-at\",\n          \"markdownDescription\": \"Denies the remove_at command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_accelerator command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-accelerator\",\n          \"markdownDescription\": \"Denies the set_accelerator command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_as_app_menu command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-as-app-menu\",\n          \"markdownDescription\": \"Denies the set_as_app_menu command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_as_help_menu_for_nsapp command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-as-help-menu-for-nsapp\",\n          \"markdownDescription\": \"Denies the set_as_help_menu_for_nsapp command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_as_window_menu command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-as-window-menu\",\n          \"markdownDescription\": \"Denies the set_as_window_menu command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_as_windows_menu_for_nsapp command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-as-windows-menu-for-nsapp\",\n          \"markdownDescription\": \"Denies the set_as_windows_menu_for_nsapp command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_checked command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-checked\",\n          \"markdownDescription\": \"Denies the set_checked command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-enabled\",\n          \"markdownDescription\": \"Denies the set_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-icon\",\n          \"markdownDescription\": \"Denies the set_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_text command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-text\",\n          \"markdownDescription\": \"Denies the set_text command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the text command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-text\",\n          \"markdownDescription\": \"Denies the text command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-resolve-directory`\\n- `allow-resolve`\\n- `allow-normalize`\\n- `allow-join`\\n- `allow-dirname`\\n- `allow-extname`\\n- `allow-basename`\\n- `allow-is-absolute`\",\n          \"type\": \"string\",\n          \"const\": \"core:path:default\",\n          \"markdownDescription\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-resolve-directory`\\n- `allow-resolve`\\n- `allow-normalize`\\n- `allow-join`\\n- `allow-dirname`\\n- `allow-extname`\\n- `allow-basename`\\n- `allow-is-absolute`\"\n        },\n        {\n          \"description\": \"Enables the basename command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-basename\",\n          \"markdownDescription\": \"Enables the basename command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the dirname command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-dirname\",\n          \"markdownDescription\": \"Enables the dirname command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the extname command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-extname\",\n          \"markdownDescription\": \"Enables the extname command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_absolute command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-is-absolute\",\n          \"markdownDescription\": \"Enables the is_absolute command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the join command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-join\",\n          \"markdownDescription\": \"Enables the join command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the normalize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-normalize\",\n          \"markdownDescription\": \"Enables the normalize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the resolve command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-resolve\",\n          \"markdownDescription\": \"Enables the resolve command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the resolve_directory command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-resolve-directory\",\n          \"markdownDescription\": \"Enables the resolve_directory command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the basename command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-basename\",\n          \"markdownDescription\": \"Denies the basename command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the dirname command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-dirname\",\n          \"markdownDescription\": \"Denies the dirname command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the extname command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-extname\",\n          \"markdownDescription\": \"Denies the extname command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_absolute command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-is-absolute\",\n          \"markdownDescription\": \"Denies the is_absolute command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the join command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-join\",\n          \"markdownDescription\": \"Denies the join command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the normalize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-normalize\",\n          \"markdownDescription\": \"Denies the normalize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the resolve command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-resolve\",\n          \"markdownDescription\": \"Denies the resolve command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the resolve_directory command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-resolve-directory\",\n          \"markdownDescription\": \"Denies the resolve_directory command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-close`\",\n          \"type\": \"string\",\n          \"const\": \"core:resources:default\",\n          \"markdownDescription\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-close`\"\n        },\n        {\n          \"description\": \"Enables the close command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:resources:allow-close\",\n          \"markdownDescription\": \"Enables the close command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the close command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:resources:deny-close\",\n          \"markdownDescription\": \"Denies the close command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-new`\\n- `allow-get-by-id`\\n- `allow-remove-by-id`\\n- `allow-set-icon`\\n- `allow-set-menu`\\n- `allow-set-tooltip`\\n- `allow-set-title`\\n- `allow-set-visible`\\n- `allow-set-temp-dir-path`\\n- `allow-set-icon-as-template`\\n- `allow-set-show-menu-on-left-click`\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:default\",\n          \"markdownDescription\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-new`\\n- `allow-get-by-id`\\n- `allow-remove-by-id`\\n- `allow-set-icon`\\n- `allow-set-menu`\\n- `allow-set-tooltip`\\n- `allow-set-title`\\n- `allow-set-visible`\\n- `allow-set-temp-dir-path`\\n- `allow-set-icon-as-template`\\n- `allow-set-show-menu-on-left-click`\"\n        },\n        {\n          \"description\": \"Enables the get_by_id command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-get-by-id\",\n          \"markdownDescription\": \"Enables the get_by_id command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the new command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-new\",\n          \"markdownDescription\": \"Enables the new command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the remove_by_id command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-remove-by-id\",\n          \"markdownDescription\": \"Enables the remove_by_id command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-icon\",\n          \"markdownDescription\": \"Enables the set_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_icon_as_template command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-icon-as-template\",\n          \"markdownDescription\": \"Enables the set_icon_as_template command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_menu command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-menu\",\n          \"markdownDescription\": \"Enables the set_menu command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_show_menu_on_left_click command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-show-menu-on-left-click\",\n          \"markdownDescription\": \"Enables the set_show_menu_on_left_click command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_temp_dir_path command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-temp-dir-path\",\n          \"markdownDescription\": \"Enables the set_temp_dir_path command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_title command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-title\",\n          \"markdownDescription\": \"Enables the set_title command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_tooltip command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-tooltip\",\n          \"markdownDescription\": \"Enables the set_tooltip command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_visible command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-visible\",\n          \"markdownDescription\": \"Enables the set_visible command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the get_by_id command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-get-by-id\",\n          \"markdownDescription\": \"Denies the get_by_id command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the new command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-new\",\n          \"markdownDescription\": \"Denies the new command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the remove_by_id command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-remove-by-id\",\n          \"markdownDescription\": \"Denies the remove_by_id command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-icon\",\n          \"markdownDescription\": \"Denies the set_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_icon_as_template command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-icon-as-template\",\n          \"markdownDescription\": \"Denies the set_icon_as_template command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_menu command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-menu\",\n          \"markdownDescription\": \"Denies the set_menu command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_show_menu_on_left_click command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-show-menu-on-left-click\",\n          \"markdownDescription\": \"Denies the set_show_menu_on_left_click command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_temp_dir_path command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-temp-dir-path\",\n          \"markdownDescription\": \"Denies the set_temp_dir_path command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_title command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-title\",\n          \"markdownDescription\": \"Denies the set_title command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_tooltip command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-tooltip\",\n          \"markdownDescription\": \"Denies the set_tooltip command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_visible command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-visible\",\n          \"markdownDescription\": \"Denies the set_visible command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin.\\n#### This default permission set includes:\\n\\n- `allow-get-all-webviews`\\n- `allow-webview-position`\\n- `allow-webview-size`\\n- `allow-internal-toggle-devtools`\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:default\",\n          \"markdownDescription\": \"Default permissions for the plugin.\\n#### This default permission set includes:\\n\\n- `allow-get-all-webviews`\\n- `allow-webview-position`\\n- `allow-webview-size`\\n- `allow-internal-toggle-devtools`\"\n        },\n        {\n          \"description\": \"Enables the clear_all_browsing_data command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-clear-all-browsing-data\",\n          \"markdownDescription\": \"Enables the clear_all_browsing_data command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the create_webview command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-create-webview\",\n          \"markdownDescription\": \"Enables the create_webview command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the create_webview_window command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-create-webview-window\",\n          \"markdownDescription\": \"Enables the create_webview_window command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the get_all_webviews command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-get-all-webviews\",\n          \"markdownDescription\": \"Enables the get_all_webviews command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the internal_toggle_devtools command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-internal-toggle-devtools\",\n          \"markdownDescription\": \"Enables the internal_toggle_devtools command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the print command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-print\",\n          \"markdownDescription\": \"Enables the print command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the reparent command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-reparent\",\n          \"markdownDescription\": \"Enables the reparent command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_webview_auto_resize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-set-webview-auto-resize\",\n          \"markdownDescription\": \"Enables the set_webview_auto_resize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_webview_background_color command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-set-webview-background-color\",\n          \"markdownDescription\": \"Enables the set_webview_background_color command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_webview_focus command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-set-webview-focus\",\n          \"markdownDescription\": \"Enables the set_webview_focus command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_webview_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-set-webview-position\",\n          \"markdownDescription\": \"Enables the set_webview_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_webview_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-set-webview-size\",\n          \"markdownDescription\": \"Enables the set_webview_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_webview_zoom command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-set-webview-zoom\",\n          \"markdownDescription\": \"Enables the set_webview_zoom command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the webview_close command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-webview-close\",\n          \"markdownDescription\": \"Enables the webview_close command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the webview_hide command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-webview-hide\",\n          \"markdownDescription\": \"Enables the webview_hide command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the webview_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-webview-position\",\n          \"markdownDescription\": \"Enables the webview_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the webview_show command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-webview-show\",\n          \"markdownDescription\": \"Enables the webview_show command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the webview_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-webview-size\",\n          \"markdownDescription\": \"Enables the webview_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the clear_all_browsing_data command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-clear-all-browsing-data\",\n          \"markdownDescription\": \"Denies the clear_all_browsing_data command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the create_webview command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-create-webview\",\n          \"markdownDescription\": \"Denies the create_webview command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the create_webview_window command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-create-webview-window\",\n          \"markdownDescription\": \"Denies the create_webview_window command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the get_all_webviews command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-get-all-webviews\",\n          \"markdownDescription\": \"Denies the get_all_webviews command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the internal_toggle_devtools command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-internal-toggle-devtools\",\n          \"markdownDescription\": \"Denies the internal_toggle_devtools command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the print command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-print\",\n          \"markdownDescription\": \"Denies the print command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the reparent command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-reparent\",\n          \"markdownDescription\": \"Denies the reparent command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_webview_auto_resize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-set-webview-auto-resize\",\n          \"markdownDescription\": \"Denies the set_webview_auto_resize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_webview_background_color command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-set-webview-background-color\",\n          \"markdownDescription\": \"Denies the set_webview_background_color command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_webview_focus command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-set-webview-focus\",\n          \"markdownDescription\": \"Denies the set_webview_focus command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_webview_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-set-webview-position\",\n          \"markdownDescription\": \"Denies the set_webview_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_webview_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-set-webview-size\",\n          \"markdownDescription\": \"Denies the set_webview_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_webview_zoom command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-set-webview-zoom\",\n          \"markdownDescription\": \"Denies the set_webview_zoom command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the webview_close command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-webview-close\",\n          \"markdownDescription\": \"Denies the webview_close command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the webview_hide command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-webview-hide\",\n          \"markdownDescription\": \"Denies the webview_hide command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the webview_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-webview-position\",\n          \"markdownDescription\": \"Denies the webview_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the webview_show command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-webview-show\",\n          \"markdownDescription\": \"Denies the webview_show command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the webview_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-webview-size\",\n          \"markdownDescription\": \"Denies the webview_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin.\\n#### This default permission set includes:\\n\\n- `allow-get-all-windows`\\n- `allow-scale-factor`\\n- `allow-inner-position`\\n- `allow-outer-position`\\n- `allow-inner-size`\\n- `allow-outer-size`\\n- `allow-is-fullscreen`\\n- `allow-is-minimized`\\n- `allow-is-maximized`\\n- `allow-is-focused`\\n- `allow-is-decorated`\\n- `allow-is-resizable`\\n- `allow-is-maximizable`\\n- `allow-is-minimizable`\\n- `allow-is-closable`\\n- `allow-is-visible`\\n- `allow-is-enabled`\\n- `allow-title`\\n- `allow-current-monitor`\\n- `allow-primary-monitor`\\n- `allow-monitor-from-point`\\n- `allow-available-monitors`\\n- `allow-cursor-position`\\n- `allow-theme`\\n- `allow-is-always-on-top`\\n- `allow-internal-toggle-maximize`\",\n          \"type\": \"string\",\n          \"const\": \"core:window:default\",\n          \"markdownDescription\": \"Default permissions for the plugin.\\n#### This default permission set includes:\\n\\n- `allow-get-all-windows`\\n- `allow-scale-factor`\\n- `allow-inner-position`\\n- `allow-outer-position`\\n- `allow-inner-size`\\n- `allow-outer-size`\\n- `allow-is-fullscreen`\\n- `allow-is-minimized`\\n- `allow-is-maximized`\\n- `allow-is-focused`\\n- `allow-is-decorated`\\n- `allow-is-resizable`\\n- `allow-is-maximizable`\\n- `allow-is-minimizable`\\n- `allow-is-closable`\\n- `allow-is-visible`\\n- `allow-is-enabled`\\n- `allow-title`\\n- `allow-current-monitor`\\n- `allow-primary-monitor`\\n- `allow-monitor-from-point`\\n- `allow-available-monitors`\\n- `allow-cursor-position`\\n- `allow-theme`\\n- `allow-is-always-on-top`\\n- `allow-internal-toggle-maximize`\"\n        },\n        {\n          \"description\": \"Enables the available_monitors command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-available-monitors\",\n          \"markdownDescription\": \"Enables the available_monitors command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the center command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-center\",\n          \"markdownDescription\": \"Enables the center command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the close command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-close\",\n          \"markdownDescription\": \"Enables the close command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the create command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-create\",\n          \"markdownDescription\": \"Enables the create command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the current_monitor command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-current-monitor\",\n          \"markdownDescription\": \"Enables the current_monitor command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the cursor_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-cursor-position\",\n          \"markdownDescription\": \"Enables the cursor_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the destroy command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-destroy\",\n          \"markdownDescription\": \"Enables the destroy command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the get_all_windows command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-get-all-windows\",\n          \"markdownDescription\": \"Enables the get_all_windows command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the hide command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-hide\",\n          \"markdownDescription\": \"Enables the hide command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the inner_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-inner-position\",\n          \"markdownDescription\": \"Enables the inner_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the inner_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-inner-size\",\n          \"markdownDescription\": \"Enables the inner_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the internal_toggle_maximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-internal-toggle-maximize\",\n          \"markdownDescription\": \"Enables the internal_toggle_maximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_always_on_top command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-always-on-top\",\n          \"markdownDescription\": \"Enables the is_always_on_top command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_closable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-closable\",\n          \"markdownDescription\": \"Enables the is_closable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_decorated command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-decorated\",\n          \"markdownDescription\": \"Enables the is_decorated command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-enabled\",\n          \"markdownDescription\": \"Enables the is_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_focused command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-focused\",\n          \"markdownDescription\": \"Enables the is_focused command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_fullscreen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-fullscreen\",\n          \"markdownDescription\": \"Enables the is_fullscreen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_maximizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-maximizable\",\n          \"markdownDescription\": \"Enables the is_maximizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_maximized command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-maximized\",\n          \"markdownDescription\": \"Enables the is_maximized command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_minimizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-minimizable\",\n          \"markdownDescription\": \"Enables the is_minimizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_minimized command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-minimized\",\n          \"markdownDescription\": \"Enables the is_minimized command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_resizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-resizable\",\n          \"markdownDescription\": \"Enables the is_resizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_visible command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-visible\",\n          \"markdownDescription\": \"Enables the is_visible command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the maximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-maximize\",\n          \"markdownDescription\": \"Enables the maximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the minimize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-minimize\",\n          \"markdownDescription\": \"Enables the minimize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the monitor_from_point command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-monitor-from-point\",\n          \"markdownDescription\": \"Enables the monitor_from_point command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the outer_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-outer-position\",\n          \"markdownDescription\": \"Enables the outer_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the outer_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-outer-size\",\n          \"markdownDescription\": \"Enables the outer_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the primary_monitor command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-primary-monitor\",\n          \"markdownDescription\": \"Enables the primary_monitor command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the request_user_attention command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-request-user-attention\",\n          \"markdownDescription\": \"Enables the request_user_attention command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the scale_factor command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-scale-factor\",\n          \"markdownDescription\": \"Enables the scale_factor command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_always_on_bottom command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-always-on-bottom\",\n          \"markdownDescription\": \"Enables the set_always_on_bottom command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_always_on_top command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-always-on-top\",\n          \"markdownDescription\": \"Enables the set_always_on_top command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_background_color command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-background-color\",\n          \"markdownDescription\": \"Enables the set_background_color command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_badge_count command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-badge-count\",\n          \"markdownDescription\": \"Enables the set_badge_count command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_badge_label command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-badge-label\",\n          \"markdownDescription\": \"Enables the set_badge_label command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_closable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-closable\",\n          \"markdownDescription\": \"Enables the set_closable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_content_protected command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-content-protected\",\n          \"markdownDescription\": \"Enables the set_content_protected command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_cursor_grab command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-cursor-grab\",\n          \"markdownDescription\": \"Enables the set_cursor_grab command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_cursor_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-cursor-icon\",\n          \"markdownDescription\": \"Enables the set_cursor_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_cursor_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-cursor-position\",\n          \"markdownDescription\": \"Enables the set_cursor_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_cursor_visible command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-cursor-visible\",\n          \"markdownDescription\": \"Enables the set_cursor_visible command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_decorations command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-decorations\",\n          \"markdownDescription\": \"Enables the set_decorations command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_effects command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-effects\",\n          \"markdownDescription\": \"Enables the set_effects command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-enabled\",\n          \"markdownDescription\": \"Enables the set_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_focus command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-focus\",\n          \"markdownDescription\": \"Enables the set_focus command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_focusable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-focusable\",\n          \"markdownDescription\": \"Enables the set_focusable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_fullscreen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-fullscreen\",\n          \"markdownDescription\": \"Enables the set_fullscreen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-icon\",\n          \"markdownDescription\": \"Enables the set_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_ignore_cursor_events command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-ignore-cursor-events\",\n          \"markdownDescription\": \"Enables the set_ignore_cursor_events command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_max_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-max-size\",\n          \"markdownDescription\": \"Enables the set_max_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_maximizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-maximizable\",\n          \"markdownDescription\": \"Enables the set_maximizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_min_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-min-size\",\n          \"markdownDescription\": \"Enables the set_min_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_minimizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-minimizable\",\n          \"markdownDescription\": \"Enables the set_minimizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_overlay_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-overlay-icon\",\n          \"markdownDescription\": \"Enables the set_overlay_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-position\",\n          \"markdownDescription\": \"Enables the set_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_progress_bar command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-progress-bar\",\n          \"markdownDescription\": \"Enables the set_progress_bar command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_resizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-resizable\",\n          \"markdownDescription\": \"Enables the set_resizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_shadow command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-shadow\",\n          \"markdownDescription\": \"Enables the set_shadow command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_simple_fullscreen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-simple-fullscreen\",\n          \"markdownDescription\": \"Enables the set_simple_fullscreen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-size\",\n          \"markdownDescription\": \"Enables the set_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_size_constraints command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-size-constraints\",\n          \"markdownDescription\": \"Enables the set_size_constraints command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_skip_taskbar command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-skip-taskbar\",\n          \"markdownDescription\": \"Enables the set_skip_taskbar command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_theme command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-theme\",\n          \"markdownDescription\": \"Enables the set_theme command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_title command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-title\",\n          \"markdownDescription\": \"Enables the set_title command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_title_bar_style command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-title-bar-style\",\n          \"markdownDescription\": \"Enables the set_title_bar_style command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_visible_on_all_workspaces command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-visible-on-all-workspaces\",\n          \"markdownDescription\": \"Enables the set_visible_on_all_workspaces command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the show command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-show\",\n          \"markdownDescription\": \"Enables the show command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the start_dragging command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-start-dragging\",\n          \"markdownDescription\": \"Enables the start_dragging command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the start_resize_dragging command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-start-resize-dragging\",\n          \"markdownDescription\": \"Enables the start_resize_dragging command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the theme command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-theme\",\n          \"markdownDescription\": \"Enables the theme command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the title command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-title\",\n          \"markdownDescription\": \"Enables the title command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the toggle_maximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-toggle-maximize\",\n          \"markdownDescription\": \"Enables the toggle_maximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the unmaximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-unmaximize\",\n          \"markdownDescription\": \"Enables the unmaximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the unminimize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-unminimize\",\n          \"markdownDescription\": \"Enables the unminimize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the available_monitors command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-available-monitors\",\n          \"markdownDescription\": \"Denies the available_monitors command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the center command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-center\",\n          \"markdownDescription\": \"Denies the center command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the close command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-close\",\n          \"markdownDescription\": \"Denies the close command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the create command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-create\",\n          \"markdownDescription\": \"Denies the create command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the current_monitor command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-current-monitor\",\n          \"markdownDescription\": \"Denies the current_monitor command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the cursor_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-cursor-position\",\n          \"markdownDescription\": \"Denies the cursor_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the destroy command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-destroy\",\n          \"markdownDescription\": \"Denies the destroy command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the get_all_windows command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-get-all-windows\",\n          \"markdownDescription\": \"Denies the get_all_windows command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the hide command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-hide\",\n          \"markdownDescription\": \"Denies the hide command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the inner_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-inner-position\",\n          \"markdownDescription\": \"Denies the inner_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the inner_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-inner-size\",\n          \"markdownDescription\": \"Denies the inner_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the internal_toggle_maximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-internal-toggle-maximize\",\n          \"markdownDescription\": \"Denies the internal_toggle_maximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_always_on_top command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-always-on-top\",\n          \"markdownDescription\": \"Denies the is_always_on_top command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_closable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-closable\",\n          \"markdownDescription\": \"Denies the is_closable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_decorated command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-decorated\",\n          \"markdownDescription\": \"Denies the is_decorated command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-enabled\",\n          \"markdownDescription\": \"Denies the is_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_focused command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-focused\",\n          \"markdownDescription\": \"Denies the is_focused command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_fullscreen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-fullscreen\",\n          \"markdownDescription\": \"Denies the is_fullscreen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_maximizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-maximizable\",\n          \"markdownDescription\": \"Denies the is_maximizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_maximized command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-maximized\",\n          \"markdownDescription\": \"Denies the is_maximized command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_minimizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-minimizable\",\n          \"markdownDescription\": \"Denies the is_minimizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_minimized command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-minimized\",\n          \"markdownDescription\": \"Denies the is_minimized command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_resizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-resizable\",\n          \"markdownDescription\": \"Denies the is_resizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_visible command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-visible\",\n          \"markdownDescription\": \"Denies the is_visible command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the maximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-maximize\",\n          \"markdownDescription\": \"Denies the maximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the minimize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-minimize\",\n          \"markdownDescription\": \"Denies the minimize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the monitor_from_point command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-monitor-from-point\",\n          \"markdownDescription\": \"Denies the monitor_from_point command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the outer_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-outer-position\",\n          \"markdownDescription\": \"Denies the outer_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the outer_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-outer-size\",\n          \"markdownDescription\": \"Denies the outer_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the primary_monitor command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-primary-monitor\",\n          \"markdownDescription\": \"Denies the primary_monitor command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the request_user_attention command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-request-user-attention\",\n          \"markdownDescription\": \"Denies the request_user_attention command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the scale_factor command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-scale-factor\",\n          \"markdownDescription\": \"Denies the scale_factor command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_always_on_bottom command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-always-on-bottom\",\n          \"markdownDescription\": \"Denies the set_always_on_bottom command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_always_on_top command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-always-on-top\",\n          \"markdownDescription\": \"Denies the set_always_on_top command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_background_color command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-background-color\",\n          \"markdownDescription\": \"Denies the set_background_color command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_badge_count command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-badge-count\",\n          \"markdownDescription\": \"Denies the set_badge_count command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_badge_label command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-badge-label\",\n          \"markdownDescription\": \"Denies the set_badge_label command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_closable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-closable\",\n          \"markdownDescription\": \"Denies the set_closable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_content_protected command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-content-protected\",\n          \"markdownDescription\": \"Denies the set_content_protected command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_cursor_grab command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-cursor-grab\",\n          \"markdownDescription\": \"Denies the set_cursor_grab command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_cursor_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-cursor-icon\",\n          \"markdownDescription\": \"Denies the set_cursor_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_cursor_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-cursor-position\",\n          \"markdownDescription\": \"Denies the set_cursor_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_cursor_visible command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-cursor-visible\",\n          \"markdownDescription\": \"Denies the set_cursor_visible command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_decorations command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-decorations\",\n          \"markdownDescription\": \"Denies the set_decorations command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_effects command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-effects\",\n          \"markdownDescription\": \"Denies the set_effects command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-enabled\",\n          \"markdownDescription\": \"Denies the set_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_focus command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-focus\",\n          \"markdownDescription\": \"Denies the set_focus command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_focusable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-focusable\",\n          \"markdownDescription\": \"Denies the set_focusable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_fullscreen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-fullscreen\",\n          \"markdownDescription\": \"Denies the set_fullscreen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-icon\",\n          \"markdownDescription\": \"Denies the set_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_ignore_cursor_events command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-ignore-cursor-events\",\n          \"markdownDescription\": \"Denies the set_ignore_cursor_events command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_max_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-max-size\",\n          \"markdownDescription\": \"Denies the set_max_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_maximizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-maximizable\",\n          \"markdownDescription\": \"Denies the set_maximizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_min_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-min-size\",\n          \"markdownDescription\": \"Denies the set_min_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_minimizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-minimizable\",\n          \"markdownDescription\": \"Denies the set_minimizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_overlay_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-overlay-icon\",\n          \"markdownDescription\": \"Denies the set_overlay_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-position\",\n          \"markdownDescription\": \"Denies the set_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_progress_bar command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-progress-bar\",\n          \"markdownDescription\": \"Denies the set_progress_bar command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_resizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-resizable\",\n          \"markdownDescription\": \"Denies the set_resizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_shadow command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-shadow\",\n          \"markdownDescription\": \"Denies the set_shadow command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_simple_fullscreen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-simple-fullscreen\",\n          \"markdownDescription\": \"Denies the set_simple_fullscreen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-size\",\n          \"markdownDescription\": \"Denies the set_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_size_constraints command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-size-constraints\",\n          \"markdownDescription\": \"Denies the set_size_constraints command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_skip_taskbar command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-skip-taskbar\",\n          \"markdownDescription\": \"Denies the set_skip_taskbar command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_theme command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-theme\",\n          \"markdownDescription\": \"Denies the set_theme command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_title command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-title\",\n          \"markdownDescription\": \"Denies the set_title command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_title_bar_style command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-title-bar-style\",\n          \"markdownDescription\": \"Denies the set_title_bar_style command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_visible_on_all_workspaces command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-visible-on-all-workspaces\",\n          \"markdownDescription\": \"Denies the set_visible_on_all_workspaces command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the show command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-show\",\n          \"markdownDescription\": \"Denies the show command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the start_dragging command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-start-dragging\",\n          \"markdownDescription\": \"Denies the start_dragging command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the start_resize_dragging command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-start-resize-dragging\",\n          \"markdownDescription\": \"Denies the start_resize_dragging command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the theme command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-theme\",\n          \"markdownDescription\": \"Denies the theme command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the title command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-title\",\n          \"markdownDescription\": \"Denies the title command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the toggle_maximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-toggle-maximize\",\n          \"markdownDescription\": \"Denies the toggle_maximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the unmaximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-unmaximize\",\n          \"markdownDescription\": \"Denies the unmaximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the unminimize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-unminimize\",\n          \"markdownDescription\": \"Denies the unminimize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"This permission set configures which\\nshell functionality is exposed by default.\\n\\n#### Granted Permissions\\n\\nIt allows to use the `open` functionality with a reasonable\\nscope pre-configured. It will allow opening `http(s)://`,\\n`tel:` and `mailto:` links.\\n\\n#### This default permission set includes:\\n\\n- `allow-open`\",\n          \"type\": \"string\",\n          \"const\": \"shell:default\",\n          \"markdownDescription\": \"This permission set configures which\\nshell functionality is exposed by default.\\n\\n#### Granted Permissions\\n\\nIt allows to use the `open` functionality with a reasonable\\nscope pre-configured. It will allow opening `http(s)://`,\\n`tel:` and `mailto:` links.\\n\\n#### This default permission set includes:\\n\\n- `allow-open`\"\n        },\n        {\n          \"description\": \"Enables the execute command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:allow-execute\",\n          \"markdownDescription\": \"Enables the execute command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the kill command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:allow-kill\",\n          \"markdownDescription\": \"Enables the kill command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the open command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:allow-open\",\n          \"markdownDescription\": \"Enables the open command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the spawn command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:allow-spawn\",\n          \"markdownDescription\": \"Enables the spawn command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the stdin_write command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:allow-stdin-write\",\n          \"markdownDescription\": \"Enables the stdin_write command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the execute command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:deny-execute\",\n          \"markdownDescription\": \"Denies the execute command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the kill command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:deny-kill\",\n          \"markdownDescription\": \"Denies the kill command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the open command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:deny-open\",\n          \"markdownDescription\": \"Denies the open command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the spawn command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:deny-spawn\",\n          \"markdownDescription\": \"Denies the spawn command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the stdin_write command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:deny-stdin-write\",\n          \"markdownDescription\": \"Denies the stdin_write command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"This permission set configures what kind of\\noperations are available from the window state plugin.\\n\\n#### Granted Permissions\\n\\nAll operations are enabled by default.\\n\\n\\n#### This default permission set includes:\\n\\n- `allow-filename`\\n- `allow-restore-state`\\n- `allow-save-window-state`\",\n          \"type\": \"string\",\n          \"const\": \"window-state:default\",\n          \"markdownDescription\": \"This permission set configures what kind of\\noperations are available from the window state plugin.\\n\\n#### Granted Permissions\\n\\nAll operations are enabled by default.\\n\\n\\n#### This default permission set includes:\\n\\n- `allow-filename`\\n- `allow-restore-state`\\n- `allow-save-window-state`\"\n        },\n        {\n          \"description\": \"Enables the filename command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"window-state:allow-filename\",\n          \"markdownDescription\": \"Enables the filename command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the restore_state command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"window-state:allow-restore-state\",\n          \"markdownDescription\": \"Enables the restore_state command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the save_window_state command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"window-state:allow-save-window-state\",\n          \"markdownDescription\": \"Enables the save_window_state command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the filename command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"window-state:deny-filename\",\n          \"markdownDescription\": \"Denies the filename command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the restore_state command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"window-state:deny-restore-state\",\n          \"markdownDescription\": \"Denies the restore_state command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the save_window_state command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"window-state:deny-save-window-state\",\n          \"markdownDescription\": \"Denies the save_window_state command without any pre-configured scope.\"\n        }\n      ]\n    },\n    \"Value\": {\n      \"description\": \"All supported ACL values.\",\n      \"anyOf\": [\n        {\n          \"description\": \"Represents a null JSON value.\",\n          \"type\": \"null\"\n        },\n        {\n          \"description\": \"Represents a [`bool`].\",\n          \"type\": \"boolean\"\n        },\n        {\n          \"description\": \"Represents a valid ACL [`Number`].\",\n          \"allOf\": [\n            {\n              \"$ref\": \"#/definitions/Number\"\n            }\n          ]\n        },\n        {\n          \"description\": \"Represents a [`String`].\",\n          \"type\": \"string\"\n        },\n        {\n          \"description\": \"Represents a list of other [`Value`]s.\",\n          \"type\": \"array\",\n          \"items\": {\n            \"$ref\": \"#/definitions/Value\"\n          }\n        },\n        {\n          \"description\": \"Represents a map of [`String`] keys to [`Value`]s.\",\n          \"type\": \"object\",\n          \"additionalProperties\": {\n            \"$ref\": \"#/definitions/Value\"\n          }\n        }\n      ]\n    },\n    \"Number\": {\n      \"description\": \"A valid ACL number.\",\n      \"anyOf\": [\n        {\n          \"description\": \"Represents an [`i64`].\",\n          \"type\": \"integer\",\n          \"format\": \"int64\"\n        },\n        {\n          \"description\": \"Represents a [`f64`].\",\n          \"type\": \"number\",\n          \"format\": \"double\"\n        }\n      ]\n    },\n    \"Target\": {\n      \"description\": \"Platform target.\",\n      \"oneOf\": [\n        {\n          \"description\": \"MacOS.\",\n          \"type\": \"string\",\n          \"enum\": [\n            \"macOS\"\n          ]\n        },\n        {\n          \"description\": \"Windows.\",\n          \"type\": \"string\",\n          \"enum\": [\n            \"windows\"\n          ]\n        },\n        {\n          \"description\": \"Linux.\",\n          \"type\": \"string\",\n          \"enum\": [\n            \"linux\"\n          ]\n        },\n        {\n          \"description\": \"Android.\",\n          \"type\": \"string\",\n          \"enum\": [\n            \"android\"\n          ]\n        },\n        {\n          \"description\": \"iOS.\",\n          \"type\": \"string\",\n          \"enum\": [\n            \"iOS\"\n          ]\n        }\n      ]\n    },\n    \"ShellScopeEntryAllowedArg\": {\n      \"description\": \"A command argument allowed to be executed by the webview API.\",\n      \"anyOf\": [\n        {\n          \"description\": \"A non-configurable argument that is passed to the command in the order it was specified.\",\n          \"type\": \"string\"\n        },\n        {\n          \"description\": \"A variable that is set while calling the command from the webview API.\",\n          \"type\": \"object\",\n          \"required\": [\n            \"validator\"\n          ],\n          \"properties\": {\n            \"raw\": {\n              \"description\": \"Marks the validator as a raw regex, meaning the plugin should not make any modification at runtime.\\n\\nThis means the regex will not match on the entire string by default, which might be exploited if your regex allow unexpected input to be considered valid. When using this option, make sure your regex is correct.\",\n              \"default\": false,\n              \"type\": \"boolean\"\n            },\n            \"validator\": {\n              \"description\": \"[regex] validator to require passed values to conform to an expected input.\\n\\nThis will require the argument value passed to this variable to match the `validator` regex before it will be executed.\\n\\nThe regex string is by default surrounded by `^...$` to match the full string. For example the `https?://\\\\w+` regex would be registered as `^https?://\\\\w+$`.\\n\\n[regex]: <https://docs.rs/regex/latest/regex/#syntax>\",\n              \"type\": \"string\"\n            }\n          },\n          \"additionalProperties\": false\n        }\n      ]\n    },\n    \"ShellScopeEntryAllowedArgs\": {\n      \"description\": \"A set of command arguments allowed to be executed by the webview API.\\n\\nA value of `true` will allow any arguments to be passed to the command. `false` will disable all arguments. A list of [`ShellScopeEntryAllowedArg`] will set those arguments as the only valid arguments to be passed to the attached command configuration.\",\n      \"anyOf\": [\n        {\n          \"description\": \"Use a simple boolean to allow all or disable all arguments to this command configuration.\",\n          \"type\": \"boolean\"\n        },\n        {\n          \"description\": \"A specific set of [`ShellScopeEntryAllowedArg`] that are valid to call for the command configuration.\",\n          \"type\": \"array\",\n          \"items\": {\n            \"$ref\": \"#/definitions/ShellScopeEntryAllowedArg\"\n          }\n        }\n      ]\n    }\n  }\n}"
  },
  {
    "path": "desktop/src-tauri/gen/schemas/macOS-schema.json",
    "content": "{\n  \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"title\": \"CapabilityFile\",\n  \"description\": \"Capability formats accepted in a capability file.\",\n  \"anyOf\": [\n    {\n      \"description\": \"A single capability.\",\n      \"allOf\": [\n        {\n          \"$ref\": \"#/definitions/Capability\"\n        }\n      ]\n    },\n    {\n      \"description\": \"A list of capabilities.\",\n      \"type\": \"array\",\n      \"items\": {\n        \"$ref\": \"#/definitions/Capability\"\n      }\n    },\n    {\n      \"description\": \"A list of capabilities.\",\n      \"type\": \"object\",\n      \"required\": [\n        \"capabilities\"\n      ],\n      \"properties\": {\n        \"capabilities\": {\n          \"description\": \"The list of capabilities.\",\n          \"type\": \"array\",\n          \"items\": {\n            \"$ref\": \"#/definitions/Capability\"\n          }\n        }\n      }\n    }\n  ],\n  \"definitions\": {\n    \"Capability\": {\n      \"description\": \"A grouping and boundary mechanism developers can use to isolate access to the IPC layer.\\n\\nIt controls application windows' and webviews' fine grained access to the Tauri core, application, or plugin commands. If a webview or its window is not matching any capability then it has no access to the IPC layer at all.\\n\\nThis can be done to create groups of windows, based on their required system access, which can reduce impact of frontend vulnerabilities in less privileged windows. Windows can be added to a capability by exact name (e.g. `main-window`) or glob patterns like `*` or `admin-*`. A Window can have none, one, or multiple associated capabilities.\\n\\n## Example\\n\\n```json { \\\"identifier\\\": \\\"main-user-files-write\\\", \\\"description\\\": \\\"This capability allows the `main` window on macOS and Windows access to `filesystem` write related commands and `dialog` commands to enable programmatic access to files selected by the user.\\\", \\\"windows\\\": [ \\\"main\\\" ], \\\"permissions\\\": [ \\\"core:default\\\", \\\"dialog:open\\\", { \\\"identifier\\\": \\\"fs:allow-write-text-file\\\", \\\"allow\\\": [{ \\\"path\\\": \\\"$HOME/test.txt\\\" }] }, ], \\\"platforms\\\": [\\\"macOS\\\",\\\"windows\\\"] } ```\",\n      \"type\": \"object\",\n      \"required\": [\n        \"identifier\",\n        \"permissions\"\n      ],\n      \"properties\": {\n        \"identifier\": {\n          \"description\": \"Identifier of the capability.\\n\\n## Example\\n\\n`main-user-files-write`\",\n          \"type\": \"string\"\n        },\n        \"description\": {\n          \"description\": \"Description of what the capability is intended to allow on associated windows.\\n\\nIt should contain a description of what the grouped permissions should allow.\\n\\n## Example\\n\\nThis capability allows the `main` window access to `filesystem` write related commands and `dialog` commands to enable programmatic access to files selected by the user.\",\n          \"default\": \"\",\n          \"type\": \"string\"\n        },\n        \"remote\": {\n          \"description\": \"Configure remote URLs that can use the capability permissions.\\n\\nThis setting is optional and defaults to not being set, as our default use case is that the content is served from our local application.\\n\\n:::caution Make sure you understand the security implications of providing remote sources with local system access. :::\\n\\n## Example\\n\\n```json { \\\"urls\\\": [\\\"https://*.mydomain.dev\\\"] } ```\",\n          \"anyOf\": [\n            {\n              \"$ref\": \"#/definitions/CapabilityRemote\"\n            },\n            {\n              \"type\": \"null\"\n            }\n          ]\n        },\n        \"local\": {\n          \"description\": \"Whether this capability is enabled for local app URLs or not. Defaults to `true`.\",\n          \"default\": true,\n          \"type\": \"boolean\"\n        },\n        \"windows\": {\n          \"description\": \"List of windows that are affected by this capability. Can be a glob pattern.\\n\\nIf a window label matches any of the patterns in this list, the capability will be enabled on all the webviews of that window, regardless of the value of [`Self::webviews`].\\n\\nOn multiwebview windows, prefer specifying [`Self::webviews`] and omitting [`Self::windows`] for a fine grained access control.\\n\\n## Example\\n\\n`[\\\"main\\\"]`\",\n          \"type\": \"array\",\n          \"items\": {\n            \"type\": \"string\"\n          }\n        },\n        \"webviews\": {\n          \"description\": \"List of webviews that are affected by this capability. Can be a glob pattern.\\n\\nThe capability will be enabled on all the webviews whose label matches any of the patterns in this list, regardless of whether the webview's window label matches a pattern in [`Self::windows`].\\n\\n## Example\\n\\n`[\\\"sub-webview-one\\\", \\\"sub-webview-two\\\"]`\",\n          \"type\": \"array\",\n          \"items\": {\n            \"type\": \"string\"\n          }\n        },\n        \"permissions\": {\n          \"description\": \"List of permissions attached to this capability.\\n\\nMust include the plugin name as prefix in the form of `${plugin-name}:${permission-name}`. For commands directly implemented in the application itself only `${permission-name}` is required.\\n\\n## Example\\n\\n```json [ \\\"core:default\\\", \\\"shell:allow-open\\\", \\\"dialog:open\\\", { \\\"identifier\\\": \\\"fs:allow-write-text-file\\\", \\\"allow\\\": [{ \\\"path\\\": \\\"$HOME/test.txt\\\" }] } ] ```\",\n          \"type\": \"array\",\n          \"items\": {\n            \"$ref\": \"#/definitions/PermissionEntry\"\n          },\n          \"uniqueItems\": true\n        },\n        \"platforms\": {\n          \"description\": \"Limit which target platforms this capability applies to.\\n\\nBy default all platforms are targeted.\\n\\n## Example\\n\\n`[\\\"macOS\\\",\\\"windows\\\"]`\",\n          \"type\": [\n            \"array\",\n            \"null\"\n          ],\n          \"items\": {\n            \"$ref\": \"#/definitions/Target\"\n          }\n        }\n      }\n    },\n    \"CapabilityRemote\": {\n      \"description\": \"Configuration for remote URLs that are associated with the capability.\",\n      \"type\": \"object\",\n      \"required\": [\n        \"urls\"\n      ],\n      \"properties\": {\n        \"urls\": {\n          \"description\": \"Remote domains this capability refers to using the [URLPattern standard](https://urlpattern.spec.whatwg.org/).\\n\\n## Examples\\n\\n- \\\"https://*.mydomain.dev\\\": allows subdomains of mydomain.dev - \\\"https://mydomain.dev/api/*\\\": allows any subpath of mydomain.dev/api\",\n          \"type\": \"array\",\n          \"items\": {\n            \"type\": \"string\"\n          }\n        }\n      }\n    },\n    \"PermissionEntry\": {\n      \"description\": \"An entry for a permission value in a [`Capability`] can be either a raw permission [`Identifier`] or an object that references a permission and extends its scope.\",\n      \"anyOf\": [\n        {\n          \"description\": \"Reference a permission or permission set by identifier.\",\n          \"allOf\": [\n            {\n              \"$ref\": \"#/definitions/Identifier\"\n            }\n          ]\n        },\n        {\n          \"description\": \"Reference a permission or permission set by identifier and extends its scope.\",\n          \"type\": \"object\",\n          \"allOf\": [\n            {\n              \"if\": {\n                \"properties\": {\n                  \"identifier\": {\n                    \"anyOf\": [\n                      {\n                        \"description\": \"This permission set configures which\\nshell functionality is exposed by default.\\n\\n#### Granted Permissions\\n\\nIt allows to use the `open` functionality with a reasonable\\nscope pre-configured. It will allow opening `http(s)://`,\\n`tel:` and `mailto:` links.\\n\\n#### This default permission set includes:\\n\\n- `allow-open`\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:default\",\n                        \"markdownDescription\": \"This permission set configures which\\nshell functionality is exposed by default.\\n\\n#### Granted Permissions\\n\\nIt allows to use the `open` functionality with a reasonable\\nscope pre-configured. It will allow opening `http(s)://`,\\n`tel:` and `mailto:` links.\\n\\n#### This default permission set includes:\\n\\n- `allow-open`\"\n                      },\n                      {\n                        \"description\": \"Enables the execute command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:allow-execute\",\n                        \"markdownDescription\": \"Enables the execute command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Enables the kill command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:allow-kill\",\n                        \"markdownDescription\": \"Enables the kill command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Enables the open command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:allow-open\",\n                        \"markdownDescription\": \"Enables the open command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Enables the spawn command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:allow-spawn\",\n                        \"markdownDescription\": \"Enables the spawn command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Enables the stdin_write command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:allow-stdin-write\",\n                        \"markdownDescription\": \"Enables the stdin_write command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Denies the execute command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:deny-execute\",\n                        \"markdownDescription\": \"Denies the execute command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Denies the kill command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:deny-kill\",\n                        \"markdownDescription\": \"Denies the kill command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Denies the open command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:deny-open\",\n                        \"markdownDescription\": \"Denies the open command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Denies the spawn command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:deny-spawn\",\n                        \"markdownDescription\": \"Denies the spawn command without any pre-configured scope.\"\n                      },\n                      {\n                        \"description\": \"Denies the stdin_write command without any pre-configured scope.\",\n                        \"type\": \"string\",\n                        \"const\": \"shell:deny-stdin-write\",\n                        \"markdownDescription\": \"Denies the stdin_write command without any pre-configured scope.\"\n                      }\n                    ]\n                  }\n                }\n              },\n              \"then\": {\n                \"properties\": {\n                  \"allow\": {\n                    \"items\": {\n                      \"title\": \"ShellScopeEntry\",\n                      \"description\": \"Shell scope entry.\",\n                      \"anyOf\": [\n                        {\n                          \"type\": \"object\",\n                          \"required\": [\n                            \"cmd\",\n                            \"name\"\n                          ],\n                          \"properties\": {\n                            \"args\": {\n                              \"description\": \"The allowed arguments for the command execution.\",\n                              \"allOf\": [\n                                {\n                                  \"$ref\": \"#/definitions/ShellScopeEntryAllowedArgs\"\n                                }\n                              ]\n                            },\n                            \"cmd\": {\n                              \"description\": \"The command name. It can start with a variable that resolves to a system base directory. The variables are: `$AUDIO`, `$CACHE`, `$CONFIG`, `$DATA`, `$LOCALDATA`, `$DESKTOP`, `$DOCUMENT`, `$DOWNLOAD`, `$EXE`, `$FONT`, `$HOME`, `$PICTURE`, `$PUBLIC`, `$RUNTIME`, `$TEMPLATE`, `$VIDEO`, `$RESOURCE`, `$LOG`, `$TEMP`, `$APPCONFIG`, `$APPDATA`, `$APPLOCALDATA`, `$APPCACHE`, `$APPLOG`.\",\n                              \"type\": \"string\"\n                            },\n                            \"name\": {\n                              \"description\": \"The name for this allowed shell command configuration.\\n\\nThis name will be used inside of the webview API to call this command along with any specified arguments.\",\n                              \"type\": \"string\"\n                            }\n                          },\n                          \"additionalProperties\": false\n                        },\n                        {\n                          \"type\": \"object\",\n                          \"required\": [\n                            \"name\",\n                            \"sidecar\"\n                          ],\n                          \"properties\": {\n                            \"args\": {\n                              \"description\": \"The allowed arguments for the command execution.\",\n                              \"allOf\": [\n                                {\n                                  \"$ref\": \"#/definitions/ShellScopeEntryAllowedArgs\"\n                                }\n                              ]\n                            },\n                            \"name\": {\n                              \"description\": \"The name for this allowed shell command configuration.\\n\\nThis name will be used inside of the webview API to call this command along with any specified arguments.\",\n                              \"type\": \"string\"\n                            },\n                            \"sidecar\": {\n                              \"description\": \"If this command is a sidecar command.\",\n                              \"type\": \"boolean\"\n                            }\n                          },\n                          \"additionalProperties\": false\n                        }\n                      ]\n                    }\n                  },\n                  \"deny\": {\n                    \"items\": {\n                      \"title\": \"ShellScopeEntry\",\n                      \"description\": \"Shell scope entry.\",\n                      \"anyOf\": [\n                        {\n                          \"type\": \"object\",\n                          \"required\": [\n                            \"cmd\",\n                            \"name\"\n                          ],\n                          \"properties\": {\n                            \"args\": {\n                              \"description\": \"The allowed arguments for the command execution.\",\n                              \"allOf\": [\n                                {\n                                  \"$ref\": \"#/definitions/ShellScopeEntryAllowedArgs\"\n                                }\n                              ]\n                            },\n                            \"cmd\": {\n                              \"description\": \"The command name. It can start with a variable that resolves to a system base directory. The variables are: `$AUDIO`, `$CACHE`, `$CONFIG`, `$DATA`, `$LOCALDATA`, `$DESKTOP`, `$DOCUMENT`, `$DOWNLOAD`, `$EXE`, `$FONT`, `$HOME`, `$PICTURE`, `$PUBLIC`, `$RUNTIME`, `$TEMPLATE`, `$VIDEO`, `$RESOURCE`, `$LOG`, `$TEMP`, `$APPCONFIG`, `$APPDATA`, `$APPLOCALDATA`, `$APPCACHE`, `$APPLOG`.\",\n                              \"type\": \"string\"\n                            },\n                            \"name\": {\n                              \"description\": \"The name for this allowed shell command configuration.\\n\\nThis name will be used inside of the webview API to call this command along with any specified arguments.\",\n                              \"type\": \"string\"\n                            }\n                          },\n                          \"additionalProperties\": false\n                        },\n                        {\n                          \"type\": \"object\",\n                          \"required\": [\n                            \"name\",\n                            \"sidecar\"\n                          ],\n                          \"properties\": {\n                            \"args\": {\n                              \"description\": \"The allowed arguments for the command execution.\",\n                              \"allOf\": [\n                                {\n                                  \"$ref\": \"#/definitions/ShellScopeEntryAllowedArgs\"\n                                }\n                              ]\n                            },\n                            \"name\": {\n                              \"description\": \"The name for this allowed shell command configuration.\\n\\nThis name will be used inside of the webview API to call this command along with any specified arguments.\",\n                              \"type\": \"string\"\n                            },\n                            \"sidecar\": {\n                              \"description\": \"If this command is a sidecar command.\",\n                              \"type\": \"boolean\"\n                            }\n                          },\n                          \"additionalProperties\": false\n                        }\n                      ]\n                    }\n                  }\n                }\n              },\n              \"properties\": {\n                \"identifier\": {\n                  \"description\": \"Identifier of the permission or permission set.\",\n                  \"allOf\": [\n                    {\n                      \"$ref\": \"#/definitions/Identifier\"\n                    }\n                  ]\n                }\n              }\n            },\n            {\n              \"properties\": {\n                \"identifier\": {\n                  \"description\": \"Identifier of the permission or permission set.\",\n                  \"allOf\": [\n                    {\n                      \"$ref\": \"#/definitions/Identifier\"\n                    }\n                  ]\n                },\n                \"allow\": {\n                  \"description\": \"Data that defines what is allowed by the scope.\",\n                  \"type\": [\n                    \"array\",\n                    \"null\"\n                  ],\n                  \"items\": {\n                    \"$ref\": \"#/definitions/Value\"\n                  }\n                },\n                \"deny\": {\n                  \"description\": \"Data that defines what is denied by the scope. This should be prioritized by validation logic.\",\n                  \"type\": [\n                    \"array\",\n                    \"null\"\n                  ],\n                  \"items\": {\n                    \"$ref\": \"#/definitions/Value\"\n                  }\n                }\n              }\n            }\n          ],\n          \"required\": [\n            \"identifier\"\n          ]\n        }\n      ]\n    },\n    \"Identifier\": {\n      \"description\": \"Permission identifier\",\n      \"oneOf\": [\n        {\n          \"description\": \"Default core plugins set.\\n#### This default permission set includes:\\n\\n- `core:path:default`\\n- `core:event:default`\\n- `core:window:default`\\n- `core:webview:default`\\n- `core:app:default`\\n- `core:image:default`\\n- `core:resources:default`\\n- `core:menu:default`\\n- `core:tray:default`\",\n          \"type\": \"string\",\n          \"const\": \"core:default\",\n          \"markdownDescription\": \"Default core plugins set.\\n#### This default permission set includes:\\n\\n- `core:path:default`\\n- `core:event:default`\\n- `core:window:default`\\n- `core:webview:default`\\n- `core:app:default`\\n- `core:image:default`\\n- `core:resources:default`\\n- `core:menu:default`\\n- `core:tray:default`\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin.\\n#### This default permission set includes:\\n\\n- `allow-version`\\n- `allow-name`\\n- `allow-tauri-version`\\n- `allow-identifier`\\n- `allow-bundle-type`\\n- `allow-register-listener`\\n- `allow-remove-listener`\",\n          \"type\": \"string\",\n          \"const\": \"core:app:default\",\n          \"markdownDescription\": \"Default permissions for the plugin.\\n#### This default permission set includes:\\n\\n- `allow-version`\\n- `allow-name`\\n- `allow-tauri-version`\\n- `allow-identifier`\\n- `allow-bundle-type`\\n- `allow-register-listener`\\n- `allow-remove-listener`\"\n        },\n        {\n          \"description\": \"Enables the app_hide command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-app-hide\",\n          \"markdownDescription\": \"Enables the app_hide command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the app_show command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-app-show\",\n          \"markdownDescription\": \"Enables the app_show command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the bundle_type command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-bundle-type\",\n          \"markdownDescription\": \"Enables the bundle_type command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the default_window_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-default-window-icon\",\n          \"markdownDescription\": \"Enables the default_window_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the fetch_data_store_identifiers command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-fetch-data-store-identifiers\",\n          \"markdownDescription\": \"Enables the fetch_data_store_identifiers command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the identifier command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-identifier\",\n          \"markdownDescription\": \"Enables the identifier command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the name command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-name\",\n          \"markdownDescription\": \"Enables the name command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the register_listener command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-register-listener\",\n          \"markdownDescription\": \"Enables the register_listener command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the remove_data_store command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-remove-data-store\",\n          \"markdownDescription\": \"Enables the remove_data_store command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the remove_listener command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-remove-listener\",\n          \"markdownDescription\": \"Enables the remove_listener command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_app_theme command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-set-app-theme\",\n          \"markdownDescription\": \"Enables the set_app_theme command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_dock_visibility command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-set-dock-visibility\",\n          \"markdownDescription\": \"Enables the set_dock_visibility command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the tauri_version command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-tauri-version\",\n          \"markdownDescription\": \"Enables the tauri_version command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the version command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:allow-version\",\n          \"markdownDescription\": \"Enables the version command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the app_hide command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-app-hide\",\n          \"markdownDescription\": \"Denies the app_hide command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the app_show command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-app-show\",\n          \"markdownDescription\": \"Denies the app_show command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the bundle_type command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-bundle-type\",\n          \"markdownDescription\": \"Denies the bundle_type command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the default_window_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-default-window-icon\",\n          \"markdownDescription\": \"Denies the default_window_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the fetch_data_store_identifiers command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-fetch-data-store-identifiers\",\n          \"markdownDescription\": \"Denies the fetch_data_store_identifiers command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the identifier command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-identifier\",\n          \"markdownDescription\": \"Denies the identifier command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the name command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-name\",\n          \"markdownDescription\": \"Denies the name command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the register_listener command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-register-listener\",\n          \"markdownDescription\": \"Denies the register_listener command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the remove_data_store command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-remove-data-store\",\n          \"markdownDescription\": \"Denies the remove_data_store command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the remove_listener command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-remove-listener\",\n          \"markdownDescription\": \"Denies the remove_listener command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_app_theme command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-set-app-theme\",\n          \"markdownDescription\": \"Denies the set_app_theme command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_dock_visibility command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-set-dock-visibility\",\n          \"markdownDescription\": \"Denies the set_dock_visibility command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the tauri_version command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-tauri-version\",\n          \"markdownDescription\": \"Denies the tauri_version command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the version command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:app:deny-version\",\n          \"markdownDescription\": \"Denies the version command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-listen`\\n- `allow-unlisten`\\n- `allow-emit`\\n- `allow-emit-to`\",\n          \"type\": \"string\",\n          \"const\": \"core:event:default\",\n          \"markdownDescription\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-listen`\\n- `allow-unlisten`\\n- `allow-emit`\\n- `allow-emit-to`\"\n        },\n        {\n          \"description\": \"Enables the emit command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:allow-emit\",\n          \"markdownDescription\": \"Enables the emit command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the emit_to command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:allow-emit-to\",\n          \"markdownDescription\": \"Enables the emit_to command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the listen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:allow-listen\",\n          \"markdownDescription\": \"Enables the listen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the unlisten command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:allow-unlisten\",\n          \"markdownDescription\": \"Enables the unlisten command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the emit command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:deny-emit\",\n          \"markdownDescription\": \"Denies the emit command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the emit_to command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:deny-emit-to\",\n          \"markdownDescription\": \"Denies the emit_to command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the listen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:deny-listen\",\n          \"markdownDescription\": \"Denies the listen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the unlisten command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:event:deny-unlisten\",\n          \"markdownDescription\": \"Denies the unlisten command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-new`\\n- `allow-from-bytes`\\n- `allow-from-path`\\n- `allow-rgba`\\n- `allow-size`\",\n          \"type\": \"string\",\n          \"const\": \"core:image:default\",\n          \"markdownDescription\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-new`\\n- `allow-from-bytes`\\n- `allow-from-path`\\n- `allow-rgba`\\n- `allow-size`\"\n        },\n        {\n          \"description\": \"Enables the from_bytes command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:allow-from-bytes\",\n          \"markdownDescription\": \"Enables the from_bytes command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the from_path command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:allow-from-path\",\n          \"markdownDescription\": \"Enables the from_path command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the new command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:allow-new\",\n          \"markdownDescription\": \"Enables the new command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the rgba command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:allow-rgba\",\n          \"markdownDescription\": \"Enables the rgba command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:allow-size\",\n          \"markdownDescription\": \"Enables the size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the from_bytes command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:deny-from-bytes\",\n          \"markdownDescription\": \"Denies the from_bytes command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the from_path command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:deny-from-path\",\n          \"markdownDescription\": \"Denies the from_path command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the new command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:deny-new\",\n          \"markdownDescription\": \"Denies the new command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the rgba command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:deny-rgba\",\n          \"markdownDescription\": \"Denies the rgba command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:image:deny-size\",\n          \"markdownDescription\": \"Denies the size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-new`\\n- `allow-append`\\n- `allow-prepend`\\n- `allow-insert`\\n- `allow-remove`\\n- `allow-remove-at`\\n- `allow-items`\\n- `allow-get`\\n- `allow-popup`\\n- `allow-create-default`\\n- `allow-set-as-app-menu`\\n- `allow-set-as-window-menu`\\n- `allow-text`\\n- `allow-set-text`\\n- `allow-is-enabled`\\n- `allow-set-enabled`\\n- `allow-set-accelerator`\\n- `allow-set-as-windows-menu-for-nsapp`\\n- `allow-set-as-help-menu-for-nsapp`\\n- `allow-is-checked`\\n- `allow-set-checked`\\n- `allow-set-icon`\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:default\",\n          \"markdownDescription\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-new`\\n- `allow-append`\\n- `allow-prepend`\\n- `allow-insert`\\n- `allow-remove`\\n- `allow-remove-at`\\n- `allow-items`\\n- `allow-get`\\n- `allow-popup`\\n- `allow-create-default`\\n- `allow-set-as-app-menu`\\n- `allow-set-as-window-menu`\\n- `allow-text`\\n- `allow-set-text`\\n- `allow-is-enabled`\\n- `allow-set-enabled`\\n- `allow-set-accelerator`\\n- `allow-set-as-windows-menu-for-nsapp`\\n- `allow-set-as-help-menu-for-nsapp`\\n- `allow-is-checked`\\n- `allow-set-checked`\\n- `allow-set-icon`\"\n        },\n        {\n          \"description\": \"Enables the append command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-append\",\n          \"markdownDescription\": \"Enables the append command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the create_default command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-create-default\",\n          \"markdownDescription\": \"Enables the create_default command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the get command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-get\",\n          \"markdownDescription\": \"Enables the get command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the insert command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-insert\",\n          \"markdownDescription\": \"Enables the insert command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_checked command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-is-checked\",\n          \"markdownDescription\": \"Enables the is_checked command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-is-enabled\",\n          \"markdownDescription\": \"Enables the is_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the items command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-items\",\n          \"markdownDescription\": \"Enables the items command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the new command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-new\",\n          \"markdownDescription\": \"Enables the new command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the popup command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-popup\",\n          \"markdownDescription\": \"Enables the popup command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the prepend command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-prepend\",\n          \"markdownDescription\": \"Enables the prepend command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the remove command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-remove\",\n          \"markdownDescription\": \"Enables the remove command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the remove_at command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-remove-at\",\n          \"markdownDescription\": \"Enables the remove_at command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_accelerator command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-accelerator\",\n          \"markdownDescription\": \"Enables the set_accelerator command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_as_app_menu command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-as-app-menu\",\n          \"markdownDescription\": \"Enables the set_as_app_menu command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_as_help_menu_for_nsapp command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-as-help-menu-for-nsapp\",\n          \"markdownDescription\": \"Enables the set_as_help_menu_for_nsapp command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_as_window_menu command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-as-window-menu\",\n          \"markdownDescription\": \"Enables the set_as_window_menu command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_as_windows_menu_for_nsapp command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-as-windows-menu-for-nsapp\",\n          \"markdownDescription\": \"Enables the set_as_windows_menu_for_nsapp command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_checked command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-checked\",\n          \"markdownDescription\": \"Enables the set_checked command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-enabled\",\n          \"markdownDescription\": \"Enables the set_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-icon\",\n          \"markdownDescription\": \"Enables the set_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_text command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-set-text\",\n          \"markdownDescription\": \"Enables the set_text command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the text command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:allow-text\",\n          \"markdownDescription\": \"Enables the text command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the append command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-append\",\n          \"markdownDescription\": \"Denies the append command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the create_default command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-create-default\",\n          \"markdownDescription\": \"Denies the create_default command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the get command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-get\",\n          \"markdownDescription\": \"Denies the get command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the insert command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-insert\",\n          \"markdownDescription\": \"Denies the insert command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_checked command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-is-checked\",\n          \"markdownDescription\": \"Denies the is_checked command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-is-enabled\",\n          \"markdownDescription\": \"Denies the is_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the items command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-items\",\n          \"markdownDescription\": \"Denies the items command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the new command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-new\",\n          \"markdownDescription\": \"Denies the new command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the popup command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-popup\",\n          \"markdownDescription\": \"Denies the popup command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the prepend command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-prepend\",\n          \"markdownDescription\": \"Denies the prepend command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the remove command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-remove\",\n          \"markdownDescription\": \"Denies the remove command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the remove_at command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-remove-at\",\n          \"markdownDescription\": \"Denies the remove_at command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_accelerator command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-accelerator\",\n          \"markdownDescription\": \"Denies the set_accelerator command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_as_app_menu command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-as-app-menu\",\n          \"markdownDescription\": \"Denies the set_as_app_menu command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_as_help_menu_for_nsapp command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-as-help-menu-for-nsapp\",\n          \"markdownDescription\": \"Denies the set_as_help_menu_for_nsapp command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_as_window_menu command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-as-window-menu\",\n          \"markdownDescription\": \"Denies the set_as_window_menu command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_as_windows_menu_for_nsapp command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-as-windows-menu-for-nsapp\",\n          \"markdownDescription\": \"Denies the set_as_windows_menu_for_nsapp command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_checked command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-checked\",\n          \"markdownDescription\": \"Denies the set_checked command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-enabled\",\n          \"markdownDescription\": \"Denies the set_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-icon\",\n          \"markdownDescription\": \"Denies the set_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_text command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-set-text\",\n          \"markdownDescription\": \"Denies the set_text command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the text command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:menu:deny-text\",\n          \"markdownDescription\": \"Denies the text command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-resolve-directory`\\n- `allow-resolve`\\n- `allow-normalize`\\n- `allow-join`\\n- `allow-dirname`\\n- `allow-extname`\\n- `allow-basename`\\n- `allow-is-absolute`\",\n          \"type\": \"string\",\n          \"const\": \"core:path:default\",\n          \"markdownDescription\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-resolve-directory`\\n- `allow-resolve`\\n- `allow-normalize`\\n- `allow-join`\\n- `allow-dirname`\\n- `allow-extname`\\n- `allow-basename`\\n- `allow-is-absolute`\"\n        },\n        {\n          \"description\": \"Enables the basename command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-basename\",\n          \"markdownDescription\": \"Enables the basename command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the dirname command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-dirname\",\n          \"markdownDescription\": \"Enables the dirname command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the extname command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-extname\",\n          \"markdownDescription\": \"Enables the extname command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_absolute command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-is-absolute\",\n          \"markdownDescription\": \"Enables the is_absolute command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the join command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-join\",\n          \"markdownDescription\": \"Enables the join command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the normalize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-normalize\",\n          \"markdownDescription\": \"Enables the normalize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the resolve command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-resolve\",\n          \"markdownDescription\": \"Enables the resolve command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the resolve_directory command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:allow-resolve-directory\",\n          \"markdownDescription\": \"Enables the resolve_directory command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the basename command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-basename\",\n          \"markdownDescription\": \"Denies the basename command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the dirname command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-dirname\",\n          \"markdownDescription\": \"Denies the dirname command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the extname command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-extname\",\n          \"markdownDescription\": \"Denies the extname command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_absolute command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-is-absolute\",\n          \"markdownDescription\": \"Denies the is_absolute command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the join command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-join\",\n          \"markdownDescription\": \"Denies the join command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the normalize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-normalize\",\n          \"markdownDescription\": \"Denies the normalize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the resolve command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-resolve\",\n          \"markdownDescription\": \"Denies the resolve command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the resolve_directory command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:path:deny-resolve-directory\",\n          \"markdownDescription\": \"Denies the resolve_directory command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-close`\",\n          \"type\": \"string\",\n          \"const\": \"core:resources:default\",\n          \"markdownDescription\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-close`\"\n        },\n        {\n          \"description\": \"Enables the close command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:resources:allow-close\",\n          \"markdownDescription\": \"Enables the close command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the close command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:resources:deny-close\",\n          \"markdownDescription\": \"Denies the close command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-new`\\n- `allow-get-by-id`\\n- `allow-remove-by-id`\\n- `allow-set-icon`\\n- `allow-set-menu`\\n- `allow-set-tooltip`\\n- `allow-set-title`\\n- `allow-set-visible`\\n- `allow-set-temp-dir-path`\\n- `allow-set-icon-as-template`\\n- `allow-set-show-menu-on-left-click`\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:default\",\n          \"markdownDescription\": \"Default permissions for the plugin, which enables all commands.\\n#### This default permission set includes:\\n\\n- `allow-new`\\n- `allow-get-by-id`\\n- `allow-remove-by-id`\\n- `allow-set-icon`\\n- `allow-set-menu`\\n- `allow-set-tooltip`\\n- `allow-set-title`\\n- `allow-set-visible`\\n- `allow-set-temp-dir-path`\\n- `allow-set-icon-as-template`\\n- `allow-set-show-menu-on-left-click`\"\n        },\n        {\n          \"description\": \"Enables the get_by_id command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-get-by-id\",\n          \"markdownDescription\": \"Enables the get_by_id command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the new command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-new\",\n          \"markdownDescription\": \"Enables the new command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the remove_by_id command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-remove-by-id\",\n          \"markdownDescription\": \"Enables the remove_by_id command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-icon\",\n          \"markdownDescription\": \"Enables the set_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_icon_as_template command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-icon-as-template\",\n          \"markdownDescription\": \"Enables the set_icon_as_template command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_menu command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-menu\",\n          \"markdownDescription\": \"Enables the set_menu command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_show_menu_on_left_click command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-show-menu-on-left-click\",\n          \"markdownDescription\": \"Enables the set_show_menu_on_left_click command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_temp_dir_path command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-temp-dir-path\",\n          \"markdownDescription\": \"Enables the set_temp_dir_path command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_title command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-title\",\n          \"markdownDescription\": \"Enables the set_title command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_tooltip command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-tooltip\",\n          \"markdownDescription\": \"Enables the set_tooltip command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_visible command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:allow-set-visible\",\n          \"markdownDescription\": \"Enables the set_visible command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the get_by_id command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-get-by-id\",\n          \"markdownDescription\": \"Denies the get_by_id command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the new command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-new\",\n          \"markdownDescription\": \"Denies the new command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the remove_by_id command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-remove-by-id\",\n          \"markdownDescription\": \"Denies the remove_by_id command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-icon\",\n          \"markdownDescription\": \"Denies the set_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_icon_as_template command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-icon-as-template\",\n          \"markdownDescription\": \"Denies the set_icon_as_template command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_menu command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-menu\",\n          \"markdownDescription\": \"Denies the set_menu command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_show_menu_on_left_click command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-show-menu-on-left-click\",\n          \"markdownDescription\": \"Denies the set_show_menu_on_left_click command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_temp_dir_path command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-temp-dir-path\",\n          \"markdownDescription\": \"Denies the set_temp_dir_path command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_title command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-title\",\n          \"markdownDescription\": \"Denies the set_title command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_tooltip command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-tooltip\",\n          \"markdownDescription\": \"Denies the set_tooltip command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_visible command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:tray:deny-set-visible\",\n          \"markdownDescription\": \"Denies the set_visible command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin.\\n#### This default permission set includes:\\n\\n- `allow-get-all-webviews`\\n- `allow-webview-position`\\n- `allow-webview-size`\\n- `allow-internal-toggle-devtools`\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:default\",\n          \"markdownDescription\": \"Default permissions for the plugin.\\n#### This default permission set includes:\\n\\n- `allow-get-all-webviews`\\n- `allow-webview-position`\\n- `allow-webview-size`\\n- `allow-internal-toggle-devtools`\"\n        },\n        {\n          \"description\": \"Enables the clear_all_browsing_data command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-clear-all-browsing-data\",\n          \"markdownDescription\": \"Enables the clear_all_browsing_data command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the create_webview command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-create-webview\",\n          \"markdownDescription\": \"Enables the create_webview command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the create_webview_window command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-create-webview-window\",\n          \"markdownDescription\": \"Enables the create_webview_window command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the get_all_webviews command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-get-all-webviews\",\n          \"markdownDescription\": \"Enables the get_all_webviews command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the internal_toggle_devtools command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-internal-toggle-devtools\",\n          \"markdownDescription\": \"Enables the internal_toggle_devtools command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the print command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-print\",\n          \"markdownDescription\": \"Enables the print command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the reparent command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-reparent\",\n          \"markdownDescription\": \"Enables the reparent command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_webview_auto_resize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-set-webview-auto-resize\",\n          \"markdownDescription\": \"Enables the set_webview_auto_resize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_webview_background_color command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-set-webview-background-color\",\n          \"markdownDescription\": \"Enables the set_webview_background_color command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_webview_focus command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-set-webview-focus\",\n          \"markdownDescription\": \"Enables the set_webview_focus command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_webview_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-set-webview-position\",\n          \"markdownDescription\": \"Enables the set_webview_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_webview_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-set-webview-size\",\n          \"markdownDescription\": \"Enables the set_webview_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_webview_zoom command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-set-webview-zoom\",\n          \"markdownDescription\": \"Enables the set_webview_zoom command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the webview_close command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-webview-close\",\n          \"markdownDescription\": \"Enables the webview_close command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the webview_hide command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-webview-hide\",\n          \"markdownDescription\": \"Enables the webview_hide command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the webview_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-webview-position\",\n          \"markdownDescription\": \"Enables the webview_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the webview_show command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-webview-show\",\n          \"markdownDescription\": \"Enables the webview_show command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the webview_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:allow-webview-size\",\n          \"markdownDescription\": \"Enables the webview_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the clear_all_browsing_data command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-clear-all-browsing-data\",\n          \"markdownDescription\": \"Denies the clear_all_browsing_data command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the create_webview command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-create-webview\",\n          \"markdownDescription\": \"Denies the create_webview command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the create_webview_window command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-create-webview-window\",\n          \"markdownDescription\": \"Denies the create_webview_window command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the get_all_webviews command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-get-all-webviews\",\n          \"markdownDescription\": \"Denies the get_all_webviews command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the internal_toggle_devtools command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-internal-toggle-devtools\",\n          \"markdownDescription\": \"Denies the internal_toggle_devtools command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the print command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-print\",\n          \"markdownDescription\": \"Denies the print command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the reparent command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-reparent\",\n          \"markdownDescription\": \"Denies the reparent command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_webview_auto_resize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-set-webview-auto-resize\",\n          \"markdownDescription\": \"Denies the set_webview_auto_resize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_webview_background_color command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-set-webview-background-color\",\n          \"markdownDescription\": \"Denies the set_webview_background_color command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_webview_focus command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-set-webview-focus\",\n          \"markdownDescription\": \"Denies the set_webview_focus command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_webview_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-set-webview-position\",\n          \"markdownDescription\": \"Denies the set_webview_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_webview_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-set-webview-size\",\n          \"markdownDescription\": \"Denies the set_webview_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_webview_zoom command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-set-webview-zoom\",\n          \"markdownDescription\": \"Denies the set_webview_zoom command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the webview_close command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-webview-close\",\n          \"markdownDescription\": \"Denies the webview_close command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the webview_hide command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-webview-hide\",\n          \"markdownDescription\": \"Denies the webview_hide command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the webview_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-webview-position\",\n          \"markdownDescription\": \"Denies the webview_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the webview_show command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-webview-show\",\n          \"markdownDescription\": \"Denies the webview_show command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the webview_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:webview:deny-webview-size\",\n          \"markdownDescription\": \"Denies the webview_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Default permissions for the plugin.\\n#### This default permission set includes:\\n\\n- `allow-get-all-windows`\\n- `allow-scale-factor`\\n- `allow-inner-position`\\n- `allow-outer-position`\\n- `allow-inner-size`\\n- `allow-outer-size`\\n- `allow-is-fullscreen`\\n- `allow-is-minimized`\\n- `allow-is-maximized`\\n- `allow-is-focused`\\n- `allow-is-decorated`\\n- `allow-is-resizable`\\n- `allow-is-maximizable`\\n- `allow-is-minimizable`\\n- `allow-is-closable`\\n- `allow-is-visible`\\n- `allow-is-enabled`\\n- `allow-title`\\n- `allow-current-monitor`\\n- `allow-primary-monitor`\\n- `allow-monitor-from-point`\\n- `allow-available-monitors`\\n- `allow-cursor-position`\\n- `allow-theme`\\n- `allow-is-always-on-top`\\n- `allow-internal-toggle-maximize`\",\n          \"type\": \"string\",\n          \"const\": \"core:window:default\",\n          \"markdownDescription\": \"Default permissions for the plugin.\\n#### This default permission set includes:\\n\\n- `allow-get-all-windows`\\n- `allow-scale-factor`\\n- `allow-inner-position`\\n- `allow-outer-position`\\n- `allow-inner-size`\\n- `allow-outer-size`\\n- `allow-is-fullscreen`\\n- `allow-is-minimized`\\n- `allow-is-maximized`\\n- `allow-is-focused`\\n- `allow-is-decorated`\\n- `allow-is-resizable`\\n- `allow-is-maximizable`\\n- `allow-is-minimizable`\\n- `allow-is-closable`\\n- `allow-is-visible`\\n- `allow-is-enabled`\\n- `allow-title`\\n- `allow-current-monitor`\\n- `allow-primary-monitor`\\n- `allow-monitor-from-point`\\n- `allow-available-monitors`\\n- `allow-cursor-position`\\n- `allow-theme`\\n- `allow-is-always-on-top`\\n- `allow-internal-toggle-maximize`\"\n        },\n        {\n          \"description\": \"Enables the available_monitors command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-available-monitors\",\n          \"markdownDescription\": \"Enables the available_monitors command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the center command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-center\",\n          \"markdownDescription\": \"Enables the center command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the close command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-close\",\n          \"markdownDescription\": \"Enables the close command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the create command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-create\",\n          \"markdownDescription\": \"Enables the create command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the current_monitor command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-current-monitor\",\n          \"markdownDescription\": \"Enables the current_monitor command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the cursor_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-cursor-position\",\n          \"markdownDescription\": \"Enables the cursor_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the destroy command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-destroy\",\n          \"markdownDescription\": \"Enables the destroy command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the get_all_windows command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-get-all-windows\",\n          \"markdownDescription\": \"Enables the get_all_windows command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the hide command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-hide\",\n          \"markdownDescription\": \"Enables the hide command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the inner_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-inner-position\",\n          \"markdownDescription\": \"Enables the inner_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the inner_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-inner-size\",\n          \"markdownDescription\": \"Enables the inner_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the internal_toggle_maximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-internal-toggle-maximize\",\n          \"markdownDescription\": \"Enables the internal_toggle_maximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_always_on_top command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-always-on-top\",\n          \"markdownDescription\": \"Enables the is_always_on_top command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_closable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-closable\",\n          \"markdownDescription\": \"Enables the is_closable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_decorated command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-decorated\",\n          \"markdownDescription\": \"Enables the is_decorated command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-enabled\",\n          \"markdownDescription\": \"Enables the is_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_focused command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-focused\",\n          \"markdownDescription\": \"Enables the is_focused command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_fullscreen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-fullscreen\",\n          \"markdownDescription\": \"Enables the is_fullscreen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_maximizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-maximizable\",\n          \"markdownDescription\": \"Enables the is_maximizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_maximized command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-maximized\",\n          \"markdownDescription\": \"Enables the is_maximized command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_minimizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-minimizable\",\n          \"markdownDescription\": \"Enables the is_minimizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_minimized command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-minimized\",\n          \"markdownDescription\": \"Enables the is_minimized command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_resizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-resizable\",\n          \"markdownDescription\": \"Enables the is_resizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the is_visible command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-is-visible\",\n          \"markdownDescription\": \"Enables the is_visible command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the maximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-maximize\",\n          \"markdownDescription\": \"Enables the maximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the minimize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-minimize\",\n          \"markdownDescription\": \"Enables the minimize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the monitor_from_point command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-monitor-from-point\",\n          \"markdownDescription\": \"Enables the monitor_from_point command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the outer_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-outer-position\",\n          \"markdownDescription\": \"Enables the outer_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the outer_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-outer-size\",\n          \"markdownDescription\": \"Enables the outer_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the primary_monitor command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-primary-monitor\",\n          \"markdownDescription\": \"Enables the primary_monitor command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the request_user_attention command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-request-user-attention\",\n          \"markdownDescription\": \"Enables the request_user_attention command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the scale_factor command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-scale-factor\",\n          \"markdownDescription\": \"Enables the scale_factor command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_always_on_bottom command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-always-on-bottom\",\n          \"markdownDescription\": \"Enables the set_always_on_bottom command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_always_on_top command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-always-on-top\",\n          \"markdownDescription\": \"Enables the set_always_on_top command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_background_color command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-background-color\",\n          \"markdownDescription\": \"Enables the set_background_color command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_badge_count command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-badge-count\",\n          \"markdownDescription\": \"Enables the set_badge_count command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_badge_label command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-badge-label\",\n          \"markdownDescription\": \"Enables the set_badge_label command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_closable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-closable\",\n          \"markdownDescription\": \"Enables the set_closable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_content_protected command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-content-protected\",\n          \"markdownDescription\": \"Enables the set_content_protected command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_cursor_grab command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-cursor-grab\",\n          \"markdownDescription\": \"Enables the set_cursor_grab command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_cursor_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-cursor-icon\",\n          \"markdownDescription\": \"Enables the set_cursor_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_cursor_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-cursor-position\",\n          \"markdownDescription\": \"Enables the set_cursor_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_cursor_visible command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-cursor-visible\",\n          \"markdownDescription\": \"Enables the set_cursor_visible command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_decorations command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-decorations\",\n          \"markdownDescription\": \"Enables the set_decorations command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_effects command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-effects\",\n          \"markdownDescription\": \"Enables the set_effects command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-enabled\",\n          \"markdownDescription\": \"Enables the set_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_focus command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-focus\",\n          \"markdownDescription\": \"Enables the set_focus command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_focusable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-focusable\",\n          \"markdownDescription\": \"Enables the set_focusable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_fullscreen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-fullscreen\",\n          \"markdownDescription\": \"Enables the set_fullscreen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-icon\",\n          \"markdownDescription\": \"Enables the set_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_ignore_cursor_events command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-ignore-cursor-events\",\n          \"markdownDescription\": \"Enables the set_ignore_cursor_events command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_max_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-max-size\",\n          \"markdownDescription\": \"Enables the set_max_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_maximizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-maximizable\",\n          \"markdownDescription\": \"Enables the set_maximizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_min_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-min-size\",\n          \"markdownDescription\": \"Enables the set_min_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_minimizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-minimizable\",\n          \"markdownDescription\": \"Enables the set_minimizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_overlay_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-overlay-icon\",\n          \"markdownDescription\": \"Enables the set_overlay_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-position\",\n          \"markdownDescription\": \"Enables the set_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_progress_bar command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-progress-bar\",\n          \"markdownDescription\": \"Enables the set_progress_bar command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_resizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-resizable\",\n          \"markdownDescription\": \"Enables the set_resizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_shadow command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-shadow\",\n          \"markdownDescription\": \"Enables the set_shadow command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_simple_fullscreen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-simple-fullscreen\",\n          \"markdownDescription\": \"Enables the set_simple_fullscreen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-size\",\n          \"markdownDescription\": \"Enables the set_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_size_constraints command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-size-constraints\",\n          \"markdownDescription\": \"Enables the set_size_constraints command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_skip_taskbar command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-skip-taskbar\",\n          \"markdownDescription\": \"Enables the set_skip_taskbar command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_theme command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-theme\",\n          \"markdownDescription\": \"Enables the set_theme command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_title command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-title\",\n          \"markdownDescription\": \"Enables the set_title command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_title_bar_style command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-title-bar-style\",\n          \"markdownDescription\": \"Enables the set_title_bar_style command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the set_visible_on_all_workspaces command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-set-visible-on-all-workspaces\",\n          \"markdownDescription\": \"Enables the set_visible_on_all_workspaces command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the show command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-show\",\n          \"markdownDescription\": \"Enables the show command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the start_dragging command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-start-dragging\",\n          \"markdownDescription\": \"Enables the start_dragging command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the start_resize_dragging command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-start-resize-dragging\",\n          \"markdownDescription\": \"Enables the start_resize_dragging command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the theme command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-theme\",\n          \"markdownDescription\": \"Enables the theme command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the title command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-title\",\n          \"markdownDescription\": \"Enables the title command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the toggle_maximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-toggle-maximize\",\n          \"markdownDescription\": \"Enables the toggle_maximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the unmaximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-unmaximize\",\n          \"markdownDescription\": \"Enables the unmaximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the unminimize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:allow-unminimize\",\n          \"markdownDescription\": \"Enables the unminimize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the available_monitors command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-available-monitors\",\n          \"markdownDescription\": \"Denies the available_monitors command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the center command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-center\",\n          \"markdownDescription\": \"Denies the center command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the close command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-close\",\n          \"markdownDescription\": \"Denies the close command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the create command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-create\",\n          \"markdownDescription\": \"Denies the create command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the current_monitor command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-current-monitor\",\n          \"markdownDescription\": \"Denies the current_monitor command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the cursor_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-cursor-position\",\n          \"markdownDescription\": \"Denies the cursor_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the destroy command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-destroy\",\n          \"markdownDescription\": \"Denies the destroy command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the get_all_windows command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-get-all-windows\",\n          \"markdownDescription\": \"Denies the get_all_windows command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the hide command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-hide\",\n          \"markdownDescription\": \"Denies the hide command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the inner_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-inner-position\",\n          \"markdownDescription\": \"Denies the inner_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the inner_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-inner-size\",\n          \"markdownDescription\": \"Denies the inner_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the internal_toggle_maximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-internal-toggle-maximize\",\n          \"markdownDescription\": \"Denies the internal_toggle_maximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_always_on_top command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-always-on-top\",\n          \"markdownDescription\": \"Denies the is_always_on_top command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_closable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-closable\",\n          \"markdownDescription\": \"Denies the is_closable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_decorated command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-decorated\",\n          \"markdownDescription\": \"Denies the is_decorated command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-enabled\",\n          \"markdownDescription\": \"Denies the is_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_focused command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-focused\",\n          \"markdownDescription\": \"Denies the is_focused command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_fullscreen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-fullscreen\",\n          \"markdownDescription\": \"Denies the is_fullscreen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_maximizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-maximizable\",\n          \"markdownDescription\": \"Denies the is_maximizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_maximized command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-maximized\",\n          \"markdownDescription\": \"Denies the is_maximized command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_minimizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-minimizable\",\n          \"markdownDescription\": \"Denies the is_minimizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_minimized command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-minimized\",\n          \"markdownDescription\": \"Denies the is_minimized command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_resizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-resizable\",\n          \"markdownDescription\": \"Denies the is_resizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the is_visible command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-is-visible\",\n          \"markdownDescription\": \"Denies the is_visible command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the maximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-maximize\",\n          \"markdownDescription\": \"Denies the maximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the minimize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-minimize\",\n          \"markdownDescription\": \"Denies the minimize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the monitor_from_point command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-monitor-from-point\",\n          \"markdownDescription\": \"Denies the monitor_from_point command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the outer_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-outer-position\",\n          \"markdownDescription\": \"Denies the outer_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the outer_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-outer-size\",\n          \"markdownDescription\": \"Denies the outer_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the primary_monitor command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-primary-monitor\",\n          \"markdownDescription\": \"Denies the primary_monitor command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the request_user_attention command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-request-user-attention\",\n          \"markdownDescription\": \"Denies the request_user_attention command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the scale_factor command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-scale-factor\",\n          \"markdownDescription\": \"Denies the scale_factor command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_always_on_bottom command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-always-on-bottom\",\n          \"markdownDescription\": \"Denies the set_always_on_bottom command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_always_on_top command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-always-on-top\",\n          \"markdownDescription\": \"Denies the set_always_on_top command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_background_color command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-background-color\",\n          \"markdownDescription\": \"Denies the set_background_color command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_badge_count command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-badge-count\",\n          \"markdownDescription\": \"Denies the set_badge_count command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_badge_label command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-badge-label\",\n          \"markdownDescription\": \"Denies the set_badge_label command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_closable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-closable\",\n          \"markdownDescription\": \"Denies the set_closable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_content_protected command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-content-protected\",\n          \"markdownDescription\": \"Denies the set_content_protected command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_cursor_grab command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-cursor-grab\",\n          \"markdownDescription\": \"Denies the set_cursor_grab command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_cursor_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-cursor-icon\",\n          \"markdownDescription\": \"Denies the set_cursor_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_cursor_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-cursor-position\",\n          \"markdownDescription\": \"Denies the set_cursor_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_cursor_visible command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-cursor-visible\",\n          \"markdownDescription\": \"Denies the set_cursor_visible command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_decorations command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-decorations\",\n          \"markdownDescription\": \"Denies the set_decorations command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_effects command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-effects\",\n          \"markdownDescription\": \"Denies the set_effects command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_enabled command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-enabled\",\n          \"markdownDescription\": \"Denies the set_enabled command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_focus command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-focus\",\n          \"markdownDescription\": \"Denies the set_focus command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_focusable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-focusable\",\n          \"markdownDescription\": \"Denies the set_focusable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_fullscreen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-fullscreen\",\n          \"markdownDescription\": \"Denies the set_fullscreen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-icon\",\n          \"markdownDescription\": \"Denies the set_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_ignore_cursor_events command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-ignore-cursor-events\",\n          \"markdownDescription\": \"Denies the set_ignore_cursor_events command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_max_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-max-size\",\n          \"markdownDescription\": \"Denies the set_max_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_maximizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-maximizable\",\n          \"markdownDescription\": \"Denies the set_maximizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_min_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-min-size\",\n          \"markdownDescription\": \"Denies the set_min_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_minimizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-minimizable\",\n          \"markdownDescription\": \"Denies the set_minimizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_overlay_icon command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-overlay-icon\",\n          \"markdownDescription\": \"Denies the set_overlay_icon command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_position command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-position\",\n          \"markdownDescription\": \"Denies the set_position command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_progress_bar command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-progress-bar\",\n          \"markdownDescription\": \"Denies the set_progress_bar command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_resizable command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-resizable\",\n          \"markdownDescription\": \"Denies the set_resizable command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_shadow command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-shadow\",\n          \"markdownDescription\": \"Denies the set_shadow command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_simple_fullscreen command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-simple-fullscreen\",\n          \"markdownDescription\": \"Denies the set_simple_fullscreen command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_size command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-size\",\n          \"markdownDescription\": \"Denies the set_size command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_size_constraints command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-size-constraints\",\n          \"markdownDescription\": \"Denies the set_size_constraints command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_skip_taskbar command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-skip-taskbar\",\n          \"markdownDescription\": \"Denies the set_skip_taskbar command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_theme command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-theme\",\n          \"markdownDescription\": \"Denies the set_theme command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_title command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-title\",\n          \"markdownDescription\": \"Denies the set_title command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_title_bar_style command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-title-bar-style\",\n          \"markdownDescription\": \"Denies the set_title_bar_style command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the set_visible_on_all_workspaces command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-set-visible-on-all-workspaces\",\n          \"markdownDescription\": \"Denies the set_visible_on_all_workspaces command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the show command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-show\",\n          \"markdownDescription\": \"Denies the show command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the start_dragging command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-start-dragging\",\n          \"markdownDescription\": \"Denies the start_dragging command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the start_resize_dragging command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-start-resize-dragging\",\n          \"markdownDescription\": \"Denies the start_resize_dragging command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the theme command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-theme\",\n          \"markdownDescription\": \"Denies the theme command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the title command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-title\",\n          \"markdownDescription\": \"Denies the title command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the toggle_maximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-toggle-maximize\",\n          \"markdownDescription\": \"Denies the toggle_maximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the unmaximize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-unmaximize\",\n          \"markdownDescription\": \"Denies the unmaximize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the unminimize command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"core:window:deny-unminimize\",\n          \"markdownDescription\": \"Denies the unminimize command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"This permission set configures which\\nshell functionality is exposed by default.\\n\\n#### Granted Permissions\\n\\nIt allows to use the `open` functionality with a reasonable\\nscope pre-configured. It will allow opening `http(s)://`,\\n`tel:` and `mailto:` links.\\n\\n#### This default permission set includes:\\n\\n- `allow-open`\",\n          \"type\": \"string\",\n          \"const\": \"shell:default\",\n          \"markdownDescription\": \"This permission set configures which\\nshell functionality is exposed by default.\\n\\n#### Granted Permissions\\n\\nIt allows to use the `open` functionality with a reasonable\\nscope pre-configured. It will allow opening `http(s)://`,\\n`tel:` and `mailto:` links.\\n\\n#### This default permission set includes:\\n\\n- `allow-open`\"\n        },\n        {\n          \"description\": \"Enables the execute command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:allow-execute\",\n          \"markdownDescription\": \"Enables the execute command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the kill command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:allow-kill\",\n          \"markdownDescription\": \"Enables the kill command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the open command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:allow-open\",\n          \"markdownDescription\": \"Enables the open command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the spawn command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:allow-spawn\",\n          \"markdownDescription\": \"Enables the spawn command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the stdin_write command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:allow-stdin-write\",\n          \"markdownDescription\": \"Enables the stdin_write command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the execute command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:deny-execute\",\n          \"markdownDescription\": \"Denies the execute command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the kill command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:deny-kill\",\n          \"markdownDescription\": \"Denies the kill command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the open command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:deny-open\",\n          \"markdownDescription\": \"Denies the open command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the spawn command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:deny-spawn\",\n          \"markdownDescription\": \"Denies the spawn command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the stdin_write command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"shell:deny-stdin-write\",\n          \"markdownDescription\": \"Denies the stdin_write command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"This permission set configures what kind of\\noperations are available from the window state plugin.\\n\\n#### Granted Permissions\\n\\nAll operations are enabled by default.\\n\\n\\n#### This default permission set includes:\\n\\n- `allow-filename`\\n- `allow-restore-state`\\n- `allow-save-window-state`\",\n          \"type\": \"string\",\n          \"const\": \"window-state:default\",\n          \"markdownDescription\": \"This permission set configures what kind of\\noperations are available from the window state plugin.\\n\\n#### Granted Permissions\\n\\nAll operations are enabled by default.\\n\\n\\n#### This default permission set includes:\\n\\n- `allow-filename`\\n- `allow-restore-state`\\n- `allow-save-window-state`\"\n        },\n        {\n          \"description\": \"Enables the filename command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"window-state:allow-filename\",\n          \"markdownDescription\": \"Enables the filename command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the restore_state command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"window-state:allow-restore-state\",\n          \"markdownDescription\": \"Enables the restore_state command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Enables the save_window_state command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"window-state:allow-save-window-state\",\n          \"markdownDescription\": \"Enables the save_window_state command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the filename command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"window-state:deny-filename\",\n          \"markdownDescription\": \"Denies the filename command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the restore_state command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"window-state:deny-restore-state\",\n          \"markdownDescription\": \"Denies the restore_state command without any pre-configured scope.\"\n        },\n        {\n          \"description\": \"Denies the save_window_state command without any pre-configured scope.\",\n          \"type\": \"string\",\n          \"const\": \"window-state:deny-save-window-state\",\n          \"markdownDescription\": \"Denies the save_window_state command without any pre-configured scope.\"\n        }\n      ]\n    },\n    \"Value\": {\n      \"description\": \"All supported ACL values.\",\n      \"anyOf\": [\n        {\n          \"description\": \"Represents a null JSON value.\",\n          \"type\": \"null\"\n        },\n        {\n          \"description\": \"Represents a [`bool`].\",\n          \"type\": \"boolean\"\n        },\n        {\n          \"description\": \"Represents a valid ACL [`Number`].\",\n          \"allOf\": [\n            {\n              \"$ref\": \"#/definitions/Number\"\n            }\n          ]\n        },\n        {\n          \"description\": \"Represents a [`String`].\",\n          \"type\": \"string\"\n        },\n        {\n          \"description\": \"Represents a list of other [`Value`]s.\",\n          \"type\": \"array\",\n          \"items\": {\n            \"$ref\": \"#/definitions/Value\"\n          }\n        },\n        {\n          \"description\": \"Represents a map of [`String`] keys to [`Value`]s.\",\n          \"type\": \"object\",\n          \"additionalProperties\": {\n            \"$ref\": \"#/definitions/Value\"\n          }\n        }\n      ]\n    },\n    \"Number\": {\n      \"description\": \"A valid ACL number.\",\n      \"anyOf\": [\n        {\n          \"description\": \"Represents an [`i64`].\",\n          \"type\": \"integer\",\n          \"format\": \"int64\"\n        },\n        {\n          \"description\": \"Represents a [`f64`].\",\n          \"type\": \"number\",\n          \"format\": \"double\"\n        }\n      ]\n    },\n    \"Target\": {\n      \"description\": \"Platform target.\",\n      \"oneOf\": [\n        {\n          \"description\": \"MacOS.\",\n          \"type\": \"string\",\n          \"enum\": [\n            \"macOS\"\n          ]\n        },\n        {\n          \"description\": \"Windows.\",\n          \"type\": \"string\",\n          \"enum\": [\n            \"windows\"\n          ]\n        },\n        {\n          \"description\": \"Linux.\",\n          \"type\": \"string\",\n          \"enum\": [\n            \"linux\"\n          ]\n        },\n        {\n          \"description\": \"Android.\",\n          \"type\": \"string\",\n          \"enum\": [\n            \"android\"\n          ]\n        },\n        {\n          \"description\": \"iOS.\",\n          \"type\": \"string\",\n          \"enum\": [\n            \"iOS\"\n          ]\n        }\n      ]\n    },\n    \"ShellScopeEntryAllowedArg\": {\n      \"description\": \"A command argument allowed to be executed by the webview API.\",\n      \"anyOf\": [\n        {\n          \"description\": \"A non-configurable argument that is passed to the command in the order it was specified.\",\n          \"type\": \"string\"\n        },\n        {\n          \"description\": \"A variable that is set while calling the command from the webview API.\",\n          \"type\": \"object\",\n          \"required\": [\n            \"validator\"\n          ],\n          \"properties\": {\n            \"raw\": {\n              \"description\": \"Marks the validator as a raw regex, meaning the plugin should not make any modification at runtime.\\n\\nThis means the regex will not match on the entire string by default, which might be exploited if your regex allow unexpected input to be considered valid. When using this option, make sure your regex is correct.\",\n              \"default\": false,\n              \"type\": \"boolean\"\n            },\n            \"validator\": {\n              \"description\": \"[regex] validator to require passed values to conform to an expected input.\\n\\nThis will require the argument value passed to this variable to match the `validator` regex before it will be executed.\\n\\nThe regex string is by default surrounded by `^...$` to match the full string. For example the `https?://\\\\w+` regex would be registered as `^https?://\\\\w+$`.\\n\\n[regex]: <https://docs.rs/regex/latest/regex/#syntax>\",\n              \"type\": \"string\"\n            }\n          },\n          \"additionalProperties\": false\n        }\n      ]\n    },\n    \"ShellScopeEntryAllowedArgs\": {\n      \"description\": \"A set of command arguments allowed to be executed by the webview API.\\n\\nA value of `true` will allow any arguments to be passed to the command. `false` will disable all arguments. A list of [`ShellScopeEntryAllowedArg`] will set those arguments as the only valid arguments to be passed to the attached command configuration.\",\n      \"anyOf\": [\n        {\n          \"description\": \"Use a simple boolean to allow all or disable all arguments to this command configuration.\",\n          \"type\": \"boolean\"\n        },\n        {\n          \"description\": \"A specific set of [`ShellScopeEntryAllowedArg`] that are valid to call for the command configuration.\",\n          \"type\": \"array\",\n          \"items\": {\n            \"$ref\": \"#/definitions/ShellScopeEntryAllowedArg\"\n          }\n        }\n      ]\n    }\n  }\n}"
  },
  {
    "path": "desktop/src-tauri/icons/android/mipmap-anydpi-v26/ic_launcher.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<adaptive-icon xmlns:android=\"http://schemas.android.com/apk/res/android\">\n  <foreground android:drawable=\"@mipmap/ic_launcher_foreground\"/>\n  <background android:drawable=\"@color/ic_launcher_background\"/>\n</adaptive-icon>"
  },
  {
    "path": "desktop/src-tauri/icons/android/values/ic_launcher_background.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<resources>\n  <color name=\"ic_launcher_background\">#fff</color>\n</resources>"
  },
  {
    "path": "desktop/src-tauri/src/main.rs",
    "content": "// Prevents additional console window on Windows in release\n#![cfg_attr(not(debug_assertions), windows_subsystem = \"windows\")]\n\nuse directories::ProjectDirs;\nuse serde::{Deserialize, Serialize};\nuse std::fs;\nuse std::path::PathBuf;\nuse std::process::Command;\nuse std::sync::{Mutex, RwLock};\nuse std::io::Write as IoWrite;\nuse std::time::SystemTime;\n#[cfg(target_os = \"macos\")]\nuse std::time::Duration;\nuse tauri::image::Image;\nuse tauri::menu::{\n    CheckMenuItem, Menu, MenuBuilder, MenuItem, PredefinedMenuItem, SubmenuBuilder, HELP_SUBMENU_ID,\n};\nuse tauri::tray::{TrayIconBuilder, TrayIconEvent};\n#[cfg(target_os = \"macos\")]\nuse tauri::WebviewWindow;\nuse tauri::Wry;\nuse tauri::{\n    webview::PageLoadPayload, AppHandle, Manager, Webview, WebviewUrl, WebviewWindowBuilder,\n};\n#[cfg(target_os = \"macos\")]\nuse tokio::time::sleep;\nuse url::Url;\n#[cfg(target_os = \"macos\")]\nuse window_vibrancy::{apply_vibrancy, NSVisualEffectMaterial};\n\n// ============================================================================\n// Configuration\n// ============================================================================\n\nconst DEFAULT_SERVER_URL: &str = \"https://cloud.onyx.app\";\nconst CONFIG_FILE_NAME: &str = \"config.json\";\n#[cfg(target_os = \"macos\")]\nconst TITLEBAR_SCRIPT: &str = include_str!(\"../../src/titlebar.js\");\nconst TRAY_ID: &str = \"onyx-tray\";\nconst TRAY_ICON_BYTES: &[u8] = include_bytes!(\"../icons/tray-icon.png\");\nconst TRAY_MENU_OPEN_APP_ID: &str = \"tray_open_app\";\nconst TRAY_MENU_OPEN_CHAT_ID: &str = \"tray_open_chat\";\nconst TRAY_MENU_SHOW_IN_BAR_ID: &str = \"tray_show_in_menu_bar\";\nconst TRAY_MENU_QUIT_ID: &str = \"tray_quit\";\nconst MENU_SHOW_MENU_BAR_ID: &str = \"show_menu_bar\";\nconst MENU_HIDE_DECORATIONS_ID: &str = \"hide_window_decorations\";\nconst CHAT_LINK_INTERCEPT_SCRIPT: &str = r##\"\n(() => {\n  if (window.__ONYX_CHAT_LINK_INTERCEPT_INSTALLED__) {\n    return;\n  }\n\n  window.__ONYX_CHAT_LINK_INTERCEPT_INSTALLED__ = true;\n\n  function isChatSessionPage() {\n    try {\n      const currentUrl = new URL(window.location.href);\n      return (\n        currentUrl.pathname.startsWith(\"/app\") &&\n        currentUrl.searchParams.has(\"chatId\")\n      );\n    } catch {\n      return false;\n    }\n  }\n\n  function getAllowedNavigationUrl(rawUrl) {\n    try {\n      const parsed = new URL(String(rawUrl), window.location.href);\n      const scheme = parsed.protocol.toLowerCase();\n      if (![\"http:\", \"https:\", \"mailto:\", \"tel:\"].includes(scheme)) {\n        return null;\n      }\n      return parsed;\n    } catch {\n      return null;\n    }\n  }\n\n  async function openWithTauri(url) {\n    try {\n      const invoke =\n        window.__TAURI__?.core?.invoke || window.__TAURI_INTERNALS__?.invoke;\n      if (typeof invoke !== \"function\") {\n        return false;\n      }\n\n      await invoke(\"open_in_browser\", { url });\n      return true;\n    } catch {\n      return false;\n    }\n  }\n\n  function handleChatNavigation(rawUrl) {\n    const parsedUrl = getAllowedNavigationUrl(rawUrl);\n    if (!parsedUrl) {\n      return false;\n    }\n\n    const safeUrl = parsedUrl.toString();\n    const scheme = parsedUrl.protocol.toLowerCase();\n    if (scheme === \"mailto:\" || scheme === \"tel:\") {\n      void openWithTauri(safeUrl).then((opened) => {\n        if (!opened) {\n          window.location.assign(safeUrl);\n        }\n      });\n      return true;\n    }\n\n    window.location.assign(safeUrl);\n    return true;\n  }\n\n  document.addEventListener(\n    \"click\",\n    (event) => {\n      if (!isChatSessionPage() || event.defaultPrevented) {\n        return;\n      }\n\n      const element = event.target;\n      if (!(element instanceof Element)) {\n        return;\n      }\n\n      const anchor = element.closest(\"a\");\n      if (!(anchor instanceof HTMLAnchorElement)) {\n        return;\n      }\n\n      const target = (anchor.getAttribute(\"target\") || \"\").toLowerCase();\n      if (target !== \"_blank\") {\n        return;\n      }\n\n      const href = anchor.getAttribute(\"href\");\n      if (!href || href.startsWith(\"#\")) {\n        return;\n      }\n\n      if (!handleChatNavigation(href)) {\n        return;\n      }\n\n      event.preventDefault();\n      event.stopPropagation();\n    },\n    true\n  );\n\n  const nativeWindowOpen = window.open;\n  window.open = function(url, target, features) {\n    const resolvedTarget = typeof target === \"string\" ? target.toLowerCase() : \"\";\n    const shouldNavigateInPlace = resolvedTarget === \"\" || resolvedTarget === \"_blank\";\n\n    if (\n      isChatSessionPage() &&\n      shouldNavigateInPlace &&\n      url != null &&\n      String(url).length > 0\n    ) {\n      if (!handleChatNavigation(url)) {\n        return null;\n      }\n      return null;\n    }\n\n    if (typeof nativeWindowOpen === \"function\") {\n      return nativeWindowOpen.call(window, url, target, features);\n    }\n    return null;\n  };\n})();\n\"##;\n\n#[cfg(not(target_os = \"macos\"))]\nconst MENU_KEY_HANDLER_SCRIPT: &str = r#\"\n(() => {\n  if (window.__ONYX_MENU_KEY_HANDLER__) return;\n  window.__ONYX_MENU_KEY_HANDLER__ = true;\n\n  let altHeld = false;\n\n  function invoke(cmd) {\n    const fn_ =\n      window.__TAURI__?.core?.invoke || window.__TAURI_INTERNALS__?.invoke;\n    if (typeof fn_ === 'function') fn_(cmd);\n  }\n\n  function releaseAltAndHideMenu() {\n    if (!altHeld) {\n      return;\n    }\n    altHeld = false;\n    invoke('hide_menu_bar_temporary');\n  }\n\n  document.addEventListener('keydown', (e) => {\n    if (e.key === 'Alt') {\n      if (!altHeld) {\n        altHeld = true;\n        invoke('show_menu_bar_temporarily');\n      }\n      return;\n    }\n    if (e.altKey && e.key === 'F1') {\n      e.preventDefault();\n      e.stopPropagation();\n      altHeld = false;\n      invoke('toggle_menu_bar');\n      return;\n    }\n  }, true);\n\n  document.addEventListener('keyup', (e) => {\n    if (e.key === 'Alt' && altHeld) {\n      releaseAltAndHideMenu();\n    }\n  }, true);\n\n  window.addEventListener('blur', () => {\n    releaseAltAndHideMenu();\n  });\n\n  document.addEventListener('visibilitychange', () => {\n    if (document.hidden) {\n      releaseAltAndHideMenu();\n    }\n  });\n})();\n\"#;\n\nconst CONSOLE_CAPTURE_SCRIPT: &str = r#\"\n(() => {\n  if (window.__ONYX_CONSOLE_CAPTURE__) return;\n  window.__ONYX_CONSOLE_CAPTURE__ = true;\n\n  const levels = ['log', 'warn', 'error', 'info', 'debug'];\n  const originals = {};\n\n  levels.forEach(level => {\n    originals[level] = console[level];\n    console[level] = function(...args) {\n      originals[level].apply(console, args);\n      try {\n        const invoke =\n          window.__TAURI__?.core?.invoke || window.__TAURI_INTERNALS__?.invoke;\n        if (typeof invoke === 'function') {\n          const message = args.map(a => {\n            try { return typeof a === 'string' ? a : JSON.stringify(a); }\n            catch { return String(a); }\n          }).join(' ');\n          invoke('log_from_frontend', { level, message });\n        }\n      } catch {}\n    };\n  });\n\n  window.addEventListener('error', (event) => {\n    try {\n      const invoke =\n        window.__TAURI__?.core?.invoke || window.__TAURI_INTERNALS__?.invoke;\n      if (typeof invoke === 'function') {\n        invoke('log_from_frontend', {\n          level: 'error',\n          message: `[uncaught] ${event.message} at ${event.filename}:${event.lineno}:${event.colno}`\n        });\n      }\n    } catch {}\n  });\n\n  window.addEventListener('unhandledrejection', (event) => {\n    try {\n      const invoke =\n        window.__TAURI__?.core?.invoke || window.__TAURI_INTERNALS__?.invoke;\n      if (typeof invoke === 'function') {\n        invoke('log_from_frontend', {\n          level: 'error',\n          message: `[unhandled rejection] ${event.reason}`\n        });\n      }\n    } catch {}\n  });\n})();\n\"#;\n\nconst MENU_TOGGLE_DEVTOOLS_ID: &str = \"toggle_devtools\";\nconst MENU_OPEN_DEBUG_LOG_ID: &str = \"open_debug_log\";\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct AppConfig {\n    pub server_url: String,\n\n    #[serde(default = \"default_window_title\")]\n    pub window_title: String,\n\n    #[serde(default = \"default_show_menu_bar\")]\n    pub show_menu_bar: bool,\n\n    #[serde(default)]\n    pub hide_window_decorations: bool,\n}\n\nfn default_window_title() -> String {\n    \"Onyx\".to_string()\n}\n\nfn default_show_menu_bar() -> bool {\n    true\n}\n\nimpl Default for AppConfig {\n    fn default() -> Self {\n        Self {\n            server_url: DEFAULT_SERVER_URL.to_string(),\n            window_title: default_window_title(),\n            show_menu_bar: true,\n            hide_window_decorations: false,\n        }\n    }\n}\n\n/// Get the config directory path\nfn get_config_dir() -> Option<PathBuf> {\n    ProjectDirs::from(\"app\", \"onyx\", \"onyx-desktop\").map(|dirs| dirs.config_dir().to_path_buf())\n}\n\n/// Get the full config file path\nfn get_config_path() -> Option<PathBuf> {\n    get_config_dir().map(|dir| dir.join(CONFIG_FILE_NAME))\n}\n\n/// Load config from file, or create default if it doesn't exist\nfn load_config() -> (AppConfig, bool) {\n    let config_path = match get_config_path() {\n        Some(path) => path,\n        None => {\n            return (AppConfig::default(), false);\n        }\n    };\n\n    if !config_path.exists() {\n        return (AppConfig::default(), false);\n    }\n\n    match fs::read_to_string(&config_path) {\n        Ok(contents) => match serde_json::from_str(&contents) {\n            Ok(config) => (config, true),\n            Err(_) => (AppConfig::default(), false),\n        },\n        Err(_) => (AppConfig::default(), false),\n    }\n}\n\n/// Save config to file\nfn save_config(config: &AppConfig) -> Result<(), String> {\n    let config_dir = get_config_dir().ok_or(\"Could not determine config directory\")?;\n    let config_path = config_dir.join(CONFIG_FILE_NAME);\n\n    // Ensure config directory exists\n    fs::create_dir_all(&config_dir).map_err(|e| format!(\"Failed to create config dir: {}\", e))?;\n\n    let json = serde_json::to_string_pretty(config)\n        .map_err(|e| format!(\"Failed to serialize config: {}\", e))?;\n\n    fs::write(&config_path, json).map_err(|e| format!(\"Failed to write config: {}\", e))?;\n\n    Ok(())\n}\n\n// ============================================================================\n// Debug Mode\n// ============================================================================\n\nfn is_debug_mode() -> bool {\n    std::env::args().any(|arg| arg == \"--debug\") || std::env::var(\"ONYX_DEBUG\").is_ok()\n}\n\nfn get_debug_log_path() -> Option<PathBuf> {\n    get_config_dir().map(|dir| dir.join(\"frontend_debug.log\"))\n}\n\nfn init_debug_log_file() -> Option<fs::File> {\n    let log_path = get_debug_log_path()?;\n    if let Some(parent) = log_path.parent() {\n        let _ = fs::create_dir_all(parent);\n    }\n    fs::OpenOptions::new()\n        .create(true)\n        .append(true)\n        .open(&log_path)\n        .ok()\n}\n\nfn format_utc_timestamp() -> String {\n    let now = SystemTime::now()\n        .duration_since(SystemTime::UNIX_EPOCH)\n        .unwrap_or_default();\n    let total_secs = now.as_secs();\n    let millis = now.subsec_millis();\n\n    let days = total_secs / 86400;\n    let secs_of_day = total_secs % 86400;\n    let hours = secs_of_day / 3600;\n    let mins = (secs_of_day % 3600) / 60;\n    let secs = secs_of_day % 60;\n\n    // Days since Unix epoch -> Y/M/D via civil calendar arithmetic\n    let z = days as i64 + 719468;\n    let era = z / 146097;\n    let doe = z - era * 146097;\n    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;\n    let y = yoe + era * 400;\n    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);\n    let mp = (5 * doy + 2) / 153;\n    let d = doy - (153 * mp + 2) / 5 + 1;\n    let m = if mp < 10 { mp + 3 } else { mp - 9 };\n    let y = if m <= 2 { y + 1 } else { y };\n\n    format!(\n        \"{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}Z\",\n        y, m, d, hours, mins, secs, millis\n    )\n}\n\nfn inject_console_capture(webview: &Webview) {\n    let _ = webview.eval(CONSOLE_CAPTURE_SCRIPT);\n}\n\nfn maybe_open_devtools(app: &AppHandle, window: &tauri::WebviewWindow) {\n    #[cfg(any(debug_assertions, feature = \"devtools\"))]\n    {\n        let state = app.state::<ConfigState>();\n        if state.debug_mode {\n            window.open_devtools();\n        }\n    }\n    #[cfg(not(any(debug_assertions, feature = \"devtools\")))]\n    {\n        let _ = (app, window);\n    }\n}\n\n// Global config state\nstruct ConfigState {\n    config: RwLock<AppConfig>,\n    config_initialized: RwLock<bool>,\n    app_base_url: RwLock<Option<Url>>,\n    menu_temporarily_visible: RwLock<bool>,\n    debug_mode: bool,\n    debug_log_file: Mutex<Option<fs::File>>,\n}\n\nfn focus_main_window(app: &AppHandle) {\n    if let Some(window) = app.get_webview_window(\"main\") {\n        let _ = window.unminimize();\n        let _ = window.show();\n        let _ = window.set_focus();\n    } else {\n        trigger_new_window(app);\n    }\n}\n\nfn trigger_new_chat(app: &AppHandle) {\n    let state = app.state::<ConfigState>();\n    let server_url = state.config.read().unwrap().server_url.clone();\n\n    if let Some(window) = app.get_webview_window(\"main\") {\n        let url = format!(\"{}/chat\", server_url);\n        let _ = window.eval(&format!(\"window.location.href = '{}'\", url));\n    }\n}\n\nfn trigger_new_window(app: &AppHandle) {\n    let state = app.state::<ConfigState>();\n    let server_url = state.config.read().unwrap().server_url.clone();\n    let handle = app.clone();\n\n    tauri::async_runtime::spawn(async move {\n        let window_label = format!(\"onyx-{}\", uuid::Uuid::new_v4());\n        let builder = WebviewWindowBuilder::new(\n            &handle,\n            &window_label,\n            WebviewUrl::External(server_url.parse().unwrap()),\n        )\n        .title(\"Onyx\")\n        .inner_size(1200.0, 800.0)\n        .min_inner_size(800.0, 600.0)\n        .transparent(true);\n\n        #[cfg(target_os = \"macos\")]\n        let builder = builder\n            .title_bar_style(tauri::TitleBarStyle::Overlay)\n            .hidden_title(true);\n\n        #[cfg(target_os = \"linux\")]\n        let builder = builder.background_color(tauri::window::Color(0x1a, 0x1a, 0x2e, 0xff));\n\n        if let Ok(window) = builder.build() {\n            #[cfg(target_os = \"macos\")]\n            {\n                let _ = apply_vibrancy(&window, NSVisualEffectMaterial::Sidebar, None, None);\n                inject_titlebar(window.clone());\n            }\n\n            apply_settings_to_window(&handle, &window);\n            maybe_open_devtools(&handle, &window);\n            let _ = window.set_focus();\n        }\n    });\n}\n\nfn open_docs() {\n    let _ = open_in_default_browser(\"https://docs.onyx.app\");\n}\n\nfn open_settings(app: &AppHandle) {\n    // Navigate main window to the settings page (index.html) with settings flag\n    let state = app.state::<ConfigState>();\n    let settings_url = state\n        .app_base_url\n        .read()\n        .unwrap()\n        .as_ref()\n        .cloned()\n        .and_then(|mut url| {\n            url.set_query(None);\n            url.set_fragment(Some(\"settings\"));\n            url.set_path(\"/\");\n            Some(url)\n        })\n        .or_else(|| Url::parse(\"tauri://localhost/#settings\").ok());\n\n    if let Some(window) = app.get_webview_window(\"main\") {\n        if let Some(url) = settings_url {\n            let _ = window.navigate(url);\n        }\n    }\n}\n\nfn same_origin(left: &Url, right: &Url) -> bool {\n    left.scheme() == right.scheme()\n        && left.host_str() == right.host_str()\n        && left.port_or_known_default() == right.port_or_known_default()\n}\n\nfn is_chat_session_url(url: &Url) -> bool {\n    url.path().starts_with(\"/app\") && url.query_pairs().any(|(key, _)| key == \"chatId\")\n}\n\nfn should_open_in_external_browser(current_url: &Url, destination_url: &Url) -> bool {\n    if !is_chat_session_url(current_url) {\n        return false;\n    }\n\n    match destination_url.scheme() {\n        \"mailto\" | \"tel\" => true,\n        \"http\" | \"https\" => !same_origin(current_url, destination_url),\n        _ => false,\n    }\n}\n\nfn open_in_default_browser(url: &str) -> bool {\n    #[cfg(target_os = \"macos\")]\n    {\n        return Command::new(\"open\").arg(url).status().is_ok();\n    }\n    #[cfg(target_os = \"linux\")]\n    {\n        return Command::new(\"xdg-open\").arg(url).status().is_ok();\n    }\n    #[cfg(target_os = \"windows\")]\n    {\n        return Command::new(\"rundll32\")\n            .arg(\"url.dll,FileProtocolHandler\")\n            .arg(url)\n            .status()\n            .is_ok();\n    }\n    #[allow(unreachable_code)]\n    false\n}\n\n#[tauri::command]\nfn open_in_browser(url: String) -> Result<(), String> {\n    let parsed_url = Url::parse(&url).map_err(|_| \"Invalid URL\".to_string())?;\n    match parsed_url.scheme() {\n        \"http\" | \"https\" | \"mailto\" | \"tel\" => {}\n        _ => return Err(\"Unsupported URL scheme\".to_string()),\n    }\n\n    if open_in_default_browser(parsed_url.as_str()) {\n        Ok(())\n    } else {\n        Err(\"Failed to open URL in default browser\".to_string())\n    }\n}\n\nfn inject_chat_link_intercept(webview: &Webview) {\n    let _ = webview.eval(CHAT_LINK_INTERCEPT_SCRIPT);\n}\n\nfn handle_toggle_devtools(app: &AppHandle) {\n    #[cfg(any(debug_assertions, feature = \"devtools\"))]\n    {\n        let windows: Vec<_> = app.webview_windows().into_values().collect();\n        let any_open = windows.iter().any(|w| w.is_devtools_open());\n        for window in &windows {\n            if any_open {\n                window.close_devtools();\n            } else {\n                window.open_devtools();\n            }\n        }\n    }\n    #[cfg(not(any(debug_assertions, feature = \"devtools\")))]\n    {\n        let _ = app;\n    }\n}\n\nfn handle_open_debug_log() {\n    let log_path = match get_debug_log_path() {\n        Some(p) => p,\n        None => return,\n    };\n\n    if !log_path.exists() {\n        eprintln!(\"[ONYX DEBUG] Log file does not exist yet: {:?}\", log_path);\n        return;\n    }\n\n    let url_path = log_path.to_string_lossy().replace('\\\\', \"/\");\n    let _ = open_in_default_browser(&format!(\n        \"file:///{}\",\n        url_path.trim_start_matches('/')\n    ));\n}\n\n// ============================================================================\n// Tauri Commands\n// ============================================================================\n\n#[tauri::command]\nfn log_from_frontend(level: String, message: String, state: tauri::State<ConfigState>) {\n    if !state.debug_mode {\n        return;\n    }\n    let timestamp = format_utc_timestamp();\n    let log_line = format!(\"[{}] [{}] {}\", timestamp, level.to_uppercase(), message);\n\n    eprintln!(\"{}\", log_line);\n\n    if let Ok(mut guard) = state.debug_log_file.lock() {\n        if let Some(ref mut file) = *guard {\n            let _ = writeln!(file, \"{}\", log_line);\n            let _ = file.flush();\n        }\n    }\n}\n\n/// Get the current server URL\n#[tauri::command]\nfn get_server_url(state: tauri::State<ConfigState>) -> String {\n    state.config.read().unwrap().server_url.clone()\n}\n\n#[derive(Serialize)]\nstruct BootstrapState {\n    server_url: String,\n    config_exists: bool,\n}\n\n/// Get the server URL plus whether a config file exists\n#[tauri::command]\nfn get_bootstrap_state(state: tauri::State<ConfigState>) -> BootstrapState {\n    let server_url = state.config.read().unwrap().server_url.clone();\n    let config_initialized = *state.config_initialized.read().unwrap();\n    let config_exists =\n        config_initialized && get_config_path().map(|path| path.exists()).unwrap_or(false);\n\n    BootstrapState {\n        server_url,\n        config_exists,\n    }\n}\n\n/// Set a new server URL and save to config\n#[tauri::command]\nfn set_server_url(state: tauri::State<ConfigState>, url: String) -> Result<String, String> {\n    // Validate URL\n    if !url.starts_with(\"http://\") && !url.starts_with(\"https://\") {\n        return Err(\"URL must start with http:// or https://\".to_string());\n    }\n\n    let mut config = state.config.write().unwrap();\n    config.server_url = url.trim_end_matches('/').to_string();\n    save_config(&config)?;\n    *state.config_initialized.write().unwrap() = true;\n\n    Ok(config.server_url.clone())\n}\n\n/// Get the config file path (so users know where to edit)\n#[tauri::command]\nfn get_config_path_cmd() -> Result<String, String> {\n    get_config_path()\n        .map(|p| p.to_string_lossy().to_string())\n        .ok_or_else(|| \"Could not determine config path\".to_string())\n}\n\n/// Open the config file in the default editor\n#[tauri::command]\nfn open_config_file() -> Result<(), String> {\n    let config_path = get_config_path().ok_or(\"Could not determine config path\")?;\n\n    // Ensure config exists\n    if !config_path.exists() {\n        save_config(&AppConfig::default())?;\n    }\n\n    #[cfg(target_os = \"macos\")]\n    {\n        std::process::Command::new(\"open\")\n            .arg(\"-t\")\n            .arg(&config_path)\n            .spawn()\n            .map_err(|e| format!(\"Failed to open config: {}\", e))?;\n    }\n\n    #[cfg(target_os = \"linux\")]\n    {\n        std::process::Command::new(\"xdg-open\")\n            .arg(&config_path)\n            .spawn()\n            .map_err(|e| format!(\"Failed to open config: {}\", e))?;\n    }\n\n    #[cfg(target_os = \"windows\")]\n    {\n        std::process::Command::new(\"notepad\")\n            .arg(&config_path)\n            .spawn()\n            .map_err(|e| format!(\"Failed to open config: {}\", e))?;\n    }\n\n    Ok(())\n}\n\n/// Open the config directory in file manager\n#[tauri::command]\nfn open_config_directory() -> Result<(), String> {\n    let config_dir = get_config_dir().ok_or(\"Could not determine config directory\")?;\n\n    // Ensure directory exists\n    fs::create_dir_all(&config_dir).map_err(|e| format!(\"Failed to create config dir: {}\", e))?;\n\n    #[cfg(target_os = \"macos\")]\n    {\n        std::process::Command::new(\"open\")\n            .arg(&config_dir)\n            .spawn()\n            .map_err(|e| format!(\"Failed to open directory: {}\", e))?;\n    }\n\n    #[cfg(target_os = \"linux\")]\n    {\n        std::process::Command::new(\"xdg-open\")\n            .arg(&config_dir)\n            .spawn()\n            .map_err(|e| format!(\"Failed to open directory: {}\", e))?;\n    }\n\n    #[cfg(target_os = \"windows\")]\n    {\n        std::process::Command::new(\"explorer\")\n            .arg(&config_dir)\n            .spawn()\n            .map_err(|e| format!(\"Failed to open directory: {}\", e))?;\n    }\n\n    Ok(())\n}\n\n/// Navigate to a specific path on the configured server\n#[tauri::command]\nfn navigate_to(window: tauri::WebviewWindow, state: tauri::State<ConfigState>, path: &str) {\n    let base_url = state.config.read().unwrap().server_url.clone();\n    let url = format!(\"{}{}\", base_url, path);\n    let _ = window.eval(&format!(\"window.location.href = '{}'\", url));\n}\n\n/// Reload the current page\n#[tauri::command]\nfn reload_page(window: tauri::WebviewWindow) {\n    let _ = window.eval(\"window.location.reload()\");\n}\n\n/// Go back in history\n#[tauri::command]\nfn go_back(window: tauri::WebviewWindow) {\n    let _ = window.eval(\"window.history.back()\");\n}\n\n/// Go forward in history\n#[tauri::command]\nfn go_forward(window: tauri::WebviewWindow) {\n    let _ = window.eval(\"window.history.forward()\");\n}\n\n/// Open a new window\n#[tauri::command]\nasync fn new_window(app: AppHandle, state: tauri::State<'_, ConfigState>) -> Result<(), String> {\n    let server_url = state.config.read().unwrap().server_url.clone();\n    let window_label = format!(\"onyx-{}\", uuid::Uuid::new_v4());\n\n    let builder = WebviewWindowBuilder::new(\n        &app,\n        &window_label,\n        WebviewUrl::External(\n            server_url\n                .parse()\n                .map_err(|e| format!(\"Invalid URL: {}\", e))?,\n        ),\n    )\n    .title(\"Onyx\")\n    .inner_size(1200.0, 800.0)\n    .min_inner_size(800.0, 600.0)\n    .transparent(true);\n\n    #[cfg(target_os = \"macos\")]\n    let builder = builder\n        .title_bar_style(tauri::TitleBarStyle::Overlay)\n        .hidden_title(true);\n\n    #[cfg(target_os = \"linux\")]\n    let builder = builder.background_color(tauri::window::Color(0x1a, 0x1a, 0x2e, 0xff));\n\n    let window = builder.build().map_err(|e| e.to_string())?;\n\n    #[cfg(target_os = \"macos\")]\n    {\n        let _ = apply_vibrancy(&window, NSVisualEffectMaterial::Sidebar, None, None);\n        inject_titlebar(window.clone());\n    }\n\n    apply_settings_to_window(&app, &window);\n    maybe_open_devtools(&app, &window);\n\n    Ok(())\n}\n\n/// Reset config to defaults\n#[tauri::command]\nfn reset_config(state: tauri::State<ConfigState>) -> Result<(), String> {\n    let mut config = state.config.write().unwrap();\n    *config = AppConfig::default();\n    save_config(&config)?;\n    *state.config_initialized.write().unwrap() = true;\n    Ok(())\n}\n\n#[cfg(target_os = \"macos\")]\nfn inject_titlebar(window: WebviewWindow) {\n    let script = TITLEBAR_SCRIPT.to_string();\n    tauri::async_runtime::spawn(async move {\n        // Keep trying for a few seconds to survive navigations and slow loads\n        let delays = [0u64, 200, 600, 1200, 2000, 4000, 6000, 8000, 10000];\n        for delay in delays {\n            if delay > 0 {\n                sleep(Duration::from_millis(delay)).await;\n            }\n            let _ = window.eval(&script);\n        }\n    });\n}\n\n/// Start dragging the window\n#[tauri::command]\nasync fn start_drag_window(window: tauri::Window) -> Result<(), String> {\n    window.start_dragging().map_err(|e| e.to_string())\n}\n\n// ============================================================================\n// Window Settings\n// ============================================================================\n\nfn find_check_menu_item(\n    app: &AppHandle,\n    id: &str,\n) -> Option<CheckMenuItem<tauri::Wry>> {\n    let menu = app.menu()?;\n    for item in menu.items().ok()? {\n        if let Some(submenu) = item.as_submenu() {\n            for sub_item in submenu.items().ok()? {\n                if let Some(check) = sub_item.as_check_menuitem() {\n                    if check.id().as_ref() == id {\n                        return Some(check.clone());\n                    }\n                }\n            }\n        }\n    }\n    None\n}\n\nfn apply_settings_to_window(app: &AppHandle, window: &tauri::WebviewWindow) {\n    if cfg!(target_os = \"macos\") {\n        return;\n    }\n    let state = app.state::<ConfigState>();\n    let config = state.config.read().unwrap();\n    let temp_visible = *state.menu_temporarily_visible.read().unwrap();\n    if !config.show_menu_bar && !temp_visible {\n        let _ = window.hide_menu();\n    }\n    if config.hide_window_decorations {\n        let _ = window.set_decorations(false);\n    }\n}\n\nfn handle_menu_bar_toggle(app: &AppHandle) {\n    if cfg!(target_os = \"macos\") {\n        return;\n    }\n    let state = app.state::<ConfigState>();\n    let show = {\n        let mut config = state.config.write().unwrap();\n        config.show_menu_bar = !config.show_menu_bar;\n        let _ = save_config(&config);\n        config.show_menu_bar\n    };\n\n    *state.menu_temporarily_visible.write().unwrap() = false;\n\n    for (_, window) in app.webview_windows() {\n        if show {\n            let _ = window.show_menu();\n        } else {\n            let _ = window.hide_menu();\n        }\n    }\n}\n\nfn handle_decorations_toggle(app: &AppHandle) {\n    if cfg!(target_os = \"macos\") {\n        return;\n    }\n    let state = app.state::<ConfigState>();\n    let hide = {\n        let mut config = state.config.write().unwrap();\n        config.hide_window_decorations = !config.hide_window_decorations;\n        let _ = save_config(&config);\n        config.hide_window_decorations\n    };\n\n    for (_, window) in app.webview_windows() {\n        let _ = window.set_decorations(!hide);\n    }\n}\n\n#[tauri::command]\nfn toggle_menu_bar(app: AppHandle) {\n    if cfg!(target_os = \"macos\") {\n        return;\n    }\n    handle_menu_bar_toggle(&app);\n\n    let state = app.state::<ConfigState>();\n    let checked = state.config.read().unwrap().show_menu_bar;\n    if let Some(check) = find_check_menu_item(&app, MENU_SHOW_MENU_BAR_ID) {\n        let _ = check.set_checked(checked);\n    }\n}\n\n#[tauri::command]\nfn show_menu_bar_temporarily(app: AppHandle) {\n    if cfg!(target_os = \"macos\") {\n        return;\n    }\n    let state = app.state::<ConfigState>();\n    if state.config.read().unwrap().show_menu_bar {\n        return;\n    }\n\n    let mut temp = state.menu_temporarily_visible.write().unwrap();\n    if *temp {\n        return;\n    }\n    *temp = true;\n    drop(temp);\n\n    for (_, window) in app.webview_windows() {\n        let _ = window.show_menu();\n    }\n}\n\n#[tauri::command]\nfn hide_menu_bar_temporary(app: AppHandle) {\n    if cfg!(target_os = \"macos\") {\n        return;\n    }\n    let state = app.state::<ConfigState>();\n    let mut temp = state.menu_temporarily_visible.write().unwrap();\n    if !*temp {\n        return;\n    }\n    *temp = false;\n    drop(temp);\n\n    if state.config.read().unwrap().show_menu_bar {\n        return;\n    }\n\n    for (_, window) in app.webview_windows() {\n        let _ = window.hide_menu();\n    }\n}\n\n// ============================================================================\n// Menu Setup\n// ============================================================================\n\nfn setup_app_menu(app: &AppHandle) -> tauri::Result<()> {\n    let menu = app.menu().unwrap_or(Menu::default(app)?);\n\n    let new_chat_item = MenuItem::with_id(app, \"new_chat\", \"New Chat\", true, Some(\"CmdOrCtrl+N\"))?;\n    let new_window_item = MenuItem::with_id(\n        app,\n        \"new_window\",\n        \"New Window\",\n        true,\n        Some(\"CmdOrCtrl+Shift+N\"),\n    )?;\n    let settings_item = MenuItem::with_id(\n        app,\n        \"open_settings\",\n        \"Settings...\",\n        true,\n        Some(\"CmdOrCtrl+Comma\"),\n    )?;\n    let docs_item = MenuItem::with_id(app, \"open_docs\", \"Onyx Documentation\", true, None::<&str>)?;\n\n    if let Some(file_menu) = menu\n        .items()?\n        .into_iter()\n        .filter_map(|item| item.as_submenu().cloned())\n        .find(|submenu| submenu.text().ok().as_deref() == Some(\"File\"))\n    {\n        file_menu.insert_items(&[&new_chat_item, &new_window_item, &settings_item], 0)?;\n    } else {\n        let file_menu = SubmenuBuilder::new(app, \"File\")\n            .items(&[\n                &new_chat_item,\n                &new_window_item,\n                &settings_item,\n                &PredefinedMenuItem::close_window(app, None)?,\n            ])\n            .build()?;\n        menu.prepend(&file_menu)?;\n    }\n\n    #[cfg(not(target_os = \"macos\"))]\n    {\n        let config = app.state::<ConfigState>();\n        let config_guard = config.config.read().unwrap();\n\n        let show_menu_bar_item = CheckMenuItem::with_id(\n            app,\n            MENU_SHOW_MENU_BAR_ID,\n            \"Show Menu Bar\",\n            true,\n            config_guard.show_menu_bar,\n            None::<&str>,\n        )?;\n\n        let hide_decorations_item = CheckMenuItem::with_id(\n            app,\n            MENU_HIDE_DECORATIONS_ID,\n            \"Hide Window Decorations\",\n            true,\n            config_guard.hide_window_decorations,\n            None::<&str>,\n        )?;\n\n        drop(config_guard);\n\n        if let Some(window_menu) = menu\n            .items()?\n            .into_iter()\n            .filter_map(|item| item.as_submenu().cloned())\n            .find(|submenu| submenu.text().ok().as_deref() == Some(\"Window\"))\n        {\n            window_menu.append(&show_menu_bar_item)?;\n            window_menu.append(&hide_decorations_item)?;\n        } else {\n            let window_menu = SubmenuBuilder::new(app, \"Window\")\n                .item(&show_menu_bar_item)\n                .item(&hide_decorations_item)\n                .build()?;\n\n            let items = menu.items()?;\n            let help_idx = items\n                .iter()\n                .position(|item| {\n                    item.as_submenu()\n                        .and_then(|s| s.text().ok())\n                        .as_deref()\n                        == Some(\"Help\")\n                })\n                .unwrap_or(items.len());\n            menu.insert(&window_menu, help_idx)?;\n        }\n    }\n\n    if let Some(help_menu) = menu\n        .get(HELP_SUBMENU_ID)\n        .and_then(|item| item.as_submenu().cloned())\n    {\n        help_menu.append(&docs_item)?;\n    } else {\n        let help_menu = SubmenuBuilder::with_id(app, HELP_SUBMENU_ID, \"Help\")\n            .item(&docs_item)\n            .build()?;\n        menu.append(&help_menu)?;\n    }\n\n    let state = app.state::<ConfigState>();\n    if state.debug_mode {\n        let toggle_devtools_item = MenuItem::with_id(\n            app,\n            MENU_TOGGLE_DEVTOOLS_ID,\n            \"Toggle DevTools\",\n            true,\n            Some(\"F12\"),\n        )?;\n        let open_log_item = MenuItem::with_id(\n            app,\n            MENU_OPEN_DEBUG_LOG_ID,\n            \"Open Debug Log\",\n            true,\n            None::<&str>,\n        )?;\n\n        let debug_menu = SubmenuBuilder::new(app, \"Debug\")\n            .item(&toggle_devtools_item)\n            .item(&open_log_item)\n            .build()?;\n        menu.append(&debug_menu)?;\n    }\n\n    app.set_menu(menu)?;\n    Ok(())\n}\n\nfn build_tray_menu(app: &AppHandle) -> tauri::Result<Menu<Wry>> {\n    let open_app = MenuItem::with_id(app, TRAY_MENU_OPEN_APP_ID, \"Open Onyx\", true, None::<&str>)?;\n    let open_chat = MenuItem::with_id(\n        app,\n        TRAY_MENU_OPEN_CHAT_ID,\n        \"Open Chat Window\",\n        true,\n        None::<&str>,\n    )?;\n    let show_in_menu_bar = CheckMenuItem::with_id(\n        app,\n        TRAY_MENU_SHOW_IN_BAR_ID,\n        \"Show in Menu Bar\",\n        true,\n        true,\n        None::<&str>,\n    )?;\n    // Keep it visible/pinned without letting users uncheck (avoids orphaning the tray)\n    let _ = show_in_menu_bar.set_enabled(false);\n    let quit = PredefinedMenuItem::quit(app, Some(\"Quit Onyx\"))?;\n\n    MenuBuilder::new(app)\n        .item(&open_app)\n        .item(&open_chat)\n        .separator()\n        .item(&show_in_menu_bar)\n        .separator()\n        .item(&quit)\n        .build()\n}\n\nfn handle_tray_menu_event(app: &AppHandle, id: &str) {\n    match id {\n        TRAY_MENU_OPEN_APP_ID => {\n            focus_main_window(app);\n        }\n        TRAY_MENU_OPEN_CHAT_ID => {\n            focus_main_window(app);\n            trigger_new_chat(app);\n        }\n        TRAY_MENU_QUIT_ID => {\n            app.exit(0);\n        }\n        TRAY_MENU_SHOW_IN_BAR_ID => {\n            // No-op for now; the item stays checked/disabled to indicate it's pinned.\n        }\n        _ => {}\n    }\n}\n\nfn setup_tray_icon(app: &AppHandle) -> tauri::Result<()> {\n    let mut builder = TrayIconBuilder::with_id(TRAY_ID).tooltip(\"Onyx\");\n\n    let tray_icon = Image::from_bytes(TRAY_ICON_BYTES)\n        .ok()\n        .or_else(|| app.default_window_icon().cloned());\n\n    if let Some(icon) = tray_icon {\n        builder = builder.icon(icon);\n\n        #[cfg(target_os = \"macos\")]\n        {\n            builder = builder.icon_as_template(true);\n        }\n    }\n\n    if let Ok(menu) = build_tray_menu(app) {\n        builder = builder.menu(&menu);\n    }\n\n    builder\n        .on_tray_icon_event(|tray, event| {\n            if let TrayIconEvent::Click { .. } = event {\n                focus_main_window(tray.app_handle());\n            }\n        })\n        .on_menu_event(|app, event| handle_tray_menu_event(app, event.id().as_ref()))\n        .build(app)?;\n\n    Ok(())\n}\n\n// ============================================================================\n// Main\n// ============================================================================\n\nfn main() {\n    let (config, config_initialized) = load_config();\n    let debug_mode = is_debug_mode();\n\n    let debug_log_file = if debug_mode {\n        eprintln!(\"[ONYX DEBUG] Debug mode enabled\");\n        if let Some(path) = get_debug_log_path() {\n            eprintln!(\"[ONYX DEBUG] Frontend logs: {}\", path.display());\n        }\n        eprintln!(\"[ONYX DEBUG] DevTools will open automatically\");\n        eprintln!(\"[ONYX DEBUG] Capturing console.log/warn/error/info/debug from webview\");\n        init_debug_log_file()\n    } else {\n        None\n    };\n\n    tauri::Builder::default()\n        .plugin(tauri_plugin_shell::init())\n        .plugin(\n            tauri::plugin::Builder::<Wry>::new(\"chat-external-navigation-handler\")\n                .on_navigation(|webview, destination_url| {\n                    let Ok(current_url) = webview.url() else {\n                        return true;\n                    };\n\n                    if should_open_in_external_browser(&current_url, destination_url) {\n                        if !open_in_default_browser(destination_url.as_str()) {\n                            eprintln!(\n                                \"Failed to open external URL in default browser: {}\",\n                                destination_url\n                            );\n                        }\n                        return false;\n                    }\n\n                    true\n                })\n                .build(),\n        )\n        .plugin(tauri_plugin_window_state::Builder::default().build())\n        .manage(ConfigState {\n            config: RwLock::new(config),\n            config_initialized: RwLock::new(config_initialized),\n            app_base_url: RwLock::new(None),\n            menu_temporarily_visible: RwLock::new(false),\n            debug_mode,\n            debug_log_file: Mutex::new(debug_log_file),\n        })\n        .invoke_handler(tauri::generate_handler![\n            get_server_url,\n            get_bootstrap_state,\n            set_server_url,\n            get_config_path_cmd,\n            open_in_browser,\n            open_config_file,\n            open_config_directory,\n            navigate_to,\n            reload_page,\n            go_back,\n            go_forward,\n            new_window,\n            reset_config,\n            start_drag_window,\n            toggle_menu_bar,\n            show_menu_bar_temporarily,\n            hide_menu_bar_temporary,\n            log_from_frontend\n        ])\n        .on_menu_event(|app, event| match event.id().as_ref() {\n            \"open_docs\" => open_docs(),\n            \"new_chat\" => trigger_new_chat(app),\n            \"new_window\" => trigger_new_window(app),\n            \"open_settings\" => open_settings(app),\n            \"show_menu_bar\" => handle_menu_bar_toggle(app),\n            \"hide_window_decorations\" => handle_decorations_toggle(app),\n            MENU_TOGGLE_DEVTOOLS_ID => handle_toggle_devtools(app),\n            MENU_OPEN_DEBUG_LOG_ID => handle_open_debug_log(),\n            _ => {}\n        })\n        .setup(move |app| {\n            let app_handle = app.handle();\n\n            if let Err(e) = setup_app_menu(&app_handle) {\n                eprintln!(\"Failed to setup menu: {}\", e);\n            }\n\n            if let Err(e) = setup_tray_icon(&app_handle) {\n                eprintln!(\"Failed to setup tray icon: {}\", e);\n            }\n\n            // Setup main window with vibrancy effect\n            if let Some(window) = app.get_webview_window(\"main\") {\n                // Apply vibrancy effect for translucent glass look\n                #[cfg(target_os = \"macos\")]\n                {\n                    let _ = apply_vibrancy(&window, NSVisualEffectMaterial::Sidebar, None, None);\n                }\n\n                if let Ok(url) = window.url() {\n                    let mut base_url = url;\n                    base_url.set_query(None);\n                    base_url.set_fragment(None);\n                    base_url.set_path(\"/\");\n                    *app.state::<ConfigState>().app_base_url.write().unwrap() = Some(base_url);\n                }\n\n                #[cfg(target_os = \"macos\")]\n                inject_titlebar(window.clone());\n\n                apply_settings_to_window(&app_handle, &window);\n                maybe_open_devtools(&app_handle, &window);\n\n                let _ = window.set_focus();\n            }\n\n            Ok(())\n        })\n        .on_page_load(|webview: &Webview, _payload: &PageLoadPayload| {\n            inject_chat_link_intercept(webview);\n\n            {\n                let app = webview.app_handle();\n                let state = app.state::<ConfigState>();\n                if state.debug_mode {\n                    inject_console_capture(webview);\n                }\n            }\n\n            #[cfg(not(target_os = \"macos\"))]\n            {\n                let _ = webview.eval(MENU_KEY_HANDLER_SCRIPT);\n\n                let app = webview.app_handle();\n                let state = app.state::<ConfigState>();\n                let config = state.config.read().unwrap();\n                let temp_visible = *state.menu_temporarily_visible.read().unwrap();\n                let label = webview.label().to_string();\n                if !config.show_menu_bar && !temp_visible {\n                    if let Some(win) = app.get_webview_window(&label) {\n                        let _ = win.hide_menu();\n                    }\n                }\n                if config.hide_window_decorations {\n                    if let Some(win) = app.get_webview_window(&label) {\n                        let _ = win.set_decorations(false);\n                    }\n                }\n            }\n\n            #[cfg(target_os = \"macos\")]\n            let _ = webview.eval(TITLEBAR_SCRIPT);\n        })\n        .run(tauri::generate_context!())\n        .expect(\"error while running tauri application\");\n}\n"
  },
  {
    "path": "desktop/src-tauri/tauri.conf.json",
    "content": "{\n  \"$schema\": \"https://schema.tauri.app/config/2.0.0\",\n  \"productName\": \"Onyx\",\n  \"version\": \"0.0.0-dev\",\n  \"identifier\": \"app.onyx.desktop\",\n  \"build\": {\n    \"beforeBuildCommand\": \"\",\n    \"beforeDevCommand\": \"\",\n    \"frontendDist\": \"../src\"\n  },\n  \"app\": {\n    \"withGlobalTauri\": true,\n    \"windows\": [\n      {\n        \"title\": \"Onyx\",\n        \"label\": \"main\",\n        \"url\": \"index.html\",\n        \"width\": 1200,\n        \"height\": 800,\n        \"minWidth\": 800,\n        \"minHeight\": 600,\n        \"resizable\": true,\n        \"fullscreen\": false,\n        \"decorations\": true,\n        \"transparent\": true,\n        \"backgroundColor\": \"#1a1a2e\",\n        \"titleBarStyle\": \"Overlay\",\n        \"hiddenTitle\": true,\n        \"acceptFirstMouse\": true,\n        \"tabbingIdentifier\": \"onyx\"\n      }\n    ],\n    \"security\": {\n      \"csp\": null\n    },\n    \"macOSPrivateApi\": true\n  },\n  \"bundle\": {\n    \"active\": true,\n    \"targets\": \"all\",\n    \"icon\": [\n      \"icons/32x32.png\",\n      \"icons/128x128.png\",\n      \"icons/128x128@2x.png\",\n      \"icons/icon.icns\",\n      \"icons/icon.ico\"\n    ],\n    \"category\": \"Productivity\",\n    \"shortDescription\": \"Onyx Cloud Desktop App\",\n    \"longDescription\": \"A lightweight desktop wrapper for Onyx Cloud - your AI-powered knowledge assistant.\",\n    \"macOS\": {\n      \"entitlements\": null,\n      \"exceptionDomain\": \"cloud.onyx.app\",\n      \"minimumSystemVersion\": \"10.15\",\n      \"signingIdentity\": null,\n      \"dmg\": {\n        \"windowSize\": {\n          \"width\": 660,\n          \"height\": 400\n        }\n      }\n    }\n  },\n  \"plugins\": {\n    \"shell\": {\n      \"open\": true\n    }\n  }\n}\n"
  },
  {
    "path": "docker-bake.hcl",
    "content": "group \"default\" {\n  targets = [\"backend\", \"model-server\", \"web\"]\n}\n\nvariable \"BACKEND_REPOSITORY\" {\n  default = \"onyxdotapp/onyx-backend\"\n}\n\nvariable \"WEB_SERVER_REPOSITORY\" {\n  default = \"onyxdotapp/onyx-web-server\"\n}\n\nvariable \"MODEL_SERVER_REPOSITORY\" {\n  default = \"onyxdotapp/onyx-model-server\"\n}\n\nvariable \"INTEGRATION_REPOSITORY\" {\n  default = \"onyxdotapp/onyx-integration\"\n}\n\nvariable \"CLI_REPOSITORY\" {\n  default = \"onyxdotapp/onyx-cli\"\n}\n\nvariable \"TAG\" {\n  default = \"latest\"\n}\n\ntarget \"backend\" {\n  context    = \"backend\"\n  dockerfile = \"Dockerfile\"\n\n  cache-from = [\"type=registry,ref=${BACKEND_REPOSITORY}:latest\"]\n  cache-to   = [\"type=inline\"]\n\n  tags      = [\"${BACKEND_REPOSITORY}:${TAG}\"]\n}\n\ntarget \"web\" {\n  context    = \"web\"\n  dockerfile = \"Dockerfile\"\n\n  cache-from = [\"type=registry,ref=${WEB_SERVER_REPOSITORY}:latest\"]\n  cache-to   = [\"type=inline\"]\n\n  tags      = [\"${WEB_SERVER_REPOSITORY}:${TAG}\"]\n}\n\ntarget \"model-server\" {\n  context = \"backend\"\n\n  dockerfile = \"Dockerfile.model_server\"\n\n  cache-from = [\"type=registry,ref=${MODEL_SERVER_REPOSITORY}:latest\"]\n  cache-to   = [\"type=inline\"]\n\n  tags      = [\"${MODEL_SERVER_REPOSITORY}:${TAG}\"]\n}\n\ntarget \"integration\" {\n  context    = \"backend\"\n  dockerfile = \"tests/integration/Dockerfile\"\n\n  // Provide the base image via build context from the backend target\n  contexts = {\n    base = \"target:backend\"\n  }\n\n  tags      = [\"${INTEGRATION_REPOSITORY}:${TAG}\"]\n}\n\ntarget \"cli\" {\n  context    = \"cli\"\n  dockerfile = \"Dockerfile\"\n\n  cache-from = [\"type=registry,ref=${CLI_REPOSITORY}:latest\"]\n  cache-to   = [\"type=inline\"]\n\n  tags      = [\"${CLI_REPOSITORY}:${TAG}\"]\n}\n"
  },
  {
    "path": "docs/METRICS.md",
    "content": "# Onyx Prometheus Metrics Reference\n\n## Adding New Metrics\n\nAll Prometheus metrics live in the `backend/onyx/server/metrics/` package. Follow these steps to add a new metric.\n\n### 1. Choose the right file (or create a new one)\n\n| File | Purpose |\n|------|---------|\n| `metrics/slow_requests.py` | Slow request counter + callback |\n| `metrics/postgres_connection_pool.py` | SQLAlchemy connection pool metrics |\n| `metrics/prometheus_setup.py` | FastAPI instrumentator config (orchestrator) |\n\nIf your metric is a standalone concern (e.g. cache hit rates, queue depths), create a new file under `metrics/` and keep one metric concept per file.\n\n### 2. Define the metric\n\nUse `prometheus_client` types directly at module level:\n\n```python\n# metrics/my_metric.py\nfrom prometheus_client import Counter\n\n_my_counter = Counter(\n    \"onyx_my_counter_total\",          # Always prefix with onyx_\n    \"Human-readable description\",\n    [\"label_a\", \"label_b\"],           # Keep label cardinality low\n)\n```\n\n**Naming conventions:**\n- Prefix all metric names with `onyx_`\n- Counters: `_total` suffix (e.g. `onyx_api_slow_requests_total`)\n- Histograms: `_seconds` or `_bytes` suffix for durations/sizes\n- Gauges: no special suffix\n\n**Label cardinality:** Avoid high-cardinality labels (raw user IDs, UUIDs, raw paths). Use route templates like `/api/items/{item_id}` instead of `/api/items/abc-123`.\n\n### 3. Wire it into the instrumentator (if request-scoped)\n\nIf your metric needs to run on every HTTP request, write a callback and register it in `prometheus_setup.py`:\n\n```python\n# metrics/my_metric.py\nfrom prometheus_fastapi_instrumentator.metrics import Info\n\ndef my_metric_callback(info: Info) -> None:\n    _my_counter.labels(label_a=info.method, label_b=info.modified_handler).inc()\n```\n\n```python\n# metrics/prometheus_setup.py\nfrom onyx.server.metrics.my_metric import my_metric_callback\n\n# Inside setup_prometheus_metrics():\ninstrumentator.add(my_metric_callback)\n```\n\n### 4. Wire it into setup_prometheus_metrics (if infrastructure-scoped)\n\nFor metrics that attach to engines, pools, or background systems, add a setup function and call it from `setup_prometheus_metrics()` in `metrics/prometheus_setup.py`:\n\n```python\n# metrics/my_metric.py\ndef setup_my_metrics(resource: SomeResource) -> None:\n    # Register collectors, attach event listeners, etc.\n    ...\n```\n\n```python\n# metrics/prometheus_setup.py — inside setup_prometheus_metrics()\nfrom onyx.server.metrics.my_metric import setup_my_metrics\n\ndef setup_prometheus_metrics(app, engines=None) -> None:\n    setup_my_metrics(resource)  # Add your call here\n    ...\n```\n\nAll metrics initialization is funneled through the single `setup_prometheus_metrics()` call in `onyx/main.py:lifespan()`. Do not add separate setup calls to `main.py`.\n\n### 5. Write tests\n\nAdd tests in `backend/tests/unit/onyx/server/`. Use `unittest.mock.patch` to mock the prometheus objects — don't increment real global counters in tests.\n\n### 6. Document the metric\n\nAdd your metric to the reference tables below in this file. Include the metric name, type, labels, and description.\n\n### 7. Update Grafana dashboards\n\nAfter deploying, add panels to the relevant Grafana dashboard:\n\n1. Open Grafana and navigate to the Onyx dashboard (or create a new one)\n2. Add a new panel — choose the appropriate visualization:\n   - **Counters** → use `rate()` in a time series panel (e.g. `rate(onyx_my_counter_total[5m])`)\n   - **Histograms** → use `histogram_quantile()` for percentiles, or `_sum/_count` for averages\n   - **Gauges** → display directly as a stat or gauge panel\n3. Add meaningful thresholds and alerts where appropriate\n4. Group related panels into rows (e.g. \"API Performance\", \"Database Pool\")\n\n---\n\n## API Server Metrics\n\nThese metrics are exposed at `GET /metrics` on the API server.\n\n### Built-in (via `prometheus-fastapi-instrumentator`)\n\n| Metric | Type | Labels | Description |\n|--------|------|--------|-------------|\n| `http_requests_total` | Counter | `method`, `status`, `handler` | Total request count |\n| `http_request_duration_highr_seconds` | Histogram | _(none)_ | High-resolution latency (many buckets, no labels) |\n| `http_request_duration_seconds` | Histogram | `method`, `handler` | Latency by handler (custom buckets for P95/P99) |\n| `http_request_size_bytes` | Summary | `handler` | Incoming request content length |\n| `http_response_size_bytes` | Summary | `handler` | Outgoing response content length |\n| `http_requests_inprogress` | Gauge | `method`, `handler` | Currently in-flight requests |\n\n### Custom (via `onyx.server.metrics`)\n\n| Metric | Type | Labels | Description |\n|--------|------|--------|-------------|\n| `onyx_api_slow_requests_total` | Counter | `method`, `handler`, `status` | Requests exceeding `SLOW_REQUEST_THRESHOLD_SECONDS` (default 1s) |\n\n### Configuration\n\n| Env Var | Default | Description |\n|---------|---------|-------------|\n| `SLOW_REQUEST_THRESHOLD_SECONDS` | `1.0` | Duration threshold for slow request counting |\n\n### Instrumentator Settings\n\n- `should_group_status_codes=False` — Reports exact HTTP status codes (e.g. 401, 403, 500)\n- `should_instrument_requests_inprogress=True` — Enables the in-progress request gauge\n- `inprogress_labels=True` — Breaks down in-progress gauge by `method` and `handler`\n- `excluded_handlers=[\"/health\", \"/metrics\", \"/openapi.json\"]` — Excludes noisy endpoints from metrics\n\n## Database Pool Metrics\n\nThese metrics provide visibility into SQLAlchemy connection pool state across all three engines (`sync`, `async`, `readonly`). Collected via `onyx.server.metrics.postgres_connection_pool`.\n\n### Pool State (via custom Prometheus collector — snapshot on each scrape)\n\n| Metric | Type | Labels | Description |\n|--------|------|--------|-------------|\n| `onyx_db_pool_checked_out` | Gauge | `engine` | Currently checked-out connections |\n| `onyx_db_pool_checked_in` | Gauge | `engine` | Idle connections available in the pool |\n| `onyx_db_pool_overflow` | Gauge | `engine` | Current overflow connections beyond `pool_size` |\n| `onyx_db_pool_size` | Gauge | `engine` | Configured pool size (constant) |\n\n### Pool Lifecycle (via SQLAlchemy pool event listeners)\n\n| Metric | Type | Labels | Description |\n|--------|------|--------|-------------|\n| `onyx_db_pool_checkout_total` | Counter | `engine` | Total connection checkouts from the pool |\n| `onyx_db_pool_checkin_total` | Counter | `engine` | Total connection checkins to the pool |\n| `onyx_db_pool_connections_created_total` | Counter | `engine` | Total new database connections created |\n| `onyx_db_pool_invalidations_total` | Counter | `engine` | Total connection invalidations |\n| `onyx_db_pool_checkout_timeout_total` | Counter | `engine` | Total connection checkout timeouts |\n\n### Per-Endpoint Attribution (via pool events + endpoint context middleware)\n\n| Metric | Type | Labels | Description |\n|--------|------|--------|-------------|\n| `onyx_db_connections_held_by_endpoint` | Gauge | `handler`, `engine` | DB connections currently held, by endpoint |\n| `onyx_db_connection_hold_seconds` | Histogram | `handler`, `engine` | Duration a DB connection is held by an endpoint |\n\nEngine label values: `sync` (main read-write), `async` (async sessions), `readonly` (read-only user).\n\nConnections from background tasks (Celery) or boot-time warmup appear as `handler=\"unknown\"`.\n\n## OpenSearch Search Metrics\n\nThese metrics track OpenSearch search latency and throughput. Collected via `onyx.server.metrics.opensearch_search`.\n\n| Metric | Type | Labels | Description |\n|--------|------|--------|-------------|\n| `onyx_opensearch_search_client_duration_seconds` | Histogram | `search_type` | Client-side end-to-end latency (network + serialization + server execution) |\n| `onyx_opensearch_search_server_duration_seconds` | Histogram | `search_type` | Server-side execution time from OpenSearch `took` field |\n| `onyx_opensearch_search_total` | Counter | `search_type` | Total search requests sent to OpenSearch |\n| `onyx_opensearch_searches_in_progress` | Gauge | `search_type` | Currently in-flight OpenSearch searches |\n\nSearch type label values: See `OpenSearchSearchType`.\n\n---\n\n## Example PromQL Queries\n\n### Which endpoints are saturated right now?\n\n```promql\n# Top 10 endpoints by in-progress requests\ntopk(10, http_requests_inprogress)\n```\n\n### What's the P99 latency per endpoint?\n\n```promql\n# P99 latency by handler over the last 5 minutes\nhistogram_quantile(0.99, sum by (handler, le) (rate(http_request_duration_seconds_bucket[5m])))\n```\n\n### Which endpoints have the highest request rate?\n\n```promql\n# Requests per second by handler, top 10\ntopk(10, sum by (handler) (rate(http_requests_total[5m])))\n```\n\n### Which endpoints are returning errors?\n\n```promql\n# 5xx error rate by handler\nsum by (handler) (rate(http_requests_total{status=~\"5..\"}[5m]))\n```\n\n### Slow request hotspots\n\n```promql\n# Slow requests per minute by handler\nsum by (handler) (rate(onyx_api_slow_requests_total[5m])) * 60\n```\n\n### Latency trending up?\n\n```promql\n# Compare P50 latency now vs 1 hour ago\nhistogram_quantile(0.5, sum by (le) (rate(http_request_duration_highr_seconds_bucket[5m])))\n  -\nhistogram_quantile(0.5, sum by (le) (rate(http_request_duration_highr_seconds_bucket[5m] offset 1h)))\n```\n\n### Overall request throughput\n\n```promql\n# Total requests per second across all endpoints\nsum(rate(http_requests_total[5m]))\n```\n\n### Pool utilization (% of capacity in use)\n\n```promql\n# Sync pool utilization: checked-out / (pool_size + max_overflow)\n# NOTE: Replace 10 with your actual POSTGRES_API_SERVER_POOL_OVERFLOW value.\nonyx_db_pool_checked_out{engine=\"sync\"} / (onyx_db_pool_size{engine=\"sync\"} + 10) * 100\n```\n\n### Pool approaching exhaustion?\n\n```promql\n# Alert when checked-out connections exceed 80% of pool capacity\n# NOTE: Replace 10 with your actual POSTGRES_API_SERVER_POOL_OVERFLOW value.\nonyx_db_pool_checked_out{engine=\"sync\"} > 0.8 * (onyx_db_pool_size{engine=\"sync\"} + 10)\n```\n\n### Which endpoints are hogging DB connections?\n\n```promql\n# Top 10 endpoints by connections currently held\ntopk(10, onyx_db_connections_held_by_endpoint{engine=\"sync\"})\n```\n\n### Which endpoints hold connections the longest?\n\n```promql\n# P99 connection hold time by endpoint\nhistogram_quantile(0.99, sum by (handler, le) (rate(onyx_db_connection_hold_seconds_bucket{engine=\"sync\"}[5m])))\n```\n\n### Connection checkout/checkin rate\n\n```promql\n# Checkouts per second by engine\nsum by (engine) (rate(onyx_db_pool_checkout_total[5m]))\n```\n\n### OpenSearch P99 search latency by type\n\n```promql\n# P99 client-side latency by search type\nhistogram_quantile(0.99, sum by (search_type, le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))\n```\n\n### OpenSearch search throughput\n\n```promql\n# Searches per second by type\nsum by (search_type) (rate(onyx_opensearch_search_total[5m]))\n```\n\n### OpenSearch concurrent searches\n\n```promql\n# Total in-flight searches across all instances\nsum(onyx_opensearch_searches_in_progress)\n```\n\n### OpenSearch network overhead\n\n```promql\n# Difference between client and server P50 reveals network/serialization cost.\nhistogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))\n  -\nhistogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))\n```\n"
  },
  {
    "path": "examples/assistants-api/topics_analyzer.py",
    "content": "import argparse\nimport os\nimport time\nfrom datetime import datetime\nfrom datetime import timedelta\nfrom datetime import timezone\n\nfrom openai import OpenAI\n\n\nASSISTANT_NAME = \"Topic Analyzer\"\nSYSTEM_PROMPT = \"\"\"\nYou are a helpful assistant that analyzes topics by searching through available \\\ndocuments and providing insights. These available documents come from common \\\nworkplace tools like Slack, emails, Confluence, Google Drive, etc.\n\nWhen analyzing a topic:\n1. Search for relevant information using the search tool\n2. Synthesize the findings into clear insights\n3. Highlight key trends, patterns, or notable developments\n4. Maintain objectivity and cite sources where relevant\n\"\"\"\nUSER_PROMPT = \"\"\"\nPlease analyze and provide insights about this topic: {topic}.\n\nIMPORTANT: do not mention things that are not relevant to the specified topic. \\\nIf there is no relevant information, just say \"No relevant information found.\"\n\"\"\"\n\n\ndef wait_on_run(client: OpenAI, run, thread):  # type: ignore\n    while run.status == \"queued\" or run.status == \"in_progress\":\n        run = client.beta.threads.runs.retrieve(\n            thread_id=thread.id,\n            run_id=run.id,\n        )\n        time.sleep(0.5)\n    return run\n\n\ndef show_response(messages) -> None:  # type: ignore\n    # Get only the assistant's response text\n    for message in messages.data[::-1]:\n        if message.role == \"assistant\":\n            for content in message.content:\n                if content.type == \"text\":\n                    print(content.text)\n                    break\n\n\ndef analyze_topics(topics: list[str]) -> None:\n    openai_api_key = os.environ.get(\n        \"OPENAI_API_KEY\", \"<your OpenAI API key if not set as env var>\"\n    )\n    onyx_api_key = os.environ.get(\n        \"DANSWER_API_KEY\", \"<your Onyx API key if not set as env var>\"\n    )\n    client = OpenAI(\n        api_key=openai_api_key,\n        base_url=\"http://localhost:8080/openai-assistants\",\n        default_headers={\n            \"Authorization\": f\"Bearer {onyx_api_key}\",\n        },\n    )\n\n    # Create an assistant if it doesn't exist\n    try:\n        assistants = client.beta.assistants.list(limit=100)\n        # Find the Topic Analyzer assistant if it exists\n        assistant = next((a for a in assistants.data if a.name == ASSISTANT_NAME))\n        client.beta.assistants.delete(assistant.id)\n    except Exception:\n        pass\n\n    assistant = client.beta.assistants.create(\n        name=ASSISTANT_NAME,\n        instructions=SYSTEM_PROMPT,\n        tools=[{\"type\": \"SearchTool\"}],  # type: ignore\n        model=\"gpt-4o\",\n    )\n\n    # Process each topic individually\n    for topic in topics:\n        thread = client.beta.threads.create()\n        message = client.beta.threads.messages.create(\n            thread_id=thread.id,\n            role=\"user\",\n            content=USER_PROMPT.format(topic=topic),\n        )\n\n        run = client.beta.threads.runs.create(\n            thread_id=thread.id,\n            assistant_id=assistant.id,\n            tools=[\n                {  # type: ignore\n                    \"type\": \"SearchTool\",\n                    \"retrieval_details\": {\n                        \"run_search\": \"always\",\n                        \"filters\": {\n                            \"time_cutoff\": str(\n                                datetime.now(timezone.utc) - timedelta(days=7)\n                            )\n                        },\n                    },\n                }\n            ],\n        )\n\n        run = wait_on_run(client, run, thread)\n        messages = client.beta.threads.messages.list(\n            thread_id=thread.id, order=\"asc\", after=message.id\n        )\n        print(f\"\\nAnalysis for topic: {topic}\")\n        print(\"-\" * 40)\n        show_response(messages)\n        print()\n\n\n# Example usage\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"Analyze specific topics\")\n    parser.add_argument(\"topics\", nargs=\"+\", help=\"Topics to analyze (one or more)\")\n\n    args = parser.parse_args()\n    analyze_topics(args.topics)\n"
  },
  {
    "path": "examples/widget/.eslintrc.json",
    "content": "{\n  \"extends\": \"next/core-web-vitals\"\n}\n"
  },
  {
    "path": "examples/widget/.gitignore",
    "content": "# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.\n\n# dependencies\n/node_modules\n/.pnp\n.pnp.js\n.yarn/install-state.gz\n\n# testing\n/coverage\n\n# next.js\n/.next/\n/out/\n\n# production\n/build\n\n# misc\n.DS_Store\n*.pem\n\n# debug\nnpm-debug.log*\nyarn-debug.log*\nyarn-error.log*\n\n# local env files\n.env*.local\n\n# vercel\n.vercel\n\n# typescript\n*.tsbuildinfo\nnext-env.d.ts\n"
  },
  {
    "path": "examples/widget/README.md",
    "content": "# Onyx Chat Bot Widget\n\nNote: The widget requires a Onyx API key, which is a paid (cloud/enterprise) feature.\n\nThis is a code example for how you can use Onyx's APIs to build a chat bot widget for a website! The main code to look at can be found in `src/app/widget/Widget.tsx`.\n\n## Getting Started\n\nTo get the widget working on your webpage, follow these steps:\n\n### 1. Install Dependencies\n\nEnsure you have the necessary dependencies installed. From the `examples/widget/README.md` file:\n\n```bash\nnpm i\n```\n\n### 2. Set Environment Variables\n\nMake sure to set the environment variables `NEXT_PUBLIC_API_URL` and `NEXT_PUBLIC_API_KEY` in a `.env` file at the root of your project:\n\n```bash\nNEXT_PUBLIC_API_URL=\nNEXT_PUBLIC_API_KEY=\n```\n\n### 3. Run the Development Server\n\nStart the development server to see the widget in action.\n\n```bash\nnpm run dev\n```\n\nOpen [http://localhost:3000](http://localhost:3000) with your browser to see the result.\n\n### 4. Integrate the Widget\n\nTo integrate the widget into your webpage, you can use the `ChatWidget` component. Here’s an example of how to include it in a page component:\n\n```jsx\nimport ChatWidget from \"path/to/ChatWidget\";\nfunction MyPage() {\n  return (\n    <div>\n      <h1>My Webpage</h1>\n      <ChatWidget />\n    </div>\n  );\n}\nexport default MyPage;\n```\n\n### 5. Deploy\n\nOnce you are satisfied with the widget, you can build and start the application for production:\n\n```bash\nnpm run build\nnpm run start\n```\n\n### Custom Styling and Configuration\n\nIf you need to customize the widget, you can modify the `ChatWidget` component in the `examples/widget/src/app/widget/Widget.tsx` file.\n\nBy following these steps, you should be able to get the chat widget working on your webpage.\n\nIf you want to get fancier, then take a peek at the Chat implementation within Onyx itself [here](https://github.com/onyx-dot-app/onyx/blob/main/web/src/app/chat/ChatPage.tsx#L82).\n"
  },
  {
    "path": "examples/widget/next.config.mjs",
    "content": "/** @type {import('next').NextConfig} */\nconst nextConfig = {};\n\nexport default nextConfig;\n"
  },
  {
    "path": "examples/widget/package.json",
    "content": "{\n  \"name\": \"widget\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"dev\": \"next dev\",\n    \"build\": \"next build\",\n    \"start\": \"next start\",\n    \"lint\": \"next lint\"\n  },\n  \"dependencies\": {\n    \"next\": \"^16.1.7\",\n    \"react\": \"^19\",\n    \"react-dom\": \"^19\",\n    \"react-markdown\": \"^10.1.0\"\n  },\n  \"devDependencies\": {\n    \"@tailwindcss/postcss\": \"^4.1.18\",\n    \"@types/node\": \"^25\",\n    \"@types/react\": \"^19\",\n    \"@types/react-dom\": \"^19\",\n    \"autoprefixer\": \"^10.4.23\",\n    \"eslint\": \"^9\",\n    \"eslint-config-next\": \"16.1.2\",\n    \"postcss\": \"^8.5.6\",\n    \"tailwindcss\": \"^4.1.18\",\n    \"typescript\": \"^5\"\n  }\n}\n"
  },
  {
    "path": "examples/widget/postcss.config.mjs",
    "content": "/** @type {import('postcss-load-config').Config} */\nconst config = {\n  plugins: {\n    \"@tailwindcss/postcss\": {},\n  },\n};\n\nexport default config;\n"
  },
  {
    "path": "examples/widget/src/app/globals.css",
    "content": "@import \"tailwindcss\";\n"
  },
  {
    "path": "examples/widget/src/app/layout.tsx",
    "content": "import type { Metadata } from \"next\";\nimport { Inter } from \"next/font/google\";\n\nimport \"./globals.css\";\n\nconst inter = Inter({ subsets: [\"latin\"] });\n\nexport const metadata: Metadata = {\n  title: \"Example Onyx Widget\",\n  description: \"Example Onyx Widget\",\n};\n\nexport default function RootLayout({\n  children,\n}: Readonly<{\n  children: React.ReactNode;\n}>) {\n  return (\n    <html lang=\"en\">\n      <body className={inter.className}>{children}</body>\n    </html>\n  );\n}\n"
  },
  {
    "path": "examples/widget/src/app/page.tsx",
    "content": "import { ChatWidget } from \"./widget/Widget\";\n\nexport default function Home() {\n  return (\n    <main className=\"flex min-h-screen flex-col items-center justify-between p-24\">\n      <ChatWidget />\n    </main>\n  );\n}\n"
  },
  {
    "path": "examples/widget/src/app/widget/Widget.tsx",
    "content": "\"use client\";\n\nimport React, { useState } from \"react\";\nimport ReactMarkdown from \"react-markdown\";\n\nconst API_URL = process.env.NEXT_PUBLIC_API_URL || \"http://localhost:8080\";\nconst API_KEY = process.env.NEXT_PUBLIC_API_KEY || \"\";\n\ntype NonEmptyObject = { [k: string]: any };\n\nconst processSingleChunk = <T extends NonEmptyObject>(\n  chunk: string,\n  currPartialChunk: string | null,\n): [T | null, string | null] => {\n  const completeChunk = (currPartialChunk || \"\") + chunk;\n  try {\n    // every complete chunk should be valid JSON\n    const chunkJson = JSON.parse(completeChunk);\n    return [chunkJson, null];\n  } catch (err) {\n    // if it's not valid JSON, then it's probably an incomplete chunk\n    return [null, completeChunk];\n  }\n};\n\nconst processRawChunkString = <T extends NonEmptyObject>(\n  rawChunkString: string,\n  previousPartialChunk: string | null,\n): [T[], string | null] => {\n  /* This is required because, in practice, we see that nginx does not send over\n  each chunk one at a time even with buffering turned off. Instead,\n  chunks are sometimes in batches or are sometimes incomplete */\n  if (!rawChunkString) {\n    return [[], null];\n  }\n  const chunkSections = rawChunkString\n    .split(\"\\n\")\n    .filter((chunk) => chunk.length > 0);\n  let parsedChunkSections: T[] = [];\n  let currPartialChunk = previousPartialChunk;\n  chunkSections.forEach((chunk) => {\n    const [processedChunk, partialChunk] = processSingleChunk<T>(\n      chunk,\n      currPartialChunk,\n    );\n    if (processedChunk) {\n      parsedChunkSections.push(processedChunk);\n      currPartialChunk = null;\n    } else {\n      currPartialChunk = partialChunk;\n    }\n  });\n\n  return [parsedChunkSections, currPartialChunk];\n};\n\nasync function* handleStream<T extends NonEmptyObject>(\n  streamingResponse: Response,\n): AsyncGenerator<T[], void, unknown> {\n  const reader = streamingResponse.body?.getReader();\n  const decoder = new TextDecoder(\"utf-8\");\n\n  let previousPartialChunk: string | null = null;\n  while (true) {\n    const rawChunk = await reader?.read();\n    if (!rawChunk) {\n      throw new Error(\"Unable to process chunk\");\n    }\n    const { done, value } = rawChunk;\n    if (done) {\n      break;\n    }\n\n    const [completedChunks, partialChunk] = processRawChunkString<T>(\n      decoder.decode(value, { stream: true }),\n      previousPartialChunk,\n    );\n    if (!completedChunks.length && !partialChunk) {\n      break;\n    }\n    previousPartialChunk = partialChunk as string | null;\n\n    yield await Promise.resolve(completedChunks);\n  }\n}\n\nasync function* sendMessage({\n  message,\n  chatSessionId,\n  parentMessageId,\n}: {\n  message: string;\n  chatSessionId?: number;\n  parentMessageId?: number;\n}) {\n  if (!chatSessionId || !parentMessageId) {\n    // Create a new chat session if one doesn't exist\n    const createSessionResponse = await fetch(\n      `${API_URL}/chat/create-chat-session`,\n      {\n        method: \"POST\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n          Authorization: `Bearer ${API_KEY}`,\n        },\n        body: JSON.stringify({\n          // or specify an assistant you have defined\n          persona_id: 0,\n        }),\n      },\n    );\n\n    if (!createSessionResponse.ok) {\n      const errorJson = await createSessionResponse.json();\n      const errorMsg = errorJson.message || errorJson.detail || \"\";\n      throw Error(`Failed to create chat session - ${errorMsg}`);\n    }\n\n    const sessionData = await createSessionResponse.json();\n    chatSessionId = sessionData.chat_session_id;\n  }\n\n  const sendMessageResponse = await fetch(`${API_URL}/chat/send-message`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n      Authorization: `Bearer ${API_KEY}`,\n    },\n    body: JSON.stringify({\n      chat_session_id: chatSessionId,\n      parent_message_id: parentMessageId || null,\n      message: message,\n      prompt_id: null,\n      search_doc_ids: null,\n      file_descriptors: [],\n      // checkout https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/search/models.py#L105 for\n      // all available options\n      retrieval_options: {\n        run_search: \"always\",\n        filters: null,\n      },\n      query_override: null,\n    }),\n  });\n  if (!sendMessageResponse.ok) {\n    const errorJson = await sendMessageResponse.json();\n    const errorMsg = errorJson.message || errorJson.detail || \"\";\n    throw Error(`Failed to send message - ${errorMsg}`);\n  }\n\n  yield* handleStream<NonEmptyObject>(sendMessageResponse);\n}\n\nexport const ChatWidget = () => {\n  const [messages, setMessages] = useState<{ text: string; isUser: boolean }[]>(\n    [],\n  );\n  const [inputText, setInputText] = useState(\"\");\n  const [isLoading, setIsLoading] = useState(false);\n\n  const handleSubmit = async (e: React.FormEvent) => {\n    e.preventDefault();\n    if (inputText.trim()) {\n      const initialPrevMessages = messages;\n      setMessages([...initialPrevMessages, { text: inputText, isUser: true }]);\n      setInputText(\"\");\n      setIsLoading(true);\n\n      try {\n        const messageGenerator = sendMessage({\n          message: inputText,\n          chatSessionId: undefined,\n          parentMessageId: undefined,\n        });\n        let fullResponse = \"\";\n\n        for await (const chunks of messageGenerator) {\n          for (const chunk of chunks) {\n            if (\"answer_piece\" in chunk) {\n              fullResponse += chunk.answer_piece;\n              setMessages([\n                ...initialPrevMessages,\n                { text: inputText, isUser: true },\n                { text: fullResponse, isUser: false },\n              ]);\n            }\n          }\n        }\n      } catch (error) {\n        console.error(\"Error sending message:\", error);\n        setMessages((prevMessages) => [\n          ...prevMessages,\n          { text: \"An error occurred. Please try again.\", isUser: false },\n        ]);\n      } finally {\n        setIsLoading(false);\n      }\n    }\n  };\n\n  return (\n    <div\n      className=\"\n      fixed\n      bottom-4\n      right-4\n      z-50\n      bg-white\n      rounded-lg\n      shadow-xl\n      w-96\n      h-[32rem]\n      flex\n      flex-col\n      overflow-hidden\n      transition-all\n      duration-300\n      ease-in-out\n    \"\n    >\n      <div\n        className=\"\n        bg-gradient-to-r\n        from-blue-600\n        to-blue-800\n        text-white\n        p-4\n        font-bold\n        flex\n        justify-between\n        items-center\n      \"\n      >\n        <span>Chat Support</span>\n      </div>\n      <div\n        className=\"\n        flex-grow\n        overflow-y-auto\n        p-4\n        space-y-4\n        bg-gray-50\n        border-b\n        border-gray-200\n      \"\n      >\n        {messages.map((message, index) => (\n          <div\n            key={index}\n            className={`\n            flex\n            ${message.isUser ? \"justify-end\" : \"justify-start\"}\n          `}\n          >\n            <div\n              className={`\n              max-w-[75%]\n              p-3\n              rounded-lg\n              ${\n                message.isUser\n                  ? \"bg-blue-500 text-white\"\n                  : \"bg-white text-black\"\n              }\n              shadow\n            `}\n            >\n              <ReactMarkdown>{message.text}</ReactMarkdown>\n            </div>\n          </div>\n        ))}\n        {isLoading && (\n          <div className=\"flex justify-center\">\n            <div className=\"animate-pulse flex space-x-2\">\n              <div className=\"w-2 h-2 bg-gray-500 rounded-full\"></div>\n              <div className=\"w-2 h-2 bg-gray-500 rounded-full\"></div>\n              <div className=\"w-2 h-2 bg-gray-500 rounded-full\"></div>\n            </div>\n          </div>\n        )}\n      </div>\n      <form\n        onSubmit={handleSubmit}\n        className=\"\n        p-4\n        bg-white\n        border-t\n        border-gray-200\n      \"\n      >\n        <div className=\"relative\">\n          <input\n            type=\"text\"\n            value={inputText}\n            onChange={(e) => setInputText(e.target.value)}\n            placeholder=\"Type a message...\"\n            className=\"\n              w-full\n              p-2\n              pr-10\n              border\n              border-gray-300\n              rounded-full\n              focus:outline-none\n              focus:ring-2\n              focus:ring-blue-500\n              focus:border-transparent\n            \"\n            disabled={isLoading}\n          />\n          <button\n            type=\"submit\"\n            disabled={isLoading}\n            className=\"\n              absolute\n              right-2\n              top-1/2\n              transform\n              -translate-y-1/2\n              text-blue-500\n              hover:text-blue-600\n              focus:outline-none\n            \"\n          >\n            <svg\n              xmlns=\"http://www.w3.org/2000/svg\"\n              className=\"h-6 w-6\"\n              fill=\"none\"\n              viewBox=\"0 0 24 24\"\n              stroke=\"currentColor\"\n            >\n              <path\n                strokeLinecap=\"round\"\n                strokeLinejoin=\"round\"\n                strokeWidth={2}\n                d=\"M12 19l9 2-9-18-9 18 9-2zm0 0v-8\"\n              />\n            </svg>\n          </button>\n        </div>\n      </form>\n    </div>\n  );\n};\n"
  },
  {
    "path": "examples/widget/tailwind.config.ts",
    "content": "import type { Config } from \"tailwindcss\";\n\nconst config: Config = {\n  content: [\n    \"./src/pages/**/*.{js,ts,jsx,tsx,mdx}\",\n    \"./src/components/**/*.{js,ts,jsx,tsx,mdx}\",\n    \"./src/app/**/*.{js,ts,jsx,tsx,mdx}\",\n  ],\n  theme: {\n    extend: {\n      backgroundImage: {\n        \"gradient-radial\": \"radial-gradient(var(--tw-gradient-stops))\",\n        \"gradient-conic\":\n          \"conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))\",\n      },\n    },\n  },\n  plugins: [],\n};\nexport default config;\n"
  },
  {
    "path": "examples/widget/tsconfig.json",
    "content": "{\n  \"compilerOptions\": {\n    \"lib\": [\n      \"dom\",\n      \"dom.iterable\",\n      \"esnext\"\n    ],\n    \"allowJs\": true,\n    \"skipLibCheck\": true,\n    \"strict\": true,\n    \"noEmit\": true,\n    \"esModuleInterop\": true,\n    \"module\": \"esnext\",\n    \"moduleResolution\": \"bundler\",\n    \"resolveJsonModule\": true,\n    \"isolatedModules\": true,\n    \"jsx\": \"preserve\",\n    \"incremental\": true,\n    \"plugins\": [\n      {\n        \"name\": \"next\"\n      }\n    ],\n    \"paths\": {\n      \"@/*\": [\n        \"./src/*\"\n      ]\n    },\n    \"target\": \"ES2017\"\n  },\n  \"include\": [\n    \"next-env.d.ts\",\n    \"**/*.ts\",\n    \"**/*.tsx\",\n    \".next/types/**/*.ts\",\n    \".next/dev/types/**/*.ts\"\n  ],\n  \"exclude\": [\n    \"node_modules\"\n  ]\n}\n"
  },
  {
    "path": "extensions/chrome/LICENSE",
    "content": "MIT License\n\nCopyright (c) 2025 DanswerAI, Inc.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "extensions/chrome/README.md",
    "content": "# Onyx Chrome Extension\n\nThe Onyx chrome extension lets you research, create, and automate with LLMs powered by your team's unique knowledge. Just hit Ctrl + O on Mac or Alt + O on Windows to instantly access Onyx in your browser:\n\n💡 Know what your company knows, instantly with the Onyx sidebar\n💬 Chat: Onyx provides a natural language chat interface as the main way of interacting with the features.\n🌎 Internal Search: Ask questions and get answers from all your team's knowledge, powered by Onyx's 50+ connectors to all the tools your team uses\n🚀 With a simple Ctrl + O on Mac or Alt + O on Windows - instantly summarize information from any work application\n\n⚡️ Get quick access to the work resources you need.\n🆕 Onyx new tab page puts all of your company’s knowledge at your fingertips\n🤖 Access custom AI Agents for unique use cases, and give them access to tools to take action.\n\n—\n\nOnyx connects with dozens of popular workplace apps like Google Drive, Jira, Confluence, Slack, and more. Use this extension if you have an account created by your team admin.\n\n## Installation\n\nFor Onyx Cloud Users, please visit the Chrome Plugin Store (pending approval still)\n\n## Development\n\n- Load unpacked extension in your browser\n- Modify files in `src` directory\n- Refresh extension in Chrome\n\n## Contributing\n\nSubmit issues or pull requests for improvements\n"
  },
  {
    "path": "extensions/chrome/manifest.json",
    "content": "{\n  \"manifest_version\": 3,\n  \"name\": \"Onyx\",\n  \"version\": \"1.1\",\n  \"description\": \"Onyx lets you research, create, and automate with LLMs powered by your team's unique knowledge\",\n  \"permissions\": [\n    \"sidePanel\",\n    \"storage\",\n    \"activeTab\",\n    \"tabs\"\n  ],\n  \"host_permissions\": [\"<all_urls>\"],\n  \"background\": {\n    \"service_worker\": \"service_worker.js\",\n    \"type\": \"module\"\n  },\n  \"action\": {\n    \"default_icon\": {\n      \"16\": \"public/icon16.png\",\n      \"48\": \"public/icon48.png\",\n      \"128\": \"public/icon128.png\"\n    },\n    \"default_popup\": \"src/pages/popup.html\"\n  },\n  \"icons\": {\n    \"16\": \"public/icon16.png\",\n    \"48\": \"public/icon48.png\",\n    \"128\": \"public/icon128.png\"\n  },\n  \"options_page\": \"src/pages/options.html\",\n  \"chrome_url_overrides\": {\n    \"newtab\": \"src/pages/onyx_home.html\"\n  },\n  \"commands\": {\n    \"toggleNewTabOverride\": {\n      \"suggested_key\": {\n        \"default\": \"Ctrl+Shift+O\",\n        \"mac\": \"Command+Shift+O\"\n      },\n      \"description\": \"Toggle Onyx New Tab Override\"\n    },\n    \"openSidePanel\": {\n      \"suggested_key\": {\n        \"default\": \"Ctrl+O\",\n        \"windows\": \"Alt+O\",\n        \"mac\": \"MacCtrl+O\"\n      },\n      \"description\": \"Open Onyx Side Panel\"\n    }\n  },\n  \"side_panel\": {\n    \"default_path\": \"src/pages/panel.html\"\n  },\n  \"omnibox\": {\n    \"keyword\": \"onyx\"\n  },\n  \"content_scripts\": [\n    {\n      \"matches\": [\"<all_urls>\"],\n      \"js\": [\"src/utils/selection-icon.js\"],\n      \"css\": [\"src/styles/selection-icon.css\"]\n    }\n  ],\n  \"web_accessible_resources\": [\n    {\n      \"resources\": [\"public/icon32.png\"],\n      \"matches\": [\"<all_urls>\"]\n    }\n  ]\n}\n"
  },
  {
    "path": "extensions/chrome/service_worker.js",
    "content": "import {\n  DEFAULT_ONYX_DOMAIN,\n  CHROME_SPECIFIC_STORAGE_KEYS,\n  ACTIONS,\n  SIDE_PANEL_PATH,\n} from \"./src/utils/constants.js\";\n\n// Track side panel state per window\nconst sidePanelOpenState = new Map();\n\n// Open welcome page on first install\nchrome.runtime.onInstalled.addListener((details) => {\n  if (details.reason === \"install\") {\n    chrome.storage.local.get(\n      { [CHROME_SPECIFIC_STORAGE_KEYS.ONBOARDING_COMPLETE]: false },\n      (result) => {\n        if (!result[CHROME_SPECIFIC_STORAGE_KEYS.ONBOARDING_COMPLETE]) {\n          chrome.tabs.create({ url: \"src/pages/welcome.html\" });\n        }\n      },\n    );\n  }\n});\n\nasync function setupSidePanel() {\n  if (chrome.sidePanel) {\n    try {\n      // Don't auto-open side panel on action click since we have a popup menu\n      await chrome.sidePanel.setPanelBehavior({\n        openPanelOnActionClick: false,\n      });\n    } catch (error) {\n      console.error(\"Error setting up side panel:\", error);\n    }\n  }\n}\n\nasync function openSidePanel(tabId) {\n  try {\n    await chrome.sidePanel.open({ tabId });\n  } catch (error) {\n    console.error(\"Error opening side panel:\", error);\n  }\n}\n\nfunction encodeUserPrompt(text) {\n  return encodeURIComponent(text).replace(/\\(/g, \"%28\").replace(/\\)/g, \"%29\");\n}\n\nasync function sendToOnyx(info, tab) {\n  const selectedText = encodeUserPrompt(info.selectionText);\n  const currentUrl = encodeURIComponent(tab.url);\n\n  try {\n    const result = await chrome.storage.local.get({\n      [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN,\n    });\n    const url = `${\n      result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]\n    }${SIDE_PANEL_PATH}?user-prompt=${selectedText}`;\n\n    await openSidePanel(tab.id);\n    chrome.runtime.sendMessage({\n      action: ACTIONS.OPEN_SIDE_PANEL_WITH_INPUT,\n      url: url,\n      pageUrl: tab.url,\n    });\n  } catch (error) {\n    console.error(\"Error sending to Onyx:\", error);\n  }\n}\n\nasync function toggleNewTabOverride() {\n  try {\n    const result = await chrome.storage.local.get(\n      CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB,\n    );\n    const newValue =\n      !result[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB];\n    await chrome.storage.local.set({\n      [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]: newValue,\n    });\n\n    chrome.notifications.create({\n      type: \"basic\",\n      iconUrl: \"icon.png\",\n      title: \"Onyx New Tab\",\n      message: `New Tab Override ${newValue ? \"enabled\" : \"disabled\"}`,\n    });\n\n    // Send a message to inform all tabs about the change\n    chrome.tabs.query({}, (tabs) => {\n      tabs.forEach((tab) => {\n        chrome.tabs.sendMessage(tab.id, {\n          action: \"newTabOverrideToggled\",\n          value: newValue,\n        });\n      });\n    });\n  } catch (error) {\n    console.error(\"Error toggling new tab override:\", error);\n  }\n}\n\n// Note: This listener won't fire when a popup is defined in manifest.json\n// The popup will show instead. This is kept as a fallback if popup is removed.\nchrome.action.onClicked.addListener((tab) => {\n  openSidePanel(tab.id);\n});\n\nchrome.commands.onCommand.addListener(async (command) => {\n  if (command === ACTIONS.SEND_TO_ONYX) {\n    try {\n      const [tab] = await chrome.tabs.query({\n        active: true,\n        lastFocusedWindow: true,\n      });\n      if (tab) {\n        const response = await chrome.tabs.sendMessage(tab.id, {\n          action: ACTIONS.GET_SELECTED_TEXT,\n        });\n        const selectedText = response?.selectedText || \"\";\n        sendToOnyx({ selectionText: selectedText }, tab);\n      }\n    } catch (error) {\n      console.error(\"Error sending to Onyx:\", error);\n    }\n  } else if (command === ACTIONS.TOGGLE_NEW_TAB_OVERRIDE) {\n    toggleNewTabOverride();\n  } else if (command === ACTIONS.CLOSE_SIDE_PANEL) {\n    try {\n      await chrome.sidePanel.hide();\n    } catch (error) {\n      console.error(\"Error closing side panel via command:\", error);\n    }\n  } else if (command === ACTIONS.OPEN_SIDE_PANEL) {\n    chrome.tabs.query({ active: true, lastFocusedWindow: true }, (tabs) => {\n      if (tabs && tabs.length > 0) {\n        const tab = tabs[0];\n        const windowId = tab.windowId;\n        const isOpen = sidePanelOpenState.get(windowId) || false;\n\n        if (isOpen) {\n          chrome.sidePanel.setOptions({ enabled: false }, () => {\n            chrome.sidePanel.setOptions({ enabled: true });\n            sidePanelOpenState.set(windowId, false);\n          });\n        } else {\n          chrome.sidePanel.open({ tabId: tab.id });\n          sidePanelOpenState.set(windowId, true);\n        }\n      }\n    });\n    return;\n  } else {\n    console.log(\"Unhandled command:\", command);\n  }\n});\n\nasync function sendActiveTabUrlToPanel() {\n  try {\n    const [tab] = await chrome.tabs.query({\n      active: true,\n      lastFocusedWindow: true,\n    });\n    if (tab?.url) {\n      chrome.runtime.sendMessage({\n        action: ACTIONS.TAB_URL_UPDATED,\n        url: tab.url,\n      });\n    }\n  } catch (error) {\n    console.error(\"[Onyx SW] Error sending tab URL:\", error);\n  }\n}\n\nchrome.runtime.onMessage.addListener((request, sender, sendResponse) => {\n  if (request.action === ACTIONS.GET_CURRENT_ONYX_DOMAIN) {\n    chrome.storage.local.get(\n      { [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN },\n      (result) => {\n        sendResponse({\n          [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]:\n            result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN],\n        });\n      },\n    );\n    return true;\n  }\n  if (request.action === ACTIONS.CLOSE_SIDE_PANEL) {\n    closeSidePanel();\n    chrome.storage.local.get(\n      { [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN },\n      (result) => {\n        chrome.tabs.create({\n          url: `${result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]}/auth/login`,\n          active: true,\n        });\n      },\n    );\n    return true;\n  }\n  if (request.action === ACTIONS.OPEN_SIDE_PANEL_WITH_INPUT) {\n    const { selectedText, pageUrl } = request;\n    const tabId = sender.tab?.id;\n    const windowId = sender.tab?.windowId;\n\n    if (tabId && windowId) {\n      chrome.storage.local.get(\n        { [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN },\n        (result) => {\n          const encodedText = encodeUserPrompt(selectedText);\n          const onyxDomain = result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];\n          const url = `${onyxDomain}${SIDE_PANEL_PATH}?user-prompt=${encodedText}`;\n\n          chrome.storage.session.set({\n            pendingInput: {\n              url: url,\n              pageUrl: pageUrl,\n              timestamp: Date.now(),\n            },\n          });\n\n          chrome.sidePanel\n            .open({ windowId })\n            .then(() => {\n              chrome.runtime.sendMessage({\n                action: ACTIONS.OPEN_ONYX_WITH_INPUT,\n                url: url,\n                pageUrl: pageUrl,\n              });\n            })\n            .catch((error) => {\n              console.error(\n                \"[Onyx SW] Error opening side panel with text:\",\n                error,\n              );\n            });\n        },\n      );\n    } else {\n      console.error(\"[Onyx SW] Missing tabId or windowId\");\n    }\n    return true;\n  }\n  if (request.action === ACTIONS.TAB_READING_ENABLED) {\n    chrome.storage.session.set({ tabReadingEnabled: true });\n    sendActiveTabUrlToPanel();\n    return false;\n  }\n  if (request.action === ACTIONS.TAB_READING_DISABLED) {\n    chrome.storage.session.set({ tabReadingEnabled: false });\n    return false;\n  }\n});\n\nchrome.storage.onChanged.addListener((changes, namespace) => {\n  if (\n    namespace === \"local\" &&\n    changes[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]\n  ) {\n    const newValue =\n      changes[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]\n        .newValue;\n\n    if (newValue === false) {\n      chrome.runtime.openOptionsPage();\n    }\n  }\n});\n\nchrome.windows.onRemoved.addListener((windowId) => {\n  sidePanelOpenState.delete(windowId);\n});\n\nchrome.omnibox.setDefaultSuggestion({\n  description: 'Search Onyx for \"%s\"',\n});\n\nchrome.omnibox.onInputEntered.addListener(async (text) => {\n  try {\n    const result = await chrome.storage.local.get({\n      [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN,\n    });\n\n    const domain = result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];\n    const searchUrl = `${domain}/chat?user-prompt=${encodeURIComponent(text)}`;\n\n    chrome.tabs.update({ url: searchUrl });\n  } catch (error) {\n    console.error(\"Error handling omnibox search:\", error);\n  }\n});\n\nchrome.omnibox.onInputChanged.addListener((text, suggest) => {\n  if (text.trim()) {\n    suggest([\n      {\n        content: text,\n        description: `Search Onyx for \"<match>${text}</match>\"`,\n      },\n    ]);\n  }\n});\n\nchrome.tabs.onActivated.addListener(async (activeInfo) => {\n  const result = await chrome.storage.session.get({ tabReadingEnabled: false });\n  if (!result.tabReadingEnabled) return;\n  try {\n    const tab = await chrome.tabs.get(activeInfo.tabId);\n    if (tab.url) {\n      chrome.runtime.sendMessage({\n        action: ACTIONS.TAB_URL_UPDATED,\n        url: tab.url,\n      });\n    }\n  } catch (error) {\n    console.error(\"[Onyx SW] Error on tab activated:\", error);\n  }\n});\n\nchrome.tabs.onUpdated.addListener(async (tabId, changeInfo, tab) => {\n  if (!changeInfo.url) return;\n  const result = await chrome.storage.session.get({ tabReadingEnabled: false });\n  if (!result.tabReadingEnabled) return;\n  try {\n    const [activeTab] = await chrome.tabs.query({\n      active: true,\n      lastFocusedWindow: true,\n    });\n    if (activeTab?.id === tabId) {\n      chrome.runtime.sendMessage({\n        action: ACTIONS.TAB_URL_UPDATED,\n        url: changeInfo.url,\n      });\n    }\n  } catch (error) {\n    console.error(\"[Onyx SW] Error on tab updated:\", error);\n  }\n});\n\nsetupSidePanel();\n"
  },
  {
    "path": "extensions/chrome/src/pages/onyx_home.html",
    "content": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"utf-8\" />\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <meta http-equiv=\"Permissions-Policy\" content=\"clipboard-write=(self)\" />\n    <title>Onyx Home</title>\n    <link rel=\"stylesheet\" href=\"../styles/shared.css\" />\n    <style>\n      body,\n      html {\n        margin: 0;\n        padding: 0;\n        width: 100%;\n        height: 100vh;\n        overflow: hidden;\n      }\n\n      @media (prefers-color-scheme: dark) {\n        html,\n        body {\n          background-color: #000;\n        }\n      }\n\n      @media (prefers-color-scheme: light) {\n        html,\n        body {\n          background-color: #f6f6f6;\n        }\n      }\n\n      #background {\n        position: fixed;\n        top: 0;\n        left: 0;\n        width: 100%;\n        height: 100%;\n        background-size: cover;\n        background-position: center;\n        background-repeat: no-repeat;\n        transition: opacity 0.5s ease-in-out;\n      }\n\n      #content {\n        position: relative;\n        width: 100%;\n        height: 100%;\n        opacity: 0;\n        transition: opacity 0.5s ease-in-out;\n      }\n\n      iframe {\n        border: none;\n        width: 100%;\n        height: 100%;\n        position: absolute;\n        top: 0;\n        left: 0;\n        visibility: hidden;\n      }\n    </style>\n  </head>\n\n  <body>\n    <div id=\"background\"></div>\n    <div id=\"content\">\n      <iframe\n        id=\"onyx-iframe\"\n        allowfullscreen\n        allow=\"clipboard-read; clipboard-write\"\n      ></iframe>\n    </div>\n    <script src=\"onyx_home.js\" type=\"module\"></script>\n  </body>\n</html>\n"
  },
  {
    "path": "extensions/chrome/src/pages/onyx_home.js",
    "content": "import {\n  CHROME_MESSAGE,\n  CHROME_SPECIFIC_STORAGE_KEYS,\n  WEB_MESSAGE,\n} from \"../utils/constants.js\";\nimport {\n  showErrorModal,\n  hideErrorModal,\n  initErrorModal,\n} from \"../utils/error-modal.js\";\nimport { getOnyxDomain } from \"../utils/storage.js\";\n\n(function () {\n  let mainIframe = document.getElementById(\"onyx-iframe\");\n  let preloadedIframe = null;\n  const background = document.getElementById(\"background\");\n  const content = document.getElementById(\"content\");\n  const DEFAULT_LIGHT_BACKGROUND_IMAGE =\n    \"https://images.unsplash.com/photo-1692520883599-d543cfe6d43d?q=80&w=2666&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D\";\n  const DEFAULT_DARK_BACKGROUND_IMAGE =\n    \"https://images.unsplash.com/photo-1692520883599-d543cfe6d43d?q=80&w=2666&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D\";\n\n  let iframeLoadTimeout;\n  let iframeLoaded = false;\n\n  initErrorModal();\n\n  async function preloadChatInterface() {\n    preloadedIframe = document.createElement(\"iframe\");\n\n    const domain = await getOnyxDomain();\n    preloadedIframe.src = domain + \"/chat\";\n    preloadedIframe.style.opacity = \"0\";\n    preloadedIframe.style.visibility = \"hidden\";\n    preloadedIframe.style.transition = \"opacity 0.3s ease-in\";\n    preloadedIframe.style.border = \"none\";\n    preloadedIframe.style.width = \"100%\";\n    preloadedIframe.style.height = \"100%\";\n    preloadedIframe.style.position = \"absolute\";\n    preloadedIframe.style.top = \"0\";\n    preloadedIframe.style.left = \"0\";\n    preloadedIframe.style.zIndex = \"1\";\n    content.appendChild(preloadedIframe);\n  }\n\n  function setIframeSrc(url) {\n    mainIframe.src = url;\n    startIframeLoadTimeout();\n    iframeLoaded = false;\n  }\n\n  function startIframeLoadTimeout() {\n    clearTimeout(iframeLoadTimeout);\n    iframeLoadTimeout = setTimeout(() => {\n      if (!iframeLoaded) {\n        try {\n          if (\n            mainIframe.contentWindow.location.pathname.includes(\"/auth/login\")\n          ) {\n            showLoginPage();\n          } else {\n            showErrorModal(mainIframe.src);\n          }\n        } catch (error) {\n          showErrorModal(mainIframe.src);\n        }\n      }\n    }, 2500);\n  }\n\n  function showLoginPage() {\n    background.style.opacity = \"0\";\n    mainIframe.style.opacity = \"1\";\n    mainIframe.style.visibility = \"visible\";\n    content.style.opacity = \"1\";\n    hideErrorModal();\n  }\n\n  function setTheme(theme, customBackgroundImage) {\n    const imageUrl =\n      customBackgroundImage ||\n      (theme === \"dark\"\n        ? DEFAULT_DARK_BACKGROUND_IMAGE\n        : DEFAULT_LIGHT_BACKGROUND_IMAGE);\n    background.style.backgroundImage = `url('${imageUrl}')`;\n  }\n\n  function fadeInContent() {\n    content.style.transition = \"opacity 0.5s ease-in\";\n    mainIframe.style.transition = \"opacity 0.5s ease-in\";\n    content.style.opacity = \"0\";\n    mainIframe.style.opacity = \"0\";\n    mainIframe.style.visibility = \"visible\";\n\n    requestAnimationFrame(() => {\n      content.style.opacity = \"1\";\n      mainIframe.style.opacity = \"1\";\n\n      setTimeout(() => {\n        background.style.transition = \"opacity 0.3s ease-out\";\n        background.style.opacity = \"0\";\n      }, 500);\n    });\n  }\n\n  function checkOnyxPreference() {\n    chrome.storage.local.get(\n      [\n        CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB,\n        CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN,\n      ],\n      (items) => {\n        let useOnyxAsDefaultNewTab =\n          items[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB];\n\n        if (useOnyxAsDefaultNewTab === undefined) {\n          useOnyxAsDefaultNewTab = !!(\n            localStorage.getItem(\n              CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB,\n            ) === \"1\"\n          );\n          chrome.storage.local.set({\n            [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]:\n              useOnyxAsDefaultNewTab,\n          });\n        }\n\n        if (!useOnyxAsDefaultNewTab) {\n          chrome.tabs.update({\n            url: \"chrome://new-tab-page\",\n          });\n          return;\n        }\n\n        setIframeSrc(items[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN] + \"/nrf\");\n      },\n    );\n  }\n\n  function loadThemeAndBackground() {\n    chrome.storage.local.get(\n      [\n        CHROME_SPECIFIC_STORAGE_KEYS.THEME,\n        CHROME_SPECIFIC_STORAGE_KEYS.BACKGROUND_IMAGE,\n        CHROME_SPECIFIC_STORAGE_KEYS.DARK_BG_URL,\n        CHROME_SPECIFIC_STORAGE_KEYS.LIGHT_BG_URL,\n      ],\n      function (result) {\n        const theme = result[CHROME_SPECIFIC_STORAGE_KEYS.THEME] || \"light\";\n        const customBackgroundImage =\n          result[CHROME_SPECIFIC_STORAGE_KEYS.BACKGROUND_IMAGE];\n        const darkBgUrl = result[CHROME_SPECIFIC_STORAGE_KEYS.DARK_BG_URL];\n        const lightBgUrl = result[CHROME_SPECIFIC_STORAGE_KEYS.LIGHT_BG_URL];\n\n        let backgroundImage;\n        if (customBackgroundImage) {\n          backgroundImage = customBackgroundImage;\n        } else if (theme === \"dark\" && darkBgUrl) {\n          backgroundImage = darkBgUrl;\n        } else if (theme === \"light\" && lightBgUrl) {\n          backgroundImage = lightBgUrl;\n        }\n\n        setTheme(theme, backgroundImage);\n        checkOnyxPreference();\n      },\n    );\n  }\n\n  function loadNewPage(newSrc) {\n    if (preloadedIframe && preloadedIframe.contentWindow) {\n      preloadedIframe.contentWindow.postMessage(\n        { type: WEB_MESSAGE.PAGE_CHANGE, href: newSrc },\n        \"*\",\n      );\n    } else {\n      console.error(\"Preloaded iframe not available\");\n    }\n  }\n\n  function completePendingPageLoad() {\n    if (preloadedIframe) {\n      preloadedIframe.style.visibility = \"visible\";\n      preloadedIframe.style.opacity = \"1\";\n      preloadedIframe.style.zIndex = \"1\";\n      mainIframe.style.zIndex = \"2\";\n      mainIframe.style.opacity = \"0\";\n\n      setTimeout(() => {\n        if (content.contains(mainIframe)) {\n          content.removeChild(mainIframe);\n        }\n\n        mainIframe = preloadedIframe;\n        mainIframe.id = \"onyx-iframe\";\n        mainIframe.style.zIndex = \"\";\n        iframeLoaded = true;\n        clearTimeout(iframeLoadTimeout);\n      }, 200);\n    } else {\n      console.warn(\"No preloaded iframe available\");\n    }\n  }\n\n  chrome.storage.onChanged.addListener(function (changes, namespace) {\n    if (namespace === \"local\" && changes.useOnyxAsDefaultNewTab) {\n      checkOnyxPreference();\n    }\n  });\n\n  window.addEventListener(\"message\", function (event) {\n    if (event.data.type === CHROME_MESSAGE.SET_DEFAULT_NEW_TAB) {\n      chrome.storage.local.set({ useOnyxAsDefaultNewTab: event.data.value });\n    } else if (event.data.type === CHROME_MESSAGE.ONYX_APP_LOADED) {\n      clearTimeout(iframeLoadTimeout);\n      hideErrorModal();\n      fadeInContent();\n      iframeLoaded = true;\n    } else if (event.data.type === CHROME_MESSAGE.PREFERENCES_UPDATED) {\n      const { theme, backgroundUrl } = event.data.payload;\n      chrome.storage.local.set(\n        {\n          [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: theme,\n          [CHROME_SPECIFIC_STORAGE_KEYS.BACKGROUND_IMAGE]: backgroundUrl,\n        },\n        () => {},\n      );\n    } else if (event.data.type === CHROME_MESSAGE.LOAD_NEW_PAGE) {\n      loadNewPage(event.data.href);\n    } else if (event.data.type === CHROME_MESSAGE.LOAD_NEW_CHAT_PAGE) {\n      completePendingPageLoad();\n    }\n  });\n\n  mainIframe.onload = function () {\n    clearTimeout(iframeLoadTimeout);\n    startIframeLoadTimeout();\n  };\n\n  mainIframe.onerror = function (error) {\n    showErrorModal(mainIframe.src);\n  };\n\n  loadThemeAndBackground();\n  preloadChatInterface();\n})();\n"
  },
  {
    "path": "extensions/chrome/src/pages/options.html",
    "content": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"utf-8\" />\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <meta http-equiv=\"Permissions-Policy\" content=\"clipboard-write=(self)\" />\n    <title>Onyx - Settings</title>\n    <link rel=\"stylesheet\" href=\"../styles/shared.css\" />\n    <style>\n      :root {\n        --background-900: #0a0a0a;\n        --background-800: #1a1a1a;\n        --text-light-05: rgba(255, 255, 255, 0.95);\n        --text-light-03: rgba(255, 255, 255, 0.6);\n        --white-10: rgba(255, 255, 255, 0.1);\n        --white-15: rgba(255, 255, 255, 0.15);\n        --white-20: rgba(255, 255, 255, 0.2);\n        --white-30: rgba(255, 255, 255, 0.3);\n        --white-40: rgba(255, 255, 255, 0.4);\n        --white-80: rgba(255, 255, 255, 0.8);\n        --black-40: rgba(0, 0, 0, 0.4);\n      }\n\n      * {\n        box-sizing: border-box;\n      }\n\n      body {\n        margin: 0;\n        padding: 0;\n        font-family: var(--font-hanken-grotesk);\n        background: linear-gradient(\n          135deg,\n          var(--background-900) 0%,\n          var(--background-800) 100%\n        );\n        min-height: 100vh;\n        color: var(--text-light-05);\n        transition: background 0.3s ease;\n      }\n\n      body.light-theme {\n        --background-900: #f5f5f5;\n        --background-800: #ffffff;\n        --text-light-05: rgba(0, 0, 0, 0.95);\n        --text-light-03: rgba(0, 0, 0, 0.6);\n        background: linear-gradient(135deg, #f5f5f5 0%, #ffffff 100%);\n      }\n\n      body.light-theme .settings-panel {\n        background: linear-gradient(\n          to bottom,\n          rgba(255, 255, 255, 0.95),\n          rgba(245, 245, 245, 0.95)\n        );\n        border: 1px solid rgba(0, 0, 0, 0.1);\n      }\n\n      body.light-theme .settings-header {\n        border-bottom: 1px solid rgba(0, 0, 0, 0.1);\n      }\n\n      body.light-theme .settings-icon {\n        background: rgba(0, 0, 0, 0.05);\n      }\n\n      body.light-theme .theme-toggle {\n        background: rgba(0, 0, 0, 0.05);\n        border: 1px solid rgba(0, 0, 0, 0.1);\n      }\n\n      body.light-theme .theme-toggle:hover {\n        background: rgba(0, 0, 0, 0.08);\n      }\n\n      body.light-theme .theme-toggle svg {\n        stroke: rgba(0, 0, 0, 0.95);\n      }\n\n      body.light-theme .settings-group {\n        background: rgba(0, 0, 0, 0.03);\n      }\n\n      body.light-theme .setting-divider {\n        background: rgba(0, 0, 0, 0.1);\n      }\n\n      body.light-theme .input-field {\n        border: 1px solid rgba(0, 0, 0, 0.1);\n        background: rgba(0, 0, 0, 0.05);\n        color: rgba(0, 0, 0, 0.95);\n      }\n\n      body.light-theme .input-field:focus {\n        outline: none;\n        border-color: rgba(0, 0, 0, 0.25);\n        background: rgba(0, 0, 0, 0.08);\n        box-shadow: 0 0 0 2px rgba(0, 0, 0, 0.05);\n        color: rgba(0, 0, 0, 0.95);\n      }\n\n      body.light-theme .status-container {\n        background: rgba(0, 0, 0, 0.03);\n      }\n\n      body.light-theme .button.secondary {\n        background: rgba(0, 0, 0, 0.05);\n        color: rgba(0, 0, 0, 0.95);\n      }\n\n      body.light-theme .button.secondary:hover {\n        background: rgba(0, 0, 0, 0.08);\n      }\n\n      body.light-theme .toggle-slider {\n        background-color: rgba(0, 0, 0, 0.15);\n      }\n\n      body.light-theme input:checked + .toggle-slider {\n        background-color: rgba(0, 0, 0, 0.3);\n      }\n\n      body.light-theme .toggle-slider:before {\n        background-color: white;\n        box-shadow: 0 1px 3px rgba(0, 0, 0, 0.2);\n      }\n\n      .settings-container {\n        max-width: 500px;\n        width: 100%;\n        margin: 0 auto;\n        padding: 40px 20px;\n      }\n\n      .settings-panel {\n        background: linear-gradient(\n          to bottom,\n          rgba(10, 10, 10, 0.95),\n          rgba(26, 26, 26, 0.95)\n        );\n        backdrop-filter: blur(24px);\n        border-radius: 16px;\n        border: 1px solid var(--white-10);\n        overflow: hidden;\n        box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);\n      }\n\n      .settings-header {\n        padding: 24px;\n        border-bottom: 1px solid var(--white-10);\n        display: flex;\n        align-items: center;\n        justify-content: space-between;\n        background: transparent;\n      }\n\n      .settings-header-left {\n        display: flex;\n        align-items: center;\n        gap: 12px;\n      }\n\n      .settings-icon {\n        width: 40px;\n        height: 40px;\n        border-radius: 12px;\n        background: white;\n        display: flex;\n        align-items: center;\n        justify-content: center;\n        overflow: hidden;\n      }\n\n      .settings-icon img {\n        width: 100%;\n        height: 100%;\n        object-fit: contain;\n        padding: 6px;\n      }\n\n      .settings-title {\n        font-size: 20px;\n        font-weight: 600;\n        color: var(--text-light-05);\n        margin: 0;\n      }\n\n      .theme-toggle {\n        display: flex;\n        align-items: center;\n        gap: 8px;\n        padding: 6px 12px;\n        border-radius: 999px;\n        background: var(--white-10);\n        border: 1px solid var(--white-10);\n        cursor: pointer;\n        transition: all 0.2s;\n      }\n\n      .theme-toggle:hover {\n        background: var(--white-15);\n      }\n\n      .theme-toggle svg {\n        width: 16px;\n        height: 16px;\n        stroke: var(--text-light-05);\n      }\n\n      .settings-content {\n        padding: 24px;\n      }\n\n      .settings-section {\n        margin-bottom: 32px;\n      }\n\n      .settings-section:last-child {\n        margin-bottom: 0;\n      }\n\n      .section-title {\n        font-size: 11px;\n        font-weight: 600;\n        text-transform: uppercase;\n        letter-spacing: 0.05em;\n        color: var(--text-light-03);\n        margin-bottom: 12px;\n      }\n\n      .settings-group {\n        background: rgba(255, 255, 255, 0.05);\n        border-radius: 16px;\n        padding: 4px;\n      }\n\n      .setting-row {\n        display: flex;\n        justify-content: space-between;\n        align-items: center;\n        padding: 12px;\n      }\n\n      .setting-row-content {\n        display: flex;\n        flex-direction: column;\n        gap: 4px;\n        flex: 1;\n      }\n\n      .setting-label {\n        font-size: 14px;\n        font-weight: 400;\n        color: var(--text-light-05);\n      }\n\n      .setting-description {\n        font-size: 12px;\n        color: var(--text-light-03);\n      }\n\n      .setting-divider {\n        height: 1px;\n        background: var(--white-10);\n        margin: 0 4px;\n      }\n\n      .input-field {\n        width: 100%;\n        padding: 10px 12px;\n        border: 1px solid var(--white-10);\n        border-radius: 8px;\n        font-size: 14px;\n        background: rgba(255, 255, 255, 0.05);\n        color: var(--text-light-05);\n        font-family: var(--font-hanken-grotesk);\n        transition: all 0.2s;\n        margin: 0;\n      }\n\n      .input-field:focus {\n        outline: none;\n        border-color: var(--white-30);\n        background: rgba(255, 255, 255, 0.1);\n        box-shadow: 0 0 0 2px rgba(255, 255, 255, 0.1);\n        color: var(--text-light-05);\n      }\n\n      .input-field::placeholder {\n        color: var(--text-light-03);\n      }\n\n      .setting-row .input-field {\n        margin-top: 0;\n      }\n\n      .toggle-switch {\n        position: relative;\n        display: inline-block;\n        width: 44px;\n        height: 24px;\n      }\n\n      .toggle-switch input {\n        opacity: 0;\n        width: 0;\n        height: 0;\n      }\n\n      .toggle-slider {\n        position: absolute;\n        cursor: pointer;\n        top: 0;\n        left: 0;\n        right: 0;\n        bottom: 0;\n        background-color: rgba(255, 255, 255, 0.2);\n        transition: 0.3s;\n        border-radius: 24px;\n      }\n\n      .toggle-slider:before {\n        position: absolute;\n        content: \"\";\n        height: 18px;\n        width: 18px;\n        left: 3px;\n        bottom: 3px;\n        background-color: white;\n        transition: 0.3s;\n        border-radius: 50%;\n      }\n\n      input:checked + .toggle-slider {\n        background-color: rgba(255, 255, 255, 0.4);\n      }\n\n      input:checked + .toggle-slider:before {\n        transform: translateX(20px);\n      }\n\n      .status-container {\n        margin-top: 20px;\n        padding: 12px;\n        background: rgba(255, 255, 255, 0.05);\n        border-radius: 8px;\n        opacity: 0;\n        transition: opacity 0.3s;\n      }\n\n      .status-container.show {\n        opacity: 1;\n      }\n\n      .status-message {\n        margin: 0 0 12px 0;\n        color: var(--text-light-05);\n        font-size: 14px;\n        line-height: 1.5;\n      }\n\n      .button {\n        padding: 10px 20px;\n        border-radius: 8px;\n        border: none;\n        cursor: pointer;\n        font-size: 14px;\n        font-weight: 500;\n        transition: all 0.2s;\n        font-family: var(--font-hanken-grotesk);\n      }\n\n      .button.secondary {\n        background: var(--white-10);\n        color: var(--text-light-05);\n        width: 100%;\n      }\n\n      .button.secondary:hover {\n        background: var(--white-15);\n      }\n\n      kbd {\n        background: rgba(255, 255, 255, 0.1);\n        border: 1px solid var(--white-10);\n        border-radius: 4px;\n        padding: 2px 6px;\n        font-family: monospace;\n        font-weight: 500;\n        color: var(--text-light-05);\n        font-size: 11px;\n      }\n\n      @media (max-width: 600px) {\n        .settings-container {\n          padding: 20px 16px;\n        }\n\n        .settings-header {\n          padding: 20px;\n        }\n\n        .settings-content {\n          padding: 20px;\n        }\n      }\n    </style>\n  </head>\n\n  <body>\n    <div class=\"settings-container\">\n      <div class=\"settings-panel\">\n        <div class=\"settings-header\">\n          <div class=\"settings-header-left\">\n            <div class=\"settings-icon\">\n              <img src=\"../../public/icon48.png\" alt=\"Onyx\" />\n            </div>\n            <h1 class=\"settings-title\">Settings</h1>\n          </div>\n          <button\n            class=\"theme-toggle\"\n            id=\"themeToggle\"\n            aria-label=\"Toggle theme\"\n          >\n            <svg\n              id=\"themeIcon\"\n              viewBox=\"0 0 24 24\"\n              fill=\"none\"\n              stroke=\"currentColor\"\n            >\n              <circle cx=\"12\" cy=\"12\" r=\"4\"></circle>\n              <path\n                d=\"M12 2v2m0 16v2M4.93 4.93l1.41 1.41m11.32 11.32l1.41 1.41M2 12h2m16 0h2M4.93 19.07l1.41-1.41M17.66 6.34l1.41-1.41\"\n              ></path>\n            </svg>\n          </button>\n        </div>\n\n        <div class=\"settings-content\">\n          <!-- General Section -->\n          <section class=\"settings-section\">\n            <div class=\"section-title\">General</div>\n            <div class=\"settings-group\">\n              <div class=\"setting-row\">\n                <div class=\"setting-row-content\">\n                  <label class=\"setting-label\" for=\"onyxDomain\"\n                    >Root Domain</label\n                  >\n                  <div class=\"setting-description\">\n                    The root URL for your Onyx instance\n                  </div>\n                </div>\n              </div>\n              <div class=\"setting-divider\"></div>\n              <div class=\"setting-row\" style=\"padding: 12px\">\n                <input\n                  type=\"text\"\n                  id=\"onyxDomain\"\n                  class=\"input-field\"\n                  placeholder=\"https://cloud.onyx.app\"\n                />\n              </div>\n              <div class=\"setting-divider\"></div>\n              <div class=\"setting-row\">\n                <div class=\"setting-row-content\">\n                  <label class=\"setting-label\" for=\"useOnyxAsDefault\"\n                    >Use Onyx as new tab page</label\n                  >\n                </div>\n                <label class=\"toggle-switch\">\n                  <input type=\"checkbox\" id=\"useOnyxAsDefault\" />\n                  <span class=\"toggle-slider\"></span>\n                </label>\n              </div>\n            </div>\n          </section>\n\n          <!-- Search Engine Section -->\n          <section class=\"settings-section\">\n            <div class=\"section-title\">Search Engine</div>\n            <div class=\"settings-group\">\n              <div class=\"setting-row\">\n                <div class=\"setting-row-content\">\n                  <label class=\"setting-label\">Use Onyx in Address Bar</label>\n                  <div class=\"setting-description\">\n                    Type <kbd>onyx</kbd> followed by a space in Chrome's address\n                    bar, then enter your search query and press Enter\n                  </div>\n                </div>\n              </div>\n              <div class=\"setting-divider\"></div>\n              <div class=\"setting-row\">\n                <div class=\"setting-row-content\">\n                  <div class=\"setting-description\">\n                    Searches will be directed to your configured Onyx instance\n                    at the Root Domain above\n                  </div>\n                </div>\n              </div>\n            </div>\n          </section>\n\n          <!-- Status Message -->\n          <div id=\"statusContainer\" class=\"status-container\">\n            <p id=\"status\" class=\"status-message\"></p>\n            <button id=\"newTab\" class=\"button secondary\" style=\"display: none\">\n              Open New Tab to Test\n            </button>\n          </div>\n        </div>\n      </div>\n    </div>\n    <script type=\"module\" src=\"options.js\"></script>\n  </body>\n</html>\n"
  },
  {
    "path": "extensions/chrome/src/pages/options.js",
    "content": "import {\n  CHROME_SPECIFIC_STORAGE_KEYS,\n  DEFAULT_ONYX_DOMAIN,\n} from \"../utils/constants.js\";\n\ndocument.addEventListener(\"DOMContentLoaded\", function () {\n  const domainInput = document.getElementById(\"onyxDomain\");\n  const useOnyxAsDefaultToggle = document.getElementById(\"useOnyxAsDefault\");\n  const statusContainer = document.getElementById(\"statusContainer\");\n  const statusElement = document.getElementById(\"status\");\n  const newTabButton = document.getElementById(\"newTab\");\n  const themeToggle = document.getElementById(\"themeToggle\");\n  const themeIcon = document.getElementById(\"themeIcon\");\n\n  let currentTheme = \"dark\";\n\n  function updateThemeIcon(theme) {\n    if (!themeIcon) return;\n\n    if (theme === \"light\") {\n      themeIcon.innerHTML = `\n        <circle cx=\"12\" cy=\"12\" r=\"4\"></circle>\n        <path d=\"M12 2v2m0 16v2M4.93 4.93l1.41 1.41m11.32 11.32l1.41 1.41M2 12h2m16 0h2M4.93 19.07l1.41-1.41M17.66 6.34l1.41-1.41\"></path>\n      `;\n    } else {\n      themeIcon.innerHTML = `\n        <path d=\"M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z\"></path>\n      `;\n    }\n  }\n\n  function loadStoredValues() {\n    chrome.storage.local.get(\n      {\n        [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN,\n        [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]: false,\n        [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: \"dark\",\n      },\n      (result) => {\n        if (domainInput)\n          domainInput.value = result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];\n        if (useOnyxAsDefaultToggle)\n          useOnyxAsDefaultToggle.checked =\n            result[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB];\n\n        currentTheme = result[CHROME_SPECIFIC_STORAGE_KEYS.THEME] || \"dark\";\n        updateThemeIcon(currentTheme);\n\n        document.body.className = currentTheme === \"light\" ? \"light-theme\" : \"\";\n      },\n    );\n  }\n\n  function saveSettings() {\n    const domain = domainInput.value.trim();\n    const useOnyxAsDefault = useOnyxAsDefaultToggle\n      ? useOnyxAsDefaultToggle.checked\n      : false;\n\n    chrome.storage.local.set(\n      {\n        [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: domain,\n        [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]:\n          useOnyxAsDefault,\n        [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: currentTheme,\n      },\n      () => {\n        showStatusMessage(\n          useOnyxAsDefault\n            ? \"Settings updated. Open a new tab to test it out. Click on the extension icon to bring up Onyx from any page.\"\n            : \"Settings updated.\",\n        );\n      },\n    );\n  }\n\n  function showStatusMessage(message) {\n    if (statusElement) {\n      const useOnyxAsDefault = useOnyxAsDefaultToggle\n        ? useOnyxAsDefaultToggle.checked\n        : false;\n\n      statusElement.textContent =\n        message ||\n        (useOnyxAsDefault\n          ? \"Settings updated. Open a new tab to test it out. Click on the extension icon to bring up Onyx from any page.\"\n          : \"Settings updated.\");\n\n      if (newTabButton) {\n        newTabButton.style.display = useOnyxAsDefault ? \"block\" : \"none\";\n      }\n    }\n\n    if (statusContainer) {\n      statusContainer.classList.add(\"show\");\n    }\n\n    setTimeout(hideStatusMessage, 5000);\n  }\n\n  function hideStatusMessage() {\n    if (statusContainer) {\n      statusContainer.classList.remove(\"show\");\n    }\n  }\n\n  function toggleTheme() {\n    currentTheme = currentTheme === \"light\" ? \"dark\" : \"light\";\n    updateThemeIcon(currentTheme);\n\n    document.body.className = currentTheme === \"light\" ? \"light-theme\" : \"\";\n\n    chrome.storage.local.set({\n      [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: currentTheme,\n    });\n  }\n\n  function openNewTab() {\n    chrome.tabs.create({});\n  }\n\n  if (domainInput) {\n    domainInput.addEventListener(\"input\", () => {\n      clearTimeout(domainInput.saveTimeout);\n      domainInput.saveTimeout = setTimeout(saveSettings, 1000);\n    });\n  }\n\n  if (useOnyxAsDefaultToggle) {\n    useOnyxAsDefaultToggle.addEventListener(\"change\", saveSettings);\n  }\n\n  if (themeToggle) {\n    themeToggle.addEventListener(\"click\", toggleTheme);\n  }\n\n  if (newTabButton) {\n    newTabButton.addEventListener(\"click\", openNewTab);\n  }\n\n  loadStoredValues();\n});\n"
  },
  {
    "path": "extensions/chrome/src/pages/panel.html",
    "content": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"utf-8\" />\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <meta http-equiv=\"Permissions-Policy\" content=\"clipboard-write=(self)\" />\n    <title>Onyx Panel</title>\n    <link rel=\"stylesheet\" href=\"../styles/shared.css\" />\n    <style>\n      body,\n      html {\n        margin: 0;\n        padding: 0;\n        width: 100%;\n        height: 100vh;\n        overflow: hidden;\n      }\n\n      #loading-screen {\n        position: fixed;\n        top: 0;\n        left: 0;\n        width: 100%;\n        height: 100%;\n        background-color: #f5f5f5;\n        display: flex;\n        flex-direction: column;\n        justify-content: center;\n        align-items: center;\n        z-index: 1000;\n        transition: opacity 0.5s ease-in-out;\n      }\n\n      #logo {\n        width: 100px;\n        height: 100px;\n        background-image: url(\"/public/logo.png\");\n        background-size: contain;\n        background-repeat: no-repeat;\n        background-position: center;\n        animation: pulse 2s infinite;\n      }\n\n      @keyframes pulse {\n        0% {\n          transform: scale(1);\n        }\n\n        50% {\n          transform: scale(1.1);\n        }\n\n        100% {\n          transform: scale(1);\n        }\n      }\n\n      #loading-text {\n        color: #0a0a0a;\n        margin-top: 20px;\n        font-size: 1.125rem;\n        font-weight: 600;\n        text-align: center;\n      }\n\n      iframe {\n        border: none;\n        width: 100%;\n        height: 100%;\n        position: absolute;\n        top: 0;\n        left: 0;\n        opacity: 0;\n        transition: opacity 0.5s ease-in-out;\n      }\n    </style>\n  </head>\n\n  <body>\n    <div id=\"loading-screen\">\n      <div id=\"logo\"></div>\n      <div id=\"loading-text\">Loading Onyx...</div>\n    </div>\n    <iframe\n      id=\"onyx-panel-iframe\"\n      allow=\"clipboard-read; clipboard-write\"\n    ></iframe>\n    <script src=\"../utils/error-modal.js\" type=\"module\"></script>\n    <script src=\"panel.js\" type=\"module\"></script>\n  </body>\n</html>\n"
  },
  {
    "path": "extensions/chrome/src/pages/panel.js",
    "content": "import { showErrorModal, showAuthModal } from \"../utils/error-modal.js\";\nimport {\n  ACTIONS,\n  CHROME_MESSAGE,\n  WEB_MESSAGE,\n  CHROME_SPECIFIC_STORAGE_KEYS,\n  SIDE_PANEL_PATH,\n} from \"../utils/constants.js\";\n(function () {\n  const iframe = document.getElementById(\"onyx-panel-iframe\");\n  const loadingScreen = document.getElementById(\"loading-screen\");\n\n  let currentUrl = \"\";\n  let iframeLoaded = false;\n  let iframeLoadTimeout;\n  let authRequired = false;\n\n  // Returns the origin of the Onyx app loaded in the iframe.\n  // We derive the origin from iframe.src so postMessage payloads\n  // (including tab URLs) are only delivered to the expected page.\n  // Throws if iframe.src is not a valid URL — this is intentional:\n  // postMessage must never fall back to the unsafe wildcard \"*\".\n  function getIframeOrigin() {\n    return new URL(iframe.src).origin;\n  }\n\n  async function checkPendingInput() {\n    try {\n      const result = await chrome.storage.session.get(\"pendingInput\");\n      if (result.pendingInput) {\n        const { url, pageUrl, timestamp } = result.pendingInput;\n        if (Date.now() - timestamp < 5000) {\n          setIframeSrc(url, pageUrl);\n          await chrome.storage.session.remove(\"pendingInput\");\n          return true;\n        }\n        await chrome.storage.session.remove(\"pendingInput\");\n      }\n    } catch (error) {\n      console.error(\"[Onyx Panel] Error checking pending input:\", error);\n    }\n    return false;\n  }\n\n  async function initializePanel() {\n    loadingScreen.style.display = \"flex\";\n    loadingScreen.style.opacity = \"1\";\n    iframe.style.opacity = \"0\";\n\n    // Check for pending input first (from selection icon click)\n    const hasPendingInput = await checkPendingInput();\n    if (!hasPendingInput) {\n      loadOnyxDomain();\n    }\n  }\n\n  function setIframeSrc(url, pageUrl) {\n    iframe.src = url;\n    currentUrl = pageUrl;\n  }\n\n  function sendWebsiteToIframe(pageUrl) {\n    if (iframe.contentWindow && pageUrl !== currentUrl) {\n      iframe.contentWindow.postMessage(\n        {\n          type: WEB_MESSAGE.PAGE_CHANGE,\n          url: pageUrl,\n        },\n        getIframeOrigin(),\n      );\n      currentUrl = pageUrl;\n    }\n  }\n\n  function startIframeLoadTimeout() {\n    iframeLoadTimeout = setTimeout(() => {\n      if (!iframeLoaded) {\n        if (authRequired) {\n          showAuthModal();\n        } else {\n          showErrorModal(iframe.src);\n        }\n      }\n    }, 2500);\n  }\n\n  function handleMessage(event) {\n    // Only trust messages from the Onyx app iframe.\n    // Check both source identity and origin so that a cross-origin page\n    // navigated to inside the iframe cannot send privileged extension\n    // messages (e.g. TAB_READING_ENABLED) after iframe.src changes.\n    // getIframeOrigin() throws if iframe.src is not yet a valid URL —\n    // catching it here fails closed (message is rejected, not processed).\n    if (event.source !== iframe.contentWindow) return;\n    try {\n      if (event.origin !== getIframeOrigin()) return;\n    } catch {\n      return;\n    }\n    if (event.data.type === CHROME_MESSAGE.ONYX_APP_LOADED) {\n      clearTimeout(iframeLoadTimeout);\n      iframeLoaded = true;\n      showIframe();\n      if (iframe.contentWindow) {\n        iframe.contentWindow.postMessage(\n          { type: \"PANEL_READY\" },\n          getIframeOrigin(),\n        );\n      }\n    } else if (event.data.type === CHROME_MESSAGE.AUTH_REQUIRED) {\n      authRequired = true;\n    } else if (event.data.type === CHROME_MESSAGE.TAB_READING_ENABLED) {\n      chrome.runtime.sendMessage({ action: ACTIONS.TAB_READING_ENABLED });\n    } else if (event.data.type === CHROME_MESSAGE.TAB_READING_DISABLED) {\n      chrome.runtime.sendMessage({ action: ACTIONS.TAB_READING_DISABLED });\n    }\n  }\n\n  function showIframe() {\n    iframe.style.opacity = \"1\";\n    loadingScreen.style.opacity = \"0\";\n    setTimeout(() => {\n      loadingScreen.style.display = \"none\";\n    }, 500);\n  }\n\n  async function loadOnyxDomain() {\n    const response = await chrome.runtime.sendMessage({\n      action: ACTIONS.GET_CURRENT_ONYX_DOMAIN,\n    });\n    if (response && response[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]) {\n      setIframeSrc(\n        response[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN] + SIDE_PANEL_PATH,\n        \"\",\n      );\n    } else {\n      console.warn(\"Onyx domain not found, using default\");\n      const domain = await getOnyxDomain();\n      setIframeSrc(domain + SIDE_PANEL_PATH, \"\");\n    }\n  }\n\n  chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {\n    if (request.action === ACTIONS.OPEN_ONYX_WITH_INPUT) {\n      setIframeSrc(request.url, request.pageUrl);\n    } else if (request.action === ACTIONS.UPDATE_PAGE_URL) {\n      sendWebsiteToIframe(request.pageUrl);\n    } else if (request.action === ACTIONS.TAB_URL_UPDATED) {\n      if (iframe.contentWindow) {\n        iframe.contentWindow.postMessage(\n          { type: CHROME_MESSAGE.TAB_URL_UPDATED, url: request.url },\n          getIframeOrigin(),\n        );\n      }\n    }\n  });\n\n  window.addEventListener(\"message\", handleMessage);\n\n  initializePanel();\n  startIframeLoadTimeout();\n})();\n"
  },
  {
    "path": "extensions/chrome/src/pages/popup.html",
    "content": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"utf-8\" />\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <meta http-equiv=\"Permissions-Policy\" content=\"clipboard-write=(self)\" />\n    <title>Onyx</title>\n    <link rel=\"stylesheet\" href=\"../styles/shared.css\" />\n    <style>\n      :root {\n        --background-900: #0a0a0a;\n        --background-800: #1a1a1a;\n        --text-light-05: rgba(255, 255, 255, 0.95);\n        --text-light-03: rgba(255, 255, 255, 0.6);\n        --white-10: rgba(255, 255, 255, 0.1);\n        --white-15: rgba(255, 255, 255, 0.15);\n        --white-20: rgba(255, 255, 255, 0.2);\n      }\n\n      * {\n        box-sizing: border-box;\n      }\n\n      body {\n        width: 300px;\n        margin: 0;\n        padding: 0;\n        font-family: var(--font-hanken-grotesk);\n        background: linear-gradient(\n          135deg,\n          var(--background-900) 0%,\n          var(--background-800) 100%\n        );\n        color: var(--text-light-05);\n      }\n\n      .popup-container {\n        padding: 16px;\n      }\n\n      .popup-header {\n        display: flex;\n        align-items: center;\n        gap: 12px;\n        padding-bottom: 16px;\n        border-bottom: 1px solid var(--white-10);\n        margin-bottom: 16px;\n      }\n\n      .popup-icon {\n        width: 36px;\n        height: 36px;\n        border-radius: 10px;\n        background: white;\n        display: flex;\n        align-items: center;\n        justify-content: center;\n        overflow: hidden;\n      }\n\n      .popup-icon img {\n        width: 100%;\n        height: 100%;\n        object-fit: contain;\n        padding: 4px;\n      }\n\n      .popup-title {\n        margin: 0;\n        font-size: 18px;\n        font-weight: 600;\n        color: var(--text-light-05);\n      }\n\n      .menu-button-content {\n        display: flex;\n        align-items: center;\n        justify-content: space-between;\n        width: 100%;\n      }\n\n      .menu-button-text {\n        display: flex;\n        align-items: center;\n        gap: 10px;\n      }\n\n      .menu-button-shortcut {\n        font-size: 11px;\n        color: var(--text-light-03);\n        font-weight: 400;\n        margin-left: auto;\n      }\n\n      .settings-group {\n        background: rgba(255, 255, 255, 0.05);\n        border-radius: 12px;\n        padding: 4px;\n        margin-bottom: 12px;\n      }\n\n      .setting-row {\n        display: flex;\n        justify-content: space-between;\n        align-items: center;\n        padding: 12px;\n      }\n\n      .setting-label {\n        font-size: 14px;\n        font-weight: 400;\n        color: var(--text-light-05);\n      }\n\n      .setting-divider {\n        height: 1px;\n        background: var(--white-10);\n        margin: 0 4px;\n      }\n\n      .menu-button {\n        background: rgba(255, 255, 255, 0.05);\n        border: none;\n        padding: 12px;\n        width: 100%;\n        text-align: left;\n        cursor: pointer;\n        font-size: 14px;\n        color: var(--text-light-05);\n        font-weight: 400;\n        transition: background 0.2s;\n        border-radius: 12px;\n        font-family: var(--font-hanken-grotesk);\n      }\n\n      .menu-button:hover {\n        background: rgba(255, 255, 255, 0.1);\n      }\n\n      .menu-button svg {\n        width: 18px;\n        height: 18px;\n        stroke: var(--text-light-05);\n        fill: none;\n        stroke-width: 2;\n        stroke-linecap: round;\n        stroke-linejoin: round;\n      }\n\n      .button-group {\n        display: flex;\n        flex-direction: column;\n        gap: 8px;\n      }\n\n      .toggle-switch {\n        position: relative;\n        display: inline-block;\n        width: 44px;\n        height: 24px;\n      }\n\n      .toggle-switch input {\n        opacity: 0;\n        width: 0;\n        height: 0;\n      }\n\n      .toggle-slider {\n        position: absolute;\n        cursor: pointer;\n        top: 0;\n        left: 0;\n        right: 0;\n        bottom: 0;\n        background-color: rgba(255, 255, 255, 0.2);\n        transition: 0.3s;\n        border-radius: 24px;\n      }\n\n      .toggle-slider:before {\n        position: absolute;\n        content: \"\";\n        height: 18px;\n        width: 18px;\n        left: 3px;\n        bottom: 3px;\n        background-color: white;\n        transition: 0.3s;\n        border-radius: 50%;\n      }\n\n      input:checked + .toggle-slider {\n        background-color: rgba(255, 255, 255, 0.4);\n      }\n\n      input:checked + .toggle-slider:before {\n        transform: translateX(20px);\n      }\n    </style>\n  </head>\n  <body>\n    <div class=\"popup-container\">\n      <div class=\"popup-header\">\n        <div class=\"popup-icon\">\n          <img src=\"../../public/icon48.png\" alt=\"Onyx\" />\n        </div>\n        <h2 class=\"popup-title\">Onyx</h2>\n      </div>\n\n      <div class=\"settings-group\">\n        <div class=\"setting-row\">\n          <label class=\"setting-label\" for=\"defaultNewTabToggle\">\n            Use Onyx as new tab page\n          </label>\n          <label class=\"toggle-switch\">\n            <input type=\"checkbox\" id=\"defaultNewTabToggle\" />\n            <span class=\"toggle-slider\"></span>\n          </label>\n        </div>\n      </div>\n\n      <div class=\"button-group\">\n        <button class=\"menu-button\" id=\"openSidePanel\">\n          <div class=\"menu-button-content\">\n            <div class=\"menu-button-text\">\n              <svg viewBox=\"0 0 24 24\">\n                <rect x=\"3\" y=\"3\" width=\"18\" height=\"18\" rx=\"2\" ry=\"2\"></rect>\n                <line x1=\"15\" y1=\"3\" x2=\"15\" y2=\"21\"></line>\n              </svg>\n              Open Onyx Panel\n            </div>\n            <span class=\"menu-button-shortcut\">Ctrl+O</span>\n          </div>\n        </button>\n\n        <button class=\"menu-button\" id=\"openOptions\">\n          <div class=\"menu-button-text\">\n            <svg viewBox=\"0 0 24 24\">\n              <circle cx=\"12\" cy=\"12\" r=\"3\"></circle>\n              <path\n                d=\"M19.4 15a1.65 1.65 0 0 0 .33 1.82l.06.06a2 2 0 0 1 0 2.83 2 2 0 0 1-2.83 0l-.06-.06a1.65 1.65 0 0 0-1.82-.33 1.65 1.65 0 0 0-1 1.51V21a2 2 0 0 1-2 2 2 2 0 0 1-2-2v-.09A1.65 1.65 0 0 0 9 19.4a1.65 1.65 0 0 0-1.82.33l-.06.06a2 2 0 0 1-2.83 0 2 2 0 0 1 0-2.83l.06-.06a1.65 1.65 0 0 0 .33-1.82 1.65 1.65 0 0 0-1.51-1H3a2 2 0 0 1-2-2 2 2 0 0 1 2-2h.09A1.65 1.65 0 0 0 4.6 9a1.65 1.65 0 0 0-.33-1.82l-.06-.06a2 2 0 0 1 0-2.83 2 2 0 0 1 2.83 0l.06.06a1.65 1.65 0 0 0 1.82.33H9a1.65 1.65 0 0 0 1-1.51V3a2 2 0 0 1 2-2 2 2 0 0 1 2 2v.09a1.65 1.65 0 0 0 1 1.51 1.65 1.65 0 0 0 1.82-.33l.06-.06a2 2 0 0 1 2.83 0 2 2 0 0 1 0 2.83l-.06.06a1.65 1.65 0 0 0-.33 1.82V9a1.65 1.65 0 0 0 1.51 1H21a2 2 0 0 1 2 2 2 2 0 0 1-2 2h-.09a1.65 1.65 0 0 0-1.51 1z\"\n              ></path>\n            </svg>\n            Extension Settings\n          </div>\n        </button>\n      </div>\n    </div>\n    <script type=\"module\" src=\"popup.js\"></script>\n  </body>\n</html>\n"
  },
  {
    "path": "extensions/chrome/src/pages/popup.js",
    "content": "import { CHROME_SPECIFIC_STORAGE_KEYS } from \"../utils/constants.js\";\n\ndocument.addEventListener(\"DOMContentLoaded\", async function () {\n  const defaultNewTabToggle = document.getElementById(\"defaultNewTabToggle\");\n  const openSidePanelButton = document.getElementById(\"openSidePanel\");\n  const openOptionsButton = document.getElementById(\"openOptions\");\n\n  async function loadSetting() {\n    const result = await chrome.storage.local.get({\n      [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]: false,\n    });\n    if (defaultNewTabToggle) {\n      defaultNewTabToggle.checked =\n        result[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB];\n    }\n  }\n\n  async function toggleSetting() {\n    const currentValue = defaultNewTabToggle.checked;\n    await chrome.storage.local.set({\n      [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]: currentValue,\n    });\n  }\n\n  async function openSidePanel() {\n    try {\n      const [tab] = await chrome.tabs.query({\n        active: true,\n        currentWindow: true,\n      });\n      if (tab && chrome.sidePanel) {\n        await chrome.sidePanel.open({ tabId: tab.id });\n        window.close();\n      }\n    } catch (error) {\n      console.error(\"Error opening side panel:\", error);\n    }\n  }\n\n  function openOptions() {\n    chrome.runtime.openOptionsPage();\n    window.close();\n  }\n\n  await loadSetting();\n\n  if (defaultNewTabToggle) {\n    defaultNewTabToggle.addEventListener(\"change\", toggleSetting);\n  }\n\n  if (openSidePanelButton) {\n    openSidePanelButton.addEventListener(\"click\", openSidePanel);\n  }\n\n  if (openOptionsButton) {\n    openOptionsButton.addEventListener(\"click\", openOptions);\n  }\n});\n"
  },
  {
    "path": "extensions/chrome/src/pages/welcome.html",
    "content": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"utf-8\" />\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <title>Welcome to Onyx</title>\n    <link rel=\"preconnect\" href=\"https://fonts.googleapis.com\" />\n    <link rel=\"preconnect\" href=\"https://fonts.gstatic.com\" crossorigin />\n    <link\n      href=\"https://fonts.googleapis.com/css2?family=Hanken+Grotesk:wght@300;400;500;600;700&display=swap\"\n      rel=\"stylesheet\"\n    />\n    <link rel=\"stylesheet\" href=\"../styles/shared.css\" />\n    <style>\n      :root {\n        --background-900: #0a0a0a;\n        --background-800: #1a1a1a;\n        --text-light-05: rgba(255, 255, 255, 0.95);\n        --text-light-03: rgba(255, 255, 255, 0.6);\n        --white-10: rgba(255, 255, 255, 0.1);\n        --white-15: rgba(255, 255, 255, 0.15);\n        --white-20: rgba(255, 255, 255, 0.2);\n        --white-30: rgba(255, 255, 255, 0.3);\n        --white-40: rgba(255, 255, 255, 0.4);\n        --white-80: rgba(255, 255, 255, 0.8);\n        --black-40: rgba(0, 0, 0, 0.4);\n      }\n\n      * {\n        box-sizing: border-box;\n      }\n\n      body {\n        margin: 0;\n        padding: 0;\n        font-family: var(--font-hanken-grotesk);\n        background: linear-gradient(\n          135deg,\n          var(--background-900) 0%,\n          var(--background-800) 100%\n        );\n        min-height: 100vh;\n        color: var(--text-light-05);\n        transition: background 0.3s ease;\n        display: flex;\n        align-items: center;\n        justify-content: center;\n      }\n\n      body.light-theme {\n        --background-900: #f5f5f5;\n        --background-800: #ffffff;\n        --text-light-05: rgba(0, 0, 0, 0.95);\n        --text-light-03: rgba(0, 0, 0, 0.6);\n        background: linear-gradient(135deg, #f5f5f5 0%, #ffffff 100%);\n      }\n\n      body.light-theme .welcome-panel {\n        background: linear-gradient(\n          to bottom,\n          rgba(255, 255, 255, 0.95),\n          rgba(245, 245, 245, 0.95)\n        );\n        border: 1px solid rgba(0, 0, 0, 0.1);\n      }\n\n      body.light-theme .welcome-header {\n        border-bottom: 1px solid rgba(0, 0, 0, 0.1);\n      }\n\n      body.light-theme .logo-container {\n        background: rgba(0, 0, 0, 0.05);\n      }\n\n      body.light-theme .theme-toggle {\n        background: rgba(0, 0, 0, 0.05);\n        border: 1px solid rgba(0, 0, 0, 0.1);\n      }\n\n      body.light-theme .theme-toggle:hover {\n        background: rgba(0, 0, 0, 0.08);\n      }\n\n      body.light-theme .theme-toggle svg {\n        stroke: rgba(0, 0, 0, 0.95);\n      }\n\n      body.light-theme .input-field {\n        border: 1px solid rgba(0, 0, 0, 0.1);\n        background: rgba(0, 0, 0, 0.05);\n        color: rgba(0, 0, 0, 0.95);\n      }\n\n      body.light-theme .input-field:focus {\n        outline: none;\n        border-color: rgba(0, 0, 0, 0.25);\n        background: rgba(0, 0, 0, 0.08);\n        box-shadow: 0 0 0 2px rgba(0, 0, 0, 0.05);\n      }\n\n      body.light-theme .input-field::placeholder {\n        color: rgba(0, 0, 0, 0.4);\n      }\n\n      body.light-theme .toggle-slider {\n        background-color: rgba(0, 0, 0, 0.15);\n      }\n\n      body.light-theme input:checked + .toggle-slider {\n        background-color: rgba(0, 0, 0, 0.3);\n      }\n\n      body.light-theme .toggle-slider:before {\n        background-color: white;\n        box-shadow: 0 1px 3px rgba(0, 0, 0, 0.2);\n      }\n\n      body.light-theme .step-dot {\n        background: rgba(0, 0, 0, 0.2);\n      }\n\n      body.light-theme .step-dot.active {\n        background: rgba(0, 0, 0, 0.6);\n      }\n\n      body.light-theme .btn-primary {\n        background: rgba(0, 0, 0, 0.9);\n        color: white;\n      }\n\n      body.light-theme .btn-primary:hover {\n        background: rgba(0, 0, 0, 0.8);\n      }\n\n      body.light-theme .btn-secondary {\n        background: rgba(0, 0, 0, 0.05);\n        color: rgba(0, 0, 0, 0.95);\n      }\n\n      body.light-theme .btn-secondary:hover {\n        background: rgba(0, 0, 0, 0.08);\n      }\n\n      body.light-theme .settings-group {\n        background: rgba(0, 0, 0, 0.03);\n      }\n\n      body.light-theme .setting-divider {\n        background: rgba(0, 0, 0, 0.1);\n      }\n\n      .welcome-container {\n        max-width: 480px;\n        width: 100%;\n        margin: 0 auto;\n        padding: 40px 20px;\n      }\n\n      .welcome-panel {\n        background: linear-gradient(\n          to bottom,\n          rgba(10, 10, 10, 0.95),\n          rgba(26, 26, 26, 0.95)\n        );\n        backdrop-filter: blur(24px);\n        border-radius: 20px;\n        border: 1px solid var(--white-10);\n        overflow: hidden;\n        box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);\n        animation: panelFadeIn 0.5s ease-out;\n      }\n\n      @keyframes panelFadeIn {\n        from {\n          opacity: 0;\n          transform: translateY(20px);\n        }\n        to {\n          opacity: 1;\n          transform: translateY(0);\n        }\n      }\n\n      .welcome-header {\n        padding: 24px;\n        border-bottom: 1px solid var(--white-10);\n        display: flex;\n        align-items: center;\n        justify-content: space-between;\n      }\n\n      .header-left {\n        display: flex;\n        align-items: center;\n        gap: 14px;\n      }\n\n      .logo-container {\n        width: 48px;\n        height: 48px;\n        border-radius: 14px;\n        background: white;\n        display: flex;\n        align-items: center;\n        justify-content: center;\n        overflow: hidden;\n      }\n\n      .logo-container img {\n        width: 100%;\n        height: 100%;\n        object-fit: contain;\n        padding: 8px;\n      }\n\n      .welcome-title {\n        font-size: 22px;\n        font-weight: 600;\n        color: var(--text-light-05);\n        margin: 0;\n      }\n\n      .theme-toggle {\n        display: flex;\n        align-items: center;\n        gap: 8px;\n        padding: 8px 12px;\n        border-radius: 999px;\n        background: var(--white-10);\n        border: 1px solid var(--white-10);\n        cursor: pointer;\n        transition: all 0.2s;\n      }\n\n      .theme-toggle:hover {\n        background: var(--white-15);\n      }\n\n      .theme-toggle svg {\n        width: 18px;\n        height: 18px;\n        stroke: var(--text-light-05);\n      }\n\n      .welcome-content {\n        padding: 32px 24px;\n      }\n\n      /* Step indicator */\n      .step-indicator {\n        display: flex;\n        justify-content: center;\n        gap: 8px;\n        margin-bottom: 32px;\n      }\n\n      .step-dot {\n        width: 8px;\n        height: 8px;\n        border-radius: 50%;\n        background: var(--white-20);\n        transition: all 0.3s ease;\n      }\n\n      .step-dot.active {\n        background: var(--white-80);\n        transform: scale(1.2);\n      }\n\n      /* Steps */\n      .step {\n        display: none;\n        animation: stepFadeIn 0.4s ease-out;\n      }\n\n      .step.active {\n        display: block;\n      }\n\n      @keyframes stepFadeIn {\n        from {\n          opacity: 0;\n          transform: translateX(20px);\n        }\n        to {\n          opacity: 1;\n          transform: translateX(0);\n        }\n      }\n\n      .step-title {\n        font-size: 24px;\n        font-weight: 600;\n        margin: 0 0 8px 0;\n        text-align: center;\n      }\n\n      .step-description {\n        font-size: 15px;\n        color: var(--text-light-03);\n        text-align: center;\n        margin: 0 0 28px 0;\n        line-height: 1.5;\n      }\n\n      /* Form elements */\n      .input-group {\n        margin-bottom: 24px;\n      }\n\n      .input-label {\n        display: block;\n        font-size: 13px;\n        font-weight: 500;\n        color: var(--text-light-03);\n        margin-bottom: 8px;\n        text-transform: uppercase;\n        letter-spacing: 0.03em;\n      }\n\n      .input-field {\n        width: 100%;\n        padding: 14px 16px;\n        border: 1px solid var(--white-10);\n        border-radius: 12px;\n        font-size: 15px;\n        background: rgba(255, 255, 255, 0.95);\n        color: rgba(0, 0, 0, 0.9);\n        font-family: var(--font-hanken-grotesk);\n        transition: all 0.2s;\n      }\n\n      .input-field:focus {\n        outline: none;\n        border-color: var(--white-30);\n        background: rgba(255, 255, 255, 1);\n        box-shadow: 0 0 0 3px rgba(255, 255, 255, 0.15);\n      }\n\n      .input-field::placeholder {\n        color: rgba(0, 0, 0, 0.4);\n      }\n\n      /* Settings group for step 2 */\n      .settings-group {\n        background: rgba(255, 255, 255, 0.05);\n        border-radius: 16px;\n        padding: 4px;\n        margin-bottom: 24px;\n      }\n\n      .setting-row {\n        display: flex;\n        justify-content: space-between;\n        align-items: center;\n        padding: 16px;\n      }\n\n      .setting-content {\n        display: flex;\n        flex-direction: column;\n        gap: 4px;\n        flex: 1;\n        padding-right: 16px;\n      }\n\n      .setting-label {\n        font-size: 15px;\n        font-weight: 500;\n        color: var(--text-light-05);\n      }\n\n      .setting-description {\n        font-size: 13px;\n        color: var(--text-light-03);\n        line-height: 1.4;\n      }\n\n      .setting-divider {\n        height: 1px;\n        background: var(--white-10);\n        margin: 0 8px;\n      }\n\n      /* Toggle switch */\n      .toggle-switch {\n        position: relative;\n        display: inline-block;\n        width: 52px;\n        height: 28px;\n        flex-shrink: 0;\n      }\n\n      .toggle-switch input {\n        opacity: 0;\n        width: 0;\n        height: 0;\n      }\n\n      .toggle-slider {\n        position: absolute;\n        cursor: pointer;\n        top: 0;\n        left: 0;\n        right: 0;\n        bottom: 0;\n        background-color: rgba(255, 255, 255, 0.2);\n        transition: 0.3s;\n        border-radius: 28px;\n      }\n\n      .toggle-slider:before {\n        position: absolute;\n        content: \"\";\n        height: 22px;\n        width: 22px;\n        left: 3px;\n        bottom: 3px;\n        background-color: white;\n        transition: 0.3s;\n        border-radius: 50%;\n      }\n\n      input:checked + .toggle-slider {\n        background-color: rgba(255, 255, 255, 0.4);\n      }\n\n      input:checked + .toggle-slider:before {\n        transform: translateX(24px);\n      }\n\n      /* Buttons */\n      .button-group {\n        display: flex;\n        gap: 12px;\n        margin-top: 8px;\n      }\n\n      .btn {\n        flex: 1;\n        padding: 14px 24px;\n        border-radius: 12px;\n        border: none;\n        cursor: pointer;\n        font-size: 15px;\n        font-weight: 500;\n        font-family: var(--font-hanken-grotesk);\n        transition: all 0.2s;\n      }\n\n      .btn-primary {\n        background: rgba(255, 255, 255, 0.95);\n        color: #0a0a0a;\n      }\n\n      .btn-primary:hover {\n        background: rgba(255, 255, 255, 0.85);\n        transform: translateY(-1px);\n      }\n\n      .btn-secondary {\n        background: var(--white-10);\n        color: var(--text-light-05);\n      }\n\n      .btn-secondary:hover {\n        background: var(--white-15);\n      }\n\n      .btn:active {\n        transform: translateY(0);\n      }\n\n      /* Success animation for completion */\n      .success-icon {\n        width: 64px;\n        height: 64px;\n        margin: 0 auto 24px;\n        border-radius: 50%;\n        background: rgba(255, 255, 255, 0.1);\n        display: flex;\n        align-items: center;\n        justify-content: center;\n        animation: successPop 0.5s ease-out;\n      }\n\n      .success-icon svg {\n        width: 32px;\n        height: 32px;\n        stroke: var(--text-light-05);\n        stroke-width: 2.5;\n      }\n\n      @keyframes successPop {\n        0% {\n          transform: scale(0);\n          opacity: 0;\n        }\n        50% {\n          transform: scale(1.1);\n        }\n        100% {\n          transform: scale(1);\n          opacity: 1;\n        }\n      }\n\n      @media (max-width: 500px) {\n        .welcome-container {\n          padding: 20px 16px;\n        }\n\n        .welcome-content {\n          padding: 24px 20px;\n        }\n\n        .step-title {\n          font-size: 20px;\n        }\n\n        .button-group {\n          flex-direction: column;\n        }\n      }\n    </style>\n  </head>\n\n  <body>\n    <div class=\"welcome-container\">\n      <div class=\"welcome-panel\">\n        <div class=\"welcome-header\">\n          <div class=\"header-left\">\n            <div class=\"logo-container\">\n              <img src=\"../../public/icon48.png\" alt=\"Onyx\" />\n            </div>\n            <h1 class=\"welcome-title\">Onyx</h1>\n          </div>\n          <button\n            class=\"theme-toggle\"\n            id=\"themeToggle\"\n            aria-label=\"Toggle theme\"\n          >\n            <svg\n              id=\"themeIcon\"\n              viewBox=\"0 0 24 24\"\n              fill=\"none\"\n              stroke=\"currentColor\"\n            >\n              <path d=\"M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z\"></path>\n            </svg>\n          </button>\n        </div>\n\n        <div class=\"welcome-content\">\n          <div class=\"step-indicator\">\n            <div class=\"step-dot active\" data-step=\"1\"></div>\n            <div class=\"step-dot\" data-step=\"2\"></div>\n          </div>\n\n          <!-- Step 1: Root Domain -->\n          <div class=\"step active\" id=\"step1\">\n            <h2 class=\"step-title\">Welcome to Onyx</h2>\n            <p class=\"step-description\">\n              Enter your Onyx instance URL to get started. This is where your\n              Onyx deployment is hosted.\n            </p>\n\n            <div class=\"input-group\">\n              <label class=\"input-label\" for=\"onyxDomain\">Root Domain</label>\n              <input\n                type=\"text\"\n                id=\"onyxDomain\"\n                class=\"input-field\"\n                placeholder=\"https://cloud.onyx.app\"\n              />\n            </div>\n\n            <div class=\"button-group\">\n              <button class=\"btn btn-primary\" id=\"continueBtn\">Continue</button>\n            </div>\n          </div>\n\n          <!-- Step 2: New Tab Setting -->\n          <div class=\"step\" id=\"step2\">\n            <h2 class=\"step-title\">Customize Your Experience</h2>\n            <p class=\"step-description\">\n              Set Onyx as your new tab page for quick access to your AI\n              assistant.\n            </p>\n\n            <div class=\"settings-group\">\n              <div class=\"setting-row\">\n                <div class=\"setting-content\">\n                  <span class=\"setting-label\">Use Onyx as new tab page</span>\n                  <span class=\"setting-description\"\n                    >Open Onyx every time you create a new tab</span\n                  >\n                </div>\n                <label class=\"toggle-switch\">\n                  <input type=\"checkbox\" id=\"useOnyxAsDefault\" checked />\n                  <span class=\"toggle-slider\"></span>\n                </label>\n              </div>\n            </div>\n\n            <div class=\"button-group\">\n              <button class=\"btn btn-secondary\" id=\"backBtn\">Back</button>\n              <button class=\"btn btn-primary\" id=\"finishBtn\">\n                Get Started\n              </button>\n            </div>\n          </div>\n        </div>\n      </div>\n    </div>\n    <script type=\"module\" src=\"welcome.js\"></script>\n  </body>\n</html>\n"
  },
  {
    "path": "extensions/chrome/src/pages/welcome.js",
    "content": "import {\n  CHROME_SPECIFIC_STORAGE_KEYS,\n  DEFAULT_ONYX_DOMAIN,\n} from \"../utils/constants.js\";\n\ndocument.addEventListener(\"DOMContentLoaded\", function () {\n  const domainInput = document.getElementById(\"onyxDomain\");\n  const useOnyxAsDefaultToggle = document.getElementById(\"useOnyxAsDefault\");\n  const continueBtn = document.getElementById(\"continueBtn\");\n  const backBtn = document.getElementById(\"backBtn\");\n  const finishBtn = document.getElementById(\"finishBtn\");\n  const themeToggle = document.getElementById(\"themeToggle\");\n  const themeIcon = document.getElementById(\"themeIcon\");\n\n  const step1 = document.getElementById(\"step1\");\n  const step2 = document.getElementById(\"step2\");\n  const stepDots = document.querySelectorAll(\".step-dot\");\n\n  let currentStep = 1;\n  let currentTheme = \"dark\";\n\n  // Initialize theme based on system preference or stored value\n  function initTheme() {\n    chrome.storage.local.get(\n      { [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: null },\n      (result) => {\n        const storedTheme = result[CHROME_SPECIFIC_STORAGE_KEYS.THEME];\n        if (storedTheme) {\n          currentTheme = storedTheme;\n        } else {\n          // Check system preference\n          currentTheme = window.matchMedia(\"(prefers-color-scheme: light)\")\n            .matches\n            ? \"light\"\n            : \"dark\";\n        }\n        applyTheme();\n      },\n    );\n  }\n\n  function applyTheme() {\n    document.body.className = currentTheme === \"light\" ? \"light-theme\" : \"\";\n    updateThemeIcon();\n  }\n\n  function updateThemeIcon() {\n    if (!themeIcon) return;\n\n    if (currentTheme === \"light\") {\n      themeIcon.innerHTML = `\n        <circle cx=\"12\" cy=\"12\" r=\"4\"></circle>\n        <path d=\"M12 2v2m0 16v2M4.93 4.93l1.41 1.41m11.32 11.32l1.41 1.41M2 12h2m16 0h2M4.93 19.07l1.41-1.41M17.66 6.34l1.41-1.41\"></path>\n      `;\n    } else {\n      themeIcon.innerHTML = `\n        <path d=\"M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z\"></path>\n      `;\n    }\n  }\n\n  function toggleTheme() {\n    currentTheme = currentTheme === \"light\" ? \"dark\" : \"light\";\n    applyTheme();\n    chrome.storage.local.set({\n      [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: currentTheme,\n    });\n  }\n\n  function goToStep(step) {\n    if (step === 1) {\n      step2.classList.remove(\"active\");\n      setTimeout(() => {\n        step1.classList.add(\"active\");\n      }, 50);\n    } else if (step === 2) {\n      step1.classList.remove(\"active\");\n      setTimeout(() => {\n        step2.classList.add(\"active\");\n      }, 50);\n    }\n\n    stepDots.forEach((dot) => {\n      const dotStep = parseInt(dot.dataset.step);\n      if (dotStep === step) {\n        dot.classList.add(\"active\");\n      } else {\n        dot.classList.remove(\"active\");\n      }\n    });\n\n    currentStep = step;\n  }\n\n  // Validate domain input\n  function validateDomain(domain) {\n    if (!domain) return false;\n    try {\n      new URL(domain);\n      return true;\n    } catch {\n      return false;\n    }\n  }\n\n  function handleContinue() {\n    const domain = domainInput.value.trim();\n\n    if (domain && !validateDomain(domain)) {\n      domainInput.style.borderColor = \"rgba(255, 100, 100, 0.5)\";\n      domainInput.focus();\n      return;\n    }\n\n    domainInput.style.borderColor = \"\";\n    goToStep(2);\n  }\n\n  function handleBack() {\n    goToStep(1);\n  }\n\n  function handleFinish() {\n    const domain = domainInput.value.trim() || DEFAULT_ONYX_DOMAIN;\n    const useOnyxAsDefault = useOnyxAsDefaultToggle.checked;\n\n    chrome.storage.local.set(\n      {\n        [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: domain,\n        [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]:\n          useOnyxAsDefault,\n        [CHROME_SPECIFIC_STORAGE_KEYS.THEME]: currentTheme,\n        [CHROME_SPECIFIC_STORAGE_KEYS.ONBOARDING_COMPLETE]: true,\n      },\n      () => {\n        // Open a new tab if they enabled the new tab feature, otherwise just close\n        if (useOnyxAsDefault) {\n          chrome.tabs.create({}, () => {\n            window.close();\n          });\n        } else {\n          window.close();\n        }\n      },\n    );\n  }\n\n  // Load any existing values (in case user returns to this page)\n  function loadStoredValues() {\n    chrome.storage.local.get(\n      {\n        [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: \"\",\n        [CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB]: true,\n      },\n      (result) => {\n        if (result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]) {\n          domainInput.value = result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];\n        }\n        useOnyxAsDefaultToggle.checked =\n          result[CHROME_SPECIFIC_STORAGE_KEYS.USE_ONYX_AS_DEFAULT_NEW_TAB];\n      },\n    );\n  }\n\n  if (themeToggle) {\n    themeToggle.addEventListener(\"click\", toggleTheme);\n  }\n\n  if (continueBtn) {\n    continueBtn.addEventListener(\"click\", handleContinue);\n  }\n\n  if (backBtn) {\n    backBtn.addEventListener(\"click\", handleBack);\n  }\n\n  if (finishBtn) {\n    finishBtn.addEventListener(\"click\", handleFinish);\n  }\n\n  // Allow Enter key to proceed\n  if (domainInput) {\n    domainInput.addEventListener(\"keydown\", (e) => {\n      if (e.key === \"Enter\") {\n        handleContinue();\n      }\n    });\n  }\n\n  initTheme();\n  loadStoredValues();\n});\n"
  },
  {
    "path": "extensions/chrome/src/styles/selection-icon.css",
    "content": "#onyx-selection-icon {\n  position: fixed;\n  z-index: 2147483647;\n  width: 32px;\n  height: 32px;\n  border-radius: 50%;\n  background-color: #ffffff;\n  border: 1px solid #e0e0e0;\n  box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);\n  cursor: pointer;\n  display: flex;\n  align-items: center;\n  justify-content: center;\n  opacity: 0;\n  transform: scale(0.8);\n  transition:\n    opacity 0.15s ease,\n    transform 0.15s ease,\n    box-shadow 0.15s ease;\n  pointer-events: none;\n}\n\n#onyx-selection-icon.visible {\n  opacity: 1;\n  transform: scale(1);\n  pointer-events: auto;\n}\n\n#onyx-selection-icon:hover {\n  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);\n  transform: scale(1.1);\n}\n\n#onyx-selection-icon:active {\n  transform: scale(0.95);\n}\n\n#onyx-selection-icon img {\n  width: 20px;\n  height: 20px;\n  pointer-events: none;\n}\n"
  },
  {
    "path": "extensions/chrome/src/styles/shared.css",
    "content": "/* Import Hanken Grotesk font */\n@import url(\"https://fonts.googleapis.com/css2?family=Hanken+Grotesk:wght@300;400;500;600;700&display=swap\");\n\n:root {\n  --primary-color: #4285f4;\n  --primary-hover-color: #3367d6;\n  --secondary-color: #f1f3f4;\n  --secondary-hover-color: #e8eaed;\n  --text-color: #333;\n  --text-light-color: #666;\n  --background-color: #f1f3f4;\n  --card-background-color: #fff;\n  --border-color: #ccc;\n  --font-family: Arial, sans-serif;\n  --font-hanken-grotesk: \"Hanken Grotesk\", sans-serif;\n}\n\nbody {\n  font-family: var(--font-hanken-grotesk);\n  margin: 0;\n  padding: 0;\n}\n\n.container {\n  max-width: 500px;\n  width: 90%;\n  margin: 0 auto;\n}\n\n.card {\n  background-color: var(--card-background-color);\n  padding: 25px;\n  border-radius: 10px;\n  box-shadow: 0 3px 5px rgba(0, 0, 0, 0.1);\n}\n\nh1 {\n  color: var(--text-color);\n  font-size: 24px;\n  font-weight: 600;\n  margin-top: 0;\n  margin-bottom: 20px;\n}\n\n.option-group {\n  margin-bottom: 20px;\n}\n\nlabel {\n  display: block;\n  margin-bottom: 5px;\n  color: var(--text-light-color);\n  font-weight: 400;\n  font-size: 16px;\n}\n\ninput[type=\"text\"] {\n  width: 100%;\n  padding: 8px;\n  border: 1px solid var(--border-color);\n  border-radius: 4px;\n  font-size: 14px;\n  background-color: var(--card-background-color);\n  color: var(--text-color);\n}\n\n.button {\n  width: 100%;\n  padding: 10px 20px;\n  border-radius: 5px;\n  border: none;\n  cursor: pointer;\n  font-size: 16px;\n  font-weight: 500;\n  transition: background-color 0.3s;\n}\n\n.button.primary {\n  background-color: var(--primary-color);\n  color: #fff;\n}\n\n.button.primary:hover {\n  background-color: var(--primary-hover-color);\n}\n\n.button.secondary {\n  background-color: var(--secondary-color);\n  color: var(--text-color);\n}\n\n.button.secondary:hover {\n  background-color: var(--secondary-hover-color);\n}\n\n.status-container {\n  margin-top: 10px;\n  margin-bottom: 15px;\n}\n\n.status-message {\n  margin: 0 0 10px 0;\n  color: var(--text-color);\n  font-weight: 500;\n  text-align: center;\n  font-size: 16px;\n  transition: opacity 0.5s ease-in-out;\n}\n\nkbd {\n  background-color: var(--secondary-color);\n  border: 1px solid var(--border-color);\n  border-radius: 3px;\n  padding: 2px 5px;\n  font-family: monospace;\n  font-weight: 500;\n  color: var(--text-color);\n}\n\n.toggle-label {\n  display: flex;\n  justify-content: space-between;\n  align-items: center;\n}\n\n.toggle-switch {\n  position: relative;\n  display: inline-block;\n  width: 50px;\n  height: 24px;\n}\n\n.toggle-switch input {\n  opacity: 0;\n  width: 0;\n  height: 0;\n}\n\n.slider {\n  position: absolute;\n  cursor: pointer;\n  top: 0;\n  left: 0;\n  right: 0;\n  bottom: 0;\n  background-color: var(--secondary-color);\n  transition: 0.4s;\n  border-radius: 24px;\n}\n\n.slider:before {\n  position: absolute;\n  content: \"\";\n  height: 20px;\n  width: 20px;\n  left: 2px;\n  bottom: 2px;\n  background-color: white;\n  transition: 0.4s;\n  border-radius: 50%;\n}\n\ninput:checked + .slider {\n  background-color: var(--primary-color);\n}\n\ninput:checked + .slider:before {\n  transform: translateX(26px);\n}\n"
  },
  {
    "path": "extensions/chrome/src/utils/constants.js",
    "content": "export const THEMES = {\n  LIGHT: \"light\",\n  DARK: \"dark\",\n};\n\nexport const DEFAULT_ONYX_DOMAIN = \"http://localhost:3000\";\n\nexport const SIDE_PANEL_PATH = \"/nrf/side-panel\";\n\nexport const ACTIONS = {\n  GET_SELECTED_TEXT: \"getSelectedText\",\n  GET_CURRENT_ONYX_DOMAIN: \"getCurrentOnyxDomain\",\n  UPDATE_PAGE_URL: \"updatePageUrl\",\n  SEND_TO_ONYX: \"sendToOnyx\",\n  OPEN_SIDE_PANEL: \"openSidePanel\",\n  TOGGLE_NEW_TAB_OVERRIDE: \"toggleNewTabOverride\",\n  OPEN_SIDE_PANEL_WITH_INPUT: \"openSidePanelWithInput\",\n  OPEN_ONYX_WITH_INPUT: \"openOnyxWithInput\",\n  CLOSE_SIDE_PANEL: \"closeSidePanel\",\n  TAB_URL_UPDATED: \"tabUrlUpdated\",\n  TAB_READING_ENABLED: \"tabReadingEnabled\",\n  TAB_READING_DISABLED: \"tabReadingDisabled\",\n};\n\nexport const CHROME_SPECIFIC_STORAGE_KEYS = {\n  ONYX_DOMAIN: \"onyxExtensionDomain\",\n  USE_ONYX_AS_DEFAULT_NEW_TAB: \"onyxExtensionDefaultNewTab\",\n  THEME: \"onyxExtensionTheme\",\n  BACKGROUND_IMAGE: \"onyxExtensionBackgroundImage\",\n  DARK_BG_URL: \"onyxExtensionDarkBgUrl\",\n  LIGHT_BG_URL: \"onyxExtensionLightBgUrl\",\n  ONBOARDING_COMPLETE: \"onyxExtensionOnboardingComplete\",\n};\n\nexport const CHROME_MESSAGE = {\n  PREFERENCES_UPDATED: \"PREFERENCES_UPDATED\",\n  ONYX_APP_LOADED: \"ONYX_APP_LOADED\",\n  SET_DEFAULT_NEW_TAB: \"SET_DEFAULT_NEW_TAB\",\n  LOAD_NEW_CHAT_PAGE: \"LOAD_NEW_CHAT_PAGE\",\n  LOAD_NEW_PAGE: \"LOAD_NEW_PAGE\",\n  AUTH_REQUIRED: \"AUTH_REQUIRED\",\n  TAB_READING_ENABLED: \"TAB_READING_ENABLED\",\n  TAB_READING_DISABLED: \"TAB_READING_DISABLED\",\n  TAB_URL_UPDATED: \"TAB_URL_UPDATED\",\n};\n\nexport const WEB_MESSAGE = {\n  PAGE_CHANGE: \"PAGE_CHANGE\",\n};\n"
  },
  {
    "path": "extensions/chrome/src/utils/content.js",
    "content": "let sidePanel = null;\n\nfunction createSidePanel() {\n  sidePanel = document.createElement(\"div\");\n  sidePanel.id = \"onyx-side-panel\";\n  sidePanel.style.cssText = `\n    position: fixed;\n    top: 0;\n    right: -400px;\n    width: 400px;\n    height: 100%;\n    background-color: white;\n    box-shadow: -2px 0 5px rgba(0,0,0,0.2);\n    transition: right 0.3s ease-in-out;\n    z-index: 9999;\n  `;\n\n  const iframe = document.createElement(\"iframe\");\n  iframe.style.cssText = `\n    width: 100%;\n    height: 100%;\n    border: none;\n  `;\n\n  chrome.runtime.sendMessage(\n    { action: ACTIONS.GET_CURRENT_ONYX_DOMAIN },\n    function (response) {\n      iframe.src = response[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];\n    },\n  );\n\n  sidePanel.appendChild(iframe);\n  document.body.appendChild(sidePanel);\n}\n"
  },
  {
    "path": "extensions/chrome/src/utils/error-modal.js",
    "content": "import {\n  CHROME_SPECIFIC_STORAGE_KEYS,\n  DEFAULT_ONYX_DOMAIN,\n  ACTIONS,\n} from \"./constants.js\";\n\nconst errorModalHTML = `\n  <div id=\"error-modal\">\n    <div class=\"modal-backdrop\"></div>\n    <div class=\"modal-content\">\n      <div class=\"modal-header\">\n        <div class=\"modal-icon\">\n          <svg viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\">\n            <circle cx=\"12\" cy=\"12\" r=\"10\"></circle>\n            <line x1=\"12\" y1=\"8\" x2=\"12\" y2=\"12\"></line>\n            <line x1=\"12\" y1=\"16\" x2=\"12.01\" y2=\"16\"></line>\n          </svg>\n        </div>\n        <h2>Configuration Error</h2>\n      </div>\n      <div class=\"modal-body\">\n        <p class=\"modal-description\">The Onyx configuration needs to be updated. Please check your settings or contact your Onyx administrator.</p>\n        <div class=\"url-display\">\n          <span class=\"url-label\">Attempted to load:</span>\n          <span id=\"attempted-url\" class=\"url-value\"></span>\n        </div>\n      </div>\n      <div class=\"modal-footer\">\n        <div class=\"button-container\">\n          <button id=\"open-options\" class=\"button primary\">Open Extension Options</button>\n          <button id=\"disable-override\" class=\"button secondary\">Disable New Tab Override</button>\n        </div>\n      </div>\n    </div>\n  </div>\n`;\n\nconst style = document.createElement(\"style\");\nstyle.textContent = `\n  :root {\n    --background-900: #0a0a0a;\n    --background-800: #1a1a1a;\n    --text-light-05: rgba(255, 255, 255, 0.95);\n    --text-light-03: rgba(255, 255, 255, 0.6);\n    --white-10: rgba(255, 255, 255, 0.1);\n    --white-15: rgba(255, 255, 255, 0.15);\n    --white-20: rgba(255, 255, 255, 0.2);\n    --white-30: rgba(255, 255, 255, 0.3);\n  }\n\n  #error-modal {\n    position: fixed;\n    top: 0;\n    left: 0;\n    width: 100%;\n    height: 100%;\n    display: none;\n    align-items: center;\n    justify-content: center;\n    z-index: 2000;\n    font-family: var(--font-hanken-grotesk), 'Hanken Grotesk', sans-serif;\n  }\n\n  #error-modal .modal-backdrop {\n    position: absolute;\n    top: 0;\n    left: 0;\n    width: 100%;\n    height: 100%;\n    background: rgba(0, 0, 0, 0.7);\n    backdrop-filter: blur(8px);\n  }\n\n  #error-modal .modal-content {\n    position: relative;\n    background: linear-gradient(to bottom, rgba(10, 10, 10, 0.95), rgba(26, 26, 26, 0.95));\n    backdrop-filter: blur(24px);\n    border-radius: 16px;\n    border: 1px solid var(--white-10);\n    max-width: 95%;\n    width: 500px;\n    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);\n    overflow: hidden;\n  }\n\n  #error-modal .modal-header {\n    padding: 24px;\n    border-bottom: 1px solid var(--white-10);\n    display: flex;\n    align-items: center;\n    gap: 12px;\n  }\n\n  #error-modal .modal-icon {\n    width: 40px;\n    height: 40px;\n    border-radius: 12px;\n    background: rgba(255, 87, 87, 0.15);\n    display: flex;\n    align-items: center;\n    justify-content: center;\n    flex-shrink: 0;\n  }\n\n  #error-modal .modal-icon svg {\n    width: 24px;\n    height: 24px;\n    stroke: #ff5757;\n  }\n\n  #error-modal .modal-icon.auth-icon {\n    background: rgba(66, 133, 244, 0.15);\n  }\n\n  #error-modal .modal-icon.auth-icon svg {\n    stroke: #4285f4;\n  }\n\n  #error-modal h2 {\n    margin: 0;\n    color: var(--text-light-05);\n    font-size: 20px;\n    font-weight: 600;\n  }\n\n  #error-modal .modal-body {\n    padding: 24px;\n  }\n\n  #error-modal .modal-description {\n    color: var(--text-light-05);\n    margin: 0 0 20px 0;\n    font-size: 14px;\n    line-height: 1.6;\n    font-weight: 400;\n  }\n\n  #error-modal .url-display {\n    background: rgba(255, 255, 255, 0.05);\n    border-radius: 8px;\n    padding: 12px;\n    border: 1px solid var(--white-10);\n  }\n\n  #error-modal .url-label {\n    display: block;\n    font-size: 12px;\n    color: var(--text-light-03);\n    margin-bottom: 6px;\n    font-weight: 500;\n    text-transform: uppercase;\n    letter-spacing: 0.05em;\n  }\n\n  #error-modal .url-value {\n    display: block;\n    font-size: 13px;\n    color: var(--text-light-05);\n    word-break: break-all;\n    font-family: monospace;\n    line-height: 1.5;\n  }\n\n  #error-modal .modal-footer {\n    padding: 0 24px 24px 24px;\n  }\n\n  #error-modal .button-container {\n    display: flex;\n    flex-direction: column;\n    gap: 10px;\n    margin-bottom: 16px;\n  }\n\n  #error-modal .button {\n    padding: 12px 20px;\n    border-radius: 8px;\n    border: none;\n    cursor: pointer;\n    font-size: 14px;\n    font-weight: 500;\n    transition: all 0.2s;\n    font-family: var(--font-hanken-grotesk), 'Hanken Grotesk', sans-serif;\n  }\n\n  #error-modal .button.primary {\n    background: rgba(255, 255, 255, 0.15);\n    color: var(--text-light-05);\n    border: 1px solid var(--white-10);\n  }\n\n  #error-modal .button.primary:hover {\n    background: rgba(255, 255, 255, 0.2);\n    border-color: var(--white-20);\n  }\n\n  #error-modal .button.secondary {\n    background: rgba(255, 255, 255, 0.05);\n    color: var(--text-light-05);\n    border: 1px solid var(--white-10);\n  }\n\n  #error-modal .button.secondary:hover {\n    background: rgba(255, 255, 255, 0.1);\n    border-color: var(--white-15);\n  }\n\n  #error-modal kbd {\n    background: rgba(255, 255, 255, 0.1);\n    border: 1px solid var(--white-10);\n    border-radius: 4px;\n    padding: 2px 6px;\n    font-family: monospace;\n    font-weight: 500;\n    color: var(--text-light-05);\n    font-size: 11px;\n  }\n\n  @media (min-width: 768px) {\n    #error-modal .button-container {\n      flex-direction: row;\n    }\n\n    #error-modal .button {\n      flex: 1;\n    }\n  }\n`;\n\nconst authModalHTML = `\n  <div id=\"error-modal\">\n    <div class=\"modal-backdrop\"></div>\n    <div class=\"modal-content\">\n      <div class=\"modal-header\">\n        <div class=\"modal-icon auth-icon\">\n          <svg viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\">\n            <rect x=\"3\" y=\"11\" width=\"18\" height=\"11\" rx=\"2\" ry=\"2\"></rect>\n            <path d=\"M7 11V7a5 5 0 0 1 10 0v4\"></path>\n          </svg>\n        </div>\n        <h2>Authentication Required</h2>\n      </div>\n      <div class=\"modal-body\">\n        <p class=\"modal-description\">You need to log in to access Onyx. Click the button below to authenticate.</p>\n      </div>\n      <div class=\"modal-footer\">\n        <div class=\"button-container\">\n          <button id=\"open-auth\" class=\"button primary\">Log In to Onyx</button>\n        </div>\n      </div>\n    </div>\n  </div>\n`;\n\nlet errorModal, attemptedUrlSpan, openOptionsButton, disableOverrideButton;\n\nlet authModal, openAuthButton;\n\nexport function initErrorModal() {\n  if (!document.getElementById(\"error-modal\")) {\n    const link = document.createElement(\"link\");\n    link.rel = \"stylesheet\";\n    link.href = \"../styles/shared.css\";\n    document.head.appendChild(link);\n\n    document.body.insertAdjacentHTML(\"beforeend\", errorModalHTML);\n    document.head.appendChild(style);\n\n    errorModal = document.getElementById(\"error-modal\");\n    authModal = document.getElementById(\"error-modal\");\n    attemptedUrlSpan = document.getElementById(\"attempted-url\");\n    openOptionsButton = document.getElementById(\"open-options\");\n    disableOverrideButton = document.getElementById(\"disable-override\");\n\n    openOptionsButton.addEventListener(\"click\", (e) => {\n      e.preventDefault();\n      chrome.runtime.openOptionsPage();\n    });\n\n    disableOverrideButton.addEventListener(\"click\", () => {\n      chrome.storage.local.set({ useOnyxAsDefaultNewTab: false }, () => {\n        chrome.tabs.update({ url: \"chrome://new-tab-page\" });\n      });\n    });\n  }\n}\n\nexport function showErrorModal(url) {\n  if (!errorModal) {\n    initErrorModal();\n  }\n  if (errorModal) {\n    errorModal.style.display = \"flex\";\n    errorModal.style.zIndex = \"9999\";\n    attemptedUrlSpan.textContent = url;\n    document.body.style.overflow = \"hidden\";\n  }\n}\n\nexport function hideErrorModal() {\n  if (errorModal) {\n    errorModal.style.display = \"none\";\n    document.body.style.overflow = \"auto\";\n  }\n}\n\nexport function checkModalVisibility() {\n  return errorModal\n    ? window.getComputedStyle(errorModal).display !== \"none\"\n    : false;\n}\n\nexport function initAuthModal() {\n  if (!document.getElementById(\"error-modal\")) {\n    const link = document.createElement(\"link\");\n    link.rel = \"stylesheet\";\n    link.href = \"../styles/shared.css\";\n    document.head.appendChild(link);\n\n    document.body.insertAdjacentHTML(\"beforeend\", authModalHTML);\n    document.head.appendChild(style);\n\n    authModal = document.getElementById(\"error-modal\");\n    openAuthButton = document.getElementById(\"open-auth\");\n\n    openAuthButton.addEventListener(\"click\", (e) => {\n      e.preventDefault();\n      chrome.storage.local.get(\n        { [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN },\n        (result) => {\n          const onyxDomain = result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];\n          chrome.runtime.sendMessage(\n            { action: ACTIONS.CLOSE_SIDE_PANEL },\n            () => {\n              if (chrome.runtime.lastError) {\n                console.error(\n                  \"Error closing side panel:\",\n                  chrome.runtime.lastError,\n                );\n              }\n              chrome.tabs.create(\n                {\n                  url: `${onyxDomain}/auth/login`,\n                  active: true,\n                },\n                (_) => {\n                  if (chrome.runtime.lastError) {\n                    console.error(\n                      \"Error opening auth tab:\",\n                      chrome.runtime.lastError,\n                    );\n                  }\n                },\n              );\n            },\n          );\n        },\n      );\n    });\n  }\n}\n\nexport function showAuthModal() {\n  if (!authModal) {\n    initAuthModal();\n  }\n  if (authModal) {\n    authModal.style.display = \"flex\";\n    authModal.style.zIndex = \"9999\";\n    document.body.style.overflow = \"hidden\";\n  }\n}\n\nexport function hideAuthModal() {\n  if (authModal) {\n    authModal.style.display = \"none\";\n    document.body.style.overflow = \"auto\";\n  }\n}\n"
  },
  {
    "path": "extensions/chrome/src/utils/selection-icon.js",
    "content": "(function () {\n  const OPEN_SIDE_PANEL_WITH_INPUT = \"openSidePanelWithInput\";\n\n  let selectionIcon = null;\n  let currentSelectedText = \"\";\n\n  function createSelectionIcon() {\n    if (selectionIcon) return;\n\n    selectionIcon = document.createElement(\"div\");\n    selectionIcon.id = \"onyx-selection-icon\";\n\n    const img = document.createElement(\"img\");\n    img.src = chrome.runtime.getURL(\"public/icon32.png\");\n    img.alt = \"Search with Onyx\";\n\n    selectionIcon.appendChild(img);\n    document.body.appendChild(selectionIcon);\n\n    selectionIcon.addEventListener(\"mousedown\", handleIconClick);\n  }\n\n  function showIcon(text) {\n    if (!selectionIcon) {\n      createSelectionIcon();\n    }\n\n    currentSelectedText = text;\n\n    const selection = window.getSelection();\n    if (!selection.rangeCount) return;\n\n    const range = selection.getRangeAt(0);\n    const rect = range.getBoundingClientRect();\n\n    const iconSize = 32;\n    const offset = 4;\n\n    let posX = rect.right + offset;\n    let posY = rect.bottom + offset;\n\n    if (posX + iconSize > window.innerWidth) {\n      posX = rect.left - iconSize - offset;\n    }\n    if (posY + iconSize > window.innerHeight) {\n      posY = rect.top - iconSize - offset;\n    }\n\n    posX = Math.max(\n      offset,\n      Math.min(posX, window.innerWidth - iconSize - offset),\n    );\n    posY = Math.max(\n      offset,\n      Math.min(posY, window.innerHeight - iconSize - offset),\n    );\n\n    selectionIcon.style.left = `${posX}px`;\n    selectionIcon.style.top = `${posY}px`;\n    selectionIcon.classList.add(\"visible\");\n  }\n\n  function hideIcon() {\n    if (selectionIcon) {\n      selectionIcon.classList.remove(\"visible\");\n    }\n    currentSelectedText = \"\";\n  }\n\n  function handleIconClick(e) {\n    e.preventDefault();\n    e.stopPropagation();\n\n    const textToSend = currentSelectedText;\n\n    if (textToSend) {\n      chrome.runtime.sendMessage(\n        {\n          action: OPEN_SIDE_PANEL_WITH_INPUT,\n          selectedText: textToSend,\n          pageUrl: window.location.href,\n        },\n        (response) => {\n          if (chrome.runtime.lastError) {\n            console.error(\n              \"[Onyx] Error sending message:\",\n              chrome.runtime.lastError.message,\n            );\n          } else {\n          }\n        },\n      );\n    }\n\n    hideIcon();\n  }\n\n  document.addEventListener(\"mouseup\", (e) => {\n    if (\n      e.target.id === \"onyx-selection-icon\" ||\n      e.target.closest(\"#onyx-selection-icon\")\n    ) {\n      return;\n    }\n\n    setTimeout(() => {\n      const selection = window.getSelection();\n      const selectedText = selection.toString().trim();\n\n      if (selectedText && selectedText.length > 0) {\n        showIcon(selectedText);\n      } else {\n        hideIcon();\n      }\n    }, 10);\n  });\n\n  document.addEventListener(\"mousedown\", (e) => {\n    if (\n      e.target.id !== \"onyx-selection-icon\" &&\n      !e.target.closest(\"#onyx-selection-icon\")\n    ) {\n      const selection = window.getSelection();\n      const selectedText = selection.toString().trim();\n      if (!selectedText) {\n        hideIcon();\n      }\n    }\n  });\n\n  document.addEventListener(\n    \"scroll\",\n    () => {\n      hideIcon();\n    },\n    true,\n  );\n\n  document.addEventListener(\"selectionchange\", () => {\n    const selection = window.getSelection();\n    const selectedText = selection.toString().trim();\n    if (!selectedText) {\n      hideIcon();\n    }\n  });\n\n  if (document.readyState === \"loading\") {\n    document.addEventListener(\"DOMContentLoaded\", createSelectionIcon);\n  } else {\n    createSelectionIcon();\n  }\n})();\n"
  },
  {
    "path": "extensions/chrome/src/utils/storage.js",
    "content": "import {\n  DEFAULT_ONYX_DOMAIN,\n  CHROME_SPECIFIC_STORAGE_KEYS,\n} from \"./constants.js\";\n\nexport async function getOnyxDomain() {\n  const result = await chrome.storage.local.get({\n    [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: DEFAULT_ONYX_DOMAIN,\n  });\n  return result[CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN];\n}\n\nexport function setOnyxDomain(domain, callback) {\n  chrome.storage.local.set(\n    { [CHROME_SPECIFIC_STORAGE_KEYS.ONYX_DOMAIN]: domain },\n    callback,\n  );\n}\n\nexport function getOnyxDomainSync() {\n  return new Promise((resolve) => {\n    getOnyxDomain(resolve);\n  });\n}\n"
  },
  {
    "path": "profiling/grafana/dashboards/onyx/opensearch-search-latency.json",
    "content": "{\n  \"annotations\": {\n    \"list\": [\n      {\n        \"builtIn\": 1,\n        \"datasource\": { \"type\": \"grafana\", \"uid\": \"-- Grafana --\" },\n        \"enable\": true,\n        \"hide\": true,\n        \"iconColor\": \"rgba(0, 211, 255, 1)\",\n        \"name\": \"Annotations & Alerts\",\n        \"type\": \"dashboard\"\n      }\n    ]\n  },\n  \"editable\": true,\n  \"fiscalYearStartMonth\": 0,\n  \"graphTooltip\": 1,\n  \"id\": null,\n  \"links\": [],\n  \"liveNow\": true,\n  \"panels\": [\n    {\n      \"title\": \"Client-Side Search Latency (P50 / P95 / P99)\",\n      \"description\": \"End-to-end latency as measured by the Python client, including network round-trip and serialization overhead.\",\n      \"type\": \"timeseries\",\n      \"gridPos\": { \"h\": 10, \"w\": 12, \"x\": 0, \"y\": 0 },\n      \"id\": 1,\n      \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": { \"mode\": \"palette-classic\" },\n          \"custom\": {\n            \"axisBorderShow\": false,\n            \"axisCenteredZero\": false,\n            \"axisLabel\": \"seconds\",\n            \"axisPlacement\": \"auto\",\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"lineInterpolation\": \"smooth\",\n            \"lineWidth\": 2,\n            \"pointSize\": 5,\n            \"scaleDistribution\": { \"type\": \"linear\" },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": { \"group\": \"A\", \"mode\": \"none\" },\n            \"thresholdsStyle\": { \"mode\": \"dashed\" }\n          },\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              { \"color\": \"green\", \"value\": null },\n              { \"color\": \"yellow\", \"value\": 0.5 },\n              { \"color\": \"red\", \"value\": 2.0 }\n            ]\n          },\n          \"unit\": \"s\",\n          \"min\": 0\n        },\n        \"overrides\": []\n      },\n      \"targets\": [\n        {\n          \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n          \"expr\": \"histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))\",\n          \"legendFormat\": \"P50\",\n          \"refId\": \"A\"\n        },\n        {\n          \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n          \"expr\": \"histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))\",\n          \"legendFormat\": \"P95\",\n          \"refId\": \"B\"\n        },\n        {\n          \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n          \"expr\": \"histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))\",\n          \"legendFormat\": \"P99\",\n          \"refId\": \"C\"\n        }\n      ]\n    },\n    {\n      \"title\": \"Server-Side Search Latency (P50 / P95 / P99)\",\n      \"description\": \"OpenSearch server-side execution time from the 'took' field in the response. Does not include network or client-side overhead.\",\n      \"type\": \"timeseries\",\n      \"gridPos\": { \"h\": 10, \"w\": 12, \"x\": 12, \"y\": 0 },\n      \"id\": 2,\n      \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": { \"mode\": \"palette-classic\" },\n          \"custom\": {\n            \"axisBorderShow\": false,\n            \"axisCenteredZero\": false,\n            \"axisLabel\": \"seconds\",\n            \"axisPlacement\": \"auto\",\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"lineInterpolation\": \"smooth\",\n            \"lineWidth\": 2,\n            \"pointSize\": 5,\n            \"scaleDistribution\": { \"type\": \"linear\" },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": { \"group\": \"A\", \"mode\": \"none\" },\n            \"thresholdsStyle\": { \"mode\": \"dashed\" }\n          },\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              { \"color\": \"green\", \"value\": null },\n              { \"color\": \"yellow\", \"value\": 0.5 },\n              { \"color\": \"red\", \"value\": 2.0 }\n            ]\n          },\n          \"unit\": \"s\",\n          \"min\": 0\n        },\n        \"overrides\": []\n      },\n      \"targets\": [\n        {\n          \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n          \"expr\": \"histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))\",\n          \"legendFormat\": \"P50\",\n          \"refId\": \"A\"\n        },\n        {\n          \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n          \"expr\": \"histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))\",\n          \"legendFormat\": \"P95\",\n          \"refId\": \"B\"\n        },\n        {\n          \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n          \"expr\": \"histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))\",\n          \"legendFormat\": \"P99\",\n          \"refId\": \"C\"\n        }\n      ]\n    },\n    {\n      \"title\": \"Client-Side Latency by Search Type (P95)\",\n      \"description\": \"P95 client-side latency broken down by search type (hybrid, keyword, semantic, random, doc_id_retrieval).\",\n      \"type\": \"timeseries\",\n      \"gridPos\": { \"h\": 10, \"w\": 12, \"x\": 0, \"y\": 10 },\n      \"id\": 3,\n      \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": { \"mode\": \"palette-classic\" },\n          \"custom\": {\n            \"axisBorderShow\": false,\n            \"axisCenteredZero\": false,\n            \"axisLabel\": \"seconds\",\n            \"axisPlacement\": \"auto\",\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"lineInterpolation\": \"smooth\",\n            \"lineWidth\": 2,\n            \"pointSize\": 5,\n            \"scaleDistribution\": { \"type\": \"linear\" },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": { \"group\": \"A\", \"mode\": \"none\" },\n            \"thresholdsStyle\": { \"mode\": \"off\" }\n          },\n          \"unit\": \"s\",\n          \"min\": 0\n        },\n        \"overrides\": []\n      },\n      \"targets\": [\n        {\n          \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n          \"expr\": \"histogram_quantile(0.95, sum by (search_type, le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))\",\n          \"legendFormat\": \"{{ search_type }}\",\n          \"refId\": \"A\"\n        }\n      ]\n    },\n    {\n      \"title\": \"Search Throughput by Type\",\n      \"description\": \"Searches per second broken down by search type.\",\n      \"type\": \"timeseries\",\n      \"gridPos\": { \"h\": 10, \"w\": 12, \"x\": 12, \"y\": 10 },\n      \"id\": 4,\n      \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": { \"mode\": \"palette-classic\" },\n          \"custom\": {\n            \"axisBorderShow\": false,\n            \"axisCenteredZero\": false,\n            \"axisLabel\": \"searches/s\",\n            \"axisPlacement\": \"auto\",\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"lineInterpolation\": \"smooth\",\n            \"lineWidth\": 2,\n            \"pointSize\": 5,\n            \"scaleDistribution\": { \"type\": \"linear\" },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": { \"group\": \"A\", \"mode\": \"normal\" },\n            \"thresholdsStyle\": { \"mode\": \"off\" }\n          },\n          \"unit\": \"ops\",\n          \"min\": 0\n        },\n        \"overrides\": []\n      },\n      \"targets\": [\n        {\n          \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n          \"expr\": \"sum by (search_type) (rate(onyx_opensearch_search_total[5m]))\",\n          \"legendFormat\": \"{{ search_type }}\",\n          \"refId\": \"A\"\n        }\n      ]\n    },\n    {\n      \"title\": \"Concurrent Searches In Progress\",\n      \"description\": \"Number of OpenSearch searches currently in flight, broken down by search type. Summed across all instances.\",\n      \"type\": \"timeseries\",\n      \"gridPos\": { \"h\": 10, \"w\": 12, \"x\": 0, \"y\": 20 },\n      \"id\": 5,\n      \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": { \"mode\": \"palette-classic\" },\n          \"custom\": {\n            \"axisBorderShow\": false,\n            \"axisCenteredZero\": false,\n            \"axisLabel\": \"searches\",\n            \"axisPlacement\": \"auto\",\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"lineInterpolation\": \"smooth\",\n            \"lineWidth\": 2,\n            \"pointSize\": 5,\n            \"scaleDistribution\": { \"type\": \"linear\" },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": { \"group\": \"A\", \"mode\": \"normal\" },\n            \"thresholdsStyle\": { \"mode\": \"off\" }\n          },\n          \"min\": 0\n        },\n        \"overrides\": []\n      },\n      \"targets\": [\n        {\n          \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n          \"expr\": \"sum by (search_type) (onyx_opensearch_searches_in_progress)\",\n          \"legendFormat\": \"{{ search_type }}\",\n          \"refId\": \"A\"\n        }\n      ]\n    },\n    {\n      \"title\": \"Client vs Server Latency Overhead (P50)\",\n      \"description\": \"Difference between client-side and server-side P50 latency. Reveals network, serialization, and untracked OpenSearch overhead.\",\n      \"type\": \"timeseries\",\n      \"gridPos\": { \"h\": 10, \"w\": 12, \"x\": 12, \"y\": 20 },\n      \"id\": 6,\n      \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": { \"mode\": \"palette-classic\" },\n          \"custom\": {\n            \"axisBorderShow\": false,\n            \"axisCenteredZero\": false,\n            \"axisLabel\": \"seconds\",\n            \"axisPlacement\": \"auto\",\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"lineInterpolation\": \"smooth\",\n            \"lineWidth\": 2,\n            \"pointSize\": 5,\n            \"scaleDistribution\": { \"type\": \"linear\" },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": { \"group\": \"A\", \"mode\": \"none\" },\n            \"thresholdsStyle\": { \"mode\": \"off\" }\n          },\n          \"unit\": \"s\",\n          \"min\": 0\n        },\n        \"overrides\": []\n      },\n      \"targets\": [\n        {\n          \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n          \"expr\": \"histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m]))) - histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))\",\n          \"legendFormat\": \"Client - Server overhead (P50)\",\n          \"refId\": \"A\"\n        },\n        {\n          \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n          \"expr\": \"histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))\",\n          \"legendFormat\": \"Client P50\",\n          \"refId\": \"B\"\n        },\n        {\n          \"datasource\": { \"type\": \"prometheus\", \"uid\": \"${DS_PROMETHEUS}\" },\n          \"expr\": \"histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))\",\n          \"legendFormat\": \"Server P50\",\n          \"refId\": \"C\"\n        }\n      ]\n    }\n  ],\n  \"refresh\": \"5s\",\n  \"schemaVersion\": 37,\n  \"style\": \"dark\",\n  \"tags\": [\"onyx\", \"opensearch\", \"search\", \"latency\"],\n  \"templating\": {\n    \"list\": [\n      {\n        \"current\": {\n          \"text\": \"Prometheus\",\n          \"value\": \"prometheus\"\n        },\n        \"includeAll\": false,\n        \"name\": \"DS_PROMETHEUS\",\n        \"options\": [],\n        \"query\": \"prometheus\",\n        \"refresh\": 1,\n        \"type\": \"datasource\"\n      }\n    ]\n  },\n  \"time\": { \"from\": \"now-60m\", \"to\": \"now\" },\n  \"timepicker\": {\n    \"refresh_intervals\": [\"5s\", \"10s\", \"30s\", \"1m\"]\n  },\n  \"timezone\": \"\",\n  \"title\": \"Onyx OpenSearch Search Latency\",\n  \"uid\": \"onyx-opensearch-search-latency\",\n  \"version\": 0,\n  \"weekStart\": \"\"\n}\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[build-system]\nrequires = [\"setuptools>=61\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[project]\nname = \"onyx\"\nversion = \"0.0.0\"\nrequires-python = \">=3.11\"\n# Shared dependencies between backend and model_server\ndependencies = [\n    \"aioboto3==15.1.0\",\n    \"cohere==5.6.1\",\n    \"fastapi==0.133.1\",\n    \"google-genai==1.52.0\",\n    \"litellm==1.81.6\",\n    \"openai==2.14.0\",\n    \"pydantic==2.11.7\",\n    \"prometheus_client>=0.21.1\",\n    \"prometheus_fastapi_instrumentator==7.1.0\",\n    \"retry==0.9.2\", # This pulls in py which is in CVE-2022-42969, must remove py from image\n    \"sentry-sdk==2.14.0\",\n    \"uvicorn==0.35.0\",\n    \"voyageai==0.2.3\",\n    \"brotli>=1.2.0\",\n    \"claude-agent-sdk>=0.1.19\",\n    \"agent-client-protocol>=0.7.1\",\n    \"discord-py==2.4.0\",\n    \"kubernetes>=31.0.0\",\n]\n\n[project.optional-dependencies]\n# Main backend application dependencies\nbackend = [\n    \"aiohttp==3.13.4\",\n    \"alembic==1.10.4\",\n    \"asyncpg==0.30.0\",\n    \"atlassian-python-api==3.41.16\",\n    \"azure-cognitiveservices-speech==1.38.0\",\n    \"beautifulsoup4==4.12.3\",\n    \"boto3==1.39.11\",\n    \"boto3-stubs[s3]==1.39.11\",\n    \"celery==5.5.1\",\n    \"chardet==5.2.0\",\n    \"chonkie==1.0.10\",\n    \"dask==2026.1.1\",\n    \"ddtrace==3.10.0\",\n    \"discord.py==2.4.0\",\n    \"distributed==2026.1.1\",\n    \"fastapi-users==15.0.4\",\n    \"fastapi-users-db-sqlalchemy==7.0.0\",\n    \"fastapi-limiter==0.1.6\",\n    \"fastmcp==3.2.0\",\n    \"filelock==3.20.3\",\n    \"google-api-python-client==2.86.0\",\n    \"google-auth-httplib2==0.1.0\",\n    \"google-auth-oauthlib==1.0.0\",\n    # GPT4All library has issues running on Macs and python:3.11.4-slim-bookworm\n    # will reintroduce this when library version catches up\n    # \"gpt4all==2.0.2\",\n    \"httpcore==1.0.9\",\n    \"httpx[http2]==0.28.1\",\n    \"httpx-oauth==0.15.1\",\n    \"huggingface-hub==0.35.3\",\n    \"inflection==0.5.1\",\n    \"jira==3.10.5\",\n    \"jsonref==1.1.0\",\n    \"kubernetes==31.0.0\",\n    \"trafilatura==1.12.2\",\n    \"langchain-core==1.2.22\",\n    \"lazy_imports==1.0.1\",\n    \"lxml==5.3.0\",\n    \"Mako==1.2.4\",\n    \"markitdown[pdf, docx, pptx, xlsx, xls]==0.1.2\",\n    \"mcp[cli]==1.26.0\",\n    \"msal==1.34.0\",\n    \"msoffcrypto-tool==5.4.2\",\n    \"Office365-REST-Python-Client==2.6.2\",\n    \"oauthlib==3.2.2\",\n    # NOTE: This is frozen to avoid https://foss.heptapod.net/openpyxl/openpyxl/-/issues/2147\n    \"openpyxl==3.0.10\",\n    \"opensearch-py==3.0.0\",\n    \"passlib==1.7.4\",\n    \"playwright==1.55.0\",\n    \"psutil==7.1.3\",\n    \"psycopg2-binary==2.9.9\",\n    \"puremagic==1.28\",\n    \"pyairtable==3.0.1\",\n    \"pycryptodome==3.19.1\",\n    \"PyGithub==2.5.0\",\n    \"pympler==1.1\",\n    \"python-dateutil==2.8.2\",\n    \"python-gitlab==5.6.0\",\n    \"python-pptx==0.6.23\",\n    \"pypandoc_binary==1.16.2\",\n    \"pypdf==6.9.2\",\n    \"pytest-mock==3.12.0\",\n    \"pytest-playwright==0.7.0\",\n    \"python-docx==1.1.2\",\n    \"python-dotenv==1.1.1\",\n    \"python-multipart==0.0.22\",\n    \"pywikibot==9.0.0\",\n    \"redis==5.0.8\",\n    \"requests==2.33.0\",\n    \"requests-oauthlib==1.3.1\",\n    \"rfc3986==1.5.0\",\n    \"simple-salesforce==1.12.6\",\n    \"slack-sdk==3.20.2\",\n    \"SQLAlchemy[mypy]==2.0.15\",\n    \"starlette==0.49.3\",\n    \"supervisor==4.3.0\",\n    \"RapidFuzz==3.13.0\",\n    \"tiktoken==0.7.0\",\n    \"timeago==1.0.16\",\n    \"types-openpyxl==3.0.4.7\",\n    \"unstructured==0.18.27\",\n    \"unstructured-client==0.42.6\",\n    \"zulip==0.8.2\",\n    \"hubspot-api-client==11.1.0\",\n    \"asana==5.0.8\",\n    \"dropbox==12.0.2\",\n    \"shapely==2.0.6\",\n    \"stripe==10.12.0\",\n    \"urllib3==2.6.3\",\n    \"mistune==3.2.0\",\n    \"sendgrid==6.12.5\",\n    \"exa_py==1.15.4\",\n    \"braintrust==0.3.9\",\n    \"langfuse==3.10.0\",\n    \"nest_asyncio==1.6.0\",\n    \"openinference-instrumentation==0.1.42\",\n    \"opentelemetry-proto>=1.39.0\",\n    \"python3-saml==1.15.0\",\n    \"xmlsec==1.3.14\",\n]\n\n# Dev tools\ndev = [\n    \"black==25.1.0\",\n    \"celery-types==0.19.0\",\n    \"faker==40.1.2\",\n    \"hatchling==1.28.0\",\n    \"ipykernel==6.29.5\",\n    \"manygo==0.2.0\",\n    \"matplotlib==3.10.8\",\n    \"mypy-extensions==1.0.0\",\n    \"mypy==1.13.0\",\n    \"onyx-devtools==0.7.2\",\n    \"openapi-generator-cli==7.17.0\",\n    \"pandas-stubs~=2.3.3\",\n    \"pre-commit==3.2.2\",\n    \"pytest-alembic==0.12.1\",\n    \"pytest-asyncio==1.3.0\",\n    \"pytest-dotenv==0.5.2\",\n    \"pytest-repeat==0.9.4\",\n    \"pytest-xdist==3.8.0\",\n    \"pytest==8.3.5\",\n    \"release-tag==0.5.2\",\n    \"reorder-python-imports-black==3.14.0\",\n    \"ruff==0.12.0\",\n    \"types-beautifulsoup4==4.12.0.3\",\n    \"types-html5lib==1.1.11.13\",\n    \"types-oauthlib==3.2.0.9\",\n    \"types-passlib==1.7.7.20240106\",\n    \"types-Pillow==10.2.0.20240822\",\n    \"types-psutil==7.1.3.20251125\",\n    \"types-psycopg2==2.9.21.10\",\n    \"types-python-dateutil==2.8.19.13\",\n    \"types-PyYAML==6.0.12.11\",\n    \"types-pytz==2023.3.1.1\",\n    \"types-regex==2023.3.23.1\",\n    \"types-requests==2.32.0.20250328\",\n    \"types-retry==0.9.9.3\",\n    \"types-setuptools==68.0.0.3\",\n    \"zizmor==1.18.0\",\n]\n\n# Enterprise Edition features\nee = [\n    \"posthog==3.7.4\",\n]\n\n# Model server specific dependencies (ML packages)\nmodel_server = [\n    \"accelerate==1.6.0\",\n    \"einops==0.8.1\",\n    \"numpy==2.4.1\",\n    \"safetensors==0.5.3\",\n    \"sentence-transformers==4.0.2\",\n    \"torch==2.9.1\",\n    \"transformers==4.53.0\",\n    \"sentry-sdk[fastapi,celery,starlette]==2.14.0\",\n]\n\n[tool.mypy]\nplugins = \"sqlalchemy.ext.mypy.plugin\"\nmypy_path = \"backend\"\nexplicit_package_bases = true\ndisallow_untyped_defs = true\nwarn_unused_ignores = true\nenable_error_code = [\"possibly-undefined\"]\nstrict_equality = true\n# Patterns match paths whether mypy is run from backend/ (CI) or repo root (e.g. VS Code extension with target ./backend)\nexclude = [\n  \"(?:^|/)generated/\",\n  \"(?:^|/)\\\\.venv/\",\n  \"(?:^|/)onyx/server/features/build/sandbox/kubernetes/docker/skills/\",\n  \"(?:^|/)onyx/server/features/build/sandbox/kubernetes/docker/templates/\",\n]\n\n[[tool.mypy.overrides]]\nmodule = \"alembic.versions.*\"\ndisable_error_code = [\"var-annotated\"]\n\n[[tool.mypy.overrides]]\nmodule = \"alembic_tenants.versions.*\"\ndisable_error_code = [\"var-annotated\"]\n\n[[tool.mypy.overrides]]\nmodule = \"generated.*\"\nfollow_imports = \"silent\"\nignore_errors = true\n\n[[tool.mypy.overrides]]\nmodule = \"transformers.*\"\nfollow_imports = \"skip\"\nignore_errors = true\n\n[tool.uv.workspace]\nmembers = [\"backend\", \"tools/ods\"]\n\n[tool.basedpyright]\ninclude = [\"backend\"]\nexclude = [\"backend/generated\", \"backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx\", \"backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/venv\"]\ntypeCheckingMode = \"off\"\n\n[tool.ruff]\nline-length = 130\ntarget-version = \"py311\"\n\n[tool.ruff.lint]\nignore = [\n  \"E501\", # Long lines are handled by Black.\n]\nselect = [\n  \"ARG\",\n  \"E\",\n  \"F\",\n  \"S324\",\n  \"W\",\n]\n\n[tool.setuptools.packages.find]\nwhere = [\"backend\"]\ninclude = [\"onyx*\", \"tests*\"]\n"
  },
  {
    "path": "web/.dockerignore",
    "content": "node_modules\n.next\n/tests/\n\n# Explicitly include src/app/build (overrides .gitignore /build pattern)\n!src/app/build\n"
  },
  {
    "path": "web/.eslintrc.json",
    "content": "{\n  \"extends\": \"next/core-web-vitals\",\n  \"plugins\": [\"unused-imports\"],\n  \"rules\": {\n    \"@next/next/no-img-element\": \"off\",\n    \"react-hooks/exhaustive-deps\": \"off\",\n    \"no-unused-vars\": \"off\",\n    \"@typescript-eslint/no-unused-vars\": \"off\",\n    \"unused-imports/no-unused-imports\": \"warn\",\n    \"unused-imports/no-unused-vars\": [\n      \"warn\",\n      {\n        \"vars\": \"all\",\n        \"varsIgnorePattern\": \"^_\",\n        \"args\": \"after-used\",\n        \"argsIgnorePattern\": \"^_\",\n        \"ignoreRestSiblings\": true\n      }\n    ]\n  }\n}\n"
  },
  {
    "path": "web/.gitignore",
    "content": "# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.\n.env.sentry-build-plugin\n\n# dependencies\nnode_modules\n/.pnp\n.pnp.js\n\n# testing\n/coverage\n\n# next.js\n/.next/\n/out/\n\n# production\n/build\n\n# misc\n.DS_Store\n*.pem\n\n# debug\nnpm-debug.log*\nyarn-debug.log*\nyarn-error.log*\n.pnpm-debug.log*\n\n# local env files\n.env*.local\n\n# vercel\n.vercel\n\n# typescript\n*.tsbuildinfo\nnext-env.d.ts\n\n# playwright testing temp files\n/admin*_auth.json\n/worker*_auth.json\n/user_auth.json\n/build-archive.log\n/test-results\n/output/\n\n# generated clients ... in particular, the API to the Onyx backend itself!\n/src/lib/generated\n.jest-cache\n\n# storybook\nstorybook-static\n"
  },
  {
    "path": "web/.prettierignore",
    "content": "**/.git\n**/.svn\n**/.hg\n**/node_modules\n**/.next\n**/.vscode"
  },
  {
    "path": "web/.prettierrc.json",
    "content": "{\n  \"trailingComma\": \"es5\"\n}\n"
  },
  {
    "path": "web/.storybook/Introduction.mdx",
    "content": "import { Meta } from \"@storybook/blocks\";\n\n<Meta title=\"Getting Started\" />\n\n# Onyx Storybook\n\nA living catalog for browsing, testing, and documenting Onyx UI components in isolation.\n\n---\n\n## What is this?\n\nThis Storybook contains interactive examples of every reusable UI component in the Onyx frontend. Each component has a dedicated page with:\n\n- **Live demos** you can interact with directly\n- **Controls** to tweak props and see how the component responds\n- **Auto-generated docs** showing the full props API\n- **Dark mode toggle** in the toolbar to preview both themes\n\n---\n\n## Navigating Storybook\n\n### Sidebar\n\nThe left sidebar organizes components by layer:\n\n- **opal/core** — Low-level primitives (`Interactive`, `Hoverable`)\n- **opal/components** — Design system atoms (`Button`, `OpenButton`, `Tag`)\n- **Layouts** — Structural layouts (`Content`, `ContentAction`, `IllustrationContent`)\n- **refresh-components** — App-level components (inputs, modals, tables, text, etc.)\n\nClick any component to see its stories. Click **Docs** to see the auto-generated props table.\n\n### Controls panel\n\nAt the bottom of each story, the **Controls** panel lets you change props in real time. Toggle booleans, pick from enums, type in strings — the preview updates instantly.\n\n### Theme toggle\n\nUse the paint roller icon in the top toolbar to switch between **light** and **dark** mode. All components use CSS variables that automatically adapt.\n\n---\n\n## Running locally\n\n```bash\ncd web\nnpm run storybook        # dev server on :6006\nnpm run storybook:build  # static build to storybook-static/\n```\n\n---\n\n## Adding a new story\n\nStories are **co-located** next to their component:\n\n```\nlib/opal/src/components/buttons/Button/\n├── components.tsx       ← the component\n├── Button.stories.tsx   ← the story\n├── styles.css\n└── README.md\n```\n\n### Minimal template\n\n```tsx\nimport type { Meta, StoryObj } from \"@storybook/react\";\nimport { MyComponent } from \"./MyComponent\";\n\nconst meta: Meta<typeof MyComponent> = {\n  title: \"opal/components/MyComponent\",  // sidebar path\n  component: MyComponent,\n  tags: [\"autodocs\"],                     // auto-generate docs page\n};\n\nexport default meta;\ntype Story = StoryObj<typeof MyComponent>;\n\nexport const Default: Story = {\n  args: {\n    title: \"Hello\",\n  },\n};\n\nexport const WithCustomLayout: Story = {\n  render: () => (\n    <div className=\"flex gap-2\">\n      <MyComponent title=\"One\" />\n      <MyComponent title=\"Two\" />\n    </div>\n  ),\n};\n```\n\n### Conventions\n\n- **Title format:** `opal/core/Name`, `opal/components/Name`, `Layouts/Name`, or `refresh-components/Name`\n- **Tags:** Add `tags: [\"autodocs\"]` to auto-generate a docs page from props\n- **Decorators:** If your component needs `TooltipPrimitive.Provider` (anything with tooltips), add it as a decorator\n- **Layout:** Use `parameters: { layout: \"fullscreen\" }` for modals/popovers that use portals\n\n---\n\n## Deployment\n\nProduction builds deploy to [onyx-storybook.vercel.app](https://onyx-storybook.vercel.app) automatically when PRs touching component files merge to `main`.\n\nMonitored paths:\n\n- `web/lib/opal/**`\n- `web/src/refresh-components/**`\n- `web/.storybook/**`\n"
  },
  {
    "path": "web/.storybook/README.md",
    "content": "# Onyx Storybook\n\nStorybook is an isolated development environment for UI components. It renders each component in a standalone \"story\" outside of the main app, so you can visually verify appearance, interact with props, and catch regressions without navigating through the full application.\n\nThe Onyx Storybook covers the full component library — from low-level `@opal/core` primitives up through `refresh-components` — giving designers and engineers a shared reference for every visual state.\n\n**Production:** [onyx-storybook.vercel.app](https://onyx-storybook.vercel.app)\n\n## Running Locally\n\n```bash\ncd web\nnpm run storybook        # dev server on http://localhost:6006\nnpm run storybook:build  # static build to storybook-static/\n```\n\nThe dev server hot-reloads when you edit a component or story file.\n\n## Writing Stories\n\nStories are **co-located** next to their component source:\n\n```\nlib/opal/src/core/interactive/\n├── components.tsx              ← the component\n├── Interactive.stories.tsx     ← the story\n└── styles.css\n\nsrc/refresh-components/buttons/\n├── Button.tsx\n└── Button.stories.tsx\n```\n\n### Minimal Template\n\n```tsx\nimport type { Meta, StoryObj } from \"@storybook/react\";\nimport { MyComponent } from \"./MyComponent\";\n\nconst meta: Meta<typeof MyComponent> = {\n  title: \"Category/MyComponent\",   // sidebar path\n  component: MyComponent,\n  tags: [\"autodocs\"],               // generates a docs page from props\n};\n\nexport default meta;\ntype Story = StoryObj<typeof MyComponent>;\n\nexport const Default: Story = {\n  args: { label: \"Hello\" },\n};\n```\n\n### Conventions\n\n- **Title format:** `Core/Name`, `Components/Name`, `Layouts/Name`, or `refresh-components/category/Name`\n- **Tags:** Add `tags: [\"autodocs\"]` to auto-generate a props docs page\n- **Decorators:** Components that use Radix tooltips need a `TooltipPrimitive.Provider` decorator\n- **Layout:** Use `parameters: { layout: \"fullscreen\" }` for modals/popovers that use portals\n\n## Dark Mode\n\nUse the theme toggle (paint roller icon) in the Storybook toolbar to switch between light and dark modes. This adds/removes the `dark` class on the preview body, matching the app's `darkMode: \"class\"` Tailwind config. All color tokens from `colors.css` adapt automatically.\n\n## Deployment\n\nThe production Storybook is deployed as a static site on Vercel. The build runs `npm run storybook:build` which outputs to `storybook-static/`, and Vercel serves that directory.\n\nDeploys are triggered on merges to `main` when files in `web/lib/opal/`, `web/src/refresh-components/`, or `web/.storybook/` change.\n\n## Component Layers\n\nThe sidebar organizes components by their layer in the design system:\n\n| Layer | Path | Examples |\n|-------|------|----------|\n| **Core** | `lib/opal/src/core/` | Interactive, Hoverable |\n| **Components** | `lib/opal/src/components/` | Button, OpenButton, Tag |\n| **Layouts** | `lib/opal/src/layouts/` | Content, ContentAction, IllustrationContent |\n| **refresh-components** | `src/refresh-components/` | Inputs, tables, modals, text, cards, tiles, etc. |\n"
  },
  {
    "path": "web/.storybook/main.ts",
    "content": "import type { StorybookConfig } from \"@storybook/react-vite\";\nimport path from \"path\";\n\nconst config: StorybookConfig = {\n  stories: [\n    \"./*.mdx\",\n    \"../lib/opal/src/**/*.stories.@(ts|tsx)\",\n    \"../src/refresh-components/**/*.stories.@(ts|tsx)\",\n  ],\n  addons: [\"@storybook/addon-essentials\", \"@storybook/addon-themes\"],\n  framework: {\n    name: \"@storybook/react-vite\",\n    options: {},\n  },\n  staticDirs: [\"../public\"],\n  docs: {\n    autodocs: \"tag\",\n  },\n  typescript: {\n    reactDocgen: \"react-docgen-typescript\",\n  },\n  viteFinal: async (config) => {\n    config.resolve = config.resolve ?? {};\n    config.resolve.alias = {\n      ...config.resolve.alias,\n      \"@\": path.resolve(__dirname, \"../src\"),\n      \"@opal\": path.resolve(__dirname, \"../lib/opal/src\"),\n      \"@public\": path.resolve(__dirname, \"../public\"),\n      // Next.js module stubs for Vite\n      \"next/link\": path.resolve(__dirname, \"mocks/next-link.tsx\"),\n      \"next/navigation\": path.resolve(__dirname, \"mocks/next-navigation.tsx\"),\n      \"next/image\": path.resolve(__dirname, \"mocks/next-image.tsx\"),\n    };\n\n    // Process CSS with Tailwind via PostCSS\n    config.css = config.css ?? {};\n    config.css.postcss = path.resolve(__dirname, \"..\");\n\n    return config;\n  },\n};\n\nexport default config;\n"
  },
  {
    "path": "web/.storybook/mocks/next-image.tsx",
    "content": "import React from \"react\";\n\ninterface ImageProps {\n  src: string;\n  alt: string;\n  width?: number;\n  height?: number;\n  fill?: boolean;\n  [key: string]: unknown;\n}\n\nfunction Image({ src, alt, width, height, fill, ...props }: ImageProps) {\n  const fillStyle: React.CSSProperties = fill\n    ? { position: \"absolute\", inset: 0, width: \"100%\", height: \"100%\" }\n    : {};\n  return (\n    <img\n      {...(props as React.ImgHTMLAttributes<HTMLImageElement>)}\n      src={src}\n      alt={alt}\n      width={fill ? undefined : width}\n      height={fill ? undefined : height}\n      style={{ ...(props.style as React.CSSProperties), ...fillStyle }}\n    />\n  );\n}\n\nexport default Image;\n"
  },
  {
    "path": "web/.storybook/mocks/next-link.tsx",
    "content": "import React from \"react\";\n\ninterface LinkProps {\n  href: string;\n  children: React.ReactNode;\n  [key: string]: unknown;\n}\n\nfunction Link({\n  href,\n  children,\n  prefetch: _prefetch,\n  scroll: _scroll,\n  shallow: _shallow,\n  replace: _replace,\n  passHref: _passHref,\n  locale: _locale,\n  legacyBehavior: _legacyBehavior,\n  ...props\n}: LinkProps) {\n  return (\n    <a href={href} {...props}>\n      {children}\n    </a>\n  );\n}\n\nexport default Link;\n"
  },
  {
    "path": "web/.storybook/mocks/next-navigation.tsx",
    "content": "export function useRouter() {\n  return {\n    push: (_url: string) => {},\n    replace: (_url: string) => {},\n    back: () => {},\n    forward: () => {},\n    refresh: () => {},\n    prefetch: (_url: string) => Promise.resolve(),\n  };\n}\n\nexport function usePathname() {\n  return \"/\";\n}\n\nexport function useSearchParams() {\n  return new URLSearchParams() as ReadonlyURLSearchParams;\n}\n\nexport function useParams() {\n  return {};\n}\n\nexport function redirect(_url: string): never {\n  throw new Error(\"redirect() called in Storybook\");\n}\n\nexport function notFound(): never {\n  throw new Error(\"notFound() called in Storybook\");\n}\n"
  },
  {
    "path": "web/.storybook/preview-head.html",
    "content": "<!-- Preconnect for fonts loaded via globals.css @import -->\n<link\n  rel=\"preconnect\"\n  href=\"https://fonts.googleapis.com\"\n  crossorigin=\"anonymous\"\n/>\n<link\n  rel=\"preconnect\"\n  href=\"https://fonts.gstatic.com\"\n  crossorigin=\"anonymous\"\n/>\n"
  },
  {
    "path": "web/.storybook/preview.ts",
    "content": "import type { Preview } from \"@storybook/react\";\nimport { withThemeByClassName } from \"@storybook/addon-themes\";\nimport \"../src/app/globals.css\";\n\nconst preview: Preview = {\n  parameters: {\n    layout: \"centered\",\n    backgrounds: { disable: true },\n    controls: {\n      matchers: {\n        color: /(background|color)$/i,\n        date: /Date$/i,\n      },\n    },\n  },\n  decorators: [\n    withThemeByClassName({\n      themes: {\n        light: \"\",\n        dark: \"dark\",\n      },\n      defaultTheme: \"light\",\n    }),\n  ],\n};\n\nexport default preview;\n"
  },
  {
    "path": "web/@types/favicon-fetch.d.ts",
    "content": "declare module \"favicon-fetch\" {\n  interface FaviconFetchOptions {\n    uri: string;\n  }\n\n  function faviconFetch(options: FaviconFetchOptions): string | null;\n\n  export default faviconFetch;\n}\n"
  },
  {
    "path": "web/@types/images.d.ts",
    "content": "declare module \"*.png\" {\n  const content: string;\n  export default content;\n}\n\ndeclare module \"*.svg\" {\n  const content: string;\n  export default content;\n}\n\ndeclare module \"*.jpeg\" {\n  const content: string;\n  export default content;\n}\n\ndeclare module \"*.jpg\" {\n  const content: string;\n  export default content;\n}\n\ndeclare module \"*.gif\" {\n  const content: string;\n  export default content;\n}\n\ndeclare module \"*.webp\" {\n  const content: string;\n  export default content;\n}\n\ndeclare module \"*.ico\" {\n  const content: string;\n  export default content;\n}\n"
  },
  {
    "path": "web/AGENTS.md",
    "content": "# Frontend Standards\n\nThis file is the single source of truth for frontend coding standards across all Onyx frontend\nprojects (including, but not limited to, `/web`, `/desktop`).\n\n# Components\n\nUI components are spread across several directories while the codebase migrates to Opal:\n\n- **`web/lib/opal/src/`** — The Opal design system. Preferred for all new components.\n- **`web/src/refresh-components/`** — Production components not yet migrated to Opal.\n- **`web/src/sections/`** — Feature-specific composite components (cards, modals, etc.).\n- **`web/src/layouts/`** — Page-level layout components (settings pages, etc.).\n\n**Do NOT use anything from `web/src/components/`** — this directory contains legacy components\nthat are being phased out. Always prefer Opal first; fall back to `refresh-components` only for\ncomponents not yet available in Opal.\n\n## Opal Layouts (`lib/opal/src/layouts/`)\n\nAll layout primitives are imported from `@opal/layouts`. They handle sizing, font selection, icon\nalignment, and optional inline editing.\n\n```typescript\nimport { Content, ContentAction, IllustrationContent } from \"@opal/layouts\";\n```\n\n### Content\n\n**Use this for any combination of icon + title + description.**\n\nA two-axis layout component that automatically routes to the correct internal layout\n(`ContentXl`, `ContentLg`, `ContentMd`, `ContentSm`) based on `sizePreset` and `variant`:\n\n| sizePreset | variant | Routes to | Layout |\n|---|---|---|---|\n| `headline` / `section` | `heading` | `ContentXl` | Icon on top (flex-col) |\n| `headline` / `section` | `section` | `ContentLg` | Icon inline (flex-row) |\n| `main-content` / `main-ui` / `secondary` | `section` / `heading` | `ContentMd` | Compact inline |\n| `main-content` / `main-ui` / `secondary` | `body` | `ContentSm` | Body text layout |\n\n```typescript\n<Content\n  sizePreset=\"main-ui\"\n  variant=\"section\"\n  icon={SvgSettings}\n  title=\"Settings\"\n  description=\"Manage your preferences\"\n/>\n```\n\n### ContentAction\n\n**Use this when a Content block needs right-side actions** (buttons, badges, icons, etc.).\n\nWraps `Content` and adds a `rightChildren` slot. Accepts all `Content` props plus:\n- `rightChildren`: `ReactNode` — actions rendered on the right\n- `paddingVariant`: `SizeVariant` — controls outer padding\n\n```typescript\n<ContentAction\n  sizePreset=\"main-ui\"\n  variant=\"section\"\n  icon={SvgUser}\n  title=\"John Doe\"\n  description=\"Admin\"\n  rightChildren={<Button icon={SvgEdit}>Edit</Button>}\n/>\n```\n\n### IllustrationContent\n\n**Use this for empty states, error pages, and informational placeholders.**\n\nA vertically-stacked, center-aligned layout that pairs a large illustration (7.5rem x 7.5rem)\nwith a title and optional description.\n\n```typescript\nimport SvgNoResult from \"@opal/illustrations/no-result\";\n\n<IllustrationContent\n  illustration={SvgNoResult}\n  title=\"No results found\"\n  description=\"Try adjusting your search or filters.\"\n/>\n```\n\nProps:\n- `illustration`: `IconFunctionComponent` — optional, from `@opal/illustrations`\n- `title`: `string` — required\n- `description`: `string` — optional\n\n## Settings Page Layout (`src/layouts/settings-layouts.tsx`)\n\n**Use this for all admin/settings pages.** Provides a standardized layout with scroll-aware\nsticky headers, centered content containers, and responsive behavior.\n\n```typescript\nimport SettingsLayouts from \"@/layouts/settings-layouts\";\n\nfunction MySettingsPage() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={SvgSettings}\n        title=\"Account Settings\"\n        description=\"Manage your account preferences\"\n        rightChildren={<Button>Save</Button>}\n      >\n        <InputTypeIn placeholder=\"Search settings...\" />\n      </SettingsLayouts.Header>\n\n      <SettingsLayouts.Body>\n        <Card>Settings content here</Card>\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n```\n\nSub-components:\n- **`SettingsLayouts.Root`** — Wrapper with centered, scrollable container. Width options:\n  `\"sm\"` (672px), `\"sm-md\"` (752px), `\"md\"` (872px, default), `\"lg\"` (992px), `\"full\"` (100%).\n- **`SettingsLayouts.Header`** — Sticky header with icon, title, description, optional\n  `rightChildren` actions, optional `children` below (e.g., search/filter), optional `backButton`,\n  and optional `separator`. Automatically shows a scroll shadow when scrolled.\n- **`SettingsLayouts.Body`** — Content container with consistent padding and vertical spacing.\n\n## Cards (`src/sections/cards/`)\n\n**When building a card that displays information about a specific entity (agent, document set,\nfile, connector, etc.), add it to `web/src/sections/cards/`.**\n\nEach card is a self-contained component focused on a single entity type. Cards typically include\nentity identification (name, avatar, icon), summary information, and quick actions.\n\n```typescript\nimport AgentCard from \"@/sections/cards/AgentCard\";\nimport DocumentSetCard from \"@/sections/cards/DocumentSetCard\";\nimport FileCard from \"@/sections/cards/FileCard\";\n```\n\nGuidelines:\n- One card per entity type — keep card-specific logic within the card component.\n- Cards should be reusable across different pages and contexts.\n- Use shared components from `@opal/components`, `@opal/layouts`, and `@/refresh-components`\n  inside cards — do not duplicate layout or styling logic.\n\n## Button (`components/buttons/button/`)\n\n**Always use the Opal `Button`.** Do not use raw `<button>` elements.\n\nBuilt on `Interactive.Stateless` > `Interactive.Container`, so it inherits the full color/state\nsystem automatically.\n\n```typescript\nimport { Button } from \"@opal/components/buttons/button/components\";\n\n// Labeled button\n<Button variant=\"default\" prominence=\"primary\" icon={SvgPlus}>\n  Create\n</Button>\n\n// Icon-only button (omit children)\n<Button variant=\"default\" prominence=\"tertiary\" icon={SvgTrash} size=\"sm\" />\n```\n\nKey props:\n- `variant`: `\"default\"` | `\"action\"` | `\"danger\"` | `\"none\"`\n- `prominence`: `\"primary\"` | `\"secondary\"` | `\"tertiary\"` | `\"internal\"`\n- `size`: `\"lg\"` | `\"md\"` | `\"sm\"` | `\"xs\"` | `\"2xs\"` | `\"fit\"`\n- `icon`, `rightIcon`, `children`, `disabled`, `href`, `tooltip`\n\n## Core Primitives (`core/`)\n\nThe `core/` directory contains the lowest-level building blocks that power all Opal components.\n**Most code should not interface with these directly** — use higher-level components like `Button`,\n`Content`, and `ContentAction` instead. These are documented here for understanding, not everyday use.\n\n### Interactive (`core/interactive/`)\n\nThe foundational layer for all clickable/interactive surfaces. Defines the color matrix for\nhover, active, and disabled states.\n\n- **`Interactive.Stateless`** — Color system for stateless elements (buttons, links). Applies\n  variant/prominence/state combinations via CSS custom properties.\n- **`Interactive.Stateful`** — Color system for stateful elements (toggles, sidebar items, selects).\n  Uses `state` (`\"empty\"` | `\"filled\"` | `\"selected\"`) instead of prominence.\n- **`Interactive.Container`** — Structural box providing height, rounding, padding, and border.\n  Shared by both Stateless and Stateful. Renders as `<div>`, `<button>`, or `<Link>` depending\n  on context.\n- **`Interactive.Foldable`** — Zero-width collapsible wrapper with CSS grid animation.\n\n### Disabled (`core/disabled/`)\n\nA pure CSS wrapper that applies disabled visuals (`opacity-50`, `cursor-not-allowed`,\n`pointer-events: none`) to a single child element via Radix `Slot`. Has no React context —\nInteractive primitives and buttons manage their own disabled state via a `disabled` prop.\n\n### Hoverable (`core/animations/`)\n\nA standardized way to provide \"opacity-100 on hover\" behavior. Instead of manually wiring\n`opacity-0 group-hover:opacity-100` with Tailwind, use `Hoverable` for consistent, coordinated\nhover-to-reveal patterns.\n\n- **`Hoverable.Root`** — Wraps a hover group. Tracks mouse enter/leave and broadcasts hover\n  state to descendants via a per-group React context.\n- **`Hoverable.Item`** — Marks an element that should appear on hover. Supports two modes:\n  - **Group mode** (`group` prop provided): visibility driven by a matching `Hoverable.Root`\n    ancestor. Throws if no matching Root is found.\n  - **Local mode** (`group` omitted): uses CSS `:hover` on the item itself.\n\n```typescript\nimport { Hoverable } from \"@opal/core\";\n\n// Group mode — hovering anywhere on the row reveals the trash icon\n<Hoverable.Root group=\"row\">\n  <div className=\"flex items-center gap-2\">\n    <span>Row content</span>\n    <Hoverable.Item group=\"row\" variant=\"opacity-on-hover\">\n      <SvgTrash />\n    </Hoverable.Item>\n  </div>\n</Hoverable.Root>\n\n// Local mode — hovering the item itself reveals it\n<Hoverable.Item variant=\"opacity-on-hover\">\n  <SvgTrash />\n</Hoverable.Item>\n```\n\n# Best Practices\n\n## 0. Size Variant Defaults\n\n**When using `SizeVariants` (or any subset like `PaddingVariants`, `RoundingVariants`) as a prop\ntype, always default to `\"md\"`.**\n\n**Reason:** `\"md\"` is the standard middle-of-the-road preset across the design system. Consistent\ndefaults make components predictable — callers only need to specify a size when they want something\nother than the norm.\n\n```typescript\n// ✅ Good — default to \"md\"\nfunction MyCard({ padding = \"md\", rounding = \"md\" }: MyCardProps) { ... }\n\n// ❌ Bad — arbitrary or inconsistent defaults\nfunction MyCard({ padding = \"sm\", rounding = \"lg\" }: MyCardProps) { ... }\n```\n\n## 1. Tailwind Dark Mode\n\n**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**\n\n**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.\n\n**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.\n\n```typescript\n// ✅ Good - Standard components use `tailwind-themes/tailwind.config.js` / `src/app/css/colors.css`\n<div className=\"bg-background-neutral-03 text-text-02\">\n  Content\n</div>\n\n// ✅ Good - Logo icons with dark mode handling via createLogoIcon\nexport const GithubIcon = createLogoIcon(githubLightIcon, {\n  monochromatic: true,  // Will apply dark:invert internally\n});\n\nexport const GitbookIcon = createLogoIcon(gitbookLightIcon, {\n  darkSrc: gitbookDarkIcon,  // Will use dark:hidden/dark:block internally\n});\n\n// ❌ Bad - Manual dark mode overrides\n<div className=\"bg-white dark:bg-black text-black dark:text-white\">\n  Content\n</div>\n```\n\n## 2. Icon Usage\n\n**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**\n\n**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.\n\n```typescript\n// ✅ Good\nimport SvgX from \"@/icons/x\";\nimport SvgMoreHorizontal from \"@/icons/more-horizontal\";\n\n// ❌ Bad\nimport { User } from \"lucide-react\";\nimport { FiSearch } from \"react-icons/fi\";\n```\n\n**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.\nIf you need help with this step, reach out to `raunak@onyx.app`.\n\n## 3. Text Rendering\n\n**Use the Opal `Text` component for all text rendering. Avoid \"naked\" text nodes.**\n\n**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It uses\nstring-enum props (`font` and `color`) for font preset and color selection. Inline markdown is\nopt-in via the `markdown()` function from `@opal/types`.\n\n```typescript\n// ✅ Good — Opal Text with string-enum props\nimport { Text } from \"@opal/components\";\n\nfunction UserCard({ name }: { name: string }) {\n  return (\n    <Text font=\"main-ui-action\" color=\"text-03\">\n      {name}\n    </Text>\n  )\n}\n\n// ✅ Good — inline markdown via markdown()\nimport { markdown } from \"@opal/utils\";\n\n<Text font=\"main-ui-body\" color=\"text-05\">\n  {markdown(\"*Hello*, **world**! Visit [Onyx](https://onyx.app) and run `onyx start`.\")}\n</Text>\n\n// ✅ Good — plain strings are never parsed as markdown\n<Text font=\"main-ui-body\" color=\"text-03\">\n  {userProvidedString}\n</Text>\n\n// ✅ Good — component props that support optional markdown use `string | RichStr`\nimport type { RichStr } from \"@opal/types\";\n\ninterface MyCardProps {\n  title: string | RichStr;\n}\n\n// ❌ Bad — legacy boolean-flag API (still works but deprecated)\nimport Text from \"@/refresh-components/texts/Text\";\n<Text text03 mainUiAction>{name}</Text>\n\n// ❌ Bad — naked text nodes\n<div>\n  <h2>{name}</h2>\n  <p>User details</p>\n</div>\n```\n\nKey props:\n- `font`: `TextFont` — font preset (e.g., `\"main-ui-body\"`, `\"heading-h2\"`, `\"secondary-action\"`)\n- `color`: `TextColor` — text color (e.g., `\"text-03\"`, `\"text-inverted-05\"`)\n- `as`: `\"p\" | \"span\" | \"li\" | \"h1\" | \"h2\" | \"h3\"` — HTML tag (default: `\"span\"`)\n- `nowrap`: `boolean` — prevent text wrapping\n\n**`RichStr` convention:** When creating new components, any string prop that will be rendered as\nvisible text in the DOM (e.g., `title`, `description`, `label`) should be typed as\n`string | RichStr` instead of plain `string`. This gives callers opt-in markdown support via\n`markdown()` without requiring any additional props or API surface on the component.\n\n```typescript\nimport type { RichStr } from \"@opal/types\";\nimport { Text } from \"@opal/components\";\n\n// ✅ Good — new components accept string | RichStr and render via Text\ninterface InfoCardProps {\n  title: string | RichStr;\n  description?: string | RichStr;\n}\n\nfunction InfoCard({ title, description }: InfoCardProps) {\n  return (\n    <div>\n      <Text font=\"main-ui-action\">{title}</Text>\n      {description && (\n        <Text font=\"secondary-body\" color=\"text-03\">{description}</Text>\n      )}\n    </div>\n  );\n}\n\n// ❌ Bad — plain string props block markdown support for callers\ninterface InfoCardProps {\n  title: string;\n  description?: string;\n}\n```\n\n## 4. Component Usage\n\n**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**\n\n**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.\n\n```typescript\n// ✅ Good\nimport Button from '@/refresh-components/buttons/Button'\nimport InputTypeIn from '@/refresh-components/inputs/InputTypeIn'\nimport SvgPlusCircle from '@/icons/plus-circle'\n\nfunction ContactForm() {\n  return (\n    <form>\n      <InputTypeIn placeholder=\"Search...\" />\n      <Button type=\"submit\" leftIcon={SvgPlusCircle}>Submit</Button>\n    </form>\n  )\n}\n\n// ❌ Bad\nfunction ContactForm() {\n  return (\n    <form>\n      <input placeholder=\"Name\" />\n      <textarea placeholder=\"Message\" />\n      <button type=\"submit\">Submit</button>\n    </form>\n  )\n}\n```\n\n## 5. Colors\n\n**Always use custom overrides for colors and borders rather than built in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**\n\n**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.\n\n**Available color categories:**\n\n- **Text:** `text-01` through `text-05`, `text-inverted-XX`\n- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)\n- **Borders:** `border-01` through `border-05`, `border-inverted-XX`\n- **Actions:** `action-link-XX`, `action-danger-XX`\n- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`\n- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.\n\n```typescript\n// ✅ Good - Use custom Onyx color classes\n<div className=\"bg-background-neutral-01 border border-border-02\" />\n<div className=\"bg-background-tint-02 border border-border-01\" />\n<div className=\"bg-status-success-01\" />\n<div className=\"bg-action-link-01\" />\n<div className=\"bg-theme-primary-05\" />\n\n// ❌ Bad - Do NOT use standard Tailwind colors\n<div className=\"bg-gray-100 border border-gray-300 text-gray-600\" />\n<div className=\"bg-white border border-slate-200\" />\n<div className=\"bg-green-100 text-green-700\" />\n<div className=\"bg-blue-100 text-blue-600\" />\n<div className=\"bg-indigo-500\" />\n```\n\n## 6. Data Fetching\n\n**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**\n\n**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).\n\n# Stylistic Preferences\n\n## 1. Import Standards\n\n**Always use absolute imports with the `@` prefix.**\n\n**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.\n\n```typescript\n// ✅ Good\nimport { Button } from \"@/components/ui/button\";\nimport { useAuth } from \"@/hooks/useAuth\";\nimport { Text } from \"@/refresh-components/texts/Text\";\n\n// ❌ Bad\nimport { Button } from \"../../../components/ui/button\";\nimport { useAuth } from \"./hooks/useAuth\";\n```\n\n## 2. React Component Functions\n\n**Prefer regular functions over arrow functions for React components.**\n\n**Reason:** Functions just become easier to read.\n\n```typescript\n// ✅ Good\nfunction UserProfile({ userId }: UserProfileProps) {\n  return <div>User Profile</div>\n}\n\n// ❌ Bad\nconst UserProfile = ({ userId }: UserProfileProps) => {\n  return <div>User Profile</div>\n}\n```\n\n## 3. Props Interface Extraction\n\n**Extract prop types into their own interface definitions. Keep prop interfaces in the same file\nas the component they belong to. Non-prop types (shared models, API response shapes, enums, etc.)\nshould be placed in a co-located `interfaces.ts` file.**\n\n**Reason:** Prop interfaces are tightly coupled to their component and rarely imported elsewhere,\nso co-location keeps things simple. Shared types belong in `interfaces.ts` so they can be\nimported without pulling in component code.\n\n```typescript\n// ✅ Good — props interface in the same file as the component\n// UserCard.tsx\ninterface UserCardProps {\n  user: User\n  showActions?: boolean\n  onEdit?: (userId: string) => void\n}\n\nfunction UserCard({ user, showActions = false, onEdit }: UserCardProps) {\n  return <div>User Card</div>\n}\n\n// ✅ Good — shared types in interfaces.ts\n// interfaces.ts\nexport interface User {\n  id: string\n  name: string\n  role: UserRole\n}\n\nexport type UserRole = \"admin\" | \"member\" | \"viewer\"\n\n// ❌ Bad — inline prop types\nfunction UserCard({\n  user,\n  showActions = false,\n  onEdit\n}: {\n  user: User\n  showActions?: boolean\n  onEdit?: (userId: string) => void\n}) {\n  return <div>User Card</div>\n}\n```\n\n## 4. Spacing Guidelines\n\n**Prefer padding over margins for spacing. When a library component exposes a padding prop\n(e.g., `paddingVariant`), use that prop instead of wrapping it in a `<div>` with padding classes.\nIf a library component does not expose a padding override and you find yourself adding a wrapper\ndiv for spacing, consider updating the library component to accept one.**\n\n**Reason:** We want to consolidate usage to paddings instead of margins, and minimize wrapper\ndivs that exist solely for spacing.\n\n```typescript\n// ✅ Good — use the component's padding prop\n<ContentAction paddingVariant=\"md\" ... />\n\n// ✅ Good — padding utilities when no component prop exists\n<div className=\"p-4 space-y-2\">\n  <div className=\"p-2\">Content</div>\n</div>\n\n// ❌ Bad — wrapper div just for spacing\n<div className=\"p-4\">\n  <ContentAction ... />\n</div>\n\n// ❌ Bad — margins\n<div className=\"m-4 space-y-2\">\n  <div className=\"m-2\">Content</div>\n</div>\n```\n\n## 5. Class Name Utilities\n\n**Use the `cn` utility instead of raw string formatting for classNames.**\n\n**Reason:** `cn`s are easier to read. They also allow for more complex types (i.e., string-arrays) to get formatted properly (it flattens each element in that string array down). As a result, it can allow things such as conditionals (i.e., `myCondition && \"some-tailwind-class\"`, which evaluates to `false` when `myCondition` is `false`) to get filtered out.\n\n```typescript\nimport { cn } from '@/lib/utils'\n\n// ✅ Good\n<div className={cn(\n  'base-class',\n  isActive && 'active-class',\n  className\n)}>\n  Content\n</div>\n\n// ❌ Bad\n<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>\n  Content\n</div>\n```\n\n## 6. Custom Hooks Organization\n\n**Follow a \"hook-per-file\" layout. Each hook should live in its own file within `web/src/hooks`.**\n\n**Reason:** This is just a layout preference. Keeps code clean.\n\n```typescript\n// web/src/hooks/useUserData.ts\nexport function useUserData(userId: string) {\n  // hook implementation\n}\n\n// web/src/hooks/useLocalStorage.ts\nexport function useLocalStorage<T>(key: string, initialValue: T) {\n  // hook implementation\n}\n```\n"
  },
  {
    "path": "web/Dockerfile",
    "content": "FROM node:20-alpine AS base\n\nLABEL com.onyx.maintainer=\"founders@onyx.app\"\nLABEL com.onyx.description=\"This image is the web/frontend container of Onyx which \\\ncontains code for both the Community and Enterprise editions of Onyx. If you do not \\\nhave a contract or agreement with DanswerAI, you are not permitted to use the Enterprise \\\nEdition features outside of personal development or testing purposes. Please reach out to \\\nfounders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx\"\n\n# Step 1. Install dependencies + rebuild the source code only when needed\nFROM base AS builder\n# Check https://github.com/nodejs/docker-node/tree/b4117f9333da4138b03a546ec926ef50a31506c3#nodealpine to understand why libc6-compat might be needed.\nRUN apk add --no-cache libc6-compat\nWORKDIR /app\n\n# Copy package files first for better layer caching\n# This layer will be cached unless package.json or package-lock.json changes\nCOPY package.json package-lock.json ./\nCOPY lib/opal/package.json ./lib/opal/\n\n# Install dependencies\nRUN npm ci\n\n# pull in source code / package.json / package-lock.json\nCOPY . .\n\n# needed to get the `standalone` dir we expect later\nENV NEXT_PRIVATE_STANDALONE=true\n\n# Disable automatic telemetry collection\nENV NEXT_TELEMETRY_DISABLED=1\n\n# Environment variables must be present at build time\n# https://github.com/vercel/next.js/discussions/14030\n# NOTE: if you add something here, make sure to add it to the runner as well\n\n\nARG NEXT_PUBLIC_THEME\nENV NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME}\n\nARG NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED\nENV NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED}\n\nARG NEXT_PUBLIC_DISABLE_LOGOUT\nENV NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT}\n\nARG NEXT_PUBLIC_CUSTOM_REFRESH_URL\nENV NEXT_PUBLIC_CUSTOM_REFRESH_URL=${NEXT_PUBLIC_CUSTOM_REFRESH_URL}\n\nARG NEXT_PUBLIC_POSTHOG_KEY\nARG NEXT_PUBLIC_POSTHOG_HOST\nENV NEXT_PUBLIC_POSTHOG_KEY=${NEXT_PUBLIC_POSTHOG_KEY}\nENV NEXT_PUBLIC_POSTHOG_HOST=${NEXT_PUBLIC_POSTHOG_HOST}\n\nARG NEXT_PUBLIC_CLOUD_ENABLED\nENV NEXT_PUBLIC_CLOUD_ENABLED=${NEXT_PUBLIC_CLOUD_ENABLED}\n\nARG NEXT_PUBLIC_SENTRY_DSN\nENV NEXT_PUBLIC_SENTRY_DSN=${NEXT_PUBLIC_SENTRY_DSN}\n\nARG NEXT_PUBLIC_GTM_ENABLED\nENV NEXT_PUBLIC_GTM_ENABLED=${NEXT_PUBLIC_GTM_ENABLED}\n\nARG NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED\nENV NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED}\n\nARG NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK\nENV NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=${NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK}\n\nARG NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY\nENV NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY}\n\nARG NEXT_PUBLIC_RECAPTCHA_SITE_KEY\nENV NEXT_PUBLIC_RECAPTCHA_SITE_KEY=${NEXT_PUBLIC_RECAPTCHA_SITE_KEY}\n\nARG SENTRY_RELEASE\nENV SENTRY_RELEASE=${SENTRY_RELEASE}\n\n# Add NODE_OPTIONS argument\nARG NODE_OPTIONS\n\n# SENTRY_AUTH_TOKEN is injected via BuildKit secret mount so it is never written\n# to any image layer, build cache, or registry manifest.\n# Use NODE_OPTIONS in the build command\nRUN --mount=type=secret,id=sentry_auth_token,env=SENTRY_AUTH_TOKEN \\\n    NODE_OPTIONS=\"${NODE_OPTIONS}\" npx next build\n\n# Step 2. Production image, copy all the files and run next\nFROM base AS runner\nWORKDIR /app\n\n# Remove global node modules, since they are not needed by the actual app\n# (all dependencies are copied over into the `/app` dir itself). These\n# global modules may be outdated and trigger security scans.\nRUN rm -rf /usr/local/lib/node_modules\n\n# Not needed, set by compose\n# ENV NODE_ENV production\n\n# Disable automatic telemetry collection\nENV NEXT_TELEMETRY_DISABLED=1\n\n# Don't run production as root\nRUN addgroup --system --gid 1001 nodejs\nRUN adduser --system --uid 1001 nextjs\nUSER nextjs\n\n# Add back in if we add anything to `public`\nCOPY --from=builder /app/public ./public\n\n# Automatically leverage output traces to reduce image size\n# https://nextjs.org/docs/advanced-features/output-file-tracing\nCOPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./\nCOPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static\n\n# Environment variables must be redefined at run time\n# NOTE: if you add something here, make sure to add it to the builder as well\n\n\n# allow user to specify custom feedback options\nARG NEXT_PUBLIC_THEME\nENV NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME}\n\nARG NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED\nENV NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED}\n\nARG NEXT_PUBLIC_DISABLE_LOGOUT\nENV NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT}\n\nARG NEXT_PUBLIC_CUSTOM_REFRESH_URL\nENV NEXT_PUBLIC_CUSTOM_REFRESH_URL=${NEXT_PUBLIC_CUSTOM_REFRESH_URL}\n\nARG NEXT_PUBLIC_POSTHOG_KEY\nARG NEXT_PUBLIC_POSTHOG_HOST\nENV NEXT_PUBLIC_POSTHOG_KEY=${NEXT_PUBLIC_POSTHOG_KEY}\nENV NEXT_PUBLIC_POSTHOG_HOST=${NEXT_PUBLIC_POSTHOG_HOST}\n\nARG NEXT_PUBLIC_CLOUD_ENABLED\nENV NEXT_PUBLIC_CLOUD_ENABLED=${NEXT_PUBLIC_CLOUD_ENABLED}\n\nARG NEXT_PUBLIC_SENTRY_DSN\nENV NEXT_PUBLIC_SENTRY_DSN=${NEXT_PUBLIC_SENTRY_DSN}\n\nARG NEXT_PUBLIC_GTM_ENABLED\nENV NEXT_PUBLIC_GTM_ENABLED=${NEXT_PUBLIC_GTM_ENABLED}\n\nARG NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED\nENV NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED}\n\nARG NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK\nENV NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=${NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK}\n\nARG NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY\nENV NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY}\n\nARG NEXT_PUBLIC_RECAPTCHA_SITE_KEY\nENV NEXT_PUBLIC_RECAPTCHA_SITE_KEY=${NEXT_PUBLIC_RECAPTCHA_SITE_KEY}\n\nARG SENTRY_RELEASE\nENV SENTRY_RELEASE=${SENTRY_RELEASE}\n\n# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.\nARG ONYX_VERSION=0.0.0-dev\nENV ONYX_VERSION=${ONYX_VERSION}\n\n# Note: Don't expose ports here, Compose will handle that for us if necessary.\n# If you want to run this without compose, specify the ports to\n# expose via cli\n\nCMD [\"node\", \"server.js\"]\n"
  },
  {
    "path": "web/README.md",
    "content": "<!-- ONYX_METADATA={\"link\": \"https://github.com/onyx-dot-app/onyx/blob/main/web/README.md\"} -->\n\nThis is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next-app`](https://github.com/vercel/next.js/tree/canary/packages/create-next-app).\n\n## Getting Started\n\nInstall node / npm: https://docs.npmjs.com/downloading-and-installing-node-js-and-npm\nInstall all dependencies: `npm i`.\n\nThen, run the development server:\n\n```bash\nnpm run dev\n```\n\nOpen [http://localhost:3000](http://localhost:3000) with your browser to see the result.\n\n_Note:_ if you are having problems accessing the ^, try setting the `WEB_DOMAIN` env variable to\n`http://127.0.0.1:3000` and accessing it there.\n\n> [!TIP]\n> Packages are installed automatically when switching branches after `package.json` changes with [pre-commit](https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md#formatting-and-linting) configured.\n\n### Connecting to a Cloud Backend\n\nTo test your local frontend development server against a cloud backend (e.g., staging or production),\ncreate a `.env.local` file in the `web/` directory with the following configuration:\n\n```text\n# Point local dev server to cloud backend\nINTERNAL_URL=https://st-dev.onyx.app/api\n\n# Debug auth cookie for authenticating against remote backend\n# This cookie is automatically injected into API requests when in development mode\n# To get this value:\n#   1. Go to https://st-dev.onyx.app (or your target backend URL) and log in\n#   2. Open DevTools (F12) → Application → Cookies → [your backend domain]\n#   3. Find the \"fastapiusersauth\" cookie and copy its value\n#   4. Paste the value below (without quotes)\n# Note: This cookie may expire, so you may need to refresh it periodically\nDEBUG_AUTH_COOKIE=your_cookie_value_here\n```\n\nBy default, this does _NOT_ override existing cookies, so if you've logged in previously, you\nmay need to delete the cookies for the `localhost` domain.\n\n**Important notes:**\n\n- The `.env.local` file should be created in the `web/` directory (same level as `package.json`)\n- After creating or modifying `.env.local`, restart your development server for changes to take effect\n- The `DEBUG_AUTH_COOKIE` is only used in development mode (`NODE_ENV=development`)\n- If `INTERNAL_URL` is not set, the frontend will connect to the local backend at `http://127.0.0.1:8080`\n- Keep your `.env.local` file secure and never commit it to version control (it should already be in `.gitignore`)\n\n## Testing\n\nThis testing process will reset your application into a clean state.\nDon't run these tests if you don't want to do this!\n\nBring up the entire application.\n\n0. Install playwright dependencies\n\n```bash\nnpx playwright install\n```\n\n1. Run playwright\n\n```bash\nnpx playwright test\n```\n\nTo run a single test:\n\n```bash\nnpx playwright test landing-page.spec.ts\n```\n\nIf running locally, interactive options can help you see exactly what is happening in\nthe test.\n\n```bash\nnpx playwright test --ui\nnpx playwright test --headed\n```\n\n2. Inspect results\n\nBy default, playwright.config.ts is configured to output the results to:\n\n```bash\nweb/output/playwright/\n```\n\n3. Visual regression screenshots\n\nScreenshots are captured automatically during test runs and saved to `web/output/screenshots/`.\nTo compare screenshots across CI runs, use:\n\n```bash\nods screenshot-diff compare --project admin\n```\n\nFor more information, see [tools/ods/README.md](https://github.com/onyx-dot-app/onyx/blob/main/tools/ods/README.md#screenshot-diff---visual-regression-testing).\n"
  },
  {
    "path": "web/components.json",
    "content": "{\n  \"$schema\": \"https://ui.shadcn.com/schema.json\",\n  \"style\": \"default\",\n  \"rsc\": true,\n  \"tsx\": true,\n  \"tailwind\": {\n    \"config\": \"tailwind-themes/tailwind.config.js\",\n    \"css\": \"src/app/globals.css\",\n    \"baseColor\": \"neutral\",\n    \"cssVariables\": false,\n    \"prefix\": \"\"\n  },\n  \"aliases\": {\n    \"components\": \"@/components\",\n    \"utils\": \"@/lib/utils\",\n    \"ui\": \"@/components/ui\",\n    \"lib\": \"@/lib\",\n    \"hooks\": \"@/hooks\"\n  }\n}\n"
  },
  {
    "path": "web/jest.config.js",
    "content": "/**\n * Jest configuration with separate projects for different test environments.\n *\n * We use two separate projects:\n * 1. \"unit\" - Node environment for pure unit tests (no DOM needed)\n * 2. \"integration\" - jsdom environment for React integration tests\n *\n * This allows us to run tests with the correct environment automatically\n * without needing @jest-environment comments in every test file.\n */\n\n// Shared configuration\nconst sharedConfig = {\n  preset: \"ts-jest\",\n  setupFilesAfterEnv: [\"<rootDir>/tests/setup/jest.setup.ts\"],\n\n  // Performance: Use 50% of CPU cores for parallel execution\n  maxWorkers: \"50%\",\n\n  moduleNameMapper: {\n    // Mock CSS files (before path alias resolution)\n    // CSS/static assets cannot be executed in tests and must be mocked\n    \"^@/.*\\\\.(css|less|scss|sass)$\": \"<rootDir>/tests/setup/mocks/cssMock.js\",\n    \"^katex/dist/katex.min.css$\": \"<rootDir>/tests/setup/mocks/cssMock.js\",\n    \"\\\\.(css|less|scss|sass)$\": \"<rootDir>/tests/setup/mocks/cssMock.js\",\n    // Mock static file imports\n    \"\\\\.(jpg|jpeg|png|gif|svg|woff|woff2|ttf|eot)$\":\n      \"<rootDir>/tests/setup/fileMock.js\",\n    // Mock specific components that have complex dependencies\n    \"^@/providers/UserProvider$\":\n      \"<rootDir>/tests/setup/mocks/components/UserProvider.tsx\",\n    // Path aliases (must come after specific mocks)\n    \"^@/(.*)$\": \"<rootDir>/src/$1\",\n    \"^@tests/(.*)$\": \"<rootDir>/tests/$1\",\n    \"^@opal$\": \"<rootDir>/lib/opal/src/index.ts\",\n    \"^@opal/(.*)$\": \"<rootDir>/lib/opal/src/$1\",\n  },\n\n  testPathIgnorePatterns: [\"/node_modules/\", \"/tests/e2e/\", \"/.next/\"],\n\n  // Transform ES Modules in node_modules to CommonJS for Jest compatibility\n  // Add packages here when you encounter: \"SyntaxError: Unexpected token 'export'\"\n  // These packages ship as ESM and need to be transformed to work in Jest\n  transformIgnorePatterns: [\n    \"/node_modules/(?!(\" +\n      [\n        // Auth & Security\n        \"jose\",\n        // UI Libraries\n        \"@radix-ui\",\n        \"@headlessui\",\n        \"@phosphor-icons\",\n        // Testing & Mocking\n        \"msw\",\n        \"until-async\",\n        // Language Detection\n        \"linguist-languages\",\n        // Markdown & Syntax Highlighting\n        \"react-markdown\",\n        \"remark-.*\", // All remark packages\n        \"rehype-.*\", // All rehype packages\n        \"unified\",\n        \"lowlight\",\n        \"highlight\\\\.js\",\n        // Markdown Utilities\n        \"bail\",\n        \"is-plain-obj\",\n        \"trough\",\n        \"vfile\",\n        \"vfile-.*\", // All vfile packages\n        \"unist-.*\", // All unist packages\n        \"mdast-.*\", // All mdast packages\n        \"hast-.*\", // All hast packages\n        \"hastscript\",\n        \"micromark.*\", // All micromark packages\n        \"decode-named-character-reference\",\n        \"character-entities\",\n        \"devlop\",\n        \"comma-separated-tokens\",\n        \"property-information\",\n        \"space-separated-tokens\",\n        \"html-void-elements\",\n        \"html-url-attributes\",\n        \"aria-attributes\",\n        \"web-namespaces\",\n        \"svg-tag-names\",\n        \"style-to-object\",\n        \"inline-style-parser\",\n        \"ccount\",\n        \"escape-string-regexp\",\n        \"markdown-table\",\n        \"longest-streak\",\n        \"zwitch\",\n        \"trim-lines\",\n        \"stringify-entities\",\n        \"estree-.*\", // All estree packages\n      ].join(\"|\") +\n      \")/)\",\n  ],\n\n  transform: {\n    \"^.+\\\\.(t|j)sx?$\": [\n      \"ts-jest\",\n      {\n        // Performance: Disable type-checking in tests (types are checked by tsc)\n        isolatedModules: true,\n        tsconfig: {\n          jsx: \"react-jsx\",\n          // Allow ts-jest to process JavaScript files from node_modules\n          allowJs: true,\n        },\n      },\n    ],\n  },\n\n  // Performance: Cache results between runs\n  cache: true,\n  cacheDirectory: \"<rootDir>/.jest-cache\",\n\n  collectCoverageFrom: [\n    \"src/**/*.{ts,tsx}\",\n    \"!src/**/*.d.ts\",\n    \"!src/**/*.stories.tsx\",\n  ],\n\n  coveragePathIgnorePatterns: [\"/node_modules/\", \"/tests/\", \"/.next/\"],\n\n  // Performance: Clear mocks automatically between tests\n  clearMocks: true,\n  resetMocks: false,\n  restoreMocks: false,\n};\n\nmodule.exports = {\n  projects: [\n    {\n      displayName: \"unit\",\n      ...sharedConfig,\n      testEnvironment: \"node\",\n      testMatch: [\n        // Pure unit tests that don't need DOM\n        \"**/src/**/codeUtils.test.ts\",\n        \"**/src/lib/**/*.test.ts\",\n        \"**/src/app/**/services/*.test.ts\",\n        \"**/src/app/**/utils/*.test.ts\",\n        \"**/src/app/**/hooks/*.test.ts\", // Pure packet processor tests\n        \"**/src/hooks/**/*.test.ts\",\n        \"**/src/refresh-components/**/*.test.ts\",\n        \"**/src/refresh-pages/**/*.test.ts\",\n        \"**/src/sections/**/*.test.ts\",\n        \"**/src/components/**/*.test.ts\",\n        // Add more patterns here as you add more unit tests\n      ],\n    },\n    {\n      displayName: \"integration\",\n      ...sharedConfig,\n      testEnvironment: \"jsdom\",\n      testMatch: [\n        // React component integration tests\n        \"**/src/app/**/*.test.tsx\",\n        \"**/src/components/**/*.test.tsx\",\n        \"**/src/lib/**/*.test.tsx\",\n        \"**/src/providers/**/*.test.tsx\",\n        \"**/src/refresh-components/**/*.test.tsx\",\n        \"**/src/hooks/**/*.test.tsx\",\n        \"**/src/sections/**/*.test.tsx\",\n        // Add more patterns here as you add more integration tests\n      ],\n    },\n  ],\n};\n"
  },
  {
    "path": "web/lib/opal/README.md",
    "content": "# Opal\n\nA Typescript component library for Onyx.\n\n## Usage\n\n```tsx\nimport { Button } from \"@opal/components\";\n\nfunction MyComponent() {\n  return <Button onClick={() => console.log(\"Clicked!\")}>Click me</Button>;\n}\n```\n\n## Build\n\nOpal is built in such a way that it _reuses_ the `/web/node_modules` directory.\nTherefore, builds don't incur duplicate space-costs (i.e., what would have happened if Opal had its own `node_modules`).\nIf you want to add dependencies to Opal, define that dependency inside of `/web/lib/opal/package.json` under `peerDependencies`.\nThen, go to `/web` and run the install:\n\n```sh\nnpm i\n\n# Or, if you prefer `bun`\nbun i\n```\n\nThose dependencies will then install inside of `/web/node_modules` and be available to Opal.\n\n## Structure\n\n```\n/web/lib/opal/\n├── src/\n│   ├── core/           # Low-level primitives (Interactive, Hoverable)\n│   ├── components/     # High-level React components (Button, SelectButton, OpenButton, Tag)\n│   ├── layouts/        # Layout primitives (Content, ContentAction, IllustrationContent)\n│   └── index.ts        # Main export file\n├── package.json\n├── tsconfig.json\n└── README.md\n```\n\n## Conventions\n\n- **Directory names** are kebab-case (e.g. `select-button/`, `open-button/`, `content-action/`)\n- **Each component directory** contains `components.tsx`, `styles.css` (if needed), and `README.md`\n- **Imports** use `@opal/` path aliases (e.g. `@opal/components`, `@opal/core`)\n"
  },
  {
    "path": "web/lib/opal/package.json",
    "content": "{\n  \"name\": \"@onyx/opal\",\n  \"version\": \"0.0.1\",\n  \"exports\": {\n    \"./components\": {\n      \"types\": \"./src/components/index.ts\",\n      \"default\": \"./src/components/index.ts\"\n    },\n    \"./layouts\": {\n      \"types\": \"./src/layouts/index.ts\",\n      \"default\": \"./src/layouts/index.ts\"\n    },\n    \"./core\": {\n      \"types\": \"./src/core/index.ts\",\n      \"default\": \"./src/core/index.ts\"\n    },\n    \"./icons\": {\n      \"types\": \"./src/icons/index.ts\",\n      \"default\": \"./src/icons/index.ts\"\n    },\n    \"./illustrations\": {\n      \"types\": \"./src/illustrations/index.ts\",\n      \"default\": \"./src/illustrations/index.ts\"\n    },\n    \"./types\": {\n      \"types\": \"./src/types.ts\",\n      \"default\": \"./src/types.ts\"\n    },\n    \"./utils\": {\n      \"types\": \"./src/utils.ts\",\n      \"default\": \"./src/utils.ts\"\n    }\n  }\n}\n"
  },
  {
    "path": "web/lib/opal/scripts/README.md",
    "content": "# SVG-to-TSX Conversion Scripts\n\n## Overview\n\nIntegrating `@svgr/webpack` into the TypeScript compiler was not working via the recommended route (Next.js webpack configuration).\nThe automatic SVG-to-React component conversion was causing compilation issues and import resolution problems.\nTherefore, we manually convert each SVG into a TSX file using SVGR CLI with a custom template.\n\nAll scripts in this directory should be run from the **opal package root** (`web/lib/opal/`).\n\n## Directory Layout\n\n```\nweb/lib/opal/\n├── scripts/                          # SVG conversion tooling (this directory)\n│   ├── convert-svg.sh                # Converts SVGs into React components\n│   └── icon-template.js              # Shared SVGR template (used for both icons and illustrations)\n├── src/\n│   ├── icons/                        # Small, single-colour icons (stroke = currentColor)\n│   └── illustrations/                # Larger, multi-colour illustrations (colours preserved)\n└── package.json\n```\n\n## Icons vs Illustrations\n\n| | Icons | Illustrations |\n|---|---|---|\n| **Import path** | `@opal/icons` | `@opal/illustrations` |\n| **Location** | `src/icons/` | `src/illustrations/` |\n| **Colour** | Overridable via `currentColor` | Fixed — original SVG colours preserved |\n| **Script flag** | (none) | `--illustration` |\n\n## Files in This Directory\n\n### `icon-template.js`\n\nA custom SVGR template that generates components with the following features:\n- Imports `IconProps` from `@opal/types` for consistent typing\n- Supports the `size` prop for controlling icon dimensions\n- Includes `width` and `height` attributes bound to the `size` prop\n- Maintains all standard SVG props (className, color, title, etc.)\n\n### `convert-svg.sh`\n\nConverts an SVG into a React component. Behaviour depends on the mode:\n\n**Icon mode** (default):\n- Strips `stroke`, `stroke-opacity`, `width`, and `height` attributes\n- Adds `width={size}`, `height={size}`, and `stroke=\"currentColor\"`\n- Result is colour-overridable via CSS `color` property\n\n**Illustration mode** (`--illustration`):\n- Strips only `width` and `height` attributes (all colours preserved)\n- Adds `width={size}` and `height={size}`\n- Does **not** add `stroke=\"currentColor\"` — illustrations keep their original colours\n\nBoth modes automatically delete the source SVG file after successful conversion.\n\n## Adding New SVGs\n\n### Icons\n\n```sh\n# From web/lib/opal/\n./scripts/convert-svg.sh src/icons/my-icon.svg\n```\n\nThen add the export to `src/icons/index.ts`:\n```ts\nexport { default as SvgMyIcon } from \"@opal/icons/my-icon\";\n```\n\n### Illustrations\n\n```sh\n# From web/lib/opal/\n./scripts/convert-svg.sh --illustration src/illustrations/my-illustration.svg\n```\n\nThen add the export to `src/illustrations/index.ts`:\n```ts\nexport { default as SvgMyIllustration } from \"@opal/illustrations/my-illustration\";\n```\n\n## Manual Conversion\n\nIf you prefer to run the SVGR command directly:\n\n**For icons** (strips colours):\n```sh\nbunx @svgr/cli <file>.svg --typescript --svgo-config '{\"plugins\":[{\"name\":\"removeAttrs\",\"params\":{\"attrs\":[\"stroke\",\"stroke-opacity\",\"width\",\"height\"]}}]}' --template scripts/icon-template.js > <file>.tsx\n```\n\n**For illustrations** (preserves colours):\n```sh\nbunx @svgr/cli <file>.svg --typescript --svgo-config '{\"plugins\":[{\"name\":\"removeAttrs\",\"params\":{\"attrs\":[\"width\",\"height\"]}}]}' --template scripts/icon-template.js > <file>.tsx\n```\n\nAfter running either manual command, remember to delete the original SVG file.\n"
  },
  {
    "path": "web/lib/opal/scripts/convert-svg.sh",
    "content": "#!/bin/bash\n\n# Convert an SVG file to a TypeScript React component.\n#\n# By default, converts to a colour-overridable icon (stroke colours stripped, replaced with currentColor).\n# With --illustration, converts to a fixed-colour illustration (all original colours preserved).\n#\n# Usage (from the opal package root — web/lib/opal/):\n#   ./scripts/convert-svg.sh src/icons/<filename.svg>\n#   ./scripts/convert-svg.sh --illustration src/illustrations/<filename.svg>\n\nILLUSTRATION=false\n\n# Parse flags\nwhile [[ \"$1\" == --* ]]; do\n  case \"$1\" in\n    --illustration)\n      ILLUSTRATION=true\n      shift\n      ;;\n    *)\n      echo \"Unknown flag: $1\" >&2\n      echo \"Usage: ./scripts/convert-svg.sh [--illustration] <filename.svg>\" >&2\n      exit 1\n      ;;\n  esac\ndone\n\nif [ -z \"$1\" ]; then\n  echo \"Usage: ./scripts/convert-svg.sh [--illustration] <filename.svg>\" >&2\n  exit 1\nfi\n\nSVG_FILE=\"$1\"\n\n# Check if file exists\nif [ ! -f \"$SVG_FILE\" ]; then\n  echo \"Error: File '$SVG_FILE' not found\" >&2\n  exit 1\nfi\n\n# Check if it's an SVG file\nif [[ ! \"$SVG_FILE\" == *.svg ]]; then\n  echo \"Error: File must have .svg extension\" >&2\n  exit 1\nfi\n\n# Get the base name without extension\nBASE_NAME=\"${SVG_FILE%.svg}\"\n\n# Build the SVGO config based on mode\nif [ \"$ILLUSTRATION\" = true ]; then\n  # Illustrations: only strip width and height (preserve all colours)\n  SVGO_CONFIG='{\"plugins\":[{\"name\":\"removeAttrs\",\"params\":{\"attrs\":[\"width\",\"height\"]}}]}'\nelse\n  # Icons: strip stroke, stroke-opacity, width, and height\n  SVGO_CONFIG='{\"plugins\":[{\"name\":\"removeAttrs\",\"params\":{\"attrs\":[\"stroke\",\"stroke-opacity\",\"width\",\"height\"]}}]}'\nfi\n\n# Resolve the template path relative to this script (not the caller's CWD)\nSCRIPT_DIR=\"$(dirname \"${BASH_SOURCE[0]}\")\"\n\n# Run the conversion into a temp file so a failed run doesn't destroy an existing .tsx\nTMPFILE=\"${BASE_NAME}.tsx.tmp\"\nbunx @svgr/cli \"$SVG_FILE\" --typescript --svgo-config \"$SVGO_CONFIG\" --template \"${SCRIPT_DIR}/icon-template.js\" > \"$TMPFILE\"\n\nif [ $? -eq 0 ]; then\n  # Verify the temp file has content before replacing the destination\n  if [ ! -s \"$TMPFILE\" ]; then\n    rm -f \"$TMPFILE\"\n    echo \"Error: Output file was not created or is empty\" >&2\n    exit 1\n  fi\n\n  mv \"$TMPFILE\" \"${BASE_NAME}.tsx\" || { echo \"Error: Failed to move temp file\" >&2; exit 1; }\n\n  # Post-process the file to add width and height attributes bound to the size prop\n  # Using perl for cross-platform compatibility (works on macOS, Linux, Windows with WSL)\n  # Note: perl -i returns 0 even on some failures, so we validate the output\n\n  perl -i -pe 's/<svg/<svg width={size} height={size}/g' \"${BASE_NAME}.tsx\"\n  if [ $? -ne 0 ]; then\n    echo \"Error: Failed to add width/height attributes\" >&2\n    exit 1\n  fi\n\n  # Icons additionally get stroke=\"currentColor\"\n  if [ \"$ILLUSTRATION\" = false ]; then\n    perl -i -pe 's/\\{\\.\\.\\.props\\}/stroke=\"currentColor\" {...props}/g' \"${BASE_NAME}.tsx\"\n    if [ $? -ne 0 ]; then\n      echo \"Error: Failed to add stroke attribute\" >&2\n      exit 1\n    fi\n  fi\n\n  # Verify the file still exists and has content after post-processing\n  if [ ! -s \"${BASE_NAME}.tsx\" ]; then\n    echo \"Error: Output file corrupted during post-processing\" >&2\n    exit 1\n  fi\n\n  # Verify required attributes are present in the output\n  if ! grep -q 'width={size}' \"${BASE_NAME}.tsx\" || ! grep -q 'height={size}' \"${BASE_NAME}.tsx\"; then\n    echo \"Error: Post-processing did not add required attributes\" >&2\n    exit 1\n  fi\n\n  # For icons, also verify stroke=\"currentColor\" was added\n  if [ \"$ILLUSTRATION\" = false ]; then\n    if ! grep -q 'stroke=\"currentColor\"' \"${BASE_NAME}.tsx\"; then\n      echo \"Error: Post-processing did not add stroke=\\\"currentColor\\\"\" >&2\n      exit 1\n    fi\n  fi\n\n  echo \"Created ${BASE_NAME}.tsx\"\n  rm \"$SVG_FILE\"\n  echo \"Deleted $SVG_FILE\"\nelse\n  rm -f \"$TMPFILE\"\n  echo \"Error: Conversion failed\" >&2\n  exit 1\nfi\n"
  },
  {
    "path": "web/lib/opal/scripts/icon-template.js",
    "content": "// Template for SVGR to generate icon components with size prop support\nconst template = (variables, { tpl }) => {\n  return tpl`\nimport type { IconProps } from \"@opal/types\";\n\nconst ${variables.componentName} = ({ size, ...props }: IconProps) => (\n  ${variables.jsx}\n);\n\n${variables.exports};\n`;\n};\n\nmodule.exports = template;\n"
  },
  {
    "path": "web/lib/opal/src/components/README.md",
    "content": "# Opal Components\n\nHigh-level UI components built on the [`@opal/core`](../core/) primitives. Every component in this directory delegates state styling (hover, active, disabled) to `Interactive.Stateless` or `Interactive.Stateful` via CSS data-attributes and the `--interactive-foreground` / `--interactive-foreground-icon` custom properties — no duplicated Tailwind class maps.\n\n## Package export\n\nComponents are exposed via:\n\n```ts\nimport { Button, SelectButton, OpenButton, Tag } from \"@opal/components\";\n```\n\nThe barrel file at `index.ts` re-exports each component and its prop types. Each component imports its own `styles.css` internally.\n\n## Components\n\n| Component | Description | Docs |\n|-----------|-------------|------|\n| [Button](./buttons/button/) | Label and/or icon-only stateless button | [README](./buttons/button/README.md) |\n| [SelectButton](./buttons/select-button/) | Stateful toggle button with optional foldable content | [README](./buttons/select-button/README.md) |\n| [OpenButton](./buttons/open-button/) | Trigger button with rotating chevron for popovers | [README](./buttons/open-button/README.md) |\n| [Tag](./tag/) | Small colored label for status/category metadata | [README](./tag/README.md) |\n\n## Adding new components\n\n1. Create a directory under `components/` in kebab-case (e.g. `components/inputs/text-input/`)\n2. Add a `styles.css` for layout-only CSS (colors come from Interactive primitives)\n3. Add a `components.tsx` with the component and its exported props type\n4. Import `styles.css` at the top of your `components.tsx`\n5. Add a `README.md` inside the component directory with architecture, props, and usage examples\n6. In `components/index.ts`, re-export the component:\n   ```ts\n   export { TextInput, type TextInputProps } from \"@opal/components/inputs/text-input/components\";\n   ```\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/Button/Button.stories.tsx",
    "content": "import React from \"react\";\nimport type { Meta, StoryObj } from \"@storybook/react\";\nimport { Button } from \"@opal/components\";\nimport { SvgPlus, SvgArrowRight, SvgSettings } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof Button> = {\n  title: \"opal/components/Button\",\n  component: Button,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Button>;\n\nexport const Default: Story = {\n  args: {\n    children: \"Button\",\n    variant: \"default\",\n    prominence: \"primary\",\n  },\n};\n\nconst VARIANTS = [\"default\", \"action\", \"danger\"] as const;\nconst PROMINENCES = [\"primary\", \"secondary\", \"tertiary\"] as const;\n\nexport const VariantProminenceGrid: Story = {\n  render: () => (\n    <div\n      style={{\n        display: \"grid\",\n        gridTemplateColumns: \"auto repeat(3, 1fr)\",\n        gap: 12,\n        alignItems: \"center\",\n      }}\n    >\n      {/* Header row */}\n      <div />\n      {PROMINENCES.map((p) => (\n        <div\n          key={p}\n          style={{\n            fontWeight: 600,\n            textAlign: \"center\",\n            textTransform: \"capitalize\",\n          }}\n        >\n          {p}\n        </div>\n      ))}\n\n      {/* Variant rows */}\n      {VARIANTS.map((variant) => (\n        <React.Fragment key={variant}>\n          <div style={{ fontWeight: 600, textTransform: \"capitalize\" }}>\n            {variant}\n          </div>\n          {PROMINENCES.map((prominence) => (\n            <Button\n              key={`${variant}-${prominence}`}\n              variant={variant}\n              prominence={prominence}\n            >\n              {`${variant} ${prominence}`}\n            </Button>\n          ))}\n        </React.Fragment>\n      ))}\n    </div>\n  ),\n};\n\nexport const WithLeftIcon: Story = {\n  args: {\n    icon: SvgPlus,\n    children: \"Add item\",\n  },\n};\n\nexport const WithRightIcon: Story = {\n  args: {\n    rightIcon: SvgArrowRight,\n    children: \"Continue\",\n  },\n};\n\nexport const IconOnly: Story = {\n  args: {\n    icon: SvgSettings,\n  },\n};\n\nexport const Sizes: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", alignItems: \"center\", gap: 12 }}>\n      {([\"lg\", \"md\", \"sm\", \"xs\", \"2xs\", \"fit\"] as const).map((size) => (\n        <Button key={size} size={size} icon={SvgPlus}>\n          {size}\n        </Button>\n      ))}\n    </div>\n  ),\n};\n\nexport const Foldable: Story = {\n  args: {\n    foldable: true,\n    icon: SvgPlus,\n    children: \"Add item\",\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    disabled: true,\n    children: \"Disabled\",\n  },\n};\n\nexport const WidthFull: Story = {\n  args: {\n    width: \"full\",\n    children: \"Full width\",\n  },\n  decorators: [\n    (Story) => (\n      <div style={{ width: 400 }}>\n        <Story />\n      </div>\n    ),\n  ],\n};\n\nexport const AsLink: Story = {\n  args: {\n    href: \"https://example.com\",\n    children: \"Visit site\",\n    rightIcon: SvgArrowRight,\n  },\n};\n\nexport const WithTooltip: Story = {\n  args: {\n    icon: SvgSettings,\n    tooltip: \"Open settings\",\n    tooltipSide: \"bottom\",\n  },\n};\n\nexport const ResponsiveHideText: Story = {\n  args: {\n    icon: SvgPlus,\n    children: \"Create\",\n    responsiveHideText: true,\n  },\n};\n\nexport const InternalProminence: Story = {\n  args: {\n    variant: \"default\",\n    prominence: \"internal\",\n    children: \"Internal\",\n  },\n};\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/button/README.md",
    "content": "# Button\n\n**Import:** `import { Button, type ButtonProps } from \"@opal/components\";`\n\nA single component that handles both labeled buttons and icon-only buttons. Built on `Interactive.Stateless` > `Interactive.Container`.\n\n## Architecture\n\n```\nInteractive.Stateless          <- variant, prominence, interaction, disabled, href, onClick\n  └─ Interactive.Container     <- height, rounding, padding (from `size`), border (auto for secondary)\n       └─ div.opal-button.interactive-foreground\n            ├─ div > Icon?       (interactive-foreground-icon)\n            ├─ <span>?           .opal-button-label\n            └─ div > RightIcon?  (interactive-foreground-icon)\n```\n\n- **Colors are not in the Button.** `Interactive.Stateless` sets `background-color`, `--interactive-foreground`, and `--interactive-foreground-icon` per variant/prominence/state. Descendants opt in via the `.interactive-foreground` and `.interactive-foreground-icon` utility classes.\n- **Icon-only buttons render as squares** because `Interactive.Container` enforces `min-width >= height`.\n- **Border is automatic for `prominence=\"secondary\"`.** The Container receives `border={prominence === \"secondary\"}` internally.\n\n## Props\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `variant` | `\"default\" \\| \"action\" \\| \"danger\" \\| \"none\"` | `\"default\"` | Color variant |\n| `prominence` | `\"primary\" \\| \"secondary\" \\| \"tertiary\" \\| \"internal\"` | `\"primary\"` | Color prominence |\n| `interaction` | `\"rest\" \\| \"hover\" \\| \"active\"` | `\"rest\"` | JS-controlled interaction override |\n| `icon` | `IconFunctionComponent` | — | Left icon |\n| `children` | `string` | — | Label text. Omit for icon-only buttons |\n| `rightIcon` | `IconFunctionComponent` | — | Right icon |\n| `responsiveHideText` | `boolean` | `false` | Hides label on small screens |\n| `size` | `SizeVariant` | `\"lg\"` | Size preset |\n| `type` | `\"submit\" \\| \"button\" \\| \"reset\"` | `\"button\"` | HTML button type |\n| `width` | `WidthVariant` | — | Width preset |\n| `tooltip` | `string` | — | Tooltip text |\n| `tooltipSide` | `TooltipSide` | `\"top\"` | Tooltip placement |\n| `disabled` | `boolean` | `false` | Disables the button |\n| `href` | `string` | — | URL; renders as a link |\n\n## Usage\n\n```tsx\nimport { Button } from \"@opal/components\";\nimport { SvgPlus, SvgArrowRight } from \"@opal/icons\";\n\n// Primary button with label\n<Button variant=\"default\" onClick={handleClick}>Save changes</Button>\n\n// Icon-only button (renders as a square)\n<Button icon={SvgPlus} prominence=\"tertiary\" size=\"sm\" />\n\n// Secondary button (auto border)\n<Button rightIcon={SvgArrowRight} prominence=\"secondary\">Continue</Button>\n\n// Interaction override (e.g. inside a popover trigger)\n<Button icon={SvgFilter} prominence=\"tertiary\" interaction={isOpen ? \"hover\" : \"rest\"} />\n```\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/button/components.tsx",
    "content": "import \"@opal/components/tooltip.css\";\nimport { Interactive, type InteractiveStatelessProps } from \"@opal/core\";\nimport type {\n  ContainerSizeVariants,\n  ExtremaSizeVariants,\n  RichStr,\n} from \"@opal/types\";\nimport { Text } from \"@opal/components\";\nimport type { TooltipSide } from \"@opal/components\";\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport { iconWrapper } from \"@opal/components/buttons/icon-wrapper\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype ButtonContentProps =\n  | {\n      icon?: IconFunctionComponent;\n      children: string | RichStr;\n      rightIcon?: IconFunctionComponent;\n      responsiveHideText?: never;\n    }\n  | {\n      icon: IconFunctionComponent;\n      children?: string | RichStr;\n      rightIcon?: IconFunctionComponent;\n      responsiveHideText?: boolean;\n    };\n\ntype ButtonProps = InteractiveStatelessProps &\n  ButtonContentProps & {\n    /**\n     * Size preset — controls gap, text size, and Container height/rounding.\n     */\n    size?: ContainerSizeVariants;\n\n    /** Tooltip text shown on hover. */\n    tooltip?: string;\n\n    /** Width preset. `\"fit\"` shrink-wraps, `\"full\"` stretches to parent width. */\n    width?: ExtremaSizeVariants;\n\n    /** Which side the tooltip appears on. */\n    tooltipSide?: TooltipSide;\n\n    /** Applies disabled styling and suppresses clicks. */\n    disabled?: boolean;\n  };\n\n// ---------------------------------------------------------------------------\n// Button\n// ---------------------------------------------------------------------------\n\nfunction Button({\n  icon: Icon,\n  children,\n  rightIcon: RightIcon,\n  size = \"lg\",\n  type = \"button\",\n  width,\n  tooltip,\n  tooltipSide = \"top\",\n  responsiveHideText = false,\n  disabled,\n  ...interactiveProps\n}: ButtonProps) {\n  const isLarge = size === \"lg\";\n\n  const labelEl = children ? (\n    responsiveHideText ? (\n      <span className=\"hidden md:inline whitespace-nowrap\">\n        <Text\n          font={isLarge ? \"main-ui-body\" : \"secondary-body\"}\n          color=\"inherit\"\n        >\n          {children}\n        </Text>\n      </span>\n    ) : (\n      <Text\n        font={isLarge ? \"main-ui-body\" : \"secondary-body\"}\n        color=\"inherit\"\n        nowrap\n      >\n        {children}\n      </Text>\n    )\n  ) : null;\n\n  const button = (\n    <Interactive.Stateless\n      type={type}\n      disabled={disabled}\n      {...interactiveProps}\n    >\n      <Interactive.Container\n        type={type}\n        border={interactiveProps.prominence === \"secondary\"}\n        heightVariant={size}\n        widthVariant={width}\n        roundingVariant={isLarge ? \"md\" : size === \"2xs\" ? \"xs\" : \"sm\"}\n      >\n        <div className=\"flex flex-row items-center gap-1\">\n          {iconWrapper(Icon, size, !!children)}\n\n          {labelEl}\n          {responsiveHideText ? (\n            <span className=\"hidden md:inline-flex\">\n              {iconWrapper(RightIcon, size, !!children)}\n            </span>\n          ) : (\n            iconWrapper(RightIcon, size, !!children)\n          )}\n        </div>\n      </Interactive.Container>\n    </Interactive.Stateless>\n  );\n\n  if (tooltip) {\n    return (\n      <TooltipPrimitive.Root>\n        <TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>\n        <TooltipPrimitive.Portal>\n          <TooltipPrimitive.Content\n            className=\"opal-tooltip\"\n            side={tooltipSide}\n            sideOffset={4}\n          >\n            {tooltip}\n          </TooltipPrimitive.Content>\n        </TooltipPrimitive.Portal>\n      </TooltipPrimitive.Root>\n    );\n  }\n\n  return button;\n}\n\nexport { Button, type ButtonProps };\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/chevron.css",
    "content": ".opal-button-chevron {\n  transition: rotate 200ms ease;\n}\n\n.interactive[data-interaction=\"hover\"] .opal-button-chevron,\n.interactive[data-interaction=\"active\"] .opal-button-chevron {\n  rotate: -180deg;\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/chevron.tsx",
    "content": "import \"@opal/components/buttons/chevron.css\";\nimport type { IconProps } from \"@opal/types\";\nimport { SvgChevronDownSmall } from \"@opal/icons\";\nimport { cn } from \"@opal/utils\";\n\n/**\n * Chevron icon that rotates 180° when its parent `.interactive` enters\n * hover / active state.  Shared by OpenButton, FilterButton, and any\n * future button that needs an animated dropdown indicator.\n *\n * Stable component identity — never causes React to remount the SVG.\n */\nfunction ChevronIcon({ className, ...props }: IconProps) {\n  return (\n    <SvgChevronDownSmall\n      className={cn(className, \"opal-button-chevron\")}\n      {...props}\n    />\n  );\n}\n\nexport { ChevronIcon };\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/filter-button/FilterButton.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { FilterButton } from \"@opal/components\";\nimport { Disabled as DisabledProvider } from \"@opal/core\";\nimport { SvgUser, SvgActions, SvgTag } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof FilterButton> = {\n  title: \"opal/components/FilterButton\",\n  component: FilterButton,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof FilterButton>;\n\nexport const Empty: Story = {\n  args: {\n    icon: SvgUser,\n    children: \"Everyone\",\n  },\n};\n\nexport const Active: Story = {\n  args: {\n    icon: SvgUser,\n    active: true,\n    children: \"By alice@example.com\",\n    onClear: () => console.log(\"clear\"),\n  },\n};\n\nexport const Open: Story = {\n  args: {\n    icon: SvgActions,\n    interaction: \"hover\",\n    children: \"All Actions\",\n  },\n};\n\nexport const ActiveOpen: Story = {\n  args: {\n    icon: SvgActions,\n    active: true,\n    interaction: \"hover\",\n    children: \"2 selected\",\n    onClear: () => console.log(\"clear\"),\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    icon: SvgTag,\n    children: \"All Tags\",\n  },\n  decorators: [\n    (Story) => (\n      <DisabledProvider disabled>\n        <Story />\n      </DisabledProvider>\n    ),\n  ],\n};\n\nexport const DisabledActive: Story = {\n  args: {\n    icon: SvgTag,\n    active: true,\n    children: \"2 tags\",\n    onClear: () => console.log(\"clear\"),\n  },\n  decorators: [\n    (Story) => (\n      <DisabledProvider disabled>\n        <Story />\n      </DisabledProvider>\n    ),\n  ],\n};\n\nexport const StateComparison: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: 12, alignItems: \"center\" }}>\n      <FilterButton icon={SvgUser} onClear={() => undefined}>\n        Everyone\n      </FilterButton>\n      <FilterButton icon={SvgUser} active onClear={() => console.log(\"clear\")}>\n        By alice@example.com\n      </FilterButton>\n    </div>\n  ),\n};\n\nexport const WithTooltip: Story = {\n  args: {\n    icon: SvgUser,\n    children: \"Everyone\",\n    tooltip: \"Filter by creator\",\n    tooltipSide: \"bottom\",\n  },\n};\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/filter-button/README.md",
    "content": "# FilterButton\n\n**Import:** `import { FilterButton, type FilterButtonProps } from \"@opal/components\";`\n\nA stateful filter trigger with a built-in chevron (when empty) and a clear button (when selected). Hardcodes `variant=\"select-filter\"` and delegates to `Interactive.Stateful`, adding automatic open-state detection from Radix `data-state`. Designed to sit inside a `Popover.Trigger` for filter dropdowns.\n\n## Relationship to OpenButton\n\nFilterButton shares a similar call stack to `OpenButton`:\n\n```\nInteractive.Stateful → Interactive.Container → content row (icon + label + trailing indicator)\n```\n\nFilterButton is a **narrower, filter-specific** variant:\n\n- It hardcodes `variant=\"select-filter\"` (OpenButton uses `\"select-heavy\"`)\n- It exposes `active?: boolean` instead of the raw `state` prop (maps to `\"selected\"` / `\"empty\"` internally)\n- When active, the chevron is hidden via `visibility` and an absolutely-positioned clear `Button` with `prominence=\"tertiary\"` overlays it — placed as a sibling outside the `<button>` to avoid nesting buttons\n- It uses the shared `ChevronIcon` from `buttons/chevron` (same as OpenButton)\n- It does not support `foldable`, `size`, or `width` — it is always `\"lg\"`\n\n## Architecture\n\n```\ndiv.relative                               <- bounding wrapper\n  Interactive.Stateful                     <- variant=\"select-filter\", interaction, state\n    └─ Interactive.Container (button)      <- height=\"lg\", default rounding/padding\n         └─ div.interactive-foreground\n              ├─ div > Icon                (interactive-foreground-icon)\n              ├─ <span>                    label text\n              └─ ChevronIcon               (when empty)\n                 OR spacer div             (when selected — reserves chevron space)\n  div.absolute                             <- clear Button overlay (when selected)\n    └─ Button (SvgX, size=\"2xs\", prominence=\"tertiary\")\n```\n\n- **Open-state detection** reads `data-state=\"open\"` injected by Radix triggers (e.g. `Popover.Trigger`), falling back to the explicit `interaction` prop.\n- **Chevron rotation** uses the shared `ChevronIcon` component and `buttons/chevron.css`, which rotates 180deg when `data-interaction=\"hover\"`.\n- **Clear button** is absolutely positioned outside the `<button>` element tree to avoid invalid nested `<button>` elements. An invisible spacer inside the button reserves the same space so layout doesn't shift between states.\n\n## Props\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `icon` | `IconFunctionComponent` | **required** | Left icon component |\n| `children` | `string` | **required** | Label text between icon and trailing indicator |\n| `active` | `boolean` | `false` | Whether the filter has an active selection |\n| `onClear` | `() => void` | **required** | Called when the clear (X) button is clicked |\n| `interaction` | `\"rest\" \\| \"hover\" \\| \"active\"` | auto | JS-controlled interaction override. Falls back to Radix `data-state=\"open\"`. |\n| `tooltip` | `string` | — | Tooltip text shown on hover |\n| `tooltipSide` | `TooltipSide` | `\"top\"` | Which side the tooltip appears on |\n\n## Usage\n\n```tsx\nimport { FilterButton } from \"@opal/components\";\nimport { SvgUser } from \"@opal/icons\";\n\n// Inside a Popover (auto-detects open state)\n<Popover.Trigger asChild>\n  <FilterButton\n    icon={SvgUser}\n    active={hasSelection}\n    onClear={() => clearSelection()}\n  >\n    {hasSelection ? selectionLabel : \"Everyone\"}\n  </FilterButton>\n</Popover.Trigger>\n```\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/filter-button/components.tsx",
    "content": "import {\n  Interactive,\n  type InteractiveStatefulInteraction,\n  type InteractiveStatefulProps,\n} from \"@opal/core\";\nimport type { TooltipSide } from \"@opal/components\";\nimport type { IconFunctionComponent, RichStr } from \"@opal/types\";\nimport { Text } from \"@opal/components\";\nimport { SvgX } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport { iconWrapper } from \"@opal/components/buttons/icon-wrapper\";\nimport { ChevronIcon } from \"@opal/components/buttons/chevron\";\nimport { Button } from \"@opal/components/buttons/button/components\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface FilterButtonProps\n  extends Omit<InteractiveStatefulProps, \"variant\" | \"state\" | \"children\"> {\n  /** Left icon — always visible. */\n  icon: IconFunctionComponent;\n\n  /** Label text between icon and trailing indicator. */\n  children: string | RichStr;\n\n  /** Whether the filter has an active selection. @default false */\n  active?: boolean;\n\n  /** Called when the clear (X) button is clicked in active state. */\n  onClear: () => void;\n\n  /** Tooltip text shown on hover. */\n  tooltip?: string;\n\n  /** Which side the tooltip appears on. */\n  tooltipSide?: TooltipSide;\n}\n\n// ---------------------------------------------------------------------------\n// FilterButton\n// ---------------------------------------------------------------------------\n\nfunction FilterButton({\n  icon: Icon,\n  children,\n  onClear,\n  tooltip,\n  tooltipSide = \"top\",\n  active = false,\n  interaction,\n  ...statefulProps\n}: FilterButtonProps) {\n  // Derive open state: explicit prop > Radix data-state (injected via Slot chain)\n  const dataState = (statefulProps as Record<string, unknown>)[\"data-state\"] as\n    | string\n    | undefined;\n  const resolvedInteraction: InteractiveStatefulInteraction =\n    interaction ?? (dataState === \"open\" ? \"hover\" : \"rest\");\n\n  const button = (\n    <div className=\"relative\">\n      <Interactive.Stateful\n        {...statefulProps}\n        variant=\"select-filter\"\n        interaction={resolvedInteraction}\n        state={active ? \"selected\" : \"empty\"}\n      >\n        <Interactive.Container type=\"button\">\n          <div className=\"flex flex-row items-center gap-1\">\n            {iconWrapper(Icon, \"lg\", true)}\n            <Text font=\"main-ui-action\" color=\"inherit\" nowrap>\n              {children}\n            </Text>\n            <div style={{ visibility: active ? \"hidden\" : \"visible\" }}>\n              {iconWrapper(ChevronIcon, \"lg\", true)}\n            </div>\n          </div>\n        </Interactive.Container>\n      </Interactive.Stateful>\n\n      {active && (\n        <div className=\"absolute right-2 top-1/2 -translate-y-1/2\">\n          {/* Force hover state so the X stays visually prominent against\n              the inverted selected background — without this it renders\n              dimmed and looks disabled. */}\n          <Button\n            icon={SvgX}\n            size=\"2xs\"\n            prominence=\"tertiary\"\n            tooltip=\"Clear filter\"\n            interaction=\"hover\"\n            onClick={(e) => {\n              e.stopPropagation();\n              onClear();\n            }}\n          />\n        </div>\n      )}\n    </div>\n  );\n\n  if (!tooltip) return button;\n\n  return (\n    <TooltipPrimitive.Root>\n      <TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>\n      <TooltipPrimitive.Portal>\n        <TooltipPrimitive.Content\n          className=\"opal-tooltip\"\n          side={tooltipSide}\n          sideOffset={4}\n        >\n          {tooltip}\n        </TooltipPrimitive.Content>\n      </TooltipPrimitive.Portal>\n    </TooltipPrimitive.Root>\n  );\n}\n\nexport { FilterButton, type FilterButtonProps };\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/icon-wrapper.tsx",
    "content": "import type { ContainerSizeVariants } from \"@opal/types\";\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport { cn } from \"@opal/utils\";\n\nconst iconVariants = {\n  lg: { padding: \"p-0.5\", size: 1 },\n  md: { padding: \"p-0.5\", size: 1 },\n  sm: { padding: \"p-0\", size: 1 },\n  xs: { padding: \"p-0.5\", size: 0.75 },\n  \"2xs\": { padding: \"p-0\", size: 0.75 },\n  fit: { padding: \"p-0.5\", size: 1 },\n} as const;\n\nfunction iconWrapper(\n  Icon: IconFunctionComponent | undefined,\n  size: ContainerSizeVariants,\n  includeSpacer: boolean\n) {\n  const { padding: p, size: s } = iconVariants[size];\n\n  return Icon ? (\n    <div className={cn(\"interactive-foreground-icon\", p)}>\n      <Icon\n        className=\"shrink-0\"\n        style={{\n          height: `${s}rem`,\n          width: `${s}rem`,\n        }}\n      />\n    </div>\n  ) : includeSpacer ? (\n    <div />\n  ) : null;\n}\n\nexport { iconWrapper, iconVariants };\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/line-item-button/README.md",
    "content": "# LineItemButton\n\n**Import:** `import { LineItemButton, type LineItemButtonProps } from \"@opal/components\";`\n\nA composite component that wraps `Interactive.Stateful > Interactive.Container > ContentAction` into a single API. Use it for selectable list rows such as model pickers, menu items, or any row that acts like a button.\n\n## Architecture\n\n```\nInteractive.Stateful         <- selectVariant, state, interaction, onClick, href, ref\n  └─ Interactive.Container   <- type, width, roundingVariant\n       └─ ContentAction      <- withInteractive, paddingVariant=\"lg\"\n            ├─ Content       <- icon, title, description, sizePreset, variant, ...\n            └─ rightChildren\n```\n\n`paddingVariant` is hardcoded to `\"lg\"` and `withInteractive` is always `true`. These are not exposed as props.\n\n## Props\n\n### Interactive surface\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `selectVariant` | `\"select-light\" \\| \"select-heavy\"` | `\"select-light\"` | Interactive select variant |\n| `state` | `InteractiveStatefulState` | `\"empty\"` | Value state (`\"empty\"`, `\"filled\"`, `\"selected\"`) |\n| `interaction` | `InteractiveStatefulInteraction` | `\"rest\"` | JS-controlled interaction state override |\n| `onClick` | `MouseEventHandler<HTMLElement>` | — | Click handler |\n| `href` | `string` | — | Renders an anchor instead of a div |\n| `target` | `string` | — | Anchor target (e.g. `\"_blank\"`) |\n| `group` | `string` | — | Interactive group key |\n| `ref` | `React.Ref<HTMLElement>` | — | Forwarded ref |\n\n### Sizing\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `roundingVariant` | `InteractiveContainerRoundingVariant` | `\"md\"` | Corner rounding preset (height is content-driven) |\n| `width` | `WidthVariant` | `\"full\"` | Container width |\n| `type` | `\"submit\" \\| \"button\" \\| \"reset\"` | `\"button\"` | HTML button type |\n| `tooltip` | `string` | — | Tooltip text shown on hover |\n| `tooltipSide` | `TooltipSide` | `\"top\"` | Tooltip side |\n\n### Content (pass-through to ContentAction)\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `title` | `string` | **(required)** | Row label |\n| `icon` | `IconFunctionComponent` | — | Left icon |\n| `description` | `string` | — | Description below the title |\n| `sizePreset` | `SizePreset` | `\"headline\"` | Content size preset |\n| `variant` | `ContentVariant` | `\"heading\"` | Content layout variant |\n| `rightChildren` | `ReactNode` | — | Content after the label (e.g. action button) |\n\nAll other `ContentAction` / `Content` props (`editable`, `onTitleChange`, `optional`, `auxIcon`, `tag`, etc.) are also passed through. Note: `withInteractive` is always `true` inside `LineItemButton` and cannot be overridden.\n\n## Usage\n\n```tsx\nimport { LineItemButton } from \"@opal/components\";\n\n// Simple selectable row\n<LineItemButton\n  selectVariant=\"select-heavy\"\n  state={isSelected ? \"selected\" : \"empty\"}\n  roundingVariant=\"sm\"\n  onClick={handleClick}\n  title=\"gpt-4o\"\n  sizePreset=\"main-ui\"\n  variant=\"section\"\n/>\n\n// With right-side action\n<LineItemButton\n  selectVariant=\"select-heavy\"\n  state={isSelected ? \"selected\" : \"empty\"}\n  onClick={handleClick}\n  title=\"claude-opus-4\"\n  sizePreset=\"main-ui\"\n  variant=\"section\"\n  rightChildren={<Tag title=\"Default\" color=\"blue\" />}\n/>\n```\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/line-item-button/components.tsx",
    "content": "import {\n  Interactive,\n  type InteractiveStatefulProps,\n  InteractiveContainerRoundingVariant,\n} from \"@opal/core\";\nimport type { ExtremaSizeVariants } from \"@opal/types\";\nimport type { TooltipSide } from \"@opal/components\";\nimport type { DistributiveOmit } from \"@opal/types\";\nimport type { ContentActionProps } from \"@opal/layouts/content-action/components\";\nimport { ContentAction } from \"@opal/layouts\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype ContentPassthroughProps = DistributiveOmit<\n  ContentActionProps,\n  \"paddingVariant\" | \"widthVariant\" | \"ref\"\n>;\n\ntype LineItemButtonOwnProps = Pick<\n  InteractiveStatefulProps,\n  | \"state\"\n  | \"interaction\"\n  | \"onClick\"\n  | \"href\"\n  | \"target\"\n  | \"group\"\n  | \"ref\"\n  | \"type\"\n> & {\n  /** Interactive select variant. @default \"select-light\" */\n  selectVariant?: \"select-light\" | \"select-heavy\";\n\n  /** Corner rounding preset (height is always content-driven). @default \"md\" */\n  roundingVariant?: InteractiveContainerRoundingVariant;\n\n  /** Container width. @default \"full\" */\n  width?: ExtremaSizeVariants;\n\n  /** Tooltip text shown on hover. */\n  tooltip?: string;\n\n  /** Which side the tooltip appears on. @default \"top\" */\n  tooltipSide?: TooltipSide;\n};\n\ntype LineItemButtonProps = ContentPassthroughProps & LineItemButtonOwnProps;\n\n// ---------------------------------------------------------------------------\n// LineItemButton\n// ---------------------------------------------------------------------------\n\nfunction LineItemButton({\n  // Interactive surface\n  selectVariant = \"select-light\",\n  state,\n  interaction,\n  onClick,\n  href,\n  target,\n  group,\n  ref,\n  type = \"button\",\n\n  // Sizing\n  roundingVariant = \"md\",\n  width = \"full\",\n  tooltip,\n  tooltipSide = \"top\",\n\n  // ContentAction pass-through\n  ...contentActionProps\n}: LineItemButtonProps) {\n  const item = (\n    <Interactive.Stateful\n      variant={selectVariant}\n      state={state}\n      interaction={interaction}\n      onClick={onClick}\n      href={href}\n      target={target}\n      group={group}\n      ref={ref}\n    >\n      <Interactive.Container\n        type={type}\n        widthVariant={width}\n        heightVariant=\"lg\"\n        roundingVariant={roundingVariant}\n      >\n        <ContentAction\n          {...(contentActionProps as ContentActionProps)}\n          paddingVariant=\"fit\"\n        />\n      </Interactive.Container>\n    </Interactive.Stateful>\n  );\n\n  if (!tooltip) return item;\n\n  return (\n    <TooltipPrimitive.Root>\n      <TooltipPrimitive.Trigger asChild>{item}</TooltipPrimitive.Trigger>\n      <TooltipPrimitive.Portal>\n        <TooltipPrimitive.Content\n          className=\"opal-tooltip\"\n          side={tooltipSide}\n          sideOffset={4}\n        >\n          {tooltip}\n        </TooltipPrimitive.Content>\n      </TooltipPrimitive.Portal>\n    </TooltipPrimitive.Root>\n  );\n}\n\nexport { LineItemButton, type LineItemButtonProps };\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/open-button/OpenButton.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { OpenButton } from \"@opal/components\";\nimport { Disabled as DisabledProvider } from \"@opal/core\";\nimport { SvgSettings } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof OpenButton> = {\n  title: \"opal/components/OpenButton\",\n  component: OpenButton,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof OpenButton>;\n\nexport const Default: Story = {\n  args: {\n    children: \"Select option\",\n  },\n};\n\nexport const WithIcon: Story = {\n  args: {\n    icon: SvgSettings,\n    children: \"Settings\",\n  },\n};\n\nexport const Open: Story = {\n  args: {\n    interaction: \"hover\",\n    children: \"Open state\",\n  },\n};\n\nexport const Foldable: Story = {\n  args: {\n    foldable: true,\n    icon: SvgSettings,\n    children: \"Settings\",\n  },\n};\n\nexport const FoldableDisabled: Story = {\n  args: {\n    foldable: true,\n    icon: SvgSettings,\n    children: \"Settings\",\n  },\n  decorators: [\n    (Story) => (\n      <DisabledProvider disabled>\n        <Story />\n      </DisabledProvider>\n    ),\n  ],\n};\n\nexport const Sizes: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", alignItems: \"center\", gap: 12 }}>\n      {([\"lg\", \"md\", \"sm\", \"xs\", \"2xs\"] as const).map((size) => (\n        <OpenButton key={size} size={size}>\n          {size}\n        </OpenButton>\n      ))}\n    </div>\n  ),\n};\n\nexport const WithTooltip: Story = {\n  args: {\n    icon: SvgSettings,\n    children: \"Settings\",\n    tooltip: \"Open settings\",\n    tooltipSide: \"bottom\",\n  },\n};\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/open-button/README.md",
    "content": "# OpenButton\n\n**Import:** `import { OpenButton, type OpenButtonProps } from \"@opal/components\";`\n\nA trigger button with a built-in chevron that rotates when open. Hardcodes `variant=\"select-heavy\"` and delegates to `Interactive.Stateful`, adding automatic open-state detection from Radix `data-state`. Designed to work automatically with Radix primitives while also supporting explicit control via the `interaction` prop.\n\n## Relationship to SelectButton\n\nOpenButton is structurally near-identical to `SelectButton` — both share the same call stack:\n\n```\nInteractive.Stateful → Interactive.Container → content row (icon + label + trailing icon)\n```\n\nOpenButton is a **tighter, specialized use-case** of SelectButton:\n\n- It hardcodes `variant=\"select-heavy\"` (SelectButton exposes `variant`)\n- It adds a built-in chevron with CSS-driven rotation (SelectButton has no chevron)\n- It auto-detects Radix `data-state=\"open\"` to derive `interaction` (SelectButton has no Radix awareness)\n- It does not support `rightIcon` (SelectButton does)\n\nBoth components support `foldable` using the same pattern: `interactive-foldable-host` class + `Interactive.Foldable` wrapper around the label and trailing icon. When foldable, the left icon stays visible while the rest collapses. If you change the foldable implementation in one, update the other to match.\n\nIf you need a general-purpose stateful toggle, use `SelectButton`. If you need a popover/dropdown trigger with a chevron, use `OpenButton`.\n\n## Architecture\n\n```\nInteractive.Stateful           <- variant=\"select-heavy\", interaction, state, disabled, onClick\n  └─ Interactive.Container     <- height, rounding, padding (from `size`)\n       └─ div.opal-button.interactive-foreground [.interactive-foldable-host]\n            ├─ div > Icon?                 (interactive-foreground-icon)\n            ├─ [Foldable]?                 (wraps label + chevron when foldable)\n            │    ├─ <span>?                .opal-button-label\n            │    └─ div > ChevronIcon      .opal-open-button-chevron\n            └─ <span>? / ChevronIcon       (non-foldable)\n```\n\n- **`interaction` controls both the chevron and the hover visual state.** When `interaction` is `\"hover\"` (explicitly or via Radix `data-state=\"open\"`), the chevron rotates 180° and the hover background activates.\n- **Open-state detection** is dual-resolution: the explicit `interaction` prop takes priority; otherwise the component reads `data-state=\"open\"` injected by Radix triggers (e.g. `Popover.Trigger`).\n- **Chevron rotation** is CSS-driven via `.interactive[data-interaction=\"hover\"] .opal-open-button-chevron { rotate: -180deg }`. The `ChevronIcon` is a stable named component (not an inline function) to preserve React element identity across renders.\n\n## Props\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `state` | `\"empty\" \\| \"filled\" \\| \"selected\"` | `\"empty\"` | Current value state |\n| `interaction` | `\"rest\" \\| \"hover\" \\| \"active\"` | auto | JS-controlled interaction override. Falls back to Radix `data-state=\"open\"` when omitted. |\n| `icon` | `IconFunctionComponent` | — | Left icon component |\n| `children` | `string` | — | Content between icon and chevron |\n| `foldable` | `boolean` | `false` | When `true`, requires both `icon` and `children`; the left icon stays visible while the label + chevron collapse when not hovered. If `tooltip` is omitted on a disabled foldable button, the label text is used as the tooltip. |\n| `size` | `SizeVariant` | `\"lg\"` | Size preset controlling height, rounding, and padding |\n| `width` | `WidthVariant` | — | Width preset |\n| `tooltip` | `string` | — | Tooltip text shown on hover |\n| `tooltipSide` | `TooltipSide` | `\"top\"` | Which side the tooltip appears on |\n| `disabled` | `boolean` | `false` | Disables the button |\n\n## Usage\n\n```tsx\nimport { OpenButton } from \"@opal/components\";\nimport { SvgFilter } from \"@opal/icons\";\n\n// Basic usage with Radix Popover (auto-detects open state)\n<Popover.Trigger asChild>\n  <OpenButton>Select option</OpenButton>\n</Popover.Trigger>\n\n// Explicit interaction control\n<OpenButton interaction={isExpanded ? \"hover\" : \"rest\"} onClick={toggle}>\n  Advanced settings\n</OpenButton>\n\n// With left icon\n<OpenButton icon={SvgFilter} state=\"filled\">\n  Filters\n</OpenButton>\n```\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/open-button/components.tsx",
    "content": "import {\n  Interactive,\n  type InteractiveStatefulProps,\n  type InteractiveStatefulInteraction,\n} from \"@opal/core\";\nimport type {\n  ContainerSizeVariants,\n  ExtremaSizeVariants,\n  RichStr,\n} from \"@opal/types\";\nimport { Text } from \"@opal/components\";\nimport type { InteractiveContainerRoundingVariant } from \"@opal/core\";\nimport type { TooltipSide } from \"@opal/components\";\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport { cn } from \"@opal/utils\";\nimport { iconWrapper } from \"@opal/components/buttons/icon-wrapper\";\nimport { ChevronIcon } from \"@opal/components/buttons/chevron\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\n/**\n * Content props — a discriminated union on `foldable` that enforces:\n *\n * - `foldable: true`  → `icon` and `children` are required (icon stays visible,\n *                        label + chevron fold away)\n * - `foldable?: false` → at least one of `icon` or `children` must be provided\n */\ntype OpenButtonContentProps =\n  | {\n      foldable: true;\n      icon: IconFunctionComponent;\n      children: string | RichStr;\n    }\n  | {\n      foldable?: false;\n      icon?: IconFunctionComponent;\n      children: string | RichStr;\n    }\n  | {\n      foldable?: false;\n      icon: IconFunctionComponent;\n      children?: string | RichStr;\n    };\n\ntype OpenButtonVariant = \"select-light\" | \"select-heavy\" | \"select-tinted\";\n\ntype OpenButtonProps = Omit<InteractiveStatefulProps, \"variant\"> & {\n  variant?: OpenButtonVariant;\n} & OpenButtonContentProps & {\n    /**\n     * Size preset — controls gap, text size, and Container height/rounding.\n     */\n    size?: ContainerSizeVariants;\n\n    /** Width preset. */\n    width?: ExtremaSizeVariants;\n\n    /**\n     * Content justify mode. When `\"between\"`, icon+label group left and\n     * chevron pushes to the right edge. Default keeps all items in a\n     * tight `gap-1` row.\n     */\n    justifyContent?: \"between\";\n\n    /** Tooltip text shown on hover. */\n    tooltip?: string;\n\n    /** Which side the tooltip appears on. */\n    tooltipSide?: TooltipSide;\n\n    /** Override the default rounding derived from `size`. */\n    roundingVariant?: InteractiveContainerRoundingVariant;\n\n    /** Applies disabled styling and suppresses clicks. */\n    disabled?: boolean;\n  };\n\n// ---------------------------------------------------------------------------\n// OpenButton\n// ---------------------------------------------------------------------------\n\nfunction OpenButton({\n  icon: Icon,\n  children,\n  size = \"lg\",\n  foldable,\n  width,\n  justifyContent,\n  tooltip,\n  tooltipSide = \"top\",\n  roundingVariant: roundingVariantOverride,\n  interaction,\n  variant = \"select-heavy\",\n  disabled,\n  ...statefulProps\n}: OpenButtonProps) {\n  // Derive open state: explicit prop → Radix data-state (injected via Slot chain)\n  const dataState = (statefulProps as Record<string, unknown>)[\"data-state\"] as\n    | string\n    | undefined;\n  const resolvedInteraction: InteractiveStatefulInteraction =\n    interaction ?? (dataState === \"open\" ? \"hover\" : \"rest\");\n\n  const isLarge = size === \"lg\";\n\n  const labelEl = children ? (\n    <Text\n      font={isLarge ? \"main-ui-body\" : \"secondary-body\"}\n      color=\"inherit\"\n      nowrap\n    >\n      {children}\n    </Text>\n  ) : null;\n\n  const button = (\n    <Interactive.Stateful\n      variant={variant}\n      interaction={resolvedInteraction}\n      disabled={disabled}\n      {...statefulProps}\n    >\n      <Interactive.Container\n        type=\"button\"\n        heightVariant={size}\n        widthVariant={width}\n        roundingVariant={\n          roundingVariantOverride ??\n          (isLarge ? \"md\" : size === \"2xs\" ? \"xs\" : \"sm\")\n        }\n      >\n        <div\n          className={cn(\n            \"flex flex-row items-center\",\n            justifyContent === \"between\" ? \"w-full justify-between\" : \"gap-1\",\n            foldable &&\n              justifyContent !== \"between\" &&\n              \"interactive-foldable-host\"\n          )}\n        >\n          {justifyContent === \"between\" ? (\n            <>\n              <span className=\"flex flex-row items-center gap-1\">\n                {iconWrapper(Icon, size, !foldable && !!children)}\n                {labelEl}\n              </span>\n              {iconWrapper(ChevronIcon, size, !!children)}\n            </>\n          ) : foldable ? (\n            <>\n              {iconWrapper(Icon, size, !foldable && !!children)}\n              <Interactive.Foldable>\n                {labelEl}\n                {iconWrapper(ChevronIcon, size, !!children)}\n              </Interactive.Foldable>\n            </>\n          ) : (\n            <>\n              {iconWrapper(Icon, size, !foldable && !!children)}\n              {labelEl}\n              {iconWrapper(ChevronIcon, size, !!children)}\n            </>\n          )}\n        </div>\n      </Interactive.Container>\n    </Interactive.Stateful>\n  );\n\n  const resolvedTooltip =\n    tooltip ?? (foldable && disabled && children ? children : undefined);\n\n  if (!resolvedTooltip) return button;\n\n  return (\n    <TooltipPrimitive.Root>\n      <TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>\n      <TooltipPrimitive.Portal>\n        <TooltipPrimitive.Content\n          className=\"opal-tooltip\"\n          side={tooltipSide}\n          sideOffset={4}\n        >\n          <Text>{resolvedTooltip}</Text>\n        </TooltipPrimitive.Content>\n      </TooltipPrimitive.Portal>\n    </TooltipPrimitive.Root>\n  );\n}\n\nexport { OpenButton, type OpenButtonProps };\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/select-button/README.md",
    "content": "# SelectButton\n\n**Import:** `import { SelectButton, type SelectButtonProps } from \"@opal/components\";`\n\nA stateful button for togglable selections — the stateful counterpart to `Button`. Built on `Interactive.Stateful` > `Interactive.Container`.\n\n## Relationship to OpenButton\n\nSelectButton and `OpenButton` are structurally near-identical — both share the same call stack:\n\n```\nInteractive.Stateful → Interactive.Container → content row (icon + label + trailing icon)\n```\n\n`OpenButton` is a **tighter, specialized use-case** of SelectButton:\n\n- OpenButton hardcodes `variant=\"select-heavy\"` (SelectButton exposes `variant`)\n- OpenButton adds a built-in chevron with CSS-driven rotation (SelectButton has no chevron)\n- OpenButton auto-detects Radix `data-state=\"open\"` to derive `interaction` (SelectButton has no Radix awareness)\n- OpenButton does not support `rightIcon` (SelectButton does)\n\nBoth components support `foldable` using the same pattern: `interactive-foldable-host` class + `Interactive.Foldable` wrapper around the label and trailing icon. When foldable, the left icon stays visible while the rest collapses. If you change the foldable implementation in one, update the other to match.\n\nUse SelectButton for general-purpose stateful toggles. Use `OpenButton` for popover/dropdown triggers with a chevron.\n\n## Architecture\n\n```\nInteractive.Stateful           <- variant, state, interaction, disabled, onClick\n  └─ Interactive.Container     <- height, rounding, padding (from `size`)\n       └─ div.opal-select-button.interactive-foreground\n            ├─ Icon?           (interactive-foreground-icon)\n            ├─ [Foldable]?     (wraps label + rightIcon when foldable)\n            │    ├─ <span>     .opal-select-button-label\n            │    └─ RightIcon?\n            └─ <span>? / RightIcon?  (non-foldable)\n```\n\n## Props\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `variant` | `\"select-light\" \\| \"select-heavy\" \\| \"sidebar\"` | `\"select-heavy\"` | Stateful color variant |\n| `state` | `\"empty\" \\| \"filled\" \\| \"selected\"` | `\"empty\"` | Current value state |\n| `interaction` | `\"rest\" \\| \"hover\" \\| \"active\"` | `\"rest\"` | JS-controlled interaction override |\n| `icon` | `IconFunctionComponent` | — | Left icon |\n| `children` | `string` | — | Label text |\n| `rightIcon` | `IconFunctionComponent` | — | Right icon |\n| `foldable` | `boolean` | `false` | When `true`, label + rightIcon collapse when not hovered |\n| `size` | `SizeVariant` | `\"lg\"` | Size preset |\n| `width` | `WidthVariant` | — | Width preset |\n| `tooltip` | `string` | — | Tooltip text |\n| `tooltipSide` | `TooltipSide` | `\"top\"` | Tooltip placement |\n| `disabled` | `boolean` | `false` | Disables the button |\n\n## Usage\n\n```tsx\nimport { SelectButton } from \"@opal/components\";\nimport { SvgStar } from \"@opal/icons\";\n\n// Basic toggle\n<SelectButton\n  icon={SvgStar}\n  state={isFavorite ? \"selected\" : \"empty\"}\n  onClick={toggleFavorite}\n>\n  Favorite\n</SelectButton>\n\n// Foldable — icon stays visible, label folds away\n<SelectButton\n  foldable\n  icon={SvgStar}\n  state=\"empty\"\n>\n  Favorite\n</SelectButton>\n```\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/select-button/components.tsx",
    "content": "\"use client\";\n\nimport \"@opal/components/buttons/select-button/styles.css\";\nimport { Interactive, type InteractiveStatefulProps } from \"@opal/core\";\nimport type {\n  ContainerSizeVariants,\n  ExtremaSizeVariants,\n  RichStr,\n} from \"@opal/types\";\nimport { Text } from \"@opal/components\";\nimport type { TooltipSide } from \"@opal/components\";\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport { cn } from \"@opal/utils\";\nimport { iconWrapper } from \"@opal/components/buttons/icon-wrapper\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\n/**\n * Content props — a discriminated union on `foldable` that enforces:\n *\n * - `foldable: true`  → `icon` and `children` are required (icon stays visible,\n *                        label + rightIcon fold away)\n * - `foldable?: false` → at least one of `icon` or `children` must be provided\n */\ntype SelectButtonContentProps =\n  | {\n      foldable: true;\n      icon: IconFunctionComponent;\n      children: string | RichStr;\n      rightIcon?: IconFunctionComponent;\n    }\n  | {\n      foldable?: false;\n      icon?: IconFunctionComponent;\n      children: string | RichStr;\n      rightIcon?: IconFunctionComponent;\n    }\n  | {\n      foldable?: false;\n      icon: IconFunctionComponent;\n      children?: string | RichStr;\n      rightIcon?: IconFunctionComponent;\n    };\n\ntype SelectButtonProps = InteractiveStatefulProps &\n  SelectButtonContentProps & {\n    /**\n     * Size preset — controls gap, text size, and Container height/rounding.\n     */\n    size?: ContainerSizeVariants;\n\n    /** Tooltip text shown on hover. */\n    tooltip?: string;\n\n    /** Width preset. `\"fit\"` shrink-wraps, `\"full\"` stretches to parent width. */\n    width?: ExtremaSizeVariants;\n\n    /** Which side the tooltip appears on. */\n    tooltipSide?: TooltipSide;\n\n    /** Applies disabled styling and suppresses clicks. */\n    disabled?: boolean;\n  };\n\n// ---------------------------------------------------------------------------\n// SelectButton\n// ---------------------------------------------------------------------------\n\nfunction SelectButton({\n  icon: Icon,\n  children,\n  rightIcon: RightIcon,\n  size = \"lg\",\n  type = \"button\",\n  foldable,\n  width,\n  tooltip,\n  tooltipSide = \"top\",\n  disabled,\n  ...statefulProps\n}: SelectButtonProps) {\n  const isLarge = size === \"lg\";\n\n  const labelEl = children ? (\n    <Text\n      font={isLarge ? \"main-ui-body\" : \"secondary-body\"}\n      color=\"inherit\"\n      nowrap\n    >\n      {children}\n    </Text>\n  ) : null;\n\n  const button = (\n    <Interactive.Stateful disabled={disabled} {...statefulProps}>\n      <Interactive.Container\n        type={type}\n        heightVariant={size}\n        widthVariant={width}\n        roundingVariant={isLarge ? \"md\" : size === \"2xs\" ? \"xs\" : \"sm\"}\n      >\n        <div\n          className={cn(\n            \"opal-select-button\",\n            foldable && \"interactive-foldable-host\"\n          )}\n        >\n          {iconWrapper(Icon, size, !foldable && !!children)}\n\n          {foldable ? (\n            <Interactive.Foldable>\n              {labelEl}\n              {iconWrapper(RightIcon, size, !!children)}\n            </Interactive.Foldable>\n          ) : (\n            <>\n              {labelEl}\n              {iconWrapper(RightIcon, size, !!children)}\n            </>\n          )}\n        </div>\n      </Interactive.Container>\n    </Interactive.Stateful>\n  );\n\n  const resolvedTooltip =\n    tooltip ?? (foldable && disabled && children ? children : undefined);\n\n  if (!resolvedTooltip) return button;\n\n  return (\n    <TooltipPrimitive.Root>\n      <TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>\n      <TooltipPrimitive.Portal>\n        <TooltipPrimitive.Content\n          className=\"opal-tooltip\"\n          side={tooltipSide}\n          sideOffset={4}\n        >\n          <Text>{resolvedTooltip}</Text>\n        </TooltipPrimitive.Content>\n      </TooltipPrimitive.Portal>\n    </TooltipPrimitive.Root>\n  );\n}\n\nexport { SelectButton, type SelectButtonProps };\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/select-button/styles.css",
    "content": "/* SelectButton — layout only; colors handled by Interactive.Stateful */\n\n.opal-select-button {\n  @apply flex flex-row items-center gap-1;\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/sidebar-tab/README.md",
    "content": "# SidebarTab\n\n**Import:** `import { SidebarTab, type SidebarTabProps } from \"@opal/components\";`\n\nA sidebar navigation tab built on `Interactive.Stateful` > `Interactive.Container`. Designed for admin and app sidebars.\n\n## Architecture\n\n```\ndiv.relative\n  └─ Interactive.Stateful        <- variant (sidebar-heavy | sidebar-light), state, disabled\n       └─ Interactive.Container  <- rounding, height, width\n            ├─ Link?             (absolute overlay for client-side navigation)\n            ├─ rightChildren?    (absolute, above Link for inline actions)\n            └─ ContentAction     (icon + title + truncation spacer)\n```\n\n- **`sidebar-heavy`** (default) — muted when unselected (text-03/text-02), bold when selected (text-04/text-03)\n- **`sidebar-light`** — uniformly muted across all states (text-02/text-02)\n- **Disabled** — both variants use text-02 foreground, transparent background, no hover/active states\n- **Navigation** uses an absolutely positioned `<Link>` overlay rather than `href` on the Interactive element, so `rightChildren` can sit above it with `pointer-events-auto`.\n\n## Props\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `variant` | `\"sidebar-heavy\" \\| \"sidebar-light\"` | `\"sidebar-heavy\"` | Sidebar color variant |\n| `selected` | `boolean` | `false` | Active/selected state |\n| `icon` | `IconFunctionComponent` | — | Left icon |\n| `children` | `ReactNode` | — | Label text or custom content |\n| `disabled` | `boolean` | `false` | Disables the tab |\n| `folded` | `boolean` | `false` | Collapses label, shows tooltip on hover |\n| `nested` | `boolean` | `false` | Renders spacer instead of icon for indented items |\n| `href` | `string` | — | Client-side navigation URL |\n| `onClick` | `MouseEventHandler` | — | Click handler |\n| `type` | `ButtonType` | — | HTML button type |\n| `rightChildren` | `ReactNode` | — | Actions rendered on the right side |\n\n## Usage\n\n```tsx\nimport { SidebarTab } from \"@opal/components\";\nimport { SvgSettings, SvgLock } from \"@opal/icons\";\n\n// Active tab\n<SidebarTab icon={SvgSettings} href=\"/admin/settings\" selected>\n  Settings\n</SidebarTab>\n\n// Muted variant\n<SidebarTab icon={SvgSettings} variant=\"sidebar-light\">\n  Exit Admin Panel\n</SidebarTab>\n\n// Disabled enterprise-only tab\n<SidebarTab icon={SvgLock} disabled>\n  Groups\n</SidebarTab>\n\n// Folded sidebar (icon only, tooltip on hover)\n<SidebarTab icon={SvgSettings} href=\"/admin/settings\" folded>\n  Settings\n</SidebarTab>\n```\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/sidebar-tab/SidebarTab.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { SidebarTab } from \"@opal/components/buttons/sidebar-tab/components\";\nimport {\n  SvgSettings,\n  SvgUsers,\n  SvgLock,\n  SvgArrowUpCircle,\n  SvgTrash,\n} from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof SidebarTab> = {\n  title: \"opal/components/SidebarTab\",\n  component: SidebarTab,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 260, background: \"var(--background-neutral-01)\" }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof SidebarTab>;\n\nexport const Default: Story = {\n  args: {\n    icon: SvgSettings,\n    children: \"Settings\",\n  },\n};\n\nexport const Selected: Story = {\n  args: {\n    icon: SvgSettings,\n    children: \"Settings\",\n    selected: true,\n  },\n};\n\nexport const Light: Story = {\n  args: {\n    icon: SvgSettings,\n    children: \"Settings\",\n    variant: \"sidebar-light\",\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    icon: SvgLock,\n    children: \"Enterprise Only\",\n    disabled: true,\n  },\n};\n\nexport const WithRightChildren: Story = {\n  args: {\n    icon: SvgUsers,\n    children: \"Users\",\n    rightChildren: (\n      <Button\n        icon={SvgTrash}\n        size=\"xs\"\n        prominence=\"tertiary\"\n        variant=\"danger\"\n      />\n    ),\n  },\n};\n\nexport const SidebarExample: Story = {\n  render: () => (\n    <div className=\"flex flex-col\">\n      <SidebarTab icon={SvgSettings} selected>\n        LLM Models\n      </SidebarTab>\n      <SidebarTab icon={SvgSettings}>Web Search</SidebarTab>\n      <SidebarTab icon={SvgUsers}>Users</SidebarTab>\n      <SidebarTab icon={SvgLock} disabled>\n        Groups\n      </SidebarTab>\n      <SidebarTab icon={SvgLock} disabled>\n        SCIM\n      </SidebarTab>\n      <SidebarTab icon={SvgArrowUpCircle}>Upgrade Plan</SidebarTab>\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/lib/opal/src/components/buttons/sidebar-tab/components.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport type { ButtonType, IconFunctionComponent } from \"@opal/types\";\nimport type { Route } from \"next\";\nimport { Interactive, type InteractiveStatefulVariant } from \"@opal/core\";\nimport { ContentAction } from \"@opal/layouts\";\nimport { Text } from \"@opal/components\";\nimport Link from \"next/link\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport \"@opal/components/tooltip.css\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface SidebarTabProps {\n  /** Collapses the label, showing only the icon. */\n  folded?: boolean;\n\n  /** Marks this tab as the currently active/selected item. */\n  selected?: boolean;\n\n  /**\n   * Sidebar color variant.\n   * @default \"sidebar-heavy\"\n   */\n  variant?: Extract<\n    InteractiveStatefulVariant,\n    \"sidebar-light\" | \"sidebar-heavy\"\n  >;\n\n  /** Renders an empty spacer in place of the icon for nested items. */\n  nested?: boolean;\n\n  /** Disables the tab — applies muted colors and suppresses clicks. */\n  disabled?: boolean;\n\n  onClick?: React.MouseEventHandler<HTMLElement>;\n  href?: string;\n  type?: ButtonType;\n  icon?: IconFunctionComponent;\n  children?: React.ReactNode;\n\n  /** Content rendered on the right side (e.g. action buttons). */\n  rightChildren?: React.ReactNode;\n}\n\n// ---------------------------------------------------------------------------\n// SidebarTab\n// ---------------------------------------------------------------------------\n\n/**\n * Sidebar navigation tab built on `Interactive.Stateful` > `Interactive.Container`.\n *\n * Uses `sidebar-heavy` (default) or `sidebar-light` (via `variant`) variants\n * for color styling. Supports an overlay `Link` for client-side navigation,\n * `rightChildren` for inline actions, and folded mode with an auto-tooltip.\n */\nfunction SidebarTab({\n  folded,\n  selected,\n  variant = \"sidebar-heavy\",\n  nested,\n  disabled,\n\n  onClick,\n  href,\n  type,\n  icon,\n  rightChildren,\n  children,\n}: SidebarTabProps) {\n  const Icon =\n    icon ??\n    (nested\n      ? ((() => (\n          <div className=\"w-6\" aria-hidden=\"true\" />\n        )) as IconFunctionComponent)\n      : null);\n\n  // The `rightChildren` node is absolutely positioned to sit on top of the\n  // overlay Link. A zero-width spacer reserves truncation space for the title.\n  const truncationSpacer = rightChildren && (\n    <div className=\"w-0 group-hover/SidebarTab:w-6\" />\n  );\n\n  const content = (\n    <div className=\"relative\">\n      <Interactive.Stateful\n        variant={variant}\n        state={selected ? \"selected\" : \"empty\"}\n        disabled={disabled}\n        onClick={onClick}\n        type=\"button\"\n        group=\"group/SidebarTab\"\n      >\n        <Interactive.Container\n          roundingVariant=\"sm\"\n          heightVariant=\"lg\"\n          widthVariant=\"full\"\n          type={type}\n        >\n          {href && !disabled && (\n            <Link\n              href={href as Route}\n              scroll={false}\n              className=\"absolute z-[99] inset-0 rounded-08\"\n              tabIndex={-1}\n            />\n          )}\n\n          {!folded && rightChildren && (\n            <div className=\"absolute z-[100] right-1.5 top-0 bottom-0 flex flex-col justify-center items-center pointer-events-auto\">\n              {rightChildren}\n            </div>\n          )}\n\n          {typeof children === \"string\" ? (\n            <ContentAction\n              icon={Icon ?? undefined}\n              title={folded ? \"\" : children}\n              sizePreset=\"main-ui\"\n              variant=\"body\"\n              widthVariant=\"full\"\n              paddingVariant=\"fit\"\n              rightChildren={truncationSpacer}\n            />\n          ) : (\n            <div className=\"flex flex-row items-center gap-2 flex-1\">\n              {Icon && (\n                <div className=\"flex items-center justify-center p-0.5\">\n                  <Icon className=\"h-[1rem] w-[1rem] text-text-03\" />\n                </div>\n              )}\n              {children}\n              {truncationSpacer}\n            </div>\n          )}\n        </Interactive.Container>\n      </Interactive.Stateful>\n    </div>\n  );\n\n  if (typeof children !== \"string\") return content;\n  if (folded) {\n    return (\n      <TooltipPrimitive.Root>\n        <TooltipPrimitive.Trigger asChild>{content}</TooltipPrimitive.Trigger>\n        <TooltipPrimitive.Portal>\n          <TooltipPrimitive.Content\n            className=\"opal-tooltip\"\n            side=\"right\"\n            sideOffset={4}\n          >\n            <Text>{children}</Text>\n          </TooltipPrimitive.Content>\n        </TooltipPrimitive.Portal>\n      </TooltipPrimitive.Root>\n    );\n  }\n  return content;\n}\n\nexport { SidebarTab, type SidebarTabProps };\n"
  },
  {
    "path": "web/lib/opal/src/components/cards/card/Card.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { Card } from \"@opal/components\";\n\nconst BACKGROUND_VARIANTS = [\"none\", \"light\", \"heavy\"] as const;\nconst BORDER_VARIANTS = [\"none\", \"dashed\", \"solid\"] as const;\nconst PADDING_VARIANTS = [\"fit\", \"2xs\", \"xs\", \"sm\", \"md\", \"lg\"] as const;\nconst ROUNDING_VARIANTS = [\"xs\", \"sm\", \"md\", \"lg\"] as const;\n\nconst meta: Meta<typeof Card> = {\n  title: \"opal/components/Card\",\n  component: Card,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Card>;\n\nexport const Default: Story = {\n  render: () => (\n    <Card>\n      <p>\n        Default card with light background, no border, sm padding, md rounding.\n      </p>\n    </Card>\n  ),\n};\n\nexport const BackgroundVariants: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 w-96\">\n      {BACKGROUND_VARIANTS.map((bg) => (\n        <Card key={bg} background={bg} border=\"solid\">\n          <p>backgroundVariant: {bg}</p>\n        </Card>\n      ))}\n    </div>\n  ),\n};\n\nexport const BorderVariants: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 w-96\">\n      {BORDER_VARIANTS.map((border) => (\n        <Card key={border} border={border}>\n          <p>borderVariant: {border}</p>\n        </Card>\n      ))}\n    </div>\n  ),\n};\n\nexport const PaddingVariants: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 w-96\">\n      {PADDING_VARIANTS.map((padding) => (\n        <Card key={padding} padding={padding} border=\"solid\">\n          <p>paddingVariant: {padding}</p>\n        </Card>\n      ))}\n    </div>\n  ),\n};\n\nexport const RoundingVariants: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 w-96\">\n      {ROUNDING_VARIANTS.map((rounding) => (\n        <Card key={rounding} rounding={rounding} border=\"solid\">\n          <p>roundingVariant: {rounding}</p>\n        </Card>\n      ))}\n    </div>\n  ),\n};\n\nexport const AllCombinations: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-8\">\n      {PADDING_VARIANTS.map((padding) => (\n        <div key={padding}>\n          <p className=\"font-bold pb-2\">paddingVariant: {padding}</p>\n          <div className=\"grid grid-cols-3 gap-4\">\n            {BACKGROUND_VARIANTS.map((bg) =>\n              BORDER_VARIANTS.map((border) => (\n                <Card\n                  key={`${padding}-${bg}-${border}`}\n                  padding={padding}\n                  background={bg}\n                  border={border}\n                >\n                  <p className=\"text-xs\">\n                    bg: {bg}, border: {border}\n                  </p>\n                </Card>\n              ))\n            )}\n          </div>\n        </div>\n      ))}\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/lib/opal/src/components/cards/card/README.md",
    "content": "# Card\n\n**Import:** `import { Card, type CardProps } from \"@opal/components\";`\n\nA plain container component with configurable background, border, padding, and rounding. Uses a simple `<div>` internally with `overflow-clip`.\n\n## Architecture\n\nPadding and rounding are controlled independently:\n\n| `padding` | Class   |\n|-----------|---------|\n| `\"lg\"`    | `p-6`   |\n| `\"md\"`    | `p-4`   |\n| `\"sm\"`    | `p-2`   |\n| `\"xs\"`    | `p-1`   |\n| `\"2xs\"`   | `p-0.5` |\n| `\"fit\"`   | `p-0`   |\n\n| `rounding` | Class        |\n|------------|--------------|\n| `\"xs\"`     | `rounded-04` |\n| `\"sm\"`     | `rounded-08` |\n| `\"md\"`     | `rounded-12` |\n| `\"lg\"`     | `rounded-16` |\n\n## Props\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `padding` | `PaddingVariants` | `\"sm\"` | Padding preset |\n| `rounding` | `RoundingVariants` | `\"md\"` | Border-radius preset |\n| `background` | `\"none\" \\| \"light\" \\| \"heavy\"` | `\"light\"` | Background fill intensity |\n| `border` | `\"none\" \\| \"dashed\" \\| \"solid\"` | `\"none\"` | Border style |\n| `ref` | `React.Ref<HTMLDivElement>` | — | Ref forwarded to the root div |\n| `children` | `React.ReactNode` | — | Card content |\n\n## Usage\n\n```tsx\nimport { Card } from \"@opal/components\";\n\n// Default card (light background, no border, sm padding, md rounding)\n<Card>\n  <h2>Card Title</h2>\n  <p>Card content</p>\n</Card>\n\n// Large padding + rounding with solid border\n<Card padding=\"lg\" rounding=\"lg\" border=\"solid\">\n  <p>Spacious card</p>\n</Card>\n\n// Compact card with solid border\n<Card padding=\"xs\" rounding=\"sm\" border=\"solid\">\n  <p>Compact card</p>\n</Card>\n\n// Empty state card\n<Card background=\"none\" border=\"dashed\">\n  <p>No items yet</p>\n</Card>\n```\n"
  },
  {
    "path": "web/lib/opal/src/components/cards/card/components.tsx",
    "content": "import \"@opal/components/cards/card/styles.css\";\nimport type { PaddingVariants, RoundingVariants } from \"@opal/types\";\nimport { cardPaddingVariants, cardRoundingVariants } from \"@opal/shared\";\nimport { cn } from \"@opal/utils\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype BackgroundVariant = \"none\" | \"light\" | \"heavy\";\ntype BorderVariant = \"none\" | \"dashed\" | \"solid\";\n\ntype CardProps = {\n  /**\n   * Padding preset.\n   *\n   * | Value   | Class   |\n   * |---------|---------|\n   * | `\"lg\"`  | `p-6`   |\n   * | `\"md\"`  | `p-4`   |\n   * | `\"sm\"`  | `p-2`   |\n   * | `\"xs\"`  | `p-1`   |\n   * | `\"2xs\"` | `p-0.5` |\n   * | `\"fit\"` | `p-0`   |\n   *\n   * @default \"md\"\n   */\n  padding?: PaddingVariants;\n\n  /**\n   * Border-radius preset.\n   *\n   * | Value  | Class        |\n   * |--------|--------------|\n   * | `\"xs\"` | `rounded-04` |\n   * | `\"sm\"` | `rounded-08` |\n   * | `\"md\"` | `rounded-12` |\n   * | `\"lg\"` | `rounded-16` |\n   *\n   * @default \"md\"\n   */\n  rounding?: RoundingVariants;\n\n  /**\n   * Background fill intensity.\n   * - `\"none\"`: transparent background.\n   * - `\"light\"`: subtle tinted background (`bg-background-tint-00`).\n   * - `\"heavy\"`: stronger tinted background (`bg-background-tint-01`).\n   *\n   * @default \"light\"\n   */\n  background?: BackgroundVariant;\n\n  /**\n   * Border style.\n   * - `\"none\"`: no border.\n   * - `\"dashed\"`: dashed border.\n   * - `\"solid\"`: solid border.\n   *\n   * @default \"none\"\n   */\n  border?: BorderVariant;\n\n  /** Ref forwarded to the root `<div>`. */\n  ref?: React.Ref<HTMLDivElement>;\n\n  children?: React.ReactNode;\n};\n\n// ---------------------------------------------------------------------------\n// Card\n// ---------------------------------------------------------------------------\n\nfunction Card({\n  padding: paddingProp = \"md\",\n  rounding: roundingProp = \"md\",\n  background = \"light\",\n  border = \"none\",\n  ref,\n  children,\n}: CardProps) {\n  const padding = cardPaddingVariants[paddingProp];\n  const rounding = cardRoundingVariants[roundingProp];\n\n  return (\n    <div\n      ref={ref}\n      className={cn(\"opal-card\", padding, rounding)}\n      data-background={background}\n      data-border={border}\n    >\n      {children}\n    </div>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Exports\n// ---------------------------------------------------------------------------\n\nexport { Card, type CardProps, type BackgroundVariant, type BorderVariant };\n"
  },
  {
    "path": "web/lib/opal/src/components/cards/card/styles.css",
    "content": ".opal-card {\n  @apply w-full overflow-clip;\n}\n\n/* Background variants */\n.opal-card[data-background=\"none\"] {\n  @apply bg-transparent;\n}\n\n.opal-card[data-background=\"light\"] {\n  @apply bg-background-tint-00;\n}\n\n.opal-card[data-background=\"heavy\"] {\n  @apply bg-background-tint-01;\n}\n\n/* Border variants */\n.opal-card[data-border=\"none\"] {\n  border: none;\n}\n\n.opal-card[data-border=\"dashed\"] {\n  @apply border border-dashed;\n}\n\n.opal-card[data-border=\"solid\"] {\n  @apply border;\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/cards/empty-message-card/EmptyMessageCard.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { EmptyMessageCard } from \"@opal/components\";\nimport { SvgSparkle, SvgUsers } from \"@opal/icons\";\n\nconst PADDING_VARIANTS = [\"fit\", \"2xs\", \"xs\", \"sm\", \"md\", \"lg\"] as const;\n\nconst meta: Meta<typeof EmptyMessageCard> = {\n  title: \"opal/components/EmptyMessageCard\",\n  component: EmptyMessageCard,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof EmptyMessageCard>;\n\nexport const Default: Story = {\n  args: {\n    title: \"No items available.\",\n  },\n};\n\nexport const WithCustomIcon: Story = {\n  args: {\n    icon: SvgSparkle,\n    title: \"No agents selected.\",\n  },\n};\n\nexport const PaddingVariants: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 w-96\">\n      {PADDING_VARIANTS.map((padding) => (\n        <EmptyMessageCard\n          key={padding}\n          padding={padding}\n          title={`padding: ${padding}`}\n        />\n      ))}\n    </div>\n  ),\n};\n\nexport const Multiple: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 w-96\">\n      <EmptyMessageCard title=\"No models available.\" />\n      <EmptyMessageCard icon={SvgSparkle} title=\"No agents selected.\" />\n      <EmptyMessageCard icon={SvgUsers} title=\"No groups added.\" />\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/lib/opal/src/components/cards/empty-message-card/README.md",
    "content": "# EmptyMessageCard\n\n**Import:** `import { EmptyMessageCard, type EmptyMessageCardProps } from \"@opal/components\";`\n\nA pre-configured Card for empty states. Renders a transparent card with a dashed border containing a muted icon and message text using the `Content` layout.\n\n## Props\n\n| Prop      | Type                        | Default    | Description                      |\n| --------- | --------------------------- | ---------- | -------------------------------- |\n| `icon`    | `IconFunctionComponent`     | `SvgEmpty` | Icon displayed alongside the title |\n| `title`   | `string`                    | —          | Primary message text (required)  |\n| `padding` | `PaddingVariants`           | `\"sm\"`     | Padding preset for the card      |\n| `ref`     | `React.Ref<HTMLDivElement>` | —          | Ref forwarded to the root div    |\n\n## Usage\n\n```tsx\nimport { EmptyMessageCard } from \"@opal/components\";\nimport { SvgSparkle, SvgFileText } from \"@opal/icons\";\n\n// Default empty state\n<EmptyMessageCard title=\"No items yet.\" />\n\n// With custom icon\n<EmptyMessageCard icon={SvgSparkle} title=\"No agents selected.\" />\n\n// With custom padding\n<EmptyMessageCard padding=\"xs\" icon={SvgFileText} title=\"No documents available.\" />\n```\n"
  },
  {
    "path": "web/lib/opal/src/components/cards/empty-message-card/components.tsx",
    "content": "import { Card } from \"@opal/components/cards/card/components\";\nimport { Content } from \"@opal/layouts\";\nimport { SvgEmpty } from \"@opal/icons\";\nimport type { IconFunctionComponent, PaddingVariants } from \"@opal/types\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype EmptyMessageCardProps = {\n  /** Icon displayed alongside the title. */\n  icon?: IconFunctionComponent;\n\n  /** Primary message text. */\n  title: string;\n\n  /** Padding preset for the card. @default \"md\" */\n  padding?: PaddingVariants;\n\n  /** Ref forwarded to the root Card div. */\n  ref?: React.Ref<HTMLDivElement>;\n};\n\n// ---------------------------------------------------------------------------\n// EmptyMessageCard\n// ---------------------------------------------------------------------------\n\nfunction EmptyMessageCard({\n  icon = SvgEmpty,\n  title,\n  padding = \"md\",\n  ref,\n}: EmptyMessageCardProps) {\n  return (\n    <Card\n      ref={ref}\n      background=\"none\"\n      border=\"dashed\"\n      padding={padding}\n      rounding=\"md\"\n    >\n      <Content\n        icon={icon}\n        title={title}\n        sizePreset=\"secondary\"\n        variant=\"body\"\n        prominence=\"muted\"\n      />\n    </Card>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Exports\n// ---------------------------------------------------------------------------\n\nexport { EmptyMessageCard, type EmptyMessageCardProps };\n"
  },
  {
    "path": "web/lib/opal/src/components/cards/select-card/README.md",
    "content": "# SelectCard\n\n**Import:** `import { SelectCard, type SelectCardProps } from \"@opal/components\";`\n\nA stateful interactive card — the card counterpart to [`SelectButton`](../../buttons/select-button/README.md). Built on `Interactive.Stateful` (Slot) with a structural `<div>` that owns padding, rounding, border, and overflow. Always uses the `select-card` Interactive.Stateful variant internally.\n\n## Relationship to Card\n\n`Card` is a plain, non-interactive container. `SelectCard` adds stateful interactivity (hover, active, disabled, state-driven colors) by wrapping its root div with `Interactive.Stateful`. Both share the same independent `padding` / `rounding` API.\n\n## Relationship to SelectButton\n\nSelectCard and SelectButton share the same call stack:\n\n```\nInteractive.Stateful → structural element → content\n```\n\nThe key differences:\n\n- SelectCard renders a `<div>` (not `Interactive.Container`) — cards have their own rounding scale and don't need Container's height/min-width.\n- SelectCard has no `foldable` prop — use `Interactive.Foldable` directly inside children.\n- SelectCard's children are fully composable — use `CardHeaderLayout`, `ContentAction`, `Content`, buttons, etc. inside.\n\n## Architecture\n\n```\nInteractive.Stateful (variant=\"select-card\")  <- state, interaction, disabled, onClick\n  └─ div.opal-select-card                    <- padding, rounding, border, overflow\n       └─ children (composable)\n```\n\nThe `Interactive.Stateful` Slot merges onto the div, producing a single DOM element with both `.opal-select-card` and `.interactive` classes plus `data-interactive-*` attributes. This activates the Stateful color matrix for backgrounds and `--interactive-foreground` / `--interactive-foreground-icon` CSS properties for descendants.\n\n## Props\n\nInherits **all** props from `InteractiveStatefulProps` (except `variant`, which is hardcoded to `select-card`) plus:\n\n| Prop | Type | Default | Description |\n|---|---|---|---|\n| `padding` | `PaddingVariants` | `\"sm\"` | Padding preset |\n| `rounding` | `RoundingVariants` | `\"lg\"` | Border-radius preset |\n| `ref` | `React.Ref<HTMLDivElement>` | — | Ref forwarded to the root div |\n| `children` | `React.ReactNode` | — | Card content |\n\n### Padding scale\n\n| `padding` | Class   |\n|-----------|---------|\n| `\"lg\"`    | `p-6`   |\n| `\"md\"`    | `p-4`   |\n| `\"sm\"`    | `p-2`   |\n| `\"xs\"`    | `p-1`   |\n| `\"2xs\"`   | `p-0.5` |\n| `\"fit\"`   | `p-0`   |\n\n### Rounding scale\n\n| `rounding` | Class        |\n|------------|--------------|\n| `\"xs\"`     | `rounded-04` |\n| `\"sm\"`     | `rounded-08` |\n| `\"md\"`     | `rounded-12` |\n| `\"lg\"`     | `rounded-16` |\n\n### State colors (`select-card` variant)\n\n| State | Rest background | Rest foreground |\n|---|---|---|\n| `empty` | transparent | `text-04` / icon `text-03` |\n| `filled` | `background-tint-00` | `text-04` / icon `text-03` |\n| `selected` | `action-link-01` | `action-link-05` |\n\nThe selected state also gets a `border-action-link-05` via SelectCard's CSS.\n\n## CSS\n\nSelectCard's stylesheet (`styles.css`) provides:\n\n- `w-full overflow-clip border` on all states\n- `border-action-link-05` when `data-interactive-state=\"selected\"`\n\nAll background and foreground colors come from the Interactive.Stateful CSS, not from SelectCard.\n\n## Usage\n\n### Provider selection card\n\n```tsx\nimport { SelectCard } from \"@opal/components\";\nimport { CardHeaderLayout } from \"@opal/layouts\";\n\n<SelectCard state=\"selected\" onClick={handleClick}>\n  <CardHeaderLayout\n    icon={SvgGlobe}\n    title=\"Google\"\n    description=\"Search engine\"\n    sizePreset=\"main-ui\"\n    variant=\"section\"\n    rightChildren={<Button icon={SvgCheckSquare} variant=\"action\" prominence=\"tertiary\">Current Default</Button>}\n    bottomRightChildren={\n      <Button icon={SvgSettings} size=\"sm\" prominence=\"tertiary\" />\n    }\n  />\n</SelectCard>\n```\n\n### Disconnected state (clickable)\n\n```tsx\n<SelectCard state=\"empty\" onClick={handleConnect}>\n  <CardHeaderLayout\n    icon={SvgCloud}\n    title=\"OpenAI\"\n    description=\"Not configured\"\n    sizePreset=\"main-ui\"\n    variant=\"section\"\n    rightChildren={<Button rightIcon={SvgArrowExchange} prominence=\"tertiary\">Connect</Button>}\n  />\n</SelectCard>\n```\n\n### With foldable hover-reveal\n\n```tsx\n<SelectCard state=\"filled\">\n  <CardHeaderLayout\n    icon={SvgCloud}\n    title=\"OpenAI\"\n    description=\"Connected\"\n    sizePreset=\"main-ui\"\n    variant=\"section\"\n    rightChildren={\n      <div className=\"interactive-foldable-host flex items-center\">\n        <Interactive.Foldable>\n          <Button rightIcon={SvgArrowRightCircle} prominence=\"tertiary\">\n            Set as Default\n          </Button>\n        </Interactive.Foldable>\n      </div>\n    }\n  />\n</SelectCard>\n```\n"
  },
  {
    "path": "web/lib/opal/src/components/cards/select-card/SelectCard.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { SelectCard } from \"@opal/components\";\nimport { Button } from \"@opal/components\";\nimport { Content } from \"@opal/layouts\";\nimport {\n  SvgArrowExchange,\n  SvgArrowRightCircle,\n  SvgCheckSquare,\n  SvgGlobe,\n  SvgSettings,\n  SvgUnplug,\n} from \"@opal/icons\";\nimport { Interactive } from \"@opal/core\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport type { Decorator } from \"@storybook/react\";\n\nconst withTooltipProvider: Decorator = (Story) => (\n  <TooltipPrimitive.Provider>\n    <Story />\n  </TooltipPrimitive.Provider>\n);\n\nconst STATES = [\"empty\", \"filled\", \"selected\"] as const;\nconst PADDING_VARIANTS = [\"fit\", \"2xs\", \"xs\", \"sm\", \"md\", \"lg\"] as const;\nconst ROUNDING_VARIANTS = [\"xs\", \"sm\", \"md\", \"lg\"] as const;\n\nconst meta = {\n  title: \"opal/components/SelectCard\",\n  component: SelectCard,\n  tags: [\"autodocs\"],\n  decorators: [withTooltipProvider],\n  parameters: {\n    layout: \"centered\",\n  },\n} satisfies Meta<typeof SelectCard>;\n\nexport default meta;\n\ntype Story = StoryObj<typeof meta>;\n\n// ---------------------------------------------------------------------------\n// Stories\n// ---------------------------------------------------------------------------\n\nexport const Default: Story = {\n  render: () => (\n    <div className=\"w-96\">\n      <SelectCard state=\"empty\">\n        <div className=\"p-2\">\n          <Content\n            sizePreset=\"main-ui\"\n            variant=\"section\"\n            icon={SvgGlobe}\n            title=\"Google Search\"\n            description=\"Web search provider\"\n          />\n        </div>\n      </SelectCard>\n    </div>\n  ),\n};\n\nexport const AllStates: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 w-96\">\n      {STATES.map((state) => (\n        <SelectCard key={state} state={state}>\n          <div className=\"p-2\">\n            <Content\n              sizePreset=\"main-ui\"\n              variant=\"section\"\n              icon={SvgGlobe}\n              title={`State: ${state}`}\n              description=\"Hover to see interaction states.\"\n            />\n          </div>\n        </SelectCard>\n      ))}\n    </div>\n  ),\n};\n\nexport const Clickable: Story = {\n  render: () => (\n    <div className=\"w-96\">\n      <SelectCard state=\"empty\" onClick={() => alert(\"Card clicked\")}>\n        <div className=\"p-2\">\n          <Content\n            sizePreset=\"main-ui\"\n            variant=\"section\"\n            icon={SvgGlobe}\n            title=\"Clickable Card\"\n            description=\"Click anywhere on this card.\"\n          />\n        </div>\n      </SelectCard>\n    </div>\n  ),\n};\n\nexport const WithActions: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 w-[28rem]\">\n      {/* Disconnected */}\n      <SelectCard state=\"empty\" onClick={() => {}}>\n        <div className=\"flex flex-row items-stretch w-full\">\n          <div className=\"flex-1 p-2\">\n            <Content\n              sizePreset=\"main-ui\"\n              variant=\"section\"\n              icon={SvgGlobe}\n              title=\"Disconnected\"\n              description=\"Click to connect.\"\n            />\n          </div>\n          <div className=\"flex items-center\">\n            <Button prominence=\"tertiary\" rightIcon={SvgArrowExchange}>\n              Connect\n            </Button>\n          </div>\n        </div>\n      </SelectCard>\n\n      {/* Connected with foldable */}\n      <SelectCard state=\"filled\">\n        <div className=\"flex flex-row items-stretch w-full\">\n          <div className=\"flex-1 p-2\">\n            <Content\n              sizePreset=\"main-ui\"\n              variant=\"section\"\n              icon={SvgGlobe}\n              title=\"Connected\"\n              description=\"Hover to reveal Set as Default.\"\n            />\n          </div>\n          <div className=\"flex flex-col items-end justify-between\">\n            <div className=\"interactive-foldable-host flex items-center\">\n              <Interactive.Foldable>\n                <Button prominence=\"tertiary\" rightIcon={SvgArrowRightCircle}>\n                  Set as Default\n                </Button>\n              </Interactive.Foldable>\n            </div>\n            <div className=\"flex flex-row px-1 pb-1\">\n              <Button\n                icon={SvgUnplug}\n                tooltip=\"Disconnect\"\n                prominence=\"tertiary\"\n                size=\"sm\"\n              />\n              <Button\n                icon={SvgSettings}\n                tooltip=\"Edit\"\n                prominence=\"tertiary\"\n                size=\"sm\"\n              />\n            </div>\n          </div>\n        </div>\n      </SelectCard>\n\n      {/* Selected */}\n      <SelectCard state=\"selected\">\n        <div className=\"flex flex-row items-stretch w-full\">\n          <div className=\"flex-1 p-2\">\n            <Content\n              sizePreset=\"main-ui\"\n              variant=\"section\"\n              icon={SvgGlobe}\n              title=\"Selected\"\n              description=\"Currently the default provider.\"\n            />\n          </div>\n          <div className=\"flex flex-col items-end justify-between\">\n            <Button\n              variant=\"action\"\n              prominence=\"tertiary\"\n              icon={SvgCheckSquare}\n            >\n              Current Default\n            </Button>\n            <div className=\"flex flex-row px-1 pb-1\">\n              <Button\n                icon={SvgUnplug}\n                tooltip=\"Disconnect\"\n                prominence=\"tertiary\"\n                size=\"sm\"\n              />\n              <Button\n                icon={SvgSettings}\n                tooltip=\"Edit\"\n                prominence=\"tertiary\"\n                size=\"sm\"\n              />\n            </div>\n          </div>\n        </div>\n      </SelectCard>\n    </div>\n  ),\n};\n\nexport const PaddingVariants: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 w-96\">\n      {PADDING_VARIANTS.map((padding) => (\n        <SelectCard key={padding} state=\"filled\" padding={padding}>\n          <Content\n            sizePreset=\"main-ui\"\n            variant=\"section\"\n            icon={SvgGlobe}\n            title={`paddingVariant: ${padding}`}\n            description=\"Shows padding differences.\"\n          />\n        </SelectCard>\n      ))}\n    </div>\n  ),\n};\n\nexport const RoundingVariants: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 w-96\">\n      {ROUNDING_VARIANTS.map((rounding) => (\n        <SelectCard key={rounding} state=\"filled\" rounding={rounding}>\n          <Content\n            sizePreset=\"main-ui\"\n            variant=\"section\"\n            icon={SvgGlobe}\n            title={`roundingVariant: ${rounding}`}\n            description=\"Shows rounding differences.\"\n          />\n        </SelectCard>\n      ))}\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/lib/opal/src/components/cards/select-card/components.tsx",
    "content": "import \"@opal/components/cards/select-card/styles.css\";\nimport type { PaddingVariants, RoundingVariants } from \"@opal/types\";\nimport { cardPaddingVariants, cardRoundingVariants } from \"@opal/shared\";\nimport { cn } from \"@opal/utils\";\nimport { Interactive, type InteractiveStatefulProps } from \"@opal/core\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype SelectCardProps = Omit<InteractiveStatefulProps, \"variant\"> & {\n  /**\n   * Padding preset.\n   *\n   * | Value   | Class   |\n   * |---------|---------|\n   * | `\"lg\"`  | `p-6`   |\n   * | `\"md\"`  | `p-4`   |\n   * | `\"sm\"`  | `p-2`   |\n   * | `\"xs\"`  | `p-1`   |\n   * | `\"2xs\"` | `p-0.5` |\n   * | `\"fit\"` | `p-0`   |\n   *\n   * @default \"md\"\n   */\n  padding?: PaddingVariants;\n\n  /**\n   * Border-radius preset.\n   *\n   * | Value  | Class        |\n   * |--------|--------------|\n   * | `\"xs\"` | `rounded-04` |\n   * | `\"sm\"` | `rounded-08` |\n   * | `\"md\"` | `rounded-12` |\n   * | `\"lg\"` | `rounded-16` |\n   *\n   * @default \"md\"\n   */\n  rounding?: RoundingVariants;\n\n  /** Ref forwarded to the root `<div>`. */\n  ref?: React.Ref<HTMLDivElement>;\n\n  children?: React.ReactNode;\n};\n\n// ---------------------------------------------------------------------------\n// SelectCard\n// ---------------------------------------------------------------------------\n\n/**\n * A stateful interactive card — the card counterpart to `SelectButton`.\n *\n * Built on `Interactive.Stateful` (Slot) → a structural `<div>`. The\n * Stateful system owns background and foreground colors; the card owns\n * padding, rounding, border, and overflow.\n *\n * Children are fully composable — use `ContentAction`, `Content`, buttons,\n * `Interactive.Foldable`, etc. inside.\n *\n * @example\n * ```tsx\n * <SelectCard state=\"selected\" onClick={handleClick}>\n *   <ContentAction\n *     icon={SvgGlobe}\n *     title=\"Google\"\n *     description=\"Search engine\"\n *     rightChildren={<Button>Set as Default</Button>}\n *   />\n * </SelectCard>\n * ```\n */\nfunction SelectCard({\n  padding: paddingProp = \"md\",\n  rounding: roundingProp = \"md\",\n  ref,\n  children,\n  ...statefulProps\n}: SelectCardProps) {\n  const padding = cardPaddingVariants[paddingProp];\n  const rounding = cardRoundingVariants[roundingProp];\n\n  return (\n    <Interactive.Stateful {...statefulProps} variant=\"select-card\">\n      <div ref={ref} className={cn(\"opal-select-card\", padding, rounding)}>\n        {children}\n      </div>\n    </Interactive.Stateful>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Exports\n// ---------------------------------------------------------------------------\n\nexport { SelectCard, type SelectCardProps };\n"
  },
  {
    "path": "web/lib/opal/src/components/cards/select-card/styles.css",
    "content": "/* SelectCard — structural styles; colors handled by Interactive.Stateful */\n\n.opal-select-card {\n  @apply w-full overflow-clip border;\n}\n\n.opal-select-card[data-interactive-state=\"selected\"] {\n  @apply border-action-link-05;\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/index.ts",
    "content": "import \"@opal/components/tooltip.css\";\n\n/* Shared types */\nexport type TooltipSide = \"top\" | \"bottom\" | \"left\" | \"right\";\n\n/* Button */\nexport {\n  Button,\n  type ButtonProps,\n} from \"@opal/components/buttons/button/components\";\n\n/* SelectButton */\nexport {\n  SelectButton,\n  type SelectButtonProps,\n} from \"@opal/components/buttons/select-button/components\";\n\n/* OpenButton */\nexport {\n  OpenButton,\n  type OpenButtonProps,\n} from \"@opal/components/buttons/open-button/components\";\n\n/* FilterButton */\nexport {\n  FilterButton,\n  type FilterButtonProps,\n} from \"@opal/components/buttons/filter-button/components\";\n\n/* LineItemButton */\nexport {\n  LineItemButton,\n  type LineItemButtonProps,\n} from \"@opal/components/buttons/line-item-button/components\";\n\n/* SidebarTab */\nexport {\n  SidebarTab,\n  type SidebarTabProps,\n} from \"@opal/components/buttons/sidebar-tab/components\";\n\n/* Text */\nexport {\n  Text,\n  type TextProps,\n  type TextFont,\n  type TextColor,\n} from \"@opal/components/text/components\";\n\n/* Tag */\nexport {\n  Tag,\n  type TagProps,\n  type TagColor,\n} from \"@opal/components/tag/components\";\n\n/* Card */\nexport {\n  Card,\n  type CardProps,\n  type BackgroundVariant,\n  type BorderVariant,\n} from \"@opal/components/cards/card/components\";\n\n/* SelectCard */\nexport {\n  SelectCard,\n  type SelectCardProps,\n} from \"@opal/components/cards/select-card/components\";\n\n/* EmptyMessageCard */\nexport {\n  EmptyMessageCard,\n  type EmptyMessageCardProps,\n} from \"@opal/components/cards/empty-message-card/components\";\n\n/* Pagination */\nexport {\n  Pagination,\n  type PaginationProps,\n  type PaginationSize,\n} from \"@opal/components/pagination/components\";\n\n/* Table */\nexport { Table } from \"@opal/components/table/components\";\nexport { createTableColumns } from \"@opal/components/table/columns\";\nexport type { DataTableProps } from \"@opal/components/table/components\";\n"
  },
  {
    "path": "web/lib/opal/src/components/pagination/Pagination.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { Pagination } from \"@opal/components\";\nimport { useState } from \"react\";\n\nconst meta: Meta<typeof Pagination> = {\n  title: \"opal/components/Pagination\",\n  component: Pagination,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Pagination>;\n\n// ===========================================================================\n// variant=\"simple\"\n// ===========================================================================\n\nexport const Simple: Story = {\n  args: {\n    variant: \"simple\",\n    currentPage: 3,\n    totalPages: 10,\n  },\n};\n\nexport const SimpleSmall: Story = {\n  args: {\n    variant: \"simple\",\n    currentPage: 2,\n    totalPages: 8,\n    size: \"sm\",\n  },\n};\n\nexport const SimpleWithUnits: Story = {\n  args: {\n    variant: \"simple\",\n    currentPage: 1,\n    totalPages: 5,\n    units: \"pages\",\n  },\n};\n\nexport const SimpleArrowsOnly: Story = {\n  args: {\n    variant: \"simple\",\n    currentPage: 2,\n    totalPages: 8,\n    hidePages: true,\n  },\n};\n\nexport const SimpleAllSizes: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 items-start\">\n      {([\"lg\", \"md\", \"sm\"] as const).map((size) => (\n        <div key={size} className=\"flex flex-col gap-1\">\n          <span className=\"font-secondary-body text-text-03\">\n            size=&quot;{size}&quot;\n          </span>\n          <Pagination\n            variant=\"simple\"\n            currentPage={3}\n            totalPages={10}\n            size={size}\n          />\n        </div>\n      ))}\n    </div>\n  ),\n};\n\n// ===========================================================================\n// variant=\"count\"\n// ===========================================================================\n\nexport const Count: Story = {\n  args: {\n    variant: \"count\",\n    pageSize: 10,\n    totalItems: 95,\n    currentPage: 2,\n    totalPages: 10,\n  },\n};\n\nexport const CountWithUnits: Story = {\n  args: {\n    variant: \"count\",\n    pageSize: 25,\n    totalItems: 203,\n    currentPage: 1,\n    totalPages: 9,\n    units: \"items\",\n  },\n};\n\nexport const CountArrowsOnly: Story = {\n  args: {\n    variant: \"count\",\n    pageSize: 10,\n    totalItems: 50,\n    currentPage: 2,\n    totalPages: 5,\n    hidePages: true,\n  },\n};\n\nexport const CountAllSizes: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 items-start\">\n      {([\"lg\", \"md\", \"sm\"] as const).map((size) => (\n        <div key={size} className=\"flex flex-col gap-1\">\n          <span className=\"font-secondary-body text-text-03\">\n            size=&quot;{size}&quot;\n          </span>\n          <Pagination\n            variant=\"count\"\n            pageSize={10}\n            totalItems={95}\n            currentPage={3}\n            totalPages={10}\n            size={size}\n            units=\"items\"\n          />\n        </div>\n      ))}\n    </div>\n  ),\n};\n\n// ===========================================================================\n// variant=\"list\" (default)\n// ===========================================================================\n\nexport const List: Story = {\n  args: {\n    currentPage: 5,\n    totalPages: 20,\n    onChange: () => {},\n  },\n};\n\nexport const ListFewPages: Story = {\n  args: {\n    currentPage: 2,\n    totalPages: 4,\n    onChange: () => {},\n  },\n};\n\nexport const ListAllSizes: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-4 items-start\">\n      {([\"lg\", \"md\", \"sm\"] as const).map((size) => (\n        <div key={size} className=\"flex flex-col gap-1\">\n          <span className=\"font-secondary-body text-text-03\">\n            size=&quot;{size}&quot;\n          </span>\n          <Pagination\n            currentPage={3}\n            totalPages={10}\n            onChange={() => {}}\n            size={size}\n          />\n        </div>\n      ))}\n    </div>\n  ),\n};\n\n// ===========================================================================\n// Interactive\n// ===========================================================================\n\nfunction InteractiveSimpleDemo() {\n  const [page, setPage] = useState(1);\n  return (\n    <div className=\"flex flex-col gap-4 items-start\">\n      <Pagination\n        variant=\"simple\"\n        currentPage={page}\n        totalPages={15}\n        onChange={setPage}\n        units=\"pages\"\n      />\n      <span className=\"font-secondary-body text-text-03\">\n        Current page: {page}\n      </span>\n    </div>\n  );\n}\n\nexport const InteractiveSimple: Story = {\n  render: () => <InteractiveSimpleDemo />,\n};\n\nfunction InteractiveListDemo() {\n  const [page, setPage] = useState(1);\n  return (\n    <div className=\"flex flex-col gap-4 items-start\">\n      <Pagination currentPage={page} totalPages={15} onChange={setPage} />\n      <span className=\"font-secondary-body text-text-03\">\n        Current page: {page}\n      </span>\n    </div>\n  );\n}\n\nexport const InteractiveList: Story = {\n  render: () => <InteractiveListDemo />,\n};\n\nfunction InteractiveCountDemo() {\n  const [page, setPage] = useState(1);\n  const pageSize = 10;\n  const totalItems = 95;\n  const totalPages = Math.ceil(totalItems / pageSize);\n  return (\n    <div className=\"flex flex-col gap-4 items-start\">\n      <Pagination\n        variant=\"count\"\n        currentPage={page}\n        totalPages={totalPages}\n        pageSize={pageSize}\n        totalItems={totalItems}\n        onChange={setPage}\n        units=\"items\"\n      />\n      <span className=\"font-secondary-body text-text-03\">\n        Current page: {page}\n      </span>\n    </div>\n  );\n}\n\nexport const InteractiveCount: Story = {\n  render: () => <InteractiveCountDemo />,\n};\n"
  },
  {
    "path": "web/lib/opal/src/components/pagination/README.md",
    "content": "# Pagination\n\n**Import:** `import { Pagination, type PaginationProps } from \"@opal/components\";`\n\nPage navigation with three display variants and prev/next arrow controls.\n\n## Variants\n\n### `\"list\"` (default)\n\nNumbered page buttons with ellipsis truncation for large page counts.\n\n```tsx\n<Pagination currentPage={3} totalPages={10} onChange={setPage} />\n```\n\n### `\"simple\"`\n\nCompact `currentPage/totalPages` display with prev/next arrows. Can be reduced to just arrows via `hidePages`.\n\n```tsx\n// With summary (default)\n<Pagination variant=\"simple\" currentPage={1} totalPages={5} onChange={setPage} />\n\n// Arrows only\n<Pagination variant=\"simple\" currentPage={1} totalPages={5} onChange={setPage} hidePages />\n\n// With units\n<Pagination variant=\"simple\" currentPage={1} totalPages={5} onChange={setPage} units=\"pages\" />\n```\n\n### `\"count\"`\n\nItem-count display (`X~Y of Z`) with prev/next arrows. Designed for table footers.\n\n```tsx\n// Basic\n<Pagination\n  variant=\"count\"\n  pageSize={10}\n  totalItems={95}\n  currentPage={2}\n  totalPages={10}\n  onChange={setPage}\n/>\n\n// With units\n<Pagination\n  variant=\"count\"\n  pageSize={10}\n  totalItems={95}\n  currentPage={2}\n  totalPages={10}\n  onChange={setPage}\n  units=\"items\"\n/>\n```\n\n## Props (shared)\n\n| Prop | Type | Default | Description |\n|---|---|---|---|\n| `variant` | `\"list\" \\| \"simple\" \\| \"count\"` | `\"list\"` | Display variant |\n| `currentPage` | `number` | **(required)** | 1-based current page number |\n| `totalPages` | `number` | **(required)** | Total number of pages |\n| `onChange` | `(page: number) => void` | — | Called when the page changes |\n| `size` | `PaginationSize` | `\"lg\"` | Button and text sizing |\n\n## Props (variant-specific)\n\n### `\"simple\"`\n\n| Prop | Type | Default | Description |\n|---|---|---|---|\n| `hidePages` | `boolean` | `false` | Hides the `currentPage/totalPages` text between arrows |\n| `units` | `string` | — | Label after the summary (e.g. `\"pages\"`), always 4px spacing |\n\n### `\"count\"`\n\n| Prop | Type | Default | Description |\n|---|---|---|---|\n| `pageSize` | `number` | **(required)** | Items per page (for range calculation) |\n| `totalItems` | `number` | **(required)** | Total item count |\n| `hidePages` | `boolean` | `false` | Hides the current page number between arrows |\n| `units` | `string` | — | Label after the total (e.g. `\"items\"`), always 4px spacing |\n\n### `PaginationSize`\n\n`\"lg\" | \"md\" | \"sm\"`\n"
  },
  {
    "path": "web/lib/opal/src/components/pagination/components.tsx",
    "content": "\"use client\";\n\nimport { Button } from \"@opal/components\";\nimport { Disabled } from \"@opal/core\";\nimport { SvgArrowRight, SvgChevronLeft, SvgChevronRight } from \"@opal/icons\";\nimport { containerSizeVariants } from \"@opal/shared\";\nimport type { RichStr, WithoutStyles } from \"@opal/types\";\nimport { Text } from \"@opal/components\";\nimport { toPlainString } from \"@opal/components/text/InlineMarkdown\";\nimport { cn } from \"@opal/utils\";\nimport * as PopoverPrimitive from \"@radix-ui/react-popover\";\nimport {\n  useState,\n  type ChangeEvent,\n  type HTMLAttributes,\n  type KeyboardEvent,\n  type ReactNode,\n} from \"react\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype PaginationSize = \"lg\" | \"md\" | \"sm\";\n\n/**\n * Compact `currentPage / totalPages` display with prev/next arrows.\n */\ninterface SimplePaginationProps\n  extends Omit<WithoutStyles<HTMLAttributes<HTMLDivElement>>, \"onChange\"> {\n  variant: \"simple\";\n  /** The 1-based current page number. */\n  currentPage: number;\n  /** Total number of pages. */\n  totalPages: number;\n  /** Called when the page changes. */\n  onChange?: (page: number) => void;\n  /** Controls button and text sizing. Default: `\"lg\"`. */\n  size?: PaginationSize;\n  /** Hides the `currentPage/totalPages` summary text between arrows. Default: `false`. */\n  hidePages?: boolean;\n  /** Unit label shown after the summary (e.g. `\"pages\"`). Always has 4px spacing. */\n  units?: string | RichStr;\n}\n\n/**\n * Item-count display (`X~Y of Z`) with prev/next arrows.\n * Designed for table footers.\n */\ninterface CountPaginationProps\n  extends Omit<WithoutStyles<HTMLAttributes<HTMLDivElement>>, \"onChange\"> {\n  variant: \"count\";\n  /** The 1-based current page number. */\n  currentPage: number;\n  /** Total number of pages. */\n  totalPages: number;\n  /** Number of items displayed per page. Used to compute the visible range. */\n  pageSize: number;\n  /** Total number of items across all pages. */\n  totalItems: number;\n  /** Called when the page changes. */\n  onChange?: (page: number) => void;\n  /** Controls button and text sizing. Default: `\"lg\"`. */\n  size?: PaginationSize;\n  /** Hides the current page number between the arrows. Default: `false`. */\n  hidePages?: boolean;\n  /** Unit label shown after the total count (e.g. `\"items\"`). Always has 4px spacing. */\n  units?: string | RichStr;\n}\n\n/**\n * Numbered page buttons with ellipsis truncation for large page counts.\n * This is the default variant.\n */\ninterface ListPaginationProps\n  extends Omit<WithoutStyles<HTMLAttributes<HTMLDivElement>>, \"onChange\"> {\n  variant?: \"list\";\n  /** The 1-based current page number. */\n  currentPage: number;\n  /** Total number of pages. */\n  totalPages: number;\n  /** Called when the page changes. */\n  onChange: (page: number) => void;\n  /** Controls button and text sizing. Default: `\"lg\"`. */\n  size?: PaginationSize;\n}\n\n/**\n * Discriminated union of all pagination variants.\n * Use `variant` to select between `\"simple\"`, `\"count\"`, and `\"list\"` (default).\n */\ntype PaginationProps =\n  | SimplePaginationProps\n  | CountPaginationProps\n  | ListPaginationProps;\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Computes the page numbers to display.\n *\n * - <=7 pages: render all pages individually (no ellipsis).\n * - >7 pages: always render exactly 7 slots (numbers or ellipsis).\n *   First and last page are always shown. Ellipsis takes one slot.\n *\n * Examples for totalPages=20:\n * - page 1:  `1  2  3  4  5  ...  20`\n * - page 4:  `1  2  3  4  5  ...  20`\n * - page 5:  `1  ...  4  5  6  ...  20`\n * - page 16: `1  ...  15  16  17  ...  20`\n * - page 17: `1  ...  16  17  18  19  20`\n * - page 20: `1  ...  16  17  18  19  20`\n */\nfunction getPageNumbers(\n  currentPage: number,\n  totalPages: number\n): (number | string)[] {\n  if (totalPages <= 7) {\n    const pages: number[] = [];\n    for (let i = 1; i <= totalPages; i++) pages.push(i);\n    return pages;\n  }\n\n  // Always 7 slots. First and last are always page 1 and totalPages.\n  // That leaves 5 inner slots.\n\n  // Near the start: no start-ellipsis needed\n  // Slots: 1, 2, 3, 4, 5, ..., totalPages\n  if (currentPage <= 4) {\n    return [1, 2, 3, 4, 5, \"end-ellipsis\", totalPages];\n  }\n\n  // Near the end: no end-ellipsis needed\n  // Slots: 1, ..., tp-4, tp-3, tp-2, tp-1, tp\n  if (currentPage >= totalPages - 3) {\n    return [\n      1,\n      \"start-ellipsis\",\n      totalPages - 4,\n      totalPages - 3,\n      totalPages - 2,\n      totalPages - 1,\n      totalPages,\n    ];\n  }\n\n  // Middle: both ellipses\n  // Slots: 1, ..., cur-1, cur, cur+1, ..., totalPages\n  return [\n    1,\n    \"start-ellipsis\",\n    currentPage - 1,\n    currentPage,\n    currentPage + 1,\n    \"end-ellipsis\",\n    totalPages,\n  ];\n}\n\nfunction monoClass(size: PaginationSize): string {\n  return size === \"sm\" ? \"font-secondary-mono\" : \"font-main-ui-mono\";\n}\n\nfunction textClasses(size: PaginationSize, style: \"mono\" | \"muted\"): string {\n  if (style === \"mono\") return monoClass(size);\n  return size === \"sm\" ? \"font-secondary-body\" : \"font-main-ui-muted\";\n}\n\nconst PAGE_NUMBER_FONT: Record<\n  PaginationSize,\n  { active: string; inactive: string }\n> = {\n  lg: {\n    active: \"font-main-ui-body text-text-04\",\n    inactive: \"font-main-ui-muted text-text-02\",\n  },\n  md: {\n    active: \"font-secondary-action text-text-04\",\n    inactive: \"font-secondary-body text-text-02\",\n  },\n  sm: {\n    active: \"font-secondary-action text-text-04\",\n    inactive: \"font-secondary-body text-text-02\",\n  },\n};\n\n// ---------------------------------------------------------------------------\n// GoToPagePopup\n// ---------------------------------------------------------------------------\n\ninterface GoToPagePopupProps {\n  totalPages: number;\n  onSubmit: (page: number) => void;\n  children: ReactNode;\n}\n\nfunction GoToPagePopup({ totalPages, onSubmit, children }: GoToPagePopupProps) {\n  const [open, setOpen] = useState(false);\n  const [value, setValue] = useState(\"\");\n\n  const parsed = parseInt(value, 10);\n  const isValid = !isNaN(parsed) && parsed >= 1 && parsed <= totalPages;\n\n  function handleChange(e: ChangeEvent<HTMLInputElement>) {\n    const raw = e.target.value;\n    if (raw === \"\" || /^\\d+$/.test(raw)) {\n      setValue(raw);\n    }\n  }\n\n  function handleSubmit() {\n    if (!isValid) return;\n    onSubmit(parsed);\n    setOpen(false);\n    setValue(\"\");\n  }\n\n  function handleKeyDown(e: KeyboardEvent<HTMLInputElement>) {\n    if (e.key === \"Enter\") {\n      handleSubmit();\n    }\n  }\n\n  return (\n    <PopoverPrimitive.Root\n      open={open}\n      onOpenChange={(next) => {\n        setOpen(next);\n        if (!next) setValue(\"\");\n      }}\n    >\n      <PopoverPrimitive.Trigger asChild>{children}</PopoverPrimitive.Trigger>\n      <PopoverPrimitive.Portal>\n        <PopoverPrimitive.Content\n          className={cn(\n            \"flex items-center gap-1 p-1\",\n            \"bg-background-neutral-00 rounded-12 border border-border-01 shadow-md z-popover\",\n            \"data-[state=open]:animate-in data-[state=closed]:animate-out\",\n            \"data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0\",\n            \"data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95\"\n          )}\n          sideOffset={4}\n        >\n          {/* TODO(@raunakab): migrate this input to the opal Input component once inputs have been migrated into Opal */}\n          <input\n            type=\"text\"\n            inputMode=\"numeric\"\n            value={value}\n            onChange={handleChange}\n            onKeyDown={handleKeyDown}\n            placeholder=\"Go to page\"\n            autoFocus\n            className={cn(\n              \"w-[7rem] bg-transparent px-1.5 py-1 rounded-08\",\n              containerSizeVariants.lg.height,\n              \"border border-border-02 focus:outline-none focus:border-border-04\",\n              \"font-main-ui-body\",\n              \"text-text-04 placeholder:text-text-02\"\n            )}\n          />\n          <Disabled disabled={!isValid}>\n            <Button\n              icon={SvgArrowRight}\n              size=\"lg\"\n              onClick={handleSubmit}\n              tooltip=\"Go to page\"\n            />\n          </Disabled>\n        </PopoverPrimitive.Content>\n      </PopoverPrimitive.Portal>\n    </PopoverPrimitive.Root>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Nav buttons (shared across all variants)\n// ---------------------------------------------------------------------------\n\ninterface NavButtonsProps {\n  currentPage: number;\n  totalPages: number;\n  onChange: (page: number) => void;\n  size: PaginationSize;\n  children?: ReactNode;\n}\n\nfunction NavButtons({\n  currentPage,\n  totalPages,\n  onChange,\n  size,\n  children,\n}: NavButtonsProps) {\n  return (\n    <>\n      <Disabled disabled={currentPage <= 1}>\n        <Button\n          icon={SvgChevronLeft}\n          onClick={() => onChange(Math.max(1, currentPage - 1))}\n          size={size}\n          prominence=\"tertiary\"\n          tooltip=\"Previous page\"\n        />\n      </Disabled>\n      {children}\n      <Disabled disabled={currentPage >= totalPages}>\n        <Button\n          icon={SvgChevronRight}\n          onClick={() => onChange(Math.min(totalPages, currentPage + 1))}\n          size={size}\n          prominence=\"tertiary\"\n          tooltip=\"Next page\"\n        />\n      </Disabled>\n    </>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// PaginationSimple\n// ---------------------------------------------------------------------------\n\nfunction PaginationSimple({\n  currentPage,\n  totalPages,\n  onChange,\n  size = \"lg\",\n  hidePages = false,\n  units,\n  ...props\n}: SimplePaginationProps) {\n  const handleChange = (page: number) => onChange?.(page);\n\n  const label = `${currentPage}/${totalPages}${\n    units ? ` ${toPlainString(units)}` : \"\"\n  }`;\n\n  return (\n    <div {...props} className=\"flex items-center\">\n      <NavButtons\n        currentPage={currentPage}\n        totalPages={totalPages}\n        onChange={handleChange}\n        size={size}\n      >\n        {!hidePages && (\n          <GoToPagePopup totalPages={totalPages} onSubmit={handleChange}>\n            <Button size={size} prominence=\"tertiary\">\n              {label}\n            </Button>\n          </GoToPagePopup>\n        )}\n      </NavButtons>\n    </div>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// PaginationCount\n// ---------------------------------------------------------------------------\n\nfunction PaginationCount({\n  pageSize,\n  totalItems,\n  currentPage,\n  totalPages,\n  onChange,\n  size = \"lg\",\n  hidePages = false,\n  units,\n  ...props\n}: CountPaginationProps) {\n  const handleChange = (page: number) => onChange?.(page);\n  const rangeStart = totalItems === 0 ? 0 : (currentPage - 1) * pageSize + 1;\n  const rangeEnd = Math.min(currentPage * pageSize, totalItems);\n\n  return (\n    <div {...props} className=\"flex items-center gap-1\">\n      {/* Summary: range of total [units] */}\n      <span\n        className={cn(\n          \"inline-flex items-center gap-1\",\n          monoClass(size),\n          \"text-text-03\"\n        )}\n      >\n        {rangeStart}~{rangeEnd}\n        <span className={textClasses(size, \"muted\")}>of</span>\n        {totalItems}\n        {units && (\n          <Text\n            color=\"inherit\"\n            font={size === \"sm\" ? \"secondary-body\" : \"main-ui-muted\"}\n          >\n            {units}\n          </Text>\n        )}\n      </span>\n\n      {/* Buttons: < [page] > */}\n      <div className=\"flex items-center\">\n        <NavButtons\n          currentPage={currentPage}\n          totalPages={totalPages}\n          onChange={handleChange}\n          size={size}\n        >\n          {!hidePages && (\n            <GoToPagePopup totalPages={totalPages} onSubmit={handleChange}>\n              <Button size={size} prominence=\"tertiary\">\n                {String(currentPage)}\n              </Button>\n            </GoToPagePopup>\n          )}\n        </NavButtons>\n      </div>\n    </div>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// PaginationList (default)\n// ---------------------------------------------------------------------------\n\nfunction PaginationList({\n  currentPage,\n  totalPages,\n  onChange,\n  size = \"lg\",\n  ...props\n}: ListPaginationProps) {\n  const pageNumbers = getPageNumbers(currentPage, totalPages);\n  const fonts = PAGE_NUMBER_FONT[size];\n\n  return (\n    <div {...props} className=\"flex items-center gap-1\">\n      <NavButtons\n        currentPage={currentPage}\n        totalPages={totalPages}\n        onChange={onChange}\n        size={size}\n      >\n        <div className=\"flex items-center\">\n          {pageNumbers.map((page) => {\n            if (typeof page === \"string\") {\n              return (\n                <GoToPagePopup\n                  key={page}\n                  totalPages={totalPages}\n                  onSubmit={onChange}\n                >\n                  <Button\n                    size={size}\n                    prominence=\"tertiary\"\n                    icon={({ className: iconClassName }) => (\n                      <div\n                        className={cn(\n                          iconClassName,\n                          \"flex flex-col justify-center\",\n                          fonts.inactive\n                        )}\n                      >\n                        ...\n                      </div>\n                    )}\n                  />\n                </GoToPagePopup>\n              );\n            }\n\n            const isActive = page === currentPage;\n\n            return (\n              <Button\n                key={page}\n                onClick={() => onChange(page)}\n                size={size}\n                prominence=\"tertiary\"\n                interaction={isActive ? \"hover\" : \"rest\"}\n                icon={({ className: iconClassName }) => (\n                  <div\n                    className={cn(\n                      iconClassName,\n                      \"flex flex-col justify-center\",\n                      isActive ? fonts.active : fonts.inactive\n                    )}\n                  >\n                    {page}\n                  </div>\n                )}\n              />\n            );\n          })}\n        </div>\n      </NavButtons>\n    </div>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Pagination (entry point)\n// ---------------------------------------------------------------------------\n\n/**\n * Page navigation component with three variants:\n *\n * - `\"list\"` (default) — Numbered page buttons with ellipsis truncation.\n * - `\"simple\"` — Compact `currentPage / totalPages` with prev/next arrows.\n * - `\"count\"` — Item-count display (`X~Y of Z`) with prev/next arrows.\n *\n * All variants include a \"go to page\" popup activated by clicking on the\n * page indicator (simple/count) or the ellipsis (list).\n *\n * @example\n * ```tsx\n * // List (default)\n * <Pagination currentPage={3} totalPages={10} onChange={setPage} />\n *\n * // Simple\n * <Pagination variant=\"simple\" currentPage={1} totalPages={5} onChange={setPage} />\n *\n * // Count\n * <Pagination variant=\"count\" pageSize={10} totalItems={95} currentPage={2} totalPages={10} onChange={setPage} />\n * ```\n */\nfunction Pagination(props: PaginationProps) {\n  const normalized = {\n    ...props,\n    totalPages: Math.max(1, props.totalPages),\n    currentPage: Math.max(\n      1,\n      Math.min(props.currentPage, Math.max(1, props.totalPages))\n    ),\n  };\n  const variant = normalized.variant ?? \"list\";\n  switch (variant) {\n    case \"simple\":\n      return <PaginationSimple {...(normalized as SimplePaginationProps)} />;\n    case \"count\":\n      return <PaginationCount {...(normalized as CountPaginationProps)} />;\n    case \"list\":\n      return <PaginationList {...(normalized as ListPaginationProps)} />;\n  }\n}\n\nexport { Pagination, type PaginationProps, type PaginationSize };\n"
  },
  {
    "path": "web/lib/opal/src/components/table/ActionsContainer.tsx",
    "content": "\"use client\";\n\nimport { useTableSize } from \"@opal/components/table/TableSizeContext\";\n\ninterface ActionsContainerProps {\n  type: \"head\" | \"cell\";\n  /** Pass-through click handler (e.g. stopPropagation on body cells). */\n  onClick?: (e: React.MouseEvent) => void;\n  children: React.ReactNode;\n}\n\nexport default function ActionsContainer({\n  type,\n  children,\n  onClick,\n}: ActionsContainerProps) {\n  const size = useTableSize();\n  const Tag = type === \"head\" ? \"th\" : \"td\";\n\n  return (\n    <Tag\n      className=\"tbl-actions\"\n      data-type={type}\n      data-size={size}\n      onClick={onClick}\n    >\n      <div className=\"flex h-full items-center justify-end\">{children}</div>\n    </Tag>\n  );\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/table/ColumnSortabilityPopover.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport {\n  type Table,\n  type ColumnDef,\n  type RowData,\n  type SortingState,\n} from \"@tanstack/react-table\";\nimport { Button, LineItemButton } from \"@opal/components\";\nimport { useTableSize } from \"@opal/components/table/TableSizeContext\";\nimport { SvgArrowUpDown, SvgSortOrder, SvgCheck } from \"@opal/icons\";\nimport Popover from \"@/refresh-components/Popover\";\nimport Divider from \"@/refresh-components/Divider\";\nimport Text from \"@/refresh-components/texts/Text\";\n\n// ---------------------------------------------------------------------------\n// Popover UI\n// ---------------------------------------------------------------------------\n\ninterface SortingPopoverProps<TData extends RowData = RowData> {\n  table: Table<TData>;\n  sorting: SortingState;\n  footerText?: string;\n  ascendingLabel?: string;\n  descendingLabel?: string;\n}\n\nfunction SortingPopover<TData extends RowData>({\n  table,\n  sorting,\n  footerText,\n  ascendingLabel = \"Ascending\",\n  descendingLabel = \"Descending\",\n}: SortingPopoverProps<TData>) {\n  const size = useTableSize();\n  const [open, setOpen] = useState(false);\n  const sortableColumns = table\n    .getAllLeafColumns()\n    .filter((col) => col.getCanSort());\n\n  const currentSort = sorting[0] ?? null;\n\n  return (\n    <Popover open={open} onOpenChange={setOpen}>\n      <Popover.Trigger asChild>\n        <Button\n          icon={currentSort === null ? SvgArrowUpDown : SvgSortOrder}\n          interaction={open ? \"hover\" : \"rest\"}\n          size={size === \"md\" ? \"sm\" : \"md\"}\n          prominence=\"tertiary\"\n          tooltip=\"Sort\"\n        />\n      </Popover.Trigger>\n\n      <Popover.Content width=\"lg\" align=\"end\" side=\"bottom\">\n        <Popover.Menu\n          footer={\n            footerText ? (\n              <div className=\"px-2 py-1\">\n                <Text secondaryBody text03>\n                  {footerText}\n                </Text>\n              </div>\n            ) : undefined\n          }\n        >\n          <Divider showTitle text=\"Sort by\" />\n\n          <LineItemButton\n            selectVariant=\"select-heavy\"\n            state={currentSort === null ? \"selected\" : \"empty\"}\n            title=\"Manual Ordering\"\n            sizePreset=\"main-ui\"\n            rightChildren={\n              currentSort === null ? (\n                <SvgCheck size={16} className=\"text-action-link-05\" />\n              ) : undefined\n            }\n            onClick={() => {\n              table.resetSorting();\n            }}\n          />\n\n          {sortableColumns.map((column) => {\n            const isSorted = currentSort?.id === column.id;\n            const label =\n              typeof column.columnDef.header === \"string\"\n                ? column.columnDef.header\n                : column.id;\n\n            return (\n              <LineItemButton\n                key={column.id}\n                selectVariant=\"select-heavy\"\n                state={isSorted ? \"selected\" : \"empty\"}\n                title={label}\n                sizePreset=\"main-ui\"\n                rightChildren={\n                  isSorted ? (\n                    <SvgCheck size={16} className=\"text-action-link-05\" />\n                  ) : undefined\n                }\n                onClick={() => {\n                  if (isSorted) {\n                    table.resetSorting();\n                    return;\n                  }\n                  column.toggleSorting(false);\n                }}\n              />\n            );\n          })}\n\n          {currentSort !== null && (\n            <>\n              <Divider showTitle text=\"Sorting Order\" />\n\n              <LineItemButton\n                selectVariant=\"select-heavy\"\n                state={!currentSort.desc ? \"selected\" : \"empty\"}\n                title={ascendingLabel}\n                sizePreset=\"main-ui\"\n                rightChildren={\n                  !currentSort.desc ? (\n                    <SvgCheck size={16} className=\"text-action-link-05\" />\n                  ) : undefined\n                }\n                onClick={() => {\n                  table.setSorting([{ id: currentSort.id, desc: false }]);\n                }}\n              />\n\n              <LineItemButton\n                selectVariant=\"select-heavy\"\n                state={currentSort.desc ? \"selected\" : \"empty\"}\n                title={descendingLabel}\n                sizePreset=\"main-ui\"\n                rightChildren={\n                  currentSort.desc ? (\n                    <SvgCheck size={16} className=\"text-action-link-05\" />\n                  ) : undefined\n                }\n                onClick={() => {\n                  table.setSorting([{ id: currentSort.id, desc: true }]);\n                }}\n              />\n            </>\n          )}\n        </Popover.Menu>\n      </Popover.Content>\n    </Popover>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Column definition factory\n// ---------------------------------------------------------------------------\n\ninterface CreateSortingColumnOptions {\n  footerText?: string;\n  ascendingLabel?: string;\n  descendingLabel?: string;\n}\n\nfunction createSortingColumn<TData>(\n  options?: CreateSortingColumnOptions\n): ColumnDef<TData, unknown> {\n  return {\n    id: \"__sorting\",\n    size: 44,\n    enableHiding: false,\n    enableSorting: false,\n    enableResizing: false,\n    header: ({ table }) => (\n      <SortingPopover\n        table={table}\n        sorting={table.getState().sorting}\n        footerText={options?.footerText}\n        ascendingLabel={options?.ascendingLabel}\n        descendingLabel={options?.descendingLabel}\n      />\n    ),\n    cell: () => null,\n  };\n}\n\nexport { SortingPopover, createSortingColumn };\n"
  },
  {
    "path": "web/lib/opal/src/components/table/ColumnVisibilityPopover.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport {\n  type Table,\n  type ColumnDef,\n  type RowData,\n  type VisibilityState,\n} from \"@tanstack/react-table\";\nimport { Button, LineItemButton, Tag } from \"@opal/components\";\nimport { useTableSize } from \"@opal/components/table/TableSizeContext\";\nimport { SvgColumn, SvgCheck } from \"@opal/icons\";\nimport Popover from \"@/refresh-components/Popover\";\nimport Divider from \"@/refresh-components/Divider\";\n\n// ---------------------------------------------------------------------------\n// Popover UI\n// ---------------------------------------------------------------------------\n\ninterface ColumnVisibilityPopoverProps<TData extends RowData = RowData> {\n  table: Table<TData>;\n  columnVisibility: VisibilityState;\n}\n\nfunction ColumnVisibilityPopover<TData extends RowData>({\n  table,\n  columnVisibility,\n}: ColumnVisibilityPopoverProps<TData>) {\n  const size = useTableSize();\n  const [open, setOpen] = useState(false);\n\n  // User-defined columns only (exclude internal qualifier/actions)\n  const dataColumns = table\n    .getAllLeafColumns()\n    .filter(\n      (col) =>\n        !col.id.startsWith(\"__\") &&\n        col.id !== \"qualifier\" &&\n        typeof col.columnDef.header === \"string\" &&\n        col.columnDef.header.trim() !== \"\"\n    );\n\n  return (\n    <Popover open={open} onOpenChange={setOpen}>\n      <Popover.Trigger asChild>\n        <Button\n          icon={SvgColumn}\n          interaction={open ? \"hover\" : \"rest\"}\n          size={size === \"md\" ? \"sm\" : \"md\"}\n          prominence=\"tertiary\"\n          tooltip=\"Columns\"\n        />\n      </Popover.Trigger>\n\n      <Popover.Content width=\"lg\" align=\"end\" side=\"bottom\">\n        <Divider showTitle text=\"Shown Columns\" />\n        <Popover.Menu>\n          {dataColumns.map((column) => {\n            const canHide = column.getCanHide();\n            const isVisible = columnVisibility[column.id] !== false;\n            const label =\n              typeof column.columnDef.header === \"string\"\n                ? column.columnDef.header\n                : column.id;\n\n            return (\n              <LineItemButton\n                key={column.id}\n                selectVariant=\"select-heavy\"\n                state={isVisible ? \"selected\" : \"empty\"}\n                title={label}\n                sizePreset=\"main-ui\"\n                rightChildren={\n                  !canHide ? (\n                    <div className=\"flex items-center\">\n                      <Tag title=\"Always Shown\" color=\"blue\" />\n                    </div>\n                  ) : isVisible ? (\n                    <SvgCheck size={16} className=\"text-action-link-05\" />\n                  ) : undefined\n                }\n                onClick={canHide ? () => column.toggleVisibility() : undefined}\n              />\n            );\n          })}\n        </Popover.Menu>\n      </Popover.Content>\n    </Popover>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Column definition factory\n// ---------------------------------------------------------------------------\n\nfunction createColumnVisibilityColumn<TData>(): ColumnDef<TData, unknown> {\n  return {\n    id: \"__columnVisibility\",\n    size: 44,\n    enableHiding: false,\n    enableSorting: false,\n    enableResizing: false,\n    header: ({ table }) => (\n      <ColumnVisibilityPopover\n        table={table}\n        columnVisibility={table.getState().columnVisibility}\n      />\n    ),\n    cell: () => null,\n  };\n}\n\nexport { ColumnVisibilityPopover, createColumnVisibilityColumn };\n"
  },
  {
    "path": "web/lib/opal/src/components/table/DragOverlayRow.tsx",
    "content": "import { memo } from \"react\";\nimport { type Row, flexRender } from \"@tanstack/react-table\";\nimport TableRow from \"@opal/components/table/TableRow\";\nimport TableCell from \"@opal/components/table/TableCell\";\nimport QualifierContainer from \"@opal/components/table/QualifierContainer\";\nimport TableQualifier from \"@opal/components/table/TableQualifier\";\nimport ActionsContainer from \"@opal/components/table/ActionsContainer\";\nimport type {\n  OnyxColumnDef,\n  OnyxQualifierColumn,\n} from \"@opal/components/table/types\";\n\ninterface DragOverlayRowProps<TData> {\n  row: Row<TData>;\n  columnWidths?: Record<string, number>;\n  columnKindMap?: Map<string, OnyxColumnDef<TData>>;\n  qualifierColumn?: OnyxQualifierColumn<TData> | null;\n  isSelectable?: boolean;\n}\n\nfunction DragOverlayRowInner<TData>({\n  row,\n  columnWidths,\n  columnKindMap,\n  qualifierColumn,\n  isSelectable = false,\n}: DragOverlayRowProps<TData>) {\n  const tableWidth = columnWidths\n    ? Object.values(columnWidths).reduce((sum, w) => sum + w, 0)\n    : undefined;\n\n  return (\n    <table\n      className=\"border-collapse\"\n      style={{\n        tableLayout: \"fixed\",\n        ...(tableWidth != null ? { width: tableWidth } : { minWidth: \"100%\" }),\n      }}\n    >\n      {columnWidths && (\n        <colgroup>\n          {row.getVisibleCells().map((cell) => (\n            <col\n              key={cell.column.id}\n              style={{ width: columnWidths[cell.column.id] }}\n            />\n          ))}\n        </colgroup>\n      )}\n      <tbody>\n        <TableRow selected={row.getIsSelected()}>\n          {row.getVisibleCells().map((cell) => {\n            const colDef = columnKindMap?.get(cell.column.id);\n\n            if (colDef?.kind === \"qualifier\" && qualifierColumn) {\n              return (\n                <QualifierContainer key={cell.id} type=\"cell\">\n                  <TableQualifier\n                    content={qualifierColumn.content}\n                    icon={qualifierColumn.getContent?.(row.original)}\n                    imageSrc={qualifierColumn.getImageSrc?.(row.original)}\n                    imageAlt={qualifierColumn.getImageAlt?.(row.original)}\n                    background={qualifierColumn.background}\n                    iconSize={qualifierColumn.iconSize}\n                    selectable={isSelectable}\n                    selected={isSelectable && row.getIsSelected()}\n                  />\n                </QualifierContainer>\n              );\n            }\n\n            if (colDef?.kind === \"actions\") {\n              return (\n                <ActionsContainer key={cell.id} type=\"cell\">\n                  {flexRender(cell.column.columnDef.cell, cell.getContext())}\n                </ActionsContainer>\n              );\n            }\n\n            return (\n              <TableCell key={cell.id}>\n                {flexRender(cell.column.columnDef.cell, cell.getContext())}\n              </TableCell>\n            );\n          })}\n        </TableRow>\n      </tbody>\n    </table>\n  );\n}\n\nconst DragOverlayRow = memo(DragOverlayRowInner) as typeof DragOverlayRowInner;\n\nexport default DragOverlayRow;\nexport type { DragOverlayRowProps };\n"
  },
  {
    "path": "web/lib/opal/src/components/table/Footer.tsx",
    "content": "\"use client\";\n\nimport { Button, Pagination, SelectButton } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { useTableSize } from \"@opal/components/table/TableSizeContext\";\nimport { SvgEye, SvgXCircle } from \"@opal/icons\";\nimport type { ReactNode } from \"react\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype SelectionState = \"none\" | \"partial\" | \"all\";\n\n/**\n * Footer mode for tables with selectable rows.\n * Displays a selection message on the left (with optional view/clear actions)\n * and a `count`-type pagination on the right.\n */\ninterface FooterSelectionModeProps {\n  mode: \"selection\";\n  /** Whether the table supports selecting multiple rows. */\n  multiSelect: boolean;\n  /** Current selection state: `\"none\"`, `\"partial\"`, or `\"all\"`. */\n  selectionState: SelectionState;\n  /** Number of currently selected items. */\n  selectedCount: number;\n  /** Toggle view-filter on/off. */\n  onView?: () => void;\n  /** Whether the view-filter is currently active. */\n  isViewingSelected?: boolean;\n  /** Clears all selections. */\n  onClear?: () => void;\n  /** Number of items displayed per page. */\n  pageSize: number;\n  /** Total number of items across all pages. */\n  totalItems: number;\n  /** The 1-based current page number. */\n  currentPage: number;\n  /** Total number of pages. */\n  totalPages: number;\n  /** Called when the user navigates to a different page. */\n  onPageChange: (page: number) => void;\n  /** Unit label for count pagination. @default \"items\" */\n  units?: string;\n}\n\n/**\n * Footer mode for read-only tables (no row selection).\n * Displays \"Showing X~Y of Z\" on the left and a `list`-type pagination\n * on the right.\n */\ninterface FooterSummaryModeProps {\n  mode: \"summary\";\n  /** First item number in the current page (e.g. `1`). */\n  rangeStart: number;\n  /** Last item number in the current page (e.g. `25`). */\n  rangeEnd: number;\n  /** Total number of items across all pages. */\n  totalItems: number;\n  /** The 1-based current page number. */\n  currentPage: number;\n  /** Total number of pages. */\n  totalPages: number;\n  /** Called when the user navigates to a different page. */\n  onPageChange: (page: number) => void;\n  /** Optional extra element rendered after the summary text (e.g. a download icon). */\n  leftExtra?: ReactNode;\n  /** Unit label for the summary text, e.g. \"users\". */\n  units?: string;\n}\n\n/**\n * Discriminated union of footer modes.\n * Use `mode: \"selection\"` for tables with selectable rows, or\n * `mode: \"summary\"` for read-only tables.\n */\nexport type FooterProps = FooterSelectionModeProps | FooterSummaryModeProps;\n\n// ---------------------------------------------------------------------------\n// Footer\n// ---------------------------------------------------------------------------\n\nfunction getSelectionMessage(\n  state: SelectionState,\n  multi: boolean,\n  count: number,\n  isViewingSelected: boolean\n): string {\n  if (state === \"none\" && !isViewingSelected) {\n    return multi ? \"Select items to continue\" : \"Select an item to continue\";\n  }\n  if (!multi) return \"Item selected\";\n  return `${count} item${count !== 1 ? \"s\" : \"\"} selected`;\n}\n\n/**\n * Table footer combining status information on the left with pagination on the\n * right. Use `mode: \"selection\"` for tables with selectable rows, or\n * `mode: \"summary\"` for read-only tables.\n */\nexport default function Footer(props: FooterProps) {\n  const resolvedSize = useTableSize();\n  const isSmall = resolvedSize === \"md\";\n  return (\n    <div\n      className=\"table-footer flex w-full items-center justify-between border-t border-border-01\"\n      data-size={resolvedSize}\n    >\n      {/* Left side */}\n      <div className=\"flex items-center gap-1 px-1\">\n        {props.mode === \"selection\" ? (\n          <SelectionLeft\n            selectionState={props.selectionState}\n            multiSelect={props.multiSelect}\n            selectedCount={props.selectedCount}\n            onView={props.onView}\n            isViewingSelected={props.isViewingSelected}\n            onClear={props.onClear}\n            isSmall={isSmall}\n          />\n        ) : (\n          <>\n            <SummaryLeft\n              rangeStart={props.rangeStart}\n              rangeEnd={props.rangeEnd}\n              totalItems={props.totalItems}\n              units={props.units}\n              isSmall={isSmall}\n            />\n            {props.leftExtra}\n          </>\n        )}\n      </div>\n\n      {/* Right side */}\n      <div className=\"flex items-center gap-2 px-1 py-2\">\n        {props.mode === \"selection\" ? (\n          <Pagination\n            variant=\"count\"\n            pageSize={props.pageSize}\n            totalItems={props.totalItems}\n            currentPage={props.currentPage}\n            totalPages={props.totalPages}\n            onChange={props.onPageChange}\n            units={props.units}\n            size={isSmall ? \"sm\" : \"md\"}\n          />\n        ) : (\n          <Pagination\n            currentPage={props.currentPage}\n            totalPages={props.totalPages}\n            onChange={props.onPageChange}\n            size={isSmall ? \"md\" : \"lg\"}\n          />\n        )}\n      </div>\n    </div>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Footer — left-side content\n// ---------------------------------------------------------------------------\n\ninterface SelectionLeftProps {\n  selectionState: SelectionState;\n  multiSelect: boolean;\n  selectedCount: number;\n  onView?: () => void;\n  isViewingSelected?: boolean;\n  onClear?: () => void;\n  isSmall: boolean;\n}\n\nfunction SelectionLeft({\n  selectionState,\n  multiSelect,\n  selectedCount,\n  onView,\n  isViewingSelected = false,\n  onClear,\n  isSmall,\n}: SelectionLeftProps) {\n  const message = getSelectionMessage(\n    selectionState,\n    multiSelect,\n    selectedCount,\n    isViewingSelected\n  );\n  const hasSelection = selectionState !== \"none\";\n  // Show buttons when items are selected OR when the view filter is active\n  const showActions = hasSelection || isViewingSelected;\n\n  return (\n    <div className=\"flex flex-row gap-1 items-center justify-center w-fit flex-shrink-0 h-fit px-1\">\n      {isSmall ? (\n        <Text\n          secondaryAction={hasSelection}\n          secondaryBody={!hasSelection}\n          text03\n        >\n          {message}\n        </Text>\n      ) : (\n        <Text mainUiBody={hasSelection} mainUiMuted={!hasSelection} text03>\n          {message}\n        </Text>\n      )}\n\n      {showActions && (\n        <div className=\"flex flex-row items-center w-fit flex-shrink-0 h-fit\">\n          {onView && (\n            <SelectButton\n              icon={SvgEye}\n              state={isViewingSelected ? \"selected\" : \"empty\"}\n              onClick={onView}\n              tooltip=\"View selected\"\n              size={isSmall ? \"sm\" : \"md\"}\n            />\n          )}\n          {onClear && (\n            <Button\n              icon={SvgXCircle}\n              onClick={onClear}\n              tooltip=\"Deselect all\"\n              size={isSmall ? \"sm\" : \"md\"}\n              prominence=\"tertiary\"\n            />\n          )}\n        </div>\n      )}\n    </div>\n  );\n}\n\ninterface SummaryLeftProps {\n  rangeStart: number;\n  rangeEnd: number;\n  totalItems: number;\n  units?: string;\n  isSmall: boolean;\n}\n\nfunction SummaryLeft({\n  rangeStart,\n  rangeEnd,\n  totalItems,\n  units,\n  isSmall,\n}: SummaryLeftProps) {\n  const suffix = units ? ` ${units}` : \"\";\n  return (\n    <div className=\"flex flex-row gap-1 items-center w-fit h-fit px-1\">\n      {isSmall ? (\n        <Text secondaryBody text03>\n          Showing{\" \"}\n          <Text as=\"span\" secondaryMono text03>\n            {rangeStart}~{rangeEnd}\n          </Text>{\" \"}\n          of{\" \"}\n          <Text as=\"span\" secondaryMono text03>\n            {totalItems}\n          </Text>\n          {suffix}\n        </Text>\n      ) : (\n        <Text mainUiMuted text03>\n          Showing{\" \"}\n          <Text as=\"span\" mainUiMono text03>\n            {rangeStart}~{rangeEnd}\n          </Text>{\" \"}\n          of{\" \"}\n          <Text as=\"span\" mainUiMono text03>\n            {totalItems}\n          </Text>\n          {suffix}\n        </Text>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/table/QualifierContainer.tsx",
    "content": "\"use client\";\n\nimport { useTableSize } from \"@opal/components/table/TableSizeContext\";\n\ninterface QualifierContainerProps {\n  type: \"head\" | \"cell\";\n  children?: React.ReactNode;\n  /** Pass-through click handler (e.g. stopPropagation on body cells). */\n  onClick?: (e: React.MouseEvent) => void;\n}\n\nexport default function QualifierContainer({\n  type,\n  children,\n  onClick,\n}: QualifierContainerProps) {\n  const resolvedSize = useTableSize();\n\n  const Tag = type === \"head\" ? \"th\" : \"td\";\n\n  return (\n    <Tag\n      className=\"tbl-qualifier\"\n      data-type={type}\n      data-size={resolvedSize}\n      onClick={onClick}\n    >\n      <div className=\"flex h-full items-center justify-center\">{children}</div>\n    </Tag>\n  );\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/table/README.md",
    "content": "# Table\n\nConfig-driven table component with sorting, pagination, column visibility,\nrow selection, drag-and-drop reordering, and server-side mode.\n\n## Usage\n\n```tsx\nimport { Table, createTableColumns } from \"@opal/components\";\nimport { SvgUser } from \"@opal/icons\";\n\ninterface User {\n  id: string;\n  email: string;\n  name: string | null;\n  status: \"active\" | \"invited\";\n}\n\nconst tc = createTableColumns<User>();\n\nconst columns = [\n  tc.qualifier({ content: \"icon\", getContent: () => SvgUser }),\n  tc.column(\"email\", {\n    header: \"Name\",\n    weight: 22,\n    cell: (email, row) => <span>{row.name ?? email}</span>,\n  }),\n  tc.column(\"status\", {\n    header: \"Status\",\n    weight: 14,\n    cell: (status) => <span>{status}</span>,\n  }),\n  tc.actions(),\n];\n\nfunction UsersTable({ users }: { users: User[] }) {\n  return (\n    <Table\n      data={users}\n      columns={columns}\n      getRowId={(r) => r.id}\n      pageSize={10}\n      footer={{}}\n    />\n  );\n}\n```\n\n## Props\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `data` | `TData[]` | required | Row data array |\n| `columns` | `OnyxColumnDef<TData>[]` | required | Column definitions from `createTableColumns()` |\n| `getRowId` | `(row: TData) => string` | required | Unique row identifier |\n| `pageSize` | `number` | `10` | Rows per page (`Infinity` disables pagination) |\n| `size` | `\"md\" \\| \"lg\"` | `\"lg\"` | Density variant |\n| `footer` | `DataTableFooterConfig` | — | Footer configuration (mode is derived from `selectionBehavior`) |\n| `initialSorting` | `SortingState` | — | Initial sort state |\n| `initialColumnVisibility` | `VisibilityState` | — | Initial column visibility |\n| `draggable` | `DataTableDraggableConfig` | — | Enable drag-and-drop reordering |\n| `onSelectionChange` | `(ids: string[]) => void` | — | Selection callback |\n| `onRowClick` | `(row: TData) => void` | — | Row click handler |\n| `searchTerm` | `string` | — | Global text filter |\n| `height` | `number \\| string` | — | Max scrollable height |\n| `serverSide` | `ServerSideConfig` | — | Server-side pagination/sorting/filtering |\n| `emptyState` | `ReactNode` | — | Empty state content |\n\n## Column Builder\n\n`createTableColumns<TData>()` returns a builder with:\n\n- `tc.qualifier(opts)` — leading avatar/icon/checkbox column\n- `tc.column(accessor, opts)` — data column with sorting/resizing\n- `tc.displayColumn(opts)` — non-accessor custom column\n- `tc.actions(opts)` — trailing actions column with visibility/sorting popovers\n\n## Footer\n\nThe footer mode is derived automatically from `selectionBehavior`:\n- **Selection footer** (when `selectionBehavior` is `\"single-select\"` or `\"multi-select\"`) — shows selection count, optional view/clear buttons, count pagination\n- **Summary footer** (when `selectionBehavior` is `\"no-select\"` or omitted) — shows \"Showing X\\~Y of Z\", list pagination, optional extra element\n"
  },
  {
    "path": "web/lib/opal/src/components/table/Table.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { Table, createTableColumns } from \"@opal/components\";\nimport { SvgUser } from \"@opal/icons\";\n\n// ---------------------------------------------------------------------------\n// Sample data\n// ---------------------------------------------------------------------------\n\ninterface User {\n  id: string;\n  email: string;\n  name: string;\n  role: \"admin\" | \"user\" | \"viewer\";\n  status: \"active\" | \"invited\" | \"inactive\";\n}\n\nconst USERS: User[] = [\n  {\n    id: \"1\",\n    email: \"alice@example.com\",\n    name: \"Alice Johnson\",\n    role: \"admin\",\n    status: \"active\",\n  },\n  {\n    id: \"2\",\n    email: \"bob@example.com\",\n    name: \"Bob Smith\",\n    role: \"user\",\n    status: \"active\",\n  },\n  {\n    id: \"3\",\n    email: \"carol@example.com\",\n    name: \"Carol White\",\n    role: \"viewer\",\n    status: \"invited\",\n  },\n  {\n    id: \"4\",\n    email: \"dave@example.com\",\n    name: \"Dave Brown\",\n    role: \"user\",\n    status: \"inactive\",\n  },\n  {\n    id: \"5\",\n    email: \"eve@example.com\",\n    name: \"Eve Davis\",\n    role: \"admin\",\n    status: \"active\",\n  },\n  {\n    id: \"6\",\n    email: \"frank@example.com\",\n    name: \"Frank Miller\",\n    role: \"viewer\",\n    status: \"active\",\n  },\n  {\n    id: \"7\",\n    email: \"grace@example.com\",\n    name: \"Grace Lee\",\n    role: \"user\",\n    status: \"invited\",\n  },\n  {\n    id: \"8\",\n    email: \"hank@example.com\",\n    name: \"Hank Wilson\",\n    role: \"user\",\n    status: \"active\",\n  },\n  {\n    id: \"9\",\n    email: \"iris@example.com\",\n    name: \"Iris Taylor\",\n    role: \"viewer\",\n    status: \"active\",\n  },\n  {\n    id: \"10\",\n    email: \"jack@example.com\",\n    name: \"Jack Moore\",\n    role: \"admin\",\n    status: \"active\",\n  },\n  {\n    id: \"11\",\n    email: \"kate@example.com\",\n    name: \"Kate Anderson\",\n    role: \"user\",\n    status: \"inactive\",\n  },\n  {\n    id: \"12\",\n    email: \"leo@example.com\",\n    name: \"Leo Thomas\",\n    role: \"viewer\",\n    status: \"active\",\n  },\n];\n\n// ---------------------------------------------------------------------------\n// Columns\n// ---------------------------------------------------------------------------\n\nconst tc = createTableColumns<User>();\n\nconst columns = [\n  tc.qualifier({\n    content: \"icon\",\n    getContent: () => SvgUser,\n    background: true,\n  }),\n  tc.column(\"name\", { header: \"Name\", weight: 25 }),\n  tc.column(\"email\", { header: \"Email\", weight: 30 }),\n  tc.column(\"role\", { header: \"Role\", weight: 15 }),\n  tc.column(\"status\", { header: \"Status\", weight: 15 }),\n  tc.actions(),\n];\n\n// ---------------------------------------------------------------------------\n// Story\n// ---------------------------------------------------------------------------\n\nconst meta: Meta<typeof Table> = {\n  title: \"opal/components/Table\",\n  component: Table,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Table>;\n\nexport const Default: Story = {\n  render: () => (\n    <Table\n      data={USERS}\n      columns={columns}\n      getRowId={(r) => r.id}\n      pageSize={8}\n      footer={{}}\n    />\n  ),\n};\n"
  },
  {
    "path": "web/lib/opal/src/components/table/TableBody.tsx",
    "content": "\"use client\";\n\nimport type { ReactNode } from \"react\";\nimport {\n  DndContext,\n  DragOverlay,\n  type DragStartEvent,\n  type DragEndEvent,\n  type CollisionDetection,\n  type Modifier,\n  type SensorDescriptor,\n  type SensorOptions,\n} from \"@dnd-kit/core\";\nimport {\n  SortableContext,\n  verticalListSortingStrategy,\n} from \"@dnd-kit/sortable\";\nimport type { WithoutStyles } from \"@/types\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface DraggableProps {\n  dndContextProps: {\n    sensors: SensorDescriptor<SensorOptions>[];\n    collisionDetection: CollisionDetection;\n    modifiers: Modifier[];\n    onDragStart: (event: DragStartEvent) => void;\n    onDragEnd: (event: DragEndEvent) => void;\n    onDragCancel: () => void;\n  };\n  sortableItems: string[];\n  activeId: string | null;\n  isEnabled: boolean;\n}\n\ninterface TableBodyProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLTableSectionElement>> {\n  ref?: React.Ref<HTMLTableSectionElement>;\n  /** DnD context props from useDraggableRows — enables drag-and-drop reordering */\n  dndSortable?: DraggableProps;\n  /** Render function for the drag overlay row */\n  renderDragOverlay?: (activeId: string) => ReactNode;\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nfunction TableBody({\n  ref,\n  dndSortable,\n  renderDragOverlay,\n  ...props\n}: TableBodyProps) {\n  if (dndSortable?.isEnabled) {\n    const { dndContextProps, sortableItems, activeId } = dndSortable;\n    return (\n      <DndContext\n        sensors={dndContextProps.sensors}\n        collisionDetection={dndContextProps.collisionDetection}\n        modifiers={dndContextProps.modifiers}\n        onDragStart={dndContextProps.onDragStart}\n        onDragEnd={dndContextProps.onDragEnd}\n        onDragCancel={dndContextProps.onDragCancel}\n      >\n        <SortableContext\n          items={sortableItems}\n          strategy={verticalListSortingStrategy}\n        >\n          <tbody ref={ref} {...props} />\n        </SortableContext>\n        <DragOverlay dropAnimation={null}>\n          {activeId && renderDragOverlay ? renderDragOverlay(activeId) : null}\n        </DragOverlay>\n      </DndContext>\n    );\n  }\n\n  return <tbody ref={ref} {...props} />;\n}\n\nexport default TableBody;\nexport type { TableBodyProps, DraggableProps };\n"
  },
  {
    "path": "web/lib/opal/src/components/table/TableCell.tsx",
    "content": "import { cn } from \"@opal/utils\";\nimport { useTableSize } from \"@opal/components/table/TableSizeContext\";\nimport type { WithoutStyles } from \"@/types\";\n\ninterface TableCellProps\n  extends WithoutStyles<React.TdHTMLAttributes<HTMLTableCellElement>> {\n  children: React.ReactNode;\n  /** Explicit pixel width for the cell. */\n  width?: number;\n}\n\nexport default function TableCell({\n  width,\n  children,\n  ...props\n}: TableCellProps) {\n  const resolvedSize = useTableSize();\n  return (\n    <td\n      className=\"tbl-cell overflow-hidden\"\n      data-size={resolvedSize}\n      style={width != null ? { width } : undefined}\n      {...props}\n    >\n      <div\n        className={cn(\"tbl-cell-inner\", \"flex items-center overflow-hidden\")}\n        data-size={resolvedSize}\n      >\n        {children}\n      </div>\n    </td>\n  );\n}\n\nexport type { TableCellProps };\n"
  },
  {
    "path": "web/lib/opal/src/components/table/TableElement.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { cn } from \"@opal/utils\";\nimport { useTableSize } from \"@opal/components/table/TableSizeContext\";\nimport type { WithoutStyles } from \"@/types\";\nimport type { ExtremaSizeVariants, SizeVariants } from \"@opal/types\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype TableSize = Extract<SizeVariants, \"md\" | \"lg\">;\ntype TableVariant = \"rows\" | \"cards\";\ntype SelectionBehavior = \"no-select\" | \"single-select\" | \"multi-select\";\n\ninterface TableProps\n  extends WithoutStyles<React.TableHTMLAttributes<HTMLTableElement>> {\n  ref?: React.Ref<HTMLTableElement>;\n  /** Visual row variant. @default \"cards\" */\n  variant?: TableVariant;\n  /** Row selection behavior. @default \"no-select\" */\n  selectionBehavior?: SelectionBehavior;\n  /** Height behavior. `\"fit\"` = shrink to content, `\"full\"` = fill available space. */\n  heightVariant?: ExtremaSizeVariants;\n  /** Explicit pixel width for the table (e.g. from `table.getTotalSize()`).\n   *  When provided the table uses exactly this width instead of stretching\n   *  to fill its container, which prevents `table-layout: fixed` from\n   *  redistributing extra space across columns on resize. */\n  width?: number;\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nfunction Table({\n  ref,\n  variant = \"cards\",\n  selectionBehavior = \"no-select\",\n  heightVariant,\n  width,\n  ...props\n}: TableProps) {\n  const size = useTableSize();\n  return (\n    <table\n      ref={ref}\n      className={cn(\"border-separate border-spacing-0\", !width && \"min-w-full\")}\n      style={{ width }}\n      data-size={size}\n      data-variant={variant}\n      data-selection={selectionBehavior}\n      data-height={heightVariant}\n      {...props}\n    />\n  );\n}\n\nexport default Table;\nexport type { TableProps, TableSize, TableVariant, SelectionBehavior };\n"
  },
  {
    "path": "web/lib/opal/src/components/table/TableHead.tsx",
    "content": "import { cn } from \"@opal/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { useTableSize } from \"@opal/components/table/TableSizeContext\";\nimport type { WithoutStyles } from \"@/types\";\nimport { Button } from \"@opal/components\";\nimport { SvgChevronDown, SvgChevronUp, SvgHandle, SvgSort } from \"@opal/icons\";\nimport type { IconFunctionComponent } from \"@opal/types\";\n\nexport type SortDirection = \"none\" | \"ascending\" | \"descending\";\n\n/**\n * A table header cell with optional sort controls and a resize handle indicator.\n * Renders as a `<th>` element with Figma-matched typography and spacing.\n */\ninterface TableHeadCustomProps {\n  /** Header label content. */\n  children: React.ReactNode;\n  /** Current sort state. When omitted, no sort button is shown. */\n  sorted?: SortDirection;\n  /** Called when the sort button is clicked. Required to show the sort button. */\n  onSort?: () => void;\n  /** When `true`, renders a thin resize handle on the right edge. */\n  resizable?: boolean;\n  /** Called when a resize drag begins on the handle. Attach TanStack's\n   *  `header.getResizeHandler()` here to enable column resizing. */\n  onResizeStart?: (event: React.MouseEvent | React.TouchEvent) => void;\n  /** Override the sort icon for this column. Receives the current sort state and\n   *  returns the icon component to render. Falls back to the built-in icons. */\n  icon?: (sorted: SortDirection) => IconFunctionComponent;\n  /** Text alignment for the column. Defaults to `\"left\"`. */\n  alignment?: \"left\" | \"center\" | \"right\";\n  /** Column width in pixels. Applied as an inline style on the `<th>`. */\n  width?: number;\n  /** When `true`, shows a bottom border on hover. Defaults to `true`. */\n  bottomBorder?: boolean;\n}\n\ntype TableHeadProps = WithoutStyles<\n  TableHeadCustomProps &\n    Omit<\n      React.ThHTMLAttributes<HTMLTableCellElement>,\n      keyof TableHeadCustomProps\n    >\n>;\n\n/**\n * Table header cell primitive. Displays a column label with optional sort\n * functionality and a resize handle indicator.\n */\nfunction defaultSortIcon(sorted: SortDirection): IconFunctionComponent {\n  switch (sorted) {\n    case \"ascending\":\n      return SvgChevronUp;\n    case \"descending\":\n      return SvgChevronDown;\n    default:\n      return SvgSort;\n  }\n}\n\nconst alignmentThClass = {\n  left: \"text-left\",\n  center: \"text-center\",\n  right: \"text-right\",\n} as const;\n\nconst alignmentFlexClass = {\n  left: \"justify-start\",\n  center: \"justify-center\",\n  right: \"justify-end\",\n} as const;\n\nexport default function TableHead({\n  children,\n  sorted,\n  onSort,\n  icon: iconFn = defaultSortIcon,\n  resizable,\n  onResizeStart,\n  alignment = \"left\",\n  width,\n  bottomBorder = true,\n  ...thProps\n}: TableHeadProps) {\n  const resolvedSize = useTableSize();\n  const isSmall = resolvedSize === \"md\";\n  return (\n    <th\n      {...thProps}\n      style={width != null ? { width } : undefined}\n      className={cn(\"table-head group\", alignmentThClass[alignment])}\n      data-size={resolvedSize}\n      data-bottom-border={bottomBorder || undefined}\n    >\n      <div className=\"flex items-center gap-1\">\n        <div className=\"table-head-label\">\n          <Text\n            mainUiAction={!isSmall}\n            secondaryAction={isSmall}\n            text04\n            className=\"truncate\"\n          >\n            {children}\n          </Text>\n        </div>\n        <div\n          className={cn(\n            \"table-head-sort\",\n            \"opacity-0 group-hover:opacity-100 transition-opacity\"\n          )}\n        >\n          {onSort && (\n            <Button\n              icon={iconFn(sorted ?? \"none\")}\n              onClick={onSort}\n              tooltip=\"Sort\"\n              tooltipSide=\"top\"\n              prominence=\"internal\"\n              size=\"sm\"\n            />\n          )}\n        </div>\n      </div>\n      {resizable && (\n        <div\n          onMouseDown={onResizeStart}\n          onTouchStart={onResizeStart}\n          className={cn(\n            \"absolute right-0 top-0 flex h-full items-center\",\n            \"text-border-02\",\n            \"opacity-0 group-hover:opacity-100\",\n            \"cursor-col-resize\",\n            \"select-none touch-none\"\n          )}\n        >\n          <SvgHandle size={22} className=\"stroke-border-02\" />\n        </div>\n      )}\n    </th>\n  );\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/table/TableHeader.tsx",
    "content": "import type { WithoutStyles } from \"@/types\";\n\ninterface TableHeaderProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLTableSectionElement>> {\n  ref?: React.Ref<HTMLTableSectionElement>;\n}\n\nfunction TableHeader({ ref, ...props }: TableHeaderProps) {\n  return <thead ref={ref} {...props} />;\n}\n\nexport default TableHeader;\nexport type { TableHeaderProps };\n"
  },
  {
    "path": "web/lib/opal/src/components/table/TableQualifier.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { cn } from \"@opal/utils\";\nimport { useTableSize } from \"@opal/components/table/TableSizeContext\";\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport type { QualifierContentType } from \"@opal/components/table/types\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\n\ninterface TableQualifierProps {\n  /** Content type displayed in the qualifier */\n  content: QualifierContentType;\n  /** Disables interaction */\n  disabled?: boolean;\n  /** Whether to show a selection checkbox overlay */\n  selectable?: boolean;\n  /** Whether the row is currently selected */\n  selected?: boolean;\n  /** Called when the checkbox is toggled */\n  onSelectChange?: (selected: boolean) => void;\n  /** Icon component to render (for \"icon\" content). */\n  icon?: IconFunctionComponent;\n  /** Image source URL (for \"image\" content). */\n  imageSrc?: string;\n  /** Image alt text (for \"image\" content). */\n  imageAlt?: string;\n  /** Show a tinted background container behind the content. */\n  background?: boolean;\n  /** Icon size preset. `\"lg\"` = 28/24, `\"md\"` = 20/16. @default \"md\" */\n  iconSize?: \"lg\" | \"md\";\n}\n\nconst iconSizesMap = {\n  lg: { lg: 28, md: 24 },\n  md: { lg: 20, md: 16 },\n} as const;\n\nfunction getOverlayStyles(selected: boolean, disabled: boolean) {\n  if (disabled) {\n    return selected ? \"flex bg-action-link-00\" : \"hidden\";\n  }\n  if (selected) {\n    return \"flex bg-action-link-00\";\n  }\n  return \"flex opacity-0 group-hover/row:opacity-100 group-focus-within/row:opacity-100 bg-background-tint-01\";\n}\n\nfunction TableQualifier({\n  content,\n  disabled = false,\n  selectable = false,\n  selected = false,\n  onSelectChange,\n  icon: Icon,\n  imageSrc,\n  imageAlt = \"\",\n  background = false,\n  iconSize: iconSizePreset = \"md\",\n}: TableQualifierProps) {\n  const resolvedSize = useTableSize();\n  const iconSize = iconSizesMap[iconSizePreset][resolvedSize];\n  const overlayStyles = getOverlayStyles(selected, disabled);\n\n  function renderContent() {\n    switch (content) {\n      case \"icon\":\n        return Icon ? <Icon size={iconSize} /> : null;\n\n      case \"image\":\n        return imageSrc ? (\n          <img\n            src={imageSrc}\n            alt={imageAlt}\n            className=\"h-full w-full rounded-08 object-cover\"\n          />\n        ) : null;\n\n      case \"simple\":\n      default:\n        return null;\n    }\n  }\n\n  const inner = renderContent();\n  const showBackground = background && content !== \"simple\";\n\n  return (\n    <div\n      className={cn(\n        \"group relative inline-flex shrink-0 items-center justify-center\",\n        resolvedSize === \"lg\" ? \"h-9 w-9\" : \"h-7 w-7\",\n        disabled ? \"cursor-not-allowed\" : \"cursor-default\"\n      )}\n    >\n      {showBackground ? (\n        <div\n          className={cn(\n            \"flex items-center justify-center overflow-hidden rounded-08 transition-colors\",\n            resolvedSize === \"lg\" ? \"h-9 w-9\" : \"h-7 w-7\",\n            disabled\n              ? \"bg-background-neutral-03\"\n              : selected\n                ? \"bg-action-link-00\"\n                : \"bg-background-tint-01\"\n          )}\n        >\n          {inner}\n        </div>\n      ) : (\n        inner\n      )}\n\n      {/* Selection overlay */}\n      {selectable && (\n        <div\n          className={cn(\n            \"absolute inset-0 items-center justify-center rounded-08\",\n            content === \"simple\" ? \"flex\" : overlayStyles\n          )}\n        >\n          <Checkbox\n            checked={selected}\n            onCheckedChange={onSelectChange}\n            disabled={disabled}\n          />\n        </div>\n      )}\n    </div>\n  );\n}\n\nexport default TableQualifier;\n"
  },
  {
    "path": "web/lib/opal/src/components/table/TableRow.tsx",
    "content": "\"use client\";\n\nimport { cn } from \"@opal/utils\";\nimport { useTableSize } from \"@opal/components/table/TableSizeContext\";\nimport type { WithoutStyles } from \"@/types\";\nimport { useSortable } from \"@dnd-kit/sortable\";\nimport { CSS } from \"@dnd-kit/utilities\";\nimport { SvgHandle } from \"@opal/icons\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\nexport interface TableRowProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLTableRowElement>> {\n  ref?: React.Ref<HTMLTableRowElement>;\n  selected?: boolean;\n  /** Disables interaction and applies disabled styling */\n  disabled?: boolean;\n  /** When provided, makes this row sortable via @dnd-kit */\n  sortableId?: string;\n  /** Show drag handle overlay. Defaults to true when sortableId is set. */\n  showDragHandle?: boolean;\n}\n\n// ---------------------------------------------------------------------------\n// Internal: sortable row\n// ---------------------------------------------------------------------------\n\nfunction SortableTableRow({\n  sortableId,\n  showDragHandle = true,\n  selected,\n  disabled,\n  ref: _externalRef,\n  children,\n  ...props\n}: TableRowProps) {\n  const resolvedSize = useTableSize();\n\n  const {\n    attributes,\n    listeners,\n    setNodeRef,\n    transform,\n    transition,\n    isDragging,\n  } = useSortable({ id: sortableId! });\n\n  const style: React.CSSProperties = {\n    transform: CSS.Transform.toString(transform),\n    transition,\n    opacity: isDragging ? 0 : undefined,\n  };\n\n  return (\n    <tr\n      ref={setNodeRef}\n      style={style}\n      className=\"tbl-row group/row\"\n      data-drag-handle={showDragHandle || undefined}\n      data-selected={selected || undefined}\n      data-disabled={disabled || undefined}\n      {...attributes}\n      {...props}\n    >\n      {children}\n      {showDragHandle && (\n        <td\n          style={{\n            width: 0,\n            padding: 0,\n            position: \"relative\",\n            zIndex: 20,\n          }}\n        >\n          <button\n            type=\"button\"\n            className={cn(\n              \"absolute right-0 top-1/2 -translate-y-1/2 cursor-grab\",\n              \"opacity-0 group-hover/row:opacity-100 transition-opacity\",\n              \"flex items-center justify-center rounded\"\n            )}\n            aria-label=\"Drag to reorder\"\n            onMouseDown={(e) => e.preventDefault()}\n            {...listeners}\n          >\n            <SvgHandle\n              size={resolvedSize === \"md\" ? 12 : 16}\n              className=\"text-border-02\"\n            />\n          </button>\n        </td>\n      )}\n    </tr>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Main component\n// ---------------------------------------------------------------------------\n\nexport default function TableRow({\n  sortableId,\n  showDragHandle,\n  selected,\n  disabled,\n  ref,\n  ...props\n}: TableRowProps) {\n  if (sortableId) {\n    return (\n      <SortableTableRow\n        sortableId={sortableId}\n        showDragHandle={showDragHandle}\n        selected={selected}\n        disabled={disabled}\n        ref={ref}\n        {...props}\n      />\n    );\n  }\n\n  return (\n    <tr\n      ref={ref}\n      className=\"tbl-row group/row\"\n      data-selected={selected || undefined}\n      data-disabled={disabled || undefined}\n      {...props}\n    />\n  );\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/table/TableSizeContext.tsx",
    "content": "\"use client\";\n\nimport { createContext, useContext } from \"react\";\nimport type { SizeVariants } from \"@opal/types\";\n\ntype TableSize = Extract<SizeVariants, \"md\" | \"lg\">;\n\nconst TableSizeContext = createContext<TableSize>(\"lg\");\n\ninterface TableSizeProviderProps {\n  size: TableSize;\n  children: React.ReactNode;\n}\n\nfunction TableSizeProvider({ size, children }: TableSizeProviderProps) {\n  return (\n    <TableSizeContext.Provider value={size}>\n      {children}\n    </TableSizeContext.Provider>\n  );\n}\n\nfunction useTableSize(): TableSize {\n  return useContext(TableSizeContext);\n}\n\nexport { TableSizeProvider, useTableSize };\nexport type { TableSize };\n"
  },
  {
    "path": "web/lib/opal/src/components/table/columns.ts",
    "content": "import type { ReactNode } from \"react\";\nimport {\n  createColumnHelper,\n  type ColumnDef,\n  type DeepKeys,\n  type DeepValue,\n  type CellContext,\n} from \"@tanstack/react-table\";\nimport type {\n  ColumnWidth,\n  QualifierContentType,\n  OnyxQualifierColumn,\n  OnyxDataColumn,\n  OnyxDisplayColumn,\n  OnyxActionsColumn,\n} from \"@opal/components/table/types\";\nimport type { TableSize } from \"@opal/components/table/TableSizeContext\";\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport type { SortDirection } from \"@opal/components/table/TableHead\";\n\n// ---------------------------------------------------------------------------\n// Qualifier column config\n// ---------------------------------------------------------------------------\n\ninterface QualifierConfig<TData> {\n  /** Content type for body-row `<TableQualifier>`. @default \"simple\" */\n  content?: QualifierContentType;\n  /** Return the icon component to render for a row (for \"icon\" content). */\n  getContent?: (row: TData) => IconFunctionComponent;\n  /** Return the image URL to render for a row (for \"image\" content). */\n  getImageSrc?: (row: TData) => string;\n  /** Return the image alt text for a row (for \"image\" content). @default \"\" */\n  getImageAlt?: (row: TData) => string;\n  /** Show a tinted background container behind the content. @default false */\n  background?: boolean;\n  /** Icon size preset. `\"lg\"` = 28/24, `\"md\"` = 20/16. @default \"md\" */\n  iconSize?: \"lg\" | \"md\";\n}\n\n// ---------------------------------------------------------------------------\n// Data column config\n// ---------------------------------------------------------------------------\n\ninterface DataColumnConfig<TData, TValue> {\n  /** Column header label. */\n  header: string;\n  /** Custom cell renderer. If omitted, the value is rendered as a string. */\n  cell?: (value: TValue, row: TData) => ReactNode;\n  /** Enable sorting for this column. @default true */\n  enableSorting?: boolean;\n  /** Enable resizing for this column. @default true */\n  enableResizing?: boolean;\n  /** Enable hiding for this column. @default true */\n  enableHiding?: boolean;\n  /** Override the sort icon for this column. */\n  icon?: (sorted: SortDirection) => IconFunctionComponent;\n  /** Column weight for proportional distribution. @default 20 */\n  weight?: number;\n}\n\n// ---------------------------------------------------------------------------\n// Display column config\n// ---------------------------------------------------------------------------\n\ninterface DisplayColumnConfig<TData> {\n  /** Unique column ID. */\n  id: string;\n  /** Column header label. */\n  header?: string;\n  /** Cell renderer. */\n  cell: (row: TData) => ReactNode;\n  /** Column width config. */\n  width: ColumnWidth;\n  /** Enable hiding. @default true */\n  enableHiding?: boolean;\n}\n\n// ---------------------------------------------------------------------------\n// Actions column config\n// ---------------------------------------------------------------------------\n\ninterface ActionsConfig<TData = any> {\n  /** Show column visibility popover. @default true */\n  showColumnVisibility?: boolean;\n  /** Show sorting popover. @default true */\n  showSorting?: boolean;\n  /** Footer text for the sorting popover. */\n  sortingFooterText?: string;\n  /** Optional cell renderer for row-level action buttons. */\n  cell?: (row: TData) => ReactNode;\n}\n\n// ---------------------------------------------------------------------------\n// Builder return type\n// ---------------------------------------------------------------------------\n\ninterface TableColumnsBuilder<TData> {\n  /** Create a qualifier (leading avatar/checkbox) column. */\n  qualifier(config?: QualifierConfig<TData>): OnyxQualifierColumn<TData>;\n\n  /** Create a data (accessor) column. */\n  column<TKey extends DeepKeys<TData>>(\n    accessor: TKey,\n    config: DataColumnConfig<TData, DeepValue<TData, TKey>>\n  ): OnyxDataColumn<TData>;\n\n  /** Create a display (non-accessor) column. */\n  displayColumn(config: DisplayColumnConfig<TData>): OnyxDisplayColumn<TData>;\n\n  /** Create an actions column (visibility/sorting popovers). */\n  actions(config?: ActionsConfig<TData>): OnyxActionsColumn<TData>;\n}\n\n// ---------------------------------------------------------------------------\n// Factory\n// ---------------------------------------------------------------------------\n\n/**\n * Creates a typed column builder for a given row type.\n *\n * Internally uses TanStack's `createColumnHelper<TData>()` to get free\n * `DeepKeys`/`DeepValue` inference for accessor columns.\n *\n * **Important**: Define columns at module scope or wrap in `useMemo` to avoid\n * creating new array references per render.\n *\n * @example\n * ```ts\n * const tc = createTableColumns<TeamMember>();\n * const columns = [\n *   tc.qualifier({ content: \"icon\", getContent: (r) => UserIcon }),\n *   tc.column(\"name\", { header: \"Name\", weight: 23 }),\n *   tc.column(\"email\", { header: \"Email\", weight: 28 }),\n *   tc.actions(),\n * ];\n * ```\n */\nexport function createTableColumns<TData>(): TableColumnsBuilder<TData> {\n  const helper = createColumnHelper<TData>();\n\n  return {\n    qualifier(config?: QualifierConfig<TData>): OnyxQualifierColumn<TData> {\n      const content = config?.content ?? \"simple\";\n\n      const def: ColumnDef<TData, any> = helper.display({\n        id: \"qualifier\",\n        enableResizing: false,\n        enableSorting: false,\n        enableHiding: false,\n        // Cell rendering is handled by DataTable based on the qualifier config\n        cell: () => null,\n      });\n\n      return {\n        kind: \"qualifier\",\n        id: \"qualifier\",\n        def,\n        width: (size: TableSize) =>\n          size === \"md\" ? { fixed: 36 } : { fixed: 44 },\n        content,\n        getContent: config?.getContent,\n        getImageSrc: config?.getImageSrc,\n        getImageAlt: config?.getImageAlt,\n        background: config?.background,\n        iconSize: config?.iconSize,\n      };\n    },\n\n    column<TKey extends DeepKeys<TData>>(\n      accessor: TKey,\n      config: DataColumnConfig<TData, DeepValue<TData, TKey>>\n    ): OnyxDataColumn<TData> {\n      const {\n        header,\n        cell,\n        enableSorting = true,\n        enableResizing = true,\n        enableHiding = true,\n        icon,\n        weight = 20,\n      } = config;\n\n      const def = helper.accessor(accessor as any, {\n        header,\n        enableSorting,\n        enableResizing,\n        enableHiding,\n        cell: cell\n          ? (info: CellContext<TData, any>) =>\n              cell(info.getValue(), info.row.original)\n          : undefined,\n      }) as ColumnDef<TData, any>;\n\n      return {\n        kind: \"data\",\n        id: accessor as string,\n        def,\n        width: { weight, minWidth: Math.max(header.length * 8 + 40, 80) },\n        icon,\n      };\n    },\n\n    displayColumn(\n      config: DisplayColumnConfig<TData>\n    ): OnyxDisplayColumn<TData> {\n      const { id, header, cell, width, enableHiding = true } = config;\n\n      const def: ColumnDef<TData, any> = helper.display({\n        id,\n        header: header ?? undefined,\n        enableHiding,\n        enableSorting: false,\n        enableResizing: false,\n        cell: (info) => cell(info.row.original),\n      });\n\n      return {\n        kind: \"display\",\n        id,\n        def,\n        width,\n      };\n    },\n\n    actions(config?: ActionsConfig<TData>): OnyxActionsColumn<TData> {\n      const def: ColumnDef<TData, any> = {\n        id: \"__actions\",\n        enableHiding: false,\n        enableSorting: false,\n        enableResizing: false,\n        // Header rendering is handled by DataTable based on the actions config\n        header: () => null,\n        cell: config?.cell\n          ? (info: CellContext<TData, any>) => config.cell!(info.row.original)\n          : () => null,\n      };\n\n      const showVisibility = config?.showColumnVisibility ?? true;\n      const showSorting = config?.showSorting ?? true;\n      const buttonCount = (showVisibility ? 1 : 0) + (showSorting ? 1 : 0);\n\n      // Icon button sizes: \"md\" button = 28px, \"sm\" button = 24px\n      // px-1 on .tbl-actions = 4px each side = 8px total\n      const BUTTON_MD = 28;\n      const BUTTON_SM = 24;\n      const PADDING = 8;\n\n      return {\n        kind: \"actions\",\n        id: \"__actions\",\n        def,\n        width: (size: TableSize) => ({\n          fixed:\n            Math.max(\n              buttonCount * (size === \"md\" ? BUTTON_SM : BUTTON_MD),\n              size === \"md\" ? BUTTON_SM : BUTTON_MD\n            ) + PADDING,\n        }),\n        showColumnVisibility: showVisibility,\n        showSorting: showSorting,\n        sortingFooterText: config?.sortingFooterText,\n      };\n    },\n  };\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/table/components.tsx",
    "content": "\"use client\";\n\"use no memo\";\n\nimport \"@opal/components/table/styles.css\";\n\nimport { useEffect, useMemo } from \"react\";\nimport { flexRender } from \"@tanstack/react-table\";\nimport useDataTable, {\n  toOnyxSortDirection,\n} from \"@opal/components/table/hooks/useDataTable\";\nimport useColumnWidths from \"@opal/components/table/hooks/useColumnWidths\";\nimport useDraggableRows from \"@opal/components/table/hooks/useDraggableRows\";\nimport TableElement from \"@opal/components/table/TableElement\";\nimport TableHeader from \"@opal/components/table/TableHeader\";\nimport TableBody from \"@opal/components/table/TableBody\";\nimport TableRow from \"@opal/components/table/TableRow\";\nimport TableHead from \"@opal/components/table/TableHead\";\nimport TableCell from \"@opal/components/table/TableCell\";\nimport TableQualifier from \"@opal/components/table/TableQualifier\";\nimport QualifierContainer from \"@opal/components/table/QualifierContainer\";\nimport ActionsContainer from \"@opal/components/table/ActionsContainer\";\nimport DragOverlayRow from \"@opal/components/table/DragOverlayRow\";\nimport Footer from \"@opal/components/table/Footer\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport { TableSizeProvider } from \"@opal/components/table/TableSizeContext\";\nimport { ColumnVisibilityPopover } from \"@opal/components/table/ColumnVisibilityPopover\";\nimport { SortingPopover } from \"@opal/components/table/ColumnSortabilityPopover\";\nimport type { WidthConfig } from \"@opal/components/table/hooks/useColumnWidths\";\nimport type { ColumnDef } from \"@tanstack/react-table\";\nimport { cn } from \"@opal/utils\";\nimport type {\n  DataTableProps as BaseDataTableProps,\n  DataTableFooterConfig,\n  OnyxColumnDef,\n  OnyxDataColumn,\n  OnyxQualifierColumn,\n  OnyxActionsColumn,\n} from \"@opal/components/table/types\";\nimport type { TableSize } from \"@opal/components/table/TableSizeContext\";\n\n// ---------------------------------------------------------------------------\n// SelectionBehavior\n// ---------------------------------------------------------------------------\n\ntype SelectionBehavior = \"no-select\" | \"single-select\" | \"multi-select\";\n\nexport type DataTableProps<TData> = BaseDataTableProps<TData> & {\n  /** Row selection behavior. @default \"no-select\" */\n  selectionBehavior?: SelectionBehavior;\n};\n\n// ---------------------------------------------------------------------------\n// Internal: resolve size-dependent widths and build TanStack columns\n// ---------------------------------------------------------------------------\n\ninterface ProcessedColumns<TData> {\n  tanstackColumns: ColumnDef<TData, any>[];\n  widthConfig: WidthConfig;\n  qualifierColumn: OnyxQualifierColumn<TData> | null;\n  /** Map from column ID → OnyxColumnDef for dispatch in render loops. */\n  columnKindMap: Map<string, OnyxColumnDef<TData>>;\n}\n\nfunction processColumns<TData>(\n  columns: OnyxColumnDef<TData>[],\n  size: TableSize\n): ProcessedColumns<TData> {\n  const tanstackColumns: ColumnDef<TData, any>[] = [];\n  const fixedColumnIds = new Set<string>();\n  const columnWeights: Record<string, number> = {};\n  const columnMinWidths: Record<string, number> = {};\n  const columnKindMap = new Map<string, OnyxColumnDef<TData>>();\n  let qualifierColumn: OnyxQualifierColumn<TData> | null = null;\n  let firstDataColumnSeen = false;\n\n  for (const col of columns) {\n    const resolvedWidth =\n      typeof col.width === \"function\" ? col.width(size) : col.width;\n\n    // Clone def to avoid mutating the caller's column definitions\n    const clonedDef: ColumnDef<TData, any> = {\n      ...col.def,\n      id: col.id,\n      size:\n        \"fixed\" in resolvedWidth ? resolvedWidth.fixed : resolvedWidth.weight,\n    };\n\n    // First data column is never hideable\n    if (col.kind === \"data\" && !firstDataColumnSeen) {\n      firstDataColumnSeen = true;\n      clonedDef.enableHiding = false;\n    }\n\n    tanstackColumns.push(clonedDef);\n\n    const id = col.id;\n    columnKindMap.set(id, col);\n\n    if (\"fixed\" in resolvedWidth) {\n      fixedColumnIds.add(id);\n    } else {\n      columnWeights[id] = resolvedWidth.weight;\n      columnMinWidths[id] = resolvedWidth.minWidth ?? 50;\n    }\n\n    if (col.kind === \"qualifier\") qualifierColumn = col;\n  }\n\n  return {\n    tanstackColumns,\n    widthConfig: { fixedColumnIds, columnWeights, columnMinWidths },\n    qualifierColumn,\n    columnKindMap,\n  };\n}\n\n// ---------------------------------------------------------------------------\n// DataTable component\n// ---------------------------------------------------------------------------\n\n/**\n * Config-driven table component that wires together `useDataTable`,\n * `useColumnWidths`, and `useDraggableRows` automatically.\n *\n * Full flexibility via the column definitions from `createTableColumns()`.\n *\n * @example\n * ```tsx\n * const tc = createTableColumns<TeamMember>();\n * const columns = [\n *   tc.qualifier({ content: \"icon\", getContent: (r) => UserIcon }),\n *   tc.column(\"name\", { header: \"Name\", weight: 23 }),\n *   tc.column(\"email\", { header: \"Email\", weight: 28 }),\n *   tc.actions(),\n * ];\n *\n * <Table data={items} columns={columns} footer={{}} />\n * ```\n */\nexport function Table<TData>(props: DataTableProps<TData>) {\n  const {\n    data,\n    columns,\n    getRowId,\n    pageSize,\n    initialSorting,\n    initialColumnVisibility,\n    initialRowSelection,\n    initialViewSelected,\n    draggable,\n    footer,\n    size = \"lg\",\n    variant = \"cards\",\n    selectionBehavior = \"no-select\",\n    onSelectionChange,\n    onRowClick,\n    searchTerm,\n    height,\n    serverSide,\n    emptyState,\n  } = props;\n\n  const effectivePageSize = pageSize ?? (footer ? 10 : data.length);\n\n  // Whether the qualifier column should exist in the DOM.\n  // Derived from the column definitions: if a qualifier column exists with\n  // content !== \"simple\", always show it. If content === \"simple\" (or no\n  // qualifier column defined), show only for multi-select (checkboxes).\n  const qualifierColDef = columns.find(\n    (c): c is OnyxQualifierColumn<TData> => c.kind === \"qualifier\"\n  );\n  const hasQualifierColumn =\n    (qualifierColDef != null && qualifierColDef.content !== \"simple\") ||\n    selectionBehavior === \"multi-select\";\n\n  // 1. Process columns (memoized on columns + size)\n  const { tanstackColumns, widthConfig, qualifierColumn, columnKindMap } =\n    useMemo(() => {\n      const processed = processColumns(columns, size);\n      if (!hasQualifierColumn) {\n        // Remove qualifier from TanStack columns and width config entirely\n        return {\n          ...processed,\n          tanstackColumns: processed.tanstackColumns.filter(\n            (c) => c.id !== \"qualifier\"\n          ),\n          widthConfig: {\n            ...processed.widthConfig,\n            fixedColumnIds: new Set(\n              Array.from(processed.widthConfig.fixedColumnIds).filter(\n                (id) => id !== \"qualifier\"\n              )\n            ),\n          },\n          qualifierColumn: null,\n        };\n      }\n      return processed;\n    }, [columns, size, hasQualifierColumn]);\n\n  // 2. Call useDataTable\n  const {\n    table,\n    currentPage,\n    totalPages,\n    totalItems,\n    setPage,\n    pageSize: resolvedPageSize,\n    selectionState,\n    selectedCount,\n    selectedRowIds,\n    clearSelection,\n    toggleAllPageRowsSelected,\n    toggleAllRowsSelected,\n    isAllPageRowsSelected,\n    isAllRowsSelected,\n    isViewingSelected,\n    enterViewMode,\n    exitViewMode,\n  } = useDataTable({\n    data,\n    columns: tanstackColumns,\n    pageSize: effectivePageSize,\n    initialSorting,\n    initialColumnVisibility,\n    initialRowSelection,\n    initialViewSelected,\n    getRowId,\n    onSelectionChange,\n    searchTerm,\n    serverSide: serverSide\n      ? {\n          totalItems: serverSide.totalItems,\n          onSortingChange: serverSide.onSortingChange,\n          onPaginationChange: serverSide.onPaginationChange,\n          onSearchTermChange: serverSide.onSearchTermChange,\n        }\n      : undefined,\n  });\n\n  // 3. Call useColumnWidths\n  const { containerRef, columnWidths, createResizeHandler } = useColumnWidths({\n    headers: table.getHeaderGroups()[0]?.headers ?? [],\n    ...widthConfig,\n  });\n\n  // 4. Call useDraggableRows (conditional — disabled in server-side mode)\n  useEffect(() => {\n    if (process.env.NODE_ENV !== \"production\" && serverSide && draggable) {\n      console.warn(\n        \"DataTable: `draggable` is ignored when `serverSide` is enabled. \" +\n          \"Drag-and-drop reordering is not supported with server-side pagination.\"\n      );\n    }\n  }, [!!serverSide, !!draggable]); // eslint-disable-line react-hooks/exhaustive-deps\n  const effectiveDraggable = serverSide ? undefined : draggable;\n  const draggableReturn = useDraggableRows({\n    data,\n    getRowId,\n    enabled: !!effectiveDraggable && table.getState().sorting.length === 0,\n    onReorder: effectiveDraggable?.onReorder,\n  });\n\n  const hasDraggable = !!effectiveDraggable;\n\n  const isSelectable = selectionBehavior !== \"no-select\";\n  const isMultiSelect = selectionBehavior === \"multi-select\";\n  // Checkboxes appear for any selectable table\n  const showQualifierCheckbox = isSelectable;\n\n  // ---------------------------------------------------------------------------\n  // Render\n  // ---------------------------------------------------------------------------\n\n  const isServerLoading = !!serverSide?.isLoading;\n\n  function renderFooter(footerConfig: DataTableFooterConfig) {\n    // Mode derived from selectionBehavior — single/multi-select use selection\n    // footer, no-select uses summary footer.\n    if (isSelectable) {\n      return (\n        <Footer\n          mode=\"selection\"\n          multiSelect={isMultiSelect}\n          selectionState={selectionState}\n          selectedCount={selectedCount}\n          onClear={\n            footerConfig.onClear ??\n            (() => {\n              if (isViewingSelected) exitViewMode();\n              clearSelection();\n            })\n          }\n          onView={\n            !serverSide\n              ? isViewingSelected\n                ? exitViewMode\n                : enterViewMode\n              : undefined\n          }\n          isViewingSelected={isViewingSelected}\n          pageSize={resolvedPageSize}\n          totalItems={totalItems}\n          currentPage={currentPage}\n          totalPages={totalPages}\n          onPageChange={setPage}\n          units={footerConfig.units}\n        />\n      );\n    }\n\n    // Summary mode (no-select only)\n    const rangeStart =\n      totalItems === 0\n        ? 0\n        : !isFinite(resolvedPageSize)\n          ? 1\n          : (currentPage - 1) * resolvedPageSize + 1;\n    const rangeEnd = !isFinite(resolvedPageSize)\n      ? totalItems\n      : Math.min(currentPage * resolvedPageSize, totalItems);\n\n    return (\n      <Footer\n        mode=\"summary\"\n        rangeStart={rangeStart}\n        rangeEnd={rangeEnd}\n        totalItems={totalItems}\n        currentPage={currentPage}\n        totalPages={totalPages}\n        onPageChange={setPage}\n        leftExtra={footerConfig.leftExtra}\n        units={footerConfig.units}\n      />\n    );\n  }\n\n  return (\n    <TableSizeProvider size={size}>\n      <div>\n        <div\n          className={cn(\n            \"overflow-x-auto transition-opacity duration-150\",\n            isServerLoading && \"opacity-50 pointer-events-none\"\n          )}\n          ref={containerRef}\n          style={{\n            ...(height != null\n              ? {\n                  maxHeight:\n                    typeof height === \"number\" ? `${height}px` : height,\n                  overflowY: \"auto\" as const,\n                }\n              : undefined),\n          }}\n        >\n          <TableElement\n            variant={variant}\n            selectionBehavior={selectionBehavior}\n            width={\n              Object.keys(columnWidths).length > 0\n                ? Object.values(columnWidths).reduce((sum, w) => sum + w, 0)\n                : undefined\n            }\n          >\n            <colgroup>\n              {table.getVisibleLeafColumns().map((col) => (\n                <col\n                  key={col.id}\n                  style={\n                    columnWidths[col.id] != null\n                      ? { width: columnWidths[col.id] }\n                      : undefined\n                  }\n                />\n              ))}\n            </colgroup>\n            <TableHeader>\n              {table.getHeaderGroups().map((headerGroup) => (\n                <TableRow key={headerGroup.id}>\n                  {headerGroup.headers.map((header, headerIndex) => {\n                    const colDef = columnKindMap.get(header.id);\n\n                    // Qualifier header — select-all checkbox only for multi-select\n                    if (colDef?.kind === \"qualifier\") {\n                      return (\n                        <QualifierContainer key={header.id} type=\"head\">\n                          {isMultiSelect && (\n                            <Checkbox\n                              checked={isAllRowsSelected}\n                              indeterminate={\n                                !isAllRowsSelected && selectedCount > 0\n                              }\n                              onCheckedChange={(checked) => {\n                                // Indeterminate → clear all; otherwise toggle normally\n                                if (!isAllRowsSelected && selectedCount > 0) {\n                                  toggleAllRowsSelected(false);\n                                } else {\n                                  toggleAllRowsSelected(checked);\n                                }\n                              }}\n                            />\n                          )}\n                        </QualifierContainer>\n                      );\n                    }\n\n                    // Actions header\n                    if (colDef?.kind === \"actions\") {\n                      const actionsDef = colDef as OnyxActionsColumn<TData>;\n                      return (\n                        <ActionsContainer key={header.id} type=\"head\">\n                          {actionsDef.showColumnVisibility !== false && (\n                            <ColumnVisibilityPopover\n                              table={table}\n                              columnVisibility={\n                                table.getState().columnVisibility\n                              }\n                            />\n                          )}\n                          {actionsDef.showSorting !== false && (\n                            <SortingPopover\n                              table={table}\n                              sorting={table.getState().sorting}\n                              footerText={actionsDef.sortingFooterText}\n                            />\n                          )}\n                        </ActionsContainer>\n                      );\n                    }\n\n                    // Data / Display header\n                    const canSort = header.column.getCanSort();\n                    const sortDir = header.column.getIsSorted();\n                    const nextHeader = headerGroup.headers[headerIndex + 1];\n                    const canResize =\n                      header.column.getCanResize() &&\n                      !!nextHeader &&\n                      !widthConfig.fixedColumnIds.has(nextHeader.id);\n\n                    const dataCol =\n                      colDef?.kind === \"data\"\n                        ? (colDef as OnyxDataColumn<TData>)\n                        : null;\n\n                    return (\n                      <TableHead\n                        key={header.id}\n                        width={columnWidths[header.id]}\n                        sorted={\n                          canSort ? toOnyxSortDirection(sortDir) : undefined\n                        }\n                        onSort={\n                          canSort\n                            ? () => header.column.toggleSorting()\n                            : undefined\n                        }\n                        icon={dataCol?.icon}\n                        resizable={canResize}\n                        onResizeStart={\n                          canResize\n                            ? createResizeHandler(header.id, nextHeader.id)\n                            : undefined\n                        }\n                      >\n                        {flexRender(\n                          header.column.columnDef.header,\n                          header.getContext()\n                        )}\n                      </TableHead>\n                    );\n                  })}\n                </TableRow>\n              ))}\n            </TableHeader>\n\n            <TableBody\n              dndSortable={hasDraggable ? draggableReturn : undefined}\n              renderDragOverlay={\n                hasDraggable\n                  ? (activeId) => {\n                      const row = table\n                        .getRowModel()\n                        .rows.find((r) => getRowId(r.original) === activeId);\n                      if (!row) return null;\n                      return (\n                        <DragOverlayRow\n                          row={row}\n                          columnWidths={columnWidths}\n                          columnKindMap={columnKindMap}\n                          qualifierColumn={qualifierColumn}\n                          isSelectable={isSelectable}\n                        />\n                      );\n                    }\n                  : undefined\n              }\n            >\n              {emptyState && table.getRowModel().rows.length === 0 && (\n                <tr>\n                  <td colSpan={table.getVisibleLeafColumns().length}>\n                    {emptyState}\n                  </td>\n                </tr>\n              )}\n              {table.getRowModel().rows.map((row) => {\n                const rowId = hasDraggable ? getRowId(row.original) : undefined;\n\n                return (\n                  <TableRow\n                    key={row.id}\n                    sortableId={rowId}\n                    selected={row.getIsSelected()}\n                    onClick={() => {\n                      if (\n                        hasDraggable &&\n                        draggableReturn.wasDraggingRef.current\n                      ) {\n                        return;\n                      }\n                      if (onRowClick) {\n                        onRowClick(row.original);\n                      } else if (isSelectable) {\n                        if (!isMultiSelect) {\n                          // single-select: clear all, then select this row\n                          table.toggleAllRowsSelected(false);\n                        }\n                        row.toggleSelected();\n                      }\n                    }}\n                  >\n                    {row.getVisibleCells().map((cell) => {\n                      const cellColDef = columnKindMap.get(cell.column.id);\n\n                      // Qualifier cell\n                      if (cellColDef?.kind === \"qualifier\") {\n                        const qDef = cellColDef as OnyxQualifierColumn<TData>;\n\n                        return (\n                          <QualifierContainer\n                            key={cell.id}\n                            type=\"cell\"\n                            onClick={(e) => e.stopPropagation()}\n                          >\n                            <TableQualifier\n                              content={qDef.content}\n                              icon={qDef.getContent?.(row.original)}\n                              imageSrc={qDef.getImageSrc?.(row.original)}\n                              imageAlt={qDef.getImageAlt?.(row.original)}\n                              background={qDef.background}\n                              iconSize={qDef.iconSize}\n                              selectable={showQualifierCheckbox}\n                              selected={\n                                showQualifierCheckbox && row.getIsSelected()\n                              }\n                              onSelectChange={\n                                showQualifierCheckbox\n                                  ? (checked) => {\n                                      if (!isMultiSelect) {\n                                        table.toggleAllRowsSelected(false);\n                                      }\n                                      row.toggleSelected(checked);\n                                    }\n                                  : undefined\n                              }\n                            />\n                          </QualifierContainer>\n                        );\n                      }\n\n                      // Actions cell\n                      if (cellColDef?.kind === \"actions\") {\n                        return (\n                          <ActionsContainer\n                            key={cell.id}\n                            type=\"cell\"\n                            onClick={(e) => e.stopPropagation()}\n                          >\n                            {flexRender(\n                              cell.column.columnDef.cell,\n                              cell.getContext()\n                            )}\n                          </ActionsContainer>\n                        );\n                      }\n\n                      // Data / Display cell\n                      return (\n                        <TableCell\n                          key={cell.id}\n                          data-column-id={cell.column.id}\n                        >\n                          {flexRender(\n                            cell.column.columnDef.cell,\n                            cell.getContext()\n                          )}\n                        </TableCell>\n                      );\n                    })}\n                  </TableRow>\n                );\n              })}\n            </TableBody>\n          </TableElement>\n        </div>\n\n        {footer && renderFooter(footer)}\n      </div>\n    </TableSizeProvider>\n  );\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/table/hooks/useColumnWidths.ts",
    "content": "\"use client\";\n\n/**\n * useColumnWidths — Proportional column widths with splitter resize.\n *\n * WHY NOT TANSTACK'S BUILT-IN COLUMN SIZING?\n *\n * TanStack Table's column resize system (columnSizing state,\n * header.getResizeHandler(), columnResizeMode) doesn't support the\n * behavior our design requires:\n *\n * 1. No proportional fill — TanStack uses absolute pixel widths from\n *    columnDef.size. When the container is wider than the sum of sizes,\n *    the extra space is not distributed. We need weight-based proportional\n *    distribution so columns fill the container at any width.\n *\n * 2. No splitter semantics — TanStack's resize changes one column's size\n *    in isolation (the total table width grows/shrinks). We need \"splitter\"\n *    behavior: dragging column i's right edge grows column i and shrinks\n *    column i+1 by the same amount, keeping the total fixed. This prevents\n *    the actions column from jittering.\n *\n * 3. No per-column min-width enforcement during drag — TanStack only has a\n *    global minSize default. We enforce per-column min-widths and clamp the\n *    drag delta so neither the dragged column nor its neighbor can shrink\n *    below their floor.\n *\n * 4. No weight-based resize persistence — TanStack stores absolute pixel\n *    deltas. When the window resizes after a column drag, the proportions\n *    drift. We store weights, so a user-resized column scales proportionally\n *    with the container — the ratio is preserved, not the pixel count.\n *\n * APPROACH:\n *\n * We still rely on TanStack for everything else (sorting, pagination,\n * visibility, row selection). Only column width computation and resize\n * interaction are handled here. The columnDef.size values are used as\n * initial weights, and TanStack's enableResizing / getCanResize() flags\n * are still respected in the render loop.\n */\n\nimport {\n  useState,\n  useRef,\n  useEffect,\n  useLayoutEffect,\n  useCallback,\n} from \"react\";\nimport { Header } from \"@tanstack/react-table\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\n/** Extracted config ready to pass to useColumnWidths. */\nexport interface WidthConfig {\n  fixedColumnIds: Set<string>;\n  columnWeights: Record<string, number>;\n  columnMinWidths: Record<string, number>;\n}\n\ninterface UseColumnWidthsOptions {\n  /** Visible headers from TanStack's first header group. */\n  headers: Header<any, unknown>[];\n  /** Column IDs that have fixed pixel widths (e.g. qualifier, actions). */\n  fixedColumnIds: Set<string>;\n  /** Explicit column weights (takes precedence over columnDef.size). */\n  columnWeights?: Record<string, number>;\n  /** Per-column minimum widths for data (non-fixed) columns. */\n  columnMinWidths: Record<string, number>;\n}\n\ninterface UseColumnWidthsReturn {\n  /** Attach to the scrollable container for width measurement. */\n  containerRef: React.RefObject<HTMLDivElement | null>;\n  /** Computed pixel widths keyed by column ID. */\n  columnWidths: Record<string, number>;\n  /** Factory to create a splitter resize handler for a column pair. */\n  createResizeHandler: (\n    columnId: string,\n    neighborId: string\n  ) => (event: React.MouseEvent | React.TouchEvent) => void;\n}\n\n// ---------------------------------------------------------------------------\n// Internal: measure container width via ResizeObserver\n// ---------------------------------------------------------------------------\n\n/** Tracks an element's content width via ResizeObserver, returning a ref and the current width. */\nfunction useElementWidth(): [React.RefObject<HTMLDivElement | null>, number] {\n  const ref = useRef<HTMLDivElement>(null);\n  const [width, setWidth] = useState(0);\n  useLayoutEffect(() => {\n    const el = ref.current;\n    if (!el) return;\n    setWidth(el.clientWidth);\n    const ro = new ResizeObserver((entries) => {\n      const entry = entries[0];\n      if (entry) setWidth(entry.contentRect.width);\n    });\n    ro.observe(el);\n    return () => ro.disconnect();\n  }, []);\n  return [ref, width];\n}\n\n// ---------------------------------------------------------------------------\n// Pure function: compute pixel widths from weights\n// ---------------------------------------------------------------------------\n\n/** Converts column weights into pixel widths, enforcing per-column minimums and fixed-column sizes. */\nfunction computeColumnWidths(\n  containerWidth: number,\n  headers: Header<any, unknown>[],\n  customWeights: Record<string, number>,\n  fixedColumnIds: Set<string>,\n  columnWeights: Record<string, number>,\n  columnMinWidths: Record<string, number>\n): Record<string, number> {\n  const result: Record<string, number> = {};\n\n  let fixedTotal = 0;\n  const dataColumns: { id: string; weight: number; minWidth: number }[] = [];\n\n  for (const h of headers) {\n    const baseSize = h.column.columnDef.size ?? 20;\n    if (fixedColumnIds.has(h.id)) {\n      fixedTotal += baseSize;\n    } else {\n      dataColumns.push({\n        id: h.id,\n        weight: customWeights[h.id] ?? columnWeights[h.id] ?? baseSize,\n        minWidth: columnMinWidths[h.id] ?? 50,\n      });\n    }\n  }\n\n  const tableMinWidth =\n    fixedTotal + dataColumns.reduce((sum, col) => sum + col.minWidth, 0);\n  const tableWidth =\n    containerWidth > 0 ? Math.max(containerWidth, tableMinWidth) : 0;\n\n  if (tableWidth === 0) {\n    for (const h of headers) {\n      result[h.id] = h.column.columnDef.size ?? 20;\n    }\n    return result;\n  }\n\n  const available = tableWidth - fixedTotal;\n\n  // Iterative proportional allocation with min-width clamping.\n  // Each pass clamps columns whose proportional share falls below their\n  // minimum, then redistributes remaining space. Repeats until stable.\n  let clampedTotal = 0;\n  const clamped = new Set<string>();\n\n  let stable = false;\n  while (!stable) {\n    stable = true;\n    const unclamped = dataColumns.filter((col) => !clamped.has(col.id));\n    const unclampedWeight = unclamped.reduce((s, c) => s + c.weight, 0);\n    const remaining = available - clampedTotal;\n\n    for (const col of unclamped) {\n      const proportional = remaining * (col.weight / unclampedWeight);\n      if (proportional < col.minWidth) {\n        result[col.id] = col.minWidth;\n        clampedTotal += col.minWidth;\n        clamped.add(col.id);\n        stable = false;\n      }\n    }\n  }\n\n  // Distribute remaining space among unclamped columns\n  const unclampedCols = dataColumns.filter((col) => !clamped.has(col.id));\n  const unclampedWeight = unclampedCols.reduce((s, c) => s + c.weight, 0);\n  const remainingSpace = available - clampedTotal;\n  let assigned = 0;\n\n  for (let i = 0; i < unclampedCols.length; i++) {\n    const col = unclampedCols[i]!;\n    if (i === unclampedCols.length - 1) {\n      result[col.id] = remainingSpace - assigned;\n    } else {\n      const w = Math.round(remainingSpace * (col.weight / unclampedWeight));\n      result[col.id] = w;\n      assigned += w;\n    }\n  }\n\n  // Fixed columns keep their base size\n  for (const h of headers) {\n    if (fixedColumnIds.has(h.id)) {\n      result[h.id] = h.column.columnDef.size ?? 20;\n    }\n  }\n\n  return result;\n}\n\n// ---------------------------------------------------------------------------\n// Pure function: create a splitter resize handler for a column pair\n// ---------------------------------------------------------------------------\n\n/** Creates a mouse/touch drag handler that redistributes weight between two adjacent columns. */\nfunction createSplitterResizeHandler(\n  columnId: string,\n  neighborId: string,\n  startColumnWidth: number,\n  startNeighborWidth: number,\n  startColumnWeight: number,\n  startNeighborWeight: number,\n  columnMinWidth: number,\n  neighborMinWidth: number,\n  setter: (value: React.SetStateAction<Record<string, number>>) => void,\n  isDraggingRef: React.MutableRefObject<boolean>\n): (event: React.MouseEvent | React.TouchEvent) => void {\n  return (event: React.MouseEvent | React.TouchEvent) => {\n    const startX =\n      \"touches\" in event ? event.touches[0]!.clientX : event.clientX;\n\n    isDraggingRef.current = true;\n\n    const onMove = (e: MouseEvent | TouchEvent) => {\n      const currentX =\n        \"touches\" in e\n          ? (e as TouchEvent).touches[0]!.clientX\n          : (e as MouseEvent).clientX;\n      const rawDelta = currentX - startX;\n      const minDelta = columnMinWidth - startColumnWidth;\n      const maxDelta = startNeighborWidth - neighborMinWidth;\n      const delta = Math.max(minDelta, Math.min(maxDelta, rawDelta));\n\n      setter((prev) => ({\n        ...prev,\n        [columnId]:\n          startColumnWeight * ((startColumnWidth + delta) / startColumnWidth),\n        [neighborId]:\n          startNeighborWeight *\n          ((startNeighborWidth - delta) / startNeighborWidth),\n      }));\n    };\n\n    const onUp = () => {\n      document.removeEventListener(\"mousemove\", onMove);\n      document.removeEventListener(\"mouseup\", onUp);\n      document.removeEventListener(\"touchmove\", onMove);\n      document.removeEventListener(\"touchend\", onUp);\n      document.removeEventListener(\"touchcancel\", onUp);\n      document.body.style.userSelect = \"\";\n      document.body.style.cursor = \"\";\n      isDraggingRef.current = false;\n    };\n\n    document.body.style.userSelect = \"none\";\n    document.body.style.cursor = \"col-resize\";\n    document.addEventListener(\"mousemove\", onMove);\n    document.addEventListener(\"mouseup\", onUp);\n    document.addEventListener(\"touchmove\", onMove);\n    document.addEventListener(\"touchend\", onUp);\n    document.addEventListener(\"touchcancel\", onUp);\n  };\n}\n\n// ---------------------------------------------------------------------------\n// Hook\n// ---------------------------------------------------------------------------\n\n/**\n * Computes proportional column pixel widths from weights and provides\n * splitter-style resize handlers that keep total table width constant.\n *\n * @example\n * ```tsx\n * const { containerRef, columnWidths, createResizeHandler } = useColumnWidths({\n *   headers: table.getHeaderGroups()[0].headers,\n *   fixedColumnIds: new Set([\"actions\"]),\n *   columnMinWidths: { name: 72, status: 80 },\n * });\n * ```\n */\nexport default function useColumnWidths({\n  headers,\n  fixedColumnIds,\n  columnWeights = {},\n  columnMinWidths,\n}: UseColumnWidthsOptions): UseColumnWidthsReturn {\n  const [containerRef, containerWidth] = useElementWidth();\n  const [customWeights, setCustomWeights] = useState<Record<string, number>>(\n    {}\n  );\n  const isDraggingRef = useRef(false);\n\n  useEffect(() => {\n    return () => {\n      if (isDraggingRef.current) {\n        document.body.style.userSelect = \"\";\n        document.body.style.cursor = \"\";\n      }\n    };\n  }, []);\n\n  const columnWidths = computeColumnWidths(\n    containerWidth,\n    headers,\n    customWeights,\n    fixedColumnIds,\n    columnWeights,\n    columnMinWidths\n  );\n\n  const createResizeHandler = useCallback(\n    (columnId: string, neighborId: string) => {\n      const header = headers.find((h) => h.id === columnId);\n      const neighbor = headers.find((h) => h.id === neighborId);\n\n      return createSplitterResizeHandler(\n        columnId,\n        neighborId,\n        columnWidths[columnId] ?? 0,\n        columnWidths[neighborId] ?? 0,\n        customWeights[columnId] ??\n          columnWeights[columnId] ??\n          header?.column.columnDef.size ??\n          20,\n        customWeights[neighborId] ??\n          columnWeights[neighborId] ??\n          neighbor?.column.columnDef.size ??\n          20,\n        columnMinWidths[columnId] ?? 50,\n        columnMinWidths[neighborId] ?? 50,\n        setCustomWeights,\n        isDraggingRef\n      );\n    },\n    [headers, columnWidths, customWeights, columnWeights, columnMinWidths]\n  );\n\n  return { containerRef, columnWidths, createResizeHandler };\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/table/hooks/useDataTable.ts",
    "content": "\"use client\";\n\"use no memo\";\n\nimport { useState, useEffect, useMemo, useRef } from \"react\";\nimport {\n  useReactTable,\n  getCoreRowModel,\n  getSortedRowModel,\n  getPaginationRowModel,\n  getFilteredRowModel,\n  type Table,\n  type ColumnDef,\n  type RowData,\n  type SortingState,\n  type RowSelectionState,\n  type ColumnSizingState,\n  type PaginationState,\n  type ColumnResizeMode,\n  type TableOptions,\n  type VisibilityState,\n} from \"@tanstack/react-table\";\n\n// ---------------------------------------------------------------------------\n// Exported types\n// ---------------------------------------------------------------------------\n\nexport type OnyxSortDirection = \"none\" | \"ascending\" | \"descending\";\nexport type OnyxSelectionState = \"none\" | \"partial\" | \"all\";\n\n// ---------------------------------------------------------------------------\n// Exported utility\n// ---------------------------------------------------------------------------\n\n/**\n * Convert a TanStack sort direction to an Onyx sort direction string.\n *\n * This is a **named export** (not on the return object) because it is used\n * statically inside JSX header loops, not tied to hook state.\n */\nexport function toOnyxSortDirection(\n  dir: false | \"asc\" | \"desc\"\n): OnyxSortDirection {\n  if (dir === \"asc\") return \"ascending\";\n  if (dir === \"desc\") return \"descending\";\n  return \"none\";\n}\n\n// ---------------------------------------------------------------------------\n// Global filter value (combines view-mode + text search)\n// ---------------------------------------------------------------------------\n\ninterface GlobalFilterValue {\n  selectedIds: Set<string> | null;\n  searchTerm: string;\n}\n\n// ---------------------------------------------------------------------------\n// Hook options & return types\n// ---------------------------------------------------------------------------\n\n/** Keys managed internally — callers cannot override these via `tableOptions`. */\ntype ManagedKeys =\n  | \"data\"\n  | \"columns\"\n  | \"state\"\n  | \"onSortingChange\"\n  | \"onRowSelectionChange\"\n  | \"onColumnSizingChange\"\n  | \"onColumnVisibilityChange\"\n  | \"onPaginationChange\"\n  | \"onGlobalFilterChange\"\n  | \"getCoreRowModel\"\n  | \"getSortedRowModel\"\n  | \"getPaginationRowModel\"\n  | \"getFilteredRowModel\"\n  | \"globalFilterFn\"\n  | \"columnResizeMode\"\n  | \"enableRowSelection\"\n  | \"enableColumnResizing\"\n  | \"getRowId\";\n\n/**\n * Options accepted by {@link useDataTable}.\n *\n * Only `data` and `columns` are required — everything else has sensible defaults.\n */\ninterface UseDataTableOptions<TData extends RowData> {\n  /** The row data array. */\n  data: TData[];\n  /** TanStack column definitions. */\n  columns: ColumnDef<TData, any>[];\n  /** Rows per page. Set `Infinity` to disable pagination. @default 10 */\n  pageSize?: number;\n  /** Whether rows can be selected. @default true */\n  enableRowSelection?: boolean;\n  /** Whether columns can be resized. @default true */\n  enableColumnResizing?: boolean;\n  /** Stable row identity function. TanStack tracks selection by ID instead of array index. */\n  getRowId: TableOptions<TData>[\"getRowId\"];\n  /** Resize strategy. @default \"onChange\" */\n  columnResizeMode?: ColumnResizeMode;\n  /** Initial sorting state. @default [] */\n  initialSorting?: SortingState;\n  /** Initial column visibility state. @default {} */\n  initialColumnVisibility?: VisibilityState;\n  /** Initial row selection state. Keys are row IDs (from `getRowId`), values are `true`. @default {} */\n  initialRowSelection?: RowSelectionState;\n  /** When true AND `initialRowSelection` is non-empty, start in view-selected mode (filtered to selected rows). @default false */\n  initialViewSelected?: boolean;\n  /** Called whenever the set of selected row IDs changes. */\n  onSelectionChange?: (selectedIds: string[]) => void;\n  /** Search term for global text filtering. Rows are filtered to those containing\n   *  the term in any accessor column value (case-insensitive). */\n  searchTerm?: string;\n  /** Server-side configuration. When provided, enables manual pagination/sorting/filtering. */\n  serverSide?: {\n    totalItems: number;\n    onSortingChange: (sorting: SortingState) => void;\n    onPaginationChange: (pageIndex: number, pageSize: number) => void;\n    onSearchTermChange: (searchTerm: string) => void;\n  };\n  /** Escape-hatch: extra options spread into `useReactTable`. Managed keys are excluded. */\n  tableOptions?: Partial<Omit<TableOptions<TData>, ManagedKeys>>;\n}\n\n/**\n * Values returned by {@link useDataTable}.\n */\ninterface UseDataTableReturn<TData extends RowData> {\n  /** Full TanStack table instance for rendering. */\n  table: Table<TData>;\n\n  // Pagination (1-based, matching Onyx Footer)\n  /** Current page number (1-based). */\n  currentPage: number;\n  /** Total number of pages. */\n  totalPages: number;\n  /** Total number of rows. */\n  totalItems: number;\n  /** Rows per page. */\n  pageSize: number;\n  /** Navigate to a page (1-based, clamped to valid range). */\n  setPage: (page: number) => void;\n  /** Whether pagination is active (pageSize is finite). */\n  isPaginated: boolean;\n\n  // Selection (pre-computed for Onyx Footer)\n  /** Aggregate selection state for the current page. */\n  selectionState: OnyxSelectionState;\n  /** Number of selected rows. */\n  selectedCount: number;\n  /** Whether every row on the current page is selected. */\n  isAllPageRowsSelected: boolean;\n  /** IDs of currently selected rows (derived from `getRowId`). */\n  selectedRowIds: string[];\n  /** Deselect all rows. */\n  clearSelection: () => void;\n  /** Select or deselect all rows on the current page. */\n  toggleAllPageRowsSelected: (selected: boolean) => void;\n  /** Select or deselect all rows across all pages. */\n  toggleAllRowsSelected: (selected: boolean) => void;\n  /** Whether every row across all pages is selected. */\n  isAllRowsSelected: boolean;\n\n  // View-mode (filter to selected rows)\n  /** Whether the table is currently filtered to show only selected rows. */\n  isViewingSelected: boolean;\n  /** Enter view mode — freeze the current selection as a filter. */\n  enterViewMode: () => void;\n  /** Exit view mode — remove the selection filter. */\n  exitViewMode: () => void;\n}\n\n// ---------------------------------------------------------------------------\n// Hook\n// ---------------------------------------------------------------------------\n\n/**\n * Wraps TanStack `useReactTable` with Onyx-specific defaults and derived\n * state so that consumers only need to provide `data` + `columns`.\n *\n * @example\n * ```tsx\n * const {\n *   table, currentPage, totalPages, setPage, pageSize,\n *   selectionState, selectedCount, clearSelection,\n * } = useDataTable({ data: rows, columns });\n * ```\n */\nexport default function useDataTable<TData extends RowData>(\n  options: UseDataTableOptions<TData>\n): UseDataTableReturn<TData> {\n  const {\n    data,\n    columns,\n    pageSize: pageSizeOption = 10,\n    enableRowSelection = true,\n    enableColumnResizing = true,\n    columnResizeMode = \"onChange\",\n    initialSorting = [],\n    initialColumnVisibility = {},\n    initialRowSelection = {},\n    initialViewSelected = false,\n    getRowId,\n    onSelectionChange,\n    searchTerm,\n    serverSide,\n    tableOptions,\n  } = options;\n\n  const isServerSide = !!serverSide;\n\n  // ---- internal state -----------------------------------------------------\n  const [sorting, setSorting] = useState<SortingState>(initialSorting);\n  const [rowSelection, setRowSelection] =\n    useState<RowSelectionState>(initialRowSelection);\n  const [columnSizing, setColumnSizing] = useState<ColumnSizingState>({});\n  const [columnVisibility, setColumnVisibility] = useState<VisibilityState>(\n    initialColumnVisibility\n  );\n  const [pagination, setPagination] = useState<PaginationState>({\n    pageIndex: 0,\n    pageSize: pageSizeOption,\n  });\n  /** Combined global filter: view-mode (selected IDs) + text search. */\n  const initialSelectedIds =\n    initialViewSelected && Object.keys(initialRowSelection).length > 0\n      ? new Set(Object.keys(initialRowSelection))\n      : null;\n  const [globalFilter, setGlobalFilter] = useState<GlobalFilterValue>({\n    selectedIds: initialSelectedIds,\n    searchTerm: \"\",\n  });\n\n  // ---- sync pageSize prop to internal state --------------------------------\n  useEffect(() => {\n    setPagination((prev) => ({\n      ...prev,\n      pageSize: pageSizeOption,\n      pageIndex: 0,\n    }));\n  }, [pageSizeOption]);\n\n  // ---- sync external searchTerm prop into combined filter state ------------\n  // (client-side only — server-side uses separate callbacks instead)\n  const preSearchPageRef = useRef<number>(0);\n\n  useEffect(() => {\n    if (isServerSide) return;\n    const term = searchTerm ?? \"\";\n    const wasSearching = !!globalFilter.searchTerm;\n\n    if (!wasSearching && term) {\n      // Entering search — save current page, reset to 0\n      preSearchPageRef.current = pagination.pageIndex;\n      setPagination((p) => ({ ...p, pageIndex: 0 }));\n    } else if (wasSearching && !term) {\n      // Clearing search — restore saved page\n      setPagination((p) => ({ ...p, pageIndex: preSearchPageRef.current }));\n    }\n\n    setGlobalFilter((prev) => ({ ...prev, searchTerm: term }));\n    // eslint-disable-next-line react-hooks/exhaustive-deps -- Intentionally\n    // omits `globalFilter` and `pagination.pageIndex`: we only read snapshot\n    // values to detect the search enter/clear transition, not to react to\n    // every filter or page change.\n  }, [searchTerm, isServerSide]);\n\n  // ---- server-side: 3 separate callbacks -----------------------------------\n  // Single ref for the whole serverSide config — prevents effects from\n  // re-firing when the consumer passes an inline object each render.\n  const serverSideRef = useRef(serverSide);\n  serverSideRef.current = serverSide;\n\n  useEffect(() => {\n    if (!isServerSide) return;\n    serverSideRef.current!.onSortingChange(sorting);\n  }, [sorting, isServerSide]);\n\n  useEffect(() => {\n    if (!isServerSide) return;\n    serverSideRef.current!.onPaginationChange(\n      pagination.pageIndex,\n      pagination.pageSize\n    );\n  }, [pagination.pageIndex, pagination.pageSize, isServerSide]);\n\n  useEffect(() => {\n    if (!isServerSide) return;\n    setPagination((p) => ({ ...p, pageIndex: 0 }));\n    serverSideRef.current!.onSearchTermChange(searchTerm ?? \"\");\n  }, [searchTerm, isServerSide]);\n\n  // ---- TanStack table instance --------------------------------------------\n  const serverPageCount = isServerSide\n    ? isFinite(pagination.pageSize) && pagination.pageSize > 0\n      ? Math.ceil((serverSide!.totalItems || 0) / pagination.pageSize)\n      : 1\n    : undefined;\n\n  const tableOpts: TableOptions<TData> = {\n    data,\n    columns,\n    getRowId,\n    state: {\n      sorting,\n      rowSelection,\n      columnSizing,\n      columnVisibility,\n      pagination,\n      ...(isServerSide ? {} : { globalFilter }),\n    },\n    onSortingChange: isServerSide\n      ? (updater) => {\n          setSorting(updater);\n          setPagination((p) => ({ ...p, pageIndex: 0 }));\n        }\n      : setSorting,\n    onRowSelectionChange: setRowSelection,\n    onColumnSizingChange: setColumnSizing,\n    onColumnVisibilityChange: setColumnVisibility,\n    onPaginationChange: setPagination,\n    getCoreRowModel: getCoreRowModel(),\n    // We manage page resets explicitly (search enter/clear, view mode,\n    // pageSize change) so disable TanStack's auto-reset which would\n    // clobber our restored page index when the filter changes.\n    autoResetPageIndex: false,\n    columnResizeMode,\n    enableRowSelection,\n    enableColumnResizing,\n    ...tableOptions,\n  };\n\n  if (isServerSide) {\n    tableOpts.manualPagination = true;\n    tableOpts.manualSorting = true;\n    tableOpts.manualFiltering = true;\n    tableOpts.pageCount = serverPageCount;\n  } else {\n    tableOpts.onGlobalFilterChange = setGlobalFilter;\n    tableOpts.getSortedRowModel = getSortedRowModel();\n    tableOpts.getPaginationRowModel = getPaginationRowModel();\n    tableOpts.getFilteredRowModel = getFilteredRowModel();\n    tableOpts.globalFilterFn = (\n      row,\n      _columnId,\n      filterValue: GlobalFilterValue\n    ) => {\n      // View-mode filter (selected IDs)\n      if (\n        filterValue.selectedIds != null &&\n        !filterValue.selectedIds.has(row.id)\n      ) {\n        return false;\n      }\n      // Text search filter\n      if (filterValue.searchTerm) {\n        const term = filterValue.searchTerm.toLowerCase();\n        return row.getAllCells().some((cell) => {\n          const value = cell.getValue();\n          if (value == null) return false;\n          return String(value).toLowerCase().includes(term);\n        });\n      }\n      return true;\n    };\n  }\n\n  const table = useReactTable(tableOpts);\n\n  // ---- derived values -----------------------------------------------------\n  const isAllPageRowsSelected = table.getIsAllPageRowsSelected();\n  const isSomePageRowsSelected = table.getIsSomePageRowsSelected();\n\n  const selectionState: OnyxSelectionState = isAllPageRowsSelected\n    ? \"all\"\n    : isSomePageRowsSelected\n      ? \"partial\"\n      : \"none\";\n\n  const selectedRowIds = useMemo(\n    () => Object.keys(rowSelection),\n    [rowSelection]\n  );\n  const selectedCount = selectedRowIds.length;\n  const totalPages = Math.max(1, table.getPageCount());\n  const currentPage = pagination.pageIndex + 1;\n  const hasActiveFilter =\n    !isServerSide &&\n    (globalFilter.selectedIds != null || !!globalFilter.searchTerm);\n  const totalItems = isServerSide\n    ? serverSide!.totalItems\n    : hasActiveFilter\n      ? table.getPrePaginationRowModel().rows.length\n      : data.length;\n  const isPaginated = isFinite(pagination.pageSize);\n\n  // ---- keep view-mode filter in sync with selection ----------------------\n  // When in view-selected mode, deselecting a row should remove it from\n  // the visible set so it disappears immediately.\n  useEffect(() => {\n    if (isServerSide) return;\n    if (globalFilter.selectedIds == null) return;\n\n    const currentIds = new Set(Object.keys(rowSelection));\n    // Remove any ID from the filter that is no longer selected\n    let changed = false;\n    const next = new Set<string>();\n    globalFilter.selectedIds.forEach((id) => {\n      if (currentIds.has(id)) {\n        next.add(id);\n      } else {\n        changed = true;\n      }\n    });\n    if (changed) {\n      setGlobalFilter((prev) => ({ ...prev, selectedIds: next }));\n    }\n    // eslint-disable-next-line react-hooks/exhaustive-deps -- only react to\n    // selection changes while in view mode\n  }, [rowSelection, isServerSide]);\n\n  // ---- selection change callback ------------------------------------------\n  const isFirstRenderRef = useRef(true);\n  const onSelectionChangeRef = useRef(onSelectionChange);\n  onSelectionChangeRef.current = onSelectionChange;\n\n  useEffect(() => {\n    if (isFirstRenderRef.current) {\n      isFirstRenderRef.current = false;\n      // Still fire the callback on first render if there's an initial selection\n      if (selectedRowIds.length > 0) {\n        onSelectionChangeRef.current?.(selectedRowIds);\n      }\n      return;\n    }\n    onSelectionChangeRef.current?.(selectedRowIds);\n  }, [selectedRowIds]);\n\n  // ---- actions ------------------------------------------------------------\n  const setPage = (page: number) => {\n    const clamped = Math.max(1, Math.min(page, totalPages));\n    setPagination((prev) => ({ ...prev, pageIndex: clamped - 1 }));\n  };\n\n  const clearSelection = () => {\n    table.resetRowSelection();\n  };\n\n  const toggleAllPageRowsSelected = (selected: boolean) => {\n    table.toggleAllPageRowsSelected(selected);\n  };\n\n  // TODO (@raunakab): In server-side mode, these only operate on the loaded\n  // page data, not all rows across all pages. TanStack can't select rows it\n  // doesn't have. Fixing this requires a server-side callback (e.g.\n  // `onSelectAll`) and a `totalItems`-aware selection model.\n  const toggleAllRowsSelected = (selected: boolean) => {\n    table.toggleAllRowsSelected(selected);\n  };\n\n  const isAllRowsSelected = table.getIsAllRowsSelected();\n\n  // ---- view mode (filter to selected rows) --------------------------------\n  const isViewingSelected = globalFilter.selectedIds != null;\n\n  const enterViewMode = () => {\n    if (isServerSide) return;\n    if (selectedRowIds.length > 0) {\n      setGlobalFilter((prev) => ({\n        ...prev,\n        selectedIds: new Set(selectedRowIds),\n      }));\n      setPagination((prev) => ({ ...prev, pageIndex: 0 }));\n    }\n  };\n\n  const exitViewMode = () => {\n    if (isServerSide) return;\n    setGlobalFilter((prev) => ({ ...prev, selectedIds: null }));\n    setPagination((prev) => ({ ...prev, pageIndex: 0 }));\n  };\n\n  return {\n    table,\n    currentPage,\n    totalPages,\n    totalItems,\n    pageSize: pagination.pageSize,\n    setPage,\n    isPaginated,\n    selectionState,\n    selectedCount,\n    selectedRowIds,\n    isAllPageRowsSelected,\n    isAllRowsSelected,\n    clearSelection,\n    toggleAllPageRowsSelected,\n    toggleAllRowsSelected,\n    isViewingSelected,\n    enterViewMode,\n    exitViewMode,\n  };\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/table/hooks/useDraggableRows.ts",
    "content": "\"use client\";\n\nimport { useState, useCallback, useMemo, useRef } from \"react\";\nimport {\n  useSensors,\n  useSensor,\n  PointerSensor,\n  KeyboardSensor,\n  closestCenter,\n  type DragStartEvent,\n  type DragEndEvent,\n} from \"@dnd-kit/core\";\nimport { arrayMove, sortableKeyboardCoordinates } from \"@dnd-kit/sortable\";\nimport { restrictToVerticalAxis } from \"@dnd-kit/modifiers\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface UseDraggableRowsOptions<TData> {\n  /** Current display-order data. */\n  data: TData[];\n  /** Extract a unique string ID from each row. */\n  getRowId: (row: TData) => string;\n  /** Whether DnD row reordering is active (e.g. set to `false` when column sorting is active). @default true */\n  enabled?: boolean;\n  /** Called after a successful reorder with the new ID order and a map of changed positions. */\n  onReorder?: (\n    ids: string[],\n    changedOrders: Record<string, number>\n  ) => void | Promise<void>;\n}\n\ninterface DraggableRowsReturn {\n  /** Props to pass to TableBody's `dndSortable` prop. */\n  dndContextProps: {\n    sensors: ReturnType<typeof useSensors>;\n    collisionDetection: typeof closestCenter;\n    modifiers: Array<typeof restrictToVerticalAxis>;\n    onDragStart: (event: DragStartEvent) => void;\n    onDragEnd: (event: DragEndEvent) => void;\n    onDragCancel: () => void;\n  };\n  /** Ordered list of IDs for SortableContext. */\n  sortableItems: string[];\n  /** ID of the currently dragged row, or null. */\n  activeId: string | null;\n  /** Whether a drag is in progress. */\n  isDragging: boolean;\n  /** Whether DnD is enabled. */\n  isEnabled: boolean;\n  /** Ref that is `true` briefly after a drag ends, used to suppress the trailing click. */\n  wasDraggingRef: React.RefObject<boolean>;\n}\n\n// ---------------------------------------------------------------------------\n// Hook\n// ---------------------------------------------------------------------------\n\n/**\n * Manages drag-and-drop row reordering using @dnd-kit, providing sensor\n * configuration, sortable item IDs, drag state, and a reorder callback\n * that reports only the changed positions.\n *\n * @example\n * ```tsx\n * const { dndContextProps, sortableItems, activeId } = useDraggableRows({\n *   data: rows,\n *   getRowId: (row) => row.id,\n *   onReorder: (ids, changed) => saveNewOrder(changed),\n * });\n * ```\n */\nexport default function useDraggableRows<TData>(\n  options: UseDraggableRowsOptions<TData>\n): DraggableRowsReturn {\n  const { data, getRowId, enabled = true, onReorder } = options;\n\n  const [activeId, setActiveId] = useState<string | null>(null);\n  const wasDraggingRef = useRef(false);\n\n  const sensors = useSensors(\n    useSensor(PointerSensor, {\n      activationConstraint: { distance: 5 },\n    }),\n    useSensor(KeyboardSensor, {\n      coordinateGetter: sortableKeyboardCoordinates,\n    })\n  );\n\n  const sortableItems = useMemo(\n    () => data.map((row) => getRowId(row)),\n    [data, getRowId]\n  );\n\n  const sortableIndexMap = useMemo(() => {\n    const map = new Map<string, number>();\n    for (let i = 0; i < sortableItems.length; i++) {\n      const item = sortableItems[i];\n      if (item !== undefined) {\n        map.set(item, i);\n      }\n    }\n    return map;\n  }, [sortableItems]);\n\n  const handleDragStart = useCallback((event: DragStartEvent) => {\n    setActiveId(String(event.active.id));\n  }, []);\n\n  const handleDragEnd = useCallback(\n    (event: DragEndEvent) => {\n      setActiveId(null);\n      // Suppress the trailing click event that the browser fires after pointerup.\n      wasDraggingRef.current = true;\n      requestAnimationFrame(() => {\n        wasDraggingRef.current = false;\n      });\n      if (event.activatorEvent instanceof PointerEvent) {\n        (document.activeElement as HTMLElement)?.blur();\n      }\n      const { active, over } = event;\n      if (!over || active.id === over.id) return;\n\n      const oldIndex = sortableIndexMap.get(String(active.id));\n      const newIndex = sortableIndexMap.get(String(over.id));\n      if (oldIndex === undefined || newIndex === undefined) return;\n\n      const reordered = arrayMove(sortableItems, oldIndex, newIndex);\n\n      const minIdx = Math.min(oldIndex, newIndex);\n      const maxIdx = Math.max(oldIndex, newIndex);\n      const changedOrders: Record<string, number> = {};\n      for (let i = minIdx; i <= maxIdx; i++) {\n        const id = reordered[i];\n        if (id !== undefined) {\n          changedOrders[id] = i;\n        }\n      }\n\n      onReorder?.(reordered, changedOrders);\n    },\n    [sortableItems, sortableIndexMap, onReorder]\n  );\n\n  const handleDragCancel = useCallback(() => {\n    setActiveId(null);\n  }, []);\n\n  return {\n    dndContextProps: {\n      sensors,\n      collisionDetection: closestCenter,\n      modifiers: [restrictToVerticalAxis],\n      onDragStart: handleDragStart,\n      onDragEnd: handleDragEnd,\n      onDragCancel: handleDragCancel,\n    },\n    sortableItems,\n    activeId,\n    isDragging: activeId !== null,\n    isEnabled: enabled,\n    wasDraggingRef,\n  };\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/table/styles.css",
    "content": "/* Imports shared timing tokens (--interactive-duration, --interactive-easing) */\n@import \"@opal/core/interactive/shared.css\";\n\n/* ---------------------------------------------------------------------------\n * Table primitives — data-attribute driven styling\n * Follows the same pattern as card.css / line-item.css.\n * ------------------------------------------------------------------------- */\n\n/* ---- TableCell ---- */\n\n.tbl-cell[data-size=\"lg\"] {\n  @apply px-1 py-0.5;\n}\n.tbl-cell[data-size=\"md\"] {\n  @apply pl-0.5 pr-1.5 py-1.5;\n}\n\n.tbl-cell-inner[data-size=\"lg\"] {\n  @apply h-10 px-1;\n}\n.tbl-cell-inner[data-size=\"md\"] {\n  @apply h-6 px-0.5;\n}\n\n/* ---- TableHead ---- */\n\n.table-head {\n  @apply relative;\n}\n.table-head[data-size=\"lg\"] {\n  @apply px-2 py-1;\n}\n.table-head[data-size=\"md\"] {\n  @apply px-2 py-1;\n}\n.table-head[data-bottom-border] {\n  @apply border-b border-transparent hover:border-border-03;\n}\n\n/* Inner text wrapper */\n.table-head[data-size=\"lg\"] .table-head-label {\n  @apply py-2 px-0.5;\n}\n.table-head[data-size=\"md\"] .table-head-label {\n  @apply py-1;\n}\n\n/* Sort button wrapper */\n.table-head[data-size=\"lg\"] .table-head-sort {\n  @apply py-1.5;\n}\n\n/* ---- TableRow (base) ---- */\n\n.tbl-row > td {\n  @apply bg-background-tint-00;\n  transition: background-color var(--interactive-duration)\n    var(--interactive-easing);\n}\n\n.tbl-row[data-selected] > td {\n  @apply bg-[var(--action-link-01)];\n}\n\n.tbl-row[data-disabled] {\n  @apply pointer-events-none;\n}\n\n/* Suppress default focus ring on rows — the row bg is the indicator */\n.tbl-row:focus,\n.tbl-row:focus-visible {\n  outline: none;\n}\n\n/* ---- variant=\"rows\" — traditional borders, no gaps ---- */\n\ntable[data-variant=\"rows\"] .tbl-row > td {\n  @apply border-b border-border-01;\n}\n\n/* Hover/focus only for selectable tables */\ntable[data-variant=\"rows\"][data-selection=\"single-select\"] .tbl-row,\ntable[data-variant=\"rows\"][data-selection=\"multi-select\"] .tbl-row {\n  @apply cursor-pointer;\n}\ntable[data-variant=\"rows\"][data-selection=\"single-select\"] .tbl-row:hover > td,\ntable[data-variant=\"rows\"][data-selection=\"multi-select\"] .tbl-row:hover > td {\n  @apply bg-background-tint-02;\n}\ntable[data-variant=\"rows\"] .tbl-row:focus-visible > td,\ntable[data-variant=\"rows\"] .tbl-row:has(:focus-visible) > td {\n  @apply bg-action-link-01;\n}\n\n/* ---- variant=\"cards\" — rounded cards with gap ---- */\n\ntable[data-variant=\"cards\"] .tbl-row > td {\n  @apply bg-clip-padding border-y-[2px] border-x-0 border-transparent;\n}\ntable[data-variant=\"cards\"] .tbl-row > td:first-child {\n  @apply rounded-l-12;\n}\ntable[data-variant=\"cards\"] .tbl-row > td:last-child {\n  @apply rounded-r-12;\n}\n\n/* When a drag handle is present the second-to-last td gets the rounding */\ntable[data-variant=\"cards\"] .tbl-row[data-drag-handle] > td:nth-last-child(2) {\n  @apply rounded-r-12;\n}\ntable[data-variant=\"cards\"] .tbl-row[data-drag-handle] > td:last-child {\n  border-radius: 0;\n}\n\n/* Hover/focus only for selectable tables */\ntable[data-variant=\"cards\"][data-selection=\"single-select\"] .tbl-row,\ntable[data-variant=\"cards\"][data-selection=\"multi-select\"] .tbl-row {\n  @apply cursor-pointer;\n}\ntable[data-variant=\"cards\"][data-selection=\"single-select\"] .tbl-row:hover > td,\ntable[data-variant=\"cards\"][data-selection=\"multi-select\"] .tbl-row:hover > td {\n  @apply bg-background-tint-02;\n}\ntable[data-variant=\"cards\"] .tbl-row:focus-visible > td,\ntable[data-variant=\"cards\"] .tbl-row:has(:focus-visible) > td {\n  @apply bg-action-link-01;\n}\n\n/* ---- QualifierContainer ---- */\n\n.tbl-qualifier[data-type=\"head\"] {\n  @apply w-px whitespace-nowrap py-1;\n}\n.tbl-qualifier[data-type=\"head\"][data-size=\"md\"] {\n  @apply py-0.5;\n}\n\n.tbl-qualifier[data-type=\"cell\"] {\n  @apply w-px whitespace-nowrap py-1;\n}\n.tbl-qualifier[data-type=\"cell\"][data-size=\"md\"] {\n  @apply py-0.5;\n}\n\n/* ---- ActionsContainer ---- */\n\n.tbl-actions {\n  @apply w-px whitespace-nowrap px-1;\n}\n.tbl-actions[data-type=\"head\"] {\n  @apply px-2 py-1;\n}\n\n/* ---- Footer ---- */\n\n.table-footer[data-size=\"lg\"] {\n  @apply min-h-[2.75rem];\n}\n.table-footer[data-size=\"md\"] {\n  @apply min-h-[2.25rem];\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/table/types.ts",
    "content": "import type { ReactNode } from \"react\";\nimport type {\n  ColumnDef,\n  SortingState,\n  VisibilityState,\n} from \"@tanstack/react-table\";\nimport type { TableSize } from \"@opal/components/table/TableSizeContext\";\nimport type { TableVariant } from \"@opal/components/table/TableElement\";\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport type { SortDirection } from \"@opal/components/table/TableHead\";\n\n// ---------------------------------------------------------------------------\n// Column width (mirrors useColumnWidths types)\n// ---------------------------------------------------------------------------\n\n/** Width config for a data column (participates in proportional distribution). */\nexport interface DataColumnWidth {\n  weight: number;\n  minWidth?: number;\n}\n\n/** Width config for a fixed column (exact pixels, no proportional distribution). */\nexport interface FixedColumnWidth {\n  fixed: number;\n}\n\nexport type ColumnWidth = DataColumnWidth | FixedColumnWidth;\n\n// ---------------------------------------------------------------------------\n// Column kind discriminant\n// ---------------------------------------------------------------------------\n\nexport type QualifierContentType = \"simple\" | \"icon\" | \"image\";\n\nexport type OnyxColumnKind = \"qualifier\" | \"data\" | \"display\" | \"actions\";\n\n// ---------------------------------------------------------------------------\n// Column definitions (discriminated union on `kind`)\n// ---------------------------------------------------------------------------\n\ninterface OnyxColumnBase<TData> {\n  kind: OnyxColumnKind;\n  /** Stable column identifier (mirrors the TanStack column ID). */\n  id: string;\n  def: ColumnDef<TData, any>;\n  width: ColumnWidth | ((size: TableSize) => ColumnWidth);\n}\n\n/** Qualifier column — leading avatar/icon/checkbox column. */\nexport interface OnyxQualifierColumn<TData> extends OnyxColumnBase<TData> {\n  kind: \"qualifier\";\n  /** Content type for body-row `<TableQualifier>`. */\n  content: QualifierContentType;\n  /** Return the icon component to render for a row (for \"icon\" content). */\n  getContent?: (row: TData) => IconFunctionComponent;\n  /** Return the image URL to render for a row (for \"image\" content). */\n  getImageSrc?: (row: TData) => string;\n  /** Return the image alt text for a row (for \"image\" content). @default \"\" */\n  getImageAlt?: (row: TData) => string;\n  /** Show a tinted background container behind the content. @default false */\n  background?: boolean;\n  /** Icon size preset. Use `\"lg\"` for avatars, `\"md\"` for regular icons. @default \"md\" */\n  iconSize?: \"lg\" | \"md\";\n}\n\n/** Data column — accessor-based column with sorting/resizing. */\nexport interface OnyxDataColumn<TData> extends OnyxColumnBase<TData> {\n  kind: \"data\";\n  /** Override the sort icon for this column. */\n  icon?: (sorted: SortDirection) => IconFunctionComponent;\n}\n\n/** Display column — non-accessor column with custom rendering. */\nexport interface OnyxDisplayColumn<TData> extends OnyxColumnBase<TData> {\n  kind: \"display\";\n}\n\n/** Actions column — fixed column with visibility/sorting popovers. */\nexport interface OnyxActionsColumn<TData> extends OnyxColumnBase<TData> {\n  kind: \"actions\";\n  /** Show column visibility popover. @default true */\n  showColumnVisibility?: boolean;\n  /** Show sorting popover. @default true */\n  showSorting?: boolean;\n  /** Footer text for the sorting popover. */\n  sortingFooterText?: string;\n}\n\n/** Discriminated union of all column types. */\nexport type OnyxColumnDef<TData> =\n  | OnyxQualifierColumn<TData>\n  | OnyxDataColumn<TData>\n  | OnyxDisplayColumn<TData>\n  | OnyxActionsColumn<TData>;\n\n// ---------------------------------------------------------------------------\n// Server-side pagination / sorting / search\n// ---------------------------------------------------------------------------\n\n/** Server-side configuration for DataTable. */\nexport interface ServerSideConfig {\n  /** Total row count from the server. Used to compute page count. */\n  totalItems: number;\n  /** Whether data is currently being fetched. Shows loading state. */\n  isLoading?: boolean;\n  /** Fired when sorting state changes. */\n  onSortingChange: (sorting: SortingState) => void;\n  /** Fired when pagination changes (including page resets from sort/search). */\n  onPaginationChange: (pageIndex: number, pageSize: number) => void;\n  /** Fired when searchTerm changes. */\n  onSearchTermChange: (searchTerm: string) => void;\n}\n\n// ---------------------------------------------------------------------------\n// DataTable props\n// ---------------------------------------------------------------------------\n\nexport interface DataTableDraggableConfig {\n  /** Called after a successful reorder with the new ID order and changed positions. */\n  onReorder: (\n    ids: string[],\n    changedOrders: Record<string, number>\n  ) => void | Promise<void>;\n}\n\n/** Footer configuration. Mode is derived from `selectionBehavior` automatically. */\nexport interface DataTableFooterConfig {\n  /** Handler for the \"Clear\" button (multi-select only). When omitted, the default clearSelection is used. */\n  onClear?: () => void;\n  /** Unit label for count pagination, e.g. \"users\", \"documents\" (multi-select only). */\n  units?: string;\n  /** Optional extra element rendered after the summary text, e.g. a download icon (summary mode only). */\n  leftExtra?: ReactNode;\n}\n\nexport interface DataTableProps<TData> {\n  /** Row data array. */\n  data: TData[];\n  /** Column definitions created via `createTableColumns()`. */\n  columns: OnyxColumnDef<TData>[];\n  /** Extract a unique string ID from each row. Used for stable row identity. */\n  getRowId: (row: TData) => string;\n  /** Rows per page. Set `Infinity` to disable pagination. @default 10 */\n  pageSize?: number;\n  /** Initial sorting state. */\n  initialSorting?: SortingState;\n  /** Initial column visibility state. */\n  initialColumnVisibility?: VisibilityState;\n  /** Initial row selection state. Keys are row IDs (from `getRowId`), values are `true`. */\n  initialRowSelection?: Record<string, boolean>;\n  /** When true AND `initialRowSelection` is non-empty, start in view-selected mode. @default false */\n  initialViewSelected?: boolean;\n  /** Enable drag-and-drop row reordering. */\n  draggable?: DataTableDraggableConfig;\n  /** Footer configuration. */\n  footer?: DataTableFooterConfig;\n  /** Table size variant. @default \"lg\" */\n  size?: TableSize;\n  /** Visual row variant. @default \"cards\" */\n  variant?: TableVariant;\n  /** Called whenever the set of selected row IDs changes. Receives IDs produced by `getRowId`. */\n  onSelectionChange?: (selectedIds: string[]) => void;\n  /** Called when a row is clicked (replaces the default selection toggle). */\n  onRowClick?: (row: TData) => void;\n  /** Search term for global text filtering. When provided, rows are filtered\n   *  to those containing the term in any accessor column value (case-insensitive). */\n  searchTerm?: string;\n  /**\n   * Max height of the scrollable table area. When set, the table body scrolls\n   * vertically while the header stays pinned at the top.\n   * Accepts a pixel number (e.g. `300`) or a CSS value string (e.g. `\"50vh\"`).\n   */\n  height?: number | string;\n  /**\n   * Enable server-side mode. When provided:\n   * - TanStack uses manualPagination/manualSorting/manualFiltering\n   * - `data` should contain only the current page's rows\n   * - Dragging is automatically disabled\n   * - Fires separate callbacks for sorting, pagination, and search changes\n   */\n  serverSide?: ServerSideConfig;\n  /** Content to render inside the table body when there are no rows. */\n  emptyState?: React.ReactNode;\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/tag/README.md",
    "content": "# Tag\n\n**Import:** `import { Tag, type TagProps } from \"@opal/components\";`\n\nA small colored label used to annotate items with status, category, or metadata. Fixed at 1rem height, uses `font-figure-small-value`.\n\n## Props\n\n| Prop | Type | Default | Description |\n|---|---|---|---|\n| `title` | `string` | **(required)** | Tag label text |\n| `color` | `TagColor` | `\"gray\"` | Color variant |\n| `icon` | `IconFunctionComponent` | — | Optional icon before the title |\n\n### `TagColor`\n\n`\"green\" | \"blue\" | \"purple\" | \"amber\" | \"gray\"`\n\n| Color | Background | Text |\n|---|---|---|\n| `green` | `theme-green-01` | `theme-green-05` |\n| `blue` | `theme-blue-01` | `theme-blue-05` |\n| `purple` | `theme-purple-01` | `theme-purple-05` |\n| `amber` | `theme-amber-01` | `theme-amber-05` |\n| `gray` | `background-tint-02` | `text-03` |\n\n## Usage Examples\n\n```tsx\nimport { Tag } from \"@opal/components\";\nimport SvgStar from \"@opal/icons/star\";\n\n// Basic\n<Tag title=\"New\" color=\"green\" />\n\n// With icon\n<Tag icon={SvgStar} title=\"Featured\" color=\"purple\" />\n\n// Default gray\n<Tag title=\"Draft\" />\n```\n\n## Usage inside Content\n\nTag can be rendered as an accessory inside `Content`'s ContentMd via the `tag` prop:\n\n```tsx\nimport { Content } from \"@opal/layouts\";\nimport SvgSearch from \"@opal/icons/search\";\n\n<Content\n  icon={SvgSearch}\n  sizePreset=\"main-ui\"\n  title=\"My Item\"\n  tag={{ title: \"New\", color: \"green\" }}\n/>\n```\n"
  },
  {
    "path": "web/lib/opal/src/components/tag/Tag.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { Tag } from \"@opal/components\";\nimport { SvgAlertCircle } from \"@opal/icons\";\n\nconst TAG_COLORS = [\"green\", \"purple\", \"blue\", \"gray\", \"amber\"] as const;\n\nconst meta: Meta<typeof Tag> = {\n  title: \"opal/components/Tag\",\n  component: Tag,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Tag>;\n\nexport const Default: Story = {\n  args: {\n    title: \"Label\",\n  },\n};\n\nexport const AllColors: Story = {\n  render: () => (\n    <div className=\"flex items-center gap-2\">\n      {TAG_COLORS.map((color) => (\n        <Tag key={color} title={color} color={color} />\n      ))}\n    </div>\n  ),\n};\n\nexport const WithIcon: Story = {\n  args: {\n    title: \"Alert\",\n    icon: SvgAlertCircle,\n  },\n};\n\nexport const AllColorsWithIcon: Story = {\n  render: () => (\n    <div className=\"flex items-center gap-2\">\n      {TAG_COLORS.map((color) => (\n        <Tag key={color} title={color} color={color} icon={SvgAlertCircle} />\n      ))}\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/lib/opal/src/components/tag/components.tsx",
    "content": "import \"@opal/components/tag/styles.css\";\nimport type { IconFunctionComponent, RichStr } from \"@opal/types\";\nimport { Text } from \"@opal/components\";\nimport { cn } from \"@opal/utils\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype TagColor = \"green\" | \"purple\" | \"blue\" | \"gray\" | \"amber\";\n\ntype TagSize = \"sm\" | \"md\";\n\ninterface TagProps {\n  /** Optional icon component. */\n  icon?: IconFunctionComponent;\n\n  /** Tag label text. */\n  title: string | RichStr;\n\n  /** Color variant. Default: `\"gray\"`. */\n  color?: TagColor;\n\n  /** Size variant. Default: `\"sm\"`. */\n  size?: TagSize;\n}\n\n// ---------------------------------------------------------------------------\n// Color config\n// ---------------------------------------------------------------------------\n\nconst COLOR_CONFIG: Record<TagColor, { bg: string; text: string }> = {\n  green: { bg: \"bg-theme-green-01\", text: \"text-theme-green-05\" },\n  blue: { bg: \"bg-theme-blue-01\", text: \"text-theme-blue-05\" },\n  purple: { bg: \"bg-theme-purple-01\", text: \"text-theme-purple-05\" },\n  amber: { bg: \"bg-theme-amber-01\", text: \"text-theme-amber-05\" },\n  gray: { bg: \"bg-background-tint-02\", text: \"text-text-03\" },\n};\n\n// ---------------------------------------------------------------------------\n// Tag\n// ---------------------------------------------------------------------------\n\nfunction Tag({ icon: Icon, title, color = \"gray\", size = \"sm\" }: TagProps) {\n  const config = COLOR_CONFIG[color];\n\n  return (\n    <div\n      className={cn(\"opal-auxiliary-tag\", config.bg, config.text)}\n      data-size={size}\n    >\n      {Icon && (\n        <div className=\"opal-auxiliary-tag-icon-container\">\n          <Icon className={cn(\"opal-auxiliary-tag-icon\", config.text)} />\n        </div>\n      )}\n      <Text\n        font={size === \"md\" ? \"secondary-body\" : \"figure-small-value\"}\n        color=\"inherit\"\n        nowrap\n      >\n        {title}\n      </Text>\n    </div>\n  );\n}\n\nexport { Tag, type TagProps, type TagColor, type TagSize };\n"
  },
  {
    "path": "web/lib/opal/src/components/tag/styles.css",
    "content": "/* ---------------------------------------------------------------------------\n   AuxiliaryTag\n\n   Fixed height of 1rem (16px). Icon is 0.75rem (12px) with p-0.5 (2px)\n   padding to match the font-figure-small-value line-height (12px).\n   --------------------------------------------------------------------------- */\n\n.opal-auxiliary-tag {\n  @apply flex flex-row items-center shrink-0;\n  height: 1rem;\n  border-radius: 0.25rem;\n  padding: 0 0.25rem;\n  gap: 0;\n}\n\n.opal-auxiliary-tag[data-size=\"md\"] {\n  height: 1.375rem;\n  padding: 0 0.375rem;\n  border-radius: 0.375rem;\n}\n\n.opal-auxiliary-tag-icon-container {\n  display: flex;\n  align-items: center;\n  justify-content: center;\n  padding: 1px;\n}\n\n.opal-auxiliary-tag-icon {\n  width: 10px;\n  height: 10px;\n}\n\n.opal-auxiliary-tag-title {\n  white-space: nowrap;\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/text/InlineMarkdown.tsx",
    "content": "import type { ReactNode } from \"react\";\nimport ReactMarkdown from \"react-markdown\";\nimport remarkGfm from \"remark-gfm\";\n\nimport type { RichStr } from \"@opal/types\";\n\n// ---------------------------------------------------------------------------\n// InlineMarkdown\n// ---------------------------------------------------------------------------\n\nconst SAFE_PROTOCOL = /^https?:|^mailto:|^tel:/i;\n\nconst ALLOWED_ELEMENTS = [\"p\", \"br\", \"a\", \"strong\", \"em\", \"code\", \"del\"];\n\nconst INLINE_COMPONENTS = {\n  p: ({ children }: { children?: ReactNode }) => (\n    <span className=\"block\">{children}</span>\n  ),\n  a: ({ children, href }: { children?: ReactNode; href?: string }) => {\n    if (!href || !SAFE_PROTOCOL.test(href)) {\n      return <>{children}</>;\n    }\n    const isHttp = /^https?:/i.test(href);\n    return (\n      <a\n        href={href}\n        className=\"underline underline-offset-2\"\n        {...(isHttp ? { target: \"_blank\", rel: \"noopener noreferrer\" } : {})}\n      >\n        {children}\n      </a>\n    );\n  },\n  code: ({ children }: { children?: ReactNode }) => (\n    <code className=\"[font-family:var(--font-dm-mono)] bg-background-tint-02 rounded px-1 py-0.5\">\n      {children}\n    </code>\n  ),\n};\n\ninterface InlineMarkdownProps {\n  content: string;\n}\n\nexport default function InlineMarkdown({ content }: InlineMarkdownProps) {\n  // Convert \\n to CommonMark hard line breaks (two trailing spaces + newline).\n  // react-markdown renders these as <br />, which inherits the parent's\n  // line-height for font-appropriate spacing.\n  const normalized = content.replace(/\\n/g, \"  \\n\");\n\n  return (\n    <ReactMarkdown\n      components={INLINE_COMPONENTS}\n      allowedElements={ALLOWED_ELEMENTS}\n      unwrapDisallowed\n      remarkPlugins={[remarkGfm]}\n    >\n      {normalized}\n    </ReactMarkdown>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// RichStr helpers\n// ---------------------------------------------------------------------------\n\nfunction isRichStr(value: unknown): value is RichStr {\n  return (\n    typeof value === \"object\" &&\n    value !== null &&\n    (value as RichStr).__brand === \"RichStr\"\n  );\n}\n\n/** Resolves `string | RichStr` to a `ReactNode`. */\nexport function resolveStr(value: string | RichStr): ReactNode {\n  return isRichStr(value) ? <InlineMarkdown content={value.raw} /> : value;\n}\n\n/** Extracts the plain string from `string | RichStr`. */\nexport function toPlainString(value: string | RichStr): string {\n  return isRichStr(value) ? value.raw : value;\n}\n"
  },
  {
    "path": "web/lib/opal/src/components/text/README.md",
    "content": "# Text\n\n**Import:** `import { Text, type TextProps, type TextFont, type TextColor } from \"@opal/components\";`\n\nA styled text component with string-enum props for font preset and color selection. Supports\ninline markdown rendering via `RichStr` — pass `markdown(\"*bold* text\")` as children to enable.\n\n## Props\n\n| Prop | Type | Default | Description |\n|---|---|---|---|\n| `font` | `TextFont` | `\"main-ui-body\"` | Font preset (size, weight, line-height) |\n| `color` | `TextColor` | `\"text-04\"` | Text color |\n| `as` | `\"p\" \\| \"span\" \\| \"li\" \\| \"h1\" \\| \"h2\" \\| \"h3\"` | `\"span\"` | HTML tag to render |\n| `nowrap` | `boolean` | `false` | Prevent text wrapping |\n| `children` | `string \\| RichStr` | — | Plain string or `markdown()` for inline markdown |\n\n### `TextFont`\n\n| Value | Size | Weight | Line-height |\n|---|---|---|---|\n| `\"heading-h1\"` | 48px | 600 | 64px |\n| `\"heading-h2\"` | 24px | 600 | 36px |\n| `\"heading-h3\"` | 18px | 600 | 28px |\n| `\"heading-h3-muted\"` | 18px | 500 | 28px |\n| `\"main-content-body\"` | 16px | 450 | 24px |\n| `\"main-content-muted\"` | 16px | 400 | 24px |\n| `\"main-content-emphasis\"` | 16px | 700 | 24px |\n| `\"main-content-mono\"` | 16px | 400 | 23px |\n| `\"main-ui-body\"` | 14px | 500 | 20px |\n| `\"main-ui-muted\"` | 14px | 400 | 20px |\n| `\"main-ui-action\"` | 14px | 600 | 20px |\n| `\"main-ui-mono\"` | 14px | 400 | 20px |\n| `\"secondary-body\"` | 12px | 400 | 18px |\n| `\"secondary-action\"` | 12px | 600 | 18px |\n| `\"secondary-mono\"` | 12px | 400 | 18px |\n| `\"figure-small-label\"` | 10px | 600 | 14px |\n| `\"figure-small-value\"` | 10px | 400 | 14px |\n| `\"figure-keystroke\"` | 11px | 400 | 16px |\n\n### `TextColor`\n\n`\"text-01\" | \"text-02\" | \"text-03\" | \"text-04\" | \"text-05\" | \"text-inverted-01\" | \"text-inverted-02\" | \"text-inverted-03\" | \"text-inverted-04\" | \"text-inverted-05\" | \"text-light-03\" | \"text-light-05\" | \"text-dark-03\" | \"text-dark-05\"`\n\n## Usage Examples\n\n```tsx\nimport { Text } from \"@opal/components\";\n\n// Basic\n<Text font=\"main-ui-body\" color=\"text-03\">\n  Hello world\n</Text>\n\n// Heading\n<Text font=\"heading-h2\" color=\"text-05\" as=\"h2\">\n  Page Title\n</Text>\n\n// Inverted (for dark backgrounds)\n<Text font=\"main-ui-body\" color=\"text-inverted-05\">\n  Light text on dark\n</Text>\n\n// As paragraph\n<Text font=\"main-content-body\" color=\"text-03\" as=\"p\">\n  A full paragraph of text.\n</Text>\n```\n\n## Inline Markdown via `RichStr`\n\nInline markdown is opt-in via the `markdown()` function, which returns a `RichStr`. When `Text`\nreceives a `RichStr` as children, it parses the inner string as inline markdown. Plain strings\nare rendered as-is — no parsing, no surprises. `Text` does not accept arbitrary JSX as children;\nuse `string | RichStr` only.\n\n```tsx\nimport { Text } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\n\n// Inline markdown — bold, italic, links, code, strikethrough\n<Text font=\"main-ui-body\" color=\"text-05\">\n  {markdown(\"*Hello*, **world**! Visit [Onyx](https://onyx.app) and run `onyx start`.\")}\n</Text>\n\n// Plain string — no markdown parsing\n<Text font=\"main-ui-body\" color=\"text-03\">\n  This *stays* as-is, no formatting applied.\n</Text>\n```\n\nSupported syntax: `**bold**`, `*italic*`, `` `code` ``, `[link](url)`, `~~strikethrough~~`, `\\n` (newline → `<br />`).\n\nMarkdown rendering uses `react-markdown` internally, restricted to inline elements only.\n`http(s)` links open in a new tab; `mailto:` and `tel:` links open natively. Inline code\ninherits the parent font size and switches to the monospace family.\n\nNewlines (`\\n`) are converted to `<br />` elements that inherit the parent's line-height,\nso line spacing is proportional to the font size. For full block-level markdown (code blocks,\nheadings, lists), use `MinimalMarkdown` instead.\n\n### Using `RichStr` in component props\n\nComponents that want to support optional markdown in their text props should accept\n`string | RichStr`:\n\n```tsx\nimport type { RichStr } from \"@opal/types\";\n\ninterface MyComponentProps {\n  title: string | RichStr;\n  description?: string | RichStr;\n}\n```\n\nThis avoids API coloring — no `markdown` boolean needs to be threaded through intermediate\ncomponents. The decision to use markdown lives at the call site.\n\n## Compatibility\n\n`@/refresh-components/texts/Text` is an independent legacy component that implements the same\nfont/color presets via a boolean-flag API. It is **not** a wrapper around this component. New\ncode should import directly from `@opal/components`.\n"
  },
  {
    "path": "web/lib/opal/src/components/text/Text.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { Text } from \"@opal/components\";\nimport type { TextFont, TextColor } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\n\nconst meta: Meta<typeof Text> = {\n  title: \"opal/components/Text\",\n  component: Text,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Text>;\n\n// ---------------------------------------------------------------------------\n// Basic\n// ---------------------------------------------------------------------------\n\nexport const Default: Story = {\n  args: {\n    children: \"The quick brown fox jumps over the lazy dog\",\n  },\n};\n\nexport const AsHeading: Story = {\n  args: {\n    font: \"heading-h2\",\n    color: \"text-05\",\n    as: \"h2\",\n    children: \"Page Title\",\n  },\n};\n\nexport const AsParagraph: Story = {\n  args: {\n    font: \"main-content-body\",\n    color: \"text-03\",\n    as: \"p\",\n    children: \"A full paragraph of body text rendered as a p element.\",\n  },\n};\n\nexport const Nowrap: Story = {\n  render: () => (\n    <div className=\"w-48 border border-border-02 rounded p-2\">\n      <Text font=\"main-ui-body\" color=\"text-05\" nowrap>\n        This text will not wrap even though the container is narrow\n      </Text>\n    </div>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// Fonts\n// ---------------------------------------------------------------------------\n\nconst ALL_FONTS: TextFont[] = [\n  \"heading-h1\",\n  \"heading-h2\",\n  \"heading-h3\",\n  \"heading-h3-muted\",\n  \"main-content-body\",\n  \"main-content-muted\",\n  \"main-content-emphasis\",\n  \"main-content-mono\",\n  \"main-ui-body\",\n  \"main-ui-muted\",\n  \"main-ui-action\",\n  \"main-ui-mono\",\n  \"secondary-body\",\n  \"secondary-action\",\n  \"secondary-mono\",\n  \"figure-small-label\",\n  \"figure-small-value\",\n  \"figure-keystroke\",\n];\n\nexport const AllFonts: Story = {\n  render: () => (\n    <div className=\"space-y-2\">\n      {ALL_FONTS.map((font) => (\n        <div key={font} className=\"flex items-baseline gap-4\">\n          <span className=\"w-56 shrink-0 font-secondary-body text-text-03\">\n            {font}\n          </span>\n          <Text font={font} color=\"text-05\">\n            The quick brown fox\n          </Text>\n        </div>\n      ))}\n    </div>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// Colors\n// ---------------------------------------------------------------------------\n\nconst STANDARD_COLORS: TextColor[] = [\n  \"text-01\",\n  \"text-02\",\n  \"text-03\",\n  \"text-04\",\n  \"text-05\",\n];\n\nconst INVERTED_COLORS: TextColor[] = [\n  \"text-inverted-01\",\n  \"text-inverted-02\",\n  \"text-inverted-03\",\n  \"text-inverted-04\",\n  \"text-inverted-05\",\n];\n\nexport const AllColors: Story = {\n  render: () => (\n    <div className=\"space-y-2\">\n      {STANDARD_COLORS.map((color) => (\n        <div key={color} className=\"flex items-baseline gap-4\">\n          <span className=\"w-56 shrink-0 font-secondary-body text-text-03\">\n            {color}\n          </span>\n          <Text font=\"main-ui-body\" color={color}>\n            The quick brown fox\n          </Text>\n        </div>\n      ))}\n    </div>\n  ),\n};\n\nexport const InvertedColors: Story = {\n  render: () => (\n    <div className=\"bg-background-inverted-01 rounded-lg p-6 space-y-2\">\n      {INVERTED_COLORS.map((color) => (\n        <div key={color} className=\"flex items-baseline gap-4\">\n          <span\n            className=\"w-56 shrink-0 font-secondary-body\"\n            style={{ color: \"rgba(255,255,255,0.5)\" }}\n          >\n            {color}\n          </span>\n          <Text font=\"main-ui-body\" color={color}>\n            The quick brown fox\n          </Text>\n        </div>\n      ))}\n    </div>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// Markdown via RichStr\n// ---------------------------------------------------------------------------\n\nexport const MarkdownBold: Story = {\n  args: {\n    font: \"main-ui-body\",\n    color: \"text-05\",\n    children: markdown(\"This is **bold** text\"),\n  },\n};\n\nexport const MarkdownItalic: Story = {\n  args: {\n    font: \"main-ui-body\",\n    color: \"text-05\",\n    children: markdown(\"This is *italic* text\"),\n  },\n};\n\nexport const MarkdownCode: Story = {\n  args: {\n    font: \"main-ui-body\",\n    color: \"text-05\",\n    children: markdown(\"Run `npm install` to get started\"),\n  },\n};\n\nexport const MarkdownLink: Story = {\n  args: {\n    font: \"main-ui-body\",\n    color: \"text-05\",\n    children: markdown(\"Visit [Onyx](https://www.onyx.app/) for more info\"),\n  },\n};\n\nexport const MarkdownStrikethrough: Story = {\n  args: {\n    font: \"main-ui-body\",\n    color: \"text-05\",\n    children: markdown(\"This is ~~deleted~~ text\"),\n  },\n};\n\nexport const MarkdownCombined: Story = {\n  args: {\n    font: \"main-ui-body\",\n    color: \"text-05\",\n    children: markdown(\n      \"*Hello*, **world**! Check out [Onyx](https://www.onyx.app/) and run `onyx start` to begin.\"\n    ),\n  },\n};\n\nexport const MarkdownAtDifferentSizes: Story = {\n  render: () => (\n    <div className=\"space-y-3\">\n      <Text font=\"heading-h2\" color=\"text-05\" as=\"h2\">\n        {markdown(\"**Heading** with *emphasis* and `code`\")}\n      </Text>\n      <Text font=\"main-content-body\" color=\"text-03\" as=\"p\">\n        {markdown(\"**Main content** with *emphasis* and `code`\")}\n      </Text>\n      <Text font=\"secondary-body\" color=\"text-03\">\n        {markdown(\"**Secondary** with *emphasis* and `code`\")}\n      </Text>\n    </div>\n  ),\n};\n\nexport const PlainStringNotParsed: Story = {\n  render: () => (\n    <div className=\"space-y-2\">\n      <Text font=\"main-ui-body\" color=\"text-05\">\n        {\n          \"This has *asterisks* and **double asterisks** but they are NOT parsed.\"\n        }\n      </Text>\n    </div>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// Tag Variants\n// ---------------------------------------------------------------------------\n\nexport const TagVariants: Story = {\n  render: () => (\n    <div className=\"space-y-2\">\n      <Text font=\"main-ui-body\" color=\"text-05\">\n        Default (span): inline text\n      </Text>\n      <Text font=\"main-ui-body\" color=\"text-05\" as=\"p\">\n        Paragraph (p): block text\n      </Text>\n      <Text font=\"heading-h2\" color=\"text-05\" as=\"h2\">\n        Heading (h2): semantic heading\n      </Text>\n      <ul className=\"list-disc pl-6\">\n        <Text font=\"main-ui-body\" color=\"text-05\" as=\"li\">\n          List item (li): inside a list\n        </Text>\n      </ul>\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/lib/opal/src/components/text/components.tsx",
    "content": "import type { HTMLAttributes } from \"react\";\n\nimport type { RichStr, WithoutStyles } from \"@opal/types\";\nimport { cn } from \"@opal/utils\";\nimport { resolveStr } from \"@opal/components/text/InlineMarkdown\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype TextFont =\n  | \"heading-h1\"\n  | \"heading-h2\"\n  | \"heading-h3\"\n  | \"heading-h3-muted\"\n  | \"main-content-body\"\n  | \"main-content-muted\"\n  | \"main-content-emphasis\"\n  | \"main-content-mono\"\n  | \"main-ui-body\"\n  | \"main-ui-muted\"\n  | \"main-ui-action\"\n  | \"main-ui-mono\"\n  | \"secondary-body\"\n  | \"secondary-action\"\n  | \"secondary-mono\"\n  | \"secondary-mono-label\"\n  | \"figure-small-label\"\n  | \"figure-small-value\"\n  | \"figure-keystroke\";\n\ntype TextColor =\n  | \"inherit\"\n  | \"text-01\"\n  | \"text-02\"\n  | \"text-03\"\n  | \"text-04\"\n  | \"text-05\"\n  | \"text-inverted-01\"\n  | \"text-inverted-02\"\n  | \"text-inverted-03\"\n  | \"text-inverted-04\"\n  | \"text-inverted-05\"\n  | \"text-light-03\"\n  | \"text-light-05\"\n  | \"text-dark-03\"\n  | \"text-dark-05\";\n\ninterface TextProps\n  extends WithoutStyles<\n    Omit<HTMLAttributes<HTMLElement>, \"color\" | \"children\">\n  > {\n  /** Font preset. Default: `\"main-ui-body\"`. */\n  font?: TextFont;\n\n  /** Color variant. Default: `\"text-04\"`. */\n  color?: TextColor;\n\n  /** HTML tag to render. Default: `\"span\"`. */\n  as?: \"p\" | \"span\" | \"li\" | \"h1\" | \"h2\" | \"h3\";\n\n  /** Prevent text wrapping. */\n  nowrap?: boolean;\n\n  /** Truncate text to N lines with ellipsis. `1` uses simple truncation; `2+` uses `-webkit-line-clamp`. */\n  maxLines?: number;\n\n  /** Plain string or `markdown()` for inline markdown. */\n  children?: string | RichStr;\n}\n\n// ---------------------------------------------------------------------------\n// Config\n// ---------------------------------------------------------------------------\n\nconst FONT_CONFIG: Record<TextFont, string> = {\n  \"heading-h1\": \"font-heading-h1\",\n  \"heading-h2\": \"font-heading-h2\",\n  \"heading-h3\": \"font-heading-h3\",\n  \"heading-h3-muted\": \"font-heading-h3-muted\",\n  \"main-content-body\": \"font-main-content-body\",\n  \"main-content-muted\": \"font-main-content-muted\",\n  \"main-content-emphasis\": \"font-main-content-emphasis\",\n  \"main-content-mono\": \"font-main-content-mono\",\n  \"main-ui-body\": \"font-main-ui-body\",\n  \"main-ui-muted\": \"font-main-ui-muted\",\n  \"main-ui-action\": \"font-main-ui-action\",\n  \"main-ui-mono\": \"font-main-ui-mono\",\n  \"secondary-body\": \"font-secondary-body\",\n  \"secondary-action\": \"font-secondary-action\",\n  \"secondary-mono\": \"font-secondary-mono\",\n  \"secondary-mono-label\": \"font-secondary-mono-label\",\n  \"figure-small-label\": \"font-figure-small-label\",\n  \"figure-small-value\": \"font-figure-small-value\",\n  \"figure-keystroke\": \"font-figure-keystroke\",\n};\n\nconst COLOR_CONFIG: Record<TextColor, string | null> = {\n  inherit: null,\n  \"text-01\": \"text-text-01\",\n  \"text-02\": \"text-text-02\",\n  \"text-03\": \"text-text-03\",\n  \"text-04\": \"text-text-04\",\n  \"text-05\": \"text-text-05\",\n  \"text-inverted-01\": \"text-text-inverted-01\",\n  \"text-inverted-02\": \"text-text-inverted-02\",\n  \"text-inverted-03\": \"text-text-inverted-03\",\n  \"text-inverted-04\": \"text-text-inverted-04\",\n  \"text-inverted-05\": \"text-text-inverted-05\",\n  \"text-light-03\": \"text-text-light-03\",\n  \"text-light-05\": \"text-text-light-05\",\n  \"text-dark-03\": \"text-text-dark-03\",\n  \"text-dark-05\": \"text-text-dark-05\",\n};\n\n// ---------------------------------------------------------------------------\n// Text\n// ---------------------------------------------------------------------------\n\nfunction Text({\n  font = \"main-ui-body\",\n  color = \"text-04\",\n  as: Tag = \"span\",\n  nowrap,\n  maxLines,\n  children,\n  ...rest\n}: TextProps) {\n  const resolvedClassName = cn(\n    \"px-[2px]\",\n    FONT_CONFIG[font],\n    COLOR_CONFIG[color],\n    nowrap && \"whitespace-nowrap\",\n    maxLines === 1 && \"truncate\",\n    maxLines && maxLines > 1 && \"overflow-hidden\"\n  );\n\n  const style =\n    maxLines && maxLines > 1\n      ? ({\n          display: \"-webkit-box\",\n          WebkitBoxOrient: \"vertical\",\n          WebkitLineClamp: maxLines,\n        } as React.CSSProperties)\n      : undefined;\n\n  return (\n    <Tag {...rest} className={resolvedClassName} style={style}>\n      {children && resolveStr(children)}\n    </Tag>\n  );\n}\n\nexport { Text, type TextProps, type TextFont, type TextColor };\n"
  },
  {
    "path": "web/lib/opal/src/components/tooltip.css",
    "content": "/* Shared tooltip content styling */\n\n.opal-tooltip {\n  z-index: var(--z-tooltip, 1300);\n  @apply rounded-08 px-3 py-2 text-sm\n    bg-background-neutral-dark-03 text-text-light-05\n    animate-in fade-in-0 zoom-in-95\n    data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95\n    data-[side=bottom]:slide-in-from-top-2\n    data-[side=left]:slide-in-from-right-2\n    data-[side=right]:slide-in-from-left-2\n    data-[side=top]:slide-in-from-bottom-2;\n}\n"
  },
  {
    "path": "web/lib/opal/src/core/README.md",
    "content": "# Core\n\nThe lowest-level primitives of the Opal design system. Think of `core` like Rust's `core` crate — compiler intrinsics and foundational types — while higher-level modules (like Rust's `std`) provide the public-facing components that most consumers should reach for first.\n\nEnd-users *can* use these components directly when needed, but in most cases they should prefer the higher-level components (such as `Button`, `OpenButton`, `SelectButton`, etc.) that are built on top of `core`.\n\n## Contents\n\n| Primitive | Description | Docs |\n|-----------|-------------|------|\n| [Interactive](./interactive/) | Foundational interactive surface styling (`Stateless`, `Stateful`, `Container`, `Foldable`) | [README](./interactive/README.md) |\n| [Animations](./animations/) | Coordinated hover-state animations across grouped elements (`Hoverable`) | [README](./animations/README.md) |\n"
  },
  {
    "path": "web/lib/opal/src/core/animations/Hoverable.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { Hoverable } from \"@opal/core\";\n\n// ---------------------------------------------------------------------------\n// Meta\n// ---------------------------------------------------------------------------\n\nconst meta: Meta = {\n  title: \"Core/Hoverable\",\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\n\n// ---------------------------------------------------------------------------\n// Stories\n// ---------------------------------------------------------------------------\n\n/** Group mode — hovering the root reveals hidden items. */\nexport const GroupMode: StoryObj = {\n  render: () => (\n    <Hoverable.Root group=\"demo\">\n      <div\n        style={{\n          display: \"flex\",\n          alignItems: \"center\",\n          gap: \"0.75rem\",\n          padding: \"1rem\",\n          border: \"1px solid var(--border-02)\",\n          borderRadius: \"0.5rem\",\n          minWidth: 260,\n        }}\n      >\n        <span style={{ color: \"var(--text-01)\" }}>Hover this card</span>\n        <Hoverable.Item group=\"demo\" variant=\"opacity-on-hover\">\n          <span style={{ color: \"var(--text-03)\" }}>✓ Revealed</span>\n        </Hoverable.Item>\n      </div>\n    </Hoverable.Root>\n  ),\n};\n\n/** Local mode — hovering the item itself reveals it (no Root needed). */\nexport const LocalMode: StoryObj = {\n  render: () => (\n    <div\n      style={{\n        display: \"flex\",\n        alignItems: \"center\",\n        gap: \"0.75rem\",\n        padding: \"1rem\",\n      }}\n    >\n      <span style={{ color: \"var(--text-01)\" }}>Hover the icon →</span>\n      <Hoverable.Item variant=\"opacity-on-hover\">\n        <span style={{ fontSize: \"1.25rem\" }}>🗑</span>\n      </Hoverable.Item>\n    </div>\n  ),\n};\n\n/** Multiple independent groups on the same page. */\nexport const MultipleGroups: StoryObj = {\n  render: () => (\n    <div style={{ display: \"flex\", flexDirection: \"column\", gap: \"0.75rem\" }}>\n      {([\"alpha\", \"beta\"] as const).map((group) => (\n        <Hoverable.Root key={group} group={group}>\n          <div\n            style={{\n              display: \"flex\",\n              alignItems: \"center\",\n              gap: \"0.75rem\",\n              padding: \"1rem\",\n              border: \"1px solid var(--border-02)\",\n              borderRadius: \"0.5rem\",\n            }}\n          >\n            <span style={{ color: \"var(--text-01)\" }}>Group: {group}</span>\n            <Hoverable.Item group={group} variant=\"opacity-on-hover\">\n              <span style={{ color: \"var(--text-03)\" }}>✓ Revealed</span>\n            </Hoverable.Item>\n          </div>\n        </Hoverable.Root>\n      ))}\n    </div>\n  ),\n};\n\n/** Multiple items revealed by a single root. */\nexport const MultipleItems: StoryObj = {\n  render: () => (\n    <Hoverable.Root group=\"multi\">\n      <div\n        style={{\n          display: \"flex\",\n          alignItems: \"center\",\n          gap: \"0.75rem\",\n          padding: \"1rem\",\n          border: \"1px solid var(--border-02)\",\n          borderRadius: \"0.5rem\",\n        }}\n      >\n        <span style={{ color: \"var(--text-01)\" }}>Hover to reveal all</span>\n        <Hoverable.Item group=\"multi\" variant=\"opacity-on-hover\">\n          <span>Edit</span>\n        </Hoverable.Item>\n        <Hoverable.Item group=\"multi\" variant=\"opacity-on-hover\">\n          <span>Delete</span>\n        </Hoverable.Item>\n        <Hoverable.Item group=\"multi\" variant=\"opacity-on-hover\">\n          <span>Share</span>\n        </Hoverable.Item>\n      </div>\n    </Hoverable.Root>\n  ),\n};\n\n/** Nested groups — inner and outer hover independently. */\nexport const NestedGroups: StoryObj = {\n  render: () => (\n    <Hoverable.Root group=\"outer\">\n      <div\n        style={{\n          padding: \"1rem\",\n          border: \"1px solid var(--border-02)\",\n          borderRadius: \"0.5rem\",\n          display: \"flex\",\n          flexDirection: \"column\",\n          gap: \"0.75rem\",\n        }}\n      >\n        <div style={{ display: \"flex\", alignItems: \"center\", gap: \"0.75rem\" }}>\n          <span style={{ color: \"var(--text-01)\" }}>Outer card</span>\n          <Hoverable.Item group=\"outer\" variant=\"opacity-on-hover\">\n            <span style={{ color: \"var(--text-03)\" }}>Outer action</span>\n          </Hoverable.Item>\n        </div>\n\n        <Hoverable.Root group=\"inner\">\n          <div\n            style={{\n              display: \"flex\",\n              alignItems: \"center\",\n              gap: \"0.75rem\",\n              padding: \"0.75rem\",\n              border: \"1px solid var(--border-03)\",\n              borderRadius: \"0.375rem\",\n            }}\n          >\n            <span style={{ color: \"var(--text-02)\" }}>Inner card</span>\n            <Hoverable.Item group=\"inner\" variant=\"opacity-on-hover\">\n              <span style={{ color: \"var(--text-03)\" }}>Inner action</span>\n            </Hoverable.Item>\n          </div>\n        </Hoverable.Root>\n      </div>\n    </Hoverable.Root>\n  ),\n};\n"
  },
  {
    "path": "web/lib/opal/src/core/animations/README.md",
    "content": "# Animations (Hoverable)\n\n**Import:** `import { Hoverable } from \"@opal/core\";`\n\nProvides coordinated hover-state animations across a group of elements. A `Hoverable.Root` tracks hover state and broadcasts it to `Hoverable.Item` descendants via a per-group React context.\n\n## Sub-components\n\n| Sub-component | Role |\n|---|---|\n| `Hoverable.Root` | Wraps a group of items. Tracks mouse enter/leave and provides hover state via context. |\n| `Hoverable.Item` | Reads hover state from its group's context. Applies a CSS class (`opal-hoverable-item`) with variant-specific transitions (e.g. opacity, scale). |\n\n## Props\n\n### Hoverable.Root\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `group` | `string` | `\"default\"` | Named group for independent hover tracking |\n\n### Hoverable.Item\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `group` | `string` | `\"default\"` | Which group to listen to |\n| `variant` | `HoverableItemVariant` | `\"fade\"` | Animation variant |\n\n## Usage\n\n```tsx\nimport { Hoverable } from \"@opal/core\";\n\n<Hoverable.Root group=\"card\">\n  <div>\n    <Hoverable.Item group=\"card\" variant=\"fade\">\n      <span>Appears on hover</span>\n    </Hoverable.Item>\n  </div>\n</Hoverable.Root>\n```\n"
  },
  {
    "path": "web/lib/opal/src/core/animations/components.tsx",
    "content": "\"use client\";\n\nimport \"@opal/core/animations/styles.css\";\nimport React, { createContext, useContext, useState, useCallback } from \"react\";\nimport { cn } from \"@opal/utils\";\nimport type { WithoutStyles, ExtremaSizeVariants } from \"@opal/types\";\nimport { widthVariants } from \"@opal/shared\";\n\n// ---------------------------------------------------------------------------\n// Context-per-group registry\n// ---------------------------------------------------------------------------\n\n/**\n * Lazily-created map of group names to React contexts.\n *\n * Each group gets its own `React.Context<boolean | null>` so that a\n * `Hoverable.Item` only re-renders when its *own* group's hover state\n * changes — not when any unrelated group changes.\n *\n * The default value is `null` (no provider found), which lets\n * `Hoverable.Item` distinguish \"no Root ancestor\" from \"Root says\n * not hovered\" and throw when `group` was explicitly specified.\n */\nconst contextMap = new Map<string, React.Context<boolean | null>>();\n\nfunction getOrCreateContext(group: string): React.Context<boolean | null> {\n  let ctx = contextMap.get(group);\n  if (!ctx) {\n    ctx = createContext<boolean | null>(null);\n    ctx.displayName = `HoverableContext(${group})`;\n    contextMap.set(group, ctx);\n  }\n  return ctx;\n}\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface HoverableRootProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {\n  children: React.ReactNode;\n  group: string;\n  /** Width preset. @default \"auto\" */\n  widthVariant?: ExtremaSizeVariants;\n  /** Ref forwarded to the root `<div>`. */\n  ref?: React.Ref<HTMLDivElement>;\n}\n\ntype HoverableItemVariant = \"opacity-on-hover\";\n\ninterface HoverableItemProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {\n  children: React.ReactNode;\n  group?: string;\n  variant?: HoverableItemVariant;\n  /** Ref forwarded to the item `<div>`. */\n  ref?: React.Ref<HTMLDivElement>;\n}\n\n// ---------------------------------------------------------------------------\n// HoverableRoot\n// ---------------------------------------------------------------------------\n\n/**\n * Hover-tracking container for a named group.\n *\n * Wraps children in a `<div>` that tracks mouse-enter / mouse-leave and\n * provides the hover state via a per-group React context.\n *\n * Nesting works because each `Hoverable.Root` creates a **new** context\n * provider that shadows the parent — so an inner `Hoverable.Item group=\"b\"`\n * reads from the inner provider, not the outer `group=\"a\"` provider.\n *\n * @example\n * ```tsx\n * <Hoverable.Root group=\"card\">\n *   <Card>\n *     <Hoverable.Item group=\"card\" variant=\"opacity-on-hover\">\n *       <TrashIcon />\n *     </Hoverable.Item>\n *   </Card>\n * </Hoverable.Root>\n * ```\n */\nfunction HoverableRoot({\n  group,\n  children,\n  widthVariant = \"full\",\n  ref,\n  onMouseEnter: consumerMouseEnter,\n  onMouseLeave: consumerMouseLeave,\n  onFocusCapture: consumerFocusCapture,\n  onBlurCapture: consumerBlurCapture,\n  ...props\n}: HoverableRootProps) {\n  const [hovered, setHovered] = useState(false);\n  const [focused, setFocused] = useState(false);\n\n  const onMouseEnter = useCallback(\n    (e: React.MouseEvent<HTMLDivElement>) => {\n      setHovered(true);\n      consumerMouseEnter?.(e);\n    },\n    [consumerMouseEnter]\n  );\n\n  const onMouseLeave = useCallback(\n    (e: React.MouseEvent<HTMLDivElement>) => {\n      setHovered(false);\n      consumerMouseLeave?.(e);\n    },\n    [consumerMouseLeave]\n  );\n\n  const onFocusCapture = useCallback(\n    (e: React.FocusEvent<HTMLDivElement>) => {\n      setFocused(true);\n      consumerFocusCapture?.(e);\n    },\n    [consumerFocusCapture]\n  );\n\n  const onBlurCapture = useCallback(\n    (e: React.FocusEvent<HTMLDivElement>) => {\n      if (\n        !(e.relatedTarget instanceof Node) ||\n        !e.currentTarget.contains(e.relatedTarget)\n      ) {\n        setFocused(false);\n      }\n      consumerBlurCapture?.(e);\n    },\n    [consumerBlurCapture]\n  );\n\n  const active = hovered || focused;\n  const GroupContext = getOrCreateContext(group);\n\n  return (\n    <GroupContext.Provider value={active}>\n      <div\n        {...props}\n        ref={ref}\n        className={cn(widthVariants[widthVariant])}\n        onMouseEnter={onMouseEnter}\n        onMouseLeave={onMouseLeave}\n        onFocusCapture={onFocusCapture}\n        onBlurCapture={onBlurCapture}\n      >\n        {children}\n      </div>\n    </GroupContext.Provider>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// HoverableItem\n// ---------------------------------------------------------------------------\n\n/**\n * An element whose visibility is controlled by hover state.\n *\n * **Local mode** (`group` omitted): the item handles hover on its own\n * element via CSS `:hover`. This is the core abstraction.\n *\n * **Group mode** (`group` provided): visibility is driven by a matching\n * `Hoverable.Root` ancestor's hover state via React context. If no\n * matching Root is found, an error is thrown.\n *\n * Uses data-attributes for variant styling (see `styles.css`).\n *\n * @example\n * ```tsx\n * // Local mode — hover on the item itself\n * <Hoverable.Item variant=\"opacity-on-hover\">\n *   <TrashIcon />\n * </Hoverable.Item>\n *\n * // Group mode — hover on the Root reveals the item\n * <Hoverable.Root group=\"card\">\n *   <Hoverable.Item group=\"card\" variant=\"opacity-on-hover\">\n *     <TrashIcon />\n *   </Hoverable.Item>\n * </Hoverable.Root>\n * ```\n *\n * @throws If `group` is specified but no matching `Hoverable.Root` ancestor exists.\n */\nfunction HoverableItem({\n  group,\n  variant = \"opacity-on-hover\",\n  children,\n  ref,\n  ...props\n}: HoverableItemProps) {\n  const contextValue = useContext(\n    group ? getOrCreateContext(group) : NOOP_CONTEXT\n  );\n\n  if (group && contextValue === null) {\n    throw new Error(\n      `Hoverable.Item group=\"${group}\" has no matching Hoverable.Root ancestor. ` +\n        `Either wrap it in <Hoverable.Root group=\"${group}\"> or remove the group prop for local hover.`\n    );\n  }\n\n  const isLocal = group === undefined;\n\n  return (\n    <div\n      {...props}\n      ref={ref}\n      className={cn(\"hoverable-item\")}\n      data-hoverable-variant={variant}\n      data-hoverable-active={\n        isLocal ? undefined : contextValue ? \"true\" : undefined\n      }\n      data-hoverable-local={isLocal ? \"true\" : undefined}\n    >\n      {children}\n    </div>\n  );\n}\n\n/** Stable context used when no group is specified (local mode). */\nconst NOOP_CONTEXT = createContext<boolean | null>(null);\n\n// ---------------------------------------------------------------------------\n// Compound export\n// ---------------------------------------------------------------------------\n\n/**\n * Hoverable compound component for hover-to-reveal patterns.\n *\n * Provides two sub-components:\n *\n * - `Hoverable.Root` — A container that tracks hover state for a named group\n *   and provides it via React context.\n *\n * - `Hoverable.Item` — The core abstraction. On its own (no `group`), it\n *   applies local CSS `:hover` for the variant effect. When `group` is\n *   specified, it reads hover state from the nearest matching\n *   `Hoverable.Root` — and throws if no matching Root is found.\n *\n * Supports nesting: a child `Hoverable.Root` shadows the parent's context,\n * so each group's items only respond to their own root's hover.\n *\n * @example\n * ```tsx\n * import { Hoverable } from \"@opal/core\";\n *\n * // Group mode — hovering the card reveals the trash icon\n * <Hoverable.Root group=\"card\">\n *   <Card>\n *     <span>Card content</span>\n *     <Hoverable.Item group=\"card\" variant=\"opacity-on-hover\">\n *       <TrashIcon />\n *     </Hoverable.Item>\n *   </Card>\n * </Hoverable.Root>\n *\n * // Local mode — hovering the item itself reveals it\n * <Hoverable.Item variant=\"opacity-on-hover\">\n *   <TrashIcon />\n * </Hoverable.Item>\n * ```\n */\nconst Hoverable = {\n  Root: HoverableRoot,\n  Item: HoverableItem,\n};\n\nexport {\n  Hoverable,\n  type HoverableRootProps,\n  type HoverableItemProps,\n  type HoverableItemVariant,\n};\n"
  },
  {
    "path": "web/lib/opal/src/core/animations/styles.css",
    "content": "/* Hoverable — item transitions */\n.hoverable-item {\n  transition: opacity 150ms ease-in-out;\n}\n\n.hoverable-item[data-hoverable-variant=\"opacity-on-hover\"] {\n  opacity: 0;\n}\n\n/* Group mode — Root controls visibility via React context */\n.hoverable-item[data-hoverable-variant=\"opacity-on-hover\"][data-hoverable-active=\"true\"] {\n  opacity: 1;\n}\n\n/* Local mode — item handles its own :hover */\n.hoverable-item[data-hoverable-variant=\"opacity-on-hover\"][data-hoverable-local=\"true\"]:hover {\n  opacity: 1;\n}\n\n/* Focus — item (or a focusable descendant) receives keyboard focus */\n.hoverable-item[data-hoverable-variant=\"opacity-on-hover\"]:has(:focus-visible) {\n  opacity: 1;\n}\n\n/* Focus ring on keyboard focus */\n.hoverable-item:focus-visible {\n  outline: 2px solid var(--border-04);\n  outline-offset: 2px;\n  border-radius: 0.25rem;\n}\n"
  },
  {
    "path": "web/lib/opal/src/core/disabled/components.tsx",
    "content": "import \"@opal/core/disabled/styles.css\";\nimport React from \"react\";\nimport { Slot } from \"@radix-ui/react-slot\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface DisabledProps extends React.HTMLAttributes<HTMLElement> {\n  ref?: React.Ref<HTMLElement>;\n\n  /**\n   * When truthy, applies disabled styling to child elements.\n   */\n  disabled?: boolean;\n\n  /**\n   * When `true`, re-enables pointer events while keeping the disabled\n   * visual treatment. Useful for elements that need to show tooltips or\n   * error messages on click.\n   * @default false\n   */\n  allowClick?: boolean;\n\n  children: React.ReactElement;\n}\n\n// ---------------------------------------------------------------------------\n// Disabled\n// ---------------------------------------------------------------------------\n\n/**\n * Wrapper component that applies baseline disabled CSS (opacity, cursor,\n * pointer-events) to its child element.\n *\n * Uses Radix `Slot` — merges props onto the single child element without\n * adding any DOM node. Works correctly inside Radix `asChild` chains.\n *\n * @example\n * ```tsx\n * <Disabled disabled={!canSubmit}>\n *   <div>...</div>\n * </Disabled>\n * ```\n */\nfunction Disabled({\n  disabled,\n  allowClick,\n  children,\n  ref,\n  ...rest\n}: DisabledProps) {\n  return (\n    <Slot\n      ref={ref}\n      {...rest}\n      aria-disabled={disabled || undefined}\n      data-opal-disabled={disabled || undefined}\n      data-allow-click={disabled && allowClick ? \"\" : undefined}\n    >\n      {children}\n    </Slot>\n  );\n}\n\nexport { Disabled, type DisabledProps };\n"
  },
  {
    "path": "web/lib/opal/src/core/disabled/styles.css",
    "content": "/* Disabled — baseline disabled visuals via Radix Slot (no extra DOM node).\n *\n * [data-opal-disabled]                   → cursor + pointer-events for all\n * [data-opal-disabled]:not(.interactive) → opacity for non-Interactive elements\n * [data-opal-disabled][data-allow-click] → re-enables clicks\n *\n * Interactive elements (.interactive) handle their own disabled colors via\n * variant CSS — no blanket opacity is applied to them. Pointer-events are\n * re-enabled so the JS layer can suppress onClick.\n */\n\n[data-opal-disabled] {\n  @apply cursor-not-allowed select-none;\n  pointer-events: none;\n}\n\n/* Only apply blanket opacity to non-Interactive elements.\n   Interactive variants define their own disabled backgrounds/foregrounds. */\n[data-opal-disabled]:not(.interactive) {\n  @apply opacity-50;\n}\n\n/* Re-enable pointer-events so the Interactive JS layer can suppress onClick. */\n[data-opal-disabled].interactive {\n  pointer-events: auto;\n}\n\n[data-opal-disabled][data-allow-click] {\n  pointer-events: auto;\n}\n"
  },
  {
    "path": "web/lib/opal/src/core/index.ts",
    "content": "/* Disabled */\nexport { Disabled, type DisabledProps } from \"@opal/core/disabled/components\";\n\n/* Animations (formerly Hoverable) */\nexport {\n  Hoverable,\n  type HoverableRootProps,\n  type HoverableItemProps,\n  type HoverableItemVariant,\n} from \"@opal/core/animations/components\";\n\n/* Interactive — compound component */\nimport { InteractiveStateless } from \"@opal/core/interactive/stateless/components\";\nimport { InteractiveStateful } from \"@opal/core/interactive/stateful/components\";\nimport { InteractiveContainer } from \"@opal/core/interactive/container/components\";\nimport { InteractiveSimple } from \"@opal/core/interactive/simple/components\";\nimport { Foldable } from \"@opal/core/interactive/foldable/components\";\n\nconst Interactive = {\n  Simple: InteractiveSimple,\n  Stateless: InteractiveStateless,\n  Stateful: InteractiveStateful,\n  Container: InteractiveContainer,\n  Foldable,\n};\n\nexport { Interactive };\n\n/* Interactive — types */\nexport type {\n  InteractiveStatelessProps,\n  InteractiveStatelessVariant,\n  InteractiveStatelessProminence,\n  InteractiveStatelessInteraction,\n} from \"@opal/core/interactive/stateless/components\";\n\nexport type {\n  InteractiveStatefulProps,\n  InteractiveStatefulVariant,\n  InteractiveStatefulState,\n  InteractiveStatefulInteraction,\n} from \"@opal/core/interactive/stateful/components\";\n\nexport type {\n  InteractiveContainerProps,\n  InteractiveContainerRoundingVariant,\n} from \"@opal/core/interactive/container/components\";\n\nexport type { FoldableProps } from \"@opal/core/interactive/foldable/components\";\n\nexport type { InteractiveSimpleProps } from \"@opal/core/interactive/simple/components\";\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/Interactive.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { Interactive, Disabled } from \"@opal/core\";\n\n// ---------------------------------------------------------------------------\n// Variant / Prominence mappings for the matrix story\n// ---------------------------------------------------------------------------\n\nconst VARIANT_PROMINENCE_MAP: Record<string, string[]> = {\n  default: [\"primary\", \"secondary\", \"tertiary\", \"internal\"],\n  action: [\"primary\", \"secondary\", \"tertiary\", \"internal\"],\n  danger: [\"primary\", \"secondary\", \"tertiary\", \"internal\"],\n};\n\nconst SIZE_VARIANTS = [\"lg\", \"md\", \"sm\", \"xs\", \"2xs\", \"fit\"] as const;\nconst ROUNDING_VARIANTS = [\"default\", \"compact\", \"mini\"] as const;\n\n// ---------------------------------------------------------------------------\n// Meta\n// ---------------------------------------------------------------------------\n\nconst meta: Meta = {\n  title: \"Core/Interactive\",\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\n\n// ---------------------------------------------------------------------------\n// Stories\n// ---------------------------------------------------------------------------\n\n/** Basic Interactive.Stateless + Container with text content. */\nexport const Default: StoryObj = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: \"0.75rem\", alignItems: \"center\" }}>\n      <Interactive.Stateless\n        variant=\"default\"\n        prominence=\"secondary\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border>\n          <span>Secondary</span>\n        </Interactive.Container>\n      </Interactive.Stateless>\n\n      <Interactive.Stateless\n        variant=\"default\"\n        prominence=\"primary\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border>\n          <span>Primary</span>\n        </Interactive.Container>\n      </Interactive.Stateless>\n\n      <Interactive.Stateless\n        variant=\"default\"\n        prominence=\"tertiary\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border>\n          <span>Tertiary</span>\n        </Interactive.Container>\n      </Interactive.Stateless>\n    </div>\n  ),\n};\n\n/** All variant x prominence combinations displayed in a grid. */\nexport const VariantMatrix: StoryObj = {\n  render: () => (\n    <div style={{ display: \"flex\", flexDirection: \"column\", gap: \"1.5rem\" }}>\n      {Object.entries(VARIANT_PROMINENCE_MAP).map(([variant, prominences]) => (\n        <div key={variant}>\n          <div\n            style={{\n              fontSize: \"0.75rem\",\n              fontWeight: 600,\n              textTransform: \"uppercase\",\n              letterSpacing: \"0.05em\",\n              paddingBottom: \"0.5rem\",\n            }}\n          >\n            {variant}\n          </div>\n\n          {prominences.length === 0 ? (\n            <Interactive.Stateless variant=\"none\" onClick={() => {}}>\n              <Interactive.Container border>\n                <span style={{ color: \"var(--text-01)\" }}>\n                  none (no prominence)\n                </span>\n              </Interactive.Container>\n            </Interactive.Stateless>\n          ) : (\n            <div style={{ display: \"flex\", gap: \"0.5rem\", flexWrap: \"wrap\" }}>\n              {prominences.map((prominence) => (\n                <div\n                  key={prominence}\n                  style={{\n                    display: \"flex\",\n                    flexDirection: \"column\",\n                    alignItems: \"center\",\n                    gap: \"0.25rem\",\n                  }}\n                >\n                  <Interactive.Stateless\n                    // Cast required because the discriminated union can't be\n                    // resolved from dynamic strings at the type level.\n                    {...({ variant, prominence } as any)}\n                    onClick={() => {}}\n                  >\n                    <Interactive.Container border>\n                      <span>{prominence}</span>\n                    </Interactive.Container>\n                  </Interactive.Stateless>\n                  <span\n                    style={{\n                      fontSize: \"0.625rem\",\n                      opacity: 0.6,\n                    }}\n                  >\n                    {prominence}\n                  </span>\n                </div>\n              ))}\n            </div>\n          )}\n        </div>\n      ))}\n    </div>\n  ),\n};\n\n/** All heightVariant sizes (lg, md, sm, xs, 2xs, fit). */\nexport const Sizes: StoryObj = {\n  render: () => (\n    <div style={{ display: \"flex\", alignItems: \"center\", gap: \"0.75rem\" }}>\n      {SIZE_VARIANTS.map((size) => (\n        <Interactive.Stateless\n          key={size}\n          variant=\"default\"\n          prominence=\"secondary\"\n          onClick={() => {}}\n        >\n          <Interactive.Container border heightVariant={size}>\n            <span>{size}</span>\n          </Interactive.Container>\n        </Interactive.Stateless>\n      ))}\n    </div>\n  ),\n};\n\n/** Container with widthVariant=\"full\" stretching to fill its parent. */\nexport const WidthFull: StoryObj = {\n  render: () => (\n    <div style={{ width: 400 }}>\n      <Interactive.Stateless\n        variant=\"default\"\n        prominence=\"secondary\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border widthVariant=\"full\">\n          <span>Full width container</span>\n        </Interactive.Container>\n      </Interactive.Stateless>\n    </div>\n  ),\n};\n\n/** All rounding variants side by side. */\nexport const Rounding: StoryObj = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: \"0.75rem\" }}>\n      {ROUNDING_VARIANTS.map((rounding) => (\n        <Interactive.Stateless\n          key={rounding}\n          variant=\"default\"\n          prominence=\"secondary\"\n          onClick={() => {}}\n        >\n          <Interactive.Container border roundingVariant={rounding}>\n            <span>{rounding}</span>\n          </Interactive.Container>\n        </Interactive.Stateless>\n      ))}\n    </div>\n  ),\n};\n\n/** Disabled state prevents clicks and shows disabled styling. */\nexport const DisabledStory: StoryObj = {\n  name: \"Disabled\",\n  render: () => (\n    <div style={{ display: \"flex\", gap: \"0.75rem\" }}>\n      <Disabled disabled>\n        <Interactive.Stateless\n          variant=\"default\"\n          prominence=\"secondary\"\n          onClick={() => {}}\n        >\n          <Interactive.Container border>\n            <span>Disabled</span>\n          </Interactive.Container>\n        </Interactive.Stateless>\n      </Disabled>\n\n      <Interactive.Stateless\n        variant=\"default\"\n        prominence=\"secondary\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border>\n          <span>Enabled</span>\n        </Interactive.Container>\n      </Interactive.Stateless>\n    </div>\n  ),\n};\n\n/** Interaction override forces the hover/active visual state. */\nexport const Interaction: StoryObj = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: \"0.75rem\" }}>\n      <Interactive.Stateless\n        variant=\"default\"\n        prominence=\"secondary\"\n        interaction=\"hover\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border>\n          <span>Forced hover</span>\n        </Interactive.Container>\n      </Interactive.Stateless>\n\n      <Interactive.Stateless\n        variant=\"default\"\n        prominence=\"secondary\"\n        interaction=\"active\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border>\n          <span>Forced active</span>\n        </Interactive.Container>\n      </Interactive.Stateless>\n\n      <Interactive.Stateless\n        variant=\"default\"\n        prominence=\"secondary\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border>\n          <span>Normal (rest)</span>\n        </Interactive.Container>\n      </Interactive.Stateless>\n    </div>\n  ),\n};\n\n/** Container with border={true}. */\nexport const WithBorder: StoryObj = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: \"0.75rem\" }}>\n      <Interactive.Stateless\n        variant=\"default\"\n        prominence=\"secondary\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border>\n          <span>With border</span>\n        </Interactive.Container>\n      </Interactive.Stateless>\n\n      <Interactive.Stateless\n        variant=\"default\"\n        prominence=\"secondary\"\n        onClick={() => {}}\n      >\n        <Interactive.Container>\n          <span>Without border</span>\n        </Interactive.Container>\n      </Interactive.Stateless>\n    </div>\n  ),\n};\n\n/** Using href to render as a link. */\nexport const AsLink: StoryObj = {\n  render: () => (\n    <Interactive.Stateless variant=\"action\" href=\"/settings\">\n      <Interactive.Container border>\n        <span>Go to Settings</span>\n      </Interactive.Container>\n    </Interactive.Stateless>\n  ),\n};\n\n/** Stateful select variant with selected and unselected states. */\nexport const SelectVariant: StoryObj = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: \"0.75rem\" }}>\n      <Interactive.Stateful\n        variant=\"select-light\"\n        state=\"selected\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border>\n          <span>Selected (light)</span>\n        </Interactive.Container>\n      </Interactive.Stateful>\n\n      <Interactive.Stateful\n        variant=\"select-light\"\n        state=\"empty\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border>\n          <span>Unselected (light)</span>\n        </Interactive.Container>\n      </Interactive.Stateful>\n\n      <Interactive.Stateful\n        variant=\"select-heavy\"\n        state=\"selected\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border>\n          <span>Selected (heavy)</span>\n        </Interactive.Container>\n      </Interactive.Stateful>\n\n      <Interactive.Stateful\n        variant=\"select-heavy\"\n        state=\"empty\"\n        onClick={() => {}}\n      >\n        <Interactive.Container border>\n          <span>Unselected (heavy)</span>\n        </Interactive.Container>\n      </Interactive.Stateful>\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/README.md",
    "content": "# Interactive\n\nThe foundational layer for all clickable surfaces in the design system. Defines hover, active, disabled, and interaction-override state styling in a single place. Higher-level components (Button, SelectButton, OpenButton, etc.) compose on top of it.\n\n## Sub-components\n\n| Sub-component | Role | Docs |\n|---|---|---|\n| `Interactive.Stateless` | Stateless surfaces (buttons, links, cards). Variant × prominence color matrix. | [README](./stateless/README.md) |\n| `Interactive.Stateful` | Stateful surfaces (toggles, sidebar items). Variant × state color matrix. | [README](./stateful/README.md) |\n| `Interactive.Container` | Structural box with height, rounding, padding, and optional border. Shared by both. | [README](./container/README.md) |\n| `Interactive.Foldable` | Zero-width collapsible wrapper with CSS grid animation. | [README](./foldable/README.md) |\n\n## Foreground colour system\n\nEach variant/prominence/state combination sets two CSS custom properties:\n- `--interactive-foreground` — text color\n- `--interactive-foreground-icon` — icon color\n\nBoth are registered via `@property` as `<color>` in `shared.css`, enabling the browser to interpolate them directly on the parent `.interactive` element. Children read the variables with no independent transitions, guaranteeing perfect sync.\n\n**Opt-in classes:**\n- `.interactive-foreground` — sets `color: var(--interactive-foreground)`\n- `.interactive-foreground-icon` — sets `color: var(--interactive-foreground-icon)`\n\n## Interaction override\n\nBoth `Stateless` and `Stateful` support `interaction?: \"rest\" | \"hover\" | \"active\"` for JS-controlled visual state overrides via `data-interaction`.\n\n## Colour tables\n\n### Stateless: Default\n\n**Background**\n\n| | Primary | Secondary | Tertiary | Internal |\n|---|---|---|---|---|\n| **Rest** | `theme-primary-05` | `background-tint-01` | `transparent` | `transparent` |\n| **Hover** | `theme-primary-04` | `background-tint-02` | `background-tint-02` | `background-tint-00` |\n| **Active** | `theme-primary-06` | `background-tint-00` | `background-tint-00` | `background-tint-00` |\n| **Disabled** | `background-neutral-04` | `background-neutral-03` | `transparent` | `transparent` |\n\n**Foreground**\n\n| | Primary | Secondary | Tertiary | Internal |\n|---|---|---|---|---|\n| **Rest** | `text-inverted-05` | `text-03` | `text-03` | `text-03` |\n| **Hover** | `text-inverted-05` | `text-04` | `text-04` | `text-04` |\n| **Active** | `text-inverted-05` | `text-05` | `text-05` | `text-05` |\n| **Disabled** | `text-inverted-04` | `text-01` | `text-01` | `text-01` |\n\n### Stateless: Action\n\n**Background**\n\n| | Primary | Secondary | Tertiary | Internal |\n|---|---|---|---|---|\n| **Rest** | `action-link-05` | `background-tint-01` | `transparent` | `transparent` |\n| **Hover** | `action-link-04` | `background-tint-02` | `background-tint-02` | `background-tint-00` |\n| **Active** | `action-link-06` | `background-tint-00` | `background-tint-00` | `background-tint-00` |\n| **Disabled** | `action-link-02` | `background-neutral-02` | `transparent` | `transparent` |\n\n**Foreground**\n\n| | Primary | Secondary | Tertiary | Internal |\n|---|---|---|---|---|\n| **Rest** | `text-light-05` | `action-text-link-05` | `action-text-link-05` | `action-text-link-05` |\n| **Hover** | `text-light-05` | `action-text-link-05` | `action-text-link-05` | `action-text-link-05` |\n| **Active** | `text-light-05` | `action-text-link-05` | `action-text-link-05` | `action-text-link-05` |\n| **Disabled** | `text-01` | `action-link-03` | `action-link-03` | `action-link-03` |\n\n### Stateless: Danger\n\n**Background**\n\n| | Primary | Secondary | Tertiary | Internal |\n|---|---|---|---|---|\n| **Rest** | `action-danger-05` | `background-tint-01` | `transparent` | `transparent` |\n| **Hover** | `action-danger-04` | `background-tint-02` | `background-tint-02` | `background-tint-00` |\n| **Active** | `action-danger-06` | `background-tint-00` | `background-tint-00` | `background-tint-00` |\n| **Disabled** | `action-danger-02` | `background-neutral-02` | `transparent` | `transparent` |\n\n**Foreground**\n\n| | Primary | Secondary | Tertiary | Internal |\n|---|---|---|---|---|\n| **Rest** | `text-light-05` | `action-text-danger-05` | `action-text-danger-05` | `action-text-danger-05` |\n| **Hover** | `text-light-05` | `action-text-danger-05` | `action-text-danger-05` | `action-text-danger-05` |\n| **Active** | `text-light-05` | `action-text-danger-05` | `action-text-danger-05` | `action-text-danger-05` |\n| **Disabled** | `text-01` | `action-danger-03` | `action-danger-03` | `action-danger-03` |\n\n### Stateful: Select-Heavy / Select-Light\n\n**Background (empty/filled)**\n\n| | Select-Heavy | Select-Light |\n|---|---|---|\n| **Rest** | `transparent` | `transparent` |\n| **Hover** | `background-tint-02` | `background-tint-02` |\n| **Active** | `background-neutral-00` | `background-neutral-00` |\n| **Disabled** | `transparent` | `transparent` |\n\n**Background (selected)**\n\n| | Select-Heavy | Select-Light |\n|---|---|---|\n| **Rest** | `action-link-01` | `transparent` |\n| **Hover** | `background-tint-02` | `background-tint-02` |\n| **Active** | `background-tint-00` | `background-tint-00` |\n| **Disabled** | `transparent` | `transparent` |\n\n**Foreground (empty)**\n\n| | Text | Icon |\n|---|---|---|\n| **Rest** | `text-04` | `text-03` |\n| **Hover** | `text-04` | `text-04` |\n| **Active** | `text-05` | `text-05` |\n| **Disabled** | `text-01` | `text-01` |\n\n**Foreground (selected)**\n\n| | Text | Icon |\n|---|---|---|\n| **Rest** | `action-link-05` | `action-link-05` |\n| **Hover** | `action-link-05` | `action-link-05` |\n| **Active** | `action-link-05` | `action-link-05` |\n| **Disabled** | `action-link-03` | `action-link-03` |\n\n### Stateful: Sidebar\n\n**Background**\n\n| | Empty/Filled | Selected |\n|---|---|---|\n| **Rest** | `transparent` | `background-tint-00` |\n| **Hover** | `background-tint-03` | `background-tint-03` |\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/container/README.md",
    "content": "# Interactive.Container\n\n**Import:** `import { Interactive } from \"@opal/core\";` — use as `Interactive.Container`.\n\nStructural container shared by both `Interactive.Stateless` and `Interactive.Stateful`. Provides consistent height, rounding, padding, and optional border. Renders a `<div>` by default, or a `<button>` when `type` is provided.\n\n## Props\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `heightVariant` | `SizeVariant` | `\"lg\"` | Height preset (`2xs`–`lg`, `fit`) |\n| `roundingVariant` | `\"md\" \\| \"sm\" \\| \"xs\"` | `\"md\"` | Border-radius preset |\n| `widthVariant` | `WidthVariant` | — | Width preset (`\"auto\"`, `\"fit\"`, `\"full\"`) |\n| `border` | `boolean` | `false` | Renders a 1px border |\n| `type` | `\"submit\" \\| \"button\" \\| \"reset\"` | — | When set, renders a `<button>` element |\n\n## Usage\n\n```tsx\n<Interactive.Stateless variant=\"default\" prominence=\"primary\">\n  <Interactive.Container heightVariant=\"sm\" roundingVariant=\"sm\" border>\n    <span>Content</span>\n  </Interactive.Container>\n</Interactive.Stateless>\n```\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/container/components.tsx",
    "content": "import Link from \"next/link\";\nimport type { Route } from \"next\";\nimport \"@opal/core/interactive/shared.css\";\nimport React from \"react\";\nimport { cn } from \"@opal/utils\";\nimport type { ButtonType, RoundingVariants, WithoutStyles } from \"@opal/types\";\nimport {\n  containerSizeVariants,\n  type ContainerSizeVariants,\n  widthVariants,\n  type ExtremaSizeVariants,\n} from \"@opal/shared\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype InteractiveContainerRoundingVariant = Extract<\n  RoundingVariants,\n  \"md\" | \"sm\" | \"xs\"\n>;\nconst interactiveContainerRoundingVariants: Record<\n  InteractiveContainerRoundingVariant,\n  string\n> = {\n  md: \"rounded-12\",\n  sm: \"rounded-08\",\n  xs: \"rounded-04\",\n} as const;\n\n/**\n * Props for {@link InteractiveContainer}.\n *\n * Extends standard `<div>` attributes (minus `className` and `style`).\n */\ninterface InteractiveContainerProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {\n  /**\n   * Ref forwarded to the underlying element.\n   */\n  ref?: React.Ref<HTMLElement>;\n\n  /**\n   * HTML button type (e.g. `\"submit\"`, `\"button\"`, `\"reset\"`).\n   *\n   * When provided, renders a `<button>` element instead of a `<div>`.\n   * This keeps all styling (background, rounding, height) on a single\n   * element — unlike a wrapper approach which would split them.\n   *\n   * Mutually exclusive with `href`.\n   */\n  type?: ButtonType;\n\n  /**\n   * When `true`, applies a 1px border using the theme's border color.\n   *\n   * @default false\n   */\n  border?: boolean;\n\n  /**\n   * Border-radius preset controlling corner rounding.\n   *\n   * @default \"default\"\n   */\n  roundingVariant?: InteractiveContainerRoundingVariant;\n\n  /**\n   * Size preset controlling the container's height, min-width, and padding.\n   *\n   * @default \"lg\"\n   */\n  heightVariant?: ContainerSizeVariants;\n\n  /**\n   * Width preset controlling the container's horizontal size.\n   *\n   * @default \"fit\"\n   */\n  widthVariant?: ExtremaSizeVariants;\n}\n\n// ---------------------------------------------------------------------------\n// InteractiveContainer\n// ---------------------------------------------------------------------------\n\n/**\n * Structural container for use inside `Interactive.Stateless` or\n * `Interactive.Stateful`.\n *\n * Provides a `<div>` with design-system-controlled border, padding, rounding,\n * and height. When nested under a Radix Slot-based parent, correctly extracts\n * and merges injected `className` and `style` values.\n */\nfunction InteractiveContainer({\n  ref,\n  type,\n  border,\n  roundingVariant = \"md\",\n  heightVariant = \"lg\",\n  widthVariant = \"fit\",\n  ...props\n}: InteractiveContainerProps) {\n  const {\n    className: slotClassName,\n    style: slotStyle,\n    href,\n    target,\n    rel,\n    ...rest\n  } = props as typeof props & {\n    className?: string;\n    style?: React.CSSProperties;\n    href?: string;\n    target?: string;\n    rel?: string;\n  };\n  const { height, minWidth, padding } = containerSizeVariants[heightVariant];\n  const sharedProps = {\n    ...rest,\n    className: cn(\n      \"interactive-container\",\n      interactiveContainerRoundingVariants[roundingVariant],\n      height,\n      minWidth,\n      padding,\n      widthVariants[widthVariant],\n      slotClassName\n    ),\n    \"data-border\": border ? (\"true\" as const) : undefined,\n    style: slotStyle,\n  };\n\n  if (href) {\n    return (\n      <Link\n        ref={ref as React.Ref<HTMLAnchorElement>}\n        href={href as Route}\n        target={target}\n        rel={rel}\n        {...(sharedProps as React.HTMLAttributes<HTMLAnchorElement>)}\n      />\n    );\n  }\n\n  if (type) {\n    const ariaDisabled = (rest as Record<string, unknown>)[\"aria-disabled\"];\n    const nativeDisabled =\n      ariaDisabled === true || ariaDisabled === \"true\" || undefined;\n    return (\n      <button\n        ref={ref as React.Ref<HTMLButtonElement>}\n        type={type}\n        disabled={nativeDisabled}\n        {...(sharedProps as React.HTMLAttributes<HTMLButtonElement>)}\n      />\n    );\n  }\n  return <div ref={ref as React.Ref<HTMLDivElement>} {...sharedProps} />;\n}\n\nexport {\n  InteractiveContainer,\n  type InteractiveContainerProps,\n  type InteractiveContainerRoundingVariant,\n};\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/foldable/README.md",
    "content": "# Interactive.Foldable\n\n**Import:** `import { Interactive } from \"@opal/core\";` — use as `Interactive.Foldable`.\n\nA zero-width collapsible wrapper that expands when its ancestor `.interactive` element is hovered or has an interaction override. Uses a CSS grid `0fr → 1fr` animation for smooth expand/collapse.\n\n## Requirements\n\n- Must be placed inside an `Interactive.Stateless` or `Interactive.Stateful` tree.\n- The direct parent element should add the `interactive-foldable-host` class for synchronized gap transitions.\n\n## Props\n\n| Prop | Type | Description |\n|------|------|-------------|\n| `children` | `ReactNode` | Content that folds/unfolds |\n\n## CSS triggers\n\nThe foldable expands when any of these conditions are met on an ancestor `.interactive`:\n- `:hover` pseudo-class\n- `data-interaction=\"hover\"`\n- `data-interaction=\"active\"`\n\n## Usage\n\n```tsx\n<Interactive.Stateful variant=\"select-heavy\" state=\"empty\">\n  <Interactive.Container>\n    <div className=\"interactive-foldable-host flex items-center\">\n      <Icon />\n      <Interactive.Foldable>\n        <span>Label text</span>\n      </Interactive.Foldable>\n    </div>\n  </Interactive.Container>\n</Interactive.Stateful>\n```\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/foldable/components.tsx",
    "content": "import \"@opal/core/interactive/foldable/styles.css\";\nimport React from \"react\";\nimport type { WithoutStyles } from \"@opal/types\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface FoldableProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {\n  children: React.ReactNode;\n}\n\n// ---------------------------------------------------------------------------\n// Foldable\n// ---------------------------------------------------------------------------\n\n/**\n * A zero-width collapsible wrapper that expands when its ancestor\n * `.interactive` element is hovered or has an interaction override.\n *\n * Uses a CSS grid `0fr ↔ 1fr` animation for smooth expand/collapse.\n * Must be placed inside an `Interactive.Stateless` or `Interactive.Stateful`\n * tree for the CSS triggers to work.\n *\n * The parent element should add the `interactive-foldable-host` class to\n * get synchronized gap transitions.\n *\n * @example\n * ```tsx\n * <Interactive.Stateful variant=\"select-heavy\" state=\"empty\">\n *   <Interactive.Container>\n *     <div className=\"interactive-foldable-host flex items-center\">\n *       <Icon />\n *       <Foldable>\n *         <span>Label text</span>\n *       </Foldable>\n *     </div>\n *   </Interactive.Container>\n * </Interactive.Stateful>\n * ```\n */\nfunction Foldable({ children, ...props }: FoldableProps) {\n  return (\n    <div {...props} className=\"interactive-foldable\">\n      <div className=\"interactive-foldable-inner\">{children}</div>\n    </div>\n  );\n}\n\nexport { Foldable, type FoldableProps };\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/foldable/styles.css",
    "content": "/* ---------------------------------------------------------------------------\n   Foldable — CSS grid collapse/expand animation.\n\n   Expands when an ancestor `.interactive` element is hovered, focused\n   within, or has `data-interaction=\"hover\"` / `data-interaction=\"active\"`.\n\n   Structure:\n     .interactive-foldable-host   — flex parent, gap transitions 0 → 0.25rem\n       [always-visible content]\n       .interactive-foldable      — grid container, column 0fr ↔ 1fr\n         .interactive-foldable-inner — single grid item, flex + overflow clip\n           [foldable content]\n   --------------------------------------------------------------------------- */\n\n/* Host: the flex parent that includes both persistent + foldable content */\n.interactive-foldable-host {\n  gap: 0;\n  transition: gap 200ms ease-in-out;\n}\n\n.interactive:hover:not([data-disabled]) .interactive-foldable-host,\n.interactive:focus-within:not([data-disabled]) .interactive-foldable-host,\n.interactive[data-interaction=\"hover\"]:not([data-disabled])\n  .interactive-foldable-host,\n.interactive[data-interaction=\"active\"]:not([data-disabled])\n  .interactive-foldable-host {\n  gap: 0.25rem;\n}\n\n/* Grid container — collapse animation */\n.interactive-foldable {\n  display: grid;\n  grid-template-columns: 0fr;\n  opacity: 0;\n  transition:\n    grid-template-columns 200ms ease-in-out,\n    opacity 200ms ease-in-out;\n}\n\n/* Single grid item — content layout + overflow clipping */\n.interactive-foldable-inner {\n  @apply flex items-center gap-1;\n  overflow: hidden;\n  min-width: 0;\n}\n\n/* Expanded: hovered, focused within, or interaction override */\n.interactive:hover:not([data-disabled]) .interactive-foldable,\n.interactive:focus-within:not([data-disabled]) .interactive-foldable,\n.interactive[data-interaction=\"hover\"]:not([data-disabled])\n  .interactive-foldable,\n.interactive[data-interaction=\"active\"]:not([data-disabled])\n  .interactive-foldable {\n  grid-template-columns: 1fr;\n  opacity: 1;\n}\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/shared.css",
    "content": "/* Interactive — shared base classes for stateless + stateful primitives */\n\n/* Register --interactive-foreground as a <color> so the browser can\n   interpolate it directly on the parent. Children just read the variable\n   with no independent transitions — guaranteeing perfect sync. */\n@property --interactive-foreground {\n  syntax: \"<color>\";\n  inherits: true;\n  initial-value: transparent;\n}\n\n@property --interactive-foreground-icon {\n  syntax: \"<color>\";\n  inherits: true;\n  initial-value: transparent;\n}\n\n/* Shared timing tokens — used by .interactive and other surfaces (e.g. table rows) */\n:root {\n  --interactive-duration: 150ms;\n  --interactive-easing: ease-in-out;\n}\n\n/* Base interactive surface — sets color directly so all descendants inherit. */\n.interactive {\n  @apply cursor-pointer select-none;\n  color: var(--interactive-foreground);\n  transition:\n    background-color var(--interactive-duration) var(--interactive-easing),\n    --interactive-foreground var(--interactive-duration)\n      var(--interactive-easing),\n    --interactive-foreground-icon var(--interactive-duration)\n      var(--interactive-easing);\n}\n.interactive[data-disabled] {\n  @apply cursor-not-allowed;\n}\n\n/* Container — structural box */\n.interactive-container {\n  @apply flex items-center justify-center overflow-clip;\n}\n.interactive-container[data-border=\"true\"] {\n  @apply border;\n}\n\n/* Icon foreground — reads from --interactive-foreground-icon, which may differ\n   from --interactive-foreground (e.g. muted icons beside normal text). */\n.interactive-foreground-icon {\n  color: var(--interactive-foreground-icon);\n}\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/simple/components.tsx",
    "content": "import React from \"react\";\nimport { Slot } from \"@radix-ui/react-slot\";\nimport { cn } from \"@opal/utils\";\nimport { guardPortalClick } from \"@opal/core/interactive/utils\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface InteractiveSimpleProps\n  extends Omit<\n    React.HTMLAttributes<HTMLElement>,\n    \"className\" | \"style\" | \"color\"\n  > {\n  ref?: React.Ref<HTMLElement>;\n\n  /**\n   * Tailwind group class (e.g. `\"group/Card\"`) for `group-hover:*` utilities.\n   */\n  group?: string;\n\n  /**\n   * URL to navigate to when clicked. Passed through Slot to the child.\n   */\n  href?: string;\n\n  /**\n   * Link target (e.g. `\"_blank\"`). Only used when `href` is provided.\n   */\n  target?: string;\n\n  /**\n   * Applies disabled cursor and suppresses clicks.\n   */\n  disabled?: boolean;\n}\n\n// ---------------------------------------------------------------------------\n// InteractiveSimple\n// ---------------------------------------------------------------------------\n\n/**\n * Minimal interactive surface primitive.\n *\n * Provides cursor styling, click handling, and optional link/group\n * support — but **no color or background styling**.\n *\n * Use this for elements that need interactivity (click, cursor, disabled)\n * without participating in the Interactive color system.\n *\n * Uses Radix `Slot` — merges props onto a single child element without\n * adding any DOM node.\n *\n * @example\n * ```tsx\n * <Interactive.Simple onClick={handleClick} group=\"group/Card\">\n *   <Card>...</Card>\n * </Interactive.Simple>\n * ```\n */\nfunction InteractiveSimple({\n  ref,\n  group,\n  href,\n  target,\n  disabled,\n  ...props\n}: InteractiveSimpleProps) {\n  const isDisabled = !!disabled;\n\n  const classes = cn(\n    \"cursor-pointer select-none\",\n    isDisabled && \"cursor-not-allowed\",\n    !props.onClick && !href && \"!cursor-default !select-auto\",\n    group\n  );\n\n  const { onClick, ...slotProps } = props;\n\n  const linkAttrs = href\n    ? {\n        href: isDisabled ? undefined : href,\n        target,\n        rel: target === \"_blank\" ? \"noopener noreferrer\" : undefined,\n      }\n    : {};\n\n  return (\n    <Slot\n      ref={ref}\n      className={classes}\n      aria-disabled={isDisabled || undefined}\n      {...linkAttrs}\n      {...slotProps}\n      onClick={\n        isDisabled\n          ? href\n            ? (e: React.MouseEvent) => e.preventDefault()\n            : undefined\n          : guardPortalClick(onClick)\n      }\n    />\n  );\n}\n\nexport { InteractiveSimple, type InteractiveSimpleProps };\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/stateful/README.md",
    "content": "# Interactive.Stateful\n\n**Import:** `import { Interactive } from \"@opal/core\";` — use as `Interactive.Stateful`.\n\nStateful interactive surface primitive for elements that maintain a value state (empty/filled/selected). Used for toggles, sidebar items, and selectable list rows. Applies variant/state color styling via CSS data-attributes and merges onto a single child element via Radix `Slot`.\n\n## Props\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `variant` | `\"select-light\" \\| \"select-heavy\" \\| \"select-card\" \\| \"select-tinted\" \\| \"select-filter\" \\| \"sidebar-heavy\" \\| \"sidebar-light\"` | `\"select-heavy\"` | Color variant |\n| `state` | `\"empty\" \\| \"filled\" \\| \"selected\"` | `\"empty\"` | Current value state |\n| `interaction` | `\"rest\" \\| \"hover\" \\| \"active\"` | `\"rest\"` | JS-controlled interaction override |\n| `group` | `string` | — | Tailwind group class for `group-hover:*` |\n| `disabled` | `boolean` | `false` | Disables the element |\n| `href` | `string` | — | URL for link behavior |\n| `target` | `string` | — | Link target (e.g. `\"_blank\"`) |\n\n## Variants\n\n- **`select-light`** — Transparent selected background. For inline toggles.\n- **`select-heavy`** — Tinted selected background (`action-link-01`). For list rows, model pickers, buttons.\n- **`select-card`** — Like `select-heavy`, but the filled state gets a visible background (`background-tint-00`) with neutral foreground. Designed for larger surfaces (cards) where background carries more of the visual distinction than foreground color alone.\n- **`select-tinted`** — Like `select-heavy` but with a tinted rest background (`background-tint-01`).\n- **`select-filter`** — Like `select-tinted` for empty/filled; selected state uses inverted backgrounds and inverted text.\n- **`sidebar-heavy`** — Sidebar navigation: muted when unselected, bold when selected.\n- **`sidebar-light`** — Sidebar navigation: uniformly muted across all states.\n\n## State attribute\n\nUses `data-interactive-state` (not `data-state`) to avoid conflicts with Radix UI, which injects its own `data-state` on trigger elements.\n\n## CSS custom properties\n\nSets `--interactive-foreground` and `--interactive-foreground-icon` per variant/state. In the `empty` state, icon color (`--text-03`) is intentionally lighter than text color (`--text-04`).\n\n## Usage\n\n```tsx\n<Interactive.Stateful variant=\"select-heavy\" state=\"selected\" onClick={toggle}>\n  <Interactive.Container>\n    <span className=\"interactive-foreground\">Selected item</span>\n  </Interactive.Container>\n</Interactive.Stateful>\n```\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/stateful/components.tsx",
    "content": "import \"@opal/core/interactive/shared.css\";\nimport \"@opal/core/interactive/stateful/styles.css\";\nimport React from \"react\";\nimport { Slot } from \"@radix-ui/react-slot\";\nimport { cn } from \"@opal/utils\";\nimport { guardPortalClick } from \"@opal/core/interactive/utils\";\nimport type { ButtonType, WithoutStyles } from \"@opal/types\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype InteractiveStatefulVariant =\n  | \"select-light\"\n  | \"select-heavy\"\n  | \"select-card\"\n  | \"select-tinted\"\n  | \"select-filter\"\n  | \"sidebar-heavy\"\n  | \"sidebar-light\";\ntype InteractiveStatefulState = \"empty\" | \"filled\" | \"selected\";\ntype InteractiveStatefulInteraction = \"rest\" | \"hover\" | \"active\";\n\n/**\n * Props for {@link InteractiveStateful}.\n */\ninterface InteractiveStatefulProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLElement>> {\n  ref?: React.Ref<HTMLElement>;\n\n  /**\n   * Visual variant controlling the color palette and behavior.\n   *\n   * - `\"select-light\"` — transparent selected background (for inline toggles)\n   * - `\"select-heavy\"` — tinted selected background (for list rows, model pickers)\n   * - `\"select-card\"` — like select-heavy but filled state has a visible background (for cards/larger surfaces)\n   * - `\"select-tinted\"` — like select-heavy but with a tinted rest background\n   * - `\"select-filter\"` — like select-tinted for empty/filled; selected state uses inverted tint backgrounds and inverted text (for filter buttons)\n   * - `\"sidebar-heavy\"` — sidebar navigation items: muted when unselected (text-03/text-02), bold when selected (text-04/text-03)\n   * - `\"sidebar-light\"` — sidebar navigation items: uniformly muted across all states (text-02/text-02)\n   *\n   * @default \"select-heavy\"\n   */\n  variant?: InteractiveStatefulVariant;\n\n  /**\n   * The current value state of this element.\n   *\n   * - `\"empty\"` — no value / unset\n   * - `\"filled\"` — has a value but not actively selected\n   * - `\"selected\"` — actively chosen / focused\n   *\n   * @default \"empty\"\n   */\n  state?: InteractiveStatefulState;\n\n  /**\n   * JS-controllable interaction state override.\n   *\n   * - `\"rest\"` — default appearance (no override)\n   * - `\"hover\"` — forces hover visual state\n   * - `\"active\"` — forces active/pressed visual state\n   *\n   * @default \"rest\"\n   */\n  interaction?: InteractiveStatefulInteraction;\n\n  /**\n   * Tailwind group class (e.g. `\"group/Card\"`) for `group-hover:*` utilities.\n   */\n  group?: string;\n\n  /**\n   * HTML button type. When set to `\"submit\"`, `\"button\"`, or `\"reset\"`, the\n   * element is treated as inherently interactive for cursor styling purposes\n   * even without an explicit `onClick` or `href`.\n   */\n  type?: ButtonType;\n\n  /**\n   * URL to navigate to when clicked. Passed through Slot to the child.\n   */\n  href?: string;\n\n  /**\n   * Link target (e.g. `\"_blank\"`). Only used when `href` is provided.\n   */\n  target?: string;\n\n  /**\n   * Applies variant-specific disabled colors and suppresses clicks.\n   */\n  disabled?: boolean;\n}\n\n// ---------------------------------------------------------------------------\n// InteractiveStateful\n// ---------------------------------------------------------------------------\n\n/**\n * Stateful interactive surface primitive.\n *\n * The foundational building block for elements that maintain a value state\n * (empty/filled/selected). Applies variant/state color styling via CSS\n * data-attributes and merges onto a single child element via Radix `Slot`.\n *\n * Disabled state is controlled via the `disabled` prop.\n */\nfunction InteractiveStateful({\n  ref,\n  variant = \"select-heavy\",\n  state = \"empty\",\n  interaction = \"rest\",\n  group,\n  type,\n  href,\n  target,\n  disabled,\n  ...props\n}: InteractiveStatefulProps) {\n  const isDisabled = !!disabled;\n\n  // onClick/href are always passed directly — Stateful is the outermost Slot,\n  // so Radix Slot-injected handlers don't bypass this guard.\n  const classes = cn(\n    \"interactive\",\n    !props.onClick && !href && !type && \"!cursor-default !select-auto\",\n    group\n  );\n\n  const dataAttrs = {\n    \"data-interactive-variant\": variant,\n    \"data-interactive-state\": state,\n    \"data-interaction\": interaction !== \"rest\" ? interaction : undefined,\n    \"data-disabled\": isDisabled ? \"true\" : undefined,\n    \"aria-disabled\": isDisabled || undefined,\n  };\n\n  const { onClick, ...slotProps } = props;\n\n  const linkAttrs = href\n    ? {\n        href: isDisabled ? undefined : href,\n        target,\n        rel: target === \"_blank\" ? \"noopener noreferrer\" : undefined,\n      }\n    : {};\n\n  return (\n    <Slot\n      ref={ref}\n      className={classes}\n      {...dataAttrs}\n      {...linkAttrs}\n      {...slotProps}\n      onClick={\n        isDisabled\n          ? href\n            ? (e: React.MouseEvent) => e.preventDefault()\n            : undefined\n          : guardPortalClick(onClick)\n      }\n    />\n  );\n}\n\nexport {\n  InteractiveStateful,\n  type InteractiveStatefulProps,\n  type InteractiveStatefulVariant,\n  type InteractiveStatefulState,\n  type InteractiveStatefulInteraction,\n};\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/stateful/styles.css",
    "content": "/* ============================================================================\n   Stateful — variant x state color matrix\n\n   Each combination sets:\n     - background-color (via @apply)\n     - --interactive-foreground (CSS custom property for descendant text color)\n     - --interactive-foreground-icon (CSS custom property for descendant icon color)\n\n   Both foreground variables are registered as <color> via @property in\n   shared.css, so the browser interpolates them on the parent element.\n   Children read the variables with no independent transitions.\n\n   State dimension: `data-interactive-state` = \"empty\" | \"filled\" | \"selected\"\n   Variant dimension: `data-interactive-variant` = \"select-light\" | \"select-heavy\" | \"select-card\" | \"select-tinted\" | \"select-filter\" | \"sidebar-heavy\" | \"sidebar-light\"\n\n   Interaction override: `data-interaction=\"hover\"` and `data-interaction=\"active\"`\n   allow JS-controlled visual state overrides.\n============================================================================ */\n\n/* ===========================================================================\n   Select-Heavy\n   =========================================================================== */\n\n/* ---------------------------------------------------------------------------\n   Select-Heavy — Empty\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"empty\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-04);\n  --interactive-foreground-icon: var(--text-03);\n}\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"empty\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"empty\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n  --interactive-foreground-icon: var(--text-04);\n}\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"empty\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"empty\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-neutral-00;\n  --interactive-foreground: var(--text-05);\n  --interactive-foreground-icon: var(--text-05);\n}\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"empty\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Select-Heavy — Filled\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"filled\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-link-05);\n  --interactive-foreground-icon: var(--action-link-05);\n}\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"filled\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"filled\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n}\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"filled\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"filled\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"filled\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Select-Heavy — Selected\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"selected\"] {\n  @apply bg-[var(--action-link-01)];\n  --interactive-foreground: var(--action-link-05);\n  --interactive-foreground-icon: var(--action-link-05);\n}\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"selected\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"selected\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n}\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"selected\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"selected\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"select-heavy\"][data-interactive-state=\"selected\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-link-03);\n  --interactive-foreground-icon: var(--action-link-03);\n}\n\n/* ===========================================================================\n   Select-Card — like Select-Heavy but filled has a visible background.\n   Designed for larger surfaces (cards) where background carries more of\n   the visual distinction than foreground color alone.\n   =========================================================================== */\n\n/* ---------------------------------------------------------------------------\n   Select-Card — Empty\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"empty\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-04);\n  --interactive-foreground-icon: var(--text-03);\n}\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"empty\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"empty\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n  --interactive-foreground-icon: var(--text-04);\n}\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"empty\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"empty\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-neutral-00;\n  --interactive-foreground: var(--text-05);\n  --interactive-foreground-icon: var(--text-05);\n}\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"empty\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Select-Card — Filled (visible background, neutral foreground)\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"filled\"] {\n  @apply bg-background-tint-00;\n  --interactive-foreground: var(--text-04);\n  --interactive-foreground-icon: var(--text-03);\n}\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"filled\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"filled\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n  --interactive-foreground-icon: var(--text-04);\n}\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"filled\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"filled\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n  --interactive-foreground: var(--text-05);\n  --interactive-foreground-icon: var(--text-05);\n}\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"filled\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Select-Card — Selected\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"selected\"] {\n  @apply bg-[var(--action-link-01)];\n  --interactive-foreground: var(--action-link-05);\n  --interactive-foreground-icon: var(--action-link-05);\n}\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"selected\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"selected\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n}\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"selected\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"selected\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"select-card\"][data-interactive-state=\"selected\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-link-03);\n  --interactive-foreground-icon: var(--action-link-03);\n}\n\n/* ===========================================================================\n   Select-Light — identical to Select-Heavy except selected bg is transparent\n   =========================================================================== */\n\n/* ---------------------------------------------------------------------------\n   Select-Light — Empty\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"empty\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-04);\n  --interactive-foreground-icon: var(--text-03);\n}\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"empty\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"empty\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n  --interactive-foreground-icon: var(--text-04);\n}\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"empty\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"empty\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-neutral-00;\n  --interactive-foreground: var(--text-05);\n  --interactive-foreground-icon: var(--text-05);\n}\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"empty\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Select-Light — Filled\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"filled\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-link-05);\n  --interactive-foreground-icon: var(--action-link-05);\n}\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"filled\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"filled\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n}\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"filled\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"filled\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"filled\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Select-Light — Selected (transparent background, unlike select-heavy)\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"selected\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-link-05);\n  --interactive-foreground-icon: var(--action-link-05);\n}\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"selected\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"selected\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n}\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"selected\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"selected\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"select-light\"][data-interactive-state=\"selected\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-link-03);\n  --interactive-foreground-icon: var(--action-link-03);\n}\n\n/* ===========================================================================\n   Select-Tinted — like Select-Heavy but with a tinted rest background\n   =========================================================================== */\n\n/* ---------------------------------------------------------------------------\n   Select-Tinted — Empty\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"empty\"] {\n  @apply bg-background-tint-01;\n  --interactive-foreground: var(--text-04);\n  --interactive-foreground-icon: var(--text-03);\n}\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"empty\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"empty\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n  --interactive-foreground-icon: var(--text-04);\n}\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"empty\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"empty\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-neutral-00;\n  --interactive-foreground: var(--text-05);\n  --interactive-foreground-icon: var(--text-05);\n}\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"empty\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Select-Tinted — Filled\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"filled\"] {\n  @apply bg-background-tint-01;\n  --interactive-foreground: var(--action-link-05);\n  --interactive-foreground-icon: var(--action-link-05);\n}\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"filled\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"filled\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n}\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"filled\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"filled\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"filled\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Select-Tinted — Selected\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"selected\"] {\n  @apply bg-[var(--action-link-01)];\n  --interactive-foreground: var(--action-link-05);\n  --interactive-foreground-icon: var(--action-link-05);\n}\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"selected\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"selected\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n}\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"selected\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"selected\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"select-tinted\"][data-interactive-state=\"selected\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-link-03);\n  --interactive-foreground-icon: var(--action-link-03);\n}\n\n/* ===========================================================================\n   Select-Filter — empty/filled identical to Select-Tinted;\n   selected uses inverted tint backgrounds and inverted text\n   =========================================================================== */\n\n/* ---------------------------------------------------------------------------\n   Select-Filter — Empty & Filled (identical colors)\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-filter\"]:is(\n    [data-interactive-state=\"empty\"],\n    [data-interactive-state=\"filled\"]\n  ) {\n  @apply bg-background-tint-01;\n  --interactive-foreground: var(--text-02);\n  --interactive-foreground-icon: var(--text-02);\n}\n.interactive[data-interactive-variant=\"select-filter\"]:is(\n    [data-interactive-state=\"empty\"],\n    [data-interactive-state=\"filled\"]\n  ):hover:not([data-disabled]),\n.interactive[data-interactive-variant=\"select-filter\"]:is(\n    [data-interactive-state=\"empty\"],\n    [data-interactive-state=\"filled\"]\n  )[data-interaction=\"hover\"]:not([data-disabled]) {\n  @apply bg-background-tint-02;\n  --interactive-foreground: var(--text-04);\n  --interactive-foreground-icon: var(--text-04);\n}\n.interactive[data-interactive-variant=\"select-filter\"]:is(\n    [data-interactive-state=\"empty\"],\n    [data-interactive-state=\"filled\"]\n  ):active:not([data-disabled]),\n.interactive[data-interactive-variant=\"select-filter\"]:is(\n    [data-interactive-state=\"empty\"],\n    [data-interactive-state=\"filled\"]\n  )[data-interaction=\"active\"]:not([data-disabled]) {\n  @apply bg-background-neutral-00;\n  --interactive-foreground: var(--text-05);\n  --interactive-foreground-icon: var(--text-05);\n}\n.interactive[data-interactive-variant=\"select-filter\"]:is(\n    [data-interactive-state=\"empty\"],\n    [data-interactive-state=\"filled\"]\n  )[data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Select-Filter — Selected\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"select-filter\"][data-interactive-state=\"selected\"] {\n  @apply bg-background-tint-inverted-03;\n  --interactive-foreground: var(--text-inverted-05);\n  --interactive-foreground-icon: var(--text-inverted-05);\n}\n.interactive[data-interactive-variant=\"select-filter\"][data-interactive-state=\"selected\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-filter\"][data-interactive-state=\"selected\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-inverted-04;\n  --interactive-foreground: var(--text-inverted-05);\n  --interactive-foreground-icon: var(--text-inverted-05);\n}\n.interactive[data-interactive-variant=\"select-filter\"][data-interactive-state=\"selected\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"select-filter\"][data-interactive-state=\"selected\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-inverted-04;\n  --interactive-foreground: var(--text-inverted-04);\n  --interactive-foreground-icon: var(--text-inverted-04);\n}\n.interactive[data-interactive-variant=\"select-filter\"][data-interactive-state=\"selected\"][data-disabled] {\n  @apply bg-background-neutral-04;\n  --interactive-foreground: var(--text-inverted-04);\n  --interactive-foreground-icon: var(--text-inverted-02);\n}\n\n/* ===========================================================================\n   Sidebar-Heavy\n\n   Not selected: muted (text-03 / icon text-02)\n   Selected: default (text-04 / icon text-03)\n   =========================================================================== */\n\n/* ---------------------------------------------------------------------------\n   Sidebar-Heavy — Empty\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"sidebar-heavy\"][data-interactive-state=\"empty\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-03);\n  --interactive-foreground-icon: var(--text-02);\n}\n.interactive[data-interactive-variant=\"sidebar-heavy\"][data-interactive-state=\"empty\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"sidebar-heavy\"][data-interactive-state=\"empty\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-03;\n}\n\n/* ---------------------------------------------------------------------------\n   Sidebar-Heavy — Filled\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"sidebar-heavy\"][data-interactive-state=\"filled\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-03);\n  --interactive-foreground-icon: var(--text-02);\n}\n.interactive[data-interactive-variant=\"sidebar-heavy\"][data-interactive-state=\"filled\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"sidebar-heavy\"][data-interactive-state=\"filled\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-03;\n}\n\n/* ---------------------------------------------------------------------------\n   Sidebar-Heavy — Selected\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"sidebar-heavy\"][data-interactive-state=\"selected\"] {\n  @apply bg-background-tint-00;\n  --interactive-foreground: var(--text-04);\n  --interactive-foreground-icon: var(--text-03);\n}\n.interactive[data-interactive-variant=\"sidebar-heavy\"][data-interactive-state=\"selected\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"sidebar-heavy\"][data-interactive-state=\"selected\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-03;\n}\n/* ---------------------------------------------------------------------------\n   Sidebar-Heavy — Disabled (all states)\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"sidebar-heavy\"][data-disabled] {\n  @apply bg-transparent opacity-50;\n  --interactive-foreground: var(--text-03);\n  --interactive-foreground-icon: var(--text-03);\n}\n\n/* ===========================================================================\n   Sidebar-Light\n\n   All states: prominence=\"muted-2x\" colors (text-02 / icon text-02)\n   =========================================================================== */\n\n/* ---------------------------------------------------------------------------\n   Sidebar-Light — Empty\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"sidebar-light\"][data-interactive-state=\"empty\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-02);\n  --interactive-foreground-icon: var(--text-02);\n}\n.interactive[data-interactive-variant=\"sidebar-light\"][data-interactive-state=\"empty\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"sidebar-light\"][data-interactive-state=\"empty\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-03;\n}\n\n/* ---------------------------------------------------------------------------\n   Sidebar-Light — Filled\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"sidebar-light\"][data-interactive-state=\"filled\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-02);\n  --interactive-foreground-icon: var(--text-02);\n}\n.interactive[data-interactive-variant=\"sidebar-light\"][data-interactive-state=\"filled\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"sidebar-light\"][data-interactive-state=\"filled\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-03;\n}\n\n/* ---------------------------------------------------------------------------\n   Sidebar-Light — Selected\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"sidebar-light\"][data-interactive-state=\"selected\"] {\n  @apply bg-background-tint-00;\n  --interactive-foreground: var(--text-02);\n  --interactive-foreground-icon: var(--text-02);\n}\n.interactive[data-interactive-variant=\"sidebar-light\"][data-interactive-state=\"selected\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"sidebar-light\"][data-interactive-state=\"selected\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-03;\n}\n/* ---------------------------------------------------------------------------\n   Sidebar-Light — Disabled (all states)\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"sidebar-light\"][data-disabled] {\n  @apply bg-transparent opacity-50;\n  --interactive-foreground: var(--text-03);\n  --interactive-foreground-icon: var(--text-03);\n}\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/stateless/README.md",
    "content": "# Interactive.Stateless\n\n**Import:** `import { Interactive } from \"@opal/core\";` — use as `Interactive.Stateless`.\n\nStateless interactive surface primitive for buttons, links, and cards. Applies variant/prominence color styling via CSS data-attributes and merges onto a single child element via Radix `Slot`.\n\n## Props\n\n| Prop | Type | Default | Description |\n|------|------|---------|-------------|\n| `variant` | `\"none\" \\| \"default\" \\| \"action\" \\| \"danger\"` | `\"default\"` | Color variant |\n| `prominence` | `\"primary\" \\| \"secondary\" \\| \"tertiary\" \\| \"internal\"` | `\"primary\"` | Color prominence within the variant |\n| `interaction` | `\"rest\" \\| \"hover\" \\| \"active\"` | `\"rest\"` | JS-controlled interaction override |\n| `group` | `string` | — | Tailwind group class for `group-hover:*` |\n| `disabled` | `boolean` | `false` | Disables the element |\n| `href` | `string` | — | URL for link behavior |\n| `target` | `string` | — | Link target (e.g. `\"_blank\"`) |\n\n## CSS custom properties\n\nSets `--interactive-foreground` and `--interactive-foreground-icon` per variant/prominence/state. Descendants opt in via:\n- `.interactive-foreground` — text color\n- `.interactive-foreground-icon` — icon color\n\n## Usage\n\n```tsx\n<Interactive.Stateless variant=\"default\" prominence=\"primary\" onClick={handleClick}>\n  <Interactive.Container border>\n    <span className=\"interactive-foreground\">Click me</span>\n  </Interactive.Container>\n</Interactive.Stateless>\n```\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/stateless/components.tsx",
    "content": "import \"@opal/core/interactive/shared.css\";\nimport \"@opal/core/interactive/stateless/styles.css\";\nimport React from \"react\";\nimport { Slot } from \"@radix-ui/react-slot\";\nimport { cn } from \"@opal/utils\";\nimport { guardPortalClick } from \"@opal/core/interactive/utils\";\nimport type { ButtonType, WithoutStyles } from \"@opal/types\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype InteractiveStatelessVariant = \"default\" | \"action\" | \"danger\";\ntype InteractiveStatelessProminence =\n  | \"primary\"\n  | \"secondary\"\n  | \"tertiary\"\n  | \"internal\";\ntype InteractiveStatelessInteraction = \"rest\" | \"hover\" | \"active\";\n\n/**\n * Props for {@link InteractiveStateless}.\n */\ninterface InteractiveStatelessProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLElement>> {\n  ref?: React.Ref<HTMLElement>;\n\n  /**\n   * Visual variant controlling the color palette.\n   * @default \"default\"\n   */\n  variant?: InteractiveStatelessVariant;\n\n  /**\n   * Prominence level controlling background intensity.\n   * @default \"primary\"\n   */\n  prominence?: InteractiveStatelessProminence;\n\n  /**\n   * JS-controllable interaction state override.\n   *\n   * - `\"rest\"` — default appearance (no override)\n   * - `\"hover\"` — forces hover visual state\n   * - `\"active\"` — forces active/pressed visual state\n   *\n   * @default \"rest\"\n   */\n  interaction?: InteractiveStatelessInteraction;\n\n  /**\n   * Tailwind group class (e.g. `\"group/Card\"`) for `group-hover:*` utilities.\n   */\n  group?: string;\n\n  /**\n   * HTML button type. When set to `\"submit\"`, `\"button\"`, or `\"reset\"`, the\n   * element is treated as inherently interactive for cursor styling purposes\n   * even without an explicit `onClick` or `href`.\n   */\n  type?: ButtonType;\n\n  /**\n   * URL to navigate to when clicked. Passed through Slot to the child.\n   */\n  href?: string;\n\n  /**\n   * Link target (e.g. `\"_blank\"`). Only used when `href` is provided.\n   */\n  target?: string;\n\n  /**\n   * Applies variant-specific disabled colors and suppresses clicks.\n   */\n  disabled?: boolean;\n}\n\n// ---------------------------------------------------------------------------\n// InteractiveStateless\n// ---------------------------------------------------------------------------\n\n/**\n * Stateless interactive surface primitive.\n *\n * The foundational building block for buttons, links, and any clickable\n * element that does not maintain selection state. Applies variant/prominence\n * color styling via CSS data-attributes and merges onto a single child\n * element via Radix `Slot`.\n *\n * Disabled state is controlled via the `disabled` prop.\n */\nfunction InteractiveStateless({\n  ref,\n  variant = \"default\",\n  prominence = \"primary\",\n  interaction = \"rest\",\n  group,\n  type,\n  href,\n  target,\n  disabled,\n  ...props\n}: InteractiveStatelessProps) {\n  const isDisabled = !!disabled;\n\n  // onClick/href are always passed directly — Stateless is the outermost Slot,\n  // so Radix Slot-injected handlers don't bypass this guard.\n  const classes = cn(\n    \"interactive\",\n    !props.onClick && !href && !type && \"!cursor-default !select-auto\",\n    group\n  );\n\n  const dataAttrs = {\n    \"data-interactive-variant\": variant,\n    \"data-interactive-prominence\": prominence,\n    \"data-interaction\": interaction !== \"rest\" ? interaction : undefined,\n    \"data-disabled\": isDisabled ? \"true\" : undefined,\n    \"aria-disabled\": isDisabled || undefined,\n  };\n\n  const { onClick, ...slotProps } = props;\n\n  const linkAttrs = href\n    ? {\n        href: isDisabled ? undefined : href,\n        target,\n        rel: target === \"_blank\" ? \"noopener noreferrer\" : undefined,\n      }\n    : {};\n\n  return (\n    <Slot\n      ref={ref}\n      className={classes}\n      {...dataAttrs}\n      {...linkAttrs}\n      {...slotProps}\n      onClick={\n        isDisabled\n          ? href\n            ? (e: React.MouseEvent) => e.preventDefault()\n            : undefined\n          : guardPortalClick(onClick)\n      }\n    />\n  );\n}\n\nexport {\n  InteractiveStateless,\n  type InteractiveStatelessProps,\n  type InteractiveStatelessVariant,\n  type InteractiveStatelessProminence,\n  type InteractiveStatelessInteraction,\n};\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/stateless/styles.css",
    "content": "/* ============================================================================\n   Stateless — variant x prominence color matrix\n\n   Each combination sets:\n     - background-color (via @apply)\n     - --interactive-foreground (CSS custom property for descendant text color)\n     - --interactive-foreground-icon (CSS custom property for descendant icon color)\n\n   Both foreground variables are registered as <color> via @property in\n   shared.css, so the browser interpolates them on the parent element.\n   Children read the variables with no independent transitions.\n\n   Interaction override: `data-interaction=\"hover\"` and `data-interaction=\"active\"`\n   allow JS-controlled visual state overrides without actual pointer events.\n============================================================================ */\n\n/* ---------------------------------------------------------------------------\n   Default + Primary\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"primary\"] {\n  @apply bg-[var(--theme-primary-05)];\n  --interactive-foreground: var(--text-inverted-05);\n  --interactive-foreground-icon: var(--text-inverted-05);\n}\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"primary\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"primary\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-[var(--theme-primary-04)];\n}\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"primary\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"primary\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-[var(--theme-primary-06)];\n}\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"primary\"][data-disabled] {\n  @apply bg-background-neutral-04;\n  --interactive-foreground: var(--text-inverted-04);\n  --interactive-foreground-icon: var(--text-inverted-04);\n}\n\n/* ---------------------------------------------------------------------------\n   Default + Secondary\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"secondary\"] {\n  @apply bg-background-tint-01;\n  --interactive-foreground: var(--text-03);\n  --interactive-foreground-icon: var(--text-03);\n}\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"secondary\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"secondary\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n  --interactive-foreground: var(--text-04);\n  --interactive-foreground-icon: var(--text-04);\n}\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"secondary\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"secondary\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n  --interactive-foreground: var(--text-05);\n  --interactive-foreground-icon: var(--text-05);\n}\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"secondary\"][data-disabled] {\n  @apply bg-background-neutral-03;\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Default + Tertiary\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"tertiary\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-03);\n  --interactive-foreground-icon: var(--text-03);\n}\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"tertiary\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"tertiary\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n  --interactive-foreground: var(--text-04);\n  --interactive-foreground-icon: var(--text-04);\n}\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"tertiary\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"tertiary\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n  --interactive-foreground: var(--text-05);\n  --interactive-foreground-icon: var(--text-05);\n}\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"tertiary\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Default + Internal\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"internal\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-03);\n  --interactive-foreground-icon: var(--text-03);\n}\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"internal\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"internal\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n  --interactive-foreground: var(--text-04);\n  --interactive-foreground-icon: var(--text-04);\n}\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"internal\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"internal\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n  --interactive-foreground: var(--text-05);\n  --interactive-foreground-icon: var(--text-05);\n}\n.interactive[data-interactive-variant=\"default\"][data-interactive-prominence=\"internal\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Action + Primary\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"primary\"] {\n  @apply bg-[var(--action-link-05)];\n  --interactive-foreground: var(--text-light-05);\n  --interactive-foreground-icon: var(--text-light-05);\n}\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"primary\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"primary\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-[var(--action-link-04)];\n}\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"primary\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"primary\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-[var(--action-link-06)];\n}\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"primary\"][data-disabled] {\n  @apply bg-[var(--action-link-02)];\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Action + Secondary\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"secondary\"] {\n  @apply bg-background-tint-01;\n  --interactive-foreground: var(--action-text-link-05);\n  --interactive-foreground-icon: var(--action-text-link-05);\n}\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"secondary\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"secondary\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n}\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"secondary\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"secondary\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"secondary\"][data-disabled] {\n  @apply bg-background-neutral-02;\n  --interactive-foreground: var(--action-link-03);\n  --interactive-foreground-icon: var(--action-link-03);\n}\n\n/* ---------------------------------------------------------------------------\n   Action + Tertiary\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"tertiary\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-text-link-05);\n  --interactive-foreground-icon: var(--action-text-link-05);\n}\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"tertiary\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"tertiary\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n}\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"tertiary\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"tertiary\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"tertiary\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-link-03);\n  --interactive-foreground-icon: var(--action-link-03);\n}\n\n/* ---------------------------------------------------------------------------\n   Action + Internal\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"internal\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-text-link-05);\n  --interactive-foreground-icon: var(--action-text-link-05);\n}\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"internal\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"internal\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"internal\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"internal\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"action\"][data-interactive-prominence=\"internal\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-link-03);\n  --interactive-foreground-icon: var(--action-link-03);\n}\n\n/* ---------------------------------------------------------------------------\n   Danger + Primary\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"primary\"] {\n  @apply bg-[var(--action-danger-05)];\n  --interactive-foreground: var(--text-light-05);\n  --interactive-foreground-icon: var(--text-light-05);\n}\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"primary\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"primary\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-[var(--action-danger-04)];\n}\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"primary\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"primary\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-[var(--action-danger-06)];\n}\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"primary\"][data-disabled] {\n  @apply bg-[var(--action-danger-02)];\n  --interactive-foreground: var(--text-01);\n  --interactive-foreground-icon: var(--text-01);\n}\n\n/* ---------------------------------------------------------------------------\n   Danger + Secondary\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"secondary\"] {\n  @apply bg-background-tint-01;\n  --interactive-foreground: var(--action-text-danger-05);\n  --interactive-foreground-icon: var(--action-text-danger-05);\n}\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"secondary\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"secondary\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n}\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"secondary\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"secondary\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"secondary\"][data-disabled] {\n  @apply bg-background-neutral-02;\n  --interactive-foreground: var(--action-danger-03);\n  --interactive-foreground-icon: var(--action-danger-03);\n}\n\n/* ---------------------------------------------------------------------------\n   Danger + Tertiary\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"tertiary\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-text-danger-05);\n  --interactive-foreground-icon: var(--action-text-danger-05);\n}\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"tertiary\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"tertiary\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-02;\n}\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"tertiary\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"tertiary\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"tertiary\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-danger-03);\n  --interactive-foreground-icon: var(--action-danger-03);\n}\n\n/* ---------------------------------------------------------------------------\n   Danger + Internal\n   --------------------------------------------------------------------------- */\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"internal\"] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-text-danger-05);\n  --interactive-foreground-icon: var(--action-text-danger-05);\n}\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"internal\"]:hover:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"internal\"][data-interaction=\"hover\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"internal\"]:active:not(\n    [data-disabled]\n  ),\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"internal\"][data-interaction=\"active\"]:not(\n    [data-disabled]\n  ) {\n  @apply bg-background-tint-00;\n}\n.interactive[data-interactive-variant=\"danger\"][data-interactive-prominence=\"internal\"][data-disabled] {\n  @apply bg-transparent;\n  --interactive-foreground: var(--action-danger-03);\n  --interactive-foreground-icon: var(--action-danger-03);\n}\n"
  },
  {
    "path": "web/lib/opal/src/core/interactive/utils.ts",
    "content": "import type React from \"react\";\n\n/**\n * Guards an onClick handler against React synthetic event bubbling from\n * portalled children (e.g. Radix Dialog overlays).\n *\n * React bubbles synthetic events through the **fiber tree** (component\n * hierarchy), not the DOM tree. This means a click on a portalled modal\n * overlay will bubble to a parent component's onClick even though the\n * overlay is not a DOM descendant. This guard checks that the click\n * target is actually inside the handler's DOM element before firing.\n */\nfunction guardPortalClick<E extends React.MouseEvent>(\n  onClick: ((e: E) => void) | undefined\n): ((e: E) => void) | undefined {\n  if (!onClick) return undefined;\n  return (e: E) => {\n    if (\n      e.currentTarget instanceof Node &&\n      e.target instanceof Node &&\n      e.currentTarget.contains(e.target)\n    ) {\n      onClick(e);\n    }\n  };\n}\n\nexport { guardPortalClick };\n"
  },
  {
    "path": "web/lib/opal/src/icons/DiscordMono.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgDiscordMono = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 52 52\"\n    fill=\"currentColor\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path d=\"M32.7571 7.80005C32.288 8.63286 31.8668 9.4944 31.4839 10.3751C27.8463 9.82945 24.1417 9.82945 20.4946 10.3751C20.1213 9.4944 19.6905 8.63286 19.2214 7.80005C15.804 8.384 12.4727 9.40825 9.31379 10.8537C3.05329 20.1296 1.35894 29.1661 2.20134 38.0782C5.86763 40.7872 9.97429 42.8549 14.349 44.1759C15.3349 42.8549 16.2061 41.4477 16.9527 39.9831C15.536 39.4566 14.1671 38.7961 12.8556 38.0303C13.2002 37.7814 13.5353 37.523 13.8608 37.2741C21.5476 40.8925 30.4501 40.8925 38.1465 37.2741C38.4719 37.5421 38.807 37.8006 39.1516 38.0303C37.8401 38.8057 36.4713 39.4566 35.0449 39.9927C35.7916 41.4573 36.6627 42.8645 37.6487 44.1855C42.0233 42.8645 46.1299 40.8064 49.7965 38.0973C50.7918 27.7589 48.0924 18.799 42.6646 10.8633C39.5154 9.41784 36.1841 8.39355 32.7666 7.81919L32.7571 7.80005ZM18.0248 32.5931C15.6604 32.5931 13.698 30.4488 13.698 27.7972C13.698 25.1456 15.5838 22.9918 18.0153 22.9918C20.4468 22.9918 22.3804 25.1552 22.3421 27.7972C22.3038 30.4393 20.4372 32.5931 18.0248 32.5931ZM33.9728 32.5931C31.5988 32.5931 29.6556 30.4488 29.6556 27.7972C29.6556 25.1456 31.5414 22.9918 33.9728 22.9918C36.4043 22.9918 38.3284 25.1552 38.29 27.7972C38.2518 30.4393 36.3851 32.5931 33.9728 32.5931Z\" />\n  </svg>\n);\nexport default SvgDiscordMono;\n"
  },
  {
    "path": "web/lib/opal/src/icons/actions.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgActions = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M3.06 6.24449L5.12 4.12225L3.06 2.00001M11.5501 14L14 11.5501M14 11.5501L11.5501 9.10017M14 11.5501H9.75552M4.12224 9.09889L6.24448 10.3242V12.7747L4.12224 14L2 12.7747V10.3242L4.12224 9.09889ZM14 4.12225C14 5.29433 13.0498 6.24449 11.8778 6.24449C10.7057 6.24449 9.75552 5.29433 9.75552 4.12225C9.75552 2.95017 10.7057 2.00001 11.8778 2.00001C13.0498 2.00001 14 2.95017 14 4.12225Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgActions;\n"
  },
  {
    "path": "web/lib/opal/src/icons/activity-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgActivitySmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M11.5 8H10L9 11L7 5L6 8H4.5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgActivitySmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/activity.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgActivity = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14.6667 8H12L9.99999 14L5.99999 2L3.99999 8H1.33333\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgActivity;\n"
  },
  {
    "path": "web/lib/opal/src/icons/add-lines.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgAddLines = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    xmlns=\"http://www.w3.org/2000/svg\"\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14 6H2M14 3H2M6 12H2M11.5 9.5V12M11.5 12V14.5M11.5 12H9M11.5 12H14M8.5 9H2\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgAddLines;\n"
  },
  {
    "path": "web/lib/opal/src/icons/alert-circle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgAlertCircle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 24 24\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <circle\n      cx=\"12\"\n      cy=\"12\"\n      r=\"10\"\n      strokeWidth={2}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M12 8v4\"\n      strokeWidth={2}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M12 16h.01\"\n      strokeWidth={2.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgAlertCircle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/alert-triangle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgAlertTriangle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    xmlns=\"http://www.w3.org/2000/svg\"\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.71535 5.8185V8.48516M7.71535 11.1518H7.72201M6.57535 2.39183L0.928679 11.8185C0.812258 12.0201 0.750657 12.2487 0.750005 12.4815C0.749353 12.7143 0.809673 12.9432 0.924964 13.1455C1.04025 13.3478 1.2065 13.5163 1.40715 13.6344C1.60781 13.7525 1.83588 13.8159 2.06868 13.8185H13.362C13.5948 13.8159 13.8229 13.7525 14.0235 13.6344C14.2242 13.5163 14.3904 13.3478 14.5057 13.1455C14.621 12.9432 14.6813 12.7143 14.6807 12.4815C14.68 12.2487 14.6184 12.0201 14.502 11.8185L8.85535 2.39183C8.7365 2.1959 8.56916 2.03391 8.36948 1.92149C8.16979 1.80906 7.9445 1.75 7.71535 1.75C7.48619 1.75 7.2609 1.80906 7.06122 1.92149C6.86153 2.03391 6.69419 2.1959 6.57535 2.39183Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgAlertTriangle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-down-dot.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgArrowDownDot = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 9 14\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4.25002 12.75L4.25002 6.25M4.25002 12.75L7.75 9.25M4.25002 12.75L0.75 9.25M4.25002 3.75C3.42158 3.75 2.75 3.07843 2.75 2.25C2.75 1.42157 3.42158 0.75 4.25002 0.75C5.07845 0.75 5.75 1.42157 5.75 2.25C5.75 3.07843 5.07845 3.75 4.25002 3.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgArrowDownDot;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-exchange.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgArrowExchange = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5.22381 2.5L3.19527 4.52854C3.06509 4.65871 3 4.82932 3 4.99994M5.22392 7.5L3.19526 5.47134C3.06509 5.34117 3 5.17056 3 4.99994M13 4.99994H3M10.7761 8.50003L12.8047 10.5286C12.9349 10.6587 13 10.8294 13 11M10.7761 13.5L12.8047 11.4714C12.9349 11.3412 13 11.1706 13 11M3 11H13\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgArrowExchange;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-left-dot.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgArrowLeftDot = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 14 9\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M0.75 4.25H7.24999M0.75 4.25L4.25 0.75M0.75 4.25L4.25 7.75M9.74999 4.25C9.74999 5.07844 10.4216 5.75001 11.25 5.75001C12.0784 5.75001 12.75 5.07844 12.75 4.25C12.75 3.42156 12.0784 2.75001 11.25 2.75001C10.4216 2.75001 9.74999 3.42156 9.74999 4.25Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgArrowLeftDot;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-left.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgArrowLeft = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12 8H4M4 8L8 4M4 8L8 12\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgArrowLeft;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-right-circle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgArrowRightCircle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.99999 10.6667L10.6667 8.00001M10.6667 8.00001L7.99999 5.33334M10.6667 8.00001L5.33333 8.00001M14.6667 8.00001C14.6667 11.6819 11.6819 14.6667 7.99999 14.6667C4.3181 14.6667 1.33333 11.6819 1.33333 8.00001C1.33333 4.31811 4.3181 1.33334 7.99999 1.33334C11.6819 1.33334 14.6667 4.31811 14.6667 8.00001Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgArrowRightCircle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-right-dot.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgArrowRightDot = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 14 9\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12.75 4.25H6.25M12.75 4.25L9.25 0.75M12.75 4.25L9.25 7.75M3.75 4.25C3.75 5.07844 3.07843 5.75001 2.25 5.75001C1.42157 5.75001 0.75 5.07844 0.75 4.25C0.75 3.42156 1.42157 2.75001 2.25 2.75001C3.07843 2.75001 3.75 3.42156 3.75 4.25Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgArrowRightDot;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-right.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgArrowRight = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4 8H12M12 8L8 4M12 8L8 12\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgArrowRight;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-up-circle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgArrowUpCircle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5.33333 8.00001L8 5.33334M8 5.33334L10.6667 8.00001M8 5.33334L8 10.6667M14.6667 8.00001C14.6667 11.6819 11.6819 14.6667 8 14.6667C4.3181 14.6667 1.33333 11.6819 1.33333 8.00001C1.33333 4.31811 4.3181 1.33334 8 1.33334C11.6819 1.33334 14.6667 4.31811 14.6667 8.00001Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgArrowUpCircle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-up-dot.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgArrowUpDot = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 9 14\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4.25002 0.75V7.24999M4.25002 0.75L0.75 4.25M4.25002 0.75L7.75 4.25M4.25002 9.74999C5.07845 9.74999 5.75003 10.4216 5.75003 11.25C5.75003 12.0784 5.07845 12.75 4.25002 12.75C3.42158 12.75 2.75003 12.0784 2.75003 11.25C2.75003 10.4216 3.42158 9.74999 4.25002 9.74999Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgArrowUpDot;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-up-down.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgArrowUpDown = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 13 12\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M11.75 2.97381L9.72145 0.945267C9.59128 0.81509 9.42066 0.750002 9.25005 0.750001M6.74999 2.97392L8.77865 0.94526C8.90881 0.815087 9.07943 0.75 9.25005 0.750001M9.25005 10.75V0.750001M5.74996 8.52613L3.72141 10.5547C3.59124 10.6849 3.42062 10.75 3.25001 10.75M0.75 8.52613L2.77861 10.5547C2.90877 10.6849 3.07939 10.75 3.25001 10.75M3.25001 0.75L3.25001 10.75\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgArrowUpDown;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-up-right.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgArrowUpRight = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4.66667 11.3333L11 5M4.66667 4.66663H11.3333V11.3333\"\n      strokeWidth={1.5}\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgArrowUpRight;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-up.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgArrowUp = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8 2.6665V13.3335M8 2.6665L4 6.6665M8 2.6665L12 6.6665\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgArrowUp;\n"
  },
  {
    "path": "web/lib/opal/src/icons/arrow-wall-right.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgArrowWallRight = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 12\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8.44281 2.99998L10.8047 5.36191C10.9349 5.49208 11 5.6627 11 5.83331M8.44281 8.66665L10.8047 6.30471C10.9349 6.17455 11 6.00393 11 5.83331M1 5.83331H11M14 1V10.6667\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgArrowWallRight;\n"
  },
  {
    "path": "web/lib/opal/src/icons/audio-eq-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgAudioEqSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5 9V7M7 11V5M9 9.5V6.5M11 9V7\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgAudioEqSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/audio.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgAudio = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M2 10V6M5 14V2M11 11V5M14 9V7M8 10V6\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgAudio;\n"
  },
  {
    "path": "web/lib/opal/src/icons/aws.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgAws = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 52 52\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <title>AWS</title>\n    <path\n      d=\"M14.6195 23.2934C14.6195 23.9333 14.7233 24.4522 14.8443 24.8326C14.9827 25.2131 15.1556 25.6282 15.3978 26.0778C15.4842 26.2162 15.5188 26.3546 15.5188 26.4756C15.5188 26.6486 15.4151 26.8215 15.1902 26.9945L14.1007 27.7208C13.945 27.8246 13.7894 27.8765 13.651 27.8765C13.4781 27.8765 13.3051 27.79 13.1322 27.6344C12.89 27.3749 12.6825 27.0982 12.5096 26.8215C12.3366 26.5275 12.1637 26.1989 11.9734 25.8011C10.6245 27.3922 8.92958 28.1878 6.88881 28.1878C5.43606 28.1878 4.27731 27.7727 3.42988 26.9426C2.58244 26.1124 2.15007 25.0056 2.15007 23.622C2.15007 22.152 2.66891 20.9586 3.72389 20.0593C4.77886 19.16 6.17973 18.7103 7.96108 18.7103C8.54909 18.7103 9.15441 18.7622 9.79431 18.8487C10.4342 18.9352 11.0914 19.0735 11.7832 19.2292V17.9667C11.7832 16.6523 11.5065 15.7356 10.9703 15.1995C10.4169 14.6634 9.483 14.404 8.15132 14.404C7.546 14.404 6.9234 14.4731 6.28349 14.6288C5.64359 14.7844 5.02098 14.9747 4.41567 15.2168C4.13896 15.3379 3.93142 15.407 3.81036 15.4416C3.6893 15.4762 3.60282 15.4935 3.53364 15.4935C3.29152 15.4935 3.17046 15.3206 3.17046 14.9574V14.1099C3.17046 13.8332 3.20505 13.6257 3.29152 13.5046C3.37799 13.3836 3.53364 13.2625 3.77577 13.1414C4.38108 12.8301 5.10746 12.5707 5.9549 12.3632C6.80233 12.1384 7.70165 12.0346 8.65286 12.0346C10.7109 12.0346 12.2156 12.5015 13.1841 13.4355C14.1353 14.3694 14.6195 15.7875 14.6195 17.6899V23.2934ZM7.63248 25.9222C8.2032 25.9222 8.79122 25.8184 9.41383 25.6109C10.0364 25.4034 10.5899 25.0229 11.0568 24.504C11.3335 24.1754 11.5411 23.8122 11.6448 23.3972C11.7486 22.9821 11.8178 22.4806 11.8178 21.8925V21.1662C11.3162 21.0451 10.7801 20.9413 10.2267 20.8722C9.67325 20.803 9.13711 20.7684 8.60098 20.7684C7.44224 20.7684 6.5948 20.9932 6.02407 21.4602C5.45335 21.9271 5.17664 22.5843 5.17664 23.4491C5.17664 24.2619 5.38417 24.8672 5.81654 25.2823C6.23161 25.7147 6.83692 25.9222 7.63248 25.9222ZM21.5201 27.79C21.2088 27.79 21.0012 27.7381 20.8629 27.6171C20.7245 27.5133 20.6035 27.2712 20.4997 26.9426L16.4355 13.5738C16.3317 13.2279 16.2798 13.0031 16.2798 12.882C16.2798 12.6053 16.4182 12.4497 16.6949 12.4497H18.3897C18.7183 12.4497 18.9432 12.5015 19.0642 12.6226C19.2026 12.7264 19.3064 12.9685 19.4101 13.2971L22.3156 24.7462L25.0136 13.2971C25.1001 12.9512 25.2038 12.7264 25.3422 12.6226C25.4806 12.5188 25.7227 12.4497 26.034 12.4497H27.4176C27.7462 12.4497 27.971 12.5015 28.1093 12.6226C28.2477 12.7264 28.3688 12.9685 28.4379 13.2971L31.1705 24.8845L34.1625 13.2971C34.2662 12.9512 34.3873 12.7264 34.5084 12.6226C34.6467 12.5188 34.8716 12.4497 35.1829 12.4497H36.7913C37.068 12.4497 37.2236 12.588 37.2236 12.882C37.2236 12.9685 37.2063 13.055 37.189 13.1587C37.1717 13.2625 37.1372 13.4009 37.068 13.5911L32.9 26.9599C32.7962 27.3058 32.6751 27.5306 32.5368 27.6344C32.3984 27.7381 32.1736 27.8073 31.8796 27.8073H30.3922C30.0636 27.8073 29.8388 27.7554 29.7004 27.6344C29.5621 27.5133 29.441 27.2885 29.3719 26.9426L26.6912 15.7875L24.0278 26.9253C23.9413 27.2712 23.8376 27.496 23.6992 27.6171C23.5609 27.7381 23.3187 27.79 23.0074 27.79H21.5201ZM43.7437 28.257C42.8444 28.257 41.9451 28.1532 41.0803 27.9457C40.2156 27.7381 39.5411 27.5133 39.0914 27.2539C38.8147 27.0982 38.6245 26.9253 38.5553 26.7696C38.4861 26.614 38.4515 26.441 38.4515 26.2854V25.4034C38.4515 25.0402 38.5899 24.8672 38.8493 24.8672C38.9531 24.8672 39.0569 24.8845 39.1606 24.9191C39.2644 24.9537 39.42 25.0229 39.593 25.0921C40.181 25.3515 40.8209 25.559 41.4954 25.6974C42.1872 25.8357 42.8617 25.9049 43.5535 25.9049C44.643 25.9049 45.4905 25.7147 46.0785 25.3342C46.6665 24.9537 46.9778 24.4003 46.9778 23.6912C46.9778 23.2069 46.8222 22.8092 46.5109 22.4806C46.1996 22.152 45.6115 21.858 44.7641 21.5812L42.2564 20.803C40.9939 20.4052 40.0599 19.8172 39.4892 19.0389C38.9185 18.278 38.6245 17.4305 38.6245 16.5312C38.6245 15.8048 38.7801 15.1649 39.0914 14.6115C39.4027 14.0581 39.8178 13.5738 40.3367 13.1933C40.8555 12.7956 41.4435 12.5015 42.1353 12.294C42.8271 12.0865 43.5535 12 44.3144 12C44.6949 12 45.0927 12.0173 45.4732 12.0692C45.871 12.1211 46.2341 12.1902 46.5973 12.2594C46.9432 12.3459 47.2718 12.4324 47.5831 12.5361C47.8944 12.6399 48.1366 12.7437 48.3095 12.8474C48.5516 12.9858 48.7246 13.1242 48.8283 13.2798C48.9321 13.4182 48.984 13.6084 48.984 13.8505V14.6634C48.984 15.0266 48.8456 15.2168 48.5862 15.2168C48.4479 15.2168 48.223 15.1476 47.929 15.0093C46.9432 14.5596 45.8364 14.3348 44.6084 14.3348C43.6227 14.3348 42.8444 14.4904 42.3083 14.819C41.7721 15.1476 41.4954 15.6492 41.4954 16.3583C41.4954 16.8425 41.6684 17.2576 42.0142 17.5862C42.3601 17.9148 43 18.2434 43.9167 18.5374L46.3725 19.3156C47.6177 19.7134 48.517 20.2668 49.0532 20.9759C49.5893 21.685 49.8487 22.4979 49.8487 23.3972C49.8487 24.1408 49.6931 24.8153 49.3991 25.4034C49.0878 25.9914 48.6727 26.5102 48.1366 26.9253C47.6004 27.3577 46.9605 27.669 46.2168 27.8938C45.4386 28.1359 44.6257 28.257 43.7437 28.257Z\"\n      fill=\"#252F3E\"\n    />\n    <path\n      fillRule=\"evenodd\"\n      clipRule=\"evenodd\"\n      d=\"M47.0124 36.6621C41.3225 40.8647 33.0556 43.0958 25.9475 43.0958C15.9858 43.0958 7.00987 39.412 0.230368 33.2897C-0.305766 32.8054 0.178484 32.1482 0.818386 32.5287C8.15132 36.7832 17.1964 39.3601 26.5528 39.3601C32.8654 39.3601 39.8005 38.0457 46.1823 35.3478C47.1335 34.9154 47.9463 35.9704 47.0124 36.6621Z\"\n      fill=\"#FF9900\"\n    />\n    <path\n      fillRule=\"evenodd\"\n      clipRule=\"evenodd\"\n      d=\"M49.3818 33.9642C48.6554 33.0303 44.5738 33.5145 42.7233 33.7394C42.1699 33.8085 42.0834 33.3243 42.585 32.9611C45.8364 30.6782 51.1804 31.3354 51.803 32.0964C52.4256 32.8746 51.6301 38.2187 48.5862 40.7783C48.1193 41.1761 47.6696 40.9685 47.8771 40.4497C48.5689 38.7375 50.1081 34.8808 49.3818 33.9642Z\"\n      fill=\"#FF9900\"\n    />\n  </svg>\n);\n\nexport default SvgAws;\n"
  },
  {
    "path": "web/lib/opal/src/icons/azure.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgAzure = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 52 52\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M18.3281 3.40002C17.3811 3.40002 16.5394 4.00473 16.2763 4.89865L3.12373 43.8366C2.72915 44.9672 3.30787 46.2029 4.43899 46.5973C4.67574 46.6761 4.91248 46.7287 5.17554 46.7287H16.0396C16.855 46.571 17.539 45.9926 17.8283 45.2038L32.0068 3.40002H18.3281Z\"\n      fill=\"url(#paint0_linear_9_943)\"\n    />\n    <path\n      d=\"M38.136 31.4795H16.5394C15.987 31.4795 15.5398 31.9264 15.5398 32.4786C15.5398 32.7678 15.645 33.0307 15.8555 33.2147L29.7446 46.1503C30.1392 46.5183 30.6916 46.7287 31.244 46.7287H43.4759L38.136 31.4795Z\"\n      fill=\"#0078D4\"\n    />\n    <path\n      d=\"M18.3281 3.40002C17.3811 3.40002 16.5394 4.00473 16.2763 4.89865L3.12373 43.8366C2.72915 44.9672 3.30787 46.2029 4.43899 46.5973C4.67574 46.6761 4.91248 46.7287 5.17554 46.7287H16.0396C16.855 46.571 17.539 45.9926 17.8283 45.2038L20.4589 37.4741L29.8235 46.2555C30.2181 46.571 30.7179 46.755 31.2177 46.755H43.397L38.057 31.4796H22.4844L32.0068 3.40002H18.3281Z\"\n      fill=\"url(#paint1_linear_9_943)\"\n    />\n    <path\n      d=\"M35.7422 4.87236C35.4528 3.97844 34.611 3.40002 33.6904 3.40002H18.5123C19.4329 3.40002 20.2747 4.00473 20.5641 4.87236L33.7167 43.8892C34.1112 45.0198 33.4799 46.2555 32.3488 46.6236C32.1384 46.7024 31.9016 46.7287 31.6649 46.7287H46.843C48.053 46.7287 49 45.7559 49 44.5728C49 44.3362 48.9737 44.0996 48.8948 43.8892L35.7422 4.87236Z\"\n      fill=\"url(#paint2_linear_9_943)\"\n    />\n    <defs>\n      <linearGradient\n        id=\"paint0_linear_9_943\"\n        x1={23.3411}\n        y1={6.61094}\n        x2={9.24122}\n        y2={48.3769}\n        gradientUnits=\"userSpaceOnUse\"\n      >\n        <stop stopColor=\"#114A8B\" />\n        <stop offset={1} stopColor=\"#0765B6\" />\n      </linearGradient>\n      <linearGradient\n        id=\"paint1_linear_9_943\"\n        x1={27.7206}\n        y1={26.0775}\n        x2={24.4488}\n        y2={27.1844}\n        gradientUnits=\"userSpaceOnUse\"\n      >\n        <stop stopOpacity={0.3} />\n        <stop offset={0.071} stopOpacity={0.2} />\n        <stop offset={0.321} stopOpacity={0.1} />\n        <stop offset={0.623} stopOpacity={0.05} />\n        <stop offset={1} stopOpacity={0} />\n      </linearGradient>\n      <linearGradient\n        id=\"paint2_linear_9_943\"\n        x1={26.0229}\n        y1={5.35655}\n        x2={41.5367}\n        y2={46.7094}\n        gradientUnits=\"userSpaceOnUse\"\n      >\n        <stop stopColor=\"#3BC9F3\" />\n        <stop offset={1} stopColor=\"#2892DF\" />\n      </linearGradient>\n    </defs>\n  </svg>\n);\nexport default SvgAzure;\n"
  },
  {
    "path": "web/lib/opal/src/icons/bar-chart-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgBarChartSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M11 10.5V7M8 10.5V4.5M5 10.5V8\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgBarChartSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/bar-chart.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgBarChart = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12 13.3333V6.66666M8 13.3333V2.66666M4 13.3333V9.33332\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgBarChart;\n"
  },
  {
    "path": "web/lib/opal/src/icons/bell.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgBell = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M9.15333 14C9.03613 14.2021 8.86789 14.3698 8.66548 14.4864C8.46307 14.6029 8.23359 14.6643 8 14.6643C7.76641 14.6643 7.53693 14.6029 7.33452 14.4864C7.1321 14.3698 6.96387 14.2021 6.84667 14M12 5.33334C12 4.27248 11.5786 3.25506 10.8284 2.50492C10.0783 1.75477 9.06087 1.33334 8 1.33334C6.93913 1.33334 5.92172 1.75477 5.17157 2.50492C4.42143 3.25506 4 4.27248 4 5.33334C4 10 2 11.3333 2 11.3333H14C14 11.3333 12 10 12 5.33334Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgBell;\n"
  },
  {
    "path": "web/lib/opal/src/icons/bifrost.tsx",
    "content": "import { cn } from \"@opal/utils\";\nimport type { IconProps } from \"@opal/types\";\n\nconst SvgBifrost = ({ size, className, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 37 46\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    className={cn(className, \"text-[#33C19E] dark:text-white\")}\n    {...props}\n  >\n    <title>Bifrost</title>\n    <path\n      d=\"M27.6219 46H0V36.8H27.6219V46ZM36.8268 36.8H27.6219V27.6H36.8268V36.8ZM18.4146 27.6H9.2073V18.4H18.4146V27.6ZM36.8268 18.4H27.6219V9.2H36.8268V18.4ZM27.6219 9.2H0V0H27.6219V9.2Z\"\n      fill=\"currentColor\"\n    />\n  </svg>\n);\n\nexport default SvgBifrost;\n"
  },
  {
    "path": "web/lib/opal/src/icons/blocks.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgBlocks = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    xmlns=\"http://www.w3.org/2000/svg\"\n    viewBox=\"0 0 24 24\"\n    fill=\"none\"\n    strokeWidth={1.5}\n    strokeLinecap=\"round\"\n    strokeLinejoin=\"round\"\n    className=\"lucide lucide-blocks-icon lucide-blocks\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path d=\"M10 22V7a1 1 0 0 0-1-1H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2v-5a1 1 0 0 0-1-1H2\" />\n    <rect x={14} y={2} rx={1} />\n  </svg>\n);\nexport default SvgBlocks;\n"
  },
  {
    "path": "web/lib/opal/src/icons/book-open.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgBookOpen = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.99999 4.66667C7.99999 3.95942 7.71904 3.28115 7.21895 2.78105C6.71885 2.28095 6.04057 2 5.33333 2H1.33333V12H5.99999C6.53043 12 7.03914 12.2107 7.41421 12.5858C7.78928 12.9609 7.99999 13.4696 7.99999 14M7.99999 4.66667V14M7.99999 4.66667C7.99999 3.95942 8.28095 3.28115 8.78104 2.78105C9.28114 2.28095 9.95942 2 10.6667 2H14.6667V12H9.99999C9.46956 12 8.96085 12.2107 8.58578 12.5858C8.21071 12.9609 7.99999 13.4696 7.99999 14\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgBookOpen;\n"
  },
  {
    "path": "web/lib/opal/src/icons/bookmark.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgBookmark = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12.6667 14L7.99999 10.6667L3.33333 14V3.33333C3.33333 2.97971 3.4738 2.64057 3.72385 2.39052C3.9739 2.14048 4.31304 2 4.66666 2H11.3333C11.6869 2 12.0261 2.14048 12.2761 2.39052C12.5262 2.64057 12.6667 2.97971 12.6667 3.33333V14Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgBookmark;\n"
  },
  {
    "path": "web/lib/opal/src/icons/books-line-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgBooksLineSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8.25 5.5V10M10.75 5.5V10M5.91469 5.65333L4.75 10\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgBooksLineSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/books-stack-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgBooksStackSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6 10.5H10.5M5 8H9.5M6.5 5.5H11\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgBooksStackSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/bracket-curly.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgBracketCurly = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 14\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4.25 0.750024C3.14543 0.750024 2.25 1.64545 2.25 2.75002V4.67966C2.25 5.34836 1.9158 5.97283 1.3594 6.34376L0.75 6.75002L1.3594 7.15629C1.9158 7.52722 2.25 8.15169 2.25 8.82039V10.75C2.25 11.8546 3.14543 12.75 4.25 12.75M10.25 12.75C11.3546 12.75 12.25 11.8546 12.25 10.75V8.82038C12.25 8.15167 12.5842 7.5272 13.1406 7.15627L13.75 6.75002L13.1406 6.34373C12.5842 5.9728 12.25 5.34835 12.25 4.67965V2.75C12.25 1.64543 11.3546 0.75 10.25 0.75\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgBracketCurly;\n"
  },
  {
    "path": "web/lib/opal/src/icons/branch.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgBranch = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4.75001 5C5.71651 5 6.50001 4.2165 6.50001 3.25C6.50001 2.2835 5.7165 1.5 4.75 1.5C3.78351 1.5 3.00001 2.2835 3.00001 3.25C3.00001 4.2165 3.78351 5 4.75001 5ZM4.75001 5L4.75001 6.24999M4.75 11C3.7835 11 3 11.7835 3 12.75C3 13.7165 3.7835 14.5 4.75 14.5C5.7165 14.5 6.5 13.7165 6.5 12.75C6.5 11.7835 5.71649 11 4.75 11ZM4.75 11L4.75001 6.24999M10.5 8.74997C10.5 9.71646 11.2835 10.5 12.25 10.5C13.2165 10.5 14 9.71646 14 8.74997C14 7.78347 13.2165 7 12.25 7C11.2835 7 10.5 7.78347 10.5 8.74997ZM10.5 8.74997L7.25001 8.74999C5.8693 8.74999 4.75001 7.6307 4.75001 6.24999\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgBranch;\n"
  },
  {
    "path": "web/lib/opal/src/icons/bubble-text.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgBubbleText = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M10.4939 6.5H5.5M8.00607 9.5H5.50607M1.5 13.5H10.5C12.7091 13.5 14.5 11.7091 14.5 9.5V6.5C14.5 4.29086 12.7091 2.5 10.5 2.5H5.5C3.29086 2.5 1.5 4.29086 1.5 6.5V13.5Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgBubbleText;\n"
  },
  {
    "path": "web/lib/opal/src/icons/calendar.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgCalendar = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 14 15\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M9.41667 0.75V3.41667M4.08333 0.75V3.41667M0.75 6.08333H12.75M2.08333 2.08333H11.4167C12.153 2.08333 12.75 2.68029 12.75 3.41667V12.75C12.75 13.4864 12.153 14.0833 11.4167 14.0833H2.08333C1.34695 14.0833 0.75 13.4864 0.75 12.75V3.41667C0.75 2.68029 1.34695 2.08333 2.08333 2.08333Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgCalendar;\n"
  },
  {
    "path": "web/lib/opal/src/icons/check-circle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgCheckCircle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_16_2879)\">\n      <path\n        d=\"M14.6667 7.38668V8.00001C14.6658 9.43763 14.2003 10.8365 13.3396 11.9879C12.4788 13.1393 11.2689 13.9817 9.89023 14.3893C8.51162 14.7969 7.03817 14.7479 5.68964 14.2497C4.34112 13.7515 3.18976 12.8307 2.4073 11.6247C1.62484 10.4187 1.25319 8.99205 1.34778 7.55755C1.44237 6.12305 1.99813 4.75756 2.93218 3.66473C3.86623 2.57189 5.12852 1.81027 6.53079 1.49344C7.93306 1.17662 9.40017 1.32157 10.7133 1.90668M14.6667 2.66668L8 9.34001L6 7.34001\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_16_2879\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgCheckCircle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/check-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgCheckSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M11 6L7 10L5 8\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgCheckSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/check-square.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgCheckSquare = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_604_4473)\">\n      <path\n        d=\"M6 7.33333L8 9.33333L14.6667 2.66667M14 8V12.6667C14 13.0203 13.8595 13.3594 13.6095 13.6095C13.3594 13.8595 13.0203 14 12.6667 14H3.33333C2.97971 14 2.64057 13.8595 2.39052 13.6095C2.14048 13.3594 2 13.0203 2 12.6667V3.33333C2 2.97971 2.14048 2.64057 2.39052 2.39052C2.64057 2.14048 2.97971 2 3.33333 2H10.6667\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_604_4473\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\n\nexport default SvgCheckSquare;\n"
  },
  {
    "path": "web/lib/opal/src/icons/check.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgCheck = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M13.5 4.5L6 12L2.5 8.5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgCheck;\n"
  },
  {
    "path": "web/lib/opal/src/icons/chevron-down-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgChevronDownSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5 6.50001L7.5286 9.0286C7.78894 9.28893 8.21107 9.28893 8.47141 9.0286L11 6.5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgChevronDownSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/chevron-down.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgChevronDown = ({ size, strokeWidth = 1.5, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4 6L8 10L12 6\"\n      strokeWidth={strokeWidth}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgChevronDown;\n"
  },
  {
    "path": "web/lib/opal/src/icons/chevron-left.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgChevronLeft = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M10 12L6 8L10 4\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgChevronLeft;\n"
  },
  {
    "path": "web/lib/opal/src/icons/chevron-right.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgChevronRight = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6 12L10 8L6 4\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgChevronRight;\n"
  },
  {
    "path": "web/lib/opal/src/icons/chevron-up-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgChevronUpSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4.99999 9.50385L7.5286 6.97525C7.78893 6.71492 8.21106 6.71492 8.4714 6.97525L11 9.50385\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgChevronUpSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/chevron-up.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgChevronUp = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4 10L8 6L12 10\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgChevronUp;\n"
  },
  {
    "path": "web/lib/opal/src/icons/circle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgCircle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <circle cx=\"8\" cy=\"8\" r=\"4\" strokeWidth={1.5} />\n  </svg>\n);\nexport default SvgCircle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/claude.tsx",
    "content": "import React from \"react\";\nimport type { IconProps } from \"@opal/types\";\n\nconst SvgClaude = ({ size, ...props }: IconProps) => {\n  const clipId = React.useId();\n  return (\n    <svg\n      width={size}\n      height={size}\n      viewBox=\"0 0 16 16\"\n      fill=\"none\"\n      xmlns=\"http://www.w3.org/2000/svg\"\n      {...props}\n    >\n      <g clipPath={`url(#${clipId})`}>\n        <path\n          d=\"M3.51067 10.7446L6.41508 9.11502L6.46392 8.97348L6.41508 8.89474H6.27355L5.78815 8.86484L4.12863 8.81999L2.68939 8.76019L1.29499 8.68543L0.944149 8.61068L0.615234 8.17711L0.649123 7.96083L0.944149 7.76248L1.36675 7.79936L2.30067 7.86315L3.70204 7.95983L4.71868 8.01963L6.22471 8.17611H6.46392L6.49781 8.07943L6.41608 8.01963L6.35229 7.95983L4.90208 6.97707L3.33226 5.93851L2.50998 5.34048L2.06545 5.03748L1.84119 4.75342L1.74451 4.13347L2.14817 3.68893L2.69038 3.72581L2.82892 3.76269L3.37811 4.18529L4.55124 5.09329L6.08318 6.22157L6.30744 6.40795L6.39714 6.34417L6.40811 6.29931L6.30744 6.13087L5.47419 4.62484L4.58513 3.0929L4.18943 2.458L4.08478 2.07725C4.0479 1.92077 4.02099 1.78921 4.02099 1.62874L4.48047 1.0048L4.73463 0.923065L5.34761 1.0048L5.60576 1.22906L5.9865 2.10018L6.60346 3.47165L7.5603 5.33649L7.84038 5.88966L7.98988 6.40197L8.0457 6.55846H8.14238V6.46875L8.22112 5.41822L8.36664 4.12848L8.50817 2.46896L8.55701 2.0015L8.78825 1.44135L9.24773 1.13835L9.60654 1.30979L9.90157 1.73239L9.86071 2.00549L9.68529 3.14573L9.34142 4.93183L9.11716 6.12788H9.24773L9.39724 5.97837L10.0022 5.17503L11.0189 3.90422L11.4674 3.39989L11.9907 2.84273L12.3266 2.5776H12.9615L13.4289 3.27231L13.2196 3.98994L12.5658 4.8192L12.0236 5.52188L11.2461 6.56843L10.7607 7.40566L10.8056 7.47244L10.9212 7.46148L12.6774 7.08771L13.6263 6.91628L14.7585 6.72192L15.2708 6.96113L15.3267 7.20432L15.1253 7.70168L13.9143 8.0007L12.494 8.28476L10.379 8.78511L10.3531 8.80404L10.383 8.84092L11.3358 8.93062L11.7435 8.95255H12.7412L14.5991 9.0911L15.0845 9.41204L15.3755 9.80474L15.3267 10.1038L14.5791 10.4845L13.5705 10.2453L11.2162 9.68513L10.4089 9.4838H10.2973V9.55058L10.97 10.2084L12.203 11.3217L13.7469 12.757L13.8256 13.1118L13.6273 13.3919L13.418 13.362L12.0614 12.3414L11.5382 11.8819L10.3531 10.8842H10.2743V10.9888L10.5474 11.3885L11.9897 13.5563L12.0644 14.2212L11.9598 14.4374L11.586 14.568L11.1754 14.4933L10.3312 13.3082L9.46003 11.9736L8.75735 10.7775L8.67163 10.8264L8.257 15.2926L8.06264 15.5209L7.61412 15.6923L7.24036 15.4082L7.04201 14.9488L7.24036 14.0408L7.47957 12.8557L7.67393 11.9138L7.84935 10.7436L7.954 10.3549L7.94702 10.329L7.86131 10.34L6.97922 11.551L5.63765 13.364L4.57615 14.5002L4.32199 14.6009L3.88145 14.3727L3.92231 13.965L4.1685 13.6022L5.63765 11.7334L6.52372 10.5752L7.09583 9.9064L7.09185 9.80972H7.05796L3.15584 12.3434L2.46114 12.4331L2.16213 12.153L2.199 11.6935L2.34054 11.544L3.51366 10.7367L3.50968 10.7406L3.51067 10.7446Z\"\n          fill=\"#D97757\"\n        />\n      </g>\n      <defs>\n        <clipPath id={clipId}>\n          <rect width=\"16\" height=\"16\" fill=\"white\" />\n        </clipPath>\n      </defs>\n    </svg>\n  );\n};\n\nexport default SvgClaude;\n"
  },
  {
    "path": "web/lib/opal/src/icons/clipboard.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgClipboard = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M10.6667 2.66665H12C12.3536 2.66665 12.6927 2.80712 12.9428 3.05717C13.1928 3.30722 13.3333 3.64636 13.3333 3.99998V13.3333C13.3333 13.6869 13.1928 14.0261 12.9428 14.2761C12.6927 14.5262 12.3536 14.6666 12 14.6666H3.99999C3.64637 14.6666 3.30723 14.5262 3.05718 14.2761C2.80713 14.0261 2.66666 13.6869 2.66666 13.3333V3.99998C2.66666 3.64636 2.80713 3.30722 3.05718 3.05717C3.30723 2.80712 3.64637 2.66665 3.99999 2.66665H5.33332M10.6667 2.66665V1.99998C10.6667 1.63179 10.3682 1.33331 9.99999 1.33331H5.99999C5.6318 1.33331 5.33332 1.63179 5.33332 1.99998V2.66665M10.6667 2.66665V3.33331C10.6667 3.7015 10.3682 3.99998 9.99999 3.99998H5.99999C5.6318 3.99998 5.33332 3.7015 5.33332 3.33331V2.66665\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgClipboard;\n"
  },
  {
    "path": "web/lib/opal/src/icons/clock-hands-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgClockHandsSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8 4.5V8L10 10\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgClockHandsSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/clock.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgClock = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_16_2605)\">\n      <path\n        d=\"M7.99999 3.99999V7.99999L10.6667 9.33333M14.6667 7.99999C14.6667 11.6819 11.6819 14.6667 7.99999 14.6667C4.3181 14.6667 1.33333 11.6819 1.33333 7.99999C1.33333 4.3181 4.3181 1.33333 7.99999 1.33333C11.6819 1.33333 14.6667 4.3181 14.6667 7.99999Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_16_2605\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgClock;\n"
  },
  {
    "path": "web/lib/opal/src/icons/cloud.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgCloud = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_170_23)\">\n      <path\n        d=\"M12 6.66669H11.16C10.9106 5.70069 10.3952 4.82401 9.67243 4.13628C8.94966 3.44856 8.04848 2.97735 7.07128 2.7762C6.09409 2.57506 5.08007 2.65205 4.14444 2.99842C3.20881 3.34478 2.3891 3.94664 1.77844 4.73561C1.16778 5.52457 0.790662 6.469 0.689941 7.46159C0.589219 8.45417 0.76893 9.45511 1.20865 10.3507C1.64838 11.2462 2.33048 12.0005 3.17746 12.5277C4.02443 13.055 5.00232 13.3341 6 13.3334H12C12.8841 13.3334 13.7319 12.9822 14.357 12.357C14.9821 11.7319 15.3333 10.8841 15.3333 10C15.3333 9.11597 14.9821 8.26812 14.357 7.643C13.7319 7.01788 12.8841 6.66669 12 6.66669Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_170_23\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgCloud;\n"
  },
  {
    "path": "web/lib/opal/src/icons/code.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgCode = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M10.6667 12L14.6667 8L10.6667 4M5.33334 4L1.33334 8L5.33334 12\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgCode;\n"
  },
  {
    "path": "web/lib/opal/src/icons/column.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgColumn = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6 14H3.33333C2.59695 14 2 13.403 2 12.6667V3.33333C2 2.59695 2.59695 2 3.33333 2H6M6 14V2M6 14H10M6 2H10M10 2H12.6667C13.403 2 14 2.59695 14 3.33333V12.6667C14 13.403 13.403 14 12.6667 14H10M10 2V14\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgColumn;\n"
  },
  {
    "path": "web/lib/opal/src/icons/copy.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgCopy = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_16_2607)\">\n      <path\n        d=\"M3.33333 9.99999H2.66666C2.31304 9.99999 1.9739 9.85952 1.72385 9.60947C1.4738 9.35942 1.33333 9.02028 1.33333 8.66666V2.66666C1.33333 2.31304 1.4738 1.9739 1.72385 1.72385C1.9739 1.4738 2.31304 1.33333 2.66666 1.33333H8.66666C9.02028 1.33333 9.35942 1.4738 9.60947 1.72385C9.85952 1.9739 9.99999 2.31304 9.99999 2.66666V3.33333M7.33333 5.99999H13.3333C14.0697 5.99999 14.6667 6.59695 14.6667 7.33333V13.3333C14.6667 14.0697 14.0697 14.6667 13.3333 14.6667H7.33333C6.59695 14.6667 5.99999 14.0697 5.99999 13.3333V7.33333C5.99999 6.59695 6.59695 5.99999 7.33333 5.99999Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_16_2607\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgCopy;\n"
  },
  {
    "path": "web/lib/opal/src/icons/corner-right-up-dot.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgCornerRightUpDot = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 9 14\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4.25002 12.75L4.25002 6.25M4.25002 12.75L7.75 9.25M4.25002 12.75L0.75 9.25M4.25002 3.75C3.42158 3.75 2.75 3.07843 2.75 2.25C2.75 1.42157 3.42158 0.75 4.25002 0.75C5.07845 0.75 5.75 1.42157 5.75 2.25C5.75 3.07843 5.07845 3.75 4.25002 3.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgCornerRightUpDot;\n"
  },
  {
    "path": "web/lib/opal/src/icons/cpu.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgCpu = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_16_2615)\">\n      <path\n        d=\"M6.09091 1V2.90909M9.90909 1V2.90909M6.09091 13.0909V15M9.90909 13.0909V15M13.0909 6.09091H15M13.0909 9.27273H15M1 6.09091H2.90909M1 9.27273H2.90909M4.18182 2.90909H11.8182C12.5211 2.90909 13.0909 3.47891 13.0909 4.18182V11.8182C13.0909 12.5211 12.5211 13.0909 11.8182 13.0909H4.18182C3.47891 13.0909 2.90909 12.5211 2.90909 11.8182V4.18182C2.90909 3.47891 3.47891 2.90909 4.18182 2.90909ZM6 6H10V10H6V6Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_16_2615\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgCpu;\n"
  },
  {
    "path": "web/lib/opal/src/icons/credit-card.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgCreditCard = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14.6667 6V4.00008C14.6667 3.26675 14.0667 2.66675 13.3333 2.66675H2.66668C1.93334 2.66675 1.33334 3.26675 1.33334 4.00008V6M14.6667 6V12.0001C14.6667 12.7334 14.0667 13.3334 13.3333 13.3334H2.66668C1.93334 13.3334 1.33334 12.7334 1.33334 12.0001V6M14.6667 6H1.33334\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgCreditCard;\n"
  },
  {
    "path": "web/lib/opal/src/icons/curate.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgCurate = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8 9L8 14.5M8 9C7.35971 8.35971 6.9055 8 6 8H2.5L2.5 13.5H6C6.9055 13.5 7.35971 13.8597 8 14.5M8 9C8.64029 8.35971 9.09449 8 10 8H13.5L13.5 13.5H10C9.09449 13.5 8.64029 13.8597 8 14.5M10.25 3.75C10.25 4.99264 9.24264 6 8 6C6.75736 6 5.75 4.99264 5.75 3.75C5.75 2.50736 6.75736 1.5 8 1.5C9.24264 1.5 10.25 2.50736 10.25 3.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgCurate;\n"
  },
  {
    "path": "web/lib/opal/src/icons/dashboard.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgDashboard = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14 6V3.33333C14 2.59695 13.403 2 12.6667 2H3.33333C2.59695 2 2 2.59695 2 3.33333V6M14 6V12.6667C14 13.403 13.403 14 12.6667 14H6M14 6H6M2 6V12.6667C2 13.403 2.59695 14 3.33333 14H6M2 6H6M6 6V14\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgDashboard;\n"
  },
  {
    "path": "web/lib/opal/src/icons/dev-kit.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgDevKit = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M2 5H14M2 5V14H14V5M2 5C2 4.67722 2.11475 4.36495 2.32376 4.11897L4.12423 2H11.8795L13.6766 4.11869C13.8854 4.36487 14 4.67719 14 5M9.66666 11.1733L11.3333 9.50667L9.66666 7.84M6.33333 7.84L4.66666 9.50667L6.33333 11.1733\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgDevKit;\n"
  },
  {
    "path": "web/lib/opal/src/icons/download-cloud.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgDownloadCloud = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 14\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5.08333 10.0833L7.75 12.75M7.75 12.75L10.4167 10.0833M7.75 12.75L7.75 6.75M13.3992 10.3545C13.9521 9.9657 14.3668 9.41081 14.583 8.77036C14.7992 8.12991 14.8056 7.43724 14.6015 6.79286C14.3973 6.14848 13.9931 5.58591 13.4476 5.18681C12.902 4.78771 12.2435 4.57283 11.5676 4.57336H10.7662C10.5749 3.8279 10.217 3.13554 9.71944 2.54841C9.22186 1.96129 8.59757 1.49469 7.89357 1.18375C7.18956 0.872809 6.42419 0.725628 5.65508 0.753287C4.88596 0.780946 4.13314 0.982724 3.45329 1.34343C2.77344 1.70414 2.18428 2.21437 1.73016 2.83572C1.27604 3.45707 0.968792 4.17335 0.831551 4.93063C0.69431 5.6879 0.730651 6.46645 0.937838 7.20765C1.14502 7.94885 1.51766 8.63339 2.02769 9.20974\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgDownloadCloud;\n"
  },
  {
    "path": "web/lib/opal/src/icons/download.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgDownload = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14 10V12.6667C14 13.3929 13.3929 14 12.6667 14H3.33333C2.60711 14 2 13.3929 2 12.6667V10M4.66667 6.66667L8 10M8 10L11.3333 6.66667M8 10L8 2\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgDownload;\n"
  },
  {
    "path": "web/lib/opal/src/icons/edit-big.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgEditBig = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8 2.5H4C3.17157 2.5 2.5 3.17157 2.5 4V12C2.5 12.8284 3.17157 13.5 4 13.5H12C12.8284 13.5 13.5 12.8284 13.5 12V8M6 10V8.26485C6 8.08682 6.0707 7.91617 6.19654 7.79028L11.5938 2.3931C12.1179 1.86897 12.9677 1.86897 13.4918 2.3931L13.6069 2.50823C14.131 3.03236 14.131 3.88213 13.6069 4.40626L8.20971 9.80345C8.08389 9.92934 7.91317 10 7.73521 10H6Z\"\n      strokeWidth={1.5}\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgEditBig;\n"
  },
  {
    "path": "web/lib/opal/src/icons/edit.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgEdit = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8 13.3333H14M11 2.33334C11.2652 2.06813 11.6249 1.91913 12 1.91913C12.1857 1.91913 12.3696 1.95571 12.5412 2.02678C12.7128 2.09785 12.8687 2.20202 13 2.33334C13.1313 2.46466 13.2355 2.62057 13.3066 2.79215C13.3776 2.96373 13.4142 3.14762 13.4142 3.33334C13.4142 3.51906 13.3776 3.70296 13.3066 3.87454C13.2355 4.04612 13.1313 4.20202 13 4.33334L4.66667 12.6667L2 13.3333L2.66667 10.6667L11 2.33334Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgEdit;\n"
  },
  {
    "path": "web/lib/opal/src/icons/empty.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgEmpty = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14 10V12.6667C14 13.3929 13.3929 14 12.6667 14H3.33333C2.60711 14 2 13.3929 2 12.6667V10M8 2V5M13.5 4.5L11.5 6.5M2.5 4.5L4.5 6.5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgEmpty;\n"
  },
  {
    "path": "web/lib/opal/src/icons/expand.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgExpand = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4.99994 5.49995L7.52858 2.97131C7.78891 2.71098 8.21105 2.71098 8.47138 2.97131L11 5.49995M5.00024 10.5L7.5288 13.0286C7.78914 13.2889 8.21127 13.2889 8.4716 13.0286L11.0002 10.5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgExpand;\n"
  },
  {
    "path": "web/lib/opal/src/icons/external-link.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgExternalLink = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 24 24\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M15 3h6v6\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M10 14L21 3\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgExternalLink;\n"
  },
  {
    "path": "web/lib/opal/src/icons/eye-closed.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgEyeClosed = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 10\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M1 1.5C1 1.5 1.69706 2.89413 3 4.22328M15 1.5C15 1.5 14.3029 2.89413 13 4.22328M3 4.22328C3.78612 5.02522 4.7928 5.80351 6 6.23767M3 4.22328L1 6.22328M6 6.23767C6.61544 6.45901 7.28299 6.59091 8 6.59091C8.71701 6.59091 9.38456 6.45901 10 6.23767M6 6.23767L5 8.99908M10 6.23767C11.2072 5.80351 12.2139 5.02522 13 4.22328M10 6.23767L11 8.99908M13 4.22328L15 6.22328\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgEyeClosed;\n"
  },
  {
    "path": "web/lib/opal/src/icons/eye-off.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgEyeOff = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M11.78 11.78C10.6922 12.6092 9.36761 13.0685 8 13.0909C3.54545 13.0909 1 8 1 8C1.79157 6.52484 2.88945 5.23602 4.22 4.22M11.78 11.78L9.34909 9.34909M11.78 11.78L15 15M4.22 4.22L1 1M4.22 4.22L6.65091 6.65091M6.66364 3.06182C7.10167 2.95929 7.55013 2.90803 8 2.90909C12.4545 2.90909 15 8 15 8C14.6137 8.72266 14.153 9.40301 13.6255 10.03M9.34909 9.34909L6.65091 6.65091M9.34909 9.34909C8.99954 9.72422 8.49873 9.94737 7.98606 9.95641C6.922 9.97519 6.02481 9.078 6.04358 8.01394C6.05263 7.50127 6.27578 7.00046 6.65091 6.65091\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgEyeOff;\n"
  },
  {
    "path": "web/lib/opal/src/icons/eye.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgEye = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 12\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M1 6.00088C1 6.00088 3.54545 0.909973 8 0.909973C12.4545 0.909973 15 6.00088 15 6.00088C15 6.00088 12.4545 11.0918 8 11.0918C3.54545 11.0918 1 6.00088 1 6.00088Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M8 7.90997C9.05436 7.90997 9.90909 7.05524 9.90909 6.00088C9.90909 4.94652 9.05436 4.09179 8 4.09179C6.94564 4.09179 6.09091 4.94652 6.09091 6.00088C6.09091 7.05524 6.94564 7.90997 8 7.90997Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgEye;\n"
  },
  {
    "path": "web/lib/opal/src/icons/file-braces.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgFileBraces = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 24 24\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6 22a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h8a2.4 2.4 0 0 1 1.704.706l3.588 3.588A2.4 2.4 0 0 1 20 8v12a2 2 0 0 1-2 2z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M14 2v5a1 1 0 0 0 1 1h5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M10 12a1 1 0 0 0-1 1v1a1 1 0 0 1-1 1 1 1 0 0 1 1 1v1a1 1 0 0 0 1 1\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M14 18a1 1 0 0 0 1-1v-1a1 1 0 0 1 1-1 1 1 0 0 1-1-1v-1a1 1 0 0 0-1-1\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgFileBraces;\n"
  },
  {
    "path": "web/lib/opal/src/icons/file-broadcast.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgFileBroadcast = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 18 18\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6.1875 2.25003H2.625C1.808 2.25003 1.125 2.93303 1.125 3.75003L1.125 14.25C1.125 15.067 1.808 15.75 2.625 15.75L9.37125 15.75C10.1883 15.75 10.8713 15.067 10.8713 14.25L10.8713 6.94128M6.1875 2.25003L10.8713 6.94128M6.1875 2.25003V6.94128H10.8713M10.3069 2.25L13.216 5.15914C13.6379 5.5811 13.875 6.15339 13.875 6.75013V13.875C13.875 14.5212 13.737 15.2081 13.4392 15.7538M16.4391 15.7538C16.737 15.2081 16.875 14.5213 16.875 13.8751L16.875 7.02481C16.875 5.53418 16.2833 4.10451 15.23 3.04982L14.4301 2.25003\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgFileBroadcast;\n"
  },
  {
    "path": "web/lib/opal/src/icons/file-chart-pie.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgFileChartPie = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 24 24\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M15.941 22H18a2 2 0 0 0 2-2V8a2.4 2.4 0 0 0-.706-1.704l-3.588-3.588A2.4 2.4 0 0 0 14 2H6a2 2 0 0 0-2 2v3.512\"\n      strokeWidth={2}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M14 2v5a1 1 0 0 0 1 1h5\"\n      strokeWidth={2}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M4.017 11.512a6 6 0 1 0 8.466 8.475\"\n      strokeWidth={2}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M9 16a1 1 0 0 1-1-1v-4c0-.552.45-1.008.995-.917a6 6 0 0 1 4.922 4.922c.091.544-.365.995-.917.995z\"\n      strokeWidth={2}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgFileChartPie;\n"
  },
  {
    "path": "web/lib/opal/src/icons/file-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgFileSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8.75 4.75H5.75001C5.47386 4.75 5.25001 4.97386 5.25001 5.25V10.75C5.25001 11.0261 5.47386 11.25 5.75001 11.25H10.25C10.5261 11.25 10.75 11.0261 10.75 10.75V6.75M8.75 4.75L10.75 6.75M8.75 4.75V6.75H10.75\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgFileSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/file-text.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgFileText = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 20\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M9.66634 1.6665H2.99967C2.55765 1.6665 2.13372 1.8421 1.82116 2.15466C1.5086 2.46722 1.33301 2.89114 1.33301 3.33317V16.6665C1.33301 17.1085 1.5086 17.5325 1.82116 17.845C2.13372 18.1576 2.55765 18.3332 2.99967 18.3332H12.9997C13.4417 18.3332 13.8656 18.1576 14.1782 17.845C14.4907 17.5325 14.6663 17.1085 14.6663 16.6665V6.6665M9.66634 1.6665L14.6663 6.6665M9.66634 1.6665L9.66634 6.6665L14.6663 6.6665M11.333 10.8332H4.66634M11.333 14.1665H4.66634M6.33301 7.49984H4.66634\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgFileText;\n"
  },
  {
    "path": "web/lib/opal/src/icons/files.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgFiles = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    xmlns=\"http://www.w3.org/2000/svg\"\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5.5 1.9999H2.33334C1.97971 1.9999 1.64058 2.14038 1.39053 2.39043C1.14048 2.64048 1 2.97961 1 3.33324L1 12.6666C1 13.0202 1.14048 13.3593 1.39052 13.6094C1.64057 13.8594 1.97971 13.9999 2.33333 13.9999L8.33 13.9999C8.68362 13.9999 9.02276 13.8594 9.27281 13.6094C9.52286 13.3593 9.66333 13.0202 9.66333 12.6666L9.66334 6.1699M5.5 1.9999L9.66334 6.1699M5.5 1.9999V6.1699H9.66334M9.16167 1.99988L11.7475 4.58578C12.1226 4.96085 12.3333 5.46956 12.3333 5.99999V12.3332C12.3333 12.9076 12.2107 13.5182 11.9459 14.0032M14.6126 14.0033C14.8773 13.5182 15 12.9077 15 12.3333L15 6.24415C15 4.91915 14.4741 3.64833 13.5377 2.71083L12.8268 1.99991\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgFiles;\n"
  },
  {
    "path": "web/lib/opal/src/icons/filter-plus.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgFilterPlus = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M9.5 12.5L6.83334 11.1667V7.80667L1.5 1.5H14.8333L12.1667 4.65333M12.1667 7V9.5M12.1667 9.5V12M12.1667 9.5H9.66667M12.1667 9.5H14.6667\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgFilterPlus;\n"
  },
  {
    "path": "web/lib/opal/src/icons/filter.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgFilter = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14.6667 3H1.33334L6.66668 9.30667V12.6667L9.33334 14V9.30667L14.6667 3Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgFilter;\n"
  },
  {
    "path": "web/lib/opal/src/icons/fold.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgFold = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M11 3.25L8.47136 5.77857C8.21103 6.0389 7.78889 6.0389 7.52856 5.77857L4.99999 3.25M11 12.75L8.47136 10.2214C8.21103 9.96103 7.78889 9.96103 7.52856 10.2214L4.99999 12.75\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgFold;\n"
  },
  {
    "path": "web/lib/opal/src/icons/folder-in.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgFolderIn = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5 2.5L3 2.50001C2.17157 2.50001 1.5 3.17158 1.5 4.00001V12C1.5 12.8284 2.17157 13.5 3 13.5H13C13.8284 13.5 14.5 12.8284 14.5 12V6.00001C14.5 5.17158 13.8284 4.50001 13 4.50001L11 4.5M11 7.5L8.47141 10.0286C8.34124 10.1588 8.17062 10.2239 8.00001 10.2239M5.00001 7.5L7.52861 10.0286C7.65877 10.1588 7.82939 10.2239 8.00001 10.2239M7.99999 1.5L8.00001 10.2239\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgFolderIn;\n"
  },
  {
    "path": "web/lib/opal/src/icons/folder-open.tsx",
    "content": "import React from \"react\";\nimport type { IconProps } from \"@opal/types\";\n\nconst SvgFolderOpen = React.forwardRef<SVGSVGElement, IconProps>(\n  ({ size = 32, color = \"currentColor\", title, className, ...props }, ref) => (\n    <svg\n      ref={ref}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      viewBox=\"0 0 32 26\"\n      width={size}\n      height={size}\n      fill=\"none\"\n      role={title ? \"img\" : \"presentation\"}\n      aria-label={title}\n      className={className}\n      stroke=\"currentColor\"\n      {...props}\n    >\n      {title ? <title>{title}</title> : null}\n      <path\n        d=\"M30.4177 15.4931L29.1847 15.2876L30.4177 15.4931ZM29.4177 21.4932L30.6507 21.6987V21.6987L29.4177 21.4932ZM2.58209 21.4932L1.3491 21.6987L2.58209 21.4932ZM1.58209 15.4931L0.349095 15.6986L1.58209 15.4931ZM13.8786 2.87868L12.9947 3.76256V3.76256L13.8786 2.87868ZM16.1212 5.12132L17.0051 4.23744V4.23744L16.1212 5.12132ZM4.54127 11.9999V13.2499H27.4585V11.9999V10.7499H4.54127V11.9999ZM30.4177 15.4931L29.1847 15.2876L28.1847 21.2877L29.4177 21.4932L30.6507 21.6987L31.6507 15.6986L30.4177 15.4931ZM26.4585 24V22.75H5.54128V24V25.25H26.4585V24ZM2.58209 21.4932L3.81509 21.2877L2.81508 15.2876L1.58209 15.4931L0.349095 15.6986L1.3491 21.6987L2.58209 21.4932ZM5.54128 24V22.75C4.68581 22.75 3.95572 22.1315 3.81509 21.2877L2.58209 21.4932L1.3491 21.6987C1.69065 23.748 3.46371 25.25 5.54128 25.25V24ZM29.4177 21.4932L28.1847 21.2877C28.0441 22.1315 27.314 22.75 26.4585 22.75V24V25.25C28.5361 25.25 30.3091 23.748 30.6507 21.6987L29.4177 21.4932ZM18.2425 6V7.25H25.9999V6V4.75H18.2425V6ZM5.9999 2V3.25H11.7573V2V0.75H5.9999V2ZM13.8786 2.87868L12.9947 3.76256L15.2373 6.0052L16.1212 5.12132L17.0051 4.23744L14.7625 1.9948L13.8786 2.87868ZM11.7573 2V3.25C12.2214 3.25 12.6665 3.43437 12.9947 3.76256L13.8786 2.87868L14.7625 1.9948C13.9654 1.19777 12.8844 0.75 11.7573 0.75V2ZM18.2425 6V4.75C17.7784 4.75 17.3333 4.56563 17.0051 4.23744L16.1212 5.12132L15.2373 6.0052C16.0344 6.80223 17.1154 7.25 18.2425 7.25V6ZM28.9999 9H30.2499C30.2499 6.65279 28.3471 4.75 25.9999 4.75V6V7.25C26.9664 7.25 27.7499 8.0335 27.7499 9H28.9999ZM2.99989 5H4.24989C4.24989 4.0335 5.0334 3.25 5.9999 3.25V2V0.75C3.65269 0.75 1.74989 2.65279 1.74989 5H2.99989ZM28.9999 9H27.7499V12.4249H28.9999H30.2499V9H28.9999ZM27.4585 11.9999V13.2499C27.7932 13.2499 28.0975 13.3411 28.3564 13.4965L28.9999 12.4249L29.6434 11.3533C29.0065 10.9708 28.2589 10.7499 27.4585 10.7499V11.9999ZM28.9999 12.4249L28.3564 13.4965C28.9538 13.8553 29.3076 14.5505 29.1847 15.2876L30.4177 15.4931L31.6507 15.6986C31.9508 13.8982 31.0763 12.2138 29.6434 11.3533L28.9999 12.4249ZM2.99989 12.4249H4.24989V5H2.99989H1.74989V12.4249H2.99989ZM4.54127 11.9999V10.7499C3.74089 10.7499 2.99329 10.9708 2.35636 11.3533L2.99989 12.4249L3.64343 13.4965C3.90228 13.3411 4.20658 13.2499 4.54127 13.2499V11.9999ZM2.99989 12.4249L2.35636 11.3533C0.923529 12.2138 0.0490297 13.8982 0.349095 15.6986L1.58209 15.4931L2.81508 15.2876C2.69222 14.5505 3.04602 13.8553 3.64343 13.4965L2.99989 12.4249Z\"\n        fill={color}\n        fillOpacity={0.8}\n        stroke={color}\n        strokeOpacity={0.8}\n        strokeWidth={0.2}\n      />\n    </svg>\n  )\n);\n\nSvgFolderOpen.displayName = \"SvgFolderOpen\";\nexport default SvgFolderOpen;\n"
  },
  {
    "path": "web/lib/opal/src/icons/folder-partial-open.tsx",
    "content": "import React from \"react\";\nimport type { IconProps } from \"@opal/types\";\n\nconst SvgFolderPartialOpen = React.forwardRef<SVGSVGElement, IconProps>(\n  ({ size = 32, color = \"currentColor\", title, className, ...props }, ref) => (\n    <svg\n      ref={ref}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      viewBox=\"0 0 16 13\"\n      width={size}\n      height={size}\n      fill=\"none\"\n      role={title ? \"img\" : \"presentation\"}\n      aria-label={title}\n      className={className}\n      stroke=\"currentColor\"\n      {...props}\n    >\n      {title ? <title>{title}</title> : null}\n      <path\n        d=\"M14.1431 4.98782V4.25C14.1431 3.42157 13.4715 2.75 12.6431 2.75H8.76442C8.36659 2.75 7.98506 2.59196 7.70376 2.31066L6.58244 1.18934C6.30113 0.908035 5.9196 0.75 5.52178 0.75H2.6431C1.81467 0.75 1.1431 1.42157 1.1431 2.25V4.9878\"\n        stroke={color}\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n      <path\n        d=\"M14.2394 10.3532C14.1852 11.1397 13.5313 11.75 12.743 11.75H2.54321C1.75483 11.75 1.101 11.1397 1.04676 10.3532L0.753657 6.1032C0.693864 5.23621 1.38105 4.5 2.2501 4.5H13.0361C13.9051 4.5 14.5923 5.2362 14.5325 6.1032L14.2394 10.3532Z\"\n        stroke={color}\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n      />\n    </svg>\n  )\n);\n\nSvgFolderPartialOpen.displayName = \"SvgFolderPartialOpen\";\nexport default SvgFolderPartialOpen;\n"
  },
  {
    "path": "web/lib/opal/src/icons/folder-plus.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgFolderPlus = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.99999 7.33333V11.3333M5.99999 9.33333H10M14.6667 12.6667C14.6667 13.0203 14.5262 13.3594 14.2761 13.6095C14.0261 13.8595 13.6869 14 13.3333 14H2.66666C2.31304 14 1.9739 13.8595 1.72385 13.6095C1.4738 13.3594 1.33333 13.0203 1.33333 12.6667V3.33333C1.33333 2.97971 1.4738 2.64057 1.72385 2.39052C1.9739 2.14048 2.31304 2 2.66666 2H5.99999L7.33333 4H13.3333C13.6869 4 14.0261 4.14048 14.2761 4.39052C14.5262 4.64057 14.6667 4.97971 14.6667 5.33333V12.6667Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgFolderPlus;\n"
  },
  {
    "path": "web/lib/opal/src/icons/folder.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgFolder = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14.5 12V6C14.5 5.17157 13.8284 4.5 13 4.5H9.12132C8.7235 4.5 8.34196 4.34196 8.06066 4.06066L6.93934 2.93934C6.65804 2.65804 6.2765 2.5 5.87868 2.5H3C2.17157 2.5 1.5 3.17157 1.5 4V12C1.5 12.8284 2.17157 13.5 3 13.5H13C13.8284 13.5 14.5 12.8284 14.5 12Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgFolder;\n"
  },
  {
    "path": "web/lib/opal/src/icons/gemini.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgGemini = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 52 52\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M26 2C26.5034 2 26.9412 2.34378 27.064 2.83212C27.4405 4.3258 27.9315 5.78274 28.5426 7.20002C30.1345 10.8981 32.3187 14.1349 35.092 16.9081C37.8664 19.6813 41.102 21.8655 44.8 23.4574C46.2186 24.0685 47.6743 24.5595 49.1679 24.936C49.6562 25.0588 49.9999 25.4967 50 26C50 26.5034 49.6563 26.9413 49.1679 27.064C47.6743 27.4405 46.2172 27.9315 44.8 28.5426C41.1019 30.1345 37.8651 32.3187 35.092 35.092C32.3187 37.8665 30.1345 41.1019 28.5426 44.8C27.9315 46.2186 27.4405 47.6743 27.064 49.1679C26.9413 49.6563 26.5034 50 26 50C25.4967 49.9999 25.0588 49.6562 24.936 49.1679C24.5595 47.6743 24.0685 46.2172 23.4574 44.8C21.8655 41.102 19.6826 37.8651 16.9081 35.092C14.1335 32.3187 10.8981 30.1345 7.20002 28.5426C5.78137 27.9315 4.3258 27.4405 2.83212 27.064C2.34378 26.9412 2 26.5034 2 26C2.00006 25.4967 2.34381 25.0588 2.83212 24.936C4.32581 24.5595 5.78273 24.0686 7.20002 23.4574C10.8981 21.8655 14.1349 19.6813 16.9081 16.9081C19.6813 14.1349 21.8655 10.8981 23.4574 7.20002C24.0686 5.78137 24.5595 4.32581 24.936 2.83212C25.0588 2.34381 25.4967 2.00006 26 2Z\"\n      fill=\"url(#paint0_linear_9_973)\"\n    />\n    <defs>\n      <linearGradient\n        id=\"paint0_linear_9_973\"\n        x1={15.6448}\n        y1={34.1163}\n        x2={40.5754}\n        y2={13.0975}\n        gradientUnits=\"userSpaceOnUse\"\n      >\n        <stop stopColor=\"#4893FC\" />\n        <stop offset={0.27} stopColor=\"#4893FC\" />\n        <stop offset={0.776981} stopColor=\"#969DFF\" />\n        <stop offset={1} stopColor=\"#BD99FE\" />\n      </linearGradient>\n    </defs>\n  </svg>\n);\nexport default SvgGemini;\n"
  },
  {
    "path": "web/lib/opal/src/icons/globe.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgGlobe = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_16_2601)\">\n      <path\n        d=\"M14.6667 7.99999C14.6667 11.6819 11.6819 14.6667 8.00001 14.6667M14.6667 7.99999C14.6667 4.3181 11.6819 1.33333 8.00001 1.33333M14.6667 7.99999H1.33334M8.00001 14.6667C4.31811 14.6667 1.33334 11.6819 1.33334 7.99999M8.00001 14.6667C9.66753 12.8411 10.6152 10.472 10.6667 7.99999C10.6152 5.52802 9.66753 3.1589 8.00001 1.33333M8.00001 14.6667C6.33249 12.8411 5.38484 10.472 5.33334 7.99999C5.38484 5.52802 6.33249 3.1589 8.00001 1.33333M1.33334 7.99999C1.33334 4.3181 4.31811 1.33333 8.00001 1.33333\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_16_2601\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgGlobe;\n"
  },
  {
    "path": "web/lib/opal/src/icons/handle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgHandle = ({ size = 16, ...props }: IconProps) => (\n  <svg\n    width={Math.round((size * 3) / 17)}\n    height={size}\n    viewBox=\"0 0 3 17\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M0.5 0.5V16.5M2.5 0.5V16.5\"\n      stroke=\"currentColor\"\n      strokeLinecap=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgHandle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/hard-drive.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgHardDrive = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14.6667 8.00002H1.33334M14.6667 8.00002V12C14.6667 12.3536 14.5262 12.6928 14.2762 12.9428C14.0261 13.1929 13.687 13.3334 13.3333 13.3334H2.66668C2.31305 13.3334 1.97392 13.1929 1.72387 12.9428C1.47382 12.6928 1.33334 12.3536 1.33334 12V8.00002M14.6667 8.00002L12.3667 3.40669C12.2563 3.18455 12.0861 2.9976 11.8753 2.86687C11.6645 2.73615 11.4214 2.66682 11.1733 2.66669H4.82668C4.57862 2.66682 4.33552 2.73615 4.12471 2.86687C3.91389 2.9976 3.74373 3.18455 3.63334 3.40669L1.33334 8.00002M4.00001 10.6667H4.00668M6.66668 10.6667H6.67334\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgHardDrive;\n"
  },
  {
    "path": "web/lib/opal/src/icons/hash-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgHashSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5 6.5H6.5M11 6.5H9.5M5 9.5H6.5M11 9.5H9.5M6.5 5V6.5M6.5 11V9.5M9.5 5V6.5M9.5 11V9.5M6.5 9.5H9.5M6.5 9.5V6.5M9.5 9.5V6.5M9.5 6.5H6.5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgHashSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/hash.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgHash = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M2.66667 6H13.3333M2.66667 10H13.3333M6.66667 2L5.33334 14M10.6667 2L9.33334 14\"\n      stroke=\"currentColor\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgHash;\n"
  },
  {
    "path": "web/lib/opal/src/icons/headset-mic.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgHeadsetMic = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M2.5 7.75002L2.5 7.25C2.5 4.21243 4.96243 1.75 8 1.75C11.0376 1.75 13.5 4.21243 13.5 7.25V7.75M2.5 7.75002L4 7.75C4.55228 7.75 5 8.19772 5 8.75V10.25C5 10.8023 4.55228 11.25 4 11.25H3.5C2.94772 11.25 2.5 10.8023 2.5 10.25V7.75002ZM13.5 7.75H12C11.4477 7.75 11 8.19772 11 8.75V10.25C11 10.8023 11.4477 11.25 12 11.25H12.5C13.0523 11.25 13.5 10.8023 13.5 10.25M13.5 7.75V10.25M13.5 10.25V11.25C13.5 12.9069 12.1569 14.25 10.5 14.25L8 14.25\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgHeadsetMic;\n"
  },
  {
    "path": "web/lib/opal/src/icons/history.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgHistory = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.99998 4.00001V8.00001L11 9.50003M1.33332 1.40151V5.23535M1.33332 5.23535H4.99998M1.33332 5.23535L3.28593 3.28597C4.49236 2.07954 6.15903 1.33334 7.99998 1.33334C11.6819 1.33334 14.6667 4.31811 14.6667 8.00001C14.6667 11.6819 11.6819 14.6667 7.99998 14.6667C4.83386 14.6667 2.18324 12.4596 1.50274 9.50003\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgHistory;\n"
  },
  {
    "path": "web/lib/opal/src/icons/hourglass.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgHourglass = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8 7.99999L4.44793 5.72667C4.06499 5.48159 3.83333 5.05828 3.83333 4.60364V1.83333H12.1667V4.60364C12.1667 5.05828 11.935 5.48159 11.5521 5.72667L8 7.99999ZM8 7.99999L11.5521 10.2733C11.935 10.5184 12.1667 10.9417 12.1667 11.3963V14.1667H3.83333V11.3963C3.83333 10.9417 4.06499 10.5184 4.44793 10.2733L8 7.99999ZM13.5 14.1667H2.5M13.5 1.83333H2.5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgHourglass;\n"
  },
  {
    "path": "web/lib/opal/src/icons/image-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgImageSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M9.5 11.5L6.20711 8.20711C5.81658 7.81658 5.18342 7.81658 4.79289 8.20711L4 9M9.75 7.5C10.4404 7.5 11 6.94037 11 6.25C11 5.55964 10.4404 5 9.75 5C9.05963 5 8.5 5.55964 8.5 6.25C8.5 6.94037 9.05963 7.5 9.75 7.5Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgImageSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/image.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgImage = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M11 14L6.06066 9.06072C5.47487 8.47498 4.52513 8.47498 3.93934 9.06072L2 11M2 3.49998C2 2.67156 2.67157 2 3.5 2H12.5C13.3285 2 14 2.67156 14 3.49998V12.4999C14 13.3283 13.3285 13.9998 12.5 13.9998H3.5C2.67157 13.9998 2 13.3283 2 12.4999V3.49998ZM9.875 7.62492C10.7034 7.62492 11.375 6.95338 11.375 6.12494C11.375 5.29653 10.7034 4.62496 9.875 4.62496C9.04655 4.62496 8.375 5.29653 8.375 6.12494C8.375 6.95338 9.04655 7.62492 9.875 7.62492Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n    />\n  </svg>\n);\nexport default SvgImage;\n"
  },
  {
    "path": "web/lib/opal/src/icons/import-icon.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgImport = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 14 14\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6.75 9.41667L9.41667 6.75M9.41667 6.75L6.75 4.08333M9.41667 6.75L0.75 6.74667M2.75 3.75V2.08C2.75 1.34546 3.34546 0.75 4.08 0.75H11.4167C11.7703 0.75 12.1094 0.890476 12.3595 1.14052C12.6095 1.39057 12.75 1.72971 12.75 2.08333V11.4167C12.75 11.7703 12.6095 12.1094 12.3595 12.3595C12.1094 12.6095 11.7703 12.75 11.4167 12.75H4.08C3.34546 12.75 2.75 12.1545 2.75 11.42V9.75\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgImport;\n"
  },
  {
    "path": "web/lib/opal/src/icons/index.ts",
    "content": "export { default as SvgActions } from \"@opal/icons/actions\";\nexport { default as SvgActivity } from \"@opal/icons/activity\";\nexport { default as SvgActivitySmall } from \"@opal/icons/activity-small\";\nexport { default as SvgAddLines } from \"@opal/icons/add-lines\";\nexport { default as SvgAlertCircle } from \"@opal/icons/alert-circle\";\nexport { default as SvgAlertTriangle } from \"@opal/icons/alert-triangle\";\nexport { default as SvgArrowDownDot } from \"@opal/icons/arrow-down-dot\";\nexport { default as SvgArrowExchange } from \"@opal/icons/arrow-exchange\";\nexport { default as SvgArrowLeft } from \"@opal/icons/arrow-left\";\nexport { default as SvgArrowLeftDot } from \"@opal/icons/arrow-left-dot\";\nexport { default as SvgArrowRight } from \"@opal/icons/arrow-right\";\nexport { default as SvgArrowRightCircle } from \"@opal/icons/arrow-right-circle\";\nexport { default as SvgArrowRightDot } from \"@opal/icons/arrow-right-dot\";\nexport { default as SvgArrowUpCircle } from \"@opal/icons/arrow-up-circle\";\nexport { default as SvgArrowUp } from \"@opal/icons/arrow-up\";\nexport { default as SvgArrowUpDown } from \"@opal/icons/arrow-up-down\";\nexport { default as SvgArrowUpDot } from \"@opal/icons/arrow-up-dot\";\nexport { default as SvgArrowUpRight } from \"@opal/icons/arrow-up-right\";\nexport { default as SvgArrowWallRight } from \"@opal/icons/arrow-wall-right\";\nexport { default as SvgAudio } from \"@opal/icons/audio\";\nexport { default as SvgAudioEqSmall } from \"@opal/icons/audio-eq-small\";\nexport { default as SvgAws } from \"@opal/icons/aws\";\nexport { default as SvgAzure } from \"@opal/icons/azure\";\nexport { default as SvgBarChart } from \"@opal/icons/bar-chart\";\nexport { default as SvgBarChartSmall } from \"@opal/icons/bar-chart-small\";\nexport { default as SvgBell } from \"@opal/icons/bell\";\nexport { default as SvgBifrost } from \"@opal/icons/bifrost\";\nexport { default as SvgBlocks } from \"@opal/icons/blocks\";\nexport { default as SvgBookOpen } from \"@opal/icons/book-open\";\nexport { default as SvgBookmark } from \"@opal/icons/bookmark\";\nexport { default as SvgBooksLineSmall } from \"@opal/icons/books-line-small\";\nexport { default as SvgBooksStackSmall } from \"@opal/icons/books-stack-small\";\nexport { default as SvgBracketCurly } from \"@opal/icons/bracket-curly\";\nexport { default as SvgBranch } from \"@opal/icons/branch\";\nexport { default as SvgBubbleText } from \"@opal/icons/bubble-text\";\nexport { default as SvgCalendar } from \"@opal/icons/calendar\";\nexport { default as SvgCheck } from \"@opal/icons/check\";\nexport { default as SvgCheckCircle } from \"@opal/icons/check-circle\";\nexport { default as SvgCheckSmall } from \"@opal/icons/check-small\";\nexport { default as SvgCheckSquare } from \"@opal/icons/check-square\";\nexport { default as SvgChevronDown } from \"@opal/icons/chevron-down\";\nexport { default as SvgChevronDownSmall } from \"@opal/icons/chevron-down-small\";\nexport { default as SvgChevronLeft } from \"@opal/icons/chevron-left\";\nexport { default as SvgChevronRight } from \"@opal/icons/chevron-right\";\nexport { default as SvgChevronUp } from \"@opal/icons/chevron-up\";\nexport { default as SvgChevronUpSmall } from \"@opal/icons/chevron-up-small\";\nexport { default as SvgCircle } from \"@opal/icons/circle\";\nexport { default as SvgClaude } from \"@opal/icons/claude\";\nexport { default as SvgClipboard } from \"@opal/icons/clipboard\";\nexport { default as SvgClock } from \"@opal/icons/clock\";\nexport { default as SvgClockHandsSmall } from \"@opal/icons/clock-hands-small\";\nexport { default as SvgCloud } from \"@opal/icons/cloud\";\nexport { default as SvgCode } from \"@opal/icons/code\";\nexport { default as SvgColumn } from \"@opal/icons/column\";\nexport { default as SvgCopy } from \"@opal/icons/copy\";\nexport { default as SvgCornerRightUpDot } from \"@opal/icons/corner-right-up-dot\";\nexport { default as SvgCpu } from \"@opal/icons/cpu\";\nexport { default as SvgCurate } from \"@opal/icons/curate\";\nexport { default as SvgCreditCard } from \"@opal/icons/credit-card\";\nexport { default as SvgDashboard } from \"@opal/icons/dashboard\";\nexport { default as SvgDevKit } from \"@opal/icons/dev-kit\";\nexport { default as SvgDownload } from \"@opal/icons/download\";\nexport { default as SvgDiscordMono } from \"@opal/icons/DiscordMono\";\nexport { default as SvgDownloadCloud } from \"@opal/icons/download-cloud\";\nexport { default as SvgEdit } from \"@opal/icons/edit\";\nexport { default as SvgEditBig } from \"@opal/icons/edit-big\";\nexport { default as SvgEmpty } from \"@opal/icons/empty\";\nexport { default as SvgExpand } from \"@opal/icons/expand\";\nexport { default as SvgExternalLink } from \"@opal/icons/external-link\";\nexport { default as SvgEye } from \"@opal/icons/eye\";\nexport { default as SvgEyeClosed } from \"@opal/icons/eye-closed\";\nexport { default as SvgEyeOff } from \"@opal/icons/eye-off\";\nexport { default as SvgFileBraces } from \"@opal/icons/file-braces\";\nexport { default as SvgFileBroadcast } from \"@opal/icons/file-broadcast\";\nexport { default as SvgFiles } from \"@opal/icons/files\";\nexport { default as SvgFileChartPie } from \"@opal/icons/file-chart-pie\";\nexport { default as SvgFileSmall } from \"@opal/icons/file-small\";\nexport { default as SvgFileText } from \"@opal/icons/file-text\";\nexport { default as SvgFilter } from \"@opal/icons/filter\";\nexport { default as SvgFilterPlus } from \"@opal/icons/filter-plus\";\nexport { default as SvgFold } from \"@opal/icons/fold\";\nexport { default as SvgFolder } from \"@opal/icons/folder\";\nexport { default as SvgFolderIn } from \"@opal/icons/folder-in\";\nexport { default as SvgFolderOpen } from \"@opal/icons/folder-open\";\nexport { default as SvgFolderPartialOpen } from \"@opal/icons/folder-partial-open\";\nexport { default as SvgFolderPlus } from \"@opal/icons/folder-plus\";\nexport { default as SvgGemini } from \"@opal/icons/gemini\";\nexport { default as SvgGlobe } from \"@opal/icons/globe\";\nexport { default as SvgHandle } from \"@opal/icons/handle\";\nexport { default as SvgHardDrive } from \"@opal/icons/hard-drive\";\nexport { default as SvgHashSmall } from \"@opal/icons/hash-small\";\nexport { default as SvgHash } from \"@opal/icons/hash\";\nexport { default as SvgHeadsetMic } from \"@opal/icons/headset-mic\";\nexport { default as SvgHistory } from \"@opal/icons/history\";\nexport { default as SvgShareWebhook } from \"@opal/icons/share-webhook\";\nexport { default as SvgHourglass } from \"@opal/icons/hourglass\";\nexport { default as SvgImage } from \"@opal/icons/image\";\nexport { default as SvgImageSmall } from \"@opal/icons/image-small\";\nexport { default as SvgImport } from \"@opal/icons/import-icon\";\nexport { default as SvgInfo } from \"@opal/icons/info\";\nexport { default as SvgInfoSmall } from \"@opal/icons/info-small\";\nexport { default as SvgKey } from \"@opal/icons/key\";\nexport { default as SvgKeystroke } from \"@opal/icons/keystroke\";\nexport { default as SvgLightbulbSimple } from \"@opal/icons/lightbulb-simple\";\nexport { default as SvgLineChartUp } from \"@opal/icons/line-chart-up\";\nexport { default as SvgLink } from \"@opal/icons/link\";\nexport { default as SvgLinkedDots } from \"@opal/icons/linked-dots\";\nexport { default as SvgLitellm } from \"@opal/icons/litellm\";\nexport { default as SvgLmStudio } from \"@opal/icons/lm-studio\";\nexport { default as SvgLoader } from \"@opal/icons/loader\";\nexport { default as SvgLock } from \"@opal/icons/lock\";\nexport { default as SvgLogOut } from \"@opal/icons/log-out\";\nexport { default as SvgMaximize2 } from \"@opal/icons/maximize-2\";\nexport { default as SvgMcp } from \"@opal/icons/mcp\";\nexport { default as SvgMenu } from \"@opal/icons/menu\";\nexport { default as SvgMicrophone } from \"@opal/icons/microphone\";\nexport { default as SvgMicrophoneOff } from \"@opal/icons/microphone-off\";\nexport { default as SvgMinus } from \"@opal/icons/minus\";\nexport { default as SvgMinusCircle } from \"@opal/icons/minus-circle\";\nexport { default as SvgMoon } from \"@opal/icons/moon\";\nexport { default as SvgMoreHorizontal } from \"@opal/icons/more-horizontal\";\nexport { default as SvgMusicSmall } from \"@opal/icons/music-small\";\nexport { default as SvgNetworkGraph } from \"@opal/icons/network-graph\";\nexport { default as SvgNotificationBubble } from \"@opal/icons/notification-bubble\";\nexport { default as SvgOllama } from \"@opal/icons/ollama\";\nexport { default as SvgOnyxLogo } from \"@opal/icons/onyx-logo\";\nexport { default as SvgOnyxLogoTyped } from \"@opal/icons/onyx-logo-typed\";\nexport { default as SvgOnyxOctagon } from \"@opal/icons/onyx-octagon\";\nexport { default as SvgOnyxTyped } from \"@opal/icons/onyx-typed\";\nexport { default as SvgOpenai } from \"@opal/icons/openai\";\nexport { default as SvgOpenrouter } from \"@opal/icons/openrouter\";\nexport { default as SvgOrganization } from \"@opal/icons/organization\";\nexport { default as SvgPaintBrush } from \"@opal/icons/paint-brush\";\nexport { default as SvgPaperclip } from \"@opal/icons/paperclip\";\nexport { default as SvgPauseCircle } from \"@opal/icons/pause-circle\";\nexport { default as SvgPenSmall } from \"@opal/icons/pen-small\";\nexport { default as SvgPencilRuler } from \"@opal/icons/pencil-ruler\";\nexport { default as SvgPieChart } from \"@opal/icons/pie-chart\";\nexport { default as SvgPin } from \"@opal/icons/pin\";\nexport { default as SvgPinned } from \"@opal/icons/pinned\";\nexport { default as SvgPlayCircle } from \"@opal/icons/play-circle\";\nexport { default as SvgPlug } from \"@opal/icons/plug\";\nexport { default as SvgPlus } from \"@opal/icons/plus\";\nexport { default as SvgPlusCircle } from \"@opal/icons/plus-circle\";\nexport { default as SvgProgressBars } from \"@opal/icons/progress-bars\";\nexport { default as SvgProgressCircle } from \"@opal/icons/progress-circle\";\nexport { default as SvgQuestionMarkSmall } from \"@opal/icons/question-mark-small\";\nexport { default as SvgQuoteEnd } from \"@opal/icons/quote-end\";\nexport { default as SvgQuoteStart } from \"@opal/icons/quote-start\";\nexport { default as SvgRefreshCw } from \"@opal/icons/refresh-cw\";\nexport { default as SvgRevert } from \"@opal/icons/revert\";\nexport { default as SvgSearch } from \"@opal/icons/search\";\nexport { default as SvgSearchMenu } from \"@opal/icons/search-menu\";\nexport { default as SvgSearchSmall } from \"@opal/icons/search-small\";\nexport { default as SvgServer } from \"@opal/icons/server\";\nexport { default as SvgSettings } from \"@opal/icons/settings\";\nexport { default as SvgShare } from \"@opal/icons/share\";\nexport { default as SvgShield } from \"@opal/icons/shield\";\nexport { default as SvgSidebar } from \"@opal/icons/sidebar\";\nexport { default as SvgSlack } from \"@opal/icons/slack\";\nexport { default as SvgSlash } from \"@opal/icons/slash\";\nexport { default as SvgSliders } from \"@opal/icons/sliders\";\nexport { default as SvgSlidersSmall } from \"@opal/icons/sliders-small\";\nexport { default as SvgSort } from \"@opal/icons/sort\";\nexport { default as SvgSortOrder } from \"@opal/icons/sort-order\";\nexport { default as SvgSparkle } from \"@opal/icons/sparkle\";\nexport { default as SvgStar } from \"@opal/icons/star\";\nexport { default as SvgStarOff } from \"@opal/icons/star-off\";\nexport { default as SvgStep1 } from \"@opal/icons/step1\";\nexport { default as SvgStep2 } from \"@opal/icons/step2\";\nexport { default as SvgStep3 } from \"@opal/icons/step3\";\nexport { default as SvgStep3End } from \"@opal/icons/step3-end\";\nexport { default as SvgStop } from \"@opal/icons/stop\";\nexport { default as SvgStopCircle } from \"@opal/icons/stop-circle\";\nexport { default as SvgSun } from \"@opal/icons/sun\";\nexport { default as SvgTag } from \"@opal/icons/tag\";\nexport { default as SvgTerminal } from \"@opal/icons/terminal\";\nexport { default as SvgTerminalSmall } from \"@opal/icons/terminal-small\";\nexport { default as SvgTextLines } from \"@opal/icons/text-lines\";\nexport { default as SvgTextLinesSmall } from \"@opal/icons/text-lines-small\";\nexport { default as SvgThumbsDown } from \"@opal/icons/thumbs-down\";\nexport { default as SvgThumbsUp } from \"@opal/icons/thumbs-up\";\nexport { default as SvgTrash } from \"@opal/icons/trash\";\nexport { default as SvgTwoLineSmall } from \"@opal/icons/two-line-small\";\nexport { default as SvgUnplug } from \"@opal/icons/unplug\";\nexport { default as SvgUploadCloud } from \"@opal/icons/upload-cloud\";\nexport { default as SvgUser } from \"@opal/icons/user\";\nexport { default as SvgUserCheck } from \"@opal/icons/user-check\";\nexport { default as SvgUserEdit } from \"@opal/icons/user-edit\";\nexport { default as SvgUserKey } from \"@opal/icons/user-key\";\nexport { default as SvgUserManage } from \"@opal/icons/user-manage\";\nexport { default as SvgUserMinus } from \"@opal/icons/user-minus\";\nexport { default as SvgUserPlus } from \"@opal/icons/user-plus\";\nexport { default as SvgUserShield } from \"@opal/icons/user-shield\";\nexport { default as SvgUserSpeaker } from \"@opal/icons/user-speaker\";\nexport { default as SvgUserSync } from \"@opal/icons/user-sync\";\nexport { default as SvgUserX } from \"@opal/icons/user-x\";\nexport { default as SvgUsers } from \"@opal/icons/users\";\nexport { default as SvgVolume } from \"@opal/icons/volume\";\nexport { default as SvgVolumeOff } from \"@opal/icons/volume-off\";\nexport { default as SvgWallet } from \"@opal/icons/wallet\";\nexport { default as SvgWorkflow } from \"@opal/icons/workflow\";\nexport { default as SvgX } from \"@opal/icons/x\";\nexport { default as SvgXCircle } from \"@opal/icons/x-circle\";\nexport { default as SvgXOctagon } from \"@opal/icons/x-octagon\";\nexport { default as SvgZoomIn } from \"@opal/icons/zoom-in\";\nexport { default as SvgZoomOut } from \"@opal/icons/zoom-out\";\n"
  },
  {
    "path": "web/lib/opal/src/icons/info-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgInfoSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8 11V7H7M8 11H7M8 11H9M8 4.7V4.5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgInfoSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/info.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgInfo = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8.00001 10.6666V7.99998M8.00001 5.33331H8.00668M14.6667 7.99998C14.6667 11.6819 11.6819 14.6666 8.00001 14.6666C4.31811 14.6666 1.33334 11.6819 1.33334 7.99998C1.33334 4.31808 4.31811 1.33331 8.00001 1.33331C11.6819 1.33331 14.6667 4.31808 14.6667 7.99998Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgInfo;\n"
  },
  {
    "path": "web/lib/opal/src/icons/key.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgKey = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14 1.33331L12.6667 2.66665M12.6667 2.66665L14.6667 4.66665L12.3333 6.99998L10.3333 4.99998M12.6667 2.66665L10.3333 4.99998M7.59333 7.73998C7.93756 8.07962 8.2112 8.48401 8.3985 8.92984C8.5858 9.37568 8.68306 9.85416 8.68468 10.3377C8.68631 10.8213 8.59225 11.3004 8.40794 11.7475C8.22363 12.1946 7.95271 12.6008 7.61076 12.9427C7.26882 13.2847 6.86261 13.5556 6.41554 13.7399C5.96846 13.9242 5.48933 14.0183 5.00575 14.0167C4.52218 14.015 4.0437 13.9178 3.59786 13.7305C3.15203 13.5432 2.74764 13.2695 2.408 12.9253C1.74009 12.2338 1.37051 11.3076 1.37886 10.3462C1.38722 9.38479 1.77284 8.46514 2.45267 7.78531C3.13249 7.10548 4.05214 6.71986 5.01353 6.71151C5.97492 6.70315 6.90113 7.07273 7.59267 7.74065L7.59333 7.73998ZM7.59333 7.73998L10.3333 4.99998\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgKey;\n"
  },
  {
    "path": "web/lib/opal/src/icons/keystroke.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgKeystroke = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12 4V9C12 9.55228 11.5523 10 11 10H5M5 10L6.5 8.5M5 10L6.5 11.5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgKeystroke;\n"
  },
  {
    "path": "web/lib/opal/src/icons/lightbulb-simple.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgLightbulbSimple = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M9.99998 11.67H5.99998M7.99998 1.67001C5.42265 1.67001 3.33331 3.75935 3.33331 6.33668C3.33331 8.03421 4.2397 9.52008 5.59492 10.3367C5.83556 10.4817 5.99998 10.7333 5.99998 11.0142V12.3367C5.99998 13.4413 6.89538 14.3367 7.99998 14.3367C9.10458 14.3367 9.99998 13.4413 9.99998 12.3367V11.0142C9.99998 10.7333 10.1644 10.4817 10.405 10.3367C11.7602 9.52008 12.6666 8.03421 12.6666 6.33668C12.6666 3.75935 10.5773 1.67001 7.99998 1.67001Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgLightbulbSimple;\n"
  },
  {
    "path": "web/lib/opal/src/icons/line-chart-up.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgLineChartUp = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M13 6.5L13 3M13 3H9.5M13 3L7.99999 8L6.49999 6.5L3 10M3 13H13\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgLineChartUp;\n"
  },
  {
    "path": "web/lib/opal/src/icons/link.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgLink = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 17 9\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    style={{ transform: \"rotate(315deg)\" }}\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M10.0833 0.75H12.0833C12.5211 0.75 12.9545 0.836219 13.3589 1.00373C13.7634 1.17125 14.1308 1.41678 14.4404 1.72631C14.7499 2.03584 14.9954 2.4033 15.1629 2.80772C15.3304 3.21214 15.4167 3.64559 15.4167 4.08333C15.4167 4.52107 15.3304 4.95453 15.1629 5.35894C14.9954 5.76336 14.7499 6.13083 14.4404 6.44036C14.1308 6.74988 13.7634 6.99542 13.3589 7.16293C12.9545 7.33045 12.5211 7.41667 12.0833 7.41667H10.0833M6.08333 7.41667H4.08333C3.64559 7.41667 3.21214 7.33045 2.80772 7.16293C2.4033 6.99542 2.03584 6.74988 1.72631 6.44036C1.10119 5.81523 0.75 4.96739 0.75 4.08333C0.75 3.19928 1.10119 2.35143 1.72631 1.72631C2.35143 1.10119 3.19928 0.75 4.08333 0.75H6.08333M5.41667 4.08333H10.75\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgLink;\n"
  },
  {
    "path": "web/lib/opal/src/icons/linked-dots.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgLinkedDots = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6 4C6 5.10457 5.10457 6 4 6M6 4C6 2.89543 5.10457 2 4 2C2.89543 2 2 2.89543 2 4C2 5.10457 2.89543 6 4 6M6 4H10M4 6V10M10 4C10 5.10457 10.8954 6 12 6C13.1046 6 14 5.10457 14 4C14 2.89543 13.1046 2 12 2C10.8954 2 10 2.89543 10 4ZM4 10C2.89543 10 2 10.8954 2 12C2 13.1046 2.89543 14 4 14C5.10457 14 6 13.1046 6 12C6 10.8954 5.10457 10 4 10ZM14 12C14 13.1046 13.1046 14 12 14C10.8954 14 10 13.1046 10 12C10 10.8954 10.8954 10 12 10C13.1046 10 14 10.8954 14 12Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgLinkedDots;\n"
  },
  {
    "path": "web/lib/opal/src/icons/litellm.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgLitellm = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 52 52\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path d=\"M48 30H34L29 38H48V30Z\" fill=\"#3B88C3\" />\n    <path d=\"M29 38H11L8.58914 42L8 43H48V38H29Z\" fill=\"#226699\" />\n    <path d=\"M48 43H8L8.58914 42H4V44H48V43Z\" fill=\"#939598\" />\n    <path\n      d=\"M18.6953 17.7969H31.7578C32.4927 17.7969 33.121 17.4087 33.4766 16.7656L35.5664 13H21.9023L18.6953 17.7969Z\"\n      fill=\"#55ACEE\"\n    />\n    <path\n      d=\"M36.3984 11.5234C36.636 11.0939 36.3737 10.4922 35.8828 10.4922H23.5651L21.9023 13H35.5664L36.3984 11.5234Z\"\n      fill=\"#3B88C3\"\n    />\n    <path\n      d=\"M48 30V8H29.5C27.0938 8 24.625 8.94531 23.8516 10.0625L23.5651 10.4922H35.8828C36.3737 10.4922 36.636 11.0939 36.3984 11.5234L35.5664 13L33.4766 16.7656C33.121 17.4087 32.4927 17.7969 31.7578 17.7969H18.6953C16.7788 18.6979 13.786 19.9843 11 21.6321V38H29L34 30H48Z\"\n      fill=\"#D1D3D4\"\n    />\n    <path\n      d=\"M4 30C4 34.6406 11 38 11 38V21.6321C7.3195 23.809 4 26.6167 4 30Z\"\n      fill=\"#3B88C3\"\n    />\n  </svg>\n);\nexport default SvgLitellm;\n"
  },
  {
    "path": "web/lib/opal/src/icons/lm-studio.tsx",
    "content": "import React from \"react\";\nimport type { IconProps } from \"@opal/types\";\n\nconst SvgLmStudio = ({ size, ...props }: IconProps) => {\n  const gradientId = React.useId();\n  return (\n    <svg\n      width={size}\n      height={size}\n      viewBox=\"0 0 480 480\"\n      fill=\"none\"\n      xmlns=\"http://www.w3.org/2000/svg\"\n      {...props}\n    >\n      <rect width={480} height={480} rx={96} fill={`url(#${gradientId})`} />\n      <rect\n        opacity={0.25}\n        x={128}\n        y={80}\n        width={208}\n        height={40}\n        rx={20}\n        fill=\"white\"\n      />\n      <rect\n        opacity={0.9}\n        x={64}\n        y={80}\n        width={208}\n        height={40}\n        rx={20}\n        fill=\"white\"\n      />\n      <rect\n        opacity={0.25}\n        x={208}\n        y={136}\n        width={208}\n        height={40}\n        rx={20}\n        fill=\"white\"\n      />\n      <rect\n        opacity={0.9}\n        x={144}\n        y={136}\n        width={208}\n        height={40}\n        rx={20}\n        fill=\"white\"\n      />\n      <rect\n        opacity={0.25}\n        x={160}\n        y={192}\n        width={208}\n        height={40}\n        rx={20}\n        fill=\"white\"\n      />\n      <rect\n        opacity={0.9}\n        x={96}\n        y={192}\n        width={208}\n        height={40}\n        rx={20}\n        fill=\"white\"\n      />\n      <rect\n        opacity={0.25}\n        x={104}\n        y={248}\n        width={208}\n        height={40}\n        rx={20}\n        fill=\"white\"\n      />\n      <rect\n        opacity={0.9}\n        x={40}\n        y={248}\n        width={208}\n        height={40}\n        rx={20}\n        fill=\"white\"\n      />\n      <rect\n        opacity={0.25}\n        x={160}\n        y={304}\n        width={208}\n        height={40}\n        rx={20}\n        fill=\"white\"\n      />\n      <rect\n        opacity={0.9}\n        x={96}\n        y={304}\n        width={208}\n        height={40}\n        rx={20}\n        fill=\"white\"\n      />\n      <rect\n        opacity={0.25}\n        x={296}\n        y={360}\n        width={136}\n        height={40}\n        rx={20}\n        fill=\"white\"\n      />\n      <rect\n        opacity={0.9}\n        x={224}\n        y={360}\n        width={144}\n        height={40}\n        rx={20}\n        fill=\"white\"\n      />\n      <defs>\n        <linearGradient\n          id={gradientId}\n          x1={-206.055}\n          y1={215.087}\n          x2={224.119}\n          y2={658.689}\n          gradientUnits=\"userSpaceOnUse\"\n        >\n          <stop stopColor=\"#6E7EF3\" />\n          <stop offset={1} stopColor=\"#4F13BE\" />\n        </linearGradient>\n      </defs>\n    </svg>\n  );\n};\n\nexport default SvgLmStudio;\n"
  },
  {
    "path": "web/lib/opal/src/icons/loader.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgLoader = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 15\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.41667 14.0833C3.73477 14.0833 0.75 11.0986 0.75 7.41667C0.75 3.73477 3.73477 0.75 7.41667 0.75C11.0986 0.75 14.0833 3.73477 14.0833 7.41667\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgLoader;\n"
  },
  {
    "path": "web/lib/opal/src/icons/lock.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgLock = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4.66667 7.33334V4.66668C4.66667 3.78262 5.01786 2.93478 5.64298 2.30965C6.2681 1.68453 7.11595 1.33334 8 1.33334C8.88406 1.33334 9.7319 1.68453 10.357 2.30965C10.9821 2.93478 11.3333 3.78262 11.3333 4.66668V7.33334M3.33333 7.33334H12.6667C13.403 7.33334 14 7.9303 14 8.66668V13.3333C14 14.0697 13.403 14.6667 12.6667 14.6667H3.33333C2.59695 14.6667 2 14.0697 2 13.3333V8.66668C2 7.9303 2.59695 7.33334 3.33333 7.33334Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgLock;\n"
  },
  {
    "path": "web/lib/opal/src/icons/log-out.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgLogOut = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 24 24\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M9 21H5C4.46957 21 3.96086 20.7893 3.58579 20.4142C3.21071 20.0391 3 19.5304 3 19V5C3 4.46957 3.21071 3.96086 3.58579 3.58579C3.96086 3.21071 4.46957 3 5 3H9\"\n      strokeWidth={2}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M16 17L21 12L16 7\"\n      strokeWidth={2}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M21 12H9\"\n      strokeWidth={2}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgLogOut;\n"
  },
  {
    "path": "web/lib/opal/src/icons/maximize-2.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgMaximize2 = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M10 2H14M14 2V6M14 2L9.33333 6.66667M6 14H2M2 14V10M2 14L6.66667 9.33333\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgMaximize2;\n"
  },
  {
    "path": "web/lib/opal/src/icons/mcp.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgMcp = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 14 15\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.21111 3.25011L4.28535 6.17584C3.30914 7.15205 3.30914 8.7348 4.28535 9.71101C5.26155 10.6872 6.8443 10.6872 7.82051 9.71101L10.7463 6.78528M0.75 6.17566L5.44353 1.48216C6.41974 0.505948 8.00249 0.505947 8.9787 1.48216C9.95491 2.45837 9.95491 4.04111 8.9787 5.01732M8.9787 5.01732L6.05294 7.94306M8.9787 5.01732C9.95491 4.04111 11.538 4.04148 12.5142 5.01769C13.4904 5.9939 13.4904 7.57665 12.5142 8.55286L8.17457 12.8932C7.97933 13.0884 7.97934 13.405 8.17459 13.6003L8.82434 14.25\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgMcp;\n"
  },
  {
    "path": "web/lib/opal/src/icons/menu.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgMenu = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 32 32\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M26.5 9H5.5M5.5 23H26.5M26.5 16H5.5\"\n      strokeWidth={2}\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgMenu;\n"
  },
  {
    "path": "web/lib/opal/src/icons/microphone-off.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgMicrophoneOff = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    {/* Microphone body */}\n    <path\n      d=\"M12.5 7V7.5C12.5 9.98528 10.4853 12 8 12M3.5 7V7.5C3.5 9.98528 5.51472 12 8 12M8 12V14.5M8 14.5H5M8 14.5H11M8 9.5C6.89543 9.5 6 8.60457 6 7.5V3.5C6 2.39543 6.89543 1.5 8 1.5C9.10457 1.5 10 2.39543 10 3.5V7.5C10 8.60457 9.10457 9.5 8 9.5Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    {/* Diagonal slash */}\n    <path\n      d=\"M2 2L14 14\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgMicrophoneOff;\n"
  },
  {
    "path": "web/lib/opal/src/icons/microphone.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgMicrophone = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12.5 7V7.5C12.5 9.98528 10.4853 12 8 12M3.5 7V7.5C3.5 9.98528 5.51472 12 8 12M8 12V14.5M8 14.5H5M8 14.5H11M8 9.5C6.89543 9.5 6 8.60457 6 7.5V3.5C6 2.39543 6.89543 1.5 8 1.5C9.10457 1.5 10 2.39543 10 3.5V7.5C10 8.60457 9.10457 9.5 8 9.5Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgMicrophone;\n"
  },
  {
    "path": "web/lib/opal/src/icons/minus-circle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgMinusCircle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5.33333 7.99998H10.6667M14.6667 7.99998C14.6667 11.6819 11.6819 14.6666 7.99999 14.6666C4.3181 14.6666 1.33333 11.6819 1.33333 7.99998C1.33333 4.31808 4.3181 1.33331 7.99999 1.33331C11.6819 1.33331 14.6667 4.31808 14.6667 7.99998Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgMinusCircle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/minus.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgMinus = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    strokeWidth={2.5}\n    {...props}\n  >\n    <path d=\"M4 8H12\" strokeLinecap=\"round\" />\n  </svg>\n);\n\nexport default SvgMinus;\n"
  },
  {
    "path": "web/lib/opal/src/icons/moon.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgMoon = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12.7696 11.665C13.4681 10.7615 13.8949 9.67776 14 8.54054C13.0992 9.20635 11.9894 9.52673 10.8724 9.44344C9.75541 9.36014 8.7054 8.87868 7.91336 8.08664C7.12132 7.2946 6.63986 6.24459 6.55656 5.12758C6.47327 4.01056 6.79365 2.90076 7.45946 2C6.32224 2.10509 5.23848 2.53189 4.33497 3.23045C3.43147 3.929 2.74559 4.87043 2.35761 5.94457C1.96962 7.0187 1.89557 8.18112 2.14412 9.29581C2.39267 10.4105 2.95354 11.4313 3.7611 12.2389C4.56866 13.0465 5.5895 13.6073 6.70419 13.8559C7.81888 14.1044 8.9813 14.0304 10.0554 13.6424C11.1296 13.2544 12.071 12.5685 12.7696 11.665Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgMoon;\n"
  },
  {
    "path": "web/lib/opal/src/icons/more-horizontal.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgMoreHorizontal = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8 8.75C8.41421 8.75 8.75 8.41421 8.75 8C8.75 7.58579 8.41421 7.25 8 7.25C7.58579 7.25 7.25 7.58579 7.25 8C7.25 8.41421 7.58579 8.75 8 8.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M12.75 8.75C13.1642 8.75 13.5 8.41421 13.5 8C13.5 7.58579 13.1642 7.25 12.75 7.25C12.3358 7.25 12 7.58579 12 8C12 8.41421 12.3358 8.75 12.75 8.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M3.25 8.75C3.66421 8.75 4 8.41421 4 8C4 7.58579 3.66421 7.25 3.25 7.25C2.83579 7.25 2.5 7.58579 2.5 8C2.5 8.41421 2.83579 8.75 3.25 8.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgMoreHorizontal;\n"
  },
  {
    "path": "web/lib/opal/src/icons/music-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgMusicSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M9.5 10V5L10.5 4.75M9.5 10C9.5 10.8284 8.82843 11.5 8 11.5C7.17157 11.5 6.5 10.8284 6.5 10C6.5 9.17157 7.17157 8.5 8 8.5C8.82843 8.5 9.5 9.17157 9.5 10Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgMusicSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/network-graph.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgNetworkGraph = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_2828_22555)\">\n      <path\n        d=\"M9.23744 4.48744C9.92086 3.80402 9.92086 2.69598 9.23744 2.01256C8.55402 1.32915 7.44598 1.32915 6.76256 2.01256C6.07915 2.69598 6.07915 3.80402 6.76256 4.48744M9.23744 4.48744C8.89573 4.82915 8.44787 5 8 5M9.23744 4.48744L11.7626 8.01256M6.76256 4.48744C7.10427 4.82915 7.55214 5 8 5M6.76256 4.48744L4.23744 8.01256M8 11C7.0335 11 6.25001 11.7835 6.25001 12.75C6.25001 13.7165 7.03351 14.5 8.00001 14.5C8.9665 14.5 9.75 13.7165 9.75 12.75C9.75 11.7835 8.9665 11 8 11ZM8 11V5M4.23744 8.01256C4.92085 8.69598 4.92422 9.81658 4.2408 10.5C3.55739 11.1834 2.44598 11.1709 1.76256 10.4874C1.07915 9.80402 1.07915 8.69598 1.76256 8.01256C2.44598 7.32915 3.55402 7.32915 4.23744 8.01256ZM11.7626 8.01256C11.0791 8.69598 11.0791 9.80402 11.7626 10.4874C12.446 11.1709 13.554 11.1709 14.2374 10.4874C14.9209 9.80402 14.9209 8.69598 14.2374 8.01256C13.554 7.32915 12.446 7.32915 11.7626 8.01256Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_2828_22555\">\n        <rect width=\"16\" height=\"16\" fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgNetworkGraph;\n"
  },
  {
    "path": "web/lib/opal/src/icons/notification-bubble.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgNotificationBubble = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 6 6\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    className={props.className}\n    {...props}\n  >\n    <path\n      d=\"M0 3C0 1.34315 1.34315 0 3 0C4.65685 0 6 1.34315 6 3C6 4.65685 4.65685 6 3 6C1.34315 6 0 4.65685 0 3Z\"\n      fill=\"#DC2626\"\n    />\n  </svg>\n);\nexport default SvgNotificationBubble;\n"
  },
  {
    "path": "web/lib/opal/src/icons/ollama.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgOllama = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      fillRule=\"evenodd\"\n      clipRule=\"evenodd\"\n      d=\"M5.24969 0.983626C5.3947 1.04069 5.5256 1.13467 5.64442 1.25886C5.84246 1.46428 6.00961 1.75831 6.13716 2.10672C6.26538 2.45715 6.34862 2.84516 6.38018 3.23452C6.80284 2.9954 7.27191 2.84981 7.75568 2.80757L7.78992 2.80488C8.37396 2.75789 8.95128 2.86329 9.45476 3.12308C9.52256 3.15866 9.58902 3.19693 9.65414 3.2372C9.68771 2.85523 9.76961 2.47594 9.89581 2.13358C10.0234 1.7845 10.1905 1.49113 10.3879 1.28504C10.4982 1.16573 10.6331 1.07181 10.7833 1.00981C10.9558 0.942676 11.1391 0.930593 11.3176 0.981612C11.5868 1.05814 11.8178 1.22865 11.9997 1.47637C12.1662 1.7026 12.291 1.9926 12.3763 2.34034C12.5307 2.96734 12.5575 3.79238 12.4535 4.78725L12.4891 4.81411L12.5065 4.82686C13.0147 5.21353 13.3685 5.76468 13.5558 6.40443C13.8478 7.40267 13.7008 8.52241 13.1973 9.14874L13.1852 9.16283L13.1866 9.16485C13.4665 9.67638 13.6363 10.2168 13.6726 10.776L13.6739 10.7961C13.7169 11.5111 13.5397 12.2307 13.1275 12.9376L13.1228 12.9443L13.1295 12.9604C13.4464 13.7371 13.5457 14.5192 13.4235 15.3006L13.4195 15.3268C13.4006 15.4409 13.3372 15.5429 13.2431 15.6103C13.1491 15.6778 13.0322 15.7052 12.918 15.6866C12.8615 15.6778 12.8072 15.6579 12.7584 15.628C12.7096 15.5982 12.6671 15.559 12.6334 15.5128C12.5997 15.4665 12.5755 15.414 12.5622 15.3584C12.5488 15.3027 12.5465 15.245 12.5555 15.1885C12.6676 14.495 12.5622 13.7996 12.2333 13.092C12.2026 13.0263 12.189 12.9539 12.1937 12.8815C12.1984 12.8091 12.2212 12.739 12.2602 12.6778L12.2628 12.6738C12.6683 12.0535 12.8361 11.4453 12.7999 10.8478C12.769 10.3249 12.5817 9.81132 12.2628 9.32193C12.2008 9.22679 12.1787 9.11107 12.2013 8.99978C12.224 8.88848 12.2894 8.79055 12.3837 8.72716L12.3897 8.72313C12.5528 8.61639 12.7032 8.34384 12.7791 7.97126C12.8628 7.53071 12.8409 7.07659 12.7153 6.6461C12.5777 6.17619 12.3259 5.78414 11.9735 5.51629C11.5741 5.21152 11.0451 5.0645 10.3758 5.1068C10.2883 5.11249 10.2011 5.09179 10.1254 5.0474C10.0498 5.003 9.98922 4.93693 9.95153 4.85774C9.74074 4.41132 9.43328 4.09178 9.04996 3.89374C8.68195 3.71007 8.26995 3.63288 7.86041 3.67087C7.02463 3.73733 6.28753 4.20859 6.06802 4.80269C6.03696 4.88631 5.9811 4.95844 5.90792 5.00943C5.83473 5.06042 5.74772 5.08783 5.65852 5.088C4.94223 5.08934 4.38774 5.25717 3.98227 5.55993C3.63184 5.82174 3.39286 6.1876 3.26665 6.62596C3.15245 7.03859 3.13681 7.47227 3.221 7.89205C3.29619 8.26664 3.44321 8.57678 3.6117 8.74394L3.61707 8.74864C3.75939 8.8876 3.7896 9.10443 3.69025 9.27561C3.44858 9.69317 3.26799 10.3155 3.23846 10.9136C3.20489 11.597 3.36332 12.1904 3.72113 12.616L3.73187 12.6288C3.78587 12.6917 3.82059 12.7689 3.8319 12.851C3.84321 12.9332 3.83062 13.0168 3.79564 13.092C3.40897 13.9217 3.29015 14.6038 3.41837 15.1408C3.44142 15.2525 3.41997 15.3688 3.3586 15.4649C3.29723 15.561 3.20078 15.6293 3.08977 15.6554C2.97876 15.6815 2.86196 15.6632 2.76423 15.6044C2.6665 15.5457 2.59555 15.4511 2.56648 15.3409C2.40335 14.6575 2.51412 13.8747 2.88401 12.9926L2.89341 12.9691L2.88803 12.9611C2.70624 12.6926 2.57056 12.3956 2.48659 12.0823L2.48324 12.0696C2.38134 11.6788 2.34125 11.2745 2.36442 10.8713C2.39395 10.2604 2.55104 9.63476 2.78197 9.13262L2.79002 9.11517L2.78868 9.11383C2.59199 8.83322 2.44631 8.47407 2.36576 8.07666L2.3624 8.06055C2.25141 7.50621 2.2728 6.93351 2.42483 6.38899C2.60072 5.77475 2.94644 5.2471 3.45596 4.8658C3.49624 4.83559 3.53853 4.80538 3.58082 4.77718C3.47409 3.77492 3.50094 2.94451 3.65601 2.31349C3.74127 1.96575 3.8668 1.67574 4.03329 1.44951C4.21454 1.20247 4.44547 1.03196 4.71466 0.95476C4.89323 0.903741 5.07717 0.915153 5.24969 0.982955V0.983626ZM8.01279 7.08581C8.64114 7.08581 9.22115 7.29593 9.65481 7.65978C10.0777 8.01356 10.3295 8.48884 10.3295 8.96211C10.3295 9.55823 10.0569 10.0228 9.56888 10.3195C9.15267 10.5712 8.59482 10.6934 7.95573 10.6934C7.27838 10.6934 6.69972 10.5195 6.28216 10.2007C5.86797 9.88516 5.6357 9.4421 5.6357 8.96211C5.6357 8.4875 5.90288 8.01087 6.3446 7.65575C6.79303 7.29526 7.38512 7.08581 8.01279 7.08581ZM8.01279 7.6873C7.54706 7.68323 7.09369 7.83704 6.72657 8.12365C6.4171 8.37203 6.24189 8.68419 6.24189 8.96278C6.24189 9.2501 6.38286 9.5193 6.65138 9.72405C6.95683 9.95699 7.40593 10.0919 7.95573 10.0919C8.49211 10.0919 8.94457 9.99324 9.2527 9.80595C9.56351 9.61798 9.72261 9.34543 9.72261 8.96211C9.72261 8.67815 9.55747 8.36465 9.26411 8.11895C8.9392 7.84707 8.49882 7.6873 8.01279 7.6873ZM8.4572 8.49958L8.45989 8.50227C8.54044 8.60363 8.52366 8.75065 8.42229 8.83121L8.22627 8.98561V9.28501C8.22591 9.35166 8.19914 9.41545 8.15183 9.46239C8.10451 9.50933 8.04051 9.53559 7.97386 9.53541C7.90721 9.53559 7.84321 9.50933 7.79589 9.46239C7.74857 9.41545 7.7218 9.35166 7.72145 9.28501V8.97621L7.53952 8.82986C7.51552 8.81063 7.49557 8.78684 7.48082 8.75986C7.46606 8.73288 7.45679 8.70325 7.45355 8.67267C7.4503 8.64209 7.45314 8.61117 7.46191 8.58169C7.47067 8.55221 7.48519 8.52476 7.50461 8.50092C7.54424 8.45269 7.6013 8.42204 7.66339 8.41563C7.72548 8.40922 7.7876 8.42757 7.83624 8.46669L7.98057 8.58215L8.12826 8.46534C8.17673 8.42705 8.23825 8.4092 8.29969 8.41559C8.36113 8.42199 8.41765 8.45213 8.4572 8.49958ZM5.07381 7.21134C5.3947 7.21134 5.65583 7.47315 5.65583 7.79605C5.65601 7.95083 5.59474 8.09935 5.48549 8.20899C5.37623 8.31862 5.22792 8.3804 5.07314 8.38076C4.91859 8.38023 4.77056 8.31846 4.66146 8.20899C4.55237 8.09952 4.49112 7.95127 4.49112 7.79672C4.49076 7.64194 4.55186 7.49335 4.66099 7.38359C4.77012 7.27383 4.91903 7.21188 5.07381 7.21134ZM10.9182 7.21134C11.2404 7.21134 11.5009 7.47315 11.5009 7.79605C11.5011 7.95083 11.4398 8.09935 11.3306 8.20899C11.2213 8.31862 11.073 8.3804 10.9182 8.38076C10.7637 8.38023 10.6156 8.31846 10.5065 8.20899C10.3974 8.09952 10.3362 7.95127 10.3362 7.79672C10.3358 7.64194 10.3969 7.49335 10.5061 7.38359C10.6152 7.27383 10.7634 7.21188 10.9182 7.21134ZM4.93754 1.79591L4.93552 1.79725C4.85775 1.83107 4.79134 1.88653 4.7442 1.95702L4.74084 1.96105C4.6482 2.08793 4.56765 2.27455 4.50723 2.51958C4.39311 2.98412 4.36223 3.61448 4.42399 4.38715C4.71265 4.30123 5.02749 4.24752 5.3665 4.22805L5.37321 4.22738L5.38597 4.20456C5.41685 4.14951 5.44974 4.09648 5.48532 4.04412C5.56789 3.52654 5.50009 2.90826 5.31548 2.40344C5.22553 2.15909 5.1161 1.96709 5.01138 1.85767C4.98976 1.83492 4.96567 1.81465 4.93955 1.79725L4.93754 1.79591ZM11.0961 1.82276L11.0948 1.82343C11.0686 1.84083 11.0446 1.8611 11.0229 1.88385C10.9182 1.99327 10.8081 2.18594 10.7188 2.43029C10.5242 2.96331 10.459 3.62253 10.5644 4.1569L10.6034 4.22201L10.6087 4.23141H10.6289C10.962 4.2315 11.2935 4.27942 11.613 4.37373C11.6707 3.61918 11.6385 3.00225 11.5271 2.54643C11.4667 2.3014 11.3861 2.11478 11.2928 1.9879L11.2901 1.98387C11.2431 1.91313 11.1767 1.85743 11.0988 1.82343H11.0961V1.82276Z\"\n      fill=\"currentColor\"\n    />\n  </svg>\n);\n\nexport default SvgOllama;\n"
  },
  {
    "path": "web/lib/opal/src/icons/onyx-logo-typed.tsx",
    "content": "import SvgOnyxLogo from \"@opal/icons/onyx-logo\";\nimport SvgOnyxTyped from \"@opal/icons/onyx-typed\";\nimport { cn } from \"@opal/utils\";\n\ninterface OnyxLogoTypedProps {\n  size?: number;\n  className?: string;\n}\n\n// # NOTE(@raunakab):\n// This ratio is not some random, magical number; it is available on Figma.\nconst HEIGHT_TO_GAP_RATIO = 5 / 16;\n\nconst SvgOnyxLogoTyped = ({ size: height, className }: OnyxLogoTypedProps) => {\n  const gap = height != null ? height * HEIGHT_TO_GAP_RATIO : undefined;\n\n  return (\n    <div\n      className={cn(`flex flex-row items-center`, className)}\n      style={{ gap }}\n    >\n      <SvgOnyxLogo size={height} />\n      <SvgOnyxTyped size={height} />\n    </div>\n  );\n};\nexport default SvgOnyxLogoTyped;\n"
  },
  {
    "path": "web/lib/opal/src/icons/onyx-logo.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgOnyxLogo = ({ size, ...props }: IconProps) => (\n  <svg\n    height={size}\n    viewBox=\"0 0 64 64\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M10.4014 13.25L18.875 32L10.3852 50.75L2 32L10.4014 13.25Z\"\n      fill=\"var(--theme-primary-05)\"\n    />\n    <path\n      d=\"M53.5264 13.25L62 32L53.5102 50.75L45.125 32L53.5264 13.25Z\"\n      fill=\"var(--theme-primary-05)\"\n    />\n    <path\n      d=\"M32 45.125L50.75 53.5625L32 62L13.25 53.5625L32 45.125Z\"\n      fill=\"var(--theme-primary-05)\"\n    />\n    <path\n      d=\"M32 2L50.75 10.4375L32 18.875L13.25 10.4375L32 2Z\"\n      fill=\"var(--theme-primary-05)\"\n    />\n  </svg>\n);\nexport default SvgOnyxLogo;\n"
  },
  {
    "path": "web/lib/opal/src/icons/onyx-octagon.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgOnyxOctagon = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_586_578)\">\n      <path\n        d=\"M4.5 2.50002L8 1.00002L11.5 2.50002M13.5 4.50002L15 8.00001L13.5 11.5M11.5 13.5L8 15L4.5 13.5M2.5 11.5L1 8L2.5 4.50002\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_586_578\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgOnyxOctagon;\n"
  },
  {
    "path": "web/lib/opal/src/icons/onyx-typed.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgOnyxTyped = ({ size, ...props }: IconProps) => (\n  <svg\n    height={size}\n    viewBox=\"0 0 152 64\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M19.1795 51.2136C15.6695 51.2136 12.4353 50.3862 9.47691 48.7315C6.56865 47.0768 4.2621 44.8454 2.55726 42.0374C0.85242 39.1793 0 36.0955 0 32.7861C0 30.279 0.451281 27.9223 1.35384 25.716C2.30655 23.4596 3.76068 21.3285 5.71623 19.3228L11.8085 13.08C12.4604 12.6789 13.4131 12.3529 14.6666 12.1022C15.9202 11.8014 17.2991 11.6509 18.8034 11.6509C22.3134 11.6509 25.5225 12.4783 28.4307 14.133C31.3891 15.7877 33.7208 18.0441 35.4256 20.9023C37.1304 23.7103 37.9829 26.794 37.9829 30.1536C37.9829 32.6106 37.5065 34.9673 36.5538 37.2237C35.6512 39.4802 34.147 41.6864 32.041 43.8426L26.3248 49.7845C25.3219 50.2358 24.2188 50.5868 23.0154 50.8375C21.8621 51.0882 20.5835 51.2136 19.1795 51.2136ZM20.1572 43.8426C21.8621 43.8426 23.4917 43.4164 25.0461 42.5639C26.6005 41.6614 27.8541 40.3577 28.8068 38.6528C29.8097 36.948 30.3111 34.9172 30.3111 32.5605C30.3111 30.0032 29.6843 27.6966 28.4307 25.6408C27.2273 23.5849 25.6478 21.9803 23.6923 20.8271C21.7869 19.6236 19.8313 19.0219 17.8256 19.0219C16.0706 19.0219 14.4159 19.4732 12.8615 20.3758C11.3573 21.2282 10.1288 22.5068 9.17606 24.2117C8.22335 25.9166 7.747 27.9473 7.747 30.304C7.747 32.8613 8.34871 35.1679 9.55212 37.2237C10.7555 39.2796 12.31 40.9092 14.2154 42.1127C16.1709 43.2659 18.1515 43.8426 20.1572 43.8426Z\"\n      fill=\"var(--theme-primary-05)\"\n    />\n    <path\n      d=\"M42.6413 50.4614V12.4031H50.6891V17.7433L55.5028 12.7039C56.0544 12.4532 56.8065 12.2276 57.7592 12.027C58.7621 11.7763 59.8903 11.6509 61.1438 11.6509C64.0521 11.6509 66.5843 12.3028 68.7404 13.6065C70.9467 14.8601 72.6264 16.6401 73.7797 18.9467C74.9831 21.2533 75.5848 23.961 75.5848 27.0698V50.4614H67.6122V29.1006C67.6122 26.9946 67.2612 25.1895 66.5592 23.6852C65.9074 22.1308 64.9547 20.9775 63.7011 20.2253C62.4977 19.4231 61.0686 19.0219 59.4139 19.0219C56.7564 19.0219 54.6253 19.9245 53.0208 21.7296C51.4663 23.4846 50.6891 25.9416 50.6891 29.1006V50.4614H42.6413Z\"\n      fill=\"var(--theme-primary-05)\"\n    />\n    <path\n      d=\"M82.3035 64V56.0273H89.9753C91.2288 56.0273 92.2066 55.7264 92.9086 55.1247C93.6607 54.523 94.2625 53.5452 94.7137 52.1913L108.027 12.4031H116.751L103.664 49.4084C103.062 51.1634 102.461 52.5173 101.859 53.47C101.307 54.4227 100.53 55.4506 99.5274 56.5538L92.4573 64H82.3035ZM90.7274 46.6255L76.9633 12.4031H85.989L99.4522 46.6255H90.7274Z\"\n      fill=\"var(--theme-primary-05)\"\n    />\n    <path\n      d=\"M115.657 50.4614L129.045 31.2066L116.033 12.4031H125.435L134.085 24.8134L142.358 12.4031H151.308L138.372 31.0562L151.684 50.4614H142.358L133.332 37.3742L124.683 50.4614H115.657Z\"\n      fill=\"var(--theme-primary-05)\"\n    />\n  </svg>\n);\nexport default SvgOnyxTyped;\n"
  },
  {
    "path": "web/lib/opal/src/icons/openai.tsx",
    "content": "import React from \"react\";\nimport type { IconProps } from \"@opal/types\";\n\nconst SvgOpenAI = ({ size, ...props }: IconProps) => {\n  const clipId = React.useId();\n  return (\n    <svg\n      width={size}\n      height={size}\n      viewBox=\"0 0 16 16\"\n      fill=\"none\"\n      xmlns=\"http://www.w3.org/2000/svg\"\n      {...props}\n    >\n      <g clipPath={`url(#${clipId})`}>\n        <path\n          d=\"M6.27989 5.99136V4.58828C6.27989 4.4701 6.32383 4.38143 6.42625 4.32242L9.22206 2.69783C9.60266 2.4763 10.0564 2.37296 10.5247 2.37296C12.2813 2.37296 13.3937 3.74654 13.3937 5.20864C13.3937 5.31199 13.3937 5.43016 13.379 5.54833L10.4808 3.83506C10.3052 3.73172 10.1295 3.73172 9.95386 3.83506L6.27989 5.99136ZM12.8082 11.4561V8.10334C12.8082 7.89651 12.7203 7.74883 12.5447 7.64548L8.87071 5.48918L10.071 4.79498C10.1734 4.73597 10.2613 4.73597 10.3637 4.79498L13.1595 6.41959C13.9647 6.89226 14.5061 7.89651 14.5061 8.87124C14.5061 9.99365 13.8476 11.0277 12.8082 11.4561ZM5.41629 8.50218L4.21603 7.7933C4.11361 7.73429 4.06967 7.64563 4.06967 7.52745V4.27824C4.06967 2.69797 5.26993 1.50157 6.89473 1.50157C7.50955 1.50157 8.08029 1.70841 8.56345 2.07761L5.67991 3.76136C5.5043 3.86471 5.41643 4.01239 5.41643 4.21923L5.41629 8.50218ZM7.99984 10.0086L6.27988 9.03389V6.96624L7.99984 5.99151L9.71963 6.96624V9.03389L7.99984 10.0086ZM9.10494 14.4985C8.49012 14.4985 7.91938 14.2917 7.43622 13.9226L10.3197 12.2387C10.4953 12.1354 10.5832 11.9878 10.5832 11.7809V7.4978L11.7982 8.20668C11.9006 8.2657 11.9445 8.35436 11.9445 8.47254V11.7218C11.9445 13.302 10.7296 14.4985 9.10494 14.4985ZM5.63583 11.205L2.84002 9.58041C2.03489 9.10771 1.4934 8.10348 1.4934 7.12875C1.4934 5.99151 2.16672 4.97244 3.20591 4.5441V7.91148C3.20591 8.11831 3.29379 8.26599 3.46939 8.36934L7.12882 10.5108L5.92856 11.205C5.82613 11.264 5.73825 11.264 5.63583 11.205ZM5.47491 13.6272C3.82088 13.6272 2.60592 12.3717 2.60592 10.821C2.60592 10.7028 2.62061 10.5846 2.63517 10.4665L5.51871 12.1502C5.69432 12.2535 5.87006 12.2535 6.04567 12.1502L9.71964 10.0088V11.4119C9.71964 11.53 9.67571 11.6186 9.57328 11.6777L6.77746 13.3023C6.39688 13.5238 5.94323 13.6272 5.47491 13.6272ZM9.10494 15.3846C10.8761 15.3846 12.3544 14.1145 12.6912 12.4307C14.3305 12.0024 15.3845 10.4516 15.3845 8.87139C15.3845 7.8375 14.9453 6.83326 14.1549 6.10955C14.2281 5.79937 14.2721 5.48918 14.2721 5.17914C14.2721 3.06718 12.5741 1.48677 10.6126 1.48677C10.2175 1.48677 9.83689 1.54578 9.4563 1.67878C8.79753 1.02891 7.88999 0.615387 6.89473 0.615387C5.12357 0.615387 3.64528 1.88548 3.30848 3.56923C1.66914 3.99756 0.615234 5.54834 0.615234 7.1286C0.615234 8.1625 1.05431 9.16673 1.84474 9.89044C1.77155 10.2006 1.72762 10.5108 1.72762 10.8209C1.72762 12.9328 3.42558 14.5132 5.38704 14.5132C5.78218 14.5132 6.16278 14.4542 6.54336 14.3213C7.20198 14.9711 8.10953 15.3846 9.10494 15.3846Z\"\n          fill=\"currentColor\"\n        />\n      </g>\n      <defs>\n        <clipPath id={clipId}>\n          <rect width=\"16\" height=\"16\" fill=\"white\" />\n        </clipPath>\n      </defs>\n    </svg>\n  );\n};\n\nexport default SvgOpenAI;\n"
  },
  {
    "path": "web/lib/opal/src/icons/openrouter.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgOpenrouter = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 48 40\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <title>OpenRouter</title>\n    <path\n      fillRule=\"evenodd\"\n      clipRule=\"evenodd\"\n      d=\"M33.6 0L48 8.19239V8.36602L33.6 16.4V12.2457L31.8202 12.1858C29.7043 12.1299 28.6014 12.1898 27.2887 12.4053C25.1628 12.7546 23.2168 13.5569 21.001 15.1035L16.6733 18.1071C16.1059 18.4962 15.6843 18.7776 15.3147 19.0151L14.2857 19.6577L13.4925 20.1247L14.2617 20.5837L15.3207 21.2583C16.2717 21.8849 17.6583 22.8469 20.7173 24.9823C22.9351 26.529 24.8791 27.3312 27.005 27.6805L27.6044 27.7703C28.991 27.9519 30.7029 27.9579 33.6 27.8362V23.6L48 31.7198V31.8934L33.6 40V36.284L31.9041 36.3279C29.1349 36.4117 27.6344 36.3319 25.6344 36.0046C22.2498 35.4458 19.1209 34.1566 15.8821 31.8954L11.5704 28.9019C11.0745 28.5603 10.5715 28.2289 10.0619 27.908L9.12887 27.3492C8.62495 27.0592 8.11878 26.7731 7.61039 26.491C5.81019 25.4912 1.12488 24.2658 0 24.2658V15.836C1.12687 15.822 6.09391 14.5946 7.89011 13.5928L9.92008 12.4353L10.7952 11.8884C11.6503 11.3296 12.9371 10.4396 16.1618 8.19039C19.4006 5.92925 22.5275 4.63803 25.9141 4.08123C28.2158 3.70204 29.9237 3.65614 33.6 3.80582V0Z\"\n      fill=\"currentColor\"\n    />\n  </svg>\n);\n\nexport default SvgOpenrouter;\n"
  },
  {
    "path": "web/lib/opal/src/icons/organization.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgOrganization = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.5 14H13.5C14.0523 14 14.5 13.5523 14.5 13V6C14.5 5.44772 14.0523 5 13.5 5H7.5M7.5 14V11M7.5 14H4.5M7.5 5V3C7.5 2.44772 7.05228 2 6.5 2H4.5M7.5 5H1.5M7.5 5V8M1.5 5V3C1.5 2.44772 1.94772 2 2.5 2H4.5M1.5 5V8M7.5 8V11M7.5 8H4.5M1.5 8V11M1.5 8H4.5M7.5 11H4.5M1.5 11V13C1.5 13.5523 1.94772 14 2.5 14H4.5M1.5 11H4.5M4.5 2V8M4.5 14V11M4.5 11V8M10 8H12M10 11H12\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgOrganization;\n"
  },
  {
    "path": "web/lib/opal/src/icons/paint-brush.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgPaintBrush = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 32 32\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5.00001 17L5.00002 19.2344C5.00003 20.2431 5.7511 21.0939 6.75195 21.219L11.2481 21.781C12.2489 21.9061 13 22.7569 13 23.7656L13 26C13 27.6569 14.3431 29 16 29C17.6569 29 19 27.6569 19 26L19 23.7656C19 22.7569 19.7511 21.9061 20.7519 21.781L25.2481 21.219C26.2489 21.0939 27 20.2431 27 19.2344L27 17M5.00001 17L5 9C5 5.68629 7.68629 3 11 3H17M5.00001 17H27M27 17L27 3H22M22 3L22 10M22 3H17M17 3L17 8\"\n      strokeWidth={2.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgPaintBrush;\n"
  },
  {
    "path": "web/lib/opal/src/icons/paperclip.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgPaperclip = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12.0924 3.99814L12.0924 10.6626C12.0924 11.724 11.6707 12.742 10.9202 13.4926C10.1696 14.2431 9.15163 14.6648 8.09018 14.6648C7.02872 14.6648 6.01074 14.2431 5.26018 13.4926C4.50961 12.742 4.08795 11.724 4.08795 10.6626L4.08795 3.99814C4.08795 3.2905 4.36906 2.61184 4.86944 2.11147C5.36981 1.6111 6.04847 1.32999 6.7561 1.32999C7.46374 1.32999 8.14239 1.61109 8.64277 2.11147C9.14314 2.61184 9.42425 3.2905 9.42425 3.99814L9.41954 10.6673C9.41954 11.0211 9.27898 11.3604 9.0288 11.6106C8.77861 11.8608 8.43928 12.0013 8.08546 12.0013C7.73164 12.0013 7.39232 11.8608 7.14213 11.6106C6.89194 11.3604 6.75139 11.0211 6.75139 10.6673L6.7561 4.66753\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgPaperclip;\n"
  },
  {
    "path": "web/lib/opal/src/icons/pause-circle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgPauseCircle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 15\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6.08333 9.41667V5.41667M8.75 9.41667V5.41667M14.0833 7.41667C14.0833 11.0986 11.0986 14.0833 7.41667 14.0833C3.73477 14.0833 0.75 11.0986 0.75 7.41667C0.75 3.73477 3.73477 0.75 7.41667 0.75C11.0986 0.75 14.0833 3.73477 14.0833 7.41667Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgPauseCircle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/pen-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgPenSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6.5 11L11.5 6L10 4.5L5 9.5L5 11H6.5Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgPenSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/pencil-ruler.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgPencilRuler = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    xmlns=\"http://www.w3.org/2000/svg\"\n    viewBox=\"0 0 24 24\"\n    fill=\"none\"\n    strokeWidth={1.5}\n    strokeLinecap=\"round\"\n    strokeLinejoin=\"round\"\n    className=\"lucide lucide-pencil-ruler-icon lucide-pencil-ruler\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path d=\"M13 7 8.7 2.7a2.41 2.41 0 0 0-3.4 0L2.7 5.3a2.41 2.41 0 0 0 0 3.4L7 13\" />\n    <path d=\"m8 6 2-2\" />\n    <path d=\"m18 16 2-2\" />\n    <path d=\"m17 11 4.3 4.3c.94.94.94 2.46 0 3.4l-2.6 2.6c-.94.94-2.46.94-3.4 0L11 17\" />\n    <path d=\"M21.174 6.812a1 1 0 0 0-3.986-3.987L3.842 16.174a2 2 0 0 0-.5.83l-1.321 4.352a.5.5 0 0 0 .623.622l4.353-1.32a2 2 0 0 0 .83-.497z\" />\n    <path d=\"m15 5 4 4\" />\n  </svg>\n);\nexport default SvgPencilRuler;\n"
  },
  {
    "path": "web/lib/opal/src/icons/pie-chart.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgPieChart = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_76_2931)\">\n      <path\n        d=\"M14.14 10.5933C13.7159 11.5963 13.0525 12.4802 12.2079 13.1675C11.3633 13.8549 10.3632 14.325 9.29496 14.5365C8.22674 14.7481 7.12295 14.6948 6.0801 14.3812C5.03725 14.0676 4.08709 13.5034 3.31268 12.7378C2.53828 11.9722 1.96321 11.0285 1.63776 9.98931C1.31231 8.95011 1.24638 7.847 1.44574 6.77643C1.64509 5.70586 2.10367 4.70043 2.78137 3.84803C3.45907 2.99563 4.33526 2.32222 5.33334 1.88668M14.6667 8.00001C14.6667 7.12453 14.4942 6.25762 14.1592 5.44879C13.8242 4.63995 13.3331 3.90502 12.7141 3.28597C12.095 2.66691 11.3601 2.17584 10.5512 1.84081C9.74239 1.50578 8.87548 1.33334 8 1.33334V8.00001H14.6667Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_76_2931\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgPieChart;\n"
  },
  {
    "path": "web/lib/opal/src/icons/pin.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgPin = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6.70001 9.29581L2.20001 13.7958M6.70001 9.29581L9.99291 12.5887C10.6229 13.2187 11.7 12.7725 11.7 11.8816V10.5384C11.7 9.7428 12.0161 8.97974 12.5787 8.41713L13.4929 7.50292C13.8834 7.11239 13.8834 6.47923 13.4929 6.0887L9.90712 2.50292C9.51659 2.11239 8.88343 2.11239 8.49291 2.50292L7.57869 3.41713C7.01608 3.97974 6.25302 4.29581 5.45737 4.29581H4.11423C3.22332 4.29581 2.77715 5.37295 3.40712 6.00291L6.70001 9.29581Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgPin;\n"
  },
  {
    "path": "web/lib/opal/src/icons/pinned.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgPinned = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    xmlns=\"http://www.w3.org/2000/svg\"\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8 8.85714V14.14286M8 8.85714L13.14286 8.85714C14.03377 8.85714 14.47993 7.78 13.84997 7.15003L12.90022 6.20028C12.33761 5.63767 12.02155 4.87461 12.02155 4.07896V2.78571C12.02155 2.23342 11.57384 1.78571 11.02155 1.78571L4.97845 1.78571C4.42616 1.78571 3.97845 2.23342 3.97845 2.78571L3.97845 4.07896C3.97845 4.87461 3.66238 5.63767 3.09977 6.20028L2.15002 7.15003C1.52006 7.78 1.96622 8.85714 2.85713 8.85714H8Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgPinned;\n"
  },
  {
    "path": "web/lib/opal/src/icons/play-circle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgPlayCircle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 15\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.41667 14.0833C11.0986 14.0833 14.0833 11.0986 14.0833 7.41667C14.0833 3.73477 11.0986 0.75 7.41667 0.75C3.73477 0.75 0.75 3.73477 0.75 7.41667C0.75 11.0986 3.73477 14.0833 7.41667 14.0833Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M6.08333 4.75L10.0833 7.41667L6.08333 10.0833V4.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgPlayCircle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/plug.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgPlug = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12 10.5H15M12 10.5V12.5M12 10.5V5.5M12 3.5H8.5C6.01472 3.5 4 5.51472 4 8M12 3.5V5.5M12 3.5V2M12 12.5H8.5C6.01472 12.5 4 10.4853 4 8M12 12.5V14M4 8H1M12 5.5H15\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgPlug;\n"
  },
  {
    "path": "web/lib/opal/src/icons/plus-circle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgPlusCircle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_16_2625)\">\n      <path\n        d=\"M7.99999 5.33333V10.6667M5.33333 7.99999H10.6667M14.6667 7.99999C14.6667 11.6819 11.6819 14.6667 7.99999 14.6667C4.3181 14.6667 1.33333 11.6819 1.33333 7.99999C1.33333 4.3181 4.3181 1.33333 7.99999 1.33333C11.6819 1.33333 14.6667 4.3181 14.6667 7.99999Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_16_2625\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgPlusCircle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/plus.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgPlus = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    xmlns=\"http://www.w3.org/2000/svg\"\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8 2V14M2 8H14\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgPlus;\n"
  },
  {
    "path": "web/lib/opal/src/icons/progress-bars.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgProgressBars = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5.5 2.00003L13.25 2C13.9403 2 14.5 2.55964 14.5 3.25C14.5 3.94036 13.9403 4.5 13.25 4.5L5.5 4.50003M5.5 2.00003L2.74998 2C2.05963 2 1.49998 2.55964 1.49998 3.25C1.49998 3.94036 2.05963 4.5 2.74998 4.5L5.5 4.50003M5.5 2.00003V4.50003M10.5 11.5H13.25C13.9403 11.5 14.5 12.0596 14.5 12.75C14.5 13.4404 13.9403 14 13.25 14H10.5M10.5 11.5H2.74998C2.05963 11.5 1.49998 12.0596 1.49998 12.75C1.49998 13.4404 2.05963 14 2.74999 14H10.5M10.5 11.5V14M8 6.75H13.25C13.9403 6.75 14.5 7.30964 14.5 8C14.5 8.69036 13.9403 9.25 13.25 9.25H8M8 6.75H2.74998C2.05963 6.75 1.49998 7.30964 1.49998 8C1.49998 8.69036 2.05963 9.25 2.74998 9.25H8M8 6.75V9.25\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgProgressBars;\n"
  },
  {
    "path": "web/lib/opal/src/icons/progress-circle.tsx",
    "content": "import { cn } from \"@/lib/utils\";\nimport SvgCheckCircle from \"@opal/icons/check-circle\";\nimport type { IconProps } from \"@opal/types\";\n\nexport interface SvgProgressCircleProps extends IconProps {\n  value?: number;\n}\n\nconst SvgProgressCircle = ({\n  value = 100,\n  className,\n  ...props\n}: SvgProgressCircleProps) => {\n  // Clamp value between 0 and 100\n  const progress = Math.min(Math.max(value, 0), 100);\n  const isComplete = progress >= 100;\n\n  // Calculate circumference for circular progress\n  // For a stroke to fill from center to radius R, we need:\n  // - Circle at radius R/2 with strokeWidth R\n  // This way stroke extends from 0 to R (R/2 - R/2 to R/2 + R/2)\n  const maxRadius = 5; // Maximum inner circle radius\n  const strokeRadius = maxRadius / 2; // Position circle at half the desired radius\n  const strokeWidth = maxRadius; // Stroke width equals max radius\n  const circumference = 2 * Math.PI * strokeRadius;\n  // Calculate how much of the circle to show (inverted for clockwise from top)\n  const offset = circumference - (progress / 100) * circumference;\n\n  return (\n    <div className={className}>\n      {isComplete ? (\n        <SvgCheckCircle\n          className={cn(className, \"!stroke-status-success-05\")}\n          {...props}\n        />\n      ) : (\n        <svg\n          width=\"16\"\n          height=\"16\"\n          viewBox=\"0 0 16 16\"\n          fill=\"none\"\n          xmlns=\"http://www.w3.org/2000/svg\"\n        >\n          {/* Outer circle - outline only */}\n          <circle\n            cx=\"8\"\n            cy=\"8\"\n            r=\"7\"\n            stroke=\"currentColor\"\n            strokeWidth=\"1.5\"\n            fill=\"none\"\n            className=\"text-border-medium\"\n          />\n\n          {/* Inner circle progress - fills like a pie using thick stroke */}\n          <circle\n            cx=\"8\"\n            cy=\"8\"\n            r={strokeRadius}\n            stroke=\"currentColor\"\n            strokeWidth={strokeWidth}\n            fill=\"none\"\n            strokeDasharray={circumference}\n            strokeDashoffset={offset}\n            className=\"-rotate-90 origin-center\"\n            style={{\n              transformOrigin: \"center\",\n            }}\n          />\n        </svg>\n      )}\n    </div>\n  );\n};\n\nexport default SvgProgressCircle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/question-mark-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgQuestionMarkSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6.06 5.99995C6.21673 5.5544 6.5261 5.17869 6.9333 4.93937C7.3405 4.70006 7.81926 4.61258 8.28478 4.69243C8.7503 4.77228 9.17254 5.0143 9.47672 5.37564C9.78089 5.73697 9.94737 6.1943 9.94666 6.66662C9.94666 7.99995 7.94666 8.66662 7.94666 8.66662M8 11.3333H8.00666\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgQuestionMarkSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/quote-end.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgQuoteEnd = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 22 18\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M9.344 10.0627C9.344 15.6947 5.824 18.0627 1.10262e-10 17.9987L2.91054e-07 14.6707C3.712 14.4787 4.8 12.9427 4.8 10.5747L4.8 9.67874L0.512 9.67874L0.512001 -1.87854e-06L9.344 -1.10642e-06L9.344 10.0627ZM22 0L22 10.0627C22 15.6947 18.416 18.0627 12.592 17.9987L12.592 14.6707C16.304 14.4787 17.392 12.9427 17.392 10.5747L17.392 9.67874L13.104 9.67874L13.104 -7.77713e-07L22 0Z\"\n      fill=\"#E6E6E9\"\n    />\n  </svg>\n);\nexport default SvgQuoteEnd;\n"
  },
  {
    "path": "web/lib/opal/src/icons/quote-start.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgQuoteStart = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 22 18\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12.656 7.93726C12.656 2.30526 16.176 -0.0627379 22 0.00126124V3.32926C18.288 3.52126 17.2 5.05726 17.2 7.42526V8.32126H21.488V18H12.656V7.93726ZM0 18V7.93726C0 2.30526 3.584 -0.0627379 9.408 0.00126124V3.32926C5.696 3.52126 4.608 5.05726 4.608 7.42526V8.32126H8.896V18H0Z\"\n      fill=\"#E6E6E9\"\n    />\n  </svg>\n);\nexport default SvgQuoteStart;\n"
  },
  {
    "path": "web/lib/opal/src/icons/refresh-cw.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgRefreshCw = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14.448 3.10983V6.77746M14.448 6.77746H10.7803M14.448 6.77746L11.6117 4.11231C10.9547 3.45502 10.142 2.97486 9.24923 2.71664C8.35651 2.45842 7.41292 2.43055 6.50651 2.63564C5.6001 2.84072 4.76042 3.27208 4.06581 3.88945C3.3712 4.50683 2.84431 5.2901 2.53429 6.16618M1 12.8902V9.22254M1 9.22254H4.66763M1 9.22254L3.8363 11.8877C4.49326 12.545 5.30603 13.0251 6.19875 13.2834C7.09147 13.5416 8.03506 13.5694 8.94147 13.3644C9.84787 13.1593 10.6876 12.7279 11.3822 12.1105C12.0768 11.4932 12.6037 10.7099 12.9137 9.83381\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgRefreshCw;\n"
  },
  {
    "path": "web/lib/opal/src/icons/revert.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgRevert = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M1.33333 2V6M1.33333 6H5.33333M1.33333 6L4.00432 3.33333C5.05887 2.27806 6.50634 1.66667 8.06318 1.66667C11.2745 1.66667 13.8799 4.27203 13.8799 7.48333C13.8799 10.6946 11.2745 13.3 8.06318 13.3C5.52018 13.3 3.35026 11.6635 2.54132 9.38632\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgRevert;\n"
  },
  {
    "path": "web/lib/opal/src/icons/search-menu.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgSearchMenu = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M1.00261 7.5H2.5M1 4H3.25M1.00261 11H3.25M15 13L12.682 10.682M12.682 10.682C13.4963 9.86764 14 8.74264 14 7.5C14 5.01472 11.9853 3 9.49999 3C7.01472 3 5 5.01472 5 7.5C5 9.98528 7.01472 12 9.49999 12C10.7426 12 11.8676 11.4963 12.682 10.682Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgSearchMenu;\n"
  },
  {
    "path": "web/lib/opal/src/icons/search-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgSearchSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M9.69454 9.69454C10.7685 8.6206 10.7685 6.8794 9.69454 5.80546C8.6206 4.73151 6.8794 4.73151 5.80546 5.80546C4.73151 6.8794 4.73151 8.6206 5.80546 9.69454C6.8794 10.7685 8.6206 10.7685 9.69454 9.69454ZM9.69454 9.69454L11 11\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgSearchSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/search.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgSearch = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14 14L11.1 11.1M12.6667 7.33333C12.6667 10.2789 10.2789 12.6667 7.33333 12.6667C4.38781 12.6667 2 10.2789 2 7.33333C2 4.38781 4.38781 2 7.33333 2C10.2789 2 12.6667 4.38781 12.6667 7.33333Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgSearch;\n"
  },
  {
    "path": "web/lib/opal/src/icons/server.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgServer = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_170_22)\">\n      <path\n        d=\"M3.99999 4.00001H4.00666M3.99999 12H4.00666M2.66666 1.33334H13.3333C14.0697 1.33334 14.6667 1.9303 14.6667 2.66668V5.33334C14.6667 6.06972 14.0697 6.66668 13.3333 6.66668H2.66666C1.93028 6.66668 1.33333 6.06972 1.33333 5.33334V2.66668C1.33333 1.9303 1.93028 1.33334 2.66666 1.33334ZM2.66666 9.33334H13.3333C14.0697 9.33334 14.6667 9.9303 14.6667 10.6667V13.3333C14.6667 14.0697 14.0697 14.6667 13.3333 14.6667H2.66666C1.93028 14.6667 1.33333 14.0697 1.33333 13.3333V10.6667C1.33333 9.9303 1.93028 9.33334 2.66666 9.33334Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_170_22\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgServer;\n"
  },
  {
    "path": "web/lib/opal/src/icons/settings.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgSettings = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_16_2873)\">\n      <path\n        d=\"M8.00001 9.99999C9.10457 9.99999 10 9.10456 10 7.99999C10 6.89542 9.10457 5.99999 8.00001 5.99999C6.89544 5.99999 6.00001 6.89542 6.00001 7.99999C6.00001 9.10456 6.89544 9.99999 8.00001 9.99999Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n      <path\n        d=\"M12.9333 9.99999C12.8446 10.2011 12.8181 10.4241 12.8573 10.6404C12.8965 10.8566 12.9996 11.0562 13.1533 11.2133L13.1933 11.2533C13.3173 11.3772 13.4157 11.5242 13.4828 11.6861C13.5499 11.8479 13.5844 12.0214 13.5844 12.1967C13.5844 12.3719 13.5499 12.5454 13.4828 12.7072C13.4157 12.8691 13.3173 13.0162 13.1933 13.14C13.0695 13.264 12.9225 13.3623 12.7606 13.4294C12.5987 13.4965 12.4252 13.531 12.25 13.531C12.0748 13.531 11.9013 13.4965 11.7394 13.4294C11.5776 13.3623 11.4305 13.264 11.3067 13.14L11.2667 13.1C11.1096 12.9463 10.91 12.8432 10.6937 12.804C10.4775 12.7648 10.2544 12.7912 10.0533 12.88C9.85616 12.9645 9.68799 13.1048 9.56954 13.2837C9.45109 13.4625 9.38753 13.6721 9.38667 13.8867V14C9.38667 14.3536 9.2462 14.6928 8.99615 14.9428C8.7461 15.1928 8.40696 15.3333 8.05334 15.3333C7.69972 15.3333 7.36058 15.1928 7.11053 14.9428C6.86048 14.6928 6.72001 14.3536 6.72001 14V13.94C6.71484 13.7193 6.64342 13.5053 6.51501 13.3258C6.38661 13.1463 6.20716 13.0095 6.00001 12.9333C5.79893 12.8446 5.57588 12.8181 5.35961 12.8573C5.14335 12.8965 4.94379 12.9996 4.78667 13.1533L4.74667 13.1933C4.62284 13.3173 4.47579 13.4156 4.31393 13.4827C4.15206 13.5498 3.97856 13.5844 3.80334 13.5844C3.62812 13.5844 3.45462 13.5498 3.29275 13.4827C3.13089 13.4156 2.98384 13.3173 2.86001 13.1933C2.73604 13.0695 2.63769 12.9224 2.57059 12.7606C2.50349 12.5987 2.46896 12.4252 2.46896 12.25C2.46896 12.0748 2.50349 11.9013 2.57059 11.7394C2.63769 11.5775 2.73604 11.4305 2.86001 11.3067L2.90001 11.2667C3.0537 11.1095 3.1568 10.91 3.19601 10.6937C3.23522 10.4775 3.20875 10.2544 3.12001 10.0533C3.0355 9.85614 2.89518 9.68798 2.71632 9.56953C2.53746 9.45108 2.32786 9.38751 2.11334 9.38666H2.00001C1.64638 9.38666 1.30724 9.24618 1.0572 8.99613C0.807148 8.74608 0.666672 8.40695 0.666672 8.05332C0.666672 7.6997 0.807148 7.36056 1.0572 7.11051C1.30724 6.86047 1.64638 6.71999 2.00001 6.71999H2.06001C2.28067 6.71483 2.49467 6.6434 2.6742 6.515C2.85373 6.38659 2.99048 6.20715 3.06667 5.99999C3.15542 5.79891 3.18189 5.57586 3.14267 5.3596C3.10346 5.14333 3.00036 4.94378 2.84667 4.78666L2.80667 4.74666C2.6827 4.62283 2.58436 4.47577 2.51726 4.31391C2.45016 4.15205 2.41562 3.97854 2.41562 3.80332C2.41562 3.6281 2.45016 3.4546 2.51726 3.29274C2.58436 3.13087 2.6827 2.98382 2.80667 2.85999C2.9305 2.73602 3.07755 2.63768 3.23942 2.57058C3.40128 2.50348 3.57478 2.46894 3.75001 2.46894C3.92523 2.46894 4.09873 2.50348 4.26059 2.57058C4.42246 2.63768 4.56951 2.73602 4.69334 2.85999L4.73334 2.89999C4.89046 3.05368 5.09002 3.15678 5.30628 3.19599C5.52254 3.23521 5.74559 3.20873 5.94667 3.11999H6.00001C6.19718 3.03548 6.36535 2.89516 6.4838 2.7163C6.60225 2.53744 6.66582 2.32785 6.66667 2.11332V1.99999C6.66667 1.64637 6.80715 1.30723 7.0572 1.05718C7.30725 0.807132 7.64638 0.666656 8.00001 0.666656C8.35363 0.666656 8.69277 0.807132 8.94281 1.05718C9.19286 1.30723 9.33334 1.64637 9.33334 1.99999V2.05999C9.33419 2.27451 9.39776 2.48411 9.51621 2.66297C9.63466 2.84183 9.80283 2.98215 10 3.06666C10.2011 3.1554 10.4241 3.18187 10.6404 3.14266C10.8567 3.10345 11.0562 3.00035 11.2133 2.84666L11.2533 2.80666C11.3772 2.68269 11.5242 2.58434 11.6861 2.51724C11.8479 2.45014 12.0215 2.41561 12.1967 2.41561C12.3719 2.41561 12.5454 2.45014 12.7073 2.51724C12.8691 2.58434 13.0162 2.68269 13.14 2.80666C13.264 2.93049 13.3623 3.07754 13.4294 3.2394C13.4965 3.40127 13.5311 3.57477 13.5311 3.74999C13.5311 3.92521 13.4965 4.09871 13.4294 4.26058C13.3623 4.42244 13.264 4.56949 13.14 4.69332L13.1 4.73332C12.9463 4.89044 12.8432 5.09 12.804 5.30626C12.7648 5.52253 12.7913 5.74558 12.88 5.94666V5.99999C12.9645 6.19717 13.1048 6.36533 13.2837 6.48379C13.4626 6.60224 13.6721 6.6658 13.8867 6.66666H14C14.3536 6.66666 14.6928 6.80713 14.9428 7.05718C15.1929 7.30723 15.3333 7.64637 15.3333 7.99999C15.3333 8.35361 15.1929 8.69275 14.9428 8.9428C14.6928 9.19285 14.3536 9.33332 14 9.33332H13.94C13.7255 9.33418 13.5159 9.39774 13.337 9.5162C13.1582 9.63465 13.0178 9.80281 12.9333 9.99999Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_16_2873\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgSettings;\n"
  },
  {
    "path": "web/lib/opal/src/icons/share-webhook.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgShareWebhook = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M10.0002 4C10.0002 3.99708 10.0002 3.99415 10.0001 3.99123C9.99542 2.8907 9.10181 2 8.00016 2C6.89559 2 6.00016 2.89543 6.00016 4C6.00016 4.73701 6.39882 5.38092 6.99226 5.72784L4.67276 9.70412M11.6589 13.7278C11.9549 13.9009 12.2993 14 12.6668 14C13.7714 14 14.6668 13.1046 14.6668 12C14.6668 10.8954 13.7714 10 12.6668 10C12.2993 10 11.9549 10.0991 11.6589 10.2722L9.33943 6.29588M2.33316 10.2678C1.73555 10.6136 1.3335 11.2599 1.3335 12C1.3335 13.1046 2.22893 14 3.3335 14C4.43807 14 5.3335 13.1046 5.3335 12H10.0002\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgShareWebhook;\n"
  },
  {
    "path": "web/lib/opal/src/icons/share.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgShare = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M2.66667 8.00001V13.3333C2.66667 13.687 2.80715 14.0261 3.0572 14.2762C3.30724 14.5262 3.64638 14.6667 4.00001 14.6667H12C12.3536 14.6667 12.6928 14.5262 12.9428 14.2762C13.1929 14.0261 13.3333 13.687 13.3333 13.3333V8.00001M10.6667 4.00001L8.00001 1.33334M8.00001 1.33334L5.33334 4.00001M8.00001 1.33334V10\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgShare;\n"
  },
  {
    "path": "web/lib/opal/src/icons/shield.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgShield = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M8.00001 14.6667C8.00001 14.6667 13.3333 12 13.3333 8.00001V3.33334L8.00001 1.33334L2.66667 3.33334V8.00001C2.66667 12 8.00001 14.6667 8.00001 14.6667Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgShield;\n"
  },
  {
    "path": "web/lib/opal/src/icons/sidebar.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgSidebar = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6 2V14M3.33333 2H12.6667C13.403 2 14 2.59695 14 3.33333V12.6667C14 13.403 13.403 14 12.6667 14H3.33333C2.59695 14 2 13.403 2 12.6667V3.33333C2 2.59695 2.59695 2 3.33333 2Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgSidebar;\n"
  },
  {
    "path": "web/lib/opal/src/icons/slack.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgSlack = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_259_269)\">\n      <path\n        d=\"M9.66666 6.66665C9.11333 6.66665 8.66666 6.21998 8.66666 5.66665V2.33331C8.66666 1.77998 9.11333 1.33331 9.66666 1.33331C10.22 1.33331 10.6667 1.77998 10.6667 2.33331V5.66665C10.6667 6.21998 10.22 6.66665 9.66666 6.66665Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n      <path\n        d=\"M13.6667 6.66665H12.6667V5.66665C12.6667 5.11331 13.1133 4.66665 13.6667 4.66665C14.22 4.66665 14.6667 5.11331 14.6667 5.66665C14.6667 6.21998 14.22 6.66665 13.6667 6.66665Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n      <path\n        d=\"M6.33333 9.33331C6.88666 9.33331 7.33333 9.77998 7.33333 10.3333V13.6666C7.33333 14.22 6.88666 14.6666 6.33333 14.6666C5.78 14.6666 5.33333 14.22 5.33333 13.6666V10.3333C5.33333 9.77998 5.78 9.33331 6.33333 9.33331Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n      <path\n        d=\"M2.33333 9.33331H3.33333V10.3333C3.33333 10.8866 2.88666 11.3333 2.33333 11.3333C1.77999 11.3333 1.33333 10.8866 1.33333 10.3333C1.33333 9.77998 1.77999 9.33331 2.33333 9.33331Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n      <path\n        d=\"M9.33333 9.66665C9.33333 9.11331 9.78 8.66665 10.3333 8.66665H13.6667C14.22 8.66665 14.6667 9.11331 14.6667 9.66665C14.6667 10.22 14.22 10.6666 13.6667 10.6666H10.3333C9.78 10.6666 9.33333 10.22 9.33333 9.66665Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n      <path\n        d=\"M10.3333 12.6666H9.33333V13.6666C9.33333 14.22 9.78 14.6666 10.3333 14.6666C10.8867 14.6666 11.3333 14.22 11.3333 13.6666C11.3333 13.1133 10.8867 12.6666 10.3333 12.6666Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n      <path\n        d=\"M6.66666 6.33331C6.66666 5.77998 6.22 5.33331 5.66666 5.33331H2.33333C1.77999 5.33331 1.33333 5.77998 1.33333 6.33331C1.33333 6.88665 1.77999 7.33331 2.33333 7.33331H5.66666C6.22 7.33331 6.66666 6.88665 6.66666 6.33331Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n      <path\n        d=\"M5.66666 3.33331H6.66666V2.33331C6.66666 1.77998 6.22 1.33331 5.66666 1.33331C5.11333 1.33331 4.66666 1.77998 4.66666 2.33331C4.66666 2.88665 5.11333 3.33331 5.66666 3.33331Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_259_269\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgSlack;\n"
  },
  {
    "path": "web/lib/opal/src/icons/slash.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgSlash = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_16_slash)\">\n      <path\n        d=\"M14.6667 7.99999C14.6667 11.6819 11.6819 14.6667 7.99999 14.6667C4.3181 14.6667 1.33333 11.6819 1.33333 7.99999C1.33333 4.3181 4.3181 1.33333 7.99999 1.33333C11.6819 1.33333 14.6667 4.3181 14.6667 7.99999Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n      <path\n        d=\"M3.5 3.5L12.5 12.5\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n  </svg>\n);\nexport default SvgSlash;\n"
  },
  {
    "path": "web/lib/opal/src/icons/sliders-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgSlidersSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6 11V8.75M6 6.75V5M6 6.75H4.75M6 6.75H7.25M10 11V9.25M10 9.25H8.75M10 9.25H11.25M10 7.25V5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgSlidersSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/sliders.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgSliders = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_16_2627)\">\n      <path\n        d=\"M2.66666 14V9.33333M2.66666 6.66667V2M7.99999 14V8M7.99999 5.33333V2M13.3333 14V10.6667M13.3333 8V2M0.666656 9.33333H4.66666M5.99999 5.33333H9.99999M11.3333 10.6667H15.3333\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_16_2627\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgSliders;\n"
  },
  {
    "path": "web/lib/opal/src/icons/sort-order.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgSortOrder = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M2.66675 12L7.67009 12.0001M2.66675 8H10.5001M2.66675 4H13.3334\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgSortOrder;\n"
  },
  {
    "path": "web/lib/opal/src/icons/sort.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgSort = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M2 4.5H10M2 8H7M2 11.5H5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M12 5V12M12 12L14 10M12 12L10 10\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgSort;\n"
  },
  {
    "path": "web/lib/opal/src/icons/sparkle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgSparkle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M1.5 8C5.11111 6.91667 6.91667 5.11111 8 1.5C9.08333 5.11111 10.8889 6.91667 14.5 8C10.8889 9.08333 9.08333 10.8889 8 14.5C6.91667 10.8889 5.11111 9.08333 1.5 8Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"square\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgSparkle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/star-off.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgStarOff = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M1 1L5.56196 5.56196M15 15L5.56196 5.56196M5.56196 5.56196L1.33333 6.18004L4.66666 9.42671L3.88 14.0134L8 11.8467L12.12 14.0134L11.7267 11.72M12.1405 8.64051L14.6667 6.18004L10.06 5.50671L8 1.33337L6.95349 3.45349\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgStarOff;\n"
  },
  {
    "path": "web/lib/opal/src/icons/star.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgStar = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.99999 1.33331L10.06 5.50665L14.6667 6.17998L11.3333 9.42665L12.12 14.0133L7.99999 11.8466L3.87999 14.0133L4.66666 9.42665L1.33333 6.17998L5.93999 5.50665L7.99999 1.33331Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgStar;\n"
  },
  {
    "path": "web/lib/opal/src/icons/step1.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgStep1 = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 15\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.41989 7.42018L11.7505 4.92023C10.8858 3.42605 9.27082 2.42116 7.42035 2.42106L7.41989 7.42018Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      fillRule=\"evenodd\"\n      clipRule=\"evenodd\"\n      d=\"M7.4198 7.42V2.42C9.14596 2.42161 10.8242 3.31822 11.7494 4.92083L7.4198 7.42Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M7.41667 14.0833C11.0986 14.0833 14.0833 11.0986 14.0833 7.41667C14.0833 3.73477 11.0986 0.75 7.41667 0.75C3.73477 0.75 0.75 3.73477 0.75 7.41667C0.75 11.0986 3.73477 14.0833 7.41667 14.0833Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgStep1;\n"
  },
  {
    "path": "web/lib/opal/src/icons/step2.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgStep2 = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 15\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.4198 7.42L11.7501 9.92041C12.6118 8.42453 12.6745 6.52344 11.7494 4.92083L7.4198 7.42Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M7.4198 2.42L7.4198 7.42L11.7494 4.92083C10.8242 3.31822 9.14596 2.42161 7.4198 2.42Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M7.4198 7.42L11.7501 9.92041C12.6118 8.42453 12.6745 6.52344 11.7494 4.92083L7.4198 7.42Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      fillRule=\"evenodd\"\n      clipRule=\"evenodd\"\n      d=\"M7.4198 7.42V2.42C9.14596 2.42161 10.8242 3.31822 11.7494 4.92083L7.4198 7.42Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M7.41667 14.0833C11.0986 14.0833 14.0833 11.0986 14.0833 7.41667C14.0833 3.73477 11.0986 0.75 7.41667 0.75C3.73477 0.75 0.75 3.73477 0.75 7.41667C0.75 11.0986 3.73477 14.0833 7.41667 14.0833Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgStep2;\n"
  },
  {
    "path": "web/lib/opal/src/icons/step3-end.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgStep3End = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 15\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.42012 7.42018L7.42039 2.41981C5.69408 2.42152 4.01631 3.31772 3.09099 4.92023L7.42012 7.42018Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M3.09 9.92018L7.42012 7.42018L3.09099 4.92023C2.16566 6.52274 2.22832 8.42448 3.09 9.92018Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M7.42012 7.41982L3.08954 9.91977C3.95417 11.4139 5.56919 12.4188 7.41967 12.4189L7.42012 7.41982Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M11.7502 9.91982L7.42012 7.41982L7.41967 12.4189C9.27014 12.419 10.8858 11.4139 11.7502 9.91982Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      fillRule=\"evenodd\"\n      clipRule=\"evenodd\"\n      d=\"M7.4198 7.42V2.42C9.14596 2.42161 10.8242 3.31822 11.7494 4.92083L7.4198 7.42Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M7.4198 7.42L11.7501 9.92041C12.6118 8.42453 12.6745 6.52344 11.7494 4.92083L7.4198 7.42Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M7.41667 14.0833C11.0986 14.0833 14.0833 11.0986 14.0833 7.41667C14.0833 3.73477 11.0986 0.75 7.41667 0.75C3.73477 0.75 0.75 3.73477 0.75 7.41667C0.75 11.0986 3.73477 14.0833 7.41667 14.0833Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgStep3End;\n"
  },
  {
    "path": "web/lib/opal/src/icons/step3.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgStep3 = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 15\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.42012 7.41982L3.08954 9.91977C3.95417 11.4139 5.56919 12.4188 7.41967 12.4189L7.42012 7.41982Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M11.7502 9.91982L7.42012 7.41982L7.41967 12.4189C9.27014 12.419 10.8858 11.4139 11.7502 9.91982Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M7.42012 7.41982L3.08954 9.91977C3.95417 11.4139 5.56919 12.4188 7.41967 12.4189L7.42012 7.41982Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M11.7502 9.91982L7.42012 7.41982L7.41967 12.4189C9.27014 12.419 10.8858 11.4139 11.7502 9.91982Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      fillRule=\"evenodd\"\n      clipRule=\"evenodd\"\n      d=\"M7.4198 7.42V2.42C9.14596 2.42161 10.8242 3.31822 11.7494 4.92083L7.4198 7.42Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M7.4198 7.42L11.7501 9.92041C12.6118 8.42453 12.6745 6.52344 11.7494 4.92083L7.4198 7.42Z\"\n      fill=\"currentColor\"\n    />\n    <path\n      d=\"M7.41667 14.0833C11.0986 14.0833 14.0833 11.0986 14.0833 7.41667C14.0833 3.73477 11.0986 0.75 7.41667 0.75C3.73477 0.75 0.75 3.73477 0.75 7.41667C0.75 11.0986 3.73477 14.0833 7.41667 14.0833Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgStep3;\n"
  },
  {
    "path": "web/lib/opal/src/icons/stop-circle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgStopCircle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 15\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M7.41667 14.0833C11.0986 14.0833 14.0833 11.0986 14.0833 7.41667C14.0833 3.73477 11.0986 0.75 7.41667 0.75C3.73477 0.75 0.75 3.73477 0.75 7.41667C0.75 11.0986 3.73477 14.0833 7.41667 14.0833Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M9.41667 5.41667H5.41667V9.41667H9.41667V5.41667Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgStopCircle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/stop.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgStop = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12 4H4V12H12V4Z\"\n      strokeWidth={1.5}\n      fill=\"var(--background-tint-00)\"\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgStop;\n"
  },
  {
    "path": "web/lib/opal/src/icons/sun.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgSun = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_2458_12738)\">\n      <path\n        d=\"M8 1L8 2.5M8 13.5V15M3.04909 3.04909L4.11091 4.11091M11.8891 11.8891L12.9509 12.9509M1 8L2.5 8M13.5 8L15 8M3.04909 12.9509L4.11091 11.8891M11.8891 4.11091L12.9509 3.04909M11 8C11 9.65685 9.65685 11 8 11C6.34315 11 5 9.65685 5 8C5 6.34315 6.34315 5 8 5C9.65685 5 11 6.34315 11 8Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_2458_12738\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgSun;\n"
  },
  {
    "path": "web/lib/opal/src/icons/tag.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgTag = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M4.66666 4.66668H4.67333M13.7267 8.94001L8.94666 13.72C8.82283 13.844 8.67578 13.9423 8.51392 14.0094C8.35205 14.0765 8.17855 14.1111 8.00333 14.1111C7.82811 14.1111 7.65461 14.0765 7.49274 14.0094C7.33088 13.9423 7.18383 13.844 7.05999 13.72L1.33333 8.00001V1.33334H7.99999L13.7267 7.06001C13.975 7.30983 14.1144 7.64776 14.1144 8.00001C14.1144 8.35226 13.975 8.69019 13.7267 8.94001Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgTag;\n"
  },
  {
    "path": "web/lib/opal/src/icons/terminal-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgTerminalSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5.5 10L7.5 8L5.5 6M8.5 10.5H10.5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgTerminalSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/terminal.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgTerminal = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M2.66667 11.3333L6.66667 7.33331L2.66667 3.33331M8.00001 12.6666H13.3333\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgTerminal;\n"
  },
  {
    "path": "web/lib/opal/src/icons/text-lines-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgTextLinesSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M5 10.5H8.5M5 8H11M5 5.5H11\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgTextLinesSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/text-lines.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgTextLines = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 18 18\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M15.75 7.4925H2.25M15.75 4.5H2.25M9 13.5H2.25M15.75 10.4962H2.25\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgTextLines;\n"
  },
  {
    "path": "web/lib/opal/src/icons/thumbs-down.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgThumbsDown = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_16_2611)\">\n      <path\n        d=\"M11.3333 8.66667L8.66666 14.6667C8.13622 14.6667 7.62752 14.456 7.25244 14.0809C6.87737 13.7058 6.66666 13.1971 6.66666 12.6667V10H2.89332C2.70005 10.0022 2.50861 9.96234 2.33228 9.8832C2.15594 9.80405 1.99891 9.68752 1.87208 9.54166C1.74525 9.39581 1.65165 9.22413 1.59776 9.03851C1.54387 8.85289 1.53098 8.65777 1.55999 8.46667L2.47999 2.46667C2.52821 2.14874 2.6897 1.85894 2.93472 1.65067C3.17974 1.4424 3.49177 1.32971 3.81332 1.33334H11.3333M11.3333 8.66667V1.33334M11.3333 8.66667H13.1133C13.4906 8.67335 13.8573 8.54125 14.1436 8.29546C14.4299 8.04967 14.6161 7.7073 14.6667 7.33334V2.66667C14.6161 2.29271 14.4299 1.95034 14.1436 1.70455C13.8573 1.45876 13.4906 1.32667 13.1133 1.33334H11.3333\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_16_2611\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgThumbsDown;\n"
  },
  {
    "path": "web/lib/opal/src/icons/thumbs-up.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgThumbsUp = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_16_2609)\">\n      <path\n        d=\"M4.66666 7.33333L7.33333 1.33333C7.86376 1.33333 8.37247 1.54404 8.74754 1.91911C9.12262 2.29419 9.33333 2.8029 9.33333 3.33333V6H13.1067C13.2999 5.99781 13.4914 6.03766 13.6677 6.11681C13.844 6.19595 14.0011 6.31248 14.1279 6.45834C14.2547 6.60419 14.3483 6.77588 14.4022 6.9615C14.4561 7.14712 14.469 7.34223 14.44 7.53333L13.52 13.5333C13.4718 13.8513 13.3103 14.1411 13.0653 14.3493C12.8202 14.5576 12.5082 14.6703 12.1867 14.6667H4.66666M4.66666 7.33333V14.6667M4.66666 7.33333H2.66666C2.31304 7.33333 1.9739 7.4738 1.72385 7.72385C1.4738 7.9739 1.33333 8.31304 1.33333 8.66666V13.3333C1.33333 13.687 1.4738 14.0261 1.72385 14.2761C1.9739 14.5262 2.31304 14.6667 2.66666 14.6667H4.66666\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_16_2609\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgThumbsUp;\n"
  },
  {
    "path": "web/lib/opal/src/icons/trash.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgTrash = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M2 3.99998H3.33333M3.33333 3.99998H14M3.33333 3.99998V13.3333C3.33333 13.6869 3.47381 14.0261 3.72386 14.2761C3.97391 14.5262 4.31304 14.6666 4.66667 14.6666H11.3333C11.687 14.6666 12.0261 14.5262 12.2761 14.2761C12.5262 14.0261 12.6667 13.6869 12.6667 13.3333V3.99998M5.33333 3.99998V2.66665C5.33333 2.31302 5.47381 1.97389 5.72386 1.72384C5.97391 1.47379 6.31304 1.33331 6.66667 1.33331H9.33333C9.68696 1.33331 10.0261 1.47379 10.2761 1.72384C10.5262 1.97389 10.6667 2.31302 10.6667 2.66665V3.99998M6.66667 7.33331V11.3333M9.33333 7.33331V11.3333\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgTrash;\n"
  },
  {
    "path": "web/lib/opal/src/icons/two-line-small.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgTwoLineSmall = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M6 6.50002V9.50002M10 6.50002V9.50002\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgTwoLineSmall;\n"
  },
  {
    "path": "web/lib/opal/src/icons/unplug.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgUnplug = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_454_2471)\">\n      <path\n        d=\"M1 1L5.0778 5.0778M15 15L12 12M15 10.5H14M12 12.5H8.5C6.01472 12.5 4 10.4853 4 8M12 12.5V14M12 12.5V12M12 3.5H8.5C8.04537 3.5 7.60649 3.56742 7.1928 3.6928M12 3.5V5.5M12 3.5V2M12 5.5H15M12 5.5V8.5M4 8H1M4 8C4 6.88463 4.40579 5.86403 5.0778 5.0778M5.0778 5.0778L12 12\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_454_2471\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgUnplug;\n"
  },
  {
    "path": "web/lib/opal/src/icons/upload-cloud.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgUploadCloud = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 14\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M10.4167 9.41502L7.75 6.74835M7.75 6.74835L5.08333 9.41502M7.75 6.74835V12.7484M13.0913 10.5412C13.712 10.2028 14.2022 9.66745 14.4848 9.01952C14.7673 8.3716 14.826 7.64804 14.6517 6.96303C14.4773 6.27802 14.0798 5.67058 13.5219 5.23658C12.964 4.80258 12.2774 4.56673 11.5706 4.56627H10.7688C10.5762 3.82131 10.2173 3.1297 9.71889 2.54345C9.22053 1.95719 8.59575 1.49155 7.89152 1.18151C7.1873 0.871475 6.42195 0.725121 5.65301 0.753452C4.88408 0.781783 4.13158 0.98406 3.45207 1.34508C2.77257 1.70609 2.18375 2.21646 1.72988 2.8378C1.27601 3.45913 0.968902 4.17528 0.831645 4.93239C0.694388 5.6895 0.730552 6.46788 0.93742 7.209C1.14429 7.95013 1.51648 8.63471 2.026 9.21129\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgUploadCloud;\n"
  },
  {
    "path": "web/lib/opal/src/icons/user-check.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgUserCheck = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M11 14C11 13.6667 11 13.3333 11 13C11 11.3431 9.65684 10 7.99998 10H4.00002C2.34316 10 1 11.3431 1 13C1 13.3333 1 13.6667 1 14M10.75 7.49999L12.25 9L15 6.24999M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgUserCheck;\n"
  },
  {
    "path": "web/lib/opal/src/icons/user-edit.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgUserEdit = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M1 14C1 13.6667 1 13.3333 1 13C1 11.3431 2.34316 10 4.00002 10H7M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75ZM12.09 8.41421C12.3552 8.149 12.7149 8 13.09 8C13.2757 8 13.4596 8.03658 13.6312 8.10765C13.8028 8.17872 13.9587 8.28289 14.09 8.41421C14.2213 8.54554 14.3255 8.70144 14.3966 8.87302C14.4676 9.0446 14.5042 9.2285 14.5042 9.41421C14.5042 9.59993 14.4676 9.78383 14.3966 9.95541C14.3255 10.127 14.2213 10.2829 14.09 10.4142L10.6667 13.8333L8 14.5L8.66667 11.8333L12.09 8.41421Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgUserEdit;\n"
  },
  {
    "path": "web/lib/opal/src/icons/user-key.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgUserKey = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M1 14C1 13.6667 1 13.3333 1 13C1 11.3431 2.34316 10 4.00002 10H8.5M12.625 10C13.6605 10 14.5 9.16053 14.5 8.125C14.5 7.08947 13.6605 6.25 12.625 6.25C11.5895 6.25 10.75 7.08947 10.75 8.125C10.75 9.16053 11.5895 10 12.625 10ZM12.625 10V12.25M12.625 14.5V13.5M12.625 13.5H13.875V12.25H12.625M12.625 13.5V12.25M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgUserKey;\n"
  },
  {
    "path": "web/lib/opal/src/icons/user-manage.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgUserManage = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 14\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M0.75 12.75C0.75 12.4167 0.75 12.0833 0.75 11.75C0.750002 10.0931 2.09316 8.75 3.75002 8.75L5.75 8.75M12.25 11.25L13.2981 12.2981M12.25 11.25C12.5916 10.9084 12.7499 10.4481 12.75 10.0004M12.25 11.25C11.9083 11.5917 11.4479 11.75 11 11.75M9.75 11.25L8.7019 12.2981M9.75 11.25C10.0917 11.5917 10.5521 11.75 11 11.75M9.75 11.25C9.4084 10.9084 9.25011 10.4481 9.25 10.0004M9.75 8.75L8.7019 7.70193M9.75 8.75C10.0917 8.40829 10.5521 8.25 11 8.25M9.75 8.75C9.40818 9.09182 9.24989 9.55242 9.25 10.0004M12.25 8.75L13.2981 7.70193M12.25 8.75C12.5918 9.09182 12.7501 9.55242 12.75 10.0004M12.25 8.75C11.9083 8.40829 11.4479 8.25 11 8.25M12.75 10.0004L14.25 10M11 13.25V11.75M11 6.75V8.25M7.75 10L9.25 10.0004M8.5 3.5C8.5 5.01878 7.26878 6.25 5.75 6.25C4.23122 6.25 3 5.01878 3 3.5C3 1.98122 4.23122 0.75 5.75 0.75C7.26878 0.75 8.5 1.98122 8.5 3.5Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgUserManage;\n"
  },
  {
    "path": "web/lib/opal/src/icons/user-minus.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgUserMinus = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M11 14C11 13.6667 11 13.3333 11 13C11 11.3431 9.65684 10 7.99998 10H4.00002C2.34316 10 1 11.3431 1 13C1 13.3333 1 13.6667 1 14M10.75 7.49999L14.75 7.50007M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgUserMinus;\n"
  },
  {
    "path": "web/lib/opal/src/icons/user-plus.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgUserPlus = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M11 14C11 13.6667 11 13.3333 11 13C11 11.3431 9.65684 10 7.99998 10H4.00002C2.34316 10 1 11.3431 1 13C1 13.3333 1 13.6667 1 14M10.75 7.50005L12.75 7.50007M12.75 7.50007H14.75M12.75 7.50007V9.5M12.75 7.50007V5.5M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgUserPlus;\n"
  },
  {
    "path": "web/lib/opal/src/icons/user-shield.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgUserShield = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M1 14C1 13.6667 1 13.3333 1 13C1 11.3431 2.34316 10 4.00002 10H7M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75ZM12 14.5C12 14.5 14.5 13.25 14.5 11.375V9L12 8L9.5 9V11.375C9.5 13.25 12 14.5 12 14.5Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgUserShield;\n"
  },
  {
    "path": "web/lib/opal/src/icons/user-speaker.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgUserSpeaker = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M1 14C1 13.6667 1 13.3333 1 13C1 11.3431 2.34316 10 4.00002 10H7.99998C9.65684 10 11 11.3431 11 13C11 13.3333 11 13.6667 11 14H14.5V10L12.7071 8.20711M12 7.5L12.7071 8.20711M12.7071 8.20711C13.0976 7.81658 13.0976 7.18342 12.7071 6.79289C12.3166 6.40237 11.6834 6.40237 11.2929 6.79289C10.9024 7.18342 10.9024 7.81658 11.2929 8.20711C11.6834 8.59763 12.3166 8.59763 12.7071 8.20711ZM8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgUserSpeaker;\n"
  },
  {
    "path": "web/lib/opal/src/icons/user-sync.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgUserSync = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M1 14C1 13.6667 1 13.3333 1 13C1 11.3431 2.34316 10 4.00002 10H7M11 8.5L9.5 10L14.5 9.99985M13 14L14.5 12.5L9.5 12.5M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgUserSync;\n"
  },
  {
    "path": "web/lib/opal/src/icons/user-x.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgUserX = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M11 14C11 13.6667 11 13.3333 11 13C11 11.3431 9.65684 10 7.99998 10H4.00002C2.34316 10 1 11.3431 1 13C1 13.3333 1 13.6667 1 14M11.5 8.5L13.25 6.75M13.25 6.75L15 5M13.25 6.75L15 8.5M13.25 6.75L11.5 5M8.75 4.75C8.75 6.26878 7.51878 7.5 6 7.5C4.48122 7.5 3.25 6.26878 3.25 4.75C3.25 3.23122 4.48122 2 6 2C7.51878 2 8.75 3.23122 8.75 4.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgUserX;\n"
  },
  {
    "path": "web/lib/opal/src/icons/user.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgUser = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M13.3333 14V12.6667C13.3333 11.9594 13.0524 11.2811 12.5523 10.781C12.0522 10.281 11.3739 10 10.6667 10H5.33334C4.62609 10 3.94782 10.281 3.44772 10.781C2.94762 11.2811 2.66667 11.9594 2.66667 12.6667V14M10.6667 4.66667C10.6667 6.13943 9.47276 7.33333 8.00001 7.33333C6.52725 7.33333 5.33334 6.13943 5.33334 4.66667C5.33334 3.19391 6.52725 2 8.00001 2C9.47276 2 10.6667 3.19391 10.6667 4.66667Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgUser;\n"
  },
  {
    "path": "web/lib/opal/src/icons/users.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgUsers = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <g clipPath=\"url(#clip0_16_2864)\">\n      <path\n        d=\"M11.3333 14V12.6667C11.3333 11.9594 11.0524 11.2811 10.5523 10.781C10.0522 10.281 9.3739 10 8.66666 10H3.33332C2.62608 10 1.9478 10.281 1.44771 10.781C0.947608 11.2811 0.666656 11.9594 0.666656 12.6667V14M15.3333 14V12.6667C15.3329 12.0758 15.1362 11.5018 14.7742 11.0349C14.4122 10.5679 13.9054 10.2344 13.3333 10.0867M10.6667 2.08667C11.2403 2.23353 11.7487 2.56713 12.1117 3.03487C12.4748 3.50261 12.6719 4.07789 12.6719 4.67C12.6719 5.26211 12.4748 5.83739 12.1117 6.30513C11.7487 6.77287 11.2403 7.10647 10.6667 7.25333M8.66666 4.66667C8.66666 6.13943 7.47275 7.33333 5.99999 7.33333C4.52723 7.33333 3.33332 6.13943 3.33332 4.66667C3.33332 3.19391 4.52723 2 5.99999 2C7.47275 2 8.66666 3.19391 8.66666 4.66667Z\"\n        strokeWidth={1.5}\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </g>\n    <defs>\n      <clipPath id=\"clip0_16_2864\">\n        <rect width={16} height={16} fill=\"white\" />\n      </clipPath>\n    </defs>\n  </svg>\n);\nexport default SvgUsers;\n"
  },
  {
    "path": "web/lib/opal/src/icons/volume-off.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgVolumeOff = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M2 6V10H5L9 13V3L5 6H2Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M14 6L11 9M11 6L14 9\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgVolumeOff;\n"
  },
  {
    "path": "web/lib/opal/src/icons/volume.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgVolume = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M2 6V10H5L9 13V3L5 6H2Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M11.5 5.5C12.3 6.3 12.8 7.4 12.8 8.5C12.8 9.6 12.3 10.7 11.5 11.5\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgVolume;\n"
  },
  {
    "path": "web/lib/opal/src/icons/wallet.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgWallet = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M14 4.75H9C8.44772 4.75 8 5.19772 8 5.75L8 10.25C8 10.8023 8.44772 11.25 9 11.25H14M14 4.75C14.5523 4.75 15 5.19772 15 5.75V10.25C15 10.8023 14.5523 11.25 14 11.25M14 4.75V3.33333C14 2.6 13.4 2 12.6667 2H3.33333C2.6 2 2 2.6 2 3.33333V12.6667C2 13.4 2.6 14 3.33333 14H12.6667C13.4 14 14 13.4 14 12.6667L14 11.25M10.25 7V9\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgWallet;\n"
  },
  {
    "path": "web/lib/opal/src/icons/workflow.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgWorkflow = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 16 16\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M2.79986 5.60004C2.61157 5.85073 2.5 6.16234 2.5 6.5V11.9754C2.5 13.203 4.08461 13.6951 4.78005 12.6836L11.2199 3.31644C11.9154 2.30488 13.5 2.79705 13.5 4.0246V9.5C13.5 9.83766 13.3884 10.1493 13.2001 10.4M2.79986 5.60004C3.13415 5.85118 3.54969 6 4 6C5.10457 6 6 5.10457 6 4C6 2.89543 5.10457 2 4 2C2.89543 2 2 2.89543 2 4C2 4.65426 2.31416 5.23515 2.79986 5.60004ZM13.2001 10.4C12.8659 10.1488 12.4503 10 12 10C10.8954 10 10 10.8954 10 12C10 13.1046 10.8954 14 12 14C13.1046 14 14 13.1046 14 12C14 11.3457 13.6858 10.7648 13.2001 10.4Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgWorkflow;\n"
  },
  {
    "path": "web/lib/opal/src/icons/x-circle.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgXCircle = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 15\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M9.41667 5.41667L5.41667 9.41667M5.41667 5.41667L9.41667 9.41667M14.0833 7.41667C14.0833 11.0986 11.0986 14.0833 7.41667 14.0833C3.73477 14.0833 0.75 11.0986 0.75 7.41667C0.75 3.73477 3.73477 0.75 7.41667 0.75C11.0986 0.75 14.0833 3.73477 14.0833 7.41667Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgXCircle;\n"
  },
  {
    "path": "web/lib/opal/src/icons/x-octagon.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgXOctagon = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 15 15\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M9.41667 5.41667L5.41667 9.41667M5.41667 5.41667L9.41667 9.41667M4.65667 0.75H10.1767L14.0833 4.65667V10.1767L10.1767 14.0833H4.65667L0.75 10.1767V4.65667L4.65667 0.75Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgXOctagon;\n"
  },
  {
    "path": "web/lib/opal/src/icons/x.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgX = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 28 28\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    strokeWidth={2.5}\n    {...props}\n  >\n    <path d=\"M21 7L7 21M7 7L21 21\" strokeLinejoin=\"round\" />\n  </svg>\n);\nexport default SvgX;\n"
  },
  {
    "path": "web/lib/opal/src/icons/zoom-in.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgZoomIn = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 14 14\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12.75 12.75L9.85 9.85M6.08333 4.08333V8.08333M4.08333 6.08333H8.08333M11.4167 6.08333C11.4167 9.02885 9.02885 11.4167 6.08333 11.4167C3.13781 11.4167 0.75 9.02885 0.75 6.08333C0.75 3.13781 3.13781 0.75 6.08333 0.75C9.02885 0.75 11.4167 3.13781 11.4167 6.08333Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgZoomIn;\n"
  },
  {
    "path": "web/lib/opal/src/icons/zoom-out.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\n\nconst SvgZoomOut = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 14 14\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    stroke=\"currentColor\"\n    {...props}\n  >\n    <path\n      d=\"M12.75 12.75L9.85 9.85M4.08333 6.08333H8.08333M11.4167 6.08333C11.4167 9.02885 9.02885 11.4167 6.08333 11.4167C3.13781 11.4167 0.75 9.02885 0.75 6.08333C0.75 3.13781 3.13781 0.75 6.08333 0.75C9.02885 0.75 11.4167 3.13781 11.4167 6.08333Z\"\n      strokeWidth={1.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\n\nexport default SvgZoomOut;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/broken-key.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgBrokenKey = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M54.375 43.125H43.125M69.375 28.125V16.875M58.125 31.875L48.75 22.5\"\n      stroke=\"#EC5B13\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M108.75 18.75L98.5535 24.6369M98.5535 24.6369L104.044 34.1465L91.7404 41.25L86.25 31.7404M98.5535 24.6369L86.25 31.7404M86.25 31.7404L78.7499 36.0705M49.6599 62.8401C45.5882 58.7684 39.9632 56.25 33.75 56.25C21.3236 56.25 11.25 66.3236 11.25 78.75C11.25 91.1764 21.3236 101.25 33.75 101.25C46.1764 101.25 56.25 91.1764 56.25 78.75C56.25 72.5368 53.7316 66.9118 49.6599 62.8401ZM49.6599 62.8401L49.6406 62.8594M49.6599 62.8401L60 52.5\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgBrokenKey;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/connect.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgConnect = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M43.125 86.2644H73.379M73.379 86.2644H90.9447C95.6006 86.2644 99.375 90.0388 99.375 94.6947C99.375 99.3506 95.6006 103.125 90.9447 103.125H89.6455C86.3575 103.125 83.292 101.464 81.4959 98.7104L73.379 86.2644ZM73.379 86.2644L39.1266 33.7441M69.375 33.7372L39.1266 33.7441M39.1266 33.7441L21.5635 33.7481C16.9034 33.7491 13.125 29.9717 13.125 25.3115C13.125 20.6522 16.9022 16.875 21.5616 16.875H22.8545C26.1425 16.875 29.208 18.5356 31.0041 21.2896L39.1266 33.7441Z\"\n      stroke=\"#286DF8\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M99.3626 50.625V43.125V24.375V16.875L86.2376 16.875C76.9178 16.875 69.3626 24.4302 69.3626 33.75C69.3626 43.0698 76.9178 50.625 86.2376 50.625H99.3626Z\"\n      fill=\"#E6E6E6\"\n    />\n    <path\n      d=\"M13.1126 103.125L13.1126 69.3751L26.2376 69.375C35.5574 69.375 43.1126 76.9302 43.1126 86.25C43.1126 95.5698 35.5574 103.125 26.2376 103.125L13.1126 103.125Z\"\n      fill=\"white\"\n    />\n    <path\n      d=\"M99.3626 43.125H110.613M99.3626 43.125V24.375M99.3626 43.125V50.625M99.3626 24.375H110.613M99.3626 24.375V16.875M99.3626 50.625H86.2376C76.9178 50.625 69.3626 43.0698 69.3626 33.75C69.3626 24.4302 76.9178 16.875 86.2376 16.875L99.3626 16.875M99.3626 50.625V54.375M99.3626 16.875V13.125M13.1126 103.125L26.2376 103.125C35.5574 103.125 43.1126 95.5698 43.1126 86.25C43.1126 76.9302 35.5574 69.375 26.2376 69.375L13.1126 69.3751M13.1126 103.125L13.1126 69.3751M13.1126 103.125L13.1126 106.875M13.1126 69.3751V65.6251\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgConnect;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/connected.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgConnected = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M48.0722 48.0722L53.4375 53.4375L66.5625 66.5625L71.9324 71.9416L82.5 61.3648C89.0901 54.7747 89.0901 44.0901 82.5 37.5C75.9099 30.9099 65.2253 30.9099 58.6352 37.5L48.0722 48.0722Z\"\n      fill=\"#E6E6E6\"\n    />\n    <path\n      d=\"M48.0722 48.0722L58.6352 37.5C65.2253 30.9099 75.9099 30.9099 82.5 37.5M48.0722 48.0722L43.125 43.125M48.0722 48.0722L53.4375 53.4375M71.9324 71.9416L82.5 61.3648C89.0901 54.7747 89.0901 44.0901 82.5 37.5M71.9324 71.9416L76.875 76.8842M71.9324 71.9416L66.5625 66.5625M82.5 37.5L105 15M53.4375 53.4375L43.125 63.75M53.4375 53.4375L66.5625 66.5625M66.5625 66.5625L56.25 76.875\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M71.9278 71.937L48.0676 48.0675L37.5 58.6443C30.9099 65.2344 30.9099 75.9191 37.5 82.5092C44.0901 89.0993 54.7748 89.0993 61.3649 82.5092L71.9278 71.937Z\"\n      fill=\"white\"\n    />\n    <path\n      d=\"M71.9278 71.937L61.3649 82.5092C54.7748 89.0993 44.0901 89.0993 37.5 82.5092M71.9278 71.937L48.0676 48.0675M71.9278 71.937L76.875 76.8842M48.0676 48.0675L37.5 58.6443C30.9099 65.2344 30.9099 75.9191 37.5 82.5092M48.0676 48.0675L43.125 43.125M37.5 82.5092L15 105\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M24.375 24.375L33.75 33.75L52.5 15\"\n      stroke=\"#286DF8\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgConnected;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/disconnected.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgDisconnected = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M60 83.8554L36.1351 59.9906L26.25 69.8849C19.6599 76.475 19.6599 87.1597 26.25 93.7498C32.8401 100.34 43.5248 100.34 50.1149 93.7498L60 83.8554Z\"\n      fill=\"white\"\n    />\n    <path\n      d=\"M60 83.8554L50.1149 93.7498C43.5248 100.34 32.8401 100.34 26.25 93.7498M60 83.8554L36.1351 59.9906M60 83.8554L63.75 87.6055M36.1351 59.9906L26.25 69.8849C19.6599 76.475 19.6599 87.1597 26.25 93.7498M36.1351 59.9906L32.3946 56.25M26.25 93.7498L15 105\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M60 36.1443L65.3033 41.4476L78.5616 54.7059L83.8649 60.0092L93.75 50.1148C100.34 43.5247 100.34 32.8401 93.75 26.25C87.1599 19.6599 76.4752 19.6599 69.8851 26.25L60 36.1443Z\"\n      fill=\"#E6E6E6\"\n    />\n    <path\n      d=\"M65.3033 41.4476L56.25 50.5009M65.3033 41.4476L60 36.1443M65.3033 41.4476L78.5616 54.7059M60 36.1443L69.8851 26.25C76.4752 19.6599 87.1599 19.6599 93.75 26.25M60 36.1443L56.25 32.3942M83.8649 60.0092L93.75 50.1148C100.34 43.5247 100.34 32.8401 93.75 26.25M83.8649 60.0092L78.5616 54.7059M83.8649 60.0092L87.6054 63.7498M78.5616 54.7059L69.5177 63.7498M93.75 26.25L105 15\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M30 45H18.75M45 30V18.75M33.75 33.75L24.375 24.375\"\n      stroke=\"#EC5B13\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgDisconnected;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/empty.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgEmpty = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M18.75 71.25V90C18.75 94.1421 22.1079 97.5 26.25 97.5H93.75C97.8921 97.5 101.25 94.1422 101.25 90V71.25H18.75Z\"\n      fill=\"#E6E6E6\"\n    />\n    <path d=\"M18.75 71.25H101.25L86.25 48.75H33.75L18.75 71.25Z\" fill=\"white\" />\n    <path\n      d=\"M18.75 71.25V90C18.75 94.1421 22.1079 97.5 26.25 97.5H93.75C97.8921 97.5 101.25 94.1422 101.25 90V71.25M18.75 71.25H101.25M18.75 71.25L33.75 48.75H86.25L101.25 71.25M54.375 80.625H65.625\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M43.125 35.625L33.75 26.25M76.875 35.625L86.25 26.25M60 28.125V15\"\n      stroke=\"#FFC733\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgEmpty;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/end-of-line.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgEndOfLine = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M67.5 33.75H88.125C93.3027 33.75 97.5 29.5527 97.5 24.375C97.5 19.1973 93.3027 15 88.125 15H76.875C71.6973 15 67.5 19.1973 67.5 24.375V33.75ZM67.5 33.75H15M67.5 33.75V82.5\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M30 82.5H105\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M41.25 93.75H93.75\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M52.5 105H82.5\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgEndOfLine;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/index.ts",
    "content": "export { default as SvgBrokenKey } from \"@opal/illustrations/broken-key\";\nexport { default as SvgConnect } from \"@opal/illustrations/connect\";\nexport { default as SvgConnected } from \"@opal/illustrations/connected\";\nexport { default as SvgDisconnected } from \"@opal/illustrations/disconnected\";\nexport { default as SvgEmpty } from \"@opal/illustrations/empty\";\nexport { default as SvgEndOfLine } from \"@opal/illustrations/end-of-line\";\nexport { default as SvgLimitAlert } from \"@opal/illustrations/limit-alert\";\nexport { default as SvgLongWait } from \"@opal/illustrations/long-wait\";\nexport { default as SvgNoAccess } from \"@opal/illustrations/no-access\";\nexport { default as SvgNoResult } from \"@opal/illustrations/no-result\";\nexport { default as SvgNotFound } from \"@opal/illustrations/not-found\";\nexport { default as SvgOverflow } from \"@opal/illustrations/overflow\";\nexport { default as SvgPlugBroken } from \"@opal/illustrations/plug-broken\";\nexport { default as SvgTimeout } from \"@opal/illustrations/timeout\";\nexport { default as SvgUnPlugged } from \"@opal/illustrations/un-plugged\";\nexport { default as SvgUsageAlert } from \"@opal/illustrations/usage-alert\";\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/limit-alert.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgLimitAlert = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M15 82.5C15 78.3579 18.3579 75 22.5 75L97.5 75C101.642 75 105 78.3579 105 82.5V90C105 94.1421 101.642 97.5 97.5 97.5L22.5 97.5C18.3579 97.5 15 94.1421 15 90V82.5Z\"\n      fill=\"#FBEAE4\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M93.75 86.25H78.75\"\n      stroke=\"#EC5B13\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M67.5 86.2499H26.25\"\n      stroke=\"#F5A88B\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M15 48.75C15 44.6079 18.3579 41.25 22.5 41.25L52.5 41.25C56.6421 41.25 60 44.6079 60 48.75L60 56.25C60 60.3921 56.6421 63.75 52.5 63.75H22.5C18.3579 63.75 15 60.3921 15 56.25L15 48.75Z\"\n      fill=\"#F0F0F0\"\n    />\n    <path\n      d=\"M45 52.5H26.25M52.5 63.75H22.5C18.3579 63.75 15 60.3921 15 56.25L15 48.75C15 44.6079 18.3579 41.25 22.5 41.25L52.5 41.25C56.6421 41.25 60 44.6079 60 48.75L60 56.25C60 60.3921 56.6421 63.75 52.5 63.75Z\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M86.25 41.25C81.0723 41.25 76.875 45.4473 76.875 50.625L76.875 63.75L86.25 63.75L95.625 63.75V50.625C95.625 45.4473 91.4277 41.25 86.25 41.25Z\"\n      fill=\"#FBEAE4\"\n    />\n    <path\n      d=\"M76.875 63.75L76.875 50.625C76.875 45.4473 81.0723 41.25 86.25 41.25C91.4277 41.25 95.625 45.4473 95.625 50.625V63.75M76.875 63.75L86.25 63.75M76.875 63.75L73.125 63.75M95.625 63.75H99.375M95.625 63.75L86.25 63.75M86.25 52.5V63.75M76.875 33.75L71.25 28.125M95.625 33.75L101.25 28.125M86.25 30L86.25 22.5\"\n      stroke=\"#EC5B13\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgLimitAlert;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/long-wait.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgLongWait = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      fillRule=\"evenodd\"\n      clipRule=\"evenodd\"\n      d=\"M103.253 47.5404C104.391 51.4971 105 55.6774 105 60C105 84.8528 84.8528 105 60 105C35.1472 105 15 84.8528 15 60C15 35.1472 35.1472 15 60 15C64.3226 15 68.5029 15.6095 72.4596 16.7472C70.4991 20.0854 69.375 23.9739 69.375 28.125C69.375 40.5514 79.4486 50.625 91.875 50.625C96.0261 50.625 99.9146 49.5009 103.253 47.5404Z\"\n      fill=\"#F0F0F0\"\n    />\n    <path\n      d=\"M69.375 28.125C69.375 40.5514 79.4486 50.625 91.875 50.625C96.0261 50.625 99.9146 49.5009 103.253 47.5404C109.908 43.6322 114.375 36.4003 114.375 28.125C114.375 15.6986 104.301 5.625 91.875 5.625C83.5997 5.625 76.3678 10.0925 72.4596 16.7472C70.4991 20.0854 69.375 23.9739 69.375 28.125Z\"\n      fill=\"white\"\n    />\n    <path\n      d=\"M54.1223 104.615C56.0462 104.866 58.0077 105 60 105C61.9911 105 63.9513 104.866 65.874 104.615M42.7771 101.576C39.1175 100.058 35.7047 98.0716 32.6074 95.6909M87.3889 95.6909C84.2914 98.0715 80.8791 100.058 77.2192 101.576M24.3054 87.3889C21.9251 84.2915 19.9377 80.8789 18.4204 77.2192M101.576 77.2192C100.058 80.8791 98.0715 84.2914 95.6909 87.3889M15.3809 54.1223C15.1299 56.046 15 58.0079 15 60C15 61.9909 15.1302 63.9515 15.3809 65.874M104.615 65.874C104.866 63.9513 105 61.9911 105 60C105 58.0077 104.866 56.0462 104.615 54.1223M18.4204 42.7771C19.9379 39.1177 21.925 35.7046 24.3054 32.6074M32.6074 24.3054C35.7046 21.925 39.1177 19.9379 42.7771 18.4204M65.874 15.3809C63.9515 15.1302 61.9909 15 60 15C58.0079 15 56.046 15.1299 54.1223 15.3809\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M60 33.0001V60.0001L78 69.0001\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M84.375 20.625H99.375L84.375 35.625H99.375\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgLongWait;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/no-access.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgNoAccess = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M18.75 22.5V105L60 105L101.25 105V22.5C101.25 18.3578 97.8921 15 93.75 15H60H26.25C22.1079 15 18.75 18.3578 18.75 22.5Z\"\n      fill=\"white\"\n    />\n    <path\n      d=\"M18.75 105V22.5C18.75 18.3578 22.1079 15 26.25 15H60M18.75 105L60 105M18.75 105L11.25 105M101.25 105V22.5C101.25 18.3578 97.8921 15 93.75 15H60M101.25 105L60 105M101.25 105H108.75M60 93.75V105M60 15V26.25\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M46.875 58.1249V50.625C46.875 43.3762 52.7512 37.5 60 37.5C67.2487 37.5 73.125 43.3762 73.125 50.625V58.125M46.875 58.1249L44.9999 58.1249C42.9289 58.125 41.25 59.8039 41.25 61.8749V78.75C41.25 80.821 42.9289 82.5 45 82.5L75 82.5C77.071 82.5 78.75 80.821 78.75 78.75V61.875C78.75 59.8039 77.071 58.125 75 58.125H73.125M46.875 58.1249L73.125 58.125M60 67.4999V73.1249\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgNoAccess;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/no-result.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgNoResult = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path d=\"M91.875 45H28.125L11.25 112.5H108.75L91.875 45Z\" fill=\"white\" />\n    <path\n      d=\"M26.25 45L50.0345 23.8582C52.8762 21.3323 56.4381 20.0693 60 20.0693C63.5619 20.0693 67.1238 21.3323 69.9655 23.8582L93.75 45H26.25Z\"\n      fill=\"#E6E6E6\"\n    />\n    <path\n      d=\"M60 7.5V20.0693M60 20.0693C56.4381 20.0693 52.8762 21.3323 50.0345 23.8582L26.25 45H93.75L69.9655 23.8582C67.1238 21.3323 63.5619 20.0693 60 20.0693Z\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M43.125 99.375L33.75 90M60 91.875V78.75M76.875 99.375L86.25 90\"\n      stroke=\"#FFC733\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgNoResult;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/not-found.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgNotFound = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M61.875 95.625C80.5146 95.625 95.625 80.5146 95.625 61.875C95.625 43.2354 80.5146 28.125 61.875 28.125C43.2354 28.125 28.125 43.2354 28.125 61.875C28.125 80.5146 43.2354 95.625 61.875 95.625Z\"\n      fill=\"white\"\n    />\n    <path\n      d=\"M103.125 103.125L85.7109 85.7109M95.625 61.875C95.625 80.5146 80.5146 95.625 61.875 95.625C43.2354 95.625 28.125 80.5146 28.125 61.875C28.125 43.2354 43.2354 28.125 61.875 28.125C80.5146 28.125 95.625 43.2354 95.625 61.875Z\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M56.713 46.3302C54.7486 47.4847 53.2561 49.2972 52.5 51.4466H51.5625V51.1341C52.3923 48.7766 54.0843 46.7901 56.239 45.5237C58.3943 44.257 60.9272 43.7937 63.3911 44.2163C65.855 44.639 68.091 45.9183 69.7009 47.8308C71.3108 49.7433 72.1912 52.1645 72.1875 54.6643C72.1868 58.3647 69.5002 61.0222 67.0935 62.6734C65.8647 63.5165 64.6397 64.1423 63.728 64.5594C63.2713 64.7682 62.8885 64.9259 62.6184 65.0318V67.5H61.875L61.875 64.3111C61.875 64.3111 71.25 61.095 71.25 54.6628C71.2534 52.3842 70.4503 50.178 68.9829 48.4348C67.5155 46.6917 65.4785 45.5241 63.2328 45.1389C60.987 44.7537 58.6774 45.1757 56.713 46.3302Z\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M62.8125 76.875H60.9375V78.75H62.8125V76.875Z\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M20.625 50.625H11.25M30 30L22.5 22.5M50.625 20.625V11.25\"\n      stroke=\"#FFC733\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgNotFound;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/overflow.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgOverflow = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M22.5 71.2501L25.3301 91.0607C25.8579 94.7555 29.0223 97.5 32.7547 97.5H87.2453C90.9777 97.5 94.1421 94.7555 94.6699 91.0607L97.5 71.2501H22.5Z\"\n      fill=\"#E6E6E6\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M18.7501 46.8752L78.5183 52.4717M32.7965 34.583L91.8752 45.0002M45.1839 22.5002L103.125 38.0255M90.0002 61.8752H30.0002\"\n      stroke=\"#EC5B13\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgOverflow;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/plug-broken.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgPlugBroken = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M31.875 78.75L24.375 71.25M50.625 78.75L58.125 71.25M41.25 73.125V63.75\"\n      stroke=\"#EC5B13\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M97.5 30H90H71.25H63.75V43.125C63.75 52.4448 71.3052 60 80.625 60C89.9448 60 97.5 52.4448 97.5 43.125V30Z\"\n      fill=\"#E6E6E6\"\n    />\n    <path\n      d=\"M50.625 90H95.625C99.7671 90 103.125 93.3579 103.125 97.5C103.125 101.642 99.7671 105 95.625 105H88.125C83.9829 105 80.625 101.642 80.625 97.5V60M31.875 90H16.875M90 30V18.75M90 30H97.5M90 30H71.25M97.5 30V43.125C97.5 52.4448 89.9448 60 80.625 60M97.5 30H103.125M63.75 30V43.125C63.75 52.4448 71.3052 60 80.625 60M63.75 30H71.25M63.75 30H58.125M71.25 30V18.75\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgPlugBroken;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/timeout.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgTimeout = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M26.25 101.25H78.75V93.75L62.6392 83.3931C56.4628 79.4225 48.5372 79.4225 42.3608 83.3931L26.25 93.75V101.25Z\"\n      fill=\"#E6E6E6\"\n    />\n    <path\n      d=\"M74.4446 77.8572L52.5 63.75L30.5554 77.8572C27.8721 79.5822 26.25 82.5533 26.25 85.7433V93.75L42.3608 83.3931C48.5372 79.4225 56.4628 79.4225 62.6392 83.3931L78.75 93.75V85.7433C78.75 82.5533 77.1279 79.5822 74.4446 77.8572Z\"\n      fill=\"white\"\n    />\n    <path\n      fillRule=\"evenodd\"\n      clipRule=\"evenodd\"\n      d=\"M26.25 26.25H68.7803C67.9512 28.5958 67.5 31.1202 67.5 33.75C67.5 40.0285 70.0716 45.7064 74.219 49.7878L52.5 63.75L30.5554 49.6428C27.8721 47.9178 26.25 44.9467 26.25 41.7567V26.25Z\"\n      fill=\"white\"\n    />\n    <path\n      d=\"M112.5 33.75C112.5 21.3236 102.426 11.25 90 11.25C80.2034 11.25 71.8691 17.511 68.7803 26.25C67.9512 28.5958 67.5 31.1202 67.5 33.75C67.5 40.0285 70.0716 45.7064 74.219 49.7878C78.2801 53.7843 83.8521 56.25 90 56.25C102.426 56.25 112.5 46.1764 112.5 33.75Z\"\n      fill=\"#F0F0F0\"\n    />\n    <path\n      d=\"M52.5 63.75L30.5554 49.6428C27.8721 47.9178 26.25 44.9467 26.25 41.7567V26.25M52.5 63.75L74.4446 77.8572C77.1279 79.5822 78.75 82.5533 78.75 85.7433V101.25M52.5 63.75L30.5554 77.8572C27.8721 79.5822 26.25 82.5533 26.25 85.7433V101.25M52.5 63.75L72.6052 50.8252M26.25 26.25H18.75M26.25 26.25H66.8006M78.75 101.25H26.25M78.75 101.25H86.25M26.25 101.25H18.75\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M82.5 26.25H97.5L82.5 41.25H97.5\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgTimeout;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/un-plugged.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgUnPlugged = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      fillRule=\"evenodd\"\n      clipRule=\"evenodd\"\n      d=\"M56.25 16.875C43.8236 16.875 33.75 26.9486 33.75 39.375L33.75 54.375C33.75 66.8014 43.8236 76.875 56.25 76.875H71.25C83.6764 76.875 93.75 66.8014 93.75 54.375V39.375C93.75 26.9486 83.6764 16.875 71.25 16.875H56.25ZM67.5 65.625V60C67.5 56.8934 64.9816 54.375 61.875 54.375C58.7684 54.375 56.25 56.8934 56.25 60V65.625H67.5Z\"\n      fill=\"#F0F0F0\"\n    />\n    <path\n      d=\"M67.5 60V65.625H56.25V60C56.25 56.8934 58.7684 54.375 61.875 54.375C64.9816 54.375 67.5 56.8934 67.5 60Z\"\n      fill=\"white\"\n    />\n    <path\n      d=\"M48.75 46.875V35.625M75 46.875V35.625M67.5 65.625V60C67.5 56.8934 64.9816 54.375 61.875 54.375C58.7684 54.375 56.25 56.8934 56.25 60V65.625H67.5ZM56.25 76.875H71.25C83.6764 76.875 93.75 66.8014 93.75 54.375V39.375C93.75 26.9486 83.6764 16.875 71.25 16.875H56.25C43.8236 16.875 33.75 26.9486 33.75 39.375L33.75 54.375C33.75 66.8014 43.8236 76.875 56.25 76.875Z\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path d=\"M26.25 87.1875V97.5H28.125V87.1875H26.25Z\" fill=\"#F0F0F0\" />\n    <path\n      d=\"M52.5 88.125V97.5H50.625V88.125C50.625 87.6072 51.0447 87.1875 51.5625 87.1875C52.0803 87.1875 52.5 87.6072 52.5 88.125Z\"\n      fill=\"#F0F0F0\"\n    />\n    <path\n      d=\"M26.25 87.1875V97.5H28.125V87.1875H26.25Z\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M52.5 88.125V97.5H50.625V88.125C50.625 87.6072 51.0447 87.1875 51.5625 87.1875C52.0803 87.1875 52.5 87.6072 52.5 88.125Z\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M18.1798 109.239C16.3504 108.958 15 107.384 15 105.533V97.5H63.75V105V108.75C63.75 110.821 62.0711 112.5 60 112.5H39.6618C39.4709 112.5 39.2802 112.485 39.0916 112.456L18.1798 109.239Z\"\n      fill=\"#E6E6E6\"\n    />\n    <path\n      d=\"M63.75 105H105M63.75 105V108.75C63.75 110.821 62.0711 112.5 60 112.5H39.6618C39.4709 112.5 39.2802 112.485 39.0916 112.456L18.1798 109.239C16.3504 108.958 15 107.384 15 105.533V97.5H63.75V105Z\"\n      stroke=\"#A4A4A4\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M93.75 15L95.625 9.37498M103.125 31.875L108.75 33.75M101.25 22.5L106.875 18.75\"\n      stroke=\"#EC5B13\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgUnPlugged;\n"
  },
  {
    "path": "web/lib/opal/src/illustrations/usage-alert.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nconst SvgUsageAlert = ({ size, ...props }: IconProps) => (\n  <svg\n    width={size}\n    height={size}\n    viewBox=\"0 0 120 120\"\n    fill=\"none\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n    {...props}\n  >\n    <path\n      d=\"M15 90C15 85.8578 18.3579 82.5 22.5 82.5L60 82.5C64.1421 82.5 67.5 85.8578 67.5 90L67.5 97.5C67.5 101.642 64.1421 105 60 105H22.5C18.3579 105 15 101.642 15 97.5L15 90Z\"\n      fill=\"#F0F0F0\"\n    />\n    <path\n      d=\"M15 22.5C15 18.3579 18.3579 15 22.5 15H45C49.1421 15 52.5 18.3579 52.5 22.5L52.5 29.9999C52.5 34.1421 49.1421 37.4999 45 37.4999H22.5C18.3579 37.4999 15 34.1421 15 29.9999V22.5Z\"\n      fill=\"#F0F0F0\"\n    />\n    <path\n      d=\"M52.5 93.75H26.25M37.5 26.25H26.25M22.5 15H45C49.1421 15 52.5 18.3579 52.5 22.5L52.5 29.9999C52.5 34.1421 49.1421 37.4999 45 37.4999H22.5C18.3579 37.4999 15 34.1421 15 29.9999V22.5C15 18.3579 18.3579 15 22.5 15ZM60 105H22.5C18.3579 105 15 101.642 15 97.5L15 90C15 85.8578 18.3579 82.5 22.5 82.5L60 82.5C64.1421 82.5 67.5 85.8578 67.5 90L67.5 97.5C67.5 101.642 64.1421 105 60 105Z\"\n      stroke=\"#CCCCCC\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M78.75 60H71.25M90 37.5V30M103.125 50.625H110.625M99.375 41.25L105 35.625M82.5 71.25L22.5 71.25C18.3579 71.25 15 67.8922 15 63.75V56.25C15 52.1079 18.3579 48.75 22.5 48.75L82.5 48.75C86.6421 48.75 90 52.1079 90 56.25V63.75C90 67.8922 86.6421 71.25 82.5 71.25Z\"\n      stroke=\"#EC5B13\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n    <path\n      d=\"M60 60H26.25\"\n      stroke=\"#F5A88B\"\n      strokeWidth={3.5}\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    />\n  </svg>\n);\nexport default SvgUsageAlert;\n"
  },
  {
    "path": "web/lib/opal/src/layouts/README.md",
    "content": "# @opal/layouts\n\n**Import:** `import { Content, ContentAction, IllustrationContent } from \"@opal/layouts\";`\n\nLayout primitives for composing content blocks. These components handle sizing, font selection, icon alignment, and optional inline editing — things that are tedious to get right by hand and easy to get wrong.\n\n## Components\n\n| Component | Description | Docs |\n|---|---|---|\n| [`Content`](./content/README.md) | Icon + title + description row. Routes to an internal layout (`ContentXl`, `ContentLg`, `ContentMd`, or `ContentSm`) based on `sizePreset` and `variant`. | [Content README](./content/README.md) |\n| [`ContentAction`](./content-action/README.md) | Wraps `Content` in a flex-row with an optional `rightChildren` slot for action buttons. Adds padding alignment via the shared `SizeVariant` scale. | [ContentAction README](./content-action/README.md) |\n| [`IllustrationContent`](./illustration-content/README.md) | Center-aligned illustration + title + description stack for empty states, error pages, and placeholders. | [IllustrationContent README](./illustration-content/README.md) |\n\n## Quick Start\n\n```tsx\nimport { Content, ContentAction, IllustrationContent } from \"@opal/layouts\";\nimport { Button } from \"@opal/components\";\nimport SvgSettings from \"@opal/icons/settings\";\nimport SvgNoResult from \"@opal/illustrations/no-result\";\n\n// Simple heading\n<Content\n  icon={SvgSettings}\n  title=\"Account Settings\"\n  description=\"Manage your preferences\"\n  sizePreset=\"headline\"\n  variant=\"heading\"\n/>\n\n// Label with tag\n<Content\n  icon={SvgSettings}\n  title=\"OpenAI\"\n  description=\"GPT\"\n  sizePreset=\"main-content\"\n  variant=\"section\"\n  tag={{ title: \"Default\", color: \"blue\" }}\n/>\n\n// Row with action button\n<ContentAction\n  icon={SvgSettings}\n  title=\"Provider Name\"\n  description=\"Some description\"\n  sizePreset=\"main-content\"\n  variant=\"section\"\n  paddingVariant=\"lg\"\n  rightChildren={\n    <Button icon={SvgSettings} prominence=\"tertiary\" />\n  }\n/>\n\n// Empty state with illustration\n<IllustrationContent\n  illustration={SvgNoResult}\n  title=\"No results found\"\n  description=\"Try adjusting your search or filters.\"\n/>\n```\n\n## Architecture\n\n### Two-axis design (`Content`)\n\n`Content` uses a two-axis system:\n\n- **`sizePreset`** — controls sizing tokens (icon size, padding, gap, font, line-height).\n- **`variant`** — controls structural layout (icon placement, description rendering).\n\nValid preset/variant combinations are enforced at the type level via a discriminated union. See the [Content README](./content/README.md) for the full matrix.\n\n### Shared size scale (`ContentAction`)\n\n`ContentAction` uses the same `SizeVariant` scale (`lg`, `md`, `sm`, `xs`, `2xs`, `fit`) defined in `@opal/shared` that powers `Interactive.Container` and `Button`. This ensures that padding on content rows aligns with adjacent interactive elements at the same size.\n\n## Exports\n\nFrom `@opal/layouts`:\n\n```ts\n// Components\nContent\nContentAction\nIllustrationContent\n\n// Types\nContentProps\nContentActionProps\nIllustrationContentProps\nSizePreset\nContentVariant\n```\n\n## Internal Layout Components\n\nThese are not exported — `Content` routes to them automatically:\n\n| Layout | Used when | File |\n|---|---|---|\n| `ContentXl` | `sizePreset` is `headline` or `section` with `variant=\"heading\"` | `content/ContentXl.tsx` |\n| `ContentLg` | `sizePreset` is `headline` or `section` with `variant=\"section\"` | `content/ContentLg.tsx` |\n| `ContentMd` | `sizePreset` is `main-content`, `main-ui`, or `secondary` with `variant=\"section\"` | `content/ContentMd.tsx` |\n| `ContentSm` | `variant=\"body\"` | `content/ContentSm.tsx` |\n"
  },
  {
    "path": "web/lib/opal/src/layouts/cards/header-layout/CardHeaderLayout.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { CardHeaderLayout } from \"@opal/layouts\";\nimport { Button } from \"@opal/components\";\nimport {\n  SvgArrowExchange,\n  SvgCheckSquare,\n  SvgGlobe,\n  SvgSettings,\n  SvgUnplug,\n} from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport type { Decorator } from \"@storybook/react\";\n\nconst withTooltipProvider: Decorator = (Story) => (\n  <TooltipPrimitive.Provider>\n    <Story />\n  </TooltipPrimitive.Provider>\n);\n\nconst meta = {\n  title: \"Layouts/CardHeaderLayout\",\n  component: CardHeaderLayout,\n  tags: [\"autodocs\"],\n  decorators: [withTooltipProvider],\n  parameters: {\n    layout: \"centered\",\n  },\n} satisfies Meta<typeof CardHeaderLayout>;\n\nexport default meta;\n\ntype Story = StoryObj<typeof meta>;\n\n// ---------------------------------------------------------------------------\n// Stories\n// ---------------------------------------------------------------------------\n\nexport const Default: Story = {\n  render: () => (\n    <div className=\"w-[28rem] border rounded-16\">\n      <CardHeaderLayout\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        icon={SvgGlobe}\n        title=\"Google Search\"\n        description=\"Web search provider\"\n        rightChildren={\n          <Button prominence=\"tertiary\" rightIcon={SvgArrowExchange}>\n            Connect\n          </Button>\n        }\n      />\n    </div>\n  ),\n};\n\nexport const WithBothSlots: Story = {\n  render: () => (\n    <div className=\"w-[28rem] border rounded-16\">\n      <CardHeaderLayout\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        icon={SvgGlobe}\n        title=\"Google Search\"\n        description=\"Currently the default provider.\"\n        rightChildren={\n          <Button variant=\"action\" prominence=\"tertiary\" icon={SvgCheckSquare}>\n            Current Default\n          </Button>\n        }\n        bottomRightChildren={\n          <>\n            <Button\n              icon={SvgUnplug}\n              tooltip=\"Disconnect\"\n              prominence=\"tertiary\"\n              size=\"sm\"\n            />\n            <Button\n              icon={SvgSettings}\n              tooltip=\"Edit\"\n              prominence=\"tertiary\"\n              size=\"sm\"\n            />\n          </>\n        }\n      />\n    </div>\n  ),\n};\n\nexport const RightChildrenOnly: Story = {\n  render: () => (\n    <div className=\"w-[28rem] border rounded-16\">\n      <CardHeaderLayout\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        icon={SvgGlobe}\n        title=\"OpenAI\"\n        description=\"Not configured\"\n        rightChildren={\n          <Button prominence=\"tertiary\" rightIcon={SvgArrowExchange}>\n            Connect\n          </Button>\n        }\n      />\n    </div>\n  ),\n};\n\nexport const NoRightChildren: Story = {\n  render: () => (\n    <div className=\"w-[28rem] border rounded-16\">\n      <CardHeaderLayout\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        icon={SvgGlobe}\n        title=\"Section Header\"\n        description=\"No actions on the right.\"\n      />\n    </div>\n  ),\n};\n\nexport const LongContent: Story = {\n  render: () => (\n    <div className=\"w-[28rem] border rounded-16\">\n      <CardHeaderLayout\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        icon={SvgGlobe}\n        title=\"Very Long Provider Name That Should Truncate\"\n        description=\"This is a much longer description that tests how the layout handles overflow when the content area needs to shrink.\"\n        rightChildren={\n          <Button variant=\"action\" prominence=\"tertiary\" icon={SvgCheckSquare}>\n            Current Default\n          </Button>\n        }\n        bottomRightChildren={\n          <>\n            <Button\n              icon={SvgUnplug}\n              prominence=\"tertiary\"\n              size=\"sm\"\n              tooltip=\"Disconnect\"\n            />\n            <Button\n              icon={SvgSettings}\n              prominence=\"tertiary\"\n              size=\"sm\"\n              tooltip=\"Edit\"\n            />\n          </>\n        }\n      />\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/lib/opal/src/layouts/cards/header-layout/README.md",
    "content": "# CardHeaderLayout\n\n**Import:** `import { CardHeaderLayout, type CardHeaderLayoutProps } from \"@opal/layouts\";`\n\nA card header layout that pairs a [`Content`](../../content/README.md) block with a right-side column of vertically stacked children.\n\n## Why CardHeaderLayout?\n\n[`ContentAction`](../../content-action/README.md) provides a single `rightChildren` slot. Card headers typically need two distinct right-side regions — a primary action on top and secondary actions on the bottom. `CardHeaderLayout` provides this with `rightChildren` and `bottomRightChildren` slots, with no padding or gap between them so the caller has full control over spacing.\n\n## Props\n\nInherits **all** props from [`Content`](../../content/README.md) (icon, title, description, sizePreset, variant, etc.) plus:\n\n| Prop | Type | Default | Description |\n|---|---|---|---|\n| `rightChildren` | `ReactNode` | `undefined` | Content rendered to the right of the Content block (top of right column). |\n| `bottomRightChildren` | `ReactNode` | `undefined` | Content rendered below `rightChildren` in the same column. Laid out as `flex flex-row`. |\n\n## Layout Structure\n\n```\n┌──────────────────────────────────────────────────────┐\n│ [Content (p-2, self-start)]  [rightChildren]         │\n│  icon + title + description  [bottomRightChildren]   │\n└──────────────────────────────────────────────────────┘\n```\n\n- Outer wrapper: `flex flex-row items-stretch w-full`\n- Content area: `flex-1 min-w-0 self-start p-2` — top-aligned with fixed padding\n- Right column: `flex flex-col items-end justify-between shrink-0` — no padding, no gap\n- `bottomRightChildren` wrapper: `flex flex-row` — lays children out horizontally\n\nThe right column uses `justify-between` so when both slots are present, `rightChildren` sits at the top and `bottomRightChildren` at the bottom.\n\n## Usage\n\n### Card with primary and secondary actions\n\n```tsx\nimport { CardHeaderLayout } from \"@opal/layouts\";\nimport { Button } from \"@opal/components\";\nimport { SvgGlobe, SvgSettings, SvgUnplug, SvgCheckSquare } from \"@opal/icons\";\n\n<CardHeaderLayout\n  icon={SvgGlobe}\n  title=\"Google Search\"\n  description=\"Web search provider\"\n  sizePreset=\"main-ui\"\n  variant=\"section\"\n  rightChildren={\n    <Button icon={SvgCheckSquare} variant=\"action\" prominence=\"tertiary\">\n      Current Default\n    </Button>\n  }\n  bottomRightChildren={\n    <>\n      <Button icon={SvgUnplug} size=\"sm\" prominence=\"tertiary\" tooltip=\"Disconnect\" />\n      <Button icon={SvgSettings} size=\"sm\" prominence=\"tertiary\" tooltip=\"Edit\" />\n    </>\n  }\n/>\n```\n\n### Card with only a connect action\n\n```tsx\n<CardHeaderLayout\n  icon={SvgCloud}\n  title=\"OpenAI\"\n  description=\"Not configured\"\n  sizePreset=\"main-ui\"\n  variant=\"section\"\n  rightChildren={\n    <Button rightIcon={SvgArrowExchange} prominence=\"tertiary\">\n      Connect\n    </Button>\n  }\n/>\n```\n\n### No right children\n\n```tsx\n<CardHeaderLayout\n  icon={SvgInfo}\n  title=\"Section Header\"\n  description=\"Description text\"\n  sizePreset=\"main-content\"\n  variant=\"section\"\n/>\n```\n\nWhen both `rightChildren` and `bottomRightChildren` are omitted, the component renders only the padded `Content`.\n"
  },
  {
    "path": "web/lib/opal/src/layouts/cards/header-layout/components.tsx",
    "content": "import { Content, type ContentProps } from \"@opal/layouts/content/components\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype CardHeaderLayoutProps = ContentProps & {\n  /** Content rendered to the right of the Content block. */\n  rightChildren?: React.ReactNode;\n\n  /** Content rendered below `rightChildren` in the same column. */\n  bottomRightChildren?: React.ReactNode;\n};\n\n// ---------------------------------------------------------------------------\n// CardHeaderLayout\n// ---------------------------------------------------------------------------\n\n/**\n * A card header layout that pairs a {@link Content} block (with `p-2`)\n * with a right-side column.\n *\n * The right column contains two vertically stacked slots —\n * `rightChildren` on top, `bottomRightChildren` below — with no\n * padding or gap between them.\n *\n * @example\n * ```tsx\n * <CardHeaderLayout\n *   icon={SvgGlobe}\n *   title=\"Google\"\n *   description=\"Search engine\"\n *   sizePreset=\"main-ui\"\n *   variant=\"section\"\n *   rightChildren={<Button>Connect</Button>}\n *   bottomRightChildren={\n *     <>\n *       <Button icon={SvgUnplug} size=\"sm\" prominence=\"tertiary\" />\n *       <Button icon={SvgSettings} size=\"sm\" prominence=\"tertiary\" />\n *     </>\n *   }\n * />\n * ```\n */\nfunction CardHeaderLayout({\n  rightChildren,\n  bottomRightChildren,\n  ...contentProps\n}: CardHeaderLayoutProps) {\n  const hasRight = rightChildren || bottomRightChildren;\n\n  return (\n    <div className=\"flex flex-row items-stretch w-full\">\n      <div className=\"flex-1 min-w-0 self-start p-2\">\n        <Content {...contentProps} />\n      </div>\n      {hasRight && (\n        <div className=\"flex flex-col items-end shrink-0\">\n          {rightChildren && <div className=\"flex-1\">{rightChildren}</div>}\n          {bottomRightChildren && (\n            <div className=\"flex flex-row\">{bottomRightChildren}</div>\n          )}\n        </div>\n      )}\n    </div>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Exports\n// ---------------------------------------------------------------------------\n\nexport { CardHeaderLayout, type CardHeaderLayoutProps };\n"
  },
  {
    "path": "web/lib/opal/src/layouts/content/Content.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { Content } from \"@opal/layouts\";\nimport { SvgSettings, SvgStar, SvgRefreshCw } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta = {\n  title: \"Layouts/Content\",\n  component: Content,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n} satisfies Meta<typeof Content>;\n\nexport default meta;\n\ntype Story = StoryObj<typeof meta>;\n\n// ---------------------------------------------------------------------------\n// XL stories (sizePreset: headline | section, variant: heading)\n// ---------------------------------------------------------------------------\n\nexport const XlHeadline: Story = {\n  args: {\n    sizePreset: \"headline\",\n    variant: \"heading\",\n    title: \"Welcome to Onyx\",\n    description: \"Your enterprise search and AI assistant platform.\",\n  },\n};\n\nexport const XlSection: Story = {\n  args: {\n    sizePreset: \"section\",\n    variant: \"heading\",\n    title: \"Configuration\",\n  },\n};\n\n// ---------------------------------------------------------------------------\n// LG stories (sizePreset: headline | section, variant: section)\n// ---------------------------------------------------------------------------\n\nexport const LgHeadline: Story = {\n  args: {\n    sizePreset: \"headline\",\n    variant: \"section\",\n    title: \"Connectors Overview\",\n  },\n};\n\nexport const LgSection: Story = {\n  args: {\n    sizePreset: \"section\",\n    variant: \"section\",\n    title: \"Data Sources\",\n  },\n};\n\n// ---------------------------------------------------------------------------\n// MD stories (sizePreset: main-content | main-ui | secondary, variant: section)\n// ---------------------------------------------------------------------------\n\nexport const MdMainContent: Story = {\n  args: {\n    sizePreset: \"main-content\",\n    variant: \"section\",\n    title: \"General Settings\",\n    description: \"Manage your workspace preferences.\",\n    icon: SvgSettings,\n  },\n};\n\nexport const MdWithTag: Story = {\n  args: {\n    sizePreset: \"main-ui\",\n    variant: \"section\",\n    title: \"Knowledge Graph\",\n    tag: { title: \"Beta\", color: \"blue\" },\n  },\n};\n\nexport const MdMuted: Story = {\n  args: {\n    sizePreset: \"secondary\",\n    variant: \"section\",\n    title: \"Advanced Options\",\n    description: \"Fine-tune model behavior and parameters.\",\n  },\n};\n\n// ---------------------------------------------------------------------------\n// SM stories (sizePreset: main-content | main-ui | secondary, variant: body)\n// ---------------------------------------------------------------------------\n\nexport const SmBody: Story = {\n  args: {\n    sizePreset: \"secondary\",\n    variant: \"body\",\n    title: \"Last synced 2 minutes ago\",\n  },\n};\n\nexport const SmStacked: Story = {\n  args: {\n    sizePreset: \"secondary\",\n    variant: \"body\",\n    title: \"Document count\",\n    orientation: \"stacked\",\n  },\n};\n\n// ---------------------------------------------------------------------------\n// Editable\n// ---------------------------------------------------------------------------\n\nexport const Editable: Story = {\n  args: {\n    sizePreset: \"main-ui\",\n    variant: \"section\",\n    title: \"Editable Title\",\n    editable: true,\n  },\n};\n\n// ---------------------------------------------------------------------------\n// MD — optional prop\n// ---------------------------------------------------------------------------\n\nexport const MdWithOptional: Story = {\n  args: {\n    sizePreset: \"main-content\",\n    variant: \"section\",\n    title: \"API Key\",\n    optional: true,\n  },\n};\n\n// ---------------------------------------------------------------------------\n// MD — auxIcon prop\n// ---------------------------------------------------------------------------\n\nexport const MdWithAuxIcon: Story = {\n  args: {\n    sizePreset: \"main-content\",\n    variant: \"section\",\n    title: \"Connection Status\",\n    auxIcon: \"warning\",\n  },\n};\n\n// ---------------------------------------------------------------------------\n// XL — moreIcon1 / moreIcon2 props\n// ---------------------------------------------------------------------------\n\nexport const XlWithMoreIcons: Story = {\n  args: {\n    sizePreset: \"headline\",\n    variant: \"heading\",\n    title: \"Dashboard\",\n    moreIcon1: SvgStar,\n    moreIcon2: SvgRefreshCw,\n  },\n};\n\n// ---------------------------------------------------------------------------\n// SM — prominence: muted\n// ---------------------------------------------------------------------------\n\nexport const SmMuted: Story = {\n  args: {\n    sizePreset: \"secondary\",\n    variant: \"body\",\n    title: \"Updated 5 min ago\",\n    prominence: \"muted\",\n  },\n};\n\n// ---------------------------------------------------------------------------\n// widthVariant: full\n// ---------------------------------------------------------------------------\n\nexport const WidthFull: Story = {\n  args: {\n    sizePreset: \"main-content\",\n    variant: \"section\",\n    title: \"Full Width Content\",\n    widthVariant: \"full\",\n  },\n  decorators: [\n    (Story) => (\n      <div style={{ width: 600, border: \"1px dashed gray\" }}>\n        <Story />\n      </div>\n    ),\n  ],\n};\n"
  },
  {
    "path": "web/lib/opal/src/layouts/content/ContentLg.tsx",
    "content": "\"use client\";\n\nimport { Button } from \"@opal/components/buttons/button/components\";\nimport type { ContainerSizeVariants } from \"@opal/types\";\nimport SvgEdit from \"@opal/icons/edit\";\nimport type { IconFunctionComponent, RichStr } from \"@opal/types\";\nimport { Text, type TextFont } from \"@opal/components/text/components\";\nimport { toPlainString } from \"@opal/components/text/InlineMarkdown\";\nimport { cn } from \"@opal/utils\";\nimport { useState } from \"react\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype ContentLgSizePreset = \"headline\" | \"section\";\n\ninterface ContentLgPresetConfig {\n  /** Icon width/height (CSS value). */\n  iconSize: string;\n  /** Tailwind padding class for the icon container. */\n  iconContainerPadding: string;\n  /** Gap between icon container and content (CSS value). */\n  gap: string;\n  /** Opal font name for the title (without `font-` prefix). */\n  titleFont: TextFont;\n  /** Title line-height — also used as icon container min-height (CSS value). */\n  lineHeight: string;\n  /** Button `size` prop for the edit button. Uses the shared `SizeVariant` scale. */\n  editButtonSize: ContainerSizeVariants;\n  /** Tailwind padding class for the edit button container. */\n  editButtonPadding: string;\n}\n\ninterface ContentLgProps {\n  /** Optional icon component. */\n  icon?: IconFunctionComponent;\n\n  /** Main title text. */\n  title: string | RichStr;\n\n  /** Optional description below the title. */\n  description?: string | RichStr;\n\n  /** Enable inline editing of the title. */\n  editable?: boolean;\n\n  /** Called when the user commits an edit. */\n  onTitleChange?: (newTitle: string) => void;\n\n  /** Size preset. Default: `\"headline\"`. */\n  sizePreset?: ContentLgSizePreset;\n\n  /** Ref forwarded to the root `<div>`. */\n  ref?: React.Ref<HTMLDivElement>;\n}\n\n// ---------------------------------------------------------------------------\n// Presets\n// ---------------------------------------------------------------------------\n\nconst CONTENT_LG_PRESETS: Record<ContentLgSizePreset, ContentLgPresetConfig> = {\n  headline: {\n    iconSize: \"2rem\",\n    iconContainerPadding: \"p-0.5\",\n    gap: \"0.25rem\",\n    titleFont: \"heading-h2\",\n    lineHeight: \"2.25rem\",\n    editButtonSize: \"md\",\n    editButtonPadding: \"p-1\",\n  },\n  section: {\n    iconSize: \"1.25rem\",\n    iconContainerPadding: \"p-1\",\n    gap: \"0rem\",\n    titleFont: \"heading-h3-muted\",\n    lineHeight: \"1.75rem\",\n    editButtonSize: \"sm\",\n    editButtonPadding: \"p-0.5\",\n  },\n};\n\n// ---------------------------------------------------------------------------\n// ContentLg\n// ---------------------------------------------------------------------------\n\nfunction ContentLg({\n  sizePreset = \"headline\",\n  icon: Icon,\n  title,\n  description,\n  editable,\n  onTitleChange,\n  ref,\n}: ContentLgProps) {\n  const [editing, setEditing] = useState(false);\n  const [editValue, setEditValue] = useState(toPlainString(title));\n\n  const config = CONTENT_LG_PRESETS[sizePreset];\n\n  function startEditing() {\n    setEditValue(toPlainString(title));\n    setEditing(true);\n  }\n\n  function commit() {\n    const value = editValue.trim();\n    if (value && value !== toPlainString(title)) onTitleChange?.(value);\n    setEditing(false);\n  }\n\n  return (\n    <div ref={ref} className=\"opal-content-lg\" style={{ gap: config.gap }}>\n      {Icon && (\n        <div\n          className={cn(\n            \"opal-content-lg-icon-container shrink-0\",\n            config.iconContainerPadding\n          )}\n          style={{ minHeight: config.lineHeight }}\n        >\n          <Icon\n            className=\"opal-content-lg-icon\"\n            style={{ width: config.iconSize, height: config.iconSize }}\n          />\n        </div>\n      )}\n\n      <div className=\"opal-content-lg-body\">\n        <div className=\"opal-content-lg-title-row\">\n          {editing ? (\n            <div className=\"opal-content-lg-input-sizer\">\n              <span\n                className={cn(\n                  \"opal-content-lg-input-mirror\",\n                  `font-${config.titleFont}`\n                )}\n              >\n                {editValue || \"\\u00A0\"}\n              </span>\n              <input\n                className={cn(\n                  \"opal-content-lg-input\",\n                  `font-${config.titleFont}`,\n                  \"text-text-04\"\n                )}\n                value={editValue}\n                onChange={(e) => setEditValue(e.target.value)}\n                size={1}\n                autoFocus\n                onFocus={(e) => e.currentTarget.select()}\n                onBlur={commit}\n                onKeyDown={(e) => {\n                  if (e.key === \"Enter\") commit();\n                  if (e.key === \"Escape\") {\n                    setEditValue(toPlainString(title));\n                    setEditing(false);\n                  }\n                }}\n                style={{ height: config.lineHeight }}\n              />\n            </div>\n          ) : (\n            <Text\n              font={config.titleFont}\n              color=\"inherit\"\n              maxLines={1}\n              title={toPlainString(title)}\n              onClick={editable ? startEditing : undefined}\n            >\n              {title}\n            </Text>\n          )}\n\n          {editable && !editing && (\n            <div\n              className={cn(\n                \"opal-content-lg-edit-button\",\n                config.editButtonPadding\n              )}\n            >\n              <Button\n                icon={SvgEdit}\n                prominence=\"internal\"\n                size={config.editButtonSize}\n                tooltip=\"Edit\"\n                tooltipSide=\"right\"\n                onClick={startEditing}\n              />\n            </div>\n          )}\n        </div>\n\n        {description && toPlainString(description) && (\n          <div className=\"opal-content-lg-description\">\n            <Text font=\"secondary-body\" color=\"text-03\" as=\"p\">\n              {description}\n            </Text>\n          </div>\n        )}\n      </div>\n    </div>\n  );\n}\n\nexport { ContentLg, type ContentLgProps, type ContentLgSizePreset };\n"
  },
  {
    "path": "web/lib/opal/src/layouts/content/ContentMd.tsx",
    "content": "\"use client\";\n\nimport { Button } from \"@opal/components/buttons/button/components\";\nimport { Tag, type TagProps } from \"@opal/components/tag/components\";\nimport type { ContainerSizeVariants } from \"@opal/types\";\nimport SvgAlertCircle from \"@opal/icons/alert-circle\";\nimport SvgAlertTriangle from \"@opal/icons/alert-triangle\";\nimport SvgEdit from \"@opal/icons/edit\";\nimport SvgXOctagon from \"@opal/icons/x-octagon\";\nimport type { IconFunctionComponent, RichStr } from \"@opal/types\";\nimport { Text, type TextFont } from \"@opal/components/text/components\";\nimport { toPlainString } from \"@opal/components/text/InlineMarkdown\";\nimport { cn } from \"@opal/utils\";\nimport { useRef, useState } from \"react\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype ContentMdSizePreset = \"main-content\" | \"main-ui\" | \"secondary\";\n\ntype ContentMdAuxIcon = \"info-gray\" | \"info-blue\" | \"warning\" | \"error\";\n\ntype ContentMdSuffix = \"optional\" | (string & {});\n\ninterface ContentMdPresetConfig {\n  iconSize: string;\n  iconContainerPadding: string;\n  iconColorClass: string;\n  titleFont: TextFont;\n  lineHeight: string;\n  /** Button `size` prop for the edit button. Uses the shared `SizeVariant` scale. */\n  editButtonSize: ContainerSizeVariants;\n  editButtonPadding: string;\n  optionalFont: TextFont;\n  /** Aux icon size = lineHeight − 2 × p-0.5. */\n  auxIconSize: string;\n  /** Left indent for the description so it aligns with the title (past the icon). */\n  descriptionIndent: string;\n}\n\ninterface ContentMdProps {\n  /** Optional icon component. */\n  icon?: IconFunctionComponent;\n\n  /** Main title text. */\n  title: string | RichStr;\n\n  /** Optional description text below the title. */\n  description?: string | RichStr;\n\n  /** Enable inline editing of the title. */\n  editable?: boolean;\n\n  /** Called when the user commits an edit. */\n  onTitleChange?: (newTitle: string) => void;\n\n  /**\n   * Muted suffix rendered beside the title.\n   * Use `\"optional\"` for the standard \"(Optional)\" label, or pass any string.\n   */\n  suffix?: ContentMdSuffix;\n\n  /** Auxiliary status icon rendered beside the title. */\n  auxIcon?: ContentMdAuxIcon;\n\n  /** Tag rendered beside the title. */\n  tag?: TagProps;\n\n  /** Size preset. Default: `\"main-ui\"`. */\n  sizePreset?: ContentMdSizePreset;\n\n  /** Ref forwarded to the root `<div>`. */\n  ref?: React.Ref<HTMLDivElement>;\n}\n\n// ---------------------------------------------------------------------------\n// Presets\n// ---------------------------------------------------------------------------\n\nconst CONTENT_MD_PRESETS: Record<ContentMdSizePreset, ContentMdPresetConfig> = {\n  \"main-content\": {\n    iconSize: \"1rem\",\n    iconContainerPadding: \"p-1\",\n    iconColorClass: \"text-text-04\",\n    titleFont: \"main-content-emphasis\",\n    lineHeight: \"1.5rem\",\n    editButtonSize: \"sm\",\n    editButtonPadding: \"p-0\",\n    optionalFont: \"main-content-muted\",\n    auxIconSize: \"1.25rem\",\n    descriptionIndent: \"1.625rem\",\n  },\n  \"main-ui\": {\n    iconSize: \"1rem\",\n    iconContainerPadding: \"p-0.5\",\n    iconColorClass: \"text-text-03\",\n    titleFont: \"main-ui-action\",\n    lineHeight: \"1.25rem\",\n    editButtonSize: \"xs\",\n    editButtonPadding: \"p-0\",\n    optionalFont: \"main-ui-muted\",\n    auxIconSize: \"1rem\",\n    descriptionIndent: \"1.375rem\",\n  },\n  secondary: {\n    iconSize: \"0.75rem\",\n    iconContainerPadding: \"p-0.5\",\n    iconColorClass: \"text-text-04\",\n    titleFont: \"secondary-action\",\n    lineHeight: \"1rem\",\n    editButtonSize: \"2xs\",\n    editButtonPadding: \"p-0\",\n    optionalFont: \"secondary-action\",\n    auxIconSize: \"0.75rem\",\n    descriptionIndent: \"1.125rem\",\n  },\n};\n\n// ---------------------------------------------------------------------------\n// ContentMd\n// ---------------------------------------------------------------------------\n\nconst AUX_ICON_CONFIG: Record<\n  ContentMdAuxIcon,\n  { icon: IconFunctionComponent; colorClass: string }\n> = {\n  \"info-gray\": { icon: SvgAlertCircle, colorClass: \"text-text-02\" },\n  \"info-blue\": { icon: SvgAlertCircle, colorClass: \"text-status-info-05\" },\n  warning: { icon: SvgAlertTriangle, colorClass: \"text-status-warning-05\" },\n  error: { icon: SvgXOctagon, colorClass: \"text-status-error-05\" },\n};\n\nfunction ContentMd({\n  icon: Icon,\n  title,\n  description,\n  editable,\n  onTitleChange,\n  suffix,\n  auxIcon,\n  tag,\n  sizePreset = \"main-ui\",\n  ref,\n}: ContentMdProps) {\n  const [editing, setEditing] = useState(false);\n  const [editValue, setEditValue] = useState(toPlainString(title));\n  const inputRef = useRef<HTMLInputElement>(null);\n\n  const config = CONTENT_MD_PRESETS[sizePreset];\n\n  function startEditing() {\n    setEditValue(toPlainString(title));\n    setEditing(true);\n  }\n\n  function commit() {\n    const value = editValue.trim();\n    if (value && value !== toPlainString(title)) onTitleChange?.(value);\n    setEditing(false);\n  }\n\n  return (\n    <div ref={ref} className=\"opal-content-md\">\n      <div\n        className=\"opal-content-md-header\"\n        data-editing={editing || undefined}\n      >\n        {Icon && (\n          <div\n            className={cn(\n              \"opal-content-md-icon-container shrink-0\",\n              config.iconContainerPadding\n            )}\n            style={{ minHeight: config.lineHeight }}\n          >\n            <Icon\n              className={cn(\"opal-content-md-icon\", config.iconColorClass)}\n              style={{ width: config.iconSize, height: config.iconSize }}\n            />\n          </div>\n        )}\n\n        <div className=\"opal-content-md-title-row\">\n          {editing ? (\n            <div className=\"opal-content-md-input-sizer\">\n              <span\n                className={cn(\n                  \"opal-content-md-input-mirror\",\n                  `font-${config.titleFont}`\n                )}\n              >\n                {editValue || \"\\u00A0\"}\n              </span>\n              <input\n                ref={inputRef}\n                className={cn(\n                  \"opal-content-md-input\",\n                  `font-${config.titleFont}`,\n                  \"text-text-04\"\n                )}\n                value={editValue}\n                onChange={(e) => setEditValue(e.target.value)}\n                size={1}\n                autoFocus\n                onFocus={(e) => e.currentTarget.select()}\n                onBlur={commit}\n                onKeyDown={(e) => {\n                  if (e.key === \"Enter\") commit();\n                  if (e.key === \"Escape\") {\n                    setEditValue(toPlainString(title));\n                    setEditing(false);\n                  }\n                }}\n                style={{ height: config.lineHeight }}\n              />\n            </div>\n          ) : (\n            <Text\n              font={config.titleFont}\n              color=\"inherit\"\n              maxLines={1}\n              title={toPlainString(title)}\n              onClick={editable ? startEditing : undefined}\n            >\n              {title}\n            </Text>\n          )}\n\n          {suffix && (\n            <Text font={config.optionalFont} color=\"text-03\">\n              {suffix === \"optional\" ? \"(Optional)\" : suffix}\n            </Text>\n          )}\n\n          {auxIcon &&\n            (() => {\n              const { icon: AuxIcon, colorClass } = AUX_ICON_CONFIG[auxIcon];\n              return (\n                <div\n                  className=\"opal-content-md-aux-icon shrink-0 p-0.5\"\n                  style={{ height: config.lineHeight }}\n                >\n                  <AuxIcon\n                    className={colorClass}\n                    style={{\n                      width: config.auxIconSize,\n                      height: config.auxIconSize,\n                    }}\n                  />\n                </div>\n              );\n            })()}\n\n          {tag && <Tag {...tag} />}\n\n          {editable && !editing && (\n            <div\n              className={cn(\n                \"opal-content-md-edit-button\",\n                config.editButtonPadding\n              )}\n            >\n              <Button\n                icon={SvgEdit}\n                prominence=\"internal\"\n                size={config.editButtonSize}\n                tooltip=\"Edit\"\n                tooltipSide=\"right\"\n                onClick={startEditing}\n              />\n            </div>\n          )}\n        </div>\n      </div>\n\n      {description && toPlainString(description) && (\n        <div\n          className=\"opal-content-md-description\"\n          style={Icon ? { paddingLeft: config.descriptionIndent } : undefined}\n        >\n          <Text font=\"secondary-body\" color=\"text-03\" as=\"p\">\n            {description}\n          </Text>\n        </div>\n      )}\n    </div>\n  );\n}\n\nexport {\n  ContentMd,\n  type ContentMdProps,\n  type ContentMdSizePreset,\n  type ContentMdSuffix,\n  type ContentMdAuxIcon,\n};\n"
  },
  {
    "path": "web/lib/opal/src/layouts/content/ContentSm.tsx",
    "content": "\"use client\";\n\nimport type { IconFunctionComponent, RichStr } from \"@opal/types\";\nimport { Text, type TextFont } from \"@opal/components/text/components\";\nimport { toPlainString } from \"@opal/components/text/InlineMarkdown\";\nimport { cn } from \"@opal/utils\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype ContentSmSizePreset = \"main-content\" | \"main-ui\" | \"secondary\";\ntype ContentSmOrientation = \"vertical\" | \"inline\" | \"reverse\";\ntype ContentSmProminence = \"default\" | \"muted\";\n\ninterface ContentSmPresetConfig {\n  /** Icon width/height (CSS value). */\n  iconSize: string;\n  /** Tailwind padding class for the icon container. */\n  iconContainerPadding: string;\n  /** Font preset for the title. */\n  titleFont: TextFont;\n  /** Title line-height — also used as icon container min-height (CSS value). */\n  lineHeight: string;\n  /** Gap between icon container and title (CSS value). */\n  gap: string;\n}\n\n/** Props for {@link ContentSm}. Does not support editing or descriptions. */\ninterface ContentSmProps {\n  /** Optional icon component. */\n  icon?: IconFunctionComponent;\n\n  /** Main title text (read-only — editing is not supported). */\n  title: string | RichStr;\n\n  /** Size preset. Default: `\"main-ui\"`. */\n  sizePreset?: ContentSmSizePreset;\n\n  /** Layout orientation. Default: `\"inline\"`. */\n  orientation?: ContentSmOrientation;\n\n  /** Title prominence. Default: `\"default\"`. */\n  prominence?: ContentSmProminence;\n\n  /** Ref forwarded to the root `<div>`. */\n  ref?: React.Ref<HTMLDivElement>;\n}\n\n// ---------------------------------------------------------------------------\n// Presets\n// ---------------------------------------------------------------------------\n\nconst CONTENT_SM_PRESETS: Record<ContentSmSizePreset, ContentSmPresetConfig> = {\n  \"main-content\": {\n    iconSize: \"1rem\",\n    iconContainerPadding: \"p-1\",\n    titleFont: \"main-content-body\",\n    lineHeight: \"1.5rem\",\n    gap: \"0.125rem\",\n  },\n  \"main-ui\": {\n    iconSize: \"1rem\",\n    iconContainerPadding: \"p-0.5\",\n    titleFont: \"main-ui-action\",\n    lineHeight: \"1.25rem\",\n    gap: \"0.25rem\",\n  },\n  secondary: {\n    iconSize: \"0.75rem\",\n    iconContainerPadding: \"p-0.5\",\n    titleFont: \"secondary-action\",\n    lineHeight: \"1rem\",\n    gap: \"0.125rem\",\n  },\n};\n\n// ---------------------------------------------------------------------------\n// ContentSm\n// ---------------------------------------------------------------------------\n\nfunction ContentSm({\n  icon: Icon,\n  title,\n  sizePreset = \"main-ui\",\n  orientation = \"inline\",\n  prominence = \"default\",\n  ref,\n}: ContentSmProps) {\n  const config = CONTENT_SM_PRESETS[sizePreset];\n\n  return (\n    <div\n      ref={ref}\n      className=\"opal-content-sm\"\n      data-orientation={orientation}\n      data-prominence={prominence}\n      style={{ gap: config.gap }}\n    >\n      {Icon && (\n        <div\n          className={cn(\n            \"opal-content-sm-icon-container shrink-0\",\n            config.iconContainerPadding\n          )}\n          style={{ minHeight: config.lineHeight }}\n        >\n          <Icon\n            className=\"opal-content-sm-icon\"\n            style={{ width: config.iconSize, height: config.iconSize }}\n          />\n        </div>\n      )}\n\n      <Text\n        font={config.titleFont}\n        color=\"inherit\"\n        maxLines={1}\n        title={toPlainString(title)}\n      >\n        {title}\n      </Text>\n    </div>\n  );\n}\n\nexport {\n  ContentSm,\n  type ContentSmProps,\n  type ContentSmSizePreset,\n  type ContentSmOrientation,\n  type ContentSmProminence,\n};\n"
  },
  {
    "path": "web/lib/opal/src/layouts/content/ContentXl.tsx",
    "content": "\"use client\";\n\nimport { Button } from \"@opal/components/buttons/button/components\";\nimport type { ContainerSizeVariants } from \"@opal/types\";\nimport SvgEdit from \"@opal/icons/edit\";\nimport type { IconFunctionComponent, RichStr } from \"@opal/types\";\nimport { Text, type TextFont } from \"@opal/components/text/components\";\nimport { toPlainString } from \"@opal/components/text/InlineMarkdown\";\nimport { cn } from \"@opal/utils\";\nimport { useState } from \"react\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype ContentXlSizePreset = \"headline\" | \"section\";\n\ninterface ContentXlPresetConfig {\n  /** Icon width/height (CSS value). */\n  iconSize: string;\n  /** Tailwind padding class for the icon container. */\n  iconContainerPadding: string;\n  /** More-icon-1 width/height (CSS value). */\n  moreIcon1Size: string;\n  /** Tailwind padding class for the more-icon-1 container. */\n  moreIcon1ContainerPadding: string;\n  /** More-icon-2 width/height (CSS value). */\n  moreIcon2Size: string;\n  /** Tailwind padding class for the more-icon-2 container. */\n  moreIcon2ContainerPadding: string;\n  /** Opal font name for the title (without `font-` prefix). */\n  titleFont: TextFont;\n  /** Title line-height — also used as icon container min-height (CSS value). */\n  lineHeight: string;\n  /** Button `size` prop for the edit button. Uses the shared `SizeVariant` scale. */\n  editButtonSize: ContainerSizeVariants;\n  /** Tailwind padding class for the edit button container. */\n  editButtonPadding: string;\n}\n\ninterface ContentXlProps {\n  /** Optional icon component. */\n  icon?: IconFunctionComponent;\n\n  /** Main title text. */\n  title: string | RichStr;\n\n  /** Optional description below the title. */\n  description?: string | RichStr;\n\n  /** Enable inline editing of the title. */\n  editable?: boolean;\n\n  /** Called when the user commits an edit. */\n  onTitleChange?: (newTitle: string) => void;\n\n  /** Size preset. Default: `\"headline\"`. */\n  sizePreset?: ContentXlSizePreset;\n\n  /** Optional secondary icon rendered in the icon row. */\n  moreIcon1?: IconFunctionComponent;\n\n  /** Optional tertiary icon rendered in the icon row. */\n  moreIcon2?: IconFunctionComponent;\n\n  /** Ref forwarded to the root `<div>`. */\n  ref?: React.Ref<HTMLDivElement>;\n}\n\n// ---------------------------------------------------------------------------\n// Presets\n// ---------------------------------------------------------------------------\n\nconst CONTENT_XL_PRESETS: Record<ContentXlSizePreset, ContentXlPresetConfig> = {\n  headline: {\n    iconSize: \"2rem\",\n    iconContainerPadding: \"p-0.5\",\n    moreIcon1Size: \"1rem\",\n    moreIcon1ContainerPadding: \"p-0.5\",\n    moreIcon2Size: \"2rem\",\n    moreIcon2ContainerPadding: \"p-0.5\",\n    titleFont: \"heading-h2\",\n    lineHeight: \"2.25rem\",\n    editButtonSize: \"md\",\n    editButtonPadding: \"p-1\",\n  },\n  section: {\n    iconSize: \"1.5rem\",\n    iconContainerPadding: \"p-0.5\",\n    moreIcon1Size: \"0.75rem\",\n    moreIcon1ContainerPadding: \"p-0.5\",\n    moreIcon2Size: \"1.5rem\",\n    moreIcon2ContainerPadding: \"p-0.5\",\n    titleFont: \"heading-h3\",\n    lineHeight: \"1.75rem\",\n    editButtonSize: \"sm\",\n    editButtonPadding: \"p-0.5\",\n  },\n};\n\n// ---------------------------------------------------------------------------\n// ContentXl\n// ---------------------------------------------------------------------------\n\nfunction ContentXl({\n  sizePreset = \"headline\",\n  icon: Icon,\n  title,\n  description,\n  editable,\n  onTitleChange,\n  moreIcon1: MoreIcon1,\n  moreIcon2: MoreIcon2,\n  ref,\n}: ContentXlProps) {\n  const [editing, setEditing] = useState(false);\n  const [editValue, setEditValue] = useState(toPlainString(title));\n\n  const config = CONTENT_XL_PRESETS[sizePreset];\n\n  function startEditing() {\n    setEditValue(toPlainString(title));\n    setEditing(true);\n  }\n\n  function commit() {\n    const value = editValue.trim();\n    if (value && value !== toPlainString(title)) onTitleChange?.(value);\n    setEditing(false);\n  }\n\n  return (\n    <div ref={ref} className=\"opal-content-xl\">\n      {(Icon || MoreIcon1 || MoreIcon2) && (\n        <div className=\"opal-content-xl-icon-row\">\n          {Icon && (\n            <div\n              className={cn(\n                \"opal-content-xl-icon-container shrink-0\",\n                config.iconContainerPadding\n              )}\n              style={{ minHeight: config.lineHeight }}\n            >\n              <Icon\n                className=\"opal-content-xl-icon\"\n                style={{ width: config.iconSize, height: config.iconSize }}\n              />\n            </div>\n          )}\n\n          {MoreIcon1 && (\n            <div\n              className={cn(\n                \"opal-content-xl-more-icon-container shrink-0\",\n                config.moreIcon1ContainerPadding\n              )}\n            >\n              <MoreIcon1\n                className=\"opal-content-xl-icon\"\n                style={{\n                  width: config.moreIcon1Size,\n                  height: config.moreIcon1Size,\n                }}\n              />\n            </div>\n          )}\n\n          {MoreIcon2 && (\n            <div\n              className={cn(\n                \"opal-content-xl-more-icon-container shrink-0\",\n                config.moreIcon2ContainerPadding\n              )}\n            >\n              <MoreIcon2\n                className=\"opal-content-xl-icon\"\n                style={{\n                  width: config.moreIcon2Size,\n                  height: config.moreIcon2Size,\n                }}\n              />\n            </div>\n          )}\n        </div>\n      )}\n\n      <div className=\"opal-content-xl-body\">\n        <div className=\"opal-content-xl-title-row\">\n          {editing ? (\n            <div className=\"opal-content-xl-input-sizer\">\n              <span\n                className={cn(\n                  \"opal-content-xl-input-mirror\",\n                  `font-${config.titleFont}`\n                )}\n              >\n                {editValue || \"\\u00A0\"}\n              </span>\n              <input\n                className={cn(\n                  \"opal-content-xl-input\",\n                  `font-${config.titleFont}`,\n                  \"text-text-04\"\n                )}\n                value={editValue}\n                onChange={(e) => setEditValue(e.target.value)}\n                size={1}\n                autoFocus\n                onFocus={(e) => e.currentTarget.select()}\n                onBlur={commit}\n                onKeyDown={(e) => {\n                  if (e.key === \"Enter\") commit();\n                  if (e.key === \"Escape\") {\n                    setEditValue(toPlainString(title));\n                    setEditing(false);\n                  }\n                }}\n                style={{ height: config.lineHeight }}\n              />\n            </div>\n          ) : (\n            <Text\n              font={config.titleFont}\n              color=\"inherit\"\n              maxLines={1}\n              title={toPlainString(title)}\n              onClick={editable ? startEditing : undefined}\n            >\n              {title}\n            </Text>\n          )}\n\n          {editable && !editing && (\n            <div\n              className={cn(\n                \"opal-content-xl-edit-button\",\n                config.editButtonPadding\n              )}\n            >\n              <Button\n                icon={SvgEdit}\n                prominence=\"internal\"\n                size={config.editButtonSize}\n                tooltip=\"Edit\"\n                tooltipSide=\"right\"\n                onClick={startEditing}\n              />\n            </div>\n          )}\n        </div>\n\n        {description && toPlainString(description) && (\n          <div className=\"opal-content-xl-description\">\n            <Text font=\"secondary-body\" color=\"text-03\" as=\"p\">\n              {description}\n            </Text>\n          </div>\n        )}\n      </div>\n    </div>\n  );\n}\n\nexport { ContentXl, type ContentXlProps, type ContentXlSizePreset };\n"
  },
  {
    "path": "web/lib/opal/src/layouts/content/README.md",
    "content": "# Content\n\n**Import:** `import { Content, type ContentProps } from \"@opal/layouts\";`\n\nA two-axis layout component for displaying icon + title + description rows. Routes to an internal layout based on the `sizePreset` and `variant` combination.\n\n## Two-Axis Architecture\n\n### `sizePreset` — controls sizing (icon, padding, gap, font)\n\n#### ContentXl presets (variant=\"heading\")\n\n| Preset | Icon | Icon padding | moreIcon1 | mI1 padding | moreIcon2 | mI2 padding | Title font | Line-height |\n|---|---|---|---|---|---|---|---|---|\n| `headline` | 2rem (32px) | `p-0.5` (2px) | 1rem (16px) | `p-0.5` (2px) | 2rem (32px) | `p-0.5` (2px) | `font-heading-h2` | 2.25rem (36px) |\n| `section` | 1.5rem (24px) | `p-0.5` (2px) | 0.75rem (12px) | `p-0.5` (2px) | 1.5rem (24px) | `p-0.5` (2px) | `font-heading-h3` | 1.75rem (28px) |\n\n#### ContentLg presets (variant=\"section\")\n\n| Preset | Icon | Icon padding | Gap | Title font | Line-height |\n|---|---|---|---|---|---|\n| `headline` | 2rem (32px) | `p-0.5` (2px) | 0.25rem (4px) | `font-heading-h2` | 2.25rem (36px) |\n| `section` | 1.25rem (20px) | `p-1` (4px) | 0rem | `font-heading-h3-muted` | 1.75rem (28px) |\n\n#### ContentMd presets\n\n| Preset | Icon | Icon padding | Icon color | Gap | Title font | Line-height |\n|---|---|---|---|---|---|---|\n| `main-content` | 1rem (16px) | `p-1` (4px) | `text-04` | 0.125rem (2px) | `font-main-content-emphasis` | 1.5rem (24px) |\n| `main-ui` | 1rem (16px) | `p-0.5` (2px) | `text-03` | 0.25rem (4px) | `font-main-ui-action` | 1.25rem (20px) |\n| `secondary` | 0.75rem (12px) | `p-0.5` (2px) | `text-04` | 0.125rem (2px) | `font-secondary-action` | 1rem (16px) |\n\n> Icon container height (icon + 2 x padding) always equals the title line-height.\n\n### `variant` — controls structure / layout\n\n| variant | Description |\n|---|---|\n| `heading` | Icon on **top** (flex-col) — ContentXl |\n| `section` | Icon **inline** (flex-row) — ContentLg or ContentMd |\n| `body` | Body text layout — ContentSm |\n\n### Valid Combinations -> Internal Routing\n\n| sizePreset | variant | Routes to |\n|---|---|---|\n| `headline` / `section` | `heading` | **ContentXl** (icon on top) |\n| `headline` / `section` | `section` | **ContentLg** (icon inline) |\n| `main-content` / `main-ui` / `secondary` | `section` | **ContentMd** |\n| `main-content` / `main-ui` / `secondary` | `body` | **ContentSm** |\n\nInvalid combinations (e.g. `sizePreset=\"headline\" + variant=\"body\"`) are excluded at the type level.\n\n## Props\n\n| Prop | Type | Default | Description |\n|---|---|---|---|\n| `sizePreset` | `SizePreset` | `\"headline\"` | Size preset (see tables above) |\n| `variant` | `ContentVariant` | `\"heading\"` | Layout variant (see table above) |\n| `icon` | `IconFunctionComponent` | — | Optional icon component |\n| `title` | `string` | **(required)** | Main title text |\n| `description` | `string` | — | Optional description below the title |\n| `editable` | `boolean` | `false` | Enable inline editing of the title |\n| `onTitleChange` | `(newTitle: string) => void` | — | Called when user commits an edit |\n| `moreIcon1` | `IconFunctionComponent` | — | Secondary icon in icon row (ContentXl only) |\n| `moreIcon2` | `IconFunctionComponent` | — | Tertiary icon in icon row (ContentXl only) |\n\n## Internal Layouts\n\n### ContentXl\n\nFor `headline` / `section` presets with `variant=\"heading\"`. Icon row on top (flex-col), supports `moreIcon1` and `moreIcon2` in the icon row. Description is always `font-secondary-body text-text-03`.\n\n### ContentLg\n\nFor `headline` / `section` presets with `variant=\"section\"`. Always inline (flex-row). Description is always `font-secondary-body text-text-03`.\n\n### ContentMd\n\nFor `main-content` / `main-ui` / `secondary` presets. Always inline. Both `icon` and `description` are optional. Description is always `font-secondary-body text-text-03`.\n\n## Usage Examples\n\n```tsx\nimport { Content } from \"@opal/layouts\";\nimport SvgSearch from \"@opal/icons/search\";\n\n// ContentXl — headline, icon on top\n<Content\n  icon={SvgSearch}\n  sizePreset=\"headline\"\n  variant=\"heading\"\n  title=\"Agent Settings\"\n  description=\"Configure your agent's behavior\"\n/>\n\n// ContentXl — with more icons\n<Content\n  icon={SvgSearch}\n  sizePreset=\"headline\"\n  variant=\"heading\"\n  title=\"Agent Settings\"\n  moreIcon1={SvgStar}\n  moreIcon2={SvgLock}\n/>\n\n// ContentLg — section, icon inline\n<Content\n  icon={SvgSearch}\n  sizePreset=\"section\"\n  variant=\"section\"\n  title=\"Data Sources\"\n  description=\"Connected integrations\"\n/>\n\n// ContentMd — with icon and description\n<Content\n  icon={SvgSearch}\n  sizePreset=\"main-ui\"\n  title=\"Instructions\"\n  description=\"Agent system prompt\"\n/>\n\n// ContentMd — title only (no icon, no description)\n<Content\n  sizePreset=\"main-content\"\n  title=\"Featured Agent\"\n/>\n\n// Editable title\n<Content\n  icon={SvgSearch}\n  sizePreset=\"headline\"\n  variant=\"heading\"\n  title=\"My Agent\"\n  editable\n  onTitleChange={(newTitle) => save(newTitle)}\n/>\n```\n"
  },
  {
    "path": "web/lib/opal/src/layouts/content/components.tsx",
    "content": "import \"@opal/layouts/content/styles.css\";\nimport {\n  ContentSm,\n  type ContentSmOrientation,\n  type ContentSmProminence,\n} from \"@opal/layouts/content/ContentSm\";\nimport {\n  ContentXl,\n  type ContentXlProps,\n} from \"@opal/layouts/content/ContentXl\";\nimport {\n  ContentLg,\n  type ContentLgProps,\n} from \"@opal/layouts/content/ContentLg\";\nimport {\n  ContentMd,\n  type ContentMdProps,\n} from \"@opal/layouts/content/ContentMd\";\nimport type { TagProps } from \"@opal/components/tag/components\";\nimport type { IconFunctionComponent, RichStr } from \"@opal/types\";\nimport { widthVariants } from \"@opal/shared\";\nimport type { ExtremaSizeVariants } from \"@opal/types\";\n\n// ---------------------------------------------------------------------------\n// Shared types\n// ---------------------------------------------------------------------------\n\ntype SizePreset =\n  | \"headline\"\n  | \"section\"\n  | \"main-content\"\n  | \"main-ui\"\n  | \"secondary\";\n\ntype ContentVariant = \"heading\" | \"section\" | \"body\";\n\ninterface ContentBaseProps {\n  /** Optional icon component. */\n  icon?: IconFunctionComponent;\n\n  /** Main title text. */\n  title: string | RichStr;\n\n  /** Optional description below the title. */\n  description?: string | RichStr;\n\n  /** Enable inline editing of the title. */\n  editable?: boolean;\n\n  /** Called when the user commits an edit. */\n  onTitleChange?: (newTitle: string) => void;\n\n  /**\n   * Width preset controlling the component's horizontal size.\n   * Uses the shared `WidthVariant` scale from `@opal/shared`.\n   *\n   * - `\"auto\"` — Shrink-wraps to content width\n   * - `\"fit\"` — Shrink-wraps to content width\n   * - `\"full\"` — Stretches to fill the parent's width\n   *\n   * @default \"fit\"\n   */\n  widthVariant?: ExtremaSizeVariants;\n\n  /** Ref forwarded to the root `<div>` of the resolved layout. */\n  ref?: React.Ref<HTMLDivElement>;\n}\n\n// ---------------------------------------------------------------------------\n// Discriminated union: valid sizePreset × variant combinations\n// ---------------------------------------------------------------------------\n\ntype XlContentProps = ContentBaseProps & {\n  /** Size preset. Default: `\"headline\"`. */\n  sizePreset?: \"headline\" | \"section\";\n  /** Variant. Default: `\"heading\"` for heading-eligible presets. */\n  variant?: \"heading\";\n  /** Optional secondary icon rendered in the icon row (ContentXl only). */\n  moreIcon1?: IconFunctionComponent;\n  /** Optional tertiary icon rendered in the icon row (ContentXl only). */\n  moreIcon2?: IconFunctionComponent;\n};\n\ntype LgContentProps = ContentBaseProps & {\n  /** Size preset. Default: `\"headline\"`. */\n  sizePreset?: \"headline\" | \"section\";\n  /** Variant. */\n  variant: \"section\";\n};\n\ntype MdContentProps = ContentBaseProps & {\n  sizePreset: \"main-content\" | \"main-ui\" | \"secondary\";\n  variant?: \"section\";\n  /** Muted suffix rendered beside the title. Use `\"optional\"` for \"(Optional)\". */\n  suffix?: \"optional\" | (string & {});\n  /** Auxiliary status icon rendered beside the title. */\n  auxIcon?: \"info-gray\" | \"info-blue\" | \"warning\" | \"error\";\n  /** Tag rendered beside the title. */\n  tag?: TagProps;\n};\n\n/** ContentSm does not support descriptions or inline editing. */\ntype SmContentProps = Omit<\n  ContentBaseProps,\n  \"description\" | \"editable\" | \"onTitleChange\"\n> & {\n  sizePreset: \"main-content\" | \"main-ui\" | \"secondary\";\n  variant: \"body\";\n  /** Layout orientation. Default: `\"inline\"`. */\n  orientation?: ContentSmOrientation;\n  /** Title prominence. Default: `\"default\"`. */\n  prominence?: ContentSmProminence;\n};\n\ntype ContentProps =\n  | XlContentProps\n  | LgContentProps\n  | MdContentProps\n  | SmContentProps;\n\n// ---------------------------------------------------------------------------\n// Content — routes to the appropriate internal layout\n// ---------------------------------------------------------------------------\n\nfunction Content(props: ContentProps) {\n  const {\n    sizePreset = \"headline\",\n    variant = \"heading\",\n    widthVariant = \"full\",\n    ref,\n    ...rest\n  } = props;\n\n  let layout: React.ReactNode = null;\n\n  // ContentXl / ContentLg: headline/section presets\n  if (sizePreset === \"headline\" || sizePreset === \"section\") {\n    if (variant === \"heading\") {\n      layout = (\n        <ContentXl\n          sizePreset={sizePreset}\n          ref={ref}\n          {...(rest as Omit<ContentXlProps, \"sizePreset\">)}\n        />\n      );\n    } else {\n      layout = (\n        <ContentLg\n          sizePreset={sizePreset}\n          ref={ref}\n          {...(rest as Omit<ContentLgProps, \"sizePreset\">)}\n        />\n      );\n    }\n  }\n\n  // ContentMd: main-content/main-ui/secondary with section/heading variant\n  // (variant defaults to \"heading\" when omitted on MdContentProps, so both arms are needed)\n  else if (variant === \"section\" || variant === \"heading\") {\n    layout = (\n      <ContentMd\n        sizePreset={sizePreset}\n        ref={ref}\n        {...(rest as Omit<ContentMdProps, \"sizePreset\">)}\n      />\n    );\n  }\n\n  // ContentSm: main-content/main-ui/secondary with body variant\n  else if (variant === \"body\") {\n    layout = (\n      <ContentSm\n        sizePreset={sizePreset}\n        ref={ref}\n        {...(rest as Omit<\n          React.ComponentProps<typeof ContentSm>,\n          \"sizePreset\"\n        >)}\n      />\n    );\n  }\n\n  // This case should NEVER be hit.\n  if (!layout)\n    throw new Error(\n      `Content: no layout matched for sizePreset=\"${sizePreset}\" variant=\"${variant}\"`\n    );\n\n  return <div className={widthVariants[widthVariant]}>{layout}</div>;\n}\n\n// ---------------------------------------------------------------------------\n// Exports\n// ---------------------------------------------------------------------------\n\nexport {\n  Content,\n  type ContentProps,\n  type SizePreset,\n  type ContentVariant,\n  type XlContentProps,\n  type LgContentProps,\n  type MdContentProps,\n  type SmContentProps,\n};\n"
  },
  {
    "path": "web/lib/opal/src/layouts/content/styles.css",
    "content": "/* ===========================================================================\n   Content — ContentXl\n\n   Icon row on top (flex-col). Icon row contains main icon + optional\n   moreIcon1 / moreIcon2 in a flex-row.\n\n   Sizing (icon size, gap, padding, font, line-height) is driven by the\n   sizePreset prop via inline styles + Tailwind classes in the component.\n   =========================================================================== */\n\n/* ---------------------------------------------------------------------------\n   Layout — flex-col (icon row above body)\n   --------------------------------------------------------------------------- */\n\n.opal-content-xl {\n  @apply flex flex-col items-start text-text-04;\n}\n\n/* ---------------------------------------------------------------------------\n   Icon row — flex-row containing main icon + more icons\n   --------------------------------------------------------------------------- */\n\n.opal-content-xl-icon-row {\n  @apply flex flex-row items-center;\n  gap: 0.25rem;\n}\n\n/* ---------------------------------------------------------------------------\n   Icons\n   --------------------------------------------------------------------------- */\n\n.opal-content-xl-icon-container {\n  display: flex;\n  align-items: center;\n  justify-content: center;\n}\n\n.opal-content-xl-more-icon-container {\n  display: flex;\n  align-items: center;\n  justify-content: center;\n}\n\n.opal-content-xl-icon {\n  color: var(--text-04);\n}\n\n/* ---------------------------------------------------------------------------\n   Body column\n   --------------------------------------------------------------------------- */\n\n.opal-content-xl-body {\n  @apply flex flex-1 flex-col items-start;\n  min-width: 0.0625rem;\n}\n\n/* ---------------------------------------------------------------------------\n   Title row — title (or input) + edit button\n   --------------------------------------------------------------------------- */\n\n.opal-content-xl-title-row {\n  @apply flex items-center w-full;\n  gap: 0.25rem;\n}\n\n.opal-content-xl-input-sizer {\n  display: inline-grid;\n  align-items: stretch;\n}\n\n.opal-content-xl-input-sizer > * {\n  grid-area: 1 / 1;\n  padding: 0 0.125rem;\n  min-width: 0.0625rem;\n}\n\n.opal-content-xl-input-mirror {\n  visibility: hidden;\n  white-space: pre;\n}\n\n.opal-content-xl-input {\n  @apply bg-transparent outline-none border-none;\n}\n\n/* ---------------------------------------------------------------------------\n   Edit button — visible only on hover of the outer container\n   --------------------------------------------------------------------------- */\n\n.opal-content-xl-edit-button {\n  @apply opacity-0 transition-opacity shrink-0;\n}\n\n.opal-content-xl:hover .opal-content-xl-edit-button {\n  @apply opacity-100;\n}\n\n/* ---------------------------------------------------------------------------\n   Description\n   --------------------------------------------------------------------------- */\n\n.opal-content-xl-description {\n  @apply text-left w-full;\n}\n\n/* ===========================================================================\n   Content — ContentLg\n\n   Always inline (flex-row) — icon beside content.\n\n   Sizing (icon size, gap, padding, font, line-height) is driven by the\n   sizePreset prop via inline styles + Tailwind classes in the component.\n   =========================================================================== */\n\n/* ---------------------------------------------------------------------------\n   Layout\n   --------------------------------------------------------------------------- */\n\n.opal-content-lg {\n  @apply flex flex-row items-start text-text-04;\n}\n\n/* ---------------------------------------------------------------------------\n   Icon\n   --------------------------------------------------------------------------- */\n\n.opal-content-lg-icon-container {\n  display: flex;\n  align-items: center;\n  justify-content: center;\n}\n\n.opal-content-lg-icon {\n  color: var(--text-04);\n}\n\n/* ---------------------------------------------------------------------------\n   Body column\n   --------------------------------------------------------------------------- */\n\n.opal-content-lg-body {\n  @apply flex flex-1 flex-col items-start;\n  min-width: 0.0625rem;\n}\n\n/* ---------------------------------------------------------------------------\n   Title row — title (or input) + edit button\n   --------------------------------------------------------------------------- */\n\n.opal-content-lg-title-row {\n  @apply flex items-center w-full;\n  gap: 0.25rem;\n}\n\n.opal-content-lg-input-sizer {\n  display: inline-grid;\n  align-items: stretch;\n}\n\n.opal-content-lg-input-sizer > * {\n  grid-area: 1 / 1;\n  padding: 0 0.125rem;\n  min-width: 0.0625rem;\n}\n\n.opal-content-lg-input-mirror {\n  visibility: hidden;\n  white-space: pre;\n}\n\n.opal-content-lg-input {\n  @apply bg-transparent outline-none border-none;\n}\n\n/* ---------------------------------------------------------------------------\n   Edit button — visible only on hover of the outer container\n   --------------------------------------------------------------------------- */\n\n.opal-content-lg-edit-button {\n  @apply opacity-0 transition-opacity shrink-0;\n}\n\n.opal-content-lg:hover .opal-content-lg-edit-button {\n  @apply opacity-100;\n}\n\n/* ---------------------------------------------------------------------------\n   Description\n   --------------------------------------------------------------------------- */\n\n.opal-content-lg-description {\n  @apply text-left w-full;\n}\n\n/* ===========================================================================\n   Content — ContentMd\n\n   Always inline (flex-row). Icon color varies per sizePreset and is applied\n   via Tailwind class from the component.\n   =========================================================================== */\n\n/* ---------------------------------------------------------------------------\n   Layout\n   --------------------------------------------------------------------------- */\n\n.opal-content-md {\n  @apply flex flex-col items-start text-text-04;\n}\n\n.opal-content-md-header {\n  @apply flex flex-row items-center w-full;\n}\n\n.opal-content-md-header[data-editing] {\n  @apply rounded-08;\n  box-shadow: inset 0 0 0 1px var(--border-02);\n}\n\n/* ---------------------------------------------------------------------------\n   Icon\n   --------------------------------------------------------------------------- */\n\n.opal-content-md-icon-container {\n  display: flex;\n  align-items: center;\n  justify-content: center;\n}\n\n/* ---------------------------------------------------------------------------\n   Title row — title (or input) + edit button\n   --------------------------------------------------------------------------- */\n\n.opal-content-md-title-row {\n  @apply flex items-center w-full;\n  gap: 0.25rem;\n}\n\n.opal-content-md-input-sizer {\n  display: inline-grid;\n  align-items: stretch;\n  width: 100%;\n}\n\n.opal-content-md-input-sizer > * {\n  grid-area: 1 / 1;\n  padding: 0 0.125rem;\n  min-width: 0.0625rem;\n}\n\n.opal-content-md-input-mirror {\n  visibility: hidden;\n  white-space: pre;\n}\n\n.opal-content-md-input {\n  @apply bg-transparent outline-none border-none;\n}\n\n/* ---------------------------------------------------------------------------\n   Aux icon\n   --------------------------------------------------------------------------- */\n\n.opal-content-md-aux-icon {\n  display: flex;\n  align-items: center;\n  justify-content: center;\n}\n\n/* ---------------------------------------------------------------------------\n   Edit button — visible only on hover of the outer container\n   --------------------------------------------------------------------------- */\n\n.opal-content-md-edit-button {\n  @apply opacity-0 transition-opacity shrink-0;\n}\n\n.opal-content-md:hover .opal-content-md-edit-button {\n  @apply opacity-100;\n}\n\n/* ---------------------------------------------------------------------------\n   Description\n   --------------------------------------------------------------------------- */\n\n.opal-content-md-description {\n  @apply text-left w-full;\n}\n\n/* ===========================================================================\n   Content — ContentSm\n\n   Three orientation modes (driven by orientation prop):\n     inline  : flex-row         — icon left, title right\n     vertical: flex-col         — icon top, title below\n     reverse : flex-row-reverse — title left, icon right\n\n   Icon color is always text-03. Title color varies by prominence\n   (text-04 default, text-03 muted) via data-prominence.\n   =========================================================================== */\n\n/* ---------------------------------------------------------------------------\n   Layout — orientation\n   --------------------------------------------------------------------------- */\n\n.opal-content-sm {\n  /* since `ContentSm` doesn't have a description, it's possible to center-align the icon and text */\n  @apply flex items-center text-text-04;\n}\n\n.opal-content-sm[data-orientation=\"inline\"] {\n  @apply flex-row;\n}\n\n.opal-content-sm[data-orientation=\"vertical\"] {\n  @apply flex-col;\n}\n\n.opal-content-sm[data-orientation=\"reverse\"] {\n  @apply flex-row-reverse;\n}\n\n/* ---------------------------------------------------------------------------\n   Icon\n   --------------------------------------------------------------------------- */\n\n.opal-content-sm-icon-container {\n  display: flex;\n  align-items: center;\n  justify-content: center;\n}\n\n.opal-content-sm-icon {\n  @apply text-text-03;\n}\n\n.opal-content-sm[data-prominence=\"muted\"] .opal-content-sm-icon {\n  @apply text-text-02;\n}\n\n/* ---------------------------------------------------------------------------\n   Title\n   --------------------------------------------------------------------------- */\n\n.opal-content-sm[data-prominence=\"muted\"] {\n  @apply text-text-03;\n}\n\n/* ===========================================================================\n   Interactive override\n\n   When a Content variant is nested inside an `.interactive` element,\n   the title inherits color from the Interactive's `--interactive-foreground`\n   and icons switch to `--interactive-foreground-icon`. This is automatic —\n   no opt-in prop is required.\n   =========================================================================== */\n\n.interactive .opal-content-xl {\n  color: inherit;\n}\n\n.interactive .opal-content-xl .opal-content-xl-icon {\n  color: var(--interactive-foreground-icon);\n}\n\n.interactive .opal-content-lg {\n  color: inherit;\n}\n\n.interactive .opal-content-lg .opal-content-lg-icon {\n  color: var(--interactive-foreground-icon);\n}\n\n.interactive .opal-content-md {\n  color: inherit;\n}\n\n.interactive .opal-content-md .opal-content-md-icon {\n  color: var(--interactive-foreground-icon);\n}\n\n.interactive .opal-content-sm {\n  color: inherit;\n}\n\n.interactive .opal-content-sm .opal-content-sm-icon {\n  color: var(--interactive-foreground-icon);\n}\n"
  },
  {
    "path": "web/lib/opal/src/layouts/content-action/ContentAction.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { ContentAction } from \"@opal/layouts\";\nimport { Button } from \"@opal/components\";\nimport { SvgSettings } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport type { Decorator } from \"@storybook/react\";\n\nconst withTooltipProvider: Decorator = (Story) => (\n  <TooltipPrimitive.Provider>\n    <Story />\n  </TooltipPrimitive.Provider>\n);\n\nconst meta = {\n  title: \"Layouts/ContentAction\",\n  component: ContentAction,\n  tags: [\"autodocs\"],\n  decorators: [withTooltipProvider],\n  parameters: {\n    layout: \"centered\",\n  },\n} satisfies Meta<typeof ContentAction>;\n\nexport default meta;\n\ntype Story = StoryObj<typeof meta>;\n\n// ---------------------------------------------------------------------------\n// Stories\n// ---------------------------------------------------------------------------\n\nexport const Default: Story = {\n  args: {\n    sizePreset: \"main-content\",\n    variant: \"section\",\n    title: \"OpenAI\",\n    description: \"GPT-4o language model provider.\",\n    icon: SvgSettings,\n    rightChildren: <Button prominence=\"tertiary\">Edit</Button>,\n  },\n};\n\nexport const MultipleActions: Story = {\n  args: {\n    sizePreset: \"main-content\",\n    variant: \"section\",\n    title: \"Connector\",\n    description: \"Manage your data source connector.\",\n    rightChildren: (\n      <div className=\"flex items-center gap-2\">\n        <Button prominence=\"tertiary\" icon={SvgSettings} />\n        <Button variant=\"danger\" prominence=\"primary\">\n          Delete\n        </Button>\n      </div>\n    ),\n  },\n};\n\nexport const NoPadding: Story = {\n  args: {\n    sizePreset: \"main-content\",\n    variant: \"section\",\n    title: \"Compact Row\",\n    description: \"No padding around content area.\",\n    paddingVariant: \"fit\",\n    rightChildren: <Button prominence=\"tertiary\">Action</Button>,\n  },\n};\n"
  },
  {
    "path": "web/lib/opal/src/layouts/content-action/README.md",
    "content": "# ContentAction\n\n**Import:** `import { ContentAction, type ContentActionProps } from \"@opal/layouts\";`\n\nA row layout that pairs a [`Content`](../content/README.md) block with optional right-side action children (buttons, badges, icons, etc.).\n\n## Why ContentAction?\n\n`Content` renders icon + title + description but has no slot for actions. When you need a settings row, card header, or list item with an action on the right you would typically wrap `Content` in a manual flex-row. `ContentAction` standardises that pattern and adds padding alignment with `Interactive.Container` and `Button` via the shared `SizeVariant` scale.\n\n## Props\n\nInherits **all** props from [`Content`](../content/README.md) (same discriminated-union API) plus:\n\n| Prop | Type | Default | Description |\n|---|---|---|---|\n| `rightChildren` | `ReactNode` | `undefined` | Content rendered on the right side. Wrapper stretches to the full height of the row. |\n| `paddingVariant` | `SizeVariant` | `\"lg\"` | Padding preset applied around the `Content` area. Uses the shared size scale from `@opal/shared`. |\n\n### `paddingVariant` reference\n\n| Value | Padding class | Effective padding |\n|---|---|---|\n| `lg` | `p-2` | 0.5rem (8px) |\n| `md` | `p-1` | 0.25rem (4px) |\n| `sm` | `p-1` | 0.25rem (4px) |\n| `xs` | `p-0.5` | 0.125rem (2px) |\n| `2xs` | `p-0.5` | 0.125rem (2px) |\n| `fit` | `p-0` | 0 |\n\nThese values are identical to the padding applied by `Interactive.Container` at each size, so `ContentAction` labels naturally align with adjacent buttons of the same size.\n\n## Layout Structure\n\n```\n[  Content (flex-1, padded)  ][  rightChildren (shrink-0, full height)  ]\n```\n\n- The outer wrapper is `flex flex-row items-stretch w-full`.\n- `Content` sits inside a `flex-1 min-w-0` div with padding from `paddingVariant`.\n- `rightChildren` is wrapped in `flex items-stretch shrink-0` so it stretches vertically.\n\n## Usage Examples\n\n### Settings row with an edit button\n\n```tsx\nimport { ContentAction } from \"@opal/layouts\";\nimport { Button } from \"@opal/components\";\nimport SvgSettings from \"@opal/icons/settings\";\n\n<ContentAction\n  icon={SvgSettings}\n  title=\"OpenAI\"\n  description=\"GPT\"\n  sizePreset=\"main-content\"\n  variant=\"section\"\n  tag={{ title: \"Default\", color: \"blue\" }}\n  paddingVariant=\"lg\"\n  rightChildren={\n    <Button icon={SvgSettings} prominence=\"tertiary\" onClick={handleEdit} />\n  }\n/>\n```\n\n### Card header with connect action\n\n```tsx\nimport { ContentAction } from \"@opal/layouts\";\nimport { Button } from \"@opal/components\";\nimport { SvgArrowExchange, SvgCloud } from \"@opal/icons\";\n\n<ContentAction\n  icon={SvgCloud}\n  title=\"Google Cloud Vertex AI\"\n  description=\"Gemini\"\n  sizePreset=\"main-content\"\n  variant=\"section\"\n  paddingVariant=\"md\"\n  rightChildren={\n    <Button rightIcon={SvgArrowExchange} prominence=\"tertiary\">\n      Connect\n    </Button>\n  }\n/>\n```\n\n### No right children (padding-only wrapper)\n\n```tsx\n<ContentAction\n  title=\"Section Header\"\n  sizePreset=\"main-content\"\n  variant=\"section\"\n  paddingVariant=\"lg\"\n/>\n```\n\nWhen `rightChildren` is omitted the component renders only the padded `Content` — useful for alignment consistency when some rows have actions and others don't.\n"
  },
  {
    "path": "web/lib/opal/src/layouts/content-action/components.tsx",
    "content": "import { Content, type ContentProps } from \"@opal/layouts/content/components\";\nimport {\n  containerSizeVariants,\n  type ContainerSizeVariants,\n} from \"@opal/shared\";\nimport { cn } from \"@opal/utils\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ntype ContentActionProps = ContentProps & {\n  /** Content rendered on the right side, stretched to full height. */\n  rightChildren?: React.ReactNode;\n\n  /**\n   * Padding applied around the `Content` area.\n   * Uses the shared `SizeVariant` scale from `@opal/shared`.\n   *\n   * @default \"lg\"\n   * @see {@link ContainerSizeVariants} for the full list of presets.\n   */\n  paddingVariant?: ContainerSizeVariants;\n};\n\n// ---------------------------------------------------------------------------\n// ContentAction\n// ---------------------------------------------------------------------------\n\n/**\n * A row layout that pairs a {@link Content} block with optional right-side\n * action children (e.g. buttons, badges).\n *\n * The `Content` area receives padding controlled by `paddingVariant`, using\n * the same size scale as `Interactive.Container` and `Button`. The\n * `rightChildren` wrapper stretches to the full height of the row.\n *\n * @example\n * ```tsx\n * import { ContentAction } from \"@opal/layouts\";\n * import { Button } from \"@opal/components\";\n * import SvgSettings from \"@opal/icons/settings\";\n *\n * <ContentAction\n *   icon={SvgSettings}\n *   title=\"OpenAI\"\n *   description=\"GPT\"\n *   sizePreset=\"main-content\"\n *   variant=\"section\"\n *   paddingVariant=\"lg\"\n *   rightChildren={<Button icon={SvgSettings} prominence=\"tertiary\" />}\n * />\n * ```\n */\nfunction ContentAction({\n  rightChildren,\n  paddingVariant = \"lg\",\n  ...contentProps\n}: ContentActionProps) {\n  const { padding } = containerSizeVariants[paddingVariant];\n\n  return (\n    <div className=\"flex flex-row items-stretch w-full\">\n      <div className={cn(\"flex-1 min-w-0 self-center\", padding)}>\n        <Content {...contentProps} />\n      </div>\n      {rightChildren && (\n        <div className=\"flex items-stretch shrink-0\">{rightChildren}</div>\n      )}\n    </div>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Exports\n// ---------------------------------------------------------------------------\n\nexport { ContentAction, type ContentActionProps };\n"
  },
  {
    "path": "web/lib/opal/src/layouts/illustration-content/IllustrationContent.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { IllustrationContent } from \"@opal/layouts\";\nimport { SvgEmpty } from \"@opal/illustrations\";\n\nconst meta = {\n  title: \"Layouts/IllustrationContent\",\n  component: IllustrationContent,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n} satisfies Meta<typeof IllustrationContent>;\n\nexport default meta;\n\ntype Story = StoryObj<typeof meta>;\n\n// ---------------------------------------------------------------------------\n// Stories\n// ---------------------------------------------------------------------------\n\nexport const Default: Story = {\n  args: {\n    illustration: SvgEmpty,\n    title: \"No results found\",\n    description: \"Try adjusting your search or filters to find what you need.\",\n  },\n};\n\nexport const TitleOnly: Story = {\n  args: {\n    title: \"Nothing here yet\",\n  },\n};\n\nexport const NoIllustration: Story = {\n  args: {\n    title: \"No documents available\",\n    description:\n      \"Connect a data source to start indexing documents into your workspace.\",\n  },\n};\n"
  },
  {
    "path": "web/lib/opal/src/layouts/illustration-content/README.md",
    "content": "# IllustrationContent\n\n**Import:** `import { IllustrationContent, type IllustrationContentProps } from \"@opal/layouts\";`\n\nA vertically-stacked, center-aligned layout for empty states, error pages, and informational placeholders. Pairs a large illustration with a title and optional description.\n\n## Why IllustrationContent?\n\nEmpty states and placeholder screens share a recurring pattern: a large illustration centered above a title and description. `IllustrationContent` standardises that pattern so every empty state looks consistent without hand-rolling flex containers and spacing each time.\n\n## Layout Structure\n\n```\n┌─────────────────────────────────┐\n│          (1.25rem pad)          │\n│     ┌───────────────────┐       │\n│     │   illustration    │       │\n│     │   7.5rem × 7.5rem │       │\n│     └───────────────────┘       │\n│         (0.75rem gap)           │\n│          title (center)         │\n│         (0.75rem gap)           │\n│      description (center)       │\n│          (1.25rem pad)          │\n└─────────────────────────────────┘\n```\n\n- Outer container: `flex flex-col items-center gap-3 p-5 text-center`.\n- Illustration: `w-[7.5rem] h-[7.5rem]` (120px), no extra padding.\n- Title: `<p>` with `font-main-content-emphasis text-text-04`.\n- Description: `<p>` with `font-secondary-body text-text-03`.\n\n## Props\n\n| Prop | Type | Default | Description |\n|---|---|---|---|\n| `illustration` | `IconFunctionComponent` | — | Optional illustration component rendered at 7.5rem × 7.5rem, centered. Works with any `@opal/illustrations` SVG. |\n| `title` | `string` | **(required)** | Main title text, center-aligned. |\n| `description` | `string` | — | Optional description below the title, center-aligned. |\n\n## Usage Examples\n\n### Empty search results\n\n```tsx\nimport { IllustrationContent } from \"@opal/layouts\";\nimport SvgNoResult from \"@opal/illustrations/no-result\";\n\n<IllustrationContent\n  illustration={SvgNoResult}\n  title=\"No results found\"\n  description=\"Try adjusting your search or filters.\"\n/>\n```\n\n### Not found page\n\n```tsx\nimport { IllustrationContent } from \"@opal/layouts\";\nimport SvgNotFound from \"@opal/illustrations/not-found\";\n\n<IllustrationContent\n  illustration={SvgNotFound}\n  title=\"Page not found\"\n  description=\"The page you're looking for doesn't exist or has been moved.\"\n/>\n```\n\n### Title only (no illustration, no description)\n\n```tsx\nimport { IllustrationContent } from \"@opal/layouts\";\n\n<IllustrationContent title=\"Nothing here yet\" />\n```\n\n### Empty state with illustration and title (no description)\n\n```tsx\nimport { IllustrationContent } from \"@opal/layouts\";\nimport SvgEmpty from \"@opal/illustrations/empty\";\n\n<IllustrationContent\n  illustration={SvgEmpty}\n  title=\"No items\"\n/>\n```\n"
  },
  {
    "path": "web/lib/opal/src/layouts/illustration-content/components.tsx",
    "content": "import type { IconFunctionComponent, RichStr } from \"@opal/types\";\nimport { Text } from \"@opal/components\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface IllustrationContentProps {\n  /** Optional illustration rendered at 7.5rem × 7.5rem (120px), centered. */\n  illustration?: IconFunctionComponent;\n\n  /** Main title text, center-aligned. Uses `font-main-content-emphasis`. */\n  title: string | RichStr;\n\n  /** Optional description below the title, center-aligned. Uses `font-secondary-body`. */\n  description?: string | RichStr;\n}\n\n// ---------------------------------------------------------------------------\n// IllustrationContent\n// ---------------------------------------------------------------------------\n\n/**\n * A vertically-stacked, center-aligned layout for empty states, error pages,\n * and informational placeholders.\n *\n * Renders an optional illustration on top, followed by a title and an optional\n * description — all center-aligned with consistent spacing.\n *\n * **Layout structure:**\n *\n * ```\n * ┌─────────────────────────────────┐\n * │          (1.25rem pad)          │\n * │     ┌───────────────────┐       │\n * │     │   illustration    │       │\n * │     │   7.5rem × 7.5rem │       │\n * │     └───────────────────┘       │\n * │         (0.75rem gap)           │\n * │          title (center)         │\n * │      description (center)       │\n * │          (1.25rem pad)          │\n * └─────────────────────────────────┘\n * ```\n *\n * @example\n * ```tsx\n * import { IllustrationContent } from \"@opal/layouts\";\n * import SvgNoResult from \"@opal/illustrations/no-result\";\n *\n * <IllustrationContent\n *   illustration={SvgNoResult}\n *   title=\"No results found\"\n *   description=\"Try adjusting your search or filters.\"\n * />\n * ```\n */\nfunction IllustrationContent({\n  illustration: Illustration,\n  title,\n  description,\n}: IllustrationContentProps) {\n  return (\n    <div className=\"flex flex-col items-center gap-3 p-5 text-center\">\n      {Illustration && (\n        <Illustration\n          aria-hidden=\"true\"\n          className=\"shrink-0 w-[7.5rem] h-[7.5rem]\"\n        />\n      )}\n      <div className=\"flex flex-col items-center text-center\">\n        <Text font=\"main-content-emphasis\" color=\"text-04\" as=\"p\">\n          {title}\n        </Text>\n        {description && (\n          <Text font=\"secondary-body\" color=\"text-03\" as=\"p\">\n            {description}\n          </Text>\n        )}\n      </div>\n    </div>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Exports\n// ---------------------------------------------------------------------------\n\nexport { IllustrationContent, type IllustrationContentProps };\n"
  },
  {
    "path": "web/lib/opal/src/layouts/index.ts",
    "content": "/* Content */\nexport {\n  Content,\n  type ContentProps,\n  type SizePreset,\n  type ContentVariant,\n} from \"@opal/layouts/content/components\";\n\n/* ContentAction */\nexport {\n  ContentAction,\n  type ContentActionProps,\n} from \"@opal/layouts/content-action/components\";\n\n/* CardHeaderLayout */\nexport {\n  CardHeaderLayout,\n  type CardHeaderLayoutProps,\n} from \"@opal/layouts/cards/header-layout/components\";\n\n/* IllustrationContent */\nexport {\n  IllustrationContent,\n  type IllustrationContentProps,\n} from \"@opal/layouts/illustration-content/components\";\n"
  },
  {
    "path": "web/lib/opal/src/shared.ts",
    "content": "/**\n * @opal/shared — Shared constants and types for the opal design system.\n *\n * This module holds design tokens that are referenced by multiple opal\n * packages (core, components, layouts). Centralising them here avoids\n * circular imports and gives every consumer a single source of truth.\n */\n\nimport type {\n  SizeVariants,\n  OverridableExtremaSizeVariants,\n  ContainerSizeVariants,\n  ExtremaSizeVariants,\n  PaddingVariants,\n  RoundingVariants,\n} from \"@opal/types\";\n\n/**\n * Size-variant scale.\n *\n * Each entry maps a named preset to Tailwind utility classes for\n * `height`, `min-width`, and `padding`.\n *\n * | Key   | Height        | Padding  |\n * |-------|---------------|----------|\n * | `lg`  | 2.25rem (36px)| `p-2`   |\n * | `md`  | 1.75rem (28px)| `p-1`   |\n * | `sm`  | 1.5rem (24px) | `p-1`   |\n * | `xs`  | 1.25rem (20px)| `p-0.5` |\n * | `2xs` | 1rem (16px)   | `p-0.5` |\n * | `fit` | h-fit         | `p-0`   |\n */\ntype ContainerProperties = {\n  height: string;\n  minWidth: string;\n  padding: string;\n};\nconst containerSizeVariants: Record<\n  ContainerSizeVariants,\n  ContainerProperties\n> = {\n  fit: { height: \"h-fit\", minWidth: \"\", padding: \"p-0\" },\n  lg: { height: \"h-[2.25rem]\", minWidth: \"min-w-[2.25rem]\", padding: \"p-2\" },\n  md: { height: \"h-[1.75rem]\", minWidth: \"min-w-[1.75rem]\", padding: \"p-1\" },\n  sm: { height: \"h-[1.5rem]\", minWidth: \"min-w-[1.5rem]\", padding: \"p-1\" },\n  xs: {\n    height: \"h-[1.25rem]\",\n    minWidth: \"min-w-[1.25rem]\",\n    padding: \"p-0.5\",\n  },\n  \"2xs\": { height: \"h-[1rem]\", minWidth: \"min-w-[1rem]\", padding: \"p-0.5\" },\n} as const;\n\n// ---------------------------------------------------------------------------\n// Width/Height Variants\n//\n// A named scale of width/height presets that map to Tailwind width/height utility classes.\n//\n// Consumers (for width):\n//   - Interactive.Container  (widthVariant)\n//   - Button                 (width)\n//   - Content                (widthVariant)\n// ---------------------------------------------------------------------------\n\n/**\n * Width-variant scale.\n *\n * | Key    | Tailwind class |\n * |--------|----------------|\n * | `auto` | `w-auto`       |\n * | `fit`  | `w-fit`        |\n * | `full` | `w-full`       |\n */\nconst widthVariants: Record<ExtremaSizeVariants, string> = {\n  fit: \"w-fit\",\n  full: \"w-full\",\n} as const;\n\n/**\n * Height-variant scale.\n *\n * | Key    | Tailwind class |\n * |--------|----------------|\n * | `auto` | `h-auto`       |\n * | `fit`  | `h-fit`        |\n * | `full` | `h-full`       |\n */\nconst heightVariants: Record<ExtremaSizeVariants, string> = {\n  fit: \"h-fit\",\n  full: \"h-full\",\n} as const;\n\n// ---------------------------------------------------------------------------\n// Card Variants\n//\n// Shared padding and rounding scales for card components (Card, SelectCard).\n//\n// Consumers:\n//   - Card          (paddingVariant, roundingVariant)\n//   - SelectCard    (paddingVariant, roundingVariant)\n// ---------------------------------------------------------------------------\n\nconst cardPaddingVariants: Record<PaddingVariants, string> = {\n  lg: \"p-6\",\n  md: \"p-4\",\n  sm: \"p-2\",\n  xs: \"p-1\",\n  \"2xs\": \"p-0.5\",\n  fit: \"p-0\",\n};\n\nconst cardRoundingVariants: Record<RoundingVariants, string> = {\n  lg: \"rounded-16\",\n  md: \"rounded-12\",\n  sm: \"rounded-08\",\n  xs: \"rounded-04\",\n};\n\nexport {\n  type ExtremaSizeVariants,\n  type ContainerSizeVariants,\n  type OverridableExtremaSizeVariants,\n  type SizeVariants,\n  containerSizeVariants,\n  cardPaddingVariants,\n  cardRoundingVariants,\n  widthVariants,\n  heightVariants,\n};\n"
  },
  {
    "path": "web/lib/opal/src/types.ts",
    "content": "import type { SVGProps } from \"react\";\n\n// ---------------------------------------------------------------------------\n// Size Variants\n//\n// A named scale of size presets (lg → 2xs, plus fit) that map to Tailwind\n// utility classes for height, min-width, and padding.\n//\n// Consumers:\n//   - Interactive.Container  (height + min-width + padding)\n//   - Button                 (icon sizing)\n//   - ContentAction          (padding only)\n//   - Content (ContentXl / ContentLg / ContentMd)  (edit-button size)\n// ---------------------------------------------------------------------------\n\n// Base Size Types:\n\n/**\n * Full range of size variants.\n *\n * This is the complete scale of size presets available in the design system.\n * Components needing the full range use this type directly.\n */\nexport type SizeVariants = \"fit\" | \"full\" | \"lg\" | \"md\" | \"sm\" | \"xs\" | \"2xs\";\n\n// Convenience Size Types:\n//\n// NOTE (@raunakab + @nmgarza5)\n// There are many components throughout the library that need to \"extract\" very specific sizings from the full gamut that is available.\n// For those components, we've extracted these below \"convenience\" types.\n\n/**\n * Size variants for container components (excludes \"full\").\n *\n * Used by components that control height, min-width, and padding.\n * Excludes \"full\" since containers need a fixed height preset.\n */\nexport type ContainerSizeVariants = Exclude<SizeVariants, \"full\">;\n\n/**\n * Padding size variants.\n *\n * | Variant | Class   |\n * |---------|---------|\n * | `lg`    | `p-6`   |\n * | `md`    | `p-4`   |\n * | `sm`    | `p-2`   |\n * | `xs`    | `p-1`   |\n * | `2xs`   | `p-0.5` |\n * | `fit`   | `p-0`   |\n */\nexport type PaddingVariants = Extract<\n  SizeVariants,\n  \"fit\" | \"lg\" | \"md\" | \"sm\" | \"xs\" | \"2xs\"\n>;\n\n/**\n * Rounding size variants.\n *\n * | Variant | Class        |\n * |---------|--------------|\n * | `lg`    | `rounded-16` |\n * | `md`    | `rounded-12` |\n * | `sm`    | `rounded-08` |\n * | `xs`    | `rounded-04` |\n */\nexport type RoundingVariants = Extract<SizeVariants, \"lg\" | \"md\" | \"sm\" | \"xs\">;\n\n/**\n * Extreme size variants (\"fit\" and \"full\" only).\n *\n * Used for width and height properties that only support extremal values.\n */\nexport type ExtremaSizeVariants = Extract<SizeVariants, \"fit\" | \"full\">;\n\n/**\n * Size variants with numeric overrides.\n *\n * Allows size specification as a named preset or a custom numeric value.\n * Used in components that need programmatic sizing flexibility.\n */\nexport type OverridableExtremaSizeVariants = ExtremaSizeVariants | number;\n\n// ---------------------------------------------------------------------------\n// Icon Props\n// ---------------------------------------------------------------------------\n\n/**\n * Base props for SVG icon components.\n *\n * Extends standard SVG element attributes with convenience props used across\n * the design system. All generated icon components (in `@opal/icons`) accept\n * this interface, ensuring a consistent API for sizing, coloring, and labeling.\n *\n * @example\n * ```tsx\n * import type { IconProps } from \"@opal/types\";\n *\n * function MyIcon({ size = 16, className, ...props }: IconProps) {\n *   return (\n *     <svg width={size} height={size} className={className} {...props}>\n *       ...\n *     </svg>\n *   );\n * }\n * ```\n */\nexport interface IconProps extends SVGProps<SVGSVGElement> {\n  className?: string;\n  size?: number;\n  title?: string;\n  color?: string;\n}\n\n/** Strips `className` and `style` from a props type to enforce design-system styling. */\nexport type WithoutStyles<T> = Omit<T, \"className\" | \"style\">;\n\n// ---------------------------------------------------------------------------\n// Rich Strings\n// ---------------------------------------------------------------------------\n\n/**\n * A branded string wrapper that signals inline markdown should be parsed.\n *\n * Created via the `markdown()` function. Components that accept `string | RichStr`\n * will parse the inner `raw` string as inline markdown when a `RichStr` is passed,\n * and render plain text when a regular `string` is passed.\n *\n * This avoids \"API coloring\" — components don't need a `markdown` boolean prop,\n * and intermediate wrappers don't need to thread it through. The decision to\n * use markdown lives at the call site via `markdown(\"*bold* text\")`.\n */\nexport interface RichStr {\n  readonly __brand: \"RichStr\";\n  readonly raw: string;\n}\n\n/**\n * HTML button `type` attribute values.\n *\n * Used by interactive primitives and button-like components to indicate that\n * the element is inherently interactive for cursor-styling purposes, even\n * without an explicit `onClick` or `href`.\n */\nexport type ButtonType = \"submit\" | \"button\" | \"reset\";\n\n/** Like `Omit` but distributes over union types, preserving discriminated unions. */\nexport type DistributiveOmit<T, K extends keyof any> = T extends any\n  ? Omit<T, K>\n  : never;\n\n/**\n * A React function component that accepts {@link IconProps}.\n *\n * Use this type when a component prop expects an icon — it ensures the icon\n * supports `className`, `size`, `title`, and `color` without callers needing\n * to import `IconProps` directly.\n *\n * @example\n * ```tsx\n * import type { IconFunctionComponent } from \"@opal/types\";\n *\n * interface ButtonProps {\n *   icon?: IconFunctionComponent;\n * }\n * ```\n */\nexport type IconFunctionComponent = React.FunctionComponent<IconProps>;\n"
  },
  {
    "path": "web/lib/opal/src/utils.ts",
    "content": "import { clsx, type ClassValue } from \"clsx\";\nimport { twMerge } from \"tailwind-merge\";\nimport type { RichStr } from \"@opal/types\";\n\nexport function cn(...inputs: ClassValue[]) {\n  return twMerge(clsx(inputs));\n}\n\n/**\n * Wraps strings for inline markdown parsing by `Text` and other Opal components.\n *\n * Multiple arguments are joined with newlines, so each string renders on its own line:\n * ```tsx\n * markdown(\"Line one\", \"Line two\", \"Line three\")\n * ```\n */\nexport function markdown(...lines: string[]): RichStr {\n  return { __brand: \"RichStr\", raw: lines.join(\"\\n\") };\n}\n"
  },
  {
    "path": "web/lib/opal/tsconfig.json",
    "content": "{\n  \"extends\": \"../../tsconfig.json\",\n  \"compilerOptions\": {\n    \"paths\": {\n      \"@opal/*\": [\"./src/*\"],\n      // TODO (@raunakab): Remove this once the table component migration is\n      // complete. The table internals still import app-layer modules (e.g.\n      // @/refresh-components/texts/Text, @/refresh-components/Popover) via the\n      // @/ alias. Without this entry the IDE cannot resolve those paths since\n      // opal's tsconfig only defines @opal/*. Once all @/ deps are replaced\n      // with opal-internal equivalents, this line should be deleted.\n      \"@/*\": [\"../../src/*\"]\n    }\n  },\n  \"include\": [\"src/**/*\"],\n  \"exclude\": [\"node_modules\"]\n}\n"
  },
  {
    "path": "web/next.config.js",
    "content": "// Always require withSentryConfig\nconst { withSentryConfig } = require(\"@sentry/nextjs\");\n\nconst cspHeader = `\n    style-src 'self' 'unsafe-inline' https://fonts.googleapis.com;\n    font-src 'self' https://fonts.gstatic.com;\n    object-src 'none';\n    base-uri 'self';\n    form-action 'self';\n    ${\n      process.env.NEXT_PUBLIC_CLOUD_ENABLED === \"true\" &&\n      process.env.NODE_ENV !== \"development\"\n        ? \"upgrade-insecure-requests;\"\n        : \"\"\n    }\n`;\n\n/** @type {import('next').NextConfig} */\nconst nextConfig = {\n  productionBrowserSourceMaps: false,\n  output: \"standalone\",\n  transpilePackages: [\"@onyx/opal\"],\n  typedRoutes: true,\n  reactCompiler: true,\n  images: {\n    // Used to fetch favicons\n    remotePatterns: [\n      {\n        protocol: \"https\",\n        hostname: \"www.google.com\",\n        port: \"\",\n        pathname: \"/s2/favicons/**\",\n      },\n    ],\n    unoptimized: true, // Disable image optimization to avoid requiring Sharp\n  },\n  async headers() {\n    const isDev = process.env.NODE_ENV === \"development\";\n    return [\n      {\n        source: \"/(.*)\",\n        headers: [\n          {\n            key: \"Content-Security-Policy\",\n            value: cspHeader.replace(/\\n/g, \"\"),\n          },\n          {\n            key: \"Strict-Transport-Security\",\n            value: \"max-age=63072000; includeSubDomains; preload\",\n          },\n          {\n            key: \"Referrer-Policy\",\n            value: \"strict-origin-when-cross-origin\",\n          },\n          {\n            key: \"X-Content-Type-Options\",\n            value: \"nosniff\",\n          },\n          {\n            key: \"Permissions-Policy\",\n            value:\n              \"accelerometer=(), ambient-light-sensor=(), autoplay=(), battery=(), camera=(), cross-origin-isolated=(), display-capture=(), document-domain=(), encrypted-media=(), execution-while-not-rendered=(), execution-while-out-of-viewport=(), fullscreen=(), geolocation=(), gyroscope=(), keyboard-map=(), magnetometer=(), microphone=(self), midi=(), navigation-override=(), payment=(), picture-in-picture=(), publickey-credentials-get=(), screen-wake-lock=(), sync-xhr=(), usb=(), web-share=(), xr-spatial-tracking=()\",\n          },\n        ],\n      },\n      {\n        // Cache static assets (images, icons, fonts, etc.) to prevent refetching and re-renders\n        source: \"/_next/static/:path*\",\n        headers: [\n          {\n            key: \"Cache-Control\",\n            value: isDev\n              ? \"no-cache, must-revalidate\" // Dev: always check if fresh\n              : \"public, max-age=2592000, immutable\", // Prod: cache for 30 days\n          },\n        ],\n      },\n    ];\n  },\n  async rewrites() {\n    return [\n      {\n        source: \"/ph_ingest/static/:path*\",\n        destination: \"https://us-assets.i.posthog.com/static/:path*\",\n      },\n      {\n        source: \"/ph_ingest/:path*\",\n        destination: `${\n          process.env.NEXT_PUBLIC_POSTHOG_HOST || \"https://us.i.posthog.com\"\n        }/:path*`,\n      },\n      {\n        source: \"/api/docs/:path*\", // catch /api/docs and /api/docs/...\n        destination: `${\n          process.env.INTERNAL_URL || \"http://localhost:8080\"\n        }/docs/:path*`,\n      },\n      {\n        source: \"/api/docs\", // if you also need the exact /api/docs\n        destination: `${\n          process.env.INTERNAL_URL || \"http://localhost:8080\"\n        }/docs`,\n      },\n      {\n        source: \"/openapi.json\",\n        destination: `${\n          process.env.INTERNAL_URL || \"http://localhost:8080\"\n        }/openapi.json`,\n      },\n    ];\n  },\n  async redirects() {\n    return [\n      {\n        source: \"/chat\",\n        destination: \"/app\",\n        permanent: true,\n      },\n      // NRF routes: Redirect to /nrf which doesn't require auth\n      // (NRFPage handles unauthenticated users gracefully with a login modal)\n      {\n        source: \"/app/nrf/side-panel\",\n        destination: \"/nrf/side-panel\",\n        permanent: true,\n      },\n      {\n        source: \"/app/nrf\",\n        destination: \"/nrf\",\n        permanent: true,\n      },\n      {\n        source: \"/chat/:path*\",\n        destination: \"/app/:path*\",\n        permanent: true,\n      },\n      // Legacy /assistants → /agents redirects (added in PR #8869).\n      // Preserves backward compatibility for bookmarks, shared links, and\n      // hardcoded URLs that still reference the old /assistants paths.\n      // TODO: Remove these redirects in v4.0 — https://linear.app/onyx-app/issue/ENG-3771\n      {\n        source: \"/admin/assistants\",\n        destination: \"/admin/agents\",\n        permanent: true,\n      },\n      {\n        source: \"/admin/assistants/:path*\",\n        destination: \"/admin/agents/:path*\",\n        permanent: true,\n      },\n      {\n        source: \"/ee/assistants/:path*\",\n        destination: \"/ee/agents/:path*\",\n        permanent: true,\n      },\n    ];\n  },\n};\n\n// Sentry configuration for error monitoring:\n// - Without SENTRY_AUTH_TOKEN and NEXT_PUBLIC_SENTRY_DSN: Sentry is completely disabled\n// - With both configured: Capture errors and limited performance data\n\n// Determine if Sentry should be enabled\nconst sentryEnabled = Boolean(\n  process.env.SENTRY_AUTH_TOKEN && process.env.NEXT_PUBLIC_SENTRY_DSN\n);\n\n// Sentry webpack plugin options\nconst sentryWebpackPluginOptions = {\n  org: process.env.SENTRY_ORG || \"onyx-vl\",\n  project: process.env.SENTRY_PROJECT || \"onyx-web\",\n  authToken: process.env.SENTRY_AUTH_TOKEN,\n  silent: !sentryEnabled, // Silence output when Sentry is disabled\n  dryRun: !sentryEnabled, // Don't upload source maps when Sentry is disabled\n  ...(sentryEnabled && {\n    sourceMaps: {\n      include: [\"./.next\"],\n      ignore: [\"node_modules\"],\n      urlPrefix: \"~/_next\",\n      stripPrefix: [\"webpack://_N_E/\"],\n      validate: true,\n      cleanArtifacts: true,\n    },\n  }),\n};\n\n// Export the module with conditional Sentry configuration\nmodule.exports = withSentryConfig(nextConfig, sentryWebpackPluginOptions);\n"
  },
  {
    "path": "web/package.json",
    "content": "{\n  \"name\": \"web\",\n  \"version\": \"1.0.0-dev\",\n  \"private\": true,\n  \"workspaces\": [\n    \"lib/opal\"\n  ],\n  \"scripts\": {\n    \"dev\": \"next dev\",\n    \"dev:profile\": \"NEXT_PUBLIC_ENABLE_STATS=true next dev\",\n    \"build\": \"next build\",\n    \"start\": \"next start\",\n    \"lint\": \"next lint\",\n    \"lint:unused\": \"eslint --ext .js,.jsx,.ts,.tsx --rule 'unused-imports/no-unused-imports: error' --quiet --fix=false src/\",\n    \"lint:fix-unused\": \"eslint --ext .js,.jsx,.ts,.tsx --rule 'unused-imports/no-unused-imports: error' --quiet --fix src/\",\n    \"lint:fix-unused-vars\": \"eslint --ext .js,.jsx,.ts,.tsx --fix --quiet src/\",\n    \"types:check\": \"tsgo --noEmit --project tsconfig.types.json\",\n    \"format\": \"prettier --write \\\"src/**/*.{ts,tsx,js,jsx,json,css,md}\\\"\",\n    \"format:check\": \"prettier --check \\\"src/**/*.{ts,tsx,js,jsx,json,css,md}\\\"\",\n    \"test\": \"jest\",\n    \"test:watch\": \"jest --watch\",\n    \"test:coverage\": \"jest --coverage\",\n    \"test:verbose\": \"jest --verbose\",\n    \"test:ci\": \"jest --ci --maxWorkers=2 --silent --bail\",\n    \"test:changed\": \"jest --onlyChanged\",\n    \"test:diff\": \"jest --changedSince=main\",\n    \"test:debug\": \"node --inspect-brk node_modules/.bin/jest --runInBand\",\n    \"storybook\": \"storybook dev -p 6006\",\n    \"storybook:build\": \"storybook build -o storybook-static\"\n  },\n  \"dependencies\": {\n    \"@dnd-kit/core\": \"^6.1.0\",\n    \"@dnd-kit/modifiers\": \"^7.0.0\",\n    \"@dnd-kit/sortable\": \"^8.0.0\",\n    \"@dnd-kit/utilities\": \"^3.2.2\",\n    \"@emotion/stylis\": \"^0.8.5\",\n    \"@headlessui/react\": \"^2.2.0\",\n    \"@headlessui/tailwindcss\": \"^0.2.1\",\n    \"@onyx/opal\": \"./lib/opal\",\n    \"@phosphor-icons/react\": \"^2.0.8\",\n    \"@radix-ui/react-accordion\": \"^1.2.2\",\n    \"@radix-ui/react-avatar\": \"^1.1.10\",\n    \"@radix-ui/react-collapsible\": \"^1.1.2\",\n    \"@radix-ui/react-dialog\": \"^1.1.6\",\n    \"@radix-ui/react-dropdown-menu\": \"^2.1.6\",\n    \"@radix-ui/react-hover-card\": \"^1.1.15\",\n    \"@radix-ui/react-label\": \"^2.1.1\",\n    \"@radix-ui/react-menubar\": \"^1.1.16\",\n    \"@radix-ui/react-popover\": \"^1.1.6\",\n    \"@radix-ui/react-radio-group\": \"^1.2.2\",\n    \"@radix-ui/react-scroll-area\": \"^1.2.2\",\n    \"@radix-ui/react-select\": \"^2.1.6\",\n    \"@radix-ui/react-separator\": \"^1.1.0\",\n    \"@radix-ui/react-slider\": \"^1.2.2\",\n    \"@radix-ui/react-slot\": \"^1.2.4\",\n    \"@radix-ui/react-tabs\": \"^1.1.1\",\n    \"@radix-ui/react-tooltip\": \"^1.2.8\",\n    \"@sentry/nextjs\": \"^10.27.0\",\n    \"@sentry/tracing\": \"^7.120.3\",\n    \"@stripe/stripe-js\": \"^4.6.0\",\n    \"@tailwindcss/container-queries\": \"^0.1.1\",\n    \"@tanstack/react-table\": \"^8.21.3\",\n    \"autoprefixer\": \"^10.4.22\",\n    \"class-variance-authority\": \"^0.7.0\",\n    \"clsx\": \"^2.1.1\",\n    \"cmdk\": \"^1.0.0\",\n    \"cookies-next\": \"^5.1.0\",\n    \"date-fns\": \"^3.6.0\",\n    \"docx-preview\": \"^0.3.7\",\n    \"favicon-fetch\": \"^1.0.0\",\n    \"formik\": \"^2.2.9\",\n    \"highlight.js\": \"^11.11.1\",\n    \"js-cookie\": \"^3.0.5\",\n    \"katex\": \"^0.16.38\",\n    \"linguist-languages\": \"^9.3.1\",\n    \"lodash\": \"^4.17.23\",\n    \"lowlight\": \"^3.3.0\",\n    \"lucide-react\": \"^0.454.0\",\n    \"mdast-util-find-and-replace\": \"^3.0.1\",\n    \"mime\": \"^4.1.0\",\n    \"motion\": \"^12.29.0\",\n    \"next\": \"16.1.7\",\n    \"next-themes\": \"^0.4.4\",\n    \"postcss\": \"^8.5.6\",\n    \"posthog-js\": \"^1.176.0\",\n    \"pptxgenjs\": \"^4.0.1\",\n    \"react\": \"19.2.4\",\n    \"react-datepicker\": \"^7.6.0\",\n    \"react-day-picker\": \"^9.13.0\",\n    \"react-dom\": \"19.2.4\",\n    \"react-dropzone\": \"^14.2.3\",\n    \"react-icons\": \"^4.8.0\",\n    \"react-loader-spinner\": \"^8.0.0\",\n    \"react-markdown\": \"^9.0.1\",\n    \"react-select\": \"^5.8.0\",\n    \"recharts\": \"^2.13.1\",\n    \"rehype-highlight\": \"^7.0.2\",\n    \"rehype-katex\": \"^7.0.1\",\n    \"rehype-sanitize\": \"^6.0.0\",\n    \"rehype-stringify\": \"^10.0.1\",\n    \"remark-gfm\": \"^4.0.0\",\n    \"remark-math\": \"^6.0.0\",\n    \"semver\": \"^7.5.4\",\n    \"sharp\": \"^0.33.5\",\n    \"stripe\": \"^17.0.0\",\n    \"swr\": \"^2.1.5\",\n    \"tailwind-merge\": \"^2.5.4\",\n    \"tailwindcss-animate\": \"^1.0.7\",\n    \"uuid\": \"^9.0.1\",\n    \"vaul\": \"^1.1.1\",\n    \"yup\": \"^1.4.0\",\n    \"zustand\": \"^5.0.8\"\n  },\n  \"devDependencies\": {\n    \"@playwright/test\": \"^1.39.0\",\n    \"@storybook/addon-essentials\": \"^8.6.18\",\n    \"@storybook/addon-themes\": \"^8.6.18\",\n    \"@storybook/blocks\": \"^8.6.18\",\n    \"@storybook/react\": \"^8.6.18\",\n    \"@storybook/react-vite\": \"^8.6.18\",\n    \"@tailwindcss/typography\": \"^0.5.19\",\n    \"@testing-library/jest-dom\": \"^6.9.1\",\n    \"@testing-library/react\": \"^16.3.0\",\n    \"@testing-library/user-event\": \"^14.6.1\",\n    \"@types/chrome\": \"^0.0.287\",\n    \"@types/hast\": \"^3.0.4\",\n    \"@types/jest\": \"^29.5.14\",\n    \"@types/js-cookie\": \"^3.0.6\",\n    \"@types/lodash\": \"^4.17.20\",\n    \"@types/node\": \"18.15.11\",\n    \"@types/react\": \"19.2.10\",\n    \"@types/react-dom\": \"19.2.3\",\n    \"@types/stats.js\": \"^0.17.4\",\n    \"@types/uuid\": \"^9.0.8\",\n    \"@typescript/native-preview\": \"7.0.0-dev.20251222.1\",\n    \"babel-plugin-react-compiler\": \"^1.0.0\",\n    \"baseline-browser-mapping\": \"^2.9.19\",\n    \"eslint\": \"^9.39.1\",\n    \"eslint-config-next\": \"16.1.6\",\n    \"eslint-plugin-unused-imports\": \"^4.1.4\",\n    \"identity-obj-proxy\": \"^3.0.0\",\n    \"jest\": \"^29.7.0\",\n    \"jest-environment-jsdom\": \"^30.2.0\",\n    \"prettier\": \"3.1.0\",\n    \"stats.js\": \"^0.17.0\",\n    \"storybook\": \"^8.6.18\",\n    \"tailwindcss\": \"^3.4.17\",\n    \"ts-jest\": \"^29.2.5\",\n    \"ts-unused-exports\": \"^11.0.1\",\n    \"typescript\": \"^5.9.3\",\n    \"whatwg-fetch\": \"^3.6.20\"\n  },\n  \"overrides\": {\n    \"react-is\": \"^19.0.0-rc-69d4b800-20241021\",\n    \"@types/react\": \"19.2.10\",\n    \"@types/react-dom\": \"19.2.3\"\n  }\n}\n"
  },
  {
    "path": "web/playwright.config.ts",
    "content": "import { defineConfig, devices } from \"@playwright/test\";\nimport * as dotenv from \"dotenv\";\n\ndotenv.config({ path: \".vscode/.env\" });\n\nexport default defineConfig({\n  globalSetup: require.resolve(\"./tests/e2e/global-setup\"),\n  timeout: 100000, // 100 seconds timeout\n  expect: {\n    timeout: 15000, // 15 seconds timeout for all assertions to reduce flakiness\n    toHaveScreenshot: {\n      // Allow up to 1% of pixels to differ (accounts for anti-aliasing, subpixel rendering)\n      maxDiffPixelRatio: 0.01,\n      // Threshold per-channel (0-1): how different a pixel can be before it counts as changed\n      threshold: 0.2,\n    },\n  },\n  retries: process.env.CI ? 2 : 0, // Retry failed tests 2 times in CI, 0 locally\n\n  // When debugging, comment out the first `workers` line and uncomment the second one.\n  // The second one runs the tests in serial, which helps when using the playwright-debugger to step through each test-step.\n  // - @raunakab\n  workers: process.env.CI ? 2 : undefined, // Limit to 2 parallel workers in CI to reduce flakiness\n  // workers: 1,\n\n  reporter: [[\"list\"]],\n  // Only run Playwright tests from tests/e2e directory (ignore Jest tests in src/)\n  testMatch: /.*\\/tests\\/e2e\\/.*\\.spec\\.ts/,\n  outputDir: \"output/playwright\",\n  use: {\n    // Base URL for the application, can be overridden via BASE_URL environment variable\n    baseURL: process.env.BASE_URL || \"http://localhost:3000\",\n    // Capture trace on failure\n    trace: \"retain-on-failure\",\n  },\n  projects: [\n    {\n      name: \"admin\",\n      use: {\n        ...devices[\"Desktop Chrome\"],\n        viewport: { width: 1280, height: 720 },\n        storageState: \"admin_auth.json\",\n      },\n      grepInvert: [/@exclusive/, /@lite/],\n    },\n    {\n      // this suite runs independently and serially + slower\n      // we should be cautious about bloating this suite\n      name: \"exclusive\",\n      use: {\n        ...devices[\"Desktop Chrome\"],\n        viewport: { width: 1280, height: 720 },\n        storageState: \"admin_auth.json\",\n      },\n      grep: /@exclusive/,\n      workers: 1,\n    },\n    {\n      // runs against the Onyx Lite stack (DISABLE_VECTOR_DB=true, no Vespa/Redis)\n      name: \"lite\",\n      use: {\n        ...devices[\"Desktop Chrome\"],\n        viewport: { width: 1280, height: 720 },\n        storageState: \"admin_auth.json\",\n      },\n      grep: /@lite/,\n    },\n  ],\n});\n"
  },
  {
    "path": "web/postcss.config.js",
    "content": "module.exports = {\n  plugins: {\n    tailwindcss: {},\n    autoprefixer: {},\n  },\n};\n"
  },
  {
    "path": "web/sentry.edge.config.ts",
    "content": "// This file configures the initialization of Sentry for edge features (middleware, edge routes, and so on).\n// The config you add here will be used whenever one of the edge features is loaded.\n// Note that this config is unrelated to the Vercel Edge Runtime and is also required when running locally.\n// https://docs.sentry.io/platforms/javascript/guides/nextjs/\n\nimport * as Sentry from \"@sentry/nextjs\";\n\nif (process.env.NEXT_PUBLIC_SENTRY_DSN) {\n  Sentry.init({\n    dsn: process.env.NEXT_PUBLIC_SENTRY_DSN,\n    release: process.env.SENTRY_RELEASE,\n    // Only capture unhandled exceptions\n    tracesSampleRate: 0,\n    debug: false,\n  });\n}\n"
  },
  {
    "path": "web/sentry.server.config.ts",
    "content": "// This file configures the initialization of Sentry on the server.\n// The config you add here will be used whenever the server handles a request.\n// https://docs.sentry.io/platforms/javascript/guides/nextjs/\n\nimport * as Sentry from \"@sentry/nextjs\";\n\nif (process.env.NEXT_PUBLIC_SENTRY_DSN) {\n  Sentry.init({\n    dsn: process.env.NEXT_PUBLIC_SENTRY_DSN,\n    release: process.env.SENTRY_RELEASE,\n\n    // Setting this option to true will print useful information to the console while you're setting up Sentry.\n    debug: false,\n\n    // Disable performance monitoring and only capture errors\n    tracesSampleRate: 0,\n    profilesSampleRate: 0,\n  });\n}\n"
  },
  {
    "path": "web/src/app/PostHogPageView.tsx",
    "content": "\"use client\";\n\nimport { usePathname, useSearchParams } from \"next/navigation\";\nimport { useEffect } from \"react\";\nimport { usePostHog } from \"posthog-js/react\";\n\nexport default function PostHogPageView(): null {\n  const pathname = usePathname();\n  const searchParams = useSearchParams();\n  const posthog = usePostHog();\n\n  useEffect(() => {\n    if (!posthog) {\n      return;\n    }\n\n    // Track pageviews\n    if (pathname) {\n      let url = window.origin + pathname;\n      if (searchParams?.toString()) {\n        url = url + `?${searchParams.toString()}`;\n      }\n      posthog.capture(\"$pageview\", {\n        $current_url: url,\n      });\n    }\n  }, [pathname, searchParams, posthog]);\n\n  return null;\n}\n"
  },
  {
    "path": "web/src/app/admin/actions/edit/[toolId]/page.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\n\nexport default function EditToolPage() {\n  const router = useRouter();\n\n  useEffect(() => {\n    // Redirect to MCP actions page\n    router.replace(\"/admin/actions/mcp\" as Route);\n  }, [router]);\n\n  return null;\n}\n"
  },
  {
    "path": "web/src/app/admin/actions/edit-mcp/page.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\n\nexport default function EditMCPPage() {\n  const router = useRouter();\n\n  useEffect(() => {\n    // Redirect to MCP actions page\n    router.replace(\"/admin/actions/mcp\" as Route);\n  }, [router]);\n\n  return null;\n}\n"
  },
  {
    "path": "web/src/app/admin/actions/mcp/page.tsx",
    "content": "\"use client\";\n\nimport MCPPageContent from \"@/sections/actions/MCPPageContent\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.MCP_ACTIONS;\n\nexport default function Main() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title={route.title}\n        description=\"Connect MCP (Model Context Protocol) servers to add custom actions and tools for your agents.\"\n        separator\n      />\n      <SettingsLayouts.Body>\n        <MCPPageContent />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/actions/new/page.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\n\nexport default function NewActionPage() {\n  const router = useRouter();\n\n  useEffect(() => {\n    // Redirect to MCP actions page\n    router.replace(\"/admin/actions/mcp\" as Route);\n  }, [router]);\n\n  return null;\n}\n"
  },
  {
    "path": "web/src/app/admin/actions/open-api/page.tsx",
    "content": "\"use client\";\n\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport OpenApiPageContent from \"@/sections/actions/OpenApiPageContent\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.OPENAPI_ACTIONS;\n\nexport default function Main() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title={route.title}\n        description=\"Connect OpenAPI servers to add custom actions and tools for your agents.\"\n        separator\n      />\n      <SettingsLayouts.Body>\n        <OpenApiPageContent />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/actions/page.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\n\nexport default function AdminActionsPage() {\n  const router = useRouter();\n\n  useEffect(() => {\n    // Redirect to MCP actions page as the default\n    router.replace(\"/admin/actions/mcp\" as Route);\n  }, [router]);\n\n  return null;\n}\n"
  },
  {
    "path": "web/src/app/admin/add-connector/page.tsx",
    "content": "\"use client\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { SourceCategory, SourceMetadata } from \"@/lib/search/interfaces\";\nimport { listSourceMetadata } from \"@/lib/sources\";\nimport { Button } from \"@opal/components\";\nimport {\n  useCallback,\n  useContext,\n  useDeferredValue,\n  useEffect,\n  useMemo,\n  useRef,\n  useState,\n} from \"react\";\nimport {\n  Tooltip,\n  TooltipContent,\n  TooltipProvider,\n  TooltipTrigger,\n} from \"@/components/ui/tooltip\";\nimport { useFederatedConnectors } from \"@/lib/hooks\";\nimport {\n  FederatedConnectorDetail,\n  federatedSourceToRegularSource,\n  ValidSources,\n} from \"@/lib/types\";\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { buildSimilarCredentialInfoURL } from \"@/app/admin/connector/[ccPairId]/lib\";\nimport { Credential } from \"@/lib/connectors/credentials\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport SourceTile from \"@/components/SourceTile\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.ADD_CONNECTOR;\n\nfunction SourceTileTooltipWrapper({\n  sourceMetadata,\n  preSelect,\n  federatedConnectors,\n  slackCredentials,\n}: {\n  sourceMetadata: SourceMetadata;\n  preSelect?: boolean;\n  federatedConnectors?: FederatedConnectorDetail[];\n  slackCredentials?: Credential<any>[];\n}) {\n  // Check if there's already a federated connector for this source\n  const existingFederatedConnector = useMemo(() => {\n    if (!sourceMetadata.federated || !federatedConnectors) {\n      return null;\n    }\n\n    return federatedConnectors.find(\n      (connector) =>\n        federatedSourceToRegularSource(connector.source) ===\n        sourceMetadata.internalName\n    );\n  }, [sourceMetadata, federatedConnectors]);\n\n  // For Slack specifically, check if there are existing non-federated credentials\n  const isSlackTile = sourceMetadata.internalName === ValidSources.Slack;\n  const hasExistingSlackCredentials = useMemo(() => {\n    return isSlackTile && slackCredentials && slackCredentials.length > 0;\n  }, [isSlackTile, slackCredentials]);\n\n  // Determine the URL to navigate to\n  const navigationUrl = useMemo(() => {\n    // If there's an existing federated connector, route to edit it\n    if (existingFederatedConnector) {\n      return `/admin/federated/${existingFederatedConnector.id}`;\n    }\n\n    // For all other sources (including Slack), use the regular admin URL\n    return sourceMetadata.adminUrl;\n  }, [existingFederatedConnector, sourceMetadata]);\n\n  // Compute whether to hide the tooltip\n  const shouldHideTooltip =\n    !existingFederatedConnector &&\n    !hasExistingSlackCredentials &&\n    !sourceMetadata.federated;\n\n  // If tooltip should be hidden, just render the tile as a component\n  if (shouldHideTooltip) {\n    return (\n      <SourceTile\n        sourceMetadata={sourceMetadata}\n        preSelect={preSelect}\n        navigationUrl={navigationUrl}\n        hasExistingSlackCredentials={!!hasExistingSlackCredentials}\n      />\n    );\n  }\n\n  return (\n    <TooltipProvider>\n      <Tooltip>\n        <TooltipTrigger asChild>\n          <div>\n            <SourceTile\n              sourceMetadata={sourceMetadata}\n              preSelect={preSelect}\n              navigationUrl={navigationUrl}\n              hasExistingSlackCredentials={!!hasExistingSlackCredentials}\n            />\n          </div>\n        </TooltipTrigger>\n        <TooltipContent side=\"top\" className=\"max-w-sm\">\n          {existingFederatedConnector ? (\n            <Text as=\"p\" textLight05 secondaryBody>\n              <strong>Federated connector already configured.</strong> Click to\n              edit the existing connector.\n            </Text>\n          ) : hasExistingSlackCredentials ? (\n            <Text as=\"p\" textLight05 secondaryBody>\n              <strong>Existing Slack credentials found.</strong> Click to manage\n              your Slack connector.\n            </Text>\n          ) : null}\n        </TooltipContent>\n      </Tooltip>\n    </TooltipProvider>\n  );\n}\n\nexport default function Page() {\n  const sources = useMemo(() => listSourceMetadata(), []);\n\n  const [rawSearchTerm, setSearchTerm] = useState(\"\");\n  const searchTerm = useDeferredValue(rawSearchTerm);\n\n  const { data: federatedConnectors } = useFederatedConnectors();\n  const settings = useContext(SettingsContext);\n\n  // Fetch Slack credentials to determine navigation behavior\n  const { data: slackCredentials } = useSWR<Credential<any>[]>(\n    buildSimilarCredentialInfoURL(ValidSources.Slack),\n    errorHandlingFetcher\n  );\n\n  const searchInputRef = useRef<HTMLInputElement>(null);\n\n  useEffect(() => {\n    if (searchInputRef.current) {\n      searchInputRef.current.focus();\n    }\n  }, []);\n\n  const filterSources = useCallback(\n    (sources: SourceMetadata[]) => {\n      if (!searchTerm) return sources;\n      const lowerSearchTerm = searchTerm.toLowerCase();\n      return sources.filter(\n        (source) =>\n          source.displayName.toLowerCase().includes(lowerSearchTerm) ||\n          source.category.toLowerCase().includes(lowerSearchTerm)\n      );\n    },\n    [searchTerm]\n  );\n\n  const popularSources = useMemo(() => {\n    const filtered = filterSources(sources);\n    return sources.filter(\n      (source) =>\n        source.isPopular &&\n        (filtered.includes(source) ||\n          source.displayName.toLowerCase().includes(searchTerm.toLowerCase()))\n    );\n  }, [sources, filterSources, searchTerm]);\n\n  const categorizedSources = useMemo(() => {\n    const filtered = filterSources(sources);\n    const categories = Object.values(SourceCategory).reduce(\n      (acc, category) => {\n        acc[category] = sources.filter(\n          (source) =>\n            source.category === category &&\n            (filtered.includes(source) ||\n              category.toLowerCase().includes(searchTerm.toLowerCase()))\n        );\n        return acc;\n      },\n      {} as Record<SourceCategory, SourceMetadata[]>\n    );\n    // Filter out the \"Other\" category if show_extra_connectors is false\n    if (settings?.settings?.show_extra_connectors === false) {\n      const filteredCategories = Object.entries(categories).filter(\n        ([category]) => category !== SourceCategory.Other\n      );\n      return Object.fromEntries(filteredCategories) as Record<\n        SourceCategory,\n        SourceMetadata[]\n      >;\n    }\n    return categories;\n  }, [\n    sources,\n    filterSources,\n    searchTerm,\n    settings?.settings?.show_extra_connectors,\n  ]);\n\n  // When searching, dedupe Popular against whatever is already in results\n  const resultIds = useMemo(() => {\n    if (!searchTerm) return new Set<string>();\n    return new Set(\n      Object.values(categorizedSources)\n        .flat()\n        .map((s) => s.internalName)\n    );\n  }, [categorizedSources, searchTerm]);\n\n  const dedupedPopular = useMemo(() => {\n    if (!searchTerm) return popularSources;\n    return popularSources.filter((s) => !resultIds.has(s.internalName));\n  }, [popularSources, resultIds, searchTerm]);\n\n  const handleKeyPress = (e: React.KeyboardEvent<HTMLInputElement>) => {\n    if (e.key === \"Enter\") {\n      const filteredCategories = Object.entries(categorizedSources).filter(\n        ([_, sources]) => sources.length > 0\n      );\n      if (\n        filteredCategories.length > 0 &&\n        filteredCategories[0] !== undefined &&\n        filteredCategories[0][1].length > 0\n      ) {\n        const firstSource = filteredCategories[0][1][0];\n        if (firstSource) {\n          // Check if this source has an existing federated connector\n          const existingFederatedConnector =\n            firstSource.federated && federatedConnectors\n              ? federatedConnectors.find(\n                  (connector) =>\n                    connector.source === `federated_${firstSource.internalName}`\n                )\n              : null;\n\n          const url = existingFederatedConnector\n            ? `/admin/federated/${existingFederatedConnector.id}`\n            : firstSource.adminUrl;\n\n          window.open(url, \"_self\");\n        }\n      }\n    }\n  };\n\n  return (\n    <SettingsLayouts.Root width=\"full\">\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title={route.title}\n        rightChildren={\n          <Button href=\"/admin/indexing/status\">See Connectors</Button>\n        }\n        separator\n      />\n      <SettingsLayouts.Body>\n        <InputTypeIn\n          type=\"text\"\n          placeholder=\"Search Connectors\"\n          ref={searchInputRef}\n          value={rawSearchTerm} // keep the input bound to immediate state\n          onChange={(event) => setSearchTerm(event.target.value)}\n          onKeyDown={handleKeyPress}\n          className=\"w-96 flex-none\"\n        />\n\n        {dedupedPopular.length > 0 && (\n          <div className=\"pt-8\">\n            <Text as=\"p\" headingH3>\n              Popular\n            </Text>\n            <div className=\"flex flex-wrap gap-4 p-4\">\n              {dedupedPopular.map((source) => (\n                <SourceTileTooltipWrapper\n                  preSelect={false}\n                  key={source.internalName}\n                  sourceMetadata={source}\n                  federatedConnectors={federatedConnectors}\n                  slackCredentials={slackCredentials}\n                />\n              ))}\n            </div>\n          </div>\n        )}\n\n        {Object.entries(categorizedSources)\n          .filter(([_, sources]) => sources.length > 0)\n          .map(([category, sources], categoryInd) => (\n            <div key={category} className=\"pt-8\">\n              <Text as=\"p\" headingH3>\n                {category}\n              </Text>\n              <div className=\"flex flex-wrap gap-4 p-4\">\n                {sources.map((source, sourceInd) => (\n                  <SourceTileTooltipWrapper\n                    preSelect={\n                      (searchTerm?.length ?? 0) > 0 &&\n                      categoryInd == 0 &&\n                      sourceInd == 0\n                    }\n                    key={source.internalName}\n                    sourceMetadata={source}\n                    federatedConnectors={federatedConnectors}\n                    slackCredentials={slackCredentials}\n                  />\n                ))}\n              </div>\n            </div>\n          ))}\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/agents/CollapsibleSection.tsx",
    "content": "\"use client\";\nimport React, { ReactNode, useState } from \"react\";\nimport { FiSettings } from \"react-icons/fi\";\n\ninterface CollapsibleSectionProps {\n  children: ReactNode;\n  prompt?: string;\n  className?: string;\n}\n\nconst CollapsibleSection: React.FC<CollapsibleSectionProps> = ({\n  children,\n  prompt,\n  className = \"\",\n}) => {\n  const [isCollapsed, setIsCollapsed] = useState<boolean>(false);\n\n  const toggleCollapse = (e?: React.MouseEvent<HTMLDivElement>) => {\n    // Only toggle if the click is on the border or plus sign\n    if (\n      !e ||\n      e.currentTarget === e.target ||\n      (e.target as HTMLElement).classList.contains(\"collapse-toggle\")\n    ) {\n      setIsCollapsed(!isCollapsed);\n    }\n  };\n\n  return (\n    <div\n      className={`relative ${isCollapsed ? \"h-6\" : \"\"} ${className}`}\n      style={{ transition: \"height 0.3s ease-out\" }}\n    >\n      <div\n        className={`\n          cursor-pointer\n          ${isCollapsed ? \"h-6\" : \"pl-6 border-l-2  border-border\"}\n        `}\n        onClick={toggleCollapse}\n      >\n        {\" \"}\n        {isCollapsed ? (\n          <span className=\"collapse-toggle text-lg absolute left-0 top-0 text-sm flex items-center gap-x-3 cursor-pointer\">\n            <FiSettings className=\"pointer-events-none my-auto\" size={16} />\n            {prompt}{\" \"}\n          </span>\n        ) : (\n          <>{children}</>\n        )}\n      </div>\n    </div>\n  );\n};\n\nexport default CollapsibleSection;\n"
  },
  {
    "path": "web/src/app/admin/agents/interfaces.ts",
    "content": "import { ValidSources } from \"@/lib/types\";\nimport { ToolSnapshot } from \"@/lib/tools/interfaces\";\nimport { DocumentSetSummary, MinimalUserSnapshot } from \"@/lib/types\";\n\n// Represents a hierarchy node (folder, space, channel, etc.) attached to a persona\nexport interface HierarchyNodeSnapshot {\n  id: number;\n  raw_node_id: string;\n  display_name: string;\n  link: string | null;\n  source: ValidSources;\n  node_type: string; // HierarchyNodeType enum value\n}\n\n// Represents a document attached to a persona\nexport interface AttachedDocumentSnapshot {\n  id: string;\n  title: string;\n  link: string | null;\n  parent_id: number | null;\n  last_modified: string | null;\n  last_synced: string | null;\n  source: ValidSources | null;\n}\n\nexport interface StarterMessageBase {\n  message: string;\n}\n\nexport interface StarterMessage extends StarterMessageBase {\n  name: string;\n}\n\nexport interface MinimalPersonaSnapshot {\n  id: number;\n  name: string;\n  description: string;\n  tools: ToolSnapshot[];\n  starter_messages: StarterMessage[] | null;\n  document_sets: DocumentSetSummary[];\n  // Counts for knowledge sources (used to determine if search tool should be enabled)\n  hierarchy_node_count?: number;\n  attached_document_count?: number;\n  // Unique sources from all knowledge (document sets + hierarchy nodes)\n  // Used to populate source filters in chat\n  knowledge_sources?: ValidSources[];\n  llm_model_version_override?: string;\n  llm_model_provider_override?: string;\n\n  uploaded_image_id?: string;\n  icon_name?: string;\n\n  is_public: boolean;\n  is_listed: boolean;\n  display_priority: number | null;\n  is_featured: boolean;\n  builtin_persona: boolean;\n\n  labels?: PersonaLabel[];\n  owner: MinimalUserSnapshot | null;\n}\n\nexport interface Persona extends MinimalPersonaSnapshot {\n  user_file_ids: string[];\n  users: MinimalUserSnapshot[];\n  groups: number[];\n  // Hierarchy nodes (folders, spaces, channels) attached for scoped search\n  hierarchy_nodes?: HierarchyNodeSnapshot[];\n  // Individual documents attached for scoped search\n  attached_documents?: AttachedDocumentSnapshot[];\n\n  // Embedded prompt fields on persona\n  system_prompt: string | null;\n  replace_base_system_prompt: boolean;\n  task_prompt: string | null;\n  datetime_aware: boolean;\n}\n\nexport interface FullPersona extends Persona {\n  search_start_date: string | null;\n}\n\nexport interface PersonaLabel {\n  id: number;\n  name: string;\n}\n"
  },
  {
    "path": "web/src/app/admin/agents/lib.ts",
    "content": "import {\n  MinimalPersonaSnapshot,\n  Persona,\n  StarterMessage,\n} from \"@/app/admin/agents/interfaces\";\n\ninterface PersonaUpsertRequest {\n  name: string;\n  description: string;\n  system_prompt: string;\n  task_prompt: string;\n  datetime_aware: boolean;\n  document_set_ids: number[];\n  is_public: boolean;\n  llm_model_provider_override: string | null;\n  llm_model_version_override: string | null;\n  starter_messages: StarterMessage[] | null;\n  users?: string[];\n  groups: number[];\n  tool_ids: number[];\n  remove_image?: boolean;\n  uploaded_image_id: string | null;\n  icon_name: string | null;\n  search_start_date: Date | null;\n  is_featured: boolean;\n  display_priority: number | null;\n  label_ids: number[] | null;\n  user_file_ids: string[] | null;\n  replace_base_system_prompt: boolean;\n  // Hierarchy nodes (folders, spaces, channels) for scoped search\n  hierarchy_node_ids: number[];\n  // Individual documents for scoped search\n  document_ids: string[];\n}\n\nexport interface PersonaUpsertParameters {\n  name: string;\n  description: string;\n  system_prompt: string;\n  replace_base_system_prompt: boolean;\n  task_prompt: string;\n  datetime_aware: boolean;\n  document_set_ids: number[];\n  is_public: boolean;\n  llm_model_provider_override: string | null;\n  llm_model_version_override: string | null;\n  starter_messages: StarterMessage[] | null;\n  users?: string[];\n  groups: number[];\n  tool_ids: number[];\n  remove_image?: boolean;\n  search_start_date: Date | null;\n  uploaded_image_id: string | null;\n  icon_name: string | null;\n  is_featured: boolean;\n  label_ids: number[] | null;\n  user_file_ids: string[];\n  // Hierarchy nodes (folders, spaces, channels) for scoped search\n  hierarchy_node_ids?: number[];\n  // Individual documents for scoped search\n  document_ids?: string[];\n}\n\nfunction buildPersonaUpsertRequest({\n  name,\n  description,\n  system_prompt,\n  task_prompt,\n  document_set_ids,\n  is_public,\n  groups,\n  datetime_aware,\n  users,\n  tool_ids,\n  remove_image,\n  search_start_date,\n  user_file_ids,\n  hierarchy_node_ids,\n  document_ids,\n  icon_name,\n  uploaded_image_id,\n  is_featured,\n  llm_model_provider_override,\n  llm_model_version_override,\n  starter_messages,\n  label_ids,\n  replace_base_system_prompt,\n}: PersonaUpsertParameters): PersonaUpsertRequest {\n  return {\n    name,\n    description,\n    system_prompt,\n    task_prompt,\n    document_set_ids,\n    is_public,\n    uploaded_image_id,\n    icon_name,\n    groups,\n    users,\n    tool_ids,\n    remove_image,\n    search_start_date,\n    datetime_aware,\n    is_featured: is_featured ?? false,\n    llm_model_provider_override: llm_model_provider_override ?? null,\n    llm_model_version_override: llm_model_version_override ?? null,\n    starter_messages: starter_messages ?? null,\n    display_priority: null,\n    label_ids: label_ids ?? null,\n    user_file_ids: user_file_ids ?? null,\n    replace_base_system_prompt,\n    hierarchy_node_ids: hierarchy_node_ids ?? [],\n    document_ids: document_ids ?? [],\n  };\n}\n\nexport async function uploadFile(file: File): Promise<string | null> {\n  const formData = new FormData();\n  formData.append(\"file\", file);\n  const response = await fetch(\"/api/admin/persona/upload-image\", {\n    method: \"POST\",\n    body: formData,\n    credentials: \"include\",\n  });\n\n  if (!response.ok) {\n    console.error(\"Failed to upload file\");\n    return null;\n  }\n\n  const responseJson = await response.json();\n  return responseJson.file_id;\n}\n\nexport async function createPersona(\n  personaUpsertParams: PersonaUpsertParameters\n): Promise<Response | null> {\n  const createPersonaResponse = await fetch(\"/api/persona\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(buildPersonaUpsertRequest(personaUpsertParams)),\n    credentials: \"include\",\n  });\n\n  return createPersonaResponse;\n}\n\nexport async function updatePersona(\n  id: number,\n  personaUpsertParams: PersonaUpsertParameters\n): Promise<Response | null> {\n  const updatePersonaResponse = await fetch(`/api/persona/${id}`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(buildPersonaUpsertRequest(personaUpsertParams)),\n    credentials: \"include\",\n  });\n\n  return updatePersonaResponse;\n}\n\nexport function deletePersona(personaId: number) {\n  return fetch(`/api/persona/${personaId}`, {\n    method: \"DELETE\",\n    credentials: \"include\",\n  });\n}\n\nfunction smallerNumberFirstComparator(a: number, b: number) {\n  return a > b ? 1 : -1;\n}\n\nfunction closerToZeroNegativesFirstComparator(a: number, b: number) {\n  if (a < 0 && b > 0) {\n    return -1;\n  }\n  if (a > 0 && b < 0) {\n    return 1;\n  }\n\n  const absA = Math.abs(a);\n  const absB = Math.abs(b);\n\n  if (absA === absB) {\n    return a > b ? 1 : -1;\n  }\n\n  return absA > absB ? 1 : -1;\n}\n\nexport function personaComparator(\n  a: MinimalPersonaSnapshot | Persona,\n  b: MinimalPersonaSnapshot | Persona\n) {\n  if (a.display_priority === null && b.display_priority === null) {\n    return closerToZeroNegativesFirstComparator(a.id, b.id);\n  }\n\n  if (a.display_priority !== b.display_priority) {\n    if (a.display_priority === null) {\n      return 1;\n    }\n    if (b.display_priority === null) {\n      return -1;\n    }\n\n    return smallerNumberFirstComparator(a.display_priority, b.display_priority);\n  }\n\n  return closerToZeroNegativesFirstComparator(a.id, b.id);\n}\n\nexport async function togglePersonaFeatured(\n  personaId: number,\n  featured: boolean\n) {\n  const response = await fetch(`/api/admin/persona/${personaId}/featured`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      is_featured: !featured,\n    }),\n    credentials: \"include\",\n  });\n  return response;\n}\n\nexport async function togglePersonaVisibility(\n  personaId: number,\n  isVisible: boolean\n) {\n  const response = await fetch(`/api/admin/persona/${personaId}/listed`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      is_listed: !isVisible,\n    }),\n    credentials: \"include\",\n  });\n  return response;\n}\n"
  },
  {
    "path": "web/src/app/admin/agents/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/AgentsPage\";\n"
  },
  {
    "path": "web/src/app/admin/billing/BillingDetailsView.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport Link from \"next/link\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { Content } from \"@opal/layouts\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Button as OpalButton } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Message from \"@/refresh-components/messages/Message\";\nimport InfoBlock from \"@/refresh-components/messages/InfoBlock\";\nimport InputNumber from \"@/refresh-components/inputs/InputNumber\";\nimport {\n  SvgUsers,\n  SvgExternalLink,\n  SvgArrowRight,\n  SvgPlus,\n  SvgWallet,\n  SvgFileText,\n  SvgOrganization,\n} from \"@opal/icons\";\nimport { BillingInformation, LicenseStatus } from \"@/lib/billing/interfaces\";\nimport {\n  createCustomerPortalSession,\n  resetStripeConnection,\n  updateSeatCount,\n  claimLicense,\n  refreshLicenseCache,\n} from \"@/lib/billing/svc\";\nimport { formatDateShort } from \"@/lib/dateUtils\";\nimport { humanReadableFormatShort } from \"@/lib/time\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport useUsers from \"@/hooks/useUsers\";\n\n// ----------------------------------------------------------------------------\n// Constants\n// ----------------------------------------------------------------------------\n\nconst GRACE_PERIOD_DAYS = 30;\n\n// ----------------------------------------------------------------------------\n// Helpers\n// ----------------------------------------------------------------------------\n\nfunction getExpirationState(\n  billing: BillingInformation,\n  license?: LicenseStatus\n) {\n  const isAnnualBilling = billing.billing_period === \"annual\";\n\n  // Check license expiration for self-hosted\n  if (license?.expires_at) {\n    const expiresAt = new Date(license.expires_at);\n    const now = new Date();\n    const daysRemaining = Math.ceil(\n      (expiresAt.getTime() - now.getTime()) / (1000 * 60 * 60 * 24)\n    );\n\n    if (daysRemaining <= 0 || license.status === \"expired\") {\n      const gracePeriodEnd = license.grace_period_end\n        ? new Date(license.grace_period_end)\n        : new Date(\n            expiresAt.getTime() + GRACE_PERIOD_DAYS * 24 * 60 * 60 * 1000\n          );\n      const daysUntilDeletion = Math.max(\n        0,\n        Math.ceil(\n          (gracePeriodEnd.getTime() - now.getTime()) / (1000 * 60 * 60 * 24)\n        )\n      );\n      return {\n        variant: \"error\" as const,\n        daysRemaining: 0,\n        daysUntilDeletion,\n        expirationDate: humanReadableFormatShort(gracePeriodEnd),\n      };\n    }\n\n    // Only show warning for annual subscriptions (30 days before expiration)\n    if (isAnnualBilling && daysRemaining <= 30) {\n      return {\n        variant: \"warning\" as const,\n        daysRemaining,\n        expirationDate: humanReadableFormatShort(expiresAt),\n      };\n    }\n  }\n\n  // Check billing expiration for cloud (only show warnings for canceled subscriptions)\n  if (billing.cancel_at_period_end && billing.current_period_end) {\n    const expiresAt = new Date(billing.current_period_end);\n    const now = new Date();\n    const daysRemaining = Math.ceil(\n      (expiresAt.getTime() - now.getTime()) / (1000 * 60 * 60 * 24)\n    );\n\n    if (daysRemaining <= 0) {\n      const gracePeriodEnd = new Date(\n        expiresAt.getTime() + GRACE_PERIOD_DAYS * 24 * 60 * 60 * 1000\n      );\n      const daysUntilDeletion = Math.max(\n        0,\n        Math.ceil(\n          (gracePeriodEnd.getTime() - now.getTime()) / (1000 * 60 * 60 * 24)\n        )\n      );\n      return {\n        variant: \"error\" as const,\n        daysRemaining: 0,\n        daysUntilDeletion,\n        expirationDate: humanReadableFormatShort(gracePeriodEnd),\n      };\n    }\n\n    // Only show warning for annual subscriptions (30 days before expiration)\n    // Monthly subscriptions auto-renew, so no warning needed\n    if (isAnnualBilling && daysRemaining <= 30) {\n      return {\n        variant: \"warning\" as const,\n        daysRemaining,\n        expirationDate: humanReadableFormatShort(expiresAt),\n      };\n    }\n  }\n\n  if (billing.status === \"expired\" || billing.status === \"cancelled\") {\n    return {\n      variant: \"error\" as const,\n      daysRemaining: 0,\n      daysUntilDeletion: GRACE_PERIOD_DAYS,\n      expirationDate: \"\",\n    };\n  }\n\n  return null;\n}\n\n// ----------------------------------------------------------------------------\n// SubscriptionCard\n// ----------------------------------------------------------------------------\n\nfunction SubscriptionCard({\n  billing,\n  license,\n  onViewPlans,\n  disabled,\n  isManualLicenseOnly,\n  onReconnect,\n}: {\n  billing?: BillingInformation;\n  license?: LicenseStatus;\n  onViewPlans: () => void;\n  disabled?: boolean;\n  isManualLicenseOnly?: boolean;\n  onReconnect?: () => Promise<void>;\n}) {\n  const [isReconnecting, setIsReconnecting] = useState(false);\n\n  const planName = isManualLicenseOnly ? \"Enterprise Plan\" : \"Business Plan\";\n  const PlanIcon = isManualLicenseOnly ? SvgOrganization : SvgUsers;\n  const expirationDate = billing?.current_period_end ?? license?.expires_at;\n  const formattedDate = formatDateShort(expirationDate);\n\n  const isExpiredFromBilling =\n    billing?.status === \"expired\" || billing?.status === \"cancelled\";\n  const isExpiredFromLicense =\n    license?.status === \"expired\" ||\n    license?.status === \"gated_access\" ||\n    (license?.expires_at && new Date(license.expires_at) < new Date());\n  const isExpired = isExpiredFromBilling || isExpiredFromLicense;\n  const isCanceling = billing?.cancel_at_period_end;\n\n  let subtitle: string;\n  if (isExpired) {\n    subtitle = `Expired on ${formattedDate}`;\n  } else if (isCanceling) {\n    subtitle = `Valid until ${formattedDate}`;\n  } else if (billing) {\n    subtitle = `Next payment on ${formattedDate}`;\n  } else {\n    subtitle = `Valid until ${formattedDate}`;\n  }\n\n  const handleManagePlan = async () => {\n    try {\n      const response = await createCustomerPortalSession({\n        return_url: `${window.location.origin}/admin/billing?portal_return=true`,\n      });\n      if (response.stripe_customer_portal_url) {\n        window.location.href = response.stripe_customer_portal_url;\n      }\n    } catch (error) {\n      console.error(\"Failed to open customer portal:\", error);\n    }\n  };\n\n  const handleReconnect = async () => {\n    setIsReconnecting(true);\n    try {\n      await resetStripeConnection();\n      await onReconnect?.();\n    } catch (error) {\n      console.error(\"Failed to reconnect to Stripe:\", error);\n    } finally {\n      setIsReconnecting(false);\n    }\n  };\n\n  return (\n    <Card>\n      <Section\n        flexDirection=\"row\"\n        justifyContent=\"between\"\n        alignItems=\"start\"\n        height=\"auto\"\n      >\n        <Section gap={0.25} alignItems=\"start\" height=\"auto\" width=\"auto\">\n          <PlanIcon className=\"w-5 h-5\" />\n          <Text headingH3Muted text04>\n            {planName}\n          </Text>\n          <Text secondaryBody text03>\n            {subtitle}\n          </Text>\n        </Section>\n        <Section\n          flexDirection=\"column\"\n          gap={0.25}\n          alignItems=\"end\"\n          height=\"auto\"\n          width=\"fit\"\n        >\n          {isManualLicenseOnly ? (\n            <Text secondaryBody text03 className=\"text-right\">\n              Your plan is managed through sales.\n              <br />\n              <a\n                href=\"mailto:support@onyx.app?subject=Billing%20change%20request\"\n                className=\"underline\"\n              >\n                Contact billing\n              </a>{\" \"}\n              to make changes.\n            </Text>\n          ) : disabled ? (\n            <OpalButton\n              disabled={isReconnecting}\n              prominence=\"secondary\"\n              onClick={handleReconnect}\n              rightIcon={SvgArrowRight}\n            >\n              {isReconnecting ? \"Connecting...\" : \"Connect to Stripe\"}\n            </OpalButton>\n          ) : (\n            <OpalButton onClick={handleManagePlan} rightIcon={SvgExternalLink}>\n              Manage Plan\n            </OpalButton>\n          )}\n          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n          <Button tertiary onClick={onViewPlans} className=\"billing-text-link\">\n            <Text secondaryBody text03>\n              View Plan Details\n            </Text>\n          </Button>\n        </Section>\n      </Section>\n    </Card>\n  );\n}\n\n// ----------------------------------------------------------------------------\n// SeatsCard\n// ----------------------------------------------------------------------------\n\nfunction SeatsCard({\n  billing,\n  license,\n  onRefresh,\n  disabled,\n  hideUpdateSeats,\n}: {\n  billing?: BillingInformation;\n  license?: LicenseStatus;\n  onRefresh?: () => Promise<void>;\n  disabled?: boolean;\n  hideUpdateSeats?: boolean;\n}) {\n  const [isEditing, setIsEditing] = useState(false);\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n\n  const { data: usersData, isLoading: isLoadingUsers } = useUsers({\n    includeApiKeys: false,\n  });\n\n  const totalSeats = billing?.seats ?? license?.seats ?? 0;\n  const acceptedUsers =\n    usersData?.accepted?.filter((u) => u.is_active).length ?? 0;\n  const slackUsers =\n    usersData?.slack_users?.filter((u) => u.is_active).length ?? 0;\n  const usedSeats = acceptedUsers + slackUsers;\n  const pendingSeats = usersData?.invited?.length ?? 0;\n  const remainingSeats = Math.max(0, totalSeats - usedSeats - pendingSeats);\n\n  const [newSeatCount, setNewSeatCount] = useState(totalSeats);\n  const minRequiredSeats = usedSeats + pendingSeats;\n  const isBelowMinimum = newSeatCount < minRequiredSeats;\n\n  const handleStartEdit = () => {\n    setNewSeatCount(totalSeats);\n    setError(null);\n    setIsEditing(true);\n  };\n\n  const handleCancel = () => {\n    setIsEditing(false);\n    setError(null);\n  };\n\n  const handleConfirm = async () => {\n    if (newSeatCount === totalSeats) {\n      setIsEditing(false);\n      return;\n    }\n    if (isBelowMinimum) return;\n\n    setIsSubmitting(true);\n    setError(null);\n\n    try {\n      await updateSeatCount({ new_seat_count: newSeatCount });\n      if (!NEXT_PUBLIC_CLOUD_ENABLED) {\n        // Wait for control plane to process the subscription update before claiming\n        await new Promise((resolve) => setTimeout(resolve, 1500));\n        await claimLicense();\n        // Force refresh the Redis cache from the database\n        await refreshLicenseCache();\n      }\n      await onRefresh?.();\n      setIsEditing(false);\n    } catch (err) {\n      setError(err instanceof Error ? err.message : \"Failed to update seats\");\n    } finally {\n      setIsSubmitting(false);\n    }\n  };\n\n  const seatDifference = newSeatCount - totalSeats;\n  const isAdding = seatDifference > 0;\n  const isRemoving = seatDifference < 0;\n  const nextBillingDate = formatDateShort(billing?.current_period_end);\n  const seatCount = Math.abs(seatDifference);\n  const seatWord = seatCount === 1 ? \"seat\" : \"seats\";\n\n  if (isEditing) {\n    return (\n      <Card\n        padding={0}\n        gap={0}\n        alignItems=\"stretch\"\n        className=\"billing-card-enter\"\n      >\n        <Section\n          flexDirection=\"row\"\n          justifyContent=\"between\"\n          alignItems=\"start\"\n          padding={1}\n          height=\"auto\"\n        >\n          <Content\n            title=\"Update Seats\"\n            description=\"Add or remove seats to reflect your team size.\"\n            sizePreset=\"main-content\"\n            variant=\"section\"\n          />\n          <OpalButton\n            disabled={isSubmitting}\n            prominence=\"secondary\"\n            onClick={handleCancel}\n          >\n            Cancel\n          </OpalButton>\n        </Section>\n\n        <div className=\"billing-content-area\">\n          <Section\n            flexDirection=\"column\"\n            alignItems=\"stretch\"\n            gap={0.25}\n            padding={1}\n            height=\"auto\"\n          >\n            <InputLayouts.Vertical title=\"Seats\">\n              <InputNumber\n                value={newSeatCount}\n                onChange={(v) => setNewSeatCount(v ?? 1)}\n                min={1}\n                defaultValue={totalSeats}\n                showReset\n                variant={isBelowMinimum ? \"error\" : \"primary\"}\n              />\n            </InputLayouts.Vertical>\n\n            {isBelowMinimum ? (\n              <InputLayouts.ErrorTextLayout type=\"error\">\n                You cannot set seats below current{\" \"}\n                <span className=\"font-semibold\">{minRequiredSeats}</span> seats\n                in use/pending.{\" \"}\n                <Link\n                  href=\"/admin/users\"\n                  className=\"underline hover:no-underline\"\n                >\n                  Remove users\n                </Link>{\" \"}\n                first before adjusting seats.\n              </InputLayouts.ErrorTextLayout>\n            ) : seatDifference !== 0 ? (\n              <Text secondaryBody text03>\n                {Math.abs(seatDifference)} seat\n                {Math.abs(seatDifference) !== 1 ? \"s\" : \"\"} to be{\" \"}\n                {isAdding ? \"added\" : \"removed\"}\n              </Text>\n            ) : null}\n\n            {error && (\n              <Text secondaryBody className=\"billing-error-text\">\n                {error}\n              </Text>\n            )}\n          </Section>\n        </div>\n\n        <Section\n          flexDirection=\"row\"\n          alignItems=\"center\"\n          justifyContent=\"between\"\n          padding={1}\n          height=\"auto\"\n        >\n          {isAdding ? (\n            <Text secondaryBody text03>\n              You will be billed for the{\" \"}\n              <Text secondaryBody text04>\n                {seatCount}\n              </Text>{\" \"}\n              additional {seatWord} at a pro-rated amount.\n            </Text>\n          ) : isRemoving ? (\n            <Text secondaryBody text03>\n              <Text secondaryBody text04>\n                {seatCount}\n              </Text>{\" \"}\n              {seatWord} will be removed on{\" \"}\n              <Text secondaryBody text04>\n                {nextBillingDate}\n              </Text>{\" \"}\n              (after current billing cycle).\n            </Text>\n          ) : (\n            <Text secondaryBody text03>\n              No changes to your billing.\n            </Text>\n          )}\n          <OpalButton\n            disabled={\n              isSubmitting || newSeatCount === totalSeats || isBelowMinimum\n            }\n            onClick={handleConfirm}\n          >\n            {isSubmitting ? \"Saving...\" : \"Confirm Change\"}\n          </OpalButton>\n        </Section>\n      </Card>\n    );\n  }\n\n  return (\n    <Card>\n      <Section\n        flexDirection=\"row\"\n        justifyContent=\"between\"\n        alignItems=\"center\"\n        height=\"auto\"\n      >\n        <Section gap={0.25} alignItems=\"start\" height=\"auto\" width=\"auto\">\n          <Text mainContentMuted text04>\n            {totalSeats} Seats\n          </Text>\n          <Text secondaryBody text03>\n            {usedSeats} in use • {pendingSeats} pending • {remainingSeats}{\" \"}\n            remaining\n          </Text>\n        </Section>\n        <Section\n          flexDirection=\"row\"\n          gap={0.5}\n          justifyContent=\"end\"\n          height=\"auto\"\n          width=\"auto\"\n        >\n          <OpalButton\n            prominence=\"tertiary\"\n            href=\"/admin/users\"\n            icon={SvgExternalLink}\n          >\n            View Users\n          </OpalButton>\n          {!hideUpdateSeats && (\n            <OpalButton\n              disabled={isLoadingUsers || disabled || !billing}\n              prominence=\"secondary\"\n              onClick={handleStartEdit}\n              icon={SvgPlus}\n            >\n              Update Seats\n            </OpalButton>\n          )}\n        </Section>\n      </Section>\n    </Card>\n  );\n}\n\n// ----------------------------------------------------------------------------\n// PaymentSection\n// ----------------------------------------------------------------------------\n\nfunction PaymentSection({ billing }: { billing: BillingInformation }) {\n  const handleOpenPortal = async () => {\n    try {\n      const response = await createCustomerPortalSession({\n        return_url: `${window.location.origin}/admin/billing?portal_return=true`,\n      });\n      if (response.stripe_customer_portal_url) {\n        window.location.href = response.stripe_customer_portal_url;\n      }\n    } catch (error) {\n      console.error(\"Failed to open customer portal:\", error);\n    }\n  };\n\n  if (!billing.payment_method_enabled) return null;\n\n  const lastPaymentDate = formatDateShort(billing.current_period_start);\n\n  return (\n    <div className=\"billing-payment-section\">\n      <Section alignItems=\"start\" height=\"auto\" width=\"full\">\n        <Text mainContentEmphasis>Payment</Text>\n        <Section\n          flexDirection=\"row\"\n          gap={0.5}\n          alignItems=\"stretch\"\n          height=\"auto\"\n        >\n          <Card className=\"billing-payment-card\">\n            <Section\n              flexDirection=\"row\"\n              justifyContent=\"between\"\n              alignItems=\"start\"\n              height=\"auto\"\n            >\n              <InfoBlock\n                icon={SvgWallet}\n                title=\"Visa ending in 1234\"\n                description=\"Payment method\"\n              />\n              <OpalButton\n                prominence=\"tertiary\"\n                onClick={handleOpenPortal}\n                rightIcon={SvgExternalLink}\n              >\n                Update\n              </OpalButton>\n            </Section>\n          </Card>\n          {lastPaymentDate && (\n            <Card className=\"billing-payment-card\">\n              <Section\n                flexDirection=\"row\"\n                justifyContent=\"between\"\n                alignItems=\"start\"\n                height=\"auto\"\n              >\n                <InfoBlock\n                  icon={SvgFileText}\n                  title={lastPaymentDate}\n                  description=\"Last payment\"\n                />\n                <OpalButton\n                  prominence=\"tertiary\"\n                  onClick={handleOpenPortal}\n                  rightIcon={SvgExternalLink}\n                >\n                  View Invoice\n                </OpalButton>\n              </Section>\n            </Card>\n          )}\n        </Section>\n      </Section>\n    </div>\n  );\n}\n\n// ----------------------------------------------------------------------------\n// BillingDetailsView\n// ----------------------------------------------------------------------------\n\ninterface BillingDetailsViewProps {\n  billing?: BillingInformation;\n  license?: LicenseStatus;\n  onViewPlans: () => void;\n  onRefresh?: () => Promise<void>;\n  isAirGapped?: boolean;\n  isManualLicenseOnly?: boolean;\n  hasStripeError?: boolean;\n  licenseCard?: React.ReactNode;\n}\n\nexport default function BillingDetailsView({\n  billing,\n  license,\n  onViewPlans,\n  onRefresh,\n  isAirGapped,\n  isManualLicenseOnly,\n  hasStripeError,\n  licenseCard,\n}: BillingDetailsViewProps) {\n  const expirationState = billing ? getExpirationState(billing, license) : null;\n  const disableBillingActions =\n    isAirGapped || hasStripeError || isManualLicenseOnly;\n\n  return (\n    <Section gap={1} height=\"auto\" width=\"full\">\n      {/* Stripe connection error banner */}\n      {hasStripeError && (\n        <Message\n          static\n          warning\n          text=\"Unable to connect to Stripe payment portal.\"\n          description=\"Check your internet connection or manually provide a license.\"\n          close={false}\n          className=\"w-full\"\n        />\n      )}\n\n      {/* Air-gapped mode info banner */}\n      {isAirGapped && !hasStripeError && !isManualLicenseOnly && (\n        <Message\n          static\n          info\n          text=\"Air-gapped deployment\"\n          description=\"Online billing management is disabled. Contact support to update your subscription.\"\n          close={false}\n          className=\"w-full\"\n        />\n      )}\n\n      {/* Expiration banner */}\n      {expirationState && (\n        <Message\n          static\n          warning={expirationState.variant === \"warning\"}\n          error={expirationState.variant === \"error\"}\n          text={\n            expirationState.variant === \"error\"\n              ? expirationState.daysUntilDeletion\n                ? `Your subscription has expired. Data will be deleted in ${expirationState.daysUntilDeletion} days.`\n                : \"Your subscription has expired.\"\n              : `Your subscription is expiring in ${expirationState.daysRemaining} days.`\n          }\n          description={\n            expirationState.variant === \"error\"\n              ? expirationState.expirationDate\n                ? `Renew your subscription by ${expirationState.expirationDate} to restore access.`\n                : \"Renew your subscription to restore access to paid features.\"\n              : `Renew your subscription by ${expirationState.expirationDate} to avoid disruption.`\n          }\n          close={false}\n          className=\"w-full\"\n        />\n      )}\n\n      {/* Subscription card */}\n      {(billing || license?.has_license) && (\n        <SubscriptionCard\n          billing={billing}\n          license={license}\n          onViewPlans={onViewPlans}\n          disabled={disableBillingActions}\n          isManualLicenseOnly={isManualLicenseOnly}\n          onReconnect={onRefresh}\n        />\n      )}\n\n      {/* License card (inline for manual license users) */}\n      {licenseCard}\n\n      {/* Seats card */}\n      <SeatsCard\n        billing={billing}\n        license={license}\n        onRefresh={onRefresh}\n        disabled={disableBillingActions}\n        hideUpdateSeats={isManualLicenseOnly}\n      />\n\n      {/* Payment section */}\n      {/* TODO: Re-enable payment section when APIs for fetching payment details are implemented */}\n      {/* {billing?.payment_method_enabled && !isAirGapped && <PaymentSection billing={billing} />} */}\n    </Section>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/billing/CheckoutView.tsx",
    "content": "\"use client\";\n\nimport { useState, useMemo, useEffect } from \"react\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { SvgUsers, SvgCheck } from \"@opal/icons\";\nimport { createCheckoutSession } from \"@/lib/billing/svc\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { formatDateShort } from \"@/lib/dateUtils\";\nimport type { PlanType } from \"@/lib/billing/interfaces\";\nimport InputNumber from \"@/refresh-components/inputs/InputNumber\";\nimport useUsers from \"@/hooks/useUsers\";\n\n// ----------------------------------------------------------------------------\n// BillingOption\n// ----------------------------------------------------------------------------\n\ninterface BillingOptionProps {\n  selected: boolean;\n  onClick: () => void;\n  title: string;\n  price: number;\n  badge?: string;\n}\n\nfunction BillingOption({\n  selected,\n  onClick,\n  title,\n  price,\n  badge,\n}: BillingOptionProps) {\n  return (\n    <Card\n      onClick={onClick}\n      className=\"billing-option\"\n      data-selected={selected}\n      padding={0}\n    >\n      <Section\n        flexDirection=\"row\"\n        gap={0.5}\n        height=\"fit\"\n        justifyContent=\"between\"\n        alignItems=\"start\"\n      >\n        <Section\n          alignItems=\"start\"\n          justifyContent=\"center\"\n          gap={0}\n          height=\"fit\"\n          width=\"fit\"\n        >\n          <Text mainUiAction className=\"billing-option-title\">\n            {title}\n          </Text>\n          <div className=\"billing-option-price\">\n            <Text mainContentEmphasis text04>\n              ${price}\n            </Text>\n            <Text secondaryBody text03 nowrap>\n              per seat/month\n            </Text>\n          </div>\n        </Section>\n        {badge && (\n          <Section\n            flexDirection=\"row\"\n            gap={0.25}\n            alignItems=\"center\"\n            justifyContent=\"end\"\n            width=\"fit\"\n            height=\"fit\"\n          >\n            <Text secondaryAction className=\"billing-option-badge\">\n              {badge}\n            </Text>\n            <SvgCheck className=\"billing-option-check\" />\n          </Section>\n        )}\n      </Section>\n    </Card>\n  );\n}\n\n// ----------------------------------------------------------------------------\n// CheckoutView\n// ----------------------------------------------------------------------------\n\ninterface CheckoutViewProps {\n  onAdjustPlan: () => void;\n}\n\nexport default function CheckoutView({ onAdjustPlan }: CheckoutViewProps) {\n  const { user } = useUser();\n  const { data: usersData } = useUsers({ includeApiKeys: false });\n\n  // Calculate minimum required seats based on current active users\n  const acceptedUsers =\n    usersData?.accepted?.filter((u) => u.is_active).length ?? 0;\n  const slackUsers =\n    usersData?.slack_users?.filter((u) => u.is_active).length ?? 0;\n  const minRequiredSeats = Math.max(1, acceptedUsers + slackUsers);\n\n  const [billingPeriod, setBillingPeriod] = useState<PlanType>(\"annual\");\n  const [seats, setSeats] = useState(minRequiredSeats);\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n\n  // Update seats if minRequiredSeats changes (e.g., after user data loads)\n  useEffect(() => {\n    if (seats < minRequiredSeats) {\n      setSeats(minRequiredSeats);\n    }\n  }, [minRequiredSeats, seats]);\n\n  const monthlyPrice = 25;\n  const annualPrice = 20;\n  const annualPriceSelected = billingPeriod === \"annual\";\n\n  const trialEndDate = useMemo(() => {\n    const date = new Date();\n    date.setMonth(date.getMonth() + 1);\n    return formatDateShort(date.toISOString());\n  }, []);\n\n  const handleSubmit = async () => {\n    setIsSubmitting(true);\n    setError(null);\n\n    try {\n      const response = await createCheckoutSession({\n        billing_period: billingPeriod,\n        seats,\n        email: user?.email,\n      });\n\n      if (response.stripe_checkout_url) {\n        window.location.href = response.stripe_checkout_url;\n      } else {\n        throw new Error(\"Invalid response from checkout session\");\n      }\n    } catch (err) {\n      console.error(\"Error creating checkout session:\", err);\n      setError(\n        err instanceof Error ? err.message : \"Failed to create checkout session\"\n      );\n    } finally {\n      setIsSubmitting(false);\n    }\n  };\n\n  return (\n    <Card padding={0} gap={0} alignItems=\"stretch\">\n      {/* Header */}\n      <Section\n        flexDirection=\"row\"\n        justifyContent=\"between\"\n        alignItems=\"start\"\n        padding={1}\n        height=\"auto\"\n      >\n        <Section\n          flexDirection=\"column\"\n          alignItems=\"start\"\n          gap={0.25}\n          height=\"auto\"\n          width=\"fit\"\n        >\n          <SvgUsers size={24} />\n          <Text headingH2 text04>\n            Business\n          </Text>\n        </Section>\n        <Button prominence=\"secondary\" onClick={onAdjustPlan}>\n          Adjust Plan\n        </Button>\n      </Section>\n\n      {/* Content */}\n      <div className=\"billing-content-area\">\n        <Section\n          flexDirection=\"column\"\n          alignItems=\"stretch\"\n          gap={1}\n          padding={1}\n          height=\"auto\"\n        >\n          {/* Billing Cycle */}\n          <InputLayouts.Horizontal\n            title=\"Billing Cycle\"\n            description=\"after your 1-month free trial\"\n          >\n            <Section\n              flexDirection=\"row\"\n              gap={0.25}\n              width=\"fit\"\n              height=\"auto\"\n              justifyContent=\"start\"\n            >\n              <BillingOption\n                selected={billingPeriod === \"monthly\"}\n                onClick={() => setBillingPeriod(\"monthly\")}\n                title=\"Monthly\"\n                price={monthlyPrice}\n              />\n              <BillingOption\n                selected={billingPeriod === \"annual\"}\n                onClick={() => setBillingPeriod(\"annual\")}\n                title=\"Annual\"\n                price={annualPrice}\n                badge=\"Save 20%\"\n              />\n            </Section>\n          </InputLayouts.Horizontal>\n\n          <Separator noPadding />\n\n          {/* Seats */}\n          <InputLayouts.Horizontal\n            title=\"Seats\"\n            description={`Minimum ${minRequiredSeats} seat${\n              minRequiredSeats !== 1 ? \"s\" : \"\"\n            } required for your current users and Slack accounts.`}\n          >\n            <InputNumber\n              value={seats}\n              onChange={(v) => setSeats(v ?? minRequiredSeats)}\n              min={minRequiredSeats}\n              defaultValue={minRequiredSeats}\n              showReset\n            />\n          </InputLayouts.Horizontal>\n        </Section>\n      </div>\n\n      {/* Footer */}\n      <Section\n        flexDirection=\"row\"\n        alignItems=\"center\"\n        justifyContent=\"between\"\n        padding={1}\n        height=\"auto\"\n      >\n        {error ? (\n          <Text secondaryBody className=\"billing-error-text\">\n            {error}\n          </Text>\n        ) : !annualPriceSelected ? (\n          <Text secondaryBody text03>\n            You will be billed on{\" \"}\n            <Text secondaryBody text04>\n              {trialEndDate}\n            </Text>{\" \"}\n            After your 1-month free trial ends.\n          </Text>\n        ) : (\n          // Empty div to maintain space-between alignment\n          <div></div>\n        )}\n        <Button disabled={isSubmitting} onClick={handleSubmit}>\n          {isSubmitting ? \"Loading...\" : \"Continue to Payment\"}\n        </Button>\n      </Section>\n    </Card>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/billing/LicenseActivationCard.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport InputFile from \"@/refresh-components/inputs/InputFile\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport { SvgXCircle, SvgCheckCircle, SvgXOctagon } from \"@opal/icons\";\nimport { uploadLicense } from \"@/lib/billing/svc\";\nimport { LicenseStatus } from \"@/lib/billing/interfaces\";\nimport { formatDateShort } from \"@/lib/dateUtils\";\n\nconst BILLING_HELP_URL = \"https://docs.onyx.app/more/billing\";\n\ninterface LicenseActivationCardProps {\n  isOpen: boolean;\n  onClose: () => void;\n  onSuccess: () => void;\n  license?: LicenseStatus;\n  hideClose?: boolean;\n}\n\nexport default function LicenseActivationCard({\n  isOpen,\n  onClose,\n  onSuccess,\n  license,\n  hideClose,\n}: LicenseActivationCardProps) {\n  const [licenseKey, setLicenseKey] = useState(\"\");\n  const [isActivating, setIsActivating] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n  const [success, setSuccess] = useState(false);\n  const [showInput, setShowInput] = useState(!license?.has_license);\n\n  const hasLicense = license?.has_license;\n  const isDateExpired = license?.expires_at\n    ? new Date(license.expires_at) < new Date()\n    : false;\n  const isExpired =\n    license?.status === \"expired\" ||\n    license?.status === \"gated_access\" ||\n    isDateExpired;\n  const expirationDate = license?.expires_at\n    ? formatDateShort(license.expires_at)\n    : null;\n\n  const handleActivate = async () => {\n    if (!licenseKey.trim()) {\n      setError(\"Please enter a license key\");\n      return;\n    }\n\n    setIsActivating(true);\n    setError(null);\n\n    try {\n      await uploadLicense(licenseKey.trim());\n      setSuccess(true);\n      setTimeout(() => {\n        onSuccess();\n        handleClose();\n      }, 1000);\n    } catch (err) {\n      console.error(\"Error activating license:\", err);\n      setError(\n        err instanceof Error ? err.message : \"Failed to activate license\"\n      );\n    } finally {\n      setIsActivating(false);\n    }\n  };\n\n  const handleClose = () => {\n    setLicenseKey(\"\");\n    setError(null);\n    setSuccess(false);\n    setShowInput(!license?.has_license);\n    onClose();\n  };\n\n  if (!isOpen) return null;\n\n  // License status view (when license exists and not editing)\n  if (hasLicense && !showInput) {\n    return (\n      <Card padding={1} alignItems=\"stretch\">\n        <Section\n          flexDirection=\"row\"\n          justifyContent=\"between\"\n          alignItems=\"center\"\n          height=\"auto\"\n        >\n          <Section\n            flexDirection=\"column\"\n            alignItems=\"start\"\n            gap={0.5}\n            height=\"auto\"\n            width=\"auto\"\n          >\n            {isExpired ? (\n              <SvgXOctagon size={16} className=\"stroke-status-error-05\" />\n            ) : (\n              <SvgCheckCircle size={16} className=\"stroke-status-success-05\" />\n            )}\n            <Text secondaryBody text03>\n              {isExpired ? (\n                <>License key expired</>\n              ) : (\n                <>\n                  License key active until{\" \"}\n                  <Text secondaryBody text04>\n                    {expirationDate}\n                  </Text>\n                </>\n              )}\n            </Text>\n          </Section>\n          <Section flexDirection=\"row\" gap={0.5} height=\"auto\" width=\"auto\">\n            <Button prominence=\"secondary\" onClick={() => setShowInput(true)}>\n              Update Key\n            </Button>\n            {!hideClose && (\n              <Button prominence=\"tertiary\" onClick={handleClose}>\n                Close\n              </Button>\n            )}\n          </Section>\n        </Section>\n      </Card>\n    );\n  }\n\n  // License input form\n  return (\n    <Card padding={0} alignItems=\"stretch\" gap={0}>\n      {/* Header */}\n      <Section flexDirection=\"column\" alignItems=\"stretch\" gap={0} padding={1}>\n        <Section\n          flexDirection=\"row\"\n          justifyContent=\"between\"\n          alignItems=\"center\"\n        >\n          <Text headingH3>\n            {hasLicense ? \"Update License Key\" : \"Activate License Key\"}\n          </Text>\n          <Button\n            disabled={isActivating}\n            prominence=\"secondary\"\n            onClick={handleClose}\n          >\n            Cancel\n          </Button>\n        </Section>\n        <Text secondaryBody text03>\n          Manually add and activate a license for this Onyx instance.\n        </Text>\n      </Section>\n\n      {/* Content */}\n      <div className=\"billing-content-area\">\n        <Section\n          flexDirection=\"column\"\n          alignItems=\"stretch\"\n          gap={0.5}\n          padding={1}\n        >\n          {success && (\n            <div className=\"billing-success-message\">\n              <Text secondaryBody>\n                License {hasLicense ? \"updated\" : \"activated\"} successfully!\n              </Text>\n            </div>\n          )}\n\n          <InputLayouts.Vertical\n            title=\"License Key\"\n            subDescription={\n              error\n                ? undefined\n                : \"Paste or attach your license key file you received from Onyx.\"\n            }\n          >\n            <InputFile\n              placeholder=\"eyJwYXlsb2FkIjogeyJ2ZXJzaW9...\"\n              setValue={(value) => {\n                setLicenseKey(value);\n                setError(null);\n              }}\n              error={!!error}\n              className=\"billing-license-input\"\n            />\n            {error && (\n              <Section\n                flexDirection=\"row\"\n                alignItems=\"center\"\n                justifyContent=\"start\"\n                gap={0.25}\n                height=\"auto\"\n              >\n                <div className=\"billing-error-icon\">\n                  <SvgXCircle />\n                </div>\n                <Text secondaryBody text04>\n                  {error}.{\" \"}\n                  <a\n                    href={BILLING_HELP_URL}\n                    target=\"_blank\"\n                    rel=\"noopener noreferrer\"\n                    className=\"billing-help-link\"\n                  >\n                    Billing Help\n                  </a>\n                </Text>\n              </Section>\n            )}\n          </InputLayouts.Vertical>\n        </Section>\n      </div>\n\n      {/* Footer */}\n      <Section flexDirection=\"row\" justifyContent=\"end\" padding={1}>\n        <Button\n          disabled={isActivating || !licenseKey.trim() || success}\n          onClick={handleActivate}\n        >\n          {isActivating\n            ? \"Activating...\"\n            : hasLicense\n              ? \"Update License\"\n              : \"Activate License\"}\n        </Button>\n      </Section>\n    </Card>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/billing/PlansView.tsx",
    "content": "\"use client\";\n\nimport {\n  SvgDashboard,\n  SvgHistory,\n  SvgFiles,\n  SvgGlobe,\n  SvgHardDrive,\n  SvgHeadsetMic,\n  SvgShareWebhook,\n  SvgKey,\n  SvgLock,\n  SvgPaintBrush,\n  SvgOrganization,\n  SvgServer,\n  SvgShield,\n  SvgSliders,\n  SvgUserManage,\n  SvgUsers,\n} from \"@opal/icons\";\nimport \"@/app/admin/billing/billing.css\";\nimport type { IconProps } from \"@opal/types\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Button as OpalButton } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Section } from \"@/layouts/general-layouts\";\n\nconst SALES_URL = \"https://www.onyx.app/contact-sales\";\n\n// ----------------------------------------------------------------------------\n// Types\n// ----------------------------------------------------------------------------\n\ninterface PlanFeature {\n  icon: React.FunctionComponent<IconProps>;\n  text: string;\n}\n\ninterface PlanConfig {\n  icon: React.FunctionComponent<IconProps>;\n  title: string;\n  pricing?: string;\n  description: string;\n  buttonLabel: string;\n  buttonVariant: \"primary\" | \"secondary\";\n  buttonIcon?: React.FunctionComponent<IconProps>;\n  onClick?: () => void;\n  href?: string;\n  features: PlanFeature[];\n  featuresPrefix: string;\n  isCurrentPlan?: boolean;\n}\n\n// ----------------------------------------------------------------------------\n// Plan Features\n// ----------------------------------------------------------------------------\n\nconst BUSINESS_FEATURES: PlanFeature[] = [\n  { icon: SvgFiles, text: \"Inherit Document Permissions\" },\n  { icon: SvgHistory, text: \"Query History and Usage Dashboard\" },\n  { icon: SvgShield, text: \"Role Based Access Control (RBAC)\" },\n  { icon: SvgLock, text: \"Encryption of Secrets\" },\n  { icon: SvgKey, text: \"Service Account API Keys\" },\n  { icon: SvgHardDrive, text: \"Self-hosting (Optional)\" },\n  { icon: SvgPaintBrush, text: \"Custom Theming\" },\n];\n\nconst ENTERPRISE_FEATURES: PlanFeature[] = [\n  { icon: SvgUsers, text: \"SCIM / Group Sync\" },\n  { icon: SvgDashboard, text: \"Full White-labeling\" },\n  { icon: SvgUserManage, text: \"Custom Roles and Permissions\" },\n  { icon: SvgSliders, text: \"Configurable Usage Limits\" },\n  { icon: SvgShareWebhook, text: \"Hook Extensions\" },\n  { icon: SvgServer, text: \"Custom Deployments\" },\n  { icon: SvgGlobe, text: \"Region-Specific Data Processing\" },\n  { icon: SvgHeadsetMic, text: \"Enterprise SLAs and Priority Support\" },\n];\n\n// ----------------------------------------------------------------------------\n// PlanCard (inlined)\n// ----------------------------------------------------------------------------\n\nfunction PlanCard({\n  icon: Icon,\n  title,\n  pricing,\n  description,\n  buttonLabel,\n  buttonIcon: ButtonIcon,\n  onClick,\n  href,\n  features,\n  featuresPrefix,\n  isCurrentPlan,\n  hideFeatures,\n}: PlanConfig & { hideFeatures?: boolean }) {\n  return (\n    <Card\n      padding={0}\n      gap={0}\n      alignItems=\"stretch\"\n      aria-label={title + \" plan card\"}\n      className=\"plan-card\"\n    >\n      <Section\n        flexDirection=\"column\"\n        alignItems=\"stretch\"\n        padding={1}\n        height=\"fit\"\n      >\n        {/* Title */}\n        <Section\n          flexDirection=\"column\"\n          alignItems=\"start\"\n          gap={0.25}\n          width=\"full\"\n        >\n          <Icon size={24} />\n          <Text headingH3 text04>\n            {title}\n          </Text>\n        </Section>\n\n        {/* Pricing */}\n        <Section\n          flexDirection=\"row\"\n          justifyContent=\"start\"\n          alignItems=\"center\"\n          gap={0.5}\n          height=\"auto\"\n        >\n          {pricing && (\n            <Text headingH2 text04>\n              {pricing}\n            </Text>\n          )}\n          <Text\n            secondaryBody\n            text03\n            className={\n              pricing ? \"whitespace-pre-line\" : \"whitespace-pre-line min-h-9\"\n            }\n          >\n            {description}\n          </Text>\n        </Section>\n\n        {/* Button */}\n        <div className=\"plan-card-button\">\n          {isCurrentPlan ? (\n            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n            <Button tertiary transient className=\"pointer-events-none\">\n              <Text mainUiAction text03>\n                Your Current Plan\n              </Text>\n            </Button>\n          ) : href ? (\n            <OpalButton\n              prominence=\"secondary\"\n              href={href}\n              target=\"_blank\"\n              rel=\"noopener noreferrer\"\n            >\n              {buttonLabel}\n            </OpalButton>\n          ) : onClick ? (\n            <OpalButton onClick={onClick} icon={ButtonIcon}>\n              {buttonLabel}\n            </OpalButton>\n          ) : (\n            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n            <Button tertiary transient className=\"pointer-events-none\">\n              <Text mainUiAction text03>\n                Included in your plan\n              </Text>\n            </Button>\n          )}\n        </div>\n      </Section>\n\n      {/* Features */}\n      <div\n        className=\"plan-card-features-container\"\n        data-hidden={hideFeatures ? \"true\" : \"false\"}\n      >\n        <Section\n          flexDirection=\"column\"\n          alignItems=\"start\"\n          justifyContent=\"start\"\n          gap={1}\n          padding={1}\n        >\n          <Text mainUiBody text03>\n            {featuresPrefix}\n          </Text>\n          <Section\n            flexDirection=\"column\"\n            alignItems=\"start\"\n            gap={0.5}\n            height=\"auto\"\n          >\n            {features.map((feature) => (\n              <Section\n                key={feature.text}\n                flexDirection=\"row\"\n                alignItems=\"start\"\n                justifyContent=\"start\"\n                gap={0.25}\n                width=\"fit\"\n                height=\"auto\"\n              >\n                <div className=\"plan-card-feature-icon\">\n                  <feature.icon size={16} className=\"stroke-text-03\" />\n                </div>\n                <Text mainUiBody text03>\n                  {feature.text}\n                </Text>\n              </Section>\n            ))}\n          </Section>\n        </Section>\n      </div>\n    </Card>\n  );\n}\n\n// ----------------------------------------------------------------------------\n// PlansView\n// ----------------------------------------------------------------------------\n\ninterface PlansViewProps {\n  hasSubscription?: boolean;\n  hasLicense?: boolean;\n  onCheckout: () => void;\n  hideFeatures?: boolean;\n}\n\nexport default function PlansView({\n  hasSubscription,\n  hasLicense,\n  onCheckout,\n  hideFeatures,\n}: PlansViewProps) {\n  const plans: PlanConfig[] = [\n    {\n      icon: SvgUsers,\n      title: \"Business\",\n      pricing: \"$20\",\n      description:\n        \"per seat/month billed annually\\nor $25 per seat if billed monthly\",\n      buttonLabel: \"Get Business Plan\",\n      buttonVariant: \"primary\",\n      onClick: hasLicense ? undefined : onCheckout,\n      features: BUSINESS_FEATURES,\n      featuresPrefix: \"Get more work done with AI for your team.\",\n      isCurrentPlan: !!hasSubscription,\n    },\n    {\n      icon: SvgOrganization,\n      title: \"Enterprise\",\n      description:\n        \"Flexible pricing & deployment options\\nfor large organizations\",\n      buttonLabel: \"Contact Sales\",\n      buttonVariant: \"secondary\",\n      href: SALES_URL,\n      features: ENTERPRISE_FEATURES,\n      featuresPrefix: \"Everything in Business Plan, plus:\",\n      isCurrentPlan: !!hasLicense && !hasSubscription,\n    },\n  ];\n\n  return (\n    <Section flexDirection=\"row\" alignItems=\"stretch\" width=\"full\">\n      {plans.map((plan) => (\n        <PlanCard key={plan.title} {...plan} hideFeatures={hideFeatures} />\n      ))}\n    </Section>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/billing/billing.css",
    "content": "/**\n * Billing page styles\n *\n * This file contains custom styles for billing components that cannot be achieved\n * with built-in component props. Prefer using component props over custom CSS.\n */\n\n/* -----------------------------------------------------------------------------\n * Plan Card\n * -------------------------------------------------------------------------- */\n\n.plan-card {\n  display: flex;\n  flex-direction: column;\n  overflow: hidden;\n  /* Let parent's align-items: stretch handle the height */\n  align-self: stretch;\n}\n\n/* Override Card's inner Section to grow and fill the card */\n.plan-card > div {\n  display: flex;\n  flex-direction: column;\n  flex: 1;\n}\n\n.plan-card-button button,\n.plan-card-button a {\n  width: 100%;\n}\n\n.plan-card-features-container {\n  display: flex;\n  flex-direction: column;\n  overflow: hidden;\n  transition:\n    max-height 0.25s ease-out,\n    opacity 0.2s ease-out;\n  background: var(--background-tint-01);\n  flex: 1;\n}\n\n.plan-card-features-container[data-hidden=\"true\"] {\n  max-height: 0;\n  opacity: 0;\n  flex: 0;\n}\n\n.plan-card-features-container[data-hidden=\"false\"] {\n  max-height: none;\n  opacity: 1;\n}\n\n.plan-card-feature-icon {\n  display: flex;\n  align-items: center;\n  justify-content: center;\n  width: 1.25rem;\n  height: 1.25rem;\n  padding: 0.125rem;\n  flex-shrink: 0;\n}\n\n/* -----------------------------------------------------------------------------\n * Other Billing Components\n * -------------------------------------------------------------------------- */\n\n.billing-payment-card {\n  min-width: 17.5rem;\n  flex: 1;\n}\n\n.billing-license-input {\n  font-family: var(--font-mono);\n}\n\n.billing-error-icon {\n  display: flex;\n  align-items: center;\n  justify-content: center;\n  width: 0.75rem;\n  height: 0.75rem;\n  flex-shrink: 0;\n  color: var(--status-error-05);\n}\n\n.billing-content-area {\n  background: var(--background-tint-01);\n}\n\n.billing-success-message {\n  width: 100%;\n  padding: 0.75rem;\n  background: var(--status-success-01);\n  border: 1px solid var(--status-success-02);\n  border-radius: 0.5rem;\n  color: var(--status-success-05);\n}\n\n/* -----------------------------------------------------------------------------\n * Checkout View\n * -------------------------------------------------------------------------- */\n\n.billing-option {\n  width: 14rem;\n  min-width: 10rem;\n  padding: 0.75rem;\n  border-radius: 0.5rem;\n  border: 1px solid var(--border-01);\n  background: var(--background-neutral-00);\n  text-align: left;\n  transition: border-color 0.15s ease;\n  cursor: pointer;\n}\n\n.billing-option:hover {\n  border-color: var(--border-02);\n}\n\n.billing-option[data-selected=\"true\"] {\n  border-color: var(--action-link-05);\n  background: var(--action-link-01);\n}\n\n.billing-option-title {\n  color: var(--text-04);\n}\n\n.billing-option-price {\n  display: flex;\n  flex-direction: row;\n  align-items: baseline;\n  gap: 0.25rem;\n}\n\n.billing-option[data-selected=\"true\"] .billing-option-title {\n  color: var(--action-link-05);\n}\n\n.billing-option-badge {\n  color: var(--action-link-05);\n}\n\n.billing-option-check {\n  width: 1rem;\n  height: 1rem;\n  stroke: var(--action-link-05);\n}\n\n.billing-error-text {\n  color: var(--status-error-05);\n}\n\n.billing-payment-section {\n  padding-top: 1rem;\n  width: 100%;\n}\n\n/* -----------------------------------------------------------------------------\n * View Transition Animations\n * -------------------------------------------------------------------------- */\n\n@keyframes billing-fade-in {\n  from {\n    opacity: 0;\n    transform: translateY(8px);\n  }\n  to {\n    opacity: 1;\n    transform: translateY(0);\n  }\n}\n\n.billing-view-enter {\n  animation: billing-fade-in 0.25s ease-out;\n}\n\n.billing-view-expand {\n  animation: billing-fade-in 0.25s ease-out;\n}\n\n.billing-view-collapse {\n  animation: billing-fade-in 0.25s ease-out;\n}\n\n.billing-card-enter {\n  animation: billing-fade-in 0.25s ease-out;\n}\n"
  },
  {
    "path": "web/src/app/admin/billing/page.test.tsx",
    "content": "/**\n * Tests for BillingPage handleBillingReturn retry logic.\n *\n * The retry logic retries claimLicense up to 3 times with 2s backoff\n * when returning from a Stripe checkout session. This prevents the user\n * from getting stranded when the Stripe webhook fires concurrently with\n * the browser redirect and the license isn't ready yet.\n */\nimport React from \"react\";\nimport { render, screen, waitFor } from \"@tests/setup/test-utils\";\nimport { act } from \"@testing-library/react\";\n\n// ---- Stable mock objects (must be named with mock* prefix for jest hoisting) ----\n// useRouter and useSearchParams must return the SAME reference each call, otherwise\n// React's useEffect sees them as changed and re-runs the effect on every render.\nconst mockRouter = {\n  replace: jest.fn() as jest.Mock,\n  refresh: jest.fn() as jest.Mock,\n};\nconst mockSearchParams = {\n  get: jest.fn() as jest.Mock,\n};\nconst mockClaimLicense = jest.fn() as jest.Mock;\nconst mockRefreshBilling = jest.fn() as jest.Mock;\nconst mockRefreshLicense = jest.fn() as jest.Mock;\n\n// ---- Mocks ----\n\njest.mock(\"next/navigation\", () => ({\n  useRouter: () => mockRouter,\n  useSearchParams: () => mockSearchParams,\n}));\n\njest.mock(\"@/layouts/settings-layouts\", () => ({\n  Root: ({ children }: { children: React.ReactNode }) => (\n    <div data-testid=\"settings-root\">{children}</div>\n  ),\n  Header: () => <div data-testid=\"settings-header\" />,\n  Body: ({ children }: { children: React.ReactNode }) => (\n    <div data-testid=\"settings-body\">{children}</div>\n  ),\n}));\n\njest.mock(\"@/layouts/general-layouts\", () => ({\n  Section: ({ children }: { children: React.ReactNode }) => (\n    <div>{children}</div>\n  ),\n}));\n\njest.mock(\"@opal/icons\", () => ({\n  SvgArrowUpCircle: () => <svg />,\n  SvgWallet: () => <svg />,\n}));\n\njest.mock(\"./PlansView\", () => ({\n  __esModule: true,\n  default: () => <div data-testid=\"plans-view\" />,\n}));\njest.mock(\"./CheckoutView\", () => ({\n  __esModule: true,\n  default: () => <div data-testid=\"checkout-view\" />,\n}));\njest.mock(\"./BillingDetailsView\", () => ({\n  __esModule: true,\n  default: () => <div data-testid=\"billing-details-view\" />,\n}));\njest.mock(\"./LicenseActivationCard\", () => ({\n  __esModule: true,\n  default: () => <div data-testid=\"license-activation-card\" />,\n}));\n\njest.mock(\"@/refresh-components/messages/Message\", () => ({\n  __esModule: true,\n  default: ({\n    text,\n    description,\n    onClose,\n  }: {\n    text: string;\n    description?: string;\n    onClose?: () => void;\n  }) => (\n    <div data-testid=\"activating-banner\">\n      <span data-testid=\"activating-banner-text\">{text}</span>\n      {description && (\n        <span data-testid=\"activating-banner-description\">{description}</span>\n      )}\n      {onClose && (\n        <button data-testid=\"activating-banner-close\" onClick={onClose}>\n          Close\n        </button>\n      )}\n    </div>\n  ),\n}));\n\njest.mock(\"@/lib/billing\", () => ({\n  useBillingInformation: jest.fn(),\n  useLicense: jest.fn(),\n  hasActiveSubscription: jest.fn().mockReturnValue(false),\n  claimLicense: (...args: unknown[]) => mockClaimLicense(...args),\n}));\n\njest.mock(\"@/lib/constants\", () => ({\n  NEXT_PUBLIC_CLOUD_ENABLED: false,\n}));\n\n// ---- Import after mocks ----\nimport BillingPage from \"./page\";\nimport { useBillingInformation, useLicense } from \"@/lib/billing\";\n\n// ---- Test helpers ----\n\nfunction setupHooks() {\n  (useBillingInformation as jest.Mock).mockReturnValue({\n    data: null,\n    isLoading: false,\n    error: null,\n    refresh: mockRefreshBilling,\n  });\n  (useLicense as jest.Mock).mockReturnValue({\n    data: null,\n    isLoading: false,\n    refresh: mockRefreshLicense,\n  });\n}\n\n// ---- Tests ----\n\ndescribe(\"BillingPage — handleBillingReturn retry logic\", () => {\n  beforeEach(() => {\n    jest.clearAllMocks();\n    jest.useFakeTimers();\n    setupHooks();\n    // Default: no billing-return params\n    mockSearchParams.get.mockReturnValue(null);\n    // Clear any activating state from prior tests\n    sessionStorage.clear();\n  });\n\n  afterEach(() => {\n    jest.useRealTimers();\n    jest.restoreAllMocks();\n  });\n\n  test(\"calls claimLicense once and refreshes on first-attempt success\", async () => {\n    mockSearchParams.get.mockImplementation((key: string) =>\n      key === \"session_id\" ? \"cs_test_123\" : null\n    );\n    mockClaimLicense.mockResolvedValueOnce({ success: true });\n\n    render(<BillingPage />);\n\n    await act(async () => {\n      await jest.runAllTimersAsync();\n    });\n\n    await waitFor(() => {\n      expect(mockClaimLicense).toHaveBeenCalledTimes(1);\n      expect(mockClaimLicense).toHaveBeenCalledWith(\"cs_test_123\");\n    });\n    expect(mockRouter.refresh).toHaveBeenCalled();\n    expect(mockRefreshBilling).toHaveBeenCalled();\n    // URL cleaned up after checkout return\n    expect(mockRouter.replace).toHaveBeenCalledWith(\"/admin/billing\", {\n      scroll: false,\n    });\n  });\n\n  test(\"retries after first failure and succeeds on second attempt\", async () => {\n    mockSearchParams.get.mockImplementation((key: string) =>\n      key === \"session_id\" ? \"cs_retry_test\" : null\n    );\n    mockClaimLicense\n      .mockRejectedValueOnce(new Error(\"License not ready yet\"))\n      .mockResolvedValueOnce({ success: true });\n\n    render(<BillingPage />);\n\n    await act(async () => {\n      await jest.runAllTimersAsync();\n    });\n\n    await waitFor(() => {\n      expect(mockClaimLicense).toHaveBeenCalledTimes(2);\n    });\n    // On eventual success, router and billing should be refreshed\n    expect(mockRouter.refresh).toHaveBeenCalled();\n    expect(mockRefreshBilling).toHaveBeenCalled();\n  });\n\n  test(\"retries all 3 times then navigates to details even on total failure\", async () => {\n    mockSearchParams.get.mockImplementation((key: string) =>\n      key === \"session_id\" ? \"cs_all_fail\" : null\n    );\n    // All 3 attempts fail\n    mockClaimLicense.mockRejectedValue(new Error(\"Webhook not processed yet\"));\n\n    const consoleSpy = jest\n      .spyOn(console, \"error\")\n      .mockImplementation(() => {});\n\n    render(<BillingPage />);\n\n    await act(async () => {\n      await jest.runAllTimersAsync();\n    });\n\n    await waitFor(() => {\n      expect(mockClaimLicense).toHaveBeenCalledTimes(3);\n    });\n    // User stays on plans view with the activating banner\n    await waitFor(() => {\n      expect(screen.getByTestId(\"plans-view\")).toBeInTheDocument();\n    });\n    // refreshBilling still fires so billing state is up to date\n    expect(mockRefreshBilling).toHaveBeenCalled();\n    // Failure is logged\n    expect(consoleSpy).toHaveBeenCalledWith(\n      expect.stringContaining(\"Failed to sync license after billing return\"),\n      expect.any(Error)\n    );\n\n    consoleSpy.mockRestore();\n  });\n\n  test(\"calls claimLicense without session_id on portal_return\", async () => {\n    mockSearchParams.get.mockImplementation((key: string) =>\n      key === \"portal_return\" ? \"true\" : null\n    );\n    mockClaimLicense.mockResolvedValueOnce({ success: true });\n\n    render(<BillingPage />);\n\n    await act(async () => {\n      await jest.runAllTimersAsync();\n    });\n\n    await waitFor(() => {\n      expect(mockClaimLicense).toHaveBeenCalledTimes(1);\n      // No session_id for portal returns — called with undefined\n      expect(mockClaimLicense).toHaveBeenCalledWith(undefined);\n    });\n    expect(mockRefreshBilling).toHaveBeenCalled();\n  });\n\n  test(\"does not call claimLicense when no billing-return params present\", async () => {\n    mockSearchParams.get.mockReturnValue(null);\n\n    render(<BillingPage />);\n\n    await act(async () => {\n      await jest.runAllTimersAsync();\n    });\n\n    expect(mockClaimLicense).not.toHaveBeenCalled();\n  });\n\n  test(\"shows activating banner and sets sessionStorage on 3x retry failure\", async () => {\n    mockSearchParams.get.mockImplementation((key: string) =>\n      key === \"session_id\" ? \"cs_all_fail\" : null\n    );\n    mockClaimLicense.mockRejectedValue(new Error(\"Webhook not processed yet\"));\n\n    const consoleSpy = jest\n      .spyOn(console, \"error\")\n      .mockImplementation(() => {});\n\n    render(<BillingPage />);\n\n    await act(async () => {\n      await jest.runAllTimersAsync();\n    });\n\n    await waitFor(() => {\n      expect(screen.getByTestId(\"activating-banner\")).toBeInTheDocument();\n    });\n    expect(screen.getByTestId(\"activating-banner-text\")).toHaveTextContent(\n      \"Your license is still activating\"\n    );\n    expect(\n      sessionStorage.getItem(\"billing_license_activating_until\")\n    ).not.toBeNull();\n\n    consoleSpy.mockRestore();\n  });\n\n  test(\"banner not rendered when no activating state\", async () => {\n    mockSearchParams.get.mockReturnValue(null);\n\n    render(<BillingPage />);\n\n    await act(async () => {\n      await jest.runAllTimersAsync();\n    });\n\n    expect(screen.queryByTestId(\"activating-banner\")).not.toBeInTheDocument();\n  });\n\n  test(\"banner shown on mount when sessionStorage key is set and not expired\", async () => {\n    sessionStorage.setItem(\n      \"billing_license_activating_until\",\n      String(Date.now() + 120_000)\n    );\n    mockSearchParams.get.mockReturnValue(null);\n\n    render(<BillingPage />);\n\n    // Flush React effects — banner is visible from lazy state init, no timer advancement needed\n    await act(async () => {});\n\n    expect(screen.getByTestId(\"activating-banner\")).toBeInTheDocument();\n  });\n\n  test(\"banner not shown on mount when sessionStorage key is expired\", async () => {\n    sessionStorage.setItem(\n      \"billing_license_activating_until\",\n      String(Date.now() - 1000)\n    );\n    mockSearchParams.get.mockReturnValue(null);\n\n    render(<BillingPage />);\n\n    await act(async () => {\n      await jest.runAllTimersAsync();\n    });\n\n    expect(screen.queryByTestId(\"activating-banner\")).not.toBeInTheDocument();\n    expect(\n      sessionStorage.getItem(\"billing_license_activating_until\")\n    ).toBeNull();\n  });\n\n  test(\"poll calls claimLicense after 15s and clears banner on success\", async () => {\n    sessionStorage.setItem(\n      \"billing_license_activating_until\",\n      String(Date.now() + 120_000)\n    );\n    mockSearchParams.get.mockReturnValue(null);\n    // Poll attempt succeeds\n    mockClaimLicense.mockResolvedValueOnce({ success: true });\n\n    render(<BillingPage />);\n\n    // Flush effects — banner visible from lazy state init\n    await act(async () => {});\n    expect(screen.getByTestId(\"activating-banner\")).toBeInTheDocument();\n\n    // Advance past one poll interval (15s)\n    await act(async () => {\n      await jest.advanceTimersByTimeAsync(15_000);\n    });\n\n    expect(mockClaimLicense).toHaveBeenCalledWith(undefined);\n    expect(screen.queryByTestId(\"activating-banner\")).not.toBeInTheDocument();\n    expect(\n      sessionStorage.getItem(\"billing_license_activating_until\")\n    ).toBeNull();\n    expect(mockRefreshBilling).toHaveBeenCalled();\n    expect(mockRefreshLicense).toHaveBeenCalled();\n    expect(mockRouter.refresh).toHaveBeenCalled();\n  });\n\n  test(\"close button removes banner and clears sessionStorage\", async () => {\n    sessionStorage.setItem(\n      \"billing_license_activating_until\",\n      String(Date.now() + 120_000)\n    );\n    mockSearchParams.get.mockReturnValue(null);\n\n    render(<BillingPage />);\n\n    // Flush effects — banner visible from lazy state init\n    await act(async () => {});\n    expect(screen.getByTestId(\"activating-banner\")).toBeInTheDocument();\n\n    const closeButton = screen.getByTestId(\"activating-banner-close\");\n    await act(async () => {\n      closeButton.click();\n    });\n\n    expect(screen.queryByTestId(\"activating-banner\")).not.toBeInTheDocument();\n    expect(\n      sessionStorage.getItem(\"billing_license_activating_until\")\n    ).toBeNull();\n  });\n});\n"
  },
  {
    "path": "web/src/app/admin/billing/page.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useState } from \"react\";\nimport { useSearchParams, useRouter } from \"next/navigation\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgArrowUpCircle, SvgWallet } from \"@opal/icons\";\nimport type { IconProps } from \"@opal/types\";\nimport {\n  useBillingInformation,\n  useLicense,\n  BillingInformation,\n  hasActiveSubscription,\n  claimLicense,\n} from \"@/lib/billing\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport Message from \"@/refresh-components/messages/Message\";\n\nimport PlansView from \"./PlansView\";\nimport CheckoutView from \"./CheckoutView\";\nimport BillingDetailsView from \"./BillingDetailsView\";\nimport LicenseActivationCard from \"./LicenseActivationCard\";\nimport \"./billing.css\";\n\n// sessionStorage key: value is a unix-ms expiry timestamp\nconst BILLING_ACTIVATING_KEY = \"billing_license_activating_until\";\n\n// ----------------------------------------------------------------------------\n// Types\n// ----------------------------------------------------------------------------\n\ntype BillingView = \"plans\" | \"details\" | \"checkout\" | null;\n\ninterface ViewConfig {\n  icon: React.FunctionComponent<IconProps>;\n  title: string;\n  showBackButton: boolean;\n}\n\n// ----------------------------------------------------------------------------\n// FooterLinks (inlined)\n// ----------------------------------------------------------------------------\n\nconst SUPPORT_EMAIL = \"support@onyx.app\";\n\nfunction FooterLinks({\n  hasSubscription,\n  onActivateLicense,\n  hideLicenseLink,\n}: {\n  hasSubscription?: boolean;\n  onActivateLicense?: () => void;\n  hideLicenseLink?: boolean;\n}) {\n  const { user } = useUser();\n  const licenseText = hasSubscription\n    ? \"Update License Key\"\n    : \"Activate License Key\";\n  const billingHelpHref = `mailto:${SUPPORT_EMAIL}?subject=${encodeURIComponent(\n    `[Billing] support for ${user?.email ?? \"unknown\"}`\n  )}`;\n\n  return (\n    <Section flexDirection=\"row\" justifyContent=\"center\" gap={1} height=\"auto\">\n      {onActivateLicense && !hideLicenseLink && (\n        <>\n          <Text secondaryBody text03>\n            Have a license key?\n          </Text>\n          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n          <Button action tertiary onClick={onActivateLicense}>\n            <Text secondaryBody text05 className=\"underline\">\n              {licenseText}\n            </Text>\n          </Button>\n        </>\n      )}\n      {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n      <Button\n        action\n        tertiary\n        href={billingHelpHref}\n        className=\"billing-text-link\"\n      >\n        <Text secondaryBody text03 className=\"underline\">\n          Billing Help\n        </Text>\n      </Button>\n    </Section>\n  );\n}\n\n// ----------------------------------------------------------------------------\n// BillingPage\n// ----------------------------------------------------------------------------\n\nexport default function BillingPage() {\n  const router = useRouter();\n  const searchParams = useSearchParams();\n  // Start with null view to prevent flash - will be set once data loads\n  const [view, setView] = useState<BillingView | null>(null);\n  const [showLicenseActivationInput, setShowLicenseActivationInput] =\n    useState(false);\n  const [licenseCardAutoOpened, setLicenseCardAutoOpened] = useState(false);\n  const [viewChangeId, setViewChangeId] = useState(0);\n  const [transitionType, setTransitionType] = useState<\n    \"expand\" | \"collapse\" | \"fade\"\n  >(\"fade\");\n  const [isActivating, setIsActivating] = useState<boolean>(false);\n\n  const {\n    data: billingData,\n    isLoading: billingLoading,\n    error: billingError,\n    refresh: refreshBilling,\n  } = useBillingInformation();\n  const {\n    data: licenseData,\n    isLoading: licenseLoading,\n    refresh: refreshLicense,\n  } = useLicense();\n\n  const isLoading = billingLoading || licenseLoading;\n  const hasSubscription = billingData && hasActiveSubscription(billingData);\n  const billing = hasSubscription ? (billingData as BillingInformation) : null;\n  const isSelfHosted = !NEXT_PUBLIC_CLOUD_ENABLED;\n\n  const hasManualLicense = licenseData?.source === \"manual_upload\";\n\n  // Air-gapped: billing endpoint is unreachable (manual license + connectivity error)\n  const isAirGapped = !!(hasManualLicense && billingError);\n\n  // Stripe error: auto-fetched license but billing endpoint is unreachable\n  const hasStripeError = !!(\n    isSelfHosted &&\n    licenseData?.has_license &&\n    billingError &&\n    !hasManualLicense\n  );\n\n  // Manual license without active Stripe subscription\n  // Stripe-dependent actions (manage plan, update seats) won't work\n  const isManualLicenseOnly = !!(hasManualLicense && !hasSubscription);\n\n  // Set initial view based on subscription status (only once when data first loads)\n  useEffect(() => {\n    if (!isLoading && view === null) {\n      const shouldShowDetails =\n        hasSubscription || (isSelfHosted && licenseData?.has_license);\n      setView(shouldShowDetails ? \"details\" : \"plans\");\n    }\n  }, [\n    isLoading,\n    hasSubscription,\n    isSelfHosted,\n    licenseData?.has_license,\n    view,\n  ]);\n\n  // Read activating state from sessionStorage after mount (avoids SSR hydration mismatch)\n  useEffect(() => {\n    const raw = sessionStorage.getItem(BILLING_ACTIVATING_KEY);\n    if (!raw) return;\n    if (Number(raw) > Date.now()) {\n      setIsActivating(true);\n    } else {\n      sessionStorage.removeItem(BILLING_ACTIVATING_KEY);\n    }\n  }, []);\n\n  // Show license activation card when there's a Stripe error\n  useEffect(() => {\n    if (hasStripeError && !showLicenseActivationInput) {\n      setLicenseCardAutoOpened(true);\n      setShowLicenseActivationInput(true);\n    }\n  }, [hasStripeError, showLicenseActivationInput]);\n\n  // Handle return from checkout or customer portal\n  useEffect(() => {\n    const sessionId = searchParams.get(\"session_id\");\n    const portalReturn = searchParams.get(\"portal_return\");\n\n    if (!sessionId && !portalReturn) return;\n\n    router.replace(\"/admin/billing\", { scroll: false });\n\n    let cancelled = false;\n\n    const handleBillingReturn = async () => {\n      if (!NEXT_PUBLIC_CLOUD_ENABLED) {\n        // Retry up to 3 times with 2s backoff. The license may not be available\n        // immediately if the Stripe webhook hasn't finished processing yet\n        // (redirect and webhook fire nearly simultaneously).\n        let lastError: Error | null = null;\n        for (let attempt = 0; attempt < 3; attempt++) {\n          if (cancelled) return;\n          try {\n            // After checkout, exchange session_id for license; after portal, re-sync license\n            await claimLicense(sessionId ?? undefined);\n            if (cancelled) return;\n            refreshLicense();\n            // Refresh the page to update settings (including ee_features_enabled)\n            router.refresh();\n            // Navigate to billing details now that the license is active\n            changeView(\"details\");\n            lastError = null;\n            break;\n          } catch (err) {\n            lastError = err instanceof Error ? err : new Error(\"Unknown error\");\n            if (attempt < 2) {\n              await new Promise((resolve) => setTimeout(resolve, 2000));\n            }\n          }\n        }\n        if (cancelled) return;\n        if (lastError) {\n          console.error(\n            \"Failed to sync license after billing return:\",\n            lastError\n          );\n          // Show an activating banner on the plans view and keep retrying in the background.\n          sessionStorage.setItem(\n            BILLING_ACTIVATING_KEY,\n            String(Date.now() + 120_000)\n          );\n          setIsActivating(true);\n          changeView(\"plans\");\n        }\n      }\n      if (!cancelled) refreshBilling();\n    };\n    handleBillingReturn();\n\n    return () => {\n      cancelled = true;\n    };\n    // changeView intentionally omitted: it only calls stable state setters and the\n    // effect runs at most once (when session_id/portal_return params are present).\n  }, [searchParams, router, refreshBilling, refreshLicense]); // eslint-disable-line react-hooks/exhaustive-deps\n\n  // Poll every 15s while activating, up to 2 minutes, to detect when the license arrives.\n  useEffect(() => {\n    if (!isActivating) return;\n\n    let requestInFlight = false;\n\n    const intervalId = setInterval(async () => {\n      if (requestInFlight) return;\n      const raw = sessionStorage.getItem(BILLING_ACTIVATING_KEY);\n      if (!raw || Number(raw) <= Date.now()) {\n        // Expired — stop immediately without waiting for React cleanup\n        clearInterval(intervalId);\n        sessionStorage.removeItem(BILLING_ACTIVATING_KEY);\n        setIsActivating(false);\n        return;\n      }\n      requestInFlight = true;\n      try {\n        await claimLicense(undefined);\n        sessionStorage.removeItem(BILLING_ACTIVATING_KEY);\n        setIsActivating(false);\n        refreshLicense();\n        refreshBilling();\n        router.refresh();\n        changeView(\"details\");\n      } catch (err) {\n        // License not ready yet — keep polling. Log so unexpected failures\n        // (network errors, 500s) are distinguishable from expected 404s.\n        console.debug(\"License activation poll: will retry\", err);\n      } finally {\n        requestInFlight = false;\n      }\n    }, 15_000);\n\n    return () => clearInterval(intervalId);\n  }, [isActivating]); // eslint-disable-line react-hooks/exhaustive-deps\n\n  const handleRefresh = async () => {\n    await Promise.all([\n      refreshBilling(),\n      isSelfHosted ? refreshLicense() : Promise.resolve(),\n    ]);\n  };\n\n  // Hide license activation card when Stripe connection is restored (only if auto-opened)\n  useEffect(() => {\n    if (\n      !hasStripeError &&\n      !isAirGapped &&\n      showLicenseActivationInput &&\n      licenseCardAutoOpened &&\n      !isLoading\n    ) {\n      if (billingData && hasActiveSubscription(billingData)) {\n        setLicenseCardAutoOpened(false);\n        setShowLicenseActivationInput(false);\n      }\n    }\n  }, [\n    hasStripeError,\n    isAirGapped,\n    showLicenseActivationInput,\n    licenseCardAutoOpened,\n    isLoading,\n    billingData,\n  ]);\n\n  const handleLicenseActivated = () => {\n    refreshLicense();\n    refreshBilling();\n    // Refresh the page to update settings (including ee_features_enabled)\n    router.refresh();\n    // Navigate to billing details now that the license is active\n    changeView(\"details\");\n  };\n\n  // View configuration\n  const getViewConfig = (): ViewConfig => {\n    if (isLoading || view === null) {\n      return {\n        icon: SvgWallet,\n        title: \"Plans & Billing\",\n        showBackButton: false,\n      };\n    }\n    switch (view) {\n      case \"checkout\":\n        return {\n          icon: SvgArrowUpCircle,\n          title: \"Upgrade Plan\",\n          showBackButton: false,\n        };\n      case \"plans\":\n        return {\n          icon: hasSubscription ? SvgWallet : SvgArrowUpCircle,\n          title: hasSubscription ? \"View Plans\" : \"Upgrade Plan\",\n          showBackButton: !!(\n            hasSubscription ||\n            (isSelfHosted && licenseData?.has_license)\n          ),\n        };\n      case \"details\":\n        return {\n          icon: SvgWallet,\n          title: \"Plans & Billing\",\n          showBackButton: false,\n        };\n    }\n  };\n\n  const viewConfig = getViewConfig();\n\n  // Handle view changes with transition\n  const changeView = (newView: \"plans\" | \"details\" | \"checkout\") => {\n    if (newView === view) return;\n    if (newView === \"checkout\" && view === \"plans\") {\n      setTransitionType(\"expand\");\n    } else if (newView === \"plans\" && view === \"checkout\") {\n      setTransitionType(\"collapse\");\n    } else {\n      setTransitionType(\"fade\");\n    }\n    setViewChangeId((id) => id + 1);\n    setView(newView);\n  };\n\n  const handleBack = () => {\n    const hasEntitlement =\n      hasSubscription || (isSelfHosted && licenseData?.has_license);\n    if (view === \"checkout\") {\n      changeView(hasEntitlement ? \"details\" : \"plans\");\n    } else if (view === \"plans\" && hasEntitlement) {\n      changeView(\"details\");\n    }\n  };\n\n  const renderContent = () => {\n    if (isLoading || view === null) return null;\n\n    const animationClass =\n      transitionType === \"expand\"\n        ? \"billing-view-expand\"\n        : transitionType === \"collapse\"\n          ? \"billing-view-collapse\"\n          : \"billing-view-enter\";\n\n    const views: Record<typeof view, React.ReactNode> = {\n      checkout: <CheckoutView onAdjustPlan={() => changeView(\"plans\")} />,\n      plans: (\n        <PlansView\n          hasSubscription={!!hasSubscription}\n          hasLicense={!!licenseData?.has_license}\n          onCheckout={() => changeView(\"checkout\")}\n          hideFeatures={showLicenseActivationInput}\n        />\n      ),\n      details: (\n        <BillingDetailsView\n          billing={billing ?? undefined}\n          license={licenseData ?? undefined}\n          onViewPlans={() => changeView(\"plans\")}\n          onRefresh={handleRefresh}\n          isAirGapped={isAirGapped}\n          isManualLicenseOnly={isManualLicenseOnly}\n          hasStripeError={hasStripeError}\n          licenseCard={\n            isManualLicenseOnly ? (\n              <LicenseActivationCard\n                isOpen\n                onSuccess={handleLicenseActivated}\n                license={licenseData ?? undefined}\n                onClose={() => {}}\n                hideClose\n              />\n            ) : undefined\n          }\n        />\n      ),\n    };\n\n    return (\n      <div key={viewChangeId} className={`w-full ${animationClass}`}>\n        {views[view]}\n      </div>\n    );\n  };\n\n  // Render footer\n  const renderFooter = () => {\n    if (isLoading || view === null) return null;\n    return (\n      <>\n        {showLicenseActivationInput && !isManualLicenseOnly && (\n          <div className=\"w-full billing-card-enter\">\n            <LicenseActivationCard\n              isOpen={showLicenseActivationInput}\n              onSuccess={handleLicenseActivated}\n              license={licenseData ?? undefined}\n              onClose={() => {\n                setLicenseCardAutoOpened(false);\n                setShowLicenseActivationInput(false);\n              }}\n            />\n          </div>\n        )}\n        <FooterLinks\n          hasSubscription={!!hasSubscription || !!licenseData?.has_license}\n          onActivateLicense={\n            isSelfHosted ? () => setShowLicenseActivationInput(true) : undefined\n          }\n          hideLicenseLink={\n            isManualLicenseOnly ||\n            showLicenseActivationInput ||\n            (view === \"plans\" &&\n              (!!hasSubscription || !!licenseData?.has_license))\n          }\n        />\n      </>\n    );\n  };\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={viewConfig.icon}\n        title={viewConfig.title}\n        backButton={viewConfig.showBackButton}\n        onBack={handleBack}\n        separator\n      />\n      <SettingsLayouts.Body>\n        <div className=\"flex flex-col items-center gap-6\">\n          {isActivating && (\n            <Message\n              static\n              warning\n              large\n              text=\"Your license is still activating\"\n              description=\"Your license is being processed. You'll be taken to billing details automatically once confirmed.\"\n              icon\n              close\n              onClose={() => {\n                sessionStorage.removeItem(BILLING_ACTIVATING_KEY);\n                setIsActivating(false);\n              }}\n              className=\"w-full\"\n            />\n          )}\n          {renderContent()}\n          {renderFooter()}\n        </div>\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/bots/SlackBotCreationForm.tsx",
    "content": "\"use client\";\n\nimport CardSection from \"@/components/admin/CardSection\";\nimport { useRouter } from \"next/navigation\";\nimport { useState } from \"react\";\nimport { SlackTokensForm } from \"./SlackTokensForm\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { SvgSlack } from \"@opal/icons\";\n\nexport function NewSlackBotForm() {\n  const [formValues] = useState({\n    name: \"\",\n    enabled: true,\n    bot_token: \"\",\n    app_token: \"\",\n    user_token: \"\",\n  });\n  const router = useRouter();\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={SvgSlack}\n        title=\"New Slack Bot\"\n        separator\n        backButton\n      />\n      <SettingsLayouts.Body>\n        <CardSection>\n          <div className=\"p-4\">\n            <SlackTokensForm\n              isUpdate={false}\n              initialValues={formValues}\n              router={router}\n            />\n          </div>\n        </CardSection>\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/bots/SlackBotTable.tsx",
    "content": "\"use client\";\n\nimport { PageSelector } from \"@/components/PageSelector\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { useEffect, useState } from \"react\";\nimport {\n  Table,\n  TableBody,\n  TableCell,\n  TableHead,\n  TableHeader,\n  TableRow,\n} from \"@/components/ui/table\";\nimport { Badge } from \"@/components/ui/badge\";\nimport { SlackBot } from \"@/lib/types\";\nimport { EditIcon } from \"@/components/icons/icons\";\n\nconst NUM_IN_PAGE = 20;\n\nfunction ClickableTableRow({\n  url,\n  children,\n  ...props\n}: {\n  url: string;\n  children: React.ReactNode;\n  [key: string]: any;\n}) {\n  const router = useRouter();\n\n  useEffect(() => {\n    router.prefetch(url as Route);\n  }, [router, url]);\n\n  const navigate = () => {\n    router.push(url as Route);\n  };\n\n  return (\n    <TableRow {...props} onClick={navigate}>\n      {children}\n    </TableRow>\n  );\n}\n\nexport const SlackBotTable = ({ slackBots }: { slackBots: SlackBot[] }) => {\n  const [page, setPage] = useState(1);\n\n  // sort by id for consistent ordering\n  slackBots.sort((a, b) => {\n    if (a.id < b.id) {\n      return -1;\n    } else if (a.id > b.id) {\n      return 1;\n    } else {\n      return 0;\n    }\n  });\n\n  const slackBotsForPage = slackBots.slice(\n    NUM_IN_PAGE * (page - 1),\n    NUM_IN_PAGE * page\n  );\n\n  return (\n    <div>\n      <Table>\n        <TableHeader>\n          <TableRow>\n            <TableHead>Name</TableHead>\n            <TableHead>Status</TableHead>\n            <TableHead>Default Config</TableHead>\n            <TableHead>Channel Count</TableHead>\n          </TableRow>\n        </TableHeader>\n        <TableBody>\n          {slackBotsForPage.map((slackBot) => {\n            return (\n              <ClickableTableRow\n                url={`/admin/bots/${slackBot.id}`}\n                key={slackBot.id}\n                className=\"hover:bg-muted cursor-pointer\"\n              >\n                <TableCell>\n                  <div className=\"flex items-center\">\n                    <EditIcon className=\"mr-4\" />\n                    {slackBot.name}\n                  </div>\n                </TableCell>\n                <TableCell>\n                  {slackBot.enabled ? (\n                    <Badge variant=\"success\">Enabled</Badge>\n                  ) : (\n                    <Badge variant=\"destructive\">Disabled</Badge>\n                  )}\n                </TableCell>\n                <TableCell>\n                  <Badge variant=\"secondary\">Default Set</Badge>\n                </TableCell>\n                <TableCell>{slackBot.configs_count}</TableCell>\n                <TableCell>\n                  {/* Add any action buttons here if needed */}\n                </TableCell>\n              </ClickableTableRow>\n            );\n          })}\n          {slackBots.length === 0 && (\n            <TableRow>\n              <TableCell\n                colSpan={5}\n                className=\"text-center text-muted-foreground\"\n              >\n                Please add a New Slack Bot to begin chatting with Danswer!\n              </TableCell>\n            </TableRow>\n          )}\n        </TableBody>\n      </Table>\n      {slackBots.length > NUM_IN_PAGE && (\n        <div className=\"mt-3 flex\">\n          <div className=\"mx-auto\">\n            <PageSelector\n              totalPages={Math.ceil(slackBots.length / NUM_IN_PAGE)}\n              currentPage={page}\n              onPageChange={(newPage) => {\n                setPage(newPage);\n                window.scrollTo({\n                  top: 0,\n                  left: 0,\n                  behavior: \"smooth\",\n                });\n              }}\n            />\n          </div>\n        </div>\n      )}\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/app/admin/bots/SlackBotUpdateForm.tsx",
    "content": "\"use client\";\n\nimport { toast } from \"@/hooks/useToast\";\nimport { SlackBot } from \"@/lib/types\";\nimport { useRouter } from \"next/navigation\";\nimport { useState, useEffect, useRef } from \"react\";\nimport { updateSlackBotField } from \"@/lib/updateSlackBotField\";\nimport { SlackTokensForm } from \"./SlackTokensForm\";\n\nimport { EditableStringFieldDisplay } from \"@/components/EditableStringFieldDisplay\";\nimport { deleteSlackBot } from \"./new/lib\";\nimport GenericConfirmModal from \"@/components/modals/GenericConfirmModal\";\nimport { Button } from \"@opal/components\";\nimport { cn } from \"@/lib/utils\";\nimport { SvgChevronDownSmall, SvgTrash } from \"@opal/icons\";\n\nfunction Checkbox({\n  label,\n  checked,\n  onChange,\n}: {\n  label: string;\n  checked: boolean;\n  onChange: (e: React.ChangeEvent<HTMLInputElement>) => void;\n}) {\n  return (\n    <label className=\"flex text-xs cursor-pointer\">\n      <input\n        checked={checked}\n        onChange={onChange}\n        type=\"checkbox\"\n        className=\"mr-2 w-3.5 h-3.5 my-auto\"\n      />\n      <span className=\"block font-medium text-text-700 text-sm\">{label}</span>\n    </label>\n  );\n}\n\nexport const ExistingSlackBotForm = ({\n  existingSlackBot,\n  refreshSlackBot,\n}: {\n  existingSlackBot: SlackBot;\n  refreshSlackBot?: () => void;\n}) => {\n  const [isExpanded, setIsExpanded] = useState(false);\n  const [formValues, setFormValues] = useState(existingSlackBot);\n  const router = useRouter();\n  const dropdownRef = useRef<HTMLDivElement>(null);\n  const [showDeleteModal, setShowDeleteModal] = useState(false);\n\n  const handleUpdateField = async (\n    field: keyof SlackBot,\n    value: string | boolean\n  ) => {\n    try {\n      const response = await updateSlackBotField(\n        existingSlackBot,\n        field,\n        value\n      );\n      if (!response.ok) {\n        throw new Error(await response.text());\n      }\n      toast.success(`Connector ${field} updated successfully`);\n    } catch (error) {\n      toast.error(`Failed to update connector ${field}`);\n    }\n    setFormValues((prev) => ({ ...prev, [field]: value }));\n  };\n\n  useEffect(() => {\n    const handleClickOutside = (event: MouseEvent) => {\n      if (\n        dropdownRef.current &&\n        !dropdownRef.current.contains(event.target as Node) &&\n        isExpanded\n      ) {\n        setIsExpanded(false);\n      }\n    };\n\n    document.addEventListener(\"mousedown\", handleClickOutside);\n    return () => {\n      document.removeEventListener(\"mousedown\", handleClickOutside);\n    };\n  }, [isExpanded]);\n\n  return (\n    <div>\n      <div className=\"flex items-center justify-between h-14\">\n        <div className=\"flex items-center gap-2\">\n          <div>\n            <EditableStringFieldDisplay\n              value={formValues.name}\n              isEditable={true}\n              onUpdate={(value) => handleUpdateField(\"name\", value)}\n              scale={2.1}\n            />\n          </div>\n        </div>\n\n        <div className=\"flex flex-col\" ref={dropdownRef}>\n          <div className=\"flex items-center gap-4\">\n            <Button\n              prominence=\"secondary\"\n              icon={({ className }) => (\n                <SvgChevronDownSmall\n                  className={cn(className, !isExpanded && \"-rotate-90\")}\n                />\n              )}\n              onClick={() => setIsExpanded(!isExpanded)}\n            >\n              Update Tokens\n            </Button>\n            <Button\n              variant=\"danger\"\n              onClick={() => setShowDeleteModal(true)}\n              icon={SvgTrash}\n            >\n              Delete\n            </Button>\n          </div>\n\n          {isExpanded && (\n            <div className=\"bg-background border rounded-lg border-background-200 shadow-lg absolute mt-12 right-0 z-10 w-full md:w-3/4 lg:w-1/2\">\n              <div className=\"p-4\">\n                <SlackTokensForm\n                  isUpdate={true}\n                  initialValues={formValues}\n                  existingSlackBotId={existingSlackBot.id}\n                  refreshSlackBot={refreshSlackBot}\n                  router={router}\n                  onValuesChange={(values) => setFormValues(values)}\n                />\n              </div>\n            </div>\n          )}\n        </div>\n      </div>\n      <div className=\"mt-2\">\n        <div className=\"inline-block border rounded-lg border-background-200 p-2\">\n          <Checkbox\n            label=\"Enabled\"\n            checked={formValues.enabled}\n            onChange={(e) => handleUpdateField(\"enabled\", e.target.checked)}\n          />\n        </div>\n        {showDeleteModal && (\n          <GenericConfirmModal\n            title=\"Delete Slack Bot\"\n            message=\"Are you sure you want to delete this Slack bot? This action cannot be undone.\"\n            confirmText=\"Delete\"\n            onClose={() => setShowDeleteModal(false)}\n            onConfirm={async () => {\n              try {\n                const response = await deleteSlackBot(existingSlackBot.id);\n                if (!response.ok) {\n                  throw new Error(await response.text());\n                }\n                toast.success(\"Slack bot deleted successfully\");\n                router.push(\"/admin/bots\");\n              } catch (error) {\n                toast.error(\"Failed to delete Slack bot\");\n              }\n              setShowDeleteModal(false);\n            }}\n          />\n        )}\n      </div>\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/app/admin/bots/SlackTokensForm.tsx",
    "content": "\"use client\";\n\nimport { TextFormField } from \"@/components/Field\";\nimport { Form, Formik } from \"formik\";\nimport * as Yup from \"yup\";\nimport { createSlackBot, updateSlackBot } from \"./new/lib\";\nimport { Button } from \"@opal/components\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { useEffect } from \"react\";\nimport { DOCS_ADMINS_PATH } from \"@/lib/constants\";\nimport { toast } from \"@/hooks/useToast\";\n\nexport const SlackTokensForm = ({\n  isUpdate,\n  initialValues,\n  existingSlackBotId,\n  refreshSlackBot,\n  router,\n  onValuesChange,\n}: {\n  isUpdate: boolean;\n  initialValues: any;\n  existingSlackBotId?: number;\n  refreshSlackBot?: () => void;\n  router: any;\n  onValuesChange?: (values: any) => void;\n}) => {\n  useEffect(() => {\n    if (onValuesChange) {\n      onValuesChange(initialValues);\n    }\n  }, [initialValues, onValuesChange]);\n\n  return (\n    <Formik\n      initialValues={{\n        ...initialValues,\n      }}\n      validationSchema={Yup.object().shape({\n        bot_token: Yup.string().required(),\n        app_token: Yup.string().required(),\n        name: Yup.string().required(),\n        user_token: Yup.string().optional(),\n      })}\n      onSubmit={async (values, formikHelpers) => {\n        formikHelpers.setSubmitting(true);\n\n        let response;\n        if (isUpdate) {\n          response = await updateSlackBot(existingSlackBotId!, values);\n        } else {\n          response = await createSlackBot(values);\n        }\n        formikHelpers.setSubmitting(false);\n        if (response.ok) {\n          if (refreshSlackBot) {\n            refreshSlackBot();\n          }\n          const responseJson = await response.json();\n          const botId = isUpdate ? existingSlackBotId : responseJson.id;\n          toast.success(\n            isUpdate\n              ? \"Successfully updated Slack Bot!\"\n              : \"Successfully created Slack Bot!\"\n          );\n          router.push(`/admin/bots/${encodeURIComponent(botId)}`);\n        } else {\n          const responseJson = await response.json();\n          let errorMsg = responseJson.detail || responseJson.message;\n\n          if (errorMsg.includes(\"Invalid bot token:\")) {\n            errorMsg = \"Slack Bot Token is invalid\";\n          } else if (errorMsg.includes(\"Invalid app token:\")) {\n            errorMsg = \"Slack App Token is invalid\";\n          }\n          toast.error(\n            isUpdate\n              ? `Error updating Slack Bot - ${errorMsg}`\n              : `Error creating Slack Bot - ${errorMsg}`\n          );\n        }\n      }}\n      enableReinitialize={true}\n    >\n      {({ isSubmitting, setFieldValue, values }) => (\n        <Form className=\"w-full\">\n          {!isUpdate && (\n            <div className=\"\">\n              <TextFormField\n                name=\"name\"\n                label=\"Name This Slack Bot:\"\n                type=\"text\"\n              />\n            </div>\n          )}\n\n          {!isUpdate && (\n            <div className=\"mt-4\">\n              <Separator />\n              Please refer to our{\" \"}\n              <a\n                className=\"text-blue-500 hover:underline\"\n                href={`${DOCS_ADMINS_PATH}/getting_started/slack_bot_setup`}\n                target=\"_blank\"\n                rel=\"noopener noreferrer\"\n              >\n                guide\n              </a>{\" \"}\n              if you are not sure how to get these tokens!\n            </div>\n          )}\n          <TextFormField\n            name=\"bot_token\"\n            label=\"Slack Bot Token\"\n            type=\"password\"\n          />\n          <TextFormField\n            name=\"app_token\"\n            label=\"Slack App Token\"\n            type=\"password\"\n          />\n          <TextFormField\n            name=\"user_token\"\n            label=\"Slack User Token (Optional)\"\n            type=\"password\"\n            subtext=\"Optional: User OAuth token for enhanced private channel access\"\n          />\n          <div className=\"flex justify-end w-full mt-4\">\n            <Button\n              disabled={\n                isSubmitting ||\n                !values.bot_token ||\n                !values.app_token ||\n                !values.name\n              }\n              type=\"submit\"\n            >\n              {isUpdate ? \"Update\" : \"Create\"}\n            </Button>\n          </div>\n        </Form>\n      )}\n    </Formik>\n  );\n};\n"
  },
  {
    "path": "web/src/app/admin/bots/[bot-id]/SlackChannelConfigsTable.tsx",
    "content": "\"use client\";\n\nimport { PageSelector } from \"@/components/PageSelector\";\nimport { toast } from \"@/hooks/useToast\";\nimport { EditIcon } from \"@/components/icons/icons\";\nimport { SlackChannelConfig } from \"@/lib/types\";\nimport {\n  Table,\n  TableBody,\n  TableCell,\n  TableHead,\n  TableHeader,\n  TableRow,\n} from \"@/components/ui/table\";\nimport Link from \"next/link\";\nimport type { Route } from \"next\";\nimport { useState } from \"react\";\nimport { deleteSlackChannelConfig, isPersonaASlackBotPersona } from \"./lib\";\nimport { Card } from \"@/components/ui/card\";\nimport CreateButton from \"@/refresh-components/buttons/CreateButton\";\nimport { Button } from \"@opal/components\";\nimport { SvgSettings, SvgTrash } from \"@opal/icons\";\nconst numToDisplay = 50;\n\nexport interface SlackChannelConfigsTableProps {\n  slackBotId: number;\n  slackChannelConfigs: SlackChannelConfig[];\n  refresh: () => void;\n}\n\nexport default function SlackChannelConfigsTable({\n  slackBotId,\n  slackChannelConfigs,\n  refresh,\n}: SlackChannelConfigsTableProps) {\n  const [page, setPage] = useState(1);\n\n  const defaultConfig = slackChannelConfigs.find((config) => config.is_default);\n  const channelConfigs = slackChannelConfigs.filter(\n    (config) => !config.is_default\n  );\n\n  return (\n    <div className=\"space-y-8\">\n      <div className=\"flex justify-between items-center mb-6\">\n        <Button\n          prominence=\"secondary\"\n          onClick={() => {\n            window.location.href = `/admin/bots/${slackBotId}/channels/${defaultConfig?.id}`;\n          }}\n          icon={SvgSettings}\n        >\n          Edit Default Configuration\n        </Button>\n        <CreateButton href={`/admin/bots/${slackBotId}/channels/new`} secondary>\n          New Channel Configuration\n        </CreateButton>\n      </div>\n\n      <div>\n        <h2 className=\"text-2xl font- mb-4\">Channel-Specific Configurations</h2>\n        <Card>\n          <Table>\n            <TableHeader>\n              <TableRow>\n                <TableHead>Channel</TableHead>\n                <TableHead>Assistant</TableHead>\n                <TableHead>Document Sets</TableHead>\n                <TableHead>Actions</TableHead>\n              </TableRow>\n            </TableHeader>\n            <TableBody>\n              {channelConfigs\n                .slice(numToDisplay * (page - 1), numToDisplay * page)\n                .map((slackChannelConfig) => {\n                  return (\n                    <TableRow\n                      key={slackChannelConfig.id}\n                      className=\"cursor-pointer transition-colors\"\n                      onClick={() => {\n                        window.location.href = `/admin/bots/${slackBotId}/channels/${slackChannelConfig.id}`;\n                      }}\n                    >\n                      <TableCell>\n                        <div className=\"flex gap-x-2\">\n                          <div className=\"my-auto\">\n                            <EditIcon className=\"text-muted-foreground\" />\n                          </div>\n                          <div className=\"my-auto\">\n                            {\"#\" +\n                              slackChannelConfig.channel_config.channel_name}\n                          </div>\n                        </div>\n                      </TableCell>\n                      <TableCell onClick={(e) => e.stopPropagation()}>\n                        {slackChannelConfig.persona &&\n                        !isPersonaASlackBotPersona(\n                          slackChannelConfig.persona\n                        ) ? (\n                          <Link\n                            href={\n                              `/app/agents/edit/${slackChannelConfig.persona.id}` as Route\n                            }\n                            className=\"text-primary hover:underline\"\n                          >\n                            {slackChannelConfig.persona.name}\n                          </Link>\n                        ) : (\n                          \"-\"\n                        )}\n                      </TableCell>\n                      <TableCell>\n                        <div>\n                          {slackChannelConfig.persona &&\n                          slackChannelConfig.persona.document_sets.length > 0\n                            ? slackChannelConfig.persona.document_sets\n                                .map((documentSet) => documentSet.name)\n                                .join(\", \")\n                            : \"-\"}\n                        </div>\n                      </TableCell>\n                      <TableCell onClick={(e) => e.stopPropagation()}>\n                        <Button\n                          onClick={async (e) => {\n                            e.stopPropagation();\n                            const response = await deleteSlackChannelConfig(\n                              slackChannelConfig.id\n                            );\n                            if (response.ok) {\n                              toast.success(\n                                `Slack bot config \"${slackChannelConfig.id}\" deleted`\n                              );\n                            } else {\n                              const errorMsg = await response.text();\n                              toast.error(\n                                `Failed to delete Slack bot config - ${errorMsg}`\n                              );\n                            }\n                            refresh();\n                          }}\n                          icon={SvgTrash}\n                          prominence=\"tertiary\"\n                          size=\"sm\"\n                        />\n                      </TableCell>\n                    </TableRow>\n                  );\n                })}\n\n              {channelConfigs.length === 0 && (\n                <TableRow>\n                  <TableCell\n                    colSpan={4}\n                    className=\"text-center text-muted-foreground\"\n                  >\n                    No channel-specific configurations. Add a new configuration\n                    to customize behavior for specific channels.\n                  </TableCell>\n                </TableRow>\n              )}\n            </TableBody>\n          </Table>\n        </Card>\n\n        {channelConfigs.length > numToDisplay && (\n          <div className=\"mt-4 flex justify-center\">\n            <PageSelector\n              totalPages={Math.ceil(channelConfigs.length / numToDisplay)}\n              currentPage={page}\n              onPageChange={(newPage) => setPage(newPage)}\n            />\n          </div>\n        )}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/bots/[bot-id]/channels/SlackChannelConfigCreationForm.tsx",
    "content": "\"use client\";\n\nimport { useMemo } from \"react\";\nimport { Formik, Form } from \"formik\";\nimport * as Yup from \"yup\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  DocumentSetSummary,\n  SlackChannelConfig,\n  SlackBotResponseType,\n} from \"@/lib/types\";\nimport {\n  createSlackChannelConfig,\n  isPersonaASlackBotPersona,\n  updateSlackChannelConfig,\n} from \"../lib\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { useRouter } from \"next/navigation\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { StandardAnswerCategoryResponse } from \"@/components/standardAnswers/getStandardAnswerCategoriesIfEE\";\nimport { SEARCH_TOOL_ID } from \"@/app/app/components/tools/constants\";\nimport { SlackChannelConfigFormFields } from \"./SlackChannelConfigFormFields\";\n\nexport const SlackChannelConfigCreationForm = ({\n  slack_bot_id,\n  documentSets,\n  personas,\n  standardAnswerCategoryResponse,\n  existingSlackChannelConfig,\n}: {\n  slack_bot_id: number;\n  documentSets: DocumentSetSummary[];\n  personas: MinimalPersonaSnapshot[];\n  standardAnswerCategoryResponse: StandardAnswerCategoryResponse;\n  existingSlackChannelConfig?: SlackChannelConfig;\n}) => {\n  const router = useRouter();\n  const isUpdate = Boolean(existingSlackChannelConfig);\n  const isDefault = existingSlackChannelConfig?.is_default || false;\n  const existingSlackBotUsesPersona = existingSlackChannelConfig?.persona\n    ? !isPersonaASlackBotPersona(existingSlackChannelConfig.persona)\n    : false;\n  const existingPersonaHasSearchTool = existingSlackChannelConfig?.persona\n    ? existingSlackChannelConfig.persona.tools.some(\n        (tool) => tool.in_code_tool_id === SEARCH_TOOL_ID\n      )\n    : false;\n\n  const [searchEnabledAgents, nonSearchAgents] = useMemo(() => {\n    return personas.reduce(\n      (acc, persona) => {\n        if (\n          persona.tools.some((tool) => tool.in_code_tool_id === SEARCH_TOOL_ID)\n        ) {\n          acc[0].push(persona);\n        } else {\n          acc[1].push(persona);\n        }\n        return acc;\n      },\n      [[], []] as [MinimalPersonaSnapshot[], MinimalPersonaSnapshot[]]\n    );\n  }, [personas]);\n\n  return (\n    <CardSection className=\"!px-12 max-w-4xl\">\n      <Formik\n        initialValues={{\n          slack_bot_id: slack_bot_id,\n          channel_name: isDefault\n            ? \"\"\n            : existingSlackChannelConfig?.channel_config.channel_name || \"\",\n          response_type: \"citations\" as SlackBotResponseType,\n          answer_validity_check_enabled: (\n            existingSlackChannelConfig?.channel_config?.answer_filters || []\n          ).includes(\"well_answered_postfilter\"),\n          questionmark_prefilter_enabled: (\n            existingSlackChannelConfig?.channel_config?.answer_filters || []\n          ).includes(\"questionmark_prefilter\"),\n          respond_tag_only:\n            existingSlackChannelConfig?.channel_config?.respond_tag_only ||\n            false,\n          is_ephemeral:\n            existingSlackChannelConfig?.channel_config?.is_ephemeral || false,\n          respond_to_bots:\n            existingSlackChannelConfig?.channel_config?.respond_to_bots ||\n            false,\n          show_continue_in_web_ui:\n            existingSlackChannelConfig?.channel_config\n              ?.show_continue_in_web_ui ?? !isUpdate,\n          enable_auto_filters:\n            existingSlackChannelConfig?.enable_auto_filters || false,\n          respond_member_group_list:\n            existingSlackChannelConfig?.channel_config\n              ?.respond_member_group_list || [],\n          still_need_help_enabled:\n            existingSlackChannelConfig?.channel_config?.follow_up_tags !==\n            undefined,\n          follow_up_tags:\n            existingSlackChannelConfig?.channel_config?.follow_up_tags ||\n            undefined,\n          document_sets:\n            existingSlackChannelConfig && existingSlackChannelConfig.persona\n              ? existingSlackChannelConfig.persona.document_sets.map(\n                  (documentSet) => documentSet.id\n                )\n              : ([] as number[]),\n          persona_id:\n            existingSlackChannelConfig?.persona &&\n            !isPersonaASlackBotPersona(existingSlackChannelConfig.persona)\n              ? existingSlackChannelConfig.persona.id\n              : null,\n          standard_answer_categories:\n            existingSlackChannelConfig?.standard_answer_categories || [],\n          knowledge_source: existingSlackBotUsesPersona\n            ? existingPersonaHasSearchTool\n              ? \"assistant\"\n              : \"non_search_agent\"\n            : existingSlackChannelConfig?.persona\n              ? \"document_sets\"\n              : \"all_public\",\n          disabled:\n            existingSlackChannelConfig?.channel_config?.disabled ?? false,\n        }}\n        validationSchema={Yup.object().shape({\n          slack_bot_id: Yup.number().required(),\n          channel_name: isDefault\n            ? Yup.string()\n            : Yup.string().required(\"Channel Name is required\"),\n          response_type: Yup.mixed<SlackBotResponseType>()\n            .oneOf([\"quotes\", \"citations\"])\n            .required(),\n          answer_validity_check_enabled: Yup.boolean().required(),\n          questionmark_prefilter_enabled: Yup.boolean().required(),\n          respond_tag_only: Yup.boolean().required(),\n          respond_to_bots: Yup.boolean().required(),\n          is_ephemeral: Yup.boolean().required(),\n          show_continue_in_web_ui: Yup.boolean().required(),\n          enable_auto_filters: Yup.boolean().required(),\n          respond_member_group_list: Yup.array().of(Yup.string()).required(),\n          still_need_help_enabled: Yup.boolean().required(),\n          follow_up_tags: Yup.array().of(Yup.string()),\n          document_sets: Yup.array()\n            .of(Yup.number())\n            .when(\"knowledge_source\", {\n              is: \"document_sets\",\n              then: (schema) =>\n                schema.min(\n                  1,\n                  \"At least one Document Set is required when using the 'Document Sets' knowledge source\"\n                ),\n            }),\n          persona_id: Yup.number()\n            .nullable()\n            .when(\"knowledge_source\", {\n              is: \"assistant\",\n              then: (schema) =>\n                schema.required(\n                  \"An agent is required when using the 'Agent' knowledge source\"\n                ),\n            }),\n          standard_answer_categories: Yup.array(),\n          knowledge_source: Yup.string()\n            .oneOf([\n              \"all_public\",\n              \"document_sets\",\n              \"assistant\",\n              \"non_search_agent\",\n            ])\n            .required(),\n          disabled: Yup.boolean().optional().default(false),\n        })}\n        onSubmit={async (values, formikHelpers) => {\n          formikHelpers.setSubmitting(true);\n\n          const cleanedValues = {\n            ...values,\n            slack_bot_id,\n            channel_name: values.channel_name,\n            respond_member_group_list: values.respond_member_group_list,\n            usePersona:\n              values.knowledge_source === \"assistant\" ||\n              values.knowledge_source === \"non_search_agent\",\n            document_sets:\n              values.knowledge_source === \"document_sets\"\n                ? values.document_sets\n                : [],\n            persona_id:\n              values.knowledge_source === \"assistant\" ||\n              values.knowledge_source === \"non_search_agent\"\n                ? values.persona_id\n                : null,\n            standard_answer_categories: values.standard_answer_categories.map(\n              (category: any) => category.id\n            ),\n            response_type: values.response_type as SlackBotResponseType,\n            disabled: values.disabled ?? false,\n          };\n\n          if (!cleanedValues.still_need_help_enabled) {\n            cleanedValues.follow_up_tags = undefined;\n          } else {\n            if (!cleanedValues.follow_up_tags) {\n              cleanedValues.follow_up_tags = [];\n            }\n          }\n\n          const response = isUpdate\n            ? await updateSlackChannelConfig(\n                existingSlackChannelConfig!.id,\n                cleanedValues\n              )\n            : await createSlackChannelConfig(cleanedValues);\n\n          formikHelpers.setSubmitting(false);\n          if (response.ok) {\n            router.push(`/admin/bots/${slack_bot_id}`);\n          } else {\n            const responseJson = await response.json();\n            const errorMsg = responseJson.detail || responseJson.message;\n            toast.error(\n              `Error ${\n                isUpdate ? \"updating\" : \"creating\"\n              } OnyxBot config - ${errorMsg}`\n            );\n          }\n        }}\n      >\n        {({ isSubmitting, values, setFieldValue, ...formikProps }) => (\n          <Form>\n            <div className=\"pb-6 w-full\">\n              <SlackChannelConfigFormFields\n                {...values}\n                isUpdate={isUpdate}\n                isDefault={isDefault}\n                documentSets={documentSets}\n                searchEnabledAgents={searchEnabledAgents}\n                nonSearchAgents={nonSearchAgents}\n                standardAnswerCategoryResponse={standardAnswerCategoryResponse}\n                slack_bot_id={slack_bot_id}\n                formikProps={formikProps}\n              />\n            </div>\n          </Form>\n        )}\n      </Formik>\n    </CardSection>\n  );\n};\n"
  },
  {
    "path": "web/src/app/admin/bots/[bot-id]/channels/SlackChannelConfigFormFields.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useMemo } from \"react\";\nimport { FieldArray, useFormikContext, ErrorMessage } from \"formik\";\nimport { DocumentSetSummary } from \"@/lib/types\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  Label,\n  SelectorFormField,\n  SubLabel,\n  TextArrayField,\n  TextFormField,\n} from \"@/components/Field\";\nimport { Button } from \"@opal/components\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport DocumentSetCard from \"@/sections/cards/DocumentSetCard\";\nimport CollapsibleSection from \"@/app/admin/agents/CollapsibleSection\";\nimport { StandardAnswerCategoryResponse } from \"@/components/standardAnswers/getStandardAnswerCategoriesIfEE\";\nimport { StandardAnswerCategoryDropdownField } from \"@/components/standardAnswers/StandardAnswerCategoryDropdown\";\nimport { RadioGroup } from \"@/components/ui/radio-group\";\nimport { RadioGroupItemField } from \"@/components/ui/RadioGroupItemField\";\nimport { AlertCircle } from \"lucide-react\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport {\n  Tooltip,\n  TooltipContent,\n  TooltipTrigger,\n} from \"@/components/ui/tooltip\";\nimport { TooltipProvider } from \"@radix-ui/react-tooltip\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport Link from \"next/link\";\nimport AgentAvatar from \"@/refresh-components/avatars/AgentAvatar\";\nimport { Badge } from \"@/components/ui/badge\";\nimport {\n  Accordion,\n  AccordionContent,\n  AccordionItem,\n  AccordionTrigger,\n} from \"@/components/ui/accordion\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { CheckboxField } from \"@/refresh-components/form/LabeledCheckboxField\";\n\nexport interface SlackChannelConfigFormFieldsProps {\n  isUpdate: boolean;\n  isDefault: boolean;\n  documentSets: DocumentSetSummary[];\n  searchEnabledAgents: MinimalPersonaSnapshot[];\n  nonSearchAgents: MinimalPersonaSnapshot[];\n  standardAnswerCategoryResponse: StandardAnswerCategoryResponse;\n  slack_bot_id: number;\n  formikProps: any;\n}\n\nexport function SlackChannelConfigFormFields({\n  isUpdate,\n  isDefault,\n  documentSets,\n  searchEnabledAgents,\n  nonSearchAgents,\n  standardAnswerCategoryResponse,\n  slack_bot_id,\n  formikProps,\n}: SlackChannelConfigFormFieldsProps) {\n  const router = useRouter();\n  const { values, setFieldValue } = useFormikContext<any>();\n  const [viewUnselectableSets, setViewUnselectableSets] = useState(false);\n  const [viewSyncEnabledAgents, setViewSyncEnabledAgents] = useState(false);\n\n  // Helper function to check if a document set contains sync connectors\n  const documentSetContainsSync = (documentSet: DocumentSetSummary) => {\n    return documentSet.cc_pair_summaries.some(\n      (summary) => summary.access_type === \"sync\"\n    );\n  };\n\n  // Helper function to check if a document set contains private connectors\n  const documentSetContainsPrivate = (documentSet: DocumentSetSummary) => {\n    return documentSet.cc_pair_summaries.some(\n      (summary) => summary.access_type === \"private\"\n    );\n  };\n\n  // Helper function to get cc_pair_summaries from DocumentSetSummary\n  const getCcPairSummaries = (documentSet: DocumentSetSummary) => {\n    return documentSet.cc_pair_summaries;\n  };\n\n  const [syncEnabledAgents, availableAgents] = useMemo(() => {\n    const sync: MinimalPersonaSnapshot[] = [];\n    const available: MinimalPersonaSnapshot[] = [];\n\n    searchEnabledAgents.forEach((persona) => {\n      const hasSyncSet = persona.document_sets.some(documentSetContainsSync);\n      if (hasSyncSet) {\n        sync.push(persona);\n      } else {\n        available.push(persona);\n      }\n    });\n\n    return [sync, available];\n  }, [searchEnabledAgents]);\n\n  const unselectableSets = useMemo(() => {\n    return documentSets.filter(documentSetContainsSync);\n  }, [documentSets]);\n\n  const memoizedPrivateConnectors = useMemo(() => {\n    const uniqueDescriptors = new Map();\n    documentSets.forEach((ds: DocumentSetSummary) => {\n      const ccPairSummaries = getCcPairSummaries(ds);\n      ccPairSummaries.forEach((summary: any) => {\n        if (\n          summary.access_type === \"private\" &&\n          !uniqueDescriptors.has(summary.id)\n        ) {\n          uniqueDescriptors.set(summary.id, summary);\n        }\n      });\n    });\n    return Array.from(uniqueDescriptors.values());\n  }, [documentSets]);\n\n  const selectableSets = useMemo(() => {\n    return documentSets.filter((ds) => !documentSetContainsSync(ds));\n  }, [documentSets]);\n\n  useEffect(() => {\n    const invalidSelected = values.document_sets.filter((dsId: number) =>\n      unselectableSets.some((us) => us.id === dsId)\n    );\n    if (invalidSelected.length > 0) {\n      setFieldValue(\n        \"document_sets\",\n        values.document_sets.filter(\n          (dsId: number) => !invalidSelected.includes(dsId)\n        )\n      );\n      toast.warning(\n        \"We removed one or more document sets from your selection because they are no longer valid. Please review and update your configuration.\"\n      );\n    }\n  }, [unselectableSets, values.document_sets, setFieldValue]);\n\n  const shouldShowPrivacyAlert = useMemo(() => {\n    if (values.knowledge_source === \"document_sets\") {\n      const selectedSets = documentSets.filter((ds) =>\n        values.document_sets.includes(ds.id)\n      );\n      return selectedSets.some((ds) => documentSetContainsPrivate(ds));\n    } else if (values.knowledge_source === \"assistant\") {\n      const chosenAgent = searchEnabledAgents.find(\n        (p) => p.id == values.persona_id\n      );\n      return chosenAgent?.document_sets.some((ds) =>\n        documentSetContainsPrivate(ds)\n      );\n    }\n    return false;\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, [values.knowledge_source, values.document_sets, values.persona_id]);\n\n  return (\n    <>\n      <div className=\"w-full\">\n        {isDefault && (\n          <>\n            <Badge variant=\"agent\" className=\"bg-blue-100 text-blue-800\">\n              Default Configuration\n            </Badge>\n            <p className=\"mt-2 text-sm\">\n              This default configuration will apply to all channels and direct\n              messages (DMs) in your Slack workspace.\n            </p>\n            <div className=\"mt-4 p-4 bg-background rounded-md border border-neutral-300\">\n              <CheckboxField\n                name=\"disabled\"\n                label=\"Disable Default Configuration\"\n                labelClassName=\"text-text\"\n              />\n              <p className=\"mt-2 text-sm italic\">\n                Warning: Disabling the default configuration means OnyxBot\n                won&apos;t respond in Slack channels unless they are explicitly\n                configured. Additionally, OnyxBot will not respond to DMs.\n              </p>\n            </div>\n          </>\n        )}\n        {!isDefault && (\n          <>\n            <TextFormField\n              name=\"channel_name\"\n              label=\"Slack Channel Name\"\n              placeholder=\"Enter channel name (e.g., general, support)\"\n              subtext=\"Enter the name of the Slack channel (without the # symbol)\"\n            />\n          </>\n        )}\n        <div className=\"space-y-2 mt-4\">\n          <Label>Knowledge Source</Label>\n          <RadioGroup\n            className=\"flex flex-col gap-y-4\"\n            value={values.knowledge_source}\n            onValueChange={(value: string) => {\n              setFieldValue(\"knowledge_source\", value);\n            }}\n          >\n            <RadioGroupItemField\n              value=\"all_public\"\n              id=\"all_public\"\n              label=\"All Public Knowledge\"\n              sublabel=\"Let OnyxBot respond based on information from all public connectors\"\n            />\n            {selectableSets.length + unselectableSets.length > 0 && (\n              <RadioGroupItemField\n                value=\"document_sets\"\n                id=\"document_sets\"\n                label=\"Specific Document Sets\"\n                sublabel=\"Control which documents to use for answering questions\"\n              />\n            )}\n            <RadioGroupItemField\n              value=\"assistant\"\n              id=\"assistant\"\n              label=\"Search Agent\"\n              sublabel=\"Control both the documents and the prompt to use for answering questions\"\n            />\n            <RadioGroupItemField\n              value=\"non_search_agent\"\n              id=\"non_search_agent\"\n              label=\"Non-Search Agent\"\n              sublabel=\"Chat with an agent that does not use documents\"\n            />\n          </RadioGroup>\n        </div>\n        {values.knowledge_source === \"document_sets\" &&\n          documentSets.length > 0 && (\n            <div className=\"mt-4\">\n              <SubLabel>\n                <>\n                  Select the document sets OnyxBot will use while answering\n                  questions in Slack.\n                  <br />\n                  {unselectableSets.length > 0 ? (\n                    <span>\n                      Some incompatible document sets are{\" \"}\n                      {viewUnselectableSets ? \"visible\" : \"hidden\"}.{\" \"}\n                      <button\n                        type=\"button\"\n                        onClick={() =>\n                          setViewUnselectableSets(\n                            (viewUnselectableSets) => !viewUnselectableSets\n                          )\n                        }\n                        className=\"text-sm text-action-link-05\"\n                      >\n                        {viewUnselectableSets\n                          ? \"Hide un-selectable \"\n                          : \"View all \"}\n                        document sets\n                      </button>\n                    </span>\n                  ) : (\n                    \"\"\n                  )}\n                </>\n              </SubLabel>\n              <FieldArray\n                name=\"document_sets\"\n                render={(arrayHelpers) => (\n                  <>\n                    {selectableSets.length > 0 && (\n                      <div className=\"mb-3 mt-2 flex gap-2 flex-wrap text-sm\">\n                        {selectableSets.map((documentSet) => {\n                          const selectedIndex = values.document_sets.indexOf(\n                            documentSet.id\n                          );\n                          const isSelected = selectedIndex !== -1;\n\n                          return (\n                            <DocumentSetCard\n                              key={documentSet.id}\n                              documentSet={documentSet}\n                              isSelected={isSelected}\n                              onSelectToggle={(selected) => {\n                                if (selected) arrayHelpers.push(documentSet.id);\n                                else arrayHelpers.remove(selectedIndex);\n                              }}\n                            />\n                          );\n                        })}\n                      </div>\n                    )}\n\n                    {viewUnselectableSets && unselectableSets.length > 0 && (\n                      <div className=\"mt-4\">\n                        <p className=\"text-sm text-text-dark/80\">\n                          These document sets cannot be attached as they have\n                          auto-synced docs:\n                        </p>\n                        <div className=\"mb-3 mt-2 flex gap-2 flex-wrap text-sm\">\n                          {unselectableSets.map((documentSet) => (\n                            <DocumentSetCard\n                              key={documentSet.id}\n                              documentSet={documentSet}\n                              disabled\n                              disabledTooltip=\"Unable to use this document set because it contains a connector with auto-sync permissions. OnyxBot's responses in this channel are visible to all Slack users, so mirroring the asker's permissions could inadvertently expose private information.\"\n                              isSelected={false}\n                            />\n                          ))}\n                        </div>\n                      </div>\n                    )}\n                    <ErrorMessage\n                      className=\"text-red-500 text-sm mt-1\"\n                      name=\"document_sets\"\n                      component=\"div\"\n                    />\n                  </>\n                )}\n              />\n            </div>\n          )}\n        {values.knowledge_source === \"assistant\" && (\n          <div className=\"mt-4\">\n            <SubLabel>\n              <>\n                Select the search-enabled agent OnyxBot will use while answering\n                questions in Slack.\n                {syncEnabledAgents.length > 0 && (\n                  <>\n                    <br />\n                    <span className=\"text-sm text-text-dark/80\">\n                      Note: Some of your agents have auto-synced connectors in\n                      their document sets. You cannot select these agents as\n                      they will not be able to answer questions in Slack.{\" \"}\n                      <button\n                        type=\"button\"\n                        onClick={() =>\n                          setViewSyncEnabledAgents(\n                            (viewSyncEnabledAgents) => !viewSyncEnabledAgents\n                          )\n                        }\n                        className=\"text-sm text-action-link-05\"\n                      >\n                        {viewSyncEnabledAgents\n                          ? \"Hide un-selectable \"\n                          : \"View all \"}\n                        agents\n                      </button>\n                    </span>\n                  </>\n                )}\n              </>\n            </SubLabel>\n\n            <SelectorFormField\n              name=\"persona_id\"\n              options={availableAgents.map((persona) => ({\n                name: persona.name,\n                value: persona.id,\n              }))}\n            />\n            {viewSyncEnabledAgents && syncEnabledAgents.length > 0 && (\n              <div className=\"mt-4\">\n                <p className=\"text-sm text-text-dark/80\">\n                  Un-selectable agents:\n                </p>\n                <div className=\"mb-3 mt-2 flex gap-2 flex-wrap text-sm\">\n                  {syncEnabledAgents.map((persona: MinimalPersonaSnapshot) => (\n                    <button\n                      type=\"button\"\n                      onClick={() =>\n                        router.push(`/app/agents/edit/${persona.id}` as Route)\n                      }\n                      key={persona.id}\n                      className=\"p-2 bg-background-100 cursor-pointer rounded-md flex items-center gap-2\"\n                    >\n                      <AgentAvatar agent={persona} size={16} />\n                      {persona.name}\n                    </button>\n                  ))}\n                </div>\n              </div>\n            )}\n          </div>\n        )}\n        {values.knowledge_source === \"non_search_agent\" && (\n          <div className=\"mt-4\">\n            <SubLabel>\n              <>\n                Select the non-search agent OnyxBot will use while answering\n                questions in Slack.\n                {syncEnabledAgents.length > 0 && (\n                  <>\n                    <br />\n                    <span className=\"text-sm text-text-dark/80\">\n                      Note: Some of your agents have auto-synced connectors in\n                      their document sets. You cannot select these agents as\n                      they will not be able to answer questions in Slack.{\" \"}\n                      <button\n                        type=\"button\"\n                        onClick={() =>\n                          setViewSyncEnabledAgents(\n                            (viewSyncEnabledAgents) => !viewSyncEnabledAgents\n                          )\n                        }\n                        className=\"text-sm text-action-link-05\"\n                      >\n                        {viewSyncEnabledAgents\n                          ? \"Hide un-selectable \"\n                          : \"View all \"}\n                        agents\n                      </button>\n                    </span>\n                  </>\n                )}\n              </>\n            </SubLabel>\n\n            <SelectorFormField\n              name=\"persona_id\"\n              options={nonSearchAgents.map((persona) => ({\n                name: persona.name,\n                value: persona.id,\n              }))}\n            />\n          </div>\n        )}\n      </div>\n      <Separator className=\"my-4\" />\n      <Accordion type=\"multiple\" className=\"gap-y-2 w-full\">\n        {values.knowledge_source !== \"non_search_agent\" && (\n          <AccordionItem value=\"search-options\">\n            <AccordionTrigger className=\"text-text\">\n              Search Configuration\n            </AccordionTrigger>\n            <AccordionContent>\n              <div className=\"space-y-4 pb-3\">\n                <div className=\"w-64\">\n                  <SelectorFormField\n                    name=\"response_type\"\n                    label=\"Answer Type\"\n                    tooltip=\"Controls the format of OnyxBot's responses.\"\n                    options={[\n                      { name: \"Standard\", value: \"citations\" },\n                      { name: \"Detailed\", value: \"quotes\" },\n                    ]}\n                  />\n                </div>\n                <CheckboxField\n                  name=\"answer_validity_check_enabled\"\n                  label=\"Only respond if citations found\"\n                  tooltip=\"If set, will only answer questions where the model successfully produces citations\"\n                />\n              </div>\n            </AccordionContent>\n          </AccordionItem>\n        )}\n\n        <AccordionItem className=\"mt-4\" value=\"general-options\">\n          <AccordionTrigger>General Configuration</AccordionTrigger>\n          <AccordionContent className=\"overflow-visible\">\n            <div className=\"space-y-4\">\n              <CheckboxField\n                name=\"show_continue_in_web_ui\"\n                label=\"Show Continue in Web UI button\"\n                tooltip=\"If set, will show a button at the bottom of the response that allows the user to continue the conversation in the Onyx Web UI\"\n              />\n\n              <CheckboxField\n                name=\"still_need_help_enabled\"\n                onChange={(checked: boolean) => {\n                  setFieldValue(\"still_need_help_enabled\", checked);\n                  if (!checked) {\n                    setFieldValue(\"follow_up_tags\", []);\n                  }\n                }}\n                label={'Give a \"Still need help?\" button'}\n                tooltip={`OnyxBot's response will include a button at the bottom\n                      of the response that asks the user if they still need help.`}\n              />\n              {values.still_need_help_enabled && (\n                <CollapsibleSection prompt=\"Configure Still Need Help Button\">\n                  <TextArrayField\n                    name=\"follow_up_tags\"\n                    label=\"(Optional) Users / Groups to Tag\"\n                    values={values}\n                    subtext={\n                      <div>\n                        The Slack users / groups we should tag if the user\n                        clicks the &quot;Still need help?&quot; button. If no\n                        emails are provided, we will not tag anyone and will\n                        just react with a 🆘 emoji to the original message.\n                      </div>\n                    }\n                    placeholder=\"User email or user group name...\"\n                  />\n                </CollapsibleSection>\n              )}\n\n              <CheckboxField\n                name=\"questionmark_prefilter_enabled\"\n                label=\"Only respond to questions\"\n                tooltip=\"If set, OnyxBot will only respond to messages that contain a question mark\"\n              />\n              <CheckboxField\n                name=\"respond_tag_only\"\n                label=\"Respond to @OnyxBot Only\"\n                tooltip=\"If set, OnyxBot will only respond when directly tagged\"\n              />\n              <CheckboxField\n                name=\"respond_to_bots\"\n                label=\"Respond to Bot messages\"\n                tooltip=\"If not set, OnyxBot will always ignore messages from Bots\"\n              />\n              <CheckboxField\n                name=\"is_ephemeral\"\n                label=\"Respond to user in a private (ephemeral) message\"\n                tooltip=\"If set, OnyxBot will respond only to the user in a private (ephemeral) message. If you also\n                chose 'Search' Agent above, selecting this option will make documents that are private to the user\n                available for their queries.\"\n              />\n\n              <TextArrayField\n                name=\"respond_member_group_list\"\n                label=\"(Optional) Respond to Certain Users / Groups\"\n                subtext={\n                  \"If specified, OnyxBot responses will only \" +\n                  \"be visible to the members or groups in this list.\"\n                }\n                values={values}\n                placeholder=\"User email or user group name...\"\n              />\n\n              <StandardAnswerCategoryDropdownField\n                standardAnswerCategoryResponse={standardAnswerCategoryResponse}\n                categories={values.standard_answer_categories}\n                setCategories={(categories: any) =>\n                  setFieldValue(\"standard_answer_categories\", categories)\n                }\n              />\n            </div>\n          </AccordionContent>\n        </AccordionItem>\n      </Accordion>\n\n      <div className=\"flex mt-8 gap-x-2 w-full justify-end\">\n        {shouldShowPrivacyAlert && (\n          <TooltipProvider>\n            <Tooltip>\n              <TooltipTrigger asChild>\n                <div className=\"flex hover:bg-background-150 cursor-pointer p-2 rounded-lg items-center\">\n                  <AlertCircle className=\"h-5 w-5 text-alert\" />\n                </div>\n              </TooltipTrigger>\n              <TooltipContent side=\"top\" className=\"bg-background p-4 w-80\">\n                <Label className=\"text-text mb-2 font-semibold\">\n                  Privacy Alert\n                </Label>\n                <p className=\"text-sm text-text-darker mb-4\">\n                  Please note that if the private (ephemeral) response is *not\n                  selected*, only public documents within the selected document\n                  sets will be accessible for user queries. If the private\n                  (ephemeral) response *is selected*, user quries can also\n                  leverage documents that the user has already been granted\n                  access to. Note that users will be able to share the response\n                  with others in the channel, so please ensure that this is\n                  aligned with your company sharing policies.\n                </p>\n                <div className=\"space-y-2\">\n                  <h4 className=\"text-sm text-text font-medium\">\n                    Relevant Connectors:\n                  </h4>\n                  <div className=\"max-h-40 overflow-y-auto border-t border-text-subtle flex-col gap-y-2\">\n                    {memoizedPrivateConnectors.map((ccpairinfo: any) => (\n                      <Link\n                        key={ccpairinfo.id}\n                        href={`/admin/connector/${ccpairinfo.id}`}\n                        className=\"flex items-center p-2 rounded-md hover:bg-background-100 transition-colors\"\n                      >\n                        <div className=\"mr-2\">\n                          <SourceIcon\n                            iconSize={16}\n                            sourceType={ccpairinfo.source}\n                          />\n                        </div>\n                        <span className=\"text-sm text-text-darker font-medium\">\n                          {ccpairinfo.name}\n                        </span>\n                      </Link>\n                    ))}\n                  </div>\n                </div>\n              </TooltipContent>\n            </Tooltip>\n          </TooltipProvider>\n        )}\n        <Button type=\"submit\">{isUpdate ? \"Update\" : \"Create\"}</Button>\n        <Button prominence=\"secondary\" onClick={() => router.back()}>\n          Cancel\n        </Button>\n      </div>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/bots/[bot-id]/channels/[id]/page.tsx",
    "content": "\"use client\";\n\nimport { use } from \"react\";\nimport { SlackChannelConfigCreationForm } from \"@/app/admin/bots/[bot-id]/channels/SlackChannelConfigCreationForm\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { SvgSlack } from \"@opal/icons\";\nimport { useSlackChannelConfigs } from \"@/app/admin/bots/[bot-id]/hooks\";\nimport { useDocumentSets } from \"@/app/admin/documents/sets/hooks\";\nimport { useAgents } from \"@/hooks/useAgents\";\nimport { useStandardAnswerCategories } from \"@/app/ee/admin/standard-answer/hooks\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport type { StandardAnswerCategoryResponse } from \"@/components/standardAnswers/getStandardAnswerCategoriesIfEE\";\n\nfunction EditSlackChannelConfigContent({ id }: { id: string }) {\n  const isPaidEnterprise = usePaidEnterpriseFeaturesEnabled();\n\n  const {\n    data: slackChannelConfigs,\n    isLoading: isChannelsLoading,\n    error: channelsError,\n  } = useSlackChannelConfigs();\n\n  const {\n    data: documentSets,\n    isLoading: isDocSetsLoading,\n    error: docSetsError,\n  } = useDocumentSets();\n\n  const {\n    agents,\n    isLoading: isAgentsLoading,\n    error: agentsError,\n  } = useAgents();\n\n  const {\n    data: standardAnswerCategories,\n    isLoading: isStdAnswerLoading,\n    error: stdAnswerError,\n  } = useStandardAnswerCategories();\n\n  const isLoading =\n    isChannelsLoading ||\n    isDocSetsLoading ||\n    isAgentsLoading ||\n    (isPaidEnterprise && isStdAnswerLoading);\n\n  const slackChannelConfig = slackChannelConfigs?.find(\n    (config) => config.id === Number(id)\n  );\n\n  const title = slackChannelConfig?.is_default\n    ? \"Edit Default Slack Config\"\n    : \"Edit Slack Channel Config\";\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={SvgSlack}\n        title={title}\n        separator\n        backButton\n      />\n      <SettingsLayouts.Body>\n        {isLoading ? (\n          <SimpleLoader />\n        ) : channelsError || !slackChannelConfigs ? (\n          <ErrorCallout\n            errorTitle=\"Something went wrong :(\"\n            errorMsg={`Failed to fetch Slack Channels - ${\n              channelsError?.message ?? \"unknown error\"\n            }`}\n          />\n        ) : !slackChannelConfig ? (\n          <ErrorCallout\n            errorTitle=\"Something went wrong :(\"\n            errorMsg={`Did not find Slack Channel config with ID: ${id}`}\n          />\n        ) : docSetsError || !documentSets ? (\n          <ErrorCallout\n            errorTitle=\"Something went wrong :(\"\n            errorMsg={`Failed to fetch document sets - ${\n              docSetsError?.message ?? \"unknown error\"\n            }`}\n          />\n        ) : agentsError ? (\n          <ErrorCallout\n            errorTitle=\"Something went wrong :(\"\n            errorMsg={`Failed to fetch agents - ${\n              agentsError?.message ?? \"unknown error\"\n            }`}\n          />\n        ) : (\n          <SlackChannelConfigCreationForm\n            slack_bot_id={slackChannelConfig.slack_bot_id}\n            documentSets={documentSets}\n            personas={agents}\n            standardAnswerCategoryResponse={\n              isPaidEnterprise\n                ? {\n                    paidEnterpriseFeaturesEnabled: true,\n                    categories: standardAnswerCategories ?? [],\n                    ...(stdAnswerError\n                      ? { error: { message: String(stdAnswerError) } }\n                      : {}),\n                  }\n                : { paidEnterpriseFeaturesEnabled: false }\n            }\n            existingSlackChannelConfig={slackChannelConfig}\n          />\n        )}\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n\nexport default function Page(props: { params: Promise<{ id: string }> }) {\n  const params = use(props.params);\n\n  return <EditSlackChannelConfigContent id={params.id} />;\n}\n"
  },
  {
    "path": "web/src/app/admin/bots/[bot-id]/channels/new/page.tsx",
    "content": "\"use client\";\n\nimport { use, useEffect } from \"react\";\nimport { SlackChannelConfigCreationForm } from \"@/app/admin/bots/[bot-id]/channels/SlackChannelConfigCreationForm\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { SvgSlack } from \"@opal/icons\";\nimport { useDocumentSets } from \"@/app/admin/documents/sets/hooks\";\nimport { useAgents } from \"@/hooks/useAgents\";\nimport { useStandardAnswerCategories } from \"@/app/ee/admin/standard-answer/hooks\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport type { StandardAnswerCategoryResponse } from \"@/components/standardAnswers/getStandardAnswerCategoriesIfEE\";\nimport { useRouter } from \"next/navigation\";\n\nfunction NewChannelConfigContent({ slackBotId }: { slackBotId: number }) {\n  const isPaidEnterprise = usePaidEnterpriseFeaturesEnabled();\n\n  const {\n    data: documentSets,\n    isLoading: isDocSetsLoading,\n    error: docSetsError,\n  } = useDocumentSets();\n\n  const {\n    agents,\n    isLoading: isAgentsLoading,\n    error: agentsError,\n  } = useAgents();\n\n  const {\n    data: standardAnswerCategories,\n    isLoading: isStdAnswerLoading,\n    error: stdAnswerError,\n  } = useStandardAnswerCategories();\n\n  if (\n    isDocSetsLoading ||\n    isAgentsLoading ||\n    (isPaidEnterprise && isStdAnswerLoading)\n  ) {\n    return <SimpleLoader />;\n  }\n\n  if (docSetsError || !documentSets) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Something went wrong :(\"\n        errorMsg={`Failed to fetch document sets - ${\n          docSetsError?.message ?? \"unknown error\"\n        }`}\n      />\n    );\n  }\n\n  if (agentsError) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Something went wrong :(\"\n        errorMsg={`Failed to fetch agents - ${\n          agentsError?.message ?? \"unknown error\"\n        }`}\n      />\n    );\n  }\n\n  const standardAnswerCategoryResponse: StandardAnswerCategoryResponse =\n    isPaidEnterprise\n      ? {\n          paidEnterpriseFeaturesEnabled: true,\n          categories: standardAnswerCategories ?? [],\n          ...(stdAnswerError\n            ? { error: { message: String(stdAnswerError) } }\n            : {}),\n        }\n      : { paidEnterpriseFeaturesEnabled: false };\n\n  return (\n    <SlackChannelConfigCreationForm\n      slack_bot_id={slackBotId}\n      documentSets={documentSets}\n      personas={agents}\n      standardAnswerCategoryResponse={standardAnswerCategoryResponse}\n    />\n  );\n}\n\nexport default function Page(props: { params: Promise<{ \"bot-id\": string }> }) {\n  const unwrappedParams = use(props.params);\n  const router = useRouter();\n\n  const slack_bot_id_raw = unwrappedParams?.[\"bot-id\"] || null;\n  const slack_bot_id = slack_bot_id_raw\n    ? parseInt(slack_bot_id_raw as string, 10)\n    : null;\n\n  useEffect(() => {\n    if (!slack_bot_id || isNaN(slack_bot_id)) {\n      router.replace(\"/admin/bots\");\n    }\n  }, [slack_bot_id, router]);\n\n  if (!slack_bot_id || isNaN(slack_bot_id)) {\n    return null;\n  }\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={SvgSlack}\n        title=\"Configure OnyxBot for Slack Channel\"\n        separator\n        backButton\n      />\n      <SettingsLayouts.Body>\n        <NewChannelConfigContent slackBotId={slack_bot_id} />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/bots/[bot-id]/hooks.ts",
    "content": "import { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SlackBot, SlackChannelConfig } from \"@/lib/types\";\nimport useSWR, { mutate } from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport const useSlackChannelConfigs = () => {\n  const swrResponse = useSWR<SlackChannelConfig[]>(\n    SWR_KEYS.slackChannels,\n    errorHandlingFetcher\n  );\n\n  return {\n    ...swrResponse,\n    refreshSlackChannelConfigs: () => mutate(SWR_KEYS.slackChannels),\n  };\n};\n\nexport const useSlackBots = () => {\n  const swrResponse = useSWR<SlackBot[]>(\n    SWR_KEYS.slackBots,\n    errorHandlingFetcher\n  );\n\n  return {\n    ...swrResponse,\n    refreshSlackBots: () => mutate(SWR_KEYS.slackBots),\n  };\n};\n\nexport const useSlackBot = (botId: number) => {\n  const swrResponse = useSWR<SlackBot>(\n    SWR_KEYS.slackBot(botId),\n    errorHandlingFetcher\n  );\n\n  return {\n    ...swrResponse,\n    refreshSlackBot: () => mutate(SWR_KEYS.slackBot(botId)),\n  };\n};\n\nexport const useSlackChannelConfigsByBot = (botId: number) => {\n  const swrResponse = useSWR<SlackChannelConfig[]>(\n    SWR_KEYS.slackBotConfig(botId),\n    errorHandlingFetcher\n  );\n\n  return {\n    ...swrResponse,\n    refreshSlackChannelConfigs: () => mutate(SWR_KEYS.slackBotConfig(botId)),\n  };\n};\n"
  },
  {
    "path": "web/src/app/admin/bots/[bot-id]/lib.ts",
    "content": "import { SlackBotResponseType } from \"@/lib/types\";\nimport { Persona } from \"@/app/admin/agents/interfaces\";\n\ninterface SlackChannelConfigCreationRequest {\n  slack_bot_id: number;\n  document_sets: number[];\n  persona_id: number | null;\n  enable_auto_filters: boolean;\n  channel_name: string;\n  answer_validity_check_enabled: boolean;\n  questionmark_prefilter_enabled: boolean;\n  respond_tag_only: boolean;\n  is_ephemeral: boolean;\n  respond_to_bots: boolean;\n  show_continue_in_web_ui: boolean;\n  respond_member_group_list: string[];\n  follow_up_tags?: string[];\n  usePersona: boolean;\n  response_type: SlackBotResponseType;\n  standard_answer_categories: number[];\n  disabled: boolean;\n}\n\nconst buildFiltersFromCreationRequest = (\n  creationRequest: SlackChannelConfigCreationRequest\n): string[] => {\n  const answerFilters = [] as string[];\n  if (creationRequest.answer_validity_check_enabled) {\n    answerFilters.push(\"well_answered_postfilter\");\n  }\n  if (creationRequest.questionmark_prefilter_enabled) {\n    answerFilters.push(\"questionmark_prefilter\");\n  }\n  return answerFilters;\n};\n\nconst buildRequestBodyFromCreationRequest = (\n  creationRequest: SlackChannelConfigCreationRequest\n) => {\n  return JSON.stringify({\n    slack_bot_id: creationRequest.slack_bot_id,\n    channel_name: creationRequest.channel_name,\n    respond_tag_only: creationRequest.respond_tag_only,\n    respond_to_bots: creationRequest.respond_to_bots,\n    is_ephemeral: creationRequest.is_ephemeral,\n    show_continue_in_web_ui: creationRequest.show_continue_in_web_ui,\n    enable_auto_filters: creationRequest.enable_auto_filters,\n    respond_member_group_list: creationRequest.respond_member_group_list,\n    answer_filters: buildFiltersFromCreationRequest(creationRequest),\n    follow_up_tags: creationRequest.follow_up_tags?.filter((tag) => tag !== \"\"),\n    ...(creationRequest.usePersona\n      ? { persona_id: creationRequest.persona_id }\n      : { document_sets: creationRequest.document_sets }),\n    response_type: creationRequest.response_type,\n    standard_answer_categories: creationRequest.standard_answer_categories,\n    disabled: creationRequest.disabled,\n  });\n};\n\nexport const createSlackChannelConfig = async (\n  creationRequest: SlackChannelConfigCreationRequest\n) => {\n  return fetch(\"/api/manage/admin/slack-app/channel\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: buildRequestBodyFromCreationRequest(creationRequest),\n  });\n};\n\nexport const updateSlackChannelConfig = async (\n  id: number,\n  creationRequest: SlackChannelConfigCreationRequest\n) => {\n  return fetch(`/api/manage/admin/slack-app/channel/${id}`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: buildRequestBodyFromCreationRequest(creationRequest),\n  });\n};\n\nexport const deleteSlackChannelConfig = async (id: number) => {\n  return fetch(`/api/manage/admin/slack-app/channel/${id}`, {\n    method: \"DELETE\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n};\n\nexport function isPersonaASlackBotPersona(persona: Persona) {\n  return persona.name.startsWith(\"__slack_bot_persona__\");\n}\n"
  },
  {
    "path": "web/src/app/admin/bots/[bot-id]/page.tsx",
    "content": "\"use client\";\n\nimport { use } from \"react\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport SlackChannelConfigsTable from \"./SlackChannelConfigsTable\";\nimport { useSlackBot, useSlackChannelConfigsByBot } from \"./hooks\";\nimport { ExistingSlackBotForm } from \"../SlackBotUpdateForm\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { SvgSlack } from \"@opal/icons\";\nimport { getErrorMsg } from \"@/lib/error\";\n\nfunction SlackBotEditContent({ botId }: { botId: string }) {\n  const {\n    data: slackBot,\n    isLoading: isSlackBotLoading,\n    error: slackBotError,\n    refreshSlackBot,\n  } = useSlackBot(Number(botId));\n\n  const {\n    data: slackChannelConfigs,\n    isLoading: isSlackChannelConfigsLoading,\n    error: slackChannelConfigsError,\n    refreshSlackChannelConfigs,\n  } = useSlackChannelConfigsByBot(Number(botId));\n\n  if (isSlackBotLoading || isSlackChannelConfigsLoading) {\n    return <SimpleLoader />;\n  }\n\n  if (slackBotError || !slackBot) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Something went wrong :(\"\n        errorMsg={`Failed to fetch Slack Bot ${botId}: ${getErrorMsg(\n          slackBotError\n        )}`}\n      />\n    );\n  }\n\n  if (slackChannelConfigsError || !slackChannelConfigs) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Something went wrong :(\"\n        errorMsg={`Failed to fetch Slack Bot ${botId}: ${getErrorMsg(\n          slackChannelConfigsError\n        )}`}\n      />\n    );\n  }\n\n  return (\n    <>\n      <ExistingSlackBotForm\n        existingSlackBot={slackBot}\n        refreshSlackBot={refreshSlackBot}\n      />\n\n      <div className=\"mt-8\">\n        <SlackChannelConfigsTable\n          slackBotId={slackBot.id}\n          slackChannelConfigs={slackChannelConfigs}\n          refresh={refreshSlackChannelConfigs}\n        />\n      </div>\n    </>\n  );\n}\n\nexport default function Page({\n  params,\n}: {\n  params: Promise<{ \"bot-id\": string }>;\n}) {\n  const unwrappedParams = use(params);\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={SvgSlack}\n        title=\"Edit Slack Bot\"\n        backButton\n        separator\n      />\n      <SettingsLayouts.Body>\n        <SlackBotEditContent botId={unwrappedParams[\"bot-id\"]} />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/bots/new/lib.ts",
    "content": "export interface SlackBotCreationRequest {\n  name: string;\n  enabled: boolean;\n\n  bot_token: string;\n  app_token: string;\n  user_token?: string;\n}\n\nconst buildRequestBodyFromCreationRequest = (\n  creationRequest: SlackBotCreationRequest\n): string => {\n  return JSON.stringify({\n    name: creationRequest.name,\n    enabled: creationRequest.enabled,\n    bot_token: creationRequest.bot_token,\n    app_token: creationRequest.app_token,\n    user_token: creationRequest.user_token,\n  });\n};\n\nexport const createSlackBot = async (\n  creationRequest: SlackBotCreationRequest\n) => {\n  return fetch(\"/api/manage/admin/slack-app/bots\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: buildRequestBodyFromCreationRequest(creationRequest),\n  });\n};\n\nexport const updateSlackBot = async (\n  id: number,\n  creationRequest: SlackBotCreationRequest\n) => {\n  return fetch(`/api/manage/admin/slack-app/bots/${id}`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: buildRequestBodyFromCreationRequest(creationRequest),\n  });\n};\n\nexport const deleteSlackBot = async (id: number) => {\n  return fetch(`/api/manage/admin/slack-app/bots/${id}`, {\n    method: \"DELETE\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n};\n"
  },
  {
    "path": "web/src/app/admin/bots/new/page.tsx",
    "content": "\"use client\";\n\nimport { NewSlackBotForm } from \"../SlackBotCreationForm\";\n\nexport default function Page() {\n  return <NewSlackBotForm />;\n}\n"
  },
  {
    "path": "web/src/app/admin/bots/page.tsx",
    "content": "\"use client\";\n\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { InstantSSRAutoRefresh } from \"@/components/SSRAutoRefresh\";\nimport { SlackBotTable } from \"./SlackBotTable\";\nimport { useSlackBots } from \"./[bot-id]/hooks\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport CreateButton from \"@/refresh-components/buttons/CreateButton\";\nimport { DOCS_ADMINS_PATH } from \"@/lib/constants\";\n\nconst route = ADMIN_ROUTES.SLACK_BOTS;\n\nfunction Main() {\n  const {\n    data: slackBots,\n    isLoading: isSlackBotsLoading,\n    error: slackBotsError,\n  } = useSlackBots();\n\n  if (isSlackBotsLoading) {\n    return <ThreeDotsLoader />;\n  }\n\n  if (slackBotsError || !slackBots) {\n    const errorMsg =\n      slackBotsError?.info?.message ||\n      slackBotsError?.info?.detail ||\n      \"An unknown error occurred\";\n\n    return (\n      <ErrorCallout errorTitle=\"Error loading apps\" errorMsg={`${errorMsg}`} />\n    );\n  }\n\n  return (\n    <div className=\"mb-8\">\n      <p className=\"mb-2 text-sm text-muted-foreground\">\n        Setup Slack bots that connect to Onyx. Once setup, you will be able to\n        ask questions to Onyx directly from Slack. Additionally, you can:\n      </p>\n\n      <div className=\"mb-2\">\n        <ul className=\"list-disc mt-2 ml-4 text-sm text-muted-foreground\">\n          <li>\n            Setup OnyxBot to automatically answer questions in certain channels.\n          </li>\n          <li>\n            Choose which document sets OnyxBot should answer from, depending on\n            the channel the question is being asked.\n          </li>\n          <li>\n            Directly message OnyxBot to search just as you would in the web UI.\n          </li>\n        </ul>\n      </div>\n\n      <p className=\"mb-6 text-sm text-muted-foreground\">\n        Follow the{\" \"}\n        <a\n          className=\"text-blue-500 hover:underline\"\n          href={`${DOCS_ADMINS_PATH}/getting_started/slack_bot_setup`}\n          target=\"_blank\"\n          rel=\"noopener noreferrer\"\n        >\n          guide{\" \"}\n        </a>\n        found in the Onyx documentation to get started!\n      </p>\n\n      <CreateButton href=\"/admin/bots/new\">New Slack Bot</CreateButton>\n\n      <SlackBotTable slackBots={slackBots} />\n    </div>\n  );\n}\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />\n      <SettingsLayouts.Body>\n        <InstantSSRAutoRefresh />\n        <Main />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/configuration/chat-preferences/page.tsx",
    "content": "\"use client\";\n\nimport ChatPreferencesPage from \"@/refresh-pages/admin/ChatPreferencesPage\";\n\nexport default function Page() {\n  return <ChatPreferencesPage />;\n}\n"
  },
  {
    "path": "web/src/app/admin/configuration/code-interpreter/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/CodeInterpreterPage\";\n"
  },
  {
    "path": "web/src/app/admin/configuration/document-processing/page.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { Button } from \"@opal/components\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\nimport { SvgLock } from \"@opal/icons\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.DOCUMENT_PROCESSING;\n\nfunction Main() {\n  const {\n    data: isApiKeySet,\n    error,\n    mutate,\n    isLoading,\n  } = useSWR<{\n    unstructured_api_key: string | null;\n  }>(SWR_KEYS.unstructuredApiKeySet, (url: string) =>\n    fetch(url).then((res) => res.json())\n  );\n\n  const [apiKey, setApiKey] = useState(\"\");\n\n  const handleSave = async () => {\n    try {\n      await fetch(\n        `/api/search-settings/upsert-unstructured-api-key?unstructured_api_key=${apiKey}`,\n        {\n          method: \"PUT\",\n        }\n      );\n    } catch (error) {\n      console.error(\"Failed to save API key:\", error);\n    }\n    mutate();\n  };\n\n  const handleDelete = async () => {\n    try {\n      await fetch(\"/api/search-settings/delete-unstructured-api-key\", {\n        method: \"DELETE\",\n      });\n      setApiKey(\"\");\n    } catch (error) {\n      console.error(\"Failed to delete API key:\", error);\n    }\n    mutate();\n  };\n\n  if (isLoading) {\n    return <ThreeDotsLoader />;\n  }\n  return (\n    <div className=\"pb-36\">\n      <div className=\"w-full max-w-2xl\">\n        <CardSection className=\"flex flex-col gap-2\">\n          <Text\n            as=\"p\"\n            headingH3\n            text05\n            className=\"border-b border-border-01 pb-2\"\n          >\n            Process with Unstructured API\n          </Text>\n\n          <div className=\"flex flex-col gap-2\">\n            <Text as=\"p\" mainContentBody text04 className=\"leading-relaxed\">\n              Unstructured extracts and transforms complex data from formats\n              like .pdf, .docx, .png, .pptx, etc. into clean text for Onyx to\n              ingest. Provide an API key to enable Unstructured document\n              processing.\n            </Text>\n            <Text as=\"p\" mainContentMuted text03>\n              <span className=\"font-main-ui-action text-text-03\">Note:</span>{\" \"}\n              this will send documents to Unstructured servers for processing.\n            </Text>\n            <Text as=\"p\" mainContentBody text04 className=\"leading-relaxed\">\n              Learn more about Unstructured{\" \"}\n              <a\n                href=\"https://docs.unstructured.io/welcome\"\n                target=\"_blank\"\n                rel=\"noopener noreferrer\"\n                className=\"text-action-link-05 underline-offset-4 hover:underline\"\n              >\n                here\n              </a>\n              .\n            </Text>\n            <div className=\"pt-1.5\">\n              {isApiKeySet ? (\n                <div\n                  className={cn(\n                    \"flex\",\n                    \"items-center\",\n                    \"gap-0.5\",\n                    \"rounded-08\",\n                    \"border\",\n                    \"border-border-01\",\n                    \"bg-background-neutral-01\",\n                    \"px-2\",\n                    \"py-1.5\"\n                  )}\n                >\n                  <Text\n                    as=\"p\"\n                    mainUiMuted\n                    text03\n                    className=\"flex-1 tracking-[0.3em] text-text-03\"\n                  >\n                    ••••••••••••••••\n                  </Text>\n                  <SvgLock className=\"h-4 w-4 stroke-text-03\" aria-hidden />\n                </div>\n              ) : (\n                <InputTypeIn\n                  placeholder=\"Enter API Key\"\n                  value={apiKey}\n                  onChange={(e) => setApiKey(e.target.value)}\n                />\n              )}\n            </div>\n            <div className=\"flex flex-col gap-2 desktop:flex-row desktop:items-center desktop:gap-2\">\n              {isApiKeySet ? (\n                <>\n                  <Button variant=\"danger\" onClick={handleDelete}>\n                    Delete API Key\n                  </Button>\n                  <Text as=\"p\" mainContentBody text04 className=\"desktop:mt-0\">\n                    Delete the current API key before updating.\n                  </Text>\n                </>\n              ) : (\n                <Button variant=\"action\" onClick={handleSave}>\n                  Save API Key\n                </Button>\n              )}\n            </div>\n          </div>\n        </CardSection>\n      </div>\n    </div>\n  );\n}\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />\n      <SettingsLayouts.Body>\n        <Main />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/configuration/image-generation/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/ImageGenerationPage\";\n"
  },
  {
    "path": "web/src/app/admin/configuration/llm/ModelConfigurationField.tsx",
    "content": "\"use client\";\n\nimport { ArrayHelpers, FieldArray, FormikProps, useField } from \"formik\";\nimport { ModelConfiguration } from \"@/interfaces/llm\";\nimport { ManualErrorMessage, TextFormField } from \"@/components/Field\";\nimport { useEffect, useState } from \"react\";\nimport CreateButton from \"@/refresh-components/buttons/CreateButton\";\nimport { Button } from \"@opal/components\";\nimport { SvgX } from \"@opal/icons\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nfunction ModelConfigurationRow({\n  name,\n  index,\n  arrayHelpers,\n  formikProps,\n  setError,\n}: {\n  name: string;\n  index: number;\n  arrayHelpers: ArrayHelpers;\n  formikProps: FormikProps<{ model_configurations: ModelConfiguration[] }>;\n  setError: (value: string | null) => void;\n}) {\n  const [, input] = useField(`${name}[${index}]`);\n  useEffect(() => {\n    if (!input.touched) return;\n    setError((input.error as { name: string } | undefined)?.name ?? null);\n  }, [input.touched, input.error]);\n\n  return (\n    <div key={index} className=\"flex flex-row w-full gap-4\">\n      <div\n        className={`flex flex-[2] ${\n          input.touched && input.error ? \"border-2 border-error rounded-lg\" : \"\"\n        }`}\n      >\n        <TextFormField\n          name={`${name}[${index}].name`}\n          label=\"\"\n          placeholder={`model-name-${index + 1}`}\n          removeLabel\n          hideError\n        />\n      </div>\n      <div className=\"flex flex-[1]\">\n        <TextFormField\n          name={`${name}[${index}].max_input_tokens`}\n          label=\"\"\n          placeholder=\"Default\"\n          removeLabel\n          hideError\n          type=\"number\"\n          min={1}\n        />\n      </div>\n      <div className=\"flex flex-col justify-center\">\n        <Button\n          disabled={formikProps.values.model_configurations.length <= 1}\n          onClick={() => {\n            if (formikProps.values.model_configurations.length > 1) {\n              setError(null);\n              arrayHelpers.remove(index);\n            }\n          }}\n          icon={SvgX}\n          prominence=\"secondary\"\n        />\n      </div>\n    </div>\n  );\n}\n\nexport function ModelConfigurationField({\n  name,\n  formikProps,\n}: {\n  name: string;\n  formikProps: FormikProps<{ model_configurations: ModelConfiguration[] }>;\n}) {\n  const [errorMap, setErrorMap] = useState<{ [index: number]: string }>({});\n  const [finalError, setFinalError] = useState<string | undefined>();\n\n  return (\n    <div className=\"pb-5 flex flex-col w-full\">\n      <div className=\"flex flex-col\">\n        <Text as=\"p\" mainUiAction>\n          Model Configurations\n        </Text>\n        <Text as=\"p\" secondaryBody text03>\n          Add models and customize the number of input tokens that they accept.\n        </Text>\n      </div>\n      <FieldArray\n        name={name}\n        render={(arrayHelpers: ArrayHelpers) => (\n          <div className=\"flex flex-col\">\n            <div className=\"flex flex-col gap-4 py-4\">\n              <div className=\"flex\">\n                <Text as=\"p\" secondaryBody className=\"flex flex-[2]\">\n                  Model Name\n                </Text>\n                <Text as=\"p\" secondaryBody className=\"flex flex-[1]\">\n                  Max Input Tokens\n                </Text>\n                <div className=\"w-10\" />\n              </div>\n              {formikProps.values.model_configurations.map((_, index) => (\n                <ModelConfigurationRow\n                  key={index}\n                  name={name}\n                  formikProps={formikProps}\n                  arrayHelpers={arrayHelpers}\n                  index={index}\n                  setError={(message: string | null) => {\n                    const newErrors = { ...errorMap };\n                    if (message) {\n                      newErrors[index] = message;\n                    } else {\n                      delete newErrors[index];\n                      for (const key in newErrors) {\n                        const numKey = Number(key);\n                        if (numKey > index) {\n                          const errorValue = newErrors[key];\n                          if (errorValue !== undefined) {\n                            // Ensure the value is not undefined\n                            newErrors[numKey - 1] = errorValue;\n                            delete newErrors[numKey];\n                          }\n                        }\n                      }\n                    }\n                    setErrorMap(newErrors);\n                    setFinalError(\n                      Object.values(newErrors).filter((item) => item)[0]\n                    );\n                  }}\n                />\n              ))}\n            </div>\n            {finalError && (\n              <ManualErrorMessage>{finalError}</ManualErrorMessage>\n            )}\n            <div className=\"mt-3\">\n              <CreateButton\n                onClick={() => {\n                  arrayHelpers.push({\n                    name: \"\",\n                    is_visible: true,\n                    // Use null so Yup.number().nullable() accepts empty inputs\n                    max_input_tokens: null,\n                  });\n                }}\n              >\n                Add New\n              </CreateButton>\n            </div>\n          </div>\n        )}\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/configuration/llm/ProviderIcon.tsx",
    "content": "import { defaultTailwindCSS, IconProps } from \"@/components/icons/icons\";\nimport { getProviderIcon } from \"@/app/admin/configuration/llm/utils\";\n\nexport interface ProviderIconProps extends IconProps {\n  provider: string;\n  modelName?: string;\n}\n\nexport const ProviderIcon = ({\n  provider,\n  modelName,\n  size = 16,\n  className = defaultTailwindCSS,\n}: ProviderIconProps) => {\n  const Icon = getProviderIcon(provider, modelName);\n  return <Icon size={size} className={className} />;\n};\n"
  },
  {
    "path": "web/src/app/admin/configuration/llm/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/LLMConfigurationPage\";\n"
  },
  {
    "path": "web/src/app/admin/configuration/llm/utils.ts",
    "content": "import { JSX } from \"react\";\nimport {\n  AnthropicIcon,\n  AmazonIcon,\n  AzureIcon,\n  CPUIcon,\n  MicrosoftIconSVG,\n  MistralIcon,\n  MetaIcon,\n  GeminiIcon,\n  IconProps,\n  DeepseekIcon,\n  OpenAISVG,\n  QwenIcon,\n  OllamaIcon,\n  LMStudioIcon,\n  LiteLLMIcon,\n  ZAIIcon,\n} from \"@/components/icons/icons\";\nimport {\n  OllamaModelResponse,\n  OpenRouterModelResponse,\n  BedrockModelResponse,\n  LMStudioModelResponse,\n  LiteLLMProxyModelResponse,\n  BifrostModelResponse,\n  ModelConfiguration,\n  LLMProviderName,\n  BedrockFetchParams,\n  OllamaFetchParams,\n  LMStudioFetchParams,\n  OpenRouterFetchParams,\n  LiteLLMProxyFetchParams,\n  BifrostFetchParams,\n} from \"@/interfaces/llm\";\nimport { SvgAws, SvgBifrost, SvgOpenrouter } from \"@opal/icons\";\n\n// Aggregator providers that host models from multiple vendors\nexport const AGGREGATOR_PROVIDERS = new Set([\n  \"bedrock\",\n  \"bedrock_converse\",\n  \"openrouter\",\n  \"ollama_chat\",\n  \"lm_studio\",\n  \"litellm_proxy\",\n  \"bifrost\",\n  \"vertex_ai\",\n]);\n\nexport const getProviderIcon = (\n  providerName: string,\n  modelName?: string\n): (({ size, className }: IconProps) => JSX.Element) => {\n  const iconMap: Record<\n    string,\n    ({ size, className }: IconProps) => JSX.Element\n  > = {\n    amazon: AmazonIcon,\n    phi: MicrosoftIconSVG,\n    mistral: MistralIcon,\n    ministral: MistralIcon,\n    llama: MetaIcon,\n    ollama_chat: OllamaIcon,\n    ollama: OllamaIcon,\n    lm_studio: LMStudioIcon,\n    gemini: GeminiIcon,\n    deepseek: DeepseekIcon,\n    claude: AnthropicIcon,\n    anthropic: AnthropicIcon,\n    openai: OpenAISVG,\n    // Azure OpenAI should display the Azure logo\n    azure: AzureIcon,\n    microsoft: MicrosoftIconSVG,\n    meta: MetaIcon,\n    google: GeminiIcon,\n    qwen: QwenIcon,\n    qwq: QwenIcon,\n    zai: ZAIIcon,\n    // Cloud providers - use AWS icon for Bedrock\n    bedrock: SvgAws,\n    bedrock_converse: SvgAws,\n    openrouter: SvgOpenrouter,\n    litellm_proxy: LiteLLMIcon,\n    bifrost: SvgBifrost,\n    vertex_ai: GeminiIcon,\n  };\n\n  const lowerProviderName = providerName.toLowerCase();\n\n  // For aggregator providers (bedrock, openrouter, vertex_ai), prioritize showing\n  // the vendor icon based on model name (e.g., show Claude icon for Bedrock Claude models)\n  if (AGGREGATOR_PROVIDERS.has(lowerProviderName) && modelName) {\n    const lowerModelName = modelName.toLowerCase();\n    for (const [key, icon] of Object.entries(iconMap)) {\n      if (lowerModelName.includes(key)) {\n        return icon;\n      }\n    }\n  }\n\n  // Check if provider name directly matches an icon\n  if (lowerProviderName in iconMap) {\n    const icon = iconMap[lowerProviderName];\n    if (icon) {\n      return icon;\n    }\n  }\n\n  // For non-aggregator providers, check if model name contains any of the keys\n  if (modelName) {\n    const lowerModelName = modelName.toLowerCase();\n    for (const [key, icon] of Object.entries(iconMap)) {\n      if (lowerModelName.includes(key)) {\n        return icon;\n      }\n    }\n  }\n\n  // Fallback to CPU icon if no matches\n  return CPUIcon;\n};\n\nexport const isAnthropic = (provider: string, modelName?: string) =>\n  provider === LLMProviderName.ANTHROPIC ||\n  !!modelName?.toLowerCase().includes(\"claude\");\n\n/**\n * Fetches Bedrock models directly without any form state dependencies.\n * Uses snake_case params to match API structure.\n */\nexport const fetchBedrockModels = async (\n  params: BedrockFetchParams\n): Promise<{ models: ModelConfiguration[]; error?: string }> => {\n  if (!params.aws_region_name) {\n    return { models: [], error: \"AWS region is required\" };\n  }\n\n  try {\n    const response = await fetch(\"/api/admin/llm/bedrock/available-models\", {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        aws_region_name: params.aws_region_name,\n        aws_access_key_id: params.aws_access_key_id,\n        aws_secret_access_key: params.aws_secret_access_key,\n        aws_bearer_token_bedrock: params.aws_bearer_token_bedrock,\n        provider_name: params.provider_name,\n      }),\n    });\n\n    if (!response.ok) {\n      let errorMessage = \"Failed to fetch models\";\n      try {\n        const errorData = await response.json();\n        errorMessage = errorData.detail || errorData.message || errorMessage;\n      } catch {\n        // ignore JSON parsing errors\n      }\n      return { models: [], error: errorMessage };\n    }\n\n    const data: BedrockModelResponse[] = await response.json();\n    const models: ModelConfiguration[] = data.map((modelData) => ({\n      name: modelData.name,\n      display_name: modelData.display_name,\n      is_visible: false,\n      max_input_tokens: modelData.max_input_tokens,\n      supports_image_input: modelData.supports_image_input,\n      supports_reasoning: false,\n    }));\n\n    return { models };\n  } catch (error) {\n    const errorMessage =\n      error instanceof Error ? error.message : \"Unknown error\";\n    return { models: [], error: errorMessage };\n  }\n};\n\n/**\n * Fetches Ollama models directly without any form state dependencies.\n * Uses snake_case params to match API structure.\n */\nexport const fetchOllamaModels = async (\n  params: OllamaFetchParams\n): Promise<{ models: ModelConfiguration[]; error?: string }> => {\n  const apiBase = params.api_base;\n  if (!apiBase) {\n    return { models: [], error: \"API Base is required\" };\n  }\n\n  try {\n    const response = await fetch(\"/api/admin/llm/ollama/available-models\", {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        api_base: apiBase,\n        provider_name: params.provider_name,\n      }),\n      signal: params.signal,\n    });\n\n    if (!response.ok) {\n      let errorMessage = \"Failed to fetch models\";\n      try {\n        const errorData = await response.json();\n        errorMessage = errorData.detail || errorData.message || errorMessage;\n      } catch {\n        // ignore JSON parsing errors\n      }\n      return { models: [], error: errorMessage };\n    }\n\n    const data: OllamaModelResponse[] = await response.json();\n    const models: ModelConfiguration[] = data.map((modelData) => ({\n      name: modelData.name,\n      display_name: modelData.display_name,\n      is_visible: true,\n      max_input_tokens: modelData.max_input_tokens,\n      supports_image_input: modelData.supports_image_input,\n      supports_reasoning: false,\n    }));\n\n    return { models };\n  } catch (error) {\n    const errorMessage =\n      error instanceof Error ? error.message : \"Unknown error\";\n    return { models: [], error: errorMessage };\n  }\n};\n\n/**\n * Fetches OpenRouter models directly without any form state dependencies.\n * Uses snake_case params to match API structure.\n */\nexport const fetchOpenRouterModels = async (\n  params: OpenRouterFetchParams\n): Promise<{ models: ModelConfiguration[]; error?: string }> => {\n  const apiBase = params.api_base;\n  const apiKey = params.api_key;\n  if (!apiBase) {\n    return { models: [], error: \"API Base is required\" };\n  }\n  if (!apiKey) {\n    return { models: [], error: \"API Key is required\" };\n  }\n\n  try {\n    const response = await fetch(\"/api/admin/llm/openrouter/available-models\", {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        api_base: apiBase,\n        api_key: apiKey,\n        provider_name: params.provider_name,\n      }),\n    });\n\n    if (!response.ok) {\n      let errorMessage = \"Failed to fetch models\";\n      try {\n        const errorData = await response.json();\n        errorMessage = errorData.detail || errorData.message || errorMessage;\n      } catch (jsonError) {\n        console.warn(\n          \"Failed to parse OpenRouter model fetch error response\",\n          jsonError\n        );\n      }\n      return { models: [], error: errorMessage };\n    }\n\n    const data: OpenRouterModelResponse[] = await response.json();\n    const models: ModelConfiguration[] = data.map((modelData) => ({\n      name: modelData.name,\n      display_name: modelData.display_name,\n      is_visible: true,\n      max_input_tokens: modelData.max_input_tokens,\n      supports_image_input: modelData.supports_image_input,\n      supports_reasoning: false,\n    }));\n\n    return { models };\n  } catch (error) {\n    const errorMessage =\n      error instanceof Error ? error.message : \"Unknown error\";\n    return { models: [], error: errorMessage };\n  }\n};\n\n/**\n * Fetches LM Studio models directly without any form state dependencies.\n * Uses snake_case params to match API structure.\n */\nexport const fetchLMStudioModels = async (\n  params: LMStudioFetchParams\n): Promise<{ models: ModelConfiguration[]; error?: string }> => {\n  const apiBase = params.api_base;\n  if (!apiBase) {\n    return { models: [], error: \"API Base is required\" };\n  }\n\n  try {\n    const response = await fetch(\"/api/admin/llm/lm-studio/available-models\", {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        api_base: apiBase,\n        api_key: params.api_key,\n        api_key_changed: params.api_key_changed ?? false,\n        provider_name: params.provider_name,\n      }),\n      signal: params.signal,\n    });\n\n    if (!response.ok) {\n      let errorMessage = \"Failed to fetch models\";\n      try {\n        const errorData = await response.json();\n        errorMessage = errorData.detail || errorData.message || errorMessage;\n      } catch (jsonError) {\n        console.warn(\n          \"Failed to parse LM Studio model fetch error response\",\n          jsonError\n        );\n      }\n      return { models: [], error: errorMessage };\n    }\n\n    const data: LMStudioModelResponse[] = await response.json();\n    const models: ModelConfiguration[] = data.map((modelData) => ({\n      name: modelData.name,\n      display_name: modelData.display_name,\n      is_visible: true,\n      max_input_tokens: modelData.max_input_tokens,\n      supports_image_input: modelData.supports_image_input,\n      supports_reasoning: modelData.supports_reasoning,\n    }));\n\n    return { models };\n  } catch (error) {\n    const errorMessage =\n      error instanceof Error ? error.message : \"Unknown error\";\n    return { models: [], error: errorMessage };\n  }\n};\n\n/**\n * Fetches Bifrost models directly without any form state dependencies.\n * Uses snake_case params to match API structure.\n */\nexport const fetchBifrostModels = async (\n  params: BifrostFetchParams\n): Promise<{ models: ModelConfiguration[]; error?: string }> => {\n  const apiBase = params.api_base;\n  if (!apiBase) {\n    return { models: [], error: \"API Base is required\" };\n  }\n\n  try {\n    const response = await fetch(\"/api/admin/llm/bifrost/available-models\", {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        api_base: apiBase,\n        api_key: params.api_key,\n        provider_name: params.provider_name,\n      }),\n      signal: params.signal,\n    });\n\n    if (!response.ok) {\n      let errorMessage = \"Failed to fetch models\";\n      try {\n        const errorData = await response.json();\n        errorMessage = errorData.detail || errorData.message || errorMessage;\n      } catch (jsonError) {\n        console.warn(\n          \"Failed to parse Bifrost model fetch error response\",\n          jsonError\n        );\n      }\n      return { models: [], error: errorMessage };\n    }\n\n    const data: BifrostModelResponse[] = await response.json();\n    const models: ModelConfiguration[] = data.map((modelData) => ({\n      name: modelData.name,\n      display_name: modelData.display_name,\n      is_visible: true,\n      max_input_tokens: modelData.max_input_tokens,\n      supports_image_input: modelData.supports_image_input,\n      supports_reasoning: modelData.supports_reasoning,\n    }));\n\n    return { models };\n  } catch (error) {\n    const errorMessage =\n      error instanceof Error ? error.message : \"Unknown error\";\n    return { models: [], error: errorMessage };\n  }\n};\n\n/**\n * Fetches LiteLLM Proxy models directly without any form state dependencies.\n * Uses snake_case params to match API structure.\n */\nexport const fetchLiteLLMProxyModels = async (\n  params: LiteLLMProxyFetchParams\n): Promise<{ models: ModelConfiguration[]; error?: string }> => {\n  const apiBase = params.api_base;\n  const apiKey = params.api_key;\n  if (!apiBase) {\n    return { models: [], error: \"API Base is required\" };\n  }\n  if (!apiKey) {\n    return { models: [], error: \"API Key is required\" };\n  }\n\n  try {\n    const response = await fetch(\"/api/admin/llm/litellm/available-models\", {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        api_base: apiBase,\n        api_key: apiKey,\n        provider_name: params.provider_name,\n      }),\n      signal: params.signal,\n    });\n\n    if (!response.ok) {\n      let errorMessage = \"Failed to fetch models\";\n      try {\n        const errorData = await response.json();\n        errorMessage = errorData.detail || errorData.message || errorMessage;\n      } catch {\n        // ignore JSON parsing errors\n      }\n      return { models: [], error: errorMessage };\n    }\n\n    const data: LiteLLMProxyModelResponse[] = await response.json();\n    const models: ModelConfiguration[] = data.map((modelData) => ({\n      name: modelData.model_name,\n      display_name: modelData.model_name,\n      is_visible: true,\n      max_input_tokens: null,\n      supports_image_input: false,\n      supports_reasoning: false,\n    }));\n\n    return { models };\n  } catch (error) {\n    const errorMessage =\n      error instanceof Error ? error.message : \"Unknown error\";\n    return { models: [], error: errorMessage };\n  }\n};\n\n/**\n * Fetches models for a provider. Accepts form values directly and maps them\n * to the expected fetch params format internally.\n */\nexport const fetchModels = async (\n  providerName: string,\n  formValues: {\n    api_base?: string;\n    api_key?: string;\n    api_key_changed?: boolean;\n    name?: string;\n    custom_config?: Record<string, string>;\n    model_configurations?: ModelConfiguration[];\n  },\n  signal?: AbortSignal\n) => {\n  const customConfig = formValues.custom_config || {};\n\n  switch (providerName) {\n    case LLMProviderName.BEDROCK:\n      return fetchBedrockModels({\n        aws_region_name: customConfig.AWS_REGION_NAME || \"\",\n        aws_access_key_id: customConfig.AWS_ACCESS_KEY_ID,\n        aws_secret_access_key: customConfig.AWS_SECRET_ACCESS_KEY,\n        aws_bearer_token_bedrock: customConfig.AWS_BEARER_TOKEN_BEDROCK,\n        provider_name: formValues.name,\n      });\n    case LLMProviderName.OLLAMA_CHAT:\n      return fetchOllamaModels({\n        api_base: formValues.api_base,\n        provider_name: formValues.name,\n        signal,\n      });\n    case LLMProviderName.LM_STUDIO:\n      return fetchLMStudioModels({\n        api_base: formValues.api_base,\n        api_key: formValues.custom_config?.LM_STUDIO_API_KEY,\n        api_key_changed: formValues.api_key_changed ?? false,\n        provider_name: formValues.name,\n        signal,\n      });\n    case LLMProviderName.OPENROUTER:\n      return fetchOpenRouterModels({\n        api_base: formValues.api_base,\n        api_key: formValues.api_key,\n        provider_name: formValues.name,\n      });\n    case LLMProviderName.LITELLM_PROXY:\n      return fetchLiteLLMProxyModels({\n        api_base: formValues.api_base,\n        api_key: formValues.api_key,\n        provider_name: formValues.name,\n        signal,\n      });\n    case LLMProviderName.BIFROST:\n      return fetchBifrostModels({\n        api_base: formValues.api_base,\n        api_key: formValues.api_key,\n        provider_name: formValues.name,\n        signal,\n      });\n    default:\n      return { models: [], error: `Unknown provider: ${providerName}` };\n  }\n};\n\nexport function canProviderFetchModels(providerName?: string) {\n  if (!providerName) return false;\n  switch (providerName) {\n    case LLMProviderName.BEDROCK:\n    case LLMProviderName.OLLAMA_CHAT:\n    case LLMProviderName.LM_STUDIO:\n    case LLMProviderName.OPENROUTER:\n    case LLMProviderName.LITELLM_PROXY:\n    case LLMProviderName.BIFROST:\n      return true;\n    default:\n      return false;\n  }\n}\n"
  },
  {
    "path": "web/src/app/admin/configuration/search/UpgradingPage.tsx",
    "content": "import { ThreeDotsLoader } from \"@/components/Loading\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport {\n  ConnectorIndexingStatusLite,\n  ConnectorIndexingStatusLiteResponse,\n  FailedConnectorIndexingStatus,\n  ValidStatuses,\n} from \"@/lib/types\";\nimport { Text } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport Title from \"@/components/ui/title\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Button as OpalButton } from \"@opal/components\";\nimport { useMemo, useState } from \"react\";\nimport useSWR, { mutate } from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { ReindexingProgressTable } from \"../../../../components/embedding/ReindexingProgressTable\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport {\n  CloudEmbeddingModel,\n  HostedEmbeddingModel,\n} from \"../../../../components/embedding/interfaces\";\nimport { Connector } from \"@/lib/connectors/connectors\";\nimport { FailedReIndexAttempts } from \"@/components/embedding/FailedReIndexAttempts\";\nimport { useConnectorIndexingStatusWithPagination } from \"@/lib/hooks\";\nimport { SvgX } from \"@opal/icons\";\nimport { ConnectorCredentialPairStatus } from \"@/app/admin/connector/[ccPairId]/types\";\nimport { useVectorDbEnabled } from \"@/providers/SettingsProvider\";\n\nexport default function UpgradingPage({\n  futureEmbeddingModel,\n}: {\n  futureEmbeddingModel: CloudEmbeddingModel | HostedEmbeddingModel;\n}) {\n  const [isCancelling, setIsCancelling] = useState<boolean>(false);\n  const vectorDbEnabled = useVectorDbEnabled();\n\n  const { data: connectors, isLoading: isLoadingConnectors } = useSWR<\n    Connector<any>[]\n  >(vectorDbEnabled ? SWR_KEYS.connector : null, errorHandlingFetcher, {\n    refreshInterval: 5000,\n  });\n\n  const {\n    data: connectorIndexingStatuses,\n    isLoading: isLoadingOngoingReIndexingStatus,\n  } = useConnectorIndexingStatusWithPagination(\n    { secondary_index: true, get_all_connectors: true },\n    5000,\n    vectorDbEnabled\n  ) as {\n    data: ConnectorIndexingStatusLiteResponse[];\n    isLoading: boolean;\n  };\n\n  const { data: failedIndexingStatus } = useSWR<\n    FailedConnectorIndexingStatus[]\n  >(\n    vectorDbEnabled\n      ? \"/api/manage/admin/connector/failed-indexing-status?secondary_index=true\"\n      : null,\n    errorHandlingFetcher,\n    { refreshInterval: 5000 }\n  );\n\n  const onCancel = async () => {\n    const response = await fetch(\"/api/search-settings/cancel-new-embedding\", {\n      method: \"POST\",\n    });\n    if (response.ok) {\n      mutate(SWR_KEYS.secondarySearchSettings);\n    } else {\n      alert(\n        `Failed to cancel embedding model update - ${await response.text()}`\n      );\n    }\n    setIsCancelling(false);\n  };\n  const statusOrder: Record<ValidStatuses, number> = useMemo(\n    () => ({\n      invalid: 0,\n      failed: 1,\n      canceled: 2,\n      completed_with_errors: 3,\n      not_started: 4,\n      in_progress: 5,\n      success: 6,\n    }),\n    []\n  );\n\n  const ongoingReIndexingStatus = useMemo(() => {\n    return connectorIndexingStatuses\n      .flatMap(\n        (status) => status.indexing_statuses as ConnectorIndexingStatusLite[]\n      )\n      .filter((status) => status.cc_pair_id !== undefined);\n  }, [connectorIndexingStatuses]);\n\n  const visibleReindexingStatus = useMemo(() => {\n    const statuses = ongoingReIndexingStatus || [];\n\n    if (futureEmbeddingModel.switchover_type === \"active_only\") {\n      return statuses.filter(\n        (status) =>\n          status.cc_pair_status !== ConnectorCredentialPairStatus.PAUSED\n      );\n    }\n\n    return statuses;\n  }, [futureEmbeddingModel.switchover_type, ongoingReIndexingStatus]);\n\n  const sortedReindexingProgress = useMemo(() => {\n    return [...(visibleReindexingStatus || [])].sort((a, b) => {\n      const statusComparison =\n        statusOrder[a.last_status || \"not_started\"] -\n        statusOrder[b.last_status || \"not_started\"];\n\n      if (statusComparison !== 0) {\n        return statusComparison;\n      }\n\n      return (a.cc_pair_id || 0) - (b.cc_pair_id || 0);\n    });\n  }, [visibleReindexingStatus, statusOrder]);\n\n  const hasVisibleReindexingProgress = sortedReindexingProgress.length > 0;\n\n  if (isLoadingConnectors || isLoadingOngoingReIndexingStatus) {\n    return <ThreeDotsLoader />;\n  }\n\n  return (\n    <>\n      {isCancelling && (\n        <Modal open onOpenChange={() => setIsCancelling(false)}>\n          <Modal.Content width=\"sm\" height=\"sm\">\n            <Modal.Header\n              icon={SvgX}\n              title=\"Cancel Embedding Model Switch\"\n              onClose={() => setIsCancelling(false)}\n            />\n            <Modal.Body>\n              <div>\n                Are you sure you want to cancel? Cancelling will revert to the\n                previous model and all progress will be lost.\n              </div>\n            </Modal.Body>\n            <Modal.Footer>\n              <OpalButton onClick={onCancel}>Confirm</OpalButton>\n              <OpalButton\n                prominence=\"secondary\"\n                onClick={() => setIsCancelling(false)}\n              >\n                Cancel\n              </OpalButton>\n            </Modal.Footer>\n          </Modal.Content>\n        </Modal>\n      )}\n\n      {futureEmbeddingModel && (\n        <div>\n          <Title className=\"mt-8\">Current Upgrade Status</Title>\n          <div className=\"mt-4\">\n            <div className=\"italic text-lg mb-2\">\n              Currently in the process of switching to:{\" \"}\n              {futureEmbeddingModel.model_name}\n            </div>\n\n            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n            <Button\n              danger\n              className=\"mt-4\"\n              onClick={() => setIsCancelling(true)}\n            >\n              Cancel\n            </Button>\n\n            {connectors && connectors.length > 0 ? (\n              futureEmbeddingModel.switchover_type === \"instant\" ? (\n                <div className=\"mt-8\">\n                  <h3 className=\"text-lg font-semibold mb-2\">\n                    Switching Embedding Models\n                  </h3>\n                  <p className=\"mb-4 text-text-800\">\n                    You&apos;re currently switching embedding models, and\n                    you&apos;ve selected the instant switch option. The\n                    transition will complete shortly.\n                  </p>\n                  <p className=\"text-text-600\">\n                    The new model will be active soon.\n                  </p>\n                </div>\n              ) : (\n                <>\n                  {failedIndexingStatus && failedIndexingStatus.length > 0 && (\n                    <FailedReIndexAttempts\n                      failedIndexingStatuses={failedIndexingStatus}\n                    />\n                  )}\n\n                  <Spacer rem={1} />\n                  <Text as=\"p\">\n                    {futureEmbeddingModel.switchover_type === \"active_only\"\n                      ? markdown(\n                          \"The table below shows the re-indexing progress of active (non-paused) connectors. Once all active connectors have been re-indexed successfully, the new model will be used for all search queries. Paused connectors will continue to be indexed in the background but won't block the switchover. Until then, we will use the old model so that no downtime is necessary during this transition.\\nNote: User file re-indexing progress is not shown. You will see this page until all active connectors are re-indexed!\"\n                        )\n                      : markdown(\n                          \"The table below shows the re-indexing progress of all existing connectors. Once all connectors have been re-indexed successfully, the new model will be used for all search queries. Until then, we will use the old model so that no downtime is necessary during this transition.\\nNote: User file re-indexing progress is not shown. You will see this page until all user files are re-indexed!\"\n                        )}\n                  </Text>\n                  <Spacer rem={1} />\n\n                  {sortedReindexingProgress ? (\n                    <>\n                      {futureEmbeddingModel.switchover_type === \"active_only\" &&\n                        !hasVisibleReindexingProgress && (\n                          <>\n                            <Spacer rem={1} />\n                            <Text as=\"p\">\n                              All connectors are currently paused, so none are\n                              blocking the switchover. Paused connectors will\n                              keep re-indexing in the background.\n                            </Text>\n                          </>\n                        )}\n                      {hasVisibleReindexingProgress && (\n                        <ReindexingProgressTable\n                          reindexingProgress={sortedReindexingProgress}\n                        />\n                      )}\n                    </>\n                  ) : (\n                    <ErrorCallout errorTitle=\"Failed to fetch re-indexing progress\" />\n                  )}\n                </>\n              )\n            ) : (\n              <div className=\"mt-8 p-6 bg-background-100 border border-border-strong rounded-lg max-w-2xl\">\n                <h3 className=\"text-lg font-semibold mb-2\">\n                  Switching Embedding Models\n                </h3>\n                <p className=\"mb-4 text-text-800\">\n                  You&apos;re currently switching embedding models, but there\n                  are no connectors to reindex. This means the transition will\n                  be quick and seamless!\n                </p>\n                <p className=\"text-text-600\">\n                  The new model will be active soon.\n                </p>\n              </div>\n            )}\n          </div>\n        </div>\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/configuration/search/page.tsx",
    "content": "\"use client\";\n\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { Text } from \"@opal/components\";\nimport Title from \"@/components/ui/title\";\nimport { Button } from \"@opal/components\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { ModelPreview } from \"@/components/embedding/ModelSelector\";\nimport {\n  HostedEmbeddingModel,\n  CloudEmbeddingModel,\n} from \"@/components/embedding/interfaces\";\nimport { SavedSearchSettings } from \"@/app/admin/embeddings/interfaces\";\nimport UpgradingPage from \"./UpgradingPage\";\nimport { useContext } from \"react\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { useToastFromQuery } from \"@/hooks/useToast\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.INDEX_SETTINGS;\n\nexport interface EmbeddingDetails {\n  api_key: string;\n  custom_config: any;\n  default_model_id?: number;\n  name: string;\n}\n\nfunction Main() {\n  const settings = useContext(SettingsContext);\n  useToastFromQuery({\n    \"search-settings\": {\n      message: `Changed search settings successfully`,\n      type: \"success\",\n    },\n  });\n  const {\n    data: currentEmeddingModel,\n    isLoading: isLoadingCurrentModel,\n    error: currentEmeddingModelError,\n  } = useSWR<CloudEmbeddingModel | HostedEmbeddingModel | null>(\n    SWR_KEYS.currentSearchSettings,\n    errorHandlingFetcher,\n    { refreshInterval: 5000 } // 5 seconds\n  );\n\n  const { data: searchSettings, isLoading: isLoadingSearchSettings } =\n    useSWR<SavedSearchSettings | null>(\n      SWR_KEYS.currentSearchSettings,\n      errorHandlingFetcher,\n      { refreshInterval: 5000 } // 5 seconds\n    );\n\n  const {\n    data: futureEmbeddingModel,\n    isLoading: isLoadingFutureModel,\n    error: futureEmeddingModelError,\n  } = useSWR<CloudEmbeddingModel | HostedEmbeddingModel | null>(\n    SWR_KEYS.secondarySearchSettings,\n    errorHandlingFetcher,\n    { refreshInterval: 5000 } // 5 seconds\n  );\n\n  if (\n    isLoadingCurrentModel ||\n    isLoadingFutureModel ||\n    isLoadingSearchSettings\n  ) {\n    return <ThreeDotsLoader />;\n  }\n\n  if (\n    currentEmeddingModelError ||\n    !currentEmeddingModel ||\n    futureEmeddingModelError\n  ) {\n    return <ErrorCallout errorTitle=\"Failed to fetch embedding model status\" />;\n  }\n\n  return (\n    <div>\n      {!futureEmbeddingModel ? (\n        <>\n          {settings?.settings.needs_reindexing && (\n            <p className=\"max-w-3xl\">\n              Your search settings are currently out of date! We recommend\n              updating your search settings and re-indexing.\n            </p>\n          )}\n          <Title className=\"mb-6 mt-8 !text-2xl\">Embedding Model</Title>\n\n          {currentEmeddingModel ? (\n            <ModelPreview model={currentEmeddingModel} display showDetails />\n          ) : (\n            <Title className=\"mt-8 mb-4\">Choose your Embedding Model</Title>\n          )}\n\n          <Title className=\"mb-2 mt-8 !text-2xl\">Post-processing</Title>\n\n          <CardSection className=\"!mr-auto mt-8 !w-96 shadow-lg bg-background-tint-00 rounded-16\">\n            {searchSettings && (\n              <>\n                <div className=\"px-1 w-full rounded-lg\">\n                  <div className=\"space-y-4\">\n                    <div>\n                      <Text as=\"p\" font=\"main-ui-action\">\n                        Multipass Indexing\n                      </Text>\n                      <Text as=\"p\">\n                        {searchSettings.multipass_indexing\n                          ? \"Enabled\"\n                          : \"Disabled\"}\n                      </Text>\n                    </div>\n\n                    <div>\n                      <Text as=\"p\" font=\"main-ui-action\">\n                        Contextual RAG\n                      </Text>\n                      <Text as=\"p\">\n                        {searchSettings.enable_contextual_rag\n                          ? \"Enabled\"\n                          : \"Disabled\"}\n                      </Text>\n                    </div>\n                  </div>\n                </div>\n              </>\n            )}\n          </CardSection>\n\n          <div className=\"mt-4\">\n            <Button variant=\"action\" href=\"/admin/embeddings\">\n              Update Index Settings\n            </Button>\n          </div>\n        </>\n      ) : (\n        <UpgradingPage futureEmbeddingModel={futureEmbeddingModel} />\n      )}\n    </div>\n  );\n}\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header title={route.title} icon={route.icon} separator />\n      <SettingsLayouts.Body>\n        <Main />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/configuration/voice/VoiceProviderSetupModal.tsx",
    "content": "\"use client\";\n\nimport { markdown } from \"@opal/utils\";\nimport Image from \"next/image\";\nimport { FunctionComponent, useState, useEffect } from \"react\";\nimport {\n  AzureIcon,\n  ElevenLabsIcon,\n  OpenAIIcon,\n} from \"@/components/icons/icons\";\nimport Modal from \"@/refresh-components/Modal\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport PasswordInputTypeIn from \"@/refresh-components/inputs/PasswordInputTypeIn\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport InputComboBox from \"@/refresh-components/inputs/InputComboBox\";\nimport { FormField } from \"@/refresh-components/form/FormField\";\nimport { Vertical, Horizontal } from \"@/layouts/input-layouts\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { SvgArrowExchange, SvgOnyxLogo } from \"@opal/icons\";\nimport { Disabled } from \"@opal/core\";\nimport type { IconProps } from \"@opal/types\";\nimport { VoiceProviderView } from \"@/hooks/useVoiceProviders\";\nimport {\n  testVoiceProvider,\n  upsertVoiceProvider,\n  fetchVoicesByType,\n  fetchLLMProviders,\n} from \"@/lib/admin/voice/svc\";\n\ninterface VoiceOption {\n  value: string;\n  label: string;\n  description?: string;\n}\n\ninterface LLMProviderView {\n  id: number;\n  name: string;\n  provider: string;\n  api_key: string | null;\n}\n\ninterface ApiKeyOption {\n  value: string;\n  label: string;\n  description?: string;\n}\n\ninterface VoiceProviderSetupModalProps {\n  providerType: string;\n  existingProvider: VoiceProviderView | null;\n  mode: \"stt\" | \"tts\";\n  defaultModelId?: string | null;\n  onClose: () => void;\n  onSuccess: () => void;\n}\n\nconst PROVIDER_LABELS: Record<string, string> = {\n  openai: \"OpenAI\",\n  azure: \"Azure Speech Services\",\n  elevenlabs: \"ElevenLabs\",\n};\n\nconst PROVIDER_API_KEY_URLS: Record<string, string> = {\n  openai: \"https://platform.openai.com/api-keys\",\n  azure: \"https://portal.azure.com/\",\n  elevenlabs: \"https://elevenlabs.io/app/settings/api-keys\",\n};\n\nconst PROVIDER_LOGO_URLS: Record<string, string> = {\n  openai: \"/Openai.svg\",\n  azure: \"/Azure.png\",\n  elevenlabs: \"/ElevenLabs.svg\",\n};\n\nconst PROVIDER_DOCS_URLS: Record<string, string> = {\n  openai: \"https://platform.openai.com/docs/guides/text-to-speech\",\n  azure: \"https://learn.microsoft.com/en-us/azure/ai-services/speech-service/\",\n  elevenlabs: \"https://elevenlabs.io/docs\",\n};\n\nconst PROVIDER_VOICE_DOCS_URLS: Record<string, { url: string; label: string }> =\n  {\n    openai: {\n      url: \"https://platform.openai.com/docs/guides/text-to-speech#voice-options\",\n      label: \"OpenAI\",\n    },\n    azure: {\n      url: \"https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts\",\n      label: \"Azure\",\n    },\n    elevenlabs: {\n      url: \"https://elevenlabs.io/docs/voices/premade-voices\",\n      label: \"ElevenLabs\",\n    },\n  };\n\nconst OPENAI_STT_MODELS = [{ id: \"whisper-1\", name: \"Whisper v1\" }];\n\nconst OPENAI_TTS_MODELS = [\n  { id: \"tts-1\", name: \"TTS-1\" },\n  { id: \"tts-1-hd\", name: \"TTS-1 HD\" },\n];\n\n// Map model IDs from cards to actual API model IDs\nconst MODEL_ID_MAP: Record<string, string> = {\n  \"tts-1\": \"tts-1\",\n  \"tts-1-hd\": \"tts-1-hd\",\n  whisper: \"whisper-1\",\n};\n\ntype Phase = \"idle\" | \"validating\" | \"saving\";\ntype MessageState = {\n  kind: \"status\" | \"error\" | \"success\";\n  text: string;\n} | null;\n\nexport default function VoiceProviderSetupModal({\n  providerType,\n  existingProvider,\n  mode,\n  defaultModelId,\n  onClose,\n  onSuccess,\n}: VoiceProviderSetupModalProps) {\n  // Map the card model ID to the actual API model ID\n  // Prioritize defaultModelId (from the clicked card) over stored value\n  const initialTtsModel = defaultModelId\n    ? MODEL_ID_MAP[defaultModelId] ?? \"tts-1\"\n    : existingProvider?.tts_model ?? \"tts-1\";\n\n  const [apiKey, setApiKey] = useState(\"\");\n  const [apiKeyChanged, setApiKeyChanged] = useState(false);\n  const [targetUri, setTargetUri] = useState(\n    existingProvider?.target_uri ?? \"\"\n  );\n  const [selectedLlmProviderId, setSelectedLlmProviderId] = useState<\n    number | null\n  >(null);\n  const [sttModel, setSttModel] = useState(\n    existingProvider?.stt_model ?? \"whisper-1\"\n  );\n  const [ttsModel, setTtsModel] = useState(initialTtsModel);\n  const [defaultVoice, setDefaultVoice] = useState(\n    existingProvider?.default_voice ?? \"\"\n  );\n  const [phase, setPhase] = useState<Phase>(\"idle\");\n  const [message, setMessage] = useState<MessageState>(null);\n\n  // Dynamic voices fetched from backend\n  const [voiceOptions, setVoiceOptions] = useState<VoiceOption[]>([]);\n  const [isLoadingVoices, setIsLoadingVoices] = useState(false);\n\n  // Existing OpenAI LLM providers for API key reuse\n  const [existingApiKeyOptions, setExistingApiKeyOptions] = useState<\n    ApiKeyOption[]\n  >([]);\n  const [llmProviderMap, setLlmProviderMap] = useState<Map<string, number>>(\n    new Map()\n  );\n\n  // Fetch existing OpenAI LLM providers (for API key reuse)\n  useEffect(() => {\n    if (providerType !== \"openai\") return;\n\n    fetchLLMProviders()\n      .then((res) => res.json())\n      .then((data: { providers: LLMProviderView[] } | LLMProviderView[]) => {\n        const providers = Array.isArray(data) ? data : data.providers ?? [];\n        const openaiProviders = providers.filter(\n          (p) => p.provider === \"openai\" && p.api_key\n        );\n        const options: ApiKeyOption[] = openaiProviders.map((p) => ({\n          value: p.api_key!,\n          label: p.api_key!,\n          description: `Used for LLM provider **${p.name}**`,\n        }));\n        setExistingApiKeyOptions(options);\n\n        // Map masked API keys to provider IDs for lookup on selection\n        const providerMap = new Map<string, number>();\n        openaiProviders.forEach((p) => {\n          if (p.api_key) {\n            providerMap.set(p.api_key, p.id);\n          }\n        });\n        setLlmProviderMap(providerMap);\n      })\n      .catch(() => {\n        setExistingApiKeyOptions([]);\n      });\n  }, [providerType]);\n\n  // Fetch voices on mount (works without API key for ElevenLabs/OpenAI)\n  useEffect(() => {\n    setIsLoadingVoices(true);\n    fetchVoicesByType(providerType)\n      .then((res) => res.json())\n      .then((data: Array<{ id: string; name: string }>) => {\n        const options = data.map((v) => ({\n          value: v.id,\n          label: v.name,\n          description: v.id,\n        }));\n        setVoiceOptions(options);\n        // Set default voice to first option if not already set,\n        // or if current value doesn't exist in the new options\n        setDefaultVoice((prev) => {\n          if (!prev) return options[0]?.value ?? \"\";\n          const existsInOptions = options.some((opt) => opt.value === prev);\n          return existsInOptions ? prev : options[0]?.value ?? \"\";\n        });\n      })\n      .catch(() => {\n        setVoiceOptions([]);\n      })\n      .finally(() => {\n        setIsLoadingVoices(false);\n      });\n  }, [providerType]);\n\n  const isEditing = !!existingProvider;\n  const label = PROVIDER_LABELS[providerType] ?? providerType;\n  const isProcessing = phase !== \"idle\";\n  const hasNonEmptyApiKey = apiKey.trim().length > 0;\n  const shouldSendApiKey =\n    !selectedLlmProviderId && apiKeyChanged && hasNonEmptyApiKey;\n  const shouldUseStoredKey =\n    isEditing && !selectedLlmProviderId && !shouldSendApiKey;\n\n  const canConnect = (() => {\n    if (selectedLlmProviderId) return true;\n    if (!isEditing && !apiKey) return false;\n    if (providerType === \"azure\" && !isEditing && !targetUri) return false;\n    return true;\n  })();\n\n  // Logo arrangement component for the modal header\n  // No useMemo needed - providerType and label are stable props\n  const LogoArrangement: FunctionComponent<IconProps> = () => (\n    <div className=\"flex items-center gap-2\">\n      <div className=\"flex items-center justify-center size-7 shrink-0 overflow-clip\">\n        {providerType === \"openai\" ? (\n          <OpenAIIcon size={24} />\n        ) : providerType === \"azure\" ? (\n          <AzureIcon size={24} />\n        ) : providerType === \"elevenlabs\" ? (\n          <ElevenLabsIcon size={24} />\n        ) : (\n          <Image\n            src={PROVIDER_LOGO_URLS[providerType] ?? \"/Openai.svg\"}\n            alt={`${label} logo`}\n            width={24}\n            height={24}\n            className=\"object-contain\"\n          />\n        )}\n      </div>\n      <div className=\"flex items-center justify-center size-4 shrink-0\">\n        <SvgArrowExchange className=\"size-3 text-text-04\" />\n      </div>\n      <div className=\"flex items-center justify-center size-7 p-0.5 shrink-0 overflow-clip\">\n        <SvgOnyxLogo size={24} className=\"shrink-0\" />\n      </div>\n    </div>\n  );\n\n  const formFieldState: \"idle\" | \"error\" | \"success\" =\n    message?.kind === \"error\"\n      ? \"error\"\n      : message?.kind === \"success\"\n        ? \"success\"\n        : \"idle\";\n\n  const handleSubmit = async () => {\n    if (!canConnect) return;\n\n    setMessage(null);\n\n    try {\n      // Test the connection first (skip if reusing LLM provider key - validated on save)\n      if (!selectedLlmProviderId) {\n        setPhase(\"validating\");\n        setMessage({ kind: \"status\", text: \"Validating API key...\" });\n\n        const testResponse = await testVoiceProvider({\n          provider_type: providerType,\n          api_key: shouldSendApiKey ? apiKey : undefined,\n          target_uri: targetUri || undefined,\n          use_stored_key: shouldUseStoredKey,\n        });\n\n        if (!testResponse.ok) {\n          const data = await testResponse.json().catch(() => ({}));\n          const detail =\n            typeof data?.detail === \"string\"\n              ? data.detail\n              : \"Connection test failed\";\n          setPhase(\"idle\");\n          setMessage({ kind: \"error\", text: detail });\n          return;\n        }\n\n        setMessage({\n          kind: \"status\",\n          text: \"API key validated. Saving provider...\",\n        });\n      }\n\n      // Save the provider\n      setPhase(\"saving\");\n      const response = await upsertVoiceProvider({\n        id: existingProvider?.id,\n        name: label,\n        provider_type: providerType,\n        api_key: shouldSendApiKey ? apiKey : undefined,\n        api_key_changed: shouldSendApiKey,\n        target_uri: targetUri || undefined,\n        llm_provider_id: selectedLlmProviderId,\n        stt_model: sttModel,\n        tts_model: ttsModel,\n        default_voice: defaultVoice,\n        activate_stt: mode === \"stt\",\n        activate_tts: mode === \"tts\",\n      });\n\n      if (response.ok) {\n        onSuccess();\n      } else {\n        const data = await response.json().catch(() => ({}));\n        const detail =\n          typeof data?.detail === \"string\"\n            ? data.detail\n            : \"Failed to save provider\";\n        setPhase(\"idle\");\n        setMessage({ kind: \"error\", text: detail });\n      }\n    } catch {\n      setPhase(\"idle\");\n      setMessage({ kind: \"error\", text: \"Failed to save provider\" });\n    }\n  };\n\n  return (\n    <Modal open onOpenChange={(isOpen) => !isOpen && onClose()}>\n      <Modal.Content width=\"sm\">\n        <Modal.Header\n          icon={LogoArrangement}\n          title={isEditing ? `Edit ${label}` : `Set up ${label}`}\n          description={`Connect to ${label} and set up your voice models.`}\n          onClose={onClose}\n        />\n        <Modal.Body>\n          <Section gap={1} alignItems=\"stretch\">\n            <FormField name=\"api_key\" state={formFieldState} className=\"w-full\">\n              <FormField.Label>API Key</FormField.Label>\n              <FormField.Description>\n                {isEditing ? (\n                  \"Leave blank to keep existing key\"\n                ) : (\n                  <>\n                    Paste your{\" \"}\n                    <a\n                      href={PROVIDER_API_KEY_URLS[providerType]}\n                      target=\"_blank\"\n                      rel=\"noopener noreferrer\"\n                      className=\"underline\"\n                    >\n                      API key\n                    </a>{\" \"}\n                    from {label} to access your models.\n                  </>\n                )}\n              </FormField.Description>\n              <FormField.Control asChild>\n                {providerType === \"openai\" &&\n                existingApiKeyOptions.length > 0 ? (\n                  <InputComboBox\n                    placeholder={isEditing ? \"••••••••\" : \"Enter API key\"}\n                    value={apiKey}\n                    onChange={(e) => {\n                      setApiKey(e.target.value);\n                      setApiKeyChanged(true);\n                      setSelectedLlmProviderId(null);\n                      setMessage(null);\n                    }}\n                    onValueChange={(value) => {\n                      setApiKey(value);\n                      // Check if this is an existing key\n                      const llmProviderId = llmProviderMap.get(value);\n                      if (llmProviderId) {\n                        setSelectedLlmProviderId(llmProviderId);\n                        setApiKeyChanged(false);\n                      } else {\n                        setSelectedLlmProviderId(null);\n                        setApiKeyChanged(true);\n                      }\n                      setMessage(null);\n                    }}\n                    options={existingApiKeyOptions}\n                    separatorLabel=\"Reuse OpenAI API Keys\"\n                    strict={false}\n                    showAddPrefix\n                  />\n                ) : (\n                  <PasswordInputTypeIn\n                    placeholder={isEditing ? \"••••••••\" : \"Enter API key\"}\n                    value={apiKey}\n                    onChange={(e) => {\n                      setApiKey(e.target.value);\n                      setApiKeyChanged(true);\n                      setMessage(null);\n                    }}\n                    showClearButton={false}\n                  />\n                )}\n              </FormField.Control>\n              {isProcessing ? (\n                <FormField.APIMessage\n                  state=\"loading\"\n                  messages={{\n                    loading: message?.text ?? \"Validating API key...\",\n                  }}\n                />\n              ) : message ? (\n                <FormField.Message\n                  messages={{\n                    idle: \"\",\n                    error: message.kind === \"error\" ? message.text : \"\",\n                    success: message.kind === \"success\" ? message.text : \"\",\n                  }}\n                />\n              ) : null}\n            </FormField>\n\n            {providerType === \"azure\" && (\n              <Vertical\n                title=\"Target URI\"\n                subDescription={markdown(\n                  \"Paste the endpoint shown in [Azure Portal (Keys and Endpoint)](https://portal.azure.com/). Onyx extracts the speech region from this URL. Examples: https://westus.api.cognitive.microsoft.com/ or https://westus.tts.speech.microsoft.com/.\"\n                )}\n                nonInteractive\n              >\n                <InputTypeIn\n                  placeholder={\n                    isEditing\n                      ? \"Leave blank to keep existing\"\n                      : \"https://<region>.api.cognitive.microsoft.com/\"\n                  }\n                  value={targetUri}\n                  onChange={(e) => setTargetUri(e.target.value)}\n                />\n              </Vertical>\n            )}\n\n            {providerType === \"openai\" && mode === \"stt\" && (\n              <Horizontal title=\"STT Model\" center nonInteractive>\n                <InputSelect value={sttModel} onValueChange={setSttModel}>\n                  <InputSelect.Trigger />\n                  <InputSelect.Content>\n                    {OPENAI_STT_MODELS.map((model) => (\n                      <InputSelect.Item key={model.id} value={model.id}>\n                        {model.name}\n                      </InputSelect.Item>\n                    ))}\n                  </InputSelect.Content>\n                </InputSelect>\n              </Horizontal>\n            )}\n\n            {providerType === \"openai\" && mode === \"tts\" && (\n              <Vertical\n                title=\"Default Model\"\n                subDescription=\"This model will be used by Onyx by default for text-to-speech.\"\n                nonInteractive\n              >\n                <InputSelect value={ttsModel} onValueChange={setTtsModel}>\n                  <InputSelect.Trigger />\n                  <InputSelect.Content>\n                    {OPENAI_TTS_MODELS.map((model) => (\n                      <InputSelect.Item key={model.id} value={model.id}>\n                        {model.name}\n                      </InputSelect.Item>\n                    ))}\n                  </InputSelect.Content>\n                </InputSelect>\n              </Vertical>\n            )}\n\n            {mode === \"tts\" && (\n              <Vertical\n                title=\"Voice\"\n                subDescription={markdown(\n                  `This voice will be used for spoken responses. See full list of supported languages and voices at [${\n                    PROVIDER_VOICE_DOCS_URLS[providerType]?.label ?? label\n                  }](${\n                    PROVIDER_VOICE_DOCS_URLS[providerType]?.url ??\n                    PROVIDER_DOCS_URLS[providerType]\n                  }).`\n                )}\n                nonInteractive\n              >\n                <InputComboBox\n                  value={defaultVoice}\n                  onValueChange={setDefaultVoice}\n                  options={voiceOptions}\n                  placeholder={\n                    isLoadingVoices\n                      ? \"Loading voices...\"\n                      : \"Select a voice or enter voice ID\"\n                  }\n                  disabled={isLoadingVoices}\n                  strict={false}\n                />\n              </Vertical>\n            )}\n          </Section>\n        </Modal.Body>\n        <Modal.Footer>\n          <Button secondary onClick={onClose}>\n            Cancel\n          </Button>\n          <Disabled disabled={!canConnect || isProcessing}>\n            <Button\n              onClick={handleSubmit}\n              disabled={!canConnect || isProcessing}\n            >\n              {isProcessing ? \"Connecting...\" : isEditing ? \"Save\" : \"Connect\"}\n            </Button>\n          </Disabled>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/configuration/voice/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/VoiceConfigurationPage\";\n"
  },
  {
    "path": "web/src/app/admin/configuration/web-search/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/WebSearchPage\";\n"
  },
  {
    "path": "web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\n\nimport { ValidSources } from \"@/lib/types\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { SvgChevronUp, SvgChevronDown, SvgEdit } from \"@opal/icons\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\n\nfunction convertObjectToString(obj: any): string | any {\n  if (typeof obj === \"object\" && obj !== null) {\n    if (!Array.isArray(obj)) {\n      return JSON.stringify(obj);\n    } else {\n      if (obj.length === 0) {\n        return null;\n      }\n      return obj.map((item) => convertObjectToString(item)).join(\", \");\n    }\n  }\n  if (typeof obj === \"boolean\") {\n    return obj.toString();\n  }\n  return obj;\n}\n\nexport function buildConfigEntries(\n  obj: any,\n  sourceType: ValidSources\n): { [key: string]: string } {\n  if (sourceType === ValidSources.File) {\n    return {};\n  } else if (sourceType === ValidSources.GoogleSites) {\n    return {\n      base_url: obj.base_url,\n    };\n  }\n  return obj;\n}\n\ninterface ConfigItemProps {\n  label: string;\n  value: any;\n  onEdit?: () => void;\n}\n\nfunction ConfigItem({ label, value, onEdit }: ConfigItemProps) {\n  const [isExpanded, setIsExpanded] = useState(false);\n  const isExpandable = Array.isArray(value) && value.length > 5;\n\n  const renderValue = () => {\n    if (Array.isArray(value)) {\n      const displayedItems = isExpanded ? value : value.slice(0, 5);\n      return (\n        <Section\n          flexDirection=\"row\"\n          gap={0.25}\n          justifyContent=\"end\"\n          alignItems=\"center\"\n          height=\"fit\"\n        >\n          <Text secondaryBody text03 className=\"break-words\">\n            {displayedItems\n              .map((item) => convertObjectToString(item))\n              .join(\", \")}\n          </Text>\n        </Section>\n      );\n    } else if (typeof value === \"object\" && value !== null) {\n      return (\n        <Section gap={0.25} alignItems=\"end\" height=\"fit\">\n          {Object.entries(value).map(([key, val]) => (\n            <Text key={key} secondaryBody text03 className=\"break-words\">\n              <Text mainContentEmphasis text03>\n                {key}:\n              </Text>{\" \"}\n              {convertObjectToString(val)}\n            </Text>\n          ))}\n        </Section>\n      );\n    } else if (typeof value === \"boolean\") {\n      return (\n        <Text secondaryBody text03 className=\"text-right\">\n          {value ? \"True\" : \"False\"}\n        </Text>\n      );\n    }\n    return (\n      <Truncated secondaryBody text03 className=\"text-right\">\n        {convertObjectToString(value) || \"-\"}\n      </Truncated>\n    );\n  };\n\n  return (\n    <Section\n      flexDirection=\"row\"\n      justifyContent=\"between\"\n      alignItems=\"center\"\n      gap={1}\n    >\n      <Section alignItems=\"start\">\n        <Text mainUiBody text04>\n          {label}\n        </Text>\n      </Section>\n      <Section\n        flexDirection=\"row\"\n        justifyContent=\"end\"\n        alignItems=\"center\"\n        gap={0.5}\n      >\n        {renderValue()}\n\n        {isExpandable && (\n          <Button\n            prominence=\"tertiary\"\n            size=\"md\"\n            icon={isExpanded ? SvgChevronUp : SvgChevronDown}\n            onClick={() => setIsExpanded(!isExpanded)}\n          >\n            {isExpanded ? \"Show less\" : `Show all (${value.length} items)`}\n          </Button>\n        )}\n        {onEdit && (\n          <Button\n            prominence=\"tertiary\"\n            icon={SvgEdit}\n            onClick={onEdit}\n            tooltip=\"Edit\"\n          />\n        )}\n      </Section>\n    </Section>\n  );\n}\n\nexport function AdvancedConfigDisplay({\n  pruneFreq,\n  refreshFreq,\n  indexingStart,\n  onRefreshEdit,\n  onPruningEdit,\n}: {\n  pruneFreq: number | null;\n  refreshFreq: number | null;\n  indexingStart: Date | null;\n  onRefreshEdit: () => void;\n  onPruningEdit: () => void;\n}) {\n  const formatRefreshFrequency = (seconds: number | null): string => {\n    if (seconds === null) return \"-\";\n    const totalMinutes = seconds / 60;\n\n    // If it's 60 minutes or more and evenly divisible by 60, show in hours\n    if (totalMinutes >= 60 && totalMinutes % 60 === 0) {\n      const hours = totalMinutes / 60;\n      return `${hours} hour${hours !== 1 ? \"s\" : \"\"}`;\n    }\n\n    // Otherwise show in minutes\n    const minutes = Math.round(totalMinutes);\n    return `${minutes} minute${minutes !== 1 ? \"s\" : \"\"}`;\n  };\n  const formatPruneFrequency = (seconds: number | null): string => {\n    if (seconds === null) return \"-\";\n    const totalHours = seconds / 3600;\n\n    // If less than 1 hour, show in minutes\n    if (totalHours < 1) {\n      const minutes = Math.round(seconds / 60);\n      return `${minutes} minute${minutes !== 1 ? \"s\" : \"\"}`;\n    }\n\n    const hours = Math.round(totalHours);\n\n    // If it's 24 hours or more and evenly divisible by 24, show in days\n    if (hours >= 24 && hours % 24 === 0) {\n      const days = hours / 24;\n      return `${days} day${days !== 1 ? \"s\" : \"\"}`;\n    }\n\n    // Otherwise show in hours\n    return `${hours} hour${hours !== 1 ? \"s\" : \"\"}`;\n  };\n\n  const formatDate = (date: Date | null): string => {\n    if (date === null) return \"-\";\n    return date.toLocaleString(\"en-US\", {\n      year: \"numeric\",\n      month: \"long\",\n      day: \"numeric\",\n      hour: \"2-digit\",\n      minute: \"2-digit\",\n      timeZoneName: \"short\",\n    });\n  };\n\n  const items = [\n    pruneFreq !== null && {\n      label: \"Pruning Frequency\",\n      value: formatPruneFrequency(pruneFreq),\n      onEdit: onPruningEdit,\n    },\n    refreshFreq && {\n      label: \"Refresh Frequency\",\n      value: formatRefreshFrequency(refreshFreq),\n      onEdit: onRefreshEdit,\n    },\n    indexingStart && {\n      label: \"Indexing Start\",\n      value: formatDate(indexingStart),\n    },\n  ].filter(Boolean) as ConfigItemProps[];\n\n  return (\n    <Section gap={0} height=\"fit\">\n      {items.map((item, index) => (\n        <div key={item.label} className=\"w-full\">\n          <div className=\"py-4\">\n            <ConfigItem\n              label={item.label}\n              value={item.value}\n              onEdit={item.onEdit}\n            />\n          </div>\n          {index < items.length - 1 && <Separator noPadding />}\n        </div>\n      ))}\n    </Section>\n  );\n}\n\nexport function ConfigDisplay({\n  configEntries,\n  onEdit,\n}: {\n  configEntries: { [key: string]: string };\n  onEdit?: (key: string) => void;\n}) {\n  const entries = Object.entries(configEntries);\n\n  return (\n    <Section gap={0} height=\"fit\">\n      {entries.map(([key, value], index) => (\n        <div key={key} className=\"w-full\">\n          <div className=\"py-4\">\n            <ConfigItem\n              label={key}\n              value={value}\n              onEdit={onEdit ? () => onEdit(key) : undefined}\n            />\n          </div>\n          {index < entries.length - 1 && <Separator noPadding />}\n        </div>\n      ))}\n    </Section>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connector/[ccPairId]/DeletionErrorStatus.tsx",
    "content": "import { FiInfo } from \"react-icons/fi\";\n\nexport default function DeletionErrorStatus({\n  deletion_failure_message,\n}: {\n  deletion_failure_message: string;\n}) {\n  return (\n    <div className=\"mt-2 rounded-md border border-error-300 bg-error-50 p-4 text-error-600 max-w-3xl\">\n      <div className=\"flex items-center\">\n        <h3 className=\"text-base font-medium\">Deletion Error</h3>\n        <div className=\"ml-2 relative group\">\n          <FiInfo className=\"h-4 w-4 text-error-600 cursor-help\" />\n          <div className=\"absolute z-10 w-64 p-2 mt-2 text-sm bg-white rounded-md shadow-lg opacity-0 group-hover:opacity-100 transition-opacity duration-300 border border-background-200\">\n            This error occurred while attempting to delete the connector. You\n            may re-attempt a deletion by clicking the &quot;Delete&quot; button.\n          </div>\n        </div>\n      </div>\n      <div className=\"mt-2 text-sm\">\n        <p>{deletion_failure_message}</p>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connector/[ccPairId]/IndexAttemptErrorsModal.tsx",
    "content": "import Modal from \"@/refresh-components/Modal\";\nimport {\n  Table,\n  TableBody,\n  TableCell,\n  TableHead,\n  TableHeader,\n  TableRow,\n} from \"@/components/ui/table\";\nimport { IndexAttemptError } from \"./types\";\nimport { localizeAndPrettify } from \"@/lib/time\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { PageSelector } from \"@/components/PageSelector\";\nimport { useCallback, useEffect, useRef, useState, useMemo } from \"react\";\nimport { SvgAlertTriangle } from \"@opal/icons\";\nexport interface IndexAttemptErrorsModalProps {\n  errors: {\n    items: IndexAttemptError[];\n    total_items: number;\n  };\n  onClose: () => void;\n  onResolveAll: () => void;\n  isResolvingErrors?: boolean;\n}\n\nconst ROW_HEIGHT = 65; // 4rem + 1px for border\n\nexport default function IndexAttemptErrorsModal({\n  errors,\n  onClose,\n  onResolveAll,\n  isResolvingErrors = false,\n}: IndexAttemptErrorsModalProps) {\n  const observerRef = useRef<ResizeObserver | null>(null);\n  const [pageSize, setPageSize] = useState(10);\n  const [currentPage, setCurrentPage] = useState(1);\n\n  const tableContainerRef = useCallback((container: HTMLDivElement | null) => {\n    if (observerRef.current) {\n      observerRef.current.disconnect();\n      observerRef.current = null;\n    }\n\n    if (!container) return;\n\n    const observer = new ResizeObserver(() => {\n      const thead = container.querySelector(\"thead\");\n      const theadHeight = thead?.getBoundingClientRect().height ?? 0;\n      const availableHeight = container.clientHeight - theadHeight;\n      const newPageSize = Math.max(3, Math.floor(availableHeight / ROW_HEIGHT));\n      setPageSize(newPageSize);\n    });\n\n    observer.observe(container);\n    observerRef.current = observer;\n  }, []);\n\n  // When data changes, reset to page 1.\n  // When page size changes (resize), preserve the user's position by\n  // finding which new page contains the first item they were looking at.\n  const prevPageSizeRef = useRef(pageSize);\n  useEffect(() => {\n    if (pageSize !== prevPageSizeRef.current) {\n      setCurrentPage((prev) => {\n        const firstVisibleIndex = (prev - 1) * prevPageSizeRef.current;\n        const newPage = Math.floor(firstVisibleIndex / pageSize) + 1;\n        const totalPages = Math.ceil(errors.items.length / pageSize);\n        return Math.min(newPage, totalPages);\n      });\n      prevPageSizeRef.current = pageSize;\n    } else {\n      setCurrentPage(1);\n    }\n  }, [errors.items.length, pageSize]);\n\n  const paginationData = useMemo(() => {\n    const totalPages = Math.ceil(errors.items.length / pageSize);\n    const startIndex = (currentPage - 1) * pageSize;\n    const currentPageItems = errors.items.slice(\n      startIndex,\n      startIndex + pageSize\n    );\n    return { totalPages, currentPageItems };\n  }, [errors.items, pageSize, currentPage]);\n\n  const hasUnresolvedErrors = useMemo(\n    () => errors.items.some((error) => !error.is_resolved),\n    [errors.items]\n  );\n\n  const handlePageChange = (page: number) => {\n    // Ensure we don't go to an invalid page\n    if (page >= 1 && page <= paginationData.totalPages) {\n      setCurrentPage(page);\n    }\n  };\n\n  return (\n    <Modal open onOpenChange={onClose}>\n      <Modal.Content width=\"full\" height=\"full\">\n        <Modal.Header\n          icon={SvgAlertTriangle}\n          title=\"Indexing Errors\"\n          description={\n            isResolvingErrors\n              ? \"Currently attempting to resolve all errors by performing a full re-index. This may take some time to complete.\"\n              : undefined\n          }\n          onClose={onClose}\n          height=\"fit\"\n        />\n        <Modal.Body height=\"full\">\n          {!isResolvingErrors && (\n            <div className=\"flex flex-col gap-2 flex-shrink-0\">\n              <Text as=\"p\">\n                Below are the errors encountered during indexing. Each row\n                represents a failed document or entity.\n              </Text>\n              <Text as=\"p\">\n                Click the button below to kick off a full re-index to try and\n                resolve these errors. This full re-index may take much longer\n                than a normal update.\n              </Text>\n            </div>\n          )}\n\n          <div\n            ref={tableContainerRef}\n            className=\"flex-1 w-full overflow-hidden min-h-0\"\n          >\n            <Table>\n              <TableHeader>\n                <TableRow>\n                  <TableHead>Time</TableHead>\n                  <TableHead>Document ID</TableHead>\n                  <TableHead className=\"w-1/2\">Error Message</TableHead>\n                  <TableHead>Status</TableHead>\n                </TableRow>\n              </TableHeader>\n              <TableBody>\n                {paginationData.currentPageItems.length > 0 ? (\n                  paginationData.currentPageItems.map((error) => (\n                    <TableRow key={error.id} className=\"h-[4rem]\">\n                      <TableCell>\n                        {localizeAndPrettify(error.time_created)}\n                      </TableCell>\n                      <TableCell>\n                        {error.document_link ? (\n                          <a\n                            href={error.document_link}\n                            target=\"_blank\"\n                            rel=\"noopener noreferrer\"\n                            className=\"text-link hover:underline\"\n                          >\n                            {error.document_id || error.entity_id || \"Unknown\"}\n                          </a>\n                        ) : (\n                          error.document_id || error.entity_id || \"Unknown\"\n                        )}\n                      </TableCell>\n                      <TableCell>\n                        <div className=\"flex items-center h-[2rem] overflow-y-auto whitespace-normal\">\n                          {error.failure_message}\n                        </div>\n                      </TableCell>\n                      <TableCell>\n                        <span\n                          className={`px-2 py-1 rounded text-xs ${\n                            error.is_resolved\n                              ? \"bg-green-100 text-green-800\"\n                              : \"bg-red-100 text-red-800\"\n                          }`}\n                        >\n                          {error.is_resolved ? \"Resolved\" : \"Unresolved\"}\n                        </span>\n                      </TableCell>\n                    </TableRow>\n                  ))\n                ) : (\n                  <TableRow className=\"h-[4rem]\">\n                    <TableCell\n                      colSpan={4}\n                      className=\"text-center py-8 text-gray-500\"\n                    >\n                      No errors found on this page\n                    </TableCell>\n                  </TableRow>\n                )}\n              </TableBody>\n            </Table>\n          </div>\n\n          {paginationData.totalPages > 1 && (\n            <div className=\"flex w-full justify-center\">\n              <PageSelector\n                totalPages={paginationData.totalPages}\n                currentPage={currentPage}\n                onPageChange={handlePageChange}\n              />\n            </div>\n          )}\n        </Modal.Body>\n        <Modal.Footer>\n          {hasUnresolvedErrors && !isResolvingErrors && (\n            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n            <Button onClick={onResolveAll} className=\"ml-4 whitespace-nowrap\">\n              Resolve All\n            </Button>\n          )}\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connector/[ccPairId]/IndexAttemptsTable.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport {\n  Table,\n  TableHead,\n  TableRow,\n  TableBody,\n  TableCell,\n  TableHeader,\n} from \"@/components/ui/table\";\nimport { Text } from \"@opal/components\";\nimport { Callout } from \"@/components/ui/callout\";\nimport { CCPairFullInfo } from \"./types\";\nimport { IndexAttemptSnapshot } from \"@/lib/types\";\nimport { IndexAttemptStatus } from \"@/components/Status\";\nimport { PageSelector } from \"@/components/PageSelector\";\nimport { localizeAndPrettify } from \"@/lib/time\";\nimport { getDocsProcessedPerMinute } from \"@/lib/indexAttempt\";\nimport { InfoIcon } from \"@/components/icons/icons\";\nimport ExceptionTraceModal from \"@/sections/modals/PreviewModal/ExceptionTraceModal\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { SvgClock } from \"@opal/icons\";\nexport interface IndexingAttemptsTableProps {\n  ccPair: CCPairFullInfo;\n  indexAttempts: IndexAttemptSnapshot[];\n  currentPage: number;\n  totalPages: number;\n  onPageChange: (page: number) => void;\n}\n\nexport function IndexAttemptsTable({\n  indexAttempts,\n  currentPage,\n  totalPages,\n  onPageChange,\n}: IndexingAttemptsTableProps) {\n  const [indexAttemptTracePopupId, setIndexAttemptTracePopupId] = useState<\n    number | null\n  >(null);\n\n  if (!indexAttempts?.length) {\n    return (\n      <Callout\n        className=\"mt-4\"\n        title=\"No indexing attempts scheduled yet\"\n        type=\"notice\"\n      >\n        Index attempts are scheduled in the background, and may take some time\n        to appear. Try refreshing the page in ~30 seconds!\n      </Callout>\n    );\n  }\n\n  const indexAttemptToDisplayTraceFor = indexAttempts?.find(\n    (indexAttempt) => indexAttempt.id === indexAttemptTracePopupId\n  );\n\n  return (\n    <>\n      {indexAttemptToDisplayTraceFor?.full_exception_trace && (\n        <ExceptionTraceModal\n          onOutsideClick={() => setIndexAttemptTracePopupId(null)}\n          exceptionTrace={indexAttemptToDisplayTraceFor.full_exception_trace}\n        />\n      )}\n\n      <Table>\n        <TableHeader>\n          <TableRow>\n            <TableHead>Time Started</TableHead>\n            <TableHead>Status</TableHead>\n            <TableHead className=\"whitespace-nowrap\">New Docs</TableHead>\n            <TableHead>\n              <SimpleTooltip\n                tooltip=\"Total number of documents replaced in the index during this indexing attempt\"\n                side=\"top\"\n              >\n                <span className=\"flex items-center\">\n                  Total Docs\n                  <InfoIcon className=\"ml-1 w-4 h-4\" />\n                </span>\n              </SimpleTooltip>\n            </TableHead>\n            <TableHead>Error Message</TableHead>\n          </TableRow>\n        </TableHeader>\n        <TableBody>\n          {indexAttempts.map((indexAttempt) => {\n            const docsPerMinute =\n              getDocsProcessedPerMinute(indexAttempt)?.toFixed(2);\n            const isReindexInProgress =\n              indexAttempt.status === \"in_progress\" ||\n              indexAttempt.status === \"not_started\";\n            const reindexTooltip = `This index attempt ${\n              isReindexInProgress ? \"is\" : \"was\"\n            } a full re-index. All documents from the source ${\n              isReindexInProgress ? \"are being\" : \"were\"\n            } synced into the system.`;\n            return (\n              <TableRow\n                key={indexAttempt.id}\n                className={\n                  indexAttempt.full_exception_trace\n                    ? \"hover:bg-accent-background cursor-pointer relative select-none\"\n                    : undefined\n                }\n              >\n                <TableCell>\n                  {indexAttempt.time_started\n                    ? localizeAndPrettify(indexAttempt.time_started)\n                    : \"-\"}\n                </TableCell>\n                <TableCell>\n                  <IndexAttemptStatus\n                    status={indexAttempt.status || \"not_started\"}\n                  />\n                  {docsPerMinute ? (\n                    <div className=\"text-xs mt-1\">\n                      {docsPerMinute} docs / min\n                    </div>\n                  ) : (\n                    indexAttempt.status === \"success\" && (\n                      <div className=\"text-xs mt-1\">\n                        No additional docs processed\n                      </div>\n                    )\n                  )}\n                </TableCell>\n                <TableCell>\n                  <div className=\"flex\">\n                    <div className=\"text-right\">\n                      <div>{indexAttempt.new_docs_indexed}</div>\n                      {indexAttempt.docs_removed_from_index > 0 && (\n                        <div className=\"text-xs w-52 text-wrap flex italic overflow-hidden whitespace-normal px-1\">\n                          (also removed {indexAttempt.docs_removed_from_index}{\" \"}\n                          docs that were detected as deleted in the source)\n                        </div>\n                      )}\n                    </div>\n                  </div>\n                </TableCell>\n                <TableCell>\n                  <div className=\"flex items-center\">\n                    {indexAttempt.total_docs_indexed}\n                    {indexAttempt.from_beginning && (\n                      <SimpleTooltip side=\"top\" tooltip={reindexTooltip}>\n                        <span className=\"cursor-help flex items-center\">\n                          <SvgClock className=\"ml-2 h-3.5 w-3.5 stroke-current\" />\n                        </span>\n                      </SimpleTooltip>\n                    )}\n                  </div>\n                </TableCell>\n                <TableCell>\n                  {indexAttempt.status === \"success\" && <Text as=\"p\">-</Text>}\n\n                  {indexAttempt.status === \"failed\" &&\n                    indexAttempt.error_msg && (\n                      <Text as=\"p\">{indexAttempt.error_msg}</Text>\n                    )}\n                </TableCell>\n                <td className=\"w-0 p-0\">\n                  {indexAttempt.full_exception_trace && (\n                    <button\n                      type=\"button\"\n                      aria-label=\"View full trace\"\n                      onClick={() =>\n                        setIndexAttemptTracePopupId(indexAttempt.id)\n                      }\n                      className=\"absolute w-full h-full left-0 top-0\"\n                    />\n                  )}\n                </td>\n              </TableRow>\n            );\n          })}\n        </TableBody>\n      </Table>\n      {totalPages > 1 && (\n        <div className=\"flex flex-1 justify-center pt-3\">\n          <PageSelector\n            totalPages={totalPages}\n            currentPage={currentPage}\n            onPageChange={onPageChange}\n          />\n        </div>\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connector/[ccPairId]/InlineFileManagement.tsx",
    "content": "\"use client\";\n\nimport { useState, useRef } from \"react\";\nimport { Button } from \"@opal/components\";\nimport {\n  Table,\n  TableBody,\n  TableCell,\n  TableHead,\n  TableHeader,\n  TableRow,\n} from \"@/components/ui/table\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport {\n  updateConnectorFiles,\n  type ConnectorFileInfo,\n} from \"@/lib/fileConnector\";\nimport { toast } from \"@/hooks/useToast\";\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport Modal from \"@/refresh-components/Modal\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport {\n  SvgCheck,\n  SvgEdit,\n  SvgFolderPlus,\n  SvgPlusCircle,\n  SvgX,\n} from \"@opal/icons\";\nimport { formatBytes } from \"@/lib/utils\";\nimport { timestampToReadableDate } from \"@/lib/dateUtils\";\n\ninterface InlineFileManagementProps {\n  connectorId: number;\n  onRefresh: () => void;\n}\n\nexport default function InlineFileManagement({\n  connectorId,\n  onRefresh,\n}: InlineFileManagementProps) {\n  const [isEditing, setIsEditing] = useState(false);\n  const [selectedFilesToRemove, setSelectedFilesToRemove] = useState<\n    Set<string>\n  >(new Set());\n  const [filesToAdd, setFilesToAdd] = useState<File[]>([]);\n  const [isSaving, setIsSaving] = useState(false);\n  const [showSaveConfirm, setShowSaveConfirm] = useState(false);\n  const fileInputRef = useRef<HTMLInputElement>(null);\n\n  const {\n    data: filesResponse,\n    isLoading,\n    error,\n    mutate: refreshFiles,\n  } = useSWR<{ files: ConnectorFileInfo[] }>(\n    `/api/manage/admin/connector/${connectorId}/files`,\n    errorHandlingFetcher,\n    { refreshInterval: isEditing ? 0 : 5000 } // Disable auto-refresh while editing\n  );\n\n  const files = filesResponse?.files || [];\n\n  const handleFileSelect = (event: React.ChangeEvent<HTMLInputElement>) => {\n    const selectedFiles = event.target.files;\n    if (!selectedFiles || selectedFiles.length === 0) return;\n\n    setFilesToAdd((prev) => [...prev, ...Array.from(selectedFiles)]);\n    // Reset the input\n    if (fileInputRef.current) {\n      fileInputRef.current.value = \"\";\n    }\n  };\n\n  const handleRemoveNewFile = (index: number) => {\n    setFilesToAdd((prev) => prev.filter((_, i) => i !== index));\n  };\n\n  const toggleFileForRemoval = (fileId: string) => {\n    setSelectedFilesToRemove((prev) => {\n      const newSet = new Set(prev);\n      if (newSet.has(fileId)) {\n        newSet.delete(fileId);\n      } else {\n        newSet.add(fileId);\n      }\n      return newSet;\n    });\n  };\n\n  const handleSaveClick = () => {\n    // Validate that we won't remove all files\n    const remainingFiles = files.filter(\n      (file) => !selectedFilesToRemove.has(file.file_id)\n    ).length;\n\n    if (remainingFiles === 0 && filesToAdd.length === 0) {\n      toast.error(\n        \"Cannot remove all files from a connector. Delete the connector if this is desired.\"\n      );\n      return;\n    }\n\n    // Show confirmation modal\n    setShowSaveConfirm(true);\n  };\n\n  const handleConfirmSave = async () => {\n    setShowSaveConfirm(false);\n    setIsSaving(true);\n    try {\n      await updateConnectorFiles(\n        connectorId,\n        Array.from(selectedFilesToRemove),\n        filesToAdd\n      );\n\n      toast.success(\n        \"Files updated successfully! Document index is being updated in the background. \" +\n          \"New files are being indexed and removed files will be pruned from the search results.\"\n      );\n\n      // Reset editing state\n      setIsEditing(false);\n      setSelectedFilesToRemove(new Set());\n      setFilesToAdd([]);\n\n      // Refresh data\n      refreshFiles();\n      onRefresh();\n    } catch (error) {\n      toast.error(\n        error instanceof Error ? error.message : \"Failed to update files\"\n      );\n    } finally {\n      setIsSaving(false);\n    }\n  };\n\n  const handleCancel = () => {\n    setIsEditing(false);\n    setSelectedFilesToRemove(new Set());\n    setFilesToAdd([]);\n  };\n\n  if (isLoading) {\n    return (\n      <div className=\"flex justify-center py-12\">\n        <ThreeDotsLoader />\n      </div>\n    );\n  }\n\n  if (error) {\n    return (\n      <Text as=\"p\" className=\"text-error\">\n        Error loading files: {error.message}\n      </Text>\n    );\n  }\n\n  const currentFiles = files.filter(\n    (file) => !selectedFilesToRemove.has(file.file_id)\n  );\n  const totalFiles = currentFiles.length + filesToAdd.length;\n\n  return (\n    <>\n      {/* Header with Edit/Save buttons */}\n      <div className=\"flex justify-between items-center mb-4\">\n        <Text as=\"p\" mainUiBody>\n          Files ({totalFiles} file{totalFiles !== 1 ? \"s\" : \"\"})\n        </Text>\n        <div className=\"flex gap-2\">\n          {!isEditing ? (\n            <Button\n              prominence=\"secondary\"\n              onClick={() => setIsEditing(true)}\n              icon={SvgEdit}\n            >\n              Edit\n            </Button>\n          ) : (\n            <>\n              <Button\n                disabled={isSaving}\n                prominence=\"secondary\"\n                onClick={handleCancel}\n                icon={SvgX}\n              >\n                Cancel\n              </Button>\n              <Button\n                disabled={\n                  isSaving ||\n                  (selectedFilesToRemove.size === 0 && filesToAdd.length === 0)\n                }\n                onClick={handleSaveClick}\n                icon={SvgCheck}\n              >\n                {isSaving ? \"Saving...\" : \"Save Changes\"}\n              </Button>\n            </>\n          )}\n        </div>\n      </div>\n\n      {/* File List */}\n      {files.length === 0 && filesToAdd.length === 0 ? (\n        <Text as=\"p\" mainUiMuted className=\"text-center py-8\">\n          No files in this connector\n        </Text>\n      ) : (\n        <div className=\"border rounded-lg overflow-hidden mb-4\">\n          {/* Scrollable container with max height */}\n          <div className=\"max-h-[400px] overflow-y-auto\">\n            <Table>\n              <TableHeader className=\"sticky top-0 bg-background z-10\">\n                <TableRow>\n                  {isEditing && <TableHead className=\"w-12\"></TableHead>}\n                  <TableHead>File Name</TableHead>\n                  <TableHead>Size</TableHead>\n                  <TableHead>Upload Date</TableHead>\n                  {isEditing && <TableHead className=\"w-12\"></TableHead>}\n                </TableRow>\n              </TableHeader>\n              <TableBody>\n                {/* Existing files */}\n                {files.map((file) => {\n                  const isMarkedForRemoval = selectedFilesToRemove.has(\n                    file.file_id\n                  );\n                  return (\n                    <TableRow\n                      key={file.file_id}\n                      className={\n                        isMarkedForRemoval\n                          ? \"bg-red-100 dark:bg-red-900/20\"\n                          : \"\"\n                      }\n                    >\n                      {isEditing && (\n                        <TableCell>\n                          <Checkbox\n                            checked={isMarkedForRemoval}\n                            onCheckedChange={() =>\n                              toggleFileForRemoval(file.file_id)\n                            }\n                          />\n                        </TableCell>\n                      )}\n                      <TableCell className=\"font-medium\">\n                        <span\n                          className={\n                            isMarkedForRemoval ? \"line-through opacity-60\" : \"\"\n                          }\n                        >\n                          {file.file_name}\n                        </span>\n                        {isMarkedForRemoval && (\n                          <span className=\"ml-2 text-xs font-semibold text-red-600 dark:text-red-400\">\n                            Removing\n                          </span>\n                        )}\n                      </TableCell>\n                      <TableCell\n                        className={\n                          isMarkedForRemoval ? \"line-through opacity-60\" : \"\"\n                        }\n                      >\n                        {formatBytes(file.file_size)}\n                      </TableCell>\n                      <TableCell\n                        className={\n                          isMarkedForRemoval ? \"line-through opacity-60\" : \"\"\n                        }\n                      >\n                        {file.upload_date\n                          ? timestampToReadableDate(file.upload_date)\n                          : \"-\"}\n                      </TableCell>\n                      {isEditing && <TableCell></TableCell>}\n                    </TableRow>\n                  );\n                })}\n\n                {/* New files to be added */}\n                {filesToAdd.map((file, index) => (\n                  <TableRow\n                    key={`new-${index}`}\n                    className=\"bg-green-50 dark:bg-green-900/10\"\n                  >\n                    {isEditing && (\n                      <TableCell>\n                        <Button\n                          icon={SvgX}\n                          variant=\"danger\"\n                          prominence=\"tertiary\"\n                          size=\"sm\"\n                          onClick={() => handleRemoveNewFile(index)}\n                          tooltip=\"Remove file\"\n                          title=\"Remove file\"\n                        />\n                      </TableCell>\n                    )}\n                    <TableCell className=\"font-medium\">\n                      {file.name}\n                      <Text as=\"p\" figureSmallValue>\n                        New\n                      </Text>\n                    </TableCell>\n                    <TableCell>{formatBytes(file.size)}</TableCell>\n                    <TableCell>-</TableCell>\n                    {isEditing && <TableCell></TableCell>}\n                  </TableRow>\n                ))}\n              </TableBody>\n            </Table>\n          </div>\n        </div>\n      )}\n\n      {/* Add Files Button (only in edit mode) */}\n      {isEditing && (\n        <div className=\"mt-4\">\n          <input\n            ref={fileInputRef}\n            type=\"file\"\n            multiple\n            onChange={handleFileSelect}\n            className=\"hidden\"\n            id={`file-upload-${connectorId}`}\n          />\n          <Button\n            disabled={isSaving}\n            prominence=\"secondary\"\n            onClick={() => fileInputRef.current?.click()}\n            icon={SvgPlusCircle}\n          >\n            Add Files\n          </Button>\n        </div>\n      )}\n\n      {/* Confirmation Modal */}\n      <Modal open={showSaveConfirm} onOpenChange={setShowSaveConfirm}>\n        <Modal.Content width=\"sm\">\n          <Modal.Header\n            icon={SvgFolderPlus}\n            title=\"Confirm File Changes\"\n            description=\"When you save these changes, the following will happen:\"\n          />\n\n          <Modal.Body>\n            {selectedFilesToRemove.size > 0 && (\n              <div className=\"p-3 bg-red-50 dark:bg-red-900/10 rounded-md\">\n                <Text\n                  as=\"p\"\n                  mainUiBody\n                  className=\"font-semibold text-red-800 dark:text-red-200\"\n                >\n                  🗑️ {selectedFilesToRemove.size} file(s) will be removed\n                </Text>\n                <Text\n                  as=\"p\"\n                  secondaryBody\n                  className=\"text-red-700 dark:text-red-300 mt-1\"\n                >\n                  Documents from these files will be pruned from the Document\n                  Index\n                </Text>\n              </div>\n            )}\n\n            {filesToAdd.length > 0 && (\n              <div className=\"p-3 bg-green-50 dark:bg-green-900/10 rounded-md\">\n                <Text\n                  as=\"p\"\n                  mainUiBody\n                  className=\"font-semibold text-green-800 dark:text-green-200\"\n                >\n                  {filesToAdd.length} file(s) will be added\n                </Text>\n                <Text\n                  as=\"p\"\n                  secondaryBody\n                  className=\"text-green-700 dark:text-green-300 mt-1\"\n                >\n                  New files will be uploaded, chunked, embedded, and indexed in\n                  the Document Index\n                </Text>\n              </div>\n            )}\n          </Modal.Body>\n\n          <Modal.Footer>\n            <Button\n              disabled={isSaving}\n              prominence=\"secondary\"\n              onClick={() => setShowSaveConfirm(false)}\n            >\n              Cancel\n            </Button>\n            <Button disabled={isSaving} onClick={handleConfirmSave}>\n              {isSaving ? \"Saving...\" : \"Confirm & Save\"}\n            </Button>\n          </Modal.Footer>\n        </Modal.Content>\n      </Modal>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connector/[ccPairId]/ReIndexModal.tsx",
    "content": "\"use client\";\n\nimport { Button } from \"@opal/components\";\nimport { useState } from \"react\";\nimport { toast } from \"@/hooks/useToast\";\nimport { triggerIndexing } from \"@/app/admin/connector/[ccPairId]/lib\";\nimport Modal from \"@/refresh-components/Modal\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { SvgRefreshCw } from \"@opal/icons\";\n// Hook to handle re-indexing functionality\nexport function useReIndexModal(\n  connectorId: number | null,\n  credentialId: number | null,\n  ccPairId: number | null\n) {\n  const [reIndexPopupVisible, setReIndexPopupVisible] = useState(false);\n\n  const showReIndexModal = () => {\n    if (connectorId == null || credentialId == null || ccPairId == null) {\n      return;\n    }\n    setReIndexPopupVisible(true);\n  };\n\n  const hideReIndexModal = () => {\n    setReIndexPopupVisible(false);\n  };\n\n  const triggerReIndex = async (fromBeginning: boolean) => {\n    if (connectorId == null || credentialId == null || ccPairId == null) {\n      return;\n    }\n\n    try {\n      const result = await triggerIndexing(\n        fromBeginning,\n        connectorId,\n        credentialId,\n        ccPairId\n      );\n\n      // Show appropriate notification based on result\n      if (result.success) {\n        toast.success(\n          `${\n            fromBeginning ? \"Complete re-indexing\" : \"Indexing update\"\n          } started successfully`\n        );\n      } else {\n        toast.error(result.message || \"Failed to start indexing\");\n      }\n    } catch (error) {\n      console.error(\"Failed to trigger indexing:\", error);\n      toast.error(\n        \"An unexpected error occurred while trying to start indexing\"\n      );\n    }\n  };\n\n  const FinalReIndexModal =\n    reIndexPopupVisible &&\n    connectorId != null &&\n    credentialId != null &&\n    ccPairId != null ? (\n      <ReIndexModal hide={hideReIndexModal} onRunIndex={triggerReIndex} />\n    ) : null;\n\n  return {\n    showReIndexModal,\n    ReIndexModal: FinalReIndexModal,\n  };\n}\n\nexport interface ReIndexModalProps {\n  hide: () => void;\n  onRunIndex: (fromBeginning: boolean) => Promise<void>;\n}\n\nexport default function ReIndexModal({ hide, onRunIndex }: ReIndexModalProps) {\n  const [isProcessing, setIsProcessing] = useState(false);\n\n  const handleRunIndex = async (fromBeginning: boolean) => {\n    if (isProcessing) return;\n\n    setIsProcessing(true);\n    try {\n      // First show immediate feedback with a toast\n      toast.info(\n        `Starting ${\n          fromBeginning ? \"complete re-indexing\" : \"indexing update\"\n        }...`\n      );\n\n      // Then close the modal\n      hide();\n\n      // Then run the indexing operation\n      await onRunIndex(fromBeginning);\n    } catch (error) {\n      console.error(\"Error starting indexing:\", error);\n      // Show error in toast if needed\n      toast.error(\"Failed to start indexing process\");\n    } finally {\n      setIsProcessing(false);\n    }\n  };\n\n  return (\n    <Modal open onOpenChange={hide}>\n      <Modal.Content width=\"sm\" height=\"sm\">\n        <Modal.Header icon={SvgRefreshCw} title=\"Run Indexing\" onClose={hide} />\n        <Modal.Body>\n          <Text as=\"p\">\n            This will pull in and index all documents that have changed and/or\n            have been added since the last successful indexing run.\n          </Text>\n          <Button disabled={isProcessing} onClick={() => handleRunIndex(false)}>\n            Run Update\n          </Button>\n\n          <Separator />\n\n          <Text as=\"p\">\n            This will cause a complete re-indexing of all documents from the\n            source.\n          </Text>\n          <Text as=\"p\">\n            <strong>NOTE:</strong> depending on the number of documents stored\n            in the source, this may take a long time.\n          </Text>\n\n          <Button disabled={isProcessing} onClick={() => handleRunIndex(true)}>\n            Run Complete Re-Indexing\n          </Button>\n        </Modal.Body>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connector/[ccPairId]/lib.ts",
    "content": "import { runConnector } from \"@/lib/connector\";\nimport { ValidSources } from \"@/lib/types\";\nimport { mutate } from \"swr\";\n\nexport function buildCCPairInfoUrl(ccPairId: string | number) {\n  return `/api/manage/admin/cc-pair/${ccPairId}`;\n}\n\nexport function buildSimilarCredentialInfoURL(\n  source_type: ValidSources,\n  get_editable: boolean = false\n) {\n  const base = `/api/manage/admin/similar-credentials/${source_type}`;\n  return get_editable ? `${base}?get_editable=True` : base;\n}\n\nexport async function triggerIndexing(\n  fromBeginning: boolean,\n  connectorId: number,\n  credentialId: number,\n  ccPairId: number\n): Promise<{ success: boolean; message: string }> {\n  const errorMsg = await runConnector(\n    connectorId,\n    [credentialId],\n    fromBeginning\n  );\n\n  mutate(buildCCPairInfoUrl(ccPairId));\n\n  if (errorMsg) {\n    return {\n      success: false,\n      message: errorMsg,\n    };\n  } else {\n    return {\n      success: true,\n      message: \"Triggered connector run\",\n    };\n  }\n}\n\nexport function getTooltipMessage(\n  isInvalid: boolean,\n  isDeleting: boolean,\n  isIndexing: boolean,\n  isDisabled: boolean\n): string | undefined {\n  if (isInvalid) {\n    return \"Connector is in an invalid state. Please update the credentials or configuration before re-indexing.\";\n  }\n  if (isDeleting) {\n    return \"Cannot index while connector is deleting\";\n  }\n  if (isIndexing) {\n    return \"Indexing is already in progress\";\n  }\n  if (isDisabled) {\n    return \"Connector must be re-enabled before indexing\";\n  }\n  return undefined;\n}\n"
  },
  {
    "path": "web/src/app/admin/connector/[ccPairId]/page.tsx",
    "content": "\"use client\";\n\nimport BackButton from \"@/refresh-components/buttons/BackButton\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport { CCPairStatus, PermissionSyncStatus } from \"@/components/Status\";\nimport { toast } from \"@/hooks/useToast\";\nimport CredentialSection from \"@/components/credentials/CredentialSection\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport {\n  updateConnectorCredentialPairName,\n  updateConnectorCredentialPairProperty,\n} from \"@/lib/connector\";\nimport { credentialTemplates } from \"@/lib/connectors/credentials\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport Title from \"@/components/ui/title\";\nimport { useRouter } from \"next/navigation\";\nimport { useCallback, useEffect, useRef, useState, use } from \"react\";\nimport useSWR, { mutate } from \"swr\";\nimport {\n  AdvancedConfigDisplay,\n  buildConfigEntries,\n  ConfigDisplay,\n} from \"./ConfigDisplay\";\nimport DeletionErrorStatus from \"./DeletionErrorStatus\";\nimport { IndexAttemptsTable } from \"./IndexAttemptsTable\";\nimport InlineFileManagement from \"./InlineFileManagement\";\nimport { buildCCPairInfoUrl, triggerIndexing } from \"./lib\";\nimport { Alert, AlertDescription, AlertTitle } from \"@/components/ui/alert\";\nimport {\n  CCPairFullInfo,\n  ConnectorCredentialPairStatus,\n  IndexAttemptError,\n  statusIsNotCurrentlyActive,\n} from \"./types\";\nimport { EditableStringFieldDisplay } from \"@/components/EditableStringFieldDisplay\";\nimport EditPropertyModal from \"@/components/modals/EditPropertyModal\";\nimport { AdvancedOptionsToggle } from \"@/components/AdvancedOptionsToggle\";\nimport { deleteCCPair } from \"@/lib/documentDeletion\";\nimport { ConfirmEntityModal } from \"@/components/modals/ConfirmEntityModal\";\nimport * as Yup from \"yup\";\nimport {\n  AlertCircle,\n  PlayIcon,\n  PauseIcon,\n  Trash2Icon,\n  RefreshCwIcon,\n} from \"lucide-react\";\nimport IndexAttemptErrorsModal from \"./IndexAttemptErrorsModal\";\nimport usePaginatedFetch from \"@/hooks/usePaginatedFetch\";\nimport { IndexAttemptSnapshot } from \"@/lib/types\";\nimport { Spinner } from \"@/components/Spinner\";\nimport { Callout } from \"@/components/ui/callout\";\nimport { Card } from \"@/components/ui/card\";\nimport {\n  DropdownMenu,\n  DropdownMenuContent,\n  DropdownMenuTrigger,\n} from \"@/components/ui/dropdown-menu\";\nimport { DropdownMenuItemWithTooltip } from \"@/components/ui/dropdown-menu-with-tooltip\";\nimport { timeAgo } from \"@/lib/time\";\nimport { useStatusChange } from \"./useStatusChange\";\nimport { useReIndexModal } from \"./ReIndexModal\";\nimport { Button } from \"@opal/components\";\nimport { SvgSettings } from \"@opal/icons\";\nimport { UserRole } from \"@/lib/types\";\nimport { useUser } from \"@/providers/UserProvider\";\n// synchronize these validations with the SQLAlchemy connector class until we have a\n// centralized schema for both frontend and backend\nconst RefreshFrequencySchema = Yup.object().shape({\n  propertyValue: Yup.number()\n    .typeError(\"Property value must be a valid number\")\n    .integer(\"Property value must be an integer\")\n    .min(1, \"Property value must be greater than or equal to 1 minute\")\n    .required(\"Property value is required\"),\n});\n\nconst PruneFrequencySchema = Yup.object().shape({\n  propertyValue: Yup.number()\n    .typeError(\"Property value must be a valid number\")\n    .min(\n      0.083,\n      \"Property value must be greater than or equal to 0.083 hours (5 minutes)\"\n    )\n    .required(\"Property value is required\"),\n});\n\nconst ITEMS_PER_PAGE = 8;\nconst PAGES_PER_BATCH = 8;\n\nfunction Main({ ccPairId }: { ccPairId: number }) {\n  const router = useRouter();\n  const { user } = useUser();\n\n  const {\n    data: ccPair,\n    isLoading: isLoadingCCPair,\n    error: ccPairError,\n  } = useSWR<CCPairFullInfo>(\n    buildCCPairInfoUrl(ccPairId),\n    errorHandlingFetcher,\n    { refreshInterval: 5000 } // 5 seconds\n  );\n\n  const {\n    currentPageData: indexAttempts,\n    isLoading: isLoadingIndexAttempts,\n    currentPage,\n    totalPages,\n    goToPage,\n  } = usePaginatedFetch<IndexAttemptSnapshot>({\n    itemsPerPage: ITEMS_PER_PAGE,\n    pagesPerBatch: PAGES_PER_BATCH,\n    endpoint: `${buildCCPairInfoUrl(ccPairId)}/index-attempts`,\n  });\n\n  const { currentPageData: indexAttemptErrorsPage } =\n    usePaginatedFetch<IndexAttemptError>({\n      itemsPerPage: 10,\n      pagesPerBatch: 1,\n      endpoint: `/api/manage/admin/cc-pair/${ccPairId}/errors`,\n    });\n\n  // Initialize hooks at top level to avoid conditional hook calls\n  const { showReIndexModal, ReIndexModal } = useReIndexModal(\n    ccPair?.connector?.id ?? null,\n    ccPair?.credential?.id ?? null,\n    ccPairId\n  );\n\n  const {\n    handleStatusChange,\n    isUpdating: isStatusUpdating,\n    ConfirmModal,\n  } = useStatusChange(ccPair || null);\n\n  const indexAttemptErrors = indexAttemptErrorsPage\n    ? {\n        items: indexAttemptErrorsPage,\n        total_items: indexAttemptErrorsPage.length,\n      }\n    : null;\n\n  const [hasLoadedOnce, setHasLoadedOnce] = useState(false);\n  const [editingRefreshFrequency, setEditingRefreshFrequency] = useState(false);\n  const [editingPruningFrequency, setEditingPruningFrequency] = useState(false);\n  const [showIndexAttemptErrors, setShowIndexAttemptErrors] = useState(false);\n\n  const [showIsResolvingKickoffLoader, setShowIsResolvingKickoffLoader] =\n    useState(false);\n  const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);\n  const [showDeleteConnectorConfirmModal, setShowDeleteConnectorConfirmModal] =\n    useState(false);\n  const isSchedulingConnectorDeletionRef = useRef(false);\n\n  const refresh = useCallback(() => {\n    mutate(buildCCPairInfoUrl(ccPairId));\n  }, [ccPairId]);\n\n  const finishConnectorDeletion = useCallback(() => {\n    router.push(\"/admin/indexing/status\");\n  }, [router]);\n\n  const scheduleConnectorDeletion = useCallback(() => {\n    if (!ccPair) return;\n    if (isSchedulingConnectorDeletionRef.current) return;\n    isSchedulingConnectorDeletionRef.current = true;\n\n    deleteCCPair(ccPair.connector.id, ccPair.credential.id).catch((error) => {\n      toast.error(\n        \"Failed to schedule deletion of connector - \" + error.message\n      );\n    });\n    finishConnectorDeletion();\n  }, [ccPair, finishConnectorDeletion]);\n\n  const latestIndexAttempt = indexAttempts?.[0];\n  const canManageInlineFileConnectorFiles =\n    ccPair?.connector.source === \"file\" &&\n    (ccPair.is_editable_for_current_user ||\n      (user?.role === UserRole.GLOBAL_CURATOR &&\n        ccPair.access_type === \"public\"));\n\n  const isResolvingErrors =\n    (latestIndexAttempt?.status === \"in_progress\" ||\n      latestIndexAttempt?.status === \"not_started\") &&\n    latestIndexAttempt?.from_beginning &&\n    // if there are errors in the latest index attempt, we don't want to show the loader\n    !indexAttemptErrors?.items?.some(\n      (error) => error.index_attempt_id === latestIndexAttempt?.id\n    );\n\n  const handleStatusUpdate = async (\n    newStatus: ConnectorCredentialPairStatus\n  ) => {\n    setShowIsResolvingKickoffLoader(true); // Show fullscreen spinner\n    await handleStatusChange(newStatus);\n    setShowIsResolvingKickoffLoader(false); // Hide fullscreen spinner\n  };\n\n  const triggerReIndex = async (fromBeginning: boolean) => {\n    if (!ccPair) return;\n\n    setShowIsResolvingKickoffLoader(true);\n\n    try {\n      const result = await triggerIndexing(\n        fromBeginning,\n        ccPair.connector.id,\n        ccPair.credential.id,\n        ccPair.id\n      );\n\n      if (result.success) {\n        toast.success(\n          `${\n            fromBeginning ? \"Complete re-indexing\" : \"Indexing update\"\n          } started successfully`\n        );\n      } else {\n        toast.error(result.message || \"Failed to start indexing\");\n      }\n    } catch (error) {\n      console.error(\"Failed to trigger indexing:\", error);\n      toast.error(\n        \"An unexpected error occurred while trying to start indexing\"\n      );\n    } finally {\n      setShowIsResolvingKickoffLoader(false);\n    }\n  };\n\n  useEffect(() => {\n    if (isLoadingCCPair) {\n      return;\n    }\n    if (ccPair && !ccPairError) {\n      setHasLoadedOnce(true);\n    }\n\n    if (\n      (hasLoadedOnce && (ccPairError || !ccPair)) ||\n      (ccPair?.status === ConnectorCredentialPairStatus.DELETING &&\n        !ccPair.connector)\n    ) {\n      finishConnectorDeletion();\n    }\n  }, [\n    isLoadingCCPair,\n    ccPair,\n    ccPairError,\n    hasLoadedOnce,\n    finishConnectorDeletion,\n  ]);\n\n  const handleUpdateName = async (newName: string) => {\n    try {\n      const response = await updateConnectorCredentialPairName(\n        ccPair?.id!,\n        newName\n      );\n      if (!response.ok) {\n        throw new Error(await response.text());\n      }\n      mutate(buildCCPairInfoUrl(ccPairId));\n      toast.success(\"Connector name updated successfully\");\n    } catch (error) {\n      toast.error(\"Failed to update connector name\");\n    }\n  };\n\n  const handleRefreshEdit = async () => {\n    setEditingRefreshFrequency(true);\n  };\n\n  const handlePruningEdit = async () => {\n    setEditingPruningFrequency(true);\n  };\n\n  const handleRefreshSubmit = async (\n    propertyName: string,\n    propertyValue: string\n  ) => {\n    const parsedRefreshFreqMinutes = parseInt(propertyValue, 10);\n\n    if (isNaN(parsedRefreshFreqMinutes)) {\n      toast.error(\"Invalid refresh frequency: must be an integer\");\n      return;\n    }\n\n    // Convert minutes to seconds\n    const parsedRefreshFreqSeconds = parsedRefreshFreqMinutes * 60;\n\n    try {\n      const response = await updateConnectorCredentialPairProperty(\n        ccPairId,\n        propertyName,\n        String(parsedRefreshFreqSeconds)\n      );\n      if (!response.ok) {\n        throw new Error(await response.text());\n      }\n      mutate(buildCCPairInfoUrl(ccPairId));\n      toast.success(\"Connector refresh frequency updated successfully\");\n    } catch (error) {\n      toast.error(\"Failed to update connector refresh frequency\");\n    }\n  };\n\n  const handlePruningSubmit = async (\n    propertyName: string,\n    propertyValue: string\n  ) => {\n    const parsedFreqHours = parseFloat(propertyValue);\n\n    if (isNaN(parsedFreqHours)) {\n      toast.error(\"Invalid pruning frequency: must be a valid number\");\n      return;\n    }\n\n    // Convert hours to seconds\n    const parsedFreqSeconds = parsedFreqHours * 3600;\n\n    try {\n      const response = await updateConnectorCredentialPairProperty(\n        ccPairId,\n        propertyName,\n        String(parsedFreqSeconds)\n      );\n      if (!response.ok) {\n        throw new Error(await response.text());\n      }\n      mutate(buildCCPairInfoUrl(ccPairId));\n      toast.success(\"Connector pruning frequency updated successfully\");\n    } catch (error) {\n      toast.error(\"Failed to update connector pruning frequency\");\n    }\n  };\n\n  if (isLoadingCCPair || isLoadingIndexAttempts) {\n    return <ThreeDotsLoader />;\n  }\n\n  if (!ccPair || (!hasLoadedOnce && ccPairError)) {\n    return (\n      <ErrorCallout\n        errorTitle={`Failed to fetch info on Connector with ID ${ccPairId}`}\n        errorMsg={\n          ccPairError?.info?.detail ||\n          ccPairError?.toString() ||\n          \"Unknown error\"\n        }\n      />\n    );\n  }\n\n  const isDeleting = ccPair.status === ConnectorCredentialPairStatus.DELETING;\n\n  const {\n    prune_freq: pruneFreq,\n    refresh_freq: refreshFreq,\n    indexing_start: indexingStart,\n  } = ccPair.connector;\n\n  return (\n    <>\n      {showIsResolvingKickoffLoader && !isResolvingErrors && <Spinner />}\n      {ReIndexModal}\n      {ConfirmModal}\n\n      {showDeleteConnectorConfirmModal && (\n        <ConfirmEntityModal\n          danger\n          entityType=\"connector\"\n          entityName={ccPair.name}\n          additionalDetails=\"Deleting this connector schedules a deletion job that removes its indexed documents and deletes it for every user.\"\n          onClose={() => {\n            setShowDeleteConnectorConfirmModal(false);\n          }}\n          onSubmit={scheduleConnectorDeletion}\n        />\n      )}\n\n      {editingRefreshFrequency && (\n        <EditPropertyModal\n          propertyTitle=\"Refresh Frequency\"\n          propertyDetails=\"How often the connector should refresh (in minutes)\"\n          propertyName=\"refresh_frequency\"\n          propertyValue={String(Math.round((refreshFreq || 0) / 60))}\n          validationSchema={RefreshFrequencySchema}\n          onSubmit={handleRefreshSubmit}\n          onClose={() => setEditingRefreshFrequency(false)}\n        />\n      )}\n\n      {editingPruningFrequency && (\n        <EditPropertyModal\n          propertyTitle=\"Pruning Frequency\"\n          propertyDetails=\"How often the connector should be pruned (in hours)\"\n          propertyName=\"pruning_frequency\"\n          propertyValue={String(\n            ((pruneFreq || 0) / 3600).toFixed(3).replace(/\\.?0+$/, \"\")\n          )}\n          validationSchema={PruneFrequencySchema}\n          onSubmit={handlePruningSubmit}\n          onClose={() => setEditingPruningFrequency(false)}\n        />\n      )}\n\n      {showIndexAttemptErrors && indexAttemptErrors && (\n        <IndexAttemptErrorsModal\n          errors={indexAttemptErrors}\n          onClose={() => setShowIndexAttemptErrors(false)}\n          onResolveAll={async () => {\n            setShowIndexAttemptErrors(false);\n            setShowIsResolvingKickoffLoader(true);\n            await triggerReIndex(true);\n          }}\n          isResolvingErrors={isResolvingErrors}\n        />\n      )}\n\n      <BackButton />\n      <div\n        className=\"flex\n        items-center\n        justify-between\n        h-16\n        pb-2\n        border-b\n        border-neutral-200\n        dark:border-neutral-600\"\n      >\n        <div className=\"my-auto\">\n          <SourceIcon iconSize={32} sourceType={ccPair.connector.source} />\n        </div>\n\n        <div className=\"ml-2 overflow-hidden text-ellipsis whitespace-nowrap flex-1 mr-4\">\n          <EditableStringFieldDisplay\n            value={ccPair.name}\n            isEditable={ccPair.is_editable_for_current_user}\n            onUpdate={handleUpdateName}\n            scale={2.1}\n          />\n        </div>\n\n        <div className=\"ml-auto flex gap-x-2\">\n          {ccPair.is_editable_for_current_user && (\n            <DropdownMenu>\n              <DropdownMenuTrigger asChild>\n                <Button prominence=\"secondary\" icon={SvgSettings}>\n                  Manage\n                </Button>\n              </DropdownMenuTrigger>\n              <DropdownMenuContent align=\"end\">\n                <DropdownMenuItemWithTooltip\n                  onClick={() => {\n                    if (\n                      !ccPair.indexing &&\n                      ccPair.status !== ConnectorCredentialPairStatus.PAUSED &&\n                      ccPair.status !== ConnectorCredentialPairStatus.INVALID\n                    ) {\n                      showReIndexModal();\n                    }\n                  }}\n                  disabled={\n                    ccPair.indexing ||\n                    ccPair.status === ConnectorCredentialPairStatus.PAUSED ||\n                    ccPair.status === ConnectorCredentialPairStatus.INVALID\n                  }\n                  className=\"flex items-center gap-x-2 cursor-pointer px-3 py-2\"\n                  tooltip={\n                    ccPair.indexing\n                      ? \"Cannot re-index while indexing is already in progress\"\n                      : ccPair.status === ConnectorCredentialPairStatus.PAUSED\n                        ? \"Resume the connector before re-indexing\"\n                        : ccPair.status ===\n                            ConnectorCredentialPairStatus.INVALID\n                          ? \"Fix the connector configuration before re-indexing\"\n                          : undefined\n                  }\n                >\n                  <RefreshCwIcon className=\"h-4 w-4\" />\n                  <span>Re-Index</span>\n                </DropdownMenuItemWithTooltip>\n                {!isDeleting && (\n                  <DropdownMenuItemWithTooltip\n                    onClick={() =>\n                      handleStatusUpdate(\n                        statusIsNotCurrentlyActive(ccPair.status)\n                          ? ConnectorCredentialPairStatus.ACTIVE\n                          : ConnectorCredentialPairStatus.PAUSED\n                      )\n                    }\n                    disabled={isStatusUpdating}\n                    className=\"flex items-center gap-x-2 cursor-pointer px-3 py-2\"\n                    tooltip={\n                      isStatusUpdating ? \"Status update in progress\" : undefined\n                    }\n                  >\n                    {statusIsNotCurrentlyActive(ccPair.status) ? (\n                      <PlayIcon className=\"h-4 w-4\" />\n                    ) : (\n                      <PauseIcon className=\"h-4 w-4\" />\n                    )}\n                    <span>\n                      {statusIsNotCurrentlyActive(ccPair.status)\n                        ? \"Resume\"\n                        : \"Pause\"}\n                    </span>\n                  </DropdownMenuItemWithTooltip>\n                )}\n                {!isDeleting && (\n                  <DropdownMenuItemWithTooltip\n                    onClick={() => {\n                      setShowDeleteConnectorConfirmModal(true);\n                    }}\n                    disabled={!statusIsNotCurrentlyActive(ccPair.status)}\n                    className=\"flex items-center gap-x-2 cursor-pointer px-3 py-2 text-red-600 hover:text-red-700 dark:text-red-400 dark:hover:text-red-300\"\n                    tooltip={\n                      !statusIsNotCurrentlyActive(ccPair.status)\n                        ? \"Pause the connector before deleting\"\n                        : undefined\n                    }\n                  >\n                    <Trash2Icon className=\"h-4 w-4\" />\n                    <span>Delete</span>\n                  </DropdownMenuItemWithTooltip>\n                )}\n              </DropdownMenuContent>\n            </DropdownMenu>\n          )}\n        </div>\n      </div>\n\n      {ccPair.deletion_failure_message &&\n        ccPair.status === ConnectorCredentialPairStatus.DELETING && (\n          <>\n            <div className=\"mt-6\" />\n            <DeletionErrorStatus\n              deletion_failure_message={ccPair.deletion_failure_message}\n            />\n          </>\n        )}\n\n      {ccPair.status === ConnectorCredentialPairStatus.INVALID && (\n        <div className=\"mt-6\">\n          <Callout type=\"warning\" title=\"Invalid Connector State\">\n            This connector is in an invalid state. Please update your\n            credentials or create a new connector before re-indexing.\n          </Callout>\n        </div>\n      )}\n\n      {indexAttemptErrors && indexAttemptErrors.total_items > 0 && (\n        <Alert className=\"border-alert bg-yellow-50 dark:bg-yellow-800 my-2 mt-6\">\n          <AlertCircle className=\"h-4 w-4 text-yellow-700 dark:text-yellow-500\" />\n          <AlertTitle className=\"text-yellow-950 dark:text-yellow-200 font-semibold\">\n            Some documents failed to index\n          </AlertTitle>\n          <AlertDescription className=\"text-yellow-900 dark:text-yellow-300\">\n            {isResolvingErrors ? (\n              <span>\n                <span className=\"text-sm text-yellow-700 dark:text-yellow-400 da animate-pulse\">\n                  Resolving failures\n                </span>\n              </span>\n            ) : (\n              <>\n                We ran into some issues while processing some documents.{\" \"}\n                <b\n                  className=\"text-link cursor-pointer dark:text-blue-300\"\n                  onClick={() => setShowIndexAttemptErrors(true)}\n                >\n                  View details.\n                </b>\n              </>\n            )}\n          </AlertDescription>\n        </Alert>\n      )}\n\n      <Title className=\"mb-2 mt-6\" size=\"md\">\n        Indexing\n      </Title>\n\n      <Card className=\"px-8 py-12\">\n        <div className=\"flex\">\n          <div className=\"w-[200px]\">\n            <div className=\"text-sm font-medium mb-1\">Status</div>\n            <CCPairStatus\n              ccPairStatus={ccPair.status}\n              inRepeatedErrorState={ccPair.in_repeated_error_state}\n              lastIndexAttemptStatus={latestIndexAttempt?.status}\n            />\n          </div>\n\n          <div className=\"w-[200px]\">\n            <div className=\"text-sm font-medium mb-1\">Documents Indexed</div>\n            <div className=\"text-sm text-text-default flex items-center gap-x-1\">\n              {ccPair.num_docs_indexed.toLocaleString()}\n              {ccPair.status ===\n                ConnectorCredentialPairStatus.INITIAL_INDEXING &&\n                ccPair.overall_indexing_speed !== null &&\n                ccPair.num_docs_indexed > 0 && (\n                  <div className=\"ml-0.5 text-xs font-medium\">\n                    ({ccPair.overall_indexing_speed.toFixed(1)} docs / min)\n                  </div>\n                )}\n            </div>\n          </div>\n\n          <div className=\"w-[200px]\">\n            <div className=\"text-sm font-medium mb-1\">Last Indexed</div>\n            <div className=\"text-sm text-text-default\">\n              {timeAgo(ccPair?.last_indexed) ?? \"-\"}\n            </div>\n          </div>\n\n          {ccPair.access_type === \"sync\" && (\n            <>\n              <div className=\"w-[200px]\">\n                {/* TODO: Remove className and switch to text03 once Text is fully integrated across this page */}\n                <Text as=\"p\" className=\"text-sm font-medium mb-1\">\n                  Permission Syncing\n                </Text>\n                {ccPair.permission_syncing ||\n                ccPair.last_permission_sync_attempt_status ? (\n                  <PermissionSyncStatus\n                    status={ccPair.last_permission_sync_attempt_status}\n                    errorMsg={ccPair.last_permission_sync_attempt_error_message}\n                  />\n                ) : (\n                  <PermissionSyncStatus status={null} />\n                )}\n              </div>\n\n              <div className=\"w-[200px]\">\n                {/* TODO: Remove className and switch to text03 once Text is fully integrated across this page */}\n                <Text as=\"p\" className=\"text-sm font-medium mb-1\">\n                  Last Synced\n                </Text>\n                <Text as=\"p\" className=\"text-sm text-text-default\">\n                  {ccPair.last_permission_sync_attempt_finished\n                    ? timeAgo(ccPair.last_permission_sync_attempt_finished)\n                    : timeAgo(ccPair.last_full_permission_sync) ?? \"-\"}\n                </Text>\n              </div>\n            </>\n          )}\n        </div>\n      </Card>\n\n      {credentialTemplates[ccPair.connector.source] &&\n        ccPair.is_editable_for_current_user && (\n          <>\n            <Title size=\"md\" className=\"mt-10 mb-2\">\n              Credential\n            </Title>\n\n            <div className=\"mt-2\">\n              <CredentialSection\n                ccPair={ccPair}\n                sourceType={ccPair.connector.source}\n                refresh={() => refresh()}\n              />\n            </div>\n          </>\n        )}\n\n      {ccPair.connector.connector_specific_config &&\n        Object.keys(ccPair.connector.connector_specific_config).length > 0 && (\n          <>\n            <Title size=\"md\" className=\"mt-10 mb-2\">\n              Connector Configuration\n            </Title>\n\n            <Card className=\"px-8 py-4\">\n              <ConfigDisplay\n                configEntries={buildConfigEntries(\n                  ccPair.connector.connector_specific_config,\n                  ccPair.connector.source\n                )}\n              />\n\n              {/* Inline file management for file connectors */}\n              {canManageInlineFileConnectorFiles && (\n                <div className=\"mt-6\">\n                  <InlineFileManagement\n                    connectorId={ccPair.connector.id}\n                    onRefresh={refresh}\n                  />\n                </div>\n              )}\n            </Card>\n          </>\n        )}\n\n      <div className=\"mt-6\">\n        <div className=\"flex\">\n          <AdvancedOptionsToggle\n            showAdvancedOptions={showAdvancedOptions}\n            setShowAdvancedOptions={setShowAdvancedOptions}\n            title=\"Advanced\"\n          />\n        </div>\n        {showAdvancedOptions && (\n          <div className=\"pb-16\">\n            {(pruneFreq || indexingStart || refreshFreq) && (\n              <>\n                <Title size=\"md\" className=\"mt-3 mb-2\">\n                  Advanced Configuration\n                </Title>\n                <Card className=\"px-8 py-4\">\n                  <div>\n                    <AdvancedConfigDisplay\n                      pruneFreq={pruneFreq}\n                      indexingStart={indexingStart}\n                      refreshFreq={refreshFreq}\n                      onRefreshEdit={handleRefreshEdit}\n                      onPruningEdit={handlePruningEdit}\n                    />\n                  </div>\n                </Card>\n              </>\n            )}\n\n            <Title size=\"md\" className=\"mt-6 mb-2\">\n              Indexing Attempts\n            </Title>\n            {indexAttempts && (\n              <IndexAttemptsTable\n                ccPair={ccPair}\n                indexAttempts={indexAttempts}\n                currentPage={currentPage}\n                totalPages={totalPages}\n                onPageChange={goToPage}\n              />\n            )}\n          </div>\n        )}\n      </div>\n    </>\n  );\n}\n\nexport default function Page(props: { params: Promise<{ ccPairId: string }> }) {\n  const params = use(props.params);\n  const ccPairId = parseInt(params.ccPairId);\n\n  return (\n    <div className=\"mx-auto w-[800px]\">\n      <Main ccPairId={ccPairId} />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connector/[ccPairId]/types.ts",
    "content": "import { Connector } from \"@/lib/connectors/connectors\";\nimport { Credential } from \"@/lib/connectors/credentials\";\nimport {\n  DeletionAttemptSnapshot,\n  IndexAttemptSnapshot,\n  ValidStatuses,\n  AccessType,\n} from \"@/lib/types\";\nimport { UUID } from \"crypto\";\n\nexport enum ConnectorCredentialPairStatus {\n  SCHEDULED = \"SCHEDULED\",\n  INITIAL_INDEXING = \"INITIAL_INDEXING\",\n  ACTIVE = \"ACTIVE\",\n  PAUSED = \"PAUSED\",\n  DELETING = \"DELETING\",\n  INVALID = \"INVALID\",\n}\n\nexport enum PermissionSyncStatusEnum {\n  CANCELED = \"canceled\",\n  COMPLETED_WITH_ERRORS = \"completed_with_errors\",\n  FAILED = \"failed\",\n  IN_PROGRESS = \"in_progress\",\n  NOT_STARTED = \"not_started\",\n  SUCCESS = \"success\",\n}\n\n/**\n * Returns true if the status is not currently active (i.e. paused or invalid), but not deleting\n */\nexport function statusIsNotCurrentlyActive(\n  status: ConnectorCredentialPairStatus\n): boolean {\n  return (\n    status === ConnectorCredentialPairStatus.PAUSED ||\n    status === ConnectorCredentialPairStatus.INVALID\n  );\n}\n\nexport interface CCPairFullInfo {\n  id: number;\n  name: string;\n  status: ConnectorCredentialPairStatus;\n  in_repeated_error_state: boolean;\n  num_docs_indexed: number;\n  connector: Connector<any>;\n  credential: Credential<any>;\n  number_of_index_attempts: number;\n  last_index_attempt_status: ValidStatuses | null;\n  latest_deletion_attempt: DeletionAttemptSnapshot | null;\n  access_type: AccessType;\n  is_editable_for_current_user: boolean;\n  deletion_failure_message: string | null;\n  indexing: boolean;\n  creator: UUID | null;\n  creator_email: string | null;\n\n  last_indexed: string | null;\n  last_pruned: string | null;\n  last_full_permission_sync: string | null;\n  overall_indexing_speed: number | null;\n  latest_checkpoint_description: string | null;\n\n  // permission sync attempt status\n  last_permission_sync_attempt_status: PermissionSyncStatusEnum | null;\n  permission_syncing: boolean;\n  last_permission_sync_attempt_finished: string | null;\n  last_permission_sync_attempt_error_message: string | null;\n}\n\nexport interface PaginatedIndexAttempts {\n  index_attempts: IndexAttemptSnapshot[];\n  page: number;\n  total_pages: number;\n}\n\nexport interface IndexAttemptError {\n  id: number;\n  connector_credential_pair_id: number;\n\n  document_id: string | null;\n  document_link: string | null;\n\n  entity_id: string | null;\n  failed_time_range_start: string | null;\n  failed_time_range_end: string | null;\n\n  failure_message: string;\n  is_resolved: boolean;\n\n  time_created: string;\n\n  index_attempt_id: number;\n}\n\nexport interface PaginatedIndexAttemptErrors {\n  items: IndexAttemptError[];\n  total_items: number;\n}\n"
  },
  {
    "path": "web/src/app/admin/connector/[ccPairId]/useStatusChange.tsx",
    "content": "\"use client\";\n\nimport { CCPairFullInfo, ConnectorCredentialPairStatus } from \"./types\";\nimport { mutate } from \"swr\";\nimport { buildCCPairInfoUrl } from \"./lib\";\nimport { setCCPairStatus } from \"@/lib/ccPair\";\nimport { useState } from \"react\";\nimport { ConfirmEntityModal } from \"@/components/modals/ConfirmEntityModal\";\n\n// Export the status change functionality separately\nexport function useStatusChange(ccPair: CCPairFullInfo | null) {\n  const [isUpdating, setIsUpdating] = useState(false);\n  const [showConfirmModal, setShowConfirmModal] = useState(false);\n\n  const updateStatus = async (newStatus: ConnectorCredentialPairStatus) => {\n    if (!ccPair) return false;\n\n    setIsUpdating(true);\n\n    try {\n      // Call the backend to update the status\n      await setCCPairStatus(ccPair.id, newStatus);\n\n      // Use mutate to revalidate the status on the backend\n      await mutate(buildCCPairInfoUrl(ccPair.id));\n    } catch (error) {\n      console.error(\"Failed to update status\", error);\n    } finally {\n      // Reset local updating state and button text after mutation\n      setIsUpdating(false);\n    }\n\n    return true;\n  };\n\n  const handleStatusChange = async (\n    newStatus: ConnectorCredentialPairStatus\n  ) => {\n    if (isUpdating || !ccPair) return false; // Prevent double-clicks or multiple requests\n\n    if (\n      ccPair.status === ConnectorCredentialPairStatus.INVALID &&\n      newStatus === ConnectorCredentialPairStatus.ACTIVE\n    ) {\n      setShowConfirmModal(true);\n      return false;\n    } else {\n      return await updateStatus(newStatus);\n    }\n  };\n\n  const ConfirmModal =\n    showConfirmModal && ccPair ? (\n      <ConfirmEntityModal\n        entityType=\"Invalid Connector\"\n        entityName={ccPair.name}\n        onClose={() => setShowConfirmModal(false)}\n        onSubmit={() => {\n          setShowConfirmModal(false);\n          updateStatus(ConnectorCredentialPairStatus.ACTIVE);\n        }}\n        additionalDetails=\"This connector was previously marked as invalid. Please verify that your configuration is correct before re-enabling. Are you sure you want to proceed?\"\n        actionButtonText=\"Re-Enable\"\n      />\n    ) : null;\n\n  return {\n    handleStatusChange,\n    isUpdating,\n    ConfirmModal,\n  };\n}\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx",
    "content": "\"use client\";\n\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport useSWR, { mutate } from \"swr\";\nimport { AdminPageTitle } from \"@/components/admin/Title\";\nimport { buildSimilarCredentialInfoURL } from \"@/app/admin/connector/[ccPairId]/lib\";\nimport { toast } from \"@/hooks/useToast\";\nimport { useFormContext } from \"@/components/context/FormContext\";\nimport { getSourceDisplayName, getSourceMetadata } from \"@/lib/sources\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport { useEffect, useRef, useState } from \"react\";\nimport { deleteCredential, linkCredential } from \"@/lib/credential\";\nimport { submitFiles } from \"@/app/admin/connectors/[connector]/pages/utils/files\";\nimport { submitGoogleSite } from \"@/app/admin/connectors/[connector]/pages/utils/google_site\";\nimport AdvancedFormPage from \"@/app/admin/connectors/[connector]/pages/Advanced\";\nimport DynamicConnectionForm from \"@/app/admin/connectors/[connector]/pages/DynamicConnectorCreationForm\";\nimport CreateCredential from \"@/components/credentials/actions/CreateCredential\";\nimport ModifyCredential from \"@/components/credentials/actions/ModifyCredential\";\nimport {\n  ConfigurableSources,\n  oauthSupportedSources,\n  ValidSources,\n} from \"@/lib/types\";\nimport { Credential, credentialTemplates } from \"@/lib/connectors/credentials\";\nimport {\n  ConnectionConfiguration,\n  connectorConfigs,\n  createConnectorInitialValues,\n  createConnectorValidationSchema,\n  defaultPruneFreqHours,\n  defaultRefreshFreqMinutes,\n  isLoadState,\n  Connector,\n  ConnectorBase,\n} from \"@/lib/connectors/connectors\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { GmailMain } from \"@/app/admin/connectors/[connector]/pages/gmail/GmailPage\";\nimport {\n  useGmailCredentials,\n  useGoogleDriveCredentials,\n} from \"@/app/admin/connectors/[connector]/pages/utils/hooks\";\nimport { Formik } from \"formik\";\nimport NavigationRow from \"@/app/admin/connectors/[connector]/NavigationRow\";\nimport { useRouter } from \"next/navigation\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { prepareOAuthAuthorizationRequest } from \"@/lib/oauth_utils\";\nimport {\n  EE_ENABLED,\n  NEXT_PUBLIC_CLOUD_ENABLED,\n  NEXT_PUBLIC_TEST_ENV,\n} from \"@/lib/constants\";\nimport {\n  getConnectorOauthRedirectUrl,\n  useOAuthDetails,\n} from \"@/lib/connectors/oauth\";\nimport { CreateStdOAuthCredential } from \"@/components/credentials/actions/CreateStdOAuthCredential\";\nimport { Spinner } from \"@/components/Spinner\";\nimport { Button } from \"@opal/components\";\nimport { deleteConnector } from \"@/lib/connector\";\nimport ConnectorDocsLink from \"@/components/admin/connectors/ConnectorDocsLink\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgKey, SvgAlertCircle } from \"@opal/icons\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport Link from \"next/link\";\n\nexport interface AdvancedConfig {\n  refreshFreq: number;\n  pruneFreq: number;\n  indexingStart: string;\n}\n\nconst BASE_CONNECTOR_URL = \"/api/manage/admin/connector\";\nconst CONNECTOR_CREATION_TIMEOUT_MS = 10000; // ~10 seconds is reasonable for longer connector validation\n\nexport async function submitConnector<T>(\n  connector: ConnectorBase<T>,\n  connectorId?: number,\n  fakeCredential?: boolean\n): Promise<{ message: string; isSuccess: boolean; response?: Connector<T> }> {\n  const isUpdate = connectorId !== undefined;\n  if (!connector.connector_specific_config) {\n    connector.connector_specific_config = {} as T;\n  }\n\n  try {\n    if (fakeCredential) {\n      const response = await fetch(\n        \"/api/manage/admin/connector-with-mock-credential\",\n        {\n          method: isUpdate ? \"PATCH\" : \"POST\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n          body: JSON.stringify({ ...connector }),\n        }\n      );\n      if (response.ok) {\n        const responseJson = await response.json();\n        return { message: \"Success!\", isSuccess: true, response: responseJson };\n      } else {\n        const errorData = await response.json();\n        return { message: `Error: ${errorData.detail}`, isSuccess: false };\n      }\n    } else {\n      const response = await fetch(\n        BASE_CONNECTOR_URL + (isUpdate ? `/${connectorId}` : \"\"),\n        {\n          method: isUpdate ? \"PATCH\" : \"POST\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n          body: JSON.stringify(connector),\n        }\n      );\n\n      if (response.ok) {\n        const responseJson = await response.json();\n        return { message: \"Success!\", isSuccess: true, response: responseJson };\n      } else {\n        const errorData = await response.json();\n        return { message: `Error: ${errorData.detail}`, isSuccess: false };\n      }\n    }\n  } catch (error) {\n    return { message: `Error: ${error}`, isSuccess: false };\n  }\n}\n\nexport default function AddConnector({\n  connector,\n}: {\n  connector: ConfigurableSources;\n}) {\n  const [currentPageUrl, setCurrentPageUrl] = useState<string | null>(null);\n  const [oauthUrl, setOauthUrl] = useState<string | null>(null);\n  const [isAuthorizing, setIsAuthorizing] = useState(false);\n  const [isAuthorizeVisible, setIsAuthorizeVisible] = useState(false);\n  useEffect(() => {\n    if (typeof window !== \"undefined\") {\n      setCurrentPageUrl(window.location.href);\n    }\n\n    if (EE_ENABLED && (NEXT_PUBLIC_CLOUD_ENABLED || NEXT_PUBLIC_TEST_ENV)) {\n      const sourceMetadata = getSourceMetadata(connector);\n      if (sourceMetadata?.oauthSupported == true) {\n        setIsAuthorizeVisible(true);\n      }\n    }\n  }, []);\n\n  const router = useRouter();\n\n  // State for managing credentials and files\n  const [currentCredential, setCurrentCredential] =\n    useState<Credential<any> | null>(null);\n  const [createCredentialFormToggle, setCreateCredentialFormToggle] =\n    useState(false);\n\n  // Fetch credentials data\n  const { data: credentials } = useSWR<Credential<any>[]>(\n    buildSimilarCredentialInfoURL(connector),\n    errorHandlingFetcher,\n    { refreshInterval: 5000 }\n  );\n\n  const { data: editableCredentials } = useSWR<Credential<any>[]>(\n    buildSimilarCredentialInfoURL(connector, true),\n    errorHandlingFetcher,\n    { refreshInterval: 5000 }\n  );\n\n  const { data: oauthDetails, isLoading: oauthDetailsLoading } =\n    useOAuthDetails(connector);\n\n  // Get credential template and configuration\n  const credentialTemplate = credentialTemplates[connector];\n  const configuration: ConnectionConfiguration = connectorConfigs[connector];\n\n  // Form context and popup management\n  const { setFormStep, setAllowCreate, formStep } = useFormContext();\n  const [uploading, setUploading] = useState(false);\n  const [creatingConnector, setCreatingConnector] = useState(false);\n\n  // Connector creation timeout management\n  const timeoutErrorHappenedRef = useRef<boolean>(false);\n  const connectorIdRef = useRef<number | null>(null);\n\n  useEffect(() => {\n    return () => {\n      // Cleanup refs when component unmounts\n      timeoutErrorHappenedRef.current = false;\n      connectorIdRef.current = null;\n    };\n  }, []);\n\n  // Hooks for Google Drive and Gmail credentials\n  const { liveGDriveCredential } = useGoogleDriveCredentials(connector);\n  const { liveGmailCredential } = useGmailCredentials(connector);\n\n  // Check if credential is activated\n  const credentialActivated =\n    (connector === \"google_drive\" && liveGDriveCredential) ||\n    (connector === \"gmail\" && liveGmailCredential) ||\n    currentCredential;\n\n  // Check if there are no credentials\n  const noCredentials = credentialTemplate == null;\n\n  useEffect(() => {\n    if (noCredentials && 1 != formStep) {\n      setFormStep(Math.max(1, formStep));\n    }\n\n    if (!noCredentials && !credentialActivated && formStep != 0) {\n      setFormStep(Math.min(formStep, 0));\n    }\n  }, [noCredentials, formStep, setFormStep]);\n\n  const convertStringToDateTime = (indexingStart: string | null) => {\n    return indexingStart ? new Date(indexingStart) : null;\n  };\n\n  const displayName = getSourceDisplayName(connector) || connector;\n  const sourceMetadata = getSourceMetadata(connector);\n  const hasFederatedOption = sourceMetadata.federated === true;\n\n  if (!credentials || !editableCredentials) {\n    return <></>;\n  }\n\n  // Credential handler functions\n  const refresh = () => {\n    mutate(buildSimilarCredentialInfoURL(connector));\n  };\n\n  const onDeleteCredential = async (credential: Credential<any | null>) => {\n    const response = await deleteCredential(credential.id, true);\n    if (response.ok) {\n      toast.success(\"Credential deleted successfully!\");\n    } else {\n      const errorData = await response.json();\n      toast.error(errorData.detail || errorData.message);\n    }\n  };\n\n  const onSwap = async (selectedCredential: Credential<any>) => {\n    setCurrentCredential(selectedCredential);\n    setAllowCreate(true);\n    toast.success(\"Swapped credential successfully!\");\n    refresh();\n  };\n\n  const onSuccess = () => {\n    router.push(\"/admin/indexing/status?message=connector-created\");\n  };\n\n  const handleAuthorize = async () => {\n    // authorize button handler\n    // gets an auth url from the server and directs the user to it in a popup\n\n    if (!currentPageUrl) return;\n\n    setIsAuthorizing(true);\n    try {\n      const response = await prepareOAuthAuthorizationRequest(\n        connector,\n        currentPageUrl\n      );\n      if (response.url) {\n        setOauthUrl(response.url);\n        window.open(response.url, \"_blank\", \"noopener,noreferrer\");\n      } else {\n        toast.error(\"Failed to fetch OAuth URL\");\n      }\n    } catch (error: unknown) {\n      // Narrow the type of error\n      if (error instanceof Error) {\n        toast.error(`Error: ${error.message}`);\n      } else {\n        // Handle non-standard errors\n        toast.error(\"An unknown error occurred\");\n      }\n    } finally {\n      setIsAuthorizing(false);\n    }\n  };\n\n  return (\n    <Formik\n      initialValues={createConnectorInitialValues(connector)}\n      validationSchema={createConnectorValidationSchema(connector)}\n      onSubmit={async (values) => {\n        const {\n          name,\n          groups,\n          access_type,\n          pruneFreq,\n          indexingStart,\n          refreshFreq,\n          auto_sync_options,\n          ...connector_specific_config\n        } = values;\n\n        // Apply special transforms according to application logic\n        const transformedConnectorSpecificConfig = Object.entries(\n          connector_specific_config\n        ).reduce(\n          (acc, [key, value]) => {\n            // Filter out empty strings from arrays\n            if (Array.isArray(value)) {\n              value = (value as any[]).filter(\n                (item) => typeof item !== \"string\" || item.trim() !== \"\"\n              );\n            }\n            const matchingConfigValue = configuration.values.find(\n              (configValue) => configValue.name === key\n            );\n            if (\n              matchingConfigValue &&\n              \"transform\" in matchingConfigValue &&\n              matchingConfigValue.transform\n            ) {\n              acc[key] = matchingConfigValue.transform(value as string[]);\n            } else {\n              acc[key] = value;\n            }\n            return acc;\n          },\n          {} as Record<string, any>\n        );\n\n        // Apply advanced configuration-specific transforms.\n        const advancedConfiguration: any = {\n          pruneFreq: (pruneFreq ?? defaultPruneFreqHours) * 3600,\n          indexingStart: convertStringToDateTime(indexingStart),\n          refreshFreq: (refreshFreq ?? defaultRefreshFreqMinutes) * 60,\n        };\n\n        // File-specific handling\n        const selectedFiles = Array.isArray(values.file_locations)\n          ? values.file_locations\n          : values.file_locations\n            ? [values.file_locations]\n            : [];\n\n        // Google sites-specific handling\n        if (connector == \"google_sites\") {\n          const response = await submitGoogleSite(\n            selectedFiles,\n            values?.base_url,\n            advancedConfiguration.refreshFreq,\n            advancedConfiguration.pruneFreq,\n            advancedConfiguration.indexingStart,\n            values.access_type,\n            groups,\n            name\n          );\n          if (response) {\n            onSuccess();\n          }\n          return;\n        }\n        // File-specific handling\n        if (connector == \"file\") {\n          setUploading(true);\n          try {\n            const response = await submitFiles(\n              selectedFiles,\n              name,\n              access_type,\n              groups\n            );\n            if (response) {\n              onSuccess();\n            }\n          } catch (error) {\n            toast.error(\"Error uploading files\");\n          } finally {\n            setUploading(false);\n          }\n\n          return;\n        }\n\n        setCreatingConnector(true);\n        try {\n          const timeoutPromise = new Promise<{ isTimeout: true }>((resolve) =>\n            setTimeout(\n              () => resolve({ isTimeout: true }),\n              CONNECTOR_CREATION_TIMEOUT_MS\n            )\n          );\n\n          const connectorCreationPromise = (async () => {\n            const { message, isSuccess, response } = await submitConnector<any>(\n              {\n                connector_specific_config: transformedConnectorSpecificConfig,\n                input_type: isLoadState(connector) ? \"load_state\" : \"poll\", // single case\n                name: name,\n                source: connector,\n                access_type: access_type,\n                refresh_freq: advancedConfiguration.refreshFreq || null,\n                prune_freq: advancedConfiguration.pruneFreq || null,\n                indexing_start: advancedConfiguration.indexingStart || null,\n                groups: groups,\n              },\n              undefined,\n              credentialActivated ? false : true\n            );\n\n            // Store the connector id immediately for potential timeout\n            if (response?.id) {\n              connectorIdRef.current = response.id;\n            }\n\n            // If no credential\n            if (!credentialActivated) {\n              if (isSuccess) {\n                onSuccess();\n              } else {\n                toast.error(message);\n              }\n            }\n\n            // With credential\n            if (credentialActivated && isSuccess && response) {\n              const credential =\n                currentCredential ||\n                liveGDriveCredential ||\n                liveGmailCredential;\n              const linkCredentialResponse = await linkCredential(\n                response.id,\n                credential?.id!,\n                name,\n                access_type,\n                groups,\n                auto_sync_options\n              );\n              if (linkCredentialResponse.ok) {\n                onSuccess();\n              } else {\n                const errorData = await linkCredentialResponse.json();\n\n                if (!timeoutErrorHappenedRef.current) {\n                  // Only show error if timeout didn't happen\n                  toast.error(errorData.detail || errorData.message);\n                }\n              }\n            } else if (isSuccess) {\n              onSuccess();\n            } else {\n              toast.error(message);\n            }\n\n            timeoutErrorHappenedRef.current = false;\n            return;\n          })();\n\n          const result = (await Promise.race([\n            connectorCreationPromise,\n            timeoutPromise,\n          ])) as {\n            isTimeout?: true;\n          };\n\n          if (result.isTimeout) {\n            timeoutErrorHappenedRef.current = true;\n            toast.error(\n              `Operation timed out after ${\n                CONNECTOR_CREATION_TIMEOUT_MS / 1000\n              } seconds. Check your configuration for errors?`\n            );\n\n            if (connectorIdRef.current) {\n              await deleteConnector(connectorIdRef.current);\n              connectorIdRef.current = null;\n            }\n          }\n          return;\n        } finally {\n          setCreatingConnector(false);\n        }\n      }}\n    >\n      {(formikProps) => (\n        <div className=\"mx-auto w-full\">\n          {uploading && <Spinner />}\n\n          {creatingConnector && <Spinner />}\n\n          <AdminPageTitle\n            includeDivider={false}\n            icon={<SourceIcon iconSize={32} sourceType={connector} />}\n            title={\n              hasFederatedOption ? (\n                <span className=\"inline-flex items-center gap-1.5\">\n                  {displayName}\n                  <SimpleTooltip\n                    tooltip={\n                      <div className=\"flex flex-col gap-2\">\n                        <Text as=\"p\" textLight05>\n                          A federated search option is available for this\n                          connector. It will result in greater latency and\n                          reduced search quality.\n                        </Text>\n                        <Link\n                          href={`/admin/connectors/${connector}?mode=federated`}\n                          className=\"text-action-link-04 hover:underline text-sm\"\n                        >\n                          Use federated version instead →\n                        </Link>\n                      </div>\n                    }\n                    side=\"bottom\"\n                    delayDuration={0}\n                  >\n                    <SvgAlertCircle size={20} />\n                  </SimpleTooltip>\n                </span>\n              ) : (\n                displayName\n              )\n            }\n            farRightElement={undefined}\n          />\n\n          {formStep == 0 && (\n            <CardSection>\n              <Text as=\"p\" headingH3 className=\"pb-2\">\n                Select a credential\n              </Text>\n\n              {connector == ValidSources.Gmail ? (\n                <GmailMain />\n              ) : (\n                <>\n                  <ModifyCredential\n                    showIfEmpty\n                    accessType={formikProps.values.access_type}\n                    defaultedCredential={currentCredential!}\n                    credentials={credentials}\n                    editableCredentials={editableCredentials}\n                    onDeleteCredential={onDeleteCredential}\n                    onSwitch={onSwap}\n                  />\n                  {!createCredentialFormToggle && (\n                    <div className=\"mt-6 flex gap-4\">\n                      {/* Button to pop up a form to manually enter credentials */}\n                      <Button\n                        onClick={async () => {\n                          if (oauthDetails && oauthDetails.oauth_enabled) {\n                            if (oauthDetails.additional_kwargs.length > 0) {\n                              setCreateCredentialFormToggle(true);\n                            } else {\n                              const redirectUrl =\n                                await getConnectorOauthRedirectUrl(\n                                  connector,\n                                  {}\n                                );\n                              // if redirect is supported, just use it\n                              if (redirectUrl) {\n                                window.location.href = redirectUrl;\n                              } else {\n                                setCreateCredentialFormToggle(\n                                  (createConnectorToggle) =>\n                                    !createConnectorToggle\n                                );\n                              }\n                            }\n                          } else {\n                            setCreateCredentialFormToggle(\n                              (createConnectorToggle) => !createConnectorToggle\n                            );\n                          }\n                        }}\n                      >\n                        Create New\n                      </Button>\n                      {/* Button to sign in via OAuth */}\n                      {oauthSupportedSources.includes(connector) &&\n                        (NEXT_PUBLIC_CLOUD_ENABLED || NEXT_PUBLIC_TEST_ENV) && (\n                          <Button\n                            disabled={isAuthorizing}\n                            variant=\"action\"\n                            onClick={handleAuthorize}\n                            hidden={!isAuthorizeVisible}\n                          >\n                            {isAuthorizing\n                              ? \"Authorizing...\"\n                              : `Authorize with ${getSourceDisplayName(\n                                  connector\n                                )}`}\n                          </Button>\n                        )}\n                    </div>\n                  )}\n\n                  {createCredentialFormToggle && (\n                    <Modal\n                      open\n                      onOpenChange={() => setCreateCredentialFormToggle(false)}\n                    >\n                      <Modal.Content>\n                        <Modal.Header\n                          icon={SvgKey}\n                          title={`Create a ${getSourceDisplayName(\n                            connector\n                          )} credential`}\n                          onClose={() => setCreateCredentialFormToggle(false)}\n                        />\n                        <Modal.Body>\n                          {oauthDetailsLoading ? (\n                            <Spinner />\n                          ) : (\n                            <>\n                              {oauthDetails && oauthDetails.oauth_enabled ? (\n                                <CreateStdOAuthCredential\n                                  sourceType={connector}\n                                  additionalFields={\n                                    oauthDetails.additional_kwargs\n                                  }\n                                />\n                              ) : (\n                                <CreateCredential\n                                  close\n                                  refresh={refresh}\n                                  sourceType={connector}\n                                  accessType={formikProps.values.access_type}\n                                  onSwitch={onSwap}\n                                  onClose={() =>\n                                    setCreateCredentialFormToggle(false)\n                                  }\n                                />\n                              )}\n                            </>\n                          )}\n                        </Modal.Body>\n                      </Modal.Content>\n                    </Modal>\n                  )}\n                </>\n              )}\n            </CardSection>\n          )}\n\n          {formStep == 1 && (\n            <CardSection className=\"w-full py-8 flex gap-y-6 flex-col max-w-3xl px-12 mx-auto\">\n              <DynamicConnectionForm\n                values={formikProps.values}\n                config={configuration}\n                connector={connector}\n                currentCredential={\n                  currentCredential ||\n                  liveGDriveCredential ||\n                  liveGmailCredential ||\n                  null\n                }\n              />\n              <ConnectorDocsLink sourceType={connector} />\n            </CardSection>\n          )}\n\n          {formStep === 2 && (\n            <CardSection>\n              <AdvancedFormPage />\n            </CardSection>\n          )}\n\n          <NavigationRow\n            activatedCredential={credentialActivated != null}\n            isValid={formikProps.isValid}\n            onSubmit={formikProps.handleSubmit}\n            noCredentials={noCredentials}\n            noAdvanced={connector == \"file\"}\n          />\n        </div>\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/ConnectorWrapper.tsx",
    "content": "\"use client\";\n\nimport {\n  ConfigurableSources,\n  FederatedConnectorDetail,\n  federatedSourceToRegularSource,\n  ValidSources,\n} from \"@/lib/types\";\nimport AddConnector from \"./AddConnectorPage\";\nimport { FormProvider } from \"@/components/context/FormContext\";\nimport Sidebar from \"../../../../sections/sidebar/CreateConnectorSidebar\";\nimport { HeaderTitle } from \"@/components/header/HeaderTitle\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { isValidSource, getSourceMetadata } from \"@/lib/sources\";\nimport { FederatedConnectorForm } from \"@/components/admin/federated/FederatedConnectorForm\";\nimport { useSearchParams } from \"next/navigation\";\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { buildSimilarCredentialInfoURL } from \"@/app/admin/connector/[ccPairId]/lib\";\nimport { Credential } from \"@/lib/connectors/credentials\";\nimport { useFederatedConnectors } from \"@/lib/hooks\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { useToastFromQuery } from \"@/hooks/useToast\";\n\nexport default function ConnectorWrapper({\n  connector,\n}: {\n  connector: ConfigurableSources;\n}) {\n  const searchParams = useSearchParams();\n  const mode = searchParams?.get(\"mode\"); // 'federated' or 'regular'\n\n  useToastFromQuery({\n    oauth_failed: {\n      message: \"OAuth authentication failed. Please try again.\",\n      type: \"error\",\n    },\n  });\n\n  // Check if the connector is valid\n  if (!isValidSource(connector)) {\n    return (\n      <FormProvider connector={connector}>\n        <div className=\"flex justify-center w-full h-full\">\n          <Sidebar />\n          <div className=\"mt-12 w-full max-w-3xl mx-auto\">\n            <div className=\"mx-auto flex flex-col gap-y-2\">\n              <HeaderTitle>\n                <p>&lsquo;{connector}&rsquo; is not a valid Connector Type!</p>\n              </HeaderTitle>\n              {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n              <Button\n                onClick={() => window.open(\"/admin/indexing/status\", \"_self\")}\n                className=\"mr-auto\"\n              >\n                {\" \"}\n                Go home{\" \"}\n              </Button>\n            </div>\n          </div>\n        </div>\n      </FormProvider>\n    );\n  }\n\n  const sourceMetadata = getSourceMetadata(connector);\n  const supportsFederated = sourceMetadata.federated === true;\n\n  // Only show federated form if explicitly requested via URL parameter\n  const showFederatedForm = mode === \"federated\" && supportsFederated;\n\n  // For federated form, use the specialized form without FormProvider\n  if (showFederatedForm) {\n    return (\n      <div className=\"flex justify-center w-full h-full\">\n        <div className=\"mt-12 w-full max-w-4xl mx-auto\">\n          <FederatedConnectorForm connector={connector} />\n        </div>\n      </div>\n    );\n  }\n\n  // For regular connectors, use the existing flow\n  return (\n    <FormProvider connector={connector}>\n      <div className=\"flex justify-center w-full h-full\">\n        <Sidebar />\n        <div className=\"mt-12 w-full max-w-3xl mx-auto\">\n          <AddConnector connector={connector} />\n        </div>\n      </div>\n    </FormProvider>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/NavigationRow.tsx",
    "content": "import { useFormContext } from \"@/components/context/FormContext\";\nimport { Button } from \"@opal/components\";\nimport { SvgArrowLeft, SvgArrowRight, SvgPlusCircle } from \"@opal/icons\";\n\nconst NavigationRow = ({\n  noAdvanced,\n  noCredentials,\n  activatedCredential,\n  onSubmit,\n  isValid,\n}: {\n  isValid: boolean;\n  onSubmit: () => void;\n  noAdvanced: boolean;\n  noCredentials: boolean;\n  activatedCredential: boolean;\n}) => {\n  const { formStep, prevFormStep, nextFormStep } = useFormContext();\n\n  return (\n    <div className=\"mt-4 w-full grid grid-cols-3\">\n      <div>\n        {((formStep > 0 && !noCredentials) ||\n          (formStep > 1 && !noAdvanced)) && (\n          <Button\n            prominence=\"secondary\"\n            onClick={prevFormStep}\n            icon={SvgArrowLeft}\n          >\n            Previous\n          </Button>\n        )}\n      </div>\n      <div className=\"flex justify-center\">\n        {(formStep > 0 || noCredentials) && (\n          <Button\n            disabled={!isValid}\n            rightIcon={SvgPlusCircle}\n            onClick={onSubmit}\n          >\n            Create Connector\n          </Button>\n        )}\n      </div>\n      <div className=\"flex justify-end\">\n        {formStep === 0 && (\n          <Button\n            disabled={!activatedCredential}\n            variant=\"action\"\n            rightIcon={SvgArrowRight}\n            onClick={() => nextFormStep()}\n          >\n            Continue\n          </Button>\n        )}\n        {!noAdvanced && formStep === 1 && (\n          <Button\n            disabled={!isValid}\n            prominence=\"secondary\"\n            rightIcon={SvgArrowRight}\n            onClick={() => nextFormStep()}\n          >\n            Advanced\n          </Button>\n        )}\n      </div>\n    </div>\n  );\n};\nexport default NavigationRow;\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/auth/callback/route.ts",
    "content": "import { getDomain } from \"@/lib/redirectSS\";\nimport { buildUrl } from \"@/lib/utilsSS\";\nimport { NextRequest, NextResponse } from \"next/server\";\nimport { cookies } from \"next/headers\";\nimport {\n  CRAFT_OAUTH_COOKIE_NAME,\n  CRAFT_CONFIGURE_PATH,\n} from \"@/app/craft/v1/constants\";\nimport { processCookies } from \"@/lib/userSS\";\n\nexport const GET = async (request: NextRequest) => {\n  const requestCookies = await cookies();\n  const connector = request.url.includes(\"gmail\") ? \"gmail\" : \"google-drive\";\n\n  const callbackEndpoint = `/manage/connector/${connector}/callback`;\n  const url = new URL(buildUrl(callbackEndpoint));\n  url.search = request.nextUrl.search;\n\n  const response = await fetch(url.toString(), {\n    headers: {\n      cookie: processCookies(requestCookies),\n    },\n  });\n\n  if (!response.ok) {\n    return NextResponse.redirect(\n      new URL(\n        `/admin/connectors/${connector}?message=oauth_failed`,\n        getDomain(request)\n      )\n    );\n  }\n\n  // Check for build mode OAuth flag (redirects to build admin panel)\n  const isBuildMode =\n    requestCookies.get(CRAFT_OAUTH_COOKIE_NAME)?.value === \"true\";\n  if (isBuildMode) {\n    const redirectResponse = NextResponse.redirect(\n      new URL(CRAFT_CONFIGURE_PATH, getDomain(request))\n    );\n    redirectResponse.cookies.delete(CRAFT_OAUTH_COOKIE_NAME);\n    return redirectResponse;\n  }\n\n  return NextResponse.redirect(\n    new URL(`/admin/connectors/${connector}`, getDomain(request))\n  );\n};\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/oauth/callback/page.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useState } from \"react\";\nimport { usePathname, useSearchParams } from \"next/navigation\";\nimport { AdminPageTitle } from \"@/components/admin/Title\";\nimport { getSourceMetadata, isValidSource } from \"@/lib/sources\";\nimport { ValidSources } from \"@/lib/types\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { handleOAuthAuthorizationResponse } from \"@/lib/oauth_utils\";\nimport { SvgKey } from \"@opal/icons\";\nexport default function OAuthCallbackPage() {\n  const searchParams = useSearchParams();\n\n  const [statusMessage, setStatusMessage] = useState(\"Processing...\");\n  const [statusDetails, setStatusDetails] = useState(\n    \"Please wait while we complete the setup.\"\n  );\n  const [redirectUrl, setRedirectUrl] = useState<string | null>(null);\n  const [isError, setIsError] = useState(false);\n  const [pageTitle, setPageTitle] = useState(\n    \"Authorize with Third-Party service\"\n  );\n\n  // Extract query parameters\n  const code = searchParams?.get(\"code\");\n  const state = searchParams?.get(\"state\");\n\n  const pathname = usePathname();\n  const connector = pathname?.split(\"/\")[3];\n\n  useEffect(() => {\n    const onFirstLoad = async () => {\n      // Examples\n      // connector (url segment)= \"google-drive\"\n      // sourceType (for looking up metadata) = \"google_drive\"\n\n      if (!code || !state) {\n        setStatusMessage(\"Improperly formed OAuth authorization request.\");\n        setStatusDetails(\n          !code ? \"Missing authorization code.\" : \"Missing state parameter.\"\n        );\n        setIsError(true);\n        return;\n      }\n\n      if (!connector) {\n        setStatusMessage(\n          `The specified connector source type ${connector} does not exist.`\n        );\n        setStatusDetails(`${connector} is not a valid source type.`);\n        setIsError(true);\n        return;\n      }\n\n      const sourceType = connector.replaceAll(\"-\", \"_\");\n      if (!isValidSource(sourceType)) {\n        setStatusMessage(\n          `The specified connector source type ${sourceType} does not exist.`\n        );\n        setStatusDetails(`${sourceType} is not a valid source type.`);\n        setIsError(true);\n        return;\n      }\n\n      const sourceMetadata = getSourceMetadata(sourceType as ValidSources);\n      setPageTitle(`Authorize with ${sourceMetadata.displayName}`);\n\n      setStatusMessage(\"Processing...\");\n      setStatusDetails(\"Please wait while we complete authorization.\");\n      setIsError(false); // Ensure no error state during loading\n\n      try {\n        const response = await handleOAuthAuthorizationResponse(\n          connector,\n          code,\n          state\n        );\n\n        if (!response) {\n          throw new Error(\"Empty response from OAuth server.\");\n        }\n\n        setStatusMessage(\"Success!\");\n\n        // set the continuation link\n        if (response.finalize_url) {\n          setRedirectUrl(response.finalize_url);\n          setStatusDetails(\n            `Your authorization with ${sourceMetadata.displayName} completed successfully. Additional steps are required to complete credential setup.`\n          );\n        } else {\n          setRedirectUrl(response.redirect_on_success);\n          setStatusDetails(\n            `Your authorization with ${sourceMetadata.displayName} completed successfully.`\n          );\n        }\n        setIsError(false);\n      } catch (error) {\n        console.error(\"OAuth error:\", error);\n        setStatusMessage(\"Oops, something went wrong!\");\n        setStatusDetails(\n          \"An error occurred during the OAuth process. Please try again.\"\n        );\n        setIsError(true);\n      }\n    };\n\n    onFirstLoad();\n  }, [code, state, connector]);\n\n  return (\n    <div className=\"mx-auto h-screen flex flex-col\">\n      <AdminPageTitle title={pageTitle} icon={SvgKey} />\n\n      <div className=\"flex-1 flex flex-col items-center justify-center\">\n        <CardSection className=\"max-w-md w-[500px] h-[250px] p-8\">\n          <h1 className=\"text-2xl font-bold mb-4\">{statusMessage}</h1>\n          <p className=\"text-text-500\">{statusDetails}</p>\n          {redirectUrl && !isError && (\n            <div className=\"mt-4\">\n              <p className=\"text-sm\">\n                Click{\" \"}\n                <a href={redirectUrl} className=\"text-blue-500 underline\">\n                  here\n                </a>{\" \"}\n                to continue.\n              </p>\n            </div>\n          )}\n        </CardSection>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/oauth/finalize/page.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useState } from \"react\";\nimport { usePathname, useRouter, useSearchParams } from \"next/navigation\";\nimport { AdminPageTitle } from \"@/components/admin/Title\";\nimport { Button } from \"@opal/components\";\nimport { getSourceMetadata, isValidSource } from \"@/lib/sources\";\nimport { ConfluenceAccessibleResource, ValidSources } from \"@/lib/types\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport {\n  handleOAuthConfluenceFinalize,\n  handleOAuthPrepareFinalization,\n} from \"@/lib/oauth_utils\";\nimport { SelectorFormField } from \"@/components/Field\";\nimport { ErrorMessage, Field, Form, Formik, useFormikContext } from \"formik\";\nimport * as Yup from \"yup\";\nimport { SvgKey } from \"@opal/icons\";\n// Helper component to keep the effect logic clean:\nfunction UpdateCloudURLOnCloudIdChange({\n  accessibleResources,\n}: {\n  accessibleResources: ConfluenceAccessibleResource[];\n}) {\n  const { values, setValues, setFieldValue } = useFormikContext<{\n    cloud_id: string;\n    cloud_name: string;\n    cloud_url: string;\n  }>();\n\n  useEffect(() => {\n    // Whenever cloud_id changes, find the matching resource and update cloud_url\n    if (values.cloud_id) {\n      const selectedResource = accessibleResources.find(\n        (resource) => resource.id === values.cloud_id\n      );\n      if (selectedResource) {\n        // Update multiple fields together ... somehow setting them in sequence\n        // doesn't work with the validator\n        // it may also be possible to await each setFieldValue call.\n        // https://github.com/jaredpalmer/formik/issues/2266\n        setValues((prevValues) => ({\n          ...prevValues,\n          cloud_name: selectedResource.name,\n          cloud_url: selectedResource.url,\n        }));\n      }\n    }\n  }, [values.cloud_id, accessibleResources, setFieldValue]);\n\n  // This component doesn't render anything visible:\n  return null;\n}\n\nexport default function OAuthFinalizePage() {\n  const router = useRouter();\n  const searchParams = useSearchParams();\n\n  const [statusMessage, setStatusMessage] = useState(\"Processing...\");\n  const [statusDetails, setStatusDetails] = useState(\n    \"Please wait while we complete the setup.\"\n  );\n  const [redirectUrl, setRedirectUrl] = useState<string | null>(null);\n  const [isError, setIsError] = useState(false);\n  const [isSubmitted, setIsSubmitted] = useState(false); // New state\n  const [pageTitle, setPageTitle] = useState(\n    \"Finalize Authorization with Third-Party service\"\n  );\n\n  const [accessibleResources, setAccessibleResources] = useState<\n    ConfluenceAccessibleResource[]\n  >([]);\n\n  // Extract query parameters\n  const credentialParam = searchParams?.get(\"credential\");\n  const credential = credentialParam ? parseInt(credentialParam, 10) : NaN;\n  const pathname = usePathname();\n  const connector = pathname?.split(\"/\")[3];\n\n  useEffect(() => {\n    const onFirstLoad = async () => {\n      // Examples\n      // connector (url segment)= \"google-drive\"\n      // sourceType (for looking up metadata) = \"google_drive\"\n\n      if (isNaN(credential) || !connector) {\n        setStatusMessage(\"Improperly formed OAuth finalization request.\");\n        setStatusDetails(\"Invalid or missing credential id.\");\n        setIsError(true);\n        return;\n      }\n\n      const sourceType = connector.replaceAll(\"-\", \"_\");\n      if (!isValidSource(sourceType)) {\n        setStatusMessage(\n          `The specified connector source type ${sourceType} does not exist.`\n        );\n        setStatusDetails(`${sourceType} is not a valid source type.`);\n        setIsError(true);\n        return;\n      }\n\n      const sourceMetadata = getSourceMetadata(sourceType as ValidSources);\n      setPageTitle(`Finalize Authorization with ${sourceMetadata.displayName}`);\n\n      setStatusMessage(\"Processing...\");\n      setStatusDetails(\n        \"Please wait while we retrieve a list of your accessible sites.\"\n      );\n      setIsError(false); // Ensure no error state during loading\n\n      try {\n        const response = await handleOAuthPrepareFinalization(\n          connector,\n          credential\n        );\n\n        if (!response) {\n          throw new Error(\"Empty response from OAuth server.\");\n        }\n\n        setAccessibleResources(response.accessible_resources);\n\n        setStatusMessage(\"Select a Confluence site\");\n        setStatusDetails(\"\");\n\n        setIsError(false);\n      } catch (error) {\n        console.error(\"OAuth finalization error:\", error);\n        setStatusMessage(\"Oops, something went wrong!\");\n        setStatusDetails(\n          \"An error occurred during the OAuth finalization process. Please try again.\"\n        );\n        setIsError(true);\n      }\n    };\n\n    onFirstLoad();\n  }, [credential, connector]);\n\n  useEffect(() => {}, [redirectUrl]);\n\n  return (\n    <div className=\"mx-auto h-screen flex flex-col\">\n      <AdminPageTitle title={pageTitle} icon={SvgKey} />\n\n      <div className=\"flex-1 flex flex-col items-center justify-center\">\n        <CardSection className=\"max-w-md w-[500px] h-[250px] p-8\">\n          <h1 className=\"text-2xl font-bold mb-4\">{statusMessage}</h1>\n          <p className=\"text-text-500\">{statusDetails}</p>\n\n          <Formik\n            initialValues={{\n              credential_id: credential,\n              cloud_id: \"\",\n              cloud_name: \"\",\n              cloud_url: \"\",\n            }}\n            validationSchema={Yup.object().shape({\n              credential_id: Yup.number().required(\n                \"Credential ID is required.\"\n              ),\n              cloud_id: Yup.string().required(\n                \"You must select a Confluence site (id not found).\"\n              ),\n              cloud_name: Yup.string().required(\n                \"You must select a Confluence site (name not found).\"\n              ),\n              cloud_url: Yup.string().required(\n                \"You must select a Confluence site (url not found).\"\n              ),\n            })}\n            validateOnMount\n            onSubmit={async (values, formikHelpers) => {\n              formikHelpers.setSubmitting(true);\n              try {\n                if (!values.cloud_id) {\n                  throw new Error(\"Cloud ID is required.\");\n                }\n\n                if (!values.cloud_name) {\n                  throw new Error(\"Cloud URL is required.\");\n                }\n\n                if (!values.cloud_url) {\n                  throw new Error(\"Cloud URL is required.\");\n                }\n\n                const response = await handleOAuthConfluenceFinalize(\n                  values.credential_id,\n                  values.cloud_id,\n                  values.cloud_name,\n                  values.cloud_url\n                );\n                formikHelpers.setSubmitting(false);\n\n                if (response) {\n                  setRedirectUrl(response.redirect_url);\n                  setStatusMessage(\"Confluence authorization finalized.\");\n                }\n\n                setIsSubmitted(true); // Mark as submitted\n              } catch (error) {\n                console.error(error);\n                setStatusMessage(\"Error during submission.\");\n                setStatusDetails(\n                  \"An error occurred during the submission process. Please try again.\"\n                );\n                setIsError(true);\n                formikHelpers.setSubmitting(false);\n              }\n            }}\n          >\n            {({ isSubmitting, isValid, setFieldValue }) => (\n              <Form>\n                {/* Debug info\n                <div className=\"mb-4 p-2 bg-gray-100 rounded text-xs\">\n                  <pre>\n                    isValid: {String(isValid)}\n                    errors: {JSON.stringify(errors, null, 2)}\n                    values: {JSON.stringify(values, null, 2)}\n                  </pre>\n                </div> */}\n\n                {/* Our helper component that reacts to changes in cloud_id */}\n                <UpdateCloudURLOnCloudIdChange\n                  accessibleResources={accessibleResources}\n                />\n\n                <Field type=\"hidden\" name=\"cloud_name\" />\n                <ErrorMessage\n                  name=\"cloud_name\"\n                  component=\"div\"\n                  className=\"error\"\n                />\n\n                <Field type=\"hidden\" name=\"cloud_url\" />\n                <ErrorMessage\n                  name=\"cloud_url\"\n                  component=\"div\"\n                  className=\"error\"\n                />\n\n                {!redirectUrl && accessibleResources.length > 0 && (\n                  <SelectorFormField\n                    name=\"cloud_id\"\n                    options={accessibleResources.map((resource) => ({\n                      name: `${resource.name} - ${resource.url}`,\n                      value: resource.id,\n                    }))}\n                    onSelect={(selectedValue) => {\n                      const selectedResource = accessibleResources.find(\n                        (resource) => resource.id === selectedValue\n                      );\n                      if (selectedResource) {\n                        setFieldValue(\"cloud_id\", selectedResource.id);\n                      }\n                    }}\n                  />\n                )}\n                <br />\n                {!redirectUrl && (\n                  <Button disabled={!isValid || isSubmitting} type=\"submit\">\n                    {isSubmitting ? \"Submitting...\" : \"Submit\"}\n                  </Button>\n                )}\n              </Form>\n            )}\n          </Formik>\n\n          {redirectUrl && !isError && (\n            <div className=\"mt-4\">\n              <p className=\"text-sm\">\n                Authorization finalized. Click{\" \"}\n                <a href={redirectUrl} className=\"text-blue-500 underline\">\n                  here\n                </a>{\" \"}\n                to continue.\n              </p>\n            </div>\n          )}\n        </CardSection>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/page.tsx",
    "content": "import { ConfigurableSources } from \"@/lib/types\";\nimport ConnectorWrapper from \"./ConnectorWrapper\";\n\nexport default async function Page(props: {\n  params: Promise<{ connector: string }>;\n}) {\n  const params = await props.params;\n  return (\n    <ConnectorWrapper\n      connector={params.connector.replace(\"-\", \"_\") as ConfigurableSources}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/Advanced.tsx",
    "content": "import React from \"react\";\nimport NumberInput from \"./ConnectorInput/NumberInput\";\nimport { TextFormField } from \"@/components/Field\";\nimport { Button } from \"@opal/components\";\nimport { SvgTrash } from \"@opal/icons\";\nexport default function AdvancedFormPage() {\n  return (\n    <div className=\"py-4 flex flex-col gap-y-6 rounded-lg max-w-2xl mx-auto\">\n      <h2 className=\"text-2xl font-bold mb-4 text-text-800\">\n        Advanced Configuration\n      </h2>\n\n      <NumberInput\n        description={`\n          Checks all documents against the source to delete those that no longer exist.\n          Note: This process checks every document, so be cautious when increasing frequency.\n          Default is 720 hours (30 days). Decimal hours are supported (e.g., 0.1 hours = 6 minutes).\n          Enter 0 to disable pruning for this connector.\n        `}\n        label=\"Prune Frequency (hours)\"\n        name=\"pruneFreq\"\n      />\n\n      <NumberInput\n        description=\"This is how frequently we pull new documents from the source (in minutes). If you input 0, we will never pull new documents for this connector.\"\n        label=\"Refresh Frequency (minutes)\"\n        name=\"refreshFreq\"\n      />\n\n      <TextFormField\n        type=\"date\"\n        subtext=\"Documents prior to this date will not be pulled in\"\n        optional\n        label=\"Indexing Start Date\"\n        name=\"indexingStart\"\n      />\n      <div className=\"mt-4 flex w-full mx-auto max-w-2xl justify-start\">\n        <Button variant=\"danger\" icon={SvgTrash} type=\"submit\">\n          Reset\n        </Button>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/ConnectorInput/FileInput.tsx",
    "content": "import { useField } from \"formik\";\nimport { FileUpload } from \"@/components/admin/connectors/FileUpload\";\nimport CredentialSubText from \"@/components/credentials/CredentialFields\";\n\ninterface FileInputProps {\n  name: string;\n  label?: string;\n  optional?: boolean;\n  description?: string;\n  multiple?: boolean;\n  isZip?: boolean;\n  hideError?: boolean;\n}\n\nexport default function FileInput({\n  name,\n  label,\n  optional = false,\n  description,\n  multiple = true,\n  isZip = false, // Default to false for multiple file uploads\n  hideError = false,\n}: FileInputProps) {\n  const [field, meta, helpers] = useField(name);\n\n  return (\n    <>\n      {label && (\n        <label\n          htmlFor={name}\n          className=\"block text-sm font-medium text-text-700 mb-1\"\n        >\n          {label}\n          {optional && <span className=\"text-text-500 ml-1\">(optional)</span>}\n        </label>\n      )}\n      {description && <CredentialSubText>{description}</CredentialSubText>}\n      <FileUpload\n        selectedFiles={\n          Array.isArray(field.value)\n            ? field.value\n            : field.value\n              ? [field.value]\n              : []\n        }\n        setSelectedFiles={(files: File[]) => {\n          if (isZip || !multiple) {\n            helpers.setValue(files[0] || null);\n          } else {\n            helpers.setValue(files);\n          }\n        }}\n        multiple={!isZip && multiple} // Allow multiple files if not a zip\n        accept={isZip ? \".zip\" : undefined} // Only accept zip files if isZip is true\n      />\n      {!hideError && meta.touched && meta.error && (\n        <div className=\"text-red-500 text-sm mt-1\">{meta.error}</div>\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/ConnectorInput/ListInput.tsx",
    "content": "import React from \"react\";\nimport { TextArrayField } from \"@/components/Field\";\nimport { useFormikContext } from \"formik\";\n\ninterface ListInputProps {\n  name: string;\n  label: string | ((credential: any) => string);\n  description: string | ((credential: any) => string);\n}\n\nconst ListInput: React.FC<ListInputProps> = ({ name, label, description }) => {\n  const { values } = useFormikContext<any>();\n  return (\n    <TextArrayField\n      name={name}\n      label={typeof label === \"function\" ? label(null) : label}\n      values={values}\n      subtext={\n        typeof description === \"function\" ? description(null) : description\n      }\n      placeholder={`Enter ${\n        typeof label === \"function\" ? label(null) : label.toLowerCase()\n      }`}\n    />\n  );\n};\n\nexport default ListInput;\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/ConnectorInput/NumberInput.tsx",
    "content": "import { Label, SubLabel } from \"@/components/Field\";\nimport { ErrorMessage, useField } from \"formik\";\n\nexport default function NumberInput({\n  label,\n  optional,\n  description,\n  name,\n  showNeverIfZero,\n}: {\n  label: string;\n  name: string;\n  optional?: boolean;\n  description?: string;\n  showNeverIfZero?: boolean;\n}) {\n  const [field, meta, helpers] = useField(name);\n\n  const handleChange = (e: React.ChangeEvent<HTMLInputElement>) => {\n    // If the input is empty, set the value to undefined or null\n    // This prevents the \"NaN from empty string\" error\n    if (e.target.value === \"\") {\n      helpers.setValue(undefined);\n    } else {\n      helpers.setValue(Number(e.target.value));\n    }\n  };\n\n  return (\n    <div className=\"w-full flex flex-col\">\n      <Label>\n        <>\n          {label}\n          {optional && <span className=\"text-text-500 ml-1\">(optional)</span>}\n        </>\n      </Label>\n      {description && <SubLabel>{description}</SubLabel>}\n\n      <input\n        {...field}\n        type=\"number\"\n        min=\"-1\"\n        onChange={handleChange}\n        value={\n          field.value === undefined || field.value === null ? \"\" : field.value\n        }\n        className={`mt-2 block w-full px-3 py-2 \n                bg-[#fff] dark:bg-transparent border border-background-300 rounded-md \n                text-sm shadow-sm placeholder-text-400\n                focus:outline-none focus:border-sky-500 focus:ring-1 focus:ring-sky-500\n                disabled:bg-background-50 disabled:text-text-500 disabled:border-background-200 disabled:shadow-none\n                invalid:border-pink-500 invalid:text-pink-600\n                focus:invalid:border-pink-500 focus:invalid:ring-pink-500`}\n      />\n      <ErrorMessage\n        name={name}\n        component=\"div\"\n        className=\"text-error text-sm mt-1\"\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/ConnectorInput/SelectInput.tsx",
    "content": "import CredentialSubText from \"@/components/credentials/CredentialFields\";\nimport { StringWithDescription } from \"@/lib/connectors/connectors\";\nimport { Field } from \"formik\";\n\nexport default function SelectInput({\n  name,\n  optional,\n  description,\n  options,\n  label,\n}: {\n  name: string;\n  optional?: boolean;\n  description?: string;\n  options: StringWithDescription[];\n  label?: string;\n}) {\n  return (\n    <>\n      <label\n        htmlFor={name}\n        className=\"block text-sm font-medium text-text-700 mb-1\"\n      >\n        {label}\n        {optional && <span className=\"text-text-500 ml-1\">(optional)</span>}\n      </label>\n      {description && <CredentialSubText>{description}</CredentialSubText>}\n\n      <Field\n        as=\"select\"\n        name={name}\n        className=\"w-full p-2 border border-border-03 rounded-08 bg-transparent text-text-04 focus:ring-2 focus:ring-lighter-agent focus:border-lighter-agent focus:outline-none\"\n      >\n        <option value=\"\">Select an option</option>\n        {options?.map((option: any) => (\n          <option key={option.name} value={option.name}>\n            {option.name}\n          </option>\n        ))}\n      </Field>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/DynamicConnectorCreationForm.tsx",
    "content": "import React, { useEffect, useState } from \"react\";\nimport CredentialSubText from \"@/components/credentials/CredentialFields\";\nimport { ConnectionConfiguration } from \"@/lib/connectors/connectors\";\nimport { TextFormField } from \"@/components/Field\";\nimport { AdvancedOptionsToggle } from \"@/components/AdvancedOptionsToggle\";\nimport { AccessTypeForm } from \"@/components/admin/connectors/AccessTypeForm\";\nimport { AccessTypeGroupSelector } from \"@/components/admin/connectors/AccessTypeGroupSelector\";\nimport { ConfigurableSources } from \"@/lib/types\";\nimport { Credential } from \"@/lib/connectors/credentials\";\nimport { RenderField } from \"./FieldRendering\";\nimport { useFormikContext } from \"formik\";\n\nexport interface DynamicConnectionFormProps {\n  config: ConnectionConfiguration;\n  values: any;\n  connector: ConfigurableSources;\n  currentCredential: Credential<any> | null;\n}\n\nexport default function DynamicConnectionForm({\n  config,\n  values,\n  connector,\n  currentCredential,\n}: DynamicConnectionFormProps) {\n  const { setFieldValue } = useFormikContext<any>(); // Get Formik's context functions\n\n  const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);\n  const [connectorNameInitialized, setConnectorNameInitialized] =\n    useState(false);\n\n  let initialConnectorName = \"\";\n  if (config.initialConnectorName) {\n    initialConnectorName =\n      currentCredential?.credential_json?.[config.initialConnectorName] ?? \"\";\n  }\n\n  useEffect(() => {\n    const field_value = values[\"name\"];\n    if (initialConnectorName && !connectorNameInitialized && !field_value) {\n      setFieldValue(\"name\", initialConnectorName);\n      setConnectorNameInitialized(true);\n    }\n  }, [initialConnectorName, setFieldValue, values]);\n\n  return (\n    <>\n      {config.subtext && (\n        <CredentialSubText>{config.subtext}</CredentialSubText>\n      )}\n\n      <TextFormField\n        subtext=\"A descriptive name for the connector.\"\n        type={\"text\"}\n        label={\"Connector Name\"}\n        name={\"name\"}\n      />\n\n      {config.values.map(\n        (field) =>\n          !field.hidden && (\n            <RenderField\n              key={field.name}\n              field={field}\n              values={values}\n              connector={connector}\n              currentCredential={currentCredential}\n            />\n          )\n      )}\n\n      <AccessTypeForm\n        connector={connector}\n        currentCredential={currentCredential}\n      />\n      <AccessTypeGroupSelector connector={connector} />\n\n      {config.advanced_values.length > 0 &&\n        (!config.advancedValuesVisibleCondition ||\n          config.advancedValuesVisibleCondition(values, currentCredential)) && (\n          <>\n            <AdvancedOptionsToggle\n              showAdvancedOptions={showAdvancedOptions}\n              setShowAdvancedOptions={setShowAdvancedOptions}\n            />\n            {showAdvancedOptions &&\n              config.advanced_values.map(\n                (field) =>\n                  !field.hidden && (\n                    <RenderField\n                      key={field.name}\n                      field={field}\n                      values={values}\n                      connector={connector}\n                      currentCredential={currentCredential}\n                    />\n                  )\n              )}\n          </>\n        )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/FieldRendering.tsx",
    "content": "import React, { FC, useEffect } from \"react\";\nimport { TabOption } from \"@/lib/connectors/connectors\";\nimport SelectInput from \"./ConnectorInput/SelectInput\";\nimport NumberInput from \"./ConnectorInput/NumberInput\";\nimport { TextFormField, MultiSelectField } from \"@/components/Field\";\nimport ListInput from \"./ConnectorInput/ListInput\";\nimport FileInput from \"./ConnectorInput/FileInput\";\nimport { ConfigurableSources } from \"@/lib/types\";\nimport { Credential } from \"@/lib/connectors/credentials\";\nimport CollapsibleSection from \"@/app/admin/agents/CollapsibleSection\";\nimport Tabs from \"@/refresh-components/Tabs\";\nimport { useFormikContext } from \"formik\";\nimport * as GeneralLayouts from \"@/layouts/general-layouts\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport { Content } from \"@opal/layouts\";\nimport CheckboxField from \"@/refresh-components/form/LabeledCheckboxField\";\nimport InputTextAreaField from \"@/refresh-components/form/InputTextAreaField\";\nimport Text from \"@/refresh-components/texts/Text\";\n\n// Define a general type for form values\ntype FormValues = Record<string, any>;\n\ninterface TabsFieldProps {\n  tabField: TabOption;\n  values: any;\n  connector: ConfigurableSources;\n  currentCredential: Credential<any> | null;\n}\n\nconst TabsField: FC<TabsFieldProps> = ({\n  tabField,\n  values,\n  connector,\n  currentCredential,\n}) => {\n  const { setFieldValue } = useFormikContext<FormValues>();\n\n  const resolvedLabel =\n    typeof tabField.label === \"function\"\n      ? tabField.label(currentCredential)\n      : tabField.label;\n  const resolvedDescription =\n    typeof tabField.description === \"function\"\n      ? tabField.description(currentCredential)\n      : tabField.description;\n\n  return (\n    <GeneralLayouts.Section gap={0.5} alignItems=\"start\">\n      {tabField.label && (\n        <Content\n          title={resolvedLabel ?? \"\"}\n          description={resolvedDescription}\n          sizePreset=\"main-content\"\n          variant=\"section\"\n        />\n      )}\n\n      {/* Ensure there's at least one tab before rendering */}\n      {tabField.tabs.length === 0 ? (\n        <Text text03 secondaryBody>\n          No tabs to display.\n        </Text>\n      ) : (\n        <Tabs\n          defaultValue={tabField.defaultTab || tabField.tabs[0]?.value}\n          onValueChange={(newTab) => {\n            // Clear values from other tabs but preserve defaults\n            tabField.tabs.forEach((tab) => {\n              if (tab.value !== newTab) {\n                tab.fields.forEach((field) => {\n                  // Only clear if not default value\n                  if (values[field.name] !== field.default) {\n                    setFieldValue(field.name, field.default);\n                  }\n                });\n              }\n            });\n          }}\n        >\n          <Tabs.List>\n            {tabField.tabs.map((tab) => (\n              <Tabs.Trigger key={tab.value} value={tab.value}>\n                {tab.label}\n              </Tabs.Trigger>\n            ))}\n          </Tabs.List>\n          {tabField.tabs.map((tab) => (\n            <Tabs.Content key={tab.value} value={tab.value}>\n              <GeneralLayouts.Section gap={0.75} alignItems=\"start\">\n                {tab.fields.map((subField) => {\n                  // Check visibility condition first\n                  if (\n                    subField.visibleCondition &&\n                    !subField.visibleCondition(values, currentCredential)\n                  ) {\n                    return null;\n                  }\n\n                  return (\n                    <RenderField\n                      key={subField.name}\n                      field={subField}\n                      values={values}\n                      connector={connector}\n                      currentCredential={currentCredential}\n                    />\n                  );\n                })}\n              </GeneralLayouts.Section>\n            </Tabs.Content>\n          ))}\n        </Tabs>\n      )}\n    </GeneralLayouts.Section>\n  );\n};\n\ninterface RenderFieldProps {\n  field: any;\n  values: any;\n  connector: ConfigurableSources;\n  currentCredential: Credential<any> | null;\n}\n\nexport const RenderField: FC<RenderFieldProps> = ({\n  field,\n  values,\n  connector,\n  currentCredential,\n}) => {\n  const { setFieldValue } = useFormikContext<FormValues>(); // Get Formik's context functions\n\n  const label =\n    typeof field.label === \"function\"\n      ? field.label(currentCredential)\n      : field.label;\n  const description =\n    typeof field.description === \"function\"\n      ? field.description(currentCredential)\n      : field.description;\n  const disabled =\n    typeof field.disabled === \"function\"\n      ? field.disabled(currentCredential)\n      : field.disabled ?? false;\n  const initialValue =\n    typeof field.initial === \"function\"\n      ? field.initial(currentCredential)\n      : field.initial ?? \"\";\n\n  // if initialValue exists, prepopulate the field with it\n  useEffect(() => {\n    const field_value = values[field.name];\n    if (initialValue && field_value === undefined) {\n      setFieldValue(field.name, initialValue);\n    }\n  }, [field.name, initialValue, setFieldValue, values]);\n\n  if (field.type === \"tab\") {\n    return (\n      <TabsField\n        tabField={field}\n        values={values}\n        connector={connector}\n        currentCredential={currentCredential}\n      />\n    );\n  }\n\n  const fieldContent = (\n    <>\n      {field.type === \"zip\" || field.type === \"file\" ? (\n        <FileInput\n          name={field.name}\n          isZip={field.type === \"zip\"}\n          label={label}\n          optional={field.optional}\n          description={description}\n        />\n      ) : field.type === \"list\" ? (\n        <ListInput name={field.name} label={label} description={description} />\n      ) : field.type === \"select\" ? (\n        <SelectInput\n          name={field.name}\n          optional={field.optional}\n          description={description}\n          options={field.options || []}\n          label={label}\n        />\n      ) : field.type === \"multiselect\" ? (\n        <MultiSelectField\n          name={field.name}\n          label={label}\n          subtext={description}\n          options={\n            field.options?.map((option: { value: string; name: string }) => ({\n              value: option.value,\n              label: option.name,\n            })) || []\n          }\n          selectedInitially={values[field.name] || field.default || []}\n          onChange={(selected) => setFieldValue(field.name, selected)}\n        />\n      ) : field.type === \"number\" ? (\n        <NumberInput\n          label={label}\n          optional={field.optional}\n          description={description}\n          name={field.name}\n        />\n      ) : field.type === \"checkbox\" ? (\n        <GeneralLayouts.Section\n          flexDirection=\"row\"\n          justifyContent=\"start\"\n          alignItems=\"start\"\n          gap={0.5}\n        >\n          <CheckboxField\n            name={field.name}\n            label={label}\n            sublabel={description}\n            disabled={disabled}\n            size=\"lg\"\n            onChange={(checked) => setFieldValue(field.name, checked)}\n          />\n        </GeneralLayouts.Section>\n      ) : field.type === \"text\" ? (\n        field.isTextArea ? (\n          <InputLayouts.Vertical\n            name={field.name}\n            title={label}\n            description={description}\n            suffix={field.optional ? \"optional\" : undefined}\n          >\n            <InputTextAreaField\n              name={field.name}\n              placeholder={field.placeholder}\n              variant={disabled ? \"disabled\" : undefined}\n              rows={1}\n            />\n          </InputLayouts.Vertical>\n        ) : (\n          <TextFormField\n            subtext={description}\n            optional={field.optional}\n            type={field.type}\n            label={label}\n            name={field.name}\n            isTextArea={false}\n            defaultHeight={\"h-15\"}\n            disabled={disabled}\n            onChange={(e) => setFieldValue(field.name, e.target.value)}\n          />\n        )\n      ) : field.type === \"string_tab\" ? (\n        <GeneralLayouts.Section>\n          <Text text03 secondaryBody>\n            {description}\n          </Text>\n        </GeneralLayouts.Section>\n      ) : (\n        <>INVALID FIELD TYPE</>\n      )}\n    </>\n  );\n\n  if (field.wrapInCollapsible) {\n    return (\n      <CollapsibleSection prompt={label} key={field.name}>\n        {fieldContent}\n      </CollapsibleSection>\n    );\n  }\n\n  return (\n    <GeneralLayouts.Section alignItems=\"start\">\n      {fieldContent}\n    </GeneralLayouts.Section>\n  );\n};\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/gdrive/Credential.tsx",
    "content": "import { toast } from \"@/hooks/useToast\";\nimport React, { useState, useEffect } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport * as Yup from \"yup\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { adminDeleteCredential } from \"@/lib/credential\";\nimport { setupGoogleDriveOAuth } from \"@/lib/googleDrive\";\nimport { DOCS_ADMINS_PATH } from \"@/lib/constants\";\nimport { TextFormField, SectionHeader } from \"@/components/Field\";\nimport { Form, Formik } from \"formik\";\nimport { User } from \"@/lib/types\";\nimport { Button } from \"@opal/components\";\nimport {\n  Credential,\n  GoogleDriveCredentialJson,\n  GoogleDriveServiceAccountCredentialJson,\n} from \"@/lib/connectors/credentials\";\nimport { refreshAllGoogleData } from \"@/lib/googleConnector\";\nimport { ValidSources } from \"@/lib/types\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { buildSimilarCredentialInfoURL } from \"@/app/admin/connector/[ccPairId]/lib\";\nimport { FiFile, FiCheck, FiLink, FiAlertTriangle } from \"react-icons/fi\";\nimport { cn, truncateString } from \"@/lib/utils\";\n\ntype GoogleDriveCredentialJsonTypes = \"authorized_user\" | \"service_account\";\n\nexport const DriveJsonUpload = ({ onSuccess }: { onSuccess?: () => void }) => {\n  const { mutate } = useSWRConfig();\n  const [isUploading, setIsUploading] = useState(false);\n  const [fileName, setFileName] = useState<string | undefined>();\n  const [isDragging, setIsDragging] = useState(false);\n\n  const handleFileUpload = async (file: File) => {\n    setIsUploading(true);\n    setFileName(file.name);\n\n    const reader = new FileReader();\n    reader.onload = async (loadEvent) => {\n      if (!loadEvent?.target?.result) {\n        setIsUploading(false);\n        return;\n      }\n\n      const credentialJsonStr = loadEvent.target.result as string;\n\n      // Check credential type\n      let credentialFileType: GoogleDriveCredentialJsonTypes;\n      try {\n        const appCredentialJson = JSON.parse(credentialJsonStr);\n        if (appCredentialJson.web) {\n          credentialFileType = \"authorized_user\";\n        } else if (appCredentialJson.type === \"service_account\") {\n          credentialFileType = \"service_account\";\n        } else {\n          throw new Error(\n            \"Unknown credential type, expected one of 'OAuth Web application' or 'Service Account'\"\n          );\n        }\n      } catch (e) {\n        toast.error(`Invalid file provided - ${e}`);\n        setIsUploading(false);\n        return;\n      }\n\n      if (credentialFileType === \"authorized_user\") {\n        const response = await fetch(\n          \"/api/manage/admin/connector/google-drive/app-credential\",\n          {\n            method: \"PUT\",\n            headers: {\n              \"Content-Type\": \"application/json\",\n            },\n            body: credentialJsonStr,\n          }\n        );\n        if (response.ok) {\n          toast.success(\"Successfully uploaded app credentials\");\n          mutate(SWR_KEYS.googleConnectorAppCredential(\"google-drive\"));\n          if (onSuccess) {\n            onSuccess();\n          }\n        } else {\n          const errorMsg = await response.text();\n          toast.error(`Failed to upload app credentials - ${errorMsg}`);\n        }\n      }\n\n      if (credentialFileType === \"service_account\") {\n        const response = await fetch(\n          \"/api/manage/admin/connector/google-drive/service-account-key\",\n          {\n            method: \"PUT\",\n            headers: {\n              \"Content-Type\": \"application/json\",\n            },\n            body: credentialJsonStr,\n          }\n        );\n        if (response.ok) {\n          toast.success(\"Successfully uploaded service account key\");\n          mutate(SWR_KEYS.googleConnectorServiceAccountKey(\"google-drive\"));\n          if (onSuccess) {\n            onSuccess();\n          }\n        } else {\n          const errorMsg = await response.text();\n          toast.error(`Failed to upload service account key - ${errorMsg}`);\n        }\n      }\n      setIsUploading(false);\n    };\n\n    reader.readAsText(file);\n  };\n\n  const handleDragEnter = (e: React.DragEvent<HTMLLabelElement>) => {\n    e.preventDefault();\n    e.stopPropagation();\n    if (!isUploading) {\n      setIsDragging(true);\n    }\n  };\n\n  const handleDragLeave = (e: React.DragEvent<HTMLLabelElement>) => {\n    e.preventDefault();\n    e.stopPropagation();\n    setIsDragging(false);\n  };\n\n  const handleDragOver = (e: React.DragEvent<HTMLLabelElement>) => {\n    e.preventDefault();\n    e.stopPropagation();\n  };\n\n  const handleDrop = (e: React.DragEvent<HTMLLabelElement>) => {\n    e.preventDefault();\n    e.stopPropagation();\n    setIsDragging(false);\n\n    if (isUploading) return;\n\n    const files = e.dataTransfer.files;\n    if (files.length > 0) {\n      const file = files[0];\n      if (\n        file !== undefined &&\n        (file.type === \"application/json\" || file.name.endsWith(\".json\"))\n      ) {\n        handleFileUpload(file);\n      } else {\n        toast.error(\"Please upload a JSON file\");\n      }\n    }\n  };\n\n  return (\n    <div className=\"flex flex-col mt-4\">\n      <div className=\"flex items-center\">\n        <div className=\"relative flex flex-1 items-center\">\n          <label\n            className={cn(\n              \"flex h-10 items-center justify-center w-full px-4 py-2 border border-dashed rounded-md transition-colors\",\n              isUploading\n                ? \"opacity-70 cursor-not-allowed border-background-400 bg-background-50/30\"\n                : isDragging\n                  ? \"bg-background-50/50 border-primary dark:border-primary\"\n                  : \"cursor-pointer hover:bg-background-50/30 hover:border-primary dark:hover:border-primary border-background-300 dark:border-background-600\"\n            )}\n            onDragEnter={handleDragEnter}\n            onDragLeave={handleDragLeave}\n            onDragOver={handleDragOver}\n            onDrop={handleDrop}\n          >\n            <div className=\"flex items-center space-x-2\">\n              {isUploading ? (\n                <div className=\"h-4 w-4 border-t-2 border-b-2 border-primary rounded-full animate-spin\"></div>\n              ) : (\n                <FiFile className=\"h-4 w-4 text-text-500\" />\n              )}\n              <span className=\"text-sm text-text-500\">\n                {isUploading\n                  ? `Uploading ${truncateString(fileName || \"file\", 50)}...`\n                  : isDragging\n                    ? \"Drop JSON file here\"\n                    : truncateString(\n                        fileName || \"Select or drag JSON credentials file...\",\n                        50\n                      )}\n              </span>\n            </div>\n            <input\n              className=\"sr-only\"\n              type=\"file\"\n              accept=\".json\"\n              disabled={isUploading}\n              onChange={(event) => {\n                if (!event.target.files?.length) {\n                  return;\n                }\n                const file = event.target.files[0];\n                if (file === undefined) {\n                  return;\n                }\n                handleFileUpload(file);\n              }}\n            />\n          </label>\n        </div>\n      </div>\n    </div>\n  );\n};\n\ninterface DriveJsonUploadSectionProps {\n  appCredentialData?: { client_id: string };\n  serviceAccountCredentialData?: { service_account_email: string };\n  isAdmin: boolean;\n  onSuccess?: () => void;\n  existingAuthCredential?: boolean;\n}\n\nexport const DriveJsonUploadSection = ({\n  appCredentialData,\n  serviceAccountCredentialData,\n  isAdmin,\n  onSuccess,\n  existingAuthCredential,\n}: DriveJsonUploadSectionProps) => {\n  const { mutate } = useSWRConfig();\n  const router = useRouter();\n  const [localServiceAccountData, setLocalServiceAccountData] = useState(\n    serviceAccountCredentialData\n  );\n  const [localAppCredentialData, setLocalAppCredentialData] =\n    useState(appCredentialData);\n\n  // Update local state when props change\n  useEffect(() => {\n    setLocalServiceAccountData(serviceAccountCredentialData);\n    setLocalAppCredentialData(appCredentialData);\n  }, [serviceAccountCredentialData, appCredentialData]);\n\n  const handleSuccess = () => {\n    if (onSuccess) {\n      onSuccess();\n    } else {\n      refreshAllGoogleData(ValidSources.GoogleDrive);\n    }\n  };\n\n  if (!isAdmin) {\n    return (\n      <div>\n        <div className=\"flex items-start py-3 px-4 bg-yellow-50/30 dark:bg-yellow-900/5 rounded\">\n          <FiAlertTriangle className=\"text-yellow-500 h-5 w-5 mr-2 mt-0.5 flex-shrink-0\" />\n          <p className=\"text-sm\">\n            Curators are unable to set up the Google Drive credentials. To add a\n            Google Drive connector, please contact an administrator.\n          </p>\n        </div>\n      </div>\n    );\n  }\n\n  return (\n    <div>\n      <p className=\"text-sm mb-3\">\n        To connect your Google Drive, create credentials (either OAuth App or\n        Service Account), download the JSON file, and upload it below.\n      </p>\n      <div className=\"mb-4\">\n        <a\n          className=\"text-primary hover:text-primary/80 flex items-center gap-1 text-sm\"\n          target=\"_blank\"\n          href={`${DOCS_ADMINS_PATH}/connectors/official/google_drive/overview`}\n          rel=\"noreferrer\"\n        >\n          <FiLink className=\"h-3 w-3\" />\n          View detailed setup instructions\n        </a>\n      </div>\n\n      {(localServiceAccountData?.service_account_email ||\n        localAppCredentialData?.client_id) && (\n        <div className=\"mb-4\">\n          <div className=\"relative flex flex-1 items-center\">\n            <label\n              className={cn(\n                \"flex h-10 items-center justify-center w-full px-4 py-2 border border-dashed rounded-md transition-colors\",\n                false\n                  ? \"opacity-70 cursor-not-allowed border-background-400 bg-background-50/30\"\n                  : \"cursor-pointer hover:bg-background-50/30 hover:border-primary dark:hover:border-primary border-background-300 dark:border-background-600\"\n              )}\n            >\n              <div className=\"flex items-center space-x-2\">\n                {false ? (\n                  <div className=\"h-4 w-4 border-t-2 border-b-2 border-primary rounded-full animate-spin\"></div>\n                ) : (\n                  <FiFile className=\"h-4 w-4 text-text-500\" />\n                )}\n                <span className=\"text-sm text-text-500\">\n                  {truncateString(\n                    localServiceAccountData?.service_account_email ||\n                      localAppCredentialData?.client_id ||\n                      \"\",\n                    50\n                  )}\n                </span>\n              </div>\n            </label>\n          </div>\n          {isAdmin && !existingAuthCredential && (\n            <div className=\"mt-2\">\n              <Button\n                variant=\"danger\"\n                onClick={async () => {\n                  const endpoint =\n                    localServiceAccountData?.service_account_email\n                      ? SWR_KEYS.googleConnectorServiceAccountKey(\n                          \"google-drive\"\n                        )\n                      : SWR_KEYS.googleConnectorAppCredential(\"google-drive\");\n\n                  const response = await fetch(endpoint, {\n                    method: \"DELETE\",\n                  });\n\n                  if (response.ok) {\n                    mutate(endpoint);\n                    // Also mutate the credential endpoints to ensure Step 2 is reset\n                    mutate(\n                      buildSimilarCredentialInfoURL(ValidSources.GoogleDrive)\n                    );\n\n                    // Add additional mutations to refresh all credential-related endpoints\n                    mutate(SWR_KEYS.googleConnectorCredentials(\"google-drive\"));\n                    mutate(\n                      SWR_KEYS.googleConnectorPublicCredential(\"google-drive\")\n                    );\n                    mutate(\n                      SWR_KEYS.googleConnectorServiceAccountCredential(\n                        \"google-drive\"\n                      )\n                    );\n\n                    toast.success(\n                      `Successfully deleted ${\n                        localServiceAccountData\n                          ? \"service account key\"\n                          : \"app credentials\"\n                      }`\n                    );\n                    // Immediately update local state\n                    if (localServiceAccountData) {\n                      setLocalServiceAccountData(undefined);\n                    } else {\n                      setLocalAppCredentialData(undefined);\n                    }\n                    handleSuccess();\n                  } else {\n                    const errorMsg = await response.text();\n                    toast.error(`Failed to delete credentials - ${errorMsg}`);\n                  }\n                }}\n              >\n                Delete Credentials\n              </Button>\n            </div>\n          )}\n        </div>\n      )}\n\n      {!(\n        localServiceAccountData?.service_account_email ||\n        localAppCredentialData?.client_id\n      ) && <DriveJsonUpload onSuccess={handleSuccess} />}\n    </div>\n  );\n};\n\ninterface DriveCredentialSectionProps {\n  googleDrivePublicUploadedCredential?: Credential<GoogleDriveCredentialJson>;\n  googleDriveServiceAccountCredential?: Credential<GoogleDriveServiceAccountCredentialJson>;\n  serviceAccountKeyData?: { service_account_email: string };\n  appCredentialData?: { client_id: string };\n  refreshCredentials: () => void;\n  connectorAssociated: boolean;\n  user: User | null;\n}\n\nasync function handleRevokeAccess(\n  connectorAssociated: boolean,\n  existingCredential:\n    | Credential<GoogleDriveCredentialJson>\n    | Credential<GoogleDriveServiceAccountCredentialJson>,\n  refreshCredentials: () => void\n) {\n  if (connectorAssociated) {\n    const message =\n      \"Cannot revoke the Google Drive credential while any connector is still associated with the credential. \" +\n      \"Please delete all associated connectors, then try again.\";\n    toast.error(message);\n    return;\n  }\n\n  await adminDeleteCredential(existingCredential.id);\n  toast.success(\"Successfully revoked the Google Drive credential!\");\n\n  refreshCredentials();\n}\n\nexport const DriveAuthSection = ({\n  googleDrivePublicUploadedCredential,\n  googleDriveServiceAccountCredential,\n  serviceAccountKeyData,\n  appCredentialData,\n  refreshCredentials,\n  connectorAssociated,\n  user,\n}: DriveCredentialSectionProps) => {\n  const router = useRouter();\n  const [isAuthenticating, setIsAuthenticating] = useState(false);\n  const [localServiceAccountData, setLocalServiceAccountData] = useState(\n    serviceAccountKeyData\n  );\n  const [localAppCredentialData, setLocalAppCredentialData] =\n    useState(appCredentialData);\n  const [\n    localGoogleDrivePublicCredential,\n    setLocalGoogleDrivePublicCredential,\n  ] = useState(googleDrivePublicUploadedCredential);\n  const [\n    localGoogleDriveServiceAccountCredential,\n    setLocalGoogleDriveServiceAccountCredential,\n  ] = useState(googleDriveServiceAccountCredential);\n\n  // Update local state when props change\n  useEffect(() => {\n    setLocalServiceAccountData(serviceAccountKeyData);\n    setLocalAppCredentialData(appCredentialData);\n    setLocalGoogleDrivePublicCredential(googleDrivePublicUploadedCredential);\n    setLocalGoogleDriveServiceAccountCredential(\n      googleDriveServiceAccountCredential\n    );\n  }, [\n    serviceAccountKeyData,\n    appCredentialData,\n    googleDrivePublicUploadedCredential,\n    googleDriveServiceAccountCredential,\n  ]);\n\n  const existingCredential =\n    localGoogleDrivePublicCredential ||\n    localGoogleDriveServiceAccountCredential;\n  if (existingCredential) {\n    return (\n      <div>\n        <div className=\"mt-4\">\n          <div className=\"py-3 px-4 bg-blue-50/30 dark:bg-blue-900/5 rounded mb-4 flex items-start\">\n            <FiCheck className=\"text-blue-500 h-5 w-5 mr-2 mt-0.5 flex-shrink-0\" />\n            <div className=\"flex-1\">\n              <span className=\"font-medium block\">Authentication Complete</span>\n              <p className=\"text-sm mt-1 text-text-500 dark:text-text-400 break-words\">\n                Your Google Drive credentials have been successfully uploaded\n                and authenticated.\n              </p>\n            </div>\n          </div>\n          <Button\n            variant=\"danger\"\n            onClick={async () => {\n              handleRevokeAccess(\n                connectorAssociated,\n                existingCredential,\n                refreshCredentials\n              );\n            }}\n          >\n            Revoke Access\n          </Button>\n        </div>\n      </div>\n    );\n  }\n\n  // If no credentials are uploaded, show message to complete step 1 first\n  if (\n    !localServiceAccountData?.service_account_email &&\n    !localAppCredentialData?.client_id\n  ) {\n    return (\n      <div>\n        <SectionHeader>Google Drive Authentication</SectionHeader>\n        <div className=\"mt-4\">\n          <div className=\"flex items-start py-3 px-4 bg-yellow-50/30 dark:bg-yellow-900/5 rounded\">\n            <FiAlertTriangle className=\"text-yellow-500 h-5 w-5 mr-2 mt-0.5 flex-shrink-0\" />\n            <p className=\"text-sm\">\n              Please complete Step 1 by uploading either OAuth credentials or a\n              Service Account key before proceeding with authentication.\n            </p>\n          </div>\n        </div>\n      </div>\n    );\n  }\n\n  if (localServiceAccountData?.service_account_email) {\n    return (\n      <div>\n        <div className=\"mt-4\">\n          <Formik\n            initialValues={{\n              google_primary_admin: user?.email || \"\",\n            }}\n            validationSchema={Yup.object().shape({\n              google_primary_admin: Yup.string()\n                .email(\"Must be a valid email\")\n                .required(\"Required\"),\n            })}\n            onSubmit={async (values, formikHelpers) => {\n              formikHelpers.setSubmitting(true);\n              try {\n                const response = await fetch(\n                  \"/api/manage/admin/connector/google-drive/service-account-credential\",\n                  {\n                    method: \"PUT\",\n                    headers: {\n                      \"Content-Type\": \"application/json\",\n                    },\n                    body: JSON.stringify({\n                      google_primary_admin: values.google_primary_admin,\n                    }),\n                  }\n                );\n\n                if (response.ok) {\n                  toast.success(\n                    \"Successfully created service account credential\"\n                  );\n                  refreshCredentials();\n                } else {\n                  const errorMsg = await response.text();\n                  toast.error(\n                    `Failed to create service account credential - ${errorMsg}`\n                  );\n                }\n              } catch (error) {\n                toast.error(\n                  `Failed to create service account credential - ${error}`\n                );\n              } finally {\n                formikHelpers.setSubmitting(false);\n              }\n            }}\n          >\n            {({ isSubmitting }) => (\n              <Form>\n                <TextFormField\n                  name=\"google_primary_admin\"\n                  label=\"Primary Admin Email:\"\n                  subtext=\"Enter the email of an admin/owner of the Google Organization that owns the Google Drive(s) you want to index.\"\n                />\n                <div className=\"flex\">\n                  <Button disabled={isSubmitting} type=\"submit\">\n                    {isSubmitting ? \"Creating...\" : \"Create Credential\"}\n                  </Button>\n                </div>\n              </Form>\n            )}\n          </Formik>\n        </div>\n      </div>\n    );\n  }\n\n  if (localAppCredentialData?.client_id) {\n    return (\n      <div>\n        <div className=\"bg-background-50/30 dark:bg-background-900/20 rounded mb-4\">\n          <p className=\"text-sm\">\n            Next, you need to authenticate with Google Drive via OAuth. This\n            gives us read access to the documents you have access to in your\n            Google Drive account.\n          </p>\n        </div>\n        <Button\n          disabled={isAuthenticating}\n          onClick={async () => {\n            setIsAuthenticating(true);\n            try {\n              const [authUrl, errorMsg] = await setupGoogleDriveOAuth({\n                isAdmin: true,\n                name: \"OAuth (uploaded)\",\n              });\n\n              if (authUrl) {\n                router.push(authUrl as Route);\n              } else {\n                toast.error(errorMsg);\n                setIsAuthenticating(false);\n              }\n            } catch (error) {\n              toast.error(\n                `Failed to authenticate with Google Drive - ${error}`\n              );\n              setIsAuthenticating(false);\n            }\n          }}\n        >\n          {isAuthenticating\n            ? \"Authenticating...\"\n            : \"Authenticate with Google Drive\"}\n        </Button>\n      </div>\n    );\n  }\n\n  // This code path should not be reached with the new conditions above\n  return null;\n};\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/gdrive/GoogleDrivePage.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { LoadingAnimation } from \"@/components/Loading\";\nimport { ValidSources } from \"@/lib/types\";\nimport { usePublicCredentials } from \"@/lib/hooks\";\nimport Title from \"@/components/ui/title\";\nimport { DriveJsonUploadSection, DriveAuthSection } from \"./Credential\";\nimport {\n  Credential,\n  GoogleDriveCredentialJson,\n  GoogleDriveServiceAccountCredentialJson,\n} from \"@/lib/connectors/credentials\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport {\n  useGoogleAppCredential,\n  useGoogleServiceAccountKey,\n  useGoogleCredentials,\n  useConnectorsByCredentialId,\n  checkCredentialsFetched,\n  filterUploadedCredentials,\n  checkConnectorsExist,\n  refreshAllGoogleData,\n} from \"@/lib/googleConnector\";\n\nconst GDriveMain = () => {\n  const { isAdmin, user } = useUser();\n\n  // Get app credential and service account key\n  const {\n    data: appCredentialData,\n    isLoading: isAppCredentialLoading,\n    error: isAppCredentialError,\n  } = useGoogleAppCredential(\"google_drive\");\n\n  const {\n    data: serviceAccountKeyData,\n    isLoading: isServiceAccountKeyLoading,\n    error: isServiceAccountKeyError,\n  } = useGoogleServiceAccountKey(\"google_drive\");\n\n  // Get all public credentials\n  const {\n    data: credentialsData,\n    isLoading: isCredentialsLoading,\n    error: credentialsError,\n    refreshCredentials,\n  } = usePublicCredentials();\n\n  // Get Google Drive-specific credentials\n  const {\n    data: googleDriveCredentials,\n    isLoading: isGoogleDriveCredentialsLoading,\n    error: googleDriveCredentialsError,\n  } = useGoogleCredentials(ValidSources.GoogleDrive);\n\n  // Filter uploaded credentials and get credential ID\n  const { credential_id, uploadedCredentials } = filterUploadedCredentials(\n    googleDriveCredentials\n  );\n\n  // Get connectors for the credential ID\n  const {\n    data: googleDriveConnectors,\n    isLoading: isGoogleDriveConnectorsLoading,\n    error: googleDriveConnectorsError,\n    refreshConnectorsByCredentialId,\n  } = useConnectorsByCredentialId(credential_id);\n\n  // Check if credentials were successfully fetched\n  const {\n    appCredentialSuccessfullyFetched,\n    serviceAccountKeySuccessfullyFetched,\n  } = checkCredentialsFetched(\n    appCredentialData,\n    isAppCredentialError,\n    serviceAccountKeyData,\n    isServiceAccountKeyError\n  );\n\n  // Handle refresh of all data\n  const handleRefresh = () => {\n    refreshCredentials();\n    refreshConnectorsByCredentialId();\n    refreshAllGoogleData(ValidSources.GoogleDrive);\n  };\n\n  // Loading state\n  if (\n    (!appCredentialSuccessfullyFetched && isAppCredentialLoading) ||\n    (!serviceAccountKeySuccessfullyFetched && isServiceAccountKeyLoading) ||\n    (!credentialsData && isCredentialsLoading) ||\n    (!googleDriveCredentials && isGoogleDriveCredentialsLoading) ||\n    (!googleDriveConnectors && isGoogleDriveConnectorsLoading)\n  ) {\n    return (\n      <div className=\"mx-auto\">\n        <LoadingAnimation text=\"\" />\n      </div>\n    );\n  }\n\n  // Error states\n  if (credentialsError || !credentialsData) {\n    return <ErrorCallout errorTitle=\"Failed to load credentials.\" />;\n  }\n\n  if (googleDriveCredentialsError || !googleDriveCredentials) {\n    return (\n      <ErrorCallout errorTitle=\"Failed to load Google Drive credentials.\" />\n    );\n  }\n\n  if (\n    !appCredentialSuccessfullyFetched ||\n    !serviceAccountKeySuccessfullyFetched\n  ) {\n    return (\n      <ErrorCallout errorTitle=\"Error loading Google Drive app credentials. Contact an administrator.\" />\n    );\n  }\n\n  if (googleDriveConnectorsError) {\n    return (\n      <ErrorCallout errorTitle=\"Failed to load Google Drive associated connectors.\" />\n    );\n  }\n\n  // Check if connectors exist\n  const connectorAssociated = checkConnectorsExist(googleDriveConnectors);\n\n  // Get the uploaded OAuth credential\n  const googleDrivePublicUploadedCredential:\n    | Credential<GoogleDriveCredentialJson>\n    | undefined = credentialsData.find(\n    (credential) =>\n      credential.credential_json?.google_tokens &&\n      credential.admin_public &&\n      credential.source === \"google_drive\" &&\n      credential.credential_json.authentication_method !== \"oauth_interactive\"\n  );\n\n  // Get the service account credential\n  const googleDriveServiceAccountCredential:\n    | Credential<GoogleDriveServiceAccountCredentialJson>\n    | undefined = credentialsData.find(\n    (credential) =>\n      credential.credential_json?.google_service_account_key &&\n      credential.source === \"google_drive\"\n  );\n\n  return (\n    <>\n      <Title className=\"mb-2 mt-6\">Step 1: Provide your Credentials</Title>\n      <DriveJsonUploadSection\n        appCredentialData={appCredentialData}\n        serviceAccountCredentialData={serviceAccountKeyData}\n        isAdmin={isAdmin}\n        onSuccess={handleRefresh}\n        existingAuthCredential={Boolean(\n          googleDrivePublicUploadedCredential ||\n            googleDriveServiceAccountCredential\n        )}\n      />\n\n      {isAdmin &&\n        (appCredentialData?.client_id ||\n          serviceAccountKeyData?.service_account_email) && (\n          <>\n            <Title className=\"mb-2 mt-6\">Step 2: Authenticate with Onyx</Title>\n            <DriveAuthSection\n              refreshCredentials={handleRefresh}\n              googleDrivePublicUploadedCredential={\n                googleDrivePublicUploadedCredential\n              }\n              googleDriveServiceAccountCredential={\n                googleDriveServiceAccountCredential\n              }\n              appCredentialData={appCredentialData}\n              serviceAccountKeyData={serviceAccountKeyData}\n              connectorAssociated={connectorAssociated}\n              user={user}\n            />\n          </>\n        )}\n    </>\n  );\n};\n\nexport default GDriveMain;\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/gmail/Credential.tsx",
    "content": "import { Button } from \"@opal/components\";\nimport { toast } from \"@/hooks/useToast\";\nimport React, { useState, useEffect } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport * as Yup from \"yup\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { adminDeleteCredential } from \"@/lib/credential\";\nimport { setupGmailOAuth } from \"@/lib/gmail\";\nimport { DOCS_ADMINS_PATH } from \"@/lib/constants\";\nimport { CRAFT_OAUTH_COOKIE_NAME } from \"@/app/craft/v1/constants\";\nimport Cookies from \"js-cookie\";\nimport { TextFormField, SectionHeader } from \"@/components/Field\";\nimport { Form, Formik } from \"formik\";\nimport { User } from \"@/lib/types\";\nimport {\n  Credential,\n  GmailCredentialJson,\n  GmailServiceAccountCredentialJson,\n} from \"@/lib/connectors/credentials\";\nimport { refreshAllGoogleData } from \"@/lib/googleConnector\";\nimport { ValidSources } from \"@/lib/types\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { buildSimilarCredentialInfoURL } from \"@/app/admin/connector/[ccPairId]/lib\";\nimport { FiFile, FiCheck, FiLink, FiAlertTriangle } from \"react-icons/fi\";\nimport { cn, truncateString } from \"@/lib/utils\";\nimport { Section } from \"@/layouts/general-layouts\";\n\ntype GmailCredentialJsonTypes = \"authorized_user\" | \"service_account\";\n\nconst GmailCredentialUpload = ({ onSuccess }: { onSuccess?: () => void }) => {\n  const { mutate } = useSWRConfig();\n  const [isUploading, setIsUploading] = useState(false);\n  const [fileName, setFileName] = useState<string | undefined>();\n  const [isDragging, setIsDragging] = useState(false);\n\n  const handleFileUpload = async (file: File) => {\n    setIsUploading(true);\n    setFileName(file.name);\n\n    const reader = new FileReader();\n    reader.onload = async (loadEvent) => {\n      if (!loadEvent?.target?.result) {\n        setIsUploading(false);\n        return;\n      }\n\n      const credentialJsonStr = loadEvent.target.result as string;\n\n      // Check credential type\n      let credentialFileType: GmailCredentialJsonTypes;\n      try {\n        const appCredentialJson = JSON.parse(credentialJsonStr);\n        if (appCredentialJson.web) {\n          credentialFileType = \"authorized_user\";\n        } else if (appCredentialJson.type === \"service_account\") {\n          credentialFileType = \"service_account\";\n        } else {\n          throw new Error(\n            \"Unknown credential type, expected one of 'OAuth Web application' or 'Service Account'\"\n          );\n        }\n      } catch (e) {\n        toast.error(`Invalid file provided - ${e}`);\n        setIsUploading(false);\n        return;\n      }\n\n      if (credentialFileType === \"authorized_user\") {\n        const response = await fetch(\n          \"/api/manage/admin/connector/gmail/app-credential\",\n          {\n            method: \"PUT\",\n            headers: {\n              \"Content-Type\": \"application/json\",\n            },\n            body: credentialJsonStr,\n          }\n        );\n        if (response.ok) {\n          toast.success(\"Successfully uploaded app credentials\");\n          mutate(SWR_KEYS.googleConnectorAppCredential(\"gmail\"));\n          if (onSuccess) {\n            onSuccess();\n          }\n        } else {\n          const errorMsg = await response.text();\n          toast.error(`Failed to upload app credentials - ${errorMsg}`);\n        }\n      }\n\n      if (credentialFileType === \"service_account\") {\n        const response = await fetch(\n          \"/api/manage/admin/connector/gmail/service-account-key\",\n          {\n            method: \"PUT\",\n            headers: {\n              \"Content-Type\": \"application/json\",\n            },\n            body: credentialJsonStr,\n          }\n        );\n        if (response.ok) {\n          toast.success(\"Successfully uploaded service account key\");\n          mutate(SWR_KEYS.googleConnectorServiceAccountKey(\"gmail\"));\n          if (onSuccess) {\n            onSuccess();\n          }\n        } else {\n          const errorMsg = await response.text();\n          toast.error(`Failed to upload service account key - ${errorMsg}`);\n        }\n      }\n      setIsUploading(false);\n    };\n\n    reader.readAsText(file);\n  };\n\n  const handleDragEnter = (e: React.DragEvent<HTMLLabelElement>) => {\n    e.preventDefault();\n    e.stopPropagation();\n    if (!isUploading) {\n      setIsDragging(true);\n    }\n  };\n\n  const handleDragLeave = (e: React.DragEvent<HTMLLabelElement>) => {\n    e.preventDefault();\n    e.stopPropagation();\n    setIsDragging(false);\n  };\n\n  const handleDragOver = (e: React.DragEvent<HTMLLabelElement>) => {\n    e.preventDefault();\n    e.stopPropagation();\n  };\n\n  const handleDrop = (e: React.DragEvent<HTMLLabelElement>) => {\n    e.preventDefault();\n    e.stopPropagation();\n    setIsDragging(false);\n\n    if (isUploading) return;\n\n    const files = e.dataTransfer.files;\n    if (files.length > 0) {\n      const file = files[0];\n      if (\n        file !== undefined &&\n        (file.type === \"application/json\" || file.name.endsWith(\".json\"))\n      ) {\n        handleFileUpload(file);\n      } else {\n        toast.error(\"Please upload a JSON file\");\n      }\n    }\n  };\n\n  return (\n    <div className=\"flex flex-col mt-4\">\n      <div className=\"flex items-center\">\n        <div className=\"relative flex flex-1 items-center\">\n          <label\n            className={cn(\n              \"flex h-10 items-center justify-center w-full px-4 py-2 border border-dashed rounded-md transition-colors\",\n              isUploading\n                ? \"opacity-70 cursor-not-allowed border-background-400 bg-background-50/30\"\n                : isDragging\n                  ? \"bg-background-50/50 border-primary dark:border-primary\"\n                  : \"cursor-pointer hover:bg-background-50/30 hover:border-primary dark:hover:border-primary border-background-300 dark:border-background-600\"\n            )}\n            onDragEnter={handleDragEnter}\n            onDragLeave={handleDragLeave}\n            onDragOver={handleDragOver}\n            onDrop={handleDrop}\n          >\n            <div className=\"flex items-center space-x-2\">\n              {isUploading ? (\n                <div className=\"h-4 w-4 border-t-2 border-b-2 border-primary rounded-full animate-spin\"></div>\n              ) : (\n                <FiFile className=\"h-4 w-4 text-text-500\" />\n              )}\n              <span className=\"text-sm text-text-500\">\n                {isUploading\n                  ? `Uploading ${truncateString(fileName || \"file\", 50)}...`\n                  : isDragging\n                    ? \"Drop JSON file here\"\n                    : truncateString(\n                        fileName || \"Select or drag JSON credentials file...\",\n                        50\n                      )}\n              </span>\n            </div>\n            <input\n              className=\"sr-only\"\n              type=\"file\"\n              accept=\".json\"\n              disabled={isUploading}\n              onChange={(event) => {\n                if (!event.target.files?.length) {\n                  return;\n                }\n                const file = event.target.files[0];\n                if (file === undefined) {\n                  return;\n                }\n                handleFileUpload(file);\n              }}\n            />\n          </label>\n        </div>\n      </div>\n    </div>\n  );\n};\n\ninterface GmailJsonUploadSectionProps {\n  appCredentialData?: { client_id: string };\n  serviceAccountCredentialData?: { service_account_email: string };\n  isAdmin: boolean;\n  onSuccess?: () => void;\n  existingAuthCredential?: boolean;\n}\n\nexport const GmailJsonUploadSection = ({\n  appCredentialData,\n  serviceAccountCredentialData,\n  isAdmin,\n  onSuccess,\n  existingAuthCredential,\n}: GmailJsonUploadSectionProps) => {\n  const { mutate } = useSWRConfig();\n  const [localServiceAccountData, setLocalServiceAccountData] = useState(\n    serviceAccountCredentialData\n  );\n  const [localAppCredentialData, setLocalAppCredentialData] =\n    useState(appCredentialData);\n\n  // Update local state when props change\n  useEffect(() => {\n    setLocalServiceAccountData(serviceAccountCredentialData);\n    setLocalAppCredentialData(appCredentialData);\n  }, [serviceAccountCredentialData, appCredentialData]);\n\n  const handleSuccess = () => {\n    if (onSuccess) {\n      onSuccess();\n    } else {\n      refreshAllGoogleData(ValidSources.Gmail);\n    }\n  };\n\n  if (!isAdmin) {\n    return (\n      <div>\n        <div className=\"flex items-start py-3 px-4 bg-yellow-50/30 dark:bg-yellow-900/5 rounded\">\n          <FiAlertTriangle className=\"text-yellow-500 h-5 w-5 mr-2 mt-0.5 flex-shrink-0\" />\n          <p className=\"text-sm\">\n            Curators are unable to set up the Gmail credentials. To add a Gmail\n            connector, please contact an administrator.\n          </p>\n        </div>\n      </div>\n    );\n  }\n\n  return (\n    <div>\n      <p className=\"text-sm mb-3\">\n        To connect your Gmail, create credentials (either OAuth App or Service\n        Account), download the JSON file, and upload it below.\n      </p>\n      <div className=\"mb-4\">\n        <a\n          className=\"text-primary hover:text-primary/80 flex items-center gap-1 text-sm\"\n          target=\"_blank\"\n          href={`${DOCS_ADMINS_PATH}/connectors/official/gmail/overview`}\n          rel=\"noreferrer\"\n        >\n          <FiLink className=\"h-3 w-3\" />\n          View detailed setup instructions\n        </a>\n      </div>\n\n      {(localServiceAccountData?.service_account_email ||\n        localAppCredentialData?.client_id) && (\n        <div className=\"mb-4\">\n          <div className=\"relative flex flex-1 items-center\">\n            <label\n              className={cn(\n                \"flex h-10 items-center justify-center w-full px-4 py-2 border border-dashed rounded-md transition-colors\",\n                false\n                  ? \"opacity-70 cursor-not-allowed border-background-400 bg-background-50/30\"\n                  : \"cursor-pointer hover:bg-background-50/30 hover:border-primary dark:hover:border-primary border-background-300 dark:border-background-600\"\n              )}\n            >\n              <div className=\"flex items-center space-x-2\">\n                {false ? (\n                  <div className=\"h-4 w-4 border-t-2 border-b-2 border-primary rounded-full animate-spin\"></div>\n                ) : (\n                  <FiFile className=\"h-4 w-4 text-text-500\" />\n                )}\n                <span className=\"text-sm text-text-500\">\n                  {truncateString(\n                    localServiceAccountData?.service_account_email ||\n                      localAppCredentialData?.client_id ||\n                      \"\",\n                    50\n                  )}\n                </span>\n              </div>\n            </label>\n          </div>\n          {isAdmin && !existingAuthCredential && (\n            <div className=\"mt-2\">\n              <Button\n                variant=\"danger\"\n                onClick={async () => {\n                  const endpoint =\n                    localServiceAccountData?.service_account_email\n                      ? SWR_KEYS.googleConnectorServiceAccountKey(\"gmail\")\n                      : SWR_KEYS.googleConnectorAppCredential(\"gmail\");\n\n                  const response = await fetch(endpoint, {\n                    method: \"DELETE\",\n                  });\n\n                  if (response.ok) {\n                    mutate(endpoint);\n                    // Also mutate the credential endpoints to ensure Step 2 is reset\n                    mutate(buildSimilarCredentialInfoURL(ValidSources.Gmail));\n\n                    // Add additional mutations to refresh all credential-related endpoints\n                    mutate(SWR_KEYS.googleConnectorCredentials(\"gmail\"));\n                    mutate(SWR_KEYS.googleConnectorPublicCredential(\"gmail\"));\n                    mutate(\n                      SWR_KEYS.googleConnectorServiceAccountCredential(\"gmail\")\n                    );\n\n                    toast.success(\n                      `Successfully deleted ${\n                        localServiceAccountData\n                          ? \"service account key\"\n                          : \"app credentials\"\n                      }`\n                    );\n                    // Immediately update local state\n                    if (localServiceAccountData) {\n                      setLocalServiceAccountData(undefined);\n                    } else {\n                      setLocalAppCredentialData(undefined);\n                    }\n                    handleSuccess();\n                  } else {\n                    const errorMsg = await response.text();\n                    toast.error(`Failed to delete credentials - ${errorMsg}`);\n                  }\n                }}\n              >\n                Delete Credentials\n              </Button>\n            </div>\n          )}\n        </div>\n      )}\n\n      {!(\n        localServiceAccountData?.service_account_email ||\n        localAppCredentialData?.client_id\n      ) && <GmailCredentialUpload onSuccess={handleSuccess} />}\n    </div>\n  );\n};\n\ninterface GmailCredentialSectionProps {\n  gmailPublicCredential?: Credential<GmailCredentialJson>;\n  gmailServiceAccountCredential?: Credential<GmailServiceAccountCredentialJson>;\n  serviceAccountKeyData?: { service_account_email: string };\n  appCredentialData?: { client_id: string };\n  refreshCredentials: () => void;\n  connectorExists: boolean;\n  user: User | null;\n  buildMode?: boolean;\n  onOAuthRedirect?: () => void;\n  onCredentialCreated?: (\n    credential: Credential<\n      GmailCredentialJson | GmailServiceAccountCredentialJson\n    >\n  ) => void;\n}\n\nasync function handleRevokeAccess(\n  connectorExists: boolean,\n  existingCredential:\n    | Credential<GmailCredentialJson>\n    | Credential<GmailServiceAccountCredentialJson>,\n  refreshCredentials: () => void\n) {\n  if (connectorExists) {\n    const message =\n      \"Cannot revoke the Gmail credential while any connector is still associated with the credential. \" +\n      \"Please delete all associated connectors, then try again.\";\n    toast.error(message);\n    return;\n  }\n\n  await adminDeleteCredential(existingCredential.id);\n  toast.success(\"Successfully revoked the Gmail credential!\");\n\n  refreshCredentials();\n}\n\nexport const GmailAuthSection = ({\n  gmailPublicCredential,\n  gmailServiceAccountCredential,\n  serviceAccountKeyData,\n  appCredentialData,\n  refreshCredentials,\n  connectorExists,\n  user,\n  buildMode = false,\n  onOAuthRedirect,\n  onCredentialCreated,\n}: GmailCredentialSectionProps) => {\n  const router = useRouter();\n  const [isAuthenticating, setIsAuthenticating] = useState(false);\n  const [localServiceAccountData, setLocalServiceAccountData] = useState(\n    serviceAccountKeyData\n  );\n  const [localAppCredentialData, setLocalAppCredentialData] =\n    useState(appCredentialData);\n  const [localGmailPublicCredential, setLocalGmailPublicCredential] = useState(\n    gmailPublicCredential\n  );\n  const [\n    localGmailServiceAccountCredential,\n    setLocalGmailServiceAccountCredential,\n  ] = useState(gmailServiceAccountCredential);\n\n  // Update local state when props change\n  useEffect(() => {\n    setLocalServiceAccountData(serviceAccountKeyData);\n    setLocalAppCredentialData(appCredentialData);\n    setLocalGmailPublicCredential(gmailPublicCredential);\n    setLocalGmailServiceAccountCredential(gmailServiceAccountCredential);\n  }, [\n    serviceAccountKeyData,\n    appCredentialData,\n    gmailPublicCredential,\n    gmailServiceAccountCredential,\n  ]);\n\n  const existingCredential =\n    localGmailPublicCredential || localGmailServiceAccountCredential;\n  if (existingCredential) {\n    return (\n      <div>\n        <div className=\"mt-4\">\n          <div className=\"py-3 px-4 bg-blue-50/30 dark:bg-blue-900/5 rounded mb-4 flex items-start\">\n            <FiCheck className=\"text-blue-500 h-5 w-5 mr-2 mt-0.5 flex-shrink-0\" />\n            <div className=\"flex-1\">\n              <span className=\"font-medium block\">Authentication Complete</span>\n              <p className=\"text-sm mt-1 text-text-500 dark:text-text-400 break-words\">\n                Your Gmail credentials have been successfully uploaded and\n                authenticated.\n              </p>\n            </div>\n          </div>\n          <Section flexDirection=\"row\" justifyContent=\"between\" height=\"fit\">\n            <Button\n              variant=\"danger\"\n              onClick={async () => {\n                handleRevokeAccess(\n                  connectorExists,\n                  existingCredential,\n                  refreshCredentials\n                );\n              }}\n            >\n              Revoke Access\n            </Button>\n            {buildMode && onCredentialCreated && (\n              <Button onClick={() => onCredentialCreated(existingCredential)}>\n                Continue\n              </Button>\n            )}\n          </Section>\n        </div>\n      </div>\n    );\n  }\n\n  // If no credentials are uploaded, show message to complete step 1 first\n  if (\n    !localServiceAccountData?.service_account_email &&\n    !localAppCredentialData?.client_id\n  ) {\n    return (\n      <div>\n        <SectionHeader>Gmail Authentication</SectionHeader>\n        <div className=\"mt-4\">\n          <div className=\"flex items-start py-3 px-4 bg-yellow-50/30 dark:bg-yellow-900/5 rounded\">\n            <FiAlertTriangle className=\"text-yellow-500 h-5 w-5 mr-2 mt-0.5 flex-shrink-0\" />\n            <p className=\"text-sm\">\n              Please complete Step 1 by uploading either OAuth credentials or a\n              Service Account key before proceeding with authentication.\n            </p>\n          </div>\n        </div>\n      </div>\n    );\n  }\n\n  if (localServiceAccountData?.service_account_email) {\n    return (\n      <div>\n        <div className=\"mt-4\">\n          <Formik\n            initialValues={{\n              google_primary_admin: user?.email || \"\",\n            }}\n            validationSchema={Yup.object().shape({\n              google_primary_admin: Yup.string()\n                .email(\"Must be a valid email\")\n                .required(\"Required\"),\n            })}\n            onSubmit={async (values, formikHelpers) => {\n              formikHelpers.setSubmitting(true);\n              try {\n                const response = await fetch(\n                  \"/api/manage/admin/connector/gmail/service-account-credential\",\n                  {\n                    method: \"PUT\",\n                    headers: {\n                      \"Content-Type\": \"application/json\",\n                    },\n                    body: JSON.stringify({\n                      google_primary_admin: values.google_primary_admin,\n                    }),\n                  }\n                );\n\n                if (response.ok) {\n                  toast.success(\n                    \"Successfully created service account credential\"\n                  );\n                  refreshCredentials();\n                } else {\n                  const errorMsg = await response.text();\n                  toast.error(\n                    `Failed to create service account credential - ${errorMsg}`\n                  );\n                }\n              } catch (error) {\n                toast.error(\n                  `Failed to create service account credential - ${error}`\n                );\n              } finally {\n                formikHelpers.setSubmitting(false);\n              }\n            }}\n          >\n            {({ isSubmitting }) => (\n              <Form>\n                <TextFormField\n                  name=\"google_primary_admin\"\n                  label=\"Primary Admin Email:\"\n                  subtext=\"Enter the email of an admin/owner of the Google Organization that owns the Gmail account(s) you want to index.\"\n                />\n                <div className=\"flex\">\n                  <Button disabled={isSubmitting} type=\"submit\">\n                    {isSubmitting ? \"Creating...\" : \"Create Credential\"}\n                  </Button>\n                </div>\n              </Form>\n            )}\n          </Formik>\n        </div>\n      </div>\n    );\n  }\n\n  if (localAppCredentialData?.client_id) {\n    return (\n      <div>\n        <div className=\"bg-background-50/30 dark:bg-background-900/20 rounded mb-4\">\n          <p className=\"text-sm\">\n            Next, you need to authenticate with Gmail via OAuth. This gives us\n            read access to the emails you have access to in your Gmail account.\n          </p>\n        </div>\n        <Button\n          disabled={isAuthenticating}\n          onClick={async () => {\n            setIsAuthenticating(true);\n            try {\n              if (buildMode) {\n                Cookies.set(CRAFT_OAUTH_COOKIE_NAME, \"true\", {\n                  path: \"/\",\n                });\n              }\n              const [authUrl, errorMsg] = await setupGmailOAuth({\n                isAdmin: true,\n              });\n\n              if (authUrl) {\n                onOAuthRedirect?.();\n                router.push(authUrl as Route);\n              } else {\n                toast.error(errorMsg);\n                setIsAuthenticating(false);\n              }\n            } catch (error) {\n              toast.error(`Failed to authenticate with Gmail - ${error}`);\n              setIsAuthenticating(false);\n            }\n          }}\n        >\n          {isAuthenticating ? \"Authenticating...\" : \"Authenticate with Gmail\"}\n        </Button>\n      </div>\n    );\n  }\n\n  // This code path should not be reached with the new conditions above\n  return null;\n};\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/gmail/GmailPage.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { LoadingAnimation } from \"@/components/Loading\";\nimport { toast } from \"@/hooks/useToast\";\nimport { CCPairBasicInfo, ValidSources } from \"@/lib/types\";\nimport {\n  Credential,\n  GmailCredentialJson,\n  GmailServiceAccountCredentialJson,\n} from \"@/lib/connectors/credentials\";\nimport { GmailAuthSection, GmailJsonUploadSection } from \"./Credential\";\nimport { usePublicCredentials, useBasicConnectorStatus } from \"@/lib/hooks\";\nimport Title from \"@/components/ui/title\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport {\n  useGoogleAppCredential,\n  useGoogleServiceAccountKey,\n  useGoogleCredentials,\n  useConnectorsByCredentialId,\n  checkCredentialsFetched,\n  filterUploadedCredentials,\n  checkConnectorsExist,\n  refreshAllGoogleData,\n} from \"@/lib/googleConnector\";\n\ninterface GmailMainProps {\n  buildMode?: boolean;\n  onOAuthRedirect?: () => void;\n  onCredentialCreated?: (\n    credential: Credential<\n      GmailCredentialJson | GmailServiceAccountCredentialJson\n    >\n  ) => void;\n}\n\nexport const GmailMain = ({\n  buildMode = false,\n  onOAuthRedirect,\n  onCredentialCreated,\n}: GmailMainProps) => {\n  const { isAdmin, user } = useUser();\n\n  const {\n    data: appCredentialData,\n    isLoading: isAppCredentialLoading,\n    error: isAppCredentialError,\n  } = useGoogleAppCredential(\"gmail\");\n\n  const {\n    data: serviceAccountKeyData,\n    isLoading: isServiceAccountKeyLoading,\n    error: isServiceAccountKeyError,\n  } = useGoogleServiceAccountKey(\"gmail\");\n\n  const {\n    data: connectorIndexingStatuses,\n    isLoading: isConnectorIndexingStatusesLoading,\n    error: connectorIndexingStatusesError,\n  } = useBasicConnectorStatus();\n\n  const {\n    data: credentialsData,\n    isLoading: isCredentialsLoading,\n    error: credentialsError,\n    refreshCredentials,\n  } = usePublicCredentials();\n\n  const {\n    data: gmailCredentials,\n    isLoading: isGmailCredentialsLoading,\n    error: gmailCredentialsError,\n  } = useGoogleCredentials(ValidSources.Gmail);\n\n  const { credential_id, uploadedCredentials } =\n    filterUploadedCredentials(gmailCredentials);\n\n  const {\n    data: gmailConnectors,\n    isLoading: isGmailConnectorsLoading,\n    error: gmailConnectorsError,\n    refreshConnectorsByCredentialId,\n  } = useConnectorsByCredentialId(credential_id);\n\n  const {\n    appCredentialSuccessfullyFetched,\n    serviceAccountKeySuccessfullyFetched,\n  } = checkCredentialsFetched(\n    appCredentialData,\n    isAppCredentialError,\n    serviceAccountKeyData,\n    isServiceAccountKeyError\n  );\n\n  const handleRefresh = () => {\n    refreshCredentials();\n    refreshConnectorsByCredentialId();\n    refreshAllGoogleData(ValidSources.Gmail);\n  };\n\n  if (\n    (!appCredentialSuccessfullyFetched && isAppCredentialLoading) ||\n    (!serviceAccountKeySuccessfullyFetched && isServiceAccountKeyLoading) ||\n    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||\n    (!credentialsData && isCredentialsLoading) ||\n    (!gmailCredentials && isGmailCredentialsLoading) ||\n    (!gmailConnectors && isGmailConnectorsLoading)\n  ) {\n    return (\n      <div className=\"mx-auto\">\n        <LoadingAnimation text=\"\" />\n      </div>\n    );\n  }\n\n  if (credentialsError || !credentialsData) {\n    return <ErrorCallout errorTitle=\"Failed to load credentials.\" />;\n  }\n\n  if (gmailCredentialsError || !gmailCredentials) {\n    return <ErrorCallout errorTitle=\"Failed to load Gmail credentials.\" />;\n  }\n\n  if (connectorIndexingStatusesError || !connectorIndexingStatuses) {\n    return <ErrorCallout errorTitle=\"Failed to load connectors.\" />;\n  }\n\n  if (\n    !appCredentialSuccessfullyFetched ||\n    !serviceAccountKeySuccessfullyFetched\n  ) {\n    return (\n      <ErrorCallout errorTitle=\"Error loading Gmail app credentials. Contact an administrator.\" />\n    );\n  }\n\n  if (gmailConnectorsError) {\n    return (\n      <ErrorCallout errorTitle=\"Failed to load Gmail associated connectors.\" />\n    );\n  }\n\n  const connectorExistsFromCredential = checkConnectorsExist(gmailConnectors);\n\n  const gmailPublicUploadedCredential:\n    | Credential<GmailCredentialJson>\n    | undefined = credentialsData.find(\n    (credential) =>\n      credential.credential_json?.google_tokens &&\n      credential.admin_public &&\n      credential.source === \"gmail\" &&\n      credential.credential_json.authentication_method !== \"oauth_interactive\"\n  );\n\n  const gmailServiceAccountCredential:\n    | Credential<GmailServiceAccountCredentialJson>\n    | undefined = credentialsData.find(\n    (credential) =>\n      credential.credential_json?.google_service_account_key &&\n      credential.source === \"gmail\"\n  );\n\n  const gmailConnectorIndexingStatuses: CCPairBasicInfo[] =\n    connectorIndexingStatuses.filter(\n      (connectorIndexingStatus) => connectorIndexingStatus.source === \"gmail\"\n    );\n\n  const connectorExists =\n    connectorExistsFromCredential || gmailConnectorIndexingStatuses.length > 0;\n\n  const hasUploadedCredentials =\n    Boolean(appCredentialData?.client_id) ||\n    Boolean(serviceAccountKeyData?.service_account_email);\n\n  return (\n    <>\n      <Title className=\"mb-2 mt-6 ml-auto mr-auto\">\n        Step 1: Provide your Credentials\n      </Title>\n      <GmailJsonUploadSection\n        appCredentialData={appCredentialData}\n        serviceAccountCredentialData={serviceAccountKeyData}\n        isAdmin={isAdmin}\n        onSuccess={handleRefresh}\n        existingAuthCredential={Boolean(\n          gmailPublicUploadedCredential || gmailServiceAccountCredential\n        )}\n      />\n\n      {isAdmin && hasUploadedCredentials && (\n        <>\n          <Title className=\"mb-2 mt-6 ml-auto mr-auto\">\n            Step 2: Authenticate with Onyx\n          </Title>\n          <GmailAuthSection\n            refreshCredentials={handleRefresh}\n            gmailPublicCredential={gmailPublicUploadedCredential}\n            gmailServiceAccountCredential={gmailServiceAccountCredential}\n            appCredentialData={appCredentialData}\n            serviceAccountKeyData={serviceAccountKeyData}\n            connectorExists={connectorExists}\n            user={user}\n            buildMode={buildMode}\n            onOAuthRedirect={onOAuthRedirect}\n            // Necessary prop drilling for build mode v1.\n            // TODO: either integrate gmail into normal flow\n            // or create a build-mode specific Gmail flow\n            onCredentialCreated={onCredentialCreated}\n          />\n        </>\n      )}\n    </>\n  );\n};\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/utils/files.ts",
    "content": "import { toast } from \"@/hooks/useToast\";\nimport { createConnector, runConnector } from \"@/lib/connector\";\nimport { createCredential, linkCredential } from \"@/lib/credential\";\nimport { FileConfig } from \"@/lib/connectors/connectors\";\nimport { AccessType, ValidSources } from \"@/lib/types\";\n\nexport const submitFiles = async (\n  selectedFiles: File[],\n  name: string,\n  access_type: string,\n  groups?: number[]\n) => {\n  const formData = new FormData();\n\n  selectedFiles.forEach((file) => {\n    formData.append(\"files\", file);\n  });\n\n  const response = await fetch(\"/api/manage/admin/connector/file/upload\", {\n    method: \"POST\",\n    body: formData,\n  });\n  const responseJson = await response.json();\n  if (!response.ok) {\n    toast.error(`Unable to upload files - ${responseJson.detail}`);\n    return;\n  }\n\n  const filePaths = responseJson.file_paths as string[];\n  const fileNames = responseJson.file_names as string[];\n  const zipMetadataFileId = responseJson.zip_metadata_file_id as string | null;\n\n  const [connectorErrorMsg, connector] = await createConnector<FileConfig>({\n    name: \"FileConnector-\" + Date.now(),\n    source: ValidSources.File,\n    input_type: \"load_state\",\n    connector_specific_config: {\n      file_locations: filePaths,\n      file_names: fileNames,\n      zip_metadata_file_id: zipMetadataFileId,\n    },\n    refresh_freq: null,\n    prune_freq: null,\n    indexing_start: null,\n    access_type: access_type,\n    groups: groups,\n  });\n  if (connectorErrorMsg || !connector) {\n    toast.error(`Unable to create connector - ${connectorErrorMsg}`);\n    return;\n  }\n\n  // Since there is no \"real\" credential associated with a file connector\n  // we create a dummy one here so that we can associate the CC Pair with a\n  // user. This is needed since the user for a CC Pair is found via the credential\n  // associated with it.\n  const createCredentialResponse = await createCredential({\n    credential_json: {},\n    admin_public: true,\n    source: ValidSources.File,\n    curator_public: true,\n    groups: groups,\n    name,\n  });\n  if (!createCredentialResponse.ok) {\n    const errorMsg = await createCredentialResponse.text();\n    toast.error(`Error creating credential for CC Pair - ${errorMsg}`);\n    return false;\n  }\n  const credentialId = (await createCredentialResponse.json()).id;\n\n  const credentialResponse = await linkCredential(\n    connector.id,\n    credentialId,\n    name,\n    access_type as AccessType,\n    groups\n  );\n  if (!credentialResponse.ok) {\n    const credentialResponseJson = await credentialResponse.json();\n    toast.error(\n      `Unable to link connector to credential - ${credentialResponseJson.detail}`\n    );\n    return false;\n  }\n\n  const runConnectorErrorMsg = await runConnector(connector.id, [0]);\n  if (runConnectorErrorMsg) {\n    toast.error(`Unable to run connector - ${runConnectorErrorMsg}`);\n    return false;\n  }\n\n  toast.success(\"Successfully uploaded files!\");\n  return true;\n};\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/utils/google_site.ts",
    "content": "import { toast } from \"@/hooks/useToast\";\nimport { createConnector, runConnector } from \"@/lib/connector\";\nimport { linkCredential } from \"@/lib/credential\";\nimport { GoogleSitesConfig } from \"@/lib/connectors/connectors\";\nimport { ValidSources } from \"@/lib/types\";\n\nexport const submitGoogleSite = async (\n  selectedFiles: File[],\n  base_url: any,\n  refreshFreq: number,\n  pruneFreq: number,\n  indexingStart: Date,\n  access_type: string,\n  groups: number[],\n  name?: string\n) => {\n  const uploadCreateAndTriggerConnector = async () => {\n    const formData = new FormData();\n\n    selectedFiles.forEach((file) => {\n      formData.append(\"files\", file);\n    });\n\n    const response = await fetch(\n      \"/api/manage/admin/connector/file/upload?unzip=false\",\n      {\n        method: \"POST\",\n        body: formData,\n      }\n    );\n    const responseJson = await response.json();\n    if (!response.ok) {\n      toast.error(`Unable to upload files - ${responseJson.detail}`);\n      return false;\n    }\n\n    const filePaths = responseJson.file_paths as string[];\n    if (!filePaths || filePaths.length === 0) {\n      toast.error(\n        \"File upload was successful, but no file path was returned. Cannot create connector.\"\n      );\n      return false;\n    }\n\n    const filePath = filePaths[0];\n    if (filePath === undefined) {\n      toast.error(\n        \"File upload was successful, but file path is undefined. Cannot create connector.\"\n      );\n      return false;\n    }\n\n    const [connectorErrorMsg, connector] =\n      await createConnector<GoogleSitesConfig>({\n        name: name ? name : `GoogleSitesConnector-${base_url}`,\n        source: ValidSources.GoogleSites,\n        input_type: \"load_state\",\n        connector_specific_config: {\n          base_url: base_url,\n          zip_path: filePath,\n        },\n        access_type: access_type,\n        refresh_freq: refreshFreq,\n        prune_freq: pruneFreq,\n        indexing_start: indexingStart,\n      });\n    if (connectorErrorMsg || !connector) {\n      toast.error(`Unable to create connector - ${connectorErrorMsg}`);\n      return false;\n    }\n\n    const credentialResponse = await linkCredential(\n      connector.id,\n      0,\n      base_url,\n      undefined,\n      groups\n    );\n    if (!credentialResponse.ok) {\n      const credentialResponseJson = await credentialResponse.json();\n      toast.error(\n        `Unable to link connector to credential - ${credentialResponseJson.detail}`\n      );\n      return false;\n    }\n\n    const runConnectorErrorMsg = await runConnector(connector.id, [0]);\n    if (runConnectorErrorMsg) {\n      toast.error(`Unable to run connector - ${runConnectorErrorMsg}`);\n      return false;\n    }\n    toast.success(\"Successfully created Google Site connector!\");\n    return true;\n  };\n\n  try {\n    const response = await uploadCreateAndTriggerConnector();\n    return response;\n  } catch (e) {\n    return false;\n  }\n};\n"
  },
  {
    "path": "web/src/app/admin/connectors/[connector]/pages/utils/hooks.ts",
    "content": "import { GmailConfig } from \"@/lib/connectors/connectors\";\n\nexport const gmailConnectorNameBuilder = (values: GmailConfig) =>\n  \"GmailConnector\";\n\nimport { usePublicCredentials } from \"@/lib/hooks\";\nimport {\n  Credential,\n  GmailCredentialJson,\n  GmailServiceAccountCredentialJson,\n  GoogleDriveCredentialJson,\n  GoogleDriveServiceAccountCredentialJson,\n} from \"@/lib/connectors/credentials\";\n\nexport const useGmailCredentials = (connector: string) => {\n  const {\n    data: credentialsData,\n    isLoading: isCredentialsLoading,\n    error: credentialsError,\n    refreshCredentials,\n  } = usePublicCredentials();\n\n  const gmailPublicCredential: Credential<GmailCredentialJson> | undefined =\n    credentialsData?.find(\n      (credential) =>\n        credential.credential_json?.google_tokens &&\n        credential.admin_public &&\n        credential.source === connector\n    );\n\n  const gmailServiceAccountCredential:\n    | Credential<GmailServiceAccountCredentialJson>\n    | undefined = credentialsData?.find(\n    (credential) =>\n      credential.credential_json?.google_service_account_key &&\n      credential.admin_public &&\n      credential.source === connector\n  );\n\n  const liveGmailCredential =\n    gmailPublicCredential || gmailServiceAccountCredential;\n\n  return {\n    liveGmailCredential: liveGmailCredential,\n  };\n};\n\nexport const useGoogleDriveCredentials = (connector: string) => {\n  const { data: credentialsData } = usePublicCredentials();\n\n  const googleDrivePublicCredential:\n    | Credential<GoogleDriveCredentialJson>\n    | undefined = credentialsData?.find(\n    (credential) =>\n      credential.credential_json?.google_tokens &&\n      credential.admin_public &&\n      credential.source === connector\n  );\n\n  const googleDriveServiceAccountCredential:\n    | Credential<GoogleDriveServiceAccountCredentialJson>\n    | undefined = credentialsData?.find(\n    (credential) =>\n      credential.credential_json?.google_service_account_key &&\n      credential.admin_public &&\n      credential.source === connector\n  );\n\n  const liveGDriveCredential =\n    googleDrivePublicCredential || googleDriveServiceAccountCredential;\n\n  return {\n    liveGDriveCredential: liveGDriveCredential,\n  };\n};\n"
  },
  {
    "path": "web/src/app/admin/debug/page.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport {\n  Table,\n  TableBody,\n  TableCell,\n  TableHead,\n  TableHeader,\n  TableRow,\n} from \"@/components/ui/table\";\nimport { Button, Text } from \"@opal/components\";\nimport { Card } from \"@/components/ui/card\";\nimport { markdown } from \"@opal/utils\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport { Spinner } from \"@/components/Spinner\";\nimport { SvgDownloadCloud } from \"@opal/icons\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.DEBUG;\n\nfunction Main() {\n  const [categories, setCategories] = useState<string[]>([]);\n  const [isLoading, setIsLoading] = useState(true);\n  const [isDownloading, setIsDownloading] = useState(false);\n\n  useEffect(() => {\n    const fetchCategories = async () => {\n      try {\n        const response = await fetch(\"/api/admin/long-term-logs\");\n        if (!response.ok) throw new Error(\"Failed to fetch categories\");\n        const data = await response.json();\n        setCategories(data);\n      } catch (error) {\n        console.error(\"Error fetching categories:\", error);\n      } finally {\n        setIsLoading(false);\n      }\n    };\n\n    fetchCategories();\n  }, []);\n\n  const handleDownload = async (category: string) => {\n    setIsDownloading(true);\n    try {\n      const response = await fetch(\n        `/api/admin/long-term-logs/${category}/download`\n      );\n      if (!response.ok) throw new Error(\"Failed to download logs\");\n\n      const blob = await response.blob();\n      const url = window.URL.createObjectURL(blob);\n\n      const a = document.createElement(\"a\");\n      a.href = url;\n      a.download = `${category}-logs.zip`;\n      document.body.appendChild(a);\n      a.click();\n      window.URL.revokeObjectURL(url);\n      document.body.removeChild(a);\n    } catch (error) {\n      console.error(\"Error downloading logs:\", error);\n    } finally {\n      setIsDownloading(false);\n    }\n  };\n\n  if (isLoading) {\n    return <ThreeDotsLoader />;\n  }\n\n  return (\n    <>\n      {isDownloading && <Spinner />}\n      <div className=\"mb-8\">\n        <Text as=\"p\">\n          {markdown(\n            \"**Debug Logs** provide detailed information about system operations and events. You can download logs for each category to analyze system behavior or troubleshoot issues.\"\n          )}\n        </Text>\n        <Spacer rem={0.75} />\n\n        {categories.length > 0 && (\n          <Card className=\"mt-4\">\n            <Table>\n              <TableHeader>\n                <TableRow>\n                  <TableHead>Category</TableHead>\n                  <TableHead>Actions</TableHead>\n                </TableRow>\n              </TableHeader>\n              <TableBody>\n                {categories.map((category) => (\n                  <TableRow\n                    key={category}\n                    className=\"hover:bg-transparent dark:hover:bg-transparent\"\n                  >\n                    <TableCell className=\"font-medium\">{category}</TableCell>\n                    <TableCell>\n                      <Button\n                        prominence=\"secondary\"\n                        onClick={() => handleDownload(category)}\n                        icon={SvgDownloadCloud}\n                      >\n                        Download Logs\n                      </Button>\n                    </TableCell>\n                  </TableRow>\n                ))}\n              </TableBody>\n            </Table>\n          </Card>\n        )}\n      </div>\n    </>\n  );\n}\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />\n      <SettingsLayouts.Body>\n        <Main />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/discord-bot/BotConfigCard.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport { Button } from \"@opal/components\";\nimport { Badge } from \"@/components/ui/badge\";\nimport PasswordInputTypeIn from \"@/refresh-components/inputs/PasswordInputTypeIn\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport {\n  useDiscordBotConfig,\n  useDiscordGuilds,\n} from \"@/app/admin/discord-bot/hooks\";\nimport { createBotConfig, deleteBotConfig } from \"@/app/admin/discord-bot/lib\";\nimport { toast } from \"@/hooks/useToast\";\nimport { ConfirmEntityModal } from \"@/components/modals/ConfirmEntityModal\";\nimport { getFormattedDateTime } from \"@/lib/dateUtils\";\n\nexport function BotConfigCard() {\n  const {\n    data: botConfig,\n    isLoading,\n    isManaged,\n    refreshBotConfig,\n  } = useDiscordBotConfig();\n  const { data: guilds } = useDiscordGuilds();\n\n  const [botToken, setBotToken] = useState(\"\");\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const [showDeleteConfirm, setShowDeleteConfirm] = useState(false);\n\n  // Don't render anything if managed externally (Cloud or env var)\n  if (isManaged) {\n    return null;\n  }\n\n  // Show loading while fetching initial state\n  if (isLoading) {\n    return (\n      <Card>\n        <Section\n          flexDirection=\"row\"\n          justifyContent=\"between\"\n          alignItems=\"center\"\n        >\n          <Text mainContentEmphasis text05>\n            Bot Token\n          </Text>\n        </Section>\n        <ThreeDotsLoader />\n      </Card>\n    );\n  }\n\n  const isConfigured = botConfig?.configured ?? false;\n  const hasServerConfigs = (guilds?.length ?? 0) > 0;\n\n  const handleSaveToken = async () => {\n    if (!botToken.trim()) {\n      toast.error(\"Please enter a bot token\");\n      return;\n    }\n\n    setIsSubmitting(true);\n    try {\n      await createBotConfig(botToken.trim());\n      setBotToken(\"\");\n      refreshBotConfig();\n      toast.success(\"Bot token saved successfully\");\n    } catch (err) {\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to save bot token\"\n      );\n    } finally {\n      setIsSubmitting(false);\n    }\n  };\n\n  const handleDeleteToken = async () => {\n    setIsSubmitting(true);\n    try {\n      await deleteBotConfig();\n      refreshBotConfig();\n      toast.success(\"Bot token deleted\");\n    } catch (err) {\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to delete bot token\"\n      );\n    } finally {\n      setIsSubmitting(false);\n      setShowDeleteConfirm(false);\n    }\n  };\n\n  return (\n    <>\n      {showDeleteConfirm && (\n        <ConfirmEntityModal\n          danger\n          entityType=\"Discord bot token\"\n          entityName=\"Discord Bot Token\"\n          onClose={() => setShowDeleteConfirm(false)}\n          onSubmit={handleDeleteToken}\n          additionalDetails=\"This will disconnect your Discord bot. You will need to re-enter the token to use the bot again.\"\n        />\n      )}\n      <Card>\n        <Section flexDirection=\"row\" justifyContent=\"between\">\n          <Section flexDirection=\"row\" gap={0.5} width=\"fit\">\n            <Text mainContentEmphasis text05>\n              Bot Token\n            </Text>\n            {isConfigured ? (\n              <Badge variant=\"success\">Configured</Badge>\n            ) : (\n              <Badge variant=\"secondary\">Not Configured</Badge>\n            )}\n          </Section>\n          {isConfigured && (\n            <SimpleTooltip\n              tooltip={\n                hasServerConfigs ? \"Delete server configs first\" : undefined\n              }\n              disabled={!hasServerConfigs}\n            >\n              <Button\n                disabled={isSubmitting || hasServerConfigs}\n                variant=\"danger\"\n                onClick={() => setShowDeleteConfirm(true)}\n              >\n                Delete Discord Token\n              </Button>\n            </SimpleTooltip>\n          )}\n        </Section>\n\n        {isConfigured ? (\n          <Section flexDirection=\"column\" alignItems=\"start\" gap={0.5}>\n            <Text text03 secondaryBody>\n              Your Discord bot token is configured.\n              {botConfig?.created_at && (\n                <>\n                  {\" \"}\n                  Added {getFormattedDateTime(new Date(botConfig.created_at))}.\n                </>\n              )}\n            </Text>\n            <Text text03 secondaryBody>\n              To change the token, delete the current one and add a new one.\n            </Text>\n          </Section>\n        ) : (\n          <Section flexDirection=\"column\" alignItems=\"start\" gap={0.75}>\n            <Text text03 secondaryBody>\n              Enter your Discord bot token to enable the bot. You can get this\n              from the Discord Developer Portal.\n            </Text>\n            <Section flexDirection=\"row\" alignItems=\"end\" gap={0.5}>\n              <PasswordInputTypeIn\n                value={botToken}\n                onChange={(e) => setBotToken(e.target.value)}\n                placeholder=\"Enter bot token...\"\n                disabled={isSubmitting}\n                className=\"flex-1\"\n              />\n              <Button\n                disabled={isSubmitting || !botToken.trim()}\n                onClick={handleSaveToken}\n              >\n                {isSubmitting ? \"Saving...\" : \"Save Token\"}\n              </Button>\n            </Section>\n          </Section>\n        )}\n      </Card>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/discord-bot/DiscordGuildsTable.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport {\n  Table,\n  TableBody,\n  TableCell,\n  TableHead,\n  TableHeader,\n  TableRow,\n} from \"@/components/ui/table\";\nimport { Badge } from \"@/components/ui/badge\";\nimport { DeleteButton } from \"@/components/DeleteButton\";\nimport { Button } from \"@opal/components\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport { SvgEdit, SvgServer } from \"@opal/icons\";\nimport EmptyMessage from \"@/refresh-components/EmptyMessage\";\nimport { DiscordGuildConfig } from \"@/app/admin/discord-bot/types\";\nimport {\n  deleteGuildConfig,\n  updateGuildConfig,\n} from \"@/app/admin/discord-bot/lib\";\nimport { toast } from \"@/hooks/useToast\";\nimport { ConfirmEntityModal } from \"@/components/modals/ConfirmEntityModal\";\n\ninterface Props {\n  guilds: DiscordGuildConfig[];\n  onRefresh: () => void;\n}\n\nexport function DiscordGuildsTable({ guilds, onRefresh }: Props) {\n  const router = useRouter();\n  const [guildToDelete, setGuildToDelete] = useState<DiscordGuildConfig | null>(\n    null\n  );\n  const [updatingGuildIds, setUpdatingGuildIds] = useState<Set<number>>(\n    new Set()\n  );\n\n  const handleDelete = async (guildId: number) => {\n    try {\n      await deleteGuildConfig(guildId);\n      onRefresh();\n      toast.success(\"Server configuration deleted\");\n    } catch (err) {\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to delete server config\"\n      );\n    } finally {\n      setGuildToDelete(null);\n    }\n  };\n\n  const handleToggleEnabled = async (guild: DiscordGuildConfig) => {\n    if (!guild.guild_id) {\n      toast.error(\"Server must be registered before it can be enabled\");\n      return;\n    }\n\n    setUpdatingGuildIds((prev) => new Set(prev).add(guild.id));\n    try {\n      await updateGuildConfig(guild.id, {\n        enabled: !guild.enabled,\n        default_persona_id: guild.default_persona_id,\n      });\n      onRefresh();\n      toast.success(`Server ${!guild.enabled ? \"enabled\" : \"disabled\"}`);\n    } catch (err) {\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to update server\"\n      );\n    } finally {\n      setUpdatingGuildIds((prev) => {\n        const next = new Set(prev);\n        next.delete(guild.id);\n        return next;\n      });\n    }\n  };\n\n  if (guilds.length === 0) {\n    return (\n      <EmptyMessage\n        icon={SvgServer}\n        title=\"No Discord servers configured yet\"\n        description=\"Create a server configuration to get started.\"\n      />\n    );\n  }\n\n  return (\n    <>\n      {guildToDelete && (\n        <ConfirmEntityModal\n          danger\n          entityType=\"Discord server configuration\"\n          entityName={guildToDelete.guild_name || `Server #${guildToDelete.id}`}\n          onClose={() => setGuildToDelete(null)}\n          onSubmit={() => handleDelete(guildToDelete.id)}\n          additionalDetails=\"This will remove all settings for this Discord server.\"\n        />\n      )}\n      <Table>\n        <TableHeader>\n          <TableRow>\n            <TableHead>Server</TableHead>\n            <TableHead>Status</TableHead>\n            <TableHead>Registered</TableHead>\n            <TableHead>Enabled</TableHead>\n            <TableHead>Actions</TableHead>\n          </TableRow>\n        </TableHeader>\n        <TableBody>\n          {guilds.map((guild) => (\n            <TableRow key={guild.id}>\n              <TableCell>\n                <Button\n                  disabled={!guild.guild_id}\n                  prominence=\"internal\"\n                  onClick={() => router.push(`/admin/discord-bot/${guild.id}`)}\n                  icon={SvgEdit}\n                >\n                  {guild.guild_name || `Server #${guild.id}`}\n                </Button>\n              </TableCell>\n              <TableCell>\n                {guild.guild_id ? (\n                  <Badge variant=\"success\">Registered</Badge>\n                ) : (\n                  <Badge variant=\"secondary\">Pending</Badge>\n                )}\n              </TableCell>\n              <TableCell>\n                {guild.registered_at\n                  ? new Date(guild.registered_at).toLocaleDateString()\n                  : \"-\"}\n              </TableCell>\n              <TableCell>\n                {!guild.guild_id ? (\n                  \"-\"\n                ) : (\n                  <Switch\n                    checked={guild.enabled}\n                    onCheckedChange={() => handleToggleEnabled(guild)}\n                    disabled={updatingGuildIds.has(guild.id)}\n                  />\n                )}\n              </TableCell>\n              <TableCell>\n                <DeleteButton onClick={() => setGuildToDelete(guild)} />\n              </TableCell>\n            </TableRow>\n          ))}\n        </TableBody>\n      </Table>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/discord-bot/[guild-id]/DiscordChannelsTable.tsx",
    "content": "\"use client\";\n\nimport {\n  Table,\n  TableBody,\n  TableCell,\n  TableHead,\n  TableHeader,\n  TableRow,\n} from \"@/components/ui/table\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport EmptyMessage from \"@/refresh-components/EmptyMessage\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport {\n  DiscordChannelConfig,\n  DiscordChannelType,\n} from \"@/app/admin/discord-bot/types\";\nimport { SvgHash, SvgBubbleText, SvgLock } from \"@opal/icons\";\nimport { IconProps } from \"@opal/types\";\nimport { Persona } from \"@/app/admin/agents/interfaces\";\n\nfunction getChannelIcon(\n  channelType: DiscordChannelType,\n  isPrivate: boolean = false\n): React.ComponentType<IconProps> {\n  // TODO: Need different icon for private channel vs private forum\n  if (isPrivate) {\n    return SvgLock;\n  }\n  switch (channelType) {\n    case \"forum\":\n      return SvgBubbleText;\n    case \"text\":\n    default:\n      return SvgHash;\n  }\n}\n\ninterface Props {\n  channels: DiscordChannelConfig[];\n  personas: Persona[];\n  onChannelUpdate: (\n    channelId: number,\n    field:\n      | \"enabled\"\n      | \"require_bot_invocation\"\n      | \"thread_only_mode\"\n      | \"persona_override_id\",\n    value: boolean | number | null\n  ) => void;\n  disabled?: boolean;\n}\n\nexport function DiscordChannelsTable({\n  channels,\n  personas,\n  onChannelUpdate,\n  disabled = false,\n}: Props) {\n  if (channels.length === 0) {\n    return (\n      <EmptyMessage\n        title=\"No channels configured\"\n        description=\"Run !sync-channels in Discord to add channels.\"\n      />\n    );\n  }\n\n  return (\n    <Table>\n      <TableHeader>\n        <TableRow className=\"[&>th]:whitespace-nowrap\">\n          <TableHead>Channel</TableHead>\n          <TableHead>Enabled</TableHead>\n          <TableHead>Require @mention</TableHead>\n          <TableHead>Thread Only Mode</TableHead>\n          <TableHead>Agent Override</TableHead>\n        </TableRow>\n      </TableHeader>\n      <TableBody>\n        {channels.map((channel) => {\n          const ChannelIcon = getChannelIcon(\n            channel.channel_type,\n            channel.is_private\n          );\n          return (\n            <TableRow key={channel.id}>\n              <TableCell>\n                <Section\n                  flexDirection=\"row\"\n                  justifyContent=\"start\"\n                  gap={0.5}\n                  width=\"fit\"\n                >\n                  <ChannelIcon width={16} height={16} />\n                  <Text text04 mainUiBody>\n                    {channel.channel_name}\n                  </Text>\n                </Section>\n              </TableCell>\n              <TableCell>\n                <Switch\n                  checked={channel.enabled}\n                  onCheckedChange={(checked) =>\n                    onChannelUpdate(channel.id, \"enabled\", checked)\n                  }\n                  disabled={disabled}\n                />\n              </TableCell>\n              <TableCell>\n                <Switch\n                  checked={channel.require_bot_invocation}\n                  onCheckedChange={(checked) =>\n                    onChannelUpdate(\n                      channel.id,\n                      \"require_bot_invocation\",\n                      checked\n                    )\n                  }\n                  disabled={disabled}\n                />\n              </TableCell>\n              <TableCell>\n                {channel.channel_type !== \"forum\" && (\n                  <Switch\n                    checked={channel.thread_only_mode}\n                    onCheckedChange={(checked) =>\n                      onChannelUpdate(channel.id, \"thread_only_mode\", checked)\n                    }\n                    disabled={disabled}\n                  />\n                )}\n              </TableCell>\n              <TableCell>\n                <InputSelect\n                  value={channel.persona_override_id?.toString() ?? \"default\"}\n                  onValueChange={(value: string) =>\n                    onChannelUpdate(\n                      channel.id,\n                      \"persona_override_id\",\n                      value === \"default\" ? null : parseInt(value)\n                    )\n                  }\n                  disabled={disabled}\n                >\n                  <InputSelect.Trigger placeholder=\"-\" />\n                  <InputSelect.Content>\n                    <InputSelect.Item value=\"default\">-</InputSelect.Item>\n                    {personas.map((persona) => (\n                      <InputSelect.Item\n                        key={persona.id}\n                        value={persona.id.toString()}\n                      >\n                        {persona.name}\n                      </InputSelect.Item>\n                    ))}\n                  </InputSelect.Content>\n                </InputSelect>\n              </TableCell>\n            </TableRow>\n          );\n        })}\n      </TableBody>\n    </Table>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/discord-bot/[guild-id]/page.tsx",
    "content": "\"use client\";\n\nimport { use, useState, useEffect, useCallback, useMemo } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { toast } from \"@/hooks/useToast\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { ContentAction } from \"@opal/layouts\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport { Callout } from \"@/components/ui/callout\";\nimport Message from \"@/refresh-components/messages/Message\";\nimport { Button } from \"@opal/components\";\nimport { SvgServer } from \"@opal/icons\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport {\n  useDiscordGuild,\n  useDiscordChannels,\n} from \"@/app/admin/discord-bot/hooks\";\nimport {\n  updateGuildConfig,\n  bulkUpdateChannelConfigs,\n} from \"@/app/admin/discord-bot/lib\";\nimport { DiscordChannelsTable } from \"@/app/admin/discord-bot/[guild-id]/DiscordChannelsTable\";\nimport { DiscordChannelConfig } from \"@/app/admin/discord-bot/types\";\nimport { useAdminPersonas } from \"@/hooks/useAdminPersonas\";\nimport { Persona } from \"@/app/admin/agents/interfaces\";\n\ninterface Props {\n  params: Promise<{ \"guild-id\": string }>;\n}\n\nfunction GuildDetailContent({\n  guildId,\n  personas,\n  localChannels,\n  onChannelUpdate,\n  handleEnableAll,\n  handleDisableAll,\n  disabled,\n}: {\n  guildId: number;\n  personas: Persona[];\n  localChannels: DiscordChannelConfig[];\n  onChannelUpdate: (\n    channelId: number,\n    field:\n      | \"enabled\"\n      | \"require_bot_invocation\"\n      | \"thread_only_mode\"\n      | \"persona_override_id\",\n    value: boolean | number | null\n  ) => void;\n  handleEnableAll: () => void;\n  handleDisableAll: () => void;\n  disabled: boolean;\n}) {\n  const {\n    data: guild,\n    isLoading: guildLoading,\n    error: guildError,\n  } = useDiscordGuild(guildId);\n  const { isLoading: channelsLoading, error: channelsError } =\n    useDiscordChannels(guildId);\n\n  if (guildLoading) {\n    return <ThreeDotsLoader />;\n  }\n\n  if (guildError || !guild) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Failed to load server\"\n        errorMsg={guildError?.info?.detail || \"Server not found\"}\n      />\n    );\n  }\n\n  const isRegistered = !!guild.guild_id;\n\n  return (\n    <>\n      {!isRegistered && (\n        <Callout type=\"notice\" title=\"Waiting for Registration\">\n          Use the !register command in your Discord server with the registration\n          key to complete setup.\n        </Callout>\n      )}\n\n      <Card variant={disabled ? \"disabled\" : \"primary\"}>\n        <ContentAction\n          title=\"Channel Configuration\"\n          description=\"Run !sync-channels in Discord to update the channel list.\"\n          sizePreset=\"main-content\"\n          variant=\"section\"\n          rightChildren={\n            isRegistered && !channelsLoading && !channelsError ? (\n              <Section\n                flexDirection=\"row\"\n                justifyContent=\"end\"\n                alignItems=\"center\"\n                width=\"fit\"\n                gap={0.5}\n              >\n                <Button\n                  disabled={disabled}\n                  prominence=\"secondary\"\n                  onClick={handleEnableAll}\n                >\n                  Enable All\n                </Button>\n                <Button\n                  disabled={disabled}\n                  prominence=\"secondary\"\n                  onClick={handleDisableAll}\n                >\n                  Disable All\n                </Button>\n              </Section>\n            ) : undefined\n          }\n        />\n\n        {!isRegistered ? (\n          <Text text03 secondaryBody>\n            Channel configuration will be available after the server is\n            registered.\n          </Text>\n        ) : channelsLoading ? (\n          <ThreeDotsLoader />\n        ) : channelsError ? (\n          <ErrorCallout\n            errorTitle=\"Failed to load channels\"\n            errorMsg={channelsError?.info?.detail || \"Could not load channels\"}\n          />\n        ) : (\n          <DiscordChannelsTable\n            channels={localChannels}\n            personas={personas}\n            onChannelUpdate={onChannelUpdate}\n            disabled={disabled}\n          />\n        )}\n      </Card>\n    </>\n  );\n}\n\nexport default function Page({ params }: Props) {\n  const unwrappedParams = use(params);\n  const guildId = Number(unwrappedParams[\"guild-id\"]);\n  const { data: guild, refreshGuild } = useDiscordGuild(guildId);\n  const {\n    data: channels,\n    isLoading: channelsLoading,\n    error: channelsError,\n    refreshChannels,\n  } = useDiscordChannels(guildId);\n  const { personas, isLoading: personasLoading } = useAdminPersonas({\n    includeDefault: true,\n  });\n  const [isUpdating, setIsUpdating] = useState(false);\n\n  // Local state for channel configurations\n  const [localChannels, setLocalChannels] = useState<DiscordChannelConfig[]>(\n    []\n  );\n\n  // Track the original server state to detect changes\n  const [originalChannels, setOriginalChannels] = useState<\n    DiscordChannelConfig[]\n  >([]);\n\n  // Sync local state with fetched channels\n  useEffect(() => {\n    if (channels) {\n      setLocalChannels(channels);\n      setOriginalChannels(channels);\n    }\n  }, [channels]);\n\n  // Check if there are unsaved changes\n  const hasUnsavedChanges = useMemo(() => {\n    for (const local of localChannels) {\n      const original = originalChannels.find((c) => c.id === local.id);\n      if (!original) return true;\n      if (\n        local.enabled !== original.enabled ||\n        local.require_bot_invocation !== original.require_bot_invocation ||\n        local.thread_only_mode !== original.thread_only_mode ||\n        local.persona_override_id !== original.persona_override_id\n      ) {\n        return true;\n      }\n    }\n    return false;\n  }, [localChannels, originalChannels]);\n\n  // Get list of changed channels for bulk update\n  const getChangedChannels = useCallback(() => {\n    const changes: {\n      channelConfigId: number;\n      update: {\n        enabled: boolean;\n        require_bot_invocation: boolean;\n        thread_only_mode: boolean;\n        persona_override_id: number | null;\n      };\n    }[] = [];\n\n    for (const local of localChannels) {\n      const original = originalChannels.find((c) => c.id === local.id);\n      if (!original) continue;\n      if (\n        local.enabled !== original.enabled ||\n        local.require_bot_invocation !== original.require_bot_invocation ||\n        local.thread_only_mode !== original.thread_only_mode ||\n        local.persona_override_id !== original.persona_override_id\n      ) {\n        changes.push({\n          channelConfigId: local.id,\n          update: {\n            enabled: local.enabled,\n            require_bot_invocation: local.require_bot_invocation,\n            thread_only_mode: local.thread_only_mode,\n            persona_override_id: local.persona_override_id,\n          },\n        });\n      }\n    }\n\n    return changes;\n  }, [localChannels, originalChannels]);\n\n  const handleChannelUpdate = useCallback(\n    (\n      channelId: number,\n      field:\n        | \"enabled\"\n        | \"require_bot_invocation\"\n        | \"thread_only_mode\"\n        | \"persona_override_id\",\n      value: boolean | number | null\n    ) => {\n      setLocalChannels((prev) =>\n        prev.map((channel) =>\n          channel.id === channelId ? { ...channel, [field]: value } : channel\n        )\n      );\n    },\n    []\n  );\n\n  const handleEnableAll = useCallback(() => {\n    setLocalChannels((prev) =>\n      prev.map((channel) => ({ ...channel, enabled: true }))\n    );\n  }, []);\n\n  const handleDisableAll = useCallback(() => {\n    setLocalChannels((prev) =>\n      prev.map((channel) => ({ ...channel, enabled: false }))\n    );\n  }, []);\n\n  const handleSaveChanges = async () => {\n    const changes = getChangedChannels();\n    if (changes.length === 0) return;\n\n    setIsUpdating(true);\n    try {\n      const { succeeded, failed } = await bulkUpdateChannelConfigs(\n        guildId,\n        changes\n      );\n\n      if (failed > 0) {\n        toast.error(`Updated ${succeeded} channels, but ${failed} failed`);\n        // Refresh to get actual server state when some updates failed\n        refreshChannels();\n      } else {\n        toast.success(\n          `Updated ${succeeded} channel${succeeded !== 1 ? \"s\" : \"\"}`\n        );\n        // Update original to match local (avoids flash from refresh)\n        setOriginalChannels(localChannels);\n      }\n    } catch (err) {\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to update channels\"\n      );\n    } finally {\n      setIsUpdating(false);\n    }\n  };\n\n  const handleDefaultPersonaChange = async (personaId: number | null) => {\n    if (!guild) return;\n    setIsUpdating(true);\n    try {\n      await updateGuildConfig(guildId, {\n        enabled: guild.enabled,\n        default_persona_id: personaId,\n      });\n      refreshGuild();\n      toast.success(\n        personaId ? \"Default agent updated\" : \"Default agent cleared\"\n      );\n    } catch (err) {\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to update agent\"\n      );\n    } finally {\n      setIsUpdating(false);\n    }\n  };\n\n  const registeredText = guild?.registered_at\n    ? `Registered: ${new Date(guild.registered_at).toLocaleString()}`\n    : \"Pending registration\";\n\n  const isRegistered = !!guild?.guild_id;\n  const isUpdateDisabled =\n    !isRegistered ||\n    channelsLoading ||\n    !!channelsError ||\n    !hasUnsavedChanges ||\n    !guild?.enabled ||\n    isUpdating;\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={SvgServer}\n        title={guild?.guild_name || `Server #${guildId}`}\n        description={registeredText}\n        backButton\n        rightChildren={\n          <Button disabled={isUpdateDisabled} onClick={handleSaveChanges}>\n            Update Configuration\n          </Button>\n        }\n      />\n      <SettingsLayouts.Body>\n        {/* Default Persona Selector */}\n        <Card variant={!guild?.enabled ? \"disabled\" : \"primary\"}>\n          <ContentAction\n            title=\"Default Agent\"\n            description=\"The agent used by the bot in all channels unless overridden.\"\n            sizePreset=\"main-content\"\n            variant=\"section\"\n            rightChildren={\n              <InputSelect\n                value={guild?.default_persona_id?.toString() ?? \"default\"}\n                onValueChange={(value: string) =>\n                  handleDefaultPersonaChange(\n                    value === \"default\" ? null : parseInt(value)\n                  )\n                }\n                disabled={isUpdating || !guild?.enabled || personasLoading}\n              >\n                <InputSelect.Trigger placeholder=\"Select agent\" />\n                <InputSelect.Content>\n                  <InputSelect.Item value=\"default\">\n                    Default Agent\n                  </InputSelect.Item>\n                  {personas.map((persona) => (\n                    <InputSelect.Item\n                      key={persona.id}\n                      value={persona.id.toString()}\n                    >\n                      {persona.name}\n                    </InputSelect.Item>\n                  ))}\n                </InputSelect.Content>\n              </InputSelect>\n            }\n          />\n        </Card>\n\n        <GuildDetailContent\n          guildId={guildId}\n          personas={personas}\n          localChannels={localChannels}\n          onChannelUpdate={handleChannelUpdate}\n          handleEnableAll={handleEnableAll}\n          handleDisableAll={handleDisableAll}\n          disabled={!guild?.enabled}\n        />\n\n        {/* Unsaved changes indicator - sticky at bottom, centered in content area */}\n        <div\n          className={cn(\n            \"sticky z-toast bottom-4 w-fit mx-auto transition-all duration-300 ease-in-out\",\n            hasUnsavedChanges &&\n              isRegistered &&\n              !channelsLoading &&\n              guild?.enabled\n              ? \"opacity-100 translate-y-0\"\n              : \"opacity-0 translate-y-4 pointer-events-none\"\n          )}\n        >\n          <Message\n            warning\n            text=\"You have unsaved changes\"\n            description=\"Click Update to save them.\"\n            close={false}\n          />\n        </div>\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/discord-bot/hooks.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport {\n  DiscordBotConfig,\n  DiscordGuildConfig,\n  DiscordChannelConfig,\n} from \"@/app/admin/discord-bot/types\";\n\nconst BASE_URL = \"/api/manage/admin/discord-bot\";\n\n/**\n * Custom fetcher for bot config that handles 403 specially.\n * 403 means bot config is managed externally (Cloud or env var).\n */\nasync function botConfigFetcher(url: string): Promise<DiscordBotConfig | null> {\n  const res = await fetch(url);\n\n  if (res.status === 403) {\n    // Bot config is managed externally - return null to indicate not accessible\n    return null;\n  }\n\n  if (!res.ok) {\n    throw new Error(\"Failed to fetch bot config\");\n  }\n\n  return res.json();\n}\n\n/**\n * Hook for bot config. Returns null when managed externally (Cloud/env var).\n */\nexport function useDiscordBotConfig() {\n  const url = `${BASE_URL}/config`;\n  const swrResponse = useSWR<DiscordBotConfig | null>(url, botConfigFetcher);\n  return {\n    ...swrResponse,\n    // null = managed externally (403), undefined = loading\n    isManaged: swrResponse.data === null,\n    refreshBotConfig: () => swrResponse.mutate(),\n  };\n}\n\nexport function useDiscordGuilds() {\n  const url = `${BASE_URL}/guilds`;\n  const swrResponse = useSWR<DiscordGuildConfig[]>(url, errorHandlingFetcher);\n  return {\n    ...swrResponse,\n    refreshGuilds: () => swrResponse.mutate(),\n  };\n}\n\nexport function useDiscordGuild(configId: number) {\n  const url = `${BASE_URL}/guilds/${configId}`;\n  const swrResponse = useSWR<DiscordGuildConfig>(url, errorHandlingFetcher);\n  return {\n    ...swrResponse,\n    refreshGuild: () => swrResponse.mutate(),\n  };\n}\n\nexport function useDiscordChannels(guildConfigId: number) {\n  const url = guildConfigId\n    ? `${BASE_URL}/guilds/${guildConfigId}/channels`\n    : null;\n  const swrResponse = useSWR<DiscordChannelConfig[]>(url, errorHandlingFetcher);\n  return {\n    ...swrResponse,\n    refreshChannels: () => swrResponse.mutate(),\n  };\n}\n"
  },
  {
    "path": "web/src/app/admin/discord-bot/lib.ts",
    "content": "import {\n  DiscordBotConfig,\n  DiscordGuildConfig,\n  DiscordGuildConfigCreateResponse,\n  DiscordGuildConfigUpdate,\n  DiscordChannelConfig,\n  DiscordChannelConfigUpdate,\n} from \"@/app/admin/discord-bot/types\";\n\nconst BASE_URL = \"/api/manage/admin/discord-bot\";\n\n// === Bot Config (Self-hosted only) ===\n\nexport async function fetchBotConfig(): Promise<DiscordBotConfig> {\n  const response = await fetch(`${BASE_URL}/config`);\n  if (!response.ok) {\n    throw new Error(\"Failed to fetch bot config\");\n  }\n  return response.json();\n}\n\nexport async function createBotConfig(\n  botToken: string\n): Promise<DiscordBotConfig> {\n  const response = await fetch(`${BASE_URL}/config`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ bot_token: botToken }),\n  });\n  if (!response.ok) {\n    const error = await response.json();\n    throw new Error(error.detail || \"Failed to create bot config\");\n  }\n  return response.json();\n}\n\nexport async function deleteBotConfig(): Promise<void> {\n  const response = await fetch(`${BASE_URL}/config`, { method: \"DELETE\" });\n  if (!response.ok) {\n    throw new Error(\"Failed to delete bot config\");\n  }\n}\n\n// === Guild Config ===\n\nexport async function fetchGuildConfigs(): Promise<DiscordGuildConfig[]> {\n  const response = await fetch(`${BASE_URL}/guilds`);\n  if (!response.ok) {\n    throw new Error(\"Failed to fetch guild configs\");\n  }\n  return response.json();\n}\n\nexport async function createGuildConfig(): Promise<DiscordGuildConfigCreateResponse> {\n  const response = await fetch(`${BASE_URL}/guilds`, { method: \"POST\" });\n  if (!response.ok) {\n    const error = await response.json();\n    throw new Error(error.detail || \"Failed to create guild config\");\n  }\n  return response.json();\n}\n\nexport async function fetchGuildConfig(\n  configId: number\n): Promise<DiscordGuildConfig> {\n  const response = await fetch(`${BASE_URL}/guilds/${configId}`);\n  if (!response.ok) {\n    throw new Error(\"Failed to fetch guild config\");\n  }\n  return response.json();\n}\n\nexport async function updateGuildConfig(\n  configId: number,\n  update: DiscordGuildConfigUpdate\n): Promise<DiscordGuildConfig> {\n  const response = await fetch(`${BASE_URL}/guilds/${configId}`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(update),\n  });\n  if (!response.ok) {\n    const error = await response.json();\n    throw new Error(error.detail || \"Failed to update guild config\");\n  }\n  return response.json();\n}\n\nexport async function deleteGuildConfig(configId: number): Promise<void> {\n  const response = await fetch(`${BASE_URL}/guilds/${configId}`, {\n    method: \"DELETE\",\n  });\n  if (!response.ok) {\n    throw new Error(\"Failed to delete guild config\");\n  }\n}\n\n// === Channel Config ===\n\nexport async function fetchChannelConfigs(\n  guildConfigId: number\n): Promise<DiscordChannelConfig[]> {\n  const response = await fetch(`${BASE_URL}/guilds/${guildConfigId}/channels`);\n  if (!response.ok) {\n    throw new Error(\"Failed to fetch channel configs\");\n  }\n  return response.json();\n}\n\nexport async function updateChannelConfig(\n  guildConfigId: number,\n  channelConfigId: number,\n  update: DiscordChannelConfigUpdate\n): Promise<DiscordChannelConfig> {\n  const response = await fetch(\n    `${BASE_URL}/guilds/${guildConfigId}/channels/${channelConfigId}`,\n    {\n      method: \"PATCH\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify(update),\n    }\n  );\n  if (!response.ok) {\n    const error = await response.json();\n    throw new Error(error.detail || \"Failed to update channel config\");\n  }\n  return response.json();\n}\n\nexport async function bulkUpdateChannelConfigs(\n  guildConfigId: number,\n  updates: { channelConfigId: number; update: DiscordChannelConfigUpdate }[]\n): Promise<{ succeeded: number; failed: number }> {\n  let succeeded = 0;\n  let failed = 0;\n\n  for (const { channelConfigId, update } of updates) {\n    try {\n      await updateChannelConfig(guildConfigId, channelConfigId, update);\n      succeeded++;\n    } catch {\n      failed++;\n    }\n  }\n\n  return { succeeded, failed };\n}\n"
  },
  {
    "path": "web/src/app/admin/discord-bot/page.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { toast } from \"@/hooks/useToast\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport CreateButton from \"@/refresh-components/buttons/CreateButton\";\nimport Modal from \"@/refresh-components/Modal\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport { SvgKey } from \"@opal/icons\";\nimport {\n  useDiscordGuilds,\n  useDiscordBotConfig,\n} from \"@/app/admin/discord-bot/hooks\";\nimport { createGuildConfig } from \"@/app/admin/discord-bot/lib\";\nimport { DiscordGuildsTable } from \"@/app/admin/discord-bot/DiscordGuildsTable\";\nimport { BotConfigCard } from \"@/app/admin/discord-bot/BotConfigCard\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.DISCORD_BOTS;\n\nfunction DiscordBotContent() {\n  const { data: guilds, isLoading, error, refreshGuilds } = useDiscordGuilds();\n  const { data: botConfig, isManaged } = useDiscordBotConfig();\n  const [registrationKey, setRegistrationKey] = useState<string | null>(null);\n  const [isCreating, setIsCreating] = useState(false);\n\n  // Bot is available if:\n  // - Managed externally (Cloud/env) - assume it's configured\n  // - Self-hosted and explicitly configured via UI\n  const isBotAvailable = isManaged || botConfig?.configured === true;\n\n  const handleCreateGuild = async () => {\n    setIsCreating(true);\n    try {\n      const result = await createGuildConfig();\n      setRegistrationKey(result.registration_key);\n      refreshGuilds();\n      toast.success(\"Server configuration created!\");\n    } catch (err) {\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to create server\"\n      );\n    } finally {\n      setIsCreating(false);\n    }\n  };\n\n  if (isLoading) {\n    return <ThreeDotsLoader />;\n  }\n\n  if (error || !guilds) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Failed to load Discord servers\"\n        errorMsg={error?.info?.detail || \"An unknown error occurred\"}\n      />\n    );\n  }\n\n  return (\n    <>\n      <BotConfigCard />\n\n      <Modal open={!!registrationKey}>\n        <Modal.Content width=\"sm\">\n          <Modal.Header\n            title=\"Registration Key\"\n            icon={SvgKey}\n            onClose={() => setRegistrationKey(null)}\n            description=\"This key will only be shown once!\"\n          />\n          <Modal.Body>\n            <Text text04 mainUiBody>\n              Copy the command and send it from any text channel in your server!\n            </Text>\n            <Card variant=\"secondary\">\n              <Section\n                flexDirection=\"row\"\n                justifyContent=\"between\"\n                alignItems=\"center\"\n              >\n                <Text text03 secondaryMono>\n                  !register {registrationKey}\n                </Text>\n                <CopyIconButton\n                  getCopyText={() => `!register ${registrationKey}`}\n                />\n              </Section>\n            </Card>\n          </Modal.Body>\n        </Modal.Content>\n      </Modal>\n\n      <Card variant={!isBotAvailable ? \"disabled\" : \"primary\"}>\n        <Section\n          flexDirection=\"row\"\n          justifyContent=\"between\"\n          alignItems=\"center\"\n        >\n          <Text mainContentEmphasis text05>\n            Server Configurations\n          </Text>\n          <CreateButton\n            onClick={handleCreateGuild}\n            disabled={isCreating || !isBotAvailable}\n          >\n            {isCreating ? \"Creating...\" : \"Add Server\"}\n          </CreateButton>\n        </Section>\n        <DiscordGuildsTable guilds={guilds} onRefresh={refreshGuilds} />\n      </Card>\n    </>\n  );\n}\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title={route.title}\n        description=\"Connect Onyx to your Discord servers. Users can ask questions directly in Discord channels.\"\n      />\n      <SettingsLayouts.Body>\n        <DiscordBotContent />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/discord-bot/types.ts",
    "content": "// Types matching backend Pydantic models\n\nexport interface DiscordBotConfig {\n  configured: boolean;\n  created_at: string | null;\n}\n\nexport interface DiscordGuildConfig {\n  id: number;\n  guild_id: number | null;\n  guild_name: string | null;\n  registered_at: string | null;\n  default_persona_id: number | null;\n  enabled: boolean;\n}\n\nexport interface DiscordGuildConfigCreateResponse {\n  id: number;\n  registration_key: string; // Shown once!\n}\n\nexport type DiscordChannelType = \"text\" | \"forum\";\n\nexport interface DiscordChannelConfig {\n  id: number;\n  channel_id: number;\n  channel_name: string;\n  channel_type: DiscordChannelType;\n  is_private: boolean;\n  require_bot_invocation: boolean;\n  thread_only_mode: boolean;\n  persona_override_id: number | null;\n  enabled: boolean;\n}\n\nexport interface DiscordChannelConfigUpdate {\n  require_bot_invocation: boolean;\n  thread_only_mode: boolean;\n  persona_override_id: number | null;\n  enabled: boolean;\n}\n\nexport interface DiscordGuildConfigUpdate {\n  enabled: boolean;\n  default_persona_id: number | null;\n}\n"
  },
  {
    "path": "web/src/app/admin/document-index-migration/page.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.INDEX_MIGRATION;\n\nimport Card from \"@/refresh-components/cards/Card\";\nimport { Content, ContentAction } from \"@opal/layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\n\ninterface MigrationStatus {\n  total_chunks_migrated: number;\n  created_at: string | null;\n  migration_completed_at: string | null;\n  approx_chunk_count_in_vespa: number | null;\n}\n\ninterface RetrievalStatus {\n  enable_opensearch_retrieval: boolean;\n}\n\nfunction formatTimestamp(iso: string): string {\n  return new Date(iso).toLocaleString();\n}\n\nfunction MigrationStatusSection() {\n  const { data, isLoading, error } = useSWR<MigrationStatus>(\n    SWR_KEYS.opensearchMigrationStatus,\n    errorHandlingFetcher\n  );\n\n  if (isLoading) {\n    return (\n      <Card>\n        <Text headingH3>Migration Status</Text>\n        <Text mainUiBody text03>\n          Loading...\n        </Text>\n      </Card>\n    );\n  }\n\n  if (error) {\n    return (\n      <Card>\n        <Text headingH3>Migration Status</Text>\n        <Text mainUiBody text03>\n          Failed to load migration status.\n        </Text>\n      </Card>\n    );\n  }\n\n  const hasStarted = data?.created_at != null;\n  const hasCompleted = data?.migration_completed_at != null;\n  const isOngoing = hasStarted && !hasCompleted;\n\n  const totalChunksMigrated = data?.total_chunks_migrated ?? 0;\n  const approxTotalChunks = data?.approx_chunk_count_in_vespa;\n\n  // Calculate percentage progress if migration is ongoing and we have approx\n  // total chunks.\n  const shouldShowProgress = isOngoing && approxTotalChunks;\n  const progressPercentage = shouldShowProgress\n    ? Math.min(99, (totalChunksMigrated / approxTotalChunks) * 100)\n    : null;\n\n  return (\n    <Card>\n      <Text headingH3>Migration Status</Text>\n\n      <ContentAction\n        title=\"Started\"\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        rightChildren={\n          <Text mainUiBody>\n            {hasStarted ? formatTimestamp(data.created_at!) : \"Not started\"}\n          </Text>\n        }\n      />\n\n      <ContentAction\n        title=\"Chunks Migrated\"\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        rightChildren={\n          <Text mainUiBody>\n            {progressPercentage !== null\n              ? `${totalChunksMigrated} (approx. progress ${Math.round(\n                  progressPercentage\n                )}%)`\n              : String(totalChunksMigrated)}\n          </Text>\n        }\n      />\n\n      <ContentAction\n        title=\"Completed\"\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        rightChildren={\n          <Text mainUiBody>\n            {hasCompleted\n              ? formatTimestamp(data.migration_completed_at!)\n              : hasStarted\n                ? \"In progress\"\n                : \"Not started\"}\n          </Text>\n        }\n      />\n    </Card>\n  );\n}\n\nfunction RetrievalSourceSection() {\n  const { data, isLoading, error, mutate } = useSWR<RetrievalStatus>(\n    SWR_KEYS.opensearchMigrationRetrieval,\n    errorHandlingFetcher\n  );\n  const [selectedSource, setSelectedSource] = useState<string | null>(null);\n  const [updating, setUpdating] = useState(false);\n\n  const serverValue = data?.enable_opensearch_retrieval\n    ? \"opensearch\"\n    : \"vespa\";\n  const currentValue = selectedSource ?? serverValue;\n  const hasChanges = selectedSource !== null && selectedSource !== serverValue;\n\n  async function handleUpdate() {\n    setUpdating(true);\n    try {\n      const response = await fetch(SWR_KEYS.opensearchMigrationRetrieval, {\n        method: \"PUT\",\n        headers: { \"Content-Type\": \"application/json\" },\n        body: JSON.stringify({\n          enable_opensearch_retrieval: currentValue === \"opensearch\",\n        }),\n      });\n      if (!response.ok) {\n        throw new Error(\"Failed to update retrieval setting\");\n      }\n      await mutate();\n      setSelectedSource(null);\n    } finally {\n      setUpdating(false);\n    }\n  }\n\n  if (isLoading) {\n    return (\n      <Card>\n        <Text headingH3>Retrieval Source</Text>\n        <Text mainUiBody text03>\n          Loading...\n        </Text>\n      </Card>\n    );\n  }\n\n  if (error) {\n    return (\n      <Card>\n        <Text headingH3>Retrieval Source</Text>\n        <Text mainUiBody text03>\n          Failed to load retrieval settings.\n        </Text>\n      </Card>\n    );\n  }\n\n  return (\n    <Card>\n      <Content\n        title=\"Retrieval Source\"\n        description=\"Controls which document index is used for retrieval.\"\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n      />\n\n      <InputSelect\n        value={currentValue}\n        onValueChange={setSelectedSource}\n        disabled={updating}\n      >\n        <InputSelect.Trigger placeholder=\"Select retrieval source\" />\n        <InputSelect.Content>\n          <InputSelect.Item value=\"vespa\">Vespa</InputSelect.Item>\n          <InputSelect.Item value=\"opensearch\">OpenSearch</InputSelect.Item>\n        </InputSelect.Content>\n      </InputSelect>\n\n      {hasChanges && (\n        // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n        <Button\n          className=\"self-center\"\n          onClick={handleUpdate}\n          disabled={updating}\n        >\n          {updating ? \"Updating...\" : \"Update Settings\"}\n        </Button>\n      )}\n    </Card>\n  );\n}\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title={route.title}\n        description=\"Monitor the migration from Vespa to OpenSearch and control the active retrieval source.\"\n        separator\n      />\n      <SettingsLayouts.Body>\n        <MigrationStatusSection />\n        <RetrievalSourceSection />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/documents/ScoreEditor.tsx",
    "content": "import { toast } from \"@/hooks/useToast\";\nimport { updateBoost } from \"./lib\";\nimport { EditableValue } from \"@/components/EditableValue\";\n\nexport const ScoreSection = ({\n  documentId,\n  initialScore,\n  refresh,\n  consistentWidth = true,\n}: {\n  documentId: string;\n  initialScore: number;\n  refresh: () => void;\n  consistentWidth?: boolean;\n}) => {\n  const onSubmit = async (value: string) => {\n    const numericScore = Number(value);\n    if (isNaN(numericScore)) {\n      toast.error(\"Score must be a number\");\n      return false;\n    }\n\n    const errorMsg = await updateBoost(documentId, numericScore);\n    if (errorMsg) {\n      toast.error(errorMsg);\n      return false;\n    } else {\n      toast.success(\"Updated score!\");\n      refresh();\n    }\n\n    return true;\n  };\n\n  return (\n    <EditableValue\n      initialValue={initialScore.toString()}\n      onSubmit={onSubmit}\n      consistentWidth={consistentWidth}\n    />\n  );\n};\n"
  },
  {
    "path": "web/src/app/admin/documents/explorer/DocumentExplorerPage.tsx",
    "content": "\"use client\";\n\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport { Explorer } from \"./Explorer\";\nimport { Connector } from \"@/lib/connectors/connectors\";\nimport { DocumentSetSummary } from \"@/lib/types\";\n\nconst route = ADMIN_ROUTES.DOCUMENT_EXPLORER;\n\ninterface DocumentExplorerPageProps {\n  initialSearchValue: string | undefined;\n  connectors: Connector<any>[];\n  documentSets: DocumentSetSummary[];\n}\n\nexport default function DocumentExplorerPage({\n  initialSearchValue,\n  connectors,\n  documentSets,\n}: DocumentExplorerPageProps) {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />\n\n      <SettingsLayouts.Body>\n        <Explorer\n          initialSearchValue={initialSearchValue}\n          connectors={connectors}\n          documentSets={documentSets}\n        />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/documents/explorer/Explorer.tsx",
    "content": "\"use client\";\n\nimport { adminSearch } from \"./lib\";\nimport { MagnifyingGlass } from \"@phosphor-icons/react\";\nimport { useState, useEffect, useCallback } from \"react\";\nimport { OnyxDocument } from \"@/lib/search/interfaces\";\nimport { buildDocumentSummaryDisplay } from \"@/components/search/DocumentDisplay\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport { updateHiddenStatus } from \"../lib\";\nimport { toast } from \"@/hooks/useToast\";\nimport { getErrorMsg } from \"@/lib/fetchUtils\";\nimport { ScoreSection } from \"../ScoreEditor\";\nimport { useRouter } from \"next/navigation\";\nimport { useFilters } from \"@/lib/hooks\";\nimport { buildFilters } from \"@/lib/search/utils\";\nimport { DocumentUpdatedAtBadge } from \"@/components/search/DocumentUpdatedAtBadge\";\nimport { DocumentSetSummary } from \"@/lib/types\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport { Connector } from \"@/lib/connectors/connectors\";\nimport { HorizontalFilters } from \"@/components/filters/SourceSelector\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\n\nconst DocumentDisplay = ({\n  document,\n  refresh,\n}: {\n  document: OnyxDocument;\n  refresh: () => void;\n}) => {\n  return (\n    <div\n      key={document.document_id}\n      className=\"text-sm border-b border-border mb-3\"\n    >\n      <div className=\"flex relative\">\n        <a\n          className={\n            \"rounded-lg flex font-bold \" +\n            (document.link ? \"\" : \"pointer-events-none\")\n          }\n          href={document.link}\n          target=\"_blank\"\n          rel=\"noopener noreferrer\"\n        >\n          <SourceIcon sourceType={document.source_type} iconSize={22} />\n          <p className=\"truncate break-all ml-2 my-auto text-base\">\n            {document.semantic_identifier || document.document_id}\n          </p>\n        </a>\n      </div>\n      <div className=\"flex flex-wrap gap-x-2 mt-1 text-xs\">\n        <div className=\"px-1 py-0.5 bg-accent-background-hovered rounded flex\">\n          <p className=\"mr-1 my-auto\">Boost:</p>\n          <ScoreSection\n            documentId={document.document_id}\n            initialScore={document.boost}\n            refresh={refresh}\n            consistentWidth={false}\n          />\n        </div>\n        <div\n          onClick={async () => {\n            const response = await updateHiddenStatus(\n              document.document_id,\n              !document.hidden\n            );\n            if (response.ok) {\n              refresh();\n            } else {\n              toast.error(\n                `Failed to update document - ${getErrorMsg(response)}`\n              );\n            }\n          }}\n          className=\"px-1 py-0.5 bg-accent-background-hovered hover:bg-accent-background rounded flex cursor-pointer select-none\"\n        >\n          <div className=\"my-auto\">\n            {document.hidden ? (\n              <div className=\"text-error\">Hidden</div>\n            ) : (\n              \"Visible\"\n            )}\n          </div>\n          <div className=\"ml-1 my-auto\">\n            <Checkbox checked={!document.hidden} />\n          </div>\n        </div>\n      </div>\n      {document.updated_at && (\n        <div className=\"mt-2\">\n          <DocumentUpdatedAtBadge updatedAt={document.updated_at} />\n        </div>\n      )}\n      <p className=\"pl-1 pt-2 pb-3 break-words\">\n        {buildDocumentSummaryDisplay(document.match_highlights, document.blurb)}\n      </p>\n    </div>\n  );\n};\n\nexport function Explorer({\n  initialSearchValue,\n  connectors,\n  documentSets,\n}: {\n  initialSearchValue: string | undefined;\n  connectors: Connector<any>[];\n  documentSets: DocumentSetSummary[];\n}) {\n  const router = useRouter();\n\n  const [query, setQuery] = useState(initialSearchValue || \"\");\n  const [timeoutId, setTimeoutId] = useState<number | null>(null);\n  const [results, setResults] = useState<OnyxDocument[]>([]);\n  const [isLoading, setIsLoading] = useState(false);\n\n  const filterManager = useFilters();\n\n  const onSearch = useCallback(\n    async (query: string) => {\n      setIsLoading(true);\n      try {\n        const filters = buildFilters(\n          filterManager.selectedSources,\n          filterManager.selectedDocumentSets,\n          filterManager.timeRange,\n          filterManager.selectedTags\n        );\n        const results = await adminSearch(query, filters);\n        if (results.ok) {\n          setResults((await results.json()).documents);\n        }\n      } finally {\n        setTimeoutId(null);\n        setIsLoading(false);\n      }\n    },\n    [\n      filterManager.selectedDocumentSets,\n      filterManager.selectedSources,\n      filterManager.timeRange,\n      filterManager.selectedTags,\n    ]\n  );\n\n  useEffect(() => {\n    if (timeoutId !== null) {\n      clearTimeout(timeoutId);\n    }\n    router.replace(\n      `/admin/documents/explorer?query=${encodeURIComponent(query)}`\n    );\n\n    const newTimeoutId = window.setTimeout(() => onSearch(query), 300);\n    setTimeoutId(newTimeoutId);\n  }, [\n    query,\n    filterManager.selectedDocumentSets,\n    filterManager.selectedSources,\n    filterManager.timeRange,\n  ]);\n\n  return (\n    <div className=\"flex flex-col gap-6\">\n      <div className=\"flex flex-col justify-center gap-2\">\n        <InputTypeIn\n          placeholder=\"Find documents based on title / content...\"\n          value={query}\n          onChange={(event) => {\n            setQuery(event.target.value);\n          }}\n          onKeyDown={(event) => {\n            if (\n              event.key === \"Enter\" &&\n              !event.shiftKey &&\n              !(event.nativeEvent as any).isComposing\n            ) {\n              onSearch(query);\n              event.preventDefault();\n            }\n          }}\n          role=\"textarea\"\n        />\n\n        <HorizontalFilters\n          {...filterManager}\n          availableDocumentSets={documentSets}\n          existingSources={connectors.map((connector) => connector.source)}\n          availableTags={[]}\n          toggleFilters={() => {}}\n          filtersUntoggled={false}\n          tagsOnLeft={true}\n        />\n        <div className=\"border-b\" />\n      </div>\n      {results.length > 0 && (\n        <div className=\"mt-3\">\n          {results.map((document) => {\n            return (\n              <DocumentDisplay\n                key={document.document_id}\n                document={document}\n                refresh={() => onSearch(query)}\n              />\n            );\n          })}\n        </div>\n      )}\n      {isLoading && <ThreeDotsLoader />}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/documents/explorer/lib.ts",
    "content": "import { Filters } from \"@/lib/search/interfaces\";\n\nexport const adminSearch = async (query: string, filters: Filters) => {\n  const response = await fetch(\"/api/admin/search\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      query,\n      filters,\n    }),\n  });\n  return response;\n};\n"
  },
  {
    "path": "web/src/app/admin/documents/explorer/page.tsx",
    "content": "import { fetchValidFilterInfo } from \"@/lib/search/utilsSS\";\nimport DocumentExplorerPage from \"./DocumentExplorerPage\";\n\nexport default async function Page(props: {\n  searchParams: Promise<{ [key: string]: string }>;\n}) {\n  const searchParams = await props.searchParams;\n  const { connectors, documentSets } = await fetchValidFilterInfo();\n\n  return (\n    <DocumentExplorerPage\n      initialSearchValue={searchParams.query}\n      connectors={connectors}\n      documentSets={documentSets}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/documents/feedback/DocumentFeedbackTable.tsx",
    "content": "import { toast } from \"@/hooks/useToast\";\nimport { useState } from \"react\";\nimport {\n  Table,\n  TableHead,\n  TableRow,\n  TableHeader,\n  TableBody,\n  TableCell,\n} from \"@/components/ui/table\";\nimport { PageSelector } from \"@/components/PageSelector\";\nimport { DocumentBoostStatus } from \"@/lib/types\";\nimport { updateHiddenStatus } from \"../lib\";\nimport { numToDisplay } from \"./constants\";\nimport { FiEye, FiEyeOff } from \"react-icons/fi\";\nimport { getErrorMsg } from \"@/lib/fetchUtils\";\nimport { HoverPopup } from \"@/components/HoverPopup\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport { ScoreSection } from \"../ScoreEditor\";\nimport { truncateString } from \"@/lib/utils\";\n\nconst IsVisibleSection = ({\n  document,\n  onUpdate,\n}: {\n  document: DocumentBoostStatus;\n  onUpdate: (response: Response) => void;\n}) => {\n  return (\n    <HoverPopup\n      mainContent={\n        document.hidden ? (\n          <div\n            onClick={async () => {\n              const response = await updateHiddenStatus(\n                document.document_id,\n                false\n              );\n              onUpdate(response);\n            }}\n            className=\"flex text-error cursor-pointer hover:bg-accent-background-hovered py-1 px-2 w-fit rounded-full\"\n          >\n            <div className=\"select-none\">Hidden</div>\n            <div className=\"ml-1 my-auto\">\n              <Checkbox checked={false} />\n            </div>\n          </div>\n        ) : (\n          <div\n            onClick={async () => {\n              const response = await updateHiddenStatus(\n                document.document_id,\n                true\n              );\n              onUpdate(response);\n            }}\n            className=\"flex cursor-pointer hover:bg-accent-background-hovered py-1 px-2 w-fit rounded-full\"\n          >\n            <div className=\"my-auto select-none\">Visible</div>\n            <div className=\"ml-1 my-auto\">\n              <Checkbox checked={true} />\n            </div>\n          </div>\n        )\n      }\n      popupContent={\n        <div className=\"text-xs\">\n          {document.hidden ? (\n            <div className=\"flex\">\n              <FiEye className=\"my-auto mr-1\" /> Unhide\n            </div>\n          ) : (\n            <div className=\"flex\">\n              <FiEyeOff className=\"my-auto mr-1\" />\n              Hide\n            </div>\n          )}\n        </div>\n      }\n      direction=\"left\"\n    />\n  );\n};\n\nexport const DocumentFeedbackTable = ({\n  documents,\n  refresh,\n}: {\n  documents: DocumentBoostStatus[];\n  refresh: () => void;\n}) => {\n  const [page, setPage] = useState(1);\n\n  return (\n    <div>\n      <Table className=\"overflow-visible\">\n        <TableHeader>\n          <TableRow>\n            <TableHead>Document Name</TableHead>\n            <TableHead>Is Searchable?</TableHead>\n            <TableHead>Score</TableHead>\n          </TableRow>\n        </TableHeader>\n        <TableBody>\n          {documents\n            .slice((page - 1) * numToDisplay, page * numToDisplay)\n            .map((document) => {\n              return (\n                <TableRow key={document.document_id}>\n                  <TableCell className=\"whitespace-normal break-all\">\n                    <a\n                      className=\"text-blue-600 dark:text-blue-300\"\n                      href={document.link}\n                      target=\"_blank\"\n                      rel=\"noopener noreferrer\"\n                    >\n                      {truncateString(document.semantic_id, 100)}\n                    </a>\n                  </TableCell>\n                  <TableCell>\n                    <IsVisibleSection\n                      document={document}\n                      onUpdate={async (response) => {\n                        if (response.ok) {\n                          refresh();\n                        } else {\n                          toast.error(\n                            `Error updating hidden status - ${getErrorMsg(\n                              response\n                            )}`\n                          );\n                        }\n                      }}\n                    />\n                  </TableCell>\n                  <TableCell>\n                    <div className=\"relative\">\n                      <div\n                        key={document.document_id}\n                        className=\"h-10 ml-auto mr-8\"\n                      >\n                        <ScoreSection\n                          documentId={document.document_id}\n                          initialScore={document.boost}\n                          refresh={refresh}\n                        />\n                      </div>\n                    </div>\n                  </TableCell>\n                </TableRow>\n              );\n            })}\n        </TableBody>\n      </Table>\n\n      <div className=\"mt-3 flex\">\n        <div className=\"mx-auto\">\n          <PageSelector\n            totalPages={Math.ceil(documents.length / numToDisplay)}\n            currentPage={page}\n            onPageChange={(newPage) => setPage(newPage)}\n          />\n        </div>\n      </div>\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/app/admin/documents/feedback/constants.ts",
    "content": "export const numPages = 8;\nexport const numToDisplay = 10;\n"
  },
  {
    "path": "web/src/app/admin/documents/feedback/page.tsx",
    "content": "\"use client\";\n\nimport { LoadingAnimation } from \"@/components/Loading\";\nimport { useMostReactedToDocuments } from \"@/lib/hooks\";\nimport { DocumentFeedbackTable } from \"./DocumentFeedbackTable\";\nimport { numPages, numToDisplay } from \"./constants\";\nimport Title from \"@/components/ui/title\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.DOCUMENT_FEEDBACK;\n\nfunction Main() {\n  const {\n    data: mostLikedDocuments,\n    isLoading: isMostLikedDocumentsLoading,\n    error: mostLikedDocumentsError,\n    refreshDocs: refreshMostLikedDocuments,\n  } = useMostReactedToDocuments(false, numToDisplay * numPages);\n\n  const {\n    data: mostDislikedDocuments,\n    isLoading: isMostLikedDocumentLoading,\n    error: mostDislikedDocumentsError,\n    refreshDocs: refreshMostDislikedDocuments,\n  } = useMostReactedToDocuments(true, numToDisplay * numPages);\n\n  const refresh = () => {\n    refreshMostLikedDocuments();\n    refreshMostDislikedDocuments();\n  };\n\n  if (isMostLikedDocumentsLoading || isMostLikedDocumentLoading) {\n    return <LoadingAnimation text=\"Loading\" />;\n  }\n\n  if (\n    mostLikedDocumentsError ||\n    mostDislikedDocumentsError ||\n    !mostLikedDocuments ||\n    !mostDislikedDocuments\n  ) {\n    return (\n      <div className=\"text-red-600\">\n        Error loading documents -{\" \"}\n        {mostDislikedDocumentsError || mostLikedDocumentsError}\n      </div>\n    );\n  }\n\n  return (\n    <div>\n      <Title className=\"mb-2\">Most Liked Documents</Title>\n      <DocumentFeedbackTable documents={mostLikedDocuments} refresh={refresh} />\n\n      <Title className=\"mb-2 mt-6\">Most Disliked Documents</Title>\n      <DocumentFeedbackTable\n        documents={mostDislikedDocuments}\n        refresh={refresh}\n      />\n    </div>\n  );\n}\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />\n      <SettingsLayouts.Body>\n        <Main />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/documents/lib.ts",
    "content": "export const updateBoost = async (documentId: string, boost: number) => {\n  const response = await fetch(\"/api/manage/admin/doc-boosts\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      document_id: documentId,\n      boost,\n    }),\n  });\n  if (response.ok) {\n    return null;\n  }\n  const responseJson = await response.json();\n  return responseJson.message || responseJson.detail || \"Unknown error\";\n};\n\nexport const updateHiddenStatus = async (\n  documentId: string,\n  isHidden: boolean\n) => {\n  const response = await fetch(\"/api/manage/admin/doc-hidden\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      document_id: documentId,\n      hidden: isHidden,\n    }),\n  });\n  return response;\n};\n"
  },
  {
    "path": "web/src/app/admin/documents/sets/DocumentSetCreationForm.tsx",
    "content": "\"use client\";\n\nimport { Form, Formik } from \"formik\";\nimport { mutate } from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport * as Yup from \"yup\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  createDocumentSet,\n  updateDocumentSet,\n  DocumentSetCreationRequest,\n} from \"./lib\";\nimport {\n  ConnectorStatus,\n  DocumentSetSummary,\n  UserGroup,\n  UserRole,\n  FederatedConnectorConfig,\n} from \"@/lib/types\";\nimport { TextFormField } from \"@/components/Field\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { IsPublicGroupSelector } from \"@/components/IsPublicGroupSelector\";\nimport React, { useEffect, useState } from \"react\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { ConnectorMultiSelect } from \"@/components/ConnectorMultiSelect\";\nimport { NonSelectableConnectors } from \"@/components/NonSelectableConnectors\";\nimport { FederatedConnectorSelector } from \"@/components/FederatedConnectorSelector\";\nimport { useFederatedConnectors } from \"@/lib/hooks\";\n\ninterface SetCreationPopupProps {\n  ccPairs: ConnectorStatus<any, any>[];\n  userGroups: UserGroup[] | undefined;\n  onClose: () => void;\n  existingDocumentSet?: DocumentSetSummary;\n}\n\nexport const DocumentSetCreationForm = ({\n  ccPairs,\n  userGroups,\n  onClose,\n  existingDocumentSet,\n}: SetCreationPopupProps) => {\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n  const isUpdate = existingDocumentSet !== undefined;\n  const [localCcPairs, setLocalCcPairs] = useState(ccPairs);\n  const { user } = useUser();\n  const { data: federatedConnectors } = useFederatedConnectors();\n\n  useEffect(() => {\n    if (existingDocumentSet?.is_public) {\n      return;\n    }\n  }, [existingDocumentSet?.is_public]);\n\n  return (\n    <div className=\"max-w-full mx-auto\">\n      <Formik<DocumentSetCreationRequest>\n        initialValues={{\n          name: existingDocumentSet?.name ?? \"\",\n          description: existingDocumentSet?.description ?? \"\",\n          cc_pair_ids:\n            existingDocumentSet?.cc_pair_summaries.map(\n              (ccPairSummary) => ccPairSummary.id\n            ) ?? [],\n          is_public: existingDocumentSet?.is_public ?? true,\n          users: existingDocumentSet?.users ?? [],\n          groups: existingDocumentSet?.groups ?? [],\n          federated_connectors:\n            existingDocumentSet?.federated_connector_summaries?.map((fc) => ({\n              federated_connector_id: fc.id,\n              entities: fc.entities,\n            })) ?? [],\n        }}\n        validationSchema={Yup.object()\n          .shape({\n            name: Yup.string().required(\"Please enter a name for the set\"),\n            description: Yup.string().optional(),\n            cc_pair_ids: Yup.array().of(Yup.number().required()),\n            federated_connectors: Yup.array().of(\n              Yup.object().shape({\n                federated_connector_id: Yup.number().required(),\n                entities: Yup.object().required(),\n              })\n            ),\n          })\n          .test(\n            \"at-least-one-connector\",\n            \"Please select at least one connector (regular or federated)\",\n            function (values) {\n              const hasRegularConnectors =\n                values.cc_pair_ids && values.cc_pair_ids.length > 0;\n              const hasFederatedConnectors =\n                values.federated_connectors &&\n                values.federated_connectors.length > 0;\n              return hasRegularConnectors || hasFederatedConnectors;\n            }\n          )}\n        onSubmit={async (values, formikHelpers) => {\n          formikHelpers.setSubmitting(true);\n          // If the document set is public, then we don't want to send any groups\n          const processedValues = {\n            ...values,\n            groups: values.is_public ? [] : values.groups,\n          };\n\n          let response;\n          if (isUpdate) {\n            response = await updateDocumentSet({\n              id: existingDocumentSet.id,\n              ...processedValues,\n              users: processedValues.users,\n            });\n          } else {\n            response = await createDocumentSet(processedValues);\n          }\n          formikHelpers.setSubmitting(false);\n          if (response.ok) {\n            toast.success(\n              isUpdate\n                ? \"Successfully updated document set!\"\n                : \"Successfully created document set!\"\n            );\n            await Promise.all([\n              mutate(SWR_KEYS.documentSets),\n              mutate(SWR_KEYS.documentSetsEditable),\n            ]);\n            onClose();\n          } else {\n            const errorMsg = await response.text();\n            toast.error(\n              isUpdate\n                ? `Error updating document set - ${errorMsg}`\n                : `Error creating document set - ${errorMsg}`\n            );\n          }\n        }}\n      >\n        {(props) => {\n          // Filter visible cc pairs for curator role\n          const visibleCcPairs =\n            user?.role === UserRole.CURATOR\n              ? localCcPairs.filter(\n                  (ccPair) =>\n                    ccPair.access_type === \"public\" ||\n                    (ccPair.groups.length > 0 &&\n                      props.values.groups.every((group) =>\n                        ccPair.groups.includes(group)\n                      ))\n                )\n              : localCcPairs;\n\n          // Filter non-visible cc pairs for curator role\n          const nonVisibleCcPairs =\n            user?.role === UserRole.CURATOR\n              ? localCcPairs.filter(\n                  (ccPair) =>\n                    !(ccPair.access_type === \"public\") &&\n                    (ccPair.groups.length === 0 ||\n                      !props.values.groups.every((group) =>\n                        ccPair.groups.includes(group)\n                      ))\n                )\n              : [];\n\n          // Deselect filtered out cc pairs\n          if (user?.role === UserRole.CURATOR) {\n            const visibleCcPairIds = visibleCcPairs.map(\n              (ccPair) => ccPair.cc_pair_id\n            );\n            props.values.cc_pair_ids = props.values.cc_pair_ids.filter((id) =>\n              visibleCcPairIds.includes(id)\n            );\n          }\n\n          return (\n            <Form className=\"space-y-6 w-full \">\n              <div className=\"space-y-4 w-full\">\n                <TextFormField\n                  name=\"name\"\n                  label=\"Name:\"\n                  placeholder=\"A name for the document set\"\n                />\n                <TextFormField\n                  name=\"description\"\n                  label=\"Description:\"\n                  placeholder=\"Describe what the document set represents\"\n                  optional={true}\n                />\n\n                {isPaidEnterpriseFeaturesEnabled && (\n                  <IsPublicGroupSelector\n                    formikProps={props}\n                    objectName=\"document set\"\n                  />\n                )}\n              </div>\n\n              <div className=\"my-6 border-t border-border-02\" />\n\n              <div className=\"space-y-6\">\n                {user?.role === UserRole.CURATOR ? (\n                  <>\n                    <ConnectorMultiSelect\n                      name=\"cc_pair_ids\"\n                      label={`Connectors available to ${\n                        userGroups && userGroups.length > 1\n                          ? \"the selected group\"\n                          : \"the group you curate\"\n                      }`}\n                      connectors={visibleCcPairs}\n                      selectedIds={props.values.cc_pair_ids}\n                      onChange={(selectedIds) => {\n                        props.setFieldValue(\"cc_pair_ids\", selectedIds);\n                      }}\n                      placeholder=\"Search for connectors...\"\n                    />\n\n                    <NonSelectableConnectors\n                      connectors={nonVisibleCcPairs}\n                      title={`Connectors not available to the ${\n                        userGroups && userGroups.length > 1\n                          ? `group${\n                              props.values.groups.length > 1 ? \"s\" : \"\"\n                            } you have selected`\n                          : \"group you curate\"\n                      }`}\n                      description=\"Only connectors that are directly assigned to the group you are trying to add the document set to will be available.\"\n                    />\n                  </>\n                ) : (\n                  <ConnectorMultiSelect\n                    name=\"cc_pair_ids\"\n                    label=\"Pick your connectors\"\n                    connectors={visibleCcPairs}\n                    selectedIds={props.values.cc_pair_ids}\n                    onChange={(selectedIds) => {\n                      props.setFieldValue(\"cc_pair_ids\", selectedIds);\n                    }}\n                    placeholder=\"Search for connectors...\"\n                  />\n                )}\n\n                {/* Federated Connectors Section */}\n                {federatedConnectors && federatedConnectors.length > 0 && (\n                  <>\n                    <div className=\"my-4 border-t border-border-02\" />\n                    <FederatedConnectorSelector\n                      name=\"federated_connectors\"\n                      label=\"Federated Connectors\"\n                      federatedConnectors={federatedConnectors}\n                      selectedConfigs={props.values.federated_connectors}\n                      onChange={(selectedConfigs) => {\n                        props.setFieldValue(\n                          \"federated_connectors\",\n                          selectedConfigs\n                        );\n                      }}\n                      placeholder=\"Search for federated connectors...\"\n                    />\n                  </>\n                )}\n              </div>\n\n              <div className=\"flex mt-6 pt-4 border-t border-border-02\">\n                {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n                <Button\n                  type=\"submit\"\n                  disabled={props.isSubmitting}\n                  className=\"w-56 mx-auto\"\n                  primary\n                >\n                  {isUpdate ? \"Update Document Set\" : \"Create Document Set\"}\n                </Button>\n              </div>\n            </Form>\n          );\n        }}\n      </Formik>\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/app/admin/documents/sets/[documentSetId]/page.tsx",
    "content": "\"use client\";\nimport { use } from \"react\";\n\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { refreshDocumentSets, useDocumentSets } from \"../hooks\";\nimport { useConnectorStatus, useUserGroups } from \"@/lib/hooks\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { DocumentSetCreationForm } from \"../DocumentSetCreationForm\";\nimport { useRouter } from \"next/navigation\";\nimport { useVectorDbEnabled } from \"@/providers/SettingsProvider\";\n\nconst route = ADMIN_ROUTES.DOCUMENT_SETS;\n\nfunction Main({ documentSetId }: { documentSetId: number }) {\n  const router = useRouter();\n  const vectorDbEnabled = useVectorDbEnabled();\n\n  const {\n    data: documentSets,\n    isLoading: isDocumentSetsLoading,\n    error: documentSetsError,\n  } = useDocumentSets();\n\n  const {\n    data: ccPairs,\n    isLoading: isCCPairsLoading,\n    error: ccPairsError,\n  } = useConnectorStatus(30000, vectorDbEnabled);\n\n  // EE only\n  const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups();\n\n  if (\n    isDocumentSetsLoading ||\n    (vectorDbEnabled && isCCPairsLoading) ||\n    userGroupsIsLoading\n  ) {\n    return (\n      <div className=\"flex justify-center items-center min-h-[400px]\">\n        <ThreeDotsLoader />\n      </div>\n    );\n  }\n\n  if (documentSetsError || !documentSets) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Failed to fetch document sets\"\n        errorMsg={documentSetsError}\n      />\n    );\n  }\n\n  if (vectorDbEnabled && (ccPairsError || !ccPairs)) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Failed to fetch Connectors\"\n        errorMsg={ccPairsError}\n      />\n    );\n  }\n\n  const documentSet = documentSets.find(\n    (documentSet) => documentSet.id === documentSetId\n  );\n  if (!documentSet) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Document set not found\"\n        errorMsg={`Document set with id ${documentSetId} not found`}\n      />\n    );\n  }\n\n  return (\n    <CardSection>\n      <DocumentSetCreationForm\n        ccPairs={ccPairs ?? []}\n        userGroups={userGroups}\n        onClose={() => {\n          refreshDocumentSets();\n          router.push(\"/admin/documents/sets\");\n        }}\n        existingDocumentSet={documentSet}\n      />\n    </CardSection>\n  );\n}\n\nexport default function Page(props: {\n  params: Promise<{ documentSetId: string }>;\n}) {\n  const params = use(props.params);\n  const documentSetId = parseInt(params.documentSetId);\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title=\"Edit Document Set\"\n        separator\n        backButton\n      />\n      <SettingsLayouts.Body>\n        <Main documentSetId={documentSetId} />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/documents/sets/hooks.tsx",
    "content": "import { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { DocumentSetSummary } from \"@/lib/types\";\nimport useSWR, { mutate } from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport function refreshDocumentSets() {\n  mutate(SWR_KEYS.documentSets);\n}\n\nexport function useDocumentSets(getEditable: boolean = false) {\n  const url = getEditable\n    ? SWR_KEYS.documentSetsEditable\n    : SWR_KEYS.documentSets;\n\n  const swrResponse = useSWR<DocumentSetSummary[]>(url, errorHandlingFetcher, {\n    refreshInterval: 5000, // 5 seconds\n  });\n\n  return {\n    ...swrResponse,\n    refreshDocumentSets: refreshDocumentSets,\n  };\n}\n"
  },
  {
    "path": "web/src/app/admin/documents/sets/lib.ts",
    "content": "import { FederatedConnectorConfig } from \"@/lib/types\";\n\nexport interface DocumentSetCreationRequest {\n  name: string;\n  description: string;\n  cc_pair_ids: number[];\n  is_public: boolean;\n  users: string[];\n  groups: number[];\n  federated_connectors: FederatedConnectorConfig[];\n}\n\nexport const createDocumentSet = async ({\n  name,\n  description,\n  cc_pair_ids,\n  is_public,\n  users,\n  groups,\n  federated_connectors,\n}: DocumentSetCreationRequest) => {\n  return fetch(\"/api/manage/admin/document-set\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      name,\n      description,\n      cc_pair_ids,\n      is_public,\n      users,\n      groups,\n      federated_connectors,\n    }),\n  });\n};\n\ninterface DocumentSetUpdateRequest {\n  id: number;\n  description: string;\n  cc_pair_ids: number[];\n  is_public: boolean;\n  users: string[];\n  groups: number[];\n  federated_connectors: FederatedConnectorConfig[];\n}\n\nexport const updateDocumentSet = async ({\n  id,\n  description,\n  cc_pair_ids,\n  is_public,\n  users,\n  groups,\n  federated_connectors,\n}: DocumentSetUpdateRequest) => {\n  return fetch(\"/api/manage/admin/document-set\", {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      id,\n      description,\n      cc_pair_ids,\n      is_public,\n      users,\n      groups,\n      federated_connectors,\n    }),\n  });\n};\n\nexport const deleteDocumentSet = async (id: number) => {\n  return fetch(`/api/manage/admin/document-set/${id}`, {\n    method: \"DELETE\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n};\n"
  },
  {
    "path": "web/src/app/admin/documents/sets/new/page.tsx",
    "content": "\"use client\";\n\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport { DocumentSetCreationForm } from \"../DocumentSetCreationForm\";\nimport { useConnectorStatus, useUserGroups } from \"@/lib/hooks\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { useRouter } from \"next/navigation\";\nimport { refreshDocumentSets } from \"../hooks\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { useVectorDbEnabled } from \"@/providers/SettingsProvider\";\n\nconst route = ADMIN_ROUTES.DOCUMENT_SETS;\n\nfunction Main() {\n  const router = useRouter();\n  const vectorDbEnabled = useVectorDbEnabled();\n\n  const {\n    data: ccPairs,\n    isLoading: isCCPairsLoading,\n    error: ccPairsError,\n  } = useConnectorStatus(30000, vectorDbEnabled);\n\n  // EE only\n  const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups();\n\n  if ((vectorDbEnabled && isCCPairsLoading) || userGroupsIsLoading) {\n    return (\n      <div className=\"flex justify-center items-center min-h-[400px]\">\n        <ThreeDotsLoader />\n      </div>\n    );\n  }\n\n  if (vectorDbEnabled && (ccPairsError || !ccPairs)) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Failed to fetch Connectors\"\n        errorMsg={ccPairsError}\n      />\n    );\n  }\n\n  return (\n    <>\n      <CardSection>\n        <DocumentSetCreationForm\n          ccPairs={ccPairs ?? []}\n          userGroups={userGroups}\n          onClose={() => {\n            refreshDocumentSets();\n            router.push(\"/admin/documents/sets\");\n          }}\n        />\n      </CardSection>\n    </>\n  );\n}\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title=\"New Document Set\"\n        separator\n        backButton\n      />\n      <SettingsLayouts.Body>\n        <Main />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/documents/sets/page.tsx",
    "content": "\"use client\";\n\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { PageSelector } from \"@/components/PageSelector\";\nimport { InfoIcon } from \"@/components/icons/icons\";\nimport {\n  Table,\n  TableHead,\n  TableRow,\n  TableBody,\n  TableCell,\n} from \"@/components/ui/table\";\nimport { Text } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport Title from \"@/components/ui/title\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { DocumentSetSummary } from \"@/lib/types\";\nimport { useState } from \"react\";\nimport { useDocumentSets } from \"./hooks\";\nimport { ConnectorTitle } from \"@/components/admin/connectors/ConnectorTitle\";\nimport { deleteDocumentSet } from \"./lib\";\nimport { toast } from \"@/hooks/useToast\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport {\n  FiAlertTriangle,\n  FiCheckCircle,\n  FiClock,\n  FiEdit2,\n  FiLock,\n  FiUnlock,\n} from \"react-icons/fi\";\nimport { DeleteButton } from \"@/components/DeleteButton\";\nimport { useRouter } from \"next/navigation\";\nimport { TableHeader } from \"@/components/ui/table\";\nimport { Badge } from \"@/components/ui/badge\";\nimport {\n  Tooltip,\n  TooltipContent,\n  TooltipProvider,\n  TooltipTrigger,\n} from \"@/components/ui/tooltip\";\nimport CreateButton from \"@/refresh-components/buttons/CreateButton\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport Link from \"next/link\";\n\nconst route = ADMIN_ROUTES.DOCUMENT_SETS;\nconst numToDisplay = 50;\n\n// Component to display federated connectors with consistent styling\nconst FederatedConnectorTitle = ({\n  federatedConnector,\n  showMetadata = true,\n  isLink = true,\n}: {\n  federatedConnector: any;\n  showMetadata?: boolean;\n  isLink?: boolean;\n}) => {\n  const sourceType = federatedConnector.source.replace(/^federated_/, \"\");\n\n  const mainSectionClassName = \"text-blue-500 dark:text-blue-100 flex w-fit\";\n  const mainDisplay = (\n    <>\n      <SourceIcon sourceType={sourceType as any} iconSize={16} />\n      <div className=\"ml-1 my-auto text-xs font-medium truncate\">\n        {federatedConnector.name}\n      </div>\n      <Badge variant=\"outline\" className=\"text-xs ml-2\">\n        Federated\n      </Badge>\n    </>\n  );\n\n  return (\n    <div className=\"my-auto max-w-full\">\n      {isLink ? (\n        <Link\n          className={mainSectionClassName}\n          href={`/admin/federated/${federatedConnector.id}`}\n        >\n          {mainDisplay}\n        </Link>\n      ) : (\n        <div className={mainSectionClassName}>{mainDisplay}</div>\n      )}\n      {showMetadata && Object.keys(federatedConnector.entities).length > 0 && (\n        <div className=\"text-[10px] mt-0.5 text-gray-600 dark:text-gray-400\">\n          {Object.entries(federatedConnector.entities)\n            .filter(\n              ([_, value]) =>\n                value &&\n                (Array.isArray(value) ? value.length > 0 : String(value).trim())\n            )\n            .map(([key, value]) => (\n              <div key={key} className=\"truncate\">\n                <i>{key}:</i>{\" \"}\n                {Array.isArray(value) ? value.join(\", \") : String(value)}\n              </div>\n            ))}\n        </div>\n      )}\n    </div>\n  );\n};\n\nconst EditRow = ({\n  documentSet,\n  isEditable,\n}: {\n  documentSet: DocumentSetSummary;\n  isEditable: boolean;\n}) => {\n  const router = useRouter();\n\n  if (!isEditable) {\n    return (\n      <div className=\"text-text-darker font-medium my-auto p-1\">\n        {documentSet.name}\n      </div>\n    );\n  }\n\n  return (\n    <div className=\"relative flex\">\n      <TooltipProvider>\n        <Tooltip>\n          <TooltipTrigger asChild>\n            <div\n              className={`\n              text-text-darker font-medium my-auto p-1 hover:bg-accent-background flex items-center select-none\n              ${documentSet.is_up_to_date ? \"cursor-pointer\" : \"cursor-default\"}\n            `}\n              style={{ wordBreak: \"normal\", overflowWrap: \"break-word\" }}\n              onClick={() => {\n                if (documentSet.is_up_to_date) {\n                  router.push(`/admin/documents/sets/${documentSet.id}`);\n                }\n              }}\n            >\n              <FiEdit2 className=\"mr-2 flex-shrink-0\" />\n              <span className=\"font-medium\">{documentSet.name}</span>\n            </div>\n          </TooltipTrigger>\n          {!documentSet.is_up_to_date && (\n            <TooltipContent width=\"max-w-sm\">\n              <div className=\"flex break-words break-keep whitespace-pre-wrap items-start\">\n                <InfoIcon className=\"mr-2 mt-0.5\" />\n                Cannot update while syncing! Wait for the sync to finish, then\n                try again.\n              </div>\n            </TooltipContent>\n          )}\n        </Tooltip>\n      </TooltipProvider>\n    </div>\n  );\n};\n\ninterface DocumentFeedbackTableProps {\n  documentSets: DocumentSetSummary[];\n  refresh: () => void;\n  refreshEditable: () => void;\n  editableDocumentSets: DocumentSetSummary[];\n}\n\nconst DocumentSetTable = ({\n  documentSets,\n  editableDocumentSets,\n  refresh,\n  refreshEditable,\n}: DocumentFeedbackTableProps) => {\n  const [page, setPage] = useState(1);\n\n  // sort by name for consistent ordering\n  documentSets.sort((a, b) => {\n    if (a.name < b.name) {\n      return -1;\n    } else if (a.name > b.name) {\n      return 1;\n    } else {\n      return 0;\n    }\n  });\n\n  const sortedDocumentSets = [\n    ...editableDocumentSets,\n    ...documentSets.filter(\n      (ds) => !editableDocumentSets.some((eds) => eds.id === ds.id)\n    ),\n  ];\n\n  return (\n    <div>\n      <Title>Existing Document Sets</Title>\n      <Table className=\"overflow-visible mt-2\">\n        <TableHeader>\n          <TableRow>\n            <TableHead>Name</TableHead>\n            <TableHead>Connectors</TableHead>\n            <TableHead>Status</TableHead>\n            <TableHead>Public</TableHead>\n            <TableHead>Delete</TableHead>\n          </TableRow>\n        </TableHeader>\n        <TableBody>\n          {sortedDocumentSets\n            .slice((page - 1) * numToDisplay, page * numToDisplay)\n            .map((documentSet) => {\n              const isEditable = editableDocumentSets.some(\n                (eds) => eds.id === documentSet.id\n              );\n              return (\n                <TableRow key={documentSet.id}>\n                  <TableCell className=\"whitespace-normal break-all\">\n                    <div className=\"flex gap-x-1 text-emphasis\">\n                      <EditRow\n                        documentSet={documentSet}\n                        isEditable={isEditable}\n                      />\n                    </div>\n                  </TableCell>\n                  <TableCell>\n                    <div>\n                      {/* Regular Connectors */}\n                      {documentSet.cc_pair_summaries.map(\n                        (ccPairSummary, ind) => {\n                          return (\n                            <div\n                              className={\n                                ind !== documentSet.cc_pair_summaries.length - 1\n                                  ? \"mb-3\"\n                                  : \"\"\n                              }\n                              key={ccPairSummary.id}\n                            >\n                              <div className=\"text-blue-500 dark:text-blue-100 flex w-fit\">\n                                <SourceIcon\n                                  sourceType={ccPairSummary.source}\n                                  iconSize={16}\n                                />\n                                <div className=\"ml-1 my-auto text-xs font-medium truncate\">\n                                  {ccPairSummary.name || \"Unnamed\"}\n                                </div>\n                              </div>\n                            </div>\n                          );\n                        }\n                      )}\n\n                      {/* Federated Connectors */}\n                      {documentSet.federated_connector_summaries &&\n                        documentSet.federated_connector_summaries.length >\n                          0 && (\n                          <>\n                            {documentSet.cc_pair_summaries.length > 0 && (\n                              <div className=\"mb-3\" />\n                            )}\n                            {documentSet.federated_connector_summaries.map(\n                              (federatedConnector, ind) => {\n                                return (\n                                  <div\n                                    className={\n                                      ind !==\n                                      documentSet.federated_connector_summaries\n                                        .length -\n                                        1\n                                        ? \"mb-3\"\n                                        : \"\"\n                                    }\n                                    key={`federated-${federatedConnector.id}`}\n                                  >\n                                    <FederatedConnectorTitle\n                                      federatedConnector={federatedConnector}\n                                      showMetadata={true}\n                                    />\n                                  </div>\n                                );\n                              }\n                            )}\n                          </>\n                        )}\n                    </div>\n                  </TableCell>\n                  <TableCell>\n                    {documentSet.is_up_to_date ? (\n                      <Badge variant=\"success\" icon={FiCheckCircle}>\n                        Up to Date\n                      </Badge>\n                    ) : documentSet.cc_pair_summaries.length > 0 ||\n                      (documentSet.federated_connector_summaries &&\n                        documentSet.federated_connector_summaries.length >\n                          0) ? (\n                      <Badge variant=\"in_progress\" icon={FiClock}>\n                        Syncing\n                      </Badge>\n                    ) : (\n                      <Badge variant=\"destructive\" icon={FiAlertTriangle}>\n                        Deleting\n                      </Badge>\n                    )}\n                  </TableCell>\n                  <TableCell>\n                    {documentSet.is_public ? (\n                      <Badge\n                        variant={isEditable ? \"success\" : \"default\"}\n                        icon={FiUnlock}\n                      >\n                        Public\n                      </Badge>\n                    ) : (\n                      <Badge\n                        variant={isEditable ? \"private\" : \"default\"}\n                        icon={FiLock}\n                      >\n                        Private\n                      </Badge>\n                    )}\n                  </TableCell>\n                  <TableCell>\n                    {isEditable ? (\n                      <DeleteButton\n                        onClick={async () => {\n                          const response = await deleteDocumentSet(\n                            documentSet.id\n                          );\n                          if (response.ok) {\n                            toast.success(\n                              `Document set \"${documentSet.name}\" scheduled for deletion`\n                            );\n                          } else {\n                            const errorMsg = (await response.json()).detail;\n                            toast.error(\n                              `Failed to schedule document set for deletion - ${errorMsg}`\n                            );\n                          }\n                          refresh();\n                          refreshEditable();\n                        }}\n                      />\n                    ) : (\n                      \"-\"\n                    )}\n                  </TableCell>\n                </TableRow>\n              );\n            })}\n        </TableBody>\n      </Table>\n\n      <div className=\"mt-3 flex\">\n        <div className=\"mx-auto\">\n          <PageSelector\n            totalPages={Math.ceil(sortedDocumentSets.length / numToDisplay)}\n            currentPage={page}\n            onPageChange={(newPage) => setPage(newPage)}\n          />\n        </div>\n      </div>\n    </div>\n  );\n};\n\nfunction Main() {\n  const {\n    data: documentSets,\n    isLoading: isDocumentSetsLoading,\n    error: documentSetsError,\n    refreshDocumentSets,\n  } = useDocumentSets();\n\n  const {\n    data: editableDocumentSets,\n    isLoading: isEditableDocumentSetsLoading,\n    error: editableDocumentSetsError,\n    refreshDocumentSets: refreshEditableDocumentSets,\n  } = useDocumentSets(true);\n\n  if (isDocumentSetsLoading || isEditableDocumentSetsLoading) {\n    return (\n      <div className=\"flex justify-center items-center min-h-[400px]\">\n        <ThreeDotsLoader />\n      </div>\n    );\n  }\n\n  if (documentSetsError || !documentSets) {\n    return <div>Error: {documentSetsError}</div>;\n  }\n\n  if (editableDocumentSetsError || !editableDocumentSets) {\n    return <div>Error: {editableDocumentSetsError}</div>;\n  }\n\n  return (\n    <div className=\"mb-8\">\n      <Text as=\"p\">\n        {markdown(\n          \"**Document Sets** allow you to group logically connected documents into a single bundle. These can then be used as a filter when performing searches to control the scope of information Onyx searches over.\"\n        )}\n      </Text>\n      <Spacer rem={0.75} />\n\n      <div className=\"mb-3\"></div>\n\n      <div className=\"flex mb-6\">\n        <CreateButton href=\"/admin/documents/sets/new\">\n          New Document Set\n        </CreateButton>\n      </div>\n\n      {documentSets.length > 0 && (\n        <>\n          <Separator />\n          <DocumentSetTable\n            documentSets={documentSets}\n            editableDocumentSets={editableDocumentSets}\n            refresh={refreshDocumentSets}\n            refreshEditable={refreshEditableDocumentSets}\n          />\n        </>\n      )}\n    </div>\n  );\n}\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />\n      <SettingsLayouts.Body>\n        <Main />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/EmbeddingModelSelectionForm.tsx",
    "content": "\"use client\";\n\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport useSWR from \"swr\";\nimport { Dispatch, SetStateAction, useState } from \"react\";\nimport {\n  CloudEmbeddingProvider,\n  CloudEmbeddingModel,\n  AVAILABLE_MODELS,\n  AVAILABLE_CLOUD_PROVIDERS,\n  LITELLM_CLOUD_PROVIDER,\n  AZURE_CLOUD_PROVIDER,\n  HostedEmbeddingModel,\n  EmbeddingProvider,\n} from \"@/components/embedding/interfaces\";\nimport OpenEmbeddingPage from \"@/app/admin/embeddings/pages/OpenEmbeddingPage\";\nimport CloudEmbeddingPage from \"@/app/admin/embeddings/pages/CloudEmbeddingPage\";\nimport ProviderCreationModal from \"@/app/admin/embeddings/modals/ProviderCreationModal\";\nimport DeleteCredentialsModal from \"@/app/admin/embeddings/modals/DeleteCredentialsModal\";\nimport SelectModelModal from \"@/app/admin/embeddings/modals/SelectModelModal\";\nimport ChangeCredentialsModal from \"@/app/admin/embeddings/modals/ChangeCredentialsModal\";\nimport ModelSelectionConfirmationModal from \"@/app/admin/embeddings/modals/ModelSelectionModal\";\nimport AlreadyPickedModal from \"@/app/admin/embeddings/modals/AlreadyPickedModal\";\nimport { ModelOption } from \"@/components/embedding/ModelSelector\";\nimport {\n  EMBEDDING_MODELS_ADMIN_URL,\n  EMBEDDING_PROVIDERS_ADMIN_URL,\n} from \"@/lib/llmConfig/constants\";\nimport { AdvancedSearchConfiguration } from \"@/app/admin/embeddings/interfaces\";\nimport { Button } from \"@opal/components\";\n\nexport interface EmbeddingDetails {\n  api_key?: string;\n  api_url?: string;\n  api_version?: string;\n  deployment_name?: string;\n  custom_config: any;\n  provider_type: EmbeddingProvider;\n}\n\nexport interface EmbeddingModelSelectionProps {\n  modelTab: \"open\" | \"cloud\" | null;\n  setModelTab: Dispatch<SetStateAction<\"open\" | \"cloud\" | null>>;\n  currentEmbeddingModel: CloudEmbeddingModel | HostedEmbeddingModel;\n  selectedProvider: CloudEmbeddingModel | HostedEmbeddingModel;\n  updateSelectedProvider: (\n    model: CloudEmbeddingModel | HostedEmbeddingModel\n  ) => void;\n  updateCurrentModel: (\n    newModel: string,\n    provider_type: EmbeddingProvider\n  ) => void;\n  advancedEmbeddingDetails: AdvancedSearchConfiguration;\n}\n\nexport default function EmbeddingModelSelection({\n  selectedProvider,\n  currentEmbeddingModel,\n  updateSelectedProvider,\n  modelTab,\n  setModelTab,\n  updateCurrentModel,\n  advancedEmbeddingDetails,\n}: EmbeddingModelSelectionProps) {\n  // Cloud Provider based modals\n  const [showTentativeProvider, setShowTentativeProvider] =\n    useState<CloudEmbeddingProvider | null>(null);\n\n  const [showUnconfiguredProvider, setShowUnconfiguredProvider] =\n    useState<CloudEmbeddingProvider | null>(null);\n  const [changeCredentialsProvider, setChangeCredentialsProvider] =\n    useState<CloudEmbeddingProvider | null>(null);\n\n  // Cloud Model based modals\n  const [alreadySelectedModel, setAlreadySelectedModel] =\n    useState<CloudEmbeddingModel | null>(null);\n  const [showTentativeModel, setShowTentativeModel] =\n    useState<CloudEmbeddingModel | null>(null);\n\n  const [showModelInQueue, setShowModelInQueue] =\n    useState<CloudEmbeddingModel | null>(null);\n\n  // Open Model based modals\n  const [showTentativeOpenProvider, setShowTentativeOpenProvider] =\n    useState<HostedEmbeddingModel | null>(null);\n\n  const [showDeleteCredentialsModal, setShowDeleteCredentialsModal] =\n    useState<boolean>(false);\n\n  const [showAddConnectorPopup, setShowAddConnectorPopup] =\n    useState<boolean>(false);\n\n  const { data: embeddingModelDetails } = useSWR<CloudEmbeddingModel[]>(\n    EMBEDDING_MODELS_ADMIN_URL,\n    errorHandlingFetcher,\n    { refreshInterval: 5000 } // 5 seconds\n  );\n\n  const {\n    data: embeddingProviderDetails,\n    mutate: mutateEmbeddingProviderDetails,\n  } = useSWR<EmbeddingDetails[]>(\n    EMBEDDING_PROVIDERS_ADMIN_URL,\n    errorHandlingFetcher,\n    { refreshInterval: 5000 } // 5 seconds\n  );\n\n  return (\n    <div className=\"p-2\">\n      {alreadySelectedModel && (\n        <AlreadyPickedModal\n          model={alreadySelectedModel}\n          onClose={() => setAlreadySelectedModel(null)}\n        />\n      )}\n\n      {showTentativeOpenProvider && (\n        <ModelSelectionConfirmationModal\n          selectedModel={showTentativeOpenProvider}\n          isCustom={\n            AVAILABLE_MODELS.find(\n              (model) =>\n                model.model_name === showTentativeOpenProvider.model_name\n            ) === undefined\n          }\n          onConfirm={() => {\n            updateSelectedProvider(showTentativeOpenProvider);\n            setShowTentativeOpenProvider(null);\n          }}\n          onCancel={() => setShowTentativeOpenProvider(null)}\n        />\n      )}\n\n      {showTentativeProvider && (\n        <ProviderCreationModal\n          updateCurrentModel={updateCurrentModel}\n          isProxy={\n            showTentativeProvider.provider_type == EmbeddingProvider.LITELLM\n          }\n          isAzure={\n            showTentativeProvider.provider_type == EmbeddingProvider.AZURE\n          }\n          selectedProvider={showTentativeProvider}\n          onConfirm={() => {\n            setShowTentativeProvider(showUnconfiguredProvider);\n            if (showModelInQueue) {\n              setShowTentativeModel(showModelInQueue);\n            }\n            mutateEmbeddingProviderDetails();\n          }}\n          onCancel={() => {\n            setShowModelInQueue(null);\n            setShowTentativeProvider(null);\n          }}\n        />\n      )}\n\n      {changeCredentialsProvider && (\n        <ChangeCredentialsModal\n          isProxy={\n            changeCredentialsProvider.provider_type == EmbeddingProvider.LITELLM\n          }\n          isAzure={\n            changeCredentialsProvider.provider_type == EmbeddingProvider.AZURE\n          }\n          useFileUpload={\n            changeCredentialsProvider.provider_type == EmbeddingProvider.GOOGLE\n          }\n          onDeleted={() => {\n            setChangeCredentialsProvider(null);\n            mutateEmbeddingProviderDetails();\n          }}\n          provider={changeCredentialsProvider}\n          onConfirm={() => setChangeCredentialsProvider(null)}\n          onCancel={() => setChangeCredentialsProvider(null)}\n        />\n      )}\n\n      {showTentativeModel && (\n        <SelectModelModal\n          model={showTentativeModel}\n          onConfirm={() => {\n            setShowModelInQueue(null);\n            updateSelectedProvider(showTentativeModel);\n            setShowTentativeModel(null);\n          }}\n          onCancel={() => {\n            setShowModelInQueue(null);\n            setShowTentativeModel(null);\n          }}\n        />\n      )}\n\n      {showDeleteCredentialsModal && (\n        <DeleteCredentialsModal\n          modelProvider={showTentativeProvider!}\n          onConfirm={() => {\n            setShowDeleteCredentialsModal(false);\n            mutateEmbeddingProviderDetails();\n          }}\n          onCancel={() => setShowDeleteCredentialsModal(false)}\n        />\n      )}\n\n      <p className=\"mb-4\">\n        Select from cloud, self-hosted models, or continue with your current\n        embedding model.\n      </p>\n      <div className=\"text-sm mr-auto mb-6 divide-x-2 flex\">\n        <button\n          onClick={() => setModelTab(null)}\n          className={`mr-4 p-2 font-bold  ${\n            !modelTab\n              ? \"rounded bg-neutral-900 dark:bg-neutral-950 text-neutral-100 dark:text-neutral-300 underline\"\n              : \" hover:underline bg-neutral-100 dark:bg-neutral-900\"\n          }`}\n        >\n          Current\n        </button>\n        <div className=\"px-2\">\n          <button\n            onClick={() => setModelTab(\"cloud\")}\n            className={`mx-2 p-2 font-bold  ${\n              modelTab == \"cloud\"\n                ? \"rounded bg-neutral-900 dark:bg-neutral-950 text-neutral-100 dark:text-neutral-300 underline\"\n                : \" hover:underline bg-neutral-100 dark:bg-neutral-900\"\n            }`}\n          >\n            Cloud-based\n          </button>\n        </div>\n        <div className=\"px-2\">\n          <button\n            onClick={() => setModelTab(\"open\")}\n            className={` mx-2 p-2 font-bold  ${\n              modelTab == \"open\"\n                ? \"rounded bg-neutral-900 dark:bg-neutral-950 text-neutral-100 dark:text-neutral-300 underline\"\n                : \"hover:underline bg-neutral-100 dark:bg-neutral-900\"\n            }`}\n          >\n            Self-hosted\n          </button>\n        </div>\n      </div>\n\n      {modelTab == \"open\" && (\n        <OpenEmbeddingPage\n          selectedProvider={selectedProvider}\n          onSelectOpenSource={(model: HostedEmbeddingModel) => {\n            setShowTentativeOpenProvider(model);\n          }}\n        />\n      )}\n\n      {modelTab == \"cloud\" && (\n        <CloudEmbeddingPage\n          advancedEmbeddingDetails={advancedEmbeddingDetails}\n          embeddingModelDetails={embeddingModelDetails}\n          setShowModelInQueue={setShowModelInQueue}\n          setShowTentativeModel={setShowTentativeModel}\n          currentModel={selectedProvider || currentEmbeddingModel}\n          setAlreadySelectedModel={setAlreadySelectedModel}\n          embeddingProviderDetails={embeddingProviderDetails}\n          setShowTentativeProvider={setShowTentativeProvider}\n          setChangeCredentialsProvider={setChangeCredentialsProvider}\n        />\n      )}\n\n      {!modelTab && (\n        <>\n          <button onClick={() => updateSelectedProvider(currentEmbeddingModel)}>\n            <ModelOption\n              model={currentEmbeddingModel}\n              selected={\n                selectedProvider.model_name == currentEmbeddingModel.model_name\n              }\n            />\n          </button>\n          {currentEmbeddingModel?.provider_type && (\n            <div className=\"mt-2\">\n              <Button\n                prominence=\"secondary\"\n                onClick={() => {\n                  const allProviders = [\n                    ...AVAILABLE_CLOUD_PROVIDERS,\n                    LITELLM_CLOUD_PROVIDER,\n                    AZURE_CLOUD_PROVIDER,\n                  ];\n                  const provider = allProviders.find(\n                    (p) =>\n                      p.provider_type === currentEmbeddingModel.provider_type\n                  );\n                  if (!provider) {\n                    return;\n                  }\n                  setChangeCredentialsProvider(provider);\n                }}\n              >\n                Update API key\n              </Button>\n            </div>\n          )}\n        </>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/RerankingFormPage.tsx",
    "content": "import React, {\n  Dispatch,\n  forwardRef,\n  SetStateAction,\n  useContext,\n  useState,\n} from \"react\";\nimport { Formik, Form, FormikProps } from \"formik\";\nimport * as Yup from \"yup\";\nimport {\n  RerankerProvider,\n  RerankingDetails,\n  RerankingModel,\n  rerankingModels,\n} from \"./interfaces\";\nimport { FiExternalLink } from \"react-icons/fi\";\nimport {\n  AmazonIcon,\n  CohereIcon,\n  LiteLLMIcon,\n  MixedBreadIcon,\n} from \"@/components/icons/icons\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport { TextFormField } from \"@/components/Field\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport { SvgAlertTriangle, SvgKey } from \"@opal/icons\";\n\ninterface RerankingDetailsFormProps {\n  setRerankingDetails: Dispatch<SetStateAction<RerankingDetails>>;\n  currentRerankingDetails: RerankingDetails;\n  originalRerankingDetails: RerankingDetails;\n  modelTab: \"open\" | \"cloud\" | null;\n  setModelTab: Dispatch<SetStateAction<\"open\" | \"cloud\" | null>>;\n  onValidationChange?: (\n    isValid: boolean,\n    errors: Record<string, string>\n  ) => void;\n}\n\nconst RerankingDetailsForm = forwardRef<\n  FormikProps<RerankingDetails>,\n  RerankingDetailsFormProps\n>(\n  (\n    {\n      setRerankingDetails,\n      originalRerankingDetails,\n      currentRerankingDetails,\n      modelTab,\n      setModelTab,\n      onValidationChange,\n    },\n    ref\n  ) => {\n    const [showGpuWarningModalModel, setShowGpuWarningModalModel] =\n      useState<RerankingModel | null>(null);\n    const [isApiKeyModalOpen, setIsApiKeyModalOpen] = useState(false);\n    const [showLiteLLMConfigurationModal, setShowLiteLLMConfigurationModal] =\n      useState(false);\n\n    const combinedSettings = useContext(SettingsContext);\n    const gpuEnabled = combinedSettings?.settings.gpu_enabled;\n\n    // Define the validation schema\n    const validationSchema = Yup.object().shape({\n      rerank_model_name: Yup.string().nullable(),\n      rerank_provider_type: Yup.mixed<RerankerProvider>()\n        .nullable()\n        .oneOf(Object.values(RerankerProvider))\n        .optional(),\n      rerank_api_key: Yup.string()\n        .nullable()\n        .test(\n          \"required-if-cohere\",\n          \"API Key is required for Cohere reranking\",\n          function (value) {\n            const { rerank_provider_type } = this.parent;\n            return (\n              rerank_provider_type !== RerankerProvider.COHERE ||\n              (value !== null && value !== \"\")\n            );\n          }\n        ),\n      rerank_api_url: Yup.string()\n        .url(\"Must be a valid URL\")\n        .matches(/^https?:\\/\\//, \"URL must start with http:// or https://\")\n        .nullable()\n        .test(\n          \"required-if-litellm\",\n          \"API URL is required for LiteLLM reranking\",\n          function (value) {\n            const { rerank_provider_type } = this.parent;\n            return (\n              rerank_provider_type !== RerankerProvider.LITELLM ||\n              (value !== null && value !== \"\")\n            );\n          }\n        ),\n    });\n\n    return (\n      <Formik\n        innerRef={ref}\n        initialValues={currentRerankingDetails}\n        validationSchema={validationSchema}\n        onSubmit={async (_, { setSubmitting }) => {\n          setSubmitting(false);\n        }}\n        validate={(values) => {\n          // Update parent component with values\n          setRerankingDetails(values);\n\n          // Run validation and report errors\n          if (onValidationChange) {\n            // We'll return an empty object here since Yup will handle the actual validation\n            // But we need to check if there are any validation errors\n            const errors: Record<string, string> = {};\n            try {\n              // Manually validate against the schema\n              validationSchema.validateSync(values, { abortEarly: false });\n              onValidationChange(true, {});\n            } catch (validationError) {\n              if (validationError instanceof Yup.ValidationError) {\n                validationError.inner.forEach((err) => {\n                  if (err.path) {\n                    errors[err.path] = err.message;\n                  }\n                });\n                onValidationChange(false, errors);\n              }\n            }\n          }\n\n          return {}; // Return empty object as Formik will handle the errors\n        }}\n        enableReinitialize={true}\n      >\n        {({ values, setFieldValue, resetForm }) => {\n          const resetRerankingValues = () => {\n            setRerankingDetails({\n              rerank_api_key: null,\n              rerank_provider_type: null,\n              rerank_model_name: null,\n              rerank_api_url: null,\n            });\n            resetForm();\n          };\n\n          return (\n            <div className=\"p-2 rounded-lg max-w-4xl mx-auto\">\n              <p className=\"mb-4\">\n                Select from cloud, self-hosted models, or use no reranking\n                model.\n              </p>\n              <div className=\"text-sm mr-auto mb-6 divide-x-2 flex\">\n                {originalRerankingDetails.rerank_model_name && (\n                  <button\n                    onClick={() => setModelTab(null)}\n                    className={`mx-2 p-2 font-bold  ${\n                      !modelTab\n                        ? \"rounded bg-background-900 text-text-100 underline\"\n                        : \" hover:underline bg-background-100\"\n                    }`}\n                  >\n                    Current\n                  </button>\n                )}\n                <div\n                  className={`${\n                    originalRerankingDetails.rerank_model_name && \"px-2 ml-2\"\n                  }`}\n                >\n                  <button\n                    onClick={() => setModelTab(\"cloud\")}\n                    className={`mr-2 p-2 font-bold  ${\n                      modelTab == \"cloud\"\n                        ? \"rounded bg-neutral-900 dark:bg-neutral-950 text-neutral-100 dark:text-neutral-300 underline\"\n                        : \" hover:underline bg-neutral-100 dark:bg-neutral-900\"\n                    }`}\n                  >\n                    Cloud-based\n                  </button>\n                </div>\n\n                <div className=\"px-2\">\n                  <button\n                    onClick={() => setModelTab(\"open\")}\n                    className={` mx-2 p-2 font-bold  ${\n                      modelTab == \"open\"\n                        ? \"rounded bg-neutral-900 dark:bg-neutral-950 text-neutral-100 dark:text-neutral-300 underline\"\n                        : \"hover:underline bg-neutral-100 dark:bg-neutral-900\"\n                    }`}\n                  >\n                    Self-hosted\n                  </button>\n                </div>\n                {values.rerank_model_name && (\n                  <div className=\"px-2\">\n                    <button\n                      onClick={() => resetRerankingValues()}\n                      className={`mx-2 p-2 font-bold rounded bg-neutral-100 dark:bg-neutral-900 text-neutral-900 dark:text-neutral-100 hover:underline`}\n                    >\n                      Remove Reranking\n                    </button>\n                  </div>\n                )}\n              </div>\n\n              <Form>\n                <div className=\"grid grid-cols-1 md:grid-cols-2 gap-4\">\n                  {(modelTab\n                    ? rerankingModels.filter(\n                        (model) => model.cloud == (modelTab == \"cloud\")\n                      )\n                    : rerankingModels.filter(\n                        (modelCard) =>\n                          (modelCard.modelName ==\n                            originalRerankingDetails.rerank_model_name &&\n                            modelCard.rerank_provider_type ==\n                              originalRerankingDetails.rerank_provider_type) ||\n                          (modelCard.rerank_provider_type ==\n                            RerankerProvider.LITELLM &&\n                            originalRerankingDetails.rerank_provider_type ==\n                              RerankerProvider.LITELLM)\n                      )\n                  ).map((card) => {\n                    const isSelected =\n                      values.rerank_provider_type ===\n                        card.rerank_provider_type &&\n                      (card.modelName == null ||\n                        values.rerank_model_name === card.modelName);\n\n                    return (\n                      <div\n                        key={`${card.rerank_provider_type}-${card.modelName}`}\n                        className={`p-4 border rounded-lg cursor-pointer transition-all duration-200 ${\n                          isSelected\n                            ? \"border-blue-800 bg-blue-50 dark:bg-blue-950 dark:border-blue-700 shadow-md\"\n                            : \"border-background-200 hover:border-blue-300 hover:shadow-sm dark:border-neutral-700 dark:hover:border-blue-300\"\n                        }`}\n                        onClick={() => {\n                          if (\n                            card.rerank_provider_type == RerankerProvider.COHERE\n                          ) {\n                            setIsApiKeyModalOpen(true);\n                          } else if (\n                            card.rerank_provider_type ==\n                            RerankerProvider.BEDROCK\n                          ) {\n                            setIsApiKeyModalOpen(true);\n                          } else if (\n                            card.rerank_provider_type ==\n                            RerankerProvider.LITELLM\n                          ) {\n                            setShowLiteLLMConfigurationModal(true);\n                          } else if (\n                            !card.rerank_provider_type &&\n                            !gpuEnabled\n                          ) {\n                            setShowGpuWarningModalModel(card);\n                          }\n\n                          if (!isSelected) {\n                            setRerankingDetails({\n                              ...values,\n                              rerank_provider_type: card.rerank_provider_type!,\n                              rerank_model_name: card.modelName || null,\n                              rerank_api_key: null,\n                              rerank_api_url: null,\n                            });\n                            setFieldValue(\n                              \"rerank_provider_type\",\n                              card.rerank_provider_type\n                            );\n                            setFieldValue(\"rerank_model_name\", card.modelName);\n                          }\n                        }}\n                      >\n                        <div className=\"flex items-center justify-between mb-3\">\n                          <div className=\"flex items-center\">\n                            {card.rerank_provider_type ===\n                            RerankerProvider.LITELLM ? (\n                              <LiteLLMIcon size={24} className=\"mr-2\" />\n                            ) : card.rerank_provider_type ===\n                              RerankerProvider.COHERE ? (\n                              <CohereIcon size={24} className=\"mr-2\" />\n                            ) : card.rerank_provider_type ===\n                              RerankerProvider.BEDROCK ? (\n                              <AmazonIcon size={24} className=\"mr-2\" />\n                            ) : (\n                              <MixedBreadIcon size={24} className=\"mr-2\" />\n                            )}\n                            <h3 className=\"font-bold text-lg\">\n                              {card.displayName}\n                            </h3>\n                          </div>\n                          {card.link && (\n                            <a\n                              href={card.link}\n                              target=\"_blank\"\n                              rel=\"noopener noreferrer\"\n                              onClick={(e) => e.stopPropagation()}\n                              className=\"text-blue-500 hover:text-blue-700 transition-colors duration-200\"\n                            >\n                              <FiExternalLink size={18} />\n                            </a>\n                          )}\n                        </div>\n                        <p className=\"text-sm text-text-600 mb-2\">\n                          {card.description}\n                        </p>\n                        <div className=\"text-xs text-text-500\">\n                          {card.cloud ? \"Cloud-based\" : \"Self-hosted\"}\n                        </div>\n                      </div>\n                    );\n                  })}\n                </div>\n\n                {showGpuWarningModalModel && (\n                  <Modal\n                    open\n                    onOpenChange={() => setShowGpuWarningModalModel(null)}\n                  >\n                    <Modal.Content width=\"sm\" height=\"sm\">\n                      <Modal.Header\n                        icon={SvgAlertTriangle}\n                        title=\"GPU Not Enabled\"\n                        onClose={() => setShowGpuWarningModalModel(null)}\n                      />\n                      <Modal.Body>\n                        <p className=\"text-error font-semibold\">Warning:</p>\n                        <p>\n                          Local reranking models require significant\n                          computational resources and may perform slowly without\n                          GPU acceleration. Consider switching to GPU-enabled\n                          infrastructure or using a cloud-based alternative for\n                          better performance.\n                        </p>\n                      </Modal.Body>\n                      <Modal.Footer>\n                        <Button\n                          onClick={() => setShowGpuWarningModalModel(null)}\n                        >\n                          Understood\n                        </Button>\n                      </Modal.Footer>\n                    </Modal.Content>\n                  </Modal>\n                )}\n\n                {showLiteLLMConfigurationModal && (\n                  <Modal\n                    open\n                    onOpenChange={() => {\n                      resetForm();\n                      setShowLiteLLMConfigurationModal(false);\n                    }}\n                  >\n                    <Modal.Content>\n                      <Modal.Header\n                        icon={SvgKey}\n                        title=\"API Key Configuration\"\n                        onClose={() => {\n                          resetForm();\n                          setShowLiteLLMConfigurationModal(false);\n                        }}\n                      />\n                      <Modal.Body>\n                        <div className=\"w-full flex flex-col gap-y-4\">\n                          <TextFormField\n                            subtext=\"Set the URL at which your LiteLLM Proxy is hosted\"\n                            placeholder={values.rerank_api_url || undefined}\n                            onChange={(\n                              e: React.ChangeEvent<HTMLInputElement>\n                            ) => {\n                              const value = e.target.value;\n                              setRerankingDetails({\n                                ...values,\n                                rerank_api_url: value,\n                              });\n                              setFieldValue(\"rerank_api_url\", value);\n                            }}\n                            type=\"text\"\n                            label=\"LiteLLM Proxy  URL\"\n                            name=\"rerank_api_url\"\n                          />\n\n                          <TextFormField\n                            subtext=\"Set the key to access your LiteLLM Proxy\"\n                            placeholder={\n                              values.rerank_api_key\n                                ? \"*\".repeat(values.rerank_api_key.length)\n                                : undefined\n                            }\n                            onChange={(\n                              e: React.ChangeEvent<HTMLInputElement>\n                            ) => {\n                              const value = e.target.value;\n                              setRerankingDetails({\n                                ...values,\n                                rerank_api_key: value,\n                              });\n                              setFieldValue(\"rerank_api_key\", value);\n                            }}\n                            type=\"password\"\n                            label=\"LiteLLM Proxy Key\"\n                            name=\"rerank_api_key\"\n                            optional\n                          />\n\n                          <TextFormField\n                            subtext=\"Set the model name to use for LiteLLM Proxy\"\n                            placeholder={\n                              values.rerank_model_name\n                                ? \"*\".repeat(values.rerank_model_name.length)\n                                : undefined\n                            }\n                            onChange={(\n                              e: React.ChangeEvent<HTMLInputElement>\n                            ) => {\n                              const value = e.target.value;\n                              setRerankingDetails({\n                                ...values,\n                                rerank_model_name: value,\n                              });\n                              setFieldValue(\"rerank_model_name\", value);\n                            }}\n                            label=\"LiteLLM Model Name\"\n                            name=\"rerank_model_name\"\n                            optional\n                          />\n                        </div>\n                      </Modal.Body>\n                      <Modal.Footer>\n                        <Button\n                          onClick={() => {\n                            setShowLiteLLMConfigurationModal(false);\n                          }}\n                        >\n                          Update\n                        </Button>\n                      </Modal.Footer>\n                    </Modal.Content>\n                  </Modal>\n                )}\n\n                {isApiKeyModalOpen && (\n                  <Modal\n                    open\n                    onOpenChange={() => {\n                      Object.keys(originalRerankingDetails).forEach((key) => {\n                        setFieldValue(\n                          key,\n                          originalRerankingDetails[\n                            key as keyof RerankingDetails\n                          ]\n                        );\n                      });\n\n                      setIsApiKeyModalOpen(false);\n                    }}\n                  >\n                    <Modal.Content>\n                      <Modal.Header\n                        icon={SvgKey}\n                        title=\"API Key Configuration\"\n                        onClose={() => {\n                          Object.keys(originalRerankingDetails).forEach(\n                            (key) => {\n                              setFieldValue(\n                                key,\n                                originalRerankingDetails[\n                                  key as keyof RerankingDetails\n                                ]\n                              );\n                            }\n                          );\n\n                          setIsApiKeyModalOpen(false);\n                        }}\n                      />\n                      <Modal.Body>\n                        <div className=\"w-full\">\n                          <TextFormField\n                            placeholder={\n                              values.rerank_api_key\n                                ? \"*\".repeat(values.rerank_api_key.length)\n                                : values.rerank_provider_type ===\n                                    RerankerProvider.BEDROCK\n                                  ? \"aws_ACCESSKEY_SECRETKEY_REGION\"\n                                  : \"Enter your API key\"\n                            }\n                            onChange={(\n                              e: React.ChangeEvent<HTMLInputElement>\n                            ) => {\n                              const value = e.target.value;\n                              setRerankingDetails({\n                                ...values,\n                                rerank_api_key: value,\n                              });\n                              setFieldValue(\"rerank_api_key\", value);\n                            }}\n                            type=\"password\"\n                            label={\n                              values.rerank_provider_type ===\n                              RerankerProvider.BEDROCK\n                                ? \"AWS Credentials in format: aws_ACCESSKEY_SECRETKEY_REGION\"\n                                : \"Cohere API Key\"\n                            }\n                            name=\"rerank_api_key\"\n                          />\n                        </div>\n                      </Modal.Body>\n                      <Modal.Footer>\n                        <Button onClick={() => setIsApiKeyModalOpen(false)}>\n                          Update\n                        </Button>\n                      </Modal.Footer>\n                    </Modal.Content>\n                  </Modal>\n                )}\n              </Form>\n            </div>\n          );\n        }}\n      </Formik>\n    );\n  }\n);\nRerankingDetailsForm.displayName = \"RerankingDetailsForm\";\n\nexport default RerankingDetailsForm;\n"
  },
  {
    "path": "web/src/app/admin/embeddings/interfaces.ts",
    "content": "import {\n  AVAILABLE_CLOUD_PROVIDERS,\n  AVAILABLE_MODELS,\n  CloudEmbeddingModel,\n  EmbeddingProvider,\n  HostedEmbeddingModel,\n} from \"@/components/embedding/interfaces\";\n\n// This is a slightly differnte interface than used in the backend\n// but is always used in conjunction with `AdvancedSearchConfiguration`\nexport interface RerankingDetails {\n  rerank_model_name: string | null;\n  rerank_provider_type: RerankerProvider | null;\n  rerank_api_key: string | null;\n  rerank_api_url: string | null;\n}\n\nexport enum SwitchoverType {\n  REINDEX = \"reindex\",\n  ACTIVE_ONLY = \"active_only\",\n  INSTANT = \"instant\",\n}\n\nexport enum RerankerProvider {\n  COHERE = \"cohere\",\n  LITELLM = \"litellm\",\n  BEDROCK = \"bedrock\",\n}\n\nexport enum EmbeddingPrecision {\n  FLOAT = \"float\",\n  BFLOAT16 = \"bfloat16\",\n}\n\nexport interface LLMContextualCost {\n  provider: string;\n  model_name: string;\n  cost: number;\n}\n\nexport interface AdvancedSearchConfiguration {\n  index_name: string | null;\n  multipass_indexing: boolean;\n  enable_contextual_rag: boolean;\n  contextual_rag_llm_name: string | null;\n  contextual_rag_llm_provider: string | null;\n  multilingual_expansion: string[];\n  disable_rerank_for_streaming: boolean;\n  api_url: string | null;\n  num_rerank: number;\n  embedding_precision: EmbeddingPrecision;\n  reduced_dimension: number | null;\n}\n\nexport interface SavedSearchSettings\n  extends RerankingDetails,\n    AdvancedSearchConfiguration {\n  provider_type: EmbeddingProvider | null;\n  switchover_type?: SwitchoverType;\n}\n\nexport interface RerankingModel {\n  rerank_provider_type: RerankerProvider | null;\n  modelName?: string;\n  displayName: string;\n  description: string;\n  link: string;\n  cloud: boolean;\n}\n\nexport const rerankingModels: RerankingModel[] = [\n  {\n    rerank_provider_type: RerankerProvider.LITELLM,\n    cloud: true,\n    displayName: \"LiteLLM\",\n    description: \"Host your own reranker or router with LiteLLM proxy\",\n    link: \"https://docs.litellm.ai/docs/simple_proxy\",\n  },\n  {\n    rerank_provider_type: null,\n    cloud: false,\n    modelName: \"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n    displayName: \"MixedBread XSmall\",\n    description: \"Fastest, smallest model for basic reranking tasks.\",\n    link: \"https://huggingface.co/mixedbread-ai/mxbai-rerank-xsmall-v1\",\n  },\n  {\n    rerank_provider_type: null,\n    cloud: false,\n    modelName: \"mixedbread-ai/mxbai-rerank-base-v1\",\n    displayName: \"MixedBread Base\",\n    description: \"Balanced performance for general reranking needs.\",\n    link: \"https://huggingface.co/mixedbread-ai/mxbai-rerank-base-v1\",\n  },\n  {\n    rerank_provider_type: null,\n    cloud: false,\n    modelName: \"mixedbread-ai/mxbai-rerank-large-v1\",\n    displayName: \"MixedBread Large\",\n    description: \"Most powerful model for complex reranking tasks.\",\n    link: \"https://huggingface.co/mixedbread-ai/mxbai-rerank-large-v1\",\n  },\n  {\n    cloud: true,\n    rerank_provider_type: RerankerProvider.COHERE,\n    modelName: \"rerank-english-v3.0\",\n    displayName: \"Cohere English\",\n    description: \"High-performance English-focused reranking model.\",\n    link: \"https://docs.cohere.com/v2/reference/rerank\",\n  },\n  {\n    cloud: true,\n    rerank_provider_type: RerankerProvider.COHERE,\n    modelName: \"rerank-multilingual-v3.0\",\n    displayName: \"Cohere Multilingual\",\n    description: \"Powerful multilingual reranking model.\",\n    link: \"https://docs.cohere.com/v2/reference/rerank\",\n  },\n  {\n    cloud: true,\n    rerank_provider_type: RerankerProvider.BEDROCK,\n    modelName: \"cohere.rerank-v3-5:0\",\n    displayName: \"Cohere Rerank 3.5\",\n    description:\n      \"Powerful multilingual reranking model invoked through AWS Bedrock.\",\n    link: \"https://aws.amazon.com/blogs/machine-learning/cohere-rerank-3-5-is-now-available-in-amazon-bedrock-through-rerank-api\",\n  },\n];\n\nexport const getCurrentModelCopy = (\n  currentModelName: string\n): CloudEmbeddingModel | HostedEmbeddingModel | null => {\n  const AVAILABLE_CLOUD_PROVIDERS_FLATTENED = AVAILABLE_CLOUD_PROVIDERS.flatMap(\n    (provider) =>\n      provider.embedding_models.map((model) => ({\n        ...model,\n        provider_type: provider.provider_type,\n        model_name: model.model_name,\n      }))\n  );\n\n  return (\n    AVAILABLE_MODELS.find((model) => model.model_name === currentModelName) ||\n    AVAILABLE_CLOUD_PROVIDERS_FLATTENED.find(\n      (model) => model.model_name === currentModelName\n    ) ||\n    null\n  );\n};\n"
  },
  {
    "path": "web/src/app/admin/embeddings/modals/AlreadyPickedModal.tsx",
    "content": "import Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport { CloudEmbeddingModel } from \"../../../../components/embedding/interfaces\";\nimport { SvgCheck } from \"@opal/icons\";\n\nexport interface AlreadyPickedModalProps {\n  model: CloudEmbeddingModel;\n  onClose: () => void;\n}\n\nexport default function AlreadyPickedModal({\n  model,\n  onClose,\n}: AlreadyPickedModalProps) {\n  return (\n    <Modal open onOpenChange={onClose}>\n      <Modal.Content width=\"sm\" height=\"sm\">\n        <Modal.Header\n          icon={SvgCheck}\n          title={`${model.model_name} already chosen`}\n          description=\"You can select a different one if you want!\"\n          onClose={onClose}\n        />\n        <Modal.Footer>\n          <Button onClick={onClose}>Close</Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/modals/ChangeCredentialsModal.tsx",
    "content": "\"use client\";\n\nimport React, { useRef, useState } from \"react\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Callout } from \"@/components/ui/callout\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Separator from \"@/refresh-components/Separator\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Label } from \"@/components/Field\";\nimport {\n  CloudEmbeddingProvider,\n  getFormattedProviderName,\n} from \"@/components/embedding/interfaces\";\nimport { EMBEDDING_PROVIDERS_ADMIN_URL } from \"@/lib/llmConfig/constants\";\nimport { mutate } from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { testEmbedding } from \"@/app/admin/embeddings/pages/utils\";\nimport { SvgSettings } from \"@opal/icons\";\n\nexport interface ChangeCredentialsModalProps {\n  provider: CloudEmbeddingProvider;\n  onConfirm: () => void;\n  onCancel: () => void;\n  onDeleted: () => void;\n  useFileUpload: boolean;\n  isProxy?: boolean;\n  isAzure?: boolean;\n}\n\nexport default function ChangeCredentialsModal({\n  provider,\n  onConfirm,\n  onCancel,\n  onDeleted,\n  useFileUpload,\n  isProxy = false,\n  isAzure = false,\n}: ChangeCredentialsModalProps) {\n  const [apiKey, setApiKey] = useState(\"\");\n  const [apiUrl, setApiUrl] = useState(\"\");\n  const [modelName, setModelName] = useState(\"\");\n  const [testError, setTestError] = useState<string>(\"\");\n  const [fileName, setFileName] = useState<string>(\"\");\n  const fileInputRef = useRef<HTMLInputElement>(null);\n  const [deletionError, setDeletionError] = useState<string>(\"\");\n\n  const clearFileInput = () => {\n    setFileName(\"\");\n    if (fileInputRef.current) {\n      fileInputRef.current.value = \"\";\n    }\n  };\n\n  const handleFileUpload = async (\n    event: React.ChangeEvent<HTMLInputElement>\n  ) => {\n    const file = event.target.files?.[0];\n    setFileName(\"\");\n\n    if (file) {\n      setFileName(file.name);\n      try {\n        setDeletionError(\"\");\n        const fileContent = await file.text();\n        let jsonContent;\n        try {\n          jsonContent = JSON.parse(fileContent);\n          setApiKey(JSON.stringify(jsonContent));\n        } catch (parseError) {\n          throw new Error(\n            \"Failed to parse JSON file. Please ensure it's a valid JSON.\"\n          );\n        }\n      } catch (error) {\n        setTestError(\n          error instanceof Error\n            ? error.message\n            : \"An unknown error occurred while processing the file.\"\n        );\n        setApiKey(\"\");\n        clearFileInput();\n      }\n    }\n  };\n\n  const handleDelete = async () => {\n    setDeletionError(\"\");\n\n    try {\n      const response = await fetch(\n        `${EMBEDDING_PROVIDERS_ADMIN_URL}/${provider.provider_type.toLowerCase()}`,\n        {\n          method: \"DELETE\",\n        }\n      );\n\n      if (!response.ok) {\n        const errorData = await response.json();\n        setDeletionError(errorData.detail);\n        return;\n      }\n\n      mutate(SWR_KEYS.adminLlmProviders);\n      onDeleted();\n    } catch (error) {\n      setDeletionError(\n        error instanceof Error ? error.message : \"An unknown error occurred\"\n      );\n    }\n  };\n\n  const handleSubmit = async () => {\n    setTestError(\"\");\n    const normalizedProviderType = provider.provider_type\n      .toLowerCase()\n      .split(\" \")[0];\n\n    if (!normalizedProviderType) {\n      setTestError(\"Provider type is invalid or missing.\");\n      return;\n    }\n\n    try {\n      const testResponse = await testEmbedding({\n        provider_type: normalizedProviderType,\n        modelName,\n        apiKey,\n        apiUrl,\n        apiVersion: null,\n        deploymentName: null,\n      });\n\n      if (!testResponse.ok) {\n        const errorMsg = (await testResponse.json()).detail;\n        throw new Error(errorMsg);\n      }\n\n      const updateResponse = await fetch(EMBEDDING_PROVIDERS_ADMIN_URL, {\n        method: \"PUT\",\n        headers: { \"Content-Type\": \"application/json\" },\n        body: JSON.stringify({\n          provider_type: normalizedProviderType,\n          api_key: apiKey,\n          api_url: apiUrl,\n          is_default_provider: false,\n          is_configured: true,\n        }),\n      });\n\n      if (!updateResponse.ok) {\n        const errorData = await updateResponse.json();\n        throw new Error(\n          errorData.detail ||\n            `Failed to update provider- check your ${\n              isProxy ? \"API URL\" : \"API key\"\n            }`\n        );\n      }\n\n      // Refresh cached provider details so the rest of the form sees the new key without forcing a re-index\n      await mutate(EMBEDDING_PROVIDERS_ADMIN_URL);\n\n      onConfirm();\n    } catch (error) {\n      setTestError(\n        error instanceof Error ? error.message : \"An unknown error occurred\"\n      );\n    }\n  };\n  return (\n    <Modal open onOpenChange={onCancel}>\n      <Modal.Content>\n        <Modal.Header\n          icon={SvgSettings}\n          title={`Modify your ${getFormattedProviderName(\n            provider.provider_type\n          )} ${isProxy ? \"Configuration\" : \"key\"}`}\n          onClose={onCancel}\n        />\n        <Modal.Body>\n          {!isAzure && (\n            <>\n              <Text as=\"p\">\n                You can modify your configuration by providing a new API key\n                {isProxy ? \" or API URL.\" : \".\"}\n              </Text>\n\n              <div className=\"flex flex-col gap-2\">\n                <Label className=\"mt-2\">API Key</Label>\n                {useFileUpload ? (\n                  <>\n                    <Label className=\"mt-2\">Upload JSON File</Label>\n                    <input\n                      ref={fileInputRef}\n                      type=\"file\"\n                      accept=\".json\"\n                      onChange={handleFileUpload}\n                      className=\"text-lg w-full p-1\"\n                    />\n                    {fileName && <p>Uploaded file: {fileName}</p>}\n                  </>\n                ) : (\n                  <>\n                    <input\n                      type=\"password\"\n                      className=\"border border-border rounded w-full py-2 px-3 bg-background-emphasis\"\n                      value={apiKey}\n                      onChange={(e: any) => setApiKey(e.target.value)}\n                      placeholder=\"Paste your API key here\"\n                    />\n                  </>\n                )}\n\n                {isProxy && (\n                  <>\n                    <Label className=\"mt-2\">API URL</Label>\n\n                    <input\n                      className={`\n                          border\n                          border-border\n                          rounded\n                          w-full\n                          py-2\n                          px-3\n                          bg-background-emphasis\n                      `}\n                      value={apiUrl}\n                      onChange={(e: any) => setApiUrl(e.target.value)}\n                      placeholder=\"Paste your API URL here\"\n                    />\n\n                    {deletionError && (\n                      <Callout type=\"danger\" title=\"Error\">\n                        {deletionError}\n                      </Callout>\n                    )}\n\n                    <div>\n                      <Label className=\"mt-2\">Test Model</Label>\n                      <Text as=\"p\">\n                        Since you are using a liteLLM proxy, we&apos;ll need a\n                        model name to test the connection with.\n                      </Text>\n                    </div>\n                    <input\n                      className={`\n                       border\n                       border-border\n                       rounded\n                       w-full\n                       py-2\n                       px-3\n                       bg-background-emphasis\n                   `}\n                      value={modelName}\n                      onChange={(e: any) => setModelName(e.target.value)}\n                      placeholder=\"Paste your model name here\"\n                    />\n                  </>\n                )}\n\n                {testError && (\n                  <Callout type=\"danger\" title=\"Error\">\n                    {testError}\n                  </Callout>\n                )}\n\n                {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n                <Button\n                  className=\"mr-auto mt-4\"\n                  onClick={() => handleSubmit()}\n                  disabled={!apiKey}\n                >\n                  Update Configuration\n                </Button>\n\n                <Separator />\n              </div>\n            </>\n          )}\n\n          <Text as=\"p\" className=\"mt-4 font-bold\">\n            You can delete your configuration.\n          </Text>\n          <Text as=\"p\">\n            This is only possible if you have already switched to a different\n            embedding type!\n          </Text>\n\n          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n          <Button className=\"mr-auto\" onClick={handleDelete} danger>\n            Delete Configuration\n          </Button>\n          {deletionError && (\n            <Callout type=\"danger\" title=\"Error\">\n              {deletionError}\n            </Callout>\n          )}\n        </Modal.Body>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/modals/DeleteCredentialsModal.tsx",
    "content": "import Modal from \"@/refresh-components/Modal\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport { Callout } from \"@/components/ui/callout\";\nimport {\n  CloudEmbeddingProvider,\n  getFormattedProviderName,\n} from \"../../../../components/embedding/interfaces\";\nimport { SvgTrash } from \"@opal/icons\";\n\nexport interface DeleteCredentialsModalProps {\n  modelProvider: CloudEmbeddingProvider;\n  onConfirm: () => void;\n  onCancel: () => void;\n}\n\nexport default function DeleteCredentialsModal({\n  modelProvider,\n  onConfirm,\n  onCancel,\n}: DeleteCredentialsModalProps) {\n  return (\n    <Modal open onOpenChange={onCancel}>\n      <Modal.Content width=\"sm\" height=\"sm\">\n        <Modal.Header\n          icon={SvgTrash}\n          title={`Delete ${getFormattedProviderName(\n            modelProvider.provider_type\n          )} Credentials?`}\n          onClose={onCancel}\n        />\n        <Modal.Body>\n          <Text as=\"p\">\n            You&apos;re about to delete your{\" \"}\n            {getFormattedProviderName(modelProvider.provider_type)} credentials.\n            Are you sure?\n          </Text>\n          <Callout type=\"danger\" title=\"Point of No Return\" />\n        </Modal.Body>\n        <Modal.Footer>\n          <Button prominence=\"secondary\" onClick={onCancel}>\n            Keep Credentials\n          </Button>\n          <Button variant=\"danger\" onClick={onConfirm}>\n            Delete Credentials\n          </Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/modals/InstantSwitchConfirmModal.tsx",
    "content": "import Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgAlertTriangle } from \"@opal/icons\";\nexport interface InstantSwitchConfirmModalProps {\n  onClose: () => void;\n  onConfirm: () => void;\n}\n\nexport default function InstantSwitchConfirmModal({\n  onClose,\n  onConfirm,\n}: InstantSwitchConfirmModalProps) {\n  return (\n    <Modal open onOpenChange={onClose}>\n      <Modal.Content width=\"sm\" height=\"sm\">\n        <Modal.Header\n          icon={SvgAlertTriangle}\n          title=\"Are you sure you want to do an instant switch?\"\n          onClose={onClose}\n        />\n        <Modal.Body>\n          <Text as=\"p\">\n            Instant switching will immediately change the embedding model\n            without re-indexing. Searches will be over a partial set of\n            documents (starting with 0 documents) until re-indexing is complete.\n          </Text>\n          <Text as=\"p\">\n            <strong>This is not reversible.</strong>\n          </Text>\n        </Modal.Body>\n        <Modal.Footer>\n          <Button onClick={onConfirm}>Confirm</Button>\n          <Button prominence=\"secondary\" onClick={onClose}>\n            Cancel\n          </Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/modals/ModelSelectionModal.tsx",
    "content": "import Modal from \"@/refresh-components/Modal\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Callout } from \"@/components/ui/callout\";\nimport { Button } from \"@opal/components\";\nimport { HostedEmbeddingModel } from \"@/components/embedding/interfaces\";\nimport { SvgServer } from \"@opal/icons\";\n\nexport interface ModelSelectionConfirmationModalProps {\n  selectedModel: HostedEmbeddingModel;\n  isCustom: boolean;\n  onConfirm: () => void;\n  onCancel: () => void;\n}\n\nexport default function ModelSelectionConfirmationModal({\n  selectedModel,\n  isCustom,\n  onConfirm,\n  onCancel,\n}: ModelSelectionConfirmationModalProps) {\n  return (\n    <Modal open onOpenChange={onCancel}>\n      <Modal.Content width=\"sm\" height=\"lg\">\n        <Modal.Header\n          icon={SvgServer}\n          title=\"Update Embedding Model\"\n          onClose={onCancel}\n        />\n        <Modal.Body>\n          <Text as=\"p\">\n            You have selected: <strong>{selectedModel.model_name}</strong>. Are\n            you sure you want to update to this new embedding model?\n          </Text>\n          <Text as=\"p\">\n            We will re-index all your documents in the background so you will be\n            able to continue to use Onyx as normal with the old model in the\n            meantime. Depending on how many documents you have indexed, this may\n            take a while.\n          </Text>\n          <Text as=\"p\">\n            <i>NOTE:</i> this re-indexing process will consume more resources\n            than normal. If you are self-hosting, we recommend that you allocate\n            at least 16GB of RAM to Onyx during this process.\n          </Text>\n\n          {isCustom && (\n            <Callout type=\"warning\" title=\"IMPORTANT\">\n              We&apos;ve detected that this is a custom-specified embedding\n              model. Since we have to download the model files before verifying\n              the configuration&apos;s correctness, we won&apos;t be able to let\n              you know if the configuration is valid until{\" \"}\n              <strong>after</strong> we start re-indexing your documents. If\n              there is an issue, it will show up on this page as an indexing\n              error on this page after clicking Confirm.\n            </Callout>\n          )}\n        </Modal.Body>\n        <Modal.Footer>\n          <Button onClick={onConfirm}>Confirm</Button>\n          <Button prominence=\"secondary\" onClick={onCancel}>\n            Cancel\n          </Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/modals/ProviderCreationModal.tsx",
    "content": "import React, { useRef, useState } from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Callout } from \"@/components/ui/callout\";\nimport { Button } from \"@opal/components\";\nimport { Formik, Form } from \"formik\";\nimport * as Yup from \"yup\";\nimport { Label, TextFormField } from \"@/components/Field\";\nimport {\n  CloudEmbeddingProvider,\n  EmbeddingProvider,\n  getFormattedProviderName,\n} from \"@/components/embedding/interfaces\";\nimport { EMBEDDING_PROVIDERS_ADMIN_URL } from \"@/lib/llmConfig/constants\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { SvgSettings } from \"@opal/icons\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nexport interface ProviderCreationModalProps {\n  updateCurrentModel: (\n    newModel: string,\n    provider_type: EmbeddingProvider\n  ) => void;\n  selectedProvider: CloudEmbeddingProvider;\n  onConfirm: () => void;\n  onCancel: () => void;\n  existingProvider?: CloudEmbeddingProvider;\n  isProxy?: boolean;\n  isAzure?: boolean;\n}\n\nexport default function ProviderCreationModal({\n  selectedProvider,\n  onConfirm,\n  onCancel,\n  existingProvider,\n  isProxy,\n  isAzure,\n  updateCurrentModel,\n}: ProviderCreationModalProps) {\n  const useFileUpload =\n    selectedProvider.provider_type == EmbeddingProvider.GOOGLE;\n\n  const [errorMsg, setErrorMsg] = useState<string>(\"\");\n  const [fileName, setFileName] = useState<string>(\"\");\n\n  const initialValues = {\n    provider_type:\n      existingProvider?.provider_type || selectedProvider.provider_type,\n    api_key: existingProvider?.api_key || \"\",\n    api_url: existingProvider?.api_url || \"\",\n    custom_config: existingProvider?.custom_config\n      ? Object.entries(existingProvider.custom_config)\n      : [],\n    model_id: 0,\n    model_name: null,\n  };\n\n  const validationSchema = Yup.object({\n    provider_type: Yup.string().required(\"Provider type is required\"),\n    api_key:\n      isProxy || isAzure\n        ? Yup.string()\n        : useFileUpload\n          ? Yup.string()\n          : Yup.string().required(\"API Key is required\"),\n    model_name: isProxy\n      ? Yup.string().required(\"Model name is required\")\n      : Yup.string().nullable(),\n    api_url:\n      isProxy || isAzure\n        ? Yup.string().required(\"API URL is required\")\n        : Yup.string(),\n    deployment_name: isAzure\n      ? Yup.string().required(\"Deployment name is required\")\n      : Yup.string(),\n    api_version: isAzure\n      ? Yup.string().required(\"API Version is required\")\n      : Yup.string(),\n    custom_config: Yup.array().of(Yup.array().of(Yup.string()).length(2)),\n  });\n\n  const fileInputRef = useRef<HTMLInputElement>(null);\n\n  const handleFileUpload = async (\n    event: React.ChangeEvent<HTMLInputElement>,\n    setFieldValue: (field: string, value: any) => void\n  ) => {\n    const file = event.target.files?.[0];\n    setFileName(\"\");\n    if (file) {\n      setFileName(file.name);\n      try {\n        const fileContent = await file.text();\n        let jsonContent;\n        try {\n          jsonContent = JSON.parse(fileContent);\n        } catch (parseError) {\n          throw new Error(\n            \"Failed to parse JSON file. Please ensure it's a valid JSON.\"\n          );\n        }\n        setFieldValue(\"api_key\", JSON.stringify(jsonContent));\n      } catch (error) {\n        setFieldValue(\"api_key\", \"\");\n      }\n    }\n  };\n\n  const handleSubmit = async (\n    values: any,\n    { setSubmitting }: { setSubmitting: (isSubmitting: boolean) => void }\n  ) => {\n    setErrorMsg(\"\");\n    try {\n      const customConfig = Object.fromEntries(values.custom_config);\n      const providerType = values.provider_type.toLowerCase().split(\" \")[0];\n      const isOpenAI = providerType === \"openai\";\n\n      const testModelName =\n        isOpenAI || isAzure ? \"text-embedding-3-small\" : values.model_name;\n\n      const testEmbeddingPayload = {\n        provider_type: providerType,\n        api_key: values.api_key,\n        api_url: values.api_url,\n        model_name: testModelName,\n        api_version: values.api_version,\n        deployment_name: values.deployment_name,\n      };\n\n      const initialResponse = await fetch(\n        \"/api/admin/embedding/test-embedding\",\n        {\n          method: \"POST\",\n          headers: { \"Content-Type\": \"application/json\" },\n          body: JSON.stringify(testEmbeddingPayload),\n        }\n      );\n\n      if (!initialResponse.ok) {\n        const errorMsg = (await initialResponse.json()).detail;\n        setErrorMsg(errorMsg);\n        setSubmitting(false);\n        return;\n      }\n\n      const response = await fetch(EMBEDDING_PROVIDERS_ADMIN_URL, {\n        method: \"PUT\",\n        headers: { \"Content-Type\": \"application/json\" },\n        body: JSON.stringify({\n          ...values,\n          api_version: values.api_version,\n          deployment_name: values.deployment_name,\n          provider_type: values.provider_type.toLowerCase().split(\" \")[0],\n          custom_config: customConfig,\n          is_default_provider: false,\n          is_configured: true,\n        }),\n      });\n\n      if (isAzure) {\n        updateCurrentModel(values.model_name, EmbeddingProvider.AZURE);\n      }\n\n      if (!response.ok) {\n        const errorData = await response.json();\n        throw new Error(\n          errorData.detail || \"Failed to update provider- check your API key\"\n        );\n      }\n\n      onConfirm();\n    } catch (error: unknown) {\n      if (error instanceof Error) {\n        setErrorMsg(error.message);\n      } else {\n        setErrorMsg(\"An unknown error occurred\");\n      }\n    } finally {\n      setSubmitting(false);\n    }\n  };\n\n  return (\n    <Modal open onOpenChange={onCancel}>\n      <Modal.Content width=\"sm\" height=\"sm\">\n        <Modal.Header\n          icon={SvgSettings}\n          title={`Configure ${getFormattedProviderName(\n            selectedProvider.provider_type\n          )}`}\n          onClose={onCancel}\n        />\n        <Modal.Body>\n          <Formik\n            initialValues={initialValues}\n            validationSchema={validationSchema}\n            onSubmit={handleSubmit}\n          >\n            {({ isSubmitting, handleSubmit, setFieldValue }) => (\n              <Form onSubmit={handleSubmit} className=\"space-y-4\">\n                <Text as=\"p\">\n                  You are setting the credentials for this provider. To access\n                  this information, follow the instructions{\" \"}\n                  <a\n                    className=\"cursor-pointer underline\"\n                    target=\"_blank\"\n                    href={selectedProvider.docsLink}\n                    rel=\"noreferrer\"\n                  >\n                    here\n                  </a>{\" \"}\n                  and gather your{\" \"}\n                  <a\n                    className=\"cursor-pointer underline\"\n                    target=\"_blank\"\n                    href={selectedProvider.apiLink}\n                    rel=\"noreferrer\"\n                  >\n                    {isProxy || isAzure ? \"API URL\" : \"API KEY\"}\n                  </a>\n                </Text>\n\n                <div className=\"flex w-full flex-col gap-y-6\">\n                  {(isProxy || isAzure) && (\n                    <TextFormField\n                      name=\"api_url\"\n                      label=\"API URL\"\n                      placeholder=\"API URL\"\n                      type=\"text\"\n                    />\n                  )}\n\n                  {isProxy && (\n                    <TextFormField\n                      name=\"model_name\"\n                      label={`Model Name ${isProxy ? \"(for testing)\" : \"\"}`}\n                      placeholder=\"Model Name\"\n                      type=\"text\"\n                    />\n                  )}\n\n                  {isAzure && (\n                    <TextFormField\n                      name=\"deployment_name\"\n                      label=\"Deployment Name\"\n                      placeholder=\"Deployment Name\"\n                      type=\"text\"\n                    />\n                  )}\n\n                  {isAzure && (\n                    <TextFormField\n                      name=\"api_version\"\n                      label=\"API Version\"\n                      placeholder=\"API Version\"\n                      type=\"text\"\n                    />\n                  )}\n\n                  {useFileUpload ? (\n                    <>\n                      <Label>Upload JSON File</Label>\n                      <input\n                        ref={fileInputRef}\n                        type=\"file\"\n                        accept=\".json\"\n                        onChange={(e) => handleFileUpload(e, setFieldValue)}\n                        className=\"text-lg w-full p-1\"\n                      />\n                      {fileName && <p>Uploaded file: {fileName}</p>}\n                    </>\n                  ) : (\n                    <TextFormField\n                      name=\"api_key\"\n                      label={`API Key ${\n                        isProxy ? \"(for non-local deployments)\" : \"\"\n                      }`}\n                      placeholder=\"API Key\"\n                      type=\"password\"\n                    />\n                  )}\n\n                  <a\n                    href={selectedProvider.apiLink}\n                    target=\"_blank\"\n                    className=\"underline cursor-pointer\"\n                    rel=\"noreferrer\"\n                  >\n                    Learn more here\n                  </a>\n                </div>\n\n                {errorMsg && (\n                  <Callout title=\"Error\" type=\"danger\">\n                    {errorMsg}\n                  </Callout>\n                )}\n\n                <Button\n                  disabled={isSubmitting}\n                  type=\"submit\"\n                  width=\"full\"\n                  icon={isSubmitting ? SimpleLoader : undefined}\n                >\n                  {isSubmitting\n                    ? \"Submitting\"\n                    : existingProvider\n                      ? \"Update\"\n                      : \"Create\"}\n                </Button>\n              </Form>\n            )}\n          </Formik>\n        </Modal.Body>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/modals/SelectModelModal.tsx",
    "content": "import Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { CloudEmbeddingModel } from \"@/components/embedding/interfaces\";\nimport { SvgServer } from \"@opal/icons\";\n\nexport interface SelectModelModalProps {\n  model: CloudEmbeddingModel;\n  onConfirm: () => void;\n  onCancel: () => void;\n}\n\nexport default function SelectModelModal({\n  model,\n  onConfirm,\n  onCancel,\n}: SelectModelModalProps) {\n  return (\n    <Modal open onOpenChange={onCancel}>\n      <Modal.Content width=\"sm\" height=\"sm\">\n        <Modal.Header\n          icon={SvgServer}\n          title={`Select ${model.model_name}`}\n          onClose={onCancel}\n        />\n        <Modal.Body>\n          <Text as=\"p\">\n            You&apos;re selecting a new embedding model,{\" \"}\n            <strong>{model.model_name}</strong>. If you update to this model,\n            you will need to undergo a complete re-indexing. Are you sure?\n          </Text>\n        </Modal.Body>\n        <Modal.Footer>\n          <Button onClick={onConfirm}>Confirm</Button>\n          <Button prominence=\"secondary\" onClick={onCancel}>\n            Cancel\n          </Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/page.tsx",
    "content": "\"use client\";\n\nimport { EmbeddingFormProvider } from \"@/components/context/EmbeddingContext\";\nimport EmbeddingSidebar from \"../../../sections/sidebar/UpsertEmbeddingSidebar\";\nimport EmbeddingForm from \"./pages/EmbeddingFormPage\";\n\nexport default function EmbeddingWrapper() {\n  return (\n    <EmbeddingFormProvider>\n      <div className=\"flex justify-center w-full h-full\">\n        <EmbeddingSidebar />\n        <div className=\"mt-12 w-full max-w-5xl mx-auto\">\n          <EmbeddingForm />\n        </div>\n      </div>\n    </EmbeddingFormProvider>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/pages/AdvancedEmbeddingFormPage.tsx",
    "content": "import React, { forwardRef } from \"react\";\nimport { Formik, Form, FormikProps, FieldArray, Field } from \"formik\";\nimport * as Yup from \"yup\";\nimport {\n  AdvancedSearchConfiguration,\n  EmbeddingPrecision,\n  LLMContextualCost,\n} from \"../interfaces\";\nimport {\n  BooleanFormField,\n  Label,\n  SubLabel,\n  SelectorFormField,\n} from \"@/components/Field\";\nimport NumberInput from \"../../connectors/[connector]/pages/ConnectorInput/NumberInput\";\nimport { StringOrNumberOption } from \"@/components/Dropdown\";\nimport useSWR from \"swr\";\nimport { LLM_CONTEXTUAL_COST_ADMIN_URL } from \"@/lib/llmConfig/constants\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport { SvgPlusCircle, SvgTrash } from \"@opal/icons\";\n// Number of tokens to show cost calculation for\nconst COST_CALCULATION_TOKENS = 1_000_000;\n\ninterface AdvancedEmbeddingFormPageProps {\n  updateAdvancedEmbeddingDetails: (\n    key: keyof AdvancedSearchConfiguration,\n    value: any\n  ) => void;\n  advancedEmbeddingDetails: AdvancedSearchConfiguration;\n  embeddingProviderType: string | null;\n  onValidationChange?: (\n    isValid: boolean,\n    errors: Record<string, string>\n  ) => void;\n}\n\n// Options for embedding precision based on EmbeddingPrecision enum\nconst embeddingPrecisionOptions: StringOrNumberOption[] = [\n  { name: EmbeddingPrecision.BFLOAT16, value: EmbeddingPrecision.BFLOAT16 },\n  { name: EmbeddingPrecision.FLOAT, value: EmbeddingPrecision.FLOAT },\n];\n\nconst AdvancedEmbeddingFormPage = forwardRef<\n  FormikProps<any>,\n  AdvancedEmbeddingFormPageProps\n>(\n  (\n    {\n      updateAdvancedEmbeddingDetails,\n      advancedEmbeddingDetails,\n      embeddingProviderType,\n      onValidationChange,\n    },\n    ref\n  ) => {\n    // Fetch contextual costs\n    const { data: contextualCosts, error: costError } = useSWR<\n      LLMContextualCost[]\n    >(LLM_CONTEXTUAL_COST_ADMIN_URL, errorHandlingFetcher);\n\n    const llmOptions: StringOrNumberOption[] = React.useMemo(\n      () =>\n        (contextualCosts || []).map((cost) => {\n          return {\n            // Use model_name as display - contextual costs don't have display_name field\n            name: cost.model_name,\n            value: cost.model_name,\n          };\n        }),\n      [contextualCosts]\n    );\n\n    // Helper function to format cost as USD\n    const formatCost = (cost: number) => {\n      return new Intl.NumberFormat(\"en-US\", {\n        style: \"currency\",\n        currency: \"USD\",\n      }).format(cost);\n    };\n\n    // Get cost info for selected model\n    const getSelectedModelCost = (modelName: string | null) => {\n      if (!contextualCosts || !modelName) return null;\n      return contextualCosts.find((cost) => cost.model_name === modelName);\n    };\n\n    // Get the current value for the selector based on the parent state\n    const getCurrentLLMValue = React.useMemo(() => {\n      if (!advancedEmbeddingDetails.contextual_rag_llm_name) return null;\n      return advancedEmbeddingDetails.contextual_rag_llm_name;\n    }, [advancedEmbeddingDetails.contextual_rag_llm_name]);\n\n    return (\n      <div className=\"py-4 rounded-lg max-w-4xl px-4 mx-auto\">\n        <Formik\n          innerRef={ref}\n          initialValues={{\n            ...advancedEmbeddingDetails,\n            contextual_rag_llm: getCurrentLLMValue,\n          }}\n          validationSchema={Yup.object().shape({\n            multilingual_expansion: Yup.array().of(Yup.string()),\n            multipass_indexing: Yup.boolean(),\n            enable_contextual_rag: Yup.boolean(),\n            contextual_rag_llm: Yup.string()\n              .nullable()\n              .test(\n                \"required-if-contextual-rag\",\n                \"LLM must be selected when Contextual RAG is enabled\",\n                function (value) {\n                  const enableContextualRag = this.parent.enable_contextual_rag;\n                  console.log(\"enableContextualRag\", enableContextualRag);\n                  console.log(\"value\", value);\n                  return !enableContextualRag || value !== null;\n                }\n              ),\n            embedding_precision: Yup.string().nullable(),\n            reduced_dimension: Yup.number()\n              .nullable()\n              .test(\n                \"positive\",\n                \"Must be larger than or equal to 256\",\n                (value) => value === null || value === undefined || value >= 256\n              )\n              .test(\n                \"openai\",\n                \"Reduced Dimensions is only supported for OpenAI embedding models\",\n                (value) => {\n                  return embeddingProviderType === \"openai\" || value === null;\n                }\n              ),\n          })}\n          onSubmit={async (_, { setSubmitting }) => {\n            setSubmitting(false);\n          }}\n          validate={(values) => {\n            // Call updateAdvancedEmbeddingDetails for each changed field\n            Object.entries(values).forEach(([key, value]) => {\n              if (key === \"contextual_rag_llm\") {\n                const selectedModel = (contextualCosts || []).find(\n                  (cost) => cost.model_name === value\n                );\n                if (selectedModel) {\n                  updateAdvancedEmbeddingDetails(\n                    \"contextual_rag_llm_provider\",\n                    selectedModel.provider\n                  );\n                  updateAdvancedEmbeddingDetails(\n                    \"contextual_rag_llm_name\",\n                    selectedModel.model_name\n                  );\n                }\n              } else {\n                updateAdvancedEmbeddingDetails(\n                  key as keyof AdvancedSearchConfiguration,\n                  value\n                );\n              }\n            });\n\n            // Run validation and report errors\n            if (onValidationChange) {\n              // We'll return an empty object here since Yup will handle the actual validation\n              // But we need to check if there are any validation errors\n              const errors: Record<string, string> = {};\n              try {\n                // Manually validate against the schema\n                Yup.object()\n                  .shape({\n                    multilingual_expansion: Yup.array().of(Yup.string()),\n                    multipass_indexing: Yup.boolean(),\n                    enable_contextual_rag: Yup.boolean(),\n                    contextual_rag_llm: Yup.string()\n                      .nullable()\n                      .test(\n                        \"required-if-contextual-rag\",\n                        \"LLM must be selected when Contextual RAG is enabled\",\n                        function (value) {\n                          const enableContextualRag =\n                            this.parent.enable_contextual_rag;\n                          return !enableContextualRag || value !== null;\n                        }\n                      ),\n                    embedding_precision: Yup.string().nullable(),\n                    reduced_dimension: Yup.number()\n                      .nullable()\n                      .test(\n                        \"positive\",\n                        \"Must be larger than or equal to 256\",\n                        (value) =>\n                          value === null || value === undefined || value >= 256\n                      )\n                      .test(\n                        \"openai\",\n                        \"Reduced Dimensions is only supported for OpenAI embedding models\",\n                        (value) => {\n                          return (\n                            embeddingProviderType === \"openai\" || value === null\n                          );\n                        }\n                      ),\n                  })\n                  .validateSync(values, { abortEarly: false });\n                onValidationChange(true, {});\n              } catch (validationError) {\n                if (validationError instanceof Yup.ValidationError) {\n                  validationError.inner.forEach((err) => {\n                    if (err.path) {\n                      errors[err.path] = err.message;\n                    }\n                  });\n                  onValidationChange(false, errors);\n                }\n              }\n            }\n\n            return {}; // Return empty object as Formik will handle the errors\n          }}\n          enableReinitialize={true}\n        >\n          {({ values }) => (\n            <Form>\n              <BooleanFormField\n                subtext=\"Enable multipass indexing for both mini and large chunks.\"\n                optional\n                label=\"Multipass Indexing\"\n                name=\"multipass_indexing\"\n              />\n              <BooleanFormField\n                subtext={\n                  NEXT_PUBLIC_CLOUD_ENABLED\n                    ? \"Contextual RAG disabled in Onyx Cloud\"\n                    : \"Enable contextual RAG for all chunk sizes.\"\n                }\n                optional\n                label=\"Contextual RAG\"\n                name=\"enable_contextual_rag\"\n                disabled={NEXT_PUBLIC_CLOUD_ENABLED}\n              />\n              <div>\n                <SelectorFormField\n                  name=\"contextual_rag_llm\"\n                  label=\"Contextual RAG LLM\"\n                  subtext={\n                    costError\n                      ? \"Error loading LLM models. Please try again later.\"\n                      : !contextualCosts\n                        ? \"Loading available LLM models...\"\n                        : values.enable_contextual_rag\n                          ? \"Select the LLM model to use for contextual RAG processing.\"\n                          : \"Enable Contextual RAG above to select an LLM model.\"\n                  }\n                  options={llmOptions}\n                  disabled={\n                    !values.enable_contextual_rag ||\n                    !contextualCosts ||\n                    !!costError\n                  }\n                />\n                {values.enable_contextual_rag &&\n                  values.contextual_rag_llm &&\n                  !costError && (\n                    <div className=\"mt-2 text-sm text-text-600\">\n                      {contextualCosts ? (\n                        <>\n                          Estimated cost for processing{\" \"}\n                          {COST_CALCULATION_TOKENS.toLocaleString()} tokens:{\" \"}\n                          <span className=\"font-medium\">\n                            {getSelectedModelCost(values.contextual_rag_llm)\n                              ? formatCost(\n                                  getSelectedModelCost(\n                                    values.contextual_rag_llm\n                                  )!.cost\n                                )\n                              : \"Cost information not available\"}\n                          </span>\n                        </>\n                      ) : (\n                        \"Loading cost information...\"\n                      )}\n                    </div>\n                  )}\n              </div>\n              <SelectorFormField\n                name=\"embedding_precision\"\n                label=\"Embedding Precision\"\n                options={embeddingPrecisionOptions}\n                subtext=\"Select the precision for embedding vectors. Lower precision uses less storage but may reduce accuracy.\"\n              />\n\n              <NumberInput\n                description=\"Number of dimensions to reduce the embedding to.\n              Will reduce memory usage but may reduce accuracy.\n              If not specified, will just use the selected model's default dimensionality without any reduction.\n              Currently only supported for OpenAI embedding models\"\n                optional={true}\n                label=\"Reduced Dimension\"\n                name=\"reduced_dimension\"\n              />\n            </Form>\n          )}\n        </Formik>\n      </div>\n    );\n  }\n);\nexport default AdvancedEmbeddingFormPage;\n\nAdvancedEmbeddingFormPage.displayName = \"AdvancedEmbeddingFormPage\";\n"
  },
  {
    "path": "web/src/app/admin/embeddings/pages/CloudEmbeddingPage.tsx",
    "content": "\"use client\";\n\nimport { Text } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport Title from \"@/components/ui/title\";\nimport {\n  CloudEmbeddingProvider,\n  CloudEmbeddingModel,\n  AVAILABLE_CLOUD_PROVIDERS,\n  CloudEmbeddingProviderFull,\n  EmbeddingModelDescriptor,\n  EmbeddingProvider,\n  LITELLM_CLOUD_PROVIDER,\n  AZURE_CLOUD_PROVIDER,\n  getFormattedProviderName,\n} from \"../../../../components/embedding/interfaces\";\nimport { EmbeddingDetails } from \"../EmbeddingModelSelectionForm\";\nimport { FiExternalLink, FiInfo, FiTrash } from \"react-icons/fi\";\nimport { HoverPopup } from \"@/components/HoverPopup\";\nimport { Dispatch, SetStateAction, useEffect, useState } from \"react\";\nimport { CustomEmbeddingModelForm } from \"@/components/embedding/CustomEmbeddingModelForm\";\nimport { deleteSearchSettings } from \"./utils\";\nimport { toast } from \"@/hooks/useToast\";\nimport { ConfirmEntityModal } from \"@/components/modals/ConfirmEntityModal\";\nimport { AdvancedSearchConfiguration } from \"../interfaces\";\nimport CardSection from \"@/components/admin/CardSection\";\n\nexport default function CloudEmbeddingPage({\n  currentModel,\n  embeddingProviderDetails,\n  embeddingModelDetails,\n  setShowTentativeProvider,\n  setChangeCredentialsProvider,\n  setAlreadySelectedModel,\n  setShowTentativeModel,\n  setShowModelInQueue,\n  advancedEmbeddingDetails,\n}: {\n  setShowModelInQueue: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;\n  setShowTentativeModel: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;\n  currentModel: EmbeddingModelDescriptor | CloudEmbeddingModel;\n  setAlreadySelectedModel: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;\n  embeddingModelDetails?: CloudEmbeddingModel[];\n  embeddingProviderDetails?: EmbeddingDetails[];\n  setShowTentativeProvider: React.Dispatch<\n    React.SetStateAction<CloudEmbeddingProvider | null>\n  >;\n  setChangeCredentialsProvider: React.Dispatch<\n    React.SetStateAction<CloudEmbeddingProvider | null>\n  >;\n  advancedEmbeddingDetails: AdvancedSearchConfiguration;\n}) {\n  function hasProviderTypeinArray(\n    arr: Array<{ provider_type: string }>,\n    searchName: string\n  ): boolean {\n    return arr.some(\n      (item) => item.provider_type.toLowerCase() === searchName.toLowerCase()\n    );\n  }\n\n  const providers: CloudEmbeddingProviderFull[] = AVAILABLE_CLOUD_PROVIDERS.map(\n    (model) => ({\n      ...model,\n      configured:\n        embeddingProviderDetails &&\n        hasProviderTypeinArray(embeddingProviderDetails, model.provider_type),\n    })\n  );\n  const [liteLLMProvider, setLiteLLMProvider] = useState<\n    EmbeddingDetails | undefined\n  >(undefined);\n\n  const [azureProvider, setAzureProvider] = useState<\n    EmbeddingDetails | undefined\n  >(undefined);\n\n  useEffect(() => {\n    const liteLLMProvider = embeddingProviderDetails?.find(\n      (provider) =>\n        provider.provider_type === EmbeddingProvider.LITELLM.toLowerCase()\n    );\n    setLiteLLMProvider(liteLLMProvider);\n    const azureProvider = embeddingProviderDetails?.find(\n      (provider) =>\n        provider.provider_type === EmbeddingProvider.AZURE.toLowerCase()\n    );\n    setAzureProvider(azureProvider);\n  }, [embeddingProviderDetails]);\n\n  const isAzureConfigured = azureProvider !== undefined;\n\n  // Get details of the configured Azure provider\n  const azureProviderDetails = embeddingProviderDetails?.find(\n    (provider) => provider.provider_type.toLowerCase() === \"azure\"\n  );\n\n  return (\n    <div>\n      <Title className=\"mt-8\">\n        Here are some cloud-based models to choose from.\n      </Title>\n      <Text as=\"p\">\n        {\n          \"These models require API keys and run in the clouds of the respective providers.\"\n        }\n      </Text>\n      <Spacer rem={1} />\n\n      <div className=\"gap-4 mt-2 pb-10 flex content-start flex-wrap\">\n        {providers.map((provider) => (\n          <div key={provider.provider_type} className=\"mt-4 w-full\">\n            <div className=\"flex items-center mb-2\">\n              {provider.icon({ size: 40 })}\n              <h2 className=\"ml-2  mt-2 text-xl font-bold\">\n                {getFormattedProviderName(provider.provider_type)}{\" \"}\n                {provider.provider_type == EmbeddingProvider.COHERE &&\n                  \"(recommended)\"}\n              </h2>\n              <HoverPopup\n                mainContent={\n                  <FiInfo className=\"ml-2 mt-2 cursor-pointer\" size={18} />\n                }\n                popupContent={\n                  <div className=\"text-sm text-text-800 w-52\">\n                    <div className=\"my-auto\">{provider.description}</div>\n                  </div>\n                }\n                style=\"dark\"\n              />\n            </div>\n\n            <button\n              onClick={() => {\n                if (!provider.configured) {\n                  setShowTentativeProvider(provider);\n                } else {\n                  setChangeCredentialsProvider(provider);\n                }\n              }}\n              className=\"mb-2  hover:underline text-sm cursor-pointer\"\n            >\n              {provider.configured ? \"Modify API key\" : \"Provide API key\"}\n            </button>\n            <div className=\"flex flex-wrap gap-4\">\n              {provider.embedding_models.map((model) => (\n                <CloudModelCard\n                  key={`${provider.provider_type}-${model.model_name}`}\n                  model={model}\n                  provider={provider}\n                  currentModel={currentModel}\n                  setAlreadySelectedModel={setAlreadySelectedModel}\n                  setShowTentativeModel={setShowTentativeModel}\n                  setShowModelInQueue={setShowModelInQueue}\n                  setShowTentativeProvider={setShowTentativeProvider}\n                />\n              ))}\n            </div>\n          </div>\n        ))}\n\n        <Spacer rem={1.5} />\n        <Text as=\"p\">\n          {markdown(\n            \"Alternatively, you can use a self-hosted model using the LiteLLM proxy. This allows you to leverage various LLM providers through a unified interface that you control. [Learn more about LiteLLM](https://docs.litellm.ai/)\"\n          )}\n        </Text>\n\n        <div key={LITELLM_CLOUD_PROVIDER.provider_type} className=\"mt-4 w-full\">\n          <div className=\"flex items-center mb-2\">\n            {LITELLM_CLOUD_PROVIDER.icon({ size: 40 })}\n            <h2 className=\"ml-2  mt-2 text-xl font-bold\">\n              {getFormattedProviderName(LITELLM_CLOUD_PROVIDER.provider_type)}{\" \"}\n              {LITELLM_CLOUD_PROVIDER.provider_type ==\n                EmbeddingProvider.COHERE && \"(recommended)\"}\n            </h2>\n            <HoverPopup\n              mainContent={\n                <FiInfo className=\"ml-2 mt-2 cursor-pointer\" size={18} />\n              }\n              popupContent={\n                <div className=\"text-sm text-text-800 w-52\">\n                  <div className=\"my-auto\">\n                    {LITELLM_CLOUD_PROVIDER.description}\n                  </div>\n                </div>\n              }\n              style=\"dark\"\n            />\n          </div>\n          <div className=\"w-full flex flex-col items-start\">\n            {!liteLLMProvider ? (\n              <button\n                onClick={() => setShowTentativeProvider(LITELLM_CLOUD_PROVIDER)}\n                className=\"mb-2 px-4 py-2 bg-blue-500 text-white rounded hover:bg-blue-600 text-sm cursor-pointer\"\n              >\n                Set API Configuration\n              </button>\n            ) : (\n              <button\n                onClick={() =>\n                  setChangeCredentialsProvider(LITELLM_CLOUD_PROVIDER)\n                }\n                className=\"mb-2 hover:underline text-sm cursor-pointer\"\n              >\n                Modify API Configuration\n              </button>\n            )}\n\n            {!liteLLMProvider && (\n              <CardSection className=\"mt-2 w-full max-w-4xl bg-background-50 border border-background-200\">\n                <div className=\"p-4\">\n                  <Text as=\"p\" font=\"heading-h3\">\n                    API URL Required\n                  </Text>\n                  <Spacer rem={0.5} />\n                  <Text as=\"p\">\n                    {\n                      'Before you can add models, you need to provide an API URL for your LiteLLM proxy. Click the \"Provide API URL\" button above to set up your LiteLLM configuration.'\n                    }\n                  </Text>\n                  <Spacer rem={1} />\n                  <div className=\"flex items-center\">\n                    <FiInfo className=\"text-blue-500 mr-2\" size={18} />\n                    <span className=\"text-blue-500\">\n                      <Text as=\"p\">\n                        {\n                          \"Once configured, you'll be able to add and manage your LiteLLM models here.\"\n                        }\n                      </Text>\n                    </span>\n                  </div>\n                </div>\n              </CardSection>\n            )}\n            {liteLLMProvider && (\n              <>\n                <div className=\"flex mb-4 flex-wrap gap-4\">\n                  {embeddingModelDetails\n                    ?.filter(\n                      (model) =>\n                        model.provider_type ===\n                        EmbeddingProvider.LITELLM.toLowerCase()\n                    )\n                    .map((model, index) => (\n                      <CloudModelCard\n                        key={\n                          model.id ??\n                          `${model.provider_type}-${model.model_name}-${index}`\n                        }\n                        model={model}\n                        provider={LITELLM_CLOUD_PROVIDER}\n                        currentModel={currentModel}\n                        setAlreadySelectedModel={setAlreadySelectedModel}\n                        setShowTentativeModel={setShowTentativeModel}\n                        setShowModelInQueue={setShowModelInQueue}\n                        setShowTentativeProvider={setShowTentativeProvider}\n                      />\n                    ))}\n                </div>\n\n                <CardSection\n                  className={`mt-2 w-full max-w-4xl ${\n                    currentModel.provider_type === EmbeddingProvider.LITELLM\n                      ? \"border-2 border-blue-500\"\n                      : \"\"\n                  }`}\n                >\n                  <CustomEmbeddingModelForm\n                    embeddingType={EmbeddingProvider.LITELLM}\n                    provider={liteLLMProvider}\n                    currentValues={\n                      currentModel.provider_type === EmbeddingProvider.LITELLM\n                        ? (currentModel as CloudEmbeddingModel)\n                        : null\n                    }\n                    setShowTentativeModel={setShowTentativeModel}\n                  />\n                </CardSection>\n              </>\n            )}\n          </div>\n        </div>\n\n        <Spacer rem={1.5} />\n        <Text as=\"p\">\n          {\n            \"You can also use Azure OpenAI models for embeddings. Azure requires separate configuration for each model.\"\n          }\n        </Text>\n\n        <div key={AZURE_CLOUD_PROVIDER.provider_type} className=\"mt-4 w-full\">\n          <div className=\"flex items-center mb-2\">\n            {AZURE_CLOUD_PROVIDER.icon({ size: 40 })}\n            <h2 className=\"ml-2  mt-2 text-xl font-bold\">\n              {getFormattedProviderName(AZURE_CLOUD_PROVIDER.provider_type)}{\" \"}\n            </h2>\n            <HoverPopup\n              mainContent={\n                <FiInfo className=\"ml-2 mt-2 cursor-pointer\" size={18} />\n              }\n              popupContent={\n                <div className=\"text-sm text-text-800 w-52\">\n                  <div className=\"my-auto\">\n                    {AZURE_CLOUD_PROVIDER.description}\n                  </div>\n                </div>\n              }\n              style=\"dark\"\n            />\n          </div>\n        </div>\n\n        <div className=\"w-full flex flex-col items-start\">\n          {!isAzureConfigured ? (\n            <>\n              <button\n                onClick={() => setShowTentativeProvider(AZURE_CLOUD_PROVIDER)}\n                className=\"mb-2 px-4 py-2 bg-blue-500 text-white rounded hover:bg-blue-600 text-sm cursor-pointer\"\n              >\n                Configure Azure OpenAI\n              </button>\n              <div className=\"mt-2 w-full max-w-4xl\">\n                <CardSection className=\"p-4 border border-background-200 rounded-lg shadow-sm\">\n                  <Text as=\"p\" font=\"main-ui-action\">\n                    Configure Azure OpenAI for Embeddings\n                  </Text>\n                  <Spacer rem={0.5} />\n                  <Text as=\"p\">\n                    {\n                      'Click \"Configure Azure OpenAI\" to set up Azure OpenAI for embeddings.'\n                    }\n                  </Text>\n                  <Spacer rem={0.75} />\n                  <div className=\"flex items-center\">\n                    <FiInfo className=\"text-neutral-400 mr-2\" size={16} />\n                    <Text as=\"p\">\n                      {\n                        \"You'll need: API version, base URL, API key, model name, and deployment name.\"\n                      }\n                    </Text>\n                  </div>\n                </CardSection>\n              </div>\n            </>\n          ) : (\n            <>\n              <div className=\"mb-6 w-full\">\n                <Text as=\"p\" font=\"heading-h3\">\n                  Current Azure Configuration\n                </Text>\n                <Spacer rem={0.75} />\n\n                {azureProviderDetails ? (\n                  <CardSection className=\"bg-white shadow-sm border border-background-200 rounded-lg\">\n                    <div className=\"p-4 space-y-3\">\n                      <div className=\"flex justify-between\">\n                        <span className=\"font-medium\">API Version:</span>\n                        <span>{azureProviderDetails.api_version}</span>\n                      </div>\n                      <div className=\"flex justify-between\">\n                        <span className=\"font-medium\">Base URL:</span>\n                        <span>{azureProviderDetails.api_url}</span>\n                      </div>\n                      <div className=\"flex justify-between\">\n                        <span className=\"font-medium\">Deployment Name:</span>\n                        <span>{azureProviderDetails.deployment_name}</span>\n                      </div>\n                    </div>\n                    <button\n                      onClick={() =>\n                        setChangeCredentialsProvider(AZURE_CLOUD_PROVIDER)\n                      }\n                      className=\"mt-2 px-4 py-2 bg-red-500 text-white rounded hover:bg-red-600 text-sm\"\n                    >\n                      Delete Current Azure Provider\n                    </button>\n                  </CardSection>\n                ) : (\n                  <CardSection className=\"bg-background-50 border border-background-200 rounded-lg\">\n                    <div className=\"p-4 text-text-500 text-center\">\n                      No Azure provider has been configured yet.\n                    </div>\n                  </CardSection>\n                )}\n              </div>\n\n              <CardSection\n                className={`mt-2 w-full max-w-4xl ${\n                  currentModel.provider_type === EmbeddingProvider.AZURE\n                    ? \"border-2 border-blue-500\"\n                    : \"\"\n                }`}\n              >\n                {azureProvider && (\n                  <CustomEmbeddingModelForm\n                    embeddingType={EmbeddingProvider.AZURE}\n                    provider={azureProvider}\n                    currentValues={\n                      currentModel.provider_type === EmbeddingProvider.AZURE\n                        ? (currentModel as CloudEmbeddingModel)\n                        : null\n                    }\n                    setShowTentativeModel={setShowTentativeModel}\n                  />\n                )}\n              </CardSection>\n            </>\n          )}\n        </div>\n      </div>\n    </div>\n  );\n}\n\nexport function CloudModelCard({\n  model,\n  provider,\n  currentModel,\n  setAlreadySelectedModel,\n  setShowTentativeModel,\n  setShowModelInQueue,\n  setShowTentativeProvider,\n}: {\n  model: CloudEmbeddingModel;\n  provider: CloudEmbeddingProviderFull;\n  currentModel: EmbeddingModelDescriptor | CloudEmbeddingModel;\n  setAlreadySelectedModel: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;\n  setShowTentativeModel: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;\n  setShowModelInQueue: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;\n  setShowTentativeProvider: React.Dispatch<\n    React.SetStateAction<CloudEmbeddingProvider | null>\n  >;\n}) {\n  const [showDeleteModel, setShowDeleteModel] = useState(false);\n  const modelId = typeof model.id === \"number\" ? model.id : null;\n  const currentModelId =\n    typeof currentModel.id === \"number\" ? currentModel.id : null;\n\n  const idsMatch =\n    modelId !== null && currentModelId !== null && modelId === currentModelId;\n\n  const shouldCompareNames = modelId === null || currentModelId === null;\n\n  const namesMatch =\n    shouldCompareNames &&\n    model.model_name === currentModel.model_name &&\n    model.provider_type?.toLowerCase() ===\n      currentModel.provider_type?.toLowerCase();\n\n  const enabled = idsMatch || namesMatch;\n\n  const deleteModel = async () => {\n    if (!model.id) {\n      toast.error(\"Model cannot be deleted\");\n      return;\n    }\n\n    const response = await deleteSearchSettings(model.id);\n\n    if (response.ok) {\n      toast.success(\"Model deleted successfully\");\n      setShowDeleteModel(false);\n    } else {\n      toast.error(\n        \"Failed to delete model. Ensure you are not attempting to delete a curently active model.\"\n      );\n    }\n  };\n\n  return (\n    <div\n      className={`p-4 w-96 border rounded-lg transition-all duration-200 ${\n        enabled\n          ? \"border-blue-500 bg-blue-50 dark:bg-blue-950 shadow-md\"\n          : \"border-background-300 hover:border-blue-300 hover:shadow-sm\"\n      } ${!provider.configured && \"opacity-80 hover:opacity-100\"}`}\n    >\n      {showDeleteModel && (\n        <ConfirmEntityModal\n          entityName={model.model_name}\n          entityType=\"embedding model configuration\"\n          onSubmit={() => deleteModel()}\n          onClose={() => setShowDeleteModel(false)}\n        />\n      )}\n\n      <div className=\"flex items-center justify-between mb-3\">\n        <h3 className=\"font-bold dark:text-neutral-100 text-lg\">\n          {model.model_name}\n        </h3>\n        <div className=\"flex gap-x-2\">\n          {model.provider_type == EmbeddingProvider.LITELLM.toLowerCase() && (\n            <button\n              onClickCapture={() => setShowDeleteModel(true)}\n              onClick={(e) => e.stopPropagation()}\n              className=\"text-blue-500 hover:text-blue-700 transition-colors duration-200\"\n            >\n              <FiTrash size={18} />\n            </button>\n          )}\n          <a\n            href={provider.website}\n            target=\"_blank\"\n            rel=\"noopener noreferrer\"\n            onClick={(e) => e.stopPropagation()}\n            className=\"text-blue-500 hover:text-blue-700 transition-colors duration-200\"\n          >\n            <FiExternalLink size={18} />\n          </a>\n        </div>\n      </div>\n      <p className=\"text-sm text-text-600 dark:text-neutral-400 mb-2\">\n        {model.description}\n      </p>\n      {model?.provider_type?.toLowerCase() !=\n        EmbeddingProvider.LITELLM.toLowerCase() && (\n        <div className=\"text-xs text-text-500 mb-2\">\n          ${model.pricePerMillion}/M tokens\n        </div>\n      )}\n      <div className=\"mt-3\">\n        <button\n          className={`w-full p-2 rounded-lg text-sm ${\n            enabled\n              ? \"bg-background-125 border border-border cursor-not-allowed\"\n              : \"bg-background border border-border hover:bg-accent-background-hovered cursor-pointer\"\n          }`}\n          onClick={() => {\n            if (enabled) {\n              setAlreadySelectedModel(model);\n            } else if (\n              provider.configured ||\n              provider.provider_type === EmbeddingProvider.LITELLM\n            ) {\n              setShowTentativeModel(model);\n            } else {\n              setShowModelInQueue(model);\n              setShowTentativeProvider(provider);\n            }\n          }}\n          disabled={enabled}\n        >\n          {enabled ? \"Selected Model\" : \"Select Model\"}\n        </button>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx",
    "content": "\"use client\";\n\nimport { toast } from \"@/hooks/useToast\";\n\nimport EmbeddingModelSelection from \"../EmbeddingModelSelectionForm\";\nimport { useCallback, useEffect, useMemo, useState, useRef } from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Button as OpalButton } from \"@opal/components\";\nimport { WarningCircle, Warning, CaretDownIcon } from \"@phosphor-icons/react\";\nimport {\n  CloudEmbeddingModel,\n  EmbeddingProvider,\n  HostedEmbeddingModel,\n} from \"@/components/embedding/interfaces\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport AdvancedEmbeddingFormPage from \"./AdvancedEmbeddingFormPage\";\nimport {\n  AdvancedSearchConfiguration,\n  EmbeddingPrecision,\n  RerankingDetails,\n  SavedSearchSettings,\n  SwitchoverType,\n} from \"../interfaces\";\nimport RerankingDetailsForm from \"../RerankingFormPage\";\nimport { useEmbeddingFormContext } from \"@/components/context/EmbeddingContext\";\nimport Modal from \"@/refresh-components/Modal\";\nimport InstantSwitchConfirmModal from \"../modals/InstantSwitchConfirmModal\";\nimport { useRouter } from \"next/navigation\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { combineSearchSettings } from \"./utils\";\nimport {\n  DropdownMenu,\n  DropdownMenuContent,\n  DropdownMenuItem,\n  DropdownMenuTrigger,\n} from \"@/components/ui/dropdown-menu\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { SvgAlertTriangle, SvgArrowLeft, SvgArrowRight } from \"@opal/icons\";\nexport default function EmbeddingForm() {\n  const { formStep, nextFormStep, prevFormStep } = useEmbeddingFormContext();\n  const router = useRouter();\n\n  const [advancedEmbeddingDetails, setAdvancedEmbeddingDetails] =\n    useState<AdvancedSearchConfiguration>({\n      index_name: \"\",\n      multipass_indexing: true,\n      enable_contextual_rag: false,\n      contextual_rag_llm_name: null,\n      contextual_rag_llm_provider: null,\n      multilingual_expansion: [],\n      disable_rerank_for_streaming: false,\n      api_url: null,\n      num_rerank: 0,\n      embedding_precision: EmbeddingPrecision.BFLOAT16,\n      reduced_dimension: null,\n    });\n\n  const [rerankingDetails, setRerankingDetails] = useState<RerankingDetails>({\n    rerank_api_key: \"\",\n    rerank_provider_type: null,\n    rerank_model_name: \"\",\n    rerank_api_url: null,\n  });\n\n  const [switchoverType, setSwitchoverType] = useState<SwitchoverType>(\n    SwitchoverType.REINDEX\n  );\n\n  const [formErrors, setFormErrors] = useState<Record<string, string>>({});\n  const [isFormValid, setIsFormValid] = useState(true);\n  const [rerankFormErrors, setRerankFormErrors] = useState<\n    Record<string, string>\n  >({});\n  const [isRerankFormValid, setIsRerankFormValid] = useState(true);\n  const advancedFormRef = useRef(null);\n  const rerankFormRef = useRef(null);\n\n  const updateAdvancedEmbeddingDetails = (\n    key: keyof AdvancedSearchConfiguration,\n    value: any\n  ) => {\n    setAdvancedEmbeddingDetails((values) => ({ ...values, [key]: value }));\n  };\n\n  async function updateSearchSettings(searchSettings: SavedSearchSettings) {\n    const response = await fetch(\n      \"/api/search-settings/update-inference-settings\",\n      {\n        method: \"POST\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({\n          ...searchSettings,\n        }),\n      }\n    );\n    return response;\n  }\n\n  const updateSelectedProvider = (\n    model: CloudEmbeddingModel | HostedEmbeddingModel\n  ) => {\n    setSelectedProvider(model);\n  };\n  const [displayPoorModelName, setDisplayPoorModelName] = useState(true);\n  const [showPoorModel, setShowPoorModel] = useState(false);\n  const [showInstantSwitchConfirm, setShowInstantSwitchConfirm] =\n    useState(false);\n  const [modelTab, setModelTab] = useState<\"open\" | \"cloud\" | null>(null);\n\n  const {\n    data: currentEmbeddingModel,\n    isLoading: isLoadingCurrentModel,\n    error: currentEmbeddingModelError,\n  } = useSWR<CloudEmbeddingModel | HostedEmbeddingModel | null>(\n    SWR_KEYS.currentSearchSettings,\n    errorHandlingFetcher,\n    { refreshInterval: 5000 } // 5 seconds\n  );\n\n  const [selectedProvider, setSelectedProvider] = useState<\n    CloudEmbeddingModel | HostedEmbeddingModel | null\n  >(currentEmbeddingModel!);\n\n  const { data: searchSettings, isLoading: isLoadingSearchSettings } =\n    useSWR<SavedSearchSettings | null>(\n      SWR_KEYS.currentSearchSettings,\n      errorHandlingFetcher,\n      { refreshInterval: 5000 } // 5 seconds\n    );\n\n  useEffect(() => {\n    if (searchSettings) {\n      setAdvancedEmbeddingDetails({\n        index_name: searchSettings.index_name,\n        multipass_indexing: searchSettings.multipass_indexing,\n        enable_contextual_rag: searchSettings.enable_contextual_rag,\n        contextual_rag_llm_name: searchSettings.contextual_rag_llm_name,\n        contextual_rag_llm_provider: searchSettings.contextual_rag_llm_provider,\n        multilingual_expansion: searchSettings.multilingual_expansion,\n        disable_rerank_for_streaming:\n          searchSettings.disable_rerank_for_streaming,\n        num_rerank: searchSettings.num_rerank,\n        api_url: null,\n        embedding_precision: searchSettings.embedding_precision,\n        reduced_dimension: searchSettings.reduced_dimension,\n      });\n\n      setRerankingDetails({\n        rerank_api_key: searchSettings.rerank_api_key,\n        rerank_provider_type: searchSettings.rerank_provider_type,\n        rerank_model_name: searchSettings.rerank_model_name,\n        rerank_api_url: searchSettings.rerank_api_url,\n      });\n    }\n  }, [searchSettings]);\n\n  const originalRerankingDetails: RerankingDetails = searchSettings\n    ? {\n        rerank_api_key: searchSettings.rerank_api_key,\n        rerank_provider_type: searchSettings.rerank_provider_type,\n        rerank_model_name: searchSettings.rerank_model_name,\n        rerank_api_url: searchSettings.rerank_api_url,\n      }\n    : {\n        rerank_api_key: \"\",\n        rerank_provider_type: null,\n        rerank_model_name: \"\",\n        rerank_api_url: null,\n      };\n\n  useEffect(() => {\n    if (currentEmbeddingModel) {\n      setSelectedProvider(currentEmbeddingModel);\n    }\n  }, [currentEmbeddingModel]);\n\n  const needsReIndex =\n    currentEmbeddingModel != selectedProvider ||\n    searchSettings?.multipass_indexing !=\n      advancedEmbeddingDetails.multipass_indexing ||\n    searchSettings?.embedding_precision !=\n      advancedEmbeddingDetails.embedding_precision ||\n    searchSettings?.reduced_dimension !=\n      advancedEmbeddingDetails.reduced_dimension ||\n    searchSettings?.enable_contextual_rag !=\n      advancedEmbeddingDetails.enable_contextual_rag;\n\n  const updateSearch = useCallback(async () => {\n    if (!selectedProvider) {\n      return false;\n    }\n    const searchSettings = combineSearchSettings(\n      selectedProvider,\n      advancedEmbeddingDetails,\n      rerankingDetails,\n      selectedProvider.provider_type?.toLowerCase() as EmbeddingProvider | null,\n      switchoverType\n    );\n\n    const response = await updateSearchSettings(searchSettings);\n    if (response.ok) {\n      return true;\n    } else {\n      toast.error(\"Failed to update search settings\");\n      return false;\n    }\n  }, [\n    selectedProvider,\n    advancedEmbeddingDetails,\n    rerankingDetails,\n    switchoverType,\n  ]);\n\n  const handleValidationChange = useCallback(\n    (isValid: boolean, errors: Record<string, string>) => {\n      setIsFormValid(isValid);\n      setFormErrors(errors);\n    },\n    []\n  );\n\n  const handleRerankValidationChange = useCallback(\n    (isValid: boolean, errors: Record<string, string>) => {\n      setIsRerankFormValid(isValid);\n      setRerankFormErrors(errors);\n    },\n    []\n  );\n\n  // Combine validation states for both forms\n  const isOverallFormValid = isFormValid && isRerankFormValid;\n  const combinedFormErrors = useMemo(() => {\n    return { ...formErrors, ...rerankFormErrors };\n  }, [formErrors, rerankFormErrors]);\n\n  const ReIndexingButton = useMemo(() => {\n    const ReIndexingButtonComponent = ({\n      needsReIndex,\n    }: {\n      needsReIndex: boolean;\n    }) => {\n      return needsReIndex ? (\n        <div className=\"flex mx-auto gap-x-1 ml-auto items-center\">\n          <div className=\"flex items-center h-fit\">\n            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n            <Button\n              onClick={() => {\n                if (switchoverType == SwitchoverType.INSTANT) {\n                  setShowInstantSwitchConfirm(true);\n                } else {\n                  handleReIndex();\n                  navigateToEmbeddingPage(\"search settings\");\n                }\n              }}\n              disabled={!isOverallFormValid}\n              action\n              className=\"rounded-r-none w-32 h-full\"\n            >\n              {switchoverType == SwitchoverType.REINDEX\n                ? \"Re-index\"\n                : switchoverType == SwitchoverType.ACTIVE_ONLY\n                  ? \"Active Only\"\n                  : \"Instant Switch\"}\n            </Button>\n            <DropdownMenu>\n              <DropdownMenuTrigger asChild>\n                {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n                <Button\n                  disabled={!isOverallFormValid}\n                  action\n                  className=\"rounded-l-none border-l border-white/20 px-1 h-[36px] w-[30px] min-w-[30px]\"\n                >\n                  <CaretDownIcon className=\"text-text-inverted-05\" />\n                </Button>\n              </DropdownMenuTrigger>\n              <DropdownMenuContent>\n                <DropdownMenuItem\n                  onClick={() => {\n                    setSwitchoverType(SwitchoverType.REINDEX);\n                  }}\n                >\n                  <SimpleTooltip tooltip=\"Re-runs all connectors in the background before switching over. Takes longer but ensures no degredation of search during the switch.\">\n                    <span className=\"w-full text-left\">\n                      (Recommended) Re-index\n                    </span>\n                  </SimpleTooltip>\n                </DropdownMenuItem>\n                <DropdownMenuItem\n                  onClick={() => {\n                    setSwitchoverType(SwitchoverType.ACTIVE_ONLY);\n                  }}\n                >\n                  <SimpleTooltip tooltip=\"Re-runs only active (non-paused) connectors in the background before switching over. Paused connectors won't block the switchover.\">\n                    <span className=\"w-full text-left\">\n                      Active Connectors Only\n                    </span>\n                  </SimpleTooltip>\n                </DropdownMenuItem>\n                <DropdownMenuItem\n                  onClick={() => {\n                    setSwitchoverType(SwitchoverType.INSTANT);\n                  }}\n                >\n                  <SimpleTooltip tooltip=\"Immediately switches to new settings without re-indexing. Searches will be degraded until the re-indexing is complete.\">\n                    <span className=\"w-full text-left\">Instant Switch</span>\n                  </SimpleTooltip>\n                </DropdownMenuItem>\n              </DropdownMenuContent>\n            </DropdownMenu>\n          </div>\n          {isOverallFormValid && (\n            <div className=\"relative group\">\n              <WarningCircle\n                className=\"text-text-800 cursor-help\"\n                size={20}\n                weight=\"fill\"\n              />\n              <div className=\"absolute z-10 invisible group-hover:visible bg-background-800 text-text-200 text-sm rounded-md shadow-md p-2 right-0 mt-1 w-64\">\n                <p className=\"font-semibold mb-2\">Needs re-indexing due to:</p>\n                <ul className=\"list-disc pl-5\">\n                  {currentEmbeddingModel != selectedProvider && (\n                    <li>Changed embedding provider</li>\n                  )}\n                  {searchSettings?.multipass_indexing !=\n                    advancedEmbeddingDetails.multipass_indexing && (\n                    <li>Multipass indexing modification</li>\n                  )}\n                  {searchSettings?.embedding_precision !=\n                    advancedEmbeddingDetails.embedding_precision && (\n                    <li>Embedding precision modification</li>\n                  )}\n                  {searchSettings?.reduced_dimension !=\n                    advancedEmbeddingDetails.reduced_dimension && (\n                    <li>Reduced dimension modification</li>\n                  )}\n                  {(searchSettings?.enable_contextual_rag !=\n                    advancedEmbeddingDetails.enable_contextual_rag ||\n                    searchSettings?.contextual_rag_llm_name !=\n                      advancedEmbeddingDetails.contextual_rag_llm_name ||\n                    searchSettings?.contextual_rag_llm_provider !=\n                      advancedEmbeddingDetails.contextual_rag_llm_provider) && (\n                    <li>Contextual RAG modification</li>\n                  )}\n                </ul>\n              </div>\n            </div>\n          )}\n          {!isOverallFormValid &&\n            Object.keys(combinedFormErrors).length > 0 && (\n              <div className=\"relative group\">\n                <Warning\n                  className=\"text-red-500 cursor-help\"\n                  size={20}\n                  weight=\"fill\"\n                />\n                <div className=\"absolute z-10 invisible group-hover:visible bg-background-800 text-text-200 text-sm rounded-md shadow-md p-2 right-0 mt-1 w-64\">\n                  <p className=\"font-semibold mb-2\">Validation Errors:</p>\n                  <ul className=\"list-disc pl-5\">\n                    {Object.entries(combinedFormErrors).map(\n                      ([field, error]) => (\n                        <li key={field}>\n                          {field}: {error}\n                        </li>\n                      )\n                    )}\n                  </ul>\n                </div>\n              </div>\n            )}\n        </div>\n      ) : (\n        <div className=\"flex mx-auto gap-x-1 ml-auto items-center\">\n          <OpalButton\n            disabled={!isOverallFormValid}\n            onClick={() => {\n              updateSearch();\n              navigateToEmbeddingPage(\"search settings\");\n            }}\n          >\n            Update Search\n          </OpalButton>\n          {!isOverallFormValid &&\n            Object.keys(combinedFormErrors).length > 0 && (\n              <div className=\"relative group\">\n                <Warning\n                  className=\"text-red-500 cursor-help\"\n                  size={20}\n                  weight=\"fill\"\n                />\n                <div className=\"absolute z-10 invisible group-hover:visible bg-background-800 text-text-200 text-sm rounded-md shadow-md p-2 right-0 mt-1 w-64\">\n                  <p className=\"font-semibold mb-2 text-red-400\">\n                    Validation Errors:\n                  </p>\n                  <ul className=\"list-disc pl-5\">\n                    {Object.entries(combinedFormErrors).map(\n                      ([field, error]) => (\n                        <li key={field}>{error}</li>\n                      )\n                    )}\n                  </ul>\n                </div>\n              </div>\n            )}\n        </div>\n      );\n    };\n    ReIndexingButtonComponent.displayName = \"ReIndexingButton\";\n    return ReIndexingButtonComponent;\n  }, [needsReIndex, switchoverType, isOverallFormValid, combinedFormErrors]);\n\n  if (!selectedProvider) {\n    return <ThreeDotsLoader />;\n  }\n  if (currentEmbeddingModelError || !currentEmbeddingModel) {\n    return <ErrorCallout errorTitle=\"Failed to fetch embedding model status\" />;\n  }\n\n  const updateCurrentModel = (newModel: string) => {\n    setAdvancedEmbeddingDetails((values) => ({\n      ...values,\n      model_name: newModel,\n    }));\n  };\n\n  const navigateToEmbeddingPage = (changedResource: string) => {\n    router.push(\"/admin/configuration/search?message=search-settings\");\n  };\n\n  const handleReIndex = async () => {\n    if (!selectedProvider) {\n      return;\n    }\n    let searchSettings: SavedSearchSettings;\n\n    if (selectedProvider.provider_type != null) {\n      // This is a cloud model\n      searchSettings = combineSearchSettings(\n        selectedProvider,\n        advancedEmbeddingDetails,\n        rerankingDetails,\n        selectedProvider.provider_type\n          ?.toLowerCase()\n          .split(\" \")[0] as EmbeddingProvider | null,\n        switchoverType\n      );\n    } else {\n      // This is a locally hosted model\n      searchSettings = combineSearchSettings(\n        selectedProvider,\n        advancedEmbeddingDetails,\n        rerankingDetails,\n        null,\n        switchoverType\n      );\n    }\n\n    searchSettings.index_name = null;\n\n    const response = await fetch(\n      \"/api/search-settings/set-new-search-settings\",\n      {\n        method: \"POST\",\n        body: JSON.stringify(searchSettings),\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n      }\n    );\n\n    if (response.ok) {\n      navigateToEmbeddingPage(\"embedding model\");\n    } else {\n      toast.error(\"Failed to update embedding model\");\n\n      alert(`Failed to update embedding model - ${await response.text()}`);\n    }\n  };\n\n  return (\n    <div className=\"mx-auto mb-8 w-full\">\n      <div className=\"mx-auto max-w-4xl\">\n        {formStep == 0 && (\n          <>\n            <h2 className=\"text-2xl font-bold mb-4 text-text-800\">\n              Select an Embedding Model\n            </h2>\n            <Text as=\"p\" className=\"mb-4\">\n              Note that updating the backing model will require a complete\n              re-indexing of all documents across every connected source. This\n              is taken care of in the background so that the system can continue\n              to be used, but depending on the size of the corpus, this could\n              take hours or days. You can monitor the progress of the\n              re-indexing on this page while the models are being switched.\n            </Text>\n            <CardSection>\n              <EmbeddingModelSelection\n                updateCurrentModel={updateCurrentModel}\n                setModelTab={setModelTab}\n                modelTab={modelTab}\n                selectedProvider={selectedProvider}\n                currentEmbeddingModel={currentEmbeddingModel}\n                updateSelectedProvider={updateSelectedProvider}\n                advancedEmbeddingDetails={advancedEmbeddingDetails}\n              />\n            </CardSection>\n            <div className=\"mt-4 flex w-full justify-end\">\n              <OpalButton\n                variant=\"action\"\n                onClick={() => {\n                  if (\n                    selectedProvider.model_name.includes(\"e5\") &&\n                    displayPoorModelName\n                  ) {\n                    setDisplayPoorModelName(false);\n                    setShowPoorModel(true);\n                  } else {\n                    // Skip reranking step (step 1), go directly to advanced settings (step 2)\n                    nextFormStep();\n                    nextFormStep();\n                  }\n                }}\n                rightIcon={SvgArrowRight}\n              >\n                Continue\n              </OpalButton>\n            </div>\n          </>\n        )}\n        {showPoorModel && (\n          <Modal open onOpenChange={() => setShowPoorModel(false)}>\n            <Modal.Content>\n              <Modal.Header\n                icon={SvgAlertTriangle}\n                title={`Are you sure you want to select ${selectedProvider.model_name}?`}\n                onClose={() => setShowPoorModel(false)}\n              />\n              <Modal.Body>\n                <div className=\"text-lg\">\n                  <Text as=\"p\">\n                    {`${selectedProvider.model_name} is a lower accuracy model. We recommend the following alternatives:`}\n                  </Text>\n                  <ul className=\"list-disc list-inside mt-2 ml-4\">\n                    <li>\n                      <Text as=\"p\">\n                        Cohere embed-english-v3.0 for cloud-based\n                      </Text>\n                    </li>\n                    <li>\n                      <Text as=\"p\">\n                        Nomic nomic-embed-text-v1 for self-hosted\n                      </Text>\n                    </li>\n                  </ul>\n                </div>\n              </Modal.Body>\n              <Modal.Footer>\n                <OpalButton\n                  prominence=\"secondary\"\n                  onClick={() => setShowPoorModel(false)}\n                >\n                  Cancel update\n                </OpalButton>\n                <OpalButton\n                  onClick={() => {\n                    setShowPoorModel(false);\n                    // Skip reranking step (step 1), go directly to advanced settings (step 2)\n                    nextFormStep();\n                    nextFormStep();\n                  }}\n                >\n                  {`Continue with ${selectedProvider.model_name}`}\n                </OpalButton>\n              </Modal.Footer>\n            </Modal.Content>\n          </Modal>\n        )}\n\n        {showInstantSwitchConfirm && (\n          <InstantSwitchConfirmModal\n            onClose={() => setShowInstantSwitchConfirm(false)}\n            onConfirm={() => {\n              setShowInstantSwitchConfirm(false);\n              handleReIndex();\n              navigateToEmbeddingPage(\"search settings\");\n            }}\n          />\n        )}\n\n        {formStep == 1 && (\n          <>\n            <h2 className=\"text-2xl font-bold mb-4 text-text-800\">\n              Select a Reranking Model\n            </h2>\n            <Text as=\"p\" className=\"mb-4\">\n              Updating the reranking model does not require re-indexing\n              documents. The reranker helps improve search quality by reordering\n              results after the initial embedding search. Changes will take\n              effect immediately for all new searches.\n            </Text>\n\n            <CardSection>\n              <RerankingDetailsForm\n                ref={rerankFormRef}\n                setModelTab={setModelTab}\n                modelTab={\n                  originalRerankingDetails.rerank_model_name\n                    ? modelTab\n                    : modelTab || \"cloud\"\n                }\n                currentRerankingDetails={rerankingDetails}\n                originalRerankingDetails={originalRerankingDetails}\n                setRerankingDetails={setRerankingDetails}\n                onValidationChange={handleRerankValidationChange}\n              />\n            </CardSection>\n\n            <div className={`mt-4 w-full grid grid-cols-3`}>\n              <OpalButton\n                prominence=\"secondary\"\n                icon={SvgArrowLeft}\n                onClick={() => prevFormStep()}\n              >\n                Previous\n              </OpalButton>\n\n              <ReIndexingButton needsReIndex={needsReIndex} />\n\n              <div className=\"flex w-full justify-end\">\n                <OpalButton\n                  prominence=\"secondary\"\n                  onClick={() => {\n                    nextFormStep();\n                  }}\n                  rightIcon={SvgArrowRight}\n                >\n                  Advanced\n                </OpalButton>\n              </div>\n            </div>\n          </>\n        )}\n        {formStep == 2 && (\n          <>\n            <h2 className=\"text-2xl font-bold mb-4 text-text-800\">\n              Advanced Search Configuration\n            </h2>\n            <Text as=\"p\" className=\"mb-4\">\n              Configure advanced embedding and search settings. Changes will\n              require re-indexing documents.\n            </Text>\n\n            <CardSection>\n              <AdvancedEmbeddingFormPage\n                ref={advancedFormRef}\n                advancedEmbeddingDetails={advancedEmbeddingDetails}\n                updateAdvancedEmbeddingDetails={updateAdvancedEmbeddingDetails}\n                embeddingProviderType={selectedProvider.provider_type}\n                onValidationChange={handleValidationChange}\n              />\n            </CardSection>\n\n            <div className={`mt-4 grid  grid-cols-3 w-full `}>\n              <OpalButton\n                prominence=\"secondary\"\n                onClick={() => {\n                  // Skip reranking step (step 1), go back to embedding model (step 0)\n                  prevFormStep();\n                  prevFormStep();\n                }}\n                icon={SvgArrowLeft}\n              >\n                Previous\n              </OpalButton>\n\n              <ReIndexingButton needsReIndex={needsReIndex} />\n            </div>\n          </>\n        )}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/pages/OpenEmbeddingPage.tsx",
    "content": "\"use client\";\n\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Text } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport Title from \"@/components/ui/title\";\nimport { ModelSelector } from \"../../../../components/embedding/ModelSelector\";\nimport {\n  AVAILABLE_MODELS,\n  CloudEmbeddingModel,\n  HostedEmbeddingModel,\n} from \"../../../../components/embedding/interfaces\";\nimport { CustomModelForm } from \"../../../../components/embedding/CustomModelForm\";\nimport { useState } from \"react\";\nimport CardSection from \"@/components/admin/CardSection\";\nexport default function OpenEmbeddingPage({\n  onSelectOpenSource,\n  selectedProvider,\n}: {\n  onSelectOpenSource: (model: HostedEmbeddingModel) => void;\n  selectedProvider: HostedEmbeddingModel | CloudEmbeddingModel;\n}) {\n  const [configureModel, setConfigureModel] = useState(false);\n  return (\n    <div>\n      <Title className=\"mt-8\">\n        Here are some locally-hosted models to choose from.\n      </Title>\n      <Text as=\"p\">\n        {\n          \"These models can be used without any API keys, and can leverage a GPU for faster inference.\"\n        }\n      </Text>\n      <Spacer rem={1} />\n      <ModelSelector\n        modelOptions={AVAILABLE_MODELS}\n        setSelectedModel={onSelectOpenSource}\n        currentEmbeddingModel={selectedProvider}\n      />\n\n      <Spacer rem={1.5} />\n      <Text as=\"p\">\n        {markdown(\n          \"Alternatively, (if you know what you're doing) you can specify a [SentenceTransformers](https://www.sbert.net/)-compatible model of your choice below. The rough list of supported models can be found [here](https://huggingface.co/models?library=sentence-transformers&sort=trending).\"\n        )}\n      </Text>\n      <Text as=\"p\">\n        {markdown(\n          \"**NOTE:** not all models listed will work with Onyx, since some have unique interfaces or special requirements. If in doubt, reach out to the Onyx team.\"\n        )}\n      </Text>\n      {!configureModel && (\n        // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n        <Button\n          onClick={() => setConfigureModel(true)}\n          className=\"mt-4\"\n          secondary\n        >\n          Configure custom model\n        </Button>\n      )}\n      {configureModel && (\n        <div className=\"w-full flex\">\n          <CardSection className=\"mt-4 2xl:w-4/6 mx-auto\">\n            <CustomModelForm onSubmit={onSelectOpenSource} />\n          </CardSection>\n        </div>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/embeddings/pages/utils.ts",
    "content": "import {\n  CloudEmbeddingProvider,\n  HostedEmbeddingModel,\n} from \"@/components/embedding/interfaces\";\n\nimport {\n  AdvancedSearchConfiguration,\n  SavedSearchSettings,\n  SwitchoverType,\n} from \"../interfaces\";\n\nimport { EmbeddingProvider } from \"@/components/embedding/interfaces\";\nimport { RerankingDetails } from \"../interfaces\";\n\nexport const deleteSearchSettings = async (search_settings_id: number) => {\n  const response = await fetch(`/api/search-settings/delete-search-settings`, {\n    method: \"DELETE\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({ search_settings_id }),\n  });\n  return response;\n};\n\nexport const testEmbedding = async ({\n  provider_type,\n  modelName,\n  apiKey,\n  apiUrl,\n  apiVersion,\n  deploymentName,\n}: {\n  provider_type: string;\n  modelName: string;\n  apiKey: string | null;\n  apiUrl: string | null;\n  apiVersion: string | null;\n  deploymentName: string | null;\n}) => {\n  const testModelName =\n    provider_type === \"openai\" ? \"text-embedding-3-small\" : modelName;\n\n  const testResponse = await fetch(\"/api/admin/embedding/test-embedding\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      provider_type: provider_type,\n      api_key: apiKey,\n      api_url: apiUrl,\n      model_name: testModelName,\n      api_version: apiVersion,\n      deployment_name: deploymentName,\n    }),\n  });\n\n  return testResponse;\n};\n\n// We use a spread operation to merge properties from multiple objects into a single object.\n// Advanced embedding details may update default values.\n// Do NOT modify the order unless you are positive the new hierarchy is correct.\nexport const combineSearchSettings = (\n  selectedProvider: CloudEmbeddingProvider | HostedEmbeddingModel,\n  advancedEmbeddingDetails: AdvancedSearchConfiguration,\n  rerankingDetails: RerankingDetails,\n  provider_type: EmbeddingProvider | null,\n  switchover_type?: SwitchoverType\n): SavedSearchSettings => {\n  return {\n    ...selectedProvider,\n    ...advancedEmbeddingDetails,\n    ...rerankingDetails,\n    provider_type: provider_type,\n    switchover_type,\n  };\n};\n"
  },
  {
    "path": "web/src/app/admin/federated/[id]/page.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { notFound } from \"next/navigation\";\nimport { Loader2 } from \"lucide-react\";\nimport { useFederatedConnector } from \"./useFederatedConnector\";\nimport { FederatedConnectorForm } from \"@/components/admin/federated/FederatedConnectorForm\";\n\nexport default function EditFederatedConnectorPage(props: {\n  params: Promise<{ id: string }>;\n}) {\n  const [params, setParams] = useState<{ id: string } | null>(null);\n\n  useEffect(() => {\n    props.params.then(setParams);\n  }, [props.params]);\n\n  const { sourceType, connectorData, credentialSchema, isLoading, error } =\n    useFederatedConnector(params?.id ?? \"\");\n\n  if (isLoading) {\n    return (\n      <div className=\"flex justify-center w-full h-full\">\n        <div className=\"mt-12 w-full max-w-4xl mx-auto\">\n          <div className=\"flex flex-col items-center justify-center py-16\">\n            <Loader2 className=\"h-8 w-8 animate-spin text-blue-500 mb-4\" />\n            <div className=\"text-center\">\n              <p className=\"text-lg font-medium text-gray-700 mb-2\">\n                Loading connector configuration...\n              </p>\n              <p className=\"text-sm text-gray-500\">\n                Retrieving connector details and credential schema\n              </p>\n            </div>\n          </div>\n        </div>\n      </div>\n    );\n  }\n\n  if (error) {\n    return (\n      <div className=\"flex justify-center w-full h-full\">\n        <div className=\"mt-12 w-full max-w-4xl mx-auto\">\n          <div className=\"text-center\">\n            <h1 className=\"text-2xl font-bold text-red-600 mb-4\">Error</h1>\n            <p className=\"text-gray-600\">{error}</p>\n          </div>\n        </div>\n      </div>\n    );\n  }\n\n  if (!sourceType || !params) {\n    notFound();\n  }\n\n  const connectorId = parseInt(params.id);\n\n  return (\n    <div className=\"flex justify-center w-full h-full\">\n      <div className=\"mt-12 w-full max-w-4xl mx-auto\">\n        <FederatedConnectorForm\n          connector={sourceType}\n          connectorId={connectorId}\n          preloadedConnectorData={connectorData ?? undefined}\n          preloadedCredentialSchema={credentialSchema ?? undefined}\n        />\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/federated/[id]/useFederatedConnector.ts",
    "content": "import { useState, useEffect } from \"react\";\nimport {\n  ConfigurableSources,\n  FederatedConnectorDetail,\n  CredentialSchemaResponse,\n} from \"@/lib/types\";\n\ninterface UseFederatedConnectorResult {\n  sourceType: ConfigurableSources | null;\n  connectorData: FederatedConnectorDetail | null;\n  credentialSchema: CredentialSchemaResponse | null;\n  isLoading: boolean;\n  error: string | null;\n}\n\nexport function useFederatedConnector(\n  connectorId: string\n): UseFederatedConnectorResult {\n  const [sourceType, setSourceType] = useState<ConfigurableSources | null>(\n    null\n  );\n  const [connectorData, setConnectorData] =\n    useState<FederatedConnectorDetail | null>(null);\n  const [credentialSchema, setCredentialSchema] =\n    useState<CredentialSchemaResponse | null>(null);\n  const [isLoading, setIsLoading] = useState(true);\n  const [error, setError] = useState<string | null>(null);\n\n  useEffect(() => {\n    const fetchData = async () => {\n      try {\n        setIsLoading(true);\n        setError(null);\n\n        // First, fetch connector details to get the source type\n        const connectorResponse = await fetch(`/api/federated/${connectorId}`);\n\n        if (!connectorResponse.ok) {\n          throw new Error(\n            `Failed to fetch connector: ${connectorResponse.statusText}`\n          );\n        }\n\n        const connectorData: FederatedConnectorDetail =\n          await connectorResponse.json();\n\n        // Extract source type from the federated source string (remove 'federated_' prefix)\n        const extractedSourceType = connectorData.source.replace(\n          /^federated_/,\n          \"\"\n        ) as ConfigurableSources;\n\n        // Now fetch credential schema and set state in parallel\n        const schemaPromise = fetch(\n          `/api/federated/sources/federated_${extractedSourceType}/credentials/schema`\n        );\n\n        // Set the data we already have\n        setConnectorData(connectorData);\n        setSourceType(extractedSourceType);\n\n        // Wait for schema fetch to complete\n        const schemaResponse = await schemaPromise;\n\n        if (!schemaResponse.ok) {\n          throw new Error(\n            `Failed to fetch schema: ${schemaResponse.statusText}`\n          );\n        }\n\n        const schemaData: CredentialSchemaResponse =\n          await schemaResponse.json();\n        setCredentialSchema(schemaData);\n      } catch (error) {\n        console.error(\"Error fetching federated connector data:\", error);\n        setError(`Failed to load connector: ${error}`);\n      } finally {\n        setIsLoading(false);\n      }\n    };\n\n    if (connectorId) {\n      fetchData();\n    }\n  }, [connectorId]);\n\n  return {\n    sourceType,\n    connectorData,\n    credentialSchema,\n    isLoading,\n    error,\n  };\n}\n"
  },
  {
    "path": "web/src/app/admin/groups/[id]/page.tsx",
    "content": "\"use client\";\n\nimport { use } from \"react\";\nimport EditGroupPage from \"@/refresh-pages/admin/GroupsPage/EditGroupPage\";\n\nexport default function EditGroupRoute({\n  params,\n}: {\n  params: Promise<{ id: string }>;\n}) {\n  const { id } = use(params);\n  return <EditGroupPage groupId={Number(id)} />;\n}\n"
  },
  {
    "path": "web/src/app/admin/groups/create/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/GroupsPage/CreateGroupPage\";\n"
  },
  {
    "path": "web/src/app/admin/groups/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/GroupsPage\";\n"
  },
  {
    "path": "web/src/app/admin/groups2/[id]/page.tsx",
    "content": "\"use client\";\n\nimport { use } from \"react\";\nimport EditGroupPage from \"@/refresh-pages/admin/GroupsPage/EditGroupPage\";\n\nexport default function EditGroupRoute({\n  params,\n}: {\n  params: Promise<{ id: string }>;\n}) {\n  const { id } = use(params);\n  const groupId = Number(id);\n  if (Number.isNaN(groupId)) {\n    return null;\n  }\n  return <EditGroupPage groupId={groupId} />;\n}\n"
  },
  {
    "path": "web/src/app/admin/groups2/create/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/GroupsPage/CreateGroupPage\";\n"
  },
  {
    "path": "web/src/app/admin/groups2/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/GroupsPage\";\n"
  },
  {
    "path": "web/src/app/admin/hooks/page.tsx",
    "content": "export { default } from \"@/ee/refresh-pages/admin/HooksPage\";\n"
  },
  {
    "path": "web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx",
    "content": "import React from \"react\";\nimport {\n  Table,\n  TableRow,\n  TableHead,\n  TableBody,\n  TableCell,\n  TableHeader,\n} from \"@/components/ui/table\";\nimport { Badge } from \"@/components/ui/badge\";\nimport { CCPairStatus } from \"@/components/Status\";\nimport { timeAgo } from \"@/lib/time\";\nimport {\n  ValidSources,\n  ConnectorIndexingStatusLiteResponse,\n  SourceSummary,\n  ConnectorIndexingStatusLite,\n  FederatedConnectorStatus,\n} from \"@/lib/types\";\nimport type { Route } from \"next\";\nimport { useRouter } from \"next/navigation\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport {\n  FiChevronDown,\n  FiChevronRight,\n  FiLock,\n  FiUnlock,\n  FiRefreshCw,\n} from \"react-icons/fi\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport { getSourceDisplayName } from \"@/lib/sources\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { ConnectorCredentialPairStatus } from \"../../connector/[ccPairId]/types\";\nimport { PageSelector } from \"@/components/PageSelector\";\nimport { ConnectorStaggeredSkeleton } from \"./ConnectorRowSkeleton\";\nimport { Button } from \"@opal/components\";\nimport { SvgSettings } from \"@opal/icons\";\n\n// Helper to handle navigation with cmd/ctrl+click support\n// NOTE: using this rather than Next/Link (or similar) since shadcn\n// table row components must be direct descendants of the table component\n// and putting the <Link> inside the <TableRow> would causes some parts of the\n// row to not navigate as expected.\nfunction navigateWithModifier(\n  e: React.MouseEvent,\n  url: string,\n  router: ReturnType<typeof useRouter>\n) {\n  if (e.metaKey || e.ctrlKey) {\n    window.open(url, \"_blank\");\n  } else {\n    router.push(url as Route);\n  }\n}\n\nfunction isFederatedConnectorStatus(\n  status: ConnectorIndexingStatusLite | FederatedConnectorStatus\n) {\n  return status.name?.toLowerCase().includes(\"federated\");\n}\n\nconst NUMBER_OF_ROWS_PER_PAGE = 10;\nconst NUMBER_OF_COLUMNS = 6;\n\nfunction SummaryRow({\n  source,\n  summary,\n  isOpen,\n  onToggle,\n}: {\n  source: ValidSources;\n  summary: SourceSummary;\n  isOpen: boolean;\n  onToggle: () => void;\n}) {\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n\n  return (\n    <TableRow\n      onClick={onToggle}\n      className=\"border-border dark:hover:bg-neutral-800 dark:border-neutral-700 group hover:bg-background-settings-hover/20 bg-background-sidebar py-4 rounded-sm !border cursor-pointer\"\n    >\n      <TableCell>\n        <div className=\"text-xl flex items-center truncate ellipsis gap-x-2 font-semibold\">\n          <div className=\"cursor-pointer\">\n            {isOpen ? (\n              <FiChevronDown size={20} />\n            ) : (\n              <FiChevronRight size={20} />\n            )}\n          </div>\n          <SourceIcon iconSize={20} sourceType={source} />\n          {getSourceDisplayName(source)}\n        </div>\n      </TableCell>\n\n      <TableCell>\n        <div className=\"text-sm text-neutral-500 dark:text-neutral-300\">\n          Total Connectors\n        </div>\n        <div className=\"text-xl font-semibold\">{summary.total_connectors}</div>\n      </TableCell>\n\n      <TableCell>\n        <div className=\"text-sm text-neutral-500 dark:text-neutral-300\">\n          Active Connectors\n        </div>\n        <p className=\"flex text-xl mx-auto font-semibold items-center text-lg mt-1\">\n          {summary.active_connectors}/{summary.total_connectors}\n        </p>\n      </TableCell>\n\n      {isPaidEnterpriseFeaturesEnabled && (\n        <TableCell>\n          <div className=\"text-sm text-neutral-500 dark:text-neutral-300\">\n            Public Connectors\n          </div>\n          <p className=\"flex text-xl mx-auto font-semibold items-center text-lg mt-1\">\n            {summary.public_connectors}/{summary.total_connectors}\n          </p>\n        </TableCell>\n      )}\n\n      <TableCell>\n        <div className=\"text-sm text-neutral-500 dark:text-neutral-300\">\n          Total Docs Indexed\n        </div>\n        <div className=\"text-xl font-semibold\">\n          {summary.total_docs_indexed.toLocaleString()}\n        </div>\n      </TableCell>\n\n      <TableCell />\n    </TableRow>\n  );\n}\n\nfunction ConnectorRow({\n  ccPairsIndexingStatus,\n  invisible,\n  isEditable,\n}: {\n  ccPairsIndexingStatus: ConnectorIndexingStatusLite;\n  invisible?: boolean;\n  isEditable: boolean;\n}) {\n  const router = useRouter();\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n\n  const connectorUrl = `/admin/connector/${ccPairsIndexingStatus.cc_pair_id}`;\n\n  const handleRowClick = (e: React.MouseEvent) => {\n    navigateWithModifier(e, connectorUrl, router);\n  };\n\n  return (\n    <TableRow\n      className={`\n  border border-border dark:border-neutral-700\n          hover:bg-accent-background ${\n            invisible\n              ? \"invisible !h-0 !-mb-10 !border-none\"\n              : \"!border border-border dark:border-neutral-700\"\n          }  w-full cursor-pointer relative `}\n      onClick={handleRowClick}\n    >\n      <TableCell className=\"\">\n        <Truncated>{ccPairsIndexingStatus.name}</Truncated>\n      </TableCell>\n      <TableCell>\n        {timeAgo(ccPairsIndexingStatus?.last_success) || \"-\"}\n      </TableCell>\n      <TableCell>\n        <CCPairStatus\n          ccPairStatus={\n            ccPairsIndexingStatus.last_finished_status !== null\n              ? ccPairsIndexingStatus.cc_pair_status\n              : ccPairsIndexingStatus.last_status == \"not_started\"\n                ? ConnectorCredentialPairStatus.SCHEDULED\n                : ConnectorCredentialPairStatus.INITIAL_INDEXING\n          }\n          inRepeatedErrorState={ccPairsIndexingStatus.in_repeated_error_state}\n          lastIndexAttemptStatus={ccPairsIndexingStatus.last_status}\n        />\n      </TableCell>\n      {isPaidEnterpriseFeaturesEnabled && (\n        <TableCell>\n          {ccPairsIndexingStatus.access_type === \"public\" ? (\n            <Badge variant={isEditable ? \"success\" : \"default\"} icon={FiUnlock}>\n              Organization Public\n            </Badge>\n          ) : ccPairsIndexingStatus.access_type === \"sync\" ? (\n            <Badge\n              variant={isEditable ? \"auto-sync\" : \"default\"}\n              icon={FiRefreshCw}\n            >\n              Inherited from{\" \"}\n              {getSourceDisplayName(ccPairsIndexingStatus.source)}\n            </Badge>\n          ) : (\n            <Badge variant={isEditable ? \"private\" : \"default\"} icon={FiLock}>\n              Private\n            </Badge>\n          )}\n        </TableCell>\n      )}\n      <TableCell>{ccPairsIndexingStatus.docs_indexed}</TableCell>\n      <TableCell>\n        {isEditable && (\n          <SimpleTooltip tooltip=\"Manage Connector\">\n            <Button icon={SvgSettings} prominence=\"tertiary\" />\n          </SimpleTooltip>\n        )}\n      </TableCell>\n    </TableRow>\n  );\n}\n\nfunction FederatedConnectorRow({\n  federatedConnector,\n  invisible,\n}: {\n  federatedConnector: FederatedConnectorStatus;\n  invisible?: boolean;\n}) {\n  const router = useRouter();\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n\n  const federatedUrl = `/admin/federated/${federatedConnector.id}`;\n\n  const handleRowClick = (e: React.MouseEvent) => {\n    navigateWithModifier(e, federatedUrl, router);\n  };\n\n  return (\n    <TableRow\n      className={`\n  border border-border dark:border-neutral-700\n          hover:bg-accent-background ${\n            invisible\n              ? \"invisible !h-0 !-mb-10 !border-none\"\n              : \"!border border-border dark:border-neutral-700\"\n          }  w-full cursor-pointer relative `}\n      onClick={handleRowClick}\n    >\n      <TableCell className=\"\">\n        <Truncated>{federatedConnector.name}</Truncated>\n      </TableCell>\n      <TableCell>N/A</TableCell>\n      <TableCell>\n        <Badge variant=\"success\">Indexed</Badge>\n      </TableCell>\n      {isPaidEnterpriseFeaturesEnabled && (\n        <TableCell>\n          <Badge variant=\"secondary\" icon={FiRefreshCw}>\n            Federated Access\n          </Badge>\n        </TableCell>\n      )}\n      <TableCell>N/A</TableCell>\n      <TableCell>\n        <Button\n          icon={SvgSettings}\n          prominence=\"tertiary\"\n          onClick={(e: React.MouseEvent) => {\n            e.stopPropagation();\n            navigateWithModifier(e, federatedUrl, router);\n          }}\n          tooltip=\"Manage Federated Connector\"\n        />\n      </TableCell>\n    </TableRow>\n  );\n}\n\nexport function CCPairIndexingStatusTable({\n  ccPairsIndexingStatuses,\n  connectorsToggled,\n  toggleSource,\n  onPageChange,\n  sourceLoadingStates = {} as Record<ValidSources, boolean>,\n}: {\n  ccPairsIndexingStatuses: ConnectorIndexingStatusLiteResponse[];\n  connectorsToggled: Record<ValidSources, boolean>;\n  toggleSource: (source: ValidSources, toggled?: boolean | null) => void;\n  onPageChange: (source: ValidSources, newPage: number) => void;\n  sourceLoadingStates?: Record<ValidSources, boolean>;\n}) {\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n\n  return (\n    <Table className=\"-mt-8 table-fixed\">\n      <TableHeader>\n        <ConnectorRow\n          invisible\n          ccPairsIndexingStatus={{\n            cc_pair_id: 1,\n            name: \"Sample File Connector\",\n            cc_pair_status: ConnectorCredentialPairStatus.ACTIVE,\n            last_status: \"success\",\n            source: ValidSources.File,\n            access_type: \"public\",\n            docs_indexed: 1000,\n            last_success: \"2023-07-01T12:00:00Z\",\n            last_finished_status: \"success\",\n            is_editable: false,\n            in_repeated_error_state: false,\n            in_progress: false,\n            latest_index_attempt_docs_indexed: 0,\n          }}\n          isEditable={false}\n        />\n      </TableHeader>\n      <TableBody>\n        {ccPairsIndexingStatuses.map((ccPairStatus) => (\n          <React.Fragment key={ccPairStatus.source}>\n            <TableRow className=\"border-none\">\n              <TableCell\n                colSpan={\n                  isPaidEnterpriseFeaturesEnabled\n                    ? NUMBER_OF_COLUMNS\n                    : NUMBER_OF_COLUMNS - 1\n                }\n                className=\"h-4 p-0\"\n              />\n            </TableRow>\n            <SummaryRow\n              source={ccPairStatus.source}\n              summary={ccPairStatus.summary}\n              isOpen={connectorsToggled[ccPairStatus.source] || false}\n              onToggle={() => toggleSource(ccPairStatus.source)}\n            />\n            {connectorsToggled[ccPairStatus.source] && (\n              <>\n                {sourceLoadingStates[ccPairStatus.source] && (\n                  <ConnectorStaggeredSkeleton rowCount={8} height=\"h-[79px]\" />\n                )}\n                {!sourceLoadingStates[ccPairStatus.source] && (\n                  <>\n                    <TableRow className=\"border border-border dark:border-neutral-700\">\n                      <TableHead>Name</TableHead>\n                      <TableHead>Last Indexed</TableHead>\n                      <TableHead>Status</TableHead>\n                      {isPaidEnterpriseFeaturesEnabled && (\n                        <TableHead>Permissions / Access</TableHead>\n                      )}\n                      <TableHead>Total Docs</TableHead>\n                      <TableHead></TableHead>\n                    </TableRow>\n                    {ccPairStatus.indexing_statuses.map((indexingStatus) => {\n                      if (isFederatedConnectorStatus(indexingStatus)) {\n                        const status =\n                          indexingStatus as FederatedConnectorStatus;\n                        return (\n                          <FederatedConnectorRow\n                            key={status.id}\n                            federatedConnector={status}\n                          />\n                        );\n                      } else {\n                        const status =\n                          indexingStatus as ConnectorIndexingStatusLite;\n                        return (\n                          <ConnectorRow\n                            key={status.cc_pair_id}\n                            ccPairsIndexingStatus={status}\n                            isEditable={status.is_editable}\n                          />\n                        );\n                      }\n                    })}\n                    {/* Add dummy rows to reach 10 total rows for cleaner UI */}\n                    {ccPairStatus.indexing_statuses.length <\n                      NUMBER_OF_ROWS_PER_PAGE &&\n                      ccPairStatus.total_pages > 1 &&\n                      Array.from({\n                        length:\n                          NUMBER_OF_ROWS_PER_PAGE -\n                          ccPairStatus.indexing_statuses.length,\n                      }).map((_, index) => {\n                        const isLastDummyRow =\n                          index ===\n                          NUMBER_OF_ROWS_PER_PAGE -\n                            ccPairStatus.indexing_statuses.length -\n                            1;\n                        return (\n                          <TableRow\n                            key={`dummy-${ccPairStatus.source}-${index}`}\n                            className={\n                              isLastDummyRow\n                                ? \"border-l border-r border-b border-border dark:border-neutral-700\"\n                                : \"border-l border-r border-t-0 border-b-0 border-border dark:border-neutral-700\"\n                            }\n                            style={\n                              isLastDummyRow\n                                ? {\n                                    borderBottom: \"1px solid var(--border)\",\n                                    borderRight: \"1px solid var(--border)\",\n                                    borderLeft: \"1px solid var(--border)\",\n                                  }\n                                : {}\n                            }\n                          >\n                            {isLastDummyRow ? (\n                              <TableCell\n                                colSpan={\n                                  isPaidEnterpriseFeaturesEnabled\n                                    ? NUMBER_OF_COLUMNS\n                                    : NUMBER_OF_COLUMNS - 1\n                                }\n                                className=\"h-[56px] text-center text-sm text-gray-400 dark:text-gray-500 border-b border-r border-l border-border dark:border-neutral-700\"\n                              >\n                                <span className=\"italic\">\n                                  All caught up! No more connectors to show\n                                </span>\n                              </TableCell>\n                            ) : (\n                              <>\n                                <TableCell className=\"h-[56px]\"></TableCell>\n                                <TableCell></TableCell>\n                                <TableCell></TableCell>\n                                {isPaidEnterpriseFeaturesEnabled && (\n                                  <TableCell></TableCell>\n                                )}\n                                <TableCell></TableCell>\n                                <TableCell></TableCell>\n                              </>\n                            )}\n                          </TableRow>\n                        );\n                      })}\n                  </>\n                )}\n                {ccPairStatus.total_pages > 1 && (\n                  <TableRow className=\"border-l border-r border-b border-border dark:border-neutral-700\">\n                    <TableCell\n                      colSpan={\n                        isPaidEnterpriseFeaturesEnabled\n                          ? NUMBER_OF_COLUMNS\n                          : NUMBER_OF_COLUMNS - 1\n                      }\n                    >\n                      <div className=\"flex justify-center\">\n                        <PageSelector\n                          currentPage={ccPairStatus.current_page}\n                          totalPages={ccPairStatus.total_pages}\n                          onPageChange={(newPage) =>\n                            onPageChange(ccPairStatus.source, newPage)\n                          }\n                        />\n                      </div>\n                    </TableCell>\n                  </TableRow>\n                )}\n              </>\n            )}\n          </React.Fragment>\n        ))}\n      </TableBody>\n    </Table>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/indexing/status/ConnectorRowSkeleton.tsx",
    "content": "import React from \"react\";\nimport {\n  Table,\n  TableRow,\n  TableHead,\n  TableBody,\n  TableCell,\n  TableHeader,\n} from \"@/components/ui/table\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\n\n// Staggered loading animation skeleton with proper table column alignment\nexport function ConnectorStaggeredSkeleton({\n  rowCount = 5,\n  standalone = false,\n  height = \"h-20\",\n}: {\n  rowCount?: number;\n  standalone?: boolean; // if you want to show skeleton which is not in a table, set this to true\n  height?: string;\n}) {\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n\n  const skeletonRows = [...Array(rowCount)].map((_, index) => (\n    <TableRow\n      key={index}\n      className={`border border-border dark:border-neutral-700 hover:bg-accent-background animate-pulse ${height}`}\n      style={{\n        animationDelay: `${index * 150}ms`,\n        animationDuration: \"1.5s\",\n      }}\n    >\n      {/* Connector Name */}\n      <TableCell>\n        <div className=\"flex items-center gap-2\">\n          <div className=\"h-5 w-5 bg-neutral-200 dark:bg-neutral-700 rounded\"></div>\n          <div className=\"lg:w-[180px] xl:w-[350px] h-5 bg-neutral-200 dark:bg-neutral-700 rounded\"></div>\n        </div>\n      </TableCell>\n\n      {/* Last Success */}\n      <TableCell>\n        <div className=\"flex flex-col gap-1\">\n          <div className=\"h-3 w-20 bg-neutral-200 dark:bg-neutral-700 rounded\"></div>\n          <div className=\"h-4 w-16 bg-neutral-200 dark:bg-neutral-700 rounded\"></div>\n        </div>\n      </TableCell>\n\n      {/* Status */}\n      <TableCell>\n        <div className=\"flex items-center gap-2\">\n          <div className=\"h-2 w-2 bg-neutral-200 dark:bg-neutral-700 rounded-full\"></div>\n          <div className=\"h-6 w-24 bg-neutral-200 dark:bg-neutral-700 rounded-full\"></div>\n        </div>\n      </TableCell>\n\n      {/* Access Type (Enterprise only) */}\n      {isPaidEnterpriseFeaturesEnabled && (\n        <TableCell>\n          <div className=\"flex items-center gap-2\">\n            <div className=\"h-4 w-4 bg-neutral-200 dark:bg-neutral-700 rounded\"></div>\n            <div className=\"h-6 w-28 bg-neutral-200 dark:bg-neutral-700 rounded-full\"></div>\n          </div>\n        </TableCell>\n      )}\n\n      {/* Docs Indexed */}\n      <TableCell>\n        <div className=\"flex flex-col gap-1\">\n          <div className=\"h-3 w-8 bg-neutral-200 dark:bg-neutral-700 rounded\"></div>\n          <div className=\"h-5 w-16 bg-neutral-200 dark:bg-neutral-700 rounded\"></div>\n        </div>\n      </TableCell>\n\n      {/* Settings Icon */}\n      <TableCell>\n        <div className=\"flex items-center justify-center\">\n          <div className=\"h-5 w-5 bg-neutral-200 dark:bg-neutral-700 rounded\"></div>\n        </div>\n      </TableCell>\n    </TableRow>\n  ));\n\n  // If standalone, wrap in complete table structure\n  if (standalone) {\n    return (\n      <div className=\"w-full\">\n        <Table className=\"w-full\">\n          <TableBody>{skeletonRows}</TableBody>\n        </Table>\n      </div>\n    );\n  }\n\n  // If not standalone, just return the rows\n  return <>{skeletonRows}</>;\n}\n"
  },
  {
    "path": "web/src/app/admin/indexing/status/FilterComponent.tsx",
    "content": "\"use client\";\n\nimport React, { useState, useImperativeHandle, forwardRef } from \"react\";\nimport {\n  DropdownMenu,\n  DropdownMenuContent,\n  DropdownMenuGroup,\n  DropdownMenuLabel,\n  DropdownMenuSeparator,\n  DropdownMenuTrigger,\n  DropdownMenuCheckboxItem,\n} from \"@/components/ui/dropdown-menu\";\nimport { Input } from \"@/components/ui/input\";\nimport { Badge } from \"@/components/ui/badge\";\nimport { AccessType, ValidStatuses } from \"@/lib/types\";\nimport { Button } from \"@opal/components\";\nimport { SvgFilter } from \"@opal/icons\";\nexport interface FilterOptions {\n  accessType: AccessType[] | null;\n  docsCountFilter: {\n    operator: \">\" | \"<\" | \"=\" | null;\n    value: number | null;\n  };\n  lastStatus: ValidStatuses[] | null;\n}\n\ninterface FilterComponentProps {\n  onFilterChange: (filters: FilterOptions) => void;\n}\n\nexport const FilterComponent = forwardRef<\n  { resetFilters: () => void },\n  FilterComponentProps\n>(({ onFilterChange }, ref) => {\n  const [isOpen, setIsOpen] = useState(false);\n  const [filters, setFilters] = useState<FilterOptions>({\n    accessType: null,\n    docsCountFilter: {\n      operator: null,\n      value: null,\n    },\n    lastStatus: null,\n  });\n\n  // Local state for tracking selected filters before applying\n  const [docsOperator, setDocsOperator] = useState<\">\" | \"<\" | \"=\" | null>(\n    null\n  );\n  const [docsValue, setDocsValue] = useState<string>(\"\");\n  const [selectedAccessTypes, setSelectedAccessTypes] = useState<AccessType[]>(\n    []\n  );\n  const [selectedStatuses, setSelectedStatuses] = useState<ValidStatuses[]>([]);\n\n  // Expose resetFilters method via ref\n  useImperativeHandle(ref, () => ({\n    resetFilters: () => {\n      setDocsOperator(null);\n      setDocsValue(\"\");\n      setSelectedAccessTypes([]);\n      setSelectedStatuses([]);\n      setFilters({\n        accessType: null,\n        docsCountFilter: {\n          operator: null,\n          value: null,\n        },\n        lastStatus: null,\n      });\n    },\n  }));\n\n  const handleAccessTypeChange = (accessType: AccessType) => {\n    const newAccessTypes = selectedAccessTypes.includes(accessType)\n      ? selectedAccessTypes.filter((type) => type !== accessType)\n      : [...selectedAccessTypes, accessType];\n\n    setSelectedAccessTypes(newAccessTypes);\n  };\n\n  const handleStatusChange = (status: ValidStatuses) => {\n    const newStatuses = selectedStatuses.includes(status)\n      ? selectedStatuses.filter((s) => s !== status)\n      : [...selectedStatuses, status];\n\n    setSelectedStatuses(newStatuses);\n  };\n\n  const applyFilters = () => {\n    const newFilters = {\n      ...filters,\n      accessType: selectedAccessTypes.length > 0 ? selectedAccessTypes : null,\n      lastStatus: selectedStatuses.length > 0 ? selectedStatuses : null,\n      docsCountFilter: {\n        operator: docsOperator,\n        value: docsValue ? parseInt(docsValue) : null,\n      },\n    };\n\n    setFilters(newFilters);\n    onFilterChange(newFilters);\n    setIsOpen(false);\n  };\n\n  // Sync local state with filters when dropdown opens\n  const handleOpenChange = (open: boolean) => {\n    if (open) {\n      // When opening, initialize local state from current filters\n      setSelectedAccessTypes(filters.accessType || []);\n      setSelectedStatuses(filters.lastStatus || []);\n      setDocsOperator(filters.docsCountFilter.operator);\n      setDocsValue(\n        filters.docsCountFilter.value !== null\n          ? filters.docsCountFilter.value.toString()\n          : \"\"\n      );\n    }\n    setIsOpen(open);\n  };\n\n  const hasActiveFilters =\n    (filters.accessType && filters.accessType.length > 0) ||\n    (filters.lastStatus && filters.lastStatus.length > 0) ||\n    filters.docsCountFilter.operator !== null;\n\n  return (\n    <div className=\"relative\">\n      <DropdownMenu open={isOpen} onOpenChange={handleOpenChange}>\n        <DropdownMenuTrigger asChild>\n          <Button\n            icon={SvgFilter}\n            prominence=\"secondary\"\n            interaction={isOpen ? \"hover\" : \"rest\"}\n          />\n        </DropdownMenuTrigger>\n        <DropdownMenuContent\n          align=\"end\"\n          className=\"w-72\"\n          onCloseAutoFocus={(e) => e.preventDefault()}\n        >\n          <div className=\"flex items-center justify-between px-2 py-1.5\">\n            <DropdownMenuLabel className=\"text-base font-medium\">\n              Filter Connectors\n            </DropdownMenuLabel>\n          </div>\n          <DropdownMenuSeparator />\n\n          <DropdownMenuGroup>\n            <DropdownMenuLabel className=\"px-2 py-1.5 text-xs text-muted-foreground\">\n              Access Type\n            </DropdownMenuLabel>\n            <div onClick={(e) => e.stopPropagation()}>\n              <DropdownMenuCheckboxItem\n                checked={selectedAccessTypes.includes(\"public\")}\n                onCheckedChange={() => handleAccessTypeChange(\"public\")}\n                className=\"flex items-center justify-between\"\n                onSelect={(e) => e.preventDefault()}\n              >\n                Public\n              </DropdownMenuCheckboxItem>\n              <DropdownMenuCheckboxItem\n                checked={selectedAccessTypes.includes(\"private\")}\n                onCheckedChange={() => handleAccessTypeChange(\"private\")}\n                className=\"flex items-center justify-between\"\n                onSelect={(e) => e.preventDefault()}\n              >\n                Private\n              </DropdownMenuCheckboxItem>\n              <DropdownMenuCheckboxItem\n                checked={selectedAccessTypes.includes(\"sync\")}\n                onCheckedChange={() => handleAccessTypeChange(\"sync\")}\n                className=\"flex items-center justify-between\"\n                onSelect={(e) => e.preventDefault()}\n              >\n                Auto-Sync\n              </DropdownMenuCheckboxItem>\n            </div>\n          </DropdownMenuGroup>\n\n          <DropdownMenuSeparator />\n\n          <DropdownMenuGroup>\n            <DropdownMenuLabel className=\"px-2 py-1.5 text-xs text-muted-foreground\">\n              Last Status\n            </DropdownMenuLabel>\n            <div onClick={(e) => e.stopPropagation()}>\n              <DropdownMenuCheckboxItem\n                checked={selectedStatuses.includes(\"success\")}\n                onCheckedChange={() => handleStatusChange(\"success\")}\n                className=\"flex items-center justify-between\"\n                onSelect={(e) => e.preventDefault()}\n              >\n                Success\n              </DropdownMenuCheckboxItem>\n              <DropdownMenuCheckboxItem\n                checked={selectedStatuses.includes(\"failed\")}\n                onCheckedChange={() => handleStatusChange(\"failed\")}\n                className=\"flex items-center justify-between\"\n                onSelect={(e) => e.preventDefault()}\n              >\n                Failed\n              </DropdownMenuCheckboxItem>\n              <DropdownMenuCheckboxItem\n                checked={selectedStatuses.includes(\"in_progress\")}\n                onCheckedChange={() => handleStatusChange(\"in_progress\")}\n                className=\"flex items-center justify-between\"\n                onSelect={(e) => e.preventDefault()}\n              >\n                In Progress\n              </DropdownMenuCheckboxItem>\n              <DropdownMenuCheckboxItem\n                checked={selectedStatuses.includes(\"not_started\")}\n                onCheckedChange={() => handleStatusChange(\"not_started\")}\n                className=\"flex items-center justify-between\"\n                onSelect={(e) => e.preventDefault()}\n              >\n                Not Started\n              </DropdownMenuCheckboxItem>\n              <DropdownMenuCheckboxItem\n                checked={selectedStatuses.includes(\"completed_with_errors\")}\n                onCheckedChange={() =>\n                  handleStatusChange(\"completed_with_errors\")\n                }\n                className=\"flex items-center justify-between\"\n                onSelect={(e) => e.preventDefault()}\n              >\n                Completed with Errors\n              </DropdownMenuCheckboxItem>\n            </div>\n          </DropdownMenuGroup>\n\n          <DropdownMenuSeparator />\n\n          <DropdownMenuGroup>\n            <DropdownMenuLabel className=\"px-2 py-1.5 text-xs text-muted-foreground\">\n              Document Count\n            </DropdownMenuLabel>\n            <div\n              className=\"flex items-center px-2 py-2 gap-2\"\n              onClick={(e) => e.stopPropagation()}\n            >\n              <div className=\"flex gap-2\">\n                <Button\n                  prominence={docsOperator !== \">\" ? \"secondary\" : \"primary\"}\n                  onClick={(e) => {\n                    e.preventDefault();\n                    e.stopPropagation();\n                    setDocsOperator(docsOperator === \">\" ? null : \">\");\n                  }}\n                  type=\"button\"\n                >\n                  &gt;\n                </Button>\n                <Button\n                  prominence={docsOperator !== \"<\" ? \"secondary\" : \"primary\"}\n                  onClick={(e) => {\n                    e.preventDefault();\n                    e.stopPropagation();\n                    setDocsOperator(docsOperator === \"<\" ? null : \"<\");\n                  }}\n                  type=\"button\"\n                >\n                  &lt;\n                </Button>\n                <Button\n                  prominence={docsOperator !== \"=\" ? \"secondary\" : \"primary\"}\n                  onClick={(e) => {\n                    e.preventDefault();\n                    e.stopPropagation();\n                    setDocsOperator(docsOperator === \"=\" ? null : \"=\");\n                  }}\n                  type=\"button\"\n                >\n                  =\n                </Button>\n              </div>\n              <Input\n                type=\"number\"\n                placeholder=\"Count\"\n                value={docsValue}\n                onChange={(e) => setDocsValue(e.target.value)}\n                className=\"h-8 w-full\"\n                onClick={(e) => e.stopPropagation()}\n              />\n            </div>\n            <div className=\"px-2 py-1.5\">\n              <Button\n                width=\"full\"\n                onClick={(e) => {\n                  e.preventDefault();\n                  e.stopPropagation();\n                  applyFilters();\n                }}\n                type=\"button\"\n              >\n                Apply\n              </Button>\n            </div>\n          </DropdownMenuGroup>\n        </DropdownMenuContent>\n      </DropdownMenu>\n\n      {hasActiveFilters && (\n        <div className=\"absolute -top-1 -right-1\">\n          <Badge className=\"h-2 !bg-red-400 !border-red-400 w-2 p-0 border-2 flex items-center justify-center\" />\n        </div>\n      )}\n    </div>\n  );\n});\n\nFilterComponent.displayName = \"FilterComponent\";\n"
  },
  {
    "path": "web/src/app/admin/indexing/status/SearchAndFilterControls.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { Button } from \"@opal/components\";\nimport { Badge } from \"@/components/ui/badge\";\nimport { FilterComponent, FilterOptions } from \"./FilterComponent\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\n\ninterface SearchAndFilterControlsProps {\n  searchQuery: string;\n  onSearchChange: (query: string) => void;\n  hasExpandedSources: boolean;\n  onExpandAll: () => void;\n  onCollapseAll: () => void;\n  filterOptions: FilterOptions;\n  onFilterChange: (filterOptions: FilterOptions) => void;\n  onClearFilters: () => void;\n  hasActiveFilters: boolean;\n  filterComponentRef: React.RefObject<{ resetFilters: () => void }>;\n  resetPagination: () => void;\n}\n\nexport function SearchAndFilterControls({\n  searchQuery,\n  onSearchChange,\n  hasExpandedSources,\n  onExpandAll,\n  onCollapseAll,\n  filterOptions,\n  onFilterChange,\n  onClearFilters,\n  hasActiveFilters,\n  filterComponentRef,\n  resetPagination,\n}: SearchAndFilterControlsProps) {\n  const [localSearchValue, setLocalSearchValue] = useState(searchQuery);\n\n  // Debounce the search query\n  useEffect(() => {\n    const timer = setTimeout(() => {\n      resetPagination();\n      onSearchChange(localSearchValue);\n    }, 300);\n\n    return () => clearTimeout(timer);\n  }, [localSearchValue, onSearchChange, resetPagination]);\n\n  // Sync with external searchQuery changes (e.g., when filters are cleared)\n  useEffect(() => {\n    setLocalSearchValue(searchQuery);\n  }, [searchQuery]);\n\n  return (\n    <div className=\"flex items-center gap-x-2\">\n      <InputTypeIn\n        placeholder=\"Search Connectors\"\n        type=\"text\"\n        value={localSearchValue}\n        onChange={(event) => setLocalSearchValue(event.target.value)}\n        className=\"w-96\"\n      />\n\n      <Button onClick={hasExpandedSources ? onCollapseAll : onExpandAll}>\n        {hasExpandedSources ? \"Collapse All\" : \"Expand All\"}\n      </Button>\n\n      <div className=\"flex items-center gap-2\">\n        <FilterComponent\n          onFilterChange={onFilterChange}\n          ref={filterComponentRef}\n        />\n\n        {hasActiveFilters && (\n          <div className=\"flex flex-none items-center gap-1 ml-2 max-w-[500px]\">\n            {filterOptions.accessType &&\n              filterOptions.accessType.length > 0 && (\n                <Badge variant=\"secondary\" className=\"px-2 py-0.5 text-xs\">\n                  Access: {filterOptions.accessType.join(\", \")}\n                </Badge>\n              )}\n\n            {filterOptions.lastStatus &&\n              filterOptions.lastStatus.length > 0 && (\n                <Badge variant=\"secondary\" className=\"px-2 py-0.5 text-xs\">\n                  Status:{\" \"}\n                  {filterOptions.lastStatus\n                    .map((s) => s.replace(/_/g, \" \"))\n                    .join(\", \")}\n                </Badge>\n              )}\n\n            {filterOptions.docsCountFilter.operator &&\n              filterOptions.docsCountFilter.value !== null && (\n                <Badge variant=\"secondary\" className=\"px-2 py-0.5 text-xs\">\n                  Docs {filterOptions.docsCountFilter.operator}{\" \"}\n                  {filterOptions.docsCountFilter.value}\n                </Badge>\n              )}\n\n            {filterOptions.docsCountFilter.operator &&\n              filterOptions.docsCountFilter.value === null && (\n                <Badge variant=\"secondary\" className=\"px-2 py-0.5 text-xs\">\n                  Docs {filterOptions.docsCountFilter.operator} any\n                </Badge>\n              )}\n\n            <Badge\n              variant=\"outline\"\n              className=\"px-2 py-0.5 text-xs border-red-400  bg-red-100 hover:border-red-600 cursor-pointer hover:bg-red-100 dark:hover:bg-red-900\"\n              onClick={onClearFilters}\n            >\n              <span className=\"text-red-500 dark:text-red-400\">Clear</span>\n            </Badge>\n          </div>\n        )}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/indexing/status/page.tsx",
    "content": "\"use client\";\n\nimport { CCPairIndexingStatusTable } from \"./CCPairIndexingStatusTable\";\nimport { SearchAndFilterControls } from \"./SearchAndFilterControls\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport Link from \"next/link\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport { Text } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport { useConnectorIndexingStatusWithPagination } from \"@/lib/hooks\";\nimport { useToastFromQuery } from \"@/hooks/useToast\";\nimport { Button } from \"@opal/components\";\nimport { useVectorDbEnabled } from \"@/providers/SettingsProvider\";\nimport { useState, useRef, useMemo, RefObject } from \"react\";\nimport { FilterOptions } from \"./FilterComponent\";\nimport { ValidSources } from \"@/lib/types\";\nimport Cookies from \"js-cookie\";\nimport { TOGGLED_CONNECTORS_COOKIE_NAME } from \"@/lib/constants\";\nimport { ConnectorStaggeredSkeleton } from \"./ConnectorRowSkeleton\";\nimport { IndexingStatusRequest } from \"@/lib/types\";\n\nconst route = ADMIN_ROUTES.INDEXING_STATUS;\n\nfunction Main() {\n  const vectorDbEnabled = useVectorDbEnabled();\n\n  // State for filter management\n  const [filterOptions, setFilterOptions] = useState<FilterOptions>({\n    accessType: null,\n    docsCountFilter: {\n      operator: null,\n      value: null,\n    },\n    lastStatus: null,\n  });\n\n  // State for search\n  const [searchQuery, setSearchQuery] = useState<string>(\"\");\n\n  // State for collapse/expand functionality\n  const [connectorsToggled, setConnectorsToggled] = useState<\n    Record<ValidSources, boolean>\n  >(() => {\n    const savedState = Cookies.get(TOGGLED_CONNECTORS_COOKIE_NAME);\n    return savedState ? JSON.parse(savedState) : {};\n  });\n\n  // Reference to the FilterComponent for resetting its state\n  const filterComponentRef = useRef<{\n    resetFilters: () => void;\n  }>(null);\n\n  // Convert filter options to API request format\n  const request: IndexingStatusRequest = useMemo(() => {\n    return {\n      secondary_index: false,\n      access_type_filters: filterOptions.accessType || [],\n      last_status_filters: filterOptions.lastStatus || [],\n      docs_count_operator: filterOptions.docsCountFilter.operator,\n      docs_count_value: filterOptions.docsCountFilter.value,\n      name_filter: searchQuery,\n    };\n  }, [filterOptions, searchQuery]);\n\n  // Use the paginated hook with filter request and 30-second refresh\n  const {\n    data: ccPairsIndexingStatuses,\n    isLoading: isLoadingCcPairsIndexingStatuses,\n    error: ccPairsIndexingStatusesError,\n    handlePageChange,\n    sourcePages,\n    sourceLoadingStates,\n    resetPagination,\n  } = useConnectorIndexingStatusWithPagination(request, 30000, vectorDbEnabled);\n\n  // Check if filters are active\n  const hasActiveFilters = useMemo(() => {\n    return (\n      (filterOptions.accessType && filterOptions.accessType.length > 0) ||\n      (filterOptions.lastStatus && filterOptions.lastStatus.length > 0) ||\n      filterOptions.docsCountFilter.operator !== null\n    );\n  }, [filterOptions]);\n\n  // Handle filter changes\n  const handleFilterChange = (newFilterOptions: FilterOptions) => {\n    setFilterOptions(newFilterOptions);\n    // Reset pagination when filters change\n    resetPagination();\n  };\n\n  // Toggle source expand/collapse functions\n  const toggleSource = (\n    source: ValidSources,\n    toggled: boolean | null = null\n  ) => {\n    const newConnectorsToggled = {\n      ...connectorsToggled,\n      [source]: toggled == null ? !connectorsToggled[source] : toggled,\n    };\n    setConnectorsToggled(newConnectorsToggled);\n    Cookies.set(\n      TOGGLED_CONNECTORS_COOKIE_NAME,\n      JSON.stringify(newConnectorsToggled)\n    );\n  };\n\n  const expandAll = () => {\n    if (!ccPairsIndexingStatuses) return;\n    const newConnectorsToggled = { ...connectorsToggled };\n    ccPairsIndexingStatuses.forEach((ccPairStatus) => {\n      newConnectorsToggled[ccPairStatus.source] = true;\n    });\n    setConnectorsToggled(newConnectorsToggled);\n    Cookies.set(\n      TOGGLED_CONNECTORS_COOKIE_NAME,\n      JSON.stringify(newConnectorsToggled)\n    );\n  };\n\n  const collapseAll = () => {\n    if (!ccPairsIndexingStatuses) return;\n    const newConnectorsToggled = { ...connectorsToggled };\n    ccPairsIndexingStatuses.forEach((ccPairStatus) => {\n      newConnectorsToggled[ccPairStatus.source] = false;\n    });\n    setConnectorsToggled(newConnectorsToggled);\n    Cookies.set(\n      TOGGLED_CONNECTORS_COOKIE_NAME,\n      JSON.stringify(newConnectorsToggled)\n    );\n  };\n\n  // Check if any sources are expanded\n  const hasExpandedSources =\n    ccPairsIndexingStatuses?.some(\n      (ccPairStatus) => connectorsToggled[ccPairStatus.source]\n    ) || false;\n\n  // Handler functions for the search and filter controls\n  const handleClearFilters = () => {\n    if (filterComponentRef.current) {\n      filterComponentRef.current.resetFilters();\n      setFilterOptions({\n        accessType: null,\n        docsCountFilter: {\n          operator: null,\n          value: null,\n        },\n        lastStatus: null,\n      });\n    }\n  };\n\n  if (ccPairsIndexingStatusesError) {\n    return (\n      <div className=\"text-error\">\n        {ccPairsIndexingStatusesError?.info?.detail ||\n          \"Error loading indexing status.\"}\n      </div>\n    );\n  }\n\n  return (\n    <div>\n      {/* Search bar and controls */}\n      <SearchAndFilterControls\n        searchQuery={searchQuery}\n        onSearchChange={setSearchQuery}\n        hasExpandedSources={hasExpandedSources}\n        onExpandAll={expandAll}\n        onCollapseAll={collapseAll}\n        filterOptions={filterOptions}\n        onFilterChange={handleFilterChange}\n        resetPagination={resetPagination}\n        onClearFilters={handleClearFilters}\n        hasActiveFilters={hasActiveFilters}\n        filterComponentRef={\n          filterComponentRef as RefObject<{ resetFilters: () => void }>\n        }\n      />\n\n      {/* Table component */}\n      {isLoadingCcPairsIndexingStatuses ? (\n        <div className=\"mt-12\">\n          <ConnectorStaggeredSkeleton rowCount={8} standalone={true} />\n        </div>\n      ) : !ccPairsIndexingStatuses || ccPairsIndexingStatuses.length === 0 ? (\n        <div>\n          <Spacer rem={3} />\n          <Text as=\"p\">\n            {markdown(\n              \"It looks like you don't have any connectors setup yet. Visit the [Add Connector](/admin/add-connector) page to get started!\"\n            )}\n          </Text>\n        </div>\n      ) : (\n        <CCPairIndexingStatusTable\n          ccPairsIndexingStatuses={ccPairsIndexingStatuses}\n          connectorsToggled={connectorsToggled}\n          toggleSource={toggleSource}\n          onPageChange={handlePageChange}\n          sourceLoadingStates={sourceLoadingStates}\n        />\n      )}\n    </div>\n  );\n}\n\nexport default function Status() {\n  useToastFromQuery({\n    \"connector-created\": {\n      message: \"Connector created successfully\",\n      type: \"success\",\n    },\n  });\n\n  return (\n    <SettingsLayouts.Root width=\"full\">\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title={route.title}\n        rightChildren={\n          <Button href=\"/admin/add-connector\">Add Connector</Button>\n        }\n        separator\n      />\n      <SettingsLayouts.Body>\n        <Main />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/kg/KGEntityTypes.tsx",
    "content": "import { useEffect, useState } from \"react\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport Link from \"next/link\";\nimport { EntityType, SourceAndEntityTypeView } from \"@/app/admin/kg/interfaces\";\nimport CollapsibleCard from \"@/components/CollapsibleCard\";\nimport { ValidSources } from \"@/lib/types\";\nimport { FaCircleQuestion } from \"react-icons/fa6\";\nimport { CheckmarkIcon } from \"@/components/icons/icons\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { cn } from \"@/lib/utils\";\n\n// Utility: Convert capitalized snake case to human readable case\nfunction snakeToHumanReadable(str: string): string {\n  return (\n    str\n      .toLowerCase()\n      .replace(/_/g, \" \")\n      .replace(/\\b\\w/g, (match) => match.toUpperCase())\n      // # TODO (@raunakab)\n      // Special case to replace all instances of \"Pr\" with \"PR\".\n      // This is a *dumb* implementation. If there exists a string that starts with \"Pr\" (e.g., \"Prompt\"),\n      // then this line will stupidly convert it to \"PRompt\".\n      // Fix this later (or if this becomes a problem lol).\n      .replace(\"Pr\", \"PR\")\n  );\n}\n\n// Custom Header Component\nfunction TableHeader() {\n  return (\n    <div className=\"grid grid-cols-12 gap-y-4 px-8 p-4 border-b bg-background-tint-00\">\n      <div className=\"col-span-1\">\n        <Text as=\"p\">Entity Name</Text>\n      </div>\n      <div className=\"col-span-10\">\n        <Text as=\"p\">Description</Text>\n      </div>\n      <div className=\"col-span-1 flex flex-1 justify-center\">\n        <Text as=\"p\">Active</Text>\n      </div>\n    </div>\n  );\n}\n\n// Custom Row Component\nfunction TableRow({ entityType }: { entityType: EntityType }) {\n  const [entityTypeState, setEntityTypeState] = useState(entityType);\n  const [descriptionSavingState, setDescriptionSavingState] = useState<\n    \"saving\" | \"saved\" | \"failed\" | undefined\n  >(undefined);\n\n  const [timer, setTimer] = useState<NodeJS.Timeout | null>(null);\n  const [checkmarkVisible, setCheckmarkVisible] = useState(false);\n  const [hasMounted, setHasMounted] = useState(false);\n\n  const handleToggle = async (checked: boolean) => {\n    const response = await fetch(\"/api/admin/kg/entity-types\", {\n      method: \"PUT\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify([{ ...entityType, active: checked }]),\n    });\n\n    if (!response.ok) return;\n\n    setEntityTypeState({ ...entityTypeState, active: checked });\n  };\n\n  const handleDescriptionChange = async (description: string) => {\n    try {\n      const response = await fetch(\"/api/admin/kg/entity-types\", {\n        method: \"PUT\",\n        headers: { \"Content-Type\": \"application/json\" },\n        body: JSON.stringify([{ ...entityType, description }]),\n      });\n      if (response.ok) {\n        setDescriptionSavingState(\"saved\");\n        setCheckmarkVisible(true);\n        setTimeout(() => setCheckmarkVisible(false), 1000);\n      } else {\n        setDescriptionSavingState(\"failed\");\n        setCheckmarkVisible(false);\n      }\n    } catch {\n      setDescriptionSavingState(\"failed\");\n      setCheckmarkVisible(false);\n    } finally {\n      setTimeout(() => setDescriptionSavingState(undefined), 1000);\n    }\n  };\n\n  useEffect(() => {\n    if (!hasMounted) {\n      setHasMounted(true);\n      return;\n    }\n    if (timer) clearTimeout(timer);\n    setTimer(\n      setTimeout(() => {\n        setDescriptionSavingState(\"saving\");\n        setCheckmarkVisible(false);\n        setTimer(\n          setTimeout(\n            () => handleDescriptionChange(entityTypeState.description),\n            500\n          )\n        );\n      }, 1000)\n    );\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, [entityTypeState.description]);\n\n  return (\n    <div className=\"bg-background-tint-00\">\n      <div className=\"grid grid-cols-12 px-8 py-4\">\n        <div\n          className={cn(\n            \"grid grid-cols-11 col-span-11 transition-opacity duration-150 ease-in-out\",\n            !entityTypeState.active && \"opacity-60\"\n          )}\n        >\n          <div className=\"col-span-1 flex items-center\">\n            <Text as=\"p\">{snakeToHumanReadable(entityType.name)}</Text>\n          </div>\n          <div className=\"col-span-10 relative\">\n            <InputTypeIn\n              placeholder=\"Value\"\n              variant={!entityTypeState.active ? \"disabled\" : undefined}\n              className=\"w-full px-3 py-2 border\"\n              defaultValue={entityType.description}\n              onChange={(e) =>\n                setEntityTypeState({\n                  ...entityTypeState,\n                  description: e.target.value,\n                })\n              }\n              onKeyDown={async (e) => {\n                if (e.key === \"Enter\") {\n                  e.preventDefault();\n                  if (timer) {\n                    clearTimeout(timer);\n                    setTimer(null);\n                  }\n                  setDescriptionSavingState(\"saving\");\n                  setCheckmarkVisible(false);\n                  await handleDescriptionChange(\n                    (e.target as HTMLInputElement).value\n                  );\n                }\n              }}\n            />\n            <span\n              className=\"absolute right-3 top-1/2 -translate-y-1/2 w-5 h-5\"\n              style={{ pointerEvents: \"none\" }}\n            >\n              <span\n                className={cn(\n                  \"absolute inset-0 flex items-center justify-center transition-opacity duration-400 ease-in-out\",\n                  descriptionSavingState === \"saving\" && hasMounted\n                    ? \"opacity-100\"\n                    : \"opacity-0\"\n                )}\n                style={{ zIndex: 1 }}\n              >\n                <span className=\"inline-block w-4 h-4 align-middle border-2 border-theme-primary-04 border-t-transparent rounded-full animate-spin\" />\n              </span>\n              <span\n                className={cn(\n                  \"absolute inset-0 flex items-center justify-center transition-opacity duration-400 ease-in-out\",\n                  checkmarkVisible ? \"opacity-100\" : \"opacity-0\"\n                )}\n                style={{ zIndex: 2 }}\n              >\n                <CheckmarkIcon size={16} className=\"text-status-success-05\" />\n              </span>\n            </span>\n          </div>\n        </div>\n        <div className=\"grid col-span-1 items-center justify-center\">\n          <Switch\n            checked={entityTypeState.active}\n            onCheckedChange={handleToggle}\n          />\n        </div>\n      </div>\n    </div>\n  );\n}\n\ninterface KGEntityTypesProps {\n  sourceAndEntityTypes: SourceAndEntityTypeView;\n}\n\nexport default function KGEntityTypes({\n  sourceAndEntityTypes,\n}: KGEntityTypesProps) {\n  // State to control open/close of all CollapsibleCards\n  const [openCards, setOpenCards] = useState<{ [key: string]: boolean }>({});\n  // State for search query\n  const [search, setSearch] = useState(\"\");\n\n  // Initialize openCards state when data changes\n  useEffect(() => {\n    const initialState: { [key: string]: boolean } = {};\n    Object.keys(sourceAndEntityTypes.entity_types).forEach((key) => {\n      initialState[key] = true;\n    });\n    setOpenCards(initialState);\n  }, [sourceAndEntityTypes]);\n\n  // Handlers for expand/collapse all\n  const handleExpandAll = () => {\n    const newState: { [key: string]: boolean } = {};\n    Object.keys(sourceAndEntityTypes.entity_types).forEach((key) => {\n      newState[key] = true;\n    });\n    setOpenCards(newState);\n  };\n  const handleCollapseAll = () => {\n    const newState: { [key: string]: boolean } = {};\n    Object.keys(sourceAndEntityTypes.entity_types).forEach((key) => {\n      newState[key] = false;\n    });\n    setOpenCards(newState);\n  };\n\n  // Determine if all cards are closed\n  const allClosed = Object.values(openCards).every((v) => v === false);\n\n  return (\n    <div className=\"flex flex-col gap-y-4 w-full\">\n      <div className=\"flex flex-row items-center gap-x-1.5 mb-2\">\n        <InputTypeIn\n          placeholder=\"Search source type...\"\n          value={search}\n          onChange={(event) => setSearch(event.target.value)}\n        />\n        <Button onClick={allClosed ? handleExpandAll : handleCollapseAll}>\n          {allClosed ? \"Expand All\" : \"Collapse All\"}\n        </Button>\n      </div>\n      <div className=\"flex flex-col gap-y-4 w-full\">\n        {Object.entries(sourceAndEntityTypes.entity_types).length === 0 ? (\n          <div className=\"flex flex-col gap-y-4\">\n            <Text as=\"p\" text02>\n              No results available.\n            </Text>\n            <Text as=\"p\" text02>\n              To configure Knowledge Graph, first connect some{\" \"}\n              <Link\n                href=\"/admin/add-connector\"\n                className=\"underline text-action-link-01\"\n              >\n                Connectors.\n              </Link>\n            </Text>\n          </div>\n        ) : (\n          Object.entries(sourceAndEntityTypes.entity_types)\n            .filter(([key]) =>\n              snakeToHumanReadable(key)\n                .toLowerCase()\n                .includes(search.toLowerCase())\n            )\n            .sort(([keyA], [keyB]) => keyA.localeCompare(keyB))\n            .map(([key, entityTypesArr]) => {\n              const stats = sourceAndEntityTypes.source_statistics[key] ?? {\n                source_name: key,\n                last_updated: undefined,\n                entities_count: 0,\n              };\n              return (\n                <div key={key}>\n                  <CollapsibleCard\n                    className=\"focus:outline-none focus-visible:outline-none outline-none\"\n                    header={\n                      <span className=\"font-semibold text-lg flex flex-row gap-x-4 items-center\">\n                        {Object.values(ValidSources).includes(\n                          key as ValidSources\n                        ) ? (\n                          <SourceIcon\n                            sourceType={key as ValidSources}\n                            iconSize={25}\n                          />\n                        ) : (\n                          <FaCircleQuestion size={25} />\n                        )}\n                        {snakeToHumanReadable(key)}\n                        <span className=\"ml-auto flex flex-row gap-x-16 items-center pr-16\">\n                          <span className=\"flex flex-col items-start\">\n                            <Text as=\"p\" secondaryBody text02>\n                              Entities Count\n                            </Text>\n                            <Text as=\"p\">{stats.entities_count}</Text>\n                          </span>\n                          <span className=\"flex flex-col items-start\">\n                            <Text as=\"p\" secondaryBody text02>\n                              Last Updated\n                            </Text>\n                            <Text as=\"p\">\n                              {stats.last_updated\n                                ? new Date(stats.last_updated).toLocaleString()\n                                : \"N/A\"}\n                            </Text>\n                          </span>\n                        </span>\n                      </span>\n                    }\n                    // Use a key that changes with openCards[key] to force remount and update defaultOpen\n                    key={`${key}-${openCards[key]}`}\n                    defaultOpen={\n                      openCards[key] !== undefined ? openCards[key] : true\n                    }\n                  >\n                    <div className=\"w-full\">\n                      <TableHeader />\n                      {entityTypesArr.map(\n                        (entityType: EntityType, index: number) => (\n                          <TableRow key={index} entityType={entityType} />\n                        )\n                      )}\n                    </div>\n                  </CollapsibleCard>\n                </div>\n              );\n            })\n        )}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/kg/interfaces.ts",
    "content": "export type KGConfig = {\n  enabled: boolean;\n  vendor?: string | null;\n  vendor_domains?: string[] | null;\n  ignore_domains?: string[] | null;\n  coverage_start: Date;\n};\n\nexport type KGConfigRaw = {\n  enabled: boolean;\n  vendor?: string | null;\n  vendor_domains?: string[] | null;\n  ignore_domains?: string[] | null;\n  coverage_start: string;\n};\n\nexport type EntityTypeValues = { [key: string]: EntityType };\n\nexport type SourceAndEntityTypeView = {\n  source_statistics: Record<string, SourceStatistics>;\n  entity_types: Record<string, EntityType[]>;\n};\n\nexport type SourceStatistics = {\n  source_name: string;\n  last_updated: string;\n  entities_count: number;\n};\n\nexport type EntityType = {\n  name: string;\n  description: string;\n  active: boolean;\n  grounded_source_name: string;\n};\n"
  },
  {
    "path": "web/src/app/admin/kg/page.tsx",
    "content": "\"use client\";\n\nimport CardSection from \"@/components/admin/CardSection\";\nimport {\n  DatePickerField,\n  FieldLabel,\n  TextArrayField,\n  TextFormField,\n} from \"@/components/Field\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport SwitchField from \"@/refresh-components/form/SwitchField\";\nimport { Form, Formik, FormikState, useFormikContext } from \"formik\";\nimport { useState } from \"react\";\nimport * as Yup from \"yup\";\nimport {\n  KGConfig,\n  KGConfigRaw,\n  SourceAndEntityTypeView,\n} from \"@/app/admin/kg/interfaces\";\nimport { sanitizeKGConfig } from \"@/app/admin/kg/utils\";\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { toast } from \"@/hooks/useToast\";\nimport Title from \"@/components/ui/title\";\nimport { redirect } from \"next/navigation\";\nimport { useIsKGExposed } from \"@/app/admin/kg/utils\";\nimport KGEntityTypes from \"@/app/admin/kg/KGEntityTypes\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\nimport { SvgSettings } from \"@opal/icons\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.KNOWLEDGE_GRAPH;\n\nfunction createDomainField(\n  name: string,\n  label: string,\n  subtext: string,\n  placeholder: string,\n  minFields?: number\n) {\n  return function DomainFields({ disabled = false }: { disabled?: boolean }) {\n    const { values } = useFormikContext<any>();\n\n    return (\n      <TextArrayField\n        name={name}\n        label={label}\n        subtext={subtext}\n        placeholder={placeholder}\n        minFields={minFields}\n        values={values}\n        disabled={disabled}\n      />\n    );\n  };\n}\n\nconst VendorDomains = createDomainField(\n  \"vendor_domains\",\n  \"Vendor Domains\",\n  \"Domain names of your company. Users with these email domains will be recognized as employees.\",\n  \"Domain\",\n  1\n);\n\nconst IgnoreDomains = createDomainField(\n  \"ignore_domains\",\n  \"Ignore Domains\",\n  \"Domain names to ignore. Users with these email domains will be excluded from the Knowledge Graph.\",\n  \"Domain\"\n);\n\nfunction KGConfiguration({\n  kgConfig,\n  onSubmitSuccess,\n  entityTypesMutate,\n}: {\n  kgConfig: KGConfig;\n  onSubmitSuccess?: () => void;\n  entityTypesMutate?: () => void;\n}) {\n  const initialValues: KGConfig = {\n    enabled: kgConfig.enabled,\n    vendor: kgConfig.vendor ?? \"\",\n    vendor_domains:\n      (kgConfig.vendor_domains?.length ?? 0) > 0\n        ? kgConfig.vendor_domains\n        : [\"\"],\n    ignore_domains: kgConfig.ignore_domains ?? [],\n    coverage_start: kgConfig.coverage_start,\n  };\n\n  const enabledSchema = Yup.object({\n    enabled: Yup.boolean().required(),\n    vendor: Yup.string().required(\"Vendor is required.\"),\n    vendor_domains: Yup.array(\n      Yup.string().required(\"Vendor Domain is required.\")\n    )\n      .min(1)\n      .required(),\n    ignore_domains: Yup.array(\n      Yup.string().required(\"Ignore Domain is required\")\n    )\n      .min(0)\n      .required(),\n    coverage_start: Yup.date().nullable(),\n  });\n\n  const disabledSchema = Yup.object({\n    enabled: Yup.boolean().required(),\n  });\n\n  const validationSchema = Yup.lazy((values) =>\n    values.enabled ? enabledSchema : disabledSchema\n  );\n\n  const onSubmit = async (\n    values: KGConfig,\n    {\n      resetForm,\n    }: {\n      resetForm: (nextState?: Partial<FormikState<KGConfig>>) => void;\n    }\n  ) => {\n    const { enabled, ...enableRequest } = values;\n    const body = enabled ? enableRequest : {};\n\n    const response = await fetch(\"/api/admin/kg/config\", {\n      method: \"PUT\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify(body),\n    });\n\n    if (!response.ok) {\n      const errorMsg = (await response.json()).detail;\n      console.warn({ errorMsg });\n      toast.error(\"Failed to configure Knowledge Graph.\");\n      return;\n    }\n\n    toast.success(\"Successfully configured Knowledge Graph.\");\n    resetForm({ values });\n    onSubmitSuccess?.();\n\n    // Refresh entity types if KG was enabled\n    if (enabled && entityTypesMutate) {\n      entityTypesMutate();\n    }\n  };\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      onSubmit={onSubmit}\n    >\n      {(props) => (\n        <Form>\n          <div className=\"flex flex-col gap-y-6 w-full\">\n            <div className=\"flex flex-col gap-y-1\">\n              <FieldLabel\n                name=\"enabled\"\n                label=\"Enabled\"\n                subtext=\"Enable or disable Knowledge Graph.\"\n              />\n              <SwitchField\n                name=\"enabled\"\n                onCheckedChange={(state) => {\n                  if (!state) props.resetForm();\n                }}\n              />\n            </div>\n            <div\n              className={cn(\n                \"flex flex-col gap-y-6\",\n                !props.values.enabled && \"opacity-50\"\n              )}\n            >\n              <TextFormField\n                name=\"vendor\"\n                label=\"Vendor\"\n                subtext=\"Your company name.\"\n                className=\"flex flex-row flex-1 w-full\"\n                placeholder=\"My Company Inc.\"\n                disabled={!props.values.enabled}\n              />\n              <VendorDomains disabled={!props.values.enabled} />\n              <IgnoreDomains disabled={!props.values.enabled} />\n              <DatePickerField\n                name=\"coverage_start\"\n                label=\"Coverage Start\"\n                subtext=\"The start date of coverage for Knowledge Graph.\"\n                startYear={2025} // TODO: remove this after public beta\n                disabled={!props.values.enabled}\n              />\n            </div>\n            <Button disabled={!props.dirty} type=\"submit\">\n              Submit\n            </Button>\n          </div>\n        </Form>\n      )}\n    </Formik>\n  );\n}\n\nfunction Main() {\n  // Data:\n  const {\n    data: configData,\n    isLoading: configIsLoading,\n    mutate: configMutate,\n  } = useSWR<KGConfigRaw>(SWR_KEYS.kgConfig, errorHandlingFetcher);\n  const {\n    data: sourceAndEntityTypesData,\n    isLoading: entityTypesIsLoading,\n    mutate: entityTypesMutate,\n  } = useSWR<SourceAndEntityTypeView>(\n    SWR_KEYS.kgEntityTypes,\n    errorHandlingFetcher\n  );\n\n  // Local State:\n  const [configureModalShown, setConfigureModalShown] = useState(false);\n\n  if (\n    configIsLoading ||\n    entityTypesIsLoading ||\n    !configData ||\n    !sourceAndEntityTypesData\n  ) {\n    return <></>;\n  }\n\n  const kgConfig = sanitizeKGConfig(configData);\n\n  return (\n    <div className=\"flex flex-col py-4 gap-y-8\">\n      <CardSection className=\"max-w-2xl shadow-01 rounded-08 flex flex-col gap-2\">\n        <Text as=\"p\" headingH2>\n          Knowledge Graph Configuration (Private Beta)\n        </Text>\n        <div className=\"flex flex-col gap-y-6\">\n          <div>\n            <Text as=\"p\" text03>\n              The Knowledge Graph feature lets you explore your data in new\n              ways. Instead of searching through unstructured text, your data is\n              organized as entities and their relationships, enabling powerful\n              queries like:\n            </Text>\n            <div className=\"p-4\">\n              <Text as=\"p\" text03>\n                - &quot;Summarize my last 3 calls with account XYZ&quot;\n              </Text>\n              <Text as=\"p\" text03>\n                - &quot;How many open Jiras are assigned to John Smith, ranked\n                by priority&quot;\n              </Text>\n            </div>\n            <Text as=\"p\" text03>\n              (To use Knowledge Graph queries, you&apos;ll need a dedicated\n              Assistant configured in a specific way. Please contact the Onyx\n              team for setup instructions.)\n            </Text>\n          </div>\n          <Text as=\"p\" text03>\n            <Title>Getting Started:</Title>\n            Begin by configuring some high-level attributes, and then define the\n            entities you want to model afterwards.\n          </Text>\n          <Button\n            icon={SvgSettings}\n            onClick={() => setConfigureModalShown(true)}\n          >\n            Configure Knowledge Graph\n          </Button>\n        </div>\n      </CardSection>\n      {kgConfig.enabled && (\n        <>\n          <Text as=\"p\" headingH2>\n            Entity Types\n          </Text>\n          <KGEntityTypes sourceAndEntityTypes={sourceAndEntityTypesData} />\n        </>\n      )}\n      {configureModalShown && (\n        <Modal open onOpenChange={() => setConfigureModalShown(false)}>\n          <Modal.Content>\n            <Modal.Header\n              icon={SvgSettings}\n              title=\"Configure Knowledge Graph\"\n              onClose={() => setConfigureModalShown(false)}\n            />\n            <Modal.Body>\n              <KGConfiguration\n                kgConfig={kgConfig}\n                onSubmitSuccess={async () => {\n                  await configMutate();\n                  setConfigureModalShown(false);\n                }}\n                entityTypesMutate={entityTypesMutate}\n              />\n            </Modal.Body>\n          </Modal.Content>\n        </Modal>\n      )}\n    </div>\n  );\n}\n\nexport default function Page() {\n  const { kgExposed, isLoading } = useIsKGExposed();\n\n  if (isLoading) {\n    return <></>;\n  }\n\n  if (!kgExposed) {\n    redirect(\"/\");\n  }\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />\n      <SettingsLayouts.Body>\n        <Main />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/kg/utils.ts",
    "content": "import { useUser } from \"@/providers/UserProvider\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { KGConfig, KGConfigRaw } from \"./interfaces\";\n\nexport type KgExposedStatus = { kgExposed: boolean; isLoading: boolean };\n\nexport function useIsKGExposed(): KgExposedStatus {\n  const { isAdmin } = useUser();\n  const { data: kgExposedRaw, isLoading } = useSWR<boolean>(\n    isAdmin ? SWR_KEYS.kgExposed : null,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      revalidateOnReconnect: false,\n    }\n  );\n  return { kgExposed: kgExposedRaw ?? false, isLoading };\n}\n\nexport function sanitizeKGConfig(raw: KGConfigRaw): KGConfig {\n  const coverage_start = new Date(raw.coverage_start);\n\n  return {\n    ...raw,\n    coverage_start,\n  };\n}\n"
  },
  {
    "path": "web/src/app/admin/layout.tsx",
    "content": "import Layout from \"@/components/admin/Layout\";\n\nexport interface AdminLayoutProps {\n  children: React.ReactNode;\n}\n\nexport default async function AdminLayout({ children }: AdminLayoutProps) {\n  return await Layout({ children });\n}\n"
  },
  {
    "path": "web/src/app/admin/scim/ScimModal.tsx",
    "content": "import { SvgDownload, SvgKey, SvgRefreshCw } from \"@opal/icons\";\nimport { Interactive, Hoverable } from \"@opal/core\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport InputTextArea from \"@/refresh-components/inputs/InputTextArea\";\nimport Modal, { BasicModalFooter } from \"@/refresh-components/Modal\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport { toast } from \"@/hooks/useToast\";\nimport { downloadFile } from \"@/lib/download\";\n\nimport type { ScimModalView } from \"./interfaces\";\n\n// ---------------------------------------------------------------------------\n// Props\n// ---------------------------------------------------------------------------\n\ninterface ScimModalProps {\n  view: ScimModalView;\n  isSubmitting: boolean;\n  onRegenerate: () => void;\n  onClose: () => void;\n}\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\nasync function copyToClipboard(text: string) {\n  try {\n    await navigator.clipboard.writeText(text);\n    toast.success(\"Token copied to clipboard\");\n  } catch {\n    toast.error(\"Failed to copy token\");\n  }\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nexport default function ScimModal({\n  view,\n  isSubmitting,\n  onRegenerate,\n  onClose,\n}: ScimModalProps) {\n  switch (view.kind) {\n    case \"regenerate\":\n      return (\n        <ConfirmationModalLayout\n          icon={SvgRefreshCw}\n          title=\"Regenerate SCIM Token\"\n          onClose={onClose}\n          submit={\n            <Button\n              disabled={isSubmitting}\n              variant=\"danger\"\n              onClick={onRegenerate}\n            >\n              Regenerate Token\n            </Button>\n          }\n        >\n          <Section alignItems=\"start\" gap={0.5}>\n            <Text as=\"p\" text03>\n              Your current SCIM token will be revoked and a new token will be\n              generated. You will need to update the token on your identity\n              provider before SCIM provisioning will resume.\n            </Text>\n          </Section>\n        </ConfirmationModalLayout>\n      );\n\n    case \"token\":\n      return (\n        <Modal open onOpenChange={(open) => !open && onClose()}>\n          <Modal.Content width=\"sm\">\n            <Modal.Header\n              icon={SvgKey}\n              title=\"SCIM Token\"\n              description=\"Save this key before continuing. It won't be shown again.\"\n              onClose={onClose}\n            />\n            <Modal.Body>\n              <Hoverable.Root group=\"token\">\n                <Interactive.Stateless\n                  onClick={() => copyToClipboard(view.rawToken)}\n                >\n                  <InputTextArea\n                    value={view.rawToken}\n                    readOnly\n                    autoResize\n                    resizable={false}\n                    rows={2}\n                    className=\"font-main-ui-mono break-all cursor-pointer [&_textarea]:cursor-pointer\"\n                    rightSection={\n                      <div onClick={(e) => e.stopPropagation()}>\n                        <Hoverable.Item\n                          group=\"token\"\n                          variant=\"opacity-on-hover\"\n                        >\n                          <CopyIconButton getCopyText={() => view.rawToken} />\n                        </Hoverable.Item>\n                      </div>\n                    }\n                  />\n                </Interactive.Stateless>\n              </Hoverable.Root>\n            </Modal.Body>\n            <Modal.Footer>\n              <BasicModalFooter\n                left={\n                  <Button\n                    prominence=\"secondary\"\n                    icon={SvgDownload}\n                    onClick={() =>\n                      downloadFile(`onyx-scim-token-${Date.now()}.txt`, {\n                        content: view.rawToken,\n                      })\n                    }\n                  >\n                    Download\n                  </Button>\n                }\n                submit={\n                  <Button\n                    autoFocus\n                    onClick={() => copyToClipboard(view.rawToken)}\n                  >\n                    Copy Token\n                  </Button>\n                }\n              />\n            </Modal.Footer>\n          </Modal.Content>\n        </Modal>\n      );\n  }\n}\n"
  },
  {
    "path": "web/src/app/admin/scim/ScimSyncCard.tsx",
    "content": "import { SvgCheckCircle, SvgClock, SvgKey, SvgRefreshCw } from \"@opal/icons\";\nimport { ContentAction } from \"@opal/layouts\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { timeAgo } from \"@/lib/time\";\n\n// ---------------------------------------------------------------------------\n// Props\n// ---------------------------------------------------------------------------\n\ninterface ScimSyncCardProps {\n  hasToken: boolean;\n  isConnected: boolean;\n  lastUsedAt: string | null;\n  idpDomain: string | null;\n  isSubmitting: boolean;\n  onGenerate: () => void;\n  onRegenerate: () => void;\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nexport default function ScimSyncCard({\n  hasToken,\n  isConnected,\n  lastUsedAt,\n  idpDomain,\n  isSubmitting,\n  onGenerate,\n  onRegenerate,\n}: ScimSyncCardProps) {\n  return (\n    <Card gap={0.75}>\n      <ContentAction\n        title=\"SCIM Sync\"\n        description=\"Connect your identity provider to import and sync users and groups.\"\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        paddingVariant=\"fit\"\n        rightChildren={\n          hasToken ? (\n            <Button\n              variant=\"danger\"\n              prominence=\"secondary\"\n              onClick={onRegenerate}\n              icon={SvgRefreshCw}\n            >\n              Regenerate Token\n            </Button>\n          ) : (\n            <Button\n              disabled={isSubmitting}\n              rightIcon={SvgKey}\n              onClick={onGenerate}\n            >\n              Generate SCIM Token\n            </Button>\n          )\n        }\n      />\n\n      {hasToken && (\n        <>\n          <Separator noPadding />\n\n          <Section\n            flexDirection=\"row\"\n            justifyContent=\"between\"\n            alignItems=\"end\"\n            gap={1}\n          >\n            <Section alignItems=\"start\" gap={0} width=\"fit\">\n              {isConnected ? (\n                <SvgCheckCircle size={15} className=\"text-status-success-05\" />\n              ) : (\n                <SvgClock size={15} className=\"text-theme-amber-05\" />\n              )}\n              <Text as=\"p\" mainUiBody text04>\n                {isConnected ? \"Connected\" : \"Waiting for Connection\"}\n              </Text>\n            </Section>\n\n            <Section alignItems=\"end\" gap={0} width=\"fit\">\n              {isConnected ? (\n                <>\n                  {idpDomain && (\n                    <Text as=\"p\" secondaryAction text03>\n                      {idpDomain}\n                    </Text>\n                  )}\n                  <Text as=\"p\" secondaryBody text03>\n                    {timeAgo(lastUsedAt)}\n                  </Text>\n                </>\n              ) : (\n                <Text\n                  as=\"p\"\n                  secondaryBody\n                  text03\n                  className=\"max-w-[240px] text-right\"\n                >\n                  Provide the SCIM key to your identity provider to begin\n                  syncing users and groups.\n                </Text>\n              )}\n            </Section>\n          </Section>\n        </>\n      )}\n    </Card>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/scim/interfaces.ts",
    "content": "export interface ScimTokenResponse {\n  id: number;\n  name: string;\n  token_display: string;\n  is_active: boolean;\n  created_at: string;\n  last_used_at: string | null;\n  idp_domain: string | null;\n}\n\nexport interface ScimTokenCreatedResponse extends ScimTokenResponse {\n  raw_token: string;\n}\n\nexport type ScimModalView =\n  | { kind: \"regenerate\" }\n  | { kind: \"token\"; rawToken: string };\n"
  },
  {
    "path": "web/src/app/admin/scim/page.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\n\nimport { SvgUserSync } from \"@opal/icons\";\nimport { toast } from \"@/hooks/useToast\";\nimport { useScimToken } from \"@/hooks/useScimToken\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\n\nimport type { ScimTokenCreatedResponse, ScimModalView } from \"./interfaces\";\nimport { generateScimToken } from \"./svc\";\nimport ScimSyncCard from \"./ScimSyncCard\";\nimport ScimModal from \"./ScimModal\";\n\n// ---------------------------------------------------------------------------\n// SCIM Content\n// ---------------------------------------------------------------------------\n\nfunction ScimContent() {\n  const { data: token, error: tokenError, isLoading, mutate } = useScimToken();\n\n  const modal = useCreateModal();\n\n  const [modalView, setModalView] = useState<ScimModalView | null>(null);\n  const [isSubmitting, setIsSubmitting] = useState(false);\n\n  const hasToken = !!token;\n  const isConnected = hasToken && token.last_used_at !== null;\n\n  // 404 means no active token — not an error\n  const is404 =\n    tokenError &&\n    typeof tokenError === \"object\" &&\n    \"status\" in tokenError &&\n    (tokenError as { status: number }).status === 404;\n\n  if (isLoading) {\n    return <ThreeDotsLoader />;\n  }\n\n  if (tokenError && !is404) {\n    return (\n      <Text as=\"p\" text03>\n        Failed to load SCIM token status.\n      </Text>\n    );\n  }\n\n  // -----------------------------------------------------------------------\n  // Handlers\n  // -----------------------------------------------------------------------\n\n  function openModal(view: ScimModalView) {\n    setModalView(view);\n    modal.toggle(true);\n  }\n\n  function closeModal() {\n    modal.toggle(false);\n    setModalView(null);\n  }\n\n  async function handleCreateToken() {\n    setIsSubmitting(true);\n    try {\n      const response = await generateScimToken(\"default\");\n      if (!response.ok) {\n        let detail: string;\n        try {\n          const body = await response.clone().json();\n          detail = body.detail ?? JSON.stringify(body);\n        } catch {\n          detail = await response.text();\n        }\n        toast.error(`Failed to generate token: ${detail}`);\n        return;\n      }\n      const created: ScimTokenCreatedResponse = await response.json();\n      await mutate();\n      openModal({ kind: \"token\", rawToken: created.raw_token });\n      if (hasToken) toast.success(\"Token regenerated\");\n    } catch {\n      toast.error(\"Something went wrong. Please try again.\");\n    } finally {\n      setIsSubmitting(false);\n    }\n  }\n\n  // -----------------------------------------------------------------------\n  // Render\n  // -----------------------------------------------------------------------\n\n  return (\n    <>\n      <ScimSyncCard\n        hasToken={hasToken}\n        isConnected={isConnected}\n        lastUsedAt={token?.last_used_at ?? null}\n        idpDomain={token?.idp_domain ?? null}\n        isSubmitting={isSubmitting}\n        onGenerate={handleCreateToken}\n        onRegenerate={() => openModal({ kind: \"regenerate\" })}\n      />\n\n      {modal.isOpen && modalView && (\n        <modal.Provider>\n          <ScimModal\n            view={modalView}\n            isSubmitting={isSubmitting}\n            onRegenerate={handleCreateToken}\n            onClose={closeModal}\n          />\n        </modal.Provider>\n      )}\n    </>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Page\n// ---------------------------------------------------------------------------\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={SvgUserSync}\n        title=\"SCIM\"\n        description=\"Sync users and groups via System for Cross-domain Identity Management (SCIM) protocol.\"\n        separator\n      />\n      <SettingsLayouts.Body>\n        <ScimContent />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/scim/svc.ts",
    "content": "export async function generateScimToken(name: string) {\n  return fetch(\"/api/admin/enterprise-settings/scim/token\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ name }),\n  });\n}\n"
  },
  {
    "path": "web/src/app/admin/service-accounts/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/ServiceAccountsPage\";\n"
  },
  {
    "path": "web/src/app/admin/systeminfo/page.tsx",
    "content": "import { NotebookIcon } from \"@/components/icons/icons\";\nimport { getWebVersion, getBackendVersion } from \"@/lib/version\";\n\nconst Page = async () => {\n  let web_version: string | null = null;\n  let backend_version: string | null = null;\n  try {\n    [web_version, backend_version] = await Promise.all([\n      getWebVersion(),\n      getBackendVersion(),\n    ]);\n  } catch (e) {\n    console.log(`Version info fetch failed for system info page - ${e}`);\n  }\n\n  return (\n    <div>\n      <div className=\"border-solid border-background-600 border-b pb-2 mb-4 flex\">\n        <NotebookIcon size={32} />\n        <h1 className=\"text-3xl font-bold pl-2\">Version</h1>\n      </div>\n\n      <div>\n        <div className=\"flex mb-2\">\n          <p className=\"my-auto mr-1\">Backend Version: </p>\n          <p className=\"text-base my-auto text-slate-400 italic\">\n            {backend_version}\n          </p>\n        </div>\n        <div className=\"flex mb-2\">\n          <p className=\"my-auto mr-1\">Web Version: </p>\n          <p className=\"text-base my-auto text-slate-400 italic\">\n            {web_version}\n          </p>\n        </div>\n      </div>\n    </div>\n  );\n};\n\nexport default Page;\n"
  },
  {
    "path": "web/src/app/admin/token-rate-limits/CreateRateLimitModal.tsx",
    "content": "\"use client\";\n\nimport * as Yup from \"yup\";\nimport { Button } from \"@opal/components\";\nimport { useEffect, useState } from \"react\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Form, Formik } from \"formik\";\nimport { SelectorFormField, TextFormField } from \"@/components/Field\";\nimport { UserGroup } from \"@/lib/types\";\nimport { Scope } from \"./types\";\nimport { toast } from \"@/hooks/useToast\";\nimport { SvgSettings } from \"@opal/icons\";\ninterface CreateRateLimitModalProps {\n  isOpen: boolean;\n  setIsOpen: (isOpen: boolean) => void;\n  onSubmit: (\n    target_scope: Scope,\n    period_hours: number,\n    token_budget: number,\n    group_id: number\n  ) => void;\n  forSpecificScope?: Scope;\n  forSpecificUserGroup?: number;\n}\n\nexport default function CreateRateLimitModal({\n  isOpen,\n  setIsOpen,\n  onSubmit,\n  forSpecificScope,\n  forSpecificUserGroup,\n}: CreateRateLimitModalProps) {\n  const [modalUserGroups, setModalUserGroups] = useState([]);\n  const [shouldFetchUserGroups, setShouldFetchUserGroups] = useState(\n    forSpecificScope === Scope.USER_GROUP\n  );\n\n  useEffect(() => {\n    const fetchData = async () => {\n      try {\n        const response = await fetch(\"/api/manage/admin/user-group\");\n        const data = await response.json();\n        const options = data.map((userGroup: UserGroup) => ({\n          name: userGroup.name,\n          value: userGroup.id,\n        }));\n        setModalUserGroups(options);\n        setShouldFetchUserGroups(false);\n      } catch (error) {\n        toast.error(`Failed to fetch user groups: ${error}`);\n      }\n    };\n\n    if (shouldFetchUserGroups) {\n      fetchData();\n    }\n  }, [shouldFetchUserGroups]);\n\n  return (\n    <Modal open={isOpen} onOpenChange={() => setIsOpen(false)}>\n      <Modal.Content width=\"sm\" height=\"sm\">\n        <Modal.Header\n          icon={SvgSettings}\n          title=\"Create a Token Rate Limit\"\n          onClose={() => setIsOpen(false)}\n        />\n        <Modal.Body>\n          <Formik\n            initialValues={{\n              enabled: true,\n              period_hours: \"\",\n              token_budget: \"\",\n              target_scope: forSpecificScope || Scope.GLOBAL,\n              user_group_id: forSpecificUserGroup,\n            }}\n            validationSchema={Yup.object().shape({\n              period_hours: Yup.number()\n                .required(\"Time Window is a required field\")\n                .min(1, \"Time Window must be at least 1 hour\"),\n              token_budget: Yup.number()\n                .required(\"Token Budget is a required field\")\n                .min(1, \"Token Budget must be at least 1\"),\n              target_scope: Yup.string().required(\n                \"Target Scope is a required field\"\n              ),\n              user_group_id: Yup.string().test(\n                \"user_group_id\",\n                \"User Group is a required field\",\n                (value, context) => {\n                  return (\n                    context.parent.target_scope !== \"user_group\" ||\n                    (context.parent.target_scope === \"user_group\" &&\n                      value !== undefined)\n                  );\n                }\n              ),\n            })}\n            onSubmit={async (values, formikHelpers) => {\n              formikHelpers.setSubmitting(true);\n              onSubmit(\n                values.target_scope,\n                Number(values.period_hours),\n                Number(values.token_budget),\n                Number(values.user_group_id)\n              );\n              return formikHelpers.setSubmitting(false);\n            }}\n          >\n            {({ isSubmitting, values, setFieldValue }) => (\n              <Form className=\"overflow-visible px-2\">\n                {!forSpecificScope && (\n                  <SelectorFormField\n                    name=\"target_scope\"\n                    label=\"Target Scope\"\n                    options={[\n                      { name: \"Global\", value: Scope.GLOBAL },\n                      { name: \"User\", value: Scope.USER },\n                      { name: \"User Group\", value: Scope.USER_GROUP },\n                    ]}\n                    includeDefault={false}\n                    onSelect={(selected) => {\n                      setFieldValue(\"target_scope\", selected);\n                      if (selected === Scope.USER_GROUP) {\n                        setShouldFetchUserGroups(true);\n                      }\n                    }}\n                  />\n                )}\n                {forSpecificUserGroup === undefined &&\n                  values.target_scope === Scope.USER_GROUP && (\n                    <SelectorFormField\n                      name=\"user_group_id\"\n                      label=\"User Group\"\n                      options={modalUserGroups}\n                      includeDefault={false}\n                    />\n                  )}\n                <TextFormField\n                  name=\"period_hours\"\n                  label=\"Time Window (Hours)\"\n                  type=\"number\"\n                  placeholder=\"\"\n                />\n                <TextFormField\n                  name=\"token_budget\"\n                  label=\"Token Budget (Thousands)\"\n                  type=\"number\"\n                  placeholder=\"\"\n                />\n                <Button disabled={isSubmitting} type=\"submit\">\n                  Create\n                </Button>\n              </Form>\n            )}\n          </Formik>\n        </Modal.Body>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/token-rate-limits/TokenRateLimitTables.tsx",
    "content": "\"use client\";\n\nimport {\n  Table,\n  TableHead,\n  TableRow,\n  TableBody,\n  TableCell,\n} from \"@/components/ui/table\";\nimport Title from \"@/components/ui/title\";\nimport { DeleteButton } from \"@/components/DeleteButton\";\nimport { deleteTokenRateLimit, updateTokenRateLimit } from \"./lib\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { TokenRateLimitDisplay } from \"./types\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport useSWR, { mutate } from \"swr\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport { TableHeader } from \"@/components/ui/table\";\nimport { Text } from \"@opal/components\";\nimport Spacer from \"@/refresh-components/Spacer\";\n\ntype TokenRateLimitTableArgs = {\n  tokenRateLimits: TokenRateLimitDisplay[];\n  title?: string;\n  description?: string;\n  fetchUrl: string;\n  hideHeading?: boolean;\n  isAdmin: boolean;\n};\n\nexport const TokenRateLimitTable = ({\n  tokenRateLimits,\n  title,\n  description,\n  fetchUrl,\n  hideHeading,\n  isAdmin,\n}: TokenRateLimitTableArgs) => {\n  const shouldRenderGroupName = () =>\n    tokenRateLimits.length > 0 &&\n    tokenRateLimits[0] !== undefined &&\n    tokenRateLimits[0].group_name !== undefined;\n\n  const handleEnabledChange = (id: number) => {\n    const tokenRateLimit = tokenRateLimits.find(\n      (tokenRateLimit) => tokenRateLimit.token_id === id\n    );\n\n    if (!tokenRateLimit) {\n      return;\n    }\n\n    updateTokenRateLimit(id, {\n      token_budget: tokenRateLimit.token_budget,\n      period_hours: tokenRateLimit.period_hours,\n      enabled: !tokenRateLimit.enabled,\n    }).then(() => {\n      mutate(fetchUrl);\n    });\n  };\n\n  const handleDelete = (id: number) =>\n    deleteTokenRateLimit(id).then(() => {\n      mutate(fetchUrl);\n    });\n\n  if (tokenRateLimits.length === 0) {\n    return (\n      <div className=\"w-full\">\n        {!hideHeading && title && <Title>{title}</Title>}\n        {!hideHeading && description && (\n          <>\n            <Spacer rem={0.5} />\n            <Text as=\"p\">{description}</Text>\n            <Spacer rem={0.5} />\n          </>\n        )}\n        {!hideHeading && <Spacer rem={2} />}\n        <Text as=\"p\">No token rate limits set!</Text>\n        {!hideHeading && <Spacer rem={2} />}\n      </div>\n    );\n  }\n\n  return (\n    <div className=\"w-full\">\n      {!hideHeading && title && <Title>{title}</Title>}\n      {!hideHeading && description && (\n        <>\n          <Spacer rem={0.5} />\n          <Text as=\"p\">{description}</Text>\n          <Spacer rem={0.5} />\n        </>\n      )}\n      <Table\n        className={`overflow-visible ${\n          !hideHeading && \"my-8\"\n        } [&_td]:text-center [&_th]:text-center`}\n      >\n        <TableHeader>\n          <TableRow>\n            <TableHead>Enabled</TableHead>\n            {shouldRenderGroupName() && <TableHead>Group Name</TableHead>}\n            <TableHead>Time Window (Hours)</TableHead>\n            <TableHead>Token Budget (Thousands)</TableHead>\n            {isAdmin && <TableHead>Delete</TableHead>}\n          </TableRow>\n        </TableHeader>\n        <TableBody>\n          {tokenRateLimits.map((tokenRateLimit) => {\n            return (\n              <TableRow key={tokenRateLimit.token_id}>\n                <TableCell>\n                  <div className=\"flex justify-center\">\n                    <div\n                      onClick={\n                        isAdmin\n                          ? () => handleEnabledChange(tokenRateLimit.token_id)\n                          : undefined\n                      }\n                      className={`px-1 py-0.5 rounded select-none w-24 ${\n                        isAdmin\n                          ? \"hover:bg-accent-background cursor-pointer\"\n                          : \"opacity-50\"\n                      }`}\n                    >\n                      <div className=\"flex items-center justify-center\">\n                        <Checkbox\n                          checked={tokenRateLimit.enabled}\n                          onCheckedChange={\n                            isAdmin\n                              ? () =>\n                                  handleEnabledChange(tokenRateLimit.token_id)\n                              : undefined\n                          }\n                        />\n                        <p className=\"ml-2\">\n                          {tokenRateLimit.enabled ? \"Enabled\" : \"Disabled\"}\n                        </p>\n                      </div>\n                    </div>\n                  </div>\n                </TableCell>\n                {shouldRenderGroupName() && (\n                  <TableCell className=\"font-bold text-text-darker\">\n                    {tokenRateLimit.group_name}\n                  </TableCell>\n                )}\n                <TableCell>\n                  {tokenRateLimit.period_hours +\n                    \" hour\" +\n                    (tokenRateLimit.period_hours > 1 ? \"s\" : \"\")}\n                </TableCell>\n                <TableCell>\n                  {tokenRateLimit.token_budget + \" thousand tokens\"}\n                </TableCell>\n                {isAdmin && (\n                  <TableCell>\n                    <div className=\"flex justify-center\">\n                      <DeleteButton\n                        onClick={() => handleDelete(tokenRateLimit.token_id)}\n                      />\n                    </div>\n                  </TableCell>\n                )}\n              </TableRow>\n            );\n          })}\n        </TableBody>\n      </Table>\n    </div>\n  );\n};\n\nexport const GenericTokenRateLimitTable = ({\n  fetchUrl,\n  title,\n  description,\n  hideHeading,\n  responseMapper,\n  isAdmin = true,\n}: {\n  fetchUrl: string;\n  title?: string;\n  description?: string;\n  hideHeading?: boolean;\n  responseMapper?: (data: any) => TokenRateLimitDisplay[];\n  isAdmin?: boolean;\n}) => {\n  const { data, isLoading, error } = useSWR<TokenRateLimitDisplay[]>(\n    fetchUrl,\n    errorHandlingFetcher\n  );\n\n  if (isLoading) {\n    return <ThreeDotsLoader />;\n  }\n\n  if (!isLoading && error) {\n    return <Text as=\"p\">Failed to load token rate limits</Text>;\n  }\n\n  let processedData = data;\n  if (responseMapper) {\n    processedData = responseMapper(data);\n  }\n\n  return (\n    <TokenRateLimitTable\n      tokenRateLimits={processedData ?? []}\n      fetchUrl={fetchUrl}\n      title={title}\n      description={description}\n      hideHeading={hideHeading}\n      isAdmin={isAdmin}\n    />\n  );\n};\n"
  },
  {
    "path": "web/src/app/admin/token-rate-limits/lib.ts",
    "content": "import { TokenRateLimitArgs } from \"./types\";\n\nconst API_PREFIX = \"/api/admin/token-rate-limits\";\n\n// Global Token Limits\nexport const insertGlobalTokenRateLimit = async (\n  tokenRateLimit: TokenRateLimitArgs\n) => {\n  return await fetch(`${API_PREFIX}/global`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(tokenRateLimit),\n  });\n};\n\n// User Token Limits\nexport const insertUserTokenRateLimit = async (\n  tokenRateLimit: TokenRateLimitArgs\n) => {\n  return await fetch(`${API_PREFIX}/users`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(tokenRateLimit),\n  });\n};\n\n// User Group Token Limits (EE Only)\nexport const insertGroupTokenRateLimit = async (\n  tokenRateLimit: TokenRateLimitArgs,\n  group_id: number\n) => {\n  return await fetch(`${API_PREFIX}/user-group/${group_id}`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(tokenRateLimit),\n  });\n};\n\n// Common Endpoints\n\nexport const deleteTokenRateLimit = async (token_rate_limit_id: number) => {\n  return await fetch(`${API_PREFIX}/rate-limit/${token_rate_limit_id}`, {\n    method: \"DELETE\",\n  });\n};\n\nexport const updateTokenRateLimit = async (\n  token_rate_limit_id: number,\n  tokenRateLimit: TokenRateLimitArgs\n) => {\n  return await fetch(`${API_PREFIX}/rate-limit/${token_rate_limit_id}`, {\n    method: \"PUT\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(tokenRateLimit),\n  });\n};\n"
  },
  {
    "path": "web/src/app/admin/token-rate-limits/page.tsx",
    "content": "\"use client\";\n\nimport SimpleTabs from \"@/refresh-components/SimpleTabs\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { Text } from \"@opal/components\";\nimport { useState } from \"react\";\nimport {\n  insertGlobalTokenRateLimit,\n  insertGroupTokenRateLimit,\n  insertUserTokenRateLimit,\n} from \"./lib\";\nimport { Scope, TokenRateLimit } from \"./types\";\nimport { GenericTokenRateLimitTable } from \"./TokenRateLimitTables\";\nimport { mutate } from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { toast } from \"@/hooks/useToast\";\nimport CreateRateLimitModal from \"./CreateRateLimitModal\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport CreateButton from \"@/refresh-components/buttons/CreateButton\";\nimport { SvgGlobe, SvgUser, SvgUsers } from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.TOKEN_RATE_LIMITS;\nconst GLOBAL_TOKEN_FETCH_URL = SWR_KEYS.globalTokenRateLimits;\nconst USER_TOKEN_FETCH_URL = SWR_KEYS.userTokenRateLimits;\nconst USER_GROUP_FETCH_URL = SWR_KEYS.userGroupTokenRateLimits;\n\nconst GLOBAL_DESCRIPTION =\n  \"Global rate limits apply to all users, user groups, and API keys. When the global \\\n  rate limit is reached, no more tokens can be spent.\";\nconst USER_DESCRIPTION =\n  \"User rate limits apply to individual users. When a user reaches a limit, they will \\\n  be temporarily blocked from spending tokens.\";\nconst USER_GROUP_DESCRIPTION =\n  \"User group rate limits apply to all users in a group. When a group reaches a limit, \\\n  all users in the group will be temporarily blocked from spending tokens, regardless \\\n  of their individual limits. If a user is in multiple groups, the most lenient limit \\\n  will apply.\";\n\nconst handleCreateTokenRateLimit = async (\n  target_scope: Scope,\n  period_hours: number,\n  token_budget: number,\n  group_id: number = -1\n) => {\n  const tokenRateLimitArgs = {\n    enabled: true,\n    token_budget: token_budget,\n    period_hours: period_hours,\n  };\n\n  if (target_scope === Scope.GLOBAL) {\n    return await insertGlobalTokenRateLimit(tokenRateLimitArgs);\n  } else if (target_scope === Scope.USER) {\n    return await insertUserTokenRateLimit(tokenRateLimitArgs);\n  } else if (target_scope === Scope.USER_GROUP) {\n    return await insertGroupTokenRateLimit(tokenRateLimitArgs, group_id);\n  } else {\n    throw new Error(`Invalid target_scope: ${target_scope}`);\n  }\n};\n\nfunction Main() {\n  const [tabIndex, setTabIndex] = useState(0);\n  const [modalIsOpen, setModalIsOpen] = useState(false);\n\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n\n  const updateTable = (target_scope: Scope) => {\n    if (target_scope === Scope.GLOBAL) {\n      mutate(GLOBAL_TOKEN_FETCH_URL);\n      setTabIndex(0);\n    } else if (target_scope === Scope.USER) {\n      mutate(USER_TOKEN_FETCH_URL);\n      setTabIndex(1);\n    } else if (target_scope === Scope.USER_GROUP) {\n      mutate(USER_GROUP_FETCH_URL);\n      setTabIndex(2);\n    }\n  };\n\n  const handleSubmit = (\n    target_scope: Scope,\n    period_hours: number,\n    token_budget: number,\n    group_id: number = -1\n  ) => {\n    handleCreateTokenRateLimit(\n      target_scope,\n      period_hours,\n      token_budget,\n      group_id\n    )\n      .then(() => {\n        setModalIsOpen(false);\n        toast.success(\"Token rate limit created!\");\n        updateTable(target_scope);\n      })\n      .catch((error) => {\n        toast.error(error.message);\n      });\n  };\n\n  return (\n    <Section alignItems=\"stretch\" justifyContent=\"start\" height=\"auto\">\n      <Text as=\"p\">\n        Token rate limits enable you control how many tokens can be spent in a\n        given time period. With token rate limits, you can:\n      </Text>\n\n      <ul className=\"list-disc ml-4\">\n        <li>\n          <Text as=\"p\">\n            Set a global rate limit to control your team&apos;s overall token\n            spend.\n          </Text>\n        </li>\n        {isPaidEnterpriseFeaturesEnabled && (\n          <>\n            <li>\n              <Text as=\"p\">\n                Set rate limits for users to ensure that no single user can\n                spend too many tokens.\n              </Text>\n            </li>\n            <li>\n              <Text as=\"p\">\n                Set rate limits for user groups to control token spend for your\n                teams.\n              </Text>\n            </li>\n          </>\n        )}\n        <li>\n          <Text as=\"p\">Enable and disable rate limits on the fly.</Text>\n        </li>\n      </ul>\n\n      <CreateButton onClick={() => setModalIsOpen(true)}>\n        Create a Token Rate Limit\n      </CreateButton>\n\n      {isPaidEnterpriseFeaturesEnabled ? (\n        <SimpleTabs\n          tabs={{\n            \"0\": {\n              name: \"Global\",\n              icon: SvgGlobe,\n              content: (\n                <GenericTokenRateLimitTable\n                  fetchUrl={GLOBAL_TOKEN_FETCH_URL}\n                  title={\"Global Token Rate Limits\"}\n                  description={GLOBAL_DESCRIPTION}\n                />\n              ),\n            },\n            \"1\": {\n              name: \"User\",\n              icon: SvgUser,\n              content: (\n                <GenericTokenRateLimitTable\n                  fetchUrl={USER_TOKEN_FETCH_URL}\n                  title={\"User Token Rate Limits\"}\n                  description={USER_DESCRIPTION}\n                />\n              ),\n            },\n            \"2\": {\n              name: \"User Groups\",\n              icon: SvgUsers,\n              content: (\n                <GenericTokenRateLimitTable\n                  fetchUrl={USER_GROUP_FETCH_URL}\n                  title={\"User Group Token Rate Limits\"}\n                  description={USER_GROUP_DESCRIPTION}\n                  responseMapper={(data: Record<string, TokenRateLimit[]>) =>\n                    Object.entries(data).flatMap(([group_name, elements]) =>\n                      elements.map((element) => ({\n                        ...element,\n                        group_name,\n                      }))\n                    )\n                  }\n                />\n              ),\n            },\n          }}\n          value={tabIndex.toString()}\n          onValueChange={(val) => setTabIndex(parseInt(val))}\n        />\n      ) : (\n        <GenericTokenRateLimitTable\n          fetchUrl={GLOBAL_TOKEN_FETCH_URL}\n          title={\"Global Token Rate Limits\"}\n          description={GLOBAL_DESCRIPTION}\n        />\n      )}\n\n      <CreateRateLimitModal\n        isOpen={modalIsOpen}\n        setIsOpen={() => setModalIsOpen(false)}\n        onSubmit={handleSubmit}\n        forSpecificScope={\n          isPaidEnterpriseFeaturesEnabled ? undefined : Scope.GLOBAL\n        }\n      />\n    </Section>\n  );\n}\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header title={route.title} icon={route.icon} separator />\n      <SettingsLayouts.Body>\n        <Main />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/admin/token-rate-limits/types.ts",
    "content": "export enum Scope {\n  USER = \"user\",\n  USER_GROUP = \"user_group\",\n  GLOBAL = \"global\",\n}\n\nexport interface TokenRateLimitArgs {\n  enabled: boolean;\n  token_budget: number;\n  period_hours: number;\n}\n\nexport interface TokenRateLimit {\n  token_id: number;\n  enabled: boolean;\n  token_budget: number;\n  period_hours: number;\n}\n\nexport interface TokenRateLimitDisplay extends TokenRateLimit {\n  group_name?: string;\n}\n"
  },
  {
    "path": "web/src/app/admin/users/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/UsersPage\";\n"
  },
  {
    "path": "web/src/app/anonymous/[id]/AnonymousPage.tsx",
    "content": "\"use client\";\nimport { redirect } from \"next/navigation\";\nimport { useEffect } from \"react\";\n\nexport default function AnonymousPage({\n  anonymousPath,\n}: {\n  anonymousPath: string;\n}) {\n  const loginAsAnonymousUser = async () => {\n    try {\n      const response = await fetch(\n        `/api/tenants/anonymous-user?anonymous_user_path=${encodeURIComponent(\n          anonymousPath\n        )}`,\n        {\n          method: \"POST\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n          credentials: \"same-origin\",\n        }\n      );\n\n      if (!response.ok) {\n        console.error(\"Failed to login as anonymous user\", response);\n        throw new Error(\"Failed to login as anonymous user\");\n      }\n      // Redirect to the chat page and force a refresh\n      window.location.href = \"/app\";\n    } catch (error) {\n      console.error(\"Error logging in as anonymous user:\", error);\n      redirect(\"/auth/signup?error=Anonymous\");\n    }\n  };\n\n  useEffect(() => {\n    loginAsAnonymousUser();\n  }, []);\n\n  return (\n    <div className=\"flex flex-col items-center justify-center min-h-screen bg-background-100\">\n      <div className=\"bg-white p-8 rounded-lg shadow-md\">\n        <h1 className=\"text-2xl font-bold mb-4 text-center\">\n          Redirecting you to the chat page...\n        </h1>\n        <div className=\"flex justify-center\">\n          <div className=\"animate-spin rounded-full h-16 w-16 border-t-4 border-b-4 border-background-800\"></div>\n        </div>\n        <p className=\"mt-4 text-text-600 text-center\">\n          Please wait while we set up your anonymous session.\n        </p>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/anonymous/[id]/page.tsx",
    "content": "import AnonymousPage from \"./AnonymousPage\";\n\nexport default async function Page(props: { params: Promise<{ id: string }> }) {\n  const params = await props.params;\n\n  return <AnonymousPage anonymousPath={params.id} />;\n}\n"
  },
  {
    "path": "web/src/app/api/[...path]/route.ts",
    "content": "import { INTERNAL_URL } from \"@/lib/constants\";\nimport { NextRequest, NextResponse } from \"next/server\";\n\n/* NextJS is annoying and makes use use a separate function for\neach request type >:( */\n\nexport async function GET(\n  request: NextRequest,\n  props: { params: Promise<{ path: string[] }> }\n) {\n  const params = await props.params;\n  return handleRequest(request, params.path);\n}\n\nexport async function POST(\n  request: NextRequest,\n  props: { params: Promise<{ path: string[] }> }\n) {\n  const params = await props.params;\n  return handleRequest(request, params.path);\n}\n\nexport async function PUT(\n  request: NextRequest,\n  props: { params: Promise<{ path: string[] }> }\n) {\n  const params = await props.params;\n  return handleRequest(request, params.path);\n}\n\nexport async function PATCH(\n  request: NextRequest,\n  props: { params: Promise<{ path: string[] }> }\n) {\n  const params = await props.params;\n  return handleRequest(request, params.path);\n}\n\nexport async function DELETE(\n  request: NextRequest,\n  props: { params: Promise<{ path: string[] }> }\n) {\n  const params = await props.params;\n  return handleRequest(request, params.path);\n}\n\nexport async function HEAD(\n  request: NextRequest,\n  props: { params: Promise<{ path: string[] }> }\n) {\n  const params = await props.params;\n  return handleRequest(request, params.path);\n}\n\nexport async function OPTIONS(\n  request: NextRequest,\n  props: { params: Promise<{ path: string[] }> }\n) {\n  const params = await props.params;\n  return handleRequest(request, params.path);\n}\n\nasync function handleRequest(request: NextRequest, path: string[]) {\n  if (\n    process.env.NODE_ENV !== \"development\" &&\n    // NOTE: Set this environment variable to 'true' for preview environments\n    // Where you want finer-grained control over API access\n    process.env.OVERRIDE_API_PRODUCTION !== \"true\"\n  ) {\n    return NextResponse.json(\n      {\n        message:\n          \"This API is only available in development mode. In production, something else (e.g. nginx) should handle this.\",\n      },\n      { status: 404 }\n    );\n  }\n\n  try {\n    const backendUrl = new URL(`${INTERNAL_URL}/${path.join(\"/\")}`);\n\n    // Get the URL parameters from the request\n    const urlParams = new URLSearchParams(request.url.split(\"?\")[1]);\n\n    // Append the URL parameters to the backend URL\n    urlParams.forEach((value, key) => {\n      backendUrl.searchParams.append(key, value);\n    });\n\n    // Build headers, optionally injecting debug auth cookie\n    const headers = new Headers(request.headers);\n    if (\n      process.env.DEBUG_AUTH_COOKIE &&\n      process.env.NODE_ENV === \"development\"\n    ) {\n      // Inject the debug auth cookie for local development against remote backend\n      // Get from cloud site: DevTools → Application → Cookies → fastapiusersauth\n      const existingCookies = headers.get(\"cookie\") || \"\";\n      const debugCookie = `fastapiusersauth=${process.env.DEBUG_AUTH_COOKIE}`;\n      headers.set(\n        \"cookie\",\n        existingCookies ? `${existingCookies}; ${debugCookie}` : debugCookie\n      );\n    }\n\n    const response = await fetch(backendUrl, {\n      method: request.method,\n      headers: headers,\n      body: request.body,\n      signal: request.signal,\n      redirect: \"manual\",\n      // @ts-ignore\n      duplex: \"half\",\n    });\n\n    const setCookies =\n      // @ts-ignore - undici provides getSetCookie in Node.\n      response.headers.getSetCookie?.() ??\n      (response.headers.get(\"set-cookie\")\n        ? [response.headers.get(\"set-cookie\")]\n        : []);\n\n    const responseHeaders = new Headers(response.headers);\n    responseHeaders.delete(\"set-cookie\");\n\n    // Check if the response is a stream\n    if (\n      response.headers.get(\"Transfer-Encoding\") === \"chunked\" ||\n      response.headers.get(\"Content-Type\")?.includes(\"stream\")\n    ) {\n      // If it's a stream, create a TransformStream to pass the data through\n      const { readable, writable } = new TransformStream();\n      response.body?.pipeTo(writable);\n\n      const proxyResponse = new NextResponse(readable, {\n        status: response.status,\n        headers: responseHeaders,\n      });\n      for (const cookie of setCookies) {\n        if (cookie) {\n          proxyResponse.headers.append(\"set-cookie\", cookie);\n        }\n      }\n      return proxyResponse;\n    } else {\n      const proxyResponse = new NextResponse(response.body, {\n        status: response.status,\n        headers: responseHeaders,\n      });\n      for (const cookie of setCookies) {\n        if (cookie) {\n          proxyResponse.headers.append(\"set-cookie\", cookie);\n        }\n      }\n      return proxyResponse;\n    }\n  } catch (error: unknown) {\n    console.error(\"Proxy error:\", error);\n    return NextResponse.json(\n      {\n        message: \"Proxy error\",\n        error:\n          error instanceof Error ? error.message : \"An unknown error occurred\",\n      },\n      { status: 500 }\n    );\n  }\n}\n"
  },
  {
    "path": "web/src/app/api/chat/mcp/oauth/callback/route.ts",
    "content": "import { NextRequest, NextResponse } from \"next/server\";\n\n// Proxies browser callback to backend OAuth callback endpoint and then\n// redirects back to the chat UI.\n\nexport async function GET(req: NextRequest) {\n  const url = new URL(req.url);\n  const code = url.searchParams.get(\"code\");\n  const state = url.searchParams.get(\"state\");\n  const serverId =\n    url.searchParams.get(\"server_id\") || url.searchParams.get(\"serverId\");\n  const codeVerifier = url.searchParams.get(\"code_verifier\");\n\n  if (!code || !serverId) {\n    return NextResponse.json(\n      { error: \"Missing code or server_id\" },\n      { status: 400 }\n    );\n  }\n\n  try {\n    const resp = await fetch(\n      `${\n        process.env.NEXT_PUBLIC_ONYX_BACKEND_URL || \"\"\n      }/api/mcp/oauth/callback`,\n      {\n        method: \"POST\",\n        headers: { \"Content-Type\": \"application/json\" },\n        body: JSON.stringify({\n          server_id: serverId,\n          code,\n          state,\n          code_verifier: codeVerifier,\n          transport: \"streamable-http\",\n        }),\n        // Ensure cookies/auth forwarded if needed\n        credentials: \"include\",\n      }\n    );\n\n    if (!resp.ok) {\n      const err = await resp.json().catch(() => ({}) as any);\n      return NextResponse.json(\n        { error: err.detail || \"OAuth callback failed\" },\n        { status: 400 }\n      );\n    }\n\n    // Check if this is an admin OAuth flow\n    const isAdminFlow = url.searchParams.get(\"admin\") === \"true\";\n\n    // Redirect back to appropriate page\n    let redirectTo = url.searchParams.get(\"redirect_to\");\n    if (!redirectTo) {\n      if (isAdminFlow) {\n        // For admin flow, redirect back to the MCP edit page\n        redirectTo = `/admin/actions/edit-mcp?server_id=${serverId}`;\n      } else {\n        // For user flow, redirect to chat\n        redirectTo = \"/app\";\n      }\n    }\n\n    return NextResponse.redirect(new URL(redirectTo, req.url));\n  } catch (e) {\n    return NextResponse.json(\n      { error: \"OAuth callback error\" },\n      { status: 500 }\n    );\n  }\n}\n"
  },
  {
    "path": "web/src/app/app/agents/create/page.tsx",
    "content": "import AgentEditorPage from \"@/refresh-pages/AgentEditorPage\";\nimport * as AppLayouts from \"@/layouts/app-layouts\";\n\nexport default async function Page() {\n  return (\n    <AppLayouts.Root>\n      <AgentEditorPage />\n    </AppLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/agents/edit/[id]/page.tsx",
    "content": "\"use client\";\n\nimport { use, useEffect } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport { useAgent } from \"@/hooks/useAgents\";\nimport AgentEditorPage from \"@/refresh-pages/AgentEditorPage\";\nimport * as AppLayouts from \"@/layouts/app-layouts\";\n\nexport interface PageProps {\n  params: Promise<{ id: string }>;\n}\n\nexport default function Page(props: PageProps) {\n  const router = useRouter();\n  const { id } = use(props.params);\n  const agentId = parseInt(id);\n\n  // Call hook unconditionally (passes null when ID is invalid)\n  const { agent, isLoading, refresh } = useAgent(\n    isNaN(agentId) ? null : agentId\n  );\n\n  // Handle invalid ID (NaN)\n  useEffect(() => {\n    if (isNaN(agentId)) {\n      router.push(\"/app\");\n    }\n  }, [agentId, router]);\n\n  // Redirect to home if agent not found after loading completes\n  useEffect(() => {\n    if (!isLoading && !agent) {\n      router.push(\"/app\");\n    }\n  }, [isLoading, agent, router]);\n\n  // Show nothing while redirecting or loading\n  if (isLoading || !agent) return null;\n\n  return (\n    <AppLayouts.Root>\n      <AgentEditorPage agent={agent} refreshAgent={refresh} />\n    </AppLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/agents/page.tsx",
    "content": "import AgentsNavigationPage from \"@/refresh-pages/AgentsNavigationPage\";\nimport * as AppLayouts from \"@/layouts/app-layouts\";\n\nexport default async function Page() {\n  return (\n    <AppLayouts.Root>\n      <AgentsNavigationPage />\n    </AppLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/components/AgentDescription.tsx",
    "content": "\"use client\";\n\nimport Text from \"@/refresh-components/texts/Text\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\n\nexport interface AgentDescriptionProps {\n  agent?: MinimalPersonaSnapshot;\n}\n\nexport default function AgentDescription({ agent }: AgentDescriptionProps) {\n  if (!agent?.description) return null;\n\n  return (\n    <Text\n      as=\"p\"\n      secondaryBody\n      text03\n      className=\"w-full min-w-0 text-center break-words\"\n    >\n      {agent.description}\n    </Text>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/components/AppPopup.tsx",
    "content": "\"use client\";\n\nimport Modal from \"@/refresh-components/Modal\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { FormField } from \"@/refresh-components/form/FormField\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport { useContext, useEffect, useState } from \"react\";\nimport ReactMarkdown from \"react-markdown\";\nimport remarkGfm from \"remark-gfm\";\nimport { transformLinkUri } from \"@/lib/utils\";\nimport { SvgAlertCircle } from \"@opal/icons\";\nimport { IconProps, OnyxIcon } from \"@/components/icons/icons\";\n\nconst ALL_USERS_INITIAL_POPUP_FLOW_COMPLETED =\n  \"allUsersInitialPopupFlowCompleted\";\n\nconst CustomLogoHeaderIcon = ({ className, size = 24 }: IconProps) => (\n  <img\n    src=\"/api/enterprise-settings/logo\"\n    alt=\"Logo\"\n    style={{ width: size, height: size, objectFit: \"contain\" }}\n    className={className}\n  />\n);\n\nexport function AppPopup() {\n  const [completedFlow, setCompletedFlow] = useState(true);\n  const [showConsentError, setShowConsentError] = useState(false);\n  const [consentChecked, setConsentChecked] = useState(false);\n\n  useEffect(() => {\n    setCompletedFlow(\n      localStorage.getItem(ALL_USERS_INITIAL_POPUP_FLOW_COMPLETED) === \"true\"\n    );\n  }, []);\n\n  const settings = useContext(SettingsContext);\n  const enterpriseSettings = settings?.enterpriseSettings;\n  const isConsentScreen = enterpriseSettings?.enable_consent_screen;\n\n  if (\n    !enterpriseSettings?.custom_popup_content ||\n    completedFlow ||\n    !enterpriseSettings?.show_first_visit_notice\n  ) {\n    return null;\n  }\n\n  const popupTitle = enterpriseSettings?.custom_popup_header;\n\n  const popupContent = enterpriseSettings?.custom_popup_content;\n\n  const hasApplicationName = Boolean(\n    enterpriseSettings?.application_name?.trim()\n  );\n  const hasCustomLogo = Boolean(enterpriseSettings?.use_custom_logo);\n  const logoDisplayStyle = enterpriseSettings?.logo_display_style;\n\n  // Header icon rules:\n  // - If neither app name nor custom logo exists -> show Onyx icon\n  // - If logo display is \"name_only\" -> show alert icon\n  // - Otherwise -> show uploaded custom logo (fallback to Onyx icon)\n  const headerIcon =\n    !hasApplicationName && !hasCustomLogo\n      ? (props: IconProps) => <OnyxIcon size={24} {...props} />\n      : logoDisplayStyle === \"name_only\"\n        ? SvgAlertCircle\n        : hasCustomLogo\n          ? CustomLogoHeaderIcon\n          : (props: IconProps) => <OnyxIcon size={24} {...props} />;\n\n  return (\n    <Modal open onOpenChange={() => {}}>\n      <Modal.Content width=\"sm\" height=\"lg\">\n        <Modal.Header\n          icon={headerIcon}\n          title={popupTitle || \"Welcome to Onyx!\"}\n        />\n        <Modal.Body>\n          <div className=\"overflow-y-auto text-left\">\n            <ReactMarkdown\n              className=\"prose prose-neutral dark:prose-invert max-w-full\"\n              components={{\n                a: ({ node, ...props }) => (\n                  <a\n                    {...props}\n                    className=\"text-link hover:text-link-hover\"\n                    target=\"_blank\"\n                    rel=\"noopener noreferrer\"\n                  />\n                ),\n                p: ({ node, ...props }) => (\n                  <Text as=\"p\" mainUiBody text03 {...props} />\n                ),\n                strong: ({ node, ...props }) => (\n                  <Text mainUiBody text03 {...props} />\n                ),\n                h1: ({ node, ...props }) => (\n                  <Text as=\"p\" headingH1 text03 {...props} />\n                ),\n                h2: ({ node, ...props }) => (\n                  <Text as=\"p\" headingH2 text03 {...props} />\n                ),\n                h3: ({ node, ...props }) => (\n                  <Text as=\"p\" headingH3 text03 {...props} />\n                ),\n                li: ({ node, ...props }) => (\n                  <Text as=\"li\" mainUiBody text03 {...props} />\n                ),\n              }}\n              remarkPlugins={[remarkGfm]}\n              urlTransform={transformLinkUri}\n            >\n              {popupContent}\n            </ReactMarkdown>\n            {isConsentScreen && enterpriseSettings?.consent_screen_prompt && (\n              <FormField\n                state={showConsentError ? \"error\" : \"idle\"}\n                className=\"mt-6\"\n              >\n                <div className=\"flex items-center gap-1\">\n                  <FormField.Control>\n                    <Checkbox\n                      aria-label=\"Consent checkbox\"\n                      checked={consentChecked}\n                      onCheckedChange={(checked) => {\n                        setConsentChecked(checked);\n                        if (checked) {\n                          setShowConsentError(false);\n                        }\n                      }}\n                    />\n                  </FormField.Control>\n                  <FormField.Label>\n                    <ReactMarkdown\n                      className=\"prose prose-neutral dark:prose-invert max-w-full\"\n                      components={{\n                        a: ({ node, ...props }) => (\n                          <a\n                            {...props}\n                            className=\"text-link hover:text-link-hover\"\n                            target=\"_blank\"\n                            rel=\"noopener noreferrer\"\n                          />\n                        ),\n                        p: ({ node, ...props }) => (\n                          <Text\n                            as=\"p\"\n                            mainUiBody\n                            text04\n                            className=\"!my-0\" //dont remove the !my-0 class, it's important for the markdown to render without any alignment issues\n                            {...props}\n                          />\n                        ),\n                        strong: ({ node, ...props }) => (\n                          <Text mainUiBody text04 {...props} />\n                        ),\n                        li: ({ node, ...props }) => (\n                          <Text as=\"li\" mainUiBody text04 {...props} />\n                        ),\n                      }}\n                      remarkPlugins={[remarkGfm]}\n                      urlTransform={transformLinkUri}\n                    >\n                      {enterpriseSettings.consent_screen_prompt}\n                    </ReactMarkdown>\n                  </FormField.Label>\n                </div>\n                <FormField.Message\n                  messages={{\n                    error:\n                      \"You need to agree to the terms to access the application.\",\n                  }}\n                />\n              </FormField>\n            )}\n          </div>\n        </Modal.Body>\n        <Modal.Footer>\n          <Button\n            onClick={() => {\n              if (isConsentScreen && !consentChecked) {\n                setShowConsentError(true);\n                return;\n              }\n              localStorage.setItem(\n                ALL_USERS_INITIAL_POPUP_FLOW_COMPLETED,\n                \"true\"\n              );\n              setCompletedFlow(true);\n            }}\n          >\n            Start\n          </Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/components/WelcomeMessage.tsx",
    "content": "\"use client\";\n\nimport Logo from \"@/refresh-components/Logo\";\nimport {\n  getRandomGreeting,\n  GREETING_MESSAGES,\n} from \"@/lib/chat/greetingMessages\";\nimport AgentAvatar from \"@/refresh-components/avatars/AgentAvatar\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { useState, useEffect } from \"react\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport FrostedDiv from \"@/refresh-components/FrostedDiv\";\n\nexport interface WelcomeMessageProps {\n  agent?: MinimalPersonaSnapshot;\n  isDefaultAgent: boolean;\n}\n\nexport default function WelcomeMessage({\n  agent,\n  isDefaultAgent,\n}: WelcomeMessageProps) {\n  const settings = useSettingsContext();\n  const enterpriseSettings = settings?.enterpriseSettings;\n\n  // Use a stable default for SSR, then randomize on client after hydration\n  const [greeting, setGreeting] = useState(GREETING_MESSAGES[0]);\n\n  useEffect(() => {\n    if (enterpriseSettings?.custom_greeting_message) {\n      setGreeting(enterpriseSettings.custom_greeting_message);\n    } else {\n      setGreeting(getRandomGreeting());\n    }\n  }, [enterpriseSettings?.custom_greeting_message]);\n\n  let content: React.ReactNode = null;\n\n  if (isDefaultAgent) {\n    content = (\n      <div data-testid=\"onyx-logo\" className=\"flex flex-row items-center gap-4\">\n        <Logo folded size={32} />\n        <Text as=\"p\" headingH2>\n          {greeting}\n        </Text>\n      </div>\n    );\n  } else if (agent) {\n    content = (\n      <>\n        <div\n          data-testid=\"agent-name-display\"\n          className=\"flex flex-row items-center gap-3\"\n        >\n          <AgentAvatar agent={agent} size={36} />\n          <Text as=\"p\" headingH2>\n            {agent.name}\n          </Text>\n        </div>\n      </>\n    );\n  }\n\n  // if we aren't using the default agent, we need to wait for the agent info to load\n  // before rendering\n  if (!content) return null;\n\n  return (\n    <FrostedDiv\n      data-testid=\"chat-intro\"\n      className=\"flex flex-col items-center justify-center gap-3 w-full max-w-[var(--app-page-main-content-width)]\"\n    >\n      {content}\n    </FrostedDiv>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/components/files/InputBarPreview.tsx",
    "content": "import { useRef, useState } from \"react\";\nimport { FileDescriptor } from \"@/app/app/interfaces\";\nimport { FiLoader, FiFileText } from \"react-icons/fi\";\nimport { InputBarPreviewImage } from \"./images/InputBarPreviewImage\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { Button } from \"@opal/components\";\nimport { SvgX } from \"@opal/icons\";\nexport interface InputBarPreviewImageProviderProps {\n  file: FileDescriptor;\n  onDelete: () => void;\n  isUploading: boolean;\n}\n\nexport function InputBarPreviewImageProvider({\n  file,\n  onDelete,\n  isUploading,\n}: InputBarPreviewImageProviderProps) {\n  const [isHovered, setIsHovered] = useState(false);\n\n  return (\n    <div\n      className=\"h-6 relative\"\n      onMouseEnter={() => setIsHovered(true)}\n      onMouseLeave={() => setIsHovered(false)}\n    >\n      {isHovered && (\n        <Button\n          icon={SvgX}\n          onClick={onDelete}\n          prominence=\"tertiary\"\n          size=\"sm\"\n        />\n      )}\n      {isUploading && (\n        <div\n          className=\"\n            absolute\n            inset-0\n            flex\n            items-center\n            justify-center\n            bg-opacity-50\n            rounded-lg\n            z-0\n          \"\n        >\n          <FiLoader className=\"animate-spin text-white\" />\n        </div>\n      )}\n      <InputBarPreviewImage fileId={file.id} />\n    </div>\n  );\n}\n\nexport interface InputBarPreviewProps {\n  file: FileDescriptor;\n  onDelete: () => void;\n  isUploading: boolean;\n}\n\nexport function InputBarPreview({\n  file,\n  onDelete,\n  isUploading,\n}: InputBarPreviewProps) {\n  const fileNameRef = useRef<HTMLDivElement>(null);\n\n  return (\n    <div className=\"relative\">\n      {isUploading && (\n        <div\n          className=\"\n            absolute\n            inset-0\n            flex\n            items-center\n            justify-center\n            bg-opacity-50\n            rounded-lg\n            z-0\n          \"\n        >\n          <FiLoader size={12} className=\"animate-spin text-white\" />\n        </div>\n      )}\n      <div\n        className={`\n        flex\n        items-center\n        px-2\n        bg-accent-background-hovered\n        border\n        gap-x-1.5\n        border-border\n        rounded-md\n        box-border\n        h-8\n      `}\n      >\n        <div className=\"flex-shrink-0\">\n          <div\n            className=\"\n            w-5\n            h-5\n            bg-document\n            flex\n            items-center\n            justify-center\n            rounded-md\n          \"\n          >\n            <FiFileText size={12} className=\"text-white\" />\n          </div>\n        </div>\n\n        <SimpleTooltip tooltip={file.name ?? undefined}>\n          <div\n            ref={fileNameRef}\n            className={`font-medium text-sm line-clamp-1 break-all ellipses max-w-48`}\n          >\n            {file.name}\n          </div>\n        </SimpleTooltip>\n\n        <Button\n          onClick={onDelete}\n          icon={SvgX}\n          prominence=\"tertiary\"\n          size=\"sm\"\n        />\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/components/files/images/FullImageModal.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport { buildImgUrl } from \"@/app/app/components/files/images/utils\";\nimport { cn } from \"@/lib/utils\";\nimport * as Dialog from \"@radix-ui/react-dialog\";\n\ninterface FullImageModalProps {\n  fileId: string;\n  open: boolean;\n  onOpenChange: (open: boolean) => void;\n}\n\nexport function FullImageModal({\n  fileId,\n  open,\n  onOpenChange,\n}: FullImageModalProps) {\n  // pre-fetch image\n  useEffect(() => {\n    const img = new Image();\n    img.src = buildImgUrl(fileId);\n  }, [fileId]);\n\n  return (\n    <Dialog.Root open={open} onOpenChange={onOpenChange}>\n      <Dialog.Portal>\n        <Dialog.Overlay className=\"fixed inset-0 bg-black bg-opacity-80 z-50 backdrop-blur-xl\" />\n        <Dialog.Content\n          className={cn(\n            \"fixed inset-0 flex items-center justify-center p-4 z-[100]\",\n            \"max-w-screen-lg h-fit top-1/2 left-1/2 -translate-y-2/4 -translate-x-2/4\",\n            \"focus:outline-none\"\n          )}\n        >\n          <img\n            src={buildImgUrl(fileId)}\n            alt=\"Uploaded image\"\n            className=\"max-w-full max-h-full\"\n          />\n        </Dialog.Content>\n      </Dialog.Portal>\n    </Dialog.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/components/files/images/InMessageImage.tsx",
    "content": "import { memo, useState } from \"react\";\nimport { SvgDownload } from \"@opal/icons\";\nimport { ImageShape } from \"@/app/app/services/streamingModels\";\nimport { FullImageModal } from \"@/app/app/components/files/images/FullImageModal\";\nimport { buildImgUrl } from \"@/app/app/components/files/images/utils\";\nimport { Button } from \"@opal/components\";\nimport { Hoverable } from \"@opal/core\";\nimport { cn } from \"@/lib/utils\";\n\nconst DEFAULT_SHAPE: ImageShape = \"square\";\n\nconst SHAPE_CLASSES: Record<ImageShape, { container: string; image: string }> =\n  {\n    square: {\n      container: \"max-w-96 max-h-96\",\n      image: \"max-w-96 max-h-96\",\n    },\n    landscape: {\n      container: \"max-w-[28rem] max-h-72\",\n      image: \"max-w-[28rem] max-h-72\",\n    },\n    portrait: {\n      container: \"max-w-72 max-h-[28rem]\",\n      image: \"max-w-72 max-h-[28rem]\",\n    },\n  };\n\n// Used to stop image flashing as images are loaded and response continues\nconst loadedImages = new Set<string>();\n\ninterface InMessageImageProps {\n  fileId: string;\n  fileName?: string;\n  shape?: ImageShape;\n}\n\nexport const InMessageImage = memo(function InMessageImage({\n  fileId,\n  fileName,\n  shape = DEFAULT_SHAPE,\n}: InMessageImageProps) {\n  const [fullImageShowing, setFullImageShowing] = useState(false);\n  const [imageLoaded, setImageLoaded] = useState(loadedImages.has(fileId));\n\n  const normalizedShape = SHAPE_CLASSES[shape] ? shape : DEFAULT_SHAPE;\n  const { container: shapeContainerClasses, image: shapeImageClasses } =\n    SHAPE_CLASSES[normalizedShape];\n\n  const handleDownload = async (e: React.MouseEvent) => {\n    e.stopPropagation(); // Prevent opening the full image modal\n\n    try {\n      const response = await fetch(buildImgUrl(fileId));\n      if (!response.ok) {\n        console.error(\"Failed to download image:\", response.status);\n        return;\n      }\n      const blob = await response.blob();\n      const url = window.URL.createObjectURL(blob);\n      const a = document.createElement(\"a\");\n      a.href = url;\n      a.download = fileName || `image-${fileId}.png`;\n      document.body.appendChild(a);\n      a.click();\n      window.URL.revokeObjectURL(url);\n      document.body.removeChild(a);\n    } catch (error) {\n      console.error(\"Failed to download image:\", error);\n    }\n  };\n\n  return (\n    <>\n      <FullImageModal\n        fileId={fileId}\n        open={fullImageShowing}\n        onOpenChange={(open) => setFullImageShowing(open)}\n      />\n\n      <Hoverable.Root group=\"messageImage\" widthVariant=\"fit\">\n        <div className={cn(\"relative\", shapeContainerClasses)}>\n          {!imageLoaded && (\n            <div className=\"absolute inset-0 bg-background-tint-02 animate-pulse rounded-lg\" />\n          )}\n\n          <img\n            width={1200}\n            height={1200}\n            alt=\"Chat Message Image\"\n            onLoad={() => {\n              loadedImages.add(fileId);\n              setImageLoaded(true);\n            }}\n            className={cn(\n              \"object-contain object-left overflow-hidden rounded-lg w-full h-full transition-opacity duration-300 cursor-pointer\",\n              shapeImageClasses,\n              imageLoaded ? \"opacity-100\" : \"opacity-0\"\n            )}\n            onClick={() => setFullImageShowing(true)}\n            src={buildImgUrl(fileId)}\n            loading=\"lazy\"\n          />\n\n          {/* Download button - appears on hover */}\n          <div className=\"absolute bottom-2 right-2 z-10\">\n            <Hoverable.Item group=\"messageImage\" variant=\"opacity-on-hover\">\n              <Button\n                icon={SvgDownload}\n                tooltip=\"Download\"\n                onClick={handleDownload}\n              />\n            </Hoverable.Item>\n          </div>\n        </div>\n      </Hoverable.Root>\n    </>\n  );\n});\n"
  },
  {
    "path": "web/src/app/app/components/files/images/InputBarPreviewImage.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { buildImgUrl } from \"./utils\";\nimport { FullImageModal } from \"./FullImageModal\";\n\nexport function InputBarPreviewImage({ fileId }: { fileId: string }) {\n  const [fullImageShowing, setFullImageShowing] = useState(false);\n\n  return (\n    <>\n      <FullImageModal\n        fileId={fileId}\n        open={fullImageShowing}\n        onOpenChange={(open) => setFullImageShowing(open)}\n      />\n      <div\n        className={`\n          bg-transparent\n          border-none\n          flex\n          items-center\n          bg-accent-background-hovered\n          border\n          border-border\n          rounded-md\n          box-border\n          h-6\n      `}\n      >\n        <img\n          alt=\"preview\"\n          onClick={() => setFullImageShowing(true)}\n          className=\"h-6 w-6 object-cover rounded-lg bg-background cursor-pointer\"\n          src={buildImgUrl(fileId)}\n        />\n      </div>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/components/files/images/utils.ts",
    "content": "const CHAT_FILE_URL_REGEX = /\\/api\\/chat\\/file\\/([^/?#]+)/;\nconst IMAGE_EXTENSIONS = /\\.(png|jpe?g|gif|webp|svg|bmp|ico|tiff?)$/i;\n\nexport function buildImgUrl(fileId: string) {\n  return `/api/chat/file/${fileId}`;\n}\n\n/**\n * If `href` points to a chat file and `linkText` ends with an image extension,\n * returns the file ID. Otherwise returns null.\n */\nexport function extractChatImageFileId(\n  href: string | undefined,\n  linkText: string\n): string | null {\n  if (!href) return null;\n  const match = CHAT_FILE_URL_REGEX.exec(href);\n  if (!match?.[1]) return null;\n  if (!IMAGE_EXTENSIONS.test(linkText)) return null;\n  return match[1];\n}\n"
  },
  {
    "path": "web/src/app/app/components/folders/FolderDropdown.tsx",
    "content": "import React, { useState, ReactNode, forwardRef } from \"react\";\nimport { Folder } from \"./interfaces\";\nimport { ChatSession } from \"@/app/app/interfaces\";\nimport { Caret } from \"@/components/icons/icons\";\nimport { cn } from \"@/lib/utils\";\n\ninterface FolderDropdownProps {\n  folder: Folder;\n  currentChatId?: string;\n  showShareModal?: (chatSession: ChatSession) => void;\n  closeSidebar?: () => void;\n  children?: ReactNode;\n  index: number;\n}\n\nexport const FolderDropdown = forwardRef<HTMLDivElement, FolderDropdownProps>(\n  ({ folder, children, index }: FolderDropdownProps, ref) => {\n    const [isOpen, setIsOpen] = useState(true);\n\n    return (\n      <div className=\"overflow-visible pt-2 w-full\">\n        <div\n          className=\"sticky top-0 bg-background-sidebar dark:bg-transparent z-10\"\n          style={{ zIndex: 1000 - index }}\n        >\n          <div\n            ref={ref}\n            className={cn(\n              \"flex\",\n              \"overflow-visible\",\n              \"items-center\",\n              \"w-full\",\n              \"text-text-darker\",\n              \"rounded-md\",\n              \"p-1\",\n              \"bg-background-sidebar\",\n              \"dark:bg-[#000]\",\n              \"sticky\",\n              \"top-0\"\n            )}\n            style={{ zIndex: 10 - index }}\n          >\n            <button\n              className=\"flex overflow-hidden bg-background-sidebar dark:bg-[#000] items-center flex-grow\"\n              onClick={() => setIsOpen(!isOpen)}\n            >\n              {isOpen ? (\n                <Caret size={16} className=\"mr-1\" />\n              ) : (\n                <Caret size={16} className=\"-rotate-90 mr-1\" />\n              )}\n              <div className=\"flex items-center\">\n                <span className=\"text-sm font-[500]\">{folder.folder_name}</span>\n              </div>\n            </button>\n          </div>\n          {isOpen && (\n            <div className=\"overflow-visible mr-3 ml-1 mt-1\">{children}</div>\n          )}\n        </div>\n      </div>\n    );\n  }\n);\n\nFolderDropdown.displayName = \"FolderDropdown\";\n"
  },
  {
    "path": "web/src/app/app/components/folders/interfaces.ts",
    "content": "import { ChatSession } from \"@/app/app/interfaces\";\n\nexport interface Folder {\n  folder_id?: number;\n  folder_name: string;\n  display_priority: number;\n  chat_sessions: ChatSession[];\n}\n"
  },
  {
    "path": "web/src/app/app/components/modifiers/SelectedDocuments.tsx",
    "content": "import { BasicClickable } from \"@/components/BasicClickable\";\nimport { OnyxDocument } from \"@/lib/search/interfaces\";\nimport { FiBook } from \"react-icons/fi\";\n\nexport function SelectedDocuments({\n  selectedDocuments,\n}: {\n  selectedDocuments: OnyxDocument[];\n}) {\n  if (selectedDocuments.length === 0) {\n    return null;\n  }\n\n  return (\n    <BasicClickable>\n      <div className=\"flex text-xs max-w-md overflow-hidden\">\n        <FiBook className=\"my-auto mr-1\" />{\" \"}\n        <div className=\"w-fit whitespace-nowrap\">\n          Chatting with {selectedDocuments.length} Selected Documents\n        </div>\n      </div>\n    </BasicClickable>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/components/projects/ProjectChatSessionList.tsx",
    "content": "\"use client\";\n\nimport React, { useMemo } from \"react\";\nimport Link from \"next/link\";\nimport { ChatSessionMorePopup } from \"@/components/sidebar/ChatSessionMorePopup\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport { ChatSession } from \"@/app/app/interfaces\";\nimport AgentAvatar from \"@/refresh-components/avatars/AgentAvatar\";\nimport { useAgents } from \"@/hooks/useAgents\";\nimport { formatRelativeTime } from \"./project_utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\nimport { UNNAMED_CHAT } from \"@/lib/constants\";\nimport ChatSessionSkeleton from \"@/refresh-components/skeletons/ChatSessionSkeleton\";\nimport { SvgBubbleText } from \"@opal/icons\";\n\nexport default function ProjectChatSessionList() {\n  const {\n    currentProjectDetails,\n    currentProjectId,\n    refreshCurrentProjectDetails,\n    isLoadingProjectDetails,\n  } = useProjectsContext();\n  const { agents } = useAgents();\n  const [isRenamingChat, setIsRenamingChat] = React.useState<string | null>(\n    null\n  );\n  const [hoveredChatId, setHoveredChatId] = React.useState<string | null>(null);\n\n  const projectChats: ChatSession[] = useMemo(() => {\n    const sessions = currentProjectDetails?.project?.chat_sessions || [];\n    return [...sessions].sort(\n      (a, b) =>\n        new Date(b.time_updated).getTime() - new Date(a.time_updated).getTime()\n    );\n  }, [currentProjectDetails?.project?.chat_sessions]);\n\n  if (!currentProjectId) return null;\n\n  return (\n    <div className=\"flex flex-col gap-2 px-2 w-full mx-auto mt-4\">\n      <div className=\"flex items-center pl-2\">\n        <Text as=\"p\" text02 secondaryBody>\n          Recent Chats\n        </Text>\n      </div>\n\n      {isLoadingProjectDetails && !currentProjectDetails ? (\n        <div className=\"flex flex-col gap-2\">\n          <ChatSessionSkeleton />\n          <ChatSessionSkeleton />\n          <ChatSessionSkeleton />\n        </div>\n      ) : projectChats.length === 0 ? (\n        <Text as=\"p\" text02 secondaryBody className=\"p-2\">\n          No chats yet.\n        </Text>\n      ) : (\n        <div className=\"flex flex-col gap-2\">\n          {projectChats.map((chat) => (\n            <Link\n              key={chat.id}\n              href={{ pathname: \"/app\", query: { chatId: chat.id } }}\n              className=\"relative flex w-full\"\n              onMouseEnter={() => setHoveredChatId(chat.id)}\n              onMouseLeave={() => setHoveredChatId(null)}\n            >\n              <div\n                className={cn(\n                  \"w-full rounded-08 py-2 transition-colors p-1.5\",\n                  hoveredChatId === chat.id && \"bg-background-tint-02\"\n                )}\n              >\n                <div className=\"flex gap-3 min-w-0 w-full\">\n                  <div className=\"flex h-full w-fit pt-1 pl-1\">\n                    {(() => {\n                      const personaIdToFeatured =\n                        currentProjectDetails?.persona_id_to_is_featured || {};\n                      const isFeatured = personaIdToFeatured[chat.persona_id];\n                      if (isFeatured === false) {\n                        const agent = agents.find(\n                          (a) => a.id === chat.persona_id\n                        );\n                        if (agent) {\n                          return (\n                            <div className=\"h-full pt-1\">\n                              <AgentAvatar agent={agent} size={18} />\n                            </div>\n                          );\n                        }\n                      }\n                      return (\n                        <SvgBubbleText className=\"h-4 w-4 stroke-text-02\" />\n                      );\n                    })()}\n                  </div>\n                  <div className=\"flex flex-col w-full\">\n                    <div className=\"flex items-center gap-1 w-full justify-between\">\n                      <div className=\"flex items-center gap-1\">\n                        <Text\n                          as=\"p\"\n                          text03\n                          mainUiBody\n                          nowrap\n                          className=\"truncate\"\n                          title={chat.name}\n                        >\n                          {chat.name || UNNAMED_CHAT}\n                        </Text>\n                      </div>\n                      <div className=\"flex items-center\">\n                        <ChatSessionMorePopup\n                          chatSession={chat}\n                          projectId={currentProjectId}\n                          isRenamingChat={isRenamingChat === chat.id}\n                          setIsRenamingChat={(value) =>\n                            setIsRenamingChat(value ? chat.id : null)\n                          }\n                          search={false}\n                          afterDelete={() => {\n                            refreshCurrentProjectDetails();\n                          }}\n                          afterMove={() => {\n                            refreshCurrentProjectDetails();\n                          }}\n                          afterRemoveFromProject={() => {\n                            refreshCurrentProjectDetails();\n                          }}\n                          iconSize={20}\n                          isVisible={hoveredChatId === chat.id}\n                        />\n                      </div>\n                    </div>\n                    <Text\n                      as=\"p\"\n                      text03\n                      secondaryBody\n                      nowrap\n                      className=\"truncate\"\n                    >\n                      Last message {formatRelativeTime(chat.time_updated)}\n                    </Text>\n                  </div>\n                </div>\n              </div>\n            </Link>\n          ))}\n        </div>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/components/projects/ProjectContextPanel.tsx",
    "content": "\"use client\";\n\nimport React, { useCallback, useState } from \"react\";\nimport { useDropzone } from \"react-dropzone\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport FilePickerPopover from \"@/refresh-components/popovers/FilePickerPopover\";\nimport type { ProjectFile } from \"../../projects/projectsService\";\nimport { MinimalOnyxDocument } from \"@/lib/search/interfaces\";\nimport { Button } from \"@opal/components\";\n\nimport AddInstructionModal from \"@/components/modals/AddInstructionModal\";\nimport UserFilesModal from \"@/components/modals/UserFilesModal\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport CreateButton from \"@/refresh-components/buttons/CreateButton\";\nimport { FileCard, FileCardSkeleton } from \"@/sections/cards/FileCard\";\nimport { hasNonImageFiles } from \"@/lib/utils\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport ButtonRenaming from \"@/refresh-components/buttons/ButtonRenaming\";\nimport { UserFileStatus } from \"../../projects/projectsService\";\nimport { SvgAddLines, SvgEdit, SvgFiles, SvgFolderOpen } from \"@opal/icons\";\nimport { Hoverable } from \"@opal/core\";\n\nexport interface ProjectContextPanelProps {\n  projectTokenCount?: number;\n  availableContextTokens?: number;\n  setPresentingDocument?: (document: MinimalOnyxDocument) => void;\n}\nexport default function ProjectContextPanel({\n  projectTokenCount = 0,\n  availableContextTokens = 128_000,\n  setPresentingDocument,\n}: ProjectContextPanelProps) {\n  const addInstructionModal = useCreateModal();\n  const projectFilesModal = useCreateModal();\n  // Edit project name state\n  const [isEditingName, setIsEditingName] = useState(false);\n  // Convert ProjectFile to MinimalOnyxDocument format for viewing\n  const handleOnView = useCallback(\n    (file: ProjectFile) => {\n      if (!setPresentingDocument) return;\n\n      const documentForViewer: MinimalOnyxDocument = {\n        document_id: `project_file__${file.file_id}`,\n        semantic_identifier: file.name,\n      };\n\n      setPresentingDocument(documentForViewer);\n    },\n    [setPresentingDocument]\n  );\n  const {\n    currentProjectDetails,\n    currentProjectId,\n    unlinkFileFromProject,\n    linkFileToProject,\n    allCurrentProjectFiles,\n    isLoadingProjectDetails,\n    beginUpload,\n    projects,\n    renameProject,\n  } = useProjectsContext();\n  const handleUploadFiles = useCallback(\n    async (files: File[]) => {\n      if (!files || files.length === 0) return;\n      beginUpload(Array.from(files), currentProjectId);\n    },\n    [currentProjectId, beginUpload]\n  );\n\n  const totalFiles = allCurrentProjectFiles.length;\n  const displayFileCount = totalFiles > 100 ? \"100+\" : String(totalFiles);\n\n  const handleUploadChange = useCallback(\n    async (e: React.ChangeEvent<HTMLInputElement>) => {\n      const files = e.target.files;\n      if (!files || files.length === 0) return;\n      await handleUploadFiles(Array.from(files));\n      e.target.value = \"\";\n    },\n    [handleUploadFiles]\n  );\n\n  // Nested dropzone for drag-and-drop within ProjectContextPanel\n  const { getRootProps, getInputProps, isDragActive } = useDropzone({\n    noClick: true,\n    noKeyboard: true,\n    multiple: true,\n    noDragEventsBubbling: true,\n    onDrop: (acceptedFiles) => {\n      void handleUploadFiles(acceptedFiles);\n    },\n  });\n\n  // Handle project name editing\n  const currentProject = projects.find((p) => p.id === currentProjectId);\n  const projectName = currentProject?.name || \"Loading project...\";\n\n  const startEditing = useCallback(() => {\n    setIsEditingName(true);\n  }, []);\n\n  const cancelEditing = useCallback(() => {\n    setIsEditingName(false);\n  }, []);\n\n  if (!currentProjectId) return null; // no selection yet\n\n  // Detect if there are any non-image files in the displayed files\n  // to determine if images should be compact\n  const displayedFiles = allCurrentProjectFiles.slice(0, 4);\n  const shouldCompactImages = hasNonImageFiles(displayedFiles);\n\n  return (\n    <>\n      <addInstructionModal.Provider>\n        <AddInstructionModal />\n      </addInstructionModal.Provider>\n\n      <projectFilesModal.Provider>\n        <UserFilesModal\n          title=\"Project Files\"\n          description=\"Sessions in this project can access the files here.\"\n          recentFiles={[...allCurrentProjectFiles]}\n          onView={handleOnView}\n          handleUploadChange={handleUploadChange}\n          onDelete={async (file: ProjectFile) => {\n            if (!currentProjectId) return;\n            await unlinkFileFromProject(currentProjectId, file.id);\n          }}\n        />\n      </projectFilesModal.Provider>\n      <div className=\"flex flex-col gap-6 w-full max-w-[var(--app-page-main-content-width)] mx-auto p-4 pt-14 pb-6\">\n        <div className=\"flex flex-col gap-1 text-text-04\">\n          <SvgFolderOpen className=\"h-8 w-8 text-text-04\" />\n          <Hoverable.Root group=\"projectName\" widthVariant=\"fit\">\n            <div className=\"flex items-center gap-2\">\n              {isEditingName ? (\n                <ButtonRenaming\n                  initialName={projectName}\n                  onRename={async (newName) => {\n                    if (currentProjectId) {\n                      await renameProject(currentProjectId, newName);\n                    }\n                  }}\n                  onClose={cancelEditing}\n                  className=\"font-heading-h2 text-text-04\"\n                />\n              ) : (\n                <>\n                  <Text as=\"p\" headingH2 className=\"font-heading-h2\">\n                    {projectName}\n                  </Text>\n                  {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n                  <Hoverable.Item\n                    group=\"projectName\"\n                    variant=\"opacity-on-hover\"\n                  >\n                    <IconButton\n                      icon={SvgEdit}\n                      internal\n                      onClick={startEditing}\n                      tooltip=\"Edit project name\"\n                    />\n                  </Hoverable.Item>\n                </>\n              )}\n            </div>\n          </Hoverable.Root>\n        </div>\n\n        <Separator className=\"py-0\" />\n        <div className=\"flex flex-row gap-2 justify-between\">\n          <div className=\"min-w-0 flex-1\">\n            <Text as=\"p\" headingH3 text04>\n              Instructions\n            </Text>\n            {isLoadingProjectDetails && !currentProjectDetails ? (\n              <div className=\"h-5 w-3/4 rounded bg-background-tint-02 animate-pulse\" />\n            ) : currentProjectDetails?.project?.instructions ? (\n              <Text as=\"p\" text02 secondaryBody className=\"truncate\">\n                {currentProjectDetails.project.instructions}\n              </Text>\n            ) : (\n              <Text as=\"p\" text02 secondaryBody className=\"truncate\">\n                Add instructions to tailor the response in this project.\n              </Text>\n            )}\n          </div>\n          <Button\n            prominence=\"tertiary\"\n            icon={SvgAddLines}\n            onClick={() => addInstructionModal.toggle(true)}\n          >\n            Set Instructions\n          </Button>\n        </div>\n        <div\n          className=\"flex flex-col gap-2 \"\n          {...getRootProps({ onClick: (e) => e.stopPropagation() })}\n        >\n          <div className=\"flex flex-row gap-2 justify-between\">\n            <div>\n              <Text as=\"p\" headingH3 text04>\n                Files\n              </Text>\n              <Text as=\"p\" text02 secondaryBody>\n                Chats in this project can access these files.\n              </Text>\n            </div>\n            <FilePickerPopover\n              trigger={(open) => (\n                // The `secondary={undefined}` is required here because `CreateButton` sets it to true.\n                // Therefore, we need to first remove the truthiness before passing in the other `tertiary` flag.\n                <CreateButton secondary={undefined} tertiary transient={open}>\n                  Add Files\n                </CreateButton>\n              )}\n              onFileClick={handleOnView}\n              onPickRecent={async (file) => {\n                if (file.status === UserFileStatus.UPLOADING) return;\n                if (file.status === UserFileStatus.DELETING) return;\n                if (!currentProjectId) return;\n                if (!linkFileToProject) return;\n                linkFileToProject(currentProjectId, file);\n              }}\n              onUnpickRecent={async (file) => {\n                if (!currentProjectId) return;\n                await unlinkFileFromProject(currentProjectId, file.id);\n              }}\n              handleUploadChange={handleUploadChange}\n              selectedFileIds={(allCurrentProjectFiles || []).map((f) => f.id)}\n            />\n          </div>\n          {/* Hidden input just to satisfy dropzone contract; we rely on FilePicker for clicks */}\n          <input {...getInputProps()} />\n\n          {isLoadingProjectDetails && !currentProjectDetails ? (\n            <>\n              {/* Mobile / small screens: show skeleton */}\n              <div className=\"sm:hidden\">\n                <div className=\"w-full h-[68px] rounded-xl bg-background-tint-02 animate-pulse\" />\n              </div>\n\n              {/* Desktop / larger screens: show skeleton file cards */}\n              <div className=\"hidden sm:flex gap-1\">\n                <FileCardSkeleton />\n                <FileCardSkeleton />\n                <FileCardSkeleton />\n                <FileCardSkeleton />\n              </div>\n            </>\n          ) : allCurrentProjectFiles.length > 0 ? (\n            <>\n              {/* Mobile / small screens: just show a button to view files */}\n              <div className=\"sm:hidden\">\n                <button\n                  className=\"w-full rounded-xl px-3 py-3 text-left bg-transparent hover:bg-accent-background-hovered hover:dark:bg-neutral-800/75 transition-colors\"\n                  onClick={() => projectFilesModal.toggle(true)}\n                >\n                  <div className=\"flex flex-col overflow-hidden\">\n                    <div className=\"flex items-center justify-between gap-2 w-full\">\n                      <Text as=\"p\" text04 secondaryAction>\n                        View files\n                      </Text>\n                      <SvgFiles className=\"h-5 w-5 stroke-text-02\" />\n                    </div>\n                    <Text as=\"p\" text03 secondaryBody>\n                      {displayFileCount} files\n                    </Text>\n                  </div>\n                </button>\n              </div>\n\n              {/* Desktop / larger screens: show previews with optional View All */}\n              <div className=\"hidden sm:flex gap-1 relative items-center\">\n                {(() => {\n                  return allCurrentProjectFiles.slice(0, 4).map((f) => (\n                    <div key={f.id}>\n                      <FileCard\n                        file={f}\n                        removeFile={async (fileId: string) => {\n                          if (!currentProjectId) return;\n                          await unlinkFileFromProject(currentProjectId, fileId);\n                        }}\n                        onFileClick={handleOnView}\n                        compactImages={shouldCompactImages}\n                      />\n                    </div>\n                  ));\n                })()}\n                {totalFiles > 4 && (\n                  <button\n                    className=\"rounded-xl px-3 py-1 text-left transition-colors hover:bg-background-tint-02\"\n                    onClick={() => projectFilesModal.toggle(true)}\n                  >\n                    <div className=\"flex flex-col overflow-hidden h-12 p-1\">\n                      <div className=\"flex items-center justify-between gap-2 w-full\">\n                        <Text as=\"p\" text04 secondaryAction>\n                          View All\n                        </Text>\n                        <SvgFiles className=\"h-5 w-5 stroke-text-02\" />\n                      </div>\n                      <Text as=\"p\" text03 secondaryBody>\n                        {displayFileCount} files\n                      </Text>\n                    </div>\n                  </button>\n                )}\n                {isDragActive && (\n                  <div className=\"pointer-events-none absolute inset-0 rounded-lg border-2 border-dashed border-action-link-05\" />\n                )}\n              </div>\n              {projectTokenCount > availableContextTokens && (\n                <Text as=\"p\" text02 secondaryBody>\n                  This project exceeds the model&apos;s context limits. Sessions\n                  will automatically search for relevant files first before\n                  generating response.\n                </Text>\n              )}\n            </>\n          ) : (\n            <div\n              className={`h-12 rounded-lg border border-dashed ${\n                isDragActive\n                  ? \"bg-action-link-01 border-action-link-05\"\n                  : \"border-border-01\"\n              } flex items-center pl-2`}\n            >\n              <p\n                className={`font-secondary-body ${\n                  isDragActive ? \"text-action-link-05\" : \"text-text-02 \"\n                }`}\n              >\n                {isDragActive\n                  ? \"Drop files here to add to this project\"\n                  : \"Add documents, texts, or images to use in the project. Drag & drop supported.\"}\n              </p>\n            </div>\n          )}\n        </div>\n      </div>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/components/projects/project_utils.ts",
    "content": "export function formatRelativeTime(isoDate: string): string {\n  const date = new Date(isoDate);\n  const now = new Date();\n  const diffMs = now.getTime() - date.getTime();\n\n  const seconds = Math.floor(diffMs / 1000);\n  if (seconds < 45) return \"just now\";\n  const minutes = Math.floor(seconds / 60);\n  if (minutes < 60) return `${minutes} min${minutes === 1 ? \"\" : \"s\"} ago`;\n  const hours = Math.floor(minutes / 60);\n  if (hours < 24) return `${hours} hour${hours === 1 ? \"\" : \"s\"} ago`;\n  const days = Math.floor(hours / 24);\n  if (days < 30) return `${days} day${days === 1 ? \"\" : \"s\"} ago`;\n  const months = Math.floor(days / 30);\n  if (months < 12) return `${months} month${months === 1 ? \"\" : \"s\"} ago`;\n  const years = Math.floor(months / 12);\n  return `${years} year${years === 1 ? \"\" : \"s\"} ago`;\n}\n"
  },
  {
    "path": "web/src/app/app/components/tools/GeneratingImageDisplay.tsx",
    "content": "import React, { useState, useEffect, useRef } from \"react\";\n\nexport default function GeneratingImageDisplay({ isCompleted = false }) {\n  const [progress, setProgress] = useState(0);\n  const progressRef = useRef(0);\n  const animationRef = useRef<number | null>(null);\n  const startTimeRef = useRef<number>(Date.now());\n\n  useEffect(() => {\n    // Animation setup\n    let lastUpdateTime = 0;\n    const updateInterval = 500;\n    const animationDuration = 30000;\n\n    const animate = (timestamp: number) => {\n      const elapsedTime = timestamp - startTimeRef.current;\n\n      // Calculate progress using logarithmic curve\n      const maxProgress = 99.9;\n      const progress =\n        maxProgress * (1 - Math.exp(-elapsedTime / animationDuration));\n\n      // Update progress if enough time has passed\n      if (timestamp - lastUpdateTime > updateInterval) {\n        progressRef.current = progress;\n        setProgress(Math.round(progress * 10) / 10);\n        lastUpdateTime = timestamp;\n      }\n\n      // Continue animation if not completed\n      if (!isCompleted && elapsedTime < animationDuration) {\n        animationRef.current = requestAnimationFrame(animate);\n      }\n    };\n\n    // Start animation\n    startTimeRef.current = performance.now();\n    animationRef.current = requestAnimationFrame(animate);\n\n    // Cleanup function\n    return () => {\n      if (animationRef.current) {\n        cancelAnimationFrame(animationRef.current);\n      }\n    };\n  }, [isCompleted]);\n\n  // Handle completion\n  useEffect(() => {\n    if (isCompleted) {\n      if (animationRef.current) {\n        cancelAnimationFrame(animationRef.current);\n      }\n      setProgress(100);\n    }\n  }, [isCompleted]);\n\n  return (\n    <div className=\"object-cover object-center border border-background-200 bg-background-100 items-center justify-center overflow-hidden flex rounded-lg w-96 h-96 transition-opacity duration-300 opacity-100\">\n      <div className=\"m-auto relative flex\">\n        <svg className=\"w-16 h-16 transform -rotate-90\" viewBox=\"0 0 100 100\">\n          <circle\n            className=\"text-text-200\"\n            strokeWidth=\"8\"\n            stroke=\"currentColor\"\n            fill=\"transparent\"\n            r=\"44\"\n            cx=\"50\"\n            cy=\"50\"\n          />\n          <circle\n            className=\"text-text-800 transition-all duration-300\"\n            strokeWidth=\"8\"\n            strokeDasharray={276.46}\n            strokeDashoffset={276.46 * (1 - progress / 100)}\n            strokeLinecap=\"round\"\n            stroke=\"currentColor\"\n            fill=\"transparent\"\n            r=\"44\"\n            cx=\"50\"\n            cy=\"50\"\n          />\n        </svg>\n        <div className=\"absolute inset-0 flex items-center justify-center\">\n          <svg\n            className=\"w-6 h-6 text-text-500 animate-pulse-strong\"\n            fill=\"none\"\n            viewBox=\"0 0 24 24\"\n            stroke=\"currentColor\"\n          >\n            <path\n              strokeLinecap=\"round\"\n              strokeLinejoin=\"round\"\n              strokeWidth=\"2\"\n              d=\"M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z\"\n            />\n          </svg>\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/components/tools/constants.ts",
    "content": "import type { IconProps } from \"@opal/types\";\nimport { SvgCode, SvgGlobe, SvgImage, SvgLink, SvgSearch } from \"@opal/icons\";\n\n// Tool names as referenced by tool results / tool calls\nexport const SEARCH_TOOL_NAME = \"run_search\";\nexport const INTERNET_SEARCH_TOOL_NAME = \"run_internet_search\";\nexport const IMAGE_GENERATION_TOOL_NAME = \"run_image_generation\";\nexport const PYTHON_TOOL_NAME = \"run_python\";\nexport const OPEN_URL_TOOL_NAME = \"open_url\";\n\n// In-code tool IDs that also correspond to the tool's name when associated with a persona\nexport const SEARCH_TOOL_ID = \"SearchTool\";\nexport const IMAGE_GENERATION_TOOL_ID = \"ImageGenerationTool\";\nexport const WEB_SEARCH_TOOL_ID = \"WebSearchTool\";\nexport const PYTHON_TOOL_ID = \"PythonTool\";\nexport const OPEN_URL_TOOL_ID = \"OpenURLTool\";\nexport const FILE_READER_TOOL_ID = \"FileReaderTool\";\n\n// Icon mappings for system tools\nexport const SYSTEM_TOOL_ICONS: Record<\n  string,\n  React.FunctionComponent<IconProps>\n> = {\n  [SEARCH_TOOL_ID]: SvgSearch,\n  [WEB_SEARCH_TOOL_ID]: SvgGlobe,\n  [IMAGE_GENERATION_TOOL_ID]: SvgImage,\n  [PYTHON_TOOL_ID]: SvgCode,\n  [OPEN_URL_TOOL_ID]: SvgLink,\n};\n"
  },
  {
    "path": "web/src/app/app/interfaces.ts",
    "content": "import {\n  OnyxDocument,\n  Filters,\n  SearchOnyxDocument,\n  StreamStopReason,\n} from \"@/lib/search/interfaces\";\nimport { Packet } from \"./services/streamingModels\";\n\nexport type FeedbackType = \"like\" | \"dislike\";\nexport type ChatState =\n  | \"input\"\n  | \"loading\"\n  | \"streaming\"\n  | \"toolBuilding\"\n  | \"uploading\";\nexport interface RegenerationState {\n  regenerating: boolean;\n  finalMessageIndex: number;\n}\n\nexport enum RetrievalType {\n  None = \"none\",\n  Search = \"search\",\n  SelectedDocs = \"selectedDocs\",\n}\n\nexport enum ResearchType {\n  LegacyAgentic = \"LEGACY_AGENTIC\",\n  Thoughtful = \"THOUGHTFUL\",\n  Deep = \"DEEP\",\n  Fast = \"FAST\",\n}\n\nexport enum ChatSessionSharedStatus {\n  Private = \"private\",\n  Public = \"public\",\n}\n\nexport interface ChatSessionSummary {\n  id: string;\n  name: string | null;\n  persona_id: number | null;\n  time_created: string;\n  shared_status: ChatSessionSharedStatus;\n  current_alternate_model: string | null;\n  current_temperature_override: number | null;\n  highlights?: string[];\n}\n\nexport interface ChatSessionGroup {\n  title: string;\n  chats: ChatSessionSummary[];\n}\n\nexport interface ChatSearchResponse {\n  groups: ChatSessionGroup[];\n  has_more: boolean;\n  next_page: number | null;\n}\n\n// The number of messages to buffer on the client side.\nexport const BUFFER_COUNT = 35;\n\nexport interface RetrievalDetails {\n  run_search: \"always\" | \"never\" | \"auto\";\n  real_time: boolean;\n  filters?: Filters;\n  enable_auto_detect_filters?: boolean | null;\n}\n\n// Citation number -> Document ID (allows O(1) lookup when rendering citations)\nexport type CitationMap = { [citation_num: number]: string };\n\nexport enum ChatFileType {\n  IMAGE = \"image\",\n  DOCUMENT = \"document\",\n  PLAIN_TEXT = \"plain_text\",\n  TABULAR = \"tabular\",\n  USER_KNOWLEDGE = \"user_knowledge\",\n}\n\nexport const isTextFile = (fileType: ChatFileType) =>\n  [\n    ChatFileType.PLAIN_TEXT,\n    ChatFileType.TABULAR,\n    ChatFileType.USER_KNOWLEDGE,\n    ChatFileType.DOCUMENT,\n  ].includes(fileType);\n\nexport interface FileDescriptor {\n  id: string;\n  type: ChatFileType;\n  name?: string | null;\n\n  user_file_id?: string | null;\n  // FE only\n  isUploading?: boolean;\n}\n\nexport interface FileDescriptorWithHighlights extends FileDescriptor {\n  match_highlights: string[];\n}\n\nexport interface LLMRelevanceFilterPacket {\n  relevant_chunk_indices: number[];\n}\n\nexport interface ToolCallMetadata {\n  tool_name: string;\n  tool_args: Record<string, any>;\n  tool_result?: Record<string, any>;\n}\n\nexport interface ToolCallFinalResult {\n  tool_name: string;\n  tool_args: Record<string, any>;\n  tool_result: Record<string, any>;\n}\n\nexport interface ChatSession {\n  id: string;\n  name: string;\n  persona_id: number;\n  time_created: string;\n  time_updated: string;\n  shared_status: ChatSessionSharedStatus;\n  project_id: number | null;\n  current_alternate_model: string;\n  current_temperature_override: number | null;\n}\n\nexport interface SearchSession {\n  search_session_id: string;\n  documents: SearchOnyxDocument[];\n  messages: BackendMessage[];\n  description: string;\n}\n\nexport interface Message {\n  is_generating?: boolean;\n  messageId?: number;\n  nodeId: number; // Unique identifier for tree structure (can be negative for temp messages)\n  message: string;\n  type: \"user\" | \"assistant\" | \"system\" | \"error\"; // TODO: rename \"assistant\" to \"agent\" — https://linear.app/onyx-app/issue/ENG-3766\n  retrievalType?: RetrievalType;\n  researchType?: ResearchType;\n  query?: string | null;\n  files: FileDescriptor[];\n  toolCall: ToolCallMetadata | null;\n  // for rebuilding the message tree - these now use nodeId\n  parentNodeId: number | null;\n  childrenNodeIds?: number[];\n  latestChildNodeId?: number | null;\n  alternateAgentID?: number | null;\n  stackTrace?: string | null;\n  errorCode?: string | null;\n  isRetryable?: boolean;\n  errorDetails?: Record<string, any> | null;\n  overridden_model?: string;\n  stopReason?: StreamStopReason | null;\n\n  // Multi-model answer generation\n  preferredResponseId?: number | null;\n  modelDisplayName?: string | null;\n\n  // new gen\n  packets: Packet[];\n  packetCount?: number; // Tracks packet count for React memo comparison (avoids reading from mutated array)\n\n  // cached values for easy access\n  documents?: OnyxDocument[] | null;\n  citations?: CitationMap;\n\n  // feedback state\n  currentFeedback?: FeedbackType | null;\n\n  // Duration in seconds for processing this message (agent messages only)\n  processingDurationSeconds?: number;\n}\n\nexport interface BackendChatSession {\n  chat_session_id: string;\n  description: string;\n  persona_id: number;\n  persona_name: string;\n  messages: BackendMessage[];\n  time_created: string;\n  time_updated: string;\n  shared_status: ChatSessionSharedStatus;\n  current_temperature_override: number | null;\n  current_alternate_model?: string;\n\n  owner_name: string | null;\n  packets: Packet[][];\n}\n\nexport function toChatSession(backend: BackendChatSession): ChatSession {\n  return {\n    id: backend.chat_session_id,\n    name: backend.description,\n    persona_id: backend.persona_id,\n    time_created: backend.time_created,\n    time_updated: backend.time_updated,\n    shared_status: backend.shared_status,\n    project_id: null,\n    current_alternate_model: backend.current_alternate_model ?? \"\",\n    current_temperature_override: backend.current_temperature_override,\n  };\n}\n\nexport interface BackendMessage {\n  message_id: number;\n  message_type: string;\n  research_type: string | null;\n  parent_message: number | null;\n  latest_child_message: number | null;\n  message: string;\n  rephrased_query: string | null;\n  // Backend sends context_docs as a flat array of documents\n  context_docs: OnyxDocument[] | null;\n  time_sent: string;\n  overridden_model: string;\n  alternate_assistant_id: number | null; // TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766\n  chat_session_id: string;\n  citations: CitationMap | null;\n  files: FileDescriptor[];\n  tool_call: ToolCallFinalResult | null;\n  current_feedback: string | null;\n  // Duration in seconds for processing this message (agent messages only)\n  processing_duration_seconds?: number;\n\n  sub_questions: SubQuestionDetail[];\n  // Keeping existing properties\n  comments: any;\n  parentMessageId: number | null;\n  refined_answer_improvement: boolean | null;\n  is_agentic: boolean | null;\n  // Multi-model answer generation\n  preferred_response_id: number | null;\n  model_display_name: string | null;\n}\n\nexport interface MessageResponseIDInfo {\n  type: \"message_id_info\";\n  user_message_id: number | null;\n  reserved_assistant_message_id: number; // TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766\n}\n\nexport interface ModelResponseSlot {\n  message_id: number;\n  model_name: string;\n}\n\nexport interface MultiModelMessageResponseIDInfo {\n  type: \"multi_model_message_id_info\";\n  user_message_id: number | null;\n  responses: ModelResponseSlot[];\n}\n\nexport interface UserKnowledgeFilePacket {\n  user_files: FileDescriptor[];\n}\n\nexport interface DocumentsResponse {\n  top_documents: OnyxDocument[];\n  rephrased_query: string | null;\n  level?: number | null;\n  level_question_num?: number | null;\n}\n\nexport interface FileChatDisplay {\n  file_ids: string[];\n}\n\nexport interface StreamingError {\n  error: string;\n  stack_trace: string;\n  error_code?: string;\n  is_retryable?: boolean;\n  details?: Record<string, any>;\n}\n\nexport interface InputPrompt {\n  id: number;\n  prompt: string;\n  content: string;\n  active: boolean;\n  is_public: boolean;\n}\n\nexport interface EditPromptModalProps {\n  onClose: () => void;\n\n  promptId: number;\n  editInputPrompt: (\n    promptId: number,\n    values: CreateInputPromptRequest\n  ) => Promise<void>;\n}\nexport interface CreateInputPromptRequest {\n  prompt: string;\n  content: string;\n}\n\nexport interface AddPromptModalProps {\n  onClose: () => void;\n  onSubmit: (promptData: CreateInputPromptRequest) => void;\n}\nexport interface PromptData {\n  id: number;\n  prompt: string;\n  content: string;\n}\n\n/**\n * // Start of Selection\n */\n\nexport interface BaseQuestionIdentifier {\n  level: number;\n  level_question_num: number;\n}\n\nexport interface SubQuestionDetail extends BaseQuestionIdentifier {\n  question: string;\n  answer: string;\n  sub_queries?: SubQueryDetail[] | null;\n  context_docs?: { top_documents: OnyxDocument[] } | null;\n  is_complete?: boolean;\n  is_stopped?: boolean;\n  answer_streaming?: boolean;\n}\n\nexport interface SubQueryDetail {\n  query: string;\n  query_id: number;\n  doc_ids?: number[] | null;\n}\n"
  },
  {
    "path": "web/src/app/app/layout.tsx",
    "content": "import { redirect } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { unstable_noStore as noStore } from \"next/cache\";\nimport { requireAuth } from \"@/lib/auth/requireAuth\";\nimport { ProjectsProvider } from \"@/providers/ProjectsContext\";\nimport { VoiceModeProvider } from \"@/providers/VoiceModeProvider\";\nimport AppSidebar from \"@/sections/sidebar/AppSidebar\";\n\nexport interface LayoutProps {\n  children: React.ReactNode;\n}\n\nexport default async function Layout({ children }: LayoutProps) {\n  noStore();\n\n  // Only check authentication - data fetching is done client-side via SWR hooks\n  const authResult = await requireAuth();\n\n  if (authResult.redirect) {\n    redirect(authResult.redirect as Route);\n  }\n\n  return (\n    <ProjectsProvider>\n      {/* VoiceModeProvider wraps the full app layout so TTS playback state\n          persists across page navigations (e.g., sidebar clicks during playback).\n          It only activates WebSocket connections when TTS is actually triggered. */}\n      <VoiceModeProvider>\n        <div className=\"flex flex-row w-full h-full\">\n          <AppSidebar />\n          {children}\n        </div>\n      </VoiceModeProvider>\n    </ProjectsProvider>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/message/BlinkingBar.tsx",
    "content": "import { cn } from \"@/lib/utils\";\n\nexport function BlinkingBar({ addMargin = false }: { addMargin?: boolean }) {\n  return (\n    <span\n      className={cn(\n        \"animate-pulse flex-none bg-theme-primary-05 relative top-[0.15rem] inline-block w-[0.5rem] h-[1rem]\",\n        addMargin && \"ml-1\"\n      )}\n    ></span>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/message/CodeBlock.tsx",
    "content": "import { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport React, { useState, ReactNode, useCallback, useMemo, memo } from \"react\";\nimport { SvgCheck, SvgCode, SvgCopy } from \"@opal/icons\";\n\ninterface CodeBlockProps {\n  className?: string;\n  children?: ReactNode;\n  codeText: string;\n  showHeader?: boolean;\n  noPadding?: boolean;\n}\n\nconst MemoizedCodeLine = memo(({ content }: { content: ReactNode }) => (\n  <>{content}</>\n));\n\nexport const CodeBlock = memo(function CodeBlock({\n  className = \"\",\n  children,\n  codeText,\n  showHeader = true,\n  noPadding = false,\n}: CodeBlockProps) {\n  const [copied, setCopied] = useState(false);\n\n  const language = useMemo(() => {\n    return className\n      .split(\" \")\n      .filter((cls) => cls.startsWith(\"language-\"))\n      .map((cls) => cls.replace(\"language-\", \"\"))\n      .join(\" \");\n  }, [className]);\n\n  const handleCopy = useCallback(() => {\n    if (!codeText) return;\n    navigator.clipboard.writeText(codeText).then(() => {\n      setCopied(true);\n      setTimeout(() => setCopied(false), 2000);\n    });\n  }, [codeText]);\n\n  const CopyButton = () => (\n    <div\n      className=\"ml-auto cursor-pointer select-none\"\n      onMouseDown={handleCopy}\n    >\n      {copied ? (\n        <div className=\"flex items-center space-x-2\">\n          <SvgCheck height={14} width={14} stroke=\"currentColor\" />\n          <Text as=\"p\" secondaryMono>\n            Copied!\n          </Text>\n        </div>\n      ) : (\n        <div className=\"flex items-center space-x-2\">\n          <SvgCopy height={14} width={14} stroke=\"currentColor\" />\n          <Text as=\"p\" secondaryMono>\n            Copy\n          </Text>\n        </div>\n      )}\n    </div>\n  );\n\n  if (typeof children === \"string\" && !language) {\n    return (\n      <span\n        data-testid=\"code-block\"\n        className={cn(\n          \"font-mono\",\n          \"text-text-05\",\n          \"bg-background-tint-00\",\n          \"rounded\",\n          \"text-[0.75em]\",\n          \"inline\",\n          \"whitespace-pre-wrap\",\n          \"break-words\",\n          \"py-0.5\",\n          \"px-1\",\n          className\n        )}\n      >\n        {children}\n      </span>\n    );\n  }\n\n  const CodeContent = () => {\n    if (!language) {\n      return (\n        <pre className=\"!p-2 m-0 overflow-x-auto w-0 min-w-full hljs\">\n          <code className={`text-sm hljs ${className}`}>\n            {Array.isArray(children)\n              ? children.map((child, index) => (\n                  <MemoizedCodeLine key={index} content={child} />\n                ))\n              : children}\n          </code>\n        </pre>\n      );\n    }\n\n    return (\n      <pre className=\"!p-2 m-0 overflow-x-auto w-0 min-w-full hljs\">\n        <code className=\"text-xs\">\n          {Array.isArray(children)\n            ? children.map((child, index) => (\n                <MemoizedCodeLine key={index} content={child} />\n              ))\n            : children}\n        </code>\n      </pre>\n    );\n  };\n\n  return (\n    <>\n      {showHeader ? (\n        <div\n          className={cn(\n            \"bg-background-tint-00 rounded-12 max-w-full min-w-0\",\n            !noPadding && \"px-1 pb-1\"\n          )}\n        >\n          {language && (\n            <div className=\"flex items-center px-2 py-1 text-sm text-text-04 gap-x-2\">\n              <SvgCode\n                height={12}\n                width={12}\n                stroke=\"currentColor\"\n                className=\"my-auto\"\n              />\n              <Text secondaryMono>{language}</Text>\n              {codeText && <CopyButton />}\n            </div>\n          )}\n          <CodeContent />\n        </div>\n      ) : (\n        <CodeContent />\n      )}\n    </>\n  );\n});\n\nCodeBlock.displayName = \"CodeBlock\";\nMemoizedCodeLine.displayName = \"MemoizedCodeLine\";\n"
  },
  {
    "path": "web/src/app/app/message/FileDisplay.tsx",
    "content": "\"use client\";\n\nimport { ReactNode, useState } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { ChatFileType, FileDescriptor } from \"@/app/app/interfaces\";\nimport Attachment from \"@/refresh-components/Attachment\";\nimport { InMessageImage } from \"@/app/app/components/files/images/InMessageImage\";\nimport CsvContent from \"@/components/tools/CSVContent\";\nimport PreviewModal from \"@/sections/modals/PreviewModal\";\nimport { MinimalOnyxDocument } from \"@/lib/search/interfaces\";\nimport ExpandableContentWrapper from \"@/components/tools/ExpandableContentWrapper\";\n\ninterface FileContainerProps {\n  children: ReactNode;\n  className?: string;\n  id?: string;\n}\n\ninterface FileDisplayProps {\n  files: FileDescriptor[];\n}\n\nfunction FileContainer({ children, className, id }: FileContainerProps) {\n  return (\n    <div\n      id={id}\n      className={cn(\"flex w-full flex-col items-end gap-2 py-2\", className)}\n    >\n      {children}\n    </div>\n  );\n}\n\nexport default function FileDisplay({ files }: FileDisplayProps) {\n  const [close, setClose] = useState(true);\n  const [previewingFile, setPreviewingFile] = useState<FileDescriptor | null>(\n    null\n  );\n  const textFiles = files.filter(\n    (file) =>\n      file.type === ChatFileType.PLAIN_TEXT ||\n      file.type === ChatFileType.DOCUMENT\n  );\n  const imageFiles = files.filter((file) => file.type === ChatFileType.IMAGE);\n  // TODO(danelegend): XLSX files are binary (OOXML) and will fail to parse in CsvContent.\n  // The backend should convert XLSX to CSV text before serving via /api/chat/file,\n  // or XLSX should be split into a separate ChatFileType and rendered as an Attachment.\n  const tabularFiles = files.filter(\n    (file) => file.type === ChatFileType.TABULAR\n  );\n\n  const presentingDocument: MinimalOnyxDocument = {\n    document_id: previewingFile?.id ?? \"\",\n    semantic_identifier: previewingFile?.name ?? \"\",\n  };\n\n  return (\n    <>\n      {previewingFile && (\n        <PreviewModal\n          presentingDocument={presentingDocument}\n          onClose={() => setPreviewingFile(null)}\n        />\n      )}\n\n      {textFiles.length > 0 && (\n        <FileContainer id=\"onyx-file\">\n          {textFiles.map((file) => (\n            <Attachment\n              key={file.id}\n              fileName={file.name || file.id}\n              open={() => setPreviewingFile(file)}\n            />\n          ))}\n        </FileContainer>\n      )}\n\n      {imageFiles.length > 0 && (\n        <FileContainer id=\"onyx-image\">\n          {imageFiles.map((file) => (\n            <InMessageImage key={file.id} fileId={file.id} />\n          ))}\n        </FileContainer>\n      )}\n\n      {tabularFiles.length > 0 && (\n        <FileContainer className=\"overflow-auto\">\n          {tabularFiles.map((file) =>\n            close ? (\n              <ExpandableContentWrapper\n                key={file.id}\n                fileDescriptor={file}\n                close={() => setClose(false)}\n                ContentComponent={CsvContent}\n              />\n            ) : (\n              <Attachment\n                key={file.id}\n                open={() => setClose(true)}\n                fileName={file.name || file.id}\n              />\n            )\n          )}\n        </FileContainer>\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/message/HumanMessage.tsx",
    "content": "\"use client\";\n\nimport React, { useEffect, useMemo, useRef, useState } from \"react\";\nimport { FileDescriptor } from \"@/app/app/interfaces\";\nimport \"katex/dist/katex.min.css\";\nimport MessageSwitcher from \"@/app/app/message/MessageSwitcher\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\nimport useScreenSize from \"@/hooks/useScreenSize\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport { Button } from \"@opal/components\";\nimport { SvgEdit } from \"@opal/icons\";\nimport { Hoverable } from \"@opal/core\";\nimport FileDisplay from \"./FileDisplay\";\n\ninterface MessageEditingProps {\n  content: string;\n  onSubmitEdit: (editedContent: string) => void;\n  onCancelEdit: () => void;\n}\n\nfunction MessageEditing({\n  content,\n  onSubmitEdit,\n  onCancelEdit,\n}: MessageEditingProps) {\n  const textareaRef = useRef<HTMLTextAreaElement>(null);\n  const [editedContent, setEditedContent] = useState(content);\n\n  useEffect(() => {\n    if (!textareaRef.current) return;\n\n    // Focus the textarea\n    textareaRef.current.focus();\n    textareaRef.current.select();\n  }, []);\n\n  function handleSubmit() {\n    onSubmitEdit(editedContent);\n  }\n\n  function handleCancel() {\n    setEditedContent(content);\n    onCancelEdit();\n  }\n\n  return (\n    <div className=\"w-full\">\n      <div\n        className={cn(\n          \"w-full h-full border rounded-16 overflow-hidden p-3 flex flex-col gap-2\"\n        )}\n      >\n        <textarea\n          ref={textareaRef}\n          className={cn(\n            \"w-full h-full resize-none outline-none bg-transparent overflow-y-scroll whitespace-normal break-word\"\n          )}\n          aria-multiline\n          role=\"textarea\"\n          value={editedContent}\n          style={{ scrollbarWidth: \"thin\" }}\n          onChange={(e) => {\n            setEditedContent(e.target.value);\n            textareaRef.current!.style.height = \"auto\";\n            e.target.style.height = `${e.target.scrollHeight}px`;\n          }}\n          onKeyDown={(e) => {\n            if (e.key === \"Escape\") {\n              e.preventDefault();\n              handleCancel();\n            }\n            // Submit edit if \"Command Enter\" is pressed, like in ChatGPT\n            if (e.key === \"Enter\" && e.metaKey) handleSubmit();\n          }}\n        />\n        <div className=\"flex justify-end gap-1\">\n          <Button onClick={handleSubmit}>Submit</Button>\n          <Button prominence=\"secondary\" onClick={handleCancel}>\n            Cancel\n          </Button>\n        </div>\n      </div>\n    </div>\n  );\n}\n\ninterface HumanMessageProps {\n  // Content and display\n  content: string;\n  files?: FileDescriptor[];\n\n  // Message navigation - nodeId for tree position, messageId for editing\n  nodeId: number;\n  messageId?: number | null;\n  otherMessagesCanSwitchTo?: number[];\n  onMessageSelection?: (nodeId: number) => void;\n\n  // Editing functionality - takes (editedContent, messageId) to allow stable callback reference\n  onEdit?: (editedContent: string, messageId: number) => void;\n\n  // Streaming and generation\n  stopGenerating?: () => void;\n  disableSwitchingForStreaming?: boolean;\n}\n\n// Memoization comparison - compare by value for primitives, by reference for objects/arrays\nfunction arePropsEqual(\n  prev: HumanMessageProps,\n  next: HumanMessageProps\n): boolean {\n  return (\n    prev.content === next.content &&\n    prev.nodeId === next.nodeId &&\n    prev.messageId === next.messageId &&\n    prev.files === next.files &&\n    prev.disableSwitchingForStreaming === next.disableSwitchingForStreaming &&\n    prev.otherMessagesCanSwitchTo === next.otherMessagesCanSwitchTo &&\n    prev.onEdit === next.onEdit\n    // Skip: stopGenerating, onMessageSelection (inline function props)\n  );\n}\n\nconst HumanMessage = React.memo(function HumanMessage({\n  content: initialContent,\n  files,\n  nodeId,\n  messageId,\n  otherMessagesCanSwitchTo,\n  onEdit,\n  onMessageSelection,\n  stopGenerating = () => null,\n  disableSwitchingForStreaming = false,\n}: HumanMessageProps) {\n  // TODO (@raunakab):\n  //\n  // This is some duplicated state that is patching a memoization issue with `HumanMessage`.\n  // Fix this later.\n  const [content, setContent] = useState(initialContent);\n\n  const [isEditing, setIsEditing] = useState(false);\n  const { isMobile } = useScreenSize();\n\n  // Use nodeId for switching (finding position in siblings)\n  const indexInSiblings = otherMessagesCanSwitchTo?.indexOf(nodeId);\n  // indexOf returns -1 if not found, treat that as undefined\n  const currentMessageInd =\n    indexInSiblings !== undefined && indexInSiblings !== -1\n      ? indexInSiblings\n      : undefined;\n\n  const getPreviousMessage = () => {\n    if (\n      currentMessageInd !== undefined &&\n      currentMessageInd > 0 &&\n      otherMessagesCanSwitchTo\n    ) {\n      return otherMessagesCanSwitchTo[currentMessageInd - 1];\n    }\n    return undefined;\n  };\n\n  const getNextMessage = () => {\n    if (\n      currentMessageInd !== undefined &&\n      currentMessageInd < (otherMessagesCanSwitchTo?.length || 0) - 1 &&\n      otherMessagesCanSwitchTo\n    ) {\n      return otherMessagesCanSwitchTo[currentMessageInd + 1];\n    }\n    return undefined;\n  };\n\n  const copyEditButtonContent = useMemo(\n    () => (\n      <div className=\"flex flex-row flex-shrink px-1\">\n        <CopyIconButton\n          getCopyText={() => content}\n          prominence=\"tertiary\"\n          data-testid=\"HumanMessage/copy-button\"\n        />\n        <Button\n          icon={SvgEdit}\n          prominence=\"tertiary\"\n          tooltip=\"Edit\"\n          onClick={() => setIsEditing(true)}\n          data-testid=\"HumanMessage/edit-button\"\n        />\n      </div>\n    ),\n    [content]\n  );\n\n  const copyEditButton = (\n    <Hoverable.Item group=\"humanMessage\" variant=\"opacity-on-hover\">\n      {copyEditButtonContent}\n    </Hoverable.Item>\n  );\n\n  return (\n    <Hoverable.Root group=\"humanMessage\" widthVariant=\"full\">\n      <div\n        id=\"onyx-human-message\"\n        className=\"flex flex-col justify-end w-full relative\"\n      >\n        <FileDisplay files={files || []} />\n        {isEditing ? (\n          <MessageEditing\n            content={content}\n            onSubmitEdit={(editedContent) => {\n              // Don't update UI for edits that can't be persisted\n              if (messageId === undefined || messageId === null) {\n                setIsEditing(false);\n                return;\n              }\n              onEdit?.(editedContent, messageId);\n              setContent(editedContent);\n              setIsEditing(false);\n            }}\n            onCancelEdit={() => setIsEditing(false)}\n          />\n        ) : (\n          <div className=\"flex justify-end\">\n            {onEdit && !isMobile && copyEditButton}\n            <div className=\"md:max-w-[37.5rem]\">\n              <div\n                className={\n                  \"max-w-[30rem] md:max-w-[37.5rem] whitespace-break-spaces break-anywhere rounded-t-16 rounded-bl-16 bg-background-tint-02 py-2 px-3\"\n                }\n                onCopy={(e) => {\n                  const selection = window.getSelection();\n                  if (selection) {\n                    e.preventDefault();\n                    const text = selection\n                      .toString()\n                      .replace(/\\n{2,}/g, \"\\n\")\n                      .trim();\n                    e.clipboardData.setData(\"text/plain\", text);\n                  }\n                }}\n              >\n                <Text\n                  as=\"p\"\n                  className=\"inline-block align-middle\"\n                  mainContentBody\n                >\n                  {content}\n                </Text>\n              </div>\n            </div>\n          </div>\n        )}\n        <div className=\"flex justify-end pt-1\">\n          {!isEditing && onEdit && isMobile && copyEditButton}\n          {currentMessageInd !== undefined &&\n            onMessageSelection &&\n            otherMessagesCanSwitchTo &&\n            otherMessagesCanSwitchTo.length > 1 && (\n              <MessageSwitcher\n                disableForStreaming={disableSwitchingForStreaming}\n                currentPage={currentMessageInd + 1}\n                totalPages={otherMessagesCanSwitchTo.length}\n                handlePrevious={() => {\n                  stopGenerating();\n                  const prevMessage = getPreviousMessage();\n                  if (prevMessage !== undefined) {\n                    onMessageSelection(prevMessage);\n                  }\n                }}\n                handleNext={() => {\n                  stopGenerating();\n                  const nextMessage = getNextMessage();\n                  if (nextMessage !== undefined) {\n                    onMessageSelection(nextMessage);\n                  }\n                }}\n              />\n            )}\n        </div>\n      </div>\n    </Hoverable.Root>\n  );\n}, arePropsEqual);\n\nexport default HumanMessage;\n"
  },
  {
    "path": "web/src/app/app/message/MemoizedTextComponents.tsx",
    "content": "import {\n  QuestionCardProps,\n  DocumentCardProps,\n} from \"@/components/search/results/Citation\";\nimport {\n  LoadedOnyxDocument,\n  MinimalOnyxDocument,\n  OnyxDocument,\n} from \"@/lib/search/interfaces\";\nimport React, { memo, JSX, useMemo, useCallback } from \"react\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport { WebResultIcon } from \"@/components/WebResultIcon\";\nimport { SubQuestionDetail, CitationMap } from \"../interfaces\";\nimport { ValidSources } from \"@/lib/types\";\nimport { ProjectFile } from \"../projects/projectsService\";\nimport { BlinkingBar } from \"./BlinkingBar\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport SourceTag from \"@/refresh-components/buttons/source-tag/SourceTag\";\nimport {\n  documentToSourceInfo,\n  questionToSourceInfo,\n  getDisplayNameForSource,\n} from \"@/refresh-components/buttons/source-tag/sourceTagUtils\";\nimport { openDocument } from \"@/lib/search/utils\";\nimport { ensureHrefProtocol } from \"@/lib/utils\";\n\nexport const MemoizedAnchor = memo(\n  ({\n    docs,\n    subQuestions,\n    openQuestion,\n    userFiles,\n    citations,\n    href,\n    updatePresentingDocument,\n    children,\n  }: {\n    subQuestions?: SubQuestionDetail[];\n    openQuestion?: (question: SubQuestionDetail) => void;\n    docs?: OnyxDocument[] | null;\n    userFiles?: ProjectFile[] | null;\n    citations?: CitationMap;\n    updatePresentingDocument: (doc: MinimalOnyxDocument) => void;\n    href?: string;\n    children: React.ReactNode;\n  }): JSX.Element => {\n    const value = children?.toString();\n    if (value?.startsWith(\"[\") && value?.endsWith(\"]\")) {\n      const match = value.match(/\\[(D|Q)?(\\d+)\\]/);\n\n      if (match) {\n        const match_item = match[2];\n        if (match_item !== undefined) {\n          const isSubQuestion = match[1] === \"Q\";\n          const isDocument = !isSubQuestion;\n\n          const citation_num = parseInt(match_item, 10);\n\n          // Use citation map to find the correct document\n          // Citations map format: {citation_num: document_id}\n          // e.g., {1: \"doc_abc\", 2: \"doc_xyz\", 3: \"doc_123\"}\n          let associatedDoc: OnyxDocument | null = null;\n          if (isDocument && docs && citations) {\n            const document_id = citations[citation_num];\n            if (document_id) {\n              associatedDoc =\n                docs.find((d) => d.document_id === document_id) || null;\n            }\n          }\n\n          const associatedSubQuestion = isSubQuestion\n            ? subQuestions?.[citation_num - 1]\n            : undefined;\n\n          if (!associatedDoc && !associatedSubQuestion) {\n            return <>{children}</>;\n          }\n\n          let icon: React.ReactNode = null;\n          if (associatedDoc?.source_type === \"web\") {\n            icon = <WebResultIcon url={associatedDoc.link} />;\n          } else {\n            icon = (\n              <SourceIcon\n                sourceType={associatedDoc?.source_type as ValidSources}\n                iconSize={18}\n              />\n            );\n          }\n          const associatedDocInfo = associatedDoc\n            ? {\n                ...associatedDoc,\n                icon: icon as any,\n                link: associatedDoc.link,\n              }\n            : undefined;\n\n          return (\n            <MemoizedLink\n              updatePresentingDocument={updatePresentingDocument}\n              href={href}\n              document={associatedDocInfo}\n              question={associatedSubQuestion}\n              openQuestion={openQuestion}\n            >\n              {children}\n            </MemoizedLink>\n          );\n        }\n      }\n    }\n    return (\n      <MemoizedLink\n        updatePresentingDocument={updatePresentingDocument}\n        href={href}\n      >\n        {children}\n      </MemoizedLink>\n    );\n  }\n);\n\nexport const MemoizedLink = memo(\n  ({\n    node,\n    document,\n    updatePresentingDocument,\n    question,\n    href,\n    openQuestion,\n    ...rest\n  }: Partial<DocumentCardProps & QuestionCardProps> & {\n    node?: any;\n    [key: string]: any;\n  }) => {\n    const value = rest.children;\n\n    // Convert document to SourceInfo for SourceTag\n    const documentSourceInfo = useMemo(() => {\n      if (!document) return null;\n      return documentToSourceInfo(document as OnyxDocument);\n    }, [document]);\n\n    // Convert question to SourceInfo for SourceTag\n    const questionSourceInfo = useMemo(() => {\n      if (!question) return null;\n      return questionToSourceInfo(question, question.level_question_num);\n    }, [question]);\n\n    // Handle click on SourceTag\n    const handleSourceClick = useCallback(() => {\n      if (document && updatePresentingDocument) {\n        openDocument(document as OnyxDocument, updatePresentingDocument);\n      } else if (question && openQuestion) {\n        openQuestion(question);\n      }\n    }, [document, updatePresentingDocument, question, openQuestion]);\n\n    if (value?.toString().startsWith(\"*\")) {\n      return <BlinkingBar addMargin />;\n    } else if (value?.toString().startsWith(\"[\")) {\n      const sourceInfo = documentSourceInfo || questionSourceInfo;\n      if (!sourceInfo) {\n        return <>{rest.children}</>;\n      }\n\n      const displayName = document\n        ? getDisplayNameForSource(document as OnyxDocument)\n        : question?.question || \"Question\";\n\n      return (\n        <SourceTag\n          variant=\"inlineCitation\"\n          displayName={displayName}\n          sources={[sourceInfo]}\n          onSourceClick={handleSourceClick}\n          showDetailsCard\n          className=\"mr-0.5\"\n        />\n      );\n    }\n\n    const url = ensureHrefProtocol(href);\n\n    // Check if the link is to a file on the backend\n    const isChatFile = url?.includes(\"/api/chat/file/\");\n    if (isChatFile && updatePresentingDocument) {\n      const fileId = url!.split(\"/api/chat/file/\")[1]?.split(/[?#]/)[0] || \"\";\n      const filename = value?.toString() || \"download\";\n      return (\n        <a\n          href=\"#\"\n          onClick={(e) => {\n            e.preventDefault();\n            updatePresentingDocument({\n              document_id: fileId,\n              semantic_identifier: filename,\n            });\n          }}\n          className=\"cursor-pointer text-link hover:text-link-hover\"\n        >\n          {rest.children}\n        </a>\n      );\n    }\n\n    return (\n      <a\n        href={url}\n        target=\"_blank\"\n        rel=\"noopener noreferrer\"\n        className=\"cursor-pointer text-link hover:text-link-hover\"\n      >\n        {rest.children}\n      </a>\n    );\n  }\n);\n\ninterface MemoizedParagraphProps {\n  className?: string;\n  children?: React.ReactNode;\n}\n\nexport const MemoizedParagraph = memo(function MemoizedParagraph({\n  className,\n  children,\n}: MemoizedParagraphProps) {\n  return (\n    <Text as=\"p\" mainContentBody className={className}>\n      {children}\n    </Text>\n  );\n});\n\nMemoizedAnchor.displayName = \"MemoizedAnchor\";\nMemoizedLink.displayName = \"MemoizedLink\";\nMemoizedParagraph.displayName = \"MemoizedParagraph\";\n"
  },
  {
    "path": "web/src/app/app/message/MessageSwitcher.tsx",
    "content": "import { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgChevronLeft, SvgChevronRight } from \"@opal/icons\";\nconst DISABLED_MESSAGE = \"Wait for agent message to complete\";\n\ninterface MessageSwitcherProps {\n  currentPage: number;\n  totalPages: number;\n  handlePrevious: () => void;\n  handleNext: () => void;\n  disableForStreaming?: boolean;\n}\n\nexport default function MessageSwitcher({\n  currentPage,\n  totalPages,\n  handlePrevious,\n  handleNext,\n  disableForStreaming,\n}: MessageSwitcherProps) {\n  const handle = (num: number, callback: () => void) =>\n    disableForStreaming\n      ? undefined\n      : currentPage === num\n        ? undefined\n        : callback;\n  const previous = handle(1, handlePrevious);\n  const next = handle(totalPages, handleNext);\n\n  return (\n    <div\n      className=\"flex flex-row items-center gap-1\"\n      data-testid=\"MessageSwitcher/container\"\n    >\n      <Button\n        disabled={disableForStreaming}\n        icon={SvgChevronLeft}\n        onClick={previous}\n        prominence=\"tertiary\"\n        tooltip={disableForStreaming ? DISABLED_MESSAGE : \"Previous\"}\n      />\n\n      <div className=\"flex flex-row items-center justify-center\">\n        <Text as=\"p\" text03 mainUiAction>\n          {currentPage}\n        </Text>\n        <Text as=\"p\" text03 mainUiAction>\n          /\n        </Text>\n        <Text as=\"p\" text03 mainUiAction>\n          {totalPages}\n        </Text>\n      </div>\n\n      <Button\n        disabled={disableForStreaming}\n        icon={SvgChevronRight}\n        onClick={next}\n        prominence=\"tertiary\"\n        tooltip={disableForStreaming ? DISABLED_MESSAGE : \"Next\"}\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/message/Resubmit.tsx",
    "content": "import { useState } from \"react\";\nimport { Alert, AlertDescription, AlertTitle } from \"@/components/ui/alert\";\nimport { SvgChevronDown, SvgChevronRight } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport { getErrorIcon, getErrorTitle } from \"./errorHelpers\";\n\ninterface ResubmitProps {\n  resubmit: () => void;\n}\n\nexport const Resubmit: React.FC<ResubmitProps> = ({ resubmit }) => {\n  return (\n    <div className=\"flex flex-col items-center justify-center gap-y-2 mt-4\">\n      <p className=\"text-sm text-neutral-700 dark:text-neutral-300\">\n        There was an error with the response.\n      </p>\n      <Button onClick={resubmit}>Regenerate</Button>\n    </div>\n  );\n};\n\nexport const ErrorBanner = ({\n  error,\n  errorCode,\n  isRetryable = true,\n  details,\n  stackTrace,\n  resubmit,\n}: {\n  error: string;\n  errorCode?: string;\n  isRetryable?: boolean;\n  details?: Record<string, any>;\n  stackTrace?: string | null;\n  resubmit?: () => void;\n}) => {\n  const [isStackTraceExpanded, setIsStackTraceExpanded] = useState(false);\n\n  return (\n    <div className=\"text-red-700 mt-4 text-sm my-auto\">\n      <Alert variant=\"broken\">\n        {getErrorIcon(errorCode)}\n        <AlertTitle>{getErrorTitle(errorCode)}</AlertTitle>\n        <AlertDescription className=\"flex flex-col gap-y-1\">\n          <span>{error}</span>\n          {details?.model && (\n            <span className=\"text-xs text-muted-foreground\">\n              Model: {details.model}\n              {details.provider && ` (${details.provider})`}\n            </span>\n          )}\n          {details?.tool_name && (\n            <span className=\"text-xs text-muted-foreground\">\n              Tool: {details.tool_name}\n            </span>\n          )}\n          {stackTrace && (\n            <div className=\"mt-2 border-t border-neutral-200 dark:border-neutral-700 pt-2\">\n              <div className=\"flex flex-1 items-center justify-between\">\n                <Button\n                  prominence=\"tertiary\"\n                  icon={isStackTraceExpanded ? SvgChevronDown : SvgChevronRight}\n                  onClick={() => setIsStackTraceExpanded(!isStackTraceExpanded)}\n                >\n                  Stack trace\n                </Button>\n                <CopyIconButton\n                  prominence=\"tertiary\"\n                  getCopyText={() => stackTrace}\n                />\n              </div>\n              {isStackTraceExpanded && (\n                <pre className=\"mt-2 p-3 bg-neutral-100 dark:bg-neutral-800 border border-neutral-200 dark:border-neutral-700 rounded text-xs text-neutral-700 dark:text-neutral-300 overflow-auto max-h-48 whitespace-pre-wrap font-mono\">\n                  {stackTrace}\n                </pre>\n              )}\n            </div>\n          )}\n        </AlertDescription>\n      </Alert>\n      {isRetryable && resubmit && <Resubmit resubmit={resubmit} />}\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/app/app/message/codeUtils.test.ts",
    "content": "import { preprocessLaTeX } from \"./codeUtils\";\n\ndescribe(\"preprocessLaTeX\", () => {\n  describe(\"currency formatting\", () => {\n    it(\"should properly escape dollar signs in text with amounts\", () => {\n      const input =\n        \"Maria wants to buy a new laptop that costs $1,200. She has saved $800 so far. If she saves an additional $100 each month, how many months will it take her to have enough money to buy the laptop?\";\n      const processed = preprocessLaTeX(input);\n\n      // Should escape all dollar signs in currency amounts\n      expect(processed).toContain(\"costs \\\\$1,200\");\n      expect(processed).toContain(\"saved \\\\$800\");\n      expect(processed).toContain(\"additional \\\\$100\");\n      expect(processed).not.toContain(\"costs $1,200\");\n    });\n\n    it(\"should handle dollar signs with backslashes already present\", () => {\n      const input =\n        \"Maria wants to buy a new laptop that costs \\\\$1,200. She has saved \\\\$800 so far.\";\n      const processed = preprocessLaTeX(input);\n\n      // Should preserve the existing escaped dollar signs\n      expect(processed).toContain(\"\\\\$1,200\");\n      expect(processed).toContain(\"\\\\$800\");\n    });\n  });\n\n  describe(\"code block handling\", () => {\n    it(\"should not process dollar signs in code blocks\", () => {\n      const input = \"```plaintext\\nThe total cost is $50.\\n```\";\n      const processed = preprocessLaTeX(input);\n\n      // Dollar sign in code block should remain untouched\n      expect(processed).toContain(\"The total cost is $50.\");\n      expect(processed).not.toContain(\"The total cost is \\\\$50.\");\n    });\n\n    it(\"should not process dollar signs in inline code\", () => {\n      const input =\n        'Use the `printf \"$%.2f\" $amount` command to format currency.';\n      const processed = preprocessLaTeX(input);\n\n      // Dollar signs in inline code should remain untouched\n      expect(processed).toContain('`printf \"$%.2f\" $amount`');\n      expect(processed).not.toContain('`printf \"\\\\$%.2f\" \\\\$amount`');\n    });\n\n    it(\"should handle mixed content with code blocks and currency\", () => {\n      const input =\n        \"The cost is $100.\\n\\n```javascript\\nconst price = '$50';\\n```\\n\\nThe remaining balance is $50.\";\n      const processed = preprocessLaTeX(input);\n\n      // Dollar signs outside code blocks should be escaped\n      expect(processed).toContain(\"The cost is \\\\$100\");\n      expect(processed).toContain(\"The remaining balance is \\\\$50\");\n\n      // Dollar sign in code block should be preserved\n      expect(processed).toContain(\"const price = '$50';\");\n      expect(processed).not.toContain(\"const price = '\\\\$50';\");\n    });\n  });\n\n  describe(\"LaTeX handling\", () => {\n    it(\"should preserve proper LaTeX delimiters\", () => {\n      const input =\n        \"The formula $x^2 + y^2 = z^2$ represents the Pythagorean theorem.\";\n      const processed = preprocessLaTeX(input);\n\n      // LaTeX delimiters should be preserved\n      expect(processed).toContain(\"$x^2 + y^2 = z^2$\");\n    });\n\n    it(\"should convert LaTeX block delimiters\", () => {\n      const input = \"Consider the equation: \\\\[E = mc^2\\\\]\";\n      const processed = preprocessLaTeX(input);\n\n      // Block LaTeX delimiters should be converted\n      expect(processed).toContain(\"$$E = mc^2$$\");\n      expect(processed).not.toContain(\"\\\\[E = mc^2\\\\]\");\n    });\n\n    it(\"should convert LaTeX inline delimiters\", () => {\n      const input =\n        \"The speed of light \\\\(c\\\\) is approximately 299,792,458 m/s.\";\n      const processed = preprocessLaTeX(input);\n\n      // Inline LaTeX delimiters should be converted\n      expect(processed).toContain(\"$c$\");\n      expect(processed).not.toContain(\"\\\\(c\\\\)\");\n    });\n  });\n\n  describe(\"special cases\", () => {\n    it(\"should handle shell variables in text\", () => {\n      const input =\n        \"In bash, you can access arguments with $1, $2, and use echo $HOME to print the home directory.\";\n      const processed = preprocessLaTeX(input);\n\n      // Verify current behavior (numeric shell variables are being escaped)\n      expect(processed).toContain(\"\\\\$1\");\n      expect(processed).toContain(\"\\\\$2\");\n\n      // But $HOME is not escaped (non-numeric)\n      expect(processed).toContain(\"$HOME\");\n    });\n\n    it(\"should handle shell commands with dollar signs\", () => {\n      const input = \"Use awk '{print $2}' to print the second column.\";\n      const processed = preprocessLaTeX(input);\n\n      // Dollar sign in awk command should not be escaped\n      expect(processed).toContain(\"{print $2}\");\n      expect(processed).not.toContain(\"{print \\\\$2}\");\n    });\n\n    it(\"should handle Einstein's equation with mixed LaTeX and code blocks\", () => {\n      const input =\n        \"Sure! The equation for Einstein's mass-energy equivalence, \\\\(E = mc^2\\\\), can be written in LaTeX as follows: ```latex\\nE = mc^2\\n``` When rendered, it looks like this: \\\\[ E = mc^2 \\\\]\";\n      const processed = preprocessLaTeX(input);\n\n      // LaTeX inline delimiters should be converted\n      expect(processed).toContain(\"equivalence, $E = mc^2$,\");\n      expect(processed).not.toContain(\"equivalence, \\\\(E = mc^2\\\\),\");\n\n      // LaTeX block delimiters should be converted\n      expect(processed).toContain(\"it looks like this: $$ E = mc^2 $$\");\n      expect(processed).not.toContain(\"it looks like this: \\\\[ E = mc^2 \\\\]\");\n\n      // LaTeX within code blocks should remain untouched\n      expect(processed).toContain(\"```latex\\nE = mc^2\\n```\");\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/app/app/message/codeUtils.ts",
    "content": "import React from \"react\";\n\nexport function extractCodeText(\n  node: any,\n  content: string,\n  children: React.ReactNode\n): string {\n  let codeText: string | null = null;\n\n  if (\n    node?.position?.start?.offset != null &&\n    node?.position?.end?.offset != null\n  ) {\n    codeText = content\n      .slice(node.position.start.offset, node.position.end.offset)\n      .trim();\n\n    // Match code block with optional language declaration\n    const codeBlockMatch = codeText.match(/^```[^\\n]*\\n([\\s\\S]*?)\\n?```$/);\n    if (codeBlockMatch) {\n      const codeTextMatch = codeBlockMatch[1];\n      if (codeTextMatch !== undefined) {\n        codeText = codeTextMatch;\n      }\n    }\n\n    // Normalize indentation\n    const codeLines = codeText.split(\"\\n\");\n    const minIndent = codeLines\n      .filter((line) => line.trim().length > 0)\n      .reduce((min, line) => {\n        const match = line.match(/^\\s*/);\n        return Math.min(min, match ? match[0].length : min);\n      }, Infinity);\n\n    const formattedCodeLines = codeLines.map((line) => line.slice(minIndent));\n    codeText = formattedCodeLines.join(\"\\n\").trim();\n  } else {\n    // Fallback if position offsets are not available\n    const extractTextFromReactNode = (node: React.ReactNode): string => {\n      if (typeof node === \"string\") return node;\n      if (typeof node === \"number\") return String(node);\n      if (!node) return \"\";\n\n      if (React.isValidElement(node)) {\n        const children = (node.props as any).children;\n        if (Array.isArray(children)) {\n          return children.map(extractTextFromReactNode).join(\"\");\n        }\n        return extractTextFromReactNode(children);\n      }\n\n      if (Array.isArray(node)) {\n        return node.map(extractTextFromReactNode).join(\"\");\n      }\n\n      return \"\";\n    };\n\n    codeText = extractTextFromReactNode(children);\n  }\n\n  return codeText || \"\";\n}\n// We must preprocess LaTeX in the LLM output to avoid improper formatting\n\nexport const preprocessLaTeX = (content: string) => {\n  // First detect if content is within a code block\n  const codeBlockRegex = /^```[\\s\\S]*?```$/;\n  const isCodeBlock = codeBlockRegex.test(content.trim());\n\n  // If the entire content is a code block, don't process LaTeX\n  if (isCodeBlock) {\n    return content;\n  }\n\n  // Extract code blocks and replace with placeholders\n  const codeBlocks: string[] = [];\n  const withCodeBlocksReplaced = content.replace(/```[\\s\\S]*?```/g, (match) => {\n    const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;\n    codeBlocks.push(match);\n    return placeholder;\n  });\n\n  // First, protect code-like expressions where $ is used for variables\n  const codeProtected = withCodeBlocksReplaced.replace(\n    /\\b(\\w+(?:\\s*-\\w+)*\\s*(?:'[^']*')?)\\s*\\{[^}]*?\\$\\d+[^}]*?\\}/g,\n    (match) => {\n      // Replace $ with a temporary placeholder in code contexts\n      return match.replace(/\\$/g, \"___DOLLAR_PLACEHOLDER___\");\n    }\n  );\n\n  // Also protect common shell variable patterns like $1, $2, etc.\n  const shellProtected = codeProtected.replace(\n    /\\b(?:print|echo|awk|sed|grep)\\s+.*?\\$\\d+/g,\n    (match) => match.replace(/\\$/g, \"___DOLLAR_PLACEHOLDER___\")\n  );\n\n  // Protect inline code blocks with backticks\n  const inlineCodeProtected = shellProtected.replace(/`[^`]+`/g, (match) => {\n    return match.replace(/\\$/g, \"___DOLLAR_PLACEHOLDER___\");\n  });\n\n  // Process LaTeX expressions now that code is protected\n  // Valid LaTeX should have matching dollar signs with non-space chars surrounding content\n  const processedForLatex = inlineCodeProtected.replace(\n    /\\$([^\\s$][^$]*?[^\\s$])\\$/g,\n    (_, equation) => `$${equation}$`\n  );\n\n  // Escape currency mentions\n  const currencyEscaped = processedForLatex.replace(\n    /\\$(\\d+(?:\\.\\d*)?)/g,\n    (_, p1) => `\\\\$${p1}`\n  );\n\n  // Replace block-level LaTeX delimiters \\[ \\] with $$ $$\n  const blockProcessed = currencyEscaped.replace(\n    /\\\\\\[([\\s\\S]*?)\\\\\\]/g,\n    (_, equation) => `$$${equation}$$`\n  );\n\n  // Replace inline LaTeX delimiters \\( \\) with $ $\n  const inlineProcessed = blockProcessed.replace(\n    /\\\\\\(([\\s\\S]*?)\\\\\\)/g,\n    (_, equation) => `$${equation}$`\n  );\n\n  // Restore original dollar signs in code contexts\n  const restoredDollars = inlineProcessed.replace(\n    /___DOLLAR_PLACEHOLDER___/g,\n    \"$\"\n  );\n\n  // Restore code blocks\n  const restoredCodeBlocks = restoredDollars.replace(\n    /___CODE_BLOCK_(\\d+)___/g,\n    (_, index) => codeBlocks[parseInt(index)] ?? \"\"\n  );\n\n  return restoredCodeBlocks;\n};\n"
  },
  {
    "path": "web/src/app/app/message/copyingUtils.tsx",
    "content": "\"use client\";\nimport { unified } from \"unified\";\nimport remarkParse from \"remark-parse\";\nimport remarkGfm from \"remark-gfm\";\nimport remarkMath from \"remark-math\";\nimport remarkRehype from \"remark-rehype\";\nimport rehypeHighlight from \"rehype-highlight\";\nimport rehypeKatex from \"rehype-katex\";\nimport rehypeSanitize from \"rehype-sanitize\";\nimport rehypeStringify from \"rehype-stringify\";\n\nexport function handleCopy(\n  event: React.ClipboardEvent,\n  markdownRef: React.RefObject<HTMLDivElement>\n) {\n  // Check if we have a selection\n  const selection = window.getSelection();\n  if (!selection?.rangeCount) return;\n\n  const range = selection.getRangeAt(0);\n\n  // If selection is within our markdown container\n  if (\n    markdownRef.current &&\n    markdownRef.current.contains(range.commonAncestorContainer)\n  ) {\n    event.preventDefault();\n\n    // Clone selection to get the HTML\n    const fragment = range.cloneContents();\n    const tempDiv = document.createElement(\"div\");\n    tempDiv.appendChild(fragment);\n\n    // Create clipboard data with both HTML and plain text\n    event.clipboardData.setData(\"text/html\", tempDiv.innerHTML);\n    event.clipboardData.setData(\"text/plain\", selection.toString());\n  }\n}\n\n// Convert markdown tables to TSV format for spreadsheet compatibility\nexport function convertMarkdownTablesToTsv(content: string): string {\n  const lines = content.split(\"\\n\");\n  const result: string[] = [];\n\n  for (const line of lines) {\n    // Check if line is a markdown table row (starts and ends with |)\n    const trimmed = line.trim();\n    if (trimmed.startsWith(\"|\") && trimmed.endsWith(\"|\")) {\n      // Check if it's a separator row (contains only |, -, :, and spaces)\n      if (/^\\|[\\s\\-:|\\s]+\\|$/.test(trimmed)) {\n        // Skip separator rows\n        continue;\n      }\n      // Convert table row: split by |, trim cells, join with tabs\n      const placeholder = \"\\x00\";\n      const cells = trimmed\n        .slice(1, -1) // Remove leading and trailing |\n        .replace(/\\\\\\|/g, placeholder) // Preserve escaped pipes\n        .split(\"|\")\n        .map((cell) => cell.trim().replace(new RegExp(placeholder, \"g\"), \"|\"));\n      result.push(cells.join(\"\\t\"));\n    } else {\n      result.push(line);\n    }\n  }\n\n  return result.join(\"\\n\");\n}\n\n// For copying the entire content\nexport function copyAll(content: string) {\n  // Convert markdown to HTML using unified ecosystem\n  unified()\n    .use(remarkParse)\n    .use(remarkGfm)\n    .use(remarkMath)\n    .use(remarkRehype)\n    .use(rehypeHighlight)\n    .use(rehypeKatex)\n    .use(rehypeSanitize)\n    .use(rehypeStringify)\n    .process(content)\n    .then((file: any) => {\n      const htmlContent = String(file);\n\n      // Create clipboard data\n      const clipboardItem = new ClipboardItem({\n        \"text/html\": new Blob([htmlContent], { type: \"text/html\" }),\n        \"text/plain\": new Blob([content], { type: \"text/plain\" }),\n      });\n\n      navigator.clipboard.write([clipboardItem]);\n    });\n}\n"
  },
  {
    "path": "web/src/app/app/message/custom-code-styles.css",
    "content": "/* Light mode syntax highlighting (Atom One Light) */\n.hljs {\n  color: #383a42 !important;\n  background: var(--background-code-01) !important;\n}\n\n.hljs-comment,\n.hljs-quote {\n  color: #a0a1a7;\n  font-style: italic;\n}\n\n.hljs-doctag,\n.hljs-keyword,\n.hljs-formula {\n  color: #a626a4;\n}\n\n.hljs-section,\n.hljs-name,\n.hljs-selector-tag,\n.hljs-deletion,\n.hljs-subst {\n  color: #e45649;\n}\n\n.hljs-literal {\n  color: #0184bb;\n}\n\n.hljs-string,\n.hljs-regexp,\n.hljs-addition,\n.hljs-attribute,\n.hljs-meta .hljs-string {\n  color: #50a14f;\n}\n\n.hljs-attr,\n.hljs-variable,\n.hljs-template-variable,\n.hljs-type,\n.hljs-selector-class,\n.hljs-selector-attr,\n.hljs-selector-pseudo,\n.hljs-number {\n  color: #986801;\n}\n\n.hljs-symbol,\n.hljs-bullet,\n.hljs-link,\n.hljs-meta,\n.hljs-selector-id,\n.hljs-title {\n  color: #4078f2;\n}\n\n.hljs-built_in,\n.hljs-title.class_,\n.hljs-class .hljs-title {\n  color: #c18401;\n}\n\n.hljs-emphasis {\n  font-style: italic;\n}\n\n.hljs-strong {\n  font-weight: bold;\n}\n\n.hljs-link {\n  text-decoration: underline;\n}\n\n/* Dark mode syntax highlighting (Atom One Dark) */\n.dark .hljs {\n  color: #e2e6eb !important;\n  background: var(--background-code-01) !important;\n}\n\n.dark .hljs-comment,\n.dark .hljs-quote {\n  color: #5c6370;\n  font-style: italic;\n}\n\n.dark .hljs-doctag,\n.dark .hljs-keyword,\n.dark .hljs-formula {\n  color: #c678dd;\n}\n\n.dark .hljs-section,\n.dark .hljs-name,\n.dark .hljs-selector-tag,\n.dark .hljs-deletion,\n.dark .hljs-subst {\n  color: #e06c75;\n}\n\n.dark .hljs-literal {\n  color: #56b6c2;\n}\n\n.dark .hljs-string,\n.dark .hljs-regexp,\n.dark .hljs-addition,\n.dark .hljs-attribute,\n.dark .hljs-meta .hljs-string {\n  color: #98c379;\n}\n\n.dark .hljs-attr,\n.dark .hljs-variable,\n.dark .hljs-template-variable,\n.dark .hljs-type,\n.dark .hljs-selector-class,\n.dark .hljs-selector-attr,\n.dark .hljs-selector-pseudo,\n.dark .hljs-number {\n  color: #d19a66;\n}\n\n.dark .hljs-symbol,\n.dark .hljs-bullet,\n.dark .hljs-link,\n.dark .hljs-meta,\n.dark .hljs-selector-id,\n.dark .hljs-title {\n  color: #61aeee;\n}\n\n.dark .hljs-built_in,\n.dark .hljs-title.class_,\n.dark .hljs-class .hljs-title {\n  color: #e6c07b;\n}\n\n.dark .hljs-emphasis {\n  font-style: italic;\n}\n\n.dark .hljs-strong {\n  font-weight: bold;\n}\n\n.dark .hljs-link {\n  text-decoration: underline;\n}\n\npre[class*=\"language-\"] {\n  padding: 0px; /* Override padding */\n  margin: 0px;\n  border: none;\n}\n\n.prose :where(pre):not(:where([class~=\"not-prose\"], [class~=\"not-prose\"] *)) {\n  padding: 0px; /* Override padding */\n  margin: 0px;\n\n  /* Override scrollbar style to match highlight.js theme */\n  ::-webkit-scrollbar {\n    width: 8px; /* Vertical scrollbar width */\n    height: 8px; /* Horizontal scrollbar height */\n  }\n\n  /* Light mode scrollbar */\n  ::-webkit-scrollbar-track {\n    background: #e5e7eb; /* Light track background color */\n  }\n\n  ::-webkit-scrollbar-thumb {\n    background: #c9cdd1; /* Light handle color - subtle */\n    border-radius: 10px;\n    transition: background 0.2s ease;\n  }\n\n  ::-webkit-scrollbar-thumb:hover {\n    background: #6b7280; /* Light handle color on hover */\n  }\n\n  scrollbar-width: thin;\n  scrollbar-color: #c9cdd1 #e5e7eb; /* thumb and track colors for light mode */\n}\n\n/* Light mode - highlight scrollbar when hovering code block */\n.prose\n  :where(pre):not(:where([class~=\"not-prose\"], [class~=\"not-prose\"] *)):hover {\n  ::-webkit-scrollbar-thumb {\n    background: #9ca3af; /* More visible on code block hover */\n  }\n\n  ::-webkit-scrollbar-thumb:hover {\n    background: #6b7280;\n  }\n\n  scrollbar-color: #9ca3af #e5e7eb;\n}\n\n/* Dark mode scrollbar for code blocks */\n.dark\n  .prose\n  :where(pre):not(:where([class~=\"not-prose\"], [class~=\"not-prose\"] *)) {\n  ::-webkit-scrollbar-track {\n    background: #1f2937; /* Dark track background color */\n  }\n\n  ::-webkit-scrollbar-thumb {\n    background: #374151; /* Dark handle color - subtle */\n    transition: background 0.2s ease;\n  }\n\n  ::-webkit-scrollbar-thumb:hover {\n    background: #6b7280; /* Dark handle color on hover */\n    box-shadow: 0 0 10px #6b7280; /* Light up effect on hover */\n  }\n\n  scrollbar-color: #374151 #1f2937; /* thumb and track colors for dark mode */\n}\n\n/* Dark mode - highlight scrollbar when hovering code block */\n.dark\n  .prose\n  :where(pre):not(:where([class~=\"not-prose\"], [class~=\"not-prose\"] *)):hover {\n  ::-webkit-scrollbar-thumb {\n    background: #4b5563; /* More visible on code block hover */\n  }\n\n  ::-webkit-scrollbar-thumb:hover {\n    background: #6b7280;\n    box-shadow: 0 0 10px #6b7280;\n  }\n\n  scrollbar-color: #4b5563 #1f2937;\n}\n\n/*\n * Table breakout container - allows tables to extend beyond their parent's\n * constrained width to use the full container query width (100cqw).\n *\n * Requires an ancestor element with `container-type: inline-size` (@container in Tailwind).\n *\n * How the math works:\n * - width: 100cqw → expand to full container query width\n * - marginLeft: calc((100% - 100cqw) / 2) → negative margin pulls element left\n *   (100% is parent width, 100cqw is larger, so result is negative)\n * - paddingLeft/Right: calc((100cqw - 100%) / 2) → padding keeps content aligned\n *   with original position while allowing scroll area to extend\n */\n.markdown-table-breakout {\n  overflow-x: auto;\n  width: 100cqw;\n  margin-left: calc((100% - 100cqw) / 2);\n  padding-left: calc((100cqw - 100%) / 2);\n  padding-right: calc((100cqw - 100%) / 2);\n}\n"
  },
  {
    "path": "web/src/app/app/message/errorHelpers.tsx",
    "content": "import { AlertCircle, Clock, Lock, Wifi, Server } from \"lucide-react\";\n\n/**\n * Get the appropriate icon for a given error code\n */\nexport const getErrorIcon = (errorCode?: string) => {\n  switch (errorCode) {\n    case \"RATE_LIMIT\":\n      return <Clock className=\"h-4 w-4\" />;\n    case \"AUTH_ERROR\":\n    case \"PERMISSION_DENIED\":\n      return <Lock className=\"h-4 w-4\" />;\n    case \"CONNECTION_ERROR\":\n      return <Wifi className=\"h-4 w-4\" />;\n    case \"SERVICE_UNAVAILABLE\":\n      return <Server className=\"h-4 w-4\" />;\n    case \"BUDGET_EXCEEDED\":\n      return <AlertCircle className=\"h-4 w-4\" />;\n    default:\n      return <AlertCircle className=\"h-4 w-4\" />;\n  }\n};\n\n/**\n * Get a human-readable title for a given error code\n */\nexport const getErrorTitle = (errorCode?: string) => {\n  switch (errorCode) {\n    case \"RATE_LIMIT\":\n      return \"Rate Limit Exceeded\";\n    case \"AUTH_ERROR\":\n      return \"Authentication Error\";\n    case \"PERMISSION_DENIED\":\n      return \"Permission Denied\";\n    case \"CONTEXT_TOO_LONG\":\n      return \"Message Too Long\";\n    case \"TOOL_CALL_FAILED\":\n      return \"Tool Error\";\n    case \"CONNECTION_ERROR\":\n      return \"Connection Error\";\n    case \"SERVICE_UNAVAILABLE\":\n      return \"Service Unavailable\";\n    case \"INIT_FAILED\":\n      return \"Initialization Error\";\n    case \"VALIDATION_ERROR\":\n      return \"Validation Error\";\n    case \"BUDGET_EXCEEDED\":\n      return \"Budget Exceeded\";\n    case \"CONTENT_POLICY\":\n      return \"Content Policy Violation\";\n    case \"BAD_REQUEST\":\n      return \"Invalid Request\";\n    case \"NOT_FOUND\":\n      return \"Resource Not Found\";\n    case \"API_ERROR\":\n      return \"API Error\";\n    default:\n      return \"Error\";\n  }\n};\n"
  },
  {
    "path": "web/src/app/app/message/hooks.ts",
    "content": "import { useEffect, useRef, useState } from \"react\";\n\nexport function useMouseTracking() {\n  const [isHovering, setIsHovering] = useState<boolean>(false);\n  const trackedElementRef = useRef<HTMLDivElement>(null);\n  const hoverElementRef = useRef<HTMLDivElement>(null);\n\n  useEffect(() => {\n    const handleMouseMove = (event: MouseEvent) => {\n      if (trackedElementRef.current && hoverElementRef.current) {\n        const trackedRect = trackedElementRef.current.getBoundingClientRect();\n        const hoverRect = hoverElementRef.current.getBoundingClientRect();\n\n        const isOverTracked =\n          event.clientX >= trackedRect.left &&\n          event.clientX <= trackedRect.right &&\n          event.clientY >= trackedRect.top &&\n          event.clientY <= trackedRect.bottom;\n\n        const isOverHover =\n          event.clientX >= hoverRect.left &&\n          event.clientX <= hoverRect.right &&\n          event.clientY >= hoverRect.top &&\n          event.clientY <= hoverRect.bottom;\n\n        setIsHovering(isOverTracked || isOverHover);\n      }\n    };\n\n    document.addEventListener(\"mousemove\", handleMouseMove);\n\n    return () => {\n      document.removeEventListener(\"mousemove\", handleMouseMove);\n    };\n  }, []);\n\n  return { isHovering, trackedElementRef, hoverElementRef };\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/AgentMessage.tsx",
    "content": "\"use client\";\n\nimport React, {\n  useRef,\n  RefObject,\n  useMemo,\n  useEffect,\n  useLayoutEffect,\n} from \"react\";\nimport { Packet, StopReason } from \"@/app/app/services/streamingModels\";\nimport CustomToolAuthCard from \"@/app/app/message/messageComponents/CustomToolAuthCard\";\nimport { FullChatState } from \"@/app/app/message/messageComponents/interfaces\";\nimport { FeedbackType } from \"@/app/app/interfaces\";\nimport { handleCopy } from \"@/app/app/message/copyingUtils\";\nimport { useAuthErrors } from \"@/app/app/message/messageComponents/hooks/useAuthErrors\";\nimport { useMessageSwitching } from \"@/app/app/message/messageComponents/hooks/useMessageSwitching\";\nimport { RendererComponent } from \"@/app/app/message/messageComponents/renderMessageComponent\";\nimport { usePacketProcessor } from \"@/app/app/message/messageComponents/timeline/hooks/usePacketProcessor\";\nimport { usePacedTurnGroups } from \"@/app/app/message/messageComponents/timeline/hooks/usePacedTurnGroups\";\nimport MessageToolbar from \"@/app/app/message/messageComponents/MessageToolbar\";\nimport { LlmDescriptor, LlmManager } from \"@/lib/hooks\";\nimport { Message } from \"@/app/app/interfaces\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { AgentTimeline } from \"@/app/app/message/messageComponents/timeline/AgentTimeline\";\nimport { useVoiceMode } from \"@/providers/VoiceModeProvider\";\nimport { getTextContent } from \"@/app/app/services/packetUtils\";\nimport { removeThinkingTokens } from \"@/app/app/services/thinkingTokens\";\n\n// Type for the regeneration factory function passed from ChatUI\nexport type RegenerationFactory = (regenerationRequest: {\n  messageId: number;\n  parentMessage: Message;\n  forceSearch?: boolean;\n}) => (modelOverride: LlmDescriptor) => Promise<void>;\n\nexport interface AgentMessageProps {\n  rawPackets: Packet[];\n  packetCount?: number; // Tracked separately for React memo comparison (avoids reading from mutated array)\n  chatState: FullChatState;\n  nodeId: number;\n  messageId?: number;\n  currentFeedback?: FeedbackType | null;\n  llmManager: LlmManager | null;\n  otherMessagesCanSwitchTo?: number[];\n  onMessageSelection?: (nodeId: number) => void;\n  // Stable regeneration callback - takes (parentMessage) and returns a function that takes (modelOverride)\n  onRegenerate?: RegenerationFactory;\n  // Parent message needed to construct regeneration request\n  parentMessage?: Message | null;\n  // Duration in seconds for processing this message (agent messages only)\n  processingDurationSeconds?: number;\n}\n\n// TODO: Consider more robust comparisons:\n// - `chatState.docs`, `chatState.citations`, and `otherMessagesCanSwitchTo` use\n//   reference equality. Shallow array/object comparison would be more robust if\n//   these are recreated with the same values.\nfunction arePropsEqual(\n  prev: AgentMessageProps,\n  next: AgentMessageProps\n): boolean {\n  return (\n    prev.nodeId === next.nodeId &&\n    prev.messageId === next.messageId &&\n    prev.currentFeedback === next.currentFeedback &&\n    // Compare packetCount (primitive) instead of rawPackets.length\n    // The array is mutated in place, so reading .length from prev and next would return same value\n    prev.packetCount === next.packetCount &&\n    prev.chatState.agent?.id === next.chatState.agent?.id &&\n    prev.chatState.docs === next.chatState.docs &&\n    prev.chatState.citations === next.chatState.citations &&\n    prev.chatState.overriddenModel === next.chatState.overriddenModel &&\n    prev.chatState.researchType === next.chatState.researchType &&\n    prev.otherMessagesCanSwitchTo === next.otherMessagesCanSwitchTo &&\n    prev.onRegenerate === next.onRegenerate &&\n    prev.parentMessage?.messageId === next.parentMessage?.messageId &&\n    prev.llmManager?.isLoadingProviders ===\n      next.llmManager?.isLoadingProviders &&\n    prev.processingDurationSeconds === next.processingDurationSeconds\n    // Skip: chatState.regenerate, chatState.setPresentingDocument,\n    //       most of llmManager, onMessageSelection (function/object props)\n  );\n}\n\nconst AgentMessage = React.memo(function AgentMessage({\n  rawPackets,\n  packetCount,\n  chatState,\n  nodeId,\n  messageId,\n  currentFeedback,\n  llmManager,\n  otherMessagesCanSwitchTo,\n  onMessageSelection,\n  onRegenerate,\n  parentMessage,\n  processingDurationSeconds,\n}: AgentMessageProps) {\n  const markdownRef = useRef<HTMLDivElement>(null);\n  const finalAnswerRef = useRef<HTMLDivElement>(null);\n\n  // Process streaming packets: returns data and callbacks\n  // Hook handles all state internally, exposes clean API\n  const {\n    citations,\n    citationMap,\n    documentMap,\n    toolGroups,\n    toolTurnGroups,\n    displayGroups,\n    hasSteps,\n    stopPacketSeen,\n    stopReason,\n    isGeneratingImage,\n    generatedImageCount,\n    isComplete,\n    onRenderComplete,\n    finalAnswerComing,\n    toolProcessingDuration,\n  } = usePacketProcessor(rawPackets, nodeId);\n\n  // Apply pacing delays between different tool types for smoother visual transitions\n  const { pacedTurnGroups, pacedDisplayGroups, pacedFinalAnswerComing } =\n    usePacedTurnGroups(\n      toolTurnGroups,\n      displayGroups,\n      stopPacketSeen,\n      nodeId,\n      finalAnswerComing\n    );\n\n  // Memoize merged citations separately to avoid creating new object when neither source changed\n  const mergedCitations = useMemo(\n    () => ({\n      ...chatState.citations,\n      ...citationMap,\n    }),\n    [chatState.citations, citationMap]\n  );\n\n  // Create a chatState that uses streaming citations for immediate rendering\n  // This merges the prop citations with streaming citations, preferring streaming ones\n  // Memoized with granular dependencies to prevent cascading re-renders\n  // Note: chatState object is recreated upstream on every render, so we depend on\n  // individual fields instead of the whole object for proper memoization\n  const effectiveChatState = useMemo<FullChatState>(\n    () => ({\n      ...chatState,\n      citations: mergedCitations,\n    }),\n    [\n      chatState.agent,\n      chatState.docs,\n      chatState.setPresentingDocument,\n      chatState.overriddenModel,\n      chatState.researchType,\n      mergedCitations,\n    ]\n  );\n\n  const authErrors = useAuthErrors(rawPackets);\n\n  // Message switching logic\n  const {\n    currentMessageInd,\n    includeMessageSwitcher,\n    getPreviousMessage,\n    getNextMessage,\n  } = useMessageSwitching({\n    nodeId,\n    otherMessagesCanSwitchTo,\n    onMessageSelection,\n  });\n\n  // Streaming TTS integration\n  const { streamTTS, resetTTS, stopTTS } = useVoiceMode();\n  const ttsCompletedRef = useRef(false);\n  const hasStreamedIncompleteRef = useRef(false);\n  const hasObservedPacketGrowthRef = useRef(false);\n  const lastSeenPacketCountRef = useRef(packetCount ?? rawPackets.length);\n  const streamTTSRef = useRef(streamTTS);\n\n  // Keep streamTTS ref in sync without triggering effect re-runs\n  useEffect(() => {\n    streamTTSRef.current = streamTTS;\n  }, [streamTTS]);\n\n  // Stream TTS as text content arrives - only for messages still streaming\n  // Uses ref for streamTTS to avoid re-triggering when its identity changes\n  // Note: packetCount is used instead of rawPackets because the array is mutated in place\n  useLayoutEffect(() => {\n    const effectivePacketCount = packetCount ?? rawPackets.length;\n    if (effectivePacketCount > lastSeenPacketCountRef.current) {\n      hasObservedPacketGrowthRef.current = true;\n    }\n    lastSeenPacketCountRef.current = effectivePacketCount;\n\n    // Skip if we've already finished TTS for this message\n    if (ttsCompletedRef.current) return;\n\n    // If user cancelled generation, do not send more text to TTS.\n    if (stopPacketSeen && stopReason === StopReason.USER_CANCELLED) {\n      ttsCompletedRef.current = true;\n      return;\n    }\n\n    const textContent = removeThinkingTokens(getTextContent(rawPackets));\n    if (!(typeof textContent === \"string\" && textContent.length > 0)) return;\n\n    // Only autoplay messages that were observed streaming in this lifecycle.\n    // Prevents historical, already-complete chats from re-triggering read-aloud on mount.\n    if (!isComplete) {\n      if (!hasObservedPacketGrowthRef.current) {\n        return;\n      }\n      hasStreamedIncompleteRef.current = true;\n      streamTTSRef.current(textContent, false, nodeId);\n      return;\n    }\n\n    if (hasStreamedIncompleteRef.current) {\n      streamTTSRef.current(textContent, true, nodeId);\n      ttsCompletedRef.current = true;\n    }\n  }, [packetCount, isComplete, rawPackets, nodeId, stopPacketSeen, stopReason]); // packetCount triggers on new packets since rawPackets is mutated in place\n\n  // Stop TTS immediately when user cancels generation.\n  useEffect(() => {\n    if (stopPacketSeen && stopReason === StopReason.USER_CANCELLED) {\n      stopTTS({ manual: true });\n    }\n  }, [stopPacketSeen, stopReason, stopTTS]);\n\n  // Reset TTS completed flag when nodeId changes (new message)\n  useEffect(() => {\n    ttsCompletedRef.current = false;\n    hasStreamedIncompleteRef.current = false;\n    hasObservedPacketGrowthRef.current = false;\n    lastSeenPacketCountRef.current = packetCount ?? rawPackets.length;\n  }, [nodeId]);\n\n  // Reset TTS when component unmounts or nodeId changes\n  useEffect(() => {\n    return () => {\n      resetTTS();\n    };\n  }, [nodeId, resetTTS]);\n\n  return (\n    <div\n      className=\"flex flex-col gap-3\"\n      data-testid={isComplete ? \"onyx-ai-message\" : undefined}\n    >\n      {/* Row 1: Two-column layout for tool steps */}\n\n      <AgentTimeline\n        turnGroups={pacedTurnGroups}\n        chatState={effectiveChatState}\n        stopPacketSeen={stopPacketSeen}\n        stopReason={stopReason}\n        hasDisplayContent={pacedDisplayGroups.length > 0}\n        processingDurationSeconds={processingDurationSeconds}\n        isGeneratingImage={isGeneratingImage}\n        generatedImageCount={generatedImageCount}\n        finalAnswerComing={pacedFinalAnswerComing}\n        toolProcessingDuration={toolProcessingDuration}\n      />\n\n      {/* Row 2: Display content + MessageToolbar */}\n      <div\n        ref={markdownRef}\n        className=\"overflow-x-visible focus:outline-none select-text cursor-text px-3\"\n        onCopy={(e) => {\n          if (markdownRef.current) {\n            handleCopy(e, markdownRef as RefObject<HTMLDivElement>);\n          }\n        }}\n      >\n        {pacedDisplayGroups.length > 0 && (\n          <div ref={finalAnswerRef} className=\"flex flex-col gap-3\">\n            {authErrors.map((authError, i) => (\n              <CustomToolAuthCard\n                key={`auth-error-${i}`}\n                toolName={authError.toolName}\n                toolId={authError.toolId}\n                tools={effectiveChatState.agent.tools}\n                agentId={effectiveChatState.agent.id}\n              />\n            ))}\n            {pacedDisplayGroups.map((displayGroup, index) => (\n              <RendererComponent\n                key={`${displayGroup.turn_index}-${displayGroup.tab_index}`}\n                packets={displayGroup.packets}\n                chatState={effectiveChatState}\n                messageNodeId={nodeId}\n                hasTimelineThinking={pacedTurnGroups.length > 0 || hasSteps}\n                onComplete={() => {\n                  // Only mark complete on the last display group\n                  // Hook handles the finalAnswerComing check internally\n                  if (index === pacedDisplayGroups.length - 1) {\n                    onRenderComplete();\n                  }\n                }}\n                animate={false}\n                stopPacketSeen={stopPacketSeen}\n                stopReason={stopReason}\n              >\n                {(results) => (\n                  <>\n                    {results.map((r, i) => (\n                      <div key={i}>{r.content}</div>\n                    ))}\n                  </>\n                )}\n              </RendererComponent>\n            ))}\n          </div>\n        )}\n        {/* Show stopped message when user cancelled and no display content */}\n        {pacedDisplayGroups.length === 0 &&\n          stopReason === StopReason.USER_CANCELLED && (\n            <Text as=\"p\" secondaryBody text04>\n              User has stopped generation\n            </Text>\n          )}\n      </div>\n\n      {/* Feedback buttons - only show when streaming and rendering complete */}\n      {isComplete && (\n        <MessageToolbar\n          nodeId={nodeId}\n          messageId={messageId}\n          includeMessageSwitcher={includeMessageSwitcher}\n          currentMessageInd={currentMessageInd}\n          otherMessagesCanSwitchTo={otherMessagesCanSwitchTo}\n          getPreviousMessage={getPreviousMessage}\n          getNextMessage={getNextMessage}\n          onMessageSelection={onMessageSelection}\n          rawPackets={rawPackets}\n          finalAnswerRef={finalAnswerRef}\n          currentFeedback={currentFeedback}\n          onRegenerate={onRegenerate}\n          parentMessage={parentMessage}\n          llmManager={llmManager}\n          currentModelName={chatState.overriddenModel}\n          citations={citations}\n          documentMap={documentMap}\n        />\n      )}\n    </div>\n  );\n}, arePropsEqual);\n\nexport default AgentMessage;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/CustomToolAuthCard.tsx",
    "content": "\"use client\";\n\nimport { useMemo } from \"react\";\nimport Message from \"@/refresh-components/messages/Message\";\nimport { ToolSnapshot } from \"@/lib/tools/interfaces\";\nimport { initiateOAuthFlow } from \"@/lib/oauth/api\";\nimport { useToolOAuthStatus } from \"@/lib/hooks/useToolOAuthStatus\";\nimport { SvgArrowExchange } from \"@opal/icons\";\n\ninterface CustomToolAuthCardProps {\n  toolName: string;\n  toolId: number | null;\n  tools: ToolSnapshot[];\n  agentId: number;\n}\n\nfunction CustomToolAuthCard({\n  toolName,\n  toolId,\n  tools,\n  agentId,\n}: CustomToolAuthCardProps) {\n  const { getToolAuthStatus } = useToolOAuthStatus(agentId);\n  const matchedTool = useMemo(() => {\n    if (toolId == null) return null;\n    return tools.find((t) => t.id === toolId) ?? null;\n  }, [toolId, tools]);\n\n  // Hide the card if the user already has a valid token\n  const authStatus = matchedTool ? getToolAuthStatus(matchedTool) : undefined;\n  if (authStatus?.hasToken && !authStatus.isTokenExpired) {\n    return null;\n  }\n\n  const oauthConfigId = matchedTool?.oauth_config_id ?? null;\n\n  // No OAuth config — nothing actionable to show\n  if (!oauthConfigId) {\n    return null;\n  }\n\n  const handleAuthenticate = () => {\n    initiateOAuthFlow(\n      oauthConfigId,\n      window.location.pathname + window.location.search\n    );\n  };\n\n  return (\n    <Message\n      static\n      large\n      icon\n      close={false}\n      text={`${toolName} not connected`}\n      description={`Connect to ${toolName} to enable this tool`}\n      actions=\"Connect\"\n      actionPrimary\n      actionIcon={SvgArrowExchange}\n      onAction={handleAuthenticate}\n      className=\"w-full\"\n    />\n  );\n}\n\nexport default CustomToolAuthCard;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/MessageToolbar.tsx",
    "content": "\"use client\";\n\nimport React, { RefObject, useState, useCallback, useMemo } from \"react\";\nimport { Packet, StreamingCitation } from \"@/app/app/services/streamingModels\";\nimport { FeedbackType } from \"@/app/app/interfaces\";\nimport { OnyxDocument } from \"@/lib/search/interfaces\";\nimport { TooltipGroup } from \"@/components/tooltip/CustomTooltip\";\nimport {\n  useChatSessionStore,\n  useDocumentSidebarVisible,\n  useSelectedNodeForDocDisplay,\n} from \"@/app/app/stores/useChatSessionStore\";\nimport { convertMarkdownTablesToTsv } from \"@/app/app/message/copyingUtils\";\nimport { getTextContent } from \"@/app/app/services/packetUtils\";\nimport { removeThinkingTokens } from \"@/app/app/services/thinkingTokens\";\nimport MessageSwitcher from \"@/app/app/message/MessageSwitcher\";\nimport SourceTag from \"@/refresh-components/buttons/source-tag/SourceTag\";\nimport { citationsToSourceInfoArray } from \"@/refresh-components/buttons/source-tag/sourceTagUtils\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport LLMPopover from \"@/refresh-components/popovers/LLMPopover\";\nimport { parseLlmDescriptor } from \"@/lib/llmConfig/utils\";\nimport { LlmManager } from \"@/lib/hooks\";\nimport { Message } from \"@/app/app/interfaces\";\nimport { SvgThumbsDown, SvgThumbsUp } from \"@opal/icons\";\nimport { RegenerationFactory } from \"./AgentMessage\";\nimport useFeedbackController from \"@/hooks/useFeedbackController\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport FeedbackModal, {\n  FeedbackModalProps,\n} from \"@/sections/modals/FeedbackModal\";\nimport { Button, SelectButton } from \"@opal/components\";\nimport TTSButton from \"./TTSButton\";\nimport { useVoiceMode } from \"@/providers/VoiceModeProvider\";\nimport { useVoiceStatus } from \"@/hooks/useVoiceStatus\";\n\n// Wrapper component for SourceTag in toolbar to handle memoization\nconst SourcesTagWrapper = React.memo(function SourcesTagWrapper({\n  citations,\n  documentMap,\n  nodeId,\n  selectedMessageForDocDisplay,\n  documentSidebarVisible,\n  updateCurrentDocumentSidebarVisible,\n  updateCurrentSelectedNodeForDocDisplay,\n}: {\n  citations: StreamingCitation[];\n  documentMap: Map<string, OnyxDocument>;\n  nodeId: number;\n  selectedMessageForDocDisplay: number | null;\n  documentSidebarVisible: boolean;\n  updateCurrentDocumentSidebarVisible: (visible: boolean) => void;\n  updateCurrentSelectedNodeForDocDisplay: (nodeId: number | null) => void;\n}) {\n  // Convert citations to SourceInfo array\n  const sources = useMemo(\n    () => citationsToSourceInfoArray(citations, documentMap),\n    [citations, documentMap]\n  );\n\n  // Handle click to toggle sidebar\n  const handleSourceClick = useCallback(() => {\n    if (selectedMessageForDocDisplay === nodeId && documentSidebarVisible) {\n      updateCurrentDocumentSidebarVisible(false);\n      updateCurrentSelectedNodeForDocDisplay(null);\n    } else {\n      updateCurrentSelectedNodeForDocDisplay(nodeId);\n      updateCurrentDocumentSidebarVisible(true);\n    }\n  }, [\n    nodeId,\n    selectedMessageForDocDisplay,\n    documentSidebarVisible,\n    updateCurrentDocumentSidebarVisible,\n    updateCurrentSelectedNodeForDocDisplay,\n  ]);\n\n  if (sources.length === 0) return null;\n\n  return (\n    <SourceTag\n      variant=\"button\"\n      displayName=\"Sources\"\n      sources={sources}\n      onSourceClick={handleSourceClick}\n      toggleSource\n    />\n  );\n});\n\nexport interface MessageToolbarProps {\n  // Message identification\n  nodeId: number;\n  messageId?: number;\n\n  // Message switching\n  includeMessageSwitcher: boolean;\n  currentMessageInd: number | null | undefined;\n  otherMessagesCanSwitchTo?: number[];\n  getPreviousMessage: () => number | undefined;\n  getNextMessage: () => number | undefined;\n  onMessageSelection?: (nodeId: number) => void;\n\n  // Copy functionality\n  rawPackets: Packet[];\n  finalAnswerRef: RefObject<HTMLDivElement | null>;\n\n  // Feedback\n  currentFeedback?: FeedbackType | null;\n\n  // Regeneration\n  onRegenerate?: RegenerationFactory;\n  parentMessage?: Message | null;\n  llmManager: LlmManager | null;\n  currentModelName?: string;\n\n  // Citations\n  citations: StreamingCitation[];\n  documentMap: Map<string, OnyxDocument>;\n}\n\nexport default function MessageToolbar({\n  nodeId,\n  messageId,\n  includeMessageSwitcher,\n  currentMessageInd,\n  otherMessagesCanSwitchTo,\n  getPreviousMessage,\n  getNextMessage,\n  onMessageSelection,\n  rawPackets,\n  finalAnswerRef,\n  currentFeedback,\n  onRegenerate,\n  parentMessage,\n  llmManager,\n  currentModelName,\n  citations,\n  documentMap,\n}: MessageToolbarProps) {\n  // Document sidebar state - managed internally to reduce prop drilling\n  const documentSidebarVisible = useDocumentSidebarVisible();\n  const selectedMessageForDocDisplay = useSelectedNodeForDocDisplay();\n  const updateCurrentDocumentSidebarVisible = useChatSessionStore(\n    (state) => state.updateCurrentDocumentSidebarVisible\n  );\n  const updateCurrentSelectedNodeForDocDisplay = useChatSessionStore(\n    (state) => state.updateCurrentSelectedNodeForDocDisplay\n  );\n\n  // Voice mode - hide toolbar during TTS playback for this message\n  const { isTTSPlaying, activeMessageNodeId, isAwaitingAutoPlaybackStart } =\n    useVoiceMode();\n  const { ttsEnabled } = useVoiceStatus();\n  const isTTSActiveForThisMessage =\n    (isTTSPlaying || isAwaitingAutoPlaybackStart) &&\n    activeMessageNodeId === nodeId;\n\n  // Feedback modal state and handlers\n  const { handleFeedbackChange } = useFeedbackController();\n  const modal = useCreateModal();\n  const [feedbackModalProps, setFeedbackModalProps] =\n    useState<FeedbackModalProps | null>(null);\n\n  // Helper to check if feedback button should be in transient state\n  const isFeedbackTransient = useCallback(\n    (feedbackType: \"like\" | \"dislike\") => {\n      const hasCurrentFeedback = currentFeedback === feedbackType;\n      if (!modal.isOpen) return hasCurrentFeedback;\n\n      const isModalForThisFeedback =\n        feedbackModalProps?.feedbackType === feedbackType;\n      const isModalForThisMessage = feedbackModalProps?.messageId === messageId;\n\n      return (\n        hasCurrentFeedback || (isModalForThisFeedback && isModalForThisMessage)\n      );\n    },\n    [currentFeedback, modal.isOpen, feedbackModalProps, messageId]\n  );\n\n  // Handler for feedback button clicks with toggle logic\n  const handleFeedbackClick = useCallback(\n    async (clickedFeedback: \"like\" | \"dislike\") => {\n      if (!messageId) {\n        console.error(\"Cannot provide feedback - message has no messageId\");\n        return;\n      }\n\n      // Toggle logic\n      if (currentFeedback === clickedFeedback) {\n        // Clicking same button - remove feedback\n        await handleFeedbackChange(messageId, null);\n      }\n\n      // Clicking like (will automatically clear dislike if it was active).\n      // Open modal for positive feedback.\n      else if (clickedFeedback === \"like\") {\n        setFeedbackModalProps({\n          feedbackType: \"like\",\n          messageId,\n        });\n        modal.toggle(true);\n      }\n\n      // Clicking dislike (will automatically clear like if it was active).\n      // Always open modal for dislike.\n      else {\n        setFeedbackModalProps({\n          feedbackType: \"dislike\",\n          messageId,\n        });\n        modal.toggle(true);\n      }\n    },\n    [messageId, currentFeedback, handleFeedbackChange, modal]\n  );\n\n  // Hide toolbar while TTS is playing for this message\n  if (isTTSActiveForThisMessage) {\n    return null;\n  }\n\n  return (\n    <>\n      <modal.Provider>\n        <FeedbackModal {...feedbackModalProps!} />\n      </modal.Provider>\n\n      <div\n        data-testid=\"AgentMessage/toolbar\"\n        className=\"flex md:flex-row justify-between items-center w-full transition-transform duration-300 ease-in-out transform opacity-100 pl-1\"\n      >\n        <TooltipGroup>\n          <div className=\"flex items-center\">\n            {includeMessageSwitcher && (\n              <div className=\"-mx-1\">\n                <MessageSwitcher\n                  currentPage={(currentMessageInd ?? 0) + 1}\n                  totalPages={otherMessagesCanSwitchTo?.length || 0}\n                  handlePrevious={() => {\n                    const prevMessage = getPreviousMessage();\n                    if (prevMessage !== undefined && onMessageSelection) {\n                      onMessageSelection(prevMessage);\n                    }\n                  }}\n                  handleNext={() => {\n                    const nextMessage = getNextMessage();\n                    if (nextMessage !== undefined && onMessageSelection) {\n                      onMessageSelection(nextMessage);\n                    }\n                  }}\n                />\n              </div>\n            )}\n\n            <CopyIconButton\n              getCopyText={() =>\n                convertMarkdownTablesToTsv(\n                  removeThinkingTokens(getTextContent(rawPackets)) as string\n                )\n              }\n              getHtmlContent={() => finalAnswerRef.current?.innerHTML || \"\"}\n              data-testid=\"AgentMessage/copy-button\"\n            />\n            <SelectButton\n              icon={SvgThumbsUp}\n              onClick={() => handleFeedbackClick(\"like\")}\n              variant=\"select-light\"\n              state={isFeedbackTransient(\"like\") ? \"selected\" : \"empty\"}\n              tooltip={\n                currentFeedback === \"like\" ? \"Remove Like\" : \"Good Response\"\n              }\n              data-testid=\"AgentMessage/like-button\"\n            />\n            <SelectButton\n              icon={SvgThumbsDown}\n              onClick={() => handleFeedbackClick(\"dislike\")}\n              variant=\"select-light\"\n              state={isFeedbackTransient(\"dislike\") ? \"selected\" : \"empty\"}\n              tooltip={\n                currentFeedback === \"dislike\"\n                  ? \"Remove Dislike\"\n                  : \"Bad Response\"\n              }\n              data-testid=\"AgentMessage/dislike-button\"\n            />\n            {ttsEnabled && (\n              <TTSButton\n                text={\n                  removeThinkingTokens(getTextContent(rawPackets)) as string\n                }\n              />\n            )}\n\n            {onRegenerate &&\n              messageId !== undefined &&\n              parentMessage &&\n              llmManager && (\n                <div data-testid=\"AgentMessage/regenerate\">\n                  <LLMPopover\n                    llmManager={llmManager}\n                    currentModelName={currentModelName}\n                    onSelect={(modelName) => {\n                      const llmDescriptor = parseLlmDescriptor(modelName);\n                      const regenerator = onRegenerate({\n                        messageId,\n                        parentMessage,\n                      });\n                      regenerator(llmDescriptor);\n                    }}\n                    foldable\n                  />\n                </div>\n              )}\n\n            {nodeId && (citations.length > 0 || documentMap.size > 0) && (\n              <SourcesTagWrapper\n                citations={citations}\n                documentMap={documentMap}\n                nodeId={nodeId}\n                selectedMessageForDocDisplay={selectedMessageForDocDisplay}\n                documentSidebarVisible={documentSidebarVisible}\n                updateCurrentDocumentSidebarVisible={\n                  updateCurrentDocumentSidebarVisible\n                }\n                updateCurrentSelectedNodeForDocDisplay={\n                  updateCurrentSelectedNodeForDocDisplay\n                }\n              />\n            )}\n          </div>\n        </TooltipGroup>\n      </div>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/TTSButton.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useEffect } from \"react\";\nimport { SvgPlayCircle, SvgStop } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport { useVoicePlayback } from \"@/hooks/useVoicePlayback\";\nimport { useVoiceMode } from \"@/providers/VoiceModeProvider\";\nimport { toast } from \"@/hooks/useToast\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\n\ninterface TTSButtonProps {\n  text: string;\n  voice?: string;\n  speed?: number;\n}\n\nfunction TTSButton({ text, voice, speed }: TTSButtonProps) {\n  const { isPlaying, isLoading, error, play, pause, stop } = useVoicePlayback();\n  const { isTTSPlaying, isTTSLoading, isAwaitingAutoPlaybackStart, stopTTS } =\n    useVoiceMode();\n\n  const isGlobalTTSActive =\n    isTTSPlaying || isTTSLoading || isAwaitingAutoPlaybackStart;\n  const isButtonPlaying = isGlobalTTSActive || isPlaying;\n  const isButtonLoading = !isGlobalTTSActive && isLoading;\n\n  const handleClick = useCallback(async () => {\n    if (isGlobalTTSActive) {\n      // Stop auto-playback voice mode stream from the toolbar button.\n      stopTTS({ manual: true });\n      stop();\n    } else if (isPlaying) {\n      pause();\n    } else if (isButtonLoading) {\n      stop();\n    } else {\n      try {\n        // Ensure no voice-mode stream is active before starting manual playback.\n        stopTTS();\n        await play(text, voice, speed);\n      } catch (err) {\n        console.error(\"TTS playback failed:\", err);\n        toast.error(\"Could not play audio\");\n      }\n    }\n  }, [\n    isGlobalTTSActive,\n    isPlaying,\n    isButtonLoading,\n    text,\n    voice,\n    speed,\n    play,\n    pause,\n    stop,\n    stopTTS,\n  ]);\n\n  // Surface streaming voice playback errors to the user via toast\n  useEffect(() => {\n    if (error) {\n      console.error(\"Voice playback error:\", error);\n      toast.error(error);\n    }\n  }, [error]);\n\n  const icon = isButtonLoading\n    ? SimpleLoader\n    : isButtonPlaying\n      ? SvgStop\n      : SvgPlayCircle;\n\n  const tooltip = isButtonPlaying\n    ? \"Stop playback\"\n    : isButtonLoading\n      ? \"Loading...\"\n      : \"Read aloud\";\n\n  return (\n    <Button\n      icon={icon}\n      onClick={handleClick}\n      prominence=\"tertiary\"\n      tooltip={tooltip}\n      data-testid=\"AgentMessage/tts-button\"\n    />\n  );\n}\n\nexport default TTSButton;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/constants.ts",
    "content": "export const STANDARD_TEXT_COLOR = \"text-text-700\";\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/hooks/useAuthErrors.ts",
    "content": "import { useRef } from \"react\";\nimport {\n  CustomToolDelta,\n  Packet,\n  PacketType,\n} from \"@/app/app/services/streamingModels\";\n\ninterface AuthError {\n  toolName: string;\n  toolId: number | null;\n}\n\nexport function useAuthErrors(rawPackets: Packet[]): AuthError[] {\n  const stateRef = useRef<{ processedCount: number; errors: AuthError[] }>({\n    processedCount: 0,\n    errors: [],\n  });\n\n  // Reset if packets shrunk (e.g. new message)\n  if (rawPackets.length < stateRef.current.processedCount) {\n    stateRef.current = { processedCount: 0, errors: [] };\n  }\n\n  // Process only new packets (incremental, like usePacketProcessor)\n  if (rawPackets.length > stateRef.current.processedCount) {\n    let newErrors = stateRef.current.errors;\n    for (let i = stateRef.current.processedCount; i < rawPackets.length; i++) {\n      const packet = rawPackets[i]!;\n      if (packet.obj.type === PacketType.CUSTOM_TOOL_DELTA) {\n        const delta = packet.obj as CustomToolDelta;\n        if (delta.error?.is_auth_error) {\n          const alreadyPresent = newErrors.some(\n            (e) =>\n              (delta.tool_id != null && e.toolId === delta.tool_id) ||\n              (delta.tool_id == null && e.toolName === delta.tool_name)\n          );\n          if (!alreadyPresent) {\n            newErrors = [\n              ...newErrors,\n              { toolName: delta.tool_name, toolId: delta.tool_id ?? null },\n            ];\n          }\n        }\n      }\n    }\n    stateRef.current = {\n      processedCount: rawPackets.length,\n      errors: newErrors,\n    };\n  }\n\n  return stateRef.current.errors;\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/hooks/useMessageSwitching.ts",
    "content": "interface UseMessageSwitchingProps {\n  nodeId: number;\n  otherMessagesCanSwitchTo?: number[];\n  onMessageSelection?: (messageId: number) => void;\n}\n\ninterface UseMessageSwitchingReturn {\n  currentMessageInd: number | undefined;\n  includeMessageSwitcher: boolean;\n  getPreviousMessage: () => number | undefined;\n  getNextMessage: () => number | undefined;\n}\n\nexport function useMessageSwitching({\n  nodeId,\n  otherMessagesCanSwitchTo,\n  onMessageSelection,\n}: UseMessageSwitchingProps): UseMessageSwitchingReturn {\n  // Calculate message switching state\n  const indexInSiblings = nodeId\n    ? otherMessagesCanSwitchTo?.indexOf(nodeId)\n    : undefined;\n  // indexOf returns -1 if not found, treat that as undefined\n  const currentMessageInd =\n    indexInSiblings !== undefined && indexInSiblings !== -1\n      ? indexInSiblings\n      : undefined;\n\n  const includeMessageSwitcher =\n    currentMessageInd !== undefined &&\n    onMessageSelection !== undefined &&\n    otherMessagesCanSwitchTo !== undefined &&\n    otherMessagesCanSwitchTo.length > 1;\n\n  const getPreviousMessage = () => {\n    if (\n      currentMessageInd !== undefined &&\n      currentMessageInd > 0 &&\n      otherMessagesCanSwitchTo\n    ) {\n      return otherMessagesCanSwitchTo[currentMessageInd - 1];\n    }\n    return undefined;\n  };\n\n  const getNextMessage = () => {\n    if (\n      currentMessageInd !== undefined &&\n      currentMessageInd < (otherMessagesCanSwitchTo?.length || 0) - 1 &&\n      otherMessagesCanSwitchTo\n    ) {\n      return otherMessagesCanSwitchTo[currentMessageInd + 1];\n    }\n    return undefined;\n  };\n\n  return {\n    currentMessageInd,\n    includeMessageSwitcher,\n    getPreviousMessage,\n    getNextMessage,\n  };\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/hooks/usePacketAnimationAndCollapse.ts",
    "content": "import { useEffect, useState } from \"react\";\nimport { Packet } from \"@/app/app/services/streamingModels\";\n\n// Control the rate of packet streaming (packets per second)\nconst PACKET_DELAY_MS = 10;\n\ninterface UsePacketAnimationAndCollapseOptions {\n  /** Array of packets to animate */\n  packets: Packet[];\n  /** Whether animation is enabled */\n  animate: boolean;\n  /** Whether the content is complete (has SECTION_END) */\n  isComplete: boolean;\n  /** Callback to invoke when animation and completion are done */\n  onComplete: () => void;\n  /** Optional: prevent double-calling onComplete (for renderers that need it) */\n  preventDoubleComplete?: boolean;\n}\n\ninterface UsePacketAnimationAndCollapseReturn {\n  /** Number of packets currently displayed (or -1 if showing all) */\n  displayedPacketCount: number;\n  /** Whether the content is expanded */\n  isExpanded: boolean;\n  /** Function to toggle expansion state */\n  toggleExpanded: () => void;\n}\n\n/**\n * Hook that handles packet animation and auto-collapse behavior.\n *\n * Features:\n * - Gradually displays packets with configurable delay\n * - Auto-collapses when content is complete\n * - Calls onComplete when animation finishes and content is complete\n * - Manages expansion state for collapsible content\n */\nexport function usePacketAnimationAndCollapse({\n  packets,\n  animate,\n  isComplete,\n  onComplete,\n  preventDoubleComplete = false,\n}: UsePacketAnimationAndCollapseOptions): UsePacketAnimationAndCollapseReturn {\n  // If we're animating, start with 1 packet, otherwise show all\n  const initialPacketCount = animate ? (packets.length > 0 ? 1 : 0) : -1;\n\n  const [displayedPacketCount, setDisplayedPacketCount] =\n    useState(initialPacketCount);\n  const [isExpanded, setIsExpanded] = useState(true);\n  const [hasAutoCollapsed, setHasAutoCollapsed] = useState(false);\n  const [hasCalledComplete, setHasCalledComplete] = useState(false);\n\n  // Auto-collapse when content is complete\n  useEffect(() => {\n    if (isComplete && !hasAutoCollapsed) {\n      setIsExpanded(false);\n      setHasAutoCollapsed(true);\n    }\n  }, [isComplete, hasAutoCollapsed]);\n\n  // Animation effect - gradually increase displayed packets\n  useEffect(() => {\n    if (!animate) {\n      setDisplayedPacketCount(-1);\n      return;\n    }\n\n    if (displayedPacketCount >= 0 && displayedPacketCount < packets.length) {\n      const timer = setTimeout(() => {\n        setDisplayedPacketCount((prev) => Math.min(prev + 1, packets.length));\n      }, PACKET_DELAY_MS);\n\n      return () => clearTimeout(timer);\n    }\n  }, [animate, displayedPacketCount, packets.length]);\n\n  // Reset displayed count when packet array changes significantly\n  useEffect(() => {\n    if (animate && packets.length < displayedPacketCount) {\n      setDisplayedPacketCount(packets.length > 0 ? 1 : 0);\n    }\n  }, [animate, packets.length, displayedPacketCount]);\n\n  // Call onComplete when done (animation finished and content complete)\n  useEffect(() => {\n    if (isComplete) {\n      // If animation is still in progress, wait for it to finish\n      if (\n        animate &&\n        displayedPacketCount >= 0 &&\n        displayedPacketCount < packets.length\n      ) {\n        return;\n      }\n\n      // Prevent double-calling if requested\n      if (preventDoubleComplete && hasCalledComplete) {\n        return;\n      }\n\n      if (preventDoubleComplete) {\n        setHasCalledComplete(true);\n      }\n      onComplete();\n    }\n  }, [\n    isComplete,\n    onComplete,\n    animate,\n    displayedPacketCount,\n    packets.length,\n    preventDoubleComplete,\n    hasCalledComplete,\n  ]);\n\n  const toggleExpanded = () => {\n    setIsExpanded((prev) => !prev);\n  };\n\n  return {\n    displayedPacketCount,\n    isExpanded,\n    toggleExpanded,\n  };\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/interfaces.ts",
    "content": "import { JSX } from \"react\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { Packet, StopReason } from \"../../services/streamingModels\";\nimport { OnyxDocument, MinimalOnyxDocument } from \"@/lib/search/interfaces\";\nimport { ProjectFile } from \"../../projects/projectsService\";\nimport { LlmDescriptor } from \"@/lib/hooks\";\nimport { IconType } from \"react-icons\";\nimport { OnyxIconType } from \"@/components/icons/icons\";\nimport { CitationMap } from \"../../interfaces\";\nimport { TimelineSurfaceBackground } from \"@/app/app/message/messageComponents/timeline/primitives/TimelineSurface\";\n\nexport enum RenderType {\n  HIGHLIGHT = \"highlight\",\n  FULL = \"full\",\n  COMPACT = \"compact\",\n  INLINE = \"inline\",\n}\n\n/**\n * Controls whether a renderer expects to be wrapped by timeline UI.\n * - timeline: parent should render StepContainer around the result.\n * - content: renderer already contains its own layout (headers/containers).\n */\nexport type TimelineLayout = \"timeline\" | \"content\";\n\nexport interface FullChatState {\n  agent: MinimalPersonaSnapshot;\n  // Document-related context for citations\n  docs?: OnyxDocument[] | null;\n  userFiles?: ProjectFile[];\n  citations?: CitationMap;\n  setPresentingDocument?: (document: MinimalOnyxDocument) => void;\n  // Regenerate functionality\n  regenerate?: (modelOverRide: LlmDescriptor) => Promise<void>;\n  overriddenModel?: string;\n  researchType?: string | null;\n}\n\nexport interface RendererResult {\n  icon: IconType | OnyxIconType | null;\n  status: string | JSX.Element | null;\n  content: JSX.Element;\n\n  // can be used to override the look on the \"expanded\" view\n  // used for things that should just show text w/o an icon or header\n  // e.g. ReasoningRenderer\n  expandedText?: JSX.Element;\n\n  // Whether this renderer supports collapsible mode (collapse button shown only when true)\n  supportsCollapsible?: boolean;\n  /** Whether the step should remain collapsible even in single-step timelines */\n  alwaysCollapsible?: boolean;\n  /** Whether the result should be wrapped by timeline UI or rendered as-is */\n  timelineLayout?: TimelineLayout;\n  /** Remove right padding for long-form content (reasoning, deep research, memory). */\n  noPaddingRight?: boolean;\n  /** Override the surface background (e.g. \"error\" for auth failures). */\n  surfaceBackground?: TimelineSurfaceBackground;\n}\n\n// All renderers return an array of results (even single-step renderers return a 1-element array)\nexport type RendererOutput = RendererResult[];\n\nexport type MessageRenderer<\n  T extends Packet,\n  S extends Partial<FullChatState>,\n> = React.ComponentType<{\n  packets: T[];\n  state: S;\n  /** Node id for the message currently being rendered */\n  messageNodeId?: number;\n  /** True when timeline/thinking UI is already shown above this text block */\n  hasTimelineThinking?: boolean;\n  onComplete: () => void;\n  renderType: RenderType;\n  animate: boolean;\n  stopPacketSeen: boolean;\n  stopReason?: StopReason;\n  /** Whether this is the last step in the timeline (for connector line decisions) */\n  isLastStep?: boolean;\n  /** Hover state from parent */\n  isHover?: boolean;\n  children: (result: RendererOutput) => JSX.Element;\n}>;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/markdownUtils.tsx",
    "content": "import React, { useCallback, useMemo, JSX } from \"react\";\nimport ReactMarkdown from \"react-markdown\";\nimport remarkGfm from \"remark-gfm\";\nimport remarkMath from \"remark-math\";\nimport rehypeHighlight from \"rehype-highlight\";\nimport rehypeKatex from \"rehype-katex\";\nimport \"katex/dist/katex.min.css\";\nimport \"@/app/app/message/custom-code-styles.css\";\nimport { FullChatState } from \"@/app/app/message/messageComponents/interfaces\";\nimport {\n  MemoizedAnchor,\n  MemoizedParagraph,\n} from \"@/app/app/message/MemoizedTextComponents\";\nimport { extractCodeText, preprocessLaTeX } from \"@/app/app/message/codeUtils\";\nimport { CodeBlock } from \"@/app/app/message/CodeBlock\";\nimport { transformLinkUri, cn } from \"@/lib/utils\";\nimport { InMessageImage } from \"@/app/app/components/files/images/InMessageImage\";\nimport { extractChatImageFileId } from \"@/app/app/components/files/images/utils\";\n\n/**\n * Processes content for markdown rendering by handling code blocks and LaTeX\n */\nexport const processContent = (content: string): string => {\n  const codeBlockRegex = /```(\\w*)\\n[\\s\\S]*?```|```[\\s\\S]*?$/g;\n  const matches = content.match(codeBlockRegex);\n\n  if (matches) {\n    content = matches.reduce((acc, match) => {\n      if (!match.match(/```\\w+/)) {\n        return acc.replace(match, match.replace(\"```\", \"```plaintext\"));\n      }\n      return acc;\n    }, content);\n\n    const lastMatch = matches[matches.length - 1];\n    if (lastMatch && !lastMatch.endsWith(\"```\")) {\n      return preprocessLaTeX(content);\n    }\n  }\n\n  const processed = preprocessLaTeX(content);\n  return processed;\n};\n\n/**\n * Hook that provides markdown component callbacks for consistent rendering\n */\nexport const useMarkdownComponents = (\n  state: FullChatState | undefined,\n  processedContent: string,\n  className?: string\n) => {\n  const paragraphCallback = useCallback(\n    (props: any) => (\n      <MemoizedParagraph className={className}>\n        {props.children}\n      </MemoizedParagraph>\n    ),\n    [className]\n  );\n\n  const anchorCallback = useCallback(\n    (props: any) => {\n      const imageFileId = extractChatImageFileId(\n        props.href,\n        String(props.children ?? \"\")\n      );\n      if (imageFileId) {\n        return (\n          <InMessageImage\n            fileId={imageFileId}\n            fileName={String(props.children ?? \"\")}\n          />\n        );\n      }\n      return (\n        <MemoizedAnchor\n          updatePresentingDocument={state?.setPresentingDocument || (() => {})}\n          docs={state?.docs || []}\n          userFiles={state?.userFiles || []}\n          citations={state?.citations}\n          href={props.href}\n        >\n          {props.children}\n        </MemoizedAnchor>\n      );\n    },\n    [\n      state?.docs,\n      state?.userFiles,\n      state?.citations,\n      state?.setPresentingDocument,\n    ]\n  );\n\n  const markdownComponents = useMemo(\n    () => ({\n      a: anchorCallback,\n      p: paragraphCallback,\n      pre: ({ node, className, children }: any) => {\n        // Don't render the pre wrapper - CodeBlock handles its own wrapper\n        return <>{children}</>;\n      },\n      b: ({ node, className, children }: any) => {\n        return <span className={className}>{children}</span>;\n      },\n      ul: ({ node, className, children, ...props }: any) => {\n        return (\n          <ul className={className} {...props}>\n            {children}\n          </ul>\n        );\n      },\n      ol: ({ node, className, children, ...props }: any) => {\n        return (\n          <ol className={className} {...props}>\n            {children}\n          </ol>\n        );\n      },\n      li: ({ node, className, children, ...props }: any) => {\n        return (\n          <li className={className} {...props}>\n            {children}\n          </li>\n        );\n      },\n      table: ({ node, className, children, ...props }: any) => {\n        return (\n          <div className=\"markdown-table-breakout\">\n            <table className={cn(className, \"min-w-full\")} {...props}>\n              {children}\n            </table>\n          </div>\n        );\n      },\n      code: ({ node, className, children }: any) => {\n        const codeText = extractCodeText(node, processedContent, children);\n\n        return (\n          <CodeBlock className={className} codeText={codeText}>\n            {children}\n          </CodeBlock>\n        );\n      },\n    }),\n    [anchorCallback, paragraphCallback, processedContent]\n  );\n\n  return markdownComponents;\n};\n\n/**\n * Renders markdown content with consistent configuration\n */\nexport const renderMarkdown = (\n  content: string,\n  markdownComponents: any,\n  textSize: string = \"text-base\"\n): JSX.Element => {\n  return (\n    <div dir=\"auto\">\n      <ReactMarkdown\n        className={`prose dark:prose-invert font-main-content-body max-w-full ${textSize}`}\n        components={markdownComponents}\n        remarkPlugins={[\n          remarkGfm,\n          [remarkMath, { singleDollarTextMath: true }],\n        ]}\n        rehypePlugins={[rehypeHighlight, rehypeKatex]}\n        urlTransform={transformLinkUri}\n      >\n        {content}\n      </ReactMarkdown>\n    </div>\n  );\n};\n\n/**\n * Complete markdown processing and rendering utility\n */\nexport const useMarkdownRenderer = (\n  content: string,\n  state: FullChatState | undefined,\n  textSize: string\n) => {\n  const processedContent = useMemo(() => processContent(content), [content]);\n  const markdownComponents = useMarkdownComponents(\n    state,\n    processedContent,\n    textSize\n  );\n\n  const renderedContent = useMemo(\n    () => renderMarkdown(processedContent, markdownComponents, textSize),\n    [processedContent, markdownComponents, textSize]\n  );\n\n  return {\n    processedContent,\n    markdownComponents,\n    renderedContent,\n  };\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/renderMessageComponent.tsx",
    "content": "import React, { JSX, memo } from \"react\";\nimport {\n  ChatPacket,\n  CODE_INTERPRETER_TOOL_TYPES,\n  ImageGenerationToolPacket,\n  Packet,\n  PacketType,\n  ReasoningPacket,\n  SearchToolStart,\n  StopReason,\n  ToolCallArgumentDelta,\n} from \"../../services/streamingModels\";\nimport {\n  FullChatState,\n  MessageRenderer,\n  RenderType,\n  RendererResult,\n  RendererOutput,\n} from \"./interfaces\";\nimport { MessageTextRenderer } from \"./renderers/MessageTextRenderer\";\nimport { ImageToolRenderer } from \"./renderers/ImageToolRenderer\";\nimport { PythonToolRenderer } from \"./timeline/renderers/code/PythonToolRenderer\";\nimport { ReasoningRenderer } from \"./timeline/renderers/reasoning/ReasoningRenderer\";\nimport CustomToolRenderer from \"./renderers/CustomToolRenderer\";\nimport { FileReaderToolRenderer } from \"./timeline/renderers/filereader/FileReaderToolRenderer\";\nimport { FetchToolRenderer } from \"./timeline/renderers/fetch/FetchToolRenderer\";\nimport { MemoryToolRenderer } from \"./timeline/renderers/memory/MemoryToolRenderer\";\nimport { DeepResearchPlanRenderer } from \"./timeline/renderers/deepresearch/DeepResearchPlanRenderer\";\nimport { ResearchAgentRenderer } from \"./timeline/renderers/deepresearch/ResearchAgentRenderer\";\nimport { WebSearchToolRenderer } from \"./timeline/renderers/search/WebSearchToolRenderer\";\nimport { InternalSearchToolRenderer } from \"./timeline/renderers/search/InternalSearchToolRenderer\";\n\n// Different types of chat packets using discriminated unions\ninterface GroupedPackets {\n  packets: Packet[];\n}\n\nfunction isChatPacket(packet: Packet): packet is ChatPacket {\n  return (\n    packet.obj.type === PacketType.MESSAGE_START ||\n    packet.obj.type === PacketType.MESSAGE_DELTA ||\n    packet.obj.type === PacketType.MESSAGE_END\n  );\n}\n\nfunction isWebSearchPacket(packet: Packet): boolean {\n  if (packet.obj.type !== PacketType.SEARCH_TOOL_START) return false;\n  return (packet.obj as SearchToolStart).is_internet_search === true;\n}\n\nfunction isInternalSearchPacket(packet: Packet): boolean {\n  if (packet.obj.type !== PacketType.SEARCH_TOOL_START) return false;\n  return (packet.obj as SearchToolStart).is_internet_search !== true;\n}\n\nfunction isImageToolPacket(packet: Packet) {\n  return packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_START;\n}\n\nfunction isPythonToolPacket(packet: Packet) {\n  return (\n    packet.obj.type === PacketType.PYTHON_TOOL_START ||\n    (packet.obj.type === PacketType.TOOL_CALL_ARGUMENT_DELTA &&\n      (packet.obj as ToolCallArgumentDelta).tool_type ===\n        CODE_INTERPRETER_TOOL_TYPES.PYTHON)\n  );\n}\n\nfunction isCustomToolPacket(packet: Packet) {\n  return packet.obj.type === PacketType.CUSTOM_TOOL_START;\n}\n\nfunction isFileReaderToolPacket(packet: Packet) {\n  return packet.obj.type === PacketType.FILE_READER_START;\n}\n\nfunction isFetchToolPacket(packet: Packet) {\n  return packet.obj.type === PacketType.FETCH_TOOL_START;\n}\n\nfunction isMemoryToolPacket(packet: Packet) {\n  return (\n    packet.obj.type === PacketType.MEMORY_TOOL_START ||\n    packet.obj.type === PacketType.MEMORY_TOOL_NO_ACCESS\n  );\n}\n\nfunction isReasoningPacket(packet: Packet): packet is ReasoningPacket {\n  return (\n    packet.obj.type === PacketType.REASONING_START ||\n    packet.obj.type === PacketType.REASONING_DELTA ||\n    packet.obj.type === PacketType.SECTION_END ||\n    packet.obj.type === PacketType.ERROR\n  );\n}\n\nfunction isDeepResearchPlanPacket(packet: Packet) {\n  return (\n    packet.obj.type === PacketType.DEEP_RESEARCH_PLAN_START ||\n    packet.obj.type === PacketType.DEEP_RESEARCH_PLAN_DELTA\n  );\n}\n\nfunction isResearchAgentPacket(packet: Packet) {\n  // Check for any packet type that indicates a research agent group\n  return (\n    packet.obj.type === PacketType.RESEARCH_AGENT_START ||\n    packet.obj.type === PacketType.INTERMEDIATE_REPORT_START ||\n    packet.obj.type === PacketType.INTERMEDIATE_REPORT_DELTA ||\n    packet.obj.type === PacketType.INTERMEDIATE_REPORT_CITED_DOCS\n  );\n}\n\nexport function findRenderer(\n  groupedPackets: GroupedPackets\n): MessageRenderer<any, any> | null {\n  // Check for chat messages first\n  if (groupedPackets.packets.some((packet) => isChatPacket(packet))) {\n    return MessageTextRenderer;\n  }\n\n  // Check for deep research packets EARLY - these have priority over other tools\n  // because deep research groups may contain multiple packet types (plan + reasoning + fetch)\n  if (\n    groupedPackets.packets.some((packet) => isDeepResearchPlanPacket(packet))\n  ) {\n    return DeepResearchPlanRenderer;\n  }\n  if (groupedPackets.packets.some((packet) => isResearchAgentPacket(packet))) {\n    return ResearchAgentRenderer;\n  }\n\n  // Standard tool checks\n  if (groupedPackets.packets.some((packet) => isWebSearchPacket(packet))) {\n    return WebSearchToolRenderer;\n  }\n  if (groupedPackets.packets.some((packet) => isInternalSearchPacket(packet))) {\n    return InternalSearchToolRenderer;\n  }\n  if (groupedPackets.packets.some((packet) => isImageToolPacket(packet))) {\n    return ImageToolRenderer;\n  }\n  if (groupedPackets.packets.some((packet) => isPythonToolPacket(packet))) {\n    return PythonToolRenderer;\n  }\n  if (groupedPackets.packets.some((packet) => isFileReaderToolPacket(packet))) {\n    return FileReaderToolRenderer;\n  }\n  if (groupedPackets.packets.some((packet) => isCustomToolPacket(packet))) {\n    return CustomToolRenderer;\n  }\n  if (groupedPackets.packets.some((packet) => isFetchToolPacket(packet))) {\n    return FetchToolRenderer;\n  }\n  if (groupedPackets.packets.some((packet) => isMemoryToolPacket(packet))) {\n    return MemoryToolRenderer;\n  }\n  if (groupedPackets.packets.some((packet) => isReasoningPacket(packet))) {\n    return ReasoningRenderer;\n  }\n  return null;\n}\n\n// Handles display groups containing both chat text and image generation packets\nfunction MixedContentHandler({\n  chatPackets,\n  imagePackets,\n  chatState,\n  messageNodeId,\n  hasTimelineThinking,\n  onComplete,\n  animate,\n  stopPacketSeen,\n  stopReason,\n  children,\n}: {\n  chatPackets: Packet[];\n  imagePackets: Packet[];\n  chatState: FullChatState;\n  messageNodeId?: number;\n  hasTimelineThinking?: boolean;\n  onComplete: () => void;\n  animate: boolean;\n  stopPacketSeen: boolean;\n  stopReason?: StopReason;\n  children: (result: RendererOutput) => JSX.Element;\n}) {\n  return (\n    <MessageTextRenderer\n      packets={chatPackets as ChatPacket[]}\n      state={chatState}\n      messageNodeId={messageNodeId}\n      hasTimelineThinking={hasTimelineThinking}\n      onComplete={() => {}}\n      animate={animate}\n      renderType={RenderType.FULL}\n      stopPacketSeen={stopPacketSeen}\n      stopReason={stopReason}\n    >\n      {(textResults) => (\n        <ImageToolRenderer\n          packets={imagePackets as ImageGenerationToolPacket[]}\n          state={chatState}\n          onComplete={onComplete}\n          animate={animate}\n          renderType={RenderType.FULL}\n          stopPacketSeen={stopPacketSeen}\n          stopReason={stopReason}\n        >\n          {(imageResults) => children([...textResults, ...imageResults])}\n        </ImageToolRenderer>\n      )}\n    </MessageTextRenderer>\n  );\n}\n\n// Props interface for RendererComponent\ninterface RendererComponentProps {\n  packets: Packet[];\n  chatState: FullChatState;\n  messageNodeId?: number;\n  hasTimelineThinking?: boolean;\n  onComplete: () => void;\n  animate: boolean;\n  stopPacketSeen: boolean;\n  stopReason?: StopReason;\n  children: (result: RendererOutput) => JSX.Element;\n}\n\n// Custom comparison to prevent unnecessary re-renders\nfunction areRendererPropsEqual(\n  prev: RendererComponentProps,\n  next: RendererComponentProps\n): boolean {\n  return (\n    prev.packets === next.packets &&\n    prev.stopPacketSeen === next.stopPacketSeen &&\n    prev.stopReason === next.stopReason &&\n    prev.animate === next.animate &&\n    prev.chatState.agent?.id === next.chatState.agent?.id &&\n    prev.messageNodeId === next.messageNodeId\n    // Skip: onComplete, children (function refs), chatState (memoized upstream)\n  );\n}\n\n// React component wrapper that directly uses renderer components\nexport const RendererComponent = memo(function RendererComponent({\n  packets,\n  chatState,\n  messageNodeId,\n  hasTimelineThinking,\n  onComplete,\n  animate,\n  stopPacketSeen,\n  stopReason,\n  children,\n}: RendererComponentProps) {\n  // Detect mixed display groups (both chat text and image generation)\n  const hasChatPackets = packets.some((p) => isChatPacket(p));\n  const hasImagePackets = packets.some((p) => isImageToolPacket(p));\n\n  if (hasChatPackets && hasImagePackets) {\n    const sharedTypes = new Set<string>([\n      PacketType.SECTION_END,\n      PacketType.ERROR,\n    ]);\n\n    const chatPackets = packets.filter(\n      (p) =>\n        isChatPacket(p) ||\n        p.obj.type === PacketType.CITATION_INFO ||\n        sharedTypes.has(p.obj.type as string)\n    );\n    const imagePackets = packets.filter(\n      (p) =>\n        isImageToolPacket(p) ||\n        p.obj.type === PacketType.IMAGE_GENERATION_TOOL_DELTA ||\n        sharedTypes.has(p.obj.type as string)\n    );\n\n    return (\n      <MixedContentHandler\n        chatPackets={chatPackets}\n        imagePackets={imagePackets}\n        chatState={chatState}\n        messageNodeId={messageNodeId}\n        hasTimelineThinking={hasTimelineThinking}\n        onComplete={onComplete}\n        animate={animate}\n        stopPacketSeen={stopPacketSeen}\n        stopReason={stopReason}\n      >\n        {children}\n      </MixedContentHandler>\n    );\n  }\n\n  const RendererFn = findRenderer({ packets });\n\n  if (!RendererFn) {\n    return children([{ icon: null, status: null, content: <></> }]);\n  }\n\n  return (\n    <RendererFn\n      packets={packets as any}\n      state={chatState}\n      messageNodeId={messageNodeId}\n      hasTimelineThinking={hasTimelineThinking}\n      onComplete={onComplete}\n      animate={animate}\n      renderType={RenderType.FULL}\n      stopPacketSeen={stopPacketSeen}\n      stopReason={stopReason}\n    >\n      {(results: RendererOutput) => children(results)}\n    </RendererFn>\n  );\n}, areRendererPropsEqual);\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/renderers/CustomToolRenderer.tsx",
    "content": "import React, { useEffect, useMemo } from \"react\";\nimport {\n  PacketType,\n  CustomToolPacket,\n  CustomToolStart,\n  CustomToolArgs,\n  CustomToolDelta,\n  CustomToolErrorInfo,\n  SectionEnd,\n} from \"../../../services/streamingModels\";\nimport { MessageRenderer, RenderType } from \"../interfaces\";\nimport { buildImgUrl } from \"../../../components/files/images/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport {\n  SvgActions,\n  SvgArrowExchange,\n  SvgDownload,\n  SvgExternalLink,\n} from \"@opal/icons\";\nimport { CodeBlock } from \"@/app/app/message/CodeBlock\";\nimport hljs from \"highlight.js/lib/core\";\nimport json from \"highlight.js/lib/languages/json\";\nimport FadingEdgeContainer from \"@/refresh-components/FadingEdgeContainer\";\n\n// Lazy registration for hljs JSON language\nfunction ensureHljsRegistered() {\n  if (!hljs.listLanguages().includes(\"json\")) {\n    hljs.registerLanguage(\"json\", json);\n  }\n}\n\n// Component to render syntax-highlighted JSON\ninterface HighlightedJsonCodeProps {\n  code: string;\n}\nfunction HighlightedJsonCode({ code }: HighlightedJsonCodeProps) {\n  const highlightedHtml = useMemo(() => {\n    ensureHljsRegistered();\n    try {\n      return hljs.highlight(code, { language: \"json\" }).value;\n    } catch {\n      return code\n        .replace(/&/g, \"&amp;\")\n        .replace(/</g, \"&lt;\")\n        .replace(/>/g, \"&gt;\");\n    }\n  }, [code]);\n\n  return (\n    <span\n      dangerouslySetInnerHTML={{ __html: highlightedHtml }}\n      className=\"hljs\"\n    />\n  );\n}\n\nfunction constructCustomToolState(packets: CustomToolPacket[]) {\n  const toolStart = packets.find(\n    (p) => p.obj.type === PacketType.CUSTOM_TOOL_START\n  )?.obj as CustomToolStart | null;\n  const toolDeltas = packets\n    .filter((p) => p.obj.type === PacketType.CUSTOM_TOOL_DELTA)\n    .map((p) => p.obj as CustomToolDelta);\n  const toolEnd = packets.find(\n    (p) =>\n      p.obj.type === PacketType.SECTION_END || p.obj.type === PacketType.ERROR\n  )?.obj as SectionEnd | null;\n\n  const toolName = toolStart?.tool_name || toolDeltas[0]?.tool_name || \"Tool\";\n  const toolArgsPacket = packets.find(\n    (p) => p.obj.type === PacketType.CUSTOM_TOOL_ARGS\n  )?.obj as CustomToolArgs | null;\n  const toolArgs = toolArgsPacket?.tool_args ?? null;\n  const latestDelta = toolDeltas[toolDeltas.length - 1] || null;\n  const responseType = latestDelta?.response_type || null;\n  const data = latestDelta?.data;\n  const fileIds = latestDelta?.file_ids || null;\n  const error = latestDelta?.error || null;\n\n  const isRunning = Boolean(toolStart && !toolEnd);\n  const isComplete = Boolean(toolStart && toolEnd);\n\n  return {\n    toolName,\n    toolArgs,\n    responseType,\n    data,\n    fileIds,\n    error,\n    isRunning,\n    isComplete,\n  };\n}\n\nexport const CustomToolRenderer: MessageRenderer<CustomToolPacket, {}> = ({\n  packets,\n  onComplete,\n  renderType,\n  children,\n}) => {\n  const {\n    toolName,\n    toolArgs,\n    responseType,\n    data,\n    fileIds,\n    error,\n    isRunning,\n    isComplete,\n  } = constructCustomToolState(packets);\n\n  useEffect(() => {\n    if (isComplete) {\n      onComplete();\n    }\n  }, [isComplete, onComplete]);\n\n  const status = useMemo(() => {\n    if (isComplete) {\n      if (error) {\n        return error.is_auth_error\n          ? `${toolName} authentication failed (HTTP ${error.status_code})`\n          : `${toolName} failed (HTTP ${error.status_code})`;\n      }\n      if (responseType === \"image\") return `${toolName} returned images`;\n      if (responseType === \"csv\") return `${toolName} returned a file`;\n      return `${toolName} completed`;\n    }\n    if (isRunning) return `${toolName} running...`;\n    return null;\n  }, [toolName, responseType, error, isComplete, isRunning]);\n\n  const icon = SvgActions;\n\n  const toolArgsJson = useMemo(\n    () => (toolArgs ? JSON.stringify(toolArgs, null, 2) : null),\n    [toolArgs]\n  );\n  const dataJson = useMemo(\n    () =>\n      data !== undefined && data !== null && typeof data === \"object\"\n        ? JSON.stringify(data, null, 2)\n        : null,\n    [data]\n  );\n\n  const content = useMemo(\n    () => (\n      <div className=\"flex flex-col gap-3\">\n        {/* Loading indicator */}\n        {isRunning &&\n          !error &&\n          !fileIds &&\n          (data === undefined || data === null) && (\n            <div className=\"flex items-center gap-2 text-sm text-text-03\">\n              <div className=\"flex gap-0.5\">\n                <div className=\"w-1 h-1 bg-current rounded-full animate-pulse\"></div>\n                <div\n                  className=\"w-1 h-1 bg-current rounded-full animate-pulse\"\n                  style={{ animationDelay: \"0.1s\" }}\n                ></div>\n                <div\n                  className=\"w-1 h-1 bg-current rounded-full animate-pulse\"\n                  style={{ animationDelay: \"0.2s\" }}\n                ></div>\n              </div>\n              <Text text03 secondaryBody>\n                Waiting for response...\n              </Text>\n            </div>\n          )}\n\n        {/* Tool arguments */}\n        {toolArgsJson && (\n          <div>\n            <div className=\"flex items-center gap-1\">\n              <SvgArrowExchange className=\"w-3 h-3 text-text-02\" />\n              <Text text04 secondaryBody>\n                Request\n              </Text>\n            </div>\n            <div className=\"prose max-w-full\">\n              <CodeBlock\n                className=\"font-secondary-mono\"\n                codeText={toolArgsJson}\n                noPadding\n              >\n                <HighlightedJsonCode code={toolArgsJson} />\n              </CodeBlock>\n            </div>\n          </div>\n        )}\n\n        {/* Error display */}\n        {error && (\n          <div className=\"pl-[var(--timeline-common-text-padding)]\">\n            <Text text03 mainUiMuted>\n              {error.message}\n            </Text>\n          </div>\n        )}\n\n        {/* File responses */}\n        {!error && fileIds && fileIds.length > 0 && (\n          <div className=\"text-sm text-text-03 flex flex-col gap-2\">\n            {fileIds.map((fid, idx) => (\n              <div key={fid} className=\"flex items-center gap-2 flex-wrap\">\n                <Text text03 secondaryBody className=\"whitespace-nowrap\">\n                  File {idx + 1}\n                </Text>\n                <a\n                  href={buildImgUrl(fid)}\n                  target=\"_blank\"\n                  rel=\"noreferrer\"\n                  className=\"inline-flex items-center gap-1 text-xs text-action-link-01 hover:underline whitespace-nowrap\"\n                >\n                  <SvgExternalLink className=\"w-3 h-3\" /> Open\n                </a>\n                <a\n                  href={buildImgUrl(fid)}\n                  download\n                  className=\"inline-flex items-center gap-1 text-xs text-action-link-01 hover:underline whitespace-nowrap\"\n                >\n                  <SvgDownload className=\"w-3 h-3\" /> Download\n                </a>\n              </div>\n            ))}\n          </div>\n        )}\n\n        {/* JSON/Text responses */}\n        {!error && data !== undefined && data !== null && (\n          <div>\n            <div className=\"flex items-center gap-1\">\n              <SvgArrowExchange className=\"w-3 h-3 text-text-02\" />\n              <Text text04 secondaryBody>\n                Response\n              </Text>\n            </div>\n            <div className=\"prose max-w-full\">\n              {dataJson ? (\n                <CodeBlock\n                  className=\"font-secondary-mono\"\n                  codeText={dataJson}\n                  noPadding\n                >\n                  <HighlightedJsonCode code={dataJson} />\n                </CodeBlock>\n              ) : (\n                <CodeBlock\n                  className=\"font-secondary-mono\"\n                  codeText={String(data)}\n                  noPadding\n                >\n                  {String(data)}\n                </CodeBlock>\n              )}\n            </div>\n          </div>\n        )}\n      </div>\n    ),\n    [toolArgsJson, dataJson, data, fileIds, error, isRunning]\n  );\n\n  // Auth error: always render FULL with error surface\n  if (error?.is_auth_error) {\n    return children([\n      {\n        icon,\n        status,\n        supportsCollapsible: false,\n        noPaddingRight: true,\n        surfaceBackground: \"error\" as const,\n        content,\n      },\n    ]);\n  }\n\n  // FULL mode\n  if (renderType === RenderType.FULL) {\n    return children([\n      {\n        icon,\n        status,\n        supportsCollapsible: true,\n        noPaddingRight: true,\n        content,\n      },\n    ]);\n  }\n\n  // COMPACT mode: wrap in fading container\n  return children([\n    {\n      icon,\n      status,\n      supportsCollapsible: true,\n      content: (\n        <FadingEdgeContainer\n          direction=\"bottom\"\n          className=\"max-h-24 overflow-hidden\"\n        >\n          {content}\n        </FadingEdgeContainer>\n      ),\n    },\n  ]);\n};\n\nexport default CustomToolRenderer;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/renderers/ImageToolRenderer.tsx",
    "content": "import React, { useEffect, useMemo } from \"react\";\nimport { SvgImage } from \"@opal/icons\";\nimport {\n  PacketType,\n  ImageGenerationToolPacket,\n  ImageGenerationToolStart,\n  ImageGenerationToolDelta,\n  SectionEnd,\n} from \"../../../services/streamingModels\";\nimport { MessageRenderer, RenderType } from \"../interfaces\";\nimport { InMessageImage } from \"../../../components/files/images/InMessageImage\";\nimport GeneratingImageDisplay from \"../../../components/tools/GeneratingImageDisplay\";\n\n// Helper function to construct current image state\nfunction constructCurrentImageState(packets: ImageGenerationToolPacket[]) {\n  const imageStart = packets.find(\n    (packet) => packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_START\n  )?.obj as ImageGenerationToolStart | null;\n  const imageDeltas = packets\n    .filter(\n      (packet) => packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_DELTA\n    )\n    .map((packet) => packet.obj as ImageGenerationToolDelta);\n  const imageEnd = packets.find(\n    (packet) =>\n      packet.obj.type === PacketType.SECTION_END ||\n      packet.obj.type === PacketType.ERROR\n  )?.obj as SectionEnd | null;\n\n  const prompt = \"\"; // Image generation tools don't have a main description\n  const images = imageDeltas.flatMap((delta) => delta?.images || []);\n  const isGenerating = imageStart && !imageEnd;\n  const isComplete = imageStart && imageEnd;\n\n  return {\n    prompt,\n    images,\n    isGenerating,\n    isComplete,\n    error: false, // For now, we don't have error state in the packets\n  };\n}\n\nexport const ImageToolRenderer: MessageRenderer<\n  ImageGenerationToolPacket,\n  {}\n> = ({ packets, onComplete, renderType, children }) => {\n  const { prompt, images, isGenerating, isComplete, error } =\n    constructCurrentImageState(packets);\n\n  useEffect(() => {\n    if (isComplete) {\n      onComplete();\n    }\n  }, [isComplete]);\n\n  const status = useMemo(() => {\n    if (isComplete) {\n      return `Generated ${images.length} image${images.length > 1 ? \"s\" : \"\"}`;\n    }\n    if (isGenerating) {\n      return \"Generating image...\";\n    }\n    return null;\n  }, [isComplete, isGenerating, images.length]);\n\n  // Render based on renderType\n  if (renderType === RenderType.FULL) {\n    // Full rendering with title header and content below\n    // Loading state - when generating\n    if (isGenerating) {\n      return children([\n        {\n          icon: SvgImage,\n          status: \"Generating images...\",\n          supportsCollapsible: false,\n          content: (\n            <div className=\"flex flex-col\">\n              <div>\n                <GeneratingImageDisplay isCompleted={false} />\n              </div>\n            </div>\n          ),\n        },\n      ]);\n    }\n\n    // Complete state - show images\n    if (isComplete) {\n      return children([\n        {\n          icon: SvgImage,\n          status: `Generated ${images.length} image${\n            images.length !== 1 ? \"s\" : \"\"\n          }`,\n          supportsCollapsible: false,\n          content: (\n            <div className=\"flex flex-col my-1\">\n              {images.length > 0 ? (\n                <div className=\"grid grid-cols-1 md:grid-cols-2 gap-4\">\n                  {images.map((image, index: number) => (\n                    <div\n                      key={image.file_id || index}\n                      className=\"transition-all group\"\n                    >\n                      {image.file_id && (\n                        <InMessageImage\n                          fileId={image.file_id}\n                          shape={image.shape}\n                        />\n                      )}\n                    </div>\n                  ))}\n                </div>\n              ) : (\n                <div className=\"py-4 text-center text-gray-500 dark:text-gray-400 ml-7\">\n                  <SvgImage className=\"w-6 h-6 mx-auto mb-2 opacity-50\" />\n                  <p className=\"text-sm\">No images generated</p>\n                </div>\n              )}\n            </div>\n          ),\n        },\n      ]);\n    }\n\n    // Fallback (shouldn't happen in normal flow)\n    return children([\n      {\n        icon: SvgImage,\n        status: status,\n        supportsCollapsible: false,\n        content: <div></div>,\n      },\n    ]);\n  }\n\n  // Highlight/Short rendering\n  if (isGenerating) {\n    return children([\n      {\n        icon: SvgImage,\n        status: \"Generating image...\",\n        supportsCollapsible: false,\n        content: (\n          <div className=\"flex items-center gap-2 text-sm text-muted-foreground\">\n            <div className=\"flex gap-0.5\">\n              <div className=\"w-1 h-1 bg-current rounded-full animate-pulse\"></div>\n              <div\n                className=\"w-1 h-1 bg-current rounded-full animate-pulse\"\n                style={{ animationDelay: \"0.1s\" }}\n              ></div>\n              <div\n                className=\"w-1 h-1 bg-current rounded-full animate-pulse\"\n                style={{ animationDelay: \"0.2s\" }}\n              ></div>\n            </div>\n            <span>Generating image...</span>\n          </div>\n        ),\n      },\n    ]);\n  }\n\n  if (error) {\n    return children([\n      {\n        icon: SvgImage,\n        status: \"Image generation failed\",\n        supportsCollapsible: false,\n        content: (\n          <div className=\"text-sm text-red-600 dark:text-red-400\">\n            Image generation failed\n          </div>\n        ),\n      },\n    ]);\n  }\n\n  if (isComplete && images.length > 0) {\n    return children([\n      {\n        icon: SvgImage,\n        status: `Generated ${images.length} image${\n          images.length > 1 ? \"s\" : \"\"\n        }`,\n        supportsCollapsible: false,\n        content: (\n          <div className=\"text-sm text-muted-foreground\">\n            Generated {images.length} image\n            {images.length > 1 ? \"s\" : \"\"}\n          </div>\n        ),\n      },\n    ]);\n  }\n\n  return children([\n    {\n      icon: SvgImage,\n      status: \"Image generation\",\n      supportsCollapsible: false,\n      content: (\n        <div className=\"text-sm text-muted-foreground\">Image generation</div>\n      ),\n    },\n  ]);\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/renderers/MessageTextRenderer.tsx",
    "content": "import React, { useEffect, useMemo, useRef, useState } from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nimport {\n  ChatPacket,\n  PacketType,\n  StopReason,\n} from \"../../../services/streamingModels\";\nimport { MessageRenderer, FullChatState } from \"../interfaces\";\nimport { isFinalAnswerComplete } from \"../../../services/packetUtils\";\nimport { useMarkdownRenderer } from \"../markdownUtils\";\nimport { BlinkingBar } from \"../../BlinkingBar\";\nimport { useVoiceMode } from \"@/providers/VoiceModeProvider\";\n\n/**\n * Maps a cleaned character position to the corresponding position in markdown text.\n * This allows progressive reveal to work with markdown formatting.\n */\nfunction getRevealPosition(markdown: string, cleanChars: number): number {\n  // Skip patterns that don't contribute to visible character count\n  const skipChars = new Set([\"*\", \"`\", \"#\"]);\n  let cleanIndex = 0;\n  let mdIndex = 0;\n\n  while (cleanIndex < cleanChars && mdIndex < markdown.length) {\n    const char = markdown[mdIndex];\n\n    // Skip markdown formatting characters\n    if (char !== undefined && skipChars.has(char)) {\n      mdIndex++;\n      continue;\n    }\n\n    // Handle link syntax [text](url) - skip the (url) part but count the text\n    if (\n      char === \"]\" &&\n      mdIndex + 1 < markdown.length &&\n      markdown[mdIndex + 1] === \"(\"\n    ) {\n      const closeIdx = markdown.indexOf(\")\", mdIndex + 2);\n      if (closeIdx > 0) {\n        mdIndex = closeIdx + 1;\n        continue;\n      }\n    }\n\n    cleanIndex++;\n    mdIndex++;\n  }\n\n  // Extend to word boundary to avoid cutting mid-word\n  while (\n    mdIndex < markdown.length &&\n    markdown[mdIndex] !== \" \" &&\n    markdown[mdIndex] !== \"\\n\"\n  ) {\n    mdIndex++;\n  }\n\n  return mdIndex;\n}\n\n// Control the rate of packet streaming (packets per second)\nconst PACKET_DELAY_MS = 10;\n\nexport const MessageTextRenderer: MessageRenderer<\n  ChatPacket,\n  FullChatState\n> = ({\n  packets,\n  state,\n  messageNodeId,\n  hasTimelineThinking,\n  onComplete,\n  renderType,\n  animate,\n  stopPacketSeen,\n  stopReason,\n  children,\n}) => {\n  // If we're animating and the final answer is already complete, show more packets initially\n  const initialPacketCount = animate\n    ? packets.length > 0\n      ? 1 // Otherwise start with 1 packet\n      : 0\n    : -1; // Show all if not animating\n\n  const [displayedPacketCount, setDisplayedPacketCount] =\n    useState(initialPacketCount);\n  const lastStableSyncedContentRef = useRef(\"\");\n  const lastVisibleContentRef = useRef(\"\");\n\n  // Get voice mode context for progressive text reveal synced with audio\n  const {\n    revealedCharCount,\n    autoPlayback,\n    isAudioSyncActive,\n    activeMessageNodeId,\n    isAwaitingAutoPlaybackStart,\n  } = useVoiceMode();\n\n  // Get the full content from all packets\n  const fullContent = packets\n    .map((packet) => {\n      if (\n        packet.obj.type === PacketType.MESSAGE_DELTA ||\n        packet.obj.type === PacketType.MESSAGE_START\n      ) {\n        return packet.obj.content;\n      }\n      return \"\";\n    })\n    .join(\"\");\n\n  const shouldUseAutoPlaybackSync =\n    autoPlayback &&\n    typeof messageNodeId === \"number\" &&\n    activeMessageNodeId === messageNodeId;\n\n  // Animation effect - gradually increase displayed packets at controlled rate\n  useEffect(() => {\n    if (!animate) {\n      setDisplayedPacketCount(-1); // Show all packets\n      return;\n    }\n\n    if (displayedPacketCount >= 0 && displayedPacketCount < packets.length) {\n      const timer = setTimeout(() => {\n        setDisplayedPacketCount((prev) => Math.min(prev + 1, packets.length));\n      }, PACKET_DELAY_MS);\n\n      return () => clearTimeout(timer);\n    }\n  }, [animate, displayedPacketCount, packets.length]);\n\n  // Reset displayed count when packet array changes significantly (e.g., new message)\n  useEffect(() => {\n    if (animate && packets.length < displayedPacketCount) {\n      const resetCount = isFinalAnswerComplete(packets)\n        ? Math.min(10, packets.length)\n        : packets.length > 0\n          ? 1\n          : 0;\n      setDisplayedPacketCount(resetCount);\n    }\n  }, [animate, packets.length, displayedPacketCount]);\n\n  // Only mark as complete when all packets are received AND displayed\n  useEffect(() => {\n    if (isFinalAnswerComplete(packets)) {\n      // If animating, wait until all packets are displayed\n      if (\n        animate &&\n        displayedPacketCount >= 0 &&\n        displayedPacketCount < packets.length\n      ) {\n        return;\n      }\n      onComplete();\n    }\n  }, [packets, onComplete, animate, displayedPacketCount]);\n\n  // Get content based on displayed packet count or audio progress\n  const computedContent = useMemo(() => {\n    // Hold response in \"thinking\" state only while autoplay startup is pending.\n    if (shouldUseAutoPlaybackSync && isAwaitingAutoPlaybackStart) {\n      return \"\";\n    }\n\n    // Sync text with audio only for the message currently being spoken.\n    if (shouldUseAutoPlaybackSync && isAudioSyncActive) {\n      const MIN_REVEAL_CHARS = 12;\n      if (revealedCharCount < MIN_REVEAL_CHARS) {\n        return \"\";\n      }\n\n      // Reveal text progressively based on audio progress\n      const revealPos = getRevealPosition(fullContent, revealedCharCount);\n      return fullContent.slice(0, Math.max(revealPos, 0));\n    }\n\n    // During an active synced turn, if sync temporarily drops, keep current reveal\n    // instead of jumping to full content or blanking.\n    if (shouldUseAutoPlaybackSync && !stopPacketSeen) {\n      return lastStableSyncedContentRef.current;\n    }\n\n    // Standard behavior when auto-playback is off\n    if (!animate || displayedPacketCount === -1) {\n      return fullContent; // Show all content\n    }\n\n    // Packet-based reveal (when auto-playback is disabled)\n    return packets\n      .slice(0, displayedPacketCount)\n      .map((packet) => {\n        if (\n          packet.obj.type === PacketType.MESSAGE_DELTA ||\n          packet.obj.type === PacketType.MESSAGE_START\n        ) {\n          return packet.obj.content;\n        }\n        return \"\";\n      })\n      .join(\"\");\n  }, [\n    animate,\n    displayedPacketCount,\n    fullContent,\n    packets,\n    revealedCharCount,\n    autoPlayback,\n    isAudioSyncActive,\n    activeMessageNodeId,\n    isAwaitingAutoPlaybackStart,\n    messageNodeId,\n    shouldUseAutoPlaybackSync,\n    stopPacketSeen,\n  ]);\n\n  // Keep synced text monotonic: once visible, never regress or disappear between chunks.\n  const content = useMemo(() => {\n    const wasUserCancelled = stopReason === StopReason.USER_CANCELLED;\n\n    // On user cancel, freeze at exactly what was already visible.\n    if (wasUserCancelled) {\n      return lastVisibleContentRef.current;\n    }\n\n    if (!shouldUseAutoPlaybackSync) {\n      return computedContent;\n    }\n\n    if (computedContent.length === 0) {\n      return lastStableSyncedContentRef.current;\n    }\n\n    const last = lastStableSyncedContentRef.current;\n    if (computedContent.startsWith(last)) {\n      return computedContent;\n    }\n\n    // If content shape changed unexpectedly mid-stream, prefer the stable version\n    // to avoid flicker/dumps.\n    if (!stopPacketSeen || wasUserCancelled) {\n      return last;\n    }\n\n    // For normal completed responses, allow final full content.\n    return computedContent;\n  }, [computedContent, shouldUseAutoPlaybackSync, stopPacketSeen, stopReason]);\n\n  // Sync the stable ref outside of useMemo to avoid side effects during render.\n  useEffect(() => {\n    if (stopReason === StopReason.USER_CANCELLED) {\n      return;\n    }\n    if (!shouldUseAutoPlaybackSync) {\n      lastStableSyncedContentRef.current = \"\";\n    } else if (content.length > 0) {\n      lastStableSyncedContentRef.current = content;\n    }\n  }, [content, shouldUseAutoPlaybackSync, stopReason]);\n\n  // Track last actually rendered content so cancel can freeze without dumping buffered text.\n  useEffect(() => {\n    if (content.length > 0) {\n      lastVisibleContentRef.current = content;\n    }\n  }, [content]);\n\n  const shouldShowThinkingPlaceholder =\n    shouldUseAutoPlaybackSync &&\n    isAwaitingAutoPlaybackStart &&\n    !hasTimelineThinking &&\n    !stopPacketSeen;\n\n  const shouldShowSpeechWarmupIndicator =\n    shouldUseAutoPlaybackSync &&\n    !isAwaitingAutoPlaybackStart &&\n    content.length === 0 &&\n    fullContent.length > 0 &&\n    !hasTimelineThinking &&\n    !stopPacketSeen;\n\n  const shouldShowCursor =\n    content.length > 0 &&\n    (!stopPacketSeen ||\n      (shouldUseAutoPlaybackSync && content.length < fullContent.length));\n\n  const { renderedContent } = useMarkdownRenderer(\n    // the [*]() is a hack to show a blinking dot when the packet is not complete\n    shouldShowCursor ? content + \" [*]() \" : content,\n    state,\n    \"font-main-content-body\"\n  );\n\n  return children([\n    {\n      icon: null,\n      status: null,\n      content:\n        shouldShowThinkingPlaceholder || shouldShowSpeechWarmupIndicator ? (\n          <Text as=\"span\" secondaryBody text04 className=\"italic\">\n            Thinking\n          </Text>\n        ) : content.length > 0 ? (\n          <>{renderedContent}</>\n        ) : (\n          <BlinkingBar addMargin />\n        ),\n    },\n  ]);\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/AgentTimeline.tsx",
    "content": "\"use client\";\n\nimport React, { useMemo, useCallback } from \"react\";\nimport { StopReason } from \"@/app/app/services/streamingModels\";\nimport { FullChatState, RenderType } from \"../interfaces\";\nimport { TurnGroup } from \"./transformers\";\nimport { cn } from \"@/lib/utils\";\nimport AgentAvatar from \"@/refresh-components/avatars/AgentAvatar\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { useTimelineExpansion } from \"@/app/app/message/messageComponents/timeline/hooks/useTimelineExpansion\";\nimport { useTimelineMetrics } from \"@/app/app/message/messageComponents/timeline/hooks/useTimelineMetrics\";\nimport { useTimelineHeader } from \"@/app/app/message/messageComponents/timeline/hooks/useTimelineHeader\";\nimport {\n  useTimelineUIState,\n  TimelineUIState,\n} from \"@/app/app/message/messageComponents/timeline/hooks/useTimelineUIState\";\nimport {\n  isResearchAgentPackets,\n  isSearchToolPackets,\n  stepSupportsCollapsedStreaming,\n  stepHasCollapsedStreamingContent,\n} from \"@/app/app/message/messageComponents/timeline/packetHelpers\";\nimport { useTimelineStepState } from \"@/app/app/message/messageComponents/timeline/hooks/useTimelineStepState\";\nimport { StreamingHeader } from \"@/app/app/message/messageComponents/timeline/headers/StreamingHeader\";\nimport { CompletedHeader } from \"@/app/app/message/messageComponents/timeline/headers/CompletedHeader\";\nimport { StoppedHeader } from \"@/app/app/message/messageComponents/timeline/headers/StoppedHeader\";\nimport { ParallelStreamingHeader } from \"@/app/app/message/messageComponents/timeline/headers/ParallelStreamingHeader\";\nimport { useStreamingStartTime } from \"@/app/app/stores/useChatSessionStore\";\nimport { ExpandedTimelineContent } from \"./ExpandedTimelineContent\";\nimport { CollapsedStreamingContent } from \"./CollapsedStreamingContent\";\nimport { TimelineRoot } from \"@/app/app/message/messageComponents/timeline/primitives/TimelineRoot\";\nimport { TimelineHeaderRow } from \"@/app/app/message/messageComponents/timeline/primitives/TimelineHeaderRow\";\n\n// =============================================================================\n// Private Wrapper Components\n// =============================================================================\n\ninterface TimelineContainerProps {\n  agent: FullChatState[\"agent\"];\n  headerContent?: React.ReactNode;\n  children?: React.ReactNode;\n}\n\nfunction TimelineContainer({\n  agent,\n  headerContent,\n  children,\n}: TimelineContainerProps) {\n  return (\n    <TimelineRoot>\n      <TimelineHeaderRow left={<AgentAvatar agent={agent} size={24} />}>\n        {headerContent}\n      </TimelineHeaderRow>\n      {children}\n    </TimelineRoot>\n  );\n}\n\n// =============================================================================\n// Main Component\n// =============================================================================\n\nexport interface AgentTimelineProps {\n  /** Turn groups from usePacketProcessor */\n  turnGroups: TurnGroup[];\n  /** Chat state for rendering content */\n  chatState: FullChatState;\n  /** Whether the stop packet has been seen */\n  stopPacketSeen?: boolean;\n  /** Reason for stopping (if stopped) */\n  stopReason?: StopReason;\n  /** Whether final answer is coming (affects last connector) */\n  finalAnswerComing?: boolean;\n  /** Whether there is display content after timeline */\n  hasDisplayContent?: boolean;\n  /** Content to render after timeline (final message + toolbar) - slot pattern */\n  children?: React.ReactNode;\n  /** Whether the timeline is collapsible */\n  collapsible?: boolean;\n  /** Title of the button to toggle the timeline */\n  buttonTitle?: string;\n  /** Test ID for e2e testing */\n  \"data-testid\"?: string;\n  /** Processing duration in seconds (for completed messages) */\n  processingDurationSeconds?: number;\n  /** Whether image generation is in progress */\n  isGeneratingImage?: boolean;\n  /** Number of images generated */\n  generatedImageCount?: number;\n  /** Tool processing duration from backend (via MESSAGE_START packet) */\n  toolProcessingDuration?: number;\n}\n\n/**\n * Custom prop comparison for AgentTimeline memoization.\n * Prevents unnecessary re-renders when parent renders but props haven't meaningfully changed.\n */\nfunction areAgentTimelinePropsEqual(\n  prev: AgentTimelineProps,\n  next: AgentTimelineProps\n): boolean {\n  return (\n    prev.turnGroups === next.turnGroups &&\n    prev.stopPacketSeen === next.stopPacketSeen &&\n    prev.stopReason === next.stopReason &&\n    prev.finalAnswerComing === next.finalAnswerComing &&\n    prev.hasDisplayContent === next.hasDisplayContent &&\n    prev.processingDurationSeconds === next.processingDurationSeconds &&\n    prev.collapsible === next.collapsible &&\n    prev.buttonTitle === next.buttonTitle &&\n    prev.chatState === next.chatState &&\n    prev.isGeneratingImage === next.isGeneratingImage &&\n    prev.generatedImageCount === next.generatedImageCount &&\n    prev.toolProcessingDuration === next.toolProcessingDuration\n  );\n}\n\nexport const AgentTimeline = React.memo(function AgentTimeline({\n  turnGroups,\n  chatState,\n  stopPacketSeen = false,\n  stopReason,\n  finalAnswerComing = false,\n  hasDisplayContent = false,\n  collapsible = true,\n  buttonTitle,\n  \"data-testid\": testId,\n  processingDurationSeconds,\n  isGeneratingImage = false,\n  generatedImageCount = 0,\n  toolProcessingDuration,\n}: AgentTimelineProps) {\n  // Header text and state flags\n  const { headerText, hasPackets, userStopped } = useTimelineHeader(\n    turnGroups,\n    stopReason,\n    isGeneratingImage\n  );\n\n  // Memoized metrics derived from turn groups\n  const {\n    totalSteps,\n    isSingleStep,\n    lastTurnGroup,\n    lastStep,\n    lastStepIsResearchAgent,\n    lastStepSupportsCollapsedStreaming,\n  } = useTimelineMetrics(turnGroups, userStopped);\n\n  // Extract memory text, operation, and whether this is a memory-only timeline\n  const { memoryText, memoryOperation, memoryId, memoryIndex, isMemoryOnly } =\n    useTimelineStepState(turnGroups);\n\n  // Check if last step is a search tool for INLINE render type\n  const lastStepIsSearchTool = useMemo(\n    () => lastStep && isSearchToolPackets(lastStep.packets),\n    [lastStep]\n  );\n\n  const { isExpanded, handleToggle, parallelActiveTab, setParallelActiveTab } =\n    useTimelineExpansion(stopPacketSeen, lastTurnGroup, hasDisplayContent);\n\n  // Streaming duration tracking\n  const streamingStartTime = useStreamingStartTime();\n\n  // Parallel step analysis for collapsed streaming view\n  const parallelActiveStep = useMemo(() => {\n    if (!lastTurnGroup?.isParallel) return null;\n    return (\n      lastTurnGroup.steps.find((s) => s.key === parallelActiveTab) ??\n      lastTurnGroup.steps[0]\n    );\n  }, [lastTurnGroup, parallelActiveTab]);\n\n  const parallelActiveStepSupportsCollapsedStreaming = useMemo(() => {\n    if (!parallelActiveStep) return false;\n    return stepSupportsCollapsedStreaming(parallelActiveStep.packets);\n  }, [parallelActiveStep]);\n\n  const lastStepHasCollapsedContent = useMemo(() => {\n    if (!lastStep) return false;\n    return stepHasCollapsedStreamingContent(lastStep.packets);\n  }, [lastStep]);\n\n  const parallelActiveStepHasCollapsedContent = useMemo(() => {\n    if (!parallelActiveStep) return false;\n    return stepHasCollapsedStreamingContent(parallelActiveStep.packets);\n  }, [parallelActiveStep]);\n\n  const stoppedStepsCount = useMemo(() => {\n    if (!stopPacketSeen || !userStopped) {\n      return totalSteps;\n    }\n\n    let count = 0;\n    for (const turnGroup of turnGroups) {\n      for (const step of turnGroup.steps) {\n        if (stepHasCollapsedStreamingContent(step.packets)) {\n          count += 1;\n        }\n      }\n    }\n\n    return count;\n  }, [stopPacketSeen, userStopped, totalSteps, turnGroups]);\n\n  // Derive all UI state from inputs\n  const {\n    uiState,\n    showCollapsedCompact,\n    showCollapsedParallel,\n    showParallelTabs,\n    showDoneStep,\n    showStoppedStep,\n    hasDoneIndicator,\n    showTintedBackground,\n    showRoundedBottom,\n  } = useTimelineUIState({\n    stopPacketSeen,\n    hasPackets,\n    hasDisplayContent,\n    userStopped,\n    isExpanded,\n    lastTurnGroup,\n    lastStep,\n    lastStepSupportsCollapsedStreaming,\n    lastStepHasCollapsedContent,\n    lastStepIsResearchAgent,\n    parallelActiveStepSupportsCollapsedStreaming,\n    parallelActiveStepHasCollapsedContent,\n    isGeneratingImage,\n    finalAnswerComing,\n  });\n\n  const headerIsInteractive = useMemo(() => {\n    if (!collapsible || isMemoryOnly) {\n      return false;\n    }\n\n    if (uiState === TimelineUIState.STOPPED) {\n      return stoppedStepsCount > 0;\n    }\n\n    return totalSteps > 0;\n  }, [collapsible, isMemoryOnly, uiState, stoppedStepsCount, totalSteps]);\n\n  // Determine render type override for collapsed streaming view\n  const collapsedRenderTypeOverride = useMemo(() => {\n    if (lastStepIsResearchAgent) return RenderType.HIGHLIGHT;\n    if (lastStepIsSearchTool) return RenderType.INLINE;\n    return RenderType.COMPACT;\n  }, [lastStepIsResearchAgent, lastStepIsSearchTool]);\n\n  // Header selection based on UI state\n  const renderHeader = useCallback(() => {\n    switch (uiState) {\n      case TimelineUIState.STREAMING_PARALLEL:\n        // Only show parallel header when collapsed (showParallelTabs includes !isExpanded check)\n        if (showParallelTabs && lastTurnGroup) {\n          return (\n            <ParallelStreamingHeader\n              steps={lastTurnGroup.steps}\n              activeTab={parallelActiveTab}\n              onTabChange={setParallelActiveTab}\n              collapsible={collapsible}\n              isExpanded={isExpanded}\n              onToggle={handleToggle}\n            />\n          );\n        }\n      // falls through to sequential header when expanded or no lastTurnGroup\n      case TimelineUIState.STREAMING_SEQUENTIAL:\n        return (\n          <StreamingHeader\n            headerText={headerText}\n            collapsible={collapsible}\n            buttonTitle={buttonTitle}\n            isExpanded={isExpanded}\n            onToggle={handleToggle}\n            streamingStartTime={streamingStartTime}\n            toolProcessingDuration={toolProcessingDuration}\n          />\n        );\n\n      case TimelineUIState.STOPPED:\n        return (\n          <StoppedHeader\n            totalSteps={stoppedStepsCount}\n            collapsible={collapsible}\n            isExpanded={isExpanded}\n            onToggle={handleToggle}\n          />\n        );\n\n      case TimelineUIState.COMPLETED_COLLAPSED:\n      case TimelineUIState.COMPLETED_EXPANDED:\n        return (\n          <CompletedHeader\n            totalSteps={totalSteps}\n            collapsible={collapsible}\n            isExpanded={isExpanded}\n            onToggle={handleToggle}\n            processingDurationSeconds={\n              toolProcessingDuration ?? processingDurationSeconds\n            }\n            generatedImageCount={generatedImageCount}\n            isMemoryOnly={isMemoryOnly}\n            memoryText={memoryText}\n            memoryOperation={memoryOperation}\n            memoryId={memoryId}\n            memoryIndex={memoryIndex}\n          />\n        );\n\n      default:\n        return null;\n    }\n  }, [\n    uiState,\n    showParallelTabs,\n    lastTurnGroup,\n    parallelActiveTab,\n    setParallelActiveTab,\n    collapsible,\n    isExpanded,\n    handleToggle,\n    headerText,\n    buttonTitle,\n    streamingStartTime,\n    isMemoryOnly,\n    memoryText,\n    memoryOperation,\n    memoryId,\n    memoryIndex,\n    totalSteps,\n    stoppedStepsCount,\n    processingDurationSeconds,\n    generatedImageCount,\n    toolProcessingDuration,\n  ]);\n\n  // Empty state: no packets, still streaming, and not stopped\n  if (uiState === TimelineUIState.EMPTY) {\n    return (\n      <TimelineContainer\n        agent={chatState.agent}\n        headerContent={\n          <div className=\"flex w-full h-full items-center pl-[var(--timeline-header-padding-left)] pr-[var(--timeline-header-padding-right)]\">\n            <Text\n              as=\"p\"\n              mainUiAction\n              text03\n              className=\"animate-shimmer bg-[length:200%_100%] bg-[linear-gradient(90deg,var(--shimmer-base)_10%,var(--shimmer-highlight)_40%,var(--shimmer-base)_70%)] bg-clip-text text-transparent\"\n            >\n              {headerText}\n            </Text>\n          </div>\n        }\n      />\n    );\n  }\n\n  // Display content only (no timeline steps) - but show header for image generation\n  if (uiState === TimelineUIState.DISPLAY_CONTENT_ONLY) {\n    return <TimelineContainer agent={chatState.agent} />;\n  }\n\n  return (\n    <TimelineContainer\n      agent={chatState.agent}\n      headerContent={\n        <div\n          className={cn(\n            \"flex flex-1 min-w-0 h-full items-center justify-between p-1 rounded-t-12 transition-colors duration-300\",\n            headerIsInteractive && \"hover:bg-background-tint-00\",\n            showTintedBackground && \"bg-background-tint-00\",\n            showRoundedBottom && \"rounded-b-12\"\n          )}\n        >\n          {renderHeader()}\n        </div>\n      }\n    >\n      {/* Collapsed streaming view - single step compact mode */}\n      {showCollapsedCompact && lastStep && (\n        <CollapsedStreamingContent\n          step={lastStep}\n          chatState={chatState}\n          stopReason={stopReason}\n          renderTypeOverride={collapsedRenderTypeOverride}\n        />\n      )}\n\n      {/* Collapsed streaming view - parallel tools compact mode */}\n      {showCollapsedParallel && parallelActiveStep && (\n        <CollapsedStreamingContent\n          step={parallelActiveStep}\n          chatState={chatState}\n          stopReason={stopReason}\n          renderTypeOverride={RenderType.HIGHLIGHT}\n        />\n      )}\n\n      {/* Expanded timeline view */}\n      {isExpanded && (\n        <div className=\"animate-in fade-in slide-in-from-top-2 duration-300\">\n          <ExpandedTimelineContent\n            turnGroups={turnGroups}\n            chatState={chatState}\n            stopPacketSeen={stopPacketSeen}\n            stopReason={stopReason}\n            isSingleStep={isSingleStep}\n            userStopped={userStopped}\n            showDoneStep={showDoneStep}\n            showStoppedStep={showStoppedStep}\n            hasDoneIndicator={hasDoneIndicator}\n          />\n        </div>\n      )}\n    </TimelineContainer>\n  );\n}, areAgentTimelinePropsEqual);\n\nexport default AgentTimeline;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/CollapsedStreamingContent.tsx",
    "content": "\"use client\";\n\nimport React, { useCallback } from \"react\";\nimport { StopReason } from \"@/app/app/services/streamingModels\";\nimport { FullChatState, RenderType } from \"../interfaces\";\nimport { TransformedStep } from \"./transformers\";\nimport {\n  TimelineRendererComponent,\n  TimelineRendererOutput,\n} from \"./TimelineRendererComponent\";\nimport { TimelineRow } from \"@/app/app/message/messageComponents/timeline/primitives/TimelineRow\";\nimport { TimelineSurface } from \"@/app/app/message/messageComponents/timeline/primitives/TimelineSurface\";\n\nexport interface CollapsedStreamingContentProps {\n  step: TransformedStep;\n  chatState: FullChatState;\n  stopReason?: StopReason;\n  renderTypeOverride?: RenderType;\n}\n\nexport const CollapsedStreamingContent = React.memo(\n  function CollapsedStreamingContent({\n    step,\n    chatState,\n    stopReason,\n    renderTypeOverride,\n  }: CollapsedStreamingContentProps) {\n    const renderContentOnly = useCallback(\n      (results: TimelineRendererOutput) => (\n        <>\n          {results.map((result, index) => (\n            <React.Fragment key={index}>{result.content}</React.Fragment>\n          ))}\n        </>\n      ),\n      []\n    );\n\n    return (\n      <TimelineRow railVariant=\"spacer\">\n        <TimelineSurface className=\"px-2 pb-2\" roundedBottom>\n          <TimelineRendererComponent\n            key={`${step.key}-compact`}\n            packets={step.packets}\n            chatState={chatState}\n            animate={true}\n            stopPacketSeen={false}\n            stopReason={stopReason}\n            defaultExpanded={false}\n            renderTypeOverride={renderTypeOverride}\n            isLastStep={true}\n          >\n            {renderContentOnly}\n          </TimelineRendererComponent>\n        </TimelineSurface>\n      </TimelineRow>\n    );\n  }\n);\n\nexport default CollapsedStreamingContent;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/ExpandedTimelineContent.tsx",
    "content": "\"use client\";\n\nimport React, { FunctionComponent, useMemo, useCallback } from \"react\";\nimport { StopReason } from \"@/app/app/services/streamingModels\";\nimport { FullChatState } from \"../interfaces\";\nimport { TurnGroup, TransformedStep } from \"./transformers\";\nimport { SvgCheckCircle, SvgStopCircle } from \"@opal/icons\";\nimport { IconProps } from \"@opal/types\";\nimport {\n  TimelineRendererComponent,\n  TimelineRendererOutput,\n  TimelineRendererResult,\n} from \"./TimelineRendererComponent\";\nimport { ParallelTimelineTabs } from \"./ParallelTimelineTabs\";\nimport { StepContainer } from \"./StepContainer\";\nimport { TimelineStepComposer } from \"./TimelineStepComposer\";\nimport {\n  isSearchToolPackets,\n  isPythonToolPackets,\n} from \"@/app/app/message/messageComponents/timeline/packetHelpers\";\n\n// =============================================================================\n// TimelineStep Component - Memoized to prevent re-renders\n// =============================================================================\n\ninterface TimelineStepProps {\n  step: TransformedStep;\n  chatState: FullChatState;\n  stopPacketSeen: boolean;\n  stopReason?: StopReason;\n  isLastStep: boolean;\n  isFirstStep: boolean;\n  isSingleStep: boolean;\n  isStreaming?: boolean;\n}\n\nconst TimelineStep = React.memo(function TimelineStep({\n  step,\n  chatState,\n  stopPacketSeen,\n  stopReason,\n  isLastStep,\n  isFirstStep,\n  isSingleStep,\n  isStreaming = false,\n}: TimelineStepProps) {\n  const isSearchTool = useMemo(\n    () => isSearchToolPackets(step.packets),\n    [step.packets]\n  );\n  const isPythonTool = useMemo(\n    () => isPythonToolPackets(step.packets),\n    [step.packets]\n  );\n  const getCollapsedIcon = useCallback(\n    (result: TimelineRendererResult) =>\n      isSearchTool ? (result.icon as FunctionComponent<IconProps>) : undefined,\n    [isSearchTool]\n  );\n\n  const renderStep = useCallback(\n    (results: TimelineRendererOutput) => (\n      <TimelineStepComposer\n        results={results}\n        isLastStep={isLastStep}\n        isFirstStep={isFirstStep}\n        isSingleStep={isSingleStep}\n        collapsible={true}\n        getCollapsedIcon={getCollapsedIcon}\n      />\n    ),\n    [isFirstStep, isLastStep, isSingleStep, getCollapsedIcon]\n  );\n\n  return (\n    <TimelineRendererComponent\n      packets={step.packets}\n      chatState={chatState}\n      animate={!stopPacketSeen}\n      stopPacketSeen={stopPacketSeen}\n      stopReason={stopReason}\n      defaultExpanded={isStreaming || (isSingleStep && !isPythonTool)}\n      isLastStep={isLastStep}\n    >\n      {renderStep}\n    </TimelineRendererComponent>\n  );\n});\n\n// =============================================================================\n// ExpandedTimelineContent Component\n// =============================================================================\n\nexport interface ExpandedTimelineContentProps {\n  turnGroups: TurnGroup[];\n  chatState: FullChatState;\n  stopPacketSeen: boolean;\n  stopReason?: StopReason;\n  isSingleStep: boolean;\n  userStopped: boolean;\n  showDoneStep: boolean;\n  showStoppedStep: boolean;\n  hasDoneIndicator: boolean;\n}\n\nexport const ExpandedTimelineContent = React.memo(\n  function ExpandedTimelineContent({\n    turnGroups,\n    chatState,\n    stopPacketSeen,\n    stopReason,\n    isSingleStep,\n    userStopped,\n    showDoneStep,\n    showStoppedStep,\n    hasDoneIndicator,\n  }: ExpandedTimelineContentProps) {\n    return (\n      <div className=\"w-full\">\n        {turnGroups.map((turnGroup, turnIdx) =>\n          turnGroup.isParallel ? (\n            <ParallelTimelineTabs\n              key={turnGroup.turnIndex}\n              turnGroup={turnGroup}\n              chatState={chatState}\n              stopPacketSeen={stopPacketSeen}\n              stopReason={stopReason}\n              isLastTurnGroup={\n                turnIdx === turnGroups.length - 1 &&\n                !showDoneStep &&\n                !showStoppedStep\n              }\n              isFirstTurnGroup={turnIdx === 0}\n            />\n          ) : (\n            turnGroup.steps.map((step, stepIdx) => {\n              const stepIsLast =\n                turnIdx === turnGroups.length - 1 &&\n                stepIdx === turnGroup.steps.length - 1 &&\n                !hasDoneIndicator &&\n                !userStopped;\n              const stepIsFirst = turnIdx === 0 && stepIdx === 0;\n\n              return (\n                <TimelineStep\n                  key={step.key}\n                  step={step}\n                  chatState={chatState}\n                  stopPacketSeen={stopPacketSeen}\n                  stopReason={stopReason}\n                  isLastStep={stepIsLast}\n                  isFirstStep={stepIsFirst}\n                  isSingleStep={isSingleStep}\n                  isStreaming={!stopPacketSeen && !userStopped}\n                />\n              );\n            })\n          )\n        )}\n\n        {/* Done indicator */}\n        {showDoneStep && (\n          <StepContainer\n            stepIcon={SvgCheckCircle}\n            header=\"Done\"\n            isLastStep={true}\n            isFirstStep={false}\n          >\n            {null}\n          </StepContainer>\n        )}\n\n        {/* Stopped indicator */}\n        {showStoppedStep && (\n          <StepContainer\n            stepIcon={SvgStopCircle}\n            header=\"Stopped\"\n            isLastStep={true}\n            isFirstStep={false}\n          >\n            {null}\n          </StepContainer>\n        )}\n      </div>\n    );\n  }\n);\n\nexport default ExpandedTimelineContent;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/ParallelTimelineTabs.tsx",
    "content": "\"use client\";\n\nimport React, { useState, useMemo, useCallback } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { StopReason } from \"@/app/app/services/streamingModels\";\nimport { FullChatState } from \"../interfaces\";\nimport { TurnGroup } from \"./transformers\";\nimport {\n  getToolName,\n  getToolIcon,\n  isToolComplete,\n} from \"../toolDisplayHelpers\";\nimport {\n  TimelineRendererComponent,\n  TimelineRendererOutput,\n} from \"./TimelineRendererComponent\";\nimport Tabs from \"@/refresh-components/Tabs\";\nimport { SvgBranch, SvgFold, SvgExpand } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport { TimelineRow } from \"@/app/app/message/messageComponents/timeline/primitives/TimelineRow\";\nimport { TimelineSurface } from \"@/app/app/message/messageComponents/timeline/primitives/TimelineSurface\";\nimport { TimelineTopSpacer } from \"@/app/app/message/messageComponents/timeline/primitives/TimelineTopSpacer\";\nimport { TimelineStepComposer } from \"./TimelineStepComposer\";\n\nexport interface ParallelTimelineTabsProps {\n  /** Turn group containing parallel steps */\n  turnGroup: TurnGroup;\n  /** Chat state for rendering content */\n  chatState: FullChatState;\n  /** Whether the stop packet has been seen */\n  stopPacketSeen: boolean;\n  /** Reason for stopping (if stopped) */\n  stopReason?: StopReason;\n  /** Whether this is the last turn group (affects connector line) */\n  isLastTurnGroup: boolean;\n  /** Whether this is the first turn group (affects connector line) */\n  isFirstTurnGroup: boolean;\n}\n\nexport function ParallelTimelineTabs({\n  turnGroup,\n  chatState,\n  stopPacketSeen,\n  stopReason,\n  isLastTurnGroup,\n  isFirstTurnGroup,\n}: ParallelTimelineTabsProps) {\n  const [activeTab, setActiveTab] = useState(turnGroup.steps[0]?.key ?? \"\");\n  const [isExpanded, setIsExpanded] = useState(true);\n  const [isHover, setIsHover] = useState(false);\n  const handleToggle = useCallback(() => setIsExpanded((prev) => !prev), []);\n  const handleHeaderEnter = useCallback(() => setIsHover(true), []);\n  const handleHeaderLeave = useCallback(() => setIsHover(false), []);\n  const topSpacerVariant = isFirstTurnGroup ? \"first\" : \"none\";\n  const shouldShowResults = !(!isExpanded && stopPacketSeen);\n\n  // Find the active step based on selected tab\n  const activeStep = useMemo(\n    () => turnGroup.steps.find((step) => step.key === activeTab),\n    [turnGroup.steps, activeTab]\n  );\n\n  // Memoized loading states for each step\n  const loadingStates = useMemo(\n    () =>\n      new Map(\n        turnGroup.steps.map((step) => [\n          step.key,\n          !stopPacketSeen &&\n            step.packets.length > 0 &&\n            !isToolComplete(step.packets),\n        ])\n      ),\n    [turnGroup.steps, stopPacketSeen]\n  );\n\n  const renderTabContent = useCallback(\n    (results: TimelineRendererOutput) => (\n      <TimelineStepComposer\n        results={results}\n        isLastStep={isLastTurnGroup}\n        isFirstStep={false}\n        isSingleStep={false}\n        collapsible={true}\n      />\n    ),\n    [isLastTurnGroup]\n  );\n\n  const hasActivePackets = Boolean(activeStep && activeStep.packets.length > 0);\n  const headerIsLast =\n    isLastTurnGroup && (!shouldShowResults || !hasActivePackets);\n\n  return (\n    <Tabs value={activeTab} onValueChange={setActiveTab}>\n      <div className=\"flex flex-col w-full\">\n        <TimelineRow\n          railVariant=\"rail\"\n          isFirst={isFirstTurnGroup}\n          isLast={headerIsLast}\n          isHover={isHover}\n          disableTopConnectorHover={true}\n          icon={\n            <div\n              className={cn(\n                \"h-[var(--timeline-branch-icon-wrapper-size)] w-[var(--timeline-branch-icon-wrapper-size)] flex items-center justify-center text-text-02\",\n                isHover &&\n                  \"text-text-inverted-05 bg-background-neutral-inverted-00 rounded-full\"\n              )}\n            >\n              <SvgBranch className=\"h-[var(--timeline-branch-icon-size)] w-[var(--timeline-branch-icon-size)]\" />\n            </div>\n          }\n        >\n          <TimelineSurface\n            className=\"flex-1 flex flex-col\"\n            isHover={isHover}\n            roundedBottom={headerIsLast}\n          >\n            <TimelineTopSpacer variant={topSpacerVariant} />\n\n            <div\n              className=\"flex items-center min-h-[var(--timeline-step-header-height)] pl-[var(--timeline-header-padding-left)] pr-[var(--timeline-header-padding-right)]\"\n              onMouseEnter={handleHeaderEnter}\n              onMouseLeave={handleHeaderLeave}\n            >\n              <Tabs.List\n                variant=\"pill\"\n                enableScrollArrows\n                className={cn(\n                  isHover && \"bg-background-tint-02\",\n                  \"transition-colors duration-200\"\n                )}\n                rightContent={\n                  <Button\n                    prominence=\"tertiary\"\n                    size=\"sm\"\n                    onClick={handleToggle}\n                    icon={isExpanded ? SvgFold : SvgExpand}\n                  />\n                }\n              >\n                {turnGroup.steps.map((step) => (\n                  <Tabs.Trigger\n                    key={step.key}\n                    value={step.key}\n                    variant=\"pill\"\n                    isLoading={loadingStates.get(step.key)}\n                  >\n                    <span className=\"flex items-center gap-1.5\">\n                      {getToolIcon(step.packets)}\n                      {getToolName(step.packets)}\n                    </span>\n                  </Tabs.Trigger>\n                ))}\n              </Tabs.List>\n            </div>\n          </TimelineSurface>\n        </TimelineRow>\n\n        {shouldShowResults && activeStep && (\n          <TimelineRendererComponent\n            key={`${activeTab}-${isExpanded}`}\n            packets={activeStep.packets}\n            chatState={chatState}\n            animate={!stopPacketSeen}\n            stopPacketSeen={stopPacketSeen}\n            stopReason={stopReason}\n            defaultExpanded={isExpanded}\n            isLastStep={isLastTurnGroup}\n            isHover={isHover}\n          >\n            {renderTabContent}\n          </TimelineRendererComponent>\n        )}\n      </div>\n    </Tabs>\n  );\n}\n\nexport default ParallelTimelineTabs;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/StepContainer.tsx",
    "content": "import React, { FunctionComponent } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { IconProps } from \"@opal/types\";\nimport { TimelineRow } from \"@/app/app/message/messageComponents/timeline/primitives/TimelineRow\";\nimport {\n  TimelineSurface,\n  TimelineSurfaceBackground,\n} from \"@/app/app/message/messageComponents/timeline/primitives/TimelineSurface\";\nimport { TimelineStepContent } from \"@/app/app/message/messageComponents/timeline/primitives/TimelineStepContent\";\n\nexport interface StepContainerProps {\n  /** Main content */\n  children?: React.ReactNode;\n  /** Step icon component */\n  stepIcon?: FunctionComponent<IconProps>;\n  /** Header left slot */\n  header?: React.ReactNode;\n  /** Button title for toggle */\n  buttonTitle?: string;\n  /** Controlled expanded state */\n  isExpanded?: boolean;\n  /** Toggle callback */\n  onToggle?: () => void;\n  /** Whether collapse control is shown */\n  collapsible?: boolean;\n  /** Collapse button shown only when renderer supports collapsible mode */\n  supportsCollapsible?: boolean;\n  /** Last step (no bottom connector) */\n  isLastStep?: boolean;\n  /** First step (top padding instead of connector) */\n  isFirstStep?: boolean;\n  /** Hide header (single-step timelines) */\n  hideHeader?: boolean;\n  /** Hover state from parent */\n  isHover?: boolean;\n  /** Custom icon to show when collapsed (defaults to SvgExpand) */\n  collapsedIcon?: FunctionComponent<IconProps>;\n  /** Remove right padding (for reasoning content) */\n  noPaddingRight?: boolean;\n  /** Render without rail (for nested/parallel content) */\n  withRail?: boolean;\n  /** Override the surface background variant */\n  surfaceBackground?: TimelineSurfaceBackground;\n}\n\n/** Visual wrapper for timeline steps - icon, connector line, header, and content */\nexport function StepContainer({\n  children,\n  stepIcon: StepIconComponent,\n  header,\n  buttonTitle,\n  isExpanded = true,\n  onToggle,\n  collapsible = true,\n  supportsCollapsible = false,\n  isLastStep = false,\n  isFirstStep = false,\n  hideHeader = false,\n  isHover = false,\n  collapsedIcon: CollapsedIconComponent,\n  noPaddingRight = false,\n  withRail = true,\n  surfaceBackground,\n}: StepContainerProps) {\n  const iconNode = StepIconComponent ? (\n    <StepIconComponent\n      className={cn(\n        \"h-[var(--timeline-icon-size)] w-[var(--timeline-icon-size)] stroke-text-02\",\n        isHover && \"stroke-text-04\"\n      )}\n    />\n  ) : null;\n\n  const content = (\n    <TimelineSurface\n      className=\"flex-1 flex flex-col\"\n      isHover={isHover}\n      roundedBottom={isLastStep}\n      background={surfaceBackground}\n    >\n      <TimelineStepContent\n        header={header}\n        buttonTitle={buttonTitle}\n        isExpanded={isExpanded}\n        onToggle={onToggle}\n        collapsible={collapsible}\n        supportsCollapsible={supportsCollapsible}\n        hideHeader={hideHeader}\n        collapsedIcon={CollapsedIconComponent}\n        noPaddingRight={noPaddingRight}\n        surfaceBackground={surfaceBackground}\n      >\n        {children}\n      </TimelineStepContent>\n    </TimelineSurface>\n  );\n\n  if (!withRail) {\n    return <div className=\"flex w-full\">{content}</div>;\n  }\n\n  return (\n    <TimelineRow\n      railVariant=\"rail\"\n      icon={iconNode}\n      showIcon={!hideHeader && Boolean(StepIconComponent)}\n      iconRowVariant={hideHeader ? \"compact\" : \"default\"}\n      isFirst={isFirstStep}\n      isLast={isLastStep}\n      isHover={isHover}\n    >\n      {content}\n    </TimelineRow>\n  );\n}\n\nexport default StepContainer;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/TimelineRendererComponent.tsx",
    "content": "\"use client\";\n\nimport React, { useState, useCallback, JSX } from \"react\";\nimport { Packet, StopReason } from \"@/app/app/services/streamingModels\";\nimport {\n  FullChatState,\n  RenderType,\n  RendererResult,\n  RendererOutput,\n} from \"../interfaces\";\nimport { findRenderer } from \"../renderMessageComponent\";\n\n/** Extended result that includes collapse state */\nexport interface TimelineRendererResult extends RendererResult {\n  /** Current expanded state */\n  isExpanded: boolean;\n  /** Toggle callback */\n  onToggle: () => void;\n  /** Current render type */\n  renderType: RenderType;\n  /** Whether this is the last step (passed through from props) */\n  isLastStep: boolean;\n  /** Hover state from parent */\n  isHover: boolean;\n  /** Whether parent should wrap with StepContainer or render raw content */\n  timelineLayout: \"timeline\" | \"content\";\n}\n\n// All renderers return an array of results\nexport type TimelineRendererOutput = TimelineRendererResult[];\n\nexport interface TimelineRendererComponentProps {\n  /** Packets to render */\n  packets: Packet[];\n  /** Chat state for rendering */\n  chatState: FullChatState;\n  /** Whether to animate streaming */\n  animate: boolean;\n  /** Whether stop packet has been seen */\n  stopPacketSeen: boolean;\n  /** Reason for stopping */\n  stopReason?: StopReason;\n  /** Initial expanded state */\n  defaultExpanded?: boolean;\n  /** Whether this is the last step in the timeline (for connector line decisions) */\n  isLastStep?: boolean;\n  /** Hover state from parent component */\n  isHover?: boolean;\n  /** Override render type (if not set, derives from defaultExpanded) */\n  renderTypeOverride?: RenderType;\n  /** Children render function - receives extended result with collapse state (single or array) */\n  children: (result: TimelineRendererOutput) => JSX.Element;\n}\n\n// Custom comparison function to prevent unnecessary re-renders\n// Only re-render if meaningful changes occur\nfunction arePropsEqual(\n  prev: TimelineRendererComponentProps,\n  next: TimelineRendererComponentProps\n): boolean {\n  return (\n    prev.packets === next.packets &&\n    prev.stopPacketSeen === next.stopPacketSeen &&\n    prev.stopReason === next.stopReason &&\n    prev.animate === next.animate &&\n    prev.isLastStep === next.isLastStep &&\n    prev.isHover === next.isHover &&\n    prev.defaultExpanded === next.defaultExpanded &&\n    prev.renderTypeOverride === next.renderTypeOverride\n    // Skipping chatState (memoized upstream)\n  );\n}\n\nexport const TimelineRendererComponent = React.memo(\n  function TimelineRendererComponent({\n    packets,\n    chatState,\n    animate,\n    stopPacketSeen,\n    stopReason,\n    defaultExpanded = true,\n    isLastStep,\n    isHover = false,\n    renderTypeOverride,\n    children,\n  }: TimelineRendererComponentProps) {\n    const [isExpanded, setIsExpanded] = useState(defaultExpanded);\n    const handleToggle = useCallback(() => setIsExpanded((prev) => !prev), []);\n    const RendererFn = findRenderer({ packets });\n    const renderType =\n      renderTypeOverride ?? (isExpanded ? RenderType.FULL : RenderType.COMPACT);\n\n    if (!RendererFn) {\n      return children([\n        {\n          icon: null,\n          status: null,\n          content: <></>,\n          supportsCollapsible: false,\n          timelineLayout: \"timeline\",\n          isExpanded,\n          onToggle: handleToggle,\n          renderType,\n          isLastStep: isLastStep ?? true,\n          isHover,\n        },\n      ]);\n    }\n\n    // Helper to add timeline context to a result\n    const enhanceResult = (result: RendererResult): TimelineRendererResult => ({\n      ...result,\n      isExpanded,\n      onToggle: handleToggle,\n      renderType,\n      isLastStep: isLastStep ?? true,\n      isHover,\n      timelineLayout: result.timelineLayout ?? \"timeline\",\n    });\n\n    return (\n      <RendererFn\n        packets={packets as any}\n        state={chatState}\n        onComplete={() => {}}\n        animate={animate}\n        renderType={renderType}\n        stopPacketSeen={stopPacketSeen}\n        stopReason={stopReason}\n        isLastStep={isLastStep}\n        isHover={isHover}\n      >\n        {(rendererOutput: RendererOutput) =>\n          children(rendererOutput.map((result) => enhanceResult(result)))\n        }\n      </RendererFn>\n    );\n  },\n  arePropsEqual\n);\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/TimelineStepComposer.tsx",
    "content": "import React, { FunctionComponent } from \"react\";\nimport { IconProps } from \"@opal/types\";\nimport { StepContainer } from \"./StepContainer\";\nimport {\n  TimelineRendererOutput,\n  TimelineRendererResult,\n} from \"./TimelineRendererComponent\";\n\nexport interface TimelineStepComposerProps {\n  /** Results produced by the active renderer. */\n  results: TimelineRendererOutput;\n  /** Whether the overall step is the last in the timeline (affects connector). */\n  isLastStep: boolean;\n  /** Whether the overall step is the first in the timeline (affects connector). */\n  isFirstStep: boolean;\n  /** Whether the timeline has a single step (used to hide headers). */\n  isSingleStep?: boolean;\n  /** Whether StepContainer should show collapse controls. */\n  collapsible?: boolean;\n  /** Optional resolver for custom collapsed icon per result. */\n  getCollapsedIcon?: (\n    result: TimelineRendererResult\n  ) => FunctionComponent<IconProps> | undefined;\n}\n\n/**\n * TimelineStepComposer renders renderer results into either raw content blocks\n * or StepContainer-wrapped timeline rows based on the layout contract.\n */\nexport function TimelineStepComposer({\n  results,\n  isLastStep,\n  isFirstStep,\n  isSingleStep = false,\n  collapsible = true,\n  getCollapsedIcon,\n}: TimelineStepComposerProps) {\n  return (\n    <>\n      {results.map((result, index) =>\n        result.timelineLayout === \"content\" ? (\n          <React.Fragment key={index}>{result.content}</React.Fragment>\n        ) : (\n          <StepContainer\n            key={index}\n            stepIcon={result.icon as FunctionComponent<IconProps> | undefined}\n            header={result.status}\n            isExpanded={result.isExpanded}\n            onToggle={result.onToggle}\n            collapsible={\n              collapsible && (!isSingleStep || !!result.alwaysCollapsible)\n            }\n            supportsCollapsible={result.supportsCollapsible}\n            isLastStep={index === results.length - 1 && isLastStep}\n            isFirstStep={index === 0 && isFirstStep}\n            hideHeader={\n              results.length === 1 &&\n              isSingleStep &&\n              !result.supportsCollapsible\n            }\n            collapsedIcon={\n              getCollapsedIcon ? getCollapsedIcon(result) : undefined\n            }\n            noPaddingRight={result.noPaddingRight ?? false}\n            isHover={result.isHover}\n            surfaceBackground={result.surfaceBackground}\n          >\n            {result.content}\n          </StepContainer>\n        )\n      )}\n    </>\n  );\n}\n\nexport default TimelineStepComposer;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/headers/CompletedHeader.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { SvgFold, SvgExpand, SvgAddLines, SvgMaximize2 } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport Tag from \"@/refresh-components/buttons/Tag\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { ContentAction } from \"@opal/layouts\";\nimport { formatDurationSeconds } from \"@/lib/time\";\nimport { noProp } from \"@/lib/utils\";\nimport MemoriesModal from \"@/refresh-components/modals/MemoriesModal\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\n\n// =============================================================================\n// MemoryTagWithTooltip\n// =============================================================================\n\ninterface MemoryTagWithTooltipProps {\n  memoryText: string | null;\n  memoryOperation: \"add\" | \"update\" | null;\n  memoryId: number | null;\n  memoryIndex: number | null;\n}\n\nfunction MemoryTagWithTooltip({\n  memoryText,\n  memoryOperation,\n  memoryId,\n  memoryIndex,\n}: MemoryTagWithTooltipProps) {\n  const memoriesModal = useCreateModal();\n\n  const operationLabel =\n    memoryOperation === \"add\" ? \"Added to memories\" : \"Updated memory\";\n\n  const tag = <Tag icon={SvgAddLines} label={operationLabel} />;\n\n  if (!memoryText) return tag;\n\n  return (\n    <>\n      <memoriesModal.Provider>\n        <MemoriesModal\n          initialTargetMemoryId={memoryId}\n          initialTargetIndex={memoryIndex}\n          highlightOnOpen\n        />\n      </memoriesModal.Provider>\n      {memoriesModal.isOpen ? (\n        <span>{tag}</span>\n      ) : (\n        <SimpleTooltip\n          delayDuration={0}\n          side=\"bottom\"\n          className=\"bg-background-neutral-00 text-text-01 shadow-md max-w-[17.5rem] p-1\"\n          tooltip={\n            <Section\n              flexDirection=\"column\"\n              alignItems=\"start\"\n              padding={0.25}\n              gap={0.25}\n              height=\"auto\"\n            >\n              <div className=\"p-1\">\n                <Text as=\"p\" secondaryBody text03>\n                  {memoryText}\n                </Text>\n              </div>\n              <ContentAction\n                icon={SvgAddLines}\n                title={operationLabel}\n                sizePreset=\"secondary\"\n                paddingVariant=\"sm\"\n                variant=\"body\"\n                prominence=\"muted\"\n                rightChildren={\n                  <Button\n                    prominence=\"tertiary\"\n                    size=\"sm\"\n                    icon={SvgMaximize2}\n                    onClick={(e) => {\n                      e.stopPropagation();\n                      memoriesModal.toggle(true);\n                    }}\n                  />\n                }\n              />\n            </Section>\n          }\n        >\n          <span>{tag}</span>\n        </SimpleTooltip>\n      )}\n    </>\n  );\n}\n\n// =============================================================================\n// CompletedHeader\n// =============================================================================\n\nexport interface CompletedHeaderProps {\n  totalSteps: number;\n  collapsible: boolean;\n  isExpanded: boolean;\n  onToggle: () => void;\n  processingDurationSeconds?: number;\n  generatedImageCount?: number;\n  isMemoryOnly?: boolean;\n  memoryText?: string | null;\n  memoryOperation?: \"add\" | \"update\" | null;\n  memoryId?: number | null;\n  memoryIndex?: number | null;\n}\n\n/** Header when completed - handles both collapsed and expanded states */\nexport const CompletedHeader = React.memo(function CompletedHeader({\n  totalSteps,\n  collapsible,\n  isExpanded,\n  onToggle,\n  processingDurationSeconds = 0,\n  generatedImageCount = 0,\n  isMemoryOnly = false,\n  memoryText = null,\n  memoryOperation = null,\n  memoryId = null,\n  memoryIndex = null,\n}: CompletedHeaderProps) {\n  if (isMemoryOnly) {\n    return (\n      <div className=\"flex w-full justify-between\">\n        <div className=\"flex items-center px-[var(--timeline-header-text-padding-x)] py-[var(--timeline-header-text-padding-y)]\">\n          <MemoryTagWithTooltip\n            memoryText={memoryText}\n            memoryOperation={memoryOperation}\n            memoryId={memoryId}\n            memoryIndex={memoryIndex}\n          />\n        </div>\n        {collapsible && totalSteps > 0 && isExpanded && (\n          <Button\n            prominence=\"tertiary\"\n            size=\"md\"\n            onClick={noProp(onToggle)}\n            rightIcon={isExpanded ? SvgFold : SvgExpand}\n            aria-label=\"Expand timeline\"\n            aria-expanded={isExpanded}\n          >\n            {`${totalSteps} ${totalSteps === 1 ? \"step\" : \"steps\"}`}\n          </Button>\n        )}\n      </div>\n    );\n  }\n\n  const durationText = processingDurationSeconds\n    ? `Thought for ${formatDurationSeconds(processingDurationSeconds)}`\n    : \"Thought for some time\";\n\n  const imageText =\n    generatedImageCount > 0\n      ? `Generated ${generatedImageCount} ${\n          generatedImageCount === 1 ? \"image\" : \"images\"\n        }`\n      : null;\n\n  return (\n    <div\n      role=\"button\"\n      onClick={onToggle}\n      className=\"flex items-center justify-between w-full\"\n    >\n      <div className=\"flex items-center gap-2 px-[var(--timeline-header-text-padding-x)] py-[var(--timeline-header-text-padding-y)]\">\n        <Text as=\"p\" mainUiAction text03>\n          {isExpanded ? durationText : imageText ?? durationText}\n        </Text>\n        {memoryOperation && !isExpanded && (\n          <MemoryTagWithTooltip\n            memoryText={memoryText}\n            memoryOperation={memoryOperation}\n            memoryId={memoryId}\n            memoryIndex={memoryIndex}\n          />\n        )}\n      </div>\n\n      {collapsible && totalSteps > 0 && (\n        <Button\n          prominence=\"tertiary\"\n          size=\"md\"\n          onClick={noProp(onToggle)}\n          rightIcon={isExpanded ? SvgFold : SvgExpand}\n          aria-label=\"Expand timeline\"\n          aria-expanded={isExpanded}\n        >\n          {`${totalSteps} ${totalSteps === 1 ? \"step\" : \"steps\"}`}\n        </Button>\n      )}\n    </div>\n  );\n});\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/headers/ParallelStreamingHeader.tsx",
    "content": "import React, { useMemo } from \"react\";\nimport { SvgFold, SvgExpand } from \"@opal/icons\";\nimport Tabs from \"@/refresh-components/Tabs\";\nimport { Button } from \"@opal/components\";\nimport { TurnGroup } from \"../transformers\";\nimport {\n  getToolIcon,\n  getToolName,\n  isToolComplete,\n} from \"../../toolDisplayHelpers\";\n\nexport interface ParallelStreamingHeaderProps {\n  steps: TurnGroup[\"steps\"];\n  activeTab: string;\n  onTabChange: (tab: string) => void;\n  collapsible: boolean;\n  isExpanded: boolean;\n  onToggle: () => void;\n}\n\n/** Header during streaming with parallel tools - tabs only */\nexport const ParallelStreamingHeader = React.memo(\n  function ParallelStreamingHeader({\n    steps,\n    activeTab,\n    onTabChange,\n    collapsible,\n    isExpanded,\n    onToggle,\n  }: ParallelStreamingHeaderProps) {\n    // Memoized loading states for each step\n    const loadingStates = useMemo(\n      () =>\n        new Map(\n          steps.map((step) => [\n            step.key,\n            step.packets.length > 0 && !isToolComplete(step.packets),\n          ])\n        ),\n      [steps]\n    );\n\n    return (\n      <Tabs value={activeTab} onValueChange={onTabChange}>\n        <Tabs.List\n          variant=\"pill\"\n          enableScrollArrows\n          rightContent={\n            collapsible ? (\n              <Button\n                prominence=\"tertiary\"\n                size=\"sm\"\n                onClick={onToggle}\n                icon={isExpanded ? SvgFold : SvgExpand}\n                aria-label={\n                  isExpanded ? \"Collapse timeline\" : \"Expand timeline\"\n                }\n                aria-expanded={isExpanded}\n              />\n            ) : undefined\n          }\n          className=\"bg-transparent\"\n        >\n          {steps.map((step) => (\n            <Tabs.Trigger\n              key={step.key}\n              value={step.key}\n              variant=\"pill\"\n              isLoading={loadingStates.get(step.key)}\n            >\n              <span className=\"flex items-center gap-1.5\">\n                {getToolIcon(step.packets)}\n                {getToolName(step.packets)}\n              </span>\n            </Tabs.Trigger>\n          ))}\n        </Tabs.List>\n      </Tabs>\n    );\n  }\n);\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/headers/StoppedHeader.tsx",
    "content": "import React from \"react\";\nimport { SvgFold, SvgExpand } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn, noProp } from \"@/lib/utils\";\n\nexport interface StoppedHeaderProps {\n  totalSteps: number;\n  collapsible: boolean;\n  isExpanded: boolean;\n  onToggle: () => void;\n}\n\n/** Header when user stopped/cancelled */\nexport const StoppedHeader = React.memo(function StoppedHeader({\n  totalSteps,\n  collapsible,\n  isExpanded,\n  onToggle,\n}: StoppedHeaderProps) {\n  const isInteractive = collapsible && totalSteps > 0;\n\n  return (\n    <div\n      role={isInteractive ? \"button\" : undefined}\n      onClick={isInteractive ? onToggle : undefined}\n      className={cn(\n        \"flex items-center justify-between w-full rounded-12\",\n        isInteractive ? \"cursor-pointer\" : \"cursor-default\"\n      )}\n      aria-disabled={isInteractive ? undefined : true}\n    >\n      <div className=\"px-[var(--timeline-header-text-padding-x)] py-[var(--timeline-header-text-padding-y)]\">\n        <Text as=\"p\" mainUiAction text03>\n          Interrupted Thinking\n        </Text>\n      </div>\n\n      {isInteractive && (\n        <Button\n          prominence=\"tertiary\"\n          size=\"md\"\n          onClick={noProp(onToggle)}\n          rightIcon={isExpanded ? SvgFold : SvgExpand}\n          aria-label={isExpanded ? \"Collapse timeline\" : \"Expand timeline\"}\n          aria-expanded={isExpanded}\n        >\n          {`${totalSteps} ${totalSteps === 1 ? \"step\" : \"steps\"}`}\n        </Button>\n      )}\n    </div>\n  );\n});\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/headers/StreamingHeader.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { SvgFold, SvgExpand } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { useStreamingDuration } from \"../hooks/useStreamingDuration\";\nimport { formatDurationSeconds } from \"@/lib/time\";\n\nexport interface StreamingHeaderProps {\n  headerText: string;\n  collapsible: boolean;\n  buttonTitle?: string;\n  isExpanded: boolean;\n  onToggle: () => void;\n  streamingStartTime?: number;\n  /** Tool processing duration from backend (freezes timer when available) */\n  toolProcessingDuration?: number;\n}\n\n/** Header during streaming - shimmer text with current activity */\nexport const StreamingHeader = React.memo(function StreamingHeader({\n  headerText,\n  collapsible,\n  buttonTitle,\n  isExpanded,\n  onToggle,\n  streamingStartTime,\n  toolProcessingDuration,\n}: StreamingHeaderProps) {\n  // Use backend duration when available, otherwise continue live timer\n  const elapsedSeconds = useStreamingDuration(\n    toolProcessingDuration === undefined, // Stop updating when we have backend duration\n    streamingStartTime,\n    toolProcessingDuration\n  );\n  const showElapsedTime =\n    isExpanded && streamingStartTime && elapsedSeconds > 0;\n\n  return (\n    <>\n      <div className=\"px-[var(--timeline-header-text-padding-x)] py-[var(--timeline-header-text-padding-y)]\">\n        <Text\n          as=\"p\"\n          mainUiAction\n          text03\n          className=\"animate-shimmer bg-[length:200%_100%] bg-[linear-gradient(90deg,var(--shimmer-base)_10%,var(--shimmer-highlight)_40%,var(--shimmer-base)_70%)] bg-clip-text text-transparent\"\n        >\n          {headerText}\n        </Text>\n      </div>\n\n      {collapsible &&\n        (buttonTitle ? (\n          <Button\n            prominence=\"tertiary\"\n            size=\"md\"\n            onClick={onToggle}\n            rightIcon={isExpanded ? SvgFold : SvgExpand}\n            aria-expanded={isExpanded}\n          >\n            {buttonTitle}\n          </Button>\n        ) : showElapsedTime ? (\n          <Button\n            prominence=\"tertiary\"\n            size=\"md\"\n            onClick={onToggle}\n            rightIcon={SvgFold}\n            aria-label=\"Collapse timeline\"\n            aria-expanded={true}\n          >\n            {formatDurationSeconds(elapsedSeconds)}\n          </Button>\n        ) : (\n          <Button\n            prominence=\"tertiary\"\n            size=\"md\"\n            onClick={onToggle}\n            icon={isExpanded ? SvgFold : SvgExpand}\n            aria-label={isExpanded ? \"Collapse timeline\" : \"Expand timeline\"}\n            aria-expanded={isExpanded}\n          />\n        ))}\n    </>\n  );\n});\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/__tests__/testHelpers.ts",
    "content": "/**\n * Shared test helpers for packet processing tests\n */\nimport {\n  Packet,\n  PacketType,\n  Placement,\n  StopReason,\n} from \"@/app/app/services/streamingModels\";\nimport { OnyxDocument } from \"@/lib/search/interfaces\";\n\n// Core packet factory\nexport function createPacket(\n  type: PacketType,\n  placement: Partial<Placement> = {},\n  objOverrides: Record<string, unknown> = {}\n): Packet {\n  return {\n    placement: {\n      turn_index: 0,\n      tab_index: 0,\n      ...placement,\n    },\n    obj: {\n      type,\n      ...objOverrides,\n    },\n  } as Packet;\n}\n\n// Stop packet\nexport function createStopPacket(\n  stopReason?: StopReason,\n  placement: Partial<Placement> = {}\n): Packet {\n  return createPacket(PacketType.STOP, placement, {\n    stop_reason: stopReason,\n  });\n}\n\n// Branching packet\nexport function createBranchingPacket(\n  numBranches: number,\n  turnIndex: number\n): Packet {\n  return createPacket(\n    PacketType.TOP_LEVEL_BRANCHING,\n    { turn_index: turnIndex },\n    { num_parallel_branches: numBranches }\n  );\n}\n\n// Message packet\nexport function createMessageStartPacket(\n  placement: Partial<Placement> = {},\n  preAnswerProcessingSeconds?: number\n): Packet {\n  return createPacket(PacketType.MESSAGE_START, placement, {\n    id: \"msg-1\",\n    content: \"\",\n    final_documents: null,\n    ...(preAnswerProcessingSeconds !== undefined && {\n      pre_answer_processing_seconds: preAnswerProcessingSeconds,\n    }),\n  });\n}\n\n// Citation packet\nexport function createCitationPacket(\n  citationNumber: number,\n  documentId: string,\n  placement: Partial<Placement> = {}\n): Packet {\n  return createPacket(PacketType.CITATION_INFO, placement, {\n    citation_number: citationNumber,\n    document_id: documentId,\n  });\n}\n\n// Image generation packet\nexport function createImageDeltaPacket(\n  imageCount: number,\n  placement: Partial<Placement> = {}\n): Packet {\n  const images = Array.from({ length: imageCount }, (_, i) => ({\n    file_id: `file-${i}`,\n    url: `https://example.com/image-${i}.png`,\n    revised_prompt: `Image ${i}`,\n  }));\n  return createPacket(PacketType.IMAGE_GENERATION_TOOL_DELTA, placement, {\n    images,\n  });\n}\n\n// Search Tool helpers\nexport function createSearchToolStartPacket(\n  placement: Partial<Placement> = {},\n  isInternetSearch?: boolean\n): Packet {\n  return createPacket(PacketType.SEARCH_TOOL_START, placement, {\n    ...(isInternetSearch !== undefined && {\n      is_internet_search: isInternetSearch,\n    }),\n  });\n}\n\nexport function createSearchToolQueriesPacket(\n  queries: string[],\n  placement: Partial<Placement> = {}\n): Packet {\n  return createPacket(PacketType.SEARCH_TOOL_QUERIES_DELTA, placement, {\n    queries,\n  });\n}\n\nexport function createSearchToolDocumentsPacket(\n  documents: Partial<OnyxDocument>[],\n  placement: Partial<Placement> = {}\n): Packet {\n  return createPacket(PacketType.SEARCH_TOOL_DOCUMENTS_DELTA, placement, {\n    documents,\n  });\n}\n\n// Fetch Tool helpers\nexport function createFetchToolStartPacket(\n  placement: Partial<Placement> = {}\n): Packet {\n  return createPacket(PacketType.FETCH_TOOL_START, placement);\n}\n\nexport function createFetchToolUrlsPacket(\n  urls: string[],\n  placement: Partial<Placement> = {}\n): Packet {\n  return createPacket(PacketType.FETCH_TOOL_URLS, placement, {\n    urls,\n  });\n}\n\nexport function createFetchToolDocumentsPacket(\n  documents: Partial<OnyxDocument>[],\n  placement: Partial<Placement> = {}\n): Packet {\n  return createPacket(PacketType.FETCH_TOOL_DOCUMENTS, placement, {\n    documents,\n  });\n}\n\n// Python Tool helpers\nexport function createPythonToolStartPacket(\n  code: string,\n  placement: Partial<Placement> = {}\n): Packet {\n  return createPacket(PacketType.PYTHON_TOOL_START, placement, {\n    code,\n  });\n}\n\nexport function createPythonToolDeltaPacket(\n  stdout: string,\n  stderr: string,\n  fileIds: string[],\n  placement: Partial<Placement> = {}\n): Packet {\n  return createPacket(PacketType.PYTHON_TOOL_DELTA, placement, {\n    stdout,\n    stderr,\n    file_ids: fileIds,\n  });\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/packetProcessor.test.ts",
    "content": "/**\n * Unit tests for packetProcessor.ts\n *\n * Tests the pure packet processing functions that handle streaming packet parsing,\n * grouping, and state management. These tests serve as documentation for the\n * packet processing logic and prevent regressions.\n */\nimport {\n  Packet,\n  PacketType,\n  StopReason,\n} from \"@/app/app/services/streamingModels\";\nimport { createInitialState, processPackets } from \"./packetProcessor\";\nimport {\n  createPacket,\n  createStopPacket,\n  createCitationPacket,\n  createBranchingPacket,\n  createMessageStartPacket,\n  createImageDeltaPacket,\n  createSearchToolStartPacket,\n  createSearchToolQueriesPacket,\n  createSearchToolDocumentsPacket,\n  createFetchToolStartPacket,\n  createFetchToolUrlsPacket,\n  createFetchToolDocumentsPacket,\n  createPythonToolStartPacket,\n  createPythonToolDeltaPacket,\n} from \"./__tests__/testHelpers\";\n\n// ============================================================================\n// Tests\n// ============================================================================\n\ndescribe(\"packetProcessor\", () => {\n  describe(\"createInitialState\", () => {\n    test(\"creates state with correct nodeId\", () => {\n      const state = createInitialState(123);\n      expect(state.nodeId).toBe(123);\n    });\n\n    test(\"initializes nextPacketIndex to 0\", () => {\n      const state = createInitialState(1);\n      expect(state.nextPacketIndex).toBe(0);\n    });\n\n    test(\"initializes empty citations array\", () => {\n      const state = createInitialState(1);\n      expect(state.citations).toEqual([]);\n    });\n\n    test(\"initializes empty seenCitationDocIds set\", () => {\n      const state = createInitialState(1);\n      expect(state.seenCitationDocIds.size).toBe(0);\n    });\n\n    test(\"initializes empty citationMap\", () => {\n      const state = createInitialState(1);\n      expect(state.citationMap).toEqual({});\n    });\n\n    test(\"initializes empty documentMap\", () => {\n      const state = createInitialState(1);\n      expect(state.documentMap.size).toBe(0);\n    });\n\n    test(\"initializes empty groupedPacketsMap\", () => {\n      const state = createInitialState(1);\n      expect(state.groupedPacketsMap.size).toBe(0);\n    });\n\n    test(\"initializes finalAnswerComing to false\", () => {\n      const state = createInitialState(1);\n      expect(state.finalAnswerComing).toBe(false);\n    });\n\n    test(\"initializes stopPacketSeen to false\", () => {\n      const state = createInitialState(1);\n      expect(state.stopPacketSeen).toBe(false);\n    });\n\n    test(\"initializes empty toolGroups array\", () => {\n      const state = createInitialState(1);\n      expect(state.toolGroups).toEqual([]);\n    });\n\n    test(\"initializes empty potentialDisplayGroups array\", () => {\n      const state = createInitialState(1);\n      expect(state.potentialDisplayGroups).toEqual([]);\n    });\n  });\n\n  describe(\"processPackets - basic behavior\", () => {\n    test(\"processes only new packets on subsequent calls\", () => {\n      const state = createInitialState(1);\n\n      // First call with 2 packets\n      const packets1 = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n      ];\n      const result1 = processPackets(state, packets1);\n      expect(result1.nextPacketIndex).toBe(2);\n\n      // Second call with 4 packets (2 new)\n      const packets2 = [\n        ...packets1,\n        createMessageStartPacket({ turn_index: 1 }),\n        createStopPacket(),\n      ];\n      const result2 = processPackets(result1, packets2);\n      expect(result2.nextPacketIndex).toBe(4);\n    });\n\n    test(\"skips null packets\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        null as unknown as Packet,\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      // Should process valid packets without error\n      expect(result.nextPacketIndex).toBe(3);\n      expect(result.toolGroupKeys.has(\"0-0\")).toBe(true);\n    });\n\n    test(\"skips undefined packets\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        undefined as unknown as Packet,\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.nextPacketIndex).toBe(3);\n      expect(result.toolGroupKeys.has(\"0-0\")).toBe(true);\n    });\n\n    test(\"does not rebuild result arrays when no new packets\", () => {\n      const state = createInitialState(1);\n      const packets = [createSearchToolStartPacket({ turn_index: 0 })];\n\n      const result1 = processPackets(state, packets);\n      const toolGroups1 = result1.toolGroups;\n\n      // Process same packets again\n      const result2 = processPackets(result1, packets);\n\n      // Same reference since no new packets processed\n      expect(result2.toolGroups).toBe(toolGroups1);\n    });\n  });\n\n  describe(\"processPackets - stream reset detection\", () => {\n    test(\"resets state when packets array shrinks\", () => {\n      const state = createInitialState(1);\n\n      // Process 5 packets\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createSearchToolQueriesPacket([\"query1\"], { turn_index: 0 }),\n        createSearchToolDocumentsPacket([{ document_id: \"doc-1\" }], {\n          turn_index: 0,\n        }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n        createMessageStartPacket({ turn_index: 1 }),\n      ];\n      const result1 = processPackets(state, packets);\n      expect(result1.nextPacketIndex).toBe(5);\n      expect(result1.documentMap.size).toBe(1);\n\n      // Process with shorter array (simulating reset)\n      const shorterPackets = [createSearchToolStartPacket({ turn_index: 0 })];\n      const result2 = processPackets(result1, shorterPackets);\n\n      // State should be reset\n      expect(result2.nextPacketIndex).toBe(1);\n      expect(result2.documentMap.size).toBe(0);\n    });\n\n    test(\"preserves nodeId after reset\", () => {\n      const state = createInitialState(42);\n\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n      ];\n      const result1 = processPackets(state, packets);\n\n      // Shrink array to trigger reset\n      const shorterPackets = [createSearchToolStartPacket({ turn_index: 0 })];\n      const result2 = processPackets(result1, shorterPackets);\n\n      expect(result2.nodeId).toBe(42);\n    });\n  });\n\n  describe(\"packet grouping\", () => {\n    test(\"groups packets by turn_index-tab_index key\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),\n        createSearchToolQueriesPacket([\"query\"], {\n          turn_index: 0,\n          tab_index: 0,\n        }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.groupedPacketsMap.has(\"0-0\")).toBe(true);\n      expect(result.groupedPacketsMap.get(\"0-0\")?.length).toBe(3);\n    });\n\n    test(\"separates packets with different turn_index\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n        createSearchToolStartPacket({ turn_index: 1 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.groupedPacketsMap.has(\"0-0\")).toBe(true);\n      expect(result.groupedPacketsMap.has(\"1-0\")).toBe(true);\n    });\n\n    test(\"separates packets with different tab_index (parallel tools)\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.groupedPacketsMap.has(\"0-0\")).toBe(true);\n      expect(result.groupedPacketsMap.has(\"0-1\")).toBe(true);\n      expect(result.groupedPacketsMap.get(\"0-0\")?.length).toBe(2);\n      expect(result.groupedPacketsMap.get(\"0-1\")?.length).toBe(2);\n    });\n  });\n\n  describe(\"group categorization\", () => {\n    test(\"categorization happens only on first packet of group\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        // These delta packets should not affect categorization\n        createSearchToolQueriesPacket([\"query\"], { turn_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroupKeys.has(\"0-0\")).toBe(true);\n      expect(result.toolGroupKeys.size).toBe(1);\n    });\n\n    // Parameterized tests for tool packet types\n    test.each([\n      [PacketType.SEARCH_TOOL_START, \"SEARCH_TOOL_START\"],\n      [PacketType.PYTHON_TOOL_START, \"PYTHON_TOOL_START\"],\n      [PacketType.FETCH_TOOL_START, \"FETCH_TOOL_START\"],\n      [PacketType.CUSTOM_TOOL_START, \"CUSTOM_TOOL_START\"],\n      [PacketType.FILE_READER_START, \"FILE_READER_START\"],\n      [PacketType.REASONING_START, \"REASONING_START\"],\n      [PacketType.DEEP_RESEARCH_PLAN_START, \"DEEP_RESEARCH_PLAN_START\"],\n      [PacketType.RESEARCH_AGENT_START, \"RESEARCH_AGENT_START\"],\n    ])(\"%s categorizes as tool group\", (packetType) => {\n      const state = createInitialState(1);\n      const packets = [createPacket(packetType, { turn_index: 0 })];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroupKeys.has(\"0-0\")).toBe(true);\n    });\n\n    // Parameterized tests for display packet types\n    test.each([\n      [PacketType.MESSAGE_START, \"MESSAGE_START\"],\n      [PacketType.IMAGE_GENERATION_TOOL_START, \"IMAGE_GENERATION_TOOL_START\"],\n    ])(\"%s categorizes as display group\", (packetType) => {\n      const state = createInitialState(1);\n      const packets = [createPacket(packetType, { turn_index: 0 })];\n      const result = processPackets(state, packets);\n\n      expect(result.displayGroupKeys.has(\"0-0\")).toBe(true);\n    });\n  });\n\n  describe(\"SECTION_END and ERROR tracking\", () => {\n    test(\"tracks SECTION_END in groupKeysWithSectionEnd\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.groupKeysWithSectionEnd.has(\"0-0\")).toBe(true);\n    });\n\n    test(\"tracks ERROR as completion marker\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createPacket(\n          PacketType.ERROR,\n          { turn_index: 0 },\n          { message: \"Failed\" }\n        ),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.groupKeysWithSectionEnd.has(\"0-0\")).toBe(true);\n    });\n  });\n\n  describe(\"handleTopLevelBranching\", () => {\n    test(\"stores expected branch count in expectedBranches map\", () => {\n      const state = createInitialState(1);\n      const packets = [createBranchingPacket(3, 0)];\n      const result = processPackets(state, packets);\n\n      expect(result.expectedBranches.get(0)).toBe(3);\n    });\n\n    test(\"does not add branching packet to any group\", () => {\n      const state = createInitialState(1);\n      const packets = [createBranchingPacket(2, 0)];\n      const result = processPackets(state, packets);\n\n      expect(result.groupedPacketsMap.size).toBe(0);\n    });\n\n    test(\"handles multiple branching packets at different turns\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createBranchingPacket(2, 0),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),\n        createBranchingPacket(3, 1),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.expectedBranches.get(0)).toBe(2);\n      expect(result.expectedBranches.get(1)).toBe(3);\n    });\n  });\n\n  describe(\"handleTurnTransition\", () => {\n    test(\"injects SECTION_END when turn_index changes\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        // No explicit SECTION_END before turn change\n        createMessageStartPacket({ turn_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      // SECTION_END should be injected for turn 0\n      expect(result.groupKeysWithSectionEnd.has(\"0-0\")).toBe(true);\n    });\n\n    test(\"does not inject SECTION_END when only tab_index changes\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      // No SECTION_END should be injected for parallel tools\n      expect(result.groupKeysWithSectionEnd.has(\"0-0\")).toBe(false);\n      expect(result.groupKeysWithSectionEnd.has(\"0-1\")).toBe(false);\n    });\n\n    test(\"does not inject duplicate SECTION_END\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n        createMessageStartPacket({ turn_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      const sectionEndCount =\n        group?.filter((p) => p.obj.type === PacketType.SECTION_END).length ?? 0;\n      expect(sectionEndCount).toBe(1);\n    });\n\n    test(\"injects SECTION_END for all previous groups on turn change\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createBranchingPacket(2, 0),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),\n        // Turn changes\n        createMessageStartPacket({ turn_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.groupKeysWithSectionEnd.has(\"0-0\")).toBe(true);\n      expect(result.groupKeysWithSectionEnd.has(\"0-1\")).toBe(true);\n    });\n  });\n\n  describe(\"Search Tool flow\", () => {\n    test(\"SEARCH_TOOL_START categorizes group as tool\", () => {\n      const state = createInitialState(1);\n      const packets = [createSearchToolStartPacket({ turn_index: 0 })];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroupKeys.has(\"0-0\")).toBe(true);\n    });\n\n    test(\"SEARCH_TOOL_START with is_internet_search=true\", () => {\n      const state = createInitialState(1);\n      const packets = [createSearchToolStartPacket({ turn_index: 0 }, true)];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      expect(\n        (group?.[0]?.obj as { is_internet_search?: boolean }).is_internet_search\n      ).toBe(true);\n    });\n\n    test(\"SEARCH_TOOL_START with is_internet_search=false\", () => {\n      const state = createInitialState(1);\n      const packets = [createSearchToolStartPacket({ turn_index: 0 }, false)];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      expect(\n        (group?.[0]?.obj as { is_internet_search?: boolean }).is_internet_search\n      ).toBe(false);\n    });\n\n    test(\"SEARCH_TOOL_QUERIES_DELTA stores queries in packet\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createSearchToolQueriesPacket([\"what is AI\", \"machine learning\"], {\n          turn_index: 0,\n        }),\n      ];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      expect((group?.[1]?.obj as { queries: string[] }).queries).toEqual([\n        \"what is AI\",\n        \"machine learning\",\n      ]);\n    });\n\n    test(\"SEARCH_TOOL_DOCUMENTS_DELTA extracts documents to documentMap\", () => {\n      const state = createInitialState(1);\n      const docs = [\n        { document_id: \"doc-1\", semantic_identifier: \"Doc 1\" },\n        { document_id: \"doc-2\", semantic_identifier: \"Doc 2\" },\n      ];\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createSearchToolDocumentsPacket(docs, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.documentMap.get(\"doc-1\")).toBeDefined();\n      expect(result.documentMap.get(\"doc-2\")).toBeDefined();\n    });\n\n    test(\"full search flow: START -> QUERIES -> DOCUMENTS -> SECTION_END\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }, false),\n        createSearchToolQueriesPacket([\"test query\"], { turn_index: 0 }),\n        createSearchToolDocumentsPacket([{ document_id: \"doc-1\" }], {\n          turn_index: 0,\n        }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroups.length).toBe(1);\n      expect(result.groupKeysWithSectionEnd.has(\"0-0\")).toBe(true);\n      expect(result.documentMap.has(\"doc-1\")).toBe(true);\n    });\n\n    test(\"multiple QUERIES_DELTA packets accumulate\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createSearchToolQueriesPacket([\"query 1\"], { turn_index: 0 }),\n        createSearchToolQueriesPacket([\"query 2\", \"query 3\"], {\n          turn_index: 0,\n        }),\n      ];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      expect(group?.length).toBe(3);\n    });\n\n    test(\"multiple DOCUMENTS_DELTA packets accumulate documents\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createSearchToolDocumentsPacket([{ document_id: \"doc-1\" }], {\n          turn_index: 0,\n        }),\n        createSearchToolDocumentsPacket([{ document_id: \"doc-2\" }], {\n          turn_index: 0,\n        }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.documentMap.has(\"doc-1\")).toBe(true);\n      expect(result.documentMap.has(\"doc-2\")).toBe(true);\n    });\n\n    test(\"SEARCH_TOOL_START resets finalAnswerComing if after message\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createMessageStartPacket({ turn_index: 0 }),\n        // Tool comes after message (Claude workaround)\n        createSearchToolStartPacket({ turn_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      // finalAnswerComing should be reset since tool follows message\n      expect(result.finalAnswerComing).toBe(false);\n    });\n\n    test(\"parallel search tools at same turn_index with different tab_index\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createBranchingPacket(2, 0),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),\n        createSearchToolDocumentsPacket([{ document_id: \"doc-a\" }], {\n          turn_index: 0,\n          tab_index: 0,\n        }),\n        createSearchToolDocumentsPacket([{ document_id: \"doc-b\" }], {\n          turn_index: 0,\n          tab_index: 1,\n        }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.expectedBranches.get(0)).toBe(2);\n      expect(result.toolGroups.length).toBe(2);\n      expect(result.documentMap.has(\"doc-a\")).toBe(true);\n      expect(result.documentMap.has(\"doc-b\")).toBe(true);\n    });\n  });\n\n  describe(\"Fetch Tool flow\", () => {\n    test(\"FETCH_TOOL_START categorizes group as tool\", () => {\n      const state = createInitialState(1);\n      const packets = [createFetchToolStartPacket({ turn_index: 0 })];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroupKeys.has(\"0-0\")).toBe(true);\n    });\n\n    test(\"FETCH_TOOL_URLS stores urls in packet\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createFetchToolStartPacket({ turn_index: 0 }),\n        createFetchToolUrlsPacket([\"https://example.com\", \"https://test.com\"], {\n          turn_index: 0,\n        }),\n      ];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      expect((group?.[1]?.obj as { urls: string[] }).urls).toEqual([\n        \"https://example.com\",\n        \"https://test.com\",\n      ]);\n    });\n\n    test(\"FETCH_TOOL_DOCUMENTS extracts documents to documentMap\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createFetchToolStartPacket({ turn_index: 0 }),\n        createFetchToolDocumentsPacket([{ document_id: \"fetched-doc-1\" }], {\n          turn_index: 0,\n        }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.documentMap.has(\"fetched-doc-1\")).toBe(true);\n    });\n\n    test(\"full fetch flow: START -> URLS -> DOCUMENTS -> SECTION_END\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createFetchToolStartPacket({ turn_index: 0 }),\n        createFetchToolUrlsPacket([\"https://example.com\"], { turn_index: 0 }),\n        createFetchToolDocumentsPacket([{ document_id: \"url-doc\" }], {\n          turn_index: 0,\n        }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroups.length).toBe(1);\n      expect(result.groupKeysWithSectionEnd.has(\"0-0\")).toBe(true);\n    });\n\n    test(\"multiple URLs in single FETCH_TOOL_URLS packet\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createFetchToolStartPacket({ turn_index: 0 }),\n        createFetchToolUrlsPacket(\n          [\"https://a.com\", \"https://b.com\", \"https://c.com\"],\n          { turn_index: 0 }\n        ),\n      ];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      expect((group?.[1]?.obj as { urls: string[] }).urls.length).toBe(3);\n    });\n\n    test(\"empty urls array handling\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createFetchToolStartPacket({ turn_index: 0 }),\n        createFetchToolUrlsPacket([], { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      expect((group?.[1]?.obj as { urls: string[] }).urls).toEqual([]);\n    });\n\n    test(\"FETCH_TOOL_START resets finalAnswerComing if after message\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createMessageStartPacket({ turn_index: 0 }),\n        createFetchToolStartPacket({ turn_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.finalAnswerComing).toBe(false);\n    });\n\n    test(\"fetch tool with ERROR instead of SECTION_END\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createFetchToolStartPacket({ turn_index: 0 }),\n        createFetchToolUrlsPacket([\"https://invalid.com\"], { turn_index: 0 }),\n        createPacket(\n          PacketType.ERROR,\n          { turn_index: 0 },\n          { error: \"Failed to fetch\" }\n        ),\n      ];\n      const result = processPackets(state, packets);\n\n      // ERROR counts as section end\n      expect(result.groupKeysWithSectionEnd.has(\"0-0\")).toBe(true);\n    });\n  });\n\n  describe(\"Python Tool flow\", () => {\n    test(\"PYTHON_TOOL_START categorizes group as tool\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createPythonToolStartPacket(\"print('hello')\", { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroupKeys.has(\"0-0\")).toBe(true);\n    });\n\n    test(\"PYTHON_TOOL_START stores code in packet\", () => {\n      const state = createInitialState(1);\n      const code = \"import pandas as pd\\ndf = pd.read_csv('data.csv')\";\n      const packets = [createPythonToolStartPacket(code, { turn_index: 0 })];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      expect((group?.[0]?.obj as { code: string }).code).toBe(code);\n    });\n\n    test(\"PYTHON_TOOL_DELTA stores stdout/stderr/file_ids\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createPythonToolStartPacket(\"print('test')\", { turn_index: 0 }),\n        createPythonToolDeltaPacket(\"test\\n\", \"\", [], { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      const delta = group?.[1]?.obj as {\n        stdout: string;\n        stderr: string;\n        file_ids: string[];\n      };\n      expect(delta.stdout).toBe(\"test\\n\");\n      expect(delta.stderr).toBe(\"\");\n    });\n\n    test(\"PYTHON_TOOL_DELTA with file_ids (generated files)\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createPythonToolStartPacket(\"plt.savefig('chart.png')\", {\n          turn_index: 0,\n        }),\n        createPythonToolDeltaPacket(\"\", \"\", [\"file-123\", \"file-456\"], {\n          turn_index: 0,\n        }),\n      ];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      expect((group?.[1]?.obj as { file_ids: string[] }).file_ids).toEqual([\n        \"file-123\",\n        \"file-456\",\n      ]);\n    });\n\n    test(\"multiple DELTA packets (streaming output)\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createPythonToolStartPacket(\"for i in range(3): print(i)\", {\n          turn_index: 0,\n        }),\n        createPythonToolDeltaPacket(\"0\\n\", \"\", [], { turn_index: 0 }),\n        createPythonToolDeltaPacket(\"1\\n\", \"\", [], { turn_index: 0 }),\n        createPythonToolDeltaPacket(\"2\\n\", \"\", [], { turn_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      expect(group?.length).toBe(5); // START + 3 DELTAs + SECTION_END\n    });\n\n    test(\"python tool with stderr (error output)\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createPythonToolStartPacket(\"undefined_var\", { turn_index: 0 }),\n        createPythonToolDeltaPacket(\n          \"\",\n          \"NameError: name 'undefined_var' is not defined\",\n          [],\n          { turn_index: 0 }\n        ),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      const group = result.groupedPacketsMap.get(\"0-0\");\n      expect((group?.[1]?.obj as { stderr: string }).stderr).toContain(\n        \"NameError\"\n      );\n    });\n\n    test(\"PYTHON_TOOL_START resets finalAnswerComing if after message\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createMessageStartPacket({ turn_index: 0 }),\n        createPythonToolStartPacket(\"print(1)\", { turn_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.finalAnswerComing).toBe(false);\n    });\n\n    test(\"python tool with ERROR instead of SECTION_END\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createPythonToolStartPacket(\"crash()\", { turn_index: 0 }),\n        createPacket(\n          PacketType.ERROR,\n          { turn_index: 0 },\n          { message: \"Execution failed\" }\n        ),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.groupKeysWithSectionEnd.has(\"0-0\")).toBe(true);\n    });\n  });\n\n  describe(\"handleStreamingStatusPacket\", () => {\n    test(\"sets finalAnswerComing on MESSAGE_START\", () => {\n      const state = createInitialState(1);\n      const packets = [createMessageStartPacket({ turn_index: 0 })];\n      const result = processPackets(state, packets);\n\n      expect(result.finalAnswerComing).toBe(true);\n    });\n\n    test(\"sets finalAnswerComing on MESSAGE_DELTA\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createMessageStartPacket({ turn_index: 0 }),\n        createPacket(\n          PacketType.MESSAGE_DELTA,\n          { turn_index: 0 },\n          { content: \"Hello\" }\n        ),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.finalAnswerComing).toBe(true);\n    });\n\n    test(\"sets finalAnswerComing on IMAGE_GENERATION_TOOL_START\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createPacket(PacketType.IMAGE_GENERATION_TOOL_START, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.finalAnswerComing).toBe(true);\n    });\n\n    test(\"captures toolProcessingDuration from MESSAGE_START\", () => {\n      const state = createInitialState(1);\n      const packets = [createMessageStartPacket({ turn_index: 0 }, 2.5)];\n      const result = processPackets(state, packets);\n\n      expect(result.toolProcessingDuration).toBe(2.5);\n    });\n  });\n\n  describe(\"handleStopPacket\", () => {\n    test(\"sets stopPacketSeen to true\", () => {\n      const state = createInitialState(1);\n      const packets = [createStopPacket()];\n      const result = processPackets(state, packets);\n\n      expect(result.stopPacketSeen).toBe(true);\n    });\n\n    test(\"stores stop reason\", () => {\n      const state = createInitialState(1);\n      const packets = [createStopPacket(StopReason.FINISHED)];\n      const result = processPackets(state, packets);\n\n      expect(result.stopReason).toBe(StopReason.FINISHED);\n    });\n\n    test(\"injects SECTION_END for all incomplete groups\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        // No explicit SECTION_END\n        createStopPacket(),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.groupKeysWithSectionEnd.has(\"0-0\")).toBe(true);\n    });\n\n    test(\"does not process duplicate STOP packets\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createStopPacket(StopReason.FINISHED),\n        createStopPacket(StopReason.USER_CANCELLED),\n      ];\n      const result = processPackets(state, packets);\n\n      // First stop reason should be preserved\n      expect(result.stopReason).toBe(StopReason.FINISHED);\n    });\n  });\n\n  describe(\"handleToolAfterMessagePacket\", () => {\n    test(\"resets finalAnswerComing when actual tool follows message\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createMessageStartPacket({ turn_index: 0 }),\n        createSearchToolStartPacket({ turn_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.finalAnswerComing).toBe(false);\n    });\n\n    test(\"REASONING_START does NOT reset finalAnswerComing (critical fix)\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createMessageStartPacket({ turn_index: 0 }),\n        createPacket(PacketType.REASONING_START, { turn_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      // Reasoning is just thinking, not an actual tool call\n      expect(result.finalAnswerComing).toBe(true);\n    });\n\n    test(\"REASONING_DELTA does NOT reset finalAnswerComing\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createMessageStartPacket({ turn_index: 0 }),\n        createPacket(\n          PacketType.REASONING_DELTA,\n          { turn_index: 1 },\n          { reasoning: \"thinking...\" }\n        ),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.finalAnswerComing).toBe(true);\n    });\n\n    test(\"does not reset finalAnswerComing if stopPacketSeen\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createMessageStartPacket({ turn_index: 0 }),\n        createStopPacket(),\n        createSearchToolStartPacket({ turn_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      // Stop already seen, so finalAnswerComing should remain true\n      expect(result.finalAnswerComing).toBe(true);\n    });\n  });\n\n  describe(\"image generation counting\", () => {\n    test(\"sets isGeneratingImage on IMAGE_GENERATION_TOOL_START\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createPacket(PacketType.IMAGE_GENERATION_TOOL_START, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.isGeneratingImage).toBe(true);\n    });\n\n    test(\"counts images from IMAGE_GENERATION_TOOL_DELTA\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createPacket(PacketType.IMAGE_GENERATION_TOOL_START, { turn_index: 0 }),\n        createImageDeltaPacket(2, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.generatedImageCount).toBe(2);\n    });\n\n    test(\"accumulates image count from multiple DELTA packets\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createPacket(PacketType.IMAGE_GENERATION_TOOL_START, { turn_index: 0 }),\n        createImageDeltaPacket(1, { turn_index: 0 }),\n        createImageDeltaPacket(2, { turn_index: 0 }),\n        createImageDeltaPacket(1, { turn_index: 0 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.generatedImageCount).toBe(4);\n    });\n  });\n\n  describe(\"buildGroupsFromKeys\", () => {\n    test(\"filters out groups without content packets\", () => {\n      const state = createInitialState(1);\n      // Create a group with only SECTION_END (no content packet)\n      const packets = [createPacket(PacketType.SECTION_END, { turn_index: 0 })];\n      const result = processPackets(state, packets);\n\n      // Group should exist in map but not in result arrays\n      expect(result.groupedPacketsMap.has(\"0-0\")).toBe(true);\n      expect(result.toolGroups.length).toBe(0);\n      expect(result.potentialDisplayGroups.length).toBe(0);\n    });\n\n    test(\"sorts groups by turn_index then tab_index\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 1, tab_index: 1 }),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),\n        createSearchToolStartPacket({ turn_index: 1, tab_index: 0 }),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),\n      ];\n      const result = processPackets(state, packets);\n\n      const keys = result.toolGroups.map(\n        (g) => `${g.turn_index}-${g.tab_index}`\n      );\n      expect(keys).toEqual([\"0-0\", \"0-1\", \"1-0\", \"1-1\"]);\n    });\n\n    test(\"creates new packet array references (immutability)\", () => {\n      const state = createInitialState(1);\n      const packets = [createSearchToolStartPacket({ turn_index: 0 })];\n      const result = processPackets(state, packets);\n\n      const mapPackets = result.groupedPacketsMap.get(\"0-0\");\n      const resultPackets = result.toolGroups[0]?.packets;\n\n      // Should be different array references\n      expect(resultPackets).not.toBe(mapPackets);\n      // But same content\n      expect(resultPackets).toEqual(mapPackets);\n    });\n\n    test(\"includes groups with MESSAGE_START as content\", () => {\n      const state = createInitialState(1);\n      const packets = [createMessageStartPacket({ turn_index: 0 })];\n      const result = processPackets(state, packets);\n\n      expect(result.potentialDisplayGroups.length).toBe(1);\n    });\n\n    test(\"includes groups with SEARCH_TOOL_START as content\", () => {\n      const state = createInitialState(1);\n      const packets = [createSearchToolStartPacket({ turn_index: 0 })];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroups.length).toBe(1);\n    });\n  });\n\n  describe(\"multi-tool scenarios\", () => {\n    test(\"Search + Python + Fetch in same conversation\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        // Turn 0: Search\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),\n        createSearchToolDocumentsPacket([{ document_id: \"search-doc\" }], {\n          turn_index: 0,\n          tab_index: 0,\n        }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 0 }),\n\n        // Turn 1: Python\n        createPythonToolStartPacket(\"analyze()\", {\n          turn_index: 1,\n          tab_index: 0,\n        }),\n        createPythonToolDeltaPacket(\"Result: 42\", \"\", [], {\n          turn_index: 1,\n          tab_index: 0,\n        }),\n        createPacket(PacketType.SECTION_END, { turn_index: 1, tab_index: 0 }),\n\n        // Turn 2: Fetch\n        createFetchToolStartPacket({ turn_index: 2, tab_index: 0 }),\n        createFetchToolUrlsPacket([\"https://api.example.com\"], {\n          turn_index: 2,\n          tab_index: 0,\n        }),\n        createFetchToolDocumentsPacket([{ document_id: \"fetch-doc\" }], {\n          turn_index: 2,\n          tab_index: 0,\n        }),\n        createPacket(PacketType.SECTION_END, { turn_index: 2, tab_index: 0 }),\n\n        // Turn 3: Final answer\n        createMessageStartPacket({ turn_index: 3, tab_index: 0 }),\n        createStopPacket(),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroups.length).toBe(3);\n      expect(result.potentialDisplayGroups.length).toBe(1);\n      expect(result.documentMap.has(\"search-doc\")).toBe(true);\n      expect(result.documentMap.has(\"fetch-doc\")).toBe(true);\n      expect(result.finalAnswerComing).toBe(true);\n      expect(result.stopPacketSeen).toBe(true);\n    });\n\n    test(\"parallel search tools then message\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createBranchingPacket(3, 0),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 2 }),\n        createSearchToolDocumentsPacket([{ document_id: \"doc-0\" }], {\n          turn_index: 0,\n          tab_index: 0,\n        }),\n        createSearchToolDocumentsPacket([{ document_id: \"doc-1\" }], {\n          turn_index: 0,\n          tab_index: 1,\n        }),\n        createSearchToolDocumentsPacket([{ document_id: \"doc-2\" }], {\n          turn_index: 0,\n          tab_index: 2,\n        }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 1 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 2 }),\n        createMessageStartPacket({ turn_index: 1 }),\n        createStopPacket(),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroups.length).toBe(3);\n      expect(result.expectedBranches.get(0)).toBe(3);\n      expect(result.documentMap.size).toBe(3);\n      expect(result.finalAnswerComing).toBe(true);\n    });\n\n    test(\"tool-after-message Claude workaround scenario\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        // Claude sends message first\n        createMessageStartPacket({ turn_index: 0 }),\n        createPacket(\n          PacketType.MESSAGE_DELTA,\n          { turn_index: 0 },\n          { content: \"Let me search for that...\" }\n        ),\n        // Then tool is called (this is the workaround case)\n        createSearchToolStartPacket({ turn_index: 1 }),\n        createSearchToolDocumentsPacket([{ document_id: \"doc-1\" }], {\n          turn_index: 1,\n        }),\n        createPacket(PacketType.SECTION_END, { turn_index: 1 }),\n        // Then actual final answer\n        createMessageStartPacket({ turn_index: 2 }),\n        createStopPacket(),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroups.length).toBe(1);\n      expect(result.potentialDisplayGroups.length).toBe(2);\n      expect(result.finalAnswerComing).toBe(true);\n    });\n\n    test(\"image generation flow\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createPacket(PacketType.IMAGE_GENERATION_TOOL_START, { turn_index: 0 }),\n        createImageDeltaPacket(1, { turn_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n        createStopPacket(),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.isGeneratingImage).toBe(true);\n      expect(result.generatedImageCount).toBe(1);\n      expect(result.finalAnswerComing).toBe(true);\n      expect(result.displayGroupKeys.has(\"0-0\")).toBe(true);\n    });\n\n    test(\"deep research with sub-agents\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createPacket(PacketType.DEEP_RESEARCH_PLAN_START, { turn_index: 0 }),\n        createPacket(\n          PacketType.DEEP_RESEARCH_PLAN_DELTA,\n          { turn_index: 0 },\n          { content: \"Plan...\" }\n        ),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n        createPacket(\n          PacketType.RESEARCH_AGENT_START,\n          { turn_index: 1 },\n          { research_task: \"Research topic A\" }\n        ),\n        createSearchToolStartPacket({ turn_index: 1, sub_turn_index: 0 }),\n        createPacket(PacketType.SECTION_END, {\n          turn_index: 1,\n          sub_turn_index: 0,\n        }),\n        createPacket(PacketType.SECTION_END, { turn_index: 1 }),\n        createMessageStartPacket({ turn_index: 2 }),\n        createStopPacket(),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroups.length).toBe(2); // Plan + Research agent\n      expect(result.potentialDisplayGroups.length).toBe(1);\n    });\n  });\n\n  describe(\"edge cases\", () => {\n    test(\"handles empty packets array\", () => {\n      const state = createInitialState(1);\n      const result = processPackets(state, []);\n\n      expect(result.nextPacketIndex).toBe(0);\n      expect(result.toolGroups).toEqual([]);\n    });\n\n    test(\"handles sparse packets array\", () => {\n      const state = createInitialState(1);\n      const packets: Packet[] = [];\n      packets[0] = createSearchToolStartPacket({ turn_index: 0 });\n      packets[5] = createPacket(PacketType.SECTION_END, { turn_index: 0 });\n\n      const result = processPackets(state, packets);\n\n      // Should handle sparse array\n      expect(result.nextPacketIndex).toBe(6);\n    });\n\n    test(\"handles large turn indices\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 9999 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 9999 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroupKeys.has(\"9999-0\")).toBe(true);\n    });\n\n    test(\"handles large tab indices\", () => {\n      const state = createInitialState(1);\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 999 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 999 }),\n      ];\n      const result = processPackets(state, packets);\n\n      expect(result.toolGroupKeys.has(\"0-999\")).toBe(true);\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/packetProcessor.ts",
    "content": "import {\n  Packet,\n  PacketType,\n  StreamingCitation,\n  StopReason,\n  CitationInfo,\n  SearchToolDocumentsDelta,\n  FetchToolDocuments,\n  TopLevelBranching,\n  Stop,\n  ImageGenerationToolDelta,\n  MessageStart,\n  ToolCallArgumentDelta,\n  CODE_INTERPRETER_TOOL_TYPES,\n} from \"@/app/app/services/streamingModels\";\nimport { CitationMap } from \"@/app/app/interfaces\";\nimport { OnyxDocument } from \"@/lib/search/interfaces\";\nimport {\n  isActualToolCallPacket,\n  isToolPacket,\n  isDisplayPacket,\n} from \"@/app/app/services/packetUtils\";\nimport { parseToolKey } from \"@/app/app/message/messageComponents/toolDisplayHelpers\";\n\n// Re-export parseToolKey for consumers that import from this module\nexport { parseToolKey };\n\n// ============================================================================\n// Types\n// ============================================================================\n\nexport interface ProcessorState {\n  nodeId: number;\n  nextPacketIndex: number;\n\n  // Citations\n  citations: StreamingCitation[];\n  seenCitationDocIds: Set<string>;\n  citationMap: CitationMap;\n\n  // Documents\n  documentMap: Map<string, OnyxDocument>;\n\n  // Packet grouping\n  groupedPacketsMap: Map<string, Packet[]>;\n  seenGroupKeys: Set<string>;\n  groupKeysWithSectionEnd: Set<string>;\n  expectedBranches: Map<number, number>;\n\n  // Pre-categorized groups (populated during packet processing)\n  toolGroupKeys: Set<string>;\n  displayGroupKeys: Set<string>;\n\n  // Image generation status\n  isGeneratingImage: boolean;\n  generatedImageCount: number;\n\n  // Streaming status\n  finalAnswerComing: boolean;\n  stopPacketSeen: boolean;\n  stopReason: StopReason | undefined;\n\n  // Tool processing duration from backend (captured when MESSAGE_START arrives)\n  toolProcessingDuration: number | undefined;\n\n  // Result arrays (built at end of processPackets)\n  toolGroups: GroupedPacket[];\n  potentialDisplayGroups: GroupedPacket[];\n}\n\nexport interface GroupedPacket {\n  turn_index: number;\n  tab_index: number;\n  packets: Packet[];\n}\n\n// ============================================================================\n// State Creation\n// ============================================================================\n\nexport function createInitialState(nodeId: number): ProcessorState {\n  return {\n    nodeId,\n    nextPacketIndex: 0,\n    citations: [],\n    seenCitationDocIds: new Set(),\n    citationMap: {},\n    documentMap: new Map(),\n    groupedPacketsMap: new Map(),\n    seenGroupKeys: new Set(),\n    groupKeysWithSectionEnd: new Set(),\n    expectedBranches: new Map(),\n    toolGroupKeys: new Set(),\n    displayGroupKeys: new Set(),\n    isGeneratingImage: false,\n    generatedImageCount: 0,\n    finalAnswerComing: false,\n    stopPacketSeen: false,\n    stopReason: undefined,\n    toolProcessingDuration: undefined,\n    toolGroups: [],\n    potentialDisplayGroups: [],\n  };\n}\n\n// ============================================================================\n// Helper Functions\n// ============================================================================\n\nfunction getGroupKey(packet: Packet): string {\n  const turnIndex = packet.placement.turn_index;\n  const tabIndex = packet.placement.tab_index ?? 0;\n  return `${turnIndex}-${tabIndex}`;\n}\n\nfunction injectSectionEnd(state: ProcessorState, groupKey: string): void {\n  if (state.groupKeysWithSectionEnd.has(groupKey)) {\n    return; // Already has SECTION_END\n  }\n\n  const { turn_index, tab_index } = parseToolKey(groupKey);\n\n  const syntheticPacket: Packet = {\n    placement: { turn_index, tab_index },\n    obj: { type: PacketType.SECTION_END },\n  };\n\n  const existingGroup = state.groupedPacketsMap.get(groupKey);\n  if (existingGroup) {\n    existingGroup.push(syntheticPacket);\n  }\n  state.groupKeysWithSectionEnd.add(groupKey);\n}\n\n/**\n * Content packet types that indicate a group has meaningful content to display\n */\nconst CONTENT_PACKET_TYPES_SET = new Set<PacketType>([\n  PacketType.MESSAGE_START,\n  PacketType.SEARCH_TOOL_START,\n  PacketType.IMAGE_GENERATION_TOOL_START,\n  PacketType.PYTHON_TOOL_START,\n  PacketType.TOOL_CALL_ARGUMENT_DELTA,\n  PacketType.CUSTOM_TOOL_START,\n  PacketType.FILE_READER_START,\n  PacketType.FETCH_TOOL_START,\n  PacketType.MEMORY_TOOL_START,\n  PacketType.MEMORY_TOOL_NO_ACCESS,\n  PacketType.REASONING_START,\n  PacketType.DEEP_RESEARCH_PLAN_START,\n  PacketType.RESEARCH_AGENT_START,\n]);\n\nfunction hasContentPackets(packets: Packet[]): boolean {\n  return packets.some((packet) => {\n    const type = packet.obj.type as PacketType;\n    if (type === PacketType.TOOL_CALL_ARGUMENT_DELTA) {\n      return (\n        (packet.obj as ToolCallArgumentDelta).tool_type ===\n        CODE_INTERPRETER_TOOL_TYPES.PYTHON\n      );\n    }\n    return CONTENT_PACKET_TYPES_SET.has(type);\n  });\n}\n\n/**\n * Packet types that indicate final answer content is coming\n */\nconst FINAL_ANSWER_PACKET_TYPES_SET = new Set<PacketType>([\n  PacketType.MESSAGE_START,\n  PacketType.MESSAGE_DELTA,\n  PacketType.IMAGE_GENERATION_TOOL_START,\n  PacketType.IMAGE_GENERATION_TOOL_DELTA,\n]);\n\n// ============================================================================\n// Packet Handlers\n// ============================================================================\n\nfunction handleTopLevelBranching(state: ProcessorState, packet: Packet): void {\n  const branchingPacket = packet.obj as TopLevelBranching;\n  state.expectedBranches.set(\n    packet.placement.turn_index,\n    branchingPacket.num_parallel_branches\n  );\n}\n\nfunction handleTurnTransition(state: ProcessorState, packet: Packet): void {\n  const currentTurnIndex = packet.placement.turn_index;\n\n  // Get all previous turn indices from seen group keys\n  const previousTurnIndices = new Set(\n    Array.from(state.seenGroupKeys).map((key) => parseToolKey(key).turn_index)\n  );\n\n  const isNewTurnIndex = !previousTurnIndices.has(currentTurnIndex);\n\n  // If we see a new turn_index (not just tab_index), inject SECTION_END for previous groups\n  if (isNewTurnIndex && state.seenGroupKeys.size > 0) {\n    state.seenGroupKeys.forEach((prevGroupKey) => {\n      if (!state.groupKeysWithSectionEnd.has(prevGroupKey)) {\n        injectSectionEnd(state, prevGroupKey);\n      }\n    });\n  }\n}\n\nfunction handleCitationPacket(state: ProcessorState, packet: Packet): void {\n  if (packet.obj.type !== PacketType.CITATION_INFO) {\n    return;\n  }\n\n  const citationInfo = packet.obj as CitationInfo;\n\n  // Add to citation map immediately for rendering\n  state.citationMap[citationInfo.citation_number] = citationInfo.document_id;\n\n  // Also add to citations array for CitedSourcesToggle (deduplicated)\n  if (!state.seenCitationDocIds.has(citationInfo.document_id)) {\n    state.seenCitationDocIds.add(citationInfo.document_id);\n    state.citations.push({\n      citation_num: citationInfo.citation_number,\n      document_id: citationInfo.document_id,\n    });\n  }\n}\n\nfunction handleDocumentPacket(state: ProcessorState, packet: Packet): void {\n  if (packet.obj.type === PacketType.SEARCH_TOOL_DOCUMENTS_DELTA) {\n    const docDelta = packet.obj as SearchToolDocumentsDelta;\n    if (docDelta.documents) {\n      for (const doc of docDelta.documents) {\n        if (doc.document_id) {\n          state.documentMap.set(doc.document_id, doc);\n        }\n      }\n    }\n  } else if (packet.obj.type === PacketType.FETCH_TOOL_DOCUMENTS) {\n    const fetchDocuments = packet.obj as FetchToolDocuments;\n    if (fetchDocuments.documents) {\n      for (const doc of fetchDocuments.documents) {\n        if (doc.document_id) {\n          state.documentMap.set(doc.document_id, doc);\n        }\n      }\n    }\n  }\n}\n\nfunction handleStreamingStatusPacket(\n  state: ProcessorState,\n  packet: Packet\n): void {\n  // Check if final answer is coming\n  if (FINAL_ANSWER_PACKET_TYPES_SET.has(packet.obj.type as PacketType)) {\n    state.finalAnswerComing = true;\n  }\n\n  // Capture pre-answer processing time from MESSAGE_START packet\n  if (packet.obj.type === PacketType.MESSAGE_START) {\n    const messageStart = packet.obj as MessageStart;\n    if (messageStart.pre_answer_processing_seconds !== undefined) {\n      state.toolProcessingDuration = messageStart.pre_answer_processing_seconds;\n    }\n  }\n}\n\nfunction handleStopPacket(state: ProcessorState, packet: Packet): void {\n  if (packet.obj.type !== PacketType.STOP || state.stopPacketSeen) {\n    return;\n  }\n\n  state.stopPacketSeen = true;\n\n  // Extract and store the stop reason\n  const stopPacket = packet.obj as Stop;\n  state.stopReason = stopPacket.stop_reason;\n\n  // Inject SECTION_END for all group keys that don't have one\n  state.seenGroupKeys.forEach((groupKey) => {\n    if (!state.groupKeysWithSectionEnd.has(groupKey)) {\n      injectSectionEnd(state, groupKey);\n    }\n  });\n}\n\nfunction handleToolAfterMessagePacket(\n  state: ProcessorState,\n  packet: Packet\n): void {\n  // Handles case where we get a Message packet from Claude, and then tool\n  // calling packets. We use isActualToolCallPacket instead of isToolPacket\n  // to exclude reasoning packets - reasoning is just the model thinking,\n  // not an actual tool call that would produce new content.\n  if (\n    state.finalAnswerComing &&\n    !state.stopPacketSeen &&\n    isActualToolCallPacket(packet)\n  ) {\n    state.finalAnswerComing = false;\n  }\n}\n\nfunction addPacketToGroup(\n  state: ProcessorState,\n  packet: Packet,\n  groupKey: string\n): void {\n  const existingGroup = state.groupedPacketsMap.get(groupKey);\n  if (existingGroup) {\n    existingGroup.push(packet);\n  } else {\n    state.groupedPacketsMap.set(groupKey, [packet]);\n  }\n}\n\n// ============================================================================\n// Main Processing Function\n// ============================================================================\n\nfunction processPacket(state: ProcessorState, packet: Packet): void {\n  if (!packet) return;\n\n  // Handle TopLevelBranching packets - these tell us how many parallel branches to expect\n  if (packet.obj.type === PacketType.TOP_LEVEL_BRANCHING) {\n    handleTopLevelBranching(state, packet);\n    // Don't add this packet to any group, it's just metadata\n    return;\n  }\n\n  // Handle turn transitions (inject SECTION_END for previous groups)\n  handleTurnTransition(state, packet);\n\n  // Track group key\n  const groupKey = getGroupKey(packet);\n  state.seenGroupKeys.add(groupKey);\n\n  // Track SECTION_END and ERROR packets (both indicate completion)\n  if (\n    packet.obj.type === PacketType.SECTION_END ||\n    packet.obj.type === PacketType.ERROR\n  ) {\n    state.groupKeysWithSectionEnd.add(groupKey);\n  }\n\n  // Check if this is the first packet in the group (before adding)\n  const existingGroup = state.groupedPacketsMap.get(groupKey);\n  const isFirstPacket = !existingGroup;\n\n  // Add packet to group\n  addPacketToGroup(state, packet, groupKey);\n\n  // Categorize on first packet of each group\n  if (isFirstPacket) {\n    if (isToolPacket(packet, false)) {\n      state.toolGroupKeys.add(groupKey);\n    }\n    if (isDisplayPacket(packet)) {\n      state.displayGroupKeys.add(groupKey);\n    }\n  }\n\n  // Track image generation for header display (regardless of group position)\n  if (packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_START) {\n    state.isGeneratingImage = true;\n  }\n\n  // Count generated images from DELTA packets\n  if (packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_DELTA) {\n    const delta = packet.obj as ImageGenerationToolDelta;\n    state.generatedImageCount += delta.images?.length ?? 0;\n  }\n\n  // Handle specific packet types\n  handleCitationPacket(state, packet);\n  handleDocumentPacket(state, packet);\n  handleStreamingStatusPacket(state, packet);\n  handleStopPacket(state, packet);\n  handleToolAfterMessagePacket(state, packet);\n}\n\nexport function processPackets(\n  state: ProcessorState,\n  rawPackets: Packet[]\n): ProcessorState {\n  // Handle reset (packets array shrunk - upstream replaced with shorter list)\n  if (state.nextPacketIndex > rawPackets.length) {\n    state = createInitialState(state.nodeId);\n  }\n\n  // Track if we processed any new packets\n  const prevProcessedIndex = state.nextPacketIndex;\n\n  // Process only new packets\n  for (let i = state.nextPacketIndex; i < rawPackets.length; i++) {\n    const packet = rawPackets[i];\n    if (packet) {\n      processPacket(state, packet);\n    }\n  }\n\n  state.nextPacketIndex = rawPackets.length;\n\n  // Only rebuild result arrays if we processed new packets\n  // This prevents creating new references when nothing changed\n  if (prevProcessedIndex !== rawPackets.length) {\n    // Build result arrays after processing new packets\n    state.toolGroups = buildGroupsFromKeys(state, state.toolGroupKeys);\n    state.potentialDisplayGroups = buildGroupsFromKeys(\n      state,\n      state.displayGroupKeys\n    );\n  }\n\n  return state;\n}\n\n/**\n * Build GroupedPacket array from a set of group keys.\n * Filters to only include groups with meaningful content and sorts by turn/tab index.\n *\n * @example\n * // Input: state.groupedPacketsMap + keys Set\n * // ┌─────────────────────────────────────────────────────┐\n * // │ groupedPacketsMap = {                               │\n * // │   \"0-0\" → [packet1, packet2]                       │\n * // │   \"0-1\" → [packet3]                                │\n * // │   \"1-0\" → [packet4, packet5]                       │\n * // │   \"2-0\" → [empty_packet]  ← no content packets     │\n * // │ }                                                  │\n * // │ keys = Set{\"0-0\", \"0-1\", \"1-0\", \"2-0\"}             │\n * // └─────────────────────────────────────────────────────┘\n * //\n * // Step 1: Map keys → GroupedPacket (parse key, lookup packets)\n * // ┌─────────────────────────────────────────────────────┐\n * // │ \"0-0\" → { turn_index:0, tab_index:0, packets:[...] }│\n * // │ \"0-1\" → { turn_index:0, tab_index:1, packets:[...] }│\n * // │ \"1-0\" → { turn_index:1, tab_index:0, packets:[...] }│\n * // │ \"2-0\" → { turn_index:2, tab_index:0, packets:[...] }│\n * // └─────────────────────────────────────────────────────┘\n * //\n * // Step 2: Filter (hasContentPackets check)\n * // ┌─────────────────────────────────────────────────────┐\n * // │ ✓ \"0-0\" has MESSAGE_START        → keep            │\n * // │ ✓ \"0-1\" has SEARCH_TOOL_START    → keep            │\n * // │ ✓ \"1-0\" has PYTHON_TOOL_START    → keep            │\n * // │ ✗ \"2-0\" no content packets       → filtered out    │\n * // └─────────────────────────────────────────────────────┘\n * //\n * // Step 3: Sort by turn_index, then tab_index\n * // ┌─────────────────────────────────────────────────────┐\n * // │ Output: GroupedPacket[]                             │\n * // ├─────────────────────────────────────────────────────┤\n * // │ [0] turn_index=0, tab_index=0, packets=[...]       │\n * // │ [1] turn_index=0, tab_index=1, packets=[...]       │\n * // │ [2] turn_index=1, tab_index=0, packets=[...]       │\n * // └─────────────────────────────────────────────────────┘\n */\nfunction buildGroupsFromKeys(\n  state: ProcessorState,\n  keys: Set<string>\n): GroupedPacket[] {\n  return Array.from(keys)\n    .map((key) => {\n      const { turn_index, tab_index } = parseToolKey(key);\n      const packets = state.groupedPacketsMap.get(key);\n      // Spread to create new array reference - ensures React detects changes for re-renders\n      return packets ? { turn_index, tab_index, packets: [...packets] } : null;\n    })\n    .filter(\n      (g): g is GroupedPacket => g !== null && hasContentPackets(g.packets)\n    )\n    .sort((a, b) => {\n      if (a.turn_index !== b.turn_index) {\n        return a.turn_index - b.turn_index;\n      }\n      return a.tab_index - b.tab_index;\n    });\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/usePacedTurnGroups.test.tsx",
    "content": "/**\n * Tests for usePacedTurnGroups hook\n *\n * Tests the pacing logic that reveals steps with delays during streaming.\n * Uses @testing-library/react's renderHook with fake timers.\n */\nimport { renderHook, act } from \"@testing-library/react\";\nimport { PacketType, Packet } from \"@/app/app/services/streamingModels\";\nimport { TurnGroup, TransformedStep } from \"../transformers\";\nimport { GroupedPacket } from \"./packetProcessor\";\nimport { usePacedTurnGroups } from \"./usePacedTurnGroups\";\n\n// ============================================================================\n// Test Helpers\n// ============================================================================\n\n/**\n * Create a mock TransformedStep with a TOOL_START packet\n */\nfunction createStep(\n  turnIndex: number,\n  tabIndex: number,\n  packetType: PacketType = PacketType.SEARCH_TOOL_START\n): TransformedStep {\n  return {\n    key: `${turnIndex}-${tabIndex}`,\n    turnIndex,\n    tabIndex,\n    packets: [\n      {\n        placement: { turn_index: turnIndex, tab_index: tabIndex },\n        obj: { type: packetType },\n      } as Packet,\n    ],\n  };\n}\n\n/**\n * Create a TurnGroup from steps\n */\nfunction createTurnGroup(steps: TransformedStep[]): TurnGroup {\n  if (steps.length === 0) throw new Error(\"TurnGroup needs at least one step\");\n  return {\n    turnIndex: steps[0]!.turnIndex,\n    steps,\n    isParallel: steps.length > 1,\n  };\n}\n\n/**\n * Create a mock display group (MESSAGE_START)\n */\nfunction createDisplayGroup(turnIndex: number): GroupedPacket {\n  return {\n    turn_index: turnIndex,\n    tab_index: 0,\n    packets: [\n      {\n        placement: { turn_index: turnIndex, tab_index: 0 },\n        obj: {\n          type: PacketType.MESSAGE_START,\n          id: \"msg-1\",\n          content: \"\",\n          final_documents: null,\n        },\n      } as Packet,\n    ],\n  };\n}\n\n// ============================================================================\n// Tests\n// ============================================================================\n\ndescribe(\"usePacedTurnGroups\", () => {\n  beforeEach(() => {\n    jest.useFakeTimers();\n  });\n\n  afterEach(() => {\n    jest.useRealTimers();\n  });\n\n  describe(\"initial state\", () => {\n    test(\"returns empty arrays when no turn groups provided\", () => {\n      const { result } = renderHook(() =>\n        usePacedTurnGroups([], [], false, 1, false)\n      );\n\n      expect(result.current.pacedTurnGroups).toEqual([]);\n      expect(result.current.pacedDisplayGroups).toEqual([]);\n      expect(result.current.pacedFinalAnswerComing).toBe(false);\n    });\n  });\n\n  describe(\"bypass pacing for completed messages\", () => {\n    test(\"returns all turn groups immediately when stopPacketSeen on first render\", () => {\n      const step1 = createStep(0, 0);\n      const step2 = createStep(1, 0);\n      const turnGroups = [createTurnGroup([step1]), createTurnGroup([step2])];\n      const displayGroups = [createDisplayGroup(2)];\n\n      const { result } = renderHook(() =>\n        usePacedTurnGroups(turnGroups, displayGroups, true, 1, true)\n      );\n\n      // All steps revealed immediately - no pacing\n      expect(result.current.pacedTurnGroups.length).toBe(2);\n      expect(result.current.pacedDisplayGroups.length).toBe(1);\n      expect(result.current.pacedFinalAnswerComing).toBe(true);\n    });\n  });\n\n  describe(\"stop packet handling\", () => {\n    test(\"flushes all pending steps when stop packet received\", () => {\n      const step1 = createStep(0, 0);\n      const step2 = createStep(1, 0);\n      const step3 = createStep(2, 0);\n\n      // Start with first step\n      const { result, rerender } = renderHook(\n        ({ turnGroups, stopPacketSeen }) =>\n          usePacedTurnGroups(turnGroups, [], stopPacketSeen, 1, false),\n        {\n          initialProps: {\n            turnGroups: [createTurnGroup([step1])],\n            stopPacketSeen: false,\n          },\n        }\n      );\n\n      // First step revealed immediately\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // Add more steps\n      rerender({\n        turnGroups: [\n          createTurnGroup([step1]),\n          createTurnGroup([step2]),\n          createTurnGroup([step3]),\n        ],\n        stopPacketSeen: false,\n      });\n\n      // Still only first step (others pending)\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // STOP packet arrives - flush all\n      rerender({\n        turnGroups: [\n          createTurnGroup([step1]),\n          createTurnGroup([step2]),\n          createTurnGroup([step3]),\n        ],\n        stopPacketSeen: true,\n      });\n\n      // All steps revealed immediately\n      expect(result.current.pacedTurnGroups.length).toBe(3);\n    });\n  });\n\n  describe(\"nodeId change reset\", () => {\n    test(\"resets pacing state when nodeId changes\", () => {\n      const step1 = createStep(0, 0);\n      const turnGroups = [createTurnGroup([step1])];\n\n      const { result, rerender } = renderHook(\n        ({ nodeId }) =>\n          usePacedTurnGroups(turnGroups, [], false, nodeId, false),\n        { initialProps: { nodeId: 1 } }\n      );\n\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // Change nodeId - should reset state\n      rerender({ nodeId: 2 });\n\n      // First step of new message revealed immediately\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n    });\n  });\n\n  describe(\"step pacing\", () => {\n    test(\"first step is revealed immediately\", () => {\n      const step1 = createStep(0, 0);\n      const turnGroups = [createTurnGroup([step1])];\n\n      const { result } = renderHook(() =>\n        usePacedTurnGroups(turnGroups, [], false, 1, false)\n      );\n\n      // First step revealed immediately without timer\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n      expect(result.current.pacedTurnGroups[0]?.steps[0]?.key).toBe(\"0-0\");\n    });\n\n    test(\"second step is revealed after 200ms delay\", () => {\n      const step1 = createStep(0, 0);\n\n      const { result, rerender } = renderHook(\n        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),\n        { initialProps: { turnGroups: [createTurnGroup([step1])] } }\n      );\n\n      // First step revealed\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // Add second step\n      const step2 = createStep(1, 0);\n      rerender({\n        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],\n      });\n\n      // Still only first step\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // Advance timer\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n\n      // Now second step revealed\n      expect(result.current.pacedTurnGroups.length).toBe(2);\n    });\n\n    test(\"third step is revealed after 400ms total (200ms after second)\", () => {\n      const step1 = createStep(0, 0);\n\n      const { result, rerender } = renderHook(\n        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),\n        { initialProps: { turnGroups: [createTurnGroup([step1])] } }\n      );\n\n      // First step revealed\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // Add second and third steps\n      const step2 = createStep(1, 0);\n      const step3 = createStep(2, 0);\n      rerender({\n        turnGroups: [\n          createTurnGroup([step1]),\n          createTurnGroup([step2]),\n          createTurnGroup([step3]),\n        ],\n      });\n\n      // Still only first step\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // After 200ms - second step\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n      expect(result.current.pacedTurnGroups.length).toBe(2);\n\n      // After another 200ms (400ms total) - third step\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n      expect(result.current.pacedTurnGroups.length).toBe(3);\n    });\n\n    test(\"same-type steps are paced with delay (NOT batched)\", () => {\n      const step1 = createStep(0, 0, PacketType.SEARCH_TOOL_START);\n\n      const { result, rerender } = renderHook(\n        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),\n        { initialProps: { turnGroups: [createTurnGroup([step1])] } }\n      );\n\n      // First step revealed immediately\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // Add two more SEARCH_TOOL steps (same type as first)\n      const step2 = createStep(1, 0, PacketType.SEARCH_TOOL_START);\n      const step3 = createStep(2, 0, PacketType.SEARCH_TOOL_START);\n      rerender({\n        turnGroups: [\n          createTurnGroup([step1]),\n          createTurnGroup([step2]),\n          createTurnGroup([step3]),\n        ],\n      });\n\n      // Still only first step - same-type steps should NOT be batched\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // After 200ms - second step (even though same type)\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n      expect(result.current.pacedTurnGroups.length).toBe(2);\n\n      // After another 200ms - third step (even though same type)\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n      expect(result.current.pacedTurnGroups.length).toBe(3);\n    });\n\n    test(\"different-type steps are paced with delay\", () => {\n      const step1 = createStep(0, 0, PacketType.SEARCH_TOOL_START);\n\n      const { result, rerender } = renderHook(\n        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),\n        { initialProps: { turnGroups: [createTurnGroup([step1])] } }\n      );\n\n      // First step revealed immediately\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // Add step of different type\n      const step2 = createStep(1, 0, PacketType.PYTHON_TOOL_START);\n      rerender({\n        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],\n      });\n\n      // Still only first step\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // After 200ms - second step\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n      expect(result.current.pacedTurnGroups.length).toBe(2);\n    });\n  });\n\n  describe(\"display groups\", () => {\n    test(\"display groups shown only after tool pacing complete\", () => {\n      const step1 = createStep(0, 0);\n      const displayGroup = createDisplayGroup(1);\n\n      const { result, rerender } = renderHook(\n        ({ turnGroups }) =>\n          usePacedTurnGroups(turnGroups, [displayGroup], false, 1, true),\n        { initialProps: { turnGroups: [createTurnGroup([step1])] } }\n      );\n\n      // First step revealed, but display groups hidden until pacing complete\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // Add second step\n      const step2 = createStep(1, 0);\n      rerender({\n        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],\n      });\n\n      // Display groups still hidden (pacing not complete)\n      expect(result.current.pacedDisplayGroups.length).toBe(0);\n\n      // Complete pacing\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n\n      // Now pacing is complete, display groups shown\n      expect(result.current.pacedTurnGroups.length).toBe(2);\n      expect(result.current.pacedDisplayGroups.length).toBe(1);\n    });\n\n    test(\"display groups shown immediately when no tool steps\", () => {\n      const displayGroup = createDisplayGroup(0);\n\n      const { result } = renderHook(() =>\n        usePacedTurnGroups([], [displayGroup], false, 1, true)\n      );\n\n      // No tools = pacing complete immediately\n      expect(result.current.pacedDisplayGroups.length).toBe(1);\n      expect(result.current.pacedFinalAnswerComing).toBe(true);\n    });\n  });\n\n  describe(\"pacedFinalAnswerComing\", () => {\n    test(\"returns false when tool pacing not complete\", () => {\n      const step1 = createStep(0, 0);\n\n      const { result, rerender } = renderHook(\n        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, true),\n        { initialProps: { turnGroups: [createTurnGroup([step1])] } }\n      );\n\n      // Add second step (creates pending)\n      const step2 = createStep(1, 0);\n      rerender({\n        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],\n      });\n\n      // Pacing not complete\n      expect(result.current.pacedFinalAnswerComing).toBe(false);\n\n      // Complete pacing\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n\n      // Now pacing complete\n      expect(result.current.pacedFinalAnswerComing).toBe(true);\n    });\n\n    test(\"returns true when bypassing pacing\", () => {\n      const step1 = createStep(0, 0);\n      const turnGroups = [createTurnGroup([step1])];\n\n      const { result } = renderHook(() =>\n        usePacedTurnGroups(turnGroups, [], true, 1, true)\n      );\n\n      // Bypassing pacing, so finalAnswerComing passed through\n      expect(result.current.pacedFinalAnswerComing).toBe(true);\n    });\n  });\n\n  describe(\"tool-after-message transition\", () => {\n    test(\"resets toolPacingComplete when finalAnswerComing goes true → false with new tool step\", () => {\n      const displayGroup = createDisplayGroup(0);\n\n      // Step 1: Render with finalAnswerComing=true, no tool steps\n      // No tools = pacing complete immediately → display groups shown\n      const { result, rerender } = renderHook(\n        ({ turnGroups, finalAnswerComing }) =>\n          usePacedTurnGroups(\n            turnGroups,\n            [displayGroup],\n            false,\n            1,\n            finalAnswerComing\n          ),\n        {\n          initialProps: {\n            turnGroups: [] as TurnGroup[],\n            finalAnswerComing: true,\n          },\n        }\n      );\n\n      expect(result.current.pacedDisplayGroups.length).toBe(1);\n      expect(result.current.pacedFinalAnswerComing).toBe(true);\n\n      // Step 2: finalAnswerComing goes false + new tool step arrives\n      // This simulates the agent switching from message streaming back to tools\n      const step1 = createStep(0, 0);\n      rerender({\n        turnGroups: [createTurnGroup([step1])],\n        finalAnswerComing: false,\n      });\n\n      // toolPacingComplete was reset, so display groups should be hidden\n      // (first tool step is revealed immediately, but pacing just re-started)\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n      expect(result.current.pacedDisplayGroups.length).toBe(0);\n\n      // Step 3: Add a second tool step so pacing is not yet complete\n      const step2 = createStep(1, 0);\n      rerender({\n        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],\n        finalAnswerComing: false,\n      });\n\n      // Display groups still hidden (pacing incomplete)\n      expect(result.current.pacedDisplayGroups.length).toBe(0);\n\n      // Step 4: Advance timer to complete pacing\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n\n      // Now pacing is complete → display groups shown again\n      expect(result.current.pacedTurnGroups.length).toBe(2);\n      expect(result.current.pacedDisplayGroups.length).toBe(1);\n    });\n  });\n\n  describe(\"referential stability\", () => {\n    test(\"returns same array reference when turn groups have not changed\", () => {\n      const step1 = createStep(0, 0);\n\n      const { result, rerender } = renderHook(\n        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),\n        { initialProps: { turnGroups: [createTurnGroup([step1])] } }\n      );\n\n      // First step revealed immediately\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // Add second step and reveal it via pacing\n      const step2 = createStep(1, 0);\n      rerender({\n        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],\n      });\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n      expect(result.current.pacedTurnGroups.length).toBe(2);\n\n      const stableRef = result.current.pacedTurnGroups;\n\n      // Re-render with new array containing structurally identical turn groups\n      rerender({\n        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],\n      });\n\n      // Should be the exact same array reference (nothing changed)\n      expect(result.current.pacedTurnGroups).toBe(stableRef);\n    });\n\n    test(\"preserves completed group references when streaming group changes\", () => {\n      const step1 = createStep(0, 0);\n\n      const { result, rerender } = renderHook(\n        ({ turnGroups, stopPacketSeen }) =>\n          usePacedTurnGroups(turnGroups, [], stopPacketSeen, 1, false),\n        {\n          initialProps: {\n            turnGroups: [createTurnGroup([step1])],\n            stopPacketSeen: false,\n          },\n        }\n      );\n\n      // First step revealed immediately\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n\n      // Add second step and advance timer to reveal it\n      const step2 = createStep(1, 0);\n      rerender({\n        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],\n        stopPacketSeen: false,\n      });\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n      expect(result.current.pacedTurnGroups.length).toBe(2);\n\n      const firstGroupRef = result.current.pacedTurnGroups[0];\n\n      // Simulate streaming: step2 gets more packets (new object with longer packets array)\n      const step2Updated: TransformedStep = {\n        ...step2,\n        packets: [\n          ...step2.packets,\n          {\n            placement: { turn_index: 1, tab_index: 0 },\n            obj: { type: PacketType.SEARCH_TOOL_START },\n          } as Packet,\n        ],\n      };\n      rerender({\n        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2Updated])],\n        stopPacketSeen: false,\n      });\n\n      // First group (completed) should keep the same object reference\n      expect(result.current.pacedTurnGroups[0]).toBe(firstGroupRef);\n      // Second group changed (packets.length differs) — new reference\n      expect(result.current.pacedTurnGroups.length).toBe(2);\n    });\n\n    test(\"returns new array reference when a new step is revealed\", () => {\n      const step1 = createStep(0, 0);\n\n      const { result, rerender } = renderHook(\n        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),\n        { initialProps: { turnGroups: [createTurnGroup([step1])] } }\n      );\n\n      const firstResult = result.current.pacedTurnGroups;\n      expect(firstResult.length).toBe(1);\n\n      // Add second step and reveal it\n      const step2 = createStep(1, 0);\n      rerender({\n        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],\n      });\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n\n      // Array reference must differ (length changed)\n      expect(result.current.pacedTurnGroups).not.toBe(firstResult);\n      expect(result.current.pacedTurnGroups.length).toBe(2);\n    });\n  });\n\n  describe(\"timer cleanup\", () => {\n    test(\"clears timer on unmount\", () => {\n      const step1 = createStep(0, 0);\n\n      const { result, rerender, unmount } = renderHook(\n        ({ turnGroups }) => usePacedTurnGroups(turnGroups, [], false, 1, false),\n        { initialProps: { turnGroups: [createTurnGroup([step1])] } }\n      );\n\n      // Add second step to create pending timer\n      const step2 = createStep(1, 0);\n      rerender({\n        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],\n      });\n\n      // Unmount before timer fires\n      unmount();\n\n      // Advance timer - should not throw\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n\n      // No assertion needed - just verifying no errors on timer fire after unmount\n    });\n\n    test(\"clears timer on nodeId change\", () => {\n      const step1 = createStep(0, 0);\n\n      const { result, rerender } = renderHook(\n        ({ turnGroups, nodeId }) =>\n          usePacedTurnGroups(turnGroups, [], false, nodeId, false),\n        {\n          initialProps: {\n            turnGroups: [createTurnGroup([step1])],\n            nodeId: 1,\n          },\n        }\n      );\n\n      // Add second step to create pending timer\n      const step2 = createStep(1, 0);\n      rerender({\n        turnGroups: [createTurnGroup([step1]), createTurnGroup([step2])],\n        nodeId: 1,\n      });\n\n      // Change nodeId - should clear timer\n      rerender({\n        turnGroups: [createTurnGroup([step1])],\n        nodeId: 2,\n      });\n\n      // Old timer should not affect new state\n      act(() => {\n        jest.advanceTimersByTime(200);\n      });\n\n      // Only one step for new nodeId\n      expect(result.current.pacedTurnGroups.length).toBe(1);\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/usePacedTurnGroups.ts",
    "content": "import { useRef, useState, useEffect, useCallback, useMemo } from \"react\";\nimport { PacketType } from \"@/app/app/services/streamingModels\";\nimport { GroupedPacket } from \"./packetProcessor\";\nimport { TurnGroup, TransformedStep } from \"../transformers\";\n\n// Delay between steps (ms)\nconst PACING_DELAY_MS = 200;\n\n/**\n * Tool START packet types used for categorizing steps\n * These determine the \"type\" of a step for pacing purposes\n */\nconst TOOL_START_PACKET_TYPES = new Set<PacketType>([\n  PacketType.SEARCH_TOOL_START,\n  PacketType.FETCH_TOOL_START,\n  PacketType.PYTHON_TOOL_START,\n  PacketType.CUSTOM_TOOL_START,\n  PacketType.FILE_READER_START,\n  PacketType.REASONING_START,\n  PacketType.IMAGE_GENERATION_TOOL_START,\n  PacketType.DEEP_RESEARCH_PLAN_START,\n  PacketType.RESEARCH_AGENT_START,\n  PacketType.MEMORY_TOOL_START,\n  PacketType.MEMORY_TOOL_NO_ACCESS,\n]);\n\n/**\n * Get the primary packet type from a step's packets (first START packet)\n * Used to determine if a type transition occurred\n */\nfunction getStepPacketType(step: TransformedStep): PacketType | null {\n  for (const packet of step.packets) {\n    if (TOOL_START_PACKET_TYPES.has(packet.obj.type as PacketType)) {\n      return packet.obj.type as PacketType;\n    }\n  }\n  return null;\n}\n\n/**\n * Internal pacing state stored in ref (not triggering re-renders)\n */\ninterface PacingState {\n  // Tracking revealed content\n  revealedStepKeys: Set<string>;\n  lastRevealedPacketType: PacketType | null;\n\n  // Queued content\n  pendingSteps: TransformedStep[];\n\n  // Timer\n  pacingTimer: ReturnType<typeof setTimeout> | null;\n\n  // Flags\n  toolPacingComplete: boolean;\n  stopPacketSeen: boolean;\n\n  // Track nodeId for reset detection\n  nodeId: string | null;\n}\n\nfunction createInitialPacingState(): PacingState {\n  return {\n    revealedStepKeys: new Set(),\n    lastRevealedPacketType: null,\n    pendingSteps: [],\n    pacingTimer: null,\n    toolPacingComplete: false,\n    stopPacketSeen: false,\n    nodeId: null,\n  };\n}\n\n/**\n * Hook that adds pacing delays between steps during streaming.\n * Creates visual breathing room between agent activities.\n *\n * Architecture:\n * - Pacing state in ref: no re-renders for internal tracking\n * - useState only for revealTrigger: forces re-render when content should update\n * - Timer-based delays: 200ms between all steps\n *\n * @param toolTurnGroups - Turn groups from packet processor\n * @param displayGroups - Display content groups (MESSAGE_START/DELTA)\n * @param stopPacketSeen - Whether STOP packet has been received\n * @param nodeId - Message node ID for reset detection\n * @param finalAnswerComing - Whether message content is streaming\n */\nexport function usePacedTurnGroups(\n  toolTurnGroups: TurnGroup[],\n  displayGroups: GroupedPacket[],\n  stopPacketSeen: boolean,\n  nodeId: number,\n  finalAnswerComing: boolean\n): {\n  pacedTurnGroups: TurnGroup[];\n  pacedDisplayGroups: GroupedPacket[];\n  pacedFinalAnswerComing: boolean;\n} {\n  // Ref-based pacing state (no re-renders)\n  const stateRef = useRef<PacingState>(createInitialPacingState());\n\n  // Track previous finalAnswerComing to detect tool-after-message transitions\n  const prevFinalAnswerComingRef = useRef(finalAnswerComing);\n\n  // Cache previous pacedTurnGroups to preserve referential equality\n  // for completed turn groups that haven't changed\n  const prevPacedRef = useRef<TurnGroup[]>([]);\n\n  // Trigger re-render when content should update\n  // Used in useMemo dependencies since state.revealedStepKeys is stored in a ref\n  const [revealTrigger, setRevealTrigger] = useState(0);\n\n  // Stable nodeId string for comparison\n  const nodeIdStr = String(nodeId);\n\n  // Reset on nodeId change\n  if (stateRef.current.nodeId !== nodeIdStr) {\n    if (stateRef.current.pacingTimer) {\n      clearTimeout(stateRef.current.pacingTimer);\n    }\n    stateRef.current = createInitialPacingState();\n    stateRef.current.nodeId = nodeIdStr;\n    prevPacedRef.current = [];\n  }\n\n  const state = stateRef.current;\n\n  // Bypass pacing for completed messages (old messages loaded from history)\n  // If stopPacketSeen is true on first render, return everything immediately\n  const shouldBypassPacing =\n    stopPacketSeen &&\n    state.revealedStepKeys.size === 0 &&\n    toolTurnGroups.length > 0;\n\n  // Handle revealing the next pending step\n  // Reveals ONE step per timer fire, always with delay between steps\n  const revealNextPendingStep = useCallback(() => {\n    const state = stateRef.current;\n\n    if (state.pendingSteps.length > 0) {\n      const stepToReveal = state.pendingSteps.shift()!;\n      state.revealedStepKeys.add(stepToReveal.key);\n      state.lastRevealedPacketType = getStepPacketType(stepToReveal);\n\n      // Schedule next step if more pending (always delay, regardless of type)\n      if (state.pendingSteps.length > 0) {\n        state.pacingTimer = setTimeout(revealNextPendingStep, PACING_DELAY_MS);\n        setRevealTrigger((t) => t + 1);\n        return;\n      }\n    }\n\n    // No more pending steps - pacing complete\n    state.toolPacingComplete = true;\n    state.pacingTimer = null;\n    setRevealTrigger((t) => t + 1);\n  }, []);\n\n  // Process incoming turn groups\n  useEffect(() => {\n    // Skip processing when bypassing pacing\n    if (shouldBypassPacing) return;\n\n    const state = stateRef.current;\n\n    // Detect tool-after-message transition: message was showing, now tools are starting\n    // Reset toolPacingComplete to hide display until new tools finish pacing\n    if (prevFinalAnswerComingRef.current && !finalAnswerComing) {\n      state.toolPacingComplete = false;\n    }\n    prevFinalAnswerComingRef.current = finalAnswerComing;\n\n    // Handle STOP packet - flush everything immediately\n    if (stopPacketSeen && !state.stopPacketSeen) {\n      state.stopPacketSeen = true;\n\n      // Clear any pending timer\n      if (state.pacingTimer) {\n        clearTimeout(state.pacingTimer);\n        state.pacingTimer = null;\n      }\n\n      // Reveal all pending steps immediately\n      for (const step of state.pendingSteps) {\n        state.revealedStepKeys.add(step.key);\n      }\n      state.pendingSteps = [];\n      state.toolPacingComplete = true;\n\n      setRevealTrigger((t) => t + 1);\n      return;\n    }\n\n    // Collect all steps from turn groups\n    const allSteps: TransformedStep[] = [];\n    for (const turnGroup of toolTurnGroups) {\n      for (const step of turnGroup.steps) {\n        allSteps.push(step);\n      }\n    }\n\n    // Find new steps (not yet revealed or pending)\n    const newSteps: TransformedStep[] = [];\n    const pendingKeys = new Set(state.pendingSteps.map((s) => s.key));\n\n    for (const step of allSteps) {\n      if (!state.revealedStepKeys.has(step.key) && !pendingKeys.has(step.key)) {\n        newSteps.push(step);\n      }\n    }\n\n    if (newSteps.length === 0) {\n      // If there are no tool steps at all, mark pacing complete immediately\n      // This allows tool-less responses to render their displayGroups\n      if (allSteps.length === 0 && !state.toolPacingComplete) {\n        state.toolPacingComplete = true;\n        setRevealTrigger((t) => t + 1);\n        return;\n      }\n\n      // Check if all steps are revealed (no pending, no new)\n      if (\n        state.pendingSteps.length === 0 &&\n        !state.pacingTimer &&\n        allSteps.length > 0\n      ) {\n        const allRevealed = allSteps.every((s) =>\n          state.revealedStepKeys.has(s.key)\n        );\n        if (allRevealed && !state.toolPacingComplete) {\n          state.toolPacingComplete = true;\n          setRevealTrigger((t) => t + 1);\n        }\n      }\n      return;\n    }\n\n    // Process new steps\n    for (const step of newSteps) {\n      const stepType = getStepPacketType(step);\n\n      // First step ever - reveal immediately\n      if (\n        state.revealedStepKeys.size === 0 &&\n        state.pendingSteps.length === 0\n      ) {\n        state.revealedStepKeys.add(step.key);\n        state.lastRevealedPacketType = stepType;\n        setRevealTrigger((t) => t + 1);\n        continue;\n      }\n\n      // All subsequent steps - queue for paced reveal\n      state.pendingSteps.push(step);\n\n      // Start timer if not already running\n      if (!state.pacingTimer && state.pendingSteps.length === 1) {\n        state.pacingTimer = setTimeout(revealNextPendingStep, PACING_DELAY_MS);\n      }\n    }\n\n    // Mark pacing incomplete while we have pending steps or timer\n    if (state.pendingSteps.length > 0 || state.pacingTimer) {\n      state.toolPacingComplete = false;\n    }\n  }, [\n    toolTurnGroups,\n    stopPacketSeen,\n    finalAnswerComing,\n    revealNextPendingStep,\n    shouldBypassPacing,\n  ]);\n\n  // Cleanup timer on unmount\n  useEffect(() => {\n    return () => {\n      if (stateRef.current.pacingTimer) {\n        clearTimeout(stateRef.current.pacingTimer);\n      }\n    };\n  }, []);\n\n  // Build paced turn groups from revealed step keys\n  // Memoized to prevent unnecessary re-renders in downstream components\n  // revealTrigger is included because state.revealedStepKeys is stored in a ref\n  const pacedTurnGroups = useMemo(() => {\n    // Bypass: return all turn groups immediately\n    if (shouldBypassPacing) return toolTurnGroups;\n\n    const result: TurnGroup[] = [];\n    for (const turnGroup of toolTurnGroups) {\n      const revealedSteps = turnGroup.steps.filter((step) =>\n        state.revealedStepKeys.has(step.key)\n      );\n      if (revealedSteps.length > 0) {\n        result.push({\n          turnIndex: turnGroup.turnIndex,\n          steps: revealedSteps,\n          isParallel: revealedSteps.length > 1,\n        });\n      }\n    }\n\n    // Stabilize: reuse previous TurnGroup objects when their content hasn't changed.\n    // This preserves referential equality for completed groups, preventing\n    // unnecessary re-renders in downstream components (e.g. SearchChipList).\n    const prev = prevPacedRef.current;\n    if (prev.length === result.length) {\n      let allMatch = true;\n      for (let i = 0; i < result.length; i++) {\n        const oldGroup = prev[i]!;\n        const newGroup = result[i]!;\n        if (\n          oldGroup.turnIndex === newGroup.turnIndex &&\n          oldGroup.steps.length === newGroup.steps.length &&\n          oldGroup.steps.every(\n            (s, j) =>\n              s.key === newGroup.steps[j]!.key &&\n              s.packets.length === newGroup.steps[j]!.packets.length\n          )\n        ) {\n          // Reuse old object reference for this group\n          result[i] = oldGroup;\n        } else {\n          allMatch = false;\n        }\n      }\n      if (allMatch) {\n        // Every group matched — return the exact same array reference\n        return prev;\n      }\n    }\n\n    prevPacedRef.current = result;\n    return result;\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, [toolTurnGroups, revealTrigger, shouldBypassPacing]);\n\n  // Only return display groups when tool pacing is complete (or bypassing)\n  const pacedDisplayGroups = useMemo(\n    () => (shouldBypassPacing || state.toolPacingComplete ? displayGroups : []),\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n    [state.toolPacingComplete, displayGroups, revealTrigger, shouldBypassPacing]\n  );\n\n  // Paced signals for header state consistency\n  // Only signal finalAnswerComing when tool pacing is complete (or bypassing)\n  const pacedFinalAnswerComing = useMemo(\n    () => (shouldBypassPacing || state.toolPacingComplete) && finalAnswerComing,\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n    [\n      state.toolPacingComplete,\n      finalAnswerComing,\n      revealTrigger,\n      shouldBypassPacing,\n    ]\n  );\n\n  return {\n    pacedTurnGroups,\n    pacedDisplayGroups,\n    pacedFinalAnswerComing,\n  };\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/usePacketProcessor.test.tsx",
    "content": "/**\n * Integration tests for usePacketProcessor hook\n *\n * Tests the React hook that wraps packet processing functions with React state\n * management, memoization, and callbacks. Uses @testing-library/react's renderHook.\n */\nimport { renderHook, act } from \"@testing-library/react\";\nimport {\n  Packet,\n  PacketType,\n  StopReason,\n} from \"@/app/app/services/streamingModels\";\nimport { usePacketProcessor } from \"./usePacketProcessor\";\nimport {\n  createPacket,\n  createSearchToolStartPacket,\n  createMessageStartPacket,\n  createStopPacket,\n  createBranchingPacket,\n} from \"./__tests__/testHelpers\";\n\n// Mock the transformers module\njest.mock(\"../transformers\", () => ({\n  transformPacketGroups: jest.fn((groups) =>\n    groups.map(\n      (g: { turn_index: number; tab_index: number; packets: Packet[] }) => ({\n        key: `${g.turn_index}-${g.tab_index}`,\n        turnIndex: g.turn_index,\n        tabIndex: g.tab_index,\n        packets: g.packets,\n      })\n    )\n  ),\n  groupStepsByTurn: jest.fn((steps) => {\n    const turnMap = new Map<number, typeof steps>();\n    for (const step of steps) {\n      const existing = turnMap.get(step.turnIndex);\n      if (existing) {\n        existing.push(step);\n      } else {\n        turnMap.set(step.turnIndex, [step]);\n      }\n    }\n    return Array.from(turnMap.entries())\n      .sort(([a], [b]) => a - b)\n      .map(([turnIndex, stepsForTurn]) => ({\n        turnIndex,\n        steps: stepsForTurn,\n        isParallel: stepsForTurn.length > 1,\n      }));\n  }),\n}));\n\n// ============================================================================\n// Tests\n// ============================================================================\n\ndescribe(\"usePacketProcessor\", () => {\n  describe(\"initial state\", () => {\n    test(\"returns empty arrays when no packets\", () => {\n      const { result } = renderHook(() => usePacketProcessor([], 1));\n\n      expect(result.current.toolGroups).toEqual([]);\n      expect(result.current.displayGroups).toEqual([]);\n      expect(result.current.toolTurnGroups).toEqual([]);\n    });\n\n    test(\"returns empty citations when no packets\", () => {\n      const { result } = renderHook(() => usePacketProcessor([], 1));\n\n      expect(result.current.citations).toEqual([]);\n      expect(result.current.citationMap).toEqual({});\n    });\n\n    test(\"initializes stopPacketSeen to false\", () => {\n      const { result } = renderHook(() => usePacketProcessor([], 1));\n\n      expect(result.current.stopPacketSeen).toBe(false);\n    });\n\n    test(\"initializes isComplete to false\", () => {\n      const { result } = renderHook(() => usePacketProcessor([], 1));\n\n      expect(result.current.isComplete).toBe(false);\n    });\n\n    test(\"provides stable callback references\", () => {\n      const { result, rerender } = renderHook(() => usePacketProcessor([], 1));\n\n      const onRenderComplete1 = result.current.onRenderComplete;\n      const markAllToolsDisplayed1 = result.current.markAllToolsDisplayed;\n\n      rerender();\n\n      expect(result.current.onRenderComplete).toBe(onRenderComplete1);\n      expect(result.current.markAllToolsDisplayed).toBe(markAllToolsDisplayed1);\n    });\n  });\n\n  describe(\"nodeId changes\", () => {\n    test(\"resets state when nodeId changes\", () => {\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n      ];\n\n      const { result, rerender } = renderHook(\n        ({ packets, nodeId }) => usePacketProcessor(packets, nodeId),\n        { initialProps: { packets, nodeId: 1 } }\n      );\n\n      expect(result.current.toolGroups.length).toBe(1);\n\n      // Change nodeId\n      rerender({ packets: [], nodeId: 2 });\n\n      expect(result.current.toolGroups).toEqual([]);\n    });\n\n    test(\"processes new packets after nodeId change\", () => {\n      const packets1 = [createSearchToolStartPacket({ turn_index: 0 })];\n      const packets2 = [createMessageStartPacket({ turn_index: 0 })];\n\n      const { result, rerender } = renderHook(\n        ({ packets, nodeId }) => usePacketProcessor(packets, nodeId),\n        { initialProps: { packets: packets1, nodeId: 1 } }\n      );\n\n      expect(result.current.toolGroups.length).toBe(1);\n\n      rerender({ packets: packets2, nodeId: 2 });\n\n      expect(result.current.toolGroups.length).toBe(0);\n      expect(result.current.displayGroups.length).toBe(1);\n    });\n  });\n\n  describe(\"stream reset detection\", () => {\n    test(\"resets state when packets array shrinks\", () => {\n      const packets1 = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n        createMessageStartPacket({ turn_index: 1 }),\n      ];\n      const packets2 = [createSearchToolStartPacket({ turn_index: 0 })];\n\n      const { result, rerender } = renderHook(\n        ({ packets }) => usePacketProcessor(packets, 1),\n        { initialProps: { packets: packets1 } }\n      );\n\n      expect(result.current.finalAnswerComing).toBe(true);\n\n      // Shrink packets (simulates stream reset)\n      rerender({ packets: packets2 });\n\n      expect(result.current.finalAnswerComing).toBe(false);\n    });\n\n    test(\"resets renderComplete on stream reset\", () => {\n      const packets = [\n        createMessageStartPacket({ turn_index: 0 }),\n        createStopPacket(),\n      ];\n\n      const { result, rerender } = renderHook(\n        ({ packets }) => usePacketProcessor(packets, 1),\n        { initialProps: { packets } }\n      );\n\n      // Trigger render complete\n      act(() => {\n        result.current.onRenderComplete();\n      });\n\n      expect(result.current.isComplete).toBe(true);\n\n      // Shrink packets\n      rerender({ packets: [createMessageStartPacket({ turn_index: 0 })] });\n\n      expect(result.current.isComplete).toBe(false);\n    });\n  });\n\n  describe(\"incremental processing\", () => {\n    test(\"processes only new packets on update\", () => {\n      const { result, rerender } = renderHook(\n        ({ packets }) => usePacketProcessor(packets, 1),\n        { initialProps: { packets: [] as Packet[] } }\n      );\n\n      expect(result.current.toolGroups.length).toBe(0);\n\n      // Add packets\n      const packets = [createSearchToolStartPacket({ turn_index: 0 })];\n      rerender({ packets });\n\n      expect(result.current.toolGroups.length).toBe(1);\n\n      // Add more packets\n      const morePackets = [\n        ...packets,\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n      ];\n      rerender({ packets: morePackets });\n\n      expect(result.current.toolGroups.length).toBe(1);\n    });\n\n    test(\"handles rapid packet updates\", () => {\n      const { result, rerender } = renderHook(\n        ({ packets }) => usePacketProcessor(packets, 1),\n        { initialProps: { packets: [] as Packet[] } }\n      );\n\n      // Simulate rapid streaming updates\n      for (let i = 0; i < 10; i++) {\n        const packets = Array.from({ length: i + 1 }, (_, j) =>\n          j === 0\n            ? createSearchToolStartPacket({ turn_index: 0 })\n            : createPacket(\n                PacketType.SEARCH_TOOL_QUERIES_DELTA,\n                { turn_index: 0 },\n                { queries: [`q${j}`] }\n              )\n        );\n        rerender({ packets });\n      }\n\n      expect(result.current.toolGroups.length).toBe(1);\n    });\n  });\n\n  describe(\"displayGroups derivation\", () => {\n    test(\"returns empty when tools exist but finalAnswerComing is false\", () => {\n      const packets = [createSearchToolStartPacket({ turn_index: 0 })];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.toolGroups.length).toBe(1);\n      expect(result.current.displayGroups.length).toBe(0);\n      expect(result.current.finalAnswerComing).toBe(false);\n    });\n\n    test(\"returns potentialDisplayGroups when finalAnswerComing is true\", () => {\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n        createMessageStartPacket({ turn_index: 1 }),\n      ];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.finalAnswerComing).toBe(true);\n      expect(result.current.displayGroups.length).toBe(1);\n    });\n\n    test(\"returns potentialDisplayGroups when no tools exist\", () => {\n      const packets = [createMessageStartPacket({ turn_index: 0 })];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.toolGroups.length).toBe(0);\n      expect(result.current.displayGroups.length).toBe(1);\n    });\n\n    test(\"returns potentialDisplayGroups when forceShowAnswer triggered\", () => {\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createMessageStartPacket({ turn_index: 1 }),\n      ];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      // Initially visible because finalAnswerComing is true\n      expect(result.current.displayGroups.length).toBe(1);\n\n      // Add tool after message to reset finalAnswerComing\n      const { result: result2 } = renderHook(() =>\n        usePacketProcessor(\n          [\n            createSearchToolStartPacket({ turn_index: 0 }),\n            // No message yet, so displayGroups should be empty\n          ],\n          2\n        )\n      );\n\n      expect(result2.current.displayGroups.length).toBe(0);\n\n      // Force show answer\n      act(() => {\n        result2.current.markAllToolsDisplayed();\n      });\n\n      expect(result2.current.displayGroups.length).toBe(0); // Still 0 because no message packet\n    });\n  });\n\n  describe(\"tool-after-message transition\", () => {\n    test(\"resets renderComplete on transition from finalAnswerComing true to false\", () => {\n      // Start with message (finalAnswerComing=true)\n      const initialPackets = [createMessageStartPacket({ turn_index: 0 })];\n\n      const { result, rerender } = renderHook(\n        ({ packets }) => usePacketProcessor(packets, 1),\n        { initialProps: { packets: initialPackets } }\n      );\n\n      expect(result.current.finalAnswerComing).toBe(true);\n\n      // Add a tool after the message - this simulates the Claude workaround scenario\n      // where Claude sends a message first, then decides to call a tool\n      const packetsWithToolAfter = [\n        ...initialPackets,\n        createSearchToolStartPacket({ turn_index: 1 }),\n      ];\n      rerender({ packets: packetsWithToolAfter });\n\n      // The tool should reset finalAnswerComing since it's an actual tool call\n      expect(result.current.finalAnswerComing).toBe(false);\n    });\n  });\n\n  describe(\"onRenderComplete callback\", () => {\n    test(\"sets isComplete when finalAnswerComing and stopPacketSeen\", () => {\n      const packets = [\n        createMessageStartPacket({ turn_index: 0 }),\n        createStopPacket(),\n      ];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.finalAnswerComing).toBe(true);\n      expect(result.current.stopPacketSeen).toBe(true);\n      expect(result.current.isComplete).toBe(false);\n\n      act(() => {\n        result.current.onRenderComplete();\n      });\n\n      expect(result.current.isComplete).toBe(true);\n    });\n\n    test(\"does not set isComplete when finalAnswerComing is false\", () => {\n      const packets = [createSearchToolStartPacket({ turn_index: 0 })];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.finalAnswerComing).toBe(false);\n\n      act(() => {\n        result.current.onRenderComplete();\n      });\n\n      expect(result.current.isComplete).toBe(false);\n    });\n  });\n\n  describe(\"markAllToolsDisplayed callback\", () => {\n    test(\"forces displayGroups to show even when finalAnswerComing is false\", () => {\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createMessageStartPacket({ turn_index: 1 }),\n      ];\n\n      const { result, rerender } = renderHook(\n        ({ packets }) => usePacketProcessor(packets, 1),\n        { initialProps: { packets } }\n      );\n\n      // Initially visible since finalAnswerComing is true after MESSAGE_START\n      expect(result.current.displayGroups.length).toBe(1);\n\n      // Reset to a state where no message, with forceShow\n      const toolOnlyPackets = [createSearchToolStartPacket({ turn_index: 0 })];\n\n      const { result: result2 } = renderHook(() =>\n        usePacketProcessor(toolOnlyPackets, 2)\n      );\n\n      expect(result2.current.displayGroups.length).toBe(0);\n\n      act(() => {\n        result2.current.markAllToolsDisplayed();\n      });\n\n      // Now should be ready to show (though still empty because no message in packets)\n      // The key thing is forceShowAnswer flag is set\n      expect(result2.current.finalAnswerComing).toBe(false);\n    });\n  });\n\n  describe(\"isComplete flag\", () => {\n    test(\"false when stopPacketSeen is false\", () => {\n      const packets = [createMessageStartPacket({ turn_index: 0 })];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      act(() => {\n        result.current.onRenderComplete();\n      });\n\n      expect(result.current.stopPacketSeen).toBe(false);\n      expect(result.current.isComplete).toBe(false);\n    });\n\n    test(\"false when renderComplete is false\", () => {\n      const packets = [\n        createMessageStartPacket({ turn_index: 0 }),\n        createStopPacket(),\n      ];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.stopPacketSeen).toBe(true);\n      expect(result.current.isComplete).toBe(false);\n    });\n\n    test(\"true only when BOTH stopPacketSeen and renderComplete are true\", () => {\n      const packets = [\n        createMessageStartPacket({ turn_index: 0 }),\n        createStopPacket(),\n      ];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.stopPacketSeen).toBe(true);\n      expect(result.current.isComplete).toBe(false);\n\n      act(() => {\n        result.current.onRenderComplete();\n      });\n\n      expect(result.current.isComplete).toBe(true);\n    });\n  });\n\n  describe(\"hasSteps flag\", () => {\n    test(\"false when no tool groups\", () => {\n      const packets = [createMessageStartPacket({ turn_index: 0 })];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.hasSteps).toBe(false);\n    });\n\n    test(\"true when tool groups exist\", () => {\n      const packets = [createSearchToolStartPacket({ turn_index: 0 })];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.hasSteps).toBe(true);\n    });\n  });\n\n  describe(\"toolTurnGroups transformation\", () => {\n    test(\"groups tools by turn index\", () => {\n      const packets = [\n        createBranchingPacket(2, 0),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 0 }),\n        createPacket(PacketType.SECTION_END, { turn_index: 0, tab_index: 1 }),\n      ];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.toolTurnGroups.length).toBe(1);\n      expect(result.current.toolTurnGroups[0]?.isParallel).toBe(true);\n      expect(result.current.toolTurnGroups[0]?.steps.length).toBe(2);\n    });\n  });\n\n  describe(\"expectedBranchesPerTurn\", () => {\n    test(\"exposes branch metadata from packets\", () => {\n      const packets = [\n        createBranchingPacket(3, 0),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 0 }),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 1 }),\n        createSearchToolStartPacket({ turn_index: 0, tab_index: 2 }),\n      ];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.expectedBranchesPerTurn.get(0)).toBe(3);\n    });\n  });\n\n  describe(\"complex scenarios\", () => {\n    test(\"full flow: tools -> message -> complete\", () => {\n      const packets = [\n        createSearchToolStartPacket({ turn_index: 0 }),\n        createPacket(\n          PacketType.SEARCH_TOOL_QUERIES_DELTA,\n          { turn_index: 0 },\n          { queries: [\"test\"] }\n        ),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n        createMessageStartPacket({ turn_index: 1 }, 1.5),\n        createPacket(\n          PacketType.MESSAGE_DELTA,\n          { turn_index: 1 },\n          { content: \"Result:\" }\n        ),\n        createStopPacket(StopReason.FINISHED),\n      ];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.toolGroups.length).toBe(1);\n      expect(result.current.displayGroups.length).toBe(1);\n      expect(result.current.hasSteps).toBe(true);\n      expect(result.current.stopPacketSeen).toBe(true);\n      expect(result.current.stopReason).toBe(StopReason.FINISHED);\n      expect(result.current.finalAnswerComing).toBe(true);\n      expect(result.current.toolProcessingDuration).toBe(1.5);\n\n      act(() => {\n        result.current.onRenderComplete();\n      });\n\n      expect(result.current.isComplete).toBe(true);\n    });\n\n    test(\"handles image generation flow\", () => {\n      const packets = [\n        createPacket(PacketType.IMAGE_GENERATION_TOOL_START, { turn_index: 0 }),\n        createPacket(\n          PacketType.IMAGE_GENERATION_TOOL_DELTA,\n          { turn_index: 0 },\n          {\n            images: [\n              {\n                file_id: \"img1\",\n                url: \"http://example.com/1.png\",\n                revised_prompt: \"test\",\n              },\n            ],\n          }\n        ),\n        createPacket(PacketType.SECTION_END, { turn_index: 0 }),\n        createStopPacket(),\n      ];\n\n      const { result } = renderHook(() => usePacketProcessor(packets, 1));\n\n      expect(result.current.isGeneratingImage).toBe(true);\n      expect(result.current.generatedImageCount).toBe(1);\n      expect(result.current.finalAnswerComing).toBe(true);\n      expect(result.current.displayGroups.length).toBe(1);\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/usePacketProcessor.ts",
    "content": "import { useRef, useState, useMemo, useCallback } from \"react\";\nimport {\n  Packet,\n  StreamingCitation,\n  StopReason,\n} from \"@/app/app/services/streamingModels\";\nimport { CitationMap } from \"@/app/app/interfaces\";\nimport { OnyxDocument } from \"@/lib/search/interfaces\";\nimport {\n  ProcessorState,\n  GroupedPacket,\n  createInitialState,\n  processPackets,\n} from \"@/app/app/message/messageComponents/timeline/hooks/packetProcessor\";\nimport {\n  transformPacketGroups,\n  groupStepsByTurn,\n  TurnGroup,\n} from \"@/app/app/message/messageComponents/timeline/transformers\";\n\nexport interface UsePacketProcessorResult {\n  // Data\n  toolGroups: GroupedPacket[];\n  displayGroups: GroupedPacket[];\n  toolTurnGroups: TurnGroup[];\n  citations: StreamingCitation[];\n  citationMap: CitationMap;\n  documentMap: Map<string, OnyxDocument>;\n\n  // Status (derived from packets)\n  stopPacketSeen: boolean;\n  stopReason: StopReason | undefined;\n  hasSteps: boolean;\n  expectedBranchesPerTurn: Map<number, number>;\n  isGeneratingImage: boolean;\n  generatedImageCount: number;\n  // Whether final answer is coming (MESSAGE_START seen)\n  finalAnswerComing: boolean;\n  // Tool processing duration from backend (via MESSAGE_START packet)\n  toolProcessingDuration: number | undefined;\n\n  // Completion: stopPacketSeen && renderComplete\n  isComplete: boolean;\n\n  // Callbacks\n  onRenderComplete: () => void;\n  markAllToolsDisplayed: () => void;\n}\n\n/**\n * Hook for processing streaming packets in AgentMessage.\n *\n * Architecture:\n * - Processor state in ref: incremental processing, synchronous, no double render\n * - Only true UI state: renderComplete (set by callback), forceShowAnswer (override)\n * - Everything else derived from packets\n *\n * Key insight: finalAnswerComing and stopPacketSeen are DERIVED from packets,\n * not independent state. Only renderComplete needs useState.\n */\nexport function usePacketProcessor(\n  rawPackets: Packet[],\n  nodeId: number\n): UsePacketProcessorResult {\n  // Processor in ref: incremental, synchronous, no double render\n  const stateRef = useRef<ProcessorState>(createInitialState(nodeId));\n\n  // Only TRUE UI state: \"has renderer finished?\"\n  const [renderComplete, setRenderComplete] = useState(false);\n\n  // Optional override to force showing answer\n  const [forceShowAnswer, setForceShowAnswer] = useState(false);\n\n  // Reset on nodeId change\n  if (stateRef.current.nodeId !== nodeId) {\n    stateRef.current = createInitialState(nodeId);\n    setRenderComplete(false);\n    setForceShowAnswer(false);\n  }\n\n  // Track for transition detection\n  const prevNextPacketIndex = stateRef.current.nextPacketIndex;\n  const prevFinalAnswerComing = stateRef.current.finalAnswerComing;\n\n  // Detect stream reset (packets shrunk)\n  if (prevNextPacketIndex > rawPackets.length) {\n    stateRef.current = createInitialState(nodeId);\n    setRenderComplete(false);\n    setForceShowAnswer(false);\n  }\n\n  // Process packets synchronously (incremental) - only if new packets arrived\n  if (rawPackets.length > stateRef.current.nextPacketIndex) {\n    stateRef.current = processPackets(stateRef.current, rawPackets);\n  }\n\n  // Reset renderComplete on tool-after-message transition\n  if (prevFinalAnswerComing && !stateRef.current.finalAnswerComing) {\n    setRenderComplete(false);\n  }\n\n  // Access state directly (result arrays are built in processPackets)\n  const state = stateRef.current;\n\n  // Derive displayGroups (not state!)\n  const effectiveFinalAnswerComing = state.finalAnswerComing || forceShowAnswer;\n  const displayGroups = useMemo(() => {\n    if (effectiveFinalAnswerComing || state.toolGroups.length === 0) {\n      return state.potentialDisplayGroups;\n    }\n    return [];\n  }, [\n    effectiveFinalAnswerComing,\n    state.toolGroups.length,\n    state.potentialDisplayGroups,\n  ]);\n\n  // Transform toolGroups to timeline format\n  const toolTurnGroups = useMemo(() => {\n    const allSteps = transformPacketGroups(state.toolGroups);\n    return groupStepsByTurn(allSteps);\n  }, [state.toolGroups]);\n\n  // Callback reads from ref: always current value, no ref needed in component\n  const onRenderComplete = useCallback(() => {\n    if (stateRef.current.finalAnswerComing) {\n      setRenderComplete(true);\n    }\n  }, []);\n\n  const markAllToolsDisplayed = useCallback(() => {\n    setForceShowAnswer(true);\n  }, []);\n\n  return {\n    // Data\n    toolGroups: state.toolGroups,\n    displayGroups,\n    toolTurnGroups,\n    citations: state.citations,\n    citationMap: state.citationMap,\n    documentMap: state.documentMap,\n\n    // Status (derived from packets)\n    stopPacketSeen: state.stopPacketSeen,\n    stopReason: state.stopReason,\n    hasSteps: toolTurnGroups.length > 0,\n    expectedBranchesPerTurn: state.expectedBranches,\n    isGeneratingImage: state.isGeneratingImage,\n    generatedImageCount: state.generatedImageCount,\n    finalAnswerComing: state.finalAnswerComing,\n    toolProcessingDuration: state.toolProcessingDuration,\n\n    // Completion: stopPacketSeen && renderComplete\n    isComplete: state.stopPacketSeen && renderComplete,\n\n    // Callbacks\n    onRenderComplete,\n    markAllToolsDisplayed,\n  };\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/useStreamingDuration.ts",
    "content": "import { useState, useEffect, useRef } from \"react\";\n\n/**\n * Hook to track elapsed streaming duration with efficient updates.\n *\n * Uses requestAnimationFrame for accurate timing but only triggers re-renders\n * when the elapsed seconds value actually changes (once per second).\n *\n * @param isStreaming - Whether streaming is currently active\n * @param startTime - Timestamp when streaming started (from Date.now())\n * @param backendDuration - Duration from backend when available (freezes timer)\n * @returns Elapsed seconds since streaming started\n */\nexport function useStreamingDuration(\n  isStreaming: boolean,\n  startTime: number | undefined,\n  backendDuration?: number\n): number {\n  const [elapsedSeconds, setElapsedSeconds] = useState(0);\n  const rafRef = useRef<number | null>(null);\n  const lastElapsedRef = useRef<number>(0);\n\n  // Determine if we should run the live timer\n  // Stop the timer when backend duration is available\n  const shouldRunTimer = isStreaming && backendDuration === undefined;\n\n  useEffect(() => {\n    if (!shouldRunTimer || !startTime) {\n      // Don't reset when stopping - preserve last calculated value\n      // Only reset when explicitly given no start time\n      if (!startTime) {\n        setElapsedSeconds(0);\n        lastElapsedRef.current = 0;\n      }\n      return;\n    }\n\n    const updateElapsed = () => {\n      const now = Date.now();\n      const elapsed = Math.floor((now - startTime) / 1000);\n\n      // Only update state when seconds change to avoid unnecessary re-renders\n      if (elapsed !== lastElapsedRef.current) {\n        lastElapsedRef.current = elapsed;\n        setElapsedSeconds(elapsed);\n      }\n\n      rafRef.current = requestAnimationFrame(updateElapsed);\n    };\n\n    // Start the animation loop\n    rafRef.current = requestAnimationFrame(updateElapsed);\n\n    return () => {\n      if (rafRef.current !== null) {\n        cancelAnimationFrame(rafRef.current);\n        rafRef.current = null;\n      }\n    };\n  }, [shouldRunTimer, startTime]);\n\n  // Return backend duration if provided, otherwise return live elapsed time\n  return backendDuration !== undefined ? backendDuration : elapsedSeconds;\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/useTimelineExpansion.ts",
    "content": "import { useState, useEffect, useCallback, useRef } from \"react\";\nimport { TurnGroup } from \"../transformers\";\n\nexport interface TimelineExpansionState {\n  isExpanded: boolean;\n  handleToggle: () => void;\n  parallelActiveTab: string;\n  setParallelActiveTab: (tab: string) => void;\n}\n\n/**\n * Manages expansion state for the timeline.\n * Auto-collapses when streaming completes or message content starts, and syncs parallel tab selection.\n */\nexport function useTimelineExpansion(\n  stopPacketSeen: boolean,\n  lastTurnGroup: TurnGroup | undefined,\n  hasDisplayContent: boolean = false\n): TimelineExpansionState {\n  const [isExpanded, setIsExpanded] = useState(false);\n  const [parallelActiveTab, setParallelActiveTab] = useState<string>(\"\");\n  const userHasToggled = useRef(false);\n\n  const handleToggle = useCallback(() => {\n    userHasToggled.current = true;\n    setIsExpanded((prev) => !prev);\n  }, []);\n\n  // Auto-collapse when streaming completes or message content starts\n  // BUT respect user intent - if they've manually toggled, don't auto-collapse\n  useEffect(() => {\n    if ((stopPacketSeen || hasDisplayContent) && !userHasToggled.current) {\n      setIsExpanded(false);\n    }\n  }, [stopPacketSeen, hasDisplayContent]);\n\n  // Sync active tab when parallel turn group changes\n  useEffect(() => {\n    if (lastTurnGroup?.isParallel && lastTurnGroup.steps.length > 0) {\n      const validTabs = lastTurnGroup.steps.map((s) => s.key);\n      const firstStep = lastTurnGroup.steps[0];\n      if (firstStep && !validTabs.includes(parallelActiveTab)) {\n        setParallelActiveTab(firstStep.key);\n      }\n    }\n  }, [lastTurnGroup, parallelActiveTab]);\n\n  return {\n    isExpanded,\n    handleToggle,\n    parallelActiveTab,\n    setParallelActiveTab,\n  };\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/useTimelineHeader.ts",
    "content": "import { useMemo } from \"react\";\nimport { TurnGroup } from \"../transformers\";\nimport {\n  PacketType,\n  SearchToolPacket,\n  StopReason,\n  CustomToolStart,\n} from \"@/app/app/services/streamingModels\";\nimport { constructCurrentSearchState } from \"@/app/app/message/messageComponents/timeline/renderers/search/searchStateUtils\";\n\nexport interface TimelineHeaderResult {\n  headerText: string;\n  hasPackets: boolean;\n  userStopped: boolean;\n}\n\n/**\n * Hook that determines timeline header state based on current activity.\n * Returns header text, whether there are packets, and whether user stopped.\n */\nexport function useTimelineHeader(\n  turnGroups: TurnGroup[],\n  stopReason?: StopReason,\n  isGeneratingImage?: boolean\n): TimelineHeaderResult {\n  return useMemo(() => {\n    const hasPackets = turnGroups.length > 0;\n    const userStopped = stopReason === StopReason.USER_CANCELLED;\n\n    // If generating image with no tool packets, show image generation header\n    if (isGeneratingImage && !hasPackets) {\n      return { headerText: \"Generating image...\", hasPackets, userStopped };\n    }\n\n    if (!hasPackets) {\n      return { headerText: \"Thinking...\", hasPackets, userStopped };\n    }\n\n    // Get the last (current) turn group\n    const currentTurn = turnGroups[turnGroups.length - 1];\n    if (!currentTurn) {\n      return { headerText: \"Thinking...\", hasPackets, userStopped };\n    }\n\n    const currentStep = currentTurn.steps[0];\n    if (!currentStep?.packets?.length) {\n      return { headerText: \"Thinking...\", hasPackets, userStopped };\n    }\n\n    const firstPacket = currentStep.packets[0];\n    if (!firstPacket) {\n      return { headerText: \"Thinking...\", hasPackets, userStopped };\n    }\n\n    const packetType = firstPacket.obj.type;\n\n    // Determine header based on packet type\n    if (packetType === PacketType.SEARCH_TOOL_START) {\n      const searchState = constructCurrentSearchState(\n        currentStep.packets as SearchToolPacket[]\n      );\n      let headerText: string;\n      if (searchState.hasResults && !searchState.isInternetSearch) {\n        headerText = \"Reading\";\n      } else {\n        headerText = searchState.isInternetSearch\n          ? \"Searching the web\"\n          : \"Searching internal documents\";\n      }\n      return { headerText, hasPackets, userStopped };\n    }\n\n    if (packetType === PacketType.FETCH_TOOL_START) {\n      return { headerText: \"Reading\", hasPackets, userStopped };\n    }\n\n    if (packetType === PacketType.PYTHON_TOOL_START) {\n      return { headerText: \"Executing code\", hasPackets, userStopped };\n    }\n\n    if (packetType === PacketType.IMAGE_GENERATION_TOOL_START) {\n      return { headerText: \"Generating images\", hasPackets, userStopped };\n    }\n\n    if (packetType === PacketType.FILE_READER_START) {\n      return { headerText: \"Reading file\", hasPackets, userStopped };\n    }\n\n    if (packetType === PacketType.CUSTOM_TOOL_START) {\n      const toolName = (firstPacket.obj as CustomToolStart).tool_name;\n      return {\n        headerText: toolName ? `Executing ${toolName}` : \"Executing tool\",\n        hasPackets,\n        userStopped,\n      };\n    }\n\n    if (\n      packetType === PacketType.MEMORY_TOOL_START ||\n      packetType === PacketType.MEMORY_TOOL_NO_ACCESS\n    ) {\n      return { headerText: \"Updating memory...\", hasPackets, userStopped };\n    }\n\n    if (packetType === PacketType.REASONING_START) {\n      return { headerText: \"Thinking\", hasPackets, userStopped };\n    }\n\n    if (packetType === PacketType.DEEP_RESEARCH_PLAN_START) {\n      return { headerText: \"Generating plan\", hasPackets, userStopped };\n    }\n\n    if (packetType === PacketType.RESEARCH_AGENT_START) {\n      return { headerText: \"Researching\", hasPackets, userStopped };\n    }\n\n    return { headerText: \"Thinking...\", hasPackets, userStopped };\n  }, [turnGroups, stopReason, isGeneratingImage]);\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/useTimelineMetrics.ts",
    "content": "import { useMemo } from \"react\";\nimport {\n  TurnGroup,\n  TransformedStep,\n} from \"@/app/app/message/messageComponents/timeline/transformers\";\nimport {\n  isResearchAgentPackets,\n  stepSupportsCollapsedStreaming,\n} from \"@/app/app/message/messageComponents/timeline/packetHelpers\";\n\nexport interface TimelineMetrics {\n  totalSteps: number;\n  isSingleStep: boolean;\n  lastTurnGroup: TurnGroup | undefined;\n  lastStep: TransformedStep | undefined;\n  lastStepIsResearchAgent: boolean;\n  lastStepSupportsCollapsedStreaming: boolean;\n}\n\n/**\n * Memoizes derived metrics from turn groups to avoid recomputation on every render.\n * Single-pass computation where possible for performance with large packet counts.\n */\nexport function useTimelineMetrics(\n  turnGroups: TurnGroup[],\n  userStopped: boolean\n): TimelineMetrics {\n  return useMemo(() => {\n    // Compute in single pass\n    let totalSteps = 0;\n    for (const tg of turnGroups) {\n      totalSteps += tg.steps.length;\n    }\n\n    const lastTurnGroup = turnGroups[turnGroups.length - 1];\n    const lastStep = lastTurnGroup?.steps[lastTurnGroup.steps.length - 1];\n\n    // Analyze last step packets once\n    const lastStepIsResearchAgent = lastStep\n      ? isResearchAgentPackets(lastStep.packets)\n      : false;\n    const lastStepSupportsCollapsedStreaming = lastStep\n      ? stepSupportsCollapsedStreaming(lastStep.packets)\n      : false;\n\n    return {\n      totalSteps,\n      isSingleStep: totalSteps === 1 && !userStopped,\n      lastTurnGroup,\n      lastStep,\n      lastStepIsResearchAgent,\n      lastStepSupportsCollapsedStreaming,\n    };\n  }, [turnGroups, userStopped]);\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/useTimelineStepState.ts",
    "content": "import { useMemo } from \"react\";\nimport { MemoryToolPacket } from \"@/app/app/services/streamingModels\";\nimport { TurnGroup } from \"@/app/app/message/messageComponents/timeline/transformers\";\nimport { constructCurrentMemoryState } from \"@/app/app/message/messageComponents/timeline/renderers/memory/memoryStateUtils\";\nimport { isMemoryToolPackets } from \"@/app/app/message/messageComponents/timeline/packetHelpers\";\n\ninterface MemoryStepState {\n  memoryText: string | null;\n  memoryOperation: \"add\" | \"update\" | null;\n  memoryId: number | null;\n  memoryIndex: number | null;\n  isMemoryOnly: boolean;\n}\n\n/**\n * Extracts memory state from the first memory-tool step in turnGroups\n * and determines whether the timeline contains only memory steps.\n */\nexport function useTimelineStepState(turnGroups: TurnGroup[]): MemoryStepState {\n  return useMemo(() => {\n    let memoryText: string | null = null;\n    let memoryOperation: \"add\" | \"update\" | null = null;\n    let memoryId: number | null = null;\n    let memoryIndex: number | null = null;\n    let foundMemory = false;\n\n    let totalSteps = 0;\n    let allMemory = true;\n\n    for (const tg of turnGroups) {\n      for (const step of tg.steps) {\n        totalSteps++;\n        const isMem = isMemoryToolPackets(step.packets);\n\n        if (!isMem) {\n          allMemory = false;\n        }\n\n        if (!foundMemory && isMem) {\n          foundMemory = true;\n          const state = constructCurrentMemoryState(\n            step.packets as unknown as MemoryToolPacket[]\n          );\n          memoryText = state.memoryText;\n          memoryOperation = state.operation;\n          memoryId = state.memoryId;\n          memoryIndex = state.index;\n        }\n      }\n    }\n\n    return {\n      memoryText,\n      memoryOperation,\n      memoryId,\n      memoryIndex,\n      isMemoryOnly: totalSteps > 0 && allMemory,\n    };\n  }, [turnGroups]);\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/hooks/useTimelineUIState.ts",
    "content": "import { useMemo } from \"react\";\nimport { TurnGroup, TransformedStep } from \"../transformers\";\n\n// =============================================================================\n// Timeline UI State Machine\n// =============================================================================\n\nexport enum TimelineUIState {\n  /** No packets yet, showing shimmer */\n  EMPTY = \"EMPTY\",\n  /** Final message only, no timeline */\n  DISPLAY_CONTENT_ONLY = \"DISPLAY_CONTENT_ONLY\",\n  /** Active single tool execution */\n  STREAMING_SEQUENTIAL = \"STREAMING_SEQUENTIAL\",\n  /** Active parallel tool execution */\n  STREAMING_PARALLEL = \"STREAMING_PARALLEL\",\n  /** User cancelled */\n  STOPPED = \"STOPPED\",\n  /** Done, timeline collapsed */\n  COMPLETED_COLLAPSED = \"COMPLETED_COLLAPSED\",\n  /** Done, timeline expanded */\n  COMPLETED_EXPANDED = \"COMPLETED_EXPANDED\",\n}\n\nexport interface TimelineUIStateInput {\n  /** Whether the stop packet has been seen */\n  stopPacketSeen: boolean;\n  /** Whether there are any packets in the timeline */\n  hasPackets: boolean;\n  /** Whether there is display content after timeline */\n  hasDisplayContent: boolean;\n  /** Whether the user stopped the generation */\n  userStopped: boolean;\n  /** Whether the timeline is expanded */\n  isExpanded: boolean;\n  /** The last turn group (for parallel detection) */\n  lastTurnGroup: TurnGroup | undefined;\n  /** The last step */\n  lastStep: TransformedStep | undefined;\n  /** Whether the last step supports collapsed streaming rendering */\n  lastStepSupportsCollapsedStreaming: boolean;\n  /** Whether the last step has renderable collapsed streaming content */\n  lastStepHasCollapsedContent: boolean;\n  /** Whether the last step is a research agent */\n  lastStepIsResearchAgent: boolean;\n  /** Whether the parallel active step supports collapsed streaming rendering */\n  parallelActiveStepSupportsCollapsedStreaming: boolean;\n  /** Whether the parallel active step has renderable collapsed streaming content */\n  parallelActiveStepHasCollapsedContent: boolean;\n  /** Whether image generation is in progress */\n  isGeneratingImage: boolean;\n  /** Whether final answer is coming (MESSAGE_START received) */\n  finalAnswerComing: boolean;\n}\n\nexport interface TimelineUIStateResult {\n  /** The current UI state */\n  uiState: TimelineUIState;\n\n  // Convenience booleans\n  /** Whether actively streaming (tool execution in progress) */\n  isStreaming: boolean;\n  /** Whether completed (stop packet seen) */\n  isCompleted: boolean;\n  /** Whether actively executing tools (streaming without display content, or generating image) */\n  isActivelyExecuting: boolean;\n\n  // Display flags\n  /** Show collapsed compact content for single step */\n  showCollapsedCompact: boolean;\n  /** Show collapsed compact content for parallel tools */\n  showCollapsedParallel: boolean;\n  /** Show parallel tabs in header */\n  showParallelTabs: boolean;\n  /** Show the \"Done\" indicator step in expanded view */\n  showDoneStep: boolean;\n  /** Show the \"Stopped\" indicator step in expanded view */\n  showStoppedStep: boolean;\n  /** For stepIsLast calculation (excludes research agent) */\n  hasDoneIndicator: boolean;\n\n  // Styling flags\n  /** Show tinted background on header */\n  showTintedBackground: boolean;\n  /** Show rounded bottom on header */\n  showRoundedBottom: boolean;\n}\n\n/**\n * Derives the current UI state from timeline inputs.\n * Centralizes all boolean logic for timeline rendering decisions.\n */\nexport function useTimelineUIState(\n  input: TimelineUIStateInput\n): TimelineUIStateResult {\n  return useMemo(() => {\n    const {\n      stopPacketSeen,\n      hasPackets,\n      hasDisplayContent,\n      userStopped,\n      isExpanded,\n      lastTurnGroup,\n      lastStep,\n      lastStepSupportsCollapsedStreaming,\n      lastStepHasCollapsedContent,\n      lastStepIsResearchAgent,\n      parallelActiveStepSupportsCollapsedStreaming,\n      parallelActiveStepHasCollapsedContent,\n      isGeneratingImage,\n      finalAnswerComing,\n    } = input;\n\n    // Derive the primary UI state\n    let uiState: TimelineUIState;\n\n    if (!hasPackets && !hasDisplayContent && !stopPacketSeen) {\n      uiState = TimelineUIState.EMPTY;\n    } else if (hasDisplayContent && !hasPackets && !isGeneratingImage) {\n      uiState = TimelineUIState.DISPLAY_CONTENT_ONLY;\n    } else if (!stopPacketSeen && (!hasDisplayContent || isGeneratingImage)) {\n      // Actively executing tools\n      uiState = lastTurnGroup?.isParallel\n        ? TimelineUIState.STREAMING_PARALLEL\n        : TimelineUIState.STREAMING_SEQUENTIAL;\n    } else if (userStopped) {\n      uiState = TimelineUIState.STOPPED;\n    } else if (isExpanded) {\n      uiState = TimelineUIState.COMPLETED_EXPANDED;\n    } else {\n      uiState = TimelineUIState.COMPLETED_COLLAPSED;\n    }\n\n    // Convenience booleans\n    const isStreaming =\n      uiState === TimelineUIState.STREAMING_SEQUENTIAL ||\n      uiState === TimelineUIState.STREAMING_PARALLEL;\n    const isCompleted =\n      uiState === TimelineUIState.COMPLETED_COLLAPSED ||\n      uiState === TimelineUIState.COMPLETED_EXPANDED ||\n      uiState === TimelineUIState.STOPPED;\n    const isActivelyExecuting =\n      !stopPacketSeen && (!hasDisplayContent || isGeneratingImage);\n\n    // Parallel tabs in header only when collapsed during streaming\n    const showParallelTabs =\n      uiState === TimelineUIState.STREAMING_PARALLEL &&\n      !isExpanded &&\n      !!lastTurnGroup?.isParallel &&\n      (lastTurnGroup?.steps.length ?? 0) > 0;\n\n    // Collapsed streaming: show compact content below header (only during tool execution)\n    const showCollapsedCompact =\n      uiState === TimelineUIState.STREAMING_SEQUENTIAL &&\n      !isExpanded &&\n      !!lastStep &&\n      !lastTurnGroup?.isParallel &&\n      lastStepSupportsCollapsedStreaming &&\n      lastStepHasCollapsedContent;\n\n    // Collapsed parallel streaming content\n    const showCollapsedParallel =\n      showParallelTabs &&\n      !isExpanded &&\n      parallelActiveStepSupportsCollapsedStreaming &&\n      parallelActiveStepHasCollapsedContent;\n\n    // Done step: shown when expanded and completed (either normally or with display content)\n    // Also shown when finalAnswerComing is true (MESSAGE_START received)\n    const showDoneStep =\n      (stopPacketSeen || finalAnswerComing) &&\n      isExpanded &&\n      (!userStopped || hasDisplayContent);\n\n    // Stopped step: shown when user stopped without display content\n    const showStoppedStep =\n      stopPacketSeen && isExpanded && userStopped && !hasDisplayContent;\n\n    // For stepIsLast calculation: done indicator present (excludes research agent)\n    const hasDoneIndicator =\n      (stopPacketSeen || finalAnswerComing) &&\n      isExpanded &&\n      !userStopped &&\n      !lastStepIsResearchAgent;\n\n    // Styling flags\n    const showTintedBackground = isActivelyExecuting || isExpanded;\n    const showRoundedBottom =\n      !isExpanded && !showCollapsedCompact && !showCollapsedParallel;\n\n    return {\n      uiState,\n      isStreaming,\n      isCompleted,\n      isActivelyExecuting,\n      showCollapsedCompact,\n      showCollapsedParallel,\n      showParallelTabs,\n      showDoneStep,\n      showStoppedStep,\n      hasDoneIndicator,\n      showTintedBackground,\n      showRoundedBottom,\n    };\n  }, [input]);\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/packetHelpers.ts",
    "content": "import {\n  CODE_INTERPRETER_TOOL_TYPES,\n  Packet,\n  PacketType,\n  ToolCallArgumentDelta,\n} from \"@/app/app/services/streamingModels\";\n\n// Packet types with renderers supporting collapsed streaming mode.\n// TOOL_CALL_ARGUMENT_DELTA is intentionally excluded here because it requires\n// a tool_type check — it's handled separately in stepSupportsCollapsedStreaming.\nexport const COLLAPSED_STREAMING_PACKET_TYPES = new Set<PacketType>([\n  PacketType.SEARCH_TOOL_START,\n  PacketType.FETCH_TOOL_START,\n  PacketType.PYTHON_TOOL_START,\n  PacketType.CUSTOM_TOOL_START,\n  PacketType.RESEARCH_AGENT_START,\n  PacketType.REASONING_START,\n  PacketType.DEEP_RESEARCH_PLAN_START,\n]);\n\n// Check if packets belong to a research agent (handles its own Done indicator)\nexport const isResearchAgentPackets = (packets: Packet[]): boolean =>\n  packets.some((p) => p.obj.type === PacketType.RESEARCH_AGENT_START);\n\n// Check if packets belong to a search tool\nexport const isSearchToolPackets = (packets: Packet[]): boolean =>\n  packets.some((p) => p.obj.type === PacketType.SEARCH_TOOL_START);\n\n// Check if packets belong to a python tool\nexport const isPythonToolPackets = (packets: Packet[]): boolean =>\n  packets.some(\n    (p) =>\n      p.obj.type === PacketType.PYTHON_TOOL_START ||\n      (p.obj.type === PacketType.TOOL_CALL_ARGUMENT_DELTA &&\n        (p.obj as ToolCallArgumentDelta).tool_type ===\n          CODE_INTERPRETER_TOOL_TYPES.PYTHON)\n  );\n\n// Check if packets belong to reasoning\nexport const isReasoningPackets = (packets: Packet[]): boolean =>\n  packets.some((p) => p.obj.type === PacketType.REASONING_START);\n\n// Check if step supports collapsed streaming rendering mode\nexport const stepSupportsCollapsedStreaming = (packets: Packet[]): boolean =>\n  packets.some(\n    (p) =>\n      COLLAPSED_STREAMING_PACKET_TYPES.has(p.obj.type as PacketType) ||\n      (p.obj.type === PacketType.TOOL_CALL_ARGUMENT_DELTA &&\n        (p.obj as ToolCallArgumentDelta).tool_type ===\n          CODE_INTERPRETER_TOOL_TYPES.PYTHON)\n  );\n\n// Check if packets have content worth rendering in collapsed streaming mode.\n// Avoids rendering empty containers when only START packets have arrived.\nexport const stepHasCollapsedStreamingContent = (\n  packets: Packet[]\n): boolean => {\n  const packetTypes = new Set(\n    packets.map((packet) => packet.obj.type as PacketType)\n  );\n\n  // Errors should render even if no deltas arrived\n  if (packetTypes.has(PacketType.ERROR)) {\n    return true;\n  }\n\n  // Search tools need actual query/doc deltas before showing content\n  if (\n    packetTypes.has(PacketType.SEARCH_TOOL_QUERIES_DELTA) ||\n    packetTypes.has(PacketType.SEARCH_TOOL_DOCUMENTS_DELTA)\n  ) {\n    return true;\n  }\n\n  // Fetch tool shows a loading indicator once started\n  if (\n    packetTypes.has(PacketType.FETCH_TOOL_START) ||\n    packetTypes.has(PacketType.FETCH_TOOL_URLS) ||\n    packetTypes.has(PacketType.FETCH_TOOL_DOCUMENTS)\n  ) {\n    return true;\n  }\n\n  // Python tool renders code/output from the start packet onward\n  if (\n    packetTypes.has(PacketType.PYTHON_TOOL_START) ||\n    packetTypes.has(PacketType.PYTHON_TOOL_DELTA) ||\n    packets.some(\n      (p) =>\n        p.obj.type === PacketType.TOOL_CALL_ARGUMENT_DELTA &&\n        (p.obj as ToolCallArgumentDelta).tool_type ===\n          CODE_INTERPRETER_TOOL_TYPES.PYTHON\n    )\n  ) {\n    return true;\n  }\n\n  // Custom tool shows running/completed state after start\n  if (\n    packetTypes.has(PacketType.CUSTOM_TOOL_START) ||\n    packetTypes.has(PacketType.CUSTOM_TOOL_DELTA)\n  ) {\n    return true;\n  }\n\n  // Research agent has meaningful content from start (task) or report deltas\n  if (\n    packetTypes.has(PacketType.RESEARCH_AGENT_START) ||\n    packetTypes.has(PacketType.INTERMEDIATE_REPORT_START) ||\n    packetTypes.has(PacketType.INTERMEDIATE_REPORT_DELTA) ||\n    packetTypes.has(PacketType.INTERMEDIATE_REPORT_CITED_DOCS)\n  ) {\n    return true;\n  }\n\n  // Reasoning content only appears in deltas\n  if (packetTypes.has(PacketType.REASONING_DELTA)) {\n    return true;\n  }\n\n  // Deep research plan content only appears in deltas\n  if (packetTypes.has(PacketType.DEEP_RESEARCH_PLAN_DELTA)) {\n    return true;\n  }\n\n  return false;\n};\n\n// Check if packets belong to a deep research plan\nexport const isDeepResearchPlanPackets = (packets: Packet[]): boolean =>\n  packets.some((p) => p.obj.type === PacketType.DEEP_RESEARCH_PLAN_START);\n\n// Check if packets belong to a memory tool\nexport const isMemoryToolPackets = (packets: Packet[]): boolean =>\n  packets.some(\n    (p) =>\n      p.obj.type === PacketType.MEMORY_TOOL_START ||\n      p.obj.type === PacketType.MEMORY_TOOL_NO_ACCESS\n  );\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/primitives/TimelineHeaderRow.tsx",
    "content": "import React from \"react\";\n\nexport interface TimelineHeaderRowProps {\n  left?: React.ReactNode;\n  children?: React.ReactNode;\n}\n\n/**\n * TimelineHeaderRow aligns the top header (e.g., agent avatar + title row)\n * with the same rail width used by the timeline steps.\n */\nexport function TimelineHeaderRow({ left, children }: TimelineHeaderRowProps) {\n  return (\n    <div className=\"flex w-full h-[var(--timeline-header-row-height)]\">\n      <div className=\"flex items-center justify-center w-[var(--timeline-rail-width)] h-[var(--timeline-header-row-height)]\">\n        {left}\n      </div>\n      <div className=\"flex-1 min-w-0 h-full\">{children}</div>\n    </div>\n  );\n}\n\nexport default TimelineHeaderRow;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/primitives/TimelineIconColumn.tsx",
    "content": "import React from \"react\";\nimport { cn } from \"@/lib/utils\";\n\n/**\n * TimelineRailVariant controls whether a row shows the rail or only reserves width.\n * - rail: renders icon + connector line.\n * - spacer: keeps column width for alignment, but no rail.\n */\nexport type TimelineRailVariant = \"rail\" | \"spacer\";\n\nexport interface TimelineIconColumnProps {\n  variant?: TimelineRailVariant;\n  isFirst?: boolean;\n  isLast?: boolean;\n  isHover?: boolean;\n  disableTopConnectorHover?: boolean;\n  icon?: React.ReactNode;\n  showIcon?: boolean;\n  /**\n   * Controls the vertical height of the icon row.\n   * - default: uses step header height for normal rows.\n   * - compact: uses first-step spacer height for hidden headers.\n   */\n  iconRowVariant?: \"default\" | \"compact\";\n}\n\n/**\n * TimelineIconColumn renders the left rail (connector + icon).\n * For default rows, icon alignment is tied to step text padding:\n * - icon wrapper stays fixed at 1.25rem\n * - remaining top/bottom header space is filled with connector segments\n */\nexport function TimelineIconColumn({\n  variant = \"rail\",\n  isFirst = false,\n  isLast = false,\n  isHover = false,\n  disableTopConnectorHover = false,\n  icon,\n  showIcon = true,\n  iconRowVariant = \"default\",\n}: TimelineIconColumnProps) {\n  if (variant === \"spacer\") {\n    return <div className=\"w-[var(--timeline-rail-width)]\" />;\n  }\n\n  const connectorColorClass = isHover ? \"bg-border-04\" : \"bg-border-01\";\n  const topConnectorColorClass = disableTopConnectorHover\n    ? \"bg-border-01\"\n    : connectorColorClass;\n\n  return (\n    <div className=\"relative flex flex-col items-center w-[var(--timeline-rail-width)]\">\n      <div\n        className={cn(\n          \"w-full shrink-0 flex flex-col items-center\",\n          iconRowVariant === \"compact\"\n            ? \"h-[var(--timeline-first-top-spacer-height)]\"\n            : \"h-[var(--timeline-step-header-height)]\"\n        )}\n      >\n        {iconRowVariant === \"default\" ? (\n          <>\n            <div\n              className={cn(\n                \"w-px h-[calc(var(--timeline-step-top-padding)*2)]\",\n                !isFirst && topConnectorColorClass\n              )}\n            />\n            <div className=\"h-[var(--timeline-branch-icon-wrapper-size)] w-[var(--timeline-branch-icon-wrapper-size)] shrink-0 flex items-center justify-center\">\n              {showIcon && icon}\n            </div>\n            <div className={cn(\"w-px flex-1\", connectorColorClass)} />\n          </>\n        ) : (\n          <div className={cn(\"w-px flex-1\", !isFirst && connectorColorClass)} />\n        )}\n      </div>\n\n      {!isLast && <div className={cn(\"w-px flex-1\", connectorColorClass)} />}\n    </div>\n  );\n}\n\nexport default TimelineIconColumn;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/primitives/TimelineRoot.tsx",
    "content": "import React from \"react\";\nimport { getTimelineStyles, TimelineTokens } from \"./tokens\";\n\nexport interface TimelineRootProps {\n  children: React.ReactNode;\n  tokens?: Partial<TimelineTokens>;\n}\n\n/**\n * TimelineRoot provides the shared sizing contract for all timeline primitives.\n * It sets CSS variables derived from TimelineTokens so rail width, header height,\n * and padding stay consistent across the timeline.\n */\nexport function TimelineRoot({ children, tokens }: TimelineRootProps) {\n  return (\n    <div\n      className=\"flex flex-col pl-[var(--timeline-agent-message-padding-left)]\"\n      style={getTimelineStyles(tokens)}\n    >\n      {children}\n    </div>\n  );\n}\n\nexport default TimelineRoot;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/primitives/TimelineRow.tsx",
    "content": "import React from \"react\";\nimport { TimelineIconColumn, TimelineRailVariant } from \"./TimelineIconColumn\";\n\n/**\n * TimelineRowRailVariant controls how the left column is rendered.\n * - rail: normal icon + connector column.\n * - spacer: empty column that preserves rail width.\n * - none: no left column at all.\n */\nexport type TimelineRowRailVariant = TimelineRailVariant | \"none\";\n\nexport interface TimelineRowProps {\n  railVariant?: TimelineRowRailVariant;\n  icon?: React.ReactNode;\n  showIcon?: boolean;\n  disableTopConnectorHover?: boolean;\n  /**\n   * Controls the height of the icon row within the rail.\n   * Use compact when the header is hidden to keep alignment stable.\n   */\n  iconRowVariant?: \"default\" | \"compact\";\n  isFirst?: boolean;\n  isLast?: boolean;\n  isHover?: boolean;\n  children?: React.ReactNode;\n}\n\n/**\n * TimelineRow composes the rail column + content column.\n * It is the base layout primitive for all timeline rows.\n */\nexport function TimelineRow({\n  railVariant = \"rail\",\n  icon,\n  showIcon = true,\n  disableTopConnectorHover = false,\n  iconRowVariant = \"default\",\n  isFirst = false,\n  isLast = false,\n  isHover = false,\n  children,\n}: TimelineRowProps) {\n  return (\n    <div className=\"flex w-full\">\n      {railVariant !== \"none\" && (\n        <TimelineIconColumn\n          variant={railVariant === \"spacer\" ? \"spacer\" : \"rail\"}\n          icon={icon}\n          showIcon={showIcon}\n          disableTopConnectorHover={disableTopConnectorHover}\n          iconRowVariant={iconRowVariant}\n          isFirst={isFirst}\n          isLast={isLast}\n          isHover={isHover}\n        />\n      )}\n      <div className=\"flex-1 min-w-0\">{children}</div>\n    </div>\n  );\n}\n\nexport default TimelineRow;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/primitives/TimelineStepContent.tsx",
    "content": "import React, { FunctionComponent } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { SvgFold, SvgExpand, SvgXOctagon } from \"@opal/icons\";\nimport { IconProps } from \"@opal/types\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { TimelineSurfaceBackground } from \"@/app/app/message/messageComponents/timeline/primitives/TimelineSurface\";\n\nexport interface TimelineStepContentProps {\n  children?: React.ReactNode;\n  header?: React.ReactNode;\n  buttonTitle?: string;\n  isExpanded?: boolean;\n  onToggle?: () => void;\n  collapsible?: boolean;\n  supportsCollapsible?: boolean;\n  hideHeader?: boolean;\n  collapsedIcon?: FunctionComponent<IconProps>;\n  noPaddingRight?: boolean;\n  surfaceBackground?: TimelineSurfaceBackground;\n}\n\n/**\n * TimelineStepContent renders the header row + content body for a step.\n * It is used by StepContainer and by parallel tab content to keep layout consistent.\n */\nexport function TimelineStepContent({\n  children,\n  header,\n  buttonTitle,\n  isExpanded = true,\n  onToggle,\n  collapsible = true,\n  supportsCollapsible = false,\n  hideHeader = false,\n  collapsedIcon: CollapsedIconComponent,\n  noPaddingRight = false,\n  surfaceBackground,\n}: TimelineStepContentProps) {\n  const showCollapseControls = collapsible && supportsCollapsible && onToggle;\n\n  return (\n    <div className=\"flex flex-col px-1 pb-1\">\n      {!hideHeader && header && (\n        <div className=\"flex items-center justify-between h-[var(--timeline-step-header-height)] pl-1\">\n          <div className=\"pt-[var(--timeline-step-top-padding)] pl-[var(--timeline-common-text-padding)] w-full\">\n            <Text as=\"p\" mainUiMuted text04>\n              {header}\n            </Text>\n          </div>\n\n          <div className=\"h-full w-[var(--timeline-step-header-right-section-width)] flex items-center justify-end\">\n            {showCollapseControls ? (\n              buttonTitle ? (\n                <Button\n                  prominence=\"tertiary\"\n                  size=\"md\"\n                  onClick={onToggle}\n                  rightIcon={\n                    isExpanded ? SvgFold : CollapsedIconComponent || SvgExpand\n                  }\n                >\n                  {buttonTitle}\n                </Button>\n              ) : (\n                <Button\n                  prominence=\"tertiary\"\n                  size=\"md\"\n                  onClick={onToggle}\n                  icon={\n                    isExpanded ? SvgFold : CollapsedIconComponent || SvgExpand\n                  }\n                />\n              )\n            ) : surfaceBackground === \"error\" ? (\n              <div className=\"p-1.5\">\n                <SvgXOctagon className=\"h-4 w-4 text-status-error-05\" />\n              </div>\n            ) : null}\n          </div>\n        </div>\n      )}\n\n      {children && (\n        <div\n          className={cn(\n            \"pl-1 pb-1\",\n            !noPaddingRight &&\n              \"pr-[var(--timeline-step-header-right-section-width)]\",\n            hideHeader && \"pt-[var(--timeline-step-top-padding)]\"\n          )}\n        >\n          {children}\n        </div>\n      )}\n    </div>\n  );\n}\n\nexport default TimelineStepContent;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/primitives/TimelineSurface.tsx",
    "content": "import React from \"react\";\nimport { cn } from \"@/lib/utils\";\n\nexport type TimelineSurfaceBackground = \"tint\" | \"transparent\" | \"error\";\n\nexport interface TimelineSurfaceProps {\n  children: React.ReactNode;\n  className?: string;\n  isHover?: boolean;\n  roundedTop?: boolean;\n  roundedBottom?: boolean;\n  background?: TimelineSurfaceBackground;\n}\n\n/**\n * TimelineSurface provides the shared background + rounded corners for a row.\n * Use it to keep hover and tint behavior consistent across timeline items.\n */\nexport function TimelineSurface({\n  children,\n  className,\n  isHover = false,\n  roundedTop = false,\n  roundedBottom = false,\n  background = \"tint\",\n}: TimelineSurfaceProps) {\n  if (React.Children.count(children) === 0) {\n    return null;\n  }\n\n  const baseBackground =\n    background === \"tint\"\n      ? \"bg-background-tint-00\"\n      : background === \"error\"\n        ? \"bg-status-error-00\"\n        : \"\";\n  const hoverBackground =\n    (background === \"tint\" || background === \"error\") && isHover\n      ? \"bg-background-tint-02\"\n      : \"\";\n\n  return (\n    <div\n      className={cn(\n        \"transition-colors duration-200\",\n        baseBackground,\n        hoverBackground,\n        roundedTop && \"rounded-t-12\",\n        roundedBottom && \"rounded-b-12\",\n        className\n      )}\n    >\n      {children}\n    </div>\n  );\n}\n\nexport default TimelineSurface;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/primitives/TimelineTopSpacer.tsx",
    "content": "import React from \"react\";\nimport { TimelineTopSpacerVariant } from \"./tokens\";\n\nexport interface TimelineTopSpacerProps {\n  variant?: TimelineTopSpacerVariant;\n}\n\n/**\n * TimelineTopSpacer creates vertical spacing at the top of a step's content.\n * It mirrors connector spacing when the connector is part of layout flow.\n */\nexport function TimelineTopSpacer({\n  variant = \"default\",\n}: TimelineTopSpacerProps) {\n  if (variant === \"none\") {\n    return null;\n  }\n\n  if (variant === \"first\") {\n    return <div className=\"h-[var(--timeline-first-top-spacer-height)]\" />;\n  }\n\n  return <div className=\"h-[var(--timeline-top-connector-height)]\" />;\n}\n\nexport default TimelineTopSpacer;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/primitives/tokens.ts",
    "content": "import React from \"react\";\n\n/**\n * TimelineTokens define the shared layout contract for timeline primitives.\n * Values are applied as CSS variables via TimelineRoot.\n */\nexport interface TimelineTokens {\n  railWidth: string;\n  headerRowHeight: string;\n  stepHeaderHeight: string;\n  topConnectorHeight: string;\n  firstTopSpacerHeight: string;\n  iconSize: string;\n  branchIconWrapperSize: string;\n  branchIconSize: string;\n  stepHeaderRightSectionWidth: string;\n  headerPaddingLeft: string;\n  headerPaddingRight: string;\n  headerTextPaddingX: string;\n  headerTextPaddingY: string;\n  stepTopPadding: string;\n  agentMessagePaddingLeft: string;\n  timelineCommonTextPadding: string;\n}\n\n/**\n * Controls the top spacer inside TimelineStepContent.\n * - default: reserve space equal to the top connector height.\n * - first: smaller spacer used for the first step.\n * - none: no spacer (use when connector is drawn outside layout flow).\n */\nexport type TimelineTopSpacerVariant = \"default\" | \"first\" | \"none\";\n\n/**\n * Default sizing for the timeline layout. Override in TimelineRoot if needed.\n */\nexport const timelineTokenDefaults: TimelineTokens = {\n  railWidth: \"2.25rem\",\n  headerRowHeight: \"2.25rem\",\n  stepHeaderHeight: \"2rem\",\n  topConnectorHeight: \"0.5rem\",\n  firstTopSpacerHeight: \"0.25rem\",\n  iconSize: \"0.75rem\",\n  branchIconWrapperSize: \"1.25rem\",\n  branchIconSize: \"0.75rem\",\n  stepHeaderRightSectionWidth: \"2.125rem\",\n  headerPaddingLeft: \"0.5rem\",\n  headerPaddingRight: \"0.25rem\",\n  headerTextPaddingX: \"0.375rem\",\n  headerTextPaddingY: \"0.125rem\",\n  stepTopPadding: \"0.25rem\",\n  agentMessagePaddingLeft: \"0.12rem\",\n  timelineCommonTextPadding: \"0.12rem\",\n};\n\n/**\n * Returns CSS variables for timeline layout based on defaults + overrides.\n */\nexport function getTimelineStyles(\n  tokens?: Partial<TimelineTokens>\n): React.CSSProperties {\n  const merged: TimelineTokens = { ...timelineTokenDefaults, ...tokens };\n  return {\n    \"--timeline-rail-width\": merged.railWidth,\n    \"--timeline-header-row-height\": merged.headerRowHeight,\n    \"--timeline-step-header-height\": merged.stepHeaderHeight,\n    \"--timeline-top-connector-height\": merged.topConnectorHeight,\n    \"--timeline-first-top-spacer-height\": merged.firstTopSpacerHeight,\n    \"--timeline-icon-size\": merged.iconSize,\n    \"--timeline-branch-icon-wrapper-size\": merged.branchIconWrapperSize,\n    \"--timeline-branch-icon-size\": merged.branchIconSize,\n    \"--timeline-step-header-right-section-width\":\n      merged.stepHeaderRightSectionWidth,\n    \"--timeline-header-padding-left\": merged.headerPaddingLeft,\n    \"--timeline-header-padding-right\": merged.headerPaddingRight,\n    \"--timeline-header-text-padding-x\": merged.headerTextPaddingX,\n    \"--timeline-header-text-padding-y\": merged.headerTextPaddingY,\n    \"--timeline-step-top-padding\": merged.stepTopPadding,\n    \"--timeline-agent-message-padding-left\": merged.agentMessagePaddingLeft,\n    \"--timeline-common-text-padding\": merged.timelineCommonTextPadding,\n  } as React.CSSProperties;\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/code/PythonToolRenderer.tsx",
    "content": "import { useEffect, useMemo } from \"react\";\nimport {\n  PacketType,\n  PythonToolPacket,\n  PythonToolStart,\n  PythonToolDelta,\n  ToolCallArgumentDelta,\n  SectionEnd,\n  CODE_INTERPRETER_TOOL_TYPES,\n} from \"@/app/app/services/streamingModels\";\nimport {\n  MessageRenderer,\n  RenderType,\n} from \"@/app/app/message/messageComponents/interfaces\";\nimport { CodeBlock } from \"@/app/app/message/CodeBlock\";\nimport hljs from \"highlight.js/lib/core\";\nimport python from \"highlight.js/lib/languages/python\";\nimport { SvgTerminal } from \"@opal/icons\";\nimport FadingEdgeContainer from \"@/refresh-components/FadingEdgeContainer\";\n\n// Register Python language for highlighting\nhljs.registerLanguage(\"python\", python);\n\n// Component to render syntax-highlighted Python code\nfunction HighlightedPythonCode({ code }: { code: string }) {\n  const highlightedHtml = useMemo(() => {\n    try {\n      return hljs.highlight(code, { language: \"python\" }).value;\n    } catch {\n      return code;\n    }\n  }, [code]);\n\n  return (\n    <span\n      dangerouslySetInnerHTML={{ __html: highlightedHtml }}\n      className=\"hljs\"\n    />\n  );\n}\n\n// Helper function to construct current Python execution state\nfunction constructCurrentPythonState(packets: PythonToolPacket[]) {\n  // Accumulate streaming code from argument deltas (arrives before PythonToolStart)\n  const streamingCode = packets\n    .filter(\n      (packet) =>\n        packet.obj.type === PacketType.TOOL_CALL_ARGUMENT_DELTA &&\n        (packet.obj as ToolCallArgumentDelta).tool_type ===\n          CODE_INTERPRETER_TOOL_TYPES.PYTHON\n    )\n    .map((packet) =>\n      String((packet.obj as ToolCallArgumentDelta).argument_deltas.code ?? \"\")\n    )\n    .join(\"\");\n  const pythonStart = packets.find(\n    (packet) => packet.obj.type === PacketType.PYTHON_TOOL_START\n  )?.obj as PythonToolStart | null;\n  const pythonDeltas = packets\n    .filter((packet) => packet.obj.type === PacketType.PYTHON_TOOL_DELTA)\n    .map((packet) => packet.obj as PythonToolDelta);\n  const pythonEnd = packets.find(\n    (packet) =>\n      packet.obj.type === PacketType.SECTION_END ||\n      packet.obj.type === PacketType.ERROR\n  )?.obj as SectionEnd | null;\n\n  // Use complete code from PythonToolStart if available, else use streamed code.\n  const code = pythonStart?.code || streamingCode;\n  const stdout = pythonDeltas\n    .map((delta) => delta?.stdout || \"\")\n    .filter((s) => s)\n    .join(\"\");\n  const stderr = pythonDeltas\n    .map((delta) => delta?.stderr || \"\")\n    .filter((s) => s)\n    .join(\"\");\n  const fileIds = pythonDeltas.flatMap((delta) => delta?.file_ids || []);\n  const isStreaming = !pythonStart && streamingCode.length > 0;\n  const isExecuting = pythonStart && !pythonEnd;\n  const isComplete = pythonStart && pythonEnd;\n  const hasError = stderr.length > 0;\n\n  return {\n    code,\n    stdout,\n    stderr,\n    fileIds,\n    isStreaming,\n    isExecuting,\n    isComplete,\n    hasError,\n  };\n}\n\nexport const PythonToolRenderer: MessageRenderer<PythonToolPacket, {}> = ({\n  packets,\n  onComplete,\n  renderType,\n  children,\n}) => {\n  const {\n    code,\n    stdout,\n    stderr,\n    fileIds,\n    isStreaming,\n    isExecuting,\n    isComplete,\n    hasError,\n  } = constructCurrentPythonState(packets);\n\n  useEffect(() => {\n    if (isComplete) {\n      onComplete();\n    }\n  }, [isComplete, onComplete]);\n\n  const status = useMemo(() => {\n    if (isStreaming) {\n      return \"Writing code...\";\n    }\n    if (isExecuting) {\n      return \"Executing Python code...\";\n    }\n    if (hasError) {\n      return \"Python execution failed\";\n    }\n    if (isComplete) {\n      return \"Python execution completed\";\n    }\n    return \"Python execution\";\n  }, [isStreaming, isComplete, isExecuting, hasError]);\n\n  // Shared content for all states - used by both FULL and compact modes\n  const content = (\n    <div className=\"flex flex-col mb-1 space-y-2\">\n      {/* Loading indicator when streaming or executing */}\n      {(isStreaming || isExecuting) && (\n        <div className=\"flex items-center gap-2 text-sm text-muted-foreground\">\n          <div className=\"flex gap-0.5\">\n            <div className=\"w-1 h-1 bg-current rounded-full animate-pulse\"></div>\n            <div\n              className=\"w-1 h-1 bg-current rounded-full animate-pulse\"\n              style={{ animationDelay: \"0.1s\" }}\n            ></div>\n            <div\n              className=\"w-1 h-1 bg-current rounded-full animate-pulse\"\n              style={{ animationDelay: \"0.2s\" }}\n            ></div>\n          </div>\n          <span>{isStreaming ? \"Writing code...\" : \"Running code...\"}</span>\n        </div>\n      )}\n\n      {/* Code block */}\n      {code && (\n        <div className=\"prose max-w-full\">\n          <CodeBlock className=\"language-python\" codeText={code.trim()}>\n            <HighlightedPythonCode code={code.trim()} />\n          </CodeBlock>\n        </div>\n      )}\n\n      {/* Output */}\n      {stdout && (\n        <div className=\"rounded-md bg-background-neutral-02 p-3\">\n          <div className=\"text-xs font-semibold mb-1 text-text-03\">Output:</div>\n          <pre className=\"text-sm whitespace-pre-wrap font-mono text-text-01 overflow-x-auto\">\n            {stdout}\n          </pre>\n        </div>\n      )}\n\n      {/* Error */}\n      {stderr && (\n        <div className=\"rounded-md bg-status-error-01 p-3 border border-status-error-02\">\n          <div className=\"text-xs font-semibold mb-1 text-status-error-05\">\n            Error:\n          </div>\n          <pre className=\"text-sm whitespace-pre-wrap font-mono text-status-error-05 overflow-x-auto\">\n            {stderr}\n          </pre>\n        </div>\n      )}\n\n      {/* File count */}\n      {fileIds.length > 0 && (\n        <div className=\"text-sm text-text-03\">\n          Generated {fileIds.length} file{fileIds.length !== 1 ? \"s\" : \"\"}\n        </div>\n      )}\n\n      {/* No output fallback - only when complete with no output */}\n      {isComplete && !stdout && !stderr && (\n        <div className=\"py-2 text-center text-text-04\">\n          <SvgTerminal className=\"w-4 h-4 mx-auto mb-1 opacity-50\" />\n          <p className=\"text-xs\">No output</p>\n        </div>\n      )}\n    </div>\n  );\n\n  // FULL mode: render content directly\n  if (renderType === RenderType.FULL) {\n    return children([\n      {\n        icon: SvgTerminal,\n        status,\n        content,\n        supportsCollapsible: true,\n        alwaysCollapsible: true,\n      },\n    ]);\n  }\n\n  // Compact mode: wrap content in FadeDiv\n  return children([\n    {\n      icon: SvgTerminal,\n      status,\n      supportsCollapsible: true,\n      alwaysCollapsible: true,\n      content: (\n        <FadingEdgeContainer\n          direction=\"bottom\"\n          className=\"max-h-24 overflow-hidden\"\n        >\n          {content}\n        </FadingEdgeContainer>\n      ),\n    },\n  ]);\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/deepresearch/DeepResearchPlanRenderer.tsx",
    "content": "import React, { useCallback, useMemo } from \"react\";\nimport { SvgCircle } from \"@opal/icons\";\n\nimport {\n  DeepResearchPlanPacket,\n  PacketType,\n} from \"@/app/app/services/streamingModels\";\nimport {\n  MessageRenderer,\n  FullChatState,\n} from \"@/app/app/message/messageComponents/interfaces\";\nimport MinimalMarkdown from \"@/components/chat/MinimalMarkdown\";\nimport ExpandableTextDisplay from \"@/refresh-components/texts/ExpandableTextDisplay\";\nimport {\n  mutedTextMarkdownComponents,\n  collapsedMarkdownComponents,\n} from \"@/app/app/message/messageComponents/timeline/renderers/sharedMarkdownComponents\";\n\n/**\n * Renderer for deep research plan packets.\n * Streams the research plan content with a list icon.\n */\nexport const DeepResearchPlanRenderer: MessageRenderer<\n  DeepResearchPlanPacket,\n  FullChatState\n> = ({ packets, stopPacketSeen, children }) => {\n  const isComplete = packets.some((p) => p.obj.type === PacketType.SECTION_END);\n\n  const fullContent = useMemo(\n    () =>\n      packets\n        .map((packet) => {\n          if (packet.obj.type === PacketType.DEEP_RESEARCH_PLAN_DELTA) {\n            return packet.obj.content;\n          }\n          return \"\";\n        })\n        .join(\"\"),\n    [packets]\n  );\n\n  const statusText = isComplete ? \"Generated plan\" : \"Generating plan\";\n\n  // Markdown renderer callback for ExpandableTextDisplay\n  // Uses collapsed components (no spacing) in collapsed view, normal spacing in expanded modal\n  const renderMarkdown = useCallback(\n    (text: string, isExpanded: boolean) => (\n      <MinimalMarkdown\n        content={text}\n        components={\n          isExpanded ? mutedTextMarkdownComponents : collapsedMarkdownComponents\n        }\n      />\n    ),\n    []\n  );\n\n  const planContent = (\n    <ExpandableTextDisplay\n      title=\"Research Plan\"\n      content={fullContent}\n      renderContent={renderMarkdown}\n      isStreaming={!isComplete}\n    />\n  );\n\n  return children([\n    {\n      icon: SvgCircle,\n      status: statusText,\n      content: planContent,\n      noPaddingRight: true,\n    },\n  ]);\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/deepresearch/ResearchAgentRenderer.tsx",
    "content": "import React, { useMemo, useCallback } from \"react\";\nimport { SvgCircle, SvgCheckCircle, SvgBookOpen } from \"@opal/icons\";\n\nimport {\n  PacketType,\n  Packet,\n  ResearchAgentPacket,\n  ResearchAgentStart,\n  IntermediateReportDelta,\n} from \"@/app/app/services/streamingModels\";\nimport {\n  MessageRenderer,\n  FullChatState,\n  RenderType,\n} from \"@/app/app/message/messageComponents/interfaces\";\nimport { getToolName } from \"@/app/app/message/messageComponents/toolDisplayHelpers\";\nimport { StepContainer } from \"@/app/app/message/messageComponents/timeline/StepContainer\";\nimport {\n  TimelineRendererComponent,\n  TimelineRendererOutput,\n} from \"@/app/app/message/messageComponents/timeline/TimelineRendererComponent\";\nimport { TimelineStepComposer } from \"@/app/app/message/messageComponents/timeline/TimelineStepComposer\";\nimport ExpandableTextDisplay from \"@/refresh-components/texts/ExpandableTextDisplay\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport {\n  processContent,\n  useMarkdownComponents,\n  renderMarkdown,\n} from \"@/app/app/message/messageComponents/markdownUtils\";\n\ninterface NestedToolGroup {\n  sub_turn_index: number;\n  toolType: string;\n  status: string;\n  isComplete: boolean;\n  packets: Packet[];\n}\n\n/**\n * ResearchAgentRenderer - Renders research agent steps in deep research\n *\n * Segregates packets by tool and uses StepContainer + TimelineRendererComponent.\n *\n * RenderType modes:\n * - FULL: Shows all nested tool groups, research task, and report. Headers passed as `status` prop.\n *         Used when step is expanded in timeline.\n * - COMPACT: Shows only the latest active item (tool or report). Header passed as `status` prop.\n *            Used when step is collapsed in timeline, still wrapped in StepContainer.\n * - HIGHLIGHT: Shows only the latest active item with header embedded directly in content.\n *              No StepContainer wrapper. Used for parallel streaming preview.\n *              Nested tools are rendered with HIGHLIGHT mode recursively.\n */\nexport const ResearchAgentRenderer: MessageRenderer<\n  ResearchAgentPacket,\n  FullChatState\n> = ({\n  packets,\n  state,\n  onComplete,\n  renderType,\n  stopPacketSeen,\n  isLastStep = true,\n  isHover = false,\n  children,\n}) => {\n  // Extract the research task from the start packet\n  const startPacket = packets.find(\n    (p) => p.obj.type === PacketType.RESEARCH_AGENT_START\n  );\n  const researchTask = startPacket\n    ? (startPacket.obj as ResearchAgentStart).research_task\n    : \"\";\n\n  // Separate parent packets from nested tool packets\n  const { parentPackets, nestedToolGroups } = useMemo(() => {\n    const parent: Packet[] = [];\n    const nestedBySubTurn = new Map<number, Packet[]>();\n\n    packets.forEach((packet) => {\n      const subTurnIndex = packet.placement.sub_turn_index;\n      if (subTurnIndex === undefined || subTurnIndex === null) {\n        parent.push(packet);\n      } else {\n        if (!nestedBySubTurn.has(subTurnIndex)) {\n          nestedBySubTurn.set(subTurnIndex, []);\n        }\n        nestedBySubTurn.get(subTurnIndex)!.push(packet);\n      }\n    });\n\n    // Convert nested packets to groups with metadata\n    const groups: NestedToolGroup[] = Array.from(nestedBySubTurn.entries())\n      .sort(([a], [b]) => a - b)\n      .map(([subTurnIndex, toolPackets]) => {\n        const name = getToolName(toolPackets);\n        const isComplete = toolPackets.some(\n          (p) =>\n            p.obj.type === PacketType.SECTION_END ||\n            p.obj.type === PacketType.REASONING_DONE\n        );\n        return {\n          sub_turn_index: subTurnIndex,\n          toolType: name,\n          status: isComplete ? \"Complete\" : \"Running\",\n          isComplete,\n          packets: toolPackets,\n        };\n      });\n\n    return { parentPackets: parent, nestedToolGroups: groups };\n  }, [packets]);\n\n  // Filter nested tool groups based on renderType (COMPACT and HIGHLIGHT show only latest)\n  const visibleNestedToolGroups = useMemo(() => {\n    if (\n      (renderType !== RenderType.COMPACT &&\n        renderType !== RenderType.HIGHLIGHT) ||\n      nestedToolGroups.length === 0\n    ) {\n      return nestedToolGroups;\n    }\n    // COMPACT/HIGHLIGHT mode: show only the latest group (last in sorted array)\n    const latestGroup = nestedToolGroups[nestedToolGroups.length - 1];\n    return latestGroup ? [latestGroup] : [];\n  }, [renderType, nestedToolGroups]);\n\n  // Check completion from parent packets\n  const isComplete = parentPackets.some(\n    (p) => p.obj.type === PacketType.SECTION_END\n  );\n\n  // Determine if report is actively streaming\n  const isReportStreaming = !isComplete && !stopPacketSeen;\n\n  // Build report content from parent packets\n  const fullReportContent = parentPackets\n    .map((packet) => {\n      if (packet.obj.type === PacketType.INTERMEDIATE_REPORT_DELTA) {\n        return (packet.obj as IntermediateReportDelta).content;\n      }\n      return \"\";\n    })\n    .join(\"\");\n\n  // Condensed modes: show only the currently active/streaming section\n  const isCompact = renderType === RenderType.COMPACT;\n  const isHighlight = renderType === RenderType.HIGHLIGHT;\n  const isCondensedMode = isCompact || isHighlight;\n  // Report takes priority if it has content (means tools are done, report is streaming)\n  const showOnlyReport =\n    isCondensedMode && fullReportContent && visibleNestedToolGroups.length > 0;\n  const showOnlyTools =\n    isCondensedMode && !fullReportContent && visibleNestedToolGroups.length > 0;\n\n  // Process content once for consistent markdown handling\n  // This ensures code block extraction uses the same offsets as rendered content\n  const processedReportContent = useMemo(\n    () => processContent(fullReportContent),\n    [fullReportContent]\n  );\n\n  // Get markdown components for rendering (stable across renders)\n  // Uses processed content so code block extraction offsets match rendered content\n  const markdownComponents = useMarkdownComponents(\n    state,\n    processedReportContent,\n    \"text-text-03 font-main-ui-body\"\n  );\n\n  // Stable callbacks to avoid creating new functions on every render\n  // renderReport renders the processed content\n  // Uses pre-computed processedReportContent since ExpandableTextDisplay\n  // passes the same fullReportContent that we processed above\n  // Parameters are required by ExpandableTextDisplay interface but we use\n  // the pre-processed content to ensure offsets match code block extraction\n  const renderReport = useCallback(\n    (_content: string, _isExpanded?: boolean) =>\n      renderMarkdown(\n        processedReportContent,\n        markdownComponents,\n        \"text-text-03 font-main-ui-body\"\n      ),\n    [processedReportContent, markdownComponents]\n  );\n\n  // HIGHLIGHT mode: return raw content with header embedded in content\n  if (isHighlight) {\n    if (showOnlyReport) {\n      return children([\n        {\n          icon: null,\n          status: null,\n          content: (\n            <div className=\"flex flex-col pl-[var(--timeline-common-text-padding)]\">\n              <Text as=\"p\" text04 mainUiMuted className=\"mb-1\">\n                Research Report\n              </Text>\n              <ExpandableTextDisplay\n                title=\"Research Report\"\n                content={fullReportContent}\n                maxLines={5}\n                renderContent={renderReport}\n                isStreaming={isReportStreaming}\n              />\n            </div>\n          ),\n          supportsCollapsible: true,\n          timelineLayout: \"content\",\n        },\n      ]);\n    }\n\n    if (showOnlyTools) {\n      const latestGroup = visibleNestedToolGroups[0];\n      if (latestGroup) {\n        return (\n          <TimelineRendererComponent\n            key={latestGroup.sub_turn_index}\n            packets={latestGroup.packets}\n            chatState={state}\n            animate={!stopPacketSeen && !latestGroup.isComplete}\n            stopPacketSeen={stopPacketSeen}\n            defaultExpanded={false}\n            renderTypeOverride={RenderType.HIGHLIGHT}\n            isLastStep={true}\n            isHover={isHover}\n          >\n            {(results: TimelineRendererOutput) =>\n              children([\n                {\n                  icon: null,\n                  status: null,\n                  content: (\n                    <>\n                      {results.map((result, index) => (\n                        <React.Fragment key={index}>\n                          {result.content}\n                        </React.Fragment>\n                      ))}\n                    </>\n                  ),\n                  supportsCollapsible: true,\n                  timelineLayout: \"content\",\n                },\n              ])\n            }\n          </TimelineRendererComponent>\n        );\n      }\n    }\n\n    // Fallback: research task with header embedded\n    if (researchTask) {\n      return children([\n        {\n          icon: null,\n          status: null,\n          content: (\n            <div className=\"flex flex-col pl-[var(--timeline-common-text-padding)]\">\n              <Text as=\"p\" text04 mainUiMuted>\n                Research Task\n              </Text>\n              <Text as=\"p\" text03 mainUiMuted>\n                {researchTask}\n              </Text>\n            </div>\n          ),\n          supportsCollapsible: true,\n          timelineLayout: \"content\",\n        },\n      ]);\n    }\n\n    return children([\n      {\n        icon: null,\n        status: null,\n        content: <></>,\n        supportsCollapsible: true,\n        timelineLayout: \"content\",\n      },\n    ]);\n  }\n\n  // Build content using StepContainer pattern\n  const researchAgentContent = (\n    <div className=\"flex flex-col\">\n      {/* Research Task - hidden in compact mode when tools/report are active */}\n      {researchTask && !showOnlyReport && !showOnlyTools && (\n        <StepContainer\n          stepIcon={SvgCircle}\n          header=\"Research Task\"\n          collapsible={true}\n          isLastStep={\n            !stopPacketSeen &&\n            nestedToolGroups.length === 0 &&\n            !fullReportContent &&\n            !isComplete\n          }\n          isHover={isHover}\n        >\n          <div className=\"pl-[var(--timeline-common-text-padding)]\">\n            <Text as=\"p\" text02 mainUiMuted>\n              {researchTask}\n            </Text>\n          </div>\n        </StepContainer>\n      )}\n\n      {/* Nested tool calls - hidden when report is streaming in compact mode */}\n      {!showOnlyReport &&\n        visibleNestedToolGroups.map((group, index) => {\n          const isLastNestedStep =\n            !stopPacketSeen &&\n            index === visibleNestedToolGroups.length - 1 &&\n            !fullReportContent &&\n            !isComplete;\n\n          return (\n            <TimelineRendererComponent\n              key={group.sub_turn_index}\n              packets={group.packets}\n              chatState={state}\n              animate={!stopPacketSeen && !group.isComplete}\n              stopPacketSeen={stopPacketSeen}\n              defaultExpanded={true}\n              isLastStep={isLastNestedStep}\n              isHover={isHover}\n            >\n              {(results: TimelineRendererOutput) => (\n                <TimelineStepComposer\n                  results={results}\n                  isLastStep={isLastNestedStep}\n                  isFirstStep={!researchTask && index === 0}\n                  isSingleStep={false}\n                  collapsible={true}\n                />\n              )}\n            </TimelineRendererComponent>\n          );\n        })}\n\n      {/* Intermediate report - hidden when tools are active in compact mode */}\n      {fullReportContent && !showOnlyTools && (\n        <StepContainer\n          stepIcon={SvgBookOpen}\n          header=\"Research Report\"\n          isLastStep={!stopPacketSeen && !isComplete}\n          isFirstStep={!researchTask && nestedToolGroups.length === 0}\n          isHover={isHover}\n          noPaddingRight={true}\n        >\n          <div className=\"pl-[var(--timeline-common-text-padding)]\">\n            <ExpandableTextDisplay\n              title=\"Research Report\"\n              content={fullReportContent}\n              renderContent={renderReport}\n              isStreaming={isReportStreaming}\n            />\n          </div>\n        </StepContainer>\n      )}\n    </div>\n  );\n\n  // Return simplified result (no icon, no status)\n  return children([\n    {\n      icon: null,\n      status: null,\n      content: researchAgentContent,\n      supportsCollapsible: true,\n      timelineLayout: \"content\",\n    },\n  ]);\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/fetch/FetchToolRenderer.tsx",
    "content": "import { FetchToolPacket } from \"@/app/app/services/streamingModels\";\nimport {\n  MessageRenderer,\n  RenderType,\n} from \"@/app/app/message/messageComponents/interfaces\";\nimport { BlinkingBar } from \"@/app/app/message/BlinkingBar\";\nimport { OnyxDocument } from \"@/lib/search/interfaces\";\nimport { ValidSources } from \"@/lib/types\";\nimport { SearchChipList, SourceInfo } from \"../search/SearchChipList\";\nimport { getMetadataTags } from \"../search/searchStateUtils\";\nimport {\n  constructCurrentFetchState,\n  INITIAL_URLS_TO_SHOW,\n  URLS_PER_EXPANSION,\n} from \"./fetchStateUtils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgCircle } from \"@opal/icons\";\n\nconst urlToSourceInfo = (url: string, index: number): SourceInfo => ({\n  id: `url-${index}`,\n  title: url,\n  sourceType: ValidSources.Web,\n  sourceUrl: url,\n});\n\nconst documentToSourceInfo = (doc: OnyxDocument): SourceInfo => ({\n  id: doc.document_id,\n  title: doc.semantic_identifier || doc.link || \"\",\n  sourceType: doc.source_type || ValidSources.Web,\n  sourceUrl: doc.link,\n  description: doc.blurb,\n  metadata: {\n    date: doc.updated_at || undefined,\n    tags: getMetadataTags(doc.metadata),\n  },\n});\n\n/**\n * FetchToolRenderer - Renders URL fetch/open tool execution steps\n *\n * RenderType modes:\n * - FULL: Shows all details (URLs being opened + reading). Header passed as `status` prop.\n *         Used when step is expanded in timeline.\n * - COMPACT: Shows only reading (no URL list). Header passed as `status` prop.\n *            Used when step is collapsed in timeline, still wrapped in StepContainer.\n * - HIGHLIGHT: Shows URL list with header embedded directly in content.\n *              No StepContainer wrapper. Used for parallel streaming preview.\n */\nexport const FetchToolRenderer: MessageRenderer<FetchToolPacket, {}> = ({\n  packets,\n  onComplete,\n  animate,\n  stopPacketSeen,\n  renderType,\n  children,\n}) => {\n  const fetchState = constructCurrentFetchState(packets);\n  const { urls, documents, hasStarted, isLoading, isComplete } = fetchState;\n  const isCompact = renderType === RenderType.COMPACT;\n  const isHighlight = renderType === RenderType.HIGHLIGHT;\n\n  if (!hasStarted) {\n    return children([\n      {\n        icon: SvgCircle,\n        status: \"Reading\",\n        content: <div />,\n        supportsCollapsible: false,\n        timelineLayout: \"timeline\",\n      },\n    ]);\n  }\n\n  const displayDocuments = documents.length > 0;\n  const displayUrls = !displayDocuments && isComplete && urls.length > 0;\n\n  // HIGHLIGHT mode: header embedded in content, no StepContainer\n  if (isHighlight) {\n    return children([\n      {\n        icon: null,\n        status: null,\n        supportsCollapsible: false,\n        timelineLayout: \"content\",\n        content: (\n          <div className=\"flex flex-col\">\n            <Text as=\"p\" text02 className=\"text-sm mb-1\">\n              Reading\n            </Text>\n            {displayDocuments ? (\n              <SearchChipList\n                items={documents}\n                initialCount={INITIAL_URLS_TO_SHOW}\n                expansionCount={URLS_PER_EXPANSION}\n                getKey={(doc: OnyxDocument) => doc.document_id}\n                toSourceInfo={(doc: OnyxDocument) => documentToSourceInfo(doc)}\n                onClick={(doc: OnyxDocument) => {\n                  if (doc.link) window.open(doc.link, \"_blank\");\n                }}\n                emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}\n              />\n            ) : displayUrls ? (\n              <SearchChipList\n                items={urls}\n                initialCount={INITIAL_URLS_TO_SHOW}\n                expansionCount={URLS_PER_EXPANSION}\n                getKey={(url: string) => url}\n                toSourceInfo={urlToSourceInfo}\n                onClick={(url: string) => window.open(url, \"_blank\")}\n                emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}\n              />\n            ) : (\n              !stopPacketSeen && <BlinkingBar />\n            )}\n          </div>\n        ),\n      },\n    ]);\n  }\n\n  return children([\n    {\n      icon: SvgCircle,\n      status: \"Reading\",\n      supportsCollapsible: false,\n      timelineLayout: \"timeline\",\n      content: (\n        <div className=\"flex flex-col\">\n          {displayDocuments ? (\n            <SearchChipList\n              items={documents}\n              initialCount={INITIAL_URLS_TO_SHOW}\n              expansionCount={URLS_PER_EXPANSION}\n              getKey={(doc: OnyxDocument) => doc.document_id}\n              toSourceInfo={(doc: OnyxDocument) => documentToSourceInfo(doc)}\n              onClick={(doc: OnyxDocument) => {\n                if (doc.link) window.open(doc.link, \"_blank\");\n              }}\n              emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}\n            />\n          ) : displayUrls ? (\n            <SearchChipList\n              items={urls}\n              initialCount={INITIAL_URLS_TO_SHOW}\n              expansionCount={URLS_PER_EXPANSION}\n              getKey={(url: string) => url}\n              toSourceInfo={urlToSourceInfo}\n              onClick={(url: string) => window.open(url, \"_blank\")}\n              emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}\n            />\n          ) : (\n            <div className=\"flex flex-wrap gap-x-2 gap-y-2 ml-1\">\n              {!stopPacketSeen && <BlinkingBar />}\n            </div>\n          )}\n        </div>\n      ),\n    },\n  ]);\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/fetch/fetchStateUtils.ts",
    "content": "import {\n  PacketType,\n  FetchToolPacket,\n  FetchToolUrls,\n  FetchToolDocuments,\n} from \"@/app/app/services/streamingModels\";\nimport { OnyxDocument } from \"@/lib/search/interfaces\";\n\nexport const INITIAL_URLS_TO_SHOW = 3;\nexport const URLS_PER_EXPANSION = 5;\nexport const READING_MIN_DURATION_MS = 1000;\nexport const READ_MIN_DURATION_MS = 1000;\n\nexport interface FetchState {\n  urls: string[];\n  documents: OnyxDocument[];\n  hasStarted: boolean;\n  isLoading: boolean;\n  isComplete: boolean;\n}\n\n/** Constructs the current fetch state from fetch tool packets. */\nexport const constructCurrentFetchState = (\n  packets: FetchToolPacket[]\n): FetchState => {\n  const startPacket = packets.find(\n    (packet) => packet.obj.type === PacketType.FETCH_TOOL_START\n  );\n  const urlsPacket = packets.find(\n    (packet) => packet.obj.type === PacketType.FETCH_TOOL_URLS\n  )?.obj as FetchToolUrls | undefined;\n  const documentsPacket = packets.find(\n    (packet) => packet.obj.type === PacketType.FETCH_TOOL_DOCUMENTS\n  )?.obj as FetchToolDocuments | undefined;\n  const sectionEnd = packets.find(\n    (packet) =>\n      packet.obj.type === PacketType.SECTION_END ||\n      packet.obj.type === PacketType.ERROR\n  );\n\n  const urls = urlsPacket?.urls || [];\n  const documents = documentsPacket?.documents || [];\n  const hasStarted = Boolean(startPacket);\n  const isLoading = hasStarted && !documentsPacket;\n  const isComplete = Boolean(startPacket && sectionEnd);\n\n  return { urls, documents, hasStarted, isLoading, isComplete };\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/filereader/FileReaderToolRenderer.tsx",
    "content": "import { useEffect } from \"react\";\nimport { SvgFileText } from \"@opal/icons\";\nimport {\n  PacketType,\n  FileReaderToolPacket,\n  FileReaderResult,\n} from \"@/app/app/services/streamingModels\";\nimport {\n  MessageRenderer,\n  RenderType,\n} from \"@/app/app/message/messageComponents/interfaces\";\nimport { BlinkingBar } from \"@/app/app/message/BlinkingBar\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport Text from \"@/refresh-components/texts/Text\";\n\ninterface FileReaderState {\n  fileName: string | null;\n  fileId: string | null;\n  startChar: number;\n  endChar: number;\n  totalChars: number;\n  previewStart: string;\n  previewEnd: string;\n  isReading: boolean;\n  isComplete: boolean;\n}\n\nfunction constructFileReaderState(\n  packets: FileReaderToolPacket[]\n): FileReaderState {\n  const result = packets.find(\n    (p) => p.obj.type === PacketType.FILE_READER_RESULT\n  )?.obj as FileReaderResult | null;\n\n  const hasStart = packets.some(\n    (p) => p.obj.type === PacketType.FILE_READER_START\n  );\n  const hasEnd = packets.some(\n    (p) =>\n      p.obj.type === PacketType.SECTION_END || p.obj.type === PacketType.ERROR\n  );\n\n  return {\n    fileName: result?.file_name ?? null,\n    fileId: result?.file_id ?? null,\n    startChar: result?.start_char ?? 0,\n    endChar: result?.end_char ?? 0,\n    totalChars: result?.total_chars ?? 0,\n    previewStart: result?.preview_start ?? \"\",\n    previewEnd: result?.preview_end ?? \"\",\n    isReading: hasStart && !hasEnd,\n    isComplete: hasStart && hasEnd,\n  };\n}\n\nfunction formatCharRange(\n  startChar: number,\n  endChar: number,\n  totalChars: number\n): string {\n  return `chars ${startChar.toLocaleString()}\\u2013${endChar.toLocaleString()} of ${totalChars.toLocaleString()}`;\n}\n\nexport const FileReaderToolRenderer: MessageRenderer<\n  FileReaderToolPacket,\n  {}\n> = ({ packets, onComplete, stopPacketSeen, renderType, children }) => {\n  const state = constructFileReaderState(packets);\n\n  useEffect(() => {\n    if (state.isComplete) {\n      onComplete();\n    }\n  }, [state.isComplete, onComplete]);\n\n  const statusText = state.fileName\n    ? `Read ${state.fileName} (${formatCharRange(\n        state.startChar,\n        state.endChar,\n        state.totalChars\n      )})`\n    : \"Reading file\";\n\n  const isCompact = renderType === RenderType.COMPACT;\n\n  if (isCompact) {\n    return children([\n      {\n        icon: SvgFileText,\n        status: statusText,\n        supportsCollapsible: true,\n        timelineLayout: \"timeline\",\n        content: <></>,\n      },\n    ]);\n  }\n\n  const hasPreview = state.previewStart || state.previewEnd;\n\n  return children([\n    {\n      icon: SvgFileText,\n      status: statusText,\n      supportsCollapsible: true,\n      timelineLayout: \"timeline\",\n      content: (\n        <Section gap={0.5} alignItems=\"start\" height=\"fit\">\n          {state.fileName ? (\n            <>\n              <Section\n                flexDirection=\"row\"\n                alignItems=\"center\"\n                justifyContent=\"start\"\n                gap={0.5}\n                height=\"fit\"\n              >\n                <Text as=\"span\" mainUiAction text02>\n                  {state.fileName}\n                </Text>\n                <Text as=\"span\" mainUiMuted text04>\n                  {formatCharRange(\n                    state.startChar,\n                    state.endChar,\n                    state.totalChars\n                  )}\n                </Text>\n              </Section>\n              {hasPreview && (\n                <Card variant=\"secondary\" padding={0.5} gap={0.25}>\n                  <Text as=\"span\" secondaryMono text04>\n                    {state.previewStart}\n                    {state.previewEnd && \"\\u2026\"}\n                  </Text>\n                  {state.previewEnd && (\n                    <Text as=\"span\" secondaryMono text04>\n                      {\"\\u2026\"}\n                      {state.previewEnd}\n                    </Text>\n                  )}\n                </Card>\n              )}\n            </>\n          ) : (\n            !stopPacketSeen && <BlinkingBar />\n          )}\n        </Section>\n      ),\n    },\n  ]);\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/memory/MemoryToolRenderer.tsx",
    "content": "\"use client\";\n\nimport { MemoryToolPacket } from \"@/app/app/services/streamingModels\";\nimport {\n  MessageRenderer,\n  RenderType,\n} from \"@/app/app/message/messageComponents/interfaces\";\nimport { BlinkingBar } from \"@/app/app/message/BlinkingBar\";\nimport { constructCurrentMemoryState } from \"./memoryStateUtils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgEditBig, SvgMaximize2 } from \"@opal/icons\";\nimport { cn } from \"@/lib/utils\";\nimport { Button } from \"@opal/components\";\nimport MemoriesModal from \"@/refresh-components/modals/MemoriesModal\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\n\n/**\n * MemoryToolRenderer - Renders memory tool execution steps\n *\n * States:\n * - Loading (start, no delta): \"Saving memory...\" with BlinkingBar\n * - Delta received: operation label + memory text\n * - Complete (SectionEnd): \"Memory saved\" / \"Memory updated\" + memory text\n * - No Access: \"Memory tool disabled\"\n */\nexport const MemoryToolRenderer: MessageRenderer<MemoryToolPacket, {}> = ({\n  packets,\n  stopPacketSeen,\n  renderType,\n  children,\n}) => {\n  const memoryState = constructCurrentMemoryState(packets);\n  const {\n    hasStarted,\n    noAccess,\n    memoryText,\n    operation,\n    isComplete,\n    memoryId,\n    index,\n  } = memoryState;\n  const memoriesModal = useCreateModal();\n  const isHighlight = renderType === RenderType.HIGHLIGHT;\n\n  if (!hasStarted) {\n    return children([\n      {\n        icon: SvgEditBig,\n        status: \"Memory\",\n        content: <div />,\n        supportsCollapsible: false,\n        timelineLayout: \"timeline\",\n        noPaddingRight: true,\n      },\n    ]);\n  }\n\n  // No access case\n  if (noAccess) {\n    const content = (\n      <Text as=\"p\" text03 className=\"text-sm\">\n        Memory tool disabled\n      </Text>\n    );\n\n    if (isHighlight) {\n      return children([\n        {\n          icon: null,\n          status: null,\n          supportsCollapsible: false,\n          timelineLayout: \"content\",\n          content: (\n            <div className=\"flex flex-col\">\n              <Text as=\"p\" text02 className=\"text-sm mb-1\">\n                Memory\n              </Text>\n              {content}\n            </div>\n          ),\n        },\n      ]);\n    }\n\n    return children([\n      {\n        icon: SvgEditBig,\n        status: \"Memory\",\n        supportsCollapsible: false,\n        timelineLayout: \"timeline\",\n        noPaddingRight: true,\n        content,\n      },\n    ]);\n  }\n\n  // Determine status text\n  let statusLabel = \"Updating memory\";\n\n  const memoryContent = (\n    <div className=\"flex flex-col\">\n      <memoriesModal.Provider>\n        <MemoriesModal\n          initialTargetMemoryId={memoryId}\n          initialTargetIndex={index}\n          highlightOnOpen\n        />\n      </memoriesModal.Provider>\n      {memoryText ? (\n        <div className={cn(\"w-full flex\")}>\n          <div className=\"flex-1 min-w-0\">\n            <Text as=\"p\" text02 className=\"text-sm break-words\">\n              {memoryText}\n            </Text>\n          </div>\n          {/* Expand button */}\n          <div className=\"flex justify-end items-end mt-1 w-8\">\n            <Button\n              prominence=\"tertiary\"\n              size=\"md\"\n              icon={SvgMaximize2}\n              tooltip=\"View Memories\"\n              onClick={(e) => {\n                e.stopPropagation();\n                memoriesModal.toggle(true);\n              }}\n            />\n          </div>\n        </div>\n      ) : (\n        !stopPacketSeen && <BlinkingBar />\n      )}\n    </div>\n  );\n\n  if (isHighlight) {\n    return children([\n      {\n        icon: null,\n        status: null,\n        supportsCollapsible: false,\n        timelineLayout: \"content\",\n        content: (\n          <div className=\"flex flex-col\">\n            <Text as=\"p\" text02 className=\"text-sm mb-1\">\n              {statusLabel}\n            </Text>\n            {memoryContent}\n          </div>\n        ),\n      },\n    ]);\n  }\n\n  return children([\n    {\n      icon: SvgEditBig,\n      status: statusLabel,\n      supportsCollapsible: false,\n      timelineLayout: \"timeline\",\n      noPaddingRight: true,\n      content: memoryContent,\n    },\n  ]);\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/memory/memoryStateUtils.ts",
    "content": "import {\n  PacketType,\n  MemoryToolPacket,\n  MemoryToolDelta,\n} from \"@/app/app/services/streamingModels\";\n\nexport interface MemoryState {\n  hasStarted: boolean;\n  noAccess: boolean;\n  memoryText: string | null;\n  operation: \"add\" | \"update\" | null;\n  memoryId: number | null;\n  index: number | null;\n  isComplete: boolean;\n}\n\n/** Constructs the current memory state from memory tool packets. */\nexport function constructCurrentMemoryState(\n  packets: MemoryToolPacket[]\n): MemoryState {\n  const startPacket = packets.find(\n    (packet) => packet.obj.type === PacketType.MEMORY_TOOL_START\n  );\n  const noAccessPacket = packets.find(\n    (packet) => packet.obj.type === PacketType.MEMORY_TOOL_NO_ACCESS\n  );\n  const deltaPacket = packets.find(\n    (packet) => packet.obj.type === PacketType.MEMORY_TOOL_DELTA\n  )?.obj as MemoryToolDelta | undefined;\n  const sectionEnd = packets.find(\n    (packet) =>\n      packet.obj.type === PacketType.SECTION_END ||\n      packet.obj.type === PacketType.ERROR\n  );\n\n  const hasStarted = Boolean(startPacket || noAccessPacket);\n  const noAccess = Boolean(noAccessPacket);\n  const memoryText = deltaPacket?.memory_text ?? null;\n  const operation = deltaPacket?.operation ?? null;\n  const memoryId = deltaPacket?.memory_id ?? null;\n  const index = deltaPacket?.index ?? null;\n  const isComplete = Boolean(sectionEnd);\n\n  return {\n    hasStarted,\n    noAccess,\n    memoryText,\n    operation,\n    memoryId,\n    index,\n    isComplete,\n  };\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/reasoning/ReasoningRenderer.tsx",
    "content": "import React, {\n  useCallback,\n  useEffect,\n  useMemo,\n  useRef,\n  useState,\n} from \"react\";\n\nimport {\n  PacketType,\n  ReasoningDelta,\n  ReasoningPacket,\n} from \"@/app/app/services/streamingModels\";\nimport {\n  MessageRenderer,\n  FullChatState,\n} from \"@/app/app/message/messageComponents/interfaces\";\nimport MinimalMarkdown from \"@/components/chat/MinimalMarkdown\";\nimport ExpandableTextDisplay from \"@/refresh-components/texts/ExpandableTextDisplay\";\nimport {\n  mutedTextMarkdownComponents,\n  collapsedMarkdownComponents,\n} from \"@/app/app/message/messageComponents/timeline/renderers/sharedMarkdownComponents\";\nimport { SvgCircle } from \"@opal/icons\";\n\nconst THINKING_MIN_DURATION_MS = 500; // 0.5 second minimum for \"Thinking\" state\n\nconst THINKING_STATUS = \"Thinking\";\n\nfunction extractFirstParagraph(content: string): {\n  title: string | null;\n  remainingContent: string;\n} {\n  if (!content || content.trim().length === 0) {\n    return { title: null, remainingContent: content };\n  }\n\n  const trimmed = content.trim();\n\n  // Split by double newline (paragraph break) or single newline\n  const lines = trimmed.split(/\\n\\n|\\n/);\n  const firstLine = lines[0]?.trim();\n\n  if (!firstLine) {\n    return { title: null, remainingContent: content };\n  }\n\n  // Only treat as title if it's an actual markdown heading (starts with #)\n  const isMarkdownHeading = /^#+\\s/.test(firstLine);\n  if (!isMarkdownHeading) {\n    return { title: null, remainingContent: content };\n  }\n\n  // Remove markdown heading markers (# ## ### etc.)\n  const cleanTitle = firstLine.replace(/^#+\\s*/, \"\").trim();\n\n  // Only use as title if it's reasonably short (under ~60 chars for UI fit)\n  if (cleanTitle.length > 60) {\n    return { title: null, remainingContent: content };\n  }\n\n  // Remove the first line from content\n  const remainingContent = trimmed.slice(firstLine.length).replace(/^\\n+/, \"\");\n\n  return { title: cleanTitle, remainingContent };\n}\n\nfunction constructCurrentReasoningState(packets: ReasoningPacket[]) {\n  const hasStart = packets.some(\n    (p) => p.obj.type === PacketType.REASONING_START\n  );\n  const hasEnd = packets.some(\n    (p) =>\n      p.obj.type === PacketType.SECTION_END ||\n      p.obj.type === PacketType.ERROR ||\n      // Support reasoning_done from backend\n      (p.obj as any).type === PacketType.REASONING_DONE\n  );\n  const deltas = packets\n    .filter((p) => p.obj.type === PacketType.REASONING_DELTA)\n    .map((p) => p.obj as ReasoningDelta);\n\n  const content = deltas.map((d) => d.reasoning).join(\"\");\n\n  return {\n    hasStart,\n    hasEnd,\n    content,\n  };\n}\n\nexport const ReasoningRenderer: MessageRenderer<\n  ReasoningPacket,\n  FullChatState\n> = ({ packets, onComplete, animate, children }) => {\n  const { hasStart, hasEnd, content } = useMemo(\n    () => constructCurrentReasoningState(packets),\n    [packets]\n  );\n\n  const { title, remainingContent } = useMemo(\n    () => extractFirstParagraph(content),\n    [content]\n  );\n\n  // Use extracted title if available, otherwise default\n  const displayStatus = title || THINKING_STATUS;\n  const displayContent = title ? remainingContent : content;\n\n  // Track reasoning timing for minimum display duration\n  const [reasoningStartTime, setReasoningStartTime] = useState<number | null>(\n    null\n  );\n  const timeoutRef = useRef<NodeJS.Timeout | null>(null);\n  const completionHandledRef = useRef(false);\n\n  // Track when reasoning starts\n  useEffect(() => {\n    if ((hasStart || hasEnd) && reasoningStartTime === null) {\n      setReasoningStartTime(Date.now());\n    }\n  }, [hasStart, hasEnd, reasoningStartTime]);\n\n  // Handle reasoning completion with minimum duration\n  useEffect(() => {\n    if (\n      hasEnd &&\n      reasoningStartTime !== null &&\n      !completionHandledRef.current\n    ) {\n      completionHandledRef.current = true;\n      const elapsedTime = Date.now() - reasoningStartTime;\n      const minimumThinkingDuration = animate ? THINKING_MIN_DURATION_MS : 0;\n\n      if (elapsedTime >= minimumThinkingDuration) {\n        // Enough time has passed, complete immediately\n        onComplete();\n      } else {\n        // Not enough time has passed, delay completion\n        const remainingTime = minimumThinkingDuration - elapsedTime;\n        timeoutRef.current = setTimeout(() => {\n          onComplete();\n        }, remainingTime);\n      }\n    }\n  }, [hasEnd, reasoningStartTime, animate, onComplete]);\n\n  // Cleanup timeout on unmount\n  useEffect(() => {\n    return () => {\n      if (timeoutRef.current) {\n        clearTimeout(timeoutRef.current);\n      }\n    };\n  }, []);\n\n  // Markdown renderer callback for ExpandableTextDisplay\n  // Uses collapsed components (no spacing) in collapsed view, normal spacing in expanded modal\n  const renderMarkdown = useCallback(\n    (text: string, isExpanded: boolean) => (\n      <MinimalMarkdown\n        content={text}\n        components={\n          isExpanded ? mutedTextMarkdownComponents : collapsedMarkdownComponents\n        }\n      />\n    ),\n    []\n  );\n\n  if (!hasStart && !hasEnd && content.length === 0) {\n    return children([\n      {\n        icon: SvgCircle,\n        status: THINKING_STATUS,\n        content: <></>,\n        noPaddingRight: true,\n      },\n    ]);\n  }\n\n  const reasoningContent = (\n    <div className=\"pl-[var(--timeline-common-text-padding)]\">\n      <ExpandableTextDisplay\n        title=\"Full text\"\n        content={content}\n        displayContent={displayContent}\n        renderContent={renderMarkdown}\n        isStreaming={!hasEnd}\n      />\n    </div>\n  );\n\n  return children([\n    {\n      icon: SvgCircle,\n      status: displayStatus,\n      content: reasoningContent,\n      expandedText: reasoningContent,\n      noPaddingRight: true,\n    },\n  ]);\n};\n\nexport default ReasoningRenderer;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/search/InternalSearchToolRenderer.tsx",
    "content": "import { SvgSearch, SvgSearchMenu } from \"@opal/icons\";\nimport { SearchToolPacket } from \"@/app/app/services/streamingModels\";\nimport {\n  MessageRenderer,\n  RenderType,\n} from \"@/app/app/message/messageComponents/interfaces\";\nimport { BlinkingBar } from \"@/app/app/message/BlinkingBar\";\nimport { OnyxDocument } from \"@/lib/search/interfaces\";\nimport { ValidSources } from \"@/lib/types\";\nimport { SearchChipList, SourceInfo } from \"./SearchChipList\";\nimport {\n  constructCurrentSearchState,\n  INITIAL_QUERIES_TO_SHOW,\n  QUERIES_PER_EXPANSION,\n  INITIAL_RESULTS_TO_SHOW,\n  RESULTS_PER_EXPANSION,\n  getMetadataTags,\n} from \"./searchStateUtils\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nconst queryToSourceInfo = (query: string, index: number): SourceInfo => ({\n  id: `query-${index}`,\n  title: query,\n  sourceType: ValidSources.Web,\n  icon: SvgSearch,\n});\n\nconst resultToSourceInfo = (doc: OnyxDocument): SourceInfo => ({\n  id: doc.document_id,\n  title: doc.semantic_identifier || \"\",\n  sourceType: doc.source_type,\n  sourceUrl: doc.link,\n  description: doc.blurb,\n  metadata: {\n    date: doc.updated_at || undefined,\n    tags: getMetadataTags(doc.metadata),\n  },\n});\n\n/**\n * InternalSearchToolRenderer - Renders internal document search tool execution steps\n *\n * RenderType modes:\n * - FULL: Shows 1 combined timeline step (queries + results together).\n *         Used when step is expanded in timeline.\n * - COMPACT: Shows only results (no queries). Header passed as `status` prop.\n *            Used when step is collapsed in timeline, still wrapped in StepContainer.\n * - HIGHLIGHT: Shows only results with header embedded directly in content.\n *              No StepContainer wrapper. Used for parallel streaming preview.\n * - INLINE: Phase-based (queries -> results) for collapsed streaming view.\n */\nexport const InternalSearchToolRenderer: MessageRenderer<\n  SearchToolPacket,\n  {}\n> = ({\n  packets,\n  onComplete,\n  animate,\n  stopPacketSeen,\n  renderType,\n  children,\n}) => {\n  const searchState = constructCurrentSearchState(packets);\n  const { queries, results, isComplete } = searchState;\n\n  const isCompact = renderType === RenderType.COMPACT;\n  const isHighlight = renderType === RenderType.HIGHLIGHT;\n  const isInline = renderType === RenderType.INLINE;\n\n  const hasResults = results.length > 0;\n\n  const queriesHeader = \"Searching internal documents\";\n\n  if (queries.length === 0) {\n    return children([\n      {\n        icon: SvgSearchMenu,\n        status: queriesHeader,\n        content: <></>,\n        supportsCollapsible: true,\n        timelineLayout: \"timeline\",\n      },\n    ]);\n  }\n\n  // HIGHLIGHT mode: header embedded in content, no StepContainer\n  if (isHighlight) {\n    return children([\n      {\n        icon: null,\n        status: null,\n        supportsCollapsible: true,\n        timelineLayout: \"content\",\n        content: (\n          <div className=\"flex flex-col\">\n            <Text as=\"p\" text04 mainUiMuted className=\"mb-1\">\n              {queriesHeader}\n            </Text>\n            <SearchChipList\n              items={results}\n              initialCount={INITIAL_RESULTS_TO_SHOW}\n              expansionCount={RESULTS_PER_EXPANSION}\n              getKey={(doc: OnyxDocument, index: number) =>\n                doc.document_id ?? `result-${index}`\n              }\n              toSourceInfo={(doc: OnyxDocument) => resultToSourceInfo(doc)}\n              onClick={(doc: OnyxDocument) => {\n                if (doc.link) {\n                  window.open(doc.link, \"_blank\", \"noopener,noreferrer\");\n                }\n              }}\n              emptyState={\n                !isComplete ? (\n                  <BlinkingBar />\n                ) : (\n                  <Text as=\"p\" text04 mainUiMuted>\n                    No results found\n                  </Text>\n                )\n              }\n            />\n          </div>\n        ),\n      },\n    ]);\n  }\n\n  // INLINE mode: dynamic phase-based content for collapsed streaming view\n  if (isInline) {\n    // Querying phase: show queries\n    if (!hasResults) {\n      return children([\n        {\n          icon: null,\n          status: queriesHeader,\n          supportsCollapsible: true,\n          timelineLayout: \"content\",\n          content: (\n            <SearchChipList\n              items={queries}\n              initialCount={INITIAL_QUERIES_TO_SHOW}\n              expansionCount={QUERIES_PER_EXPANSION}\n              getKey={(_, index) => index}\n              toSourceInfo={queryToSourceInfo}\n              emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}\n              showDetailsCard={false}\n              isQuery={true}\n            />\n          ),\n        },\n      ]);\n    }\n\n    // Reading phase: show results\n    return children([\n      {\n        icon: null,\n        status: \"Reading\",\n        supportsCollapsible: true,\n        timelineLayout: \"content\",\n        content: (\n          <SearchChipList\n            items={results}\n            initialCount={INITIAL_RESULTS_TO_SHOW}\n            expansionCount={RESULTS_PER_EXPANSION}\n            getKey={(doc: OnyxDocument, index: number) =>\n              doc.document_id ?? `result-${index}`\n            }\n            toSourceInfo={(doc: OnyxDocument) => resultToSourceInfo(doc)}\n            onClick={(doc: OnyxDocument) => {\n              if (doc.link) {\n                window.open(doc.link, \"_blank\", \"noopener,noreferrer\");\n              }\n            }}\n            emptyState={\n              !isComplete ? (\n                <BlinkingBar />\n              ) : (\n                <Text as=\"p\" text04 mainUiMuted>\n                  No results found\n                </Text>\n              )\n            }\n          />\n        ),\n      },\n    ]);\n  }\n\n  // FULL and COMPACT modes: single combined step (queries + results together)\n  return children([\n    {\n      icon: SvgSearchMenu,\n      status: queriesHeader,\n      supportsCollapsible: true,\n      timelineLayout: \"timeline\",\n      content: (\n        <div className=\"flex flex-col\">\n          {!isCompact && (\n            <SearchChipList\n              items={queries}\n              initialCount={INITIAL_QUERIES_TO_SHOW}\n              expansionCount={QUERIES_PER_EXPANSION}\n              getKey={(_, index) => index}\n              toSourceInfo={queryToSourceInfo}\n              emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}\n              showDetailsCard={false}\n              isQuery={true}\n            />\n          )}\n\n          {(results.length > 0 || queries.length > 0) && (\n            <>\n              {!isCompact && (\n                <Text as=\"p\" mainUiMuted text04>\n                  Reading\n                </Text>\n              )}\n              <SearchChipList\n                items={results}\n                initialCount={INITIAL_RESULTS_TO_SHOW}\n                expansionCount={RESULTS_PER_EXPANSION}\n                getKey={(doc: OnyxDocument, index: number) =>\n                  doc.document_id ?? `result-${index}`\n                }\n                toSourceInfo={(doc: OnyxDocument) => resultToSourceInfo(doc)}\n                onClick={(doc: OnyxDocument) => {\n                  if (doc.link) {\n                    window.open(doc.link, \"_blank\", \"noopener,noreferrer\");\n                  }\n                }}\n                emptyState={\n                  !isComplete ? (\n                    <BlinkingBar />\n                  ) : (\n                    <Text as=\"p\" text03 mainUiMuted>\n                      No results found\n                    </Text>\n                  )\n                }\n              />\n            </>\n          )}\n        </div>\n      ),\n    },\n  ]);\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/search/SearchChipList.tsx",
    "content": "import React, { JSX, useState, useEffect, useRef, useMemo } from \"react\";\nimport { SourceTag, SourceInfo } from \"@/refresh-components/buttons/source-tag\";\nimport { cn } from \"@/lib/utils\";\n\nexport type { SourceInfo };\n\nconst ANIMATION_DELAY_MS = 30;\n\nexport interface SearchChipListProps<T> {\n  items: T[];\n  initialCount: number;\n  expansionCount: number;\n  getKey: (item: T, index: number) => string | number;\n  toSourceInfo: (item: T, index: number) => SourceInfo;\n  onClick?: (item: T) => void;\n  emptyState?: React.ReactNode;\n  className?: string;\n  showDetailsCard?: boolean;\n  isQuery?: boolean;\n}\n\ntype DisplayEntry<T> =\n  | { type: \"chip\"; item: T; index: number }\n  | { type: \"more\"; batchId: number };\n\nexport function SearchChipList<T>({\n  items,\n  initialCount,\n  expansionCount,\n  getKey,\n  toSourceInfo,\n  onClick,\n  emptyState,\n  className = \"\",\n  showDetailsCard,\n  isQuery,\n}: SearchChipListProps<T>): JSX.Element {\n  const [visibleCount, setVisibleCount] = useState(initialCount);\n  const animatedKeysRef = useRef<Set<string>>(new Set());\n\n  const getEntryKey = (entry: DisplayEntry<T>): string => {\n    if (entry.type === \"more\") return `more-button`;\n    return String(getKey(entry.item, entry.index));\n  };\n\n  const effectiveCount = Math.min(visibleCount, items.length);\n\n  const displayList: DisplayEntry<T>[] = useMemo(() => {\n    const chips: DisplayEntry<T>[] = items\n      .slice(0, effectiveCount)\n      .map((item, i) => ({ type: \"chip\" as const, item, index: i }));\n\n    if (effectiveCount < items.length) {\n      chips.push({ type: \"more\", batchId: 0 });\n    }\n    return chips;\n  }, [items, effectiveCount]);\n\n  const chipCount = effectiveCount;\n  const remainingCount = items.length - chipCount;\n  const remainingItems = items.slice(chipCount);\n\n  const handleShowMore = () => {\n    setVisibleCount((prev) => prev + expansionCount);\n  };\n\n  useEffect(() => {\n    const timer = setTimeout(() => {\n      displayList.forEach((entry) =>\n        animatedKeysRef.current.add(getEntryKey(entry))\n      );\n    }, 0);\n    return () => clearTimeout(timer);\n  }, [displayList]);\n\n  let newItemCounter = 0;\n\n  return (\n    <div className={cn(\"flex flex-wrap gap-x-2 gap-y-2\", className)}>\n      {displayList.map((entry) => {\n        const key = getEntryKey(entry);\n        const isNew = !animatedKeysRef.current.has(key);\n        const delay = isNew ? newItemCounter++ * ANIMATION_DELAY_MS : 0;\n\n        return (\n          <div\n            key={key}\n            className={cn(\"text-xs\", {\n              \"animate-in fade-in slide-in-from-left-2 duration-150\": isNew,\n            })}\n            style={\n              isNew\n                ? {\n                    animationDelay: `${delay}ms`,\n                    animationFillMode: \"backwards\",\n                  }\n                : undefined\n            }\n          >\n            {entry.type === \"chip\" ? (\n              <SourceTag\n                displayName={toSourceInfo(entry.item, entry.index).title}\n                sources={[toSourceInfo(entry.item, entry.index)]}\n                onSourceClick={onClick ? () => onClick(entry.item) : undefined}\n                showDetailsCard={showDetailsCard}\n                isQuery={isQuery}\n                tooltipText={isQuery ? \"View Full Search Term\" : undefined}\n              />\n            ) : (\n              <SourceTag\n                displayName={`+${remainingCount} more`}\n                sources={remainingItems.map((item, i) =>\n                  toSourceInfo(item, chipCount + i)\n                )}\n                onSourceClick={() => handleShowMore()}\n                showDetailsCard={showDetailsCard}\n                isQuery={isQuery}\n                isMore={isQuery}\n              />\n            )}\n          </div>\n        );\n      })}\n\n      {items.length === 0 && emptyState}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/search/WebSearchToolRenderer.tsx",
    "content": "import React from \"react\";\nimport { SvgSearch, SvgGlobe } from \"@opal/icons\";\nimport { SearchToolPacket } from \"@/app/app/services/streamingModels\";\nimport {\n  MessageRenderer,\n  RenderType,\n} from \"@/app/app/message/messageComponents/interfaces\";\nimport { BlinkingBar } from \"@/app/app/message/BlinkingBar\";\nimport { ValidSources } from \"@/lib/types\";\nimport { SearchChipList, SourceInfo } from \"./SearchChipList\";\nimport {\n  constructCurrentSearchState,\n  INITIAL_QUERIES_TO_SHOW,\n  QUERIES_PER_EXPANSION,\n} from \"./searchStateUtils\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nconst queryToSourceInfo = (query: string, index: number): SourceInfo => ({\n  id: `query-${index}`,\n  title: query,\n  sourceType: ValidSources.Web,\n  icon: SvgSearch,\n});\n\n/**\n * WebSearchToolRenderer - Renders web search tool execution steps\n *\n * Only shows queries - results are handled by the fetch tool.\n *\n * RenderType modes:\n * - FULL: Shows queries timeline step. Used when step is expanded in timeline.\n * - HIGHLIGHT: Shows queries with header embedded directly in content.\n *              No StepContainer wrapper. Used for parallel streaming preview.\n * - INLINE: Shows queries for collapsed streaming view.\n */\nexport const WebSearchToolRenderer: MessageRenderer<SearchToolPacket, {}> = ({\n  packets,\n  onComplete,\n  animate,\n  stopPacketSeen,\n  renderType,\n  children,\n}) => {\n  const searchState = constructCurrentSearchState(packets);\n  const { queries } = searchState;\n\n  const isHighlight = renderType === RenderType.HIGHLIGHT;\n  const isInline = renderType === RenderType.INLINE;\n\n  const queriesHeader = \"Searching the web\";\n\n  if (queries.length === 0) {\n    return children([\n      {\n        icon: SvgGlobe,\n        status: \"Searching the web\",\n        content: <div />,\n        supportsCollapsible: false,\n        timelineLayout: \"timeline\",\n      },\n    ]);\n  }\n\n  // HIGHLIGHT mode: header embedded in content, no StepContainer\n  if (isHighlight) {\n    return children([\n      {\n        icon: null,\n        status: null,\n        supportsCollapsible: false,\n        timelineLayout: \"content\",\n        content: (\n          <div className=\"flex flex-col\">\n            <Text as=\"p\" text04 mainUiMuted className=\"mb-1\">\n              {queriesHeader}\n            </Text>\n            <SearchChipList\n              items={queries}\n              initialCount={INITIAL_QUERIES_TO_SHOW}\n              expansionCount={QUERIES_PER_EXPANSION}\n              getKey={(_, index) => index}\n              toSourceInfo={queryToSourceInfo}\n              emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}\n              showDetailsCard={false}\n              isQuery={true}\n            />\n          </div>\n        ),\n      },\n    ]);\n  }\n\n  // INLINE mode: show queries for collapsed streaming view\n  if (isInline) {\n    return children([\n      {\n        icon: null,\n        status: queriesHeader,\n        supportsCollapsible: false,\n        timelineLayout: \"content\",\n        content: (\n          <SearchChipList\n            items={queries}\n            initialCount={INITIAL_QUERIES_TO_SHOW}\n            expansionCount={QUERIES_PER_EXPANSION}\n            getKey={(_, index) => index}\n            toSourceInfo={queryToSourceInfo}\n            emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}\n            showDetailsCard={false}\n            isQuery={true}\n          />\n        ),\n      },\n    ]);\n  }\n\n  // FULL mode: return queries timeline step\n  return children([\n    {\n      icon: SvgGlobe,\n      status: \"Searching the web\",\n      content: (\n        <SearchChipList\n          items={queries}\n          initialCount={INITIAL_QUERIES_TO_SHOW}\n          expansionCount={QUERIES_PER_EXPANSION}\n          getKey={(_, index) => index}\n          toSourceInfo={queryToSourceInfo}\n          emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}\n          showDetailsCard={false}\n          isQuery={true}\n        />\n      ),\n      supportsCollapsible: false,\n      timelineLayout: \"timeline\",\n    },\n  ]);\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/search/searchStateUtils.ts",
    "content": "import {\n  PacketType,\n  SearchToolPacket,\n  SearchToolStart,\n  SearchToolQueriesDelta,\n  SearchToolDocumentsDelta,\n  SectionEnd,\n} from \"@/app/app/services/streamingModels\";\nimport { OnyxDocument } from \"@/lib/search/interfaces\";\n\nexport const MAX_TITLE_LENGTH = 25;\n\nexport const getMetadataTags = (metadata?: {\n  [key: string]: string;\n}): string[] | undefined => {\n  if (!metadata) return undefined;\n  const tags = Object.values(metadata)\n    .filter((value) => typeof value === \"string\" && value.length > 0)\n    .slice(0, 2)\n    .map((value) => `# ${value}`);\n  return tags.length > 0 ? tags : undefined;\n};\n\nexport const INITIAL_QUERIES_TO_SHOW = 3;\nexport const QUERIES_PER_EXPANSION = 5;\nexport const INITIAL_RESULTS_TO_SHOW = 3;\nexport const RESULTS_PER_EXPANSION = 10;\n\nexport interface SearchState {\n  queries: string[];\n  results: OnyxDocument[];\n  isSearching: boolean;\n  hasResults: boolean;\n  isComplete: boolean;\n  isInternetSearch: boolean;\n}\n\n/** Constructs the current search state from search tool packets. */\nexport const constructCurrentSearchState = (\n  packets: SearchToolPacket[]\n): SearchState => {\n  const searchStart = packets.find(\n    (packet) => packet.obj.type === PacketType.SEARCH_TOOL_START\n  )?.obj as SearchToolStart | null;\n\n  const queryDeltas = packets\n    .filter(\n      (packet) => packet.obj.type === PacketType.SEARCH_TOOL_QUERIES_DELTA\n    )\n    .map((packet) => packet.obj as SearchToolQueriesDelta);\n\n  const documentDeltas = packets\n    .filter(\n      (packet) => packet.obj.type === PacketType.SEARCH_TOOL_DOCUMENTS_DELTA\n    )\n    .map((packet) => packet.obj as SearchToolDocumentsDelta);\n\n  const searchEnd = packets.find(\n    (packet) =>\n      packet.obj.type === PacketType.SECTION_END ||\n      packet.obj.type === PacketType.ERROR\n  )?.obj as SectionEnd | null;\n\n  // Deduplicate queries using Set for O(n) instead of indexOf which is O(n²)\n  const seenQueries = new Set<string>();\n  const queries = queryDeltas\n    .flatMap((delta) => delta?.queries || [])\n    .filter((query) => {\n      if (seenQueries.has(query)) return false;\n      seenQueries.add(query);\n      return true;\n    });\n\n  const seenDocIds = new Set<string>();\n  const results = documentDeltas\n    .flatMap((delta) => delta?.documents || [])\n    .filter((doc) => {\n      if (!doc || !doc.document_id) return false;\n      if (seenDocIds.has(doc.document_id)) return false;\n      seenDocIds.add(doc.document_id);\n      return true;\n    });\n\n  const isSearching = Boolean(searchStart && !searchEnd);\n  const hasResults = results.length > 0;\n  const isComplete = Boolean(searchStart && searchEnd);\n  const isInternetSearch = searchStart?.is_internet_search || false;\n\n  return {\n    queries,\n    results,\n    isSearching,\n    hasResults,\n    isComplete,\n    isInternetSearch,\n  };\n};\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/renderers/sharedMarkdownComponents.tsx",
    "content": "import type { Components } from \"react-markdown\";\nimport Text from \"@/refresh-components/texts/Text\";\n\n// Expanded view: normal spacing between paragraphs/lists\nexport const mutedTextMarkdownComponents = {\n  p: ({ children }: { children?: React.ReactNode }) => (\n    <Text as=\"p\" text03 mainUiMuted className=\"!my-1\">\n      {children}\n    </Text>\n  ),\n  li: ({ children }: { children?: React.ReactNode }) => (\n    <Text as=\"li\" text03 mainUiMuted className=\"!my-0 !py-0 leading-normal\">\n      {children}\n    </Text>\n  ),\n  ul: ({ children }: { children?: React.ReactNode }) => (\n    <ul className=\"!pl-0 !ml-0 !my-0.5 list-inside\">{children}</ul>\n  ),\n  ol: ({ children }: { children?: React.ReactNode }) => (\n    <ol className=\"!pl-0 !ml-0 !my-0.5 list-inside\">{children}</ol>\n  ),\n  a: ({ children, href }: { children?: React.ReactNode; href?: string }) => (\n    <a\n      href={href}\n      className=\"text-text-03 mainUiMuted underline\"\n      target=\"_blank\"\n      rel=\"noopener noreferrer\"\n    >\n      {children}\n    </a>\n  ),\n} satisfies Partial<Components>;\n\n// Collapsed view: no spacing for compact display\nexport const collapsedMarkdownComponents = {\n  p: ({ children }: { children?: React.ReactNode }) => (\n    <Text as=\"p\" text03 mainUiMuted className=\"!my-0\">\n      {children}\n    </Text>\n  ),\n  li: ({ children }: { children?: React.ReactNode }) => (\n    <Text as=\"li\" text03 mainUiMuted className=\"!my-0 !py-0 leading-normal\">\n      {children}\n    </Text>\n  ),\n  ul: ({ children }: { children?: React.ReactNode }) => (\n    <ul className=\"!pl-0 !ml-0 !my-0 list-inside\">{children}</ul>\n  ),\n  ol: ({ children }: { children?: React.ReactNode }) => (\n    <ol className=\"!pl-0 !ml-0 !my-0 list-inside\">{children}</ol>\n  ),\n  a: ({ children, href }: { children?: React.ReactNode; href?: string }) => (\n    <a\n      href={href}\n      className=\"text-text-03 mainUiMuted underline\"\n      target=\"_blank\"\n      rel=\"noopener noreferrer\"\n    >\n      {children}\n    </a>\n  ),\n} satisfies Partial<Components>;\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timeline/transformers.ts",
    "content": "import { GroupedPacket } from \"./hooks/packetProcessor\";\n\n/**\n * Transformed step data ready for rendering\n */\nexport interface TransformedStep {\n  /** Unique key for React rendering */\n  key: string;\n  /** Turn index from packet placement */\n  turnIndex: number;\n  /** Tab index for parallel tools */\n  tabIndex: number;\n  /** Raw packets for content rendering */\n  packets: GroupedPacket[\"packets\"];\n}\n\n/**\n * Group steps by turn_index for detecting parallel tools\n */\nexport interface TurnGroup {\n  turnIndex: number;\n  steps: TransformedStep[];\n  /** True if multiple steps have the same turn_index (parallel execution) */\n  isParallel: boolean;\n}\n\n/**\n * Transform a single GroupedPacket into step data\n */\nexport function transformPacketGroup(group: GroupedPacket): TransformedStep {\n  return {\n    key: `${group.turn_index}-${group.tab_index}`,\n    turnIndex: group.turn_index,\n    tabIndex: group.tab_index,\n    packets: group.packets,\n  };\n}\n\n/**\n * Transform all packet groups into step data\n */\nexport function transformPacketGroups(\n  groups: GroupedPacket[]\n): TransformedStep[] {\n  return groups.map(transformPacketGroup);\n}\n\n/**\n * Group transformed steps by turn_index to detect parallel tools\n *\n * @example\n * // Input: TransformedStep[]\n * // ┌──────────────────────────────────────────┐\n * // │ [0] key=\"0-0\" turnIndex=0 tabIndex=0     │\n * // │ [1] key=\"0-1\" turnIndex=0 tabIndex=1     │\n * // │ [2] key=\"1-0\" turnIndex=1 tabIndex=0     │\n * // └──────────────────────────────────────────┘\n * //\n * // Step 1: Build Map<turnIndex, TransformedStep[]>\n * // ┌─────────────────────────────────────────────┐\n * // │ turnMap = {                                 │\n * // │   0 → [step(0-0), step(0-1)]               │\n * // │   1 → [step(1-0)]                          │\n * // │ }                                          │\n * // └─────────────────────────────────────────────┘\n * //\n * // Step 2: Sort turn indices & steps by tabIndex\n * //\n * // Step 3: Build TurnGroup[] with isParallel flag\n * // ┌─────────────────────────────────────────────┐\n * // │ Output: TurnGroup[]                         │\n * // ├─────────────────────────────────────────────┤\n * // │ [0] turnIndex=0                             │\n * // │     steps=[0-0, 0-1]                        │\n * // │     isParallel=true  ← 2 steps = parallel   │\n * // │                                             │\n * // │ [1] turnIndex=1                             │\n * // │     steps=[1-0]                             │\n * // │     isParallel=false ← 1 step = sequential  │\n * // └─────────────────────────────────────────────┘\n */\nexport function groupStepsByTurn(steps: TransformedStep[]): TurnGroup[] {\n  const turnMap = new Map<number, TransformedStep[]>();\n\n  for (const step of steps) {\n    const existing = turnMap.get(step.turnIndex);\n    if (existing) {\n      existing.push(step);\n    } else {\n      turnMap.set(step.turnIndex, [step]);\n    }\n  }\n\n  const result: TurnGroup[] = [];\n  const sortedTurnIndices = Array.from(turnMap.keys()).sort((a, b) => a - b);\n\n  for (const turnIndex of sortedTurnIndices) {\n    const stepsForTurn = turnMap.get(turnIndex)!;\n    stepsForTurn.sort((a, b) => a.tabIndex - b.tabIndex);\n\n    result.push({\n      turnIndex,\n      steps: stepsForTurn,\n      isParallel: stepsForTurn.length > 1,\n    });\n  }\n\n  return result;\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/timing.ts",
    "content": "import { MutableRefObject } from \"react\";\n\n/**\n * Clears multiple timeout refs and optionally resets them to null.\n * Returns true if any timeout was cleared.\n */\nexport function clearTimeoutRefs(\n  timeoutRefs: Array<MutableRefObject<NodeJS.Timeout | null>>,\n  resetToNull: boolean = false\n): boolean {\n  let hadPendingTimeout = false;\n\n  for (const ref of timeoutRefs) {\n    if (ref.current) {\n      clearTimeout(ref.current);\n      hadPendingTimeout = true;\n      if (resetToNull) {\n        ref.current = null;\n      }\n    }\n  }\n\n  return hadPendingTimeout;\n}\n"
  },
  {
    "path": "web/src/app/app/message/messageComponents/toolDisplayHelpers.tsx",
    "content": "import { JSX } from \"react\";\nimport { FiCircle, FiList, FiTool, FiXCircle } from \"react-icons/fi\";\nimport { BrainIcon } from \"@/components/icons/icons\";\n\nimport {\n  Packet,\n  PacketType,\n  SearchToolPacket,\n} from \"@/app/app/services/streamingModels\";\nimport { constructCurrentSearchState } from \"./timeline/renderers/search/searchStateUtils\";\nimport {\n  SvgGlobe,\n  SvgSearchMenu,\n  SvgTerminal,\n  SvgLink,\n  SvgImage,\n  SvgUser,\n  SvgCircle,\n  SvgBookOpen,\n} from \"@opal/icons\";\n\n/**\n * Check if a packet group contains an ERROR packet (tool failed)\n */\nexport function hasToolError(packets: Packet[]): boolean {\n  return packets.some((p) => p.obj.type === PacketType.ERROR);\n}\n\n/**\n * Check if a tool group is complete.\n * For research agents, we only look at parent-level SECTION_END packets (sub_turn_index is undefined/null),\n * not the SECTION_END packets from nested tools (which have sub_turn_index as a number).\n */\nexport function isToolComplete(packets: Packet[]): boolean {\n  const firstPacket = packets[0];\n  if (!firstPacket) return false;\n\n  // For research agents, only parent-level SECTION_END indicates completion\n  // Nested tools (search, fetch, etc.) within the research agent have sub_turn_index set\n  if (firstPacket.obj.type === PacketType.RESEARCH_AGENT_START) {\n    return packets.some(\n      (p) =>\n        (p.obj.type === PacketType.SECTION_END ||\n          p.obj.type === PacketType.ERROR) &&\n        (p.placement.sub_turn_index === undefined ||\n          p.placement.sub_turn_index === null)\n    );\n  }\n\n  // For other tools, any SECTION_END or ERROR indicates completion\n  return packets.some(\n    (p) =>\n      p.obj.type === PacketType.SECTION_END || p.obj.type === PacketType.ERROR\n  );\n}\n\n/**\n * Get an error icon for failed tools\n */\nexport function getToolErrorIcon(): JSX.Element {\n  return <FiXCircle className=\"w-3.5 h-3.5 text-error\" />;\n}\n\nexport function getToolKey(turn_index: number, tab_index: number): string {\n  return `${turn_index}-${tab_index}`;\n}\n\nexport function parseToolKey(key: string): {\n  turn_index: number;\n  tab_index: number;\n} {\n  const parts = key.split(\"-\");\n  return {\n    turn_index: parseInt(parts[0] ?? \"0\", 10),\n    tab_index: parseInt(parts[1] ?? \"0\", 10),\n  };\n}\n\nexport function getToolName(packets: Packet[]): string {\n  const firstPacket = packets[0];\n  if (!firstPacket) return \"Tool\";\n\n  switch (firstPacket.obj.type) {\n    case PacketType.SEARCH_TOOL_START: {\n      const searchState = constructCurrentSearchState(\n        packets as SearchToolPacket[]\n      );\n      return searchState.isInternetSearch ? \"Web Search\" : \"Internal Search\";\n    }\n    case PacketType.PYTHON_TOOL_START:\n      return \"Code Interpreter\";\n    case PacketType.FETCH_TOOL_START:\n      return \"Open URLs\";\n    case PacketType.CUSTOM_TOOL_START:\n      return (\n        (firstPacket.obj as { tool_name?: string }).tool_name || \"Custom Tool\"\n      );\n    case PacketType.IMAGE_GENERATION_TOOL_START:\n      return \"Generate Image\";\n    case PacketType.DEEP_RESEARCH_PLAN_START:\n      return \"Generate plan\";\n    case PacketType.RESEARCH_AGENT_START:\n      return \"Research agent\";\n    case PacketType.REASONING_START:\n      return \"Thinking\";\n    case PacketType.MEMORY_TOOL_START:\n    case PacketType.MEMORY_TOOL_NO_ACCESS:\n      return \"Memory\";\n    default:\n      return \"Tool\";\n  }\n}\n\nexport function getToolIcon(packets: Packet[]): JSX.Element {\n  const firstPacket = packets[0];\n  if (!firstPacket) return <FiCircle className=\"w-3.5 h-3.5\" />;\n\n  switch (firstPacket.obj.type) {\n    case PacketType.SEARCH_TOOL_START: {\n      const searchState = constructCurrentSearchState(\n        packets as SearchToolPacket[]\n      );\n      return searchState.isInternetSearch ? (\n        <SvgGlobe className=\"w-3.5 h-3.5\" />\n      ) : (\n        <SvgSearchMenu className=\"w-3.5 h-3.5\" />\n      );\n    }\n    case PacketType.PYTHON_TOOL_START:\n      return <SvgTerminal className=\"w-3.5 h-3.5\" />;\n    case PacketType.FETCH_TOOL_START:\n      return <SvgLink className=\"w-3.5 h-3.5\" />;\n    case PacketType.CUSTOM_TOOL_START:\n      return <FiTool className=\"w-3.5 h-3.5\" />;\n    case PacketType.IMAGE_GENERATION_TOOL_START:\n      return <SvgImage className=\"w-3.5 h-3.5\" />;\n    case PacketType.DEEP_RESEARCH_PLAN_START:\n      return <FiList className=\"w-3.5 h-3.5\" />;\n    case PacketType.RESEARCH_AGENT_START:\n      return <SvgUser className=\"w-3.5 h-3.5\" />;\n    case PacketType.REASONING_START:\n      return <BrainIcon className=\"w-3.5 h-3.5\" />;\n    case PacketType.MEMORY_TOOL_START:\n    case PacketType.MEMORY_TOOL_NO_ACCESS:\n      return <SvgBookOpen className=\"w-3.5 h-3.5\" />;\n    default:\n      return <SvgCircle className=\"w-3.5 h-3.5\" />;\n  }\n}\n"
  },
  {
    "path": "web/src/app/app/message/thinkingBox/ThinkingBox.css",
    "content": "/* ThinkingBox.css */\n\n/* Apply transition to dark mode as well to ensure smooth color changes */\nhtml {\n  transition:\n    background-color 0.2s ease-in-out,\n    color 0.2s ease-in-out;\n}\n\n:root {\n  --thinking-border-color: rgba(0, 0, 0, 0.1);\n  --thinking-bg-color: transparent;\n  --thinking-text-color: #6b7280;\n  --thinking-title-color: #374151;\n  --thinking-fade-start: rgba(249, 250, 251, 1);\n  --thinking-fade-end: rgba(249, 250, 251, 0);\n  --thinking-fade-start-rgb: 249, 250, 251;\n}\n\n.dark {\n  --thinking-border-color: rgba(255, 255, 255, 0.1);\n  --thinking-bg-color: transparent;\n  --thinking-text-color: #9ca3af;\n  --thinking-title-color: #e5e7eb;\n  --thinking-fade-start: rgba(30, 41, 59, 1);\n  --thinking-fade-end: rgba(30, 41, 59, 0);\n  --thinking-fade-start-rgb: 30, 41, 59;\n}\n\n.thinking-box {\n  width: 98%;\n  max-width: 100%;\n  position: relative;\n}\n\n/* Simple direct rule to prevent border flash in dark mode */\n.dark .thinking-box * {\n  border-color: rgba(255, 255, 255, 0.1);\n}\n\n.thinking-box__container {\n  border: 1px solid var(--thinking-border-color);\n  border-radius: 0.75rem;\n  background-color: var(--thinking-bg-color);\n  overflow: hidden;\n  transition: all 0.2s ease-in-out;\n  box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);\n  transform: translateZ(0);\n  backface-visibility: hidden;\n  perspective: 1000px;\n}\n\n/* More subtle collapsed state */\n.thinking-box__container--collapsed {\n  border-color: var(--thinking-border-color);\n  opacity: 0.9;\n}\n\n/* No preview - make the bottom border curved too */\n.thinking-box__container--no-preview {\n  border-bottom-left-radius: 0.75rem;\n  border-bottom-right-radius: 0.75rem;\n}\n\n/* Remove the bottom border when there's no preview */\n.thinking-box__container--no-preview .thinking-box__header {\n  border-bottom: none;\n}\n\n/* Style for the transitioning state to prevent flashing */\n.thinking-box__container--transitioning {\n  pointer-events: none; /* Prevent interactions during transition */\n}\n\n/* Fix for the flashing white border in dark mode */\n.dark .thinking-box__container--transitioning {\n  border-color: rgba(255, 255, 255, 0.1);\n}\n\n.dark .thinking-box__container--transitioning .thinking-box__header {\n  border-bottom-color: rgba(255, 255, 255, 0.1);\n}\n\n.dark .thinking-box__container--transitioning .thinking-box__content {\n  border-top-color: rgba(255, 255, 255, 0.1);\n}\n\n.dark .thinking-box__container--transitioning .thinking-box__preview--crawling {\n  border-top-color: rgba(255, 255, 255, 0.1);\n}\n\n.thinking-box__header {\n  display: flex;\n  align-items: center;\n  justify-content: space-between;\n  padding: 0.75rem 1rem;\n  cursor: pointer;\n  transition: background-color 0.2s ease-in-out;\n  user-select: none;\n  border-bottom: 1px solid var(--thinking-border-color);\n}\n\n.thinking-box__header:hover {\n  background-color: rgba(0, 0, 0, 0.02);\n}\n\n.dark .thinking-box__header:hover {\n  background-color: rgba(255, 255, 255, 0.02);\n}\n\n.thinking-box__title {\n  display: flex;\n  align-items: center;\n  gap: 0.5rem;\n  color: var(--thinking-title-color);\n}\n\n.thinking-box__icon {\n  color: var(--thinking-text-color);\n  margin-right: 0.25rem;\n  animation: pulse 1.5s infinite ease-in-out;\n}\n\n.thinking-box__title-text {\n  font-size: 0.8rem;\n  font-weight: 500;\n}\n\n.thinking-box__timer {\n  font-size: 0.8rem;\n  color: var(--thinking-text-color);\n}\n\n.thinking-box__collapse-icon {\n  color: var(--thinking-text-color);\n  display: flex;\n  align-items: center;\n}\n\n.thinking-box__content {\n  border-top: 1px solid var(--thinking-border-color);\n  padding: 1.25rem;\n  max-height: 400px;\n  overflow-y: auto;\n  color: var(--thinking-text-color);\n  animation: fadeIn 0.3s ease-in-out;\n}\n\n.thinking-box__markdown {\n  font-size: 0.875rem;\n  color: var(--thinking-text-color);\n  line-height: 1.5;\n  overflow-wrap: break-word;\n}\n\n/* Preview container (collapsed state) */\n.thinking-box__preview {\n  position: relative;\n  height: 2.5rem;\n  overflow: hidden;\n  width: 100%;\n  padding: 0.15rem 0;\n  transition: all 0.35s cubic-bezier(0.16, 1, 0.3, 1);\n}\n\n/* Active animation styling - highlight active thinking */\n.thinking-box__preview--crawling {\n  height: 5rem;\n  transition: all 0.5s cubic-bezier(0.16, 1, 0.3, 1);\n  border-top: 1px solid var(--thinking-border-color);\n  background-color: rgba(0, 0, 0, 0.01);\n}\n\n.dark .thinking-box__preview--crawling {\n  background-color: rgba(255, 255, 255, 0.025);\n}\n\n.thinking-box__fade-container {\n  position: relative;\n  height: 100%;\n  overflow: hidden;\n  transition: all 0.4s cubic-bezier(0.16, 1, 0.3, 1);\n}\n\n/* Create fade effect at top and bottom */\n.thinking-box__fade-container::before,\n.thinking-box__fade-container::after {\n  content: \"\";\n  position: absolute;\n  left: 0;\n  right: 0;\n  height: 0.85rem; /* Increased for more visible gradient */\n  z-index: 10;\n  pointer-events: none;\n  transition: all 0.3s cubic-bezier(0.16, 1, 0.3, 1);\n  opacity: 0.95;\n}\n\n/* Enhanced gradients with smoother transitions */\n.thinking-box__fade-container::before {\n  top: 0;\n  background: linear-gradient(\n    to bottom,\n    var(--thinking-fade-start),\n    rgba(var(--thinking-fade-start-rgb, 249, 250, 251), 0.85) 25%,\n    var(--thinking-fade-end) 100%\n  );\n}\n\n.thinking-box__fade-container::after {\n  bottom: 0;\n  background: linear-gradient(\n    to top,\n    var(--thinking-fade-start),\n    rgba(var(--thinking-fade-start-rgb, 249, 250, 251), 0.85) 25%,\n    var(--thinking-fade-end) 100%\n  );\n}\n\n.dark .thinking-box__fade-container::before {\n  background: linear-gradient(\n    to bottom,\n    var(--thinking-fade-start),\n    rgba(var(--thinking-fade-start-rgb, 30, 41, 59), 0.85) 25%,\n    var(--thinking-fade-end) 100%\n  );\n}\n\n.dark .thinking-box__fade-container::after {\n  background: linear-gradient(\n    to top,\n    var(--thinking-fade-start),\n    rgba(var(--thinking-fade-start-rgb, 30, 41, 59), 0.85) 25%,\n    var(--thinking-fade-end) 100%\n  );\n}\n\n/* Make gradients more visible during crawling */\n.thinking-box__preview--crawling .thinking-box__fade-container::before,\n.thinking-box__preview--crawling .thinking-box__fade-container::after {\n  height: 1.5rem;\n  opacity: 0.95;\n}\n\n.thinking-box__scroll-content {\n  padding: 0.75rem 1rem;\n  height: 100%;\n  width: 100%;\n  overflow-y: hidden;\n  will-change: transform;\n  transform: translateZ(0);\n  backface-visibility: hidden;\n  -webkit-font-smoothing: antialiased;\n  -webkit-mask-image: linear-gradient(\n    to bottom,\n    transparent,\n    black 12%,\n    black 88%,\n    transparent\n  );\n  mask-image: linear-gradient(\n    to bottom,\n    transparent,\n    black 12%,\n    black 88%,\n    transparent\n  );\n}\n\n/* Enhanced text during crawling */\n.thinking-box__preview--crawling .thinking-box__preview-text {\n  opacity: 0.98;\n  font-size: 0.75rem;\n  line-height: 1.6;\n}\n\n.thinking-box__expand-prompt {\n  display: none;\n}\n\n/* Animation for thinking indicator */\n@keyframes pulse {\n  0% {\n    opacity: 0.5;\n  }\n  50% {\n    opacity: 1;\n  }\n  100% {\n    opacity: 0.5;\n  }\n}\n\n/* Fade in animation */\n@keyframes fadeIn {\n  from {\n    opacity: 0;\n  }\n  to {\n    opacity: 1;\n  }\n}\n\n/* Smooth scrolling effect */\n@keyframes scrollText {\n  0% {\n    transform: translateY(0);\n  }\n  100% {\n    transform: translateY(-100%);\n  }\n}\n\n.thinking-box__preview-text {\n  font-family: \"SFMono-Regular\", Consolas, \"Liberation Mono\", Menlo, monospace;\n  font-size: 0.7rem;\n  color: var(--thinking-text-color);\n  white-space: pre-wrap;\n  margin: 0;\n  display: block;\n  opacity: 0.85;\n  line-height: 1.4;\n  transition: all 0.3s ease;\n}\n\n/* Enhanced masking during crawling */\n.thinking-box__preview--crawling .thinking-box__scroll-content {\n  -webkit-mask-image: linear-gradient(\n    to bottom,\n    transparent,\n    black 8%,\n    black 92%,\n    transparent\n  );\n  mask-image: linear-gradient(\n    to bottom,\n    transparent,\n    black 8%,\n    black 92%,\n    transparent\n  );\n  padding: 0.75rem 1rem;\n}\n\n/* Make sure the preview adjusts immediately when new content arrives */\n.thinking-box__preview--crawling .thinking-box__scroll-content {\n  transition: height 0.3s ease-out;\n}\n\n/* Enhance visibility of actual content */\n.thinking-box__preview--crawling .thinking-box__preview-text {\n  opacity: 0.98;\n  font-size: 0.75rem;\n  line-height: 1.7;\n  text-shadow: 0 0 0.1px rgba(0, 0, 0, 0.2);\n}\n\n/* Faster transition for expanding preview */\n.thinking-box__preview {\n  transition: all 0.25s cubic-bezier(0.16, 1, 0.3, 1);\n}\n\n/* Enhanced hover feedback for collapsed header */\n.thinking-box__container--collapsed .thinking-box__header:hover {\n  background-color: rgba(0, 0, 0, 0.03);\n}\n\n.dark .thinking-box__container--collapsed .thinking-box__header:hover {\n  background-color: rgba(255, 255, 255, 0.03);\n}\n"
  },
  {
    "path": "web/src/app/app/page.tsx",
    "content": "import AppPage from \"@/refresh-pages/AppPage\";\n\nexport interface PageProps {\n  searchParams: Promise<{ [key: string]: string }>;\n}\n\nexport default async function Page(props: PageProps) {\n  const searchParams = await props.searchParams;\n  const firstMessage = searchParams.firstMessage;\n\n  // Other pages in `web/src/app/chat` are wrapped with `<AppPageLayout>`.\n  // `chat/page.tsx` is not because it also needs to handle rendering of the document-sidebar (`web/src/sections/document-sidebar/DocumentsSidebar.tsx`).\n  return <AppPage firstMessage={firstMessage} />;\n}\n"
  },
  {
    "path": "web/src/app/app/projects/projectsService.ts",
    "content": "import { ChatFileType, ChatSession } from \"../interfaces\";\n\n// Generic error handler that avoids exposing server error details\nconst handleRequestError = (action: string, response: Response) => {\n  throw new Error(`${action} failed (Status: ${response.status})`);\n};\n\nexport interface Project {\n  id: number;\n  name: string;\n  description: string | null;\n  created_at: string;\n  user_id: string;\n  instructions: string | null;\n  chat_sessions: ChatSession[];\n}\n\nexport interface CategorizedFiles {\n  user_files: ProjectFile[];\n  rejected_files: RejectedFile[];\n}\n\nexport interface ProjectFile {\n  id: string;\n  name: string;\n  project_id: number | null;\n  user_id: string | null;\n  file_id: string;\n  created_at: string;\n  status: UserFileStatus;\n  file_type: string;\n  last_accessed_at: string;\n  chat_file_type: ChatFileType;\n  token_count: number | null;\n  chunk_count: number | null;\n  temp_id?: string | null;\n}\n\nexport interface RejectedFile {\n  file_name: string;\n  reason: string;\n}\n\nexport interface UserFileDeleteResult {\n  has_associations: boolean;\n  project_names: string[];\n  assistant_names: string[];\n}\n\nexport enum UserFileStatus {\n  UPLOADING = \"UPLOADING\", //UI only\n  PROCESSING = \"PROCESSING\",\n  COMPLETED = \"COMPLETED\",\n  SKIPPED = \"SKIPPED\",\n  FAILED = \"FAILED\",\n  CANCELED = \"CANCELED\",\n  DELETING = \"DELETING\",\n}\n\nexport type ProjectDetails = {\n  project: Project;\n  files?: ProjectFile[];\n  persona_id_to_is_featured?: Record<number, boolean>;\n};\n\nexport async function fetchProjects(): Promise<Project[]> {\n  const response = await fetch(\"/api/user/projects\");\n  if (!response.ok) {\n    handleRequestError(\"Fetch projects\", response);\n  }\n  return response.json();\n}\n\nexport async function createProject(name: string): Promise<Project> {\n  const response = await fetch(\n    `/api/user/projects/create?name=${encodeURIComponent(name)}`,\n    { method: \"POST\" }\n  );\n  if (!response.ok) {\n    handleRequestError(\"Create project\", response);\n  }\n  return response.json();\n}\n\nexport async function uploadFiles(\n  files: File[],\n  projectId?: number | null,\n  tempIdMap?: Map<string, string>\n): Promise<CategorizedFiles> {\n  const formData = new FormData();\n  files.forEach((file) => formData.append(\"files\", file));\n  if (projectId !== undefined && projectId !== null) {\n    formData.append(\"project_id\", String(projectId));\n  }\n  if (tempIdMap !== undefined && tempIdMap !== null) {\n    formData.append(\n      \"temp_id_map\",\n      JSON.stringify(Object.fromEntries(tempIdMap))\n    );\n  }\n\n  const response = await fetch(\"/api/user/projects/file/upload\", {\n    method: \"POST\",\n    body: formData,\n  });\n\n  if (!response.ok) {\n    handleRequestError(\"Upload files\", response);\n  }\n\n  return response.json();\n}\n\nexport async function getRecentFiles(): Promise<ProjectFile[]> {\n  const response = await fetch(`/api/user/files/recent`);\n  if (!response.ok) {\n    handleRequestError(\"Fetch recent files\", response);\n  }\n  return response.json();\n}\n\nexport async function getFilesInProject(\n  projectId: number\n): Promise<ProjectFile[]> {\n  const response = await fetch(`/api/user/projects/files/${projectId}`);\n  if (!response.ok) {\n    handleRequestError(\"Fetch project files\", response);\n  }\n  return response.json();\n}\n\nexport async function getProject(projectId: number): Promise<Project> {\n  const response = await fetch(`/api/user/projects/${projectId}`);\n  if (!response.ok) {\n    handleRequestError(\"Fetch project\", response);\n  }\n  return response.json();\n}\n\nexport async function renameProject(\n  projectId: number,\n  name: string\n): Promise<Project> {\n  const response = await fetch(`/api/user/projects/${projectId}`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ name }),\n  });\n  if (!response.ok) {\n    handleRequestError(\"Rename project\", response);\n  }\n  return response.json();\n}\n\nexport async function deleteProject(projectId: number): Promise<void> {\n  const response = await fetch(`/api/user/projects/${projectId}`, {\n    method: \"DELETE\",\n  });\n  if (!response.ok) {\n    handleRequestError(\"Delete project\", response);\n  }\n}\n\nexport async function getProjectInstructions(\n  projectId: number\n): Promise<string | null> {\n  const response = await fetch(`/api/user/projects/${projectId}/instructions`);\n  if (!response.ok) {\n    handleRequestError(\"Fetch project instructions\", response);\n  }\n  const data = (await response.json()) as { instructions: string | null };\n  return data.instructions ?? null;\n}\n\nexport async function upsertProjectInstructions(\n  projectId: number,\n  instructions: string\n): Promise<string | null> {\n  const response = await fetch(`/api/user/projects/${projectId}/instructions`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ instructions }),\n  });\n  if (!response.ok) {\n    handleRequestError(\"Update project instructions\", response);\n  }\n  const data = (await response.json()) as { instructions: string | null };\n  return data.instructions ?? null;\n}\n\nexport async function getProjectDetails(\n  projectId: number\n): Promise<ProjectDetails> {\n  const response = await fetch(`/api/user/projects/${projectId}/details`);\n  if (!response.ok) {\n    handleRequestError(\"Fetch project details\", response);\n  }\n  return response.json();\n}\n\nexport async function unlinkFileFromProject(\n  projectId: number,\n  fileId: string\n): Promise<Response> {\n  const response = await fetch(\n    `/api/user/projects/${encodeURIComponent(\n      projectId\n    )}/files/${encodeURIComponent(fileId)}`,\n    { method: \"DELETE\" }\n  );\n  if (!response.ok) {\n    handleRequestError(\"Unlink file from project\", response);\n  }\n  return response;\n}\n\nexport async function linkFileToProject(\n  projectId: number,\n  fileId: string\n): Promise<Response> {\n  const response = await fetch(\n    `/api/user/projects/${encodeURIComponent(\n      projectId\n    )}/files/${encodeURIComponent(fileId)}`,\n    { method: \"POST\" }\n  );\n  if (!response.ok) {\n    handleRequestError(\"Link file to project\", response);\n  }\n  return response;\n}\n\nexport async function deleteUserFile(\n  fileId: string\n): Promise<UserFileDeleteResult> {\n  const response = await fetch(\n    `/api/user/projects/file/${encodeURIComponent(fileId)}`,\n    {\n      method: \"DELETE\",\n    }\n  );\n  if (!response.ok) {\n    handleRequestError(\"Delete file\", response);\n  }\n  return (await response.json()) as UserFileDeleteResult;\n}\n\nexport async function getUserFile(fileId: string): Promise<ProjectFile> {\n  const response = await fetch(\n    `/api/user/projects/file/${encodeURIComponent(fileId)}`\n  );\n  if (!response.ok) {\n    handleRequestError(\"Fetch file\", response);\n  }\n  return response.json();\n}\n\nexport async function getUserFileStatuses(\n  fileIds: string[]\n): Promise<ProjectFile[]> {\n  const response = await fetch(`/api/user/projects/file/statuses`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ file_ids: fileIds }),\n  });\n  if (!response.ok) {\n    handleRequestError(\"Fetch file statuses\", response);\n  }\n  return response.json();\n}\n\nexport async function getSessionProjectTokenCount(\n  chatSessionId: string\n): Promise<number> {\n  const response = await fetch(\n    `/api/user/projects/session/${encodeURIComponent(\n      chatSessionId\n    )}/token-count`\n  );\n  if (!response.ok) {\n    return 0;\n  }\n  const data = (await response.json()) as { total_tokens: number };\n  return data.total_tokens ?? 0;\n}\n\nexport async function getProjectFilesForSession(\n  chatSessionId: string\n): Promise<ProjectFile[]> {\n  const response = await fetch(\n    `/api/user/projects/session/${encodeURIComponent(chatSessionId)}/files`\n  );\n  if (!response.ok) {\n    return [];\n  }\n  return response.json();\n}\n\nexport async function getProjectTokenCount(projectId: number): Promise<number> {\n  const response = await fetch(\n    `/api/user/projects/${encodeURIComponent(projectId)}/token-count`\n  );\n  if (!response.ok) {\n    return 0;\n  }\n  const data = (await response.json()) as { total_tokens: number };\n  return data.total_tokens ?? 0;\n}\n\nexport async function getMaxSelectedDocumentTokens(\n  personaId: number\n): Promise<number | null> {\n  const response = await fetch(\n    `/api/chat/max-selected-document-tokens?persona_id=${personaId}`\n  );\n  if (!response.ok) {\n    return null;\n  }\n  const json = await response.json();\n  return (json?.max_tokens as number) ?? null;\n}\n\nexport async function moveChatSession(\n  projectId: number,\n  chatSessionId: string\n): Promise<boolean> {\n  const response = await fetch(\n    `/api/user/projects/${projectId}/move_chat_session`,\n    {\n      method: \"POST\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify({ chat_session_id: chatSessionId }),\n    }\n  );\n  if (!response.ok) {\n    handleRequestError(\"Move chat session\", response);\n  }\n  return response.ok;\n}\n\nexport async function removeChatSessionFromProject(\n  chatSessionId: string\n): Promise<boolean> {\n  const response = await fetch(`/api/user/projects/remove_chat_session`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ chat_session_id: chatSessionId }),\n  });\n  if (!response.ok) {\n    handleRequestError(\"Remove chat session from project\", response);\n  }\n  return response.ok;\n}\n"
  },
  {
    "path": "web/src/app/app/services/actionUtils.ts",
    "content": "import { JSX } from \"react\";\nimport type { IconProps } from \"@opal/types\";\nimport { ToolSnapshot } from \"@/lib/tools/interfaces\";\nimport {\n  SvgCpu,\n  SvgGlobe,\n  SvgImage,\n  SvgLink,\n  SvgSearch,\n  SvgServer,\n} from \"@opal/icons\";\n\n// Helper functions to identify specific tools\nconst isSearchTool = (tool: ToolSnapshot): boolean => {\n  return (\n    tool.in_code_tool_id === \"SearchTool\" ||\n    tool.name === \"run_search\" ||\n    tool.display_name?.toLowerCase().includes(\"search tool\")\n  );\n};\n\nconst isWebSearchTool = (tool: ToolSnapshot): boolean => {\n  return (\n    tool.in_code_tool_id === \"WebSearchTool\" ||\n    tool.display_name?.toLowerCase().includes(\"web_search\")\n  );\n};\n\nconst isImageGenerationTool = (tool: ToolSnapshot): boolean => {\n  return (\n    tool.in_code_tool_id === \"ImageGenerationTool\" ||\n    tool.display_name?.toLowerCase().includes(\"image generation\")\n  );\n};\n\nconst isKnowledgeGraphTool = (tool: ToolSnapshot): boolean => {\n  return (\n    tool.in_code_tool_id === \"KnowledgeGraphTool\" ||\n    tool.display_name?.toLowerCase().includes(\"knowledge graph\")\n  );\n};\n\nconst isOpenUrlTool = (tool: ToolSnapshot): boolean => {\n  return (\n    tool.in_code_tool_id === \"OpenURLTool\" ||\n    tool.name === \"open_url\" ||\n    tool.display_name?.toLowerCase().includes(\"open url\")\n  );\n};\n\nexport function getIconForAction(\n  action: ToolSnapshot\n): (props: IconProps) => JSX.Element {\n  if (isSearchTool(action)) return SvgSearch;\n  if (isWebSearchTool(action)) return SvgGlobe;\n  if (isImageGenerationTool(action)) return SvgImage;\n  if (isKnowledgeGraphTool(action)) return SvgServer;\n  if (isOpenUrlTool(action)) return SvgLink;\n  return SvgCpu;\n}\n\n// Check if the agent has either search tool or web search tool available\nexport function hasSearchToolsAvailable(tools: ToolSnapshot[]): boolean {\n  return tools.some((tool) => isSearchTool(tool) || isWebSearchTool(tool));\n}\n"
  },
  {
    "path": "web/src/app/app/services/currentMessageFIFO.ts",
    "content": "import { PacketType, sendMessage, SendMessageParams } from \"./lib\";\n\nexport class CurrentMessageFIFO {\n  private stack: PacketType[] = [];\n  isComplete: boolean = false;\n  error: string | null = null;\n\n  push(packetBunch: PacketType) {\n    this.stack.push(packetBunch);\n  }\n\n  nextPacket(): PacketType | undefined {\n    return this.stack.shift();\n  }\n\n  isEmpty(): boolean {\n    return this.stack.length === 0;\n  }\n}\n\nexport async function updateCurrentMessageFIFO(\n  stack: CurrentMessageFIFO,\n  params: SendMessageParams\n) {\n  try {\n    for await (const packet of sendMessage(params)) {\n      if (params.signal?.aborted) {\n        throw new Error(\"AbortError\");\n      }\n      stack.push(packet);\n    }\n  } catch (error: unknown) {\n    if (error instanceof Error) {\n      if (error.name === \"AbortError\") {\n        console.debug(\"Stream aborted\");\n      } else {\n        stack.error = error.message;\n      }\n    } else {\n      stack.error = String(error);\n    }\n  } finally {\n    stack.isComplete = true;\n  }\n}\n"
  },
  {
    "path": "web/src/app/app/services/fileUtils.ts",
    "content": "import { FileDescriptor } from \"../interfaces\";\nimport { ProjectFile } from \"../projects/projectsService\";\n\nexport function projectsFileToFileDescriptor(\n  file: ProjectFile\n): FileDescriptor {\n  return {\n    id: file.file_id,\n    type: file.chat_file_type,\n    name: file.name,\n    user_file_id: file.id,\n  };\n}\n\nexport function projectFilesToFileDescriptors(\n  files: ProjectFile[]\n): FileDescriptor[] {\n  return files.map(projectsFileToFileDescriptor);\n}\n"
  },
  {
    "path": "web/src/app/app/services/lib.tsx",
    "content": "import {\n  Filters,\n  DocumentInfoPacket,\n  StreamStopInfo,\n} from \"@/lib/search/interfaces\";\nimport { handleSSEStream } from \"@/lib/search/streamingUtils\";\nimport { FeedbackType } from \"@/app/app/interfaces\";\nimport {\n  BackendMessage,\n  DocumentsResponse,\n  FileDescriptor,\n  FileChatDisplay,\n  Message,\n  MessageResponseIDInfo,\n  MultiModelMessageResponseIDInfo,\n  ResearchType,\n  RetrievalType,\n  StreamingError,\n  ToolCallMetadata,\n  UserKnowledgeFilePacket,\n} from \"../interfaces\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { ReadonlyURLSearchParams } from \"next/navigation\";\nimport { SEARCH_PARAM_NAMES } from \"./searchParams\";\nimport { WEB_SEARCH_TOOL_ID } from \"@/app/app/components/tools/constants\";\nimport { SEARCH_TOOL_ID } from \"@/app/app/components/tools/constants\";\nimport { Packet } from \"./streamingModels\";\n\nexport async function updateLlmOverrideForChatSession(\n  chatSessionId: string,\n  newAlternateModel: string\n) {\n  const response = await fetch(\"/api/chat/update-chat-session-model\", {\n    method: \"PUT\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      chat_session_id: chatSessionId,\n      new_alternate_model: newAlternateModel,\n    }),\n  });\n  return response;\n}\n\nexport async function updateTemperatureOverrideForChatSession(\n  chatSessionId: string,\n  newTemperature: number\n) {\n  const response = await fetch(\"/api/chat/update-chat-session-temperature\", {\n    method: \"PUT\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      chat_session_id: chatSessionId,\n      temperature_override: newTemperature,\n    }),\n  });\n  return response;\n}\n\nexport async function createChatSession(\n  personaId: number,\n  description: string | null,\n  projectId: number | null\n): Promise<string> {\n  const createChatSessionResponse = await fetch(\n    \"/api/chat/create-chat-session\",\n    {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        persona_id: personaId,\n        description,\n        project_id: projectId,\n      }),\n    }\n  );\n  if (!createChatSessionResponse.ok) {\n    console.error(\n      `Failed to create chat session - ${createChatSessionResponse.status}`\n    );\n    throw Error(\"Failed to create chat session\");\n  }\n  const chatSessionResponseJson = await createChatSessionResponse.json();\n  return chatSessionResponseJson.chat_session_id;\n}\n\nexport type PacketType =\n  | ToolCallMetadata\n  | BackendMessage\n  | DocumentInfoPacket\n  | DocumentsResponse\n  | FileChatDisplay\n  | StreamingError\n  | MessageResponseIDInfo\n  | MultiModelMessageResponseIDInfo\n  | StreamStopInfo\n  | UserKnowledgeFilePacket\n  | Packet;\n\n// Origin of the message for telemetry tracking.\n// Keep in sync with backend: backend/onyx/server/query_and_chat/models.py::MessageOrigin\nexport type MessageOrigin =\n  | \"webapp\"\n  | \"chrome_extension\"\n  | \"api\"\n  | \"slackbot\"\n  | \"unknown\";\n\nexport interface LLMOverride {\n  model_provider: string;\n  model_version: string;\n  temperature?: number;\n  display_name?: string;\n}\n\nexport interface SendMessageParams {\n  message: string;\n  fileDescriptors?: FileDescriptor[];\n  parentMessageId: number | null;\n  chatSessionId: string;\n  filters: Filters | null;\n  signal?: AbortSignal;\n  deepResearch?: boolean;\n  enabledToolIds?: number[];\n  // Single forced tool ID (new API uses singular, not array)\n  forcedToolId?: number | null;\n  // LLM override parameters\n  modelProvider?: string;\n  modelVersion?: string;\n  temperature?: number;\n  // Multi-model: send multiple LLM overrides for parallel generation\n  llmOverrides?: LLMOverride[];\n  // Origin of the message for telemetry tracking\n  origin?: MessageOrigin;\n  // Additional context injected into the LLM call but not stored/shown in chat.\n  // Used e.g. by Chrome extension \"Read this tab\" feature.\n  additionalContext?: string;\n}\n\nexport async function* sendMessage({\n  message,\n  fileDescriptors,\n  parentMessageId,\n  chatSessionId,\n  filters,\n  signal,\n  deepResearch,\n  enabledToolIds,\n  forcedToolId,\n  modelProvider,\n  modelVersion,\n  temperature,\n  llmOverrides,\n  origin,\n  additionalContext,\n}: SendMessageParams): AsyncGenerator<PacketType, void, unknown> {\n  // Build payload for new send-chat-message API\n  const payload = {\n    message: message,\n    chat_session_id: chatSessionId,\n    parent_message_id: parentMessageId,\n    file_descriptors: fileDescriptors,\n    internal_search_filters: filters,\n    deep_research: deepResearch ?? false,\n    allowed_tool_ids: enabledToolIds,\n    forced_tool_id: forcedToolId ?? null,\n    llm_override:\n      temperature || modelVersion\n        ? {\n            temperature,\n            model_provider: modelProvider,\n            model_version: modelVersion,\n          }\n        : null,\n    // Multi-model: list of LLM overrides for parallel generation\n    llm_overrides: llmOverrides ?? null,\n    // Default to \"unknown\" for consistency with backend; callers should set explicitly\n    origin: origin ?? \"unknown\",\n    additional_context: additionalContext ?? null,\n  };\n\n  const body = JSON.stringify(payload);\n\n  const response = await fetch(`/api/chat/send-chat-message`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body,\n    signal,\n  });\n\n  if (!response.ok) {\n    const data = await response.json().catch(() => ({}));\n    throw new Error(data.detail ?? `HTTP error! status: ${response.status}`);\n  }\n\n  yield* handleSSEStream<PacketType>(response, signal);\n}\n\nexport async function setPreferredResponse(\n  userMessageId: number,\n  preferredResponseId: number\n): Promise<Response> {\n  return fetch(\"/api/chat/set-preferred-response\", {\n    method: \"PUT\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      user_message_id: userMessageId,\n      preferred_response_id: preferredResponseId,\n    }),\n  });\n}\n\nexport async function nameChatSession(chatSessionId: string) {\n  const response = await fetch(\"/api/chat/rename-chat-session\", {\n    method: \"PUT\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      chat_session_id: chatSessionId,\n      name: null,\n    }),\n  });\n  return response;\n}\n\nexport async function patchMessageToBeLatest(messageId: number) {\n  const response = await fetch(\"/api/chat/set-message-as-latest\", {\n    method: \"PUT\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      message_id: messageId,\n    }),\n  });\n  return response;\n}\n\nexport async function handleChatFeedback(\n  messageId: number,\n  feedback: FeedbackType,\n  feedbackDetails: string,\n  predefinedFeedback: string | undefined\n) {\n  const response = await fetch(\"/api/chat/create-chat-message-feedback\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      chat_message_id: messageId,\n      is_positive: feedback === \"like\",\n      feedback_text: feedbackDetails,\n      predefined_feedback: predefinedFeedback,\n    }),\n  });\n  return response;\n}\n\nexport async function removeChatFeedback(messageId: number) {\n  const response = await fetch(\n    `/api/chat/remove-chat-message-feedback?chat_message_id=${messageId}`,\n    {\n      method: \"DELETE\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n    }\n  );\n  return response;\n}\n\nexport async function renameChatSession(\n  chatSessionId: string,\n  newName: string\n) {\n  const response = await fetch(`/api/chat/rename-chat-session`, {\n    method: \"PUT\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      chat_session_id: chatSessionId,\n      name: newName,\n    }),\n  });\n  return response;\n}\n\nexport async function deleteChatSession(chatSessionId: string) {\n  const response = await fetch(\n    `/api/chat/delete-chat-session/${chatSessionId}`,\n    {\n      method: \"DELETE\",\n    }\n  );\n  return response;\n}\n\nexport async function deleteAllChatSessions() {\n  const response = await fetch(`/api/chat/delete-all-chat-sessions`, {\n    method: \"DELETE\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n  return response;\n}\n\nexport async function getAvailableContextTokens(\n  chatSessionId: string\n): Promise<number | null> {\n  const response = await fetch(\n    `/api/chat/available-context-tokens/${chatSessionId}`\n  );\n  if (!response.ok) {\n    return null;\n  }\n  const data = (await response.json()) as { available_tokens: number };\n  return data?.available_tokens ?? null;\n}\n\nexport function processRawChatHistory(\n  rawMessages: BackendMessage[],\n  packets: Packet[][]\n): Map<number, Message> {\n  const messages: Map<number, Message> = new Map();\n  const parentMessageChildrenMap: Map<number, number[]> = new Map();\n\n  let agentMessageInd = 0;\n\n  rawMessages.forEach((messageInfo, _ind) => {\n    const packetsForMessage = packets[agentMessageInd];\n    if (messageInfo.message_type === \"assistant\") {\n      agentMessageInd++;\n    }\n\n    const hasContextDocs = (messageInfo?.context_docs || []).length > 0;\n    let retrievalType;\n    if (hasContextDocs) {\n      if (messageInfo.rephrased_query) {\n        retrievalType = RetrievalType.Search;\n      } else {\n        retrievalType = RetrievalType.SelectedDocs;\n      }\n    } else {\n      retrievalType = RetrievalType.None;\n    }\n\n    const message: Message = {\n      // for existing messages, use the message_id as the nodeId\n      // all that matters is that the nodeId is unique for a given chat session\n      nodeId: messageInfo.message_id,\n      messageId: messageInfo.message_id,\n      message: messageInfo.message,\n      type: messageInfo.message_type as \"user\" | \"assistant\",\n      files: messageInfo.files,\n      alternateAgentID:\n        messageInfo.alternate_assistant_id !== null\n          ? Number(messageInfo.alternate_assistant_id)\n          : null,\n      // only include these fields if this is an agent message so that\n      // this is identical to what is computed at streaming time\n      ...(messageInfo.message_type === \"assistant\"\n        ? {\n            retrievalType: retrievalType,\n            researchType: messageInfo.research_type as ResearchType | undefined,\n            query: messageInfo.rephrased_query,\n            documents: messageInfo?.context_docs || [],\n            citations: messageInfo?.citations || {},\n            processingDurationSeconds: messageInfo.processing_duration_seconds,\n          }\n        : {}),\n      toolCall: messageInfo.tool_call,\n      parentNodeId: messageInfo.parent_message,\n      childrenNodeIds: [],\n      latestChildNodeId: messageInfo.latest_child_message,\n      overridden_model: messageInfo.overridden_model,\n      packets: packetsForMessage || [],\n      currentFeedback: messageInfo.current_feedback as FeedbackType | null,\n      // Multi-model answer generation\n      preferredResponseId: messageInfo.preferred_response_id ?? null,\n      modelDisplayName: messageInfo.model_display_name ?? null,\n    };\n\n    messages.set(messageInfo.message_id, message);\n\n    if (messageInfo.parent_message !== null) {\n      if (!parentMessageChildrenMap.has(messageInfo.parent_message)) {\n        parentMessageChildrenMap.set(messageInfo.parent_message, []);\n      }\n      parentMessageChildrenMap\n        .get(messageInfo.parent_message)!\n        .push(messageInfo.message_id);\n    }\n  });\n\n  // Populate childrenMessageIds for each message\n  parentMessageChildrenMap.forEach((childrenIds, parentId) => {\n    childrenIds.sort((a, b) => a - b);\n    const parentMesage = messages.get(parentId);\n    if (parentMesage) {\n      parentMesage.childrenNodeIds = childrenIds;\n    }\n  });\n\n  return messages;\n}\n\nexport function personaIncludesRetrieval(\n  selectedPersona: MinimalPersonaSnapshot\n) {\n  return selectedPersona.tools.some(\n    (tool) =>\n      tool.in_code_tool_id &&\n      [SEARCH_TOOL_ID, WEB_SEARCH_TOOL_ID].includes(tool.in_code_tool_id)\n  );\n}\n\nconst PARAMS_TO_SKIP = [\n  SEARCH_PARAM_NAMES.SUBMIT_ON_LOAD,\n  SEARCH_PARAM_NAMES.USER_PROMPT,\n  SEARCH_PARAM_NAMES.TITLE,\n  // only use these if explicitly passed in\n  SEARCH_PARAM_NAMES.CHAT_ID,\n  SEARCH_PARAM_NAMES.PERSONA_ID,\n  SEARCH_PARAM_NAMES.PROJECT_ID,\n  // do not persist project context in the URL after navigation\n  \"projectid\",\n];\n\nexport function buildChatUrl(\n  existingSearchParams: ReadonlyURLSearchParams | null,\n  chatSessionId: string | null,\n  personaId: number | null,\n  search?: boolean,\n  skipReload?: boolean\n) {\n  const finalSearchParams: string[] = [];\n  if (chatSessionId) {\n    finalSearchParams.push(\n      `${\n        search ? SEARCH_PARAM_NAMES.SEARCH_ID : SEARCH_PARAM_NAMES.CHAT_ID\n      }=${chatSessionId}`\n    );\n  }\n  if (personaId !== null) {\n    finalSearchParams.push(`${SEARCH_PARAM_NAMES.PERSONA_ID}=${personaId}`);\n  }\n\n  existingSearchParams?.forEach((value, key) => {\n    if (!PARAMS_TO_SKIP.includes(key)) {\n      finalSearchParams.push(`${key}=${value}`);\n    }\n  });\n\n  if (skipReload) {\n    finalSearchParams.push(`${SEARCH_PARAM_NAMES.SKIP_RELOAD}=true`);\n  }\n\n  const finalSearchParamsString = finalSearchParams.join(\"&\");\n\n  if (finalSearchParamsString) {\n    return `/${search ? \"search\" : \"chat\"}?${finalSearchParamsString}`;\n  }\n\n  return `/${search ? \"search\" : \"chat\"}`;\n}\n\nexport async function uploadFilesForChat(\n  files: File[]\n): Promise<[FileDescriptor[], string | null]> {\n  const formData = new FormData();\n  files.forEach((file) => {\n    formData.append(\"files\", file);\n  });\n\n  const response = await fetch(\"/api/chat/file\", {\n    method: \"POST\",\n    body: formData,\n  });\n  if (!response.ok) {\n    return [[], `Failed to upload files - ${(await response.json()).detail}`];\n  }\n  const responseJson = await response.json();\n\n  return [responseJson.files as FileDescriptor[], null];\n}\n"
  },
  {
    "path": "web/src/app/app/services/messageTree.ts",
    "content": "import { FileDescriptor, Message } from \"../interfaces\";\n\nexport const SYSTEM_MESSAGE_ID = -3;\nexport const SYSTEM_NODE_ID = -3;\n\nexport type MessageTreeState = Map<number, Message>; // key is nodeId\n\nexport function createInitialMessageTreeState(\n  initialMessages?: Map<number, Message> | Message[]\n): MessageTreeState {\n  if (!initialMessages) {\n    return new Map();\n  }\n  if (initialMessages instanceof Map) {\n    return new Map(initialMessages); // Shallow copy\n  }\n  return new Map(initialMessages.map((msg) => [msg.nodeId, msg]));\n}\n\nexport function getMessage(\n  messages: MessageTreeState,\n  nodeId: number\n): Message | undefined {\n  return messages.get(nodeId);\n}\n\nexport function getMessageByMessageId(\n  messages: MessageTreeState,\n  messageId: number\n): Message | undefined {\n  for (const message of Array.from(messages.values())) {\n    if (message.messageId === messageId) {\n      return message;\n    }\n  }\n  return undefined;\n}\n\nfunction updateParentInMap(\n  map: Map<number, Message>,\n  parentNodeId: number,\n  childNodeId: number,\n  makeLatest: boolean\n): void {\n  const parent = map.get(parentNodeId);\n  if (parent) {\n    const parentChildren = parent.childrenNodeIds || [];\n    const childrenSet = new Set(parentChildren);\n    let updatedChildren = parentChildren;\n\n    if (!childrenSet.has(childNodeId)) {\n      updatedChildren = [...parentChildren, childNodeId];\n    }\n\n    const updatedParent = {\n      ...parent,\n      childrenNodeIds: updatedChildren,\n      // Update latestChild only if explicitly requested or if it's the only child,\n      // or if the child was newly added\n      latestChildNodeId:\n        makeLatest ||\n        updatedChildren.length === 1 ||\n        !childrenSet.has(childNodeId)\n          ? childNodeId\n          : parent.latestChildNodeId,\n    };\n    if (makeLatest && parent.latestChildNodeId !== childNodeId) {\n      updatedParent.latestChildNodeId = childNodeId;\n    }\n\n    map.set(parentNodeId, updatedParent);\n  } else {\n    console.warn(\n      `Parent message with nodeId ${parentNodeId} not found when updating for child ${childNodeId}`\n    );\n  }\n}\n\nexport function upsertMessages(\n  currentMessages: MessageTreeState,\n  messagesToAdd: Message[],\n  makeLatestChildMessage: boolean = false\n): MessageTreeState {\n  let newMessages = new Map(currentMessages);\n  let messagesToAddClones = messagesToAdd.map((msg) => ({ ...msg })); // Clone all incoming messages\n\n  if (newMessages.size === 0 && messagesToAddClones.length > 0) {\n    const firstMessage = messagesToAddClones[0];\n    if (!firstMessage) {\n      throw new Error(\"No first message found in the message tree.\");\n    }\n    const systemNodeId =\n      firstMessage.parentNodeId !== null\n        ? firstMessage.parentNodeId\n        : SYSTEM_NODE_ID;\n    const firstNodeId = firstMessage.nodeId;\n\n    // Check if system message needs to be added or already exists (e.g., from parentNodeId)\n    if (!newMessages.has(systemNodeId)) {\n      const dummySystemMessage: Message = {\n        messageId: SYSTEM_MESSAGE_ID,\n        nodeId: systemNodeId,\n        message: \"\",\n        type: \"system\",\n        files: [],\n        toolCall: null,\n        parentNodeId: null,\n        childrenNodeIds: [firstNodeId],\n        latestChildNodeId: firstNodeId,\n        packets: [],\n      };\n      newMessages.set(dummySystemMessage.nodeId, dummySystemMessage);\n    }\n    // Ensure the first message points to the system message if its parent was null\n    if (!firstMessage) {\n      console.error(\"No first message found in the message tree.\");\n      return newMessages;\n    }\n    if (firstMessage.parentNodeId === null) {\n      firstMessage.parentNodeId = systemNodeId;\n    }\n  }\n\n  messagesToAddClones.forEach((message) => {\n    // Add/update the message itself\n    newMessages.set(message.nodeId, message);\n\n    // Update parent's children if the message has a parent\n    if (message.parentNodeId !== null) {\n      // When adding multiple messages, only make the *first* one added potentially the latest,\n      // unless `makeLatestChildMessage` is true for all.\n      // Let's stick to the original logic: update parent, potentially making this message latest\n      // based on makeLatestChildMessage flag OR if it's a new child being added.\n      updateParentInMap(\n        newMessages,\n        message.parentNodeId,\n        message.nodeId,\n        makeLatestChildMessage\n      );\n    }\n  });\n\n  // Explicitly set the last message of the batch as the latest if requested,\n  // overriding previous updates within the loop if necessary.\n  if (makeLatestChildMessage && messagesToAddClones.length > 0) {\n    const lastMessage = messagesToAddClones[messagesToAddClones.length - 1];\n    if (!lastMessage) {\n      console.error(\"No last message found in the message tree.\");\n      return newMessages;\n    }\n    if (lastMessage.parentNodeId !== null) {\n      const parent = newMessages.get(lastMessage.parentNodeId);\n      if (parent && parent.latestChildNodeId !== lastMessage.nodeId) {\n        const updatedParent = {\n          ...parent,\n          latestChildNodeId: lastMessage.nodeId,\n        };\n        newMessages.set(parent.nodeId, updatedParent);\n      }\n    }\n  }\n\n  return newMessages;\n}\n\nexport function removeMessage(\n  currentMessages: MessageTreeState,\n  nodeIdToRemove: number\n): MessageTreeState {\n  if (!currentMessages.has(nodeIdToRemove)) {\n    return currentMessages; // Return original if message doesn't exist\n  }\n\n  const newMessages = new Map(currentMessages);\n  const messageToRemove = newMessages.get(nodeIdToRemove)!;\n\n  // Collect all descendant IDs to remove\n  const idsToRemove = new Set<number>();\n  const queue: number[] = [nodeIdToRemove];\n\n  while (queue.length > 0) {\n    const currentId = queue.shift()!;\n    if (!newMessages.has(currentId) || idsToRemove.has(currentId)) continue;\n    idsToRemove.add(currentId);\n\n    const currentMsg = newMessages.get(currentId);\n    if (currentMsg?.childrenNodeIds) {\n      currentMsg.childrenNodeIds.forEach((childId) => queue.push(childId));\n    }\n  }\n\n  // Remove all descendants\n  idsToRemove.forEach((id) => newMessages.delete(id));\n\n  // Update the parent\n  if (messageToRemove.parentNodeId !== null) {\n    const parent = newMessages.get(messageToRemove.parentNodeId);\n    if (parent) {\n      const updatedChildren = (parent.childrenNodeIds || []).filter(\n        (id) => id !== nodeIdToRemove\n      );\n      const updatedParent = {\n        ...parent,\n        childrenNodeIds: updatedChildren,\n        // If the removed message was the latest, find the new latest (last in the updated children list)\n        latestChildNodeId:\n          parent.latestChildNodeId === nodeIdToRemove\n            ? updatedChildren.length > 0\n              ? updatedChildren[updatedChildren.length - 1]\n              : null\n            : parent.latestChildNodeId,\n      };\n      newMessages.set(parent.nodeId, updatedParent);\n    }\n  }\n\n  return newMessages;\n}\n\nexport function setMessageAsLatest(\n  currentMessages: MessageTreeState,\n  nodeId: number\n): MessageTreeState {\n  const message = currentMessages.get(nodeId);\n  if (!message || message.parentNodeId === null) {\n    return currentMessages; // Cannot set root or non-existent message as latest\n  }\n\n  const parent = currentMessages.get(message.parentNodeId);\n  if (!parent || !(parent.childrenNodeIds || []).includes(nodeId)) {\n    console.warn(\n      `Cannot set message ${nodeId} as latest, parent ${message.parentNodeId} or child link missing.`\n    );\n    return currentMessages; // Parent doesn't exist or doesn't list this message as a child\n  }\n\n  if (parent.latestChildNodeId === nodeId) {\n    return currentMessages; // Already the latest\n  }\n\n  const newMessages = new Map(currentMessages);\n  const updatedParent = {\n    ...parent,\n    latestChildNodeId: nodeId,\n  };\n  newMessages.set(parent.nodeId, updatedParent);\n\n  return newMessages;\n}\n\nexport function getLatestMessageChain(messages: MessageTreeState): Message[] {\n  const chain: Message[] = [];\n  if (messages.size === 0) {\n    return chain;\n  }\n\n  // Find the root message\n  let root: Message | undefined;\n  if (messages.has(SYSTEM_NODE_ID)) {\n    root = messages.get(SYSTEM_NODE_ID);\n  } else {\n    // Use Array.from to fix linter error\n    const potentialRoots = Array.from(messages.values()).filter(\n      (message) =>\n        message.parentNodeId === null || !messages.has(message.parentNodeId!)\n    );\n    if (potentialRoots.length > 0) {\n      // Prefer non-system message if multiple roots found somehow\n      root =\n        potentialRoots.find((m) => m.type !== \"system\") || potentialRoots[0];\n    }\n  }\n\n  if (!root) {\n    console.error(\"Could not determine the root message.\");\n    // Fallback: return flat list sorted by nodeId perhaps? Or empty?\n    return Array.from(messages.values()).sort((a, b) => a.nodeId - b.nodeId);\n  }\n\n  let currentMessage: Message | undefined = root;\n  // The root itself (like SYSTEM_MESSAGE) might not be part of the visible chain\n  if (root.nodeId !== SYSTEM_NODE_ID && root.type !== \"system\") {\n    // Need to clone message for safety? If MessageTreeState guarantees immutability maybe not.\n    // Let's assume Message objects within the map are treated as immutable.\n    chain.push(root);\n  }\n\n  while (\n    currentMessage?.latestChildNodeId !== null &&\n    currentMessage?.latestChildNodeId !== undefined\n  ) {\n    const nextNodeId = currentMessage.latestChildNodeId;\n    const nextMessage = messages.get(nextNodeId);\n    if (nextMessage) {\n      chain.push(nextMessage);\n      currentMessage = nextMessage;\n    } else {\n      console.warn(\n        `Chain broken: Message with nodeId ${nextNodeId} not found.`\n      );\n      break;\n    }\n  }\n\n  return chain;\n}\n\nexport function getHumanAndAIMessageFromMessageNumber(\n  messages: MessageTreeState,\n  messageNumber: number\n): { humanMessage: Message | null; aiMessage: Message | null } {\n  const latestChain = getLatestMessageChain(messages);\n  const messageIndex = latestChain.findIndex(\n    (msg) => msg.messageId === messageNumber\n  );\n\n  if (messageIndex === -1) {\n    // Maybe the message exists but isn't in the latest chain? Search the whole map.\n    const message = getMessageByMessageId(messages, messageNumber);\n    if (!message) return { humanMessage: null, aiMessage: null };\n\n    if (message.type === \"user\") {\n      // Find its latest child that is an agent\n      const potentialAiMessage =\n        message.latestChildNodeId !== null &&\n        message.latestChildNodeId !== undefined\n          ? messages.get(message.latestChildNodeId)\n          : undefined;\n      const aiMessage =\n        potentialAiMessage?.type === \"assistant\" ? potentialAiMessage : null;\n      return { humanMessage: message, aiMessage };\n    } else if (message.type === \"assistant\" || message.type === \"error\") {\n      const humanMessage =\n        message.parentNodeId !== null\n          ? messages.get(message.parentNodeId)\n          : null;\n      return {\n        humanMessage: humanMessage?.type === \"user\" ? humanMessage : null,\n        aiMessage: message,\n      };\n    }\n    return { humanMessage: null, aiMessage: null };\n  }\n\n  // Message is in the latest chain\n  const message = latestChain[messageIndex];\n  if (!message) {\n    console.error(`Message ${messageNumber} not found in the latest chain.`);\n    return { humanMessage: null, aiMessage: null };\n  }\n\n  if (message.type === \"user\") {\n    const potentialAiMessage = latestChain[messageIndex + 1];\n    const aiMessage =\n      potentialAiMessage?.type === \"assistant\" &&\n      potentialAiMessage.parentNodeId === message.nodeId\n        ? potentialAiMessage\n        : null;\n    return { humanMessage: message, aiMessage };\n  } else if (message.type === \"assistant\" || message.type === \"error\") {\n    const potentialHumanMessage = latestChain[messageIndex - 1];\n    const humanMessage =\n      potentialHumanMessage?.type === \"user\" &&\n      message.parentNodeId === potentialHumanMessage.nodeId\n        ? potentialHumanMessage\n        : null;\n    return { humanMessage, aiMessage: message };\n  }\n\n  return { humanMessage: null, aiMessage: null };\n}\n\nexport function getLastSuccessfulMessageId(\n  messages: MessageTreeState,\n  chain?: Message[]\n): number | null {\n  const messageChain = chain || getLatestMessageChain(messages);\n  for (let i = messageChain.length - 1; i >= 0; i--) {\n    const message = messageChain[i];\n    if (!message) {\n      console.error(`Message ${i} not found in the message chain.`);\n      continue;\n    }\n\n    // don't include failed / not-completed messages\n    if (message.type !== \"error\" && message.messageId !== undefined) {\n      return message.messageId ?? null;\n    }\n  }\n\n  // If the chain starts with an error or is empty, check for system message\n  const systemMessage = messages.get(SYSTEM_NODE_ID);\n  if (systemMessage) {\n    // Check if the system message itself is considered \"successful\" (it usually is)\n    // Or if it has a successful child\n    const childNodeId = systemMessage.latestChildNodeId;\n    if (childNodeId !== null && childNodeId !== undefined) {\n      const firstRealMessage = messages.get(childNodeId);\n      if (firstRealMessage && firstRealMessage.type !== \"error\") {\n        return firstRealMessage.messageId ?? null;\n      }\n    }\n    // If no successful child, return the system message ID itself as the root?\n    // This matches the class behavior implicitly returning the root ID if nothing else works.\n    return systemMessage.messageId ?? null;\n  }\n\n  return null; // No successful message found\n}\n\ninterface BuildEmptyMessageParams {\n  messageType: \"user\" | \"assistant\";\n  parentNodeId: number;\n  message?: string;\n  files?: FileDescriptor[];\n  nodeIdOffset?: number;\n}\n\nexport const buildEmptyMessage = (params: BuildEmptyMessageParams): Message => {\n  // use negative number to avoid conflicts with messageIds\n  const tempNodeId = -1 * Date.now() - (params.nodeIdOffset || 0);\n  return {\n    nodeId: tempNodeId,\n    message: params.message || \"\",\n    type: params.messageType,\n    files: params.files || [],\n    toolCall: null,\n    parentNodeId: params.parentNodeId,\n    packets: [],\n  };\n};\n\nexport const buildImmediateMessages = (\n  parentNodeId: number,\n  userInput: string,\n  files: FileDescriptor[],\n  messageToResend?: Message\n): {\n  initialUserNode: Message;\n  initialAgentNode: Message;\n} => {\n  // Always create a NEW message with a new nodeId for proper branching.\n  // When editing (messageToResend exists), this creates a sibling to the original\n  // message since they share the same parentNodeId.\n  const initialUserNode = buildEmptyMessage({\n    messageType: \"user\",\n    parentNodeId,\n    message: userInput,\n    files,\n  });\n  const initialAgentNode = buildEmptyMessage({\n    messageType: \"assistant\",\n    parentNodeId: initialUserNode.nodeId,\n    nodeIdOffset: 1,\n  });\n\n  initialUserNode.childrenNodeIds = [initialAgentNode.nodeId];\n  initialUserNode.latestChildNodeId = initialAgentNode.nodeId;\n\n  return {\n    initialUserNode,\n    initialAgentNode,\n  };\n};\n"
  },
  {
    "path": "web/src/app/app/services/packetUtils.test.ts",
    "content": "/**\n * Unit tests for packetUtils functions\n * Tests packet type classification and utility functions\n */\n\nimport { Packet, PacketType, Placement } from \"./streamingModels\";\nimport {\n  isToolPacket,\n  isActualToolCallPacket,\n  isDisplayPacket,\n  isSearchToolPacket,\n  isStreamingComplete,\n  isFinalAnswerComing,\n} from \"./packetUtils\";\n\n// Helper to create a mock packet with a specific type\nfunction createPacket(\n  type: PacketType,\n  placement?: Partial<Placement>\n): Packet {\n  return {\n    placement: { turn_index: 0, tab_index: 0, ...placement },\n    obj: { type } as any,\n  };\n}\n\ndescribe(\"packetUtils\", () => {\n  describe(\"isToolPacket\", () => {\n    const toolPacketTypes = [\n      PacketType.SEARCH_TOOL_START,\n      PacketType.SEARCH_TOOL_QUERIES_DELTA,\n      PacketType.SEARCH_TOOL_DOCUMENTS_DELTA,\n      PacketType.PYTHON_TOOL_START,\n      PacketType.PYTHON_TOOL_DELTA,\n      PacketType.CUSTOM_TOOL_START,\n      PacketType.CUSTOM_TOOL_DELTA,\n      PacketType.REASONING_START,\n      PacketType.REASONING_DELTA,\n      PacketType.FETCH_TOOL_START,\n      PacketType.FETCH_TOOL_URLS,\n      PacketType.FETCH_TOOL_DOCUMENTS,\n      PacketType.DEEP_RESEARCH_PLAN_START,\n      PacketType.DEEP_RESEARCH_PLAN_DELTA,\n      PacketType.RESEARCH_AGENT_START,\n      PacketType.INTERMEDIATE_REPORT_START,\n      PacketType.INTERMEDIATE_REPORT_DELTA,\n      PacketType.INTERMEDIATE_REPORT_CITED_DOCS,\n    ];\n\n    test.each(toolPacketTypes)(\n      \"returns true for tool packet type: %s\",\n      (packetType) => {\n        const packet = createPacket(packetType);\n        expect(isToolPacket(packet, false)).toBe(true);\n      }\n    );\n\n    test(\"returns true for SECTION_END when includeSectionEnd is true\", () => {\n      const packet = createPacket(PacketType.SECTION_END);\n      expect(isToolPacket(packet, true)).toBe(true);\n    });\n\n    test(\"returns false for SECTION_END when includeSectionEnd is false\", () => {\n      const packet = createPacket(PacketType.SECTION_END);\n      expect(isToolPacket(packet, false)).toBe(false);\n    });\n\n    test(\"returns true for ERROR when includeSectionEnd is true\", () => {\n      const packet = createPacket(PacketType.ERROR);\n      expect(isToolPacket(packet, true)).toBe(true);\n    });\n\n    test(\"returns false for ERROR when includeSectionEnd is false\", () => {\n      const packet = createPacket(PacketType.ERROR);\n      expect(isToolPacket(packet, false)).toBe(false);\n    });\n\n    test(\"returns false for MESSAGE_START\", () => {\n      const packet = createPacket(PacketType.MESSAGE_START);\n      expect(isToolPacket(packet)).toBe(false);\n    });\n\n    test(\"returns false for STOP\", () => {\n      const packet = createPacket(PacketType.STOP);\n      expect(isToolPacket(packet)).toBe(false);\n    });\n  });\n\n  describe(\"isActualToolCallPacket\", () => {\n    const actualToolCallTypes = [\n      PacketType.SEARCH_TOOL_START,\n      PacketType.SEARCH_TOOL_QUERIES_DELTA,\n      PacketType.SEARCH_TOOL_DOCUMENTS_DELTA,\n      PacketType.PYTHON_TOOL_START,\n      PacketType.PYTHON_TOOL_DELTA,\n      PacketType.CUSTOM_TOOL_START,\n      PacketType.CUSTOM_TOOL_DELTA,\n      PacketType.FETCH_TOOL_START,\n      PacketType.FETCH_TOOL_URLS,\n      PacketType.FETCH_TOOL_DOCUMENTS,\n      PacketType.DEEP_RESEARCH_PLAN_START,\n      PacketType.DEEP_RESEARCH_PLAN_DELTA,\n      PacketType.RESEARCH_AGENT_START,\n      PacketType.INTERMEDIATE_REPORT_START,\n      PacketType.INTERMEDIATE_REPORT_DELTA,\n      PacketType.INTERMEDIATE_REPORT_CITED_DOCS,\n    ];\n\n    test.each(actualToolCallTypes)(\n      \"returns true for actual tool call type: %s\",\n      (packetType) => {\n        const packet = createPacket(packetType);\n        expect(isActualToolCallPacket(packet)).toBe(true);\n      }\n    );\n\n    test(\"returns false for REASONING_START (this is the key fix)\", () => {\n      const packet = createPacket(PacketType.REASONING_START);\n      expect(isActualToolCallPacket(packet)).toBe(false);\n    });\n\n    test(\"returns false for REASONING_DELTA (this is the key fix)\", () => {\n      const packet = createPacket(PacketType.REASONING_DELTA);\n      expect(isActualToolCallPacket(packet)).toBe(false);\n    });\n\n    test(\"returns false for MESSAGE_START\", () => {\n      const packet = createPacket(PacketType.MESSAGE_START);\n      expect(isActualToolCallPacket(packet)).toBe(false);\n    });\n\n    test(\"returns false for STOP\", () => {\n      const packet = createPacket(PacketType.STOP);\n      expect(isActualToolCallPacket(packet)).toBe(false);\n    });\n\n    test(\"returns false for SECTION_END\", () => {\n      const packet = createPacket(PacketType.SECTION_END);\n      expect(isActualToolCallPacket(packet)).toBe(false);\n    });\n\n    test(\"returns false for ERROR\", () => {\n      const packet = createPacket(PacketType.ERROR);\n      expect(isActualToolCallPacket(packet)).toBe(false);\n    });\n\n    // Test that isActualToolCallPacket is consistent with isToolPacket\n    // (i.e., it's a subset of tool packets minus reasoning)\n    test(\"isActualToolCallPacket is isToolPacket minus reasoning packets\", () => {\n      // All actual tool call types should also be tool packets\n      actualToolCallTypes.forEach((packetType) => {\n        const packet = createPacket(packetType);\n        expect(isToolPacket(packet, false)).toBe(true);\n        expect(isActualToolCallPacket(packet)).toBe(true);\n      });\n\n      // Reasoning packets should be tool packets but NOT actual tool calls\n      const reasoningPacket1 = createPacket(PacketType.REASONING_START);\n      const reasoningPacket2 = createPacket(PacketType.REASONING_DELTA);\n\n      expect(isToolPacket(reasoningPacket1, false)).toBe(true);\n      expect(isActualToolCallPacket(reasoningPacket1)).toBe(false);\n\n      expect(isToolPacket(reasoningPacket2, false)).toBe(true);\n      expect(isActualToolCallPacket(reasoningPacket2)).toBe(false);\n    });\n  });\n\n  describe(\"isDisplayPacket\", () => {\n    test(\"returns true for MESSAGE_START\", () => {\n      const packet = createPacket(PacketType.MESSAGE_START);\n      expect(isDisplayPacket(packet)).toBe(true);\n    });\n\n    test(\"returns true for IMAGE_GENERATION_TOOL_START\", () => {\n      const packet = createPacket(PacketType.IMAGE_GENERATION_TOOL_START);\n      expect(isDisplayPacket(packet)).toBe(true);\n    });\n\n    test(\"returns false for other packet types\", () => {\n      const packet = createPacket(PacketType.SEARCH_TOOL_START);\n      expect(isDisplayPacket(packet)).toBe(false);\n    });\n  });\n\n  describe(\"isSearchToolPacket\", () => {\n    test(\"returns true for SEARCH_TOOL_START\", () => {\n      const packet = createPacket(PacketType.SEARCH_TOOL_START);\n      expect(isSearchToolPacket(packet)).toBe(true);\n    });\n\n    test(\"returns true for SEARCH_TOOL_QUERIES_DELTA\", () => {\n      const packet = createPacket(PacketType.SEARCH_TOOL_QUERIES_DELTA);\n      expect(isSearchToolPacket(packet)).toBe(true);\n    });\n\n    test(\"returns true for SEARCH_TOOL_DOCUMENTS_DELTA\", () => {\n      const packet = createPacket(PacketType.SEARCH_TOOL_DOCUMENTS_DELTA);\n      expect(isSearchToolPacket(packet)).toBe(true);\n    });\n\n    test(\"returns false for other packet types\", () => {\n      const packet = createPacket(PacketType.PYTHON_TOOL_START);\n      expect(isSearchToolPacket(packet)).toBe(false);\n    });\n  });\n\n  describe(\"isStreamingComplete\", () => {\n    test(\"returns true when packets contain STOP\", () => {\n      const packets = [\n        createPacket(PacketType.MESSAGE_START),\n        createPacket(PacketType.MESSAGE_DELTA),\n        createPacket(PacketType.STOP),\n      ];\n      expect(isStreamingComplete(packets)).toBe(true);\n    });\n\n    test(\"returns false when packets do not contain STOP\", () => {\n      const packets = [\n        createPacket(PacketType.MESSAGE_START),\n        createPacket(PacketType.MESSAGE_DELTA),\n      ];\n      expect(isStreamingComplete(packets)).toBe(false);\n    });\n\n    test(\"returns false for empty array\", () => {\n      expect(isStreamingComplete([])).toBe(false);\n    });\n  });\n\n  describe(\"isFinalAnswerComing\", () => {\n    test(\"returns true when packets contain MESSAGE_START\", () => {\n      const packets = [\n        createPacket(PacketType.SEARCH_TOOL_START),\n        createPacket(PacketType.MESSAGE_START),\n      ];\n      expect(isFinalAnswerComing(packets)).toBe(true);\n    });\n\n    test(\"returns true when packets contain IMAGE_GENERATION_TOOL_START\", () => {\n      const packets = [createPacket(PacketType.IMAGE_GENERATION_TOOL_START)];\n      expect(isFinalAnswerComing(packets)).toBe(true);\n    });\n\n    test(\"returns false when no display packets present\", () => {\n      const packets = [\n        createPacket(PacketType.SEARCH_TOOL_START),\n        createPacket(PacketType.REASONING_START),\n      ];\n      expect(isFinalAnswerComing(packets)).toBe(false);\n    });\n\n    test(\"returns false for empty array\", () => {\n      expect(isFinalAnswerComing([])).toBe(false);\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/app/app/services/packetUtils.ts",
    "content": "import {\n  MessageDelta,\n  MessageStart,\n  PacketType,\n  StreamingCitation,\n} from \"./streamingModels\";\nimport { Packet } from \"@/app/app/services/streamingModels\";\n\nexport function isToolPacket(\n  packet: Packet,\n  includeSectionEnd: boolean = true\n) {\n  let toolPacketTypes = [\n    PacketType.SEARCH_TOOL_START,\n    PacketType.SEARCH_TOOL_QUERIES_DELTA,\n    PacketType.SEARCH_TOOL_DOCUMENTS_DELTA,\n    PacketType.PYTHON_TOOL_START,\n    PacketType.PYTHON_TOOL_DELTA,\n    PacketType.TOOL_CALL_ARGUMENT_DELTA,\n    PacketType.CUSTOM_TOOL_START,\n    PacketType.CUSTOM_TOOL_ARGS,\n    PacketType.CUSTOM_TOOL_DELTA,\n    PacketType.FILE_READER_START,\n    PacketType.FILE_READER_RESULT,\n    PacketType.REASONING_START,\n    PacketType.REASONING_DELTA,\n    PacketType.FETCH_TOOL_START,\n    PacketType.FETCH_TOOL_URLS,\n    PacketType.FETCH_TOOL_DOCUMENTS,\n    PacketType.MEMORY_TOOL_START,\n    PacketType.MEMORY_TOOL_DELTA,\n    PacketType.MEMORY_TOOL_NO_ACCESS,\n    PacketType.DEEP_RESEARCH_PLAN_START,\n    PacketType.DEEP_RESEARCH_PLAN_DELTA,\n    PacketType.RESEARCH_AGENT_START,\n    PacketType.INTERMEDIATE_REPORT_START,\n    PacketType.INTERMEDIATE_REPORT_DELTA,\n    PacketType.INTERMEDIATE_REPORT_CITED_DOCS,\n  ];\n  if (includeSectionEnd) {\n    toolPacketTypes.push(PacketType.SECTION_END);\n    toolPacketTypes.push(PacketType.ERROR);\n  }\n  return toolPacketTypes.includes(packet.obj.type as PacketType);\n}\n\n// Check if a packet is an actual tool call (not reasoning/thinking).\n// This is used to determine if we should reset finalAnswerComing state\n// when a tool packet arrives after message packets (Claude workaround).\n// Reasoning packets should NOT reset finalAnswerComing since they are\n// just the model thinking, not actual tool calls that would produce new content.\nexport function isActualToolCallPacket(packet: Packet): boolean {\n  return (\n    isToolPacket(packet, false) &&\n    packet.obj.type !== PacketType.REASONING_START &&\n    packet.obj.type !== PacketType.REASONING_DELTA\n  );\n}\n\nexport function isDisplayPacket(packet: Packet) {\n  return (\n    packet.obj.type === PacketType.MESSAGE_START ||\n    packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_START\n  );\n}\n\nexport function isSearchToolPacket(packet: Packet): boolean {\n  return (\n    packet.obj.type === PacketType.SEARCH_TOOL_START ||\n    packet.obj.type === PacketType.SEARCH_TOOL_QUERIES_DELTA ||\n    packet.obj.type === PacketType.SEARCH_TOOL_DOCUMENTS_DELTA\n  );\n}\n\nexport function isStreamingComplete(packets: Packet[]) {\n  return packets.some((packet) => packet.obj.type === PacketType.STOP);\n}\n\nexport function isFinalAnswerComing(packets: Packet[]) {\n  return packets.some(\n    (packet) =>\n      packet.obj.type === PacketType.MESSAGE_START ||\n      packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_START\n  );\n}\n\nexport function isFinalAnswerComplete(packets: Packet[]) {\n  // Find the first MESSAGE_START packet and get its index\n  const messageStartPacket = packets.find(\n    (packet) =>\n      packet.obj.type === PacketType.MESSAGE_START ||\n      packet.obj.type === PacketType.IMAGE_GENERATION_TOOL_START\n  );\n\n  if (!messageStartPacket) {\n    return false;\n  }\n\n  // Check if there's a corresponding SECTION_END or ERROR with the same turn_index\n  return packets.some(\n    (packet) =>\n      (packet.obj.type === PacketType.SECTION_END ||\n        packet.obj.type === PacketType.ERROR) &&\n      packet.placement.turn_index === messageStartPacket.placement.turn_index\n  );\n}\n\nexport function groupPacketsByTurnIndex(\n  packets: Packet[]\n): { turn_index: number; tab_index: number; packets: Packet[] }[] {\n  /*\n  Group packets by (turn_index, tab_index). \n  Ordered from lowest turn_index to highest, then by tab_index within each turn.\n  This supports parallel tool calls where multiple tools share the same turn_index\n  but have different tab_index values.\n  */\n  const groups = packets.reduce(\n    (\n      acc: Map<\n        string,\n        { turn_index: number; tab_index: number; packets: Packet[] }\n      >,\n      packet\n    ) => {\n      const turn_index = packet.placement.turn_index;\n      const tab_index = packet.placement.tab_index ?? 0;\n      const key = `${turn_index}-${tab_index}`;\n      if (!acc.has(key)) {\n        acc.set(key, { turn_index, tab_index, packets: [] });\n      }\n      acc.get(key)!.packets.push(packet);\n      return acc;\n    },\n    new Map()\n  );\n\n  // Convert to array and sort by turn_index first, then tab_index\n  return Array.from(groups.values()).sort((a, b) => {\n    if (a.turn_index !== b.turn_index) {\n      return a.turn_index - b.turn_index;\n    }\n    return a.tab_index - b.tab_index;\n  });\n}\n\nexport function getTextContent(packets: Packet[]) {\n  return packets\n    .map((packet) => {\n      if (\n        packet.obj.type === PacketType.MESSAGE_START ||\n        packet.obj.type === PacketType.MESSAGE_DELTA\n      ) {\n        return (packet.obj as MessageStart | MessageDelta).content || \"\";\n      }\n      return \"\";\n    })\n    .join(\"\");\n}\n\nexport function getCitations(packets: Packet[]): StreamingCitation[] {\n  const citations: StreamingCitation[] = [];\n  const seenDocIds = new Set<string>();\n\n  packets.forEach((packet) => {\n    if (packet.obj.type === PacketType.CITATION_INFO) {\n      // Individual citation packet from backend\n      const citationInfo = packet.obj as {\n        citation_number: number;\n        document_id: string;\n      };\n      if (!seenDocIds.has(citationInfo.document_id)) {\n        seenDocIds.add(citationInfo.document_id);\n        citations.push({\n          citation_num: citationInfo.citation_number,\n          document_id: citationInfo.document_id,\n        });\n      }\n    }\n  });\n\n  return citations;\n}\n"
  },
  {
    "path": "web/src/app/app/services/searchParams.ts",
    "content": "import { ReadonlyURLSearchParams } from \"next/navigation\";\n\n// search params\nexport const SEARCH_PARAM_NAMES = {\n  CHAT_ID: \"chatId\",\n  SEARCH_ID: \"searchId\",\n  PERSONA_ID: \"agentId\",\n  PROJECT_ID: \"projectId\",\n  ALL_MY_DOCUMENTS: \"allMyDocuments\",\n  // overrides\n  TEMPERATURE: \"temperature\",\n  MODEL_VERSION: \"model-version\",\n  SYSTEM_PROMPT: \"system-prompt\",\n  STRUCTURED_MODEL: \"structured-model\",\n  // user message\n  USER_PROMPT: \"user-prompt\",\n  SUBMIT_ON_LOAD: \"submit-on-load\",\n  // chat title\n  TITLE: \"title\",\n  FILES: \"files\",\n  // for seeding chats\n  SEEDED: \"seeded\",\n  SEND_ON_LOAD: \"send-on-load\",\n\n  // when sending a message for the first time, we don't want to reload the page\n  // and cause a re-render\n  SKIP_RELOAD: \"skip-reload\",\n};\n\nexport function shouldSubmitOnLoad(\n  searchParams: ReadonlyURLSearchParams | null\n) {\n  const rawSubmitOnLoad = searchParams?.get(SEARCH_PARAM_NAMES.SUBMIT_ON_LOAD);\n  if (rawSubmitOnLoad === \"true\" || rawSubmitOnLoad === \"1\") {\n    return true;\n  }\n  return false;\n}\n"
  },
  {
    "path": "web/src/app/app/services/streamingModels.ts",
    "content": "import { OnyxDocument } from \"@/lib/search/interfaces\";\n\n// Base interface for all streaming objects\ninterface BaseObj {\n  type: string;\n}\n\nexport enum PacketType {\n  MESSAGE_START = \"message_start\",\n  MESSAGE_DELTA = \"message_delta\",\n  MESSAGE_END = \"message_end\",\n\n  STOP = \"stop\",\n  SECTION_END = \"section_end\",\n  TOP_LEVEL_BRANCHING = \"top_level_branching\",\n  ERROR = \"error\",\n\n  // Specific tool packets\n  SEARCH_TOOL_START = \"search_tool_start\",\n  SEARCH_TOOL_QUERIES_DELTA = \"search_tool_queries_delta\",\n  SEARCH_TOOL_DOCUMENTS_DELTA = \"search_tool_documents_delta\",\n  IMAGE_GENERATION_TOOL_START = \"image_generation_start\",\n  IMAGE_GENERATION_TOOL_DELTA = \"image_generation_final\",\n  PYTHON_TOOL_START = \"python_tool_start\",\n  PYTHON_TOOL_DELTA = \"python_tool_delta\",\n  FETCH_TOOL_START = \"open_url_start\",\n  FETCH_TOOL_URLS = \"open_url_urls\",\n  FETCH_TOOL_DOCUMENTS = \"open_url_documents\",\n\n  // Tool call argument delta (streams tool args before tool executes)\n  TOOL_CALL_ARGUMENT_DELTA = \"tool_call_argument_delta\",\n\n  // Custom tool packets\n  CUSTOM_TOOL_START = \"custom_tool_start\",\n  CUSTOM_TOOL_ARGS = \"custom_tool_args\",\n  CUSTOM_TOOL_DELTA = \"custom_tool_delta\",\n\n  // File reader tool packets\n  FILE_READER_START = \"file_reader_start\",\n  FILE_READER_RESULT = \"file_reader_result\",\n  // Memory tool packets\n  MEMORY_TOOL_START = \"memory_tool_start\",\n  MEMORY_TOOL_DELTA = \"memory_tool_delta\",\n  MEMORY_TOOL_NO_ACCESS = \"memory_tool_no_access\",\n\n  // Reasoning packets\n  REASONING_START = \"reasoning_start\",\n  REASONING_DELTA = \"reasoning_delta\",\n  REASONING_DONE = \"reasoning_done\",\n\n  // Citation packets\n  CITATION_START = \"citation_start\",\n  CITATION_END = \"citation_end\",\n  // Backend sends individual citation_info packets during streaming\n  CITATION_INFO = \"citation_info\",\n\n  // Deep Research packets\n  DEEP_RESEARCH_PLAN_START = \"deep_research_plan_start\",\n  DEEP_RESEARCH_PLAN_DELTA = \"deep_research_plan_delta\",\n  RESEARCH_AGENT_START = \"research_agent_start\",\n  INTERMEDIATE_REPORT_START = \"intermediate_report_start\",\n  INTERMEDIATE_REPORT_DELTA = \"intermediate_report_delta\",\n  INTERMEDIATE_REPORT_CITED_DOCS = \"intermediate_report_cited_docs\",\n}\n\nexport const CODE_INTERPRETER_TOOL_TYPES = {\n  PYTHON: \"python\",\n} as const;\n\n// Basic Message Packets\nexport interface MessageStart extends BaseObj {\n  id: string;\n  type: \"message_start\";\n  content: string;\n\n  final_documents: OnyxDocument[] | null;\n  pre_answer_processing_seconds?: number;\n}\n\nexport interface MessageDelta extends BaseObj {\n  content: string;\n  type: \"message_delta\";\n}\n\nexport interface MessageEnd extends BaseObj {\n  type: \"message_end\";\n}\n\n// Control Packets\nexport enum StopReason {\n  FINISHED = \"finished\",\n  USER_CANCELLED = \"user_cancelled\",\n}\n\nexport interface Stop extends BaseObj {\n  type: \"stop\";\n  stop_reason?: StopReason;\n}\n\nexport interface SectionEnd extends BaseObj {\n  type: \"section_end\";\n}\n\nexport interface TopLevelBranching extends BaseObj {\n  type: \"top_level_branching\";\n  num_parallel_branches: number;\n}\n\nexport interface PacketError extends BaseObj {\n  type: \"error\";\n  message?: string;\n}\n\n// Specific tool packets\nexport interface SearchToolStart extends BaseObj {\n  type: \"search_tool_start\";\n  is_internet_search?: boolean;\n}\n\nexport interface SearchToolQueriesDelta extends BaseObj {\n  type: \"search_tool_queries_delta\";\n  queries: string[];\n}\n\nexport interface SearchToolDocumentsDelta extends BaseObj {\n  type: \"search_tool_documents_delta\";\n  documents: OnyxDocument[];\n}\n\nexport type ImageShape = \"square\" | \"landscape\" | \"portrait\";\n\ninterface GeneratedImage {\n  file_id: string;\n  url: string;\n  revised_prompt: string;\n  shape?: ImageShape;\n}\n\nexport interface ImageGenerationToolStart extends BaseObj {\n  type: \"image_generation_start\";\n}\n\nexport interface ImageGenerationToolDelta extends BaseObj {\n  type: \"image_generation_final\";\n  images: GeneratedImage[];\n}\n\nexport interface PythonToolStart extends BaseObj {\n  type: \"python_tool_start\";\n  code: string;\n}\n\nexport interface PythonToolDelta extends BaseObj {\n  type: \"python_tool_delta\";\n  stdout: string;\n  stderr: string;\n  file_ids: string[];\n}\n\nexport interface ToolCallArgumentDelta extends BaseObj {\n  type: \"tool_call_argument_delta\";\n  tool_type: string;\n  tool_id: string;\n  argument_deltas: Record<string, unknown>;\n}\n\nexport interface FetchToolStart extends BaseObj {\n  type: \"open_url_start\";\n}\n\nexport interface FetchToolUrls extends BaseObj {\n  type: \"open_url_urls\";\n  urls: string[];\n}\n\nexport interface FetchToolDocuments extends BaseObj {\n  type: \"open_url_documents\";\n  documents: OnyxDocument[];\n}\n\n// Custom Tool Packets\nexport interface CustomToolErrorInfo {\n  is_auth_error: boolean;\n  status_code: number;\n  message: string;\n}\n\nexport interface CustomToolStart extends BaseObj {\n  type: \"custom_tool_start\";\n  tool_name: string;\n  tool_id?: number | null;\n}\n\nexport interface CustomToolArgs extends BaseObj {\n  type: \"custom_tool_args\";\n  tool_name: string;\n  tool_args: Record<string, any>;\n}\n\nexport interface CustomToolDelta extends BaseObj {\n  type: \"custom_tool_delta\";\n  tool_name: string;\n  tool_id?: number | null;\n  response_type: string;\n  data?: any;\n  file_ids?: string[] | null;\n  error?: CustomToolErrorInfo | null;\n}\n\n// File Reader Packets\nexport interface FileReaderStart extends BaseObj {\n  type: \"file_reader_start\";\n}\n\nexport interface FileReaderResult extends BaseObj {\n  type: \"file_reader_result\";\n  file_name: string;\n  file_id: string;\n  start_char: number;\n  end_char: number;\n  total_chars: number;\n  preview_start: string;\n  preview_end: string;\n}\n// Memory Tool Packets\nexport interface MemoryToolStart extends BaseObj {\n  type: \"memory_tool_start\";\n}\n\nexport interface MemoryToolDelta extends BaseObj {\n  type: \"memory_tool_delta\";\n  memory_text: string;\n  operation: \"add\" | \"update\";\n  memory_id: number | null;\n  index: number | null;\n}\n\nexport interface MemoryToolNoAccess extends BaseObj {\n  type: \"memory_tool_no_access\";\n}\n\n// Reasoning Packets\nexport interface ReasoningStart extends BaseObj {\n  type: \"reasoning_start\";\n}\n\nexport interface ReasoningDelta extends BaseObj {\n  type: \"reasoning_delta\";\n  reasoning: string;\n}\n\nexport interface ReasoningDone extends BaseObj {\n  type: \"reasoning_done\";\n}\n\n// Citation Packets\nexport interface StreamingCitation {\n  citation_num: number;\n  document_id: string;\n}\n\nexport interface CitationStart extends BaseObj {\n  type: \"citation_start\";\n}\n\n// Individual citation info packet (sent during streaming from backend)\nexport interface CitationInfo extends BaseObj {\n  type: \"citation_info\";\n  citation_number: number;\n  document_id: string;\n}\n\n// Deep Research Plan Packets\nexport interface DeepResearchPlanStart extends BaseObj {\n  type: \"deep_research_plan_start\";\n}\n\nexport interface DeepResearchPlanDelta extends BaseObj {\n  type: \"deep_research_plan_delta\";\n  content: string;\n}\n\nexport interface ResearchAgentStart extends BaseObj {\n  type: \"research_agent_start\";\n  research_task: string;\n}\n\nexport interface IntermediateReportStart extends BaseObj {\n  type: \"intermediate_report_start\";\n}\n\nexport interface IntermediateReportDelta extends BaseObj {\n  type: \"intermediate_report_delta\";\n  content: string;\n}\n\nexport interface IntermediateReportCitedDocs extends BaseObj {\n  type: \"intermediate_report_cited_docs\";\n  cited_docs: OnyxDocument[] | null;\n}\n\nexport type ChatObj = MessageStart | MessageDelta | MessageEnd;\n\nexport type StopObj = Stop;\n\nexport type SectionEndObj = SectionEnd;\n\nexport type TopLevelBranchingObj = TopLevelBranching;\n\nexport type PacketErrorObj = PacketError;\n\n// Specific tool objects\nexport type SearchToolObj =\n  | SearchToolStart\n  | SearchToolQueriesDelta\n  | SearchToolDocumentsDelta\n  | SectionEnd\n  | PacketError;\nexport type ImageGenerationToolObj =\n  | ImageGenerationToolStart\n  | ImageGenerationToolDelta\n  | SectionEnd\n  | PacketError;\nexport type PythonToolObj =\n  | PythonToolStart\n  | PythonToolDelta\n  | ToolCallArgumentDelta\n  | SectionEnd\n  | PacketError;\nexport type FetchToolObj =\n  | FetchToolStart\n  | FetchToolUrls\n  | FetchToolDocuments\n  | SectionEnd\n  | PacketError;\nexport type CustomToolObj =\n  | CustomToolStart\n  | CustomToolArgs\n  | CustomToolDelta\n  | SectionEnd\n  | PacketError;\nexport type FileReaderToolObj =\n  | FileReaderStart\n  | FileReaderResult\n  | SectionEnd\n  | PacketError;\nexport type MemoryToolObj =\n  | MemoryToolStart\n  | MemoryToolDelta\n  | MemoryToolNoAccess\n  | SectionEnd\n  | PacketError;\nexport type NewToolObj =\n  | SearchToolObj\n  | ImageGenerationToolObj\n  | PythonToolObj\n  | FetchToolObj\n  | CustomToolObj\n  | FileReaderToolObj\n  | MemoryToolObj;\n\nexport type ReasoningObj =\n  | ReasoningStart\n  | ReasoningDelta\n  | ReasoningDone\n  | SectionEnd\n  | PacketError;\n\nexport type CitationObj =\n  | CitationStart\n  | CitationInfo\n  | SectionEnd\n  | PacketError;\n\nexport type DeepResearchPlanObj =\n  | DeepResearchPlanStart\n  | DeepResearchPlanDelta\n  | SectionEnd;\n\nexport type ResearchAgentObj =\n  | ResearchAgentStart\n  | IntermediateReportStart\n  | IntermediateReportDelta\n  | IntermediateReportCitedDocs\n  | SectionEnd;\n\n// Union type for all possible streaming objects\nexport type ObjTypes =\n  | ChatObj\n  | NewToolObj\n  | ReasoningObj\n  | StopObj\n  | SectionEndObj\n  | TopLevelBranchingObj\n  | CitationObj\n  | DeepResearchPlanObj\n  | ResearchAgentObj\n  | PacketErrorObj\n  | CitationObj;\n\n// Placement interface for packet positioning\nexport interface Placement {\n  turn_index: number;\n  tab_index?: number; // For parallel tool calls - tools with same turn_index but different tab_index run in parallel\n  sub_turn_index?: number | null;\n  model_index?: number | null; // For multi-model answer generation - identifies which model produced this packet\n}\n\n// Packet wrapper for streaming objects\nexport interface Packet {\n  placement: Placement;\n  obj: ObjTypes;\n}\n\nexport interface ChatPacket {\n  placement: Placement;\n  obj: ChatObj;\n}\n\nexport interface StopPacket {\n  placement: Placement;\n  obj: StopObj;\n}\n\nexport interface CitationPacket {\n  placement: Placement;\n  obj: CitationObj;\n}\n\n// New specific tool packet types\nexport interface SearchToolPacket {\n  placement: Placement;\n  obj: SearchToolObj;\n}\n\nexport interface ImageGenerationToolPacket {\n  placement: Placement;\n  obj: ImageGenerationToolObj;\n}\n\nexport interface PythonToolPacket {\n  placement: Placement;\n  obj: PythonToolObj;\n}\n\nexport interface FetchToolPacket {\n  placement: Placement;\n  obj: FetchToolObj;\n}\n\nexport interface CustomToolPacket {\n  placement: Placement;\n  obj: CustomToolObj;\n}\n\nexport interface FileReaderToolPacket {\n  placement: Placement;\n  obj: FileReaderToolObj;\n}\nexport interface MemoryToolPacket {\n  placement: Placement;\n  obj: MemoryToolObj;\n}\n\nexport interface ReasoningPacket {\n  placement: Placement;\n  obj: ReasoningObj;\n}\n\nexport interface SectionEndPacket {\n  placement: Placement;\n  obj: SectionEndObj;\n}\n\nexport interface TopLevelBranchingPacket {\n  placement: Placement;\n  obj: TopLevelBranchingObj;\n}\n\nexport interface DeepResearchPlanPacket {\n  placement: Placement;\n  obj: DeepResearchPlanObj;\n}\n\nexport interface ResearchAgentPacket {\n  placement: Placement;\n  obj: ResearchAgentObj;\n}\n"
  },
  {
    "path": "web/src/app/app/services/thinkingTokens.ts",
    "content": "import { JSX } from \"react\";\n\n/**\n * Utility functions to handle thinking tokens in AI messages\n */\n\n/**\n * Check if a message contains complete thinking tokens\n */\nexport function hasCompletedThinkingTokens(\n  content: string | JSX.Element\n): boolean {\n  if (typeof content !== \"string\") return false;\n\n  return (\n    /<think>[\\s\\S]*?<\\/think>/.test(content) ||\n    /<thinking>[\\s\\S]*?<\\/thinking>/.test(content)\n  );\n}\n\n/**\n * Check if a message contains partial thinking tokens (streaming)\n */\nexport function hasPartialThinkingTokens(\n  content: string | JSX.Element\n): boolean {\n  if (typeof content !== \"string\") return false;\n\n  // Count opening and closing tags\n  const thinkOpenCount = (content.match(/<think>/g) || []).length;\n  const thinkCloseCount = (content.match(/<\\/think>/g) || []).length;\n  const thinkingOpenCount = (content.match(/<thinking>/g) || []).length;\n  const thinkingCloseCount = (content.match(/<\\/thinking>/g) || []).length;\n\n  // Return true if we have any unmatched tags\n  return (\n    thinkOpenCount > thinkCloseCount || thinkingOpenCount > thinkingCloseCount\n  );\n}\n\n/**\n * Extract thinking content from a message\n */\nexport function extractThinkingContent(content: string | JSX.Element): string {\n  if (typeof content !== \"string\") return \"\";\n\n  // For complete thinking tags, extract all sections\n  const completeThinkRegex = /<think>[\\s\\S]*?<\\/think>/g;\n  const completeThinkingRegex = /<thinking>[\\s\\S]*?<\\/thinking>/g;\n\n  const thinkMatches = Array.from(content.matchAll(completeThinkRegex));\n  const thinkingMatches = Array.from(content.matchAll(completeThinkingRegex));\n\n  if (thinkMatches.length > 0 || thinkingMatches.length > 0) {\n    // Combine all matches and sort by their position in the original string\n    const allMatches = [...thinkMatches, ...thinkingMatches].sort(\n      (a, b) => (a.index || 0) - (b.index || 0)\n    );\n    return allMatches.map((match) => match[0]).join(\"\\n\");\n  }\n\n  // For partial thinking tokens (streaming)\n  if (hasPartialThinkingTokens(content)) {\n    // Find the last opening tag position\n    const lastThinkPos = content.lastIndexOf(\"<think>\");\n    const lastThinkingPos = content.lastIndexOf(\"<thinking>\");\n\n    // Use the position of whichever tag appears last\n    const startPos = Math.max(lastThinkPos, lastThinkingPos);\n\n    if (startPos >= 0) {\n      // Extract everything from the last opening tag to the end\n      return content.substring(startPos);\n    }\n  }\n\n  return \"\";\n}\n\n/**\n * Check if thinking tokens are complete\n */\nexport function isThinkingComplete(content: string | JSX.Element): boolean {\n  if (typeof content !== \"string\") return false;\n\n  // Count opening and closing tags\n  const thinkOpenCount = (content.match(/<think>/g) || []).length;\n  const thinkCloseCount = (content.match(/<\\/think>/g) || []).length;\n  const thinkingOpenCount = (content.match(/<thinking>/g) || []).length;\n  const thinkingCloseCount = (content.match(/<\\/thinking>/g) || []).length;\n\n  // All tags must be matched\n  return (\n    thinkOpenCount === thinkCloseCount &&\n    thinkingOpenCount === thinkingCloseCount\n  );\n}\n\n/**\n * Remove thinking tokens from content\n */\nexport function removeThinkingTokens(\n  content: string | JSX.Element\n): string | JSX.Element {\n  if (typeof content !== \"string\") return content;\n\n  // First, remove complete thinking blocks\n  let result = content.replace(/<think>[\\s\\S]*?<\\/think>/g, \"\");\n  result = result.replace(/<thinking>[\\s\\S]*?<\\/thinking>/g, \"\");\n\n  // Handle case where there's an incomplete thinking token at the end\n  if (hasPartialThinkingTokens(result)) {\n    // Find the last opening tag position\n    const lastThinkPos = result.lastIndexOf(\"<think>\");\n    const lastThinkingPos = result.lastIndexOf(\"<thinking>\");\n\n    // Use the position of whichever tag appears last\n    const startPos = Math.max(lastThinkPos, lastThinkingPos);\n\n    if (startPos >= 0) {\n      // Only keep content before the last opening tag\n      result = result.substring(0, startPos);\n    }\n  }\n\n  return result.trim();\n}\n\n// /**\n//  * Clean the extracted thinking content (remove tags)\n//  */\nexport function cleanThinkingContent(thinkingContent: string): string {\n  if (!thinkingContent) return \"\";\n\n  return thinkingContent\n    .replace(/<think>|<\\/think>|<thinking>|<\\/thinking>/g, \"\")\n    .trim();\n}\n"
  },
  {
    "path": "web/src/app/app/settings/accounts-access/page.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { useAuthType } from \"@/lib/hooks\";\nimport { AuthType } from \"@/lib/constants\";\nimport { AccountsAccessSettings } from \"@/refresh-pages/SettingsPage\";\n\nexport default function AccountsAccessPage() {\n  const router = useRouter();\n  const { user } = useUser();\n  const authType = useAuthType();\n\n  const showPasswordSection = Boolean(user?.password_configured);\n  const showTokensSection = authType !== null;\n  const hasAccess = showPasswordSection || showTokensSection;\n\n  // Only redirect after authType has loaded to avoid redirecting during loading state\n  const isAuthTypeLoaded = authType !== null;\n\n  useEffect(() => {\n    if (isAuthTypeLoaded && !hasAccess) {\n      router.replace(\"/app/settings/general\");\n    }\n  }, [isAuthTypeLoaded, hasAccess, router]);\n\n  // Don't render content until authType is loaded and access is determined\n  if (!isAuthTypeLoaded || !hasAccess) {\n    return null;\n  }\n\n  return <AccountsAccessSettings />;\n}\n"
  },
  {
    "path": "web/src/app/app/settings/chat-preferences/page.tsx",
    "content": "import { ChatPreferencesSettings } from \"@/refresh-pages/SettingsPage\";\n\nexport default function ChatPreferencesPage() {\n  return <ChatPreferencesSettings />;\n}\n"
  },
  {
    "path": "web/src/app/app/settings/connectors/page.tsx",
    "content": "import { ConnectorsSettings } from \"@/refresh-pages/SettingsPage\";\n\nexport default function ConnectorsPage() {\n  return <ConnectorsSettings />;\n}\n"
  },
  {
    "path": "web/src/app/app/settings/general/page.tsx",
    "content": "import { GeneralSettings } from \"@/refresh-pages/SettingsPage\";\n\nexport default function GeneralSettingsPage() {\n  return <GeneralSettings />;\n}\n"
  },
  {
    "path": "web/src/app/app/settings/layout.tsx",
    "content": "\"use client\";\n\nimport { usePathname } from \"next/navigation\";\nimport * as AppLayouts from \"@/layouts/app-layouts\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { SidebarTab } from \"@opal/components\";\nimport { SvgSliders } from \"@opal/icons\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { useAuthType } from \"@/lib/hooks\";\nimport { Section } from \"@/layouts/general-layouts\";\n\ninterface LayoutProps {\n  children: React.ReactNode;\n}\n\nexport default function Layout({ children }: LayoutProps) {\n  const pathname = usePathname();\n  const { user } = useUser();\n  const authType = useAuthType();\n\n  const showPasswordSection = Boolean(user?.password_configured);\n  const showTokensSection = authType !== null;\n  const showAccountsAccessTab = showPasswordSection || showTokensSection;\n\n  return (\n    <AppLayouts.Root>\n      <SettingsLayouts.Root width=\"lg\">\n        <SettingsLayouts.Header icon={SvgSliders} title=\"Settings\" separator />\n\n        <SettingsLayouts.Body>\n          <Section\n            flexDirection=\"row\"\n            justifyContent=\"start\"\n            alignItems=\"start\"\n            gap={1.5}\n          >\n            {/* Left: Tab Navigation */}\n            <div\n              data-testid=\"settings-left-tab-navigation\"\n              className=\"flex flex-col px-2 min-w-[12.5rem]\"\n            >\n              <SidebarTab\n                href=\"/app/settings/general\"\n                selected={pathname === \"/app/settings/general\"}\n              >\n                General\n              </SidebarTab>\n              <SidebarTab\n                href=\"/app/settings/chat-preferences\"\n                selected={pathname === \"/app/settings/chat-preferences\"}\n              >\n                Chat Preferences\n              </SidebarTab>\n              {showAccountsAccessTab && (\n                <SidebarTab\n                  href=\"/app/settings/accounts-access\"\n                  selected={pathname === \"/app/settings/accounts-access\"}\n                >\n                  Accounts & Access\n                </SidebarTab>\n              )}\n              <SidebarTab\n                href=\"/app/settings/connectors\"\n                selected={pathname === \"/app/settings/connectors\"}\n              >\n                Connectors\n              </SidebarTab>\n            </div>\n\n            {/* Right: Tab Content */}\n            {children}\n          </Section>\n        </SettingsLayouts.Body>\n      </SettingsLayouts.Root>\n    </AppLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/settings/page.tsx",
    "content": "import { redirect } from \"next/navigation\";\n\nexport default function SettingsPage() {\n  redirect(\"/app/settings/general\");\n}\n"
  },
  {
    "path": "web/src/app/app/shared/[chatId]/SharedChatDisplay.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { humanReadableFormat } from \"@/lib/time\";\nimport { BackendChatSession } from \"@/app/app/interfaces\";\nimport { processRawChatHistory } from \"@/app/app/services/lib\";\nimport { getLatestMessageChain } from \"@/app/app/services/messageTree\";\nimport HumanMessage from \"@/app/app/message/HumanMessage\";\nimport AgentMessage from \"@/app/app/message/messageComponents/AgentMessage\";\nimport OnyxInitializingLoader from \"@/components/OnyxInitializingLoader\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { IllustrationContent } from \"@opal/layouts\";\nimport SvgNotFound from \"@opal/illustrations/not-found\";\nimport { Button } from \"@opal/components\";\nimport { Persona } from \"@/app/admin/agents/interfaces\";\nimport { MinimalOnyxDocument } from \"@/lib/search/interfaces\";\nimport PreviewModal from \"@/sections/modals/PreviewModal\";\nimport { UNNAMED_CHAT } from \"@/lib/constants\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport useOnMount from \"@/hooks/useOnMount\";\nimport SharedAppInputBar from \"@/sections/input/SharedAppInputBar\";\n\nexport interface SharedChatDisplayProps {\n  chatSession: BackendChatSession | null;\n  persona: Persona;\n}\n\nexport default function SharedChatDisplay({\n  chatSession,\n  persona,\n}: SharedChatDisplayProps) {\n  const [presentingDocument, setPresentingDocument] =\n    useState<MinimalOnyxDocument | null>(null);\n\n  const isMounted = useOnMount();\n\n  if (!chatSession) {\n    return (\n      <div className=\"h-full w-full flex flex-col items-center justify-center\">\n        <Section flexDirection=\"column\" alignItems=\"center\" gap={1}>\n          <IllustrationContent\n            illustration={SvgNotFound}\n            title=\"Shared chat not found\"\n            description=\"Did not find a shared chat with the specified ID.\"\n          />\n          <Button href=\"/app\" prominence=\"secondary\">\n            Start a new chat\n          </Button>\n        </Section>\n      </div>\n    );\n  }\n\n  const messages = getLatestMessageChain(\n    processRawChatHistory(chatSession.messages, chatSession.packets)\n  );\n\n  const firstMessage = messages[0];\n\n  if (firstMessage === undefined) {\n    return (\n      <div className=\"h-full w-full flex flex-col items-center justify-center\">\n        <Section flexDirection=\"column\" alignItems=\"center\" gap={1}>\n          <IllustrationContent\n            illustration={SvgNotFound}\n            title=\"Shared chat not found\"\n            description=\"No messages found in shared chat.\"\n          />\n          <Button href=\"/app\" prominence=\"secondary\">\n            Start a new chat\n          </Button>\n        </Section>\n      </div>\n    );\n  }\n\n  return (\n    <>\n      {presentingDocument && (\n        <PreviewModal\n          presentingDocument={presentingDocument}\n          onClose={() => setPresentingDocument(null)}\n        />\n      )}\n\n      <div className=\"flex flex-col h-full w-full overflow-hidden\">\n        <div className=\"flex-1 flex flex-col items-center overflow-y-auto\">\n          <div className=\"sticky top-0 z-10 flex items-center justify-between w-full bg-background-tint-01 px-8 py-4\">\n            <Text as=\"p\" text04 headingH2>\n              {chatSession.description || UNNAMED_CHAT}\n            </Text>\n            <div className=\"flex flex-col items-end\">\n              <Text as=\"p\" text03 secondaryBody>\n                Shared on {humanReadableFormat(chatSession.time_created)}\n              </Text>\n              {chatSession.owner_name && (\n                <Text as=\"p\" text03 secondaryBody>\n                  by {chatSession.owner_name}\n                </Text>\n              )}\n            </div>\n          </div>\n\n          {isMounted ? (\n            <div className=\"w-[min(50rem,100%)]\">\n              {messages.map((message, i) => {\n                if (message.type === \"user\") {\n                  return (\n                    <HumanMessage\n                      key={message.messageId}\n                      content={message.message}\n                      files={message.files}\n                      nodeId={message.nodeId}\n                    />\n                  );\n                } else if (message.type === \"assistant\") {\n                  return (\n                    <AgentMessage\n                      key={message.messageId}\n                      rawPackets={message.packets}\n                      chatState={{\n                        agent: persona,\n                        docs: message.documents,\n                        citations: message.citations,\n                        setPresentingDocument: setPresentingDocument,\n                        overriddenModel: message.overridden_model,\n                      }}\n                      nodeId={message.nodeId}\n                      llmManager={null}\n                      otherMessagesCanSwitchTo={undefined}\n                      onMessageSelection={undefined}\n                    />\n                  );\n                } else {\n                  // Error message case\n                  return (\n                    <div key={message.messageId} className=\"py-5 ml-4 lg:px-5\">\n                      <div className=\"mx-auto w-[90%] max-w-message-max\">\n                        <p className=\"text-status-text-error-05 text-sm my-auto\">\n                          {message.message}\n                        </p>\n                      </div>\n                    </div>\n                  );\n                }\n              })}\n            </div>\n          ) : (\n            <div className=\"h-full w-full flex items-center justify-center\">\n              <OnyxInitializingLoader />\n            </div>\n          )}\n        </div>\n\n        <div className=\"w-full max-w-[50rem] mx-auto px-4 pb-4\">\n          <SharedAppInputBar />\n        </div>\n      </div>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/shared/[chatId]/page.tsx",
    "content": "import { fetchSS } from \"@/lib/utilsSS\";\nimport { redirect } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { requireAuth } from \"@/lib/auth/requireAuth\";\nimport SharedChatDisplay from \"@/app/app/shared/[chatId]/SharedChatDisplay\";\nimport * as AppLayouts from \"@/layouts/app-layouts\";\nimport { Persona } from \"@/app/admin/agents/interfaces\";\n\n// This is used for rendering a persona in the shared chat display\nexport function constructMiniFiedPersona(name: string, id: number): Persona {\n  return {\n    id,\n    name,\n    is_listed: true,\n    is_public: true,\n    display_priority: 0,\n    description: \"\",\n    document_sets: [],\n    tools: [],\n    owner: null,\n    starter_messages: null,\n    builtin_persona: false,\n    is_featured: false,\n    users: [],\n    groups: [],\n    user_file_ids: [],\n    system_prompt: null,\n    task_prompt: null,\n    datetime_aware: true,\n    replace_base_system_prompt: false,\n  };\n}\n\nasync function getSharedChat(chatId: string) {\n  const response = await fetchSS(\n    `/chat/get-chat-session/${chatId}?is_shared=True`\n  );\n  if (response.ok) {\n    return await response.json();\n  }\n  return null;\n}\n\nexport interface PageProps {\n  params: Promise<{ chatId: string }>;\n}\n\nexport default async function Page(props: PageProps) {\n  const params = await props.params;\n\n  const authResult = await requireAuth();\n  if (authResult.redirect) {\n    return redirect(authResult.redirect as Route);\n  }\n\n  // Catch cases where backend is completely unreachable\n  // Allows render instead of throwing an exception and crashing\n  const chatSession = await getSharedChat(params.chatId).catch(() => null);\n\n  const persona: Persona = constructMiniFiedPersona(\n    chatSession?.persona_name ?? \"\",\n    chatSession?.persona_id ?? 0\n  );\n\n  return (\n    <AppLayouts.Root>\n      <SharedChatDisplay chatSession={chatSession} persona={persona} />\n    </AppLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/app/stores/useChatSessionStore.ts",
    "content": "import { create } from \"zustand\";\nimport {\n  ChatState,\n  RegenerationState,\n  Message,\n  ChatSessionSharedStatus,\n  BackendChatSession,\n  FeedbackType,\n} from \"../interfaces\";\nimport {\n  getLatestMessageChain,\n  getMessageByMessageId,\n  MessageTreeState,\n} from \"../services/messageTree\";\nimport { useMemo } from \"react\";\n\ninterface ChatSessionData {\n  sessionId: string;\n  messageTree: MessageTreeState;\n  chatState: ChatState;\n  regenerationState: RegenerationState | null;\n  canContinue: boolean;\n  submittedMessage: string;\n  maxTokens: number;\n  chatSessionSharedStatus: ChatSessionSharedStatus;\n  selectedNodeIdForDocDisplay: number | null; // should be the node ID, not the message ID\n  abortController: AbortController;\n  hasPerformedInitialScroll: boolean;\n  documentSidebarVisible: boolean;\n  hasSentLocalUserMessage: boolean;\n\n  // Session-specific state (previously global)\n  isFetchingChatMessages: boolean;\n  uncaughtError: string | null;\n  loadingError: string | null;\n  isReady: boolean;\n\n  // Session metadata\n  lastAccessed: Date;\n  isLoaded: boolean;\n  description?: string;\n  personaId?: number;\n\n  // Streaming duration tracking\n  streamingStartTime?: number;\n}\n\ninterface ChatSessionStore {\n  // Session management\n  currentSessionId: string | null;\n  sessions: Map<string, ChatSessionData>;\n\n  // Actions - Session Management\n  setCurrentSession: (sessionId: string | null) => void;\n  createSession: (\n    sessionId: string,\n    initialData?: Partial<ChatSessionData>\n  ) => void;\n  updateSessionData: (\n    sessionId: string,\n    updates: Partial<ChatSessionData>\n  ) => void;\n  updateSessionMessageTree: (\n    sessionId: string,\n    messageTree: MessageTreeState\n  ) => void;\n  updateSessionAndMessageTree: (\n    sessionId: string,\n    messageTree: MessageTreeState\n  ) => void;\n\n  // Actions - Message Management\n  updateChatState: (sessionId: string, chatState: ChatState) => void;\n  updateRegenerationState: (\n    sessionId: string,\n    state: RegenerationState | null\n  ) => void;\n  updateCanContinue: (sessionId: string, canContinue: boolean) => void;\n  updateSubmittedMessage: (sessionId: string, message: string) => void;\n  updateMessageFeedback: (\n    sessionId: string,\n    messageId: number,\n    feedback: string | null\n  ) => void;\n  updateCurrentMessageFeedback: (\n    messageId: number,\n    feedback: string | null\n  ) => void;\n  updateSelectedNodeForDocDisplay: (\n    sessionId: string,\n    selectedMessageForDocDisplay: number | null\n  ) => void;\n  updateHasPerformedInitialScroll: (\n    sessionId: string,\n    hasPerformedInitialScroll: boolean\n  ) => void;\n  updateDocumentSidebarVisible: (\n    sessionId: string,\n    documentSidebarVisible: boolean\n  ) => void;\n  updateCurrentDocumentSidebarVisible: (\n    documentSidebarVisible: boolean\n  ) => void;\n  updateHasSentLocalUserMessage: (\n    sessionId: string,\n    hasSentLocalUserMessage: boolean\n  ) => void;\n  updateCurrentHasSentLocalUserMessage: (\n    hasSentLocalUserMessage: boolean\n  ) => void;\n\n  // Convenience functions that automatically use current session ID\n  updateCurrentSelectedNodeForDocDisplay: (\n    selectedNodeForDocDisplay: number | null\n  ) => void;\n  updateCurrentChatSessionSharedStatus: (\n    chatSessionSharedStatus: ChatSessionSharedStatus\n  ) => void;\n  updateCurrentChatState: (chatState: ChatState) => void;\n  updateCurrentRegenerationState: (\n    regenerationState: RegenerationState | null\n  ) => void;\n  updateCurrentCanContinue: (canContinue: boolean) => void;\n  updateCurrentSubmittedMessage: (submittedMessage: string) => void;\n\n  // Actions - Session-specific State (previously global)\n  setIsFetchingChatMessages: (sessionId: string, fetching: boolean) => void;\n  setUncaughtError: (sessionId: string, error: string | null) => void;\n  setLoadingError: (sessionId: string, error: string | null) => void;\n  setIsReady: (sessionId: string, ready: boolean) => void;\n\n  // Actions - Streaming Duration\n  setStreamingStartTime: (sessionId: string, time: number | null) => void;\n  getStreamingStartTime: (sessionId: string) => number | undefined;\n\n  // Actions - Abort Controllers\n  setAbortController: (sessionId: string, controller: AbortController) => void;\n  abortSession: (sessionId: string) => void;\n  abortAllSessions: () => void;\n\n  // Utilities\n  initializeSession: (\n    sessionId: string,\n    backendSession?: BackendChatSession\n  ) => void;\n  cleanupOldSessions: (maxSessions?: number) => void;\n}\n\nconst createInitialSessionData = (\n  sessionId: string,\n  initialData?: Partial<ChatSessionData>\n): ChatSessionData => ({\n  sessionId,\n  messageTree: new Map<number, Message>(),\n  chatState: \"input\" as ChatState,\n  regenerationState: null,\n  canContinue: false,\n  submittedMessage: \"\",\n  maxTokens: 128_000,\n  chatSessionSharedStatus: ChatSessionSharedStatus.Private,\n  selectedNodeIdForDocDisplay: null,\n  abortController: new AbortController(),\n  hasPerformedInitialScroll: true,\n  documentSidebarVisible: false,\n  hasSentLocalUserMessage: false,\n\n  // Session-specific state defaults\n  isFetchingChatMessages: false,\n  uncaughtError: null,\n  loadingError: null,\n  isReady: true,\n\n  lastAccessed: new Date(),\n  isLoaded: false,\n  ...initialData,\n});\n\nexport const useChatSessionStore = create<ChatSessionStore>()((set, get) => ({\n  // Initial state\n  currentSessionId: null,\n  sessions: new Map<string, ChatSessionData>(),\n\n  // Session Management Actions\n  setCurrentSession: (sessionId: string | null) => {\n    set((state) => {\n      if (sessionId && !state.sessions.has(sessionId)) {\n        // Create new session if it doesn't exist\n        const newSession = createInitialSessionData(sessionId);\n        const newSessions = new Map(state.sessions);\n        newSessions.set(sessionId, newSession);\n\n        return {\n          currentSessionId: sessionId,\n          sessions: newSessions,\n        };\n      }\n\n      // Update last accessed for the new current session\n      if (sessionId && state.sessions.has(sessionId)) {\n        const session = state.sessions.get(sessionId)!;\n        const updatedSession = { ...session, lastAccessed: new Date() };\n        const newSessions = new Map(state.sessions);\n        newSessions.set(sessionId, updatedSession);\n\n        return {\n          currentSessionId: sessionId,\n          sessions: newSessions,\n        };\n      }\n\n      return { currentSessionId: sessionId };\n    });\n  },\n\n  createSession: (\n    sessionId: string,\n    initialData?: Partial<ChatSessionData>\n  ) => {\n    set((state) => {\n      const newSession = createInitialSessionData(sessionId, initialData);\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, newSession);\n\n      return { sessions: newSessions };\n    });\n  },\n\n  updateSessionData: (sessionId: string, updates: Partial<ChatSessionData>) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      const updatedSession = {\n        ...(session || createInitialSessionData(sessionId)),\n        ...updates,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n\n      return { sessions: newSessions };\n    });\n  },\n\n  updateSessionMessageTree: (\n    sessionId: string,\n    messageTree: MessageTreeState\n  ) => {\n    get().updateSessionData(sessionId, { messageTree });\n  },\n\n  updateSessionAndMessageTree: (\n    sessionId: string,\n    messageTree: MessageTreeState\n  ) => {\n    set((state) => {\n      // Ensure session exists\n      const existingSession = state.sessions.get(sessionId);\n      const session = existingSession || createInitialSessionData(sessionId);\n\n      // Update session with new message tree\n      const updatedSession = {\n        ...session,\n        messageTree,\n        lastAccessed: new Date(),\n      };\n\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n\n      // Return both updates in a single state change\n      return {\n        currentSessionId: sessionId,\n        sessions: newSessions,\n      };\n    });\n  },\n\n  // Message Management Actions\n  updateChatState: (sessionId: string, chatState: ChatState) => {\n    get().updateSessionData(sessionId, { chatState });\n  },\n\n  updateRegenerationState: (\n    sessionId: string,\n    regenerationState: RegenerationState | null\n  ) => {\n    get().updateSessionData(sessionId, { regenerationState });\n  },\n\n  updateCanContinue: (sessionId: string, canContinue: boolean) => {\n    get().updateSessionData(sessionId, { canContinue });\n  },\n\n  updateSubmittedMessage: (sessionId: string, submittedMessage: string) => {\n    get().updateSessionData(sessionId, { submittedMessage });\n  },\n\n  updateMessageFeedback: (\n    sessionId: string,\n    messageId: number,\n    feedback: string | null\n  ) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) {\n        console.warn(`Session ${sessionId} not found`);\n        return state;\n      }\n\n      const message = getMessageByMessageId(session.messageTree, messageId);\n      if (!message) {\n        console.warn(`Message ${messageId} not found in session ${sessionId}`);\n        return state;\n      }\n\n      // Create new message object with updated feedback (immutable update)\n      const updatedMessage = {\n        ...message,\n        currentFeedback: feedback as FeedbackType | null,\n      };\n\n      // Create new messageTree Map with updated message\n      const newMessageTree = new Map(session.messageTree);\n      newMessageTree.set(message.nodeId, updatedMessage);\n\n      // Create new session object with new messageTree\n      const updatedSession = {\n        ...session,\n        messageTree: newMessageTree,\n        lastAccessed: new Date(),\n      };\n\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n\n      return { sessions: newSessions };\n    });\n  },\n\n  updateCurrentMessageFeedback: (\n    messageId: number,\n    feedback: string | null\n  ) => {\n    const { currentSessionId } = get();\n    if (currentSessionId) {\n      get().updateMessageFeedback(currentSessionId, messageId, feedback);\n    }\n  },\n\n  updateSelectedNodeForDocDisplay: (\n    sessionId: string,\n    selectedMessageForDocDisplay: number | null\n  ) => {\n    get().updateSessionData(sessionId, {\n      selectedNodeIdForDocDisplay: selectedMessageForDocDisplay,\n    });\n  },\n\n  updateHasPerformedInitialScroll: (\n    sessionId: string,\n    hasPerformedInitialScroll: boolean\n  ) => {\n    get().updateSessionData(sessionId, { hasPerformedInitialScroll });\n  },\n\n  updateDocumentSidebarVisible: (\n    sessionId: string,\n    documentSidebarVisible: boolean\n  ) => {\n    get().updateSessionData(sessionId, { documentSidebarVisible });\n  },\n\n  updateCurrentDocumentSidebarVisible: (documentSidebarVisible: boolean) => {\n    const { currentSessionId } = get();\n    if (currentSessionId) {\n      get().updateDocumentSidebarVisible(\n        currentSessionId,\n        documentSidebarVisible\n      );\n    }\n  },\n\n  updateHasSentLocalUserMessage: (\n    sessionId: string,\n    hasSentLocalUserMessage: boolean\n  ) => {\n    get().updateSessionData(sessionId, { hasSentLocalUserMessage });\n  },\n\n  updateCurrentHasSentLocalUserMessage: (hasSentLocalUserMessage: boolean) => {\n    const { currentSessionId } = get();\n    if (currentSessionId) {\n      get().updateHasSentLocalUserMessage(\n        currentSessionId,\n        hasSentLocalUserMessage\n      );\n    }\n  },\n\n  // Convenience functions that automatically use current session ID\n  updateCurrentSelectedNodeForDocDisplay: (\n    selectedNodeForDocDisplay: number | null\n  ) => {\n    const { currentSessionId } = get();\n    if (currentSessionId) {\n      get().updateSelectedNodeForDocDisplay(\n        currentSessionId,\n        selectedNodeForDocDisplay\n      );\n    }\n  },\n\n  updateCurrentChatSessionSharedStatus: (\n    chatSessionSharedStatus: ChatSessionSharedStatus\n  ) => {\n    const { currentSessionId } = get();\n    if (currentSessionId) {\n      get().updateSessionData(currentSessionId, { chatSessionSharedStatus });\n    }\n  },\n\n  updateCurrentChatState: (chatState: ChatState) => {\n    const { currentSessionId } = get();\n    if (currentSessionId) {\n      get().updateChatState(currentSessionId, chatState);\n    }\n  },\n\n  updateCurrentRegenerationState: (\n    regenerationState: RegenerationState | null\n  ) => {\n    const { currentSessionId } = get();\n    if (currentSessionId) {\n      get().updateRegenerationState(currentSessionId, regenerationState);\n    }\n  },\n\n  updateCurrentCanContinue: (canContinue: boolean) => {\n    const { currentSessionId } = get();\n    if (currentSessionId) {\n      get().updateCanContinue(currentSessionId, canContinue);\n    }\n  },\n\n  updateCurrentSubmittedMessage: (submittedMessage: string) => {\n    const { currentSessionId } = get();\n    if (currentSessionId) {\n      get().updateSubmittedMessage(currentSessionId, submittedMessage);\n    }\n  },\n\n  // Session-specific State Actions (previously global)\n  setIsFetchingChatMessages: (\n    sessionId: string,\n    isFetchingChatMessages: boolean\n  ) => {\n    get().updateSessionData(sessionId, { isFetchingChatMessages });\n  },\n\n  setUncaughtError: (sessionId: string, uncaughtError: string | null) => {\n    get().updateSessionData(sessionId, { uncaughtError });\n  },\n\n  setLoadingError: (sessionId: string, loadingError: string | null) => {\n    get().updateSessionData(sessionId, { loadingError });\n  },\n\n  setIsReady: (sessionId: string, isReady: boolean) => {\n    get().updateSessionData(sessionId, { isReady });\n  },\n\n  // Streaming Duration Actions\n  setStreamingStartTime: (sessionId: string, time: number | null) => {\n    get().updateSessionData(sessionId, {\n      streamingStartTime: time ?? undefined,\n    });\n  },\n\n  getStreamingStartTime: (sessionId: string) => {\n    return get().sessions.get(sessionId)?.streamingStartTime;\n  },\n\n  // Abort Controller Actions\n  setAbortController: (sessionId: string, controller: AbortController) => {\n    get().updateSessionData(sessionId, { abortController: controller });\n  },\n\n  abortSession: (sessionId: string) => {\n    const session = get().sessions.get(sessionId);\n    if (session?.abortController) {\n      session.abortController.abort();\n      get().updateSessionData(sessionId, {\n        abortController: new AbortController(),\n      });\n    }\n  },\n\n  abortAllSessions: () => {\n    const { sessions } = get();\n    sessions.forEach((session, sessionId) => {\n      if (session.abortController) {\n        session.abortController.abort();\n        get().updateSessionData(sessionId, {\n          abortController: new AbortController(),\n        });\n      }\n    });\n  },\n\n  // Utilities\n  initializeSession: (\n    sessionId: string,\n    backendSession?: BackendChatSession\n  ) => {\n    const initialData: Partial<ChatSessionData> = {\n      isLoaded: true,\n      description: backendSession?.description,\n      personaId: backendSession?.persona_id,\n    };\n\n    const existingSession = get().sessions.get(sessionId);\n    if (existingSession) {\n      get().updateSessionData(sessionId, initialData);\n    } else {\n      get().createSession(sessionId, initialData);\n    }\n  },\n\n  cleanupOldSessions: (maxSessions: number = 10) => {\n    set((state) => {\n      const sortedSessions = Array.from(state.sessions.entries()).sort(\n        ([, a], [, b]) => b.lastAccessed.getTime() - a.lastAccessed.getTime()\n      );\n\n      if (sortedSessions.length <= maxSessions) {\n        return state;\n      }\n\n      const sessionsToKeep = sortedSessions.slice(0, maxSessions);\n      const sessionsToRemove = sortedSessions.slice(maxSessions);\n\n      // Abort controllers for sessions being removed\n      sessionsToRemove.forEach(([, session]) => {\n        if (session.abortController) {\n          session.abortController.abort();\n        }\n      });\n\n      const newSessions = new Map(sessionsToKeep);\n\n      return {\n        sessions: newSessions,\n      };\n    });\n  },\n}));\n\nexport const useCurrentMessageTree = () =>\n  useChatSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    const currentSession = currentSessionId\n      ? sessions.get(currentSessionId)\n      : null;\n    return currentSession?.messageTree;\n  });\n\nexport const useCurrentMessageHistory = () => {\n  const messageTree = useCurrentMessageTree();\n  return useMemo(() => {\n    if (!messageTree) {\n      return [];\n    }\n    return getLatestMessageChain(messageTree);\n  }, [messageTree]);\n};\n\nexport const useCurrentChatState = () =>\n  useChatSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    const currentSession = currentSessionId\n      ? sessions.get(currentSessionId)\n      : null;\n    return currentSession?.chatState || \"input\";\n  });\n\nexport const useUncaughtError = () =>\n  useChatSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    const currentSession = currentSessionId\n      ? sessions.get(currentSessionId)\n      : null;\n    return currentSession?.uncaughtError || null;\n  });\n\nexport const useLoadingError = () =>\n  useChatSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    const currentSession = currentSessionId\n      ? sessions.get(currentSessionId)\n      : null;\n    return currentSession?.loadingError || null;\n  });\n\nexport const useIsReady = () =>\n  useChatSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    const currentSession = currentSessionId\n      ? sessions.get(currentSessionId)\n      : null;\n    return currentSession?.isReady ?? true;\n  });\n\nexport const useDocumentSidebarVisible = () =>\n  useChatSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    const currentSession = currentSessionId\n      ? sessions.get(currentSessionId)\n      : null;\n    return currentSession?.documentSidebarVisible || false;\n  });\n\nexport const useSelectedNodeForDocDisplay = () =>\n  useChatSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    const currentSession = currentSessionId\n      ? sessions.get(currentSessionId)\n      : null;\n    return currentSession?.selectedNodeIdForDocDisplay || null;\n  });\n\nexport const useHasSentLocalUserMessage = () =>\n  useChatSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    const currentSession = currentSessionId\n      ? sessions.get(currentSessionId)\n      : null;\n    return currentSession?.hasSentLocalUserMessage || false;\n  });\n\nexport const useStreamingStartTime = () =>\n  useChatSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    const currentSession = currentSessionId\n      ? sessions.get(currentSessionId)\n      : null;\n    return currentSession?.streamingStartTime;\n  });\n"
  },
  {
    "path": "web/src/app/auth/create-account/page.tsx",
    "content": "\"use client\";\n\nimport AuthFlowContainer from \"@/components/auth/AuthFlowContainer\";\nimport { REGISTRATION_URL } from \"@/lib/constants\";\nimport { Button } from \"@opal/components\";\nimport Link from \"next/link\";\nimport { SvgImport } from \"@opal/icons\";\n\nexport default function Page() {\n  return (\n    <AuthFlowContainer>\n      <div className=\"flex flex-col space-y-6\">\n        <h2 className=\"text-2xl font-bold text-text-900 text-center\">\n          Account Not Found\n        </h2>\n        <p className=\"text-text-700 max-w-md text-center\">\n          We couldn&apos;t find your account in our records. To access Onyx, you\n          need to either:\n        </p>\n        <ul className=\"list-disc text-left text-text-600 w-full pl-6 mx-auto\">\n          <li>Be invited to an existing Onyx team</li>\n          <li>Create a new Onyx team</li>\n        </ul>\n        <div className=\"flex justify-center\">\n          <Button\n            href={`${REGISTRATION_URL}/register`}\n            width=\"full\"\n            icon={SvgImport}\n          >\n            Create New Organization\n          </Button>\n        </div>\n        <p className=\"text-sm text-text-500 text-center\">\n          Have an account with a different email?{\" \"}\n          <Link\n            href=\"/auth/login\"\n            className=\"text-action-link-05 hover:underline\"\n          >\n            Sign in\n          </Link>\n        </p>\n      </div>\n    </AuthFlowContainer>\n  );\n}\n"
  },
  {
    "path": "web/src/app/auth/error/AuthErrorContent.tsx",
    "content": "\"use client\";\n\nimport AuthFlowContainer from \"@/components/auth/AuthFlowContainer\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\n\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\n\n// Maps raw IdP/OAuth error codes to user-friendly messages.\n// If the message is a known code, we replace it; otherwise show it as-is.\nconst ERROR_CODE_MESSAGES: Record<string, string> = {\n  access_denied: \"Access was denied by your identity provider.\",\n  login_required: \"You need to log in with your identity provider first.\",\n  consent_required:\n    \"Your identity provider requires consent before continuing.\",\n  interaction_required:\n    \"Additional interaction with your identity provider is required.\",\n  invalid_scope: \"The requested permissions are not available.\",\n  server_error:\n    \"Your identity provider encountered an error. Please try again.\",\n  temporarily_unavailable:\n    \"Your identity provider is temporarily unavailable. Please try again later.\",\n};\n\nfunction resolveMessage(raw: string | null): string | null {\n  if (!raw) return null;\n  return ERROR_CODE_MESSAGES[raw] ?? raw;\n}\n\ninterface AuthErrorContentProps {\n  message: string | null;\n}\n\nfunction AuthErrorContent({ message: rawMessage }: AuthErrorContentProps) {\n  const message = resolveMessage(rawMessage);\n  return (\n    <AuthFlowContainer>\n      <div className=\"flex flex-col items-center gap-4\">\n        <Text headingH2 text05>\n          Authentication Error\n        </Text>\n        <Text mainContentBody text03>\n          There was a problem with your login attempt.\n        </Text>\n        {/* TODO: Error card component */}\n        <div className=\"w-full rounded-12 border border-status-error-05 bg-status-error-00 p-4\">\n          {message ? (\n            <Text mainContentBody className=\"text-status-error-05\">\n              {message}\n            </Text>\n          ) : (\n            <div className=\"flex flex-col gap-2 px-4\">\n              <Text mainContentEmphasis className=\"text-status-error-05\">\n                Possible Issues:\n              </Text>\n              <Text as=\"li\" mainContentBody className=\"text-status-error-05\">\n                Incorrect or expired login credentials\n              </Text>\n              <Text as=\"li\" mainContentBody className=\"text-status-error-05\">\n                Temporary authentication system disruption\n              </Text>\n              <Text as=\"li\" mainContentBody className=\"text-status-error-05\">\n                Account access restrictions or permissions\n              </Text>\n            </div>\n          )}\n        </div>\n\n        <Button href=\"/auth/login\" width=\"full\">\n          Return to Login Page\n        </Button>\n\n        <Text mainContentBody text04>\n          {NEXT_PUBLIC_CLOUD_ENABLED ? (\n            <>\n              If you continue to experience problems, please reach out to the\n              Onyx team at{\" \"}\n              <a href=\"mailto:support@onyx.app\" className=\"text-action-link-05\">\n                support@onyx.app\n              </a>\n            </>\n          ) : (\n            \"If you continue to experience problems, please reach out to your system administrator for assistance.\"\n          )}\n        </Text>\n      </div>\n    </AuthFlowContainer>\n  );\n}\n\nexport default AuthErrorContent;\n"
  },
  {
    "path": "web/src/app/auth/error/layout.tsx",
    "content": "export default function AuthErrorLayout({\n  children,\n}: {\n  children: React.ReactNode;\n}) {\n  // In a production environment, you might want to send this to your error tracking service\n  // For example, if using a service like Sentry:\n  // captureException(new Error(\"Authentication error page was accessed unexpectedly\"));\n\n  return <>{children}</>;\n}\n"
  },
  {
    "path": "web/src/app/auth/error/page.tsx",
    "content": "\"use client\";\n\nimport AuthErrorContent from \"./AuthErrorContent\";\nimport { useSearchParams } from \"next/navigation\";\n\nfunction Page() {\n  const searchParams = useSearchParams();\n  const error = searchParams?.get(\"error\") || null;\n\n  return <AuthErrorContent message={error} />;\n}\n\nexport default Page;\n"
  },
  {
    "path": "web/src/app/auth/forgot-password/page.tsx",
    "content": "\"use client\";\nimport React, { useState } from \"react\";\nimport { forgotPassword } from \"./utils\";\nimport AuthFlowContainer from \"@/components/auth/AuthFlowContainer\";\nimport Title from \"@/components/ui/title\";\nimport { Text } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport Link from \"next/link\";\nimport { Button } from \"@opal/components\";\nimport { Form, Formik } from \"formik\";\nimport * as Yup from \"yup\";\nimport { TextFormField } from \"@/components/Field\";\nimport { toast } from \"@/hooks/useToast\";\nimport { Spinner } from \"@/components/Spinner\";\nimport { redirect } from \"next/navigation\";\nimport { NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED } from \"@/lib/constants\";\n\nconst ForgotPasswordPage: React.FC = () => {\n  const [isWorking, setIsWorking] = useState(false);\n\n  if (!NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED) {\n    redirect(\"/auth/login\");\n  }\n\n  return (\n    <AuthFlowContainer>\n      <div className=\"flex flex-col w-full justify-center\">\n        <div className=\"flex\">\n          <Title className=\"mb-2 mx-auto font-bold\">Forgot Password</Title>\n        </div>\n        {isWorking && <Spinner />}\n        <Formik\n          initialValues={{\n            email: \"\",\n          }}\n          validationSchema={Yup.object().shape({\n            email: Yup.string().email().required(),\n          })}\n          onSubmit={async (values) => {\n            setIsWorking(true);\n            try {\n              await forgotPassword(values.email);\n              toast.success(\n                \"Password reset email sent. Please check your inbox.\"\n              );\n            } catch (error) {\n              const errorMessage =\n                error instanceof Error\n                  ? error.message\n                  : \"An error occurred. Please try again.\";\n              toast.error(errorMessage);\n            } finally {\n              setIsWorking(false);\n            }\n          }}\n        >\n          {({ isSubmitting }) => (\n            <Form className=\"w-full flex flex-col items-stretch mt-2\">\n              <TextFormField\n                name=\"email\"\n                label=\"Email\"\n                type=\"email\"\n                placeholder=\"email@yourcompany.com\"\n              />\n\n              <div className=\"flex\">\n                <Button disabled={isSubmitting} type=\"submit\" width=\"full\">\n                  Reset Password\n                </Button>\n              </div>\n            </Form>\n          )}\n        </Formik>\n        <Spacer rem={1} />\n        <div className=\"flex\">\n          <div className=\"mx-auto\">\n            <Text as=\"p\">{markdown(\"[Back to Login](/auth/login)\")}</Text>\n          </div>\n        </div>\n      </div>\n    </AuthFlowContainer>\n  );\n};\n\nexport default ForgotPasswordPage;\n"
  },
  {
    "path": "web/src/app/auth/forgot-password/utils.ts",
    "content": "export const forgotPassword = async (email: string): Promise<void> => {\n  const response = await fetch(`/api/auth/forgot-password`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({ email }),\n  });\n\n  if (!response.ok) {\n    const error = await response.json();\n    const errorMessage =\n      error?.detail || \"An error occurred during password reset.\";\n    throw new Error(errorMessage);\n  }\n};\n\nexport const resetPassword = async (\n  token: string,\n  password: string\n): Promise<void> => {\n  const response = await fetch(`/api/auth/reset-password`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({ token, password }),\n  });\n\n  if (!response.ok) {\n    const error = await response.json();\n    if (error?.detail?.code === \"RESET_PASSWORD_INVALID_PASSWORD\") {\n      throw new Error(error.detail.reason || \"Invalid password\");\n    }\n    const errorMessage =\n      error?.detail || \"An error occurred during password reset.\";\n    throw new Error(errorMessage);\n  }\n};\n"
  },
  {
    "path": "web/src/app/auth/impersonate/page.tsx",
    "content": "\"use client\";\n\nimport AuthFlowContainer from \"@/components/auth/AuthFlowContainer\";\n\nimport { useUser } from \"@/providers/UserProvider\";\nimport { redirect, useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { Formik, Form, FormikHelpers } from \"formik\";\nimport * as Yup from \"yup\";\nimport { toast } from \"@/hooks/useToast\";\nimport { TextFormField } from \"@/components/Field\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nconst ImpersonateSchema = Yup.object().shape({\n  email: Yup.string().email(\"Invalid email\").required(\"Required\"),\n  apiKey: Yup.string().required(\"Required\"),\n});\n\nexport default function ImpersonatePage() {\n  const router = useRouter();\n  const { user, isCloudSuperuser } = useUser();\n  if (!user) {\n    redirect(\"/auth/login\");\n  }\n\n  if (!isCloudSuperuser) {\n    redirect(\"/app\" as Route);\n  }\n\n  const handleImpersonate = async (\n    values: { email: string; apiKey: string },\n    helpers: FormikHelpers<{ email: string; apiKey: string }>\n  ) => {\n    try {\n      const response = await fetch(\"/api/tenants/impersonate\", {\n        method: \"POST\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n          Authorization: `Bearer ${values.apiKey}`,\n        },\n        body: JSON.stringify({ email: values.email }),\n        credentials: \"same-origin\",\n      });\n\n      if (!response.ok) {\n        const errorData = await response.json();\n        toast.error(errorData.detail || \"Failed to impersonate user\");\n        helpers.setSubmitting(false);\n      } else {\n        helpers.setSubmitting(false);\n        router.push(\"/app\" as Route);\n      }\n    } catch (error) {\n      toast.error(\n        error instanceof Error ? error.message : \"Failed to impersonate user\"\n      );\n      helpers.setSubmitting(false);\n    }\n  };\n\n  return (\n    <AuthFlowContainer>\n      <div className=\"flex flex-col w-full justify-center\">\n        <div className=\"w-full flex flex-col items-center justify-center\">\n          <Text as=\"p\" headingH3 className=\"mb-6 text-center\">\n            Impersonate User\n          </Text>\n        </div>\n\n        <Formik\n          initialValues={{ email: \"\", apiKey: \"\" }}\n          validationSchema={ImpersonateSchema}\n          onSubmit={(values, helpers) => handleImpersonate(values, helpers)}\n        >\n          {({ isSubmitting }) => (\n            <Form className=\"flex flex-col gap-4\">\n              <TextFormField\n                name=\"email\"\n                type=\"email\"\n                label=\"Email\"\n                placeholder=\"email@yourcompany.com\"\n              />\n\n              <TextFormField\n                name=\"apiKey\"\n                type=\"password\"\n                label=\"API Key\"\n                placeholder=\"Enter API Key\"\n              />\n\n              <Button disabled={isSubmitting} type=\"submit\" width=\"full\">\n                Impersonate User\n              </Button>\n            </Form>\n          )}\n        </Formik>\n\n        <Text\n          as=\"p\"\n          mainUiMuted\n          text03\n          className=\"mt-4 text-center px-4\"\n        >{`Note: This feature is only available for @onyx.app administrators`}</Text>\n      </div>\n    </AuthFlowContainer>\n  );\n}\n"
  },
  {
    "path": "web/src/app/auth/join/page.tsx",
    "content": "import { User } from \"@/lib/types\";\nimport {\n  getCurrentUserSS,\n  getAuthTypeMetadataSS,\n  AuthTypeMetadata,\n  getAuthUrlSS,\n} from \"@/lib/userSS\";\nimport { redirect } from \"next/navigation\";\nimport EmailPasswordForm from \"../login/EmailPasswordForm\";\nimport SignInButton from \"@/app/auth/login/SignInButton\";\nimport AuthFlowContainer from \"@/components/auth/AuthFlowContainer\";\nimport AuthErrorDisplay from \"@/components/auth/AuthErrorDisplay\";\nimport { AuthType } from \"@/lib/constants\";\n\nconst Page = async (props: {\n  searchParams?: Promise<{ [key: string]: string | string[] | undefined }>;\n}) => {\n  const searchParams = await props.searchParams;\n  const nextUrl = Array.isArray(searchParams?.next)\n    ? searchParams?.next[0]\n    : searchParams?.next || null;\n\n  const defaultEmail = Array.isArray(searchParams?.email)\n    ? searchParams?.email[0]\n    : searchParams?.email || null;\n\n  const teamName = Array.isArray(searchParams?.team)\n    ? searchParams?.team[0]\n    : searchParams?.team || \"your team\";\n\n  // catch cases where the backend is completely unreachable here\n  // without try / catch, will just raise an exception and the page\n  // will not render\n  let authTypeMetadata: AuthTypeMetadata | null = null;\n  let currentUser: User | null = null;\n  try {\n    [authTypeMetadata, currentUser] = await Promise.all([\n      getAuthTypeMetadataSS(),\n      getCurrentUserSS(),\n    ]);\n  } catch (e) {\n    console.log(`Some fetch failed for the login page - ${e}`);\n  }\n\n  // if user is already logged in, take them to the main app page\n  if (currentUser && currentUser.is_active && !currentUser.is_anonymous_user) {\n    if (!authTypeMetadata?.requiresVerification || currentUser.is_verified) {\n      return redirect(\"/app\");\n    }\n    return redirect(\"/auth/waiting-on-verification\");\n  }\n  const cloud = authTypeMetadata?.authType === AuthType.CLOUD;\n\n  // only enable this page if basic login is enabled\n  if (authTypeMetadata?.authType !== AuthType.BASIC && !cloud) {\n    return redirect(\"/app\");\n  }\n\n  let authUrl: string | null = null;\n  if (cloud && authTypeMetadata) {\n    authUrl = await getAuthUrlSS(authTypeMetadata.authType, null);\n  }\n  const emailDomain = defaultEmail?.split(\"@\")[1];\n\n  return (\n    <AuthFlowContainer authState=\"join\">\n      <AuthErrorDisplay searchParams={searchParams} />\n\n      <>\n        <div className=\"absolute top-10x w-full\"></div>\n        <div className=\"flex w-full flex-col justify-center\">\n          <h2 className=\"text-center text-xl text-strong font-bold\">\n            Re-authenticate to join team\n          </h2>\n\n          {cloud && authUrl && (\n            <div className=\"w-full justify-center\">\n              <SignInButton authorizeUrl={authUrl} authType={AuthType.CLOUD} />\n              <div className=\"flex items-center w-full my-4\">\n                <div className=\"flex-grow border-t border-background-300\"></div>\n                <span className=\"px-4 text-text-500\">or</span>\n                <div className=\"flex-grow border-t border-background-300\"></div>\n              </div>\n            </div>\n          )}\n\n          <EmailPasswordForm\n            isSignup\n            isJoin\n            shouldVerify={authTypeMetadata?.requiresVerification}\n            nextUrl={nextUrl}\n            defaultEmail={defaultEmail}\n          />\n        </div>\n      </>\n    </AuthFlowContainer>\n  );\n};\n\nexport default Page;\n"
  },
  {
    "path": "web/src/app/auth/lib.ts",
    "content": "export async function requestEmailVerification(email: string) {\n  return await fetch(\"/api/auth/request-verify-token\", {\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    method: \"POST\",\n    body: JSON.stringify({\n      email: email,\n    }),\n  });\n}\n"
  },
  {
    "path": "web/src/app/auth/libSS.ts",
    "content": "import \"server-only\";\n\nimport { getDomain } from \"@/lib/redirectSS\";\nimport { NextRequest, NextResponse } from \"next/server\";\n\nexport async function authErrorRedirect(\n  request: NextRequest,\n  response: Response,\n  redirectStatus?: number\n): Promise<NextResponse> {\n  const errorUrl = new URL(\"/auth/error\", getDomain(request));\n  try {\n    const body = await response.json();\n    const detail = body?.detail;\n    if (typeof detail === \"string\" && detail) {\n      errorUrl.searchParams.set(\"error\", detail);\n    }\n  } catch {\n    // response may not be JSON\n  }\n  return NextResponse.redirect(errorUrl, redirectStatus);\n}\n"
  },
  {
    "path": "web/src/app/auth/login/EmailPasswordForm.test.tsx",
    "content": "/**\n * Integration Test: Email/Password Authentication Workflow\n *\n * Tests the complete user journey for logging in.\n * This tests the full workflow: form → validation → API call → redirect\n */\nimport React from \"react\";\nimport { render, screen, waitFor, setupUser } from \"@tests/setup/test-utils\";\nimport EmailPasswordForm from \"./EmailPasswordForm\";\n\n// Mock next/navigation (not used by this component, but required by dependencies)\njest.mock(\"next/navigation\", () => ({\n  useRouter: () => ({\n    push: jest.fn(),\n    refresh: jest.fn(),\n  }),\n}));\n\ndescribe(\"Email/Password Login Workflow\", () => {\n  let fetchSpy: jest.SpyInstance;\n\n  beforeEach(() => {\n    jest.clearAllMocks();\n    fetchSpy = jest.spyOn(global, \"fetch\");\n  });\n\n  afterEach(() => {\n    fetchSpy.mockRestore();\n  });\n\n  test(\"allows user to login with valid credentials\", async () => {\n    const user = setupUser();\n\n    // Mock POST /api/auth/login\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({}),\n    } as Response);\n\n    render(<EmailPasswordForm isSignup={false} />);\n\n    // User fills out the form using placeholder text\n    const emailInput = screen.getByPlaceholderText(/email@yourcompany.com/i);\n    const passwordInput = screen.getByPlaceholderText(/∗/);\n\n    await user.type(emailInput, \"test@example.com\");\n    await user.type(passwordInput, \"password123\");\n\n    // User submits the form\n    const loginButton = screen.getByRole(\"button\", { name: /sign in/i });\n    await user.click(loginButton);\n\n    // Verify success message is shown after login\n    await waitFor(() => {\n      expect(screen.getByText(/signed in successfully\\./i)).toBeInTheDocument();\n    });\n\n    // Verify API was called with correct credentials\n    expect(fetchSpy).toHaveBeenCalledWith(\n      \"/api/auth/login\",\n      expect.objectContaining({\n        method: \"POST\",\n        headers: {\n          \"Content-Type\": \"application/x-www-form-urlencoded\",\n        },\n      })\n    );\n\n    // Verify the request body contains email and password\n    const callArgs = fetchSpy.mock.calls[0];\n    const body = callArgs[1].body;\n    expect(body.toString()).toContain(\"username=test%40example.com\");\n    expect(body.toString()).toContain(\"password=password123\");\n  });\n\n  test(\"shows error message when login fails\", async () => {\n    const user = setupUser();\n\n    // Mock POST /api/auth/login (failure)\n    fetchSpy.mockResolvedValueOnce({\n      ok: false,\n      status: 401,\n      json: async () => ({ detail: \"LOGIN_BAD_CREDENTIALS\" }),\n    } as Response);\n\n    render(<EmailPasswordForm isSignup={false} />);\n\n    // User fills out form with invalid credentials\n    const emailInput = screen.getByPlaceholderText(/email@yourcompany.com/i);\n    const passwordInput = screen.getByPlaceholderText(/∗/);\n\n    await user.type(emailInput, \"wrong@example.com\");\n    await user.type(passwordInput, \"wrongpassword\");\n\n    // User submits\n    const loginButton = screen.getByRole(\"button\", { name: /sign in/i });\n    await user.click(loginButton);\n\n    // Verify field-level error message is displayed (not the toast)\n    await waitFor(() => {\n      expect(\n        screen.getByText(/^Invalid email or password$/i)\n      ).toBeInTheDocument();\n    });\n  });\n});\n\ndescribe(\"Email/Password Signup Workflow\", () => {\n  let fetchSpy: jest.SpyInstance;\n\n  beforeEach(() => {\n    jest.clearAllMocks();\n    fetchSpy = jest.spyOn(global, \"fetch\");\n  });\n\n  afterEach(() => {\n    fetchSpy.mockRestore();\n  });\n\n  test(\"allows user to sign up and login with valid credentials\", async () => {\n    const user = setupUser();\n\n    // Mock POST /api/auth/register\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({}),\n    } as Response);\n\n    // Mock POST /api/auth/login (after successful signup)\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({}),\n    } as Response);\n\n    render(<EmailPasswordForm isSignup={true} />);\n\n    // User fills out the signup form\n    const emailInput = screen.getByPlaceholderText(/email@yourcompany.com/i);\n    const passwordInput = screen.getByPlaceholderText(/∗/);\n\n    await user.type(emailInput, \"newuser@example.com\");\n    await user.type(passwordInput, \"securepassword123\");\n\n    // User submits the signup form\n    const signupButton = screen.getByRole(\"button\", {\n      name: /create account/i,\n    });\n    await user.click(signupButton);\n\n    // Verify signup API was called\n    await waitFor(() => {\n      expect(fetchSpy).toHaveBeenCalledWith(\n        \"/api/auth/register\",\n        expect.objectContaining({\n          method: \"POST\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n        })\n      );\n    });\n\n    // Verify signup request body\n    const signupCallArgs = fetchSpy.mock.calls[0];\n    const signupBody = JSON.parse(signupCallArgs[1].body);\n    expect(signupBody).toEqual({\n      email: \"newuser@example.com\",\n      username: \"newuser@example.com\",\n      password: \"securepassword123\",\n      referral_source: undefined,\n    });\n\n    // Verify login API was called after successful signup\n    await waitFor(() => {\n      expect(fetchSpy).toHaveBeenCalledWith(\n        \"/api/auth/login\",\n        expect.objectContaining({\n          method: \"POST\",\n        })\n      );\n    });\n\n    // Verify success message is shown\n    await waitFor(() => {\n      expect(\n        screen.getByText(/account created\\. signing in/i)\n      ).toBeInTheDocument();\n    });\n  });\n\n  test(\"shows error when email already exists\", async () => {\n    const user = setupUser();\n\n    // Mock POST /api/auth/register (failure - user exists)\n    fetchSpy.mockResolvedValueOnce({\n      ok: false,\n      status: 400,\n      json: async () => ({ detail: \"REGISTER_USER_ALREADY_EXISTS\" }),\n    } as Response);\n\n    render(<EmailPasswordForm isSignup={true} />);\n\n    // User fills out form with existing email\n    const emailInput = screen.getByPlaceholderText(/email@yourcompany.com/i);\n    const passwordInput = screen.getByPlaceholderText(/∗/);\n\n    await user.type(emailInput, \"existing@example.com\");\n    await user.type(passwordInput, \"password123\");\n\n    // User submits\n    const signupButton = screen.getByRole(\"button\", {\n      name: /create account/i,\n    });\n    await user.click(signupButton);\n\n    // Verify field-level error message is displayed (not the toast)\n    await waitFor(() => {\n      expect(\n        screen.getByText(\n          /^An account already exists with the specified email\\.$/i\n        )\n      ).toBeInTheDocument();\n    });\n  });\n\n  test(\"shows rate limit error when too many requests\", async () => {\n    const user = setupUser();\n\n    // Mock POST /api/auth/register (failure - rate limit)\n    fetchSpy.mockResolvedValueOnce({\n      ok: false,\n      status: 429,\n      json: async () => ({ detail: \"Too many requests\" }),\n    } as Response);\n\n    render(<EmailPasswordForm isSignup={true} />);\n\n    // User fills out form\n    const emailInput = screen.getByPlaceholderText(/email@yourcompany.com/i);\n    const passwordInput = screen.getByPlaceholderText(/∗/);\n\n    await user.type(emailInput, \"user@example.com\");\n    await user.type(passwordInput, \"password123\");\n\n    // User submits\n    const signupButton = screen.getByRole(\"button\", {\n      name: /create account/i,\n    });\n    await user.click(signupButton);\n\n    // Verify field-level rate limit message is displayed (not the toast)\n    await waitFor(() => {\n      expect(\n        screen.getByText(/^Too many requests\\. Please try again later\\.$/i)\n      ).toBeInTheDocument();\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/app/auth/login/EmailPasswordForm.tsx",
    "content": "\"use client\";\n\nimport { toast } from \"@/hooks/useToast\";\nimport { basicLogin, basicSignup } from \"@/lib/user\";\nimport { Button } from \"@opal/components\";\nimport { Form, Formik } from \"formik\";\nimport * as Yup from \"yup\";\nimport { requestEmailVerification } from \"../lib\";\nimport { useMemo, useState } from \"react\";\nimport { Spinner } from \"@/components/Spinner\";\nimport Link from \"next/link\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { FormikField } from \"@/refresh-components/form/FormikField\";\nimport { FormField } from \"@/refresh-components/form/FormField\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport PasswordInputTypeIn from \"@/refresh-components/inputs/PasswordInputTypeIn\";\nimport { validateInternalRedirect } from \"@/lib/auth/redirectValidation\";\nimport { APIFormFieldState } from \"@/refresh-components/form/types\";\nimport { SvgArrowRightCircle } from \"@opal/icons\";\nimport { useCaptcha } from \"@/lib/hooks/useCaptcha\";\nimport Spacer from \"@/refresh-components/Spacer\";\n\ninterface EmailPasswordFormProps {\n  isSignup?: boolean;\n  shouldVerify?: boolean;\n  referralSource?: string;\n  nextUrl?: string | null;\n  defaultEmail?: string | null;\n  isJoin?: boolean;\n}\n\nexport default function EmailPasswordForm({\n  isSignup = false,\n  shouldVerify,\n  referralSource,\n  nextUrl,\n  defaultEmail,\n  isJoin = false,\n}: EmailPasswordFormProps) {\n  const { user, authTypeMetadata } = useUser();\n  const passwordMinLength = authTypeMetadata?.passwordMinLength ?? 8;\n  const [isWorking, setIsWorking] = useState<boolean>(false);\n  const [apiStatus, setApiStatus] = useState<APIFormFieldState>(\"loading\");\n  const [showApiMessage, setShowApiMessage] = useState(false);\n  const [errorMessage, setErrorMessage] = useState<string>(\"\");\n  const { getCaptchaToken } = useCaptcha();\n\n  const apiMessages = useMemo(\n    () => ({\n      loading: isSignup\n        ? isJoin\n          ? \"Joining...\"\n          : \"Creating account...\"\n        : \"Signing in...\",\n      success: isSignup\n        ? \"Account created. Signing in...\"\n        : \"Signed in successfully.\",\n      error: errorMessage,\n    }),\n    [isSignup, isJoin, errorMessage]\n  );\n\n  return (\n    <>\n      {isWorking && <Spinner />}\n\n      <Formik\n        initialValues={{\n          email: defaultEmail ? defaultEmail.toLowerCase() : \"\",\n          password: \"\",\n        }}\n        validateOnChange={true}\n        validateOnBlur={true}\n        validationSchema={Yup.object().shape({\n          email: Yup.string()\n            .email()\n            .required()\n            .transform((value) => value.toLowerCase()),\n          password: Yup.string()\n            .min(\n              passwordMinLength,\n              `Password must be at least ${passwordMinLength} characters`\n            )\n            .required(),\n        })}\n        onSubmit={async (values: { email: string; password: string }) => {\n          // Ensure email is lowercase\n          const email: string = values.email.toLowerCase();\n          setShowApiMessage(true);\n          setApiStatus(\"loading\");\n          setErrorMessage(\"\");\n\n          if (isSignup) {\n            // login is fast, no need to show a spinner\n            setIsWorking(true);\n\n            // Get captcha token for signup (if captcha is enabled)\n            const captchaToken = await getCaptchaToken(\"signup\");\n\n            const response = await basicSignup(\n              email,\n              values.password,\n              referralSource,\n              captchaToken\n            );\n\n            if (!response.ok) {\n              setIsWorking(false);\n\n              const errorBody: any = await response.json();\n              const errorDetail = errorBody.detail;\n              let errorMsg: string = \"Unknown error\";\n              if (errorDetail === \"REGISTER_USER_ALREADY_EXISTS\") {\n                errorMsg =\n                  \"An account already exists with the specified email.\";\n              } else if (typeof errorDetail === \"string\" && errorDetail) {\n                errorMsg = errorDetail;\n              }\n              if (response.status === 429) {\n                errorMsg = \"Too many requests. Please try again later.\";\n              }\n              setErrorMessage(errorMsg);\n              setApiStatus(\"error\");\n              toast.error(`Failed to sign up - ${errorMsg}`);\n              setIsWorking(false);\n              return;\n            } else {\n              setApiStatus(\"success\");\n              toast.success(\"Account created successfully. Please log in.\");\n            }\n          }\n\n          const loginResponse = await basicLogin(email, values.password);\n          if (loginResponse.ok) {\n            setApiStatus(\"success\");\n            if (isSignup && shouldVerify) {\n              await requestEmailVerification(email);\n              // Use window.location.href to force a full page reload,\n              // ensuring app re-initializes with the new state (including\n              // server-side provider values)\n              window.location.href = \"/auth/waiting-on-verification\";\n            } else {\n              // The searchparam is purely for multi tenant developement purposes.\n              // It replicates the behavior of the case where a user\n              // has signed up with email / password as the only user to an instance\n              // and has just completed verification\n              const validatedNextUrl = validateInternalRedirect(nextUrl);\n              window.location.href = validatedNextUrl\n                ? validatedNextUrl\n                : `/app${isSignup && !isJoin ? \"?new_team=true\" : \"\"}`;\n            }\n          } else {\n            setIsWorking(false);\n            const errorDetail: any = (await loginResponse.json()).detail;\n            let errorMsg: string = \"Unknown error\";\n            if (errorDetail === \"LOGIN_BAD_CREDENTIALS\") {\n              errorMsg = \"Invalid email or password\";\n            } else if (errorDetail === \"NO_WEB_LOGIN_AND_HAS_NO_PASSWORD\") {\n              errorMsg = \"Create an account to set a password\";\n            } else if (typeof errorDetail === \"string\") {\n              errorMsg = errorDetail;\n            }\n            if (loginResponse.status === 429) {\n              errorMsg = \"Too many requests. Please try again later.\";\n            }\n            setErrorMessage(errorMsg);\n            setApiStatus(\"error\");\n            toast.error(`Failed to login - ${errorMsg}`);\n          }\n        }}\n      >\n        {({ isSubmitting, isValid, dirty, values }) => {\n          return (\n            <Form className=\"gap-y-3\">\n              <FormikField<string>\n                name=\"email\"\n                render={(field, helper, meta, state) => (\n                  <FormField name=\"email\" state={state} className=\"w-full\">\n                    <FormField.Label>Email Address</FormField.Label>\n                    <FormField.Control>\n                      <InputTypeIn\n                        {...field}\n                        onChange={(e) => {\n                          if (showApiMessage && apiStatus === \"error\") {\n                            setShowApiMessage(false);\n                            setErrorMessage(\"\");\n                            setApiStatus(\"loading\");\n                          }\n                          field.onChange(e);\n                        }}\n                        placeholder=\"email@yourcompany.com\"\n                        onClear={() => helper.setValue(\"\")}\n                        data-testid=\"email\"\n                        variant={apiStatus === \"error\" ? \"error\" : undefined}\n                        showClearButton={false}\n                      />\n                    </FormField.Control>\n                  </FormField>\n                )}\n              />\n\n              <FormikField<string>\n                name=\"password\"\n                render={(field, helper, meta, state) => (\n                  <FormField name=\"password\" state={state} className=\"w-full\">\n                    <FormField.Label>Password</FormField.Label>\n                    <FormField.Control>\n                      <PasswordInputTypeIn\n                        {...field}\n                        onChange={(e) => {\n                          if (showApiMessage && apiStatus === \"error\") {\n                            setShowApiMessage(false);\n                            setErrorMessage(\"\");\n                            setApiStatus(\"loading\");\n                          }\n                          field.onChange(e);\n                        }}\n                        placeholder=\"∗∗∗∗∗∗∗∗∗∗∗∗∗∗\"\n                        onClear={() => helper.setValue(\"\")}\n                        data-testid=\"password\"\n                        error={apiStatus === \"error\"}\n                        showClearButton={false}\n                      />\n                    </FormField.Control>\n                    {isSignup && !showApiMessage && (\n                      <FormField.Message\n                        messages={{\n                          idle: `Password must be at least ${passwordMinLength} characters`,\n                          error: meta.error,\n                          success: `Password must be at least ${passwordMinLength} characters`,\n                        }}\n                      />\n                    )}\n                    {showApiMessage && (\n                      <FormField.APIMessage\n                        state={apiStatus}\n                        messages={apiMessages}\n                      />\n                    )}\n                  </FormField>\n                )}\n              />\n\n              <Spacer rem={0.25} />\n              <Button\n                disabled={isSubmitting || !isValid || !dirty}\n                type=\"submit\"\n                width=\"full\"\n                rightIcon={SvgArrowRightCircle}\n              >\n                {isJoin ? \"Join\" : isSignup ? \"Create Account\" : \"Sign In\"}\n              </Button>\n              {user?.is_anonymous_user && (\n                <Link\n                  href=\"/app\"\n                  className=\"text-xs text-action-link-05 cursor-pointer text-center w-full font-medium mx-auto\"\n                >\n                  <span className=\"hover:border-b hover:border-dotted hover:border-action-link-05\">\n                    or continue as guest\n                  </span>\n                </Link>\n              )}\n            </Form>\n          );\n        }}\n      </Formik>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/auth/login/LoginPage.tsx",
    "content": "\"use client\";\n\nimport { AuthTypeMetadata } from \"@/hooks/useAuthTypeMetadata\";\nimport LoginText from \"@/app/auth/login/LoginText\";\nimport SignInButton from \"@/app/auth/login/SignInButton\";\nimport EmailPasswordForm from \"./EmailPasswordForm\";\nimport { AuthType, NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED } from \"@/lib/constants\";\nimport { useSendAuthRequiredMessage } from \"@/lib/extension/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport Message from \"@/refresh-components/messages/Message\";\n\ninterface LoginPageProps {\n  authUrl: string | null;\n  authTypeMetadata: AuthTypeMetadata | null;\n  nextUrl: string | null;\n  hidePageRedirect?: boolean;\n  verified?: boolean;\n  isFirstUser?: boolean;\n}\n\nexport default function LoginPage({\n  authUrl,\n  authTypeMetadata,\n  nextUrl,\n  hidePageRedirect,\n  verified,\n  isFirstUser,\n}: LoginPageProps) {\n  useSendAuthRequiredMessage();\n\n  // Honor any existing nextUrl; only default to new team flow for first users with no nextUrl\n  const effectiveNextUrl =\n    nextUrl ?? (isFirstUser ? \"/app?new_team=true\" : null);\n\n  return (\n    <div className=\"flex flex-col w-full justify-center\">\n      {verified && (\n        <Message\n          success\n          close={false}\n          text=\"Your email has been verified! Please sign in to continue.\"\n          className=\"w-full mb-4\"\n        />\n      )}\n      {authUrl &&\n        authTypeMetadata &&\n        authTypeMetadata.authType !== AuthType.CLOUD &&\n        // basic auth is handled below w/ the EmailPasswordForm\n        authTypeMetadata.authType !== AuthType.BASIC && (\n          <div className=\"flex flex-col w-full gap-4\">\n            <LoginText />\n            <SignInButton\n              authorizeUrl={authUrl}\n              authType={authTypeMetadata?.authType}\n            />\n          </div>\n        )}\n\n      {authTypeMetadata?.authType === AuthType.CLOUD && (\n        <div className=\"w-full justify-center flex flex-col gap-6\">\n          <LoginText />\n          {authUrl && authTypeMetadata && (\n            <>\n              <SignInButton\n                authorizeUrl={authUrl}\n                authType={authTypeMetadata?.authType}\n              />\n              <div className=\"flex flex-row items-center w-full gap-2\">\n                <div className=\"flex-1 border-t border-text-01\" />\n                <Text as=\"p\" text03 mainUiMuted>\n                  or\n                </Text>\n                <div className=\"flex-1 border-t border-text-01\" />\n              </div>\n            </>\n          )}\n          <EmailPasswordForm shouldVerify={true} nextUrl={effectiveNextUrl} />\n          {NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED && (\n            <Button href=\"/auth/forgot-password\">Reset Password</Button>\n          )}\n        </div>\n      )}\n\n      {authTypeMetadata?.authType === AuthType.BASIC && (\n        <div className=\"flex flex-col w-full gap-6\">\n          <LoginText />\n          <EmailPasswordForm nextUrl={effectiveNextUrl} />\n        </div>\n      )}\n\n      {!hidePageRedirect && (\n        <p className=\"text-center mt-4\">\n          Don&apos;t have an account?{\" \"}\n          <span\n            onClick={() => {\n              if (typeof window !== \"undefined\" && window.top) {\n                window.top.location.href = \"/auth/signup\";\n              } else {\n                window.location.href = \"/auth/signup\";\n              }\n            }}\n            className=\"text-link font-medium cursor-pointer\"\n          >\n            Create an account\n          </span>\n        </p>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/auth/login/LoginText.tsx",
    "content": "\"use client\";\n\nimport React, { useContext } from \"react\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nexport default function LoginText() {\n  const settings = useContext(SettingsContext);\n  return (\n    <div className=\"w-full flex flex-col \">\n      <Text as=\"p\" headingH2 text05>\n        Welcome to{\" \"}\n        {(settings && settings?.enterpriseSettings?.application_name) || \"Onyx\"}\n      </Text>\n      <Text as=\"p\" text03 mainUiMuted>\n        Your open source AI platform for work\n      </Text>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/auth/login/SignInButton.tsx",
    "content": "/**\n * SignInButton — renders the SSO / OAuth sign-in button on the login page.\n *\n * IMPORTANT: This component is rendered as part of the /auth/login page, which\n * is used in healthcheck and monitoring flows that issue headless (non-browser)\n * requests (e.g. `curl`). During server-side rendering of those requests,\n * browser-only globals like `window`, `document`, `navigator`, etc. are NOT\n * available. Even though this file is marked \"use client\", Next.js still\n * executes the component body on the server during SSR — only hooks like\n * `useEffect` are skipped.\n *\n * Do NOT reference `window` or other browser APIs in the render path of this\n * component. If you need browser globals, gate them behind `useEffect` or\n * `typeof window !== \"undefined\"` checks inside callbacks/effects — but be\n * aware that Turbopack may optimise away bare `typeof window` guards in the\n * SSR bundle, so prefer `useEffect` for safety.\n */\n\n\"use client\";\n\nimport { Button } from \"@opal/components\";\nimport { AuthType } from \"@/lib/constants\";\nimport { FcGoogle } from \"react-icons/fc\";\nimport type { IconProps } from \"@opal/types\";\n\ninterface SignInButtonProps {\n  authorizeUrl: string;\n  authType: AuthType;\n}\n\nexport default function SignInButton({\n  authorizeUrl,\n  authType,\n}: SignInButtonProps) {\n  let button: string | undefined;\n  let icon: React.FunctionComponent<IconProps> | undefined;\n\n  if (authType === AuthType.GOOGLE_OAUTH || authType === AuthType.CLOUD) {\n    button = \"Continue with Google\";\n    icon = FcGoogle;\n  } else if (authType === AuthType.OIDC) {\n    button = \"Continue with OIDC SSO\";\n  } else if (authType === AuthType.SAML) {\n    button = \"Continue with SAML SSO\";\n  }\n\n  if (!button) {\n    throw new Error(`Unhandled authType: ${authType}`);\n  }\n\n  return (\n    <Button\n      prominence={\n        authType === AuthType.GOOGLE_OAUTH || authType === AuthType.CLOUD\n          ? \"secondary\"\n          : \"primary\"\n      }\n      width=\"full\"\n      icon={icon}\n      href={authorizeUrl}\n    >\n      {button}\n    </Button>\n  );\n}\n"
  },
  {
    "path": "web/src/app/auth/login/page.tsx",
    "content": "import { User } from \"@/lib/types\";\nimport {\n  getCurrentUserSS,\n  getAuthUrlSS,\n  getAuthTypeMetadataSS,\n  AuthTypeMetadata,\n} from \"@/lib/userSS\";\nimport { redirect } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport AuthFlowContainer from \"@/components/auth/AuthFlowContainer\";\nimport LoginPage from \"./LoginPage\";\nimport { AuthType } from \"@/lib/constants\";\n\nexport interface PageProps {\n  searchParams?: Promise<{ [key: string]: string | string[] | undefined }>;\n}\n\nexport default async function Page(props: PageProps) {\n  const searchParams = await props.searchParams;\n  const autoRedirectDisabled = searchParams?.disableAutoRedirect === \"true\";\n  const autoRedirectToSignupDisabled =\n    searchParams?.autoRedirectToSignup === \"false\";\n  const nextUrl: string | null = Array.isArray(searchParams?.next)\n    ? searchParams?.next[0] ?? null\n    : searchParams?.next ?? null;\n  const verified = searchParams?.verified === \"true\";\n  const isFirstUser = searchParams?.first_user === \"true\";\n\n  // catch cases where the backend is completely unreachable here\n  // without try / catch, will just raise an exception and the page\n  // will not render\n  let authTypeMetadata: AuthTypeMetadata | null = null;\n  let currentUser: User | null = null;\n  try {\n    [authTypeMetadata, currentUser] = await Promise.all([\n      getAuthTypeMetadataSS(),\n      getCurrentUserSS(),\n    ]);\n  } catch (e) {\n    console.log(`Some fetch failed for the login page - ${e}`);\n  }\n\n  // if there are no users, redirect to signup page for initial setup\n  // (only for auth types that support self-service signup)\n  if (\n    authTypeMetadata &&\n    !authTypeMetadata.hasUsers &&\n    !autoRedirectToSignupDisabled &&\n    authTypeMetadata.authType === AuthType.BASIC\n  ) {\n    return redirect(\"/auth/signup\");\n  }\n\n  // if user is already logged in, take them to the main app page\n  if (currentUser && currentUser.is_active && !currentUser.is_anonymous_user) {\n    console.log(\"Login page: User is logged in, redirecting to chat\", {\n      userId: currentUser.id,\n      is_active: currentUser.is_active,\n      is_anonymous: currentUser.is_anonymous_user,\n    });\n\n    if (authTypeMetadata?.requiresVerification && !currentUser.is_verified) {\n      return redirect(\"/auth/waiting-on-verification\");\n    }\n\n    // Add a query parameter to indicate this is a redirect from login\n    // This will help prevent redirect loops\n    return redirect(\"/app?from=login\");\n  }\n\n  // get where to send the user to authenticate\n  let authUrl: string | null = null;\n  if (authTypeMetadata) {\n    try {\n      authUrl = await getAuthUrlSS(authTypeMetadata.authType, nextUrl);\n    } catch (e) {\n      console.log(`Some fetch failed for the login page - ${e}`);\n    }\n  }\n\n  if (authTypeMetadata?.autoRedirect && authUrl && !autoRedirectDisabled) {\n    return redirect(authUrl as Route);\n  }\n\n  const ssoLoginFooterContent =\n    authTypeMetadata &&\n    (authTypeMetadata.authType === AuthType.GOOGLE_OAUTH ||\n      authTypeMetadata.authType === AuthType.OIDC ||\n      authTypeMetadata.authType === AuthType.SAML) ? (\n      <>Need access? Reach out to your IT admin to get access.</>\n    ) : undefined;\n\n  return (\n    <div className=\"flex flex-col \">\n      <AuthFlowContainer\n        authState=\"login\"\n        footerContent={ssoLoginFooterContent}\n      >\n        <LoginPage\n          authUrl={authUrl}\n          authTypeMetadata={authTypeMetadata}\n          nextUrl={nextUrl}\n          hidePageRedirect={true}\n          verified={verified}\n          isFirstUser={isFirstUser}\n        />\n      </AuthFlowContainer>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/auth/logout/route.ts",
    "content": "import { getAuthTypeMetadataSS, logoutSS } from \"@/lib/userSS\";\nimport { NextRequest } from \"next/server\";\n\nexport const POST = async (request: NextRequest) => {\n  // Directs the logout request to the appropriate FastAPI endpoint.\n  // Needed since env variables don't work well on the client-side\n  const authTypeMetadata = await getAuthTypeMetadataSS();\n  const response = await logoutSS(authTypeMetadata.authType, request.headers);\n\n  if (response && !response.ok) {\n    return new Response(response.body, { status: response?.status });\n  }\n\n  // Always clear the auth cookie on logout. This is critical for the JWT\n  // auth backend where destroy_token is a no-op (stateless), but is also\n  // the correct thing to do for Redis/Postgres backends — the server-side\n  // Set-Cookie from FastAPI never reaches the browser since logoutSS is a\n  // server-to-server fetch.\n  const cookiesToDelete = [\"fastapiusersauth\"];\n  const cookieOptions = {\n    path: \"/\",\n    secure: process.env.NODE_ENV === \"production\",\n    httpOnly: true,\n    sameSite: \"lax\" as const,\n  };\n\n  const headers = new Headers();\n\n  cookiesToDelete.forEach((cookieName) => {\n    headers.append(\n      \"Set-Cookie\",\n      `${cookieName}=; Max-Age=0; ${Object.entries(cookieOptions)\n        .map(([key, value]) => `${key}=${value}`)\n        .join(\"; \")}`\n    );\n  });\n\n  return new Response(null, {\n    status: 204,\n    headers: headers,\n  });\n};\n"
  },
  {
    "path": "web/src/app/auth/oauth/callback/route.ts",
    "content": "import { authErrorRedirect } from \"@/app/auth/libSS\";\nimport { getDomain } from \"@/lib/redirectSS\";\nimport { buildUrl } from \"@/lib/utilsSS\";\nimport { NextRequest, NextResponse } from \"next/server\";\n\nexport const GET = async (request: NextRequest) => {\n  // Wrapper around the FastAPI endpoint /auth/oauth/callback,\n  // which adds back a redirect to the main app.\n  const url = new URL(buildUrl(\"/auth/oauth/callback\"));\n  url.search = request.nextUrl.search;\n  const cookieHeader = request.headers.get(\"cookie\") || \"\";\n\n  // Set 'redirect' to 'manual' to prevent automatic redirection\n  const response = await fetch(url.toString(), {\n    redirect: \"manual\",\n    headers: cookieHeader ? { cookie: cookieHeader } : undefined,\n  });\n  const setCookieHeader = response.headers.get(\"set-cookie\");\n\n  if (response.status === 401) {\n    return NextResponse.redirect(\n      new URL(\"/auth/create-account\", getDomain(request))\n    );\n  }\n\n  if (!setCookieHeader) {\n    return authErrorRedirect(request, response);\n  }\n\n  // Get the redirect URL from the backend's 'Location' header, or default to '/'\n  const redirectUrl = response.headers.get(\"location\") || \"/\";\n\n  const redirectResponse = NextResponse.redirect(\n    new URL(redirectUrl, getDomain(request))\n  );\n\n  redirectResponse.headers.set(\"set-cookie\", setCookieHeader);\n  return redirectResponse;\n};\n"
  },
  {
    "path": "web/src/app/auth/oidc/callback/route.ts",
    "content": "import { authErrorRedirect } from \"@/app/auth/libSS\";\nimport { getDomain } from \"@/lib/redirectSS\";\nimport { buildUrl } from \"@/lib/utilsSS\";\nimport { NextRequest, NextResponse } from \"next/server\";\n\nexport const GET = async (request: NextRequest) => {\n  // Wrapper around the FastAPI endpoint /auth/oidc/callback,\n  // which adds back a redirect to the main app.\n  const url = new URL(buildUrl(\"/auth/oidc/callback\"));\n  url.search = request.nextUrl.search;\n  const cookieHeader = request.headers.get(\"cookie\") || \"\";\n\n  // Set 'redirect' to 'manual' to prevent automatic redirection\n  const response = await fetch(url.toString(), {\n    redirect: \"manual\",\n    headers: cookieHeader ? { cookie: cookieHeader } : undefined,\n  });\n  const setCookieHeader = response.headers.get(\"set-cookie\");\n\n  if (response.status === 401) {\n    return NextResponse.redirect(\n      new URL(\"/auth/create-account\", getDomain(request))\n    );\n  }\n\n  if (!setCookieHeader) {\n    return authErrorRedirect(request, response);\n  }\n\n  // Get the redirect URL from the backend's 'Location' header, or default to '/'\n  const redirectUrl = response.headers.get(\"location\") || \"/\";\n\n  const redirectResponse = NextResponse.redirect(\n    new URL(redirectUrl, getDomain(request))\n  );\n\n  redirectResponse.headers.set(\"set-cookie\", setCookieHeader);\n  return redirectResponse;\n};\n"
  },
  {
    "path": "web/src/app/auth/reset-password/page.tsx",
    "content": "\"use client\";\nimport React, { useState, useEffect } from \"react\";\nimport { resetPassword } from \"../forgot-password/utils\";\nimport AuthFlowContainer from \"@/components/auth/AuthFlowContainer\";\nimport Title from \"@/components/ui/title\";\nimport { Text } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport Link from \"next/link\";\nimport { Button } from \"@opal/components\";\nimport { Form, Formik } from \"formik\";\nimport * as Yup from \"yup\";\nimport { TextFormField } from \"@/components/Field\";\nimport { toast } from \"@/hooks/useToast\";\nimport { Spinner } from \"@/components/Spinner\";\nimport { redirect, useSearchParams } from \"next/navigation\";\nimport {\n  NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED,\n  TENANT_ID_COOKIE_NAME,\n} from \"@/lib/constants\";\nimport Cookies from \"js-cookie\";\n\nconst ResetPasswordPage: React.FC = () => {\n  const [isWorking, setIsWorking] = useState(false);\n  const searchParams = useSearchParams();\n  const token = searchParams?.get(\"token\");\n  const tenantId = searchParams?.get(TENANT_ID_COOKIE_NAME);\n  // Keep search param same name as cookie for simplicity\n\n  useEffect(() => {\n    if (tenantId) {\n      Cookies.set(TENANT_ID_COOKIE_NAME, tenantId, {\n        path: \"/\",\n        expires: 1 / 24,\n      }); // Expires in 1 hour\n    }\n  }, [tenantId]);\n\n  if (!NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED) {\n    redirect(\"/auth/login\");\n  }\n\n  return (\n    <AuthFlowContainer>\n      <div className=\"flex flex-col w-full justify-center\">\n        <div className=\"flex\">\n          <Title className=\"mb-2 mx-auto font-bold\">Reset Password</Title>\n        </div>\n        {isWorking && <Spinner />}\n        <Formik\n          initialValues={{\n            password: \"\",\n            confirmPassword: \"\",\n          }}\n          validationSchema={Yup.object().shape({\n            password: Yup.string().required(\"Password is required\"),\n            confirmPassword: Yup.string()\n              .oneOf([Yup.ref(\"password\"), undefined], \"Passwords must match\")\n              .required(\"Confirm Password is required\"),\n          })}\n          onSubmit={async (values) => {\n            if (!token) {\n              toast.error(\"Invalid or missing reset token.\");\n              return;\n            }\n            setIsWorking(true);\n            try {\n              await resetPassword(token, values.password);\n              toast.success(\n                \"Password reset successfully. Redirecting to login...\"\n              );\n              setTimeout(() => {\n                redirect(\"/auth/login\");\n              }, 1000);\n            } catch (error) {\n              if (error instanceof Error) {\n                toast.error(\n                  error.message || \"An error occurred during password reset.\"\n                );\n              } else {\n                toast.error(\"An unexpected error occurred. Please try again.\");\n              }\n            } finally {\n              setIsWorking(false);\n            }\n          }}\n        >\n          {({ isSubmitting }) => (\n            <Form className=\"w-full flex flex-col items-stretch mt-2\">\n              <TextFormField\n                name=\"password\"\n                label=\"New Password\"\n                type=\"password\"\n                placeholder=\"Enter your new password\"\n              />\n              <TextFormField\n                name=\"confirmPassword\"\n                label=\"Confirm New Password\"\n                type=\"password\"\n                placeholder=\"Confirm your new password\"\n              />\n\n              <div className=\"flex\">\n                <Button disabled={isSubmitting} type=\"submit\" width=\"full\">\n                  Reset Password\n                </Button>\n              </div>\n            </Form>\n          )}\n        </Formik>\n        <Spacer rem={1} />\n        <div className=\"flex\">\n          <div className=\"mx-auto\">\n            <Text as=\"p\">{markdown(\"[Back to Login](/auth/login)\")}</Text>\n          </div>\n        </div>\n      </div>\n    </AuthFlowContainer>\n  );\n};\n\nexport default ResetPasswordPage;\n"
  },
  {
    "path": "web/src/app/auth/saml/callback/route.ts",
    "content": "import { authErrorRedirect } from \"@/app/auth/libSS\";\nimport { validateInternalRedirect } from \"@/lib/auth/redirectValidation\";\nimport { getDomain } from \"@/lib/redirectSS\";\nimport { buildUrl } from \"@/lib/utilsSS\";\nimport { NextRequest, NextResponse } from \"next/server\";\n\n// have to use this so we don't hit the redirect URL with a `POST` request\nconst SEE_OTHER_REDIRECT_STATUS = 303;\n\nasync function handleSamlCallback(\n  request: NextRequest,\n  method: \"GET\" | \"POST\"\n) {\n  // Wrapper around the FastAPI endpoint /auth/saml/callback,\n  // which adds back a redirect to the main app.\n  const url = new URL(buildUrl(\"/auth/saml/callback\"));\n  url.search = request.nextUrl.search;\n\n  const fetchOptions: RequestInit = {\n    method,\n    headers: {},\n  };\n\n  let relayState: string | null = null;\n\n  // For POST requests, include form data\n  if (method === \"POST\") {\n    const formData = await request.formData();\n    const relayStateValue = formData.get(\"RelayState\");\n    relayState = typeof relayStateValue === \"string\" ? relayStateValue : null;\n    fetchOptions.body = formData;\n  }\n\n  // OneLogin python toolkit only supports HTTP-POST binding for SAMLResponse.\n  // If the IdP returned SAMLResponse via query parameters (GET), convert to POST.\n  if (method === \"GET\") {\n    const samlResponse = request.nextUrl.searchParams.get(\"SAMLResponse\");\n    relayState = request.nextUrl.searchParams.get(\"RelayState\");\n    if (samlResponse) {\n      const formData = new FormData();\n      formData.set(\"SAMLResponse\", samlResponse);\n      if (relayState) {\n        formData.set(\"RelayState\", relayState);\n      }\n      // Clear query on backend URL and send as POST with form body\n      url.search = \"\";\n      fetchOptions.method = \"POST\";\n      fetchOptions.body = formData;\n    }\n  }\n\n  const response = await fetch(url.toString(), fetchOptions);\n  const setCookieHeader = response.headers.get(\"set-cookie\");\n\n  if (!setCookieHeader) {\n    return authErrorRedirect(request, response, SEE_OTHER_REDIRECT_STATUS);\n  }\n\n  const validatedRelayState = validateInternalRedirect(relayState);\n  const redirectDestination = validatedRelayState ?? \"/\";\n\n  const redirectResponse = NextResponse.redirect(\n    new URL(redirectDestination, getDomain(request)),\n    SEE_OTHER_REDIRECT_STATUS\n  );\n  redirectResponse.headers.set(\"set-cookie\", setCookieHeader);\n  return redirectResponse;\n}\n\nexport const GET = async (request: NextRequest) => {\n  return handleSamlCallback(request, \"GET\");\n};\n\nexport const POST = async (request: NextRequest) => {\n  return handleSamlCallback(request, \"POST\");\n};\n"
  },
  {
    "path": "web/src/app/auth/signup/ReferralSourceSelector.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport { Label } from \"@/components/Field\";\n\ninterface ReferralSourceSelectorProps {\n  defaultValue?: string;\n}\n\nexport default function ReferralSourceSelector({\n  defaultValue,\n}: ReferralSourceSelectorProps) {\n  const [referralSource, setReferralSource] = useState(defaultValue);\n\n  const referralOptions = [\n    { value: \"search\", label: \"Search Engine (Google/Bing)\" },\n    { value: \"friend\", label: \"Friend/Colleague\" },\n    { value: \"linkedin\", label: \"LinkedIn\" },\n    { value: \"twitter\", label: \"Twitter\" },\n    { value: \"hackernews\", label: \"HackerNews\" },\n    { value: \"reddit\", label: \"Reddit\" },\n    { value: \"youtube\", label: \"YouTube\" },\n    { value: \"podcast\", label: \"Podcast\" },\n    { value: \"blog\", label: \"Article/Blog\" },\n    { value: \"ads\", label: \"Advertisements\" },\n    { value: \"other\", label: \"Other\" },\n  ];\n\n  const handleChange = (value: string) => {\n    setReferralSource(value);\n    const cookies = require(\"js-cookie\");\n    cookies.set(\"referral_source\", value, {\n      expires: 365,\n      path: \"/\",\n      sameSite: \"strict\",\n    });\n  };\n\n  return (\n    <div className=\"w-full gap-y-2 flex flex-col\">\n      <Label className=\"text-text-950\" small={false}>\n        How did you hear about us?\n      </Label>\n      <InputSelect value={referralSource} onValueChange={handleChange}>\n        <InputSelect.Trigger placeholder=\"Select an option\" />\n\n        <InputSelect.Content>\n          {referralOptions.map((option) => (\n            <InputSelect.Item key={option.value} value={option.value}>\n              {option.label}\n            </InputSelect.Item>\n          ))}\n        </InputSelect.Content>\n      </InputSelect>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/auth/signup/page.tsx",
    "content": "import { User } from \"@/lib/types\";\nimport {\n  getCurrentUserSS,\n  getAuthTypeMetadataSS,\n  AuthTypeMetadata,\n  getAuthUrlSS,\n} from \"@/lib/userSS\";\nimport { redirect } from \"next/navigation\";\nimport EmailPasswordForm from \"../login/EmailPasswordForm\";\nimport SignInButton from \"@/app/auth/login/SignInButton\";\nimport AuthFlowContainer from \"@/components/auth/AuthFlowContainer\";\nimport ReferralSourceSelector from \"./ReferralSourceSelector\";\nimport AuthErrorDisplay from \"@/components/auth/AuthErrorDisplay\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\nimport { AuthType } from \"@/lib/constants\";\n\nconst Page = async (props: {\n  searchParams?: Promise<{ [key: string]: string | string[] | undefined }>;\n}) => {\n  const searchParams = await props.searchParams;\n  const nextUrl = Array.isArray(searchParams?.next)\n    ? searchParams?.next[0]\n    : searchParams?.next || null;\n\n  const defaultEmail = Array.isArray(searchParams?.email)\n    ? searchParams?.email[0]\n    : searchParams?.email || null;\n\n  // catch cases where the backend is completely unreachable here\n  // without try / catch, will just raise an exception and the page\n  // will not render\n  let authTypeMetadata: AuthTypeMetadata | null = null;\n  let currentUser: User | null = null;\n  try {\n    [authTypeMetadata, currentUser] = await Promise.all([\n      getAuthTypeMetadataSS(),\n      getCurrentUserSS(),\n    ]);\n  } catch (e) {\n    console.log(`Some fetch failed for the login page - ${e}`);\n  }\n\n  // if user is already logged in, take them to the main app page\n  if (currentUser && currentUser.is_active && !currentUser.is_anonymous_user) {\n    if (!authTypeMetadata?.requiresVerification || currentUser.is_verified) {\n      return redirect(\"/app\");\n    }\n    return redirect(\"/auth/waiting-on-verification\");\n  }\n  const cloud = authTypeMetadata?.authType === AuthType.CLOUD;\n\n  // only enable this page if basic login is enabled\n  if (authTypeMetadata?.authType !== AuthType.BASIC && !cloud) {\n    return redirect(\"/app\");\n  }\n\n  let authUrl: string | null = null;\n  if (cloud && authTypeMetadata) {\n    authUrl = await getAuthUrlSS(authTypeMetadata.authType, null);\n  }\n\n  return (\n    <AuthFlowContainer authState=\"signup\">\n      <AuthErrorDisplay searchParams={searchParams} />\n\n      <>\n        <div className=\"absolute top-10x w-full\"></div>\n        <div\n          className={cn(\n            \"flex w-full flex-col justify-start\",\n            cloud ? \"\" : \"gap-6\"\n          )}\n        >\n          <div className=\"w-full\">\n            <Text as=\"p\" headingH2 text05>\n              {cloud ? \"Complete your sign up\" : \"Create account\"}\n            </Text>\n            <Text as=\"p\" text03>\n              Get started with Onyx\n            </Text>\n          </div>\n          {cloud && authUrl && (\n            <div className=\"w-full justify-center mt-6\">\n              <SignInButton authorizeUrl={authUrl} authType={AuthType.CLOUD} />\n              <div className=\"flex items-center w-full my-4\">\n                <div className=\"flex-grow border-t border-border-01\" />\n                <Text as=\"p\" mainUiMuted text03 className=\"mx-2\">\n                  or\n                </Text>\n                <div className=\"flex-grow border-t border-border-01\" />\n              </div>\n            </div>\n          )}\n\n          {cloud && (\n            <>\n              <div className=\"w-full flex flex-col mb-3\">\n                <ReferralSourceSelector />\n              </div>\n            </>\n          )}\n\n          <EmailPasswordForm\n            isSignup\n            shouldVerify={authTypeMetadata?.requiresVerification}\n            nextUrl={nextUrl}\n            defaultEmail={defaultEmail}\n          />\n        </div>\n      </>\n    </AuthFlowContainer>\n  );\n};\n\nexport default Page;\n"
  },
  {
    "path": "web/src/app/auth/verify-email/Verify.tsx",
    "content": "\"use client\";\n\nimport { useSearchParams } from \"next/navigation\";\nimport { useCallback, useEffect, useState } from \"react\";\nimport { Text } from \"@opal/components\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport { RequestNewVerificationEmail } from \"../waiting-on-verification/RequestNewVerificationEmail\";\nimport { User } from \"@/lib/types\";\nimport Logo from \"@/refresh-components/Logo\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\n\nexport interface VerifyProps {\n  user: User | null;\n}\n\nexport default function Verify({ user }: VerifyProps) {\n  const searchParams = useSearchParams();\n\n  const [error, setError] = useState(\"\");\n\n  const verify = useCallback(async () => {\n    const token = searchParams?.get(\"token\");\n    const firstUser =\n      searchParams?.get(\"first_user\") === \"true\" && NEXT_PUBLIC_CLOUD_ENABLED;\n    if (!token) {\n      setError(\n        \"Missing verification token. Try requesting a new verification email.\"\n      );\n      return;\n    }\n\n    const response = await fetch(\"/api/auth/verify\", {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({ token }),\n    });\n\n    if (response.ok) {\n      // Redirect to login page instead of /app so user can log in\n      // from any browser (not dependent on the original signup session)\n      const loginUrl = firstUser\n        ? \"/auth/login?verified=true&first_user=true\"\n        : \"/auth/login?verified=true\";\n      window.location.href = loginUrl;\n    } else {\n      let errorDetail = \"unknown error\";\n      try {\n        errorDetail = (await response.json()).detail;\n      } catch (e) {\n        console.error(\"Failed to parse verification error response:\", e);\n      }\n      setError(\n        `Failed to verify your email - ${errorDetail}. Please try requesting a new verification email.`\n      );\n    }\n  }, [searchParams]);\n\n  useEffect(() => {\n    verify();\n  }, [verify]);\n\n  return (\n    <main>\n      <div className=\"min-h-screen flex flex-col items-center justify-center py-12 px-4 sm:px-6 lg:px-8\">\n        <Logo folded size={64} className=\"mx-auto w-fit animate-pulse\" />\n        {!error ? (\n          <>\n            <Spacer rem={0.5} />\n            <Text as=\"p\">Verifying your email...</Text>\n          </>\n        ) : (\n          <div>\n            <Spacer rem={0.5} />\n            <Text as=\"p\">{error}</Text>\n\n            {user && (\n              <div className=\"text-center\">\n                <RequestNewVerificationEmail email={user.email}>\n                  {/* TODO(@raunakab): migrate to @opal/components Text */}\n                  <p className=\"text-sm mt-2 text-link\">\n                    Get new verification email\n                  </p>\n                </RequestNewVerificationEmail>\n              </div>\n            )}\n          </div>\n        )}\n      </div>\n    </main>\n  );\n}\n"
  },
  {
    "path": "web/src/app/auth/verify-email/page.tsx",
    "content": "import {\n  AuthTypeMetadata,\n  getAuthTypeMetadataSS,\n  getCurrentUserSS,\n} from \"@/lib/userSS\";\nimport Verify from \"./Verify\";\nimport { User } from \"@/lib/types\";\nimport { redirect } from \"next/navigation\";\n\nexport default async function Page() {\n  // catch cases where the backend is completely unreachable here\n  // without try / catch, will just raise an exception and the page\n  // will not render\n  let authTypeMetadata: AuthTypeMetadata | null = null;\n  let currentUser: User | null = null;\n  try {\n    [authTypeMetadata, currentUser] = await Promise.all([\n      getAuthTypeMetadataSS(),\n      getCurrentUserSS(),\n    ]);\n  } catch (e) {\n    console.log(`Some fetch failed for the login page - ${e}`);\n  }\n\n  if (!authTypeMetadata?.requiresVerification || currentUser?.is_verified) {\n    return redirect(\"/app\");\n  }\n\n  return <Verify user={currentUser} />;\n}\n"
  },
  {
    "path": "web/src/app/auth/waiting-on-verification/RequestNewVerificationEmail.tsx",
    "content": "\"use client\";\n\nimport { toast } from \"@/hooks/useToast\";\nimport { requestEmailVerification } from \"../lib\";\nimport { Spinner } from \"@/components/Spinner\";\nimport { useState, JSX } from \"react\";\n\nexport function RequestNewVerificationEmail({\n  children,\n  email,\n}: {\n  children: JSX.Element | string;\n  email: string;\n}) {\n  const [isRequestingVerification, setIsRequestingVerification] =\n    useState(false);\n\n  return (\n    <button\n      className=\"text-link\"\n      onClick={async () => {\n        setIsRequestingVerification(true);\n        const response = await requestEmailVerification(email);\n        setIsRequestingVerification(false);\n\n        if (response.ok) {\n          toast.success(\"A new verification email has been sent!\");\n        } else {\n          const errorDetail = (await response.json()).detail;\n          toast.error(\n            `Failed to send a new verification email - ${errorDetail}`\n          );\n        }\n      }}\n    >\n      {isRequestingVerification && <Spinner />}\n      {children}\n    </button>\n  );\n}\n"
  },
  {
    "path": "web/src/app/auth/waiting-on-verification/page.tsx",
    "content": "import {\n  AuthTypeMetadata,\n  getAuthTypeMetadataSS,\n  getCurrentUserSS,\n} from \"@/lib/userSS\";\nimport { redirect } from \"next/navigation\";\nimport { User } from \"@/lib/types\";\nimport { RequestNewVerificationEmail } from \"./RequestNewVerificationEmail\";\nimport Logo from \"@/refresh-components/Logo\";\nimport { Text } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\n\nexport default async function Page() {\n  // catch cases where the backend is completely unreachable here\n  // without try / catch, will just raise an exception and the page\n  // will not render\n  let authTypeMetadata: AuthTypeMetadata | null = null;\n  let currentUser: User | null = null;\n  try {\n    [authTypeMetadata, currentUser] = await Promise.all([\n      getAuthTypeMetadataSS(),\n      getCurrentUserSS(),\n    ]);\n  } catch (e) {\n    console.log(`Some fetch failed for the login page - ${e}`);\n  }\n\n  if (!currentUser) {\n    return redirect(\"/auth/login\");\n  }\n\n  if (!authTypeMetadata?.requiresVerification || currentUser.is_verified) {\n    return redirect(\"/app\");\n  }\n\n  return (\n    <main>\n      <div className=\"min-h-screen flex flex-col items-center justify-center py-12 px-4 sm:px-6 lg:px-8 gap-4\">\n        <Logo folded size={64} className=\"mx-auto w-fit\" />\n        <div className=\"flex flex-col gap-2\">\n          <Text as=\"span\">\n            {markdown(\n              `Hey, *${currentUser.email}*, it looks like you haven't verified your email yet.\\nCheck your inbox for an email from us to get started!`\n            )}\n          </Text>\n          <div className=\"flex flex-row items-center gap-1\">\n            <Text as=\"span\">If you don't see anything, click</Text>\n            <RequestNewVerificationEmail email={currentUser.email}>\n              <Text as=\"span\">here</Text>\n            </RequestNewVerificationEmail>\n            <Text as=\"span\">to request a new email.</Text>\n          </div>\n        </div>\n      </div>\n    </main>\n  );\n}\n"
  },
  {
    "path": "web/src/app/components/nrf/SettingsPanel.tsx",
    "content": "\"use client\";\n\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport { useNRFPreferences } from \"@/components/context/NRFPreferencesContext\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgX, SvgSettings, SvgSun, SvgMoon, SvgCheck } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport { cn } from \"@/lib/utils\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { useTheme } from \"next-themes\";\nimport {\n  CHAT_BACKGROUND_OPTIONS,\n  CHAT_BACKGROUND_NONE,\n} from \"@/lib/constants/chatBackgrounds\";\n\ninterface SettingRowProps {\n  label: string;\n  description?: string;\n  children: React.ReactNode;\n}\n\nconst SettingRow = ({ label, description, children }: SettingRowProps) => (\n  <div className=\"flex justify-between items-center py-3\">\n    <div className=\"flex flex-col gap-0.5\">\n      <Text mainUiBody text04>\n        {label}\n      </Text>\n      {description && (\n        <Text secondaryBody text03>\n          {description}\n        </Text>\n      )}\n    </div>\n    {children}\n  </div>\n);\n\ninterface BackgroundThumbnailProps {\n  thumbnailUrl: string;\n  label: string;\n  isNone?: boolean;\n  isSelected: boolean;\n  onClick: () => void;\n}\n\nconst BackgroundThumbnail = ({\n  thumbnailUrl,\n  label,\n  isNone = false,\n  isSelected,\n  onClick,\n}: BackgroundThumbnailProps) => (\n  <button\n    onClick={onClick}\n    className=\"relative overflow-hidden rounded-xl transition-all aspect-video cursor-pointer border-none p-0 bg-transparent group\"\n    title={label}\n    aria-label={`${label} background${isSelected ? \" (selected)\" : \"\"}`}\n  >\n    {isNone ? (\n      <div className=\"absolute inset-0 bg-background flex items-center justify-center\">\n        <Text secondaryBody text03>\n          None\n        </Text>\n      </div>\n    ) : (\n      <div\n        className=\"absolute inset-0 bg-cover bg-center transition-transform duration-300 group-hover:scale-105\"\n        style={{ backgroundImage: `url(${thumbnailUrl})` }}\n      />\n    )}\n    <div\n      className={cn(\n        \"absolute inset-0 transition-all rounded-xl\",\n        isSelected\n          ? \"ring-2 ring-inset ring-theme-primary-05\"\n          : \"ring-1 ring-inset ring-border-02 group-hover:ring-border-03\"\n      )}\n    />\n    {isSelected && (\n      <div className=\"absolute top-2 right-2 w-5 h-5 rounded-full bg-theme-primary-05 flex items-center justify-center\">\n        <SvgCheck className=\"w-3 h-3 stroke-text-inverted-05\" />\n      </div>\n    )}\n  </button>\n);\n\nexport const SettingsPanel = ({\n  settingsOpen,\n  toggleSettings,\n  handleUseOnyxToggle,\n}: {\n  settingsOpen: boolean;\n  toggleSettings: () => void;\n  handleUseOnyxToggle: (checked: boolean) => void;\n}) => {\n  const { useOnyxAsNewTab } = useNRFPreferences();\n  const { theme, setTheme } = useTheme();\n  const { user, updateUserChatBackground } = useUser();\n\n  const currentBackgroundId = user?.preferences?.chat_background ?? \"none\";\n  const isDark = theme === \"dark\";\n\n  const toggleTheme = () => {\n    setTheme(isDark ? \"light\" : \"dark\");\n  };\n\n  const handleBackgroundChange = (backgroundId: string) => {\n    updateUserChatBackground(\n      backgroundId === CHAT_BACKGROUND_NONE ? null : backgroundId\n    );\n  };\n\n  return (\n    <>\n      {/* Backdrop overlay */}\n      <div\n        className={cn(\n          \"fixed inset-0 bg-mask-03 backdrop-blur-sm z-40 transition-opacity duration-300\",\n          settingsOpen\n            ? \"opacity-100 pointer-events-auto\"\n            : \"opacity-0 pointer-events-none\"\n        )}\n        onClick={toggleSettings}\n      />\n\n      {/* Settings panel */}\n      <div\n        className={cn(\n          \"fixed top-0 right-0 w-[25rem] h-full z-50\",\n          \"bg-gradient-to-b from-background-tint-02 to-background-tint-01\",\n          \"backdrop-blur-[24px] border-l border-border-01 overflow-y-auto\",\n          \"transition-transform duration-300 ease-out\",\n          settingsOpen ? \"translate-x-0\" : \"translate-x-full\"\n        )}\n      >\n        {/* Header */}\n        <div className=\"sticky top-0 z-10 bg-gradient-to-b from-background-tint-02 to-transparent pb-4\">\n          <div className=\"flex items-center justify-between px-6 pt-6 pb-2\">\n            <div className=\"flex items-center gap-3\">\n              <div className=\"flex items-center justify-center w-10 h-10 rounded-xl bg-background-tint-02\">\n                <SvgSettings className=\"w-5 h-5 stroke-text-03\" />\n              </div>\n              <Text headingH3 text04>\n                Settings\n              </Text>\n            </div>\n            <div className=\"flex items-center gap-3\">\n              {/* Theme Toggle */}\n              <Button\n                icon={isDark ? SvgMoon : SvgSun}\n                onClick={toggleTheme}\n                prominence=\"tertiary\"\n                tooltip={`Switch to ${isDark ? \"light\" : \"dark\"} theme`}\n              />\n              <Button\n                icon={SvgX}\n                onClick={toggleSettings}\n                prominence=\"tertiary\"\n                tooltip=\"Close settings\"\n              />\n            </div>\n          </div>\n        </div>\n\n        <div className=\"px-6 pb-8 flex flex-col gap-8\">\n          {/* General Section */}\n          <section className=\"flex flex-col gap-3\">\n            <Text secondaryAction text03 className=\"uppercase tracking-wider\">\n              General\n            </Text>\n            <div className=\"flex flex-col gap-1 bg-background-tint-01 rounded-2xl px-4\">\n              <SettingRow label=\"Use Onyx as new tab page\">\n                <Switch\n                  checked={useOnyxAsNewTab}\n                  onCheckedChange={handleUseOnyxToggle}\n                />\n              </SettingRow>\n            </div>\n          </section>\n\n          {/* Background Section */}\n          <section className=\"flex flex-col gap-3\">\n            <Text secondaryAction text03 className=\"uppercase tracking-wider\">\n              Background\n            </Text>\n            <div className=\"grid grid-cols-3 gap-2\">\n              {CHAT_BACKGROUND_OPTIONS.map((bg) => (\n                <BackgroundThumbnail\n                  key={bg.id}\n                  thumbnailUrl={bg.thumbnail}\n                  label={bg.label}\n                  isNone={bg.src === CHAT_BACKGROUND_NONE}\n                  isSelected={currentBackgroundId === bg.id}\n                  onClick={() => handleBackgroundChange(bg.id)}\n                />\n              ))}\n            </div>\n          </section>\n        </div>\n      </div>\n    </>\n  );\n};\n"
  },
  {
    "path": "web/src/app/config/timeRange.tsx",
    "content": "import { getXDaysAgo, getXYearsAgo } from \"@/lib/dateUtils\";\n\nexport const timeRangeValues = [\n  { label: \"Last 2 years\", value: getXYearsAgo(2) },\n  { label: \"Last year\", value: getXYearsAgo(1) },\n  { label: \"Last 30 days\", value: getXDaysAgo(30) },\n  { label: \"Last 7 days\", value: getXDaysAgo(7) },\n  { label: \"Today\", value: getXDaysAgo(1) },\n];\n"
  },
  {
    "path": "web/src/app/connector/oauth/callback/[source]/route.tsx",
    "content": "import { INTERNAL_URL } from \"@/lib/constants\";\nimport { NextRequest, NextResponse } from \"next/server\";\n\n// TODO: deprecate this and just go directly to the backend via /api/...\n// For some reason Egnyte doesn't work when using /api, so leaving this as is for now\n// If we do try and remove this, make sure we test the Egnyte connector oauth flow\nexport async function GET(request: NextRequest) {\n  try {\n    const backendUrl = new URL(INTERNAL_URL);\n    // Copy path and query parameters from incoming request\n    backendUrl.pathname = request.nextUrl.pathname;\n    backendUrl.search = request.nextUrl.search;\n\n    const response = await fetch(backendUrl, {\n      method: \"GET\",\n      headers: request.headers,\n      body: request.body,\n      signal: request.signal,\n      // @ts-ignore\n      duplex: \"half\",\n    });\n\n    const responseData = await response.json();\n    if (responseData.redirect_url) {\n      return NextResponse.redirect(responseData.redirect_url);\n    }\n\n    return new NextResponse(JSON.stringify(responseData), {\n      status: response.status,\n      headers: response.headers,\n    });\n  } catch (error: unknown) {\n    console.error(\"Proxy error:\", error);\n    return NextResponse.json(\n      {\n        message: \"Proxy error\",\n        error:\n          error instanceof Error ? error.message : \"An unknown error occurred\",\n      },\n      { status: 500 }\n    );\n  }\n}\n"
  },
  {
    "path": "web/src/app/craft/README.md",
    "content": "<h2 align=\"center\">\n    <a href=\"https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme\"> <img width=\"50%\" src=\"https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true\" /></a>\n</h2>\n\n<h1 align=\"center\">Onyx Craft</h1>\n\n<p align=\"center\">\n  <strong>Build apps, documents, and presentations from your company knowledge</strong>\n</p>\n\n<p align=\"center\">\n  <a href=\"https://docs.onyx.app/overview/core_features/craft\"><img alt=\"Documentation\" src=\"https://img.shields.io/badge/docs-onyx.app-blue?style=flat-square\" /></a>\n  <a href=\"https://github.com/onyx-dot-app/onyx/blob/main/LICENSE\"><img alt=\"License\" src=\"https://img.shields.io/badge/license-MIT-green?style=flat-square\" /></a>\n    <a href=\"https://discord.gg/TDJ59cGV2X\" target=\"_blank\" rel=\"noopener noreferrer\">\n        <img src=\"https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white\" alt=\"Discord\" />\n    </a>\n  <img alt=\"GitHub Repo stars\" src=\"https://img.shields.io/github/stars/onyx-dot-app/onyx\" />\n</p>\n\n---\n\n<p align=\"center\">\n  <a href=\"https://www.youtube.com/watch?v=Hvjn76YSIRY\">\n    <img src=\"https://img.youtube.com/vi/Hvjn76YSIRY/hqdefault.jpg\" alt=\"Watch the video\" />\n  </a>\n</p>\n\n---\n\n## Overview\n\nOnyx Craft is an AI coding agent that creates web applications, documents, presentations, and more using your company's indexed knowledge. Users describe what they want in natural language, and the agent builds artifacts in an isolated sandbox environment with access to documents from connected sources like Linear, Slack, Google Drive, Confluence, and more.\n\nFor detailed documentation, visit [our docs](https://docs.onyx.app/overview/core_features/craft).\n\n## Key Features\n\n- **Web Applications** — Build Next.js applications with React, shadcn/ui, and Recharts for interactive dashboards and tools\n- **Documents & Reports** — Generate polished markdown documents with DOCX export\n- **Knowledge Integration** — Access indexed documents from your connectors (Linear, Slack, Google Drive, Confluence, etc.)\n- **Real-time Preview** — Watch the agent build with live output streaming and tool call visibility\n- **Session Management** — Pre-provisioned sandboxes, automatic snapshots, and session restore\n\n## Quick Start\n\n### Requirements\n\n- Onyx deployment with an LLM provider configured (Anthropic, OpenAI, etc.)\n\n### New Installations\n\nYou can install Onyx Craft using our [quickstart script](https://docs.onyx.app/deployment/getting_started/quickstart):\n\n```bash\ncurl -fsSL https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.sh > install.sh \\\n  && chmod +x install.sh \\\n  && ./install.sh --include-craft\n```\n\nThis will:\n\n- Set `ENABLE_CRAFT=true` in the `.env` file\n- Set `IMAGE_TAG=craft-latest` to use Craft-enabled images\n- Run template setup on container startup\n\n### Existing Deployments\n\nEnable Craft on an existing deployment:\n\n```bash\nENABLE_CRAFT=true IMAGE_TAG=craft-latest docker compose up -d\n```\n\n## How It Works\n\n1. **User visits `/craft/v1`** — A sandbox is pre-provisioned in the background\n2. **User describes what they want** — Message is sent to the OpenCode agent\n3. **Agent builds artifacts** — Uses company knowledge and uploaded files\n4. **Live preview shows output** — Next.js app, markdown, or other artifacts\n5. **User iterates or downloads** — Request changes or export finished work\n\n## Technical Architecture\n\n### Sandbox Backends\n\nCraft supports two sandbox backends controlled by `SANDBOX_BACKEND`:\n\n**Self-Hosted**\n\n- Filesystem-based sandboxes under `SANDBOX_BASE_PATH` (default: `/tmp/onyx-sandboxes`)\n- No container isolation (process-level only)\n- No automatic cleanup or snapshots\n- Direct file access via symlinks to user's knowledge files\n\n**Cloud** (Production)\n\n- Pod-based isolation with ClusterIP services\n- S3-based snapshots for session persistence\n- Automatic cleanup of idle sandboxes (default: 1 hour timeout)\n- Two containers per pod:\n  - `sandbox` — Runs OpenCode agent and Next.js preview server\n  - `file-sync` — Sidecar for S3 file synchronization\n\n### Session Lifecycle\n\nSessions go through these states:\n\n| State            | Description                                                     |\n| ---------------- | --------------------------------------------------------------- |\n| **Provisioning** | Sandbox being created when user visits /craft                   |\n| **Ready**        | Sandbox ready, waiting for first message                        |\n| **Running**      | Active session with agent processing                            |\n| **Idle**         | No recent activity                                              |\n| **Sleeping**     | Idle timeout reached, pod terminated (K8s only), snapshot saved |\n| **Restored**     | User returns, snapshot loaded, session continues                |\n\n### Sandbox Workspace Structure\n\nEach session gets an isolated workspace:\n\n```\n$SANDBOX_ROOT/\n├── files/                     # Symlink to user's knowledge files\n└── sessions/\n    └── {session_id}/\n        ├── outputs/web/       # Next.js application\n        ├── .venv/             # Python environment\n        ├── .opencode/skills/  # Agent skills\n        ├── attachments/       # User uploads\n        ├── AGENTS.md          # Agent instructions\n        └── opencode.json      # LLM configuration\n```\n\n### Sandbox Cleanup\n\nIdle sandboxes are cleaned up by a Celery background task:\n\n- **Trigger**: Sandbox idle longer than `SANDBOX_IDLE_TIMEOUT_SECONDS` (default: 1 hour)\n- **Kubernetes**: Creates snapshots of all sessions, terminates the pod, marks sandbox as \"sleeping\"\n- **Local**: No automatic cleanup (sandboxes persist until manually removed)\n\n## Configuration\n\nKey configuration categories (see source for full reference):\n\n- **Core** — `ENABLE_CRAFT`, `SANDBOX_BACKEND` (local vs kubernetes)\n- **Lifecycle** — Idle timeout (default 1 hour), max concurrent sandboxes per org (default 10)\n- **Kubernetes** — Namespace, container image, S3 bucket for snapshots\n- **File uploads** — Size limits (50MB per file, 20 files per session, 200MB total)\n- **Rate limits** — Free users: 5 messages total; Paid users: 25 messages/week\n\n## Tech Stack\n\n**Frontend**\n\n- Next.js, React, TypeScript\n- Zustand for state management\n- shadcn/ui components\n\n**Backend**\n\n- FastAPI, SQLAlchemy, Celery\n- PostgreSQL for session/sandbox metadata\n- S3-compatible storage for snapshots\n\n**Agent**\n\n- OpenCode CLI with ACP (Agent Communication Protocol)\n- JSON-RPC 2.0 over stdin/stdout\n\n**Sandbox Environment**\n\n- Next.js 16, React 19\n- shadcn/ui, Tailwind CSS, Recharts\n- Python 3.11 with numpy, pandas, matplotlib\n\n## Coming Soon\n\n- **Presentations** — Create slide decks with AI-generated visuals using nanobanana\n- **Spreadsheets**\n- **HTML Dashboards**\n\n## Contributing\n\nSee the main [CONTRIBUTING.md](../../../../CONTRIBUTING.md) for guidelines.\n\nFor Craft-specific development:\n\n1. Set `ENABLE_CRAFT=true` in your environment\n2. Ensure templates are available at `/templates/outputs` and `/templates/venv`\n3. For local development, sandboxes are created under `/tmp/onyx-sandboxes`\n\n## License\n\nMIT — see [LICENSE](../../../../LICENSE)\n"
  },
  {
    "path": "web/src/app/craft/components/BigButton.tsx",
    "content": "\"use client\";\n\nimport { forwardRef, type ButtonHTMLAttributes } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nexport interface BigButtonProps\n  extends ButtonHTMLAttributes<HTMLButtonElement> {\n  // Subvariants\n  primary?: boolean;\n  secondary?: boolean;\n\n  // Inverted mode for dark backgrounds\n  inverted?: boolean;\n}\n\nconst BigButton = forwardRef<HTMLButtonElement, BigButtonProps>(\n  (\n    { primary, secondary, inverted, disabled, children, className, ...props },\n    ref\n  ) => {\n    const subvariant = primary\n      ? \"primary\"\n      : secondary\n        ? \"secondary\"\n        : \"primary\";\n\n    const baseStyles =\n      \"px-6 py-3 rounded-xl w-fit flex flex-row items-center justify-center transition-colors\";\n\n    const variantStyles = {\n      primary: {\n        normal:\n          \"bg-theme-primary-05 hover:bg-theme-primary-04 active:bg-theme-primary-06\",\n        inverted: \"bg-white hover:bg-gray-200 active:bg-gray-300\",\n        disabled: \"bg-background-neutral-04\",\n      },\n      secondary: {\n        normal:\n          \"bg-transparent border border-border-01 hover:bg-background-tint-02 active:bg-background-tint-00\",\n        inverted:\n          \"bg-transparent border border-text-inverted-05 hover:bg-background-tint-inverted-02 active:bg-background-tint-inverted-01\",\n        disabled: \"bg-background-neutral-03 border border-border-01\",\n      },\n    };\n\n    const textStyles = {\n      primary: {\n        normal: \"text-text-inverted-05\",\n        inverted: \"text-gray-900\",\n        disabled: \"text-text-inverted-04\",\n      },\n      secondary: {\n        normal:\n          \"text-text-03 group-hover:text-text-04 group-active:text-text-05\",\n        inverted: \"text-text-inverted-05\",\n        disabled: \"text-text-01\",\n      },\n    };\n\n    const getVariantStyle = () => {\n      if (disabled) return variantStyles[subvariant].disabled;\n      return inverted\n        ? variantStyles[subvariant].inverted\n        : variantStyles[subvariant].normal;\n    };\n\n    const getTextStyle = () => {\n      if (disabled) return textStyles[subvariant].disabled;\n      return inverted\n        ? textStyles[subvariant].inverted\n        : textStyles[subvariant].normal;\n    };\n\n    // Check if className contains text color override\n    const hasTextWhiteOverride =\n      className?.includes(\"!text-white\") || className?.includes(\"text-white\");\n    const hasTextBlackOverride =\n      className?.includes(\"!text-black\") || className?.includes(\"text-black\");\n\n    const getTextOverride = () => {\n      if (hasTextWhiteOverride) return \"!text-white\";\n      if (hasTextBlackOverride) return \"!text-black\";\n      return getTextStyle();\n    };\n\n    return (\n      <button\n        ref={ref}\n        className={cn(\"group\", baseStyles, getVariantStyle(), className)}\n        disabled={disabled}\n        type=\"button\"\n        {...props}\n      >\n        <Text\n          mainContentEmphasis\n          className={cn(\"whitespace-nowrap\", getTextOverride())}\n          as=\"span\"\n        >\n          {children}\n        </Text>\n      </button>\n    );\n  }\n);\nBigButton.displayName = \"BigButton\";\n\nexport default BigButton;\n"
  },
  {
    "path": "web/src/app/craft/components/BuildLLMPopover.tsx",
    "content": "\"use client\";\n\nimport { useState, useCallback, useRef, useEffect, useMemo } from \"react\";\nimport {\n  SvgCheck,\n  SvgChevronDown,\n  SvgChevronRight,\n  SvgPlug,\n} from \"@opal/icons\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { LLMProviderDescriptor } from \"@/interfaces/llm\";\nimport {\n  BuildLlmSelection,\n  BUILD_MODE_PROVIDERS,\n  isRecommendedModel,\n} from \"@/app/craft/onboarding/constants\";\nimport { ToggleWarningModal } from \"./ToggleWarningModal\";\nimport { getProviderIcon } from \"@/app/admin/configuration/llm/utils\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport {\n  Accordion,\n  AccordionContent,\n  AccordionItem,\n  AccordionTrigger,\n} from \"@/components/ui/accordion\";\n\ninterface BuildLLMPopoverProps {\n  currentSelection: BuildLlmSelection | null;\n  onSelectionChange: (selection: BuildLlmSelection) => void;\n  llmProviders: LLMProviderDescriptor[] | undefined;\n  onOpenOnboarding: (providerKey: string) => void;\n  children: React.ReactNode;\n  disabled?: boolean;\n}\n\ninterface ModelOption {\n  providerKey: string;\n  providerName: string;\n  providerDisplayName: string;\n  modelName: string;\n  displayName: string;\n  isRecommended: boolean;\n  isConfigured: boolean;\n}\n\nexport function BuildLLMPopover({\n  currentSelection,\n  onSelectionChange,\n  llmProviders,\n  onOpenOnboarding,\n  children,\n  disabled = false,\n}: BuildLLMPopoverProps) {\n  const [showRecommendedOnly, setShowRecommendedOnly] = useState(true);\n  const [showToggleWarning, setShowToggleWarning] = useState(false);\n  const [isOpen, setIsOpen] = useState(false);\n  const isClosingModalRef = useRef(false);\n  const scrollContainerRef = useRef<HTMLDivElement>(null);\n  const selectedItemRef = useRef<HTMLDivElement>(null);\n\n  // Check which providers are configured (exact match on provider field)\n  const isProviderConfigured = useCallback(\n    (providerKey: string) => {\n      return llmProviders?.some((p) => p.provider === providerKey);\n    },\n    [llmProviders]\n  );\n\n  // Get the actual provider descriptor for a configured provider\n  const getProviderDescriptor = useCallback(\n    (providerKey: string) => {\n      return llmProviders?.find((p) => p.provider === providerKey);\n    },\n    [llmProviders]\n  );\n\n  // Build model options based on mode\n  const modelOptions = useMemo((): ModelOption[] => {\n    const options: ModelOption[] = [];\n\n    if (showRecommendedOnly) {\n      // Show curated list from BUILD_MODE_PROVIDERS\n      BUILD_MODE_PROVIDERS.forEach((provider) => {\n        const isConfigured = isProviderConfigured(provider.providerName);\n        const descriptor = getProviderDescriptor(provider.providerName);\n        const modelsToShow = provider.models.filter((m) => m.recommended);\n\n        modelsToShow.forEach((model) => {\n          // Get display name from backend if available\n          const backendConfig = descriptor?.model_configurations.find(\n            (mc) => mc.name === model.name\n          );\n          options.push({\n            providerKey: provider.providerName,\n            providerName: descriptor?.name || provider.label,\n            providerDisplayName: provider.label,\n            modelName: model.name,\n            displayName: backendConfig?.display_name || model.label,\n            isRecommended: true,\n            isConfigured: isConfigured ?? false,\n          });\n        });\n      });\n    } else {\n      // Show ALL configured providers and their visible models\n      llmProviders?.forEach((provider) => {\n        const visibleModels = provider.model_configurations.filter(\n          (m) => m.is_visible\n        );\n\n        visibleModels.forEach((model) => {\n          options.push({\n            providerKey: provider.provider,\n            providerName: provider.name,\n            providerDisplayName:\n              provider.provider_display_name || provider.provider,\n            modelName: model.name,\n            displayName: model.display_name || model.name,\n            isRecommended: isRecommendedModel(provider.provider, model.name),\n            isConfigured: true,\n          });\n        });\n      });\n    }\n\n    return options;\n  }, [\n    showRecommendedOnly,\n    llmProviders,\n    isProviderConfigured,\n    getProviderDescriptor,\n  ]);\n\n  // Group options by provider\n  const groupedOptions = useMemo(() => {\n    const groups = new Map<\n      string,\n      {\n        providerKey: string;\n        displayName: string;\n        options: ModelOption[];\n        isConfigured: boolean;\n      }\n    >();\n\n    modelOptions.forEach((option) => {\n      const groupKey = option.providerKey;\n\n      if (!groups.has(groupKey)) {\n        groups.set(groupKey, {\n          providerKey: option.providerKey,\n          displayName: option.providerDisplayName,\n          options: [],\n          isConfigured: option.isConfigured,\n        });\n      }\n\n      groups.get(groupKey)!.options.push(option);\n    });\n\n    // Sort groups alphabetically\n    const sortedKeys = Array.from(groups.keys()).sort((a, b) =>\n      groups.get(a)!.displayName.localeCompare(groups.get(b)!.displayName)\n    );\n\n    return sortedKeys.map((key) => groups.get(key)!);\n  }, [modelOptions]);\n\n  // Determine current group for auto-expand\n  const currentGroupKey = useMemo(() => {\n    if (!currentSelection) return \"\";\n    return currentSelection.provider;\n  }, [currentSelection]);\n\n  // Track expanded groups\n  const [expandedGroups, setExpandedGroups] = useState<string[]>([\n    currentGroupKey,\n  ]);\n\n  // Reset expanded groups when popover opens\n  useEffect(() => {\n    if (isOpen) {\n      setExpandedGroups([currentGroupKey]);\n    }\n  }, [isOpen, currentGroupKey]);\n\n  // Auto-scroll to selected model\n  useEffect(() => {\n    if (isOpen) {\n      const timer = setTimeout(() => {\n        selectedItemRef.current?.scrollIntoView({\n          behavior: \"instant\",\n          block: \"center\",\n        });\n      }, 50);\n      return () => clearTimeout(timer);\n    }\n  }, [isOpen]);\n\n  const handleAccordionChange = (value: string[]) => {\n    setExpandedGroups(value);\n  };\n\n  const applySelection = useCallback(\n    (option: ModelOption) => {\n      if (!option.isConfigured) return;\n\n      onSelectionChange({\n        providerName: option.providerName,\n        provider: option.providerKey,\n        modelName: option.modelName,\n      });\n      setIsOpen(false);\n    },\n    [onSelectionChange]\n  );\n\n  // Handle toggle change - show warning when turning OFF\n  const handleToggleChange = (checked: boolean) => {\n    if (!checked && showRecommendedOnly) {\n      setShowToggleWarning(true);\n    } else {\n      setShowRecommendedOnly(checked);\n    }\n  };\n\n  // Reset closing flag after modal close transition\n  useEffect(() => {\n    if (!showToggleWarning && isClosingModalRef.current) {\n      const timeoutId = setTimeout(() => {\n        isClosingModalRef.current = false;\n      }, 100);\n      return () => clearTimeout(timeoutId);\n    }\n  }, [showToggleWarning]);\n\n  const handleConnectClick = (providerKey: string) => {\n    setIsOpen(false);\n    onOpenOnboarding(providerKey);\n  };\n\n  const handlePopoverOpenChange = (open: boolean) => {\n    if (disabled && open) {\n      return;\n    }\n    if (!open && (showToggleWarning || isClosingModalRef.current)) {\n      return;\n    }\n    setIsOpen(open);\n  };\n\n  const renderModelItem = (option: ModelOption) => {\n    const isSelected =\n      currentSelection?.modelName === option.modelName &&\n      currentSelection?.provider === option.providerKey;\n\n    // Build description with recommendation badge\n    const description = option.isRecommended ? \"Recommended\" : undefined;\n\n    return (\n      <div\n        key={`${option.providerKey}-${option.modelName}`}\n        ref={isSelected ? selectedItemRef : undefined}\n      >\n        <LineItem\n          selected={isSelected}\n          description={description}\n          onClick={() => applySelection(option)}\n          rightChildren={\n            isSelected ? (\n              <SvgCheck className=\"h-4 w-4 stroke-action-link-05 shrink-0\" />\n            ) : null\n          }\n        >\n          {option.displayName}\n        </LineItem>\n      </div>\n    );\n  };\n\n  return (\n    <>\n      <Popover open={isOpen} onOpenChange={handlePopoverOpenChange}>\n        <Popover.Trigger asChild>{children}</Popover.Trigger>\n        <Popover.Content\n          side=\"bottom\"\n          align=\"start\"\n          width=\"lg\"\n          onInteractOutside={(e) => {\n            if (showToggleWarning || isClosingModalRef.current) {\n              e.preventDefault();\n            }\n          }}\n          onPointerDownOutside={(e) => {\n            if (showToggleWarning || isClosingModalRef.current) {\n              e.preventDefault();\n            }\n          }}\n        >\n          <div className=\"px-3\">\n            <Section gap={0.5}>\n              {/* Toggle for recommended only */}\n              <div className=\"flex items-center justify-between py-3 gap-3 border-b border-border-01 px-1\">\n                <Text secondaryBody text03>\n                  Recommended Models Only\n                </Text>\n                <Switch\n                  checked={showRecommendedOnly}\n                  onCheckedChange={handleToggleChange}\n                />\n              </div>\n\n              {/* Model List */}\n              <PopoverMenu scrollContainerRef={scrollContainerRef}>\n                {groupedOptions.length === 0\n                  ? [\n                      <div key=\"empty\" className=\"py-3 px-2\">\n                        <Text secondaryBody text03>\n                          No models found\n                        </Text>\n                      </div>,\n                    ]\n                  : groupedOptions.length === 1\n                    ? // Single provider - show models directly\n                      [\n                        <div\n                          key=\"single-provider\"\n                          className=\"flex flex-col gap-1\"\n                        >\n                          {groupedOptions[0]!.isConfigured ? (\n                            groupedOptions[0]!.options.map(renderModelItem)\n                          ) : (\n                            <div className=\"flex items-center justify-between px-2 py-2\">\n                              <Text secondaryBody text03>\n                                Not configured\n                              </Text>\n                              <button\n                                onClick={() =>\n                                  handleConnectClick(\n                                    groupedOptions[0]!.providerKey\n                                  )\n                                }\n                                className=\"flex items-center gap-1 px-2 py-1 text-xs rounded-08 bg-background-02 hover:bg-background-03 transition-colors\"\n                              >\n                                <SvgPlug className=\"w-3 h-3\" />\n                                <span>Connect</span>\n                              </button>\n                            </div>\n                          )}\n                        </div>,\n                      ]\n                    : // Multiple providers - show accordion\n                      [\n                        <Accordion\n                          key=\"accordion\"\n                          type=\"multiple\"\n                          value={expandedGroups}\n                          onValueChange={handleAccordionChange}\n                          className=\"w-full flex flex-col\"\n                        >\n                          {groupedOptions.map((group) => {\n                            const isExpanded = expandedGroups.includes(\n                              group.providerKey\n                            );\n                            const ProviderIcon = getProviderIcon(\n                              group.providerKey\n                            );\n\n                            return (\n                              <AccordionItem\n                                key={group.providerKey}\n                                value={group.providerKey}\n                                className=\"border-none pt-1\"\n                              >\n                                {/* Group Header */}\n                                <AccordionTrigger className=\"flex items-center rounded-08 hover:no-underline hover:bg-background-tint-02 group [&>svg]:hidden w-full py-1\">\n                                  <div className=\"flex items-center gap-1 shrink-0\">\n                                    <div className=\"flex items-center justify-center size-5 shrink-0\">\n                                      <ProviderIcon size={16} />\n                                    </div>\n                                    <Text\n                                      secondaryBody\n                                      text03\n                                      nowrap\n                                      className=\"px-0.5\"\n                                    >\n                                      {group.displayName}\n                                    </Text>\n                                  </div>\n                                  <div className=\"flex-1\" />\n                                  {!group.isConfigured && (\n                                    <button\n                                      onClick={(e) => {\n                                        e.stopPropagation();\n                                        handleConnectClick(group.providerKey);\n                                      }}\n                                      className=\"flex items-center gap-1 px-2 py-0.5 mr-1 text-xs rounded-08 bg-background-02 hover:bg-background-03 transition-colors\"\n                                    >\n                                      <SvgPlug className=\"w-3 h-3\" />\n                                      <span>Connect</span>\n                                    </button>\n                                  )}\n                                  <div className=\"flex items-center justify-center size-6 shrink-0\">\n                                    {isExpanded ? (\n                                      <SvgChevronDown className=\"h-4 w-4 stroke-text-04 shrink-0\" />\n                                    ) : (\n                                      <SvgChevronRight className=\"h-4 w-4 stroke-text-04 shrink-0\" />\n                                    )}\n                                  </div>\n                                </AccordionTrigger>\n\n                                {/* Model Items */}\n                                <AccordionContent className=\"pb-0 pt-0\">\n                                  <div className=\"flex flex-col gap-1\">\n                                    {group.isConfigured ? (\n                                      group.options.map(renderModelItem)\n                                    ) : (\n                                      <div className=\"py-1.5 px-3\">\n                                        <Text secondaryBody text03>\n                                          Not configured\n                                        </Text>\n                                      </div>\n                                    )}\n                                  </div>\n                                </AccordionContent>\n                              </AccordionItem>\n                            );\n                          })}\n                        </Accordion>,\n                      ]}\n              </PopoverMenu>\n            </Section>\n          </div>\n        </Popover.Content>\n      </Popover>\n\n      {/* Warning modal when turning OFF \"Recommended Models Only\" */}\n      <ToggleWarningModal\n        open={showToggleWarning}\n        onConfirm={() => {\n          setShowRecommendedOnly(false);\n          isClosingModalRef.current = true;\n          setShowToggleWarning(false);\n        }}\n        onCancel={() => {\n          isClosingModalRef.current = true;\n          setShowToggleWarning(false);\n        }}\n      />\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/BuildMessageList.tsx",
    "content": "\"use client\";\n\nimport { useRef, useEffect } from \"react\";\nimport Logo from \"@/refresh-components/Logo\";\nimport TextChunk from \"@/app/craft/components/TextChunk\";\nimport ThinkingCard from \"@/app/craft/components/ThinkingCard\";\nimport ToolCallPill from \"@/app/craft/components/ToolCallPill\";\nimport TodoListCard from \"@/app/craft/components/TodoListCard\";\nimport WorkingPill from \"@/app/craft/components/WorkingPill\";\nimport UserMessage from \"@/app/craft/components/UserMessage\";\nimport { BuildMessage } from \"@/app/craft/types/streamingTypes\";\nimport {\n  StreamItem,\n  GroupedStreamItem,\n  ToolCallState,\n} from \"@/app/craft/types/displayTypes\";\nimport { isWorkingToolCall } from \"@/app/craft/utils/streamItemHelpers\";\n\n/**\n * BlinkingDot - Pulsing gray circle for loading state\n * Matches the main chat UI's loading indicator\n */\nfunction BlinkingDot() {\n  return (\n    <span className=\"animate-pulse flex-none bg-theme-primary-05 inline-block rounded-full h-3 w-3 ml-2 mt-2\" />\n  );\n}\n\n/**\n * Group consecutive working tool calls into WorkingGroup items.\n * Keeps text, thinking, todo_list, and task tool_calls as individual items.\n */\nfunction groupStreamItems(items: StreamItem[]): GroupedStreamItem[] {\n  const grouped: GroupedStreamItem[] = [];\n  let currentWorkingGroup: ToolCallState[] = [];\n\n  const flushWorkingGroup = () => {\n    const firstToolCall = currentWorkingGroup[0];\n    if (firstToolCall) {\n      grouped.push({\n        type: \"working_group\",\n        id: `working-${firstToolCall.id}`,\n        toolCalls: [...currentWorkingGroup],\n      });\n      currentWorkingGroup = [];\n    }\n  };\n\n  for (const item of items) {\n    if (item.type === \"tool_call\" && isWorkingToolCall(item.toolCall)) {\n      // Add to current working group\n      currentWorkingGroup.push(item.toolCall);\n    } else {\n      // Flush any accumulated working group before adding non-working item\n      flushWorkingGroup();\n      // Add the item as-is (text, thinking, todo_list, or task tool_call)\n      grouped.push(item as GroupedStreamItem);\n    }\n  }\n\n  // Don't forget to flush any remaining working group\n  flushWorkingGroup();\n\n  return grouped;\n}\n\ninterface BuildMessageListProps {\n  messages: BuildMessage[];\n  streamItems: StreamItem[];\n  isStreaming?: boolean;\n  /** Whether auto-scroll is enabled (user is at bottom) */\n  autoScrollEnabled?: boolean;\n  /** Ref to the end marker div for scroll detection */\n  messagesEndRef?: React.RefObject<HTMLDivElement>;\n}\n\n/**\n * BuildMessageList - Displays the conversation history with FIFO rendering\n *\n * User messages are shown as right-aligned bubbles.\n * Agent responses render streamItems in exact chronological order:\n * text, thinking, and tool calls appear exactly as they arrived.\n */\nexport default function BuildMessageList({\n  messages,\n  streamItems,\n  isStreaming = false,\n  autoScrollEnabled = true,\n  messagesEndRef: externalMessagesEndRef,\n}: BuildMessageListProps) {\n  const internalMessagesEndRef = useRef<HTMLDivElement>(null);\n  // Use external ref if provided, otherwise use internal ref\n  const messagesEndRef = externalMessagesEndRef ?? internalMessagesEndRef;\n\n  // Auto-scroll to bottom when new content arrives (only if auto-scroll is enabled)\n  useEffect(() => {\n    if (autoScrollEnabled && messagesEndRef.current) {\n      messagesEndRef.current.scrollIntoView({ behavior: \"smooth\" });\n    }\n  }, [messages.length, streamItems.length, autoScrollEnabled, messagesEndRef]);\n\n  // Determine if we should show streaming response area (for current in-progress response)\n  const hasStreamItems = streamItems.length > 0;\n  const lastMessage = messages[messages.length - 1];\n  const lastMessageIsUser = lastMessage?.type === \"user\";\n  // Show streaming area if we have stream items OR if we're waiting for a response to the latest user message\n  const showStreamingArea =\n    hasStreamItems || (isStreaming && lastMessageIsUser);\n\n  // Check for active tools (for \"Working...\" state)\n  const hasActiveTools = streamItems.some(\n    (item) =>\n      item.type === \"tool_call\" &&\n      (item.toolCall.status === \"in_progress\" ||\n        item.toolCall.status === \"pending\")\n  );\n\n  // Helper to render stream items with grouping (used for both saved messages and current streaming)\n  const renderStreamItems = (items: StreamItem[], isCurrentStream = false) => {\n    const grouped = groupStreamItems(items);\n\n    // Find the index of the last working_group (only relevant for current stream)\n    const lastWorkingGroupIndex = isCurrentStream\n      ? grouped.findLastIndex((item) => item.type === \"working_group\")\n      : -1;\n\n    return grouped.map((item, index) => {\n      switch (item.type) {\n        case \"text\":\n          return <TextChunk key={item.id} content={item.content} />;\n        case \"thinking\":\n          return (\n            <ThinkingCard\n              key={item.id}\n              content={item.content}\n              isStreaming={item.isStreaming}\n            />\n          );\n        case \"tool_call\":\n          // Only task/subagent tools reach here (non-working tools)\n          return <ToolCallPill key={item.id} toolCall={item.toolCall} />;\n        case \"todo_list\":\n          return (\n            <TodoListCard\n              key={item.id}\n              todoList={item.todoList}\n              defaultOpen={item.todoList.isOpen}\n            />\n          );\n        case \"working_group\":\n          return (\n            <WorkingPill\n              key={item.id}\n              toolCalls={item.toolCalls}\n              isLatest={index === lastWorkingGroupIndex}\n            />\n          );\n        default:\n          return null;\n      }\n    });\n  };\n\n  // Helper to render an agent message\n  const renderAgentMessage = (message: BuildMessage) => {\n    // Check if we have saved stream items in message_metadata\n    const savedStreamItems = message.message_metadata?.streamItems as\n      | StreamItem[]\n      | undefined;\n\n    return (\n      <div key={message.id} className=\"flex items-start gap-3 py-4\">\n        <div className=\"shrink-0 mt-0.5\">\n          <Logo folded size={24} />\n        </div>\n        <div className=\"flex-1 flex flex-col gap-3 min-w-0\">\n          {savedStreamItems && savedStreamItems.length > 0 ? (\n            // Render full stream items (includes tool calls, thinking, etc.)\n            renderStreamItems(savedStreamItems)\n          ) : (\n            // Fallback to text content only\n            <TextChunk content={message.content} />\n          )}\n        </div>\n      </div>\n    );\n  };\n\n  return (\n    <div className=\"flex flex-col items-center px-4 pb-4\">\n      <div className=\"w-full max-w-2xl backdrop-blur-md rounded-16 p-4\">\n        {/* Render messages in order (user and agent interleaved) */}\n        {messages.map((message) =>\n          message.type === \"user\" ? (\n            <UserMessage key={message.id} content={message.content} />\n          ) : message.type === \"assistant\" ? (\n            renderAgentMessage(message)\n          ) : null\n        )}\n\n        {/* Render current streaming response (for in-progress response) */}\n        {showStreamingArea && (\n          <div className=\"flex items-start gap-3 py-4\">\n            <div className=\"shrink-0 mt-0.5\">\n              <Logo folded size={24} />\n            </div>\n            <div className=\"flex-1 flex flex-col gap-3 min-w-0\">\n              {!hasStreamItems ? (\n                // Loading state - no content yet, show blinking dot like main chat\n                <BlinkingDot />\n              ) : (\n                <>\n                  {/* Render stream items in FIFO order */}\n                  {renderStreamItems(streamItems, true)}\n\n                  {/* Streaming indicator when actively streaming text */}\n                  {isStreaming && hasStreamItems && !hasActiveTools && (\n                    <BlinkingDot />\n                  )}\n                </>\n              )}\n            </div>\n          </div>\n        )}\n\n        {/* Scroll anchor */}\n        <div ref={messagesEndRef} />\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/BuildWelcome.tsx",
    "content": "\"use client\";\n\nimport { useRef } from \"react\";\nimport { BuildFile } from \"@/app/craft/contexts/UploadFilesContext\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Logo from \"@/refresh-components/Logo\";\nimport InputBar, { InputBarHandle } from \"@/app/craft/components/InputBar\";\nimport SuggestedPrompts from \"@/app/craft/components/SuggestedPrompts\";\nimport ConnectDataBanner from \"@/app/craft/components/ConnectDataBanner\";\nimport { getBuildUserPersona } from \"@/app/craft/onboarding/constants\";\nimport { workAreaToPersona } from \"@/app/craft/constants/exampleBuildPrompts\";\n\ninterface BuildWelcomeProps {\n  onSubmit: (\n    message: string,\n    files: BuildFile[],\n    demoDataEnabled: boolean\n  ) => void;\n  isRunning: boolean;\n  /** When true, shows spinner on send button with \"Initializing sandbox...\" tooltip */\n  sandboxInitializing?: boolean;\n}\n\n/**\n * BuildWelcome - Welcome screen shown when no session exists\n *\n * Displays a centered welcome message and input bar to start a new build.\n */\nexport default function BuildWelcome({\n  onSubmit,\n  isRunning,\n  sandboxInitializing = false,\n}: BuildWelcomeProps) {\n  const inputBarRef = useRef<InputBarHandle>(null);\n  const userPersona = getBuildUserPersona();\n  const persona = workAreaToPersona(userPersona?.workArea);\n\n  const handlePromptClick = (promptText: string) => {\n    inputBarRef.current?.setMessage(promptText);\n  };\n\n  return (\n    <div className=\"h-full flex flex-col items-center justify-center px-4\">\n      <div className=\"flex flex-col items-center gap-4 mb-6\">\n        <Logo folded size={48} />\n        <Text headingH2 text05>\n          What shall we craft today?\n        </Text>\n      </div>\n      <div className=\"w-full max-w-2xl\">\n        <InputBar\n          ref={inputBarRef}\n          onSubmit={onSubmit}\n          isRunning={isRunning}\n          placeholder=\"Analyze my data and create a dashboard...\"\n          sandboxInitializing={sandboxInitializing}\n          isWelcomePage\n        />\n        <ConnectDataBanner />\n        <SuggestedPrompts persona={persona} onPromptClick={handlePromptClick} />\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/ChatPanel.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useState, useEffect, useRef, useMemo } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport { track, AnalyticsEvent } from \"@/lib/analytics\";\nimport {\n  useSession,\n  useSessionId,\n  useHasSession,\n  useIsRunning,\n  useOutputPanelOpen,\n  useToggleOutputPanel,\n  useBuildSessionStore,\n  useIsPreProvisioning,\n  useIsPreProvisioningFailed,\n  usePreProvisionedSessionId,\n  useFollowupSuggestions,\n  useSuggestionsLoading,\n} from \"@/app/craft/hooks/useBuildSessionStore\";\nimport { useBuildStreaming } from \"@/app/craft/hooks/useBuildStreaming\";\nimport { useUsageLimits } from \"@/app/craft/hooks/useUsageLimits\";\nimport { SessionErrorCode } from \"@/app/craft/types/streamingTypes\";\nimport {\n  BuildFile,\n  UploadFileStatus,\n  useUploadFilesContext,\n} from \"@/app/craft/contexts/UploadFilesContext\";\nimport { CRAFT_SEARCH_PARAM_NAMES } from \"@/app/craft/services/searchParams\";\nimport { CRAFT_PATH } from \"@/app/craft/v1/constants\";\nimport { toast } from \"@/hooks/useToast\";\nimport InputBar, { InputBarHandle } from \"@/app/craft/components/InputBar\";\nimport BuildWelcome from \"@/app/craft/components/BuildWelcome\";\nimport BuildMessageList from \"@/app/craft/components/BuildMessageList\";\nimport SuggestionBubbles from \"@/app/craft/components/SuggestionBubbles\";\nimport ConnectorBannersRow from \"@/app/craft/components/ConnectorBannersRow\";\nimport SandboxStatusIndicator from \"@/app/craft/components/SandboxStatusIndicator\";\nimport UpgradePlanModal from \"@/app/craft/components/UpgradePlanModal\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport { SvgSidebar, SvgChevronDown } from \"@opal/icons\";\nimport { Button as OpalButton } from \"@opal/components\";\nimport { useBuildContext } from \"@/app/craft/contexts/BuildContext\";\nimport useScreenSize from \"@/hooks/useScreenSize\";\nimport { cn } from \"@/lib/utils\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\n\ninterface BuildChatPanelProps {\n  /** Session ID from URL - used to prevent welcome flash while loading */\n  existingSessionId?: string | null;\n}\n\n/**\n * BuildChatPanel - Center panel containing the chat interface\n *\n * Handles:\n * - Welcome state (no session)\n * - Message list (when session exists)\n * - Input bar at bottom\n * - Header with output panel toggle\n */\nexport default function BuildChatPanel({\n  existingSessionId,\n}: BuildChatPanelProps) {\n  const router = useRouter();\n  const outputPanelOpen = useOutputPanelOpen();\n  const session = useSession();\n  const sessionId = useSessionId();\n  const hasSession = useHasSession();\n  const isRunning = useIsRunning();\n  const { setLeftSidebarFolded, leftSidebarFolded } = useBuildContext();\n  const { isMobile } = useScreenSize();\n  const toggleOutputPanel = useToggleOutputPanel();\n\n  // Track when output panel is fully closed (after animation completes)\n  // This prevents the \"open panel\" button from appearing during the close animation\n  const [isOutputPanelFullyClosed, setIsOutputPanelFullyClosed] =\n    useState(!outputPanelOpen);\n\n  const { limits, refreshLimits } = useUsageLimits();\n  const [showUpgradeModal, setShowUpgradeModal] = useState(false);\n  const setCurrentError = useBuildSessionStore(\n    (state) => state.setCurrentError\n  );\n\n  useEffect(() => {\n    if (session?.error === SessionErrorCode.RATE_LIMIT_EXCEEDED) {\n      setShowUpgradeModal(true);\n      setCurrentError(null);\n      refreshLimits();\n    }\n  }, [session?.error, refreshLimits, setCurrentError]);\n\n  useEffect(() => {\n    if (outputPanelOpen) {\n      // Panel opening - immediately mark as not fully closed\n      setIsOutputPanelFullyClosed(false);\n    } else {\n      // Panel closing - wait for 300ms animation to complete\n      const timer = setTimeout(() => setIsOutputPanelFullyClosed(true), 300);\n      return () => clearTimeout(timer);\n    }\n  }, [outputPanelOpen]);\n\n  // Access actions directly like chat does - these don't cause re-renders\n  const consumePreProvisionedSession = useBuildSessionStore(\n    (state) => state.consumePreProvisionedSession\n  );\n  const createSession = useBuildSessionStore((state) => state.createSession);\n  const appendMessageToCurrent = useBuildSessionStore(\n    (state) => state.appendMessageToCurrent\n  );\n  const nameBuildSession = useBuildSessionStore(\n    (state) => state.nameBuildSession\n  );\n  const { streamMessage } = useBuildStreaming();\n  const isPreProvisioning = useIsPreProvisioning();\n  const isPreProvisioningFailed = useIsPreProvisioningFailed();\n  const preProvisionedSessionId = usePreProvisionedSessionId();\n\n  // Disable input when pre-provisioning is in progress or failed (waiting for retry)\n  const sandboxNotReady = isPreProvisioning || isPreProvisioningFailed;\n  const { currentMessageFiles, hasUploadingFiles, setActiveSession } =\n    useUploadFilesContext();\n  const followupSuggestions = useFollowupSuggestions();\n  const suggestionsLoading = useSuggestionsLoading();\n  const clearFollowupSuggestions = useBuildSessionStore(\n    (state) => state.clearFollowupSuggestions\n  );\n\n  // Ref to access current file state in async callbacks\n  const currentFilesRef = useRef(currentMessageFiles);\n  useEffect(() => {\n    currentFilesRef.current = currentMessageFiles;\n  }, [currentMessageFiles]);\n\n  /**\n   * Keep the upload context in sync with the active session.\n   * The context handles all session change logic internally (fetching attachments,\n   * clearing files, auto-uploading pending files).\n   */\n  useEffect(() => {\n    const activeSession = existingSessionId ?? preProvisionedSessionId ?? null;\n    setActiveSession(activeSession);\n  }, [existingSessionId, preProvisionedSessionId, setActiveSession]);\n\n  // Ref to access InputBar methods\n  const inputBarRef = useRef<InputBarHandle>(null);\n\n  // Scroll detection for auto-scroll \"magnet\"\n  const scrollContainerRef = useRef<HTMLDivElement>(null);\n  const [isAtBottom, setIsAtBottom] = useState(true);\n  const [showScrollButton, setShowScrollButton] = useState(false);\n  const prevScrollTopRef = useRef(0);\n\n  // Check if user is at bottom of scroll container\n  const checkIfAtBottom = useCallback(() => {\n    const container = scrollContainerRef.current;\n    if (!container) return true;\n\n    const scrollTop = container.scrollTop;\n    const scrollHeight = container.scrollHeight;\n    const clientHeight = container.clientHeight;\n    const distanceFromBottom = scrollHeight - scrollTop - clientHeight;\n    const threshold = 32; // 2rem threshold\n\n    return distanceFromBottom <= threshold;\n  }, []);\n\n  // Handle scroll events - only update state on user-initiated scrolling\n  const handleScroll = useCallback(() => {\n    const container = scrollContainerRef.current;\n    if (!container) return;\n\n    const currentScrollTop = container.scrollTop;\n    const prevScrollTop = prevScrollTopRef.current;\n    const wasAtBottom = checkIfAtBottom();\n\n    // Detect if user scrolled up (scrollTop decreased)\n    // This distinguishes user scrolling from content growth\n    const scrolledUp = currentScrollTop < prevScrollTop - 5; // 5px threshold\n\n    // Only update state if user scrolled up (definitely user action)\n    // If content grows and we're still at bottom, don't change state\n    if (scrolledUp) {\n      // User scrolled up - release auto-scroll magnet\n      setIsAtBottom(wasAtBottom);\n      setShowScrollButton(!wasAtBottom);\n    } else if (wasAtBottom) {\n      // We're at bottom - ensure button stays hidden (handles content growth)\n      setIsAtBottom(true);\n      setShowScrollButton(false);\n    }\n    // If scrollTop increased but we're still at bottom, it's content growth - do nothing\n\n    prevScrollTopRef.current = currentScrollTop;\n  }, [checkIfAtBottom]);\n\n  // Scroll to bottom and resume auto-scroll\n  const scrollToBottom = useCallback(() => {\n    const container = scrollContainerRef.current;\n    if (!container) return;\n\n    // Use requestAnimationFrame to ensure we scroll after any layout changes\n    requestAnimationFrame(() => {\n      if (!container) return;\n\n      // Scroll to a value larger than scrollHeight - browsers will clamp to max\n      // This ensures we always reach the absolute bottom\n      const targetScroll = container.scrollHeight + 1000; // Add buffer to ensure we go all the way\n      container.scrollTo({ top: targetScroll, behavior: \"smooth\" });\n\n      // Update state immediately\n      setIsAtBottom(true);\n      setShowScrollButton(false);\n\n      // Update prevScrollTopRef after scroll completes\n      setTimeout(() => {\n        if (container) {\n          prevScrollTopRef.current = container.scrollTop;\n        }\n      }, 600); // Smooth scroll animation duration\n    });\n  }, []);\n\n  // Reset scroll state when session changes\n  useEffect(() => {\n    setIsAtBottom(true);\n    setShowScrollButton(false);\n  }, [sessionId]);\n\n  // Handle suggestion bubble click - populate InputBar with the suggestion\n  const handleSuggestionSelect = useCallback((text: string) => {\n    inputBarRef.current?.setMessage(text);\n  }, []);\n\n  // Check if agent has finished streaming at least one message\n  // Show banner only after first agent message completes streaming\n  const shouldShowConnectorBanner = useMemo(() => {\n    // Don't show if currently streaming\n    if (isRunning) {\n      return false;\n    }\n    // Check if there's at least one agent message in the session\n    const hasAgentMessage = session?.messages?.some(\n      (msg) => msg.type === \"assistant\"\n    );\n    return hasAgentMessage ?? false;\n  }, [isRunning, session?.messages]);\n\n  const handleSubmit = useCallback(\n    async (message: string, files: BuildFile[], demoDataEnabled: boolean) => {\n      if (limits?.isLimited) {\n        setShowUpgradeModal(true);\n        return;\n      }\n\n      track(AnalyticsEvent.SENT_CRAFT_MESSAGE);\n\n      if (hasSession && sessionId) {\n        // Existing session flow\n        // Check if response is still streaming - show toast like main chat does\n        if (isRunning) {\n          toast.error(\"Please wait for the current operation to complete.\");\n          return;\n        }\n\n        // Clear follow-up suggestions when user sends a new message\n        clearFollowupSuggestions(sessionId);\n\n        // Add user message to state\n        appendMessageToCurrent({\n          id: `msg-${Date.now()}`,\n          type: \"user\",\n          content: message,\n          timestamp: new Date(),\n        });\n        // Stream the response\n        await streamMessage(sessionId, message);\n        refreshLimits();\n      } else {\n        // New session flow - ALWAYS use pre-provisioned session\n        const newSessionId = await consumePreProvisionedSession();\n\n        if (!newSessionId) {\n          // This should not happen if UI properly disables input until ready\n          console.error(\"[ChatPanel] No pre-provisioned session available\");\n          toast.error(\"Please wait for sandbox to initialize\");\n          return;\n        }\n\n        // Pre-provisioned session flow:\n        // The backend session already exists (created during pre-provisioning).\n        // Files were already uploaded immediately when attached to the pre-provisioned session.\n        // Here we initialize the LOCAL Zustand store entry with the right state.\n        const userMessage = {\n          id: `msg-${Date.now()}`,\n          type: \"user\" as const,\n          content: message,\n          timestamp: new Date(),\n        };\n        // Initialize local state (NOT an API call - backend session already exists)\n        // - status: \"running\" disables input immediately\n        // - isLoaded: false allows loadSession to fetch sandbox info while preserving messages\n        createSession(newSessionId, {\n          messages: [userMessage],\n          status: \"running\",\n        });\n\n        // Handle files that weren't successfully uploaded yet\n        // This handles edge cases where:\n        // 1. File is still uploading when user sends message - wait for it\n        // 2. File upload failed and needs retry\n        // 3. File was attached but upload hasn't started yet\n\n        // Wait for any in-flight uploads to complete (max 5 seconds)\n        // Use ref to check current state during polling\n        if (hasUploadingFiles) {\n          const maxWaitMs = 5000;\n          const checkIntervalMs = 100;\n          let waited = 0;\n\n          await new Promise<void>((resolve) => {\n            const checkUploads = () => {\n              // Check current state via ref (updates with each render)\n              const stillUploading = currentFilesRef.current.some(\n                (f) => f.status === UploadFileStatus.UPLOADING\n              );\n              if (!stillUploading || waited >= maxWaitMs) {\n                resolve();\n              } else {\n                waited += checkIntervalMs;\n                setTimeout(checkUploads, checkIntervalMs);\n              }\n            };\n            checkUploads();\n          });\n        }\n\n        // Note: PENDING files are auto-uploaded by the context when session becomes available\n\n        // Navigate to URL - session controller will set currentSessionId\n        router.push(\n          `${CRAFT_PATH}?${CRAFT_SEARCH_PARAM_NAMES.SESSION_ID}=${newSessionId}`\n        );\n\n        // Schedule naming after delay (message will be saved by then)\n        // Note: Don't call refreshSessionHistory() here - it would overwrite the\n        // optimistic update from consumePreProvisionedSession() before the message is saved\n        setTimeout(() => nameBuildSession(newSessionId), 1000);\n\n        // Stream the response (uses session ID directly, not currentSessionId)\n        await streamMessage(newSessionId, message);\n        refreshLimits();\n      }\n    },\n    [\n      hasSession,\n      sessionId,\n      isRunning,\n      appendMessageToCurrent,\n      streamMessage,\n      consumePreProvisionedSession,\n      createSession,\n      nameBuildSession,\n      router,\n      clearFollowupSuggestions,\n      hasUploadingFiles,\n      limits,\n      refreshLimits,\n    ]\n  );\n\n  return (\n    <div className=\"h-full w-full\">\n      <UpgradePlanModal\n        open={showUpgradeModal}\n        onClose={() => setShowUpgradeModal(false)}\n        limits={limits}\n      />\n      {/* Content wrapper - shrinks when output panel opens */}\n      <div\n        className={cn(\n          \"flex flex-col h-full transition-all duration-300 ease-in-out\",\n          outputPanelOpen ? \"w-1/2 pl-4\" : \"w-full\"\n        )}\n      >\n        {/* Chat header */}\n        <div className=\"flex flex-row items-center justify-between pl-4 pr-4 py-3 relative overflow-visible\">\n          <div className=\"flex flex-row items-center gap-2 max-w-[75%]\">\n            {/* Mobile sidebar toggle - only show on mobile when sidebar is folded */}\n            {isMobile && leftSidebarFolded && (\n              <OpalButton\n                icon={SvgSidebar}\n                onClick={() => setLeftSidebarFolded(false)}\n                prominence=\"tertiary\"\n                size=\"sm\"\n              />\n            )}\n            <SandboxStatusIndicator />\n          </div>\n          {/* Output panel toggle - only show when panel is fully closed (after animation) */}\n          {isOutputPanelFullyClosed && (\n            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n            <IconButton\n              icon={SvgSidebar}\n              onClick={toggleOutputPanel}\n              tooltip=\"Open output panel\"\n              tertiary\n              className=\"!bg-background-tint-00 border rounded-full\"\n              iconClassName=\"!stroke-text-04\"\n            />\n          )}\n          {/* Soft fade border at bottom */}\n          <div className=\"absolute bottom-0 left-0 right-0 h-10 bg-gradient-to-b from-background-neutral-01 to-transparent pointer-events-none translate-y-full z-10\" />\n        </div>\n\n        {/* Main content area */}\n        <div\n          ref={scrollContainerRef}\n          onScroll={handleScroll}\n          className=\"flex-1 overflow-auto\"\n        >\n          {!hasSession && !existingSessionId ? (\n            <BuildWelcome\n              onSubmit={handleSubmit}\n              isRunning={isRunning}\n              sandboxInitializing={sandboxNotReady}\n            />\n          ) : (\n            <BuildMessageList\n              messages={session?.messages ?? []}\n              streamItems={session?.streamItems ?? []}\n              isStreaming={isRunning}\n              autoScrollEnabled={isAtBottom}\n            />\n          )}\n        </div>\n\n        {/* Input bar at bottom when session exists */}\n        {(hasSession || existingSessionId) && (\n          <div className=\"px-4 pb-8 pt-4 relative\">\n            {/* Soft fade border at top */}\n            <div className=\"absolute top-0 left-0 right-0 h-12 bg-gradient-to-t from-background-neutral-01 to-transparent pointer-events-none -translate-y-full\" />\n            <div className=\"max-w-2xl mx-auto\">\n              {/* Scroll to bottom button - shown when user has scrolled away */}\n              {showScrollButton && (\n                <div className=\"absolute -top-12 left-1/2 -translate-x-1/2 z-10\">\n                  <SimpleTooltip tooltip=\"Scroll to bottom\" delayDuration={200}>\n                    <button\n                      onClick={scrollToBottom}\n                      className={cn(\n                        \"flex items-center justify-center\",\n                        \"w-8 h-8 rounded-full\",\n                        \"bg-background-neutral-inverted-00 border border-border-01\",\n                        \"shadow-01 hover:shadow-02\",\n                        \"transition-all duration-200\",\n                        \"hover:bg-background-tint-inverted-01\"\n                      )}\n                      aria-label=\"Scroll to bottom\"\n                    >\n                      <SvgChevronDown\n                        size={20}\n                        className=\"stroke-background-neutral-00\"\n                      />\n                    </button>\n                  </SimpleTooltip>\n                </div>\n              )}\n              {/* Follow-up suggestion bubbles - show after first agent message */}\n              {(followupSuggestions || suggestionsLoading) && (\n                <div className=\"mb-3\">\n                  <SuggestionBubbles\n                    suggestions={followupSuggestions ?? []}\n                    loading={suggestionsLoading}\n                    onSelect={handleSuggestionSelect}\n                  />\n                </div>\n              )}\n              {/* Connector banners - show after first agent message finishes streaming */}\n              {shouldShowConnectorBanner && (\n                <ConnectorBannersRow className=\"\" />\n              )}\n              <InputBar\n                ref={inputBarRef}\n                onSubmit={handleSubmit}\n                isRunning={isRunning}\n                placeholder=\"Continue the conversation...\"\n              />\n            </div>\n          </div>\n        )}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/ConnectDataBanner.tsx",
    "content": "\"use client\";\n\nimport { useRouter } from \"next/navigation\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport {\n  ConfluenceIcon,\n  GoogleDriveIcon,\n  GithubIcon,\n  NotionIcon,\n  ColorSlackIcon,\n  HubSpotIcon,\n} from \"@/components/icons/icons\";\nimport { SvgChevronRight } from \"@opal/icons\";\nimport { useBuildConnectors } from \"@/app/craft/hooks/useBuildConnectors\";\nimport { CRAFT_CONFIGURE_PATH } from \"@/app/craft/v1/constants\";\n\ninterface ConnectDataBannerProps {\n  className?: string;\n}\n\nfunction IconWrapper({ children }: { children: React.ReactNode }) {\n  return (\n    <div className=\"w-6 h-6 rounded-full bg-background-neutral-00 border border-border-01 flex items-center justify-center overflow-hidden\">\n      {children}\n    </div>\n  );\n}\n\nexport default function ConnectDataBanner({\n  className,\n}: ConnectDataBannerProps) {\n  const router = useRouter();\n  const { hasConnectorEverSucceeded, isLoading } = useBuildConnectors();\n\n  const handleClick = () => {\n    router.push(CRAFT_CONFIGURE_PATH);\n  };\n\n  // Only show banner if user hasn't successfully synced any connectors (and not loading)\n  if (isLoading || hasConnectorEverSucceeded) {\n    return null;\n  }\n\n  return (\n    <div className=\"relative\">\n      <button\n        onClick={handleClick}\n        className={cn(\n          // Layout\n          \"flex items-center justify-between gap-2\",\n          \"mx-auto px-4 py-2\",\n          // Sizing - thin and full width to match InputBar\n          \"h-9 w-[50%]\",\n          // Appearance - slightly different color, rounded bottom\n          \"bg-background-neutral-01 hover:bg-background-neutral-02\",\n          \"rounded-b-12 rounded-t-none\",\n          // Border for definition\n          \"border border-t-0 border-border-01\",\n          // Transition\n          \"transition-colors duration-200\",\n          // Cursor\n          \"cursor-pointer\",\n          // Group for hover effects\n          \"group\",\n          className\n        )}\n      >\n        {/* Left side: 3 icons */}\n        <div className=\"flex items-center -space-x-2\">\n          {/* Outermost - no movement */}\n          <div>\n            <IconWrapper>\n              <ColorSlackIcon size={16} />\n            </IconWrapper>\n          </div>\n          {/* Middle - slight movement */}\n          <div className=\"transition-transform duration-200 group-hover:translate-x-2\">\n            <IconWrapper>\n              <GoogleDriveIcon size={16} />\n            </IconWrapper>\n          </div>\n          {/* Innermost - moves towards center */}\n          <div className=\"transition-transform duration-200 group-hover:translate-x-4\">\n            <IconWrapper>\n              <ConfluenceIcon size={16} />\n            </IconWrapper>\n          </div>\n        </div>\n\n        {/* Center: Text and Arrow */}\n        <div className=\"flex items-center justify-center gap-1\">\n          <Text secondaryBody text03>\n            Connect your data\n          </Text>\n          <SvgChevronRight className=\"h-4 w-4 text-text-03\" />\n        </div>\n\n        {/* Right side: 3 icons */}\n        <div className=\"flex items-center -space-x-2\">\n          {/* Innermost - moves towards center */}\n          <div className=\"transition-transform duration-200 group-hover:-translate-x-4\">\n            <IconWrapper>\n              <GithubIcon size={16} />\n            </IconWrapper>\n          </div>\n          {/* Middle - slight movement */}\n          <div className=\"transition-transform duration-200 group-hover:-translate-x-2\">\n            <IconWrapper>\n              <NotionIcon size={16} />\n            </IconWrapper>\n          </div>\n          {/* Outermost - no movement */}\n          <div>\n            <IconWrapper>\n              <HubSpotIcon size={16} />\n            </IconWrapper>\n          </div>\n        </div>\n      </button>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/ConnectorBannersRow.tsx",
    "content": "\"use client\";\n\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport {\n  ConfluenceIcon,\n  GoogleDriveIcon,\n  GithubIcon,\n  NotionIcon,\n  ColorSlackIcon,\n  HubSpotIcon,\n} from \"@/components/icons/icons\";\nimport { SvgChevronRight, SvgCalendar } from \"@opal/icons\";\nimport { useBuildConnectors } from \"@/app/craft/hooks/useBuildConnectors\";\nimport {\n  CRAFT_CONFIGURE_PATH,\n  ONYX_CRAFT_CALENDAR_URL,\n} from \"@/app/craft/v1/constants\";\n\ninterface ConnectorBannersRowProps {\n  className?: string;\n}\n\nfunction IconWrapper({ children }: { children: React.ReactNode }) {\n  return (\n    <div className=\"w-6 h-6 rounded-full bg-background-neutral-00 border border-border-01 flex items-center justify-center overflow-hidden\">\n      {children}\n    </div>\n  );\n}\n\n/**\n * Row of two banners that appear above the InputBar after first agent response.\n * - Left: \"Connect your data\" - exact same look as welcome page banner, but flipped\n * - Right: \"Get help setting up connectors\" - links to cal.com booking\n *\n * Only shows if user has no connectors configured.\n * Slides up from the input bar with animation.\n */\nexport default function ConnectorBannersRow({\n  className,\n}: ConnectorBannersRowProps) {\n  const { hasConnectorEverSucceeded } = useBuildConnectors();\n\n  // Hide if user has successfully synced at least one connector\n  if (hasConnectorEverSucceeded) {\n    return null;\n  }\n\n  const handleConnectClick = () => {\n    window.location.href = CRAFT_CONFIGURE_PATH;\n  };\n\n  const handleHelpClick = () => {\n    window.open(ONYX_CRAFT_CALENDAR_URL, \"_blank\");\n  };\n\n  return (\n    <div\n      className={cn(\n        \"flex justify-center animate-in slide-in-from-bottom-2 fade-in duration-300\",\n        className\n      )}\n    >\n      {/* Left banner: Connect your data - exact same as welcome page but flipped */}\n      <button\n        onClick={handleConnectClick}\n        className={cn(\n          // Layout\n          \"flex items-center justify-between gap-2\",\n          \"px-4 py-2\",\n          // Sizing - thin and slightly narrower than 50% width\n          \"h-9 w-[calc(48%-4px)]\",\n          // Appearance - rounded top left only\n          \"bg-background-neutral-01 hover:bg-background-neutral-02\",\n          \"rounded-tl-12 rounded-tr-none rounded-bl-none rounded-br-none\",\n          // Border - flipped: no bottom border instead of no top\n          \"border border-b-0 border-border-01\",\n          // Transition\n          \"transition-colors duration-200\",\n          // Cursor\n          \"cursor-pointer\",\n          // Group for hover effects\n          \"group\"\n        )}\n      >\n        {/* Left side: 3 icons */}\n        <div className=\"flex items-center -space-x-2\">\n          {/* Outermost - no movement */}\n          <div>\n            <IconWrapper>\n              <ColorSlackIcon size={16} />\n            </IconWrapper>\n          </div>\n          {/* Middle - slight movement */}\n          <div className=\"transition-transform duration-200 group-hover:translate-x-2\">\n            <IconWrapper>\n              <GoogleDriveIcon size={16} />\n            </IconWrapper>\n          </div>\n          {/* Innermost - moves towards center */}\n          <div className=\"transition-transform duration-200 group-hover:translate-x-4\">\n            <IconWrapper>\n              <ConfluenceIcon size={16} />\n            </IconWrapper>\n          </div>\n        </div>\n\n        {/* Center: Text and Arrow */}\n        <div className=\"flex items-center justify-center gap-1\">\n          <Text secondaryBody text03>\n            Connect your data\n          </Text>\n          <SvgChevronRight className=\"h-4 w-4 text-text-03\" />\n        </div>\n\n        {/* Right side: 3 icons */}\n        <div className=\"flex items-center -space-x-2\">\n          {/* Innermost - moves towards center */}\n          <div className=\"transition-transform duration-200 group-hover:-translate-x-4\">\n            <IconWrapper>\n              <GithubIcon size={16} />\n            </IconWrapper>\n          </div>\n          {/* Middle - slight movement */}\n          <div className=\"transition-transform duration-200 group-hover:-translate-x-2\">\n            <IconWrapper>\n              <NotionIcon size={16} />\n            </IconWrapper>\n          </div>\n          {/* Outermost - no movement */}\n          <div>\n            <IconWrapper>\n              <HubSpotIcon size={16} />\n            </IconWrapper>\n          </div>\n        </div>\n      </button>\n\n      {/* Right banner: Get help setting up connectors */}\n      <button\n        onClick={handleHelpClick}\n        className={cn(\n          // Layout\n          \"flex items-center justify-center gap-2\",\n          \"px-4 py-2\",\n          // Sizing - same as left banner\n          \"h-9 w-[calc(49%)]\",\n          // Appearance - rounded top right only\n          \"bg-background-neutral-01 hover:bg-background-neutral-02\",\n          \"rounded-tr-12 rounded-tl-none rounded-bl-none rounded-br-none\",\n          // Border - flipped: no bottom border\n          \"border border-b-0 border-border-01\",\n          // Transition\n          \"transition-colors duration-200\",\n          // Cursor\n          \"cursor-pointer\"\n        )}\n      >\n        {/* Calendar icon */}\n        <SvgCalendar className=\"h-4 w-4 text-text-03\" />\n\n        {/* Text */}\n        <Text secondaryBody text03>\n          Get help setting up connectors\n        </Text>\n\n        {/* Arrow indicator */}\n        <SvgChevronRight className=\"h-4 w-4 text-text-03\" />\n      </button>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/CraftingLoader.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useRef } from \"react\";\n\nconst messages = [\n  \"Punching wood...\",\n  \"Gathering resources...\",\n  \"Placing blocks...\",\n  \"Crafting your workspace...\",\n  \"Mining for dependencies...\",\n  \"Smelting the code...\",\n  \"Enchanting with magic...\",\n  \"World generation complete...\",\n  \"/gamemode 1\",\n];\n\nconst MESSAGE_COUNT = messages.length;\nconst TYPE_DELAY = 40;\nconst LINE_PAUSE = 800;\nconst RESET_DELAY = 2000;\n\nexport default function CraftingLoader() {\n  const [display, setDisplay] = useState({\n    lines: [] as string[],\n    currentText: \"\",\n  });\n\n  const lineIndexRef = useRef(0);\n  const charIndexRef = useRef(0);\n  const lastUpdateRef = useRef(0);\n  const timeoutRef = useRef<NodeJS.Timeout | undefined>(undefined);\n  const rafRef = useRef<number | undefined>(undefined);\n\n  useEffect(() => {\n    let isActive = true;\n\n    const update = (now: number) => {\n      if (!isActive) return;\n\n      const lineIdx = lineIndexRef.current;\n      const charIdx = charIndexRef.current;\n\n      if (lineIdx >= MESSAGE_COUNT) {\n        timeoutRef.current = setTimeout(() => {\n          if (!isActive) return;\n          lineIndexRef.current = 0;\n          charIndexRef.current = 0;\n          setDisplay({ lines: [], currentText: \"\" });\n          lastUpdateRef.current = performance.now();\n          rafRef.current = requestAnimationFrame(update);\n        }, RESET_DELAY);\n        return;\n      }\n\n      const msg = messages[lineIdx];\n      if (!msg) return;\n\n      const elapsed = now - lastUpdateRef.current;\n\n      if (charIdx < msg.length) {\n        if (elapsed >= TYPE_DELAY) {\n          charIndexRef.current = charIdx + 1;\n          setDisplay((prev) => ({\n            lines: prev.lines,\n            currentText: msg.substring(0, charIdx + 1),\n          }));\n          lastUpdateRef.current = now;\n        }\n      } else if (elapsed >= LINE_PAUSE) {\n        setDisplay((prev) => ({\n          lines: [...prev.lines, msg],\n          currentText: \"\",\n        }));\n        lineIndexRef.current = lineIdx + 1;\n        charIndexRef.current = 0;\n        lastUpdateRef.current = now;\n      }\n\n      rafRef.current = requestAnimationFrame(update);\n    };\n\n    lastUpdateRef.current = performance.now();\n    rafRef.current = requestAnimationFrame(update);\n\n    return () => {\n      isActive = false;\n      if (rafRef.current !== undefined) cancelAnimationFrame(rafRef.current);\n      if (timeoutRef.current !== undefined) clearTimeout(timeoutRef.current);\n    };\n  }, []);\n\n  const { lines, currentText } = display;\n  const hasCurrentText = currentText.length > 0;\n\n  return (\n    <div className=\"h-full bg-gradient-to-br from-neutral-950 via-neutral-900 to-neutral-950 flex flex-col items-center justify-center p-4\">\n      <div className=\"w-full max-w-md rounded-sm overflow-hidden shadow-2xl border-2 border-neutral-700\">\n        <div className=\"bg-neutral-800 px-4 py-3 flex items-center gap-2 border-b-2 border-neutral-700\">\n          <div className=\"w-3 h-3 rounded-none bg-red-500\" />\n          <div className=\"w-3 h-3 rounded-none bg-yellow-500\" />\n          <div className=\"w-3 h-3 rounded-none bg-green-500\" />\n          <span className=\"ml-4 text-neutral-500 text-sm font-mono\">\n            crafting_table\n          </span>\n        </div>\n\n        <div className=\"bg-neutral-900 p-6 min-h-[250px] font-mono text-sm\">\n          {lines.map((line, i) => (\n            <div key={i} className=\"flex items-center text-neutral-300\">\n              <span className=\"text-emerald-500 mr-2\">/&gt;</span>\n              <span>{line}</span>\n            </div>\n          ))}\n          {hasCurrentText ? (\n            <div className=\"flex items-center text-neutral-300\">\n              <span className=\"text-emerald-500 mr-2\">/&gt;</span>\n              <span>{currentText}</span>\n              <span className=\"w-2 h-5 bg-emerald-500 animate-pulse ml-0.5\" />\n            </div>\n          ) : (\n            <div className=\"flex items-center text-neutral-300\">\n              <span className=\"text-emerald-500 mr-2\">/&gt;</span>\n              <span className=\"w-2 h-5 bg-emerald-500 animate-pulse\" />\n            </div>\n          )}\n        </div>\n      </div>\n\n      <p className=\"mt-6 text-neutral-500 text-sm font-mono\">\n        Crafting your next great idea...\n      </p>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/DiffView.tsx",
    "content": "\"use client\";\n\nimport { useMemo } from \"react\";\nimport { cn } from \"@/lib/utils\";\n\ninterface DiffViewProps {\n  oldContent: string;\n  newContent: string;\n  maxHeight?: string;\n  /** File path for context (displayed in header) */\n  filePath?: string;\n}\n\ninterface DiffLine {\n  type: \"added\" | \"removed\" | \"unchanged\" | \"header\";\n  content: string;\n  oldLineNum?: number;\n  newLineNum?: number;\n}\n\n/**\n * Compute a simple line-by-line diff between old and new content.\n * Uses a basic LCS-like approach for reasonable diff output.\n */\nfunction computeDiff(oldText: string, newText: string): DiffLine[] {\n  const oldLines = oldText.split(\"\\n\");\n  const newLines = newText.split(\"\\n\");\n\n  const result: DiffLine[] = [];\n\n  let oldIdx = 0;\n  let newIdx = 0;\n  let oldLineNum = 1;\n  let newLineNum = 1;\n\n  while (oldIdx < oldLines.length || newIdx < newLines.length) {\n    const oldLine: string | undefined = oldLines[oldIdx];\n    const newLine: string | undefined = newLines[newIdx];\n\n    if (oldIdx >= oldLines.length || oldLine === undefined) {\n      // All remaining new lines are additions\n      result.push({\n        type: \"added\",\n        content: newLine ?? \"\",\n        newLineNum: newLineNum++,\n      });\n      newIdx++;\n    } else if (newIdx >= newLines.length || newLine === undefined) {\n      // All remaining old lines are deletions\n      result.push({\n        type: \"removed\",\n        content: oldLine,\n        oldLineNum: oldLineNum++,\n      });\n      oldIdx++;\n    } else if (oldLine === newLine) {\n      // Lines match - unchanged\n      result.push({\n        type: \"unchanged\",\n        content: oldLine,\n        oldLineNum: oldLineNum++,\n        newLineNum: newLineNum++,\n      });\n      oldIdx++;\n      newIdx++;\n    } else {\n      // Lines differ - check if old line exists later in new, or vice versa\n      const oldExistsLaterInNew = newLines.slice(newIdx + 1).includes(oldLine);\n      const newExistsLaterInOld = oldLines.slice(oldIdx + 1).includes(newLine);\n\n      if (!oldExistsLaterInNew && newExistsLaterInOld) {\n        // Old line was removed\n        result.push({\n          type: \"removed\",\n          content: oldLine,\n          oldLineNum: oldLineNum++,\n        });\n        oldIdx++;\n      } else if (oldExistsLaterInNew && !newExistsLaterInOld) {\n        // New line was added\n        result.push({\n          type: \"added\",\n          content: newLine,\n          newLineNum: newLineNum++,\n        });\n        newIdx++;\n      } else {\n        // Both differ - show as removal then addition (replacement)\n        result.push({\n          type: \"removed\",\n          content: oldLine,\n          oldLineNum: oldLineNum++,\n        });\n        result.push({\n          type: \"added\",\n          content: newLine,\n          newLineNum: newLineNum++,\n        });\n        oldIdx++;\n        newIdx++;\n      }\n    }\n  }\n\n  return result;\n}\n\n/**\n * Collapse unchanged lines in the middle of the diff.\n * Shows context lines around changes.\n */\nfunction collapseUnchanged(\n  lines: DiffLine[],\n  contextLines: number = 3\n): DiffLine[] {\n  const result: DiffLine[] = [];\n  const changeIndices: number[] = [];\n\n  // Find all indices with changes\n  lines.forEach((line, idx) => {\n    if (line.type === \"added\" || line.type === \"removed\") {\n      changeIndices.push(idx);\n    }\n  });\n\n  if (changeIndices.length === 0) {\n    // No changes, show a summary\n    if (lines.length > 10) {\n      return [{ type: \"header\", content: `(${lines.length} unchanged lines)` }];\n    }\n    return lines;\n  }\n\n  // Create a set of indices to show\n  const showIndices = new Set<number>();\n  changeIndices.forEach((idx) => {\n    for (\n      let i = Math.max(0, idx - contextLines);\n      i <= Math.min(lines.length - 1, idx + contextLines);\n      i++\n    ) {\n      showIndices.add(i);\n    }\n  });\n\n  let lastShownIdx = -1;\n  lines.forEach((line, idx) => {\n    if (showIndices.has(idx)) {\n      if (lastShownIdx !== -1 && idx - lastShownIdx > 1) {\n        // Add collapse marker\n        const skipped = idx - lastShownIdx - 1;\n        result.push({\n          type: \"header\",\n          content: `... ${skipped} unchanged line${skipped > 1 ? \"s\" : \"\"} ...`,\n        });\n      }\n      result.push(line);\n      lastShownIdx = idx;\n    }\n  });\n\n  return result;\n}\n\n/**\n * DiffView - Displays a diff between old and new content\n *\n * Shows added lines in green with + prefix\n * Shows removed lines in red with - prefix\n * Collapses long unchanged sections\n */\nexport default function DiffView({\n  oldContent,\n  newContent,\n  maxHeight = \"300px\",\n  filePath,\n}: DiffViewProps) {\n  const diffLines = useMemo(() => {\n    const rawDiff = computeDiff(oldContent, newContent);\n    return collapseUnchanged(rawDiff);\n  }, [oldContent, newContent]);\n\n  // Count changes for summary\n  const stats = useMemo(() => {\n    const added = diffLines.filter((l) => l.type === \"added\").length;\n    const removed = diffLines.filter((l) => l.type === \"removed\").length;\n    return { added, removed };\n  }, [diffLines]);\n\n  return (\n    <div\n      className={cn(\n        \"rounded-08 border overflow-hidden\",\n        \"bg-[#fafafa] border-[#e5e5e5] dark:bg-[#151617] dark:border-[#2a2a2a]\"\n      )}\n    >\n      {/* Header with stats */}\n      <div\n        className={cn(\n          \"px-3 py-2 border-b text-xs flex items-center gap-3\",\n          \"bg-[#f5f5f5] border-[#e5e5e5] dark:bg-[#1a1a1a] dark:border-[#2a2a2a]\"\n        )}\n        style={{ fontFamily: \"var(--font-dm-mono)\" }}\n      >\n        {filePath && (\n          <span className=\"text-text-03 truncate flex-1\">{filePath}</span>\n        )}\n        <div className=\"flex items-center gap-2 shrink-0\">\n          {stats.added > 0 && (\n            <span className=\"text-green-600 dark:text-green-400\">\n              +{stats.added}\n            </span>\n          )}\n          {stats.removed > 0 && (\n            <span className=\"text-red-600 dark:text-red-400\">\n              -{stats.removed}\n            </span>\n          )}\n        </div>\n      </div>\n\n      {/* Diff content */}\n      <div\n        className=\"overflow-auto text-xs\"\n        style={{\n          fontFamily: \"var(--font-dm-mono)\",\n          maxHeight,\n        }}\n      >\n        {diffLines.map((line, idx) => (\n          <div\n            key={idx}\n            className={cn(\n              \"px-3 py-0.5 whitespace-pre-wrap break-words\",\n              line.type === \"added\" &&\n                \"bg-green-100 dark:bg-green-950/40 text-green-800 dark:text-green-300\",\n              line.type === \"removed\" &&\n                \"bg-red-100 dark:bg-red-950/40 text-red-800 dark:text-red-300\",\n              line.type === \"unchanged\" && \"text-text-03\",\n              line.type === \"header\" &&\n                \"text-text-04 bg-[#f0f0f0] dark:bg-[#1d1d1d] text-center italic py-1\"\n            )}\n          >\n            {line.type === \"added\" && (\n              <span className=\"select-none text-green-600 dark:text-green-500 mr-2\">\n                +\n              </span>\n            )}\n            {line.type === \"removed\" && (\n              <span className=\"select-none text-red-600 dark:text-red-500 mr-2\">\n                -\n              </span>\n            )}\n            {line.type === \"unchanged\" && (\n              <span className=\"select-none text-text-04 mr-2\">&nbsp;</span>\n            )}\n            {line.content || (line.type !== \"header\" ? \" \" : \"\")}\n          </div>\n        ))}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/FileBrowser.tsx",
    "content": "\"use client\";\n\nimport { useState, useCallback, useEffect } from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport {\n  Collapsible,\n  CollapsibleContent,\n  CollapsibleTrigger,\n} from \"@/refresh-components/Collapsible\";\nimport {\n  SvgFolder,\n  SvgFolderOpen,\n  SvgFileSmall,\n  SvgChevronRight,\n  SvgChevronDown,\n  SvgDownloadCloud,\n  SvgEye,\n  SvgHardDrive,\n  SvgLoader,\n} from \"@opal/icons\";\nimport {\n  listDirectory,\n  getArtifactUrl,\n  FileSystemEntry,\n} from \"@/lib/build/client\";\nimport FilePreviewModal from \"@/app/craft/components/FilePreviewModal\";\n\ninterface FileBrowserProps {\n  sessionId: string;\n}\n\ninterface DirectoryNodeProps {\n  entry: FileSystemEntry;\n  sessionId: string;\n  depth: number;\n  onPreview: (entry: FileSystemEntry) => void;\n}\n\nfunction DirectoryNode({\n  entry,\n  sessionId,\n  depth,\n  onPreview,\n}: DirectoryNodeProps) {\n  const [isOpen, setIsOpen] = useState(false);\n  const [children, setChildren] = useState<FileSystemEntry[] | null>(null);\n  const [isLoading, setIsLoading] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n\n  const loadChildren = useCallback(async () => {\n    if (children !== null) return;\n\n    setIsLoading(true);\n    setError(null);\n    try {\n      const listing = await listDirectory(sessionId, entry.path);\n      setChildren(listing.entries);\n    } catch (err) {\n      setError(err instanceof Error ? err.message : \"Failed to load directory\");\n    } finally {\n      setIsLoading(false);\n    }\n  }, [sessionId, entry.path, children]);\n\n  const handleToggle = async (open: boolean) => {\n    setIsOpen(open);\n    if (open) {\n      await loadChildren();\n    }\n  };\n\n  const paddingLeft = depth * 1.25;\n\n  return (\n    <Collapsible open={isOpen} onOpenChange={handleToggle}>\n      <CollapsibleTrigger asChild>\n        <button\n          className=\"w-full flex flex-row items-center gap-2 p-2 hover:bg-background-neutral-01 rounded-08 transition-colors\"\n          style={{ paddingLeft: `${paddingLeft}rem` }}\n        >\n          {isLoading ? (\n            <SvgLoader className=\"size-4 stroke-text-03 animate-spin\" />\n          ) : isOpen ? (\n            <SvgChevronDown className=\"size-4 stroke-text-03\" />\n          ) : (\n            <SvgChevronRight className=\"size-4 stroke-text-03\" />\n          )}\n          {isOpen ? (\n            <SvgFolderOpen className=\"size-4 stroke-text-03\" />\n          ) : (\n            <SvgFolder className=\"size-4 stroke-text-03\" />\n          )}\n          <Text mainContentMono text04 className=\"truncate\">\n            {entry.name}\n          </Text>\n        </button>\n      </CollapsibleTrigger>\n      <CollapsibleContent>\n        {error && (\n          <div style={{ paddingLeft: `${paddingLeft + 1.25}rem` }}>\n            <Text secondaryBody className=\"text-status-error-01\">\n              {error}\n            </Text>\n          </div>\n        )}\n        {children?.map((child) =>\n          child.is_directory ? (\n            <DirectoryNode\n              key={child.path}\n              entry={child}\n              sessionId={sessionId}\n              depth={depth + 1}\n              onPreview={onPreview}\n            />\n          ) : (\n            <FileNode\n              key={child.path}\n              entry={child}\n              sessionId={sessionId}\n              depth={depth + 1}\n              onPreview={onPreview}\n            />\n          )\n        )}\n      </CollapsibleContent>\n    </Collapsible>\n  );\n}\n\ninterface FileNodeProps {\n  entry: FileSystemEntry;\n  sessionId: string;\n  depth: number;\n  onPreview: (entry: FileSystemEntry) => void;\n}\n\nfunction FileNode({ entry, sessionId, depth, onPreview }: FileNodeProps) {\n  const paddingLeft = depth * 1.25;\n  const downloadUrl = getArtifactUrl(sessionId, entry.path);\n\n  const canPreview =\n    entry.mime_type?.startsWith(\"text/\") ||\n    entry.mime_type?.startsWith(\"image/\") ||\n    entry.mime_type === \"application/json\" ||\n    entry.name.endsWith(\".md\") ||\n    entry.name.endsWith(\".txt\") ||\n    entry.name.endsWith(\".json\") ||\n    entry.name.endsWith(\".js\") ||\n    entry.name.endsWith(\".ts\") ||\n    entry.name.endsWith(\".tsx\") ||\n    entry.name.endsWith(\".jsx\") ||\n    entry.name.endsWith(\".css\") ||\n    entry.name.endsWith(\".html\") ||\n    entry.name.endsWith(\".py\") ||\n    entry.name.endsWith(\".yaml\") ||\n    entry.name.endsWith(\".yml\");\n\n  const formatSize = (bytes: number | null) => {\n    if (bytes === null) return \"\";\n    if (bytes < 1024) return `${bytes} B`;\n    if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;\n    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;\n  };\n\n  return (\n    <div\n      className=\"w-full flex flex-row items-center gap-2 p-2 hover:bg-background-neutral-01 rounded-08 transition-colors group\"\n      style={{ paddingLeft: `${paddingLeft + 1.25}rem` }}\n    >\n      <SvgFileSmall className=\"size-4 stroke-text-03 shrink-0\" />\n      <Text mainContentMono text04 className=\"truncate flex-1\">\n        {entry.name}\n      </Text>\n      {entry.size !== null && (\n        <Text secondaryBody text03 className=\"shrink-0\">\n          {formatSize(entry.size)}\n        </Text>\n      )}\n      <div className=\"flex flex-row gap-1 opacity-0 group-hover:opacity-100 transition-opacity\">\n        {canPreview && (\n          <Button\n            variant=\"action\"\n            prominence=\"tertiary\"\n            icon={SvgEye}\n            onClick={(e) => {\n              e.stopPropagation();\n              onPreview(entry);\n            }}\n          >\n            Preview\n          </Button>\n        )}\n        <a\n          href={downloadUrl}\n          download={entry.name}\n          onClick={(e) => e.stopPropagation()}\n        >\n          <Button\n            variant=\"action\"\n            prominence=\"tertiary\"\n            icon={SvgDownloadCloud}\n          >\n            Download\n          </Button>\n        </a>\n      </div>\n    </div>\n  );\n}\n\nexport default function FileBrowser({ sessionId }: FileBrowserProps) {\n  const [rootEntries, setRootEntries] = useState<FileSystemEntry[] | null>(\n    null\n  );\n  const [isLoading, setIsLoading] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n  const [previewFile, setPreviewFile] = useState<FileSystemEntry | null>(null);\n  const [isOpen, setIsOpen] = useState(true);\n\n  const loadRoot = useCallback(async () => {\n    if (rootEntries !== null) return;\n\n    setIsLoading(true);\n    setError(null);\n    try {\n      const listing = await listDirectory(sessionId);\n      setRootEntries(listing.entries);\n    } catch (err) {\n      setError(\n        err instanceof Error ? err.message : \"Failed to load file system\"\n      );\n    } finally {\n      setIsLoading(false);\n    }\n  }, [sessionId, rootEntries]);\n\n  const handleToggleRoot = async (open: boolean) => {\n    setIsOpen(open);\n    if (open) {\n      await loadRoot();\n    }\n  };\n\n  const handlePreview = (entry: FileSystemEntry) => {\n    setPreviewFile(entry);\n  };\n\n  const handleClosePreview = () => {\n    setPreviewFile(null);\n  };\n\n  // Auto-load on mount\n  useEffect(() => {\n    loadRoot();\n  }, []);\n\n  return (\n    <>\n      <div className=\"border border-border-01 rounded-08 overflow-hidden\">\n        <Collapsible open={isOpen} onOpenChange={handleToggleRoot}>\n          <CollapsibleTrigger asChild>\n            <button className=\"w-full flex flex-row items-center gap-2 p-2 bg-background-neutral-01 hover:bg-background-neutral-02 transition-colors\">\n              {isLoading ? (\n                <SvgLoader className=\"size-4 stroke-text-03 animate-spin\" />\n              ) : isOpen ? (\n                <SvgChevronDown className=\"size-4 stroke-text-03\" />\n              ) : (\n                <SvgChevronRight className=\"size-4 stroke-text-03\" />\n              )}\n              <SvgHardDrive className=\"size-4 stroke-text-03\" />\n              <Text mainUiAction text03>\n                Workspace Files\n              </Text>\n            </button>\n          </CollapsibleTrigger>\n          <CollapsibleContent>\n            <div className=\"p-1 max-h-[50vh] overflow-auto\">\n              {error && (\n                <Text secondaryBody className=\"text-status-error-01 p-2\">\n                  {error}\n                </Text>\n              )}\n              {rootEntries?.length === 0 && (\n                <Text secondaryBody text03 className=\"p-2 text-center\">\n                  No files yet\n                </Text>\n              )}\n              {rootEntries?.map((entry) =>\n                entry.is_directory ? (\n                  <DirectoryNode\n                    key={entry.path}\n                    entry={entry}\n                    sessionId={sessionId}\n                    depth={0}\n                    onPreview={handlePreview}\n                  />\n                ) : (\n                  <FileNode\n                    key={entry.path}\n                    entry={entry}\n                    sessionId={sessionId}\n                    depth={0}\n                    onPreview={handlePreview}\n                  />\n                )\n              )}\n            </div>\n          </CollapsibleContent>\n        </Collapsible>\n      </div>\n\n      {previewFile && (\n        <FilePreviewModal\n          sessionId={sessionId}\n          entry={previewFile}\n          onClose={handleClosePreview}\n        />\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/FilePreviewModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport Modal from \"@/refresh-components/Modal\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { SvgFileText, SvgDownloadCloud, SvgImage } from \"@opal/icons\";\nimport { getArtifactUrl, FileSystemEntry } from \"@/lib/build/client\";\n\ninterface FilePreviewModalProps {\n  sessionId: string;\n  entry: FileSystemEntry;\n  onClose: () => void;\n}\n\nexport default function FilePreviewModal({\n  sessionId,\n  entry,\n  onClose,\n}: FilePreviewModalProps) {\n  const [content, setContent] = useState<string | null>(null);\n  const [isLoading, setIsLoading] = useState(true);\n  const [error, setError] = useState<string | null>(null);\n\n  const downloadUrl = getArtifactUrl(sessionId, entry.path);\n  const isImage = entry.mime_type?.startsWith(\"image/\");\n\n  useEffect(() => {\n    if (isImage) {\n      setIsLoading(false);\n      return;\n    }\n\n    const fetchContent = async () => {\n      setIsLoading(true);\n      setError(null);\n      try {\n        const response = await fetch(downloadUrl);\n        if (!response.ok) {\n          throw new Error(`Failed to fetch file: ${response.statusText}`);\n        }\n        const text = await response.text();\n        setContent(text);\n      } catch (err) {\n        setError(err instanceof Error ? err.message : \"Failed to load file\");\n      } finally {\n        setIsLoading(false);\n      }\n    };\n\n    fetchContent();\n  }, [downloadUrl, isImage]);\n\n  return (\n    <Modal open onOpenChange={(open) => !open && onClose()}>\n      <Modal.Content>\n        <Modal.Header\n          icon={isImage ? SvgImage : SvgFileText}\n          title={entry.name}\n          description={entry.path}\n          onClose={onClose}\n        />\n        <Modal.Body>\n          {isLoading ? (\n            <div className=\"flex items-center justify-center p-8\">\n              <SimpleLoader />\n            </div>\n          ) : error ? (\n            <Text secondaryBody className=\"text-status-error-01\">\n              {error}\n            </Text>\n          ) : isImage ? (\n            <div className=\"flex items-center justify-center p-4\">\n              {/* eslint-disable-next-line @next/next/no-img-element */}\n              <img\n                src={downloadUrl}\n                alt={entry.name}\n                className=\"max-w-full max-h-[60vh] object-contain rounded-08\"\n              />\n            </div>\n          ) : (\n            <div className=\"w-full overflow-auto max-h-[60vh] rounded-08 bg-background-neutral-02 border border-border-01\">\n              <pre className=\"p-4 text-sm font-mono whitespace-pre-wrap break-words text-text-04\">\n                {content}\n              </pre>\n            </div>\n          )}\n        </Modal.Body>\n        <Modal.Footer>\n          <a href={downloadUrl} download={entry.name}>\n            <Button\n              variant=\"action\"\n              prominence=\"secondary\"\n              icon={SvgDownloadCloud}\n            >\n              Download\n            </Button>\n          </a>\n          <Button variant=\"action\" onClick={onClose}>\n            Close\n          </Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/InputBar.tsx",
    "content": "\"use client\";\n\nimport {\n  memo,\n  forwardRef,\n  useImperativeHandle,\n  useCallback,\n  useEffect,\n  useRef,\n  useState,\n  type ChangeEvent,\n  type ClipboardEvent,\n  type KeyboardEvent,\n} from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport { getPastedFilesIfNoText } from \"@/lib/clipboard\";\nimport { cn, isImageFile } from \"@/lib/utils\";\nimport { Disabled } from \"@opal/core\";\nimport {\n  useUploadFilesContext,\n  BuildFile,\n  UploadFileStatus,\n} from \"@/app/craft/contexts/UploadFilesContext\";\nimport { useDemoDataEnabled } from \"@/app/craft/hooks/useBuildSessionStore\";\nimport { CRAFT_CONFIGURE_PATH } from \"@/app/craft/v1/constants\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport SelectButton from \"@/refresh-components/buttons/SelectButton\";\nimport { Button } from \"@opal/components\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport {\n  SvgArrowUp,\n  SvgClock,\n  SvgFileText,\n  SvgImage,\n  SvgLoader,\n  SvgX,\n  SvgPaperclip,\n  SvgOrganization,\n  SvgAlertCircle,\n} from \"@opal/icons\";\n\nconst MAX_INPUT_HEIGHT = 200;\n\nexport interface InputBarHandle {\n  reset: () => void;\n  focus: () => void;\n  setMessage: (message: string) => void;\n}\n\nexport interface InputBarProps {\n  onSubmit: (\n    message: string,\n    files: BuildFile[],\n    demoDataEnabled: boolean\n  ) => void;\n  isRunning: boolean;\n  disabled?: boolean;\n  placeholder?: string;\n  /** When true, shows spinner on send button with \"Initializing sandbox...\" tooltip */\n  sandboxInitializing?: boolean;\n  /** When true, removes bottom rounding to allow seamless connection with components below */\n  noBottomRounding?: boolean;\n  /** Whether this is the welcome page (no existing session in URL). Used for Demo Data pill. */\n  isWelcomePage?: boolean;\n}\n\n/**\n * Simple file card for displaying attached files\n */\nfunction BuildFileCard({\n  file,\n  onRemove,\n}: {\n  file: BuildFile;\n  onRemove: (id: string) => void;\n}) {\n  const isImage = isImageFile(file.name);\n  const isUploading = file.status === UploadFileStatus.UPLOADING;\n  const isPending = file.status === UploadFileStatus.PENDING;\n  const isFailed = file.status === UploadFileStatus.FAILED;\n\n  const cardContent = (\n    <div\n      className={cn(\n        \"flex items-center gap-1.5 px-2 py-1 rounded-08\",\n        \"bg-background-neutral-01 border\",\n        \"text-sm text-text-04\",\n        isFailed ? \"border-status-error-02\" : \"border-border-01\"\n      )}\n    >\n      {isUploading ? (\n        <SvgLoader className=\"h-4 w-4 animate-spin text-text-03\" />\n      ) : isPending ? (\n        <SvgClock className=\"h-4 w-4 text-text-03\" />\n      ) : isFailed ? (\n        <SvgAlertCircle className=\"h-4 w-4 text-status-error-02\" />\n      ) : isImage ? (\n        <SvgImage className=\"h-4 w-4 text-text-03\" />\n      ) : (\n        <SvgFileText className=\"h-4 w-4 text-text-03\" />\n      )}\n      <span\n        className={cn(\n          \"max-w-[120px] truncate\",\n          isFailed && \"text-status-error-02\"\n        )}\n      >\n        {file.name}\n      </span>\n      <button\n        onClick={() => onRemove(file.id)}\n        className=\"ml-1 p-0.5 hover:bg-background-neutral-02 rounded\"\n      >\n        <SvgX className=\"h-3 w-3 text-text-03\" />\n      </button>\n    </div>\n  );\n\n  // Wrap in tooltip for error or pending status\n  if (isFailed && file.error) {\n    return (\n      <SimpleTooltip tooltip={file.error} side=\"top\">\n        {cardContent}\n      </SimpleTooltip>\n    );\n  }\n\n  if (isPending) {\n    return (\n      <SimpleTooltip tooltip=\"Waiting for session to be ready...\" side=\"top\">\n        {cardContent}\n      </SimpleTooltip>\n    );\n  }\n\n  return cardContent;\n}\n\n/**\n * InputBar - Text input with file attachment support\n *\n * File upload state is managed by UploadFilesContext. This component just:\n * - Triggers file selection/paste\n * - Displays attached files\n * - Handles message submission\n *\n * The context handles:\n * - Session binding (which session to upload to)\n * - Auto-upload when session becomes available\n * - Fetching existing attachments on session change\n */\nconst InputBar = memo(\n  forwardRef<InputBarHandle, InputBarProps>(\n    (\n      {\n        onSubmit,\n        isRunning,\n        disabled = false,\n        placeholder = \"Describe your task...\",\n        sandboxInitializing = false,\n        noBottomRounding = false,\n        isWelcomePage = false,\n      },\n      ref\n    ) => {\n      const router = useRouter();\n      const demoDataEnabled = useDemoDataEnabled();\n      const [message, setMessage] = useState(\"\");\n\n      const textAreaRef = useRef<HTMLTextAreaElement>(null);\n      const containerRef = useRef<HTMLDivElement>(null);\n      const fileInputRef = useRef<HTMLInputElement>(null);\n\n      const {\n        currentMessageFiles,\n        uploadFiles,\n        removeFile,\n        clearFiles,\n        hasUploadingFiles,\n      } = useUploadFilesContext();\n\n      // Expose reset, focus, and setMessage methods to parent via ref\n      useImperativeHandle(ref, () => ({\n        reset: () => {\n          setMessage(\"\");\n          clearFiles();\n        },\n        focus: () => {\n          textAreaRef.current?.focus();\n        },\n        setMessage: (msg: string) => {\n          setMessage(msg);\n          // Move cursor to end after setting message\n          setTimeout(() => {\n            const textarea = textAreaRef.current;\n            if (textarea) {\n              textarea.focus();\n              textarea.setSelectionRange(msg.length, msg.length);\n            }\n          }, 0);\n        },\n      }));\n\n      // Auto-resize textarea based on content\n      useEffect(() => {\n        const textarea = textAreaRef.current;\n        if (textarea) {\n          textarea.style.height = \"0px\";\n          textarea.style.height = `${Math.min(\n            textarea.scrollHeight,\n            MAX_INPUT_HEIGHT\n          )}px`;\n        }\n      }, [message]);\n\n      // Auto-focus on mount\n      useEffect(() => {\n        textAreaRef.current?.focus();\n      }, []);\n\n      const handleFileSelect = useCallback(\n        async (e: ChangeEvent<HTMLInputElement>) => {\n          const files = e.target.files;\n          if (!files || files.length === 0) return;\n          // Context handles session binding internally\n          uploadFiles(Array.from(files));\n          e.target.value = \"\";\n        },\n        [uploadFiles]\n      );\n\n      const handlePaste = useCallback(\n        (event: ClipboardEvent) => {\n          const pastedFiles = getPastedFilesIfNoText(event.clipboardData);\n          if (pastedFiles.length > 0) {\n            event.preventDefault();\n            // Context handles session binding internally\n            uploadFiles(pastedFiles);\n          }\n        },\n        [uploadFiles]\n      );\n\n      const handleInputChange = useCallback(\n        (event: ChangeEvent<HTMLTextAreaElement>) => {\n          setMessage(event.target.value);\n        },\n        []\n      );\n\n      const handleSubmit = useCallback(() => {\n        if (disabled || isRunning || hasUploadingFiles || sandboxInitializing)\n          return;\n\n        const hasMessage = message.trim().length > 0;\n        const hasFiles = currentMessageFiles.length > 0;\n\n        if (hasMessage) {\n          onSubmit(message.trim(), currentMessageFiles, demoDataEnabled);\n          setMessage(\"\");\n          clearFiles({ suppressRefetch: true });\n        } else if (hasFiles) {\n          // User hit Enter with only files attached: remove files from input bar\n          // (File stays in session; no way to delete from session for now)\n          clearFiles({ suppressRefetch: true });\n        }\n      }, [\n        message,\n        disabled,\n        isRunning,\n        hasUploadingFiles,\n        sandboxInitializing,\n        onSubmit,\n        currentMessageFiles,\n        clearFiles,\n        demoDataEnabled,\n      ]);\n\n      const handleKeyDown = useCallback(\n        (event: KeyboardEvent<HTMLTextAreaElement>) => {\n          if (\n            event.key === \"Enter\" &&\n            !event.shiftKey &&\n            !(event.nativeEvent as any).isComposing\n          ) {\n            event.preventDefault();\n            handleSubmit();\n          }\n        },\n        [handleSubmit]\n      );\n\n      const canSubmit =\n        message.trim().length > 0 &&\n        !disabled &&\n        !isRunning &&\n        !hasUploadingFiles &&\n        !sandboxInitializing;\n\n      return (\n        <Disabled disabled={disabled}>\n          <div\n            ref={containerRef}\n            className={cn(\n              \"w-full flex flex-col shadow-01 bg-background-neutral-00\",\n              noBottomRounding ? \"rounded-t-16 rounded-b-none\" : \"rounded-16\"\n            )}\n          >\n            {/* Hidden file input */}\n            <input\n              ref={fileInputRef}\n              type=\"file\"\n              className=\"hidden\"\n              multiple\n              onChange={handleFileSelect}\n              accept=\"*/*\"\n            />\n\n            {/* Attached Files */}\n            {currentMessageFiles.length > 0 && (\n              <div className=\"p-2 rounded-t-16 flex flex-wrap gap-1\">\n                {currentMessageFiles.map((file) => (\n                  <BuildFileCard\n                    key={file.id}\n                    file={file}\n                    onRemove={removeFile}\n                  />\n                ))}\n              </div>\n            )}\n\n            {/* Input area */}\n            <textarea\n              onPaste={handlePaste}\n              onChange={handleInputChange}\n              onKeyDown={handleKeyDown}\n              ref={textAreaRef}\n              className={cn(\n                \"w-full\",\n                \"h-[44px]\",\n                \"outline-none\",\n                \"bg-transparent\",\n                \"resize-none\",\n                \"placeholder:text-text-03\",\n                \"whitespace-pre-wrap\",\n                \"break-word\",\n                \"overscroll-contain\",\n                \"overflow-y-auto\",\n                \"px-3\",\n                \"pb-2\",\n                \"pt-3\"\n              )}\n              autoFocus\n              style={{ scrollbarWidth: \"thin\" }}\n              role=\"textarea\"\n              aria-multiline\n              placeholder={placeholder}\n              value={message}\n              disabled={disabled}\n            />\n\n            {/* Bottom controls */}\n            <div className=\"flex justify-between items-center w-full p-1 min-h-[40px]\">\n              {/* Bottom left controls */}\n              <div className=\"flex flex-row items-center gap-1\">\n                {/* (+) button for file upload */}\n                <Button\n                  disabled={disabled}\n                  icon={SvgPaperclip}\n                  tooltip=\"Attach Files\"\n                  prominence=\"tertiary\"\n                  onClick={() => fileInputRef.current?.click()}\n                />\n                {/* Demo Data indicator pill - only show on welcome page (no session) when demo data is enabled */}\n                {demoDataEnabled && isWelcomePage && (\n                  <SimpleTooltip\n                    tooltip=\"Switch to your data in the Configure panel!\"\n                    side=\"top\"\n                  >\n                    <span>\n                      <SelectButton\n                        disabled={disabled}\n                        leftIcon={SvgOrganization}\n                        engaged={demoDataEnabled}\n                        action\n                        folded\n                        onClick={() => router.push(CRAFT_CONFIGURE_PATH)}\n                        className=\"bg-action-link-01\"\n                      >\n                        Demo Data Active\n                      </SelectButton>\n                    </span>\n                  </SimpleTooltip>\n                )}\n              </div>\n\n              {/* Bottom right controls */}\n              <div className=\"flex flex-row items-center gap-1\">\n                {/* Submit button */}\n                {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n                <IconButton\n                  icon={sandboxInitializing ? SvgLoader : SvgArrowUp}\n                  onClick={handleSubmit}\n                  disabled={!canSubmit}\n                  tooltip={\n                    sandboxInitializing ? \"Initializing sandbox...\" : \"Send\"\n                  }\n                  iconClassName={\n                    sandboxInitializing ? \"animate-spin\" : undefined\n                  }\n                />\n              </div>\n            </div>\n          </div>\n        </Disabled>\n      );\n    }\n  )\n);\n\nInputBar.displayName = \"InputBar\";\n\nexport default InputBar;\n"
  },
  {
    "path": "web/src/app/craft/components/IntroBackground.tsx",
    "content": "// Floating dust particles background with mouse interaction\nimport { useEffect, useRef, useState, useCallback } from \"react\";\n\ninterface Particle {\n  x: number;\n  y: number;\n  vx: number;\n  vy: number;\n  size: number;\n  opacity: number;\n  baseOpacity: number;\n  mass: number;\n  id: number;\n  glowMultiplier?: number;\n  glowVelocity?: number;\n}\n\ninterface BuildModeIntroBackgroundProps {\n  particleCount?: number;\n  particleSize?: number;\n  particleOpacity?: number;\n  glowIntensity?: number;\n  movementSpeed?: number;\n  mouseInfluence?: number;\n  backgroundColor?: string;\n  particleColor?: string;\n  mouseGravity?: \"none\" | \"attract\" | \"repel\";\n  gravityStrength?: number;\n  glowAnimation?: \"instant\" | \"ease\" | \"spring\";\n  particleInteraction?: boolean;\n  interactionType?: \"bounce\" | \"merge\";\n}\n\n/**\n * @framerSupportedLayoutWidth any\n * @framerSupportedLayoutHeight any\n */\nexport default function BuildModeIntroBackground(\n  props: BuildModeIntroBackgroundProps\n) {\n  const {\n    particleCount = 400,\n    particleSize = 2,\n    particleOpacity = 1,\n    glowIntensity = 20,\n    movementSpeed = 0.75,\n    mouseInfluence = 100,\n    backgroundColor = \"#000000\",\n    particleColor = \"#FFFFFF\",\n    mouseGravity = \"attract\",\n    gravityStrength = 50,\n    glowAnimation = \"ease\",\n    particleInteraction = true,\n    interactionType = \"bounce\",\n  } = props;\n\n  const canvasRef = useRef<HTMLCanvasElement>(null);\n  const animationRef = useRef<number | undefined>(undefined);\n  const mouseRef = useRef({ x: 0, y: 0 });\n  const particlesRef = useRef<Particle[]>([]);\n  const [canvasSize, setCanvasSize] = useState({ width: 800, height: 600 });\n  const containerRef = useRef<HTMLDivElement>(null);\n\n  const initializeParticles = useCallback(\n    (width: number, height: number) => {\n      return Array.from({ length: particleCount }, (_, index) => ({\n        x: Math.random() * width,\n        y: Math.random() * height,\n        vx: (Math.random() - 0.5) * movementSpeed,\n        vy: (Math.random() - 0.5) * movementSpeed,\n        size: Math.random() * particleSize + 1,\n        opacity: particleOpacity,\n        baseOpacity: particleOpacity,\n        mass: Math.random() * 0.5 + 0.5,\n        id: index,\n      }));\n    },\n    [particleCount, particleSize, particleOpacity, movementSpeed]\n  );\n\n  const redistributeParticles = useCallback((width: number, height: number) => {\n    particlesRef.current.forEach((particle) => {\n      // Redistribute particles proportionally across the new dimensions\n      particle.x = Math.random() * width;\n      particle.y = Math.random() * height;\n    });\n  }, []);\n\n  const updateParticles = useCallback(\n    (canvas: HTMLCanvasElement) => {\n      const rect = canvas.getBoundingClientRect();\n      const mouse = mouseRef.current;\n\n      particlesRef.current.forEach((particle, index) => {\n        // Calculate distance to mouse\n        const dx = mouse.x - particle.x;\n        const dy = mouse.y - particle.y;\n        const distance = Math.sqrt(dx * dx + dy * dy);\n\n        // Mouse influence and gravity\n        if (distance < mouseInfluence && distance > 0) {\n          const force = (mouseInfluence - distance) / mouseInfluence;\n          const normalizedDx = dx / distance;\n          const normalizedDy = dy / distance;\n          const gravityForce = force * (gravityStrength * 0.001);\n\n          // Apply gravity effect based on mouseGravity setting\n          if (mouseGravity === \"attract\") {\n            particle.vx += normalizedDx * gravityForce;\n            particle.vy += normalizedDy * gravityForce;\n          } else if (mouseGravity === \"repel\") {\n            particle.vx -= normalizedDx * gravityForce;\n            particle.vy -= normalizedDy * gravityForce;\n          }\n\n          particle.opacity = Math.min(1, particle.baseOpacity + force * 0.4);\n\n          // Apply glow animation based on type\n          const targetGlow = 1 + force * 2;\n          const currentGlow = particle.glowMultiplier || 1;\n\n          if (glowAnimation === \"instant\") {\n            particle.glowMultiplier = targetGlow;\n          } else if (glowAnimation === \"ease\") {\n            // Ease in-out animation\n            const easeSpeed = 0.15;\n            particle.glowMultiplier =\n              currentGlow + (targetGlow - currentGlow) * easeSpeed;\n          } else if (glowAnimation === \"spring\") {\n            // Spring animation with overshoot\n            const springForce = (targetGlow - currentGlow) * 0.2;\n            const damping = 0.85;\n            particle.glowVelocity =\n              (particle.glowVelocity || 0) * damping + springForce;\n            particle.glowMultiplier = currentGlow + particle.glowVelocity;\n          }\n        } else {\n          particle.opacity = Math.max(\n            particle.baseOpacity * 0.3,\n            particle.opacity - 0.02\n          );\n\n          // Return glow to normal based on animation type\n          const targetGlow = 1;\n          const currentGlow = particle.glowMultiplier || 1;\n\n          if (glowAnimation === \"instant\") {\n            particle.glowMultiplier = targetGlow;\n          } else if (glowAnimation === \"ease\") {\n            const easeSpeed = 0.08;\n            particle.glowMultiplier = Math.max(\n              1,\n              currentGlow + (targetGlow - currentGlow) * easeSpeed\n            );\n          } else if (glowAnimation === \"spring\") {\n            const springForce = (targetGlow - currentGlow) * 0.15;\n            const damping = 0.9;\n            particle.glowVelocity =\n              (particle.glowVelocity || 0) * damping + springForce;\n            particle.glowMultiplier = Math.max(\n              1,\n              currentGlow + particle.glowVelocity\n            );\n          }\n        }\n\n        // Particle interaction\n        if (particleInteraction) {\n          for (let j = index + 1; j < particlesRef.current.length; j++) {\n            const other = particlesRef.current[j];\n            if (!other) continue;\n            const dx = other.x - particle.x;\n            const dy = other.y - particle.y;\n            const distance = Math.sqrt(dx * dx + dy * dy);\n            const minDistance = particle.size + other.size + 5;\n\n            if (distance < minDistance && distance > 0) {\n              if (interactionType === \"bounce\") {\n                // Elastic collision\n                const normalX = dx / distance;\n                const normalY = dy / distance;\n\n                // Relative velocity\n                const relativeVx = particle.vx - other.vx;\n                const relativeVy = particle.vy - other.vy;\n\n                // Relative velocity in collision normal direction\n                const speed = relativeVx * normalX + relativeVy * normalY;\n\n                // Only resolve if velocities are separating\n                if (speed < 0) return;\n\n                // Collision impulse\n                const impulse = (2 * speed) / (particle.mass + other.mass);\n\n                // Update velocities\n                particle.vx -= impulse * other.mass * normalX;\n                particle.vy -= impulse * other.mass * normalY;\n                other.vx += impulse * particle.mass * normalX;\n                other.vy += impulse * particle.mass * normalY;\n\n                // Separate particles to prevent overlap\n                const overlap = minDistance - distance;\n                const separationX = normalX * overlap * 0.5;\n                const separationY = normalY * overlap * 0.5;\n\n                particle.x -= separationX;\n                particle.y -= separationY;\n                other.x += separationX;\n                other.y += separationY;\n              } else if (interactionType === \"merge\") {\n                // Temporary merge effect - increase glow and size\n                const mergeForce = (minDistance - distance) / minDistance;\n                particle.glowMultiplier =\n                  (particle.glowMultiplier || 1) + mergeForce * 0.5;\n                other.glowMultiplier =\n                  (other.glowMultiplier || 1) + mergeForce * 0.5;\n\n                // Attract particles slightly\n                const attractForce = mergeForce * 0.01;\n                particle.vx += dx * attractForce;\n                particle.vy += dy * attractForce;\n                other.vx -= dx * attractForce;\n                other.vy -= dy * attractForce;\n              }\n            }\n          }\n        }\n\n        // Update position\n        particle.x += particle.vx;\n        particle.y += particle.vy;\n\n        // Add subtle random movement\n        particle.vx += (Math.random() - 0.5) * 0.001;\n        particle.vy += (Math.random() - 0.5) * 0.001;\n\n        // Damping\n        particle.vx *= 0.999;\n        particle.vy *= 0.999;\n\n        // Boundary wrapping\n        if (particle.x < 0) particle.x = rect.width;\n        if (particle.x > rect.width) particle.x = 0;\n        if (particle.y < 0) particle.y = rect.height;\n        if (particle.y > rect.height) particle.y = 0;\n      });\n    },\n    [\n      mouseInfluence,\n      mouseGravity,\n      gravityStrength,\n      glowAnimation,\n      particleInteraction,\n      interactionType,\n    ]\n  );\n\n  const drawParticles = useCallback(\n    (ctx: CanvasRenderingContext2D) => {\n      ctx.clearRect(0, 0, ctx.canvas.width, ctx.canvas.height);\n\n      particlesRef.current.forEach((particle) => {\n        ctx.save();\n\n        // Create glow effect with enhanced blur based on interaction\n        const currentGlowMultiplier = particle.glowMultiplier || 1;\n        ctx.shadowColor = particleColor;\n        ctx.shadowBlur = glowIntensity * currentGlowMultiplier * 2;\n        ctx.globalAlpha = particle.opacity;\n\n        ctx.fillStyle = particleColor;\n        ctx.beginPath();\n        ctx.arc(particle.x, particle.y, particle.size, 0, Math.PI * 2);\n        ctx.fill();\n\n        ctx.restore();\n      });\n    },\n    [particleColor, glowIntensity]\n  );\n\n  const animate = useCallback(() => {\n    const canvas = canvasRef.current;\n    if (!canvas) return;\n\n    const ctx = canvas.getContext(\"2d\");\n    if (!ctx) return;\n\n    updateParticles(canvas);\n    drawParticles(ctx);\n\n    animationRef.current = requestAnimationFrame(animate);\n  }, [updateParticles, drawParticles]);\n\n  const handleMouseMove = useCallback((e: MouseEvent) => {\n    const canvas = canvasRef.current;\n    if (!canvas) return;\n\n    const rect = canvas.getBoundingClientRect();\n    mouseRef.current = {\n      x: e.clientX - rect.left,\n      y: e.clientY - rect.top,\n    };\n  }, []);\n\n  const resizeCanvas = useCallback(() => {\n    const canvas = canvasRef.current;\n    const container = containerRef.current;\n    if (!canvas || !container) return;\n\n    const rect = container.getBoundingClientRect();\n    const newWidth = rect.width;\n    const newHeight = rect.height;\n\n    canvas.width = newWidth;\n    canvas.height = newHeight;\n\n    // Update canvas size state and redistribute particles\n    setCanvasSize({ width: newWidth, height: newHeight });\n\n    // Only redistribute if particles exist and size changed significantly\n    if (particlesRef.current.length > 0) {\n      redistributeParticles(newWidth, newHeight);\n    }\n  }, [redistributeParticles]);\n\n  // Effect to reinitialize particles when particle count changes\n  useEffect(() => {\n    const canvas = canvasRef.current;\n    if (!canvas) return;\n\n    particlesRef.current = initializeParticles(\n      canvas.width || canvasSize.width,\n      canvas.height || canvasSize.height\n    );\n  }, [particleCount, initializeParticles, canvasSize]);\n\n  // Effect to update particle properties when they change\n  useEffect(() => {\n    particlesRef.current.forEach((particle) => {\n      particle.baseOpacity = particleOpacity;\n      particle.opacity = particleOpacity;\n      // Update velocity based on new movement speed\n      const currentSpeed = Math.sqrt(\n        particle.vx * particle.vx + particle.vy * particle.vy\n      );\n      if (currentSpeed > 0) {\n        const ratio = movementSpeed / currentSpeed;\n        particle.vx *= ratio;\n        particle.vy *= ratio;\n      }\n    });\n  }, [particleOpacity, movementSpeed]);\n\n  useEffect(() => {\n    resizeCanvas();\n\n    if (typeof window !== \"undefined\") {\n      window.addEventListener(\"mousemove\", handleMouseMove);\n      window.addEventListener(\"resize\", resizeCanvas);\n    }\n\n    // Set up ResizeObserver for container\n    if (containerRef.current && typeof ResizeObserver !== \"undefined\") {\n      const resizeObserver = new ResizeObserver(() => {\n        resizeCanvas();\n      });\n      resizeObserver.observe(containerRef.current);\n\n      return () => {\n        resizeObserver.disconnect();\n        if (typeof window !== \"undefined\") {\n          window.removeEventListener(\"mousemove\", handleMouseMove);\n          window.removeEventListener(\"resize\", resizeCanvas);\n        }\n      };\n    }\n\n    return () => {\n      if (typeof window !== \"undefined\") {\n        window.removeEventListener(\"mousemove\", handleMouseMove);\n        window.removeEventListener(\"resize\", resizeCanvas);\n      }\n    };\n  }, [handleMouseMove, resizeCanvas]);\n\n  useEffect(() => {\n    animate();\n\n    return () => {\n      if (animationRef.current) {\n        cancelAnimationFrame(animationRef.current);\n      }\n    };\n  }, [animate]);\n\n  return (\n    <div\n      ref={containerRef}\n      style={{\n        width: \"100%\",\n        height: \"100%\",\n        backgroundColor,\n        position: \"relative\",\n        overflow: \"hidden\",\n      }}\n    >\n      <canvas\n        ref={canvasRef}\n        style={{\n          width: \"100%\",\n          height: \"100%\",\n          display: \"block\",\n        }}\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/IntroContent.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport { motion } from \"motion/react\";\nimport { track, AnalyticsEvent } from \"@/lib/analytics\";\nimport { OnyxLogoTypeIcon } from \"@/components/icons/icons\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport BigButton from \"@/app/craft/components/BigButton\";\n\ninterface BuildModeIntroContentProps {\n  onClose: () => void;\n  onTryBuildMode: () => void;\n}\n\nexport default function BuildModeIntroContent({\n  onClose,\n  onTryBuildMode,\n}: BuildModeIntroContentProps) {\n  // Track when user sees the craft intro\n  useEffect(() => {\n    track(AnalyticsEvent.SAW_CRAFT_INTRO);\n  }, []);\n\n  return (\n    <div className=\"absolute inset-0 flex flex-col items-center justify-center pointer-events-none\">\n      <div className=\"flex flex-col items-center gap-7 w-full\">\n        <motion.div\n          initial={{ opacity: 0, y: 20 }}\n          animate={{ opacity: 1, y: 0 }}\n          transition={{ duration: 0.8, delay: 0.5 }}\n          className=\"w-full relative\"\n        >\n          <div className=\"grid grid-cols-[1fr_auto_1fr] items-end\">\n            <div className=\"flex justify-end\">\n              <OnyxLogoTypeIcon size={385} className=\"text-white\" />\n            </div>\n            <div className=\"w-8\"></div>\n            <div className=\"flex justify-start\">\n              <div\n                className=\"relative inline-flex overflow-visible\"\n                style={{ transform: \"translateX(-0.6em)\" }}\n              >\n                <span className=\"relative inline-block leading-[3.5]\">\n                  <Text\n                    headingH1\n                    className=\"!text-9xl !text-white relative inline-block\"\n                    style={{\n                      fontFamily: \"var(--font-kh-teka)\",\n                      fontWeight: 500,\n                    }}\n                  >\n                    Craft\n                  </Text>\n                </span>\n                <span\n                  className=\"pointer-events-none absolute top-3 -right-14 text-[1em] uppercase tracking-[0.2em] !text-white\"\n                  style={{ fontFamily: \"var(--font-kh-teka)\", fontWeight: 500 }}\n                >\n                  BETA\n                </span>\n              </div>\n            </div>\n          </div>\n        </motion.div>\n        <motion.div\n          className=\"flex gap-5 pointer-events-auto justify-center\"\n          initial={{ opacity: 0, y: 20 }}\n          animate={{ opacity: 1, y: 0 }}\n          transition={{ duration: 0.8, delay: 1.3 }}\n        >\n          <BigButton\n            secondary\n            className=\"!border-white !text-white hover:!bg-white/10 active:!bg-white/20 !w-[160px]\"\n            onClick={(e) => {\n              e.stopPropagation();\n              track(AnalyticsEvent.CLICKED_GO_HOME);\n              onClose();\n            }}\n          >\n            Return Home\n          </BigButton>\n          <BigButton\n            primary\n            className=\"!bg-white !text-black hover:!bg-gray-200 active:!bg-gray-300 !w-[160px]\"\n            onClick={(e) => {\n              e.stopPropagation();\n              track(AnalyticsEvent.CLICKED_TRY_CRAFT);\n              onTryBuildMode();\n            }}\n          >\n            Start Crafting\n          </BigButton>\n        </motion.div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/OutputPanel.tsx",
    "content": "\"use client\";\n\nimport { memo, useState, useEffect, useCallback } from \"react\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport {\n  useSession,\n  useWebappNeedsRefresh,\n  useBuildSessionStore,\n  useFilePreviewTabs,\n  useActiveOutputTab,\n  useActiveFilePreviewPath,\n  usePreProvisionedSessionId,\n  useIsPreProvisioning,\n  useTabHistory,\n  OutputTabType,\n} from \"@/app/craft/hooks/useBuildSessionStore\";\nimport {\n  fetchWebappInfo,\n  fetchArtifacts,\n  exportDocx,\n} from \"@/app/craft/services/apiServices\";\nimport { cn, getFileIcon } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport {\n  SvgGlobe,\n  SvgHardDrive,\n  SvgFiles,\n  SvgX,\n  SvgMinus,\n  SvgMaximize2,\n} from \"@opal/icons\";\nimport { IconProps } from \"@opal/types\";\nimport CraftingLoader from \"@/app/craft/components/CraftingLoader\";\n\n// Output panel sub-components\nimport UrlBar from \"@/app/craft/components/output-panel/UrlBar\";\nimport PreviewTab from \"@/app/craft/components/output-panel/PreviewTab\";\nimport { FilePreviewContent } from \"@/app/craft/components/output-panel/FilePreviewContent\";\nimport FilesTab from \"@/app/craft/components/output-panel/FilesTab\";\nimport ArtifactsTab from \"@/app/craft/components/output-panel/ArtifactsTab\";\n\ntype TabValue = OutputTabType;\n\nconst tabs: { value: TabValue; label: string; icon: React.FC<IconProps> }[] = [\n  { value: \"preview\", label: \"Preview\", icon: SvgGlobe },\n  { value: \"files\", label: \"Files\", icon: SvgHardDrive },\n  { value: \"artifacts\", label: \"Artifacts\", icon: SvgFiles },\n];\n\ninterface BuildOutputPanelProps {\n  onClose: () => void;\n  isOpen: boolean;\n}\n\n/**\n * BuildOutputPanel - Right panel showing preview, files, and artifacts\n *\n * Features:\n * - Tabbed interface (Preview, Files, Artifacts)\n * - Live preview iframe for webapp artifacts\n * - File browser for exploring sandbox filesystem\n * - Artifact list with download/view options\n */\nconst BuildOutputPanel = memo(({ onClose, isOpen }: BuildOutputPanelProps) => {\n  const session = useSession();\n  const preProvisionedSessionId = usePreProvisionedSessionId();\n  const isPreProvisioning = useIsPreProvisioning();\n\n  // Get active tab state from store\n  const activeOutputTab = useActiveOutputTab();\n  const activeFilePreviewPath = useActiveFilePreviewPath();\n  const filePreviewTabs = useFilePreviewTabs();\n\n  // Store actions\n  const setActiveOutputTab = useBuildSessionStore(\n    (state) => state.setActiveOutputTab\n  );\n  const setNoSessionActiveOutputTab = useBuildSessionStore(\n    (state) => state.setNoSessionActiveOutputTab\n  );\n  const openFilePreview = useBuildSessionStore(\n    (state) => state.openFilePreview\n  );\n  const closeFilePreview = useBuildSessionStore(\n    (state) => state.closeFilePreview\n  );\n  const setActiveFilePreviewPath = useBuildSessionStore(\n    (state) => state.setActiveFilePreviewPath\n  );\n\n  // Store actions for refresh\n  const triggerFilesRefresh = useBuildSessionStore(\n    (state) => state.triggerFilesRefresh\n  );\n\n  // Counters to force-reload previews\n  const [previewRefreshKey, setPreviewRefreshKey] = useState(0);\n  const [filePreviewRefreshKey, setFilePreviewRefreshKey] = useState(0);\n\n  // Determine which tab is visually active\n  const isFilePreviewActive = activeFilePreviewPath !== null;\n  const activeTab = isFilePreviewActive ? null : activeOutputTab;\n\n  const handlePinnedTabClick = (tab: TabValue) => {\n    if (session?.id) {\n      setActiveOutputTab(session.id, tab);\n    } else {\n      // No session - use temporary state for tab switching\n      setNoSessionActiveOutputTab(tab);\n    }\n  };\n\n  const handlePreviewTabClick = (path: string) => {\n    if (session?.id) {\n      setActiveFilePreviewPath(session.id, path);\n    }\n  };\n\n  const handlePreviewTabClose = (e: React.MouseEvent, path: string) => {\n    e.stopPropagation(); // Don't trigger tab click\n    if (session?.id) {\n      closeFilePreview(session.id, path);\n    }\n  };\n\n  const handleFileClick = (path: string, fileName: string) => {\n    if (session?.id) {\n      openFilePreview(session.id, path, fileName);\n    }\n  };\n\n  const handleMaximize = () => {\n    setIsMaximized((prev) => !prev);\n  };\n\n  // Track when panel animation completes (defer fetch until fully open)\n  const [isFullyOpen, setIsFullyOpen] = useState(false);\n  // Track when content should unmount (delayed on close for animation)\n  const [shouldRenderContent, setShouldRenderContent] = useState(false);\n  // Track if panel is maximized\n  const [isMaximized, setIsMaximized] = useState(false);\n\n  useEffect(() => {\n    if (isOpen) {\n      // Render content immediately on open\n      setShouldRenderContent(true);\n      // Wait for 300ms CSS transition to complete before fetching\n      const timer = setTimeout(() => setIsFullyOpen(true), 300);\n      return () => clearTimeout(timer);\n    } else {\n      // Stop fetching immediately\n      setIsFullyOpen(false);\n      // Delay unmount until close animation completes\n      const timer = setTimeout(() => setShouldRenderContent(false), 300);\n      return () => clearTimeout(timer);\n    }\n  }, [isOpen]);\n\n  // Session-scoped URL caching\n  const [cachedWebappUrl, setCachedWebappUrl] = useState<string | null>(null);\n  const [cachedForSessionId, setCachedForSessionId] = useState<string | null>(\n    null\n  );\n\n  // Clear cache when session changes\n  useEffect(() => {\n    if (session?.id !== cachedForSessionId) {\n      setCachedWebappUrl(null);\n      setCachedForSessionId(session?.id ?? null);\n    }\n  }, [session?.id, cachedForSessionId]);\n\n  // Webapp refresh trigger from streaming / restore\n  const webappNeedsRefresh = useWebappNeedsRefresh();\n\n  // Track polling window: poll for up to 30s after a restore/refresh trigger\n  const [pollingDeadline, setPollingDeadline] = useState<number | null>(null);\n  const [isWebappReady, setIsWebappReady] = useState(false);\n\n  // When webappNeedsRefresh bumps (restore or file edit), start a 30s polling window\n  // and reset readiness so we poll until the server is back up\n  useEffect(() => {\n    if (webappNeedsRefresh > 0) {\n      setPollingDeadline(Date.now() + 30_000);\n      setIsWebappReady(false);\n\n      // Force a re-render after 30s to stop polling even if server never responded\n      const timer = setTimeout(() => setPollingDeadline(null), 30_000);\n      return () => clearTimeout(timer);\n    }\n  }, [webappNeedsRefresh]);\n\n  // Fetch webapp info from dedicated endpoint\n  // Only fetch for real sessions when panel is fully open\n  const shouldFetchWebapp =\n    isFullyOpen &&\n    session?.id &&\n    !session.id.startsWith(\"temp-\") &&\n    session.status !== \"creating\";\n\n  // Poll every 2s while NextJS is starting up (capped at 30s), then stop\n  const shouldPoll =\n    !isWebappReady && pollingDeadline !== null && Date.now() < pollingDeadline;\n\n  const { data: webappInfo, mutate } = useSWR(\n    shouldFetchWebapp ? SWR_KEYS.buildSessionWebappInfo(session.id) : null,\n    () => (session?.id ? fetchWebappInfo(session.id) : null),\n    {\n      refreshInterval: shouldPoll ? 2000 : 0,\n      revalidateOnFocus: true,\n      keepPreviousData: true,\n    }\n  );\n\n  // Update readiness from SWR response and clear polling deadline\n  useEffect(() => {\n    if (webappInfo?.ready) {\n      setIsWebappReady(true);\n      setPollingDeadline(null);\n    }\n  }, [webappInfo?.ready]);\n\n  // Update cache when SWR returns data for current session\n  useEffect(() => {\n    if (webappInfo?.webapp_url && session?.id === cachedForSessionId) {\n      setCachedWebappUrl(webappInfo.webapp_url);\n    }\n  }, [webappInfo?.webapp_url, session?.id, cachedForSessionId]);\n\n  // Refresh when web/ file changes or after restore\n  // webappNeedsRefresh is a counter that increments on each edit/restore,\n  // ensuring each triggers a new refresh even if the panel is already open\n  useEffect(() => {\n    if (webappNeedsRefresh > 0 && isFullyOpen && session?.id) {\n      mutate();\n    }\n  }, [webappNeedsRefresh, isFullyOpen, mutate, session?.id]);\n\n  const webappUrl = webappInfo?.webapp_url ?? null;\n\n  // Use cache only if it belongs to current session\n  const validCachedUrl =\n    cachedForSessionId === session?.id ? cachedWebappUrl : null;\n  const displayUrl = webappUrl ?? validCachedUrl;\n\n  // Tab navigation history\n  const tabHistory = useTabHistory();\n  const navigateTabBack = useBuildSessionStore(\n    (state) => state.navigateTabBack\n  );\n  const navigateTabForward = useBuildSessionStore(\n    (state) => state.navigateTabForward\n  );\n\n  const canGoBack = tabHistory.currentIndex > 0;\n  const canGoForward = tabHistory.currentIndex < tabHistory.entries.length - 1;\n\n  const handleBack = useCallback(() => {\n    if (session?.id) {\n      navigateTabBack(session.id);\n    }\n  }, [session?.id, navigateTabBack]);\n\n  const handleForward = useCallback(() => {\n    if (session?.id) {\n      navigateTabForward(session.id);\n    }\n  }, [session?.id, navigateTabForward]);\n\n  // Determine if the active file preview is a markdown or pptx file (for download buttons)\n  const isMarkdownPreview =\n    isFilePreviewActive &&\n    activeFilePreviewPath &&\n    /\\.md$/i.test(activeFilePreviewPath);\n\n  const isPptxPreview =\n    isFilePreviewActive &&\n    activeFilePreviewPath &&\n    /\\.pptx$/i.test(activeFilePreviewPath);\n\n  const isPdfPreview =\n    isFilePreviewActive &&\n    activeFilePreviewPath &&\n    /\\.pdf$/i.test(activeFilePreviewPath);\n\n  const [isExportingDocx, setIsExportingDocx] = useState(false);\n\n  const handleDocxDownload = useCallback(async () => {\n    if (!session?.id || !activeFilePreviewPath) return;\n    setIsExportingDocx(true);\n    try {\n      const blob = await exportDocx(session.id, activeFilePreviewPath);\n      const fileName =\n        activeFilePreviewPath.split(\"/\").pop() || activeFilePreviewPath;\n      const url = URL.createObjectURL(blob);\n      const link = document.createElement(\"a\");\n      link.href = url;\n      link.download = fileName.replace(/\\.md$/i, \".docx\");\n      document.body.appendChild(link);\n      link.click();\n      document.body.removeChild(link);\n      URL.revokeObjectURL(url);\n    } catch (err) {\n      console.error(\"Failed to export as DOCX:\", err);\n    } finally {\n      setIsExportingDocx(false);\n    }\n  }, [session?.id, activeFilePreviewPath]);\n\n  const handleRawFileDownload = useCallback(() => {\n    if (!session?.id || !activeFilePreviewPath) return;\n    const encodedPath = activeFilePreviewPath\n      .split(\"/\")\n      .map((s) => encodeURIComponent(s))\n      .join(\"/\");\n    const link = document.createElement(\"a\");\n    link.href = `/api/build/sessions/${session.id}/artifacts/${encodedPath}`;\n    link.download =\n      activeFilePreviewPath.split(\"/\").pop() || activeFilePreviewPath;\n    document.body.appendChild(link);\n    link.click();\n    document.body.removeChild(link);\n  }, [session?.id, activeFilePreviewPath]);\n\n  // Unified refresh handler — dispatches based on the active tab/preview\n  const handleRefresh = useCallback(() => {\n    if (isFilePreviewActive && activeFilePreviewPath) {\n      // File preview tab: bump key to reload standalone + content previews\n      setFilePreviewRefreshKey((k) => k + 1);\n    } else if (activeOutputTab === \"preview\") {\n      // Web preview tab: remount the iframe\n      setPreviewRefreshKey((k) => k + 1);\n    } else if (activeOutputTab === \"files\" && session?.id) {\n      // Files tab: clear cache and re-fetch directory listing\n      triggerFilesRefresh(session.id);\n    }\n  }, [\n    isFilePreviewActive,\n    activeFilePreviewPath,\n    activeOutputTab,\n    session?.id,\n    triggerFilesRefresh,\n  ]);\n\n  // Fetch artifacts - poll every 5 seconds when on artifacts tab\n  const shouldFetchArtifacts =\n    session?.id &&\n    !session.id.startsWith(\"temp-\") &&\n    session.status !== \"creating\" &&\n    activeTab === \"artifacts\";\n\n  const { data: polledArtifacts } = useSWR(\n    shouldFetchArtifacts ? SWR_KEYS.buildSessionArtifacts(session.id) : null,\n    () => (session?.id ? fetchArtifacts(session.id) : null),\n    {\n      refreshInterval: 5000, // Refresh every 5 seconds to catch new artifacts\n      revalidateOnFocus: true,\n    }\n  );\n\n  // Use polled artifacts if available, otherwise fall back to session store\n  const artifacts = polledArtifacts ?? session?.artifacts ?? [];\n\n  return (\n    <div\n      className={cn(\n        \"absolute z-20 flex flex-col border rounded-12 border-border-01 bg-background-neutral-00 overflow-hidden transition-all duration-300 ease-in-out\",\n        isMaximized\n          ? \"top-4 right-16 bottom-4 w-[calc(100%-8rem)]\"\n          : \"top-4 right-4 bottom-4 w-[calc(50%-2rem)]\",\n        isOpen\n          ? \"opacity-100 translate-x-0\"\n          : \"opacity-0 translate-x-full pointer-events-none\"\n      )}\n      style={{\n        boxShadow: \"0 8px 60px 30px rgba(0, 0, 0, 0.07)\",\n      }}\n    >\n      {/* Tab List - Chrome-style tabs */}\n      <div className=\"flex flex-col w-full\">\n        {/* Tabs row */}\n        <div className=\"flex items-end w-full pt-1.5 bg-background-tint-03\">\n          {/* macOS-style window controls - sticky on left */}\n          <div className=\"group flex items-center gap-2.5 pl-4 pr-2 py-3 flex-shrink-0\">\n            <button\n              onClick={onClose}\n              className=\"relative w-3.5 h-3.5 rounded-full bg-[#ff5f57] hover:bg-[#ff3b30] transition-colors flex-shrink-0 flex items-center justify-center\"\n              aria-label=\"No action\"\n            >\n              <SvgX\n                size={12}\n                strokeWidth={4}\n                className=\"opacity-0 group-hover:opacity-100 transition-opacity\"\n                style={{ stroke: \"#8a2e2a\" }}\n              />\n            </button>\n            <button\n              onClick={onClose}\n              className=\"relative w-3.5 h-3.5 rounded-full bg-[#ffbd2e] hover:bg-[#ffa000] transition-colors flex-shrink-0 flex items-center justify-center\"\n              aria-label=\"Close panel\"\n            >\n              <SvgMinus\n                size={12}\n                strokeWidth={3}\n                className=\"opacity-0 group-hover:opacity-100 transition-opacity\"\n                style={{ stroke: \"#8a6618\" }}\n              />\n            </button>\n            <button\n              onClick={handleMaximize}\n              className=\"relative w-3.5 h-3.5 rounded-full bg-[#28ca42] hover:bg-[#1fb832] transition-colors flex-shrink-0 flex items-center justify-center\"\n              aria-label=\"Maximize panel\"\n            >\n              <SvgMaximize2\n                size={8}\n                strokeWidth={2.5}\n                className=\"opacity-0 group-hover:opacity-90 rotate-90 transition-opacity\"\n                style={{ stroke: \"#155c24\" }}\n              />\n            </button>\n          </div>\n          {/* Scrollable tabs container */}\n          <div className=\"flex items-end gap-1.5 flex-1 pl-3 pr-2 overflow-x-auto [&::-webkit-scrollbar]:hidden [-ms-overflow-style:none] [scrollbar-width:none]\">\n            {/* Pinned tabs */}\n            {tabs.map((tab) => {\n              const Icon = tab.icon;\n              const isActive = activeTab === tab.value;\n              // Disable artifacts tab when no session\n              const isDisabled = tab.value === \"artifacts\" && !session;\n              return (\n                <button\n                  key={tab.value}\n                  onClick={() => !isDisabled && handlePinnedTabClick(tab.value)}\n                  disabled={isDisabled}\n                  title={\n                    isDisabled\n                      ? \"Start building something to see artifacts!\"\n                      : undefined\n                  }\n                  className={cn(\n                    \"relative inline-flex items-center justify-center gap-2 px-5\",\n                    \"max-w-[15%] min-w-fit\",\n                    isDisabled\n                      ? \"text-text-02 bg-transparent cursor-not-allowed py-1 mb-1\"\n                      : isActive\n                        ? \"bg-background-neutral-00 text-text-04 rounded-t-lg py-2\"\n                        : \"text-text-03 bg-transparent hover:bg-background-tint-02 rounded-full py-1 mb-1\"\n                  )}\n                >\n                  {/* Left curved joint */}\n                  {isActive && (\n                    <div\n                      className=\"absolute -left-3 bottom-0 w-3 h-3 bg-background-neutral-00\"\n                      style={{\n                        maskImage:\n                          \"radial-gradient(circle at 0 0, transparent 12px, black 12px)\",\n                        WebkitMaskImage:\n                          \"radial-gradient(circle at 0 0, transparent 12px, black 12px)\",\n                      }}\n                    />\n                  )}\n                  <Icon\n                    size={16}\n                    className={cn(\n                      \"stroke-current flex-shrink-0\",\n                      isDisabled\n                        ? \"stroke-text-02\"\n                        : isActive\n                          ? \"stroke-text-04\"\n                          : \"stroke-text-03\"\n                    )}\n                  />\n                  <Text\n                    className={cn(\"truncate\", isDisabled && \"text-text-02\")}\n                  >\n                    {tab.label}\n                  </Text>\n                  {/* Right curved joint */}\n                  {isActive && (\n                    <div\n                      className=\"absolute -right-3 bottom-0 w-3 h-3 bg-background-neutral-00\"\n                      style={{\n                        maskImage:\n                          \"radial-gradient(circle at 100% 0, transparent 12px, black 12px)\",\n                        WebkitMaskImage:\n                          \"radial-gradient(circle at 100% 0, transparent 12px, black 12px)\",\n                      }}\n                    />\n                  )}\n                </button>\n              );\n            })}\n\n            {/* Separator between pinned and preview tabs */}\n            {filePreviewTabs.length > 0 && (\n              <div className=\"w-px h-5 bg-border-02 mx-2 mb-1 self-center\" />\n            )}\n\n            {/* Preview tabs */}\n            {filePreviewTabs.map((previewTab) => {\n              const isActive = activeFilePreviewPath === previewTab.path;\n              const TabIcon = getFileIcon(previewTab.fileName);\n              return (\n                <button\n                  key={previewTab.path}\n                  onClick={() => handlePreviewTabClick(previewTab.path)}\n                  className={cn(\n                    \"group relative inline-flex items-center justify-center gap-1.5 px-3 pr-2\",\n                    \"max-w-[150px] min-w-fit\",\n                    isActive\n                      ? \"bg-background-neutral-00 text-text-04 rounded-t-lg py-2\"\n                      : \"text-text-03 bg-transparent hover:bg-background-tint-02 rounded-full py-1 mb-1\"\n                  )}\n                >\n                  {/* Left curved joint */}\n                  {isActive && (\n                    <div\n                      className=\"absolute -left-3 bottom-0 w-3 h-3 bg-background-neutral-00\"\n                      style={{\n                        maskImage:\n                          \"radial-gradient(circle at 0 0, transparent 12px, black 12px)\",\n                        WebkitMaskImage:\n                          \"radial-gradient(circle at 0 0, transparent 12px, black 12px)\",\n                      }}\n                    />\n                  )}\n                  <TabIcon\n                    size={14}\n                    className={cn(\n                      \"stroke-current flex-shrink-0\",\n                      isActive ? \"stroke-text-04\" : \"stroke-text-03\"\n                    )}\n                  />\n                  <Text className=\"truncate text-sm\">\n                    {previewTab.fileName}\n                  </Text>\n                  {/* Close button */}\n                  <button\n                    onClick={(e) => handlePreviewTabClose(e, previewTab.path)}\n                    className={cn(\n                      \"flex-shrink-0 p-0.5 rounded hover:bg-background-tint-03 transition-colors\",\n                      isActive\n                        ? \"opacity-100\"\n                        : \"opacity-0 group-hover:opacity-100\"\n                    )}\n                    aria-label={`Close ${previewTab.fileName}`}\n                  >\n                    <SvgX size={12} className=\"stroke-text-03\" />\n                  </button>\n                  {/* Right curved joint */}\n                  {isActive && (\n                    <div\n                      className=\"absolute -right-3 bottom-0 w-3 h-3 bg-background-neutral-00\"\n                      style={{\n                        maskImage:\n                          \"radial-gradient(circle at 100% 0, transparent 12px, black 12px)\",\n                        WebkitMaskImage:\n                          \"radial-gradient(circle at 100% 0, transparent 12px, black 12px)\",\n                      }}\n                    />\n                  )}\n                </button>\n              );\n            })}\n          </div>\n        </div>\n        {/* White bar connecting tabs to content */}\n        <div className=\"h-2 w-full bg-background-neutral-00\" />\n      </div>\n\n      {/* URL Bar - Chrome-style */}\n      <UrlBar\n        displayUrl={\n          isFilePreviewActive && activeFilePreviewPath\n            ? `sandbox://${activeFilePreviewPath}`\n            : activeOutputTab === \"preview\"\n              ? session\n                ? displayUrl || \"Loading...\"\n                : \"no-active-sandbox://\"\n              : activeOutputTab === \"files\"\n                ? session\n                  ? \"sandbox://\"\n                  : preProvisionedSessionId\n                    ? \"pre-provisioned-sandbox://\"\n                    : isPreProvisioning\n                      ? \"provisioning-sandbox://...\"\n                      : \"no-sandbox://\"\n                : \"artifacts://\"\n        }\n        showNavigation={true}\n        canGoBack={canGoBack}\n        canGoForward={canGoForward}\n        onBack={handleBack}\n        onForward={handleForward}\n        previewUrl={\n          !isFilePreviewActive &&\n          activeOutputTab === \"preview\" &&\n          displayUrl &&\n          displayUrl.startsWith(\"http\")\n            ? displayUrl\n            : null\n        }\n        onDownloadRaw={\n          isMarkdownPreview || isPptxPreview || isPdfPreview\n            ? handleRawFileDownload\n            : undefined\n        }\n        downloadRawTooltip={\n          isPdfPreview\n            ? \"Download PDF\"\n            : isPptxPreview\n              ? \"Download PPTX\"\n              : \"Download MD file\"\n        }\n        onDownload={isMarkdownPreview ? handleDocxDownload : undefined}\n        isDownloading={isExportingDocx}\n        onRefresh={handleRefresh}\n        sessionId={\n          !isFilePreviewActive &&\n          activeOutputTab === \"preview\" &&\n          session?.id &&\n          displayUrl?.startsWith(\"http\")\n            ? session.id\n            : undefined\n        }\n        sharingScope={webappInfo?.sharing_scope ?? \"private\"}\n        onScopeChange={mutate}\n      />\n\n      {/* Tab Content */}\n      <div className=\"flex-1 overflow-hidden rounded-b-08\">\n        {/* File preview content - shown when a preview tab is active */}\n        {isFilePreviewActive && activeFilePreviewPath && session?.id && (\n          <FilePreviewContent\n            sessionId={session.id}\n            filePath={activeFilePreviewPath}\n            refreshKey={filePreviewRefreshKey}\n          />\n        )}\n        {/* Pinned tab content - only show when no file preview is active */}\n        {!isFilePreviewActive && (\n          <>\n            {activeOutputTab === \"preview\" &&\n              shouldRenderContent &&\n              // Show crafting loader only when no session exists (welcome state)\n              // Otherwise, PreviewTab handles the loading/iframe display\n              (!session ? (\n                <CraftingLoader />\n              ) : (\n                <PreviewTab\n                  webappUrl={displayUrl}\n                  refreshKey={previewRefreshKey}\n                />\n              ))}\n            {activeOutputTab === \"files\" && (\n              <FilesTab\n                sessionId={session?.id ?? preProvisionedSessionId}\n                onFileClick={session ? handleFileClick : undefined}\n                isPreProvisioned={!session && !!preProvisionedSessionId}\n                isProvisioning={!session && isPreProvisioning}\n              />\n            )}\n            {activeOutputTab === \"artifacts\" && (\n              <ArtifactsTab\n                artifacts={artifacts}\n                sessionId={session?.id ?? null}\n              />\n            )}\n          </>\n        )}\n      </div>\n    </div>\n  );\n});\nBuildOutputPanel.displayName = \"BuildOutputPanel\";\nexport default BuildOutputPanel;\n"
  },
  {
    "path": "web/src/app/craft/components/RawOutputBlock.tsx",
    "content": "\"use client\";\n\nimport { useMemo } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport hljs from \"highlight.js/lib/core\";\n\n// Import highlight.js theme styles (dark mode Atom One Dark)\nimport \"@/app/app/message/custom-code-styles.css\";\n\n// Register common languages\nimport javascript from \"highlight.js/lib/languages/javascript\";\nimport typescript from \"highlight.js/lib/languages/typescript\";\nimport python from \"highlight.js/lib/languages/python\";\nimport json from \"highlight.js/lib/languages/json\";\nimport css from \"highlight.js/lib/languages/css\";\nimport xml from \"highlight.js/lib/languages/xml\"; // includes HTML\nimport bash from \"highlight.js/lib/languages/bash\";\nimport yaml from \"highlight.js/lib/languages/yaml\";\nimport markdown from \"highlight.js/lib/languages/markdown\";\nimport sql from \"highlight.js/lib/languages/sql\";\n\nhljs.registerLanguage(\"javascript\", javascript);\nhljs.registerLanguage(\"js\", javascript);\nhljs.registerLanguage(\"jsx\", javascript);\nhljs.registerLanguage(\"typescript\", typescript);\nhljs.registerLanguage(\"ts\", typescript);\nhljs.registerLanguage(\"tsx\", typescript);\nhljs.registerLanguage(\"python\", python);\nhljs.registerLanguage(\"py\", python);\nhljs.registerLanguage(\"json\", json);\nhljs.registerLanguage(\"css\", css);\nhljs.registerLanguage(\"html\", xml);\nhljs.registerLanguage(\"xml\", xml);\nhljs.registerLanguage(\"bash\", bash);\nhljs.registerLanguage(\"sh\", bash);\nhljs.registerLanguage(\"shell\", bash);\nhljs.registerLanguage(\"yaml\", yaml);\nhljs.registerLanguage(\"yml\", yaml);\nhljs.registerLanguage(\"markdown\", markdown);\nhljs.registerLanguage(\"md\", markdown);\nhljs.registerLanguage(\"sql\", sql);\n\n/**\n * Get language from file extension\n */\nfunction getLanguageFromPath(filePath: string | undefined): string | undefined {\n  if (!filePath) return undefined;\n  const ext = filePath.split(\".\").pop()?.toLowerCase();\n  if (!ext) return undefined;\n\n  const langMap: Record<string, string> = {\n    js: \"javascript\",\n    jsx: \"javascript\",\n    ts: \"typescript\",\n    tsx: \"typescript\",\n    py: \"python\",\n    json: \"json\",\n    css: \"css\",\n    html: \"html\",\n    xml: \"xml\",\n    sh: \"bash\",\n    bash: \"bash\",\n    yaml: \"yaml\",\n    yml: \"yaml\",\n    md: \"markdown\",\n    sql: \"sql\",\n  };\n\n  return langMap[ext];\n}\n\ninterface RawOutputBlockProps {\n  content: string;\n  maxHeight?: string;\n  /** File path to derive language from, or explicit language name */\n  language?: string;\n}\n\n/**\n * RawOutputBlock - Scrollable code block for tool output\n *\n * Displays raw output in a dark monospace container with\n * horizontal and vertical scrolling. Applies syntax highlighting\n * when a language can be determined.\n */\nexport default function RawOutputBlock({\n  content,\n  maxHeight = \"300px\",\n  language,\n}: RawOutputBlockProps) {\n  const highlightedHtml = useMemo(() => {\n    if (!content) return null;\n\n    // Try to determine language from file path or explicit language\n    const lang = language?.includes(\".\")\n      ? getLanguageFromPath(language)\n      : language;\n\n    try {\n      if (lang && hljs.getLanguage(lang)) {\n        return hljs.highlight(content, { language: lang }).value;\n      }\n      // Don't auto-detect for plain output (like command results)\n      return null;\n    } catch {\n      return null;\n    }\n  }, [content, language]);\n\n  if (!content) {\n    return (\n      <div\n        className={cn(\n          \"p-3 rounded-08 border\",\n          // Match hljs theme: light=#fafafa, dark=#151617\n          \"bg-[#fafafa] border-[#fafafa] dark:bg-[#151617] dark:border-[#151617]\",\n          \"text-text-03 text-xs\"\n        )}\n        style={{ fontFamily: \"var(--font-dm-mono)\" }}\n      >\n        No output yet...\n      </div>\n    );\n  }\n\n  return (\n    <div\n      className={cn(\n        \"p-3 rounded-08 border\",\n        // Match hljs theme: light=#fafafa, dark=#151617\n        \"bg-[#fafafa] border-[#fafafa] dark:bg-[#151617] dark:border-[#151617]\",\n        \"text-xs overflow-auto\"\n      )}\n      style={{\n        fontFamily: \"var(--font-dm-mono)\",\n        maxHeight,\n      }}\n    >\n      {highlightedHtml ? (\n        <pre\n          className=\"whitespace-pre-wrap break-words m-0 hljs\"\n          dangerouslySetInnerHTML={{ __html: highlightedHtml }}\n        />\n      ) : (\n        <pre className=\"whitespace-pre-wrap break-words m-0\">{content}</pre>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/SandboxStatusIndicator.tsx",
    "content": "\"use client\";\n\nimport { motion, AnimatePresence } from \"motion/react\";\n\nimport {\n  useSession,\n  useIsPreProvisioning,\n  useIsPreProvisioningReady,\n  useIsPreProvisioningFailed,\n} from \"@/app/craft/hooks/useBuildSessionStore\";\nimport { Card } from \"@/components/ui/card\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nconst STATUS_CONFIG = {\n  provisioning: {\n    color: \"bg-status-warning-05\",\n    pulse: true,\n    label: \"Initializing sandbox...\",\n  },\n  running: {\n    color: \"bg-status-success-05\",\n    pulse: false,\n    label: \"Sandbox running\",\n  },\n  idle: { color: \"bg-status-warning-05\", pulse: false, label: \"Sandbox idle\" },\n  sleeping: {\n    color: \"bg-status-info-05\",\n    pulse: false,\n    label: \"Sandbox sleeping\",\n  },\n  restoring: {\n    color: \"bg-status-warning-05\",\n    pulse: true,\n    label: \"Restoring sandbox...\",\n  },\n  terminated: {\n    color: \"bg-status-error-05\",\n    pulse: false,\n    label: \"Sandbox terminated\",\n  },\n  failed: {\n    color: \"bg-status-error-05\",\n    pulse: false,\n    label: \"Failed to provision sandbox\",\n  },\n  ready: {\n    color: \"bg-status-success-05\",\n    pulse: false,\n    label: \"Sandbox ready\",\n  },\n  loading: {\n    color: \"bg-text-03\",\n    pulse: true,\n    label: \"Finding sandbox...\",\n  },\n} as const;\n\ntype Status = keyof typeof STATUS_CONFIG;\n\ninterface SandboxStatusIndicatorProps {}\n\n/**\n * Derives the current sandbox status from session state or pre-provisioning state.\n *\n * Priority:\n * 1. Actual sandbox status from backend (if session has sandbox info)\n * 2. Session exists but no sandbox info → \"running\" (optimistic for consumed pre-provisioned sessions)\n * 3. Pre-provisioning failed → \"failed\"\n * 4. Pre-provisioning in progress → \"provisioning\" (only when no session - welcome page)\n * 5. Pre-provisioning ready (not yet consumed) → \"ready\"\n * 6. Default → \"loading\" (gray, finding sandbox)\n *\n * IMPORTANT: Pre-provisioning state is checked AFTER session existence because\n * pre-provisioning is for NEW sessions. When viewing an existing session, we\n * should show that session's status, not the background pre-provisioning state.\n */\nfunction deriveSandboxStatus(\n  session: ReturnType<typeof useSession>,\n  isPreProvisioning: boolean,\n  isReady: boolean,\n  isFailed: boolean\n): Status {\n  // 1. Backend is source of truth when available\n  if (session?.sandbox) {\n    return session.sandbox.status as Status;\n  }\n  // 2. Session exists but no sandbox info - assume running\n  // (This handles consumed pre-provisioned sessions before sandbox loads)\n  if (session) {\n    return \"running\";\n  }\n  // 3. Pre-provisioning failed\n  if (isFailed) {\n    return \"failed\";\n  }\n  // 4. No session - check pre-provisioning state (welcome page)\n  if (isPreProvisioning) {\n    return \"provisioning\";\n  }\n  // 5. Pre-provisioning ready but not consumed\n  if (isReady) {\n    return \"ready\";\n  }\n  // 6. No session, no pre-provisioning state - loading\n  return \"loading\";\n}\n\n/**\n * Displays the current sandbox status with a colored indicator dot.\n *\n * Shows actual sandbox state when a session exists, otherwise shows\n * pre-provisioning state (provisioning/ready).\n */\nexport default function SandboxStatusIndicator(\n  _props: SandboxStatusIndicatorProps = {}\n) {\n  const session = useSession();\n  const isPreProvisioning = useIsPreProvisioning();\n  const isReady = useIsPreProvisioningReady();\n  const isFailed = useIsPreProvisioningFailed();\n\n  const status = deriveSandboxStatus(\n    session,\n    isPreProvisioning,\n    isReady,\n    isFailed\n  );\n  const { color, pulse, label } = STATUS_CONFIG[status];\n\n  return (\n    <motion.div layout transition={{ duration: 0.3, ease: \"easeInOut\" }}>\n      <Card className=\"flex items-center gap-2 p-2 overflow-hidden\">\n        <div\n          className={`w-2 h-2 rounded-full shrink-0 ${color} ${\n            pulse ? \"animate-pulse\" : \"\"\n          }`}\n        />\n        <AnimatePresence mode=\"wait\">\n          <motion.span\n            key={status}\n            initial={{ opacity: 0, y: 5 }}\n            animate={{ opacity: 1, y: 0 }}\n            exit={{ opacity: 0, y: -5 }}\n            transition={{ duration: 0.2 }}\n          >\n            <Text text05>{label}</Text>\n          </motion.span>\n        </AnimatePresence>\n      </Card>\n    </motion.div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/ShareButton.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport { SvgLink, SvgCopy, SvgCheck, SvgX } from \"@opal/icons\";\nimport { setSessionSharing } from \"@/app/craft/services/apiServices\";\nimport type { SharingScope } from \"@/app/craft/types/streamingTypes\";\nimport { cn } from \"@/lib/utils\";\nimport Popover from \"@/refresh-components/Popover\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { ContentAction } from \"@opal/layouts\";\n\ninterface ShareButtonProps {\n  sessionId: string;\n  webappUrl: string;\n  sharingScope: SharingScope;\n  onScopeChange?: () => void;\n}\n\nconst SCOPE_OPTIONS: {\n  value: SharingScope;\n  label: string;\n  description: string;\n}[] = [\n  {\n    value: \"private\",\n    label: \"Private\",\n    description: \"Only you can view this app.\",\n  },\n  {\n    value: \"public_org\",\n    label: \"Organization\",\n    description: \"Anyone logged into your Onyx can view this app.\",\n  },\n];\n\nexport default function ShareButton({\n  sessionId,\n  webappUrl,\n  sharingScope: initialScope,\n  onScopeChange,\n}: ShareButtonProps) {\n  const [isOpen, setIsOpen] = useState(false);\n  const [sharingScope, setSharingScope] = useState<SharingScope>(initialScope);\n  const [copyState, setCopyState] = useState<\"idle\" | \"copied\" | \"error\">(\n    \"idle\"\n  );\n  const [isLoading, setIsLoading] = useState(false);\n\n  const isShared = sharingScope !== \"private\";\n\n  const shareUrl =\n    typeof window !== \"undefined\"\n      ? webappUrl.startsWith(\"http\")\n        ? webappUrl\n        : `${window.location.origin}${webappUrl}`\n      : webappUrl;\n\n  const handleSelect = async (scope: SharingScope) => {\n    if (scope === sharingScope || isLoading) return;\n    setIsLoading(true);\n    try {\n      await setSessionSharing(sessionId, scope);\n      setSharingScope(scope);\n      onScopeChange?.();\n    } catch (err) {\n      console.error(\"Failed to update sharing:\", err);\n    } finally {\n      setIsLoading(false);\n    }\n  };\n\n  const handleCopy = async () => {\n    let success = false;\n    try {\n      await navigator.clipboard.writeText(shareUrl);\n      success = true;\n    } catch {\n      try {\n        const el = document.createElement(\"textarea\");\n        el.value = shareUrl;\n        el.style.cssText = \"position:fixed;opacity:0\";\n        document.body.appendChild(el);\n        el.focus();\n        el.select();\n        success = document.execCommand(\"copy\");\n        document.body.removeChild(el);\n      } catch {}\n    }\n    setCopyState(success ? \"copied\" : \"error\");\n    setTimeout(() => setCopyState(\"idle\"), 2000);\n  };\n\n  return (\n    <Section width=\"fit\" height=\"fit\">\n      <Popover open={isOpen} onOpenChange={setIsOpen}>\n        <Popover.Trigger asChild>\n          <Button\n            variant=\"action\"\n            prominence={isShared ? \"primary\" : \"tertiary\"}\n            icon={SvgLink}\n            aria-label=\"Share webapp\"\n          >\n            {isShared ? \"Shared\" : \"Share\"}\n          </Button>\n        </Popover.Trigger>\n        <Popover.Content side=\"bottom\" align=\"end\" width=\"lg\" sideOffset={4}>\n          <Section\n            alignItems=\"stretch\"\n            gap={0.25}\n            padding={0.25}\n            width=\"full\"\n            height=\"fit\"\n          >\n            {/* Scope options */}\n            <Section alignItems=\"stretch\" gap={0.25} width=\"full\">\n              {SCOPE_OPTIONS.map((opt) => (\n                <div\n                  key={opt.value}\n                  role=\"button\"\n                  tabIndex={0}\n                  onClick={() => handleSelect(opt.value)}\n                  onKeyDown={(e) =>\n                    e.key === \"Enter\" && handleSelect(opt.value)\n                  }\n                  aria-disabled={isLoading}\n                  className={cn(\n                    \"cursor-pointer rounded-08 transition-colors\",\n                    sharingScope === opt.value\n                      ? \"bg-background-tint-03\"\n                      : \"hover:bg-background-tint-02\"\n                  )}\n                >\n                  <ContentAction\n                    title={opt.label}\n                    description={opt.description}\n                    sizePreset=\"main-ui\"\n                    variant=\"section\"\n                    paddingVariant=\"sm\"\n                  />\n                </div>\n              ))}\n            </Section>\n\n            {/* Copy link — shown when not private */}\n            {isShared && (\n              <div className=\"rounded-08 bg-background-tint-02\">\n                <Section\n                  flexDirection=\"row\"\n                  alignItems=\"center\"\n                  gap={0.25}\n                  padding={0.25}\n                  width=\"full\"\n                  height=\"fit\"\n                >\n                  <div className=\"min-w-0 flex-1 overflow-hidden\">\n                    <Truncated secondaryBody text03>\n                      {shareUrl}\n                    </Truncated>\n                  </div>\n                  <Button\n                    variant=\"action\"\n                    prominence=\"tertiary\"\n                    size=\"md\"\n                    icon={\n                      copyState === \"copied\"\n                        ? SvgCheck\n                        : copyState === \"error\"\n                          ? SvgX\n                          : SvgCopy\n                    }\n                    onClick={handleCopy}\n                    aria-label=\"Copy link\"\n                  />\n                </Section>\n              </div>\n            )}\n          </Section>\n        </Popover.Content>\n      </Popover>\n    </Section>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/SideBar.tsx",
    "content": "\"use client\";\n\nimport { memo, useMemo, useCallback, useState, useEffect, useRef } from \"react\";\nimport { useRouter, usePathname } from \"next/navigation\";\nimport { useBuildContext } from \"@/app/craft/contexts/BuildContext\";\nimport {\n  useSession,\n  useSessionHistory,\n  useBuildSessionStore,\n  SessionHistoryItem,\n} from \"@/app/craft/hooks/useBuildSessionStore\";\nimport { useUsageLimits } from \"@/app/craft/hooks/useUsageLimits\";\nimport { CRAFT_SEARCH_PARAM_NAMES } from \"@/app/craft/services/searchParams\";\nimport { SidebarTab } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport SidebarWrapper from \"@/sections/sidebar/SidebarWrapper\";\nimport SidebarBody from \"@/sections/sidebar/SidebarBody\";\nimport SidebarSection from \"@/sections/sidebar/SidebarSection\";\nimport UserAvatarPopover from \"@/sections/sidebar/UserAvatarPopover\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport ButtonRenaming from \"@/refresh-components/buttons/ButtonRenaming\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { cn, noProp } from \"@/lib/utils\";\nimport useScreenSize from \"@/hooks/useScreenSize\";\nimport {\n  SvgEditBig,\n  SvgArrowLeft,\n  SvgSettings,\n  SvgMoreHorizontal,\n  SvgEdit,\n  SvgTrash,\n  SvgCheckCircle,\n} from \"@opal/icons\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport { Button } from \"@opal/components\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport TypewriterText from \"@/app/craft/components/TypewriterText\";\nimport {\n  DELETE_SUCCESS_DISPLAY_DURATION_MS,\n  DELETE_MESSAGE_ROTATION_INTERVAL_MS,\n} from \"@/app/craft/constants\";\nimport { CRAFT_PATH, CRAFT_CONFIGURE_PATH } from \"@/app/craft/v1/constants\";\n\n// ============================================================================\n// Fun Deleting Messages\n// ============================================================================\n\nconst DELETING_MESSAGES = [\n  \"Mining away your blocks...\",\n  \"Returning diamonds to the caves...\",\n  \"Creeper blew up your save file...\",\n  \"Throwing items into lava...\",\n  \"Despawning your entities...\",\n  \"Breaking bedrock illegally...\",\n  \"Enderman teleported your data away...\",\n  \"Falling into the void...\",\n  \"Your build ran out of hearts...\",\n  \"Respawning at world spawn...\",\n  \"Feeding your code to the Ender Dragon...\",\n  \"Activating TNT chain reaction...\",\n  \"Zombie horde consumed your bytes...\",\n  \"Wither withering your session...\",\n  \"Herobrine deleted your world...\",\n];\n\nfunction DeletingMessage() {\n  const [messageIndex, setMessageIndex] = useState(() =>\n    Math.floor(Math.random() * DELETING_MESSAGES.length)\n  );\n\n  useEffect(() => {\n    const interval = setInterval(() => {\n      setMessageIndex((prev) => {\n        let next = Math.floor(Math.random() * DELETING_MESSAGES.length);\n        while (next === prev && DELETING_MESSAGES.length > 1) {\n          next = Math.floor(Math.random() * DELETING_MESSAGES.length);\n        }\n        return next;\n      });\n    }, DELETE_MESSAGE_ROTATION_INTERVAL_MS);\n    return () => clearInterval(interval);\n  }, []);\n\n  return (\n    <Text as=\"p\" text03 className=\"animate-subtle-pulse\">\n      {DELETING_MESSAGES[messageIndex]}\n    </Text>\n  );\n}\n\n// ============================================================================\n// Build Session Button\n// ============================================================================\n\ninterface BuildSessionButtonProps {\n  historyItem: SessionHistoryItem;\n  isActive: boolean;\n  onLoad: () => void;\n  onRename: (newName: string) => Promise<void>;\n  onDelete: () => Promise<void>;\n  onDeleteActiveSession?: () => void;\n}\n\nfunction BuildSessionButton({\n  historyItem,\n  isActive,\n  onLoad,\n  onRename,\n  onDelete,\n  onDeleteActiveSession,\n}: BuildSessionButtonProps) {\n  const [renaming, setRenaming] = useState(false);\n  const [popoverOpen, setPopoverOpen] = useState(false);\n  const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false);\n  const [isDeleting, setIsDeleting] = useState(false);\n  const [deleteSuccess, setDeleteSuccess] = useState(false);\n  const [deleteError, setDeleteError] = useState<string | null>(null);\n  const deleteTimeoutRef = useRef<NodeJS.Timeout | null>(null);\n\n  // Track title changes for typewriter animation (only for auto-naming, not manual rename)\n  const prevTitleRef = useRef(historyItem.title);\n  const [shouldAnimate, setShouldAnimate] = useState(false);\n\n  // Detect when title changes from \"Fresh Craft\" to a real name (auto-naming)\n  useEffect(() => {\n    const prevTitle = prevTitleRef.current;\n    if (\n      prevTitle !== historyItem.title &&\n      prevTitle === \"Fresh Craft\" &&\n      !renaming\n    ) {\n      setShouldAnimate(true);\n    }\n    prevTitleRef.current = historyItem.title;\n  }, [historyItem.title, renaming]);\n\n  const closeModal = useCallback(() => {\n    if (deleteTimeoutRef.current) {\n      clearTimeout(deleteTimeoutRef.current);\n      deleteTimeoutRef.current = null;\n    }\n    setIsDeleteModalOpen(false);\n    setPopoverOpen(false);\n    setDeleteSuccess(false);\n    setDeleteError(null);\n    setIsDeleting(false);\n  }, []);\n\n  const handleConfirmDelete = useCallback(\n    async (e: React.MouseEvent<HTMLButtonElement>) => {\n      e.stopPropagation();\n      setIsDeleting(true);\n      setDeleteError(null);\n\n      try {\n        await onDelete();\n        setIsDeleting(false);\n        setDeleteSuccess(true);\n        // Show success briefly, then close and redirect if needed\n        deleteTimeoutRef.current = setTimeout(() => {\n          closeModal();\n          if (isActive && onDeleteActiveSession) {\n            onDeleteActiveSession();\n          }\n        }, DELETE_SUCCESS_DISPLAY_DURATION_MS);\n      } catch (err) {\n        setIsDeleting(false);\n        setDeleteError(\n          err instanceof Error ? err.message : \"Failed to delete session\"\n        );\n      }\n    },\n    [onDelete, closeModal, isActive, onDeleteActiveSession]\n  );\n\n  const rightMenu = (\n    <>\n      <Popover.Trigger asChild onClick={noProp()}>\n        <div>\n          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n          <IconButton\n            icon={SvgMoreHorizontal}\n            className={cn(\n              !popoverOpen && \"hidden\",\n              !renaming && \"group-hover/SidebarTab:flex\"\n            )}\n            transient={popoverOpen}\n            internal\n          />\n        </div>\n      </Popover.Trigger>\n      <Popover.Content side=\"right\" align=\"start\">\n        <PopoverMenu>\n          {[\n            <LineItem\n              key=\"rename\"\n              icon={SvgEdit}\n              onClick={noProp(() => setRenaming(true))}\n            >\n              Rename\n            </LineItem>,\n            null,\n            <LineItem\n              key=\"delete\"\n              icon={SvgTrash}\n              onClick={noProp(() => setIsDeleteModalOpen(true))}\n              danger\n            >\n              Delete\n            </LineItem>,\n          ]}\n        </PopoverMenu>\n      </Popover.Content>\n    </>\n  );\n\n  return (\n    <>\n      <Popover\n        onOpenChange={(state) => {\n          setPopoverOpen(state);\n        }}\n      >\n        <Popover.Anchor>\n          <SidebarTab\n            onClick={onLoad}\n            selected={isActive}\n            rightChildren={rightMenu}\n          >\n            {renaming ? (\n              <ButtonRenaming\n                initialName={historyItem.title}\n                onRename={onRename}\n                onClose={() => setRenaming(false)}\n              />\n            ) : shouldAnimate ? (\n              <Text\n                as=\"p\"\n                data-state={isActive ? \"active\" : \"inactive\"}\n                className=\"line-clamp-1 break-all text-left\"\n                mainUiBody\n              >\n                <TypewriterText\n                  text={historyItem.title}\n                  charSpeed={25}\n                  animateOnMount={true}\n                  onAnimationComplete={() => setShouldAnimate(false)}\n                />\n              </Text>\n            ) : (\n              historyItem.title\n            )}\n          </SidebarTab>\n        </Popover.Anchor>\n      </Popover>\n      {isDeleteModalOpen && (\n        <ConfirmationModalLayout\n          title={\n            deleteSuccess\n              ? \"Deleted\"\n              : deleteError\n                ? \"Delete Failed\"\n                : \"Delete Craft\"\n          }\n          icon={deleteSuccess ? SvgCheckCircle : SvgTrash}\n          onClose={isDeleting || deleteSuccess ? undefined : closeModal}\n          hideCancel={isDeleting || deleteSuccess}\n          twoTone={!isDeleting && !deleteSuccess && !deleteError}\n          submit={\n            deleteSuccess ? (\n              <Button disabled variant=\"action\" icon={SvgCheckCircle}>\n                Done\n              </Button>\n            ) : deleteError ? (\n              <Button variant=\"danger\" onClick={closeModal}>\n                Close\n              </Button>\n            ) : (\n              <Button\n                disabled={isDeleting}\n                variant=\"danger\"\n                onClick={handleConfirmDelete}\n                icon={isDeleting ? SimpleLoader : undefined}\n              >\n                {isDeleting ? \"Deleting...\" : \"Delete\"}\n              </Button>\n            )\n          }\n        >\n          {deleteSuccess ? (\n            <Text as=\"p\" text03>\n              Build deleted successfully.\n            </Text>\n          ) : deleteError ? (\n            <Text as=\"p\" text03 className=\"text-status-error-02\">\n              {deleteError}\n            </Text>\n          ) : isDeleting ? (\n            <DeletingMessage />\n          ) : (\n            \"Are you sure you want to delete this craft? This action cannot be undone.\"\n          )}\n        </ConfirmationModalLayout>\n      )}\n    </>\n  );\n}\n\n// ============================================================================\n// Build Sidebar Inner\n// ============================================================================\n\ninterface BuildSidebarInnerProps {\n  folded: boolean;\n  onFoldClick: () => void;\n}\n\nconst MemoizedBuildSidebarInner = memo(\n  ({ folded, onFoldClick }: BuildSidebarInnerProps) => {\n    const router = useRouter();\n    const pathname = usePathname();\n    const session = useSession();\n    const sessionHistory = useSessionHistory();\n    // Access actions directly like chat does - these don't cause re-renders\n    const renameBuildSession = useBuildSessionStore(\n      (state) => state.renameBuildSession\n    );\n    const deleteBuildSession = useBuildSessionStore(\n      (state) => state.deleteBuildSession\n    );\n    const refreshSessionHistory = useBuildSessionStore(\n      (state) => state.refreshSessionHistory\n    );\n    const { limits, isEnabled } = useUsageLimits();\n\n    // Fetch session history on mount\n    useEffect(() => {\n      refreshSessionHistory();\n    }, [refreshSessionHistory]);\n\n    // Build section title with usage if cloud is enabled\n    // limit=0 indicates unlimited (local/self-hosted mode), so hide the count\n    const sessionsTitle = useMemo(() => {\n      if (isEnabled && limits && limits.limit > 0) {\n        return `Total Messages (${limits.messagesUsed}/${limits.limit})`;\n      }\n      return \"Sessions\";\n    }, [isEnabled, limits]);\n\n    // Navigate to new build - session controller handles setCurrentSession and pre-provisioning\n    const handleNewBuild = useCallback(() => {\n      router.push(CRAFT_PATH);\n    }, [router]);\n\n    const handleLoadSession = useCallback(\n      (sessionId: string) => {\n        router.push(\n          `${CRAFT_PATH}?${CRAFT_SEARCH_PARAM_NAMES.SESSION_ID}=${sessionId}`\n        );\n      },\n      [router]\n    );\n\n    const newBuildButton = useMemo(\n      () => (\n        <SidebarTab icon={SvgEditBig} folded={folded} onClick={handleNewBuild}>\n          Start Crafting\n        </SidebarTab>\n      ),\n      [folded, handleNewBuild]\n    );\n\n    const buildConfigurePanel = useMemo(\n      () => (\n        <SidebarTab\n          icon={SvgSettings}\n          folded={folded}\n          href={CRAFT_CONFIGURE_PATH}\n          selected={pathname.startsWith(CRAFT_CONFIGURE_PATH)}\n        >\n          Configure\n        </SidebarTab>\n      ),\n      [folded, pathname]\n    );\n\n    const backToChatButton = useMemo(\n      () => (\n        <SidebarTab icon={SvgArrowLeft} folded={folded} href=\"/app\">\n          Back to Chat\n        </SidebarTab>\n      ),\n      [folded]\n    );\n\n    const footer = useMemo(\n      () => (\n        <div>\n          {backToChatButton}\n          <UserAvatarPopover folded={folded} />\n        </div>\n      ),\n      [folded, backToChatButton]\n    );\n\n    return (\n      <SidebarWrapper folded={folded} onFoldClick={onFoldClick}>\n        <SidebarBody\n          pinnedContent={\n            <div className=\"flex flex-col gap-0.5\">\n              {newBuildButton}\n              {buildConfigurePanel}\n            </div>\n          }\n          footer={footer}\n          scrollKey=\"build-sidebar\"\n        >\n          {!folded && (\n            <SidebarSection title={sessionsTitle}>\n              {sessionHistory.length === 0 ? (\n                <div className=\"pl-2 pr-1.5 py-1\">\n                  <Text text01>\n                    Start building! Session history will appear here.\n                  </Text>\n                </div>\n              ) : (\n                sessionHistory.map((historyItem) => (\n                  <BuildSessionButton\n                    key={historyItem.id}\n                    historyItem={historyItem}\n                    isActive={\n                      !pathname.startsWith(CRAFT_CONFIGURE_PATH) &&\n                      session?.id === historyItem.id\n                    }\n                    onLoad={() => handleLoadSession(historyItem.id)}\n                    onRename={(newName) =>\n                      renameBuildSession(historyItem.id, newName)\n                    }\n                    onDelete={() => deleteBuildSession(historyItem.id)}\n                    onDeleteActiveSession={\n                      session?.id === historyItem.id\n                        ? () => router.push(CRAFT_PATH)\n                        : undefined\n                    }\n                  />\n                ))\n              )}\n            </SidebarSection>\n          )}\n        </SidebarBody>\n      </SidebarWrapper>\n    );\n  }\n);\n\nMemoizedBuildSidebarInner.displayName = \"BuildSidebarInner\";\n\n// ============================================================================\n// Build Sidebar (Main Export)\n// ============================================================================\n\nexport default function BuildSidebar() {\n  const { leftSidebarFolded, setLeftSidebarFolded } = useBuildContext();\n  const { isMobile } = useScreenSize();\n\n  if (!isMobile)\n    return (\n      <MemoizedBuildSidebarInner\n        folded={leftSidebarFolded}\n        onFoldClick={() => setLeftSidebarFolded((prev) => !prev)}\n      />\n    );\n\n  return (\n    <>\n      <div\n        className={cn(\n          \"fixed inset-y-0 left-0 z-50 transition-transform duration-200\",\n          leftSidebarFolded ? \"-translate-x-full\" : \"translate-x-0\"\n        )}\n      >\n        <MemoizedBuildSidebarInner\n          folded={false}\n          onFoldClick={() => setLeftSidebarFolded(true)}\n        />\n      </div>\n\n      {/* Hitbox to close the sidebar if anything outside of it is touched */}\n      <div\n        className={cn(\n          \"fixed inset-0 z-40 bg-mask-03 backdrop-blur-03 transition-opacity duration-200\",\n          leftSidebarFolded\n            ? \"opacity-0 pointer-events-none\"\n            : \"opacity-100 pointer-events-auto\"\n        )}\n        onClick={() => setLeftSidebarFolded(true)}\n      />\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/SuggestedPrompts.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport {\n  getPromptsForPersona,\n  UserPersona,\n  BuildPrompt,\n} from \"@/app/craft/constants/exampleBuildPrompts\";\n\ninterface SuggestedPromptsProps {\n  persona?: UserPersona;\n  onPromptClick: (promptText: string) => void;\n}\n\n/**\n * Shuffles an array using Fisher-Yates algorithm\n */\nfunction shuffleArray<T>(array: T[]): T[] {\n  const shuffled = [...array];\n  for (let i = shuffled.length - 1; i > 0; i--) {\n    const j = Math.floor(Math.random() * (i + 1));\n    const temp = shuffled[i]!;\n    shuffled[i] = shuffled[j]!;\n    shuffled[j] = temp;\n  }\n  return shuffled;\n}\n\n/**\n * Randomly selects 4 prompts from the available prompts\n */\nfunction selectRandomPrompts(prompts: BuildPrompt[]): BuildPrompt[] {\n  const shuffled = shuffleArray(prompts);\n  return shuffled.slice(0, 4);\n}\n\n/**\n * SuggestedPrompts - Displays clickable prompt suggestions in a 2x2 grid\n *\n * Shows a 2x2 grid of example prompts based on user persona.\n * Each prompt has summary text on top and a cropped image below it.\n * Clicking a prompt triggers the onPromptClick callback.\n * Randomly selects 4 prompts from the available prompts for the persona.\n * Shuffles on every component mount (when user returns) and when persona changes.\n */\nexport default function SuggestedPrompts({\n  persona = \"default\",\n  onPromptClick,\n}: SuggestedPromptsProps) {\n  // Randomly select 4 prompts - shuffles on mount and when persona changes\n  const [gridPrompts, setGridPrompts] = useState<BuildPrompt[]>(() => {\n    const prompts = getPromptsForPersona(persona);\n    return selectRandomPrompts(prompts);\n  });\n\n  // Reshuffle when persona changes\n  useEffect(() => {\n    const prompts = getPromptsForPersona(persona);\n    setGridPrompts(selectRandomPrompts(prompts));\n  }, [persona]);\n\n  return (\n    <div className=\"mt-4 w-full grid grid-cols-2 gap-4\">\n      {gridPrompts.map((prompt) => (\n        <button\n          key={prompt.id}\n          onClick={() => onPromptClick(prompt.fullText)}\n          className={cn(\n            \"flex flex-col items-center gap-2\",\n            \"p-4 rounded-12\",\n            \"bg-background-neutral-00 border border-border-01\",\n            \"hover:bg-background-neutral-01 hover:border-border-02\",\n            \"transition-all duration-200\",\n            \"cursor-pointer\",\n            \"focus:outline-none focus:ring-2 focus:ring-action-link-01 focus:ring-offset-2\"\n          )}\n        >\n          {/* Summary text */}\n          <span className=\"text-sm text-text-04 text-center leading-tight\">\n            {prompt.summary}\n          </span>\n          {/* Image resized to cut in half height (4:1 aspect ratio) */}\n          {prompt.image && (\n            <div className=\"w-full aspect-[3/1] rounded-08 overflow-hidden bg-background-neutral-01\">\n              <img\n                src={prompt.image}\n                alt={prompt.summary}\n                className=\"w-full h-full object-cover object-top\"\n              />\n            </div>\n          )}\n        </button>\n      ))}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/SuggestionBubbles.tsx",
    "content": "\"use client\";\n\nimport { cn } from \"@/lib/utils\";\nimport { SuggestionBubble } from \"@/app/craft/hooks/useBuildSessionStore\";\n\ninterface SuggestionBubblesProps {\n  suggestions: SuggestionBubble[];\n  loading?: boolean;\n  onSelect: (text: string) => void;\n}\n\n/**\n * Get theme-specific styles for suggestion bubbles\n */\nfunction getThemeStyles(theme: string): string {\n  // Match user message styling - same gray background\n  switch (theme) {\n    case \"add\":\n    case \"question\":\n    default:\n      // Same gray as user messages\n      return \"bg-background-tint-02 hover:bg-background-tint-03\";\n  }\n}\n\n/**\n * Displays follow-up suggestion bubbles after the first agent message.\n * Styled like user chat messages - stacked vertically and right-aligned.\n * Each bubble is clickable and populates the input bar with the suggestion text.\n */\nexport default function SuggestionBubbles({\n  suggestions,\n  loading,\n  onSelect,\n}: SuggestionBubblesProps) {\n  if (loading) {\n    return (\n      <div className=\"flex flex-col items-end gap-2\">\n        {/* Loading skeleton bubbles - right aligned */}\n        {[1, 2].map((i) => (\n          <div\n            key={i}\n            className=\"h-10 w-48 bg-background-neutral-01 rounded-16 animate-pulse\"\n          />\n        ))}\n      </div>\n    );\n  }\n\n  if (!suggestions || suggestions.length === 0) return null;\n\n  return (\n    <div className=\"flex flex-col items-end gap-3\">\n      {suggestions.map((suggestion, idx) => (\n        <button\n          key={idx}\n          onClick={() => onSelect(suggestion.text)}\n          className={cn(\n            \"px-4 py-3 rounded-t-16 rounded-bl-16 text-sm text-left\",\n            \"text-text-03 transition-colors cursor-pointer\",\n            \"max-w-[95%] shadow-01\",\n            \"animate-in fade-in duration-500\",\n            getThemeStyles(suggestion.theme)\n          )}\n          style={{\n            animationDelay: `${idx * 100}ms`,\n            animationFillMode: \"both\",\n          }}\n        >\n          {suggestion.text}\n        </button>\n      ))}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/TextChunk.tsx",
    "content": "\"use client\";\n\nimport MinimalMarkdown from \"@/components/chat/MinimalMarkdown\";\n\ninterface TextChunkProps {\n  content: string;\n}\n\n/**\n * TextChunk - Renders markdown text content\n *\n * Uses MinimalMarkdown for consistent rendering with the main chat.\n */\nexport default function TextChunk({ content }: TextChunkProps) {\n  if (!content) return null;\n\n  return (\n    <div className=\"py-1\">\n      <MinimalMarkdown content={content} className=\"text-text-05\" />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/ThinkingCard.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport {\n  Collapsible,\n  CollapsibleContent,\n  CollapsibleTrigger,\n} from \"@/refresh-components/Collapsible\";\nimport { SvgChevronDown, SvgBubbleText } from \"@opal/icons\";\n\ninterface ThinkingCardProps {\n  content: string;\n  isStreaming: boolean;\n}\n\n/**\n * ThinkingCard - Expandable card for agent thinking content\n *\n * Starts open and stays open. User can manually toggle.\n */\nexport default function ThinkingCard({\n  content,\n  isStreaming,\n}: ThinkingCardProps) {\n  const [isOpen, setIsOpen] = useState(true);\n\n  if (!content) return null;\n\n  return (\n    <Collapsible open={isOpen} onOpenChange={setIsOpen}>\n      <div\n        className={cn(\n          \"w-full border-[0.5px] rounded-lg overflow-hidden transition-colors\",\n          \"hover:bg-background-tint-02\",\n          isStreaming\n            ? \"border-theme-blue-02 bg-theme-blue-01\"\n            : \"border-border-01 bg-background-neutral-01\"\n        )}\n      >\n        <CollapsibleTrigger asChild>\n          <button\n            className={cn(\n              \"w-full flex items-center justify-between gap-2 px-3 py-2\",\n              \"transition-colors text-left\"\n            )}\n          >\n            <div className=\"flex items-center gap-2\">\n              <SvgBubbleText\n                className={cn(\n                  \"size-4\",\n                  isStreaming ? \"stroke-theme-blue-05\" : \"stroke-text-03\"\n                )}\n              />\n              <span\n                className={cn(\n                  \"text-sm font-medium\",\n                  isStreaming ? \"text-theme-blue-05\" : \"text-text-04\"\n                )}\n              >\n                Thinking\n              </span>\n              {isStreaming && (\n                <span className=\"text-xs text-theme-blue-04 animate-pulse\">\n                  ...\n                </span>\n              )}\n            </div>\n            <SvgChevronDown\n              className={cn(\n                \"size-4 stroke-text-03 transition-transform duration-150\",\n                !isOpen && \"rotate-[-90deg]\"\n              )}\n            />\n          </button>\n        </CollapsibleTrigger>\n\n        <CollapsibleContent>\n          <div className=\"px-3 pb-3 pt-0\">\n            <div\n              className={cn(\n                \"p-3 rounded-08 text-sm\",\n                \"bg-background-neutral-02 text-text-03\",\n                \"max-h-48 overflow-y-auto\",\n                \"italic\"\n              )}\n            >\n              <p className=\"whitespace-pre-wrap break-words m-0\">{content}</p>\n            </div>\n          </div>\n        </CollapsibleContent>\n      </div>\n    </Collapsible>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/TodoListCard.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport {\n  Collapsible,\n  CollapsibleContent,\n  CollapsibleTrigger,\n} from \"@/refresh-components/Collapsible\";\nimport { SvgChevronDown, SvgCheckCircle } from \"@opal/icons\";\nimport {\n  TodoListState,\n  TodoItem,\n  TodoStatus,\n} from \"@/app/craft/types/displayTypes\";\n\ninterface TodoListCardProps {\n  todoList: TodoListState;\n  /** Whether this card should be open by default */\n  defaultOpen?: boolean;\n}\n\n/**\n * Get status icon for a todo item\n */\nfunction getStatusIcon(status: TodoStatus) {\n  switch (status) {\n    case \"completed\":\n      return (\n        <SvgCheckCircle className=\"size-4 stroke-status-success-05 mt-0.5 shrink-0\" />\n      );\n    case \"in_progress\":\n      // Gray circle with inset filled circle to indicate work in progress\n      return (\n        <div className=\"size-4 rounded-full border-2 border-text-03 mt-0.5 shrink-0 flex items-center justify-center\">\n          <div className=\"size-2 bg-text-03 rounded-full\" />\n        </div>\n      );\n    case \"pending\":\n    default:\n      return (\n        <div className=\"size-4 rounded-full border-2 border-text-03 mt-0.5 shrink-0\" />\n      );\n  }\n}\n\n/**\n * Single todo item row\n */\nfunction TodoItemRow({ todo }: { todo: TodoItem }) {\n  return (\n    <div className=\"flex items-start gap-2 py-1\">\n      {/* Status indicator */}\n      {getStatusIcon(todo.status)}\n\n      {/* Task text - show activeForm when in_progress, otherwise content */}\n      <span\n        className={cn(\n          \"text-sm\",\n          todo.status === \"completed\"\n            ? \"text-text-03 line-through\"\n            : \"text-text-04\"\n        )}\n      >\n        {todo.status === \"in_progress\" ? todo.activeForm : todo.content}\n      </span>\n    </div>\n  );\n}\n\n/**\n * TodoListCard - Collapsible card showing a list of todo items\n *\n * Features:\n * - Shows progress count (e.g., \"3/5 completed\")\n * - Spinner in header when any item is in_progress\n * - Auto-collapses when new todo list appears (controlled by parent)\n * - Items show different states: pending (empty circle), in_progress (spinner), completed (checkmark)\n */\nexport default function TodoListCard({\n  todoList,\n  defaultOpen = true,\n}: TodoListCardProps) {\n  const [isOpen, setIsOpen] = useState(defaultOpen);\n\n  // Update isOpen when defaultOpen changes (for auto-collapse behavior)\n  useEffect(() => {\n    setIsOpen(defaultOpen);\n  }, [defaultOpen]);\n\n  // Calculate progress stats\n  const total = todoList.todos.length;\n  const completed = todoList.todos.filter(\n    (t) => t.status === \"completed\"\n  ).length;\n\n  // Determine background color based on state\n  // Only two states: gray (default) and green (completed)\n  const allCompleted = completed === total && total > 0;\n\n  return (\n    <Collapsible open={isOpen} onOpenChange={setIsOpen}>\n      <div\n        className={cn(\n          \"w-full border-[0.5px] rounded-lg overflow-hidden\",\n          allCompleted\n            ? \"bg-status-success-01 border-status-success-01\"\n            : \"bg-background-neutral-01 border-border-01\"\n        )}\n      >\n        <CollapsibleTrigger asChild>\n          <button\n            className={cn(\n              \"w-full flex items-center justify-between px-3 py-2\",\n              \"hover:bg-background-tint-02 transition-colors text-left\"\n            )}\n          >\n            <div className=\"flex items-center gap-2 min-w-0 flex-1\">\n              {/* Status indicator in header - no spinner, only static icons */}\n              {allCompleted ? (\n                <SvgCheckCircle className=\"size-4 stroke-status-success-05 shrink-0\" />\n              ) : (\n                <div className=\"size-4 rounded border-2 border-text-03 shrink-0 flex items-center justify-center\">\n                  <div className=\"size-2 bg-text-03 rounded-sm\" />\n                </div>\n              )}\n\n              {/* Title */}\n              <span className=\"text-sm font-medium text-text-04\">Tasks</span>\n\n              {/* Progress count */}\n              <span className=\"text-xs text-text-03\">\n                {completed}/{total} completed\n              </span>\n            </div>\n\n            {/* Expand arrow */}\n            <SvgChevronDown\n              className={cn(\n                \"size-4 stroke-text-03 transition-transform duration-150 shrink-0\",\n                !isOpen && \"rotate-[-90deg]\"\n              )}\n            />\n          </button>\n        </CollapsibleTrigger>\n\n        <CollapsibleContent>\n          <div className=\"px-3 pb-3 pt-0 space-y-0.5\">\n            {todoList.todos.map((todo, index) => (\n              <TodoItemRow key={`${todoList.id}-${index}`} todo={todo} />\n            ))}\n            {todoList.todos.length === 0 && (\n              <span className=\"text-sm text-text-03 italic\">No tasks</span>\n            )}\n          </div>\n        </CollapsibleContent>\n      </div>\n    </Collapsible>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/ToggleWarningModal.tsx",
    "content": "\"use client\";\n\nimport Text from \"@/refresh-components/texts/Text\";\n\ninterface ToggleWarningModalProps {\n  open: boolean;\n  onConfirm: () => void;\n  onCancel: () => void;\n}\n\nexport function ToggleWarningModal({\n  open,\n  onConfirm,\n  onCancel,\n}: ToggleWarningModalProps) {\n  if (!open) return null;\n\n  return (\n    <div className=\"fixed inset-0 z-[1400] flex items-center justify-center\">\n      {/* Backdrop */}\n      <div\n        className=\"absolute inset-0 bg-black/50 backdrop-blur-sm\"\n        onClick={(e) => {\n          e.stopPropagation();\n          onCancel();\n        }}\n      />\n\n      {/* Modal */}\n      <div className=\"relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01\">\n        <div className=\"p-6 flex flex-col gap-6\">\n          {/* Header */}\n          <div className=\"flex items-center justify-center\">\n            <Text headingH2 text05>\n              Show all models?\n            </Text>\n          </div>\n\n          {/* Message */}\n          <div className=\"flex justify-center\">\n            <Text mainUiBody text04 className=\"text-center\">\n              We recommend using <strong>Claude Opus 4.6</strong> for Crafting.\n              <br />\n              Other models may have reduced capabilities for code creation,\n              <br />\n              data analysis, and artifact creation.\n            </Text>\n          </div>\n\n          {/* Action buttons */}\n          <div className=\"flex items-center justify-center gap-3\">\n            <button\n              type=\"button\"\n              onClick={(e) => {\n                e.stopPropagation();\n                onConfirm();\n              }}\n              className=\"px-4 py-2 rounded-12 bg-background-neutral-01 border border-border-02 hover:opacity-90 transition-colors\"\n            >\n              <Text mainUiBody text05>\n                Show All Models\n              </Text>\n            </button>\n            <button\n              type=\"button\"\n              onClick={(e) => {\n                e.stopPropagation();\n                onCancel();\n              }}\n              className=\"px-4 py-2 rounded-12 bg-black dark:bg-white hover:opacity-90 transition-colors\"\n            >\n              <Text\n                mainUiAction\n                className=\"text-text-light-05 dark:text-text-dark-05\"\n              >\n                Keep Recommended\n              </Text>\n            </button>\n          </div>\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/ToolCallPill.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport {\n  Collapsible,\n  CollapsibleContent,\n  CollapsibleTrigger,\n} from \"@/refresh-components/Collapsible\";\nimport {\n  SvgChevronDown,\n  SvgTerminalSmall,\n  SvgFileText,\n  SvgEdit,\n  SvgLoader,\n  SvgCheckSquare,\n  SvgAlertCircle,\n  SvgBubbleText,\n} from \"@opal/icons\";\nimport RawOutputBlock from \"@/app/craft/components/RawOutputBlock\";\nimport DiffView from \"@/app/craft/components/DiffView\";\nimport { ToolCallState, ToolCallKind } from \"@/app/craft/types/displayTypes\";\n\ninterface ToolCallPillProps {\n  toolCall: ToolCallState;\n}\n\n/**\n * Get icon based on tool kind\n */\nfunction getToolIcon(kind: ToolCallKind) {\n  switch (kind) {\n    case \"execute\":\n      return SvgTerminalSmall;\n    case \"read\":\n      return SvgFileText;\n    case \"task\":\n      return SvgBubbleText;\n    case \"other\":\n      return SvgEdit;\n    default:\n      return SvgTerminalSmall;\n  }\n}\n\n/**\n * Get status icon and color\n */\nfunction getStatusDisplay(status: string) {\n  switch (status) {\n    case \"pending\":\n      return {\n        icon: null,\n        iconClass: \"stroke-status-info-05\",\n        bgClass: \"bg-status-info-01 border-status-info-01\",\n        showSpinner: true,\n      };\n    case \"in_progress\":\n      return {\n        icon: null,\n        iconClass: \"stroke-status-info-05\",\n        bgClass: \"bg-status-info-01 border-status-info-01\",\n        showSpinner: true,\n      };\n    case \"completed\":\n      return {\n        icon: SvgCheckSquare,\n        iconClass: \"stroke-status-success-05\",\n        bgClass: \"bg-background-neutral-01 border-border-01\",\n        showSpinner: false,\n      };\n    case \"failed\":\n      return {\n        icon: SvgAlertCircle,\n        iconClass: \"stroke-status-error-05\",\n        bgClass: \"bg-status-error-01 border-status-error-01\",\n        showSpinner: false,\n      };\n    default:\n      return {\n        icon: null,\n        iconClass: \"stroke-text-03\",\n        bgClass: \"bg-background-neutral-01 border-border-01\",\n        showSpinner: false,\n      };\n  }\n}\n\n/**\n * Get language hint for syntax highlighting based on tool kind and title\n */\nfunction getLanguageHint(toolCall: ToolCallState): string | undefined {\n  // Search results (glob/grep) - no highlighting for file lists\n  if (\n    toolCall.title === \"Searching files\" ||\n    toolCall.title === \"Searching content\" ||\n    toolCall.title === \"Searching\"\n  ) {\n    return undefined;\n  }\n\n  switch (toolCall.kind) {\n    case \"execute\":\n      return \"bash\";\n    case \"task\":\n      return \"markdown\";\n    case \"read\":\n    case \"other\":\n      // Use description (file path) for syntax detection\n      return toolCall.description;\n    default:\n      return undefined;\n  }\n}\n\n/**\n * ToolCallPill - Expandable pill for tool calls\n *\n * Shows description and command in collapsed state.\n * Expands to show raw output.\n *\n * Status icons:\n * - pending: gray circle\n * - in_progress: blue spinner\n * - completed: green checkmark\n * - failed: red X\n */\nexport default function ToolCallPill({ toolCall }: ToolCallPillProps) {\n  const [isOpen, setIsOpen] = useState(false);\n\n  const Icon = getToolIcon(toolCall.kind);\n  const statusDisplay = getStatusDisplay(toolCall.status);\n  const StatusIcon = statusDisplay.icon;\n\n  return (\n    <Collapsible open={isOpen} onOpenChange={setIsOpen}>\n      <div\n        className={cn(\n          \"w-full border-[0.5px] rounded-lg overflow-hidden transition-colors\",\n          \"hover:bg-background-tint-02\",\n          statusDisplay.bgClass\n        )}\n      >\n        <CollapsibleTrigger asChild>\n          <button\n            className={cn(\n              \"w-full flex flex-col gap-1 px-3 py-2\",\n              \"transition-colors text-left\"\n            )}\n          >\n            {/* Top row: status icon + title + description + expand arrow */}\n            <div className=\"flex items-center justify-between gap-2 w-full\">\n              <div className=\"flex items-center gap-2 min-w-0 flex-1\">\n                {/* Status indicator */}\n                {statusDisplay.showSpinner ? (\n                  <SvgLoader className=\"size-4 stroke-status-info-05 animate-spin shrink-0\" />\n                ) : StatusIcon ? (\n                  <StatusIcon\n                    className={cn(\"size-4 shrink-0\", statusDisplay.iconClass)}\n                  />\n                ) : (\n                  <Icon className=\"size-4 stroke-text-03 shrink-0\" />\n                )}\n\n                {/* Title (action) */}\n                <span className=\"text-sm font-medium text-text-04 shrink-0\">\n                  {toolCall.title}\n                </span>\n\n                {/* Description (target) */}\n                {toolCall.description && (\n                  <span className=\"text-sm text-text-03 truncate\">\n                    {toolCall.description}\n                  </span>\n                )}\n              </div>\n\n              {/* Expand arrow */}\n              <SvgChevronDown\n                className={cn(\n                  \"size-4 stroke-text-03 transition-transform duration-150 shrink-0\",\n                  !isOpen && \"rotate-[-90deg]\"\n                )}\n              />\n            </div>\n\n            {/* Bottom row: command in monospace (for execute tools) */}\n            {toolCall.kind === \"execute\" && toolCall.command && (\n              <div\n                className=\"text-xs text-text-03 truncate pl-6\"\n                style={{ fontFamily: \"var(--font-dm-mono)\" }}\n              >\n                {toolCall.command}\n              </div>\n            )}\n          </button>\n        </CollapsibleTrigger>\n\n        <CollapsibleContent>\n          <div className=\"px-3 pb-3 pt-0\">\n            {/* Show diff view for edit operations (not new files) */}\n            {toolCall.title === \"Editing file\" &&\n            toolCall.oldContent !== undefined &&\n            toolCall.newContent !== undefined ? (\n              <DiffView\n                oldContent={toolCall.oldContent}\n                newContent={toolCall.newContent}\n                maxHeight=\"300px\"\n                filePath={toolCall.description}\n              />\n            ) : (\n              <RawOutputBlock\n                content={toolCall.rawOutput}\n                maxHeight=\"300px\"\n                language={getLanguageHint(toolCall)}\n              />\n            )}\n          </div>\n        </CollapsibleContent>\n      </div>\n    </Collapsible>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/TypewriterText.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useRef, memo } from \"react\";\n\ninterface TypewriterTextProps {\n  /** The text to display with typewriter animation */\n  text: string;\n  /** Speed of each character animation in ms (default: 30) */\n  charSpeed?: number;\n  /** Whether to animate on initial render (default: false) */\n  animateOnMount?: boolean;\n  /** Class name for the text container */\n  className?: string;\n  /** Callback when animation completes */\n  onAnimationComplete?: () => void;\n}\n\n/**\n * TypewriterText - Animates text changes with a delete-then-type effect.\n *\n * When text changes:\n * 1. Old text is deleted character by character (from end to start)\n * 2. New text is typed character by character (from start to end)\n *\n * This creates a smooth \"rename\" animation effect for session titles.\n */\nfunction TypewriterText({\n  text,\n  charSpeed = 30,\n  animateOnMount = false,\n  className = \"\",\n  onAnimationComplete,\n}: TypewriterTextProps) {\n  // Track the currently displayed text\n  const [displayedText, setDisplayedText] = useState(\n    animateOnMount ? \"\" : text\n  );\n  // Track whether we're in the \"deleting\" or \"typing\" phase\n  const [isDeleting, setIsDeleting] = useState(false);\n  // Store the target text we're animating towards\n  const targetTextRef = useRef(text);\n  // Store the previous text for comparison\n  // When animateOnMount is true, initialize to empty so animation triggers\n  const prevTextRef = useRef(animateOnMount ? \"\" : text);\n  // Track if this is the first render\n  const isFirstRender = useRef(true);\n  // Animation frame ID for cleanup\n  const animationRef = useRef<NodeJS.Timeout | null>(null);\n\n  useEffect(() => {\n    // Skip animation on first render unless animateOnMount is true\n    if (isFirstRender.current) {\n      isFirstRender.current = false;\n      if (!animateOnMount) {\n        setDisplayedText(text);\n        prevTextRef.current = text;\n        targetTextRef.current = text;\n        return;\n      }\n      // When animateOnMount is true, we want to animate from empty to text\n      // So we skip the delete phase and go straight to typing\n      // Set prevTextRef to empty so subsequent renders don't trigger delete phase\n      prevTextRef.current = \"\";\n      targetTextRef.current = text;\n      setIsDeleting(false); // Start in typing phase\n      return;\n    }\n\n    // If text hasn't changed, no animation needed\n    if (text === prevTextRef.current) {\n      return;\n    }\n\n    // If we're currently animating from empty (animateOnMount case), don't restart\n    // This happens when prevTextRef is \"\" (from animateOnMount) and we're typing\n    if (\n      prevTextRef.current === \"\" &&\n      displayedText.length < targetTextRef.current.length &&\n      !isDeleting\n    ) {\n      // We're in the middle of typing from animateOnMount, don't interrupt\n      return;\n    }\n\n    // Clear any existing animation\n    if (animationRef.current) {\n      clearTimeout(animationRef.current);\n    }\n\n    // Update target and start deleting phase\n    targetTextRef.current = text;\n    setIsDeleting(true);\n\n    return () => {\n      if (animationRef.current) {\n        clearTimeout(animationRef.current);\n      }\n    };\n  }, [text, animateOnMount]);\n\n  useEffect(() => {\n    // Handle the animation loop\n    if (isDeleting) {\n      // Deleting phase: remove characters from the end\n      if (displayedText.length > 0) {\n        animationRef.current = setTimeout(() => {\n          setDisplayedText((prev) => prev.slice(0, -1));\n        }, charSpeed);\n      } else {\n        // Done deleting, switch to typing phase\n        setIsDeleting(false);\n        prevTextRef.current = targetTextRef.current;\n      }\n    } else {\n      // Typing phase: add characters from the target\n      const target = targetTextRef.current;\n      if (displayedText.length < target.length) {\n        animationRef.current = setTimeout(() => {\n          setDisplayedText(target.slice(0, displayedText.length + 1));\n        }, charSpeed);\n      } else if (\n        displayedText.length === target.length &&\n        displayedText === target\n      ) {\n        // Animation complete - update prevTextRef so future changes are detected\n        prevTextRef.current = target;\n        onAnimationComplete?.();\n      }\n    }\n\n    return () => {\n      if (animationRef.current) {\n        clearTimeout(animationRef.current);\n      }\n    };\n  }, [displayedText, isDeleting, charSpeed, onAnimationComplete]);\n\n  return <span className={className}>{displayedText}</span>;\n}\n\nexport default memo(TypewriterText);\n"
  },
  {
    "path": "web/src/app/craft/components/UpgradePlanModal.tsx",
    "content": "\"use client\";\n\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgAlertTriangle } from \"@opal/icons\";\nimport { UsageLimits } from \"@/app/craft/types/streamingTypes\";\n\ninterface UpgradePlanModalProps {\n  open: boolean;\n  onClose: () => void;\n  limits: UsageLimits | null;\n}\n\n/**\n * Modal shown when users hit their message limit.\n * Shows different messaging for free (total limit) vs paid (weekly limit) users.\n */\nexport default function UpgradePlanModal({\n  open,\n  onClose,\n  limits,\n}: UpgradePlanModalProps) {\n  if (!open) return null;\n\n  const isPaidUser = limits?.limitType === \"weekly\";\n\n  return (\n    <div className=\"fixed inset-0 z-50 flex items-center justify-center\">\n      <div\n        className=\"absolute inset-0 bg-black/50 backdrop-blur-sm\"\n        onClick={onClose}\n      />\n\n      <div className=\"relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01\">\n        <div className=\"p-6 flex flex-col gap-6 min-h-[300px]\">\n          <div className=\"flex-1 flex flex-col items-center justify-center gap-6\">\n            <SvgAlertTriangle className=\"w-16 h-16 text-status-warning-02\" />\n\n            <div className=\"flex flex-col items-center gap-2 text-center\">\n              <Text headingH2 text05>\n                You've reached your message limit\n              </Text>\n              <Text mainUiBody text03 className=\"max-w-sm\">\n                {isPaidUser ? (\n                  <>\n                    You've used all {limits?.limit ?? 25} messages for this\n                    week. Your message limit will automatically reset at the\n                    start of each week, allowing you to continue crafting with\n                    Onyx.\n                  </>\n                ) : (\n                  <>\n                    You've used all {limits?.limit ?? 5} free messages available\n                    in your trial. You've reached the limit for your free\n                    account.\n                  </>\n                )}\n              </Text>\n            </div>\n          </div>\n\n          <div className=\"flex justify-center pt-2\">\n            <button\n              type=\"button\"\n              onClick={onClose}\n              className=\"flex items-center gap-1.5 px-4 py-2 rounded-12 border border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-02 transition-colors\"\n            >\n              <Text mainUiAction>Got it</Text>\n            </button>\n          </div>\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/UserMessage.tsx",
    "content": "\"use client\";\n\nimport Text from \"@/refresh-components/texts/Text\";\n\ninterface UserMessageProps {\n  content: string;\n}\n\nexport default function UserMessage({ content }: UserMessageProps) {\n  return (\n    <div className=\"flex justify-end py-4\">\n      <div className=\"max-w-[80%] whitespace-break-spaces rounded-t-16 rounded-bl-16 bg-background-tint-02 py-3 px-4\">\n        <Text as=\"p\" mainContentBody>\n          {content}\n        </Text>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/WorkingLine.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport {\n  Collapsible,\n  CollapsibleContent,\n  CollapsibleTrigger,\n} from \"@/refresh-components/Collapsible\";\nimport {\n  SvgChevronDown,\n  SvgTerminalSmall,\n  SvgFileText,\n  SvgEdit,\n  SvgLoader,\n  SvgCheckSquare,\n  SvgAlertCircle,\n  SvgSearch,\n} from \"@opal/icons\";\nimport RawOutputBlock from \"@/app/craft/components/RawOutputBlock\";\nimport DiffView from \"@/app/craft/components/DiffView\";\nimport { ToolCallState, ToolCallKind } from \"@/app/craft/types/displayTypes\";\n\ninterface WorkingLineProps {\n  toolCall: ToolCallState;\n}\n\n/**\n * Get icon based on tool kind\n */\nfunction getToolIcon(kind: ToolCallKind) {\n  switch (kind) {\n    case \"search\":\n      return SvgSearch;\n    case \"execute\":\n      return SvgTerminalSmall;\n    case \"read\":\n      return SvgFileText;\n    case \"edit\":\n      return SvgEdit;\n    case \"other\":\n      return SvgEdit;\n    default:\n      return SvgTerminalSmall;\n  }\n}\n\n/**\n * Get status icon and styling\n */\nfunction getStatusDisplay(status: string) {\n  switch (status) {\n    case \"pending\":\n    case \"in_progress\":\n      return {\n        icon: SvgLoader,\n        iconClass: \"stroke-status-info-05 animate-spin\",\n      };\n    case \"completed\":\n      return {\n        icon: SvgCheckSquare,\n        iconClass: \"stroke-status-success-05\",\n      };\n    case \"failed\":\n      return {\n        icon: SvgAlertCircle,\n        iconClass: \"stroke-status-error-05\",\n      };\n    default:\n      return {\n        icon: null,\n        iconClass: \"stroke-text-03\",\n      };\n  }\n}\n\n/**\n * Get language hint for syntax highlighting\n */\nfunction getLanguageHint(toolCall: ToolCallState): string | undefined {\n  switch (toolCall.kind) {\n    case \"search\":\n      // Search results - no highlighting for file lists\n      return undefined;\n    case \"execute\":\n      return \"bash\";\n    case \"read\":\n    case \"edit\":\n    case \"other\":\n      // Use description (file path) for syntax detection\n      return toolCall.description;\n    default:\n      return undefined;\n  }\n}\n\n/**\n * WorkingLine - A single expandable line within the Working pill.\n *\n * Shows: [status icon] [action text] [expand arrow]\n * Expands to show detailed content (diff view or raw output)\n */\nexport default function WorkingLine({ toolCall }: WorkingLineProps) {\n  const [isOpen, setIsOpen] = useState(false);\n\n  const statusDisplay = getStatusDisplay(toolCall.status);\n  const StatusIcon = statusDisplay.icon;\n  const ToolIcon = getToolIcon(toolCall.kind);\n\n  return (\n    <Collapsible open={isOpen} onOpenChange={setIsOpen}>\n      <div className=\"rounded-md overflow-hidden\">\n        <CollapsibleTrigger asChild>\n          <button\n            className={cn(\n              \"w-full flex gap-2 py-1.5 pl-2 pr-4 rounded-md\",\n              \"hover:bg-background-tint-02 transition-colors text-left\",\n              \"items-start\"\n            )}\n          >\n            {/* Status indicator */}\n            {StatusIcon ? (\n              <StatusIcon\n                className={cn(\n                  \"size-3.5 shrink-0 mt-0.5\",\n                  statusDisplay.iconClass\n                )}\n              />\n            ) : (\n              <ToolIcon\n                className={cn(\"size-3.5 stroke-text-03 shrink-0 mt-0.5\")}\n              />\n            )}\n\n            {/* Action text */}\n            <span className=\"text-sm flex-1 min-w-0 text-left\">\n              <span\n                className={cn(\n                  \"block\",\n                  isOpen\n                    ? toolCall.kind === \"execute\"\n                      ? \"break-words whitespace-pre-wrap\"\n                      : \"break-words whitespace-normal\"\n                    : \"truncate\"\n                )}\n              >\n                {toolCall.kind === \"execute\" && toolCall.description ? (\n                  <>\n                    {/* For execute: show description as primary, command as secondary */}\n                    <span className=\"text-text-04\">\n                      {toolCall.description.charAt(0).toUpperCase() +\n                        toolCall.description.slice(1)}\n                    </span>\n                    {toolCall.command && (\n                      <span className=\"text-text-02\"> {toolCall.command}</span>\n                    )}\n                  </>\n                ) : (\n                  <span className=\"text-text-04\">\n                    {toolCall.title}\n                    {toolCall.description &&\n                      toolCall.description !== toolCall.title && (\n                        <>\n                          {\" \"}\n                          <span className=\"text-text-02\">\n                            {toolCall.description}\n                          </span>\n                        </>\n                      )}\n                  </span>\n                )}\n              </span>\n            </span>\n\n            {/* Expand arrow */}\n            <SvgChevronDown\n              className={cn(\n                \"size-3.5 stroke-text-03 transition-transform duration-150 shrink-0 mt-0.5\",\n                !isOpen && \"rotate-[-90deg]\"\n              )}\n            />\n          </button>\n        </CollapsibleTrigger>\n\n        <CollapsibleContent>\n          <div className=\"pl-6 pr-2 pb-2\">\n            {/* Show diff view for edit operations with actual diff data */}\n            {toolCall.kind === \"edit\" &&\n            !toolCall.isNewFile &&\n            toolCall.oldContent &&\n            toolCall.newContent ? (\n              <DiffView\n                oldContent={toolCall.oldContent}\n                newContent={toolCall.newContent}\n                maxHeight=\"200px\"\n                filePath={toolCall.description}\n              />\n            ) : (\n              <RawOutputBlock\n                content={toolCall.rawOutput}\n                maxHeight=\"200px\"\n                language={getLanguageHint(toolCall)}\n              />\n            )}\n          </div>\n        </CollapsibleContent>\n      </div>\n    </Collapsible>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/WorkingPill.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport {\n  Collapsible,\n  CollapsibleContent,\n  CollapsibleTrigger,\n} from \"@/refresh-components/Collapsible\";\nimport { SvgChevronDown, SvgPencilRuler } from \"@opal/icons\";\nimport { ToolCallState } from \"@/app/craft/types/displayTypes\";\nimport WorkingLine from \"@/app/craft/components/WorkingLine\";\n\ninterface WorkingPillProps {\n  toolCalls: ToolCallState[];\n  /** Whether this is the latest/active working group - auto-collapses when false */\n  isLatest?: boolean;\n}\n\n/**\n * WorkingPill - Consolidates multiple tool calls into a single expandable container.\n *\n * Features:\n * - Auto-expanded by default when isLatest\n * - Auto-collapses when a newer Working pill appears (isLatest becomes false)\n * - Each action renders as an expandable WorkingLine\n */\nexport default function WorkingPill({\n  toolCalls,\n  isLatest = true,\n}: WorkingPillProps) {\n  const [isOpen, setIsOpen] = useState(true); // Auto-expanded by default\n\n  // Auto-collapse when this is no longer the latest working group\n  useEffect(() => {\n    if (!isLatest) {\n      setIsOpen(false);\n    }\n  }, [isLatest]);\n\n  // Check if any tool is in progress (for background color)\n  const hasInProgress = toolCalls.some(\n    (tc) => tc.status === \"pending\" || tc.status === \"in_progress\"\n  );\n\n  return (\n    <Collapsible open={isOpen} onOpenChange={setIsOpen}>\n      <div\n        className={cn(\n          \"w-full border-[0.5px] rounded-lg overflow-hidden transition-colors\",\n          hasInProgress\n            ? \"bg-status-info-01 border-status-info-01\"\n            : \"bg-background-neutral-01 border-border-01\"\n        )}\n      >\n        <CollapsibleTrigger asChild>\n          <button\n            className={cn(\n              \"w-full flex items-center justify-between gap-2 px-3 py-2\",\n              \"transition-colors text-left rounded-t-lg\",\n              \"hover:bg-background-tint-02\"\n            )}\n          >\n            <div className=\"flex items-center gap-2 min-w-0 flex-1\">\n              {/* Static icon */}\n              <SvgPencilRuler className=\"size-4 stroke-text-03 shrink-0\" />\n\n              {/* Title */}\n              <span className=\"text-sm font-medium text-text-04\">Working</span>\n            </div>\n\n            {/* Expand arrow */}\n            <SvgChevronDown\n              className={cn(\n                \"size-4 stroke-text-03 transition-transform duration-150 shrink-0\",\n                !isOpen && \"rotate-[-90deg]\"\n              )}\n            />\n          </button>\n        </CollapsibleTrigger>\n\n        <CollapsibleContent>\n          <div className=\"pl-5 pr-3 pb-3 pt-0 space-y-1\">\n            {toolCalls.map((toolCall) => (\n              <WorkingLine key={toolCall.id} toolCall={toolCall} />\n            ))}\n          </div>\n        </CollapsibleContent>\n      </div>\n    </Collapsible>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/output-panel/ArtifactsTab.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useEffect, useState } from \"react\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport {\n  SvgGlobe,\n  SvgDownloadCloud,\n  SvgFolder,\n  SvgFiles,\n  SvgChevronDown,\n  SvgChevronRight,\n} from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { Artifact } from \"@/app/craft/hooks/useBuildSessionStore\";\nimport { useFilesNeedsRefresh } from \"@/app/craft/hooks/useBuildSessionStore\";\nimport {\n  fetchDirectoryListing,\n  downloadArtifactFile,\n  downloadDirectory,\n} from \"@/app/craft/services/apiServices\";\nimport { FileSystemEntry } from \"@/app/craft/types/streamingTypes\";\nimport { getFileIcon } from \"@/lib/utils\";\nimport { cn } from \"@/lib/utils\";\n\ninterface ArtifactsTabProps {\n  artifacts: Artifact[];\n  sessionId: string | null;\n}\n\nexport default function ArtifactsTab({\n  artifacts,\n  sessionId,\n}: ArtifactsTabProps) {\n  const webappArtifacts = artifacts.filter(\n    (a) => a.type === \"nextjs_app\" || a.type === \"web_app\"\n  );\n\n  const filesNeedsRefresh = useFilesNeedsRefresh();\n  const { data: outputsListing } = useSWR(\n    sessionId\n      ? [SWR_KEYS.buildSessionOutputFiles(sessionId), filesNeedsRefresh]\n      : null,\n    () => (sessionId ? fetchDirectoryListing(sessionId, \"outputs\") : null),\n    {\n      revalidateOnFocus: false,\n      dedupingInterval: 2000,\n    }\n  );\n\n  // Filter out \"web\" directory (shown as webapp artifact)\n  const rawEntries = (outputsListing?.entries ?? []).filter(\n    (entry) => entry.name !== \"web\"\n  );\n\n  // Filter out empty directories\n  const [outputEntries, setOutputEntries] = useState<FileSystemEntry[]>([]);\n\n  useEffect(() => {\n    if (!sessionId || rawEntries.length === 0) {\n      setOutputEntries([]);\n      return;\n    }\n\n    let cancelled = false;\n\n    async function filterEmptyDirs() {\n      const results = await Promise.all(\n        rawEntries.map(async (entry) => {\n          if (!entry.is_directory) return entry;\n          try {\n            const listing = await fetchDirectoryListing(sessionId!, entry.path);\n            if (listing && listing.entries.length > 0) return entry;\n          } catch {\n            return entry;\n          }\n          return null;\n        })\n      );\n      if (!cancelled) {\n        setOutputEntries(\n          results.filter((e): e is FileSystemEntry => e !== null)\n        );\n      }\n    }\n\n    filterEmptyDirs();\n    return () => {\n      cancelled = true;\n    };\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, [sessionId, JSON.stringify(rawEntries.map((e) => e.path))]);\n\n  const handleWebappDownload = () => {\n    if (!sessionId) return;\n    const link = document.createElement(\"a\");\n    link.href = `/api/build/sessions/${sessionId}/webapp-download`;\n    link.download = \"\";\n    document.body.appendChild(link);\n    link.click();\n    document.body.removeChild(link);\n  };\n\n  const handleOutputDownload = useCallback(\n    (path: string, isDirectory: boolean) => {\n      if (!sessionId) return;\n      if (isDirectory) {\n        downloadDirectory(sessionId, path);\n      } else {\n        downloadArtifactFile(sessionId, path);\n      }\n    },\n    [sessionId]\n  );\n\n  const hasWebapps = webappArtifacts.length > 0;\n  const hasOutputFiles = outputEntries.length > 0;\n\n  if (!sessionId || (!hasWebapps && !hasOutputFiles)) {\n    return (\n      <Section\n        height=\"full\"\n        alignItems=\"center\"\n        justifyContent=\"center\"\n        padding={2}\n      >\n        <SvgFiles size={48} className=\"stroke-text-02\" />\n        <Text headingH3 text03>\n          No artifacts yet\n        </Text>\n        <Text secondaryBody text02>\n          Output files and web apps will appear here\n        </Text>\n      </Section>\n    );\n  }\n\n  return (\n    <div className=\"flex flex-col h-full\">\n      <div className=\"flex-1 overflow-auto overlay-scrollbar\">\n        <div className=\"divide-y divide-border-01\">\n          {/* Webapp Artifacts */}\n          {webappArtifacts.map((artifact) => (\n            <div\n              key={artifact.id}\n              className=\"flex items-center gap-3 p-3 hover:bg-background-tint-01 transition-colors\"\n            >\n              <SvgGlobe size={24} className=\"stroke-text-02 flex-shrink-0\" />\n\n              <div className=\"flex-1 min-w-0 flex items-center gap-2\">\n                <Text secondaryBody text04 className=\"truncate\">\n                  {artifact.name}\n                </Text>\n                <Text secondaryBody text02>\n                  Next.js Application\n                </Text>\n              </div>\n\n              <div className=\"flex items-center gap-2\">\n                <Button\n                  variant=\"action\"\n                  prominence=\"tertiary\"\n                  icon={SvgDownloadCloud}\n                  onClick={handleWebappDownload}\n                >\n                  Download\n                </Button>\n              </div>\n            </div>\n          ))}\n\n          {/* Output Files & Folders */}\n          {outputEntries.map((entry) => (\n            <OutputEntryRow\n              key={entry.path}\n              entry={entry}\n              sessionId={sessionId!}\n              depth={0}\n              onDownload={handleOutputDownload}\n            />\n          ))}\n        </div>\n      </div>\n    </div>\n  );\n}\n\ninterface OutputEntryRowProps {\n  entry: FileSystemEntry;\n  sessionId: string;\n  depth: number;\n  onDownload: (path: string, isDirectory: boolean) => void;\n}\n\nfunction OutputEntryRow({\n  entry,\n  sessionId,\n  depth,\n  onDownload,\n}: OutputEntryRowProps) {\n  const [expanded, setExpanded] = useState(false);\n  const [children, setChildren] = useState<FileSystemEntry[]>([]);\n  const [loaded, setLoaded] = useState(false);\n\n  const toggleExpand = useCallback(async () => {\n    if (!entry.is_directory) return;\n\n    if (!loaded) {\n      const listing = await fetchDirectoryListing(sessionId, entry.path);\n      if (listing) {\n        setChildren(listing.entries);\n      }\n      setLoaded(true);\n    }\n    setExpanded((prev) => !prev);\n  }, [entry.is_directory, entry.path, sessionId, loaded]);\n\n  const FileIcon = entry.is_directory ? SvgFolder : getFileIcon(entry.name);\n  const paddingLeft = depth * 20;\n\n  return (\n    <>\n      <div\n        className={cn(\n          \"flex items-center gap-3 p-3 hover:bg-background-tint-01 transition-colors\",\n          entry.is_directory && \"cursor-pointer\"\n        )}\n        style={{ paddingLeft: 12 + paddingLeft }}\n        onClick={entry.is_directory ? toggleExpand : undefined}\n      >\n        {entry.is_directory ? (\n          expanded ? (\n            <SvgChevronDown\n              size={16}\n              className=\"stroke-text-03 flex-shrink-0\"\n            />\n          ) : (\n            <SvgChevronRight\n              size={16}\n              className=\"stroke-text-03 flex-shrink-0\"\n            />\n          )\n        ) : (\n          <div className=\"w-4 flex-shrink-0\" />\n        )}\n\n        <FileIcon size={20} className=\"stroke-text-02 flex-shrink-0\" />\n\n        <div className=\"flex-1 min-w-0 flex items-center gap-2\">\n          <Text secondaryBody text04 className=\"truncate\">\n            {entry.name}\n          </Text>\n          {!entry.is_directory && entry.size !== null ? (\n            <Text secondaryBody text02>\n              {formatFileSize(entry.size)}\n            </Text>\n          ) : null}\n        </div>\n\n        <div className=\"flex items-center gap-2\">\n          <Button\n            variant=\"action\"\n            prominence=\"tertiary\"\n            icon={SvgDownloadCloud}\n            onClick={(e) => {\n              e.stopPropagation();\n              onDownload(entry.path, entry.is_directory);\n            }}\n          >\n            Download\n          </Button>\n        </div>\n      </div>\n\n      {expanded &&\n        children.map((child) => (\n          <OutputEntryRow\n            key={child.path}\n            entry={child}\n            sessionId={sessionId}\n            depth={depth + 1}\n            onDownload={onDownload}\n          />\n        ))}\n    </>\n  );\n}\n\nfunction formatFileSize(bytes: number): string {\n  if (bytes < 1024) return `${bytes} B`;\n  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;\n  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;\n}\n"
  },
  {
    "path": "web/src/app/craft/components/output-panel/FilePreviewContent.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { fetchFileContent } from \"@/app/craft/services/apiServices\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgFileText } from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport ImagePreview from \"@/app/craft/components/output-panel/ImagePreview\";\nimport MarkdownFilePreview, {\n  type FileRendererProps,\n} from \"@/app/craft/components/output-panel/MarkdownFilePreview\";\nimport PptxPreview from \"@/app/craft/components/output-panel/PptxPreview\";\nimport PdfPreview from \"@/app/craft/components/output-panel/PdfPreview\";\n\n// ── Preview registry ─────────────────────────────────────────────────────\n// Unified registry for all file preview types. First match wins.\n//\n// \"standalone\" — binary formats that handle their own data fetching.\n// \"content\"    — text-based formats that receive already-fetched content.\n\ninterface StandaloneEntry {\n  type: \"standalone\";\n  matches: (filePath: string) => boolean;\n  component: React.FC<{\n    sessionId: string;\n    filePath: string;\n    refreshKey?: number;\n  }>;\n}\n\ninterface ContentEntry {\n  type: \"content\";\n  matches: (filePath: string, mimeType: string, isImage: boolean) => boolean;\n  component: React.FC<FileRendererProps>;\n}\n\ntype PreviewEntry = StandaloneEntry | ContentEntry;\n\nfunction ImageRendererWrapper({ content, fileName }: FileRendererProps) {\n  return <ImagePreview src={content} fileName={fileName} />;\n}\n\nconst PREVIEW_REGISTRY: PreviewEntry[] = [\n  {\n    type: \"standalone\",\n    matches: (path) => /\\.pptx$/i.test(path),\n    component: PptxPreview,\n  },\n  {\n    type: \"standalone\",\n    matches: (path) => /\\.pdf$/i.test(path),\n    component: PdfPreview,\n  },\n  {\n    type: \"content\",\n    matches: (_, __, isImage) => isImage,\n    component: ImageRendererWrapper,\n  },\n  {\n    type: \"content\",\n    matches: (path) => /\\.md$/i.test(path),\n    component: MarkdownFilePreview,\n  },\n];\n\nfunction findStandalonePreview(filePath: string): StandaloneEntry | undefined {\n  return PREVIEW_REGISTRY.find(\n    (e): e is StandaloneEntry => e.type === \"standalone\" && e.matches(filePath)\n  );\n}\n\nfunction findContentPreview(\n  filePath: string,\n  mimeType: string,\n  isImage: boolean\n): ContentEntry | undefined {\n  return PREVIEW_REGISTRY.find(\n    (e): e is ContentEntry =>\n      e.type === \"content\" && e.matches(filePath, mimeType, isImage)\n  );\n}\n\n// ── Public components ────────────────────────────────────────────────────\n\ninterface FilePreviewContentProps {\n  sessionId: string;\n  filePath: string;\n  /** Changing this value forces the preview to reload its data */\n  refreshKey?: number;\n}\n\n/**\n * FilePreviewContent — full-height file preview for the main output panel.\n * Routes to the appropriate preview component based on file type.\n */\nexport function FilePreviewContent({\n  sessionId,\n  filePath,\n  refreshKey,\n}: FilePreviewContentProps) {\n  const standalone = findStandalonePreview(filePath);\n  if (standalone) {\n    const Comp = standalone.component;\n    return (\n      <Comp sessionId={sessionId} filePath={filePath} refreshKey={refreshKey} />\n    );\n  }\n\n  return (\n    <FetchedFilePreview\n      sessionId={sessionId}\n      filePath={filePath}\n      fullHeight\n      refreshKey={refreshKey}\n    />\n  );\n}\n\n/**\n * InlineFilePreview — compact file preview for pre-provisioned mode.\n * Same routing logic, without full-height layout.\n */\nexport function InlineFilePreview({\n  sessionId,\n  filePath,\n}: FilePreviewContentProps) {\n  const standalone = findStandalonePreview(filePath);\n  if (standalone) {\n    const Comp = standalone.component;\n    return <Comp sessionId={sessionId} filePath={filePath} />;\n  }\n\n  return <FetchedFilePreview sessionId={sessionId} filePath={filePath} />;\n}\n\n// ── FetchedFilePreview (inner) ───────────────────────────────────────────\n\ninterface FetchedFilePreviewProps {\n  sessionId: string;\n  filePath: string;\n  fullHeight?: boolean;\n  refreshKey?: number;\n}\n\n/**\n * Fetches file content via SWR, then delegates to the first matching\n * \"content\" entry in the registry (or falls back to raw monospace text).\n */\nfunction FetchedFilePreview({\n  sessionId,\n  filePath,\n  fullHeight,\n  refreshKey,\n}: FetchedFilePreviewProps) {\n  const { data, error, isLoading, mutate } = useSWR(\n    SWR_KEYS.buildSessionArtifactFile(sessionId, filePath),\n    () => fetchFileContent(sessionId, filePath),\n    {\n      revalidateOnFocus: false,\n      dedupingInterval: 5000,\n    }\n  );\n\n  // Re-fetch when refreshKey changes\n  useEffect(() => {\n    if (refreshKey && refreshKey > 0) {\n      mutate();\n    }\n  }, [refreshKey, mutate]);\n\n  if (isLoading) {\n    if (fullHeight) {\n      return (\n        <Section\n          height=\"full\"\n          alignItems=\"center\"\n          justifyContent=\"center\"\n          padding={2}\n        >\n          <Text secondaryBody text03>\n            Loading file...\n          </Text>\n        </Section>\n      );\n    }\n    return (\n      <div className=\"p-4\">\n        <Text secondaryBody text03>\n          Loading file...\n        </Text>\n      </div>\n    );\n  }\n\n  if (error) {\n    if (fullHeight) {\n      return (\n        <Section\n          height=\"full\"\n          alignItems=\"center\"\n          justifyContent=\"center\"\n          padding={2}\n        >\n          <SvgFileText size={48} className=\"stroke-text-02\" />\n          <Text headingH3 text03>\n            Error loading file\n          </Text>\n          <Text secondaryBody text02>\n            {error.message}\n          </Text>\n        </Section>\n      );\n    }\n    return (\n      <div className=\"p-4\">\n        <Text secondaryBody text02>\n          Error: {error.message}\n        </Text>\n      </div>\n    );\n  }\n\n  if (!data) {\n    if (fullHeight) {\n      return (\n        <Section\n          height=\"full\"\n          alignItems=\"center\"\n          justifyContent=\"center\"\n          padding={2}\n        >\n          <Text secondaryBody text03>\n            No content\n          </Text>\n        </Section>\n      );\n    }\n    return (\n      <div className=\"p-4\">\n        <Text secondaryBody text03>\n          No content\n        </Text>\n      </div>\n    );\n  }\n\n  if (data.error) {\n    if (fullHeight) {\n      return (\n        <Section\n          height=\"full\"\n          alignItems=\"center\"\n          justifyContent=\"center\"\n          padding={2}\n        >\n          <SvgFileText size={48} className=\"stroke-text-02\" />\n          <Text headingH3 text03>\n            Cannot preview file\n          </Text>\n          <Text secondaryBody text02 className=\"text-center max-w-md\">\n            {data.error}\n          </Text>\n        </Section>\n      );\n    }\n    return (\n      <div className=\"p-4\">\n        <Text secondaryBody text02 className=\"text-center\">\n          {data.error}\n        </Text>\n      </div>\n    );\n  }\n\n  // Match against content-based renderers\n  const fileName = filePath.split(\"/\").pop() || filePath;\n  const mimeType = data.mimeType ?? \"text/plain\";\n  const isImage = !!data.isImage;\n\n  const contentPreview = findContentPreview(filePath, mimeType, isImage);\n  if (contentPreview) {\n    const Comp = contentPreview.component;\n    return (\n      <Comp\n        content={data.content}\n        fileName={fileName}\n        filePath={filePath}\n        mimeType={mimeType}\n        isImage={isImage}\n      />\n    );\n  }\n\n  // Default fallback: raw text\n  if (fullHeight) {\n    return (\n      <div className=\"h-full flex flex-col\">\n        <div className=\"flex-1 overflow-auto p-4\">\n          <pre className=\"font-mono text-sm text-text-04 whitespace-pre-wrap break-words\">\n            {data.content}\n          </pre>\n        </div>\n      </div>\n    );\n  }\n\n  return (\n    <div className=\"p-4\">\n      <pre className=\"font-mono text-sm text-text-04 whitespace-pre-wrap break-words\">\n        {data.content}\n      </pre>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/output-panel/FilesTab.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useMemo, useRef, useCallback } from \"react\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport {\n  useBuildSessionStore,\n  useFilesTabState,\n  useFilesNeedsRefresh,\n} from \"@/app/craft/hooks/useBuildSessionStore\";\nimport { fetchDirectoryListing } from \"@/app/craft/services/apiServices\";\nimport { FileSystemEntry } from \"@/app/craft/types/streamingTypes\";\nimport { cn, getFileIcon } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport {\n  SvgHardDrive,\n  SvgFolder,\n  SvgFolderOpen,\n  SvgChevronRight,\n  SvgArrowLeft,\n  SvgImage,\n  SvgFileText,\n} from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { InlineFilePreview } from \"@/app/craft/components/output-panel/FilePreviewContent\";\n\ninterface FilesTabProps {\n  sessionId: string | null;\n  onFileClick?: (path: string, fileName: string) => void;\n  /** True when showing pre-provisioned sandbox (read-only, no file clicks) */\n  isPreProvisioned?: boolean;\n  /** True when sandbox is still being provisioned */\n  isProvisioning?: boolean;\n}\n\nexport default function FilesTab({\n  sessionId,\n  onFileClick,\n  isPreProvisioned = false,\n  isProvisioning = false,\n}: FilesTabProps) {\n  // Get persisted state from store (only used when not pre-provisioned)\n  const filesTabState = useFilesTabState();\n  const updateFilesTabState = useBuildSessionStore(\n    (state) => state.updateFilesTabState\n  );\n\n  // Local state for pre-provisioned mode (no persistence needed)\n  const [localExpandedPaths, setLocalExpandedPaths] = useState<Set<string>>(\n    new Set()\n  );\n  const [localDirectoryCache, setLocalDirectoryCache] = useState<\n    Map<string, FileSystemEntry[]>\n  >(new Map());\n  const [previewingFile, setPreviewingFile] = useState<{\n    path: string;\n    fileName: string;\n    mimeType: string | null;\n  } | null>(null);\n\n  // Use local state for pre-provisioned, store state otherwise\n  const expandedPaths = useMemo(\n    () =>\n      isPreProvisioned\n        ? localExpandedPaths\n        : new Set(filesTabState.expandedPaths),\n    [isPreProvisioned, localExpandedPaths, filesTabState.expandedPaths]\n  );\n\n  const directoryCache = useMemo(\n    () =>\n      isPreProvisioned\n        ? localDirectoryCache\n        : (new Map(Object.entries(filesTabState.directoryCache)) as Map<\n            string,\n            FileSystemEntry[]\n          >),\n    [isPreProvisioned, localDirectoryCache, filesTabState.directoryCache]\n  );\n\n  // Scroll container ref for position tracking\n  const scrollContainerRef = useRef<HTMLDivElement>(null);\n\n  // Fetch root directory\n  const {\n    data: rootListing,\n    error,\n    mutate,\n  } = useSWR(\n    sessionId ? SWR_KEYS.buildSessionFiles(sessionId) : null,\n    () => (sessionId ? fetchDirectoryListing(sessionId, \"\") : null),\n    {\n      revalidateOnFocus: false,\n      dedupingInterval: 2000,\n    }\n  );\n\n  // Refresh files list when outputs/ directory changes\n  const filesNeedsRefresh = useFilesNeedsRefresh();\n\n  // Snapshot of currently expanded paths — avoids putting both local and store\n  // versions in the dependency array (only one is used per mode).\n  const currentExpandedPaths = isPreProvisioned\n    ? Array.from(localExpandedPaths)\n    : filesTabState.expandedPaths;\n\n  useEffect(() => {\n    if (filesNeedsRefresh > 0 && sessionId && mutate) {\n      // Clear directory cache to ensure all directories are refreshed\n      if (isPreProvisioned) {\n        setLocalDirectoryCache(new Map());\n      } else {\n        updateFilesTabState(sessionId, { directoryCache: {} });\n      }\n      // Refresh root directory listing\n      mutate();\n\n      // Re-fetch all currently expanded subdirectories so they don't get\n      // stuck on \"Loading...\" after the cache was cleared\n      if (currentExpandedPaths.length > 0) {\n        Promise.allSettled(\n          currentExpandedPaths.map((p) => fetchDirectoryListing(sessionId, p))\n        ).then((settled) => {\n          // Collect only the successful fetches into a path → entries map\n          const fetched = new Map<string, FileSystemEntry[]>();\n          settled.forEach((r, i) => {\n            const p = currentExpandedPaths[i];\n            if (p && r.status === \"fulfilled\" && r.value) {\n              fetched.set(p, r.value.entries);\n            }\n          });\n\n          if (isPreProvisioned) {\n            setLocalDirectoryCache((prev) => {\n              const next = new Map(prev);\n              fetched.forEach((entries, p) => next.set(p, entries));\n              return next;\n            });\n          } else {\n            const obj: Record<string, FileSystemEntry[]> = {};\n            fetched.forEach((entries, p) => {\n              obj[p] = entries;\n            });\n            updateFilesTabState(sessionId, { directoryCache: obj });\n          }\n        });\n      }\n    }\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, [\n    filesNeedsRefresh,\n    sessionId,\n    mutate,\n    isPreProvisioned,\n    updateFilesTabState,\n  ]);\n\n  // Update cache when root listing changes\n  useEffect(() => {\n    if (rootListing && sessionId) {\n      if (isPreProvisioned) {\n        setLocalDirectoryCache((prev) => {\n          const newCache = new Map(prev);\n          newCache.set(\"\", rootListing.entries);\n          return newCache;\n        });\n      } else {\n        const newCache = {\n          ...filesTabState.directoryCache,\n          \"\": rootListing.entries,\n        };\n        updateFilesTabState(sessionId, { directoryCache: newCache });\n      }\n    }\n  }, [rootListing, sessionId, isPreProvisioned]);\n\n  const toggleFolder = useCallback(\n    async (path: string) => {\n      if (!sessionId) return;\n\n      if (isPreProvisioned) {\n        // Use local state for pre-provisioned mode\n        const newExpanded = new Set(localExpandedPaths);\n        if (newExpanded.has(path)) {\n          newExpanded.delete(path);\n          setLocalExpandedPaths(newExpanded);\n        } else {\n          newExpanded.add(path);\n          if (!localDirectoryCache.has(path)) {\n            const listing = await fetchDirectoryListing(sessionId, path);\n            if (listing) {\n              setLocalDirectoryCache((prev) => {\n                const newCache = new Map(prev);\n                newCache.set(path, listing.entries);\n                return newCache;\n              });\n            }\n          }\n          setLocalExpandedPaths(newExpanded);\n        }\n      } else {\n        // Use store state for active sessions\n        const newExpanded = new Set(expandedPaths);\n        if (newExpanded.has(path)) {\n          newExpanded.delete(path);\n          updateFilesTabState(sessionId, {\n            expandedPaths: Array.from(newExpanded),\n          });\n        } else {\n          newExpanded.add(path);\n          if (!directoryCache.has(path)) {\n            const listing = await fetchDirectoryListing(sessionId, path);\n            if (listing) {\n              const newCache = {\n                ...filesTabState.directoryCache,\n                [path]: listing.entries,\n              };\n              updateFilesTabState(sessionId, {\n                expandedPaths: Array.from(newExpanded),\n                directoryCache: newCache,\n              });\n              return;\n            }\n          }\n          updateFilesTabState(sessionId, {\n            expandedPaths: Array.from(newExpanded),\n          });\n        }\n      }\n    },\n    [\n      sessionId,\n      isPreProvisioned,\n      localExpandedPaths,\n      localDirectoryCache,\n      expandedPaths,\n      directoryCache,\n      filesTabState.directoryCache,\n      updateFilesTabState,\n    ]\n  );\n\n  // Handle file click for pre-provisioned mode (inline preview)\n  const handleLocalFileClick = useCallback(\n    (path: string, fileName: string, mimeType: string | null) => {\n      if (isPreProvisioned) {\n        setPreviewingFile({ path, fileName, mimeType });\n      } else if (onFileClick) {\n        onFileClick(path, fileName);\n      }\n    },\n    [isPreProvisioned, onFileClick]\n  );\n\n  // Restore scroll position when component mounts or tab becomes active\n  useEffect(() => {\n    if (\n      scrollContainerRef.current &&\n      filesTabState.scrollTop > 0 &&\n      !isPreProvisioned\n    ) {\n      scrollContainerRef.current.scrollTop = filesTabState.scrollTop;\n    }\n  }, []); // Only on mount\n\n  // Save scroll position on scroll (debounced via passive listener)\n  const handleScroll = useCallback(() => {\n    if (scrollContainerRef.current && sessionId && !isPreProvisioned) {\n      const scrollTop = scrollContainerRef.current.scrollTop;\n      updateFilesTabState(sessionId, { scrollTop });\n    }\n  }, [sessionId, isPreProvisioned, updateFilesTabState]);\n\n  const formatFileSize = (bytes: number | null): string => {\n    if (bytes === null) return \"\";\n    if (bytes < 1024) return `${bytes} B`;\n    if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;\n    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;\n  };\n\n  if (!sessionId) {\n    return (\n      <Section\n        height=\"full\"\n        alignItems=\"center\"\n        justifyContent=\"center\"\n        padding={2}\n      >\n        <SvgHardDrive size={48} className=\"stroke-text-02\" />\n        <Text headingH3 text03>\n          {isProvisioning ? \"Preparing sandbox...\" : \"No files yet\"}\n        </Text>\n        <Text secondaryBody text02>\n          {isProvisioning\n            ? \"Setting up your development environment\"\n            : \"Files created during the build will appear here\"}\n        </Text>\n      </Section>\n    );\n  }\n\n  if (error) {\n    return (\n      <Section\n        height=\"full\"\n        alignItems=\"center\"\n        justifyContent=\"center\"\n        padding={2}\n      >\n        <SvgHardDrive size={48} className=\"stroke-text-02\" />\n        <Text headingH3 text03>\n          Error loading files\n        </Text>\n        <Text secondaryBody text02>\n          {error.message}\n        </Text>\n      </Section>\n    );\n  }\n\n  if (!rootListing) {\n    return (\n      <Section\n        height=\"full\"\n        alignItems=\"center\"\n        justifyContent=\"center\"\n        padding={2}\n      >\n        <Text secondaryBody text03>\n          Loading files...\n        </Text>\n      </Section>\n    );\n  }\n\n  // Show inline file preview for pre-provisioned mode\n  if (isPreProvisioned && previewingFile && sessionId) {\n    const isImage = previewingFile.mimeType?.startsWith(\"image/\");\n\n    return (\n      <div className=\"flex flex-col h-full\">\n        {/* Header with back button */}\n        <div className=\"flex items-center gap-2 px-3 py-2 border-b border-border-01\">\n          <button\n            onClick={() => setPreviewingFile(null)}\n            className=\"p-1 rounded hover:bg-background-tint-02 transition-colors\"\n          >\n            <SvgArrowLeft size={16} className=\"stroke-text-03\" />\n          </button>\n          {isImage ? (\n            <SvgImage size={16} className=\"stroke-text-03\" />\n          ) : (\n            <SvgFileText size={16} className=\"stroke-text-03\" />\n          )}\n          <Text secondaryBody text04 className=\"truncate\">\n            {previewingFile.fileName}\n          </Text>\n        </div>\n        {/* File content */}\n        <div className=\"flex-1 overflow-auto\">\n          <InlineFilePreview\n            sessionId={sessionId}\n            filePath={previewingFile.path}\n          />\n        </div>\n      </div>\n    );\n  }\n\n  return (\n    <div className=\"flex flex-col h-full\">\n      <div\n        ref={scrollContainerRef}\n        onScroll={handleScroll}\n        className=\"flex-1 overflow-auto px-2 pb-2 relative\"\n      >\n        {/* Background to prevent content showing through sticky gap */}\n        <div className=\"sticky top-0 left-0 right-0 h-2 bg-background-neutral-00 -mx-2 z-[101]\" />\n        {rootListing.entries.length === 0 ? (\n          <Section\n            height=\"full\"\n            alignItems=\"center\"\n            justifyContent=\"center\"\n            padding={2}\n          >\n            <Text secondaryBody text03>\n              No files in this directory\n            </Text>\n          </Section>\n        ) : (\n          <div className=\"font-mono text-sm\">\n            <FileTreeNode\n              entries={rootListing.entries}\n              depth={0}\n              expandedPaths={expandedPaths}\n              directoryCache={directoryCache}\n              onToggleFolder={toggleFolder}\n              onFileClick={handleLocalFileClick}\n              formatFileSize={formatFileSize}\n            />\n          </div>\n        )}\n      </div>\n    </div>\n  );\n}\n\n// ── FileTreeNode (internal) ──────────────────────────────────────────────\n\ninterface FileTreeNodeProps {\n  entries: FileSystemEntry[];\n  depth: number;\n  expandedPaths: Set<string>;\n  directoryCache: Map<string, FileSystemEntry[]>;\n  onToggleFolder: (path: string) => void;\n  onFileClick?: (\n    path: string,\n    fileName: string,\n    mimeType: string | null\n  ) => void;\n  formatFileSize: (bytes: number | null) => string;\n  parentIsLast?: boolean[];\n}\n\nfunction FileTreeNode({\n  entries,\n  depth,\n  expandedPaths,\n  directoryCache,\n  onToggleFolder,\n  onFileClick,\n  formatFileSize,\n  parentIsLast = [],\n}: FileTreeNodeProps) {\n  // Sort entries: directories first, then alphabetically\n  const sortedEntries = [...entries].sort((a, b) => {\n    if (a.is_directory && !b.is_directory) return -1;\n    if (!a.is_directory && b.is_directory) return 1;\n    return a.name.localeCompare(b.name);\n  });\n\n  return (\n    <>\n      {sortedEntries.map((entry, index) => {\n        const isExpanded = expandedPaths.has(entry.path);\n        const isLast = index === sortedEntries.length - 1;\n        const childEntries = directoryCache.get(entry.path) || [];\n        const FileIcon = getFileIcon(entry.name);\n\n        // Row height for sticky offset calculation\n        const rowHeight = 28;\n        // Account for the 8px (h-2) spacer at top of scroll container\n        const stickyTopOffset = 8;\n\n        return (\n          <div key={entry.path} className=\"relative\">\n            {/* Tree item row */}\n            <button\n              onClick={() => {\n                if (entry.is_directory) {\n                  onToggleFolder(entry.path);\n                } else if (onFileClick) {\n                  onFileClick(entry.path, entry.name, entry.mime_type);\n                }\n              }}\n              className={cn(\n                \"w-full flex items-center py-1.5 hover:bg-background-tint-02 rounded transition-colors relative\",\n                !entry.is_directory && onFileClick && \"cursor-pointer\",\n                !entry.is_directory && !onFileClick && \"cursor-default\",\n                // Make expanded folders sticky\n                entry.is_directory &&\n                  isExpanded &&\n                  \"sticky bg-background-neutral-00\"\n              )}\n              style={\n                entry.is_directory && isExpanded\n                  ? {\n                      top: stickyTopOffset + depth * rowHeight,\n                      zIndex: 100 - depth, // Higher z-index for parent folders\n                    }\n                  : undefined\n              }\n            >\n              {/* Tree lines for depth */}\n              {parentIsLast.map((isParentLast, i) => (\n                <span\n                  key={i}\n                  className=\"inline-flex w-5 justify-center flex-shrink-0 self-stretch relative\"\n                >\n                  {!isParentLast && (\n                    <span className=\"absolute left-1/2 -translate-x-1/2 -top-1.5 -bottom-1.5 w-px bg-border-02\" />\n                  )}\n                </span>\n              ))}\n\n              {/* Branch connector */}\n              {depth > 0 && (\n                <span className=\"inline-flex w-5 flex-shrink-0 self-stretch relative\">\n                  {/* Vertical line */}\n                  <span\n                    className={cn(\n                      \"absolute left-1/2 -translate-x-1/2 w-px bg-border-02\",\n                      isLast ? \"-top-1.5 bottom-1/2\" : \"-top-1.5 -bottom-1.5\"\n                    )}\n                  />\n                  {/* Horizontal line */}\n                  <span className=\"absolute top-1/2 left-1/2 w-2 h-px bg-border-02\" />\n                </span>\n              )}\n\n              {/* Expand/collapse chevron for directories */}\n              {entry.is_directory ? (\n                <span className=\"inline-flex w-4 h-4 items-center justify-center flex-shrink-0\">\n                  <SvgChevronRight\n                    size={12}\n                    className={cn(\n                      \"stroke-text-03 transition-transform duration-150\",\n                      isExpanded && \"rotate-90\"\n                    )}\n                  />\n                </span>\n              ) : (\n                <span className=\"w-4 flex-shrink-0\" />\n              )}\n\n              {/* Icon */}\n              {entry.is_directory ? (\n                isExpanded ? (\n                  <SvgFolderOpen\n                    size={16}\n                    className=\"stroke-text-03 flex-shrink-0 mx-1\"\n                  />\n                ) : (\n                  <SvgFolder\n                    size={16}\n                    className=\"stroke-text-03 flex-shrink-0 mx-1\"\n                  />\n                )\n              ) : (\n                <FileIcon\n                  size={16}\n                  className=\"stroke-text-03 flex-shrink-0 mx-1\"\n                />\n              )}\n\n              {/* Name */}\n              <Text\n                secondaryBody\n                text04\n                className=\"truncate flex-1 text-left ml-1\"\n              >\n                {entry.name}\n              </Text>\n\n              {/* File size */}\n              {!entry.is_directory && entry.size !== null && (\n                <Text text02 className=\"ml-2 mr-2 flex-shrink-0\">\n                  {formatFileSize(entry.size)}\n                </Text>\n              )}\n            </button>\n\n            {/* Render children if expanded */}\n            {entry.is_directory && isExpanded && childEntries.length > 0 && (\n              <FileTreeNode\n                entries={childEntries}\n                depth={depth + 1}\n                expandedPaths={expandedPaths}\n                directoryCache={directoryCache}\n                onToggleFolder={onToggleFolder}\n                onFileClick={onFileClick}\n                formatFileSize={formatFileSize}\n                parentIsLast={[...parentIsLast, isLast]}\n              />\n            )}\n\n            {/* Loading indicator for expanded but not-yet-loaded directories */}\n            {entry.is_directory &&\n              isExpanded &&\n              !directoryCache.has(entry.path) && (\n                <div\n                  className=\"flex items-center py-1\"\n                  style={{ paddingLeft: `${(depth + 1) * 20 + 24}px` }}\n                >\n                  <Text secondaryBody text02>\n                    Loading...\n                  </Text>\n                </div>\n              )}\n          </div>\n        );\n      })}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/output-panel/ImagePreview.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgImage } from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\n\ninterface ImagePreviewProps {\n  src: string;\n  fileName: string;\n}\n\n/**\n * ImagePreview - Displays images with loading and error states\n * Includes proper accessibility attributes\n */\nexport default function ImagePreview({ src, fileName }: ImagePreviewProps) {\n  const [imageLoading, setImageLoading] = useState(true);\n  const [imageError, setImageError] = useState(false);\n\n  // Extract just the filename from path for better alt text\n  const displayName = fileName.split(\"/\").pop() || fileName;\n\n  // Reset loading state when src changes\n  useEffect(() => {\n    setImageLoading(true);\n    setImageError(false);\n  }, [src]);\n\n  if (imageError) {\n    return (\n      <Section\n        height=\"full\"\n        alignItems=\"center\"\n        justifyContent=\"center\"\n        padding={2}\n      >\n        <SvgImage size={48} className=\"stroke-text-02\" />\n        <Text headingH3 text03>\n          Failed to load image\n        </Text>\n        <Text secondaryBody text02>\n          The image could not be displayed\n        </Text>\n      </Section>\n    );\n  }\n\n  return (\n    <div className=\"h-full flex flex-col overflow-hidden\">\n      <div className=\"flex-1 flex items-center justify-center p-4\">\n        {imageLoading && (\n          <div className=\"absolute\">\n            <Text secondaryBody text03>\n              Loading image...\n            </Text>\n          </div>\n        )}\n        <img\n          src={src}\n          alt={displayName}\n          role=\"img\"\n          aria-label={`Preview of ${displayName}`}\n          className={cn(\n            \"max-w-full max-h-full object-contain transition-opacity\",\n            imageLoading ? \"opacity-0\" : \"opacity-100\"\n          )}\n          onLoad={() => setImageLoading(false)}\n          onError={() => {\n            setImageLoading(false);\n            setImageError(true);\n          }}\n        />\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/output-panel/MarkdownFilePreview.tsx",
    "content": "\"use client\";\n\nimport MinimalMarkdown from \"@/components/chat/MinimalMarkdown\";\n\n/** Shared interface for the file renderer registry */\nexport interface FileRendererProps {\n  content: string;\n  fileName: string;\n  filePath: string;\n  mimeType: string;\n  isImage: boolean;\n}\n\nexport default function MarkdownFilePreview({ content }: FileRendererProps) {\n  return (\n    <div className=\"relative h-full\">\n      <div className=\"absolute inset-0 overflow-auto default-scrollbar p-6\">\n        <MinimalMarkdown\n          content={content}\n          className=\"max-w-3xl mx-auto\"\n          components={{\n            a: ({ href, children }: any) => (\n              <a\n                href={href}\n                target=\"_blank\"\n                rel=\"noopener noreferrer\"\n                className=\"text-link hover:text-link-hover underline\"\n              >\n                {children}\n              </a>\n            ),\n          }}\n        />\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/output-panel/PdfPreview.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useRef } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgFileText } from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { getArtifactUrl } from \"@/lib/build/client\";\n\ninterface PdfPreviewProps {\n  sessionId: string;\n  filePath: string;\n  refreshKey?: number;\n}\n\n/**\n * PdfPreview - Renders PDF files using the browser's built-in PDF viewer.\n * Fetches the PDF as a blob and creates an object URL so the iframe renders\n * it inline (the backend serves artifacts with Content-Disposition: attachment,\n * which would otherwise force a download).\n */\nexport default function PdfPreview({\n  sessionId,\n  filePath,\n  refreshKey,\n}: PdfPreviewProps) {\n  const [blobUrl, setBlobUrl] = useState<string | null>(null);\n  const [loading, setLoading] = useState(true);\n  const [error, setError] = useState(false);\n  const blobUrlRef = useRef<string | null>(null);\n\n  useEffect(() => {\n    const controller = new AbortController();\n\n    // Revoke the previous blob URL before starting a new fetch\n    if (blobUrlRef.current) {\n      URL.revokeObjectURL(blobUrlRef.current);\n      blobUrlRef.current = null;\n    }\n    setBlobUrl(null);\n    setLoading(true);\n    setError(false);\n\n    const encodedPath = filePath\n      .split(\"/\")\n      .map((segment) => encodeURIComponent(segment))\n      .join(\"/\");\n    const artifactUrl = getArtifactUrl(sessionId, encodedPath);\n\n    fetch(artifactUrl, { signal: controller.signal })\n      .then((res) => {\n        if (!res.ok) throw new Error(`Failed to fetch PDF: ${res.status}`);\n        return res.blob();\n      })\n      .then((blob) => {\n        const url = URL.createObjectURL(blob);\n        blobUrlRef.current = url;\n        setBlobUrl(url);\n        setLoading(false);\n      })\n      .catch((err) => {\n        if (err instanceof DOMException && err.name === \"AbortError\") return;\n        setError(true);\n        setLoading(false);\n      });\n\n    return () => {\n      controller.abort();\n      if (blobUrlRef.current) {\n        URL.revokeObjectURL(blobUrlRef.current);\n        blobUrlRef.current = null;\n      }\n    };\n  }, [sessionId, filePath, refreshKey]);\n\n  if (error) {\n    return (\n      <Section\n        height=\"full\"\n        alignItems=\"center\"\n        justifyContent=\"center\"\n        padding={2}\n      >\n        <SvgFileText size={48} className=\"stroke-text-02\" />\n        <Text headingH3 text03>\n          Cannot preview PDF\n        </Text>\n        <Text secondaryBody text02 className=\"text-center max-w-md\">\n          The PDF file could not be loaded.\n        </Text>\n      </Section>\n    );\n  }\n\n  if (loading || !blobUrl) {\n    return (\n      <Section\n        height=\"full\"\n        alignItems=\"center\"\n        justifyContent=\"center\"\n        padding={2}\n      >\n        <Text secondaryBody text03>\n          Loading PDF...\n        </Text>\n      </Section>\n    );\n  }\n\n  return (\n    <iframe\n      src={blobUrl}\n      title={filePath.split(\"/\").pop() || \"PDF Preview\"}\n      className={cn(\"w-full h-full border-none\")}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/output-panel/PptxPreview.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useCallback } from \"react\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgChevronLeft, SvgChevronRight, SvgFileText } from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { fetchPptxPreview } from \"@/app/craft/services/apiServices\";\nimport { getArtifactUrl } from \"@/lib/build/client\";\n\ninterface PptxPreviewProps {\n  sessionId: string;\n  filePath: string;\n  refreshKey?: number;\n}\n\n/**\n * PptxPreview - Displays PPTX files as navigable slide images.\n * Triggers on-demand conversion via the backend, then renders\n * individual slide JPEGs in a carousel with keyboard navigation.\n */\nexport default function PptxPreview({\n  sessionId,\n  filePath,\n  refreshKey,\n}: PptxPreviewProps) {\n  const [currentSlide, setCurrentSlide] = useState(0);\n  const [imageLoading, setImageLoading] = useState(true);\n\n  const { data, error, isLoading, mutate } = useSWR(\n    SWR_KEYS.buildSessionPptxPreview(sessionId, filePath),\n    () => fetchPptxPreview(sessionId, filePath),\n    {\n      revalidateOnFocus: false,\n      dedupingInterval: 10000,\n    }\n  );\n\n  const slideCount = data?.slide_count ?? 0;\n\n  const goToPrev = useCallback(() => {\n    setCurrentSlide((prev) => Math.max(0, prev - 1));\n  }, []);\n\n  const goToNext = useCallback(() => {\n    setCurrentSlide((prev) => Math.min(slideCount - 1, prev + 1));\n  }, [slideCount]);\n\n  // Reset slide index when file changes\n  useEffect(() => {\n    setCurrentSlide(0);\n  }, [filePath]);\n\n  // Reset image loading state when slide changes\n  useEffect(() => {\n    setImageLoading(true);\n  }, [currentSlide, data]);\n\n  // Re-fetch when refreshKey changes\n  useEffect(() => {\n    if (refreshKey && refreshKey > 0) {\n      mutate();\n    }\n  }, [refreshKey, mutate]);\n\n  // Keyboard navigation\n  useEffect(() => {\n    function handleKeyDown(e: KeyboardEvent) {\n      if (e.key === \"ArrowLeft\") {\n        goToPrev();\n      } else if (e.key === \"ArrowRight\") {\n        goToNext();\n      }\n    }\n    window.addEventListener(\"keydown\", handleKeyDown);\n    return () => window.removeEventListener(\"keydown\", handleKeyDown);\n  }, [goToPrev, goToNext]);\n\n  if (isLoading) {\n    return (\n      <Section\n        height=\"full\"\n        alignItems=\"center\"\n        justifyContent=\"center\"\n        padding={2}\n      >\n        <Text secondaryBody text03>\n          Converting presentation...\n        </Text>\n      </Section>\n    );\n  }\n\n  if (error) {\n    return (\n      <Section\n        height=\"full\"\n        alignItems=\"center\"\n        justifyContent=\"center\"\n        padding={2}\n      >\n        <SvgFileText size={48} className=\"stroke-text-02\" />\n        <Text headingH3 text03>\n          Cannot preview presentation\n        </Text>\n        <Text secondaryBody text02 className=\"text-center max-w-md\">\n          {error.message}\n        </Text>\n      </Section>\n    );\n  }\n\n  if (!data || slideCount === 0) {\n    return (\n      <Section\n        height=\"full\"\n        alignItems=\"center\"\n        justifyContent=\"center\"\n        padding={2}\n      >\n        <SvgFileText size={48} className=\"stroke-text-02\" />\n        <Text secondaryBody text03>\n          No slides in this presentation\n        </Text>\n      </Section>\n    );\n  }\n\n  const slidePath = data.slide_paths[currentSlide] ?? \"\";\n  const slideUrl = getArtifactUrl(sessionId, slidePath);\n\n  return (\n    <div className=\"h-full flex flex-col overflow-hidden\">\n      {/* Slide image */}\n      <div className=\"flex-1 flex items-center justify-center p-4 overflow-hidden\">\n        {imageLoading && (\n          <div className=\"absolute\">\n            <Text secondaryBody text03>\n              Loading slide...\n            </Text>\n          </div>\n        )}\n        <img\n          src={slideUrl}\n          alt={`Slide ${currentSlide + 1} of ${slideCount}`}\n          className={cn(\n            \"max-w-full max-h-full object-contain transition-opacity\",\n            imageLoading ? \"opacity-0\" : \"opacity-100\"\n          )}\n          onLoad={() => setImageLoading(false)}\n          onError={() => setImageLoading(false)}\n        />\n      </div>\n\n      {/* Navigation bar */}\n      {slideCount > 1 && (\n        <div className=\"flex items-center justify-center gap-3 p-2 border-t border-border-02\">\n          <button\n            onClick={goToPrev}\n            disabled={currentSlide === 0}\n            className={cn(\n              \"p-1 rounded\",\n              currentSlide === 0\n                ? \"opacity-30 cursor-not-allowed\"\n                : \"hover:bg-background-neutral-03 cursor-pointer\"\n            )}\n          >\n            <SvgChevronLeft size={16} className=\"stroke-text-02\" />\n          </button>\n          <Text secondaryBody text03>\n            Slide {currentSlide + 1} of {slideCount}\n          </Text>\n          <button\n            onClick={goToNext}\n            disabled={currentSlide === slideCount - 1}\n            className={cn(\n              \"p-1 rounded\",\n              currentSlide === slideCount - 1\n                ? \"opacity-30 cursor-not-allowed\"\n                : \"hover:bg-background-neutral-03 cursor-pointer\"\n            )}\n          >\n            <SvgChevronRight size={16} className=\"stroke-text-02\" />\n          </button>\n        </div>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/output-panel/PreviewTab.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { cn } from \"@/lib/utils\";\n\ninterface PreviewTabProps {\n  webappUrl: string | null;\n  /** Changing this value forces the iframe to fully remount / reload */\n  refreshKey?: number;\n}\n\n/**\n * PreviewTab - Shows the webapp iframe preview\n *\n * States:\n * - No webapp URL yet: Shows blank dark background while SWR fetches\n * - Has webapp URL: Shows iframe with crossfade from blank background\n */\nexport default function PreviewTab({ webappUrl, refreshKey }: PreviewTabProps) {\n  const [iframeLoaded, setIframeLoaded] = useState(false);\n\n  // Reset loaded state when URL or refreshKey changes\n  useEffect(() => {\n    setIframeLoaded(false);\n  }, [webappUrl, refreshKey]);\n\n  // Base background shown while loading or when no webapp exists yet\n  return (\n    <div className=\"h-full flex flex-col\">\n      <div className=\"flex-1 p-3 relative\">\n        {/* Base dark background - always present, visible when no iframe or iframe loading */}\n        <div\n          className={cn(\n            \"absolute inset-0 rounded-b-08 bg-neutral-950\",\n            \"transition-opacity duration-300\",\n            iframeLoaded ? \"opacity-0 pointer-events-none\" : \"opacity-100\"\n          )}\n        />\n\n        {/* Iframe - fades in when loaded */}\n        {webappUrl && (\n          <iframe\n            key={refreshKey}\n            src={webappUrl}\n            onLoad={() => setIframeLoaded(true)}\n            className={cn(\n              \"absolute inset-0 w-full h-full rounded-b-08 bg-neutral-950\",\n              \"transition-opacity duration-300\",\n              iframeLoaded ? \"opacity-100\" : \"opacity-0\"\n            )}\n            sandbox=\"allow-scripts allow-same-origin allow-forms allow-popups allow-popups-to-escape-sandbox allow-top-navigation-by-user-activation\"\n            title=\"Web App Preview\"\n          />\n        )}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/components/output-panel/UrlBar.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport {\n  SvgDownloadCloud,\n  SvgLoader,\n  SvgArrowLeft,\n  SvgArrowRight,\n  SvgExternalLink,\n  SvgRevert,\n} from \"@opal/icons\";\nimport { IconProps } from \"@opal/types\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport ShareButton from \"@/app/craft/components/ShareButton\";\nimport type { SharingScope } from \"@/app/craft/types/streamingTypes\";\n\n/** SvgLoader wrapped with animate-spin so it can be passed as a Button leftIcon */\nconst SpinningLoader: React.FunctionComponent<IconProps> = (props) => (\n  <SvgLoader {...props} className={cn(props.className, \"animate-spin\")} />\n);\n\nexport interface UrlBarProps {\n  displayUrl: string;\n  showNavigation?: boolean;\n  canGoBack?: boolean;\n  canGoForward?: boolean;\n  onBack?: () => void;\n  onForward?: () => void;\n  previewUrl?: string | null;\n  /** Optional callback to download the raw file — shows a cloud-download icon inside the URL pill */\n  onDownloadRaw?: () => void;\n  /** Tooltip text for the raw download button */\n  downloadRawTooltip?: string;\n  /** Optional download callback — shows an export button in the URL bar when provided */\n  onDownload?: () => void;\n  /** Whether a download/export is currently in progress */\n  isDownloading?: boolean;\n  /** Optional refresh callback — shows a refresh icon at the right edge of the URL pill */\n  onRefresh?: () => void;\n  /** Session ID — when present with previewUrl, shows share button for webapp */\n  sessionId?: string;\n  /** Sharing scope for the webapp (used when sessionId + previewUrl) */\n  sharingScope?: SharingScope;\n  /** Callback when sharing scope changes (revalidate webapp info) */\n  onScopeChange?: () => void;\n}\n\n/**\n * UrlBar - Chrome-style URL/status bar below tabs\n * Shows the current URL/path based on active tab or file preview\n * Optionally shows back/forward navigation buttons\n * For Preview tab, shows a button to open the URL in a new browser tab\n * For downloadable files, shows a download icon\n */\nexport default function UrlBar({\n  displayUrl,\n  showNavigation = false,\n  canGoBack = false,\n  canGoForward = false,\n  onBack,\n  onForward,\n  previewUrl,\n  onDownloadRaw,\n  downloadRawTooltip = \"Download file\",\n  onDownload,\n  isDownloading = false,\n  onRefresh,\n  sessionId,\n  sharingScope = \"private\",\n  onScopeChange,\n}: UrlBarProps) {\n  const handleOpenInNewTab = () => {\n    if (previewUrl) {\n      window.open(previewUrl, \"_blank\", \"noopener,noreferrer\");\n    }\n  };\n\n  return (\n    <div className=\"px-3 pb-2\">\n      <div className=\"flex items-center gap-1\">\n        {/* Navigation buttons + refresh */}\n        {showNavigation && (\n          <div className=\"flex items-center gap-0.5\">\n            <button\n              onClick={onBack}\n              disabled={!canGoBack}\n              className={cn(\n                \"p-1.5 rounded-full transition-colors\",\n                canGoBack\n                  ? \"hover:bg-background-tint-03 text-text-03\"\n                  : \"text-text-02 cursor-not-allowed\"\n              )}\n              aria-label=\"Go back\"\n            >\n              <SvgArrowLeft size={16} />\n            </button>\n            <button\n              onClick={onForward}\n              disabled={!canGoForward}\n              className={cn(\n                \"p-1.5 rounded-full transition-colors\",\n                canGoForward\n                  ? \"hover:bg-background-tint-03 text-text-03\"\n                  : \"text-text-02 cursor-not-allowed\"\n              )}\n              aria-label=\"Go forward\"\n            >\n              <SvgArrowRight size={16} />\n            </button>\n            {onRefresh && (\n              <button\n                onClick={onRefresh}\n                className=\"p-1.5 rounded-full transition-colors hover:bg-background-tint-03 text-text-03\"\n                aria-label=\"Refresh\"\n              >\n                <SvgRevert size={14} className=\"-scale-x-100\" />\n              </button>\n            )}\n          </div>\n        )}\n        {/* URL display */}\n        <div className=\"flex-1 min-w-0 flex items-center px-3 py-1.5 bg-background-tint-02 rounded-full gap-2 min-h-[2.25rem]\">\n          {/* Download raw file button */}\n          {onDownloadRaw && (\n            <SimpleTooltip tooltip={downloadRawTooltip} delayDuration={200}>\n              <button\n                onClick={onDownloadRaw}\n                className=\"flex-shrink-0 p-0.5 rounded transition-colors hover:bg-background-tint-03 text-text-03\"\n                aria-label={downloadRawTooltip}\n              >\n                <SvgDownloadCloud size={14} />\n              </button>\n            </SimpleTooltip>\n          )}\n          {/* Open in new tab button - only shown for Preview tab with valid URL */}\n          {previewUrl && (\n            <SimpleTooltip tooltip=\"open in a new tab\" delayDuration={200}>\n              <button\n                onClick={handleOpenInNewTab}\n                className=\"flex-shrink-0 p-0.5 rounded transition-colors hover:bg-background-tint-03 text-text-03\"\n                aria-label=\"open in a new tab\"\n              >\n                <SvgExternalLink size={14} />\n              </button>\n            </SimpleTooltip>\n          )}\n          <Text secondaryBody text03 className=\"min-w-0 flex-1 truncate\">\n            {displayUrl}\n          </Text>\n        </div>\n        {/* Export button — shown for downloadable file previews (e.g. markdown → docx) */}\n        {onDownload && (\n          <Button\n            disabled={isDownloading}\n            variant=\"action\"\n            prominence=\"tertiary\"\n            icon={isDownloading ? SpinningLoader : SvgExternalLink}\n            onClick={onDownload}\n          >\n            {isDownloading ? \"Exporting...\" : \"Export to .docx\"}\n          </Button>\n        )}\n        {/* Share button — shown when webapp preview is active */}\n        {previewUrl && sessionId && (\n          <ShareButton\n            key={sessionId}\n            sessionId={sessionId}\n            webappUrl={previewUrl}\n            sharingScope={sharingScope}\n            onScopeChange={onScopeChange}\n          />\n        )}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/constants/exampleBuildPrompts.ts",
    "content": "/**\n * Example prompts for the Build Mode welcome screen.\n * Organized by user persona to allow different prompts for different user types.\n */\n\nexport interface BuildPrompt {\n  id: string;\n  /** Short summary shown on the button */\n  summary: string;\n  /** Full prompt text inserted into the input bar */\n  fullText: string;\n  /** Optional image URL/path for visual display */\n  image?: string;\n}\n\nexport type UserPersona = \"default\" | \"engineering\" | \"sales\" | \"product\";\n\n/**\n * Example prompts organized by user persona.\n * Each persona has a set of prompts tailored to their typical use cases.\n */\nexport const exampleBuildPrompts: Record<UserPersona, BuildPrompt[]> = {\n  default: [\n    {\n      id: \"default-1\",\n      summary: \"Analyze team productivity by month across my company\",\n      fullText:\n        \"Create a dashboard with the number of closed tickets per month. Split by priority and compare teams.\",\n      image: \"/craft_suggested_image_1.png\",\n    },\n    {\n      id: \"default-2\",\n      summary:\n        \"Visualize what my team did this month with interactive drill-downs\",\n      fullText:\n        \"What did my team work on this month? Create a dashboard that 1) shows the number of actions per activity, 2) shows the individual work items when I select something in the dashboard.\",\n      image: \"/craft_suggested_image_2.png\",\n    },\n    {\n      id: \"default-3\",\n      summary: \"Connect my backlog to recent customer conversations\",\n      fullText:\n        \"For each of my open Linear tickets, find at least 2 customers that have discussed related issues. Present the results in a dashboard table.\",\n      image: \"/craft_suggested_image_3.png\",\n    },\n    {\n      id: \"default-4\",\n      summary:\n        \"Surface the top pain points from this week's customer success calls\",\n      fullText:\n        \"Based on the customer calls this week, what are the 5 most important challenges? Create a table in a dashboard that shows the challenge and the customers that complained about it.\",\n      image: \"/craft_suggested_image_4.png\",\n    },\n    {\n      id: \"default-5\",\n      summary:\n        \"Compare and contrast which messaging resonates the most with our prospects\",\n      fullText:\n        \"If you look at the customer calls over the last 30 days, which part of our messaging seems to resonate the best, and appears to drive the most customer value? Generate a slide that effectively tells the story.\",\n      image: \"/craft_suggested_image_5.png\",\n    },\n  ],\n  engineering: [\n    {\n      id: \"eng-1\",\n      summary: \"Enrich my open PRs with customer insights and feedback\",\n      fullText:\n        \"Look at my open PRs and find information from customer discussions regarding these PRs that could help to implement better. Also find for each PR the design doc I wrote and create a new one that is appropriately updated.\",\n      image: \"/craft_suggested_image_1.png\",\n    },\n    {\n      id: \"eng-2\",\n      summary: \"Track engineering velocity from ticket to merged PR\",\n      fullText:\n        \"What is the average time it takes the engineers to merge PRs after my team created a Linear ticket? Create a dashboard that shows the average time by engineering team.\",\n      image: \"/craft_suggested_image_2.png\",\n    },\n    {\n      id: \"eng-3\",\n      summary: \"Build a visual roadmap story from my quarterly contributions\",\n      fullText:\n        \"Create an image (slide) that groups my PRs by quarter, finds the common thread, and presents a coherent story. This will later go into a historical roadmap.\",\n      image: \"/craft_suggested_image_3.png\",\n    },\n    {\n      id: \"eng-4\",\n      summary:\n        \"Find churned customers who would have benefited from our releases\",\n      fullText:\n        \"Look at the PRs that my team merged this month. Then look at the customers we lost over the last 2 months and tell me which of the customers would have likely benefitted from the merged PRs. Rank the customers by importance. Present in a dashboard.\",\n      image: \"/craft_suggested_image_4.png\",\n    },\n    {\n      id: \"eng-5\",\n      summary: \"Build a Linear dashboard to track my team's progress\",\n      fullText: \"Create a Linear dashboard for my team.\",\n      image: \"/craft_suggested_image_5.png\",\n    },\n  ],\n  sales: [\n    {\n      id: \"sales-1\",\n      summary: \"Identify sales blockers and quantify their revenue impact\",\n      fullText:\n        \"Look at the customer calls that my team had last month and identify the 3 most important sales blockers. Those could be product-related, messaging-related, or persona-chemistry. Create a dashboard showing how much revenue seems to be associated with each blocker.\",\n      image: \"/craft_suggested_image_1.png\",\n    },\n    {\n      id: \"sales-2\",\n      summary: \"Prepare winning talking points for my upcoming meeting\",\n      fullText:\n        \"I have a meeting with a prospect next week. Please go through the objections they raised and suggest good talking points based on other customer situations, upcoming product changes, etc.\",\n      image: \"/craft_suggested_image_2.png\",\n    },\n    {\n      id: \"sales-3\",\n      summary: \"Learn how my teammates overcame similar deal objections\",\n      fullText:\n        \"I don't want to give up on this opportunity. Find customer discussions from other members of my team where similar issues came up and were overcome. Provide some recommendations.\",\n      image: \"/craft_suggested_image_3.png\",\n    },\n    {\n      id: \"sales-4\",\n      summary: \"Discover which pitch messaging resonates most with customers\",\n      fullText:\n        \"If you look at the customer calls over the last 30 days, which part of our messaging seems to resonate the best, and appears to drive the most customer value? Generate a slide that effectively tells the story.\",\n      image: \"/craft_suggested_image_4.png\",\n    },\n    {\n      id: \"sales-5\",\n      summary: \"Surface the top product challenges from customer calls\",\n      fullText:\n        \"Based on the customer calls this week, what are the 5 most important challenges with the product? Create a table in a dashboard that shows the challenge and the customers that complained about it.\",\n      image: \"/craft_suggested_image_5.png\",\n    },\n  ],\n  product: [\n    {\n      id: \"product-1\",\n      summary: \"Summarize what I did this month for my manager\",\n      fullText:\n        \"I need to explain to my manager what I did last month, and how it matters for customer impact.\",\n      image: \"/craft_suggested_image_1.png\",\n    },\n    {\n      id: \"product-2\",\n      summary: \"Connect my backlog to recent customer conversations\",\n      fullText:\n        \"For each of my open Linear tickets, find at least 2 customers that have discussed related issues. Present the results in a dashboard table.\",\n      image: \"/craft_suggested_image_2.png\",\n    },\n    {\n      id: \"product-3\",\n      summary:\n        \"Visualize what my team did this month with interactive drill-downs\",\n      fullText:\n        \"What did my team work on this month? Create a dashboard that 1) shows the number of actions per activity, 2) shows the individual work items when I select something in the dashboard.\",\n      image: \"/craft_suggested_image_4.png\",\n    },\n    {\n      id: \"product-4\",\n      summary:\n        \"Find churned customers who would have benefited from the releases this month\",\n      fullText:\n        \"Look at the PRs that my team merged this month. Then look at the customers we lost over the last 2 months and tell me which of the customers would have likely benefitted from the merged PRs. Rank the customers by importance. Present in a dashboard.\",\n      image: \"/craft_suggested_image_3.png\",\n    },\n    {\n      id: \"product-5\",\n      summary: \"Analyze team productivity by month across my company\",\n      fullText:\n        \"Create a dashboard with the number of closed tickets per month. Split by priority and compare teams.\",\n      image: \"/craft_suggested_image_5.png\",\n    },\n  ],\n};\n\n/**\n * Get prompts for a specific user persona.\n * Falls back to default prompts if persona is not found.\n */\nexport function getPromptsForPersona(persona: UserPersona): BuildPrompt[] {\n  return exampleBuildPrompts[persona] ?? exampleBuildPrompts.default;\n}\n\n/**\n * Maps a workArea value from the build_user_persona cookie to a UserPersona.\n * Work areas that don't have dedicated prompts (executive, marketing, other) fall back to default.\n */\nexport function workAreaToPersona(workArea: string | undefined): UserPersona {\n  switch (workArea) {\n    case \"engineering\":\n      return \"engineering\";\n    case \"sales\":\n      return \"sales\";\n    case \"product\":\n      return \"product\";\n    default:\n      return \"default\";\n  }\n}\n"
  },
  {
    "path": "web/src/app/craft/constants.ts",
    "content": "// ============================================================================\n// Build Session Constants\n// ============================================================================\n\n/** Duration to display success state after session deletion (ms) */\nexport const DELETE_SUCCESS_DISPLAY_DURATION_MS = 800;\n\n/** Interval for rotating delete messages during session deletion (ms) */\nexport const DELETE_MESSAGE_ROTATION_INTERVAL_MS = 3000;\n"
  },
  {
    "path": "web/src/app/craft/contexts/BuildContext.tsx",
    "content": "\"use client\";\n\nimport {\n  createContext,\n  useContext,\n  useState,\n  useMemo,\n  type ReactNode,\n} from \"react\";\n\n/**\n * Build UI Context\n *\n * This context manages UI state (sidebar visibility).\n * Output panel state is stored per-session in useBuildSessionStore.\n */\ninterface BuildContextValue {\n  // UI state - left sidebar\n  leftSidebarFolded: boolean;\n  setLeftSidebarFolded: React.Dispatch<React.SetStateAction<boolean>>;\n}\n\nconst BuildContext = createContext<BuildContextValue | null>(null);\n\nexport interface BuildProviderProps {\n  children: ReactNode;\n}\n\nexport function BuildProvider({ children }: BuildProviderProps) {\n  const [leftSidebarFolded, setLeftSidebarFolded] = useState(false);\n\n  const value = useMemo<BuildContextValue>(\n    () => ({\n      leftSidebarFolded,\n      setLeftSidebarFolded,\n    }),\n    [leftSidebarFolded]\n  );\n\n  return (\n    <BuildContext.Provider value={value}>{children}</BuildContext.Provider>\n  );\n}\n\nexport function useBuildContext() {\n  const context = useContext(BuildContext);\n  if (!context) {\n    throw new Error(\"useBuildContext must be used within a BuildProvider\");\n  }\n  return context;\n}\n"
  },
  {
    "path": "web/src/app/craft/contexts/UploadFilesContext.tsx",
    "content": "\"use client\";\n\nimport {\n  createContext,\n  useContext,\n  useState,\n  useCallback,\n  useMemo,\n  useRef,\n  useEffect,\n  type ReactNode,\n} from \"react\";\nimport {\n  uploadFile as uploadFileApi,\n  deleteFile as deleteFileApi,\n  fetchDirectoryListing,\n} from \"@/app/craft/services/apiServices\";\nimport { useBuildSessionStore } from \"@/app/craft/hooks/useBuildSessionStore\";\n\n/**\n * Upload File Status - tracks the state of files being uploaded\n */\nexport enum UploadFileStatus {\n  /** File is currently being uploaded to the sandbox */\n  UPLOADING = \"UPLOADING\",\n  /** File is being processed after upload */\n  PROCESSING = \"PROCESSING\",\n  /** File has been successfully uploaded and has a path */\n  COMPLETED = \"COMPLETED\",\n  /** File upload failed */\n  FAILED = \"FAILED\",\n  /** File is waiting for a session to be created before uploading */\n  PENDING = \"PENDING\",\n}\n\n/**\n * Build File - represents a file attached to a build session\n */\nexport interface BuildFile {\n  id: string;\n  name: string;\n  status: UploadFileStatus;\n  file_type: string;\n  size: number;\n  created_at: string;\n  // Original File object for upload\n  file?: File;\n  // Path in sandbox after upload (e.g., \"attachments/doc.pdf\")\n  path?: string;\n  // Error message if upload failed\n  error?: string;\n}\n\n// Helper to generate unique temp IDs\nconst generateTempId = () => {\n  try {\n    return `temp_${crypto.randomUUID()}`;\n  } catch {\n    return `temp_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;\n  }\n};\n\n// =============================================================================\n// File Validation (matches backend: build/configs.py and build/utils.py)\n// =============================================================================\n\n/** Maximum individual file size - matches BUILD_MAX_UPLOAD_FILE_SIZE_MB (50MB) */\nconst MAX_FILE_SIZE_BYTES = 50 * 1024 * 1024;\n\n/** Maximum total attachment size per session - matches BUILD_MAX_TOTAL_UPLOAD_SIZE_MB (200MB) */\nconst MAX_TOTAL_SIZE_BYTES = 200 * 1024 * 1024;\n\n/** Maximum files per session - matches BUILD_MAX_UPLOAD_FILES_PER_SESSION */\nconst MAX_FILES_PER_SESSION = 20;\n\n/** Blocked file extensions (executables/dangerous) - matches backend BLOCKED_EXTENSIONS */\nconst BLOCKED_EXTENSIONS = new Set([\n  // Windows executables\n  \".exe\",\n  \".dll\",\n  \".msi\",\n  \".scr\",\n  \".com\",\n  \".bat\",\n  \".cmd\",\n  \".ps1\",\n  // macOS\n  \".app\",\n  \".dmg\",\n  \".pkg\",\n  // Linux\n  \".deb\",\n  \".rpm\",\n  \".so\",\n  // Cross-platform\n  \".jar\",\n  \".war\",\n  \".ear\",\n  // Other potentially dangerous\n  \".vbs\",\n  \".vbe\",\n  \".wsf\",\n  \".wsh\",\n  \".hta\",\n  \".cpl\",\n  \".reg\",\n  \".lnk\",\n  \".pif\",\n]);\n\n/** Format bytes to human-readable string */\nfunction formatBytes(bytes: number): string {\n  if (bytes < 1024) return `${bytes} B`;\n  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;\n  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;\n}\n\n/** Get file extension (lowercase, including dot) */\nfunction getFileExtension(filename: string): string {\n  const lastDot = filename.lastIndexOf(\".\");\n  if (lastDot === -1) return \"\";\n  return filename.slice(lastDot).toLowerCase();\n}\n\n/** Validation result for a single file */\ninterface FileValidationResult {\n  valid: boolean;\n  error?: string;\n}\n\n/** Validate a single file before upload */\nfunction validateFile(file: File): FileValidationResult {\n  // Check file size\n  if (file.size > MAX_FILE_SIZE_BYTES) {\n    return {\n      valid: false,\n      error: `File too large (${formatBytes(\n        file.size\n      )}). Maximum is ${formatBytes(MAX_FILE_SIZE_BYTES)}.`,\n    };\n  }\n\n  // Check blocked extensions\n  const ext = getFileExtension(file.name);\n  if (ext && BLOCKED_EXTENSIONS.has(ext)) {\n    return {\n      valid: false,\n      error: `File type '${ext}' is not allowed for security reasons.`,\n    };\n  }\n\n  // Check for missing extension\n  if (!ext) {\n    return {\n      valid: false,\n      error: \"File must have an extension.\",\n    };\n  }\n\n  return { valid: true };\n}\n\n/** Validate total files and size constraints */\nfunction validateBatch(\n  newFiles: File[],\n  existingFiles: BuildFile[]\n): FileValidationResult {\n  const totalCount = existingFiles.length + newFiles.length;\n  if (totalCount > MAX_FILES_PER_SESSION) {\n    return {\n      valid: false,\n      error: `Too many files. Maximum is ${MAX_FILES_PER_SESSION} files per session.`,\n    };\n  }\n\n  const existingSize = existingFiles.reduce((sum, f) => sum + f.size, 0);\n  const newSize = newFiles.reduce((sum, f) => sum + f.size, 0);\n  const totalSize = existingSize + newSize;\n\n  if (totalSize > MAX_TOTAL_SIZE_BYTES) {\n    return {\n      valid: false,\n      error: `Total size exceeds limit. Maximum is ${formatBytes(\n        MAX_TOTAL_SIZE_BYTES\n      )} per session.`,\n    };\n  }\n\n  return { valid: true };\n}\n\n/** Create a failed BuildFile for validation errors */\nfunction createFailedFile(file: File, error: string): BuildFile {\n  return {\n    id: generateTempId(),\n    name: file.name,\n    status: UploadFileStatus.FAILED,\n    file_type: file.type,\n    size: file.size,\n    created_at: new Date().toISOString(),\n    error,\n  };\n}\n\n// Create optimistic file from File object\nconst createOptimisticFile = (file: File): BuildFile => {\n  const tempId = generateTempId();\n  return {\n    id: tempId,\n    name: file.name,\n    status: UploadFileStatus.UPLOADING,\n    file_type: file.type,\n    size: file.size,\n    created_at: new Date().toISOString(),\n    file,\n  };\n};\n\n/**\n * Error types for better error handling\n */\nexport enum UploadErrorType {\n  NETWORK = \"NETWORK\",\n  AUTH = \"AUTH\",\n  NOT_FOUND = \"NOT_FOUND\",\n  SERVER = \"SERVER\",\n  UNKNOWN = \"UNKNOWN\",\n}\n\nfunction classifyError(error: unknown): {\n  type: UploadErrorType;\n  message: string;\n} {\n  if (error instanceof Error) {\n    const message = error.message.toLowerCase();\n    if (message.includes(\"401\") || message.includes(\"unauthorized\")) {\n      return { type: UploadErrorType.AUTH, message: \"Session expired\" };\n    }\n    if (message.includes(\"404\") || message.includes(\"not found\")) {\n      return { type: UploadErrorType.NOT_FOUND, message: \"Resource not found\" };\n    }\n    if (message.includes(\"500\") || message.includes(\"server\")) {\n      return { type: UploadErrorType.SERVER, message: \"Server error\" };\n    }\n    if (message.includes(\"network\") || message.includes(\"fetch\")) {\n      return { type: UploadErrorType.NETWORK, message: \"Network error\" };\n    }\n    return { type: UploadErrorType.UNKNOWN, message: error.message };\n  }\n  return { type: UploadErrorType.UNKNOWN, message: \"Upload failed\" };\n}\n\n/**\n * UploadFilesContext - Centralized file upload state management\n *\n * This context manages:\n * - File attachment state (current files attached to input)\n * - Active session binding (which session files are associated with)\n * - Automatic upload of pending files when session becomes available\n * - Automatic fetch of existing attachments when session changes\n * - File upload, removal, and clearing operations\n *\n * Components should:\n * - Call `setActiveSession(sessionId)` when session changes\n * - Call `uploadFiles(files)` to attach files (uses active session internally)\n * - Call `removeFile(fileId)` to remove files (uses active session internally)\n * - Read `currentMessageFiles` to display attached files\n */\ninterface UploadFilesContextValue {\n  // Current message files (attached to the input bar)\n  currentMessageFiles: BuildFile[];\n\n  // Active session ID (set by parent components)\n  activeSessionId: string | null;\n\n  /**\n   * Set the active session ID. This triggers:\n   * - Fetching existing attachments from the new session (if different)\n   * - Clearing files if navigating to no session\n   * - Auto-uploading any pending files\n   *\n   * Call this when:\n   * - Session ID changes in URL\n   * - Pre-provisioned session becomes available\n   */\n  setActiveSession: (sessionId: string | null) => void;\n\n  /**\n   * Upload files to the active session.\n   * - If session is available: uploads immediately\n   * - If no session: marks as PENDING (auto-uploads when session available)\n   */\n  uploadFiles: (files: File[]) => Promise<BuildFile[]>;\n\n  /**\n   * Remove a file from the input bar.\n   * If the file was uploaded, also deletes from the sandbox.\n   */\n  removeFile: (fileId: string) => void;\n\n  /**\n   * Clear all attached files from the input bar.\n   * Does NOT delete from sandbox (use for form reset).\n   * @param options.suppressRefetch - When true, skips the refetch that would\n   *   normally restore session attachments (e.g. when user hits Enter to dismiss\n   *   a file from the input bar).\n   */\n  clearFiles: (options?: { suppressRefetch?: boolean }) => void;\n\n  // Check if any files are uploading\n  hasUploadingFiles: boolean;\n\n  // Check if any files are pending upload\n  hasPendingFiles: boolean;\n}\n\nconst UploadFilesContext = createContext<UploadFilesContextValue | null>(null);\n\nexport interface UploadFilesProviderProps {\n  children: ReactNode;\n}\n\nexport function UploadFilesProvider({ children }: UploadFilesProviderProps) {\n  // =========================================================================\n  // State\n  // =========================================================================\n\n  const [currentMessageFiles, setCurrentMessageFiles] = useState<BuildFile[]>(\n    []\n  );\n  const [activeSessionId, setActiveSessionId] = useState<string | null>(null);\n\n  // Get triggerFilesRefresh from the store to refresh the file explorer\n  const triggerFilesRefresh = useBuildSessionStore(\n    (state) => state.triggerFilesRefresh\n  );\n\n  // =========================================================================\n  // Refs for race condition protection\n  // =========================================================================\n\n  const isUploadingPendingRef = useRef(false);\n  const fetchingSessionRef = useRef<string | null>(null);\n  const prevSessionRef = useRef<string | null>(null);\n  // Track active deletions to prevent refetch race condition\n  const activeDeletionsRef = useRef<Set<string>>(new Set());\n  // When true, skip the refetch that runs after clearFiles (e.g. Enter to dismiss file)\n  const suppressRefetchRef = useRef(false);\n\n  // =========================================================================\n  // Derived state\n  // =========================================================================\n\n  const hasUploadingFiles = useMemo(() => {\n    return currentMessageFiles.some(\n      (file) => file.status === UploadFileStatus.UPLOADING\n    );\n  }, [currentMessageFiles]);\n\n  const hasPendingFiles = useMemo(() => {\n    return currentMessageFiles.some(\n      (file) => file.status === UploadFileStatus.PENDING\n    );\n  }, [currentMessageFiles]);\n\n  // =========================================================================\n  // Internal operations (not exposed to consumers)\n  // =========================================================================\n\n  /**\n   * Upload pending files to the given session.\n   * Internal function - called automatically by effects.\n   * Reads current files from state internally to avoid stale closures.\n   */\n  const uploadPendingFilesInternal = useCallback(\n    async (sessionId: string): Promise<void> => {\n      if (isUploadingPendingRef.current) return;\n\n      // Read current files and find pending ones atomically\n      let pendingFiles: BuildFile[] = [];\n      setCurrentMessageFiles((prev) => {\n        pendingFiles = prev.filter(\n          (f) => f.status === UploadFileStatus.PENDING && f.file\n        );\n        // Mark as uploading in the same state update to avoid race conditions\n        if (pendingFiles.length > 0) {\n          return prev.map((f) =>\n            pendingFiles.some((pf) => pf.id === f.id)\n              ? { ...f, status: UploadFileStatus.UPLOADING }\n              : f\n          );\n        }\n        return prev;\n      });\n\n      if (pendingFiles.length === 0) return;\n\n      isUploadingPendingRef.current = true;\n\n      try {\n        // Upload in parallel\n        const results = await Promise.all(\n          pendingFiles.map(async (file) => {\n            try {\n              const result = await uploadFileApi(sessionId, file.file!);\n              return { id: file.id, success: true as const, result };\n            } catch (error) {\n              const { message } = classifyError(error);\n              return {\n                id: file.id,\n                success: false as const,\n                errorMessage: message,\n              };\n            }\n          })\n        );\n\n        // Update statuses\n        setCurrentMessageFiles((prev) =>\n          prev.map((f) => {\n            const result = results.find((r) => r.id === f.id);\n            if (!result) return f;\n            return result.success\n              ? {\n                  ...f,\n                  status: UploadFileStatus.COMPLETED,\n                  path: result.result.path,\n                  name: result.result.filename,\n                  file: undefined, // Clear blob to free memory\n                }\n              : {\n                  ...f,\n                  status: UploadFileStatus.FAILED,\n                  error: result.errorMessage,\n                };\n          })\n        );\n\n        // Refresh file explorer if any uploads succeeded\n        const anySucceeded = results.some((r) => r.success);\n        if (anySucceeded) {\n          triggerFilesRefresh(sessionId);\n        }\n      } finally {\n        isUploadingPendingRef.current = false;\n      }\n    },\n    [triggerFilesRefresh]\n  );\n\n  /**\n   * Fetch existing attachments from the backend.\n   * Internal function - called automatically by effects.\n   */\n  const fetchExistingAttachmentsInternal = useCallback(\n    async (sessionId: string, replace: boolean): Promise<void> => {\n      // Request deduplication\n      if (fetchingSessionRef.current === sessionId) return;\n\n      fetchingSessionRef.current = sessionId;\n\n      try {\n        const listing = await fetchDirectoryListing(sessionId, \"attachments\");\n\n        // Use deterministic IDs based on session and path for stable React keys\n        const attachments: BuildFile[] = listing.entries\n          .filter((entry) => !entry.is_directory)\n          .map((entry) => ({\n            id: `existing_${sessionId}_${entry.path}`,\n            name: entry.name,\n            status: UploadFileStatus.COMPLETED,\n            file_type: entry.mime_type || \"application/octet-stream\",\n            size: entry.size || 0,\n            created_at: new Date().toISOString(),\n            path: entry.path,\n          }));\n\n        if (replace) {\n          // When replacing, preserve any files that are still being processed locally\n          // (uploading, pending, or recently completed uploads that might not be in\n          // backend listing yet due to race conditions)\n          setCurrentMessageFiles((prev) => {\n            // Keep files that are still in-flight or don't have a path yet\n            const localOnlyFiles = prev.filter(\n              (f) =>\n                f.status === UploadFileStatus.UPLOADING ||\n                f.status === UploadFileStatus.PENDING ||\n                f.status === UploadFileStatus.PROCESSING ||\n                // Keep recently uploaded files (have temp ID, not fetched from backend)\n                f.id.startsWith(\"temp_\")\n            );\n\n            // Merge: backend attachments + local-only files (avoiding duplicates by path)\n            const backendPaths = new Set(attachments.map((f) => f.path));\n            const nonDuplicateLocalFiles = localOnlyFiles.filter(\n              (f) => !f.path || !backendPaths.has(f.path)\n            );\n\n            return [...attachments, ...nonDuplicateLocalFiles];\n          });\n        } else if (attachments.length > 0) {\n          setCurrentMessageFiles((prev) => {\n            const existingPaths = new Set(prev.map((f) => f.path));\n            const newFiles = attachments.filter(\n              (f) => !existingPaths.has(f.path)\n            );\n            return [...prev, ...newFiles];\n          });\n        }\n      } catch (error) {\n        const { type } = classifyError(error);\n        if (type !== UploadErrorType.NOT_FOUND) {\n          console.error(\n            \"[UploadFilesContext] fetchExistingAttachments error:\",\n            error\n          );\n        }\n        if (replace) {\n          // On error, only clear files that aren't being processed locally\n          setCurrentMessageFiles((prev) =>\n            prev.filter(\n              (f) =>\n                f.status === UploadFileStatus.UPLOADING ||\n                f.status === UploadFileStatus.PENDING ||\n                f.status === UploadFileStatus.PROCESSING ||\n                f.id.startsWith(\"temp_\")\n            )\n          );\n        }\n      } finally {\n        fetchingSessionRef.current = null;\n      }\n    },\n    []\n  );\n\n  // =========================================================================\n  // Effects - Automatic state machine transitions\n  // =========================================================================\n\n  /**\n   * Effect: Handle session changes\n   *\n   * When activeSessionId changes:\n   * - If changed to a DIFFERENT non-null session: fetch attachments (replace mode)\n   * - If changed to null: do nothing (don't clear - session might be temporarily null during revalidation)\n   *\n   * This prevents unnecessary fetches/clears when the focus handler temporarily\n   * resets the pre-provisioned session state.\n   */\n  useEffect(() => {\n    const prevSession = prevSessionRef.current;\n    const currentSession = activeSessionId;\n\n    // Only update ref when we have a non-null session (ignore temporary nulls)\n    if (currentSession) {\n      // Session changed to a different non-null value\n      if (currentSession !== prevSession) {\n        prevSessionRef.current = currentSession;\n        fetchExistingAttachmentsInternal(currentSession, true);\n      }\n    }\n    // When session becomes null, don't clear files or update ref.\n    // This handles the case where pre-provisioning temporarily resets on focus.\n    // Files will be cleared when user actually navigates away or logs out.\n  }, [activeSessionId, fetchExistingAttachmentsInternal]);\n\n  /**\n   * Effect: Auto-upload pending files when session becomes available\n   *\n   * This handles the case where user attaches files before session is ready.\n   */\n  useEffect(() => {\n    if (activeSessionId && hasPendingFiles) {\n      uploadPendingFilesInternal(activeSessionId);\n    }\n  }, [activeSessionId, hasPendingFiles, uploadPendingFilesInternal]);\n\n  /**\n   * Effect: Refetch attachments after files are cleared\n   *\n   * When files are cleared (e.g., after sending a message) but we're still\n   * on the same session, refetch to restore any backend attachments.\n   *\n   * IMPORTANT: Skip refetch if files went to 0 due to active deletions.\n   * This prevents a race condition where refetch returns the file before\n   * backend deletion completes, causing the file pill to persist.\n   */\n  const prevFilesLengthRef = useRef(currentMessageFiles.length);\n  useEffect(() => {\n    const prevLength = prevFilesLengthRef.current;\n    const currentLength = currentMessageFiles.length;\n    prevFilesLengthRef.current = currentLength;\n\n    // Files were just cleared (went from >0 to 0)\n    const filesWereCleared = prevLength > 0 && currentLength === 0;\n\n    // Skip refetch if there are active deletions in progress\n    // This prevents the deleted file from being re-added before backend deletion completes\n    const hasActiveDeletions = activeDeletionsRef.current.size > 0;\n    // Skip refetch if caller explicitly suppressed (e.g. user hit Enter to dismiss file)\n    const shouldSuppressRefetch = suppressRefetchRef.current;\n    if (shouldSuppressRefetch) {\n      suppressRefetchRef.current = false;\n    }\n\n    // Refetch if on same session and files were cleared (not deleted)\n    if (\n      filesWereCleared &&\n      activeSessionId &&\n      prevSessionRef.current === activeSessionId &&\n      !hasActiveDeletions &&\n      !shouldSuppressRefetch\n    ) {\n      fetchExistingAttachmentsInternal(activeSessionId, false);\n    }\n  }, [\n    currentMessageFiles.length,\n    activeSessionId,\n    fetchExistingAttachmentsInternal,\n  ]);\n\n  // =========================================================================\n  // Public API\n  // =========================================================================\n\n  /**\n   * Set the active session. Triggers fetching/clearing as needed.\n   */\n  const setActiveSession = useCallback((sessionId: string | null) => {\n    setActiveSessionId(sessionId);\n  }, []);\n\n  /**\n   * Upload files. Uses activeSessionId internally.\n   * Validates files before upload (size, extension, batch limits).\n   */\n  const uploadFiles = useCallback(\n    async (files: File[]): Promise<BuildFile[]> => {\n      // Get current files for batch validation\n      const existingFiles = currentMessageFiles;\n\n      // Validate batch constraints first\n      const batchValidation = validateBatch(files, existingFiles);\n      if (!batchValidation.valid) {\n        // Create failed files for all with the batch error\n        const failedFiles = files.map((f) =>\n          createFailedFile(f, batchValidation.error!)\n        );\n        setCurrentMessageFiles((prev) => [...prev, ...failedFiles]);\n        return failedFiles;\n      }\n\n      // Validate each file individually and separate valid from invalid\n      const validFiles: File[] = [];\n      const failedFiles: BuildFile[] = [];\n\n      for (const file of files) {\n        const validation = validateFile(file);\n        if (validation.valid) {\n          validFiles.push(file);\n        } else {\n          failedFiles.push(createFailedFile(file, validation.error!));\n        }\n      }\n\n      // Add failed files immediately\n      if (failedFiles.length > 0) {\n        setCurrentMessageFiles((prev) => [...prev, ...failedFiles]);\n      }\n\n      // If no valid files, return early\n      if (validFiles.length === 0) {\n        return failedFiles;\n      }\n\n      // Create optimistic files for valid files\n      const optimisticFiles = validFiles.map(createOptimisticFile);\n\n      // Add to current message files immediately\n      setCurrentMessageFiles((prev) => [...prev, ...optimisticFiles]);\n\n      const sessionId = activeSessionId;\n\n      if (sessionId) {\n        // Session available - upload immediately\n        const uploadPromises = optimisticFiles.map(async (optimisticFile) => {\n          try {\n            const result = await uploadFileApi(sessionId, optimisticFile.file!);\n            return {\n              id: optimisticFile.id,\n              success: true as const,\n              result,\n            };\n          } catch (error) {\n            const { message } = classifyError(error);\n            return {\n              id: optimisticFile.id,\n              success: false as const,\n              errorMessage: message,\n            };\n          }\n        });\n\n        const results = await Promise.all(uploadPromises);\n\n        // Batch update all file statuses\n        setCurrentMessageFiles((prev) =>\n          prev.map((f) => {\n            const uploadResult = results.find((r) => r.id === f.id);\n            if (!uploadResult) return f;\n\n            if (uploadResult.success) {\n              return {\n                ...f,\n                status: UploadFileStatus.COMPLETED,\n                path: uploadResult.result.path,\n                name: uploadResult.result.filename,\n                file: undefined, // Clear blob to free memory\n              };\n            } else {\n              return {\n                ...f,\n                status: UploadFileStatus.FAILED,\n                error: uploadResult.errorMessage,\n              };\n            }\n          })\n        );\n\n        // Refresh file explorer if any uploads succeeded\n        const anySucceeded = results.some((r) => r.success);\n        if (anySucceeded) {\n          triggerFilesRefresh(sessionId);\n        }\n      } else {\n        // No session yet - mark as PENDING (effect will auto-upload when session available)\n        setCurrentMessageFiles((prev) =>\n          prev.map((f) =>\n            optimisticFiles.some((of) => of.id === f.id)\n              ? { ...f, status: UploadFileStatus.PENDING }\n              : f\n          )\n        );\n      }\n\n      return [...failedFiles, ...optimisticFiles];\n    },\n    [activeSessionId, currentMessageFiles, triggerFilesRefresh]\n  );\n\n  /**\n   * Remove a file. Uses activeSessionId internally for sandbox deletion.\n   */\n  const removeFile = useCallback(\n    (fileId: string) => {\n      // Track this deletion to prevent refetch race condition\n      activeDeletionsRef.current.add(fileId);\n\n      // Use functional update to get current state and avoid stale closures\n      let removedFile: BuildFile | null = null;\n      let removedIndex = -1;\n\n      setCurrentMessageFiles((prev) => {\n        const index = prev.findIndex((f) => f.id === fileId);\n        if (index === -1) return prev;\n\n        // Capture file info for potential rollback and backend deletion\n        const file = prev[index];\n        if (!file) return prev;\n        removedFile = file;\n        removedIndex = index;\n\n        // Return filtered array (optimistic removal)\n        return prev.filter((f) => f.id !== fileId);\n      });\n\n      // After state update, trigger backend deletion if needed\n      // Use setTimeout to ensure state update has completed\n      setTimeout(() => {\n        if (removedFile?.path && activeSessionId) {\n          const filePath = removedFile.path;\n          const fileToRestore = removedFile;\n          const indexToRestore = removedIndex;\n\n          deleteFileApi(activeSessionId, filePath)\n            .then(() => {\n              // Deletion succeeded - remove from active deletions\n              activeDeletionsRef.current.delete(fileId);\n              // Refresh file explorer\n              triggerFilesRefresh(activeSessionId);\n            })\n            .catch((error) => {\n              console.error(\n                \"[UploadFilesContext] Failed to delete file from sandbox:\",\n                error\n              );\n              // Remove from active deletions\n              activeDeletionsRef.current.delete(fileId);\n              // Rollback: restore the file at its original position\n              setCurrentMessageFiles((prev) => {\n                // Check if file was already re-added (e.g., by another operation)\n                if (prev.some((f) => f.id === fileToRestore.id)) return prev;\n\n                const newFiles = [...prev];\n                const insertIndex = Math.min(indexToRestore, newFiles.length);\n                newFiles.splice(insertIndex, 0, fileToRestore);\n                return newFiles;\n              });\n            });\n        } else {\n          // No backend deletion needed - remove from active deletions immediately\n          activeDeletionsRef.current.delete(fileId);\n        }\n      }, 0);\n    },\n    [activeSessionId, triggerFilesRefresh]\n  );\n\n  /**\n   * Clear all files from the input bar.\n   */\n  const clearFiles = useCallback((options?: { suppressRefetch?: boolean }) => {\n    if (options?.suppressRefetch) {\n      suppressRefetchRef.current = true;\n    }\n    setCurrentMessageFiles([]);\n  }, []);\n\n  // =========================================================================\n  // Context value\n  // =========================================================================\n\n  const value = useMemo<UploadFilesContextValue>(\n    () => ({\n      currentMessageFiles,\n      activeSessionId,\n      setActiveSession,\n      uploadFiles,\n      removeFile,\n      clearFiles,\n      hasUploadingFiles,\n      hasPendingFiles,\n    }),\n    [\n      currentMessageFiles,\n      activeSessionId,\n      setActiveSession,\n      uploadFiles,\n      removeFile,\n      clearFiles,\n      hasUploadingFiles,\n      hasPendingFiles,\n    ]\n  );\n\n  return (\n    <UploadFilesContext.Provider value={value}>\n      {children}\n    </UploadFilesContext.Provider>\n  );\n}\n\nexport function useUploadFilesContext() {\n  const context = useContext(UploadFilesContext);\n  if (!context) {\n    throw new Error(\n      \"useUploadFilesContext must be used within an UploadFilesProvider\"\n    );\n  }\n  return context;\n}\n"
  },
  {
    "path": "web/src/app/craft/hooks/useBuildConnectors.ts",
    "content": "import useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport {\n  BuildConnectorConfig,\n  ConnectorStatus,\n} from \"@/app/craft/v1/configure/components/ConnectorCard\";\n\ninterface BuildConnectorListResponse {\n  connectors: BuildConnectorConfig[];\n}\n\n/**\n * Hook to fetch and manage build mode connectors.\n *\n * @returns Object containing:\n * - `connectors`: Array of connector configurations\n * - `hasActiveConnector`: True if at least one connector has status \"connected\" (currently synced)\n * - `hasConnectorEverSucceeded`: True if any connector has ever succeeded (has last_indexed timestamp).\n *   Use this to determine if demo data can be disabled or if banners should be hidden.\n * - `hasAnyConnector`: True if any connectors exist (regardless of status). Useful for general checks.\n * - `isLoading`: True while fetching\n * - `mutate`: Function to refetch connectors\n */\nexport function useBuildConnectors() {\n  const { data, isLoading, mutate } = useSWR<BuildConnectorListResponse>(\n    SWR_KEYS.buildConnectors,\n    errorHandlingFetcher,\n    { refreshInterval: 30000 } // 30 seconds - matches configure page\n  );\n\n  const connectors = data?.connectors ?? [];\n\n  // At least one connector with status \"connected\" (actively synced)\n  const hasActiveConnector = connectors.some((c) => c.status === \"connected\");\n\n  // Check if any connector has ever succeeded (has last_indexed timestamp)\n  // This allows demo data to be turned off even if connectors currently have errors\n  const hasConnectorEverSucceeded = connectors.some(\n    (c) => c.last_indexed !== null\n  );\n\n  // Any connector exists (regardless of status)\n  const hasAnyConnector = connectors.length > 0;\n\n  return {\n    connectors,\n    hasActiveConnector,\n    hasConnectorEverSucceeded,\n    hasAnyConnector,\n    isLoading,\n    mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/app/craft/hooks/useBuildLlmSelection.ts",
    "content": "import { useMemo, useState, useCallback } from \"react\";\nimport { LLMProviderDescriptor } from \"@/interfaces/llm\";\nimport {\n  BuildLlmSelection,\n  getBuildLlmSelection,\n  setBuildLlmSelection,\n  clearBuildLlmSelection,\n  getDefaultLlmSelection,\n} from \"@/app/craft/onboarding/constants\";\n\n/**\n * Hook for managing Build mode LLM selection.\n *\n * Resolution priority:\n * 1. Cookie - User's explicit selection (via onboarding or configure page)\n * 2. Smart default - via getDefaultLlmSelection()\n */\nexport function useBuildLlmSelection(\n  llmProviders: LLMProviderDescriptor[] | undefined\n) {\n  const [selection, setSelectionState] = useState<BuildLlmSelection | null>(\n    () => getBuildLlmSelection()\n  );\n\n  // Validate that a selection is still valid against current providers.\n  // Only checks that the provider exists\n  const isSelectionValid = useCallback(\n    (sel: BuildLlmSelection | null): boolean => {\n      if (!sel || !llmProviders) return false;\n      return llmProviders.some(\n        (p) => p.provider === sel.provider || p.name === sel.providerName\n      );\n    },\n    [llmProviders]\n  );\n\n  // Compute effective selection: cookie > smart default\n  const effectiveSelection = useMemo((): BuildLlmSelection | null => {\n    // Use cookie if valid\n    if (selection && isSelectionValid(selection)) {\n      return selection;\n    }\n\n    // Fall back to smart default\n    return getDefaultLlmSelection(llmProviders);\n  }, [selection, llmProviders, isSelectionValid]);\n\n  // Update selection and persist to cookie\n  const updateSelection = useCallback((newSelection: BuildLlmSelection) => {\n    setBuildLlmSelection(newSelection);\n    setSelectionState(newSelection);\n  }, []);\n\n  // Clear selection (removes cookie)\n  const clearSelection = useCallback(() => {\n    clearBuildLlmSelection();\n    setSelectionState(null);\n  }, []);\n\n  return {\n    selection: effectiveSelection,\n    updateSelection,\n    clearSelection,\n    isFromCookie: selection !== null && isSelectionValid(selection),\n  };\n}\n"
  },
  {
    "path": "web/src/app/craft/hooks/useBuildSessionController.ts",
    "content": "\"use client\";\n\nimport { useEffect, useRef, useCallback } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport { useBuildSessionStore } from \"@/app/craft/hooks/useBuildSessionStore\";\nimport { usePreProvisionPolling } from \"@/app/craft/hooks/usePreProvisionPolling\";\nimport { CRAFT_SEARCH_PARAM_NAMES } from \"@/app/craft/services/searchParams\";\nimport { CRAFT_PATH } from \"@/app/craft/v1/constants\";\nimport { getBuildUserPersona } from \"@/app/craft/onboarding/constants\";\nimport { useLLMProviders } from \"@/hooks/useLLMProviders\";\nimport { checkPreProvisionedSession } from \"@/app/craft/services/apiServices\";\n\ninterface UseBuildSessionControllerProps {\n  /** Session ID from search params, or null for new session */\n  existingSessionId: string | null;\n}\n\n/**\n * Controller hook for managing build session lifecycle based on URL.\n * Mirrors useChatSessionController pattern.\n *\n * Responsibilities:\n * - Load session from API when URL changes\n * - Switch current session based on URL (single source of truth)\n * - Trigger pre-provisioning when on new build page\n * - Track session loading state\n * - Re-validate pre-provisioned session on tab focus (multi-tab support)\n *\n * IMPORTANT: This is the ONLY place that should call setCurrentSession.\n * Other components should navigate to URLs and let this controller handle state.\n */\nexport function useBuildSessionController({\n  existingSessionId,\n}: UseBuildSessionControllerProps) {\n  const router = useRouter();\n\n  // Check LLM provider availability\n  const { llmProviders } = useLLMProviders();\n  const hasAnyProvider = !!(llmProviders && llmProviders.length > 0);\n\n  // Check if user has completed onboarding (persona cookie is set)\n  // Read directly from cookie on every render - cookie reads are cheap and this\n  // ensures we always have the current value, especially important after onboarding\n  // completes when the cookie is set synchronously but other state updates are async\n  const hasCompletedOnboarding = getBuildUserPersona() !== null;\n\n  // Track previous existingSessionId to detect navigation transitions\n  const prevExistingSessionIdRef = useRef<string | null>(existingSessionId);\n\n  // Access store state and actions individually like chat does\n  const currentSessionId = useBuildSessionStore(\n    (state) => state.currentSessionId\n  );\n  const setCurrentSession = useBuildSessionStore(\n    (state) => state.setCurrentSession\n  );\n  const loadSession = useBuildSessionStore((state) => state.loadSession);\n\n  // Controller state from Zustand (replaces refs for better race condition handling)\n  const controllerState = useBuildSessionStore(\n    (state) => state.controllerState\n  );\n  const setControllerTriggered = useBuildSessionStore(\n    (state) => state.setControllerTriggered\n  );\n  const setControllerLoaded = useBuildSessionStore(\n    (state) => state.setControllerLoaded\n  );\n\n  // Pre-provisioning state (discriminated union)\n  const preProvisioning = useBuildSessionStore(\n    (state) => state.preProvisioning\n  );\n  const ensurePreProvisionedSession = useBuildSessionStore(\n    (state) => state.ensurePreProvisionedSession\n  );\n\n  // Compute derived state directly in selectors for efficiency\n  const isLoading = useBuildSessionStore((state) => {\n    if (!state.currentSessionId) return false;\n    const session = state.sessions.get(state.currentSessionId);\n    return session ? !session.isLoaded : false;\n  });\n\n  const isStreaming = useBuildSessionStore((state) => {\n    if (!state.currentSessionId) return false;\n    const session = state.sessions.get(state.currentSessionId);\n    return session?.status === \"running\" || session?.status === \"creating\";\n  });\n\n  // Pre-provisioning derived state\n  const isPreProvisioning = preProvisioning.status === \"provisioning\";\n  const isPreProvisioningReady = preProvisioning.status === \"ready\";\n\n  // Effect: Handle session changes based on URL\n  useEffect(() => {\n    const prevExistingSessionId = prevExistingSessionIdRef.current;\n    prevExistingSessionIdRef.current = existingSessionId;\n\n    // Handle navigation to \"new build\" (no session ID in URL)\n    if (existingSessionId === null) {\n      // Clear current session\n      if (currentSessionId !== null) {\n        setCurrentSession(null);\n      }\n\n      // Reset state when transitioning FROM a session TO new build\n      // This ensures we fetch fresh pre-provisioned status from backend\n      if (prevExistingSessionId !== null) {\n        setControllerTriggered(null);\n        // Clear pre-provisioned state to force a fresh check from backend\n        useBuildSessionStore.setState({ preProvisioning: { status: \"idle\" } });\n      }\n\n      // Trigger pre-provisioning if conditions are met\n      const canTrigger =\n        controllerState.lastTriggeredForUrl !== \"new-build\" &&\n        (preProvisioning.status === \"idle\" ||\n          preProvisioning.status === \"failed\") &&\n        hasCompletedOnboarding &&\n        hasAnyProvider;\n\n      // Also trigger retry if failed and retry time has passed\n      const shouldRetry =\n        preProvisioning.status === \"failed\" &&\n        Date.now() >= preProvisioning.retryAt &&\n        hasCompletedOnboarding &&\n        hasAnyProvider;\n\n      if (canTrigger || shouldRetry) {\n        setControllerTriggered(\"new-build\");\n        ensurePreProvisionedSession();\n      }\n      return;\n    }\n\n    // Navigating to a session - reset the trigger state for next new build visit\n    if (controllerState.lastTriggeredForUrl === \"new-build\") {\n      setControllerTriggered(null);\n    }\n\n    // Handle navigation to existing session\n    async function fetchSession() {\n      if (!existingSessionId) return;\n\n      // Mark as loaded BEFORE any async work to prevent duplicate calls\n      setControllerLoaded(existingSessionId);\n\n      // Access sessions via getState() to avoid dependency on Map reference\n      const currentState = useBuildSessionStore.getState();\n      const cachedSession = currentState.sessions.get(existingSessionId);\n\n      if (cachedSession?.isLoaded) {\n        // Just switch to it\n        setCurrentSession(existingSessionId);\n        return;\n      }\n\n      // Need to load from API\n      await loadSession(existingSessionId);\n    }\n\n    // Only fetch if we haven't already loaded this session\n    const currentState = useBuildSessionStore.getState();\n    const currentSessionData = currentState.currentSessionId\n      ? currentState.sessions.get(currentState.currentSessionId)\n      : null;\n    // Only block loading during active LLM streaming (\"running\").\n    // \"creating\" means sandbox restore, which should not prevent\n    // navigating to and loading a different session.\n    const isCurrentlyStreaming = currentSessionData?.status === \"running\";\n\n    if (\n      controllerState.loadedSessionId !== existingSessionId &&\n      !isCurrentlyStreaming\n    ) {\n      fetchSession();\n    } else if (currentSessionId !== existingSessionId) {\n      // Session is cached, just switch to it\n      setCurrentSession(existingSessionId);\n    }\n  }, [\n    existingSessionId,\n    currentSessionId,\n    setCurrentSession,\n    loadSession,\n    preProvisioning,\n    ensurePreProvisionedSession,\n    hasCompletedOnboarding,\n    hasAnyProvider,\n    controllerState.lastTriggeredForUrl,\n    controllerState.loadedSessionId,\n    setControllerTriggered,\n    setControllerLoaded,\n  ]);\n\n  // Effect: Auto-retry provisioning after backoff period\n  // When provisioning fails, we set a retryAt timestamp. This effect schedules\n  // a timer to retry after the backoff period elapses.\n  useEffect(() => {\n    // Only set up timer if in failed state and on new-build page\n    if (\n      preProvisioning.status !== \"failed\" ||\n      existingSessionId !== null ||\n      !hasCompletedOnboarding ||\n      !hasAnyProvider\n    ) {\n      return;\n    }\n\n    const msUntilRetry = preProvisioning.retryAt - Date.now();\n\n    // If retry time has already passed, trigger immediately\n    if (msUntilRetry <= 0) {\n      console.info(\"[PreProvision] Retry time passed, retrying now...\");\n      ensurePreProvisionedSession();\n      return;\n    }\n\n    // Schedule retry after backoff period\n    console.info(\n      `[PreProvision] Scheduling retry in ${Math.round(msUntilRetry / 1000)}s`\n    );\n    const timerId = setTimeout(() => {\n      console.info(\"[PreProvision] Backoff elapsed, retrying...\");\n      ensurePreProvisionedSession();\n    }, msUntilRetry);\n\n    return () => clearTimeout(timerId);\n  }, [\n    preProvisioning,\n    existingSessionId,\n    hasCompletedOnboarding,\n    hasAnyProvider,\n    ensurePreProvisionedSession,\n  ]);\n\n  // Effect: Re-validate pre-provisioned session on tab focus (multi-tab support)\n  // Uses checkPreProvisionedSession API to validate without resetting state,\n  // which prevents unnecessary cascading effects when session is still valid.\n  useEffect(() => {\n    const handleFocus = async () => {\n      const { preProvisioning } = useBuildSessionStore.getState();\n\n      // Only re-validate if we have a \"ready\" pre-provisioned session\n      if (preProvisioning.status === \"ready\") {\n        const cachedSessionId = preProvisioning.sessionId;\n\n        try {\n          // Check if session is still valid WITHOUT resetting state\n          const { valid } = await checkPreProvisionedSession(cachedSessionId);\n\n          if (!valid) {\n            // Session was consumed by another tab - now reset and re-provision\n            console.info(\n              `[PreProvision] Session ${cachedSessionId.slice(\n                0,\n                8\n              )} invalidated on focus, re-provisioning...`\n            );\n            useBuildSessionStore.setState({\n              preProvisioning: { status: \"idle\" },\n            });\n            const newSessionId = await useBuildSessionStore\n              .getState()\n              .ensurePreProvisionedSession();\n\n            if (newSessionId) {\n              console.info(\n                `[PreProvision] Session changed on focus: ${cachedSessionId.slice(\n                  0,\n                  8\n                )} -> ${newSessionId.slice(0, 8)}`\n              );\n            }\n          }\n          // If valid, do nothing - keep the current session\n        } catch (error) {\n          // On error, log but don't reset - better to keep potentially stale session\n          // than to cause UI flicker on network blip\n          console.warn(\n            \"[PreProvision] Failed to validate session on focus:\",\n            error\n          );\n        }\n      }\n    };\n\n    window.addEventListener(\"focus\", handleFocus);\n    return () => window.removeEventListener(\"focus\", handleFocus);\n  }, []);\n\n  /**\n   * Navigate to a specific session\n   */\n  const navigateToSession = useCallback(\n    (sessionId: string) => {\n      router.push(\n        `${CRAFT_PATH}?${CRAFT_SEARCH_PARAM_NAMES.SESSION_ID}=${sessionId}`\n      );\n    },\n    [router]\n  );\n\n  /**\n   * Navigate to new build (clear session)\n   * Note: We intentionally don't abort the current session's stream,\n   * allowing it to continue in the background.\n   */\n  const navigateToNewBuild = useCallback(() => {\n    router.push(CRAFT_PATH);\n  }, [router]);\n\n  // Poll to verify pre-provisioned session is still valid (multi-tab support)\n  // Only poll on welcome page (existingSessionId === null) - no point polling on session pages\n  usePreProvisionPolling({ enabled: existingSessionId === null });\n\n  return {\n    currentSessionId,\n    isLoading,\n    isStreaming,\n    navigateToSession,\n    navigateToNewBuild,\n    // Pre-provisioning state\n    isPreProvisioning,\n    isPreProvisioningReady,\n    preProvisioning,\n  };\n}\n"
  },
  {
    "path": "web/src/app/craft/hooks/useBuildSessionStore.ts",
    "content": "\"use client\";\n\nimport { create } from \"zustand\";\nimport { getDemoDataEnabled } from \"@/app/craft/v1/constants\";\nimport {\n  getBuildUserPersona,\n  getBuildLlmSelection,\n} from \"@/app/craft/onboarding/constants\";\nimport { DELETE_SUCCESS_DISPLAY_DURATION_MS } from \"@/app/craft/constants\";\n\nimport {\n  ApiSandboxResponse,\n  Artifact,\n  ArtifactType,\n  BuildMessage,\n  Session,\n  SessionHistoryItem,\n  SessionStatus,\n  ToolCall,\n  ToolCallStatus,\n} from \"@/app/craft/types/streamingTypes\";\n\nimport {\n  StreamItem,\n  ToolCallState,\n  TodoListState,\n} from \"@/app/craft/types/displayTypes\";\n\nimport {\n  createSession as apiCreateSession,\n  fetchSession,\n  fetchSessionHistory,\n  generateSessionName,\n  updateSessionName,\n  deleteSession as apiDeleteSession,\n  fetchMessages,\n  fetchArtifacts,\n  restoreSession,\n} from \"@/app/craft/services/apiServices\";\n\nimport { genId } from \"@/app/craft/utils/streamItemHelpers\";\nimport { parsePacket } from \"@/app/craft/utils/parsePacket\";\n\n/**\n * Convert loaded messages (with message_metadata) to StreamItem[] format.\n *\n * The backend stores messages with these packet types in message_metadata:\n * - user_message: {type: \"user_message\", content: {type: \"text\", text: \"...\"}}\n * - agent_message: {type: \"agent_message\", content: {type: \"text\", text: \"...\"}}\n * - agent_thought: {type: \"agent_thought\", content: {type: \"text\", text: \"...\"}}\n * - tool_call_progress: Full tool call data with status=\"completed\"\n * - agent_plan_update: Plan entries (not rendered as stream items)\n *\n * This function converts agent messages to StreamItem[] for rendering.\n */\nfunction convertMessagesToStreamItems(messages: BuildMessage[]): StreamItem[] {\n  const items: StreamItem[] = [];\n\n  for (const message of messages) {\n    if (message.type === \"user\") continue;\n\n    const metadata = message.message_metadata;\n    if (!metadata || typeof metadata !== \"object\") continue;\n\n    // SAME parsePacket — identical classification for both code paths\n    const packet = parsePacket(metadata);\n\n    switch (packet.type) {\n      case \"text_chunk\":\n        if (packet.text) {\n          items.push({\n            type: \"text\",\n            id: message.id || genId(\"text\"),\n            content: packet.text,\n            isStreaming: false,\n          });\n        }\n        break;\n\n      case \"thinking_chunk\":\n        if (packet.text) {\n          items.push({\n            type: \"thinking\",\n            id: message.id || genId(\"thinking\"),\n            content: packet.text,\n            isStreaming: false,\n          });\n        }\n        break;\n\n      case \"tool_call_progress\":\n        if (packet.isTodo) {\n          // Upsert: update existing todo_list or create new one\n          const existingIdx = items.findIndex(\n            (item) =>\n              item.type === \"todo_list\" &&\n              item.todoList.id === packet.toolCallId\n          );\n          if (existingIdx >= 0) {\n            const existing = items[existingIdx];\n            if (existing && existing.type === \"todo_list\") {\n              items[existingIdx] = {\n                ...existing,\n                todoList: { ...existing.todoList, todos: packet.todos },\n              };\n            }\n          } else {\n            items.push({\n              type: \"todo_list\",\n              id: packet.toolCallId,\n              todoList: {\n                id: packet.toolCallId,\n                todos: packet.todos,\n                isOpen: false,\n              },\n            });\n          }\n        } else {\n          items.push({\n            type: \"tool_call\",\n            id: packet.toolCallId,\n            toolCall: {\n              id: packet.toolCallId,\n              kind: packet.kind,\n              title: packet.title,\n              description: packet.description,\n              command: packet.command,\n              status: packet.status,\n              rawOutput: packet.rawOutput,\n              subagentType: packet.subagentType ?? undefined,\n              isNewFile: packet.isNewFile,\n              oldContent: packet.oldContent,\n              newContent: packet.newContent,\n            },\n          });\n        }\n        break;\n\n      // agent_plan_update and other packet types are not rendered as stream items\n      default:\n        break;\n    }\n  }\n\n  return items;\n}\n\n/**\n * Consolidate raw backend messages into proper conversation turns.\n *\n * The backend stores each streaming packet as a separate message. This function:\n * 1. Groups consecutive agent messages (between user messages) into turns\n * 2. Converts each group's packets to streamItems\n * 3. Creates consolidated messages with streamItems in message_metadata\n *\n * Returns: Array of consolidated messages (user messages + one agent message per turn)\n */\nfunction consolidateMessagesIntoTurns(\n  rawMessages: BuildMessage[]\n): BuildMessage[] {\n  const consolidated: BuildMessage[] = [];\n  let currentAgentPackets: BuildMessage[] = [];\n\n  for (const message of rawMessages) {\n    if (message.type === \"user\") {\n      // If we have accumulated agent packets, consolidate them into one message\n      if (currentAgentPackets.length > 0) {\n        const streamItems = convertMessagesToStreamItems(currentAgentPackets);\n        const textContent = streamItems\n          .filter((item) => item.type === \"text\")\n          .map((item) => item.content)\n          .join(\"\");\n\n        consolidated.push({\n          id: currentAgentPackets[0]?.id || genId(\"agent-msg\"),\n          type: \"assistant\",\n          content: textContent,\n          timestamp: currentAgentPackets[0]?.timestamp || new Date(),\n          message_metadata: {\n            streamItems,\n          },\n        });\n        currentAgentPackets = [];\n      }\n      // Add the user message as-is\n      consolidated.push(message);\n    } else if (message.type === \"assistant\") {\n      // Check if this message already has consolidated streamItems (from new format)\n      if (message.message_metadata?.streamItems) {\n        // Already consolidated, add as-is\n        if (currentAgentPackets.length > 0) {\n          // Flush any pending packets first\n          const streamItems = convertMessagesToStreamItems(currentAgentPackets);\n          const textContent = streamItems\n            .filter((item) => item.type === \"text\")\n            .map((item) => item.content)\n            .join(\"\");\n\n          consolidated.push({\n            id: currentAgentPackets[0]?.id || genId(\"agent-msg\"),\n            type: \"assistant\",\n            content: textContent,\n            timestamp: currentAgentPackets[0]?.timestamp || new Date(),\n            message_metadata: {\n              streamItems,\n            },\n          });\n          currentAgentPackets = [];\n        }\n        consolidated.push(message);\n      } else {\n        // Old format - accumulate for consolidation\n        currentAgentPackets.push(message);\n      }\n    }\n  }\n\n  // Don't forget any trailing agent packets\n  if (currentAgentPackets.length > 0) {\n    const streamItems = convertMessagesToStreamItems(currentAgentPackets);\n    const textContent = streamItems\n      .filter((item) => item.type === \"text\")\n      .map((item) => item.content)\n      .join(\"\");\n\n    consolidated.push({\n      id: currentAgentPackets[0]?.id || genId(\"agent-msg\"),\n      type: \"assistant\",\n      content: textContent,\n      timestamp: currentAgentPackets[0]?.timestamp || new Date(),\n      message_metadata: {\n        streamItems,\n      },\n    });\n  }\n\n  return consolidated;\n}\n\n// Re-export types for consumers\nexport type {\n  Artifact,\n  ArtifactType,\n  BuildMessage,\n  Session,\n  SessionHistoryItem,\n  SessionStatus,\n  ToolCall,\n  ToolCallStatus,\n};\n\n// =============================================================================\n// Store Types (mirrors chat's useChatSessionStore pattern)\n// =============================================================================\n\n/** Pre-provisioning state machine - exactly one of these states at a time */\nexport type PreProvisioningState =\n  | { status: \"idle\" }\n  | { status: \"provisioning\"; demoDataEnabled: boolean }\n  | { status: \"ready\"; sessionId: string; demoDataEnabled: boolean }\n  | { status: \"failed\"; error: string; retryCount: number; retryAt: number };\n\n// Module-level variable to store the provisioning promise (not in Zustand state for serializability)\nlet provisioningPromise: Promise<string | null> | null = null;\n\n/** File preview tab data */\nexport interface FilePreviewTab {\n  path: string;\n  fileName: string;\n}\n\n/** Files tab state - persisted across tab switches */\nexport interface FilesTabState {\n  expandedPaths: string[];\n  scrollTop: number;\n  /** Cached directory listings by path - avoids refetch on tab switch */\n  directoryCache: Record<string, unknown[]>;\n}\n\n/** Tab history entry - can be a pinned tab or a file preview */\nexport type TabHistoryEntry =\n  | { type: \"pinned\"; tab: OutputTabType }\n  | { type: \"file\"; path: string };\n\n/** Browser-style tab navigation history */\nexport interface TabNavigationHistory {\n  entries: TabHistoryEntry[];\n  currentIndex: number;\n}\n\n/** Follow-up suggestion bubble */\nexport interface SuggestionBubble {\n  theme: \"add\" | \"question\";\n  text: string;\n}\n\n/** Output panel tab types */\nexport type OutputTabType = \"preview\" | \"files\" | \"artifacts\";\n\nexport interface BuildSessionData {\n  id: string;\n  status: SessionStatus;\n  messages: BuildMessage[];\n  artifacts: Artifact[];\n  /** Active tool calls for the current response */\n  toolCalls: ToolCall[];\n  /**\n   * FIFO stream items for the current agent turn.\n   * Items are stored in chronological order as they arrive.\n   * Rendered directly without transformation.\n   */\n  streamItems: StreamItem[];\n  error: string | null;\n  webappUrl: string | null;\n  /** Sandbox info from backend */\n  sandbox: ApiSandboxResponse | null;\n  abortController: AbortController;\n  lastAccessed: Date;\n  isLoaded: boolean;\n  outputPanelOpen: boolean;\n  /** Counter to trigger webapp refresh when web/ files change (increments on each edit) */\n  webappNeedsRefresh: number;\n  /** Counter to trigger files list refresh when outputs/ directory changes (increments on each write/edit) */\n  filesNeedsRefresh: number;\n  /** File preview tabs open in this session */\n  filePreviewTabs: FilePreviewTab[];\n  /** Active pinned tab in output panel */\n  activeOutputTab: OutputTabType;\n  /** Active file preview path (when set, this is the active tab instead of pinned tab) */\n  activeFilePreviewPath: string | null;\n  /** Files tab state - expanded folders and scroll position */\n  filesTabState: FilesTabState;\n  /** Browser-style tab navigation history for back/forward */\n  tabHistory: TabNavigationHistory;\n  /** Follow-up suggestions after first agent message */\n  followupSuggestions: SuggestionBubble[] | null;\n  /** Whether suggestions are currently being generated */\n  suggestionsLoading: boolean;\n}\n\ninterface BuildSessionStore {\n  // Session management (mirrors chat)\n  currentSessionId: string | null;\n  sessions: Map<string, BuildSessionData>;\n  sessionHistory: SessionHistoryItem[];\n\n  // Pre-provisioning state (discriminated union - see PreProvisioningState type)\n  preProvisioning: PreProvisioningState;\n\n  // Controller state (replaces refs in useBuildSessionController for better race condition handling)\n  controllerState: {\n    /** Tracks which URL we've triggered provisioning for (prevents re-triggering) */\n    lastTriggeredForUrl: string | null;\n    /** Tracks which session ID has been loaded (prevents duplicate API calls) */\n    loadedSessionId: string | null;\n  };\n\n  // Temporary output panel state when no session exists (resets when session is created/cleared)\n  noSessionOutputPanelOpen: boolean;\n\n  // Temporary active tab when no session exists (resets when session is created/cleared)\n  noSessionActiveOutputTab: OutputTabType;\n\n  // Actions - Session Management\n  setCurrentSession: (sessionId: string | null) => void;\n  createSession: (\n    sessionId: string,\n    initialData?: Partial<BuildSessionData>\n  ) => void;\n  updateSessionData: (\n    sessionId: string,\n    updates: Partial<BuildSessionData>\n  ) => void;\n\n  // Actions - Current Session Shortcuts\n  setCurrentSessionStatus: (status: SessionStatus) => void;\n  appendMessageToCurrent: (message: BuildMessage) => void;\n  updateLastMessageInCurrent: (content: string) => void;\n  addArtifactToCurrent: (artifact: Artifact) => void;\n  setCurrentError: (error: string | null) => void;\n  setCurrentOutputPanelOpen: (open: boolean) => void;\n  toggleCurrentOutputPanel: () => void;\n\n  // Actions - Session-specific operations (for streaming - immune to currentSessionId changes)\n  appendMessageToSession: (sessionId: string, message: BuildMessage) => void;\n  updateLastMessageInSession: (sessionId: string, content: string) => void;\n  updateMessageByIdInSession: (\n    sessionId: string,\n    messageId: string,\n    content: string\n  ) => void;\n  addArtifactToSession: (sessionId: string, artifact: Artifact) => void;\n\n  // Actions - Tool Call Management\n  addToolCallToSession: (sessionId: string, toolCall: ToolCall) => void;\n  updateToolCallInSession: (\n    sessionId: string,\n    toolCallId: string,\n    updates: Partial<ToolCall>\n  ) => void;\n  clearToolCallsInSession: (sessionId: string) => void;\n\n  // Actions - Stream Items (FIFO rendering)\n  appendStreamItem: (sessionId: string, item: StreamItem) => void;\n  updateStreamItem: (\n    sessionId: string,\n    itemId: string,\n    updates: Partial<StreamItem>\n  ) => void;\n  updateLastStreamingText: (sessionId: string, content: string) => void;\n  updateLastStreamingThinking: (sessionId: string, content: string) => void;\n  updateToolCallStreamItem: (\n    sessionId: string,\n    toolCallId: string,\n    updates: Partial<ToolCallState>\n  ) => void;\n  updateTodoListStreamItem: (\n    sessionId: string,\n    todoListId: string,\n    updates: Partial<TodoListState>\n  ) => void;\n  upsertTodoListStreamItem: (\n    sessionId: string,\n    todoListId: string,\n    todoList: TodoListState\n  ) => void;\n  clearStreamItems: (sessionId: string) => void;\n\n  // Actions - Abort Control\n  setAbortController: (sessionId: string, controller: AbortController) => void;\n  abortSession: (sessionId: string) => void;\n  abortCurrentSession: () => void;\n\n  // Actions - Session Lifecycle\n  createNewSession: (prompt: string) => Promise<string | null>;\n  loadSession: (sessionId: string) => Promise<void>;\n\n  // Actions - Session History\n  refreshSessionHistory: () => Promise<void>;\n  nameBuildSession: (sessionId: string) => Promise<void>;\n  renameBuildSession: (sessionId: string, newName: string) => Promise<void>;\n  deleteBuildSession: (sessionId: string) => Promise<void>;\n\n  // Utilities\n  cleanupOldSessions: (maxSessions?: number) => void;\n\n  // Pre-provisioning Actions\n  ensurePreProvisionedSession: () => Promise<string | null>;\n  consumePreProvisionedSession: () => Promise<string | null>;\n  /** Clear and delete any pre-provisioned session (used when settings change) */\n  clearPreProvisionedSession: () => Promise<void>;\n\n  // Controller State Actions (for useBuildSessionController - replaces refs)\n  setControllerTriggered: (url: string | null) => void;\n  setControllerLoaded: (sessionId: string | null) => void;\n  resetControllerState: () => void;\n\n  // Webapp Refresh Actions\n  triggerWebappRefresh: (sessionId: string) => void;\n  // Files Refresh Actions\n  triggerFilesRefresh: (sessionId: string) => void;\n\n  // File Preview Actions\n  openFilePreview: (sessionId: string, path: string, fileName: string) => void;\n  /** Atomically open panel + create file tab + set active for a markdown file detected during streaming */\n  openMarkdownPreview: (sessionId: string, filePath: string) => void;\n  closeFilePreview: (sessionId: string, path: string) => void;\n  setActiveOutputTab: (sessionId: string, tab: OutputTabType) => void;\n  setActiveFilePreviewPath: (sessionId: string, path: string | null) => void;\n  /** Set active tab when no session exists (for pre-provisioned sandbox viewing) */\n  setNoSessionActiveOutputTab: (tab: OutputTabType) => void;\n\n  // Files Tab State Actions\n  updateFilesTabState: (\n    sessionId: string,\n    updates: Partial<FilesTabState>\n  ) => void;\n\n  // Tab Navigation History Actions\n  navigateTabBack: (sessionId: string) => void;\n  navigateTabForward: (sessionId: string) => void;\n\n  // Follow-up Suggestion Actions\n  setFollowupSuggestions: (\n    sessionId: string,\n    suggestions: SuggestionBubble[] | null\n  ) => void;\n  setSuggestionsLoading: (sessionId: string, loading: boolean) => void;\n  clearFollowupSuggestions: (sessionId: string) => void;\n}\n\n// =============================================================================\n// Initial State Factory\n// =============================================================================\n\nconst createInitialSessionData = (\n  sessionId: string,\n  initialData?: Partial<BuildSessionData>\n): BuildSessionData => ({\n  id: sessionId,\n  status: \"idle\",\n  messages: [],\n  artifacts: [],\n  toolCalls: [],\n  streamItems: [],\n  error: null,\n  webappUrl: null,\n  sandbox: null,\n  abortController: new AbortController(),\n  lastAccessed: new Date(),\n  isLoaded: false,\n  outputPanelOpen: false,\n  webappNeedsRefresh: 0,\n  filesNeedsRefresh: 0,\n  filePreviewTabs: [],\n  activeOutputTab: \"preview\",\n  activeFilePreviewPath: null,\n  filesTabState: { expandedPaths: [], scrollTop: 0, directoryCache: {} },\n  tabHistory: {\n    entries: [{ type: \"pinned\", tab: \"preview\" }],\n    currentIndex: 0,\n  },\n  followupSuggestions: null,\n  suggestionsLoading: false,\n  ...initialData,\n});\n\n// =============================================================================\n// Store\n// =============================================================================\n\nexport const useBuildSessionStore = create<BuildSessionStore>()((set, get) => ({\n  currentSessionId: null,\n  sessions: new Map<string, BuildSessionData>(),\n  sessionHistory: [],\n\n  // Pre-provisioning state\n  preProvisioning: { status: \"idle\" },\n\n  // Controller state (replaces refs in useBuildSessionController)\n  controllerState: {\n    lastTriggeredForUrl: null,\n    loadedSessionId: null,\n  },\n\n  // Temporary output panel state when no session exists (resets when session is created/cleared)\n  noSessionOutputPanelOpen: false,\n\n  // Temporary active tab when no session exists\n  noSessionActiveOutputTab: \"preview\" as OutputTabType,\n\n  // ===========================================================================\n  // Session Management (mirrors chat's pattern)\n  // ===========================================================================\n\n  setCurrentSession: (sessionId: string | null) => {\n    set((state) => {\n      // If setting to null, clear current session and reset no-session panel state\n      if (sessionId === null) {\n        return { currentSessionId: null, noSessionOutputPanelOpen: false };\n      }\n\n      // If session doesn't exist, create it and inherit output panel state\n      if (!state.sessions.has(sessionId)) {\n        const newSession = createInitialSessionData(sessionId, {\n          outputPanelOpen: state.noSessionOutputPanelOpen,\n        });\n        const newSessions = new Map(state.sessions);\n        newSessions.set(sessionId, newSession);\n        return {\n          currentSessionId: sessionId,\n          sessions: newSessions,\n          noSessionOutputPanelOpen: false,\n        };\n      }\n\n      // Update last accessed for existing session and reset no-session panel state\n      const session = state.sessions.get(sessionId)!;\n      const updatedSession = { ...session, lastAccessed: new Date() };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n\n      return {\n        currentSessionId: sessionId,\n        sessions: newSessions,\n        noSessionOutputPanelOpen: false,\n      };\n    });\n  },\n\n  // Initialize local session state (does NOT create backend session - use apiCreateSession for that)\n  createSession: (\n    sessionId: string,\n    initialData?: Partial<BuildSessionData>\n  ) => {\n    set((state) => {\n      // Inherit output panel state from no-session state if not explicitly set\n      const outputPanelOpen =\n        initialData?.outputPanelOpen ?? state.noSessionOutputPanelOpen;\n      const newSession = createInitialSessionData(sessionId, {\n        ...initialData,\n        outputPanelOpen,\n      });\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, newSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  updateSessionData: (\n    sessionId: string,\n    updates: Partial<BuildSessionData>\n  ) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        ...updates,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  // ===========================================================================\n  // Current Session Shortcuts\n  // ===========================================================================\n\n  setCurrentSessionStatus: (status: SessionStatus) => {\n    const { currentSessionId, updateSessionData } = get();\n    if (currentSessionId) {\n      updateSessionData(currentSessionId, { status });\n    }\n  },\n\n  appendMessageToCurrent: (message: BuildMessage) => {\n    const { currentSessionId } = get();\n    if (!currentSessionId) return;\n\n    set((state) => {\n      const currentSession = state.sessions.get(currentSessionId);\n      if (!currentSession) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...currentSession,\n        messages: [...currentSession.messages, message],\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(currentSessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  updateLastMessageInCurrent: (content: string) => {\n    const { currentSessionId } = get();\n    if (!currentSessionId) return;\n\n    set((state) => {\n      const session = state.sessions.get(currentSessionId);\n      if (!session || session.messages.length === 0) return state;\n\n      const messages = session.messages.map((msg, idx) =>\n        idx === session.messages.length - 1 ? { ...msg, content } : msg\n      );\n      const updatedSession: BuildSessionData = {\n        ...session,\n        messages,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(currentSessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  addArtifactToCurrent: (artifact: Artifact) => {\n    const { currentSessionId } = get();\n    if (!currentSessionId) return;\n\n    set((state) => {\n      const session = state.sessions.get(currentSessionId);\n      if (!session) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        artifacts: [...session.artifacts, artifact],\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(currentSessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  setCurrentError: (error: string | null) => {\n    const { currentSessionId, updateSessionData } = get();\n    if (currentSessionId) {\n      updateSessionData(currentSessionId, { error });\n    }\n  },\n\n  setCurrentOutputPanelOpen: (open: boolean) => {\n    const { currentSessionId, updateSessionData } = get();\n    if (currentSessionId) {\n      updateSessionData(currentSessionId, { outputPanelOpen: open });\n    } else {\n      // No session - update temporary state\n      set({ noSessionOutputPanelOpen: open });\n    }\n  },\n\n  toggleCurrentOutputPanel: () => {\n    const {\n      currentSessionId,\n      sessions,\n      updateSessionData,\n      noSessionOutputPanelOpen,\n    } = get();\n    if (currentSessionId) {\n      const session = sessions.get(currentSessionId);\n      if (session) {\n        updateSessionData(currentSessionId, {\n          outputPanelOpen: !session.outputPanelOpen,\n        });\n      }\n    } else {\n      // No session - toggle temporary state\n      set({ noSessionOutputPanelOpen: !noSessionOutputPanelOpen });\n    }\n  },\n\n  // ===========================================================================\n  // Session-specific operations (for streaming - immune to currentSessionId changes)\n  // ===========================================================================\n\n  appendMessageToSession: (sessionId: string, message: BuildMessage) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        messages: [...session.messages, message],\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  updateLastMessageInSession: (sessionId: string, content: string) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session || session.messages.length === 0) return state;\n\n      const messages = session.messages.map((msg, idx) =>\n        idx === session.messages.length - 1 ? { ...msg, content } : msg\n      );\n      const updatedSession: BuildSessionData = {\n        ...session,\n        messages,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  updateMessageByIdInSession: (\n    sessionId: string,\n    messageId: string,\n    content: string\n  ) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const messages = session.messages.map((msg) =>\n        msg.id === messageId ? { ...msg, content } : msg\n      );\n      const updatedSession: BuildSessionData = {\n        ...session,\n        messages,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  addArtifactToSession: (sessionId: string, artifact: Artifact) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        artifacts: [...session.artifacts, artifact],\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  // ===========================================================================\n  // Tool Call Management\n  // ===========================================================================\n\n  addToolCallToSession: (sessionId: string, toolCall: ToolCall) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        toolCalls: [...session.toolCalls, toolCall],\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  updateToolCallInSession: (\n    sessionId: string,\n    toolCallId: string,\n    updates: Partial<ToolCall>\n  ) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const toolCalls = session.toolCalls.map((tc) =>\n        tc.id === toolCallId ? { ...tc, ...updates } : tc\n      );\n      const updatedSession: BuildSessionData = {\n        ...session,\n        toolCalls,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  clearToolCallsInSession: (sessionId: string) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        toolCalls: [],\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  // ===========================================================================\n  // Stream Items (FIFO rendering)\n  // ===========================================================================\n\n  appendStreamItem: (sessionId: string, item: StreamItem) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        streamItems: [...session.streamItems, item],\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  updateStreamItem: (\n    sessionId: string,\n    itemId: string,\n    updates: Partial<StreamItem>\n  ) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const streamItems = session.streamItems.map((item) =>\n        item.id === itemId ? { ...item, ...updates } : item\n      ) as StreamItem[];\n      const updatedSession: BuildSessionData = {\n        ...session,\n        streamItems,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  updateLastStreamingText: (sessionId: string, content: string) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      // Find the last text item that is streaming\n      const items = [...session.streamItems];\n      for (let i = items.length - 1; i >= 0; i--) {\n        const item = items[i];\n        if (item && item.type === \"text\" && item.isStreaming) {\n          items[i] = { ...item, content };\n          break;\n        }\n      }\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        streamItems: items,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  updateLastStreamingThinking: (sessionId: string, content: string) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      // Find the last thinking item that is streaming\n      const items = [...session.streamItems];\n      for (let i = items.length - 1; i >= 0; i--) {\n        const item = items[i];\n        if (item && item.type === \"thinking\" && item.isStreaming) {\n          items[i] = { ...item, content };\n          break;\n        }\n      }\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        streamItems: items,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  updateToolCallStreamItem: (\n    sessionId: string,\n    toolCallId: string,\n    updates: Partial<ToolCallState>\n  ) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const streamItems = session.streamItems.map((item) => {\n        if (item.type === \"tool_call\" && item.toolCall.id === toolCallId) {\n          return {\n            ...item,\n            toolCall: { ...item.toolCall, ...updates },\n          };\n        }\n        return item;\n      }) as StreamItem[];\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        streamItems,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  updateTodoListStreamItem: (\n    sessionId: string,\n    todoListId: string,\n    updates: Partial<TodoListState>\n  ) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const streamItems = session.streamItems.map((item) => {\n        if (item.type === \"todo_list\" && item.todoList.id === todoListId) {\n          return {\n            ...item,\n            todoList: { ...item.todoList, ...updates },\n          };\n        }\n        return item;\n      }) as StreamItem[];\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        streamItems,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  upsertTodoListStreamItem: (\n    sessionId: string,\n    todoListId: string,\n    todoList: TodoListState\n  ) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      // Check if a todo_list with this ID already exists\n      const existingIndex = session.streamItems.findIndex(\n        (item) => item.type === \"todo_list\" && item.todoList.id === todoListId\n      );\n\n      let streamItems: StreamItem[];\n      if (existingIndex >= 0) {\n        // Update existing todo_list\n        streamItems = session.streamItems.map((item, index) => {\n          if (index === existingIndex && item.type === \"todo_list\") {\n            return {\n              ...item,\n              todoList: { ...item.todoList, ...todoList },\n            };\n          }\n          return item;\n        }) as StreamItem[];\n      } else {\n        // Create new todo_list item\n        streamItems = [\n          ...session.streamItems,\n          {\n            type: \"todo_list\" as const,\n            id: todoListId,\n            todoList,\n          },\n        ];\n      }\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        streamItems,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  clearStreamItems: (sessionId: string) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        streamItems: [],\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  // ===========================================================================\n  // Abort Control (mirrors chat's per-session pattern)\n  // ===========================================================================\n\n  setAbortController: (sessionId: string, controller: AbortController) => {\n    get().updateSessionData(sessionId, { abortController: controller });\n  },\n\n  abortSession: (sessionId: string) => {\n    const session = get().sessions.get(sessionId);\n    if (session?.abortController) {\n      session.abortController.abort();\n      get().updateSessionData(sessionId, {\n        abortController: new AbortController(),\n      });\n    }\n  },\n\n  abortCurrentSession: () => {\n    const { currentSessionId, abortSession } = get();\n    if (currentSessionId) {\n      abortSession(currentSessionId);\n    }\n  },\n\n  // ===========================================================================\n  // Session Lifecycle\n  // ===========================================================================\n\n  createNewSession: async (prompt: string) => {\n    const {\n      setCurrentSession,\n      updateSessionData,\n      refreshSessionHistory,\n      nameBuildSession,\n    } = get();\n    // Read from cookie - single source of truth\n    const demoDataEnabled = getDemoDataEnabled();\n\n    // Create a temporary session ID for optimistic UI\n    const tempId = `temp-${Date.now()}`;\n    setCurrentSession(tempId);\n    updateSessionData(tempId, { status: \"creating\" });\n\n    try {\n      // Get LLM selection from cookie\n      const llmSelection = getBuildLlmSelection();\n      const sessionData = await apiCreateSession({\n        name: prompt.slice(0, 50),\n        demoDataEnabled,\n        llmProviderType: llmSelection?.provider || null,\n        llmModelName: llmSelection?.modelName || null,\n      });\n      const realSessionId = sessionData.id;\n\n      // Remove temp session and create real one\n      set((state) => {\n        const newSessions = new Map(state.sessions);\n        newSessions.delete(tempId);\n        newSessions.set(\n          realSessionId,\n          createInitialSessionData(realSessionId, {\n            status: \"idle\",\n            messages: [\n              {\n                id: `msg-${Date.now()}`,\n                type: \"user\",\n                content: prompt,\n                timestamp: new Date(),\n              },\n            ],\n            isLoaded: true,\n            // Inherit output panel state from no-session state\n            outputPanelOpen: state.noSessionOutputPanelOpen,\n          })\n        );\n        return {\n          currentSessionId: realSessionId,\n          sessions: newSessions,\n        };\n      });\n\n      // Auto-name the session after a short delay\n      setTimeout(() => {\n        nameBuildSession(realSessionId);\n      }, 200);\n\n      await refreshSessionHistory();\n      return realSessionId;\n    } catch (err) {\n      console.error(\"Failed to create session:\", err);\n      updateSessionData(tempId, {\n        status: \"failed\",\n        error: (err as Error).message,\n      });\n      return null;\n    }\n  },\n\n  loadSession: async (sessionId: string) => {\n    const { setCurrentSession, updateSessionData, sessions } = get();\n\n    // Check if already loaded in cache\n    const existingSession = sessions.get(sessionId);\n    if (existingSession?.isLoaded) {\n      setCurrentSession(sessionId);\n      return;\n    }\n\n    // Set as current and mark as loading\n    setCurrentSession(sessionId);\n\n    try {\n      // First fetch session to check sandbox status\n      let sessionData = await fetchSession(sessionId);\n\n      // Check if session needs to be restored:\n      // - Sandbox is sleeping or terminated\n      // - Sandbox is running but session workspace is not loaded\n      const needsRestore =\n        sessionData.sandbox?.status === \"sleeping\" ||\n        sessionData.sandbox?.status === \"terminated\" ||\n        (sessionData.sandbox?.status === \"running\" &&\n          !sessionData.session_loaded_in_sandbox);\n\n      if (needsRestore) {\n        // Show sandbox as \"restoring\" while we load messages + restore\n        updateSessionData(sessionId, {\n          status: \"creating\",\n          sandbox: sessionData.sandbox\n            ? { ...sessionData.sandbox, status: \"restoring\" }\n            : null,\n        });\n      }\n\n      // Messages come from DB and don't need the sandbox running.\n      // Artifacts need sandbox filesystem, so skip during restore.\n      const messages = await fetchMessages(sessionId);\n      const artifacts = needsRestore ? [] : await fetchArtifacts(sessionId);\n\n      // Preserve optimistic messages if actively streaming (pre-provisioned flow).\n      const currentSession = get().sessions.get(sessionId);\n      const isStreaming =\n        (currentSession?.messages?.length ?? 0) > 0 &&\n        (currentSession?.status === \"running\" ||\n          currentSession?.status === \"creating\");\n\n      // Construct webapp URL\n      let webappUrl: string | null = null;\n      const hasWebapp = artifacts.some(\n        (a) => a.type === \"nextjs_app\" || a.type === \"web_app\"\n      );\n      if (hasWebapp && sessionData.sandbox?.nextjs_port) {\n        webappUrl = `http://localhost:${sessionData.sandbox.nextjs_port}`;\n      }\n\n      const status = isStreaming\n        ? currentSession!.status\n        : needsRestore\n          ? \"creating\"\n          : sessionData.status === \"active\"\n            ? \"active\"\n            : \"idle\";\n      const resolvedMessages = isStreaming\n        ? currentSession!.messages\n        : consolidateMessagesIntoTurns(messages);\n      const streamItems = isStreaming ? currentSession!.streamItems : [];\n      const sandbox =\n        needsRestore && sessionData.sandbox\n          ? { ...sessionData.sandbox, status: \"restoring\" as const }\n          : sessionData.sandbox;\n\n      updateSessionData(sessionId, {\n        status,\n        messages: resolvedMessages,\n        streamItems,\n        artifacts,\n        webappUrl,\n        sandbox,\n        error: null,\n        isLoaded: true,\n      });\n\n      // Now restore the sandbox if needed (messages are already visible).\n      // The backend enforces a timeout and returns an error if restore\n      // takes too long, so no frontend timeout needed here.\n      if (needsRestore) {\n        try {\n          sessionData = await restoreSession(sessionId);\n\n          // Sandbox is now running - fetch artifacts\n          const restoredArtifacts = await fetchArtifacts(sessionId);\n\n          updateSessionData(sessionId, {\n            status: sessionData.status === \"active\" ? \"active\" : \"idle\",\n            artifacts: restoredArtifacts,\n            sandbox: sessionData.sandbox,\n            // Bump so OutputPanel's SWR refetches webapp-info (which\n            // derives the actual webappUrl from the backend).\n            webappNeedsRefresh:\n              (get().sessions.get(sessionId)?.webappNeedsRefresh || 0) + 1,\n          });\n        } catch (restoreErr) {\n          console.error(\"Sandbox restore failed:\", restoreErr);\n          updateSessionData(sessionId, {\n            status: \"idle\",\n            sandbox: sessionData.sandbox\n              ? { ...sessionData.sandbox, status: \"failed\" }\n              : null,\n          });\n        }\n      }\n    } catch (err) {\n      console.error(\"Failed to load session:\", err);\n      updateSessionData(sessionId, {\n        error: (err as Error).message,\n      });\n    }\n  },\n\n  // ===========================================================================\n  // Session History\n  // ===========================================================================\n\n  refreshSessionHistory: async () => {\n    try {\n      const history = await fetchSessionHistory();\n      set({ sessionHistory: history });\n    } catch (err) {\n      console.error(\"Failed to fetch session history:\", err);\n    }\n  },\n\n  nameBuildSession: async (sessionId: string) => {\n    try {\n      // Generate name using LLM based on first user message\n      const generatedName = await generateSessionName(sessionId);\n\n      // Optimistically update the session title in sessionHistory immediately\n      // This triggers the typewriter animation in the sidebar\n      set((state) => ({\n        sessionHistory: state.sessionHistory.map((item) =>\n          item.id === sessionId ? { ...item, title: generatedName } : item\n        ),\n      }));\n\n      // Persist the name to backend (fire and forget - error handling below)\n      await updateSessionName(sessionId, generatedName);\n    } catch (err) {\n      console.error(\"Failed to auto-name session:\", err);\n      // On error, refresh to get the actual state from backend\n      await get().refreshSessionHistory();\n    }\n  },\n\n  renameBuildSession: async (sessionId: string, newName: string) => {\n    try {\n      await updateSessionName(sessionId, newName);\n      set((state) => ({\n        sessionHistory: state.sessionHistory.map((item) =>\n          item.id === sessionId ? { ...item, title: newName } : item\n        ),\n      }));\n    } catch (err) {\n      console.error(\"Failed to rename session:\", err);\n      await get().refreshSessionHistory();\n      throw err;\n    }\n  },\n\n  deleteBuildSession: async (sessionId: string) => {\n    const { currentSessionId, abortSession, refreshSessionHistory } = get();\n\n    try {\n      abortSession(sessionId);\n      await apiDeleteSession(sessionId);\n\n      // Remove session from local state\n      set((state) => {\n        const newSessions = new Map(state.sessions);\n        newSessions.delete(sessionId);\n        return {\n          sessions: newSessions,\n          currentSessionId:\n            currentSessionId === sessionId ? null : state.currentSessionId,\n        };\n      });\n\n      // Refresh history after UI has shown success state\n      setTimeout(\n        () => refreshSessionHistory(),\n        DELETE_SUCCESS_DISPLAY_DURATION_MS\n      );\n    } catch (err) {\n      console.error(\"Failed to delete session:\", err);\n      throw err;\n    }\n  },\n\n  // ===========================================================================\n  // Utilities (mirrors chat's cleanup pattern)\n  // ===========================================================================\n\n  cleanupOldSessions: (maxSessions: number = 10) => {\n    set((state) => {\n      const sortedSessions = Array.from(state.sessions.entries()).sort(\n        ([, a], [, b]) => b.lastAccessed.getTime() - a.lastAccessed.getTime()\n      );\n\n      if (sortedSessions.length <= maxSessions) {\n        return state;\n      }\n\n      const sessionsToKeep = sortedSessions.slice(0, maxSessions);\n      const sessionsToRemove = sortedSessions.slice(maxSessions);\n\n      // Abort controllers for sessions being removed\n      sessionsToRemove.forEach(([, session]) => {\n        if (session.abortController) {\n          session.abortController.abort();\n        }\n      });\n\n      return {\n        sessions: new Map(sessionsToKeep),\n      };\n    });\n  },\n\n  // ===========================================================================\n  // Pre-provisioning Actions\n  // ===========================================================================\n\n  ensurePreProvisionedSession: async () => {\n    const { preProvisioning } = get();\n    // Read from cookie - single source of truth\n    const demoDataEnabled = getDemoDataEnabled();\n\n    // Already have a pre-provisioned session ready\n    if (preProvisioning.status === \"ready\") {\n      // If demoDataEnabled matches, return the existing session\n      if (preProvisioning.demoDataEnabled === demoDataEnabled) {\n        return preProvisioning.sessionId;\n      }\n      // demoDataEnabled changed - invalidate and re-provision\n      const sessionIdToDelete = preProvisioning.sessionId;\n      set({ preProvisioning: { status: \"idle\" } });\n      apiDeleteSession(sessionIdToDelete).catch((err) => {\n        console.error(\n          \"[PreProvision] Failed to delete invalidated session:\",\n          err\n        );\n      });\n      // Fall through to create a new session with the current setting\n    }\n\n    // Already provisioning - return existing promise\n    if (preProvisioning.status === \"provisioning\") {\n      return provisioningPromise;\n    }\n\n    // Handle failed state with retry\n    // Capture retryCount BEFORE resetting to idle (so we can increment it on next failure)\n    let currentRetryCount = 0;\n    if (preProvisioning.status === \"failed\") {\n      currentRetryCount = preProvisioning.retryCount;\n      if (Date.now() < preProvisioning.retryAt) {\n        // Not yet time to retry\n        return null;\n      }\n      // Time to retry - reset to idle and continue\n      set({ preProvisioning: { status: \"idle\" } });\n    }\n\n    // Start new provisioning with current demoDataEnabled value\n\n    const promise = (async (): Promise<string | null> => {\n      try {\n        // Parse user persona and LLM selection from cookies\n        const persona = getBuildUserPersona();\n        const llmSelection = getBuildLlmSelection();\n\n        const sessionData = await apiCreateSession({\n          demoDataEnabled,\n          userWorkArea: persona?.workArea || null,\n          userLevel: persona?.level || null,\n          llmProviderType: llmSelection?.provider || null,\n          llmModelName: llmSelection?.modelName || null,\n        });\n\n        provisioningPromise = null; // Clear promise on success\n        set({\n          preProvisioning: {\n            status: \"ready\",\n            sessionId: sessionData.id,\n            demoDataEnabled,\n          },\n        });\n        return sessionData.id;\n      } catch (err) {\n        console.error(\"[PreProvision] Failed to pre-provision session:\", err);\n        const errorMessage =\n          err instanceof Error ? err.message : \"Unknown error\";\n\n        // Exponential backoff: 1s, 2s, 4s, 8s, ... max 30s\n        const newRetryCount = currentRetryCount + 1;\n        const backoffMs = Math.min(\n          1000 * Math.pow(2, newRetryCount - 1),\n          30000\n        );\n\n        provisioningPromise = null; // Clear promise on failure\n        set({\n          preProvisioning: {\n            status: \"failed\",\n            error: errorMessage,\n            retryCount: newRetryCount,\n            retryAt: Date.now() + backoffMs,\n          },\n        });\n        return null;\n      }\n    })();\n\n    provisioningPromise = promise;\n    set({\n      preProvisioning: { status: \"provisioning\", demoDataEnabled },\n    });\n    return promise;\n  },\n\n  consumePreProvisionedSession: async () => {\n    const { preProvisioning } = get();\n\n    // Wait for provisioning to complete if in progress\n    if (preProvisioning.status === \"provisioning\") {\n      await provisioningPromise;\n    }\n\n    // Re-check state after awaiting (may have changed)\n    const { preProvisioning: currentState, sessionHistory } = get();\n\n    if (currentState.status === \"ready\") {\n      const { sessionId } = currentState;\n\n      // Optimistically add to session history so it appears in sidebar immediately\n      // (Backend excludes empty sessions, but we're about to send a message)\n      const alreadyInHistory = sessionHistory.some(\n        (item) => item.id === sessionId\n      );\n      if (!alreadyInHistory) {\n        set({\n          sessionHistory: [\n            {\n              id: sessionId,\n              title: \"Fresh Craft\",\n              createdAt: new Date(),\n            },\n            ...sessionHistory,\n          ],\n        });\n      }\n\n      // Reset to idle and return the session ID\n      set({ preProvisioning: { status: \"idle\" } });\n      return sessionId;\n    }\n\n    // No session available\n    return null;\n  },\n\n  clearPreProvisionedSession: async () => {\n    const { preProvisioning } = get();\n\n    // If provisioning is in progress, wait for it to complete\n    if (preProvisioning.status === \"provisioning\") {\n      await provisioningPromise;\n    }\n\n    // Re-check state after awaiting\n    const { preProvisioning: currentState } = get();\n\n    if (currentState.status === \"ready\") {\n      const { sessionId } = currentState;\n\n      // Reset to idle first\n      set({ preProvisioning: { status: \"idle\" } });\n\n      // Delete the session and wait for completion\n      try {\n        await apiDeleteSession(sessionId);\n      } catch (err) {\n        console.error(\n          \"[PreProvision] Failed to delete pre-provisioned session:\",\n          err\n        );\n      }\n    } else {\n      // Just reset to idle if not ready\n      set({ preProvisioning: { status: \"idle\" } });\n    }\n  },\n\n  // ===========================================================================\n  // Controller State Actions (replaces refs in useBuildSessionController)\n  // ===========================================================================\n\n  setControllerTriggered: (url: string | null) => {\n    set((state) => ({\n      controllerState: {\n        ...state.controllerState,\n        lastTriggeredForUrl: url,\n      },\n    }));\n  },\n\n  setControllerLoaded: (sessionId: string | null) => {\n    set((state) => ({\n      controllerState: {\n        ...state.controllerState,\n        loadedSessionId: sessionId,\n      },\n    }));\n  },\n\n  resetControllerState: () => {\n    set({\n      controllerState: {\n        lastTriggeredForUrl: null,\n        loadedSessionId: null,\n      },\n    });\n  },\n\n  // ===========================================================================\n  // Webapp Refresh Actions\n  // ===========================================================================\n\n  triggerWebappRefresh: (sessionId: string) => {\n    const session = get().sessions.get(sessionId);\n    if (session) {\n      // Increment refresh counter and open panel if not already open\n      // Using a counter ensures each edit triggers a new refresh\n      get().updateSessionData(sessionId, {\n        webappNeedsRefresh: (session.webappNeedsRefresh || 0) + 1,\n        ...(session.outputPanelOpen ? {} : { outputPanelOpen: true }),\n      });\n    }\n  },\n\n  triggerFilesRefresh: (sessionId: string) => {\n    const session = get().sessions.get(sessionId);\n    if (session) {\n      // Increment refresh counter to trigger files list refresh\n      // Using a counter ensures each write/edit triggers a new refresh\n      // Also collapse the attachments directory to show fresh state\n      const collapsedExpandedPaths = session.filesTabState.expandedPaths.filter(\n        (path) => path !== \"attachments\" && !path.startsWith(\"attachments/\")\n      );\n      get().updateSessionData(sessionId, {\n        filesNeedsRefresh: (session.filesNeedsRefresh || 0) + 1,\n        filesTabState: {\n          ...session.filesTabState,\n          expandedPaths: collapsedExpandedPaths,\n        },\n      });\n    }\n  },\n\n  // ===========================================================================\n  // File Preview Actions\n  // ===========================================================================\n\n  openFilePreview: (sessionId: string, path: string, fileName: string) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      // Check if tab already exists\n      const existingTab = session.filePreviewTabs.find(\n        (tab) => tab.path === path\n      );\n\n      let filePreviewTabs = session.filePreviewTabs;\n      if (!existingTab) {\n        // Add new tab\n        filePreviewTabs = [...session.filePreviewTabs, { path, fileName }];\n      }\n\n      // Push to history (truncate forward history if navigating from middle)\n      const { tabHistory } = session;\n      const newEntry: TabHistoryEntry = { type: \"file\", path };\n      const newEntries = [\n        ...tabHistory.entries.slice(0, tabHistory.currentIndex + 1),\n        newEntry,\n      ];\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        filePreviewTabs,\n        activeFilePreviewPath: path, // Always switch to this tab\n        tabHistory: {\n          entries: newEntries,\n          currentIndex: newEntries.length - 1,\n        },\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  openMarkdownPreview: (sessionId: string, filePath: string) => {\n    const fileName = filePath.split(\"/\").pop() || filePath;\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const existingTab = session.filePreviewTabs.find(\n        (t) => t.path === filePath\n      );\n      let filePreviewTabs = session.filePreviewTabs;\n      if (!existingTab) {\n        filePreviewTabs = [\n          ...session.filePreviewTabs,\n          { path: filePath, fileName },\n        ];\n      }\n\n      // Push to history (truncate forward history if navigating from middle)\n      const { tabHistory } = session;\n      const newEntry: TabHistoryEntry = { type: \"file\", path: filePath };\n      const newEntries = [\n        ...tabHistory.entries.slice(0, tabHistory.currentIndex + 1),\n        newEntry,\n      ];\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        outputPanelOpen: true,\n        filePreviewTabs,\n        activeFilePreviewPath: filePath,\n        tabHistory: {\n          entries: newEntries,\n          currentIndex: newEntries.length - 1,\n        },\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  closeFilePreview: (sessionId: string, path: string) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      // Remove the tab\n      const filePreviewTabs = session.filePreviewTabs.filter(\n        (tab) => tab.path !== path\n      );\n\n      // If closing the active preview tab, switch to Files tab\n      const activeFilePreviewPath =\n        session.activeFilePreviewPath === path\n          ? null\n          : session.activeFilePreviewPath;\n\n      // If we closed the active tab, set activeOutputTab to \"files\"\n      const activeOutputTab =\n        session.activeFilePreviewPath === path\n          ? \"files\"\n          : session.activeOutputTab;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        filePreviewTabs,\n        activeFilePreviewPath,\n        activeOutputTab,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  setActiveOutputTab: (sessionId: string, tab: OutputTabType) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      // Push to history (truncate forward history if navigating from middle)\n      const { tabHistory } = session;\n      const newEntry: TabHistoryEntry = { type: \"pinned\", tab };\n      const newEntries = [\n        ...tabHistory.entries.slice(0, tabHistory.currentIndex + 1),\n        newEntry,\n      ];\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        activeOutputTab: tab,\n        activeFilePreviewPath: null, // Clear file preview when selecting pinned tab\n        tabHistory: {\n          entries: newEntries,\n          currentIndex: newEntries.length - 1,\n        },\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  setActiveFilePreviewPath: (sessionId: string, path: string | null) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      // Push to history if switching to a file (truncate forward history)\n      const { tabHistory } = session;\n      let newTabHistory = tabHistory;\n      if (path !== null) {\n        const newEntry: TabHistoryEntry = { type: \"file\", path };\n        const newEntries = [\n          ...tabHistory.entries.slice(0, tabHistory.currentIndex + 1),\n          newEntry,\n        ];\n        newTabHistory = {\n          entries: newEntries,\n          currentIndex: newEntries.length - 1,\n        };\n      }\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        activeFilePreviewPath: path,\n        tabHistory: newTabHistory,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  setNoSessionActiveOutputTab: (tab: OutputTabType) => {\n    set({ noSessionActiveOutputTab: tab });\n  },\n\n  // ===========================================================================\n  // Files Tab State Actions\n  // ===========================================================================\n\n  updateFilesTabState: (sessionId: string, updates: Partial<FilesTabState>) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        filesTabState: { ...session.filesTabState, ...updates },\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  // ===========================================================================\n  // Tab Navigation History Actions\n  // ===========================================================================\n\n  navigateTabBack: (sessionId: string) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const { tabHistory } = session;\n      if (tabHistory.currentIndex <= 0) return state;\n\n      const newIndex = tabHistory.currentIndex - 1;\n      const entry = tabHistory.entries[newIndex];\n      if (!entry) return state;\n\n      // Re-open file tab if it was closed\n      let filePreviewTabs = session.filePreviewTabs;\n      if (entry.type === \"file\") {\n        const tabExists = filePreviewTabs.some(\n          (tab) => tab.path === entry.path\n        );\n        if (!tabExists) {\n          // Extract filename from path\n          const fileName = entry.path.split(\"/\").pop() || entry.path;\n          filePreviewTabs = [\n            ...filePreviewTabs,\n            { path: entry.path, fileName },\n          ];\n        }\n      }\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        tabHistory: { ...tabHistory, currentIndex: newIndex },\n        activeOutputTab:\n          entry.type === \"pinned\" ? entry.tab : session.activeOutputTab,\n        activeFilePreviewPath: entry.type === \"file\" ? entry.path : null,\n        filePreviewTabs,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  navigateTabForward: (sessionId: string) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const { tabHistory } = session;\n      if (tabHistory.currentIndex >= tabHistory.entries.length - 1)\n        return state;\n\n      const newIndex = tabHistory.currentIndex + 1;\n      const entry = tabHistory.entries[newIndex];\n      if (!entry) return state;\n\n      // Re-open file tab if it was closed\n      let filePreviewTabs = session.filePreviewTabs;\n      if (entry.type === \"file\") {\n        const tabExists = filePreviewTabs.some(\n          (tab) => tab.path === entry.path\n        );\n        if (!tabExists) {\n          // Extract filename from path\n          const fileName = entry.path.split(\"/\").pop() || entry.path;\n          filePreviewTabs = [\n            ...filePreviewTabs,\n            { path: entry.path, fileName },\n          ];\n        }\n      }\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        tabHistory: { ...tabHistory, currentIndex: newIndex },\n        activeOutputTab:\n          entry.type === \"pinned\" ? entry.tab : session.activeOutputTab,\n        activeFilePreviewPath: entry.type === \"file\" ? entry.path : null,\n        filePreviewTabs,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  // ===========================================================================\n  // Follow-up Suggestion Actions\n  // ===========================================================================\n\n  setFollowupSuggestions: (\n    sessionId: string,\n    suggestions: SuggestionBubble[] | null\n  ) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        followupSuggestions: suggestions,\n        suggestionsLoading: false,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  setSuggestionsLoading: (sessionId: string, loading: boolean) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        suggestionsLoading: loading,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n\n  clearFollowupSuggestions: (sessionId: string) => {\n    set((state) => {\n      const session = state.sessions.get(sessionId);\n      if (!session) return state;\n\n      const updatedSession: BuildSessionData = {\n        ...session,\n        followupSuggestions: null,\n        suggestionsLoading: false,\n        lastAccessed: new Date(),\n      };\n      const newSessions = new Map(state.sessions);\n      newSessions.set(sessionId, updatedSession);\n      return { sessions: newSessions };\n    });\n  },\n}));\n\n// =============================================================================\n// Selector Hooks (mirrors chat's pattern)\n// =============================================================================\n\n// Stable empty references for SSR hydration (prevents infinite loop)\nconst EMPTY_ARRAY: never[] = [];\nconst EMPTY_FILE_PREVIEW_TABS: FilePreviewTab[] = [];\nconst EMPTY_FILES_TAB_STATE: FilesTabState = {\n  expandedPaths: [],\n  scrollTop: 0,\n  directoryCache: {},\n};\nconst EMPTY_TAB_HISTORY: TabNavigationHistory = {\n  entries: [],\n  currentIndex: 0,\n};\n\nexport const useCurrentSession = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    return currentSessionId ? sessions.get(currentSessionId) : null;\n  });\n\n/**\n * Returns the current session data with stable reference.\n * Returns null when no session exists.\n */\nexport const useSession = (): BuildSessionData | null =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return null;\n    return sessions.get(currentSessionId) ?? null;\n  });\n\nexport const useSessionId = () =>\n  useBuildSessionStore((state) => state.currentSessionId);\n\nexport const useHasSession = () =>\n  useBuildSessionStore((state) => state.currentSessionId !== null);\n\nexport const useIsRunning = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return false;\n    const session = sessions.get(currentSessionId);\n    return session?.status === \"running\" || session?.status === \"creating\";\n  });\n\nexport const useMessages = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return EMPTY_ARRAY;\n    return sessions.get(currentSessionId)?.messages ?? EMPTY_ARRAY;\n  });\n\nexport const useArtifacts = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return EMPTY_ARRAY;\n    return sessions.get(currentSessionId)?.artifacts ?? EMPTY_ARRAY;\n  });\n\nexport const useToolCalls = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return EMPTY_ARRAY;\n    return sessions.get(currentSessionId)?.toolCalls ?? EMPTY_ARRAY;\n  });\n\nexport const useSessionHistory = () =>\n  useBuildSessionStore((state) => state.sessionHistory);\n\n/**\n * Returns the output panel open state for the current session.\n * Falls back to temporary state when no session exists (welcome page).\n * This temporary state resets to false when a session is created or cleared.\n */\nexport const useOutputPanelOpen = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions, noSessionOutputPanelOpen } = state;\n    if (!currentSessionId) return noSessionOutputPanelOpen;\n    return sessions.get(currentSessionId)?.outputPanelOpen ?? false;\n  });\n\nexport const useToggleOutputPanel = () =>\n  useBuildSessionStore((state) => state.toggleCurrentOutputPanel);\n\n// Pre-provisioning selectors\nexport const useIsPreProvisioning = () =>\n  useBuildSessionStore(\n    (state) => state.preProvisioning.status === \"provisioning\"\n  );\n\nexport const useIsPreProvisioningReady = () =>\n  useBuildSessionStore((state) => state.preProvisioning.status === \"ready\");\n\nexport const useIsPreProvisioningFailed = () =>\n  useBuildSessionStore((state) => state.preProvisioning.status === \"failed\");\n\nexport const usePreProvisionedSessionId = () =>\n  useBuildSessionStore((state) =>\n    state.preProvisioning.status === \"ready\"\n      ? state.preProvisioning.sessionId\n      : null\n  );\n\n// Demo data selector - reads directly from cookie (single source of truth)\n// Note: This returns the current cookie value but doesn't trigger re-renders on change.\n// Components that need reactive updates should manage their own local state.\nexport const useDemoDataEnabled = () => getDemoDataEnabled();\n\n// Controller state selectors (for useBuildSessionController)\nexport const useControllerState = () =>\n  useBuildSessionStore((state) => state.controllerState);\n\nexport const useSetControllerTriggered = () =>\n  useBuildSessionStore((state) => state.setControllerTriggered);\n\nexport const useSetControllerLoaded = () =>\n  useBuildSessionStore((state) => state.setControllerLoaded);\n\nexport const useResetControllerState = () =>\n  useBuildSessionStore((state) => state.resetControllerState);\n\n// Stream items selector\nexport const useStreamItems = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return EMPTY_ARRAY;\n    return sessions.get(currentSessionId)?.streamItems ?? EMPTY_ARRAY;\n  });\n\n// Webapp refresh selector\nexport const useWebappNeedsRefresh = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return 0;\n    return sessions.get(currentSessionId)?.webappNeedsRefresh ?? 0;\n  });\n\n// Files refresh selector\nexport const useFilesNeedsRefresh = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return 0;\n    return sessions.get(currentSessionId)?.filesNeedsRefresh ?? 0;\n  });\n\n// File preview selectors\nexport const useFilePreviewTabs = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return EMPTY_FILE_PREVIEW_TABS;\n    return (\n      sessions.get(currentSessionId)?.filePreviewTabs ?? EMPTY_FILE_PREVIEW_TABS\n    );\n  });\n\nexport const useActiveOutputTab = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions, noSessionActiveOutputTab } = state;\n    if (!currentSessionId) return noSessionActiveOutputTab;\n    return sessions.get(currentSessionId)?.activeOutputTab ?? \"preview\";\n  });\n\nexport const useActiveFilePreviewPath = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return null;\n    return sessions.get(currentSessionId)?.activeFilePreviewPath ?? null;\n  });\n\nexport const useFilesTabState = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return EMPTY_FILES_TAB_STATE;\n    return (\n      sessions.get(currentSessionId)?.filesTabState ?? EMPTY_FILES_TAB_STATE\n    );\n  });\n\nexport const useTabHistory = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return EMPTY_TAB_HISTORY;\n    return sessions.get(currentSessionId)?.tabHistory ?? EMPTY_TAB_HISTORY;\n  });\n\n// Follow-up suggestion selectors\nexport const useFollowupSuggestions = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return null;\n    return sessions.get(currentSessionId)?.followupSuggestions ?? null;\n  });\n\nexport const useSuggestionsLoading = () =>\n  useBuildSessionStore((state) => {\n    const { currentSessionId, sessions } = state;\n    if (!currentSessionId) return false;\n    return sessions.get(currentSessionId)?.suggestionsLoading ?? false;\n  });\n"
  },
  {
    "path": "web/src/app/craft/hooks/useBuildStreaming.ts",
    "content": "\"use client\";\n\nimport { useCallback, useMemo } from \"react\";\n\nimport {\n  Artifact,\n  ArtifactType,\n  SessionErrorCode,\n} from \"@/app/craft/types/streamingTypes\";\n\nimport {\n  sendMessageStream,\n  processSSEStream,\n  fetchSession,\n  generateFollowupSuggestions,\n  RateLimitError,\n} from \"@/app/craft/services/apiServices\";\n\nimport { useBuildSessionStore } from \"@/app/craft/hooks/useBuildSessionStore\";\nimport { StreamItem } from \"@/app/craft/types/displayTypes\";\n\nimport { genId } from \"@/app/craft/utils/streamItemHelpers\";\nimport { parsePacket } from \"@/app/craft/utils/parsePacket\";\n\n/**\n * Hook for handling message streaming in build sessions.\n *\n * Uses a simple FIFO approach:\n * - Stream items are appended in chronological order as packets arrive\n * - Text/thinking chunks are merged when consecutive\n * - Tool calls are interleaved with text in the exact order they arrive\n */\nexport function useBuildStreaming() {\n  const appendMessageToSession = useBuildSessionStore(\n    (state) => state.appendMessageToSession\n  );\n  const addArtifactToSession = useBuildSessionStore(\n    (state) => state.addArtifactToSession\n  );\n  const setAbortController = useBuildSessionStore(\n    (state) => state.setAbortController\n  );\n  const abortCurrentSession = useBuildSessionStore(\n    (state) => state.abortCurrentSession\n  );\n  const updateSessionData = useBuildSessionStore(\n    (state) => state.updateSessionData\n  );\n\n  // Stream item actions\n  const appendStreamItem = useBuildSessionStore(\n    (state) => state.appendStreamItem\n  );\n  const updateLastStreamingText = useBuildSessionStore(\n    (state) => state.updateLastStreamingText\n  );\n  const updateLastStreamingThinking = useBuildSessionStore(\n    (state) => state.updateLastStreamingThinking\n  );\n  const updateToolCallStreamItem = useBuildSessionStore(\n    (state) => state.updateToolCallStreamItem\n  );\n  const upsertTodoListStreamItem = useBuildSessionStore(\n    (state) => state.upsertTodoListStreamItem\n  );\n  const clearStreamItems = useBuildSessionStore(\n    (state) => state.clearStreamItems\n  );\n  const triggerWebappRefresh = useBuildSessionStore(\n    (state) => state.triggerWebappRefresh\n  );\n  const triggerFilesRefresh = useBuildSessionStore(\n    (state) => state.triggerFilesRefresh\n  );\n  const openMarkdownPreview = useBuildSessionStore(\n    (state) => state.openMarkdownPreview\n  );\n  const setFollowupSuggestions = useBuildSessionStore(\n    (state) => state.setFollowupSuggestions\n  );\n  const setSuggestionsLoading = useBuildSessionStore(\n    (state) => state.setSuggestionsLoading\n  );\n\n  // ── Output file detector registry ──────────────────────────────────────\n  // Ordered by priority — first match wins.\n  // To add a new output type, add an entry here + a store action.\n  const OUTPUT_FILE_DETECTORS = useMemo(\n    () => [\n      {\n        match: (fp: string, k: string) =>\n          (k === \"edit\" || k === \"write\") &&\n          (fp.includes(\"/web/\") || fp.startsWith(\"web/\")),\n        onDetect: (sid: string) => triggerWebappRefresh(sid),\n      },\n      {\n        match: (fp: string, k: string) =>\n          (k === \"edit\" || k === \"write\") &&\n          fp.endsWith(\".md\") &&\n          (fp.includes(\"/outputs/\") || fp.startsWith(\"outputs/\")),\n        onDetect: (sid: string, fp: string) => {\n          openMarkdownPreview(sid, fp);\n          triggerFilesRefresh(sid);\n        },\n      },\n      {\n        match: (fp: string, k: string) =>\n          (k === \"edit\" || k === \"write\") &&\n          (fp.includes(\"/outputs/\") || fp.startsWith(\"outputs/\")),\n        onDetect: (sid: string) => triggerFilesRefresh(sid),\n      },\n    ],\n    [triggerWebappRefresh, triggerFilesRefresh, openMarkdownPreview]\n  );\n\n  /**\n   * Stream a message to the given session and process the SSE response.\n   * Populates streamItems in FIFO order as packets arrive.\n   */\n  const streamMessage = useCallback(\n    async (sessionId: string, content: string): Promise<void> => {\n      const currentState = useBuildSessionStore.getState();\n      const existingSession = currentState.sessions.get(sessionId);\n\n      if (existingSession?.abortController) {\n        existingSession.abortController.abort();\n      }\n\n      const controller = new AbortController();\n      setAbortController(sessionId, controller);\n\n      // Set status to running and clear previous stream items\n      updateSessionData(sessionId, { status: \"running\" });\n      clearStreamItems(sessionId);\n\n      // Track accumulated content for streaming text/thinking\n      let accumulatedText = \"\";\n      let accumulatedThinking = \"\";\n      let lastItemType: \"text\" | \"thinking\" | \"tool\" | null = null;\n\n      // Helper to finalize any streaming item before switching types\n      const finalizeStreaming = () => {\n        const session = useBuildSessionStore.getState().sessions.get(sessionId);\n        if (!session) return;\n\n        const items = session.streamItems;\n        const lastItem = items[items.length - 1];\n        if (lastItem) {\n          if (lastItem.type === \"text\" && lastItem.isStreaming) {\n            useBuildSessionStore\n              .getState()\n              .updateStreamItem(sessionId, lastItem.id, { isStreaming: false });\n          } else if (lastItem.type === \"thinking\" && lastItem.isStreaming) {\n            useBuildSessionStore\n              .getState()\n              .updateStreamItem(sessionId, lastItem.id, { isStreaming: false });\n          }\n        }\n      };\n\n      try {\n        const response = await sendMessageStream(\n          sessionId,\n          content,\n          controller.signal\n        );\n\n        await processSSEStream(response, (rawPacket) => {\n          const parsed = parsePacket(rawPacket);\n\n          switch (parsed.type) {\n            // Agent message content - accumulate and update/create text item\n            case \"text_chunk\": {\n              if (!parsed.text) break;\n\n              accumulatedText += parsed.text;\n\n              if (lastItemType === \"text\") {\n                updateLastStreamingText(sessionId, accumulatedText);\n              } else {\n                finalizeStreaming();\n                accumulatedText = parsed.text;\n                const item: StreamItem = {\n                  type: \"text\",\n                  id: genId(\"text\"),\n                  content: parsed.text,\n                  isStreaming: true,\n                };\n                appendStreamItem(sessionId, item);\n                lastItemType = \"text\";\n              }\n              break;\n            }\n\n            // Agent thinking - accumulate and update/create thinking item\n            case \"thinking_chunk\": {\n              if (!parsed.text) break;\n\n              accumulatedThinking += parsed.text;\n\n              if (lastItemType === \"thinking\") {\n                updateLastStreamingThinking(sessionId, accumulatedThinking);\n              } else {\n                finalizeStreaming();\n                accumulatedThinking = parsed.text;\n                const item: StreamItem = {\n                  type: \"thinking\",\n                  id: genId(\"thinking\"),\n                  content: parsed.text,\n                  isStreaming: true,\n                };\n                appendStreamItem(sessionId, item);\n                lastItemType = \"thinking\";\n              }\n              break;\n            }\n\n            // Tool call started\n            case \"tool_call_start\": {\n              finalizeStreaming();\n              accumulatedText = \"\";\n              accumulatedThinking = \"\";\n\n              // Skip tool_call_start for TodoWrite — pill created on first progress\n              if (parsed.isTodo) {\n                lastItemType = \"tool\";\n                break;\n              }\n\n              appendStreamItem(sessionId, {\n                type: \"tool_call\",\n                id: parsed.toolCallId,\n                toolCall: {\n                  id: parsed.toolCallId,\n                  kind: parsed.kind,\n                  title: \"\",\n                  status: \"pending\",\n                  description: \"\",\n                  command: \"\",\n                  rawOutput: \"\",\n                  subagentType: undefined,\n                  isNewFile: true,\n                  oldContent: \"\",\n                  newContent: \"\",\n                },\n              });\n              lastItemType = \"tool\";\n              break;\n            }\n\n            // Tool call progress\n            case \"tool_call_progress\": {\n              if (parsed.isTodo) {\n                upsertTodoListStreamItem(sessionId, parsed.toolCallId, {\n                  id: parsed.toolCallId,\n                  todos: parsed.todos,\n                  isOpen: true,\n                });\n                break;\n              }\n\n              updateToolCallStreamItem(sessionId, parsed.toolCallId, {\n                status: parsed.status,\n                title: parsed.title,\n                description: parsed.description,\n                command: parsed.command,\n                rawOutput: parsed.rawOutput,\n                subagentType: parsed.subagentType ?? undefined,\n                ...(parsed.kind === \"edit\" && {\n                  isNewFile: parsed.isNewFile,\n                  oldContent: parsed.oldContent,\n                  newContent: parsed.newContent,\n                }),\n              });\n\n              // Run output file detectors (filePath is pre-sanitized)\n              if (parsed.filePath && parsed.kind) {\n                for (const detector of OUTPUT_FILE_DETECTORS) {\n                  if (detector.match(parsed.filePath, parsed.kind)) {\n                    detector.onDetect(sessionId, parsed.filePath);\n                    break;\n                  }\n                }\n              }\n\n              // Task completion → emit text StreamItem\n              if (parsed.taskOutput) {\n                appendStreamItem(sessionId, {\n                  type: \"text\",\n                  id: genId(\"task-output\"),\n                  content: parsed.taskOutput,\n                  isStreaming: false,\n                });\n                lastItemType = \"text\";\n                accumulatedText = \"\";\n              }\n              break;\n            }\n\n            // Artifacts\n            case \"artifact_created\": {\n              const newArtifact: Artifact = {\n                id: parsed.artifact.id,\n                session_id: sessionId,\n                type: parsed.artifact.type as ArtifactType,\n                name: parsed.artifact.name,\n                path: parsed.artifact.path,\n                preview_url: parsed.artifact.preview_url || null,\n                created_at: new Date(),\n                updated_at: new Date(),\n              };\n              addArtifactToSession(sessionId, newArtifact);\n\n              // If webapp, fetch session to get sandbox port\n              const isWebapp =\n                newArtifact.type === \"nextjs_app\" ||\n                newArtifact.type === \"web_app\";\n              if (isWebapp) {\n                fetchSession(sessionId)\n                  .then((sessionData) => {\n                    if (sessionData.sandbox?.nextjs_port) {\n                      const webappUrl = `http://localhost:${sessionData.sandbox.nextjs_port}`;\n                      updateSessionData(sessionId, { webappUrl });\n                    }\n                  })\n                  .catch((err) =>\n                    console.error(\n                      \"Failed to fetch session for webapp URL:\",\n                      err\n                    )\n                  );\n              }\n              break;\n            }\n\n            // Agent finished\n            case \"prompt_response\": {\n              finalizeStreaming();\n\n              const session = useBuildSessionStore\n                .getState()\n                .sessions.get(sessionId);\n\n              if (session && session.streamItems.length > 0) {\n                const textContent = session.streamItems\n                  .filter((item) => item.type === \"text\")\n                  .map((item) => item.content)\n                  .join(\"\");\n\n                const isFirstAgentMessage =\n                  session.messages.filter((m) => m.type === \"assistant\")\n                    .length === 0;\n\n                const firstUserMessage = session.messages.find(\n                  (m) => m.type === \"user\"\n                );\n\n                if (isFirstAgentMessage && firstUserMessage && textContent) {\n                  (async () => {\n                    try {\n                      setSuggestionsLoading(sessionId, true);\n                      const suggestions = await generateFollowupSuggestions(\n                        sessionId,\n                        firstUserMessage.content,\n                        textContent\n                      );\n                      setFollowupSuggestions(sessionId, suggestions);\n                    } catch (err) {\n                      console.error(\"Failed to generate suggestions:\", err);\n                      setFollowupSuggestions(sessionId, null);\n                    }\n                  })();\n                }\n\n                appendMessageToSession(sessionId, {\n                  id: genId(\"agent-msg\"),\n                  type: \"assistant\",\n                  content: textContent,\n                  timestamp: new Date(),\n                  message_metadata: {\n                    streamItems: session.streamItems.map((item) => ({\n                      ...item,\n                      ...(item.type === \"text\" || item.type === \"thinking\"\n                        ? { isStreaming: false }\n                        : {}),\n                    })),\n                  },\n                });\n              }\n\n              updateSessionData(sessionId, {\n                status: \"active\",\n                streamItems: [],\n              });\n              break;\n            }\n\n            // Error\n            case \"error\": {\n              updateSessionData(sessionId, {\n                status: \"failed\",\n                error: parsed.message,\n              });\n              break;\n            }\n\n            default:\n              break;\n          }\n        });\n      } catch (err) {\n        if ((err as Error).name === \"AbortError\") {\n          // User cancelled - no error handling needed\n        } else if (err instanceof RateLimitError) {\n          console.warn(\"[Streaming] Rate limit exceeded\");\n          updateSessionData(sessionId, {\n            status: \"active\",\n            error: SessionErrorCode.RATE_LIMIT_EXCEEDED,\n          });\n        } else {\n          console.error(\"[Streaming] Stream error:\", err);\n          updateSessionData(sessionId, {\n            status: \"failed\",\n            error: (err as Error).message,\n          });\n        }\n      } finally {\n        setAbortController(sessionId, new AbortController());\n      }\n    },\n    [\n      setAbortController,\n      updateSessionData,\n      appendStreamItem,\n      updateLastStreamingText,\n      updateLastStreamingThinking,\n      updateToolCallStreamItem,\n      upsertTodoListStreamItem,\n      clearStreamItems,\n      addArtifactToSession,\n      appendMessageToSession,\n      OUTPUT_FILE_DETECTORS,\n      setFollowupSuggestions,\n      setSuggestionsLoading,\n    ]\n  );\n\n  return useMemo(\n    () => ({\n      streamMessage,\n      abortStream: abortCurrentSession,\n    }),\n    [streamMessage, abortCurrentSession]\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/hooks/usePreProvisionPolling.ts",
    "content": "\"use client\";\n\nimport { useEffect, useRef } from \"react\";\nimport { useBuildSessionStore } from \"./useBuildSessionStore\";\nimport { checkPreProvisionedSession } from \"../services/apiServices\";\n\n/** Polling interval in milliseconds (5 seconds) */\nconst POLLING_INTERVAL_MS = 5000;\n\ninterface UsePreProvisionPollingOptions {\n  /** Only poll when enabled (should be true only on welcome page) */\n  enabled: boolean;\n}\n\n/**\n * Hook that polls to verify the pre-provisioned session is still valid.\n *\n * When multiple browser tabs have the same pre-provisioned session,\n * one tab may claim it by sending a message. This hook detects when\n * that happens and triggers re-provisioning so the current tab gets\n * a fresh session.\n *\n * Usage: Call this hook on the welcome page where pre-provisioned\n * sessions are used. Pass enabled=true only on the welcome page.\n */\nexport function usePreProvisionPolling({\n  enabled,\n}: UsePreProvisionPollingOptions) {\n  const preProvisioning = useBuildSessionStore(\n    (state) => state.preProvisioning\n  );\n  const ensurePreProvisionedSession = useBuildSessionStore(\n    (state) => state.ensurePreProvisionedSession\n  );\n\n  // Extract sessionId only when status is \"ready\" (handles discriminated union)\n  const sessionId =\n    preProvisioning.status === \"ready\" ? preProvisioning.sessionId : null;\n\n  // Use ref to track if we're currently checking (prevents overlapping requests)\n  const isCheckingRef = useRef(false);\n\n  useEffect(() => {\n    // Only poll when enabled (welcome page) and we have a ready session\n    if (!enabled || !sessionId) {\n      return;\n    }\n\n    const checkValidity = async () => {\n      if (isCheckingRef.current) return;\n      isCheckingRef.current = true;\n\n      try {\n        const result = await checkPreProvisionedSession(sessionId);\n\n        if (!result.valid) {\n          console.log(\n            `[PreProvisionPolling] Session ${sessionId.slice(\n              0,\n              8\n            )} was used, re-provisioning...`\n          );\n          // Session was used by another tab - reset state and re-provision.\n          // Zustand setState is synchronous, so ensurePreProvisionedSession\n          // will immediately see the idle status (no setTimeout needed).\n          useBuildSessionStore.setState({\n            preProvisioning: { status: \"idle\" },\n          });\n          ensurePreProvisionedSession();\n        }\n      } catch (err) {\n        console.error(\"[PreProvisionPolling] Failed to check session:\", err);\n        // On error, don't re-provision - might be a network issue\n      } finally {\n        isCheckingRef.current = false;\n      }\n    };\n\n    // Start polling\n    const intervalId = setInterval(checkValidity, POLLING_INTERVAL_MS);\n\n    // Also check immediately on mount (in case session was used while tab was inactive)\n    checkValidity();\n\n    return () => {\n      clearInterval(intervalId);\n    };\n  }, [enabled, sessionId, ensurePreProvisionedSession]);\n}\n"
  },
  {
    "path": "web/src/app/craft/hooks/useUsageLimits.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\n\nimport { UsageLimits, LimitType } from \"@/app/craft/types/streamingTypes\";\n\nimport {\n  USAGE_LIMITS_ENDPOINT,\n  fetchUsageLimits,\n} from \"@/app/craft/services/apiServices\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\n\n// Re-export types for consumers\nexport type { UsageLimits, LimitType };\n\n// =============================================================================\n// Hook Return Type\n// =============================================================================\n\nexport interface UseUsageLimitsReturn {\n  // Limits state\n  limits: UsageLimits | null;\n  isLoading: boolean;\n  error: Error | null;\n  /** Whether limits are enabled (cloud mode) */\n  isEnabled: boolean;\n\n  // Actions\n  refreshLimits: () => void;\n}\n\n// =============================================================================\n// Hook Implementation\n// =============================================================================\n\n/**\n * useUsageLimits - Hook for managing build mode usage limits\n *\n * Rate limits from API:\n * - Free/unpaid users: 5 messages total (limitType: \"total\")\n * - Paid users: 25 messages per week by default (limitType: \"weekly\")\n *   (configurable via CRAFT_PAID_USER_RATE_LIMIT env var)\n *\n * Only fetches when NEXT_PUBLIC_CLOUD_ENABLED is true.\n * Automatically fetches limits on mount and provides refresh capability.\n */\nexport function useUsageLimits(): UseUsageLimitsReturn {\n  const isEnabled = NEXT_PUBLIC_CLOUD_ENABLED;\n\n  const { data, error, isLoading, mutate } = useSWR<UsageLimits>(\n    // Only fetch if cloud is enabled\n    isEnabled ? USAGE_LIMITS_ENDPOINT : null,\n    fetchUsageLimits,\n    {\n      // Revalidate on focus (when user returns to tab)\n      revalidateOnFocus: true,\n      // Revalidate on reconnect\n      revalidateOnReconnect: true,\n      // No caching - usage changes with every message sent\n      // Callers should call refreshLimits() after sending messages\n      dedupingInterval: 0,\n    }\n  );\n\n  return {\n    limits: data ?? null,\n    isLoading,\n    error: error ?? null,\n    isEnabled,\n    refreshLimits: () => mutate(),\n  };\n}\n"
  },
  {
    "path": "web/src/app/craft/layout.tsx",
    "content": "import { redirect } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { unstable_noStore as noStore } from \"next/cache\";\nimport { requireAuth } from \"@/lib/auth/requireAuth\";\nimport { fetchSettingsSS } from \"@/components/settings/lib\";\n\nexport interface LayoutProps {\n  children: React.ReactNode;\n}\n\n/**\n * Build Layout - Minimal wrapper that handles authentication and feature flag check\n *\n * Child routes (/craft and /craft/v1) handle their own UI structure.\n * Redirects to /app if Onyx Craft is disabled via feature flag.\n */\nexport default async function Layout({ children }: LayoutProps) {\n  noStore();\n\n  // Only check authentication - data fetching is done client-side\n  const authResult = await requireAuth();\n\n  if (authResult.redirect) {\n    redirect(authResult.redirect as Route);\n  }\n\n  // Check if Onyx Craft is enabled via feature flag\n  // Only explicit true enables the feature; false or undefined = disabled\n  const settings = await fetchSettingsSS();\n  if (settings?.settings?.onyx_craft_enabled !== true) {\n    redirect(\"/app\" as Route);\n  }\n\n  return <>{children}</>;\n}\n"
  },
  {
    "path": "web/src/app/craft/onboarding/BuildOnboardingProvider.tsx",
    "content": "\"use client\";\n\nimport { createContext, useContext } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport { useOnboardingModal } from \"@/app/craft/onboarding/hooks/useOnboardingModal\";\nimport BuildOnboardingModal from \"@/app/craft/onboarding/components/BuildOnboardingModal\";\nimport NoLlmProvidersModal from \"@/app/craft/onboarding/components/NoLlmProvidersModal\";\nimport { OnboardingModalController } from \"@/app/craft/onboarding/types\";\nimport { useUser } from \"@/providers/UserProvider\";\n\n// Context for accessing onboarding modal controls\nconst OnboardingContext = createContext<OnboardingModalController | null>(null);\n\nexport function useOnboarding(): OnboardingModalController {\n  const ctx = useContext(OnboardingContext);\n  if (!ctx) {\n    throw new Error(\n      \"useOnboarding must be used within BuildOnboardingProvider\"\n    );\n  }\n  return ctx;\n}\n\ninterface BuildOnboardingProviderProps {\n  children: React.ReactNode;\n}\n\nexport function BuildOnboardingProvider({\n  children,\n}: BuildOnboardingProviderProps) {\n  const router = useRouter();\n  const { user } = useUser();\n  const controller = useOnboardingModal();\n\n  // Show loading state while user data is loading\n  if (!user) {\n    return (\n      <div className=\"flex items-center justify-center w-full h-full\">\n        <div className=\"animate-spin rounded-full h-8 w-8 border-b-2 border-text-01\" />\n      </div>\n    );\n  }\n\n  // Non-admin users with no LLM providers cannot use Craft\n  // Don't show modal while loading to prevent flash\n  const showNoProvidersModal =\n    !controller.isLoading && !controller.isAdmin && !controller.hasAnyProvider;\n\n  return (\n    <OnboardingContext.Provider value={controller}>\n      {/* Block non-admin users when no LLM providers are configured */}\n      <NoLlmProvidersModal\n        open={showNoProvidersModal}\n        onClose={() => router.push(\"/app\")}\n      />\n\n      {/* Unified onboarding modal - only show if not blocked by no providers */}\n      {!showNoProvidersModal && (\n        <BuildOnboardingModal\n          mode={controller.mode}\n          llmProviders={controller.llmProviders}\n          initialValues={controller.initialValues}\n          isAdmin={controller.isAdmin}\n          hasUserInfo={controller.hasUserInfo}\n          allProvidersConfigured={controller.allProvidersConfigured}\n          hasAnyProvider={controller.hasAnyProvider}\n          onComplete={controller.completeUserInfo}\n          onLlmComplete={controller.completeLlmSetup}\n          onClose={controller.close}\n        />\n      )}\n\n      {/* Build content - always rendered, modals overlay it */}\n      {children}\n    </OnboardingContext.Provider>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/onboarding/components/BuildOnboardingModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useMemo } from \"react\";\nimport {\n  track,\n  AnalyticsEvent,\n  LLMProviderConfiguredSource,\n} from \"@/lib/analytics\";\nimport { SvgArrowRight, SvgArrowLeft, SvgX } from \"@opal/icons\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport {\n  BuildUserInfo,\n  OnboardingModalMode,\n  OnboardingStep,\n} from \"@/app/craft/onboarding/types\";\nimport {\n  WorkArea,\n  Level,\n  WORK_AREAS_REQUIRING_LEVEL,\n  setBuildLlmSelection,\n  getBuildLlmSelection,\n  getDefaultLlmSelection,\n} from \"@/app/craft/onboarding/constants\";\nimport { LLMProviderDescriptor } from \"@/interfaces/llm\";\nimport { LLM_PROVIDERS_ADMIN_URL } from \"@/lib/llmConfig/constants\";\nimport { buildOnboardingInitialValues as buildInitialValues } from \"@/sections/modals/llmConfig/utils\";\nimport { testApiKeyHelper } from \"@/sections/modals/llmConfig/svc\";\nimport OnboardingInfoPages from \"@/app/craft/onboarding/components/OnboardingInfoPages\";\nimport OnboardingUserInfo from \"@/app/craft/onboarding/components/OnboardingUserInfo\";\nimport OnboardingLlmSetup, {\n  PROVIDERS,\n  type ProviderKey,\n} from \"@/app/craft/onboarding/components/OnboardingLlmSetup\";\n\n/**\n * Auto-select the best available LLM based on priority order.\n * Used when user completes onboarding without going through LLM setup step.\n */\nfunction autoSelectBestLlm(\n  llmProviders: LLMProviderDescriptor[] | undefined\n): void {\n  // Don't override if user already has a selection\n  if (getBuildLlmSelection()) return;\n\n  const selection = getDefaultLlmSelection(llmProviders);\n  if (selection) {\n    setBuildLlmSelection(selection);\n  }\n}\n\ninterface InitialValues {\n  firstName: string;\n  lastName: string;\n  workArea: WorkArea | undefined;\n  level: Level | undefined;\n}\n\ninterface BuildOnboardingModalProps {\n  mode: OnboardingModalMode;\n  llmProviders?: LLMProviderDescriptor[];\n  initialValues: InitialValues;\n  isAdmin: boolean;\n  hasUserInfo: boolean;\n  allProvidersConfigured: boolean;\n  hasAnyProvider: boolean;\n  onComplete: (info: BuildUserInfo) => Promise<void>;\n  onLlmComplete: () => Promise<void>;\n  onClose: () => void;\n}\n\n// Helper to compute steps for mode\nfunction getStepsForMode(\n  mode: OnboardingModalMode,\n  isAdmin: boolean,\n  allProvidersConfigured: boolean,\n  hasUserInfo: boolean\n): OnboardingStep[] {\n  switch (mode.type) {\n    case \"initial-onboarding\":\n      // Full flow: page1 → llm-setup (if admin + not all configured) → user-info\n      const steps: OnboardingStep[] = [\"page1\"];\n\n      if (isAdmin && !allProvidersConfigured) {\n        steps.push(\"llm-setup\");\n      }\n\n      if (!hasUserInfo) {\n        steps.push(\"user-info\");\n      }\n\n      return steps;\n\n    case \"edit-persona\":\n      return [\"user-info\"];\n\n    case \"add-llm\":\n      return [\"llm-setup\"];\n\n    case \"closed\":\n      return [];\n  }\n}\n\nexport default function BuildOnboardingModal({\n  mode,\n  llmProviders,\n  initialValues,\n  isAdmin,\n  hasUserInfo,\n  allProvidersConfigured,\n  hasAnyProvider,\n  onComplete,\n  onLlmComplete,\n  onClose,\n}: BuildOnboardingModalProps) {\n  // Compute steps based on mode\n  const steps = useMemo(\n    () => getStepsForMode(mode, isAdmin, allProvidersConfigured, hasUserInfo),\n    [mode, isAdmin, allProvidersConfigured, hasUserInfo]\n  );\n\n  // Determine initial step based on mode\n  const initialStep = useMemo((): OnboardingStep => {\n    if (mode.type === \"add-llm\") return \"llm-setup\";\n    return steps[0] || \"user-info\";\n  }, [mode.type, steps]);\n\n  // Navigation state\n  const [currentStep, setCurrentStep] = useState<OnboardingStep>(initialStep);\n\n  // Reset step when mode changes\n  useEffect(() => {\n    if (mode.type !== \"closed\") {\n      setCurrentStep(initialStep);\n    }\n  }, [mode.type, initialStep]);\n\n  // User info state - pre-fill from initialValues\n  const [firstName, setFirstName] = useState(initialValues.firstName);\n  const [lastName, setLastName] = useState(initialValues.lastName);\n  const [workArea, setWorkArea] = useState(initialValues.workArea);\n  const [level, setLevel] = useState(initialValues.level);\n\n  // Update form values when initialValues changes\n  useEffect(() => {\n    setFirstName(initialValues.firstName);\n    setLastName(initialValues.lastName);\n    setWorkArea(initialValues.workArea);\n    setLevel(initialValues.level);\n  }, [initialValues]);\n\n  // Determine initial provider for add-llm mode\n  const initialProvider = mode.type === \"add-llm\" ? mode.provider : undefined;\n\n  // LLM setup state\n  const [selectedProvider, setSelectedProvider] = useState<ProviderKey>(\n    (initialProvider as ProviderKey) || \"anthropic\"\n  );\n  const [selectedModel, setSelectedModel] = useState<string>(\n    PROVIDERS.find((p) => p.key === (initialProvider || \"anthropic\"))?.models[0]\n      ?.name || \"\"\n  );\n  const [apiKey, setApiKey] = useState(\"\");\n  const [connectionStatus, setConnectionStatus] = useState<\n    \"idle\" | \"testing\" | \"success\" | \"error\"\n  >(\"idle\");\n  const [errorMessage, setErrorMessage] = useState(\"\");\n\n  // Reset LLM state when mode changes to add-llm with a specific provider\n  useEffect(() => {\n    if (mode.type === \"add-llm\" && mode.provider) {\n      const providerConfig = PROVIDERS.find(\n        (p) => p.key === (mode.provider as ProviderKey)\n      );\n      if (providerConfig) {\n        setSelectedProvider(providerConfig.key);\n        setSelectedModel(providerConfig.models[0]?.name || \"\");\n        setApiKey(\"\");\n        setConnectionStatus(\"idle\");\n        setErrorMessage(\"\");\n      }\n    }\n  }, [mode]);\n\n  // Submission state\n  const [isSubmitting, setIsSubmitting] = useState(false);\n\n  const requiresLevel =\n    workArea !== undefined && WORK_AREAS_REQUIRING_LEVEL.includes(workArea);\n  const isUserInfoValid = workArea && (!requiresLevel || level);\n\n  const currentProviderConfig = PROVIDERS.find(\n    (p) => p.key === selectedProvider\n  )!;\n  const isLlmValid = apiKey.trim() && selectedModel;\n\n  // Calculate step navigation\n  const currentStepIndex = steps.indexOf(currentStep);\n  const totalSteps = steps.length;\n\n  const handleNext = () => {\n    setErrorMessage(\"\");\n    const nextIndex = currentStepIndex + 1;\n    if (nextIndex < steps.length) {\n      setCurrentStep(steps[nextIndex]!);\n    }\n  };\n\n  const handleBack = () => {\n    setErrorMessage(\"\");\n    const prevIndex = currentStepIndex - 1;\n    if (prevIndex >= 0) {\n      setCurrentStep(steps[prevIndex]!);\n    }\n  };\n\n  const handleConnect = async () => {\n    if (!apiKey.trim()) return;\n\n    setConnectionStatus(\"testing\");\n    setErrorMessage(\"\");\n\n    const baseValues = buildInitialValues();\n    const providerName = `build-mode-${currentProviderConfig.providerName}`;\n    const payload = {\n      ...baseValues,\n      name: providerName,\n      provider: currentProviderConfig.providerName,\n      api_key: apiKey,\n      default_model_name: selectedModel,\n      model_configurations: currentProviderConfig.models.map((m) => ({\n        name: m.name,\n        is_visible: true,\n        max_input_tokens: null,\n        supports_image_input: true,\n      })),\n    };\n\n    const testResult = await testApiKeyHelper(\n      currentProviderConfig.providerName,\n      payload\n    );\n\n    if (!testResult.ok) {\n      setErrorMessage(\n        \"There was an issue with this provider and model, please try a different one.\"\n      );\n      setConnectionStatus(\"error\");\n      return;\n    }\n\n    try {\n      const response = await fetch(\n        `${LLM_PROVIDERS_ADMIN_URL}?is_creation=true`,\n        {\n          method: \"PUT\",\n          headers: { \"Content-Type\": \"application/json\" },\n          body: JSON.stringify(payload),\n        }\n      );\n\n      if (!response.ok) {\n        setErrorMessage(\n          \"There was an issue creating the provider. Please try again.\"\n        );\n        setConnectionStatus(\"error\");\n        return;\n      }\n\n      if (!llmProviders || llmProviders.length === 0) {\n        const newProvider = await response.json();\n        if (newProvider?.id) {\n          await fetch(`${LLM_PROVIDERS_ADMIN_URL}/${newProvider.id}/default`, {\n            method: \"POST\",\n          });\n        }\n      }\n\n      setBuildLlmSelection({\n        providerName: providerName,\n        provider: currentProviderConfig.providerName,\n        modelName: selectedModel,\n      });\n\n      track(AnalyticsEvent.CONFIGURED_LLM_PROVIDER, {\n        provider: currentProviderConfig.providerName,\n        is_creation: true,\n        source: LLMProviderConfiguredSource.CRAFT_ONBOARDING,\n      });\n\n      setConnectionStatus(\"success\");\n    } catch (error) {\n      console.error(\"Error connecting LLM provider:\", error);\n      setErrorMessage(\n        \"There was an issue connecting the provider. Please try again.\"\n      );\n      setConnectionStatus(\"error\");\n    }\n  };\n\n  const handleSubmit = async () => {\n    // For add-llm mode, just close after successful connection\n    if (mode.type === \"add-llm\") {\n      if (connectionStatus === \"success\") {\n        await onLlmComplete();\n        onClose();\n      }\n      return;\n    }\n\n    if (!isUserInfoValid) return;\n    // If LLM setup was part of the flow and user has no providers (can't skip), require completion\n    if (\n      steps.includes(\"llm-setup\") &&\n      !hasAnyProvider &&\n      connectionStatus !== \"success\"\n    )\n      return;\n\n    setIsSubmitting(true);\n\n    try {\n      // Refresh LLM providers if LLM was set up\n      if (steps.includes(\"llm-setup\") && connectionStatus === \"success\") {\n        await onLlmComplete();\n      }\n\n      // Auto-select best available LLM if user didn't go through LLM setup\n      // (e.g., non-admin users or when all providers already configured)\n      autoSelectBestLlm(llmProviders);\n\n      // Validate workArea is provided before submission\n      if (!workArea) {\n        setErrorMessage(\"Please select a work area.\");\n        setIsSubmitting(false);\n        return;\n      }\n\n      const requiresLevel = WORK_AREAS_REQUIRING_LEVEL.includes(workArea);\n\n      // Validate level if required\n      if (requiresLevel && !level) {\n        setErrorMessage(\"Please select a level.\");\n        setIsSubmitting(false);\n        return;\n      }\n\n      await onComplete({\n        firstName: firstName.trim(),\n        lastName: lastName.trim() || undefined,\n        workArea,\n        level: level || undefined,\n      });\n\n      track(AnalyticsEvent.COMPLETED_CRAFT_ONBOARDING);\n      onClose();\n    } catch (error) {\n      console.error(\"Error completing onboarding:\", error);\n      setErrorMessage(\n        \"There was an issue completing onboarding. Please try again.\"\n      );\n    } finally {\n      setIsSubmitting(false);\n    }\n  };\n\n  if (mode.type === \"closed\") return null;\n\n  const canProceedUserInfo = isUserInfoValid;\n  const isConnecting = connectionStatus === \"testing\";\n  const canTestConnection = isLlmValid && !isConnecting;\n  const isLastStep = currentStepIndex === steps.length - 1;\n  const isFirstStep = currentStepIndex === 0;\n\n  return (\n    <div className=\"fixed inset-0 z-50 flex items-center justify-center\">\n      {/* Backdrop */}\n      <div className=\"absolute inset-0 bg-black/50 backdrop-blur-sm\" />\n\n      {/* Modal */}\n      <div className=\"relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01\">\n        {/* Close button for add-llm mode */}\n        {mode.type === \"add-llm\" && (\n          <button\n            type=\"button\"\n            onClick={onClose}\n            className=\"absolute top-4 right-4 z-10 p-1 rounded-08 text-text-03 hover:text-text-05 hover:bg-background-tint-02 transition-colors\"\n          >\n            <SvgX className=\"w-5 h-5\" />\n          </button>\n        )}\n        <div className=\"p-6 flex flex-col gap-6 min-h-[600px]\">\n          {/* User Info Step */}\n          {currentStep === \"user-info\" && (\n            <OnboardingUserInfo\n              firstName={firstName}\n              lastName={lastName}\n              workArea={workArea}\n              level={level}\n              onFirstNameChange={setFirstName}\n              onLastNameChange={setLastName}\n              onWorkAreaChange={setWorkArea}\n              onLevelChange={setLevel}\n            />\n          )}\n\n          {/* LLM Setup Step */}\n          {currentStep === \"llm-setup\" && (\n            <OnboardingLlmSetup\n              selectedProvider={selectedProvider}\n              selectedModel={selectedModel}\n              apiKey={apiKey}\n              connectionStatus={connectionStatus}\n              errorMessage={errorMessage}\n              llmProviders={llmProviders}\n              onProviderChange={setSelectedProvider}\n              onModelChange={setSelectedModel}\n              onApiKeyChange={setApiKey}\n              onConnectionStatusChange={setConnectionStatus}\n              onErrorMessageChange={setErrorMessage}\n            />\n          )}\n\n          {/* Page 1 - What is Onyx Craft? */}\n          {currentStep === \"page1\" && (\n            <OnboardingInfoPages\n              step=\"page1\"\n              workArea={workArea}\n              level={level}\n            />\n          )}\n\n          {/* Navigation buttons */}\n          <div className=\"relative flex justify-between items-center pt-2\">\n            {/* Back button */}\n            <div>\n              {!isFirstStep && (\n                <button\n                  type=\"button\"\n                  onClick={handleBack}\n                  className=\"flex items-center gap-1.5 px-4 py-2 rounded-12 border border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-02 transition-colors\"\n                >\n                  <SvgArrowLeft className=\"w-4 h-4\" />\n                  <Text mainUiAction>Back</Text>\n                </button>\n              )}\n            </div>\n\n            {/* Step indicator */}\n            {totalSteps > 1 && (\n              <div className=\"absolute left-1/2 -translate-x-1/2 flex items-center justify-center gap-2\">\n                {Array.from({ length: totalSteps }).map((_, i) => (\n                  <div\n                    key={i}\n                    className={cn(\n                      \"w-2 h-2 rounded-full transition-colors\",\n                      i === currentStepIndex\n                        ? \"bg-text-05\"\n                        : i < currentStepIndex\n                          ? \"bg-text-03\"\n                          : \"bg-border-01\"\n                    )}\n                  />\n                ))}\n              </div>\n            )}\n\n            {/* Action buttons */}\n            {currentStep === \"user-info\" && (\n              <button\n                type=\"button\"\n                onClick={() => {\n                  track(AnalyticsEvent.COMPLETED_CRAFT_USER_INFO, {\n                    first_name: firstName.trim(),\n                    last_name: lastName.trim() || undefined,\n                    work_area: workArea,\n                    level: level,\n                  });\n                  if (isLastStep) {\n                    handleSubmit();\n                  } else {\n                    handleNext();\n                  }\n                }}\n                disabled={!canProceedUserInfo || isSubmitting}\n                className={cn(\n                  \"flex items-center gap-1.5 px-4 py-2 rounded-12 transition-colors\",\n                  canProceedUserInfo && !isSubmitting\n                    ? \"bg-black dark:bg-white text-white dark:text-black hover:opacity-90\"\n                    : \"bg-background-neutral-01 text-text-02 cursor-not-allowed\"\n                )}\n              >\n                <Text\n                  mainUiAction\n                  className={cn(\n                    canProceedUserInfo && !isSubmitting\n                      ? \"text-white dark:text-black\"\n                      : \"text-text-02\"\n                  )}\n                >\n                  {isLastStep\n                    ? isSubmitting\n                      ? \"Saving...\"\n                      : \"Get Started!\"\n                    : \"Continue\"}\n                </Text>\n                {!isLastStep && (\n                  <SvgArrowRight\n                    className={cn(\n                      \"w-4 h-4\",\n                      canProceedUserInfo && !isSubmitting\n                        ? \"text-white dark:text-black\"\n                        : \"text-text-02\"\n                    )}\n                  />\n                )}\n              </button>\n            )}\n\n            {currentStep === \"page1\" && (\n              <button\n                type=\"button\"\n                onClick={handleNext}\n                className=\"flex items-center gap-1.5 px-4 py-2 rounded-12 transition-colors bg-black dark:bg-white text-white dark:text-black hover:opacity-90\"\n              >\n                <Text mainUiAction className=\"text-white dark:text-black\">\n                  Continue\n                </Text>\n                <SvgArrowRight className=\"w-4 h-4 text-white dark:text-black\" />\n              </button>\n            )}\n\n            {currentStep === \"llm-setup\" && connectionStatus !== \"success\" && (\n              <div className=\"flex items-center gap-2\">\n                {/* Skip button - only shown if user has at least one provider */}\n                {hasAnyProvider && !isLastStep && (\n                  <button\n                    type=\"button\"\n                    onClick={handleNext}\n                    disabled={isConnecting}\n                    className=\"flex items-center gap-1.5 px-4 py-2 rounded-12 border border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-02 transition-colors\"\n                  >\n                    <Text mainUiAction>Skip</Text>\n                    <SvgArrowRight className=\"w-4 h-4\" />\n                  </button>\n                )}\n                {/* Connect button */}\n                <button\n                  type=\"button\"\n                  onClick={handleConnect}\n                  disabled={!canTestConnection || isConnecting}\n                  className={cn(\n                    \"flex items-center gap-1.5 px-4 py-2 rounded-12 transition-colors\",\n                    canTestConnection && !isConnecting\n                      ? \"bg-black dark:bg-white text-white dark:text-black hover:opacity-90\"\n                      : \"bg-background-neutral-01 text-text-02 cursor-not-allowed\"\n                  )}\n                >\n                  <Text\n                    mainUiAction\n                    className={cn(\n                      canTestConnection && !isConnecting\n                        ? \"text-white dark:text-black\"\n                        : \"text-text-02\"\n                    )}\n                  >\n                    {isConnecting ? \"Connecting...\" : \"Connect\"}\n                  </Text>\n                </button>\n              </div>\n            )}\n\n            {currentStep === \"llm-setup\" && connectionStatus === \"success\" && (\n              <button\n                type=\"button\"\n                onClick={isLastStep ? handleSubmit : handleNext}\n                className=\"flex items-center gap-1.5 px-4 py-2 rounded-12 bg-black dark:bg-white text-white dark:text-black hover:opacity-90 transition-colors\"\n              >\n                <Text mainUiAction className=\"text-white dark:text-black\">\n                  {isLastStep ? \"Done\" : \"Continue\"}\n                </Text>\n                {!isLastStep && (\n                  <SvgArrowRight className=\"w-4 h-4 text-white dark:text-black\" />\n                )}\n              </button>\n            )}\n          </div>\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/onboarding/components/NoLlmProvidersModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgLock, SvgArrowRight } from \"@opal/icons\";\nimport { logout } from \"@/lib/user\";\nimport { cn } from \"@/lib/utils\";\n\ninterface NoLlmProvidersModalProps {\n  open: boolean;\n  onClose: () => void;\n}\n\n/**\n * Modal shown to non-admin users when no LLM providers are configured.\n * Explains that an admin needs to configure providers before they can use Craft.\n */\nexport default function NoLlmProvidersModal({\n  open,\n  onClose,\n}: NoLlmProvidersModalProps) {\n  const router = useRouter();\n  const [isLoading, setIsLoading] = useState(false);\n\n  const handleCreateNewAccount = async () => {\n    setIsLoading(true);\n    try {\n      await logout();\n      router.push(\"/auth/signup\");\n    } finally {\n      setIsLoading(false);\n    }\n  };\n\n  if (!open) return null;\n\n  return (\n    <div className=\"fixed inset-0 z-50 flex items-center justify-center\">\n      {/* Backdrop */}\n      <div className=\"absolute inset-0 bg-black/50 backdrop-blur-sm\" />\n\n      {/* Modal */}\n      <div className=\"relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01\">\n        <div className=\"p-6 flex flex-col gap-6 min-h-[400px]\">\n          {/* Content */}\n          <div className=\"flex-1 flex flex-col items-center justify-center gap-6\">\n            {/* Icon */}\n            <div className=\"w-16 h-16 rounded-full bg-background-tint-02 flex items-center justify-center\">\n              <SvgLock className=\"w-8 h-8 text-text-03\" />\n            </div>\n\n            {/* Header */}\n            <div className=\"flex flex-col items-center gap-2 text-center\">\n              <Text headingH2 text05>\n                LLM Provider Required\n              </Text>\n              <Text mainUiBody text03 className=\"max-w-sm\">\n                Onyx Craft requires an LLM provider to be configured, but only\n                admins can set this up.\n                <br />\n                <br />\n                Please ask your admin to configure an LLM provider, or create a\n                new Onyx account to become an admin yourself!\n              </Text>\n            </div>\n          </div>\n\n          {/* Footer buttons */}\n          <div className=\"flex justify-center gap-3 pt-2\">\n            <button\n              type=\"button\"\n              onClick={onClose}\n              className=\"flex items-center gap-1.5 px-4 py-2 rounded-12 border border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-02 transition-colors\"\n            >\n              <Text mainUiAction>Go Back</Text>\n            </button>\n            <button\n              type=\"button\"\n              onClick={handleCreateNewAccount}\n              disabled={isLoading}\n              className={cn(\n                \"flex items-center gap-1.5 px-4 py-2 rounded-12 transition-colors\",\n                !isLoading\n                  ? \"bg-black dark:bg-white text-white dark:text-black hover:opacity-90\"\n                  : \"bg-background-neutral-01 text-text-02 cursor-not-allowed\"\n              )}\n            >\n              <Text\n                mainUiAction\n                className={cn(\n                  !isLoading ? \"text-white dark:text-black\" : \"text-text-02\"\n                )}\n              >\n                {isLoading ? \"Signing out...\" : \"Create a new account\"}\n              </Text>\n              {!isLoading && (\n                <SvgArrowRight className=\"w-4 h-4 text-white dark:text-black\" />\n              )}\n            </button>\n          </div>\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/onboarding/components/NotAllowedModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgLock, SvgArrowRight } from \"@opal/icons\";\nimport { logout } from \"@/lib/user\";\nimport { cn } from \"@/lib/utils\";\n\ninterface NotAllowedModalProps {\n  open: boolean;\n  onClose: () => void;\n}\n\nexport default function NotAllowedModal({\n  open,\n  onClose,\n}: NotAllowedModalProps) {\n  const router = useRouter();\n  const [isLoading, setIsLoading] = useState(false);\n\n  const handleCreateNewAccount = async () => {\n    setIsLoading(true);\n    try {\n      await logout();\n      router.push(\"/auth/signup\");\n    } finally {\n      setIsLoading(false);\n    }\n  };\n\n  if (!open) return null;\n\n  return (\n    <div className=\"fixed inset-0 z-50 flex items-center justify-center\">\n      {/* Backdrop */}\n      <div className=\"absolute inset-0 bg-black/50 backdrop-blur-sm\" />\n\n      {/* Modal */}\n      <div className=\"relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01\">\n        <div className=\"p-6 flex flex-col gap-6 min-h-[400px]\">\n          {/* Content */}\n          <div className=\"flex-1 flex flex-col items-center justify-center gap-6\">\n            {/* Icon */}\n            <div className=\"w-16 h-16 rounded-full bg-background-tint-02 flex items-center justify-center\">\n              <SvgLock className=\"w-8 h-8 text-text-03\" />\n            </div>\n\n            {/* Header */}\n            <div className=\"flex flex-col items-center gap-2 text-center\">\n              <Text headingH2 text05>\n                Custom Crafting Restricted\n              </Text>\n              <Text mainUiBody text03 className=\"max-w-sm\">\n                Unfortunately, connecting your own data to Craft requires admin\n                permissions.\n                <br />\n                <br />\n                Luckily, you can create a new Onyx account to become an admin\n                and craft with your own data!\n              </Text>\n            </div>\n          </div>\n\n          {/* Footer buttons */}\n          <div className=\"flex justify-center gap-3 pt-2\">\n            <button\n              type=\"button\"\n              onClick={onClose}\n              className=\"flex items-center gap-1.5 px-4 py-2 rounded-12 border border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-02 transition-colors\"\n            >\n              <Text mainUiAction>Go Back</Text>\n            </button>\n            <button\n              type=\"button\"\n              onClick={handleCreateNewAccount}\n              disabled={isLoading}\n              className={cn(\n                \"flex items-center gap-1.5 px-4 py-2 rounded-12 transition-colors\",\n                !isLoading\n                  ? \"bg-black dark:bg-white text-white dark:text-black hover:opacity-90\"\n                  : \"bg-background-neutral-01 text-text-02 cursor-not-allowed\"\n              )}\n            >\n              <Text\n                mainUiAction\n                className={cn(\n                  !isLoading ? \"text-white dark:text-black\" : \"text-text-02\"\n                )}\n              >\n                {isLoading ? \"Signing out...\" : \"Create a new account\"}\n              </Text>\n              {!isLoading && (\n                <SvgArrowRight className=\"w-4 h-4 text-white dark:text-black\" />\n              )}\n            </button>\n          </div>\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/onboarding/components/OnboardingInfoPages.tsx",
    "content": "\"use client\";\n\nimport Text from \"@/refresh-components/texts/Text\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport {\n  WorkArea,\n  Level,\n  getPersonaInfo,\n  getPositionText,\n  DEMO_COMPANY_NAME,\n} from \"@/app/craft/onboarding/constants\";\nimport {\n  GoogleDriveIcon,\n  GithubIcon,\n  HubSpotIcon,\n  LinearIcon,\n  FirefliesIcon,\n  GmailIcon,\n  ColorSlackIcon,\n} from \"@/components/icons/icons\";\n\ninterface OnboardingInfoPagesProps {\n  step: \"page1\" | \"page2\";\n  workArea: WorkArea | undefined;\n  level: Level | undefined;\n}\n\nexport default function OnboardingInfoPages({\n  step,\n  workArea,\n  level,\n}: OnboardingInfoPagesProps) {\n  // Get persona info from mapping (only if both are valid enum values)\n  const personaInfo =\n    workArea && level ? getPersonaInfo(workArea, level) : undefined;\n\n  // Helper function to determine article (a/an) based on first letter\n  const getArticle = (word: string | undefined): string => {\n    if (!word || word.length === 0) return \"a\";\n    const firstLetter = word.toLowerCase()[0];\n    if (!firstLetter) return \"a\";\n    const vowels = [\"a\", \"e\", \"i\", \"o\", \"u\"];\n    return vowels.includes(firstLetter) ? \"an\" : \"a\";\n  };\n\n  // Get position text using shared helper (only if workArea is valid enum)\n  const positionText = workArea ? getPositionText(workArea, level) : \"Not set\";\n\n  // Determine article based on position text\n  const article = getArticle(positionText);\n\n  if (step === \"page1\") {\n    return (\n      <div className=\"flex-1 flex flex-col gap-6 items-center justify-center\">\n        <Text headingH2 text05>\n          What is Onyx Craft?\n        </Text>\n        <img\n          src=\"/craft_demo_image_1.png\"\n          alt=\"Onyx Craft\"\n          className=\"max-w-full h-auto rounded-12\"\n        />\n        <Text mainContentBody text04 className=\"text-center\">\n          Beautiful dashboards, slides, and reports.\n          <br />\n          Built by AI agents that know your world. Privately and securely.\n        </Text>\n      </div>\n    );\n  }\n\n  // Page 2\n  return (\n    <div className=\"flex-1 flex flex-col gap-6 items-center justify-center\">\n      <Text headingH2 text05>\n        Let's get started!\n      </Text>\n      <img\n        src=\"/craft_demo_image_2.png\"\n        alt=\"Onyx Craft\"\n        className=\"max-w-full h-auto rounded-12\"\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/onboarding/components/OnboardingLlmSetup.tsx",
    "content": "\"use client\";\n\nimport { SvgCheckCircle } from \"@opal/icons\";\nimport { cn } from \"@/lib/utils\";\nimport { Disabled } from \"@opal/core\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { LLMProviderName, LLMProviderDescriptor } from \"@/interfaces/llm\";\n\n// Provider configurations\nexport type ProviderKey = \"anthropic\" | \"openai\" | \"openrouter\";\n\ninterface ModelOption {\n  name: string;\n  label: string;\n  recommended?: boolean;\n}\n\nexport interface ProviderConfig {\n  key: ProviderKey;\n  label: string;\n  providerName: LLMProviderName;\n  recommended?: boolean;\n  models: ModelOption[];\n  apiKeyPlaceholder: string;\n  apiKeyUrl: string;\n  apiKeyLabel: string;\n}\n\nexport const PROVIDERS: ProviderConfig[] = [\n  {\n    key: \"anthropic\",\n    label: \"Anthropic\",\n    providerName: LLMProviderName.ANTHROPIC,\n    recommended: true,\n    models: [\n      { name: \"claude-opus-4-6\", label: \"Claude Opus 4.6\", recommended: true },\n      { name: \"claude-sonnet-4-6\", label: \"Claude Sonnet 4.6\" },\n    ],\n    apiKeyPlaceholder: \"sk-ant-...\",\n    apiKeyUrl: \"https://console.anthropic.com/dashboard\",\n    apiKeyLabel: \"Anthropic Console\",\n  },\n  {\n    key: \"openai\",\n    label: \"OpenAI\",\n    providerName: LLMProviderName.OPENAI,\n    models: [\n      { name: \"gpt-5.2\", label: \"GPT-5.2\", recommended: true },\n      { name: \"gpt-5.1\", label: \"GPT-5.1\" },\n    ],\n    apiKeyPlaceholder: \"sk-...\",\n    apiKeyUrl: \"https://platform.openai.com/api-keys\",\n    apiKeyLabel: \"OpenAI Dashboard\",\n  },\n  {\n    key: \"openrouter\",\n    label: \"OpenRouter\",\n    providerName: LLMProviderName.OPENROUTER,\n    models: [\n      {\n        name: \"moonshotai/kimi-k2-thinking\",\n        label: \"Kimi K2 Thinking\",\n        recommended: true,\n      },\n      { name: \"google/gemini-3-pro-preview\", label: \"Gemini 3 Pro\" },\n      { name: \"qwen/qwen3-235b-a22b-thinking-2507\", label: \"Qwen3 235B\" },\n    ],\n    apiKeyPlaceholder: \"sk-or-...\",\n    apiKeyUrl: \"https://openrouter.ai/keys\",\n    apiKeyLabel: \"OpenRouter Dashboard\",\n  },\n];\n\ninterface SelectableButtonProps {\n  selected: boolean;\n  onClick: () => void;\n  children: React.ReactNode;\n  subtext?: string;\n  disabled?: boolean;\n  tooltip?: string;\n}\n\nfunction SelectableButton({\n  selected,\n  onClick,\n  children,\n  subtext,\n  disabled,\n  tooltip,\n}: SelectableButtonProps) {\n  const button = (\n    <div className=\"flex flex-col items-center gap-1\">\n      <Disabled disabled={disabled} allowClick>\n        <button\n          type=\"button\"\n          onClick={onClick}\n          disabled={disabled}\n          className={cn(\n            \"w-full px-6 py-3 rounded-12 border transition-colors\",\n            selected\n              ? \"border-action-link-05 bg-action-link-01 text-action-text-link-05\"\n              : \"border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-01\"\n          )}\n        >\n          <Text mainUiAction>{children}</Text>\n        </button>\n      </Disabled>\n      {subtext && (\n        <Text figureSmallLabel text02>\n          {subtext}\n        </Text>\n      )}\n    </div>\n  );\n\n  if (tooltip) {\n    return <SimpleTooltip tooltip={tooltip}>{button}</SimpleTooltip>;\n  }\n\n  return button;\n}\n\ninterface ModelSelectButtonProps {\n  selected: boolean;\n  onClick: () => void;\n  label: string;\n  recommended?: boolean;\n  disabled?: boolean;\n}\n\nfunction ModelSelectButton({\n  selected,\n  onClick,\n  label,\n  recommended,\n  disabled,\n}: ModelSelectButtonProps) {\n  return (\n    <div className=\"flex flex-col items-center gap-1 w-full\">\n      <Disabled disabled={disabled} allowClick>\n        <button\n          type=\"button\"\n          onClick={onClick}\n          disabled={disabled}\n          className={cn(\n            \"w-full px-4 py-2.5 rounded-12 border transition-colors\",\n            selected\n              ? \"border-action-link-05 bg-action-link-01 text-action-text-link-05\"\n              : \"border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-01\"\n          )}\n        >\n          <Text mainUiAction>{label}</Text>\n        </button>\n      </Disabled>\n      {recommended && (\n        <Text figureSmallLabel text02>\n          Recommended\n        </Text>\n      )}\n    </div>\n  );\n}\n\ninterface OnboardingLlmSetupProps {\n  selectedProvider: ProviderKey;\n  selectedModel: string;\n  apiKey: string;\n  connectionStatus: \"idle\" | \"testing\" | \"success\" | \"error\";\n  errorMessage: string;\n  llmProviders?: LLMProviderDescriptor[];\n  onProviderChange: (provider: ProviderKey) => void;\n  onModelChange: (model: string) => void;\n  onApiKeyChange: (apiKey: string) => void;\n  onConnectionStatusChange: (\n    status: \"idle\" | \"testing\" | \"success\" | \"error\"\n  ) => void;\n  onErrorMessageChange: (message: string) => void;\n}\n\nexport default function OnboardingLlmSetup({\n  selectedProvider,\n  selectedModel,\n  apiKey,\n  connectionStatus,\n  errorMessage,\n  llmProviders,\n  onProviderChange,\n  onModelChange,\n  onApiKeyChange,\n  onConnectionStatusChange,\n  onErrorMessageChange,\n}: OnboardingLlmSetupProps) {\n  const currentProviderConfig = PROVIDERS.find(\n    (p) => p.key === selectedProvider\n  )!;\n\n  const isProviderConfigured = (providerName: string) => {\n    return llmProviders?.some((p) => p.provider === providerName) ?? false;\n  };\n\n  const handleProviderChange = (provider: ProviderKey) => {\n    const providerConfig = PROVIDERS.find((p) => p.key === provider)!;\n    // Don't allow selecting already-configured providers\n    if (isProviderConfigured(providerConfig.providerName)) return;\n\n    onProviderChange(provider);\n    onModelChange(providerConfig.models[0]?.name || \"\");\n    onConnectionStatusChange(\"idle\");\n    onErrorMessageChange(\"\");\n  };\n\n  const handleModelChange = (model: string) => {\n    onModelChange(model);\n    onConnectionStatusChange(\"idle\");\n    onErrorMessageChange(\"\");\n  };\n\n  const handleApiKeyChange = (value: string) => {\n    onApiKeyChange(value);\n    onConnectionStatusChange(\"idle\");\n    onErrorMessageChange(\"\");\n  };\n\n  return (\n    <div className=\"flex-1 flex flex-col gap-6 justify-between\">\n      {/* Header */}\n      <div className=\"flex items-center justify-center\">\n        <Text headingH2 text05>\n          Connect your LLM\n        </Text>\n      </div>\n\n      {/* Provider selection */}\n      <div className=\"flex flex-col gap-3 items-center\">\n        <Text mainUiBody text04>\n          Provider\n        </Text>\n        <div className=\"flex justify-center gap-3 w-full max-w-md\">\n          {PROVIDERS.map((provider) => {\n            const isConfigured = isProviderConfigured(provider.providerName);\n            return (\n              <div key={provider.key} className=\"flex-1\">\n                <SelectableButton\n                  selected={selectedProvider === provider.key}\n                  onClick={() => handleProviderChange(provider.key)}\n                  subtext={\n                    isConfigured\n                      ? \"Already configured\"\n                      : provider.recommended\n                        ? \"Recommended\"\n                        : undefined\n                  }\n                  disabled={connectionStatus === \"testing\" || isConfigured}\n                  tooltip={\n                    isConfigured\n                      ? \"This provider is already configured\"\n                      : undefined\n                  }\n                >\n                  {provider.label}\n                </SelectableButton>\n              </div>\n            );\n          })}\n        </div>\n      </div>\n\n      {/* Model selection */}\n      <div className=\"flex flex-col gap-3 items-center\">\n        <Text mainUiBody text04>\n          Default Model\n        </Text>\n        <div className=\"flex justify-center gap-3 flex-wrap w-full max-w-md\">\n          {currentProviderConfig.models.map((model) => (\n            <div key={model.name} className=\"flex-1 min-w-0\">\n              <ModelSelectButton\n                selected={selectedModel === model.name}\n                onClick={() => handleModelChange(model.name)}\n                label={model.label}\n                recommended={model.recommended}\n                disabled={connectionStatus === \"testing\"}\n              />\n            </div>\n          ))}\n        </div>\n      </div>\n\n      {/* API Key input */}\n      <div className=\"flex flex-col gap-3 items-center\">\n        <Text mainUiBody text04>\n          API Key\n        </Text>\n        <div className=\"w-full max-w-md\">\n          <Disabled disabled={connectionStatus === \"testing\"} allowClick>\n            <input\n              type=\"password\"\n              value={apiKey}\n              onChange={(e) => handleApiKeyChange(e.target.value)}\n              placeholder={currentProviderConfig.apiKeyPlaceholder}\n              disabled={connectionStatus === \"testing\"}\n              className=\"w-full px-3 py-2 rounded-08 input-normal text-text-04 placeholder:text-text-02 focus:outline-none\"\n            />\n          </Disabled>\n          {/* Message area */}\n          <div className=\"min-h-[2rem] flex justify-center pt-4\">\n            {connectionStatus === \"error\" && (\n              <Text secondaryBody className=\"text-red-500\">\n                {errorMessage}\n              </Text>\n            )}\n            <div\n              className={cn(\n                \"flex items-center gap-2 px-3 py-2 rounded-08 bg-status-success-00 border border-status-success-02 w-fit\",\n                connectionStatus !== \"success\" && \"hidden\"\n              )}\n            >\n              <SvgCheckCircle className=\"w-4 h-4 stroke-status-success-05 shrink-0\" />\n              <Text secondaryBody className=\"text-status-success-05\">\n                Success!\n              </Text>\n            </div>\n          </div>\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/onboarding/components/OnboardingUserInfo.tsx",
    "content": "\"use client\";\n\nimport { cn } from \"@/lib/utils\";\nimport { Disabled } from \"@opal/core\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport {\n  WorkArea,\n  Level,\n  WORK_AREA_OPTIONS,\n  LEVEL_OPTIONS,\n  WORK_AREAS_REQUIRING_LEVEL,\n  PERSONA_MAPPING,\n  DEMO_COMPANY_NAME,\n  getPositionText,\n} from \"@/app/craft/onboarding/constants\";\n\ninterface SelectableButtonProps {\n  selected: boolean;\n  onClick: () => void;\n  children: React.ReactNode;\n  subtext?: string;\n  disabled?: boolean;\n}\n\nfunction SelectableButton({\n  selected,\n  onClick,\n  children,\n  subtext,\n  disabled,\n}: SelectableButtonProps) {\n  return (\n    <div className=\"flex flex-col items-center gap-1\">\n      <Disabled disabled={disabled} allowClick>\n        <button\n          type=\"button\"\n          onClick={onClick}\n          disabled={disabled}\n          className={cn(\n            \"w-full px-6 py-3 rounded-12 border transition-colors\",\n            selected\n              ? \"border-action-link-05 bg-action-link-01 text-action-text-link-05\"\n              : \"border-border-01 bg-background-tint-00 text-text-04 hover:bg-background-tint-01\"\n          )}\n        >\n          <Text mainUiAction>{children}</Text>\n        </button>\n      </Disabled>\n      {subtext && (\n        <Text figureSmallLabel text02>\n          {subtext}\n        </Text>\n      )}\n    </div>\n  );\n}\n\ninterface OnboardingUserInfoProps {\n  firstName: string;\n  lastName: string;\n  workArea: WorkArea | undefined;\n  level: Level | undefined;\n  onFirstNameChange: (value: string) => void;\n  onLastNameChange: (value: string) => void;\n  onWorkAreaChange: (value: WorkArea | undefined) => void;\n  onLevelChange: (value: Level | undefined) => void;\n}\n\nexport default function OnboardingUserInfo({\n  firstName: _firstName,\n  lastName: _lastName,\n  workArea,\n  level,\n  onFirstNameChange: _onFirstNameChange,\n  onLastNameChange: _onLastNameChange,\n  onWorkAreaChange,\n  onLevelChange,\n}: OnboardingUserInfoProps) {\n  const requiresLevel =\n    workArea !== undefined && WORK_AREAS_REQUIRING_LEVEL.includes(workArea);\n\n  // Get persona info for preview\n  const selectedLevel = level ?? Level.IC;\n  const personaInfo =\n    workArea !== undefined ? PERSONA_MAPPING[workArea]?.[selectedLevel] : null;\n  const positionText =\n    workArea !== undefined ? getPositionText(workArea, level) : null;\n\n  return (\n    <div className=\"flex-1 flex flex-col gap-6\">\n      {/* Header */}\n      <div className=\"flex flex-col items-center gap-3\">\n        <Text headingH2 text05>\n          Demo Data Configuration\n        </Text>\n      </div>\n\n      <div className=\"flex-1 flex flex-col gap-8 justify-center\">\n        {/* Name inputs - commented out for now, can be re-enabled later\n        <div className=\"flex justify-center\">\n          <div className=\"grid grid-cols-2 gap-4 w-full max-w-md\">\n            <div className=\"flex flex-col gap-1.5\">\n              <Text secondaryBody text03>\n                First name\n              </Text>\n              <input\n                type=\"text\"\n                value={firstName}\n                onChange={(e) => onFirstNameChange(e.target.value)}\n                placeholder=\"Steven\"\n                className=\"w-full px-3 py-2 rounded-08 input-normal text-text-04 placeholder:text-text-02 focus:outline-none\"\n              />\n            </div>\n            <div className=\"flex flex-col gap-1.5\">\n              <Text secondaryBody text03>\n                Last name\n              </Text>\n              <input\n                type=\"text\"\n                value={lastName}\n                onChange={(e) => onLastNameChange(e.target.value)}\n                placeholder=\"Alexson\"\n                className=\"w-full px-3 py-2 rounded-08 input-normal text-text-04 placeholder:text-text-02 focus:outline-none\"\n              />\n            </div>\n          </div>\n        </div>\n        */}\n\n        <Text mainUiBody text04 className=\"text-center\">\n          While you wait for your data to sync, try out our simulated demo\n          dataset! <br />\n          The simulated data will adapt to your role and level choices below.\n        </Text>\n\n        {/* Work area */}\n        <div className=\"flex flex-col gap-3 items-center\">\n          <Text mainUiBody text04>\n            Select your role:\n          </Text>\n          <div className=\"grid grid-cols-3 gap-3 w-full\">\n            {WORK_AREA_OPTIONS.map((option) => (\n              <SelectableButton\n                key={option.value}\n                selected={workArea === option.value}\n                onClick={() => onWorkAreaChange(option.value)}\n              >\n                {option.label}\n              </SelectableButton>\n            ))}\n          </div>\n        </div>\n\n        {/* Level */}\n        <div className=\"flex flex-col gap-3 items-center\">\n          <Text mainUiBody text04>\n            Level{\" \"}\n            {requiresLevel && <span className=\"text-status-error-05\">*</span>}\n          </Text>\n          <div className=\"flex justify-center gap-3 w-full\">\n            <div className=\"grid grid-cols-2 gap-3 w-2/3\">\n              {LEVEL_OPTIONS.map((option) => (\n                <SelectableButton\n                  key={option.value}\n                  selected={level === option.value}\n                  onClick={() =>\n                    onLevelChange(\n                      level === option.value ? undefined : option.value\n                    )\n                  }\n                >\n                  {option.label}\n                </SelectableButton>\n              ))}\n            </div>\n          </div>\n        </div>\n\n        {/* Persona preview - always reserve space to prevent layout shift */}\n        <div className=\"flex justify-center min-h-[1.5rem]\">\n          {personaInfo && positionText && (\n            <Text mainContentBody text03 className=\"text-center\">\n              You will play the role of {positionText} named {personaInfo.name}{\" \"}\n              working at <br />\n              {DEMO_COMPANY_NAME}\n            </Text>\n          )}\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/onboarding/constants.ts",
    "content": "// =============================================================================\n// LLM Selection Types and Utilities\n// =============================================================================\n\nexport interface BuildLlmSelection {\n  providerName: string; // e.g., \"build-mode-anthropic\" (LLMProviderDescriptor.name)\n  provider: string; // e.g., \"anthropic\"\n  modelName: string; // e.g., \"claude-opus-4-6\"\n}\n\n// Priority order for smart default LLM selection\nconst LLM_SELECTION_PRIORITY = [\n  { provider: \"anthropic\", modelName: \"claude-opus-4-6\" },\n  { provider: \"openai\", modelName: \"gpt-5.2\" },\n  { provider: \"openrouter\", modelName: \"minimax/minimax-m2.1\" },\n] as const;\n\n// Minimal provider interface for selection logic\ninterface MinimalLlmProvider {\n  name: string;\n  provider: string;\n  model_configurations: { name: string; is_visible: boolean }[];\n}\n\n/**\n * Get the best default LLM selection based on available providers.\n * Priority: Anthropic > OpenAI > OpenRouter > first available\n */\nexport function getDefaultLlmSelection(\n  llmProviders: MinimalLlmProvider[] | undefined\n): BuildLlmSelection | null {\n  if (!llmProviders || llmProviders.length === 0) return null;\n\n  // Try each priority provider in order\n  for (const { provider, modelName } of LLM_SELECTION_PRIORITY) {\n    const matchingProvider = llmProviders.find((p) => p.provider === provider);\n    if (matchingProvider) {\n      return {\n        providerName: matchingProvider.name,\n        provider: matchingProvider.provider,\n        modelName,\n      };\n    }\n  }\n\n  // Fallback: first available provider, use its first visible model\n  const firstProvider = llmProviders[0];\n  if (firstProvider) {\n    const firstModel = firstProvider.model_configurations.find(\n      (m) => m.is_visible\n    );\n    return {\n      providerName: firstProvider.name,\n      provider: firstProvider.provider,\n      modelName: firstModel?.name ?? \"\",\n    };\n  }\n\n  return null;\n}\n\n// Recommended models config (for UI display)\nexport const RECOMMENDED_BUILD_MODELS = {\n  preferred: {\n    provider: \"anthropic\",\n    modelName: \"claude-opus-4-6\",\n    displayName: \"Claude Opus 4.6\",\n  },\n  alternatives: [\n    { provider: \"anthropic\", modelName: \"claude-sonnet-4-6\" },\n    { provider: \"openai\", modelName: \"gpt-5.2\" },\n    { provider: \"openai\", modelName: \"gpt-5.1-codex\" },\n    { provider: \"openrouter\", modelName: \"minimax/minimax-m2.1\" },\n  ],\n} as const;\n\n// Cookie utilities\nconst BUILD_LLM_COOKIE_KEY = \"build_llm_selection\";\n\nexport function getBuildLlmSelection(): BuildLlmSelection | null {\n  if (typeof document === \"undefined\") return null;\n  const cookie = document.cookie\n    .split(\"; \")\n    .find((row) => row.startsWith(`${BUILD_LLM_COOKIE_KEY}=`));\n  if (!cookie) return null;\n  try {\n    const value = cookie.split(\"=\")[1];\n    if (!value) return null;\n    return JSON.parse(decodeURIComponent(value));\n  } catch {\n    return null;\n  }\n}\n\nexport function setBuildLlmSelection(selection: BuildLlmSelection): void {\n  if (typeof document === \"undefined\") return;\n  const value = encodeURIComponent(JSON.stringify(selection));\n  // Cookie expires in 1 year\n  const expires = new Date(\n    Date.now() + 365 * 24 * 60 * 60 * 1000\n  ).toUTCString();\n  document.cookie = `${BUILD_LLM_COOKIE_KEY}=${value}; path=/; expires=${expires}; SameSite=Lax`;\n}\n\nexport function clearBuildLlmSelection(): void {\n  if (typeof document === \"undefined\") return;\n  document.cookie = `${BUILD_LLM_COOKIE_KEY}=; path=/; expires=Thu, 01 Jan 1970 00:00:00 GMT`;\n}\n\nexport function isRecommendedModel(\n  provider: string,\n  modelName: string\n): boolean {\n  const { preferred, alternatives } = RECOMMENDED_BUILD_MODELS;\n  // Exact match for preferred model\n  if (preferred.provider === provider && modelName === preferred.modelName) {\n    return true;\n  }\n  // Exact match for alternatives\n  return alternatives.some(\n    (alt) => alt.provider === provider && modelName === alt.modelName\n  );\n}\n\n// Curated providers for Build mode (shared between BuildOnboardingModal and BuildLLMPopover)\nexport interface BuildModeModel {\n  name: string;\n  label: string;\n  recommended?: boolean;\n}\n\nexport interface BuildModeProvider {\n  key: string;\n  label: string;\n  providerName: string;\n  recommended?: boolean;\n  models: BuildModeModel[];\n  // API-related fields (optional, only needed for onboarding modal)\n  apiKeyPlaceholder?: string;\n  apiKeyUrl?: string;\n  apiKeyLabel?: string;\n}\n\nexport const BUILD_MODE_PROVIDERS: BuildModeProvider[] = [\n  {\n    key: \"anthropic\",\n    label: \"Anthropic\",\n    providerName: \"anthropic\",\n    recommended: true,\n    models: [\n      { name: \"claude-opus-4-6\", label: \"Claude Opus 4.6\", recommended: true },\n      { name: \"claude-sonnet-4-6\", label: \"Claude Sonnet 4.6\" },\n    ],\n    apiKeyPlaceholder: \"sk-ant-...\",\n    apiKeyUrl: \"https://console.anthropic.com/dashboard\",\n    apiKeyLabel: \"Anthropic Console\",\n  },\n  {\n    key: \"openai\",\n    label: \"OpenAI\",\n    providerName: \"openai\",\n    models: [\n      { name: \"gpt-5.2\", label: \"GPT-5.2\", recommended: true },\n      { name: \"gpt-5.1-codex\", label: \"GPT-5.1 Codex\" },\n    ],\n    apiKeyPlaceholder: \"sk-...\",\n    apiKeyUrl: \"https://platform.openai.com/api-keys\",\n    apiKeyLabel: \"OpenAI Dashboard\",\n  },\n  {\n    key: \"openrouter\",\n    label: \"OpenRouter\",\n    providerName: \"openrouter\",\n    models: [\n      {\n        name: \"minimax/minimax-m2.1\",\n        label: \"MiniMax M2.1\",\n        recommended: true,\n      },\n    ],\n    apiKeyPlaceholder: \"sk-or-...\",\n    apiKeyUrl: \"https://openrouter.ai/keys\",\n    apiKeyLabel: \"OpenRouter Dashboard\",\n  },\n];\n\n// =============================================================================\n// User Info/Persona Constants\n// =============================================================================\n\nexport interface PersonaInfo {\n  name: string;\n  email: string;\n}\n\n// Work area enum - derived from PERSONA_MAPPING keys\nexport enum WorkArea {\n  ENGINEERING = \"engineering\",\n  PRODUCT = \"product\",\n  EXECUTIVE = \"executive\",\n  SALES = \"sales\",\n  MARKETING = \"marketing\",\n  OTHER = \"other\",\n}\n\n// Level enum - derived from PERSONA_MAPPING structure\nexport enum Level {\n  IC = \"ic\",\n  MANAGER = \"manager\",\n}\n\n// Persona mapping: work_area -> level -> PersonaInfo\n// Matches backend/onyx/server/features/build/sandbox/util/persona_mapping.py\n// This is the source of truth for work areas and levels\nexport const PERSONA_MAPPING: Record<WorkArea, Record<Level, PersonaInfo>> = {\n  [WorkArea.ENGINEERING]: {\n    [Level.IC]: {\n      name: \"Jiwon Kang\",\n      email: \"jiwon_kang@netherite-extraction.onyx.app\",\n    },\n    [Level.MANAGER]: {\n      name: \"Javier Morales\",\n      email: \"javier_morales@netherite-extraction.onyx.app\",\n    },\n  },\n  [WorkArea.SALES]: {\n    [Level.IC]: {\n      name: \"Megan Foster\",\n      email: \"megan_foster@netherite-extraction.onyx.app\",\n    },\n    [Level.MANAGER]: {\n      name: \"Valeria Cruz\",\n      email: \"valeria_cruz@netherite-extraction.onyx.app\",\n    },\n  },\n  [WorkArea.PRODUCT]: {\n    [Level.IC]: {\n      name: \"Michael Anderson\",\n      email: \"michael_anderson@netherite-extraction.onyx.app\",\n    },\n    [Level.MANAGER]: {\n      name: \"David Liu\",\n      email: \"david_liu@netherite-extraction.onyx.app\",\n    },\n  },\n  [WorkArea.MARKETING]: {\n    [Level.IC]: {\n      name: \"Rahul Patel\",\n      email: \"rahul_patel@netherite-extraction.onyx.app\",\n    },\n    [Level.MANAGER]: {\n      name: \"Olivia Reed\",\n      email: \"olivia_reed@netherite-extraction.onyx.app\",\n    },\n  },\n  [WorkArea.EXECUTIVE]: {\n    [Level.IC]: {\n      name: \"Sarah Mitchell\",\n      email: \"sarah_mitchell@netherite-extraction.onyx.app\",\n    },\n    [Level.MANAGER]: {\n      name: \"Sarah Mitchell\",\n      email: \"sarah_mitchell@netherite-extraction.onyx.app\",\n    },\n  },\n  [WorkArea.OTHER]: {\n    [Level.MANAGER]: {\n      name: \"Ralf Schroeder\",\n      email: \"ralf_schroeder@netherite-extraction.onyx.app\",\n    },\n    [Level.IC]: {\n      name: \"John Carpenter\",\n      email: \"john_carpenter@netherite-extraction.onyx.app\",\n    },\n  },\n};\n\n// Helper to capitalize first letter\nconst capitalize = (str: string): string => {\n  return str.charAt(0).toUpperCase() + str.slice(1);\n};\n\n// Derive WORK_AREA_OPTIONS from WorkArea enum\nexport const WORK_AREA_OPTIONS = Object.values(WorkArea).map((value) => ({\n  value,\n  label: capitalize(value),\n}));\n\n// Derive LEVEL_OPTIONS from Level enum\nexport const LEVEL_OPTIONS = Object.values(Level).map((value) => ({\n  value,\n  label: value === Level.IC ? \"IC\" : capitalize(value),\n}));\n\n// Work areas where level selection is required\n// Executive has the same persona for both levels, so level is optional\nexport const WORK_AREAS_REQUIRING_LEVEL: WorkArea[] = [\n  WorkArea.ENGINEERING,\n  WorkArea.PRODUCT,\n  WorkArea.SALES,\n  WorkArea.MARKETING,\n  WorkArea.OTHER,\n];\n\n// Helper function to get persona info\nexport function getPersonaInfo(\n  workArea: WorkArea,\n  level: Level\n): PersonaInfo | undefined {\n  return PERSONA_MAPPING[workArea]?.[level];\n}\n\n// Company name for demo personas\nexport const DEMO_COMPANY_NAME = \"Netherite Extraction Inc.\";\n\n// Helper function to get position text from work area and level\n// Executive: \"Executive\" (no level), Other: \"employee\", Everything else: show level if available\nexport function getPositionText(\n  workArea: WorkArea,\n  level: Level | undefined\n): string {\n  const workAreaLabel =\n    WORK_AREA_OPTIONS.find((opt) => opt.value === workArea)?.label || workArea;\n\n  if (workArea === WorkArea.OTHER) {\n    return \"Employee\";\n  }\n\n  if (workArea === WorkArea.EXECUTIVE) {\n    return \"Executive\";\n  }\n\n  if (level) {\n    const levelLabel =\n      LEVEL_OPTIONS.find((opt) => opt.value === level)?.label || level;\n    return `${workAreaLabel} ${levelLabel}`;\n  }\n\n  return workAreaLabel;\n}\n\nexport const BUILD_USER_PERSONA_COOKIE_NAME = \"build_user_persona\";\n\n// Helper type for the consolidated cookie\nexport interface BuildUserPersona {\n  workArea: WorkArea;\n  level?: Level;\n}\n\n// Helper functions for getting/setting the consolidated cookie\nexport function getBuildUserPersona(): BuildUserPersona | null {\n  if (typeof window === \"undefined\") return null;\n\n  const cookieValue = document.cookie\n    .split(\"; \")\n    .find((row) => row.startsWith(`${BUILD_USER_PERSONA_COOKIE_NAME}=`))\n    ?.split(\"=\")[1];\n\n  if (!cookieValue) return null;\n\n  try {\n    const parsed = JSON.parse(decodeURIComponent(cookieValue));\n    // Validate and cast to enum types\n    if (\n      parsed.workArea &&\n      Object.values(WorkArea).includes(parsed.workArea as WorkArea)\n    ) {\n      return {\n        workArea: parsed.workArea as WorkArea,\n        level:\n          parsed.level && Object.values(Level).includes(parsed.level as Level)\n            ? (parsed.level as Level)\n            : undefined,\n      };\n    }\n    return null;\n  } catch {\n    return null;\n  }\n}\n\nexport function setBuildUserPersona(persona: BuildUserPersona): void {\n  const cookieValue = encodeURIComponent(JSON.stringify(persona));\n  const expires = new Date();\n  expires.setFullYear(expires.getFullYear() + 1);\n  document.cookie = `${BUILD_USER_PERSONA_COOKIE_NAME}=${cookieValue}; path=/; expires=${expires.toUTCString()}`;\n}\n"
  },
  {
    "path": "web/src/app/craft/onboarding/hooks/useOnboardingModal.ts",
    "content": "\"use client\";\n\nimport { useCallback, useState, useMemo, useEffect } from \"react\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { useLLMProviders } from \"@/hooks/useLLMProviders\";\nimport { LLMProviderName } from \"@/interfaces/llm\";\nimport {\n  OnboardingModalMode,\n  OnboardingModalController,\n  BuildUserInfo,\n} from \"@/app/craft/onboarding/types\";\nimport {\n  getBuildUserPersona,\n  setBuildUserPersona,\n} from \"@/app/craft/onboarding/constants\";\nimport { updateUserPersonalization } from \"@/lib/userSettings\";\nimport { useBuildSessionStore } from \"@/app/craft/hooks/useBuildSessionStore\";\n\n// Check if all 3 build mode providers are configured (anthropic, openai, openrouter)\nfunction checkAllProvidersConfigured(\n  llmProviders: import(\"@/interfaces/llm\").LLMProviderDescriptor[] | undefined\n): boolean {\n  if (!llmProviders || llmProviders.length === 0) {\n    return false;\n  }\n  const configuredProviders = new Set(llmProviders.map((p) => p.provider));\n  return (\n    configuredProviders.has(LLMProviderName.ANTHROPIC) &&\n    configuredProviders.has(LLMProviderName.OPENAI) &&\n    configuredProviders.has(LLMProviderName.OPENROUTER)\n  );\n}\n\n// Check if at least one provider is configured\nfunction checkHasAnyProvider(\n  llmProviders: import(\"@/interfaces/llm\").LLMProviderDescriptor[] | undefined\n): boolean {\n  return !!(llmProviders && llmProviders.length > 0);\n}\n\nexport function useOnboardingModal(): OnboardingModalController {\n  const { user, isAdmin, refreshUser } = useUser();\n  const {\n    llmProviders,\n    isLoading: isLoadingLlm,\n    refetch: refetchLlmProviders,\n  } = useLLMProviders();\n\n  // Get ensurePreProvisionedSession from the session store\n  const ensurePreProvisionedSession = useBuildSessionStore(\n    (state) => state.ensurePreProvisionedSession\n  );\n\n  // Modal mode state\n  const [mode, setMode] = useState<OnboardingModalMode>({ type: \"closed\" });\n  const [hasInitialized, setHasInitialized] = useState(false);\n\n  // Compute initial values for the form (read fresh on every render)\n  const existingPersona = getBuildUserPersona();\n  const existingName = user?.personalization?.name || \"\";\n  const spaceIndex = existingName.indexOf(\" \");\n  const initialFirstName =\n    spaceIndex > 0 ? existingName.slice(0, spaceIndex) : existingName;\n  const initialLastName =\n    spaceIndex > 0 ? existingName.slice(spaceIndex + 1) : \"\";\n\n  const initialValues = {\n    firstName: initialFirstName,\n    lastName: initialLastName,\n    workArea: existingPersona?.workArea,\n    level: existingPersona?.level,\n  };\n\n  // Check if user has completed initial onboarding (only role required, not name)\n  const hasUserInfo = useMemo(() => {\n    return !!getBuildUserPersona()?.workArea;\n  }, [user]);\n\n  // Check if all providers are configured (skip LLM step entirely if so)\n  const allProvidersConfigured = useMemo(\n    () => checkAllProvidersConfigured(llmProviders),\n    [llmProviders]\n  );\n\n  // Check if at least one provider is configured (allow skipping LLM step)\n  const hasAnyProvider = useMemo(\n    () => checkHasAnyProvider(llmProviders),\n    [llmProviders]\n  );\n\n  // Auto-open initial onboarding modal on first load\n  // Shows if: user info (role) missing OR (admin AND no providers configured)\n  useEffect(() => {\n    if (hasInitialized || isLoadingLlm || !user) return;\n\n    const needsUserInfo = !hasUserInfo;\n    const needsLlmSetup = isAdmin && !hasAnyProvider;\n\n    if (needsUserInfo || needsLlmSetup) {\n      setMode({ type: \"initial-onboarding\" });\n    }\n\n    setHasInitialized(true);\n  }, [\n    hasInitialized,\n    isLoadingLlm,\n    user,\n    hasUserInfo,\n    isAdmin,\n    hasAnyProvider,\n  ]);\n\n  // Complete user info callback\n  const completeUserInfo = useCallback(\n    async (info: BuildUserInfo) => {\n      // Save name via API (handle optional lastName)\n      const fullName = info.lastName\n        ? `${info.firstName} ${info.lastName}`.trim()\n        : info.firstName.trim();\n      await updateUserPersonalization({ name: fullName });\n\n      // Save persona to cookie\n      setBuildUserPersona({\n        workArea: info.workArea,\n        level: info.level,\n      });\n\n      // Refresh user to update state\n      await refreshUser();\n\n      // Trigger pre-provisioning now that onboarding is complete\n      // This ensures the sandbox starts provisioning immediately rather than\n      // waiting for the controller effect to detect the cookie change\n      ensurePreProvisionedSession();\n    },\n    [refreshUser, ensurePreProvisionedSession]\n  );\n\n  // Complete LLM setup callback\n  const completeLlmSetup = useCallback(async () => {\n    await refetchLlmProviders();\n  }, [refetchLlmProviders]);\n\n  // Actions\n  const openPersonaEditor = useCallback(() => {\n    setMode({ type: \"edit-persona\" });\n  }, []);\n\n  const openLlmSetup = useCallback((provider?: string) => {\n    setMode({ type: \"add-llm\", provider });\n  }, []);\n\n  const close = useCallback(() => {\n    setMode({ type: \"closed\" });\n  }, []);\n\n  const isOpen = mode.type !== \"closed\";\n\n  return {\n    mode,\n    isOpen,\n    openPersonaEditor,\n    openLlmSetup,\n    close,\n    llmProviders,\n    initialValues,\n    completeUserInfo,\n    completeLlmSetup,\n    refetchLlmProviders,\n    isAdmin,\n    hasUserInfo,\n    allProvidersConfigured,\n    hasAnyProvider,\n    isLoading: isLoadingLlm,\n  };\n}\n"
  },
  {
    "path": "web/src/app/craft/onboarding/types.ts",
    "content": "import { WorkArea, Level } from \"./constants\";\nimport type {\n  LLMProviderDescriptor,\n  LLMProviderResponse,\n} from \"@/interfaces/llm\";\n\nexport interface BuildUserInfo {\n  firstName: string;\n  lastName?: string;\n  workArea: WorkArea;\n  level?: Level;\n}\n\n// Legacy flow interface (kept for backwards compatibility during migration)\nexport interface BuildOnboardingFlow {\n  showNotAllowedModal: boolean;\n  showUserInfoModal: boolean;\n  showLlmModal: boolean;\n}\n\n// New mode-based modal types\nexport type OnboardingModalMode =\n  | { type: \"initial-onboarding\" } // Full flow: user-info → llm? → content\n  | { type: \"edit-persona\" } // Just user-info step\n  | { type: \"add-llm\"; provider?: string } // Just llm-setup step\n  | { type: \"closed\" }; // Modal not visible\n\nexport type OnboardingStep = \"user-info\" | \"llm-setup\" | \"page1\" | \"page2\";\n\nexport interface OnboardingModalController {\n  mode: OnboardingModalMode;\n  isOpen: boolean;\n\n  // Actions\n  openPersonaEditor: () => void;\n  openLlmSetup: (provider?: string) => void;\n  close: () => void;\n\n  // Data needed for modal\n  llmProviders: LLMProviderDescriptor[] | undefined;\n  initialValues: {\n    firstName: string;\n    lastName: string;\n    workArea: WorkArea | undefined;\n    level: Level | undefined;\n  };\n\n  // State\n  isAdmin: boolean;\n  hasUserInfo: boolean; // User has completed user-info (name + workArea)\n  allProvidersConfigured: boolean; // All 3 providers (anthropic, openai, openrouter) are configured\n  hasAnyProvider: boolean; // At least 1 provider is configured (allows skipping)\n  isLoading: boolean; // True while LLM providers are loading\n\n  // Callbacks\n  completeUserInfo: (info: BuildUserInfo) => Promise<void>;\n  completeLlmSetup: () => Promise<void>;\n  refetchLlmProviders: () => Promise<\n    LLMProviderResponse<LLMProviderDescriptor> | undefined\n  >;\n}\n"
  },
  {
    "path": "web/src/app/craft/page.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport { CRAFT_PATH } from \"@/app/craft/v1/constants\";\n\n/**\n * Build Page - Redirects to the new Build V1 page\n *\n * The new Build experience is at /craft/v1\n * This page exists for backwards compatibility.\n */\nexport default function BuildPage() {\n  const router = useRouter();\n\n  useEffect(() => {\n    router.replace(CRAFT_PATH);\n  }, [router]);\n\n  return (\n    <div className=\"flex items-center justify-center h-screen\">\n      <div className=\"animate-pulse text-text-03\">Redirecting...</div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/services/apiServices.ts",
    "content": "import {\n  ApiSessionResponse,\n  ApiDetailedSessionResponse,\n  ApiMessageResponse,\n  ApiArtifactResponse,\n  ApiUsageLimitsResponse,\n  ApiWebappInfoResponse,\n  SessionHistoryItem,\n  Artifact,\n  BuildMessage,\n  StreamPacket,\n  UsageLimits,\n  DirectoryListing,\n  SharingScope,\n} from \"@/app/craft/types/streamingTypes\";\n\n// =============================================================================\n// API Configuration\n// =============================================================================\n\nconst API_BASE = \"/api/build\";\nexport const USAGE_LIMITS_ENDPOINT = `${API_BASE}/limit`;\n\n// =============================================================================\n// SSE Stream Processing\n// =============================================================================\n\nexport async function processSSEStream(\n  response: Response,\n  onPacket: (packet: StreamPacket) => void\n): Promise<void> {\n  const reader = response.body?.getReader();\n  if (!reader) throw new Error(\"No response body\");\n\n  const decoder = new TextDecoder();\n  let buffer = \"\";\n  let currentEventType = \"\";\n\n  while (true) {\n    const { done, value } = await reader.read();\n    if (done) break;\n\n    buffer += decoder.decode(value, { stream: true });\n    const lines = buffer.split(\"\\n\");\n    buffer = lines.pop() || \"\";\n\n    for (const line of lines) {\n      if (line.startsWith(\"event: \") || line.startsWith(\"event:\")) {\n        // Capture the event type from the SSE event line\n        currentEventType = line.slice(line.indexOf(\":\") + 1).trim();\n      } else if (line.startsWith(\"data: \") || line.startsWith(\"data:\")) {\n        const dataStr = line.slice(line.indexOf(\":\") + 1).trim();\n        if (dataStr) {\n          try {\n            const data = JSON.parse(dataStr);\n            // The backend sends `event: message` for all events and puts the\n            // actual type in data.type. Only use SSE event type as fallback\n            // if data.type is not present and SSE event is not \"message\".\n            if (\n              !data.type &&\n              currentEventType &&\n              currentEventType !== \"message\"\n            ) {\n              onPacket({ ...data, type: currentEventType });\n            } else {\n              onPacket(data);\n            }\n          } catch (e) {\n            console.error(\"[SSE] Parse error:\", e, \"Raw data:\", dataStr);\n          }\n        }\n        // Reset event type for next event\n        currentEventType = \"\";\n      }\n    }\n  }\n}\n\n// =============================================================================\n// Session API\n// =============================================================================\n\nexport interface CreateSessionOptions {\n  name?: string | null;\n  demoDataEnabled?: boolean;\n  userWorkArea?: string | null;\n  userLevel?: string | null;\n  // LLM selection from user's cookie\n  llmProviderType?: string | null; // Provider type (e.g., \"anthropic\", \"openai\")\n  llmModelName?: string | null;\n}\n\nexport async function createSession(\n  options?: CreateSessionOptions\n): Promise<ApiDetailedSessionResponse> {\n  const res = await fetch(`${API_BASE}/sessions`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      name: options?.name || null,\n      demo_data_enabled: options?.demoDataEnabled ?? true,\n      user_work_area: options?.userWorkArea || null,\n      user_level: options?.userLevel || null,\n      llm_provider_type: options?.llmProviderType || null,\n      llm_model_name: options?.llmModelName || null,\n    }),\n  });\n\n  if (!res.ok) {\n    throw new Error(`Failed to create session: ${res.status}`);\n  }\n\n  return res.json();\n}\n\nexport async function fetchSession(\n  sessionId: string\n): Promise<ApiDetailedSessionResponse> {\n  const res = await fetch(`${API_BASE}/sessions/${sessionId}`);\n\n  if (!res.ok) {\n    throw new Error(`Failed to load session: ${res.status}`);\n  }\n\n  return res.json();\n}\n\nexport async function fetchSessionHistory(): Promise<SessionHistoryItem[]> {\n  const res = await fetch(`${API_BASE}/sessions`);\n\n  if (!res.ok) {\n    throw new Error(`Failed to fetch session history: ${res.status}`);\n  }\n\n  const data = await res.json();\n  return data.sessions.map((s: ApiSessionResponse) => ({\n    id: s.id,\n    title: s.name || `Session ${s.id.slice(0, 8)}...`,\n    createdAt: new Date(s.created_at),\n  }));\n}\n\nexport async function generateSessionName(sessionId: string): Promise<string> {\n  const res = await fetch(`${API_BASE}/sessions/${sessionId}/generate-name`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n  });\n\n  if (!res.ok) {\n    throw new Error(`Failed to generate session name: ${res.status}`);\n  }\n\n  const data = await res.json();\n  return data.name;\n}\n\nexport interface SuggestionBubble {\n  theme: \"add\" | \"question\";\n  text: string;\n}\n\nexport async function generateFollowupSuggestions(\n  sessionId: string,\n  userMessage: string,\n  agentMessage: string\n): Promise<SuggestionBubble[]> {\n  const res = await fetch(\n    `${API_BASE}/sessions/${sessionId}/generate-suggestions`,\n    {\n      method: \"POST\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify({\n        user_message: userMessage,\n        assistant_message: agentMessage,\n      }),\n    }\n  );\n\n  if (!res.ok) {\n    throw new Error(`Failed to generate suggestions: ${res.status}`);\n  }\n\n  const data = await res.json();\n  return data.suggestions;\n}\n\nexport async function updateSessionName(\n  sessionId: string,\n  name: string | null\n): Promise<void> {\n  const res = await fetch(`${API_BASE}/sessions/${sessionId}/name`, {\n    method: \"PUT\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ name }),\n  });\n\n  if (!res.ok) {\n    throw new Error(`Failed to update session name: ${res.status}`);\n  }\n}\n\nexport async function setSessionSharing(\n  sessionId: string,\n  sharingScope: SharingScope\n): Promise<{ session_id: string; sharing_scope: SharingScope }> {\n  const res = await fetch(`${API_BASE}/sessions/${sessionId}/public`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ sharing_scope: sharingScope }),\n  });\n\n  if (!res.ok) {\n    throw new Error(`Failed to update session sharing: ${res.status}`);\n  }\n\n  return res.json();\n}\n\nexport async function deleteSession(sessionId: string): Promise<void> {\n  const res = await fetch(`${API_BASE}/sessions/${sessionId}`, {\n    method: \"DELETE\",\n  });\n\n  if (!res.ok) {\n    throw new Error(`Failed to delete session: ${res.status}`);\n  }\n}\n\n/**\n * Restore a sleeping sandbox and load the session's snapshot.\n * This is a blocking call that waits until the restore is complete.\n *\n * Handles two cases:\n * 1. Sandbox is SLEEPING: Re-provisions pod, then loads session snapshot\n * 2. Sandbox is RUNNING but session not loaded: Just loads session snapshot\n *\n * Returns immediately if session workspace already exists in pod.\n */\nexport async function restoreSession(\n  sessionId: string\n): Promise<ApiDetailedSessionResponse> {\n  const res = await fetch(`${API_BASE}/sessions/${sessionId}/restore`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n  });\n\n  if (!res.ok) {\n    const errorData = await res.json().catch(() => ({}));\n    throw new Error(\n      errorData.detail || `Failed to restore session: ${res.status}`\n    );\n  }\n\n  return res.json();\n}\n\n/**\n * Check if a pre-provisioned session is still valid (empty).\n * Used for polling to detect when another tab has used the session.\n *\n * @returns { valid: true, session_id: string } if session is still empty\n * @returns { valid: false, session_id: null } if session has messages or doesn't exist\n */\nexport async function checkPreProvisionedSession(\n  sessionId: string\n): Promise<{ valid: boolean; session_id: string | null }> {\n  const res = await fetch(\n    `${API_BASE}/sessions/${sessionId}/pre-provisioned-check`\n  );\n\n  if (!res.ok) {\n    // Treat errors as invalid session\n    return { valid: false, session_id: null };\n  }\n\n  return res.json();\n}\n\n// =============================================================================\n// Messages API\n// =============================================================================\n\n/**\n * Extract text content from message_metadata.\n * For user_message: {type: \"user_message\", content: {type: \"text\", text: \"...\"}}\n */\nfunction extractContentFromMetadata(\n  metadata: Record<string, any> | null | undefined\n): string {\n  if (!metadata) return \"\";\n  const content = metadata.content;\n  if (!content) return \"\";\n  if (typeof content === \"string\") return content;\n  if (typeof content === \"object\" && content.type === \"text\" && content.text) {\n    return content.text;\n  }\n  return \"\";\n}\n\nexport async function fetchMessages(\n  sessionId: string\n): Promise<BuildMessage[]> {\n  const res = await fetch(`${API_BASE}/sessions/${sessionId}/messages`);\n\n  if (!res.ok) {\n    throw new Error(`Failed to fetch messages: ${res.status}`);\n  }\n\n  const data = await res.json();\n  return data.messages.map((m: ApiMessageResponse) => ({\n    id: m.id,\n    type: m.type,\n    // Content is stored in message_metadata, not as a separate field\n    content: m.content || extractContentFromMetadata(m.message_metadata),\n    message_metadata: m.message_metadata,\n    timestamp: new Date(m.created_at),\n  }));\n}\n\n/**\n * Custom error class for rate limit (429) errors.\n * Used to distinguish rate limit errors from other API errors\n * so the UI can show an upsell modal instead of a generic error.\n */\nexport class RateLimitError extends Error {\n  public readonly statusCode: number = 429;\n\n  constructor() {\n    super(\"Rate limit exceeded\");\n    this.name = \"RateLimitError\";\n  }\n}\n\n/**\n * Send a message and return the streaming response.\n * The caller is responsible for processing the SSE stream.\n */\nexport async function sendMessageStream(\n  sessionId: string,\n  content: string,\n  signal?: AbortSignal\n): Promise<Response> {\n  const res = await fetch(`${API_BASE}/sessions/${sessionId}/send-message`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ content }),\n    signal,\n  });\n\n  if (!res.ok) {\n    // Handle rate limit errors specifically so UI can show upsell modal\n    if (res.status === 429) {\n      throw new RateLimitError();\n    }\n    throw new Error(`Failed to send message: ${res.status}`);\n  }\n\n  return res;\n}\n\n// =============================================================================\n// Artifacts API\n// =============================================================================\n\nexport async function fetchArtifacts(sessionId: string): Promise<Artifact[]> {\n  const res = await fetch(`${API_BASE}/sessions/${sessionId}/artifacts`);\n\n  if (!res.ok) {\n    throw new Error(`Failed to fetch artifacts: ${res.status}`);\n  }\n\n  const data = await res.json();\n  // Backend returns a direct array, not wrapped in an object\n  return data.map((a: ApiArtifactResponse) => ({\n    id: a.id,\n    session_id: a.session_id,\n    type: a.type,\n    name: a.name,\n    path: a.path,\n    preview_url: a.preview_url,\n    created_at: new Date(a.created_at),\n    updated_at: new Date(a.updated_at),\n  }));\n}\n\n// =============================================================================\n// Webapp API\n// =============================================================================\n\nexport async function fetchWebappInfo(\n  sessionId: string\n): Promise<ApiWebappInfoResponse> {\n  const res = await fetch(`${API_BASE}/sessions/${sessionId}/webapp-info`);\n\n  if (!res.ok) {\n    throw new Error(`Failed to fetch webapp info: ${res.status}`);\n  }\n\n  return res.json();\n}\n\n// =============================================================================\n// Files API\n// =============================================================================\n\nexport async function fetchDirectoryListing(\n  sessionId: string,\n  path: string = \"\"\n): Promise<DirectoryListing> {\n  const url = new URL(\n    `${API_BASE}/sessions/${sessionId}/files`,\n    window.location.origin\n  );\n  if (path) {\n    url.searchParams.set(\"path\", path);\n  }\n\n  const res = await fetch(url.toString());\n\n  if (!res.ok) {\n    throw new Error(`Failed to fetch directory listing: ${res.status}`);\n  }\n\n  return res.json();\n}\n\n/**\n * Trigger a browser download for a single file from the sandbox.\n */\nexport function downloadArtifactFile(sessionId: string, path: string): void {\n  const encodedPath = path\n    .split(\"/\")\n    .map((segment) => encodeURIComponent(segment))\n    .join(\"/\");\n  const link = document.createElement(\"a\");\n  link.href = `${API_BASE}/sessions/${sessionId}/artifacts/${encodedPath}`;\n  link.download = path.split(\"/\").pop() || path;\n  document.body.appendChild(link);\n  link.click();\n  document.body.removeChild(link);\n}\n\n/**\n * Trigger a browser download for a directory as a zip file.\n */\nexport function downloadDirectory(sessionId: string, path: string): void {\n  const encodedPath = path\n    .split(\"/\")\n    .map((segment) => encodeURIComponent(segment))\n    .join(\"/\");\n  const link = document.createElement(\"a\");\n  link.href = `${API_BASE}/sessions/${sessionId}/download-directory/${encodedPath}`;\n  link.download = \"\";\n  document.body.appendChild(link);\n  link.click();\n  document.body.removeChild(link);\n}\n\nexport interface FileContentResponse {\n  content: string; // For text files: text content. For images: data URL (base64-encoded)\n  mimeType: string;\n  isImage?: boolean; // True if the content is an image data URL\n  error?: string; // Error message if file can't be previewed\n}\n\n// Maximum file size for image preview (10MB)\nconst MAX_IMAGE_SIZE = 10 * 1024 * 1024;\n\n/**\n * Fetch file content from the sandbox for preview.\n * Reuses the artifacts download endpoint but reads content as text.\n */\nexport async function fetchFileContent(\n  sessionId: string,\n  path: string\n): Promise<FileContentResponse> {\n  // Encode each path segment individually (spaces, special chars) but preserve slashes\n  const encodedPath = path\n    .split(\"/\")\n    .map((segment) => encodeURIComponent(segment))\n    .join(\"/\");\n\n  const res = await fetch(\n    `${API_BASE}/sessions/${sessionId}/artifacts/${encodedPath}`\n  );\n\n  if (!res.ok) {\n    throw new Error(`Failed to fetch file content: ${res.status}`);\n  }\n\n  const mimeType = res.headers.get(\"Content-Type\") || \"text/plain\";\n\n  // For images, convert to data URL instead of blob URL (no cleanup needed)\n  if (mimeType.startsWith(\"image/\")) {\n    const blob = await res.blob();\n\n    // Check file size limit for images\n    if (blob.size > MAX_IMAGE_SIZE) {\n      return {\n        content: \"\",\n        mimeType,\n        isImage: false,\n        error: `Image too large to preview (${(\n          blob.size /\n          (1024 * 1024)\n        ).toFixed(1)}MB). Maximum size is ${MAX_IMAGE_SIZE / (1024 * 1024)}MB.`,\n      };\n    }\n\n    return new Promise((resolve, reject) => {\n      const reader = new FileReader();\n      reader.onloadend = () => {\n        // Verify result is a string\n        if (typeof reader.result !== \"string\") {\n          reject(new Error(\"FileReader returned unexpected type\"));\n          return;\n        }\n        resolve({\n          content: reader.result,\n          mimeType,\n          isImage: true,\n        });\n      };\n      reader.onerror = () => {\n        reject(new Error(reader.error?.message || \"Failed to read image file\"));\n      };\n      reader.readAsDataURL(blob);\n    });\n  }\n\n  const content = await res.text();\n  return { content, mimeType, isImage: false };\n}\n\n// =============================================================================\n// Usage Limits API\n// =============================================================================\n\n/** Transform API response to frontend types */\nfunction transformUsageLimitsResponse(\n  data: ApiUsageLimitsResponse\n): UsageLimits {\n  return {\n    isLimited: data.is_limited,\n    limitType: data.limit_type,\n    messagesUsed: data.messages_used,\n    limit: data.limit,\n    resetTimestamp: data.reset_timestamp\n      ? new Date(data.reset_timestamp)\n      : null,\n  };\n}\n\nexport async function fetchUsageLimits(): Promise<UsageLimits> {\n  const res = await fetch(USAGE_LIMITS_ENDPOINT);\n\n  if (!res.ok) {\n    throw new Error(`Failed to fetch usage limits: ${res.status}`);\n  }\n\n  const data: ApiUsageLimitsResponse = await res.json();\n  return transformUsageLimitsResponse(data);\n}\n\n// =============================================================================\n// File Upload API\n// =============================================================================\n\nexport interface UploadFileResponse {\n  filename: string;\n  path: string;\n  size_bytes: number;\n}\n\n/**\n * Upload a file to the session's sandbox.\n * The file will be placed in the sandbox's user_uploaded_files directory.\n */\nexport async function uploadFile(\n  sessionId: string,\n  file: File\n): Promise<UploadFileResponse> {\n  const formData = new FormData();\n  formData.append(\"file\", file);\n\n  const res = await fetch(`${API_BASE}/sessions/${sessionId}/upload`, {\n    method: \"POST\",\n    body: formData,\n  });\n\n  if (!res.ok) {\n    const errorData = await res.json().catch(() => ({}));\n    throw new Error(errorData.detail || `Failed to upload file: ${res.status}`);\n  }\n\n  return res.json();\n}\n\n/**\n * Delete a file from the session's sandbox.\n */\nexport async function deleteFile(\n  sessionId: string,\n  path: string\n): Promise<void> {\n  // Encode each path segment individually (spaces, special chars) but preserve slashes\n  const encodedPath = path\n    .split(\"/\")\n    .map((segment) => encodeURIComponent(segment))\n    .join(\"/\");\n\n  const res = await fetch(\n    `${API_BASE}/sessions/${sessionId}/files/${encodedPath}`,\n    {\n      method: \"DELETE\",\n    }\n  );\n\n  if (!res.ok) {\n    const errorData = await res.json().catch(() => ({}));\n    throw new Error(errorData.detail || `Failed to delete file: ${res.status}`);\n  }\n}\n\n/**\n * Export a markdown file as DOCX.\n * Returns a Blob of the converted document.\n */\nexport async function exportDocx(\n  sessionId: string,\n  path: string\n): Promise<Blob> {\n  const encodedPath = path\n    .split(\"/\")\n    .map((segment) => encodeURIComponent(segment))\n    .join(\"/\");\n\n  const res = await fetch(\n    `${API_BASE}/sessions/${sessionId}/export-docx/${encodedPath}`\n  );\n\n  if (!res.ok) {\n    const errorData = await res.json().catch(() => ({}));\n    throw new Error(\n      errorData.detail || `Failed to export as DOCX: ${res.status}`\n    );\n  }\n\n  return res.blob();\n}\n\n// =============================================================================\n// PPTX Preview API\n// =============================================================================\n\nexport interface PptxPreviewResponse {\n  slide_count: number;\n  slide_paths: string[];\n  cached: boolean;\n}\n\n/**\n * Fetch PPTX slide preview images.\n * Triggers on-demand conversion (soffice → pdftoppm) with disk caching.\n */\nexport async function fetchPptxPreview(\n  sessionId: string,\n  path: string\n): Promise<PptxPreviewResponse> {\n  const encodedPath = path\n    .split(\"/\")\n    .map((segment) => encodeURIComponent(segment))\n    .join(\"/\");\n\n  const res = await fetch(\n    `${API_BASE}/sessions/${sessionId}/pptx-preview/${encodedPath}`\n  );\n\n  if (!res.ok) {\n    const errorData = await res.json().catch(() => ({}));\n    throw new Error(\n      errorData.detail || `Failed to generate PPTX preview: ${res.status}`\n    );\n  }\n\n  return res.json();\n}\n\n// =============================================================================\n// Connector Management API\n// =============================================================================\n\nexport async function deleteConnector(\n  connectorId: number,\n  credentialId: number\n): Promise<void> {\n  const res = await fetch(\"/api/manage/admin/deletion-attempt\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      connector_id: connectorId,\n      credential_id: credentialId,\n    }),\n  });\n\n  if (!res.ok) {\n    const errorData = await res.json();\n    throw new Error(\n      errorData.detail || `Failed to delete connector: ${res.status}`\n    );\n  }\n}\n\n// =============================================================================\n// User Library API\n// =============================================================================\n\nimport {\n  LibraryEntry,\n  CreateDirectoryRequest,\n  UploadResponse,\n} from \"@/app/craft/types/user-library\";\n\nconst USER_LIBRARY_BASE = `${API_BASE}/user-library`;\n\n/**\n * Fetch the user's library tree (uploaded files).\n */\nexport async function fetchLibraryTree(): Promise<LibraryEntry[]> {\n  const res = await fetch(`${USER_LIBRARY_BASE}/tree`);\n\n  if (!res.ok) {\n    throw new Error(`Failed to fetch library tree: ${res.status}`);\n  }\n\n  return res.json();\n}\n\n/**\n * Upload files to the user library.\n */\nexport async function uploadLibraryFiles(\n  path: string,\n  files: File[]\n): Promise<UploadResponse> {\n  const formData = new FormData();\n  formData.append(\"path\", path);\n  for (const file of files) {\n    formData.append(\"files\", file);\n  }\n\n  const res = await fetch(`${USER_LIBRARY_BASE}/upload`, {\n    method: \"POST\",\n    body: formData,\n  });\n\n  if (!res.ok) {\n    const errorData = await res.json().catch(() => ({}));\n    throw new Error(\n      errorData.detail || `Failed to upload files: ${res.status}`\n    );\n  }\n\n  return res.json();\n}\n\n/**\n * Upload and extract a zip file to the user library.\n */\nexport async function uploadLibraryZip(\n  path: string,\n  file: File\n): Promise<UploadResponse> {\n  const formData = new FormData();\n  formData.append(\"path\", path);\n  formData.append(\"file\", file);\n\n  const res = await fetch(`${USER_LIBRARY_BASE}/upload-zip`, {\n    method: \"POST\",\n    body: formData,\n  });\n\n  if (!res.ok) {\n    const errorData = await res.json().catch(() => ({}));\n    throw new Error(errorData.detail || `Failed to upload zip: ${res.status}`);\n  }\n\n  return res.json();\n}\n\n/**\n * Create a directory in the user library.\n */\nexport async function createLibraryDirectory(\n  request: CreateDirectoryRequest\n): Promise<LibraryEntry> {\n  const res = await fetch(`${USER_LIBRARY_BASE}/directories`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(request),\n  });\n\n  if (!res.ok) {\n    const errorData = await res.json().catch(() => ({}));\n    throw new Error(\n      errorData.detail || `Failed to create directory: ${res.status}`\n    );\n  }\n\n  return res.json();\n}\n\n/**\n * Toggle sync status for a file/directory in the user library.\n */\nexport async function toggleLibraryFileSync(\n  documentId: string,\n  enabled: boolean\n): Promise<void> {\n  const res = await fetch(\n    `${USER_LIBRARY_BASE}/files/${encodeURIComponent(\n      documentId\n    )}/toggle?enabled=${enabled}`,\n    {\n      method: \"PATCH\",\n    }\n  );\n\n  if (!res.ok) {\n    const errorData = await res.json().catch(() => ({}));\n    throw new Error(errorData.detail || `Failed to toggle sync: ${res.status}`);\n  }\n}\n\n/**\n * Delete a file/directory from the user library.\n */\nexport async function deleteLibraryFile(documentId: string): Promise<void> {\n  const res = await fetch(\n    `${USER_LIBRARY_BASE}/files/${encodeURIComponent(documentId)}`,\n    {\n      method: \"DELETE\",\n    }\n  );\n\n  if (!res.ok) {\n    const errorData = await res.json().catch(() => ({}));\n    throw new Error(errorData.detail || `Failed to delete file: ${res.status}`);\n  }\n}\n"
  },
  {
    "path": "web/src/app/craft/services/searchParams.ts",
    "content": "import { ReadonlyURLSearchParams } from \"next/navigation\";\n\n// search params for build pages\nexport const CRAFT_SEARCH_PARAM_NAMES = {\n  SESSION_ID: \"sessionId\",\n};\n\nexport function getSessionIdFromSearchParams(\n  searchParams: ReadonlyURLSearchParams | null\n): string | null {\n  return searchParams?.get(CRAFT_SEARCH_PARAM_NAMES.SESSION_ID) ?? null;\n}\n"
  },
  {
    "path": "web/src/app/craft/types/displayTypes.ts",
    "content": "/**\n * Display Types\n *\n * Simple FIFO types for rendering streaming content.\n * Items are stored and rendered in chronological order as they arrive.\n */\n\nexport type ToolCallKind =\n  | \"search\"\n  | \"read\"\n  | \"execute\"\n  | \"edit\"\n  | \"task\"\n  | \"other\";\n\n// =============================================================================\n// Todo List Types (for TodoWrite tool)\n// =============================================================================\n\nexport type TodoStatus = \"pending\" | \"in_progress\" | \"completed\";\n\nexport interface TodoItem {\n  /** The task description */\n  content: string;\n  /** Current status */\n  status: TodoStatus;\n  /** Present tense form shown during execution (e.g., \"Creating API endpoint\") */\n  activeForm: string;\n}\n\nexport interface TodoListState {\n  /** Tool call ID */\n  id: string;\n  /** Array of todo items */\n  todos: TodoItem[];\n  /** Whether the card is expanded (UI state only) */\n  isOpen: boolean;\n}\nexport type ToolCallStatus =\n  | \"pending\"\n  | \"in_progress\"\n  | \"completed\"\n  | \"failed\"\n  | \"cancelled\";\n\nexport interface ToolCallState {\n  id: string;\n  kind: ToolCallKind;\n  title: string;\n  description: string; // \"Listing output directory\" or task description\n  command: string; // \"ls outputs/\" or task prompt for task kind\n  status: ToolCallStatus;\n  rawOutput: string; // Full output for expanded view\n  /** For task tool calls: the subagent type (e.g., \"explore\", \"plan\") */\n  subagentType?: string;\n  /** For edit operations: whether this is a new file (write) or edit of existing */\n  isNewFile?: boolean;\n  /** For edit operations: the old content before the edit (empty for new files) */\n  oldContent?: string;\n  /** For edit operations: the new content after the edit */\n  newContent?: string;\n}\n\n/**\n * StreamItem - A single item in the FIFO stream.\n * These are stored in chronological order and rendered directly.\n */\nexport type StreamItem =\n  | { type: \"text\"; id: string; content: string; isStreaming: boolean }\n  | { type: \"thinking\"; id: string; content: string; isStreaming: boolean }\n  | { type: \"tool_call\"; id: string; toolCall: ToolCallState }\n  | { type: \"todo_list\"; id: string; todoList: TodoListState };\n\n/**\n * GroupedStreamItem - StreamItem after grouping transformation for rendering.\n * Consecutive working tool calls are grouped into a single \"working_group\" item.\n * Used by BuildMessageList to render consolidated Working pills.\n */\nexport type GroupedStreamItem =\n  | { type: \"text\"; id: string; content: string; isStreaming: boolean }\n  | { type: \"thinking\"; id: string; content: string; isStreaming: boolean }\n  | { type: \"tool_call\"; id: string; toolCall: ToolCallState }\n  | { type: \"todo_list\"; id: string; todoList: TodoListState }\n  | { type: \"working_group\"; id: string; toolCalls: ToolCallState[] };\n"
  },
  {
    "path": "web/src/app/craft/types/streamingTypes.ts",
    "content": "// =============================================================================\n// Sharing Types\n// =============================================================================\n\nexport type SharingScope = \"private\" | \"public_org\" | \"public_global\";\n\n// =============================================================================\n// Session Error Constants\n// =============================================================================\n\nexport const SessionErrorCode = {\n  RATE_LIMIT_EXCEEDED: \"RATE_LIMIT_EXCEEDED\",\n} as const;\n\nexport type SessionErrorCode =\n  (typeof SessionErrorCode)[keyof typeof SessionErrorCode];\n\n// =============================================================================\n// Usage Limits Types\n// =============================================================================\n\nexport type LimitType = \"weekly\" | \"total\";\n\nexport interface UsageLimits {\n  /** Whether the user has reached their limit */\n  isLimited: boolean;\n  /** Type of limit period: \"weekly\" for paid, \"total\" for free */\n  limitType: LimitType;\n  /** Number of messages used in current period */\n  messagesUsed: number;\n  /** Maximum messages allowed in the period */\n  limit: number;\n  /** For weekly limits: timestamp when the limit resets (null for total limits) */\n  resetTimestamp: Date | null;\n}\n\n// API response shape (snake_case from backend)\nexport interface ApiUsageLimitsResponse {\n  is_limited: boolean;\n  limit_type: LimitType;\n  messages_used: number;\n  limit: number;\n  reset_timestamp: string | null;\n}\n\n// =============================================================================\n// Artifact & Message Types\n// =============================================================================\n\nexport type ArtifactType =\n  | \"nextjs_app\"\n  | \"web_app\" // Backend sends this\n  | \"pptx\"\n  | \"xlsx\"\n  | \"docx\"\n  | \"markdown\"\n  | \"chart\"\n  | \"csv\"\n  | \"image\";\n\nexport interface Artifact {\n  id: string;\n  session_id: string;\n  type: ArtifactType;\n  name: string;\n  path: string;\n  preview_url?: string | null;\n  created_at: Date;\n  updated_at: Date;\n}\n\nexport interface BuildMessage {\n  id: string;\n  type: \"user\" | \"assistant\" | \"system\";\n  content: string;\n  timestamp: Date;\n  /** Structured ACP event data (tool calls, thinking, plans) */\n  message_metadata?: Record<string, any> | null;\n  /** Tool calls associated with this message (for agent messages) */\n  toolCalls?: ToolCall[];\n}\n\n// =============================================================================\n// Tool Call Types (for tracking agent tool usage)\n// =============================================================================\n\nexport type ToolCallStatus =\n  | \"pending\"\n  | \"in_progress\"\n  | \"completed\"\n  | \"failed\"\n  | \"cancelled\";\n\nexport interface ToolCall {\n  /** Unique ID for this tool call */\n  id: string;\n  /** Tool kind/category (e.g., \"edit\", \"execute\", \"other\") */\n  kind: string;\n  /** Tool name (e.g., \"write\", \"bash\", \"ls\") */\n  name: string;\n  /** Human-readable title */\n  title: string;\n  /** Current status */\n  status: ToolCallStatus;\n  /** Tool input parameters */\n  input?: Record<string, unknown>;\n  /** Raw input from ACP (complete command/parameters) */\n  raw_input?: Record<string, any> | null;\n  /** Raw output from ACP (complete result) */\n  raw_output?: Record<string, any> | null;\n  /** Content block from ACP (description text) */\n  content?: any | null;\n  /** Result content (when completed) */\n  result?: string;\n  /** Error message (when failed) */\n  error?: string;\n  /** When the tool call started */\n  startedAt: Date;\n  /** When the tool call finished */\n  finishedAt?: Date;\n}\n\nexport type SessionStatus =\n  | \"idle\"\n  | \"creating\"\n  | \"running\"\n  | \"active\"\n  | \"failed\";\n\nexport interface Session {\n  id: string | null;\n  status: SessionStatus;\n  artifacts: Artifact[];\n  messages: BuildMessage[];\n  error: string | null;\n  webappUrl: string | null;\n}\n\nexport interface SessionHistoryItem {\n  id: string;\n  title: string;\n  createdAt: Date;\n}\n\n// =============================================================================\n// API Response Types\n// =============================================================================\n\nexport interface ApiSandboxResponse {\n  id: string;\n  status:\n    | \"provisioning\"\n    | \"running\"\n    | \"idle\"\n    | \"sleeping\"\n    | \"terminated\"\n    | \"failed\"\n    | \"restoring\"; // Frontend-only: set during snapshot restore\n  container_id: string | null;\n  created_at: string;\n  last_heartbeat: string | null;\n  nextjs_port: number | null;\n}\n\nexport interface ApiSessionResponse {\n  id: string;\n  user_id: string | null;\n  name: string | null;\n  status: \"active\" | \"idle\" | \"archived\";\n  created_at: string;\n  last_activity_at: string;\n  sandbox: ApiSandboxResponse | null;\n  artifacts: ApiArtifactResponse[];\n  sharing_scope: SharingScope;\n}\n\nexport interface ApiDetailedSessionResponse extends ApiSessionResponse {\n  session_loaded_in_sandbox: boolean;\n}\n\nexport interface ApiMessageResponse {\n  id: string;\n  session_id: string;\n  type: \"user\" | \"assistant\";\n  content: string;\n  message_metadata?: Record<string, any> | null;\n  created_at: string;\n}\n\nexport interface ApiArtifactResponse {\n  id: string;\n  session_id: string;\n  type: ArtifactType;\n  path: string;\n  name: string;\n  created_at: string;\n  updated_at: string;\n  preview_url?: string | null;\n}\n\nexport interface ApiWebappInfoResponse {\n  has_webapp: boolean;\n  webapp_url: string | null;\n  status: string;\n  ready: boolean;\n  sharing_scope: SharingScope;\n}\n\nexport interface FileSystemEntry {\n  name: string;\n  path: string;\n  is_directory: boolean;\n  size: number | null;\n  mime_type: string | null;\n}\n\nexport interface DirectoryListing {\n  path: string;\n  entries: FileSystemEntry[];\n}\n\n// =============================================================================\n// SSE Packet Types (matching backend build_packet_types.py)\n// =============================================================================\n\n// Step/Thinking Packets\nexport interface StepStartPacket {\n  type: \"step_start\";\n  step_id: string;\n  step_name?: string;\n  timestamp: string;\n}\n\nexport interface StepDeltaPacket {\n  type: \"step_delta\";\n  step_id: string;\n  content: string;\n  timestamp: string;\n}\n\nexport interface StepEndPacket {\n  type: \"step_end\";\n  step_id: string;\n  status: \"completed\" | \"failed\" | \"cancelled\";\n  timestamp: string;\n}\n\n// Tool Call Packets\nexport interface ToolStartPacket {\n  type: \"tool_start\";\n  tool_call_id: string;\n  tool_name: string;\n  tool_input: Record<string, any>;\n  title?: string;\n  timestamp: string;\n}\n\nexport interface ToolProgressPacket {\n  type: \"tool_progress\";\n  tool_call_id: string;\n  tool_name: string;\n  status: \"pending\" | \"in_progress\" | \"completed\" | \"failed\" | \"cancelled\";\n  progress?: number;\n  message?: string;\n  timestamp: string;\n}\n\nexport interface ToolEndPacket {\n  type: \"tool_end\";\n  tool_call_id: string;\n  tool_name: string;\n  status: \"success\" | \"error\" | \"cancelled\";\n  result?: string | Record<string, any>;\n  error?: string;\n  timestamp: string;\n}\n\n// Agent Output Packets\nexport interface OutputStartPacket {\n  type: \"output_start\";\n  timestamp: string;\n}\n\nexport interface OutputDeltaPacket {\n  type: \"output_delta\";\n  content: string;\n  timestamp: string;\n}\n\nexport interface OutputEndPacket {\n  type: \"output_end\";\n  timestamp: string;\n}\n\n// Plan Packets\nexport interface PlanEntry {\n  id: string;\n  description: string;\n  status: \"pending\" | \"in_progress\" | \"completed\" | \"cancelled\";\n  priority?: number;\n}\n\nexport interface PlanPacket {\n  type: \"plan\";\n  plan?: string;\n  entries?: PlanEntry[];\n  timestamp: string;\n}\n\n// Mode Update Packets\nexport interface ModeUpdatePacket {\n  type: \"mode_update\";\n  mode: string;\n  description?: string;\n  timestamp: string;\n}\n\n// Completion Packets\nexport interface DonePacket {\n  type: \"done\";\n  summary: string;\n  stop_reason?:\n    | \"end_turn\"\n    | \"max_tokens\"\n    | \"max_turn_requests\"\n    | \"refusal\"\n    | \"cancelled\";\n  usage?: Record<string, any>;\n  timestamp: string;\n}\n\n// Error Packets\nexport interface ErrorPacket {\n  type: \"error\";\n  message: string;\n  code?: number;\n  details?: Record<string, any>;\n  timestamp: string;\n}\n\n// File Write Packets\nexport interface FileWritePacket {\n  type: \"file_write\";\n  path: string;\n  size_bytes?: number;\n  operation: \"create\" | \"update\" | \"delete\";\n  timestamp: string;\n}\n\n// Artifact Packets\nexport type BackendArtifactType =\n  | \"web_app\"\n  | \"markdown\"\n  | \"image\"\n  | \"csv\"\n  | \"excel\"\n  | \"pptx\"\n  | \"docx\"\n  | \"pdf\"\n  | \"code\"\n  | \"other\";\n\nexport interface ArtifactCreatedPacket {\n  type: \"artifact_created\";\n  artifact: {\n    id: string;\n    type: BackendArtifactType;\n    name: string;\n    path: string;\n    preview_url?: string;\n    download_url?: string;\n    mime_type?: string;\n    size_bytes?: number;\n  };\n  timestamp: string;\n}\n\n// Permission Packets (for future use)\nexport interface PermissionRequestPacket {\n  type: \"permission_request\";\n  request_id: string;\n  operation: string;\n  description: string;\n  auto_approve: boolean;\n  timestamp: string;\n}\n\nexport interface PermissionResponsePacket {\n  type: \"permission_response\";\n  request_id: string;\n  approved: boolean;\n  reason?: string;\n  timestamp: string;\n}\n\n// =============================================================================\n// Raw ACP Packets (sent directly from backend with ALL ACP fields)\n// =============================================================================\n\n// Content block types from ACP\nexport interface TextContentBlock {\n  type: \"text\";\n  text: string;\n}\n\nexport interface ImageContentBlock {\n  type: \"image\";\n  data: string;\n  mimeType: string;\n}\n\nexport type ContentBlock =\n  | TextContentBlock\n  | ImageContentBlock\n  | Record<string, any>;\n\n// Base ACP event fields\nexport interface ACPBaseEvent {\n  field_meta?: Record<string, any> | null; // _meta field for extensibility\n  timestamp: string;\n}\n\n// ACP: agent_message_chunk - Agent's text/content output\nexport interface AgentMessageChunkPacket extends ACPBaseEvent {\n  type: \"agent_message_chunk\";\n  content: ContentBlock;\n  session_update?: string;\n}\n\n// ACP: agent_thought_chunk - Agent's internal reasoning\nexport interface AgentThoughtChunkPacket extends ACPBaseEvent {\n  type: \"agent_thought_chunk\";\n  content: ContentBlock;\n  session_update?: string;\n}\n\n// ACP: tool_call_start - Tool invocation started\nexport interface ToolCallStartPacket extends ACPBaseEvent {\n  type: \"tool_call_start\";\n  tool_call_id: string;\n  kind: string | null;\n  title: string | null;\n  content: ContentBlock | null;\n  locations: string[] | null;\n  raw_input: Record<string, any> | null;\n  raw_output: Record<string, any> | null;\n  status: string | null;\n  session_update?: string;\n}\n\n// ACP: tool_call_progress - Tool execution progress/completion\nexport interface ToolCallProgressPacket extends ACPBaseEvent {\n  type: \"tool_call_progress\";\n  tool_call_id: string;\n  kind: string | null;\n  title: string | null;\n  content: ContentBlock | null;\n  locations: string[] | null;\n  raw_input: Record<string, any> | null;\n  raw_output: Record<string, any> | null;\n  status: string | null;\n  session_update?: string;\n}\n\n// ACP: agent_plan_update - Agent's execution plan\nexport interface AgentPlanUpdatePacket extends ACPBaseEvent {\n  type: \"agent_plan_update\";\n  entries: Array<{\n    id: string;\n    description: string;\n    status: string;\n    priority: string | number | null;\n  }> | null;\n  session_update?: string;\n}\n\n// ACP: current_mode_update - Agent mode change\nexport interface CurrentModeUpdatePacket extends ACPBaseEvent {\n  type: \"current_mode_update\";\n  current_mode_id: string | null;\n  session_update?: string;\n}\n\n// ACP: prompt_response - Agent finished processing\nexport interface PromptResponsePacket extends ACPBaseEvent {\n  type: \"prompt_response\";\n  stop_reason: string | null;\n}\n\n// ACP: error - Error from ACP\nexport interface ACPErrorPacket {\n  type: \"error\";\n  code: string | null;\n  message: string;\n  data: Record<string, any> | null;\n  timestamp: string;\n}\n\n// Union type for all packets (including raw ACP packets)\nexport type StreamPacket =\n  // Raw ACP packets with ALL fields\n  | AgentMessageChunkPacket\n  | AgentThoughtChunkPacket\n  | ToolCallStartPacket\n  | ToolCallProgressPacket\n  | AgentPlanUpdatePacket\n  | CurrentModeUpdatePacket\n  | PromptResponsePacket\n  | ACPErrorPacket\n  // Custom Onyx packets\n  | StepStartPacket\n  | StepDeltaPacket\n  | StepEndPacket\n  | ToolStartPacket\n  | ToolProgressPacket\n  | ToolEndPacket\n  | OutputStartPacket\n  | OutputDeltaPacket\n  | OutputEndPacket\n  | PlanPacket\n  | ModeUpdatePacket\n  | DonePacket\n  | ErrorPacket\n  | FileWritePacket\n  | ArtifactCreatedPacket\n  | PermissionRequestPacket\n  | PermissionResponsePacket\n  | { type: string; timestamp?: string }; // catch-all for unknown packet types\n"
  },
  {
    "path": "web/src/app/craft/types/user-library.ts",
    "content": "/**\n * Types for User Library - raw binary file uploads in Craft.\n */\n\nexport interface LibraryEntry {\n  id: string; // document_id\n  name: string;\n  path: string;\n  is_directory: boolean;\n  file_size: number | null;\n  mime_type: string | null;\n  sync_enabled: boolean;\n  created_at: string;\n  children?: LibraryEntry[];\n}\n\nexport interface CreateDirectoryRequest {\n  name: string;\n  parent_path: string;\n}\n\nexport interface UploadResponse {\n  entries: LibraryEntry[];\n  total_uploaded: number;\n  total_size_bytes: number;\n}\n"
  },
  {
    "path": "web/src/app/craft/utils/packetTypes.ts",
    "content": "/**\n * Packet Types\n *\n * Type definitions for raw and parsed ACP packets.\n * Centralizes all snake_case / camelCase field resolution.\n * Defines the ParsedPacket discriminated union consumed by both\n * useBuildStreaming (live SSE) and useBuildSessionStore (DB reload).\n */\n\nimport type { TodoItem } from \"../types/displayTypes\";\n\n// Re-export from displayTypes — single source of truth\nexport type {\n  ToolCallKind as ToolKind,\n  ToolCallStatus as ToolStatus,\n} from \"../types/displayTypes\";\n\n// ─── Raw Packet Field Access ─────────────────────────────────────────\n// Every backend field name variant is listed ONCE here.\n\nexport function getRawInput(\n  p: Record<string, unknown>\n): Record<string, unknown> | null {\n  return (p.raw_input ?? p.rawInput ?? null) as Record<string, unknown> | null;\n}\n\nexport function getRawOutput(\n  p: Record<string, unknown>\n): Record<string, unknown> | null {\n  return (p.raw_output ?? p.rawOutput ?? null) as Record<\n    string,\n    unknown\n  > | null;\n}\n\nexport function getToolCallId(p: Record<string, unknown>): string {\n  return (p.tool_call_id ?? p.toolCallId ?? \"\") as string;\n}\n\nexport function getToolNameRaw(p: Record<string, unknown>): string {\n  // Prefer explicit tool_name fields\n  const explicit = (p.tool_name ?? p.toolName ?? \"\") as string;\n  if (explicit) return explicit.toLowerCase();\n\n  // Fall back to title only if it looks like a simple tool name\n  // (no spaces or newlines — otherwise it's a human-readable description)\n  const title = (p.title ?? \"\") as string;\n  if (title && !title.includes(\" \") && !title.includes(\"\\n\")) {\n    return title.toLowerCase();\n  }\n\n  return \"\";\n}\n\n// ─── Parsed Packet Types (Discriminated Union) ──────────────────────\n\nexport type ToolName =\n  | \"glob\"\n  | \"grep\"\n  | \"read\"\n  | \"write\"\n  | \"edit\"\n  | \"bash\"\n  | \"task\"\n  | \"todowrite\"\n  | \"webfetch\"\n  | \"websearch\"\n  | \"unknown\";\n\nexport interface ParsedTextChunk {\n  type: \"text_chunk\";\n  text: string;\n}\n\nexport interface ParsedThinkingChunk {\n  type: \"thinking_chunk\";\n  text: string;\n}\n\nexport interface ParsedToolCallStart {\n  type: \"tool_call_start\";\n  toolCallId: string;\n  toolName: ToolName;\n  kind: import(\"../types/displayTypes\").ToolCallKind;\n  isTodo: boolean;\n}\n\nexport interface ParsedToolCallProgress {\n  type: \"tool_call_progress\";\n  toolCallId: string;\n  toolName: ToolName;\n  kind: import(\"../types/displayTypes\").ToolCallKind;\n  status: import(\"../types/displayTypes\").ToolCallStatus;\n  isTodo: boolean;\n  // Pre-extracted, pre-sanitized fields (ready for display)\n  title: string;\n  description: string;\n  command: string;\n  rawOutput: string;\n  filePath: string; // Session-relative\n  subagentType: string | null;\n  // Edit-specific\n  isNewFile: boolean;\n  oldContent: string;\n  newContent: string;\n  // Todo-specific\n  todos: TodoItem[];\n  // Task-specific\n  taskOutput: string | null;\n}\n\nexport interface ParsedPromptResponse {\n  type: \"prompt_response\";\n}\n\nexport interface ParsedArtifact {\n  type: \"artifact_created\";\n  artifact: {\n    id: string;\n    type: string;\n    name: string;\n    path: string;\n    preview_url: string | null;\n  };\n}\n\nexport interface ParsedError {\n  type: \"error\";\n  message: string;\n}\n\nexport interface ParsedUnknown {\n  type: \"unknown\";\n}\n\nexport type ParsedPacket =\n  | ParsedTextChunk\n  | ParsedThinkingChunk\n  | ParsedToolCallStart\n  | ParsedToolCallProgress\n  | ParsedPromptResponse\n  | ParsedArtifact\n  | ParsedError\n  | ParsedUnknown;\n"
  },
  {
    "path": "web/src/app/craft/utils/parsePacket.ts",
    "content": "/**\n * Parse Packet\n *\n * Single entry point for converting raw ACP packets into strongly-typed\n * ParsedPacket values. All field resolution, tool detection, and path\n * sanitization happen here. Consumers never touch Record<string, unknown>.\n */\n\nimport { stripSessionPrefix, sanitizePathsInText } from \"./pathSanitizer\";\nimport {\n  getRawInput,\n  getRawOutput,\n  getToolCallId,\n  getToolNameRaw,\n  type ParsedPacket,\n  type ParsedToolCallStart,\n  type ParsedToolCallProgress,\n  type ParsedArtifact,\n  type ToolName,\n  type ToolKind,\n  type ToolStatus,\n} from \"./packetTypes\";\nimport type { TodoItem, TodoStatus } from \"../types/displayTypes\";\n\nexport function parsePacket(raw: unknown): ParsedPacket {\n  if (!raw || typeof raw !== \"object\") return { type: \"unknown\" };\n  const p = raw as Record<string, unknown>;\n  const packetType = p.type as string | undefined;\n\n  switch (packetType) {\n    case \"agent_message_chunk\": // Live SSE\n    case \"agent_message\": // DB-stored format\n      return { type: \"text_chunk\", text: extractText(p.content) };\n\n    case \"agent_thought_chunk\": // Live SSE\n    case \"agent_thought\": // DB-stored format\n      return { type: \"thinking_chunk\", text: extractText(p.content) };\n\n    case \"tool_call_start\":\n      return parseToolCallStart(p);\n\n    case \"tool_call_progress\":\n      return parseToolCallProgress(p);\n\n    case \"prompt_response\":\n      return { type: \"prompt_response\" };\n\n    case \"artifact_created\":\n      return parseArtifact(p);\n\n    case \"error\":\n      return { type: \"error\", message: (p.message ?? \"\") as string };\n\n    default:\n      return { type: \"unknown\" };\n  }\n}\n\n// ─── Tool Name Resolution ─────────────────────────────────────────\n\nconst NAME_MAP: Record<string, ToolName> = {\n  glob: \"glob\",\n  grep: \"grep\",\n  read: \"read\",\n  write: \"write\",\n  edit: \"edit\",\n  bash: \"bash\",\n  task: \"task\",\n  todowrite: \"todowrite\",\n  todo_write: \"todowrite\",\n  webfetch: \"webfetch\",\n  websearch: \"websearch\",\n};\n\nfunction resolveToolName(p: Record<string, unknown>): ToolName {\n  const rawName = getToolNameRaw(p);\n\n  if (NAME_MAP[rawName]) return NAME_MAP[rawName];\n\n  // Fallback: detect by rawInput shape (handles title changes on completion)\n  const ri = getRawInput(p);\n  if (ri?.subagent_type || ri?.subagentType) return \"task\";\n  if (ri?.todos && Array.isArray(ri.todos)) return \"todowrite\";\n\n  // Detect tools by rawInput fields (opencode agent uses different field names)\n  if (ri?.patchText && typeof ri.patchText === \"string\") return \"edit\";\n  if (ri?.command && typeof ri.command === \"string\") return \"bash\";\n\n  // Fallback: use backend-provided kind to infer tool name\n  const rawKind = (p.kind as string) ?? null;\n  if (rawKind === \"execute\") return \"bash\";\n  if (rawKind === \"read\") return \"read\";\n  if (rawKind === \"edit\" || rawKind === \"delete\" || rawKind === \"move\")\n    return \"edit\";\n  if (rawKind === \"search\") return \"glob\";\n  if (rawKind === \"fetch\") return \"webfetch\";\n\n  return \"unknown\";\n}\n\nconst TOOL_KIND_MAP: Record<ToolName, ToolKind> = {\n  glob: \"search\",\n  grep: \"search\",\n  read: \"read\",\n  write: \"edit\",\n  edit: \"edit\",\n  bash: \"execute\",\n  task: \"task\",\n  todowrite: \"other\",\n  webfetch: \"other\",\n  websearch: \"search\",\n  unknown: \"other\",\n};\n\nfunction resolveKind(toolName: ToolName, rawKind: string | null): ToolKind {\n  const fromName = TOOL_KIND_MAP[toolName];\n  if (fromName !== \"other\") return fromName;\n\n  // Fall back to backend-provided kind\n  if (\n    rawKind === \"search\" ||\n    rawKind === \"read\" ||\n    rawKind === \"execute\" ||\n    rawKind === \"edit\" ||\n    rawKind === \"task\"\n  ) {\n    return rawKind;\n  }\n  return \"other\";\n}\n\n// ─── Shared Helpers ───────────────────────────────────────────────\n\n/** Extract text from ACP content structure (string, {type,text}, or array) */\nfunction extractText(content: unknown): string {\n  if (!content) return \"\";\n  if (typeof content === \"string\") return content;\n  if (typeof content === \"object\" && content !== null) {\n    const obj = content as Record<string, unknown>;\n    if (obj.type === \"text\" && typeof obj.text === \"string\") return obj.text;\n    if (Array.isArray(content)) {\n      return content\n        .filter(\n          (c: Record<string, unknown>) =>\n            c?.type === \"text\" && typeof c.text === \"string\"\n        )\n        .map((c: Record<string, unknown>) => c.text)\n        .join(\"\");\n    }\n    if (typeof obj.text === \"string\") return obj.text;\n  }\n  return \"\";\n}\n\nfunction normalizeStatus(status: string | null | undefined): ToolStatus {\n  if (\n    status === \"pending\" ||\n    status === \"in_progress\" ||\n    status === \"completed\" ||\n    status === \"failed\" ||\n    status === \"cancelled\"\n  ) {\n    return status;\n  }\n  return \"pending\";\n}\n\n// ─── Edit / Diff Extraction ──────────────────────────────────────\n\n/** Extract oldText and newText from content[].type===\"diff\" items */\nfunction extractDiffData(content: unknown): {\n  oldText: string;\n  newText: string;\n  isNewFile: boolean;\n} {\n  if (!Array.isArray(content))\n    return { oldText: \"\", newText: \"\", isNewFile: true };\n  let oldText = \"\";\n  let newText = \"\";\n  for (const item of content) {\n    if (item?.type === \"diff\") {\n      if (typeof item.oldText === \"string\") oldText = item.oldText;\n      if (typeof item.newText === \"string\") newText = item.newText;\n    }\n  }\n  return { oldText, newText, isNewFile: oldText === \"\" };\n}\n\n/** Extract file path from content[].type===\"diff\" items (fallback when rawInput has no path) */\nfunction extractDiffPath(p: Record<string, unknown>): string {\n  const content = p.content as unknown[] | undefined;\n  if (!Array.isArray(content)) return \"\";\n  for (const item of content) {\n    if (\n      item &&\n      typeof item === \"object\" &&\n      (item as Record<string, unknown>).type === \"diff\"\n    ) {\n      const diffPath = (item as Record<string, unknown>).path as\n        | string\n        | undefined;\n      if (diffPath) return stripSessionPrefix(diffPath);\n    }\n  }\n  // Final fallback: title field may contain a file path\n  const title = p.title as string | undefined;\n  if (title && title.includes(\"/\")) return stripSessionPrefix(title);\n  return \"\";\n}\n\n// ─── Patch Text Extraction (opencode agent) ─────────────────────\n\n/** Extract file path and new-file flag from opencode's patch format.\n *  Format: \"*** Update File: path\" or \"*** Add File: path\" */\nfunction extractPatchInfo(\n  patchText: string\n): { path: string; isNew: boolean } | null {\n  const match = patchText.match(\n    /\\*\\*\\*\\s+(Update|Add|Delete)\\s+File:\\s*(.+?)(?:\\n|$)/\n  );\n  if (match?.[2]) {\n    return {\n      path: stripSessionPrefix(match[2].trim()),\n      isNew: match[1] === \"Add\",\n    };\n  }\n  return null;\n}\n\n// ─── Description Builder ─────────────────────────────────────────\n\nfunction buildDescription(\n  toolName: ToolName,\n  kind: ToolKind,\n  filePath: string,\n  ri: Record<string, unknown> | null,\n  rawDescription: string\n): string {\n  // Task tool: use description from rawInput\n  if (toolName === \"task\") {\n    return rawDescription || \"Running subagent\";\n  }\n  // Read/edit: show file path\n  if (kind === \"read\" || kind === \"edit\") {\n    if (filePath) return filePath;\n  }\n  // Execute: use backend description\n  if (kind === \"execute\") {\n    return sanitizePathsInText(rawDescription) || \"Running command\";\n  }\n  // Search: show pattern\n  if (\n    (toolName === \"glob\" || toolName === \"grep\" || kind === \"search\") &&\n    ri?.pattern &&\n    typeof ri.pattern === \"string\"\n  ) {\n    return ri.pattern as string;\n  }\n  return buildTitle(toolName, kind, true);\n}\n\n// ─── Title Builder ───────────────────────────────────────────────\n\nfunction buildTitle(\n  toolName: ToolName,\n  kind: ToolKind,\n  isNewFile: boolean\n): string {\n  // Edit/write: distinguish \"Writing\" (new file) vs \"Editing\" (existing)\n  if (kind === \"edit\") return isNewFile ? \"Writing\" : \"Editing\";\n\n  const TITLES: Record<ToolName, string> = {\n    glob: \"Searching files\",\n    grep: \"Searching content\",\n    read: \"Reading\",\n    write: \"Writing\",\n    edit: \"Editing\",\n    bash: \"Running command\",\n    task: \"Running task\",\n    todowrite: \"Updating todos\",\n    webfetch: \"Fetching web content\",\n    websearch: \"Searching web\",\n    unknown: \"Running tool\",\n  };\n\n  // When toolName is unknown, use kind for a more specific title\n  if (toolName === \"unknown\") {\n    const KIND_TITLES: Partial<Record<ToolKind, string>> = {\n      search: \"Searching\",\n      read: \"Reading\",\n      execute: \"Running command\",\n      task: \"Running task\",\n    };\n    return KIND_TITLES[kind] || TITLES.unknown;\n  }\n\n  return TITLES[toolName];\n}\n\n// ─── Raw Output Extraction ───────────────────────────────────────\n\n/** Extract the appropriate output text based on tool kind.\n *  Returns raw unsanitized text — caller applies sanitizePathsInText. */\nfunction extractRawOutputText(\n  toolName: ToolName,\n  kind: ToolKind,\n  p: Record<string, unknown>,\n  ro: Record<string, unknown> | null\n): string {\n  // Task tool: show the prompt (not the output JSON)\n  if (toolName === \"task\") {\n    const ri = getRawInput(p);\n    if (ri?.prompt && typeof ri.prompt === \"string\") return ri.prompt as string;\n    return \"\";\n  }\n  // Execute: prefer metadata.output, then output\n  if (kind === \"execute\") {\n    if (!ro) return \"\";\n    const metadata = ro.metadata as Record<string, unknown> | null;\n    return (metadata?.output || ro.output || \"\") as string;\n  }\n  // Read: extract file content from <file>...</file> wrapper\n  if (kind === \"read\") {\n    const fileContent = extractFileContent(p.content);\n    if (fileContent) return fileContent;\n    if (!ro) return \"\";\n    if (typeof ro.content === \"string\") return ro.content;\n    return JSON.stringify(ro, null, 2);\n  }\n  // Edit: show new text from diff\n  if (kind === \"edit\") {\n    const content = p.content as unknown[] | undefined;\n    if (Array.isArray(content)) {\n      for (const item of content) {\n        const rec = item as Record<string, unknown> | null;\n        if (rec?.type === \"diff\" && typeof rec.newText === \"string\")\n          return rec.newText as string;\n      }\n    }\n    // Fallback: show patchText from rawInput (opencode agent)\n    const ri = getRawInput(p);\n    if (ri?.patchText && typeof ri.patchText === \"string\")\n      return ri.patchText as string;\n    if (!ro) return \"\";\n    // Prefer output string over JSON dump\n    if (typeof ro.output === \"string\") return ro.output;\n    return JSON.stringify(ro, null, 2);\n  }\n  // Search: files list or output string\n  if (toolName === \"glob\" || toolName === \"grep\" || kind === \"search\") {\n    if (!ro) return \"\";\n    if (typeof ro.output === \"string\") return ro.output;\n    if (ro.files && Array.isArray(ro.files))\n      return (ro.files as string[]).join(\"\\n\");\n    return JSON.stringify(ro, null, 2);\n  }\n  // Fallback\n  if (!ro) return \"\";\n  return JSON.stringify(ro, null, 2);\n}\n\n/** Extract file content from content[].type===\"content\" items, stripping line numbers */\nfunction extractFileContent(content: unknown): string {\n  if (!Array.isArray(content)) return \"\";\n  for (const item of content) {\n    if (item?.type === \"content\" && item?.content?.type === \"text\") {\n      const text = item.content.text as string;\n      const fileMatch = text.match(\n        /<file>\\n?([\\s\\S]*?)\\n?\\(End of file[^)]*\\)\\n?<\\/file>/\n      );\n      if (fileMatch?.[1]) {\n        return fileMatch[1].replace(/^\\d+\\| /gm, \"\");\n      }\n      return text;\n    }\n  }\n  return \"\";\n}\n\n// ─── Todo Extraction ─────────────────────────────────────────────\n\nfunction extractTodos(ri: Record<string, unknown> | null): TodoItem[] {\n  if (!ri?.todos || !Array.isArray(ri.todos)) return [];\n  return ri.todos.map((t: Record<string, unknown>) => ({\n    content: (t.content as string) || \"\",\n    status: normalizeTodoStatus(t.status),\n    activeForm: (t.activeForm as string) || (t.content as string) || \"\",\n  }));\n}\n\nfunction normalizeTodoStatus(status: unknown): TodoStatus {\n  if (\n    status === \"pending\" ||\n    status === \"in_progress\" ||\n    status === \"completed\"\n  )\n    return status;\n  return \"pending\";\n}\n\n// ─── Task Output Extraction ──────────────────────────────────────\n\nfunction extractTaskOutput(ro: Record<string, unknown> | null): string | null {\n  if (!ro?.output || typeof ro.output !== \"string\") return null;\n  return (\n    ro.output.replace(/<task_metadata>[\\s\\S]*?<\\/task_metadata>/g, \"\").trim() ||\n    null\n  );\n}\n\n// ─── Artifact Parsing ─────────────────────────────────────────────\n\nfunction parseArtifact(p: Record<string, unknown>): ParsedArtifact {\n  const artifact = p.artifact as Record<string, unknown> | undefined;\n  return {\n    type: \"artifact_created\",\n    artifact: {\n      id: (artifact?.id ?? \"\") as string,\n      type: (artifact?.type ?? \"\") as string,\n      name: (artifact?.name ?? \"\") as string,\n      path: (artifact?.path ?? \"\") as string,\n      preview_url: (artifact?.preview_url as string) || null,\n    },\n  };\n}\n\n// ─── Tool Call Parsing ────────────────────────────────────────────\n\nfunction parseToolCallStart(p: Record<string, unknown>): ParsedToolCallStart {\n  const toolName = resolveToolName(p);\n  const rawKind = p.kind as string | null;\n  return {\n    type: \"tool_call_start\",\n    toolCallId: getToolCallId(p),\n    toolName,\n    kind: resolveKind(toolName, rawKind),\n    isTodo: toolName === \"todowrite\",\n  };\n}\n\nfunction parseToolCallProgress(\n  p: Record<string, unknown>\n): ParsedToolCallProgress {\n  const toolName = resolveToolName(p);\n  const rawKind = p.kind as string | null;\n  const kind = resolveKind(toolName, rawKind);\n  const ri = getRawInput(p);\n  const ro = getRawOutput(p);\n  const isTodo = toolName === \"todowrite\";\n\n  // ── Edit-specific (extracted first — isNewFile needed by buildTitle) ──\n  const diffData =\n    kind === \"edit\"\n      ? extractDiffData(p.content)\n      : { oldText: \"\", newText: \"\", isNewFile: true };\n\n  // ── Patch info (opencode agent uses patchText instead of file_path) ──\n  const patchInfo =\n    kind === \"edit\" && ri?.patchText && typeof ri.patchText === \"string\"\n      ? extractPatchInfo(ri.patchText as string)\n      : null;\n\n  // ── File path (structured field → stripSessionPrefix) ──────────\n  const rawFilePath = (ri?.file_path ??\n    ri?.filePath ??\n    ri?.path ??\n    \"\") as string;\n  let filePath = rawFilePath\n    ? stripSessionPrefix(rawFilePath)\n    : extractDiffPath(p);\n\n  // Fallback: extract from patchText\n  if (!filePath && patchInfo) {\n    filePath = patchInfo.path;\n  }\n\n  // ── Command (freeform → sanitizePathsInText) ──────────────────\n  const rawCommand = (ri?.command ?? \"\") as string;\n  const command = sanitizePathsInText(rawCommand);\n\n  // ── Description ───────────────────────────────────────────────\n  const rawDescription = (ri?.description ?? \"\") as string;\n  const description = buildDescription(\n    toolName,\n    kind,\n    filePath,\n    ri,\n    rawDescription\n  );\n\n  // ── Output (freeform → sanitizePathsInText) ───────────────────\n  const rawOutputText = extractRawOutputText(toolName, kind, p, ro);\n  const rawOutput = sanitizePathsInText(rawOutputText);\n\n  // ── Title ─────────────────────────────────────────────────────\n  const title = buildTitle(toolName, kind, diffData.isNewFile);\n\n  // ── Status ────────────────────────────────────────────────────\n  const status = normalizeStatus(p.status as string | null);\n\n  // ── Todo-specific ─────────────────────────────────────────────\n  const todos = isTodo ? extractTodos(ri) : [];\n\n  // ── Task-specific ─────────────────────────────────────────────\n  const subagentType = (ri?.subagent_type ?? ri?.subagentType ?? null) as\n    | string\n    | null;\n  const taskOutput =\n    toolName === \"task\" && status === \"completed\"\n      ? extractTaskOutput(ro)\n      : null;\n\n  return {\n    type: \"tool_call_progress\",\n    toolCallId: getToolCallId(p),\n    toolName,\n    kind,\n    status,\n    isTodo,\n    title,\n    description,\n    command,\n    rawOutput,\n    filePath,\n    subagentType,\n    isNewFile:\n      diffData.oldText || diffData.newText\n        ? diffData.isNewFile\n        : patchInfo?.isNew ?? diffData.isNewFile,\n    oldContent: diffData.oldText,\n    newContent: diffData.newText,\n    todos,\n    taskOutput,\n  };\n}\n"
  },
  {
    "path": "web/src/app/craft/utils/pathSanitizer.test.ts",
    "content": "import { stripSessionPrefix, sanitizePathsInText } from \"./pathSanitizer\";\n\n// =============================================================================\n// stripSessionPrefix\n// =============================================================================\n\ndescribe(\"stripSessionPrefix\", () => {\n  it(\"returns empty string for empty input\", () => {\n    expect(stripSessionPrefix(\"\")).toBe(\"\");\n  });\n\n  // ── Local dev (sandboxes + sessions) ────────────────────────────────\n\n  it(\"strips local sandboxes/sessions prefix\", () => {\n    expect(\n      stripSessionPrefix(\n        \"/Users/wenxi-onyx/data/sandboxes/b29c196e-fa14-46b8-8182-ff4a7f67b47b/sessions/9c7662c1-785f-4f1c-b9e0-9021ddbf2893/outputs/web/AGENTS.md\"\n      )\n    ).toBe(\"outputs/web/AGENTS.md\");\n  });\n\n  it(\"strips local sandboxes/sessions prefix for files/ directory\", () => {\n    expect(\n      stripSessionPrefix(\n        \"/Users/wenxi-onyx/data/sandboxes/b29c196e-fa14-46b8-8182-ff4a7f67b47b/sessions/9c7662c1-785f-4f1c-b9e0-9021ddbf2893/files/linear/Engineering/ticket.json\"\n      )\n    ).toBe(\"files/linear/Engineering/ticket.json\");\n  });\n\n  it(\"strips sandboxes/sessions even with non-standard prefix\", () => {\n    expect(\n      stripSessionPrefix(\n        \"/data/sandboxes/abcdef1234567890abcdef1234567890ab/sessions/abcdef1234567890abcdef1234567890ab/file.txt\"\n      )\n    ).toBe(\"file.txt\");\n  });\n\n  // ── Kubernetes (sessions only) ──────────────────────────────────────\n\n  it(\"strips kubernetes sessions prefix\", () => {\n    expect(\n      stripSessionPrefix(\n        \"/workspace/sessions/9c7662c1-785f-4f1c-b9e0-9021ddbf2893/outputs/web/page.tsx\"\n      )\n    ).toBe(\"outputs/web/page.tsx\");\n  });\n\n  it(\"strips kubernetes sessions with short prefix\", () => {\n    expect(\n      stripSessionPrefix(\"/some/path/sessions/def-456/files/data.json\")\n    ).toBe(\"files/data.json\");\n  });\n\n  // ── Already relative ────────────────────────────────────────────────\n\n  it(\"returns already-relative paths unchanged\", () => {\n    expect(stripSessionPrefix(\"outputs/web/page.tsx\")).toBe(\n      \"outputs/web/page.tsx\"\n    );\n  });\n\n  it(\"strips leading slash from short paths\", () => {\n    expect(stripSessionPrefix(\"/file.txt\")).toBe(\"file.txt\");\n  });\n\n  // ── Title field (no leading /) ──────────────────────────────────────\n\n  it(\"handles title field without leading slash (sandboxes path)\", () => {\n    expect(\n      stripSessionPrefix(\n        \"Users/wenxi-onyx/data/sandboxes/b29c196e-fa14-46b8-8182-ff4a7f67b47b/sessions/9c7662c1-785f-4f1c-b9e0-9021ddbf2893/outputs/web/page.tsx\"\n      )\n    ).toBe(\"outputs/web/page.tsx\");\n  });\n\n  // ── Fallback (unknown format, >3 segments) ──────────────────────────\n\n  it(\"falls back to last 3 segments for unknown deep paths\", () => {\n    expect(stripSessionPrefix(\"/some/unknown/deep/path/to/file.tsx\")).toBe(\n      \"path/to/file.tsx\"\n    );\n  });\n\n  // ── Short paths ─────────────────────────────────────────────────────\n\n  it(\"returns short relative path as-is\", () => {\n    expect(stripSessionPrefix(\"file.txt\")).toBe(\"file.txt\");\n  });\n\n  it(\"returns 3-segment path as-is\", () => {\n    expect(stripSessionPrefix(\"a/b/c\")).toBe(\"a/b/c\");\n  });\n});\n\n// =============================================================================\n// sanitizePathsInText\n// =============================================================================\n\ndescribe(\"sanitizePathsInText\", () => {\n  it(\"returns empty string for empty input\", () => {\n    expect(sanitizePathsInText(\"\")).toBe(\"\");\n  });\n\n  // ── Bash commands ───────────────────────────────────────────────────\n\n  it(\"strips local sandboxes path from cd command\", () => {\n    expect(\n      sanitizePathsInText(\n        \"cd /Users/wenxi-onyx/data/sandboxes/abc-123/sessions/def-456/outputs/web && python3 prepare.py\"\n      )\n    ).toBe(\"cd outputs/web && python3 prepare.py\");\n  });\n\n  it(\"strips multiple paths in a single command\", () => {\n    expect(\n      sanitizePathsInText(\n        \"chmod +x /Users/wenxi/data/sandboxes/abc/sessions/def/outputs/web/prepare.sh && /Users/wenxi/data/sandboxes/abc/sessions/def/outputs/web/prepare.sh\"\n      )\n    ).toBe(\"chmod +x outputs/web/prepare.sh && outputs/web/prepare.sh\");\n  });\n\n  // ── Output listings ─────────────────────────────────────────────────\n\n  it(\"strips kubernetes paths from ls output\", () => {\n    expect(\n      sanitizePathsInText(\n        \"/workspace/sessions/def-456/outputs/web/page.tsx\\n/workspace/sessions/def-456/outputs/web/globals.css\"\n      )\n    ).toBe(\"outputs/web/page.tsx\\noutputs/web/globals.css\");\n  });\n\n  it(\"strips local paths from find output\", () => {\n    expect(\n      sanitizePathsInText(\n        \"find /Users/wenxi/data/sandboxes/abc/sessions/def/files/linear -type d\"\n      )\n    ).toBe(\"find files/linear -type d\");\n  });\n\n  // ── No paths — passthrough ──────────────────────────────────────────\n\n  it(\"returns text without sandbox/session paths unchanged\", () => {\n    const text =\n      \"total 0\\ndrwxr-xr-x@ 3 wenxi-onyx  staff  96 Jan 21 15:18 .\\n\";\n    expect(sanitizePathsInText(text)).toBe(text);\n  });\n\n  // ── Error messages ──────────────────────────────────────────────────\n\n  it(\"strips paths from error messages\", () => {\n    expect(\n      sanitizePathsInText(\n        \"Error: ENOENT: no such file or directory, open '/workspace/sessions/abc-123/outputs/web/missing.tsx'\"\n      )\n    ).toBe(\n      \"Error: ENOENT: no such file or directory, open 'outputs/web/missing.tsx'\"\n    );\n  });\n});\n"
  },
  {
    "path": "web/src/app/craft/utils/pathSanitizer.ts",
    "content": "/**\n * Path Sanitizer\n *\n * Pure string functions for stripping sandbox/session path prefixes.\n * All paths displayed in the UI must be relative to the session root.\n *\n * Two deployment shapes exist (both always include the sessions layer):\n *   Local:  /Users/.../sandboxes/{uuid}/sessions/{uuid}/outputs/web/page.tsx\n *   Kube:   /workspace/sessions/{uuid}/outputs/web/page.tsx\n */\n\n/**\n * Strip sandbox/session path prefixes to produce a session-relative path.\n *\n * Returns the path relative to the session root (the directory that\n * contains outputs/, files/, etc.)\n */\nexport function stripSessionPrefix(fullPath: string): string {\n  if (!fullPath) return \"\";\n\n  // 1. .../sandboxes/{uuid}/sessions/{uuid}/REST  →  REST\n  //    Matches local dev (always sandboxes + sessions)\n  const sbSession = fullPath.match(\n    /\\/sandboxes\\/[0-9a-f-]+\\/sessions\\/[0-9a-f-]+\\/(.+)$/\n  );\n  if (sbSession?.[1]) return sbSession[1];\n\n  // 2. .../sessions/{uuid}/REST  →  REST\n  //    Matches kubernetes (e.g. /workspace/sessions/...)\n  const session = fullPath.match(/\\/sessions\\/[0-9a-f-]+\\/(.+)$/);\n  if (session?.[1]) return session[1];\n\n  // 3. Fallback: keep last 3 path segments for context\n  //    /some/unknown/deep/path/to/file.tsx  →  path/to/file.tsx\n  const segments = fullPath.split(\"/\").filter(Boolean);\n  if (segments.length > 3) return segments.slice(-3).join(\"/\");\n\n  // 4. Already relative or short — return as-is\n  return fullPath.startsWith(\"/\") ? fullPath.slice(1) : fullPath;\n}\n\n/**\n * Replace all absolute sandbox/session paths in freeform text with\n * session-relative paths.\n *\n * Handles paths embedded in commands, output listings, error messages, etc.\n * Matches both local and kubernetes path formats.\n */\n\n// Pre-compiled regexes (module-level, not per-call)\n// Order matters: most specific first to avoid partial matches\nconst SESSION_PATH_PATTERNS = [\n  // Local: .../sandboxes/uuid/sessions/uuid/REST\n  /(?:\\/[\\w._-]+)*\\/sandboxes\\/[0-9a-f-]+\\/sessions\\/[0-9a-f-]+\\//g,\n  // Kubernetes: .../sessions/uuid/REST  (no sandboxes prefix)\n  /(?:\\/[\\w._-]+)*\\/sessions\\/[0-9a-f-]+\\//g,\n];\n\nexport function sanitizePathsInText(text: string): string {\n  if (!text) return \"\";\n\n  let result = text;\n  for (const pattern of SESSION_PATH_PATTERNS) {\n    // Reset lastIndex since we reuse the regex\n    pattern.lastIndex = 0;\n    result = result.replace(pattern, \"\");\n  }\n  return result;\n}\n"
  },
  {
    "path": "web/src/app/craft/utils/streamItemHelpers.ts",
    "content": "/**\n * Stream Item Helpers\n *\n * Reduced to only utility functions that are NOT packet-processing concerns.\n * All packet parsing, tool detection, and path sanitization now live in parsePacket.ts.\n */\n\n/**\n * Generate a unique ID for stream items\n */\nexport function genId(prefix: string): string {\n  return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;\n}\n\n/**\n * Check if a tool call should be included in a \"Working\" pill.\n * Returns true for all tool calls except task/subagent tools.\n * Working tools: glob, grep, read, edit, write, bash, webfetch, websearch, etc.\n */\nexport function isWorkingToolCall(toolCall: {\n  kind: string;\n  subagentType?: string;\n}): boolean {\n  // Task tools (subagents) are kept as separate pills\n  if (toolCall.kind === \"task\") return false;\n  if (toolCall.subagentType) return false;\n  return true;\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/configure/components/ComingSoonConnectors.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Content } from \"@opal/layouts\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { ValidSources } from \"@/lib/types\";\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport RequestConnectorModal from \"@/app/craft/v1/configure/components/RequestConnectorModal\";\nimport {\n  OutlookIcon,\n  OneDriveIcon,\n  BoxIcon,\n  TrelloIcon,\n  ServiceNowIcon,\n} from \"@/components/icons/icons\";\n\n// Coming soon connectors - organized by ecosystem\nconst COMING_SOON_CONNECTORS: ValidSources[] = [\n  // Microsoft\n  ValidSources.Sharepoint,\n  ValidSources.Teams,\n  ValidSources.Imap, // Outlook via IMAP\n  // Atlassian\n  ValidSources.Confluence,\n  ValidSources.Jira,\n  ValidSources.Bitbucket,\n  // Git/GitLab\n  ValidSources.GitLab,\n  // Cloud Storage\n  ValidSources.Dropbox,\n  // Salesforce\n  ValidSources.Salesforce,\n  ValidSources.Gong,\n  // Knowledge Base/Wiki\n  ValidSources.Bookstack,\n  ValidSources.Discord,\n  ValidSources.Zendesk,\n  ValidSources.Freshdesk,\n  ValidSources.Egnyte,\n  // Project Management\n  ValidSources.Asana,\n  ValidSources.Clickup,\n  ValidSources.Productboard,\n  // Knowledge Base/Wiki\n  ValidSources.Outline,\n  ValidSources.Slab,\n  ValidSources.Coda,\n  ValidSources.Guru,\n  ValidSources.Document360,\n  ValidSources.Gitbook,\n  ValidSources.Highspot,\n  ValidSources.DrupalWiki,\n  ValidSources.Discourse,\n  ValidSources.Axero,\n  // Messaging/Collaboration\n  ValidSources.Zulip,\n  // Other\n  ValidSources.Loopio,\n  ValidSources.Xenforo,\n];\n\nexport default function ComingSoonConnectors() {\n  const [showRequestModal, setShowRequestModal] = useState(false);\n\n  return (\n    <>\n      <Separator />\n      <div className=\"w-full flex items-center justify-between pb-2\">\n        <div className=\"flex flex-col gap-0.25\">\n          <Text mainContentEmphasis text04>\n            Coming Soon\n          </Text>\n          <Text secondaryBody text03>\n            Don't see what you're looking for? Submit a connector request!\n          </Text>\n        </div>\n        <button\n          type=\"button\"\n          onClick={() => setShowRequestModal(true)}\n          className=\"px-4 py-2 rounded-12 bg-white dark:bg-black hover:opacity-90 transition-colors whitespace-nowrap\"\n        >\n          <Text\n            mainUiAction\n            className=\"text-text-dark-05 dark:text-text-light-05\"\n          >\n            Submit a request\n          </Text>\n        </button>\n      </div>\n      <div className=\"w-full grid grid-cols-1 md:grid-cols-4 gap-2\">\n        {COMING_SOON_CONNECTORS.flatMap((type) => {\n          const sourceMetadata = getSourceMetadata(type);\n          // Special case: IMAP should display as \"Outlook\" with custom icon\n          const displayName =\n            type === ValidSources.Imap ? \"Outlook\" : sourceMetadata.displayName;\n\n          const card = (\n            <div key={type} className=\"opacity-60\">\n              <Card variant=\"secondary\">\n                <Content\n                  icon={\n                    type === ValidSources.Imap\n                      ? OutlookIcon\n                      : sourceMetadata.icon\n                  }\n                  title={displayName}\n                  sizePreset=\"main-ui\"\n                  variant=\"body\"\n                />\n              </Card>\n            </div>\n          );\n\n          // Insert OneDrive right after Outlook\n          if (type === ValidSources.Imap) {\n            return [\n              card,\n              <div key=\"onedrive\" className=\"opacity-60\">\n                <Card variant=\"secondary\">\n                  <Content\n                    icon={OneDriveIcon}\n                    title=\"OneDrive\"\n                    sizePreset=\"main-ui\"\n                    variant=\"body\"\n                  />\n                </Card>\n              </div>,\n            ];\n          }\n\n          // Insert Box right after Discord\n          if (type === ValidSources.Discord) {\n            return [\n              card,\n              <div key=\"box\" className=\"opacity-60\">\n                <Card variant=\"secondary\">\n                  <Content\n                    icon={BoxIcon}\n                    title=\"Box\"\n                    sizePreset=\"main-ui\"\n                    variant=\"body\"\n                  />\n                </Card>\n              </div>,\n            ];\n          }\n\n          return [card];\n        })}\n        {/* Enterprise/ERP */}\n        <div className=\"opacity-60\">\n          <Card variant=\"secondary\">\n            <Content\n              icon={ServiceNowIcon}\n              title=\"ServiceNow\"\n              sizePreset=\"main-ui\"\n              variant=\"body\"\n            />\n          </Card>\n        </div>\n        {/* Project Management */}\n        <div className=\"opacity-60\">\n          <Card variant=\"secondary\">\n            <Content\n              icon={TrelloIcon}\n              title=\"Trello\"\n              sizePreset=\"main-ui\"\n              variant=\"body\"\n            />\n          </Card>\n        </div>\n      </div>\n      <RequestConnectorModal\n        open={showRequestModal}\n        onClose={() => setShowRequestModal(false)}\n      />\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/configure/components/ConfigureConnectorModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport useSWR from \"swr\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { ValidSources, ConfigurableSources } from \"@/lib/types\";\nimport { getSourceMetadata, getSourceDocLink } from \"@/lib/sources\";\nimport { SvgPlug, SvgExternalLink } from \"@opal/icons\";\nimport { Credential, credentialTemplates } from \"@/lib/connectors/credentials\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { buildSimilarCredentialInfoURL } from \"@/app/admin/connector/[ccPairId]/lib\";\nimport CredentialStep from \"@/app/craft/v1/configure/components/CredentialStep\";\nimport ConnectorConfigStep from \"@/app/craft/v1/configure/components/ConnectorConfigStep\";\nimport { OAUTH_STATE_KEY } from \"@/app/craft/v1/constants\";\nimport { connectorConfigs } from \"@/lib/connectors/connectors\";\nimport { Button } from \"@opal/components\";\nimport { Section } from \"@/layouts/general-layouts\";\n\ntype ModalStep = \"credential\" | \"configure\";\n\nfunction connectorNeedsCredentials(connectorType: ValidSources): boolean {\n  return credentialTemplates[connectorType] != null;\n}\n\nfunction connectorNeedsConfigStep(connectorType: ValidSources): boolean {\n  const config = connectorConfigs[connectorType as ConfigurableSources];\n  if (!config) return false;\n\n  // Only check main values, not advanced_values\n  // Advanced values are optional configuration and shouldn't force a 2-step flow\n  const hasVisibleValues = config.values.some(\n    (field) => !(\"hidden\" in field && field.hidden)\n  );\n\n  return hasVisibleValues;\n}\n\ninterface ConfigureConnectorModalProps {\n  connectorType: ValidSources | null;\n  existingConfig: unknown | null;\n  open: boolean;\n  onClose: () => void;\n  onSuccess: () => void;\n}\n\nexport default function ConfigureConnectorModal({\n  connectorType,\n  existingConfig,\n  open,\n  onClose,\n  onSuccess,\n}: ConfigureConnectorModalProps) {\n  const [step, setStep] = useState<ModalStep>(\"credential\");\n  const [selectedCredential, setSelectedCredential] =\n    useState<Credential<any> | null>(null);\n\n  const sourceMetadata = connectorType\n    ? getSourceMetadata(connectorType)\n    : null;\n  const isConfigured = !!existingConfig;\n\n  const needsCredentials = connectorType\n    ? connectorNeedsCredentials(connectorType)\n    : true;\n  const needsConfigStep = connectorType\n    ? connectorNeedsConfigStep(connectorType)\n    : false;\n  const isSingleStep = needsCredentials && !needsConfigStep;\n\n  // Fetch credentials for this connector type\n  const { data: credentials, mutate: refreshCredentials } = useSWR<\n    Credential<any>[]\n  >(\n    connectorType && open && !isConfigured\n      ? buildSimilarCredentialInfoURL(connectorType)\n      : null,\n    errorHandlingFetcher\n  );\n\n  useEffect(() => {\n    if (open && !isConfigured) {\n      setStep(\"credential\");\n      setSelectedCredential(null);\n    }\n  }, [open, connectorType, isConfigured]);\n\n  // Auto-select credential if there's only one\n  useEffect(() => {\n    if (credentials?.length === 1 && !selectedCredential && credentials[0]) {\n      setSelectedCredential(credentials[0]);\n    }\n  }, [credentials, selectedCredential]);\n\n  if (!connectorType || !sourceMetadata) return null;\n\n  // Don't render for configured connectors (handled by popover in ConnectorCard)\n  if (isConfigured) return null;\n\n  const handleCredentialCreated = (cred: Credential<any>) => {\n    setSelectedCredential(cred);\n    refreshCredentials();\n  };\n\n  const handleCredentialDeleted = (credId: number) => {\n    if (selectedCredential?.id === credId) {\n      setSelectedCredential(null);\n    }\n    refreshCredentials();\n  };\n\n  const handleOAuthRedirect = () => {\n    // Save state before OAuth redirect\n    sessionStorage.setItem(\n      OAUTH_STATE_KEY,\n      JSON.stringify({\n        connectorType,\n        timestamp: Date.now(),\n      })\n    );\n  };\n\n  const handleContinue = () => {\n    if (selectedCredential) {\n      setStep(\"configure\");\n    }\n  };\n\n  const handleBack = () => {\n    setStep(\"credential\");\n  };\n\n  // Dynamic title and description based on flow type\n  const getStepTitle = () => {\n    if (isSingleStep) {\n      return `Connect ${sourceMetadata.displayName}`;\n    }\n    return step === \"credential\"\n      ? `Connect ${sourceMetadata.displayName}`\n      : `Configure ${sourceMetadata.displayName}`;\n  };\n\n  const getStepDescription = () => {\n    if (isSingleStep) {\n      return \"Select or create a credential to connect\";\n    }\n    return step === \"credential\"\n      ? \"Step 1: Select or create a credential\"\n      : \"Step 2: Configure your connector\";\n  };\n\n  return (\n    <>\n      <Modal open={open} onOpenChange={onClose}>\n        <Modal.Content width=\"xl\" height=\"fit\">\n          <Modal.Header\n            icon={SvgPlug}\n            title={getStepTitle()}\n            description={getStepDescription()}\n            onClose={onClose}\n          />\n          <Modal.Body>\n            {getSourceDocLink(connectorType) && (\n              <Section flexDirection=\"row\" justifyContent=\"end\" width=\"full\">\n                <div className=\"pr-10\">\n                  <Button\n                    variant=\"action\"\n                    prominence=\"tertiary\"\n                    rightIcon={SvgExternalLink}\n                    href={getSourceDocLink(connectorType)!}\n                    target=\"_blank\"\n                  >\n                    View setup documentation\n                  </Button>\n                </div>\n              </Section>\n            )}\n            {step === \"credential\" ? (\n              <CredentialStep\n                connectorType={connectorType}\n                credentials={credentials || []}\n                selectedCredential={selectedCredential}\n                onSelectCredential={setSelectedCredential}\n                onCredentialCreated={handleCredentialCreated}\n                onCredentialDeleted={handleCredentialDeleted}\n                onContinue={handleContinue}\n                onOAuthRedirect={handleOAuthRedirect}\n                refresh={refreshCredentials}\n                isSingleStep={isSingleStep}\n                onConnectorSuccess={onSuccess}\n              />\n            ) : selectedCredential ? (\n              <ConnectorConfigStep\n                connectorType={connectorType}\n                credential={selectedCredential}\n                onSuccess={onSuccess}\n                onBack={handleBack}\n              />\n            ) : null}\n          </Modal.Body>\n        </Modal.Content>\n      </Modal>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/configure/components/ConfigureOverlays.tsx",
    "content": "\"use client\";\n\nimport { cn } from \"@/lib/utils\";\nimport Message from \"@/refresh-components/messages/Message\";\n\ninterface ConnectorInfoOverlayProps {\n  visible: boolean;\n}\n\nexport function ConnectorInfoOverlay({ visible }: ConnectorInfoOverlayProps) {\n  return (\n    <div\n      className={cn(\n        \"fixed bottom-16 left-1/2 -translate-x-1/2 z-toast transition-all duration-300 ease-in-out\",\n        visible\n          ? \"opacity-100 translate-y-0\"\n          : \"opacity-0 translate-y-4 pointer-events-none\"\n      )}\n    >\n      <Message\n        info\n        text=\"Existing sessions won't have access to this data\"\n        description=\"Once synced, documents from this connector will be available in your new sessions!\"\n        close={false}\n      />\n    </div>\n  );\n}\n\ninterface ReprovisionWarningOverlayProps {\n  visible: boolean;\n  onUpdate?: () => void;\n  isUpdating?: boolean;\n}\n\nexport function ReprovisionWarningOverlay({\n  visible,\n  onUpdate,\n  isUpdating,\n}: ReprovisionWarningOverlayProps) {\n  return (\n    <div\n      className={cn(\n        \"fixed bottom-16 left-1/2 -translate-x-1/2 z-toast transition-all duration-300 ease-in-out\",\n        visible\n          ? \"opacity-100 translate-y-0\"\n          : \"opacity-0 translate-y-4 pointer-events-none\"\n      )}\n    >\n      <Message\n        warning\n        text={isUpdating ? \"Updating...\" : \"Click Update to apply your changes\"}\n        description=\"Your sandbox will be recreated with your new settings. Previously running sessions will not be affected by your changes.\"\n        close={false}\n        actions={isUpdating ? false : \"Update\"}\n        onAction={isUpdating ? undefined : onUpdate}\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/configure/components/ConnectorCard.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport Popover from \"@/refresh-components/Popover\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { ContentAction } from \"@opal/layouts\";\nimport { ValidSources } from \"@/lib/types\";\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport { SvgMoreHorizontal, SvgPlug, SvgSettings, SvgTrash } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport { useRouter } from \"next/navigation\";\nimport { cn } from \"@/lib/utils\";\n\nexport type ConnectorStatus =\n  | \"not_connected\"\n  | \"connected\"\n  | \"connected_with_errors\"\n  | \"indexing\"\n  | \"error\"\n  | \"deleting\";\n\nexport interface BuildConnectorConfig {\n  cc_pair_id: number;\n  connector_id: number;\n  credential_id: number;\n  source: string;\n  name: string;\n  status: ConnectorStatus;\n  docs_indexed: number;\n  last_indexed: string | null;\n  error_message?: string | null;\n}\n\ninterface ConnectorCardProps {\n  connectorType: ValidSources;\n  config: BuildConnectorConfig | null;\n  onConfigure: () => void;\n  onDelete: () => void;\n}\n\nfunction getStatusText(status: ConnectorStatus, docsIndexed: number): string {\n  switch (status) {\n    case \"connected\":\n      return docsIndexed > 0\n        ? `${docsIndexed.toLocaleString()} docs`\n        : \"Connected\";\n    case \"connected_with_errors\":\n      return docsIndexed > 0\n        ? `${docsIndexed.toLocaleString()} docs`\n        : \"Connected, has errors\";\n    case \"indexing\":\n      return \"Syncing...\";\n    case \"error\":\n      return \"Error\";\n    case \"deleting\":\n      return \"Deleting...\";\n    case \"not_connected\":\n    default:\n      return \"Not connected\";\n  }\n}\n\nexport default function ConnectorCard({\n  connectorType,\n  config,\n  onConfigure,\n  onDelete,\n}: ConnectorCardProps) {\n  const [popoverOpen, setPopoverOpen] = useState(false);\n  const router = useRouter();\n  const sourceMetadata = getSourceMetadata(connectorType);\n  const status: ConnectorStatus = config?.status || \"not_connected\";\n  const isConnected = status !== \"not_connected\" && status !== \"deleting\";\n  const isDeleting = status === \"deleting\";\n\n  // Check if this connector type is always available (doesn't need connection setup)\n  const isAlwaysConnected = sourceMetadata.alwaysConnected ?? false;\n  const customDescription = sourceMetadata.customDescription;\n\n  const handleCardClick = () => {\n    if (isDeleting) {\n      return; // No action while deleting\n    }\n    // Always-connected connectors always go to onConfigure\n    if (isAlwaysConnected) {\n      onConfigure();\n      return;\n    }\n    if (isConnected) {\n      setPopoverOpen(true);\n    } else {\n      onConfigure();\n    }\n  };\n\n  // Always-connected connectors show a settings icon\n  // Regular connectors show popover menu when connected, plug icon when not\n  const rightContent = isDeleting ? null : isAlwaysConnected ? (\n    <Button prominence=\"internal\" icon={SvgSettings} />\n  ) : isConnected ? (\n    <Popover open={popoverOpen} onOpenChange={setPopoverOpen}>\n      <Popover.Trigger asChild>\n        <Button\n          icon={SvgMoreHorizontal}\n          prominence=\"tertiary\"\n          onClick={(e) => {\n            e.stopPropagation();\n            setPopoverOpen(!popoverOpen);\n          }}\n        />\n      </Popover.Trigger>\n      <Popover.Content side=\"right\" align=\"start\" sideOffset={4}>\n        <Popover.Menu>\n          <LineItem\n            key=\"manage\"\n            icon={SvgSettings}\n            onClick={(e) => {\n              e.stopPropagation();\n              setPopoverOpen(false);\n              router.push(`/admin/connector/${config?.cc_pair_id}`);\n            }}\n          >\n            Manage connector\n          </LineItem>\n          <LineItem\n            key=\"delete\"\n            danger\n            icon={SvgTrash}\n            onClick={(e) => {\n              e.stopPropagation();\n              setPopoverOpen(false);\n              onDelete();\n            }}\n          >\n            Disconnect\n          </LineItem>\n        </Popover.Menu>\n      </Popover.Content>\n    </Popover>\n  ) : (\n    <Button icon={SvgPlug} prominence=\"tertiary\" size=\"sm\" />\n  );\n\n  // Always-connected connectors show as \"primary\" variant\n  const cardVariant =\n    isAlwaysConnected || isConnected ? \"primary\" : \"secondary\";\n\n  const descriptionText =\n    customDescription ?? getStatusText(status, config?.docs_indexed || 0);\n\n  return (\n    <div\n      className={cn(!isDeleting && \"cursor-pointer\")}\n      onClick={handleCardClick}\n    >\n      <Card variant={cardVariant}>\n        <ContentAction\n          icon={sourceMetadata.icon}\n          title={sourceMetadata.displayName}\n          description={descriptionText}\n          sizePreset=\"main-content\"\n          variant=\"section\"\n          rightChildren={rightContent}\n        />\n      </Card>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/configure/components/ConnectorConfigStep.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { Formik, Form, useFormikContext } from \"formik\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { Button } from \"@opal/components\";\nimport { toast } from \"@/hooks/useToast\";\nimport { ValidSources } from \"@/lib/types\";\nimport { Credential } from \"@/lib/connectors/credentials\";\nimport Separator from \"@/refresh-components/Separator\";\nimport {\n  connectorConfigs,\n  createConnectorInitialValues,\n} from \"@/lib/connectors/connectors\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { RenderField } from \"@/app/admin/connectors/[connector]/pages/FieldRendering\";\nimport { createBuildConnector } from \"@/app/craft/v1/configure/utils/createBuildConnector\";\nimport { useUser } from \"@/providers/UserProvider\";\n\ninterface ConnectorConfigStepProps {\n  connectorType: ValidSources;\n  credential: Credential<any>;\n  onSuccess: () => void;\n  onBack: () => void;\n}\n\nfunction ConnectorConfigForm({\n  connectorType,\n  credential,\n  onSuccess,\n  onBack,\n}: ConnectorConfigStepProps) {\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const { values } = useFormikContext<Record<string, any>>();\n  const { user } = useUser();\n\n  const config =\n    connectorConfigs[connectorType as keyof typeof connectorConfigs];\n\n  const handleSubmit = async () => {\n    setIsSubmitting(true);\n\n    try {\n      // Extract connector_name and exclude access_type/groups (these are top-level fields)\n      const { connector_name, access_type, groups, ...connectorConfig } =\n        values;\n\n      const result = await createBuildConnector({\n        connectorType,\n        credential,\n        connectorSpecificConfig: connectorConfig,\n        connectorName: connector_name,\n        userEmail: user?.email,\n      });\n\n      if (!result.success) {\n        throw new Error(result.error);\n      }\n\n      onSuccess();\n    } catch (err) {\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to create connector\"\n      );\n    } finally {\n      setIsSubmitting(false);\n    }\n  };\n\n  const hasConfigFields = config?.values && config.values.length > 0;\n\n  return (\n    <Form className=\"w-full flex flex-col items-center\">\n      <CardSection className=\"flex flex-col gap-y-4\">\n        {hasConfigFields &&\n          config.values.map((field) => (\n            <RenderField\n              key={field.name}\n              field={field}\n              values={values}\n              connector={connectorType as any}\n              currentCredential={credential}\n            />\n          ))}\n        <Separator />\n        {config?.advanced_values &&\n          config.advanced_values.length > 0 &&\n          config.advanced_values.map((field) => (\n            <RenderField\n              key={field.name}\n              field={field}\n              values={values}\n              connector={connectorType as any}\n              currentCredential={credential}\n            />\n          ))}\n        <Section flexDirection=\"row\" justifyContent=\"between\" height=\"fit\">\n          <Button\n            disabled={isSubmitting}\n            prominence=\"secondary\"\n            onClick={onBack}\n          >\n            Back\n          </Button>\n          <Button disabled={isSubmitting} type=\"button\" onClick={handleSubmit}>\n            {isSubmitting ? \"Creating...\" : \"Create Connector\"}\n          </Button>\n        </Section>\n      </CardSection>\n    </Form>\n  );\n}\n\nfunction getUserIdentifier(email?: string): string {\n  if (!email) return \"\";\n  const prefix = email.split(\"@\")[0] || email;\n  return `-${prefix.replace(/[^a-zA-Z0-9]/g, \"-\")}`;\n}\n\nexport default function ConnectorConfigStep({\n  connectorType,\n  credential,\n  onSuccess,\n  onBack,\n}: ConnectorConfigStepProps) {\n  const { user } = useUser();\n  const baseInitialValues = createConnectorInitialValues(connectorType as any);\n  const userIdentifier = getUserIdentifier(user?.email);\n  const initialValues: Record<string, any> = {\n    ...baseInitialValues,\n    connector_name: `build-mode-${connectorType}${userIdentifier}`,\n  };\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      onSubmit={() => {}}\n      enableReinitialize\n    >\n      <ConnectorConfigForm\n        connectorType={connectorType}\n        credential={credential}\n        onSuccess={onSuccess}\n        onBack={onBack}\n      />\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/configure/components/CreateCredentialInline.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { Formik, Form } from \"formik\";\nimport * as Yup from \"yup\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport { TextFormField } from \"@/components/Field\";\nimport { ValidSources } from \"@/lib/types\";\nimport {\n  Credential,\n  credentialTemplates,\n  getDisplayNameForCredentialKey,\n} from \"@/lib/connectors/credentials\";\nimport { createCredential } from \"@/lib/credential\";\nimport { getSourceMetadata } from \"@/lib/sources\";\n\ninterface CreateCredentialInlineProps {\n  connectorType: ValidSources;\n  onSuccess: (credential: Credential<any>) => void;\n  onCancel: () => void;\n}\n\nexport default function CreateCredentialInline({\n  connectorType,\n  onSuccess,\n  onCancel,\n}: CreateCredentialInlineProps) {\n  const [error, setError] = useState<string | null>(null);\n  const [isSubmitting, setIsSubmitting] = useState(false);\n\n  const sourceMetadata = getSourceMetadata(connectorType);\n  const credentialTemplate = credentialTemplates[connectorType];\n\n  if (!credentialTemplate) {\n    return (\n      <Section gap={0.5} alignItems=\"center\" height=\"fit\">\n        <Text secondaryBody text03>\n          No credential configuration available for {sourceMetadata.displayName}\n          .\n        </Text>\n        <Button variant=\"action\" prominence=\"secondary\" onClick={onCancel}>\n          Cancel\n        </Button>\n      </Section>\n    );\n  }\n\n  // Build initial values and validation schema from template\n  const initialValues: Record<string, string> = {};\n  const schemaFields: Record<string, Yup.StringSchema> = {};\n\n  // Filter out metadata fields and build form config\n  Object.entries(credentialTemplate).forEach(([key, value]) => {\n    if (key === \"authentication_method\" || key === \"authMethods\") {\n      return;\n    }\n    initialValues[key] = typeof value === \"string\" ? value : \"\";\n    schemaFields[key] = Yup.string().required(\n      `${getDisplayNameForCredentialKey(key)} is required`\n    );\n  });\n\n  // Add credential name field\n  initialValues[\"credential_name\"] = \"\";\n\n  const validationSchema = Yup.object().shape(schemaFields);\n\n  const handleSubmit = async (values: Record<string, string>) => {\n    setIsSubmitting(true);\n    setError(null);\n\n    try {\n      // Extract credential name and build credential_json\n      const { credential_name, ...credentialFields } = values;\n\n      const response = await createCredential({\n        credential_json: credentialFields,\n        admin_public: false,\n        source: connectorType,\n        name: credential_name || `${sourceMetadata.displayName} Credential`,\n      });\n\n      if (!response.ok) {\n        const errorData = await response.json();\n        throw new Error(errorData.detail || \"Failed to create credential\");\n      }\n\n      const credential = await response.json();\n      onSuccess(credential);\n    } catch (err) {\n      setError(\n        err instanceof Error ? err.message : \"Failed to create credential\"\n      );\n    } finally {\n      setIsSubmitting(false);\n    }\n  };\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      onSubmit={handleSubmit}\n    >\n      {({ isValid, dirty }) => (\n        <Form>\n          <Section gap={1} alignItems=\"stretch\" height=\"fit\">\n            <TextFormField\n              name=\"credential_name\"\n              label=\"Credential Name\"\n              placeholder={`My ${sourceMetadata.displayName} Credential`}\n              type=\"text\"\n            />\n\n            {Object.entries(credentialTemplate).map(([key, value]) => {\n              // Skip metadata fields\n              if (key === \"authentication_method\" || key === \"authMethods\") {\n                return null;\n              }\n\n              const isSecret =\n                key.toLowerCase().includes(\"token\") ||\n                key.toLowerCase().includes(\"password\") ||\n                key.toLowerCase().includes(\"secret\") ||\n                key.toLowerCase().includes(\"key\");\n\n              return (\n                <TextFormField\n                  key={key}\n                  name={key}\n                  label={getDisplayNameForCredentialKey(key)}\n                  placeholder={typeof value === \"string\" ? value : \"\"}\n                  type={isSecret ? \"password\" : \"text\"}\n                />\n              );\n            })}\n\n            {error && (\n              <Text secondaryBody className=\"text-status-error-05\">\n                {error}\n              </Text>\n            )}\n\n            <Section\n              flexDirection=\"row\"\n              justifyContent=\"end\"\n              gap={0.5}\n              height=\"fit\"\n            >\n              <Button\n                disabled={isSubmitting}\n                variant=\"action\"\n                prominence=\"secondary\"\n                onClick={onCancel}\n              >\n                Cancel\n              </Button>\n              <Button\n                disabled={!isValid || !dirty || isSubmitting}\n                variant=\"action\"\n                type=\"submit\"\n              >\n                {isSubmitting ? \"Creating...\" : \"Create Credential\"}\n              </Button>\n            </Section>\n          </Section>\n        </Form>\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/configure/components/CredentialStep.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { Button } from \"@opal/components\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { SvgKey } from \"@opal/icons\";\nimport {\n  ConfigurableSources,\n  ValidSources,\n  oauthSupportedSources,\n} from \"@/lib/types\";\nimport { Credential } from \"@/lib/connectors/credentials\";\nimport { getSourceDisplayName } from \"@/lib/sources\";\nimport {\n  useOAuthDetails,\n  getConnectorOauthRedirectUrl,\n} from \"@/lib/connectors/oauth\";\nimport { deleteCredential } from \"@/lib/credential\";\nimport ModifyCredential from \"@/components/credentials/actions/ModifyCredential\";\nimport CreateCredential from \"@/components/credentials/actions/CreateCredential\";\nimport { CreateStdOAuthCredential } from \"@/components/credentials/actions/CreateStdOAuthCredential\";\nimport { GmailMain } from \"@/app/admin/connectors/[connector]/pages/gmail/GmailPage\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { Spinner } from \"@/components/Spinner\";\nimport {\n  NEXT_PUBLIC_CLOUD_ENABLED,\n  NEXT_PUBLIC_TEST_ENV,\n} from \"@/lib/constants\";\nimport {\n  CRAFT_CONFIGURE_PATH,\n  CRAFT_OAUTH_COOKIE_NAME,\n} from \"@/app/craft/v1/constants\";\nimport Cookies from \"js-cookie\";\nimport { toast } from \"@/hooks/useToast\";\nimport { createBuildConnector } from \"@/app/craft/v1/configure/utils/createBuildConnector\";\nimport { useUser } from \"@/providers/UserProvider\";\n\ninterface CredentialStepProps {\n  connectorType: ValidSources;\n  credentials: Credential<any>[];\n  selectedCredential: Credential<any> | null;\n  onSelectCredential: (cred: Credential<any>) => void;\n  onCredentialCreated: (cred: Credential<any>) => void;\n  onCredentialDeleted: (credId: number) => void;\n  onContinue: () => void;\n  onOAuthRedirect: () => void;\n  refresh?: () => void;\n  isSingleStep?: boolean;\n  onConnectorSuccess?: () => void;\n}\n\nexport default function CredentialStep({\n  connectorType,\n  credentials,\n  selectedCredential,\n  onSelectCredential,\n  onCredentialCreated,\n  onCredentialDeleted,\n  onContinue,\n  onOAuthRedirect,\n  refresh = () => {},\n  isSingleStep = false,\n  onConnectorSuccess,\n}: CredentialStepProps) {\n  const [createCredentialFormToggle, setCreateCredentialFormToggle] =\n    useState(false);\n  const [isAuthorizing, setIsAuthorizing] = useState(false);\n  const [isConnecting, setIsConnecting] = useState(false);\n  const { user } = useUser();\n\n  const { data: oauthDetails, isLoading: oauthDetailsLoading } =\n    useOAuthDetails(connectorType);\n\n  const isAuthorizeVisible =\n    oauthDetails?.oauth_enabled !== true ||\n    (oauthDetails?.additional_kwargs?.length ?? 0) === 0;\n\n  const handleAuthorize = async () => {\n    setIsAuthorizing(true);\n    onOAuthRedirect();\n\n    const redirectUrl = await getConnectorOauthRedirectUrl(connectorType, {\n      desired_return_url: `${window.location.origin}${CRAFT_CONFIGURE_PATH}`,\n    });\n    if (redirectUrl) {\n      window.location.href = redirectUrl;\n    } else {\n      setIsAuthorizing(false);\n      console.error(\"Failed to get OAuth redirect URL\");\n    }\n  };\n\n  const handleConnect = async () => {\n    if (!selectedCredential || !isSingleStep) return;\n\n    setIsConnecting(true);\n\n    try {\n      const result = await createBuildConnector({\n        connectorType,\n        credential: selectedCredential,\n        userEmail: user?.email,\n      });\n\n      if (!result.success) {\n        throw new Error(result.error);\n      }\n\n      onConnectorSuccess?.();\n    } catch (err) {\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to create connector\"\n      );\n    } finally {\n      setIsConnecting(false);\n    }\n  };\n\n  const handleDeleteCredential = async (credential: Credential<any>) => {\n    try {\n      const response = await deleteCredential(credential.id);\n      if (response.ok) {\n        onCredentialDeleted(credential.id);\n      } else {\n        console.error(\"Failed to delete credential\");\n      }\n    } catch (error) {\n      console.error(\"Error deleting credential:\", error);\n    }\n  };\n\n  const handleSwap = (newCredential: Credential<any>) => {\n    onSelectCredential(newCredential);\n  };\n\n  const hasCredentials = credentials.length > 0;\n\n  return (\n    <Section flexDirection=\"column\" alignItems=\"center\" height=\"fit\">\n      <CardSection>\n        {connectorType === ValidSources.Gmail ? (\n          <GmailMain\n            buildMode\n            onOAuthRedirect={onOAuthRedirect}\n            onCredentialCreated={async (credential) => {\n              onSelectCredential(credential);\n              // For single-step connectors (like Gmail), create connector immediately\n              // For multi-step connectors, continue to config step\n              if (isSingleStep && onConnectorSuccess) {\n                // Create connector immediately for single-step flow\n                setIsConnecting(true);\n                try {\n                  const result = await createBuildConnector({\n                    connectorType,\n                    credential: credential,\n                    userEmail: user?.email,\n                  });\n\n                  if (!result.success) {\n                    throw new Error(result.error);\n                  }\n\n                  onConnectorSuccess();\n                } catch (err) {\n                  toast.error(\n                    err instanceof Error\n                      ? err.message\n                      : \"Failed to create connector\"\n                  );\n                } finally {\n                  setIsConnecting(false);\n                }\n              } else {\n                onContinue();\n              }\n            }}\n          />\n        ) : (\n          <>\n            <ModifyCredential\n              showIfEmpty\n              accessType=\"public\"\n              defaultedCredential={selectedCredential!}\n              credentials={credentials}\n              editableCredentials={credentials}\n              onDeleteCredential={handleDeleteCredential}\n              onSwitch={handleSwap}\n            />\n            {!createCredentialFormToggle && (\n              <div className=\"mt-6 flex gap-4 justify-between items-center\">\n                <div className=\"flex gap-4\">\n                  <Button\n                    onClick={async () => {\n                      if (oauthDetails && oauthDetails.oauth_enabled) {\n                        if (oauthDetails.additional_kwargs.length > 0) {\n                          setCreateCredentialFormToggle(true);\n                        } else {\n                          const redirectUrl =\n                            await getConnectorOauthRedirectUrl(connectorType, {\n                              desired_return_url: `${window.location.origin}${CRAFT_CONFIGURE_PATH}`,\n                            });\n                          if (redirectUrl) {\n                            onOAuthRedirect();\n                            window.location.href = redirectUrl;\n                          } else {\n                            setCreateCredentialFormToggle(\n                              (createConnectorToggle) => !createConnectorToggle\n                            );\n                          }\n                        }\n                      } else {\n                        if (connectorType === ValidSources.GoogleDrive) {\n                          Cookies.set(CRAFT_OAUTH_COOKIE_NAME, \"true\", {\n                            path: \"/\",\n                          });\n                          onOAuthRedirect();\n                        }\n                        setCreateCredentialFormToggle(\n                          (createConnectorToggle) => !createConnectorToggle\n                        );\n                      }\n                    }}\n                  >\n                    Create New\n                  </Button>\n                  {oauthSupportedSources.includes(\n                    connectorType as ConfigurableSources\n                  ) &&\n                    (NEXT_PUBLIC_CLOUD_ENABLED || NEXT_PUBLIC_TEST_ENV) && (\n                      <Button\n                        disabled={isAuthorizing}\n                        variant=\"action\"\n                        onClick={handleAuthorize}\n                        hidden={!isAuthorizeVisible}\n                      >\n                        {isAuthorizing\n                          ? \"Authorizing...\"\n                          : `Authorize with ${getSourceDisplayName(\n                              connectorType\n                            )}`}\n                      </Button>\n                    )}\n                </div>\n                {hasCredentials && (\n                  <Button\n                    disabled={!selectedCredential || isConnecting}\n                    onClick={isSingleStep ? handleConnect : onContinue}\n                  >\n                    {isSingleStep\n                      ? isConnecting\n                        ? \"Connecting...\"\n                        : \"Connect\"\n                      : \"Continue\"}\n                  </Button>\n                )}\n              </div>\n            )}\n\n            {createCredentialFormToggle && (\n              <Modal\n                open\n                onOpenChange={() => setCreateCredentialFormToggle(false)}\n              >\n                <Modal.Content width=\"xl\" height=\"fit\">\n                  <Modal.Header\n                    icon={SvgKey}\n                    title={`Create a ${getSourceDisplayName(\n                      connectorType\n                    )} credential`}\n                    onClose={() => setCreateCredentialFormToggle(false)}\n                  />\n                  <Modal.Body>\n                    {oauthDetailsLoading ? (\n                      <Spinner />\n                    ) : (\n                      <>\n                        {oauthDetails && oauthDetails.oauth_enabled ? (\n                          <CreateStdOAuthCredential\n                            sourceType={connectorType}\n                            additionalFields={oauthDetails.additional_kwargs}\n                          />\n                        ) : (\n                          <CreateCredential\n                            close\n                            refresh={refresh}\n                            sourceType={connectorType}\n                            accessType=\"public\"\n                            onSwitch={async (cred) => {\n                              onCredentialCreated(cred);\n                              setCreateCredentialFormToggle(false);\n                            }}\n                            onClose={() => setCreateCredentialFormToggle(false)}\n                          />\n                        )}\n                      </>\n                    )}\n                  </Modal.Body>\n                </Modal.Content>\n              </Modal>\n            )}\n          </>\n        )}\n      </CardSection>\n    </Section>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/configure/components/DemoDataConfirmModal.tsx",
    "content": "\"use client\";\n\nimport Text from \"@/refresh-components/texts/Text\";\n\ninterface DemoDataConfirmModalProps {\n  open: boolean;\n  onClose: () => void;\n  pendingDemoDataEnabled: boolean | null;\n  onConfirm: () => void;\n}\n\nexport default function DemoDataConfirmModal({\n  open,\n  onClose,\n  pendingDemoDataEnabled,\n  onConfirm,\n}: DemoDataConfirmModalProps) {\n  if (!open) return null;\n\n  return (\n    <div className=\"fixed inset-0 z-50 flex items-center justify-center\">\n      {/* Backdrop */}\n      <div\n        className=\"absolute inset-0 bg-black/50 backdrop-blur-sm\"\n        onClick={onClose}\n      />\n\n      {/* Modal */}\n      <div className=\"relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01\">\n        <div className=\"p-6 flex flex-col gap-6\">\n          {/* Header */}\n          <div className=\"flex items-center justify-center\">\n            <Text headingH2 text05>\n              Confirm Demo Data Change\n            </Text>\n          </div>\n\n          {/* Message */}\n          <div className=\"flex justify-center\">\n            <Text mainUiBody text04 className=\"text-center\">\n              Are you sure you want to{\" \"}\n              {pendingDemoDataEnabled ? \"enable\" : \"disable\"} demo data?\n              <br />\n              Your sandbox will be re-initialized with your new data set\n            </Text>\n          </div>\n\n          {/* Action buttons */}\n          <div className=\"flex items-center justify-center gap-3\">\n            <button\n              type=\"button\"\n              onClick={onClose}\n              className=\"px-4 py-2 rounded-12 bg-background-neutral-01 border border-border-02 hover:opacity-90 transition-colors\"\n            >\n              <Text mainUiBody text05>\n                Cancel\n              </Text>\n            </button>\n            <button\n              type=\"button\"\n              onClick={onConfirm}\n              className=\"px-4 py-2 rounded-12 bg-black dark:bg-white hover:opacity-90 transition-colors\"\n            >\n              <Text\n                mainUiAction\n                className=\"text-text-light-05 dark:text-text-dark-05\"\n              >\n                Confirm\n              </Text>\n            </button>\n          </div>\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/configure/components/RequestConnectorModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\n\ninterface RequestConnectorModalProps {\n  open: boolean;\n  onClose: () => void;\n}\n\nexport default function RequestConnectorModal({\n  open,\n  onClose,\n}: RequestConnectorModalProps) {\n  const [connectorName, setConnectorName] = useState(\"\");\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const [errorMessage, setErrorMessage] = useState<string | null>(null);\n  const [successMessage, setSuccessMessage] = useState<string | null>(null);\n\n  const handleClose = () => {\n    setConnectorName(\"\");\n    setErrorMessage(null);\n    setSuccessMessage(null);\n    onClose();\n  };\n\n  const handleSubmit = async (e?: React.FormEvent) => {\n    e?.preventDefault();\n    if (!connectorName.trim()) return;\n\n    setIsSubmitting(true);\n    setErrorMessage(null);\n    setSuccessMessage(null);\n\n    try {\n      const response = await fetch(\"/api/manage/connector-request\", {\n        method: \"POST\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({\n          connector_name: connectorName.trim(),\n        }),\n      });\n\n      const data = await response.json();\n\n      if (!response.ok) {\n        throw new Error(data.detail || \"Failed to submit connector request\");\n      }\n\n      setSuccessMessage(\n        data.message ||\n          \"Connector request submitted successfully. We'll prioritize popular requests!\"\n      );\n\n      setTimeout(() => {\n        handleClose();\n      }, 2000);\n    } catch (error) {\n      console.error(\"Failed to submit connector request:\", error);\n      setErrorMessage(\n        error instanceof Error\n          ? error.message\n          : \"Failed to submit connector request. Please try again.\"\n      );\n    } finally {\n      setIsSubmitting(false);\n    }\n  };\n\n  if (!open) return null;\n\n  const isCloud = NEXT_PUBLIC_CLOUD_ENABLED;\n  const DISCORD_URL = \"https://discord.gg/4NA5SbzrWb\";\n\n  return (\n    <div className=\"fixed inset-0 z-50 flex items-center justify-center\">\n      {/* Backdrop */}\n      <div\n        className=\"absolute inset-0 bg-black/50 backdrop-blur-sm\"\n        onClick={handleClose}\n      />\n\n      {/* Modal */}\n      <div className=\"relative z-10 w-full max-w-xl mx-4 bg-background-tint-01 rounded-16 shadow-lg border border-border-01\">\n        <div className=\"p-6 flex flex-col gap-6\">\n          <div className=\"flex items-center justify-center\">\n            <Text headingH2 text05>\n              Request a Connector\n            </Text>\n          </div>\n\n          <div className=\"flex flex-col gap-3\">\n            <Text mainUiBody text04 className=\"text-center\">\n              Let us know which connectors you'd like to craft with\n              <br />\n              We'll prioritize popular requests!\n            </Text>\n\n            {successMessage && (\n              <div className=\"px-4 py-3 rounded-12 bg-status-success-00 border border-status-success-02\">\n                <Text mainUiBody text05 className=\"text-status-success-05\">\n                  {successMessage}\n                </Text>\n              </div>\n            )}\n\n            {errorMessage && (\n              <div className=\"px-4 py-3 rounded-12 bg-status-error-00 border border-status-error-02\">\n                <Text mainUiBody text05 className=\"text-status-error-05\">\n                  {errorMessage}\n                </Text>\n              </div>\n            )}\n\n            {isCloud ? (\n              // Cloud: Show form with text input\n              <>\n                <form\n                  onSubmit={handleSubmit}\n                  className=\"flex flex-col gap-4 items-center\"\n                >\n                  <input\n                    id=\"connector-name\"\n                    type=\"text\"\n                    value={connectorName}\n                    onChange={(e) => {\n                      setConnectorName(e.target.value);\n                      if (errorMessage) setErrorMessage(null);\n                    }}\n                    placeholder=\"e.g., ServiceNow, Workday, etc.\"\n                    className=\"px-4 py-2 rounded-12 bg-background-tint-00 border border-border-01 text-text-05 placeholder:text-text-02 focus:outline-none focus:ring-2 focus:ring-border-01 text-center max-w-md w-full\"\n                    disabled={isSubmitting || !!successMessage}\n                  />\n                </form>\n\n                <div className=\"flex items-center justify-center gap-3 pt-2 max-w-md w-full mx-auto\">\n                  <button\n                    type=\"button\"\n                    onClick={handleClose}\n                    disabled={isSubmitting}\n                    className=\"flex-1 px-4 py-2 rounded-12 bg-background-neutral-01 border border-border-02 hover:opacity-90 transition-colors disabled:opacity-50 disabled:cursor-not-allowed\"\n                  >\n                    <Text mainUiBody text05>\n                      {successMessage ? \"Close\" : \"Cancel\"}\n                    </Text>\n                  </button>\n                  {!successMessage && (\n                    <button\n                      type=\"button\"\n                      onClick={handleSubmit}\n                      disabled={!connectorName.trim() || isSubmitting}\n                      className={cn(\n                        \"flex-1 px-4 py-2 rounded-12 transition-colors\",\n                        !connectorName.trim() || isSubmitting\n                          ? \"bg-background-neutral-01 text-text-02 cursor-not-allowed\"\n                          : \"bg-black dark:bg-white hover:opacity-90\"\n                      )}\n                    >\n                      <Text\n                        mainUiAction\n                        className={\n                          !connectorName.trim() || isSubmitting\n                            ? \"text-text-02\"\n                            : \"text-text-light-05 dark:text-text-dark-05\"\n                        }\n                      >\n                        {isSubmitting ? \"Submitting...\" : \"Submit Request\"}\n                      </Text>\n                    </button>\n                  )}\n                </div>\n              </>\n            ) : (\n              // Self-hosted: Show email link and Discord button\n              <>\n                <div className=\"flex flex-col gap-4 items-center\">\n                  <Text mainUiBody text04 className=\"text-center\">\n                    Email your request to{\" \"}\n                    <a\n                      href=\"mailto:hello@onyx.app?subject=Onyx Craft Connector Request\"\n                      className=\"text-blue-600 dark:text-blue-400 hover:underline\"\n                    >\n                      hello@onyx.app\n                    </a>\n                  </Text>\n                </div>\n\n                <div className=\"flex items-center justify-center gap-3 pt-2 max-w-md w-full mx-auto\">\n                  <button\n                    type=\"button\"\n                    onClick={handleClose}\n                    className=\"flex-1 px-4 py-2 rounded-12 bg-background-neutral-01 border border-border-02 hover:opacity-90 transition-colors\"\n                  >\n                    <Text mainUiBody text05>\n                      Close\n                    </Text>\n                  </button>\n                  <a\n                    href={DISCORD_URL}\n                    target=\"_blank\"\n                    rel=\"noopener noreferrer\"\n                    className=\"flex-1 px-4 py-2 rounded-12 bg-black dark:bg-white hover:opacity-90 transition-colors text-center\"\n                  >\n                    <Text\n                      mainUiAction\n                      className=\"text-text-light-05 dark:text-text-dark-05\"\n                    >\n                      Join Onyx Discord\n                    </Text>\n                  </a>\n                </div>\n              </>\n            )}\n          </div>\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/configure/components/UserLibraryModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useCallback, useRef, useMemo } from \"react\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport {\n  fetchLibraryTree,\n  uploadLibraryFiles,\n  uploadLibraryZip,\n  createLibraryDirectory,\n  toggleLibraryFileSync,\n  deleteLibraryFile,\n} from \"@/app/craft/services/apiServices\";\nimport { LibraryEntry } from \"@/app/craft/types/user-library\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport Modal from \"@/refresh-components/Modal\";\nimport ShadowDiv from \"@/refresh-components/ShadowDiv\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport {\n  SvgFolder,\n  SvgFolderOpen,\n  SvgChevronRight,\n  SvgUploadCloud,\n  SvgTrash,\n  SvgFileText,\n  SvgFolderPlus,\n} from \"@opal/icons\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { ConfirmEntityModal } from \"@/components/modals/ConfirmEntityModal\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\n\n/**\n * Build a hierarchical tree from a flat list of library entries.\n * Entries have paths like \"user_library/test\" or \"user_library/test/file.pdf\"\n */\nfunction buildTreeFromFlatList(flatList: LibraryEntry[]): LibraryEntry[] {\n  // Create a map of path -> entry (with children array initialized)\n  const pathToEntry = new Map<string, LibraryEntry>();\n\n  // First pass: create entries with empty children arrays\n  for (const entry of flatList) {\n    pathToEntry.set(entry.path, { ...entry, children: [] });\n  }\n\n  // Second pass: build parent-child relationships\n  const rootEntries: LibraryEntry[] = [];\n\n  for (const entry of flatList) {\n    const entryWithChildren = pathToEntry.get(entry.path)!;\n\n    // Find parent path by removing the last segment\n    const pathParts = entry.path.split(\"/\");\n    pathParts.pop(); // Remove last segment (filename or folder name)\n    const parentPath = pathParts.join(\"/\");\n\n    const parent = pathToEntry.get(parentPath);\n    if (parent && parent.children) {\n      parent.children.push(entryWithChildren);\n    } else {\n      // No parent found, this is a root-level entry\n      rootEntries.push(entryWithChildren);\n    }\n  }\n\n  return rootEntries;\n}\n\ninterface UserLibraryModalProps {\n  open: boolean;\n  onClose: () => void;\n  onChanges?: () => void; // Called when files are uploaded, deleted, or sync toggled\n}\n\nexport default function UserLibraryModal({\n  open,\n  onClose,\n  onChanges,\n}: UserLibraryModalProps) {\n  const [expandedPaths, setExpandedPaths] = useState<Set<string>>(new Set());\n  const [isUploading, setIsUploading] = useState(false);\n  const [uploadError, setUploadError] = useState<string | null>(null);\n  const [entryToDelete, setEntryToDelete] = useState<LibraryEntry | null>(null);\n  const [showNewFolderModal, setShowNewFolderModal] = useState(false);\n  const [newFolderName, setNewFolderName] = useState(\"\");\n  const fileInputRef = useRef<HTMLInputElement>(null);\n  const uploadTargetPathRef = useRef<string>(\"/\");\n\n  // Fetch library tree\n  const {\n    data: tree,\n    error,\n    isLoading,\n    mutate,\n  } = useSWR(open ? SWR_KEYS.buildUserLibraryTree : null, fetchLibraryTree, {\n    revalidateOnFocus: false,\n  });\n\n  // Build hierarchical tree from flat list\n  const hierarchicalTree = useMemo(() => {\n    if (!tree) return [];\n    return buildTreeFromFlatList(tree);\n  }, [tree]);\n\n  const toggleFolder = useCallback((path: string) => {\n    setExpandedPaths((prev) => {\n      const newSet = new Set(prev);\n      if (newSet.has(path)) {\n        newSet.delete(path);\n      } else {\n        newSet.add(path);\n      }\n      return newSet;\n    });\n  }, []);\n\n  const handleFileUpload = useCallback(\n    async (event: React.ChangeEvent<HTMLInputElement>) => {\n      const files = event.target.files;\n      if (!files || files.length === 0) return;\n\n      setIsUploading(true);\n      setUploadError(null);\n\n      const targetPath = uploadTargetPathRef.current;\n\n      try {\n        const fileArray = Array.from(files);\n        // Check if it's a single zip file\n        const firstFile = fileArray[0];\n        if (\n          fileArray.length === 1 &&\n          firstFile &&\n          firstFile.name.endsWith(\".zip\")\n        ) {\n          await uploadLibraryZip(targetPath, firstFile);\n        } else {\n          await uploadLibraryFiles(targetPath, fileArray);\n        }\n        mutate();\n        onChanges?.(); // Notify parent that changes were made\n      } catch (err) {\n        setUploadError(err instanceof Error ? err.message : \"Upload failed\");\n      } finally {\n        setIsUploading(false);\n        uploadTargetPathRef.current = \"/\";\n        // Reset input\n        event.target.value = \"\";\n      }\n    },\n    [mutate, onChanges]\n  );\n\n  const handleUploadToFolder = useCallback((folderPath: string) => {\n    uploadTargetPathRef.current = folderPath;\n    fileInputRef.current?.click();\n  }, []);\n\n  const handleToggleSync = useCallback(\n    async (entry: LibraryEntry, enabled: boolean) => {\n      try {\n        await toggleLibraryFileSync(entry.id, enabled);\n        mutate();\n        onChanges?.(); // Notify parent that changes were made\n      } catch (err) {\n        console.error(\"Failed to toggle sync:\", err);\n      }\n    },\n    [mutate, onChanges]\n  );\n\n  const handleDeleteConfirm = useCallback(async () => {\n    if (!entryToDelete) return;\n\n    try {\n      await deleteLibraryFile(entryToDelete.id);\n      mutate();\n      onChanges?.(); // Notify parent that changes were made\n    } catch (err) {\n      console.error(\"Failed to delete:\", err);\n    } finally {\n      setEntryToDelete(null);\n    }\n  }, [entryToDelete, mutate, onChanges]);\n\n  const handleCreateDirectory = useCallback(async () => {\n    const name = newFolderName.trim();\n    if (!name) return;\n\n    try {\n      await createLibraryDirectory({ name, parent_path: \"/\" });\n      mutate();\n    } catch (err) {\n      console.error(\"Failed to create directory:\", err);\n      setUploadError(\n        err instanceof Error ? err.message : \"Failed to create folder\"\n      );\n    } finally {\n      setShowNewFolderModal(false);\n      setNewFolderName(\"\");\n    }\n  }, [mutate, newFolderName]);\n\n  const formatFileSize = (bytes: number | null): string => {\n    if (bytes === null) return \"\";\n    if (bytes < 1024) return `${bytes} B`;\n    if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;\n    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;\n  };\n\n  const fileCount = hierarchicalTree.length;\n\n  return (\n    <>\n      <Modal open={open} onOpenChange={(isOpen) => !isOpen && onClose()}>\n        <Modal.Content width=\"xl\" height=\"fit\">\n          <Modal.Header\n            icon={SvgFileText}\n            title=\"Your Files\"\n            description=\"Upload files for your agent to read (Excel, Word, PowerPoint, etc.)\"\n            onClose={onClose}\n          />\n          <Modal.Body>\n            <Section flexDirection=\"column\" gap={1} alignItems=\"stretch\">\n              {/* Upload error */}\n              {uploadError && (\n                <Section\n                  flexDirection=\"row\"\n                  alignItems=\"center\"\n                  justifyContent=\"start\"\n                  padding={0.5}\n                  height=\"fit\"\n                >\n                  <Text secondaryBody>{uploadError}</Text>\n                </Section>\n              )}\n\n              {/* File explorer */}\n              <Section flexDirection=\"column\" alignItems=\"stretch\">\n                {/* Action buttons */}\n                <Section\n                  flexDirection=\"row\"\n                  justifyContent=\"end\"\n                  gap={0.5}\n                  padding={0.5}\n                >\n                  <Button\n                    prominence=\"secondary\"\n                    icon={SvgFolderPlus}\n                    onClick={() => setShowNewFolderModal(true)}\n                    tooltip=\"New Folder\"\n                  />\n                  <input\n                    ref={fileInputRef}\n                    type=\"file\"\n                    multiple\n                    style={{ display: \"none\" }}\n                    onChange={handleFileUpload}\n                    disabled={isUploading}\n                    accept=\".xlsx,.xls,.docx,.doc,.pptx,.ppt,.csv,.json,.txt,.pdf,.zip\"\n                  />\n                  <Button\n                    disabled={isUploading}\n                    prominence=\"secondary\"\n                    icon={SvgUploadCloud}\n                    onClick={() => handleUploadToFolder(\"/\")}\n                    tooltip={isUploading ? \"Uploading...\" : \"Upload\"}\n                    aria-label={isUploading ? \"Uploading...\" : \"Upload\"}\n                  />\n                </Section>\n\n                {isLoading ? (\n                  <Section padding={2} height=\"fit\">\n                    <Text secondaryBody text03>\n                      Loading files...\n                    </Text>\n                  </Section>\n                ) : error ? (\n                  <Section padding={2} height=\"fit\">\n                    <Text secondaryBody text03>\n                      Failed to load files\n                    </Text>\n                  </Section>\n                ) : fileCount === 0 ? (\n                  <Section padding={2} height=\"fit\" gap={0.5}>\n                    <SvgFileText size={32} className=\"stroke-text-02\" />\n                    <Text secondaryBody text03>\n                      No files uploaded yet\n                    </Text>\n                    <Text secondaryBody text02>\n                      Upload Excel, Word, PowerPoint, or other files for your\n                      agent to work with\n                    </Text>\n                  </Section>\n                ) : (\n                  <ShadowDiv style={{ maxHeight: \"400px\", padding: \"0.5rem\" }}>\n                    <LibraryTreeView\n                      entries={hierarchicalTree}\n                      expandedPaths={expandedPaths}\n                      onToggleFolder={toggleFolder}\n                      onToggleSync={handleToggleSync}\n                      onDelete={setEntryToDelete}\n                      onUploadToFolder={handleUploadToFolder}\n                      formatFileSize={formatFileSize}\n                    />\n                  </ShadowDiv>\n                )}\n              </Section>\n            </Section>\n          </Modal.Body>\n\n          <Modal.Footer>\n            <Button onClick={onClose}>Done</Button>\n          </Modal.Footer>\n        </Modal.Content>\n      </Modal>\n\n      {/* Delete confirmation modal */}\n      {entryToDelete && (\n        <ConfirmEntityModal\n          danger\n          entityType={entryToDelete.is_directory ? \"folder\" : \"file\"}\n          entityName={entryToDelete.name}\n          action=\"delete\"\n          actionButtonText=\"Delete\"\n          additionalDetails={\n            entryToDelete.is_directory\n              ? \"This will delete the folder and all its contents.\"\n              : \"This file will be removed from your library.\"\n          }\n          onClose={() => setEntryToDelete(null)}\n          onSubmit={handleDeleteConfirm}\n        />\n      )}\n\n      {/* New folder modal */}\n      <Modal\n        open={showNewFolderModal}\n        onOpenChange={(isOpen) => {\n          if (!isOpen) {\n            setShowNewFolderModal(false);\n            setNewFolderName(\"\");\n          }\n        }}\n      >\n        <Modal.Content width=\"sm\" height=\"fit\">\n          <Modal.Header\n            icon={SvgFolder}\n            title=\"New Folder\"\n            onClose={() => {\n              setShowNewFolderModal(false);\n              setNewFolderName(\"\");\n            }}\n          />\n          <Modal.Body>\n            <Section flexDirection=\"column\" gap={0.5} alignItems=\"stretch\">\n              <Text secondaryBody text03>\n                Folder name\n              </Text>\n              <InputTypeIn\n                value={newFolderName}\n                onChange={(e) => setNewFolderName(e.target.value)}\n                placeholder=\"Enter folder name\"\n                onKeyDown={(e) => {\n                  if (e.key === \"Enter\" && newFolderName.trim()) {\n                    handleCreateDirectory();\n                  }\n                }}\n                autoFocus\n              />\n            </Section>\n          </Modal.Body>\n          <Modal.Footer>\n            <Button\n              prominence=\"secondary\"\n              onClick={() => {\n                setShowNewFolderModal(false);\n                setNewFolderName(\"\");\n              }}\n            >\n              Cancel\n            </Button>\n            <Button\n              disabled={!newFolderName.trim()}\n              onClick={handleCreateDirectory}\n            >\n              Create\n            </Button>\n          </Modal.Footer>\n        </Modal.Content>\n      </Modal>\n    </>\n  );\n}\n\ninterface LibraryTreeViewProps {\n  entries: LibraryEntry[];\n  expandedPaths: Set<string>;\n  onToggleFolder: (path: string) => void;\n  onToggleSync: (entry: LibraryEntry, enabled: boolean) => void;\n  onDelete: (entry: LibraryEntry) => void;\n  onUploadToFolder: (folderPath: string) => void;\n  formatFileSize: (bytes: number | null) => string;\n  depth?: number;\n}\n\nfunction LibraryTreeView({\n  entries,\n  expandedPaths,\n  onToggleFolder,\n  onToggleSync,\n  onDelete,\n  onUploadToFolder,\n  formatFileSize,\n  depth = 0,\n}: LibraryTreeViewProps) {\n  // Sort entries: directories first, then alphabetically\n  const sortedEntries = [...entries].sort((a, b) => {\n    if (a.is_directory && !b.is_directory) return -1;\n    if (!a.is_directory && b.is_directory) return 1;\n    return a.name.localeCompare(b.name);\n  });\n\n  return (\n    <>\n      {sortedEntries.map((entry) => {\n        const isExpanded = expandedPaths.has(entry.path);\n\n        return (\n          <Section\n            key={entry.id}\n            flexDirection=\"column\"\n            alignItems=\"stretch\"\n            gap={0}\n            height=\"fit\"\n          >\n            <Section\n              flexDirection=\"row\"\n              alignItems=\"center\"\n              justifyContent=\"start\"\n              gap={0.25}\n              height=\"fit\"\n              padding={0.5}\n            >\n              {/* Indent spacer - inline style needed for dynamic depth */}\n              {depth > 0 && (\n                <span\n                  aria-hidden\n                  style={{\n                    display: \"inline-block\",\n                    width: `${depth * 1.25}rem`,\n                    flexShrink: 0,\n                  }}\n                />\n              )}\n\n              {/* Expand/collapse for directories */}\n              {entry.is_directory ? (\n                // TODO(@raunakab): migrate to opal Button once it supports style prop\n                <IconButton\n                  icon={SvgChevronRight}\n                  onClick={() => onToggleFolder(entry.path)}\n                  small\n                  tooltip={isExpanded ? \"Collapse\" : \"Expand\"}\n                  style={{\n                    transform: isExpanded ? \"rotate(90deg)\" : undefined,\n                    transition: \"transform 150ms ease\",\n                  }}\n                />\n              ) : (\n                <Section width=\"fit\" height=\"fit\" gap={0} padding={0}>\n                  <SvgChevronRight size={12} style={{ visibility: \"hidden\" }} />\n                </Section>\n              )}\n\n              {/* Icon */}\n              {entry.is_directory ? (\n                isExpanded ? (\n                  <SvgFolderOpen size={16} className=\"stroke-text-03\" />\n                ) : (\n                  <SvgFolder size={16} className=\"stroke-text-03\" />\n                )\n              ) : (\n                <SvgFileText size={16} className=\"stroke-text-03\" />\n              )}\n\n              {/* Name */}\n              <Section\n                flexDirection=\"row\"\n                alignItems=\"center\"\n                justifyContent=\"start\"\n                gap={0}\n                height=\"fit\"\n              >\n                <Text secondaryBody text04 className=\"truncate\">\n                  {entry.name}\n                </Text>\n              </Section>\n\n              {/* File size */}\n              {!entry.is_directory && entry.file_size !== null && (\n                <Section width=\"fit\" height=\"fit\" gap={0} padding={0}>\n                  <Text secondaryBody text02 style={{ whiteSpace: \"nowrap\" }}>\n                    {formatFileSize(entry.file_size)}\n                  </Text>\n                </Section>\n              )}\n\n              {/* Actions */}\n              <Section\n                flexDirection=\"row\"\n                alignItems=\"center\"\n                justifyContent=\"end\"\n                gap={0.25}\n                width=\"fit\"\n                height=\"fit\"\n              >\n                {entry.is_directory && (\n                  <Button\n                    size=\"sm\"\n                    icon={SvgUploadCloud}\n                    onClick={(e) => {\n                      e.stopPropagation();\n                      const uploadPath =\n                        entry.path.replace(/^user_library/, \"\") || \"/\";\n                      onUploadToFolder(uploadPath);\n                    }}\n                    tooltip=\"Upload to this folder\"\n                  />\n                )}\n                <Button\n                  variant=\"danger\"\n                  size=\"sm\"\n                  icon={SvgTrash}\n                  onClick={() => onDelete(entry)}\n                  tooltip=\"Delete\"\n                />\n              </Section>\n\n              {/* Sync toggle */}\n              <SimpleTooltip\n                tooltip={\n                  entry.sync_enabled\n                    ? \"Synced to sandbox - click to disable\"\n                    : \"Not synced - click to enable\"\n                }\n              >\n                <Switch\n                  checked={entry.sync_enabled}\n                  onCheckedChange={(checked) => onToggleSync(entry, checked)}\n                />\n              </SimpleTooltip>\n            </Section>\n\n            {/* Children */}\n            {entry.is_directory && isExpanded && entry.children && (\n              <LibraryTreeView\n                entries={entry.children}\n                expandedPaths={expandedPaths}\n                onToggleFolder={onToggleFolder}\n                onToggleSync={onToggleSync}\n                onDelete={onDelete}\n                onUploadToFolder={onUploadToFolder}\n                formatFileSize={formatFileSize}\n                depth={depth + 1}\n              />\n            )}\n          </Section>\n        );\n      })}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/configure/page.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useCallback, useMemo } from \"react\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport {\n  useBuildSessionStore,\n  useIsPreProvisioning,\n} from \"@/app/craft/hooks/useBuildSessionStore\";\nimport SandboxStatusIndicator from \"@/app/craft/components/SandboxStatusIndicator\";\nimport { useBuildLlmSelection } from \"@/app/craft/hooks/useBuildLlmSelection\";\nimport { useBuildConnectors } from \"@/app/craft/hooks/useBuildConnectors\";\nimport { BuildLLMPopover } from \"@/app/craft/components/BuildLLMPopover\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport {\n  SvgPlug,\n  SvgSettings,\n  SvgChevronDown,\n  SvgInfoSmall,\n} from \"@opal/icons\";\nimport { ValidSources } from \"@/lib/types\";\nimport ConnectorCard, {\n  BuildConnectorConfig,\n} from \"@/app/craft/v1/configure/components/ConnectorCard\";\nimport ConfigureConnectorModal from \"@/app/craft/v1/configure/components/ConfigureConnectorModal\";\nimport ComingSoonConnectors from \"@/app/craft/v1/configure/components/ComingSoonConnectors\";\nimport DemoDataConfirmModal from \"@/app/craft/v1/configure/components/DemoDataConfirmModal\";\nimport UserLibraryModal from \"@/app/craft/v1/configure/components/UserLibraryModal\";\nimport {\n  ConnectorInfoOverlay,\n  ReprovisionWarningOverlay,\n} from \"@/app/craft/v1/configure/components/ConfigureOverlays\";\nimport { ConfirmEntityModal } from \"@/components/modals/ConfirmEntityModal\";\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport { deleteConnector } from \"@/app/craft/services/apiServices\";\nimport { Button } from \"@opal/components\";\nimport {\n  OAUTH_STATE_KEY,\n  getDemoDataEnabled,\n  setDemoDataCookie,\n} from \"@/app/craft/v1/constants\";\nimport Separator from \"@/refresh-components/Separator\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport NotAllowedModal from \"@/app/craft/onboarding/components/NotAllowedModal\";\nimport { useOnboarding } from \"@/app/craft/onboarding/BuildOnboardingProvider\";\nimport { useLLMProviders } from \"@/hooks/useLLMProviders\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { getProviderIcon } from \"@/app/admin/configuration/llm/utils\";\nimport {\n  getBuildUserPersona,\n  getPersonaInfo,\n  getPositionText,\n  DEMO_COMPANY_NAME,\n  BuildLlmSelection,\n  BUILD_MODE_PROVIDERS,\n} from \"@/app/craft/onboarding/constants\";\n\n// Build mode connectors\nconst BUILD_CONNECTORS: ValidSources[] = [\n  ValidSources.GoogleDrive,\n  ValidSources.Gmail,\n  ValidSources.Notion,\n  ValidSources.GitHub,\n  ValidSources.Slack,\n  ValidSources.Linear,\n  ValidSources.Fireflies,\n  ValidSources.Hubspot,\n  ValidSources.Airtable,\n  ValidSources.CraftFile, // User's uploaded files\n];\n\ninterface SelectedConnectorState {\n  type: ValidSources;\n  config: BuildConnectorConfig | null;\n}\n\n/**\n * Build Admin Panel - Connector configuration page\n *\n * Renders in the center panel area (replacing ChatPanel + OutputPanel).\n * Uses SettingsLayouts like AgentEditorPage does.\n */\nexport default function BuildConfigPage() {\n  const { isAdmin, isCurator } = useUser();\n  const { llmProviders } = useLLMProviders();\n  const { openPersonaEditor, openLlmSetup } = useOnboarding();\n  const [selectedConnector, setSelectedConnector] =\n    useState<SelectedConnectorState | null>(null);\n  const [connectorToDelete, setConnectorToDelete] =\n    useState<BuildConnectorConfig | null>(null);\n  const [showNotAllowedModal, setShowNotAllowedModal] = useState(false);\n  const [showDemoDataConfirmModal, setShowDemoDataConfirmModal] =\n    useState(false);\n  const [showUserLibraryModal, setShowUserLibraryModal] = useState(false);\n  const [pendingDemoDataEnabled, setPendingDemoDataEnabled] = useState<\n    boolean | null\n  >(null);\n\n  // Pending state for tracking unsaved changes\n  const [pendingLlmSelection, setPendingLlmSelection] =\n    useState<BuildLlmSelection | null>(null);\n  const [pendingDemoData, setPendingDemoData] = useState<boolean | null>(null);\n  const [userLibraryChanged, setUserLibraryChanged] = useState(false);\n  const [isUpdating, setIsUpdating] = useState(false);\n\n  // Track original values (set on mount and after Update)\n  const [originalLlmSelection, setOriginalLlmSelection] =\n    useState<BuildLlmSelection | null>(null);\n  const [originalDemoData, setOriginalDemoData] = useState<boolean | null>(\n    null\n  );\n\n  const isBasicUser = !isAdmin && !isCurator;\n  const isPreProvisioning = useIsPreProvisioning();\n\n  // Build mode LLM selection (cookie-based)\n  const { selection: llmSelection, updateSelection: updateLlmSelection } =\n    useBuildLlmSelection(llmProviders);\n\n  // Read demo data from cookie (single source of truth)\n  const [demoDataEnabled, setDemoDataEnabledLocal] = useState(() =>\n    getDemoDataEnabled()\n  );\n\n  // Get store values\n  const clearPreProvisionedSession = useBuildSessionStore(\n    (state) => state.clearPreProvisionedSession\n  );\n  const ensurePreProvisionedSession = useBuildSessionStore(\n    (state) => state.ensurePreProvisionedSession\n  );\n\n  // Initialize pending state from current values on mount\n  useEffect(() => {\n    if (llmSelection && pendingLlmSelection === null) {\n      setPendingLlmSelection(llmSelection);\n      setOriginalLlmSelection(llmSelection);\n    }\n  }, [llmSelection, pendingLlmSelection]);\n\n  useEffect(() => {\n    if (pendingDemoData === null) {\n      setPendingDemoData(demoDataEnabled);\n      setOriginalDemoData(demoDataEnabled);\n    }\n  }, [demoDataEnabled, pendingDemoData]);\n\n  // Compute whether there are unsaved changes\n  const hasChanges = useMemo(() => {\n    const llmChanged =\n      pendingLlmSelection !== null &&\n      originalLlmSelection !== null &&\n      (pendingLlmSelection.provider !== originalLlmSelection.provider ||\n        pendingLlmSelection.modelName !== originalLlmSelection.modelName);\n\n    const demoDataChanged =\n      pendingDemoData !== null &&\n      originalDemoData !== null &&\n      pendingDemoData !== originalDemoData;\n\n    return llmChanged || demoDataChanged || userLibraryChanged;\n  }, [\n    pendingLlmSelection,\n    pendingDemoData,\n    originalLlmSelection,\n    originalDemoData,\n    userLibraryChanged,\n  ]);\n\n  // Compute display name for the pending LLM selection\n  const pendingLlmDisplayName = useMemo(() => {\n    if (!pendingLlmSelection) return \"Select model\";\n\n    // 1. Try to get display name from backend llmProviders\n    if (llmProviders) {\n      for (const provider of llmProviders) {\n        const config = provider.model_configurations.find(\n          (m) => m.name === pendingLlmSelection.modelName\n        );\n        if (config) {\n          return config.display_name || config.name;\n        }\n      }\n    }\n\n    // 2. Fall back to BUILD_MODE_PROVIDERS labels (for unconfigured providers)\n    for (const provider of BUILD_MODE_PROVIDERS) {\n      const model = provider.models.find(\n        (m) => m.name === pendingLlmSelection.modelName\n      );\n      if (model) {\n        return model.label;\n      }\n    }\n\n    // 3. Fall back to raw model name\n    return pendingLlmSelection.modelName;\n  }, [pendingLlmSelection, llmProviders]);\n\n  // Handle LLM selection change - only update pending state\n  const handleLlmSelectionChange = useCallback(\n    (newSelection: BuildLlmSelection) => {\n      setPendingLlmSelection(newSelection);\n    },\n    []\n  );\n\n  // Handle demo data toggle change - only update pending state (after confirmation)\n  const handleDemoDataConfirm = useCallback(() => {\n    if (pendingDemoDataEnabled !== null) {\n      setPendingDemoData(pendingDemoDataEnabled);\n    }\n    setShowDemoDataConfirmModal(false);\n    setPendingDemoDataEnabled(null);\n  }, [pendingDemoDataEnabled]);\n\n  // Restore changes - revert pending state to original values\n  // Note: User Library changes cannot be reverted (files already uploaded/deleted/toggled)\n  // so we just reset the flag - user needs to manually undo file changes if desired\n  const handleRestoreChanges = useCallback(() => {\n    setPendingLlmSelection(originalLlmSelection);\n    setPendingDemoData(originalDemoData);\n    setUserLibraryChanged(false);\n  }, [originalLlmSelection, originalDemoData]);\n\n  // Update - apply pending changes and re-provision sandbox\n  const handleUpdate = useCallback(async () => {\n    setIsUpdating(true);\n    try {\n      // 1. Apply cookies FIRST (synchronous) - these are the user's preferences\n      // This ensures settings are persisted even if user navigates away during async operations\n      if (pendingLlmSelection) {\n        updateLlmSelection(pendingLlmSelection);\n        setOriginalLlmSelection(pendingLlmSelection);\n      }\n      if (pendingDemoData !== null) {\n        // Update cookie (single source of truth)\n        setDemoDataCookie(pendingDemoData);\n        // Update local state for UI reactivity\n        setDemoDataEnabledLocal(pendingDemoData);\n        setOriginalDemoData(pendingDemoData);\n      }\n\n      // 2. Clear pre-provisioned session (may wait if provisioning in progress)\n      await clearPreProvisionedSession();\n\n      // 3. Start provisioning a new session with updated settings\n      ensurePreProvisionedSession();\n\n      // 4. Reset User Library change flag (sandbox now has the updated files)\n      setUserLibraryChanged(false);\n    } catch (error) {\n      console.error(\"Failed to update settings:\", error);\n    } finally {\n      setIsUpdating(false);\n    }\n  }, [\n    pendingLlmSelection,\n    pendingDemoData,\n    updateLlmSelection,\n    clearPreProvisionedSession,\n    ensurePreProvisionedSession,\n  ]);\n\n  // Read persona from cookies\n  const existingPersona = getBuildUserPersona();\n  const workAreaValue = existingPersona?.workArea;\n  const levelValue = existingPersona?.level;\n\n  // Get persona info from mapping\n  // If workAreaValue and levelValue exist, personaInfo will always be defined\n  // (all combinations are mapped in PERSONA_MAPPING)\n  const personaInfo =\n    workAreaValue && levelValue\n      ? getPersonaInfo(workAreaValue, levelValue)\n      : undefined;\n\n  // Get persona name (split into first and last)\n  const personaName = personaInfo?.name;\n  const [firstName, ...lastNameParts] = personaName?.split(\" \") || [];\n  const lastName = lastNameParts.join(\" \") || \"\";\n\n  // Get position text using shared helper\n  const positionText = workAreaValue\n    ? getPositionText(workAreaValue, levelValue)\n    : \"Not set\";\n\n  const hasLlmProvider = (llmProviders?.length ?? 0) > 0;\n\n  const { connectors, hasConnectorEverSucceeded, isLoading, mutate } =\n    useBuildConnectors();\n\n  // Check for OAuth return state on mount\n  useEffect(() => {\n    const savedState = sessionStorage.getItem(OAUTH_STATE_KEY);\n    if (savedState) {\n      try {\n        const { connectorType, timestamp } = JSON.parse(savedState);\n        // Only restore if < 10 minutes old\n        if (Date.now() - timestamp < 600000) {\n          setSelectedConnector({\n            type: connectorType as ValidSources,\n            config: null,\n          });\n        }\n      } catch (e) {\n        console.error(\"Failed to parse OAuth state:\", e);\n      }\n      sessionStorage.removeItem(OAUTH_STATE_KEY);\n    }\n  }, []);\n\n  // Merge configured status with all available build connectors\n  const connectorStates = BUILD_CONNECTORS.map((type) => ({\n    type,\n    config: connectors.find((c) => c.source === type) || null,\n  }));\n\n  // Auto-enable demo data when no connectors have ever succeeded.\n  // Guard against loading state to avoid a race condition: before the\n  // connector fetch completes, hasConnectorEverSucceeded is false (empty\n  // array fallback), which would incorrectly re-enable demo data.\n  useEffect(() => {\n    if (isLoading) return;\n    if (!hasConnectorEverSucceeded && !demoDataEnabled) {\n      // Update cookie (single source of truth)\n      setDemoDataCookie(true);\n      // Update local state for UI reactivity\n      setDemoDataEnabledLocal(true);\n      // Also sync pending state so UI stays consistent\n      setPendingDemoData(true);\n      setOriginalDemoData(true);\n      // Clear and re-provision with new setting\n      clearPreProvisionedSession().then(() => {\n        ensurePreProvisionedSession();\n      });\n    }\n  }, [\n    isLoading,\n    hasConnectorEverSucceeded,\n    demoDataEnabled,\n    clearPreProvisionedSession,\n    ensurePreProvisionedSession,\n  ]);\n\n  const handleDeleteConfirm = async () => {\n    if (!connectorToDelete) return;\n\n    try {\n      await deleteConnector(\n        connectorToDelete.connector_id,\n        connectorToDelete.credential_id\n      );\n      mutate();\n    } catch (error) {\n      console.error(\"Failed to delete connector:\", error);\n    } finally {\n      setConnectorToDelete(null);\n    }\n  };\n\n  return (\n    <div className=\"relative w-full h-full\">\n      {/* Sandbox status indicator - positioned in top-left corner like ChatPanel */}\n      <div className=\"absolute top-3 left-4 z-20\">\n        <SandboxStatusIndicator />\n      </div>\n\n      <SettingsLayouts.Root>\n        <SettingsLayouts.Header\n          icon={SvgPlug}\n          title=\"Configure Onyx Craft\"\n          description=\"Select data sources and your default LLM\"\n          rightChildren={\n            <div className=\"flex items-center gap-2\">\n              <Button\n                disabled={!hasChanges || isUpdating}\n                prominence=\"secondary\"\n                onClick={handleRestoreChanges}\n              >\n                Restore Changes\n              </Button>\n              <Button\n                disabled={!hasChanges || isUpdating || isPreProvisioning}\n                onClick={handleUpdate}\n              >\n                {isUpdating || isPreProvisioning ? \"Updating...\" : \"Update\"}\n              </Button>\n            </div>\n          }\n        />\n        <SettingsLayouts.Body>\n          {isLoading ? (\n            <Card variant=\"tertiary\">\n              <Section alignItems=\"center\" gap={0.5} height=\"fit\">\n                <Text mainContentBody>Loading...</Text>\n              </Section>\n            </Card>\n          ) : (\n            <Section flexDirection=\"column\" gap={2}>\n              <Section\n                flexDirection=\"column\"\n                alignItems=\"start\"\n                gap={0.5}\n                height=\"fit\"\n              >\n                <Card>\n                  <InputLayouts.Horizontal\n                    title=\"Your Demo Persona\"\n                    description={\n                      firstName && lastName && positionText\n                        ? `${firstName} ${lastName}, ${positionText} at ${DEMO_COMPANY_NAME}`\n                        : positionText\n                          ? `${positionText} at ${DEMO_COMPANY_NAME}`\n                          : \"Not set\"\n                    }\n                    center\n                  >\n                    <SimpleTooltip\n                      tooltip={\n                        !hasLlmProvider\n                          ? \"Configure an LLM provider first\"\n                          : undefined\n                      }\n                      disabled={hasLlmProvider}\n                    >\n                      <button\n                        type=\"button\"\n                        onClick={() => openPersonaEditor()}\n                        disabled={!hasLlmProvider}\n                        className=\"p-2 rounded-08 text-text-03 hover:bg-background-tint-02 transition-colors disabled:opacity-50 disabled:cursor-not-allowed\"\n                      >\n                        <SvgSettings className=\"w-5 h-5\" />\n                      </button>\n                    </SimpleTooltip>\n                  </InputLayouts.Horizontal>\n                </Card>\n                <Card\n                  className={\n                    isUpdating || isPreProvisioning ? \"opacity-50\" : \"\"\n                  }\n                  title={\n                    isUpdating || isPreProvisioning\n                      ? \"Please wait while your session is being provisioned\"\n                      : undefined\n                  }\n                >\n                  <div\n                    className={`w-full ${\n                      isUpdating || isPreProvisioning\n                        ? \"pointer-events-none\"\n                        : \"\"\n                    }`}\n                  >\n                    <InputLayouts.Horizontal\n                      title=\"Default LLM\"\n                      description=\"Select the language model to craft with\"\n                      center\n                    >\n                      <BuildLLMPopover\n                        currentSelection={pendingLlmSelection}\n                        onSelectionChange={handleLlmSelectionChange}\n                        llmProviders={llmProviders}\n                        onOpenOnboarding={(providerKey) =>\n                          openLlmSetup(providerKey)\n                        }\n                        disabled={isUpdating || isPreProvisioning}\n                      >\n                        <button\n                          type=\"button\"\n                          className=\"flex items-center gap-2 px-3 py-1.5 rounded-08 border border-border-01 bg-background-tint-00 hover:bg-background-tint-01 transition-colors\"\n                        >\n                          {pendingLlmSelection?.provider &&\n                            (() => {\n                              const ProviderIcon = getProviderIcon(\n                                pendingLlmSelection.provider\n                              );\n                              return <ProviderIcon className=\"w-4 h-4\" />;\n                            })()}\n                          <Text mainUiAction>{pendingLlmDisplayName}</Text>\n                          <SvgChevronDown className=\"w-4 h-4 text-text-03\" />\n                        </button>\n                      </BuildLLMPopover>\n                    </InputLayouts.Horizontal>\n                  </div>\n                </Card>\n                <Separator />\n                <div className=\"w-full flex items-center justify-between\">\n                  <div className=\"flex flex-col gap-0.25\">\n                    <Text mainContentEmphasis text04>\n                      Connectors\n                    </Text>\n                    <Text secondaryBody text03>\n                      Connect your own data sources\n                    </Text>\n                  </div>\n                  <div className=\"w-fit flex-shrink-0\">\n                    <SimpleTooltip\n                      tooltip={\n                        isUpdating || isPreProvisioning\n                          ? \"Please wait while your session is being provisioned\"\n                          : !hasConnectorEverSucceeded\n                            ? \"Connect and sync a data source to disable demo data\"\n                            : undefined\n                      }\n                      disabled={\n                        hasConnectorEverSucceeded &&\n                        !isUpdating &&\n                        !isPreProvisioning\n                      }\n                    >\n                      <Card\n                        padding={0.75}\n                        className={\n                          !hasConnectorEverSucceeded ||\n                          isUpdating ||\n                          isPreProvisioning\n                            ? \"opacity-50\"\n                            : \"\"\n                        }\n                      >\n                        <div\n                          className={`flex items-center gap-3 ${\n                            !hasConnectorEverSucceeded ||\n                            isUpdating ||\n                            isPreProvisioning\n                              ? \"pointer-events-none\"\n                              : \"\"\n                          }`}\n                        >\n                          <div className=\"flex items-center gap-2\">\n                            <SimpleTooltip tooltip=\"The demo dataset contains 1000 files across various connectors\">\n                              <span className=\"inline-flex items-center cursor-help\">\n                                <SvgInfoSmall\n                                  size={16}\n                                  className=\"text-text-03\"\n                                />\n                              </span>\n                            </SimpleTooltip>\n                            <Text mainUiAction>Use Demo Dataset</Text>\n                          </div>\n                          <Switch\n                            checked={pendingDemoData ?? demoDataEnabled}\n                            disabled={\n                              isUpdating ||\n                              isPreProvisioning ||\n                              !hasConnectorEverSucceeded\n                            }\n                            onCheckedChange={(newValue) => {\n                              setPendingDemoDataEnabled(newValue);\n                              setShowDemoDataConfirmModal(true);\n                            }}\n                          />\n                        </div>\n                      </Card>\n                    </SimpleTooltip>\n                  </div>\n                </div>\n                <div className=\"w-full grid grid-cols-1 md:grid-cols-2 gap-2 pt-2\">\n                  {connectorStates.map(({ type, config }) => {\n                    const metadata = getSourceMetadata(type);\n                    return (\n                      <ConnectorCard\n                        key={type}\n                        connectorType={type}\n                        config={config}\n                        onConfigure={() => {\n                          // Connectors marked as alwaysConnected open their custom modal\n                          if (metadata.alwaysConnected) {\n                            setShowUserLibraryModal(true);\n                            return;\n                          }\n                          // Only open modal for unconfigured connectors\n                          if (!config) {\n                            if (isBasicUser) {\n                              setShowNotAllowedModal(true);\n                            } else {\n                              setSelectedConnector({ type, config });\n                            }\n                          }\n                        }}\n                        onDelete={() => config && setConnectorToDelete(config)}\n                      />\n                    );\n                  })}\n                </div>\n                <ComingSoonConnectors />\n              </Section>\n            </Section>\n          )}\n\n          {/* Sticky overlay for reprovision warning */}\n          <div className=\"sticky z-toast bottom-10 w-fit mx-auto\">\n            <ReprovisionWarningOverlay\n              visible={hasChanges && !isLoading}\n              onUpdate={handleUpdate}\n              isUpdating={isUpdating || isPreProvisioning}\n            />\n          </div>\n\n          {/* Fixed overlay for connector info - centered on screen like the modal */}\n          <ConnectorInfoOverlay visible={!!selectedConnector} />\n        </SettingsLayouts.Body>\n\n        <ConfigureConnectorModal\n          connectorType={selectedConnector?.type || null}\n          existingConfig={selectedConnector?.config || null}\n          open={!!selectedConnector}\n          onClose={() => setSelectedConnector(null)}\n          onSuccess={() => {\n            setSelectedConnector(null);\n            mutate();\n          }}\n        />\n\n        {connectorToDelete && (\n          <ConfirmEntityModal\n            danger\n            entityType=\"connector\"\n            entityName={\n              getSourceMetadata(connectorToDelete.source as ValidSources)\n                .displayName\n            }\n            action=\"disconnect\"\n            actionButtonText=\"Disconnect\"\n            additionalDetails=\"This will remove access to this data source. You can reconnect it later.\"\n            onClose={() => setConnectorToDelete(null)}\n            onSubmit={handleDeleteConfirm}\n          />\n        )}\n\n        <NotAllowedModal\n          open={showNotAllowedModal}\n          onClose={() => setShowNotAllowedModal(false)}\n        />\n\n        <DemoDataConfirmModal\n          open={showDemoDataConfirmModal}\n          onClose={() => {\n            setShowDemoDataConfirmModal(false);\n            setPendingDemoDataEnabled(null);\n          }}\n          pendingDemoDataEnabled={pendingDemoDataEnabled}\n          onConfirm={handleDemoDataConfirm}\n        />\n\n        <UserLibraryModal\n          open={showUserLibraryModal}\n          onClose={() => setShowUserLibraryModal(false)}\n          onChanges={() => setUserLibraryChanged(true)}\n        />\n      </SettingsLayouts.Root>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/configure/utils/createBuildConnector.ts",
    "content": "import { ValidSources, ProcessingMode } from \"@/lib/types\";\nimport { Credential } from \"@/lib/connectors/credentials\";\nimport { createConnector } from \"@/lib/connector\";\nimport { linkCredential } from \"@/lib/credential\";\nimport { connectorConfigs, isLoadState } from \"@/lib/connectors/connectors\";\n\nexport interface CreateBuildConnectorParams {\n  connectorType: ValidSources;\n  credential: Credential<any>;\n  connectorSpecificConfig?: Record<string, any>;\n  connectorName?: string;\n  userEmail?: string;\n}\n\nexport interface CreateBuildConnectorResult {\n  success: boolean;\n  error?: string;\n  connectorId?: number;\n}\n\nfunction getUserIdentifier(email?: string): string {\n  if (!email) return \"\";\n  // Extract the part before @ and sanitize it\n  const prefix = email.split(\"@\")[0] || email;\n  // Replace any non-alphanumeric characters with dashes\n  return `-${prefix.replace(/[^a-zA-Z0-9]/g, \"-\")}`;\n}\n\nexport async function createBuildConnector({\n  connectorType,\n  credential,\n  connectorSpecificConfig = {},\n  connectorName,\n  userEmail,\n}: CreateBuildConnectorParams): Promise<CreateBuildConnectorResult> {\n  const config =\n    connectorConfigs[connectorType as keyof typeof connectorConfigs];\n  const userIdentifier = getUserIdentifier(userEmail);\n  const name = connectorName || `build-mode-${connectorType}${userIdentifier}`;\n\n  const filteredConfig: Record<string, any> = {};\n  Object.entries(connectorSpecificConfig).forEach(([key, value]) => {\n    if (value !== \"\" && value !== null && value !== undefined) {\n      if (Array.isArray(value) && value.length === 0) {\n        return;\n      }\n      filteredConfig[key] = value;\n    }\n  });\n\n  try {\n    const [connectorError, connector] = await createConnector({\n      name,\n      source: connectorType,\n      input_type: isLoadState(connectorType) ? \"load_state\" : \"poll\",\n      connector_specific_config: filteredConfig,\n      refresh_freq: config?.overrideDefaultFreq || 1800,\n      prune_freq: 2592000,\n      indexing_start: null,\n      access_type: \"private\",\n      groups: [],\n    });\n\n    if (connectorError || !connector) {\n      return {\n        success: false,\n        error: connectorError || \"Failed to create connector\",\n      };\n    }\n\n    const linkResponse = await linkCredential(\n      connector.id,\n      credential.id,\n      name,\n      \"private\",\n      [],\n      undefined,\n      \"FILE_SYSTEM\"\n    );\n\n    if (!linkResponse.ok) {\n      const linkError = await linkResponse.json();\n      return {\n        success: false,\n        error: linkError.detail || \"Failed to link credential\",\n      };\n    }\n\n    return {\n      success: true,\n      connectorId: connector.id,\n    };\n  } catch (err) {\n    return {\n      success: false,\n      error: err instanceof Error ? err.message : \"Failed to create connector\",\n    };\n  }\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/constants.ts",
    "content": "import Cookies from \"js-cookie\";\n\nexport const CRAFT_PATH = \"/craft/v1\";\nexport const CRAFT_CONFIGURE_PATH = `${CRAFT_PATH}/configure`;\nexport const CRAFT_OAUTH_COOKIE_NAME = \"build_mode_oauth\";\nexport const OAUTH_STATE_KEY = \"build_oauth_state\";\nexport const CRAFT_DEMO_DATA_COOKIE_NAME = \"build_demo_data_enabled\";\nexport const ONYX_CRAFT_CALENDAR_URL = \"https://cal.com/team/onyx/onyx-craft\";\n\n/**\n * Read demo data enabled setting from cookie.\n * This is the single source of truth for the demo data setting.\n * Defaults to true if cookie doesn't exist or is invalid.\n */\nexport function getDemoDataEnabled(): boolean {\n  if (typeof window === \"undefined\") return true; // SSR fallback\n  const cookieValue = Cookies.get(CRAFT_DEMO_DATA_COOKIE_NAME);\n  if (cookieValue === \"false\") return false;\n  return true; // Default to true\n}\n\n/**\n * Write demo data enabled setting to cookie.\n */\nexport function setDemoDataCookie(enabled: boolean): void {\n  Cookies.set(CRAFT_DEMO_DATA_COOKIE_NAME, String(enabled), {\n    path: \"/\",\n    expires: 365, // 1 year\n  });\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/layout.tsx",
    "content": "\"use client\";\n\nimport { BuildProvider } from \"@/app/craft/contexts/BuildContext\";\nimport { UploadFilesProvider } from \"@/app/craft/contexts/UploadFilesContext\";\nimport { BuildOnboardingProvider } from \"@/app/craft/onboarding/BuildOnboardingProvider\";\nimport BuildSidebar from \"@/app/craft/components/SideBar\";\n\n/**\n * Build V1 Layout - Skeleton pattern with 3-panel layout\n *\n * Wraps with BuildProvider and UploadFilesProvider (for file uploads).\n * Includes BuildSidebar on the left.\n * Pre-provisioning is handled by useBuildSessionController.\n * The page component provides the center (chat) and right (output) panels.\n */\nexport default function Layout({ children }: { children: React.ReactNode }) {\n  return (\n    <UploadFilesProvider>\n      <BuildProvider>\n        <BuildOnboardingProvider>\n          <div className=\"flex flex-row w-full h-full\">\n            <BuildSidebar />\n            {children}\n          </div>\n        </BuildOnboardingProvider>\n      </BuildProvider>\n    </UploadFilesProvider>\n  );\n}\n"
  },
  {
    "path": "web/src/app/craft/v1/page.tsx",
    "content": "\"use client\";\n\nimport { useSearchParams } from \"next/navigation\";\nimport { useBuildSessionController } from \"@/app/craft/hooks/useBuildSessionController\";\nimport {\n  useOutputPanelOpen,\n  useToggleOutputPanel,\n} from \"@/app/craft/hooks/useBuildSessionStore\";\nimport { getSessionIdFromSearchParams } from \"@/app/craft/services/searchParams\";\nimport BuildChatPanel from \"@/app/craft/components/ChatPanel\";\nimport BuildOutputPanel from \"@/app/craft/components/OutputPanel\";\n\n/**\n * Build V1 Page - Entry point for builds\n *\n * URL: /craft/v1 (new build)\n * URL: /craft/v1?sessionId=xxx (existing session)\n *\n * Renders the 2-panel layout (chat + output) and handles session controller setup.\n */\nexport default function BuildV1Page() {\n  const searchParams = useSearchParams();\n  const sessionId = getSessionIdFromSearchParams(searchParams);\n\n  const outputPanelOpen = useOutputPanelOpen();\n  const toggleOutputPanel = useToggleOutputPanel();\n  useBuildSessionController({ existingSessionId: sessionId });\n\n  return (\n    <div className=\"relative flex-1 h-full overflow-hidden\">\n      {/* Chat panel - always full width for background */}\n      <BuildChatPanel existingSessionId={sessionId} />\n\n      {/* Output panel - floats over as a card */}\n      <BuildOutputPanel onClose={toggleOutputPanel} isOpen={outputPanelOpen} />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/css/attachment-button.css",
    "content": "/* AttachmentButton styles */\n\n.attachment-button {\n  display: flex;\n  flex-direction: row;\n  width: 100%;\n  padding: 0.25rem;\n  background-color: var(--background-tint-00);\n  border-radius: var(--border-radius-12);\n  gap: 0.5rem;\n}\n\n.attachment-button:hover {\n  background-color: var(--background-tint-02);\n}\n\n.attachment-button[data-state=\"selected\"] {\n  background-color: var(--action-link-01);\n}\n\n.attachment-button__content {\n  flex: 1;\n  display: flex;\n  flex-direction: row;\n  gap: 0.5rem;\n  min-width: 0;\n}\n\n.attachment-button__icon-wrapper {\n  height: 100%;\n  aspect-ratio: 1;\n  background-color: var(--background-tint-01);\n  border-radius: var(--border-radius-08);\n  display: flex;\n  flex-direction: column;\n  align-items: center;\n  justify-content: center;\n  flex-shrink: 0;\n}\n\n.attachment-button__icon {\n  height: 1rem;\n  width: 1rem;\n  stroke: var(--text-02);\n}\n\n.attachment-button[data-state=\"processing\"] .attachment-button__icon {\n  stroke: var(--text-01);\n}\n\n.attachment-button__text-container {\n  display: flex;\n  flex-direction: column;\n  align-items: flex-start;\n  justify-content: center;\n  min-width: 0;\n  flex: 1;\n}\n\n.attachment-button__title-row {\n  display: flex;\n  flex-direction: row;\n  align-items: center;\n  gap: 0.5rem;\n  width: 100%;\n  min-width: 0;\n}\n\n.attachment-button__title-wrapper {\n  max-width: 70%;\n  min-width: 0;\n  flex-shrink: 1;\n  overflow: hidden;\n}\n\n.attachment-button__view-button {\n  flex-shrink: 0;\n  visibility: hidden;\n}\n\n.attachment-button:hover .attachment-button__view-button {\n  visibility: visible;\n}\n\n.attachment-button__actions {\n  display: flex;\n  flex-direction: row;\n  align-self: stretch;\n  justify-content: flex-end;\n  align-items: center;\n  gap: 0.5rem;\n  padding: 0.25rem;\n  flex-shrink: 0;\n}\n\n.attachment-button__action-button {\n  visibility: hidden;\n}\n\n.attachment-button:hover .attachment-button__action-button {\n  visibility: visible;\n}\n"
  },
  {
    "path": "web/src/app/css/button.css",
    "content": "/* ============================================================================\n   Main Variant - Primary\n   ============================================================================ */\n\n.button-main-primary {\n  background-color: var(--theme-primary-05);\n}\n.button-main-primary:hover {\n  background-color: var(--theme-primary-04);\n}\n.button-main-primary[data-state=\"transient\"] {\n  background-color: var(--theme-primary-06);\n}\n.button-main-primary:active {\n  background-color: var(--theme-primary-06);\n}\n.button-main-primary:disabled {\n  background-color: var(--background-neutral-04);\n}\n\n.button-main-primary-text {\n  color: var(--text-inverted-05) !important;\n}\n.button-main-primary:disabled .button-main-primary-text {\n  color: var(--text-inverted-04) !important;\n}\n\n.button-main-primary-icon {\n  stroke: var(--text-inverted-05);\n}\n.button-main-primary:disabled .button-main-primary-icon {\n  stroke: var(--text-inverted-04);\n}\n\n/* ============================================================================\n   Main Variant - Secondary\n   ============================================================================ */\n\n.button-main-secondary {\n  background-color: var(--background-tint-01);\n  border: 1px solid var(--border-01);\n}\n.button-main-secondary:hover {\n  background-color: var(--background-tint-02);\n}\n.button-main-secondary[data-state=\"transient\"] {\n  background-color: var(--background-tint-00);\n}\n.button-main-secondary:active {\n  background-color: var(--background-tint-00);\n}\n.button-main-secondary:disabled {\n  background-color: var(--background-neutral-03);\n  border: 1px solid var(--border-01);\n}\n\n.button-main-secondary-text {\n  color: var(--text-03) !important;\n}\n.button-main-secondary:hover .button-main-secondary-text {\n  color: var(--text-04) !important;\n}\n.button-main-secondary[data-state=\"transient\"] .button-main-secondary-text {\n  color: var(--text-05) !important;\n}\n.button-main-secondary:active .button-main-secondary-text {\n  color: var(--text-05) !important;\n}\n.button-main-secondary:disabled .button-main-secondary-text {\n  color: var(--text-01) !important;\n}\n\n.button-main-secondary-icon {\n  stroke: var(--text-03);\n}\n.button-main-secondary:hover .button-main-secondary-icon {\n  stroke: var(--text-04);\n}\n.button-main-secondary[data-state=\"transient\"] .button-main-secondary-icon {\n  stroke: var(--text-05);\n}\n.button-main-secondary:active .button-main-secondary-icon {\n  stroke: var(--text-05);\n}\n.button-main-secondary:disabled .button-main-secondary-icon {\n  stroke: var(--text-01);\n}\n\n/* ============================================================================\n   Main Variant - Tertiary\n   ============================================================================ */\n\n.button-main-tertiary {\n  background-color: transparent;\n}\n.button-main-tertiary:hover {\n  background-color: var(--background-tint-02);\n}\n.button-main-tertiary[data-state=\"transient\"] {\n  background-color: var(--background-tint-00);\n}\n.button-main-tertiary:active {\n  background-color: var(--background-tint-00);\n}\n.button-main-tertiary:disabled {\n  background-color: transparent;\n}\n\n.button-main-tertiary-text {\n  color: var(--text-03) !important;\n}\n.button-main-tertiary:hover .button-main-tertiary-text {\n  color: var(--text-04) !important;\n}\n.button-main-tertiary[data-state=\"transient\"] .button-main-tertiary-text {\n  color: var(--text-05) !important;\n}\n.button-main-tertiary:active .button-main-tertiary-text {\n  color: var(--text-05) !important;\n}\n.button-main-tertiary:disabled .button-main-tertiary-text {\n  color: var(--text-01) !important;\n}\n\n.button-main-tertiary-icon {\n  stroke: var(--text-03);\n}\n.button-main-tertiary:hover .button-main-tertiary-icon {\n  stroke: var(--text-04);\n}\n.button-main-tertiary[data-state=\"transient\"] .button-main-tertiary-icon {\n  stroke: var(--text-05);\n}\n.button-main-tertiary:active .button-main-tertiary-icon {\n  stroke: var(--text-05);\n}\n.button-main-tertiary:disabled .button-main-tertiary-icon {\n  stroke: var(--text-01);\n}\n\n/* ============================================================================\n   Main Variant - Internal\n   ============================================================================ */\n\n.button-main-internal {\n  background-color: transparent;\n}\n.button-main-internal:hover {\n  background-color: var(--background-tint-02);\n}\n.button-main-internal[data-state=\"transient\"] {\n  background-color: var(--background-tint-00);\n}\n.button-main-internal:active {\n  background-color: var(--background-tint-00);\n}\n.button-main-internal:disabled {\n  background-color: transparent;\n}\n\n.button-main-internal-text {\n  color: var(--text-03) !important;\n}\n.button-main-internal:hover .button-main-internal-text {\n  color: var(--text-04) !important;\n}\n.button-main-internal[data-state=\"transient\"] .button-main-internal-text {\n  color: var(--text-05) !important;\n}\n.button-main-internal:active .button-main-internal-text {\n  color: var(--text-05) !important;\n}\n.button-main-internal:disabled .button-main-internal-text {\n  color: var(--text-01) !important;\n}\n\n.button-main-internal-icon {\n  stroke: var(--text-03);\n}\n.button-main-internal:hover .button-main-internal-icon {\n  stroke: var(--text-04);\n}\n.button-main-internal[data-state=\"transient\"] .button-main-internal-icon {\n  stroke: var(--text-05);\n}\n.button-main-internal:active .button-main-internal-icon {\n  stroke: var(--text-05);\n}\n.button-main-internal:disabled .button-main-internal-icon {\n  stroke: var(--text-01);\n}\n\n/* ============================================================================\n   Action Variant - Primary\n   ============================================================================ */\n\n.button-action-primary {\n  background-color: var(--action-link-05);\n}\n.button-action-primary:hover {\n  background-color: var(--action-link-04);\n}\n.button-action-primary[data-state=\"transient\"] {\n  background-color: var(--action-link-06);\n}\n.button-action-primary:active {\n  background-color: var(--action-link-06);\n}\n.button-action-primary:disabled {\n  background-color: var(--action-link-02);\n}\n\n.button-action-primary-text {\n  color: var(--text-light-05) !important;\n}\n.button-action-primary:disabled .button-action-primary-text {\n  color: var(--text-01) !important;\n}\n\n.button-action-primary-icon {\n  stroke: var(--text-light-05);\n}\n.button-action-primary:disabled .button-action-primary-icon {\n  stroke: var(--text-01);\n}\n\n/* ============================================================================\n   Action Variant - Secondary\n   ============================================================================ */\n\n.button-action-secondary {\n  background-color: var(--background-tint-01);\n  border: 1px solid var(--border-01);\n}\n.button-action-secondary:hover {\n  background-color: var(--background-tint-02);\n}\n.button-action-secondary[data-state=\"transient\"] {\n  background-color: var(--background-tint-00);\n}\n.button-action-secondary:active {\n  background-color: var(--background-tint-00);\n}\n.button-action-secondary:disabled {\n  background-color: var(--background-neutral-02);\n  border: 1px solid var(--border-01);\n}\n\n.button-action-secondary-text {\n  color: var(--action-text-link-05) !important;\n}\n.button-action-secondary:disabled .button-action-secondary-text {\n  color: var(--action-link-03) !important;\n}\n\n.button-action-secondary-icon {\n  stroke: var(--action-text-link-05);\n}\n.button-action-secondary:disabled .button-action-secondary-icon {\n  stroke: var(--action-link-03);\n}\n\n/* ============================================================================\n   Action Variant - Tertiary\n   ============================================================================ */\n\n.button-action-tertiary {\n  background-color: transparent;\n}\n.button-action-tertiary:hover {\n  background-color: var(--background-tint-02);\n}\n.button-action-tertiary[data-state=\"transient\"] {\n  background-color: var(--background-tint-00);\n}\n.button-action-tertiary:active {\n  background-color: var(--background-tint-00);\n}\n.button-action-tertiary:disabled {\n  background-color: transparent;\n}\n\n.button-action-tertiary-text {\n  color: var(--action-text-link-05) !important;\n}\n.button-action-tertiary:disabled .button-action-tertiary-text {\n  color: var(--action-link-03) !important;\n}\n\n.button-action-tertiary-icon {\n  stroke: var(--action-text-link-05);\n}\n.button-action-tertiary:disabled .button-action-tertiary-icon {\n  stroke: var(--action-link-03);\n}\n\n/* ============================================================================\n   Danger Variant - Primary\n   ============================================================================ */\n\n.button-danger-primary {\n  background-color: var(--action-danger-05);\n}\n.button-danger-primary:hover {\n  background-color: var(--action-danger-04);\n}\n.button-danger-primary[data-state=\"transient\"] {\n  background-color: var(--action-danger-06);\n}\n.button-danger-primary:active {\n  background-color: var(--action-danger-06);\n}\n.button-danger-primary:disabled {\n  background-color: var(--action-danger-02);\n}\n\n.button-danger-primary-text {\n  color: var(--text-light-05) !important;\n}\n.button-danger-primary:disabled .button-danger-primary-text {\n  color: var(--text-01) !important;\n}\n\n.button-danger-primary-icon {\n  stroke: var(--text-light-05);\n}\n.button-danger-primary:disabled .button-danger-primary-icon {\n  stroke: var(--text-01);\n}\n\n/* ============================================================================\n   Danger Variant - Secondary\n   ============================================================================ */\n\n.button-danger-secondary {\n  background-color: var(--background-tint-01);\n  border: 1px solid var(--border-01);\n}\n.button-danger-secondary:hover {\n  background-color: var(--background-tint-02);\n}\n.button-danger-secondary[data-state=\"transient\"] {\n  background-color: var(--background-tint-00);\n}\n.button-danger-secondary:active {\n  background-color: var(--background-tint-00);\n}\n.button-danger-secondary:disabled {\n  background-color: var(--background-neutral-02);\n  border: 1px solid var(--border-01);\n}\n\n.button-danger-secondary-text {\n  color: var(--action-text-danger-05) !important;\n}\n.button-danger-secondary:disabled .button-danger-secondary-text {\n  color: var(--action-danger-03) !important;\n}\n\n.button-danger-secondary-icon {\n  stroke: var(--action-text-danger-05);\n}\n.button-danger-secondary:disabled .button-danger-secondary-icon {\n  stroke: var(--action-danger-03);\n}\n\n/* ============================================================================\n   Danger Variant - Tertiary\n   ============================================================================ */\n\n.button-danger-tertiary {\n  background-color: transparent;\n}\n.button-danger-tertiary:hover {\n  background-color: var(--background-tint-02);\n}\n.button-danger-tertiary[data-state=\"transient\"] {\n  background-color: var(--background-tint-00);\n}\n.button-danger-tertiary:active {\n  background-color: var(--background-tint-00);\n}\n.button-danger-tertiary:disabled {\n  background-color: transparent;\n}\n\n.button-danger-tertiary-text {\n  color: var(--action-text-danger-05) !important;\n}\n.button-danger-tertiary:disabled .button-danger-tertiary-text {\n  color: var(--action-danger-03) !important;\n}\n\n.button-danger-tertiary-icon {\n  stroke: var(--action-text-danger-05);\n}\n.button-danger-tertiary:disabled .button-danger-tertiary-icon {\n  stroke: var(--action-danger-03);\n}\n"
  },
  {
    "path": "web/src/app/css/card.css",
    "content": ".card {\n  @apply rounded-16 w-full overflow-clip;\n}\n\n.card[data-variant=\"primary\"] {\n  @apply bg-background-tint-00 border;\n}\n\n.card[data-variant=\"secondary\"] {\n  @apply bg-transparent border;\n}\n\n.card[data-variant=\"tertiary\"] {\n  @apply bg-transparent border border-dashed;\n}\n\n.card[data-variant=\"disabled\"] {\n  @apply cursor-not-allowed bg-background-tint-00 border opacity-50;\n}\n\n.card[data-variant=\"borderless\"] {\n  @apply bg-background-tint-00;\n}\n"
  },
  {
    "path": "web/src/app/css/code.css",
    "content": ".code-block {\n  display: block;\n  padding: 0.5rem;\n  background-color: var(--background-tint-00);\n  border: 1px solid var(--border-01);\n  border-radius: var(--border-radius-12);\n  word-break: break-all;\n  font-family: var(--font-mono);\n  font-size: 0.75rem;\n  line-height: 1rem;\n  color: var(--text-03);\n}\n\n.code-copy-button {\n  position: absolute;\n  top: 0.5rem;\n  right: 0.5rem;\n  opacity: 0;\n  transition: opacity 150ms ease-in-out;\n}\n\n.code-wrapper:hover .code-copy-button {\n  opacity: 1;\n}\n"
  },
  {
    "path": "web/src/app/css/color-swatch.css",
    "content": ".color-swatch {\n  /* Base styles */\n  display: inline-flex;\n  align-items: center;\n  justify-content: center;\n  border: 1.5px solid var(--border-01);\n  border-radius: var(--border-radius-08);\n  padding: 0.12rem 0.25rem;\n  background-color: var(--background-tint-light-01);\n  gap: 0.1rem;\n}\n\n.color-swatch[data-state=\"dark\"] {\n  background-color: var(--background-tint-dark-01);\n}\n\n.color-swatch__text {\n  font-size: 0.5rem;\n  font-weight: 500;\n  color: var(--text-dark-05);\n}\n\n.color-swatch[data-state=\"dark\"] .color-swatch__text {\n  color: var(--text-light-05);\n}\n"
  },
  {
    "path": "web/src/app/css/colors.css",
    "content": "/* Base Colors */\n:root {\n  /* Grey Scale */\n  --grey-100: #000000;\n  --grey-98: #050505;\n  --grey-96: #0a0a0a;\n  --grey-94: #0f0f0f;\n  --grey-92: #141414;\n  --grey-90: #1a1a1a;\n  --grey-85: #262626;\n  --grey-80: #333333;\n  --grey-75: #404040;\n  --grey-70: #4d4d4d;\n  --grey-60: #555555;\n  --grey-50: #808080;\n  --grey-40: #a4a4a4;\n  --grey-30: #b2b2b2;\n  --grey-20: #cccccc;\n  --grey-10: #e6e6e6;\n  --grey-08: #ebebeb;\n  --grey-06: #f0f0f0;\n  --grey-04: #f5f5f5;\n  --grey-02: #fafafa;\n  --grey-00: #ffffff;\n\n  /* Alpha Grey 100 (Black with opacity) */\n  --alpha-grey-100-95: #000000f2;\n  --alpha-grey-100-90: #000000e5;\n  --alpha-grey-100-85: #000000d9;\n  --alpha-grey-100-80: #000000cc;\n  --alpha-grey-100-75: #000000bf;\n  --alpha-grey-100-70: #000000b2;\n  --alpha-grey-100-65: #000000a6;\n  --alpha-grey-100-60: #00000099;\n  --alpha-grey-100-55: #0000008c;\n  --alpha-grey-100-50: #00000080;\n  --alpha-grey-100-45: #00000073;\n  --alpha-grey-100-40: #00000066;\n  --alpha-grey-100-35: #00000059;\n  --alpha-grey-100-30: #0000004d;\n  --alpha-grey-100-25: #00000040;\n  --alpha-grey-100-20: #00000033;\n  --alpha-grey-100-15: #00000026;\n  --alpha-grey-100-10: #0000001a;\n  --alpha-grey-100-05: #0000000d;\n  --alpha-grey-100-00: #00000000;\n\n  /* Alpha Grey 00 (White with opacity) */\n  --alpha-grey-00-95: #fffffff2;\n  --alpha-grey-00-90: #ffffffe5;\n  --alpha-grey-00-85: #ffffffd9;\n  --alpha-grey-00-80: #ffffffcc;\n  --alpha-grey-00-75: #ffffffbf;\n  --alpha-grey-00-70: #ffffffb2;\n  --alpha-grey-00-65: #ffffffa6;\n  --alpha-grey-00-60: #ffffff99;\n  --alpha-grey-00-55: #ffffff8c;\n  --alpha-grey-00-50: #ffffff80;\n  --alpha-grey-00-45: #ffffff73;\n  --alpha-grey-00-40: #ffffff66;\n  --alpha-grey-00-35: #ffffff59;\n  --alpha-grey-00-30: #ffffff4d;\n  --alpha-grey-00-25: #ffffff40;\n  --alpha-grey-00-20: #ffffff33;\n  --alpha-grey-00-15: #ffffff26;\n  --alpha-grey-00-10: #ffffff1a;\n  --alpha-grey-00-05: #ffffff0d;\n  --alpha-grey-00-00: #ffffff00;\n\n  /* Blue Scale */\n  --blue-95: #040e25;\n  --blue-90: #091938;\n  --blue-85: #11254e;\n  --blue-80: #173268;\n  --blue-60: #3363c3;\n  --blue-50: #286df8;\n  --blue-45: #397bff;\n  --blue-40: #508afb;\n  --blue-20: #9bbeff;\n  --blue-10: #cddfff;\n  --blue-05: #e7effc;\n  --blue-01: #f8fafe;\n\n  /* Green Scale */\n  --green-95: #001503;\n  --green-90: #002207;\n  --green-85: #00320d;\n  --green-80: #004214;\n  --green-60: #008933;\n  --green-50: #00a43f;\n  --green-40: #2eaa4d;\n  --green-20: #91d099;\n  --green-10: #c9e8cc;\n  --green-05: #e6f2e7;\n  --green-01: #f8fbf8;\n\n  /* Red Scale */\n  --red-95: #210504;\n  --red-90: #330b09;\n  --red-85: #481310;\n  --red-80: #5f1a16;\n  --red-60: #b02b27;\n  --red-50: #dc2626;\n  --red-45: #f23a36;\n  --red-40: #e8594e;\n  --red-20: #f8a59b;\n  --red-10: #fed2cc;\n  --red-05: #fceae7;\n  --red-01: #fef7f6;\n\n  /* Orange Scale */\n  --orange-95: #200600;\n  --orange-90: #320d01;\n  --orange-85: #471602;\n  --orange-80: #5d1e01;\n  --orange-60: #b44105;\n  --orange-55: #ce4b05;\n  --orange-50: #ec5b13;\n  --orange-40: #e1642f;\n  --orange-20: #f5a88b;\n  --orange-10: #fcd4c5;\n  --orange-05: #fbeae4;\n  --orange-01: #fef9f7;\n\n  /* Purple Scale */\n  --purple-95: #140921;\n  --purple-90: #211132;\n  --purple-85: #301b47;\n  --purple-80: #41255e;\n  --purple-60: #7e4bb2;\n  --purple-50: #9948e3;\n  --purple-45: #a361e6;\n  --purple-40: #a96fe8;\n  --purple-20: #ccaef2;\n  --purple-10: #e5d6fa;\n  --purple-05: #f1ebfa;\n  --purple-01: #f9f7fd;\n\n  /* Neon Scale\n     Base vars (--neon-X) are the /40 level.\n     Alpha variants use -aXX suffix (e.g. -a60 = 40 at 60% opacity).\n     Numeric suffixes are Figma scale levels (e.g. -50 = Neon/X/50). */\n  --neon-yellow-90: #5a581d;\n  --neon-yellow-80: #979430;\n  --neon-yellow-50: #ece600;\n  --neon-yellow: #fef800;\n  --neon-yellow-a60: #fef80099;\n  --neon-yellow-a30: #fef8004d;\n  --neon-yellow-20: #fcfa8f;\n  --neon-yellow-05: #f9faeb;\n\n  --neon-amber-90: #625025;\n  --neon-amber-80: #a68018;\n  --neon-amber-60: #d9a500;\n  --neon-amber-50: #ecb400;\n  --neon-amber: #ffc733;\n  --neon-amber-a60: #ffc73399;\n  --neon-amber-a30: #ffc7334d;\n  --neon-amber-20: #ffd985;\n  --neon-amber-05: #fef8ea;\n\n  --neon-sky-90: #204f67;\n  --neon-sky-80: #3989b3;\n  --neon-sky-50: #1ebcff;\n  --neon-sky: #4dc3ff;\n  --neon-sky-a60: #4dc3ff99;\n  --neon-sky-a30: #4dc3ff4d;\n  --neon-sky-20: #93d8ff;\n  --neon-sky-05: #f2faff;\n\n  --neon-cyan-90: #1a5e5d;\n  --neon-cyan-80: #009a99;\n  --neon-cyan-50: #00ebea;\n  --neon-cyan: #00f9f9;\n  --neon-cyan-a60: #00f9f999;\n  --neon-cyan-a30: #00f9f94d;\n  --neon-cyan-20: #62fefd;\n  --neon-cyan-05: #eafdfc;\n\n  --neon-lime-90: #3f5b39;\n  --neon-lime-80: #639e56;\n  --neon-lime-60: #53cd32;\n  --neon-lime: #6dff46;\n  --neon-lime-a60: #6dff4699;\n  --neon-lime-a30: #6dff464d;\n  --neon-lime-20: #a8ff94;\n  --neon-lime-05: #f2fcf0;\n\n  --neon-magenta-90: #654666;\n  --neon-magenta-80: #ab6bac;\n  --neon-magenta-50: #f198f2;\n  --neon-magenta: #fea1ff;\n  --neon-magenta-a60: #fea1ff99;\n  --neon-magenta-a30: #fea1ff4d;\n  --neon-magenta-20: #fec4fe;\n  --neon-magenta-05: #fff5ff;\n\n  /* Stone Scale */\n  --stone-98: #0b0b0f;\n  --stone-95: #19191e;\n  --stone-90: #26262b;\n  --stone-85: #323239;\n  --stone-80: #3f3f46;\n  --stone-60: #54545d;\n  --stone-50: #7c7c83;\n  --stone-40: #a4a4ab;\n  --stone-20: #cccccf;\n  --stone-10: #e6e6e9;\n  --stone-05: #f0f0f1;\n  --stone-02: #fafafa;\n\n  /* Chalk Scale */\n  --chalk-98: #150702;\n  --chalk-95: #1c1917;\n  --chalk-90: #292524;\n  --chalk-85: #373230;\n  --chalk-80: #433f3b;\n  --chalk-60: #595550;\n  --chalk-50: #827c78;\n  --chalk-40: #a9a3a0;\n  --chalk-20: #cfcbc9;\n  --chalk-10: #e8e6e5;\n  --chalk-05: #f0f0ee;\n  --chalk-02: #fafaf9;\n\n  /* Slate Scale */\n  --slate-98: #050b17;\n  --slate-95: #161a21;\n  --slate-90: #202730;\n  --slate-85: #2b333f;\n  --slate-80: #38404d;\n  --slate-60: #4d5663;\n  --slate-50: #777d8a;\n  --slate-40: #9fa5ae;\n  --slate-20: #c7ccd4;\n  --slate-10: #e4e6ea;\n  --slate-05: #eef0f3;\n  --slate-02: #f9fafb;\n}\n\n/* Brand Colors */\n:root {\n  /* Onyx / Ink */\n  --onyx-ink-100: #000000;\n  --onyx-ink-95: #1c1c1c;\n  --onyx-ink-90: #333333;\n\n  /* Onyx / Chrome */\n  --onyx-chrome-20: #cacaca;\n  --onyx-chrome-10: #e9e9e9;\n  --onyx-chrome-00: #ffffff;\n\n  /* Tint (referencing Stone variables) */\n  --tint-98: var(--stone-98);\n  --tint-95: var(--stone-95);\n  --tint-90: var(--stone-90);\n  --tint-85: var(--stone-85);\n  --tint-80: var(--stone-80);\n  --tint-60: var(--stone-60);\n  --tint-50: var(--stone-50);\n  --tint-40: var(--stone-40);\n  --tint-20: var(--stone-20);\n  --tint-10: var(--stone-10);\n  --tint-05: var(--stone-05);\n  --tint-02: var(--stone-02);\n}\n\n/* Light Colors */\n:root {\n  /* Shimmer colors for loading animations */\n  --shimmer-base: #a3a3a3;\n  --shimmer-highlight: #000000;\n\n  /* Text */\n  --text-05: var(--alpha-grey-100-90);\n  --text-04: var(--alpha-grey-100-75);\n  --text-03: var(--alpha-grey-100-55);\n  --text-02: var(--alpha-grey-100-45);\n  --text-01: var(--alpha-grey-100-20);\n  --text-inverted-01: var(--alpha-grey-00-20);\n  --text-inverted-02: var(--alpha-grey-00-45);\n  --text-inverted-03: var(--alpha-grey-00-60);\n  --text-inverted-04: var(--alpha-grey-00-85);\n  --text-inverted-05: var(--alpha-grey-00-95);\n  --text-light-03: var(--alpha-grey-00-60);\n  --text-light-05: var(--grey-00);\n  --text-dark-03: var(--alpha-grey-100-55);\n  --text-dark-05: var(--grey-100);\n\n  /* Background / Neutral */\n  --background-neutral-00: var(--grey-00);\n  --background-neutral-01: var(--grey-02);\n  --background-neutral-02: var(--grey-06);\n  --background-neutral-03: var(--grey-10);\n  --background-neutral-04: var(--grey-20);\n  --background-neutral-inverted-04: var(--grey-60);\n  --background-neutral-inverted-03: var(--grey-75);\n  --background-neutral-inverted-02: var(--grey-85);\n  --background-neutral-inverted-01: var(--grey-90);\n  --background-neutral-inverted-00: var(--grey-100);\n  --background-neutral-light-00: var(--grey-00);\n  --background-neutral-light-03: var(--grey-10);\n  --background-neutral-dark-03: var(--grey-80);\n\n  /* Background / Tint */\n  --background-tint-00: var(--grey-00);\n  --background-tint-01: var(--tint-02);\n  --background-tint-02: var(--tint-05);\n  --background-tint-03: var(--tint-10);\n  --background-tint-04: var(--tint-20);\n  --background-tint-inverted-04: var(--tint-60);\n  --background-tint-inverted-03: var(--tint-85);\n  --background-tint-inverted-02: var(--tint-90);\n  --background-tint-inverted-01: var(--tint-95);\n  --background-tint-inverted-00: var(--grey-100);\n  --background-tint-light-01: var(--tint-02);\n  --background-tint-dark-01: var(--tint-95);\n\n  /* Border */\n  --border-01: var(--grey-10);\n  --border-02: var(--grey-20);\n  --border-03: var(--grey-40);\n  --border-04: var(--grey-50);\n  --border-05: var(--grey-100);\n  --border-inverted-05: var(--grey-00);\n  --border-inverted-04: var(--grey-30);\n  --border-inverted-03: var(--grey-50);\n  --border-inverted-02: var(--grey-60);\n  --border-inverted-01: var(--grey-80);\n\n  /* Theme */\n  --theme-primary-06: var(--onyx-ink-100);\n  --theme-primary-05: var(--onyx-ink-95);\n  --theme-primary-04: var(--onyx-ink-90);\n\n  /* Theme / Gradient */\n  --theme-gradient-05: var(--tint-50);\n  --theme-gradient-00: var(--grey-100);\n\n  /* Theme / Red */\n  --theme-red-05: var(--red-50);\n  --theme-red-04: var(--red-50);\n  --theme-red-02: var(--red-20);\n  --theme-red-01: var(--red-05);\n\n  /* Theme / Orange */\n  --theme-orange-05: var(--orange-55);\n  --theme-orange-04: var(--orange-50);\n  --theme-orange-02: var(--orange-20);\n  --theme-orange-01: var(--orange-05);\n\n  /* Theme / Amber */\n  --theme-amber-05: var(--neon-amber-50);\n  --theme-amber-04: var(--neon-amber);\n  --theme-amber-02: var(--neon-amber-20);\n  --theme-amber-01: var(--neon-amber-05);\n\n  /* Theme / Yellow */\n  --theme-yellow-05: var(--neon-yellow-50);\n  --theme-yellow-02: var(--neon-yellow-20);\n  --theme-yellow-01: var(--neon-yellow-05);\n\n  /* Theme / Green */\n  --theme-green-05: var(--green-60);\n  --theme-green-02: var(--green-20);\n  --theme-green-01: var(--green-05);\n\n  /* Theme / Lime */\n  --theme-lime-05: var(--neon-lime-60);\n  --theme-lime-02: var(--neon-lime-20);\n  --theme-lime-01: var(--neon-lime-05);\n\n  /* Theme / Cyan */\n  --theme-cyan-05: var(--neon-cyan-50);\n  --theme-cyan-02: var(--neon-cyan-20);\n  --theme-cyan-01: var(--neon-cyan-05);\n\n  /* Theme / Sky */\n  --theme-sky-05: var(--neon-sky-50);\n  --theme-sky-02: var(--neon-sky-20);\n  --theme-sky-01: var(--neon-sky-05);\n\n  /* Theme / Blue */\n  --theme-blue-05: var(--blue-50);\n  --theme-blue-02: var(--blue-20);\n  --theme-blue-01: var(--blue-05);\n\n  /* Theme / Purple */\n  --theme-purple-05: var(--purple-50);\n  --theme-purple-02: var(--purple-20);\n  --theme-purple-01: var(--purple-05);\n\n  /* Theme / Magenta */\n  --theme-magenta-05: var(--neon-magenta-50);\n  --theme-magenta-02: var(--neon-magenta-20);\n  --theme-magenta-01: var(--neon-magenta-05);\n\n  /* Status */\n  --status-success-05: var(--green-50);\n  --status-success-02: var(--green-20);\n  --status-success-01: var(--green-05);\n  --status-success-00: var(--green-01);\n  --status-info-05: var(--blue-50);\n  --status-info-02: var(--blue-20);\n  --status-info-01: var(--blue-05);\n  --status-info-00: var(--blue-01);\n  --status-warning-05: var(--orange-50);\n  --status-warning-02: var(--orange-20);\n  --status-warning-01: var(--orange-05);\n  --status-warning-00: var(--orange-01);\n  --status-error-05: var(--red-50);\n  --status-error-02: var(--red-20);\n  --status-error-01: var(--red-05);\n  --status-error-00: var(--red-01);\n\n  /* Status / Text */\n  --status-text-success-05: var(--green-60);\n  --status-text-info-05: var(--blue-50);\n  --status-text-warning-05: var(--orange-55);\n  --status-text-error-05: var(--red-50);\n\n  /* Action */\n  --action-link-06: var(--blue-60);\n  --action-link-05: var(--blue-50);\n  --action-link-04: var(--blue-40);\n  --action-link-03: var(--blue-20);\n  --action-link-02: var(--blue-10);\n  --action-link-01: var(--blue-05);\n  --action-link-00: var(--blue-01);\n  --action-danger-06: var(--red-60);\n  --action-danger-05: var(--red-50);\n  --action-danger-04: var(--red-40);\n  --action-danger-03: var(--red-20);\n  --action-danger-02: var(--red-10);\n  --action-danger-01: var(--red-05);\n\n  /* Action / Text */\n  --action-text-link-05: var(--blue-50);\n  --action-text-danger-05: var(--red-50);\n\n  /* Background / Code */\n  --background-code-01: var(--grey-02);\n\n  /* Code */\n  --code-code: var(--alpha-grey-100-85);\n  --code-comment: var(--alpha-grey-100-35);\n  --code-keyword: var(--purple-50);\n  --code-string: var(--green-60);\n  --code-number: var(--blue-50);\n  --code-definition: var(--orange-55);\n\n  /* Highlight */\n  --highlight-match: var(--neon-yellow-a30);\n  --highlight-selection: var(--neon-sky-a30);\n  --highlight-active: var(--neon-amber-a60);\n  --highlight-accent: var(--neon-magenta-a60);\n\n  /* Shadow */\n  --shadow-01: var(--alpha-grey-100-05);\n  --shadow-02: var(--alpha-grey-100-10);\n  --shadow-03: var(--alpha-grey-100-20);\n\n  /* Mask */\n  --mask-01: var(--alpha-grey-00-10);\n  --mask-02: var(--alpha-grey-100-20);\n  --mask-03: var(--alpha-grey-100-40);\n\n  /* Frost Overlay (for FrostedDiv component) - lighter in light mode */\n  --frost-overlay: var(--alpha-grey-00-10);\n\n  /* Scrollbar */\n  --scrollbar-track: transparent;\n  --scrollbar-thumb: var(--alpha-grey-100-20);\n}\n\n/* Dark Colors */\n.dark {\n  /* Shimmer colors for loading animations */\n  --shimmer-base: #5c5c5c;\n  --shimmer-highlight: #ffffff;\n\n  /* Text */\n  --text-05: var(--alpha-grey-00-95);\n  --text-04: var(--alpha-grey-00-85);\n  --text-03: var(--alpha-grey-00-60);\n  --text-02: var(--alpha-grey-00-45);\n  --text-01: var(--alpha-grey-00-20);\n  --text-inverted-01: var(--alpha-grey-100-20);\n  --text-inverted-02: var(--alpha-grey-100-45);\n  --text-inverted-03: var(--alpha-grey-100-55);\n  --text-inverted-04: var(--alpha-grey-100-75);\n  --text-inverted-05: var(--alpha-grey-100-90);\n  --text-light-03: var(--alpha-grey-00-60);\n  --text-light-05: var(--grey-00);\n  --text-dark-03: var(--alpha-grey-100-55);\n  --text-dark-05: var(--grey-100);\n\n  /* Background / Neutral */\n  --background-neutral-00: var(--grey-100);\n  --background-neutral-01: var(--grey-90);\n  --background-neutral-02: var(--grey-85);\n  --background-neutral-03: var(--grey-80);\n  --background-neutral-04: var(--grey-75);\n  --background-neutral-inverted-04: var(--grey-20);\n  --background-neutral-inverted-03: var(--grey-10);\n  --background-neutral-inverted-02: var(--grey-06);\n  --background-neutral-inverted-01: var(--grey-02);\n  --background-neutral-inverted-00: var(--grey-00);\n  --background-neutral-light-00: var(--grey-00);\n  --background-neutral-light-03: var(--grey-10);\n  --background-neutral-dark-03: var(--grey-80);\n\n  /* Background / Tint */\n  --background-tint-00: var(--grey-100);\n  --background-tint-01: var(--tint-95);\n  --background-tint-02: var(--tint-90);\n  --background-tint-03: var(--tint-85);\n  --background-tint-04: var(--tint-80);\n  --background-tint-inverted-04: var(--tint-20);\n  --background-tint-inverted-03: var(--tint-10);\n  --background-tint-inverted-02: var(--tint-05);\n  --background-tint-inverted-01: var(--tint-02);\n  --background-tint-inverted-00: var(--grey-00);\n  --background-tint-light-01: var(--tint-02);\n  --background-tint-dark-01: var(--tint-95);\n\n  /* Border */\n  --border-01: var(--grey-80);\n  --border-02: var(--grey-60);\n  --border-03: var(--grey-50);\n  --border-04: var(--grey-30);\n  --border-05: var(--grey-00);\n  --border-inverted-05: var(--grey-100);\n  --border-inverted-04: var(--grey-50);\n  --border-inverted-03: var(--grey-40);\n  --border-inverted-02: var(--grey-20);\n  --border-inverted-01: var(--grey-10);\n\n  /* Theme */\n  --theme-primary-06: var(--onyx-chrome-00);\n  --theme-primary-05: var(--onyx-chrome-10);\n  --theme-primary-04: var(--onyx-chrome-20);\n\n  /* Theme / Gradient */\n  --theme-gradient-05: var(--grey-100);\n  --theme-gradient-00: var(--grey-00);\n\n  /* Theme / Red */\n  --theme-red-05: var(--red-45);\n  --theme-red-04: var(--red-50);\n  --theme-red-02: var(--red-80);\n  --theme-red-01: var(--red-90);\n\n  /* Theme / Orange */\n  --theme-orange-05: var(--orange-40);\n  --theme-orange-04: var(--orange-50);\n  --theme-orange-02: var(--orange-80);\n  --theme-orange-01: var(--orange-90);\n\n  /* Theme / Amber */\n  --theme-amber-05: var(--neon-amber);\n  --theme-amber-04: var(--neon-amber-60);\n  --theme-amber-02: var(--neon-amber-80);\n  --theme-amber-01: var(--neon-amber-90);\n\n  /* Theme / Yellow */\n  --theme-yellow-05: var(--neon-yellow);\n  --theme-yellow-02: var(--neon-yellow-80);\n  --theme-yellow-01: var(--neon-yellow-90);\n\n  /* Theme / Green */\n  --theme-green-05: var(--green-50);\n  --theme-green-02: var(--green-80);\n  --theme-green-01: var(--green-90);\n\n  /* Theme / Lime */\n  --theme-lime-05: var(--neon-lime);\n  --theme-lime-02: var(--neon-lime-80);\n  --theme-lime-01: var(--neon-lime-90);\n\n  /* Theme / Cyan */\n  --theme-cyan-05: var(--neon-cyan);\n  --theme-cyan-02: var(--neon-cyan-80);\n  --theme-cyan-01: var(--neon-cyan-90);\n\n  /* Theme / Sky */\n  --theme-sky-05: var(--neon-sky);\n  --theme-sky-02: var(--neon-sky-80);\n  --theme-sky-01: var(--neon-sky-90);\n\n  /* Theme / Blue */\n  --theme-blue-05: var(--blue-45);\n  --theme-blue-02: var(--blue-80);\n  --theme-blue-01: var(--blue-90);\n\n  /* Theme / Purple */\n  --theme-purple-05: var(--purple-45);\n  --theme-purple-02: var(--purple-80);\n  --theme-purple-01: var(--purple-90);\n\n  /* Theme / Magenta */\n  --theme-magenta-05: var(--neon-magenta);\n  --theme-magenta-02: var(--neon-magenta-80);\n  --theme-magenta-01: var(--neon-magenta-90);\n\n  /* Status */\n  --status-success-05: var(--green-50);\n  --status-success-02: var(--green-80);\n  --status-success-01: var(--green-90);\n  --status-success-00: var(--green-95);\n  --status-info-05: var(--blue-50);\n  --status-info-02: var(--blue-80);\n  --status-info-01: var(--blue-90);\n  --status-info-00: var(--blue-95);\n  --status-warning-05: var(--orange-50);\n  --status-warning-02: var(--orange-80);\n  --status-warning-01: var(--orange-90);\n  --status-warning-00: var(--orange-95);\n  --status-error-05: var(--red-50);\n  --status-error-02: var(--red-80);\n  --status-error-01: var(--red-90);\n  --status-error-00: var(--red-95);\n\n  /* Status / Text */\n  --status-text-success-05: var(--green-50);\n  --status-text-info-05: var(--blue-45);\n  --status-text-warning-05: var(--orange-50);\n  --status-text-error-05: var(--red-45);\n\n  /* Action */\n  --action-link-06: var(--blue-40);\n  --action-link-05: var(--blue-50);\n  --action-link-04: var(--blue-60);\n  --action-link-03: var(--blue-80);\n  --action-link-02: var(--blue-85);\n  --action-link-01: var(--blue-90);\n  --action-link-00: var(--blue-95);\n  --action-danger-06: var(--red-40);\n  --action-danger-05: var(--red-50);\n  --action-danger-04: var(--red-60);\n  --action-danger-03: var(--red-80);\n  --action-danger-02: var(--red-85);\n  --action-danger-01: var(--red-90);\n\n  /* Action / Text */\n  --action-text-link-05: var(--blue-45);\n  --action-text-danger-05: var(--red-45);\n\n  /* Background / Code */\n  --background-code-01: #151617;\n\n  /* Code */\n  --code-code: var(--alpha-grey-00-85);\n  --code-comment: var(--alpha-grey-00-45);\n  --code-keyword: var(--purple-45);\n  --code-string: var(--green-50);\n  --code-number: var(--blue-45);\n  --code-definition: var(--orange-50);\n\n  /* Highlight */\n  --highlight-match: var(--neon-yellow-a30);\n  --highlight-selection: var(--neon-sky-a30);\n  --highlight-active: var(--neon-amber-a60);\n  --highlight-accent: var(--neon-magenta-a60);\n\n  /* Shadow */\n  --shadow-01: var(--alpha-grey-00-05);\n  --shadow-02: var(--alpha-grey-00-10);\n  --shadow-03: var(--alpha-grey-00-20);\n\n  /* Mask */\n  --mask-01: var(--alpha-grey-00-10);\n  --mask-02: var(--alpha-grey-100-20);\n  --mask-03: var(--alpha-grey-100-40);\n\n  /* Frost Overlay (for FrostedDiv component) - darker in dark mode */\n  --frost-overlay: var(--alpha-grey-100-10);\n\n  /* Scrollbar */\n  --scrollbar-track: transparent;\n  --scrollbar-thumb: var(--alpha-grey-00-20);\n}\n"
  },
  {
    "path": "web/src/app/css/divider.css",
    "content": "/* =============================================================================\n   Divider Keyboard Navigation Overrides\n   Disable hover effects when keyboard navigation is active\n   ============================================================================= */\n[data-keyboard-nav=\"true\"] .group\\/divider:hover {\n  background-color: transparent !important;\n}\n\n[data-keyboard-nav=\"true\"] .group\\/divider[data-selected=\"true\"] {\n  background-color: var(--background-tint-02) !important;\n}\n"
  },
  {
    "path": "web/src/app/css/general-layouts.css",
    "content": "/* LineItemLayout */\n.line-item-layout {\n  @apply grid;\n  column-gap: 0.5rem;\n  grid-template-columns: 1fr;\n}\n\n.line-item-layout[data-reduced-padding=\"true\"] {\n  @apply p-2;\n}\n\n.line-item-layout[data-has-icon=\"true\"] {\n  grid-template-columns: auto 1fr;\n}\n\n.line-item-layout[data-loading=\"true\"] {\n  row-gap: 0.25rem;\n}\n\n/* LineItemLayout Icon */\n.line-item-layout-icon {\n  @apply self-center stroke-text-04;\n}\n\n.line-item-layout[data-variant=\"tertiary-muted\"] .line-item-layout-icon,\n.line-item-layout[data-variant=\"mini\"] .line-item-layout-icon {\n  @apply stroke-text-03;\n}\n\n.line-item-layout-title {\n  @apply text-left;\n}\n\n.line-item-layout[data-strikethrough=\"true\"] .line-item-layout-title {\n  @apply line-through;\n}\n\n/* LineItemLayout Description */\n.line-item-layout-description {\n  @apply leading-none text-left;\n}\n\n.line-item-layout[data-has-icon=\"true\"] .line-item-layout-description {\n  @apply col-start-2;\n}\n\n/* LineItemLayout Skeleton */\n.line-item-layout-skeleton-title {\n  @apply h-4 bg-background-neutral-01 rounded-08 w-1/3 animate-pulse;\n}\n\n.line-item-layout-skeleton-description {\n  @apply h-6 bg-background-neutral-01 rounded-08 w-2/3 animate-pulse;\n}\n\n.line-item-layout-skeleton-right {\n  @apply h-5 w-10 bg-background-neutral-01 rounded-full animate-pulse;\n}\n"
  },
  {
    "path": "web/src/app/css/inputs.css",
    "content": "/* Input styling */\n.input-normal {\n  background-color: var(--background-neutral-00);\n  border: 1px solid var(--border-01);\n}\n.input-normal:hover {\n  border-color: var(--border-02);\n}\n.input-normal:active {\n  border-color: var(--border-05);\n}\n.input-normal:focus:not(:active),\n.input-normal:focus-within:not(:active) {\n  border-color: var(--border-05);\n  box-shadow: inset 0px 0px 0px 2px var(--background-tint-04);\n}\n\n.input-error {\n  background-color: var(--background-neutral-00);\n  border: 1px solid var(--status-error-05);\n}\n.input-error:focus:not(:active),\n.input-error:focus-within:not(:active) {\n  box-shadow: inset 0px 0px 0px 2px var(--background-tint-04);\n}\n\n.input-disabled {\n  background-color: var(--background-neutral-03);\n  border: 1px solid transparent;\n  cursor: not-allowed;\n}\n"
  },
  {
    "path": "web/src/app/css/knowledge-table.css",
    "content": "/* ============================================================================\n   Table Layout Components\n   Based on Figma: Table/Cell component specs\n   ============================================================================ */\n\n/* Table Row Layout\n   Figma specs:\n   - Regular size: min-height 36px (2.25rem)\n   - Padding: 8px (0.5rem) vertical\n   - Gap: 4px (0.25rem) between items\n   - Border radius: 8px on hover\n*/\n.table-row-layout {\n  display: flex;\n  flex-direction: row;\n  align-items: center;\n  gap: 0.25rem;\n  min-height: 2.25rem;\n  padding: 0.5rem 0;\n  border-radius: var(--border-radius-08);\n  width: 100%;\n  min-width: 0;\n}\n\n.table-row-layout.cursor-pointer {\n  cursor: pointer;\n}\n\n.table-row-layout:hover {\n  background-color: var(--background-tint-01);\n}\n\n.table-row-layout[data-selected=\"true\"] {\n  background-color: var(--action-link-01);\n}\n\n/* Table Cell Layout\n   Figma specs:\n   - Gap: 4px (0.25rem) internal\n   - Text padding: 2px (0.125rem) horizontal\n   - min-width: 1px to allow truncation\n*/\n.table-cell-layout {\n  display: flex;\n  flex: 0 0 auto;\n  gap: 0.25rem;\n  align-items: center;\n  min-width: 1px;\n  min-height: 1px;\n  overflow: hidden;\n}\n\n.table-cell-layout[data-flex=\"true\"] {\n  flex: 1 0 0;\n}\n\n.table-cell-layout[data-fixed=\"true\"] {\n  flex-shrink: 0;\n}\n\n/* Sidebar Layout\n   Fixed-width navigation sidebar\n   Figma specs:\n   - Width: 200px (12.5rem)\n   - Gap: 4px (0.25rem)\n*/\n.sidebar-layout {\n  display: flex;\n  flex-direction: column;\n  width: 12.5rem;\n  flex-shrink: 0;\n  gap: 0.25rem;\n}\n\n/* Fix for Truncated text containers in sidebar to prevent vertical clipping\n   The Truncated component wrapper has overflow-hidden which clips text\n   if the container doesn't have sufficient height for the line-height.\n   font-secondary-body uses line-height: 16px (1rem), but we add slightly more\n   (1.125rem = 18px) to accommodate text descenders (like 'p', 'g', 'd').\n*/\n.sidebar-layout .flex-grow.overflow-hidden {\n  min-height: 1.125rem;\n}\n\n/* Two Column Layout\n   Container for sidebar + content pattern\n*/\n.two-column-layout {\n  display: flex;\n  flex-direction: row;\n  gap: 0.5rem;\n  min-width: 0;\n  overflow: hidden;\n}\n\n/* Content Column Layout\n   Main content area that fills remaining space\n   Content should align to the top (start)\n*/\n.content-column-layout {\n  display: flex;\n  flex-direction: column;\n  flex: 1 0 0;\n  min-width: 1px;\n  min-height: 1px;\n  overflow: hidden;\n  justify-content: flex-start;\n}\n\n/* Hidden Input\n   For file uploads and other hidden inputs\n*/\n.hidden-input {\n  display: none;\n}\n\n/* Checkbox Cell\n   Figma specs:\n   - Width: 24px (1.5rem)\n   - Centered content\n*/\n.checkbox-cell-layout {\n  display: flex;\n  align-items: center;\n  justify-content: center;\n  width: 1.5rem;\n  min-width: 1.5rem;\n  flex-shrink: 0;\n}\n\n/* Source Icons Row\n   Row of source type icons\n   Figma specs:\n   - Gap: 4px (0.25rem)\n   - Icon size: 16px (1rem)\n*/\n.source-icons-layout {\n  display: flex;\n  align-items: center;\n  gap: 0.25rem;\n}\n\n.source-icons-layout > svg {\n  width: 1rem;\n  height: 1rem;\n  flex-shrink: 0;\n}\n"
  },
  {
    "path": "web/src/app/css/line-item.css",
    "content": "/* LineItem Button Variants */\n/* Hover styles are disabled when keyboard navigation is active (data-keyboard-nav on parent) */\n.line-item-button-main {\n  @apply bg-transparent hover:bg-background-tint-02;\n\n  [data-keyboard-nav=\"true\"] &:hover {\n    @apply bg-transparent;\n  }\n}\n\n.line-item-button-main-emphasized {\n  @apply bg-transparent hover:bg-background-tint-02;\n\n  [data-keyboard-nav=\"true\"] &:hover {\n    @apply bg-transparent;\n  }\n\n  &[data-selected=\"true\"] {\n    @apply bg-action-link-01;\n  }\n\n  /* Ensure selected wins over keyboard-nav hover override */\n  [data-keyboard-nav=\"true\"] &[data-selected=\"true\"] {\n    @apply bg-action-link-01;\n  }\n}\n\n.line-item-button-strikethrough {\n  @apply bg-transparent hover:bg-background-tint-02;\n\n  [data-keyboard-nav=\"true\"] &:hover {\n    @apply bg-transparent;\n  }\n}\n\n.line-item-button-strikethrough-emphasized {\n  @apply bg-transparent hover:bg-background-tint-02;\n\n  [data-keyboard-nav=\"true\"] &:hover {\n    @apply bg-transparent;\n  }\n}\n\n.line-item-button-disabled {\n  @apply bg-transparent cursor-not-allowed;\n}\n\n.line-item-button-disabled-emphasized {\n  @apply bg-transparent cursor-not-allowed;\n}\n\n.line-item-button-danger {\n  @apply bg-transparent hover:bg-background-tint-02;\n\n  [data-keyboard-nav=\"true\"] &:hover {\n    @apply bg-transparent;\n  }\n}\n\n.line-item-button-danger-emphasized {\n  @apply bg-transparent hover:bg-background-tint-02;\n\n  [data-keyboard-nav=\"true\"] &:hover {\n    @apply bg-transparent;\n  }\n\n  &[data-selected=\"true\"] {\n    @apply bg-status-error-01;\n  }\n\n  /* Ensure selected wins over keyboard-nav hover override */\n  [data-keyboard-nav=\"true\"] &[data-selected=\"true\"] {\n    @apply bg-status-error-01;\n  }\n}\n\n/* Action Variant - same background behavior as main */\n.line-item-button-action {\n  @apply bg-transparent hover:bg-background-tint-02;\n\n  [data-keyboard-nav=\"true\"] &:hover {\n    @apply bg-transparent;\n  }\n}\n\n.line-item-button-action-emphasized {\n  @apply bg-transparent hover:bg-background-tint-02;\n\n  [data-keyboard-nav=\"true\"] &:hover {\n    @apply bg-transparent;\n  }\n\n  &[data-selected=\"true\"] {\n    @apply bg-background-tint-02;\n  }\n\n  /* Ensure selected wins over keyboard-nav hover override */\n  [data-keyboard-nav=\"true\"] &[data-selected=\"true\"] {\n    @apply bg-background-tint-02;\n  }\n}\n\n/* Muted Variant - subdued styling for less prominent items */\n.line-item-button-muted {\n  @apply bg-transparent hover:bg-background-tint-02;\n\n  [data-keyboard-nav=\"true\"] &:hover {\n    @apply bg-transparent;\n  }\n}\n\n.line-item-button-muted-emphasized {\n  @apply bg-transparent hover:bg-background-tint-02;\n\n  [data-keyboard-nav=\"true\"] &:hover {\n    @apply bg-transparent;\n  }\n\n  &[data-selected=\"true\"] {\n    @apply bg-background-tint-02;\n  }\n\n  /* Ensure selected wins over keyboard-nav hover override */\n  [data-keyboard-nav=\"true\"] &[data-selected=\"true\"] {\n    @apply bg-background-tint-02;\n  }\n}\n\n/* Skeleton Variant - dashed border placeholder style */\n.line-item-button-skeleton {\n  @apply bg-transparent border border-dashed border-border-01 hover:bg-background-tint-01;\n\n  [data-keyboard-nav=\"true\"] &:hover {\n    @apply bg-transparent;\n  }\n}\n\n.line-item-button-skeleton-emphasized {\n  @apply bg-transparent border border-dashed border-border-01 hover:bg-background-tint-02;\n\n  [data-keyboard-nav=\"true\"] &:hover {\n    @apply bg-transparent;\n  }\n\n  &[data-selected=\"true\"] {\n    @apply bg-background-tint-02;\n  }\n\n  /* Ensure selected wins over keyboard-nav hover override */\n  [data-keyboard-nav=\"true\"] &[data-selected=\"true\"] {\n    @apply bg-background-tint-02;\n  }\n}\n\n/* LineItem Text Variants */\n.line-item-text-main {\n  color: var(--text-04) !important;\n\n  .group\\/LineItem[data-selected=\"true\"] & {\n    color: var(--action-link-05) !important;\n  }\n}\n\n.line-item-text-disabled {\n  color: var(--text-01) !important;\n}\n\n.line-item-text-strikethrough {\n  color: var(--text-02) !important;\n  @apply line-through decoration-2 !important;\n}\n\n.line-item-text-danger {\n  color: var(--status-error-05) !important;\n}\n\n.line-item-text-action {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 14px;\n  font-weight: 600;\n  line-height: 20px;\n  letter-spacing: 0px;\n  color: var(--text-04) !important;\n}\n\n.line-item-text-muted {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 14px;\n  font-weight: 500;\n  line-height: 20px;\n  letter-spacing: 0px;\n  color: var(--text-03) !important;\n\n  .group\\/LineItem[data-selected=\"true\"] & {\n    color: var(--text-03) !important;\n  }\n}\n\n.line-item-text-skeleton {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 12px;\n  font-weight: 400;\n  line-height: 16px;\n  letter-spacing: 0px;\n  color: var(--text-03) !important;\n}\n\n/* LineItem Icon Variants */\n.line-item-icon-main {\n  @apply stroke-text-03;\n\n  .group\\/LineItem[data-selected=\"true\"] & {\n    @apply stroke-action-link-05;\n  }\n}\n\n.line-item-icon-strikethrough {\n  @apply stroke-text-03;\n}\n\n.line-item-icon-disabled {\n  @apply stroke-text-01;\n}\n\n.line-item-icon-danger {\n  @apply stroke-status-error-05;\n}\n\n.line-item-icon-action {\n  @apply stroke-text-03;\n}\n\n.line-item-icon-muted {\n  @apply stroke-text-02;\n\n  .group\\/LineItem[data-selected=\"true\"] & {\n    @apply stroke-text-02;\n  }\n}\n\n.line-item-icon-skeleton {\n  @apply stroke-text-02;\n}\n"
  },
  {
    "path": "web/src/app/css/sizes.css",
    "content": ":root {\n  --app-page-main-content-width: 52.5rem;\n  --block-width-form-input-min: 10rem;\n\n  --container-sm: 42rem;\n  --container-sm-md: 47rem;\n  --container-md: 54.5rem;\n  --container-lg: 62rem;\n  --container-full: 100%;\n}\n"
  },
  {
    "path": "web/src/app/css/square-button.css",
    "content": ".square-button {\n  /* Base styles */\n  position: relative;\n  display: inline-flex;\n  align-items: center;\n  justify-content: center;\n  aspect-ratio: 1 / 1;\n  border-radius: var(--radius-08);\n  padding: 0.5rem;\n  background-color: var(--background-tint-01);\n}\n\n.square-button:hover {\n  background-color: var(--background-tint-02);\n}\n\n.square-button:active {\n  background-color: var(--background-tint-03);\n}\n\n.square-button:disabled {\n  cursor: not-allowed;\n  opacity: 0.5;\n}\n\n/* Transient state */\n.square-button[data-state=\"transient\"] {\n  border: 1px solid var(--action-link-05);\n  background-color: var(--action-link-00);\n}\n\n.square-button[data-state=\"transient\"]:hover {\n  background-color: var(--action-link-01);\n}\n\n.square-button[data-state=\"transient\"]:active {\n  background-color: var(--action-link-02);\n}\n"
  },
  {
    "path": "web/src/app/css/switch.css",
    "content": ".switch-normal {\n  background-color: var(--background-tint-03);\n  border: 1px solid transparent;\n}\n.switch-normal:hover {\n  background-color: var(--background-tint-04);\n}\n.switch-normal:focus,\n.switch-normal:focus-within {\n  border-color: var(--background-tint-04);\n}\n.switch-normal:focus:hover,\n.switch-normal:focus-within:hover {\n  border-color: var(--border-01);\n}\n\n.switch-normal-checked {\n  background-color: var(--action-link-05);\n  border: 1px solid transparent;\n}\n.switch-normal-checked:hover {\n  background-color: var(--action-link-04);\n}\n.switch-normal-checked:focus,\n.switch-normal-checked:focus-within {\n  border-color: var(--action-link-04);\n}\n.switch-normal-checked:focus:hover,\n.switch-normal-checked:focus-within:hover {\n  border-color: var(--border-01);\n}\n\n.switch-disabled {\n  background-color: var(--background-neutral-04);\n  border: 1px solid transparent;\n  cursor: not-allowed !important;\n}\n.switch-disabled-checked {\n  background-color: var(--action-link-03);\n  border: 1px solid transparent;\n  cursor: not-allowed !important;\n}\n\n.switch-thumb {\n  background-color: var(--background-neutral-light-00);\n}\n.switch-thumb-disabled {\n  background-color: var(--background-neutral-03);\n}\n"
  },
  {
    "path": "web/src/app/css/z-index.css",
    "content": ":root {\n  /* Base layers */\n  --z-base: 0;\n  --z-content: 1;\n  /* Settings header must sit above sticky table headers (--z-sticky: 10) so\n     the page header scrolls over pinned columns without being obscured. */\n  --z-settings-header: 11;\n  --z-app-layout: 9;\n  --z-sticky: 10;\n\n  /* Interactive overlays */\n  --z-modal-overlay: 900;\n  --z-modal: 1000;\n  --z-toast: 1100;\n  --z-popover: 1200;\n  --z-tooltip: 1300;\n}\n\n/* Base layers */\n.z-base {\n  z-index: var(--z-base);\n}\n.z-content {\n  z-index: var(--z-content);\n}\n.z-settings-header {\n  z-index: var(--z-settings-header);\n}\n.z-app-layout {\n  z-index: var(--z-app-layout);\n}\n.z-sticky {\n  z-index: var(--z-sticky);\n}\n\n/* Interactive overlays */\n.z-modal-overlay {\n  z-index: var(--z-modal-overlay);\n}\n.z-modal {\n  z-index: var(--z-modal);\n}\n.z-toast {\n  z-index: var(--z-toast);\n}\n.z-popover {\n  z-index: var(--z-popover);\n}\n.z-tooltip {\n  z-index: var(--z-tooltip);\n}\n"
  },
  {
    "path": "web/src/app/ee/EEFeatureRedirect.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport { toast } from \"@/hooks/useToast\";\n\nexport default function EEFeatureRedirect() {\n  const router = useRouter();\n\n  useEffect(() => {\n    toast.error(\n      \"This feature requires a license. Please upgrade your plan to access.\"\n    );\n    router.replace(\"/app\");\n  }, [router]);\n\n  return null;\n}\n"
  },
  {
    "path": "web/src/app/ee/LICENSE",
    "content": "The Onyx Enterprise License (the \"Enterprise License\")\nCopyright (c) 2023-present DanswerAI, Inc.\n\nWith regard to the Onyx Software:\n\nThis software and associated documentation files (the \"Software\") may only be\nused in production, if you (and any entity that you represent) have agreed to,\nand are in compliance with, the Onyx Subscription Terms of Service, available\nat https://www.onyx.app/legal/self-host (the \"Enterprise Terms\"), or other\nagreement governing the use of the Software, as agreed by you and DanswerAI,\nand otherwise have a valid Onyx Enterprise License for the\ncorrect number of user seats. Subject to the foregoing sentence, you are free to\nmodify this Software and publish patches to the Software. You agree that DanswerAI\nand/or its licensors (as applicable) retain all right, title and interest in and\nto all such modifications and/or patches, and all such modifications and/or\npatches may only be used, copied, modified, displayed, distributed, or otherwise\nexploited with a valid Onyx Enterprise License for the correct\nnumber of user seats. Notwithstanding the foregoing, you may copy and modify\nthe Software for development and testing purposes, without requiring a\nsubscription. You agree that DanswerAI and/or its licensors (as applicable) retain\nall right, title and interest in and to all such modifications. You are not\ngranted any other rights beyond what is expressly stated herein. Subject to the\nforegoing, it is forbidden to copy, merge, publish, distribute, sublicense,\nand/or sell the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n\nFor all third party components incorporated into the Onyx Software, those\ncomponents are licensed under the original license provided by the owner of the\napplicable component.\n"
  },
  {
    "path": "web/src/app/ee/admin/billing/BillingAlerts.tsx",
    "content": "import React from \"react\";\nimport { Alert, AlertDescription, AlertTitle } from \"@/components/ui/alert\";\nimport { CircleAlert, Info } from \"lucide-react\";\nimport { BillingInformation, BillingStatus } from \"@/lib/billing/interfaces\";\n\nexport function BillingAlerts({\n  billingInformation,\n}: {\n  billingInformation: BillingInformation;\n}) {\n  const isTrialing = billingInformation.status === BillingStatus.TRIALING;\n  const isCancelled = billingInformation.cancel_at_period_end;\n  const isExpired = billingInformation.current_period_end\n    ? new Date(billingInformation.current_period_end) < new Date()\n    : false;\n  const noPaymentMethod = !billingInformation.payment_method_enabled;\n\n  const messages: string[] = [];\n\n  if (isExpired) {\n    messages.push(\n      \"Your subscription has expired. Please resubscribe to continue using the service.\"\n    );\n  }\n  if (isCancelled && !isExpired && billingInformation.current_period_end) {\n    messages.push(\n      `Your subscription will cancel on ${new Date(\n        billingInformation.current_period_end\n      ).toLocaleDateString()}. You can resubscribe before this date to remain uninterrupted.`\n    );\n  }\n  if (isTrialing) {\n    messages.push(\n      `You're currently on a trial. Your trial ends on ${\n        billingInformation.trial_end\n          ? new Date(billingInformation.trial_end).toLocaleDateString()\n          : \"N/A\"\n      }.`\n    );\n  }\n  if (noPaymentMethod) {\n    messages.push(\n      \"You currently have no payment method on file. Please add one to avoid service interruption.\"\n    );\n  }\n\n  const variant = isExpired || noPaymentMethod ? \"destructive\" : \"default\";\n\n  if (messages.length === 0) return null;\n\n  return (\n    <Alert variant={variant}>\n      <AlertTitle className=\"flex items-center space-x-2\">\n        {variant === \"destructive\" ? (\n          <CircleAlert className=\"h-4 w-4\" />\n        ) : (\n          <Info className=\"h-4 w-4\" />\n        )}\n        <span>\n          {variant === \"destructive\"\n            ? \"Important Subscription Notice\"\n            : \"Subscription Notice\"}\n        </span>\n      </AlertTitle>\n      <AlertDescription>\n        <ul className=\"list-disc list-inside space-y-1 mt-2\">\n          {messages.map((msg, idx) => (\n            <li key={idx}>{msg}</li>\n          ))}\n        </ul>\n      </AlertDescription>\n    </Alert>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/billing/BillingInformationPage.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  createCustomerPortalSession,\n  useBillingInformation,\n  hasActiveSubscription,\n} from \"@/lib/billing\";\n\nimport {\n  Card,\n  CardContent,\n  CardDescription,\n  CardHeader,\n  CardTitle,\n} from \"@/components/ui/card\";\nimport { Button } from \"@opal/components\";\nimport { SubscriptionSummary } from \"./SubscriptionSummary\";\nimport { BillingAlerts } from \"./BillingAlerts\";\nimport { SvgClipboard, SvgWallet } from \"@opal/icons\";\nexport default function BillingInformationPage() {\n  const {\n    data: billingInformation,\n    error,\n    isLoading,\n  } = useBillingInformation();\n\n  useEffect(() => {\n    const url = new URL(window.location.href);\n    if (url.searchParams.has(\"session_id\")) {\n      toast.success(\n        \"Congratulations! Your subscription has been updated successfully.\"\n      );\n      url.searchParams.delete(\"session_id\");\n      window.history.replaceState({}, \"\", url.toString());\n    }\n  }, []);\n\n  if (isLoading) {\n    return <div className=\"text-center py-8\">Loading...</div>;\n  }\n\n  if (error) {\n    console.error(\"Failed to fetch billing information:\", error);\n    return (\n      <div className=\"text-center py-8 text-red-500\">\n        Error loading billing information. Please try again later.\n      </div>\n    );\n  }\n\n  if (!billingInformation || !hasActiveSubscription(billingInformation)) {\n    return (\n      <div className=\"text-center py-8\">No billing information available.</div>\n    );\n  }\n\n  const handleManageSubscription = async () => {\n    try {\n      const response = await createCustomerPortalSession();\n      console.log(\"response\", response);\n      if (!response.stripe_customer_portal_url) {\n        throw new Error(\"No portal URL returned from the server\");\n      }\n      window.location.href = response.stripe_customer_portal_url;\n    } catch (error) {\n      console.error(\"Error creating customer portal session:\", error);\n      toast.error(\"Error creating customer portal session\");\n    }\n  };\n\n  return (\n    <div className=\"space-y-8\">\n      <Card className=\"shadow-md\">\n        <CardHeader>\n          <CardTitle className=\"text-2xl font-bold flex items-center\">\n            <SvgWallet className=\"mr-4 text-muted-foreground h-6 w-6\" />\n            Subscription Details\n          </CardTitle>\n        </CardHeader>\n        <CardContent className=\"space-y-6\">\n          <SubscriptionSummary billingInformation={billingInformation} />\n          <BillingAlerts billingInformation={billingInformation} />\n        </CardContent>\n      </Card>\n\n      <Card className=\"shadow-md\">\n        <CardHeader>\n          <CardTitle className=\"text-xl font-semibold\">\n            Manage Subscription\n          </CardTitle>\n          <CardDescription>\n            View your plan, update payment, or change subscription\n          </CardDescription>\n        </CardHeader>\n        <CardContent>\n          <Button\n            onClick={handleManageSubscription}\n            width=\"full\"\n            icon={SvgClipboard}\n          >\n            Manage Subscription\n          </Button>\n        </CardContent>\n      </Card>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/billing/InfoItem.tsx",
    "content": "import React from \"react\";\n\ninterface InfoItemProps {\n  title: string;\n  value: string;\n}\n\nexport function InfoItem({ title, value }: InfoItemProps) {\n  return (\n    <div className=\"bg-muted p-4 rounded-lg\">\n      <p className=\"text-sm font-medium text-muted-foreground mb-1\">{title}</p>\n      <p className=\"text-lg font-semibold text-foreground dark:text-neutral-100\">\n        {value}\n      </p>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/billing/SubscriptionSummary.tsx",
    "content": "import React from \"react\";\nimport { InfoItem } from \"./InfoItem\";\nimport { statusToDisplay, BillingInformation } from \"@/lib/billing\";\nimport { formatDateShort } from \"@/lib/dateUtils\";\n\ninterface SubscriptionSummaryProps {\n  billingInformation: BillingInformation;\n}\n\nexport function SubscriptionSummary({\n  billingInformation,\n}: SubscriptionSummaryProps) {\n  return (\n    <div className=\"grid grid-cols-2 gap-4\">\n      <InfoItem\n        title=\"Subscription Status\"\n        value={statusToDisplay(billingInformation.status)}\n      />\n      <InfoItem\n        title=\"Seats\"\n        value={billingInformation.seats?.toString() ?? \"—\"}\n      />\n      <InfoItem\n        title=\"Billing Start\"\n        value={formatDateShort(billingInformation.current_period_start)}\n      />\n      <InfoItem\n        title=\"Billing End\"\n        value={formatDateShort(billingInformation.current_period_end)}\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/billing/page.tsx",
    "content": "import * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport BillingInformationPage from \"./BillingInformationPage\";\nimport { SvgCreditCard } from \"@opal/icons\";\n\nexport interface BillingInformation {\n  stripe_subscription_id: string;\n  status: string;\n  current_period_start: Date;\n  current_period_end: Date;\n  number_of_seats: number;\n  cancel_at_period_end: boolean;\n  canceled_at: Date | null;\n  trial_start: Date | null;\n  trial_end: Date | null;\n  seats: number;\n  payment_method_enabled: boolean;\n}\n\nexport default function page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={SvgCreditCard}\n        title=\"Billing Information\"\n        separator\n      />\n      <SettingsLayouts.Body>\n        <BillingInformationPage />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/groups/[id]/page.tsx",
    "content": "\"use client\";\n\nimport { use } from \"react\";\nimport EditGroupPage from \"@/refresh-pages/admin/GroupsPage/EditGroupPage\";\n\nexport default function EditGroupRoute({\n  params,\n}: {\n  params: Promise<{ id: string }>;\n}) {\n  const { id } = use(params);\n  return <EditGroupPage groupId={Number(id)} />;\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/groups/create/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/GroupsPage/CreateGroupPage\";\n"
  },
  {
    "path": "web/src/app/ee/admin/groups/page.tsx",
    "content": "export { default } from \"@/refresh-pages/admin/GroupsPage\";\n"
  },
  {
    "path": "web/src/app/ee/admin/layout.tsx",
    "content": "import Layout from \"@/components/admin/Layout\";\n\nexport interface AdminLayoutProps {\n  children: React.ReactNode;\n}\n\nexport default async function AdminLayout({ children }: AdminLayoutProps) {\n  return await Layout({ children });\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/custom-analytics/CustomAnalyticsUpdateForm.tsx",
    "content": "\"use client\";\n\nimport { Label, SubLabel } from \"@/components/Field\";\nimport { toast } from \"@/hooks/useToast\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport { Button, Text } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\nimport { Callout } from \"@/components/ui/callout\";\nimport { useContext, useState } from \"react\";\nimport InputTextArea from \"@/refresh-components/inputs/InputTextArea\";\nimport Spacer from \"@/refresh-components/Spacer\";\n\nexport function CustomAnalyticsUpdateForm() {\n  const settings = useContext(SettingsContext);\n  const customAnalyticsScript = settings?.customAnalyticsScript;\n\n  const [newCustomAnalyticsScript, setNewCustomAnalyticsScript] =\n    useState<string>(customAnalyticsScript || \"\");\n  const [secretKey, setSecretKey] = useState<string>(\"\");\n\n  if (!settings) {\n    return <Callout type=\"danger\" title=\"Failed to fetch settings\"></Callout>;\n  }\n\n  return (\n    <div>\n      <form\n        onSubmit={async (e) => {\n          e.preventDefault();\n\n          const response = await fetch(\n            \"/api/admin/enterprise-settings/custom-analytics-script\",\n            {\n              method: \"PUT\",\n              headers: {\n                \"Content-Type\": \"application/json\",\n              },\n              body: JSON.stringify({\n                script: newCustomAnalyticsScript.trim(),\n                secret_key: secretKey,\n              }),\n            }\n          );\n          if (response.ok) {\n            toast.success(\"Custom analytics script updated successfully!\");\n          } else {\n            const errorMsg = (await response.json()).detail;\n            toast.error(\n              `Failed to update custom analytics script: \"${errorMsg}\"`\n            );\n          }\n          setSecretKey(\"\");\n        }}\n      >\n        <div className=\"mb-4\">\n          <Label>Script</Label>\n          <Text as=\"p\">\n            Specify the Javascript that should run on page load in order to\n            initialize your custom tracking/analytics.\n          </Text>\n          <Spacer rem={0.75} />\n          <Text as=\"p\">\n            {markdown(\n              \"Do not include the `<script></script>` tags. If you upload a script below but you are not receiving any events in your analytics platform, try removing all extra whitespace before each line of JavaScript.\"\n            )}\n          </Text>\n          <Spacer rem={0.5} />\n          <InputTextArea\n            value={newCustomAnalyticsScript}\n            onChange={(event) =>\n              setNewCustomAnalyticsScript(event.target.value)\n            }\n          />\n        </div>\n\n        <Label>Secret Key</Label>\n        <SubLabel>\n          <>\n            For security reasons, you must provide a secret key to update this\n            script. This should be the value of the{\" \"}\n            <i>CUSTOM_ANALYTICS_SECRET_KEY</i> environment variable set when\n            initially setting up Onyx.\n          </>\n        </SubLabel>\n        <input\n          className={`\n            border\n            border-border\n            rounded\n            w-full\n            py-2\n            px-3\n            mt-1`}\n          type=\"password\"\n          value={secretKey}\n          onChange={(e) => setSecretKey(e.target.value)}\n        />\n        <Spacer rem={1} />\n        <Button type=\"submit\">Update</Button>\n      </form>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/custom-analytics/page.tsx",
    "content": "import * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { CUSTOM_ANALYTICS_ENABLED } from \"@/lib/constants\";\nimport { Callout } from \"@/components/ui/callout\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport { Text } from \"@opal/components\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport { CustomAnalyticsUpdateForm } from \"./CustomAnalyticsUpdateForm\";\n\nconst route = ADMIN_ROUTES.CUSTOM_ANALYTICS;\n\nfunction Main() {\n  if (!CUSTOM_ANALYTICS_ENABLED) {\n    return (\n      <div>\n        <div className=\"mt-4\">\n          <Callout type=\"danger\" title=\"Custom Analytics is not enabled.\">\n            To set up custom analytics scripts, please work with the team who\n            setup Onyx in your team to set the{\" \"}\n            <i>CUSTOM_ANALYTICS_SECRET_KEY</i> environment variable.\n          </Callout>\n        </div>\n      </div>\n    );\n  }\n\n  return (\n    <div>\n      <Text as=\"p\">\n        {\n          \"This allows you to bring your own analytics tool to Onyx! Copy the Web snippet from your analytics provider into the box below, and we'll start sending usage events.\"\n        }\n      </Text>\n      <Spacer rem={2} />\n\n      <CustomAnalyticsUpdateForm />\n    </div>\n  );\n}\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />\n      <SettingsLayouts.Body>\n        <Main />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/lib.ts",
    "content": "import { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport useSWR, { mutate } from \"swr\";\nimport { OnyxBotAnalytics, QueryAnalytics, UserAnalytics } from \"./usage/types\";\nimport { useState } from \"react\";\nimport { buildApiPath } from \"@/lib/urlBuilder\";\n\nimport {\n  convertDateToEndOfDay,\n  convertDateToStartOfDay,\n  getXDaysAgo,\n} from \"../../../../components/dateRangeSelectors/dateUtils\";\nimport { THIRTY_DAYS } from \"../../../../components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { DateRangePickerValue } from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\n\nexport const useTimeRange = () => {\n  return useState<DateRangePickerValue>({\n    to: new Date(),\n    from: getXDaysAgo(30),\n    selectValue: THIRTY_DAYS,\n  });\n};\n\nexport const useQueryAnalytics = (timeRange: DateRangePickerValue) => {\n  const url = buildApiPath(\"/api/analytics/admin/query\", {\n    start: convertDateToStartOfDay(timeRange.from)?.toISOString(),\n    end: convertDateToEndOfDay(timeRange.to)?.toISOString(),\n  });\n  const swrResponse = useSWR<QueryAnalytics[]>(url, errorHandlingFetcher);\n\n  return {\n    ...swrResponse,\n    refreshQueryAnalytics: () => mutate(url),\n  };\n};\n\nexport const useUserAnalytics = (timeRange: DateRangePickerValue) => {\n  const url = buildApiPath(\"/api/analytics/admin/user\", {\n    start: convertDateToStartOfDay(timeRange.from)?.toISOString(),\n    end: convertDateToEndOfDay(timeRange.to)?.toISOString(),\n  });\n  const swrResponse = useSWR<UserAnalytics[]>(url, errorHandlingFetcher);\n\n  return {\n    ...swrResponse,\n    refreshUserAnalytics: () => mutate(url),\n  };\n};\n\nexport const useOnyxBotAnalytics = (timeRange: DateRangePickerValue) => {\n  const url = buildApiPath(\"/api/analytics/admin/onyxbot\", {\n    start: convertDateToStartOfDay(timeRange.from)?.toISOString(),\n    end: convertDateToEndOfDay(timeRange.to)?.toISOString(),\n  });\n  const swrResponse = useSWR<OnyxBotAnalytics[]>(url, errorHandlingFetcher); // TODO\n\n  return {\n    ...swrResponse,\n    refreshOnyxBotAnalytics: () => mutate(url),\n  };\n};\n\nexport function getDatesList(startDate: Date): string[] {\n  const datesList: string[] = [];\n  const endDate = new Date(); // current date\n\n  for (let d = new Date(startDate); d <= endDate; d.setDate(d.getDate() + 1)) {\n    const dateStr = d.toISOString().split(\"T\")[0]; // convert date object to 'YYYY-MM-DD' format\n    if (dateStr !== undefined) {\n      datesList.push(dateStr);\n    }\n  }\n\n  return datesList;\n}\n\nexport interface PersonaMessageAnalytics {\n  total_messages: number;\n  date: string;\n  persona_id: number;\n}\n\nexport interface PersonaSnapshot {\n  id: number;\n  name: string;\n  description: string;\n  is_listed: boolean;\n  is_public: boolean;\n}\n\nexport const usePersonaMessages = (\n  personaId: number | undefined,\n  timeRange: DateRangePickerValue\n) => {\n  const url = buildApiPath(`/api/analytics/admin/persona/messages`, {\n    persona_id: personaId?.toString(),\n    start: convertDateToStartOfDay(timeRange.from)?.toISOString(),\n    end: convertDateToEndOfDay(timeRange.to)?.toISOString(),\n  });\n\n  const { data, error, isLoading } = useSWR<PersonaMessageAnalytics[]>(\n    personaId !== undefined ? url : null,\n    errorHandlingFetcher\n  );\n\n  return {\n    data,\n    error,\n    isLoading,\n    refreshPersonaMessages: () => mutate(url),\n  };\n};\n\nexport interface PersonaUniqueUserAnalytics {\n  unique_users: number;\n  date: string;\n  persona_id: number;\n}\n\nexport const usePersonaUniqueUsers = (\n  personaId: number | undefined,\n  timeRange: DateRangePickerValue\n) => {\n  const url = buildApiPath(`/api/analytics/admin/persona/unique-users`, {\n    persona_id: personaId?.toString(),\n    start: convertDateToStartOfDay(timeRange.from)?.toISOString(),\n    end: convertDateToEndOfDay(timeRange.to)?.toISOString(),\n  });\n\n  const { data, error, isLoading } = useSWR<PersonaUniqueUserAnalytics[]>(\n    personaId !== undefined ? url : null,\n    errorHandlingFetcher\n  );\n\n  return {\n    data,\n    error,\n    isLoading,\n    refreshPersonaUniqueUsers: () => mutate(url),\n  };\n};\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/query-history/FeedbackBadge.tsx",
    "content": "import { Badge } from \"@/components/ui/badge\";\nimport { Feedback } from \"@/lib/types\";\n\nexport function FeedbackBadge({\n  feedback,\n}: {\n  feedback?: Feedback | \"mixed\" | null;\n}) {\n  let feedbackBadge;\n  switch (feedback) {\n    case \"like\":\n      feedbackBadge = (\n        <Badge variant=\"success\" className=\"text-sm\">\n          Like\n        </Badge>\n      );\n      break;\n    case \"dislike\":\n      feedbackBadge = (\n        <Badge variant=\"destructive\" className=\"text-sm\">\n          Dislike\n        </Badge>\n      );\n      break;\n    case \"mixed\":\n      feedbackBadge = (\n        <Badge variant=\"purple\" className=\"text-sm\">\n          Mixed\n        </Badge>\n      );\n      break;\n    default:\n      feedbackBadge = (\n        <Badge variant=\"outline\" className=\"text-sm\">\n          N/A\n        </Badge>\n      );\n      break;\n  }\n  return feedbackBadge;\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/query-history/KickoffCSVExport.tsx",
    "content": "import { toast } from \"@/hooks/useToast\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { useRef, useState } from \"react\";\nimport { DateRange } from \"../../../../../components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { withRequestId, withDateRange } from \"./utils\";\nimport {\n  CHECK_QUERY_HISTORY_EXPORT_STATUS_URL,\n  DOWNLOAD_QUERY_HISTORY_URL,\n  MAX_RETRIES,\n  PREVIOUS_CSV_TASK_BUTTON_NAME,\n  RETRY_COOLDOWN_MILLISECONDS,\n} from \"./constants\";\nimport {\n  CheckQueryHistoryExportStatusResponse,\n  SpinnerStatus,\n  StartQueryHistoryExportResponse,\n} from \"./types\";\nimport { cn } from \"@/lib/utils\";\nimport { SvgLoader, SvgPlayCircle } from \"@opal/icons\";\nexport default function KickoffCSVExport({\n  dateRange,\n}: {\n  dateRange: DateRange;\n}) {\n  const timerIdRef = useRef<null | number>(null);\n  const retryCount = useRef<number>(0);\n  const [, rerender] = useState<void>();\n  const [spinnerStatus, setSpinnerStatus] = useState<SpinnerStatus>(\"static\");\n\n  const reset = (failure: boolean = false) => {\n    setSpinnerStatus(\"static\");\n    if (timerIdRef.current) {\n      clearInterval(timerIdRef.current);\n      timerIdRef.current = null;\n    }\n    retryCount.current = 0;\n\n    if (failure) {\n      toast.error(\"Failed to download the query-history.\");\n    }\n\n    rerender();\n  };\n\n  const startExport = async () => {\n    // If the button is pressed again while we're spinning, then we reset and cancel the request.\n    if (spinnerStatus === \"spinning\") {\n      reset();\n      return;\n    }\n\n    setSpinnerStatus(\"spinning\");\n    toast.info(\n      `Generating CSV report. Click the '${PREVIOUS_CSV_TASK_BUTTON_NAME}' button to see all jobs.`\n    );\n    const response = await fetch(withDateRange(dateRange), {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n    });\n\n    if (!response.ok) {\n      reset(true);\n      return;\n    }\n\n    const { request_id } =\n      (await response.json()) as StartQueryHistoryExportResponse;\n    const timer = setInterval(\n      () => checkStatus(request_id),\n      RETRY_COOLDOWN_MILLISECONDS\n    ) as unknown as number;\n    timerIdRef.current = timer;\n    rerender();\n  };\n\n  const checkStatus = async (requestId: string) => {\n    if (retryCount.current >= MAX_RETRIES) {\n      reset();\n      return;\n    }\n    retryCount.current += 1;\n    rerender();\n\n    const response = await fetch(\n      withRequestId(CHECK_QUERY_HISTORY_EXPORT_STATUS_URL, requestId),\n      {\n        method: \"GET\",\n      }\n    );\n\n    if (!response.ok) {\n      reset(true);\n      return;\n    }\n\n    const { status } =\n      (await response.json()) as CheckQueryHistoryExportStatusResponse;\n\n    if (status === \"SUCCESS\") {\n      reset();\n      window.location.href = withRequestId(\n        DOWNLOAD_QUERY_HISTORY_URL,\n        requestId\n      );\n    } else if (status === \"FAILURE\") {\n      reset(true);\n    }\n  };\n\n  return (\n    <div className=\"flex flex-1 flex-col w-full justify-center\">\n      {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n      <Button\n        className=\"ml-auto\"\n        onClick={startExport}\n        danger={spinnerStatus === \"spinning\"}\n        leftIcon={\n          spinnerStatus === \"spinning\"\n            ? ({ className }) => (\n                <SvgLoader className={cn(className, \"animate-spin\")} />\n              )\n            : SvgPlayCircle\n        }\n      >\n        {spinnerStatus === \"spinning\" ? \"Cancel\" : \"Kickoff Export\"}\n      </Button>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/query-history/QueryHistoryTable.tsx",
    "content": "import Separator from \"@/refresh-components/Separator\";\nimport {\n  Table,\n  TableHead,\n  TableRow,\n  TableBody,\n  TableCell,\n  TableHeader,\n} from \"@/components/ui/table\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { ChatSessionMinimal } from \"@/app/ee/admin/performance/usage/types\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { timestampToReadableDate } from \"@/lib/dateUtils\";\nimport { Dispatch, SetStateAction, useCallback, useState } from \"react\";\nimport { Feedback, TaskStatus } from \"@/lib/types\";\nimport {\n  DateRange,\n  AdminDateRangeSelector,\n} from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { PageSelector } from \"@/components/PageSelector\";\nimport Link from \"next/link\";\nimport type { Route } from \"next\";\nimport { FeedbackBadge } from \"@/app/ee/admin/performance/query-history/FeedbackBadge\";\nimport KickoffCSVExport from \"@/app/ee/admin/performance/query-history/KickoffCSVExport\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport usePaginatedFetch from \"@/hooks/usePaginatedFetch\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport useSWR from \"swr\";\nimport { TaskQueueState } from \"@/app/ee/admin/performance/query-history/types\";\nimport { withRequestId } from \"@/app/ee/admin/performance/query-history/utils\";\nimport {\n  DOWNLOAD_QUERY_HISTORY_URL,\n  LIST_QUERY_HISTORY_URL,\n  NUM_IN_PAGE,\n  ITEMS_PER_PAGE,\n  PAGES_PER_BATCH,\n  PREVIOUS_CSV_TASK_BUTTON_NAME,\n} from \"@/app/ee/admin/performance/query-history/constants\";\nimport { humanReadableFormatWithTime } from \"@/lib/time\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport { Badge } from \"@/components/ui/badge\";\nimport {\n  SvgDownloadCloud,\n  SvgFileText,\n  SvgMinus,\n  SvgMinusCircle,\n  SvgThumbsDown,\n  SvgThumbsUp,\n} from \"@opal/icons\";\nfunction QueryHistoryTableRow({\n  chatSessionMinimal,\n}: {\n  chatSessionMinimal: ChatSessionMinimal;\n}) {\n  return (\n    <TableRow\n      key={chatSessionMinimal.id}\n      className=\"hover:bg-accent-background cursor-pointer relative select-none\"\n    >\n      <TableCell>\n        <Text className=\"whitespace-normal line-clamp-5\">\n          {chatSessionMinimal.first_user_message ||\n            chatSessionMinimal.name ||\n            \"-\"}\n        </Text>\n      </TableCell>\n      <TableCell>\n        <Text className=\"whitespace-normal line-clamp-5\">\n          {chatSessionMinimal.first_ai_message || \"-\"}\n        </Text>\n      </TableCell>\n      <TableCell>\n        <FeedbackBadge feedback={chatSessionMinimal.feedback_type} />\n      </TableCell>\n      <TableCell>{chatSessionMinimal.user_email || \"-\"}</TableCell>\n      <TableCell>{chatSessionMinimal.assistant_name || \"Unknown\"}</TableCell>\n      <TableCell>\n        {timestampToReadableDate(chatSessionMinimal.time_created)}\n      </TableCell>\n      {/* Wrapping in <td> to avoid console warnings */}\n      <td className=\"w-0 p-0\">\n        <Link\n          href={\n            `/ee/admin/performance/query-history/${chatSessionMinimal.id}` as Route\n          }\n          className=\"absolute w-full h-full left-0 top-0\"\n        ></Link>\n      </td>\n    </TableRow>\n  );\n}\n\nfunction SelectFeedbackType({\n  value,\n  onValueChange,\n}: {\n  value: Feedback | \"all\";\n  onValueChange: (value: Feedback | \"all\") => void;\n}) {\n  return (\n    <Section alignItems=\"start\" gap={0.25}>\n      <Text as=\"p\" className=\"font-medium\">\n        Feedback Type\n      </Text>\n      <InputSelect\n        value={value}\n        onValueChange={onValueChange as (value: string) => void}\n      >\n        <InputSelect.Trigger />\n\n        <InputSelect.Content>\n          <InputSelect.Item value=\"all\" icon={SvgMinusCircle}>\n            Any\n          </InputSelect.Item>\n          <InputSelect.Item value=\"like\" icon={SvgThumbsUp}>\n            Like\n          </InputSelect.Item>\n          <InputSelect.Item value=\"dislike\" icon={SvgThumbsDown}>\n            Dislike\n          </InputSelect.Item>\n          <InputSelect.Item value=\"mixed\" icon={SvgMinus}>\n            Mixed\n          </InputSelect.Item>\n        </InputSelect.Content>\n      </InputSelect>\n    </Section>\n  );\n}\n\nfunction ExportBadge({ status }: { status: TaskStatus }) {\n  if (status === \"SUCCESS\") return <Badge variant=\"success\">Success</Badge>;\n  else if (status === \"FAILURE\")\n    return <Badge variant=\"destructive\">Failure</Badge>;\n  else if (status === \"PENDING\" || status === \"STARTED\")\n    return <Badge variant=\"in_progress\">Pending</Badge>;\n  else return <></>;\n}\n\nfunction PreviousQueryHistoryExportsModal({\n  setShowModal,\n}: {\n  setShowModal: Dispatch<SetStateAction<boolean>>;\n}) {\n  const { data: queryHistoryTasks } = useSWR<TaskQueueState[]>(\n    LIST_QUERY_HISTORY_URL,\n    errorHandlingFetcher,\n    {\n      refreshInterval: 3000,\n    }\n  );\n\n  const tasks = (queryHistoryTasks ?? []).map((queryHistory) => ({\n    taskId: queryHistory.task_id,\n    start: new Date(queryHistory.start),\n    end: new Date(queryHistory.end),\n    status: queryHistory.status,\n    startTime: queryHistory.start_time,\n  }));\n\n  // sort based off of \"most-recently-exported\" CSV file.\n  tasks.sort((task_a, task_b) => {\n    if (task_a.startTime < task_b.startTime) return 1;\n    else if (task_a.startTime > task_b.startTime) return -1;\n    else return 0;\n  });\n\n  const [taskPage, setTaskPage] = useState(1);\n  const totalTaskPages = Math.ceil(tasks.length / NUM_IN_PAGE);\n  const paginatedTasks = tasks.slice(\n    NUM_IN_PAGE * (taskPage - 1),\n    NUM_IN_PAGE * taskPage\n  );\n\n  return (\n    <Modal open onOpenChange={() => setShowModal(false)}>\n      <Modal.Content width=\"full\" height=\"full\">\n        <Modal.Header\n          icon={SvgFileText}\n          title=\"Previous Query History Exports\"\n          onClose={() => setShowModal(false)}\n        />\n        <Modal.Body>\n          <Table>\n            <TableHeader>\n              <TableRow>\n                <TableHead>Generated At</TableHead>\n                <TableHead>Start Range</TableHead>\n                <TableHead>End Range</TableHead>\n                <TableHead>Status</TableHead>\n                <TableHead>Download</TableHead>\n              </TableRow>\n            </TableHeader>\n            <TableBody>\n              {paginatedTasks.map((task, index) => (\n                <TableRow key={index}>\n                  <TableCell>\n                    {humanReadableFormatWithTime(task.startTime)}\n                  </TableCell>\n                  <TableCell>{task.start.toDateString()}</TableCell>\n                  <TableCell>{task.end.toDateString()}</TableCell>\n                  <TableCell>\n                    <ExportBadge status={task.status} />\n                  </TableCell>\n                  <TableCell>\n                    <Button\n                      variant=\"default\"\n                      prominence=\"tertiary\"\n                      icon={SvgDownloadCloud}\n                      size=\"sm\"\n                      disabled={task.status !== \"SUCCESS\"}\n                      tooltip={\n                        task.status !== \"SUCCESS\"\n                          ? \"Export is not yet ready\"\n                          : undefined\n                      }\n                      href={\n                        task.status === \"SUCCESS\"\n                          ? withRequestId(\n                              DOWNLOAD_QUERY_HISTORY_URL,\n                              task.taskId\n                            )\n                          : undefined\n                      }\n                    />\n                  </TableCell>\n                </TableRow>\n              ))}\n            </TableBody>\n          </Table>\n\n          <Section>\n            <PageSelector\n              currentPage={taskPage}\n              totalPages={totalTaskPages}\n              onPageChange={setTaskPage}\n            />\n          </Section>\n        </Modal.Body>\n      </Modal.Content>\n    </Modal>\n  );\n}\n\nexport function QueryHistoryTable() {\n  const [dateRange, setDateRange] = useState<DateRange>(undefined);\n  const [filters, setFilters] = useState<{\n    feedback_type?: Feedback | \"all\";\n    start_time?: string;\n    end_time?: string;\n  }>({});\n\n  const [showModal, setShowModal] = useState(false);\n\n  const {\n    currentPageData: chatSessionData,\n    isLoading,\n    error,\n    currentPage,\n    totalPages,\n    goToPage,\n  } = usePaginatedFetch<ChatSessionMinimal>({\n    itemsPerPage: ITEMS_PER_PAGE,\n    pagesPerBatch: PAGES_PER_BATCH,\n    endpoint: \"/api/admin/chat-session-history\",\n    filter: filters,\n  });\n\n  const onTimeRangeChange = useCallback((value: DateRange) => {\n    setDateRange(value);\n\n    if (value?.from && value?.to) {\n      setFilters((prev) => ({\n        ...prev,\n        start_time: value.from.toISOString(),\n        end_time: value.to.toISOString(),\n      }));\n    } else {\n      setFilters((prev) => {\n        const newFilters = { ...prev };\n        delete newFilters.start_time;\n        delete newFilters.end_time;\n        return newFilters;\n      });\n    }\n  }, []);\n\n  if (error) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Error fetching query history\"\n        errorMsg={error?.message}\n      />\n    );\n  }\n\n  return (\n    <>\n      <CardSection className=\"mt-8\">\n        <div className=\"flex\">\n          <div className=\"gap-y-3 flex flex-col\">\n            <SelectFeedbackType\n              value={filters.feedback_type || \"all\"}\n              onValueChange={(value) => {\n                setFilters((prev) => {\n                  const newFilters = { ...prev };\n                  if (value === \"all\") {\n                    delete newFilters.feedback_type;\n                  } else {\n                    newFilters.feedback_type = value;\n                  }\n                  return newFilters;\n                });\n              }}\n            />\n\n            <AdminDateRangeSelector\n              value={dateRange}\n              onValueChange={onTimeRangeChange}\n            />\n          </div>\n          <div className=\"flex flex-row w-full items-center gap-x-2\">\n            <KickoffCSVExport dateRange={dateRange} />\n            <Button prominence=\"secondary\" onClick={() => setShowModal(true)}>\n              {PREVIOUS_CSV_TASK_BUTTON_NAME}\n            </Button>\n          </div>\n        </div>\n        <Separator />\n        <Section>\n          <Table className=\"mt-5\">\n            <TableHeader>\n              <TableRow>\n                <TableHead>First User Message</TableHead>\n                <TableHead>First AI Response</TableHead>\n                <TableHead>Feedback</TableHead>\n                <TableHead>User</TableHead>\n                <TableHead>Persona</TableHead>\n                <TableHead>Date</TableHead>\n              </TableRow>\n            </TableHeader>\n            {isLoading ? (\n              <TableBody>\n                <TableRow>\n                  <TableCell colSpan={6} className=\"text-center\">\n                    <ThreeDotsLoader />\n                  </TableCell>\n                </TableRow>\n              </TableBody>\n            ) : (\n              <TableBody>\n                {chatSessionData?.map((chatSessionMinimal) => (\n                  <QueryHistoryTableRow\n                    key={chatSessionMinimal.id}\n                    chatSessionMinimal={chatSessionMinimal}\n                  />\n                ))}\n              </TableBody>\n            )}\n          </Table>\n\n          {chatSessionData && (\n            <Section>\n              <PageSelector\n                totalPages={totalPages}\n                currentPage={currentPage}\n                onPageChange={goToPage}\n              />\n            </Section>\n          )}\n        </Section>\n      </CardSection>\n\n      {showModal && (\n        <PreviousQueryHistoryExportsModal setShowModal={setShowModal} />\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/query-history/[id]/page.tsx",
    "content": "\"use client\";\nimport { use } from \"react\";\n\nimport { Text } from \"@opal/components\";\nimport Title from \"@/components/ui/title\";\nimport Separator from \"@/refresh-components/Separator\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport { ChatSessionSnapshot, MessageSnapshot } from \"../../usage/types\";\nimport { FiBook } from \"react-icons/fi\";\nimport { timestampToReadableDate } from \"@/lib/dateUtils\";\nimport BackButton from \"@/refresh-components/buttons/BackButton\";\nimport { FeedbackBadge } from \"../FeedbackBadge\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport CardSection from \"@/components/admin/CardSection\";\n\nfunction MessageDisplay({ message }: { message: MessageSnapshot }) {\n  return (\n    <div>\n      <p className=\"text-xs font-bold mb-1\">\n        {message.message_type === \"user\" ? \"User\" : \"AI\"}\n      </p>\n      <Text as=\"p\">{message.message}</Text>\n      {message.documents.length > 0 && (\n        <div className=\"flex flex-col gap-y-2 mt-2\">\n          <p className=\"font-bold text-xs\">Reference Documents</p>\n          {message.documents.slice(0, 5).map((document) => {\n            return (\n              <div className=\"text-sm flex\" key={document.document_id}>\n                <FiBook\n                  className={\n                    \"my-auto mr-1\" + (document.link ? \" text-link\" : \" \")\n                  }\n                />\n                {document.link ? (\n                  <a\n                    href={document.link}\n                    target=\"_blank\"\n                    className=\"text-link\"\n                    rel=\"noreferrer\"\n                  >\n                    {document.semantic_identifier}\n                  </a>\n                ) : (\n                  document.semantic_identifier\n                )}\n              </div>\n            );\n          })}\n        </div>\n      )}\n      {message.feedback_type && (\n        <div className=\"mt-2\">\n          <p className=\"font-bold text-xs\">Feedback</p>\n          {message.feedback_text && <Text as=\"p\">{message.feedback_text}</Text>}\n          <div className=\"mt-1\">\n            <FeedbackBadge feedback={message.feedback_type} />\n          </div>\n        </div>\n      )}\n      <Separator />\n    </div>\n  );\n}\n\nexport default function QueryPage(props: { params: Promise<{ id: string }> }) {\n  const params = use(props.params);\n  const {\n    data: chatSessionSnapshot,\n    isLoading,\n    error,\n  } = useSWR<ChatSessionSnapshot>(\n    SWR_KEYS.adminChatSession(params.id),\n    errorHandlingFetcher\n  );\n\n  if (isLoading) {\n    return <ThreeDotsLoader />;\n  }\n\n  if (!chatSessionSnapshot || error) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Something went wrong :(\"\n        errorMsg={`Failed to fetch chat session - ${error}`}\n      />\n    );\n  }\n\n  return (\n    <main className=\"pt-4 mx-auto container\">\n      <BackButton />\n\n      <CardSection className=\"mt-4\">\n        <Title>Chat Session Details</Title>\n\n        <Spacer rem={0.25} />\n        {chatSessionSnapshot.assistant_name && (\n          <Text as=\"p\">{chatSessionSnapshot.assistant_name}</Text>\n        )}\n        <Spacer rem={0.25} />\n        <Text as=\"p\">\n          {`${\n            chatSessionSnapshot.user_email\n              ? `${chatSessionSnapshot.user_email}, `\n              : \"\"\n          }${timestampToReadableDate(chatSessionSnapshot.time_created)}, ${\n            chatSessionSnapshot.flow_type\n          }`}\n        </Text>\n\n        <Separator />\n\n        <div className=\"flex flex-col\">\n          {chatSessionSnapshot.messages.map((message) => {\n            return <MessageDisplay key={message.id} message={message} />;\n          })}\n        </div>\n      </CardSection>\n    </main>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/query-history/constants.ts",
    "content": "export const LIST_QUERY_HISTORY_URL = \"/api/admin/query-history/list\";\nexport const START_QUERY_HISTORY_EXPORT_URL =\n  \"/api/admin/query-history/start-export\";\nexport const CHECK_QUERY_HISTORY_EXPORT_STATUS_URL =\n  \"/api/admin/query-history/export-status\";\nexport const DOWNLOAD_QUERY_HISTORY_URL = \"/api/admin/query-history/download\";\nexport const MAX_RETRIES = 10;\nexport const RETRY_COOLDOWN_MILLISECONDS = 200;\n\nexport const ITEMS_PER_PAGE = 20;\nexport const PAGES_PER_BATCH = 2;\nexport const NUM_IN_PAGE = 10;\n\nexport const PREVIOUS_CSV_TASK_BUTTON_NAME = \"View Exports\";\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/query-history/page.tsx",
    "content": "\"use client\";\n\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { QueryHistoryTable } from \"@/app/ee/admin/performance/query-history/QueryHistoryTable\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.QUERY_HISTORY;\n\nexport default function QueryHistoryPage() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />\n\n      <SettingsLayouts.Body>\n        <QueryHistoryTable />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/query-history/types.ts",
    "content": "import { TaskStatus } from \"@/lib/types\";\n\nexport interface TaskQueueState {\n  task_id: string;\n  start: string;\n  end: string;\n  status: TaskStatus;\n  start_time: string;\n}\n\nexport type StartQueryHistoryExportResponse = { request_id: string };\n\nexport type CheckQueryHistoryExportStatusResponse = {\n  status: TaskStatus;\n};\n\n// The status of the spinner.\n// If it's \"static\", then no spinning animation should be shown.\n// Otherwise, the spinning animation should be shown.\nexport type SpinnerStatus = \"static\" | \"spinning\";\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/query-history/utils.ts",
    "content": "import { DateRange } from \"../../../../../components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { START_QUERY_HISTORY_EXPORT_URL } from \"./constants\";\n\nexport const withRequestId = (url: string, requestId: string): string =>\n  `${url}?request_id=${requestId}`;\n\nexport const withDateRange = (dateRange: DateRange): string => {\n  if (!dateRange) {\n    return START_QUERY_HISTORY_EXPORT_URL;\n  }\n\n  const { from, to } = dateRange;\n\n  const fromString = from.toISOString();\n  const toString = to.toISOString();\n\n  return `${START_QUERY_HISTORY_EXPORT_URL}?start=${fromString}&end=${toString}`;\n};\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/usage/FeedbackChart.tsx",
    "content": "import { ThreeDotsLoader } from \"@/components/Loading\";\nimport { getDatesList, useQueryAnalytics } from \"../lib\";\nimport { Text } from \"@opal/components\";\nimport Title from \"@/components/ui/title\";\n\nimport { DateRangePickerValue } from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { AreaChartDisplay } from \"@/components/ui/areaChart\";\n\nexport function FeedbackChart({\n  timeRange,\n}: {\n  timeRange: DateRangePickerValue;\n}) {\n  const {\n    data: queryAnalyticsData,\n    isLoading: isQueryAnalyticsLoading,\n    error: queryAnalyticsError,\n  } = useQueryAnalytics(timeRange);\n\n  let chart;\n  if (isQueryAnalyticsLoading) {\n    chart = (\n      <div className=\"h-80 flex flex-col\">\n        <ThreeDotsLoader />\n      </div>\n    );\n  } else if (\n    !queryAnalyticsData ||\n    queryAnalyticsData[0] === undefined ||\n    queryAnalyticsError\n  ) {\n    chart = (\n      <div className=\"h-80 text-red-600 text-bold flex flex-col\">\n        <p className=\"m-auto\">Failed to fetch feedback data...</p>\n      </div>\n    );\n  } else {\n    const initialDate = timeRange.from || new Date(queryAnalyticsData[0].date);\n    const dateRange = getDatesList(initialDate);\n\n    const dateToQueryAnalytics = new Map(\n      queryAnalyticsData.map((queryAnalyticsEntry) => [\n        queryAnalyticsEntry.date,\n        queryAnalyticsEntry,\n      ])\n    );\n\n    chart = (\n      <AreaChartDisplay\n        className=\"mt-4\"\n        data={dateRange.map((dateStr) => {\n          const queryAnalyticsForDate = dateToQueryAnalytics.get(dateStr);\n          return {\n            Day: dateStr,\n            \"Positive Feedback\": queryAnalyticsForDate?.total_likes || 0,\n            \"Negative Feedback\": queryAnalyticsForDate?.total_dislikes || 0,\n          };\n        })}\n        categories={[\"Positive Feedback\", \"Negative Feedback\"]}\n        index=\"Day\"\n        colors={[\"indigo\", \"fuchsia\"]}\n        yAxisWidth={60}\n      />\n    );\n  }\n\n  return (\n    <CardSection className=\"mt-8\">\n      <Title>Feedback</Title>\n      <Text as=\"p\">Thumbs Up / Thumbs Down over time</Text>\n      {chart}\n    </CardSection>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/usage/OnyxBotChart.tsx",
    "content": "import { ThreeDotsLoader } from \"@/components/Loading\";\nimport { getDatesList, useOnyxBotAnalytics } from \"../lib\";\nimport { DateRangePickerValue } from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { Text } from \"@opal/components\";\nimport Title from \"@/components/ui/title\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { AreaChartDisplay } from \"@/components/ui/areaChart\";\n\nexport function OnyxBotChart({\n  timeRange,\n}: {\n  timeRange: DateRangePickerValue;\n}) {\n  const {\n    data: onyxBotAnalyticsData,\n    isLoading: isOnyxBotAnalyticsLoading,\n    error: onyxBotAnalyticsError,\n  } = useOnyxBotAnalytics(timeRange);\n\n  let chart;\n  if (isOnyxBotAnalyticsLoading) {\n    chart = (\n      <div className=\"h-80 flex flex-col\">\n        <ThreeDotsLoader />\n      </div>\n    );\n  } else if (\n    !onyxBotAnalyticsData ||\n    onyxBotAnalyticsData[0] == undefined ||\n    onyxBotAnalyticsError\n  ) {\n    chart = (\n      <div className=\"h-80 text-red-600 text-bold flex flex-col\">\n        <p className=\"m-auto\">Failed to fetch feedback data...</p>\n      </div>\n    );\n  } else {\n    const initialDate =\n      timeRange.from || new Date(onyxBotAnalyticsData[0].date);\n    const dateRange = getDatesList(initialDate);\n\n    const dateToOnyxBotAnalytics = new Map(\n      onyxBotAnalyticsData.map((onyxBotAnalyticsEntry) => [\n        onyxBotAnalyticsEntry.date,\n        onyxBotAnalyticsEntry,\n      ])\n    );\n\n    chart = (\n      <AreaChartDisplay\n        className=\"mt-4\"\n        data={dateRange.map((dateStr) => {\n          const onyxBotAnalyticsForDate = dateToOnyxBotAnalytics.get(dateStr);\n          return {\n            Day: dateStr,\n            \"Total Queries\": onyxBotAnalyticsForDate?.total_queries || 0,\n            \"Automatically Resolved\":\n              onyxBotAnalyticsForDate?.auto_resolved || 0,\n          };\n        })}\n        categories={[\"Total Queries\", \"Automatically Resolved\"]}\n        index=\"Day\"\n        colors={[\"indigo\", \"fuchsia\"]}\n        yAxisWidth={60}\n      />\n    );\n  }\n\n  return (\n    <CardSection className=\"mt-8\">\n      <Title>Slack Channel</Title>\n      <Text as=\"p\">Total Queries vs Auto Resolved</Text>\n      {chart}\n    </CardSection>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/usage/PersonaMessagesChart.tsx",
    "content": "import { ThreeDotsLoader } from \"@/components/Loading\";\nimport { X, Search } from \"lucide-react\";\nimport {\n  getDatesList,\n  usePersonaMessages,\n  usePersonaUniqueUsers,\n} from \"../lib\";\nimport { DateRangePickerValue } from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { Text } from \"@opal/components\";\nimport Title from \"@/components/ui/title\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { AreaChartDisplay } from \"@/components/ui/areaChart\";\nimport {\n  Select,\n  SelectContent,\n  SelectItem,\n  SelectTrigger,\n  SelectValue,\n} from \"@/components/ui/select\";\nimport { useState, useMemo, useEffect } from \"react\";\nimport { Persona } from \"@/app/admin/agents/interfaces\";\n\nexport function PersonaMessagesChart({\n  availablePersonas,\n  timeRange,\n}: {\n  availablePersonas: Persona[];\n  timeRange: DateRangePickerValue;\n}) {\n  const [selectedPersonaId, setSelectedPersonaId] = useState<\n    number | undefined\n  >(undefined);\n  const [searchQuery, setSearchQuery] = useState(\"\");\n  const [highlightedIndex, setHighlightedIndex] = useState(-1);\n\n  const {\n    data: personaMessagesData,\n    isLoading: isPersonaMessagesLoading,\n    error: personaMessagesError,\n  } = usePersonaMessages(selectedPersonaId, timeRange);\n\n  const {\n    data: personaUniqueUsersData,\n    isLoading: isPersonaUniqueUsersLoading,\n    error: personaUniqueUsersError,\n  } = usePersonaUniqueUsers(selectedPersonaId, timeRange);\n\n  const isLoading = isPersonaMessagesLoading || isPersonaUniqueUsersLoading;\n  const hasError = personaMessagesError || personaUniqueUsersError;\n\n  const filteredPersonaList = useMemo(() => {\n    if (!availablePersonas) return [];\n    return availablePersonas.filter((persona) =>\n      persona.name.toLowerCase().includes(searchQuery.toLowerCase())\n    );\n  }, [availablePersonas, searchQuery]);\n\n  const handleKeyDown = (e: React.KeyboardEvent) => {\n    e.stopPropagation();\n\n    switch (e.key) {\n      case \"ArrowDown\":\n        e.preventDefault();\n        setHighlightedIndex((prev) =>\n          prev < filteredPersonaList.length - 1 ? prev + 1 : prev\n        );\n        break;\n      case \"ArrowUp\":\n        e.preventDefault();\n        setHighlightedIndex((prev) => (prev > 0 ? prev - 1 : prev));\n        break;\n      case \"Enter\":\n        if (\n          highlightedIndex >= 0 &&\n          highlightedIndex < filteredPersonaList.length\n        ) {\n          const filteredPersona = filteredPersonaList[highlightedIndex];\n          if (filteredPersona !== undefined) {\n            setSelectedPersonaId(filteredPersona.id);\n            setSearchQuery(\"\");\n            setHighlightedIndex(-1);\n          }\n        }\n        break;\n      case \"Escape\":\n        setSearchQuery(\"\");\n        setHighlightedIndex(-1);\n        break;\n    }\n  };\n\n  // Reset highlight when search query changes\n  useEffect(() => {\n    setHighlightedIndex(-1);\n  }, [searchQuery]);\n\n  const chartData = useMemo(() => {\n    if (\n      !personaMessagesData?.length ||\n      !personaUniqueUsersData?.length ||\n      selectedPersonaId === undefined\n    ) {\n      return null;\n    }\n\n    const initialDate =\n      timeRange.from ||\n      new Date(\n        Math.min(\n          ...personaMessagesData.map((entry) => new Date(entry.date).getTime())\n        )\n      );\n    const dateRange = getDatesList(initialDate);\n\n    // Create maps for messages and unique users data\n    const messagesMap = new Map(\n      personaMessagesData.map((entry) => [entry.date, entry])\n    );\n    const uniqueUsersMap = new Map(\n      personaUniqueUsersData.map((entry) => [entry.date, entry])\n    );\n\n    return dateRange.map((dateStr) => {\n      const messageData = messagesMap.get(dateStr);\n      const uniqueUserData = uniqueUsersMap.get(dateStr);\n      return {\n        Day: dateStr,\n        Messages: messageData?.total_messages || 0,\n        \"Unique Users\": uniqueUserData?.unique_users || 0,\n      };\n    });\n  }, [\n    personaMessagesData,\n    personaUniqueUsersData,\n    timeRange.from,\n    selectedPersonaId,\n  ]);\n\n  let content;\n  if (isLoading) {\n    content = (\n      <div className=\"h-80 flex flex-col\">\n        <ThreeDotsLoader />\n      </div>\n    );\n  } else if (!availablePersonas || hasError) {\n    content = (\n      <div className=\"h-80 text-red-600 text-bold flex flex-col\">\n        <p className=\"m-auto\">Failed to fetch data...</p>\n      </div>\n    );\n  } else if (selectedPersonaId === undefined) {\n    content = (\n      <div className=\"h-80 text-text-500 flex flex-col\">\n        <p className=\"m-auto\">Select an agent to view analytics</p>\n      </div>\n    );\n  } else if (!personaMessagesData?.length) {\n    content = (\n      <div className=\"h-80 text-text-500 flex flex-col\">\n        <p className=\"m-auto\">\n          No data found for selected agent in the specified time range\n        </p>\n      </div>\n    );\n  } else if (chartData) {\n    content = (\n      <AreaChartDisplay\n        className=\"mt-4\"\n        data={chartData}\n        categories={[\"Messages\", \"Unique Users\"]}\n        index=\"Day\"\n        colors={[\"indigo\", \"fuchsia\"]}\n        yAxisWidth={60}\n      />\n    );\n  }\n\n  return (\n    <CardSection className=\"mt-8\">\n      <Title>Agent Analytics</Title>\n      <div className=\"flex flex-col gap-4\">\n        <Text as=\"p\">\n          Messages and unique users per day for the selected agent\n        </Text>\n        <div className=\"flex items-center gap-4\">\n          <Select\n            value={selectedPersonaId?.toString() ?? \"\"}\n            onValueChange={(value) => {\n              setSelectedPersonaId(parseInt(value));\n            }}\n          >\n            <SelectTrigger className=\"flex w-full max-w-xs\">\n              <SelectValue placeholder=\"Select an agent to display\" />\n            </SelectTrigger>\n            <SelectContent>\n              <div className=\"flex items-center px-2 pb-2 sticky top-0 bg-background border-b\">\n                <Search className=\"h-4 w-4 mr-2 shrink-0 opacity-50\" />\n                <input\n                  className=\"flex h-8 w-full rounded-sm bg-transparent py-3 text-sm outline-none placeholder:text-muted-foreground disabled:cursor-not-allowed disabled:opacity-50\"\n                  placeholder=\"Search agents...\"\n                  value={searchQuery}\n                  onChange={(e) => setSearchQuery(e.target.value)}\n                  onClick={(e) => e.stopPropagation()}\n                  onMouseDown={(e) => e.stopPropagation()}\n                  onKeyDown={handleKeyDown}\n                />\n                {searchQuery && (\n                  <X\n                    className=\"h-4 w-4 shrink-0 opacity-50 cursor-pointer hover:opacity-100\"\n                    onClick={() => {\n                      setSearchQuery(\"\");\n                      setHighlightedIndex(-1);\n                    }}\n                  />\n                )}\n              </div>\n              {filteredPersonaList.map((persona, index) => (\n                <SelectItem\n                  key={persona.id}\n                  value={persona.id.toString()}\n                  className={`${highlightedIndex === index ? \"hover\" : \"\"}`}\n                  onMouseEnter={() => setHighlightedIndex(index)}\n                >\n                  {persona.name}\n                </SelectItem>\n              ))}\n            </SelectContent>\n          </Select>\n        </div>\n      </div>\n      {content}\n    </CardSection>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/usage/QueryPerformanceChart.tsx",
    "content": "\"use client\";\n\nimport { DateRangePickerValue } from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { getDatesList, useQueryAnalytics, useUserAnalytics } from \"../lib\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { AreaChartDisplay } from \"@/components/ui/areaChart\";\nimport Title from \"@/components/ui/title\";\nimport { Text } from \"@opal/components\";\nimport CardSection from \"@/components/admin/CardSection\";\n\nexport function QueryPerformanceChart({\n  timeRange,\n}: {\n  timeRange: DateRangePickerValue;\n}) {\n  const {\n    data: queryAnalyticsData,\n    isLoading: isQueryAnalyticsLoading,\n    error: queryAnalyticsError,\n  } = useQueryAnalytics(timeRange);\n  const {\n    data: userAnalyticsData,\n    isLoading: isUserAnalyticsLoading,\n    error: userAnalyticsError,\n  } = useUserAnalytics(timeRange);\n\n  let chart;\n  if (isQueryAnalyticsLoading || isUserAnalyticsLoading) {\n    chart = (\n      <div className=\"h-80 flex flex-col\">\n        <ThreeDotsLoader />\n      </div>\n    );\n  } else if (\n    !queryAnalyticsData ||\n    queryAnalyticsData[0] === undefined ||\n    !userAnalyticsData ||\n    queryAnalyticsError ||\n    userAnalyticsError\n  ) {\n    chart = (\n      <div className=\"h-80 text-red-600 text-bold flex flex-col\">\n        <p className=\"m-auto\">Failed to fetch query data...</p>\n      </div>\n    );\n  } else {\n    const initialDate = timeRange.from || new Date(queryAnalyticsData[0].date);\n    const dateRange = getDatesList(initialDate);\n\n    const dateToQueryAnalytics = new Map(\n      queryAnalyticsData.map((queryAnalyticsEntry) => [\n        queryAnalyticsEntry.date,\n        queryAnalyticsEntry,\n      ])\n    );\n    const dateToUserAnalytics = new Map(\n      userAnalyticsData.map((userAnalyticsEntry) => [\n        userAnalyticsEntry.date,\n        userAnalyticsEntry,\n      ])\n    );\n\n    chart = (\n      <AreaChartDisplay\n        className=\"mt-4\"\n        stacked={false}\n        data={dateRange.map((dateStr) => {\n          const queryAnalyticsForDate = dateToQueryAnalytics.get(dateStr);\n          const userAnalyticsForDate = dateToUserAnalytics.get(dateStr);\n          return {\n            Day: dateStr,\n            Queries: queryAnalyticsForDate?.total_queries || 0,\n            \"Unique Users\": userAnalyticsForDate?.total_active_users || 0,\n          };\n        })}\n        categories={[\"Queries\", \"Unique Users\"]}\n        index=\"Day\"\n        colors={[\"indigo\", \"fuchsia\"]}\n        yAxisFormatter={(number: number) =>\n          new Intl.NumberFormat(\"en-US\", {\n            notation: \"standard\",\n            maximumFractionDigits: 0,\n          }).format(number)\n        }\n        xAxisFormatter={(dateStr: string) => {\n          const date = new Date(dateStr);\n          return date.toLocaleDateString(\"en-US\", {\n            month: \"short\",\n            day: \"numeric\",\n          });\n        }}\n        yAxisWidth={60}\n        allowDecimals={false}\n      />\n    );\n  }\n\n  return (\n    <CardSection className=\"mt-8\">\n      <Title>Usage</Title>\n      <Text as=\"p\">Usage over time</Text>\n      {chart}\n    </CardSection>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/usage/UsageReports.tsx",
    "content": "\"use client\";\n\nimport { format } from \"date-fns\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\n\nimport { FiDownload } from \"react-icons/fi\";\nimport {\n  Table,\n  TableBody,\n  TableCell,\n  TableHead,\n  TableHeader,\n  TableRow,\n} from \"@/components/ui/table\";\nimport { Text } from \"@opal/components\";\nimport Title from \"@/components/ui/title\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Button as OpalButton } from \"@opal/components\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport React, { useState } from \"react\";\nimport { UsageReport } from \"./types\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport Link from \"next/link\";\nimport { humanReadableFormat, humanReadableFormatWithTime } from \"@/lib/time\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { PageSelector } from \"@/components/PageSelector\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { DateRangePickerValue } from \"../../../../../components/dateRangeSelectors/AdminDateRangeSelector\";\nimport Popover from \"@/refresh-components/Popover\";\nimport Calendar from \"@/refresh-components/Calendar\";\nimport { cn } from \"@/lib/utils\";\nimport { Spinner } from \"@/components/Spinner\";\nimport { SvgCalendar, SvgDownloadCloud } from \"@opal/icons\";\n\nfunction GenerateReportInput({\n  onReportGenerated,\n  isWaitingForReport,\n}: {\n  onReportGenerated: () => void;\n  isWaitingForReport: boolean;\n}) {\n  const [dateRange, setDateRange] = useState<DateRangePickerValue | undefined>(\n    undefined\n  );\n  const [isLoading, setIsLoading] = useState(false);\n\n  const [errorOccurred, setErrorOccurred] = useState<Error | null>(null);\n\n  const requestReport = async () => {\n    setIsLoading(true);\n    setErrorOccurred(null);\n    try {\n      let period_from: string | null = null;\n      let period_to: string | null = null;\n\n      if (dateRange?.selectValue != \"allTime\" && dateRange?.from) {\n        period_from = dateRange?.from?.toISOString();\n        period_to = dateRange?.to?.toISOString() ?? new Date().toISOString();\n      }\n\n      const res = await fetch(\"/api/admin/usage-report\", {\n        method: \"POST\",\n        credentials: \"include\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({\n          period_from: period_from,\n          period_to: period_to,\n        }),\n      });\n\n      if (!res.ok) {\n        throw Error(`Received an error: ${res.statusText}`);\n      }\n\n      // Trigger refresh of the reports list\n      onReportGenerated();\n    } catch (e) {\n      setErrorOccurred(e as Error);\n    } finally {\n      setIsLoading(false);\n    }\n  };\n\n  const today = new Date();\n\n  const lastWeek = new Date();\n  lastWeek.setDate(today.getDate() - 7);\n\n  const lastMonth = new Date();\n  lastMonth.setMonth(today.getMonth() - 1);\n\n  const lastYear = new Date();\n  lastYear.setFullYear(today.getFullYear() - 1);\n\n  return (\n    <div className=\"mb-8\">\n      <Title className=\"mb-2\">Generate Usage Reports</Title>\n      <Text as=\"p\">Generate usage statistics for users in the workspace.</Text>\n      <Spacer rem={2} />\n      <div className=\"grid gap-2 mb-3\">\n        <Popover>\n          <Popover.Trigger asChild>\n            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n            <Button\n              secondary\n              className={cn(\n                \"w-[300px] justify-start text-left font-normal\",\n                !dateRange && \"text-muted-foreground\"\n              )}\n              leftIcon={SvgCalendar}\n            >\n              {dateRange?.from ? (\n                dateRange.to ? (\n                  <>\n                    {format(dateRange.from, \"LLL dd, y\")} -{\" \"}\n                    {format(dateRange.to, \"LLL dd, y\")}\n                  </>\n                ) : (\n                  format(dateRange.from, \"LLL dd, y\")\n                )\n              ) : (\n                <span>Pick a date range</span>\n              )}\n            </Button>\n          </Popover.Trigger>\n          <Popover.Content align=\"start\">\n            <Calendar\n              initialFocus\n              mode=\"range\"\n              defaultMonth={dateRange?.from}\n              selected={dateRange}\n              onSelect={(range) =>\n                range?.from &&\n                setDateRange({\n                  from: range.from,\n                  to: range.to ?? range.from,\n                  selectValue: \"custom\",\n                })\n              }\n              numberOfMonths={2}\n              disabled={(date) => date > new Date()}\n            />\n            <div className=\"border-t p-3\">\n              <OpalButton\n                prominence=\"tertiary\"\n                width=\"full\"\n                onClick={() => {\n                  setDateRange({\n                    from: lastWeek,\n                    to: new Date(),\n                    selectValue: \"lastWeek\",\n                  });\n                }}\n              >\n                Last 7 days\n              </OpalButton>\n              <OpalButton\n                prominence=\"tertiary\"\n                width=\"full\"\n                onClick={() => {\n                  setDateRange({\n                    from: lastMonth,\n                    to: new Date(),\n                    selectValue: \"lastMonth\",\n                  });\n                }}\n              >\n                Last 30 days\n              </OpalButton>\n              <OpalButton\n                prominence=\"tertiary\"\n                width=\"full\"\n                onClick={() => {\n                  setDateRange({\n                    from: lastYear,\n                    to: new Date(),\n                    selectValue: \"lastYear\",\n                  });\n                }}\n              >\n                Last year\n              </OpalButton>\n              <OpalButton\n                prominence=\"tertiary\"\n                width=\"full\"\n                onClick={() => {\n                  setDateRange({\n                    from: new Date(1970, 0, 1),\n                    to: new Date(),\n                    selectValue: \"allTime\",\n                  });\n                }}\n              >\n                All time\n              </OpalButton>\n            </div>\n          </Popover.Content>\n        </Popover>\n      </div>\n      <OpalButton\n        disabled={isLoading || isWaitingForReport}\n        color={\"blue\"}\n        icon={SvgDownloadCloud}\n        onClick={() => requestReport()}\n      >\n        {isWaitingForReport ? \"Generating...\" : \"Generate Report\"}\n      </OpalButton>\n      <p className=\"mt-1 text-xs\">\n        {isWaitingForReport\n          ? \"A report is currently being generated. Please wait...\"\n          : 'Report generation runs in the background. Check the \"Previous Reports\" section below to download when ready.'}\n      </p>\n      {errorOccurred && (\n        <ErrorCallout\n          errorTitle=\"Something went wrong.\"\n          errorMsg={errorOccurred?.toString()}\n        />\n      )}\n    </div>\n  );\n}\n\nconst USAGE_REPORT_URL = SWR_KEYS.usageReport;\n\nfunction UsageReportsTable({\n  refreshTrigger,\n  isWaitingForReport,\n  onNewReportDetected,\n}: {\n  refreshTrigger: number;\n  isWaitingForReport: boolean;\n  onNewReportDetected: () => void;\n}) {\n  const [page, setPage] = useState(1);\n  const NUM_IN_PAGE = 10;\n  const [previousReportCount, setPreviousReportCount] = useState<number | null>(\n    null\n  );\n\n  const {\n    data: usageReportsMetadata,\n    error: usageReportsError,\n    isLoading: usageReportsIsLoading,\n    mutate,\n  } = useSWR<UsageReport[]>(USAGE_REPORT_URL, errorHandlingFetcher, {\n    refreshInterval: isWaitingForReport ? 3000 : 0, // Poll every 3 seconds when waiting\n  });\n\n  // Refresh when refreshTrigger changes\n  React.useEffect(() => {\n    if (refreshTrigger > 0) {\n      mutate();\n    }\n  }, [refreshTrigger, mutate]);\n\n  // Detect when a new report appears\n  React.useEffect(() => {\n    if (usageReportsMetadata && previousReportCount !== null) {\n      if (usageReportsMetadata.length > previousReportCount) {\n        onNewReportDetected();\n      }\n    }\n    if (usageReportsMetadata) {\n      setPreviousReportCount(usageReportsMetadata.length);\n    }\n  }, [usageReportsMetadata, previousReportCount, onNewReportDetected]);\n\n  const paginatedReports = usageReportsMetadata\n    ? usageReportsMetadata\n        .slice(0)\n        .reverse()\n        .slice(NUM_IN_PAGE * (page - 1), NUM_IN_PAGE * page)\n    : [];\n\n  const totalPages = usageReportsMetadata\n    ? Math.ceil(usageReportsMetadata.length / NUM_IN_PAGE)\n    : 0;\n\n  return (\n    <div>\n      <Title className=\"mb-2 mt-6 mx-auto\"> Previous Reports </Title>\n      {usageReportsIsLoading && !isWaitingForReport ? (\n        <div className=\"flex justify-center w-full\">\n          <ThreeDotsLoader />\n        </div>\n      ) : usageReportsError ? (\n        <ErrorCallout\n          errorTitle=\"Something went wrong.\"\n          errorMsg={(usageReportsError as Error).toString()}\n        />\n      ) : (\n        <>\n          <Table>\n            <TableHeader>\n              <TableRow>\n                <TableHead>Report</TableHead>\n                <TableHead>Period</TableHead>\n                <TableHead>Generated By</TableHead>\n                <TableHead>Time Generated</TableHead>\n                <TableHead>Download</TableHead>\n              </TableRow>\n            </TableHeader>\n\n            <TableBody>\n              {paginatedReports.map((r) => (\n                <TableRow key={r.report_name}>\n                  <TableCell>\n                    {r.report_name.split(\"_\")[1]?.substring(0, 8) ||\n                      r.report_name.substring(0, 8)}\n                  </TableCell>\n                  <TableCell>\n                    {r.period_from\n                      ? `${humanReadableFormat(\n                          r.period_from\n                        )} - ${humanReadableFormat(r.period_to!)}`\n                      : \"All time\"}\n                  </TableCell>\n                  <TableCell>{r.requestor ?? \"Auto generated\"}</TableCell>\n                  <TableCell>\n                    {humanReadableFormatWithTime(r.time_created)}\n                  </TableCell>\n                  <TableCell>\n                    <Link\n                      href={`/api/admin/usage-report/${r.report_name}`}\n                      className=\"flex justify-center\"\n                    >\n                      <FiDownload color=\"primary\" />\n                    </Link>\n                  </TableCell>\n                </TableRow>\n              ))}\n            </TableBody>\n          </Table>\n          <div className=\"mt-3 flex\">\n            <div className=\"mx-auto\">\n              <PageSelector\n                totalPages={totalPages}\n                currentPage={page}\n                onPageChange={(newPage) => {\n                  setPage(newPage);\n                  window.scrollTo({\n                    top: 0,\n                    left: 0,\n                    behavior: \"smooth\",\n                  });\n                }}\n              />\n            </div>\n          </div>\n        </>\n      )}\n    </div>\n  );\n}\n\nexport default function UsageReports() {\n  const [refreshTrigger, setRefreshTrigger] = useState(0);\n  const [isWaitingForReport, setIsWaitingForReport] = useState(false);\n  const [timeoutMessage, setTimeoutMessage] = useState<string | null>(null);\n  const timeoutRef = React.useRef<NodeJS.Timeout | null>(null);\n\n  const handleReportGenerated = () => {\n    setRefreshTrigger((prev) => prev + 1);\n    setIsWaitingForReport(true);\n    setTimeoutMessage(null);\n\n    // Clear any existing timeout\n    if (timeoutRef.current) {\n      clearTimeout(timeoutRef.current);\n    }\n\n    // Set a 15 second timeout\n    timeoutRef.current = setTimeout(() => {\n      setIsWaitingForReport(false);\n      setTimeoutMessage(\n        \"Report generation is taking longer than expected. The report will continue generating in the background. Please check back in a few minutes.\"\n      );\n      timeoutRef.current = null;\n    }, 15000);\n  };\n\n  const handleNewReportDetected = () => {\n    setIsWaitingForReport(false);\n    setTimeoutMessage(null);\n    // Clear the timeout if report completed before timeout\n    if (timeoutRef.current) {\n      clearTimeout(timeoutRef.current);\n      timeoutRef.current = null;\n    }\n  };\n\n  // Cleanup on unmount\n  React.useEffect(() => {\n    return () => {\n      if (timeoutRef.current) {\n        clearTimeout(timeoutRef.current);\n      }\n    };\n  }, []);\n\n  return (\n    <>\n      {isWaitingForReport && <Spinner />}\n      <>\n        <GenerateReportInput\n          onReportGenerated={handleReportGenerated}\n          isWaitingForReport={isWaitingForReport}\n        />\n        {timeoutMessage && (\n          <div className=\"mb-4 p-4 bg-status-warning-00 border border-status-warning-02 rounded-regular\">\n            <div className=\"flex items-start gap-2\">\n              <div className=\"text-status-warning-05 mt-0.5\">\n                <svg\n                  className=\"w-5 h-5\"\n                  fill=\"none\"\n                  stroke=\"currentColor\"\n                  viewBox=\"0 0 24 24\"\n                >\n                  <path\n                    strokeLinecap=\"round\"\n                    strokeLinejoin=\"round\"\n                    strokeWidth={2}\n                    d=\"M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z\"\n                  />\n                </svg>\n              </div>\n              <div className=\"flex-1\">\n                <div className=\"text-status-warning-05\">\n                  <Text as=\"p\" font=\"main-ui-action\">\n                    Report Generation In Progress\n                  </Text>\n                </div>\n                <Spacer rem={0.25} />\n                <div className=\"text-status-warning-05\">\n                  <Text as=\"p\">{timeoutMessage}</Text>\n                </div>\n              </div>\n            </div>\n          </div>\n        )}\n        <Separator />\n        <UsageReportsTable\n          refreshTrigger={refreshTrigger}\n          isWaitingForReport={isWaitingForReport}\n          onNewReportDetected={handleNewReportDetected}\n        />\n      </>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/usage/page.tsx",
    "content": "\"use client\";\n\nimport { AdminDateRangeSelector } from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { OnyxBotChart } from \"@/app/ee/admin/performance/usage/OnyxBotChart\";\nimport { FeedbackChart } from \"@/app/ee/admin/performance/usage/FeedbackChart\";\nimport { QueryPerformanceChart } from \"@/app/ee/admin/performance/usage/QueryPerformanceChart\";\nimport { PersonaMessagesChart } from \"@/app/ee/admin/performance/usage/PersonaMessagesChart\";\nimport { useTimeRange } from \"@/app/ee/admin/performance/lib\";\nimport UsageReports from \"@/app/ee/admin/performance/usage/UsageReports\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { useAdminPersonas } from \"@/hooks/useAdminPersonas\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\n\nconst route = ADMIN_ROUTES.USAGE;\n\nexport default function AnalyticsPage() {\n  const [timeRange, setTimeRange] = useTimeRange();\n  const { personas } = useAdminPersonas();\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />\n      <SettingsLayouts.Body>\n        <AdminDateRangeSelector\n          value={timeRange}\n          onValueChange={(value) => setTimeRange(value as any)}\n        />\n        <QueryPerformanceChart timeRange={timeRange} />\n        <FeedbackChart timeRange={timeRange} />\n        <OnyxBotChart timeRange={timeRange} />\n        <PersonaMessagesChart\n          availablePersonas={personas}\n          timeRange={timeRange}\n        />\n        <Separator />\n        <UsageReports />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/performance/usage/types.ts",
    "content": "import { Feedback, SessionType } from \"@/lib/types\";\n\nexport interface QueryAnalytics {\n  total_queries: number;\n  total_likes: number;\n  total_dislikes: number;\n  date: string;\n}\n\nexport interface UserAnalytics {\n  total_active_users: number;\n  date: string;\n}\n\nexport interface OnyxBotAnalytics {\n  total_queries: number;\n  auto_resolved: number;\n  date: string;\n}\n\nexport interface AbridgedSearchDoc {\n  document_id: string;\n  semantic_identifier: string;\n  link: string | null;\n}\n\nexport interface MessageSnapshot {\n  id: number;\n  message: string;\n  message_type: \"user\" | \"assistant\";\n  documents: AbridgedSearchDoc[];\n  feedback_type: Feedback | null;\n  feedback_text: string | null;\n  time_created: string;\n}\n\nexport interface ChatSessionSnapshot {\n  id: number;\n  user_email: string | null;\n  name: string | null;\n  messages: MessageSnapshot[];\n  assistant_id: number | null;\n  assistant_name: string | null;\n  time_created: string;\n  flow_type: SessionType;\n}\n\nexport interface ChatSessionMinimal {\n  id: number;\n  user_email: string | null;\n  name: string | null;\n  first_user_message: string;\n  first_ai_message: string;\n  assistant_id: number | null;\n  assistant_name: string | null;\n  time_created: string;\n  feedback_type: Feedback | \"mixed\" | null;\n  flow_type: SessionType;\n  conversation_length: number;\n}\n\nexport interface UsageReport {\n  report_name: string;\n  requestor: string | null;\n  time_created: string;\n  period_from: string | null;\n  period_to: string | null;\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/standard-answer/StandardAnswerCreationForm.tsx",
    "content": "\"use client\";\n\nimport { toast } from \"@/hooks/useToast\";\nimport { StandardAnswerCategory, StandardAnswer } from \"@/lib/types\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Form, Formik } from \"formik\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport * as Yup from \"yup\";\nimport {\n  createStandardAnswer,\n  createStandardAnswerCategory,\n  StandardAnswerCreationRequest,\n  updateStandardAnswer,\n} from \"./lib\";\nimport {\n  TextFormField,\n  MarkdownFormField,\n  BooleanFormField,\n  SelectorFormField,\n} from \"@/components/Field\";\nimport MultiSelectDropdown from \"@/components/MultiSelectDropdown\";\n\nfunction mapKeywordSelectToMatchAny(keywordSelect: \"any\" | \"all\"): boolean {\n  return keywordSelect == \"any\";\n}\n\nfunction mapMatchAnyToKeywordSelect(matchAny: boolean): \"any\" | \"all\" {\n  return matchAny ? \"any\" : \"all\";\n}\n\nexport const StandardAnswerCreationForm = ({\n  standardAnswerCategories,\n  existingStandardAnswer,\n}: {\n  standardAnswerCategories: StandardAnswerCategory[];\n  existingStandardAnswer?: StandardAnswer;\n}) => {\n  const isUpdate = existingStandardAnswer !== undefined;\n  const router = useRouter();\n\n  return (\n    <div>\n      <CardSection>\n        <Formik\n          initialValues={{\n            keyword: existingStandardAnswer\n              ? existingStandardAnswer.keyword\n              : \"\",\n            answer: existingStandardAnswer ? existingStandardAnswer.answer : \"\",\n            categories: existingStandardAnswer\n              ? existingStandardAnswer.categories\n              : [],\n            matchRegex: existingStandardAnswer\n              ? existingStandardAnswer.match_regex\n              : false,\n            matchAnyKeywords: existingStandardAnswer\n              ? mapMatchAnyToKeywordSelect(\n                  existingStandardAnswer.match_any_keywords\n                )\n              : \"all\",\n          }}\n          validationSchema={Yup.object().shape({\n            keyword: Yup.string()\n              .required(\"Keywords or pattern is required\")\n              .max(255)\n              .min(1),\n            answer: Yup.string().required(\"Answer is required\").min(1),\n            categories: Yup.array()\n              .required()\n              .min(1, \"At least one category is required\"),\n          })}\n          onSubmit={async (values, formikHelpers) => {\n            formikHelpers.setSubmitting(true);\n\n            const cleanedValues: StandardAnswerCreationRequest = {\n              ...values,\n              matchAnyKeywords: mapKeywordSelectToMatchAny(\n                values.matchAnyKeywords\n              ),\n              categories: values.categories.map((category) => category.id),\n            };\n\n            let response;\n            if (isUpdate) {\n              response = await updateStandardAnswer(\n                existingStandardAnswer.id,\n                cleanedValues\n              );\n            } else {\n              response = await createStandardAnswer(cleanedValues);\n            }\n            formikHelpers.setSubmitting(false);\n            if (response.ok) {\n              router.push(`/ee/admin/standard-answer?u=${Date.now()}` as Route);\n            } else {\n              const responseJson = await response.json();\n              const errorMsg = responseJson.detail || responseJson.message;\n              toast.error(\n                isUpdate\n                  ? `Error updating Standard Answer - ${errorMsg}`\n                  : `Error creating Standard Answer - ${errorMsg}`\n              );\n            }\n          }}\n        >\n          {({ isSubmitting, values, setFieldValue }) => (\n            <Form>\n              {values.matchRegex ? (\n                <TextFormField\n                  name=\"keyword\"\n                  label=\"Regex pattern\"\n                  isCode\n                  tooltip=\"Triggers if the question matches this regex pattern (using Python `re.search()`)\"\n                  placeholder=\"(?:it|support)\\s*ticket\"\n                />\n              ) : values.matchAnyKeywords == \"any\" ? (\n                <TextFormField\n                  name=\"keyword\"\n                  label=\"Any of these keywords, separated by spaces\"\n                  tooltip=\"A question must match these keywords in order to trigger the answer.\"\n                  placeholder=\"ticket problem issue\"\n                />\n              ) : (\n                <TextFormField\n                  name=\"keyword\"\n                  label=\"All of these keywords, in any order, separated by spaces\"\n                  tooltip=\"A question must match these keywords in order to trigger the answer.\"\n                  placeholder=\"it ticket\"\n                />\n              )}\n              <BooleanFormField\n                subtext=\"Match a regex pattern instead of an exact keyword\"\n                optional\n                label=\"Match regex\"\n                name=\"matchRegex\"\n              />\n              {values.matchRegex ? null : (\n                <SelectorFormField\n                  defaultValue={`all`}\n                  label=\"Keyword detection strategy\"\n                  subtext=\"Choose whether to require the user's question to contain any or all of the keywords above to show this answer.\"\n                  name=\"matchAnyKeywords\"\n                  options={[\n                    {\n                      name: \"All keywords\",\n                      value: \"all\",\n                    },\n                    {\n                      name: \"Any keywords\",\n                      value: \"any\",\n                    },\n                  ]}\n                  onSelect={(selected) => {\n                    setFieldValue(\"matchAnyKeywords\", selected);\n                  }}\n                />\n              )}\n              <div className=\"w-full\">\n                <MarkdownFormField\n                  name=\"answer\"\n                  label=\"Answer\"\n                  placeholder=\"The answer in Markdown. Example: If you need any help from the IT team, please email internalsupport@company.com\"\n                />\n              </div>\n              <div className=\"w-4/12\">\n                <MultiSelectDropdown\n                  name=\"categories\"\n                  label=\"Categories:\"\n                  onChange={(selected_options) => {\n                    const selected_categories = selected_options.map(\n                      (option) => {\n                        return { id: Number(option.value), name: option.label };\n                      }\n                    );\n                    setFieldValue(\"categories\", selected_categories);\n                  }}\n                  creatable={true}\n                  onCreate={async (created_name) => {\n                    const response = await createStandardAnswerCategory({\n                      name: created_name,\n                    });\n                    const newCategory = await response.json();\n                    return {\n                      label: newCategory.name,\n                      value: newCategory.id.toString(),\n                    };\n                  }}\n                  options={standardAnswerCategories.map((category) => ({\n                    label: category.name,\n                    value: category.id.toString(),\n                  }))}\n                  initialSelectedOptions={values.categories.map((category) => ({\n                    label: category.name,\n                    value: category.id.toString(),\n                  }))}\n                />\n              </div>\n              <div className=\"py-4 flex\">\n                {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n                <Button\n                  type=\"submit\"\n                  disabled={isSubmitting}\n                  className=\"mx-auto w-64\"\n                >\n                  {isUpdate ? \"Update!\" : \"Create!\"}\n                </Button>\n              </div>\n            </Form>\n          )}\n        </Formik>\n      </CardSection>\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/app/ee/admin/standard-answer/[id]/page.tsx",
    "content": "import { StandardAnswerCreationForm } from \"@/app/ee/admin/standard-answer/StandardAnswerCreationForm\";\nimport { fetchSS } from \"@/lib/utilsSS\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport { StandardAnswer, StandardAnswerCategory } from \"@/lib/types\";\n\nconst route = ADMIN_ROUTES.STANDARD_ANSWERS;\n\nasync function Main({ id }: { id: string }) {\n  const tasks = [\n    fetchSS(\"/manage/admin/standard-answer\"),\n    fetchSS(`/manage/admin/standard-answer/category`),\n  ];\n  const [standardAnswersResponse, standardAnswerCategoriesResponse] =\n    await Promise.all(tasks);\n\n  if (standardAnswersResponse === undefined) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Something went wrong :(\"\n        errorMsg={`Failed to fetch standard answers.`}\n      />\n    );\n  }\n\n  if (!standardAnswersResponse.ok) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Something went wrong :(\"\n        errorMsg={`Failed to fetch standard answers - ${await standardAnswersResponse.text()}`}\n      />\n    );\n  }\n  const allStandardAnswers =\n    (await standardAnswersResponse.json()) as StandardAnswer[];\n  const standardAnswer = allStandardAnswers.find(\n    (answer) => answer.id.toString() === id\n  );\n\n  if (!standardAnswer) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Something went wrong :(\"\n        errorMsg={`Did not find standard answer with ID: ${id}`}\n      />\n    );\n  }\n\n  if (standardAnswerCategoriesResponse === undefined) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Something went wrong :(\"\n        errorMsg={`Failed to fetch standard answer categories.`}\n      />\n    );\n  }\n\n  if (!standardAnswerCategoriesResponse.ok) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Something went wrong :(\"\n        errorMsg={`Failed to fetch standard answer categories - ${await standardAnswerCategoriesResponse.text()}`}\n      />\n    );\n  }\n\n  const standardAnswerCategories =\n    (await standardAnswerCategoriesResponse.json()) as StandardAnswerCategory[];\n\n  return (\n    <StandardAnswerCreationForm\n      standardAnswerCategories={standardAnswerCategories}\n      existingStandardAnswer={standardAnswer}\n    />\n  );\n}\n\nexport default async function Page(props: { params: Promise<{ id: string }> }) {\n  const params = await props.params;\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title=\"Edit Standard Answer\"\n        backButton\n        separator\n      />\n      <SettingsLayouts.Body>\n        <Main id={params.id} />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/standard-answer/hooks.ts",
    "content": "import { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { StandardAnswerCategory, StandardAnswer } from \"@/lib/types\";\nimport useSWR, { mutate } from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport const useStandardAnswerCategories = () => {\n  const swrResponse = useSWR<StandardAnswerCategory[]>(\n    SWR_KEYS.standardAnswerCategories,\n    errorHandlingFetcher\n  );\n\n  return {\n    ...swrResponse,\n    refreshStandardAnswerCategories: () =>\n      mutate(SWR_KEYS.standardAnswerCategories),\n  };\n};\n\nexport const useStandardAnswers = () => {\n  const swrResponse = useSWR<StandardAnswer[]>(\n    SWR_KEYS.standardAnswers,\n    errorHandlingFetcher\n  );\n\n  return {\n    ...swrResponse,\n    refreshStandardAnswers: () => mutate(SWR_KEYS.standardAnswers),\n  };\n};\n"
  },
  {
    "path": "web/src/app/ee/admin/standard-answer/lib.ts",
    "content": "export interface StandardAnswerCategoryCreationRequest {\n  name: string;\n}\n\nexport interface StandardAnswerCreationRequest {\n  keyword: string;\n  answer: string;\n  categories: number[];\n  matchRegex: boolean;\n  matchAnyKeywords: boolean;\n}\n\nconst buildRequestBodyFromStandardAnswerCategoryCreationRequest = (\n  request: StandardAnswerCategoryCreationRequest\n) => {\n  return JSON.stringify({\n    name: request.name,\n  });\n};\n\nexport const createStandardAnswerCategory = async (\n  request: StandardAnswerCategoryCreationRequest\n) => {\n  return fetch(\"/api/manage/admin/standard-answer/category\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: buildRequestBodyFromStandardAnswerCategoryCreationRequest(request),\n  });\n};\n\nexport const updateStandardAnswerCategory = async (\n  id: number,\n  request: StandardAnswerCategoryCreationRequest\n) => {\n  return fetch(`/api/manage/admin/standard-answer/category/${id}`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: buildRequestBodyFromStandardAnswerCategoryCreationRequest(request),\n  });\n};\n\nconst buildRequestBodyFromStandardAnswerCreationRequest = (\n  request: StandardAnswerCreationRequest\n) => {\n  return JSON.stringify({\n    keyword: request.keyword,\n    answer: request.answer,\n    categories: request.categories,\n    match_regex: request.matchRegex,\n    match_any_keywords: request.matchAnyKeywords,\n  });\n};\n\nexport const createStandardAnswer = async (\n  request: StandardAnswerCreationRequest\n) => {\n  return fetch(\"/api/manage/admin/standard-answer\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: buildRequestBodyFromStandardAnswerCreationRequest(request),\n  });\n};\n\nexport const updateStandardAnswer = async (\n  id: number,\n  request: StandardAnswerCreationRequest\n) => {\n  return fetch(`/api/manage/admin/standard-answer/${id}`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: buildRequestBodyFromStandardAnswerCreationRequest(request),\n  });\n};\n\nexport const deleteStandardAnswer = async (id: number) => {\n  return fetch(`/api/manage/admin/standard-answer/${id}`, {\n    method: \"DELETE\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n};\n"
  },
  {
    "path": "web/src/app/ee/admin/standard-answer/new/page.tsx",
    "content": "import { StandardAnswerCreationForm } from \"@/app/ee/admin/standard-answer/StandardAnswerCreationForm\";\nimport { fetchSS } from \"@/lib/utilsSS\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport { StandardAnswerCategory } from \"@/lib/types\";\n\nconst route = ADMIN_ROUTES.STANDARD_ANSWERS;\n\nasync function Page() {\n  const standardAnswerCategoriesResponse = await fetchSS(\n    \"/manage/admin/standard-answer/category\"\n  );\n\n  if (!standardAnswerCategoriesResponse.ok) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Something went wrong :(\"\n        errorMsg={`Failed to fetch standard answer categories - ${await standardAnswerCategoriesResponse.text()}`}\n      />\n    );\n  }\n  const standardAnswerCategories =\n    (await standardAnswerCategoriesResponse.json()) as StandardAnswerCategory[];\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title=\"New Standard Answer\"\n        backButton\n        separator\n      />\n      <SettingsLayouts.Body>\n        <StandardAnswerCreationForm\n          standardAnswerCategories={standardAnswerCategories}\n        />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n\nexport default Page;\n"
  },
  {
    "path": "web/src/app/ee/admin/standard-answer/page.tsx",
    "content": "\"use client\";\n\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { toast } from \"@/hooks/useToast\";\nimport { useStandardAnswers, useStandardAnswerCategories } from \"./hooks\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport Separator from \"@/refresh-components/Separator\";\nimport {\n  Table,\n  TableHead,\n  TableRow,\n  TableBody,\n  TableCell,\n} from \"@/components/ui/table\";\n\nimport Link from \"next/link\";\nimport type { Route } from \"next\";\nimport { StandardAnswer, StandardAnswerCategory } from \"@/lib/types\";\nimport { MagnifyingGlass } from \"@phosphor-icons/react\";\nimport { useState, JSX } from \"react\";\nimport ReactMarkdown from \"react-markdown\";\nimport remarkGfm from \"remark-gfm\";\nimport { deleteStandardAnswer } from \"./lib\";\nimport { FilterDropdown } from \"@/components/search/filtering/FilterDropdown\";\nimport { FiTag } from \"react-icons/fi\";\nimport { PageSelector } from \"@/components/PageSelector\";\nimport { Text } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport { TableHeader } from \"@/components/ui/table\";\nimport CreateButton from \"@/refresh-components/buttons/CreateButton\";\nimport { SvgEdit, SvgTrash } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nconst NUM_RESULTS_PER_PAGE = 10;\n\nconst route = ADMIN_ROUTES.STANDARD_ANSWERS;\n\ntype Displayable = JSX.Element | string;\n\nconst RowTemplate = ({\n  id,\n  entries,\n}: {\n  id: number;\n  entries: [\n    Displayable,\n    Displayable,\n    Displayable,\n    Displayable,\n    Displayable,\n    Displayable,\n  ];\n}) => {\n  return (\n    <TableRow key={id}>\n      <TableCell className=\"w-1/24\">{entries[0]}</TableCell>\n      <TableCell className=\"w-2/12\">{entries[1]}</TableCell>\n      <TableCell className=\"w-2/12\">{entries[2]}</TableCell>\n      <TableCell className=\"w-1/24\">{entries[3]}</TableCell>\n      <TableCell className=\"w-7/12 overflow-auto\">{entries[4]}</TableCell>\n      <TableCell className=\"w-1/24\">{entries[5]}</TableCell>\n    </TableRow>\n  );\n};\n\nconst CategoryBubble = ({\n  name,\n  onDelete,\n}: {\n  name: string;\n  onDelete?: () => void;\n}) => (\n  <span\n    className={`\n      inline-block\n      px-2\n      py-1\n      mr-1\n      mb-1\n      text-xs\n      font-semibold\n      text-emphasis\n      bg-accent-background-hovered\n      rounded-full\n      items-center\n      w-fit\n      ${onDelete ? \"cursor-pointer\" : \"\"}\n    `}\n    onClick={onDelete}\n  >\n    {name}\n    {onDelete && (\n      <button\n        className=\"ml-1 text-subtle hover:text-emphasis\"\n        aria-label=\"Remove category\"\n      >\n        &times;\n      </button>\n    )}\n  </span>\n);\n\nconst StandardAnswersTableRow = ({\n  standardAnswer,\n  handleDelete,\n}: {\n  standardAnswer: StandardAnswer;\n  handleDelete: (id: number) => void;\n}) => {\n  return (\n    <RowTemplate\n      id={standardAnswer.id}\n      entries={[\n        <Link\n          key={`edit-${standardAnswer.id}`}\n          href={`/ee/admin/standard-answer/${standardAnswer.id}` as Route}\n        >\n          <SvgEdit size={16} />\n        </Link>,\n        <div key={`categories-${standardAnswer.id}`}>\n          {standardAnswer.categories.map((category) => (\n            <CategoryBubble key={category.id} name={category.name} />\n          ))}\n        </div>,\n        <ReactMarkdown key={`keyword-${standardAnswer.id}`}>\n          {standardAnswer.match_regex\n            ? `\\`${standardAnswer.keyword}\\``\n            : standardAnswer.keyword}\n        </ReactMarkdown>,\n        <div\n          key={`match_regex-${standardAnswer.id}`}\n          className=\"flex items-center\"\n        >\n          {standardAnswer.match_regex ? (\n            <span className=\"text-green-500 font-medium\">Yes</span>\n          ) : (\n            <span className=\"text-gray-500\">No</span>\n          )}\n        </div>,\n        <ReactMarkdown\n          key={`answer-${standardAnswer.id}`}\n          className=\"prose dark:prose-invert\"\n          remarkPlugins={[remarkGfm]}\n        >\n          {standardAnswer.answer}\n        </ReactMarkdown>,\n        <Button\n          key={`delete-${standardAnswer.id}`}\n          icon={SvgTrash}\n          onClick={() => handleDelete(standardAnswer.id)}\n        />,\n      ]}\n    />\n  );\n};\n\nconst StandardAnswersTable = ({\n  standardAnswers,\n  standardAnswerCategories,\n  refresh,\n}: {\n  standardAnswers: StandardAnswer[];\n  standardAnswerCategories: StandardAnswerCategory[];\n  refresh: () => void;\n}) => {\n  const [query, setQuery] = useState(\"\");\n  const [currentPage, setCurrentPage] = useState(1);\n  const [selectedCategories, setSelectedCategories] = useState<\n    StandardAnswerCategory[]\n  >([]);\n  const columns = [\n    { name: \"\", key: \"edit\" },\n    { name: \"Categories\", key: \"category\" },\n    { name: \"Keywords/Pattern\", key: \"keyword\" },\n    { name: \"Match regex?\", key: \"match_regex\" },\n    { name: \"Answer\", key: \"answer\" },\n    { name: \"\", key: \"delete\" },\n  ];\n\n  const filteredStandardAnswers = standardAnswers.filter((standardAnswer) => {\n    const {\n      answer,\n      id,\n      categories,\n      match_regex,\n      match_any_keywords,\n      ...fieldsToSearch\n    } = standardAnswer;\n    const cleanedQuery = query.toLowerCase();\n    const searchMatch = Object.values(fieldsToSearch).some((value) => {\n      return value.toLowerCase().includes(cleanedQuery);\n    });\n    const categoryMatch =\n      selectedCategories.length == 0 ||\n      selectedCategories.some((category) =>\n        categories.map((c) => c.id).includes(category.id)\n      );\n    return searchMatch && categoryMatch;\n  });\n\n  const totalPages = Math.ceil(\n    filteredStandardAnswers.length / NUM_RESULTS_PER_PAGE\n  );\n  const startIndex = (currentPage - 1) * NUM_RESULTS_PER_PAGE;\n  const endIndex = startIndex + NUM_RESULTS_PER_PAGE;\n  const paginatedStandardAnswers = filteredStandardAnswers.slice(\n    startIndex,\n    endIndex\n  );\n\n  const handlePageChange = (page: number) => {\n    setCurrentPage(page);\n  };\n\n  const handleDelete = async (id: number) => {\n    const response = await deleteStandardAnswer(id);\n    if (response.ok) {\n      toast.success(`Standard answer ${id} deleted`);\n    } else {\n      const errorMsg = await response.text();\n      toast.error(`Failed to delete standard answer - ${errorMsg}`);\n    }\n    refresh();\n  };\n\n  const handleCategorySelect = (category: StandardAnswerCategory) => {\n    setSelectedCategories((prev: StandardAnswerCategory[]) => {\n      const prevCategoryIds = prev.map((category) => category.id);\n      if (prevCategoryIds.includes(category.id)) {\n        return prev.filter((c) => c.id !== category.id);\n      }\n      return [...prev, category];\n    });\n  };\n\n  return (\n    <div className=\"justify-center py-2\">\n      <div className=\"flex items-center w-full border-2 border-border rounded-lg px-4 py-2 focus-within:border-accent\">\n        <MagnifyingGlass />\n        <textarea\n          autoFocus\n          className=\"flex-grow ml-2 h-6 bg-transparent outline-none placeholder-subtle overflow-hidden whitespace-normal resize-none\"\n          role=\"textarea\"\n          aria-multiline\n          placeholder=\"Find standard answers by keyword/phrase...\"\n          value={query}\n          onChange={(event) => {\n            setQuery(event.target.value);\n            setCurrentPage(1);\n          }}\n          onKeyDown={(event) => {\n            if (event.key === \"Enter\") {\n              event.preventDefault();\n            }\n          }}\n          suppressContentEditableWarning={true}\n        />\n      </div>\n      <div className=\"my-4 border-b border-border\">\n        <FilterDropdown\n          options={standardAnswerCategories.map((category) => {\n            return {\n              key: category.name,\n              display: category.name,\n            };\n          })}\n          selected={selectedCategories.map((category) => category.name)}\n          handleSelect={(option) => {\n            handleCategorySelect(\n              standardAnswerCategories.find(\n                (category) => category.name === option.key\n              )!\n            );\n          }}\n          icon={\n            <div className=\"my-auto mr-2 w-[16px] h-[16px]\">\n              <FiTag size={16} />\n            </div>\n          }\n          defaultDisplay=\"All Categories\"\n        />\n        <div className=\"flex flex-wrap pb-4 mt-3\">\n          {selectedCategories.map((category) => (\n            <CategoryBubble\n              key={category.id}\n              name={category.name}\n              onDelete={() => handleCategorySelect(category)}\n            />\n          ))}\n        </div>\n      </div>\n      <div className=\"flex flex-col w-full mx-auto\">\n        <Table className=\"w-full\">\n          <TableHeader>\n            <TableRow>\n              {columns.map((column) => (\n                <TableHead key={column.key}>{column.name}</TableHead>\n              ))}\n            </TableRow>\n          </TableHeader>\n\n          <TableBody>\n            {paginatedStandardAnswers.length > 0 ? (\n              paginatedStandardAnswers.map((item) => (\n                <StandardAnswersTableRow\n                  key={item.id}\n                  standardAnswer={item}\n                  handleDelete={handleDelete}\n                />\n              ))\n            ) : (\n              <RowTemplate id={0} entries={[\"\", \"\", \"\", \"\", \"\", \"\"]} />\n            )}\n          </TableBody>\n        </Table>\n        <div>\n          {paginatedStandardAnswers.length === 0 && (\n            <div className=\"flex justify-center\">\n              <Text as=\"p\">No matching standard answers found...</Text>\n            </div>\n          )}\n        </div>\n        {paginatedStandardAnswers.length > 0 && (\n          <>\n            <div className=\"mt-4\">\n              <Text as=\"p\">\n                {markdown(\n                  \"Ensure that you have added the category to the relevant [Slack Bot](/admin/bots).\"\n                )}\n              </Text>\n            </div>\n            <div className=\"mt-4 flex justify-center\">\n              <PageSelector\n                currentPage={currentPage}\n                totalPages={totalPages}\n                onPageChange={handlePageChange}\n                shouldScroll={true}\n              />\n            </div>\n          </>\n        )}\n      </div>\n    </div>\n  );\n};\n\nfunction Main() {\n  const {\n    data: standardAnswers,\n    error: standardAnswersError,\n    isLoading: standardAnswersIsLoading,\n    refreshStandardAnswers,\n  } = useStandardAnswers();\n  const {\n    data: standardAnswerCategories,\n    error: standardAnswerCategoriesError,\n    isLoading: standardAnswerCategoriesIsLoading,\n  } = useStandardAnswerCategories();\n\n  if (standardAnswersIsLoading || standardAnswerCategoriesIsLoading) {\n    return <ThreeDotsLoader />;\n  }\n\n  if (standardAnswersError || !standardAnswers) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Error loading standard answers\"\n        errorMsg={\n          standardAnswersError.info?.detail ||\n          standardAnswersError.info?.message\n        }\n      />\n    );\n  }\n\n  if (standardAnswerCategoriesError || !standardAnswerCategories) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Error loading standard answer categories\"\n        errorMsg={\n          standardAnswerCategoriesError.info?.detail ||\n          standardAnswerCategoriesError.info?.message\n        }\n      />\n    );\n  }\n\n  return (\n    <div className=\"mb-8\">\n      <Text as=\"p\">\n        {markdown(\n          \"Manage the standard answers for pre-defined questions.\\nNote: Currently, only questions asked from Slack can receive standard answers.\"\n        )}\n      </Text>\n      <Spacer rem={0.5} />\n      {standardAnswers.length == 0 && (\n        <>\n          <Text as=\"p\">Add your first standard answer below!</Text>\n          <Spacer rem={0.5} />\n        </>\n      )}\n      <div className=\"mb-2\"></div>\n\n      <CreateButton href=\"/admin/standard-answer/new\">\n        New Standard Answer\n      </CreateButton>\n\n      <Separator />\n\n      <div>\n        <StandardAnswersTable\n          standardAnswers={standardAnswers}\n          standardAnswerCategories={standardAnswerCategories}\n          refresh={refreshStandardAnswers}\n        />\n      </div>\n    </div>\n  );\n}\n\nexport default function Page() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />\n      <SettingsLayouts.Body>\n        <Main />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/theme/AppearanceThemeSettings.tsx",
    "content": "\"use client\";\n\nimport { FormField } from \"@/refresh-components/form/FormField\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport Tabs from \"@/refresh-components/Tabs\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { Preview } from \"./Preview\";\nimport InputTextArea from \"@/refresh-components/inputs/InputTextArea\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport CharacterCount from \"@/refresh-components/CharacterCount\";\nimport InputImage from \"@/refresh-components/inputs/InputImage\";\nimport { Button } from \"@opal/components\";\nimport { useFormikContext } from \"formik\";\nimport {\n  forwardRef,\n  useEffect,\n  useImperativeHandle,\n  useMemo,\n  useRef,\n  useState,\n} from \"react\";\nimport type { PreviewHighlightTarget } from \"./Preview\";\nimport { SvgEdit } from \"@opal/icons\";\n\ninterface AppearanceThemeSettingsProps {\n  selectedLogo: File | null;\n  setSelectedLogo: (file: File | null) => void;\n  logoVersion: number;\n  charLimits: {\n    application_name: number;\n    custom_greeting_message: number;\n    custom_header_content: number;\n    custom_lower_disclaimer_content: number;\n    custom_popup_header: number;\n    custom_popup_content: number;\n    consent_screen_prompt: number;\n  };\n}\n\nexport interface AppearanceThemeSettingsRef {\n  focusFirstError: (errors: Record<string, any>) => void;\n}\n\nexport const AppearanceThemeSettings = forwardRef<\n  AppearanceThemeSettingsRef,\n  AppearanceThemeSettingsProps\n>(function AppearanceThemeSettings(\n  { selectedLogo, setSelectedLogo, logoVersion, charLimits },\n  ref\n) {\n  const { values, errors, setFieldValue } = useFormikContext<any>();\n  const fileInputRef = useRef<HTMLInputElement>(null);\n  const applicationNameInputRef = useRef<HTMLInputElement>(null);\n  const greetingMessageInputRef = useRef<HTMLInputElement>(null);\n  const headerContentInputRef = useRef<HTMLInputElement>(null);\n  const lowerDisclaimerInputRef = useRef<HTMLTextAreaElement>(null);\n  const noticeHeaderInputRef = useRef<HTMLInputElement>(null);\n  const noticeContentInputRef = useRef<HTMLTextAreaElement>(null);\n  const consentPromptTextAreaRef = useRef<HTMLTextAreaElement>(null);\n  const prevShowFirstVisitNoticeRef = useRef<boolean>(\n    Boolean(values.show_first_visit_notice)\n  );\n  const prevEnableConsentScreenRef = useRef<boolean>(\n    Boolean(values.enable_consent_screen)\n  );\n  const [focusedPreviewTarget, setFocusedPreviewTarget] =\n    useState<PreviewHighlightTarget | null>(null);\n  const [hoveredPreviewTarget, setHoveredPreviewTarget] =\n    useState<PreviewHighlightTarget | null>(null);\n\n  const highlightTarget = useMemo(\n    () => focusedPreviewTarget ?? hoveredPreviewTarget,\n    [focusedPreviewTarget, hoveredPreviewTarget]\n  );\n\n  const getPreviewHandlers = (target: PreviewHighlightTarget) => ({\n    onFocus: () => setFocusedPreviewTarget(target),\n    onBlur: () =>\n      setFocusedPreviewTarget((cur) => (cur === target ? null : cur)),\n    onMouseEnter: () => setHoveredPreviewTarget(target),\n    onMouseLeave: () =>\n      setHoveredPreviewTarget((cur) => (cur === target ? null : cur)),\n  });\n\n  // Expose focusFirstError method to parent component\n  useImperativeHandle(ref, () => ({\n    focusFirstError: (errors: Record<string, any>) => {\n      // Focus on the first field with an error, in priority order\n      const fieldRefs = [\n        { name: \"application_name\", ref: applicationNameInputRef },\n        { name: \"custom_greeting_message\", ref: greetingMessageInputRef },\n        { name: \"custom_header_content\", ref: headerContentInputRef },\n        {\n          name: \"custom_lower_disclaimer_content\",\n          ref: lowerDisclaimerInputRef,\n        },\n        { name: \"custom_popup_header\", ref: noticeHeaderInputRef },\n        { name: \"custom_popup_content\", ref: noticeContentInputRef },\n        { name: \"consent_screen_prompt\", ref: consentPromptTextAreaRef },\n      ];\n      for (const field of fieldRefs) {\n        if (errors[field.name] && field.ref.current) {\n          field.ref.current.focus();\n          // Scroll into view if needed\n          field.ref.current.scrollIntoView({\n            behavior: \"smooth\",\n            block: \"center\",\n          });\n          break;\n        }\n      }\n    },\n  }));\n\n  useEffect(() => {\n    const prev = prevShowFirstVisitNoticeRef.current;\n    const next = Boolean(values.show_first_visit_notice);\n\n    // When enabling the toggle, autofocus the \"Notice Header\" input.\n    if (!prev && next) {\n      requestAnimationFrame(() => {\n        noticeHeaderInputRef.current?.focus();\n      });\n    }\n\n    prevShowFirstVisitNoticeRef.current = next;\n  }, [values.show_first_visit_notice]);\n\n  useEffect(() => {\n    const prev = prevEnableConsentScreenRef.current;\n    const next = Boolean(values.enable_consent_screen);\n\n    // When enabling the toggle, autofocus the \"Notice Consent Prompt\" input.\n    if (!prev && next) {\n      requestAnimationFrame(() => {\n        consentPromptTextAreaRef.current?.focus();\n      });\n    }\n\n    prevEnableConsentScreenRef.current = next;\n  }, [values.enable_consent_screen]);\n\n  const handleLogoEdit = () => {\n    fileInputRef.current?.click();\n  };\n\n  const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {\n    const file = event.target.files?.[0];\n    if (file) {\n      setSelectedLogo(file);\n      setFieldValue(\"use_custom_logo\", true);\n    }\n  };\n\n  const handleLogoRemove = async () => {\n    setFieldValue(\"use_custom_logo\", false);\n    setSelectedLogo(null);\n  };\n\n  // Memoize the blob URL to prevent creating new URLs on every render\n  const logoObjectUrl = useMemo(() => {\n    if (selectedLogo) {\n      return URL.createObjectURL(selectedLogo);\n    }\n    return null;\n  }, [selectedLogo]);\n\n  // Clean up the blob URL when selectedLogo changes or component unmounts\n  useEffect(() => {\n    return () => {\n      if (logoObjectUrl) {\n        URL.revokeObjectURL(logoObjectUrl);\n      }\n    };\n  }, [logoObjectUrl]);\n\n  const logoSrc = useMemo(() => {\n    if (logoObjectUrl) {\n      return logoObjectUrl;\n    }\n    if (values.use_custom_logo) {\n      return `/api/enterprise-settings/logo?v=${logoVersion}`;\n    }\n    return undefined;\n  }, [logoObjectUrl, values.use_custom_logo, logoVersion]);\n\n  // Determine which tabs should be enabled\n  const hasLogo = Boolean(selectedLogo || values.use_custom_logo);\n  const hasApplicationName = Boolean(values.application_name?.trim());\n\n  // Auto-switch to logo_and_name if current selection becomes invalid\n  useEffect(() => {\n    if (values.logo_display_style === \"logo_only\" && !hasLogo) {\n      setFieldValue(\"logo_display_style\", \"logo_and_name\");\n    } else if (\n      values.logo_display_style === \"name_only\" &&\n      !hasApplicationName\n    ) {\n      setFieldValue(\"logo_display_style\", \"logo_and_name\");\n    }\n  }, [hasLogo, hasApplicationName, values.logo_display_style, setFieldValue]);\n\n  return (\n    <div className=\"flex flex-col gap-4 w-full\">\n      <input\n        type=\"file\"\n        ref={fileInputRef}\n        onChange={handleFileChange}\n        accept=\"image/png,image/jpeg,image/jpg\"\n        style={{ display: \"none\" }}\n      />\n\n      <div className=\"flex gap-10 items-center\">\n        <div className=\"flex flex-col gap-4 w-full\">\n          <FormField state={errors.application_name ? \"error\" : \"idle\"}>\n            <FormField.Label\n              rightAction={\n                <CharacterCount\n                  value={values.application_name}\n                  limit={charLimits.application_name}\n                />\n              }\n            >\n              Application Display Name\n            </FormField.Label>\n            <FormField.Control asChild>\n              <InputTypeIn\n                ref={applicationNameInputRef}\n                data-label=\"application-name-input\"\n                showClearButton\n                variant={errors.application_name ? \"error\" : undefined}\n                value={values.application_name}\n                {...getPreviewHandlers(\"sidebar\")}\n                onChange={(e) =>\n                  setFieldValue(\"application_name\", e.target.value)\n                }\n              />\n            </FormField.Control>\n            <FormField.Description>\n              This name will show across the app and replace \"Onyx\" in the UI.\n            </FormField.Description>\n            <FormField.Message\n              messages={{ error: errors.application_name as string }}\n            />\n          </FormField>\n\n          <FormField state=\"idle\">\n            <FormField.Label>Logo Display Style</FormField.Label>\n            <FormField.Control>\n              <Tabs\n                value={values.logo_display_style}\n                onValueChange={(value) =>\n                  setFieldValue(\"logo_display_style\", value)\n                }\n              >\n                <Tabs.List>\n                  <Tabs.Trigger\n                    value=\"logo_and_name\"\n                    tooltip=\"Show both your application logo and name.\"\n                    tooltipSide=\"top\"\n                    {...getPreviewHandlers(\"sidebar\")}\n                  >\n                    Logo & Name\n                  </Tabs.Trigger>\n                  <Tabs.Trigger\n                    value=\"logo_only\"\n                    disabled={!hasLogo}\n                    tooltip={\n                      hasLogo\n                        ? \"Show only your application logo.\"\n                        : \"Upload a logo to enable this option.\"\n                    }\n                    tooltipSide=\"top\"\n                    {...getPreviewHandlers(\"sidebar\")}\n                  >\n                    Logo Only\n                  </Tabs.Trigger>\n                  <Tabs.Trigger\n                    value=\"name_only\"\n                    disabled={!hasApplicationName}\n                    tooltip={\n                      hasApplicationName\n                        ? \"Show only your application name.\"\n                        : \"Enter an application name to enable this option.\"\n                    }\n                    tooltipSide=\"top\"\n                    {...getPreviewHandlers(\"sidebar\")}\n                  >\n                    Name Only\n                  </Tabs.Trigger>\n                </Tabs.List>\n              </Tabs>\n            </FormField.Control>\n            <FormField.Description>\n              Choose what to display at the top of the sidebar. Options become\n              available once you add a logo or application name.\n            </FormField.Description>\n          </FormField>\n        </div>\n\n        <FormField state=\"idle\">\n          <FormField.Label>Application Logo</FormField.Label>\n          <FormField.Control>\n            <InputImage\n              src={logoSrc}\n              onEdit={handleLogoEdit}\n              onDrop={(file) => {\n                setSelectedLogo(file);\n                setFieldValue(\"use_custom_logo\", true);\n              }}\n              onRemove={handleLogoRemove}\n              showEditOverlay={false}\n            />\n          </FormField.Control>\n          <div className=\"mt-2 w-full justify-center items-center flex\">\n            <Button\n              disabled={!hasLogo}\n              prominence=\"secondary\"\n              onClick={handleLogoEdit}\n              icon={SvgEdit}\n            >\n              Update\n            </Button>\n          </div>\n        </FormField>\n      </div>\n\n      <Separator className=\"my-4\" />\n\n      <Preview\n        className=\"mb-8\"\n        logoDisplayStyle={values.logo_display_style}\n        applicationDisplayName={values.application_name ?? \"\"}\n        chat_footer_content={\n          values.custom_lower_disclaimer_content || \"Chat Footer Content\"\n        }\n        chat_header_content={\n          values.custom_header_content || \"Chat Header Content\"\n        }\n        greeting_message={\n          values.custom_greeting_message || \"Welcome to Acme Chat\"\n        }\n        logoSrc={logoSrc}\n        highlightTarget={highlightTarget}\n      />\n\n      <FormField state={errors.custom_greeting_message ? \"error\" : \"idle\"}>\n        <FormField.Label\n          rightAction={\n            <CharacterCount\n              value={values.custom_greeting_message}\n              limit={charLimits.custom_greeting_message}\n            />\n          }\n        >\n          Greeting Message\n        </FormField.Label>\n        <FormField.Control asChild>\n          <InputTypeIn\n            ref={greetingMessageInputRef}\n            data-label=\"greeting-message-input\"\n            showClearButton\n            variant={errors.custom_greeting_message ? \"error\" : undefined}\n            value={values.custom_greeting_message}\n            {...getPreviewHandlers(\"greeting\")}\n            onChange={(e) =>\n              setFieldValue(\"custom_greeting_message\", e.target.value)\n            }\n          />\n        </FormField.Control>\n        <FormField.Description>\n          Add a short message to the home page.\n        </FormField.Description>\n        <FormField.Message\n          messages={{ error: errors.custom_greeting_message as string }}\n        />\n      </FormField>\n\n      <FormField state={errors.custom_header_content ? \"error\" : \"idle\"}>\n        <FormField.Label\n          rightAction={\n            <CharacterCount\n              value={values.custom_header_content}\n              limit={charLimits.custom_header_content}\n            />\n          }\n        >\n          Chat Header Text\n        </FormField.Label>\n        <FormField.Control asChild>\n          <InputTypeIn\n            ref={headerContentInputRef}\n            data-label=\"chat-header-input\"\n            showClearButton\n            variant={errors.custom_header_content ? \"error\" : undefined}\n            value={values.custom_header_content}\n            {...getPreviewHandlers(\"chat_header\")}\n            onChange={(e) =>\n              setFieldValue(\"custom_header_content\", e.target.value)\n            }\n          />\n        </FormField.Control>\n        <FormField.Message\n          messages={{ error: errors.custom_header_content as string }}\n        />\n      </FormField>\n\n      <FormField\n        state={errors.custom_lower_disclaimer_content ? \"error\" : \"idle\"}\n      >\n        <FormField.Label\n          rightAction={\n            <CharacterCount\n              value={values.custom_lower_disclaimer_content}\n              limit={charLimits.custom_lower_disclaimer_content}\n            />\n          }\n        >\n          Chat Footer Text\n        </FormField.Label>\n        <FormField.Control asChild>\n          <InputTextArea\n            ref={lowerDisclaimerInputRef}\n            data-label=\"chat-footer-textarea\"\n            rows={3}\n            placeholder=\"Add markdown content\"\n            variant={\n              errors.custom_lower_disclaimer_content ? \"error\" : undefined\n            }\n            value={values.custom_lower_disclaimer_content}\n            {...getPreviewHandlers(\"chat_footer\")}\n            onChange={(e) =>\n              setFieldValue(\"custom_lower_disclaimer_content\", e.target.value)\n            }\n          />\n        </FormField.Control>\n        <FormField.Description>\n          Add markdown content for disclaimers or additional information.\n        </FormField.Description>\n        <FormField.Message\n          messages={{ error: errors.custom_lower_disclaimer_content as string }}\n        />\n      </FormField>\n\n      <Separator className=\"my-4\" />\n\n      <div className=\"flex flex-col gap-4 p-4 bg-background-tint-00 rounded-16\">\n        <FormField state=\"idle\" className=\"gap-0\">\n          <div className=\"flex justify-between items-center\">\n            <FormField.Label>Show First Visit Notice</FormField.Label>\n            <FormField.Control>\n              <Switch\n                aria-label=\"Show First Visit Notice\"\n                data-label=\"first-visit-notice-toggle\"\n                checked={values.show_first_visit_notice}\n                onCheckedChange={(checked) =>\n                  setFieldValue(\"show_first_visit_notice\", checked)\n                }\n              />\n            </FormField.Control>\n          </div>\n          <FormField.Description>\n            Show a one-time pop-up for new users at their first visit.\n          </FormField.Description>\n        </FormField>\n\n        {values.show_first_visit_notice && (\n          <>\n            <FormField state={errors.custom_popup_header ? \"error\" : \"idle\"}>\n              <FormField.Label\n                required\n                rightAction={\n                  <CharacterCount\n                    value={values.custom_popup_header}\n                    limit={charLimits.custom_popup_header}\n                  />\n                }\n              >\n                Notice Header\n              </FormField.Label>\n              <FormField.Control asChild>\n                <InputTypeIn\n                  ref={noticeHeaderInputRef}\n                  data-label=\"notice-header-input\"\n                  showClearButton\n                  variant={errors.custom_popup_header ? \"error\" : undefined}\n                  value={values.custom_popup_header}\n                  onChange={(e) =>\n                    setFieldValue(\"custom_popup_header\", e.target.value)\n                  }\n                />\n              </FormField.Control>\n              <FormField.Message\n                messages={{ error: errors.custom_popup_header as string }}\n              />\n            </FormField>\n\n            <FormField state={errors.custom_popup_content ? \"error\" : \"idle\"}>\n              <FormField.Label\n                required\n                rightAction={\n                  <CharacterCount\n                    value={values.custom_popup_content}\n                    limit={charLimits.custom_popup_content}\n                  />\n                }\n              >\n                Notice Content\n              </FormField.Label>\n              <FormField.Control asChild>\n                <InputTextArea\n                  ref={noticeContentInputRef}\n                  data-label=\"notice-content-textarea\"\n                  rows={3}\n                  placeholder=\"Add markdown content\"\n                  variant={errors.custom_popup_content ? \"error\" : undefined}\n                  value={values.custom_popup_content}\n                  onChange={(e) =>\n                    setFieldValue(\"custom_popup_content\", e.target.value)\n                  }\n                />\n              </FormField.Control>\n              <FormField.Message\n                messages={{ error: errors.custom_popup_content as string }}\n              />\n            </FormField>\n\n            <FormField state=\"idle\" className=\"gap-0\">\n              <div className=\"flex justify-between items-center\">\n                <FormField.Label>Require Consent to Notice</FormField.Label>\n                <FormField.Control>\n                  <Switch\n                    aria-label=\"Require Consent to Notice\"\n                    data-label=\"require-consent-toggle\"\n                    checked={values.enable_consent_screen}\n                    onCheckedChange={(checked) =>\n                      setFieldValue(\"enable_consent_screen\", checked)\n                    }\n                  />\n                </FormField.Control>\n              </div>\n              <FormField.Description>\n                Require the user to read and agree to the notice before\n                accessing the application.\n              </FormField.Description>\n            </FormField>\n\n            {values.enable_consent_screen && (\n              <FormField\n                state={errors.consent_screen_prompt ? \"error\" : \"idle\"}\n              >\n                <FormField.Label\n                  required\n                  rightAction={\n                    <CharacterCount\n                      value={values.consent_screen_prompt}\n                      limit={charLimits.consent_screen_prompt}\n                    />\n                  }\n                >\n                  Notice Consent Prompt\n                </FormField.Label>\n                <FormField.Control asChild>\n                  <InputTextArea\n                    ref={consentPromptTextAreaRef}\n                    data-label=\"consent-prompt-textarea\"\n                    rows={3}\n                    placeholder=\"Add markdown content\"\n                    variant={errors.consent_screen_prompt ? \"error\" : undefined}\n                    value={values.consent_screen_prompt}\n                    onChange={(e) => {\n                      setFieldValue(\"consent_screen_prompt\", e.target.value);\n                    }}\n                  />\n                </FormField.Control>\n                <FormField.Message\n                  messages={{ error: errors.consent_screen_prompt as string }}\n                />\n              </FormField>\n            )}\n          </>\n        )}\n      </div>\n    </div>\n  );\n});\n"
  },
  {
    "path": "web/src/app/ee/admin/theme/Preview.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport type { Components } from \"react-markdown\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport { cn, ensureHrefProtocol } from \"@/lib/utils\";\nimport { OnyxIcon } from \"@/components/icons/icons\";\nimport MinimalMarkdown from \"@/components/chat/MinimalMarkdown\";\n\nconst previewMarkdownComponents = {\n  p: ({ children }) => (\n    <Text as=\"p\" text03 figureSmallValue className=\"!my-0 text-center\">\n      {children}\n    </Text>\n  ),\n  a: ({ node, href, className, children, ...rest }) => {\n    const fullHref = ensureHrefProtocol(href);\n    return (\n      <a\n        href={fullHref}\n        target=\"_blank\"\n        rel=\"noopener noreferrer\"\n        {...rest}\n        className={cn(className, \"underline underline-offset-2\")}\n      >\n        <Text text03 figureSmallValue>\n          {children}\n        </Text>\n      </a>\n    );\n  },\n} satisfies Partial<Components>;\n\nconst PreviewMinimalMarkdown = React.memo(function PreviewMinimalMarkdown({\n  content,\n  className,\n}: {\n  content: string;\n  className?: string;\n}) {\n  return (\n    <MinimalMarkdown\n      content={content}\n      className={className}\n      components={previewMarkdownComponents}\n    />\n  );\n});\n\nexport type PreviewHighlightTarget =\n  | \"sidebar\"\n  | \"greeting\"\n  | \"chat_header\"\n  | \"chat_footer\";\n\nexport interface PreviewProps {\n  logoDisplayStyle: \"logo_and_name\" | \"logo_only\" | \"name_only\";\n  applicationDisplayName: string;\n  chat_footer_content: string;\n  chat_header_content: string;\n  greeting_message: string;\n  className?: string;\n  logoSrc?: string;\n  highlightTarget?: PreviewHighlightTarget | null;\n}\n\nfunction PreviewLogo({\n  logoSrc,\n  forceOnyxIcon,\n  size,\n  className,\n}: {\n  logoSrc?: string;\n  forceOnyxIcon?: boolean;\n  size: number;\n  className?: string;\n}) {\n  return logoSrc && !forceOnyxIcon ? (\n    <img\n      src={logoSrc}\n      alt=\"Logo\"\n      style={{\n        objectFit: \"cover\",\n        height: `${size}px`,\n        width: `${size}px`,\n      }}\n      className={cn(\"flex-shrink-0 rounded-full\", className)}\n    />\n  ) : (\n    <OnyxIcon size={size} className={cn(\"flex-shrink-0\", className)} />\n  );\n}\n\nexport function InputPreview() {\n  return (\n    <div className=\"bg-background-neutral-00 border border-border-01 flex flex-col gap-1.5 items-end pb-1 pl-2.5 pr-1 pt-2.5 rounded-08 w-full h-14\">\n      <div className=\"h-5 w-5 bg-theme-primary-05 mt-auto rounded-[0.25rem]\"></div>\n    </div>\n  );\n}\n\nfunction PreviewStart({\n  logoDisplayStyle,\n  applicationDisplayName,\n  chat_footer_content,\n  chat_header_content,\n  greeting_message,\n  logoSrc,\n  highlightTarget,\n}: PreviewProps) {\n  return (\n    <div className=\"flex h-60 rounded-12 shadow-00 bg-background-tint-01 relative\">\n      {/* Sidebar */}\n      <div className=\"flex w-[6rem] h-full bg-background-tint-02 rounded-l-12 p-1 justify-start\">\n        <div className=\"flex flex-col h-fit w-full justify-start\">\n          <div\n            className={cn(\n              \"inline-flex max-w-full items-center justify-start gap-1 rounded-08 p-0.5 overflow-hidden\",\n              highlightTarget === \"sidebar\" && \"bg-highlight-match\"\n            )}\n          >\n            {logoDisplayStyle !== \"name_only\" && (\n              <PreviewLogo\n                logoSrc={logoSrc}\n                size={16}\n                forceOnyxIcon={\n                  logoDisplayStyle === \"logo_and_name\" &&\n                  !applicationDisplayName\n                }\n              />\n            )}\n            {(logoDisplayStyle === \"logo_and_name\" ||\n              logoDisplayStyle === \"name_only\") && (\n              <Truncated mainUiAction text04 nowrap>\n                {applicationDisplayName || \"Onyx\"}\n              </Truncated>\n            )}\n          </div>\n        </div>\n      </div>\n      {/* Chat */}\n      <div className=\"flex flex-col flex-1 h-full\">\n        {/* Chat Body */}\n        <div className=\"flex flex-col flex-1 h-full items-center justify-center px-3\">\n          <div className=\"flex w-full max-w-[300px] flex-col items-center justify-center\">\n            <div\n              className={cn(\n                \"inline-flex max-w-full items-center justify-center gap-1 mb-2 rounded-08 border border-transparent p-0.5 text-center\",\n                highlightTarget === \"greeting\" && \"bg-highlight-match\"\n              )}\n            >\n              <PreviewLogo logoSrc={logoSrc} size={18} />\n              <Text\n                text04\n                headingH3\n                className=\"max-w-[260px] whitespace-normal break-words text-center\"\n              >\n                {greeting_message}\n              </Text>\n            </div>\n            <InputPreview />\n          </div>\n        </div>\n        {/* Chat Footer */}\n        <div className=\"flex flex-col items-center justify-end w-full\">\n          <div className=\"flex w-full max-w-[300px] justify-center\">\n            <div\n              className={cn(\n                \"inline-flex max-w-full items-start justify-center rounded-04 border border-transparent p-0.5 text-center\",\n                highlightTarget === \"chat_footer\" && \"bg-highlight-match\"\n              )}\n            >\n              <PreviewMinimalMarkdown\n                content={chat_footer_content}\n                className={cn(\"max-w-full text-center origin-center\")}\n              />\n            </div>\n          </div>\n        </div>\n      </div>\n    </div>\n  );\n}\n\nfunction PreviewChat({\n  chat_header_content,\n  chat_footer_content,\n  highlightTarget,\n}: {\n  chat_header_content: string;\n  chat_footer_content: string;\n  highlightTarget?: PreviewHighlightTarget | null;\n}) {\n  return (\n    <div className=\"flex flex-col h-60 relative bg-background-tint-01 rounded-12 shadow-00\">\n      {/* Header */}\n      <div className=\"flex justify-center w-full\">\n        <div className=\"flex w-full max-w-[300px] justify-center\">\n          <div\n            className={cn(\n              \"inline-flex max-w-full items-center justify-center rounded-08 border border-transparent p-0.5 text-center\",\n              highlightTarget === \"chat_header\" && \"bg-highlight-match\"\n            )}\n          >\n            <Text\n              figureSmallLabel\n              text03\n              className=\"max-w-full whitespace-normal break-words text-center\"\n            >\n              {chat_header_content}\n            </Text>\n          </div>\n        </div>\n      </div>\n\n      {/* Main Content */}\n      <div className=\"flex flex-1 flex-col gap-2 items-center justify-end max-w-[300px] w-full px-3 py-0 mx-auto\">\n        {/* User message bubble (right side) */}\n        <div className=\"flex flex-col items-end w-full\">\n          <div className=\"bg-background-tint-02 flex flex-col items-start px-2.5 py-2 rounded-bl-[10px] rounded-tl-[10px] rounded-tr-[10px]\">\n            <div className=\"bg-background-neutral-03 h-1.5 rounded-04 w-20\" />\n          </div>\n        </div>\n\n        {/* AI response bubble (left side) */}\n        <div className=\"flex flex-col gap-1.5 items-start pl-2 pr-16 py-2 w-full\">\n          <div className=\"bg-background-neutral-03 h-1.5 rounded-04 w-full\" />\n          <div className=\"bg-background-neutral-03 h-1.5 rounded-04 w-full\" />\n          <div className=\"bg-background-neutral-03 h-1.5 rounded-04 w-12\" />\n        </div>\n\n        {/* Input field */}\n        <InputPreview />\n      </div>\n\n      {/* Footer */}\n      <div className=\"flex flex-col items-center justify-end w-full\">\n        <div className=\"flex w-full max-w-[300px] justify-center\">\n          <div\n            className={cn(\n              \"inline-flex max-w-full items-start justify-center rounded-04 border border-transparent p-0.5 text-center\",\n              highlightTarget === \"chat_footer\" && \"bg-highlight-match\"\n            )}\n          >\n            <PreviewMinimalMarkdown\n              content={chat_footer_content}\n              className={cn(\"max-w-full text-center origin-center\")}\n            />\n          </div>\n        </div>\n      </div>\n    </div>\n  );\n}\nexport function Preview({\n  logoDisplayStyle,\n  applicationDisplayName,\n  chat_footer_content,\n  chat_header_content,\n  greeting_message,\n  logoSrc,\n  className,\n  highlightTarget,\n}: PreviewProps) {\n  return (\n    <div className={cn(\"grid grid-cols-2 gap-2\", className)}>\n      <PreviewStart\n        logoDisplayStyle={logoDisplayStyle}\n        applicationDisplayName={applicationDisplayName}\n        chat_footer_content={chat_footer_content}\n        chat_header_content={chat_header_content}\n        greeting_message={greeting_message}\n        logoSrc={logoSrc}\n        highlightTarget={highlightTarget}\n      />\n      <PreviewChat\n        chat_header_content={chat_header_content}\n        chat_footer_content={chat_footer_content}\n        highlightTarget={highlightTarget}\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/admin/theme/page.tsx",
    "content": "\"use client\";\n\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport { Button } from \"@opal/components\";\nimport {\n  AppearanceThemeSettings,\n  AppearanceThemeSettingsRef,\n} from \"./AppearanceThemeSettings\";\nimport { useContext, useRef, useState } from \"react\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport { toast } from \"@/hooks/useToast\";\nimport { Formik, Form } from \"formik\";\nimport * as Yup from \"yup\";\nimport { EnterpriseSettings } from \"@/interfaces/settings\";\nimport { mutate } from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nconst route = ADMIN_ROUTES.THEME;\n\nconst CHAR_LIMITS = {\n  application_name: 50,\n  custom_greeting_message: 50,\n  custom_header_content: 100,\n  custom_lower_disclaimer_content: 200,\n  custom_popup_header: 100,\n  custom_popup_content: 500,\n  consent_screen_prompt: 200,\n};\n\nexport default function ThemePage() {\n  const settings = useContext(SettingsContext);\n  const [selectedLogo, setSelectedLogo] = useState<File | null>(null);\n  const [logoVersion, setLogoVersion] = useState(0);\n  const appearanceSettingsRef = useRef<AppearanceThemeSettingsRef>(null);\n\n  if (!settings) {\n    return null;\n  }\n\n  const enterpriseSettings = settings.enterpriseSettings;\n\n  async function updateEnterpriseSettings(\n    newValues: EnterpriseSettings\n  ): Promise<boolean> {\n    const response = await fetch(\"/api/admin/enterprise-settings\", {\n      method: \"PUT\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        ...(enterpriseSettings || {}),\n        ...newValues,\n      }),\n    });\n    if (response.ok) {\n      await mutate(SWR_KEYS.enterpriseSettings);\n      return true;\n    } else {\n      const errorMsg = (await response.json()).detail;\n      alert(`Failed to update settings. ${errorMsg}`);\n      return false;\n    }\n  }\n\n  const validationSchema = Yup.object().shape({\n    application_name: Yup.string()\n      .trim()\n      .max(\n        CHAR_LIMITS.application_name,\n        `Maximum ${CHAR_LIMITS.application_name} characters`\n      )\n      .nullable(),\n    logo_display_style: Yup.string()\n      .oneOf([\"logo_and_name\", \"logo_only\", \"name_only\"])\n      .required(),\n    use_custom_logo: Yup.boolean().required(),\n    custom_greeting_message: Yup.string()\n      .max(\n        CHAR_LIMITS.custom_greeting_message,\n        `Maximum ${CHAR_LIMITS.custom_greeting_message} characters`\n      )\n      .nullable(),\n    custom_header_content: Yup.string()\n      .max(\n        CHAR_LIMITS.custom_header_content,\n        `Maximum ${CHAR_LIMITS.custom_header_content} characters`\n      )\n      .nullable(),\n    custom_lower_disclaimer_content: Yup.string()\n      .max(\n        CHAR_LIMITS.custom_lower_disclaimer_content,\n        `Maximum ${CHAR_LIMITS.custom_lower_disclaimer_content} characters`\n      )\n      .nullable(),\n    show_first_visit_notice: Yup.boolean().nullable(),\n    custom_popup_header: Yup.string()\n      .max(\n        CHAR_LIMITS.custom_popup_header,\n        `Maximum ${CHAR_LIMITS.custom_popup_header} characters`\n      )\n      .when(\"show_first_visit_notice\", {\n        is: true,\n        then: (schema) => schema.required(\"Notice Header is required\"),\n        otherwise: (schema) => schema.nullable(),\n      }),\n    custom_popup_content: Yup.string()\n      .max(\n        CHAR_LIMITS.custom_popup_content,\n        `Maximum ${CHAR_LIMITS.custom_popup_content} characters`\n      )\n      .when(\"show_first_visit_notice\", {\n        is: true,\n        then: (schema) => schema.required(\"Notice Content is required\"),\n        otherwise: (schema) => schema.nullable(),\n      }),\n    enable_consent_screen: Yup.boolean().nullable(),\n    consent_screen_prompt: Yup.string()\n      .max(\n        CHAR_LIMITS.consent_screen_prompt,\n        `Maximum ${CHAR_LIMITS.consent_screen_prompt} characters`\n      )\n      .when(\"enable_consent_screen\", {\n        is: true,\n        then: (schema) => schema.required(\"Notice Consent Prompt is required\"),\n        otherwise: (schema) => schema.nullable(),\n      }),\n  });\n\n  return (\n    <Formik\n      initialValues={{\n        application_name: enterpriseSettings?.application_name || \"\",\n        logo_display_style:\n          enterpriseSettings?.logo_display_style || \"logo_and_name\",\n        use_custom_logo: enterpriseSettings?.use_custom_logo || false,\n        custom_greeting_message:\n          enterpriseSettings?.custom_greeting_message || \"\",\n        custom_header_content: enterpriseSettings?.custom_header_content || \"\",\n        custom_lower_disclaimer_content:\n          enterpriseSettings?.custom_lower_disclaimer_content || \"\",\n        show_first_visit_notice:\n          enterpriseSettings?.show_first_visit_notice || false,\n        custom_popup_header: enterpriseSettings?.custom_popup_header || \"\",\n        custom_popup_content: enterpriseSettings?.custom_popup_content || \"\",\n        enable_consent_screen:\n          enterpriseSettings?.enable_consent_screen || false,\n        consent_screen_prompt: enterpriseSettings?.consent_screen_prompt || \"\",\n      }}\n      validationSchema={validationSchema}\n      validateOnChange={false}\n      onSubmit={async (values, formikHelpers) => {\n        let logoUploaded = false;\n\n        // Handle logo upload if a new logo was selected\n        if (selectedLogo) {\n          const formData = new FormData();\n          formData.append(\"file\", selectedLogo);\n          const response = await fetch(\"/api/admin/enterprise-settings/logo\", {\n            method: \"PUT\",\n            body: formData,\n          });\n          if (!response.ok) {\n            const errorMsg = (await response.json()).detail;\n            alert(`Failed to upload logo. ${errorMsg}`);\n            formikHelpers.setSubmitting(false);\n            return;\n          }\n          // Only clear the selected logo after a successful upload\n          setSelectedLogo(null);\n          logoUploaded = true;\n          values.use_custom_logo = true;\n        }\n\n        // Update enterprise settings\n        const success = await updateEnterpriseSettings({\n          application_name: values.application_name || null,\n          use_custom_logo: values.use_custom_logo,\n          use_custom_logotype: enterpriseSettings?.use_custom_logotype || false,\n          logo_display_style: values.logo_display_style || null,\n          custom_nav_items: enterpriseSettings?.custom_nav_items || [],\n          custom_greeting_message: values.custom_greeting_message || null,\n          custom_header_content: values.custom_header_content || null,\n          custom_lower_disclaimer_content:\n            values.custom_lower_disclaimer_content || null,\n          two_lines_for_chat_header:\n            enterpriseSettings?.two_lines_for_chat_header || null,\n          custom_popup_header: values.custom_popup_header || null,\n          custom_popup_content: values.custom_popup_content || null,\n          show_first_visit_notice: values.show_first_visit_notice || null,\n          enable_consent_screen: values.enable_consent_screen || null,\n          consent_screen_prompt: values.consent_screen_prompt || null,\n        });\n\n        // Important: after a successful save, reset Formik's \"baseline\" so\n        // dirty comparisons reflect the newly-saved values.\n        if (success) {\n          formikHelpers.resetForm({ values });\n          if (logoUploaded) {\n            setLogoVersion((v) => v + 1);\n          }\n          toast.success(\"Appearance settings saved successfully!\");\n        }\n\n        formikHelpers.setSubmitting(false);\n      }}\n    >\n      {({\n        isSubmitting,\n        dirty,\n        values,\n        validateForm,\n        setErrors,\n        setTouched,\n        submitForm,\n      }) => {\n        const hasLogoChange = !!selectedLogo;\n\n        return (\n          <Form className=\"w-full h-full\">\n            <SettingsLayouts.Root>\n              <SettingsLayouts.Header\n                title={route.title}\n                description=\"Customize how the application appears to users across your organization.\"\n                icon={route.icon}\n                rightChildren={\n                  <Button\n                    disabled={isSubmitting || (!dirty && !hasLogoChange)}\n                    type=\"button\"\n                    onClick={async () => {\n                      const errors = await validateForm();\n                      if (Object.keys(errors).length > 0) {\n                        setErrors(errors);\n                        appearanceSettingsRef.current?.focusFirstError(errors);\n                        return;\n                      }\n                      await submitForm();\n                    }}\n                  >\n                    {isSubmitting ? \"Applying...\" : \"Apply Changes\"}\n                  </Button>\n                }\n              />\n              <SettingsLayouts.Body>\n                <AppearanceThemeSettings\n                  ref={appearanceSettingsRef}\n                  selectedLogo={selectedLogo}\n                  setSelectedLogo={setSelectedLogo}\n                  logoVersion={logoVersion}\n                  charLimits={CHAR_LIMITS}\n                />\n              </SettingsLayouts.Body>\n            </SettingsLayouts.Root>\n          </Form>\n        );\n      }}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/agents/stats/[id]/AgentStats.tsx",
    "content": "\"use client\";\n\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { getDatesList } from \"@/app/ee/admin/performance/lib\";\nimport { useEffect, useState, useMemo } from \"react\";\nimport {\n  AdminDateRangeSelector,\n  DateRange,\n} from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { useAgents } from \"@/hooks/useAgents\";\nimport AgentAvatar from \"@/refresh-components/avatars/AgentAvatar\";\nimport { Card, CardContent, CardHeader } from \"@/components/ui/card\";\nimport { AreaChartDisplay } from \"@/components/ui/areaChart\";\n\ntype AgentDailyUsageEntry = {\n  date: string;\n  total_messages: number;\n  total_unique_users: number;\n};\n\ntype AgentStatsResponse = {\n  daily_stats: AgentDailyUsageEntry[];\n  total_messages: number;\n  total_unique_users: number;\n};\n\nexport function AgentStats({ agentId }: { agentId: number }) {\n  const [agentStats, setAgentStats] = useState<AgentStatsResponse | null>(null);\n  const { agents } = useAgents();\n  const [isLoading, setIsLoading] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n  const [dateRange, setDateRange] = useState<DateRange>({\n    from: new Date(new Date().setDate(new Date().getDate() - 30)),\n    to: new Date(),\n  });\n\n  const agent = useMemo(() => {\n    return agents.find((a) => a.id === agentId);\n  }, [agents, agentId]);\n\n  useEffect(() => {\n    async function fetchStats() {\n      try {\n        setIsLoading(true);\n        setError(null);\n\n        const res = await fetch(\n          `/api/analytics/assistant/${agentId}/stats?start=${\n            dateRange?.from?.toISOString() || \"\"\n          }&end=${dateRange?.to?.toISOString() || \"\"}`\n        );\n\n        if (!res.ok) {\n          if (res.status === 403) {\n            throw new Error(\"You don't have permission to view these stats.\");\n          }\n          throw new Error(\"Failed to fetch agent stats\");\n        }\n\n        const data = (await res.json()) as AgentStatsResponse;\n        setAgentStats(data);\n      } catch (err) {\n        setError(\n          err instanceof Error ? err.message : \"An unknown error occurred\"\n        );\n      } finally {\n        setIsLoading(false);\n      }\n    }\n\n    fetchStats();\n  }, [agentId, dateRange]);\n\n  const chartData = useMemo(() => {\n    if (!agentStats?.daily_stats?.length || !dateRange) {\n      return null;\n    }\n\n    const initialDate =\n      dateRange.from ||\n      new Date(\n        Math.min(\n          ...agentStats.daily_stats.map((entry) =>\n            new Date(entry.date).getTime()\n          )\n        )\n      );\n    const endDate = dateRange.to || new Date();\n\n    const dateRangeList = getDatesList(initialDate);\n\n    const statsMap = new Map(\n      agentStats.daily_stats.map((entry) => [entry.date, entry])\n    );\n\n    return dateRangeList\n      .filter((date) => new Date(date) <= endDate)\n      .map((dateStr) => {\n        const dayData = statsMap.get(dateStr);\n        return {\n          Day: dateStr,\n          Messages: dayData?.total_messages || 0,\n          \"Unique Users\": dayData?.total_unique_users || 0,\n        };\n      });\n  }, [agentStats, dateRange]);\n\n  const totalMessages = agentStats?.total_messages ?? 0;\n  const totalUniqueUsers = agentStats?.total_unique_users ?? 0;\n\n  let content;\n  if (isLoading || !agent) {\n    content = (\n      <div className=\"h-80 flex flex-col\">\n        <ThreeDotsLoader />\n      </div>\n    );\n  } else if (error) {\n    content = (\n      <div className=\"h-80 text-red-600 font-bold flex flex-col\">\n        <p className=\"m-auto\">{error}</p>\n      </div>\n    );\n  } else if (!agentStats?.daily_stats?.length) {\n    content = (\n      <div className=\"h-80 text-text-500 flex flex-col\">\n        <p className=\"m-auto\">\n          No data found for this agent in the selected date range\n        </p>\n      </div>\n    );\n  } else if (chartData) {\n    content = (\n      <AreaChartDisplay\n        className=\"mt-4\"\n        data={chartData}\n        categories={[\"Messages\", \"Unique Users\"]}\n        index=\"Day\"\n        colors={[\"#4A4A4A\", \"#A0A0A0\"]}\n        yAxisWidth={60}\n      />\n    );\n  }\n\n  return (\n    <Card className=\"w-full\">\n      <CardHeader className=\"flex flex-row items-center justify-between space-y-0 pb-2\">\n        <p className=\"text-base font-normal text-2xl\">Agent Analytics</p>\n        <AdminDateRangeSelector\n          value={dateRange}\n          onValueChange={setDateRange}\n        />\n      </CardHeader>\n      <CardContent>\n        <div className=\"grid grid-cols-1 md:grid-cols-2 gap-4 mb-6\">\n          <Card>\n            <CardContent className=\"pt-6\">\n              <div className=\"flex items-center space-x-4\">\n                {agent && <AgentAvatar agent={agent} />}\n                <div>\n                  <h3 className=\"text-lg font-normal\">{agent?.name}</h3>\n                  <p className=\"text-sm text-text-500\">{agent?.description}</p>\n                </div>\n              </div>\n            </CardContent>\n          </Card>\n          <Card>\n            <CardContent className=\"pt-6\">\n              <div className=\"grid grid-cols-2 gap-4\">\n                <div>\n                  <p className=\"text-sm font-medium text-text-500\">\n                    Total Messages\n                  </p>\n                  <p className=\"text-2xl font-normal\">{totalMessages}</p>\n                </div>\n                <div>\n                  <p className=\"text-sm font-medium text-text-500\">\n                    Total Unique Users\n                  </p>\n                  <p className=\"text-2xl font-normal\">{totalUniqueUsers}</p>\n                </div>\n              </div>\n            </CardContent>\n          </Card>\n        </div>\n        {content}\n      </CardContent>\n    </Card>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/agents/stats/[id]/page.tsx",
    "content": "import { InstantSSRAutoRefresh } from \"@/components/SSRAutoRefresh\";\nimport { unstable_noStore as noStore } from \"next/cache\";\nimport { redirect } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { requireAuth } from \"@/lib/auth/requireAuth\";\nimport { AgentStats } from \"./AgentStats\";\nimport BackButton from \"@/refresh-components/buttons/BackButton\";\n\nexport default async function GalleryPage(props: {\n  params: Promise<{ id: string }>;\n}) {\n  const params = await props.params;\n  noStore();\n\n  // Only check authentication - data fetching is done client-side via SWR hooks\n  const authResult = await requireAuth();\n\n  if (authResult.redirect) {\n    redirect(authResult.redirect as Route);\n  }\n\n  return (\n    <>\n      <div className=\"absolute top-4 left-4\">\n        <BackButton />\n      </div>\n\n      <div className=\"w-full py-8\">\n        <div className=\"px-32\">\n          <InstantSSRAutoRefresh />\n          <div className=\"max-w-4xl mx-auto !border-none !bg-transparent !ring-none\">\n            <AgentStats agentId={parseInt(params.id)} />\n          </div>\n        </div>\n      </div>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/ee/layout.tsx",
    "content": "import { SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED } from \"@/lib/constants\";\nimport { fetchStandardSettingsSS } from \"@/components/settings/lib\";\nimport EEFeatureRedirect from \"@/app/ee/EEFeatureRedirect\";\n\nexport default async function AdminLayout({\n  children,\n}: {\n  children: React.ReactNode;\n}) {\n  // First check build-time constant (fast path)\n  if (!SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED) {\n    return <EEFeatureRedirect />;\n  }\n\n  // Then check runtime license status (for license enforcement mode)\n  // This allows gating EE features when user doesn't have a valid license\n  try {\n    const settingsResponse = await fetchStandardSettingsSS();\n    if (settingsResponse?.ok) {\n      const settings = await settingsResponse.json();\n      if (settings.ee_features_enabled === false) {\n        // When the app is in GATED_ACCESS (expired or missing license), defer\n        // to the root layout's GatedContentWrapper which handles path-based\n        // exemptions (e.g. allowing /admin/billing for license management).\n        if (settings.application_status === \"gated_access\") {\n          return children;\n        }\n\n        return <EEFeatureRedirect />;\n      }\n    }\n  } catch (error) {\n    // If settings fetch fails, allow access (fail open for better UX)\n    console.error(\"Failed to fetch settings for EE check:\", error);\n  }\n\n  return children;\n}\n"
  },
  {
    "path": "web/src/app/federated/oauth/callback/page.tsx",
    "content": "\"use client\";\n\nimport OAuthCallbackPage from \"@/components/oauth/OAuthCallbackPage\";\nimport { getSourceDisplayName } from \"@/lib/sources\";\n\nexport default function FederatedOAuthCallbackPage() {\n  const federatedConfig = {\n    processingMessage: \"Processing...\",\n    processingDetails: \"Please wait while we complete the setup.\",\n    successMessage: \"Success!\",\n    successDetailsTemplate:\n      \"Your {serviceName} authorization completed successfully. You can now use this connector for search.\",\n    errorMessage: \"Something Went Wrong\",\n    backButtonText: \"Back to Chat\",\n    redirectingMessage: \"Redirecting to chat in 2 seconds...\",\n    autoRedirectDelay: 2000,\n    defaultRedirectPath: \"/app\",\n    callbackApiUrl: \"/api/federated/callback\",\n    errorMessageMap: {\n      \"validation errors\":\n        \"Configuration error - please check your connector settings\",\n      client_secret: \"Authentication credentials are missing or invalid\",\n      oauth: \"OAuth authorization failed\",\n    },\n  };\n\n  return <OAuthCallbackPage config={federatedConfig} />;\n}\n"
  },
  {
    "path": "web/src/app/global-error.tsx",
    "content": "\"use client\";\n\nimport * as Sentry from \"@sentry/nextjs\";\nimport NextError from \"next/error\";\nimport { useEffect } from \"react\";\n\n// This global error page is necessary to capture errors that occur in the app.\nexport default function GlobalError({\n  error,\n}: {\n  error: Error & { digest?: string };\n}) {\n  useEffect(() => {\n    if (process.env.NEXT_PUBLIC_SENTRY_DSN) {\n      Sentry.captureException(error);\n    }\n  }, [error]);\n\n  return (\n    <html>\n      <body>\n        {/* NextError require  a `statusCode` prop. However, since the App Router\n        does not expose status codes for errors, we simply pass 0 to render a\n        generic error message. */}\n        <NextError statusCode={0} />\n      </body>\n    </html>\n  );\n}\n"
  },
  {
    "path": "web/src/app/globals.css",
    "content": "@import \"css/attachment-button.css\";\n@import \"css/button.css\";\n@import \"css/card.css\";\n@import \"css/code.css\";\n@import \"css/color-swatch.css\";\n@import \"css/colors.css\";\n@import \"css/divider.css\";\n@import \"css/general-layouts.css\";\n@import \"css/inputs.css\";\n@import \"css/knowledge-table.css\";\n@import \"css/line-item.css\";\n@import \"css/sizes.css\";\n@import \"css/square-button.css\";\n@import \"css/switch.css\";\n@import \"css/z-index.css\";\n\n/* KH Teka Font */\n\n@font-face {\n  font-family: \"KH Teka\";\n  src: url(\"/fonts/KHTeka-Medium.otf\") format(\"opentype\");\n  font-weight: 500;\n  font-style: normal;\n  font-display: swap;\n}\n\n@tailwind base;\n@tailwind components;\n@tailwind utilities;\n\n@layer base {\n  /* BORDER RADII */\n  :root {\n    --border-radius-02: 0.125rem;\n    --border-radius-04: 0.25rem;\n    --border-radius-08: 0.5rem;\n    --border-radius-12: 0.75rem;\n    --border-radius-16: 1rem;\n    --border-radius-full: 64rem;\n  }\n\n  /* BACKDROP BLUR */\n  :root {\n    --backdrop-blur-01: 2px;\n    --backdrop-blur-02: 1px;\n    --backdrop-blur-03: 1px;\n  }\n\n  * {\n    @apply border-border;\n  }\n\n  body {\n    @apply bg-background text-foreground;\n    overscroll-behavior-y: none;\n    overflow-anchor: none;\n  }\n\n  form {\n    display: flex;\n    flex-direction: column;\n    align-items: start;\n  }\n\n  ol > li > p,\n  ul > li > p {\n    margin-top: 0;\n    margin-bottom: 0;\n    display: inline;\n    /* Make paragraphs inline to reduce vertical space */\n  }\n\n  /* Reduce spacing for markdown elements in chat */\n  .prose h1,\n  .prose h2,\n  .prose h3,\n  .prose h4,\n  .prose h5,\n  .prose h6 {\n    margin-top: 0.75em;\n    margin-bottom: 0.5em;\n  }\n\n  .prose ul,\n  .prose ol {\n    margin-top: 0.5em;\n    margin-bottom: 0.5em;\n  }\n\n  .prose ol {\n    list-style-type: decimal;\n    padding-left: 1.5rem;\n    margin-left: 0;\n  }\n\n  .prose ul {\n    list-style-type: disc;\n    padding-left: 1.5rem;\n    margin-left: 0;\n  }\n\n  .prose li {\n    margin-top: 0.25em;\n    margin-bottom: 0.25em;\n    display: list-item;\n  }\n\n  .prose hr {\n    margin-top: 1.25em;\n    margin-bottom: 1em;\n  }\n\n  .prose p {\n    margin-top: 0.5em;\n    margin-bottom: 0.5em;\n  }\n\n  /* Remove top margin from first child to align with icon */\n  .prose > :first-child {\n    margin-top: 0;\n  }\n\n  /* Remove bottom margin from last child to avoid extra space */\n  .prose > :last-child {\n    margin-bottom: 0;\n  }\n}\n\n@layer utilities {\n  .no-scrollbar {\n    scrollbar-width: none;\n  }\n\n  /* SHADOWS */\n  .shadow-00 {\n    box-shadow: 0px 0px 2px 1px var(--shadow-01);\n  }\n  .shadow-01 {\n    box-shadow:\n      0px 2px 12px 0px var(--shadow-02),\n      0px 0px 4px 1px var(--shadow-02);\n  }\n  .shadow-02 {\n    box-shadow:\n      0px 2px 24px 0px var(--shadow-03),\n      0px 0px 12px 1px var(--shadow-03);\n  }\n\n  /* RADIAL GRADIENTS */\n  .radial-00 {\n    background: radial-gradient(\n      236.31% 141.42% at 0% 0%,\n      var(--background-tint-00) 0%,\n      var(--background-tint-01) 100%\n    );\n  }\n\n  /* DEBUGGING UTILITIES\n  If you ever want to highlight a component for debugging purposes, just type in `className=\"dbg-red ...\"`, and a red box should appear around it.\n  This helps with placing things properly on the screen and seeing how they look during active development.\n  */\n\n  .dbg-red {\n    border: 1px solid red;\n  }\n  .dbg-blue {\n    border: 1px solid blue;\n  }\n  .dbg-green {\n    border: 1px solid green;\n  }\n}\n\n/* TYPOGRAPHY SYSTEM - Imported from Figma Design System */\n\n/* Font Imports */\n\n/* Font Family Variables */\n:root {\n  --font-hanken-grotesk: \"Hanken Grotesk\", -apple-system, BlinkMacSystemFont,\n    \"Segoe UI\", Roboto, sans-serif;\n  --font-dm-mono: \"DM Mono\", \"SF Mono\", Monaco, \"Cascadia Code\", \"Roboto Mono\",\n    Consolas, \"Courier New\", monospace;\n  --font-kh-teka: \"KH Teka\", -apple-system, BlinkMacSystemFont, \"Segoe UI\",\n    Roboto, sans-serif;\n}\n\n/* HEADING STYLES */\n\n.font-heading-h1 {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 48px;\n  font-weight: 600;\n  line-height: 64px;\n  letter-spacing: -0.48px;\n}\n\n.font-heading-h2 {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 24px;\n  font-weight: 600;\n  line-height: 36px;\n  letter-spacing: -0.24px;\n}\n\n.font-heading-h3 {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 18px;\n  font-weight: 600;\n  line-height: 28px;\n  letter-spacing: -0.18px;\n}\n\n.font-heading-h3-muted {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 18px;\n  font-weight: 500;\n  line-height: 28px;\n  letter-spacing: -0.18px;\n}\n\n/* MAIN CONTENT STYLES */\n\n.font-main-content-body {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 16px;\n  font-weight: 450;\n  line-height: 24px;\n  letter-spacing: 0px;\n}\n\n.font-main-content-body strong {\n  font-weight: 700;\n}\n\n.font-main-content-emphasis {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 16px;\n  font-weight: 700;\n  line-height: 24px;\n  letter-spacing: 0px;\n}\n\n.font-main-content-muted {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 16px;\n  font-weight: 400;\n  line-height: 24px;\n  letter-spacing: 0px;\n}\n\n.font-main-content-mono {\n  font-family: var(--font-dm-mono);\n  font-size: 16px;\n  font-weight: 400;\n  line-height: 23px;\n  letter-spacing: 0px;\n}\n\n/* MAIN UI STYLES */\n\n.font-main-ui-body {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 14px;\n  font-weight: 500;\n  line-height: 20px;\n  letter-spacing: 0px;\n}\n\n.font-main-ui-muted {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 14px;\n  font-weight: 400;\n  line-height: 20px;\n  letter-spacing: 0px;\n}\n\n.font-main-ui-action {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 14px;\n  font-weight: 600;\n  line-height: 20px;\n  letter-spacing: 0px;\n}\n\n.font-main-ui-mono {\n  font-family: var(--font-dm-mono);\n  font-size: 14px;\n  font-weight: 400;\n  line-height: 20px;\n  letter-spacing: 0px;\n}\n\n/* SECONDARY STYLES */\n\n.font-secondary-body {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 12px;\n  font-weight: 400;\n  line-height: 16px;\n  letter-spacing: 0px;\n}\n\n.font-secondary-action {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 12px;\n  font-weight: 600;\n  line-height: 16px;\n  letter-spacing: 0px;\n}\n\n.font-secondary-mono {\n  font-family: var(--font-dm-mono);\n  font-size: 12px;\n  font-weight: 400;\n  line-height: 16px;\n  letter-spacing: 0px;\n}\n\n.font-secondary-mono-label {\n  font-family: var(--font-dm-mono);\n  font-size: 12px;\n  font-weight: 500;\n  line-height: 16px;\n  letter-spacing: 0px;\n}\n\n/* FIGURE STYLES */\n\n.font-figure-small-label {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 10px;\n  font-weight: 700;\n  line-height: 12px;\n  letter-spacing: 0px;\n}\n\n.font-figure-small-value {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 10px;\n  font-weight: 400;\n  line-height: 12px;\n  letter-spacing: 0px;\n}\n\n.font-figure-keystroke {\n  font-family: var(--font-hanken-grotesk);\n  font-size: 12px;\n  font-weight: 400;\n  line-height: 16px;\n  letter-spacing: -0.6px;\n}\n\n/* SCROLL BAR */\n\n.default-scrollbar {\n  scrollbar-width: thin;\n  scrollbar-color: #888 transparent;\n  overflow-y: scroll;\n  overflow-x: hidden;\n}\n\n.scrollbar {\n  width: 100%;\n  height: 100%;\n}\n\n.inputscroll {\n  scrollbar-width: none;\n}\n\n/* Ensure native scrollbars are visible */\n@layer base {\n  * {\n    scrollbar-width: auto;\n  }\n}\n\n/* TEXTAREA */\n\ntextarea {\n  resize: vertical;\n  scrollbar-width: thin;\n  scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-track);\n}\n\n.nextjs-portal {\n  display: none !important;\n  visibility: hidden !important;\n  opacity: 0 !important;\n  width: 0 !important;\n  height: 0 !important;\n  overflow: hidden !important;\n  position: absolute !important;\n  pointer-events: none !important;\n  clip: rect(0, 0, 0, 0) !important;\n}\n\n.nextjs-portal * {\n  display: none !important;\n}\n\n/* Used to create alternatie to React Markdown */\n.preserve-lines {\n  white-space: pre-wrap;\n  /* Preserves whitespace and wraps text */\n}\n\n.loading-text {\n  color: #e5e5e5;\n\n  background: linear-gradient(\n    -90deg,\n    #a3a3a3 0%,\n    #000000 5%,\n    #a3a3a3 10%,\n    #a3a3a3 100%\n  );\n  background-size: 200% 100%;\n  background-clip: text;\n  -webkit-background-clip: text;\n  -webkit-text-fill-color: transparent;\n  animation: shimmerTransition 1.8s ease-out infinite;\n}\n\n.dark .loading-text {\n  color: #1a1a1a;\n\n  background: linear-gradient(\n    -90deg,\n    #5c5c5c 0%,\n    #ffffff 5%,\n    #5c5c5c 10%,\n    #5c5c5c 100%\n  );\n  background-size: 200% 100%;\n  background-clip: text;\n  -webkit-background-clip: text;\n  -webkit-text-fill-color: transparent;\n}\n\n@keyframes shimmerTransition {\n  0% {\n    background-position: 100% 0;\n  }\n\n  100% {\n    background-position: -100% 0;\n  }\n}\n\n.collapsible {\n  max-height: 300px;\n  transition:\n    max-height 0.5s ease-in-out,\n    opacity 0.5s ease-in-out;\n  opacity: 1;\n}\n\n.collapsible-closed {\n  max-height: 0;\n  opacity: 0;\n  overflow: hidden;\n}\n\n.prevent-scroll {\n  overscroll-behavior-y: none;\n}\n\n/* CUSTOM ANIMATIONS */\n\n@keyframes fadeIn {\n  from {\n    opacity: 0;\n    transform: scale(0.95);\n  }\n\n  to {\n    opacity: 1;\n    transform: scale(1);\n  }\n}\n\n.animate-fadeIn {\n  animation: fadeIn 0.2s ease-out forwards;\n}\n\n/* Recording waveform animation */\n@keyframes waveform {\n  0%,\n  100% {\n    transform: scaleY(0.3);\n  }\n  50% {\n    transform: scaleY(1);\n  }\n}\n\n.animate-waveform {\n  animation: waveform 0.8s ease-in-out infinite;\n}\n\n.container {\n  margin-bottom: 1rem;\n}\n"
  },
  {
    "path": "web/src/app/layout.tsx",
    "content": "import \"./globals.css\";\n\nimport { GTM_ENABLED, MODAL_ROOT_ID } from \"@/lib/constants\";\nimport { Metadata } from \"next\";\n\nimport AppProvider from \"@/providers/AppProvider\";\nimport DynamicMetadata from \"@/providers/DynamicMetadata\";\nimport { PHProvider } from \"./providers\";\nimport { Suspense } from \"react\";\nimport PostHogPageView from \"./PostHogPageView\";\nimport Script from \"next/script\";\nimport { DM_Mono, Hanken_Grotesk } from \"next/font/google\";\nimport { WebVitals } from \"./web-vitals\";\nimport { ThemeProvider } from \"next-themes\";\nimport { TooltipProvider } from \"@/components/ui/tooltip\";\nimport StatsOverlayLoader from \"@/components/dev/StatsOverlayLoader\";\nimport { cn } from \"@/lib/utils\";\nimport AppHealthBanner from \"@/sections/AppHealthBanner\";\nimport CustomAnalyticsScript from \"@/providers/CustomAnalyticsScript\";\nimport ProductGatingWrapper from \"@/providers/ProductGatingWrapper\";\nimport SWRConfigProvider from \"@/providers/SWRConfigProvider\";\n\nconst hankenGrotesk = Hanken_Grotesk({\n  subsets: [\"latin\"],\n  variable: \"--font-hanken-grotesk\",\n  display: \"swap\",\n  fallback: [\n    \"-apple-system\",\n    \"BlinkMacSystemFont\",\n    \"Segoe UI\",\n    \"Roboto\",\n    \"sans-serif\",\n  ],\n});\n\nconst dmMono = DM_Mono({\n  weight: \"400\",\n  subsets: [\"latin\"],\n  variable: \"--font-dm-mono\",\n  display: \"swap\",\n  fallback: [\n    \"SF Mono\",\n    \"Monaco\",\n    \"Cascadia Code\",\n    \"Roboto Mono\",\n    \"Consolas\",\n    \"Courier New\",\n    \"monospace\",\n  ],\n});\n\nexport const metadata: Metadata = {\n  title: \"Onyx\",\n  description: \"Question answering for your documents\",\n};\n\n// force-dynamic prevents Next.js from statically prerendering pages at build\n// time — many child routes use cookies() which requires dynamic rendering.\n// This is safe because the layout itself has no server-side data fetching;\n// all data is fetched client-side via SWR in the provider tree.\nexport const dynamic = \"force-dynamic\";\n\nexport default function RootLayout({\n  children,\n}: {\n  children: React.ReactNode;\n}) {\n  return (\n    <html\n      lang=\"en\"\n      className={cn(hankenGrotesk.variable, dmMono.variable)}\n      suppressHydrationWarning\n    >\n      <head>\n        <meta\n          name=\"viewport\"\n          content=\"width=device-width, initial-scale=1, maximum-scale=1, user-scalable=0, interactive-widget=resizes-content\"\n        />\n\n        {GTM_ENABLED && (\n          <Script\n            id=\"google-tag-manager\"\n            strategy=\"afterInteractive\"\n            dangerouslySetInnerHTML={{\n              __html: `\n               (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':\n               new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],\n               j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=\n               'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);\n               })(window,document,'script','dataLayer','GTM-PZXS36NG');\n             `,\n            }}\n          />\n        )}\n      </head>\n\n      <body className={`relative font-hanken`}>\n        <ThemeProvider\n          attribute=\"class\"\n          defaultTheme=\"system\"\n          enableSystem\n          disableTransitionOnChange\n        >\n          <div className=\"text-text min-h-screen bg-background\">\n            <TooltipProvider>\n              <PHProvider>\n                <SWRConfigProvider>\n                  <AppHealthBanner />\n                  <AppProvider>\n                    <DynamicMetadata />\n                    <CustomAnalyticsScript />\n                    <Suspense fallback={null}>\n                      <PostHogPageView />\n                    </Suspense>\n                    <div id={MODAL_ROOT_ID} className=\"h-screen w-screen\">\n                      <ProductGatingWrapper>{children}</ProductGatingWrapper>\n                    </div>\n                    {process.env.NEXT_PUBLIC_POSTHOG_KEY && <WebVitals />}\n                    {process.env.NEXT_PUBLIC_ENABLE_STATS === \"true\" && (\n                      <StatsOverlayLoader />\n                    )}\n                  </AppProvider>\n                </SWRConfigProvider>\n              </PHProvider>\n            </TooltipProvider>\n          </div>\n        </ThemeProvider>\n      </body>\n    </html>\n  );\n}\n"
  },
  {
    "path": "web/src/app/mcp/[[...path]]/route.ts",
    "content": "import { MCP_INTERNAL_URL } from \"@/lib/constants\";\nimport { NextRequest, NextResponse } from \"next/server\";\n\ntype RouteContext = {\n  params?: Promise<{\n    path?: string[];\n  }>;\n};\n\nconst proxyHandler = async (\n  request: NextRequest,\n  context: RouteContext\n): Promise<Response> => {\n  if (!isProxyEnabled()) {\n    return NextResponse.json(\n      {\n        message:\n          \"This MCP proxy is only available in development mode. In production, something else (e.g. nginx) should handle this.\",\n      },\n      { status: 404 }\n    );\n  }\n\n  try {\n    const resolvedParams = context.params ? await context.params : undefined;\n    const targetUrl = buildTargetUrl(\n      resolvedParams?.path,\n      request.nextUrl.searchParams\n    );\n    const headers = buildForwardHeaders(request.headers);\n    const fetchOptions: RequestInit & { duplex?: \"half\" } = {\n      method: request.method,\n      headers,\n      signal: request.signal,\n    };\n\n    if (supportsRequestBody(request) && request.body) {\n      fetchOptions.body = request.body;\n      fetchOptions.duplex = \"half\";\n    }\n\n    const response = await fetch(targetUrl, fetchOptions);\n    return response;\n  } catch (error: unknown) {\n    console.error(\"MCP Proxy error:\", error);\n    return NextResponse.json(\n      {\n        message: \"MCP Proxy error\",\n        error:\n          error instanceof Error ? error.message : \"An unknown error occurred\",\n      },\n      { status: 500 }\n    );\n  }\n};\n\nconst isProxyEnabled = (): boolean => {\n  if (process.env.OVERRIDE_API_PRODUCTION === \"true\") {\n    return true;\n  }\n  return process.env.NODE_ENV === \"development\";\n};\n\nconst buildForwardHeaders = (requestHeaders: Headers): Headers => {\n  const headers = new Headers(requestHeaders);\n  headers.delete(\"host\");\n  headers.delete(\"connection\");\n  headers.delete(\"content-length\");\n  return headers;\n};\n\nconst supportsRequestBody = (request: NextRequest): boolean => {\n  const method = request.method.toUpperCase();\n  return method !== \"GET\" && method !== \"HEAD\";\n};\n\nconst trimSlashes = (value: string): string => value.replace(/^\\/+|\\/+$/g, \"\");\n\nconst sanitizePathSegments = (segments: string[] | undefined): string[] =>\n  segments?.filter(Boolean).map((segment) => encodeURIComponent(segment)) ?? [];\n\nconst buildTargetUrl = (\n  pathSegments: string[] | undefined,\n  searchParams: URLSearchParams\n): string => {\n  const target = new URL(MCP_INTERNAL_URL);\n  const forwardedPath = sanitizePathSegments(pathSegments).join(\"/\");\n\n  const basePath = trimSlashes(target.pathname);\n  const combinedPath = [basePath, trimSlashes(forwardedPath)]\n    .filter(Boolean)\n    .join(\"/\");\n\n  target.pathname = combinedPath ? `/${combinedPath}` : \"/\";\n  const queryString = searchParams.toString();\n  target.search = queryString;\n\n  return target.toString();\n};\n\ntype Handler = (\n  request: NextRequest,\n  context: RouteContext\n) => Promise<Response>;\n\nconst handler: Handler = proxyHandler;\n\nexport const GET = handler;\nexport const POST = handler;\nexport const PUT = handler;\nexport const PATCH = handler;\nexport const DELETE = handler;\nexport const HEAD = handler;\nexport const OPTIONS = handler;\n"
  },
  {
    "path": "web/src/app/mcp/oauth/callback/page.tsx",
    "content": "\"use client\";\n\nimport OAuthCallbackPage from \"@/components/oauth/OAuthCallbackPage\";\n\nexport default function MCPOAuthCallbackPage() {\n  const mcpConfig = {\n    processingMessage: \"Processing...\",\n    processingDetails: \"Please wait while we complete the MCP server setup.\",\n    successMessage: \"Success!\",\n    successDetailsTemplate:\n      \"Your {serviceName} authorization completed successfully. You can now use this server's tools in chat.\",\n    errorMessage: \"Something Went Wrong\",\n    backButtonText: \"Back to Chat\",\n    redirectingMessage: \"Redirecting back in 2 seconds...\",\n    autoRedirectDelay: 2000,\n    defaultRedirectPath: \"/app\",\n    callbackApiUrl: \"/api/mcp/oauth/callback\",\n    errorMessageMap: {\n      \"server not found\": \"MCP server configuration not found\",\n      credentials: \"Authentication credentials are invalid\",\n      oauth: \"OAuth authorization failed\",\n      validation: \"Could not validate connection to MCP server\",\n    },\n  };\n\n  return <OAuthCallbackPage config={mcpConfig} />;\n}\n"
  },
  {
    "path": "web/src/app/not-found.tsx",
    "content": "import { redirect } from \"next/navigation\";\n\nexport default function NotFound() {\n  redirect(\"/auth/login\");\n}\n"
  },
  {
    "path": "web/src/app/nrf/(main)/layout.tsx",
    "content": "import { unstable_noStore as noStore } from \"next/cache\";\nimport AppSidebar from \"@/sections/sidebar/AppSidebar\";\nimport { getCurrentUserSS } from \"@/lib/userSS\";\n\nexport interface LayoutProps {\n  children: React.ReactNode;\n}\n\n/**\n * NRF Main (New Tab) Layout\n *\n * Shows the app sidebar when the user is authenticated.\n * This layout is NOT used by the side-panel route.\n */\nexport default async function Layout({ children }: LayoutProps) {\n  noStore();\n\n  const user = await getCurrentUserSS();\n\n  return (\n    <div className=\"flex flex-row w-full h-full\">\n      {user && <AppSidebar />}\n      {children}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/nrf/(main)/page.tsx",
    "content": "import { unstable_noStore as noStore } from \"next/cache\";\nimport { InstantSSRAutoRefresh } from \"@/components/SSRAutoRefresh\";\nimport NRFPage from \"@/app/nrf/NRFPage\";\nimport { NRFPreferencesProvider } from \"@/components/context/NRFPreferencesContext\";\nimport NRFChrome from \"../NRFChrome\";\n\n/**\n * NRF (New Tab Page) Route - No Auth Required\n *\n * This route is placed outside /app/app/ to bypass the authentication\n * requirement in /app/app/layout.tsx. The NRFPage component handles\n * unauthenticated users gracefully by showing a login modal instead of\n * redirecting, which is better UX for the Chrome extension.\n *\n * Instead of AppLayouts.Root (which pulls in heavy Header state management),\n * we use NRFChrome — a lightweight overlay that renders only the search/chat\n * mode toggle and footer, floating transparently over NRFPage's background.\n */\nexport default async function Page() {\n  noStore();\n\n  return (\n    <div className=\"relative w-full h-full\">\n      <InstantSSRAutoRefresh />\n      <NRFPreferencesProvider>\n        <NRFPage />\n      </NRFPreferencesProvider>\n      <NRFChrome />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/nrf/NRFChrome.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { cn, ensureHrefProtocol, noProp } from \"@/lib/utils\";\nimport type { Components } from \"react-markdown\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Popover from \"@/refresh-components/Popover\";\nimport { OpenButton } from \"@opal/components\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { Button } from \"@opal/components\";\nimport { SvgBubbleText, SvgSearchMenu, SvgSidebar } from \"@opal/icons\";\nimport MinimalMarkdown from \"@/components/chat/MinimalMarkdown\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport type { AppMode } from \"@/providers/QueryControllerProvider\";\nimport useAppFocus from \"@/hooks/useAppFocus\";\nimport { useQueryController } from \"@/providers/QueryControllerProvider\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { useAppSidebarContext } from \"@/providers/AppSidebarProvider\";\nimport useScreenSize from \"@/hooks/useScreenSize\";\n\nconst footerMarkdownComponents = {\n  p: ({ children }: { children?: React.ReactNode }) => (\n    <Text as=\"p\" text03 secondaryAction className=\"!my-0 text-center\">\n      {children}\n    </Text>\n  ),\n  a: ({\n    href,\n    className,\n    children,\n    ...rest\n  }: React.AnchorHTMLAttributes<HTMLAnchorElement>) => {\n    const fullHref = ensureHrefProtocol(href);\n    return (\n      <a\n        href={fullHref}\n        target=\"_blank\"\n        rel=\"noopener noreferrer\"\n        {...rest}\n        className={cn(className, \"underline underline-offset-2\")}\n      >\n        <Text text03 secondaryAction>\n          {children}\n        </Text>\n      </a>\n    );\n  },\n} satisfies Partial<Components>;\n\n/**\n * Lightweight chrome overlay for the NRF page.\n *\n * Renders only the search/chat mode toggle (top-left) and footer (bottom),\n * absolutely positioned so they float transparently over NRFPage's own\n * background. This avoids pulling in the full AppLayouts.Root Header which\n * carries heavy state management (share/delete/move modals) that the\n * extension doesn't need.\n */\nexport default function NRFChrome() {\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n  const { state, setAppMode } = useQueryController();\n  const settings = useSettingsContext();\n  const { isMobile } = useScreenSize();\n  const { setFolded } = useAppSidebarContext();\n  const appFocus = useAppFocus();\n  const [modePopoverOpen, setModePopoverOpen] = useState(false);\n\n  const effectiveMode: AppMode =\n    appFocus.isNewSession() && state.phase === \"idle\" ? state.appMode : \"chat\";\n\n  const customFooterContent =\n    settings?.enterpriseSettings?.custom_lower_disclaimer_content ||\n    `[Onyx ${\n      settings?.webVersion || \"dev\"\n    }](https://www.onyx.app/) - Open Source AI Platform`;\n\n  const showModeToggle =\n    isPaidEnterpriseFeaturesEnabled &&\n    settings.isSearchModeAvailable &&\n    appFocus.isNewSession() &&\n    state.phase === \"idle\";\n\n  const showHeader = isMobile || showModeToggle;\n\n  return (\n    <>\n      {/* Header chrome — top-left, mirrors position of settings button at top-right */}\n      {showHeader && (\n        <div className=\"absolute top-0 left-0 p-4 z-10 flex flex-row items-center gap-2\">\n          {isMobile && (\n            <Button\n              prominence=\"internal\"\n              icon={SvgSidebar}\n              onClick={() => setFolded(false)}\n            />\n          )}\n          {showModeToggle && (\n            <Popover open={modePopoverOpen} onOpenChange={setModePopoverOpen}>\n              <Popover.Trigger asChild>\n                <OpenButton\n                  icon={\n                    effectiveMode === \"search\" ? SvgSearchMenu : SvgBubbleText\n                  }\n                >\n                  {effectiveMode === \"search\" ? \"Search\" : \"Chat\"}\n                </OpenButton>\n              </Popover.Trigger>\n              <Popover.Content align=\"start\" width=\"lg\">\n                <Popover.Menu>\n                  <LineItem\n                    icon={SvgSearchMenu}\n                    selected={effectiveMode === \"search\"}\n                    description=\"Quick search for documents\"\n                    onClick={noProp(() => {\n                      setAppMode(\"search\");\n                      setModePopoverOpen(false);\n                    })}\n                  >\n                    Search\n                  </LineItem>\n                  <LineItem\n                    icon={SvgBubbleText}\n                    selected={effectiveMode === \"chat\"}\n                    description=\"Conversation and research\"\n                    onClick={noProp(() => {\n                      setAppMode(\"chat\");\n                      setModePopoverOpen(false);\n                    })}\n                  >\n                    Chat\n                  </LineItem>\n                </Popover.Menu>\n              </Popover.Content>\n            </Popover>\n          )}\n        </div>\n      )}\n\n      {/* Footer — bottom-center, transparent background */}\n      <footer className=\"absolute bottom-0 left-0 w-full z-10 flex flex-row justify-center items-center gap-2 px-2 pb-2 pointer-events-auto\">\n        <MinimalMarkdown\n          content={customFooterContent}\n          className=\"max-w-full text-center\"\n          components={footerMarkdownComponents}\n        />\n      </footer>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/nrf/NRFPage.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useRef, useCallback, useMemo } from \"react\";\nimport { useSearchParams } from \"next/navigation\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { toast } from \"@/hooks/useToast\";\nimport { AuthType } from \"@/lib/constants\";\nimport AppInputBar, { AppInputBarHandle } from \"@/sections/input/AppInputBar\";\nimport { Button } from \"@opal/components\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { useFilters, useLlmManager } from \"@/lib/hooks\";\nimport Dropzone from \"react-dropzone\";\nimport { useSendMessageToParent, getPanelOrigin } from \"@/lib/extension/utils\";\nimport { useNRFPreferences } from \"@/components/context/NRFPreferencesContext\";\nimport SidePanelHeader from \"@/app/nrf/side-panel/SidePanelHeader\";\nimport { CHROME_MESSAGE } from \"@/lib/extension/constants\";\nimport { SettingsPanel } from \"@/app/components/nrf/SettingsPanel\";\nimport LoginPage from \"@/app/auth/login/LoginPage\";\nimport { sendSetDefaultNewTabMessage } from \"@/lib/extension/utils\";\nimport { useAgents } from \"@/hooks/useAgents\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport useDeepResearchToggle from \"@/hooks/useDeepResearchToggle\";\nimport useChatController from \"@/hooks/useChatController\";\nimport useChatSessionController from \"@/hooks/useChatSessionController\";\nimport useAgentController from \"@/hooks/useAgentController\";\nimport {\n  useCurrentChatState,\n  useCurrentMessageHistory,\n  useChatSessionStore,\n  useDocumentSidebarVisible,\n} from \"@/app/app/stores/useChatSessionStore\";\nimport ChatUI from \"@/sections/chat/ChatUI\";\nimport ChatScrollContainer from \"@/sections/chat/ChatScrollContainer\";\nimport WelcomeMessage from \"@/app/app/components/WelcomeMessage\";\nimport useChatSessions from \"@/hooks/useChatSessions\";\nimport { cn } from \"@/lib/utils\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport { DEFAULT_CONTEXT_TOKENS } from \"@/lib/constants\";\nimport { SvgUser, SvgMenu, SvgAlertTriangle } from \"@opal/icons\";\nimport { useAppBackground } from \"@/providers/AppBackgroundProvider\";\nimport { MinimalOnyxDocument } from \"@/lib/search/interfaces\";\nimport DocumentsSidebar from \"@/sections/document-sidebar/DocumentsSidebar\";\nimport PreviewModal from \"@/sections/modals/PreviewModal\";\nimport { personaIncludesRetrieval } from \"@/app/app/services/lib\";\nimport { useQueryController } from \"@/providers/QueryControllerProvider\";\nimport { eeGated } from \"@/ce\";\nimport EESearchUI from \"@/ee/sections/SearchUI\";\n\nconst SearchUI = eeGated(EESearchUI);\n\ninterface NRFPageProps {\n  isSidePanel?: boolean;\n}\n\n// Reserve half of the context window for the model's response output\nconst AVAILABLE_CONTEXT_TOKENS = Number(DEFAULT_CONTEXT_TOKENS) * 0.5;\n\nexport default function NRFPage({ isSidePanel = false }: NRFPageProps) {\n  const { setUseOnyxAsNewTab } = useNRFPreferences();\n\n  const searchParams = useSearchParams();\n  const filterManager = useFilters();\n  const { user, authTypeMetadata } = useUser();\n\n  // Chat sessions\n  const { refreshChatSessions } = useChatSessions();\n  const existingChatSessionId = null; // NRF always starts new chats\n\n  // Get agents for agent selection\n  const { agents: availableAgents } = useAgents();\n\n  // Projects context for file handling\n  const {\n    currentMessageFiles,\n    setCurrentMessageFiles,\n    lastFailedFiles,\n    clearLastFailedFiles,\n  } = useProjectsContext();\n\n  // Show toast if any files failed\n  useEffect(() => {\n    if (lastFailedFiles && lastFailedFiles.length > 0) {\n      const names = lastFailedFiles.map((f) => f.name).join(\", \");\n      toast.error(\n        lastFailedFiles.length === 1\n          ? `File failed and was removed: ${names}`\n          : `Files failed and were removed: ${names}`\n      );\n      clearLastFailedFiles();\n    }\n  }, [lastFailedFiles, clearLastFailedFiles]);\n\n  // Assistant controller\n  const { selectedAgent, setSelectedAgentFromId, liveAgent } =\n    useAgentController({\n      selectedChatSession: undefined,\n      onAgentSelect: () => {},\n    });\n\n  // LLM manager for model selection.\n  // - currentChatSession: undefined because NRF always starts new chats\n  // - liveAgent: uses the selected assistant, or undefined to fall back\n  //   to system-wide default LLM provider.\n  //\n  // If no LLM provider is configured (e.g., fresh signup), the input bar is\n  // disabled and a \"Set up an LLM\" button is shown (see bottom of component).\n  const llmManager = useLlmManager(undefined, liveAgent ?? undefined);\n\n  // Deep research toggle\n  const { deepResearchEnabled, toggleDeepResearch } = useDeepResearchToggle({\n    chatSessionId: existingChatSessionId,\n    agentId: selectedAgent?.id,\n  });\n\n  // State\n  const [message, setMessage] = useState(\"\");\n  const [settingsOpen, setSettingsOpen] = useState<boolean>(false);\n  const [tabReadingEnabled, setTabReadingEnabled] = useState<boolean>(false);\n  const [currentTabUrl, setCurrentTabUrl] = useState<string | null>(null);\n  const [presentingDocument, setPresentingDocument] =\n    useState<MinimalOnyxDocument | null>(null);\n\n  // Document sidebar state (from store)\n  const documentSidebarVisible = useDocumentSidebarVisible();\n  const updateCurrentDocumentSidebarVisible = useChatSessionStore(\n    (state) => state.updateCurrentDocumentSidebarVisible\n  );\n  const setCurrentSession = useChatSessionStore(\n    (state) => state.setCurrentSession\n  );\n  const currentSessionId = useChatSessionStore(\n    (state) => state.currentSessionId\n  );\n\n  // Memoized callback for closing document sidebar\n  const handleDocumentSidebarClose = useCallback(() => {\n    updateCurrentDocumentSidebarVisible(false);\n  }, [updateCurrentDocumentSidebarVisible]);\n\n  // Initialize message from URL input parameter (for Chrome extension)\n  const initializedRef = useRef(false);\n  useEffect(() => {\n    if (initializedRef.current) return;\n    initializedRef.current = true;\n    const urlParams = new URLSearchParams(window.location.search);\n    const userPrompt = urlParams.get(\"user-prompt\");\n    if (userPrompt) {\n      setMessage(userPrompt);\n    }\n  }, []);\n\n  // Chat background from context\n  const { hasBackground, appBackgroundUrl } = useAppBackground();\n\n  // Modals\n  const [showTurnOffModal, setShowTurnOffModal] = useState<boolean>(false);\n\n  // Refs\n  const inputRef = useRef<HTMLDivElement>(null);\n  const chatInputBarRef = useRef<AppInputBarHandle | null>(null);\n  const submitOnLoadPerformed = useRef<boolean>(false);\n\n  // Access chat state from store\n  const currentChatState = useCurrentChatState();\n  const messageHistory = useCurrentMessageHistory();\n\n  // Determine if we should show centered welcome or messages\n  const hasMessages = messageHistory.length > 0;\n\n  // Resolved assistant to use throughout the component\n  const resolvedAgent = liveAgent ?? undefined;\n\n  // Auto-scroll preference from user settings (matches ChatPage pattern)\n  const autoScrollEnabled = user?.preferences?.auto_scroll !== false;\n  const isStreaming = currentChatState === \"streaming\";\n\n  // Query controller for search/chat classification (EE feature)\n  const { submit: submitQuery, state } = useQueryController();\n\n  // Determine if retrieval (search) is enabled based on the agent\n  const retrievalEnabled = useMemo(() => {\n    if (liveAgent) {\n      return personaIncludesRetrieval(liveAgent);\n    }\n    return false;\n  }, [liveAgent]);\n\n  // Check if we're in search mode\n  const isSearch =\n    state.phase === \"searching\" || state.phase === \"search-results\";\n\n  // Anchor for scroll positioning (matches ChatPage pattern)\n  const anchorMessage = messageHistory.at(-2) ?? messageHistory[0];\n  const anchorNodeId = anchorMessage?.nodeId;\n  const anchorSelector = anchorNodeId ? `#message-${anchorNodeId}` : undefined;\n\n  useSendMessageToParent();\n\n  // Listen for tab URL updates from the Chrome extension\n  useEffect(() => {\n    if (!isSidePanel) return;\n\n    function handleExtensionMessage(event: MessageEvent) {\n      // Only trust messages from the Chrome extension parent.\n      // Checking the origin (chrome-extension://) prevents a non-extension\n      // page that embeds NRFPage as an iframe from injecting arbitrary URLs\n      // into the prompt context via TAB_URL_UPDATED.\n      if (!event.origin.startsWith(\"chrome-extension://\")) return;\n      if (event.source !== window.parent) return;\n      if (event.data?.type === CHROME_MESSAGE.TAB_URL_UPDATED) {\n        setCurrentTabUrl(event.data.url as string);\n      }\n    }\n\n    window.addEventListener(\"message\", handleExtensionMessage);\n    return () => window.removeEventListener(\"message\", handleExtensionMessage);\n  }, [isSidePanel]);\n\n  const toggleSettings = () => {\n    setSettingsOpen((prev) => !prev);\n  };\n\n  // If user toggles the \"Use Onyx\" switch to off, prompt a modal\n  const handleUseOnyxToggle = (checked: boolean) => {\n    if (!checked) {\n      setShowTurnOffModal(true);\n    } else {\n      setUseOnyxAsNewTab(true);\n      sendSetDefaultNewTabMessage(true);\n    }\n  };\n\n  const confirmTurnOff = () => {\n    setUseOnyxAsNewTab(false);\n    setShowTurnOffModal(false);\n    sendSetDefaultNewTabMessage(false);\n  };\n\n  // Reset input bar after sending\n  const resetInputBar = useCallback(() => {\n    setMessage(\"\");\n    setCurrentMessageFiles([]);\n    chatInputBarRef.current?.reset();\n  }, [setMessage, setCurrentMessageFiles]);\n\n  // Chat controller for submitting messages\n  const { onSubmit, stopGenerating, handleMessageSpecificFileUpload } =\n    useChatController({\n      filterManager,\n      llmManager,\n      availableAgents: availableAgents || [],\n      liveAgent,\n      existingChatSessionId,\n      selectedDocuments: [],\n      searchParams: searchParams!,\n      resetInputBar,\n      setSelectedAgentFromId,\n    });\n\n  // Chat session controller for loading sessions\n  const { currentSessionFileTokenCount } = useChatSessionController({\n    existingChatSessionId,\n    searchParams: searchParams!,\n    filterManager,\n    firstMessage: undefined,\n    setSelectedAgentFromId,\n    setSelectedDocuments: () => {}, // No-op: NRF doesn't support document selection\n    setCurrentMessageFiles,\n    chatSessionIdRef: { current: null },\n    loadedIdSessionRef: { current: null },\n    chatInputBarRef,\n    isInitialLoad: { current: false },\n    submitOnLoadPerformed,\n    refreshChatSessions,\n    onSubmit,\n  });\n\n  // Handle file upload\n  const handleFileUpload = useCallback(\n    async (acceptedFiles: File[]) => {\n      handleMessageSpecificFileUpload(acceptedFiles);\n    },\n    [handleMessageSpecificFileUpload]\n  );\n\n  // Handle submit from AppInputBar - routes through query controller for search/chat classification\n  const handleChatInputSubmit = useCallback(\n    async (submittedMessage: string) => {\n      if (!submittedMessage.trim()) return;\n\n      const additionalContext =\n        tabReadingEnabled && currentTabUrl\n          ? `The user is currently viewing: ${currentTabUrl}. Use the open_url tool to read this page and use its content as additional context for your response.`\n          : undefined;\n\n      // If we already have messages (chat session started), always use chat mode\n      // (matches AppPage behavior where existing sessions bypass classification)\n      if (hasMessages) {\n        onSubmit({\n          message: submittedMessage,\n          currentMessageFiles: currentMessageFiles,\n          deepResearch: deepResearchEnabled,\n          additionalContext,\n        });\n        return;\n      }\n\n      // Build an onChat closure that captures additionalContext for this submission\n      const onChat = (chatMessage: string) => {\n        onSubmit({\n          message: chatMessage,\n          currentMessageFiles: currentMessageFiles,\n          deepResearch: deepResearchEnabled,\n          additionalContext,\n        });\n      };\n\n      // Use submitQuery which will classify the query and either:\n      // - Route to search (sets phase to \"searching\"/\"search-results\" and shows SearchUI)\n      // - Route to chat (calls onChat callback)\n      await submitQuery(submittedMessage, onChat);\n    },\n    [\n      hasMessages,\n      onSubmit,\n      currentMessageFiles,\n      deepResearchEnabled,\n      submitQuery,\n      tabReadingEnabled,\n      currentTabUrl,\n    ]\n  );\n\n  // Handle resubmit last message on error\n  const handleResubmitLastMessage = useCallback(() => {\n    const lastUserMsg = messageHistory\n      .slice()\n      .reverse()\n      .find((m) => m.type === \"user\");\n    if (!lastUserMsg) {\n      toast.error(\"No previously-submitted user message found.\");\n      return;\n    }\n\n    onSubmit({\n      message: lastUserMsg.message,\n      currentMessageFiles: currentMessageFiles,\n      deepResearch: deepResearchEnabled,\n      messageIdToResend: lastUserMsg.messageId,\n    });\n  }, [messageHistory, onSubmit, currentMessageFiles, deepResearchEnabled]);\n\n  // Start a new chat session in the side panel\n  const handleNewChat = useCallback(() => {\n    setCurrentSession(null);\n    setTabReadingEnabled(false);\n    setCurrentTabUrl(null);\n    resetInputBar();\n    // Notify the service worker so it stops sending tab URL updates\n    window.parent.postMessage(\n      { type: CHROME_MESSAGE.TAB_READING_DISABLED },\n      getPanelOrigin()\n    );\n  }, [setCurrentSession, resetInputBar]);\n\n  const handleToggleTabReading = useCallback(() => {\n    const next = !tabReadingEnabled;\n    setTabReadingEnabled(next);\n    if (!next) {\n      setCurrentTabUrl(null);\n    }\n    window.parent.postMessage(\n      {\n        type: next\n          ? CHROME_MESSAGE.TAB_READING_ENABLED\n          : CHROME_MESSAGE.TAB_READING_DISABLED,\n      },\n      getPanelOrigin()\n    );\n  }, [tabReadingEnabled]);\n\n  // Handle search result document click\n  const handleSearchDocumentClick = useCallback(\n    (doc: MinimalOnyxDocument) => setPresentingDocument(doc),\n    []\n  );\n\n  return (\n    <div\n      className={cn(\n        \"relative w-full h-full flex flex-col overflow-hidden\",\n        isSidePanel\n          ? \"bg-background\"\n          : hasBackground && \"bg-cover bg-center bg-fixed\"\n      )}\n      style={\n        !isSidePanel && hasBackground\n          ? { backgroundImage: `url(${appBackgroundUrl})` }\n          : undefined\n      }\n    >\n      {/* Semi-transparent overlay for readability when background is set */}\n      {!isSidePanel && hasBackground && (\n        <div className=\"absolute inset-0 bg-background/80 pointer-events-none\" />\n      )}\n\n      {/* Side panel header */}\n      {isSidePanel && (\n        <SidePanelHeader\n          onNewChat={handleNewChat}\n          chatSessionId={currentSessionId}\n        />\n      )}\n\n      {/* Settings button */}\n      {!isSidePanel && (\n        <div className=\"absolute top-0 right-0 p-4 z-10\">\n          <Button\n            prominence=\"secondary\"\n            icon={SvgMenu}\n            onClick={toggleSettings}\n            tooltip=\"Open settings\"\n          />\n        </div>\n      )}\n\n      <Dropzone onDrop={handleFileUpload} noClick>\n        {({ getRootProps }) => (\n          <div\n            {...getRootProps()}\n            className={cn(\n              \"flex-1 min-h-0 w-full flex flex-col items-center outline-none\",\n              isSidePanel && \"px-3\"\n            )}\n          >\n            {/* Chat area with messages */}\n            {hasMessages && resolvedAgent && (\n              <>\n                {/* Fake header - pushes content below absolute settings button (non-side-panel only) */}\n                {!isSidePanel && <Spacer rem={2} />}\n                <ChatScrollContainer\n                  sessionId=\"nrf-session\"\n                  anchorSelector={anchorSelector}\n                  autoScroll={autoScrollEnabled}\n                  isStreaming={isStreaming}\n                  hideScrollbar={isSidePanel}\n                >\n                  <ChatUI\n                    liveAgent={resolvedAgent}\n                    llmManager={llmManager}\n                    currentMessageFiles={currentMessageFiles}\n                    setPresentingDocument={setPresentingDocument}\n                    onSubmit={onSubmit}\n                    onMessageSelection={() => {}}\n                    stopGenerating={stopGenerating}\n                    onResubmit={handleResubmitLastMessage}\n                    deepResearchEnabled={deepResearchEnabled}\n                    anchorNodeId={anchorNodeId}\n                  />\n                </ChatScrollContainer>\n              </>\n            )}\n\n            {/* Welcome message - centered when no messages and not in search mode */}\n            {!hasMessages && !isSearch && (\n              <div className=\"relative w-full flex-1 flex flex-col items-center justify-end\">\n                <WelcomeMessage isDefaultAgent />\n                <Spacer rem={1.5} />\n              </div>\n            )}\n\n            {/* AppInputBar container - in normal flex flow like AppPage */}\n            <div\n              ref={inputRef}\n              className={cn(\n                \"w-full flex flex-col\",\n                !isSidePanel &&\n                  \"max-w-[var(--app-page-main-content-width)] px-4\"\n              )}\n            >\n              <AppInputBar\n                ref={chatInputBarRef}\n                deepResearchEnabled={deepResearchEnabled}\n                toggleDeepResearch={toggleDeepResearch}\n                filterManager={filterManager}\n                llmManager={llmManager}\n                initialMessage={message}\n                stopGenerating={stopGenerating}\n                onSubmit={handleChatInputSubmit}\n                chatState={currentChatState}\n                currentSessionFileTokenCount={currentSessionFileTokenCount}\n                availableContextTokens={AVAILABLE_CONTEXT_TOKENS}\n                selectedAgent={liveAgent ?? undefined}\n                handleFileUpload={handleFileUpload}\n                disabled={\n                  !llmManager.isLoadingProviders && !llmManager.hasAnyProvider\n                }\n                {...(isSidePanel && {\n                  tabReadingEnabled,\n                  currentTabUrl,\n                  onToggleTabReading: handleToggleTabReading,\n                })}\n              />\n              <Spacer rem={isSidePanel ? 1 : 0.5} />\n            </div>\n\n            {/* Search results - shown when query is classified as search */}\n            {isSearch && (\n              <div className=\"flex-1 w-full max-w-[var(--app-page-main-content-width)] px-4 min-h-0 overflow-auto\">\n                <Spacer rem={0.75} />\n                <SearchUI onDocumentClick={handleSearchDocumentClick} />\n              </div>\n            )}\n\n            {/* Spacer to push content up when showing welcome message */}\n            {!hasMessages && !isSearch && <div className=\"flex-1 w-full\" />}\n          </div>\n        )}\n      </Dropzone>\n\n      {/* Document sidebar - shown when sources are clicked */}\n      <div\n        className={cn(\n          \"absolute right-0 top-0 h-full z-20 overflow-hidden transition-all duration-300\",\n          documentSidebarVisible ? \"w-[25rem]\" : \"w-0\"\n        )}\n      >\n        <DocumentsSidebar\n          setPresentingDocument={setPresentingDocument}\n          modal={false}\n          closeSidebar={handleDocumentSidebarClose}\n          selectedDocuments={[]}\n        />\n      </div>\n\n      {/* Text/document preview modal */}\n      {presentingDocument && (\n        <PreviewModal\n          presentingDocument={presentingDocument}\n          onClose={() => setPresentingDocument(null)}\n        />\n      )}\n\n      {/* Modals - only show when not in side panel mode */}\n      {!isSidePanel && (\n        <>\n          <SettingsPanel\n            settingsOpen={settingsOpen}\n            toggleSettings={toggleSettings}\n            handleUseOnyxToggle={handleUseOnyxToggle}\n          />\n\n          <Modal open={showTurnOffModal} onOpenChange={setShowTurnOffModal}>\n            <Modal.Content width=\"sm\">\n              <Modal.Header\n                icon={SvgAlertTriangle}\n                title=\"Turn off Onyx new tab page?\"\n                description=\"You'll see your browser's default new tab page instead. You can turn it back on anytime in your Onyx settings.\"\n                onClose={() => setShowTurnOffModal(false)}\n              />\n              <Modal.Footer>\n                <Button\n                  prominence=\"secondary\"\n                  onClick={() => setShowTurnOffModal(false)}\n                >\n                  Cancel\n                </Button>\n                <Button variant=\"danger\" onClick={confirmTurnOff}>\n                  Turn off\n                </Button>\n              </Modal.Footer>\n            </Modal.Content>\n          </Modal>\n        </>\n      )}\n\n      {!user && (\n        <Modal open onOpenChange={() => {}}>\n          <Modal.Content width=\"sm\" height=\"sm\">\n            <Modal.Header icon={SvgUser} title=\"Welcome to Onyx\" />\n            <Modal.Body>\n              {authTypeMetadata.authType === AuthType.BASIC ? (\n                <LoginPage\n                  authUrl={null}\n                  authTypeMetadata={authTypeMetadata}\n                  nextUrl=\"/nrf\"\n                />\n              ) : (\n                <div className=\"flex flex-col items-center\">\n                  <Button\n                    width=\"full\"\n                    prominence=\"secondary\"\n                    onClick={() => {\n                      if (window.top) {\n                        window.top.location.href = \"/auth/login\";\n                      } else {\n                        window.location.href = \"/auth/login\";\n                      }\n                    }}\n                  >\n                    Log in\n                  </Button>\n                </div>\n              )}\n            </Modal.Body>\n          </Modal.Content>\n        </Modal>\n      )}\n\n      {user && !llmManager.isLoadingProviders && !llmManager.hasAnyProvider && (\n        <Button\n          width=\"full\"\n          prominence=\"secondary\"\n          onClick={() => {\n            window.location.href = \"/admin/configuration/llm\";\n          }}\n        >\n          Set up an LLM.\n        </Button>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/app/nrf/layout.tsx",
    "content": "import { ProjectsProvider } from \"@/providers/ProjectsContext\";\nimport { VoiceModeProvider } from \"@/providers/VoiceModeProvider\";\n\nexport interface LayoutProps {\n  children: React.ReactNode;\n}\n\n/**\n * NRF Root Layout - Shared by all NRF routes\n *\n * Provides ProjectsProvider (needed by NRFPage) without auth redirect.\n * Sidebar and chrome are handled by sub-layouts / individual pages.\n */\nexport default function Layout({ children }: LayoutProps) {\n  return (\n    <ProjectsProvider>\n      <VoiceModeProvider>{children}</VoiceModeProvider>\n    </ProjectsProvider>\n  );\n}\n"
  },
  {
    "path": "web/src/app/nrf/side-panel/SidePanelHeader.tsx",
    "content": "\"use client\";\n\nimport Logo from \"@/refresh-components/Logo\";\nimport { Button } from \"@opal/components\";\nimport { SvgEditBig, SvgExternalLink } from \"@opal/icons\";\n\ninterface SidePanelHeaderProps {\n  onNewChat: () => void;\n  chatSessionId?: string | null;\n}\n\nexport default function SidePanelHeader({\n  onNewChat,\n  chatSessionId,\n}: SidePanelHeaderProps) {\n  const handleOpenInOnyx = () => {\n    const path = chatSessionId ? `/app?chatId=${chatSessionId}` : \"/app\";\n    window.open(`${window.location.origin}${path}`, \"_blank\");\n  };\n\n  return (\n    <header className=\"flex items-center justify-between px-4 py-3 border-b border-border-01 bg-background\">\n      <Logo />\n      <div className=\"flex items-center gap-1\">\n        <Button\n          prominence=\"tertiary\"\n          icon={SvgEditBig}\n          onClick={onNewChat}\n          tooltip=\"New chat\"\n        />\n        <Button\n          prominence=\"tertiary\"\n          icon={SvgExternalLink}\n          onClick={handleOpenInOnyx}\n          tooltip=\"Open in Onyx\"\n        />\n      </div>\n    </header>\n  );\n}\n"
  },
  {
    "path": "web/src/app/nrf/side-panel/page.tsx",
    "content": "import { unstable_noStore as noStore } from \"next/cache\";\nimport { InstantSSRAutoRefresh } from \"@/components/SSRAutoRefresh\";\nimport NRFPage from \"@/app/nrf/NRFPage\";\nimport { NRFPreferencesProvider } from \"@/components/context/NRFPreferencesContext\";\n\n/**\n * NRF Side Panel Route - No Auth Required\n *\n * Side panel variant — no NRFChrome overlay needed since the side panel\n * has its own header (logo + \"Open in Onyx\" button) and doesn't show\n * the mode toggle or footer.\n */\nexport default async function Page() {\n  noStore();\n\n  return (\n    <>\n      <InstantSSRAutoRefresh />\n      <NRFPreferencesProvider>\n        <NRFPage isSidePanel />\n      </NRFPreferencesProvider>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/app/oauth-config/callback/page.tsx",
    "content": "import OAuthCallbackPage from \"@/components/oauth/OAuthCallbackPage\";\n\nexport default function OAuthConfigCallbackPage() {\n  return (\n    <OAuthCallbackPage\n      config={{\n        callbackApiUrl: \"/api/oauth-config/callback\",\n        defaultRedirectPath: \"/app\",\n        processingMessage: \"Completing Authorization...\",\n        processingDetails:\n          \"Please wait while we securely store your credentials.\",\n        successMessage: \"Authorization Successful!\",\n        successDetailsTemplate:\n          \"You have successfully authorized the tool to access your {serviceName} account.\",\n        errorMessage: \"Authorization Failed\",\n        backButtonText: \"Back to Chat\",\n        autoRedirectDelay: 2000,\n      }}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/app/page.tsx",
    "content": "import { redirect } from \"next/navigation\";\n\nexport default async function Page() {\n  redirect(\"/app\");\n}\n"
  },
  {
    "path": "web/src/app/providers.tsx",
    "content": "\"use client\";\nimport posthog from \"posthog-js\";\nimport { PostHogProvider } from \"posthog-js/react\";\nimport { useEffect } from \"react\";\n\nconst isPostHogEnabled = !!process.env.NEXT_PUBLIC_POSTHOG_KEY;\n\ntype PHProviderProps = { children: React.ReactNode };\n\nexport function PHProvider({ children }: PHProviderProps) {\n  useEffect(() => {\n    if (isPostHogEnabled) {\n      posthog.init(process.env.NEXT_PUBLIC_POSTHOG_KEY!, {\n        api_host: \"/ph_ingest\",\n        ui_host:\n          process.env.NEXT_PUBLIC_POSTHOG_HOST || \"https://us.posthog.com\",\n        person_profiles: \"identified_only\",\n        capture_pageview: false,\n        session_recording: {\n          // Sensitive inputs should use data-ph-no-capture attribute\n          maskAllInputs: false,\n        },\n      });\n    }\n  }, []);\n\n  if (!isPostHogEnabled) {\n    return <>{children}</>;\n  }\n\n  return <PostHogProvider client={posthog}>{children}</PostHogProvider>;\n}\n"
  },
  {
    "path": "web/src/app/web-vitals.tsx",
    "content": "\"use client\";\nimport { useReportWebVitals } from \"next/web-vitals\";\nimport { usePostHog } from \"posthog-js/react\";\n\nexport function WebVitals() {\n  const posthog = usePostHog();\n\n  useReportWebVitals((metric) => {\n    posthog.capture(metric.name, metric);\n  });\n  return <></>;\n}\n"
  },
  {
    "path": "web/src/ce.tsx",
    "content": "\"use client\";\n\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { ComponentType, ReactNode, createElement } from \"react\";\n\n/**\n * Passthrough component — renders children as-is, effectively a no-op wrapper.\n * <A><Invisible><B/></Invisible></A> === <A><B/></A>\n */\nfunction Invisible({ children }: { children?: ReactNode }) {\n  return <>{children}</>;\n}\n\n/**\n * Gates a component behind Enterprise. Returns the real component for EE,\n * or Invisible (passthrough) for CE.\n *\n * For providers: Community renders Invisible, so children pass through\n * and downstream hooks fall back to their context defaults.\n *\n * For leaf components: Community renders Invisible with no children,\n * so nothing is rendered.\n */\nexport function eeGated<P extends {}>(\n  EEComponent: ComponentType<P>\n): ComponentType<P> {\n  function EEGatedWrapper(props: P) {\n    const isEnterprise = usePaidEnterpriseFeaturesEnabled();\n    if (!isEnterprise)\n      return (\n        <Invisible>{(props as { children?: ReactNode }).children}</Invisible>\n      );\n    return createElement(EEComponent, props);\n  }\n  EEGatedWrapper.displayName = `eeGated(${\n    EEComponent.displayName || EEComponent.name || \"Component\"\n  })`;\n  return EEGatedWrapper;\n}\n"
  },
  {
    "path": "web/src/components/AdvancedOptionsToggle.tsx",
    "content": "import Button from \"@/refresh-components/buttons/Button\";\nimport { cn } from \"@/lib/utils\";\nimport { SvgChevronRight } from \"@opal/icons\";\ninterface AdvancedOptionsToggleProps {\n  showAdvancedOptions: boolean;\n  setShowAdvancedOptions: (show: boolean) => void;\n  title?: string;\n}\n\nexport function AdvancedOptionsToggle({\n  showAdvancedOptions,\n  setShowAdvancedOptions,\n  title,\n}: AdvancedOptionsToggleProps) {\n  return (\n    // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n    <Button\n      internal\n      leftIcon={({ className }) => (\n        <SvgChevronRight\n          className={cn(className, showAdvancedOptions && \"rotate-90\")}\n        />\n      )}\n      onClick={() => setShowAdvancedOptions(!showAdvancedOptions)}\n      className=\"mr-auto\"\n    >\n      {title || \"Advanced Options\"}\n    </Button>\n  );\n}\n"
  },
  {
    "path": "web/src/components/AgentsMultiSelect.tsx",
    "content": "import { FormikProps } from \"formik\";\nimport { GenericMultiSelect } from \"@/components/GenericMultiSelect\";\n\nexport type AgentsMultiSelectFormType = {\n  personas: number[];\n};\n\ninterface Agent {\n  id: number;\n  name: string;\n  description: string;\n}\n\ninterface AgentsMultiSelectProps<T extends AgentsMultiSelectFormType> {\n  formikProps: FormikProps<T>;\n  agents: Agent[] | undefined;\n  isLoading?: boolean;\n  error?: any;\n  label?: string;\n  subtext?: string;\n  disabled?: boolean;\n  disabledMessage?: string;\n}\n\nexport function AgentsMultiSelect<T extends AgentsMultiSelectFormType>({\n  formikProps,\n  agents,\n  isLoading = false,\n  error,\n  label = \"Agents\",\n  subtext = \"\",\n  disabled = false,\n  disabledMessage,\n}: AgentsMultiSelectProps<T>) {\n  return (\n    <GenericMultiSelect\n      formikProps={formikProps}\n      fieldName=\"personas\"\n      label={label}\n      subtext={subtext}\n      items={agents}\n      isLoading={isLoading}\n      error={error}\n      emptyMessage=\"No agents available. Please create an agent first from the Agents page.\"\n      disabled={disabled}\n      disabledMessage={disabledMessage}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/components/BasicClickable.tsx",
    "content": "import { JSX } from \"react\";\n\nexport function BasicClickable({\n  children,\n  onClick,\n  fullWidth = false,\n  inset,\n  className,\n}: {\n  children: string | JSX.Element;\n  onClick?: () => void;\n  inset?: boolean;\n  fullWidth?: boolean;\n  className?: string;\n}) {\n  return (\n    <button\n      onClick={onClick}\n      className={`\n        border \n        border-border\n        rounded\n        font-medium \n        text-text-darker \n        text-sm\n        relative\n        px-1 py-1.5\n        h-full\n        bg-background\n        select-none\n        overflow-hidden\n        hover:bg-accent-background\n        ${fullWidth ? \"w-full\" : \"\"}\n        ${className ? className : \"\"}\n        `}\n    >\n      {children}\n    </button>\n  );\n}\n\nexport function EmphasizedClickable({\n  children,\n  onClick,\n  fullWidth = false,\n  size = \"md\",\n}: {\n  children: string | JSX.Element;\n  onClick?: () => void;\n  fullWidth?: boolean;\n  size?: \"sm\" | \"md\" | \"lg\";\n}) {\n  return (\n    <button\n      className={`\n        inline-flex \n        items-center \n        justify-center \n        flex-shrink-0 \n        font-medium \n        ${\n          size === \"sm\"\n            ? `p-1`\n            : size === \"md\"\n              ? `min-h-[38px]  py-1 px-3`\n              : `min-h-[42px] py-2 px-4`\n        }\n        w-fit \n        bg-accent-background-hovered\n        border-1 border-border-medium border bg-background-100 \n        text-sm\n        rounded-lg\n        hover:bg-background-125\n    `}\n      onClick={onClick}\n    >\n      {children}\n    </button>\n  );\n}\n\nexport function BasicSelectable({\n  children,\n  selected,\n  hasBorder,\n  fullWidth = false,\n  padding = \"normal\",\n  removeColors = false,\n  isDragging = false,\n  isHovered,\n}: {\n  children: string | JSX.Element;\n  selected: boolean;\n  hasBorder?: boolean;\n  fullWidth?: boolean;\n  removeColors?: boolean;\n  padding?: \"none\" | \"normal\" | \"extra\";\n  isDragging?: boolean;\n  isHovered?: boolean;\n}) {\n  return (\n    <div\n      className={`\n        rounded\n        font-medium \n        text-sm\n        truncate\n        px-2\n        ${padding == \"normal\" && \"p-1\"}\n        ${padding == \"extra\" && \"p-1.5\"}\n        select-none\n        ${hasBorder ? \"border border-border\" : \"\"}\n        ${\n          !removeColors\n            ? isDragging\n              ? \"bg-background-chat-hover\"\n              : selected\n                ? \"bg-background-chat-selected\"\n                : isHovered\n                  ? \"bg-background-chat-hover\"\n                  : \"hover:bg-background-chat-hover\"\n            : \"\"\n        }\n        ${fullWidth ? \"w-full\" : \"\"}`}\n    >\n      {children}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/Bubble.tsx",
    "content": "import { JSX } from \"react\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\n\nexport function Bubble({\n  isSelected,\n  onClick,\n  children,\n  showCheckbox = false,\n  notSelectable = false,\n}: {\n  isSelected: boolean;\n  onClick?: () => void;\n  children: string | JSX.Element;\n  showCheckbox?: boolean;\n  notSelectable?: boolean;\n}) {\n  return (\n    <div\n      className={\n        `\n      px-1.5\n      py-1\n      rounded-lg\n      border\n      border-border\n      w-fit\n      flex` +\n        (notSelectable\n          ? \" bg-background cursor-default\"\n          : isSelected\n            ? \" bg-accent-background-hovered cursor-pointer\"\n            : \" bg-background hover:bg-accent-background cursor-pointer\")\n      }\n      onClick={onClick}\n    >\n      <div className=\"my-auto\">{children}</div>\n      {showCheckbox && (\n        <div className=\"pl-2 my-auto\">\n          <Checkbox checked={isSelected} />\n        </div>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/CollapsibleCard.tsx",
    "content": "import { ChevronDown } from \"lucide-react\";\nimport React, {\n  useState,\n  ReactNode,\n  useRef,\n  useLayoutEffect,\n  JSX,\n} from \"react\";\n\ninterface CollapsibleCardProps {\n  header: JSX.Element;\n  children: ReactNode;\n  defaultOpen?: boolean;\n  className?: string;\n}\n\n/**\n * Renders a \"collapsible\" card which, when collapsed, is meant to showcase very \"high-level\" information (e.g., the name), but when expanded, can show a list of sub-items which are all related to one another.\n */\nexport default function CollapsibleCard({\n  header,\n  children,\n  defaultOpen = false,\n  className = \"\",\n}: CollapsibleCardProps) {\n  const [open, setOpen] = useState(defaultOpen);\n  const [maxHeight, setMaxHeight] = useState<string | undefined>(undefined);\n  const contentRef = useRef<HTMLDivElement>(null);\n\n  // Update maxHeight for animation when open/close\n  useLayoutEffect(() => {\n    if (open && contentRef.current) {\n      setMaxHeight(contentRef.current.scrollHeight + \"px\");\n    } else {\n      setMaxHeight(\"0px\");\n    }\n  }, [open, children]);\n\n  // If content changes size while open, update maxHeight\n  useLayoutEffect(() => {\n    if (open && contentRef.current) {\n      const handleResize = () => {\n        setMaxHeight(contentRef.current!.scrollHeight + \"px\");\n      };\n      handleResize();\n      window.addEventListener(\"resize\", handleResize);\n      return () => window.removeEventListener(\"resize\", handleResize);\n    }\n  }, [open, children]);\n\n  return (\n    <div\n      className={`rounded-lg border border-border bg-background shadow-md transition-all ${className}`}\n    >\n      <button\n        type=\"button\"\n        className=\"w-full flex items-center px-8 py-6 text-left focus:outline-none focus:ring-2 focus:ring-accent rounded-t-lg bg-accent-background hover:bg-accent-background-hovered transition-colors\"\n        onClick={() => setOpen((prev) => !prev)}\n        aria-expanded={open}\n      >\n        <div className=\"flex-1\">{header}</div>\n        <span\n          className=\"ml-3 transition-transform flex-shrink-0\"\n          style={{ transform: open ? \"rotate(0deg)\" : \"rotate(-90deg)\" }}\n        >\n          <ChevronDown size={20} />\n        </span>\n      </button>\n      <div\n        ref={contentRef}\n        style={{\n          maxHeight,\n          opacity: open ? 1 : 0,\n          overflow: \"hidden\",\n          transition:\n            \"max-height 0.35s cubic-bezier(0.4, 0, 0.2, 1), opacity 0.25s cubic-bezier(0.4, 0, 0.2, 1)\",\n        }}\n        aria-hidden={!open}\n      >\n        <div className=\"border-t border-border bg-background rounded-b-lg\">\n          {children}\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/ConnectorMultiSelect.tsx",
    "content": "\"use client\";\n\nimport React, { useState, useRef, useEffect } from \"react\";\nimport { ConnectorStatus } from \"@/lib/types\";\nimport { ConnectorTitle } from \"@/components/admin/connectors/ConnectorTitle\";\nimport Label from \"@/refresh-components/form/Label\";\nimport { ErrorMessage } from \"formik\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { SvgX } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\n\ninterface ConnectorMultiSelectProps {\n  name: string;\n  label: string;\n  connectors: ConnectorStatus<any, any>[];\n  selectedIds: number[];\n  onChange: (selectedIds: number[]) => void;\n  disabled?: boolean;\n  placeholder?: string;\n  showError?: boolean;\n}\n\nexport const ConnectorMultiSelect = ({\n  name,\n  label,\n  connectors,\n  selectedIds,\n  onChange,\n  disabled = false,\n  placeholder = \"Search connectors...\",\n  showError = false,\n}: ConnectorMultiSelectProps) => {\n  const [open, setOpen] = useState(false);\n  const [searchQuery, setSearchQuery] = useState(\"\");\n  const dropdownRef = useRef<HTMLDivElement>(null);\n  const inputRef = useRef<HTMLInputElement>(null);\n\n  const selectedConnectors = connectors.filter((connector) =>\n    selectedIds.includes(connector.cc_pair_id)\n  );\n\n  const unselectedConnectors = connectors.filter(\n    (connector) => !selectedIds.includes(connector.cc_pair_id)\n  );\n\n  const allConnectorsSelected =\n    connectors.length > 0 && unselectedConnectors.length === 0;\n\n  const filteredUnselectedConnectors = unselectedConnectors.filter(\n    (connector) => {\n      const connectorName = connector.name || connector.connector.source;\n      return connectorName.toLowerCase().includes(searchQuery.toLowerCase());\n    }\n  );\n\n  useEffect(() => {\n    if (allConnectorsSelected) {\n      setSearchQuery(\"\");\n    }\n  }, [allConnectorsSelected, selectedIds]);\n\n  const selectConnector = (connectorId: number) => {\n    const newSelectedIds = [...selectedIds, connectorId];\n    onChange(newSelectedIds);\n    setSearchQuery(\"\");\n\n    const willAllBeSelected = connectors.length === newSelectedIds.length;\n\n    if (!willAllBeSelected) {\n      setTimeout(() => {\n        inputRef.current?.focus();\n      }, 0);\n    }\n  };\n\n  const removeConnector = (connectorId: number) => {\n    onChange(selectedIds.filter((id) => id !== connectorId));\n  };\n\n  useEffect(() => {\n    const handleClickOutside = (event: MouseEvent) => {\n      if (\n        dropdownRef.current &&\n        !dropdownRef.current.contains(event.target as Node) &&\n        inputRef.current !== event.target &&\n        !inputRef.current?.contains(event.target as Node)\n      ) {\n        setOpen(false);\n      }\n    };\n\n    document.addEventListener(\"mousedown\", handleClickOutside);\n    return () => {\n      document.removeEventListener(\"mousedown\", handleClickOutside);\n    };\n  }, []);\n\n  const handleKeyDown = (e: React.KeyboardEvent) => {\n    if (e.key === \"Escape\") {\n      setOpen(false);\n    }\n  };\n\n  const effectivePlaceholder = allConnectorsSelected\n    ? \"All connectors selected\"\n    : placeholder;\n\n  const isInputDisabled = disabled;\n\n  return (\n    <div className=\"flex flex-col w-full space-y-2 mb-4\">\n      {label && (\n        <Label>\n          <Text>{label}</Text>\n        </Label>\n      )}\n\n      <Text as=\"p\" mainUiMuted text03>\n        All documents indexed by the selected connectors will be part of this\n        document set.\n      </Text>\n      <div className=\"relative\">\n        <InputTypeIn\n          ref={inputRef}\n          leftSearchIcon\n          placeholder={effectivePlaceholder}\n          value={searchQuery}\n          variant={isInputDisabled ? \"disabled\" : undefined}\n          onChange={(e) => {\n            if (!allConnectorsSelected) {\n              setSearchQuery(e.target.value);\n              setOpen(true);\n            }\n          }}\n          onFocus={() => {\n            setOpen(true);\n          }}\n          onKeyDown={handleKeyDown}\n          className=\"rounded-12\"\n        />\n\n        {open && (\n          <div\n            ref={dropdownRef}\n            className=\"absolute z-50 w-full mt-1 rounded-12 border border-border-02 bg-background-neutral-00 shadow-md default-scrollbar max-h-[300px] overflow-auto\"\n          >\n            {allConnectorsSelected ? (\n              <div className=\"py-4 px-3\">\n                <Text as=\"p\" text03 className=\"text-center text-xs\">\n                  All available connectors have been selected. Remove connectors\n                  below to add different ones.\n                </Text>\n              </div>\n            ) : filteredUnselectedConnectors.length === 0 ? (\n              <div className=\"py-4 px-3\">\n                <Text as=\"p\" text03 className=\"text-center text-xs\">\n                  {searchQuery\n                    ? \"No matching connectors found\"\n                    : connectors.length === 0\n                      ? \"No private connectors available. Create a private connector first.\"\n                      : \"No more connectors available\"}\n                </Text>\n              </div>\n            ) : (\n              <div>\n                {filteredUnselectedConnectors.map((connector) => (\n                  <div\n                    key={connector.cc_pair_id}\n                    className=\"flex items-center justify-between py-2 px-3 cursor-pointer hover:bg-background-neutral-01 text-xs\"\n                    onClick={() => selectConnector(connector.cc_pair_id)}\n                  >\n                    <div className=\"flex items-center truncate mr-2\">\n                      <ConnectorTitle\n                        connector={connector.connector}\n                        ccPairId={connector.cc_pair_id}\n                        ccPairName={connector.name}\n                        isLink={false}\n                        showMetadata={false}\n                      />\n                    </div>\n                  </div>\n                ))}\n              </div>\n            )}\n          </div>\n        )}\n      </div>\n\n      {selectedConnectors.length > 0 ? (\n        <div className=\"mt-3\">\n          <div className=\"flex flex-wrap gap-1.5\">\n            {selectedConnectors.map((connector) => (\n              <div\n                key={connector.cc_pair_id}\n                className=\"flex items-center bg-background-neutral-00 rounded-12 border border-border-02 transition-all px-2 py-1 max-w-full group text-xs\"\n              >\n                <div className=\"flex items-center overflow-hidden\">\n                  <div className=\"flex-shrink-0 text-xs\">\n                    <ConnectorTitle\n                      connector={connector.connector}\n                      ccPairId={connector.cc_pair_id}\n                      ccPairName={connector.name}\n                      isLink={false}\n                      showMetadata={false}\n                    />\n                  </div>\n                </div>\n                <Button\n                  prominence=\"tertiary\"\n                  size=\"sm\"\n                  type=\"button\"\n                  aria-label=\"Remove connector\"\n                  tooltip=\"Remove connector\"\n                  onClick={() => removeConnector(connector.cc_pair_id)}\n                  icon={SvgX}\n                />\n              </div>\n            ))}\n          </div>\n        </div>\n      ) : (\n        <div className=\"mt-3 p-3 border border-dashed border-border-02 rounded-12 bg-background-neutral-01 text-text-03 text-xs\">\n          No connectors selected. Search and select connectors above.\n        </div>\n      )}\n\n      {showError && (\n        <ErrorMessage\n          name={name}\n          component=\"div\"\n          className=\"text-action-danger-05 text-xs mt-1\"\n        />\n      )}\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/DeleteButton.tsx",
    "content": "import { SvgTrash } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\n\nexport interface DeleteButtonProps {\n  onClick?: (event: React.MouseEvent<HTMLElement>) => void | Promise<void>;\n  disabled?: boolean;\n}\n\nexport function DeleteButton({ onClick, disabled }: DeleteButtonProps) {\n  return (\n    <Button\n      disabled={disabled}\n      onClick={onClick}\n      icon={SvgTrash}\n      tooltip=\"Delete\"\n      prominence=\"tertiary\"\n      size=\"sm\"\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/components/Dropdown.tsx",
    "content": "\"use client\";\n\nimport { forwardRef, useEffect, useRef, useState, JSX } from \"react\";\nimport { FiCheck, FiChevronDown, FiInfo } from \"react-icons/fi\";\nimport Popover from \"@/refresh-components/Popover\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nexport interface Option<T> {\n  name: string;\n  value: T;\n  description?: string;\n  icon?: (props: { size?: number; className?: string }) => JSX.Element;\n  // Domain-specific flag: when false, render as disabled (used by AccessTypeForm)\n  disabled?: boolean;\n  disabledReason?: string;\n}\n\nexport type StringOrNumberOption = Option<string | number>;\n\nexport const CustomDropdown = ({\n  children,\n  dropdown,\n  direction = \"down\",\n}: {\n  children: JSX.Element | string;\n  dropdown: JSX.Element | string;\n  direction?: \"up\" | \"down\";\n}) => {\n  const [isOpen, setIsOpen] = useState(false);\n  const dropdownRef = useRef<HTMLDivElement>(null);\n\n  useEffect(() => {\n    const handleClickOutside = (event: MouseEvent) => {\n      if (\n        dropdownRef.current &&\n        !dropdownRef.current.contains(event.target as Node)\n      ) {\n        setIsOpen(false);\n      }\n    };\n\n    document.addEventListener(\"mousedown\", handleClickOutside);\n    return () => {\n      document.removeEventListener(\"mousedown\", handleClickOutside);\n    };\n  }, []);\n\n  return (\n    <div className=\"relative inline-block text-left w-full\" ref={dropdownRef}>\n      <div onClick={() => setIsOpen(!isOpen)}>{children}</div>\n\n      {isOpen && (\n        <div\n          onClick={() => setIsOpen(!isOpen)}\n          className={`absolute ${\n            direction === \"up\" ? \"bottom-full pb-2\" : \"pt-2\"\n          } w-full z-30 box-shadow`}\n        >\n          {dropdown}\n        </div>\n      )}\n    </div>\n  );\n};\n\nexport function DefaultDropdownElement({\n  name,\n  icon,\n  description,\n  onSelect,\n  isSelected,\n  includeCheckbox = false,\n  disabled = false,\n  disabledReason,\n}: {\n  name: string | JSX.Element;\n  icon?: (props: { size?: number; className?: string }) => JSX.Element;\n  description?: string;\n  onSelect?: () => void;\n  isSelected?: boolean;\n  includeCheckbox?: boolean;\n  disabled?: boolean;\n  disabledReason?: string;\n}) {\n  return (\n    <div\n      className={`\n        flex\n        mx-1\n        px-2\n        text-sm\n        py-1.5\n        my-1\n        select-none\n        ${disabled ? \"cursor-not-allowed opacity-60\" : \"cursor-pointer\"}\n        bg-transparent\n        rounded\n        text-text-dark\n        ${disabled ? \"\" : \"hover:bg-accent-background-hovered\"}\n      `}\n      onClick={disabled ? undefined : onSelect}\n    >\n      <div>\n        <div className=\"flex\">\n          {includeCheckbox && (\n            <input\n              type=\"checkbox\"\n              className=\"mr-2\"\n              checked={isSelected}\n              onChange={() => null}\n            />\n          )}\n          {icon && icon({ size: 16, className: \"mr-2 h-4 w-4 my-auto\" })}\n          {name}\n          {disabled && disabledReason && (\n            <SimpleTooltip tooltip={disabledReason}>\n              <span className=\"ml-2 my-auto p-1 rounded hover:bg-background-100 text-warning transition-colors cursor-default\">\n                <FiInfo size={14} className=\"text-warning\" />\n              </span>\n            </SimpleTooltip>\n          )}\n        </div>\n        {description && <div className=\"text-xs\">{description}</div>}\n      </div>\n      {isSelected && (\n        <div className=\"ml-auto mr-1 my-auto\">\n          <FiCheck />\n        </div>\n      )}\n    </div>\n  );\n}\n\ntype DefaultDropdownProps = {\n  options: StringOrNumberOption[];\n  selected: string | null;\n  onSelect: (value: string | number | null) => void;\n  includeDefault?: boolean;\n  defaultValue?: string;\n  side?: \"top\" | \"right\" | \"bottom\" | \"left\";\n  maxHeight?: string;\n};\n\nexport const DefaultDropdown = forwardRef<HTMLDivElement, DefaultDropdownProps>(\n  (\n    {\n      options,\n      selected,\n      onSelect,\n      includeDefault,\n      defaultValue,\n      side,\n      maxHeight,\n    },\n    ref\n  ) => {\n    const selectedOption = options.find((option) => option.value === selected);\n    const [isOpen, setIsOpen] = useState(false);\n\n    const handleSelect = (value: any) => {\n      onSelect(value);\n      setIsOpen(false);\n    };\n\n    return (\n      <Popover open={isOpen} onOpenChange={setIsOpen}>\n        <Popover.Trigger asChild>\n          <div\n            className={`\n              flex\n              text-sm\n              bg-background\n              px-3\n              py-1.5\n              rounded-lg\n              border\n              border-border\n              cursor-pointer\n              w-full`}\n          >\n            <p className=\"line-clamp-1\">\n              {selectedOption?.name ||\n                (includeDefault\n                  ? defaultValue || \"Default\"\n                  : \"Select an option...\")}\n            </p>\n            <FiChevronDown className=\"my-auto ml-auto\" />\n          </div>\n        </Popover.Trigger>\n        <Popover.Content\n          align=\"start\"\n          side={side}\n          sideOffset={5}\n          width=\"trigger\"\n        >\n          <div\n            ref={ref}\n            className={`\n              rounded-lg\n              flex\n              flex-col\n              bg-background\n              ${maxHeight || \"max-h-96\"}\n              overflow-y-auto\n              overscroll-contain`}\n          >\n            {includeDefault && (\n              <DefaultDropdownElement\n                key={-1}\n                name=\"Default\"\n                onSelect={() => handleSelect(null)}\n                isSelected={selected === null}\n              />\n            )}\n            {options.map((option, ind) => {\n              const isSelected = option.value === selected;\n              return (\n                <DefaultDropdownElement\n                  key={option.value}\n                  name={option.name}\n                  description={option.description}\n                  onSelect={() => handleSelect(option.value)}\n                  isSelected={isSelected}\n                  icon={option.icon}\n                  disabled={option.disabled}\n                  disabledReason={option.disabledReason}\n                />\n              );\n            })}\n          </div>\n        </Popover.Content>\n      </Popover>\n    );\n  }\n);\n"
  },
  {
    "path": "web/src/components/EditableStringFieldDisplay.tsx",
    "content": "import { EditIcon } from \"@/components/icons/icons\";\nimport { useEffect, useRef, useState } from \"react\";\nimport { Input } from \"@/components/ui/input\";\nimport { cn } from \"@/lib/utils\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport { SvgCheck, SvgX } from \"@opal/icons\";\ninterface EditableStringFieldDisplayProps {\n  value: string;\n  isEditable: boolean;\n  onUpdate: (newValue: string) => Promise<void>;\n  textClassName?: string;\n  scale?: number;\n}\n\nexport function EditableStringFieldDisplay({\n  value,\n  isEditable,\n  onUpdate,\n  textClassName,\n  scale = 1,\n}: EditableStringFieldDisplayProps) {\n  const [isEditing, setIsEditing] = useState(false);\n  const [editableValue, setEditableValue] = useState(value);\n  const inputRef = useRef<HTMLInputElement | HTMLTextAreaElement>(null);\n  const containerRef = useRef<HTMLDivElement>(null);\n\n  useEffect(() => {\n    if (isEditing && inputRef.current) {\n      inputRef.current.focus();\n    }\n  }, [isEditing]);\n\n  useEffect(() => {\n    const handleClickOutside = (event: MouseEvent) => {\n      if (\n        containerRef.current &&\n        !containerRef.current.contains(event.target as Node) &&\n        isEditing\n      ) {\n        resetEditing();\n      }\n    };\n\n    document.addEventListener(\"mousedown\", handleClickOutside);\n    return () => {\n      document.removeEventListener(\"mousedown\", handleClickOutside);\n    };\n  }, [isEditing]);\n\n  const handleValueChange = (e: React.ChangeEvent<HTMLInputElement>) => {\n    setEditableValue(e.target.value);\n  };\n\n  const handleUpdate = async () => {\n    await onUpdate(editableValue);\n    setIsEditing(false);\n  };\n\n  const resetEditing = () => {\n    setIsEditing(false);\n    setEditableValue(value);\n  };\n\n  const handleKeyDown = (\n    e: React.KeyboardEvent<HTMLInputElement | HTMLTextAreaElement>\n  ) => {\n    if (e.key === \"Enter\") {\n      handleUpdate();\n    }\n  };\n\n  return (\n    <div ref={containerRef} className={\"flex items-center\"}>\n      <Input\n        ref={inputRef as React.RefObject<HTMLInputElement>}\n        type=\"text\"\n        value={editableValue}\n        onChange={handleValueChange}\n        onKeyDown={handleKeyDown}\n        className={cn(\n          textClassName,\n          \"text-3xl font-bold text-text-800\",\n          \"user-text\",\n          isEditing ? \"block\" : \"hidden\"\n        )}\n        style={{ fontSize: `${scale}rem` }}\n      />\n      {!isEditing && (\n        <span\n          onClick={() => isEditable && setIsEditing(true)}\n          className={cn(\n            textClassName,\n            \"text-3xl font-bold text-text-800\",\n            \"cursor-pointer user-text\"\n          )}\n          style={{ fontSize: `${scale}rem` }}\n        >\n          {value}\n        </span>\n      )}\n      {isEditing && isEditable ? (\n        <>\n          <div className={cn(\"flex\", \"flex-row\")}>\n            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n            <IconButton\n              onClick={handleUpdate}\n              internal\n              className=\"ml-2\"\n              icon={SvgCheck}\n            />\n            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n            <IconButton\n              onClick={resetEditing}\n              internal\n              className=\"ml-2\"\n              icon={SvgX}\n            />\n          </div>\n        </>\n      ) : (\n        <h1\n          onClick={() => isEditable && setIsEditing(true)}\n          className={`group flex ${isEditable ? \"cursor-pointer\" : \"\"} ${\"\"}`}\n          style={{ fontSize: `${scale}rem` }}\n        >\n          {isEditable && (\n            <EditIcon className={`visible ml-2`} size={12 * scale} />\n          )}\n        </h1>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/EditableValue.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { FiEdit2 } from \"react-icons/fi\";\nimport { CheckmarkIcon } from \"./icons/icons\";\n\nexport function EditableValue({\n  initialValue,\n  onSubmit,\n  emptyDisplay,\n  consistentWidth = true,\n}: {\n  initialValue: string;\n  onSubmit: (value: string) => Promise<boolean>;\n  emptyDisplay?: string;\n  consistentWidth?: boolean;\n}) {\n  const [isOpen, setIsOpen] = useState(false);\n  const [editedValue, setEditedValue] = useState(initialValue);\n\n  if (isOpen) {\n    return (\n      <div className=\"my-auto h-full flex\">\n        <input\n          value={editedValue}\n          onChange={(e) => {\n            setEditedValue(e.target.value);\n          }}\n          onKeyDown={async (e) => {\n            if (e.key === \"Enter\") {\n              const success = await onSubmit(editedValue);\n              if (success) {\n                setIsOpen(false);\n              }\n            }\n            if (e.key === \"Escape\") {\n              setIsOpen(false);\n              onSubmit(initialValue);\n            }\n          }}\n          className=\"border bg-background-200 border-background-300 rounded py-1 px-1 w-12 h-4 my-auto\"\n        />\n        <div\n          onClick={async () => {\n            const success = await onSubmit(editedValue);\n            if (success) {\n              setIsOpen(false);\n            }\n          }}\n          className=\"cursor-pointer my-auto ml-2\"\n        >\n          <CheckmarkIcon size={16} className=\"text-green-700\" />\n        </div>\n      </div>\n    );\n  }\n\n  return (\n    <div className=\"h-full flex flex-col\">\n      <div\n        className=\"flex my-auto cursor-pointer hover:bg-accent-background-hovered rounded\"\n        onClick={() => setIsOpen(true)}\n      >\n        <div className={\"flex \" + (consistentWidth && \" w-6\")}>\n          <div className=\"ml-auto my-auto\">{initialValue || emptyDisplay}</div>\n        </div>\n        <div className=\"cursor-pointer ml-2 my-auto h-4\">\n          <FiEdit2 size={16} />\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/ErrorCallout.tsx",
    "content": "import { Callout } from \"@/components/ui/callout\";\nimport { FiAlertTriangle } from \"react-icons/fi\";\n\nexport function ErrorCallout({\n  errorTitle,\n  errorMsg,\n}: {\n  errorTitle?: string;\n  errorMsg?: string;\n}) {\n  return (\n    <div>\n      <Callout\n        className=\"mt-4\"\n        title={errorTitle || \"Page not found\"}\n        icon={<FiAlertTriangle className=\"text-red-500 h-5 w-5\" />}\n        type=\"danger\"\n      >\n        {errorMsg}\n      </Callout>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/FederatedConnectorSelector.tsx",
    "content": "import React, { useState, useRef, useEffect } from \"react\";\nimport {\n  FederatedConnectorDetail,\n  FederatedConnectorConfig,\n  federatedSourceToRegularSource,\n} from \"@/lib/types\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport Label from \"@/refresh-components/form/Label\";\nimport { ErrorMessage } from \"formik\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { SvgX } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\n\ninterface FederatedConnectorSelectorProps {\n  name: string;\n  label: string;\n  federatedConnectors: FederatedConnectorDetail[];\n  selectedConfigs: FederatedConnectorConfig[];\n  onChange: (selectedConfigs: FederatedConnectorConfig[]) => void;\n  disabled?: boolean;\n  placeholder?: string;\n  showError?: boolean;\n}\n\nexport const FederatedConnectorSelector = ({\n  name,\n  label,\n  federatedConnectors,\n  selectedConfigs,\n  onChange,\n  disabled = false,\n  placeholder = \"Search federated connectors...\",\n  showError = false,\n}: FederatedConnectorSelectorProps) => {\n  const [open, setOpen] = useState(false);\n  const [searchQuery, setSearchQuery] = useState(\"\");\n  const dropdownRef = useRef<HTMLDivElement>(null);\n  const inputRef = useRef<HTMLInputElement>(null);\n\n  const selectedConnectorIds = selectedConfigs.map(\n    (config) => config.federated_connector_id\n  );\n\n  const selectedConnectors = federatedConnectors.filter((connector) =>\n    selectedConnectorIds.includes(connector.id)\n  );\n\n  const unselectedConnectors = federatedConnectors.filter(\n    (connector) => !selectedConnectorIds.includes(connector.id)\n  );\n\n  const allConnectorsSelected = unselectedConnectors.length === 0;\n\n  const filteredUnselectedConnectors = unselectedConnectors.filter(\n    (connector) => {\n      const connectorName = connector.name;\n      return connectorName.toLowerCase().includes(searchQuery.toLowerCase());\n    }\n  );\n\n  useEffect(() => {\n    if (allConnectorsSelected && open) {\n      setOpen(false);\n      inputRef.current?.blur();\n      setSearchQuery(\"\");\n    }\n  }, [allConnectorsSelected, open]);\n\n  const selectConnector = (connectorId: number) => {\n    // Add connector with empty entities configuration\n    const newConfig: FederatedConnectorConfig = {\n      federated_connector_id: connectorId,\n      entities: {},\n    };\n\n    const newSelectedConfigs = [...selectedConfigs, newConfig];\n    onChange(newSelectedConfigs);\n    setSearchQuery(\"\");\n\n    const willAllBeSelected =\n      federatedConnectors.length === newSelectedConfigs.length;\n\n    if (!willAllBeSelected) {\n      setTimeout(() => {\n        inputRef.current?.focus();\n      }, 0);\n    }\n  };\n\n  const removeConnector = (connectorId: number) => {\n    onChange(\n      selectedConfigs.filter(\n        (config) => config.federated_connector_id !== connectorId\n      )\n    );\n  };\n\n  useEffect(() => {\n    const handleClickOutside = (event: MouseEvent) => {\n      if (\n        dropdownRef.current &&\n        !dropdownRef.current.contains(event.target as Node) &&\n        inputRef.current !== event.target &&\n        !inputRef.current?.contains(event.target as Node)\n      ) {\n        setOpen(false);\n      }\n    };\n\n    document.addEventListener(\"mousedown\", handleClickOutside);\n    return () => {\n      document.removeEventListener(\"mousedown\", handleClickOutside);\n    };\n  }, []);\n\n  const handleKeyDown = (e: React.KeyboardEvent) => {\n    if (e.key === \"Escape\") {\n      setOpen(false);\n    }\n  };\n\n  const effectivePlaceholder = allConnectorsSelected\n    ? \"All federated connectors selected\"\n    : placeholder;\n\n  const isInputDisabled = disabled || allConnectorsSelected;\n\n  return (\n    <div className=\"flex flex-col w-full space-y-2 mb-4\">\n      {label && (\n        <Label>\n          <Text>{label}</Text>\n        </Label>\n      )}\n\n      <Text as=\"p\" mainUiMuted text03>\n        Documents from selected federated connectors will be searched in\n        real-time during queries.\n      </Text>\n      <div className=\"relative\">\n        <InputTypeIn\n          ref={inputRef}\n          leftSearchIcon\n          placeholder={effectivePlaceholder}\n          value={searchQuery}\n          variant={isInputDisabled ? \"disabled\" : undefined}\n          onChange={(e) => {\n            setSearchQuery(e.target.value);\n            setOpen(true);\n          }}\n          onKeyDown={handleKeyDown}\n          onFocus={() => {\n            if (!allConnectorsSelected) {\n              setOpen(true);\n            }\n          }}\n          className={\n            allConnectorsSelected\n              ? \"rounded-12 bg-background-neutral-01\"\n              : \"rounded-12\"\n          }\n        />\n\n        {open && !allConnectorsSelected && (\n          <div\n            ref={dropdownRef}\n            className=\"absolute z-50 w-full mt-1 rounded-12 border border-border-02 bg-background-neutral-00 shadow-md default-scrollbar max-h-[300px] overflow-auto\"\n          >\n            {filteredUnselectedConnectors.length === 0 ? (\n              <div className=\"py-4 text-center text-xs text-text-03\">\n                {searchQuery\n                  ? \"No matching federated connectors found\"\n                  : \"No more federated connectors available\"}\n              </div>\n            ) : (\n              <div>\n                {filteredUnselectedConnectors.map((connector) => (\n                  <div\n                    key={connector.id}\n                    className=\"flex items-center justify-between py-2 px-3 cursor-pointer hover:bg-background-neutral-01 text-xs\"\n                    onClick={() => selectConnector(connector.id)}\n                  >\n                    <div className=\"flex items-center truncate mr-2\">\n                      <div className=\"mr-2\">\n                        <SourceIcon\n                          sourceType={federatedSourceToRegularSource(\n                            connector.source\n                          )}\n                          iconSize={16}\n                        />\n                      </div>\n                      <span className=\"font-medium\">{connector.name}</span>\n                    </div>\n                  </div>\n                ))}\n              </div>\n            )}\n          </div>\n        )}\n      </div>\n\n      {selectedConnectors.length > 0 ? (\n        <div className=\"mt-3\">\n          <div className=\"flex flex-wrap gap-1.5\">\n            {selectedConnectors.map((connector) => {\n              const config = selectedConfigs.find(\n                (c) => c.federated_connector_id === connector.id\n              );\n              const hasEntitiesConfigured =\n                config && Object.keys(config.entities).length > 0;\n\n              return (\n                <div\n                  key={connector.id}\n                  className=\"flex items-center bg-background-neutral-00 rounded-12 border border-border-02 transition-all px-2 py-1 max-w-full group text-xs\"\n                >\n                  <div className=\"flex items-center overflow-hidden\">\n                    <div className=\"mr-1 flex-shrink-0\">\n                      <SourceIcon\n                        sourceType={federatedSourceToRegularSource(\n                          connector.source\n                        )}\n                        iconSize={14}\n                      />\n                    </div>\n                    <span className=\"font-medium truncate\">\n                      {connector.name}\n                    </span>\n                    {hasEntitiesConfigured && (\n                      <div\n                        className=\"ml-1 w-2 h-2 bg-green-500 rounded-full flex-shrink-0\"\n                        title=\"Entities configured\"\n                      />\n                    )}\n                  </div>\n                  <div className=\"flex items-center ml-2 gap-1\">\n                    <Button\n                      prominence=\"tertiary\"\n                      size=\"sm\"\n                      type=\"button\"\n                      aria-label=\"Remove connector\"\n                      tooltip=\"Remove connector\"\n                      onClick={() => removeConnector(connector.id)}\n                      icon={SvgX}\n                    />\n                  </div>\n                </div>\n              );\n            })}\n          </div>\n        </div>\n      ) : (\n        <div className=\"mt-3 p-3 border border-dashed border-border-02 rounded-12 bg-background-neutral-01 text-text-03 text-xs\">\n          No federated connectors selected. Search and select connectors above.\n        </div>\n      )}\n\n      {showError && (\n        <ErrorMessage\n          name={name}\n          component=\"div\"\n          className=\"text-action-danger-05 text-xs mt-1\"\n        />\n      )}\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/Field.tsx",
    "content": "\"use client\";\n\nimport {\n  ArrayHelpers,\n  ErrorMessage,\n  Field,\n  FieldArray,\n  FastField,\n  useField,\n  useFormikContext,\n} from \"formik\";\nimport { FileUpload } from \"@/components/admin/connectors/FileUpload\";\nimport * as Yup from \"yup\";\nimport { FormBodyBuilder } from \"./admin/connectors/types\";\nimport { StringOrNumberOption } from \"@/components/Dropdown\";\nimport {\n  Select,\n  SelectItem,\n  SelectContent,\n  SelectTrigger,\n  SelectValue,\n} from \"@/components/ui/select\";\nimport { FiInfo, FiX } from \"react-icons/fi\";\nimport ReactMarkdown from \"react-markdown\";\nimport { FaMarkdown } from \"react-icons/fa\";\nimport { useState, useEffect, memo, JSX } from \"react\";\nimport remarkGfm from \"remark-gfm\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\n\nimport { Section } from \"@/layouts/general-layouts\";\nimport { cn, transformLinkUri } from \"@/lib/utils\";\nimport FileInput from \"@/app/admin/connectors/[connector]/pages/ConnectorInput/FileInput\";\nimport InputDatePicker from \"@/refresh-components/inputs/InputDatePicker\";\nimport { RichTextSubtext } from \"./RichTextSubtext\";\nimport {\n  TypedFile,\n  createTypedFile,\n  getFileTypeDefinitionForField,\n  FILE_TYPE_DEFINITIONS,\n} from \"@/lib/connectors/fileTypes\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport CreateButton from \"@/refresh-components/buttons/CreateButton\";\n\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport InputTextArea, {\n  InputTextAreaProps,\n} from \"@/refresh-components/inputs/InputTextArea\";\nimport { SvgEye, SvgEyeClosed } from \"@opal/icons\";\n\nexport function SectionHeader({\n  children,\n}: {\n  children: string | JSX.Element;\n}) {\n  return <div className=\"mb-4 font-bold text-lg\">{children}</div>;\n}\n\nexport function Label({\n  children,\n  small,\n  className,\n  htmlFor,\n}: {\n  children: string | JSX.Element;\n  small?: boolean;\n  className?: string;\n  htmlFor?: string;\n}) {\n  return (\n    <label\n      {...(htmlFor ? { htmlFor } : {})}\n      className={`block font-medium ${className} ${\n        small ? \"text-sm\" : \"text-base\"\n      }`}\n    >\n      {children}\n    </label>\n  );\n}\n\nexport function LabelWithTooltip({\n  children,\n  tooltip,\n}: {\n  children: string | JSX.Element;\n  tooltip: string;\n}) {\n  return (\n    <div className=\"flex items-center gap-x-2\">\n      <Label>{children}</Label>\n      <ToolTipDetails>{tooltip}</ToolTipDetails>\n    </div>\n  );\n}\n\nexport function SubLabel({ children }: { children: string | JSX.Element }) {\n  // Add whitespace-pre-wrap for multiline descriptions (when children is a string with newlines)\n  const hasNewlines = typeof children === \"string\" && children.includes(\"\\n\");\n\n  // If children is a string, use RichTextSubtext to parse and render links\n  if (typeof children === \"string\") {\n    return (\n      <span className=\"block text-sm text-text-03 mb-2\">\n        <RichTextSubtext\n          text={children}\n          className={hasNewlines ? \"whitespace-pre-wrap\" : \"\"}\n        />\n      </span>\n    );\n  }\n\n  return (\n    <span\n      className={`block text-sm text-text-03 mb-2 ${\n        hasNewlines ? \"whitespace-pre-wrap\" : \"\"\n      }`}\n    >\n      {children}\n    </span>\n  );\n}\n\nexport function ManualErrorMessage({ children }: { children: string }) {\n  return <div className=\"text-action-danger-05 text-sm\">{children}</div>;\n}\n\nexport function ExplanationText({\n  text,\n  link,\n}: {\n  text: string;\n  link?: string;\n}) {\n  return link ? (\n    <a\n      className=\"underline text-text-500 cursor-pointer text-xs font-medium\"\n      target=\"_blank\"\n      href={link}\n    >\n      {text}\n    </a>\n  ) : (\n    <Text as=\"p\" text03 secondaryBody>\n      {text}\n    </Text>\n  );\n}\n\nexport function ToolTipDetails({ children }: { children: string }) {\n  return (\n    <SimpleTooltip tooltip={children} side=\"top\" align=\"center\">\n      <FiInfo size={12} />\n    </SimpleTooltip>\n  );\n}\n\nexport const FieldLabel = ({\n  subtext,\n  error,\n  name,\n  tooltip,\n  optional,\n  hideError,\n  label,\n  removeLabel,\n  vertical,\n}: {\n  subtext?: string | JSX.Element;\n  error?: string;\n  name: string;\n  tooltip?: string;\n  optional?: boolean;\n  hideError?: boolean;\n  label: string;\n  removeLabel?: boolean;\n  vertical?: boolean;\n}) => (\n  <>\n    <div\n      className={`flex ${\n        vertical ? \"flex-col\" : \"flex-row\"\n      } gap-x-2 items-start`}\n    >\n      <div className=\"flex gap-x-2 items-center\">\n        {!removeLabel && (\n          <Label small={false} htmlFor={name}>\n            {label}\n          </Label>\n        )}\n        {optional ? <span>(optional) </span> : \"\"}\n        {tooltip && <ToolTipDetails>{tooltip}</ToolTipDetails>}\n      </div>\n      {error ? (\n        <ManualErrorMessage>{error}</ManualErrorMessage>\n      ) : (\n        !hideError && (\n          <ErrorMessage\n            name={name}\n            component=\"div\"\n            className=\"text-action-danger-05 my-auto text-sm\"\n          />\n        )\n      )}\n    </div>\n    {subtext && <SubLabel>{subtext}</SubLabel>}\n  </>\n);\n\nexport function TextFormField({\n  name,\n  label,\n  subtext,\n  placeholder,\n  type = \"text\",\n  optional,\n  includeRevert,\n  isTextArea = false,\n  disabled = false,\n  autoCompleteEnabled = false,\n  error,\n  defaultHeight,\n  isCode = false,\n  fontSize,\n  hideError,\n  tooltip,\n  explanationText,\n  explanationLink,\n  small,\n  maxWidth,\n  removeLabel,\n  min,\n  onChange,\n  width,\n  vertical,\n  className,\n  showPasswordToggle = false,\n}: {\n  name: string;\n  removeLabel?: boolean;\n  label: string;\n  subtext?: string | JSX.Element;\n  placeholder?: string;\n  includeRevert?: boolean;\n  optional?: boolean;\n  type?: string;\n  isTextArea?: boolean;\n  disabled?: boolean;\n  autoCompleteEnabled?: boolean;\n  error?: string;\n  defaultHeight?: string;\n  isCode?: boolean;\n  fontSize?: \"sm\" | \"md\" | \"lg\";\n  maxWidth?: string;\n  hideError?: boolean;\n  tooltip?: string;\n  explanationText?: string;\n  explanationLink?: string;\n  small?: boolean;\n  min?: number;\n  onChange?: (e: React.ChangeEvent<HTMLInputElement>) => void;\n  width?: string;\n  vertical?: boolean;\n  className?: string;\n  showPasswordToggle?: boolean;\n}) {\n  let heightString = defaultHeight || \"\";\n  if (isTextArea && !heightString) {\n    heightString = \"h-28\";\n  }\n\n  const [, , { setValue }] = useField(name);\n\n  const handleChange = (\n    e: React.ChangeEvent<HTMLInputElement | HTMLTextAreaElement>\n  ) => {\n    if (onChange) {\n      onChange(e as React.ChangeEvent<HTMLInputElement>);\n    } else {\n      setValue(e.target.value);\n    }\n  };\n  const textSizeClasses = {\n    sm: {\n      label: \"text-sm\",\n      input: \"text-sm\",\n      placeholder: \"text-sm\",\n    },\n    md: {\n      label: \"text-base\",\n      input: \"text-base\",\n      placeholder: \"text-base\",\n    },\n    lg: {\n      label: \"text-lg\",\n      input: \"text-lg\",\n      placeholder: \"text-lg\",\n    },\n  };\n\n  const sizeClass = textSizeClasses[fontSize || \"sm\"];\n  const isPasswordField = type === \"password\";\n  const [isPasswordVisible, setIsPasswordVisible] = useState(false);\n  const effectiveType = isPasswordField && isPasswordVisible ? \"text\" : type;\n\n  return (\n    <div className={`w-full ${maxWidth} ${width}`}>\n      <FieldLabel\n        key={name}\n        subtext={subtext}\n        error={error}\n        name={name}\n        tooltip={tooltip}\n        optional={optional}\n        hideError={hideError}\n        label={label}\n        removeLabel={removeLabel}\n        vertical={vertical}\n      />\n      <div className={`w-full flex ${includeRevert && \"gap-x-2\"} relative`}>\n        <Field\n          onChange={handleChange}\n          min={min}\n          as={isTextArea ? \"textarea\" : \"input\"}\n          type={effectiveType}\n          data-testid={name}\n          name={name}\n          id={name}\n          className={`\n            ${small && sizeClass.input}\n            flex\n            h-10\n            w-full\n            rounded-md\n            border\n            px-3\n            py-2\n            mt-1\n            file:border-0\n            file:bg-transparent\n            file:text-sm\n            file:font-medium\n            file:text-text-05\n            placeholder:text-text-02\n            placeholder:font-description\n            placeholder:${sizeClass.placeholder}\n            caret-accent\n            focus-visible:outline-none\n            focus-visible:ring-1\n            focus-visible:ring-lighter-agent\n            focus-visible:ring-offset-1\n            disabled:cursor-not-allowed\n            disabled:opacity-50\n            md:text-sm\n            border-border-03\n            ring-offset-background-neutral-00\n            file:text-text-inverted-05\n            text-text-04\n\n            ${heightString}\n            ${sizeClass.input}\n            ${disabled ? \"bg-background-neutral-02\" : \"\"}\n            ${isCode ? \"font-mono\" : \"\"}\n            ${className}\n            bg-background-neutral-00\n            ${isPasswordField && showPasswordToggle ? \"pr-10\" : \"\"}\n          `}\n          disabled={disabled}\n          placeholder={placeholder}\n          autoComplete={autoCompleteEnabled ? undefined : \"off\"}\n        />\n        {!isTextArea && isPasswordField && showPasswordToggle && (\n          <button\n            type=\"button\"\n            aria-label={isPasswordVisible ? \"Hide password\" : \"Show password\"}\n            className=\"absolute right-3 top-1/2 -translate-y-1/2 stroke-text-02 hover:stroke-text-03 mt-0.5\"\n            onClick={() => setIsPasswordVisible((v) => !v)}\n            tabIndex={0}\n          >\n            {isPasswordVisible ? (\n              <SvgEye className=\"h-4 w-4\" />\n            ) : (\n              <SvgEyeClosed className=\"h-4 w-4\" />\n            )}\n          </button>\n        )}\n      </div>\n\n      {explanationText && (\n        <ExplanationText link={explanationLink} text={explanationText} />\n      )}\n    </div>\n  );\n}\n\nexport function FileUploadFormField({\n  name,\n  label,\n  subtext,\n}: {\n  name: string;\n  label: string;\n  subtext?: string | JSX.Element;\n}) {\n  // We create a *temporary* field inside of `Formik` to throw the `File` object into.\n  // The actual *contents* of the file will be thrown into the field called `name`.\n  const fileName = `temporary.filename-${name}`;\n  const [fileField] = useField<File>(fileName);\n  const [, , contentsHelper] = useField<string>(name);\n\n  useEffect(() => {\n    const reader = new FileReader();\n    reader.onload = (e) => {\n      contentsHelper.setValue(e.target?.result as string);\n    };\n    if (fileField.value instanceof File) {\n      reader.readAsText(fileField.value);\n    }\n  }, [contentsHelper, fileField.value]);\n\n  return (\n    <div className=\"w-full\">\n      <FieldLabel name={name} label={label} subtext={subtext} />\n      <FileInput name={fileName} multiple={false} hideError />\n    </div>\n  );\n}\n\nexport function TypedFileUploadFormField({\n  name,\n  label,\n  subtext,\n}: {\n  name: string;\n  label: string;\n  subtext?: string | JSX.Element;\n}) {\n  const [field, , helpers] = useField<TypedFile | null>(name);\n  const [customError, setCustomError] = useState<string>(\"\");\n  const [isValidating, setIsValidating] = useState(false);\n  const [description, setDescription] = useState<string>(\"\");\n\n  useEffect(() => {\n    const typeDefinitionKey = getFileTypeDefinitionForField(name);\n    if (typeDefinitionKey) {\n      setDescription(\n        FILE_TYPE_DEFINITIONS[typeDefinitionKey].description || \"\"\n      );\n    }\n  }, [name]);\n\n  useEffect(() => {\n    const validateFile = async () => {\n      if (!field.value) {\n        setIsValidating(false);\n        return;\n      }\n\n      setIsValidating(true);\n\n      try {\n        const validation = await field.value.validate();\n        if (validation?.isValid) {\n          setCustomError(\"\");\n        } else {\n          setCustomError(validation?.errors.join(\", \") || \"Unknown error\");\n          helpers.setValue(null);\n        }\n      } catch (error) {\n        setCustomError(\n          error instanceof Error ? error.message : \"Validation error\"\n        );\n        helpers.setValue(null);\n      } finally {\n        setIsValidating(false);\n      }\n    };\n\n    validateFile();\n  }, [field.value, helpers]);\n\n  const handleFileSelection = async (files: File[]) => {\n    if (files.length === 0) {\n      helpers.setValue(null);\n      setCustomError(\"\");\n      return;\n    }\n\n    const file = files[0];\n    if (!file) {\n      setCustomError(\"File selection error\");\n      return;\n    }\n\n    const typeDefinitionKey = getFileTypeDefinitionForField(name);\n\n    if (!typeDefinitionKey) {\n      setCustomError(`No file type definition found for field: ${name}`);\n      return;\n    }\n\n    try {\n      const typedFile = createTypedFile(file, name, typeDefinitionKey);\n      helpers.setValue(typedFile);\n      setCustomError(\"\");\n    } catch (error) {\n      setCustomError(error instanceof Error ? error.message : \"Unknown error\");\n      helpers.setValue(null);\n    } finally {\n      setIsValidating(false);\n    }\n  };\n\n  return (\n    <div className=\"w-full\">\n      <FieldLabel name={name} label={label} subtext={subtext} />\n      {description && (\n        <div className=\"text-sm text-text-03 mb-2\">{description}</div>\n      )}\n      <FileUpload\n        selectedFiles={field.value ? [field.value.file] : []}\n        setSelectedFiles={handleFileSelection}\n        multiple={false}\n      />\n      {/* Validation feedback */}\n      {isValidating && (\n        <div className=\"text-status-info-05 text-sm mt-1\">\n          Validating file...\n        </div>\n      )}\n\n      {customError ? (\n        <div className=\"text-action-danger-05 text-sm mt-1\">{customError}</div>\n      ) : (\n        <ErrorMessage\n          name={name}\n          component=\"div\"\n          className=\"text-action-danger-05 text-sm mt-1\"\n        />\n      )}\n    </div>\n  );\n}\n\nexport function MultiSelectField({\n  name,\n  label,\n  subtext,\n  options,\n  onChange,\n  error,\n  hideError,\n  small,\n  selectedInitially,\n}: {\n  selectedInitially: string[];\n  name: string;\n  label: string;\n  subtext?: string | JSX.Element;\n  options: { value: string; label: string }[];\n  onChange?: (selected: string[]) => void;\n  error?: string;\n  hideError?: boolean;\n  small?: boolean;\n}) {\n  const [selectedOptions, setSelectedOptions] =\n    useState<string[]>(selectedInitially);\n\n  const handleCheckboxChange = (value: string) => {\n    const newSelectedOptions = selectedOptions.includes(value)\n      ? selectedOptions.filter((option) => option !== value)\n      : [...selectedOptions, value];\n\n    setSelectedOptions(newSelectedOptions);\n    if (onChange) {\n      onChange(newSelectedOptions);\n    }\n  };\n\n  return (\n    <div className=\"mb-6\">\n      <div className=\"flex gap-x-2 items-center\">\n        <Label small={small}>{label}</Label>\n        {error ? (\n          <ManualErrorMessage>{error}</ManualErrorMessage>\n        ) : (\n          !hideError && (\n            <ErrorMessage\n              name={name}\n              component=\"div\"\n              className=\"text-action-danger-05 my-auto text-sm\"\n            />\n          )\n        )}\n      </div>\n\n      {subtext && <SubLabel>{subtext}</SubLabel>}\n      <div className=\"mt-2\">\n        {options.map((option) => (\n          <label key={option.value} className=\"flex items-center mb-2\">\n            <input\n              type=\"checkbox\"\n              name={name}\n              value={option.value}\n              checked={selectedOptions.includes(option.value)}\n              onChange={() => handleCheckboxChange(option.value)}\n              className=\"mr-2\"\n            />\n            {option.label}\n          </label>\n        ))}\n      </div>\n    </div>\n  );\n}\ninterface MarkdownPreviewProps {\n  name: string;\n  label: string;\n  placeholder?: string;\n  error?: string;\n}\n\nexport const MarkdownFormField = ({\n  name,\n  label,\n  error,\n  placeholder = \"Enter your markdown here...\",\n}: MarkdownPreviewProps) => {\n  const [field] = useField(name);\n  const [isPreviewOpen, setIsPreviewOpen] = useState(false);\n\n  const togglePreview = () => {\n    setIsPreviewOpen(!isPreviewOpen);\n  };\n\n  return (\n    <div className=\"flex flex-col space-y-4 mb-4\">\n      <Label>{label}</Label>\n      <div className=\"border border-border-02 rounded-md\">\n        <div className=\"flex items-center justify-between px-4 py-2 bg-background-neutral-02 rounded-t-md\">\n          <div className=\"flex items-center space-x-2\">\n            <FaMarkdown className=\"text-text-03\" />\n            <span className=\"text-sm font-semibold text-text-04\">Markdown</span>\n          </div>\n          <button\n            type=\"button\"\n            onClick={togglePreview}\n            className=\"text-sm font-semibold text-text-04 hover:text-text-05 focus:outline-none\"\n          >\n            {isPreviewOpen ? \"Write\" : \"Preview\"}\n          </button>\n        </div>\n        {isPreviewOpen ? (\n          <div className=\"p-4 border-t border-border-02\">\n            <ReactMarkdown\n              className=\"prose dark:prose-invert\"\n              remarkPlugins={[remarkGfm]}\n              urlTransform={transformLinkUri}\n            >\n              {field.value}\n            </ReactMarkdown>\n          </div>\n        ) : (\n          <div className=\"pt-2 px-2\">\n            <textarea\n              {...field}\n              rows={2}\n              placeholder={placeholder}\n              className={`w-full p-2 border border-border-02 rounded-md`}\n            />\n          </div>\n        )}\n      </div>\n      {error ? (\n        <ManualErrorMessage>{error}</ManualErrorMessage>\n      ) : (\n        <ErrorMessage\n          name={name}\n          component=\"div\"\n          className=\"text-action-danger-05 text-sm mt-1\"\n        />\n      )}\n    </div>\n  );\n};\n\ninterface BooleanFormFieldProps {\n  name: string;\n  label: string;\n  subtext?: string | JSX.Element;\n  removeIndent?: boolean;\n  small?: boolean;\n  noLabel?: boolean;\n  disabled?: boolean;\n  optional?: boolean;\n  tooltip?: string;\n  disabledTooltip?: string;\n  disabledTooltipSide?: \"top\" | \"bottom\" | \"left\" | \"right\";\n  onChange?: (checked: boolean) => void;\n}\n\nexport const BooleanFormField = memo(function BooleanFormField({\n  name,\n  label,\n  subtext,\n  removeIndent,\n  noLabel,\n  optional,\n  small,\n  disabled,\n  tooltip,\n  disabledTooltip,\n  disabledTooltipSide,\n  onChange,\n}: BooleanFormFieldProps) {\n  // Generate a stable, valid id from the field name for label association\n  const checkboxId = `checkbox-${name.replace(/[^a-zA-Z0-9_-]/g, \"_\")}`;\n\n  return (\n    <div>\n      <FastField\n        name={name}\n        type=\"checkbox\"\n        disabled={disabled}\n        shouldUpdate={(next: any, prev: any) =>\n          next.disabled !== prev.disabled ||\n          next.formik.values !== prev.formik.values\n        }\n      >\n        {({ field, form }: any) => {\n          const toggle = () => {\n            if (!disabled) {\n              const newValue = !field.value;\n              form.setFieldValue(name, newValue);\n              if (onChange) onChange(newValue);\n            }\n          };\n\n          return (\n            <SimpleTooltip\n              // This may seem confusing, but we only want to show the `disabledTooltip` if and only if the `BooleanFormField` is disabled.\n              // If it disabled, then we \"enable\" the showing of the tooltip. Thus, `disabled={!disabled}` is not a mistake.\n              disabled={!disabled}\n              tooltip={disabledTooltip}\n              side={disabledTooltipSide}\n            >\n              <Section flexDirection=\"row\" width=\"fit\" height=\"fit\" gap={0}>\n                <Checkbox\n                  aria-label={`${label\n                    .toLowerCase()\n                    .replace(\" \", \"-\")}-checkbox`}\n                  id={checkboxId}\n                  className={cn(\n                    disabled && \"opacity-50\",\n                    removeIndent ? \"mr-2\" : \"mx-3\"\n                  )}\n                  checked={Boolean(field.value)}\n                  onCheckedChange={(checked) => {\n                    if (!disabled) {\n                      form.setFieldValue(name, checked === true);\n                      if (onChange) onChange(checked === true);\n                    }\n                  }}\n                />\n                {!noLabel && (\n                  <div\n                    className={disabled ? \"\" : \"cursor-pointer\"}\n                    onClick={toggle}\n                  >\n                    <div className=\"flex items-center gap-x-2\">\n                      <Label small={small}>{`${label}${\n                        optional ? \" (Optional)\" : \"\"\n                      }`}</Label>\n                      {tooltip && <ToolTipDetails>{tooltip}</ToolTipDetails>}\n                    </div>\n                    {subtext && <SubLabel>{subtext}</SubLabel>}\n                  </div>\n                )}\n              </Section>\n            </SimpleTooltip>\n          );\n        }}\n      </FastField>\n\n      <ErrorMessage\n        name={name}\n        component=\"div\"\n        className=\"text-action-danger-05 text-sm mt-1\"\n      />\n    </div>\n  );\n});\n\ninterface TextArrayFieldProps<T extends Yup.AnyObject> {\n  name: string;\n  label: string | JSX.Element;\n  values: T;\n  subtext?: string | JSX.Element;\n  type?: string;\n  tooltip?: string;\n  minFields?: number;\n  placeholder?: string;\n  disabled?: boolean;\n}\n\nexport function TextArrayField<T extends Yup.AnyObject>({\n  name,\n  label,\n  values,\n  subtext,\n  type,\n  tooltip,\n  minFields = 0,\n  placeholder = \"\",\n  disabled = false,\n}: TextArrayFieldProps<T>) {\n  return (\n    <div className=\"mb-4\">\n      <div className=\"flex gap-x-2 items-center\">\n        <Label>{label}</Label>\n        {tooltip && <ToolTipDetails>{tooltip}</ToolTipDetails>}\n      </div>\n      {subtext && <SubLabel>{subtext}</SubLabel>}\n\n      <FieldArray\n        name={name}\n        render={(arrayHelpers: ArrayHelpers) => (\n          <div>\n            {values[name] &&\n              values[name].length > 0 &&\n              (values[name] as string[]).map((_, index) => (\n                <div key={index} className=\"mt-2\">\n                  <div className=\"flex\">\n                    <Field\n                      type={type}\n                      name={`${name}.${index}`}\n                      id={name}\n                      className={`\n                      border\n                      border-border\n                      bg-background\n                      rounded\n                      w-full\n                      py-2\n                      px-3\n                      mr-4\n                      disabled:cursor-not-allowed\n                      `}\n                      // Disable autocomplete since the browser doesn't know how to handle an array of text fields\n                      autoComplete=\"off\"\n                      placeholder={placeholder}\n                      disabled={disabled}\n                    />\n                    <div className=\"my-auto\">\n                      {index >= minFields ? (\n                        <FiX\n                          className=\"my-auto w-10 h-10 cursor-pointer hover:bg-background-neutral-02 rounded p-2\"\n                          onClick={() => {\n                            if (!disabled) {\n                              arrayHelpers.remove(index);\n                            }\n                          }}\n                        />\n                      ) : (\n                        <div className=\"w-10 h-10\" />\n                      )}\n                    </div>\n                  </div>\n                  <ErrorMessage\n                    name={`${name}.${index}`}\n                    component=\"div\"\n                    className=\"text-action-danger-05 text-sm mt-1\"\n                  />\n                </div>\n              ))}\n\n            <CreateButton\n              onClick={() => {\n                if (!disabled) {\n                  arrayHelpers.push(\"\");\n                }\n              }}\n              type=\"button\"\n              disabled={disabled}\n            >\n              Add New\n            </CreateButton>\n          </div>\n        )}\n      />\n    </div>\n  );\n}\n\ninterface TextArrayFieldBuilderProps<T extends Yup.AnyObject> {\n  name: string;\n  label: string;\n  subtext?: string | JSX.Element;\n  type?: string;\n  tooltip?: string;\n}\n\nexport function TextArrayFieldBuilder<T extends Yup.AnyObject>(\n  props: TextArrayFieldBuilderProps<T>\n): FormBodyBuilder<T> {\n  const _TextArrayField: FormBodyBuilder<T> = (values) => (\n    <TextArrayField {...props} values={values} />\n  );\n  return _TextArrayField;\n}\n\ninterface SelectorFormFieldProps {\n  name: string;\n  label?: string;\n  options: StringOrNumberOption[];\n  subtext?: string | JSX.Element;\n  includeDefault?: boolean;\n  side?: \"top\" | \"right\" | \"bottom\" | \"left\";\n  maxHeight?: string;\n  onSelect?: (selected: string | number | null) => void;\n  defaultValue?: string;\n  tooltip?: string;\n  includeReset?: boolean;\n  fontSize?: \"sm\" | \"md\" | \"lg\";\n  small?: boolean;\n  disabled?: boolean;\n}\n\nexport function SelectorFormField({\n  name,\n  label,\n  options,\n  subtext,\n  side = \"bottom\",\n  maxHeight,\n  onSelect,\n  defaultValue,\n  tooltip,\n  includeReset = false,\n  fontSize = \"md\",\n  small = false,\n  disabled = false,\n}: SelectorFormFieldProps) {\n  const [field] = useField<string>(name);\n  const { setFieldValue } = useFormikContext();\n  const [container, setContainer] = useState<HTMLDivElement | null>(null);\n\n  const currentlySelected = options.find(\n    (option) => option.value?.toString() === field.value?.toString()\n  );\n\n  const textSizeClasses = {\n    sm: {\n      label: \"text-sm\",\n      input: \"text-sm\",\n      placeholder: \"text-sm\",\n    },\n    md: {\n      label: \"text-base\",\n      input: \"text-base\",\n      placeholder: \"text-base\",\n    },\n    lg: {\n      label: \"text-lg\",\n      input: \"text-lg\",\n      placeholder: \"text-lg\",\n    },\n  };\n\n  const sizeClass = textSizeClasses[fontSize];\n\n  return (\n    <div>\n      {label && (\n        <div className=\"flex gap-x-2 items-center\">\n          <Label className={sizeClass.label} small={small}>\n            {label}\n          </Label>\n          {tooltip && <ToolTipDetails>{tooltip}</ToolTipDetails>}\n        </div>\n      )}\n      {subtext && <SubLabel>{subtext}</SubLabel>}\n      <div className=\"mt-2\" ref={setContainer}>\n        <Select\n          value={field.value || defaultValue}\n          onValueChange={\n            onSelect ||\n            ((selected) =>\n              selected == \"__none__\"\n                ? setFieldValue(name, null)\n                : setFieldValue(name, selected))\n          }\n          defaultValue={defaultValue}\n          disabled={disabled}\n        >\n          <SelectTrigger className={sizeClass.input} disabled={disabled}>\n            <SelectValue placeholder=\"Select...\">\n              {currentlySelected?.name || defaultValue || \"\"}\n            </SelectValue>\n          </SelectTrigger>\n\n          {container && (\n            <SelectContent\n              side={side}\n              className={`\n               ${maxHeight ? `${maxHeight}` : \"max-h-72\"}\n               overflow-y-scroll\n               ${sizeClass.input}\n              `}\n              container={container}\n            >\n              {options.length === 0 ? (\n                <SelectItem value=\"default\">Select...</SelectItem>\n              ) : (\n                options.map((option) => (\n                  <SelectItem\n                    hideCheck\n                    icon={option.icon}\n                    key={option.value}\n                    value={String(option.value)}\n                    selected={field.value === option.value}\n                  >\n                    {option.name}\n                  </SelectItem>\n                ))\n              )}\n              {includeReset && (\n                <SelectItem\n                  value={\"__none__\"}\n                  onSelect={() => setFieldValue(name, null)}\n                >\n                  None\n                </SelectItem>\n              )}\n            </SelectContent>\n          )}\n        </Select>\n      </div>\n\n      <ErrorMessage\n        name={name}\n        component=\"div\"\n        className=\"text-action-danger-05 text-sm mt-1\"\n      />\n    </div>\n  );\n}\n\nexport interface DatePickerFieldProps {\n  label: string;\n  name: string;\n  subtext?: string;\n  startYear?: number;\n  disabled?: boolean;\n}\n\nexport function DatePickerField({\n  label,\n  name,\n  subtext,\n  startYear = 1970,\n  disabled = false,\n}: DatePickerFieldProps) {\n  const [field, _, helper] = useField<Date | null>(name);\n\n  return (\n    <div>\n      <FieldLabel label={label} name={name} subtext={subtext} />\n      <InputDatePicker\n        selectedDate={field.value}\n        setSelectedDate={helper.setValue}\n        startYear={startYear}\n        disabled={disabled}\n      />\n    </div>\n  );\n}\n\nexport interface TextAreaFieldProps extends InputTextAreaProps {\n  name: string;\n}\n\nexport function TextAreaField(props: TextAreaFieldProps) {\n  const [field, _, helper] = useField<string>(props.name);\n\n  return (\n    <InputTextArea\n      value={field.value}\n      onChange={(event) => {\n        helper.setValue(event.target.value);\n      }}\n      {...props}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/components/FormErrorHelpers.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useRef } from \"react\";\nimport { useFormikContext } from \"formik\";\n\n// After a submit with errors, scroll + focus the first invalid field\nexport function FormErrorFocus() {\n  const { submitCount, errors, isSubmitting } = useFormikContext<any>();\n  const lastHandled = useRef(0);\n\n  useEffect(() => {\n    if (isSubmitting) return;\n    if (submitCount <= 0 || submitCount === lastHandled.current) return;\n\n    const keys = Object.keys(errors || {});\n    if (keys.length === 0) return;\n\n    const timer = setTimeout(() => {\n      try {\n        let target: HTMLElement | null = null;\n\n        for (const key of keys) {\n          target = document.getElementById(key) as HTMLElement | null;\n          if (target) break;\n        }\n\n        // 2) Fallback: first element with matching name\n        if (!target) {\n          for (const key of keys) {\n            const byName = document.getElementsByName(key);\n            if (byName && byName.length > 0) {\n              target = byName[0] as HTMLElement;\n              break;\n            }\n          }\n        }\n\n        if (target) {\n          target.scrollIntoView({ behavior: \"smooth\", block: \"center\" });\n          if (typeof (target as any).focus === \"function\") {\n            (target as any).focus({ preventScroll: true });\n          }\n        }\n      } finally {\n        lastHandled.current = submitCount;\n      }\n    }, 0);\n\n    return () => clearTimeout(timer);\n  }, [submitCount, errors, isSubmitting]);\n\n  return null;\n}\n"
  },
  {
    "path": "web/src/components/GatedContentWrapper.tsx",
    "content": "\"use client\";\n\nimport { usePathname } from \"next/navigation\";\nimport AccessRestrictedPage from \"@/components/errorPages/AccessRestrictedPage\";\n\n// Paths accessible even when gated - allows users to manage billing updates and seat counts\nconst ALLOWED_GATED_PATHS = [\"/admin/billing\", \"/admin/users\"];\n\n/**\n * Check if pathname matches an allowed path exactly or is a subpath.\n * Uses strict matching to prevent bypasses like \"/admin/billing-foo\".\n */\nfunction isPathAllowed(pathname: string): boolean {\n  return ALLOWED_GATED_PATHS.some(\n    (allowedPath) =>\n      pathname === allowedPath || pathname.startsWith(allowedPath + \"/\")\n  );\n}\n\nexport default function GatedContentWrapper({\n  children,\n}: {\n  children: React.ReactNode;\n}) {\n  const pathname = usePathname();\n\n  if (isPathAllowed(pathname)) {\n    return <>{children}</>;\n  }\n\n  return <AccessRestrictedPage />;\n}\n"
  },
  {
    "path": "web/src/components/GenericMultiSelect.tsx",
    "content": "import { FormikProps, ErrorMessage } from \"formik\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport InputComboBox from \"@/refresh-components/inputs/InputComboBox/InputComboBox\";\nimport { Disabled } from \"@opal/core\";\nimport { SvgX } from \"@opal/icons\";\nexport type GenericMultiSelectFormType<T extends string> = {\n  [K in T]: number[];\n};\n\ninterface GenericItem {\n  id: number;\n  name: string;\n}\n\ninterface GenericMultiSelectProps<\n  T extends string,\n  F extends GenericMultiSelectFormType<T>,\n> {\n  formikProps: FormikProps<F>;\n  fieldName: T;\n  label: string;\n  subtext?: string;\n  items: GenericItem[] | undefined;\n  isLoading: boolean;\n  error: any;\n  emptyMessage: string;\n  disabled?: boolean;\n  disabledMessage?: string;\n}\n\nexport function GenericMultiSelect<\n  T extends string,\n  F extends GenericMultiSelectFormType<T>,\n>({\n  formikProps,\n  fieldName,\n  label,\n  subtext,\n  items,\n  isLoading,\n  error,\n  emptyMessage,\n  disabled = false,\n  disabledMessage,\n}: GenericMultiSelectProps<T, F>) {\n  if (isLoading) {\n    return (\n      <div className=\"flex flex-col gap-2 w-full\">\n        <Text as=\"p\" mainUiAction>\n          {label}\n        </Text>\n        <div className=\"animate-pulse bg-background-neutral-02 h-10 w-full rounded-08\" />\n      </div>\n    );\n  }\n\n  if (error) {\n    return (\n      <div className=\"flex flex-col gap-2 w-full\">\n        <Text as=\"p\" mainUiAction>\n          {label}\n        </Text>\n        <Text as=\"p\" text03 className=\"text-action-danger-05\">\n          Failed to load {label.toLowerCase()}. Please try again.\n        </Text>\n      </div>\n    );\n  }\n\n  if (!items || items.length === 0) {\n    return (\n      <div className=\"flex flex-col gap-2 w-full\">\n        <Text as=\"p\" mainUiAction>\n          {label}\n        </Text>\n        <Text as=\"p\" text03>\n          {emptyMessage}\n        </Text>\n      </div>\n    );\n  }\n\n  const selectedIds = (formikProps.values[fieldName] as number[]) || [];\n  const selectedItems = items.filter((item) => selectedIds.includes(item.id));\n\n  const handleSelect = (itemId: number) => {\n    if (disabled) return;\n    const currentIds = (formikProps.values[fieldName] as number[]) || [];\n    if (!currentIds.includes(itemId)) {\n      formikProps.setFieldValue(fieldName, [...currentIds, itemId]);\n    }\n  };\n\n  const handleRemove = (itemId: number) => {\n    if (disabled) return;\n    const currentIds = (formikProps.values[fieldName] as number[]) || [];\n    formikProps.setFieldValue(\n      fieldName,\n      currentIds.filter((id) => id !== itemId)\n    );\n  };\n\n  return (\n    <div className=\"flex flex-col gap-2 w-full\">\n      <Text as=\"p\" mainUiAction>\n        {label}\n      </Text>\n\n      {subtext && (\n        <Text as=\"p\" text03>\n          {disabled ? disabledMessage : subtext}\n        </Text>\n      )}\n\n      <Disabled disabled={disabled}>\n        <div>\n          <InputComboBox\n            placeholder=\"Search...\"\n            value=\"\"\n            onChange={() => {}}\n            onValueChange={(selectedValue) => {\n              const numValue = parseInt(selectedValue, 10);\n              if (!isNaN(numValue)) {\n                handleSelect(numValue);\n              }\n            }}\n            options={items\n              .filter((item) => !selectedIds.includes(item.id))\n              .map((item) => ({\n                label: item.name,\n                value: String(item.id),\n              }))}\n            strict\n            leftSearchIcon\n          />\n        </div>\n      </Disabled>\n\n      {selectedItems.length > 0 && (\n        <div className=\"flex flex-wrap gap-2\">\n          {selectedItems.map((item) => (\n            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n            <Button\n              key={item.id}\n              secondary\n              disabled={disabled}\n              rightIcon={SvgX}\n              onClick={() => handleRemove(item.id)}\n              className=\"!px-2 !py-1\"\n            >\n              {item.name}\n            </Button>\n          ))}\n        </div>\n      )}\n\n      <ErrorMessage name={fieldName} component=\"div\">\n        {(msg) => (\n          <Text as=\"p\" text03 className=\"text-action-danger-05\">\n            {msg}\n          </Text>\n        )}\n      </ErrorMessage>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/GroupsMultiSelect.tsx",
    "content": "import { FormikProps } from \"formik\";\nimport { Label } from \"@/components/Field\";\nimport { useUserGroups } from \"@/lib/hooks\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { GenericMultiSelect } from \"@/components/GenericMultiSelect\";\n\nexport type GroupsMultiSelectFormType = {\n  groups: number[];\n};\n\ninterface GroupsMultiSelectProps<T extends GroupsMultiSelectFormType> {\n  formikProps: FormikProps<T>;\n  label?: string;\n  subtext?: string;\n  disabled?: boolean;\n  disabledMessage?: string;\n}\n\nexport function GroupsMultiSelect<T extends GroupsMultiSelectFormType>({\n  formikProps,\n  label = \"User Groups\",\n  subtext = \"Select which user groups can access this resource\",\n  disabled = false,\n  disabledMessage,\n}: GroupsMultiSelectProps<T>) {\n  const {\n    data: userGroups,\n    isLoading: userGroupsIsLoading,\n    error,\n  } = useUserGroups();\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n\n  // Show loading state while checking enterprise features or loading groups\n  if (userGroupsIsLoading || isPaidEnterpriseFeaturesEnabled === undefined) {\n    return (\n      <div className=\"mb-4\">\n        <Label>{label}</Label>\n        <div className=\"animate-pulse bg-background-200 h-10 w-full rounded-lg mt-2\"></div>\n      </div>\n    );\n  }\n\n  if (!isPaidEnterpriseFeaturesEnabled) {\n    return null;\n  }\n\n  return (\n    <GenericMultiSelect\n      formikProps={formikProps}\n      fieldName=\"groups\"\n      label={label}\n      subtext={subtext}\n      items={userGroups}\n      isLoading={false}\n      error={error}\n      emptyMessage=\"No user groups available. Please create a user group first.\"\n      disabled={disabled}\n      disabledMessage={disabledMessage}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/components/HoverPopup.tsx",
    "content": "import { JSX } from \"react\";\nimport {\n  Tooltip,\n  TooltipContent,\n  TooltipProvider,\n  TooltipTrigger,\n} from \"@/components/ui/tooltip\";\n\ninterface HoverPopupProps {\n  mainContent: string | JSX.Element;\n  popupContent: string | JSX.Element;\n  classNameModifications?: string;\n  direction?: \"left\" | \"left-top\" | \"bottom\" | \"top\";\n  style?: \"basic\" | \"dark\";\n}\n\nexport const HoverPopup = ({\n  mainContent,\n  popupContent,\n  classNameModifications,\n  direction = \"bottom\",\n}: HoverPopupProps) => {\n  return (\n    <TooltipProvider>\n      <Tooltip>\n        <TooltipTrigger asChild>\n          <div>{mainContent}</div>\n        </TooltipTrigger>\n        <TooltipContent\n          side={direction === \"left-top\" ? \"left\" : direction}\n          className={classNameModifications}\n        >\n          {popupContent}\n        </TooltipContent>\n      </Tooltip>\n    </TooltipProvider>\n  );\n};\n"
  },
  {
    "path": "web/src/components/IsPublicGroupSelector.tsx",
    "content": "import { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport React, { useState, useEffect } from \"react\";\nimport { FormikProps } from \"formik\";\nimport { UserRole } from \"@/lib/types\";\nimport { useUserGroups } from \"@/lib/hooks\";\nimport { BooleanFormField } from \"@/components/Field\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { GroupsMultiSelect } from \"./GroupsMultiSelect\";\n\nexport type IsPublicGroupSelectorFormType = {\n  is_public: boolean;\n  groups: number[];\n};\n\n// This should be included for all forms that require groups / public access\n// to be set, and access to this / permissioning should be handled within this component itself.\nexport const IsPublicGroupSelector = <T extends IsPublicGroupSelectorFormType>({\n  formikProps,\n  objectName,\n  publicToWhom = \"Users\",\n  removeIndent = false,\n  enforceGroupSelection = true,\n  smallLabels = false,\n}: {\n  formikProps: FormikProps<T>;\n  objectName: string;\n  publicToWhom?: string;\n  removeIndent?: boolean;\n  enforceGroupSelection?: boolean;\n  smallLabels?: boolean;\n}) => {\n  const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups();\n  const { isAdmin, user, isCurator } = useUser();\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n  const [shouldHideContent, setShouldHideContent] = useState(false);\n\n  useEffect(() => {\n    if (user && userGroups && isPaidEnterpriseFeaturesEnabled) {\n      const isUserAdmin = user.role === UserRole.ADMIN;\n      if (!isUserAdmin && userGroups.length > 0) {\n        formikProps.setFieldValue(\"is_public\", false);\n      }\n      if (\n        userGroups.length === 1 &&\n        userGroups[0] !== undefined &&\n        !isUserAdmin\n      ) {\n        formikProps.setFieldValue(\"groups\", [userGroups[0].id]);\n        setShouldHideContent(true);\n      } else if (formikProps.values.is_public) {\n        formikProps.setFieldValue(\"groups\", []);\n        setShouldHideContent(false);\n      } else {\n        setShouldHideContent(false);\n      }\n    }\n  }, [user, userGroups, isPaidEnterpriseFeaturesEnabled]);\n\n  if (userGroupsIsLoading) {\n    return <div>Loading...</div>;\n  }\n  if (!isPaidEnterpriseFeaturesEnabled) {\n    return null;\n  }\n\n  let firstUserGroupName = \"Unknown\";\n  if (userGroups) {\n    const userGroup = userGroups[0];\n    if (userGroup) {\n      firstUserGroupName = userGroup.name;\n    }\n  }\n\n  if (shouldHideContent && enforceGroupSelection) {\n    return (\n      <>\n        {userGroups && (\n          <div className=\"mb-1 font-medium text-base\">\n            This {objectName} will be assigned to group{\" \"}\n            <b>{firstUserGroupName}</b>.\n          </div>\n        )}\n      </>\n    );\n  }\n\n  return (\n    <div>\n      {isAdmin && (\n        <>\n          <BooleanFormField\n            name=\"is_public\"\n            removeIndent={removeIndent}\n            small={smallLabels}\n            label={\n              publicToWhom === \"Curators\"\n                ? `Make this ${objectName} Curator Accessible?`\n                : `Make this ${objectName} Public?`\n            }\n            disabled={!isAdmin}\n            subtext={\n              <span className=\"block mt-2 text-sm text-text-600 dark:text-neutral-400\">\n                If set, then this {objectName} will be usable by{\" \"}\n                <b>All {publicToWhom}</b>. Otherwise, only <b>Admins</b> and{\" \"}\n                <b>{publicToWhom}</b> who have explicitly been given access to\n                this {objectName} (e.g. via a User Group) will have access.\n              </span>\n            }\n          />\n        </>\n      )}\n\n      <GroupsMultiSelect\n        formikProps={formikProps}\n        label={`Assign group access for this ${objectName}`}\n        subtext={\n          isAdmin || !enforceGroupSelection\n            ? `This ${objectName} will be visible/accessible by the groups selected below`\n            : `Curators must select one or more groups to give access to this ${objectName}`\n        }\n        disabled={formikProps.values.is_public && !isCurator}\n        disabledMessage={`This ${objectName} is public and available to all users.`}\n      />\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/Loading.tsx",
    "content": "\"use client\";\n\nimport React, { useState, useEffect } from \"react\";\nimport \"./loading.css\";\nimport { ThreeDots } from \"react-loader-spinner\";\nimport { cn } from \"@/lib/utils\";\n\ninterface LoadingAnimationProps {\n  text?: string;\n  size?: \"text-sm\" | \"text-md\";\n}\n\nexport const LoadingAnimation: React.FC<LoadingAnimationProps> = ({\n  text,\n  size,\n}) => {\n  const [dots, setDots] = useState(\"...\");\n\n  useEffect(() => {\n    const interval = setInterval(() => {\n      setDots((prevDots) => {\n        switch (prevDots) {\n          case \".\":\n            return \"..\";\n          case \"..\":\n            return \"...\";\n          case \"...\":\n            return \".\";\n          default:\n            return \"...\";\n        }\n      });\n    }, 500);\n\n    return () => clearInterval(interval);\n  }, []);\n\n  return (\n    <span className=\"loading-animation inline-flex\">\n      <span className={cn(\"mx-auto inline-flex\", size)}>\n        {text === undefined ? \"Thinking\" : text}\n        <span className=\"dots\">{dots}</span>\n      </span>\n    </span>\n  );\n};\n\nexport const ThreeDotsLoader = () => {\n  return (\n    <div className=\"flex my-auto\">\n      <div className=\"mx-auto\">\n        <ThreeDots\n          height=\"30\"\n          width=\"50\"\n          color=\"#3b82f6\"\n          ariaLabel=\"grid-loading\"\n          radius=\"12.5\"\n          wrapperStyle={{}}\n          wrapperClass=\"\"\n          visible={true}\n        />\n      </div>\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/MetadataBadge.tsx",
    "content": "import { JSX } from \"react\";\n\nexport function MetadataBadge({\n  icon,\n  value,\n  flexNone,\n}: {\n  icon?: (props: { size?: number; className?: string }) => JSX.Element;\n  value: string | JSX.Element;\n  flexNone?: boolean;\n}) {\n  return (\n    <div\n      className={`\n      text-xs \n      text-strong\n      flex\n      bg-accent-background-hovered \n      rounded-full \n      px-1\n      py-0.5 \n      w-fit \n      my-auto \n      select-none \n      ${flexNone ? \"flex-none\" : \"\"}`}\n    >\n      {icon &&\n        icon({\n          size: 12,\n          className: flexNone ? \"flex-none\" : \"mr-0.5 my-auto\",\n        })}\n      <p className=\"max-w-[6rem] text-ellipsis overflow-hidden truncate whitespace-nowrap\">\n        {value}\n      </p>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/MultiSelectDropdown.tsx",
    "content": "import { useState } from \"react\";\nimport { Label, ManualErrorMessage } from \"@/components/Field\";\nimport CreatableSelect from \"react-select/creatable\";\nimport Select from \"react-select\";\nimport { ErrorMessage } from \"formik\";\n\ninterface Option {\n  value: string;\n  label: string;\n}\n\ninterface MultiSelectDropdownProps {\n  name: string;\n  label: string;\n  options: Option[];\n  creatable: boolean;\n  initialSelectedOptions?: Option[];\n  direction?: \"top\" | \"bottom\";\n  onChange: (selected: Option[]) => void;\n  onCreate?: (created_name: string) => Promise<Option>;\n  error?: string;\n}\n\nconst getReactSelectStyles = () => ({\n  control: (base: any) => ({\n    ...base,\n    backgroundColor: \"var(--background-neutral-00)\",\n    borderColor: \"var(--border-03)\",\n    color: \"var(--text-04)\",\n  }),\n  menu: (base: any) => ({\n    ...base,\n    backgroundColor: \"var(--background-neutral-00)\",\n    border: \"1px solid var(--border-03)\",\n    borderRadius: \"4px\",\n    overflow: \"hidden\",\n  }),\n  menuList: (base: any) => ({\n    ...base,\n    backgroundColor: \"var(--background-neutral-00)\",\n  }),\n  option: (base: any, state: any) => ({\n    ...base,\n    backgroundColor: state.isSelected\n      ? \"var(--background-150)\"\n      : state.isFocused\n        ? \"var(--background-100)\"\n        : \"transparent\",\n    color: \"var(--text-04)\",\n  }),\n  multiValue: (base: any) => ({\n    ...base,\n    backgroundColor: \"var(--background-150)\",\n  }),\n  multiValueLabel: (base: any) => ({\n    ...base,\n    color: \"var(--text-04)\",\n  }),\n  multiValueRemove: (base: any) => ({\n    ...base,\n    color: \"var(--text-04)\",\n    \":hover\": {\n      backgroundColor: \"var(--background-200)\",\n      color: \"var(--text-04)\",\n    },\n  }),\n  input: (base: any) => ({\n    ...base,\n    color: \"var(--text-04)\",\n  }),\n  placeholder: (base: any) => ({\n    ...base,\n    color: \"var(--text-02)\",\n  }),\n  singleValue: (base: any) => ({\n    ...base,\n    color: \"var(--text-04)\",\n  }),\n});\n\nconst MultiSelectDropdown = ({\n  name,\n  label,\n  options,\n  creatable,\n  onChange,\n  onCreate,\n  error,\n  direction = \"bottom\",\n  initialSelectedOptions = [],\n}: MultiSelectDropdownProps) => {\n  const [selectedOptions, setSelectedOptions] = useState<Option[]>(\n    initialSelectedOptions\n  );\n  const [allOptions, setAllOptions] = useState<Option[]>(options);\n  const [inputValue, setInputValue] = useState(\"\");\n\n  const handleInputChange = (input: string) => {\n    setInputValue(input);\n  };\n\n  const handleChange = (selected: any) => {\n    setSelectedOptions(selected || []);\n    onChange(selected || []);\n  };\n\n  const handleCreateOption = async (inputValue: string) => {\n    if (creatable) {\n      if (!onCreate) {\n        console.error(\"onCreate is required for creatable\");\n        return;\n      }\n      try {\n        const newOption = await onCreate(inputValue);\n        if (newOption) {\n          setAllOptions([...options, newOption]);\n          setSelectedOptions([...selectedOptions, newOption]);\n          onChange([...selectedOptions, newOption]);\n        }\n      } catch (error) {\n        console.error(\"Error creating option:\", error);\n      }\n    } else {\n      return;\n    }\n  };\n\n  return (\n    <div className=\"flex flex-col text-white space-y-4 mb-4\">\n      <Label>{label}</Label>\n      {creatable ? (\n        <CreatableSelect\n          isMulti\n          options={allOptions}\n          value={selectedOptions}\n          onChange={handleChange}\n          onCreateOption={handleCreateOption}\n          onInputChange={handleInputChange}\n          inputValue={inputValue}\n          menuPlacement={direction}\n          styles={getReactSelectStyles()}\n        />\n      ) : (\n        <Select\n          isMulti\n          options={allOptions}\n          value={selectedOptions}\n          onChange={handleChange}\n          onInputChange={handleInputChange}\n          inputValue={inputValue}\n          menuPlacement={direction}\n          styles={getReactSelectStyles()}\n        />\n      )}\n      {error ? (\n        <ManualErrorMessage>{error}</ManualErrorMessage>\n      ) : (\n        <ErrorMessage\n          name={name}\n          component=\"div\"\n          className=\"text-red-500 text-sm mt-1\"\n        />\n      )}\n    </div>\n  );\n};\n\nexport default MultiSelectDropdown;\n"
  },
  {
    "path": "web/src/components/NonSelectableConnectors.tsx",
    "content": "import { ConnectorStatus } from \"@/lib/types\";\nimport { ConnectorTitle } from \"@/components/admin/connectors/ConnectorTitle\";\nimport { Content } from \"@opal/layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgLock } from \"@opal/icons\";\ninterface NonSelectableConnectorsProps {\n  connectors: ConnectorStatus<any, any>[];\n  title: string;\n  description: string;\n}\n\nexport const NonSelectableConnectors = ({\n  connectors,\n  title,\n  description,\n}: NonSelectableConnectorsProps) => {\n  if (connectors.length === 0) {\n    return null;\n  }\n\n  return (\n    <div className=\"mt-6 mb-4\">\n      <Content\n        title={title}\n        description={description}\n        sizePreset=\"main-content\"\n        variant=\"section\"\n      />\n\n      <div className=\"p-3 border border-dashed border-border-02 rounded-12 bg-background-neutral-01\">\n        <div className=\"mb-2 flex items-center gap-1.5\">\n          <SvgLock className=\"h-3.5 w-3.5 stroke-text-03\" />\n          <Text as=\"p\" figureSmallLabel text04 className=\"!mb-0\">\n            Unavailable connectors:\n          </Text>\n        </div>\n        <div className=\"flex flex-wrap gap-1.5\">\n          {connectors.map((connector) => (\n            <div\n              key={`${connector.connector.id}-${connector.credential.id}`}\n              className=\"flex items-center px-2 py-1 cursor-not-allowed opacity-80 bg-background-neutral-00 border border-border-02 rounded-12 text-xs\"\n            >\n              <div className=\"flex items-center max-w-[200px] text-xs\">\n                <ConnectorTitle\n                  connector={connector.connector}\n                  ccPairId={connector.cc_pair_id}\n                  ccPairName={connector.name}\n                  isLink={false}\n                  showMetadata={false}\n                />\n              </div>\n            </div>\n          ))}\n        </div>\n      </div>\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/OnyxInitializingLoader.tsx",
    "content": "\"use client\";\n\nimport { useContext } from \"react\";\nimport Logo from \"@/refresh-components/Logo\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\n\nexport default function OnyxInitializingLoader() {\n  const settings = useContext(SettingsContext);\n\n  return (\n    <div className=\"mx-auto my-auto animate-pulse\">\n      <Logo folded size={96} className=\"mx-auto mb-3\" />\n      <p className=\"text-lg text-text font-semibold\">\n        Initializing {settings?.enterpriseSettings?.application_name ?? \"Onyx\"}\n      </p>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/PageSelector.tsx",
    "content": "import React from \"react\";\n\nconst PAGINATION_OPTIONS_ON_EACH_SIDE = 2;\n\nconst getPaginationOptions = (\n  currentPage: number,\n  pageCount: number\n): number[] => {\n  const paginationOptions = [currentPage];\n  // if (currentPage !== 1) {\n  //   paginationOptions.push(currentPage)\n  // }\n\n  let offset = 1;\n\n  // Add one because currentPage is included\n  const maxPaginationOptions = PAGINATION_OPTIONS_ON_EACH_SIDE * 2 + 1;\n  while (paginationOptions.length < maxPaginationOptions) {\n    let added = false;\n    if (currentPage + offset <= pageCount) {\n      paginationOptions.push(currentPage + offset);\n      added = true;\n    }\n    if (currentPage - offset >= 1) {\n      paginationOptions.unshift(currentPage - offset);\n      added = true;\n    }\n    if (!added) {\n      break;\n    }\n    offset++;\n  }\n\n  return paginationOptions;\n};\n\nconst scrollUp = () => {\n  setTimeout(() => window.scrollTo({ top: 0 }), 50);\n};\n\ntype PageLinkProps = {\n  linkText: string | number;\n  pageChangeHandler?: () => void;\n  active?: boolean;\n  unclickable?: boolean;\n};\n\nconst PageLink = ({\n  linkText,\n  pageChangeHandler,\n  active,\n  unclickable,\n}: PageLinkProps) => (\n  <div\n    className={`\n    select-none\n    inline-block\n    text-sm\n    border\n    px-3\n    py-1\n    leading-5\n    -ml-px\n    border-border\n    ${unclickable ? \"text-text-200\" : \"\"}\n    ${!unclickable ? \"hover:bg-accent-background-hovered\" : \"\"}\n    ${!unclickable ? \"cursor-pointer\" : \"\"}\n    first:ml-0\n    first:rounded-l-md\n    last:rounded-r-md\n    ${active ? \"bg-background-200\" : \"\"}\n  `}\n    onClick={() => {\n      if (pageChangeHandler) {\n        pageChangeHandler();\n      }\n    }}\n  >\n    {linkText}\n  </div>\n);\n\nexport interface PageSelectorProps {\n  currentPage: number;\n  totalPages: number;\n  onPageChange: (newPage: number) => void;\n  shouldScroll?: boolean;\n}\n\nexport const PageSelector = ({\n  currentPage,\n  totalPages,\n  onPageChange,\n  shouldScroll = false,\n}: PageSelectorProps) => {\n  const paginationOptions = getPaginationOptions(currentPage, totalPages);\n  const modifiedScrollUp = () => {\n    if (shouldScroll) {\n      scrollUp();\n    }\n  };\n\n  return (\n    <div style={{ display: \"inline-block\" }}>\n      <PageLink\n        linkText=\"‹\"\n        unclickable={currentPage === 1}\n        pageChangeHandler={() => {\n          onPageChange(Math.max(currentPage - 1, 1));\n          modifiedScrollUp();\n        }}\n      />\n      {!paginationOptions.includes(1) && (\n        <>\n          <PageLink\n            linkText=\"1\"\n            active={currentPage === 1}\n            pageChangeHandler={() => {\n              onPageChange(1);\n              modifiedScrollUp();\n            }}\n          />\n          <PageLink linkText=\"...\" unclickable={true} />\n        </>\n      )}\n      {(!paginationOptions.includes(1)\n        ? paginationOptions.slice(2)\n        : paginationOptions\n      ).map((page) => {\n        return (\n          <PageLink\n            key={page}\n            active={page === currentPage}\n            linkText={page}\n            pageChangeHandler={() => {\n              onPageChange(page);\n              modifiedScrollUp();\n            }}\n          />\n        );\n      })}\n      <PageLink\n        linkText=\"›\"\n        unclickable={currentPage === totalPages}\n        pageChangeHandler={() => {\n          onPageChange(Math.min(currentPage + 1, totalPages));\n          modifiedScrollUp();\n        }}\n      />\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/RichTextSubtext.tsx",
    "content": "import React from \"react\";\n\ninterface RichTextSubtextProps {\n  text: string;\n  className?: string;\n}\n\n/**\n * Component that renders text with clickable links.\n * Detects URLs in the text and converts them to clickable links.\n * Also supports markdown-style links like [text](url).\n * NOTE: we should be careful not to use this component in a way that displays text from external sources\n * because it could be used to create links to malicious sites. Right now it's just used to make links\n * to our docs in connector setup pages\n */\nexport function RichTextSubtext({\n  text,\n  className = \"\",\n}: RichTextSubtextProps) {\n  // Function to parse text and create React elements\n  const parseText = (input: string): React.ReactNode[] => {\n    const elements: React.ReactNode[] = [];\n\n    // Regex to match markdown links [text](url) and plain URLs\n    const combinedRegex = /(\\[([^\\]]+)\\]\\(([^)]+)\\))|(https?:\\/\\/[^\\s]+)/g;\n\n    let lastIndex = 0;\n    let match;\n    let key = 0;\n\n    while ((match = combinedRegex.exec(input)) !== null) {\n      // Add text before the match\n      if (match.index > lastIndex) {\n        elements.push(\n          <span key={`text-${key++}`}>\n            {input.slice(lastIndex, match.index)}\n          </span>\n        );\n      }\n\n      if (match[1]) {\n        // Markdown-style link [text](url)\n        const linkText = match[2];\n        const url = match[3];\n        elements.push(\n          <a\n            key={`link-${key++}`}\n            href={url}\n            target=\"_blank\"\n            rel=\"noopener noreferrer\"\n            className=\"text-link hover:text-link-hover underline\"\n            onClick={(e) => e.stopPropagation()}\n          >\n            {linkText}\n          </a>\n        );\n      } else if (match[4]) {\n        // Plain URL\n        const url = match[4];\n        elements.push(\n          <a\n            key={`link-${key++}`}\n            href={url}\n            target=\"_blank\"\n            rel=\"noopener noreferrer\"\n            className=\"text-link hover:text-link-hover underline\"\n            onClick={(e) => e.stopPropagation()}\n          >\n            {url}\n          </a>\n        );\n      }\n\n      lastIndex = match.index + match[0].length;\n    }\n\n    // Add remaining text after the last match\n    if (lastIndex < input.length) {\n      elements.push(\n        <span key={`text-${key++}`}>{input.slice(lastIndex)}</span>\n      );\n    }\n\n    return elements;\n  };\n\n  return <div className={className}>{parseText(text)}</div>;\n}\n"
  },
  {
    "path": "web/src/components/SSRAutoRefresh.tsx",
    "content": "\"use client\";\n\nimport { useRouter } from \"next/navigation\";\nimport { useEffect } from \"react\";\n\n// NOTE: this is causing crashes due to `ECONNRESET` and `UND_ERR_SOCKET`\n// during the server-side fetch. Should not be used until this is resolved.\n// export function SSRAutoRefresh({ refreshFreq = 5 }: { refreshFreq?: number }) {\n//   // Helper which automatically refreshes a SSR page X seconds\n//   const router = useRouter();\n\n//   useEffect(() => {\n//     const interval = setInterval(() => {\n//       router.refresh();\n//     }, refreshFreq * 1000);\n\n//     return () => clearInterval(interval);\n//   }, []);\n\n//   return <></>;\n// }\n\nexport function InstantSSRAutoRefresh() {\n  const router = useRouter();\n\n  useEffect(() => {\n    router.refresh();\n  }, [router]);\n\n  return <></>;\n}\n"
  },
  {
    "path": "web/src/components/SearchResultIcon.tsx",
    "content": "\"use client\";\nimport { useState, useEffect } from \"react\";\nimport faviconFetch from \"favicon-fetch\";\nimport { SourceIcon } from \"./SourceIcon\";\nimport { ValidSources } from \"@/lib/types\";\nimport { OnyxIcon } from \"./icons/icons\";\n\nconst CACHE_DURATION = 24 * 60 * 60 * 1000;\n\nexport async function getFaviconUrl(url: string): Promise<string | null> {\n  const getCachedFavicon = () => {\n    const cachedData = localStorage.getItem(`favicon_${url}`);\n    if (cachedData) {\n      const { favicon, timestamp } = JSON.parse(cachedData);\n      if (Date.now() - timestamp < CACHE_DURATION) {\n        return favicon;\n      }\n    }\n    return null;\n  };\n\n  const cachedFavicon = getCachedFavicon();\n  if (cachedFavicon) {\n    return cachedFavicon;\n  }\n\n  const newFaviconUrl = await faviconFetch({ uri: url });\n  if (newFaviconUrl) {\n    localStorage.setItem(\n      `favicon_${url}`,\n      JSON.stringify({ favicon: newFaviconUrl, timestamp: Date.now() })\n    );\n    return newFaviconUrl;\n  }\n\n  return null;\n}\n\nexport function SearchResultIcon({ url }: { url: string }) {\n  const [faviconUrl, setFaviconUrl] = useState<string | null>(null);\n\n  useEffect(() => {\n    getFaviconUrl(url).then((favicon) => {\n      if (favicon) {\n        setFaviconUrl(favicon);\n      }\n    });\n  }, [url]);\n\n  if (!faviconUrl) {\n    return <SourceIcon sourceType={ValidSources.Web} iconSize={18} />;\n  }\n  if (url.includes(\"onyx.app\")) {\n    return <OnyxIcon size={18} className=\"dark:text-[#fff] text-[#000]\" />;\n  }\n\n  return (\n    <div className=\"rounded-full w-[18px] h-[18px] overflow-hidden bg-background-200\">\n      <img\n        height={18}\n        width={18}\n        className=\"rounded-full w-full h-full object-cover\"\n        src={faviconUrl}\n        alt=\"favicon\"\n        onError={(e) => {\n          e.currentTarget.onerror = null;\n        }}\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/SourceIcon.tsx",
    "content": "\"use client\";\n\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport { ValidSources } from \"@/lib/types\";\n\nexport interface SourceIconProps {\n  sourceType: ValidSources;\n  iconSize: number;\n}\n\nexport function SourceIcon({ sourceType, iconSize }: SourceIconProps) {\n  return getSourceMetadata(sourceType).icon({\n    size: iconSize,\n    className: \"text-text-04\",\n  });\n}\n"
  },
  {
    "path": "web/src/components/SourceTile.tsx",
    "content": "import { SourceIcon } from \"@/components/SourceIcon\";\nimport Link from \"next/link\";\nimport type { Route } from \"next\";\nimport { SourceMetadata } from \"@/lib/search/interfaces\";\nimport React from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\n\ninterface SourceTileProps {\n  sourceMetadata: SourceMetadata;\n  preSelect?: boolean;\n  navigationUrl: string;\n  hasExistingSlackCredentials: boolean;\n}\n\nexport default function SourceTile({\n  sourceMetadata,\n  preSelect,\n  navigationUrl,\n}: SourceTileProps) {\n  return (\n    <Link\n      className={`flex\n              flex-col\n              items-center\n              justify-center\n              p-4\n              rounded-lg\n              w-40\n              cursor-pointer\n              shadow-md\n              bg-background-tint-00\n              hover:bg-background-tint-02\n              relative\n              ${preSelect ? \"subtle-pulse\" : \"\"}\n            `}\n      href={navigationUrl as Route}\n    >\n      <SourceIcon sourceType={sourceMetadata.internalName} iconSize={24} />\n      <Text as=\"p\" className=\"pt-2\">\n        {sourceMetadata.displayName}\n      </Text>\n    </Link>\n  );\n}\n"
  },
  {
    "path": "web/src/components/Spinner.tsx",
    "content": "import \"./spinner.css\";\n\nexport const Spinner = () => {\n  return (\n    <div className=\"fixed top-0 left-0 z-50 w-screen h-screen bg-[#000] bg-opacity-50 flex items-center justify-center\">\n      <div className=\"loader ease-linear rounded-full border-8 border-t-8 border-background-200 h-8 w-8\"></div>\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/Status.tsx",
    "content": "\"use client\";\n\nimport { ValidStatuses } from \"@/lib/types\";\nimport { Badge } from \"@/components/ui/badge\";\nimport { timeAgo } from \"@/lib/time\";\nimport {\n  FiAlertTriangle,\n  FiCheckCircle,\n  FiClock,\n  FiMinus,\n  FiPauseCircle,\n} from \"react-icons/fi\";\nimport {\n  ConnectorCredentialPairStatus,\n  PermissionSyncStatusEnum,\n} from \"@/app/admin/connector/[ccPairId]/types\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\n\nexport function IndexAttemptStatus({\n  status,\n  errorMsg,\n}: {\n  status: ValidStatuses | null;\n  errorMsg?: string | null;\n}) {\n  let badge;\n\n  if (status === \"failed\") {\n    const icon = (\n      <Badge variant=\"destructive\" icon={FiAlertTriangle}>\n        Failed\n      </Badge>\n    );\n    if (errorMsg) {\n      badge = (\n        <SimpleTooltip tooltip={errorMsg}>\n          <div className=\"cursor-pointer\">{icon}</div>\n        </SimpleTooltip>\n      );\n    } else {\n      badge = icon;\n    }\n  } else if (status === \"completed_with_errors\") {\n    badge = (\n      <Badge variant=\"secondary\" icon={FiAlertTriangle}>\n        Completed with errors\n      </Badge>\n    );\n  } else if (status === \"success\") {\n    badge = (\n      <Badge variant=\"success\" icon={FiCheckCircle}>\n        Succeeded\n      </Badge>\n    );\n  } else if (status === \"in_progress\") {\n    badge = (\n      <Badge variant=\"in_progress\" icon={FiClock}>\n        In Progress\n      </Badge>\n    );\n  } else if (status === \"not_started\") {\n    badge = (\n      <Badge variant=\"not_started\" icon={FiClock}>\n        Scheduled\n      </Badge>\n    );\n  } else if (status === \"canceled\") {\n    badge = (\n      <Badge variant=\"canceled\" icon={FiClock}>\n        Canceled\n      </Badge>\n    );\n  } else if (status === \"invalid\") {\n    badge = (\n      <Badge variant=\"invalid\" icon={FiAlertTriangle}>\n        Invalid\n      </Badge>\n    );\n  } else {\n    badge = (\n      <Badge variant=\"outline\" icon={FiMinus}>\n        None\n      </Badge>\n    );\n  }\n\n  return <div>{badge}</div>;\n}\n\nexport function PermissionSyncStatus({\n  status,\n  errorMsg,\n}: {\n  status: PermissionSyncStatusEnum | null;\n  errorMsg?: string | null;\n}) {\n  let badge;\n\n  if (status === PermissionSyncStatusEnum.FAILED) {\n    const icon = (\n      <Badge variant=\"destructive\" icon={FiAlertTriangle}>\n        Failed\n      </Badge>\n    );\n    if (errorMsg) {\n      badge = (\n        <SimpleTooltip tooltip={errorMsg} side=\"bottom\">\n          <div className=\"cursor-pointer\">{icon}</div>\n        </SimpleTooltip>\n      );\n    } else {\n      badge = icon;\n    }\n  } else if (status === PermissionSyncStatusEnum.COMPLETED_WITH_ERRORS) {\n    badge = (\n      <Badge variant=\"secondary\" icon={FiAlertTriangle}>\n        Completed with errors\n      </Badge>\n    );\n  } else if (status === PermissionSyncStatusEnum.SUCCESS) {\n    badge = (\n      <Badge variant=\"success\" icon={FiCheckCircle}>\n        Succeeded\n      </Badge>\n    );\n  } else if (status === PermissionSyncStatusEnum.IN_PROGRESS) {\n    badge = (\n      <Badge variant=\"in_progress\" icon={FiClock}>\n        In Progress\n      </Badge>\n    );\n  } else if (status === PermissionSyncStatusEnum.NOT_STARTED) {\n    badge = (\n      <Badge variant=\"not_started\" icon={FiClock}>\n        Scheduled\n      </Badge>\n    );\n  } else {\n    badge = (\n      <Badge variant=\"secondary\" icon={FiClock}>\n        Not Started\n      </Badge>\n    );\n  }\n\n  return <div>{badge}</div>;\n}\n\nexport function CCPairStatus({\n  ccPairStatus,\n  inRepeatedErrorState,\n  lastIndexAttemptStatus,\n  size = \"md\",\n}: {\n  ccPairStatus: ConnectorCredentialPairStatus;\n  inRepeatedErrorState: boolean;\n  lastIndexAttemptStatus: ValidStatuses | undefined | null;\n  size?: \"xs\" | \"sm\" | \"md\" | \"lg\";\n}) {\n  let badge;\n\n  if (ccPairStatus == ConnectorCredentialPairStatus.DELETING) {\n    badge = (\n      <Badge variant=\"destructive\" icon={FiAlertTriangle}>\n        Deleting\n      </Badge>\n    );\n  } else if (ccPairStatus == ConnectorCredentialPairStatus.PAUSED) {\n    badge = (\n      <Badge variant=\"paused\" icon={FiPauseCircle}>\n        Paused\n      </Badge>\n    );\n  } else if (inRepeatedErrorState) {\n    badge = (\n      <Badge variant=\"destructive\" icon={FiAlertTriangle}>\n        Error\n      </Badge>\n    );\n  } else if (ccPairStatus == ConnectorCredentialPairStatus.SCHEDULED) {\n    badge = (\n      <Badge variant=\"not_started\" icon={FiClock}>\n        Scheduled\n      </Badge>\n    );\n  } else if (ccPairStatus == ConnectorCredentialPairStatus.INITIAL_INDEXING) {\n    badge = (\n      <Badge variant=\"in_progress\" icon={FiClock}>\n        Initial Indexing\n      </Badge>\n    );\n  } else if (ccPairStatus == ConnectorCredentialPairStatus.INVALID) {\n    badge = (\n      <Badge\n        tooltip=\"Connector is in an invalid state. Please update the credentials or create a new connector.\"\n        circle\n        variant=\"invalid\"\n      >\n        Invalid\n      </Badge>\n    );\n  } else {\n    if (lastIndexAttemptStatus && lastIndexAttemptStatus === \"in_progress\") {\n      badge = (\n        <Badge variant=\"in_progress\" icon={FiClock}>\n          Indexing\n        </Badge>\n      );\n    } else if (\n      lastIndexAttemptStatus &&\n      lastIndexAttemptStatus === \"not_started\"\n    ) {\n      badge = (\n        <Badge variant=\"not_started\" icon={FiClock}>\n          Scheduled\n        </Badge>\n      );\n    } else if (\n      lastIndexAttemptStatus &&\n      lastIndexAttemptStatus === \"canceled\"\n    ) {\n      badge = (\n        <Badge variant=\"canceled\" icon={FiClock}>\n          Canceled\n        </Badge>\n      );\n    } else {\n      badge = (\n        <Badge variant=\"success\" icon={FiCheckCircle}>\n          Indexed\n        </Badge>\n      );\n    }\n  }\n\n  return <div>{badge}</div>;\n}\n"
  },
  {
    "path": "web/src/components/WebResultIcon.tsx",
    "content": "\"use client\";\n\nimport { ValidSources } from \"@/lib/types\";\nimport { SourceIcon } from \"./SourceIcon\";\nimport { useState } from \"react\";\nimport { OnyxIcon } from \"./icons/icons\";\n\nexport function WebResultIcon({\n  url,\n  size = 18,\n}: {\n  url: string;\n  size?: number;\n}) {\n  const [error, setError] = useState(false);\n  let hostname;\n  try {\n    hostname = new URL(url).hostname;\n  } catch (e) {\n    hostname = \"onyx.app\";\n  }\n  return (\n    <>\n      {hostname.includes(\"onyx.app\") ? (\n        <OnyxIcon size={size} className=\"dark:text-[#fff] text-[#000]\" />\n      ) : !error ? (\n        <img\n          className=\"my-0 rounded-full py-0\"\n          src={`https://t3.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://${hostname}&size=128`}\n          alt=\"favicon\"\n          height={size}\n          onError={() => setError(true)}\n          width={size}\n          style={{\n            height: `${size}px`,\n            width: `${size}px`,\n            background: \"transparent\",\n          }}\n        />\n      ) : (\n        <SourceIcon sourceType={ValidSources.Web} iconSize={size} />\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/CardSection.tsx",
    "content": "import { cn } from \"@/lib/utils\";\n\nexport interface CardSectionProps {\n  className?: string;\n  children?: React.ReactNode;\n}\n\n// Used for all admin page sections\nexport default function CardSection({ children, className }: CardSectionProps) {\n  return (\n    <div\n      className={cn(\n        \"p-6 bg-background-neutral-00 rounded-16 border\",\n        className\n      )}\n    >\n      {children}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/ClientLayout.tsx",
    "content": "\"use client\";\n\nimport AdminSidebar from \"@/sections/sidebar/AdminSidebar\";\nimport { usePathname } from \"next/navigation\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport { ApplicationStatus } from \"@/interfaces/settings\";\nimport { Button } from \"@opal/components\";\nimport { cn } from \"@/lib/utils\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nexport interface ClientLayoutProps {\n  children: React.ReactNode;\n  enableCloud: boolean;\n}\n\n// TODO (@raunakab): Migrate ALL admin pages to use SettingsLayouts from\n// `@/layouts/settings-layouts`. Once every page manages its own layout,\n// the `py-10 px-4 md:px-12` padding below can be removed entirely and\n// this prefix list can be deleted.\nconst SETTINGS_LAYOUT_PREFIXES = [\n  ADMIN_ROUTES.CHAT_PREFERENCES.path,\n  ADMIN_ROUTES.IMAGE_GENERATION.path,\n  ADMIN_ROUTES.WEB_SEARCH.path,\n  ADMIN_ROUTES.MCP_ACTIONS.path,\n  ADMIN_ROUTES.OPENAPI_ACTIONS.path,\n  ADMIN_ROUTES.BILLING.path,\n  ADMIN_ROUTES.INDEX_MIGRATION.path,\n  ADMIN_ROUTES.DISCORD_BOTS.path,\n  ADMIN_ROUTES.THEME.path,\n  ADMIN_ROUTES.LLM_MODELS.path,\n  ADMIN_ROUTES.AGENTS.path,\n  ADMIN_ROUTES.USERS.path,\n  ADMIN_ROUTES.TOKEN_RATE_LIMITS.path,\n  ADMIN_ROUTES.INDEX_SETTINGS.path,\n  ADMIN_ROUTES.DOCUMENT_PROCESSING.path,\n  ADMIN_ROUTES.CODE_INTERPRETER.path,\n  ADMIN_ROUTES.API_KEYS.path,\n  ADMIN_ROUTES.ADD_CONNECTOR.path,\n  ADMIN_ROUTES.INDEXING_STATUS.path,\n  ADMIN_ROUTES.DOCUMENTS.path,\n  ADMIN_ROUTES.DEBUG.path,\n  ADMIN_ROUTES.KNOWLEDGE_GRAPH.path,\n  ADMIN_ROUTES.SLACK_BOTS.path,\n  ADMIN_ROUTES.STANDARD_ANSWERS.path,\n  ADMIN_ROUTES.GROUPS.path,\n  ADMIN_ROUTES.PERFORMANCE.path,\n  ADMIN_ROUTES.SCIM.path,\n  ADMIN_ROUTES.VOICE.path,\n];\n\nexport function ClientLayout({ children, enableCloud }: ClientLayoutProps) {\n  const pathname = usePathname();\n  const settings = useSettingsContext();\n\n  // Certain admin panels have their own custom sidebar.\n  // For those pages, we skip rendering the default `AdminSidebar` and let those individual pages render their own.\n  const hasCustomSidebar =\n    pathname.startsWith(\"/admin/connectors\") ||\n    pathname.startsWith(\"/admin/embeddings\");\n\n  // Pages using SettingsLayouts handle their own padding/centering.\n  const hasOwnLayout = SETTINGS_LAYOUT_PREFIXES.some((prefix) =>\n    pathname.startsWith(prefix)\n  );\n\n  return (\n    <div className=\"h-screen w-screen flex overflow-hidden\">\n      {settings.settings.application_status ===\n        ApplicationStatus.PAYMENT_REMINDER && (\n        <div className=\"fixed top-2 left-1/2 transform -translate-x-1/2 bg-amber-400 dark:bg-amber-500 text-gray-900 dark:text-gray-100 p-4 rounded-lg shadow-lg z-50 max-w-md text-center\">\n          <strong className=\"font-bold\">Warning:</strong> Your trial ends in\n          less than 5 days and no payment method has been added.\n          <div className=\"mt-2\">\n            <Button width=\"full\" href=\"/admin/billing\">\n              Update Billing Information\n            </Button>\n          </div>\n        </div>\n      )}\n\n      {hasCustomSidebar ? (\n        <div className=\"flex-1 min-w-0 min-h-0 overflow-y-auto\">{children}</div>\n      ) : (\n        <>\n          <AdminSidebar enableCloudSS={enableCloud} />\n          <div\n            data-main-container\n            className={cn(\n              \"flex flex-1 flex-col min-w-0 min-h-0 overflow-y-auto\",\n              !hasOwnLayout && \"py-10 px-4 md:px-12\"\n            )}\n          >\n            {children}\n          </div>\n        </>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/Layout.tsx",
    "content": "import { redirect } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { requireAdminAuth } from \"@/lib/auth/requireAuth\";\nimport { ClientLayout } from \"./ClientLayout\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport { AnnouncementBanner } from \"../header/AnnouncementBanner\";\n\nexport interface LayoutProps {\n  children: React.ReactNode;\n}\n\nexport default async function Layout({ children }: LayoutProps) {\n  // Check authentication and admin role - data fetching is done client-side via SWR hooks\n  const authResult = await requireAdminAuth();\n\n  // If auth check returned a redirect, redirect immediately\n  if (authResult.redirect) {\n    return redirect(authResult.redirect as Route);\n  }\n\n  return (\n    <ClientLayout enableCloud={NEXT_PUBLIC_CLOUD_ENABLED}>\n      <AnnouncementBanner />\n      {children}\n    </ClientLayout>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/Title.tsx",
    "content": "\"use client\";\n\nimport { JSX } from \"react\";\nimport Separator from \"@/refresh-components/Separator\";\nimport type { IconProps } from \"@opal/types\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nexport interface AdminPageTitleProps {\n  icon: React.FunctionComponent<IconProps> | React.ReactNode;\n  title: string | JSX.Element;\n  farRightElement?: JSX.Element;\n  includeDivider?: boolean;\n}\n\nexport function AdminPageTitle({\n  icon: Icon,\n  title,\n  farRightElement,\n  includeDivider = true,\n}: AdminPageTitleProps) {\n  return (\n    <div className=\"w-full\">\n      <div className=\"w-full flex flex-row justify-between\">\n        <div className=\"flex flex-row gap-2\">\n          {typeof Icon === \"function\" ? (\n            <Icon className=\"stroke-text-04 h-8 w-8\" />\n          ) : (\n            Icon\n          )}\n          <Text headingH2 aria-label=\"admin-page-title\">\n            {title}\n          </Text>\n        </div>\n        {farRightElement}\n      </div>\n      {includeDivider ? <Separator /> : <div className=\"mb-6\" />}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/connectors/AccessTypeForm.tsx",
    "content": "import { DefaultDropdown } from \"@/components/Dropdown\";\nimport {\n  AccessType,\n  ValidAutoSyncSource,\n  ConfigurableSources,\n  validAutoSyncSources,\n} from \"@/lib/types\";\nimport { useField } from \"formik\";\nimport { AutoSyncOptions } from \"./AutoSyncOptions\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { useEffect, useMemo } from \"react\";\nimport { Credential } from \"@/lib/connectors/credentials\";\nimport { credentialTemplates } from \"@/lib/connectors/credentials\";\n\nfunction isValidAutoSyncSource(\n  value: ConfigurableSources\n): value is ValidAutoSyncSource {\n  return validAutoSyncSources.includes(value as ValidAutoSyncSource);\n}\n\nexport function AccessTypeForm({\n  connector,\n  currentCredential,\n}: {\n  connector: ConfigurableSources;\n  currentCredential?: Credential<any> | null;\n}) {\n  const [access_type, meta, access_type_helpers] =\n    useField<AccessType>(\"access_type\");\n\n  const isPaidEnterpriseEnabled = usePaidEnterpriseFeaturesEnabled();\n  const isAutoSyncSupported = isValidAutoSyncSource(connector);\n\n  const selectedAuthMethod = currentCredential?.credential_json?.[\n    \"authentication_method\"\n  ] as string | undefined;\n\n  // If the selected auth method is one that disables sync, return true\n  const isSyncDisabledByAuth = useMemo(() => {\n    const template = (credentialTemplates as any)[connector];\n    const authMethods = template?.authMethods as\n      | { value: string; disablePermSync?: boolean }[]\n      | undefined; // auth methods are returned as an array of objects with a value and disablePermSync property\n    if (!authMethods || !selectedAuthMethod) return false;\n    const method = authMethods.find((m) => m.value === selectedAuthMethod);\n    return method?.disablePermSync === true;\n  }, [connector, selectedAuthMethod]);\n\n  useEffect(\n    () => {\n      // Only set default value if access_type.value is not already set\n      if (!access_type.value) {\n        if (!isPaidEnterpriseEnabled) {\n          access_type_helpers.setValue(\"public\");\n        } else if (isAutoSyncSupported) {\n          access_type_helpers.setValue(\"sync\");\n        } else {\n          access_type_helpers.setValue(\"private\");\n        }\n      }\n    },\n    [\n      // Only run this effect once when the component mounts\n      // eslint-disable-next-line react-hooks/exhaustive-deps\n    ]\n  );\n\n  const options = [\n    {\n      name: \"Private\",\n      value: \"private\",\n      description:\n        \"Only users who have explicitly been given access to this connector (through the User Groups page) can access the documents pulled in by this connector\",\n      disabled: false,\n      disabledReason: \"\",\n    },\n    {\n      name: \"Public\",\n      value: \"public\",\n      description:\n        \"Everyone with an account on Onyx can access the documents pulled in by this connector\",\n      disabled: false,\n      disabledReason: \"\",\n    },\n  ];\n\n  if (isAutoSyncSupported && isPaidEnterpriseEnabled) {\n    options.push({\n      name: \"Auto Sync Permissions\",\n      value: \"sync\",\n      description:\n        \"We will automatically sync permissions from the source. A document will be searchable in Onyx if and only if the user performing the search has permission to access the document in the source.\",\n      disabled: isSyncDisabledByAuth,\n      disabledReason:\n        \"Current credential auth method doesn't support Auto Sync Permissions. Please change the credential auth method to a supported one.\",\n    });\n  }\n\n  return (\n    <>\n      {isPaidEnterpriseEnabled && (\n        <>\n          <div>\n            <label className=\"text-text-950 font-medium\">Document Access</label>\n            <p className=\"text-sm text-text-500\">\n              Control who has access to the documents indexed by this connector.\n            </p>\n          </div>\n          <DefaultDropdown\n            options={options}\n            selected={access_type.value}\n            onSelect={(selected) => {\n              access_type_helpers.setValue(selected as AccessType);\n            }}\n            includeDefault={false}\n          />\n          {access_type.value === \"sync\" && isAutoSyncSupported && (\n            <AutoSyncOptions connectorType={connector as ValidAutoSyncSource} />\n          )}\n        </>\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/connectors/AccessTypeGroupSelector.tsx",
    "content": "import { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport React, { useState, useEffect } from \"react\";\nimport { FieldArray, ArrayHelpers, ErrorMessage, useField } from \"formik\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { UserGroup, UserRole } from \"@/lib/types\";\nimport { useUserGroups } from \"@/lib/hooks\";\nimport {\n  AccessType,\n  ValidAutoSyncSource,\n  ConfigurableSources,\n  validAutoSyncSources,\n} from \"@/lib/types\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { SvgUsers } from \"@opal/icons\";\nfunction isValidAutoSyncSource(\n  value: ConfigurableSources\n): value is ValidAutoSyncSource {\n  return validAutoSyncSources.includes(value as ValidAutoSyncSource);\n}\n\n// This should be included for all forms that require groups / public access\n// to be set, and access to this / permissioning should be handled within this component itself.\n\nexport type AccessTypeGroupSelectorFormType = {\n  access_type: AccessType;\n  groups: number[];\n};\n\nexport function AccessTypeGroupSelector({\n  connector,\n}: {\n  connector: ConfigurableSources;\n}) {\n  const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups();\n  const { isAdmin, user, isCurator } = useUser();\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n  const [shouldHideContent, setShouldHideContent] = useState(false);\n  const isAutoSyncSupported = isValidAutoSyncSource(connector);\n\n  const [access_type, meta, access_type_helpers] =\n    useField<AccessType>(\"access_type\");\n  const [groups, groups_meta, groups_helpers] = useField<number[]>(\"groups\");\n\n  useEffect(() => {\n    if (user && userGroups && isPaidEnterpriseFeaturesEnabled) {\n      const isUserAdmin = user.role === UserRole.ADMIN;\n      if (!isPaidEnterpriseFeaturesEnabled) {\n        access_type_helpers.setValue(\"public\");\n        return;\n      }\n\n      // Only set default access type if it's not already set, to avoid overriding user selections\n      if (!access_type.value && !isUserAdmin && !isAutoSyncSupported) {\n        access_type_helpers.setValue(\"private\");\n      }\n\n      if (\n        access_type.value === \"private\" &&\n        userGroups.length === 1 &&\n        userGroups[0] !== undefined &&\n        !isUserAdmin\n      ) {\n        groups_helpers.setValue([userGroups[0].id]);\n        setShouldHideContent(true);\n      } else if (access_type.value !== \"private\") {\n        // If the access type is public or sync, empty the groups selection\n        groups_helpers.setValue([]);\n        setShouldHideContent(false);\n      } else {\n        setShouldHideContent(false);\n      }\n    }\n  }, [\n    user,\n    userGroups,\n    access_type.value,\n    access_type_helpers,\n    groups_helpers,\n    isPaidEnterpriseFeaturesEnabled,\n    isAutoSyncSupported,\n  ]);\n\n  if (userGroupsIsLoading) {\n    return <div>Loading...</div>;\n  }\n  if (!isPaidEnterpriseFeaturesEnabled) {\n    return null;\n  }\n\n  if (shouldHideContent) {\n    return (\n      <>\n        {userGroups && userGroups[0] !== undefined && (\n          <div className=\"mb-1 font-medium text-base\">\n            This Connector will be assigned to group <b>{userGroups[0].name}</b>\n            .\n          </div>\n        )}\n      </>\n    );\n  }\n\n  return (\n    <div>\n      {(access_type.value === \"private\" || isCurator) &&\n        userGroups &&\n        userGroups?.length > 0 && (\n          <>\n            <Separator />\n            <div className=\"flex flex-col gap-3 pt-4\">\n              <Text as=\"p\" mainUiAction text05>\n                Assign group access for this Connector\n              </Text>\n              {userGroupsIsLoading ? (\n                <div className=\"animate-pulse bg-background-200 h-8 w-32 rounded\" />\n              ) : (\n                <Text as=\"p\" mainUiMuted text03>\n                  {isAdmin\n                    ? \"This Connector will be visible/accessible by the groups selected below\"\n                    : \"Curators must select one or more groups to give access to this Connector\"}\n                </Text>\n              )}\n            </div>\n            <FieldArray\n              name=\"groups\"\n              render={(arrayHelpers: ArrayHelpers) => (\n                <div className=\"flex flex-wrap gap-2 py-4\">\n                  {userGroupsIsLoading ? (\n                    <div className=\"animate-pulse bg-background-200 h-8 w-32 rounded\"></div>\n                  ) : (\n                    userGroups &&\n                    userGroups.map((userGroup: UserGroup) => {\n                      const ind = groups.value.indexOf(userGroup.id);\n                      let isSelected = ind !== -1;\n                      return (\n                        <Button\n                          variant={isSelected ? \"action\" : \"default\"}\n                          key={userGroup.id}\n                          icon={SvgUsers}\n                          onClick={() => {\n                            if (isSelected) {\n                              arrayHelpers.remove(ind);\n                            } else {\n                              arrayHelpers.push(userGroup.id);\n                            }\n                          }}\n                        >\n                          {userGroup.name}\n                        </Button>\n                      );\n                    })\n                  )}\n                </div>\n              )}\n            />\n            <ErrorMessage\n              name=\"groups\"\n              component=\"div\"\n              className=\"text-error text-sm mt-1\"\n            />\n          </>\n        )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/connectors/AutoSyncOptions.tsx",
    "content": "import { TextFormField } from \"@/components/Field\";\nimport { ValidAutoSyncSource } from \"@/lib/types\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { autoSyncConfigBySource } from \"@/lib/connectors/AutoSyncOptionFields\";\n\nexport function AutoSyncOptions({\n  connectorType,\n}: {\n  connectorType: ValidAutoSyncSource;\n}) {\n  const autoSyncConfig = autoSyncConfigBySource[connectorType];\n\n  if (Object.keys(autoSyncConfig).length === 0) {\n    return null;\n  }\n\n  return (\n    <div>\n      <Separator />\n      {Object.entries(autoSyncConfig).map(([key, config]) => (\n        <div key={key} className=\"mb-4\">\n          <TextFormField\n            name={`auto_sync_options.${key}`}\n            label={config.label}\n            subtext={config.subtext}\n          />\n        </div>\n      ))}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/connectors/BasicTable.tsx",
    "content": "import React, { FC, JSX } from \"react\";\n\ntype Column = {\n  header: string;\n  key: string;\n  width?: number | string;\n  alignment?: \"left\" | \"right\";\n};\n\ntype TableData = {\n  [key: string]: string | number | JSX.Element;\n};\n\ninterface BasicTableProps {\n  columns: Column[];\n  data: TableData[];\n  onSelect?: (row: TableData) => void;\n}\n\nexport const BasicTable: FC<BasicTableProps> = ({\n  columns,\n  data,\n  onSelect,\n}) => {\n  return (\n    <div>\n      <table className=\"w-full table-auto\">\n        <thead>\n          <tr className=\"text-left bg-background-700\">\n            {columns.map((column, index) => {\n              const isRightAligned = column?.alignment === \"right\";\n              return (\n                <th\n                  key={index}\n                  className={\n                    (column.width ? `w-${column.width} ` : \"\") +\n                    \"px-4 py-2 font-bold\" +\n                    (index === 0 ? \" rounded-tl-sm\" : \"\") +\n                    (index === columns.length - 1 ? \" rounded-tr-sm\" : \"\")\n                  }\n                >\n                  <div\n                    className={isRightAligned ? \"flex flex-row-reverse\" : \"\"}\n                  >\n                    {column.header}\n                  </div>\n                </th>\n              );\n            })}\n          </tr>\n        </thead>\n        <tbody>\n          {data.map((row, rowIndex) => (\n            <tr\n              key={rowIndex}\n              className={\n                \"text-sm\" +\n                (onSelect ? \" hover:bg-background-800 cursor-pointer\" : \"\")\n              }\n              onClick={() => onSelect && onSelect(row)}\n            >\n              {columns.map((column, colIndex) => {\n                const isRightAligned = column?.alignment === \"right\";\n                return (\n                  <td\n                    key={colIndex}\n                    className={\n                      (column.width ? `w-${column.width} ` : \"\") +\n                      (isRightAligned ? \"flex\" : \"\") +\n                      \"py-2 px-4 border-b border-background-800\"\n                    }\n                  >\n                    <div>{row[column.key]}</div>\n                  </td>\n                );\n              })}\n            </tr>\n          ))}\n        </tbody>\n      </table>\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/admin/connectors/ConnectorDocsLink.tsx",
    "content": "import { ValidSources } from \"@/lib/types\";\nimport { getSourceDocLink } from \"@/lib/sources\";\n\nexport default function ConnectorDocsLink({\n  sourceType,\n  className,\n}: {\n  sourceType: ValidSources;\n  className?: string;\n}) {\n  const docsLink = getSourceDocLink(sourceType);\n\n  if (!docsLink) {\n    return null;\n  }\n\n  const paragraphClass = [\"text-sm\", className].filter(Boolean).join(\" \");\n\n  return (\n    <p className={paragraphClass}>\n      Check out\n      <a\n        className=\"text-blue-600 hover:underline\"\n        target=\"_blank\"\n        rel=\"noopener\"\n        href={docsLink}\n      >\n        {\" \"}\n        our docs{\" \"}\n      </a>\n      for more info on configuring this connector.\n    </p>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/connectors/ConnectorTitle.tsx",
    "content": "import {\n  ConfluenceConfig,\n  Connector,\n  GithubConfig,\n  GitlabConfig,\n  JiraConfig,\n  SlackConfig,\n  ZulipConfig,\n} from \"@/lib/connectors/connectors\";\nimport { getSourceMetadata } from \"@/lib/sources\";\n\nimport Link from \"next/link\";\n\ninterface ConnectorTitleProps {\n  connector: Connector<any>;\n  ccPairId: number;\n  ccPairName: string;\n  isPublic?: boolean;\n  owner?: string;\n  isLink?: boolean;\n  showMetadata?: boolean;\n  className?: string;\n}\n\nexport const ConnectorTitle = ({\n  connector,\n  ccPairId,\n  ccPairName,\n  owner,\n  isPublic = true,\n  isLink = true,\n  showMetadata = true,\n  className = \"\",\n}: ConnectorTitleProps) => {\n  const sourceMetadata = getSourceMetadata(connector.source);\n\n  let additionalMetadata = new Map<string, string>();\n  if (connector.source === \"github\") {\n    const typedConnector = connector as Connector<GithubConfig>;\n    additionalMetadata.set(\n      \"Repo\",\n      typedConnector.connector_specific_config.repositories\n        ? `${typedConnector.connector_specific_config.repo_owner}/${\n            typedConnector.connector_specific_config.repositories.includes(\",\")\n              ? \"multiple repos\"\n              : typedConnector.connector_specific_config.repositories\n          }`\n        : `${typedConnector.connector_specific_config.repo_owner}/*`\n    );\n  } else if (connector.source === \"gitlab\") {\n    const typedConnector = connector as Connector<GitlabConfig>;\n    additionalMetadata.set(\n      \"Repo\",\n      `${typedConnector.connector_specific_config.project_owner}/${typedConnector.connector_specific_config.project_name}`\n    );\n  } else if (connector.source === \"confluence\") {\n    const typedConnector = connector as Connector<ConfluenceConfig>;\n    const wikiUrl = typedConnector.connector_specific_config.is_cloud\n      ? `${typedConnector.connector_specific_config.wiki_base}/wiki/spaces/${typedConnector.connector_specific_config.space}`\n      : `${typedConnector.connector_specific_config.wiki_base}/spaces/${typedConnector.connector_specific_config.space}`;\n    additionalMetadata.set(\"Wiki URL\", wikiUrl);\n    if (typedConnector.connector_specific_config.page_id) {\n      additionalMetadata.set(\n        \"Page ID\",\n        typedConnector.connector_specific_config.page_id\n      );\n    }\n  } else if (connector.source === \"jira\") {\n    const typedConnector = connector as Connector<JiraConfig>;\n    additionalMetadata.set(\n      \"Jira Project URL\",\n      typedConnector.connector_specific_config.jira_project_url\n    );\n  } else if (connector.source === \"slack\") {\n    const typedConnector = connector as Connector<SlackConfig>;\n    if (\n      typedConnector.connector_specific_config?.channels &&\n      typedConnector.connector_specific_config?.channels.length > 0\n    ) {\n      additionalMetadata.set(\n        \"Channels\",\n        typedConnector.connector_specific_config.channels.join(\", \")\n      );\n    }\n    if (typedConnector.connector_specific_config.channel_regex_enabled) {\n      additionalMetadata.set(\"Channel Regex Enabled\", \"True\");\n    }\n    if (typedConnector.connector_specific_config.include_bot_messages) {\n      additionalMetadata.set(\"Include Bot Messages\", \"True\");\n    }\n  } else if (connector.source === \"zulip\") {\n    const typedConnector = connector as Connector<ZulipConfig>;\n    additionalMetadata.set(\n      \"Realm\",\n      typedConnector.connector_specific_config.realm_name\n    );\n  }\n\n  const mainSectionClassName = `text-blue-500 dark:text-blue-100 flex w-fit ${className}`;\n  const mainDisplay = (\n    <>\n      {sourceMetadata.icon({ size: 16 })}\n      <div className=\"ml-1 my-auto text-xs font-medium truncate\">\n        {ccPairName || sourceMetadata.displayName}\n      </div>\n    </>\n  );\n  return (\n    <div className=\"my-auto max-w-full\">\n      {isLink ? (\n        <Link\n          className={mainSectionClassName}\n          href={`/admin/connector/${ccPairId}`}\n        >\n          {mainDisplay}\n        </Link>\n      ) : (\n        <div className={mainSectionClassName}>{mainDisplay}</div>\n      )}\n      {showMetadata && additionalMetadata.size > 0 && (\n        <div className=\"text-[10px] mt-0.5 text-gray-600 dark:text-gray-400\">\n          {Array.from(additionalMetadata.entries()).map(([key, value]) => {\n            return (\n              <div key={key} className=\"truncate\">\n                <i>{key}:</i> {value}\n              </div>\n            );\n          })}\n        </div>\n      )}\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/admin/connectors/CredentialForm.tsx",
    "content": "import React, { JSX } from \"react\";\nimport { Formik, Form } from \"formik\";\nimport * as Yup from \"yup\";\nimport { toast } from \"@/hooks/useToast\";\nimport { ValidSources } from \"@/lib/types\";\n\nimport {\n  createCredential,\n  createCredentialWithPrivateKey,\n} from \"@/lib/credential\";\nimport {\n  CredentialBase,\n  Credential,\n  CredentialWithPrivateKey,\n} from \"@/lib/connectors/credentials\";\n\nconst PRIVATE_KEY_FIELD_KEY = \"private_key\";\n\nexport async function submitCredential<T>(\n  credential: CredentialBase<T> | CredentialWithPrivateKey<T>\n): Promise<{\n  credential?: Credential<any>;\n  message: string;\n  isSuccess: boolean;\n}> {\n  let isSuccess = false;\n  try {\n    let response: Response;\n    if (PRIVATE_KEY_FIELD_KEY in credential && credential.private_key) {\n      response = await createCredentialWithPrivateKey(\n        credential as CredentialWithPrivateKey<T>\n      );\n    } else {\n      response = await createCredential(credential as CredentialBase<T>);\n    }\n    if (response.ok) {\n      const parsed_response = await response.json();\n      const credential = parsed_response.credential;\n      isSuccess = true;\n      return { credential, message: \"Success!\", isSuccess: true };\n    } else {\n      const errorData = await response.json();\n      return { message: `Error: ${errorData.detail}`, isSuccess: false };\n    }\n  } catch (error) {\n    return { message: `Error: ${error}`, isSuccess: false };\n  }\n}\n\ninterface Props<YupObjectType extends Yup.AnyObject> {\n  formBody: JSX.Element | null;\n  validationSchema: Yup.ObjectSchema<YupObjectType>;\n  initialValues: YupObjectType;\n  onSubmit: (isSuccess: boolean) => void;\n  source: ValidSources;\n}\n\nexport function CredentialForm<T extends Yup.AnyObject>({\n  formBody,\n  validationSchema,\n  initialValues,\n  source,\n  onSubmit,\n}: Props<T>): JSX.Element {\n  return (\n    <>\n      <Formik\n        initialValues={initialValues}\n        validationSchema={validationSchema}\n        onSubmit={(values, formikHelpers) => {\n          formikHelpers.setSubmitting(true);\n          submitCredential<T>({\n            credential_json: values,\n            admin_public: true,\n            curator_public: false,\n            groups: [],\n            source: source,\n          }).then(({ message, isSuccess }) => {\n            if (isSuccess) {\n              toast.success(message);\n            } else {\n              toast.error(message);\n            }\n            formikHelpers.setSubmitting(false);\n            onSubmit(isSuccess);\n          });\n        }}\n      >\n        {({ isSubmitting }) => (\n          <Form>\n            {formBody}\n            <div className=\"flex\">\n              <button\n                type=\"submit\"\n                color=\"green\"\n                disabled={isSubmitting}\n                className=\"mx-auto w-64 inline-flex items-center \n                justify-center whitespace-nowrap rounded-md text-sm \n                font-medium transition-colors  bg-background-200 text-primary-foreground\n                focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring \n                disabled:pointer-events-none disabled:opacity-50 \n                shadow hover:bg-primary/90 h-9 px-4 py-2\"\n              >\n                Update\n              </button>\n            </div>\n          </Form>\n        )}\n      </Formik>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/connectors/FileUpload.tsx",
    "content": "import { useFormikContext } from \"formik\";\nimport { FC, useState } from \"react\";\nimport React from \"react\";\nimport Dropzone from \"react-dropzone\";\n\ninterface FileUploadProps {\n  selectedFiles: File[];\n  setSelectedFiles: (files: File[]) => void;\n  message?: string;\n  name?: string;\n  multiple?: boolean;\n  accept?: string;\n}\n\nexport const FileUpload: FC<FileUploadProps> = ({\n  name,\n  selectedFiles,\n  setSelectedFiles,\n  message,\n  multiple = true,\n  accept,\n}) => {\n  const [dragActive, setDragActive] = useState(false);\n  const { setFieldValue } = useFormikContext();\n\n  return (\n    <div>\n      <Dropzone\n        onDrop={(acceptedFiles) => {\n          let filesToSet: File[] = [];\n          if (multiple) {\n            filesToSet = acceptedFiles;\n          } else {\n            const acceptedFile = acceptedFiles[0];\n            if (acceptedFile !== undefined) {\n              filesToSet = [acceptedFile];\n            }\n          }\n\n          if (filesToSet !== undefined) {\n            setSelectedFiles(filesToSet);\n          }\n\n          setDragActive(false);\n          if (name) {\n            setFieldValue(name, multiple ? filesToSet : filesToSet[0]);\n          }\n        }}\n        onDragLeave={() => setDragActive(false)}\n        onDragEnter={() => setDragActive(true)}\n        multiple={multiple}\n        accept={accept ? { [accept]: [] } : undefined}\n      >\n        {({ getRootProps, getInputProps }) => (\n          <section>\n            <div\n              {...getRootProps()}\n              className={\n                \"flex flex-col items-center w-full px-4 py-12 rounded \" +\n                \"shadow-lg tracking-wide border border-border cursor-pointer\" +\n                (dragActive ? \" border-accent\" : \"\")\n              }\n            >\n              <input {...getInputProps()} />\n              <b className=\"text-text-darker\">\n                {message ||\n                  `Drag and drop ${\n                    multiple ? \"some files\" : \"a file\"\n                  } here, or click to select ${multiple ? \"files\" : \"a file\"}`}\n              </b>\n            </div>\n          </section>\n        )}\n      </Dropzone>\n\n      {selectedFiles.length > 0 && (\n        <div className=\"mt-4\">\n          <h2 className=\"text-sm font-bold\">\n            Selected File{multiple ? \"s\" : \"\"}\n          </h2>\n          <ul>\n            {selectedFiles.map((file) => (\n              <div key={file.name} className=\"flex\">\n                <p className=\"text-sm mr-2\">{file.name}</p>\n              </div>\n            ))}\n          </ul>\n        </div>\n      )}\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/admin/connectors/types.ts",
    "content": "import { JSX } from \"react\";\nimport * as Yup from \"yup\";\n\nexport type FormBodyBuilder<T extends Yup.AnyObject> = (\n  values: T\n) => JSX.Element;\n\nexport type RequireAtLeastOne<T, Keys extends keyof T = keyof T> = Pick<\n  T,\n  Exclude<keyof T, Keys>\n> &\n  {\n    [K in Keys]-?: Required<Pick<T, K>> & Partial<Pick<T, Exclude<Keys, K>>>;\n  }[Keys];\n"
  },
  {
    "path": "web/src/components/admin/federated/FederatedConnectorForm.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Button as OpalButton } from \"@opal/components\";\nimport {\n  ConfigurableSources,\n  CredentialFieldSpec,\n  ConfigurationFieldSpec,\n  FederatedConnectorCreateRequest,\n  FederatedConnectorDetail,\n  CredentialSchemaResponse,\n} from \"@/lib/types\";\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport { Card, CardContent } from \"@/components/ui/card\";\nimport { Input } from \"@/components/ui/input\";\nimport { useRouter } from \"next/navigation\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { AlertTriangle, Check, Loader2, Trash2Icon, Info } from \"lucide-react\";\nimport BackButton from \"@/refresh-components/buttons/BackButton\";\nimport Title from \"@/components/ui/title\";\nimport {\n  DropdownMenu,\n  DropdownMenuContent,\n  DropdownMenuTrigger,\n} from \"@/components/ui/dropdown-menu\";\nimport { DropdownMenuItemWithTooltip } from \"@/components/ui/dropdown-menu-with-tooltip\";\nimport { toast } from \"@/hooks/useToast\";\n\nimport { Badge } from \"@/components/ui/badge\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { ListFieldInput } from \"@/refresh-components/inputs/ListFieldInput\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { SvgSettings } from \"@opal/icons\";\n\nexport interface FederatedConnectorFormProps {\n  connector: ConfigurableSources;\n  connectorId?: number; // Optional ID for editing existing connector\n  preloadedConnectorData?: FederatedConnectorDetail;\n  preloadedCredentialSchema?: CredentialSchemaResponse;\n}\n\ninterface CredentialForm {\n  [key: string]: string;\n}\n\ninterface ConfigForm {\n  [key: string]: string | boolean | string[] | number | undefined;\n}\n\ninterface FormState {\n  credentials: CredentialForm;\n  config: ConfigForm;\n  schema: Record<string, CredentialFieldSpec> | null;\n  configurationSchema: Record<string, ConfigurationFieldSpec> | null;\n  schemaError: string | null;\n  configurationSchemaError: string | null;\n  connectorError: string | null;\n}\n\nasync function validateCredentials(\n  source: string,\n  credentials: CredentialForm\n): Promise<{ success: boolean; message: string }> {\n  try {\n    const response = await fetch(\n      `/api/federated/sources/federated_${source}/credentials/validate`,\n      {\n        method: \"POST\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify(credentials),\n      }\n    );\n\n    if (!response.ok) {\n      const errorData = await response.json().catch(() => ({}));\n      return {\n        success: false,\n        message:\n          errorData.detail || `Validation failed: ${response.statusText}`,\n      };\n    }\n\n    const result = await response.json();\n    return {\n      success: result,\n      message: result ? \"Credentials are valid\" : \"Credentials are invalid\",\n    };\n  } catch (error) {\n    return { success: false, message: `Validation error: ${error}` };\n  }\n}\n\nasync function createFederatedConnector(\n  source: string,\n  credentials: CredentialForm,\n  config?: ConfigForm\n): Promise<{ success: boolean; message: string }> {\n  try {\n    const response = await fetch(\"/api/federated\", {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        source: `federated_${source}`,\n        credentials,\n        config: config || {},\n      } as FederatedConnectorCreateRequest),\n    });\n\n    if (response.ok) {\n      return {\n        success: true,\n        message: \"Federated connector created successfully!\",\n      };\n    } else {\n      const errorData = await response.json();\n      return {\n        success: false,\n        message: errorData.detail || \"Failed to create federated connector\",\n      };\n    }\n  } catch (error) {\n    return { success: false, message: `Error: ${error}` };\n  }\n}\n\nasync function updateFederatedConnector(\n  id: number,\n  credentials: CredentialForm,\n  config?: ConfigForm\n): Promise<{ success: boolean; message: string }> {\n  try {\n    const response = await fetch(`/api/federated/${id}`, {\n      method: \"PUT\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        credentials,\n        config: config || {},\n      }),\n    });\n\n    if (response.ok) {\n      return {\n        success: true,\n        message: \"Federated connector updated successfully!\",\n      };\n    } else {\n      const errorData = await response.json();\n      return {\n        success: false,\n        message: errorData.detail || \"Failed to update federated connector\",\n      };\n    }\n  } catch (error) {\n    return { success: false, message: `Error: ${error}` };\n  }\n}\n\nasync function deleteFederatedConnector(\n  id: number\n): Promise<{ success: boolean; message: string }> {\n  try {\n    const response = await fetch(`/api/federated/${id}`, {\n      method: \"DELETE\",\n    });\n\n    if (response.ok) {\n      return {\n        success: true,\n        message: \"Federated connector deleted successfully!\",\n      };\n    } else {\n      const errorData = await response.json();\n      return {\n        success: false,\n        message: errorData.detail || \"Failed to delete federated connector\",\n      };\n    }\n  } catch (error) {\n    return { success: false, message: `Error: ${error}` };\n  }\n}\n\nexport function FederatedConnectorForm({\n  connector,\n  connectorId,\n  preloadedConnectorData,\n  preloadedCredentialSchema,\n}: FederatedConnectorFormProps) {\n  const router = useRouter();\n  const sourceMetadata = getSourceMetadata(connector);\n  const isEditMode = connectorId !== undefined;\n\n  const [formState, setFormState] = useState<FormState>({\n    credentials: preloadedConnectorData?.credentials || {},\n    config: preloadedConnectorData?.config || {},\n    schema: preloadedCredentialSchema?.credentials || null,\n    configurationSchema: null,\n    schemaError: null,\n    configurationSchemaError: null,\n    connectorError: null,\n  });\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const [submitMessage, setSubmitMessage] = useState<string | null>(null);\n  const [submitSuccess, setSubmitSuccess] = useState<boolean | null>(null);\n  const [isValidating, setIsValidating] = useState(false);\n  const [isDeleting, setIsDeleting] = useState(false);\n  const [isLoadingSchema, setIsLoadingSchema] = useState(\n    !preloadedCredentialSchema\n  );\n  const [configValidationErrors, setConfigValidationErrors] = useState<\n    Record<string, string>\n  >({});\n\n  // Fetch credential schema if not preloaded\n  useEffect(() => {\n    const fetchCredentialSchema = async () => {\n      if (!preloadedCredentialSchema) {\n        setIsLoadingSchema(true);\n        try {\n          const response = await fetch(\n            `/api/federated/sources/federated_${connector}/credentials/schema`\n          );\n\n          if (!response.ok) {\n            throw new Error(\n              `Failed to fetch credential schema: ${response.statusText}`\n            );\n          }\n\n          const responseData = await response.json();\n          setFormState((prev) => ({\n            ...prev,\n            schema: responseData.credentials,\n            schemaError: null,\n          }));\n        } catch (error) {\n          console.error(\"Error fetching credential schema:\", error);\n          setFormState((prev) => ({\n            ...prev,\n            schemaError: `Failed to load credential schema: ${error}`,\n          }));\n        } finally {\n          setIsLoadingSchema(false);\n        }\n      }\n    };\n\n    fetchCredentialSchema();\n  }, [connector, preloadedCredentialSchema]);\n\n  // Fetch configuration schema for connector configuration\n  useEffect(() => {\n    const fetchConfigurationSchema = async () => {\n      try {\n        const response = await fetch(\n          `/api/federated/sources/federated_${connector}/configuration/schema`\n        );\n\n        if (!response.ok) {\n          throw new Error(\n            `Failed to fetch configuration schema: ${response.statusText}`\n          );\n        }\n\n        const responseData = await response.json();\n        const configurationSchema = responseData.configuration;\n\n        // Initialize config with defaults - merge with existing config\n        // This ensures boolean fields like search_all_channels have explicit values for UI state\n        if (configurationSchema) {\n          const configWithDefaults: Record<string, any> = {};\n          (Object.entries(configurationSchema) as [string, any][]).forEach(\n            ([key, field]) => {\n              if (field.default !== undefined) {\n                configWithDefaults[key] = field.default;\n              }\n            }\n          );\n\n          setFormState((prev) => ({\n            ...prev,\n            // Merge defaults first, then overlay saved config values\n            config: { ...configWithDefaults, ...prev.config },\n            configurationSchema,\n            configurationSchemaError: null,\n          }));\n        } else {\n          setFormState((prev) => ({\n            ...prev,\n            configurationSchema,\n            configurationSchemaError: null,\n          }));\n        }\n      } catch (error) {\n        console.error(\"Error fetching configuration schema:\", error);\n        setFormState((prev) => ({\n          ...prev,\n          configurationSchemaError: `Failed to load configuration schema: ${error}`,\n        }));\n      }\n    };\n\n    fetchConfigurationSchema();\n  }, [connector, isEditMode]);\n\n  // Show loading state at the top level if schema is loading\n  if (isLoadingSchema) {\n    return (\n      <div className=\"mx-auto w-[800px]\">\n        <div className=\"flex flex-col items-center justify-center py-16\">\n          <Loader2 className=\"h-8 w-8 animate-spin text-blue-500 mb-4\" />\n          <div className=\"text-center\">\n            <p className=\"text-lg font-medium text-gray-700 mb-2\">\n              Loading credential schema...\n            </p>\n            <p className=\"text-sm text-gray-500\">\n              Retrieving required fields for this connector type\n            </p>\n          </div>\n        </div>\n      </div>\n    );\n  }\n\n  const handleCredentialChange = (key: string, value: string) => {\n    setFormState((prev) => ({\n      ...prev,\n      credentials: {\n        ...prev.credentials,\n        [key]: value,\n      },\n    }));\n  };\n\n  const handleConfigChange = (key: string, value: any) => {\n    setFormState((prev) => ({\n      ...prev,\n      config: {\n        ...prev.config,\n        [key]: value,\n      },\n    }));\n  };\n\n  const handleValidateCredentials = async () => {\n    if (!formState.schema) return;\n\n    setIsValidating(true);\n    setSubmitMessage(null);\n    setSubmitSuccess(null);\n\n    try {\n      const result = await validateCredentials(\n        connector,\n        formState.credentials\n      );\n      setSubmitMessage(result.message);\n      setSubmitSuccess(result.success);\n    } catch (error) {\n      setSubmitMessage(`Validation error: ${error}`);\n      setSubmitSuccess(false);\n    } finally {\n      setIsValidating(false);\n    }\n  };\n\n  const handleDeleteConnector = async () => {\n    if (!connectorId) return;\n\n    const confirmed = window.confirm(\n      \"Are you sure you want to delete this federated connector? This action cannot be undone.\"\n    );\n\n    if (!confirmed) return;\n\n    setIsDeleting(true);\n\n    try {\n      const result = await deleteFederatedConnector(connectorId);\n\n      if (result.success) {\n        toast.success(result.message);\n        // Redirect after a short delay\n        setTimeout(() => {\n          router.push(\"/admin/indexing/status\");\n        }, 500);\n      } else {\n        toast.error(result.message);\n      }\n    } catch (error) {\n      toast.error(`Error deleting connector: ${error}`);\n    } finally {\n      setIsDeleting(false);\n    }\n  };\n\n  const handleSubmit = async (e: React.FormEvent) => {\n    e.preventDefault();\n    setIsSubmitting(true);\n    setSubmitMessage(null);\n    setSubmitSuccess(null);\n\n    try {\n      // Validate required fields\n      if (formState.schema) {\n        const missingRequired = Object.entries(formState.schema)\n          .filter(\n            ([key, field]) => field.required && !formState.credentials[key]\n          )\n          .map(([key]) => key);\n\n        if (missingRequired.length > 0) {\n          setSubmitMessage(\n            `Missing required fields: ${missingRequired.join(\", \")}`\n          );\n          setSubmitSuccess(false);\n          setIsSubmitting(false);\n          return;\n        }\n      }\n\n      // Validate configuration fields (Slack-specific validation)\n      const configErrors = getConfigValidationErrors();\n      if (Object.keys(configErrors).length > 0) {\n        setConfigValidationErrors(configErrors);\n        // Show the first error message\n        const firstError = Object.values(configErrors)[0] as string;\n        setSubmitMessage(firstError);\n        setSubmitSuccess(false);\n        setIsSubmitting(false);\n        return;\n      }\n      setConfigValidationErrors({});\n\n      // Validate credentials before creating/updating\n      const validation = await validateCredentials(\n        connector,\n        formState.credentials\n      );\n      if (!validation.success) {\n        setSubmitMessage(`Credential validation failed: ${validation.message}`);\n        setSubmitSuccess(false);\n        setIsSubmitting(false);\n        return;\n      }\n\n      // Create or update the connector\n      const result =\n        isEditMode && connectorId\n          ? await updateFederatedConnector(\n              connectorId,\n              formState.credentials,\n              formState.config\n            )\n          : await createFederatedConnector(\n              connector,\n              formState.credentials,\n              formState.config\n            );\n\n      setSubmitMessage(result.message);\n      setSubmitSuccess(result.success);\n      setIsSubmitting(false);\n\n      if (result.success) {\n        // Redirect after a short delay\n        setTimeout(() => {\n          router.push(\"/admin/indexing/status\");\n        }, 500);\n      }\n    } catch (error) {\n      setSubmitMessage(`Error: ${error}`);\n      setSubmitSuccess(false);\n      setIsSubmitting(false);\n    }\n  };\n\n  const renderCredentialFields = () => {\n    if (formState.schemaError) {\n      return (\n        <div className=\"flex items-center gap-2 p-3 rounded-md bg-red-50 text-red-700 border border-red-200\">\n          <AlertTriangle size={16} />\n          <span className=\"text-sm\">{formState.schemaError}</span>\n        </div>\n      );\n    }\n\n    if (formState.connectorError) {\n      return (\n        <div className=\"flex items-center gap-2 p-3 rounded-md bg-red-50 text-red-700 border border-red-200\">\n          <AlertTriangle size={16} />\n          <span className=\"text-sm\">{formState.connectorError}</span>\n        </div>\n      );\n    }\n\n    if (!formState.schema) {\n      return (\n        <div className=\"text-sm text-gray-500\">\n          No credential schema available for this connector type.\n        </div>\n      );\n    }\n\n    return (\n      <>\n        {Object.entries(formState.schema).map(([fieldKey, fieldSpec]) => (\n          <div\n            key={fieldKey}\n            className=\"flex items-center justify-between gap-4 py-2\"\n          >\n            <div className=\"flex-1\">\n              <Text as=\"p\" mainUiAction text04 className=\"mb-1\">\n                {fieldKey\n                  .replace(/_/g, \" \")\n                  .replace(/\\b\\w/g, (l) => l.toUpperCase())}\n                {fieldSpec.required && (\n                  <span className=\"text-red-500 ml-1\">*</span>\n                )}\n              </Text>\n              {fieldSpec.description && (\n                <Text as=\"p\" mainUiMuted text03>\n                  {fieldSpec.description}\n                </Text>\n              )}\n            </div>\n            <Input\n              id={fieldKey}\n              type={fieldSpec.secret ? \"password\" : \"text\"}\n              placeholder={\n                fieldSpec.example\n                  ? String(fieldSpec.example)\n                  : fieldSpec.description\n              }\n              value={formState.credentials[fieldKey] || \"\"}\n              onChange={(e) => handleCredentialChange(fieldKey, e.target.value)}\n              className=\"w-96\"\n              required={fieldSpec.required}\n            />\n          </div>\n        ))}\n      </>\n    );\n  };\n\n  // Helper to determine if channels input should be disabled for Slack\n  const disableSlackChannelInput = (fieldKey: string): boolean => {\n    if (connector !== \"slack\" || fieldKey !== \"channels\") {\n      return false;\n    }\n    // Disable channels field when search_all_channels is true\n    return formState.config.search_all_channels === true;\n  };\n\n  // Helper to determine if channels field is required for Slack\n  const isSlackChannelsRequired = (): boolean => {\n    if (connector !== \"slack\") {\n      return false;\n    }\n    // Channels are required when search_all_channels is false\n    return formState.config.search_all_channels === false;\n  };\n\n  // Get validation errors for configuration fields (Slack-specific)\n  const getConfigValidationErrors = (): Record<string, string> => {\n    const errors: Record<string, string> = {};\n\n    if (connector === \"slack\") {\n      // Check if channels are required but not provided\n      if (\n        formState.config.search_all_channels === false &&\n        (!formState.config.channels ||\n          !Array.isArray(formState.config.channels) ||\n          formState.config.channels.length === 0)\n      ) {\n        errors.channels =\n          \"At least one channel is required when 'Search All Channels' is disabled\";\n      }\n    }\n\n    return errors;\n  };\n\n  const renderConfigFields = () => {\n    if (formState.configurationSchemaError) {\n      return (\n        <div className=\"flex items-center gap-2 p-3 rounded-md bg-red-50 text-red-700 border border-red-200\">\n          <AlertTriangle size={16} />\n          <span className=\"text-sm\">{formState.configurationSchemaError}</span>\n        </div>\n      );\n    }\n\n    if (!formState.configurationSchema) {\n      return (\n        <div className=\"text-sm text-gray-500\">\n          No search configuration available for this connector type.\n        </div>\n      );\n    }\n\n    const channelInputPlaceholder =\n      \"Type channel name or regex pattern and press Enter\";\n\n    return (\n      <>\n        {Object.entries(formState.configurationSchema).map(\n          ([fieldKey, fieldSpec]) => {\n            const isBoolType = fieldSpec.type === \"bool\";\n            const isListType = fieldSpec.type.startsWith(\"list[\");\n\n            return (\n              <div key={fieldKey} className=\"space-y-2 w-full\">\n                {isBoolType ? (\n                  <div className=\"flex items-center gap-3 py-2\">\n                    <Checkbox\n                      checked={\n                        formState.config[fieldKey] !== undefined\n                          ? Boolean(formState.config[fieldKey])\n                          : Boolean(fieldSpec.default)\n                      }\n                      onCheckedChange={(checked) =>\n                        handleConfigChange(fieldKey, checked)\n                      }\n                    />\n                    <div className=\"flex-1\">\n                      <Text as=\"p\" mainUiAction text04>\n                        {fieldKey\n                          .replace(/_/g, \" \")\n                          .replace(/\\b\\w/g, (l) => l.toUpperCase())}\n                      </Text>\n                      {fieldSpec.description && (\n                        <Text as=\"p\" mainUiMuted text03>\n                          {fieldSpec.description}\n                        </Text>\n                      )}\n                    </div>\n                  </div>\n                ) : (\n                  <>\n                    {isListType ? (\n                      <>\n                        <Text as=\"p\" mainUiAction text04>\n                          {fieldSpec.description ||\n                            fieldKey\n                              .replace(/_/g, \" \")\n                              .replace(/\\b\\w/g, (l) => l.toUpperCase())}\n                          {(fieldSpec.required ||\n                            (fieldKey === \"channels\" &&\n                              isSlackChannelsRequired())) && (\n                            <span className=\"text-red-500 ml-1\">*</span>\n                          )}\n                        </Text>\n                        <ListFieldInput\n                          values={\n                            Array.isArray(formState.config[fieldKey])\n                              ? (formState.config[fieldKey] as string[])\n                              : []\n                          }\n                          onChange={(values) => {\n                            handleConfigChange(fieldKey, values);\n                            // Clear validation error when user adds channels\n                            if (\n                              fieldKey === \"channels\" &&\n                              configValidationErrors.channels\n                            ) {\n                              setConfigValidationErrors((prev) => {\n                                const { channels, ...rest } = prev;\n                                return rest;\n                              });\n                            }\n                          }}\n                          placeholder={\n                            fieldKey === \"channels\" ||\n                            fieldKey === \"exclude_channels\"\n                              ? channelInputPlaceholder\n                              : \"Type and press Enter to add an item\"\n                          }\n                          disabled={disableSlackChannelInput(fieldKey)}\n                          error={!!configValidationErrors[fieldKey]}\n                        />\n                        {configValidationErrors[fieldKey] && (\n                          <Text as=\"p\" className=\"text-red-500 text-sm mt-1\">\n                            {configValidationErrors[fieldKey]}\n                          </Text>\n                        )}\n                      </>\n                    ) : (\n                      <div className=\"flex items-center justify-between gap-4 py-2\">\n                        <div className=\"flex-1\">\n                          <Text as=\"p\" mainUiAction text04 className=\"mb-1\">\n                            {fieldKey\n                              .replace(/_/g, \" \")\n                              .replace(/\\b\\w/g, (l) => l.toUpperCase())}\n                            {fieldSpec.required && (\n                              <span className=\"text-red-500 ml-1\">*</span>\n                            )}\n                          </Text>\n                          {fieldSpec.description && (\n                            <Text as=\"p\" mainUiMuted text03>\n                              {fieldSpec.description}\n                            </Text>\n                          )}\n                        </div>\n                        <Input\n                          id={fieldKey}\n                          type={fieldSpec.type === \"int\" ? \"number\" : \"text\"}\n                          placeholder={\n                            fieldSpec.example\n                              ? String(fieldSpec.example)\n                              : fieldSpec.description\n                          }\n                          value={\n                            formState.config[fieldKey] !== undefined\n                              ? String(formState.config[fieldKey])\n                              : \"\"\n                          }\n                          onChange={(e) => {\n                            const value =\n                              fieldSpec.type === \"int\"\n                                ? parseInt(e.target.value, 10)\n                                : e.target.value;\n                            handleConfigChange(fieldKey, value);\n                          }}\n                          className=\"w-96\"\n                          required={fieldSpec.required}\n                        />\n                      </div>\n                    )}\n                  </>\n                )}\n              </div>\n            );\n          }\n        )}\n      </>\n    );\n  };\n\n  return (\n    <div className=\"mx-auto w-[800px] pb-8\">\n      <BackButton routerOverride=\"/admin/indexing/status\" />\n\n      <div className=\"flex items-center justify-between h-16 pb-2 border-b border-neutral-200 dark:border-neutral-600\">\n        <div className=\"my-auto\">\n          <SourceIcon iconSize={32} sourceType={connector} />\n        </div>\n\n        <div className=\"ml-2 overflow-hidden text-ellipsis whitespace-nowrap flex-1 mr-4\">\n          <div className=\"text-2xl font-bold text-text-default flex items-center gap-2\">\n            <span>\n              {isEditMode ? \"Edit\" : \"Setup\"} {sourceMetadata.displayName}\n            </span>\n            <Badge variant=\"outline\" className=\"text-xs\">\n              Federated\n            </Badge>\n            <SimpleTooltip\n              tooltip={\n                sourceMetadata.federatedTooltip ||\n                \"This is a federated connector. It will result in greater latency and lower search quality compared to regular connectors.\"\n              }\n              side=\"bottom\"\n            >\n              <Info className=\"cursor-help\" size={16} />\n            </SimpleTooltip>\n          </div>\n        </div>\n\n        {isEditMode && (\n          <div className=\"ml-auto flex gap-x-2\">\n            <DropdownMenu>\n              <DropdownMenuTrigger asChild>\n                <div>\n                  <OpalButton prominence=\"secondary\" icon={SvgSettings}>\n                    Manage\n                  </OpalButton>\n                </div>\n              </DropdownMenuTrigger>\n              <DropdownMenuContent align=\"end\">\n                <DropdownMenuItemWithTooltip\n                  onClick={handleDeleteConnector}\n                  disabled={isDeleting}\n                  className=\"flex items-center gap-x-2 cursor-pointer px-3 py-2 text-red-600 hover:text-red-700 dark:text-red-400 dark:hover:text-red-300\"\n                  tooltip={isDeleting ? \"Deletion in progress\" : undefined}\n                >\n                  <Trash2Icon className=\"h-4 w-4\" />\n                  <span>{isDeleting ? \"Deleting...\" : \"Delete\"}</span>\n                </DropdownMenuItemWithTooltip>\n              </DropdownMenuContent>\n            </DropdownMenu>\n          </div>\n        )}\n      </div>\n\n      <Title className=\"mb-2 mt-6\" size=\"md\">\n        Federated Connector Configuration\n      </Title>\n\n      <Card className=\"px-8 py-4\">\n        <CardContent className=\"p-0\">\n          <form onSubmit={handleSubmit}>\n            <Text as=\"p\" headingH3>\n              Credentials\n            </Text>\n            <Text as=\"p\" mainUiMuted>\n              Enter the credentials for this connector.\n            </Text>\n            <div className=\"space-y-4\">{renderCredentialFields()}</div>\n            <Separator />\n            <Text as=\"p\" headingH3>\n              Configuration\n            </Text>\n            <div className=\"space-y-4\">{renderConfigFields()}</div>\n\n            <div className=\"flex gap-2 pt-4 w-full justify-end\">\n              {submitMessage && (\n                <div\n                  className={`flex items-center gap-2 p-2 rounded-md ${\n                    submitSuccess\n                      ? \"bg-green-50 text-green-700 border border-green-200\"\n                      : \"bg-red-50 text-red-700 border border-red-200\"\n                  }`}\n                >\n                  {submitSuccess ? (\n                    <Check size={16} />\n                  ) : (\n                    <AlertTriangle size={16} />\n                  )}\n                  <span className=\"text-sm\">{submitMessage}</span>\n                </div>\n              )}\n\n              {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n              <Button\n                type=\"button\"\n                secondary\n                onClick={handleValidateCredentials}\n                disabled={isValidating || !formState.schema}\n                className=\"flex ml-auto\"\n              >\n                {isValidating ? \"Validating...\" : \"Validate\"}\n              </Button>\n              {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n              <Button\n                type=\"submit\"\n                disabled={isSubmitting || !formState.schema}\n                className=\"flex\"\n                leftIcon={isSubmitting ? SimpleLoader : undefined}\n              >\n                {isSubmitting\n                  ? isEditMode\n                    ? \"Updating...\"\n                    : \"Creating...\"\n                  : isEditMode\n                    ? \"Update\"\n                    : \"Create\"}\n              </Button>\n            </div>\n          </form>\n        </CardContent>\n      </Card>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/users/BulkAdd.tsx",
    "content": "\"use client\";\n\nimport { withFormik, FormikProps, FormikErrors, Form, Field } from \"formik\";\nimport Button from \"@/refresh-components/buttons/Button\";\n\nconst WHITESPACE_SPLIT = /\\s+/;\nconst EMAIL_REGEX = /[^@]+@[^.]+\\.[^.]/;\n\nconst addUsers = async (url: string, { arg }: { arg: Array<string> }) => {\n  return await fetch(url, {\n    method: \"PUT\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({ emails: arg }),\n  });\n};\n\nexport type EmailInviteStatus =\n  | \"SENT\"\n  | \"NOT_CONFIGURED\"\n  | \"SEND_FAILED\"\n  | \"DISABLED\";\n\ninterface FormProps {\n  onSuccess: (emailInviteStatus: EmailInviteStatus) => void;\n  onFailure: (res: Response) => void;\n}\n\ninterface FormValues {\n  emails: string;\n}\n\nconst normalizeEmails = (emails: string) =>\n  emails\n    .trim()\n    .split(WHITESPACE_SPLIT)\n    .filter(Boolean)\n    .map((email) => email.toLowerCase());\n\nconst AddUserFormRenderer = ({\n  touched,\n  errors,\n  isSubmitting,\n  handleSubmit,\n}: FormikProps<FormValues>) => (\n  <Form className=\"w-full\" onSubmit={handleSubmit}>\n    <Field\n      id=\"emails\"\n      name=\"emails\"\n      as=\"textarea\"\n      className=\"w-full p-4\"\n      onKeyDown={(e: React.KeyboardEvent<HTMLTextAreaElement>) => {\n        if (e.key === \"Enter\") {\n          e.preventDefault();\n          handleSubmit();\n        }\n      }}\n    />\n    {touched.emails && errors.emails && (\n      <div className=\"text-error text-sm\">{errors.emails}</div>\n    )}\n    {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n    <Button type=\"submit\" disabled={isSubmitting} className=\"self-end\">\n      Add\n    </Button>\n  </Form>\n);\n\nconst AddUserForm = withFormik<FormProps, FormValues>({\n  mapPropsToValues: (props) => {\n    return {\n      emails: \"\",\n    };\n  },\n  validate: (values: FormValues): FormikErrors<FormValues> => {\n    const emails = normalizeEmails(values.emails);\n    if (!emails.length) {\n      return { emails: \"Required\" };\n    }\n    for (let email of emails) {\n      if (!email.match(EMAIL_REGEX)) {\n        return { emails: `${email} is not a valid email` };\n      }\n    }\n    return {};\n  },\n  handleSubmit: async (values: FormValues, formikBag) => {\n    const emails = normalizeEmails(values.emails);\n    formikBag.setSubmitting(true);\n    await addUsers(\"/api/manage/admin/users\", { arg: emails })\n      .then(async (res) => {\n        if (res.ok) {\n          const data = await res.json();\n          formikBag.props.onSuccess(data.email_invite_status);\n        } else {\n          formikBag.props.onFailure(res);\n        }\n      })\n      .finally(() => {\n        formikBag.setSubmitting(false);\n      });\n  },\n})(AddUserFormRenderer);\n\nconst BulkAdd = ({ onSuccess, onFailure }: FormProps) => {\n  return <AddUserForm onSuccess={onSuccess} onFailure={onFailure} />;\n};\n\nexport default BulkAdd;\n"
  },
  {
    "path": "web/src/components/admin/users/CenteredPageSelector.tsx",
    "content": "import {\n  PageSelector,\n  type PageSelectorProps as Props,\n} from \"@/components/PageSelector\";\n\nconst CenteredPageSelector = ({\n  currentPage,\n  totalPages,\n  onPageChange,\n}: Props) => (\n  <div className=\"mx-auto text-center\">\n    <PageSelector\n      currentPage={currentPage}\n      totalPages={totalPages}\n      onPageChange={onPageChange}\n    />\n  </div>\n);\n\nexport default CenteredPageSelector;\n"
  },
  {
    "path": "web/src/components/admin/users/InvitedUserTable.tsx",
    "content": "import { useState } from \"react\";\nimport {\n  Table,\n  TableHead,\n  TableRow,\n  TableBody,\n  TableCell,\n} from \"@/components/ui/table\";\nimport CenteredPageSelector from \"./CenteredPageSelector\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { InvitedUserSnapshot } from \"@/lib/types\";\nimport { TableHeader } from \"@/components/ui/table\";\nimport { InviteUserButton } from \"./buttons/InviteUserButton\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { FetchError } from \"@/lib/fetcher\";\n\nconst USERS_PER_PAGE = 10;\n\ninterface Props {\n  users: InvitedUserSnapshot[];\n  mutate: () => void;\n  error: FetchError | null;\n  isLoading: boolean;\n  q: string;\n}\n\nconst InvitedUserTable = ({ users, mutate, error, isLoading, q }: Props) => {\n  const [currentPageNum, setCurrentPageNum] = useState<number>(1);\n\n  if (!users.length)\n    return <p>Users that have been invited will show up here</p>;\n\n  const totalPages = Math.ceil(users.length / USERS_PER_PAGE);\n\n  // Filter users based on the search query\n  const filteredUsers = q\n    ? users.filter((user) => user.email.includes(q))\n    : users;\n\n  // Get the current page of users\n  const currentPageOfUsers = filteredUsers.slice(\n    (currentPageNum - 1) * USERS_PER_PAGE,\n    currentPageNum * USERS_PER_PAGE\n  );\n\n  if (isLoading) {\n    return <ThreeDotsLoader />;\n  }\n\n  if (error) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Error loading users\"\n        errorMsg={error?.info?.detail}\n      />\n    );\n  }\n\n  return (\n    <>\n      <Table className=\"overflow-visible\">\n        <TableHeader>\n          <TableRow>\n            <TableHead>Email</TableHead>\n            <TableHead>\n              <div className=\"flex justify-end\">Actions</div>\n            </TableHead>\n          </TableRow>\n        </TableHeader>\n        <TableBody>\n          {currentPageOfUsers.length ? (\n            currentPageOfUsers.map((user) => (\n              <TableRow key={user.email}>\n                <TableCell>{user.email}</TableCell>\n                <TableCell>\n                  <div className=\"flex justify-end\">\n                    <InviteUserButton\n                      user={user}\n                      invited={true}\n                      mutate={mutate}\n                    />\n                  </div>\n                </TableCell>\n              </TableRow>\n            ))\n          ) : (\n            <TableRow>\n              <TableCell colSpan={2} className=\"h-24 text-center\">\n                {`No users found matching \"${q}\"`}\n              </TableCell>\n            </TableRow>\n          )}\n        </TableBody>\n      </Table>\n      {totalPages > 1 ? (\n        <CenteredPageSelector\n          currentPage={currentPageNum}\n          totalPages={totalPages}\n          onPageChange={setCurrentPageNum}\n        />\n      ) : null}\n    </>\n  );\n};\n\nexport default InvitedUserTable;\n"
  },
  {
    "path": "web/src/components/admin/users/PendingUsersTable.tsx",
    "content": "import { useState } from \"react\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  Table,\n  TableHead,\n  TableRow,\n  TableBody,\n  TableCell,\n} from \"@/components/ui/table\";\nimport CenteredPageSelector from \"./CenteredPageSelector\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { InvitedUserSnapshot } from \"@/lib/types\";\nimport { TableHeader } from \"@/components/ui/table\";\nimport { Button } from \"@opal/components\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { FetchError } from \"@/lib/fetcher\";\nimport { ConfirmEntityModal } from \"@/components/modals/ConfirmEntityModal\";\nimport { SvgCheck } from \"@opal/icons\";\nconst USERS_PER_PAGE = 10;\n\ninterface Props {\n  users: InvitedUserSnapshot[];\n  mutate: () => void;\n  error: FetchError | null;\n  isLoading: boolean;\n  q: string;\n}\n\nconst PendingUsersTable = ({ users, mutate, error, isLoading, q }: Props) => {\n  const [currentPageNum, setCurrentPageNum] = useState<number>(1);\n  const [userToApprove, setUserToApprove] = useState<string | null>(null);\n\n  if (!users.length)\n    return <p>Users that have requested to join will show up here</p>;\n\n  const totalPages = Math.ceil(users.length / USERS_PER_PAGE);\n\n  // Filter users based on the search query\n  const filteredUsers = q\n    ? users.filter((user) => user.email.includes(q))\n    : users;\n\n  // Get the current page of users\n  const currentPageOfUsers = filteredUsers.slice(\n    (currentPageNum - 1) * USERS_PER_PAGE,\n    currentPageNum * USERS_PER_PAGE\n  );\n\n  if (isLoading) {\n    return <ThreeDotsLoader />;\n  }\n\n  if (error) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Error loading pending users\"\n        errorMsg={error?.info?.detail}\n      />\n    );\n  }\n\n  const handleAcceptRequest = async (email: string) => {\n    const normalizedEmail = email.toLowerCase();\n    try {\n      await fetch(\"/api/tenants/users/invite/approve\", {\n        method: \"POST\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({ email: normalizedEmail }),\n      });\n      mutate();\n      setUserToApprove(null);\n    } catch (error) {\n      toast.error(\"Failed to approve user request\");\n    }\n  };\n\n  return (\n    <>\n      {userToApprove && (\n        <ConfirmEntityModal\n          entityType=\"Join Request\"\n          entityName={userToApprove}\n          onClose={() => setUserToApprove(null)}\n          onSubmit={() => handleAcceptRequest(userToApprove)}\n          actionButtonText=\"Approve\"\n          action=\"approve the join request of\"\n          additionalDetails={`${userToApprove} has requested to join the team. Approving will add them as a user in this team.`}\n          removeConfirmationText\n        />\n      )}\n      <Table className=\"overflow-visible\">\n        <TableHeader>\n          <TableRow>\n            <TableHead>Email</TableHead>\n            <TableHead>\n              <div className=\"flex justify-end\">Actions</div>\n            </TableHead>\n          </TableRow>\n        </TableHeader>\n        <TableBody>\n          {currentPageOfUsers.length ? (\n            currentPageOfUsers.map((user) => (\n              <TableRow key={user.email}>\n                <TableCell>{user.email}</TableCell>\n                <TableCell>\n                  <div className=\"flex justify-end\">\n                    <Button\n                      prominence=\"secondary\"\n                      onClick={() => setUserToApprove(user.email.toLowerCase())}\n                      icon={SvgCheck}\n                    >\n                      Accept Join Request\n                    </Button>\n                  </div>\n                </TableCell>\n              </TableRow>\n            ))\n          ) : (\n            <TableRow>\n              <TableCell colSpan={2} className=\"h-24 text-center\">\n                {`No pending users found matching \"${q}\"`}\n              </TableCell>\n            </TableRow>\n          )}\n        </TableBody>\n      </Table>\n      {totalPages > 1 ? (\n        <CenteredPageSelector\n          currentPage={currentPageNum}\n          totalPages={totalPages}\n          onPageChange={setCurrentPageNum}\n        />\n      ) : null}\n    </>\n  );\n};\n\nexport default PendingUsersTable;\n"
  },
  {
    "path": "web/src/components/admin/users/ResetPasswordModal.tsx",
    "content": "import { useState } from \"react\";\nimport Modal from \"@/refresh-components/Modal\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { User } from \"@/lib/types\";\nimport { toast } from \"@/hooks/useToast\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { LoadingAnimation } from \"@/components/Loading\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport { SvgKey, SvgRefreshCw } from \"@opal/icons\";\n\nexport interface ResetPasswordModalProps {\n  user: User;\n  onClose: () => void;\n}\n\nexport default function ResetPasswordModal({\n  user,\n  onClose,\n}: ResetPasswordModalProps) {\n  const [newPassword, setNewPassword] = useState<string | null>(null);\n  const [isLoading, setIsLoading] = useState(false);\n\n  const handleResetPassword = async () => {\n    setIsLoading(true);\n    try {\n      const response = await fetch(\"/api/password/reset_password\", {\n        method: \"POST\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({ user_email: user.email }),\n      });\n\n      if (response.ok) {\n        const data = await response.json();\n        setNewPassword(data.new_password);\n        toast.success(\"Password reset successfully\");\n      } else {\n        const errorData = await response.json();\n        toast.error(errorData.detail || \"Failed to reset password\");\n      }\n    } catch (error) {\n      toast.error(\"An error occurred while resetting the password\");\n    } finally {\n      setIsLoading(false);\n    }\n  };\n\n  return (\n    <Modal open onOpenChange={onClose}>\n      <Modal.Content width=\"sm\" height=\"sm\">\n        <Modal.Header\n          icon={SvgKey}\n          title=\"Reset Password\"\n          onClose={onClose}\n          description={\n            newPassword\n              ? undefined\n              : `Are you sure you want to reset the password for ${user.email}?`\n          }\n        />\n        <Modal.Body>\n          {newPassword ? (\n            <div>\n              <Text as=\"p\">New Password:</Text>\n              <div className=\"flex items-center bg-background-tint-03 p-2 rounded gap-2\">\n                <Text as=\"p\" data-testid=\"new-password\" className=\"flex-grow\">\n                  {newPassword}\n                </Text>\n                <CopyIconButton getCopyText={() => newPassword} />\n              </div>\n              <Text as=\"p\" text02>\n                Please securely communicate this password to the user.\n              </Text>\n            </div>\n          ) : (\n            // TODO(@raunakab): migrate to opal Button once it supports ReactNode children\n            <Button\n              onClick={handleResetPassword}\n              disabled={isLoading}\n              leftIcon={SvgRefreshCw}\n            >\n              {isLoading ? (\n                <Text as=\"p\">\n                  <LoadingAnimation text=\"Resetting\" />\n                </Text>\n              ) : (\n                \"Reset Password\"\n              )}\n            </Button>\n          )}\n        </Modal.Body>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/users/SignedUpUserTable.tsx",
    "content": "\"use client\";\n\nimport {\n  type User,\n  UserRole,\n  InvitedUserSnapshot,\n  USER_ROLE_LABELS,\n} from \"@/lib/types\";\nimport { ReactNode, useEffect, useState } from \"react\";\nimport CenteredPageSelector from \"./CenteredPageSelector\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  Table,\n  TableHead,\n  TableRow,\n  TableBody,\n  TableCell,\n} from \"@/components/ui/table\";\nimport { TableHeader } from \"@/components/ui/table\";\nimport UserRoleDropdown from \"./buttons/UserRoleDropdown\";\nimport DeleteUserButton from \"./buttons/DeleteUserButton\";\nimport DeactivateUserButton from \"./buttons/DeactivateUserButton\";\nimport usePaginatedFetch from \"@/hooks/usePaginatedFetch\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { ErrorCallout } from \"@/components/ErrorCallout\";\nimport { InviteUserButton } from \"./buttons/InviteUserButton\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport {\n  Select,\n  SelectContent,\n  SelectTrigger,\n  SelectValue,\n} from \"@/components/ui/select\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { LeaveOrganizationButton } from \"./buttons/LeaveOrganizationButton\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport ResetPasswordModal from \"./ResetPasswordModal\";\nimport { LogOut, UserMinus } from \"lucide-react\";\nimport Popover from \"@/refresh-components/Popover\";\nimport { SvgKey, SvgMoreHorizontal } from \"@opal/icons\";\nimport { Button as OpalButton } from \"@opal/components\";\nconst ITEMS_PER_PAGE = 10;\nconst PAGES_PER_BATCH = 2;\n\ninterface ActionMenuProps {\n  user: User;\n  currentUser: User | null;\n  refresh: () => void;\n  invitedUsersMutate: () => void;\n  handleResetPassword: (user: User) => void;\n}\n\nexport interface SignedUpUserTableProps {\n  invitedUsers: InvitedUserSnapshot[];\n  q: string;\n  invitedUsersMutate: () => void;\n  countDisplay?: ReactNode;\n  onTotalItemsChange?: (count: number) => void;\n  onLoadingChange?: (isLoading: boolean) => void;\n}\n\nexport default function SignedUpUserTable({\n  invitedUsers,\n  q = \"\",\n  invitedUsersMutate,\n  countDisplay,\n  onTotalItemsChange,\n  onLoadingChange,\n}: SignedUpUserTableProps) {\n  const [filters, setFilters] = useState<{\n    is_active?: boolean;\n    roles?: UserRole[];\n  }>({});\n\n  const [selectedRoles, setSelectedRoles] = useState<UserRole[]>([]);\n  const [resetPasswordUser, setResetPasswordUser] = useState<User | null>(null);\n  const invitedEmails = invitedUsers.map((user) => user.email.toLowerCase());\n\n  const {\n    currentPageData: pageOfUsers,\n    isLoading,\n    error,\n    currentPage,\n    totalPages,\n    goToPage,\n    refresh,\n    totalItems,\n  } = usePaginatedFetch<User>({\n    itemsPerPage: ITEMS_PER_PAGE,\n    pagesPerBatch: PAGES_PER_BATCH,\n    endpoint: \"/api/manage/users/accepted\",\n    query: q,\n    filter: filters,\n  });\n\n  const { user: currentUser } = useUser();\n\n  useEffect(() => {\n    onLoadingChange?.(isLoading);\n  }, [isLoading, onLoadingChange]);\n\n  useEffect(() => {\n    if (pageOfUsers !== null) {\n      onTotalItemsChange?.(totalItems);\n    }\n  }, [pageOfUsers, totalItems, onTotalItemsChange]);\n\n  if (error) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Error loading users\"\n        errorMsg={error?.message}\n      />\n    );\n  }\n\n  const handlePopup = (message: string, type: \"success\" | \"error\") => {\n    if (type === \"success\") refresh();\n    if (type === \"success\") {\n      toast.success(message);\n    } else {\n      toast.error(message);\n    }\n  };\n\n  const onRoleChangeSuccess = () =>\n    handlePopup(\"User role updated successfully!\", \"success\");\n  const onRoleChangeError = (errorMsg: string) =>\n    handlePopup(`Unable to update user role - ${errorMsg}`, \"error\");\n\n  const toggleRole = (roleEnum: UserRole) => {\n    setFilters((prev) => {\n      const currentRoles = prev.roles || [];\n      const newRoles = currentRoles.includes(roleEnum)\n        ? currentRoles.filter((r) => r !== roleEnum) // Remove role if already selected\n        : [...currentRoles, roleEnum]; // Add role if not selected\n\n      setSelectedRoles(newRoles); // Update selected roles state\n      return {\n        ...prev,\n        roles: newRoles,\n      };\n    });\n  };\n\n  const removeRole = (roleEnum: UserRole) => {\n    setSelectedRoles((prev) => prev.filter((role) => role !== roleEnum)); // Remove role from selected roles\n    toggleRole(roleEnum); // Deselect the role in filters\n  };\n\n  const handleResetPassword = (user: User) => {\n    setResetPasswordUser(user);\n  };\n\n  // --------------\n  // Render Functions\n  // --------------\n\n  const renderFilters = () => (\n    <>\n      <div className=\"flex flex-wrap items-center justify-between gap-4 py-4\">\n        <div className=\"flex flex-wrap items-center gap-4\">\n          <InputSelect\n            value={filters.is_active?.toString() || \"all\"}\n            onValueChange={(selectedStatus) =>\n              setFilters((prev) => {\n                if (selectedStatus === \"all\") {\n                  const { is_active, ...rest } = prev;\n                  return rest;\n                }\n                return {\n                  ...prev,\n                  is_active: selectedStatus === \"true\",\n                };\n              })\n            }\n          >\n            <InputSelect.Trigger />\n\n            <InputSelect.Content>\n              <InputSelect.Item value=\"all\">All Status</InputSelect.Item>\n              <InputSelect.Item value=\"true\">Active</InputSelect.Item>\n              <InputSelect.Item value=\"false\">Inactive</InputSelect.Item>\n            </InputSelect.Content>\n          </InputSelect>\n\n          <Select value=\"roles\">\n            <SelectTrigger className=\"w-[260px] h-[34px] bg-neutral\">\n              <SelectValue>\n                {filters.roles?.length\n                  ? `${filters.roles.length} role(s) selected`\n                  : \"All Roles\"}\n              </SelectValue>\n            </SelectTrigger>\n            <SelectContent className=\"bg-background-tint-00\">\n              {Object.entries(USER_ROLE_LABELS)\n                .filter(([role]) => role !== UserRole.EXT_PERM_USER)\n                .map(([role, label]) => (\n                  <div\n                    key={role}\n                    className=\"flex items-center space-x-2 px-2 py-1.5 cursor-pointer hover:bg-background-200\"\n                    onClick={() => toggleRole(role as UserRole)}\n                  >\n                    <input\n                      type=\"checkbox\"\n                      checked={\n                        filters.roles?.includes(role as UserRole) || false\n                      }\n                      onChange={(e) => e.stopPropagation()}\n                    />\n                    <label className=\"text-sm font-normal\">{label}</label>\n                  </div>\n                ))}\n            </SelectContent>\n          </Select>\n        </div>\n        {countDisplay}\n      </div>\n      <div className=\"flex gap-2 py-1\">\n        {selectedRoles.map((role) => (\n          <button\n            key={role}\n            className=\"border border-background-300 bg-neutral p-1 rounded text-sm hover:bg-background-200\"\n            onClick={() => removeRole(role)}\n            style={{ padding: \"2px 8px\" }}\n          >\n            <span>{USER_ROLE_LABELS[role]}</span>\n            <span className=\"ml-3\">&times;</span>\n          </button>\n        ))}\n      </div>\n    </>\n  );\n\n  const renderUserRoleDropdown = (user: User) => {\n    if (user.role === UserRole.SLACK_USER) {\n      return <p className=\"ml-2\">Slack User</p>;\n    }\n    return (\n      <UserRoleDropdown\n        user={user}\n        onSuccess={onRoleChangeSuccess}\n        onError={onRoleChangeError}\n      />\n    );\n  };\n\n  const ActionMenu: React.FC<ActionMenuProps> = ({\n    user,\n    currentUser,\n    refresh,\n    invitedUsersMutate,\n    handleResetPassword,\n  }) => {\n    const buttonClassName = \"w-full\";\n\n    return (\n      <Popover>\n        <Popover.Trigger asChild>\n          <OpalButton prominence=\"secondary\" icon={SvgMoreHorizontal} />\n        </Popover.Trigger>\n        <Popover.Content>\n          <div className=\"grid gap-1\">\n            {NEXT_PUBLIC_CLOUD_ENABLED && user.id === currentUser?.id ? (\n              <LeaveOrganizationButton\n                user={user}\n                mutate={refresh}\n                className={buttonClassName}\n              >\n                <LogOut className=\"mr-2 h-4 w-4\" />\n                <span>Leave Organization</span>\n              </LeaveOrganizationButton>\n            ) : (\n              <>\n                {!user.is_active && (\n                  <DeleteUserButton\n                    user={user}\n                    mutate={refresh}\n                    className={buttonClassName}\n                  >\n                    <UserMinus className=\"mr-2 h-4 w-4\" />\n                    <span>Delete User</span>\n                  </DeleteUserButton>\n                )}\n                <DeactivateUserButton\n                  user={user}\n                  deactivate={user.is_active}\n                  mutate={refresh}\n                  className={buttonClassName}\n                >\n                  {/*<UserX className=\"mr-2 h-4 w-4\" />*/}\n                  {user.is_active ? \"Deactivate User\" : \"Activate User\"}\n                </DeactivateUserButton>\n              </>\n            )}\n            {user.password_configured && (\n              // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n              <Button\n                className={buttonClassName}\n                onClick={() => handleResetPassword(user)}\n                leftIcon={SvgKey}\n              >\n                Reset Password\n              </Button>\n            )}\n          </div>\n        </Popover.Content>\n      </Popover>\n    );\n  };\n\n  const renderActionButtons = (user: User) => {\n    return (\n      <div className=\"flex items-center justify-end gap-2\">\n        {user.role === UserRole.SLACK_USER && (\n          <InviteUserButton\n            user={user}\n            invited={invitedEmails.includes(user.email.toLowerCase())}\n            mutate={[refresh, invitedUsersMutate]}\n          />\n        )}\n        <ActionMenu\n          user={user}\n          currentUser={currentUser}\n          refresh={refresh}\n          invitedUsersMutate={invitedUsersMutate}\n          handleResetPassword={handleResetPassword}\n        />\n      </div>\n    );\n  };\n\n  return (\n    <>\n      {renderFilters()}\n      <Table className=\"overflow-visible\">\n        <TableHeader>\n          <TableRow>\n            <TableHead>Email</TableHead>\n            <TableHead className=\"text-center\">Role</TableHead>\n            <TableHead className=\"text-center\">Status</TableHead>\n            <TableHead>\n              <div className=\"flex\">\n                <div className=\"ml-auto\">Actions</div>\n              </div>\n            </TableHead>\n          </TableRow>\n        </TableHeader>\n        {isLoading ? (\n          <TableBody>\n            <TableRow>\n              <TableCell colSpan={4} className=\"text-center\">\n                <ThreeDotsLoader />\n              </TableCell>\n            </TableRow>\n          </TableBody>\n        ) : (\n          <TableBody>\n            {!pageOfUsers?.length ? (\n              <TableRow>\n                <TableCell colSpan={4} className=\"text-center\">\n                  <p className=\"pt-4 pb-4\">\n                    {filters.roles?.length || filters.is_active !== undefined\n                      ? \"No users found matching your filters\"\n                      : `No users found matching \"${q}\"`}\n                  </p>\n                </TableCell>\n              </TableRow>\n            ) : (\n              pageOfUsers.map((user) => (\n                <TableRow key={user.id}>\n                  <TableCell>{user.email}</TableCell>\n                  <TableCell className=\"w-[180px]\">\n                    {renderUserRoleDropdown(user)}\n                  </TableCell>\n                  <TableCell className=\"text-center w-[140px]\">\n                    <i>{user.is_active ? \"Active\" : \"Inactive\"}</i>\n                  </TableCell>\n                  <TableCell className=\"text-right  w-[300px] \">\n                    {renderActionButtons(user)}\n                  </TableCell>\n                </TableRow>\n              ))\n            )}\n          </TableBody>\n        )}\n      </Table>\n      {totalPages > 1 && (\n        <CenteredPageSelector\n          currentPage={currentPage}\n          totalPages={totalPages}\n          onPageChange={goToPage}\n        />\n      )}\n      {resetPasswordUser && (\n        <ResetPasswordModal\n          user={resetPasswordUser}\n          onClose={() => setResetPasswordUser(null)}\n        />\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/components/admin/users/buttons/DeactivateUserButton.tsx",
    "content": "import { type User } from \"@/lib/types\";\nimport { toast } from \"@/hooks/useToast\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport useSWRMutation from \"swr/mutation\";\nimport userMutationFetcher from \"@/lib/admin/users/userMutationFetcher\";\nimport { SvgXCircle } from \"@opal/icons\";\nconst DeactivateUserButton = ({\n  user,\n  deactivate,\n  mutate,\n  className,\n  children,\n}: {\n  user: User;\n  deactivate: boolean;\n  mutate: () => void;\n  className?: string;\n  children?: string;\n}) => {\n  const { trigger, isMutating } = useSWRMutation(\n    deactivate\n      ? \"/api/manage/admin/deactivate-user\"\n      : \"/api/manage/admin/activate-user\",\n    userMutationFetcher,\n    {\n      onSuccess: () => {\n        mutate();\n        toast.success(`User ${deactivate ? \"deactivated\" : \"activated\"}!`);\n      },\n      onError: (errorMsg) => toast.error(errorMsg.message),\n    }\n  );\n  return (\n    // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n    <Button\n      className={className}\n      onClick={() => trigger({ user_email: user.email })}\n      disabled={isMutating}\n      leftIcon={SvgXCircle}\n      tertiary\n    >\n      {children}\n    </Button>\n  );\n};\n\nexport default DeactivateUserButton;\n"
  },
  {
    "path": "web/src/components/admin/users/buttons/DeleteUserButton.tsx",
    "content": "import { type User } from \"@/lib/types\";\nimport { toast } from \"@/hooks/useToast\";\nimport userMutationFetcher from \"@/lib/admin/users/userMutationFetcher\";\nimport useSWRMutation from \"swr/mutation\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { useState } from \"react\";\nimport { ConfirmEntityModal } from \"@/components/modals/ConfirmEntityModal\";\n\nconst DeleteUserButton = ({\n  user,\n  mutate,\n  className,\n  children,\n}: {\n  user: User;\n  mutate: () => void;\n  className?: string;\n  children?: React.ReactNode;\n}) => {\n  const { trigger, isMutating } = useSWRMutation(\n    \"/api/manage/admin/delete-user\",\n    userMutationFetcher,\n    {\n      onSuccess: () => {\n        mutate();\n        toast.success(\"User deleted successfully!\");\n      },\n      onError: (errorMsg) =>\n        toast.error(`Unable to delete user - ${errorMsg.message}`),\n    }\n  );\n\n  const [showDeleteModal, setShowDeleteModal] = useState(false);\n  return (\n    <>\n      {showDeleteModal && (\n        <ConfirmEntityModal\n          entityType=\"user\"\n          entityName={user.email}\n          onClose={() => setShowDeleteModal(false)}\n          onSubmit={() => trigger({ user_email: user.email, method: \"DELETE\" })}\n          additionalDetails=\"All data associated with this user will be deleted (including personas, tools and chat sessions).\"\n        />\n      )}\n\n      {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n      <Button\n        className={className}\n        onClick={() => setShowDeleteModal(true)}\n        disabled={isMutating}\n        danger\n      >\n        {children}\n      </Button>\n    </>\n  );\n};\n\nexport default DeleteUserButton;\n"
  },
  {
    "path": "web/src/components/admin/users/buttons/InviteUserButton.tsx",
    "content": "import {\n  type InvitedUserSnapshot,\n  type AcceptedUserSnapshot,\n} from \"@/lib/types\";\n\nimport { toast } from \"@/hooks/useToast\";\nimport useSWRMutation from \"swr/mutation\";\nimport { Button } from \"@opal/components\";\nimport GenericConfirmModal from \"@/components/modals/GenericConfirmModal\";\nimport { useState } from \"react\";\n\nexport const InviteUserButton = ({\n  user,\n  invited,\n  mutate,\n}: {\n  user: AcceptedUserSnapshot | InvitedUserSnapshot;\n  invited: boolean;\n  mutate: (() => void) | (() => void)[];\n}) => {\n  const { trigger: inviteTrigger, isMutating: isInviting } = useSWRMutation(\n    \"/api/manage/admin/users\",\n    async (url, { arg }: { arg: { emails: string[] } }) => {\n      const response = await fetch(url, {\n        method: \"PUT\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify(arg),\n      });\n      if (!response.ok) {\n        throw new Error(await response.text());\n      }\n      return response.json();\n    },\n    {\n      onSuccess: () => {\n        setShowInviteModal(false);\n        if (typeof mutate === \"function\") {\n          mutate();\n        } else {\n          mutate.forEach((fn) => fn());\n        }\n        toast.success(\"User invited successfully!\");\n      },\n      onError: (errorMsg) => {\n        setShowInviteModal(false);\n        toast.error(`Unable to invite user - ${errorMsg}`);\n      },\n    }\n  );\n\n  const { trigger: uninviteTrigger, isMutating: isUninviting } = useSWRMutation(\n    \"/api/manage/admin/remove-invited-user\",\n    async (url, { arg }: { arg: { user_email: string } }) => {\n      const response = await fetch(url, {\n        method: \"PATCH\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify(arg),\n      });\n      if (!response.ok) {\n        throw new Error(await response.text());\n      }\n      return response.json();\n    },\n    {\n      onSuccess: () => {\n        setShowInviteModal(false);\n        if (typeof mutate === \"function\") {\n          mutate();\n        } else {\n          mutate.forEach((fn) => fn());\n        }\n        toast.success(\"User uninvited successfully!\");\n      },\n      onError: (errorMsg) => {\n        setShowInviteModal(false);\n        toast.error(`Unable to uninvite user - ${errorMsg}`);\n      },\n    }\n  );\n\n  const [showInviteModal, setShowInviteModal] = useState(false);\n\n  const handleConfirm = () => {\n    const normalizedEmail = user.email.toLowerCase();\n    if (invited) {\n      uninviteTrigger({ user_email: normalizedEmail });\n    } else {\n      inviteTrigger({ emails: [normalizedEmail] });\n    }\n  };\n\n  const isMutating = isInviting || isUninviting;\n\n  return (\n    <>\n      {showInviteModal && (\n        <GenericConfirmModal\n          title={`${invited ? \"Uninvite\" : \"Invite\"} User`}\n          message={`Are you sure you want to ${\n            invited ? \"uninvite\" : \"invite\"\n          } ${user.email}?`}\n          onClose={() => setShowInviteModal(false)}\n          onConfirm={handleConfirm}\n        />\n      )}\n\n      <Button disabled={isMutating} onClick={() => setShowInviteModal(true)}>\n        {invited ? \"Uninvite\" : \"Invite\"}\n      </Button>\n    </>\n  );\n};\n"
  },
  {
    "path": "web/src/components/admin/users/buttons/LeaveOrganizationButton.tsx",
    "content": "import { type User } from \"@/lib/types\";\nimport { toast } from \"@/hooks/useToast\";\nimport userMutationFetcher from \"@/lib/admin/users/userMutationFetcher\";\nimport useSWRMutation from \"swr/mutation\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { useState } from \"react\";\nimport { ConfirmEntityModal } from \"@/components/modals/ConfirmEntityModal\";\nimport { useRouter } from \"next/navigation\";\n\nexport const LeaveOrganizationButton = ({\n  user,\n  mutate,\n  className,\n  children,\n}: {\n  user: User;\n  mutate: () => void;\n  className?: string;\n  children?: React.ReactNode;\n}) => {\n  const router = useRouter();\n  const { trigger, isMutating } = useSWRMutation(\n    \"/api/tenants/leave-team\",\n    userMutationFetcher,\n    {\n      onSuccess: () => {\n        mutate();\n        toast.success(\"Successfully left the team!\");\n      },\n      onError: (errorMsg) => toast.error(`Unable to leave team - ${errorMsg}`),\n    }\n  );\n\n  const [showLeaveModal, setShowLeaveModal] = useState(false);\n\n  const handleLeaveOrganization = async () => {\n    await trigger({ user_email: user.email, method: \"POST\" });\n    router.push(\"/\");\n  };\n\n  return (\n    <>\n      {showLeaveModal && (\n        <ConfirmEntityModal\n          actionButtonText=\"Leave\"\n          entityType=\"team\"\n          entityName=\"your team\"\n          onClose={() => setShowLeaveModal(false)}\n          onSubmit={handleLeaveOrganization}\n          additionalDetails=\"You will lose access to all team data and resources.\"\n        />\n      )}\n\n      {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n      <Button\n        className={className}\n        onClick={() => setShowLeaveModal(true)}\n        disabled={isMutating}\n        internal\n      >\n        {children}\n      </Button>\n    </>\n  );\n};\n"
  },
  {
    "path": "web/src/components/admin/users/buttons/UserRoleDropdown.tsx",
    "content": "import {\n  type User,\n  UserRole,\n  USER_ROLE_LABELS,\n  INVALID_ROLE_HOVER_TEXT,\n} from \"@/lib/types\";\nimport userMutationFetcher from \"@/lib/admin/users/userMutationFetcher\";\nimport useSWRMutation from \"swr/mutation\";\n\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport GenericConfirmModal from \"@/components/modals/GenericConfirmModal\";\nimport { useState } from \"react\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\n\nexport interface UserRoleDropdownProps {\n  user: User;\n  onSuccess: () => void;\n  onError: (message: string) => void;\n}\n\nexport default function UserRoleDropdown({\n  user,\n  onSuccess,\n  onError,\n}: UserRoleDropdownProps) {\n  const [showConfirmModal, setShowConfirmModal] = useState(false);\n  const [pendingRole, setPendingRole] = useState<string | null>(null);\n\n  const { trigger: setUserRole, isMutating: isSettingRole } = useSWRMutation(\n    \"/api/manage/set-user-role\",\n    userMutationFetcher,\n    { onSuccess, onError }\n  );\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n\n  const handleChange = (value: string) => {\n    if (value === user.role) return;\n    if (user.role === UserRole.CURATOR) {\n      setShowConfirmModal(true);\n      setPendingRole(value);\n    } else {\n      setUserRole({\n        user_email: user.email,\n        new_role: value,\n      });\n    }\n  };\n\n  const handleConfirm = () => {\n    if (pendingRole) {\n      setUserRole({\n        user_email: user.email,\n        new_role: pendingRole,\n      });\n    }\n    setShowConfirmModal(false);\n    setPendingRole(null);\n  };\n\n  return (\n    <>\n      {showConfirmModal && (\n        <GenericConfirmModal\n          title=\"Change Curator Role\"\n          message={`Warning: Switching roles from Curator to ${\n            USER_ROLE_LABELS[pendingRole as UserRole] ??\n            USER_ROLE_LABELS[user.role]\n          } will remove their status as individual curators from all groups.`}\n          confirmText={`Switch Role to ${\n            USER_ROLE_LABELS[pendingRole as UserRole] ??\n            USER_ROLE_LABELS[user.role]\n          }`}\n          onClose={() => setShowConfirmModal(false)}\n          onConfirm={handleConfirm}\n        />\n      )}\n\n      <InputSelect\n        value={user.role}\n        onValueChange={handleChange}\n        disabled={isSettingRole}\n      >\n        <InputSelect.Trigger />\n\n        <InputSelect.Content>\n          {(Object.entries(USER_ROLE_LABELS) as [UserRole, string][]).map(\n            ([role, label]) => {\n              // Don't want to ever show external permissioned users because it's scary\n              if (role === UserRole.EXT_PERM_USER) return null;\n\n              // Only want to show limited users if paid enterprise features are enabled\n              // Also, dont want to show these other roles in general\n              const isNotVisibleRole =\n                (!isPaidEnterpriseFeaturesEnabled &&\n                  role === UserRole.GLOBAL_CURATOR) ||\n                role === UserRole.CURATOR ||\n                role === UserRole.LIMITED ||\n                role === UserRole.SLACK_USER;\n\n              // Always show the current role\n              const isCurrentRole = user.role === role;\n\n              return isNotVisibleRole && !isCurrentRole ? null : (\n                <InputSelect.Item\n                  key={role}\n                  value={role}\n                  data-testid={`user-role-dropdown-${role}`}\n                  title={INVALID_ROLE_HOVER_TEXT[role] ?? \"\"}\n                  data-tooltip-delay=\"0\"\n                >\n                  {label}\n                </InputSelect.Item>\n              );\n            }\n          )}\n        </InputSelect.Content>\n      </InputSelect>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/components/auth/AuthErrorDisplay.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport { toast } from \"@/hooks/useToast\";\n\nconst ERROR_MESSAGES = {\n  Anonymous: \"Your team does not have anonymous access enabled.\",\n};\n\nexport default function AuthErrorDisplay({\n  searchParams,\n}: {\n  searchParams: any;\n}) {\n  const error = searchParams?.error;\n\n  useEffect(() => {\n    if (error) {\n      toast.error(\n        ERROR_MESSAGES[error as keyof typeof ERROR_MESSAGES] ||\n          \"An error occurred.\"\n      );\n    }\n  }, [error]);\n\n  return null;\n}\n"
  },
  {
    "path": "web/src/components/auth/AuthFlowContainer.tsx",
    "content": "import Link from \"next/link\";\nimport { OnyxIcon } from \"../icons/icons\";\n\nexport default function AuthFlowContainer({\n  children,\n  authState,\n  footerContent,\n}: {\n  children: React.ReactNode;\n  authState?: \"signup\" | \"login\" | \"join\";\n  footerContent?: React.ReactNode;\n}) {\n  return (\n    <div className=\"p-4 flex flex-col items-center justify-center min-h-screen bg-background\">\n      <div className=\"w-full max-w-md flex items-start flex-col bg-background-tint-00 rounded-16 shadow-lg shadow-02 p-6\">\n        <OnyxIcon size={44} className=\"text-theme-primary-05\" />\n        <div className=\"w-full mt-3\">{children}</div>\n      </div>\n      {authState === \"login\" && (\n        <div className=\"text-sm mt-6 text-center w-full text-text-03 mainUiBody mx-auto\">\n          {footerContent ?? (\n            <>\n              New to Onyx?{\" \"}\n              <Link\n                href=\"/auth/signup\"\n                className=\"text-text-05 mainUiAction underline transition-colors duration-200\"\n              >\n                Create an Account\n              </Link>\n            </>\n          )}\n        </div>\n      )}\n      {authState === \"signup\" && (\n        <div className=\"text-sm mt-6 text-center w-full text-text-03 mainUiBody mx-auto\">\n          Already have an account?{\" \"}\n          <Link\n            href=\"/auth/login?autoRedirectToSignup=false\"\n            className=\"text-text-05 mainUiAction underline transition-colors duration-200\"\n          >\n            Sign In\n          </Link>\n        </div>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/chat/DynamicBottomSpacer.tsx",
    "content": "\"use client\";\n\nimport React, { useEffect, useRef, useCallback } from \"react\";\nimport { useCurrentChatState } from \"@/app/app/stores/useChatSessionStore\";\nimport { useScrollContainer } from \"@/components/chat/ScrollContainerContext\";\n\n// Small offset from the top of the scroll viewport where the anchor should appear.\n// The header is outside the scroll container, so when scrolled to the bottom\n// during the push-up effect, we only need minimal padding.\nconst ANCHOR_TOP_OFFSET_PX = 16;\n\n// Duration of smooth scroll animation (browser default is ~400-500ms, we add buffer)\nconst SMOOTH_SCROLL_DURATION_MS = 600;\n\n// How long to wait after content stops changing before deactivating\nconst CONTENT_SETTLED_DEBOUNCE_MS = 500;\n\nexport interface DynamicBottomSpacerProps {\n  /**\n   * Node ID of the anchor message (the new user message)\n   */\n  anchorNodeId?: number;\n}\n\n/**\n * DynamicBottomSpacer creates a \"fresh chat\" effect by filling the space\n * below messages to push content up when a new round starts.\n * Uses ResizeObserver to efficiently detect content changes instead of polling.\n */\nconst DynamicBottomSpacer = React.memo(\n  ({ anchorNodeId }: DynamicBottomSpacerProps) => {\n    const spacerRef = useRef<HTMLDivElement>(null);\n    const chatState = useCurrentChatState();\n    const isStreaming = chatState === \"streaming\" || chatState === \"loading\";\n\n    // Get scroll container refs from context (provided by ChatScrollContainer)\n    const { scrollContainerRef, contentWrapperRef, spacerHeightRef } =\n      useScrollContainer();\n\n    // Track state with refs to avoid re-renders\n    const isActiveRef = useRef(false);\n    const initialSpacerHeightRef = useRef(0);\n    const initialContentHeightRef = useRef(0);\n    const currentSpacerHeightRef = useRef(0);\n    const prevAnchorNodeIdRef = useRef<number | undefined>(undefined);\n    const wasStreamingRef = useRef(false);\n    const resizeObserverRef = useRef<ResizeObserver | null>(null);\n    const settledTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(\n      null\n    );\n\n    /**\n     * Set spacer height directly on DOM (no re-renders)\n     */\n    const setHeight = useCallback(\n      (height: number) => {\n        const h = Math.max(0, Math.round(height));\n        currentSpacerHeightRef.current = h;\n        spacerHeightRef.current = h;\n        if (spacerRef.current) {\n          spacerRef.current.style.height = `${h}px`;\n        }\n      },\n      [spacerHeightRef]\n    );\n\n    /**\n     * Get the scroll container element from context ref\n     */\n    const getScrollContainer = useCallback(() => {\n      return scrollContainerRef.current;\n    }, [scrollContainerRef]);\n\n    /**\n     * Get content height (total scrollHeight minus current spacer height)\n     */\n    const getContentHeight = useCallback(() => {\n      const scrollContainer = getScrollContainer();\n      if (!scrollContainer) return 0;\n      return scrollContainer.scrollHeight - currentSpacerHeightRef.current;\n    }, [getScrollContainer]);\n\n    /**\n     * Update spacer height based on content growth\n     */\n    const updateSpacerHeight = useCallback(() => {\n      if (!isActiveRef.current) return;\n\n      const currentContentHeight = getContentHeight();\n      const contentGrowth =\n        currentContentHeight - initialContentHeightRef.current;\n\n      // New spacer height = initial spacer - content growth\n      const newHeight = initialSpacerHeightRef.current - contentGrowth;\n\n      if (newHeight <= 0) {\n        setHeight(0);\n        isActiveRef.current = false;\n      } else {\n        setHeight(newHeight);\n      }\n    }, [setHeight, getContentHeight]);\n\n    /**\n     * Stop observing and clean up\n     */\n    const stopObserving = useCallback(() => {\n      if (resizeObserverRef.current) {\n        resizeObserverRef.current.disconnect();\n        resizeObserverRef.current = null;\n      }\n      if (settledTimeoutRef.current) {\n        clearTimeout(settledTimeoutRef.current);\n        settledTimeoutRef.current = null;\n      }\n    }, []);\n\n    /**\n     * Start observing content changes with ResizeObserver\n     */\n    const startObserving = useCallback(() => {\n      const scrollContainer = getScrollContainer();\n      if (!scrollContainer || resizeObserverRef.current) return;\n\n      resizeObserverRef.current = new ResizeObserver(() => {\n        // Content size changed - update spacer\n        updateSpacerHeight();\n\n        // Reset the \"settled\" timeout - content is still changing\n        if (settledTimeoutRef.current) {\n          clearTimeout(settledTimeoutRef.current);\n        }\n\n        // After content stops changing for CONTENT_SETTLED_DEBOUNCE_MS, deactivate\n        settledTimeoutRef.current = setTimeout(() => {\n          // Only deactivate if streaming has ended\n          if (!wasStreamingRef.current) {\n            isActiveRef.current = false;\n            stopObserving();\n          }\n        }, CONTENT_SETTLED_DEBOUNCE_MS);\n      });\n\n      // Observe the content wrapper using context ref\n      if (contentWrapperRef.current) {\n        resizeObserverRef.current.observe(contentWrapperRef.current);\n      }\n    }, [\n      getScrollContainer,\n      updateSpacerHeight,\n      stopObserving,\n      contentWrapperRef,\n    ]);\n\n    /**\n     * Activate the spacer - calculate initial height and scroll to bottom\n     */\n    const activate = useCallback(() => {\n      if (!anchorNodeId) return;\n\n      // If already active, stop the current observation to restart fresh\n      if (isActiveRef.current) {\n        stopObserving();\n        isActiveRef.current = false;\n      }\n\n      const anchor = document.getElementById(`message-${anchorNodeId}`);\n      if (!anchor) return;\n\n      const scrollContainer = getScrollContainer();\n      if (!scrollContainer) return;\n\n      // Get measurements first (before modifying spacer)\n      const viewportHeight = scrollContainer.clientHeight;\n      const currentSpacerHeight = currentSpacerHeightRef.current;\n\n      // Calculate content height (scrollHeight minus current spacer)\n      const contentHeight = scrollContainer.scrollHeight - currentSpacerHeight;\n\n      // Calculate anchor's position using getBoundingClientRect for accuracy\n      const containerRect = scrollContainer.getBoundingClientRect();\n      const anchorRect = anchor.getBoundingClientRect();\n\n      // Anchor's visual offset from the scroll container's top edge\n      const anchorVisualOffset = anchorRect.top - containerRect.top;\n\n      // Anchor's absolute position in the scrollable content\n      const anchorOffsetInContent =\n        anchorVisualOffset + scrollContainer.scrollTop;\n\n      // Calculate spacer height needed to position anchor just below the top offset\n      // when scrolled to the absolute bottom.\n      const spacerHeight =\n        anchorOffsetInContent -\n        contentHeight +\n        viewportHeight -\n        ANCHOR_TOP_OFFSET_PX;\n\n      // If spacer height is <= 0, no push-up effect is needed.\n      // This naturally handles new chats and short conversations where\n      // the anchor is already positioned appropriately.\n      if (spacerHeight <= 0) return;\n\n      // Store initial content height for tracking content growth during streaming\n      initialContentHeightRef.current = contentHeight;\n      initialSpacerHeightRef.current = spacerHeight;\n      isActiveRef.current = true;\n\n      // Set the spacer height\n      setHeight(spacerHeight);\n\n      // Tell ChatScrollContainer to not do instant auto-scroll during smooth scroll\n      scrollContainer.dataset.smoothScrollActive = \"true\";\n\n      // Start observing content changes\n      startObserving();\n\n      // Scroll to bottom smoothly (after spacer height is applied)\n      requestAnimationFrame(() => {\n        requestAnimationFrame(() => {\n          scrollContainer.scrollTo({\n            top: scrollContainer.scrollHeight - scrollContainer.clientHeight,\n            behavior: \"smooth\",\n          });\n\n          // Clear the flag after smooth scroll completes and force\n          // ChatScrollContainer to refresh scroll state (button visibility,\n          // fades). The MutationObserver doesn't observe attribute changes,\n          // so we dispatch a synthetic scroll event.\n          setTimeout(() => {\n            scrollContainer.dataset.smoothScrollActive = \"false\";\n            scrollContainer.dispatchEvent(new Event(\"scroll\"));\n          }, SMOOTH_SCROLL_DURATION_MS);\n        });\n      });\n    }, [\n      anchorNodeId,\n      setHeight,\n      getScrollContainer,\n      startObserving,\n      stopObserving,\n    ]);\n\n    /**\n     * Main effect: detect streaming start/stop and anchor changes\n     */\n    useEffect(() => {\n      const anchorChanged = prevAnchorNodeIdRef.current !== anchorNodeId;\n      const streamingStarted = isStreaming && !wasStreamingRef.current;\n\n      prevAnchorNodeIdRef.current = anchorNodeId;\n      wasStreamingRef.current = isStreaming;\n\n      // Activate when: new anchor appears while streaming, or streaming starts with anchor\n      if (\n        (anchorChanged && anchorNodeId && isStreaming) ||\n        (streamingStarted && anchorNodeId)\n      ) {\n        requestAnimationFrame(() => {\n          activate();\n        });\n      }\n\n      // Note: smoothScrollActive is cleared by the 600ms timeout inside activate().\n      // We intentionally do NOT clear it when streaming ends — for fast responses,\n      // streaming can end before the smooth scroll animation completes, which would\n      // remove the suppression too early and flash the scroll-to-bottom button.\n    }, [anchorNodeId, isStreaming, activate]);\n\n    /**\n     * Reset when anchor is cleared\n     */\n    useEffect(() => {\n      if (!anchorNodeId) {\n        setHeight(0);\n        isActiveRef.current = false;\n        initialSpacerHeightRef.current = 0;\n        initialContentHeightRef.current = 0;\n        stopObserving();\n      }\n    }, [anchorNodeId, setHeight, stopObserving]);\n\n    /**\n     * Cleanup on unmount\n     */\n    useEffect(() => {\n      return () => {\n        stopObserving();\n        const scrollContainer = getScrollContainer();\n        if (scrollContainer) {\n          scrollContainer.dataset.smoothScrollActive = \"false\";\n        }\n      };\n    }, [getScrollContainer, stopObserving]);\n\n    return (\n      <div\n        ref={spacerRef}\n        data-dynamic-spacer=\"true\"\n        aria-hidden=\"true\"\n        className=\"w-full\"\n        style={{\n          height: \"0px\",\n          flexShrink: 0,\n        }}\n      />\n    );\n  }\n);\n\nDynamicBottomSpacer.displayName = \"DynamicBottomSpacer\";\n\nexport default DynamicBottomSpacer;\n"
  },
  {
    "path": "web/src/components/chat/FederatedOAuthModal.tsx",
    "content": "\"use client\";\n\nimport { useContext, useState } from \"react\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport { ValidSources } from \"@/lib/types\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport useFederatedOAuthStatus from \"@/hooks/useFederatedOAuthStatus\";\nimport { SvgLink } from \"@opal/icons\";\nimport { Card } from \"@/refresh-components/cards\";\nimport { ContentAction } from \"@opal/layouts\";\n\nexport interface FederatedConnectorOAuthStatus {\n  federated_connector_id: number;\n  source: string;\n  name: string;\n  has_oauth_token: boolean;\n  oauth_token_expires_at?: string;\n  authorize_url?: string;\n}\n\nconst MAX_SKIP_COUNT = 2;\n\nfunction useFederatedOauthModal() {\n  // Check localStorage for previous skip preference and count\n  const [oAuthModalState, setOAuthModalState] = useState<{\n    hidden: boolean;\n    skipCount: number;\n  }>(() => {\n    if (typeof window !== \"undefined\") {\n      const skipData = localStorage.getItem(\"federatedOAuthModalSkipData\");\n      if (skipData) {\n        try {\n          const parsed = JSON.parse(skipData);\n          // Check if we're still within the hide duration (1 hour)\n          const now = Date.now();\n          const hideUntil = parsed.hideUntil || 0;\n          const isWithinHideDuration = now < hideUntil;\n\n          return {\n            hidden: parsed.permanentlyHidden || isWithinHideDuration,\n            skipCount: parsed.skipCount || 0,\n          };\n        } catch {\n          return { hidden: false, skipCount: 0 };\n        }\n      }\n    }\n    return { hidden: false, skipCount: 0 };\n  });\n\n  const handleOAuthModalSkip = () => {\n    if (typeof window !== \"undefined\") {\n      const newSkipCount = oAuthModalState.skipCount + 1;\n\n      if (newSkipCount >= MAX_SKIP_COUNT) {\n        // Permanently hide the modal after max skips\n        const skipData = {\n          skipCount: newSkipCount,\n          hideUntil: 0,\n          permanentlyHidden: true,\n        };\n\n        localStorage.setItem(\n          \"federatedOAuthModalSkipData\",\n          JSON.stringify(skipData)\n        );\n\n        setOAuthModalState({\n          hidden: true,\n          skipCount: newSkipCount,\n        });\n      } else {\n        // Hide for 1 hour after first skip\n        const oneHourFromNow = Date.now() + 60 * 60 * 1000;\n\n        const skipData = {\n          skipCount: newSkipCount,\n          hideUntil: oneHourFromNow,\n          permanentlyHidden: false,\n        };\n\n        localStorage.setItem(\n          \"federatedOAuthModalSkipData\",\n          JSON.stringify(skipData)\n        );\n\n        setOAuthModalState({\n          hidden: true,\n          skipCount: newSkipCount,\n        });\n      }\n    }\n  };\n\n  return {\n    oAuthModalState,\n    handleOAuthModalSkip,\n  };\n}\n\nexport default function FederatedOAuthModal() {\n  const settings = useContext(SettingsContext);\n\n  const {\n    oAuthModalState: { hidden },\n    handleOAuthModalSkip,\n  } = useFederatedOauthModal();\n\n  const { connectors: federatedConnectors, hasUnauthenticatedConnectors } =\n    useFederatedOAuthStatus();\n\n  const needsAuth = federatedConnectors.filter((c) => !c.has_oauth_token);\n\n  if (needsAuth.length === 0 || hidden || !hasUnauthenticatedConnectors) {\n    return null;\n  }\n\n  const applicationName =\n    settings?.enterpriseSettings?.application_name || \"Onyx\";\n\n  return (\n    <Modal open>\n      <Modal.Content width=\"sm\" height=\"sm\">\n        <Modal.Header\n          icon={SvgLink}\n          title=\"Connect Your Apps\"\n          description={`Improve answer quality by letting ${applicationName} search all your connected data.`}\n        />\n        <Modal.Body>\n          {needsAuth.map((connector) => {\n            const sourceMetadata = getSourceMetadata(\n              connector.source as ValidSources\n            );\n\n            return (\n              <Card key={connector.federated_connector_id}>\n                <ContentAction\n                  icon={sourceMetadata.icon}\n                  title={sourceMetadata.displayName}\n                  description={sourceMetadata.category}\n                  sizePreset=\"main-content\"\n                  variant=\"section\"\n                  rightChildren={\n                    <Button\n                      prominence=\"secondary\"\n                      target=\"_blank\"\n                      href={connector.authorize_url}\n                    >\n                      Connect\n                    </Button>\n                  }\n                />\n              </Card>\n            );\n          })}\n        </Modal.Body>\n        <Modal.Footer>\n          <Button onClick={handleOAuthModalSkip}>Skip for now</Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/components/chat/MCPApiKeyModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport { Input } from \"@/components/ui/input\";\nimport Label from \"@/refresh-components/form/Label\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgAlertCircle, SvgEye, SvgEyeClosed, SvgKey } from \"@opal/icons\";\ninterface MCPAuthTemplate {\n  headers: Array<{ name: string; value: string }>;\n  request_body_params: Array<{ path: string; value: string }>;\n  required_fields: string[];\n}\n\ninterface MCPApiKeyModalProps {\n  isOpen: boolean;\n  onClose: () => void;\n  serverName: string;\n  serverId: number;\n  authTemplate?: MCPAuthTemplate;\n  onSubmit: (serverId: number, apiKey: string) => void;\n  onSubmitCredentials?: (\n    serverId: number,\n    credentials: Record<string, string>\n  ) => void;\n  onSuccess?: () => void;\n  isAuthenticated?: boolean;\n  existingCredentials?: Record<string, string>;\n}\n\nexport default function MCPApiKeyModal({\n  isOpen,\n  onClose,\n  serverName,\n  serverId,\n  authTemplate,\n  onSubmit,\n  onSubmitCredentials,\n  onSuccess,\n  isAuthenticated = false,\n  existingCredentials,\n}: MCPApiKeyModalProps) {\n  const [apiKey, setApiKey] = useState(\"\");\n  const [showApiKey, setShowApiKey] = useState(false);\n  const [credentials, setCredentials] = useState<Record<string, string>>({});\n  const [showCredentials, setShowCredentials] = useState<\n    Record<string, boolean>\n  >({});\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n\n  const isTemplateMode =\n    authTemplate && authTemplate.required_fields.length > 0;\n\n  // Initialize form with existing credentials when modal opens\n  useEffect(() => {\n    if (isOpen && existingCredentials) {\n      if (isTemplateMode) {\n        // For template mode, set the credentials object\n        setCredentials(existingCredentials);\n      } else {\n        // For legacy API key mode, set the api_key field\n        const apiKeyValue = existingCredentials.api_key || \"\";\n        setApiKey(apiKeyValue);\n      }\n    }\n  }, [isOpen, existingCredentials, isTemplateMode]);\n\n  const handleSubmit = async (e: React.FormEvent) => {\n    e.preventDefault();\n    setError(null); // Clear any previous errors\n\n    if (isTemplateMode) {\n      // Check all required fields are filled\n      const hasAllFields = authTemplate!.required_fields.every(\n        (field) => credentials[field]?.trim()\n      );\n      if (!hasAllFields) return;\n\n      setIsSubmitting(true);\n      try {\n        if (onSubmitCredentials) {\n          await onSubmitCredentials(serverId, credentials);\n        }\n        setCredentials({});\n        if (onSuccess) {\n          onSuccess();\n        }\n        onClose();\n      } catch (error) {\n        console.error(\"Error submitting credentials:\", error);\n        let errorMessage = \"Failed to save credentials\";\n        if (error instanceof Error) {\n          errorMessage = error.message;\n        } else if (typeof error === \"string\") {\n          errorMessage = error;\n        }\n        setError(errorMessage);\n      } finally {\n        setIsSubmitting(false);\n      }\n    } else {\n      // Legacy API key mode\n      if (!apiKey.trim()) return;\n\n      setIsSubmitting(true);\n      try {\n        await onSubmit(serverId, apiKey);\n        setApiKey(\"\");\n        if (onSuccess) {\n          onSuccess();\n        }\n        onClose();\n      } catch (error) {\n        console.error(\"Error submitting API key:\", error);\n        let errorMessage = \"Failed to save API key\";\n        if (error instanceof Error) {\n          errorMessage = error.message;\n        } else if (typeof error === \"string\") {\n          errorMessage = error;\n        }\n        setError(errorMessage);\n      } finally {\n        setIsSubmitting(false);\n      }\n    }\n  };\n\n  const handleClose = () => {\n    setApiKey(\"\");\n    setShowApiKey(false);\n    setCredentials({});\n    setShowCredentials({});\n    setError(null);\n    onClose();\n  };\n\n  const toggleCredentialVisibility = (field: string) => {\n    setShowCredentials((prev) => ({\n      ...prev,\n      [field]: !prev[field],\n    }));\n  };\n\n  const updateCredential = (field: string, value: string) => {\n    setCredentials((prev) => ({\n      ...prev,\n      [field]: value,\n    }));\n  };\n\n  const credsType = isTemplateMode ? \"Credentials\" : \"API Key\";\n  return (\n    <Modal open={isOpen} onOpenChange={handleClose}>\n      <Modal.Content width=\"sm\" height=\"sm\">\n        <Modal.Header\n          icon={SvgKey}\n          title={isAuthenticated ? `Manage ${credsType}` : `Enter ${credsType}`}\n          onClose={handleClose}\n        />\n        <Modal.Body>\n          <Text as=\"p\">\n            {isAuthenticated\n              ? `Update your ${credsType} for ${serverName}.`\n              : `Enter your ${credsType} for ${serverName} to enable authentication.`}\n          </Text>\n          <Text as=\"p\" text02>\n            {isAuthenticated\n              ? \"Changes will be validated against the server before being saved.\"\n              : `Your ${credsType} will be validated against the server and stored securely.`}\n          </Text>\n\n          {error && (\n            <div className=\"flex items-center space-x-2 p-3 bg-red-50 border border-red-200 rounded-md text-red-800 text-sm\">\n              <SvgAlertCircle className=\"h-4 w-4 flex-shrink-0\" />\n              <span>{error}</span>\n            </div>\n          )}\n\n          <form onSubmit={handleSubmit} className=\"space-y-4\">\n            {isTemplateMode ? (\n              // Template-based credential fields\n              <div className=\"space-y-4\">\n                {authTemplate!.required_fields.map((field) => (\n                  <div key={field} className=\"space-y-2\">\n                    <Label name={field}>\n                      <Text>\n                        {field\n                          .replace(/_/g, \" \")\n                          .replace(/\\b\\w/g, (l) => l.toUpperCase())}\n                      </Text>\n                    </Label>\n                    <div className=\"relative\">\n                      <Input\n                        id={field}\n                        type={showCredentials[field] ? \"text\" : \"password\"}\n                        value={credentials[field] || \"\"}\n                        onChange={(e) =>\n                          updateCredential(field, e.target.value)\n                        }\n                        placeholder={`Enter your ${field.replace(/_/g, \" \")}`}\n                        className=\"pr-10\"\n                        required\n                      />\n                      <button\n                        type=\"button\"\n                        onClick={() => toggleCredentialVisibility(field)}\n                        className=\"absolute right-3 top-1/2 -translate-y-1/2 text-subtle hover:text-emphasis\"\n                      >\n                        {showCredentials[field] ? (\n                          <SvgEyeClosed className=\"h-4 w-4\" />\n                        ) : (\n                          <SvgEye className=\"h-4 w-4\" />\n                        )}\n                      </button>\n                    </div>\n                  </div>\n                ))}\n              </div>\n            ) : (\n              // Legacy API key field\n              <div className=\"space-y-2\">\n                <Label name=\"apiKey\">\n                  <Text>{credsType}</Text>\n                </Label>\n                <div className=\"relative\">\n                  <Input\n                    id=\"apiKey\"\n                    type={showApiKey ? \"text\" : \"password\"}\n                    value={apiKey}\n                    onChange={(e) => setApiKey(e.target.value)}\n                    placeholder={`Enter your ${credsType}`}\n                    className=\"pr-10\"\n                    required\n                  />\n                  <button\n                    type=\"button\"\n                    onClick={() => setShowApiKey(!showApiKey)}\n                    className=\"absolute right-3 top-1/2 -translate-y-1/2 text-subtle hover:text-emphasis\"\n                  >\n                    {showApiKey ? (\n                      <SvgEyeClosed className=\"h-4 w-4\" />\n                    ) : (\n                      <SvgEye className=\"h-4 w-4\" />\n                    )}\n                  </button>\n                </div>\n              </div>\n            )}\n\n            <div className=\"flex justify-end space-x-2 pt-4\">\n              <Button\n                disabled={isSubmitting}\n                prominence=\"secondary\"\n                onClick={handleClose}\n              >\n                Cancel\n              </Button>\n              <Button\n                disabled={\n                  isSubmitting ||\n                  (isTemplateMode\n                    ? !authTemplate!.required_fields.every(\n                        (field) => credentials[field]?.trim()\n                      )\n                    : !apiKey.trim())\n                }\n                type=\"submit\"\n              >\n                {isSubmitting\n                  ? \"Saving...\"\n                  : isAuthenticated\n                    ? `Update ${credsType}`\n                    : `Save ${credsType}`}\n              </Button>\n            </div>\n          </form>\n        </Modal.Body>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/components/chat/MinimalMarkdown.test.tsx",
    "content": "import { render, screen } from \"@testing-library/react\";\nimport \"@testing-library/jest-dom\";\nimport MinimalMarkdown from \"./MinimalMarkdown\";\n\ndescribe(\"MinimalMarkdown\", () => {\n  describe(\"Link handling\", () => {\n    test(\"converts bare email markdown links to mailto links\", () => {\n      render(\n        <MinimalMarkdown content=\"[support@anthropic.com](support@anthropic.com)\" />\n      );\n\n      const link = screen.getByText(\"support@anthropic.com\").closest(\"a\");\n      expect(link).toHaveAttribute(\"href\", \"mailto:support@anthropic.com\");\n    });\n\n    test(\"preserves explicit mailto links\", () => {\n      render(\n        <MinimalMarkdown content=\"[support@anthropic.com](mailto:support@anthropic.com)\" />\n      );\n\n      const link = screen.getByText(\"support@anthropic.com\").closest(\"a\");\n      expect(link).toHaveAttribute(\"href\", \"mailto:support@anthropic.com\");\n    });\n\n    test(\"does not restore hrefs removed by url sanitization\", () => {\n      render(<MinimalMarkdown content=\"[click](javascript:alert(1))\" />);\n\n      const link = screen.getByText(\"click\").closest(\"a\");\n      expect(link).not.toHaveAttribute(\"href\");\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/components/chat/MinimalMarkdown.tsx",
    "content": "import { CodeBlock } from \"@/app/app/message/CodeBlock\";\nimport { extractCodeText } from \"@/app/app/message/codeUtils\";\nimport {\n  MemoizedLink,\n  MemoizedParagraph,\n} from \"@/app/app/message/MemoizedTextComponents\";\nimport { useMemo, CSSProperties } from \"react\";\nimport ReactMarkdown, { type Components } from \"react-markdown\";\nimport remarkGfm from \"remark-gfm\";\nimport rehypeHighlight from \"rehype-highlight\";\nimport remarkMath from \"remark-math\";\nimport rehypeKatex from \"rehype-katex\";\nimport \"katex/dist/katex.min.css\";\nimport { cn, transformLinkUri } from \"@/lib/utils\";\n\ntype MinimalMarkdownComponentOverrides = Partial<Components>;\n\ninterface MinimalMarkdownProps {\n  content: string;\n  className?: string;\n  showHeader?: boolean;\n  /**\n   * Override specific markdown renderers.\n   * Any renderer not provided will fall back to this component's defaults.\n   */\n  components?: MinimalMarkdownComponentOverrides;\n}\n\nexport default function MinimalMarkdown({\n  content,\n  className = \"\",\n  showHeader = true,\n  components,\n}: MinimalMarkdownProps) {\n  const markdownComponents = useMemo(() => {\n    const defaults: Components = {\n      a: MemoizedLink,\n      p: MemoizedParagraph,\n      pre: ({ node, className, children }: any) => {\n        // Don't render the pre wrapper - CodeBlock handles its own wrapper\n        return <>{children}</>;\n      },\n      code: ({ node, inline, className, children, ...props }: any) => {\n        const codeText = extractCodeText(node, content, children);\n        return (\n          <CodeBlock\n            className={className}\n            codeText={codeText}\n            showHeader={showHeader}\n          >\n            {children}\n          </CodeBlock>\n        );\n      },\n    };\n\n    return {\n      ...defaults,\n      ...(components ?? {}),\n    } satisfies Components;\n  }, [content, components, showHeader]);\n\n  return (\n    <ReactMarkdown\n      className={cn(\n        \"prose dark:prose-invert max-w-full text-sm break-words\",\n        className\n      )}\n      components={markdownComponents}\n      rehypePlugins={[rehypeHighlight, rehypeKatex]}\n      remarkPlugins={[remarkGfm, [remarkMath, { singleDollarTextMath: false }]]}\n      urlTransform={transformLinkUri}\n    >\n      {content}\n    </ReactMarkdown>\n  );\n}\n"
  },
  {
    "path": "web/src/components/chat/ProviderContext.tsx",
    "content": "\"use client\";\nimport {\n  WellKnownLLMProviderDescriptor,\n  LLMProviderDescriptor,\n} from \"@/interfaces/llm\";\nimport React, {\n  createContext,\n  useContext,\n  useState,\n  useEffect,\n  useCallback,\n} from \"react\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { useLLMProviders } from \"@/hooks/useLLMProviders\";\nimport { useLLMProviderOptions } from \"@/lib/hooks/useLLMProviderOptions\";\nimport { testDefaultProvider as testDefaultProviderSvc } from \"@/lib/llmConfig/svc\";\n\ninterface ProviderContextType {\n  shouldShowConfigurationNeeded: boolean;\n  providerOptions: WellKnownLLMProviderDescriptor[];\n  refreshProviderInfo: () => Promise<void>;\n  // Expose configured provider instances for components that need it (e.g., onboarding)\n  llmProviders: LLMProviderDescriptor[] | undefined;\n  isLoadingProviders: boolean;\n  hasProviders: boolean;\n}\n\nconst ProviderContext = createContext<ProviderContextType | undefined>(\n  undefined\n);\n\nconst DEFAULT_LLM_PROVIDER_TEST_COMPLETE_KEY = \"defaultLlmProviderTestComplete\";\n\nfunction checkDefaultLLMProviderTestComplete() {\n  if (typeof window === \"undefined\") return true;\n  return (\n    localStorage.getItem(DEFAULT_LLM_PROVIDER_TEST_COMPLETE_KEY) === \"true\"\n  );\n}\n\nfunction setDefaultLLMProviderTestComplete() {\n  if (typeof window === \"undefined\") return;\n  localStorage.setItem(DEFAULT_LLM_PROVIDER_TEST_COMPLETE_KEY, \"true\");\n}\n\nexport function ProviderContextProvider({\n  children,\n}: {\n  children: React.ReactNode;\n}) {\n  const { user } = useUser();\n\n  // Use SWR hooks instead of raw fetch\n  const {\n    llmProviders,\n    isLoading: isLoadingProviders,\n    refetch: refetchProviders,\n  } = useLLMProviders();\n  const { llmProviderOptions: providerOptions, refetch: refetchOptions } =\n    useLLMProviderOptions();\n\n  const [defaultCheckSuccessful, setDefaultCheckSuccessful] =\n    useState<boolean>(true);\n\n  // Test the default provider - only runs if test hasn't passed yet\n  const testDefaultProvider = useCallback(async () => {\n    const shouldCheck =\n      !checkDefaultLLMProviderTestComplete() &&\n      (!user || user.role === \"admin\");\n\n    if (shouldCheck) {\n      const success = await testDefaultProviderSvc();\n      setDefaultCheckSuccessful(success);\n      if (success) {\n        setDefaultLLMProviderTestComplete();\n      }\n    }\n  }, [user]);\n\n  // Test default provider on mount\n  useEffect(() => {\n    testDefaultProvider();\n  }, [testDefaultProvider]);\n\n  const hasProviders = (llmProviders?.length ?? 0) > 0;\n  const validProviderExists = hasProviders && defaultCheckSuccessful;\n\n  const shouldShowConfigurationNeeded =\n    !validProviderExists && (providerOptions?.length ?? 0) > 0;\n\n  const refreshProviderInfo = useCallback(async () => {\n    // Refetch provider lists and re-test default provider if needed\n    await Promise.all([\n      refetchProviders(),\n      refetchOptions(),\n      testDefaultProvider(),\n    ]);\n  }, [refetchProviders, refetchOptions, testDefaultProvider]);\n\n  return (\n    <ProviderContext.Provider\n      value={{\n        shouldShowConfigurationNeeded,\n        providerOptions: providerOptions ?? [],\n        refreshProviderInfo,\n        llmProviders,\n        isLoadingProviders,\n        hasProviders,\n      }}\n    >\n      {children}\n    </ProviderContext.Provider>\n  );\n}\n\nexport function useProviderStatus() {\n  const context = useContext(ProviderContext);\n  if (context === undefined) {\n    throw new Error(\n      \"useProviderStatus must be used within a ProviderContextProvider\"\n    );\n  }\n  return context;\n}\n"
  },
  {
    "path": "web/src/components/chat/ScrollContainerContext.tsx",
    "content": "\"use client\";\n\nimport React, {\n  createContext,\n  useContext,\n  useMemo,\n  RefObject,\n  MutableRefObject,\n} from \"react\";\n\ninterface ScrollContainerContextType {\n  scrollContainerRef: RefObject<HTMLDivElement | null>;\n  contentWrapperRef: RefObject<HTMLDivElement | null>;\n  /** Shared ref for the DynamicBottomSpacer's current height (written by spacer, read by scroll container). */\n  spacerHeightRef: MutableRefObject<number>;\n}\n\nconst ScrollContainerContext = createContext<\n  ScrollContainerContextType | undefined\n>(undefined);\n\nexport function ScrollContainerProvider({\n  children,\n  scrollContainerRef,\n  contentWrapperRef,\n  spacerHeightRef,\n}: {\n  children: React.ReactNode;\n  scrollContainerRef: RefObject<HTMLDivElement | null>;\n  contentWrapperRef: RefObject<HTMLDivElement | null>;\n  spacerHeightRef: MutableRefObject<number>;\n}) {\n  // Memoize context value to prevent unnecessary re-renders of consumers.\n  // The refs themselves are stable, but without memoization, a new object\n  // would be created on every parent re-render.\n  const value = useMemo(\n    () => ({ scrollContainerRef, contentWrapperRef, spacerHeightRef }),\n    [scrollContainerRef, contentWrapperRef, spacerHeightRef]\n  );\n\n  return (\n    <ScrollContainerContext.Provider value={value}>\n      {children}\n    </ScrollContainerContext.Provider>\n  );\n}\n\n/**\n * Hook to access the scroll container and content wrapper refs.\n * Must be used within a ScrollContainerProvider (inside ChatScrollContainer).\n */\nexport function useScrollContainer() {\n  const context = useContext(ScrollContainerContext);\n  if (context === undefined) {\n    throw new Error(\n      \"useScrollContainer must be used within a ScrollContainerProvider\"\n    );\n  }\n  return context;\n}\n"
  },
  {
    "path": "web/src/components/context/EmbeddingContext.tsx",
    "content": "import React, {\n  createContext,\n  useState,\n  useContext,\n  ReactNode,\n  useEffect,\n} from \"react\";\nimport { usePathname, useRouter, useSearchParams } from \"next/navigation\";\nimport type { Route } from \"next\";\n\ninterface EmbeddingFormContextType {\n  formStep: number;\n  formValues: Record<string, any>;\n  setFormValues: (values: Record<string, any>) => void;\n  nextFormStep: (contract?: string) => void;\n  prevFormStep: () => void;\n  formStepToLast: () => void;\n  setFormStep: React.Dispatch<React.SetStateAction<number>>;\n  allowAdvanced: boolean;\n  setAllowAdvanced: React.Dispatch<React.SetStateAction<boolean>>;\n  allowCreate: boolean;\n  setAllowCreate: React.Dispatch<React.SetStateAction<boolean>>;\n}\n\nconst EmbeddingFormContext = createContext<\n  EmbeddingFormContextType | undefined\n>(undefined);\n\nexport const EmbeddingFormProvider: React.FC<{\n  children: ReactNode;\n}> = ({ children }) => {\n  const router = useRouter();\n  const searchParams = useSearchParams();\n  const pathname = usePathname();\n\n  // Initialize formStep based on the URL parameter\n  const stepFromUrl = parseInt(searchParams?.get(\"step\") || \"0\", 10);\n  const [formStep, setFormStep] = useState(stepFromUrl);\n  const [formValues, setFormValues] = useState<Record<string, any>>({});\n\n  const [allowAdvanced, setAllowAdvanced] = useState(false);\n  const [allowCreate, setAllowCreate] = useState(false);\n\n  const nextFormStep = (values = \"\") => {\n    setFormStep((prevStep) => prevStep + 1);\n    setFormValues((prevValues) => ({ ...prevValues, values }));\n  };\n\n  const prevFormStep = () => {\n    setFormStep((currentStep) => Math.max(currentStep - 1, 0));\n  };\n\n  const formStepToLast = () => {\n    setFormStep(2);\n  };\n\n  useEffect(() => {\n    // Update URL when formStep changes\n    const updatedSearchParams = new URLSearchParams(\n      searchParams?.toString() || \"\"\n    );\n    const existingStep = updatedSearchParams?.get(\"step\");\n    updatedSearchParams.set(\"step\", formStep.toString());\n    const newUrl = `${pathname}?${updatedSearchParams.toString()}`;\n\n    if (!existingStep) {\n      router.replace(newUrl as Route);\n    } else if (newUrl !== pathname) {\n      router.push(newUrl as Route);\n    }\n  }, [formStep, router, pathname]);\n\n  // Update formStep when URL changes\n  useEffect(() => {\n    if (stepFromUrl !== formStep) {\n      setFormStep(stepFromUrl);\n    }\n  }, [stepFromUrl]);\n\n  const contextValue: EmbeddingFormContextType = {\n    formStep,\n    formValues,\n    setFormValues: (values) =>\n      setFormValues((prevValues) => ({ ...prevValues, ...values })),\n    nextFormStep,\n    prevFormStep,\n    formStepToLast,\n    setFormStep,\n    allowAdvanced,\n    setAllowAdvanced,\n    allowCreate,\n    setAllowCreate: setAllowCreate,\n  };\n\n  return (\n    <EmbeddingFormContext.Provider value={contextValue}>\n      {children}\n    </EmbeddingFormContext.Provider>\n  );\n};\n\nexport const useEmbeddingFormContext = () => {\n  const context = useContext(EmbeddingFormContext);\n  if (context === undefined) {\n    throw new Error(\n      \"useEmbeddingFormContext must be used within a FormProvider\"\n    );\n  }\n  return context;\n};\n"
  },
  {
    "path": "web/src/components/context/FormContext.tsx",
    "content": "import React, {\n  createContext,\n  useState,\n  useContext,\n  ReactNode,\n  useEffect,\n} from \"react\";\nimport { usePathname, useRouter, useSearchParams } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { ValidSources } from \"@/lib/types\";\n\ninterface FormContextType {\n  formStep: number;\n  formValues: Record<string, any>;\n  setFormValues: (values: Record<string, any>) => void;\n  nextFormStep: (contract?: string) => void;\n  prevFormStep: () => void;\n  formStepToLast: () => void;\n  connector: ValidSources;\n  setFormStep: React.Dispatch<React.SetStateAction<number>>;\n  allowAdvanced: boolean;\n  setAllowAdvanced: React.Dispatch<React.SetStateAction<boolean>>;\n  allowCreate: boolean;\n  setAllowCreate: React.Dispatch<React.SetStateAction<boolean>>;\n}\n\nconst FormContext = createContext<FormContextType | undefined>(undefined);\n\n// TODO: deprecate this\nexport const FormProvider: React.FC<{\n  children: ReactNode;\n  connector: ValidSources;\n}> = ({ children, connector }) => {\n  const router = useRouter();\n  const searchParams = useSearchParams();\n  const pathname = usePathname();\n\n  // Initialize formStep based on the URL parameter\n  const formStepFromUrlParams = parseInt(searchParams?.get(\"step\") || \"0\", 10);\n  const [formStep, setFormStep] = useState(formStepFromUrlParams);\n  const [formValues, setFormValues] = useState<Record<string, any>>({});\n\n  const [allowAdvanced, setAllowAdvanced] = useState(false);\n  const [allowCreate, setAllowCreate] = useState(false);\n\n  const nextFormStep = (values = \"\") => {\n    setFormStep((prevStep) => prevStep + 1);\n    setFormValues((prevValues) => ({ ...prevValues, values }));\n  };\n\n  const prevFormStep = () => {\n    setFormStep((currentStep) => Math.max(currentStep - 1, 0));\n  };\n\n  const formStepToLast = () => {\n    setFormStep(2);\n  };\n\n  useEffect(() => {\n    // Update URL when formStep changes\n    const updatedSearchParams = new URLSearchParams(\n      searchParams?.toString() || \"\"\n    );\n    updatedSearchParams.set(\"step\", formStep.toString());\n    const newUrl = `${pathname}?${updatedSearchParams.toString()}`;\n\n    if (!formStepFromUrlParams) {\n      router.replace(newUrl as Route);\n    } else if (newUrl !== pathname) {\n      router.push(newUrl as Route);\n    }\n  }, [formStep, router, pathname, formStepFromUrlParams]);\n\n  useEffect(() => {\n    if (formStepFromUrlParams !== formStep) {\n      setFormStep(formStepFromUrlParams);\n    }\n  }, [formStepFromUrlParams]);\n\n  const contextValue: FormContextType = {\n    formStep,\n    formValues,\n    setFormValues: (values) =>\n      setFormValues((prevValues) => ({ ...prevValues, ...values })),\n    nextFormStep,\n    prevFormStep,\n    formStepToLast,\n    setFormStep,\n    connector,\n    allowAdvanced,\n    setAllowAdvanced,\n    allowCreate,\n    setAllowCreate,\n  };\n\n  return (\n    <FormContext.Provider value={contextValue}>{children}</FormContext.Provider>\n  );\n};\n\nexport const useFormContext = () => {\n  const context = useContext(FormContext);\n  if (context === undefined) {\n    throw new Error(\"useFormContext must be used within a FormProvider\");\n  }\n  return context;\n};\n"
  },
  {
    "path": "web/src/components/context/ModalContext.tsx",
    "content": "\"use client\";\n\nimport React, { createContext, useContext, useEffect, useState } from \"react\";\nimport NewTeamModal from \"@/components/modals/NewTeamModal\";\nimport NewTenantModal from \"@/sections/modals/NewTenantModal\";\nimport { NewTenantInfo } from \"@/lib/types\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport { useUser } from \"@/providers/UserProvider\";\n\ntype ModalContextType = {\n  showNewTeamModal: boolean;\n  setShowNewTeamModal: (show: boolean) => void;\n  newTenantInfo: NewTenantInfo | null;\n  setNewTenantInfo: (info: NewTenantInfo | null) => void;\n  invitationInfo: NewTenantInfo | null;\n  setInvitationInfo: (info: NewTenantInfo | null) => void;\n};\n\nconst ModalContext = createContext<ModalContextType | undefined>(undefined);\n\nexport const useModalContext = () => {\n  const context = useContext(ModalContext);\n  if (context === undefined) {\n    throw new Error(\"useModalContext must be used within a ModalProvider\");\n  }\n  return context;\n};\n\nexport const ModalProvider: React.FC<{\n  children: React.ReactNode;\n}> = ({ children }) => {\n  const { user } = useUser();\n  const [showNewTeamModal, setShowNewTeamModal] = useState(false);\n  const [newTenantInfo, setNewTenantInfo] = useState<NewTenantInfo | null>(\n    null\n  );\n  const [invitationInfo, setInvitationInfo] = useState<NewTenantInfo | null>(\n    null\n  );\n\n  // Sync modal states with user info — clear when backend no longer has the data\n  useEffect(() => {\n    if (user?.tenant_info?.new_tenant) {\n      setNewTenantInfo(user.tenant_info.new_tenant);\n    } else {\n      setNewTenantInfo(null);\n    }\n    if (user?.tenant_info?.invitation) {\n      setInvitationInfo(user.tenant_info.invitation);\n    } else {\n      setInvitationInfo(null);\n    }\n  }, [user?.tenant_info]);\n\n  // Render all application-wide modals\n  const renderModals = () => {\n    if (!user || !NEXT_PUBLIC_CLOUD_ENABLED) return <></>;\n\n    return (\n      <>\n        {/* Modal for users to request to join an existing team */}\n        <NewTeamModal />\n\n        {/* Modal for users who've been accepted to a new team */}\n        {newTenantInfo && (\n          <NewTenantModal\n            tenantInfo={newTenantInfo}\n            // Close function to clear the modal state\n            onClose={() => setNewTenantInfo(null)}\n          />\n        )}\n\n        {/* Modal for users who've been invited to join a team */}\n        {invitationInfo && (\n          <NewTenantModal\n            isInvite={true}\n            tenantInfo={invitationInfo}\n            // Close function to clear the modal state\n            onClose={() => setInvitationInfo(null)}\n          />\n        )}\n      </>\n    );\n  };\n\n  return (\n    <ModalContext.Provider\n      value={{\n        showNewTeamModal,\n        setShowNewTeamModal,\n        newTenantInfo,\n        setNewTenantInfo,\n        invitationInfo,\n        setInvitationInfo,\n      }}\n    >\n      {children}\n      {renderModals()}\n    </ModalContext.Provider>\n  );\n};\n"
  },
  {
    "path": "web/src/components/context/NRFPreferencesContext.tsx",
    "content": "\"use client\";\n\nimport React, { createContext, useContext, useState } from \"react\";\nimport { LocalStorageKeys } from \"@/lib/extension/constants\";\n\ninterface NRFPreferencesContextValue {\n  useOnyxAsNewTab: boolean;\n  setUseOnyxAsNewTab: (v: boolean) => void;\n}\n\nconst NRFPreferencesContext = createContext<\n  NRFPreferencesContextValue | undefined\n>(undefined);\n\nfunction useLocalStorageState<T>(\n  key: string,\n  defaultValue: T\n): [T, (value: T) => void] {\n  const [state, setState] = useState<T>(() => {\n    if (typeof window !== \"undefined\") {\n      const storedValue = localStorage.getItem(key);\n      return storedValue ? JSON.parse(storedValue) : defaultValue;\n    }\n    return defaultValue;\n  });\n\n  const setValue = (value: T) => {\n    setState(value);\n    if (typeof window !== \"undefined\") {\n      localStorage.setItem(key, JSON.stringify(value));\n    }\n  };\n\n  return [state, setValue];\n}\n\nexport function NRFPreferencesProvider({\n  children,\n}: {\n  children: React.ReactNode;\n}) {\n  const [useOnyxAsNewTab, setUseOnyxAsNewTab] = useLocalStorageState<boolean>(\n    LocalStorageKeys.USE_ONYX_AS_NEW_TAB,\n    true\n  );\n\n  return (\n    <NRFPreferencesContext.Provider\n      value={{\n        useOnyxAsNewTab,\n        setUseOnyxAsNewTab,\n      }}\n    >\n      {children}\n    </NRFPreferencesContext.Provider>\n  );\n}\n\nexport function useNRFPreferences() {\n  const context = useContext(NRFPreferencesContext);\n  if (!context) {\n    throw new Error(\n      \"useNRFPreferences must be used within an NRFPreferencesProvider\"\n    );\n  }\n  return context;\n}\n"
  },
  {
    "path": "web/src/components/credentials/CredentialFields.tsx",
    "content": "import { JSX } from \"react\";\n\nexport default function CredentialSubText({\n  children,\n}: {\n  children: JSX.Element | string;\n}) {\n  return (\n    <p className=\"text-sm mb-2 whitespace-break-spaces text-text-500\">\n      {children}\n    </p>\n  );\n}\n"
  },
  {
    "path": "web/src/components/credentials/CredentialSection.tsx",
    "content": "\"use client\";\n\nimport { AccessType, ValidSources } from \"@/lib/types\";\nimport useSWR, { mutate } from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { FaKey } from \"react-icons/fa\";\nimport { useState } from \"react\";\nimport { FiEdit2 } from \"react-icons/fi\";\nimport {\n  deleteCredential,\n  swapCredential,\n  updateCredential,\n  updateCredentialWithPrivateKey,\n} from \"@/lib/credential\";\nimport { toast } from \"@/hooks/useToast\";\nimport CreateCredential from \"./actions/CreateCredential\";\nimport { CCPairFullInfo } from \"@/app/admin/connector/[ccPairId]/types\";\nimport ModifyCredential from \"./actions/ModifyCredential\";\nimport { Text } from \"@opal/components\";\nimport {\n  buildCCPairInfoUrl,\n  buildSimilarCredentialInfoURL,\n} from \"@/app/admin/connector/[ccPairId]/lib\";\nimport Modal from \"@/refresh-components/Modal\";\nimport EditCredential from \"./actions/EditCredential\";\nimport { getSourceDisplayName } from \"@/lib/sources\";\nimport {\n  ConfluenceCredentialJson,\n  Credential,\n} from \"@/lib/connectors/credentials\";\nimport {\n  getConnectorOauthRedirectUrl,\n  useOAuthDetails,\n} from \"@/lib/connectors/oauth\";\nimport { Spinner } from \"@/components/Spinner\";\nimport { CreateStdOAuthCredential } from \"@/components/credentials/actions/CreateStdOAuthCredential\";\nimport { Card } from \"../ui/card\";\nimport { isTypedFileField, TypedFile } from \"@/lib/connectors/fileTypes\";\nimport { SvgEdit, SvgKey } from \"@opal/icons\";\n\nexport interface CredentialSectionProps {\n  ccPair: CCPairFullInfo;\n  sourceType: ValidSources;\n  refresh: () => void;\n}\n\nexport default function CredentialSection({\n  ccPair,\n  sourceType,\n  refresh,\n}: CredentialSectionProps) {\n  const { data: credentials } = useSWR<Credential<ConfluenceCredentialJson>[]>(\n    buildSimilarCredentialInfoURL(sourceType),\n    errorHandlingFetcher,\n    { refreshInterval: 5000 } // 5 seconds\n  );\n  const { data: editableCredentials } = useSWR<Credential<any>[]>(\n    buildSimilarCredentialInfoURL(sourceType, true),\n    errorHandlingFetcher,\n    { refreshInterval: 5000 }\n  );\n  const { data: oauthDetails, isLoading: oauthDetailsLoading } =\n    useOAuthDetails(sourceType);\n\n  const makeShowCreateCredential = async () => {\n    if (oauthDetailsLoading || !oauthDetails) {\n      return;\n    }\n\n    if (oauthDetails.oauth_enabled) {\n      if (oauthDetails.additional_kwargs.length > 0) {\n        setShowCreateCredential(true);\n      } else {\n        const redirectUrl = await getConnectorOauthRedirectUrl(sourceType, {});\n        if (redirectUrl) {\n          window.location.href = redirectUrl;\n        }\n      }\n    } else {\n      setShowModifyCredential(false);\n      setShowCreateCredential(true);\n    }\n  };\n\n  const onSwap = async (\n    selectedCredential: Credential<any>,\n    connectorId: number,\n    accessType: AccessType\n  ) => {\n    const response = await swapCredential(\n      selectedCredential.id,\n      connectorId,\n      accessType\n    );\n    if (response.ok) {\n      mutate(buildSimilarCredentialInfoURL(sourceType));\n      refresh();\n\n      toast.success(\"Swapped credential successfully!\");\n    } else {\n      const errorData = await response.json();\n      toast.error(\n        `Issue swapping credential: ${\n          errorData.detail || errorData.message || \"Unknown error\"\n        }`\n      );\n    }\n  };\n\n  const onUpdateCredential = async (\n    selectedCredential: Credential<any | null>,\n    details: any,\n    onSucces: () => void\n  ) => {\n    let privateKey: TypedFile | null = null;\n    Object.entries(details).forEach(([key, value]) => {\n      if (isTypedFileField(key)) {\n        privateKey = value as TypedFile;\n        delete details[key];\n      }\n    });\n    let response;\n    if (privateKey) {\n      response = await updateCredentialWithPrivateKey(\n        selectedCredential.id,\n        details,\n        privateKey\n      );\n    } else {\n      response = await updateCredential(selectedCredential.id, details);\n    }\n    if (response.ok) {\n      toast.success(\"Updated credential\");\n      onSucces();\n    } else {\n      toast.error(\"Issue updating credential\");\n    }\n  };\n\n  const onEditCredential = (credential: Credential<any>) => {\n    closeModifyCredential();\n    setEditingCredential(credential);\n  };\n\n  const onDeleteCredential = async (credential: Credential<any | null>) => {\n    await deleteCredential(credential.id, true);\n    mutate(buildCCPairInfoUrl(ccPair.id));\n  };\n  const defaultedCredential = ccPair.credential;\n\n  const [showModifyCredential, setShowModifyCredential] = useState(false);\n  const [showCreateCredential, setShowCreateCredential] = useState(false);\n  const [editingCredential, setEditingCredential] =\n    useState<Credential<any> | null>(null);\n\n  const closeModifyCredential = () => {\n    setShowModifyCredential(false);\n  };\n\n  const closeCreateCredential = () => {\n    setShowCreateCredential(false);\n  };\n\n  const closeEditingCredential = () => {\n    setEditingCredential(null);\n    setShowModifyCredential(true);\n  };\n  if (!credentials || !editableCredentials) {\n    return <></>;\n  }\n\n  return (\n    <div\n      className=\"flex\n      flex-col\n      gap-y-4\n      rounded-lg\n      bg-background\"\n    >\n      <Card className=\"p-6\">\n        <div className=\"flex items-center\">\n          <div className=\"flex-shrink-0 mr-3\">\n            <FaKey className=\"h-4 w-4 text-muted-foreground\" />\n          </div>\n          <div className=\"flex-grow flex flex-col justify-center\">\n            <div className=\"flex items-center justify-between\">\n              <div>\n                <Text as=\"p\">\n                  {ccPair.credential.name ||\n                    `Credential #${ccPair.credential.id}`}\n                </Text>\n                <div className=\"text-xs text-muted-foreground/70\">\n                  Created{\" \"}\n                  <i>\n                    {new Date(\n                      ccPair.credential.time_created\n                    ).toLocaleDateString(undefined, {\n                      year: \"numeric\",\n                      month: \"short\",\n                      day: \"numeric\",\n                    })}\n                  </i>\n                  {ccPair.credential.user_email && (\n                    <>\n                      {\" \"}\n                      by <i>{ccPair.credential.user_email}</i>\n                    </>\n                  )}\n                </div>\n              </div>\n              <button\n                onClick={() => setShowModifyCredential(true)}\n                className=\"inline-flex\n                  items-center\n                  justify-center\n                  p-2\n                  rounded-md\n                  text-muted-foreground\n                  hover:bg-accent\n                  hover:text-accent-foreground\n                  transition-colors\"\n              >\n                <FiEdit2 className=\"h-4 w-4\" />\n                <span className=\"sr-only\">Update Credentials</span>\n              </button>\n            </div>\n          </div>\n        </div>\n      </Card>\n\n      {showModifyCredential && (\n        <Modal open onOpenChange={closeModifyCredential}>\n          <Modal.Content>\n            <Modal.Header\n              icon={SvgEdit}\n              title=\"Update Credentials\"\n              onClose={closeModifyCredential}\n            />\n            <Modal.Body>\n              <ModifyCredential\n                close={closeModifyCredential}\n                accessType={ccPair.access_type}\n                attachedConnector={ccPair.connector}\n                defaultedCredential={defaultedCredential}\n                credentials={credentials}\n                editableCredentials={editableCredentials}\n                onDeleteCredential={onDeleteCredential}\n                onEditCredential={(credential: Credential<any>) =>\n                  onEditCredential(credential)\n                }\n                onSwap={onSwap}\n                onCreateNew={() => makeShowCreateCredential()}\n              />\n            </Modal.Body>\n          </Modal.Content>\n        </Modal>\n      )}\n\n      {editingCredential && (\n        <Modal open onOpenChange={closeEditingCredential}>\n          <Modal.Content>\n            <Modal.Header\n              icon={SvgEdit}\n              title=\"Edit Credential\"\n              onClose={closeEditingCredential}\n            />\n            <Modal.Body>\n              <EditCredential\n                onUpdate={onUpdateCredential}\n                credential={editingCredential}\n                onClose={closeEditingCredential}\n              />\n            </Modal.Body>\n          </Modal.Content>\n        </Modal>\n      )}\n\n      {showCreateCredential && (\n        <Modal open onOpenChange={closeCreateCredential}>\n          <Modal.Content>\n            <Modal.Header\n              icon={SvgKey}\n              title={`Create ${getSourceDisplayName(sourceType)} Credential`}\n              onClose={closeCreateCredential}\n            />\n            <Modal.Body>\n              {oauthDetailsLoading ? (\n                <Spinner />\n              ) : (\n                <>\n                  {oauthDetails && oauthDetails.oauth_enabled ? (\n                    <CreateStdOAuthCredential\n                      sourceType={sourceType}\n                      additionalFields={oauthDetails.additional_kwargs}\n                    />\n                  ) : (\n                    <CreateCredential\n                      sourceType={sourceType}\n                      accessType={ccPair.access_type}\n                      swapConnector={ccPair.connector}\n                      onSwap={onSwap}\n                      onClose={closeCreateCredential}\n                    />\n                  )}\n                </>\n              )}\n            </Modal.Body>\n          </Modal.Content>\n        </Modal>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/credentials/actions/CreateCredential.tsx",
    "content": "import { useState } from \"react\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Button as OpalButton } from \"@opal/components\";\nimport { ValidSources, AccessType } from \"@/lib/types\";\nimport { FaAccusoft } from \"react-icons/fa\";\nimport { submitCredential } from \"@/components/admin/connectors/CredentialForm\";\nimport { TextFormField } from \"@/components/Field\";\nimport { Form, Formik, FormikHelpers } from \"formik\";\nimport { toast } from \"@/hooks/useToast\";\nimport GDriveMain from \"@/app/admin/connectors/[connector]/pages/gdrive/GoogleDrivePage\";\nimport { Connector } from \"@/lib/connectors/connectors\";\nimport { Credential, credentialTemplates } from \"@/lib/connectors/credentials\";\nimport { GmailMain } from \"@/app/admin/connectors/[connector]/pages/gmail/GmailPage\";\nimport { ActionType, dictionaryType } from \"../types\";\nimport { createValidationSchema } from \"../lib\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { AdvancedOptionsToggle } from \"@/components/AdvancedOptionsToggle\";\nimport {\n  IsPublicGroupSelectorFormType,\n  IsPublicGroupSelector,\n} from \"@/components/IsPublicGroupSelector\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { CredentialFieldsRenderer } from \"./CredentialFieldsRenderer\";\nimport { TypedFile } from \"@/lib/connectors/fileTypes\";\nimport ConnectorDocsLink from \"@/components/admin/connectors/ConnectorDocsLink\";\nimport { SvgPlusCircle } from \"@opal/icons\";\nconst CreateButton = ({\n  onClick,\n  isSubmitting,\n  isAdmin,\n  groups,\n}: {\n  onClick: () => void;\n  isSubmitting: boolean;\n  isAdmin: boolean;\n  groups: number[];\n}) => (\n  <OpalButton\n    disabled={isSubmitting || (!isAdmin && groups.length === 0)}\n    onClick={onClick}\n    icon={SvgPlusCircle}\n  >\n    Create\n  </OpalButton>\n);\n\ntype formType = IsPublicGroupSelectorFormType & {\n  name: string;\n  [key: string]: any; // For additional credential fields\n};\n\nexport default function CreateCredential({\n  hideSource,\n  sourceType,\n  accessType,\n  close,\n  onClose = () => null,\n  onSwitch,\n  onSwap = async () => null,\n  swapConnector,\n  refresh = () => null,\n}: {\n  // Source information\n  hideSource?: boolean; // hides docs link\n  sourceType: ValidSources;\n  accessType: AccessType;\n\n  // Optional toggle- close section after selection?\n  close?: boolean;\n\n  // Special handlers\n  onClose?: () => void;\n  // Switch currently selected credential\n  onSwitch?: (selectedCredential: Credential<any>) => Promise<void>;\n  // Switch currently selected credential + link with connector\n  onSwap?: (\n    selectedCredential: Credential<any>,\n    connectorId: number,\n    accessType: AccessType\n  ) => void;\n\n  // For swapping credentials on selection\n  swapConnector?: Connector<any>;\n\n  // Mutating parent state\n  refresh?: () => void;\n}) {\n  const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);\n  const [authMethod, setAuthMethod] = useState<string>();\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n\n  const { isAdmin } = useUser();\n\n  const handleSubmit = async (\n    values: formType,\n    formikHelpers: FormikHelpers<formType>,\n    action: ActionType\n  ) => {\n    const { setSubmitting, validateForm } = formikHelpers;\n\n    const errors = await validateForm(values);\n    if (Object.keys(errors).length > 0) {\n      formikHelpers.setErrors(errors);\n      return;\n    }\n\n    setSubmitting(true);\n    formikHelpers.setSubmitting(true);\n\n    const { name, is_public, groups, ...credentialValues } = values;\n\n    let privateKey: TypedFile | null = null;\n    const filteredCredentialValues = Object.fromEntries(\n      Object.entries(credentialValues).filter(([key, value]) => {\n        if (value instanceof TypedFile) {\n          privateKey = value;\n          return false;\n        }\n        return value !== null && value !== \"\";\n      })\n    );\n\n    try {\n      const response = await submitCredential({\n        credential_json: filteredCredentialValues,\n        admin_public: true,\n        curator_public: is_public,\n        groups: groups,\n        name: name,\n        source: sourceType,\n        private_key: privateKey || undefined,\n      });\n\n      const { message, isSuccess, credential } = response;\n\n      if (!credential) {\n        throw new Error(\"No credential returned\");\n      }\n\n      if (isSuccess && swapConnector) {\n        if (action === \"createAndSwap\") {\n          onSwap(credential, swapConnector.id, accessType);\n        } else {\n          toast.success(\"Created new credential!\");\n        }\n        onClose();\n      } else {\n        if (isSuccess) {\n          toast.success(message);\n        } else {\n          toast.error(message);\n        }\n      }\n\n      if (close) {\n        onClose();\n      }\n      await refresh();\n\n      if (onSwitch) {\n        onSwitch(response?.credential!);\n      }\n    } catch (error) {\n      console.error(\"Error submitting credential:\", error);\n      toast.error(\"Error submitting credential\");\n    } finally {\n      formikHelpers.setSubmitting(false);\n    }\n  };\n\n  if (sourceType == \"gmail\") {\n    return <GmailMain />;\n  }\n\n  if (sourceType == \"google_drive\") {\n    return <GDriveMain />;\n  }\n\n  const credentialTemplate: dictionaryType = credentialTemplates[sourceType];\n  const validationSchema = createValidationSchema(credentialTemplate);\n\n  // Set initial auth method for templates with multiple auth methods\n  const templateWithAuth = credentialTemplate as any;\n  const initialAuthMethod =\n    templateWithAuth?.authMethods?.[0]?.value || undefined;\n\n  return (\n    <Formik\n      initialValues={\n        {\n          name: \"\",\n          is_public: isAdmin || !isPaidEnterpriseFeaturesEnabled,\n          groups: [],\n          ...(initialAuthMethod && {\n            authentication_method: initialAuthMethod,\n          }),\n        } as formType\n      }\n      validationSchema={validationSchema}\n      onSubmit={() => {}} // This will be overridden by our custom submit handlers\n    >\n      {(formikProps) => {\n        // Update authentication_method in formik when authMethod changes\n        if (\n          authMethod &&\n          formikProps.values.authentication_method !== authMethod\n        ) {\n          formikProps.setFieldValue(\"authentication_method\", authMethod);\n        }\n\n        return (\n          <Form className=\"w-full flex items-stretch\">\n            {!hideSource && <ConnectorDocsLink sourceType={sourceType} />}\n            <CardSection className=\"w-full items-start dark:bg-neutral-900 mt-4 flex flex-col gap-y-6\">\n              <TextFormField\n                name=\"name\"\n                placeholder=\"(Optional) credential name..\"\n                label=\"Name:\"\n              />\n\n              <CredentialFieldsRenderer\n                credentialTemplate={credentialTemplate}\n                authMethod={authMethod || initialAuthMethod}\n                setAuthMethod={setAuthMethod}\n              />\n\n              {!swapConnector && (\n                <div className=\"mt-4 flex w-full flex-col sm:flex-row justify-between items-end\">\n                  <div className=\"w-full sm:w-3/4 mb-4 sm:mb-0\">\n                    {isPaidEnterpriseFeaturesEnabled && (\n                      <div className=\"flex flex-col items-start\">\n                        {isAdmin && (\n                          <AdvancedOptionsToggle\n                            showAdvancedOptions={showAdvancedOptions}\n                            setShowAdvancedOptions={setShowAdvancedOptions}\n                          />\n                        )}\n                        {(showAdvancedOptions || !isAdmin) && (\n                          <IsPublicGroupSelector\n                            formikProps={formikProps}\n                            objectName=\"credential\"\n                            publicToWhom=\"Curators\"\n                          />\n                        )}\n                      </div>\n                    )}\n                  </div>\n                  <CreateButton\n                    onClick={() =>\n                      handleSubmit(formikProps.values, formikProps, \"create\")\n                    }\n                    isSubmitting={formikProps.isSubmitting}\n                    isAdmin={isAdmin}\n                    groups={formikProps.values.groups}\n                  />\n                </div>\n              )}\n            </CardSection>\n            {swapConnector && (\n              // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n              <Button\n                className=\"bg-rose-500 hover:bg-rose-400\"\n                onClick={() =>\n                  handleSubmit(formikProps.values, formikProps, \"createAndSwap\")\n                }\n                disabled={formikProps.isSubmitting}\n                leftIcon={() => (\n                  <FaAccusoft className=\"fill-text-inverted-05\" />\n                )}\n              >\n                Create\n              </Button>\n            )}\n          </Form>\n        );\n      }}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/components/credentials/actions/CreateStdOAuthCredential.tsx",
    "content": "import * as Yup from \"yup\";\n\nimport { Button } from \"@opal/components\";\nimport { ValidSources } from \"@/lib/types\";\nimport { TextFormField } from \"@/components/Field\";\nimport { Form, Formik, FormikHelpers } from \"formik\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { getConnectorOauthRedirectUrl } from \"@/lib/connectors/oauth\";\nimport { OAuthAdditionalKwargDescription } from \"@/lib/connectors/credentials\";\n\ntype formType = {\n  [key: string]: any; // For additional credential fields\n};\n\nexport function CreateStdOAuthCredential({\n  sourceType,\n  additionalFields,\n}: {\n  // Source information\n  sourceType: ValidSources;\n\n  additionalFields: OAuthAdditionalKwargDescription[];\n}) {\n  const handleSubmit = async (\n    values: formType,\n    formikHelpers: FormikHelpers<formType>\n  ) => {\n    const { setSubmitting, validateForm } = formikHelpers;\n\n    const errors = await validateForm(values);\n    if (Object.keys(errors).length > 0) {\n      formikHelpers.setErrors(errors);\n      return;\n    }\n\n    setSubmitting(true);\n    formikHelpers.setSubmitting(true);\n\n    const redirectUrl = await getConnectorOauthRedirectUrl(sourceType, values);\n\n    if (!redirectUrl) {\n      throw new Error(\"No redirect URL found for OAuth connector\");\n    }\n\n    window.location.href = redirectUrl;\n  };\n\n  return (\n    <Formik\n      initialValues={\n        {\n          ...Object.fromEntries(additionalFields.map((field) => [field, \"\"])),\n        } as formType\n      }\n      validationSchema={Yup.object().shape({\n        ...Object.fromEntries(\n          additionalFields.map((field) => [field.name, Yup.string().required()])\n        ),\n      })}\n      onSubmit={(values, formikHelpers) => {\n        handleSubmit(values, formikHelpers);\n      }}\n    >\n      {() => (\n        <Form className=\"w-full flex items-stretch\">\n          <CardSection className=\"w-full !border-0 mt-4 flex flex-col gap-y-6\">\n            {additionalFields.map((field) => (\n              <TextFormField\n                key={field.name}\n                name={field.name}\n                label={field.display_name}\n                subtext={field.description}\n                type=\"text\"\n              />\n            ))}\n\n            <div className=\"flex w-full\">\n              <Button type=\"submit\">Create</Button>\n            </div>\n          </CardSection>\n        </Form>\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/components/credentials/actions/CredentialFieldsRenderer.tsx",
    "content": "\"use client\";\n\nimport Tabs from \"@/refresh-components/Tabs\";\nimport { useFormikContext } from \"formik\";\nimport {\n  BooleanFormField,\n  TextFormField,\n  TypedFileUploadFormField,\n} from \"@/components/Field\";\nimport {\n  getDisplayNameForCredentialKey,\n  CredentialTemplateWithAuth,\n} from \"@/lib/connectors/credentials\";\nimport { dictionaryType } from \"../types\";\nimport { isTypedFileField } from \"@/lib/connectors/fileTypes\";\n\ninterface CredentialFieldsRendererProps {\n  credentialTemplate: dictionaryType;\n  authMethod?: string;\n  setAuthMethod?: (method: string) => void;\n}\n\nexport function CredentialFieldsRenderer({\n  credentialTemplate,\n  authMethod,\n  setAuthMethod,\n}: CredentialFieldsRendererProps) {\n  const templateWithAuth =\n    credentialTemplate as CredentialTemplateWithAuth<any>;\n  const { values, setValues } = useFormikContext<any>();\n\n  // remove other auth‐method fields when switching\n  const handleAuthMethodChange = (newMethod: string) => {\n    // start from current form values\n    const cleaned = { ...values, authentication_method: newMethod };\n    // delete every field not in the selected auth method\n    templateWithAuth.authMethods?.forEach((m) => {\n      if (m.value !== newMethod) {\n        Object.keys(m.fields).forEach((fieldKey) => {\n          delete cleaned[fieldKey];\n        });\n      }\n    });\n    setValues(cleaned);\n    setAuthMethod?.(newMethod);\n  };\n\n  // Check if this credential template has multiple auth methods\n  const hasMultipleAuthMethods =\n    templateWithAuth.authMethods && templateWithAuth.authMethods.length > 1;\n\n  if (hasMultipleAuthMethods && templateWithAuth.authMethods) {\n    return (\n      <div className=\"w-full space-y-4\">\n        {/* Render authentication_method as a hidden field */}\n        <input\n          type=\"hidden\"\n          name=\"authentication_method\"\n          value={authMethod || (templateWithAuth.authMethods?.[0]?.value ?? \"\")}\n        />\n\n        <Tabs\n          value={authMethod || templateWithAuth.authMethods?.[0]?.value || \"\"}\n          onValueChange={handleAuthMethodChange}\n        >\n          <Tabs.List>\n            {templateWithAuth.authMethods.map((method) => (\n              <Tabs.Trigger key={method.value} value={method.value}>\n                {method.label}\n              </Tabs.Trigger>\n            ))}\n          </Tabs.List>\n\n          {templateWithAuth.authMethods.map((method) => (\n            <Tabs.Content\n              key={method.value}\n              value={method.value}\n              alignItems=\"stretch\"\n            >\n              {/* Show description if method has no fields but has a description */}\n              {Object.keys(method.fields).length === 0 &&\n                method.description && (\n                  <div className=\"p-4 bg-blue-50 dark:bg-blue-900/20 border border-blue-200 dark:border-blue-700 rounded-md\">\n                    <p className=\"text-sm text-blue-800 dark:text-blue-200\">\n                      {method.description}\n                    </p>\n                  </div>\n                )}\n\n              {Object.entries(method.fields).map(([key, val]) => {\n                if (isTypedFileField(key)) {\n                  return (\n                    <TypedFileUploadFormField\n                      key={key}\n                      name={key}\n                      label={getDisplayNameForCredentialKey(key)}\n                    />\n                  );\n                }\n\n                if (typeof val === \"boolean\") {\n                  return (\n                    <BooleanFormField\n                      key={key}\n                      name={key}\n                      label={getDisplayNameForCredentialKey(key)}\n                    />\n                  );\n                }\n                return (\n                  <TextFormField\n                    key={key}\n                    name={key}\n                    placeholder={val}\n                    label={getDisplayNameForCredentialKey(key)}\n                    type={\n                      key.toLowerCase().includes(\"token\") ||\n                      key.toLowerCase().includes(\"password\") ||\n                      key.toLowerCase().includes(\"secret\")\n                        ? \"password\"\n                        : \"text\"\n                    }\n                  />\n                );\n              })}\n            </Tabs.Content>\n          ))}\n        </Tabs>\n      </div>\n    );\n  }\n\n  // Render single auth method fields (existing behavior)\n  return (\n    <>\n      {Object.entries(credentialTemplate).map(([key, val]) => {\n        // Skip auth method metadata fields\n        if (key === \"authentication_method\" || key === \"authMethods\") {\n          return null;\n        }\n        if (isTypedFileField(key)) {\n          return (\n            <TypedFileUploadFormField\n              key={key}\n              name={key}\n              label={getDisplayNameForCredentialKey(key)}\n            />\n          );\n        }\n\n        if (typeof val === \"boolean\") {\n          return (\n            <BooleanFormField\n              key={key}\n              name={key}\n              label={getDisplayNameForCredentialKey(key)}\n            />\n          );\n        }\n        return (\n          <TextFormField\n            key={key}\n            name={key}\n            placeholder={val as string}\n            label={getDisplayNameForCredentialKey(key)}\n            type={\n              key.toLowerCase().includes(\"token\") ||\n              key.toLowerCase().includes(\"password\") ||\n              key.toLowerCase().includes(\"secret\")\n                ? \"password\"\n                : \"text\"\n            }\n          />\n        );\n      })}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/components/credentials/actions/EditCredential.tsx",
    "content": "import { Button } from \"@opal/components\";\nimport { Text } from \"@opal/components\";\n\nimport { FaNewspaper, FaTrash } from \"react-icons/fa\";\nimport { TextFormField, TypedFileUploadFormField } from \"@/components/Field\";\nimport { Form, Formik, FormikHelpers } from \"formik\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  Credential,\n  getDisplayNameForCredentialKey,\n} from \"@/lib/connectors/credentials\";\nimport { createEditingValidationSchema, createInitialValues } from \"../lib\";\nimport { dictionaryType, formType } from \"../types\";\nimport { isTypedFileField } from \"@/lib/connectors/fileTypes\";\nimport { SvgTrash } from \"@opal/icons\";\nexport interface EditCredentialProps {\n  credential: Credential<dictionaryType>;\n  onClose: () => void;\n  onUpdate: (\n    selectedCredentialId: Credential<any>,\n    details: any,\n    onSuccess: () => void\n  ) => Promise<void>;\n}\n\nexport default function EditCredential({\n  credential,\n  onClose,\n  onUpdate,\n}: EditCredentialProps) {\n  const validationSchema = createEditingValidationSchema(\n    credential.credential_json\n  );\n  const initialValues = createInitialValues(credential);\n\n  const handleSubmit = async (\n    values: formType,\n    formikHelpers: FormikHelpers<formType>\n  ) => {\n    formikHelpers.setSubmitting(true);\n    try {\n      await onUpdate(credential, values, onClose);\n    } catch (error) {\n      console.error(\"Error updating credential:\", error);\n      toast.error(\"Error updating credential\");\n    } finally {\n      formikHelpers.setSubmitting(false);\n    }\n  };\n\n  return (\n    <div className=\"flex flex-col gap-y-6\">\n      <Text as=\"p\">\n        Ensure that you update to a credential with the proper permissions!\n      </Text>\n\n      <Formik\n        initialValues={initialValues}\n        validationSchema={validationSchema}\n        onSubmit={handleSubmit}\n      >\n        {({ isSubmitting, resetForm }) => (\n          <Form>\n            <TextFormField\n              includeRevert\n              name=\"name\"\n              placeholder={credential.name || \"\"}\n              label=\"Name (optional):\"\n            />\n\n            {Object.entries(credential.credential_json).map(([key, value]) =>\n              isTypedFileField(key) ? (\n                <TypedFileUploadFormField\n                  key={key}\n                  name={key}\n                  label={getDisplayNameForCredentialKey(key)}\n                />\n              ) : (\n                <TextFormField\n                  includeRevert\n                  key={key}\n                  name={key}\n                  placeholder={value as string}\n                  label={getDisplayNameForCredentialKey(key)}\n                  type={\n                    key.toLowerCase().includes(\"token\") ||\n                    key.toLowerCase().includes(\"password\")\n                      ? \"password\"\n                      : \"text\"\n                  }\n                  disabled={key === \"authentication_method\"}\n                />\n              )\n            )}\n            <div className=\"flex justify-between w-full\">\n              <Button onClick={() => resetForm()} icon={SvgTrash}>\n                Reset Changes\n              </Button>\n              <Button disabled={isSubmitting} type=\"submit\" icon={FaNewspaper}>\n                Update\n              </Button>\n            </div>\n          </Form>\n        )}\n      </Formik>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/credentials/actions/ModifyCredential.tsx",
    "content": "import React, { useState } from \"react\";\nimport Modal from \"@/refresh-components/Modal\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Badge } from \"@/components/ui/badge\";\nimport { AccessType } from \"@/lib/types\";\nimport { EditIcon, NewChatIcon, SwapIcon } from \"@/components/icons/icons\";\nimport {\n  ConfluenceCredentialJson,\n  Credential,\n} from \"@/lib/connectors/credentials\";\nimport { Connector } from \"@/lib/connectors/connectors\";\nimport {\n  SvgArrowExchange,\n  SvgAlertTriangle,\n  SvgBubbleText,\n  SvgTrash,\n} from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\ninterface CredentialSelectionTableProps {\n  credentials: Credential<any>[];\n  editableCredentials: Credential<any>[];\n  onSelectCredential: (credential: Credential<any> | null) => void;\n  currentCredentialId?: number;\n  onDeleteCredential: (credential: Credential<any>) => void;\n  onEditCredential?: (credential: Credential<any>) => void;\n}\n\nfunction CredentialSelectionTable({\n  credentials,\n  editableCredentials,\n  onEditCredential,\n  onSelectCredential,\n  currentCredentialId,\n  onDeleteCredential,\n}: CredentialSelectionTableProps) {\n  const [selectedCredentialId, setSelectedCredentialId] = useState<\n    number | null\n  >(null);\n\n  // rkuo: this appears to merge editableCredentials into credentials so we get a single list\n  // of credentials to display\n  // Pretty sure this merging should be done outside of this UI component\n  const allCredentials = React.useMemo(() => {\n    const credMap = new Map(editableCredentials.map((cred) => [cred.id, cred]));\n    credentials.forEach((cred) => {\n      if (!credMap.has(cred.id)) {\n        credMap.set(cred.id, cred);\n      }\n    });\n    return Array.from(credMap.values());\n  }, [credentials, editableCredentials]);\n\n  const handleSelectCredential = (credentialId: number) => {\n    const newSelectedId =\n      selectedCredentialId === credentialId ? null : credentialId;\n    setSelectedCredentialId(newSelectedId);\n\n    const selectedCredential =\n      allCredentials.find((cred) => cred.id === newSelectedId) || null;\n    onSelectCredential(selectedCredential);\n  };\n\n  return (\n    <div className=\"w-full max-h-[50vh] overflow-auto\">\n      <table className=\"w-full text-sm border-collapse\">\n        <thead className=\"sticky top-0 w-full\">\n          <tr className=\"bg-neutral-100 dark:bg-neutral-900\">\n            <th className=\"p-2 text-left font-medium text-neutral-600 dark:text-neutral-400\"></th>\n            <th className=\"p-2 text-left font-medium text-neutral-600 dark:text-neutral-400\">\n              ID\n            </th>\n            <th className=\"p-2 text-left font-medium text-neutral-600 dark:text-neutral-400\">\n              Name\n            </th>\n            <th className=\"p-2 text-left font-medium text-neutral-600 dark:text-neutral-400\">\n              Created\n            </th>\n            <th className=\"p-2 text-left font-medium text-neutral-600 dark:text-neutral-400\">\n              Last Updated\n            </th>\n            <th />\n          </tr>\n        </thead>\n\n        {allCredentials.length > 0 && (\n          <tbody className=\"w-full\">\n            {allCredentials.map((credential, ind) => {\n              const selected = currentCredentialId\n                ? credential.id == (selectedCredentialId || currentCredentialId)\n                : false;\n              const editable = editableCredentials.some(\n                (editableCredential) => editableCredential.id === credential.id\n              );\n              return (\n                <tr\n                  key={credential.id}\n                  className=\"border-b hover:bg-background-50\"\n                >\n                  <td className=\"min-w-[60px] p-2\">\n                    {!selected ? (\n                      <input\n                        type=\"radio\"\n                        name=\"credentialSelection\"\n                        onChange={() => handleSelectCredential(credential.id)}\n                        className=\"form-radio ml-4 h-4 w-4 text-blue-600 transition duration-150 ease-in-out\"\n                      />\n                    ) : (\n                      <Badge>selected</Badge>\n                    )}\n                  </td>\n                  <td className=\"p-2\">{credential.id}</td>\n                  <td className=\"p-2\">\n                    <p>{credential.name ?? \"Untitled\"}</p>\n                  </td>\n                  <td className=\"p-2\">\n                    {new Date(credential.time_created).toLocaleString()}\n                  </td>\n                  <td className=\"p-2\">\n                    {new Date(credential.time_updated).toLocaleString()}\n                  </td>\n                  <td className=\"p-2 flex gap-x-2 content-center mt-auto\">\n                    <Button\n                      disabled={selected || !editable}\n                      onClick={async () => {\n                        onDeleteCredential(credential);\n                      }}\n                      icon={SvgTrash}\n                    />\n                    {onEditCredential && (\n                      <button\n                        disabled={!editable}\n                        onClick={() => onEditCredential(credential)}\n                        className=\"cursor-pointer my-auto\"\n                      >\n                        <EditIcon />\n                      </button>\n                    )}\n                  </td>\n                </tr>\n              );\n            })}\n          </tbody>\n        )}\n      </table>\n\n      {allCredentials.length == 0 && (\n        <p className=\"mt-4\"> No credentials exist for this connector!</p>\n      )}\n    </div>\n  );\n}\n\nexport interface ModifyCredentialProps {\n  close?: () => void;\n  showIfEmpty?: boolean;\n  attachedConnector?: Connector<any>;\n  credentials: Credential<any>[];\n  editableCredentials: Credential<any>[];\n  defaultedCredential?: Credential<any>;\n  accessType: AccessType;\n  onSwap?: (\n    newCredential: Credential<any>,\n    connectorId: number,\n    accessType: AccessType\n  ) => void;\n  onSwitch?: (newCredential: Credential<any>) => void;\n  onEditCredential?: (credential: Credential<ConfluenceCredentialJson>) => void;\n  onDeleteCredential: (credential: Credential<any | null>) => void;\n  onCreateNew?: () => void;\n}\n\nexport default function ModifyCredential({\n  close,\n  showIfEmpty,\n  attachedConnector,\n  credentials,\n  editableCredentials,\n  defaultedCredential,\n  accessType,\n  onSwap,\n  onSwitch,\n  onEditCredential,\n  onDeleteCredential,\n  onCreateNew,\n}: ModifyCredentialProps) {\n  const [selectedCredential, setSelectedCredential] =\n    useState<Credential<any> | null>(null);\n  const [confirmDeletionCredential, setConfirmDeletionCredential] =\n    useState<null | Credential<any>>(null);\n\n  if (!credentials || !editableCredentials) return null;\n\n  return (\n    <>\n      {confirmDeletionCredential != null && (\n        <Modal open onOpenChange={() => setConfirmDeletionCredential(null)}>\n          <Modal.Content width=\"sm\" height=\"sm\">\n            <Modal.Header\n              icon={SvgAlertTriangle}\n              title=\"Confirm Deletion\"\n              onClose={() => setConfirmDeletionCredential(null)}\n            />\n            <Modal.Body>\n              <Text as=\"p\">\n                Are you sure you want to delete this credential? You cannot\n                delete credentials that are linked to live connectors.\n              </Text>\n            </Modal.Body>\n            <Modal.Footer>\n              <Button\n                onClick={async () => {\n                  onDeleteCredential(confirmDeletionCredential);\n                  setConfirmDeletionCredential(null);\n                }}\n              >\n                Confirm\n              </Button>\n              <Button\n                prominence=\"secondary\"\n                onClick={() => setConfirmDeletionCredential(null)}\n              >\n                Cancel\n              </Button>\n            </Modal.Footer>\n          </Modal.Content>\n        </Modal>\n      )}\n\n      <div className=\"mb-0\">\n        <Text as=\"p\" className=\"mb-4\">\n          Select a credential as needed! Ensure that you have selected a\n          credential with the proper permissions for this connector!\n        </Text>\n\n        <CredentialSelectionTable\n          onDeleteCredential={async (credential: Credential<any | null>) => {\n            setConfirmDeletionCredential(credential);\n          }}\n          onEditCredential={\n            onEditCredential\n              ? (credential: Credential<ConfluenceCredentialJson>) =>\n                  onEditCredential(credential)\n              : undefined\n          }\n          currentCredentialId={\n            defaultedCredential ? defaultedCredential.id : undefined\n          }\n          credentials={credentials}\n          editableCredentials={editableCredentials}\n          onSelectCredential={(credential: Credential<any> | null) => {\n            if (credential && onSwitch) {\n              onSwitch(credential);\n            } else {\n              setSelectedCredential(credential);\n            }\n          }}\n        />\n\n        {!showIfEmpty && (\n          <div className=\"flex mt-8 justify-between\">\n            {onCreateNew ? (\n              <Button onClick={onCreateNew} icon={SvgBubbleText}>\n                Create\n              </Button>\n            ) : (\n              <div />\n            )}\n\n            <Button\n              disabled={selectedCredential == null}\n              onClick={() => {\n                if (onSwap && attachedConnector) {\n                  onSwap(selectedCredential!, attachedConnector.id, accessType);\n                  if (close) {\n                    close();\n                  }\n                }\n                if (onSwitch) {\n                  onSwitch(selectedCredential!);\n                }\n              }}\n              icon={SvgArrowExchange}\n            >\n              Select\n            </Button>\n          </div>\n        )}\n      </div>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/components/credentials/lib.ts",
    "content": "import * as Yup from \"yup\";\n\nimport { dictionaryType, formType } from \"./types\";\nimport {\n  Credential,\n  getDisplayNameForCredentialKey,\n  CredentialTemplateWithAuth,\n} from \"@/lib/connectors/credentials\";\nimport { isTypedFileField } from \"@/lib/connectors/fileTypes\";\n\nexport function createValidationSchema(json_values: Record<string, any>) {\n  const schemaFields: Record<string, Yup.AnySchema> = {};\n  const template = json_values as CredentialTemplateWithAuth<any>;\n  // multi‐auth templates\n  if (template.authMethods && template.authMethods.length > 1) {\n    // auth method selector\n    schemaFields[\"authentication_method\"] = Yup.string().required(\n      \"Please select an authentication method\"\n    );\n    // conditional rules per authMethod\n    template.authMethods.forEach((method) => {\n      Object.entries(method.fields).forEach(([key, def]) => {\n        const displayName = getDisplayNameForCredentialKey(key);\n        if (typeof def === \"boolean\") {\n          schemaFields[key] = Yup.boolean()\n            .nullable()\n            .default(false)\n            .transform((v, o) => (o === undefined ? false : v));\n        } else if (isTypedFileField(key)) {\n          //TypedFile fields - use mixed schema instead of string (check before null check)\n          schemaFields[key] = Yup.mixed().when(\"authentication_method\", {\n            is: method.value,\n            then: () =>\n              Yup.mixed().required(`Please select a ${displayName} file`),\n            otherwise: () => Yup.mixed().notRequired(),\n          });\n        } else if (def === null) {\n          schemaFields[key] = Yup.string()\n            .trim()\n            .transform((v) => (v === \"\" ? null : v))\n            .nullable()\n            .notRequired();\n        } else {\n          schemaFields[key] = Yup.string()\n            .trim()\n            .when(\"authentication_method\", {\n              is: method.value,\n              then: (s) =>\n                s\n                  .min(1, `${displayName} cannot be empty`)\n                  .required(`Please enter your ${displayName}`),\n              otherwise: (s) => s.notRequired(),\n            });\n        }\n      });\n    });\n  }\n  // single‐auth templates and other fields\n  for (const key in json_values) {\n    if (!Object.prototype.hasOwnProperty.call(json_values, key)) continue;\n    if (key === \"authentication_method\" || key === \"authMethods\") continue;\n    const displayName = getDisplayNameForCredentialKey(key);\n    const def = json_values[key];\n    if (typeof def === \"boolean\") {\n      schemaFields[key] = Yup.boolean()\n        .nullable()\n        .default(false)\n        .transform((v, o) => (o === undefined ? false : v));\n    } else if (isTypedFileField(key)) {\n      // TypedFile fields - use mixed schema instead of string (check before null check)\n      schemaFields[key] = Yup.mixed().required(\n        `Please select a ${displayName} file`\n      );\n    } else if (def === null) {\n      schemaFields[key] = Yup.string()\n        .trim()\n        .transform((v) => (v === \"\" ? null : v))\n        .nullable()\n        .notRequired();\n    } else {\n      schemaFields[key] = Yup.string()\n        .trim()\n        .min(1, `${displayName} cannot be empty`)\n        .required(`Please enter your ${displayName}`);\n    }\n  }\n\n  schemaFields[\"name\"] = Yup.string().optional();\n  return Yup.object().shape(schemaFields);\n}\n\nexport function createEditingValidationSchema(json_values: dictionaryType) {\n  const schemaFields: { [key: string]: Yup.AnySchema } = {};\n\n  for (const key in json_values) {\n    if (Object.prototype.hasOwnProperty.call(json_values, key)) {\n      if (isTypedFileField(key)) {\n        // TypedFile fields - use mixed schema for optional file uploads during editing\n        schemaFields[key] = Yup.mixed().optional();\n      } else {\n        schemaFields[key] = Yup.string().optional();\n      }\n    }\n  }\n\n  schemaFields[\"name\"] = Yup.string().optional();\n  return Yup.object().shape(schemaFields);\n}\n\nexport function createInitialValues(credential: Credential<any>): formType {\n  const initialValues: formType = {\n    name: credential.name || \"\",\n  };\n\n  for (const key in credential.credential_json) {\n    // Initialize TypedFile fields as null, other fields as empty strings\n    if (isTypedFileField(key)) {\n      initialValues[key] = null as any; // TypedFile fields start as null\n    } else {\n      initialValues[key] = \"\";\n    }\n  }\n\n  return initialValues;\n}\n"
  },
  {
    "path": "web/src/components/credentials/types.ts",
    "content": "import { TypedFile } from \"@/lib/connectors/fileTypes\";\n\nexport interface dictionaryType {\n  [key: string]: string | TypedFile;\n}\nexport interface formType extends dictionaryType {\n  name: string;\n}\n\nexport type ActionType = \"create\" | \"createAndSwap\";\n"
  },
  {
    "path": "web/src/components/dateRangeSelectors/AdminDateRangeSelector.tsx",
    "content": "import React, { memo, useState } from \"react\";\nimport Calendar from \"@/refresh-components/Calendar\";\nimport Popover from \"@/refresh-components/Popover\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Button as OpalButton } from \"@opal/components\";\nimport { cn } from \"@/lib/utils\";\nimport { format } from \"date-fns\";\nimport { getXDaysAgo } from \"./dateUtils\";\nimport { SvgCalendar } from \"@opal/icons\";\nexport const THIRTY_DAYS = \"30d\";\n\nexport type DateRangePickerValue = DateRange & {\n  selectValue: string;\n};\n\nexport type DateRange =\n  | {\n      from: Date;\n      to: Date;\n    }\n  | undefined;\n\nexport const AdminDateRangeSelector = memo(function AdminDateRangeSelector({\n  value,\n  onValueChange,\n}: {\n  value: DateRange;\n  onValueChange: (value: DateRange) => void;\n}) {\n  const [isOpen, setIsOpen] = useState(false);\n\n  const presets = [\n    {\n      label: \"Last 30 days\",\n      value: {\n        from: getXDaysAgo(30),\n        to: getXDaysAgo(0),\n      },\n    },\n    {\n      label: \"Today\",\n      value: {\n        from: getXDaysAgo(1),\n        to: getXDaysAgo(0),\n      },\n    },\n  ];\n\n  return (\n    <div className=\"grid gap-2\">\n      <Popover open={isOpen} onOpenChange={setIsOpen}>\n        <Popover.Trigger asChild>\n          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n          <Button\n            data-testid=\"admin-date-range-selector-button\"\n            secondary\n            className={cn(\"justify-start\", !value && \"text-muted-foreground\")}\n            leftIcon={SvgCalendar}\n          >\n            {value?.from\n              ? value.to\n                ? `${format(value.from, \"LLL dd, y\")} - ${format(\n                    value.to,\n                    \"LLL dd, y\"\n                  )}`\n                : format(value.from, \"LLL dd, y\")\n              : \"Pick a date range\"}\n          </Button>\n        </Popover.Trigger>\n        <Popover.Content align=\"start\">\n          <Calendar\n            initialFocus\n            mode=\"range\"\n            defaultMonth={value?.from}\n            selected={value}\n            onSelect={(range) => {\n              if (range?.from) {\n                if (range.to) {\n                  // Normal range selection when initialized with a range\n                  onValueChange({ from: range.from, to: range.to });\n                } else {\n                  // Single date selection when initilized without a range\n                  const to = new Date(range.from);\n                  const from = new Date(to.setDate(to.getDate() - 1));\n                  onValueChange({ from, to });\n                }\n              }\n            }}\n            numberOfMonths={2}\n          />\n          <div className=\"border-t p-3\">\n            {presets.map((preset) => (\n              <OpalButton\n                key={preset.label}\n                prominence=\"internal\"\n                width=\"full\"\n                onClick={() => {\n                  onValueChange(preset.value);\n                }}\n              >\n                {preset.label}\n              </OpalButton>\n            ))}\n          </div>\n        </Popover.Content>\n      </Popover>\n    </div>\n  );\n});\n"
  },
  {
    "path": "web/src/components/dateRangeSelectors/SearchDateRangeSelector.tsx",
    "content": "import { DateRangePickerValue } from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { FiCalendar, FiChevronDown, FiXCircle } from \"react-icons/fi\";\nimport { CustomDropdown } from \"../Dropdown\";\nimport { timeRangeValues } from \"@/app/config/timeRange\";\nimport { TimeRangeSelector } from \"@/components/filters/TimeRangeSelector\";\nimport { cn } from \"@/lib/utils\";\n\nexport function SearchDateRangeSelector({\n  value,\n  onValueChange,\n  isHorizontal,\n  className,\n}: {\n  value: DateRangePickerValue | null;\n  onValueChange: (value: DateRangePickerValue | null) => void;\n  isHorizontal?: boolean;\n  className?: string;\n}) {\n  return (\n    <div>\n      <CustomDropdown\n        dropdown={\n          <TimeRangeSelector\n            value={value}\n            className={cn(\n              \"border border-border bg-background rounded-lg flex flex-col w-64 max-h-96 overflow-y-auto flex overscroll-contain\",\n              className\n            )}\n            timeRangeValues={timeRangeValues}\n            onValueChange={onValueChange}\n          />\n        }\n      >\n        <div\n          className={`\n            flex\n            text-sm\n            px-3\n            line-clamp-1\n            py-1.5\n            rounded-lg\n            border\n            border-border\n            cursor-pointer\n            hover:bg-accent-background-hovered`}\n        >\n          <FiCalendar className=\"flex-none my-auto mr-2\" />{\" \"}\n          <p className=\"line-clamp-1\">\n            {isHorizontal ? (\n              \"Date\"\n            ) : value?.selectValue ? (\n              <div className=\"text-text-darker\">{value.selectValue}</div>\n            ) : (\n              \"Any time...\"\n            )}\n          </p>\n          {value?.selectValue ? (\n            <div\n              className=\"my-auto ml-auto p-0.5 rounded-full w-fit\"\n              onClick={(e) => {\n                onValueChange(null);\n                e.stopPropagation();\n              }}\n            >\n              <FiXCircle />\n            </div>\n          ) : (\n            <FiChevronDown className=\"my-auto ml-auto\" />\n          )}\n        </div>\n      </CustomDropdown>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/dateRangeSelectors/dateUtils.ts",
    "content": "export function getXDaysAgo(daysAgo: number) {\n  const today = new Date();\n  const daysAgoDate = new Date(today);\n  daysAgoDate.setDate(today.getDate() - daysAgo);\n  return daysAgoDate;\n}\n\nexport function convertDateToEndOfDay(date?: Date | null) {\n  if (!date) {\n    return date;\n  }\n\n  const dateCopy = new Date(date);\n  dateCopy.setHours(23, 59, 59, 999);\n  return dateCopy;\n}\n\nexport function convertDateToStartOfDay(date?: Date | null) {\n  if (!date) {\n    return date;\n  }\n\n  const dateCopy = new Date(date);\n  dateCopy.setHours(0, 0, 0, 0);\n  return dateCopy;\n}\n"
  },
  {
    "path": "web/src/components/dev/StatsOverlay.tsx",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\n\n/**\n * Development-only stats.js overlay showing FPS, MS, and memory usage.\n * Enable by running `npm run dev:profile` or setting NEXT_PUBLIC_ENABLE_STATS=true.\n * Shows FPS and MB panels (memory is Chrome only).\n *\n * Uses dynamic import to prevent stats.js from being bundled in production.\n */\nexport default function StatsOverlay() {\n  useEffect(() => {\n    let animationFrameId: number | undefined;\n    let container: HTMLDivElement | null = null;\n    let isMounted = true;\n\n    // Dynamic import to avoid bundling in production\n    import(\"stats.js\").then((StatsModule) => {\n      // Guard against unmount during async import\n      if (!isMounted) return;\n\n      const Stats = StatsModule.default;\n\n      // Create Stats instances for FPS and MB\n      const panels = [0, 2].map((panel) => {\n        // 0=FPS, 2=MB (memory)\n        const stats = new Stats();\n        stats.showPanel(panel);\n        return stats;\n      });\n\n      // Create container for all panels\n      container = document.createElement(\"div\");\n      container.style.position = \"fixed\";\n      container.style.top = \"0\";\n      container.style.left = \"50%\";\n      container.style.transform = \"translateX(-50%)\";\n      container.style.zIndex = \"99999\";\n      container.style.display = \"flex\";\n\n      panels.forEach((stats) => {\n        stats.dom.style.position = \"relative\";\n        container!.appendChild(stats.dom);\n      });\n\n      document.body.appendChild(container);\n\n      const animate = () => {\n        panels.forEach((stats) => {\n          stats.begin();\n          stats.end();\n        });\n        animationFrameId = requestAnimationFrame(animate);\n      };\n\n      animationFrameId = requestAnimationFrame(animate);\n    });\n\n    return () => {\n      isMounted = false;\n      if (animationFrameId !== undefined)\n        cancelAnimationFrame(animationFrameId);\n      if (container?.parentNode) {\n        container.parentNode.removeChild(container);\n      }\n    };\n  }, []);\n\n  return null;\n}\n"
  },
  {
    "path": "web/src/components/dev/StatsOverlayLoader.tsx",
    "content": "\"use client\";\n\nimport dynamic from \"next/dynamic\";\n\nconst StatsOverlay = dynamic(() => import(\"@/components/dev/StatsOverlay\"), {\n  ssr: false,\n});\n\nexport default function StatsOverlayLoader() {\n  return <StatsOverlay />;\n}\n"
  },
  {
    "path": "web/src/components/embedding/CustomEmbeddingModelForm.tsx",
    "content": "import {\n  CloudEmbeddingModel,\n  EmbeddingProvider,\n  getFormattedProviderName,\n} from \"./interfaces\";\nimport { Formik, Form } from \"formik\";\nimport * as Yup from \"yup\";\nimport { TextFormField, BooleanFormField } from \"@/components/Field\";\nimport { Dispatch, SetStateAction } from \"react\";\nimport { Text } from \"@opal/components\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { EmbeddingDetails } from \"@/app/admin/embeddings/EmbeddingModelSelectionForm\";\n\nexport function CustomEmbeddingModelForm({\n  setShowTentativeModel,\n  currentValues,\n  provider,\n  embeddingType,\n}: {\n  setShowTentativeModel: Dispatch<SetStateAction<CloudEmbeddingModel | null>>;\n  currentValues: CloudEmbeddingModel | null;\n  provider: EmbeddingDetails;\n  embeddingType: EmbeddingProvider;\n}) {\n  return (\n    <div>\n      <Formik\n        initialValues={\n          currentValues || {\n            model_name: \"\",\n            model_dim: 768,\n            normalize: false,\n            query_prefix: \"\",\n            passage_prefix: \"\",\n            provider_type: embeddingType,\n            api_key: \"\",\n            enabled: true,\n            api_url: provider.api_url,\n            description: \"\",\n            index_name: \"\",\n          }\n        }\n        validationSchema={Yup.object().shape({\n          model_name: Yup.string().required(\"Model name is required\"),\n          model_dim: Yup.number().required(\"Model dimension is required\"),\n          normalize: Yup.boolean().required(),\n          query_prefix: Yup.string(),\n          passage_prefix: Yup.string(),\n          provider_type: Yup.string().required(\"Provider type is required\"),\n          api_key: Yup.string().optional(),\n          enabled: Yup.boolean(),\n          api_url: Yup.string().required(\"API base URL is required\"),\n          description: Yup.string(),\n          index_name: Yup.string().nullable(),\n        })}\n        onSubmit={async (values) => {\n          setShowTentativeModel(values as CloudEmbeddingModel);\n        }}\n      >\n        {({ isSubmitting, submitForm, errors }) => (\n          <Form>\n            <Text as=\"p\" font=\"heading-h3\">\n              {`Specify details for your ${getFormattedProviderName(\n                embeddingType\n              )} Provider's model`}\n            </Text>\n            <Spacer rem={1} />\n            <TextFormField\n              name=\"model_name\"\n              label=\"Model Name:\"\n              subtext={`The name of the ${getFormattedProviderName(\n                embeddingType\n              )} model`}\n              placeholder=\"e.g. 'all-MiniLM-L6-v2'\"\n            />\n\n            <TextFormField\n              name=\"model_dim\"\n              label=\"Model Dimension:\"\n              subtext=\"The dimension of the model's embeddings\"\n              placeholder=\"e.g. '1536'\"\n              type=\"number\"\n            />\n\n            <BooleanFormField\n              removeIndent\n              name=\"normalize\"\n              label=\"Normalize\"\n              subtext=\"Whether to normalize the embeddings\"\n            />\n\n            <TextFormField\n              name=\"query_prefix\"\n              label=\"Query Prefix:\"\n              subtext=\"Prefix for query embeddings\"\n            />\n\n            <TextFormField\n              name=\"passage_prefix\"\n              label=\"Passage Prefix:\"\n              subtext=\"Prefix for passage embeddings\"\n            />\n\n            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n            <Button\n              type=\"submit\"\n              disabled={isSubmitting}\n              className=\"w-64 mx-auto\"\n            >\n              Configure {getFormattedProviderName(embeddingType)} Model\n            </Button>\n          </Form>\n        )}\n      </Formik>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/embedding/CustomModelForm.tsx",
    "content": "import { BooleanFormField, TextFormField } from \"@/components/Field\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Form, Formik } from \"formik\";\nimport * as Yup from \"yup\";\nimport { HostedEmbeddingModel } from \"./interfaces\";\n\nexport function CustomModelForm({\n  onSubmit,\n}: {\n  onSubmit: (model: HostedEmbeddingModel) => void;\n}) {\n  return (\n    <div>\n      <Formik\n        initialValues={{\n          model_name: \"\",\n          model_dim: \"\",\n          query_prefix: \"\",\n          passage_prefix: \"\",\n          description: \"\",\n          normalize: true,\n        }}\n        validationSchema={Yup.object().shape({\n          model_name: Yup.string().required(\n            \"Please enter the name of the Embedding Model\"\n          ),\n          model_dim: Yup.number().required(\n            \"Please enter the dimensionality of the embeddings generated by the model\"\n          ),\n          query_prefix: Yup.string(),\n          passage_prefix: Yup.string(),\n          normalize: Yup.boolean().required(),\n        })}\n        onSubmit={async (values, formikHelpers) => {\n          onSubmit({\n            ...values,\n            model_dim: parseInt(values.model_dim),\n            api_key: null,\n            provider_type: null,\n            index_name: null,\n            api_url: null,\n          });\n        }}\n      >\n        {({ isSubmitting }) => (\n          <Form>\n            <TextFormField\n              name=\"model_name\"\n              label=\"Name:\"\n              subtext=\"The name of the model on Hugging Face\"\n              placeholder=\"E.g. 'nomic-ai/nomic-embed-text-v1'\"\n            />\n\n            <TextFormField\n              name=\"model_dim\"\n              label=\"Model Dimension:\"\n              subtext=\"The dimensionality of the embeddings generated by the model\"\n              placeholder=\"E.g. '768'\"\n              type=\"number\"\n            />\n            <TextFormField\n              min={-1}\n              name=\"description\"\n              label=\"Description:\"\n              subtext=\"Description of  your model\"\n              placeholder=\"\"\n            />\n\n            <TextFormField\n              name=\"query_prefix\"\n              label=\"[Optional] Query Prefix:\"\n              subtext={\n                <>\n                  The prefix specified by the model creators which should be\n                  prepended to <i>queries</i> before passing them to the model.\n                  Many models do not have this, in which case this should be\n                  left empty.\n                </>\n              }\n              placeholder=\"E.g. 'query: '\"\n            />\n            <TextFormField\n              name=\"passage_prefix\"\n              label=\"[Optional] Passage Prefix:\"\n              subtext={\n                <>\n                  The prefix specified by the model creators which should be\n                  prepended to <i>passages</i> before passing them to the model.\n                  Many models do not have this, in which case this should be\n                  left empty.\n                </>\n              }\n              placeholder=\"E.g. 'passage: '\"\n            />\n\n            <BooleanFormField\n              removeIndent\n              name=\"normalize\"\n              label=\"Normalize Embeddings\"\n              subtext=\"Whether or not to normalize the embeddings generated by the model. When in doubt, leave this checked.\"\n            />\n\n            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n            <Button\n              type=\"submit\"\n              disabled={isSubmitting}\n              className=\"w-64 mx-auto\"\n            >\n              Choose\n            </Button>\n          </Form>\n        )}\n      </Formik>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/embedding/FailedReIndexAttempts.tsx",
    "content": "import { buildCCPairInfoUrl } from \"@/app/admin/connector/[ccPairId]/lib\";\nimport { PageSelector } from \"@/components/PageSelector\";\nimport { IndexAttemptStatus } from \"@/components/Status\";\nimport { deleteCCPair } from \"@/lib/documentDeletion\";\nimport { FailedConnectorIndexingStatus } from \"@/lib/types\";\nimport { Button } from \"@opal/components\";\nimport { ConfirmEntityModal } from \"@/components/modals/ConfirmEntityModal\";\nimport {\n  Table,\n  TableBody,\n  TableCell,\n  TableHead,\n  TableHeader,\n  TableRow,\n} from \"@/components/ui/table\";\nimport { Text } from \"@opal/components\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport Link from \"next/link\";\nimport { useState } from \"react\";\nimport { FiLink, FiMaximize2, FiTrash } from \"react-icons/fi\";\nimport { mutate } from \"swr\";\nimport { toast } from \"@/hooks/useToast\";\nimport { SvgTrash } from \"@opal/icons\";\nexport function FailedReIndexAttempts({\n  failedIndexingStatuses,\n}: {\n  failedIndexingStatuses: FailedConnectorIndexingStatus[];\n}) {\n  const numToDisplay = 10;\n  const [page, setPage] = useState(1);\n  const [pendingConnectorDeletion, setPendingConnectorDeletion] = useState<{\n    connectorId: number;\n    credentialId: number;\n    ccPairId: number;\n    name: string;\n  } | null>(null);\n\n  const shouldConfirmConnectorDeletion = true;\n\n  const anyDeletable = failedIndexingStatuses.some(\n    (status) => status.is_deletable\n  );\n\n  return (\n    <div className=\"mt-6 mb-8 p-4 border border-status-error-02 bg-status-error-00 rounded-lg\">\n      {pendingConnectorDeletion && (\n        <ConfirmEntityModal\n          danger\n          entityType=\"connector\"\n          entityName={pendingConnectorDeletion.name}\n          additionalDetails=\"Deleting this connector schedules a deletion job that removes its indexed documents and deletes it for every user.\"\n          onClose={() => setPendingConnectorDeletion(null)}\n          onSubmit={async () => {\n            try {\n              await deleteCCPair(\n                pendingConnectorDeletion.connectorId,\n                pendingConnectorDeletion.credentialId,\n                () =>\n                  mutate(buildCCPairInfoUrl(pendingConnectorDeletion.ccPairId))\n              );\n            } catch (error) {\n              console.error(\"Error deleting connector:\", error);\n              toast.error(\"Failed to delete connector. Please try again.\");\n            } finally {\n              setPendingConnectorDeletion(null);\n            }\n          }}\n        />\n      )}\n\n      <div className=\"text-status-error-05\">\n        <Text as=\"p\" font=\"main-ui-action\">\n          Failed Re-indexing Attempts\n        </Text>\n      </div>\n      <Spacer rem={0.5} />\n      <div className=\"text-status-error-05\">\n        <Text as=\"p\">\n          The table below shows only the failed re-indexing attempts for\n          existing connectors. These failures require immediate attention. Once\n          all connectors have been re-indexed successfully, the new model will\n          be used for all search queries.\n        </Text>\n      </div>\n      <Spacer rem={1} />\n\n      <div>\n        <Table>\n          <TableHeader>\n            <TableRow>\n              <TableHead className=\"w-1/8 sm:w-1/6\">Connector Name</TableHead>\n              <TableHead className=\"w-1/8 sm:w-1/6\">Status</TableHead>\n              <TableHead className=\"w-4/8 sm:w-2/6\">Error Message</TableHead>\n              <TableHead className=\"w-1/8 sm:w-1/6\">Visit Connector</TableHead>\n              {anyDeletable && (\n                <TableHead className=\"w-1/8 sm:w-2/6\">\n                  Delete Connector\n                </TableHead>\n              )}\n            </TableRow>\n          </TableHeader>\n          <TableBody>\n            {failedIndexingStatuses\n              .slice(numToDisplay * (page - 1), numToDisplay * page)\n              .map((reindexingProgress) => {\n                return (\n                  <TableRow key={reindexingProgress.name}>\n                    <TableCell>\n                      <Link\n                        href={`/admin/connector/${reindexingProgress.cc_pair_id}`}\n                        className=\"text-link cursor-pointer flex\"\n                      >\n                        <FiMaximize2 className=\"my-auto mr-1\" />\n                        {reindexingProgress.name}\n                      </Link>\n                    </TableCell>\n                    <TableCell>\n                      <IndexAttemptStatus status=\"failed\" />\n                    </TableCell>\n\n                    <TableCell>\n                      <div>\n                        <Text as=\"p\">\n                          {reindexingProgress.error_msg || \"-\"}\n                        </Text>\n                      </div>\n                    </TableCell>\n                    <TableCell>\n                      <Link\n                        href={`/admin/connector/${reindexingProgress.cc_pair_id}`}\n                        className=\"ctext-link cursor-pointer flex\"\n                      >\n                        <FiLink className=\"my-auto mr-1\" />\n                        Visit Connector\n                      </Link>\n                    </TableCell>\n                    <TableCell>\n                      <Button\n                        disabled={!reindexingProgress.is_deletable}\n                        variant=\"danger\"\n                        onClick={async () => {\n                          if (shouldConfirmConnectorDeletion) {\n                            setPendingConnectorDeletion({\n                              connectorId: reindexingProgress.connector_id,\n                              credentialId: reindexingProgress.credential_id,\n                              ccPairId: reindexingProgress.cc_pair_id,\n                              name: reindexingProgress.name ?? \"this connector\",\n                            });\n                            return;\n                          }\n\n                          try {\n                            await deleteCCPair(\n                              reindexingProgress.connector_id,\n                              reindexingProgress.credential_id,\n                              () =>\n                                mutate(\n                                  buildCCPairInfoUrl(\n                                    reindexingProgress.cc_pair_id\n                                  )\n                                )\n                            );\n                          } catch (error) {\n                            console.error(\"Error deleting connector:\", error);\n                            toast.error(\n                              \"Failed to delete connector. Please try again.\"\n                            );\n                          }\n                        }}\n                        icon={SvgTrash}\n                      >\n                        Delete\n                      </Button>\n                    </TableCell>\n                  </TableRow>\n                );\n              })}\n          </TableBody>\n        </Table>\n\n        <div className=\"mt-3 flex\">\n          <div className=\"mx-auto\">\n            <PageSelector\n              totalPages={Math.ceil(\n                failedIndexingStatuses.length / numToDisplay\n              )}\n              currentPage={page}\n              onPageChange={(newPage) => setPage(newPage)}\n            />\n          </div>\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/embedding/ModelSelector.tsx",
    "content": "import { getCurrentModelCopy } from \"@/app/admin/embeddings/interfaces\";\nimport {\n  EmbeddingModelDescriptor,\n  getIconForRerankType,\n  getTitleForRerankType,\n  getFormattedProviderName,\n  HostedEmbeddingModel,\n  CloudEmbeddingModel,\n} from \"./interfaces\";\nimport { FiExternalLink } from \"react-icons/fi\";\nimport CardSection from \"../admin/CardSection\";\n\nexport function ModelPreview({\n  model,\n  display,\n  showDetails = false,\n}: {\n  model: EmbeddingModelDescriptor;\n  display?: boolean;\n  showDetails?: boolean;\n}) {\n  const currentModelCopy = getCurrentModelCopy(model.model_name);\n\n  return (\n    <CardSection\n      className={`shadow-lg rounded-16 bg-background-tint-00 ${\n        display ? \"p-4\" : \"p-2\"\n      } w-96 flex flex-col`}\n    >\n      <div className=\"font-bold text-lg flex\">{model.model_name}</div>\n\n      <div className=\"text-sm mt-1 mx-1 mb-3\">\n        {model.description ||\n          currentModelCopy?.description ||\n          \"Custom model—no description is available.\"}\n      </div>\n\n      {showDetails && (\n        <div className=\"pt-4 border-t border-border space-y-3\">\n          <div className=\"grid grid-cols-2 gap-4 text-sm\">\n            <div>\n              <span className=\"font-semibold text-text-700\">Dimensions:</span>\n              <div className=\"text-text-600\">\n                {model.model_dim.toLocaleString()}\n              </div>\n            </div>\n\n            <div>\n              <span className=\"font-semibold text-text-700\">Provider:</span>\n              <div className=\"text-text-600\">\n                {getFormattedProviderName(model.provider_type)}\n              </div>\n            </div>\n\n            <div>\n              <span className=\"font-semibold text-text-700\">Normalized:</span>\n              <div className=\"text-text-600\">\n                {model.normalize ? \"Yes\" : \"No\"}\n              </div>\n            </div>\n\n            {\"embedding_precision\" in model &&\n              (model as any).embedding_precision && (\n                <div>\n                  <span className=\"font-semibold text-text-700\">\n                    Precision:\n                  </span>\n                  <div className=\"text-text-600\">\n                    {(model as any).embedding_precision}\n                  </div>\n                </div>\n              )}\n\n            {\"isDefault\" in model &&\n              (model as HostedEmbeddingModel).isDefault && (\n                <div>\n                  <span className=\"font-semibold text-text-700\">Type:</span>\n                  <div className=\"text-text-600\">Default</div>\n                </div>\n              )}\n\n            {\"pricePerMillion\" in model && (\n              <div>\n                <span className=\"font-semibold text-text-700\">\n                  Price/Million:\n                </span>\n                <div className=\"text-text-600\">\n                  ${(model as CloudEmbeddingModel).pricePerMillion}\n                </div>\n              </div>\n            )}\n          </div>\n\n          {(model.query_prefix || model.passage_prefix) && (\n            <div className=\"space-y-2\">\n              {model.query_prefix && (\n                <div>\n                  <span className=\"font-semibold text-text-700\">\n                    Query Prefix:\n                  </span>\n                  <div className=\"text-text-600 font-mono text-xs p-2 rounded\">\n                    &quot;{model.query_prefix}&quot;\n                  </div>\n                </div>\n              )}\n\n              {model.passage_prefix && (\n                <div>\n                  <span className=\"font-semibold text-text-700\">\n                    Passage Prefix:\n                  </span>\n                  <div className=\"text-text-600 font-mono text-xs p-2 rounded\">\n                    &quot;{model.passage_prefix}&quot;\n                  </div>\n                </div>\n              )}\n            </div>\n          )}\n\n          {model.api_url && (\n            <div>\n              <span className=\"font-semibold text-text-700\">API URL:</span>\n              <div className=\"text-text-600 font-mono text-xs bg-background p-2 rounded break-all\">\n                {model.api_url}\n              </div>\n            </div>\n          )}\n\n          {model.api_version && (\n            <div>\n              <span className=\"font-semibold text-text-700\">API Version:</span>\n              <div className=\"text-text-600\">{model.api_version}</div>\n            </div>\n          )}\n\n          {model.deployment_name && (\n            <div>\n              <span className=\"font-semibold text-text-700\">Deployment:</span>\n              <div className=\"text-text-600\">{model.deployment_name}</div>\n            </div>\n          )}\n\n          {\"link\" in model && (model as HostedEmbeddingModel).link && (\n            <div className=\"pt-2\">\n              <a\n                href={(model as HostedEmbeddingModel).link}\n                target=\"_blank\"\n                rel=\"noopener noreferrer\"\n                className=\"inline-flex items-center text-blue-500 hover:text-blue-700 transition-colors duration-200 text-sm\"\n              >\n                <span>View Documentation</span>\n                <FiExternalLink className=\"ml-1\" size={14} />\n              </a>\n            </div>\n          )}\n        </div>\n      )}\n    </CardSection>\n  );\n}\n\nexport function ModelOption({\n  model,\n  onSelect,\n  selected,\n}: {\n  model: HostedEmbeddingModel;\n  onSelect?: (model: HostedEmbeddingModel) => void;\n  selected: boolean;\n}) {\n  const currentModelCopy = getCurrentModelCopy(model.model_name);\n\n  return (\n    <div\n      className={`p-4 w-96 border rounded-lg transition-all duration-200 ${\n        selected\n          ? \"border-blue-800 bg-blue-50 dark:bg-blue-950 dark:border-blue-700 shadow-md\"\n          : \"border-background-200 hover:border-blue-300 hover:shadow-sm\"\n      }`}\n    >\n      <div className=\"flex items-center justify-between mb-3\">\n        <h3 className=\"font-bold text-lg\">{model.model_name}</h3>\n\n        {model.link && (\n          <a\n            href={model.link}\n            target=\"_blank\"\n            rel=\"noopener noreferrer\"\n            onClick={(e) => e.stopPropagation()}\n            className=\"text-blue-500 hover:text-blue-700 transition-colors duration-200\"\n          >\n            <FiExternalLink size={18} />\n          </a>\n        )}\n      </div>\n      <p className=\"text-sm k text-text-600 dark:text-neutral-400 text-left mb-2\">\n        {model.description ||\n          currentModelCopy?.description ||\n          \"Custom model—no description is available.\"}\n      </p>\n      <div className=\"text-xs text-text-500\">\n        {model.isDefault ? \"Default\" : \"Self-hosted\"}\n      </div>\n      {onSelect && (\n        <div className=\"mt-3\">\n          <button\n            className={`w-full p-2 rounded-lg text-sm ${\n              selected\n                ? \"bg-background-125 border border-border cursor-not-allowed\"\n                : \"bg-background border border-border hover:bg-accent-background-hovered cursor-pointer\"\n            }`}\n            onClick={(e) => {\n              e.stopPropagation();\n              if (!selected) onSelect(model);\n            }}\n            disabled={selected}\n          >\n            {selected ? \"Selected Model\" : \"Select Model\"}\n          </button>\n        </div>\n      )}\n    </div>\n  );\n}\nexport function ModelSelector({\n  modelOptions,\n  setSelectedModel,\n  currentEmbeddingModel,\n}: {\n  currentEmbeddingModel: HostedEmbeddingModel;\n  modelOptions: HostedEmbeddingModel[];\n  setSelectedModel: (model: HostedEmbeddingModel) => void;\n}) {\n  const groupedModelOptions = modelOptions.reduce(\n    (acc, model) => {\n      const [type] = model.model_name.split(\"/\");\n      if (type !== undefined) {\n        if (!acc[type]) {\n          acc[type] = [];\n        }\n\n        const acc_by_type = acc[type];\n        if (acc_by_type !== undefined) {\n          acc_by_type.push(model);\n        }\n      }\n\n      return acc;\n    },\n    {} as Record<string, HostedEmbeddingModel[]>\n  );\n\n  return (\n    <div>\n      <div className=\"flex flex-col gap-y-6 gap-6\">\n        {Object.entries(groupedModelOptions).map(([type, models]) => (\n          <div key={type}>\n            <div className=\"flex items-center mb-2\">\n              {getIconForRerankType(type)}\n              <h2 className=\"ml-2 mt-2 text-xl font-bold\">\n                {getTitleForRerankType(type)}\n              </h2>\n            </div>\n\n            <div className=\"flex mt-4 flex-wrap gap-4\">\n              {models.map((modelOption) => (\n                <ModelOption\n                  key={modelOption.model_name}\n                  model={modelOption}\n                  onSelect={setSelectedModel}\n                  selected={currentEmbeddingModel === modelOption}\n                />\n              ))}\n            </div>\n          </div>\n        ))}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/embedding/ReindexingProgressTable.tsx",
    "content": "import { PageSelector } from \"@/components/PageSelector\";\nimport { IndexAttemptStatus } from \"@/components/Status\";\nimport {\n  ConnectorIndexingStatus,\n  ConnectorIndexingStatusLite,\n} from \"@/lib/types\";\nimport {\n  Table,\n  TableBody,\n  TableCell,\n  TableHead,\n  TableHeader,\n  TableRow,\n} from \"@/components/ui/table\";\nimport Link from \"next/link\";\nimport { useState } from \"react\";\nimport { FiMaximize2 } from \"react-icons/fi\";\n\nexport function ReindexingProgressTable({\n  reindexingProgress,\n}: {\n  reindexingProgress: ConnectorIndexingStatusLite[];\n}) {\n  const numToDisplay = 10;\n  const [page, setPage] = useState(1);\n\n  return (\n    <div>\n      <Table>\n        <TableHeader>\n          <TableRow>\n            <TableHead className=\"w-1/7 sm:w-1/5\">Connector Name</TableHead>\n            <TableHead className=\"w-3/7 sm:w-1/5\">Status</TableHead>\n            <TableHead className=\"w-3/7 sm:w-1/5\">Docs Re-Indexed</TableHead>\n            <TableHead className=\"w-3/7 sm:w-1/5\"></TableHead>\n          </TableRow>\n        </TableHeader>\n        <TableBody>\n          {reindexingProgress\n            .slice(numToDisplay * (page - 1), numToDisplay * page)\n            .map((reindexingProgress) => {\n              return (\n                <TableRow key={reindexingProgress.name}>\n                  <TableCell>\n                    <Link\n                      href={`/admin/connector/${reindexingProgress.cc_pair_id}`}\n                      className=\"text-link cursor-pointer flex\"\n                    >\n                      <FiMaximize2 className=\"my-auto mr-1\" />\n                      {reindexingProgress.name}\n                    </Link>\n                  </TableCell>\n                  <TableCell>\n                    {reindexingProgress.last_status && (\n                      <IndexAttemptStatus\n                        status={reindexingProgress.last_status}\n                      />\n                    )}\n                  </TableCell>\n                  <TableCell>\n                    {reindexingProgress?.latest_index_attempt_docs_indexed ||\n                      \"-\"}\n                  </TableCell>\n                </TableRow>\n              );\n            })}\n        </TableBody>\n      </Table>\n\n      <div className=\"mt-3 flex\">\n        <div className=\"mx-auto\">\n          <PageSelector\n            totalPages={Math.ceil(reindexingProgress.length / numToDisplay)}\n            currentPage={page}\n            onPageChange={(newPage) => setPage(newPage)}\n          />\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/embedding/interfaces.tsx",
    "content": "import { JSX } from \"react\";\nimport {\n  AzureIcon,\n  CohereIcon,\n  GoogleIcon,\n  IconProps,\n  LiteLLMIcon,\n  MicrosoftIcon,\n  NomicIcon,\n  OpenAIISVG,\n  OpenSourceIcon,\n  VoyageIconSVG,\n} from \"@/components/icons/icons\";\nimport { SwitchoverType } from \"@/app/admin/embeddings/interfaces\";\nimport { DOCS_ADMINS_PATH } from \"@/lib/constants\";\n\nexport enum EmbeddingProvider {\n  OPENAI = \"openai\",\n  COHERE = \"cohere\",\n  VOYAGE = \"voyage\",\n  GOOGLE = \"google\",\n  LITELLM = \"litellm\",\n  AZURE = \"azure\",\n}\n\nexport interface CloudEmbeddingProvider {\n  provider_type: EmbeddingProvider;\n  api_key?: string;\n  api_url?: string;\n  custom_config?: Record<string, string>;\n  docsLink?: string;\n\n  // Frontend-specific properties\n  website: string;\n  icon: ({ size, className }: IconProps) => JSX.Element;\n  description: string;\n  apiLink: string;\n  costslink?: string;\n\n  // Relationships\n  embedding_models: CloudEmbeddingModel[];\n  default_model?: CloudEmbeddingModel;\n}\n\n// Embedding Models\nexport interface EmbeddingModelDescriptor {\n  id?: number;\n  model_name: string;\n  model_dim: number;\n  normalize: boolean;\n  query_prefix: string;\n  passage_prefix: string;\n  provider_type: EmbeddingProvider | null;\n  description: string;\n  api_key: string | null;\n  api_url: string | null;\n  api_version?: string | null;\n  deployment_name?: string | null;\n  index_name: string | null;\n  switchover_type?: SwitchoverType;\n}\n\nexport interface CloudEmbeddingModel extends EmbeddingModelDescriptor {\n  pricePerMillion: number;\n}\n\nexport interface HostedEmbeddingModel extends EmbeddingModelDescriptor {\n  link?: string;\n  isDefault?: boolean;\n}\n\n// Responses\nexport interface FullEmbeddingModelResponse {\n  current_model_name: string;\n  secondary_model_name: string | null;\n}\n\nexport interface CloudEmbeddingProviderFull extends CloudEmbeddingProvider {\n  configured?: boolean;\n}\n\nexport const AVAILABLE_MODELS: HostedEmbeddingModel[] = [\n  {\n    model_name: \"nomic-ai/nomic-embed-text-v1\",\n    model_dim: 768,\n    normalize: true,\n    description:\n      \"The recommended default for most situations. If you aren't sure which model to use, this is probably the one.\",\n    isDefault: true,\n    link: \"https://huggingface.co/nomic-ai/nomic-embed-text-v1\",\n    query_prefix: \"search_query: \",\n    passage_prefix: \"search_document: \",\n    index_name: \"\",\n    provider_type: null,\n    api_key: null,\n    api_url: null,\n  },\n  {\n    model_name: \"intfloat/e5-base-v2\",\n    model_dim: 768,\n    normalize: true,\n    description:\n      \"A smaller and faster model than the default. It is around 2x faster than the default model at the cost of lower search quality.\",\n    link: \"https://huggingface.co/intfloat/e5-base-v2\",\n    query_prefix: \"query: \",\n    passage_prefix: \"passage: \",\n    index_name: \"\",\n    provider_type: null,\n    api_url: null,\n    api_key: null,\n  },\n  {\n    model_name: \"intfloat/e5-small-v2\",\n    model_dim: 384,\n    normalize: true,\n    description:\n      \"The smallest and fastest version of the E5 line of models. If you're running Onyx on a resource constrained system, then this may be a good choice.\",\n    link: \"https://huggingface.co/intfloat/e5-small-v2\",\n    query_prefix: \"query: \",\n    passage_prefix: \"passage: \",\n    index_name: \"\",\n    provider_type: null,\n    api_key: null,\n    api_url: null,\n  },\n  {\n    model_name: \"intfloat/multilingual-e5-base\",\n    model_dim: 768,\n    normalize: true,\n    description:\n      \"For corpora in other languages besides English, this is the one to choose.\",\n    link: \"https://huggingface.co/intfloat/multilingual-e5-base\",\n    query_prefix: \"query: \",\n    passage_prefix: \"passage: \",\n    index_name: \"\",\n    provider_type: null,\n    api_key: null,\n    api_url: null,\n  },\n  {\n    model_name: \"intfloat/multilingual-e5-small\",\n    model_dim: 384,\n    normalize: true,\n    description:\n      \"For corpora in other languages besides English, as well as being on a resource constrained system, this is the one to choose.\",\n    link: \"https://huggingface.co/intfloat/multilingual-e5-base\",\n    query_prefix: \"query: \",\n    passage_prefix: \"passage: \",\n    index_name: \"\",\n    provider_type: null,\n    api_key: null,\n    api_url: null,\n  },\n];\n\nexport const LITELLM_CLOUD_PROVIDER: CloudEmbeddingProvider = {\n  provider_type: EmbeddingProvider.LITELLM,\n  website: \"https://github.com/BerriAI/litellm\",\n  icon: LiteLLMIcon,\n  description: \"Open-source library to call LLM APIs using OpenAI format\",\n  apiLink: \"https://docs.litellm.ai/docs/proxy/quick_start\",\n  embedding_models: [], // No default embedding models\n};\n\nexport const AZURE_CLOUD_PROVIDER: CloudEmbeddingProvider = {\n  provider_type: EmbeddingProvider.AZURE,\n  website:\n    \"https://azure.microsoft.com/en-us/products/cognitive-services/openai/\",\n  icon: AzureIcon,\n  description:\n    \"Azure OpenAI is a cloud-based AI service that provides access to OpenAI models.\",\n  apiLink:\n    \"https://docs.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource\",\n  costslink:\n    \"https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai/\",\n  embedding_models: [], // No default embedding models\n};\n\nexport const AVAILABLE_CLOUD_PROVIDERS: CloudEmbeddingProvider[] = [\n  {\n    provider_type: EmbeddingProvider.COHERE,\n    website: \"https://cohere.ai\",\n    icon: CohereIcon,\n    docsLink: `${DOCS_ADMINS_PATH}/advanced_configs/search_configs`,\n    description:\n      \"AI company specializing in NLP models for various text-based tasks\",\n    apiLink: \"https://dashboard.cohere.ai/api-keys\",\n    costslink: \"https://cohere.com/pricing\",\n    embedding_models: [\n      {\n        provider_type: EmbeddingProvider.COHERE,\n        model_name: \"embed-english-v3.0\",\n        description:\n          \"Cohere's English embedding model. Good performance for English-language tasks.\",\n        pricePerMillion: 0.1,\n        model_dim: 1024,\n        normalize: false,\n        query_prefix: \"\",\n        passage_prefix: \"\",\n        index_name: \"\",\n        api_key: null,\n        api_url: null,\n      },\n      {\n        model_name: \"embed-english-light-v3.0\",\n        provider_type: EmbeddingProvider.COHERE,\n        description:\n          \"Cohere's lightweight English embedding model. Faster and more efficient for simpler tasks.\",\n        pricePerMillion: 0.1,\n        model_dim: 384,\n        normalize: false,\n        query_prefix: \"\",\n        passage_prefix: \"\",\n        index_name: \"\",\n        api_key: null,\n        api_url: null,\n      },\n    ],\n  },\n  {\n    provider_type: EmbeddingProvider.OPENAI,\n    website: \"https://openai.com\",\n    icon: OpenAIISVG,\n    description: \"AI industry leader known for ChatGPT and DALL-E\",\n    apiLink: \"https://platform.openai.com/api-keys\",\n    docsLink: `${DOCS_ADMINS_PATH}/advanced_configs/search_configs`,\n    costslink: \"https://openai.com/pricing\",\n    embedding_models: [\n      {\n        provider_type: EmbeddingProvider.OPENAI,\n        model_name: \"text-embedding-3-large\",\n        description:\n          \"OpenAI's large embedding model. Best performance, but more expensive.\",\n        pricePerMillion: 0.13,\n        model_dim: 3072,\n        normalize: false,\n        query_prefix: \"\",\n        passage_prefix: \"\",\n        index_name: \"\",\n        api_key: null,\n        api_url: null,\n      },\n      {\n        provider_type: EmbeddingProvider.OPENAI,\n        model_name: \"text-embedding-3-small\",\n        model_dim: 1536,\n        normalize: false,\n        query_prefix: \"\",\n        passage_prefix: \"\",\n        description:\n          \"OpenAI's newer, more efficient embedding model. Good balance of performance and cost.\",\n        pricePerMillion: 0.02,\n        index_name: \"\",\n        api_key: null,\n        api_url: null,\n      },\n    ],\n  },\n\n  {\n    provider_type: EmbeddingProvider.GOOGLE,\n    website: \"https://ai.google\",\n    icon: GoogleIcon,\n    docsLink: `${DOCS_ADMINS_PATH}/advanced_configs/search_configs`,\n    description:\n      \"Offers a wide range of AI services including language and vision models\",\n    apiLink: \"https://console.cloud.google.com/apis/credentials\",\n    costslink: \"https://cloud.google.com/vertex-ai/pricing\",\n    embedding_models: [\n      {\n        provider_type: EmbeddingProvider.GOOGLE,\n        model_name: \"gemini-embedding-001\",\n        description: \"Google's Gemini embedding model. Powerful and efficient.\",\n        pricePerMillion: 0.025,\n        model_dim: 3072,\n        normalize: false,\n        query_prefix: \"\",\n        passage_prefix: \"\",\n        index_name: \"\",\n        api_key: null,\n        api_url: null,\n      },\n      {\n        provider_type: EmbeddingProvider.GOOGLE,\n        model_name: \"text-embedding-005\",\n        description: \"Smaller, lighter-weight embedding model from Google.\",\n        pricePerMillion: 0.025,\n        model_dim: 768,\n        normalize: false,\n        query_prefix: \"\",\n        passage_prefix: \"\",\n        index_name: \"\",\n        api_key: null,\n        api_url: null,\n      },\n    ],\n  },\n  {\n    provider_type: EmbeddingProvider.VOYAGE,\n    website: \"https://www.voyageai.com\",\n    icon: VoyageIconSVG,\n    description: \"Advanced NLP research startup born from Stanford AI Labs\",\n    docsLink: `${DOCS_ADMINS_PATH}/advanced_configs/search_configs`,\n    apiLink: \"https://www.voyageai.com/dashboard\",\n    costslink: \"https://www.voyageai.com/pricing\",\n    embedding_models: [\n      {\n        provider_type: EmbeddingProvider.VOYAGE,\n        model_name: \"voyage-large-2-instruct\",\n        description:\n          \"Voyage's large embedding model. High performance with instruction fine-tuning.\",\n        pricePerMillion: 0.12,\n        model_dim: 1024,\n        normalize: false,\n        query_prefix: \"\",\n        passage_prefix: \"\",\n        index_name: \"\",\n        api_key: null,\n        api_url: null,\n      },\n      {\n        provider_type: EmbeddingProvider.VOYAGE,\n        model_name: \"voyage-light-2-instruct\",\n        description:\n          \"Voyage's lightweight embedding model. Good balance of performance and efficiency.\",\n        pricePerMillion: 0.12,\n        model_dim: 1024,\n        normalize: false,\n        query_prefix: \"\",\n        passage_prefix: \"\",\n        index_name: \"\",\n        api_key: null,\n        api_url: null,\n      },\n    ],\n  },\n];\n\nexport const getFormattedProviderName = (providerType: string | null) => {\n  if (!providerType) return \"Self-hosted\";\n\n  switch (providerType) {\n    case \"openai\":\n      return \"OpenAI\";\n    case \"cohere\":\n      return \"Cohere\";\n    case \"voyage\":\n      return \"Voyage AI\";\n    case \"google\":\n      return \"Google\";\n    case \"litellm\":\n      return \"LiteLLM\";\n    case \"azure\":\n      return \"Azure\";\n    default:\n      return providerType.charAt(0).toUpperCase() + providerType.slice(1);\n  }\n};\n\nexport const getTitleForRerankType = (type: string) => {\n  switch (type) {\n    case \"nomic-ai\":\n      return \"Nomic (recommended)\";\n    case \"intfloat\":\n      return \"Microsoft\";\n    default:\n      return \"Open Source\";\n  }\n};\n\nexport const getIconForRerankType = (type: string) => {\n  switch (type) {\n    case \"nomic-ai\":\n      return <NomicIcon size={40} />;\n    case \"intfloat\":\n      return <MicrosoftIcon size={40} />;\n    default:\n      return <OpenSourceIcon size={40} />;\n  }\n};\n\nexport const INVALID_OLD_MODEL = \"thenlper/gte-small\";\n\nexport function checkModelNameIsValid(\n  modelName: string | undefined | null\n): boolean {\n  return !!modelName && modelName !== INVALID_OLD_MODEL;\n}\n"
  },
  {
    "path": "web/src/components/errorPages/AccessRestrictedPage.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport Link from \"next/link\";\nimport ErrorPageLayout from \"@/components/errorPages/ErrorPageLayout\";\nimport { Button } from \"@opal/components\";\nimport InlineExternalLink from \"@/refresh-components/InlineExternalLink\";\nimport { logout } from \"@/lib/user\";\nimport { loadStripe } from \"@stripe/stripe-js\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport { useLicense } from \"@/hooks/useLicense\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport { ApplicationStatus } from \"@/interfaces/settings\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgLock } from \"@opal/icons\";\n\nconst linkClassName = \"text-action-link-05 hover:text-action-link-06 underline\";\n\nconst fetchStripePublishableKey = async (): Promise<string> => {\n  const response = await fetch(\"/api/tenants/stripe-publishable-key\");\n  if (!response.ok) {\n    throw new Error(\"Failed to fetch Stripe publishable key\");\n  }\n  const data = await response.json();\n  return data.publishable_key;\n};\n\nconst fetchResubscriptionSession = async () => {\n  const response = await fetch(\"/api/tenants/create-subscription-session\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n  if (!response.ok) {\n    throw new Error(\"Failed to create resubscription session\");\n  }\n  return response.json();\n};\n\nexport default function AccessRestricted() {\n  const [isLoading, setIsLoading] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n  const { data: license } = useLicense();\n  const settings = useSettingsContext();\n\n  const isSeatLimitExceeded =\n    settings.settings.application_status ===\n    ApplicationStatus.SEAT_LIMIT_EXCEEDED;\n  const hadPreviousLicense = license?.has_license === true;\n  const showRenewalMessage = NEXT_PUBLIC_CLOUD_ENABLED || hadPreviousLicense;\n\n  function getSeatLimitMessage() {\n    const { used_seats, seat_count } = settings.settings;\n    const counts =\n      used_seats != null && seat_count != null\n        ? ` (${used_seats} users / ${seat_count} seats)`\n        : \"\";\n    return `Your organization has exceeded its licensed seat count${counts}. Access is restricted until the number of users is reduced or your license is upgraded.`;\n  }\n\n  const initialModalMessage = isSeatLimitExceeded\n    ? getSeatLimitMessage()\n    : showRenewalMessage\n      ? NEXT_PUBLIC_CLOUD_ENABLED\n        ? \"Your access to Onyx has been temporarily suspended due to a lapse in your subscription.\"\n        : \"Your access to Onyx has been temporarily suspended due to a lapse in your license.\"\n      : \"An Enterprise license is required to use Onyx. Your data is protected and will be available once a license is activated.\";\n\n  const handleResubscribe = async () => {\n    setIsLoading(true);\n    setError(null);\n    try {\n      const publishableKey = await fetchStripePublishableKey();\n      const { sessionId } = await fetchResubscriptionSession();\n      const stripe = await loadStripe(publishableKey);\n\n      if (stripe) {\n        await stripe.redirectToCheckout({ sessionId });\n      } else {\n        throw new Error(\"Stripe failed to load\");\n      }\n    } catch (error) {\n      console.error(\"Error creating resubscription session:\", error);\n      setError(\"Error opening resubscription page. Please try again later.\");\n    } finally {\n      setIsLoading(false);\n    }\n  };\n\n  return (\n    <ErrorPageLayout>\n      <div className=\"flex items-center gap-2\">\n        <Text headingH2>Access Restricted</Text>\n        <SvgLock className=\"stroke-status-error-05 w-[1.5rem] h-[1.5rem]\" />\n      </div>\n\n      <Text text03>{initialModalMessage}</Text>\n\n      {isSeatLimitExceeded ? (\n        <>\n          <Text text03>\n            If you are an administrator, you can manage users on the{\" \"}\n            <Link className={linkClassName} href=\"/admin/users\">\n              User Management\n            </Link>{\" \"}\n            page or upgrade your license on the{\" \"}\n            <Link className={linkClassName} href=\"/admin/billing\">\n              Admin Billing\n            </Link>{\" \"}\n            page.\n          </Text>\n\n          <div className=\"flex flex-row gap-2\">\n            <Button\n              onClick={async () => {\n                await logout();\n                window.location.reload();\n              }}\n            >\n              Log out\n            </Button>\n          </div>\n        </>\n      ) : NEXT_PUBLIC_CLOUD_ENABLED ? (\n        <>\n          <Text text03>\n            To reinstate your access and continue benefiting from Onyx&apos;s\n            powerful features, please update your payment information.\n          </Text>\n\n          <Text text03>\n            If you&apos;re an admin, you can manage your subscription by\n            clicking the button below. For other users, please reach out to your\n            administrator to address this matter.\n          </Text>\n\n          <div className=\"flex flex-row gap-2\">\n            <Button disabled={isLoading} onClick={handleResubscribe}>\n              {isLoading ? \"Loading...\" : \"Resubscribe\"}\n            </Button>\n            <Button\n              prominence=\"secondary\"\n              onClick={async () => {\n                await logout();\n                window.location.reload();\n              }}\n            >\n              Log out\n            </Button>\n          </div>\n\n          {error && <Text className=\"text-status-error-05\">{error}</Text>}\n        </>\n      ) : (\n        <>\n          <Text text03>\n            {hadPreviousLicense\n              ? \"To reinstate your access and continue using Onyx, please contact your system administrator to renew your license.\"\n              : \"To get started, please contact your system administrator to obtain an Enterprise license.\"}\n          </Text>\n\n          <Text text03>\n            If you are the administrator, please visit the{\" \"}\n            <Link className={linkClassName} href=\"/admin/billing\">\n              Admin Billing\n            </Link>{\" \"}\n            page to {hadPreviousLicense ? \"renew\" : \"activate\"} your license,\n            sign up through Stripe or reach out to{\" \"}\n            <a className={linkClassName} href=\"mailto:support@onyx.app\">\n              support@onyx.app\n            </a>{\" \"}\n            for billing assistance.\n          </Text>\n\n          <div className=\"flex flex-row gap-2\">\n            <Button\n              onClick={async () => {\n                await logout();\n                window.location.reload();\n              }}\n            >\n              Log out\n            </Button>\n          </div>\n        </>\n      )}\n\n      <Text text03>\n        Need help? Join our{\" \"}\n        <InlineExternalLink\n          className={linkClassName}\n          href=\"https://discord.gg/4NA5SbzrWb\"\n        >\n          Discord community\n        </InlineExternalLink>{\" \"}\n        for support.\n      </Text>\n    </ErrorPageLayout>\n  );\n}\n"
  },
  {
    "path": "web/src/components/errorPages/CloudErrorPage.tsx",
    "content": "import Text from \"@/refresh-components/texts/Text\";\nimport ErrorPageLayout from \"@/components/errorPages/ErrorPageLayout\";\n\nexport default function CloudError() {\n  return (\n    <ErrorPageLayout>\n      <Text as=\"p\" headingH2>\n        Maintenance in Progress\n      </Text>\n\n      <Text as=\"p\" text03>\n        Onyx is currently in a maintenance window. Please check back in a couple\n        of minutes.\n      </Text>\n\n      <Text as=\"p\" text03>\n        We apologize for any inconvenience this may cause and appreciate your\n        patience.\n      </Text>\n    </ErrorPageLayout>\n  );\n}\n"
  },
  {
    "path": "web/src/components/errorPages/ErrorPage.tsx",
    "content": "import ErrorPageLayout from \"@/components/errorPages/ErrorPageLayout\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { DOCS_BASE_URL } from \"@/lib/constants\";\nimport { SvgAlertCircle } from \"@opal/icons\";\n\nexport default function Error() {\n  return (\n    <ErrorPageLayout>\n      <div className=\"flex flex-row items-center gap-2\">\n        <Text as=\"p\" headingH2>\n          We encountered an issue\n        </Text>\n        <SvgAlertCircle className=\"w-[1.5rem] h-[1.5rem] stroke-text-04\" />\n      </div>\n\n      <Text as=\"p\" text03>\n        It seems there was a problem loading your Onyx settings. This could be\n        due to a configuration issue or incomplete setup.\n      </Text>\n\n      <Text as=\"p\" text03>\n        If you&apos;re an admin, please review our{\" \"}\n        <a\n          className=\"text-action-link-05\"\n          href={`${DOCS_BASE_URL}?utm_source=app&utm_medium=error_page&utm_campaign=config_error`}\n          target=\"_blank\"\n          rel=\"noopener noreferrer\"\n        >\n          documentation\n        </a>{\" \"}\n        for proper configuration steps. If you&apos;re a user, please contact\n        your admin for assistance.\n      </Text>\n\n      <Text as=\"p\" text03>\n        Need help? Join our{\" \"}\n        <a\n          className=\"text-action-link-05\"\n          href=\"https://discord.gg/4NA5SbzrWb\"\n          target=\"_blank\"\n          rel=\"noopener noreferrer\"\n        >\n          Discord community\n        </a>{\" \"}\n        for support.\n      </Text>\n    </ErrorPageLayout>\n  );\n}\n"
  },
  {
    "path": "web/src/components/errorPages/ErrorPageLayout.tsx",
    "content": "import React from \"react\";\nimport { OnyxLogoTypeIcon } from \"@/components/icons/icons\";\n\ninterface ErrorPageLayoutProps {\n  children: React.ReactNode;\n}\n\nexport default function ErrorPageLayout({ children }: ErrorPageLayoutProps) {\n  return (\n    <div className=\"flex flex-col items-center justify-center w-full h-screen gap-4\">\n      <OnyxLogoTypeIcon size={120} className=\"\" />\n      <div className=\"max-w-[40rem] w-full border bg-background-neutral-00 shadow-02 rounded-16 p-6 flex flex-col gap-4\">\n        {children}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/filters/SourceSelector.tsx",
    "content": "import React, { JSX } from \"react\";\nimport { DocumentSetSummary, Tag, ValidSources } from \"@/lib/types\";\nimport { SourceMetadata } from \"@/lib/search/interfaces\";\nimport { FiBook, FiBookmark, FiMap, FiX } from \"react-icons/fi\";\nimport { SearchDateRangeSelector } from \"@/components/dateRangeSelectors/SearchDateRangeSelector\";\nimport { DateRangePickerValue } from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { listSourceMetadata } from \"@/lib/sources\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport { FilterDropdown } from \"@/components/search/filtering/FilterDropdown\";\n\nexport interface SourceSelectorProps {\n  timeRange: DateRangePickerValue | null;\n  setTimeRange: React.Dispatch<\n    React.SetStateAction<DateRangePickerValue | null>\n  >;\n  showDocSidebar?: boolean;\n  selectedSources: SourceMetadata[];\n  setSelectedSources: React.Dispatch<React.SetStateAction<SourceMetadata[]>>;\n  selectedDocumentSets: string[];\n  setSelectedDocumentSets: React.Dispatch<React.SetStateAction<string[]>>;\n  selectedTags: Tag[];\n  setSelectedTags: React.Dispatch<React.SetStateAction<Tag[]>>;\n  availableDocumentSets: DocumentSetSummary[];\n  existingSources: ValidSources[];\n  availableTags: Tag[];\n  toggleFilters: () => void;\n  filtersUntoggled: boolean;\n  tagsOnLeft: boolean;\n}\n\nexport function SelectedBubble({\n  children,\n  onClick,\n}: {\n  children: string | JSX.Element;\n  onClick: () => void;\n}) {\n  return (\n    <div\n      className={\n        \"flex cursor-pointer items-center border border-border \" +\n        \"py-1 my-1.5 rounded-lg px-2 w-fit hover:bg-accent-background-hovered\"\n      }\n      onClick={onClick}\n    >\n      {children}\n      <FiX className=\"ml-2\" size={14} />\n    </div>\n  );\n}\n\nexport function HorizontalFilters({\n  timeRange,\n  setTimeRange,\n  selectedSources,\n  setSelectedSources,\n  selectedDocumentSets,\n  setSelectedDocumentSets,\n  availableDocumentSets,\n  existingSources,\n}: SourceSelectorProps) {\n  const handleSourceSelect = (source: SourceMetadata) => {\n    setSelectedSources((prev: SourceMetadata[]) => {\n      const prevSourceNames = prev.map((source) => source.internalName);\n      if (prevSourceNames.includes(source.internalName)) {\n        return prev.filter((s) => s.internalName !== source.internalName);\n      } else {\n        return [...prev, source];\n      }\n    });\n  };\n\n  const handleDocumentSetSelect = (documentSetName: string) => {\n    setSelectedDocumentSets((prev: string[]) => {\n      if (prev.includes(documentSetName)) {\n        return prev.filter((s) => s !== documentSetName);\n      } else {\n        return [...prev, documentSetName];\n      }\n    });\n  };\n\n  const allSources = listSourceMetadata();\n  const availableSources = allSources.filter((source) =>\n    existingSources.includes(source.internalName)\n  );\n\n  return (\n    <div className=\"b\">\n      <div className=\"flex gap-x-3\">\n        <div className=\"w-52\">\n          <SearchDateRangeSelector\n            value={timeRange}\n            onValueChange={setTimeRange}\n          />\n        </div>\n\n        <FilterDropdown\n          width=\"w-52\"\n          options={availableSources.map((source) => {\n            return {\n              key: source.displayName,\n              display: (\n                <>\n                  <SourceIcon\n                    sourceType={source.baseSourceType || source.internalName}\n                    iconSize={16}\n                  />\n                  <span className=\"ml-2 text-sm\">{source.displayName}</span>\n                </>\n              ),\n            };\n          })}\n          selected={selectedSources.map((source) => source.displayName)}\n          handleSelect={(option) =>\n            handleSourceSelect(\n              allSources.find((source) => source.displayName === option.key)!\n            )\n          }\n          icon={\n            <div className=\"my-auto mr-2 w-[16px] h-[16px]\">\n              <FiMap size={16} />\n            </div>\n          }\n          defaultDisplay=\"All Sources\"\n        />\n        {availableDocumentSets.length > 0 && (\n          <FilterDropdown\n            width=\"w-52\"\n            options={availableDocumentSets.map((documentSet) => {\n              return {\n                key: documentSet.name,\n                display: (\n                  <>\n                    <div className=\"my-auto\">\n                      <FiBookmark />\n                    </div>\n                    <span className=\"ml-2 text-sm\">{documentSet.name}</span>\n                  </>\n                ),\n              };\n            })}\n            selected={selectedDocumentSets}\n            handleSelect={(option) => handleDocumentSetSelect(option.key)}\n            icon={\n              <div className=\"my-auto mr-2 w-[16px] h-[16px]\">\n                <FiBook size={16} />\n              </div>\n            }\n            defaultDisplay=\"All Document Sets\"\n          />\n        )}\n      </div>\n\n      <div className=\"flex  mt-2\">\n        <div className=\"flex flex-wrap gap-x-2\">\n          {timeRange && timeRange.selectValue && (\n            <SelectedBubble onClick={() => setTimeRange(null)}>\n              <div className=\"text-sm flex\">{timeRange.selectValue}</div>\n            </SelectedBubble>\n          )}\n          {existingSources.length > 0 &&\n            selectedSources.map((source) => (\n              <SelectedBubble\n                key={source.internalName}\n                onClick={() => handleSourceSelect(source)}\n              >\n                <>\n                  <SourceIcon\n                    sourceType={source.baseSourceType || source.internalName}\n                    iconSize={16}\n                  />\n                  <span className=\"ml-2 text-sm\">{source.displayName}</span>\n                </>\n              </SelectedBubble>\n            ))}\n          {selectedDocumentSets.length > 0 &&\n            selectedDocumentSets.map((documentSetName) => (\n              <SelectedBubble\n                key={documentSetName}\n                onClick={() => handleDocumentSetSelect(documentSetName)}\n              >\n                <>\n                  <div>\n                    <FiBookmark />\n                  </div>\n                  <span className=\"ml-2 text-sm\">{documentSetName}</span>\n                </>\n              </SelectedBubble>\n            ))}\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/filters/TimeRangeSelector.tsx",
    "content": "import { DefaultDropdownElement } from \"../Dropdown\";\n\nexport function TimeRangeSelector({\n  value,\n  onValueChange,\n  className,\n  timeRangeValues,\n}: {\n  value: any;\n  onValueChange: any;\n  className: any;\n\n  timeRangeValues: { label: string; value: Date }[];\n}) {\n  return (\n    <div className={className}>\n      {timeRangeValues.map((timeRangeValue) => (\n        <DefaultDropdownElement\n          key={timeRangeValue.label}\n          name={timeRangeValue.label}\n          onSelect={() =>\n            onValueChange({\n              to: new Date(),\n              from: timeRangeValue.value,\n              selectValue: timeRangeValue.label,\n            })\n          }\n          isSelected={value?.selectValue === timeRangeValue.label}\n        />\n      ))}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/header/AnnouncementBanner.tsx",
    "content": "\"use client\";\nimport { useState, useEffect, useContext } from \"react\";\nimport { CustomTooltip } from \"../tooltip/CustomTooltip\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport Link from \"next/link\";\nimport type { Route } from \"next\";\nimport Cookies from \"js-cookie\";\nimport { SvgX } from \"@opal/icons\";\nconst DISMISSED_NOTIFICATION_COOKIE_PREFIX = \"dismissed_notification_\";\nconst COOKIE_EXPIRY_DAYS = 1;\n\nexport function AnnouncementBanner() {\n  const settings = useContext(SettingsContext);\n  const [localNotifications, setLocalNotifications] = useState(\n    settings?.settings.notifications || []\n  );\n\n  useEffect(() => {\n    const filteredNotifications = (\n      settings?.settings.notifications || []\n    ).filter(\n      (notification) =>\n        !Cookies.get(\n          `${DISMISSED_NOTIFICATION_COOKIE_PREFIX}${notification.id}`\n        )\n    );\n    setLocalNotifications(filteredNotifications);\n  }, [settings?.settings.notifications]);\n\n  if (!localNotifications || localNotifications.length === 0) return null;\n\n  const handleDismiss = async (notificationId: number) => {\n    try {\n      const response = await fetch(\n        `/api/notifications/${notificationId}/dismiss`,\n        {\n          method: \"POST\",\n        }\n      );\n      if (response.ok) {\n        Cookies.set(\n          `${DISMISSED_NOTIFICATION_COOKIE_PREFIX}${notificationId}`,\n          \"true\",\n          { expires: COOKIE_EXPIRY_DAYS }\n        );\n        setLocalNotifications((prevNotifications) =>\n          prevNotifications.filter(\n            (notification) => notification.id !== notificationId\n          )\n        );\n      } else {\n        console.error(\"Failed to dismiss notification\");\n      }\n    } catch (error) {\n      console.error(\"Error dismissing notification:\", error);\n    }\n  };\n\n  return (\n    <>\n      {localNotifications\n        .filter((notification) => !notification.dismissed)\n        .map((notification) => {\n          return (\n            <div\n              key={notification.id}\n              className=\"absolute top-0 left-1/2 transform -translate-x-1/2 bg-blue-600 rounded-sm text-white px-4 pr-8 py-3 mx-auto\"\n            >\n              {notification.notif_type == \"reindex\" ? (\n                <p className=\"text-center\">\n                  Your index is out of date - we strongly recommend updating\n                  your search settings.{\" \"}\n                  <Link\n                    href={\"/admin/configuration/search\"}\n                    className=\"ml-2 underline cursor-pointer\"\n                  >\n                    Update here\n                  </Link>\n                </p>\n              ) : notification.notif_type == \"two_day_trial_ending\" ? (\n                <p className=\"text-center\">\n                  Your trial is ending soon - submit your billing information to\n                  continue using Onyx.{\" \"}\n                  <Link\n                    href={\"/admin/billing\" as Route}\n                    className=\"ml-2 underline cursor-pointer\"\n                  >\n                    Update here\n                  </Link>\n                </p>\n              ) : null}\n              <button\n                onClick={() => handleDismiss(notification.id)}\n                className=\"absolute top-0 right-0 mt-2 mr-2\"\n                aria-label=\"Dismiss\"\n              >\n                <CustomTooltip showTick citation delay={100} content=\"Dismiss\">\n                  <SvgX className=\"stroke-text-04 h-5 w-5\" />\n                </CustomTooltip>\n              </button>\n            </div>\n          );\n        })}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/components/header/HeaderTitle.tsx",
    "content": "\"use client\";\n\nimport React, { JSX } from \"react\";\n\nexport function HeaderTitle({\n  children,\n  backgroundToggled,\n}: {\n  children: JSX.Element | string;\n  backgroundToggled?: boolean;\n}) {\n  const isString = typeof children === \"string\";\n  const textSize =\n    isString && children.length > 10\n      ? \"text-lg pb-[4px] \"\n      : \"pb-[2px] text-2xl\";\n\n  return (\n    <h1\n      className={`${textSize} ${\n        backgroundToggled\n          ? \"text-text-sidebar-toggled-header\"\n          : \"text-text-sidebar-header\"\n      } break-words dark:text-[#fff] text-left line-clamp-2 ellipsis text-strong overflow-hidden leading-none font-bold`}\n    >\n      {children}\n    </h1>\n  );\n}\n"
  },
  {
    "path": "web/src/components/icons/DynamicFaIcon.tsx",
    "content": "import React from \"react\";\nimport { IconBaseProps, IconType } from \"react-icons\";\nimport { FaQuestion } from \"react-icons/fa\";\n\ninterface DynamicIconProps extends IconBaseProps {\n  name: string;\n}\n\n// Renders a FontAwesome icon dynamically based on the provided name\nconst DynamicFaIcon: React.FC<DynamicIconProps> = ({ name, ...props }) => {\n  const IconComponent = getPreloadedIcon(name);\n  return IconComponent ? (\n    <IconComponent className=\"h-4 w-4\" {...props} />\n  ) : (\n    <FaQuestion className=\"h-4 w-4\" {...props} />\n  );\n};\n\n// Cache for storing preloaded icons\nconst iconCache: Record<string, IconType> = {};\n\n// Preloads icons asynchronously and stores them in the cache\nexport async function preloadIcons(iconNames: string[]): Promise<void> {\n  const promises = iconNames.map(async (name) => {\n    try {\n      const iconModule = await import(\"react-icons/fa\");\n      const iconName = `Fa${\n        name.charAt(0).toUpperCase() + name.slice(1)\n      }` as keyof typeof iconModule;\n      iconCache[name] = (iconModule[iconName] as IconType) || FaQuestion;\n    } catch (error) {\n      console.error(`Failed to load icon: ${name}`, error);\n      iconCache[name] = FaQuestion;\n    }\n  });\n\n  await Promise.all(promises);\n}\n\n// Retrieves a preloaded icon from the cache\nexport function getPreloadedIcon(name: string): IconType | undefined {\n  return iconCache[name] || FaQuestion;\n}\n\nexport default DynamicFaIcon;\n"
  },
  {
    "path": "web/src/components/icons/icons.test.tsx",
    "content": "/**\n * Icon Component Tests\n *\n * Tests logo icons to ensure they render correctly with proper accessibility\n * and support various display sizes.\n */\nimport React from \"react\";\nimport { SvgBifrost } from \"@opal/icons\";\nimport { render } from \"@tests/setup/test-utils\";\nimport { GithubIcon, GitbookIcon, ConfluenceIcon } from \"./icons\";\n\ndescribe(\"Logo Icons\", () => {\n  test(\"renders with alt text\", () => {\n    const { container } = render(<GithubIcon />);\n    const image = container.querySelector(\"img\");\n\n    expect(image).toBeInTheDocument();\n    expect(image).toHaveAttribute(\"alt\");\n  });\n\n  test(\"applies custom size\", () => {\n    const { container } = render(<GithubIcon size={48} />);\n    const image = container.querySelector(\"img\");\n\n    expect(image).toHaveStyle({ width: \"48px\", height: \"48px\" });\n  });\n\n  test(\"applies size adjustments\", () => {\n    // ConfluenceIcon has a +4px size adjustment\n    const { container } = render(<ConfluenceIcon size={16} />);\n    const image = container.querySelector(\"img\");\n\n    // Base 16 + adjustment 4 = 20\n    expect(image).toHaveStyle({ width: \"20px\", height: \"20px\" });\n  });\n\n  // This test is for icons that have light and dark variants (e.g. GitbookIcon)\n  // Both exist in the DOM, one is hidden via CSS.\n  test(\"renders both light and dark variants\", () => {\n    const { container } = render(<GitbookIcon />);\n    const images = container.querySelectorAll(\"img\");\n\n    // Should render both light and dark variants in the DOM (one hidden via CSS)\n    expect(images).toHaveLength(2);\n    images.forEach((img) => {\n      expect(img).toHaveAttribute(\"alt\");\n    });\n  });\n\n  test(\"accepts className and size props\", () => {\n    expect(() => {\n      render(<GithubIcon size={100} className=\"custom-class\" />);\n    }).not.toThrow();\n  });\n\n  test(\"renders the Bifrost icon with theme-aware colors\", () => {\n    const { container } = render(\n      <SvgBifrost size={32} className=\"custom text-red-500 dark:text-black\" />\n    );\n    const icon = container.querySelector(\"svg\");\n\n    expect(icon).toBeInTheDocument();\n    expect(icon).toHaveClass(\"custom\", \"text-[#33C19E]\", \"dark:text-white\");\n    expect(icon).not.toHaveClass(\"text-red-500\", \"dark:text-black\");\n  });\n});\n"
  },
  {
    "path": "web/src/components/icons/icons.tsx",
    "content": "\"use client\";\n\nimport { JSX } from \"react\";\nimport Image from \"next/image\";\nimport { StaticImageData } from \"next/image\";\nimport { BrainIcon as Brain } from \"@phosphor-icons/react\";\nimport {\n  FiAlertCircle,\n  FiAlertTriangle,\n  FiChevronDown,\n  FiChevronsDown,\n  FiChevronsUp,\n  FiClipboard,\n  FiCpu,\n  FiDatabase,\n  FiEdit2,\n  FiFile,\n  FiGlobe,\n  FiInfo,\n  FiMail,\n} from \"react-icons/fi\";\nimport { FaRobot } from \"react-icons/fa\";\nimport { SiBookstack } from \"react-icons/si\";\nimport axeroImage from \"@public/Axero.jpeg\";\nimport airtableIcon from \"@public/Airtable.svg\";\nimport amazonSVG from \"@public/Amazon.svg\";\nimport anthropicSVG from \"@public/Anthropic.svg\";\nimport asanaIcon from \"@public/Asana.png\";\nimport azureIcon from \"@public/Azure.png\";\nimport bitbucketIcon from \"@public/Bitbucket.svg\";\nimport clickupIcon from \"@public/Clickup.svg\";\nimport codaIcon from \"@public/Coda.png\";\nimport cohereIcon from \"@public/Cohere.svg\";\nimport confluenceSVG from \"@public/Confluence.svg\";\nimport deepseekSVG from \"@public/Deepseek.svg\";\nimport discordIcon from \"@public/discord.png\";\nimport discourseIcon from \"@public/Discourse.png\";\nimport document360Icon from \"@public/Document360.png\";\nimport dropboxIcon from \"@public/Dropbox.png\";\nimport drupalwikiIcon from \"@public/DrupalWiki.png\";\nimport egnyteIcon from \"@public/Egnyte.png\";\nimport elevenLabsDarkSVG from \"@public/ElevenLabsDark.svg\";\nimport elevenLabsSVG from \"@public/ElevenLabs.svg\";\nimport firefliesIcon from \"@public/Fireflies.png\";\nimport freshdeskIcon from \"@public/Freshdesk.png\";\nimport geminiSVG from \"@public/Gemini.svg\";\nimport gitbookDarkIcon from \"@public/GitBookDark.png\";\nimport gitbookLightIcon from \"@public/GitBookLight.png\";\nimport githubLightIcon from \"@public/Github.png\";\nimport gongIcon from \"@public/Gong.png\";\nimport googleIcon from \"@public/Google.png\";\nimport googleCloudStorageIcon from \"@public/GoogleCloudStorage.png\";\nimport googleSitesIcon from \"@public/GoogleSites.png\";\nimport guruIcon from \"@public/Guru.svg\";\nimport highspotIcon from \"@public/Highspot.png\";\nimport hubSpotIcon from \"@public/HubSpot.png\";\nimport jiraSVG from \"@public/Jira.svg\";\nimport kimiIcon from \"@public/Kimi.png\";\nimport linearIcon from \"@public/Linear.png\";\nimport litellmIcon from \"@public/litellm.png\";\nimport lmStudioIcon from \"@public/lm_studio.png\";\nimport mediawikiIcon from \"@public/MediaWiki.svg\";\nimport metaSVG from \"@public/Meta.svg\";\nimport microsoftIcon from \"@public/microsoft.png\";\nimport microsoftSVG from \"@public/Microsoft.svg\";\nimport mistralSVG from \"@public/Mistral.svg\";\nimport mixedBreadSVG from \"@public/Mixedbread.png\";\nimport nomicSVG from \"@public/nomic.svg\";\nimport OCIStorageSVG from \"@public/OCI.svg\";\nimport ollamaIcon from \"@public/Ollama.png\";\nimport openAISVG from \"@public/Openai.svg\";\nimport openSourceIcon from \"@public/OpenSource.png\";\nimport outlinePNG from \"@public/Outline.png\";\nimport qwenSVG from \"@public/Qwen.svg\";\nimport r2Icon from \"@public/r2.png\";\nimport s3Icon from \"@public/S3.png\";\nimport salesforceIcon from \"@public/Salesforce.png\";\nimport sharepointIcon from \"@public/Sharepoint.png\";\nimport slackIcon from \"@public/Slack.png\";\nimport teamsIcon from \"@public/Teams.png\";\nimport outlookIcon from \"@public/Outlook.png\";\nimport oneDriveIcon from \"@public/OneDrive.png\";\nimport boxIcon from \"@public/Box.png\";\nimport trelloIcon from \"@public/Trello.png\";\nimport serviceNowIcon from \"@public/Servicenow.png\";\nimport wikipediaIcon from \"@public/Wikipedia.png\";\nimport xenforoIcon from \"@public/Xenforo.svg\";\nimport zAIIcon from \"@public/Z_AI.png\";\nimport zendeskIcon from \"@public/Zendesk.svg\";\nimport zulipIcon from \"@public/Zulip.png\";\nimport testrailSVG from \"@public/Testrail.svg\";\nimport gitlabIcon from \"@public/Gitlab.png\";\nimport gmailIcon from \"@public/Gmail.png\";\nimport googleDriveIcon from \"@public/GoogleDrive.png\";\nimport loopioIcon from \"@public/Loopio.png\";\nimport notionIcon from \"@public/Notion.png\";\nimport productboardIcon from \"@public/Productboard.png\";\nimport slabLogoIcon from \"@public/SlabLogo.png\";\n\nexport interface IconProps {\n  size?: number;\n  className?: string;\n}\nexport interface LogoIconProps extends IconProps {\n  src: string | StaticImageData;\n}\nexport type OnyxIconType = (props: IconProps) => JSX.Element;\n\nexport const defaultTailwindCSS = \"my-auto flex flex-shrink-0 text-default\";\nexport const defaultTailwindCSSBlue = \"my-auto flex flex-shrink-0 text-link\";\n\nexport const LogoIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n  src,\n}: LogoIconProps) => (\n  <Image\n    style={{ width: `${size}px`, height: `${size}px` }}\n    className={`w-[${size}px] h-[${size}px] object-contain ` + className}\n    src={src}\n    alt=\"Logo\"\n    width=\"96\"\n    height=\"96\"\n  />\n);\n\n// Helper to create simple icon components from react-icon libraries\nexport function createIcon(\n  IconComponent: React.ComponentType<{ size?: number; className?: string }>\n) {\n  function IconWrapper({\n    size = 16,\n    className = defaultTailwindCSS,\n  }: IconProps) {\n    return <IconComponent size={size} className={className} />;\n  }\n\n  IconWrapper.displayName = `Icon(${\n    IconComponent.displayName || IconComponent.name || \"Component\"\n  })`;\n  return IconWrapper;\n}\n\n/**\n * Creates a logo icon component that automatically supports dark mode adaptations.\n *\n * Depending on the options provided, the returned component handles:\n * 1. Light/Dark variants: If both `src` and `darkSrc` are provided, displays the\n *    appropriate image based on the current color theme.\n * 2. Monochromatic inversion: If `monochromatic` is true, applies a CSS color inversion\n *    in dark mode for a monochrome icon appearance.\n * 3. Static icon: If only `src` is provided, renders the image without dark mode adaptation.\n *\n * @param src - The image or SVG source used for the icon (light/default mode).\n * @param options - Optional settings:\n *   - darkSrc: The image or SVG source used specifically for dark mode.\n *   - monochromatic: If true, applies a CSS inversion in dark mode for monochrome logos.\n *   - sizeAdjustment: Number to add to the icon size (e.g., 4 to make icon larger).\n *   - classNameAddition: Additional CSS classes to apply (e.g., '-m-0.5' for margin).\n * @returns A React functional component that accepts {@link IconProps} and renders\n *          the logo with dark mode handling as needed.\n */\nconst createLogoIcon = (\n  src: string | StaticImageData,\n  options?: {\n    darkSrc?: string | StaticImageData;\n    monochromatic?: boolean;\n    sizeAdjustment?: number;\n    classNameAddition?: string;\n  }\n) => {\n  const {\n    darkSrc,\n    monochromatic,\n    sizeAdjustment = 0,\n    classNameAddition = \"\",\n  } = options || {};\n\n  const LogoIconWrapper = ({\n    size = 16,\n    className = defaultTailwindCSS,\n  }: IconProps) => {\n    const adjustedSize = size + sizeAdjustment;\n\n    // Build className dynamically (only apply monochromatic if no darkSrc)\n    const monochromaticClass = !darkSrc && monochromatic ? \"dark:invert\" : \"\";\n    const finalClassName = [className, classNameAddition, monochromaticClass]\n      .filter(Boolean)\n      .join(\" \");\n\n    // If darkSrc is provided, use CSS-based dark mode switching\n    // This avoids hydration issues and content flashing since next-themes\n    // sets the .dark class before React hydrates\n    if (darkSrc) {\n      return (\n        <>\n          <LogoIcon\n            size={adjustedSize}\n            className={`${finalClassName} dark:hidden`}\n            src={src}\n          />\n          <LogoIcon\n            size={adjustedSize}\n            className={`${finalClassName} hidden dark:block`}\n            src={darkSrc}\n          />\n        </>\n      );\n    }\n\n    return (\n      <LogoIcon size={adjustedSize} className={finalClassName} src={src} />\n    );\n  };\n\n  LogoIconWrapper.displayName = \"LogoIconWrapper\";\n  return LogoIconWrapper;\n};\n\n// ============================================================================\n// GENERIC SVG COMPONENTS (sorted alphabetically)\n// ============================================================================\nexport const AlertIcon = createIcon(FiAlertCircle);\nexport const ArtAsistantIcon = ({\n  size = 24,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      viewBox=\"0 0 24 24\"\n      fill=\"none\"\n      xmlns=\"http://www.w3.org/2000/svg\"\n    >\n      <path\n        d=\"M12 1.5C9.98656 1.4999 8.01555 2.07871 6.32185 3.16743C4.62815 4.25616 3.28318 5.8089 2.44724 7.6406C1.6113 9.47231 1.31963 11.5057 1.60699 13.4986C1.89435 15.4914 2.74862 17.3596 4.068 18.8805L10.422 12.6285C10.8429 12.2144 11.4096 11.9824 12 11.9824C12.5904 11.9824 13.1571 12.2144 13.578 12.6285L19.932 18.8805C21.2514 17.3596 22.1056 15.4914 22.393 13.4986C22.6804 11.5057 22.3887 9.47231 21.5528 7.6406C20.7168 5.8089 19.3719 4.25616 17.6782 3.16743C15.9845 2.07871 14.0134 1.4999 12 1.5ZM12 22.5C14.5238 22.5042 16.9639 21.5952 18.87 19.941L12.525 13.6965C12.3848 13.5591 12.1963 13.4821 12 13.4821C11.8037 13.4821 11.6152 13.5591 11.475 13.6965L5.13 19.941C7.03607 21.5952 9.47619 22.5042 12 22.5ZM0 12C0 8.8174 1.26428 5.76516 3.51472 3.51472C5.76516 1.26428 8.8174 0 12 0C15.1826 0 18.2348 1.26428 20.4853 3.51472C22.7357 5.76516 24 8.8174 24 12C24 15.1826 22.7357 18.2348 20.4853 20.4853C18.2348 22.7357 15.1826 24 12 24C8.8174 24 5.76516 22.7357 3.51472 20.4853C1.26428 18.2348 0 15.1826 0 12ZM16.5 8.25C16.5 8.05109 16.421 7.86032 16.2803 7.71967C16.1397 7.57902 15.9489 7.5 15.75 7.5C15.5511 7.5 15.3603 7.57902 15.2197 7.71967C15.079 7.86032 15 8.05109 15 8.25C15 8.44891 15.079 8.63968 15.2197 8.78033C15.3603 8.92098 15.5511 9 15.75 9C15.9489 9 16.1397 8.92098 16.2803 8.78033C16.421 8.63968 16.5 8.44891 16.5 8.25ZM18 8.25C18 8.54547 17.9418 8.83806 17.8287 9.11104C17.7157 9.38402 17.5499 9.63206 17.341 9.84099C17.1321 10.0499 16.884 10.2157 16.611 10.3287C16.3381 10.4418 16.0455 10.5 15.75 10.5C15.4545 10.5 15.1619 10.4418 14.889 10.3287C14.616 10.2157 14.3679 10.0499 14.159 9.84099C13.9501 9.63206 13.7843 9.38402 13.6713 9.11104C13.5582 8.83806 13.5 8.54547 13.5 8.25C13.5 7.65326 13.7371 7.08097 14.159 6.65901C14.581 6.23705 15.1533 6 15.75 6C16.3467 6 16.919 6.23705 17.341 6.65901C17.7629 7.08097 18 7.65326 18 8.25Z\"\n        fill=\"currentColor\"\n      />\n    </svg>\n  );\n};\nexport const BookmarkIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 16 16\"\n    >\n      <path\n        fill=\"currentColor\"\n        d=\"M3.75 2a.75.75 0 0 0-.75.75v10.5a.75.75 0 0 0 1.28.53L8 10.06l3.72 3.72a.75.75 0 0 0 1.28-.53V2.75a.75.75 0 0 0-.75-.75z\"\n      />\n    </svg>\n  );\n};\nexport const BrainIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return <Brain size={size} className={className} />;\n};\nexport const CPUIcon = createIcon(FiCpu);\nexport const DatabaseIcon = createIcon(FiDatabase);\nexport const CameraIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 14 14\"\n    >\n      <g\n        fill=\"none\"\n        stroke=\"currentColor\"\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      >\n        <path d=\"M13.5 5a1 1 0 0 0-1-1h-2L9 2H5L3.5 4h-2a1 1 0 0 0-1 1v6a1 1 0 0 0 1 1h11a1 1 0 0 0 1-1z\" />\n        <path d=\"M7 9.75a2.25 2.25 0 1 0 0-4.5a2.25 2.25 0 0 0 0 4.5\" />\n      </g>\n    </svg>\n  );\n};\nexport const Caret = ({\n  size = 24,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 24 24\"\n    >\n      <path\n        fill=\"currentColor\"\n        d=\"m12.37 15.835l6.43-6.63C19.201 8.79 18.958 8 18.43 8H5.57c-.528 0-.771.79-.37 1.205l6.43 6.63c.213.22.527.22.74 0Z\"\n      />\n    </svg>\n  );\n};\nexport const CheckmarkIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 24 24\"\n    >\n      <path\n        fill=\"none\"\n        stroke=\"currentColor\"\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n        strokeWidth=\"2\"\n        d=\"M20 6L9 17l-5-5\"\n      />\n    </svg>\n  );\n};\nexport const ChevronDownIcon = createIcon(FiChevronDown);\nexport const ChevronsDownIcon = createIcon(FiChevronsDown);\nexport const ChevronsUpIcon = createIcon(FiChevronsUp);\nexport const ClipboardIcon = createIcon(FiClipboard);\nexport const DexpandTwoIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 14 14\"\n    >\n      <path\n        fill=\"none\"\n        stroke=\"currentColor\"\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n        d=\"m.5 13.5l5-5m-4 0h4v4m8-12l-5 5m4 0h-4v-4\"\n      />\n    </svg>\n  );\n};\nexport const DocumentIcon2 = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 24 24\"\n    >\n      <path\n        fill=\"none\"\n        stroke=\"currentColor\"\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n        strokeWidth=\"1.5\"\n        d=\"M19.5 14.25v-2.625a3.375 3.375 0 0 0-3.375-3.375h-1.5A1.125 1.125 0 0 1 13.5 7.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H8.25m0 12.75h7.5m-7.5 3H12M10.5 2.25H5.625c-.621 0-1.125.504-1.125 1.125v17.25c0 .621.504 1.125 1.125 1.125h12.75c.621 0 1.125-.504 1.125-1.125V11.25a9 9 0 0 0-9-9Z\"\n      />\n    </svg>\n  );\n};\nexport const DownloadCSVIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 14 14\"\n    >\n      <path\n        fill=\"none\"\n        stroke=\"currentColor\"\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n        d=\"M.5 10.5v1a2 2 0 0 0 2 2h9a2 2 0 0 0 2-2v-1M4 6l3 3.5L10 6M7 9.5v-9\"\n      />\n    </svg>\n  );\n};\nexport const EditIcon = createIcon(FiEdit2);\nexport const EmailIcon = createIcon(FiMail);\n\n//  COMPANY LOGOS\nexport const ExpandTwoIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 14 14\"\n    >\n      <path\n        fill=\"none\"\n        stroke=\"currentColor\"\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n        d=\"m8.5 5.5l5-5m-4 0h4v4m-8 4l-5 5m4 0h-4v-4\"\n      />\n    </svg>\n  );\n};\nexport const FileIcon = createIcon(FiFile);\nexport const FileOptionIcon = ({\n  size = 24,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      width=\"24\"\n      height=\"24\"\n      viewBox=\"0 0 24 24\"\n      fill=\"none\"\n      xmlns=\"http://www.w3.org/2000/svg\"\n    >\n      <path\n        d=\"M20.6801 7.02928C20.458 6.5654 20.1451 6.15072 19.76 5.80973L16.76 3.09074C16.0939 2.47491 15.2435 2.09552 14.3401 2.01115C14.2776 1.99628 14.2125 1.99628 14.15 2.01115H8.21008C7.54764 1.98307 6.88617 2.08698 6.26428 2.31683C5.64239 2.54667 5.07249 2.89785 4.58765 3.34995C4.10281 3.80205 3.71274 4.34605 3.44019 4.95025C3.16763 5.55445 3.01797 6.20679 3 6.86934V17.1655C3.03538 18.1647 3.36978 19.1303 3.95984 19.9375C4.5499 20.7448 5.36855 21.3566 6.31006 21.6939C6.92247 21.9253 7.57613 22.0274 8.22998 21.9937H15.79C16.4525 22.0218 17.1138 21.9179 17.7357 21.6881C18.3576 21.4582 18.9276 21.107 19.4125 20.6549C19.8973 20.2028 20.2874 19.6588 20.5599 19.0546C20.8325 18.4504 20.982 17.7981 21 17.1355V8.56872C21.0034 8.03873 20.8944 7.51404 20.6801 7.02928ZM16.0601 7.41915C15.9174 7.42047 15.7759 7.39353 15.6437 7.33986C15.5115 7.2862 15.3913 7.20687 15.2899 7.10649C15.1886 7.00611 15.1081 6.88664 15.0532 6.755C14.9983 6.62336 14.97 6.48215 14.97 6.33953V3.69052C15.63 3.85046 18.2 6.48947 18.76 6.92931C18.9256 7.06878 19.0675 7.23423 19.1801 7.41915H16.0601Z\"\n        fill=\"currentColor\"\n      />\n    </svg>\n  );\n};\nexport const GlobeIcon = createIcon(FiGlobe);\nexport const GroupsIconSkeleton = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 24 24\"\n    >\n      <g fill=\"none\" stroke=\"currentColor\" strokeWidth=\"1.5\">\n        <circle cx=\"9\" cy=\"6\" r=\"4\" />\n        <path strokeLinecap=\"round\" d=\"M15 9a3 3 0 1 0 0-6\" />\n        <ellipse cx=\"9\" cy=\"17\" rx=\"7\" ry=\"4\" />\n        <path\n          strokeLinecap=\"round\"\n          d=\"M18 14c1.754.385 3 1.359 3 2.5c0 1.03-1.014 1.923-2.5 2.37\"\n        />\n      </g>\n    </svg>\n  );\n};\nexport const InfoIcon = createIcon(FiInfo);\nexport const MacIcon = ({\n  size = 16,\n  className = \"my-auto flex flex-shrink-0 \",\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 24 24\"\n    >\n      <path\n        fill=\"currentColor\"\n        d=\"M6.5 4.5a2 2 0 0 1 2 2v2h-2a2 2 0 1 1 0-4Zm4 4v-2a4 4 0 1 0-4 4h2v3h-2a4 4 0 1 0 4 4v-2h3v2a4 4 0 1 0 4-4h-2v-3h2a4 4 0 1 0-4-4v2h-3Zm0 2h3v3h-3v-3Zm5-2v-2a2 2 0 1 1 2 2h-2Zm0 7h2a2 2 0 1 1-2 2v-2Zm-7 0v2a2 2 0 1 1-2-2h2Z\"\n      />\n    </svg>\n  );\n};\nexport const NewChatIcon = ({\n  size = 24,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      viewBox=\"0 0 20 20\"\n      fill=\"none\"\n      xmlns=\"http://www.w3.org/2000/svg\"\n    >\n      <path\n        d=\"M12.5 1.99982H6C3.79086 1.99982 2 3.79068 2 5.99982V13.9998C2 16.209 3.79086 17.9998 6 17.9998H14C16.2091 17.9998 18 16.209 18 13.9998V8.49982\"\n        stroke=\"currentColor\"\n        strokeLinecap=\"round\"\n      />\n      <path\n        d=\"M17.1471 5.13076C17.4492 4.82871 17.6189 4.41901 17.619 3.9918C17.6191 3.56458 17.4494 3.15484 17.1474 2.85271C16.8453 2.55058 16.4356 2.38082 16.0084 2.38077C15.5812 2.38071 15.1715 2.55037 14.8693 2.85242L11.0562 6.66651L7.24297 10.4806C7.1103 10.6129 7.01218 10.7758 6.95726 10.9549L6.20239 13.4418C6.18762 13.4912 6.18651 13.5437 6.19916 13.5937C6.21182 13.6437 6.23778 13.6894 6.27428 13.7258C6.31078 13.7623 6.35646 13.7881 6.40648 13.8007C6.45651 13.8133 6.509 13.8121 6.5584 13.7972L9.04585 13.0429C9.2248 12.9885 9.38766 12.891 9.52014 12.7589L17.1471 5.13076Z\"\n        stroke=\"currentColor\"\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      />\n    </svg>\n  );\n};\nexport const NotebookIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 24 24\"\n    >\n      <path\n        fill=\"currentColor\"\n        d=\"M11.25 4.533A9.707 9.707 0 0 0 6 3a9.735 9.735 0 0 0-3.25.555a.75.75 0 0 0-.5.707v14.25a.75.75 0 0 0 1 .707A8.237 8.237 0 0 1 6 18.75c1.995 0 3.823.707 5.25 1.886V4.533Zm1.5 16.103A8.214 8.214 0 0 1 18 18.75c.966 0 1.89.166 2.75.47a.75.75 0 0 0 1-.708V4.262a.75.75 0 0 0-.5-.707A9.735 9.735 0 0 0 18 3a9.707 9.707 0 0 0-5.25 1.533v16.103Z\"\n      />\n    </svg>\n  );\n};\nexport const NotebookIconSkeleton = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 24 24\"\n    >\n      <path\n        fill=\"none\"\n        stroke=\"currentColor\"\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n        strokeWidth=\"1.5\"\n        d=\"M12 6.042A8.967 8.967 0 0 0 6 3.75c-1.052 0-2.062.18-3 .512v14.25A8.987 8.987 0 0 1 6 18c2.305 0 4.408.867 6 2.292m0-14.25a8.966 8.966 0 0 1 6-2.292c1.052 0 2.062.18 3 .512v14.25A8.987 8.987 0 0 0 18 18a8.967 8.967 0 0 0-6 2.292m0-14.25v14.25\"\n      />\n    </svg>\n  );\n};\nexport const OnyxIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      viewBox=\"0 0 56 56\"\n      fill=\"none\"\n      xmlns=\"http://www.w3.org/2000/svg\"\n    >\n      <path\n        fillRule=\"evenodd\"\n        clipRule=\"evenodd\"\n        d=\"M27.9998 0L10.8691 7.76944L27.9998 15.5389L45.1305 7.76944L27.9998 0ZM27.9998 40.4611L10.8691 48.2306L27.9998 56L45.1305 48.2306L27.9998 40.4611ZM48.2309 10.8691L56.0001 28.0003L48.2309 45.1314L40.4617 28.0003L48.2309 10.8691ZM15.5385 28.0001L7.76923 10.869L0 28.0001L7.76923 45.1313L15.5385 28.0001Z\"\n        fill=\"currentColor\"\n      />\n    </svg>\n  );\n};\nexport const OnyxLogoTypeIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  const aspectRatio = 2640 / 733; // Calculate the aspect ratio of the original SVG\n  const height = size / aspectRatio; // Calculate the height based on the aspect ratio\n\n  return (\n    <svg\n      version=\"1.1\"\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width={size}\n      height={height}\n      viewBox=\"0 0 2640 733\"\n      style={{ width: `${size}px`, height: `${height}px` }}\n      className={`w-[${size}px] h-[${height}px] ` + className}\n    >\n      <path\n        d=\"M0 0 C33.33 0 66.66 0 101 0 C116.0557783 38.21851415 116.0557783 38.21851415 122.9375 56 C123.73628808 58.06186447 124.53511763 60.12371288 125.33398438 62.18554688 C132.03825266 79.49433582 138.7196772 96.811945 145.40026855 114.12988281 C154.13201714 136.7646467 162.89604763 159.38677068 171.67333984 182.00390625 C175.22064578 191.14500491 178.76491946 200.28727941 182.30947876 209.42944336 C184.28953371 214.53634164 186.26977615 219.64316721 188.25 224.75 C189.04166757 226.79166632 189.83333424 228.83333298 190.625 230.875 C191.2128125 232.3909375 191.2128125 232.3909375 191.8125 233.9375 C193 237 194.1875 240.0625 195.375 243.125 C195.96267654 244.6405825 195.96267654 244.6405825 196.56222534 246.18678284 C197.35481547 248.23083697 198.14739765 250.27489418 198.93997192 252.31895447 C200.90494473 257.38661809 202.8700505 262.45423006 204.83544922 267.52172852 C208.56750336 277.14472477 212.29807286 286.76829035 216.02435303 296.39352417 C217.79675153 300.97152352 219.56985562 305.54924943 221.34301758 310.12695312 C222.19314439 312.32229107 223.04292035 314.51776492 223.89233398 316.71337891 C225.06537285 319.74545515 226.2395878 322.77707344 227.4140625 325.80859375 C227.93916916 327.16729836 227.93916916 327.16729836 228.47488403 328.55345154 C228.79464203 329.37813431 229.11440002 330.20281708 229.44384766 331.05249023 C229.72185211 331.77073471 229.99985657 332.48897919 230.2862854 333.22898865 C230.99698266 335.06102535 230.99698266 335.06102535 232 337 C246.56988259 292.81493181 261.13820056 248.62935087 275.6875 204.4375 C275.96156803 203.60505013 276.23563606 202.77260025 276.51800919 201.91492462 C279.01707729 194.32423703 281.51604488 186.73351636 284.01490784 179.14276123 C289.17792065 163.45917323 294.3418852 147.77589867 299.50601101 132.09267712 C302.59609693 122.70820373 305.68610876 113.32370595 308.77612305 103.93920898 C309.06425106 103.06415274 309.35237907 102.18909649 309.64923823 101.28752339 C311.15291036 96.72081077 312.65658203 92.154098 314.16025352 87.58738518 C316.88233858 79.32030053 319.60442781 71.05321725 322.32651675 62.78613389 C323.81240457 58.27343827 325.29829211 53.76074257 326.78417969 49.24804688 C327.37414551 47.45629883 327.96411133 45.66455078 328.55407715 43.87280273 C328.84746979 42.98175842 329.14086243 42.09071411 329.44314575 41.17266846 C333.96209717 27.44844564 338.48104858 13.72422282 343 0 C367.99937294 -0.58538238 392.99771191 -1.02550978 418.00257015 -1.2953043 C429.6146466 -1.42393963 441.22245313 -1.59910139 452.83178711 -1.88598633 C462.95725523 -2.13608217 473.07963139 -2.29665662 483.20810229 -2.35221237 C488.56749218 -2.38467268 493.91804247 -2.46022203 499.27458 -2.64325142 C532.79129476 -3.74490236 532.79129476 -3.74490236 540.42525101 2.4351387 C546.11597516 7.94072522 549.54345055 14.7896396 552.64213753 22.00372505 C554.56104941 26.23801807 557.12001289 29.84116876 559.875 33.5625 C560.74382604 34.81477537 561.60901068 36.06959387 562.46875 37.328125 C563.09330078 38.23401367 563.09330078 38.23401367 563.73046875 39.15820312 C565.94759045 42.37473507 568.16143251 45.59352145 570.375 48.8125 C571.04861572 49.79198608 571.04861572 49.79198608 571.73583984 50.79125977 C575.88812976 56.8318215 580.02612448 62.88212951 584.16357422 68.93286133 C587.60197218 73.96013108 591.04732974 78.98252305 594.5 84 C598.50958746 89.82679816 602.50756794 95.66143452 606.5 101.5 C618.66861604 119.30630862 618.66861604 119.30630862 631 137 C633.93322556 134.22886427 636.18623177 131.43481587 638.37109375 128.046875 C639.0165918 127.05220215 639.66208984 126.0575293 640.32714844 125.03271484 C641.37177246 123.407771 641.37177246 123.407771 642.4375 121.75 C643.18594341 120.594188 643.93498196 119.43876119 644.68457031 118.28369141 C645.87691047 116.4461905 647.06885448 114.60844385 648.25906372 112.76956177 C652.45578077 106.28625376 656.69365977 99.83021026 660.93508911 93.37609863 C662.7114261 90.67277816 664.48642899 87.96858282 666.26145935 85.2644043 C667.61103107 83.20841458 668.96077303 81.15253671 670.31054688 79.09667969 C672.96271903 75.05695522 675.61427434 71.01682617 678.265625 66.9765625 C678.69839752 66.31709122 679.13117004 65.65761993 679.57705688 64.97816467 C681.74925116 61.66806158 683.92132765 58.35788126 686.09326172 55.04760742 C690.15895391 48.85129621 694.22565727 42.65565607 698.29595947 36.46237183 C700.1986006 33.5672177 702.10066371 30.67168405 704.00268555 27.77612305 C704.90132401 26.4084212 705.80023926 25.04090117 706.69946289 23.67358398 C707.95401163 21.76593684 709.20756431 19.8576384 710.4609375 17.94921875 C711.55293981 16.28790204 712.64697455 14.62713955 713.79142761 13.00151062 C715.12629684 10.93344915 715.12629684 10.93344915 716.04707336 8.15730286 C717.64187545 4.52163394 718.9067278 2.5270255 722 0 C729.20177109 -2.08840248 737.15310869 -1.27358726 744.55859375 -1.07421875 C746.79057478 -1.05369084 749.02260553 -1.0380382 751.25465393 -1.02705383 C757.11679012 -0.98514255 762.97590648 -0.87717399 768.83685303 -0.75531006 C774.82246038 -0.64258465 780.808558 -0.59250224 786.79492188 -0.53710938 C798.53127155 -0.41929536 810.26538425 -0.23167118 822 0 C820.553704 3.75941534 818.91642122 6.80235597 816.5859375 10.09765625 C815.91151611 11.05768555 815.23709473 12.01771484 814.54223633 13.00683594 C813.82704834 14.01520508 813.11186035 15.02357422 812.375 16.0625 C810.9067251 18.15176282 809.43926664 20.24159965 807.97265625 22.33203125 C807.27543457 23.32412598 806.57821289 24.3162207 805.85986328 25.33837891 C803.18180282 29.17090911 800.59870181 33.0586811 798.0234375 36.9609375 C795.36316557 40.95648105 792.65120576 44.91561926 789.9375 48.875 C789.38537842 49.6805835 788.83325684 50.48616699 788.2644043 51.31616211 C787.41616089 52.55378296 787.41616089 52.55378296 786.55078125 53.81640625 C782.9003326 59.14263677 779.26155213 64.47677492 775.625 69.8125 C774.68829468 71.18668091 774.68829468 71.18668091 773.73266602 72.58862305 C771.82164232 75.39232337 769.91079192 78.19614172 768 81 C760.31372146 92.27869313 752.62415374 103.55504107 744.91357422 114.81713867 C740.76838789 120.87310547 736.63288078 126.93563003 732.5 133 C727.8399646 139.83788867 723.17469524 146.67212488 718.5 153.5 C713.24267506 161.1793747 707.99177778 168.86311662 702.75732422 176.55810547 C699.43988066 181.43315548 696.10700963 186.29662341 692.75 191.14453125 C691.71431885 192.64632935 691.71431885 192.64632935 690.65771484 194.1784668 C689.32948516 196.10380959 687.99666621 198.02599664 686.65869141 199.94458008 C686.06233887 200.81107178 685.46598633 201.67756348 684.8515625 202.5703125 C684.05999756 203.71137451 684.05999756 203.71137451 683.25244141 204.87548828 C681.95654959 207.07370477 681.15461584 208.44868613 681 211 C682.21715668 213.50960961 682.21715668 213.50960961 684.16796875 216.109375 C684.91224121 217.17607422 685.65651367 218.24277344 686.42333984 219.34179688 C686.83807556 219.92791748 687.25281128 220.51403809 687.68011475 221.11791992 C690.08556959 224.54795949 692.40329624 228.03742521 694.73828125 231.515625 C695.80171139 233.09175087 696.86569251 234.66750512 697.93017578 236.24291992 C698.48030334 237.05731537 699.03043091 237.87171082 699.597229 238.71078491 C700.44628815 239.96769638 700.44628815 239.96769638 701.3125 241.25 C702.50508189 243.01600738 703.69762671 244.78203979 704.89013672 246.5480957 C707.36330739 250.21070426 709.83663631 253.87320589 712.31005859 257.53564453 C715.83911575 262.76143727 719.36703433 267.98799773 722.89453125 273.21484375 C734.27251271 290.07392081 745.65406902 306.93057002 757.0425415 323.78256226 C765.76190859 336.68512283 774.4756335 349.59148371 783.18685913 362.49954224 C789.4720778 371.81275642 795.75900144 381.1248105 802.05078125 390.43359375 C802.62283295 391.2799942 803.19488464 392.12639465 803.78427124 392.9984436 C806.50812915 397.0285268 809.23231545 401.0583874 811.95703125 405.08789062 C813.83882414 407.87084107 815.7184406 410.65523891 817.59619141 413.44091797 C819.06398292 415.61794403 820.54021307 417.78797899 822.02490234 419.95361328 C822.57742676 420.76427246 823.12995117 421.57493164 823.69921875 422.41015625 C824.15514404 423.07281494 824.61106934 423.73547363 825.08081055 424.41821289 C826 426 826 426 826 428 C812.7186023 428.55849244 799.44003393 428.98365536 786.14924622 429.24349213 C779.97546122 429.36828224 773.80975625 429.5371114 767.640625 429.81054688 C724.95337975 431.65373323 724.95337975 431.65373323 716.75460434 424.41446114 C712.04037734 419.35447539 709.3798801 413.28122189 707.07299805 406.84484863 C705.66007111 403.09874371 703.78648606 400.42885338 701.359375 397.2578125 C700.75101807 396.39043457 700.14266113 395.52305664 699.51586914 394.62939453 C698.93313232 393.80294434 698.35039551 392.97649414 697.75 392.125 C692.61911083 384.79623086 687.57133526 377.42388622 682.63208008 369.96484375 C678.44807504 363.6523053 674.20014438 357.38466408 669.9375 351.125 C669.22142578 350.073125 668.50535156 349.02125 667.76757812 347.9375 C666.27717369 345.74859181 664.78664067 343.55977184 663.29589844 341.37109375 C650.09138869 321.98362059 636.97651467 302.54099407 624 283 C620.63667007 286.25172952 618.2102807 289.63210501 615.8125 293.625 C611.27686126 301.02497708 606.57303832 308.30539272 601.8125 315.5625 C601.07558406 316.68651154 600.33868638 317.81053504 599.60180664 318.93457031 C598.4901004 320.62998761 597.37820842 322.32528213 596.2658844 324.02029419 C592.58359576 329.63180255 588.91534424 335.25241454 585.25 340.875 C584.63568115 341.81714355 584.0213623 342.75928711 583.38842773 343.72998047 C577.781703 352.33079027 572.18575556 360.93858921 566.59277344 369.54833984 C564.98145814 372.02854037 563.36971782 374.50846414 561.7578125 376.98828125 C561.22019287 377.81537598 560.68257324 378.6424707 560.12866211 379.49462891 C559.04623436 381.15972763 557.96371539 382.82476707 556.88110352 384.48974609 C554.13288848 388.7166856 551.38679308 392.94497671 548.64453125 397.17578125 C543.54752725 405.03601526 538.4393602 412.88782053 533.25 420.6875 C532.83145752 421.31760986 532.41291504 421.94771973 531.98168945 422.59692383 C529.12379488 426.87620512 529.12379488 426.87620512 528 428 C524.8949275 428.09533653 521.81213542 428.12551126 518.70678711 428.11352539 C517.73128403 428.11367142 516.75578094 428.11381744 515.75071716 428.1139679 C512.50957047 428.11326833 509.26848046 428.10547391 506.02734375 428.09765625 C503.78674857 428.09579222 501.54615298 428.09436825 499.30555725 428.09336853 C493.3954049 428.08954213 487.48527695 428.0797108 481.57513428 428.06866455 C475.54944183 428.05845306 469.52374547 428.05387147 463.49804688 428.04882812 C451.66535384 428.03808864 439.83267894 428.02101076 428 428 C429.38759602 424.40646837 430.89676703 421.46843574 433.1328125 418.32421875 C433.76622559 417.42566162 434.39963867 416.52710449 435.05224609 415.60131836 C435.73625488 414.6397583 436.42026367 413.67819824 437.125 412.6875 C442.99768374 404.34860964 448.74795589 395.9440765 454.375 387.4375 C459.22154648 380.12019918 464.15866216 372.91643864 469.3125 365.8125 C473.01095468 360.63466345 476.4858559 355.30363446 480 350 C484.97233364 342.4956293 490.02639754 335.09874934 495.3125 327.8125 C499.01095468 322.63466345 502.4858559 317.30363446 506 312 C510.97233364 304.4956293 516.02639754 297.09874934 521.3125 289.8125 C525.01095468 284.63466345 528.4858559 279.30363446 532 274 C536.96973115 266.49955704 542.02030355 259.1059207 547.3046875 251.82421875 C551.75049226 245.60043968 555.91137952 239.17416483 560.1171875 232.78710938 C560.76171875 231.81193359 561.40625 230.83675781 562.0703125 229.83203125 C562.70614258 228.86990723 563.34197266 227.9077832 563.99707031 226.91650391 C566.71903045 222.95300613 569.65142849 219.15949055 572.62451172 215.3815918 C574.17664131 213.1750609 574.17664131 213.1750609 573.94384766 211.0390625 C572.66324387 208.27248134 571.07159907 205.87694269 569.34375 203.36328125 C568.7827359 202.54166252 568.7827359 202.54166252 568.21038818 201.70344543 C566.97900566 199.90268667 565.73951427 198.10766881 564.5 196.3125 C563.63580164 195.05167482 562.77220246 193.79043875 561.90917969 192.52880859 C560.13808681 189.94102732 558.36412106 187.35525348 556.58789062 184.77099609 C552.78670296 179.23720439 549.02026713 173.6798895 545.25 168.125 C537.51068925 156.74271199 529.75657459 145.37052708 522 134 C513.43731085 121.4477725 504.88308804 108.88986605 496.34082031 96.32373047 C489.2374038 85.87553272 482.11978682 75.43704638 475 65 C472.54146006 61.3959742 470.08312899 57.7918061 467.625 54.1875 C466.71524414 52.85380371 466.71524414 52.85380371 465.78710938 51.49316406 C462.33719761 46.43389556 458.89251854 41.37111171 455.453125 36.3046875 C454.68278621 35.17032452 453.91244119 34.03596578 453.14208984 32.90161133 C451.66751494 30.73020867 450.19405483 28.55806342 448.72119141 26.38549805 C445.20779569 21.21120144 441.66428168 16.06868536 438 11 C435.08364827 18.08863373 432.46200954 25.24489145 429.98046875 32.49609375 C429.60532639 33.58542709 429.23018402 34.67476044 428.84367371 35.79710388 C428.04037885 38.13038493 427.23816706 40.46403908 426.43695068 42.79803467 C424.3028893 49.01424033 422.16062809 55.22762113 420.01953125 61.44140625 C419.58991089 62.68893677 419.16029053 63.93646729 418.71765137 65.22180176 C414.96449345 76.11593334 411.17746429 86.99799684 407.375 97.875 C402.06285691 113.07626901 396.77421708 128.28553048 391.5 143.5 C386.28337043 158.54605188 381.05918969 173.58940169 375.8125 188.625 C375.51695221 189.4720047 375.22140442 190.3190094 374.91690063 191.19168091 C371.55933516 200.81292478 368.19842817 210.43299977 364.83585358 220.05249405 C362.28588913 227.3473167 359.73645342 234.64232401 357.1875 241.9375 C356.95257339 242.60984428 356.71764679 243.28218857 356.4756012 243.97490692 C349.06020798 265.19824903 341.68382863 286.43477977 334.33886719 307.68261719 C329.1266949 322.75869322 323.8879652 337.82528061 318.62036133 352.88208008 C315.27876751 362.43837041 311.96295893 372.00296953 308.67578125 381.578125 C286.91538109 444.91097327 286.91538109 444.91097327 276 463 C275.62584961 463.62052246 275.25169922 464.24104492 274.86621094 464.88037109 C264.62809287 481.55301222 251.42718504 495.89386772 237.94189453 509.96533203 C234.91302457 513.13890536 231.92528159 516.35029604 228.9375 519.5625 C224.13395104 524.71676262 219.32059266 529.86167518 214.5 535 C209.06399618 540.79503813 203.63494056 546.5964599 198.21679688 552.40820312 C195.22343867 555.61901301 192.22641742 558.82626083 189.22265625 562.02734375 C188.5994751 562.69241943 187.97629395 563.35749512 187.33422852 564.04272461 C186.15490895 565.30115732 184.974497 566.55856768 183.79272461 567.81469727 C183.25268799 568.39147217 182.71265137 568.96824707 182.15625 569.5625 C181.69025391 570.05878906 181.22425781 570.55507813 180.74414062 571.06640625 C179.01041791 572.88440751 179.01041791 572.88440751 177.90902519 574.69213963 C175.8242579 577.65667667 174.21232765 579.67328349 170.63210678 580.63813591 C167.68494552 580.86195595 164.80898166 580.86588305 161.85400391 580.79467773 C160.7412207 580.79569992 159.6284375 580.79672211 158.48193359 580.79777527 C154.81249657 580.79290759 151.14556352 580.73849841 147.4765625 580.68359375 C144.92924356 580.67053481 142.38190702 580.66057204 139.83456421 580.65357971 C133.82063293 580.62960552 127.80777254 580.57446934 121.79426992 580.50413328 C114.27317501 580.41808165 106.75193337 580.3798694 99.23046875 580.34179688 C85.81961581 580.27370262 72.41071531 580.13347157 59 580 C59 550.3 59 520.6 59 490 C102.6875 489.625 102.6875 489.625 116.40893555 489.52197266 C120.51884962 489.48199967 120.51884962 489.48199967 124.6287384 489.43954468 C126.43897042 489.42202512 128.2492402 489.40814891 130.05952454 489.39730835 C149.0629029 489.27840054 166.5526309 489.00809099 181 475 C194.03833167 460.33187687 198.69268726 438.59464263 204.69360352 420.32080078 C205.61021258 417.53517626 206.53373079 414.75186488 207.45752525 411.96861649 C209.36609119 406.21746858 211.26957235 400.46460988 213.16162109 394.70800781 C213.4935849 393.69925598 213.82554871 392.69050415 214.16757202 391.65118408 C214.85646855 389.4571306 215.44224984 387.23100064 216 385 C194.88 385 173.76 385 152 385 C141.07422789 357.73568794 130.22141622 330.44397546 119.42333984 303.12890625 C115.84670474 294.08207047 112.26544029 285.03706728 108.68447876 275.99194336 C106.70612518 270.9947057 104.72807827 265.99734665 102.75 261 C101.9583348 258.99999942 101.16666813 256.99999942 100.375 255 C79 201 79 201 57.625 147 C57.23311493 146.00997482 56.84122986 145.01994965 56.43746948 143.99992371 C55.64590381 142.00017831 54.85433887 140.00043262 54.06277466 138.00068665 C52.08206882 132.99678884 50.1013502 127.99289609 48.12060547 122.98901367 C44.57655061 114.03576268 41.03256326 105.08248502 37.48910522 96.1289978 C36.31684935 93.16703084 35.14450913 90.20509727 33.97216797 87.24316406 C32.41610715 83.31168029 30.86049218 79.38002051 29.30517578 75.44824219 C25.18391305 65.03169504 21.05482122 54.61830871 16.91510391 44.20908165 C15.209152 39.91857313 13.50497312 35.62735984 11.80047607 31.33627319 C10.855479 28.95880946 9.90956649 26.58170939 8.96270752 24.20498657 C7.64735757 20.90317604 6.33508189 17.60015896 5.0234375 14.296875 C4.62960876 13.31069687 4.23578003 12.32451874 3.83001709 11.30845642 C3.47022766 10.40067947 3.11043823 9.49290253 2.73974609 8.55761719 C2.42621185 7.76973816 2.11267761 6.98185913 1.78964233 6.17010498 C1 4 1 4 0 0 Z \"\n        fill=\"currentColor\"\n        transform=\"translate(1814,153)\"\n      />\n      <path\n        d=\"M0 0 C0.83810211 -0.00701431 1.67620422 -0.01402863 2.53970337 -0.02125549 C5.23095126 -0.03866647 7.92197909 -0.04510809 10.61328125 -0.046875 C11.53273529 -0.04754974 12.45218933 -0.04822449 13.39950562 -0.04891968 C27.10769148 -0.03292448 40.46897884 0.34679399 53.92578125 3.203125 C54.75158691 3.37473145 55.57739258 3.54633789 56.42822266 3.72314453 C104.10137996 13.9446933 148.92015889 40.48134523 178.92578125 79.203125 C179.34972168 79.74533691 179.77366211 80.28754883 180.21044922 80.84619141 C214.41902295 124.87863761 228.47978789 178.81199691 222.328125 234.04736328 C216.03894851 283.37618327 192.43364033 325.71878914 158.61669922 361.41503906 C156.70289786 363.4388163 154.80039938 365.47290577 152.8984375 367.5078125 C151.4911795 369.01069209 150.08362987 370.51329867 148.67578125 372.015625 C148.02633545 372.70906006 147.37688965 373.40249512 146.70776367 374.11694336 C142.79169615 378.27866642 138.80498502 382.35314517 134.73291016 386.36254883 C132.92273746 388.20622512 131.22818343 390.1135503 129.55078125 392.078125 C126.80956676 395.26995009 123.91892237 398.24615477 120.92578125 401.203125 C117.99459934 404.09904458 115.16174951 407.00555071 112.48828125 410.140625 C108.70337756 414.57591787 104.54438343 418.61510611 100.37890625 422.6875 C97.94357318 424.99986008 97.94357318 424.99986008 96.62890625 427.31640625 C93.86386929 430.37950912 90.26049646 431.30794193 86.42578125 432.640625 C85.58136475 432.9406543 84.73694824 433.24068359 83.86694336 433.54980469 C62.3934153 441.0136629 40.93860537 444.75407348 18.19897461 444.59423828 C15.49216208 444.57814807 12.7868463 444.59421089 10.08007812 444.61328125 C-6.01970762 444.63290793 -21.99241242 442.8557798 -37.63671875 438.953125 C-38.9196582 438.63734497 -38.9196582 438.63734497 -40.22851562 438.31518555 C-73.32229062 429.88088614 -103.90200102 413.51357452 -128.91796875 390.20703125 C-130.92123799 388.34529733 -132.97995172 386.5858134 -135.07421875 384.828125 C-154.24034916 368.0002625 -168.35310867 345.10704354 -179.07421875 322.203125 C-179.59628906 321.11128906 -180.11835937 320.01945312 -180.65625 318.89453125 C-202.21048168 271.52071861 -202.79727805 213.17787182 -185.1484375 164.46484375 C-173.82008821 134.91287097 -155.84930804 107.17499298 -133.02246094 85.15234375 C-131.20497442 83.33394621 -129.53527624 81.46208081 -127.87109375 79.50390625 C-123.31051112 74.2512437 -118.35061936 69.42985833 -113.390625 64.55859375 C-110.00540165 61.21009977 -106.70976617 57.84651831 -103.60546875 54.234375 C-98.95475676 48.82891889 -93.82732379 43.910037 -88.74609375 38.9140625 C-83.96514754 34.18190146 -79.43948442 29.32013896 -75.07421875 24.203125 C-58.15046977 4.62939398 -24.77048816 0.13949758 0 0 Z M-81.671875 121.2109375 C-94.10751856 136.69988541 -102.42216287 153.91873058 -107.07421875 173.203125 C-107.39390625 174.45867187 -107.71359375 175.71421875 -108.04296875 177.0078125 C-112.75673366 198.0535558 -112.41571877 224.33468929 -107.07421875 245.203125 C-106.88617676 246.00073242 -106.69813477 246.79833984 -106.50439453 247.62011719 C-100.71613874 271.9059255 -89.23935154 293.26409368 -73.07421875 312.203125 C-72.04748047 313.41355469 -72.04748047 313.41355469 -71 314.6484375 C-47.9494122 341.05108564 -16.13936948 358.84287521 19.16259766 361.36279297 C46.59081946 362.79636518 74.17716739 354.47585384 94.92578125 336.203125 C95.74433594 335.52765625 96.56289062 334.8521875 97.40625 334.15625 C122.65186519 312.70726221 135.23054033 279.47024623 137.92578125 247.203125 C140.27625709 205.810847 128.67037622 165.12304253 100.8671875 133.6796875 C97.94939935 130.45922612 94.94396318 127.329408 91.92578125 124.203125 C91.04019531 123.22279297 91.04019531 123.22279297 90.13671875 122.22265625 C68.61718618 98.99259674 35.0082485 85.21932478 4.04296875 82.9140625 C-29.14695206 82.12553306 -59.95156963 96.35755729 -81.671875 121.2109375 Z \"\n        fill=\"currentColor\"\n        transform=\"translate(1161.07421875,144.796875)\"\n      />\n      <path\n        d=\"M0 0 C0.74490189 -0.00988449 1.48980377 -0.01976898 2.25727844 -0.029953 C50.02685431 -0.53245128 90.51113055 12.36122444 125.65625 45.61328125 C146.70645987 67.4971628 159.37409264 96.78597762 165.71875 126.11328125 C165.91766846 127.02053955 166.11658691 127.92779785 166.3215332 128.86254883 C170.15662585 147.76819 170.25182679 166.81619532 170.19580078 186.02832031 C170.1941145 188.77015288 170.195468 191.51198208 170.19668579 194.2538147 C170.19772564 200.1150116 170.19154441 205.97616338 170.18021011 211.83734894 C170.16383422 220.31123842 170.15873487 228.78510646 170.15623413 237.25901005 C170.1518758 251.01318047 170.13861771 264.76732693 170.11962891 278.52148438 C170.10122157 291.86848838 170.08708894 305.21548584 170.07861328 318.5625 C170.07808783 319.3876303 170.07756239 320.2127606 170.07702102 321.06289485 C170.07441061 325.20305393 170.07188327 329.34321306 170.06939721 333.48337221 C170.0487228 367.7350298 170.01188474 401.98664275 169.96875 436.23828125 C140.59875 436.23828125 111.22875 436.23828125 80.96875 436.23828125 C80.95416748 428.65384033 80.93958496 421.06939941 80.92456055 413.25512695 C80.87567801 388.91549748 80.8122347 364.5759265 80.73976372 340.23635652 C80.72849718 336.4480173 80.71735847 332.65967772 80.70629883 328.87133789 C80.7029933 327.74017644 80.7029933 327.74017644 80.699621 326.58616325 C80.66436498 314.41742491 80.6394351 302.24868662 80.61840298 290.07991634 C80.59654727 277.56940716 80.56340542 265.05896355 80.52004844 252.54851025 C80.49369883 244.84508894 80.47588106 237.14174914 80.46940396 229.43828508 C80.46346619 223.5053007 80.44173435 217.57243578 80.41447449 211.6395092 C80.4057624 209.22466949 80.4019573 206.80980564 80.40374184 204.39495087 C80.4208244 170.57795098 75.59013237 132.77364554 50.84375 107.36328125 C34.74804984 91.64278331 13.31965625 84.3749888 -8.90625 83.86328125 C-34.71741699 84.44989868 -58.6296742 92.74617622 -77.03125 111.23828125 C-97.237198 132.83991951 -104.16021891 163.32200101 -104.18946838 192.12161255 C-104.19440636 193.17146265 -104.19934434 194.22131275 -104.20443195 195.30297649 C-104.21970325 198.7958956 -104.22813927 202.28879648 -104.23657227 205.78173828 C-104.24615895 208.29549823 -104.25616949 210.8092566 -104.26657104 213.32301331 C-104.28796255 218.72218414 -104.30642312 224.12135215 -104.32217598 229.52054214 C-104.34495666 237.32671637 -104.37308777 245.13286423 -104.40249526 252.93901597 C-104.45007113 265.60614219 -104.49336218 278.27327924 -104.53393555 290.94042969 C-104.57334155 303.24024294 -104.61477987 315.54004659 -104.65893555 327.83984375 C-104.66165982 328.59890039 -104.66438409 329.35795702 -104.66719092 330.14001538 C-104.68087124 333.94824732 -104.69459134 337.75647912 -104.70833123 341.56471086 C-104.82213871 373.12254876 -104.928189 404.68040597 -105.03125 436.23828125 C-134.73125 436.23828125 -164.43125 436.23828125 -195.03125 436.23828125 C-195.03125 294.99828125 -195.03125 153.75828125 -195.03125 8.23828125 C-165.33125 8.23828125 -135.63125 8.23828125 -105.03125 8.23828125 C-104.70125 27.37828125 -104.37125 46.51828125 -104.03125 66.23828125 C-100.56625 62.27828125 -100.56625 62.27828125 -97.03125 58.23828125 C-95.71682638 56.88630267 -94.3877664 55.54802123 -93.03125 54.23828125 C-90.43142817 51.67837991 -87.96856602 49.07048585 -85.59375 46.30078125 C-83.02459729 43.31026409 -80.33807744 40.50711441 -77.53125 37.73828125 C-74.12798063 34.381002 -70.9630553 30.91038954 -67.85546875 27.28125 C-65.09061865 24.1848547 -62.12088088 21.30582671 -59.14453125 18.4140625 C-56.95404037 16.28178654 -56.95404037 16.28178654 -55.265625 13.66015625 C-52.22091621 10.35994742 -49.19424393 9.4613871 -44.96875 8.17578125 C-43.7923999 7.80126831 -43.7923999 7.80126831 -42.59228516 7.41918945 C-28.8051804 3.17101218 -14.455506 0.15855111 0 0 Z \"\n        fill=\"currentColor\"\n        transform=\"translate(1630.03125,144.76171875)\"\n      />\n      <path\n        d=\"M0 0 C2.38143646 0.95204965 4.66474801 1.93938584 6.9765625 3.03515625 C7.64632111 3.34503983 8.31607971 3.6549234 9.00613403 3.97419739 C10.45502737 4.64529835 11.90244088 5.31960167 13.34851074 5.99676514 C17.28525304 7.83974485 21.23314298 9.65859946 25.1796875 11.48046875 C25.99570435 11.85790222 26.81172119 12.23533569 27.65246582 12.62420654 C35.24684457 16.13041351 42.88818173 19.52557216 50.54144287 22.90084839 C64.74910513 29.16757199 78.87098464 35.60271852 92.95608521 42.1399231 C105.56993927 47.99156219 118.24093862 53.69850855 130.96044922 59.31640625 C141.80271707 64.10701193 152.57447648 69.02171692 163.30908203 74.05004883 C176.83057762 80.36079749 190.48865925 86.36485514 204.13589478 92.39733887 C209.77491735 94.89295962 215.39430951 97.4303158 221 100 C218.28771101 103.3023264 214.68401289 104.73818118 210.87890625 106.44921875 C210.15992157 106.77874039 209.44093689 107.10826202 208.70016479 107.44776917 C206.32395617 108.53442542 203.94335827 109.61107376 201.5625 110.6875 C199.8937747 111.44783214 198.22523897 112.20858047 196.55688477 112.96972656 C184.46077963 118.47742824 172.31861067 123.88115312 160.16287231 129.25570679 C149.25806702 134.07726202 138.42334852 139.02302429 127.62548828 144.08056641 C118.40661512 148.38323273 109.11928332 152.51689675 99.8125 156.625 C85.61505373 162.89310403 71.51530598 169.35473837 57.44631958 175.90567017 C43.06084311 182.6030892 28.62983404 189.18383697 14.11132812 195.58837891 C11.89012859 196.59614536 9.68730279 197.64481766 7.49804688 198.72021484 C6.34884766 199.28047363 5.19964844 199.84073242 4.015625 200.41796875 C2.98211914 200.93512451 1.94861328 201.45228027 0.88378906 201.98510742 C-4.62279823 203.92304201 -8.86034932 201.32394333 -13.87890625 198.96484375 C-14.5500798 198.65496017 -15.22125336 198.3450766 -15.9127655 198.02580261 C-17.36723373 197.35384474 -18.82063255 196.6795683 -20.27307129 196.00323486 C-24.22484546 194.16367398 -28.18489354 192.34208919 -32.14453125 190.51953125 C-33.37319412 189.95338104 -33.37319412 189.95338104 -34.62667847 189.37579346 C-42.23342215 185.87754887 -49.88260613 182.47916104 -57.54244995 179.09915161 C-71.74878884 172.83015312 -85.87122674 166.39716911 -99.95608521 159.8600769 C-112.56993927 154.00843781 -125.24093862 148.30149145 -137.96044922 142.68359375 C-148.80271707 137.89298807 -159.57447648 132.97828308 -170.30908203 127.94995117 C-183.83057762 121.63920251 -197.48865925 115.63514486 -211.13589478 109.60266113 C-216.77491735 107.10704038 -222.39430951 104.5696842 -228 102 C-228 101.34 -228 100.68 -228 100 C-208.96990129 91.17964828 -189.93885123 82.36818675 -170.78076172 73.82861328 C-161.47189338 69.67450975 -152.20210363 65.43917651 -142.9375 61.1875 C-131.07933174 55.74614873 -119.18362405 50.39867978 -107.25 45.125 C-93.78985391 39.17645246 -80.41172146 33.06564376 -67.0640564 26.86914062 C-53.2803913 20.47294082 -39.42766418 14.24567147 -25.51953125 8.125 C-24.451689 7.65401886 -24.451689 7.65401886 -23.36227417 7.17352295 C-22.00264694 6.57412276 -20.64226143 5.97643894 -19.28103638 5.38067627 C-15.88988516 3.88548315 -12.61399874 2.28385617 -9.37109375 0.4921875 C-5.40397836 -1.26382418 -4.14424383 -1.00602452 0 0 Z \"\n        fill=\"currentColor\"\n        transform=\"translate(370,530)\"\n      />\n      <path\n        d=\"M0 0 C0.66 0 1.32 0 2 0 C2.49177734 1.08901611 2.98355469 2.17803223 3.49023438 3.30004883 C5.82583784 8.47131121 8.16271888 13.64199569 10.5 18.8125 C11.1440831 20.23746796 11.1440831 20.23746796 11.80117798 21.69122314 C17.56255614 34.43493377 23.35557963 47.16343967 29.1875 59.875 C34.84426524 72.20531301 40.38209701 84.58434871 45.86605835 96.99237061 C51.491098 109.7096263 57.28584746 122.34285838 63.1399231 134.95608521 C68.98729551 147.56074197 74.69461887 160.22071051 80.30331421 172.93325806 C83.73255225 180.70451144 87.18470715 188.46070308 90.78125 196.15625 C91.08258347 196.80116898 91.38391693 197.44608795 91.69438171 198.11054993 C93.09709069 201.10689714 94.50811603 204.09885205 95.92895508 207.08666992 C96.89529608 209.13646028 97.84393722 211.19473257 98.77075195 213.26269531 C99.53558928 214.96584448 100.340312 216.65232793 101.20385742 218.30761719 C102.82112021 221.85443625 103.25171734 224.02496081 102.2644043 227.84936523 C100.89490649 231.28525401 99.33501246 234.56337031 97.6875 237.875 C97.08394375 239.14365358 96.48351527 240.41379921 95.88598633 241.68530273 C94.6079002 244.39657917 93.31518067 247.1001516 92.01171875 249.79931641 C88.97291652 256.11391361 86.08523772 262.49773801 83.18356323 268.87615967 C82.17658153 271.08646584 81.16519854 273.29470891 80.15258789 275.50244141 C75.39486895 285.8826618 70.74044648 296.30586887 66.125 306.75 C60.17346804 320.2168991 54.05840043 333.6011398 47.8600769 346.95608521 C42.01090383 359.56462351 36.30648975 372.23032779 30.69064331 384.94433594 C25.73533892 396.1584162 20.66458506 407.30938882 15.46435547 418.41210938 C13.01949874 423.63461434 10.60006476 428.86378165 8.265625 434.13671875 C7.84788818 435.07966797 7.43015137 436.02261719 6.99975586 436.99414062 C6.2317571 438.73639073 5.46917521 440.48104306 4.71313477 442.22851562 C2.21791802 447.89104099 2.21791802 447.89104099 0 449 C-0.49177734 447.91098389 -0.98355469 446.82196777 -1.49023438 445.69995117 C-3.82583784 440.52868879 -6.16271888 435.35800431 -8.5 430.1875 C-9.1440831 428.76253204 -9.1440831 428.76253204 -9.80117798 427.30877686 C-15.56255614 414.56506623 -21.35557963 401.83656033 -27.1875 389.125 C-32.84426524 376.79468699 -38.38209701 364.41565129 -43.86605835 352.00762939 C-49.491098 339.2903737 -55.28584746 326.65714162 -61.1399231 314.04391479 C-66.98706511 301.43975468 -72.69064335 288.77890485 -78.30331421 276.0687561 C-83.86081736 263.48797197 -89.5903795 250.99709236 -95.42578125 238.54296875 C-95.87518066 237.57149902 -96.32458008 236.6000293 -96.78759766 235.59912109 C-97.52203805 234.02464417 -98.27253158 232.45733968 -99.05224609 230.90478516 C-101.06073153 226.74847773 -101.058315 224.44564743 -100 220 C-98.9051726 217.32807666 -97.76859972 214.74954149 -96.52734375 212.1484375 C-96.17302277 211.38980408 -95.81870178 210.63117065 -95.4536438 209.84954834 C-94.68832412 208.21224451 -93.91982052 206.57642592 -93.14846802 204.94195557 C-91.07013421 200.536397 -89.01636484 196.11940553 -86.9609375 191.703125 C-86.54068802 190.80147614 -86.12043854 189.89982727 -85.68745422 188.97085571 C-81.70296337 180.4032542 -77.86973 171.77244911 -74.0625 163.125 C-67.89901873 149.14644535 -61.53024284 135.27083115 -55.08288574 121.42114258 C-49.55431376 109.54347648 -44.10418211 97.63853569 -38.77392578 85.67041016 C-34.6402172 76.39728479 -30.42243936 67.16472421 -26.1875 57.9375 C-19.69862154 43.79766493 -13.34130735 29.60226131 -7.05151367 15.37280273 C-6.56479942 14.27269363 -6.56479942 14.27269363 -6.06825256 13.15036011 C-5.46374124 11.78344765 -4.86011273 10.41614428 -4.2575531 9.04837036 C-2.90739365 5.99434024 -1.53001919 2.9685345 0 0 Z \"\n        fill=\"currentColor\"\n        transform=\"translate(630,142)\"\n      />\n      <path\n        d=\"M0 0 C1.13596367 0.53154945 1.13596367 0.53154945 2.2948761 1.07383728 C3.92670528 1.8383391 5.55663215 2.60691215 7.18481445 3.37915039 C11.57252393 5.45952627 15.97461317 7.50911219 20.375 9.5625 C21.27314423 9.98254303 22.17128845 10.40258606 23.09664917 10.83535767 C31.6971507 14.84948123 40.36487536 18.70248537 49.05078125 22.52734375 C62.43733488 28.43202972 75.73694697 34.50823224 89.00796509 40.66781616 C101.62145533 46.51926381 114.29209559 52.22601825 127.01123047 57.84375 C137.84988029 62.63275707 148.61877392 67.54432958 159.34887695 72.57299805 C170.87702047 77.95536258 182.50845376 83.10132797 194.1484375 88.23632812 C195.26818466 88.73107132 195.26818466 88.73107132 196.41055298 89.23580933 C197.80543467 89.85186781 199.20088087 90.46665049 200.59701538 91.0798645 C204.47199906 92.79105126 208.27717754 94.60307504 212.05078125 96.52734375 C209.59313167 99.35932021 207.05995942 100.60773538 203.64453125 102.1015625 C202.5268335 102.59567627 201.40913574 103.08979004 200.25756836 103.59887695 C199.65637177 103.86092926 199.05517517 104.12298157 198.4357605 104.39297485 C195.14623395 105.82723974 191.8669883 107.28475893 188.5859375 108.73828125 C187.89133926 109.04520401 187.19674103 109.35212677 186.48109436 109.66835022 C179.34665426 112.82575529 172.25712358 116.078926 165.1685791 119.33773804 C152.58690391 125.12051151 139.96611591 130.8053081 127.30078125 136.40234375 C113.83388215 142.35387571 100.44964145 148.46894332 87.09469604 154.66726685 C74.48659661 160.51623633 61.82221311 166.22268356 49.10745239 171.83569336 C40.21719767 175.76149695 31.35526206 179.73409599 22.55737305 183.86328125 C20.21016739 184.95764843 17.85494954 186.03502065 15.49145508 187.09375 C10.51434084 189.33336075 5.85003191 191.54497084 1.3671875 194.671875 C-3.30730917 197.72084874 -7.25844068 199.72627585 -12.94921875 199.52734375 C-23.91430744 196.67263963 -33.99692197 191.14492923 -44.03320312 186.00878906 C-51.25110541 182.33103456 -58.63040917 179.00552317 -66.00054932 175.64694214 C-68.46494274 174.52015431 -70.92435712 173.38316395 -73.38232422 172.24243164 C-84.14816407 167.24638345 -94.96523227 162.37963223 -105.82421875 157.58984375 C-119.8016631 151.42376281 -133.67832456 145.05761594 -147.52807617 138.61022949 C-159.38552399 133.0910683 -171.27090755 127.65184616 -183.21728516 122.32763672 C-192.30244078 118.2755355 -201.36266404 114.17084064 -210.41143799 110.03817749 C-211.95754069 109.33209836 -213.50388028 108.62653771 -215.05047607 107.92153931 C-229.86361928 101.16739681 -229.86361928 101.16739681 -236.94921875 97.52734375 C-235.40297635 94.43485896 -232.39497667 93.77224326 -229.34765625 92.46484375 C-228.65664825 92.16175293 -227.96564026 91.85866211 -227.25369263 91.54638672 C-225.74393793 90.88455358 -224.2329626 90.22549976 -222.72094727 89.56884766 C-218.60037978 87.77824555 -214.48968429 85.96528311 -210.37890625 84.15234375 C-209.52000793 83.77392334 -208.66110962 83.39550293 -207.77618408 83.00561523 C-198.61515962 78.9581325 -189.53116958 74.74850321 -180.44921875 70.52734375 C-167.01842822 64.29113497 -153.53427189 58.19365562 -139.98876953 52.2109375 C-129.12430747 47.41052541 -118.33093631 42.48485397 -107.57421875 37.4465332 C-97.61688065 32.79925504 -87.5769861 28.35132497 -77.52438354 23.91497803 C-62.64355788 17.34143391 -47.88025245 10.52383118 -33.13793945 3.64599609 C-32.29394592 3.25265076 -31.44995239 2.85930542 -30.5803833 2.45404053 C-29.00215581 1.71812923 -27.42451866 0.98095001 -25.84759521 0.24224854 C-24.47254039 -0.39926056 -23.09459654 -1.03463223 -21.71316528 -1.66229248 C-20.25193276 -2.33358828 -18.81033767 -3.04744289 -17.37475586 -3.77197266 C-11.1460286 -5.57131309 -5.51697816 -2.65206409 0 0 Z \"\n        fill=\"currentColor\"\n        transform=\"translate(378.94921875,4.47265625)\"\n      />\n      <path\n        d=\"M0 0 C3.16996538 2.64292957 4.58615167 5.99846112 6.23046875 9.68359375 C6.53747711 10.35750656 6.84448547 11.03141937 7.16079712 11.72575378 C8.17637287 13.96004264 9.18196727 16.19867546 10.1875 18.4375 C10.9047181 20.02088535 11.62241858 21.60405227 12.34057617 23.18701172 C17.557798 34.70815066 22.67921543 46.2717554 27.79125977 57.83984375 C33.68037773 71.1576613 39.77125553 84.37649769 45.91656494 97.57775879 C51.44532587 109.45578979 56.8956508 121.36108914 62.22607422 133.32958984 C66.02968777 141.86221489 69.90147058 150.36145132 73.79812622 158.85189819 C79.85693235 172.05571719 85.84366894 185.28973626 91.75 198.5625 C92.08842926 199.32144562 92.42685852 200.08039124 92.77554321 200.86233521 C93.10460663 201.60071625 93.43367004 202.33909729 93.77270508 203.09985352 C94.09215591 203.81593277 94.41160675 204.53201202 94.74073792 205.26979065 C95.35400396 206.65889757 95.95876734 208.05179432 96.55430603 209.44859314 C97.7667374 212.29076343 98.99862324 214.99771075 100.59277344 217.6484375 C102.08152337 220.2828054 102.78334997 221.96992389 103 225 C101.30365246 230.98647292 98.55822083 236.34364945 95.75 241.875 C94.95333102 243.50968153 94.16075039 245.14636126 93.37207031 246.78491211 C92.14947833 249.32297049 90.92414575 251.85931053 89.68548584 254.38957214 C85.82877172 262.27058347 82.25905167 270.2638989 78.74108887 278.30059814 C73.29272578 290.73506591 67.67777702 303.08043961 61.91943359 315.37451172 C57.61676727 324.59338488 53.48310325 333.88071668 49.375 343.1875 C43.49553804 356.50466014 37.45637596 369.74030384 31.3125 382.9375 C25.72169773 394.94841853 20.15935153 406.96859091 14.75 419.0625 C13.96882813 420.80793091 13.96882813 420.80793091 13.171875 422.58862305 C10.99165126 427.47731698 8.8303365 432.37253116 6.72265625 437.29296875 C6.22717285 438.44623657 6.22717285 438.44623657 5.72167969 439.62280273 C4.89684505 441.55265885 4.0825803 443.48702409 3.26953125 445.421875 C2 448 2 448 0 449 C-7.04441853 434.01047326 -13.93744829 418.96464382 -20.625 403.8125 C-26.50431543 390.49528108 -32.5434519 377.25960921 -38.6875 364.0625 C-44.62674464 351.30464185 -50.50063799 338.5258406 -56.22607422 325.67041016 C-60.35978255 316.39728534 -64.57752914 307.16470924 -68.8125 297.9375 C-75.12666444 284.17885184 -81.31788127 270.36887768 -87.42114258 256.51538086 C-87.73030106 255.81563644 -88.03945953 255.11589203 -88.35798645 254.39494324 C-88.9687589 253.01216297 -89.57766625 251.62855725 -90.18461609 250.24409485 C-91.79991779 246.58830932 -93.48746332 242.98697332 -95.28125 239.41503906 C-95.65664917 238.65667755 -96.03204834 237.89831604 -96.41882324 237.11697388 C-97.13902339 235.67054754 -97.87361421 234.23116094 -98.62512207 232.80075073 C-100.93690087 228.13205351 -101.37203683 225.20640287 -100 220 C-99.02397272 217.48373378 -98.01543578 215.10715429 -96.859375 212.6796875 C-96.54119812 211.99202301 -96.22302124 211.30435852 -95.89520264 210.59585571 C-94.85494485 208.35168255 -93.8023705 206.11351491 -92.75 203.875 C-91.6452649 201.49798118 -90.54383244 199.11944845 -89.44238281 196.74090576 C-88.69802847 195.13432195 -87.952581 193.52824417 -87.20605469 191.92266846 C-83.90135901 184.81214026 -80.68880328 177.66331233 -77.5 170.5 C-72.45966554 159.20686063 -67.32682016 147.95859258 -62.16937256 136.71862793 C-57.07024954 125.59897477 -52.0696608 114.4390986 -47.125 103.25 C-41.17615106 89.78917193 -35.06527632 76.41023562 -28.86813354 63.06204224 C-22.49936999 49.33854294 -16.29910784 35.54638709 -10.21289062 21.6953125 C-6.98775083 14.36937414 -3.69282015 7.10586496 0 0 Z \"\n        fill=\"currentColor\"\n        transform=\"translate(101,142)\"\n      />\n    </svg>\n  );\n};\nexport const OpenIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 14 14\"\n    >\n      <path\n        fill=\"none\"\n        stroke=\"currentColor\"\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n        d=\"M7 13.5a9.26 9.26 0 0 0-5.61-2.95a1 1 0 0 1-.89-1V1.5A1 1 0 0 1 1.64.51A9.3 9.3 0 0 1 7 3.43zm0 0a9.26 9.26 0 0 1 5.61-2.95a1 1 0 0 0 .89-1V1.5a1 1 0 0 0-1.14-.99A9.3 9.3 0 0 0 7 3.43z\"\n      />\n    </svg>\n  );\n};\nexport const PaintingIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 36 36\"\n    >\n      <path\n        fill=\"currentColor\"\n        d=\"M32 4H4a2 2 0 0 0-2 2v24a2 2 0 0 0 2 2h28a2 2 0 0 0 2-2V6a2 2 0 0 0-2-2ZM8.92 8a3 3 0 1 1-3 3a3 3 0 0 1 3-3ZM6 27v-4.1l6-6.08a1 1 0 0 1 1.41 0L16 19.35L8.32 27Zm24 0H11.15l6.23-6.23l5.4-5.4a1 1 0 0 1 1.41 0L30 21.18Z\"\n      />\n      <path fill=\"none\" d=\"M0 0h36v36H0z\" />\n    </svg>\n  );\n};\nexport const PaintingIconSkeleton = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 14 14\"\n    >\n      <g\n        fill=\"none\"\n        stroke=\"currentColor\"\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n      >\n        <path d=\"M1.5 12h11a1 1 0 0 0 1-1V3a1 1 0 0 0-1-1h-11a1 1 0 0 0-1 1v8a1 1 0 0 0 1 1\" />\n        <path d=\"M9.502 6.212a1.245 1.245 0 1 0 0-2.49a1.245 1.245 0 0 0 0 2.49M9.083 12a7.098 7.098 0 0 0-7.136-5.786A7.6 7.6 0 0 0 .5 6.349\" />\n        <path d=\"M13.5 8.94a7.716 7.716 0 0 0-5.506.225\" />\n      </g>\n    </svg>\n  );\n};\nexport const QuestionMarkIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      viewBox=\"0 0 24 24\"\n      fill=\"none\"\n      stroke=\"currentColor\"\n      strokeWidth=\"2\"\n      strokeLinecap=\"round\"\n      strokeLinejoin=\"round\"\n    >\n      <circle cx=\"12\" cy=\"12\" r=\"10\" />\n      <path d=\"M9.09 9a3 3 0 0 1 5.83 1c0 2-3 3-3 3\" />\n      <line x1=\"12\" y1=\"17\" x2=\"12.01\" y2=\"17\" />\n    </svg>\n  );\n};\nexport const RobotIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return <FaRobot size={size} className={className} />;\n};\nexport const SwapIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 24 24\"\n    >\n      <g\n        fill=\"none\"\n        stroke=\"currentColor\"\n        strokeLinecap=\"round\"\n        strokeLinejoin=\"round\"\n        strokeWidth=\"1.5\"\n      >\n        <path d=\"M3.53 11.47v2.118a4.235 4.235 0 0 0 4.235 4.236H20.47M3.53 6.176h12.705a4.235 4.235 0 0 1 4.236 4.236v2.117\" />\n        <path d=\"m17.294 14.647l3.177 3.176L17.294 21M6.706 9.353L3.529 6.176L6.706 3\" />\n      </g>\n    </svg>\n  );\n};\nexport const TriangleAlertIcon = createIcon(FiAlertTriangle);\nexport const UsersIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 16 16\"\n    >\n      <path\n        fill=\"currentColor\"\n        d=\"M8 8a3 3 0 1 0 0-6a3 3 0 0 0 0 6m4.735 6c.618 0 1.093-.561.872-1.139a6.002 6.002 0 0 0-11.215 0c-.22.578.254 1.139.872 1.139z\"\n      />\n    </svg>\n  );\n  // return <FiUser size={size} className={className} />;\n};\nexport const WindowsIcon = ({\n  size = 16,\n  className = \"my-auto flex flex-shrink-0 \",\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      viewBox=\"0 0 24 24\"\n      width=\"24\"\n      height=\"24\"\n    >\n      <path\n        fill=\"currentColor\"\n        d=\"M3 3h8v8H3V3zm10 0h8v8h-8V3zm-10 10h8v8H3v-8zm10 0h8v8h-8v-8z\"\n      />\n    </svg>\n  );\n};\n\n// ============================================================================\n// THIRD-PARTY / COMPANY ICONS (Alphabetically)\n// ============================================================================\nexport const AirtableIcon = createLogoIcon(airtableIcon);\nexport const AmazonIcon = createLogoIcon(amazonSVG);\nexport const AnthropicIcon = createLogoIcon(anthropicSVG);\nexport const AsanaIcon = createLogoIcon(asanaIcon);\nexport const AxeroIcon = createLogoIcon(axeroImage);\nexport const AzureIcon = createLogoIcon(azureIcon);\nexport const BitbucketIcon = createLogoIcon(bitbucketIcon);\nexport const BookstackIcon = createIcon(SiBookstack);\nexport const ClickupIcon = createLogoIcon(clickupIcon);\nexport const CohereIcon = createLogoIcon(cohereIcon);\nexport const ColorDiscordIcon = createLogoIcon(discordIcon);\nexport const ColorSlackIcon = createLogoIcon(slackIcon);\nexport const ConfluenceIcon = createLogoIcon(confluenceSVG, {\n  sizeAdjustment: 4,\n  classNameAddition: \"-m-0.5\",\n});\nexport const DeepseekIcon = createLogoIcon(deepseekSVG);\nexport const DiscourseIcon = createLogoIcon(discourseIcon);\nexport const Document360Icon = createLogoIcon(document360Icon);\nexport const DropboxIcon = createLogoIcon(dropboxIcon);\nexport const DrupalWikiIcon = createLogoIcon(drupalwikiIcon);\nexport const EgnyteIcon = createLogoIcon(egnyteIcon);\nexport const ElevenLabsIcon = createLogoIcon(elevenLabsSVG, {\n  darkSrc: elevenLabsDarkSVG,\n});\nexport const FirefliesIcon = createLogoIcon(firefliesIcon);\nexport const FreshdeskIcon = createLogoIcon(freshdeskIcon);\nexport const GeminiIcon = createLogoIcon(geminiSVG);\nexport const GitbookIcon = createLogoIcon(gitbookDarkIcon, {\n  darkSrc: gitbookLightIcon,\n});\nexport const GithubIcon = createLogoIcon(githubLightIcon, {\n  monochromatic: true,\n});\nexport const GitlabIcon = createLogoIcon(gitlabIcon);\nexport const GmailIcon = createLogoIcon(gmailIcon);\nexport const GongIcon = createLogoIcon(gongIcon);\nexport const GoogleDriveIcon = createLogoIcon(googleDriveIcon);\nexport const GoogleIcon = createLogoIcon(googleIcon);\nexport const GoogleSitesIcon = createLogoIcon(googleSitesIcon);\nexport const GoogleStorageIcon = createLogoIcon(googleCloudStorageIcon, {\n  sizeAdjustment: 4,\n  classNameAddition: \"-m-0.5\",\n});\nexport const GuruIcon = createLogoIcon(guruIcon, { monochromatic: true });\nexport const HighspotIcon = createLogoIcon(highspotIcon);\nexport const HubSpotIcon = createLogoIcon(hubSpotIcon);\nexport const JiraIcon = createLogoIcon(jiraSVG);\nexport const KimiIcon = createLogoIcon(kimiIcon);\nexport const LinearIcon = createLogoIcon(linearIcon);\nexport const LiteLLMIcon = createLogoIcon(litellmIcon);\nexport const LoopioIcon = createLogoIcon(loopioIcon, { monochromatic: true });\nexport const MediaWikiIcon = createLogoIcon(mediawikiIcon);\nexport const MetaIcon = createLogoIcon(metaSVG);\nexport const MicrosoftIcon = createLogoIcon(microsoftIcon);\nexport const MicrosoftIconSVG = createLogoIcon(microsoftSVG);\nexport const MistralIcon = createLogoIcon(mistralSVG);\nexport const MixedBreadIcon = createLogoIcon(mixedBreadSVG);\nexport const NomicIcon = createLogoIcon(nomicSVG);\nexport const CodaIcon = createLogoIcon(codaIcon);\nexport const NotionIcon = createLogoIcon(notionIcon, { monochromatic: true });\nexport const OCIStorageIcon = createLogoIcon(OCIStorageSVG);\nexport const OllamaIcon = createLogoIcon(ollamaIcon);\nexport const LMStudioIcon = createLogoIcon(lmStudioIcon);\nexport const TestRailIcon = createLogoIcon(testrailSVG);\nexport const OpenAIISVG = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => (\n  <svg\n    fill=\"currentColor\"\n    width={size}\n    style={{ width: `${size}px`, height: `${size}px` }}\n    height={size}\n    className={`w-[${size}px] h-[${size}px] ` + className}\n    viewBox=\"0 0 24 24\"\n    role=\"img\"\n    xmlns=\"http://www.w3.org/2000/svg\"\n  >\n    <path\n      fill=\"currentColor\"\n      d=\"M22.2819 9.8211a5.9847 5.9847 0 0 0-.5157-4.9108 6.0462 6.0462 0 0 0-6.5098-2.9A6.0651 6.0651 0 0 0 4.9807 4.1818a5.9847 5.9847 0 0 0-3.9977 2.9 6.0462 6.0462 0 0 0 .7427 7.0966 5.98 5.98 0 0 0 .511 4.9107 6.051 6.051 0 0 0 6.5146 2.9001A5.9847 5.9847 0 0 0 13.2599 24a6.0557 6.0557 0 0 0 5.7718-4.2058 5.9894 5.9894 0 0 0 3.9977-2.9001 6.0557 6.0557 0 0 0-.7475-7.0729zm-9.022 12.6081a4.4755 4.4755 0 0 1-2.8764-1.0408l.1419-.0804 4.7783-2.7582a.7948.7948 0 0 0 .3927-.6813v-6.7369l2.02 1.1686a.071.071 0 0 1 .038.052v5.5826a4.504 4.504 0 0 1-4.4945 4.4944zm-9.6607-4.1254a4.4708 4.4708 0 0 1-.5346-3.0137l.142.0852 4.783 2.7582a.7712.7712 0 0 0 .7806 0l5.8428-3.3685v2.3324a.0804.0804 0 0 1-.0332.0615L9.74 19.9502a4.4992 4.4992 0 0 1-6.1408-1.6464zM2.3408 7.8956a4.485 4.485 0 0 1 2.3655-1.9728V11.6a.7664.7664 0 0 0 .3879.6765l5.8144 3.3543-2.0201 1.1685a.0757.0757 0 0 1-.071 0l-4.8303-2.7865A4.504 4.504 0 0 1 2.3408 7.872zm16.5963 3.8558L13.1038 8.364 15.1192 7.2a.0757.0757 0 0 1 .071 0l4.8303 2.7913a4.4944 4.4944 0 0 1-.6765 8.1042v-5.6772a.79.79 0 0 0-.407-.667zm2.0107-3.0231l-.142-.0852-4.7735-2.7818a.7759.7759 0 0 0-.7854 0L9.409 9.2297V6.8974a.0662.0662 0 0 1 .0284-.0615l4.8303-2.7866a4.4992 4.4992 0 0 1 6.6802 4.66zM8.3065 12.863l-2.02-1.1638a.0804.0804 0 0 1-.038-.0567V6.0742a4.4992 4.4992 0 0 1 7.3757-3.4537l-.142.0805L8.704 5.459a.7948.7948 0 0 0-.3927.6813zm1.0976-2.3654l2.602-1.4998 2.6069 1.4998v2.9994l-2.5974 1.4997-2.6067-1.4997Z\"\n    />\n  </svg>\n);\nexport const OpenAIIcon = createLogoIcon(openAISVG, { monochromatic: true });\nexport const OpenAISVG = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      viewBox=\"0 0 50 50\"\n    >\n      <path\n        fill=\"currentColor\"\n        d=\"M45.403,25.562c-0.506-1.89-1.518-3.553-2.906-4.862c1.134-2.665,0.963-5.724-0.487-8.237\tc-1.391-2.408-3.636-4.131-6.322-4.851c-1.891-0.506-3.839-0.462-5.669,0.088C28.276,5.382,25.562,4,22.647,4\tc-4.906,0-9.021,3.416-10.116,7.991c-0.01,0.001-0.019-0.003-0.029-0.002c-2.902,0.36-5.404,2.019-6.865,4.549\tc-1.391,2.408-1.76,5.214-1.04,7.9c0.507,1.891,1.519,3.556,2.909,4.865c-1.134,2.666-0.97,5.714,0.484,8.234\tc1.391,2.408,3.636,4.131,6.322,4.851c0.896,0.24,1.807,0.359,2.711,0.359c1.003,0,1.995-0.161,2.957-0.45\tC21.722,44.619,24.425,46,27.353,46c4.911,0,9.028-3.422,10.12-8.003c2.88-0.35,5.431-2.006,6.891-4.535\tC45.754,31.054,46.123,28.248,45.403,25.562z M35.17,9.543c2.171,0.581,3.984,1.974,5.107,3.919c1.049,1.817,1.243,4,0.569,5.967\tc-0.099-0.062-0.193-0.131-0.294-0.19l-9.169-5.294c-0.312-0.179-0.698-0.177-1.01,0.006l-10.198,6.041l-0.052-4.607l8.663-5.001\tC30.733,9.26,33,8.963,35.17,9.543z M29.737,22.195l0.062,5.504l-4.736,2.805l-4.799-2.699l-0.062-5.504l4.736-2.805L29.737,22.195z M14.235,14.412C14.235,9.773,18.009,6,22.647,6c2.109,0,4.092,0.916,5.458,2.488C28,8.544,27.891,8.591,27.787,8.651l-9.17,5.294\tc-0.312,0.181-0.504,0.517-0.5,0.877l0.133,11.851l-4.015-2.258V14.412z M6.528,23.921c-0.581-2.17-0.282-4.438,0.841-6.383\tc1.06-1.836,2.823-3.074,4.884-3.474c-0.004,0.116-0.018,0.23-0.018,0.348V25c0,0.361,0.195,0.694,0.51,0.872l10.329,5.81\tL19.11,34.03l-8.662-5.002C8.502,27.905,7.11,26.092,6.528,23.921z M14.83,40.457c-2.171-0.581-3.984-1.974-5.107-3.919\tc-1.053-1.824-1.249-4.001-0.573-5.97c0.101,0.063,0.196,0.133,0.299,0.193l9.169,5.294c0.154,0.089,0.327,0.134,0.5,0.134\tc0.177,0,0.353-0.047,0.51-0.14l10.198-6.041l0.052,4.607l-8.663,5.001C19.269,40.741,17.001,41.04,14.83,40.457z M35.765,35.588\tc0,4.639-3.773,8.412-8.412,8.412c-2.119,0-4.094-0.919-5.459-2.494c0.105-0.056,0.216-0.098,0.32-0.158l9.17-5.294\tc0.312-0.181,0.504-0.517,0.5-0.877L31.75,23.327l4.015,2.258V35.588z M42.631,32.462c-1.056,1.83-2.84,3.086-4.884,3.483\tc0.004-0.12,0.018-0.237,0.018-0.357V25c0-0.361-0.195-0.694-0.51-0.872l-10.329-5.81l3.964-2.348l8.662,5.002\tc1.946,1.123,3.338,2.937,3.92,5.107C44.053,28.249,43.754,30.517,42.631,32.462z\"\n      />\n    </svg>\n  );\n};\nexport const OpenSourceIcon = createLogoIcon(openSourceIcon);\nexport const OutlineIcon = createLogoIcon(outlinePNG, {\n  sizeAdjustment: 4,\n  classNameAddition: \"-m-0.5\",\n});\nexport const ProductboardIcon = createLogoIcon(productboardIcon);\nexport const QwenIcon = createLogoIcon(qwenSVG);\nexport const R2Icon = createLogoIcon(r2Icon);\nexport const S3Icon = createLogoIcon(s3Icon);\nexport const SalesforceIcon = createLogoIcon(salesforceIcon);\nexport const SharepointIcon = createLogoIcon(sharepointIcon);\nexport const SlabIcon = createLogoIcon(slabLogoIcon);\nexport const OutlookIcon = createLogoIcon(outlookIcon);\nexport const OneDriveIcon = createLogoIcon(oneDriveIcon);\nexport const BoxIcon = createLogoIcon(boxIcon);\nexport const TrelloIcon = createLogoIcon(trelloIcon);\nexport const ServiceNowIcon = createLogoIcon(serviceNowIcon);\nexport const SlackIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 24 24\"\n    >\n      <path\n        fill=\"currentColor\"\n        d=\"M16.923 16.52h-2.39a1.984 1.984 0 0 1-1.973-1.195a2.006 2.006 0 0 1 .47-2.263a1.99 1.99 0 0 1 1.502-.53h4.858a1.978 1.978 0 0 1 1.969 1.63a1.951 1.951 0 0 1-1.147 2.173a2.21 2.21 0 0 1-.876.174c-.8.022-1.601.01-2.413.01m-9.435.501v-2.477a2.003 2.003 0 0 1 .56-1.402a1.987 1.987 0 0 1 1.377-.608a1.942 1.942 0 0 1 1.393.522c.377.352.6.84.62 1.357c.043 1.738.043 3.477 0 5.215A1.94 1.94 0 0 1 10.805 21a1.922 1.922 0 0 1-1.423.495a1.954 1.954 0 0 1-1.359-.614a1.97 1.97 0 0 1-.535-1.395c-.01-.815 0-1.64 0-2.466m8.938-9.963v2.434a1.996 1.996 0 0 1-.524 1.5a1.98 1.98 0 0 1-2.242.469a1.981 1.981 0 0 1-1.078-1.165a1.996 1.996 0 0 1-.106-.804V4.46a1.963 1.963 0 0 1 .605-1.386a1.947 1.947 0 0 1 1.408-.537a1.962 1.962 0 0 1 1.383.602a1.979 1.979 0 0 1 .553 1.408c.011.836 0 1.673 0 2.51M6.97 11.511H4.545a1.962 1.962 0 0 1-1.393-.579a1.978 1.978 0 0 1-.427-2.155a1.978 1.978 0 0 1 1.066-1.07a1.97 1.97 0 0 1 .754-.15h4.923a1.962 1.962 0 0 1 1.392.579a1.98 1.98 0 0 1-1.392 3.375zm4.478-6.171v.902c0 .18-.06.261-.216.261H9.165A1.916 1.916 0 0 1 7.9 5.787a1.929 1.929 0 0 1-.4-1.402c.022-.492.227-.958.574-1.306a1.965 1.965 0 0 1 3.342 1.12c.032.38.032.487.032.832v.214zm-5.009 7.204c.06.813.06 1.63 0 2.444a1.902 1.902 0 0 1-.754 1.18a1.887 1.887 0 0 1-1.356.34a1.988 1.988 0 0 1-1.293-.627a2.003 2.003 0 0 1-.536-1.338a1.96 1.96 0 0 1 .497-1.346c.33-.369.786-.599 1.278-.643c.736-.065 1.471-.01 2.164-.01M17.443 11.5V9.329c.052-.509.299-.977.689-1.305c.39-.329.891-.492 1.399-.455c.522 0 1.023.208 1.392.579a1.981 1.981 0 0 1 0 2.796c-.37.371-.87.58-1.392.58c-.671 0-1.363-.022-2.088-.022m-4.967 6.072c.8-.055 1.603-.055 2.402 0c.488.09.92.367 1.208.773c.286.406.405.908.329 1.4a1.99 1.99 0 0 1-.67 1.264a1.98 1.98 0 0 1-1.343.485a1.922 1.922 0 0 1-1.314-.528a1.937 1.937 0 0 1-.6-1.287c-.044-.695-.012-1.401-.012-2.107\"\n      />\n    </svg>\n  );\n};\nexport const SlackIconSkeleton = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 14 14\"\n    >\n      <g fill=\"none\" stroke=\"currentColor\">\n        <path d=\"M5.5 2a.5.5 0 1 0 1 0a.5.5 0 1 0-1 0m6 4a.5.5 0 1 0 1 0a.5.5 0 1 0-1 0m-4 6a.5.5 0 1 0 1 0a.5.5 0 1 0-1 0m-6-4a.5.5 0 1 0 1 0a.5.5 0 1 0-1 0\" />\n        <path\n          strokeLinecap=\"round\"\n          strokeLinejoin=\"round\"\n          d=\"M8.793 1.219v4.937m-3.59 1.692v4.937M1.215 5.207h4.937m1.692 3.59h4.937\"\n        />\n      </g>\n    </svg>\n  );\n};\nexport const TeamsIcon = createLogoIcon(teamsIcon);\nexport const VoyageIconSVG = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => (\n  <svg\n    style={{ width: `${size}px`, height: `${size}px` }}\n    className={`w-[${size}px] h-[${size}px] ` + className}\n    xmlns=\"http://www.w3.org/2000/svg\"\n    viewBox=\"0 0 200 200\"\n    width=\"200\"\n    height=\"200\"\n  >\n    <path\n      d=\"M0 0 C18.56364691 14.8685395 31.52865476 35.60458591 34.68359375 59.39453125 C36.85790415 84.17093249 31.86661083 108.64738046 15.83569336 128.38696289 C-0.18749615 147.32766215 -21.13158775 159.50726579 -46 162 C-70.46026633 163.68595557 -94.53744209 157.16585411 -113.375 141.1875 C-131.5680983 125.12913912 -143.31327081 103.12304227 -145.16845703 78.79052734 C-146.52072106 52.74671426 -138.40787353 29.42123969 -121 10 C-120.39929688 9.30519531 -119.79859375 8.61039063 -119.1796875 7.89453125 C-88.7732111 -25.07872563 -34.66251161 -26.29920259 0 0 Z M-111 6 C-111.96292969 6.76441406 -112.92585938 7.52882813 -113.91796875 8.31640625 C-129.12066 21.0326872 -138.48510826 41.64930525 -141 61 C-142.57102569 86.19086606 -137.40498471 109.10013392 -120.54980469 128.68505859 C-106.05757815 144.84161953 -85.8110604 156.92053779 -63.68798828 158.12597656 C-39.72189393 158.83868932 -17.08757891 154.40601729 1.1875 137.6875 C3.15800523 135.82115685 5.07881363 133.91852176 7 132 C8.22396484 130.7934375 8.22396484 130.7934375 9.47265625 129.5625 C26.2681901 112.046746 31.70691205 89.639394 31.3125 66 C30.4579168 43.32505919 19.07700136 22.58412979 3 7 C-29.27431062 -21.68827611 -78.26536136 -21.67509486 -111 6 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(155,29)\"\n    />\n    <path\n      d=\"M0 0 C2.62278901 2.33427271 3.96735488 4.64596813 5.4453125 7.81640625 C6.10080078 9.20956055 6.10080078 9.20956055 6.76953125 10.63085938 C7.21683594 11.59830078 7.66414063 12.56574219 8.125 13.5625 C8.58003906 14.53380859 9.03507812 15.50511719 9.50390625 16.50585938 C10.34430119 18.30011504 11.18198346 20.09564546 12.01611328 21.89282227 C12.65935931 23.27045415 13.32005367 24.64010734 14 26 C12.02 26 10.04 26 8 26 C6.515 22.535 6.515 22.535 5 19 C1.7 19 -1.6 19 -5 19 C-5.99 21.31 -6.98 23.62 -8 26 C-9.32 26 -10.64 26 -12 26 C-10.34176227 20.46347949 -7.92776074 15.38439485 -5.4375 10.1875 C-5.02564453 9.31673828 -4.61378906 8.44597656 -4.18945312 7.54882812 C-1.13502139 1.13502139 -1.13502139 1.13502139 0 0 Z M-1 8 C-3.2013866 11.80427492 -3.2013866 11.80427492 -4 16 C-1.69 16 0.62 16 3 16 C2.43260132 11.87026372 2.43260132 11.87026372 1 8 C0.34 8 -0.32 8 -1 8 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(158,86)\"\n    />\n    <path\n      d=\"M0 0 C2.64453125 1.0234375 2.64453125 1.0234375 4.4453125 4.296875 C4.96971298 5.65633346 5.47294966 7.0241056 5.95703125 8.3984375 C6.22064453 9.08421875 6.48425781 9.77 6.75585938 10.4765625 C7.8687821 13.4482107 8.64453125 15.82826389 8.64453125 19.0234375 C9.30453125 19.0234375 9.96453125 19.0234375 10.64453125 19.0234375 C10.75667969 18.34925781 10.86882813 17.67507812 10.984375 16.98046875 C11.77373626 13.44469078 12.95952974 10.10400184 14.20703125 6.7109375 C14.44099609 6.06576172 14.67496094 5.42058594 14.91601562 4.75585938 C15.48900132 3.17722531 16.06632589 1.60016724 16.64453125 0.0234375 C17.96453125 0.0234375 19.28453125 0.0234375 20.64453125 0.0234375 C20.11164835 5.93359329 17.66052325 10.65458241 15.08203125 15.8984375 C14.65728516 16.77757813 14.23253906 17.65671875 13.79492188 18.5625 C12.75156566 20.71955106 11.70131241 22.87294038 10.64453125 25.0234375 C9.65453125 25.0234375 8.66453125 25.0234375 7.64453125 25.0234375 C6.36851794 22.52596727 5.09866954 20.02565814 3.83203125 17.5234375 C3.29739258 16.47929688 3.29739258 16.47929688 2.75195312 15.4140625 C0.37742917 10.70858383 -1.58321849 5.98797449 -3.35546875 1.0234375 C-2.35546875 0.0234375 -2.35546875 0.0234375 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(23.35546875,86.9765625)\"\n    />\n    <path\n      d=\"M0 0 C4.56944444 2.13888889 4.56944444 2.13888889 6 5 C6.58094684 9.76376411 6.98189835 13.6696861 4.0625 17.625 C-0.08290736 19.4862033 -3.52913433 19.80184004 -8 19 C-11.18487773 17.20850628 -12.56721386 16.06753914 -13.9375 12.6875 C-14.04047475 8.25958558 -13.25966827 4.50191217 -10.375 1.0625 C-6.92547207 -0.48070986 -3.67744273 -0.55453501 0 0 Z M-7.66796875 3.21484375 C-9.3387892 5.45403713 -9.40271257 6.72874309 -9.375 9.5 C-9.38273437 10.2734375 -9.39046875 11.046875 -9.3984375 11.84375 C-8.90844456 14.49547648 -8.12507645 15.38331504 -6 17 C-3.17884512 17.42317323 -1.66049093 17.38718434 0.8125 15.9375 C2.65621741 12.92932949 2.30257262 10.44932782 2 7 C1.54910181 4.59436406 1.54910181 4.59436406 0 3 C-4.00690889 1.63330935 -4.00690889 1.63330935 -7.66796875 3.21484375 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(58,93)\"\n    />\n    <path\n      d=\"M0 0 C0.91007812 0.00902344 1.82015625 0.01804687 2.7578125 0.02734375 C3.45648438 0.03894531 4.15515625 0.05054687 4.875 0.0625 C5.205 1.3825 5.535 2.7025 5.875 4.0625 C4.6375 3.815 3.4 3.5675 2.125 3.3125 C-1.0391959 2.93032359 -1.83705309 2.89394571 -4.6875 4.5625 C-6.71059726 8.08093001 -6.12332701 10.21181009 -5.125 14.0625 C-3.22744856 16.41223818 -3.22744856 16.41223818 0 16.1875 C0.94875 16.14625 1.8975 16.105 2.875 16.0625 C2.875 14.4125 2.875 12.7625 2.875 11.0625 C4.525 11.3925 6.175 11.7225 7.875 12.0625 C8.1875 14.375 8.1875 14.375 7.875 17.0625 C5.25185816 19.29988569 3.33979578 19.9932751 -0.0625 20.5 C-3.96030088 19.9431713 -6.06489651 18.49667323 -9.125 16.0625 C-11.6165904 12.3251144 -11.58293285 10.48918417 -11.125 6.0625 C-7.83836921 1.02299945 -5.86190884 -0.07515268 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(113.125,92.9375)\"\n    />\n    <path\n      d=\"M0 0 C4.28705043 1.42901681 5.23208702 4.57025431 7.1875 8.375 C7.55552734 9.06078125 7.92355469 9.7465625 8.30273438 10.453125 C11 15.59744608 11 15.59744608 11 19 C9.35 19 7.7 19 6 19 C5.67 17.68 5.34 16.36 5 15 C2.03 14.67 -0.94 14.34 -4 14 C-4.33 15.65 -4.66 17.3 -5 19 C-5.99 19 -6.98 19 -8 19 C-7.38188466 14.44684052 -5.53234107 10.71540233 -3.4375 6.6875 C-2.9434668 5.71973633 -2.9434668 5.71973633 -2.43945312 4.73242188 C-1.63175745 3.15214772 -0.81662387 1.57567895 0 0 Z M0 6 C-0.33 7.65 -0.66 9.3 -1 11 C0.32 11 1.64 11 3 11 C2.34 9.35 1.68 7.7 1 6 C0.67 6 0.34 6 0 6 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(90,93)\"\n    />\n    <path\n      d=\"M0 0 C3.63 0 7.26 0 11 0 C11 0.66 11 1.32 11 2 C8.69 2 6.38 2 4 2 C4 3.98 4 5.96 4 8 C5.98 8 7.96 8 10 8 C9.67 8.99 9.34 9.98 9 11 C7.68 11 6.36 11 5 11 C4.67 12.98 4.34 14.96 4 17 C7.465 16.505 7.465 16.505 11 16 C11 16.99 11 17.98 11 19 C7.37 19 3.74 19 0 19 C0 12.73 0 6.46 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(124,93)\"\n    />\n    <path\n      d=\"M0 0 C2.25 -0.3125 2.25 -0.3125 5 0 C9 4.10810811 9 4.10810811 9 7 C9.78375 6.21625 10.5675 5.4325 11.375 4.625 C12.91666667 3.08333333 14.45833333 1.54166667 16 0 C16.99 0 17.98 0 19 0 C17.84356383 2.5056117 16.63134741 4.4803655 14.9375 6.6875 C12.52118995 10.81861073 12.20924288 14.29203528 12 19 C10.68 19 9.36 19 8 19 C8.00902344 18.443125 8.01804687 17.88625 8.02734375 17.3125 C7.78294047 11.0217722 5.92390505 8.0388994 1.49609375 3.62890625 C0 2 0 2 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(64,93)\"\n    />\n    <path\n      d=\"M0 0 C1.32 0 2.64 0 4 0 C4 8.25 4 16.5 4 25 C2.68 25 1.36 25 0 25 C0 16.75 0 8.5 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(173,87)\"\n    />\n    <path\n      d=\"M0 0 C0.66 0.33 1.32 0.66 2 1 C1.125 5.75 1.125 5.75 0 8 C1.093125 7.95875 2.18625 7.9175 3.3125 7.875 C7 8 7 8 10 10 C4.555 10.495 4.555 10.495 -1 11 C-1.99 13.31 -2.98 15.62 -4 18 C-5.32 18 -6.64 18 -8 18 C-6.65150163 13.64029169 -4.95092154 9.68658562 -2.875 5.625 C-2.33617187 4.56539063 -1.79734375 3.50578125 -1.2421875 2.4140625 C-0.83226562 1.61742188 -0.42234375 0.82078125 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(154,94)\"\n    />\n    <path\n      d=\"M0 0 C0.66 0.33 1.32 0.66 2 1 C2 1.66 2 2.32 2 3 C1.34 3 0.68 3 0 3 C-0.05429959 4.74965358 -0.09292823 6.49979787 -0.125 8.25 C-0.14820313 9.22453125 -0.17140625 10.1990625 -0.1953125 11.203125 C0.00137219 14.0196498 0.55431084 15.60949036 2 18 C1.34 18.33 0.68 18.66 0 19 C-4.69653179 15.74855491 -4.69653179 15.74855491 -5.9375 12.6875 C-6.02161912 9.07037805 -5.30970069 6.36780178 -4 3 C-1.875 1.0625 -1.875 1.0625 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(50,93)\"\n    />\n    <path\n      d=\"M0 0 C2.79192205 -0.05380578 5.5828141 -0.09357669 8.375 -0.125 C9.1690625 -0.14175781 9.963125 -0.15851563 10.78125 -0.17578125 C12.85492015 -0.19335473 14.92883241 -0.10335168 17 0 C17.66 0.66 18.32 1.32 19 2 C17 4 17 4 13.0859375 4.1953125 C11.51550649 4.18200376 9.94513779 4.15813602 8.375 4.125 C7.57320312 4.11597656 6.77140625 4.10695312 5.9453125 4.09765625 C3.96341477 4.07406223 1.98167019 4.03819065 0 4 C0 2.68 0 1.36 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(92,187)\"\n    />\n    <path\n      d=\"M0 0 C0.99 0.33 1.98 0.66 3 1 C1.66666667 4.33333333 0.33333333 7.66666667 -1 11 C0.65 11 2.3 11 4 11 C4 11.33 4 11.66 4 12 C1.36 12.33 -1.28 12.66 -4 13 C-4.33 14.98 -4.66 16.96 -5 19 C-5.99 19 -6.98 19 -8 19 C-7.38188466 14.44684052 -5.53234107 10.71540233 -3.4375 6.6875 C-2.9434668 5.71973633 -2.9434668 5.71973633 -2.43945312 4.73242188 C-1.63175745 3.15214772 -0.81662387 1.57567895 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(90,93)\"\n    />\n    <path\n      d=\"M0 0 C0.99 0 1.98 0 3 0 C2.43454163 3.95820859 1.19097652 6.6659053 -1 10 C-1.66 9.67 -2.32 9.34 -3 9 C-2.44271087 5.65626525 -1.64826111 2.96687001 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(37,97)\"\n    />\n    <path\n      d=\"M0 0 C4.92127034 -0.16682272 8.50343896 -0.24828052 13 2 C9.60268371 4.09065618 6.95730595 4.42098999 3 4 C1.125 2.5625 1.125 2.5625 0 1 C0 0.67 0 0.34 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(110,12)\"\n    />\n    <path\n      d=\"M0 0 C0 0.99 0 1.98 0 3 C-3.08888522 5.05925681 -3.70935927 5.2390374 -7.1875 5.125 C-9.0746875 5.063125 -9.0746875 5.063125 -11 5 C-10.67 4.34 -10.34 3.68 -10 3 C-7.96875 2.40234375 -7.96875 2.40234375 -5.5 1.9375 C-2.46226779 1.54135157 -2.46226779 1.54135157 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(62,107)\"\n    />\n    <path\n      d=\"M0 0 C0.66 0.33 1.32 0.66 2 1 C1.25 5.75 1.25 5.75 -1 8 C-1.66 8 -2.32 8 -3 8 C-1.125 1.125 -1.125 1.125 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(154,94)\"\n    />\n    <path\n      d=\"M0 0 C2.64 0 5.28 0 8 0 C8.33 1.32 8.66 2.64 9 4 C6.03 3.01 3.06 2.02 0 1 C0 0.67 0 0.34 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(110,93)\"\n    />\n    <path\n      d=\"M0 0 C1.67542976 0.28604898 3.34385343 0.61781233 5 1 C4.67 2.32 4.34 3.64 4 5 C2.0625 4.6875 2.0625 4.6875 0 4 C-0.33 3.01 -0.66 2.02 -1 1 C-0.67 0.67 -0.34 0.34 0 0 Z \"\n      fill=\"currentColor\"\n      transform=\"translate(21,87)\"\n    />\n  </svg>\n);\nexport const WikipediaIcon = createLogoIcon(wikipediaIcon);\nexport const XenforoIcon = createLogoIcon(xenforoIcon);\nexport const ZAIIcon = createLogoIcon(zAIIcon);\nexport const ZendeskIcon = ({\n  size = 16,\n  className = defaultTailwindCSS,\n}: IconProps) => (\n  <div\n    className=\"rounded-full overflow-visible dark:overflow-hidden flex items-center justify-center dark:bg-[#fff]/90\"\n    style={{ width: size, height: size }}\n  >\n    <LogoIcon\n      size={\n        typeof window !== \"undefined\" &&\n        window.matchMedia(\"(prefers-color-scheme: dark)\").matches\n          ? size * 0.8\n          : size\n      }\n      className={`${className}`}\n      src={zendeskIcon}\n    />\n  </div>\n);\nexport const ZulipIcon = createLogoIcon(zulipIcon);\n\n// ============================================================================\n// FILE TYPE ICONS (Alphabetically)\n// ============================================================================\nexport const DOCIcon = ({\n  size = 24,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`text-blue-600 w-[${size}px] h-[${size}px] ` + className}\n      viewBox=\"0 0 24 24\"\n      xmlns=\"http://www.w3.org/2000/svg\"\n    >\n      <path\n        d=\"M15.5,17H14L12,9.5L10,17H8.5L6.1,7H7.8L9.34,14.5L11.3,7H12.7L14.67,14.5L16.2,7H17.9M19,3H5C3.89,3 3,3.89 3,5V19A2,2 0 0,0 5,21H19A2,2 0 0,0 21,19V5C21,3.89 20.1,3 19,3Z\"\n        fill=\"currentColor\"\n      />\n    </svg>\n  );\n};\nexport const HTMLIcon = ({\n  size = 24,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`text-orange-600 w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"24\"\n      height=\"24\"\n      viewBox=\"0 0 24 24\"\n    >\n      <path d=\"M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8l-6-6zm-1 2 5 5h-5V4zM8.531 18h-.76v-1.411H6.515V18h-.767v-3.373h.767v1.296h1.257v-1.296h.76V18zm3-2.732h-.921V18h-.766v-2.732h-.905v-.641h2.592v.641zM14.818 18l-.05-1.291c-.017-.405-.03-.896-.03-1.387h-.016c-.104.431-.245.911-.375 1.307l-.41 1.316h-.597l-.359-1.307a15.154 15.154 0 0 1-.306-1.316h-.011c-.021.456-.034.976-.059 1.396L12.545 18h-.705l.216-3.373h1.015l.331 1.126c.104.391.21.811.284 1.206h.017c.095-.391.209-.836.32-1.211l.359-1.121h.996L15.563 18h-.745zm3.434 0h-2.108v-3.373h.767v2.732h1.342V18z\"></path>\n    </svg>\n  );\n};\nexport const ImagesIcon = ({\n  size = 24,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`text-blue-600 w-[${size}px] h-[${size}px] ` + className}\n      viewBox=\"0 0 24 24\"\n      xmlns=\"http://www.w3.org/2000/svg\"\n    >\n      <path\n        d=\"M19 3H5C3.9 3 3 3.9 3 5V19C3 20.1 3.9 21 5 21H19C20.1 21 21 20.1 21 19V5C21 3.9 20.1 3 19 3M9 11.5C9 12.3 8.3 13 7.5 13H6.5V15H5V9H7.5C8.3 9 9 9.7 9 10.5V11.5M14 15H12.5L11.5 12.5V15H10V9H11.5L12.5 11.5V9H14V15M19 10.5H16.5V13.5H17.5V12H19V13.7C19 14.4 18.5 15 17.7 15H16.4C15.6 15 15.1 14.3 15.1 13.7V10.4C15 9.7 15.5 9 16.3 9H17.6C18.4 9 18.9 9.7 18.9 10.3V10.5H19M6.5 10.5H7.5V11.5H6.5V10.5Z\"\n        fill=\"currentColor\"\n      />\n    </svg>\n  );\n};\nexport const JSONIcon = ({\n  size = 24,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`text-yellow-500 w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"200\"\n      height=\"200\"\n      viewBox=\"0 0 24 24\"\n    >\n      <path\n        fill=\"currentColor\"\n        d=\"M5 3h14a2 2 0 0 1 2 2v14a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2m3.25 8a1.25 1.25 0 1 0-2.5 0v2a1.25 1.25 0 1 0 2.5 0v-2m4.25-1.25a1.25 1.25 0 0 0-1.25 1.25v2a1.25 1.25 0 1 0 2.5 0v-2a1.25 1.25 0 0 0-1.25-1.25m4.25 1.25a1.25 1.25 0 1 0-2.5 0v2a1.25 1.25 0 1 0 2.5 0v-2z\"\n      />\n    </svg>\n  );\n};\nexport const PDFIcon = ({\n  size = 24,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`text-red-500 w-[${size}px] h-[${size}px] ` + className}\n      viewBox=\"0 0 24 24\"\n      xmlns=\"http://www.w3.org/2000/svg\"\n    >\n      <path\n        d=\"M19 3H5C3.9 3 3 3.9 3 5V19C3 20.1 3.9 21 5 21H19C20.1 21 21 20.1 21 19V5C21 3.9 20.1 3 19 3M9.5 11.5C9.5 12.3 8.8 13 8 13H7V15H5.5V9H8C8.8 9 9.5 9.7 9.5 10.5V11.5M14.5 13.5C14.5 14.3 13.8 15 13 15H10.5V9H13C13.8 9 14.5 9.7 14.5 10.5V13.5M18.5 10.5H17V11.5H18.5V13H17V15H15.5V9H18.5V10.5M12 10.5H13V13.5H12V10.5M7 10.5H8V11.5H7V10.5Z\"\n        fill=\"currentColor\"\n      />\n    </svg>\n  );\n};\nexport const TXTIcon = ({\n  size = 24,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`text-blue-600 w-[${size}px] h-[${size}px] ` + className}\n      xmlns=\"http://www.w3.org/2000/svg\"\n      width=\"24\"\n      height=\"24\"\n      fill=\"currentColor\"\n      viewBox=\"0 0 24 24\"\n    >\n      <path d=\"M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8l-6-6zM9.998 14.768H8.895v3.274h-.917v-3.274H6.893V14h3.105v.768zm2.725 3.274-.365-.731c-.15-.282-.246-.492-.359-.726h-.013c-.083.233-.185.443-.312.726l-.335.731h-1.045l1.171-2.045L10.336 14h1.05l.354.738c.121.245.21.443.306.671h.013c.096-.258.174-.438.276-.671l.341-.738h1.043l-1.139 1.973 1.198 2.069h-1.055zm4.384-3.274h-1.104v3.274h-.917v-3.274h-1.085V14h3.105v.768zM14 9h-1V4l5 5h-4z\"></path>\n    </svg>\n  );\n};\nexport const XMLIcon = ({\n  size = 24,\n  className = defaultTailwindCSS,\n}: IconProps) => {\n  return (\n    <svg\n      style={{ width: `${size}px`, height: `${size}px` }}\n      className={`text-teal-500 w-[${size}px] h-[${size}px] ` + className}\n      viewBox=\"0 0 24 24\"\n      xmlns=\"http://www.w3.org/2000/svg\"\n    >\n      <path\n        d=\"M19 3H5C3.89 3 3 3.89 3 5V19C3 20.11 3.89 21 5 21H19C20.11 21 21 20.11 21 19V5C21 3.89 20.11 3 19 3M8 15H6.5L6 13L5.5 15H4L4.75 12L4 9H5.5L6 11L6.5 9H8L7.25 12L8 15M15.5 15H14V10.5H13V14H11.5V10.5H10.5V15H9V11C9 9.9 9.9 9 11 9H13.5C14.61 9 15.5 9.9 15.5 11V15M20 15H17V9H18.5V13.5H20V15Z\"\n        fill=\"currentColor\"\n      />\n    </svg>\n  );\n};\n"
  },
  {
    "path": "web/src/components/llm/LLMSelector.tsx",
    "content": "\"use client\";\n\nimport { useMemo } from \"react\";\nimport { parseLlmDescriptor, structureValue } from \"@/lib/llmConfig/utils\";\nimport { DefaultModel, LLMProviderDescriptor } from \"@/interfaces/llm\";\nimport { getProviderIcon } from \"@/app/admin/configuration/llm/utils\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport { createIcon } from \"@/components/icons/icons\";\n\ninterface LLMOption {\n  name: string;\n  value: string;\n  icon: ReturnType<typeof getProviderIcon>;\n  modelName: string;\n  providerName: string;\n  provider: string;\n  providerDisplayName: string;\n  supportsImageInput: boolean;\n  vendor: string | null;\n}\n\nexport interface LLMSelectorProps {\n  name?: string;\n  userSettings?: boolean;\n  llmProviders: LLMProviderDescriptor[];\n  defaultText?: DefaultModel | null;\n  currentLlm: string | null;\n  onSelect: (value: string | null) => void;\n  requiresImageGeneration?: boolean;\n  excludePublicProviders?: boolean;\n}\n\nexport default function LLMSelector({\n  name,\n  userSettings,\n  llmProviders,\n  defaultText,\n  currentLlm,\n  onSelect,\n  requiresImageGeneration,\n  excludePublicProviders = false,\n}: LLMSelectorProps) {\n  const currentDescriptor = useMemo(\n    () => (currentLlm ? parseLlmDescriptor(currentLlm) : null),\n    [currentLlm]\n  );\n\n  const llmOptions = useMemo(() => {\n    const seenKeys = new Set<string>();\n    const options: LLMOption[] = [];\n\n    llmProviders.forEach((provider) => {\n      provider.model_configurations.forEach((modelConfiguration) => {\n        // Use the display name if it is available, otherwise use the model name\n        const displayName =\n          modelConfiguration.display_name || modelConfiguration.name;\n\n        const matchesCurrentSelection =\n          currentDescriptor?.modelName === modelConfiguration.name &&\n          (currentDescriptor?.provider === provider.provider ||\n            currentDescriptor?.name === provider.name);\n\n        if (!modelConfiguration.is_visible && !matchesCurrentSelection) {\n          return;\n        }\n\n        const key = `${provider.provider}:${modelConfiguration.name}`;\n        if (seenKeys.has(key)) {\n          return; // Skip exact duplicate\n        }\n        seenKeys.add(key);\n\n        const supportsImageInput =\n          modelConfiguration.supports_image_input || false;\n\n        // If the model does not support image input and we require image generation, skip it\n        if (requiresImageGeneration && !supportsImageInput) {\n          return;\n        }\n\n        const option: LLMOption = {\n          name: displayName,\n          value: structureValue(\n            provider.name,\n            provider.provider,\n            modelConfiguration.name\n          ),\n          icon: getProviderIcon(provider.provider, modelConfiguration.name),\n          modelName: modelConfiguration.name,\n          providerName: provider.name,\n          provider: provider.provider,\n          providerDisplayName:\n            provider.provider_display_name || provider.provider,\n          supportsImageInput,\n          vendor: modelConfiguration.vendor || null,\n        };\n\n        options.push(option);\n      });\n    });\n\n    return options;\n  }, [\n    llmProviders,\n    currentDescriptor?.modelName,\n    currentDescriptor?.provider,\n    currentDescriptor?.name,\n    requiresImageGeneration,\n  ]);\n\n  // Group options by provider using backend-provided display names\n  const groupedOptions = useMemo(() => {\n    const groups = new Map<\n      string,\n      { displayName: string; options: LLMOption[] }\n    >();\n\n    llmOptions.forEach((option) => {\n      const provider = option.provider.toLowerCase();\n      if (!groups.has(provider)) {\n        groups.set(provider, {\n          displayName: option.providerDisplayName,\n          options: [],\n        });\n      }\n      groups.get(provider)!.options.push(option);\n    });\n\n    // Sort groups alphabetically by display name\n    const sortedProviders = Array.from(groups.keys()).sort((a, b) =>\n      groups.get(a)!.displayName.localeCompare(groups.get(b)!.displayName)\n    );\n\n    return sortedProviders.map((provider) => {\n      const group = groups.get(provider)!;\n      return {\n        provider,\n        displayName: group.displayName,\n        options: group.options,\n      };\n    });\n  }, [llmOptions]);\n\n  const defaultProvider = defaultText\n    ? llmProviders.find((p) => p.id === defaultText.provider_id)\n    : undefined;\n\n  const defaultModelName = defaultText?.model_name;\n  const defaultModelConfig = defaultProvider?.model_configurations.find(\n    (m) => m.name === defaultModelName\n  );\n  const defaultModelDisplayName = defaultModelConfig\n    ? defaultModelConfig.display_name || defaultModelConfig.name\n    : defaultModelName || null;\n  const defaultLabel = userSettings ? \"System Default\" : \"User Default\";\n\n  // Determine if we should show grouped view (only if we have multiple vendors)\n  const showGrouped = groupedOptions.length > 1;\n\n  return (\n    <InputSelect\n      value={currentLlm ? currentLlm : \"default\"}\n      onValueChange={(value) => onSelect(value === \"default\" ? null : value)}\n    >\n      <InputSelect.Trigger id={name} name={name} placeholder={defaultLabel} />\n\n      <InputSelect.Content>\n        {!excludePublicProviders && (\n          <InputSelect.Item\n            value=\"default\"\n            description={\n              userSettings && defaultModelDisplayName\n                ? `(${defaultModelDisplayName})`\n                : undefined\n            }\n          >\n            {defaultLabel}\n          </InputSelect.Item>\n        )}\n        {showGrouped\n          ? groupedOptions.map((group) => (\n              <InputSelect.Group key={group.provider}>\n                <InputSelect.Label>{group.displayName}</InputSelect.Label>\n                {group.options.map((option) => (\n                  <InputSelect.Item\n                    key={option.value}\n                    value={option.value}\n                    icon={createIcon(option.icon)}\n                  >\n                    {option.name}\n                  </InputSelect.Item>\n                ))}\n              </InputSelect.Group>\n            ))\n          : llmOptions.map((option) => (\n              <InputSelect.Item\n                key={option.value}\n                value={option.value}\n                icon={createIcon(option.icon)}\n              >\n                {option.name}\n              </InputSelect.Item>\n            ))}\n      </InputSelect.Content>\n    </InputSelect>\n  );\n}\n"
  },
  {
    "path": "web/src/components/loading.css",
    "content": ".loading {\n  font-size: 1.5rem;\n  font-weight: bold;\n}\n\n.dots {\n  animation: blink 1s linear infinite;\n}\n\n@keyframes blink {\n  0%,\n  100% {\n    opacity: 1;\n  }\n  50% {\n    opacity: 0.5;\n  }\n}\n"
  },
  {
    "path": "web/src/components/modals/AddInstructionModal.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useState } from \"react\";\nimport { Button } from \"@opal/components\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport InputTextArea from \"@/refresh-components/inputs/InputTextArea\";\nimport { useModal } from \"@/refresh-components/contexts/ModalContext\";\nimport { SvgAddLines } from \"@opal/icons\";\nimport Modal from \"@/refresh-components/Modal\";\n\nexport default function AddInstructionModal() {\n  const modal = useModal();\n  const { currentProjectDetails, upsertInstructions } = useProjectsContext();\n  const [instructionText, setInstructionText] = useState(\"\");\n\n  useEffect(() => {\n    if (!modal.isOpen) return;\n    const preset = currentProjectDetails?.project?.instructions ?? \"\";\n    setInstructionText(preset);\n  }, [modal.isOpen, currentProjectDetails?.project?.instructions]);\n\n  async function handleSubmit() {\n    const value = instructionText.trim();\n    try {\n      await upsertInstructions(value);\n    } catch (e) {\n      console.error(\"Failed to save instructions\", e);\n    }\n    modal.toggle(false);\n  }\n\n  return (\n    <Modal open={modal.isOpen} onOpenChange={modal.toggle}>\n      <Modal.Content width=\"sm\">\n        <Modal.Header\n          icon={SvgAddLines}\n          title=\"Set Project Instructions\"\n          description=\"Specify the behaviors or tone for the chat sessions in this project.\"\n          onClose={() => modal.toggle(false)}\n        />\n        <Modal.Body>\n          <InputTextArea\n            value={instructionText}\n            onChange={(event) => setInstructionText(event.target.value)}\n            placeholder=\"My goal with is to... be sure to... in your responses.\"\n          />\n        </Modal.Body>\n        <Modal.Footer>\n          <Button prominence=\"secondary\" onClick={() => modal.toggle(false)}>\n            Cancel\n          </Button>\n          <Button onClick={handleSubmit}>Save Instructions</Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/components/modals/ConfirmEntityModal.tsx",
    "content": "import Modal from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgAlertCircle } from \"@opal/icons\";\nimport type { IconProps } from \"@opal/types\";\n\nexport interface ConfirmEntityModalProps {\n  danger?: boolean;\n\n  onClose: () => void;\n  onSubmit: () => void;\n\n  icon?: React.FunctionComponent<IconProps>;\n\n  entityType: string;\n  entityName: string;\n\n  additionalDetails?: string;\n\n  action?: string;\n  actionButtonText?: string;\n\n  removeConfirmationText?: boolean;\n}\n\nexport function ConfirmEntityModal({\n  danger,\n\n  onClose,\n  onSubmit,\n\n  icon: Icon,\n\n  entityType,\n  entityName,\n\n  additionalDetails,\n\n  action,\n  actionButtonText,\n\n  removeConfirmationText = false,\n}: ConfirmEntityModalProps) {\n  const buttonText = actionButtonText\n    ? actionButtonText\n    : danger\n      ? \"Delete\"\n      : \"Confirm\";\n  const actionText = action ? action : danger ? \"delete\" : \"modify\";\n\n  return (\n    <Modal\n      icon={Icon || SvgAlertCircle}\n      title={`${buttonText} ${entityType}`}\n      onClose={onClose}\n      submit={\n        <Button variant={danger ? \"danger\" : \"default\"} onClick={onSubmit}>\n          {buttonText}\n        </Button>\n      }\n    >\n      <div className=\"flex flex-col gap-4\">\n        {!removeConfirmationText && (\n          <Text as=\"p\">\n            Are you sure you want to {actionText} <b>{entityName}</b>?\n          </Text>\n        )}\n\n        {additionalDetails && (\n          <Text as=\"p\" text03>\n            {additionalDetails}\n          </Text>\n        )}\n      </div>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/components/modals/CreateProjectModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { Button } from \"@opal/components\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport { useKeyPress } from \"@/hooks/useKeyPress\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport { useAppRouter } from \"@/hooks/appNavigation\";\nimport { useModal } from \"@/refresh-components/contexts/ModalContext\";\nimport { SvgFolderPlus } from \"@opal/icons\";\nimport Modal from \"@/refresh-components/Modal\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { toast } from \"@/hooks/useToast\";\n\ninterface CreateProjectModalProps {\n  initialProjectName?: string;\n}\n\nexport default function CreateProjectModal({\n  initialProjectName,\n}: CreateProjectModalProps) {\n  const { createProject } = useProjectsContext();\n  const modal = useModal();\n  const route = useAppRouter();\n  const [projectName, setProjectName] = useState(initialProjectName ?? \"\");\n\n  // Reset when prop changes (modal reopens with different value)\n  useEffect(() => {\n    setProjectName(initialProjectName ?? \"\");\n  }, [initialProjectName]);\n\n  async function handleSubmit() {\n    const name = projectName.trim();\n    if (!name) return;\n\n    try {\n      const newProject = await createProject(name);\n      route({ projectId: newProject.id });\n      modal.toggle(false);\n    } catch (e) {\n      toast.error(`Failed to create the project ${name}`);\n    }\n  }\n\n  useKeyPress(handleSubmit, \"Enter\");\n\n  return (\n    <>\n      <Modal open={modal.isOpen} onOpenChange={modal.toggle}>\n        <Modal.Content width=\"sm\">\n          <Modal.Header\n            icon={SvgFolderPlus}\n            title=\"Create New Project\"\n            description=\"Use projects to organize your files and chats in one place, and add custom instructions for ongoing work.\"\n            onClose={() => modal.toggle(false)}\n          />\n          <Modal.Body>\n            <InputLayouts.Vertical title=\"Project Name\">\n              <InputTypeIn\n                value={projectName}\n                onChange={(e) => setProjectName(e.target.value)}\n                placeholder=\"What are you working on?\"\n                showClearButton\n              />\n            </InputLayouts.Vertical>\n          </Modal.Body>\n          <Modal.Footer>\n            <Button prominence=\"secondary\" onClick={() => modal.toggle(false)}>\n              Cancel\n            </Button>\n            <Button disabled={!projectName.trim()} onClick={handleSubmit}>\n              Create Project\n            </Button>\n          </Modal.Footer>\n        </Modal.Content>\n      </Modal>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/components/modals/EditPropertyModal.tsx",
    "content": "import { Formik, Form } from \"formik\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport { TextFormField } from \"@/components/Field\";\nimport { SvgEdit } from \"@opal/icons\";\nexport interface EditPropertyModalProps {\n  propertyTitle: string;\n  propertyDetails?: string;\n  propertyName: string;\n  propertyValue: string;\n  validationSchema: any;\n  onClose: () => void;\n  onSubmit: (propertyName: string, propertyValue: string) => Promise<void>;\n}\n\nexport default function EditPropertyModal({\n  propertyTitle, // A friendly title to be displayed for the property\n  propertyDetails, // a helpful description of the property to be displayed, (Valid ranges, units, etc)\n  propertyName, // the programmatic property name\n  propertyValue, // the programmatic property value (current)\n  validationSchema, // Allow custom Yup schemas ... set on \"propertyValue\"\n  onClose,\n  onSubmit,\n}: EditPropertyModalProps) {\n  return (\n    <Modal open onOpenChange={onClose}>\n      <Modal.Content width=\"sm\">\n        <Modal.Header\n          icon={SvgEdit}\n          title={`Edit ${propertyTitle}`}\n          onClose={onClose}\n        />\n        <Modal.Body>\n          <Formik\n            initialValues={{\n              propertyName: propertyName,\n              propertyValue: propertyValue,\n            }}\n            validationSchema={validationSchema}\n            onSubmit={(values) => {\n              onSubmit(values.propertyName, values.propertyValue);\n              onClose();\n            }}\n          >\n            {({ isSubmitting, isValid, values }) => (\n              <Form className=\"w-full\">\n                <TextFormField\n                  vertical\n                  label={propertyDetails || \"\"}\n                  name=\"propertyValue\"\n                  placeholder=\"Property value\"\n                />\n\n                <Modal.Footer>\n                  <Button\n                    disabled={\n                      isSubmitting ||\n                      !isValid ||\n                      values.propertyValue === propertyValue\n                    }\n                    type=\"submit\"\n                  >\n                    {isSubmitting ? \"Updating...\" : \"Update property\"}\n                  </Button>\n                </Modal.Footer>\n              </Form>\n            )}\n          </Formik>\n        </Modal.Body>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/components/modals/GenericConfirmModal.tsx",
    "content": "import Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgCheck } from \"@opal/icons\";\nexport interface GenericConfirmModalProps {\n  title: string;\n  message: string;\n  confirmText?: string;\n  onClose: () => void;\n  onConfirm: () => void;\n}\n\nexport default function GenericConfirmModal({\n  title,\n  message,\n  confirmText = \"Confirm\",\n  onClose,\n  onConfirm,\n}: GenericConfirmModalProps) {\n  return (\n    <Modal open onOpenChange={onClose}>\n      <Modal.Content width=\"sm\" height=\"sm\">\n        <Modal.Header icon={SvgCheck} title={title} onClose={onClose} />\n        <Modal.Body>\n          <Text as=\"p\">{message}</Text>\n        </Modal.Body>\n        <Modal.Footer>\n          <Button onClick={onConfirm}>{confirmText}</Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/components/modals/MoveCustomAgentChatModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport { Button } from \"@opal/components\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgAlertCircle } from \"@opal/icons\";\ninterface MoveCustomAgentChatModalProps {\n  onCancel: () => void;\n  onConfirm: (doNotShowAgain: boolean) => void;\n}\n\nexport default function MoveCustomAgentChatModal({\n  onCancel,\n  onConfirm,\n}: MoveCustomAgentChatModalProps) {\n  const [doNotShowAgain, setDoNotShowAgain] = useState(false);\n\n  return (\n    <ConfirmationModalLayout\n      icon={SvgAlertCircle}\n      title=\"Move Custom Agent Chat\"\n      onClose={onCancel}\n      submit={\n        <Button onClick={() => onConfirm(doNotShowAgain)}>Confirm Move</Button>\n      }\n    >\n      <div className=\"flex flex-col gap-4\">\n        <Text as=\"p\" text03>\n          This chat uses a <b>custom agent</b> and moving it to a <b>project</b>{\" \"}\n          will not override the agent&apos;s prompt or knowledge configurations.\n          This should only be used for organization purposes.\n        </Text>\n        <div className=\"flex items-center gap-1\">\n          <Checkbox\n            id=\"move-custom-agent-do-not-show\"\n            checked={doNotShowAgain}\n            onCheckedChange={(checked) => setDoNotShowAgain(Boolean(checked))}\n          />\n          <label\n            htmlFor=\"move-custom-agent-do-not-show\"\n            className=\"text-text-03 text-sm\"\n          >\n            Do not show this again\n          </label>\n        </div>\n      </div>\n    </ConfirmationModalLayout>\n  );\n}\n"
  },
  {
    "path": "web/src/components/modals/NewTeamModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { useRouter, useSearchParams } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { Dialog } from \"@headlessui/react\";\nimport { Button } from \"@opal/components\";\nimport { toast } from \"@/hooks/useToast\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { useModalContext } from \"../context/ModalContext\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport {\n  SvgArrowRight,\n  SvgArrowUp,\n  SvgCheckCircle,\n  SvgOrganization,\n  SvgPlus,\n} from \"@opal/icons\";\nexport interface TenantByDomainResponse {\n  tenant_id: string;\n  number_of_users: number;\n  creator_email: string;\n}\n\nexport default function NewTeamModal() {\n  const { showNewTeamModal, setShowNewTeamModal } = useModalContext();\n  const [existingTenant, setExistingTenant] =\n    useState<TenantByDomainResponse | null>(null);\n  const [isLoading, setIsLoading] = useState(true);\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const [hasRequestedInvite, setHasRequestedInvite] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n\n  const { user } = useUser();\n  const appDomain = user?.email.split(\"@\")[1];\n  const router = useRouter();\n  const searchParams = useSearchParams();\n\n  useEffect(() => {\n    const hasNewTeamParam = searchParams?.has(\"new_team\");\n    if (hasNewTeamParam) {\n      setShowNewTeamModal(true);\n      fetchTenantInfo();\n\n      // Remove the new_team parameter from the URL without page reload\n      const newParams = new URLSearchParams(searchParams?.toString() || \"\");\n      newParams.delete(\"new_team\");\n      const newUrl =\n        window.location.pathname +\n        (newParams.toString() ? `?${newParams.toString()}` : \"\");\n      window.history.replaceState({}, \"\", newUrl);\n    }\n  }, [searchParams, setShowNewTeamModal]);\n\n  const fetchTenantInfo = async () => {\n    setIsLoading(true);\n    setError(null);\n\n    try {\n      const response = await fetch(\"/api/tenants/existing-team-by-domain\");\n      if (!response.ok) {\n        throw new Error(`Failed to fetch team info: ${response.status}`);\n      }\n      const responseJson = await response.json();\n      if (!responseJson) {\n        setShowNewTeamModal(false);\n        setExistingTenant(null);\n        return;\n      }\n\n      const data = responseJson as TenantByDomainResponse;\n      setExistingTenant(data);\n    } catch (error) {\n      console.error(\"Failed to fetch tenant info:\", error);\n      setError(\"Could not retrieve team information. Please try again later.\");\n    } finally {\n      setIsLoading(false);\n    }\n  };\n\n  const handleRequestInvite = async () => {\n    if (!existingTenant) return;\n\n    setIsSubmitting(true);\n    setError(null);\n\n    try {\n      const response = await fetch(\"/api/tenants/users/invite/request\", {\n        method: \"POST\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({ tenant_id: existingTenant.tenant_id }),\n      });\n\n      if (!response.ok) {\n        const errorData = await response.json().catch(() => ({}));\n        throw new Error(\n          errorData.detail || errorData.message || \"Failed to request invite\"\n        );\n      }\n\n      setHasRequestedInvite(true);\n      toast.success(\"Your invite request has been sent to the team admin.\");\n    } catch (error) {\n      const message =\n        error instanceof Error ? error.message : \"Failed to request an invite\";\n      setError(message);\n      toast.error(message);\n    } finally {\n      setIsSubmitting(false);\n    }\n  };\n\n  const handleContinueToNewOrg = () => {\n    const newUrl = window.location.pathname;\n    router.replace(newUrl as Route);\n    setShowNewTeamModal(false);\n  };\n\n  // Update the close handler to use the context\n  const handleClose = () => {\n    setShowNewTeamModal(false);\n  };\n\n  // Only render if showNewTeamModal is true\n  if (!showNewTeamModal || isLoading) return null;\n\n  return (\n    <Dialog\n      open={showNewTeamModal}\n      onClose={handleClose}\n      className=\"relative z-[1000]\"\n    >\n      {/* Modal backdrop */}\n      <div className=\"fixed inset-0 bg-mask-03\" aria-hidden=\"true\" />\n\n      <div className=\"fixed inset-0 flex items-center justify-center p-4\">\n        <Dialog.Panel className=\"mx-auto w-full max-w-md rounded-lg bg-background-neutral-00 p-6 shadow-xl border\">\n          <Dialog.Title className=\"text-xl font-semibold mb-4 flex items-center\">\n            {hasRequestedInvite ? (\n              <>\n                <SvgCheckCircle className=\"mr-2 h-5 w-5 stroke-text-05\" />\n                Join Request Sent\n              </>\n            ) : (\n              <>\n                <SvgOrganization className=\"mr-2 h-5 w-5 stroke-text-04\" />\n                We found an existing team for {appDomain}\n              </>\n            )}\n          </Dialog.Title>\n\n          {isLoading ? (\n            <div className=\"py-8 text-center\">\n              <div className=\"animate-spin rounded-full h-8 w-8 border-b-2 border-border-05 mx-auto mb-4\"></div>\n              <p>Loading team information...</p>\n            </div>\n          ) : error ? (\n            <div className=\"space-y-4\">\n              <p className=\"text-status-text-error-05\">{error}</p>\n              <div className=\"flex w-full pt-2\">\n                <Button\n                  onClick={handleContinueToNewOrg}\n                  width=\"full\"\n                  rightIcon={SvgArrowRight}\n                >\n                  Continue with new team\n                </Button>\n              </div>\n            </div>\n          ) : hasRequestedInvite ? (\n            <div className=\"space-y-4\">\n              <p className=\"text-text-04\">\n                Your join request has been sent. You can explore as your own\n                team while waiting for an admin of {appDomain} to approve your\n                request.\n              </p>\n              <div className=\"flex w-full pt-2\">\n                <Button\n                  onClick={handleContinueToNewOrg}\n                  width=\"full\"\n                  rightIcon={SvgArrowRight}\n                >\n                  Try Onyx while waiting\n                </Button>\n              </div>\n            </div>\n          ) : (\n            <div className=\"space-y-4\">\n              <p className=\"text-text-03 text-sm mb-2\">\n                Your join request can be approved by any admin of {appDomain}.\n              </p>\n              <div className=\"flex flex-col items-center justify-center gap-4 mt-4\">\n                <Button\n                  disabled={isSubmitting}\n                  onClick={handleRequestInvite}\n                  width=\"full\"\n                  icon={isSubmitting ? SimpleLoader : SvgArrowUp}\n                >\n                  {isSubmitting\n                    ? \"Sending request...\"\n                    : \"Request to join your team\"}\n                </Button>\n              </div>\n              <Button\n                onClick={handleContinueToNewOrg}\n                width=\"full\"\n                icon={SvgPlus}\n                prominence=\"secondary\"\n              >\n                Continue with new team\n              </Button>\n            </div>\n          )}\n        </Dialog.Panel>\n      </div>\n    </Dialog>\n  );\n}\n"
  },
  {
    "path": "web/src/components/modals/NoAgentModal.tsx",
    "content": "\"use client\";\n\nimport Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { SvgUser } from \"@opal/icons\";\n\nexport default function NoAgentModal() {\n  const { isAdmin } = useUser();\n\n  return (\n    <Modal open>\n      <Modal.Content width=\"sm\" height=\"sm\">\n        <Modal.Header icon={SvgUser} title=\"No Agent Available\" />\n        <Modal.Body>\n          <Text as=\"p\">\n            You currently have no agent configured. To use this feature, you\n            need to take action.\n          </Text>\n          {isAdmin ? (\n            <>\n              <Text as=\"p\">\n                As an administrator, you can create a new agent by visiting the\n                admin panel.\n              </Text>\n              <Button width=\"full\" href=\"/admin/agents\">\n                Go to Admin Panel\n              </Button>\n            </>\n          ) : (\n            <Text as=\"p\">\n              Please contact your administrator to configure an agent for you.\n            </Text>\n          )}\n        </Modal.Body>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/components/modals/ProviderModal.tsx",
    "content": "import React from \"react\";\nimport { Button } from \"@opal/components\";\nimport type { IconProps } from \"@opal/types\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { SvgLoader } from \"@opal/icons\";\nexport interface ProviderModalProps {\n  // Modal configurations\n  clickOutsideToClose?: boolean;\n\n  // Base modal props\n  open: boolean;\n  onOpenChange: (open: boolean) => void;\n  icon: React.FunctionComponent<IconProps>;\n  title: string;\n  description?: string;\n  className?: string;\n  children?: React.ReactNode;\n\n  // Footer props\n  onSubmit?: () => void;\n  submitDisabled?: boolean;\n  isSubmitting?: boolean;\n  submitLabel?: string;\n  cancelLabel?: string;\n}\n\nexport default function ProviderModal({\n  open,\n  onOpenChange,\n  icon: icon,\n  title,\n  description,\n  children,\n  onSubmit,\n  submitDisabled = false,\n  isSubmitting = false,\n  submitLabel = \"Connect\",\n  cancelLabel = \"Cancel\",\n}: ProviderModalProps) {\n  const SpinningLoader: React.FunctionComponent<IconProps> = (props) => (\n    <SvgLoader\n      {...props}\n      className={`${\n        props.className ?? \"\"\n      } h-3 w-3 stroke-text-inverted-04 animate-spin`}\n    />\n  );\n\n  const handleOpenChange = (isOpen: boolean) => {\n    if (!isOpen) {\n      onOpenChange(false);\n    }\n  };\n\n  const handleKeyDown = (e: React.KeyboardEvent) => {\n    if (e.key === \"Enter\" && onSubmit && !submitDisabled && !isSubmitting) {\n      // Check if the target is not a textarea (allow Enter in textareas)\n      if ((e.target as HTMLElement).tagName !== \"TEXTAREA\") {\n        e.preventDefault();\n        onSubmit();\n      }\n    }\n  };\n\n  return (\n    <Modal open={open} onOpenChange={handleOpenChange}>\n      <Modal.Content width=\"sm\" height=\"lg\" onKeyDown={handleKeyDown}>\n        <Modal.Header\n          icon={icon}\n          title={title}\n          description={description}\n          onClose={() => onOpenChange(false)}\n        />\n\n        <Modal.Body>{children}</Modal.Body>\n\n        {onSubmit && (\n          <Modal.Footer>\n            <Button\n              prominence=\"secondary\"\n              type=\"button\"\n              onClick={() => onOpenChange(false)}\n            >\n              {cancelLabel}\n            </Button>\n            <Button\n              disabled={submitDisabled || isSubmitting}\n              type=\"button\"\n              onClick={onSubmit}\n              icon={isSubmitting ? SpinningLoader : undefined}\n            >\n              {submitLabel}\n            </Button>\n          </Modal.Footer>\n        )}\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/components/modals/UserFilesModal.tsx",
    "content": "\"use client\";\n\nimport React, { useRef, useState, useEffect, useMemo } from \"react\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { ProjectFile } from \"@/providers/ProjectsContext\";\nimport { formatRelativeTime } from \"@/app/app/components/projects/project_utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport type { IconProps } from \"@opal/types\";\nimport { getFileExtension, isImageExtension } from \"@/lib/utils\";\nimport { UserFileStatus } from \"@/app/app/projects/projectsService\";\nimport CreateButton from \"@/refresh-components/buttons/CreateButton\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport AttachmentButton from \"@/refresh-components/buttons/AttachmentButton\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { useModal } from \"@/refresh-components/contexts/ModalContext\";\nimport TextSeparator from \"@/refresh-components/TextSeparator\";\nimport {\n  SvgEye,\n  SvgFiles,\n  SvgFileText,\n  SvgImage,\n  SvgTrash,\n  SvgXCircle,\n} from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport useFilter from \"@/hooks/useFilter\";\nimport { Button } from \"@opal/components\";\nimport ScrollIndicatorDiv from \"@/refresh-components/ScrollIndicatorDiv\";\n\nfunction getIcon(\n  file: ProjectFile,\n  isProcessing: boolean\n): React.FunctionComponent<IconProps> {\n  if (isProcessing) return SimpleLoader;\n  const ext = getFileExtension(file.name).toLowerCase();\n  if (isImageExtension(ext)) return SvgImage;\n  return SvgFileText;\n}\n\nfunction getDescription(file: ProjectFile): string {\n  const s = String(file.status || \"\");\n  const typeLabel = getFileExtension(file.name);\n  if (s === UserFileStatus.PROCESSING) return \"Processing...\";\n  if (s === UserFileStatus.UPLOADING) return \"Uploading...\";\n  if (s === UserFileStatus.DELETING) return \"Deleting...\";\n  if (s === UserFileStatus.COMPLETED) return typeLabel;\n  return file.status ?? typeLabel;\n}\n\ninterface FileAttachmentProps {\n  file: ProjectFile;\n  isSelected: boolean;\n  onClick?: () => void;\n  onView?: () => void;\n  onDelete?: () => void;\n}\n\nfunction FileAttachment({\n  file,\n  isSelected,\n  onClick,\n  onView,\n  onDelete,\n}: FileAttachmentProps) {\n  const isProcessing =\n    String(file.status) === UserFileStatus.PROCESSING ||\n    String(file.status) === UserFileStatus.UPLOADING ||\n    String(file.status) === UserFileStatus.DELETING;\n\n  const Icon = getIcon(file, isProcessing);\n  const description = getDescription(file);\n  const rightText = file.last_accessed_at\n    ? formatRelativeTime(file.last_accessed_at)\n    : \"\";\n\n  return (\n    <AttachmentButton\n      onClick={onClick}\n      icon={Icon}\n      description={description}\n      rightText={rightText}\n      selected={isSelected}\n      processing={isProcessing}\n      onView={onView}\n      actionIcon={SvgTrash}\n      onAction={onDelete}\n    >\n      {file.name}\n    </AttachmentButton>\n  );\n}\n\nexport interface UserFilesModalProps {\n  // Modal content\n  title: string;\n  description: string;\n  recentFiles: ProjectFile[];\n  handleUploadChange?: (e: React.ChangeEvent<HTMLInputElement>) => void;\n  selectedFileIds?: string[];\n\n  // FileAttachment related\n  onView?: (file: ProjectFile) => void;\n  onDelete?: (file: ProjectFile) => void;\n  onPickRecent?: (file: ProjectFile) => void;\n  onUnpickRecent?: (file: ProjectFile) => void;\n}\n\nexport default function UserFilesModal({\n  title,\n  description,\n  recentFiles,\n  handleUploadChange,\n  selectedFileIds,\n\n  onView,\n  onDelete,\n  onPickRecent,\n  onUnpickRecent,\n}: UserFilesModalProps) {\n  const { isOpen, toggle } = useModal();\n  const [selectedIds, setSelectedIds] = useState<Set<string>>(\n    () => new Set(selectedFileIds || [])\n  );\n  const [showOnlySelected, setShowOnlySelected] = useState(false);\n  const fileInputRef = useRef<HTMLInputElement | null>(null);\n  const searchInputRef = useRef<HTMLInputElement | null>(null);\n  const triggerUploadPicker = () => fileInputRef.current?.click();\n\n  useEffect(() => {\n    if (selectedFileIds) setSelectedIds(new Set(selectedFileIds));\n    else setSelectedIds(new Set());\n  }, [selectedFileIds]);\n\n  const selectedCount = selectedIds.size;\n\n  function handleDeselectAll() {\n    selectedIds.forEach((id) => {\n      const file = recentFiles.find((f) => f.id === id);\n      if (file) {\n        onUnpickRecent?.(file);\n      }\n    });\n    setSelectedIds(new Set());\n  }\n\n  const files = useMemo(\n    () =>\n      showOnlySelected\n        ? recentFiles.filter((projectFile) => selectedIds.has(projectFile.id))\n        : recentFiles,\n    [showOnlySelected, recentFiles, selectedIds]\n  );\n\n  const { query, setQuery, filtered } = useFilter(files, (file) => file.name);\n\n  return (\n    <>\n      {/* Hidden file input */}\n      {handleUploadChange && (\n        <input\n          ref={fileInputRef}\n          type=\"file\"\n          multiple\n          className=\"hidden\"\n          onChange={handleUploadChange}\n        />\n      )}\n\n      <Modal open={isOpen} onOpenChange={toggle}>\n        <Modal.Content\n          width=\"sm\"\n          height=\"lg\"\n          onOpenAutoFocus={(e) => {\n            e.preventDefault();\n            searchInputRef.current?.focus();\n          }}\n          preventAccidentalClose={false}\n        >\n          <Modal.Header icon={SvgFiles} title={title} description={description}>\n            {/* Search bar section */}\n            <Section flexDirection=\"row\" gap={0.5}>\n              <InputTypeIn\n                ref={searchInputRef}\n                placeholder=\"Search files...\"\n                value={query}\n                onChange={(e) => setQuery(e.target.value)}\n                leftSearchIcon\n                autoComplete=\"off\"\n                tabIndex={0}\n                onFocus={(e) => {\n                  e.target.select();\n                }}\n              />\n              {handleUploadChange && (\n                <CreateButton\n                  onClick={triggerUploadPicker}\n                  secondary={false}\n                  internal\n                >\n                  Add Files\n                </CreateButton>\n              )}\n            </Section>\n          </Modal.Header>\n\n          <Modal.Body\n            padding={filtered.length === 0 ? 0.5 : 0}\n            gap={0.5}\n            alignItems=\"center\"\n          >\n            {/* File display section */}\n            {filtered.length === 0 ? (\n              <Text text03>No files found</Text>\n            ) : (\n              <ScrollIndicatorDiv className=\"p-2 gap-2 max-h-[70vh]\">\n                {filtered.map((projectFle) => {\n                  const isSelected = selectedIds.has(projectFle.id);\n                  return (\n                    <FileAttachment\n                      key={projectFle.id}\n                      file={projectFle}\n                      isSelected={isSelected}\n                      onClick={\n                        onPickRecent\n                          ? () => {\n                              if (isSelected) {\n                                onUnpickRecent?.(projectFle);\n                                setSelectedIds((prev) => {\n                                  const next = new Set(prev);\n                                  next.delete(projectFle.id);\n                                  return next;\n                                });\n                              } else {\n                                onPickRecent(projectFle);\n                                setSelectedIds((prev) => {\n                                  const next = new Set(prev);\n                                  next.add(projectFle.id);\n                                  return next;\n                                });\n                              }\n                            }\n                          : undefined\n                      }\n                      onView={onView ? () => onView(projectFle) : undefined}\n                      onDelete={\n                        onDelete ? () => onDelete(projectFle) : undefined\n                      }\n                    />\n                  );\n                })}\n\n                {/* File count divider - only show when not searching or filtering */}\n                {!query.trim() && !showOnlySelected && (\n                  <TextSeparator\n                    count={recentFiles.length}\n                    text={recentFiles.length === 1 ? \"File\" : \"Files\"}\n                  />\n                )}\n              </ScrollIndicatorDiv>\n            )}\n          </Modal.Body>\n\n          <Modal.Footer>\n            {/* Left side: file count and controls */}\n            {onPickRecent && (\n              <Section flexDirection=\"row\" justifyContent=\"start\" gap={0.5}>\n                <Text as=\"p\" text03>\n                  {selectedCount} {selectedCount === 1 ? \"file\" : \"files\"}{\" \"}\n                  selected\n                </Text>\n                <Button\n                  icon={SvgEye}\n                  prominence=\"tertiary\"\n                  size=\"sm\"\n                  onClick={() => setShowOnlySelected(!showOnlySelected)}\n                  interaction={showOnlySelected ? \"hover\" : \"rest\"}\n                />\n                <Button\n                  disabled={selectedCount === 0}\n                  icon={SvgXCircle}\n                  prominence=\"tertiary\"\n                  size=\"sm\"\n                  onClick={handleDeselectAll}\n                />\n              </Section>\n            )}\n\n            {/* Right side: Done button */}\n            <Button prominence=\"secondary\" onClick={() => toggle(false)}>\n              Done\n            </Button>\n          </Modal.Footer>\n        </Modal.Content>\n      </Modal>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/components/oauth/OAuthCallbackPage.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useState } from \"react\";\nimport { useRouter, useSearchParams } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { CheckmarkIcon, TriangleAlertIcon } from \"@/components/icons/icons\";\nimport CardSection from \"@/components/admin/CardSection\";\nimport { Button } from \"@opal/components\";\n\ninterface OAuthCallbackConfig {\n  // UI customization\n  processingMessage?: string;\n  processingDetails?: string;\n  successMessage?: string;\n  successDetailsTemplate?: string; // Template with {serviceName} placeholder\n  errorMessage?: string;\n  backButtonText?: string;\n  redirectingMessage?: string;\n\n  // Behavior\n  autoRedirectDelay?: number; // milliseconds\n  defaultRedirectPath?: string;\n\n  // API integration - all flows now use the same pattern\n  callbackApiUrl: string; // Required - API endpoint to call\n\n  // Error message mapping\n  errorMessageMap?: Record<string, string>;\n}\n\ninterface OAuthCallbackPageProps {\n  config: OAuthCallbackConfig;\n}\n\nexport default function OAuthCallbackPage({ config }: OAuthCallbackPageProps) {\n  const router = useRouter();\n  const searchParams = useSearchParams();\n\n  const [statusMessage, setStatusMessage] = useState(\n    config.processingMessage || \"Processing...\"\n  );\n  const [statusDetails, setStatusDetails] = useState(\n    config.processingDetails || \"Please wait while we complete the setup.\"\n  );\n  const [isError, setIsError] = useState(false);\n  const [isSuccess, setIsSuccess] = useState(false);\n  const [isLoading, setIsLoading] = useState(true);\n  const [serviceName, setServiceName] = useState<string>(\"\");\n  const [redirectPath, setRedirectPath] = useState<string | undefined>(\n    undefined\n  );\n  const [secondsLeft, setSecondsLeft] = useState<number | null>(null);\n\n  // Extract query parameters\n  const code = searchParams?.get(\"code\");\n  const state = searchParams?.get(\"state\");\n  const error = searchParams?.get(\"error\");\n  const errorDescription = searchParams?.get(\"error_description\");\n\n  // Auto-redirect for success cases (with countdown)\n  useEffect(() => {\n    if (!isSuccess) return;\n\n    const delayMs = config.autoRedirectDelay ?? 2000;\n    setSecondsLeft(Math.ceil(delayMs / 1000));\n\n    const interval = setInterval(() => {\n      setSecondsLeft((prev) => (prev !== null && prev > 0 ? prev - 1 : prev));\n    }, 1000);\n\n    const timer = setTimeout(() => {\n      const target = redirectPath || config.defaultRedirectPath || \"/app\";\n      router.push(target as Route);\n    }, delayMs);\n\n    return () => {\n      clearInterval(interval);\n      clearTimeout(timer);\n    };\n  }, [\n    isSuccess,\n    redirectPath,\n    router,\n    config.autoRedirectDelay,\n    config.defaultRedirectPath,\n  ]);\n\n  useEffect(() => {\n    const controller = new AbortController();\n\n    const handleOAuthCallback = async () => {\n      // Handle OAuth error from provider\n      if (error) {\n        setStatusMessage(config.errorMessage || \"Authorization Failed\");\n        setStatusDetails(\n          errorDescription ||\n            \"The authorization was cancelled or failed. Please try again.\"\n        );\n        setIsError(true);\n        setIsLoading(false);\n        return;\n      }\n\n      // Validate required parameters\n      if (!code || !state) {\n        setStatusMessage(\"Invalid Request\");\n        setStatusDetails(\n          \"The authorization request was incomplete. Please try again.\"\n        );\n        setIsError(true);\n        setIsLoading(false);\n        return;\n      }\n\n      try {\n        // Make API call to process callback - all flows use this pattern now\n        const url = `${config.callbackApiUrl}?code=${encodeURIComponent(\n          code\n        )}&state=${encodeURIComponent(state)}`;\n\n        const response = await fetch(url, {\n          method: \"POST\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n          credentials: \"include\",\n          signal: controller.signal,\n        });\n\n        if (!response.ok) {\n          let errorMessage = \"Failed to complete authorization\";\n          try {\n            const errorData = await response.json();\n            if (errorData.detail && config.errorMessageMap) {\n              // Use custom error mapping\n              for (const [pattern, message] of Object.entries(\n                config.errorMessageMap\n              )) {\n                if (errorData.detail.includes(pattern)) {\n                  errorMessage = message;\n                  break;\n                }\n              }\n            } else if (errorData.error) {\n              errorMessage = errorData.error;\n            }\n          } catch (parseError) {\n            console.error(\"Error parsing response:\", parseError);\n          }\n          throw new Error(errorMessage);\n        }\n\n        // Parse the response to get service and redirect information\n        const responseData = await response.json();\n        const result = {\n          success: true,\n          serviceName:\n            responseData.source ||\n            responseData.server_name ||\n            responseData.service_name,\n        };\n\n        setServiceName(result.serviceName || \"\");\n        // Respect backend-provided redirect path (from state.return_path)\n        // Sanitize to prevent open redirects (e.g. \"//evil.com\")\n        const rawPath =\n          responseData.redirect_url ||\n          searchParams?.get(\"return_path\") ||\n          config.defaultRedirectPath ||\n          \"/app\";\n        const sanitizedPath =\n          rawPath.startsWith(\"http://\") || rawPath.startsWith(\"https://\")\n            ? \"/app\"\n            : \"/\" + rawPath.replace(/^\\/+/, \"\");\n        const redirectUrl = new URL(sanitizedPath, window.location.origin);\n        redirectUrl.searchParams.set(\"message\", \"oauth_connected\");\n        setRedirectPath(redirectUrl.pathname + redirectUrl.search);\n        setStatusMessage(config.successMessage || \"Success!\");\n\n        const successDetails = config.successDetailsTemplate\n          ? config.successDetailsTemplate.replace(\n              \"{serviceName}\",\n              result.serviceName || \"service\"\n            )\n          : `Your ${\n              result.serviceName || \"service\"\n            } authorization completed successfully.`;\n\n        setStatusDetails(successDetails);\n        setIsSuccess(true);\n        setIsError(false);\n        setIsLoading(false);\n      } catch (error) {\n        if (controller.signal.aborted) return;\n        console.error(\"OAuth callback error:\", error);\n        setStatusMessage(config.errorMessage || \"Something Went Wrong\");\n        setStatusDetails(\n          error instanceof Error\n            ? error.message\n            : \"An error occurred during the OAuth process. Please try again.\"\n        );\n        setIsError(true);\n        setIsLoading(false);\n      }\n    };\n\n    handleOAuthCallback();\n    return () => controller.abort();\n  }, [code, state, error, errorDescription, searchParams, config]);\n\n  const getStatusIcon = () => {\n    if (isLoading) {\n      return (\n        <div className=\"w-16 h-16 border-4 border-blue-200 dark:border-blue-800 border-t-blue-600 dark:border-t-blue-400 rounded-full animate-spin mx-auto mb-4\"></div>\n      );\n    }\n    if (isSuccess) {\n      return (\n        <CheckmarkIcon\n          size={64}\n          className=\"text-green-500 dark:text-green-400 mx-auto mb-4\"\n        />\n      );\n    }\n    if (isError) {\n      return (\n        <TriangleAlertIcon\n          size={64}\n          className=\"text-red-500 dark:text-red-400 mx-auto mb-4\"\n        />\n      );\n    }\n    return null;\n  };\n\n  const getStatusColor = () => {\n    if (isSuccess) return \"text-green-600 dark:text-green-400\";\n    if (isError) return \"text-red-600 dark:text-red-400\";\n    return \"text-gray-600 dark:text-gray-300\";\n  };\n\n  return (\n    <div className=\"min-h-screen flex flex-col\">\n      <div className=\"flex-1 flex flex-col items-center justify-center p-4\">\n        <CardSection className=\"max-w-md w-full mx-auto p-8 shadow-lg bg-white dark:bg-gray-800 rounded-lg\">\n          <div className=\"text-center\">\n            {getStatusIcon()}\n\n            <h1 className={`text-2xl font-bold mb-4 ${getStatusColor()}`}>\n              {statusMessage}\n            </h1>\n\n            <p className=\"text-gray-600 dark:text-gray-300 mb-6 leading-relaxed\">\n              {statusDetails}\n            </p>\n\n            {isSuccess && secondsLeft !== null && (\n              <div className=\"bg-green-50 dark:bg-green-900/20 border border-green-200 dark:border-green-800 rounded-lg p-4 mb-6\">\n                <p className=\"text-green-800 dark:text-green-200 text-sm\">\n                  Redirecting in {secondsLeft}{\" \"}\n                  {secondsLeft === 1 ? \"second\" : \"seconds\"}...\n                </p>\n              </div>\n            )}\n\n            <div className=\"flex flex-col space-y-3\">\n              {isError && (\n                <div className=\"flex flex-col space-y-2\">\n                  <Button\n                    onClick={() => {\n                      const target =\n                        redirectPath || config.defaultRedirectPath || \"/app\";\n                      router.push(target as Route);\n                    }}\n                    width=\"full\"\n                  >\n                    {config.backButtonText || \"Back to Chat\"}\n                  </Button>\n                </div>\n              )}\n\n              {isLoading && (\n                <p className=\"text-sm text-gray-500 dark:text-gray-400\">\n                  This may take a few moments...\n                </p>\n              )}\n            </div>\n          </div>\n        </CardSection>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/resizable/constants.ts",
    "content": "export const DOCUMENT_SIDEBAR_WIDTH_COOKIE_NAME = \"documentSidebarWidth\";\nexport const SIDEBAR_TOGGLED_COOKIE_NAME = \"sidebarIsToggled\";\nexport const PRO_SEARCH_TOGGLED_COOKIE_NAME = \"proSearchIsToggled\";\n"
  },
  {
    "path": "web/src/components/search/DocumentDisplay.tsx",
    "content": "\"use client\";\nimport React, { JSX } from \"react\";\nimport { MinimalOnyxDocument, OnyxDocument } from \"@/lib/search/interfaces\";\nimport { SourceIcon } from \"../SourceIcon\";\nimport { WebResultIcon } from \"../WebResultIcon\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { openDocument } from \"@/lib/search/utils\";\nimport { SubQuestionDetail } from \"@/app/app/interfaces\";\nimport { ValidSources } from \"@/lib/types\";\nimport { Card } from \"@/components/ui/card\";\n\nexport const buildDocumentSummaryDisplay = (\n  matchHighlights: string[],\n  blurb: string\n) => {\n  // if there are no match highlights, or if it's really short, just use the blurb\n  // this is to prevent the UI from showing something like `...` for the summary\n  const MIN_MATCH_HIGHLIGHT_LENGTH = 5;\n  if (\n    !matchHighlights ||\n    matchHighlights.length <= MIN_MATCH_HIGHLIGHT_LENGTH\n  ) {\n    return blurb;\n  }\n\n  // content, isBold, isContinuation\n  let sections = [] as [string, boolean, boolean][];\n  matchHighlights.forEach((matchHighlight, matchHighlightIndex) => {\n    if (!matchHighlight) {\n      return;\n    }\n\n    const words = matchHighlight.split(new RegExp(\"\\\\s\"));\n    words.forEach((word) => {\n      if (!word) {\n        return;\n      }\n\n      let isContinuation = false;\n      while (word.includes(\"<hi>\") && word.includes(\"</hi>\")) {\n        const start = word.indexOf(\"<hi>\");\n        const end = word.indexOf(\"</hi>\");\n        const before = word.slice(0, start);\n        const highlight = word.slice(start + 4, end);\n        const after = word.slice(end + 5);\n\n        if (before) {\n          sections.push([before, false, isContinuation]);\n          isContinuation = true;\n        }\n        sections.push([highlight, true, isContinuation]);\n        isContinuation = true;\n        word = after;\n      }\n\n      if (word) {\n        sections.push([word, false, isContinuation]);\n      }\n    });\n    if (matchHighlightIndex != matchHighlights.length - 1) {\n      sections.push([\"...\", false, false]);\n    }\n  });\n\n  if (sections.length == 0) {\n    return;\n  }\n\n  const firstSection = sections[0];\n  if (firstSection === undefined) {\n    return;\n  }\n\n  let previousIsContinuation = firstSection[2];\n  let previousIsBold = firstSection[1];\n  let currentText = \"\";\n  const finalJSX = [] as (JSX.Element | string)[];\n  sections.forEach(([word, shouldBeBold, isContinuation], index) => {\n    if (shouldBeBold != previousIsBold) {\n      if (currentText) {\n        if (previousIsBold) {\n          // remove leading space so that we don't bold the whitespace\n          // in front of the matching keywords\n          currentText = currentText.trim();\n          if (!previousIsContinuation) {\n            finalJSX[finalJSX.length - 1] = finalJSX[finalJSX.length - 1] + \" \";\n          }\n          finalJSX.push(\n            <b key={index} className=\"text-text font-bold\">\n              {currentText}\n            </b>\n          );\n        } else {\n          finalJSX.push(currentText);\n        }\n      }\n      currentText = \"\";\n    }\n    previousIsBold = shouldBeBold;\n    previousIsContinuation = isContinuation;\n    if (!isContinuation || index === 0) {\n      currentText += \" \";\n    }\n    currentText += word;\n  });\n  if (currentText) {\n    if (previousIsBold) {\n      currentText = currentText.trim();\n      if (!previousIsContinuation) {\n        finalJSX[finalJSX.length - 1] = finalJSX[finalJSX.length - 1] + \" \";\n      }\n      finalJSX.push(\n        <b key={sections.length} className=\"text-default bg-highlight-text\">\n          {currentText}\n        </b>\n      );\n    } else {\n      finalJSX.push(currentText);\n    }\n  }\n  return finalJSX;\n};\n\ninterface CompactDocumentCardProps {\n  document: OnyxDocument;\n  updatePresentingDocument: (document: MinimalOnyxDocument) => void;\n}\n\nexport function CompactDocumentCard({\n  document,\n  updatePresentingDocument,\n}: CompactDocumentCardProps) {\n  const isWebSource =\n    document.is_internet || document.source_type === ValidSources.Web;\n\n  return (\n    <Card className=\"shadow-00 w-[20rem]\">\n      <button\n        onClick={() => {\n          openDocument(document, updatePresentingDocument);\n        }}\n        className=\"max-w-[20rem] p-3 flex flex-col gap-1\"\n      >\n        <div className=\"flex flex-row gap-2 items-center w-full\">\n          {isWebSource && document.link ? (\n            <WebResultIcon url={document.link} size={18} />\n          ) : (\n            <SourceIcon sourceType={document.source_type} iconSize={18} />\n          )}\n          <Text as=\"p\" text04 className=\"truncate !m-0\">\n            {document.semantic_identifier ?? document.document_id}\n          </Text>\n        </div>\n\n        {document.blurb && (\n          <Text\n            as=\"p\"\n            text03\n            secondaryBody\n            className=\"line-clamp-2 text-left !m-0\"\n          >\n            {document.blurb}\n          </Text>\n        )}\n\n        {document.updated_at &&\n          !isNaN(new Date(document.updated_at).getTime()) && (\n            <Text\n              as=\"p\"\n              text03\n              figureSmallLabel\n              className=\"line-clamp-2 text-left !m-0\"\n            >\n              Updated {new Date(document.updated_at).toLocaleDateString()}\n            </Text>\n          )}\n      </button>\n    </Card>\n  );\n}\n\ninterface CompactQuestionCardProps {\n  question: SubQuestionDetail;\n  openQuestion: (question: SubQuestionDetail) => void;\n}\n\nexport function CompactQuestionCard({\n  question,\n  openQuestion,\n}: CompactQuestionCardProps) {\n  return (\n    <div\n      onClick={() => openQuestion(question)}\n      className=\"max-w-[350px] gap-y-1 cursor-pointer pb-0 pt-0 mt-0 flex gap-y-0 flex-col content-start items-start gap-0\"\n    >\n      <div className=\"text-sm !pb-0 !mb-0 font-semibold flex items-center gap-x-1 text-text-900 pt-0 mt-0 truncate w-full\">\n        Question\n      </div>\n      <div className=\"text-xs mb-0 text-text-600 line-clamp-2\">\n        {question.question}\n      </div>\n      <div className=\"flex mt-0 pt-0 items-center justify-between w-full\">\n        <span className=\"text-xs text-text-500\">\n          {question.context_docs?.top_documents.length || 0} context docs\n        </span>\n        {question.sub_queries && (\n          <span className=\"text-xs text-text-500\">\n            {question.sub_queries.length} subqueries\n          </span>\n        )}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/search/DocumentFeedbackBlock.tsx",
    "content": "import { toast } from \"@/hooks/useToast\";\nimport { ChevronsDownIcon, ChevronsUpIcon } from \"../icons/icons\";\nimport { CustomTooltip } from \"../tooltip/CustomTooltip\";\n\ntype DocumentFeedbackType = \"endorse\" | \"reject\" | \"hide\" | \"unhide\";\n\nconst giveDocumentFeedback = async (\n  documentId: string,\n  messageId: number,\n  documentRank: number,\n  searchFeedback: DocumentFeedbackType\n): Promise<string | null> => {\n  const response = await fetch(\"/api/chat/document-search-feedback\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      message_id: messageId,\n      document_id: documentId,\n      document_rank: documentRank,\n      click: false,\n      search_feedback: searchFeedback,\n    }),\n  });\n  return response.ok\n    ? null\n    : response.statusText || (await response.json()).message;\n};\n\ninterface DocumentFeedbackIconProps {\n  documentId: string;\n  messageId: number;\n  documentRank: number;\n  feedbackType: DocumentFeedbackType;\n}\n\nconst DocumentFeedback = ({\n  documentId,\n  messageId,\n  documentRank,\n  feedbackType,\n}: DocumentFeedbackIconProps) => {\n  let icon = null;\n  const size = 20;\n  if (feedbackType === \"endorse\") {\n    icon = (\n      <ChevronsUpIcon\n        size={size}\n        className=\"my-auto flex flex-shrink-0 text-blue-400\"\n      />\n    );\n  }\n  if (feedbackType === \"reject\") {\n    icon = (\n      <ChevronsDownIcon\n        size={size}\n        className=\"my-auto flex flex-shrink-0 text-blue-400\"\n      />\n    );\n  }\n  if (!icon) {\n    // TODO: support other types of feedback\n    return null;\n  }\n\n  return (\n    <div\n      onClick={async () => {\n        const errorMsg = await giveDocumentFeedback(\n          documentId,\n          messageId,\n          documentRank,\n          feedbackType\n        );\n        if (!errorMsg) {\n          toast.success(\"Thanks for your feedback!\");\n        } else {\n          toast.error(`Error giving feedback - ${errorMsg}`);\n        }\n      }}\n      className=\"cursor-pointer\"\n    >\n      {icon}\n    </div>\n  );\n};\n\ninterface DocumentFeedbackBlockProps {\n  documentId: string;\n  messageId: number;\n  documentRank: number;\n}\n\nexport const DocumentFeedbackBlock = ({\n  documentId,\n  messageId,\n  documentRank,\n}: DocumentFeedbackBlockProps) => {\n  return (\n    <div className=\"flex items-center gap-x-2\">\n      <CustomTooltip showTick line content=\"Good response\">\n        <DocumentFeedback\n          documentId={documentId}\n          messageId={messageId}\n          documentRank={documentRank}\n          feedbackType=\"endorse\"\n        />\n      </CustomTooltip>\n      <CustomTooltip showTick line content=\"Bad response\">\n        <DocumentFeedback\n          documentId={documentId}\n          messageId={messageId}\n          documentRank={documentRank}\n          feedbackType=\"reject\"\n        />\n      </CustomTooltip>\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/search/DocumentUpdatedAtBadge.tsx",
    "content": "import { timeAgo } from \"@/lib/time\";\nimport { MetadataBadge } from \"../MetadataBadge\";\n\nexport function DocumentUpdatedAtBadge({\n  updatedAt,\n  modal,\n}: {\n  updatedAt: string;\n  modal?: boolean;\n}) {\n  return (\n    <MetadataBadge\n      flexNone={modal}\n      value={(modal ? \"\" : \"Updated \") + timeAgo(updatedAt)}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/components/search/filtering/FilterDropdown.tsx",
    "content": "import { JSX } from \"react\";\nimport { FiCheck, FiChevronDown, FiXCircle } from \"react-icons/fi\";\nimport { CustomDropdown } from \"../../Dropdown\";\n\ninterface Option {\n  key: string;\n  display: string | JSX.Element;\n  displayName?: string;\n  icon?: JSX.Element;\n}\nexport function FilterDropdown({\n  options,\n  selected,\n  handleSelect,\n  icon,\n  defaultDisplay,\n  width = \"w-64\",\n  dropdownWidth,\n  optionClassName,\n  resetValues,\n  backgroundColor,\n  dropdownColor,\n}: {\n  options: Option[];\n  selected: string[];\n  handleSelect: (option: Option) => void;\n  icon: JSX.Element;\n  defaultDisplay: string | JSX.Element;\n  width?: string;\n  dropdownWidth?: string;\n  optionClassName?: string;\n  resetValues?: () => void;\n  backgroundColor?: string;\n  dropdownColor?: string;\n}) {\n  return (\n    <div>\n      <CustomDropdown\n        dropdown={\n          <div\n            className={`\n              border \n              border-border \n              rounded-lg \n              ${backgroundColor || \"bg-background\"}\n              flex \n              flex-col \n              ${dropdownWidth || width}\n              max-h-96 \n              overflow-y-scroll\n              overscroll-contain\n              `}\n          >\n            {options.map((option, ind) => {\n              const isSelected = selected.includes(option.key);\n              return (\n                <div\n                  key={`${option.key}-1`}\n                  className={`\n                      ${optionClassName}\n                      flex\n                      px-3\n                      text-sm\n                      py-2.5\n                      select-none\n                      cursor-pointer\n                      flex-none\n                      w-full\n                      text-text-darker\n                      items-center\n                      gap-x-1\n                      ${dropdownColor || \"bg-background\"}\n                      hover:bg-accent-background-hovered\n                      ${\n                        ind === options.length - 1\n                          ? \"\"\n                          : \"border-b border-border\"\n                      } \n                    `}\n                  onClick={(event) => {\n                    handleSelect(option);\n                    event.preventDefault();\n                    event.stopPropagation();\n                  }}\n                >\n                  {option.icon}\n                  {option.display}\n                  {isSelected && (\n                    <div className=\"ml-auto my-auto mr-1\">\n                      <FiCheck />\n                    </div>\n                  )}\n                </div>\n              );\n            })}\n          </div>\n        }\n      >\n        <div\n          className={`\n            flex\n            ${width}\n            text-sm\n            px-3\n            py-1.5\n            rounded-lg \n            border \n            gap-x-2\n            border-border\n            cursor-pointer \n            ${backgroundColor || \"bg-background\"}\n            hover:bg-accent-background`}\n        >\n          <div className=\"flex-none my-auto\">{icon}</div>\n          {selected.length === 0 || resetValues ? (\n            defaultDisplay\n          ) : (\n            <p className=\"line-clamp-1\">{selected.join(\", \")}</p>\n          )}\n          {resetValues && selected.length !== 0 ? (\n            <div\n              className=\"my-auto ml-auto p-0.5 rounded-full w-fit\"\n              onClick={(e) => {\n                resetValues();\n                e.stopPropagation();\n              }}\n            >\n              <FiXCircle />\n            </div>\n          ) : (\n            <FiChevronDown className=\"my-auto ml-auto\" />\n          )}\n        </div>\n      </CustomDropdown>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/search/results/Citation.tsx",
    "content": "import { ReactNode, JSX } from \"react\";\nimport { CompactDocumentCard, CompactQuestionCard } from \"../DocumentDisplay\";\nimport {\n  LoadedOnyxDocument,\n  MinimalOnyxDocument,\n  OnyxDocument,\n} from \"@/lib/search/interfaces\";\nimport {\n  Tooltip,\n  TooltipContent,\n  TooltipProvider,\n  TooltipTrigger,\n} from \"@/components/ui/tooltip\";\nimport { openDocument } from \"@/lib/search/utils\";\nimport { SubQuestionDetail } from \"@/app/app/interfaces\";\nimport { getSourceDisplayName } from \"@/lib/sources\";\nimport { ValidSources } from \"@/lib/types\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nconst MAX_CITATION_TEXT_LENGTH = 40;\n\nexport interface DocumentCardProps {\n  document: LoadedOnyxDocument;\n  updatePresentingDocument: (document: MinimalOnyxDocument) => void;\n  url?: string;\n}\nexport interface QuestionCardProps {\n  question: SubQuestionDetail;\n  openQuestion: (question: SubQuestionDetail) => void;\n}\n\nfunction truncateText(str: string, maxLength: number) {\n  if (str.length <= maxLength) return str;\n  return str.slice(0, maxLength) + \"...\";\n}\n\nexport function Citation({\n  children,\n  document_info,\n  question_info,\n  index,\n}: {\n  document_info?: DocumentCardProps;\n  question_info?: QuestionCardProps;\n  children?: JSX.Element | string | null | ReactNode;\n  index?: number;\n}) {\n  let innerText = \"\";\n  if (index !== undefined) {\n    innerText = index.toString();\n  }\n\n  if (children) {\n    const childrenString = children.toString();\n    const childrenSegment1 = childrenString.split(\"[\")[1];\n    if (childrenSegment1 !== undefined) {\n      const childrenSegment1_0 = childrenSegment1.split(\"]\")[0];\n      if (childrenSegment1_0 !== undefined) {\n        innerText = childrenSegment1_0;\n      }\n    }\n  }\n\n  if (!document_info && !question_info) {\n    return <>{children}</>;\n  }\n  const sourceType = document_info?.document?.source_type;\n  const title = document_info?.document?.semantic_identifier;\n  const citationText =\n    (sourceType && sourceType != ValidSources.Web\n      ? getSourceDisplayName(sourceType)\n      : truncateText(title || \"\", MAX_CITATION_TEXT_LENGTH)) || \"Unknown\";\n\n  return (\n    <TooltipProvider>\n      <Tooltip>\n        <TooltipTrigger asChild>\n          <span\n            onClick={() => {\n              document_info?.document\n                ? openDocument(\n                    document_info.document,\n                    document_info.updatePresentingDocument\n                  )\n                : question_info?.question\n                  ? question_info.openQuestion(question_info.question)\n                  : null;\n            }}\n            className=\"inline-flex items-center cursor-pointer transition-all duration-200 ease-in-out ml-1\"\n          >\n            <span\n              className=\"flex items-center justify-center p-1 h-4 max-w-[200px]\n                         bg-background-tint-03 rounded-04\n                         hover:bg-background-tint-04 shadow-sm\"\n              style={{ transform: \"translateY(-10%)\", lineHeight: \"1\" }}\n            >\n              <Text figureSmallValue className=\"truncate\">\n                {citationText}\n              </Text>\n            </span>\n          </span>\n        </TooltipTrigger>\n        <TooltipContent\n          className=\"bg-transparent p-0 shadow-none\"\n          side=\"bottom\"\n          align=\"start\"\n        >\n          {document_info?.document ? (\n            <CompactDocumentCard\n              updatePresentingDocument={document_info.updatePresentingDocument}\n              document={document_info.document}\n            />\n          ) : (\n            <CompactQuestionCard\n              question={question_info?.question!}\n              openQuestion={question_info?.openQuestion!}\n            />\n          )}\n        </TooltipContent>\n      </Tooltip>\n    </TooltipProvider>\n  );\n}\n"
  },
  {
    "path": "web/src/components/search/results/ResponseSection.tsx",
    "content": "import { AlertIcon, TriangleAlertIcon } from \"@/components/icons/icons\";\nimport { useState, JSX } from \"react\";\n\nexport type StatusOptions = \"in-progress\" | \"failed\" | \"warning\" | \"success\";\n\ninterface ResponseSectionProps {\n  header: JSX.Element | string;\n  body: JSX.Element | string;\n  status: StatusOptions;\n  desiredOpenStatus: boolean;\n  setDesiredOpenStatus?: (isOpen: boolean) => void;\n  isNotControllable?: boolean;\n}\n\nexport const ResponseSection = ({\n  header,\n  body,\n  status,\n  desiredOpenStatus,\n  setDesiredOpenStatus,\n  isNotControllable,\n}: ResponseSectionProps) => {\n  const [isOpen, setIsOpen] = useState<boolean | null>(null);\n\n  let icon = null;\n  if (status === \"in-progress\") {\n    icon = <></>;\n  }\n  if (status === \"failed\") {\n    icon = <AlertIcon size={16} className=\"text-red-500\" />;\n  }\n  if (status === \"success\") {\n    icon = <></>;\n  }\n  if (status === \"warning\") {\n    icon = <TriangleAlertIcon size={16} className=\"text-yellow-600\" />;\n  }\n\n  // use `desiredOpenStatus` if user has not clicked to open/close, otherwise use\n  // `isOpen` state\n  const finalIsOpen = isOpen !== null ? isOpen : desiredOpenStatus;\n  return (\n    <div>\n      <div\n        className={`\n        flex \n        my-1 \n        p-1 \n        rounded  \n        select-none \n        ${isNotControllable ? \"\" : \"hover:bg-background-800 cursor-pointer\"}`}\n        onClick={() => {\n          if (!isNotControllable) {\n            if (isOpen === null) {\n              setIsOpen(!desiredOpenStatus);\n            } else {\n              setIsOpen(!isOpen);\n            }\n          }\n          if (setDesiredOpenStatus) {\n            setDesiredOpenStatus(!desiredOpenStatus);\n          }\n        }}\n      >\n        <div className=\"my-auto\">{icon}</div>\n        <div className=\"my-auto text-sm text-text-200\">{header}</div>\n      </div>\n      {finalIsOpen && <div className=\"pb-1 mx-2 text-sm mb-1\">{body}</div>}\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/settings/lib.ts",
    "content": "import {\n  CombinedSettings,\n  EnterpriseSettings,\n  ApplicationStatus,\n  Settings,\n  QueryHistoryType,\n} from \"@/interfaces/settings\";\nimport {\n  CUSTOM_ANALYTICS_ENABLED,\n  HOST_URL,\n  SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED,\n} from \"@/lib/constants\";\nimport { fetchSS } from \"@/lib/utilsSS\";\nimport { getWebVersion } from \"@/lib/version\";\n\nexport enum SettingsError {\n  OTHER = \"OTHER\",\n}\n\nexport async function fetchStandardSettingsSS() {\n  return fetchSS(\"/settings\");\n}\n\nexport async function fetchEnterpriseSettingsSS() {\n  return fetchSS(\"/enterprise-settings\");\n}\n\nexport async function fetchCustomAnalyticsScriptSS() {\n  return fetchSS(\"/enterprise-settings/custom-analytics-script\");\n}\n\nexport async function fetchSettingsSS(): Promise<CombinedSettings | null> {\n  const tasks = [fetchStandardSettingsSS()];\n  if (SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED) {\n    tasks.push(fetchEnterpriseSettingsSS());\n    if (CUSTOM_ANALYTICS_ENABLED) {\n      tasks.push(fetchCustomAnalyticsScriptSS());\n    }\n  }\n\n  try {\n    const results = await Promise.all(tasks);\n\n    let settings: Settings;\n\n    const result_0 = results[0];\n    if (!result_0) {\n      throw new Error(\"Standard settings fetch failed.\");\n    }\n\n    if (!result_0.ok) {\n      if (result_0.status === 403 || result_0.status === 401) {\n        settings = {\n          auto_scroll: true,\n          application_status: ApplicationStatus.ACTIVE,\n          gpu_enabled: false,\n          maximum_chat_retention_days: null,\n          notifications: [],\n          needs_reindexing: false,\n          anonymous_user_enabled: false,\n          invite_only_enabled: false,\n          deep_research_enabled: true,\n          temperature_override_enabled: true,\n          query_history_type: QueryHistoryType.NORMAL,\n        };\n      } else {\n        throw new Error(\n          `fetchStandardSettingsSS failed: status=${\n            result_0.status\n          } body=${await result_0.text()}`\n        );\n      }\n    } else {\n      settings = await result_0.json();\n    }\n\n    let enterpriseSettings: EnterpriseSettings | null = null;\n    if (tasks.length > 1) {\n      const result_1 = results[1];\n      if (!result_1) {\n        throw new Error(\"fetchEnterpriseSettingsSS failed.\");\n      }\n\n      if (!result_1.ok) {\n        if (result_1.status !== 403 && result_1.status !== 401) {\n          throw new Error(\n            `fetchEnterpriseSettingsSS failed: status=${\n              result_1.status\n            } body=${await result_1.text()}`\n          );\n        }\n      } else {\n        enterpriseSettings = await result_1.json();\n      }\n    }\n\n    let customAnalyticsScript: string | null = null;\n    if (tasks.length > 2) {\n      const result_2 = results[2];\n      if (!result_2) {\n        throw new Error(\"fetchCustomAnalyticsScriptSS failed.\");\n      }\n\n      if (!result_2.ok) {\n        if (result_2.status !== 403) {\n          throw new Error(\n            `fetchCustomAnalyticsScriptSS failed: status=${\n              result_2.status\n            } body=${await result_2.text()}`\n          );\n        }\n      } else {\n        customAnalyticsScript = await result_2.json();\n      }\n    }\n\n    if (settings.deep_research_enabled == null) {\n      settings.deep_research_enabled = true;\n    }\n\n    const combinedSettings: CombinedSettings = {\n      settings,\n      enterpriseSettings,\n      customAnalyticsScript,\n      webVersion: settings.version ?? getWebVersion(),\n      webDomain: HOST_URL,\n      // Server-side default; the real value is computed client-side in\n      // SettingsProvider where connector data is available via useCCPairs.\n      isSearchModeAvailable: settings.search_ui_enabled !== false,\n      settingsLoading: false,\n    };\n\n    return combinedSettings;\n  } catch (error) {\n    console.error(\"fetchSettingsSS exception: \", error);\n    return null;\n  }\n}\n"
  },
  {
    "path": "web/src/components/settings/usePaidEnterpriseFeaturesEnabled.ts",
    "content": "\"use client\";\n\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\n\n/**\n * Hook to check if enterprise features should be enabled in the UI.\n *\n * When LICENSE_ENFORCEMENT_ENABLED=true on the backend:\n * - Returns true if user has a valid license (ACTIVE, GRACE_PERIOD, PAYMENT_REMINDER)\n * - Returns false if user has no license (community edition) or expired license (GATED_ACCESS)\n *\n * When LICENSE_ENFORCEMENT_ENABLED=false (legacy behavior):\n * - Returns true if enterpriseSettings exists (build-time constant)\n *\n * This determines whether EE-only UI features like user groups, RBAC, etc. are shown.\n */\nexport function usePaidEnterpriseFeaturesEnabled(): boolean {\n  const combinedSettings = useSettingsContext();\n\n  // Check the runtime license-based flag first\n  // This is set by the backend based on actual license status\n  if (combinedSettings.settings.ee_features_enabled !== undefined) {\n    return combinedSettings.settings.ee_features_enabled;\n  }\n\n  // Fallback to legacy behavior: check if enterprise settings exist\n  // This handles the case where LICENSE_ENFORCEMENT_ENABLED=false\n  return combinedSettings.enterpriseSettings !== null;\n}\n"
  },
  {
    "path": "web/src/components/sidebar/ChatSessionMorePopup.tsx",
    "content": "\"use client\";\n\nimport { ChatSession } from \"@/app/app/interfaces\";\nimport { deleteChatSession } from \"@/app/app/services/lib\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport {\n  moveChatSession as moveChatSessionService,\n  removeChatSessionFromProject as removeChatSessionFromProjectService,\n} from \"@/app/app/projects/projectsService\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport { FiMoreHorizontal } from \"react-icons/fi\";\nimport useChatSessions from \"@/hooks/useChatSessions\";\nimport { useCallback, useState, useMemo } from \"react\";\nimport MoveCustomAgentChatModal from \"@/components/modals/MoveCustomAgentChatModal\";\n// PopoverMenu already imported above\nimport { cn, noProp } from \"@/lib/utils\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport { Button } from \"@opal/components\";\nimport { PopoverSearchInput } from \"@/sections/sidebar/ChatButton\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { SvgFolder, SvgFolderIn, SvgShare, SvgTrash } from \"@opal/icons\";\n// Constants\nconst DEFAULT_PERSONA_ID = 0;\nconst LS_HIDE_MOVE_CUSTOM_AGENT_MODAL_KEY = \"onyx:hideMoveCustomAgentModal\";\n\ninterface ChatSessionMorePopupProps {\n  chatSession: ChatSession;\n  projectId?: number;\n  isRenamingChat: boolean;\n  setIsRenamingChat: (value: boolean) => void;\n  showShareModal?: (chatSession: ChatSession) => void;\n  afterDelete?: () => void;\n  afterMove?: () => void;\n  afterRemoveFromProject?: () => void;\n  search?: boolean;\n  iconSize?: number;\n  isVisible?: boolean;\n}\n\nexport function ChatSessionMorePopup({\n  chatSession,\n  projectId,\n  isRenamingChat: _isRenamingChat,\n  setIsRenamingChat: _setIsRenamingChat,\n  showShareModal,\n  afterDelete,\n  afterMove,\n  afterRemoveFromProject,\n  search,\n  iconSize = 16,\n  isVisible = false,\n}: ChatSessionMorePopupProps) {\n  const [popoverOpen, setPopoverOpen] = useState(false);\n  const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false);\n  const { refreshChatSessions, removeSession } = useChatSessions();\n  const { fetchProjects, projects } = useProjectsContext();\n\n  const [pendingMoveProjectId, setPendingMoveProjectId] = useState<\n    number | null\n  >(null);\n  const [showMoveCustomAgentModal, setShowMoveCustomAgentModal] =\n    useState(false);\n\n  const isChatUsingDefaultAgent = chatSession.persona_id === DEFAULT_PERSONA_ID;\n\n  const [showMoveOptions, setShowMoveOptions] = useState(false);\n  const [searchTerm, setSearchTerm] = useState(\"\");\n\n  const filteredProjects = projects.filter((project) =>\n    project.name.toLowerCase().includes(searchTerm.toLowerCase())\n  );\n\n  const handlePopoverOpenChange = useCallback((open: boolean) => {\n    setPopoverOpen(open);\n  }, []);\n\n  const handleConfirmDelete = useCallback(\n    async (e: React.MouseEvent<HTMLButtonElement>) => {\n      e.stopPropagation();\n      await deleteChatSession(chatSession.id);\n      removeSession(chatSession.id);\n      await refreshChatSessions();\n      await fetchProjects();\n      setIsDeleteModalOpen(false);\n      setPopoverOpen(false);\n      afterDelete?.();\n    },\n    [\n      chatSession,\n      refreshChatSessions,\n      removeSession,\n      fetchProjects,\n      afterDelete,\n    ]\n  );\n\n  const performMove = useCallback(\n    async (targetProjectId: number) => {\n      await moveChatSessionService(targetProjectId, chatSession.id);\n      await fetchProjects();\n      await refreshChatSessions();\n      setPopoverOpen(false);\n      afterMove?.();\n    },\n    [chatSession.id, fetchProjects, refreshChatSessions, afterMove]\n  );\n\n  const handleMoveChatSession = useCallback(\n    async (item: { id: number; label: string }) => {\n      const targetProjectId = item.id;\n      const hideModal =\n        typeof window !== \"undefined\" &&\n        window.localStorage.getItem(LS_HIDE_MOVE_CUSTOM_AGENT_MODAL_KEY) ===\n          \"true\";\n\n      if (!isChatUsingDefaultAgent && !hideModal) {\n        setPendingMoveProjectId(targetProjectId);\n        setShowMoveCustomAgentModal(true);\n        return;\n      }\n\n      await performMove(targetProjectId);\n    },\n    [isChatUsingDefaultAgent, performMove]\n  );\n\n  const handleRemoveChatSessionFromProject = useCallback(async () => {\n    await removeChatSessionFromProjectService(chatSession.id);\n    await fetchProjects();\n    await refreshChatSessions();\n    afterRemoveFromProject?.();\n    setPopoverOpen(false);\n  }, [\n    chatSession.id,\n    fetchProjects,\n    refreshChatSessions,\n    removeChatSessionFromProjectService,\n    afterRemoveFromProject,\n  ]);\n\n  // Build popover items similar to AppSidebar (no rename here)\n  const popoverItems = useMemo(() => {\n    if (!showMoveOptions) {\n      return [\n        showShareModal && (\n          <LineItem\n            key=\"share\"\n            icon={SvgShare}\n            onClick={noProp(() => showShareModal(chatSession))}\n          >\n            Share\n          </LineItem>\n        ),\n        <LineItem\n          key=\"move\"\n          icon={SvgFolderIn}\n          onClick={noProp(() => setShowMoveOptions(true))}\n        >\n          Move to Project\n        </LineItem>,\n        projectId && (\n          <LineItem\n            key=\"remove\"\n            icon={SvgFolder}\n            onClick={noProp(() => handleRemoveChatSessionFromProject())}\n          >\n            {`Remove from ${\n              projects.find((p) => p.id === projectId)?.name ?? \"Project\"\n            }`}\n          </LineItem>\n        ),\n        null,\n        <LineItem\n          key=\"delete\"\n          icon={SvgTrash}\n          onClick={noProp(() => setIsDeleteModalOpen(true))}\n          danger\n        >\n          Delete\n        </LineItem>,\n      ];\n    }\n    return [\n      <PopoverSearchInput\n        key=\"search\"\n        setShowMoveOptions={setShowMoveOptions}\n        onSearch={setSearchTerm}\n      />,\n      ...filteredProjects\n        .filter((candidate) => candidate.id !== projectId)\n        .map((target) => (\n          <LineItem\n            key={target.id}\n            icon={SvgFolder}\n            onClick={noProp(() =>\n              handleMoveChatSession({ id: target.id, label: target.name })\n            )}\n          >\n            {target.name}\n          </LineItem>\n        )),\n    ];\n  }, [\n    showMoveOptions,\n    showShareModal,\n    projects,\n    projectId,\n    filteredProjects,\n    chatSession,\n    setShowMoveOptions,\n    setSearchTerm,\n    handleMoveChatSession,\n    handleRemoveChatSessionFromProject,\n  ]);\n\n  return (\n    <div>\n      <div className=\"-my-1\">\n        <Popover open={popoverOpen} onOpenChange={handlePopoverOpenChange}>\n          <Popover.Trigger\n            asChild\n            onClick={(event) => {\n              event.preventDefault();\n              event.stopPropagation();\n              handlePopoverOpenChange(!popoverOpen);\n            }}\n          >\n            <div\n              className={cn(\n                \"p-1 rounded cursor-pointer select-none transition-opacity duration-150\",\n                isVisible || popoverOpen\n                  ? \"opacity-100 pointer-events-auto\"\n                  : \"opacity-0 pointer-events-none\"\n              )}\n            >\n              <FiMoreHorizontal size={iconSize} />\n            </div>\n          </Popover.Trigger>\n          <Popover.Content\n            align=\"end\"\n            side=\"right\"\n            avoidCollisions\n            sideOffset={8}\n          >\n            <PopoverMenu>{popoverItems}</PopoverMenu>\n          </Popover.Content>\n        </Popover>\n      </div>\n      {isDeleteModalOpen && (\n        <ConfirmationModalLayout\n          title=\"Delete Chat\"\n          icon={SvgTrash}\n          onClose={() => setIsDeleteModalOpen(false)}\n          submit={\n            <Button variant=\"danger\" onClick={handleConfirmDelete}>\n              Delete\n            </Button>\n          }\n        >\n          Are you sure you want to delete this chat? This action cannot be\n          undone.\n        </ConfirmationModalLayout>\n      )}\n\n      {showMoveCustomAgentModal && (\n        <MoveCustomAgentChatModal\n          onCancel={() => {\n            setShowMoveCustomAgentModal(false);\n            setPendingMoveProjectId(null);\n          }}\n          onConfirm={async (doNotShowAgain: boolean) => {\n            if (doNotShowAgain && typeof window !== \"undefined\") {\n              window.localStorage.setItem(\n                LS_HIDE_MOVE_CUSTOM_AGENT_MODAL_KEY,\n                \"true\"\n              );\n            }\n            const target = pendingMoveProjectId;\n            setShowMoveCustomAgentModal(false);\n            setPendingMoveProjectId(null);\n            if (target != null) {\n              await performMove(target);\n            }\n          }}\n        />\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/sidebar/types.ts",
    "content": "export type pageType = \"search\" | \"chat\" | \"agents\" | \"admin\" | \"shared\";\n"
  },
  {
    "path": "web/src/components/spinner.css",
    "content": ".loader {\n  border-top-color: #2876aa;\n  -webkit-animation: spinner 1.5s linear infinite;\n  animation: spinner 1.5s linear infinite;\n}\n\n@-webkit-keyframes spinner {\n  0% {\n    -webkit-transform: rotate(0deg);\n  }\n  100% {\n    -webkit-transform: rotate(360deg);\n  }\n}\n\n@keyframes spinner {\n  0% {\n    transform: rotate(0deg);\n  }\n  100% {\n    transform: rotate(360deg);\n  }\n}\n"
  },
  {
    "path": "web/src/components/standardAnswers/StandardAnswerCategoryDropdown.tsx",
    "content": "import { FC } from \"react\";\nimport { StandardAnswerCategoryResponse } from \"./getStandardAnswerCategoriesIfEE\";\nimport { Label } from \"@/components/Field\";\nimport MultiSelectDropdown from \"../MultiSelectDropdown\";\nimport { StandardAnswerCategory } from \"@/lib/types\";\nimport { ErrorCallout } from \"../ErrorCallout\";\nimport { LoadingAnimation } from \"../Loading\";\n\ninterface StandardAnswerCategoryDropdownFieldProps {\n  standardAnswerCategoryResponse: StandardAnswerCategoryResponse;\n  categories: StandardAnswerCategory[];\n  setCategories: (categories: StandardAnswerCategory[]) => void;\n}\n\nexport const StandardAnswerCategoryDropdownField: FC<\n  StandardAnswerCategoryDropdownFieldProps\n> = ({ standardAnswerCategoryResponse, categories, setCategories }) => {\n  if (!standardAnswerCategoryResponse.paidEnterpriseFeaturesEnabled) {\n    return null;\n  }\n\n  if (standardAnswerCategoryResponse.error != null) {\n    return (\n      <ErrorCallout\n        errorTitle=\"Something went wrong :(\"\n        errorMsg={`Failed to fetch standard answer categories - ${standardAnswerCategoryResponse.error.message}`}\n      />\n    );\n  }\n\n  if (standardAnswerCategoryResponse.categories == null) {\n    return <LoadingAnimation />;\n  }\n\n  return (\n    <>\n      <div>\n        <Label>Standard Answer Categories</Label>\n        <div className=\"w-64\">\n          <MultiSelectDropdown\n            name=\"standard_answer_categories\"\n            label=\"\"\n            onChange={(selectedOptions) => {\n              const selectedCategories = selectedOptions.map((option) => {\n                return {\n                  id: Number(option.value),\n                  name: option.label,\n                };\n              });\n              setCategories(selectedCategories);\n            }}\n            creatable={false}\n            options={standardAnswerCategoryResponse.categories.map(\n              (category) => ({\n                label: category.name,\n                value: category.id.toString(),\n              })\n            )}\n            initialSelectedOptions={categories.map((category) => ({\n              label: category.name,\n              value: category.id.toString(),\n            }))}\n          />\n        </div>\n      </div>\n    </>\n  );\n};\n"
  },
  {
    "path": "web/src/components/standardAnswers/getStandardAnswerCategoriesIfEE.tsx",
    "content": "import { SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED } from \"@/lib/constants\";\nimport { StandardAnswerCategory } from \"@/lib/types\";\nimport { fetchSS } from \"@/lib/utilsSS\";\n\nexport type StandardAnswerCategoryResponse =\n  | EEStandardAnswerCategoryResponse\n  | NoEEAvailable;\n\ninterface NoEEAvailable {\n  paidEnterpriseFeaturesEnabled: false;\n}\n\ninterface EEStandardAnswerCategoryResponse {\n  paidEnterpriseFeaturesEnabled: true;\n  error?: {\n    message: string;\n  };\n  categories?: StandardAnswerCategory[];\n}\n\nexport async function getStandardAnswerCategoriesIfEE(): Promise<StandardAnswerCategoryResponse> {\n  if (!SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED) {\n    return {\n      paidEnterpriseFeaturesEnabled: false,\n    };\n  }\n\n  const standardAnswerCategoriesResponse = await fetchSS(\n    \"/manage/admin/standard-answer/category\"\n  );\n  if (!standardAnswerCategoriesResponse.ok) {\n    return {\n      paidEnterpriseFeaturesEnabled: true,\n      error: {\n        message: await standardAnswerCategoriesResponse.text(),\n      },\n    };\n  }\n\n  const categories =\n    (await standardAnswerCategoriesResponse.json()) as StandardAnswerCategory[];\n\n  return {\n    paidEnterpriseFeaturesEnabled: true,\n    categories,\n  };\n}\n"
  },
  {
    "path": "web/src/components/table/DragHandle.tsx",
    "content": "import React from \"react\";\nimport { MdDragIndicator } from \"react-icons/md\";\n\ninterface DragHandleProps extends React.HTMLAttributes<HTMLDivElement> {\n  isDragging?: boolean;\n  size?: number;\n}\n\nexport const DragHandle: React.FC<DragHandleProps> = ({\n  isDragging,\n  size = 16,\n  ...props\n}) => {\n  return (\n    <div\n      className={`flex items-center justify-center ${\n        isDragging ? \"cursor-grabbing\" : \"cursor-grab\"\n      }`}\n      {...props}\n    >\n      <MdDragIndicator size={size} />\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/table/DraggableRow.tsx",
    "content": "import { useSortable } from \"@dnd-kit/sortable\";\nimport { TableCell, TableRow } from \"@/components/ui/table\";\nimport { CSS } from \"@dnd-kit/utilities\";\nimport { DragHandle } from \"./DragHandle\";\nimport { Row } from \"./interfaces\";\n\nexport function DraggableRow({\n  row,\n  isAdmin = true,\n  isDragOverlay = false,\n}: {\n  row: Row;\n  isAdmin?: boolean;\n  isDragOverlay?: boolean;\n}) {\n  const {\n    attributes,\n    listeners,\n    transform,\n    transition,\n    setNodeRef,\n    isDragging,\n  } = useSortable({\n    id: row.id,\n    disabled: isDragOverlay,\n  });\n\n  const style = {\n    transform: CSS.Transform.toString(transform),\n    transition,\n  };\n\n  return (\n    <TableRow\n      ref={setNodeRef}\n      style={isDragOverlay ? undefined : style}\n      className={isDragging && !isDragOverlay ? \"opacity-0\" : \"\"}\n    >\n      <TableCell>\n        {isAdmin && <DragHandle isDragging={isDragging} {...listeners} />}\n      </TableCell>\n      {row.cells.map((cell, index) => (\n        <TableCell key={index}>{cell}</TableCell>\n      ))}\n    </TableRow>\n  );\n}\n"
  },
  {
    "path": "web/src/components/table/DraggableTable.tsx",
    "content": "import {\n  Table,\n  TableHead,\n  TableRow,\n  TableHeader,\n  TableBody,\n} from \"@/components/ui/table\";\nimport React, { useMemo, useState, JSX } from \"react\";\nimport {\n  closestCenter,\n  DndContext,\n  DragEndEvent,\n  DragOverlay,\n  DragStartEvent,\n  KeyboardSensor,\n  MouseSensor,\n  TouchSensor,\n  UniqueIdentifier,\n  useSensor,\n  useSensors,\n} from \"@dnd-kit/core\";\nimport { restrictToVerticalAxis } from \"@dnd-kit/modifiers\";\nimport {\n  arrayMove,\n  SortableContext,\n  verticalListSortingStrategy,\n} from \"@dnd-kit/sortable\";\nimport { DraggableRow } from \"./DraggableRow\";\nimport { Row } from \"./interfaces\";\n\nexport function DraggableTable({\n  headers,\n  rows,\n  setRows,\n  isAdmin,\n}: {\n  headers: (string | JSX.Element | null)[];\n  rows: Row[];\n  setRows: (newRows: UniqueIdentifier[]) => void | Promise<void>;\n  isAdmin: boolean;\n}) {\n  const [activeId, setActiveId] = useState<UniqueIdentifier | null>();\n  const items = useMemo(() => rows?.map(({ id }) => id), [rows]);\n  const sensors = useSensors(\n    useSensor(MouseSensor, {\n      activationConstraint: {\n        distance: 5,\n      },\n    }),\n    useSensor(TouchSensor, {\n      activationConstraint: {\n        delay: 250,\n        tolerance: 5,\n      },\n    }),\n    useSensor(KeyboardSensor, {})\n  );\n\n  function handleDragStart(event: DragStartEvent) {\n    if (isAdmin) {\n      setActiveId(event.active.id);\n    }\n  }\n\n  function handleDragEnd(event: DragEndEvent) {\n    if (isAdmin) {\n      const { active, over } = event;\n      if (over !== null && active.id !== over.id) {\n        const oldIndex = items.indexOf(active.id);\n        const newIndex = items.indexOf(over.id);\n        setRows(arrayMove(rows, oldIndex, newIndex).map((row) => row.id));\n      }\n    }\n    setActiveId(null);\n  }\n\n  function handleDragCancel() {\n    setActiveId(null);\n  }\n\n  const selectedRow = useMemo(() => {\n    if (activeId === null || activeId === undefined) {\n      return null;\n    }\n    const row = rows.find(({ id }) => id === activeId);\n    return row;\n  }, [activeId, rows]);\n\n  return (\n    <DndContext\n      sensors={sensors}\n      onDragEnd={handleDragEnd}\n      onDragStart={handleDragStart}\n      onDragCancel={handleDragCancel}\n      collisionDetection={closestCenter}\n      modifiers={[restrictToVerticalAxis]}\n    >\n      <Table>\n        <TableHeader>\n          <TableRow>\n            <TableHead></TableHead>\n            {headers.map((header, ind) => (\n              <TableHead key={ind}>{header}</TableHead>\n            ))}\n          </TableRow>\n        </TableHeader>\n\n        <TableBody>\n          <SortableContext items={items} strategy={verticalListSortingStrategy}>\n            {rows.map((row) => (\n              <DraggableRow key={row.id} row={row} isAdmin={isAdmin} />\n            ))}\n          </SortableContext>\n        </TableBody>\n      </Table>\n\n      {isAdmin && (\n        <DragOverlay>\n          {selectedRow && (\n            <Table>\n              <TableBody>\n                <DraggableRow\n                  row={selectedRow}\n                  isAdmin={isAdmin}\n                  isDragOverlay\n                />\n              </TableBody>\n            </Table>\n          )}\n        </DragOverlay>\n      )}\n    </DndContext>\n  );\n}\n"
  },
  {
    "path": "web/src/components/table/interfaces.ts",
    "content": "import { JSX } from \"react\";\nimport { UniqueIdentifier } from \"@dnd-kit/core\";\n\nexport interface Row {\n  id: UniqueIdentifier;\n  cells: (JSX.Element | string)[];\n  staticModifiers?: [number, string][];\n}\n"
  },
  {
    "path": "web/src/components/theme/ThemeProvider.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { ThemeProvider as NextThemesProvider } from \"next-themes\";\n\nexport function ThemeProvider({\n  children,\n  ...props\n}: React.ComponentProps<typeof NextThemesProvider>) {\n  return <NextThemesProvider {...props}>{children}</NextThemesProvider>;\n}\n"
  },
  {
    "path": "web/src/components/tools/CSVContent.tsx",
    "content": "// CsvContent\nimport React, { useState, useEffect } from \"react\";\nimport {\n  Table,\n  TableBody,\n  TableCell,\n  TableHead,\n  TableHeader,\n  TableRow,\n} from \"@/components/ui/table\";\nimport { ContentComponentProps } from \"./ExpandableContentWrapper\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { SvgAlertCircle } from \"@opal/icons\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\n\nconst CsvContent: React.FC<ContentComponentProps> = ({\n  fileDescriptor,\n  expanded = false,\n}) => {\n  const [data, setData] = useState<Record<string, string>[]>([]);\n  const [headers, setHeaders] = useState<string[]>([]);\n  const [isFetching, setIsFetching] = useState(true);\n\n  // Cache parsed CSV across mounts so closing other modals doesn't force a refetch.\n  // Keyed by file id; safe because chat file ids are unique.\n  const cacheKey = fileDescriptor.id;\n  const cached = csvCache.get(cacheKey);\n\n  useEffect(() => {\n    if (cached) {\n      setHeaders(cached.headers);\n      setData(cached.data);\n      setIsFetching(false);\n      return;\n    }\n\n    fetchCSV(fileDescriptor.id);\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, [fileDescriptor.id]);\n\n  const fetchCSV = async (id: string) => {\n    setIsFetching(true);\n    try {\n      const response = await fetch(`/api/chat/file/${id}`, {\n        cache: \"force-cache\",\n      });\n      if (!response.ok) {\n        throw new Error(\"Failed to fetch CSV file\");\n      }\n\n      const contentLength = response.headers.get(\"Content-Length\");\n      const fileSizeInMB = contentLength\n        ? parseInt(contentLength) / (1024 * 1024)\n        : 0;\n      const MAX_FILE_SIZE_MB = 5;\n\n      if (fileSizeInMB > MAX_FILE_SIZE_MB) {\n        throw new Error(\"File size exceeds the maximum limit of 5MB\");\n      }\n\n      const csvData = await response.text();\n      const rows = parseCSV(csvData.trim());\n      const firstRow = rows[0];\n      if (!firstRow) {\n        throw new Error(\"CSV file is empty\");\n      }\n      const parsedHeaders = firstRow;\n      setHeaders(parsedHeaders);\n\n      const parsedData: Record<string, string>[] = rows\n        .slice(1)\n        .map((fields) => {\n          return parsedHeaders.reduce<Record<string, string>>(\n            (obj, header, index) => {\n              const val = fields[index];\n              if (val !== undefined) {\n                obj[header] = val;\n              }\n              return obj;\n            },\n            {}\n          );\n        });\n      setData(parsedData);\n      csvCache.set(id, { headers: parsedHeaders, data: parsedData });\n    } catch (error) {\n      console.error(\"Error fetching CSV file:\", error);\n      setData([]);\n      setHeaders([]);\n    } finally {\n      setIsFetching(false);\n    }\n  };\n\n  if (isFetching) {\n    return (\n      <div className=\"flex items-center justify-center h-[300px]\">\n        <SimpleLoader />\n      </div>\n    );\n  }\n\n  return (\n    <div\n      className={cn(\n        \"flex relative overflow-auto\",\n        expanded ? \"max-h-[600px]\" : \"max-h-[300px]\"\n      )}\n    >\n      <Table>\n        <TableHeader className=\"sticky top-0 z-sticky\">\n          <TableRow className=\"bg-background-tint-01\">\n            {headers.map((header, index) => (\n              <TableHead key={index}>\n                <Text\n                  as=\"p\"\n                  className=\"line-clamp-2 font-medium\"\n                  text03\n                  mainUiBody\n                >\n                  {header}\n                </Text>\n              </TableHead>\n            ))}\n          </TableRow>\n        </TableHeader>\n\n        <TableBody>\n          {data.length > 0 ? (\n            data.map((row, rowIndex) => (\n              <TableRow key={rowIndex}>\n                {headers.map((header, cellIndex) => (\n                  <TableCell\n                    className={cn(\n                      cellIndex === 0 && \"sticky left-0 bg-background-tint-01\",\n                      \"py-0 px-4\"\n                    )}\n                    key={cellIndex}\n                  >\n                    {row[header]}\n                  </TableCell>\n                ))}\n              </TableRow>\n            ))\n          ) : (\n            <TableRow>\n              <TableCell colSpan={headers.length} className=\"text-center py-8\">\n                <div className=\"flex flex-col items-center justify-center space-y-2\">\n                  <SvgAlertCircle className=\"w-8 h-8 stroke-error\" />\n                  <Text as=\"p\" text03 mainUiBody>\n                    {headers.length === 0\n                      ? \"Error loading CSV\"\n                      : \"No data available\"}\n                  </Text>\n                  <Text as=\"p\" text04 mainUiBody>\n                    {headers.length === 0\n                      ? \"The CSV file may be too large or couldn't be loaded properly.\"\n                      : \"\"}\n                  </Text>\n                </div>\n              </TableCell>\n            </TableRow>\n          )}\n        </TableBody>\n      </Table>\n    </div>\n  );\n};\n\nexport default CsvContent;\n\nconst csvCache = new Map<\n  string,\n  { headers: string[]; data: Record<string, string>[] }\n>();\n\nexport function parseCSV(text: string): string[][] {\n  const rows: string[][] = [];\n  let field = \"\";\n  let fields: string[] = [];\n  let inQuotes = false;\n\n  for (let i = 0; i < text.length; i++) {\n    const char = text[i];\n\n    if (inQuotes) {\n      if (char === '\"') {\n        if (i + 1 < text.length && text[i + 1] === '\"') {\n          field += '\"';\n          i++;\n        } else {\n          inQuotes = false;\n        }\n      } else {\n        field += char;\n      }\n    } else if (char === '\"') {\n      inQuotes = true;\n    } else if (char === \",\") {\n      fields.push(field);\n      field = \"\";\n    } else if (char === \"\\n\" || char === \"\\r\") {\n      if (char === \"\\r\" && i + 1 < text.length && text[i + 1] === \"\\n\") {\n        i++;\n      }\n      fields.push(field);\n      field = \"\";\n      rows.push(fields);\n      fields = [];\n    } else {\n      field += char;\n    }\n  }\n\n  if (inQuotes) {\n    throw new Error(\"Malformed CSV: unterminated quoted field\");\n  }\n\n  if (field.length > 0 || fields.length > 0) {\n    fields.push(field);\n    rows.push(fields);\n  }\n\n  return rows;\n}\n"
  },
  {
    "path": "web/src/components/tools/ExpandableContentWrapper.tsx",
    "content": "// ExpandableContentWrapper\nimport React, { useState } from \"react\";\nimport { SvgDownloadCloud, SvgFold, SvgMaximize2, SvgX } from \"@opal/icons\";\nimport { Card, CardHeader, CardTitle, CardContent } from \"@/components/ui/card\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { FileDescriptor } from \"@/app/app/interfaces\";\nimport { cn } from \"@/lib/utils\";\nimport PreviewModal from \"@/sections/modals/PreviewModal\";\nimport { MinimalOnyxDocument } from \"@/lib/search/interfaces\";\n\nexport interface ExpandableContentWrapperProps {\n  fileDescriptor: FileDescriptor;\n  close: () => void;\n  ContentComponent: React.ComponentType<ContentComponentProps>;\n}\n\nexport interface ContentComponentProps {\n  fileDescriptor: FileDescriptor;\n  expanded?: boolean;\n}\n\nexport default function ExpandableContentWrapper({\n  fileDescriptor,\n  close,\n  ContentComponent,\n}: ExpandableContentWrapperProps) {\n  const [expanded, setExpanded] = useState(false);\n\n  const toggleExpand = () => setExpanded((prev) => !prev);\n\n  const downloadFile = () => {\n    const a = document.createElement(\"a\");\n    a.href = `api/chat/file/${fileDescriptor.id}`;\n    a.download = fileDescriptor.name || \"download.csv\";\n    a.setAttribute(\"download\", fileDescriptor.name || \"download.csv\");\n    document.body.appendChild(a);\n    a.click();\n    document.body.removeChild(a);\n  };\n\n  const Content = (\n    <div className=\"w-message-default max-w-full !rounded-lg overflow-y-hidden h-full\">\n      <CardHeader className=\"w-full bg-background-tint-02 top-0 p-3\">\n        <div className=\"flex justify-between items-center\">\n          <Text className=\"text-ellipsis line-clamp-1\" text03 mainUiAction>\n            {fileDescriptor.name || \"Untitled\"}\n          </Text>\n          <div className=\"flex flex-row items-center justify-end gap-1\">\n            <Button\n              prominence=\"tertiary\"\n              size=\"sm\"\n              onClick={downloadFile}\n              icon={SvgDownloadCloud}\n              tooltip=\"Download file\"\n            />\n            <Button\n              prominence=\"tertiary\"\n              size=\"sm\"\n              onClick={toggleExpand}\n              icon={expanded ? SvgFold : SvgMaximize2}\n              tooltip={expanded ? \"Minimize\" : \"Full screen\"}\n            />\n            <Button\n              prominence=\"tertiary\"\n              size=\"sm\"\n              onClick={close}\n              icon={SvgX}\n              tooltip=\"Hide\"\n            />\n          </div>\n        </div>\n      </CardHeader>\n      <Card\n        className={cn(\n          \"!rounded-none p-0 relative mx-auto w-full\",\n          expanded ? \"max-h-[600px]\" : \"max-h-[300px] h-full\"\n        )}\n      >\n        <CardContent className=\"p-0\">\n          <ContentComponent\n            fileDescriptor={fileDescriptor}\n            expanded={expanded}\n          />\n        </CardContent>\n      </Card>\n    </div>\n  );\n\n  const presentingDocument: MinimalOnyxDocument = {\n    document_id: fileDescriptor.id,\n    semantic_identifier: fileDescriptor.name ?? null,\n  };\n\n  return (\n    <>\n      {expanded && (\n        <PreviewModal\n          presentingDocument={presentingDocument}\n          onClose={() => setExpanded(false)}\n        />\n      )}\n      {!expanded && Content}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/components/tools/parseCSV.test.ts",
    "content": "import { parseCSV } from \"./CSVContent\";\n\ndescribe(\"parseCSV\", () => {\n  it(\"parses simple comma-separated rows\", () => {\n    expect(parseCSV(\"a,b,c\\n1,2,3\")).toEqual([\n      [\"a\", \"b\", \"c\"],\n      [\"1\", \"2\", \"3\"],\n    ]);\n  });\n\n  it(\"preserves commas inside quoted fields\", () => {\n    expect(parseCSV('name,address\\nAlice,\"123 Main St, Apt 4\"')).toEqual([\n      [\"name\", \"address\"],\n      [\"Alice\", \"123 Main St, Apt 4\"],\n    ]);\n  });\n\n  it(\"handles escaped double quotes inside quoted fields\", () => {\n    expect(parseCSV('a,b\\n\"say \"\"hello\"\"\",world')).toEqual([\n      [\"a\", \"b\"],\n      ['say \"hello\"', \"world\"],\n    ]);\n  });\n\n  it(\"handles newlines inside quoted fields\", () => {\n    expect(parseCSV('a,b\\n\"line1\\nline2\",val')).toEqual([\n      [\"a\", \"b\"],\n      [\"line1\\nline2\", \"val\"],\n    ]);\n  });\n\n  it(\"handles CRLF line endings\", () => {\n    expect(parseCSV(\"a,b\\r\\n1,2\\r\\n3,4\")).toEqual([\n      [\"a\", \"b\"],\n      [\"1\", \"2\"],\n      [\"3\", \"4\"],\n    ]);\n  });\n\n  it(\"handles empty fields\", () => {\n    expect(parseCSV(\"a,b,c\\n1,,3\")).toEqual([\n      [\"a\", \"b\", \"c\"],\n      [\"1\", \"\", \"3\"],\n    ]);\n  });\n\n  it(\"handles a single element\", () => {\n    expect(parseCSV(\"a\")).toEqual([[\"a\"]]);\n  });\n\n  it(\"handles a single row with no newline\", () => {\n    expect(parseCSV(\"a,b,c\")).toEqual([[\"a\", \"b\", \"c\"]]);\n  });\n\n  it(\"handles quoted fields that are entirely empty\", () => {\n    expect(parseCSV('a,b\\n\"\",val')).toEqual([\n      [\"a\", \"b\"],\n      [\"\", \"val\"],\n    ]);\n  });\n\n  it(\"handles multiple quoted fields with commas\", () => {\n    expect(parseCSV('\"foo, bar\",\"baz, qux\"\\n\"1, 2\",\"3, 4\"')).toEqual([\n      [\"foo, bar\", \"baz, qux\"],\n      [\"1, 2\", \"3, 4\"],\n    ]);\n  });\n\n  it(\"throws on unterminated quoted field\", () => {\n    expect(() => parseCSV('a,b\\n\"foo,bar')).toThrow(\n      \"Malformed CSV: unterminated quoted field\"\n    );\n  });\n\n  it(\"throws on unterminated quote at end of input\", () => {\n    expect(() => parseCSV('\"unterminated')).toThrow(\n      \"Malformed CSV: unterminated quoted field\"\n    );\n  });\n\n  it(\"returns empty array for empty input\", () => {\n    expect(parseCSV(\"\")).toEqual([]);\n  });\n});\n"
  },
  {
    "path": "web/src/components/tooltip/CustomTooltip.tsx",
    "content": "import React, {\n  ReactNode,\n  useState,\n  useEffect,\n  useRef,\n  createContext,\n  useContext,\n  JSX,\n} from \"react\";\nimport { createPortal } from \"react-dom\";\nimport { cn } from \"@/lib/utils\";\n\n// Create a context for the tooltip group\nconst TooltipGroupContext = createContext<{\n  setGroupHovered: React.Dispatch<React.SetStateAction<boolean>>;\n  groupHovered: boolean;\n  hoverCountRef: React.MutableRefObject<boolean>;\n}>({\n  setGroupHovered: () => {},\n  groupHovered: false,\n  hoverCountRef: { current: false },\n});\n\nexport const TooltipGroup: React.FC<{\n  children: React.ReactNode;\n  gap?: string;\n}> = ({ children, gap }) => {\n  const [groupHovered, setGroupHovered] = useState(false);\n  const hoverCountRef = useRef(false);\n\n  return (\n    <TooltipGroupContext.Provider\n      value={{ groupHovered, setGroupHovered, hoverCountRef }}\n    >\n      <div className={cn(\"inline-flex\", gap)}>{children}</div>\n    </TooltipGroupContext.Provider>\n  );\n};\n\nexport const CustomTooltip = ({\n  content,\n  children,\n  large,\n  light,\n  citation,\n  line,\n  medium,\n  wrap,\n  showTick = false,\n  delay = 300,\n  position = \"bottom\",\n  disabled = false,\n  className,\n}: {\n  medium?: boolean;\n  content: string | ReactNode;\n  children: JSX.Element;\n  large?: boolean;\n  line?: boolean;\n  light?: boolean;\n  showTick?: boolean;\n  delay?: number;\n  wrap?: boolean;\n  citation?: boolean;\n  position?: \"top\" | \"bottom\";\n  disabled?: boolean;\n  className?: string;\n}) => {\n  const [isVisible, setIsVisible] = useState(false);\n  const [tooltipPosition, setTooltipPosition] = useState({ top: 0, left: 0 });\n  const timeoutRef = useRef<NodeJS.Timeout | null>(null);\n  const triggerRef = useRef<HTMLSpanElement>(null);\n\n  const { groupHovered, setGroupHovered, hoverCountRef } =\n    useContext(TooltipGroupContext);\n\n  const showTooltip = () => {\n    hoverCountRef.current = true;\n\n    const showDelay = groupHovered ? 0 : delay;\n    timeoutRef.current = setTimeout(() => {\n      setIsVisible(true);\n      setGroupHovered(true);\n      updateTooltipPosition();\n    }, showDelay);\n  };\n\n  const hideTooltip = () => {\n    if (timeoutRef.current) {\n      clearTimeout(timeoutRef.current);\n    }\n    hoverCountRef.current = false;\n    setIsVisible(false);\n    setTimeout(() => {\n      if (!hoverCountRef.current) {\n        setGroupHovered(false);\n      }\n    }, 100);\n  };\n\n  const updateTooltipPosition = () => {\n    if (triggerRef.current) {\n      const rect = triggerRef.current.getBoundingClientRect();\n      const scrollX = window.scrollX || window.pageXOffset;\n      const scrollY = window.scrollY || window.pageYOffset;\n\n      setTooltipPosition({\n        top: (position === \"top\" ? rect.top - 10 : rect.bottom + 10) + scrollY,\n        left: rect.left + rect.width / 2 + scrollX,\n      });\n    }\n  };\n\n  useEffect(() => {\n    return () => {\n      if (timeoutRef.current) {\n        clearTimeout(timeoutRef.current);\n      }\n    };\n  }, []);\n\n  return (\n    <>\n      <span\n        ref={triggerRef}\n        className={cn(\"relative inline-block\", className)}\n        onMouseEnter={showTooltip}\n        onMouseLeave={hideTooltip}\n        onMouseDown={hideTooltip}\n        onClick={hideTooltip}\n      >\n        {children}\n      </span>\n      {isVisible &&\n        !disabled &&\n        createPortal(\n          <div\n            className={cn(\n              \"fixed z-[1000] overflow-hidden rounded-md text-neutral-50\",\n              \"transform -translate-x-1/2 text-xs\",\n              \"px-2 py-1.5 shadow-md animate-in fade-in-0 zoom-in-95\",\n              \"data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95\",\n              \"data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2\",\n              \"data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2\",\n              citation ? \"max-w-[350px]\" : \"max-w-40\",\n              large ? (medium ? \"w-88\" : \"w-96\") : line && \"max-w-64 w-auto\",\n              light\n                ? \"bg-neutral-200 dark:bg-neutral-800 text-neutral-900 dark:text-neutral-50\"\n                : \"bg-neutral-900 dark:bg-neutral-200 text-neutral-50 dark:text-neutral-900\",\n              className\n            )}\n            style={{\n              top: `${tooltipPosition.top}px`,\n              left: `${tooltipPosition.left}px`,\n            }}\n          >\n            {showTick && (\n              <div\n                className={cn(\n                  \"absolute w-2 h-2 left-1/2 transform -translate-x-1/2 rotate-45\",\n                  position === \"top\" ? \"bottom-1\" : \"-top-1\",\n                  light\n                    ? \"bg-neutral-200 dark:bg-neutral-800\"\n                    : \"bg-neutral-900 dark:bg-neutral-200\"\n                )}\n              />\n            )}\n            <div\n              className={cn(\n                \"flex-wrap relative p-0\",\n                wrap && \"w-full\",\n                !line && \"flex\"\n              )}\n              style={\n                line || wrap\n                  ? {\n                      whiteSpace: wrap ? \"normal\" : \"nowrap\",\n                      overflow: \"hidden\",\n                      textOverflow: \"ellipsis\",\n                    }\n                  : {}\n              }\n            >\n              {content}\n            </div>\n          </div>,\n          document.body\n        )}\n    </>\n  );\n};\n"
  },
  {
    "path": "web/src/components/ui/RadioGroupItemField.tsx",
    "content": "import React from \"react\";\nimport { RadioGroupItem } from \"@/components/ui/radio-group\";\n\ninterface RadioGroupItemFieldProps {\n  value: string;\n  id: string;\n  label: string;\n  sublabel?: string;\n}\n\nexport const RadioGroupItemField: React.FC<RadioGroupItemFieldProps> = ({\n  value,\n  id,\n  label,\n  sublabel,\n}) => {\n  const handleClick = () => {\n    const radio = document.getElementById(id) as HTMLInputElement;\n    if (radio) {\n      radio.checked = true;\n      radio.dispatchEvent(new Event(\"change\", { bubbles: true }));\n    }\n  };\n\n  return (\n    <div className=\"flex items-start space-x-2\">\n      <RadioGroupItem value={value} id={id} className=\"mt-1\" />\n      <div className=\"flex flex-col\">\n        <label\n          htmlFor={id}\n          className=\"flex flex-col cursor-pointer\"\n          onClick={handleClick}\n        >\n          <span className=\"text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70\">\n            {label}\n          </span>\n          {sublabel && (\n            <span className=\"text-sm text-muted-foreground mt-1\">\n              {sublabel}\n            </span>\n          )}\n        </label>\n      </div>\n    </div>\n  );\n};\n"
  },
  {
    "path": "web/src/components/ui/accordion.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport * as AccordionPrimitive from \"@radix-ui/react-accordion\";\nimport { ChevronDown } from \"lucide-react\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst Accordion = AccordionPrimitive.Root;\n\nconst AccordionItem = React.forwardRef<\n  React.ElementRef<typeof AccordionPrimitive.Item>,\n  React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Item>\n>(({ className, ...props }, ref) => (\n  <AccordionPrimitive.Item\n    ref={ref}\n    className={cn(\n      \"border-b border-neutral-200 dark:border-neutral-600\",\n      className\n    )}\n    {...props}\n  />\n));\nAccordionItem.displayName = \"AccordionItem\";\n\nconst AccordionTrigger = React.forwardRef<\n  React.ElementRef<typeof AccordionPrimitive.Trigger>,\n  React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Trigger>\n>(({ className, children, ...props }, ref) => (\n  <AccordionPrimitive.Header className=\"flex\">\n    <AccordionPrimitive.Trigger\n      ref={ref}\n      className={cn(\n        \"flex flex-1 text-base items-center text-text justify-between pb-4 font-medium transition-all hover:underline [&[data-state=open]>svg]:rotate-180\",\n        className\n      )}\n      {...props}\n    >\n      {children}\n      <ChevronDown className=\"h-4 w-4 shrink-0 transition-transform duration-200\" />\n    </AccordionPrimitive.Trigger>\n  </AccordionPrimitive.Header>\n));\nAccordionTrigger.displayName = \"AccordionTrigger\";\n\nconst AccordionContent = React.forwardRef<\n  React.ElementRef<typeof AccordionPrimitive.Content>,\n  React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Content>\n>(({ className, children, ...props }, ref) => (\n  <AccordionPrimitive.Content\n    ref={ref}\n    className={cn(\n      \"overflow-hidden text-sm transition-all data-[state=closed]:animate-accordion-up data-[state=open]:animate-accordion-down\",\n      className\n    )}\n    {...props}\n  >\n    <div className=\"pb-1 pt-0\">{children}</div>\n  </AccordionPrimitive.Content>\n));\nAccordionContent.displayName = \"AccordionContent\";\n\nexport { Accordion, AccordionItem, AccordionTrigger, AccordionContent };\n"
  },
  {
    "path": "web/src/components/ui/alert.tsx",
    "content": "import * as React from \"react\";\nimport { cva, type VariantProps } from \"class-variance-authority\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst alertVariants = cva(\n  \"relative w-full rounded-lg border border-background-200 p-4 [&>svg~*]:pl-7 [&>svg+div]:translate-y-[-3px] [&>svg]:absolute [&>svg]:left-4 [&>svg]:top-4 [&>svg]:text-neutral-950 dark:border-background-800 dark:[&>svg]:text-neutral-50\",\n  {\n    variants: {\n      variant: {\n        broken:\n          \"border-red-500/50 text-red-500 dark:border-red-500 [&>svg]:text-red-500 dark:border-red-900/50 dark:text-red-100 dark:dark:border-red-900 dark:[&>svg]:text-red-700 bg-red-50 dark:bg-red-950\",\n        ark: \"border-amber-500/50 text-amber-500 dark:border-amber-500 [&>svg]:text-amber-500 dark:border-amber-900/50 dark:text-amber-900 dark:dark:border-amber-900 dark:[&>svg]:text-amber-900 bg-amber-50 dark:bg-amber-950\",\n        info: \"border-[#fff]/50 dark:border-[#fff] dark:border-[#fff]/50 dark:dark:border-[#fff]\",\n        default:\n          \"bg-neutral-50 text-neutral-darker dark:bg-neutral-950 dark:text-text\",\n        destructive:\n          \"border-red-500/50 text-red-500 dark:border-red-500 [&>svg]:text-red-500 dark:border-red-900/50 dark:text-red-600 dark:dark:border-red-900 dark:[&>svg]:text-red-900\",\n      },\n    },\n    defaultVariants: {\n      variant: \"default\",\n    },\n  }\n);\n\nconst Alert = React.forwardRef<\n  HTMLDivElement,\n  React.HTMLAttributes<HTMLDivElement> & VariantProps<typeof alertVariants>\n>(({ className, variant, ...props }, ref) => (\n  <div\n    ref={ref}\n    role=\"alert\"\n    className={cn(alertVariants({ variant }), className)}\n    {...props}\n  />\n));\nAlert.displayName = \"Alert\";\n\nconst AlertTitle = React.forwardRef<\n  HTMLParagraphElement,\n  React.HTMLAttributes<HTMLHeadingElement>\n>(({ className, ...props }, ref) => (\n  <h5\n    ref={ref}\n    className={cn(\"mb-1 font-medium leading-none tracking-tight\", className)}\n    {...props}\n  />\n));\nAlertTitle.displayName = \"AlertTitle\";\n\nconst AlertDescription = React.forwardRef<\n  HTMLParagraphElement,\n  React.HTMLAttributes<HTMLParagraphElement>\n>(({ className, ...props }, ref) => (\n  <div\n    ref={ref}\n    className={cn(\"text-sm [&_p]:leading-relaxed\", className)}\n    {...props}\n  />\n));\nAlertDescription.displayName = \"AlertDescription\";\n\nexport { Alert, AlertTitle, AlertDescription };\n"
  },
  {
    "path": "web/src/components/ui/areaChart.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport {\n  Area,\n  AreaChart as ReChartsAreaChart,\n  CartesianGrid,\n  ResponsiveContainer,\n  Tooltip,\n  XAxis,\n  YAxis,\n} from \"recharts\";\n\nimport {\n  Card,\n  CardContent,\n  CardDescription,\n  CardHeader,\n  CardTitle,\n} from \"@/components/ui/card\";\n\ninterface AreaChartProps {\n  data?: any[];\n  categories?: string[];\n  index?: string;\n  colors?: string[];\n  showXAxis?: boolean;\n  showYAxis?: boolean;\n  yAxisWidth?: number;\n  showAnimation?: boolean;\n  showTooltip?: boolean;\n  showGridLines?: boolean;\n  connectNulls?: boolean;\n  allowDecimals?: boolean;\n  className?: string;\n  title?: string;\n  description?: string;\n  xAxisFormatter?: (value: any) => string;\n  yAxisFormatter?: (value: any) => string;\n  stacked?: boolean;\n}\n\nexport function AreaChartDisplay({\n  data = [],\n  categories = [],\n  index,\n  colors = [\"indigo\", \"fuchsia\"],\n  showXAxis = true,\n  showYAxis = true,\n  yAxisWidth = 56,\n  showAnimation = true,\n  showTooltip = true,\n  showGridLines = true,\n  connectNulls = false,\n  allowDecimals = true,\n  className,\n  title,\n  description,\n  xAxisFormatter = (dateStr: string) => dateStr,\n  yAxisFormatter = (number: number) => number.toString(),\n  stacked = false,\n}: AreaChartProps) {\n  return (\n    <Card className={className}>\n      <CardHeader>\n        {title && <CardTitle>{title}</CardTitle>}\n        {description && <CardDescription>{description}</CardDescription>}\n      </CardHeader>\n      <CardContent>\n        <div className=\"h-[350px] w-full\">\n          <ResponsiveContainer width=\"100%\" height=\"100%\">\n            <ReChartsAreaChart\n              data={data}\n              margin={{\n                top: 10,\n                right: 30,\n                left: 0,\n                bottom: 0,\n              }}\n            >\n              {showGridLines && <CartesianGrid strokeDasharray=\"3 3\" />}\n              {showXAxis && (\n                <XAxis\n                  dataKey={index}\n                  tickLine={false}\n                  axisLine={false}\n                  tickMargin={8}\n                  tickFormatter={(value) => xAxisFormatter(value)}\n                />\n              )}\n              {showYAxis && (\n                <YAxis\n                  width={yAxisWidth}\n                  tickLine={false}\n                  axisLine={false}\n                  tickFormatter={(value) => yAxisFormatter(value)}\n                  allowDecimals={allowDecimals}\n                />\n              )}\n              {showTooltip && <Tooltip />}\n              {categories.map((category, ind) => (\n                <Area\n                  key={category}\n                  type=\"monotone\"\n                  dataKey={category}\n                  stackId={stacked ? \"1\" : category}\n                  stroke={colors[ind % colors.length]}\n                  fill={colors[ind % colors.length]}\n                  fillOpacity={0.3}\n                  isAnimationActive={showAnimation}\n                  connectNulls={connectNulls}\n                />\n              ))}\n            </ReChartsAreaChart>\n          </ResponsiveContainer>\n        </div>\n      </CardContent>\n    </Card>\n  );\n}\n"
  },
  {
    "path": "web/src/components/ui/badge.tsx",
    "content": "import * as React from \"react\";\nimport { cva, type VariantProps } from \"class-variance-authority\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { cn } from \"@/lib/utils\";\n\nconst badgeVariants = cva(\n  \"inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-normal transition-colors focus:outline-none focus:ring-2 focus:ring-neutral-950 focus:ring-offset-2 dark:focus:ring-neutral-300\",\n  {\n    variants: {\n      variant: {\n        invalid:\n          \"border-orange-200 bg-orange-50 text-orange-600 dark:border-orange-700 dark:bg-orange-900 dark:text-orange-50\",\n        outline:\n          \"border-neutral-200 bg-neutral-50 text-neutral-600 dark:border-neutral-700 dark:bg-neutral-900 dark:text-neutral-50\",\n        purple:\n          \"border-purple-200 bg-purple-50 text-purple-700 dark:border-purple-700 dark:bg-purple-900 dark:text-purple-100\",\n        public:\n          \"border-green-200 bg-green-50 text-green-700 dark:border-green-700 dark:bg-green-900 dark:text-green-100\",\n        private:\n          \"border-yellow-200 bg-yellow-50 text-yellow-700 dark:border-yellow-600 dark:bg-yellow-700 dark:text-yellow-100\",\n        \"auto-sync\":\n          \"border-blue-200 bg-blue-50 text-blue-700 dark:border-blue-700 dark:bg-blue-900 dark:text-blue-100\",\n        agent:\n          \"border-orange-200 bg-orange-50 text-orange-600 dark:border-orange-800 dark:bg-orange-600/20 dark:text-neutral-200\",\n        \"agent-faded\":\n          \"border-neutral-200 bg-neutral-50 text-neutral-600 dark:border-neutral-700 dark:bg-neutral-800 dark:text-neutral-50\",\n        canceled:\n          \"border-gray-200 bg-gray-50 text-gray-600 dark:border-gray-700 dark:bg-gray-900 dark:text-neutral-50\",\n        paused:\n          \"border-yellow-200 bg-yellow-50 text-yellow-700 dark:border-yellow-600 dark:bg-yellow-700 dark:text-yellow-100\",\n        in_progress:\n          \"border-blue-200 bg-blue-50 text-blue-600 dark:border-blue-700 dark:bg-blue-900 dark:text-neutral-50\",\n        success:\n          \"border-green-200 bg-emerald-50 text-green-600 dark:border-green-600 dark:bg-green-900 dark:text-green-50\",\n        default:\n          \"border-neutral-200 bg-neutral-50 text-neutral-600 dark:border-neutral-700 dark:bg-neutral-900 dark:text-neutral-50\",\n        secondary:\n          \"border-neutral-200 bg-neutral-50 text-neutral-600 dark:border-neutral-700 dark:bg-neutral-900 dark:text-neutral-50\",\n        destructive:\n          \"border-red-200 bg-red-50 text-red-600 dark:border-red-700 dark:bg-red-900 dark:text-neutral-50\",\n        not_started:\n          \"border-purple-200 bg-purple-50 text-purple-700 dark:border-purple-700 dark:bg-purple-900 dark:text-purple-100\",\n      },\n    },\n    defaultVariants: {\n      variant: \"default\",\n    },\n  }\n);\n\nexport interface BadgeProps\n  extends React.HTMLAttributes<HTMLDivElement>,\n    VariantProps<typeof badgeVariants> {}\n\nfunction Badge({\n  className,\n  variant,\n  color,\n  icon: Icon,\n  size = \"sm\",\n  circle,\n  tooltip,\n  ...props\n}: BadgeProps & {\n  icon?: React.ElementType;\n  size?: \"sm\" | \"md\" | \"xs\";\n  circle?: boolean;\n  tooltip?: string;\n}) {\n  const sizeClasses = {\n    sm: \"px-2.5 py-0.5 text-xs\",\n    md: \"px-3 py-1 text-sm\",\n    xs: \"px-1.5 py-0.25 text-[.5rem]\",\n  };\n\n  const BadgeContent = (\n    <div\n      className={cn(\n        \"flex-none inline-flex items-center whitespace-nowrap overflow-hidden\",\n        badgeVariants({ variant }),\n        sizeClasses[size],\n        className\n      )}\n      {...props}\n    >\n      {Icon && (\n        <Icon\n          className={cn(\n            \"mr-1 flex-shrink-0\",\n            size === \"sm\" ? \"h-3 w-3\" : size === \"xs\" ? \"h-2 w-2\" : \"h-4 w-4\"\n          )}\n        />\n      )}\n      {circle && (\n        <div\n          className={cn(\n            \"mr-2 rounded-full bg-current opacity-80 flex-shrink-0\",\n            size === \"xs\" ? \"h-2 w-2\" : \"h-2.5 w-2.5\"\n          )}\n        />\n      )}\n      <span className=\"truncate\">{props.children}</span>\n    </div>\n  );\n\n  return <SimpleTooltip tooltip={tooltip}>{BadgeContent}</SimpleTooltip>;\n}\n\nexport { Badge, badgeVariants };\n"
  },
  {
    "path": "web/src/components/ui/callout.tsx",
    "content": "import { cn } from \"@/lib/utils\";\n\ninterface CalloutProps {\n  icon?: React.ReactNode;\n  children?: React.ReactNode;\n  type?: \"default\" | \"warning\" | \"danger\" | \"notice\";\n  className?: string;\n}\nexport function Callout({\n  children,\n  icon,\n  type = \"default\",\n  title,\n  className,\n  ...props\n}: CalloutProps & { title?: string }) {\n  return (\n    <div\n      className={cn(\n        \"my-6 flex items-start rounded-md border border-l-4 p-4\",\n        className,\n        {\n          \"border-rose-300 bg-rose-50 dark:border-rose-500 dark:bg-rose-950/50\":\n            type === \"danger\",\n          \"border-amber-300 bg-amber-50 dark:border-amber-700 dark:bg-amber-900/30\":\n            type === \"warning\",\n          \"border-sky-300 bg-sky-50 dark:border-sky-500 dark:bg-sky-950/50\":\n            type === \"notice\",\n        }\n      )}\n      {...props}\n    >\n      {icon && <span className=\"mr-4 text-2xl\">{icon}</span>}\n      <div className=\"flex-1\">\n        {title && (\n          <div className=\"font-medium mb-1 flex items-center dark:text-[#fff]\">\n            {title}\n          </div>\n        )}\n        <div className=\"dark:text-gray-300\">{children}</div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/components/ui/card.tsx",
    "content": "import * as React from \"react\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst Card = React.forwardRef<\n  HTMLDivElement,\n  React.HTMLAttributes<HTMLDivElement>\n>(({ className, ...props }, ref) => (\n  <div\n    ref={ref}\n    className={cn(\n      \"rounded-16 border bg-background-tint-00 overflow-hidden\",\n      className\n    )}\n    {...props}\n  />\n));\nCard.displayName = \"Card\";\n\nconst CardHeader = React.forwardRef<\n  HTMLDivElement,\n  React.HTMLAttributes<HTMLDivElement>\n>(({ className, ...props }, ref) => (\n  <div\n    ref={ref}\n    className={cn(\"flex flex-col space-y-1.5 p-6\", className)}\n    {...props}\n  />\n));\nCardHeader.displayName = \"CardHeader\";\n\nconst CardTitle = React.forwardRef<\n  HTMLDivElement,\n  React.HTMLAttributes<HTMLDivElement>\n>(({ className, ...props }, ref) => (\n  <div\n    ref={ref}\n    className={cn(\n      \"text-2xl font-semibold leading-none tracking-tight\",\n      className\n    )}\n    {...props}\n  />\n));\nCardTitle.displayName = \"CardTitle\";\n\nconst CardDescription = React.forwardRef<\n  HTMLDivElement,\n  React.HTMLAttributes<HTMLDivElement>\n>(({ className, ...props }, ref) => (\n  <div\n    ref={ref}\n    className={cn(\"text-sm text-neutral-500\", className)}\n    {...props}\n  />\n));\nCardDescription.displayName = \"CardDescription\";\n\nconst CardContent = React.forwardRef<\n  HTMLDivElement,\n  React.HTMLAttributes<HTMLDivElement>\n>(({ className, ...props }, ref) => (\n  <div ref={ref} className={cn(\"p-6 pt-0\", className)} {...props} />\n));\nCardContent.displayName = \"CardContent\";\n\nconst CardFooter = React.forwardRef<\n  HTMLDivElement,\n  React.HTMLAttributes<HTMLDivElement>\n>(({ className, ...props }, ref) => (\n  <div\n    ref={ref}\n    className={cn(\"flex items-center p-6 pt-0\", className)}\n    {...props}\n  />\n));\nCardFooter.displayName = \"CardFooter\";\n\nexport {\n  Card,\n  CardHeader,\n  CardFooter,\n  CardTitle,\n  CardDescription,\n  CardContent,\n};\n"
  },
  {
    "path": "web/src/components/ui/dialog.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport * as DialogPrimitive from \"@radix-ui/react-dialog\";\nimport { X } from \"lucide-react\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst Dialog = DialogPrimitive.Root;\n\nconst DialogTrigger = DialogPrimitive.Trigger;\n\nconst DialogPortal = DialogPrimitive.Portal;\n\nconst DialogClose = DialogPrimitive.Close;\n\nconst DialogOverlay = React.forwardRef<\n  React.ElementRef<typeof DialogPrimitive.Overlay>,\n  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Overlay> & {\n    backgroundColor?: string;\n    overlayClassName?: string;\n  }\n>(({ className, backgroundColor, overlayClassName, ...props }, ref) => (\n  <DialogPrimitive.Overlay\n    ref={ref}\n    className={cn(\n      backgroundColor || \"bg-neutral-950/60\",\n      \"fixed inset-0 z-50   data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0\",\n      overlayClassName,\n      className\n    )}\n    {...props}\n  />\n));\nDialogOverlay.displayName = DialogPrimitive.Overlay.displayName;\n\nconst DialogContent = React.forwardRef<\n  React.ElementRef<typeof DialogPrimitive.Content>,\n  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Content> & {\n    hideCloseIcon?: boolean;\n    backgroundColor?: string;\n    overlayClassName?: string;\n  }\n>(\n  (\n    {\n      className,\n      children,\n      hideCloseIcon,\n      backgroundColor,\n      overlayClassName,\n      ...props\n    },\n    ref\n  ) => (\n    <DialogPortal>\n      <DialogOverlay\n        backgroundColor={backgroundColor}\n        overlayClassName={overlayClassName}\n      />\n      <DialogPrimitive.Content\n        ref={ref}\n        className={cn(\n          \"fixed left-[50%] top-[50%] z-50 grid w-full max-w-lg translate-x-[-50%] translate-y-[-50%] gap-4 border border-neutral-200 bg-neutral-50 p-6 shadow-lg duration-200 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[state=closed]:slide-out-to-left-1/2 data-[state=closed]:slide-out-to-top-[48%] data-[state=open]:slide-in-from-left-1/2 data-[state=open]:slide-in-from-top-[48%] sm:rounded-lg dark:border-neutral-800 dark:bg-neutral-900\",\n          className\n        )}\n        {...props}\n      >\n        {children}\n        {!hideCloseIcon && (\n          <DialogPrimitive.Close className=\"absolute right-4 top-4 rounded-sm opacity-70 ring-offset-white transition-opacity hover:opacity-100 focus:outline-none focus:ring-2 focus:ring-neutral-950 focus:ring-offset-2 disabled:pointer-events-none data-[state=open]:bg-neutral-100 data-[state=open]:text-neutral-500 dark:ring-offset-neutral-950 dark:focus:ring-neutral-300 dark:data-[state=open]:bg-neutral-800 dark:data-[state=open]:text-neutral-400\">\n            <X className=\"h-4 w-4\" />\n            <span className=\"sr-only\">Close</span>\n          </DialogPrimitive.Close>\n        )}\n      </DialogPrimitive.Content>\n    </DialogPortal>\n  )\n);\nDialogContent.displayName = DialogPrimitive.Content.displayName;\n\nconst DialogHeader = ({\n  className,\n  ...props\n}: React.HTMLAttributes<HTMLDivElement>) => (\n  <div\n    className={cn(\n      \"flex flex-col space-y-1.5 text-center sm:text-left\",\n      className\n    )}\n    {...props}\n  />\n);\nDialogHeader.displayName = \"DialogHeader\";\n\nconst DialogFooter = ({\n  className,\n  ...props\n}: React.HTMLAttributes<HTMLDivElement>) => (\n  <div\n    className={cn(\n      \"flex flex-col-reverse sm:flex-row sm:justify-end sm:space-x-2\",\n      className\n    )}\n    {...props}\n  />\n);\nDialogFooter.displayName = \"DialogFooter\";\n\nconst DialogTitle = React.forwardRef<\n  React.ElementRef<typeof DialogPrimitive.Title>,\n  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Title>\n>(({ className, ...props }, ref) => (\n  <DialogPrimitive.Title\n    ref={ref}\n    className={cn(\n      \"text-lg font-semibold leading-none tracking-tight\",\n      className\n    )}\n    {...props}\n  />\n));\nDialogTitle.displayName = DialogPrimitive.Title.displayName;\n\nconst DialogDescription = React.forwardRef<\n  React.ElementRef<typeof DialogPrimitive.Description>,\n  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Description>\n>(({ className, ...props }, ref) => (\n  <DialogPrimitive.Description\n    ref={ref}\n    className={cn(\"text-sm text-neutral-500 dark:text-neutral-400\", className)}\n    {...props}\n  />\n));\nDialogDescription.displayName = DialogPrimitive.Description.displayName;\n\nexport {\n  Dialog,\n  DialogPortal,\n  DialogOverlay,\n  DialogClose,\n  DialogTrigger,\n  DialogContent,\n  DialogHeader,\n  DialogFooter,\n  DialogTitle,\n  DialogDescription,\n};\n"
  },
  {
    "path": "web/src/components/ui/dropdown-menu-with-tooltip.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { DropdownMenuItem } from \"./dropdown-menu\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { cn } from \"@/lib/utils\";\n\ninterface DropdownMenuItemWithTooltipProps\n  extends React.ComponentPropsWithoutRef<typeof DropdownMenuItem> {\n  tooltip?: string;\n}\n\nconst DropdownMenuItemWithTooltip = React.forwardRef<\n  React.ElementRef<typeof DropdownMenuItem>,\n  DropdownMenuItemWithTooltipProps\n>(({ className, tooltip, disabled, ...props }, ref) => {\n  // Only show tooltip if the item is disabled and a tooltip is provided\n  if (!tooltip || !disabled) {\n    return (\n      <DropdownMenuItem\n        ref={ref}\n        className={className}\n        disabled={disabled}\n        {...props}\n      />\n    );\n  }\n\n  return (\n    <SimpleTooltip tooltip={tooltip}>\n      <div className=\"cursor-not-allowed\">\n        <DropdownMenuItem\n          ref={ref}\n          className={cn(className)}\n          disabled={disabled}\n          {...props}\n        />\n      </div>\n    </SimpleTooltip>\n  );\n});\n\nDropdownMenuItemWithTooltip.displayName = \"DropdownMenuItemWithTooltip\";\n\nexport { DropdownMenuItemWithTooltip };\n"
  },
  {
    "path": "web/src/components/ui/dropdown-menu.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport * as DropdownMenuPrimitive from \"@radix-ui/react-dropdown-menu\";\nimport { Check, ChevronRight, Circle } from \"lucide-react\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst DropdownMenu = DropdownMenuPrimitive.Root;\n\nconst DropdownMenuTrigger = DropdownMenuPrimitive.Trigger;\n\nconst DropdownMenuGroup = DropdownMenuPrimitive.Group;\n\nconst DropdownMenuPortal = DropdownMenuPrimitive.Portal;\n\nconst DropdownMenuSub = DropdownMenuPrimitive.Sub;\n\nconst DropdownMenuRadioGroup = DropdownMenuPrimitive.RadioGroup;\n\nconst DropdownMenuSubTrigger = React.forwardRef<\n  React.ElementRef<typeof DropdownMenuPrimitive.SubTrigger>,\n  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubTrigger> & {\n    inset?: boolean;\n  }\n>(({ className, inset, children, ...props }, ref) => (\n  <DropdownMenuPrimitive.SubTrigger\n    ref={ref}\n    className={cn(\n      \"flex cursor-default gap-2 select-none items-center rounded-sm px-2 py-1.5 text-sm outline-none focus:bg-neutral-100 data-[state=open]:bg-neutral-100 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 dark:focus:bg-neutral-800 dark:data-[state=open]:bg-neutral-800\",\n      inset && \"pl-8\",\n      className\n    )}\n    {...props}\n  >\n    {children}\n    <ChevronRight className=\"ml-auto\" />\n  </DropdownMenuPrimitive.SubTrigger>\n));\nDropdownMenuSubTrigger.displayName =\n  DropdownMenuPrimitive.SubTrigger.displayName;\n\nconst DropdownMenuSubContent = React.forwardRef<\n  React.ElementRef<typeof DropdownMenuPrimitive.SubContent>,\n  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubContent>\n>(({ className, ...props }, ref) => (\n  <DropdownMenuPrimitive.SubContent\n    ref={ref}\n    className={cn(\n      \"z-50 min-w-[8rem] overflow-hidden rounded-md border border-neutral-200 bg-white p-1 text-neutral-950 shadow-lg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 dark:border-neutral-800 dark:bg-neutral-950 dark:text-neutral-50\",\n      className\n    )}\n    {...props}\n  />\n));\nDropdownMenuSubContent.displayName =\n  DropdownMenuPrimitive.SubContent.displayName;\n\nconst DropdownMenuContent = React.forwardRef<\n  React.ElementRef<typeof DropdownMenuPrimitive.Content>,\n  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Content>\n>(({ className, sideOffset = 4, ...props }, ref) => (\n  <DropdownMenuPrimitive.Portal>\n    <DropdownMenuPrimitive.Content\n      ref={ref}\n      sideOffset={sideOffset}\n      className={cn(\n        \"z-50 min-w-[8rem] overflow-hidden rounded-md border border-neutral-200 bg-white p-1 text-neutral-950 shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 dark:border-neutral-800 dark:bg-neutral-950 dark:text-neutral-50\",\n        className\n      )}\n      {...props}\n    />\n  </DropdownMenuPrimitive.Portal>\n));\nDropdownMenuContent.displayName = DropdownMenuPrimitive.Content.displayName;\n\nconst DropdownMenuItem = React.forwardRef<\n  React.ElementRef<typeof DropdownMenuPrimitive.Item>,\n  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Item> & {\n    inset?: boolean;\n  }\n>(({ className, inset, ...props }, ref) => (\n  <DropdownMenuPrimitive.Item\n    ref={ref}\n    className={cn(\n      \"relative flex cursor-default select-none items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-none transition-colors focus:bg-neutral-100 focus:text-neutral-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 dark:focus:bg-neutral-800 dark:focus:text-neutral-50\",\n      inset && \"pl-8\",\n      className\n    )}\n    {...props}\n  />\n));\nDropdownMenuItem.displayName = DropdownMenuPrimitive.Item.displayName;\n\nconst DropdownMenuCheckboxItem = React.forwardRef<\n  React.ElementRef<typeof DropdownMenuPrimitive.CheckboxItem>,\n  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.CheckboxItem>\n>(({ className, children, checked, ...props }, ref) => (\n  <DropdownMenuPrimitive.CheckboxItem\n    ref={ref}\n    className={cn(\n      \"relative flex cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none transition-colors focus:bg-neutral-100 focus:text-neutral-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 dark:focus:bg-neutral-800 dark:focus:text-neutral-50\",\n      className\n    )}\n    checked={checked}\n    {...props}\n  >\n    <span className=\"absolute left-2 flex h-3.5 w-3.5 items-center justify-center\">\n      <DropdownMenuPrimitive.ItemIndicator>\n        <Check className=\"h-4 w-4\" />\n      </DropdownMenuPrimitive.ItemIndicator>\n    </span>\n    {children}\n  </DropdownMenuPrimitive.CheckboxItem>\n));\nDropdownMenuCheckboxItem.displayName =\n  DropdownMenuPrimitive.CheckboxItem.displayName;\n\nconst DropdownMenuRadioItem = React.forwardRef<\n  React.ElementRef<typeof DropdownMenuPrimitive.RadioItem>,\n  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.RadioItem>\n>(({ className, children, ...props }, ref) => (\n  <DropdownMenuPrimitive.RadioItem\n    ref={ref}\n    className={cn(\n      \"relative flex cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none transition-colors focus:bg-neutral-100 focus:text-neutral-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 dark:focus:bg-neutral-800 dark:focus:text-neutral-50\",\n      className\n    )}\n    {...props}\n  >\n    <span className=\"absolute left-2 flex h-3.5 w-3.5 items-center justify-center\">\n      <DropdownMenuPrimitive.ItemIndicator>\n        <Circle className=\"h-2 w-2 fill-current\" />\n      </DropdownMenuPrimitive.ItemIndicator>\n    </span>\n    {children}\n  </DropdownMenuPrimitive.RadioItem>\n));\nDropdownMenuRadioItem.displayName = DropdownMenuPrimitive.RadioItem.displayName;\n\nconst DropdownMenuLabel = React.forwardRef<\n  React.ElementRef<typeof DropdownMenuPrimitive.Label>,\n  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Label> & {\n    inset?: boolean;\n  }\n>(({ className, inset, ...props }, ref) => (\n  <DropdownMenuPrimitive.Label\n    ref={ref}\n    className={cn(\n      \"px-2 py-1.5 text-sm font-semibold\",\n      inset && \"pl-8\",\n      className\n    )}\n    {...props}\n  />\n));\nDropdownMenuLabel.displayName = DropdownMenuPrimitive.Label.displayName;\n\nconst DropdownMenuSeparator = React.forwardRef<\n  React.ElementRef<typeof DropdownMenuPrimitive.Separator>,\n  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Separator>\n>(({ className, ...props }, ref) => (\n  <DropdownMenuPrimitive.Separator\n    ref={ref}\n    className={cn(\n      \"-mx-1 my-1 h-px bg-neutral-100 dark:bg-neutral-800\",\n      className\n    )}\n    {...props}\n  />\n));\nDropdownMenuSeparator.displayName = DropdownMenuPrimitive.Separator.displayName;\n\nconst DropdownMenuShortcut = ({\n  className,\n  ...props\n}: React.HTMLAttributes<HTMLSpanElement>) => {\n  return (\n    <span\n      className={cn(\"ml-auto text-xs tracking-widest opacity-60\", className)}\n      {...props}\n    />\n  );\n};\nDropdownMenuShortcut.displayName = \"DropdownMenuShortcut\";\n\nexport {\n  DropdownMenu,\n  DropdownMenuTrigger,\n  DropdownMenuContent,\n  DropdownMenuItem,\n  DropdownMenuCheckboxItem,\n  DropdownMenuRadioItem,\n  DropdownMenuLabel,\n  DropdownMenuSeparator,\n  DropdownMenuShortcut,\n  DropdownMenuGroup,\n  DropdownMenuPortal,\n  DropdownMenuSub,\n  DropdownMenuSubContent,\n  DropdownMenuSubTrigger,\n  DropdownMenuRadioGroup,\n};\n"
  },
  {
    "path": "web/src/components/ui/input.tsx",
    "content": "import * as React from \"react\";\n\nimport { cn } from \"@/lib/utils\";\n\ninterface InputProps extends React.ComponentProps<\"input\"> {\n  removeFocusRing?: boolean;\n}\n\nconst Input = React.forwardRef<HTMLInputElement, InputProps>(\n  ({ className, type, removeFocusRing, ...props }, ref) => {\n    return (\n      <input\n        type={type}\n        className={cn(\n          \"flex h-10 w-full rounded-md border border-neutral-200 bg-white px-3 py-2 text-base ring-offset-white file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-neutral-950 placeholder:text-neutral-500\",\n          removeFocusRing\n            ? \"\"\n            : \"focus-visible:outline-none focus-visible:ring-2  focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 md:text-sm dark:border-neutral-800 dark:bg-neutral-950 dark:ring-offset-neutral-950 dark:file:text-neutral-50 dark:placeholder:text-neutral-400 dark:focus-visible:ring-neutral-300\",\n          \"!focus:ring-0 !focus-visible:ring-transparent  !focus-visible:ring-0 !focus:outline-none\",\n          \"flex h-10 w-full rounded-md border border-border bg-background/75 focus:border-border-dark focus:ring-none focus:outline-none px-3 py-2 text-base ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground disabled:cursor-not-allowed disabled:opacity-50 md:text-sm\",\n          className\n        )}\n        ref={ref}\n        {...props}\n      />\n    );\n  }\n);\nInput.displayName = \"Input\";\n\nexport { Input };\n"
  },
  {
    "path": "web/src/components/ui/radio-group.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport * as RadioGroupPrimitive from \"@radix-ui/react-radio-group\";\nimport { Circle } from \"lucide-react\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst RadioGroup = React.forwardRef<\n  React.ElementRef<typeof RadioGroupPrimitive.Root>,\n  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Root>\n>(({ className, ...props }, ref) => {\n  return (\n    <RadioGroupPrimitive.Root\n      className={cn(\"grid gap-2\", className)}\n      {...props}\n      ref={ref}\n    />\n  );\n});\nRadioGroup.displayName = RadioGroupPrimitive.Root.displayName;\n\nconst RadioGroupItem = React.forwardRef<\n  React.ElementRef<typeof RadioGroupPrimitive.Item>,\n  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Item>\n>(({ className, ...props }, ref) => {\n  return (\n    <RadioGroupPrimitive.Item\n      ref={ref}\n      className={cn(\n        \"aspect-square h-3.5 w-3.5 rounded-full border border-background-900 text-neutral-900 ring-offset-white focus:outline-none focus-visible:ring-2 focus-visible:ring-neutral-950 focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 dark:border-background-800 dark:text-neutral-50 dark:ring-offset-neutral-950 dark:focus-visible:ring-neutral-300\",\n        className\n      )}\n      {...props}\n    >\n      <RadioGroupPrimitive.Indicator className=\"flex items-center justify-center\">\n        <Circle className=\"h-2.5 w-2.5 fill-current text-current\" />\n      </RadioGroupPrimitive.Indicator>\n    </RadioGroupPrimitive.Item>\n  );\n});\nRadioGroupItem.displayName = RadioGroupPrimitive.Item.displayName;\n\nexport { RadioGroup, RadioGroupItem };\n"
  },
  {
    "path": "web/src/components/ui/scroll-area.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport * as ScrollAreaPrimitive from \"@radix-ui/react-scroll-area\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst ScrollArea = React.forwardRef<\n  React.ElementRef<typeof ScrollAreaPrimitive.Root>,\n  React.ComponentPropsWithoutRef<typeof ScrollAreaPrimitive.Root>\n>(({ className, children, ...props }, ref) => (\n  <ScrollAreaPrimitive.Root\n    ref={ref}\n    className={cn(\"relative overflow-hidden\", className)}\n    {...props}\n  >\n    <ScrollAreaPrimitive.Viewport className=\"h-full w-full rounded-[inherit]\">\n      {children}\n    </ScrollAreaPrimitive.Viewport>\n    <ScrollBar />\n    <ScrollAreaPrimitive.Corner />\n  </ScrollAreaPrimitive.Root>\n));\nScrollArea.displayName = ScrollAreaPrimitive.Root.displayName;\n\nconst ScrollBar = React.forwardRef<\n  React.ElementRef<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>,\n  React.ComponentPropsWithoutRef<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>\n>(({ className, orientation = \"vertical\", ...props }, ref) => (\n  <ScrollAreaPrimitive.ScrollAreaScrollbar\n    ref={ref}\n    orientation={orientation}\n    className={cn(\n      \"flex touch-none select-none transition-colors\",\n      orientation === \"vertical\" &&\n        \"h-full w-2.5 border-l border-l-transparent p-[1px]\",\n      orientation === \"horizontal\" &&\n        \"h-2.5 flex-col border-t border-t-transparent p-[1px]\",\n      className\n    )}\n    {...props}\n  >\n    <ScrollAreaPrimitive.ScrollAreaThumb className=\"relative flex-1 rounded-full bg-neutral-200 dark:bg-neutral-800\" />\n  </ScrollAreaPrimitive.ScrollAreaScrollbar>\n));\nScrollBar.displayName = ScrollAreaPrimitive.ScrollAreaScrollbar.displayName;\n\nexport { ScrollArea, ScrollBar };\n"
  },
  {
    "path": "web/src/components/ui/select.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport * as SelectPrimitive from \"@radix-ui/react-select\";\nimport { Check, ChevronDown, ChevronUp } from \"lucide-react\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst Select = SelectPrimitive.Root;\n\nconst SelectValue = SelectPrimitive.Value;\n\nconst SelectTrigger = React.forwardRef<\n  React.ElementRef<typeof SelectPrimitive.Trigger>,\n  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Trigger>\n>(({ className, children, ...props }, ref) => (\n  <SelectPrimitive.Trigger\n    ref={ref}\n    className={cn(\n      `border flex h-10 w-full items-center justify-between rounded-md border bg-background-tint-01 p-3 ring-offset-background-neutral-00 placeholder:text-text-03 disabled:cursor-not-allowed disabled:opacity-50 [&>span]:line-clamp-1`,\n      className\n    )}\n    {...props}\n  >\n    {children}\n    <SelectPrimitive.Icon asChild>\n      <ChevronDown className=\"h-4 w-4 opacity-50\" />\n    </SelectPrimitive.Icon>\n  </SelectPrimitive.Trigger>\n));\nSelectTrigger.displayName = SelectPrimitive.Trigger.displayName;\n\nconst SelectScrollUpButton = React.forwardRef<\n  React.ElementRef<typeof SelectPrimitive.ScrollUpButton>,\n  React.ComponentPropsWithoutRef<typeof SelectPrimitive.ScrollUpButton>\n>(({ className, ...props }, ref) => (\n  <SelectPrimitive.ScrollUpButton\n    ref={ref}\n    className={cn(\n      \"flex cursor-default items-center justify-center py-1\",\n      className\n    )}\n    {...props}\n  >\n    <ChevronUp className=\"h-4 w-4\" />\n  </SelectPrimitive.ScrollUpButton>\n));\nSelectScrollUpButton.displayName = SelectPrimitive.ScrollUpButton.displayName;\n\nconst SelectScrollDownButton = React.forwardRef<\n  React.ElementRef<typeof SelectPrimitive.ScrollDownButton>,\n  React.ComponentPropsWithoutRef<typeof SelectPrimitive.ScrollDownButton>\n>(({ className, ...props }, ref) => (\n  <SelectPrimitive.ScrollDownButton\n    ref={ref}\n    className={cn(\n      \"flex cursor-default items-center justify-center py-1\",\n      className\n    )}\n    {...props}\n  >\n    <ChevronDown className=\"h-4 w-4\" />\n  </SelectPrimitive.ScrollDownButton>\n));\nSelectScrollDownButton.displayName =\n  SelectPrimitive.ScrollDownButton.displayName;\n\nconst SelectContent = React.forwardRef<\n  React.ElementRef<typeof SelectPrimitive.Content>,\n  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Content> & {\n    container?: HTMLElement | null;\n  }\n>(({ className, children, position = \"popper\", container, ...props }, ref) => (\n  <SelectPrimitive.Portal container={container}>\n    <SelectPrimitive.Content\n      ref={ref}\n      className={cn(\n        \"relative z-[2000] max-h-96 min-w-[8rem] overflow-hidden rounded-08 border bg-background-tint-01 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2\",\n        position === \"popper\" &&\n          \"data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1\",\n        className\n      )}\n      position={position}\n      {...props}\n    >\n      <SelectScrollUpButton />\n      <SelectPrimitive.Viewport\n        className={cn(\n          \"p-1\",\n          position === \"popper\" &&\n            \"h-[var(--radix-select-trigger-height)] w-full min-w-[var(--radix-select-trigger-width)]\"\n        )}\n      >\n        {children}\n      </SelectPrimitive.Viewport>\n      <SelectScrollDownButton />\n    </SelectPrimitive.Content>\n  </SelectPrimitive.Portal>\n));\nSelectContent.displayName = SelectPrimitive.Content.displayName;\n\nconst SelectItem = React.forwardRef<\n  React.ComponentRef<typeof SelectPrimitive.Item>,\n  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Item> & {\n    hideCheck?: boolean;\n    icon?:\n      | React.ReactNode\n      | ((props: {\n          size?: number | undefined;\n          className?: string | undefined;\n        }) => React.JSX.Element);\n    selected?: boolean;\n  }\n>(({ className, children, hideCheck, icon, selected, ...props }, ref) => (\n  <SelectPrimitive.Item\n    ref={ref}\n    className={cn(\n      \"relative flex w-full cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 outline-none focus:bg-background-tint-02 data-[disabled]:pointer-events-none data-[disabled]:opacity-50\",\n      className\n    )}\n    {...props}\n  >\n    {icon ? (\n      <span className=\"absolute left-2 flex h-3.5 w-3.5 items-center justify-center\">\n        {typeof icon === \"function\" ? icon({ size: 16, className: \"\" }) : icon}\n      </span>\n    ) : (\n      !hideCheck &&\n      selected && (\n        <span className=\"absolute left-2 flex h-3.5 w-3.5 items-center justify-center\">\n          <SelectPrimitive.ItemIndicator>\n            <Check className=\"h-4 w-4\" />\n          </SelectPrimitive.ItemIndicator>\n        </span>\n      )\n    )}\n\n    <SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>\n  </SelectPrimitive.Item>\n));\nSelectItem.displayName = SelectPrimitive.Item.displayName;\n\nexport {\n  Select,\n  SelectValue,\n  SelectTrigger,\n  SelectContent,\n  SelectItem,\n  SelectScrollUpButton,\n  SelectScrollDownButton,\n};\n"
  },
  {
    "path": "web/src/components/ui/slider.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport * as SliderPrimitive from \"@radix-ui/react-slider\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst Slider = React.forwardRef<\n  React.ElementRef<typeof SliderPrimitive.Root>,\n  React.ComponentPropsWithoutRef<typeof SliderPrimitive.Root>\n>(({ className, ...props }, ref) => (\n  <SliderPrimitive.Root\n    ref={ref}\n    className={cn(\n      \"relative flex w-full touch-none select-none items-center\",\n      className\n    )}\n    {...props}\n  >\n    <SliderPrimitive.Track className=\"relative h-2 w-full grow overflow-hidden rounded-full bg-neutral-100 dark:bg-neutral-800\">\n      <SliderPrimitive.Range className=\"absolute h-full bg-neutral-900 dark:bg-neutral-50\" />\n    </SliderPrimitive.Track>\n    <SliderPrimitive.Thumb className=\"block h-3 w-3 rounded-full border border-background-900 bg-white ring-offset-white transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-neutral-950 focus-visible:ring-offset disabled:pointer-events-none disabled:opacity-50 dark:border-background-50 dark:bg-neutral-950 dark:ring-offset-neutral-950 dark:focus-visible:ring-neutral-300\" />\n  </SliderPrimitive.Root>\n));\nSlider.displayName = SliderPrimitive.Root.displayName;\n\nexport { Slider };\n"
  },
  {
    "path": "web/src/components/ui/table.tsx",
    "content": "import * as React from \"react\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst Table = React.forwardRef<\n  HTMLTableElement,\n  React.HTMLAttributes<HTMLTableElement>\n>(({ className, ...props }, ref) => (\n  <div className=\"relative w-full overflow-auto\">\n    <table\n      ref={ref}\n      className={cn(\"w-full caption-bottom text-sm\", className)}\n      {...props}\n    />\n  </div>\n));\nTable.displayName = \"Table\";\n\nconst TableHeader = React.forwardRef<\n  HTMLTableSectionElement,\n  React.HTMLAttributes<HTMLTableSectionElement>\n>(({ className, ...props }, ref) => (\n  <thead ref={ref} className={cn(\"[&_tr]:border-b\", className)} {...props} />\n));\nTableHeader.displayName = \"TableHeader\";\n\nconst TableBody = React.forwardRef<\n  HTMLTableSectionElement,\n  React.HTMLAttributes<HTMLTableSectionElement>\n>(({ className, ...props }, ref) => (\n  <tbody\n    ref={ref}\n    className={cn(\"[&_tr:last-child]:border-0\", className)}\n    {...props}\n  />\n));\nTableBody.displayName = \"TableBody\";\n\nconst TableFooter = React.forwardRef<\n  HTMLTableSectionElement,\n  React.HTMLAttributes<HTMLTableSectionElement>\n>(({ className, ...props }, ref) => (\n  <tfoot\n    ref={ref}\n    className={cn(\n      \"border-t bg-neutral-100/50 font-medium [&>tr]:last:border-b-0 dark:bg-neutral-800/50\",\n      className\n    )}\n    {...props}\n  />\n));\nTableFooter.displayName = \"TableFooter\";\n\nconst TableRow = React.forwardRef<\n  HTMLTableRowElement,\n  React.HTMLAttributes<HTMLTableRowElement> & { noHover?: boolean }\n>(({ className, noHover, ...props }, ref) => (\n  <tr\n    ref={ref}\n    className={cn(\n      `border-b transition-colors  data-[state=selected]:bg-neutral-100 dark:data-[state=selected]:bg-neutral-800 ${\n        noHover ? \"\" : \"dark:hover:bg-neutral-800/80 hover:bg-neutral-100/50\"\n      }`,\n      className\n    )}\n    {...props}\n  />\n));\nTableRow.displayName = \"TableRow\";\n\nconst TableHead = React.forwardRef<\n  HTMLTableCellElement,\n  React.ThHTMLAttributes<HTMLTableCellElement>\n>(({ className, ...props }, ref) => (\n  <th\n    ref={ref}\n    className={cn(\n      \"h-12 px-4 text-left align-middle font-medium text-neutral-500 [&:has([role=checkbox])]:pr-0 dark:text-neutral-400\",\n      className\n    )}\n    {...props}\n  />\n));\nTableHead.displayName = \"TableHead\";\n\nconst TableCell = React.forwardRef<\n  HTMLTableCellElement,\n  React.TdHTMLAttributes<HTMLTableCellElement>\n>(({ className, ...props }, ref) => (\n  <td\n    ref={ref}\n    className={cn(\"p-4 align-middle [&:has([role=checkbox])]:pr-0\", className)}\n    {...props}\n  />\n));\nTableCell.displayName = \"TableCell\";\n\nconst TableCaption = React.forwardRef<\n  HTMLTableCaptionElement,\n  React.HTMLAttributes<HTMLTableCaptionElement>\n>(({ className, ...props }, ref) => (\n  <caption\n    ref={ref}\n    className={cn(\n      \"mt-4 text-sm text-neutral-500 dark:text-neutral-400\",\n      className\n    )}\n    {...props}\n  />\n));\nTableCaption.displayName = \"TableCaption\";\n\nexport {\n  Table,\n  TableHeader,\n  TableBody,\n  TableFooter,\n  TableHead,\n  TableRow,\n  TableCell,\n  TableCaption,\n};\n"
  },
  {
    "path": "web/src/components/ui/title.tsx",
    "content": "import { cn } from \"@/lib/utils\";\n\nexport default function Title({\n  children,\n  className,\n  size = \"sm\",\n}: {\n  children: React.ReactNode;\n  className?: string;\n  size?: \"lg\" | \"md\" | \"sm\";\n}) {\n  return (\n    <h1\n      className={cn(\n        \"text-lg text-neutral-800 dark:text-neutral-200 font-medium\",\n        size === \"lg\" && \"text-2xl\",\n        size === \"md\" && \"text-xl\",\n        size === \"sm\" && \"text-lg\",\n        className\n      )}\n    >\n      {children}\n    </h1>\n  );\n}\n"
  },
  {
    "path": "web/src/components/ui/tooltip.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport { cn } from \"@/lib/utils\";\n\n// Default the provider delay to a snappier, consistent value\nconst TooltipProvider: React.FC<\n  React.ComponentPropsWithoutRef<typeof TooltipPrimitive.Provider>\n> = ({ delayDuration = 400, skipDelayDuration = 200, ...props }) => (\n  <TooltipPrimitive.Provider\n    delayDuration={delayDuration}\n    skipDelayDuration={skipDelayDuration}\n    {...props}\n  />\n);\n\nconst Tooltip = TooltipPrimitive.Root;\n\nconst TooltipTrigger = React.forwardRef<\n  React.ElementRef<typeof TooltipPrimitive.Trigger>,\n  React.ComponentPropsWithoutRef<typeof TooltipPrimitive.Trigger>\n>(({ type = \"button\", ...props }, ref) => (\n  <TooltipPrimitive.Trigger ref={ref} type={type} {...props} />\n));\nTooltipTrigger.displayName = TooltipPrimitive.Trigger.displayName;\n\ntype TooltipSize = \"sm\" | \"md\" | \"lg\";\n\nconst tooltipSizeClasses: Record<TooltipSize, string> = {\n  sm: \"px-2 py-1 max-w-[12rem]\",\n  md: \"px-3 py-2 max-w-[20rem]\",\n  lg: \"px-3 py-2 max-w-[30rem]\",\n};\n\nconst TooltipContent = React.forwardRef<\n  React.ElementRef<typeof TooltipPrimitive.Content>,\n  React.ComponentPropsWithoutRef<typeof TooltipPrimitive.Content> & {\n    width?: string;\n    showTick?: boolean;\n    tickSide?: \"top\" | \"bottom\" | \"left\" | \"right\";\n    side?: \"top\" | \"bottom\" | \"left\" | \"right\";\n    size?: TooltipSize;\n  }\n>(\n  (\n    {\n      className,\n      sideOffset = 4,\n      width,\n      showTick = false,\n      tickSide = \"bottom\",\n      side = \"top\",\n      size = \"lg\",\n      ...props\n    },\n    ref\n  ) => (\n    <TooltipPrimitive.Portal>\n      <TooltipPrimitive.Content\n        ref={ref}\n        sideOffset={sideOffset}\n        side={side}\n        className={cn(\n          \"z-tooltip rounded-08 text-text-light-05 animate-in fade-in-0 zoom-in-95 bg-background-neutral-dark-03 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2\",\n          tooltipSizeClasses[size],\n          width,\n          className\n        )}\n        {...props}\n      >\n        {showTick && (\n          <div\n            className={cn(\n              \"absolute w-2 h-2 bg-inherit rotate-45\",\n              tickSide === \"top\" && \"-top-1 left-1/2 -translate-x-1/2\",\n              tickSide === \"bottom\" && \"-bottom-1 left-1/2 -translate-x-1/2\",\n              tickSide === \"left\" && \"-left-1 top-1/2 -translate-y-1/2\",\n              tickSide === \"right\" && \"-right-1 top-1/2 -translate-y-1/2\"\n            )}\n          />\n        )}\n        {props.children}\n      </TooltipPrimitive.Content>\n    </TooltipPrimitive.Portal>\n  )\n);\nTooltipContent.displayName = TooltipPrimitive.Content.displayName;\n\nexport { Tooltip, TooltipTrigger, TooltipContent, TooltipProvider };\n"
  },
  {
    "path": "web/src/components/voice/Waveform.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useState, useMemo, useRef } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { formatElapsedTime } from \"@/lib/dateUtils\";\nimport { Button } from \"@opal/components\";\nimport {\n  SvgMicrophone,\n  SvgMicrophoneOff,\n  SvgVolume,\n  SvgVolumeOff,\n} from \"@opal/icons\";\n\n// Recording waveform constants\nconst RECORDING_BAR_COUNT = 120;\nconst MIN_BAR_HEIGHT = 2;\nconst MAX_BAR_HEIGHT = 16;\n\n// Speaking waveform constants\nconst SPEAKING_BAR_COUNT = 28;\n\ninterface WaveformProps {\n  /** Visual style and behavior variant */\n  variant: \"speaking\" | \"recording\";\n  /** Whether the waveform is actively animating */\n  isActive: boolean;\n  /** Whether audio is muted */\n  isMuted?: boolean;\n  /** Current microphone audio level (0-1), only used for recording variant */\n  audioLevel?: number;\n  /** Callback when mute button is clicked */\n  onMuteToggle?: () => void;\n}\n\nfunction Waveform({\n  variant,\n  isActive,\n  isMuted = false,\n  audioLevel = 0,\n  onMuteToggle,\n}: WaveformProps) {\n  // ─── Recording variant state ───────────────────────────────────────────────\n  const [elapsedSeconds, setElapsedSeconds] = useState(0);\n  const [barHeights, setBarHeights] = useState<number[]>(\n    () => new Array(RECORDING_BAR_COUNT).fill(MIN_BAR_HEIGHT) as number[]\n  );\n  const animationRef = useRef<number | null>(null);\n  const lastPushTimeRef = useRef(0);\n  const audioLevelRef = useRef(audioLevel);\n  audioLevelRef.current = audioLevel;\n\n  // ─── Speaking variant bars ─────────────────────────────────────────────────\n  const speakingBars = useMemo(() => {\n    return Array.from({ length: SPEAKING_BAR_COUNT }, (_, i) => ({\n      id: i,\n      // Create a natural wave pattern with height variation\n      baseHeight: Math.sin(i * 0.4) * 5 + 8,\n      delay: i * 0.025,\n    }));\n  }, []);\n\n  // ─── Recording: Timer effect ───────────────────────────────────────────────\n  useEffect(() => {\n    if (variant !== \"recording\") return;\n\n    if (!isActive) {\n      setElapsedSeconds(0);\n      return;\n    }\n\n    const interval = setInterval(() => {\n      setElapsedSeconds((prev) => prev + 1);\n    }, 1000);\n\n    return () => clearInterval(interval);\n  }, [variant, isActive]);\n\n  // ─── Recording: Audio level visualization effect ───────────────────────────\n  useEffect(() => {\n    if (variant !== \"recording\") return;\n\n    if (!isActive) {\n      setBarHeights(\n        new Array(RECORDING_BAR_COUNT).fill(MIN_BAR_HEIGHT) as number[]\n      );\n      lastPushTimeRef.current = 0;\n      return;\n    }\n\n    const updateBars = (timestamp: number) => {\n      // Push a new bar roughly every 50ms (~20fps scrolling)\n      if (timestamp - lastPushTimeRef.current >= 50) {\n        lastPushTimeRef.current = timestamp;\n        const level = isMuted ? 0 : audioLevelRef.current;\n        const height =\n          MIN_BAR_HEIGHT + level * (MAX_BAR_HEIGHT - MIN_BAR_HEIGHT);\n\n        setBarHeights((prev) => {\n          const next = prev.slice(1);\n          next.push(height);\n          return next;\n        });\n      }\n\n      animationRef.current = requestAnimationFrame(updateBars);\n    };\n\n    animationRef.current = requestAnimationFrame(updateBars);\n\n    return () => {\n      if (animationRef.current) {\n        cancelAnimationFrame(animationRef.current);\n        animationRef.current = null;\n      }\n    };\n  }, [variant, isActive, isMuted]);\n\n  const formattedTime = useMemo(\n    () => formatElapsedTime(elapsedSeconds),\n    [elapsedSeconds]\n  );\n\n  if (!isActive) {\n    return null;\n  }\n\n  // ─── Speaking variant render ───────────────────────────────────────────────\n  if (variant === \"speaking\") {\n    return (\n      <div className=\"flex items-center gap-0.5 p-1.5 bg-background-tint-00 rounded-16 shadow-01\">\n        {/* Waveform container */}\n        <div className=\"flex items-center p-1 bg-background-tint-00 rounded-12 max-w-[144px] min-h-[32px]\">\n          <div className=\"flex items-center p-1\">\n            {/* Waveform bars */}\n            <div className=\"flex items-center justify-center gap-[2px] h-4 w-[120px] overflow-hidden\">\n              {speakingBars.map((bar) => (\n                <div\n                  key={bar.id}\n                  className={cn(\n                    \"w-[3px] rounded-full\",\n                    isMuted ? \"bg-text-03\" : \"bg-theme-blue-05\",\n                    !isMuted && \"animate-waveform\"\n                  )}\n                  style={{\n                    height: isMuted ? \"2px\" : `${bar.baseHeight}px`,\n                    animationDelay: isMuted ? undefined : `${bar.delay}s`,\n                  }}\n                />\n              ))}\n            </div>\n          </div>\n        </div>\n\n        {/* Divider */}\n        <div className=\"w-0.5 self-stretch bg-border-02\" />\n\n        {/* Volume button */}\n        {onMuteToggle && (\n          <div className=\"flex items-center p-1 bg-background-tint-00 rounded-12\">\n            <Button\n              icon={isMuted ? SvgVolumeOff : SvgVolume}\n              onClick={onMuteToggle}\n              prominence=\"tertiary\"\n              size=\"sm\"\n              tooltip={isMuted ? \"Unmute\" : \"Mute\"}\n            />\n          </div>\n        )}\n      </div>\n    );\n  }\n\n  // ─── Recording variant render ──────────────────────────────────────────────\n  return (\n    <div className=\"flex items-center gap-3 px-3 py-2 bg-background-tint-00 rounded-12 min-h-[32px]\">\n      {/* Waveform visualization driven by real audio levels */}\n      <div className=\"flex-1 flex items-center justify-between h-4 overflow-hidden\">\n        {barHeights.map((height, i) => (\n          <div\n            key={i}\n            className=\"w-[1.5px] bg-text-03 rounded-full shrink-0 transition-[height] duration-75\"\n            style={{ height: `${height}px` }}\n          />\n        ))}\n      </div>\n\n      {/* Timer */}\n      <span className=\"font-mono text-xs text-text-03 tabular-nums shrink-0\">\n        {formattedTime}\n      </span>\n\n      {/* Mute button */}\n      {onMuteToggle && (\n        <Button\n          icon={isMuted ? SvgMicrophoneOff : SvgMicrophone}\n          onClick={onMuteToggle}\n          prominence=\"tertiary\"\n          size=\"sm\"\n          aria-label={isMuted ? \"Unmute microphone\" : \"Mute microphone\"}\n        />\n      )}\n    </div>\n  );\n}\n\nexport default Waveform;\n"
  },
  {
    "path": "web/src/ee/LICENSE",
    "content": "The Onyx Enterprise License (the \"Enterprise License\")\nCopyright (c) 2023-present DanswerAI, Inc.\n\nWith regard to the Onyx Software:\n\nThis software and associated documentation files (the \"Software\") may only be\nused in production, if you (and any entity that you represent) have agreed to,\nand are in compliance with, the Onyx Subscription Terms of Service, available\nat https://www.onyx.app/legal/self-host (the \"Enterprise Terms\"), or other\nagreement governing the use of the Software, as agreed by you and DanswerAI,\nand otherwise have a valid Onyx Enterprise License for the\ncorrect number of user seats. Subject to the foregoing sentence, you are free to\nmodify this Software and publish patches to the Software. You agree that DanswerAI\nand/or its licensors (as applicable) retain all right, title and interest in and\nto all such modifications and/or patches, and all such modifications and/or\npatches may only be used, copied, modified, displayed, distributed, or otherwise\nexploited with a valid Onyx Enterprise License for the correct\nnumber of user seats. Notwithstanding the foregoing, you may copy and modify\nthe Software for development and testing purposes, without requiring a\nsubscription. You agree that DanswerAI and/or its licensors (as applicable) retain\nall right, title and interest in and to all such modifications. You are not\ngranted any other rights beyond what is expressly stated herein. Subject to the\nforegoing, it is forbidden to copy, merge, publish, distribute, sublicense,\nand/or sell the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n\nFor all third party components incorporated into the Onyx Software, those\ncomponents are licensed under the original license provided by the owner of the\napplicable component.\n"
  },
  {
    "path": "web/src/ee/hooks/useHookExecutionLogs.ts",
    "content": "import useSWR from \"swr\";\nimport { fetchExecutionLogs } from \"@/ee/refresh-pages/admin/HooksPage/svc\";\nimport type { HookExecutionRecord } from \"@/ee/refresh-pages/admin/HooksPage/interfaces\";\n\nconst ONE_HOUR_MS = 60 * 60 * 1000;\nconst THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;\n\ninterface UseHookExecutionLogsResult {\n  isLoading: boolean;\n  error: Error | undefined;\n  hasRecentErrors: boolean;\n  recentErrors: HookExecutionRecord[];\n  olderErrors: HookExecutionRecord[];\n}\n\nexport function useHookExecutionLogs(\n  hookId: number,\n  limit = 10\n): UseHookExecutionLogsResult {\n  const { data, isLoading, error } = useSWR(\n    [\"hook-execution-logs\", hookId, limit],\n    () => fetchExecutionLogs(hookId, limit),\n    { refreshInterval: 60_000 }\n  );\n\n  const now = Date.now();\n\n  const recentErrors =\n    data?.filter(\n      (log) => now - new Date(log.created_at).getTime() < ONE_HOUR_MS\n    ) ?? [];\n\n  const olderErrors =\n    data?.filter((log) => {\n      const age = now - new Date(log.created_at).getTime();\n      return age >= ONE_HOUR_MS && age < THIRTY_DAYS_MS;\n    }) ?? [];\n\n  const hasRecentErrors = recentErrors.length > 0;\n\n  return { isLoading, error, hasRecentErrors, recentErrors, olderErrors };\n}\n"
  },
  {
    "path": "web/src/ee/hooks/useHookSpecs.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { HookPointMeta } from \"@/ee/refresh-pages/admin/HooksPage/interfaces\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport function useHookSpecs() {\n  const { data, isLoading, error } = useSWR<HookPointMeta[]>(\n    SWR_KEYS.hookSpecs,\n    errorHandlingFetcher,\n    { revalidateOnFocus: false }\n  );\n\n  return { specs: data, isLoading, error };\n}\n"
  },
  {
    "path": "web/src/ee/hooks/useHooks.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { HookResponse } from \"@/ee/refresh-pages/admin/HooksPage/interfaces\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport function useHooks() {\n  const { data, isLoading, error, mutate } = useSWR<HookResponse[]>(\n    SWR_KEYS.hooks,\n    errorHandlingFetcher,\n    { revalidateOnFocus: false }\n  );\n\n  return { hooks: data, isLoading, error, mutate };\n}\n"
  },
  {
    "path": "web/src/ee/lib/search/svc.ts",
    "content": "/**\n * Search API Helper Functions\n */\n\nimport type {\n  BaseFilters,\n  SearchFlowClassificationRequest,\n  SearchFlowClassificationResponse,\n  SearchFullResponse,\n  SearchHistoryResponse,\n  SendSearchQueryRequest,\n} from \"@/lib/search/interfaces\";\n\n/**\n * Classify a query as search or chat flow\n */\nexport async function classifyQuery(\n  query: string,\n  signal?: AbortSignal\n): Promise<SearchFlowClassificationResponse> {\n  const response = await fetch(\"/api/search/search-flow-classification\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      user_query: query,\n    } as SearchFlowClassificationRequest),\n    signal,\n  });\n\n  if (!response.ok) {\n    throw new Error(`Classification failed: ${response.statusText}`);\n  }\n\n  return response.json();\n}\n\n/**\n * Perform a document search\n */\nexport async function searchDocuments(\n  query: string,\n  options?: {\n    filters?: BaseFilters;\n    numHits?: number;\n    includeContent?: boolean;\n    signal?: AbortSignal;\n  }\n): Promise<SearchFullResponse> {\n  const request: SendSearchQueryRequest = {\n    search_query: query,\n    filters: options?.filters,\n    num_hits: options?.numHits ?? 30,\n    include_content: options?.includeContent ?? false,\n    stream: false,\n  };\n\n  const response = await fetch(\"/api/search/send-search-message\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(request),\n    signal: options?.signal,\n  });\n\n  if (!response.ok) {\n    throw new Error(`Search failed: ${response.statusText}`);\n  }\n\n  return response.json();\n}\n\n/**\n * Fetch search history for the current user\n */\nexport async function fetchSearchHistory(options?: {\n  limit?: number;\n  filterDays?: number;\n  signal?: AbortSignal;\n}): Promise<SearchHistoryResponse> {\n  const params = new URLSearchParams();\n  if (options?.limit) params.set(\"limit\", options.limit.toString());\n  if (options?.filterDays)\n    params.set(\"filter_days\", options.filterDays.toString());\n\n  const response = await fetch(\n    `/api/search/search-history?${params.toString()}`,\n    {\n      signal: options?.signal,\n    }\n  );\n\n  if (!response.ok) {\n    throw new Error(`Failed to fetch search history: ${response.statusText}`);\n  }\n\n  return response.json();\n}\n"
  },
  {
    "path": "web/src/ee/providers/QueryControllerProvider.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useEffect, useRef, useState, useMemo } from \"react\";\nimport {\n  BaseFilters,\n  SearchDocWithContent,\n  SearchFlowClassificationResponse,\n  SearchFullResponse,\n} from \"@/lib/search/interfaces\";\nimport { classifyQuery, searchDocuments } from \"@/ee/lib/search/svc\";\nimport useAppFocus from \"@/hooks/useAppFocus\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport {\n  QueryControllerContext,\n  QueryControllerValue,\n  QueryState,\n  AppMode,\n} from \"@/providers/QueryControllerProvider\";\n\ninterface QueryControllerProviderProps {\n  children: React.ReactNode;\n}\n\nexport function QueryControllerProvider({\n  children,\n}: QueryControllerProviderProps) {\n  const appFocus = useAppFocus();\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n  const settings = useSettingsContext();\n  const { isSearchModeAvailable: searchUiEnabled } = settings;\n  const { user } = useUser();\n\n  // ── Merged query state (discriminated union) ──────────────────────────\n  const [state, setState] = useState<QueryState>({\n    phase: \"idle\",\n    appMode: \"chat\",\n  });\n\n  // Persistent app-mode preference — survives phase transitions and is\n  // used to restore the correct mode when resetting back to idle.\n  const appModeRef = useRef<AppMode>(\"chat\");\n\n  // ── App mode sync from user preferences ───────────────────────────────\n  const persistedMode = user?.preferences?.default_app_mode;\n\n  useEffect(() => {\n    let mode: AppMode = \"chat\";\n    if (isPaidEnterpriseFeaturesEnabled && searchUiEnabled && persistedMode) {\n      const lower = persistedMode.toLowerCase();\n      mode = ([\"auto\", \"search\", \"chat\"] as const).includes(lower as AppMode)\n        ? (lower as AppMode)\n        : \"chat\";\n    }\n    appModeRef.current = mode;\n    setState((prev) =>\n      prev.phase === \"idle\" ? { phase: \"idle\", appMode: mode } : prev\n    );\n  }, [isPaidEnterpriseFeaturesEnabled, searchUiEnabled, persistedMode]);\n\n  const setAppMode = useCallback(\n    (mode: AppMode) => {\n      if (!isPaidEnterpriseFeaturesEnabled || !searchUiEnabled) return;\n      setState((prev) => {\n        if (prev.phase !== \"idle\") return prev;\n        appModeRef.current = mode;\n        return { phase: \"idle\", appMode: mode };\n      });\n    },\n    [isPaidEnterpriseFeaturesEnabled, searchUiEnabled]\n  );\n\n  // ── Ancillary state ───────────────────────────────────────────────────\n  const [query, setQuery] = useState<string | null>(null);\n  const [searchResults, setSearchResults] = useState<SearchDocWithContent[]>(\n    []\n  );\n  const [llmSelectedDocIds, setLlmSelectedDocIds] = useState<string[] | null>(\n    null\n  );\n  const [error, setError] = useState<string | null>(null);\n\n  // Abort controllers for in-flight requests\n  const classifyAbortRef = useRef<AbortController | null>(null);\n  const searchAbortRef = useRef<AbortController | null>(null);\n\n  /**\n   * Perform document search (pure data-fetching, no phase side effects)\n   */\n  const performSearch = useCallback(\n    async (searchQuery: string, filters?: BaseFilters): Promise<void> => {\n      if (searchAbortRef.current) {\n        searchAbortRef.current.abort();\n      }\n\n      const controller = new AbortController();\n      searchAbortRef.current = controller;\n\n      try {\n        const response: SearchFullResponse = await searchDocuments(\n          searchQuery,\n          {\n            filters,\n            numHits: 30,\n            includeContent: false,\n            signal: controller.signal,\n          }\n        );\n\n        if (response.error) {\n          setError(response.error);\n          setSearchResults([]);\n          setLlmSelectedDocIds(null);\n          return;\n        }\n\n        setError(null);\n        setSearchResults(response.search_docs);\n        setLlmSelectedDocIds(response.llm_selected_doc_ids ?? null);\n      } catch (err) {\n        if (err instanceof Error && err.name === \"AbortError\") {\n          throw err;\n        }\n\n        setError(\"Document search failed. Please try again.\");\n        setSearchResults([]);\n        setLlmSelectedDocIds(null);\n      }\n    },\n    []\n  );\n\n  /**\n   * Classify a query as search or chat\n   */\n  const performClassification = useCallback(\n    async (classifyQueryText: string): Promise<\"search\" | \"chat\"> => {\n      if (classifyAbortRef.current) {\n        classifyAbortRef.current.abort();\n      }\n\n      const controller = new AbortController();\n      classifyAbortRef.current = controller;\n\n      try {\n        const response: SearchFlowClassificationResponse = await classifyQuery(\n          classifyQueryText,\n          controller.signal\n        );\n\n        const result = response.is_search_flow ? \"search\" : \"chat\";\n        return result;\n      } catch (error) {\n        if (error instanceof Error && error.name === \"AbortError\") {\n          throw error;\n        }\n\n        setError(\"Query classification failed. Falling back to chat.\");\n        return \"chat\";\n      }\n    },\n    []\n  );\n\n  /**\n   * Submit a query - routes based on app mode\n   */\n  const submit = useCallback(\n    async (\n      submitQuery: string,\n      onChat: (query: string) => void,\n      filters?: BaseFilters\n    ): Promise<void> => {\n      setQuery(submitQuery);\n      setError(null);\n\n      const currentAppMode = appModeRef.current;\n\n      // Always route through chat if:\n      // 1. Not Enterprise Enabled\n      // 2. Admin has disabled the Search UI\n      // 3. Not in the \"New Session\" tab\n      // 4. In \"New Session\" tab but app-mode is \"Chat\"\n      if (\n        !isPaidEnterpriseFeaturesEnabled ||\n        !searchUiEnabled ||\n        !appFocus.isNewSession() ||\n        currentAppMode === \"chat\"\n      ) {\n        setState({ phase: \"chat\" });\n        setSearchResults([]);\n        setLlmSelectedDocIds(null);\n        onChat(submitQuery);\n        return;\n      }\n\n      // Search mode: immediately show SearchUI with loading state\n      if (currentAppMode === \"search\") {\n        setState({ phase: \"searching\" });\n        try {\n          await performSearch(submitQuery, filters);\n        } catch (err) {\n          if (err instanceof Error && err.name === \"AbortError\") return;\n          throw err;\n        }\n        setState({ phase: \"search-results\" });\n        return;\n      }\n\n      // Auto mode: classify first, then route\n      setState({ phase: \"classifying\" });\n      try {\n        const result = await performClassification(submitQuery);\n\n        if (result === \"search\") {\n          setState({ phase: \"searching\" });\n          await performSearch(submitQuery, filters);\n          setState({ phase: \"search-results\" });\n          appModeRef.current = \"search\";\n        } else {\n          setState({ phase: \"chat\" });\n          setSearchResults([]);\n          setLlmSelectedDocIds(null);\n          onChat(submitQuery);\n        }\n      } catch (error) {\n        if (error instanceof Error && error.name === \"AbortError\") {\n          return;\n        }\n\n        setState({ phase: \"chat\" });\n        setSearchResults([]);\n        setLlmSelectedDocIds(null);\n        onChat(submitQuery);\n      }\n    },\n    [\n      appFocus,\n      performClassification,\n      performSearch,\n      isPaidEnterpriseFeaturesEnabled,\n      searchUiEnabled,\n    ]\n  );\n\n  /**\n   * Re-run the current search query with updated server-side filters\n   */\n  const refineSearch = useCallback(\n    async (filters: BaseFilters): Promise<void> => {\n      if (!query) return;\n      setState({ phase: \"searching\" });\n      try {\n        await performSearch(query, filters);\n      } catch (err) {\n        if (err instanceof Error && err.name === \"AbortError\") return;\n        throw err;\n      }\n      setState({ phase: \"search-results\" });\n    },\n    [query, performSearch]\n  );\n\n  /**\n   * Reset all state to initial values\n   */\n  const reset = useCallback(() => {\n    if (classifyAbortRef.current) {\n      classifyAbortRef.current.abort();\n      classifyAbortRef.current = null;\n    }\n    if (searchAbortRef.current) {\n      searchAbortRef.current.abort();\n      searchAbortRef.current = null;\n    }\n\n    setQuery(null);\n    setState({ phase: \"idle\", appMode: appModeRef.current });\n    setSearchResults([]);\n    setLlmSelectedDocIds(null);\n    setError(null);\n  }, []);\n\n  const value: QueryControllerValue = useMemo(\n    () => ({\n      state,\n      setAppMode,\n      searchResults,\n      llmSelectedDocIds,\n      error,\n      submit,\n      refineSearch,\n      reset,\n    }),\n    [\n      state,\n      setAppMode,\n      searchResults,\n      llmSelectedDocIds,\n      error,\n      submit,\n      refineSearch,\n      reset,\n    ]\n  );\n\n  // Sync state with navigation context\n  useEffect(reset, [appFocus, reset]);\n\n  return (\n    <QueryControllerContext.Provider value={value}>\n      {children}\n    </QueryControllerContext.Provider>\n  );\n}\n"
  },
  {
    "path": "web/src/ee/refresh-pages/admin/HooksPage/HookFormModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { Formik, Form, useFormikContext } from \"formik\";\nimport * as Yup from \"yup\";\nimport { Button, Text } from \"@opal/components\";\nimport {\n  SvgCheckCircle,\n  SvgShareWebhook,\n  SvgLoader,\n  SvgRevert,\n} from \"@opal/icons\";\nimport Modal, { BasicModalFooter } from \"@/refresh-components/Modal\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport PasswordInputTypeInField from \"@/refresh-components/form/PasswordInputTypeInField\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { Content, ContentAction } from \"@opal/layouts\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  createHook,\n  updateHook,\n  HookAuthError,\n  HookTimeoutError,\n  HookConnectError,\n} from \"@/ee/refresh-pages/admin/HooksPage/svc\";\nimport type {\n  HookFailStrategy,\n  HookFormState,\n  HookPointMeta,\n  HookResponse,\n  HookUpdateRequest,\n} from \"@/ee/refresh-pages/admin/HooksPage/interfaces\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface HookFormModalProps {\n  onOpenChange: (open: boolean) => void;\n  /** When provided, the modal is in edit mode for this hook. */\n  hook?: HookResponse;\n  /** When provided (create mode), the hook point is pre-selected and locked. */\n  spec?: HookPointMeta;\n  onSuccess: (hook: HookResponse) => void;\n}\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\nconst MAX_TIMEOUT_SECONDS = 600;\n\nconst SOFT_DESCRIPTION =\n  \"If the endpoint returns an error, Onyx logs it and continues the pipeline as normal, ignoring the hook result.\";\n\nfunction buildInitialValues(\n  hook: HookResponse | undefined,\n  spec: HookPointMeta | undefined\n): HookFormState {\n  if (hook) {\n    return {\n      name: hook.name,\n      endpoint_url: hook.endpoint_url ?? \"\",\n      api_key: \"\",\n      fail_strategy: hook.fail_strategy,\n      timeout_seconds: String(hook.timeout_seconds),\n    };\n  }\n  return {\n    name: \"\",\n    endpoint_url: \"\",\n    api_key: \"\",\n    fail_strategy: spec?.default_fail_strategy ?? \"hard\",\n    timeout_seconds: spec ? String(spec.default_timeout_seconds) : \"30\",\n  };\n}\n\nfunction buildValidationSchema(isEdit: boolean) {\n  return Yup.object().shape({\n    name: Yup.string().trim().required(\"Display name cannot be empty.\"),\n    endpoint_url: Yup.string().trim().required(\"Endpoint URL cannot be empty.\"),\n    api_key: isEdit\n      ? Yup.string()\n      : Yup.string().trim().required(\"API key cannot be empty.\"),\n    timeout_seconds: Yup.string()\n      .required(\"Timeout is required.\")\n      .test(\n        \"valid-timeout\",\n        `Must be greater than 0 and at most ${MAX_TIMEOUT_SECONDS} seconds.`,\n        (val) => {\n          const num = parseFloat(val ?? \"\");\n          return !isNaN(num) && num > 0 && num <= MAX_TIMEOUT_SECONDS;\n        }\n      ),\n  });\n}\n\n// ---------------------------------------------------------------------------\n// Timeout field (needs access to spec for revert button)\n// ---------------------------------------------------------------------------\n\ninterface TimeoutFieldProps {\n  spec: HookPointMeta | undefined;\n}\n\nfunction TimeoutField({ spec }: TimeoutFieldProps) {\n  const { values, setFieldValue, isSubmitting } =\n    useFormikContext<HookFormState>();\n\n  return (\n    <InputLayouts.Vertical\n      name=\"timeout_seconds\"\n      title=\"Timeout\"\n      suffix=\"(seconds)\"\n      subDescription={`Maximum time Onyx will wait for the endpoint to respond before applying the fail strategy. Must be greater than 0 and at most ${MAX_TIMEOUT_SECONDS} seconds.`}\n    >\n      <div className=\"[&_input]:!font-main-ui-mono [&_input::placeholder]:!font-main-ui-mono [&_input]:![appearance:textfield] [&_input::-webkit-outer-spin-button]:!appearance-none [&_input::-webkit-inner-spin-button]:!appearance-none w-full\">\n        <InputTypeInField\n          name=\"timeout_seconds\"\n          type=\"number\"\n          placeholder={spec ? String(spec.default_timeout_seconds) : undefined}\n          variant={isSubmitting ? \"disabled\" : undefined}\n          showClearButton={false}\n          rightSection={\n            spec?.default_timeout_seconds !== undefined &&\n            values.timeout_seconds !== String(spec.default_timeout_seconds) ? (\n              <Button\n                prominence=\"tertiary\"\n                size=\"xs\"\n                icon={SvgRevert}\n                tooltip=\"Revert to Default\"\n                onClick={() =>\n                  setFieldValue(\n                    \"timeout_seconds\",\n                    String(spec.default_timeout_seconds)\n                  )\n                }\n                disabled={isSubmitting}\n              />\n            ) : undefined\n          }\n        />\n      </div>\n    </InputLayouts.Vertical>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nexport default function HookFormModal({\n  onOpenChange,\n  hook,\n  spec,\n  onSuccess,\n}: HookFormModalProps) {\n  const isEdit = !!hook;\n  const [isConnected, setIsConnected] = useState(false);\n  const [apiKeyCleared, setApiKeyCleared] = useState(false);\n\n  const initialValues = buildInitialValues(hook, spec);\n  const validationSchema = buildValidationSchema(isEdit);\n\n  function handleClose() {\n    onOpenChange(false);\n  }\n\n  const hookPointDisplayName =\n    spec?.display_name ?? spec?.hook_point ?? hook?.hook_point ?? \"\";\n  const hookPointDescription = spec?.description;\n  const docsUrl = spec?.docs_url;\n\n  return (\n    <Modal open onOpenChange={(open) => !open && handleClose()}>\n      <Modal.Content width=\"md\" height=\"fit\">\n        <Formik\n          initialValues={initialValues}\n          validationSchema={validationSchema}\n          validateOnMount\n          onSubmit={async (values, helpers) => {\n            try {\n              let result: HookResponse;\n              if (isEdit && hook) {\n                const req: HookUpdateRequest = {};\n                if (values.name !== hook.name) req.name = values.name;\n                if (values.endpoint_url !== (hook.endpoint_url ?? \"\"))\n                  req.endpoint_url = values.endpoint_url;\n                if (values.fail_strategy !== hook.fail_strategy)\n                  req.fail_strategy = values.fail_strategy;\n                const timeoutNum = parseFloat(values.timeout_seconds);\n                if (timeoutNum !== hook.timeout_seconds)\n                  req.timeout_seconds = timeoutNum;\n                if (values.api_key.trim().length > 0) {\n                  req.api_key = values.api_key;\n                } else if (apiKeyCleared) {\n                  req.api_key = null;\n                }\n                if (Object.keys(req).length === 0) {\n                  handleClose();\n                  return;\n                }\n                result = await updateHook(hook.id, req);\n              } else {\n                if (!spec) {\n                  toast.error(\"No hook point specified.\");\n                  return;\n                }\n                result = await createHook({\n                  name: values.name,\n                  hook_point: spec.hook_point,\n                  endpoint_url: values.endpoint_url,\n                  ...(values.api_key ? { api_key: values.api_key } : {}),\n                  fail_strategy: values.fail_strategy,\n                  timeout_seconds: parseFloat(values.timeout_seconds),\n                });\n              }\n              toast.success(isEdit ? \"Hook updated.\" : \"Hook created.\");\n              onSuccess(result);\n              if (!isEdit) {\n                setIsConnected(true);\n                await new Promise((resolve) => setTimeout(resolve, 500));\n              }\n              handleClose();\n            } catch (err) {\n              if (err instanceof HookAuthError) {\n                helpers.setFieldError(\"api_key\", \"Invalid API key.\");\n              } else if (err instanceof HookTimeoutError) {\n                helpers.setFieldError(\n                  \"timeout_seconds\",\n                  \"Connection timed out. Try increasing the timeout.\"\n                );\n              } else if (err instanceof HookConnectError) {\n                helpers.setFieldError(\n                  \"endpoint_url\",\n                  err.message || \"Could not connect to endpoint.\"\n                );\n              } else {\n                toast.error(\n                  err instanceof Error ? err.message : \"Something went wrong.\"\n                );\n              }\n            } finally {\n              helpers.setSubmitting(false);\n            }\n          }}\n        >\n          {({ values, setFieldValue, isSubmitting, isValid, dirty }) => {\n            const failStrategyDescription =\n              values.fail_strategy === \"soft\"\n                ? SOFT_DESCRIPTION\n                : spec?.fail_hard_description;\n\n            return (\n              <Form className=\"w-full overflow-visible\">\n                <Modal.Header\n                  icon={SvgShareWebhook}\n                  title={\n                    isEdit ? \"Manage Hook Extension\" : \"Set Up Hook Extension\"\n                  }\n                  description={\n                    isEdit\n                      ? undefined\n                      : \"Connect an external API endpoint to extend the hook point.\"\n                  }\n                  onClose={handleClose}\n                />\n\n                <Modal.Body>\n                  {/* Hook point section header */}\n                  <ContentAction\n                    sizePreset=\"main-ui\"\n                    variant=\"section\"\n                    paddingVariant=\"fit\"\n                    title={hookPointDisplayName}\n                    description={hookPointDescription}\n                    rightChildren={\n                      <div className=\"flex flex-col items-end gap-1\">\n                        <Content\n                          sizePreset=\"secondary\"\n                          variant=\"body\"\n                          icon={SvgShareWebhook}\n                          title=\"Hook Point\"\n                          prominence=\"muted\"\n                          widthVariant=\"fit\"\n                        />\n                        {docsUrl && (\n                          <a\n                            href={docsUrl}\n                            target=\"_blank\"\n                            rel=\"noopener noreferrer\"\n                            className=\"underline leading-none\"\n                          >\n                            <Text font=\"secondary-body\" color=\"text-03\">\n                              Documentation\n                            </Text>\n                          </a>\n                        )}\n                      </div>\n                    }\n                  />\n\n                  <InputLayouts.Vertical name=\"name\" title=\"Display Name\">\n                    <div className=\"[&_input::placeholder]:!font-main-ui-muted w-full\">\n                      <InputTypeInField\n                        name=\"name\"\n                        placeholder=\"Name your extension at this hook point\"\n                        variant={isSubmitting ? \"disabled\" : undefined}\n                      />\n                    </div>\n                  </InputLayouts.Vertical>\n\n                  <InputLayouts.Vertical\n                    name=\"fail_strategy\"\n                    title=\"Fail Strategy\"\n                    nonInteractive\n                    subDescription={failStrategyDescription}\n                  >\n                    <InputSelect\n                      value={values.fail_strategy}\n                      onValueChange={(v) =>\n                        setFieldValue(\"fail_strategy\", v as HookFailStrategy)\n                      }\n                      disabled={isSubmitting}\n                    >\n                      <InputSelect.Trigger placeholder=\"Select strategy\" />\n                      <InputSelect.Content>\n                        <InputSelect.Item value=\"soft\">\n                          Log Error and Continue\n                          {spec?.default_fail_strategy === \"soft\" && (\n                            <>\n                              {\" \"}\n                              <Text color=\"text-03\">(Default)</Text>\n                            </>\n                          )}\n                        </InputSelect.Item>\n                        <InputSelect.Item value=\"hard\">\n                          Block Pipeline on Failure\n                          {spec?.default_fail_strategy === \"hard\" && (\n                            <>\n                              {\" \"}\n                              <Text color=\"text-03\">(Default)</Text>\n                            </>\n                          )}\n                        </InputSelect.Item>\n                      </InputSelect.Content>\n                    </InputSelect>\n                  </InputLayouts.Vertical>\n\n                  <TimeoutField spec={spec} />\n\n                  <InputLayouts.Vertical\n                    name=\"endpoint_url\"\n                    title=\"External API Endpoint URL\"\n                    subDescription=\"Only connect to servers you trust. You are responsible for actions taken and data shared with this connection.\"\n                  >\n                    <div className=\"[&_input::placeholder]:!font-main-ui-muted w-full\">\n                      <InputTypeInField\n                        name=\"endpoint_url\"\n                        placeholder=\"https://your-api-endpoint.com\"\n                        variant={isSubmitting ? \"disabled\" : undefined}\n                      />\n                    </div>\n                  </InputLayouts.Vertical>\n\n                  <InputLayouts.Vertical\n                    name=\"api_key\"\n                    title=\"API Key\"\n                    subDescription=\"Onyx will use this key to authenticate with your API endpoint.\"\n                  >\n                    <PasswordInputTypeInField\n                      name=\"api_key\"\n                      placeholder={\n                        isEdit\n                          ? hook?.api_key_masked ??\n                            \"Leave blank to keep current key\"\n                          : undefined\n                      }\n                      disabled={isSubmitting}\n                      onChange={(e) => {\n                        if (isEdit && hook?.api_key_masked) {\n                          setApiKeyCleared(e.target.value === \"\");\n                        }\n                      }}\n                    />\n                  </InputLayouts.Vertical>\n\n                  {!isEdit && (isSubmitting || isConnected) && (\n                    <Section\n                      flexDirection=\"row\"\n                      alignItems=\"center\"\n                      justifyContent=\"start\"\n                      height=\"fit\"\n                      gap={1}\n                      className=\"px-0.5\"\n                    >\n                      <div className=\"p-0.5 shrink-0\">\n                        {isConnected ? (\n                          <SvgCheckCircle\n                            size={16}\n                            className=\"text-status-success-05\"\n                          />\n                        ) : (\n                          <SvgLoader\n                            size={16}\n                            className=\"animate-spin text-text-03\"\n                          />\n                        )}\n                      </div>\n                      <Text font=\"secondary-body\" color=\"text-03\">\n                        {isConnected\n                          ? \"Connection valid.\"\n                          : \"Verifying connection…\"}\n                      </Text>\n                    </Section>\n                  )}\n                </Modal.Body>\n\n                <Modal.Footer>\n                  <BasicModalFooter\n                    cancel={\n                      <Button\n                        disabled={isSubmitting}\n                        prominence=\"secondary\"\n                        onClick={handleClose}\n                      >\n                        Cancel\n                      </Button>\n                    }\n                    submit={\n                      <Button\n                        disabled={\n                          isSubmitting ||\n                          !isValid ||\n                          (!dirty && !apiKeyCleared && isEdit)\n                        }\n                        type=\"submit\"\n                        icon={\n                          isSubmitting && !isEdit\n                            ? () => (\n                                <SvgLoader size={16} className=\"animate-spin\" />\n                              )\n                            : undefined\n                        }\n                      >\n                        {isEdit ? \"Save Changes\" : \"Connect\"}\n                      </Button>\n                    }\n                  />\n                </Modal.Footer>\n              </Form>\n            );\n          }}\n        </Formik>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/ee/refresh-pages/admin/HooksPage/HookLogsModal.tsx",
    "content": "\"use client\";\n\nimport { Button, Text } from \"@opal/components\";\nimport { SvgDownload, SvgTextLines } from \"@opal/icons\";\nimport Modal from \"@/refresh-components/Modal\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport { useHookExecutionLogs } from \"@/ee/hooks/useHookExecutionLogs\";\nimport { formatDateTimeLog } from \"@/lib/dateUtils\";\nimport { downloadFile } from \"@/lib/download\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport type {\n  HookExecutionRecord,\n  HookPointMeta,\n  HookResponse,\n} from \"@/ee/refresh-pages/admin/HooksPage/interfaces\";\nimport { useModalClose } from \"@/refresh-components/contexts/ModalContext\";\n\ninterface HookLogsModalProps {\n  hook: HookResponse;\n  spec: HookPointMeta | undefined;\n}\n\n// Section header: \"Past Hour ————\" or \"Older ————\"\n//\n// TODO(@raunakab): replace this with a proper, opalified `Separator` component (when it lands).\nfunction SectionHeader({ label }: { label: string }) {\n  return (\n    <Section\n      flexDirection=\"row\"\n      alignItems=\"center\"\n      height=\"fit\"\n      className=\"py-1\"\n    >\n      <Text font=\"secondary-body\" color=\"text-03\">\n        {label}\n      </Text>\n      <div className=\"flex-1 ml-2 border-t border-border-02\" />\n    </Section>\n  );\n}\n\nfunction LogRow({ log }: { log: HookExecutionRecord }) {\n  return (\n    <Section\n      flexDirection=\"row\"\n      justifyContent=\"start\"\n      alignItems=\"start\"\n      gap={0.5}\n      height=\"fit\"\n      className=\"py-2\"\n    >\n      {/* 1. Timestamp */}\n      <span className=\"shrink-0 text-code-code\">\n        <Text font=\"secondary-mono-label\" color=\"inherit\" nowrap>\n          {formatDateTimeLog(log.created_at)}\n        </Text>\n      </span>\n      {/* 2. Error message */}\n      <span className=\"flex-1 min-w-0 break-all whitespace-pre-wrap text-code-code\">\n        <Text font=\"secondary-mono\" color=\"inherit\">\n          {log.error_message ?? \"Unknown error\"}\n        </Text>\n      </span>\n      {/* 3. Copy button */}\n      <Section width=\"fit\" height=\"fit\" alignItems=\"center\">\n        <CopyIconButton size=\"xs\" getCopyText={() => log.error_message ?? \"\"} />\n      </Section>\n    </Section>\n  );\n}\n\nexport default function HookLogsModal({ hook, spec }: HookLogsModalProps) {\n  const onClose = useModalClose();\n\n  const { recentErrors, olderErrors, isLoading, error } = useHookExecutionLogs(\n    hook.id,\n    10\n  );\n\n  const totalLines = recentErrors.length + olderErrors.length;\n  const allLogs = [...recentErrors, ...olderErrors];\n\n  function getLogsText() {\n    return allLogs\n      .map(\n        (log) =>\n          `${formatDateTimeLog(log.created_at)} ${\n            log.error_message ?? \"Unknown error\"\n          }`\n      )\n      .join(\"\\n\");\n  }\n\n  function handleDownload() {\n    downloadFile(`${hook.name}-errors.txt`, { content: getLogsText() });\n  }\n\n  return (\n    <Modal open onOpenChange={onClose}>\n      <Modal.Content width=\"md\" height=\"fit\">\n        <Modal.Header\n          icon={(props) => <SvgTextLines {...props} />}\n          title=\"Recent Errors\"\n          description={`Hook: ${hook.name} • Hook Point: ${\n            spec?.display_name ?? hook.hook_point\n          }`}\n          onClose={onClose}\n        />\n        <Modal.Body>\n          {isLoading ? (\n            <Section justifyContent=\"center\" height=\"fit\" className=\"py-6\">\n              <SimpleLoader />\n            </Section>\n          ) : error ? (\n            <Text font=\"main-ui-body\" color=\"text-03\">\n              Failed to load logs.\n            </Text>\n          ) : totalLines === 0 ? (\n            <Text font=\"main-ui-body\" color=\"text-03\">\n              No errors in the past 30 days.\n            </Text>\n          ) : (\n            <>\n              {recentErrors.length > 0 && (\n                <>\n                  <SectionHeader label=\"Past Hour\" />\n                  {recentErrors.map((log, idx) => (\n                    <LogRow key={log.created_at + String(idx)} log={log} />\n                  ))}\n                </>\n              )}\n              {olderErrors.length > 0 && (\n                <>\n                  <SectionHeader label=\"Older\" />\n                  {olderErrors.map((log, idx) => (\n                    <LogRow key={log.created_at + String(idx)} log={log} />\n                  ))}\n                </>\n              )}\n            </>\n          )}\n        </Modal.Body>\n        <Section\n          flexDirection=\"row\"\n          justifyContent=\"between\"\n          alignItems=\"center\"\n          padding={0.5}\n          className=\"bg-background-tint-01\"\n        >\n          <Text font=\"main-ui-body\" color=\"text-03\">\n            {`${totalLines} ${totalLines === 1 ? \"line\" : \"lines\"}`}\n          </Text>\n          <Section\n            flexDirection=\"row\"\n            alignItems=\"center\"\n            width=\"fit\"\n            gap={0.25}\n            padding={0.25}\n            className=\"rounded-xl bg-background-tint-00\"\n          >\n            <CopyIconButton\n              size=\"sm\"\n              tooltip=\"Copy\"\n              getCopyText={getLogsText}\n            />\n            <Button\n              prominence=\"tertiary\"\n              size=\"sm\"\n              icon={SvgDownload}\n              tooltip=\"Download\"\n              onClick={handleDownload}\n            />\n          </Section>\n        </Section>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/ee/refresh-pages/admin/HooksPage/HookStatusPopover.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useRef, useState } from \"react\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport { noProp } from \"@/lib/utils\";\nimport { formatTimeOnly } from \"@/lib/dateUtils\";\nimport { Button, Text } from \"@opal/components\";\nimport { Content } from \"@opal/layouts\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport Popover from \"@/refresh-components/Popover\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport {\n  SvgAlertTriangle,\n  SvgCheckCircle,\n  SvgMaximize2,\n  SvgXOctagon,\n} from \"@opal/icons\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport { useHookExecutionLogs } from \"@/ee/hooks/useHookExecutionLogs\";\nimport HookLogsModal from \"@/ee/refresh-pages/admin/HooksPage/HookLogsModal\";\nimport type {\n  HookPointMeta,\n  HookResponse,\n} from \"@/ee/refresh-pages/admin/HooksPage/interfaces\";\nimport { cn } from \"@opal/utils\";\n\ninterface HookStatusPopoverProps {\n  hook: HookResponse;\n  spec: HookPointMeta | undefined;\n  isBusy: boolean;\n}\n\nexport default function HookStatusPopover({\n  hook,\n  spec,\n  isBusy,\n}: HookStatusPopoverProps) {\n  const logsModal = useCreateModal();\n  const [open, setOpen] = useState(false);\n  // true = opened by click (stays until dismissed); false = opened by hover (closes after 1s)\n  const [clickOpened, setClickOpened] = useState(false);\n  const closeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);\n\n  const { hasRecentErrors, recentErrors, isLoading, error } =\n    useHookExecutionLogs(hook.id);\n\n  useEffect(() => {\n    return () => {\n      if (closeTimerRef.current) clearTimeout(closeTimerRef.current);\n    };\n  }, []);\n\n  useEffect(() => {\n    if (error) {\n      console.error(\n        \"HookStatusPopover: failed to fetch execution logs:\",\n        error\n      );\n    }\n  }, [error]);\n\n  function clearCloseTimer() {\n    if (closeTimerRef.current) {\n      clearTimeout(closeTimerRef.current);\n      closeTimerRef.current = null;\n    }\n  }\n\n  function scheduleClose() {\n    clearCloseTimer();\n    closeTimerRef.current = setTimeout(() => {\n      setOpen(false);\n      setClickOpened(false);\n    }, 1000);\n  }\n\n  function handleTriggerMouseEnter() {\n    clearCloseTimer();\n    setOpen(true);\n  }\n\n  function handleTriggerMouseLeave() {\n    if (!clickOpened) scheduleClose();\n  }\n\n  function handleTriggerClick() {\n    clearCloseTimer();\n    if (open && clickOpened) {\n      // Click while click-opened → close\n      setOpen(false);\n      setClickOpened(false);\n    } else {\n      // Any click → open and pin\n      setOpen(true);\n      setClickOpened(true);\n    }\n  }\n\n  function handleContentMouseEnter() {\n    clearCloseTimer();\n  }\n\n  function handleContentMouseLeave() {\n    if (!clickOpened) scheduleClose();\n  }\n\n  function handleOpenChange(newOpen: boolean) {\n    if (!newOpen) {\n      setOpen(false);\n      setClickOpened(false);\n      clearCloseTimer();\n    }\n  }\n\n  return (\n    <>\n      <logsModal.Provider>\n        <HookLogsModal hook={hook} spec={spec} />\n      </logsModal.Provider>\n\n      <Popover open={open} onOpenChange={handleOpenChange}>\n        <Popover.Anchor asChild>\n          <Button\n            prominence=\"tertiary\"\n            rightIcon={({ className, ...props }) =>\n              hook.is_reachable === false ? (\n                <SvgXOctagon\n                  {...props}\n                  className={cn(\"text-status-error-05\", className)}\n                />\n              ) : hasRecentErrors ? (\n                <SvgAlertTriangle\n                  {...props}\n                  className={cn(\"text-status-warning-05\", className)}\n                />\n              ) : (\n                <SvgCheckCircle\n                  {...props}\n                  className={cn(\"text-status-success-05\", className)}\n                />\n              )\n            }\n            onMouseEnter={handleTriggerMouseEnter}\n            onMouseLeave={handleTriggerMouseLeave}\n            onClick={noProp(handleTriggerClick)}\n            disabled={isBusy}\n          >\n            {hook.is_reachable === false ? \"Connection Lost\" : \"Connected\"}\n          </Button>\n        </Popover.Anchor>\n\n        <Popover.Content\n          align=\"end\"\n          sideOffset={4}\n          onMouseEnter={handleContentMouseEnter}\n          onMouseLeave={handleContentMouseLeave}\n        >\n          <Section\n            flexDirection=\"column\"\n            justifyContent=\"start\"\n            alignItems=\"start\"\n            height=\"fit\"\n            width={hasRecentErrors ? 20 : 12.5}\n            padding={0.125}\n            gap={0.25}\n          >\n            {isLoading ? (\n              <Section justifyContent=\"center\">\n                <SimpleLoader />\n              </Section>\n            ) : error ? (\n              <Text font=\"secondary-body\" color=\"text-03\">\n                Failed to load logs.\n              </Text>\n            ) : hasRecentErrors ? (\n              <>\n                <div className=\"p-1\">\n                  <Content\n                    sizePreset=\"secondary\"\n                    variant=\"section\"\n                    icon={SvgXOctagon}\n                    title={\n                      recentErrors.length <= 3\n                        ? `${recentErrors.length} ${\n                            recentErrors.length === 1 ? \"Error\" : \"Errors\"\n                          }`\n                        : \"Most Recent Errors\"\n                    }\n                    description=\"in the past hour\"\n                  />\n                </div>\n\n                <Separator noPadding className=\"px-2\" />\n\n                {/* Log rows — at most 3, timestamp first then error message */}\n                <Section\n                  flexDirection=\"column\"\n                  justifyContent=\"start\"\n                  alignItems=\"start\"\n                  gap={0.25}\n                  padding={0.25}\n                  height=\"fit\"\n                >\n                  {recentErrors.slice(0, 3).map((log, idx) => (\n                    <Section\n                      key={log.created_at + String(idx)}\n                      flexDirection=\"column\"\n                      justifyContent=\"start\"\n                      alignItems=\"start\"\n                      gap={0.25}\n                      padding={0.25}\n                      height=\"fit\"\n                    >\n                      <Section\n                        flexDirection=\"row\"\n                        justifyContent=\"between\"\n                        alignItems=\"center\"\n                        gap={0}\n                        height=\"fit\"\n                      >\n                        <span className=\"text-code-code\">\n                          <Text font=\"secondary-mono-label\" color=\"inherit\">\n                            {formatTimeOnly(log.created_at)}\n                          </Text>\n                        </span>\n                        <CopyIconButton\n                          size=\"xs\"\n                          getCopyText={() => log.error_message ?? \"\"}\n                        />\n                      </Section>\n                      <span className=\"break-all\">\n                        <Text font=\"secondary-mono\" color=\"text-03\">\n                          {log.error_message ?? \"Unknown error\"}\n                        </Text>\n                      </span>\n                    </Section>\n                  ))}\n                </Section>\n\n                {/* View More Lines */}\n                <LineItem\n                  muted\n                  icon={SvgMaximize2}\n                  onClick={noProp(() => {\n                    handleOpenChange(false);\n                    logsModal.toggle(true);\n                  })}\n                >\n                  View More Lines\n                </LineItem>\n              </>\n            ) : (\n              // No errors state\n              <>\n                <div className=\"p-1\">\n                  <Content\n                    sizePreset=\"secondary\"\n                    variant=\"section\"\n                    icon={SvgCheckCircle}\n                    title=\"No Error\"\n                    description=\"in the past hour\"\n                  />\n                </div>\n\n                <Separator noPadding className=\"px-2\" />\n\n                {/* View Older Errors */}\n                <LineItem\n                  muted\n                  icon={SvgMaximize2}\n                  onClick={noProp(() => {\n                    handleOpenChange(false);\n                    logsModal.toggle(true);\n                  })}\n                >\n                  View Older Errors\n                </LineItem>\n              </>\n            )}\n          </Section>\n        </Popover.Content>\n      </Popover>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/ee/refresh-pages/admin/HooksPage/index.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useEffect, useMemo, useState } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { useHookSpecs } from \"@/ee/hooks/useHookSpecs\";\nimport { useHooks } from \"@/ee/hooks/useHooks\";\nimport useFilter from \"@/hooks/useFilter\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  useCreateModal,\n  useModalClose,\n} from \"@/refresh-components/contexts/ModalContext\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { Button, SelectCard, Text } from \"@opal/components\";\nimport { Disabled, Hoverable } from \"@opal/core\";\nimport { markdown } from \"@opal/utils\";\nimport { Content, IllustrationContent } from \"@opal/layouts\";\nimport Modal from \"@/refresh-components/Modal\";\nimport {\n  SvgArrowExchange,\n  SvgBubbleText,\n  SvgExternalLink,\n  SvgFileBroadcast,\n  SvgShareWebhook,\n  SvgPlug,\n  SvgRefreshCw,\n  SvgSettings,\n  SvgTrash,\n  SvgUnplug,\n} from \"@opal/icons\";\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport { SvgNoResult, SvgEmpty } from \"@opal/illustrations\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport HookFormModal from \"@/ee/refresh-pages/admin/HooksPage/HookFormModal\";\nimport HookStatusPopover from \"@/ee/refresh-pages/admin/HooksPage/HookStatusPopover\";\nimport {\n  activateHook,\n  deactivateHook,\n  deleteHook,\n  validateHook,\n} from \"@/ee/refresh-pages/admin/HooksPage/svc\";\nimport type {\n  HookPointMeta,\n  HookResponse,\n} from \"@/ee/refresh-pages/admin/HooksPage/interfaces\";\nimport { noProp } from \"@/lib/utils\";\n\nconst route = ADMIN_ROUTES.HOOKS;\n\nconst HOOK_POINT_ICONS: Record<string, IconFunctionComponent> = {\n  document_ingestion: SvgFileBroadcast,\n  query_processing: SvgBubbleText,\n};\n\nfunction getHookPointIcon(hookPoint: string): IconFunctionComponent {\n  return HOOK_POINT_ICONS[hookPoint] ?? SvgShareWebhook;\n}\n\n// ---------------------------------------------------------------------------\n// Disconnect confirmation modal\n// ---------------------------------------------------------------------------\n\ninterface DisconnectConfirmModalProps {\n  hook: HookResponse;\n  onDisconnect: () => void;\n  onDisconnectAndDelete: () => void;\n}\n\nfunction DisconnectConfirmModal({\n  hook,\n  onDisconnect,\n  onDisconnectAndDelete,\n}: DisconnectConfirmModalProps) {\n  const onClose = useModalClose();\n\n  return (\n    <Modal open onOpenChange={onClose}>\n      <Modal.Content width=\"md\" height=\"fit\">\n        <Modal.Header\n          // TODO(@raunakab): replace the colour of this SVG with red.\n          icon={SvgUnplug}\n          title={markdown(`Disconnect *${hook.name}*`)}\n          onClose={onClose}\n        />\n        <Modal.Body>\n          <div className=\"flex flex-col gap-2\">\n            <Text font=\"main-ui-body\" color=\"text-03\">\n              {markdown(\n                `Onyx will stop calling this endpoint for hook ***${hook.name}***. In-flight requests will continue to run. The external endpoint may still retain data previously sent to it. You can reconnect this hook later if needed.`\n              )}\n            </Text>\n            <Text font=\"main-ui-body\" color=\"text-03\">\n              You can also delete this hook. Deletion cannot be undone.\n            </Text>\n          </div>\n        </Modal.Body>\n        <Modal.Footer>\n          <Button prominence=\"secondary\" onClick={onClose}>\n            Cancel\n          </Button>\n          <Button\n            variant=\"danger\"\n            prominence=\"secondary\"\n            onClick={onDisconnectAndDelete}\n          >\n            Disconnect &amp; Delete\n          </Button>\n          <Button variant=\"danger\" prominence=\"primary\" onClick={onDisconnect}>\n            Disconnect\n          </Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Delete confirmation modal\n// ---------------------------------------------------------------------------\n\ninterface DeleteConfirmModalProps {\n  hook: HookResponse;\n  onDelete: () => void;\n}\n\nfunction DeleteConfirmModal({ hook, onDelete }: DeleteConfirmModalProps) {\n  const onClose = useModalClose();\n\n  return (\n    <Modal open onOpenChange={onClose}>\n      <Modal.Content width=\"md\" height=\"fit\">\n        <Modal.Header\n          // TODO(@raunakab): replace the colour of this SVG with red.\n          icon={SvgTrash}\n          title={`Delete ${hook.name}`}\n          onClose={onClose}\n        />\n        <Modal.Body>\n          <div className=\"flex flex-col gap-2\">\n            <Text font=\"main-ui-body\" color=\"text-03\">\n              {markdown(\n                `Hook ***${hook.name}*** will be permanently removed from this hook point. The external endpoint may still retain data previously sent to it.`\n              )}\n            </Text>\n            <Text font=\"main-ui-body\" color=\"text-03\">\n              Deletion cannot be undone.\n            </Text>\n          </div>\n        </Modal.Body>\n        <Modal.Footer>\n          <Button prominence=\"secondary\" onClick={onClose}>\n            Cancel\n          </Button>\n          <Button variant=\"danger\" prominence=\"primary\" onClick={onDelete}>\n            Delete\n          </Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Unconnected hook card\n// ---------------------------------------------------------------------------\n\ninterface UnconnectedHookCardProps {\n  spec: HookPointMeta;\n  onConnect: () => void;\n}\n\nfunction UnconnectedHookCard({ spec, onConnect }: UnconnectedHookCardProps) {\n  const Icon = getHookPointIcon(spec.hook_point);\n\n  return (\n    <SelectCard state=\"empty\" padding=\"sm\" rounding=\"lg\" onClick={onConnect}>\n      <div className=\"w-full flex flex-row\">\n        <div className=\"flex-1 p-2\">\n          <Content\n            sizePreset=\"main-ui\"\n            variant=\"section\"\n            icon={Icon}\n            title={spec.display_name}\n            description={spec.description}\n          />\n\n          {spec.docs_url && (\n            <a\n              href={spec.docs_url}\n              target=\"_blank\"\n              rel=\"noopener noreferrer\"\n              className=\"ml-6 flex items-center gap-1 w-min\"\n            >\n              <span className=\"underline font-secondary-body text-text-03\">\n                Documentation\n              </span>\n              <SvgExternalLink size={12} className=\"shrink-0\" />\n            </a>\n          )}\n        </div>\n\n        <Button\n          prominence=\"tertiary\"\n          rightIcon={SvgArrowExchange}\n          onClick={noProp(onConnect)}\n        >\n          Connect\n        </Button>\n      </div>\n    </SelectCard>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Connected hook card\n// ---------------------------------------------------------------------------\n\ninterface ConnectedHookCardProps {\n  hook: HookResponse;\n  spec: HookPointMeta | undefined;\n  onEdit: () => void;\n  onDeleted: () => void;\n  onToggled: (updated: HookResponse) => void;\n}\n\nfunction ConnectedHookCard({\n  hook,\n  spec,\n  onEdit,\n  onDeleted,\n  onToggled,\n}: ConnectedHookCardProps) {\n  const [isBusy, setIsBusy] = useState(false);\n  const disconnectModal = useCreateModal();\n  const deleteModal = useCreateModal();\n\n  async function handleDelete() {\n    deleteModal.toggle(false);\n    setIsBusy(true);\n    try {\n      await deleteHook(hook.id);\n      onDeleted();\n    } catch (err) {\n      console.error(\"Failed to delete hook:\", err);\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to delete hook.\"\n      );\n    } finally {\n      setIsBusy(false);\n    }\n  }\n\n  async function handleActivate() {\n    setIsBusy(true);\n    try {\n      const updated = await activateHook(hook.id);\n      onToggled(updated);\n    } catch (err) {\n      console.error(\"Failed to reconnect hook:\", err);\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to reconnect hook.\"\n      );\n    } finally {\n      setIsBusy(false);\n    }\n  }\n\n  async function handleDeactivate() {\n    disconnectModal.toggle(false);\n    setIsBusy(true);\n    try {\n      const updated = await deactivateHook(hook.id);\n      onToggled(updated);\n    } catch (err) {\n      console.error(\"Failed to deactivate hook:\", err);\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to deactivate hook.\"\n      );\n    } finally {\n      setIsBusy(false);\n    }\n  }\n\n  async function handleDisconnectAndDelete() {\n    disconnectModal.toggle(false);\n    setIsBusy(true);\n    try {\n      const deactivated = await deactivateHook(hook.id);\n      onToggled(deactivated);\n      await deleteHook(hook.id);\n      onDeleted();\n    } catch (err) {\n      console.error(\"Failed to disconnect hook:\", err);\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to disconnect hook.\"\n      );\n    } finally {\n      setIsBusy(false);\n    }\n  }\n\n  async function handleValidate() {\n    setIsBusy(true);\n    try {\n      const result = await validateHook(hook.id);\n      if (result.status === \"passed\") {\n        toast.success(\"Hook validated successfully.\");\n      } else {\n        toast.error(\n          result.error_message ?? `Validation failed: ${result.status}`\n        );\n      }\n    } catch (err) {\n      console.error(\"Failed to validate hook:\", err);\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to validate hook.\"\n      );\n    } finally {\n      setIsBusy(false);\n    }\n  }\n\n  const HookIcon = getHookPointIcon(hook.hook_point);\n\n  return (\n    <>\n      <disconnectModal.Provider>\n        <DisconnectConfirmModal\n          hook={hook}\n          onDisconnect={handleDeactivate}\n          onDisconnectAndDelete={handleDisconnectAndDelete}\n        />\n      </disconnectModal.Provider>\n\n      <deleteModal.Provider>\n        <DeleteConfirmModal hook={hook} onDelete={handleDelete} />\n      </deleteModal.Provider>\n\n      <Hoverable.Root group=\"connected-hook-card\">\n        {/* TODO(@raunakab): Modify the background colour (by using `SelectCard disabled={...}` [when it lands]) to indicate when the card is \"disconnected\". */}\n        <SelectCard state=\"filled\" padding=\"sm\" rounding=\"lg\" onClick={onEdit}>\n          <div className=\"w-full flex flex-row\">\n            <div className=\"flex-1 p-2\">\n              <Content\n                sizePreset=\"main-ui\"\n                variant=\"section\"\n                icon={HookIcon}\n                title={\n                  !hook.is_active || hook.is_reachable === false\n                    ? markdown(`~~${hook.name}~~`)\n                    : hook.name\n                }\n                suffix={!hook.is_active ? \"(Disconnected)\" : undefined}\n                description={`Hook Point: ${\n                  spec?.display_name ?? hook.hook_point\n                }`}\n              />\n\n              {spec?.docs_url && (\n                <a\n                  href={spec.docs_url}\n                  target=\"_blank\"\n                  rel=\"noopener noreferrer\"\n                  className=\"ml-6 flex items-center gap-1 w-min\"\n                >\n                  <span className=\"underline font-secondary-body text-text-03\">\n                    Documentation\n                  </span>\n                  <SvgExternalLink size={12} className=\"shrink-0\" />\n                </a>\n              )}\n            </div>\n\n            <div className=\"flex flex-col items-end shrink-0\">\n              <div className=\"flex items-center gap-1\">\n                {hook.is_active ? (\n                  <HookStatusPopover hook={hook} spec={spec} isBusy={isBusy} />\n                ) : (\n                  <Button\n                    prominence=\"tertiary\"\n                    rightIcon={SvgPlug}\n                    onClick={noProp(handleActivate)}\n                    disabled={isBusy}\n                  >\n                    Reconnect\n                  </Button>\n                )}\n              </div>\n\n              <Disabled disabled={isBusy}>\n                <div className=\"flex items-center pb-1 px-1 gap-1\">\n                  {hook.is_active ? (\n                    <>\n                      <Hoverable.Item\n                        group=\"connected-hook-card\"\n                        variant=\"opacity-on-hover\"\n                      >\n                        <Button\n                          prominence=\"tertiary\"\n                          size=\"md\"\n                          icon={SvgUnplug}\n                          onClick={noProp(() => disconnectModal.toggle(true))}\n                          tooltip=\"Disconnect Hook\"\n                          aria-label=\"Deactivate hook\"\n                        />\n                      </Hoverable.Item>\n                      <Button\n                        prominence=\"tertiary\"\n                        size=\"md\"\n                        icon={SvgRefreshCw}\n                        onClick={noProp(handleValidate)}\n                        tooltip=\"Test Connection\"\n                        aria-label=\"Re-validate hook\"\n                      />\n                    </>\n                  ) : (\n                    <Button\n                      prominence=\"tertiary\"\n                      size=\"md\"\n                      icon={SvgTrash}\n                      onClick={noProp(() => deleteModal.toggle(true))}\n                      tooltip=\"Delete\"\n                      aria-label=\"Delete hook\"\n                    />\n                  )}\n                  <Button\n                    prominence=\"tertiary\"\n                    size=\"md\"\n                    icon={SvgSettings}\n                    onClick={noProp(onEdit)}\n                    tooltip=\"Manage\"\n                    aria-label=\"Configure hook\"\n                  />\n                </div>\n              </Disabled>\n            </div>\n          </div>\n        </SelectCard>\n      </Hoverable.Root>\n    </>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Page\n// ---------------------------------------------------------------------------\n\nexport default function HooksPage() {\n  const router = useRouter();\n  const { settings, settingsLoading } = useSettingsContext();\n  const isEE = usePaidEnterpriseFeaturesEnabled();\n\n  const [connectSpec, setConnectSpec] = useState<HookPointMeta | null>(null);\n  const [editHook, setEditHook] = useState<HookResponse | null>(null);\n\n  const { specs, isLoading: specsLoading, error: specsError } = useHookSpecs();\n  const {\n    hooks,\n    isLoading: hooksLoading,\n    error: hooksError,\n    mutate,\n  } = useHooks();\n\n  const hookExtractor = useCallback(\n    (hook: HookResponse) =>\n      `${hook.name} ${\n        specs?.find((s: HookPointMeta) => s.hook_point === hook.hook_point)\n          ?.display_name ?? \"\"\n      }`,\n    [specs]\n  );\n\n  const sortedHooks = useMemo(\n    () => [...(hooks ?? [])].sort((a, b) => a.name.localeCompare(b.name)),\n    [hooks]\n  );\n\n  const {\n    query: search,\n    setQuery: setSearch,\n    filtered: connectedHooks,\n  } = useFilter(sortedHooks, hookExtractor);\n\n  const hooksByPoint = useMemo(() => {\n    const map: Record<string, HookResponse[]> = {};\n    for (const hook of hooks ?? []) {\n      (map[hook.hook_point] ??= []).push(hook);\n    }\n    return map;\n  }, [hooks]);\n\n  const unconnectedSpecs = useMemo(() => {\n    const searchLower = search.toLowerCase();\n    return (specs ?? [])\n      .filter(\n        (spec: HookPointMeta) =>\n          (hooksByPoint[spec.hook_point]?.length ?? 0) === 0 &&\n          (!searchLower ||\n            spec.display_name.toLowerCase().includes(searchLower) ||\n            spec.description.toLowerCase().includes(searchLower))\n      )\n      .sort((a: HookPointMeta, b: HookPointMeta) =>\n        a.display_name.localeCompare(b.display_name)\n      );\n  }, [specs, hooksByPoint, search]);\n\n  useEffect(() => {\n    if (settingsLoading) return;\n    if (!isEE) {\n      toast.info(\"Hook Extensions require an Enterprise license.\");\n      router.replace(\"/\");\n    } else if (!settings.hooks_enabled) {\n      toast.info(\"Hook Extensions are not enabled for this deployment.\");\n      router.replace(\"/\");\n    }\n  }, [settingsLoading, isEE, settings.hooks_enabled, router]);\n\n  if (settingsLoading || !isEE || !settings.hooks_enabled) {\n    return <SimpleLoader />;\n  }\n\n  const isLoading = specsLoading || hooksLoading;\n\n  function handleHookSuccess(updated: HookResponse) {\n    mutate((prev: HookResponse[] | undefined) => {\n      if (!prev) return [updated];\n      const idx = prev.findIndex((h: HookResponse) => h.id === updated.id);\n      if (idx >= 0) {\n        const next = [...prev];\n        next[idx] = updated;\n        return next;\n      }\n      return [...prev, updated];\n    });\n  }\n\n  function handleHookDeleted(id: number) {\n    mutate(\n      (prev: HookResponse[] | undefined) =>\n        prev?.filter((h: HookResponse) => h.id !== id)\n    );\n  }\n\n  const connectSpec_ =\n    connectSpec ??\n    (editHook\n      ? specs?.find((s: HookPointMeta) => s.hook_point === editHook.hook_point)\n      : undefined);\n\n  return (\n    <>\n      {/* Create modal */}\n      {!!connectSpec && (\n        <HookFormModal\n          key={connectSpec?.hook_point ?? \"create\"}\n          onOpenChange={(open: boolean) => {\n            if (!open) setConnectSpec(null);\n          }}\n          spec={connectSpec ?? undefined}\n          onSuccess={handleHookSuccess}\n        />\n      )}\n\n      {/* Edit modal */}\n      {!!editHook && (\n        <HookFormModal\n          key={editHook?.id ?? \"edit\"}\n          onOpenChange={(open: boolean) => {\n            if (!open) setEditHook(null);\n          }}\n          hook={editHook ?? undefined}\n          spec={connectSpec_ ?? undefined}\n          onSuccess={handleHookSuccess}\n        />\n      )}\n\n      <SettingsLayouts.Root>\n        <SettingsLayouts.Header\n          icon={route.icon}\n          title={route.title}\n          description=\"Extend Onyx pipelines by registering external API endpoints as callbacks at predefined hook points.\"\n          separator\n        />\n        <SettingsLayouts.Body>\n          {isLoading ? (\n            <SimpleLoader />\n          ) : specsError || hooksError ? (\n            <Text font=\"secondary-body\" color=\"text-03\">\n              {`Failed to load${\n                specsError ? \" hook specifications\" : \" hooks\"\n              }. Please refresh the page.`}\n            </Text>\n          ) : (\n            <div className=\"flex flex-col gap-3 h-full\">\n              <div className=\"pb-3\">\n                <InputTypeIn\n                  placeholder=\"Search hooks...\"\n                  value={search}\n                  variant=\"internal\"\n                  leftSearchIcon\n                  onChange={(e) => setSearch(e.target.value)}\n                />\n              </div>\n\n              {connectedHooks.length === 0 && unconnectedSpecs.length === 0 ? (\n                <div>\n                  <IllustrationContent\n                    title={\n                      search ? \"No results found\" : \"No hook points available\"\n                    }\n                    description={\n                      search ? \"Try using a different search term.\" : undefined\n                    }\n                    illustration={search ? SvgNoResult : SvgEmpty}\n                  />\n                </div>\n              ) : (\n                <div className=\"flex flex-col gap-2\">\n                  {connectedHooks.map((hook) => {\n                    const spec = specs?.find(\n                      (s: HookPointMeta) => s.hook_point === hook.hook_point\n                    );\n                    return (\n                      <ConnectedHookCard\n                        key={hook.id}\n                        hook={hook}\n                        spec={spec}\n                        onEdit={() => setEditHook(hook)}\n                        onDeleted={() => handleHookDeleted(hook.id)}\n                        onToggled={handleHookSuccess}\n                      />\n                    );\n                  })}\n\n                  {unconnectedSpecs.map((spec: HookPointMeta) => (\n                    <UnconnectedHookCard\n                      key={spec.hook_point}\n                      spec={spec}\n                      onConnect={() => setConnectSpec(spec)}\n                    />\n                  ))}\n                </div>\n              )}\n            </div>\n          )}\n        </SettingsLayouts.Body>\n      </SettingsLayouts.Root>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/ee/refresh-pages/admin/HooksPage/interfaces.ts",
    "content": "export type HookPoint = string;\nexport type HookFailStrategy = \"hard\" | \"soft\";\n\nexport interface HookPointMeta {\n  hook_point: HookPoint;\n  display_name: string;\n  description: string;\n  docs_url: string | null;\n  input_schema: Record<string, unknown>;\n  output_schema: Record<string, unknown>;\n  default_timeout_seconds: number;\n  default_fail_strategy: HookFailStrategy;\n  fail_hard_description: string;\n}\n\nexport interface HookResponse {\n  id: number;\n  name: string;\n  hook_point: HookPoint;\n  endpoint_url: string | null;\n  /** Partially-masked API key (e.g. \"abcd••••••••wxyz\"), or null if no key is set. */\n  api_key_masked: string | null;\n  fail_strategy: HookFailStrategy;\n  timeout_seconds: number;\n  is_active: boolean;\n  is_reachable: boolean | null;\n  creator_email: string | null;\n  created_at: string;\n  updated_at: string;\n}\n\nexport interface HookFormState {\n  name: string;\n  endpoint_url: string;\n  api_key: string;\n  fail_strategy: HookFailStrategy;\n  timeout_seconds: string;\n}\n\nexport interface HookCreateRequest {\n  name: string;\n  hook_point: HookPoint;\n  endpoint_url: string;\n  api_key?: string;\n  fail_strategy?: HookFailStrategy;\n  timeout_seconds?: number;\n}\n\nexport interface HookUpdateRequest {\n  name?: string;\n  endpoint_url?: string;\n  api_key?: string | null;\n  fail_strategy?: HookFailStrategy;\n  timeout_seconds?: number;\n}\n\nexport interface HookExecutionRecord {\n  error_message: string | null;\n  status_code: number | null;\n  duration_ms: number | null;\n  created_at: string;\n}\n\nexport type HookValidateStatus =\n  | \"passed\"\n  | \"auth_failed\"\n  | \"timeout\"\n  | \"cannot_connect\";\n\nexport interface HookValidateResponse {\n  status: HookValidateStatus;\n  error_message: string | null;\n}\n"
  },
  {
    "path": "web/src/ee/refresh-pages/admin/HooksPage/svc.ts",
    "content": "import {\n  HookCreateRequest,\n  HookExecutionRecord,\n  HookResponse,\n  HookUpdateRequest,\n  HookValidateResponse,\n} from \"@/ee/refresh-pages/admin/HooksPage/interfaces\";\n\nexport class HookAuthError extends Error {}\nexport class HookTimeoutError extends Error {}\nexport class HookConnectError extends Error {}\n\nasync function parseError(res: Response, fallback: string): Promise<Error> {\n  try {\n    const body = await res.json();\n    if (body?.error_code === \"CREDENTIAL_INVALID\") {\n      return new HookAuthError(body?.detail ?? \"Invalid API key.\");\n    }\n    if (body?.error_code === \"GATEWAY_TIMEOUT\") {\n      return new HookTimeoutError(body?.detail ?? \"Connection timed out.\");\n    }\n    if (body?.error_code === \"BAD_GATEWAY\") {\n      return new HookConnectError(\n        body?.detail ?? \"Could not connect to endpoint.\"\n      );\n    }\n    return new Error(body?.detail ?? fallback);\n  } catch (err) {\n    console.error(\"parseError: failed to parse error response body:\", err);\n    return new Error(fallback);\n  }\n}\n\nexport async function createHook(\n  req: HookCreateRequest\n): Promise<HookResponse> {\n  const res = await fetch(\"/api/admin/hooks\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(req),\n  });\n  if (!res.ok) {\n    throw await parseError(res, \"Failed to create hook\");\n  }\n  return res.json();\n}\n\nexport async function updateHook(\n  id: number,\n  req: HookUpdateRequest\n): Promise<HookResponse> {\n  const res = await fetch(`/api/admin/hooks/${id}`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(req),\n  });\n  if (!res.ok) {\n    throw await parseError(res, \"Failed to update hook\");\n  }\n  return res.json();\n}\n\nexport async function deleteHook(id: number): Promise<void> {\n  const res = await fetch(`/api/admin/hooks/${id}`, { method: \"DELETE\" });\n  if (!res.ok) {\n    throw await parseError(res, \"Failed to delete hook\");\n  }\n}\n\nexport async function activateHook(id: number): Promise<HookResponse> {\n  const res = await fetch(`/api/admin/hooks/${id}/activate`, {\n    method: \"POST\",\n  });\n  if (!res.ok) {\n    throw await parseError(res, \"Failed to activate hook\");\n  }\n  return res.json();\n}\n\nexport async function deactivateHook(id: number): Promise<HookResponse> {\n  const res = await fetch(`/api/admin/hooks/${id}/deactivate`, {\n    method: \"POST\",\n  });\n  if (!res.ok) {\n    throw await parseError(res, \"Failed to deactivate hook\");\n  }\n  return res.json();\n}\n\nexport async function validateHook(id: number): Promise<HookValidateResponse> {\n  const res = await fetch(`/api/admin/hooks/${id}/validate`, {\n    method: \"POST\",\n  });\n  if (!res.ok) {\n    throw await parseError(res, \"Failed to validate hook\");\n  }\n  return res.json();\n}\n\nexport async function fetchExecutionLogs(\n  id: number,\n  limit = 20\n): Promise<HookExecutionRecord[]> {\n  const res = await fetch(\n    `/api/admin/hooks/${id}/execution-logs?limit=${limit}`\n  );\n  if (!res.ok) {\n    throw await parseError(res, \"Failed to fetch execution logs\");\n  }\n  return res.json();\n}\n"
  },
  {
    "path": "web/src/ee/sections/SearchCard.tsx",
    "content": "\"use client\";\n\nimport { SearchDocWithContent } from \"@/lib/search/interfaces\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport { WebResultIcon } from \"@/components/WebResultIcon\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Chip from \"@/refresh-components/Chip\";\nimport { buildDocumentSummaryDisplay } from \"@/components/search/DocumentDisplay\";\nimport { ValidSources } from \"@/lib/types\";\nimport { MinimalOnyxDocument } from \"@/lib/search/interfaces\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { Interactive } from \"@opal/core\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport { timeAgo } from \"@/lib/time\";\nimport { useMemo } from \"react\";\n\nexport interface SearchResultCardProps {\n  /** The search result document to display */\n  document: SearchDocWithContent;\n  /** Whether this result was selected by the LLM as relevant */\n  isLlmSelected?: boolean;\n  /** Callback when the document is clicked */\n  onDocumentClick: (doc: MinimalOnyxDocument) => void;\n}\n\n/**\n * Card component for displaying a single search result.\n *\n * Shows the document title, source icon, blurb/highlights, and metadata.\n * Clicking the card opens the document preview.\n */\nexport default function SearchCard({\n  document,\n  onDocumentClick,\n}: SearchResultCardProps) {\n  const isWebSource =\n    document.is_internet || document.source_type === ValidSources.Web;\n\n  function handleClick() {\n    if (document.link) {\n      window.open(document.link, \"_blank\", \"noopener,noreferrer\");\n      return;\n    }\n    onDocumentClick({\n      document_id: document.document_id,\n      semantic_identifier: document.semantic_identifier,\n    });\n  }\n\n  const content = useMemo(\n    () =>\n      buildDocumentSummaryDisplay(document.match_highlights, document.blurb) ||\n      document.blurb,\n    [document.match_highlights, document.blurb]\n  );\n\n  return (\n    <Interactive.Stateless onClick={handleClick} prominence=\"secondary\">\n      <Interactive.Container heightVariant=\"fit\" widthVariant=\"full\">\n        <Section alignItems=\"start\" gap={0} padding={0.25}>\n          {/* Title Row */}\n          <Section\n            flexDirection=\"row\"\n            justifyContent=\"start\"\n            gap={0.25}\n            padding={0.25}\n          >\n            {isWebSource && document.link ? (\n              <WebResultIcon url={document.link} size={18} />\n            ) : (\n              <SourceIcon sourceType={document.source_type} iconSize={16} />\n            )}\n\n            <Truncated mainUiAction className=\"text-left\">\n              {document.semantic_identifier}\n            </Truncated>\n          </Section>\n\n          {/* Body Row */}\n          <div className=\"px-1 pb-1\">\n            <Section alignItems=\"start\" gap={0.25}>\n              {/* Metadata */}\n              <Section flexDirection=\"row\" justifyContent=\"start\" gap={0.25}>\n                {(document.primary_owners ?? []).map((owner, index) => (\n                  <Chip key={index}>{owner}</Chip>\n                ))}\n                {document.metadata?.tags &&\n                  (Array.isArray(document.metadata.tags)\n                    ? document.metadata.tags\n                    : [document.metadata.tags]\n                  ).map((tag, index) => <Chip key={index}>{tag}</Chip>)}\n                {document.updated_at &&\n                  !isNaN(new Date(document.updated_at).getTime()) && (\n                    <Text secondaryBody text02>\n                      {timeAgo(document.updated_at)}\n                    </Text>\n                  )}\n              </Section>\n\n              {/* Blurb */}\n              {content && (\n                <Text secondaryBody text03 className=\"text-left\">\n                  {content}\n                </Text>\n              )}\n            </Section>\n          </div>\n        </Section>\n      </Interactive.Container>\n    </Interactive.Stateless>\n  );\n}\n"
  },
  {
    "path": "web/src/ee/sections/SearchUI.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useEffect, useMemo, useRef, useState } from \"react\";\nimport {\n  BaseFilters,\n  MinimalOnyxDocument,\n  SourceMetadata,\n} from \"@/lib/search/interfaces\";\nimport SearchCard from \"@/ee/sections/SearchCard\";\nimport { Pagination } from \"@opal/components\";\nimport Separator from \"@/refresh-components/Separator\";\nimport EmptyMessage from \"@/refresh-components/EmptyMessage\";\nimport { IllustrationContent } from \"@opal/layouts\";\nimport SvgNoResult from \"@opal/illustrations/no-result\";\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport { Tag, ValidSources } from \"@/lib/types\";\nimport { getTimeFilterDate, TimeFilter } from \"@/lib/time\";\nimport useTags from \"@/hooks/useTags\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport { SvgCheck, SvgClock, SvgTag } from \"@opal/icons\";\nimport { FilterButton } from \"@opal/components\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport useFilter from \"@/hooks/useFilter\";\nimport { LineItemButton } from \"@opal/components\";\nimport { useQueryController } from \"@/providers/QueryControllerProvider\";\nimport { cn } from \"@/lib/utils\";\nimport { toast } from \"@/hooks/useToast\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\n\n// ============================================================================\n// Types\n// ============================================================================\n\nexport interface SearchResultsProps {\n  /** Callback when a document is clicked */\n  onDocumentClick: (doc: MinimalOnyxDocument) => void;\n}\n\n// ============================================================================\n// Constants\n// ============================================================================\n\nconst RESULTS_PER_PAGE = 20;\n\nconst TIME_FILTER_OPTIONS: { value: TimeFilter; label: string }[] = [\n  { value: \"day\", label: \"Past 24 hours\" },\n  { value: \"week\", label: \"Past week\" },\n  { value: \"month\", label: \"Past month\" },\n  { value: \"year\", label: \"Past year\" },\n];\n\nexport default function SearchUI({ onDocumentClick }: SearchResultsProps) {\n  // Available tags from backend\n  const { tags: availableTags } = useTags();\n  const {\n    state,\n    searchResults: results,\n    llmSelectedDocIds,\n    error,\n    refineSearch: onRefineSearch,\n  } = useQueryController();\n\n  const prevErrorRef = useRef<string | null>(null);\n\n  // Show a toast notification when a new error occurs\n  useEffect(() => {\n    if (error && error !== prevErrorRef.current) {\n      toast.error(error);\n    }\n    prevErrorRef.current = error;\n  }, [error]);\n\n  // Filter state\n  const [selectedSources, setSelectedSources] = useState<string[]>([]);\n  const [timeFilter, setTimeFilter] = useState<TimeFilter | null>(null);\n  const [timeFilterOpen, setTimeFilterOpen] = useState(false);\n  const [selectedTags, setSelectedTags] = useState<Tag[]>([]);\n  const [tagFilterOpen, setTagFilterOpen] = useState(false);\n\n  // Pagination state\n  const [currentPage, setCurrentPage] = useState(1);\n\n  const tagExtractor = useCallback(\n    (tag: Tag) => `${tag.tag_key} ${tag.tag_value}`,\n    []\n  );\n  const {\n    query: tagQuery,\n    setQuery: setTagQuery,\n    filtered: filteredTags,\n  } = useFilter(availableTags, tagExtractor);\n\n  // Build the combined server-side filters from current state\n  const buildFilters = (\n    overrides: { time?: TimeFilter | null; tags?: Tag[] } = {}\n  ): BaseFilters => {\n    const time = overrides.time !== undefined ? overrides.time : timeFilter;\n    const tags = overrides.tags !== undefined ? overrides.tags : selectedTags;\n    const cutoff = time ? getTimeFilterDate(time) : null;\n    return {\n      time_cutoff: cutoff?.toISOString() ?? null,\n      tags:\n        tags.length > 0\n          ? tags.map((t) => ({ tag_key: t.tag_key, tag_value: t.tag_value }))\n          : null,\n    };\n  };\n\n  // Reset source filter and pagination when results change\n  useEffect(() => {\n    setSelectedSources([]);\n    setCurrentPage(1);\n  }, [results]);\n\n  // Create a set for fast lookup of LLM-selected docs\n  const llmSelectedSet = new Set(llmSelectedDocIds ?? []);\n\n  // Filter and sort results\n  const filteredAndSortedResults = useMemo(() => {\n    const filtered = results.filter((doc) => {\n      // Source filter (client-side)\n      if (selectedSources.length > 0) {\n        if (!doc.source_type || !selectedSources.includes(doc.source_type)) {\n          return false;\n        }\n      }\n\n      return true;\n    });\n\n    // Sort: LLM-selected first, then by score\n    return filtered.sort((a, b) => {\n      const aSelected = llmSelectedSet.has(a.document_id);\n      const bSelected = llmSelectedSet.has(b.document_id);\n\n      if (aSelected && !bSelected) return -1;\n      if (!aSelected && bSelected) return 1;\n\n      return (b.score ?? 0) - (a.score ?? 0);\n    });\n  }, [results, selectedSources, llmSelectedSet]);\n\n  // Pagination\n  const totalPages = Math.max(\n    1,\n    Math.ceil(filteredAndSortedResults.length / RESULTS_PER_PAGE)\n  );\n  const paginatedResults = useMemo(() => {\n    const start = (currentPage - 1) * RESULTS_PER_PAGE;\n    return filteredAndSortedResults.slice(start, start + RESULTS_PER_PAGE);\n  }, [filteredAndSortedResults, currentPage]);\n\n  // Extract unique sources with metadata for the source filter\n  const sourcesWithMeta = useMemo(() => {\n    const sourceMap = new Map<\n      string,\n      { meta: SourceMetadata; count: number }\n    >();\n\n    for (const doc of results) {\n      if (doc.source_type) {\n        const existing = sourceMap.get(doc.source_type);\n        if (existing) {\n          existing.count++;\n        } else {\n          sourceMap.set(doc.source_type, {\n            meta: getSourceMetadata(doc.source_type as ValidSources),\n            count: 1,\n          });\n        }\n      }\n    }\n\n    return Array.from(sourceMap.entries())\n      .map(([source, data]) => ({\n        source,\n        ...data,\n      }))\n      .sort((a, b) => b.count - a.count);\n  }, [results]);\n\n  const handleSourceToggle = (source: string) => {\n    setCurrentPage(1);\n    if (selectedSources.includes(source)) {\n      setSelectedSources(selectedSources.filter((s) => s !== source));\n    } else {\n      setSelectedSources([...selectedSources, source]);\n    }\n  };\n\n  const showEmpty = !error && results.length === 0;\n\n  // Show a centered spinner while search is in-flight (after all hooks)\n  if (state.phase === \"searching\") {\n    return (\n      <div className=\"flex-1 min-h-0 w-full flex items-center justify-center\">\n        <SimpleLoader />\n      </div>\n    );\n  }\n\n  return (\n    <div className=\"flex-1 min-h-0 w-full flex flex-col gap-3\">\n      {/* ── Top row: Filters + Result count ── */}\n      <div className=\"flex-shrink-0 flex flex-row gap-x-4\">\n        <div\n          className={cn(\n            \"flex flex-col justify-end gap-3\",\n            showEmpty ? \"flex-1\" : \"flex-[3]\"\n          )}\n        >\n          <div className=\"flex flex-row gap-2\">\n            {/* Time filter */}\n            <Popover open={timeFilterOpen} onOpenChange={setTimeFilterOpen}>\n              <Popover.Trigger asChild>\n                <FilterButton\n                  icon={SvgClock}\n                  active={!!timeFilter}\n                  onClear={() => {\n                    setTimeFilter(null);\n                    onRefineSearch(buildFilters({ time: null }));\n                  }}\n                >\n                  {TIME_FILTER_OPTIONS.find((o) => o.value === timeFilter)\n                    ?.label ?? \"All Time\"}\n                </FilterButton>\n              </Popover.Trigger>\n              <Popover.Content align=\"start\" width=\"md\">\n                <PopoverMenu>\n                  {TIME_FILTER_OPTIONS.map((opt) => (\n                    <LineItemButton\n                      key={opt.value}\n                      onClick={() => {\n                        setTimeFilter(opt.value);\n                        setTimeFilterOpen(false);\n                        onRefineSearch(buildFilters({ time: opt.value }));\n                      }}\n                      state={timeFilter === opt.value ? \"selected\" : \"empty\"}\n                      icon={timeFilter === opt.value ? SvgCheck : SvgClock}\n                      title={opt.label}\n                      sizePreset=\"main-ui\"\n                      variant=\"section\"\n                    />\n                  ))}\n                </PopoverMenu>\n              </Popover.Content>\n            </Popover>\n\n            {/* Tag filter */}\n            <Popover open={tagFilterOpen} onOpenChange={setTagFilterOpen}>\n              <Popover.Trigger asChild>\n                <FilterButton\n                  icon={SvgTag}\n                  active={selectedTags.length > 0}\n                  onClear={() => {\n                    setSelectedTags([]);\n                    onRefineSearch(buildFilters({ tags: [] }));\n                  }}\n                >\n                  {selectedTags.length > 0\n                    ? `${selectedTags.length} Tag${\n                        selectedTags.length > 1 ? \"s\" : \"\"\n                      }`\n                    : \"Tags\"}\n                </FilterButton>\n              </Popover.Trigger>\n              <Popover.Content align=\"start\" width=\"lg\">\n                <PopoverMenu>\n                  <InputTypeIn\n                    leftSearchIcon\n                    placeholder=\"Filter tags...\"\n                    value={tagQuery}\n                    onChange={(e) => setTagQuery(e.target.value)}\n                    onClear={() => setTagQuery(\"\")}\n                    variant=\"internal\"\n                  />\n                  {filteredTags.map((tag) => {\n                    const isSelected = selectedTags.some(\n                      (t) =>\n                        t.tag_key === tag.tag_key &&\n                        t.tag_value === tag.tag_value\n                    );\n                    return (\n                      <LineItemButton\n                        key={`${tag.tag_key}=${tag.tag_value}`}\n                        onClick={() => {\n                          const next = isSelected\n                            ? selectedTags.filter(\n                                (t) =>\n                                  t.tag_key !== tag.tag_key ||\n                                  t.tag_value !== tag.tag_value\n                              )\n                            : [...selectedTags, tag];\n                          setSelectedTags(next);\n                          onRefineSearch(buildFilters({ tags: next }));\n                        }}\n                        state={isSelected ? \"selected\" : \"empty\"}\n                        icon={isSelected ? SvgCheck : SvgTag}\n                        title={tag.tag_value}\n                        sizePreset=\"main-ui\"\n                        variant=\"section\"\n                      />\n                    );\n                  })}\n                </PopoverMenu>\n              </Popover.Content>\n            </Popover>\n          </div>\n\n          <Separator noPadding />\n        </div>\n\n        {!showEmpty && (\n          <div className=\"flex-1 flex flex-col justify-end gap-3\">\n            <Section alignItems=\"start\">\n              <Text text03 mainUiMuted>\n                {results.length} Results\n              </Text>\n            </Section>\n\n            <Separator noPadding />\n          </div>\n        )}\n      </div>\n\n      {/* ── Middle row: Results + Source filter ── */}\n      <div className=\"flex-1 min-h-0 flex flex-row gap-x-4\">\n        <div\n          className={cn(\n            \"min-h-0 overflow-y-scroll flex flex-col gap-2\",\n            showEmpty ? \"flex-1 justify-center\" : \"flex-[3]\"\n          )}\n        >\n          {error ? (\n            <EmptyMessage title=\"Search failed\" description={error} />\n          ) : paginatedResults.length > 0 ? (\n            <>\n              {paginatedResults.map((doc) => (\n                <div\n                  key={`${doc.document_id}-${doc.chunk_ind}`}\n                  className=\"flex-shrink-0\"\n                >\n                  <SearchCard\n                    document={doc}\n                    isLlmSelected={llmSelectedSet.has(doc.document_id)}\n                    onDocumentClick={onDocumentClick}\n                  />\n                </div>\n              ))}\n            </>\n          ) : (\n            <IllustrationContent\n              illustration={SvgNoResult}\n              title=\"No results found\"\n              description=\"Check your connectors/filters or try a different search term.\"\n            />\n          )}\n        </div>\n\n        {!showEmpty && (\n          <div className=\"flex-1 min-h-0 overflow-y-auto flex flex-col gap-4 px-1\">\n            <Section gap={0.25} height=\"fit\">\n              {sourcesWithMeta.map(({ source, meta, count }) => (\n                <LineItemButton\n                  key={source}\n                  icon={(props) => (\n                    <SourceIcon\n                      sourceType={source as ValidSources}\n                      iconSize={16}\n                      {...props}\n                    />\n                  )}\n                  onClick={() => handleSourceToggle(source)}\n                  state={\n                    selectedSources.includes(source) ? \"selected\" : \"empty\"\n                  }\n                  title={meta.displayName}\n                  selectVariant=\"select-heavy\"\n                  sizePreset=\"main-ui\"\n                  variant=\"section\"\n                  rightChildren={<Text text03>{count}</Text>}\n                />\n              ))}\n            </Section>\n          </div>\n        )}\n      </div>\n\n      {/* ── Bottom row: Pagination ── */}\n      {!showEmpty && (\n        <Section height=\"fit\">\n          <Pagination\n            currentPage={currentPage}\n            totalPages={totalPages}\n            onChange={setCurrentPage}\n          />\n        </Section>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/hooks/__tests__/useShowOnboarding.test.tsx",
    "content": "import React from \"react\";\nimport { renderHook, act } from \"@testing-library/react\";\nimport \"@testing-library/jest-dom\";\nimport { useShowOnboarding } from \"@/hooks/useShowOnboarding\";\nimport { OnboardingStep } from \"@/interfaces/onboarding\";\n\n// Mock underlying dependencies used by the inlined useOnboardingState\njest.mock(\"@/providers/UserProvider\", () => ({\n  useUser: () => ({\n    user: null,\n    refreshUser: jest.fn(),\n  }),\n}));\n\n// Configurable mock for useProviderStatus\nconst mockProviderStatus = {\n  llmProviders: [] as unknown[],\n  isLoadingProviders: false,\n  hasProviders: false,\n  providerOptions: [],\n  refreshProviderInfo: jest.fn(),\n};\n\njest.mock(\"@/components/chat/ProviderContext\", () => ({\n  useProviderStatus: () => mockProviderStatus,\n}));\n\njest.mock(\"@/hooks/useLLMProviders\", () => ({\n  useLLMProviders: () => ({\n    refetch: jest.fn(),\n  }),\n}));\n\njest.mock(\"@/lib/userSettings\", () => ({\n  updateUserPersonalization: jest.fn(),\n}));\n\nfunction renderUseShowOnboarding(\n  overrides: {\n    isLoadingProviders?: boolean;\n    hasAnyProvider?: boolean;\n    isLoadingChatSessions?: boolean;\n    chatSessionsCount?: number;\n    userId?: string;\n  } = {}\n) {\n  // Configure the provider mock based on overrides\n  mockProviderStatus.isLoadingProviders = overrides.isLoadingProviders ?? false;\n  mockProviderStatus.hasProviders = overrides.hasAnyProvider ?? false;\n  mockProviderStatus.llmProviders = overrides.hasAnyProvider\n    ? [{ provider: \"openai\" }]\n    : [];\n\n  const defaultParams = {\n    liveAgent: undefined as undefined,\n    isLoadingChatSessions: overrides.isLoadingChatSessions ?? false,\n    chatSessionsCount: overrides.chatSessionsCount ?? 0,\n    userId: \"userId\" in overrides ? overrides.userId : \"user-1\",\n  };\n\n  return renderHook((props) => useShowOnboarding(props), {\n    initialProps: defaultParams,\n  });\n}\n\ndescribe(\"useShowOnboarding\", () => {\n  beforeEach(() => {\n    jest.clearAllMocks();\n    localStorage.clear();\n    // Reset mock to defaults\n    mockProviderStatus.llmProviders = [];\n    mockProviderStatus.isLoadingProviders = false;\n    mockProviderStatus.hasProviders = false;\n    mockProviderStatus.providerOptions = [];\n  });\n\n  it(\"returns showOnboarding=false while providers are loading\", () => {\n    const { result } = renderUseShowOnboarding({\n      isLoadingProviders: true,\n    });\n    expect(result.current.showOnboarding).toBe(false);\n  });\n\n  it(\"returns showOnboarding=false while chat sessions are loading\", () => {\n    const { result } = renderUseShowOnboarding({\n      isLoadingChatSessions: true,\n    });\n    expect(result.current.showOnboarding).toBe(false);\n  });\n\n  it(\"returns showOnboarding=false when userId is undefined\", () => {\n    const { result } = renderUseShowOnboarding({\n      userId: undefined,\n    });\n    expect(result.current.showOnboarding).toBe(false);\n  });\n\n  it(\"returns showOnboarding=true when no providers and no chat sessions\", () => {\n    const { result } = renderUseShowOnboarding({\n      hasAnyProvider: false,\n      chatSessionsCount: 0,\n    });\n    expect(result.current.showOnboarding).toBe(true);\n  });\n\n  it(\"returns showOnboarding=false when providers exist\", () => {\n    const { result } = renderUseShowOnboarding({\n      hasAnyProvider: true,\n      chatSessionsCount: 0,\n    });\n    expect(result.current.showOnboarding).toBe(false);\n  });\n\n  it(\"returns showOnboarding=false when chatSessionsCount > 0\", () => {\n    const { result } = renderUseShowOnboarding({\n      hasAnyProvider: false,\n      chatSessionsCount: 5,\n    });\n    expect(result.current.showOnboarding).toBe(false);\n  });\n\n  it(\"self-corrects showOnboarding to false when providers arrive late\", () => {\n    const { result, rerender } = renderUseShowOnboarding({\n      hasAnyProvider: false,\n      chatSessionsCount: 0,\n      userId: \"user-1\",\n    });\n    expect(result.current.showOnboarding).toBe(true);\n\n    // Simulate providers arriving — update the mock\n    mockProviderStatus.hasProviders = true;\n    mockProviderStatus.llmProviders = [{ provider: \"openai\" }];\n\n    rerender({\n      liveAgent: undefined,\n      isLoadingChatSessions: false,\n      chatSessionsCount: 0,\n      userId: \"user-1\",\n    });\n\n    // Should correct to false — providers exist, no need for LLM setup flow\n    expect(result.current.showOnboarding).toBe(false);\n  });\n\n  it(\"re-evaluates when userId changes\", () => {\n    const { result, rerender } = renderUseShowOnboarding({\n      hasAnyProvider: false,\n      chatSessionsCount: 0,\n      userId: \"user-1\",\n    });\n    expect(result.current.showOnboarding).toBe(true);\n\n    // Change to a new userId with providers available — update the mock\n    mockProviderStatus.hasProviders = true;\n    mockProviderStatus.llmProviders = [{ provider: \"openai\" }];\n\n    rerender({\n      liveAgent: undefined,\n      isLoadingChatSessions: false,\n      chatSessionsCount: 0,\n      userId: \"user-2\",\n    });\n\n    expect(result.current.showOnboarding).toBe(false);\n  });\n\n  it(\"hideOnboarding sets showOnboarding to false\", () => {\n    const { result } = renderUseShowOnboarding({\n      hasAnyProvider: false,\n      chatSessionsCount: 0,\n    });\n    expect(result.current.showOnboarding).toBe(true);\n\n    act(() => {\n      result.current.hideOnboarding();\n    });\n\n    expect(result.current.showOnboarding).toBe(false);\n  });\n\n  it(\"finishOnboarding sets showOnboarding to false\", () => {\n    const { result } = renderUseShowOnboarding({\n      hasAnyProvider: false,\n      chatSessionsCount: 0,\n    });\n    expect(result.current.showOnboarding).toBe(true);\n\n    act(() => {\n      result.current.finishOnboarding();\n    });\n\n    expect(result.current.showOnboarding).toBe(false);\n  });\n\n  it(\"returns onboardingState and actions\", () => {\n    const { result } = renderUseShowOnboarding();\n    expect(result.current.onboardingState.currentStep).toBe(\n      OnboardingStep.Welcome\n    );\n    expect(result.current.onboardingActions).toBeDefined();\n    expect(result.current.llmDescriptors).toEqual([]);\n  });\n\n  describe(\"localStorage persistence\", () => {\n    it(\"finishOnboarding sets localStorage flag and onboardingDismissed\", () => {\n      const { result } = renderUseShowOnboarding({\n        hasAnyProvider: false,\n        chatSessionsCount: 0,\n      });\n      expect(result.current.showOnboarding).toBe(true);\n      expect(result.current.onboardingDismissed).toBe(false);\n\n      act(() => {\n        result.current.finishOnboarding();\n      });\n\n      expect(result.current.showOnboarding).toBe(false);\n      expect(result.current.onboardingDismissed).toBe(true);\n      expect(localStorage.getItem(\"onyx:onboardingCompleted:user-1\")).toBe(\n        \"true\"\n      );\n    });\n\n    it(\"hideOnboarding sets localStorage flag and onboardingDismissed\", () => {\n      const { result } = renderUseShowOnboarding({\n        hasAnyProvider: false,\n        chatSessionsCount: 0,\n      });\n\n      act(() => {\n        result.current.hideOnboarding();\n      });\n\n      expect(result.current.onboardingDismissed).toBe(true);\n      expect(localStorage.getItem(\"onyx:onboardingCompleted:user-1\")).toBe(\n        \"true\"\n      );\n    });\n\n    it(\"showOnboarding stays false when localStorage flag is set\", () => {\n      localStorage.setItem(\"onyx:onboardingCompleted:user-1\", \"true\");\n\n      const { result } = renderUseShowOnboarding({\n        hasAnyProvider: false,\n        chatSessionsCount: 0,\n      });\n\n      expect(result.current.showOnboarding).toBe(false);\n      expect(result.current.onboardingDismissed).toBe(true);\n    });\n\n    it(\"onboardingDismissed is false when localStorage flag is not set\", () => {\n      const { result } = renderUseShowOnboarding();\n      expect(result.current.onboardingDismissed).toBe(false);\n    });\n\n    it(\"dismissal for user-1 does not suppress onboarding for user-2\", () => {\n      const { result: result1 } = renderUseShowOnboarding({\n        hasAnyProvider: false,\n        chatSessionsCount: 0,\n        userId: \"1\",\n      });\n      expect(result1.current.showOnboarding).toBe(true);\n\n      act(() => {\n        result1.current.finishOnboarding();\n      });\n      expect(result1.current.onboardingDismissed).toBe(true);\n      expect(localStorage.getItem(\"onyx:onboardingCompleted:1\")).toBe(\"true\");\n\n      // user-2 should still see onboarding\n      const { result: result2 } = renderUseShowOnboarding({\n        hasAnyProvider: false,\n        chatSessionsCount: 0,\n        userId: \"2\",\n      });\n      expect(result2.current.showOnboarding).toBe(true);\n      expect(result2.current.onboardingDismissed).toBe(false);\n      expect(localStorage.getItem(\"onyx:onboardingCompleted:2\")).toBeNull();\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/hooks/appNavigation.ts",
    "content": "\"use client\";\n\nimport { SEARCH_PARAM_NAMES } from \"@/app/app/services/searchParams\";\nimport { useRouter, useSearchParams } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { useCallback } from \"react\";\n\ninterface UseAppRouterProps {\n  chatSessionId?: string;\n  agentId?: number;\n  projectId?: number;\n}\n\nexport function useAppRouter() {\n  const router = useRouter();\n  return useCallback(\n    ({ chatSessionId, agentId, projectId }: UseAppRouterProps = {}) => {\n      const finalParams = [];\n\n      if (chatSessionId)\n        finalParams.push(`${SEARCH_PARAM_NAMES.CHAT_ID}=${chatSessionId}`);\n      else if (agentId)\n        finalParams.push(`${SEARCH_PARAM_NAMES.PERSONA_ID}=${agentId}`);\n      else if (projectId)\n        finalParams.push(`${SEARCH_PARAM_NAMES.PROJECT_ID}=${projectId}`);\n\n      const finalString = finalParams.join(\"&\");\n      const finalUrl = `/app?${finalString}`;\n\n      router.push(finalUrl as Route);\n    },\n    [router]\n  );\n}\n\nexport function useAppParams() {\n  const searchParams = useSearchParams();\n  return useCallback((name: string) => searchParams.get(name), [searchParams]);\n}\n"
  },
  {
    "path": "web/src/hooks/formHooks.ts",
    "content": "\"use client\";\n\nimport { useField } from \"formik\";\n\n/**\n * Custom hook for handling form input changes in Formik forms.\n *\n * This hook automatically sets the field as \"touched\" when its value changes,\n * enabling immediate validation feedback after the first user interaction.\n *\n * @example\n * ```tsx\n * function MyField({ name }: { name: string }) {\n *   const [field] = useField(name);\n *   const onChange = useFormInputCallback(name);\n *\n *   return (\n *     <input\n *       name={name}\n *       value={field.value}\n *       onChange={onChange}\n *     />\n *   );\n * }\n * ```\n *\n * @example\n * ```tsx\n * // With callback\n * function MySelect({ name, onValueChange }: Props) {\n *   const [field] = useField(name);\n *   const onChange = useFormInputCallback(name, onValueChange);\n *\n *   return (\n *     <Select value={field.value} onValueChange={onChange} />\n *   );\n * }\n * ```\n */\nexport function useOnChangeEvent<T = any>(\n  name: string,\n  f?: (event: T) => void\n) {\n  const [field, , helpers] = useField<T>(name);\n  return (event: T) => {\n    helpers.setTouched(true);\n    f?.(event);\n    field.onChange(event);\n  };\n}\n\n/**\n * Custom hook for handling form value changes in Formik forms.\n *\n * This hook automatically sets the field as \"touched\" when its value changes,\n * enabling immediate validation feedback after the first user interaction.\n * Use this for components that pass values directly (not events).\n *\n * @example\n * ```tsx\n * function MySelect({ name, onValueChange }: Props) {\n *   const [field] = useField(name);\n *   const onChange = useOnChangeValue(name, onValueChange);\n *\n *   return (\n *     <Select value={field.value} onValueChange={onChange} />\n *   );\n * }\n * ```\n *\n * @example\n * ```tsx\n * function MyDatePicker({ name }: Props) {\n *   const [field] = useField(name);\n *   const onChange = useOnChangeValue(name);\n *\n *   return (\n *     <DatePicker selectedDate={field.value} setSelectedDate={onChange} />\n *   );\n * }\n * ```\n */\nexport function useOnChangeValue<T = any>(\n  name: string,\n  f?: (value: T) => void\n) {\n  const [, , helpers] = useField<T>(name);\n  return (value: T) => {\n    helpers.setTouched(true);\n    f?.(value);\n    helpers.setValue(value);\n  };\n}\n\n/**\n * Custom hook for handling form input blur events in Formik forms.\n *\n * This hook chains the consumer's onBlur callback with Formik's blur handler,\n * ensuring both effects run when the field loses focus.\n *\n * @example\n * ```tsx\n * function MyField({ name, onBlur }: Props) {\n *   const [field] = useField(name);\n *   const handleBlur = useOnBlurEvent(name, onBlur);\n *\n *   return (\n *     <input\n *       name={name}\n *       value={field.value}\n *       onBlur={handleBlur}\n *     />\n *   );\n * }\n * ```\n */\nexport function useOnBlurEvent<T = any>(name: string, f?: (event: T) => void) {\n  const [field] = useField<T>(name);\n  return (event: T) => {\n    f?.(event);\n    field.onBlur(event);\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useAdminPersonas.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { buildApiPath } from \"@/lib/urlBuilder\";\nimport { Persona } from \"@/app/admin/agents/interfaces\";\n\ninterface UseAdminPersonasOptions {\n  includeDeleted?: boolean;\n  getEditable?: boolean;\n  includeDefault?: boolean;\n  pageNum?: number;\n  pageSize?: number;\n}\n\ninterface PaginatedPersonasResponse {\n  items: Persona[];\n  total_items: number;\n}\n\nexport const useAdminPersonas = (options?: UseAdminPersonasOptions) => {\n  const {\n    includeDeleted = false,\n    getEditable = false,\n    includeDefault = false,\n    pageNum,\n    pageSize,\n  } = options || {};\n\n  // If pageNum and pageSize are provided, use paginated endpoint.\n  const usePagination = pageNum !== undefined && pageSize !== undefined;\n\n  const url = usePagination\n    ? buildApiPath(\"/api/admin/agents\", {\n        include_deleted: includeDeleted,\n        get_editable: getEditable,\n        include_default: includeDefault,\n        page_num: pageNum,\n        page_size: pageSize,\n      })\n    : buildApiPath(\"/api/admin/persona\", {\n        include_deleted: includeDeleted,\n        get_editable: getEditable,\n      });\n\n  const { data, error, isLoading, mutate } = useSWR<\n    Persona[] | PaginatedPersonasResponse\n  >(url, errorHandlingFetcher);\n\n  // Handle both paginated and non-paginated responses\n  const personas = usePagination\n    ? (data as PaginatedPersonasResponse)?.items || []\n    : (data as Persona[]) || [];\n\n  const totalItems = usePagination\n    ? (data as PaginatedPersonasResponse)?.total_items || 0\n    : personas.length;\n\n  return {\n    personas,\n    totalItems,\n    error,\n    isLoading,\n    refresh: mutate,\n  };\n};\n"
  },
  {
    "path": "web/src/hooks/useAdminUsers.ts",
    "content": "\"use client\";\n\nimport { useCallback } from \"react\";\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { AccountType, UserStatus } from \"@/lib/types\";\nimport type { UserRole, InvitedUserSnapshot } from \"@/lib/types\";\nimport type {\n  UserRow,\n  UserGroupInfo,\n} from \"@/refresh-pages/admin/UsersPage/interfaces\";\n\n// ---------------------------------------------------------------------------\n// Backend response shape (GET /manage/users/accepted/all)\n// ---------------------------------------------------------------------------\n\ninterface FullUserSnapshot {\n  id: string;\n  email: string;\n  role: UserRole;\n  account_type: AccountType;\n  is_active: boolean;\n  password_configured: boolean;\n  personal_name: string | null;\n  created_at: string;\n  updated_at: string;\n  groups: UserGroupInfo[];\n  is_scim_synced: boolean;\n}\n\n// ---------------------------------------------------------------------------\n// Converters\n// ---------------------------------------------------------------------------\n\nfunction toUserRow(snapshot: FullUserSnapshot): UserRow {\n  return {\n    id: snapshot.id,\n    email: snapshot.email,\n    role: snapshot.role,\n    status: snapshot.is_active ? UserStatus.ACTIVE : UserStatus.INACTIVE,\n    is_active: snapshot.is_active,\n    is_scim_synced: snapshot.is_scim_synced,\n    personal_name: snapshot.personal_name,\n    created_at: snapshot.created_at,\n    updated_at: snapshot.updated_at,\n    groups: snapshot.groups,\n  };\n}\n\nfunction emailToUserRow(\n  email: string,\n  status: UserStatus.INVITED | UserStatus.REQUESTED\n): UserRow {\n  return {\n    id: null,\n    email,\n    role: null,\n    status,\n    is_active: false,\n    is_scim_synced: false,\n    personal_name: null,\n    created_at: null,\n    updated_at: null,\n    groups: [],\n  };\n}\n\n// ---------------------------------------------------------------------------\n// Hook\n// ---------------------------------------------------------------------------\n\nexport default function useAdminUsers() {\n  const {\n    data: acceptedData,\n    isLoading: acceptedLoading,\n    error: acceptedError,\n    mutate: acceptedMutate,\n  } = useSWR<FullUserSnapshot[]>(SWR_KEYS.acceptedUsers, errorHandlingFetcher);\n\n  const {\n    data: invitedData,\n    isLoading: invitedLoading,\n    error: invitedError,\n    mutate: invitedMutate,\n  } = useSWR<InvitedUserSnapshot[]>(\n    SWR_KEYS.invitedUsers,\n    errorHandlingFetcher\n  );\n\n  const {\n    data: requestedData,\n    isLoading: requestedLoading,\n    error: requestedError,\n    mutate: requestedMutate,\n  } = useSWR<InvitedUserSnapshot[]>(\n    NEXT_PUBLIC_CLOUD_ENABLED ? SWR_KEYS.pendingTenantUsers : null,\n    errorHandlingFetcher\n  );\n\n  const acceptedRows = (acceptedData ?? []).map(toUserRow);\n  const invitedRows = (invitedData ?? []).map((u) =>\n    emailToUserRow(u.email, UserStatus.INVITED)\n  );\n  const requestedRows = (requestedData ?? []).map((u) =>\n    emailToUserRow(u.email, UserStatus.REQUESTED)\n  );\n\n  const users = [...invitedRows, ...requestedRows, ...acceptedRows];\n\n  const isLoading = acceptedLoading || invitedLoading || requestedLoading;\n  const error = acceptedError ?? invitedError ?? requestedError;\n\n  const refresh = useCallback(() => {\n    acceptedMutate();\n    invitedMutate();\n    requestedMutate();\n  }, [acceptedMutate, invitedMutate, requestedMutate]);\n\n  return { users, isLoading, error, refresh };\n}\n"
  },
  {
    "path": "web/src/hooks/useAgentController.ts",
    "content": "\"use client\";\n\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { useCallback, useMemo, useState } from \"react\";\nimport { ChatSession } from \"@/app/app/interfaces\";\nimport { useAgents, usePinnedAgents } from \"@/hooks/useAgents\";\nimport { useSearchParams } from \"next/navigation\";\nimport { SEARCH_PARAM_NAMES } from \"@/app/app/services/searchParams\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\n\nexport default function useAgentController({\n  selectedChatSession,\n  onAgentSelect,\n}: {\n  selectedChatSession: ChatSession | null | undefined;\n  onAgentSelect?: () => void;\n}) {\n  const searchParams = useSearchParams();\n  const { agents: availableAgents } = useAgents();\n  const { pinnedAgents: pinnedAgents } = usePinnedAgents();\n  const combinedSettings = useSettingsContext();\n\n  const defaultAgentIdRaw = searchParams?.get(SEARCH_PARAM_NAMES.PERSONA_ID);\n  const defaultAgentId = defaultAgentIdRaw\n    ? parseInt(defaultAgentIdRaw)\n    : undefined;\n\n  const existingChatSessionAgentId = selectedChatSession?.persona_id;\n  const [selectedAgent, setSelectedAssistant] = useState<\n    MinimalPersonaSnapshot | undefined\n  >(\n    // NOTE: look through available assistants here, so that even if the user\n    // has hidden this agent it still shows the correct assistant when\n    // going back to an old chat session\n    existingChatSessionAgentId !== undefined\n      ? availableAgents.find(\n          (assistant) => assistant.id === existingChatSessionAgentId\n        )\n      : defaultAgentId !== undefined\n        ? availableAgents.find((assistant) => assistant.id === defaultAgentId)\n        : undefined\n  );\n\n  // Current assistant is decided based on this ordering\n  // 1. Alternative assistant (assistant selected explicitly by user)\n  // 2. Selected assistant (assistant default in this chat session)\n  // 3. Unified assistant (ID 0) if available (unless disabled)\n  // 4. First pinned assistants (ordered list of pinned assistants)\n  // 5. Available assistants (ordered list of available assistants)\n  // Relevant test: `live_assistant.spec.ts`\n  const liveAgent: MinimalPersonaSnapshot | undefined = useMemo(() => {\n    if (selectedAgent) return selectedAgent;\n\n    const disableDefaultAssistant =\n      combinedSettings?.settings?.disable_default_assistant ?? false;\n\n    if (disableDefaultAssistant) {\n      // Skip unified assistant (ID 0), go straight to pinned/available\n      // Filter out ID 0 from both pinned and available assistants\n      const nonDefaultPinned = pinnedAgents.filter((a) => a.id !== 0);\n      const nonDefaultAvailable = availableAgents.filter((a) => a.id !== 0);\n\n      return (\n        nonDefaultPinned[0] || nonDefaultAvailable[0] || availableAgents[0] // Last resort fallback\n      );\n    }\n\n    // Try to use the unified assistant (ID 0) as default\n    const unifiedAgent = availableAgents.find((a) => a.id === 0);\n    if (unifiedAgent) return unifiedAgent;\n\n    // Fall back to pinned or available assistants\n    return pinnedAgents[0] || availableAgents[0];\n  }, [selectedAgent, pinnedAgents, availableAgents, combinedSettings]);\n\n  const setSelectedAgentFromId = useCallback(\n    (agentId: number | null | undefined) => {\n      // NOTE: also intentionally look through available assistants here, so that\n      // even if the user has hidden an agent they can still go back to it\n      // for old chats\n      let newAssistant =\n        agentId !== null\n          ? availableAgents.find((assistant) => assistant.id === agentId)\n          : undefined;\n\n      // if no assistant was passed in / found, use the default agent\n      if (!newAssistant && defaultAgentId !== undefined) {\n        newAssistant = availableAgents.find(\n          (assistant) => assistant.id === defaultAgentId\n        );\n      }\n\n      setSelectedAssistant(newAssistant);\n      onAgentSelect?.();\n    },\n    [availableAgents, defaultAgentId, onAgentSelect]\n  );\n\n  return {\n    // main assistant selection\n    selectedAgent,\n    setSelectedAgentFromId,\n\n    // final computed assistant\n    liveAgent,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useAgentPreferences.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport {\n  UserSpecificAgentPreference,\n  UserSpecificAgentPreferences,\n} from \"@/lib/types\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { useCallback } from \"react\";\n\n// TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766\n\n// TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766\nconst buildUpdateAgentPreferenceUrl = (agentId: number) =>\n  `/api/user/assistant/${agentId}/preferences`;\n\n/**\n * Hook for managing user-specific agent preferences using SWR.\n * Provides automatic caching, deduplication, and revalidation.\n */\nexport default function useAgentPreferences() {\n  const { data, mutate } = useSWR<UserSpecificAgentPreferences>(\n    SWR_KEYS.agentPreferences,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60000,\n    }\n  );\n\n  const setSpecificAgentPreferences = useCallback(\n    async (\n      agentId: number,\n      newAgentPreference: UserSpecificAgentPreference\n    ) => {\n      // Optimistic update\n      mutate(\n        {\n          ...data,\n          [agentId]: newAgentPreference,\n        },\n        false\n      );\n\n      try {\n        const response = await fetch(buildUpdateAgentPreferenceUrl(agentId), {\n          method: \"PATCH\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n          body: JSON.stringify(newAgentPreference),\n        });\n\n        if (!response.ok) {\n          console.error(\n            `Failed to update agent preferences: ${response.status}`\n          );\n        }\n      } catch (error) {\n        console.error(\"Error updating agent preferences:\", error);\n      }\n\n      // Revalidate after update\n      mutate();\n    },\n    [data, mutate]\n  );\n\n  return {\n    agentPreferences: data ?? null,\n    setSpecificAgentPreferences,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useAgents.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { useState, useEffect, useMemo, useCallback } from \"react\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport {\n  MinimalPersonaSnapshot,\n  FullPersona,\n} from \"@/app/admin/agents/interfaces\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { pinAgents } from \"@/lib/agents\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { useSearchParams } from \"next/navigation\";\nimport { SEARCH_PARAM_NAMES } from \"@/app/app/services/searchParams\";\nimport useChatSessions from \"./useChatSessions\";\n\n/**\n * Fetches all agents (personas) available to the current user.\n *\n * Returns minimal agent snapshots containing basic information like name, description,\n * tools, and display settings. Use this for listing agents in UI components like\n * sidebars, dropdowns, or agent selection interfaces.\n *\n * For full agent details including user_file_ids, groups, and advanced settings,\n * use `useAgent(personaId)` instead.\n *\n * @returns Object containing:\n *   - agents: Array of MinimalPersonaSnapshot objects (empty array while loading)\n *   - isLoading: Boolean indicating if data is being fetched\n *   - error: Any error that occurred during fetch\n *   - refresh: Function to manually revalidate the data\n *\n * @example\n * const { agents, isLoading } = useAgents();\n * if (isLoading) return <Spinner />;\n * return <AgentList agents={agents} />;\n */\nexport function useAgents() {\n  const { data, error, mutate } = useSWR<MinimalPersonaSnapshot[]>(\n    SWR_KEYS.personas,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60000,\n    }\n  );\n\n  return {\n    agents: data ?? [],\n    isLoading: !error && !data,\n    error,\n    refresh: mutate,\n  };\n}\n\n/**\n * Fetches a single agent (persona) by ID with full details.\n *\n * Returns complete agent information including user_file_ids, groups, system prompts,\n * and all configuration settings. Use this when you need detailed agent data for\n * editing, configuration, or displaying full agent details.\n *\n * For listing multiple agents with basic information, use `useAgents()` instead.\n *\n * @param agentId - The ID of the agent to fetch, or null to skip fetching\n * @returns Object containing:\n *   - agent: FullPersona object with complete agent details, or null if not loaded/not found\n *   - isLoading: Boolean indicating if data is being fetched (false when personaId is null)\n *   - error: Any error that occurred during fetch\n *   - refresh: Function to manually revalidate the data\n *\n * @example\n * const { agent, isLoading } = useAgent(selectedAgentId);\n * if (isLoading) return <Spinner />;\n * if (!agent) return <NotFound />;\n * return <AgentEditor agent={agent} />;\n */\nexport function useAgent(agentId: number | null) {\n  const { data, error, isLoading, mutate } = useSWR<FullPersona>(\n    agentId ? SWR_KEYS.persona(agentId) : null,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60000,\n    }\n  );\n\n  return {\n    agent: data ?? null,\n    isLoading,\n    error,\n    refresh: mutate,\n  };\n}\n\n/**\n * Hook that combines useAgents and usePinnedAgents to return full agent objects\n * with local state for optimistic drag-and-drop updates.\n */\nexport function usePinnedAgents() {\n  const { user, refreshUser } = useUser();\n  const { agents, isLoading: isLoadingAgents } = useAgents();\n\n  // Local state for optimistic updates during drag-and-drop\n  const [localPinnedAgents, setLocalPinnedAgents] = useState<\n    MinimalPersonaSnapshot[]\n  >([]);\n\n  // Derive pinned agents from server data\n  const serverPinnedAgents = useMemo(() => {\n    if (agents.length === 0) return [];\n\n    // If pinned_assistants is null/undefined (never set), show featured personas\n    // If it's an empty array (user explicitly unpinned all), show nothing\n    const pinnedIds = user?.preferences.pinned_assistants;\n    if (pinnedIds === null || pinnedIds === undefined) {\n      return agents.filter((agent) => agent.is_featured && agent.id !== 0);\n    }\n\n    return pinnedIds\n      .map((id) => agents.find((agent) => agent.id === id))\n      .filter((agent): agent is MinimalPersonaSnapshot => !!agent);\n  }, [agents, user?.preferences.pinned_assistants]);\n\n  // Sync server data → local state when server data changes\n  // Only sync when agents have loaded (to avoid syncing empty during initial load)\n  useEffect(() => {\n    if (agents.length > 0) {\n      setLocalPinnedAgents(serverPinnedAgents);\n    }\n  }, [serverPinnedAgents, agents.length]);\n\n  // Toggle pin status - updates local state AND persists to server\n  const togglePinnedAgent = useCallback(\n    async (agent: MinimalPersonaSnapshot, shouldPin: boolean) => {\n      const newPinned = shouldPin\n        ? [...localPinnedAgents, agent]\n        : localPinnedAgents.filter((a) => a.id !== agent.id);\n\n      // Optimistic update\n      setLocalPinnedAgents(newPinned);\n\n      // Persist to server\n      await pinAgents(newPinned.map((a) => a.id));\n      refreshUser(); // Refresh user to sync pinned_assistants\n    },\n    [localPinnedAgents, refreshUser]\n  );\n\n  // Update pinned agents order (for drag-and-drop) - updates AND persists\n  const updatePinnedAgents = useCallback(\n    async (newPinnedAgents: MinimalPersonaSnapshot[]) => {\n      // Optimistic update\n      setLocalPinnedAgents(newPinnedAgents);\n\n      // Persist to server\n      await pinAgents(newPinnedAgents.map((a) => a.id));\n      refreshUser();\n    },\n    [refreshUser]\n  );\n\n  return {\n    pinnedAgents: localPinnedAgents,\n    togglePinnedAgent,\n    updatePinnedAgents, // Use this instead of setPinnedAgents for drag-and-drop\n    isLoading: isLoadingAgents,\n  };\n}\n\n/**\n * Hook to determine the currently active agent based on:\n * 1. URL param `agentId`\n * 2. Chat session's `persona_id`\n * 3. Falls back to null if neither is present\n */\nexport function useCurrentAgent(): MinimalPersonaSnapshot | null {\n  const { agents } = useAgents();\n  const searchParams = useSearchParams();\n\n  const agentIdRaw = searchParams?.get(SEARCH_PARAM_NAMES.PERSONA_ID);\n  const { currentChatSession } = useChatSessions();\n\n  const currentAgent = useMemo(() => {\n    if (agents.length === 0) return null;\n\n    // Priority: URL param > chat session persona > null\n    const agentId = agentIdRaw\n      ? parseInt(agentIdRaw)\n      : currentChatSession?.persona_id;\n\n    if (!agentId) return null;\n\n    return agents.find((a) => a.id === agentId) ?? null;\n  }, [agents, agentIdRaw, currentChatSession?.persona_id]);\n\n  return currentAgent;\n}\n"
  },
  {
    "path": "web/src/hooks/useAppFocus.ts",
    "content": "\"use client\";\n\n// \"AppFocus\" is the current part of the main application which is active / focused on.\n// Namely, if the URL is pointing towards a \"chat\", then a `{ type: \"chat\", id: \"...\" }` is returned.\n//\n// This is useful in determining what `SidebarTab` should be active, for example.\n\nimport { useMemo } from \"react\";\nimport { SEARCH_PARAM_NAMES } from \"@/app/app/services/searchParams\";\nimport { usePathname, useSearchParams } from \"next/navigation\";\n\nexport type AppFocusType =\n  | { type: \"agent\" | \"project\" | \"chat\"; id: string }\n  | \"new-session\"\n  | \"more-agents\"\n  | \"user-settings\"\n  | \"shared-chat\";\n\nexport class AppFocus {\n  constructor(public value: AppFocusType) {}\n\n  isAgent(): boolean {\n    return typeof this.value === \"object\" && this.value.type === \"agent\";\n  }\n\n  isProject(): boolean {\n    return typeof this.value === \"object\" && this.value.type === \"project\";\n  }\n\n  isChat(): boolean {\n    return typeof this.value === \"object\" && this.value.type === \"chat\";\n  }\n\n  isSharedChat(): boolean {\n    return this.value === \"shared-chat\";\n  }\n\n  isNewSession(): boolean {\n    return this.value === \"new-session\";\n  }\n\n  isMoreAgents(): boolean {\n    return this.value === \"more-agents\";\n  }\n\n  isUserSettings(): boolean {\n    return this.value === \"user-settings\";\n  }\n\n  getId(): string | null {\n    return typeof this.value === \"object\" ? this.value.id : null;\n  }\n\n  getType():\n    | \"agent\"\n    | \"project\"\n    | \"chat\"\n    | \"shared-chat\"\n    | \"new-session\"\n    | \"more-agents\"\n    | \"user-settings\" {\n    return typeof this.value === \"object\" ? this.value.type : this.value;\n  }\n}\n\nexport default function useAppFocus(): AppFocus {\n  const pathname = usePathname();\n  const searchParams = useSearchParams();\n\n  const chatId = searchParams.get(SEARCH_PARAM_NAMES.CHAT_ID);\n  const agentId = searchParams.get(SEARCH_PARAM_NAMES.PERSONA_ID);\n  const projectId = searchParams.get(SEARCH_PARAM_NAMES.PROJECT_ID);\n\n  // Memoize on the values that determine which AppFocus is constructed.\n  // AppFocus is immutable, so same inputs → same instance.\n  return useMemo(() => {\n    if (pathname.startsWith(\"/app/shared/\")) {\n      return new AppFocus(\"shared-chat\");\n    }\n    if (pathname.startsWith(\"/app/settings\")) {\n      return new AppFocus(\"user-settings\");\n    }\n    if (pathname.startsWith(\"/app/agents\")) {\n      return new AppFocus(\"more-agents\");\n    }\n    if (chatId) return new AppFocus({ type: \"chat\", id: chatId });\n    if (agentId) return new AppFocus({ type: \"agent\", id: agentId });\n    if (projectId) return new AppFocus({ type: \"project\", id: projectId });\n    return new AppFocus(\"new-session\");\n  }, [pathname, chatId, agentId, projectId]);\n}\n"
  },
  {
    "path": "web/src/hooks/useAuthTypeMetadata.ts",
    "content": "import useSWR from \"swr\";\nimport { AuthType, NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\ninterface AuthTypeAPIResponse {\n  auth_type: string;\n  requires_verification: boolean;\n  anonymous_user_enabled: boolean | null;\n  password_min_length: number;\n  has_users: boolean;\n  oauth_enabled: boolean;\n}\n\nexport interface AuthTypeMetadata {\n  authType: AuthType;\n  autoRedirect: boolean;\n  requiresVerification: boolean;\n  anonymousUserEnabled: boolean | null;\n  passwordMinLength: number;\n  hasUsers: boolean;\n  oauthEnabled: boolean;\n}\n\nconst DEFAULT_AUTH_TYPE_METADATA: AuthTypeMetadata = {\n  authType: NEXT_PUBLIC_CLOUD_ENABLED ? AuthType.CLOUD : AuthType.BASIC,\n  autoRedirect: false,\n  requiresVerification: false,\n  anonymousUserEnabled: null,\n  passwordMinLength: 0,\n  hasUsers: false,\n  oauthEnabled: false,\n};\n\nasync function fetchAuthTypeMetadata(url: string): Promise<AuthTypeMetadata> {\n  const res = await fetch(url);\n  if (!res.ok) throw new Error(\"Failed to fetch auth type metadata\");\n  const data: AuthTypeAPIResponse = await res.json();\n  const authType = NEXT_PUBLIC_CLOUD_ENABLED\n    ? AuthType.CLOUD\n    : (data.auth_type as AuthType);\n  return {\n    authType,\n    autoRedirect: authType === AuthType.OIDC || authType === AuthType.SAML,\n    requiresVerification: data.requires_verification,\n    anonymousUserEnabled: data.anonymous_user_enabled,\n    passwordMinLength: data.password_min_length,\n    hasUsers: data.has_users,\n    oauthEnabled: data.oauth_enabled,\n  };\n}\n\nexport function useAuthTypeMetadata(): {\n  authTypeMetadata: AuthTypeMetadata;\n  isLoading: boolean;\n  error: Error | undefined;\n} {\n  const { data, error, isLoading } = useSWR<AuthTypeMetadata>(\n    SWR_KEYS.authType,\n    fetchAuthTypeMetadata,\n    {\n      revalidateOnFocus: false,\n      revalidateOnReconnect: false,\n      revalidateIfStale: false,\n      dedupingInterval: 30_000,\n    }\n  );\n\n  return {\n    authTypeMetadata: data ?? DEFAULT_AUTH_TYPE_METADATA,\n    isLoading,\n    error,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useAvailableTools.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { ToolSnapshot } from \"@/lib/tools/interfaces\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\n/**\n * Hook to fetch all available tools from the backend.\n *\n * This hook fetches the complete list of tools that can be used with agents,\n * including built-in tools (SearchTool, ImageGenerationTool, WebSearchTool, PythonTool)\n * and any dynamically configured tools (MCP servers, OpenAPI tools).\n *\n * @example\n * ```tsx\n * const { tools, isLoading, error, refresh } = useAvailableTools();\n *\n * if (isLoading) return <Loading />;\n * if (error) return <Error />;\n *\n * const imageGenTool = tools.find(t => t.in_code_tool_id === \"ImageGenerationTool\");\n * const isImageGenAvailable = !!imageGenTool;\n * ```\n */\nexport function useAvailableTools() {\n  const { data, error, mutate } = useSWR<ToolSnapshot[]>(\n    SWR_KEYS.tools,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60000,\n    }\n  );\n\n  return {\n    tools: data ?? [],\n    isLoading: !error && !data,\n    error,\n    refresh: mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useBillingInformation.ts",
    "content": "import useSWR from \"swr\";\n\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport {\n  BillingInformation,\n  SubscriptionStatus,\n} from \"@/lib/billing/interfaces\";\n\n/**\n * Hook to fetch billing information from Stripe.\n *\n * Works for both cloud and self-hosted deployments:\n * - Cloud: fetches from /api/tenants/billing-information\n * - Self-hosted: fetches from /api/admin/billing/billing-information\n */\nexport function useBillingInformation() {\n  const url = NEXT_PUBLIC_CLOUD_ENABLED\n    ? SWR_KEYS.billingInformationCloud\n    : SWR_KEYS.billingInformationSelfHosted;\n\n  const { data, error, mutate, isLoading } = useSWR<\n    BillingInformation | SubscriptionStatus\n  >(url, errorHandlingFetcher, {\n    revalidateOnFocus: false,\n    revalidateOnReconnect: false,\n    revalidateIfStale: false,\n    dedupingInterval: 30000,\n    shouldRetryOnError: false,\n    keepPreviousData: true,\n  });\n\n  return { data, isLoading, error, refresh: mutate };\n}\n"
  },
  {
    "path": "web/src/hooks/useBoundingBox.ts",
    "content": "\"use client\";\n\nimport { useRef, useEffect, useCallback, useState } from \"react\";\n\nexport function useBoundingBox() {\n  const ref = useRef<HTMLDivElement>(null);\n  const [inside, setInside] = useState(false);\n\n  const checkMousePosition = useCallback((event: MouseEvent) => {\n    if (!ref.current) return;\n\n    const rect = ref.current.getBoundingClientRect();\n    const isInside =\n      event.clientX >= rect.left &&\n      event.clientX <= rect.right &&\n      event.clientY >= rect.top &&\n      event.clientY <= rect.bottom;\n\n    setInside(isInside);\n  }, []);\n\n  useEffect(() => {\n    // Set up event listeners for mouse movement\n    const handleMouseMove = (event: MouseEvent) => checkMousePosition(event);\n\n    document.addEventListener(\"mousemove\", handleMouseMove);\n\n    return () => {\n      document.removeEventListener(\"mousemove\", handleMouseMove);\n    };\n  }, [checkMousePosition]);\n\n  return { ref, inside };\n}\n"
  },
  {
    "path": "web/src/hooks/useBrowserInfo.ts",
    "content": "\"use client\";\n\nimport { useEffect, useState } from \"react\";\n\nexport interface BrowserInfo {\n  isSafari: boolean;\n  isFirefox: boolean;\n  isChrome: boolean;\n  isChromium: boolean;\n  isEdge: boolean;\n  isOpera: boolean;\n  isIOS: boolean;\n  isMac: boolean;\n  isWindows: boolean;\n}\n\nconst DEFAULT_BROWSER_INFO: BrowserInfo = {\n  isSafari: false,\n  isFirefox: false,\n  isChrome: false,\n  isChromium: false,\n  isEdge: false,\n  isOpera: false,\n  isIOS: false,\n  isMac: false,\n  isWindows: false,\n};\n\nexport default function useBrowserInfo(): BrowserInfo {\n  const [browserInfo, setBrowserInfo] =\n    useState<BrowserInfo>(DEFAULT_BROWSER_INFO);\n  useEffect(() => {\n    const userAgent = window.navigator.userAgent;\n\n    const isEdge = /Edg/i.test(userAgent);\n    const isOpera = /OPR|Opera/i.test(userAgent);\n    const isFirefox = /Firefox|FxiOS/i.test(userAgent);\n    const isChrome = /Chrome|CriOS/i.test(userAgent) && !isEdge && !isOpera;\n    const isChromium = /Chromium/i.test(userAgent) || isChrome;\n    const isSafari =\n      /Safari/i.test(userAgent) &&\n      !isChromium &&\n      !isEdge &&\n      !isOpera &&\n      !isFirefox;\n    const isIOS = /iPhone|iPad|iPod/i.test(userAgent);\n    const isMac = /Macintosh|Mac OS X/i.test(userAgent);\n    const isWindows = /Win/i.test(userAgent);\n\n    setBrowserInfo({\n      isSafari,\n      isFirefox,\n      isChrome,\n      isChromium,\n      isEdge,\n      isOpera,\n      isIOS,\n      isMac,\n      isWindows,\n    });\n  }, []);\n\n  return browserInfo;\n}\n"
  },
  {
    "path": "web/src/hooks/useCCPairs.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { CCPairBasicInfo } from \"@/lib/types\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\n/**\n * Hook for fetching connector-credential pairs (CC Pairs).\n *\n * Retrieves all connector-credential pairs configured in the system. CC Pairs\n * represent connections between data sources (connectors) and their authentication\n * credentials, used for indexing content from various sources like Confluence,\n * Slack, Google Drive, etc. Uses SWR for caching and automatic revalidation.\n *\n * @returns Object containing:\n *   - ccPairs: Array of CCPairBasicInfo objects\n *   - isLoading: Boolean indicating if data is being fetched\n *   - error: Error object if the fetch failed\n *   - refetch: Function to manually reload CC pairs\n *\n * @example\n * ```tsx\n * // Display list of connected data sources\n * const ConnectorList = () => {\n *   const { ccPairs, isLoading, error } = useCCPairs();\n *\n *   if (isLoading) return <Spinner />;\n *   if (error) return <Error message=\"Failed to load connectors\" />;\n *\n *   return (\n *     <ul>\n *       {ccPairs.map(pair => (\n *         <li key={pair.id}>\n *           {pair.name} - {pair.source}\n *         </li>\n *       ))}\n *     </ul>\n *   );\n * };\n * ```\n *\n * @example\n * ```tsx\n * // Filter connectors by source type\n * const SlackConnectors = () => {\n *   const { ccPairs } = useCCPairs();\n *\n *   const slackPairs = ccPairs.filter(pair => pair.source === 'slack');\n *\n *   return <ConnectorGrid connectors={slackPairs} />;\n * };\n * ```\n *\n * @example\n * ```tsx\n * // Refresh list after connecting a new source\n * const ConnectSourceButton = () => {\n *   const { refetch } = useCCPairs();\n *\n *   const handleConnect = async () => {\n *     await connectNewSource();\n *     refetch(); // Refresh the list\n *   };\n *\n *   return <Button onClick={handleConnect}>Connect Source</Button>;\n * };\n * ```\n */\nexport default function useCCPairs(enabled: boolean = true) {\n  const { data, error, isLoading, mutate } = useSWR<CCPairBasicInfo[]>(\n    enabled ? SWR_KEYS.connectorStatus : null,\n    errorHandlingFetcher\n  );\n\n  return {\n    ccPairs: data ?? [],\n    isLoading: enabled && isLoading,\n    error,\n    refetch: mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useChatController.ts",
    "content": "\"use client\";\n\nimport {\n  buildChatUrl,\n  getAvailableContextTokens,\n  nameChatSession,\n  updateLlmOverrideForChatSession,\n} from \"@/app/app/services/lib\";\nimport { getMaxSelectedDocumentTokens } from \"@/app/app/projects/projectsService\";\nimport { DEFAULT_CONTEXT_TOKENS } from \"@/lib/constants\";\nimport { StreamStopInfo } from \"@/lib/search/interfaces\";\nimport { useCallback, useEffect, useMemo, useRef, useState } from \"react\";\nimport type { Route } from \"next\";\nimport {\n  getLastSuccessfulMessageId,\n  getLatestMessageChain,\n  MessageTreeState,\n  upsertMessages,\n  SYSTEM_NODE_ID,\n  buildImmediateMessages,\n  buildEmptyMessage,\n} from \"@/app/app/services/messageTree\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { SEARCH_PARAM_NAMES } from \"@/app/app/services/searchParams\";\nimport { SEARCH_TOOL_ID } from \"@/app/app/components/tools/constants\";\nimport { OnyxDocument } from \"@/lib/search/interfaces\";\nimport { FilterManager, LlmDescriptor, LlmManager } from \"@/lib/hooks\";\nimport {\n  BackendMessage,\n  ChatFileType,\n  CitationMap,\n  FileChatDisplay,\n  FileDescriptor,\n  Message,\n  MessageResponseIDInfo,\n  RegenerationState,\n  RetrievalType,\n  StreamingError,\n  ToolCallMetadata,\n  UserKnowledgeFilePacket,\n} from \"@/app/app/interfaces\";\nimport { StreamStopReason } from \"@/lib/search/interfaces\";\nimport { createChatSession } from \"@/app/app/services/lib\";\nimport {\n  getFinalLLM,\n  modelSupportsImageInput,\n  structureValue,\n} from \"@/lib/llmConfig/utils\";\nimport {\n  CurrentMessageFIFO,\n  updateCurrentMessageFIFO,\n} from \"@/app/app/services/currentMessageFIFO\";\nimport { buildFilters } from \"@/lib/search/utils\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  ReadonlyURLSearchParams,\n  usePathname,\n  useRouter,\n  useSearchParams,\n} from \"next/navigation\";\nimport { track, AnalyticsEvent } from \"@/lib/analytics\";\nimport { getExtensionContext } from \"@/lib/extension/utils\";\nimport useChatSessions from \"@/hooks/useChatSessions\";\nimport { usePinnedAgents } from \"@/hooks/useAgents\";\nimport {\n  useChatSessionStore,\n  useCurrentMessageTree,\n  useCurrentChatState,\n  useCurrentMessageHistory,\n} from \"@/app/app/stores/useChatSessionStore\";\nimport { Packet, MessageStart } from \"@/app/app/services/streamingModels\";\nimport useAgentPreferences from \"@/hooks/useAgentPreferences\";\nimport { useForcedTools } from \"@/lib/hooks/useForcedTools\";\nimport { ProjectFile, useProjectsContext } from \"@/providers/ProjectsContext\";\nimport { useAppParams } from \"@/hooks/appNavigation\";\nimport { projectFilesToFileDescriptors } from \"@/app/app/services/fileUtils\";\n\nconst SYSTEM_MESSAGE_ID = -3;\n\nexport interface OnSubmitProps {\n  message: string;\n  //from chat input bar\n  currentMessageFiles: ProjectFile[];\n  // from the chat bar???\n\n  deepResearch: boolean;\n\n  // optional params\n  messageIdToResend?: number;\n  queryOverride?: string;\n  forceSearch?: boolean;\n  isSeededChat?: boolean;\n  modelOverride?: LlmDescriptor;\n  regenerationRequest?: RegenerationRequest | null;\n  // Additional context injected into the LLM call but not stored/shown in chat.\n  additionalContext?: string;\n}\n\ninterface RegenerationRequest {\n  messageId: number;\n  parentMessage: Message;\n  forceSearch?: boolean;\n}\n\ninterface UseChatControllerProps {\n  filterManager: FilterManager;\n  llmManager: LlmManager;\n  liveAgent: MinimalPersonaSnapshot | undefined;\n  availableAgents: MinimalPersonaSnapshot[];\n  existingChatSessionId: string | null;\n  selectedDocuments: OnyxDocument[];\n  searchParams: ReadonlyURLSearchParams;\n  resetInputBar: () => void;\n  setSelectedAgentFromId: (agentId: number | null) => void;\n}\n\nasync function stopChatSession(chatSessionId: string): Promise<void> {\n  const response = await fetch(`/api/chat/stop-chat-session/${chatSessionId}`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n\n  if (!response.ok) {\n    throw new Error(`Failed to stop chat session: ${response.statusText}`);\n  }\n}\n\nexport default function useChatController({\n  filterManager,\n  llmManager,\n  availableAgents,\n  liveAgent,\n  existingChatSessionId,\n  selectedDocuments,\n  resetInputBar,\n  setSelectedAgentFromId,\n}: UseChatControllerProps) {\n  const pathname = usePathname();\n  const router = useRouter();\n  const searchParams = useSearchParams();\n  const params = useAppParams();\n  const { refreshChatSessions, addPendingChatSession } = useChatSessions();\n  const { pinnedAgents, togglePinnedAgent } = usePinnedAgents();\n  const { agentPreferences } = useAgentPreferences();\n  const { forcedToolIds } = useForcedTools();\n  const { fetchProjects, setCurrentMessageFiles, beginUpload } =\n    useProjectsContext();\n\n  // Use selectors to access only the specific fields we need\n  const currentSessionId = useChatSessionStore(\n    (state) => state.currentSessionId\n  );\n  const sessions = useChatSessionStore((state) => state.sessions);\n\n  // Store actions - these don't cause re-renders\n  const updateChatStateAction = useChatSessionStore(\n    (state) => state.updateChatState\n  );\n  const updateRegenerationStateAction = useChatSessionStore(\n    (state) => state.updateRegenerationState\n  );\n  const updateCanContinueAction = useChatSessionStore(\n    (state) => state.updateCanContinue\n  );\n  const createSession = useChatSessionStore((state) => state.createSession);\n  const setCurrentSession = useChatSessionStore(\n    (state) => state.setCurrentSession\n  );\n  const updateSessionMessageTree = useChatSessionStore(\n    (state) => state.updateSessionMessageTree\n  );\n  const updateSubmittedMessage = useChatSessionStore(\n    (state) => state.updateSubmittedMessage\n  );\n  const updateSelectedNodeForDocDisplay = useChatSessionStore(\n    (state) => state.updateSelectedNodeForDocDisplay\n  );\n  const setUncaughtError = useChatSessionStore(\n    (state) => state.setUncaughtError\n  );\n  const setLoadingError = useChatSessionStore((state) => state.setLoadingError);\n  const setAbortController = useChatSessionStore(\n    (state) => state.setAbortController\n  );\n  const setIsReady = useChatSessionStore((state) => state.setIsReady);\n  const setStreamingStartTime = useChatSessionStore(\n    (state) => state.setStreamingStartTime\n  );\n\n  // Use custom hooks for accessing store data\n  const currentMessageTree = useCurrentMessageTree();\n  const currentMessageHistory = useCurrentMessageHistory();\n  const currentChatState = useCurrentChatState();\n\n  const navigatingAway = useRef(false);\n\n  // Sync store state changes\n  useEffect(() => {\n    if (currentSessionId) {\n      // Keep track of current session ID for internal use\n    }\n  }, [currentSessionId]);\n\n  const getCurrentSessionId = (): string => {\n    return currentSessionId || existingChatSessionId || \"\";\n  };\n\n  const updateRegenerationState = (\n    newState: RegenerationState | null,\n    sessionId?: string | null\n  ) => {\n    const targetSessionId = sessionId || getCurrentSessionId();\n    if (targetSessionId) {\n      updateRegenerationStateAction(targetSessionId, newState);\n    }\n  };\n\n  const resetRegenerationState = (sessionId?: string | null) => {\n    updateRegenerationState(null, sessionId);\n  };\n\n  const updateCanContinue = (newState: boolean, sessionId?: string | null) => {\n    const targetSessionId = sessionId || getCurrentSessionId();\n    if (targetSessionId) {\n      updateCanContinueAction(targetSessionId, newState);\n    }\n  };\n\n  const updateStatesWithNewSessionId = (newSessionId: string) => {\n    // Create new session in store if it doesn't exist\n    const existingSession = sessions.get(newSessionId);\n    if (!existingSession) {\n      createSession(newSessionId);\n    }\n\n    // Set as current session\n    setCurrentSession(newSessionId);\n  };\n\n  const handleNewSessionNavigation = (chatSessionId: string) => {\n    // Build URL with skip-reload parameter\n    const newUrl = buildChatUrl(\n      searchParams,\n      chatSessionId,\n      null,\n      false,\n      true // skipReload\n    );\n\n    // Navigate immediately if still on chat page\n    // For NRF pages (/chat/nrf, /chat/nrf/side-panel), don't navigate immediately\n    // Let the streaming complete inline, then the user can continue chatting there\n    const isOnChatPage = pathname === \"/app\";\n\n    if (isOnChatPage && !navigatingAway.current) {\n      router.push(newUrl as Route, { scroll: false });\n    }\n\n    // Refresh sidebar - the chat was already optimistically added via addPendingChatSession\n    // so it will show as \"New Chat\". This refresh ensures we get the latest server state\n    // and will be called again after naming completes.\n    refreshChatSessions();\n    fetchProjects();\n  };\n\n  const handleNewSessionNaming = async (chatSessionId: string) => {\n    // Wait 200ms before naming (gives backend time to process)\n    // There is some delay here since we might get a \"finished\" response from the backend\n    // before the ChatSession is written to the database.\n    // TODO: remove this delay once we have a way to know when the ChatSession\n    // is written to the database.\n    await new Promise((resolve) => setTimeout(resolve, 200));\n\n    try {\n      // Name chat based on AI response\n      const response = await nameChatSession(chatSessionId);\n\n      if (!response.ok) {\n        console.error(\"Failed to name chat session, status:\", response.status);\n        // Still refresh to show the unnamed chat in sidebar\n        refreshChatSessions();\n        fetchProjects();\n        return;\n      }\n    } catch (error) {\n      console.error(\"Failed to name chat session:\", error);\n    } finally {\n      // Refresh sidebar to show new name\n      await refreshChatSessions();\n      await fetchProjects();\n    }\n  };\n\n  const upsertToCompleteMessageTree = ({\n    messages,\n    chatSessionId,\n    completeMessageTreeOverride,\n    makeLatestChildMessage = false,\n  }: {\n    messages: Message[];\n    chatSessionId: string;\n    // if calling this function repeatedly with short delay, stay may not update in time\n    // and result in weird behavipr\n    completeMessageTreeOverride?: MessageTreeState | null;\n    oldIds?: number[] | null;\n    makeLatestChildMessage?: boolean;\n  }) => {\n    let currentMessageTreeToUse =\n      completeMessageTreeOverride ||\n      (chatSessionId !== undefined &&\n        sessions.get(chatSessionId)?.messageTree) ||\n      currentMessageTree ||\n      new Map<number, Message>();\n\n    const newCompleteMessageTree = upsertMessages(\n      currentMessageTreeToUse,\n      messages,\n      makeLatestChildMessage\n    );\n\n    updateSessionMessageTree(chatSessionId, newCompleteMessageTree);\n\n    return newCompleteMessageTree;\n  };\n\n  const stopGenerating = useCallback(async () => {\n    const currentSession = getCurrentSessionId();\n    const lastMessage = currentMessageHistory[currentMessageHistory.length - 1];\n\n    // Call the backend stop endpoint to set the Redis fence\n    // This signals the backend to stop processing as soon as possible\n    // The backend will emit a STOP packet when it detects the fence\n    try {\n      await stopChatSession(currentSession);\n    } catch (error) {\n      console.error(\"Failed to stop chat session:\", error);\n      // Continue with UI cleanup even if backend call fails\n    }\n\n    // Clean up incomplete tool calls for immediate UI feedback\n    if (\n      lastMessage &&\n      lastMessage.type === \"assistant\" &&\n      lastMessage.toolCall &&\n      lastMessage.toolCall.tool_result === undefined\n    ) {\n      const newMessageTree = new Map(currentMessageTree);\n      const updatedMessage = { ...lastMessage, toolCall: null };\n      newMessageTree.set(lastMessage.nodeId, updatedMessage);\n      updateSessionMessageTree(currentSession, newMessageTree);\n    }\n\n    // Update chat state to input immediately for good UX\n    // The stream will close naturally when the backend sends the STOP packet\n    setStreamingStartTime(currentSession, null);\n    updateChatStateAction(currentSession, \"input\");\n  }, [currentMessageHistory, currentMessageTree]);\n\n  const onSubmit = useCallback(\n    async ({\n      message,\n      currentMessageFiles,\n      deepResearch,\n      messageIdToResend,\n      queryOverride,\n      forceSearch,\n      isSeededChat,\n      modelOverride,\n      regenerationRequest,\n      additionalContext,\n    }: OnSubmitProps) => {\n      const projectId = params(SEARCH_PARAM_NAMES.PROJECT_ID);\n      {\n        const params = new URLSearchParams(searchParams?.toString() || \"\");\n        if (params.has(SEARCH_PARAM_NAMES.PROJECT_ID)) {\n          params.delete(SEARCH_PARAM_NAMES.PROJECT_ID);\n          const newUrl = params.toString()\n            ? `${pathname}?${params.toString()}`\n            : pathname;\n          router.replace(newUrl as Route, { scroll: false });\n        }\n      }\n\n      updateSubmittedMessage(getCurrentSessionId(), message);\n\n      navigatingAway.current = false;\n      let frozenSessionId = getCurrentSessionId();\n      updateCanContinue(false, frozenSessionId);\n      setUncaughtError(frozenSessionId, null);\n      setLoadingError(frozenSessionId, null);\n\n      // Check if the last message was an error and remove it before proceeding with a new message\n      // Ensure this isn't a regeneration or resend, as those operations should preserve the history leading up to the point of regeneration/resend.\n      let currentMessageTreeLocal =\n        currentMessageTree || new Map<number, Message>();\n      let currentHistory = getLatestMessageChain(currentMessageTreeLocal);\n      let lastMessage = currentHistory[currentHistory.length - 1];\n\n      if (\n        lastMessage &&\n        lastMessage.type === \"error\" &&\n        !messageIdToResend &&\n        !regenerationRequest\n      ) {\n        const newMessageTree = new Map(currentMessageTreeLocal);\n        const parentNodeId = lastMessage.parentNodeId;\n\n        // Remove the error message itself\n        newMessageTree.delete(lastMessage.nodeId);\n\n        // Remove the parent message + update the parent of the parent to no longer\n        // link to the parent\n        if (parentNodeId !== null && parentNodeId !== undefined) {\n          const parentOfError = newMessageTree.get(parentNodeId);\n          if (parentOfError) {\n            const grandparentNodeId = parentOfError.parentNodeId;\n            if (grandparentNodeId !== null && grandparentNodeId !== undefined) {\n              const grandparent = newMessageTree.get(grandparentNodeId);\n              if (grandparent) {\n                // Update grandparent to no longer link to parent\n                const updatedGrandparent = {\n                  ...grandparent,\n                  childrenNodeIds: (grandparent.childrenNodeIds || []).filter(\n                    (id: number) => id !== parentNodeId\n                  ),\n                  latestChildNodeId:\n                    grandparent.latestChildNodeId === parentNodeId\n                      ? null\n                      : grandparent.latestChildNodeId,\n                };\n                newMessageTree.set(grandparentNodeId, updatedGrandparent);\n              }\n            }\n            // Remove the parent message\n            newMessageTree.delete(parentNodeId);\n          }\n        }\n        // Update the state immediately so subsequent logic uses the cleaned map\n        updateSessionMessageTree(frozenSessionId, newMessageTree);\n        console.log(\n          \"Removed previous error message ID:\",\n          lastMessage.messageId\n        );\n\n        // update state for the new world (with the error message removed)\n        currentHistory = getLatestMessageChain(newMessageTree);\n        currentMessageTreeLocal = newMessageTree;\n        lastMessage = currentHistory[currentHistory.length - 1];\n      }\n\n      if (currentChatState != \"input\") {\n        if (currentChatState == \"uploading\") {\n          toast.error(\"Please wait for the content to upload\");\n        } else {\n          toast.error(\"Please wait for the response to complete\");\n        }\n\n        return;\n      }\n\n      // Auto-pin the agent to sidebar when sending a message if not already pinned\n      if (liveAgent) {\n        const isAlreadyPinned = pinnedAgents.some(\n          (agent) => agent.id === liveAgent.id\n        );\n        if (!isAlreadyPinned) {\n          togglePinnedAgent(liveAgent, true).catch((err) => {\n            console.error(\"Failed to auto-pin agent:\", err);\n          });\n        }\n      }\n\n      let currChatSessionId: string;\n      // Check both the prop and the store's currentSessionId to determine if this is a new session\n      // For pages like NRF where existingChatSessionId is always null, we need to check if\n      // we already have a session from a previous message\n      const isNewSession = existingChatSessionId === null && !currentSessionId;\n\n      const searchParamBasedChatSessionName =\n        searchParams?.get(SEARCH_PARAM_NAMES.TITLE) || null;\n      // Auto-name only once, after the first agent response, and only when the chat isn't\n      // already explicitly named (e.g. `?title=...`).\n      const hadAnyUserMessagesBeforeSubmit = currentHistory.some(\n        (m) => m.type === \"user\"\n      );\n      if (isNewSession) {\n        currChatSessionId = await createChatSession(\n          liveAgent?.id || 0,\n          searchParamBasedChatSessionName,\n          projectId ? parseInt(projectId) : null\n        );\n\n        // Optimistically add the new chat session to the sidebar cache\n        // This ensures \"New Chat\" appears immediately, even before any messages are saved\n        addPendingChatSession({\n          chatSessionId: currChatSessionId,\n          personaId: liveAgent?.id || 0,\n          projectId: projectId ? parseInt(projectId) : null,\n        });\n      } else {\n        // Use the existing session ID from props or from the store\n        currChatSessionId =\n          existingChatSessionId || (currentSessionId as string);\n      }\n      frozenSessionId = currChatSessionId;\n      // update the selected model for the chat session if one is specified so that\n      // it persists across page reloads. Do not `await` here so that the message\n      // request can continue and this will just happen in the background.\n      // NOTE: only set the model override for the chat session once we send a\n      // message with it. If the user switches models and then starts a new\n      // chat session, it is unexpected for that model to be used when they\n      // return to this session the next day.\n      let finalLLM = modelOverride || llmManager.currentLlm;\n      updateLlmOverrideForChatSession(\n        currChatSessionId,\n        structureValue(\n          finalLLM.name || \"\",\n          finalLLM.provider || \"\",\n          finalLLM.modelName || \"\"\n        )\n      );\n\n      // mark the session as the current session\n      updateStatesWithNewSessionId(currChatSessionId);\n\n      // Navigate immediately for new sessions (before streaming starts)\n      if (isNewSession) {\n        handleNewSessionNavigation(currChatSessionId);\n      }\n\n      const shouldAutoNameChatSessionAfterResponse =\n        !searchParamBasedChatSessionName &&\n        !hadAnyUserMessagesBeforeSubmit &&\n        !sessions.get(currChatSessionId)?.description;\n\n      // set the ability to cancel the request\n      const controller = new AbortController();\n      setAbortController(currChatSessionId, controller);\n\n      const messageToResend = currentHistory.find(\n        (message) => message.messageId === messageIdToResend\n      );\n      if (messageIdToResend && regenerationRequest) {\n        updateRegenerationState(\n          { regenerating: true, finalMessageIndex: messageIdToResend + 1 },\n          frozenSessionId\n        );\n      }\n      const messageToResendParent =\n        messageToResend?.parentNodeId !== null &&\n        messageToResend?.parentNodeId !== undefined\n          ? currentMessageTreeLocal.get(messageToResend.parentNodeId)\n          : null;\n      const messageToResendIndex = messageToResend\n        ? currentHistory.indexOf(messageToResend)\n        : null;\n\n      if (!messageToResend && messageIdToResend !== undefined) {\n        toast.error(\n          \"Failed to re-send message - please refresh the page and try again.\"\n        );\n        resetRegenerationState(frozenSessionId);\n        updateChatStateAction(frozenSessionId, \"input\");\n        return;\n      }\n\n      // When editing (messageIdToResend exists but no regenerationRequest), use the new message\n      // When regenerating (regenerationRequest exists), use the original message\n      let currMessage = regenerationRequest\n        ? messageToResend?.message || message\n        : message;\n\n      // When editing a message that had files attached, preserve the original files.\n      // Skip for regeneration — the regeneration path reuses the existing user node\n      // (and its files), so merging here would send duplicates.\n      const effectiveFileDescriptors = [\n        ...projectFilesToFileDescriptors(currentMessageFiles),\n        ...(!regenerationRequest ? messageToResend?.files ?? [] : []),\n      ];\n\n      updateChatStateAction(frozenSessionId, \"loading\");\n\n      // find the parent\n      const currMessageHistory =\n        messageToResendIndex !== null\n          ? currentHistory.slice(0, messageToResendIndex)\n          : currentHistory;\n\n      let parentMessage =\n        messageToResendParent ||\n        (currMessageHistory.length > 0\n          ? currMessageHistory[currMessageHistory.length - 1]\n          : null) ||\n        (currentMessageTreeLocal.size === 1\n          ? Array.from(currentMessageTreeLocal.values())[0]\n          : null);\n\n      // Add user message immediately to the message tree so that the chat\n      // immediately reflects the user message\n      let initialUserNode: Message;\n      let initialAgentNode: Message;\n\n      if (regenerationRequest) {\n        // For regeneration: keep the existing user message, only create new agent\n        initialUserNode = regenerationRequest.parentMessage;\n        initialAgentNode = buildEmptyMessage({\n          messageType: \"assistant\",\n          parentNodeId: initialUserNode.nodeId,\n          nodeIdOffset: 1,\n        });\n      } else {\n        // For new messages or editing: create/update user message and assistant\n        const parentNodeIdForMessage = messageToResend\n          ? messageToResend.parentNodeId || SYSTEM_NODE_ID\n          : parentMessage?.nodeId || SYSTEM_NODE_ID;\n        const result = buildImmediateMessages(\n          parentNodeIdForMessage,\n          currMessage,\n          effectiveFileDescriptors,\n          messageToResend\n        );\n        initialUserNode = result.initialUserNode;\n        initialAgentNode = result.initialAgentNode;\n      }\n\n      // make messages appear + clear input bar\n      const messagesToUpsert = regenerationRequest\n        ? [initialAgentNode] // Only upsert the new agent for regeneration\n        : [initialUserNode, initialAgentNode]; // Upsert both for normal/edit flow\n      currentMessageTreeLocal = upsertToCompleteMessageTree({\n        messages: messagesToUpsert,\n        completeMessageTreeOverride: currentMessageTreeLocal,\n        chatSessionId: frozenSessionId,\n      });\n      resetInputBar();\n\n      let answer = \"\";\n\n      const stopReason: StreamStopReason | null = null;\n      let query: string | null = null;\n      let retrievalType: RetrievalType =\n        selectedDocuments.length > 0\n          ? RetrievalType.SelectedDocs\n          : RetrievalType.None;\n      let documents: OnyxDocument[] = selectedDocuments;\n      let citations: CitationMap | null = null;\n      let aiMessageImages: FileDescriptor[] | null = null;\n      let error: string | null = null;\n      let stackTrace: string | null = null;\n      let errorCode: string | null = null;\n      let isRetryable: boolean = true;\n      let errorDetails: Record<string, any> | null = null;\n\n      let finalMessage: BackendMessage | null = null;\n      let toolCall: ToolCallMetadata | null = null;\n      let files = effectiveFileDescriptors;\n      let packets: Packet[] = [];\n      let packetsVersion = 0;\n\n      let newUserMessageId: number | null = null;\n      let newAgentMessageId: number | null = null;\n\n      try {\n        const lastSuccessfulMessageId = getLastSuccessfulMessageId(\n          currentMessageTreeLocal\n        );\n        const disabledToolIds = liveAgent\n          ? agentPreferences?.[liveAgent?.id]?.disabled_tool_ids\n          : undefined;\n\n        // Find the search tool's numeric ID for forceSearch\n        const searchToolNumericId = liveAgent?.tools.find(\n          (tool) => tool.in_code_tool_id === SEARCH_TOOL_ID\n        )?.id;\n\n        // Determine the forced tool ID:\n        // 1. If forceSearch is true, use the search tool's numeric ID\n        // 2. Otherwise, use the first forced tool ID from the forcedToolIds array\n        const effectiveForcedToolId = forceSearch\n          ? searchToolNumericId ?? null\n          : forcedToolIds.length > 0\n            ? forcedToolIds[0]\n            : null;\n\n        // Determine origin for telemetry tracking (also used for frontend PostHog tracking below)\n        const { isExtension, context: extensionContext } =\n          getExtensionContext();\n        const messageOrigin = isExtension ? \"chrome_extension\" : \"webapp\";\n\n        const stack = new CurrentMessageFIFO();\n        updateCurrentMessageFIFO(stack, {\n          signal: controller.signal,\n          message: currMessage,\n          fileDescriptors: effectiveFileDescriptors,\n          parentMessageId: (() => {\n            const parentId =\n              regenerationRequest?.parentMessage.messageId ||\n              messageToResendParent?.messageId ||\n              lastSuccessfulMessageId;\n            // Don't send SYSTEM_MESSAGE_ID (-3) as parent, use null instead\n            // The backend expects null for \"the first message in the chat\"\n            return parentId === SYSTEM_MESSAGE_ID ? null : parentId;\n          })(),\n          chatSessionId: currChatSessionId,\n          filters: buildFilters(\n            filterManager.selectedSources,\n            filterManager.selectedDocumentSets,\n            filterManager.timeRange,\n            filterManager.selectedTags\n          ),\n          modelProvider:\n            modelOverride?.name || llmManager.currentLlm.name || undefined,\n          modelVersion:\n            modelOverride?.modelName ||\n            llmManager.currentLlm.modelName ||\n            searchParams?.get(SEARCH_PARAM_NAMES.MODEL_VERSION) ||\n            undefined,\n          temperature: llmManager.temperature || undefined,\n          deepResearch,\n          enabledToolIds:\n            disabledToolIds && liveAgent\n              ? liveAgent.tools\n                  .filter((tool) => !disabledToolIds?.includes(tool.id))\n                  .map((tool) => tool.id)\n              : undefined,\n          forcedToolId: effectiveForcedToolId,\n          origin: messageOrigin,\n          additionalContext,\n        });\n\n        const delay = (ms: number) => {\n          return new Promise((resolve) => setTimeout(resolve, ms));\n        };\n\n        await delay(50);\n        while (!stack.isComplete || !stack.isEmpty()) {\n          if (stack.isEmpty()) {\n            await delay(0.5);\n          }\n\n          if (!stack.isEmpty() && !controller.signal.aborted) {\n            const packet = stack.nextPacket();\n            if (!packet) {\n              continue;\n            }\n\n            // We've processed initial packets and are starting to stream content.\n            // Transition from 'loading' to 'streaming'.\n            updateChatStateAction(frozenSessionId, \"streaming\");\n            // Only set start time once (guard prevents reset on each packet)\n            // Use getState() to avoid stale closure - sessions captured at render time becomes stale in async loop\n            if (\n              !useChatSessionStore.getState().sessions.get(frozenSessionId)\n                ?.streamingStartTime\n            ) {\n              setStreamingStartTime(frozenSessionId, Date.now());\n            }\n\n            if ((packet as MessageResponseIDInfo).user_message_id) {\n              newUserMessageId = (packet as MessageResponseIDInfo)\n                .user_message_id;\n\n              // Track extension queries in PostHog (reuses isExtension/extensionContext from above)\n              if (isExtension) {\n                track(AnalyticsEvent.EXTENSION_CHAT_QUERY, {\n                  extension_context: extensionContext,\n                  assistant_id: liveAgent?.id,\n                  has_files: effectiveFileDescriptors.length > 0,\n                  deep_research: deepResearch,\n                });\n              }\n            }\n\n            if (\n              (packet as MessageResponseIDInfo).reserved_assistant_message_id\n            ) {\n              newAgentMessageId = (packet as MessageResponseIDInfo)\n                .reserved_assistant_message_id;\n            }\n\n            if (Object.hasOwn(packet, \"user_files\")) {\n              const userFiles = (packet as UserKnowledgeFilePacket).user_files;\n              // Ensure files are unique by id\n              const newUserFiles = userFiles.filter(\n                (newFile) =>\n                  !files.some((existingFile) => existingFile.id === newFile.id)\n              );\n              files = files.concat(newUserFiles);\n            }\n\n            if (Object.hasOwn(packet, \"file_ids\")) {\n              aiMessageImages = (packet as FileChatDisplay).file_ids.map(\n                (fileId) => {\n                  return {\n                    id: fileId,\n                    type: ChatFileType.IMAGE,\n                  };\n                }\n              );\n            } else if (\n              Object.hasOwn(packet, \"error\") &&\n              (packet as any).error != null\n            ) {\n              const streamingError = packet as StreamingError;\n              error = streamingError.error;\n              stackTrace = streamingError.stack_trace || null;\n              errorCode = streamingError.error_code || null;\n              isRetryable = streamingError.is_retryable ?? true;\n              errorDetails = streamingError.details || null;\n\n              setUncaughtError(frozenSessionId, streamingError.error);\n              updateChatStateAction(frozenSessionId, \"input\");\n              updateSubmittedMessage(getCurrentSessionId(), \"\");\n\n              throw new Error(streamingError.error);\n            } else if (Object.hasOwn(packet, \"message_id\")) {\n              finalMessage = packet as BackendMessage;\n            } else if (Object.hasOwn(packet, \"stop_reason\")) {\n              const stop_reason = (packet as StreamStopInfo).stop_reason;\n              if (stop_reason === StreamStopReason.CONTEXT_LENGTH) {\n                updateCanContinue(true, frozenSessionId);\n              }\n            } else if (Object.hasOwn(packet, \"obj\")) {\n              packets.push(packet as Packet);\n              packetsVersion++;\n\n              // Check if the packet contains document information\n              const packetObj = (packet as Packet).obj;\n\n              if (packetObj.type === \"citation_info\") {\n                // Individual citation packet from backend streaming\n                const citationInfo = packetObj as {\n                  type: \"citation_info\";\n                  citation_number: number;\n                  document_id: string;\n                };\n                // Incrementally build citations map\n                citations = {\n                  ...(citations || {}),\n                  [citationInfo.citation_number]: citationInfo.document_id,\n                };\n              } else if (packetObj.type === \"message_start\") {\n                const messageStart = packetObj as MessageStart;\n                if (messageStart.final_documents) {\n                  documents = messageStart.final_documents;\n                  updateSelectedNodeForDocDisplay(\n                    frozenSessionId,\n                    initialAgentNode.nodeId\n                  );\n                }\n              }\n            } else {\n              console.warn(\"Unknown packet:\", JSON.stringify(packet));\n            }\n\n            // on initial message send, we insert a dummy system message\n            // set this as the parent here if no parent is set\n            parentMessage =\n              parentMessage || currentMessageTreeLocal?.get(SYSTEM_NODE_ID)!;\n\n            currentMessageTreeLocal = upsertToCompleteMessageTree({\n              messages: [\n                {\n                  ...initialUserNode,\n                  messageId: newUserMessageId ?? undefined,\n                  files: files,\n                },\n                {\n                  ...initialAgentNode,\n                  messageId: newAgentMessageId ?? undefined,\n                  message: error || answer,\n                  type: error ? \"error\" : \"assistant\",\n                  retrievalType,\n                  query: finalMessage?.rephrased_query || query,\n                  documents: documents,\n                  citations: finalMessage?.citations || citations || {},\n                  files: finalMessage?.files || aiMessageImages || [],\n                  toolCall: finalMessage?.tool_call || toolCall,\n                  stackTrace: stackTrace,\n                  overridden_model: finalMessage?.overridden_model,\n                  stopReason: stopReason,\n                  packets: packets,\n                  packetCount: packets.length,\n                  processingDurationSeconds:\n                    finalMessage?.processing_duration_seconds ??\n                    (() => {\n                      const startTime = useChatSessionStore\n                        .getState()\n                        .getStreamingStartTime(frozenSessionId);\n                      return startTime\n                        ? Math.floor((Date.now() - startTime) / 1000)\n                        : undefined;\n                    })(),\n                },\n              ],\n              // Pass the latest map state\n              completeMessageTreeOverride: currentMessageTreeLocal,\n              chatSessionId: frozenSessionId!,\n            });\n          }\n        }\n        // Surface FIFO errors (e.g. 429 before any packets arrive) so the\n        // catch block replaces the thinking placeholder with an error message.\n        if (stack.error) {\n          throw new Error(stack.error);\n        }\n      } catch (e: any) {\n        console.log(\"Error:\", e);\n        const errorMsg = e.message;\n        currentMessageTreeLocal = upsertToCompleteMessageTree({\n          messages: [\n            {\n              nodeId: initialUserNode.nodeId,\n              message: currMessage,\n              type: \"user\",\n              files: effectiveFileDescriptors,\n              toolCall: null,\n              parentNodeId: parentMessage?.nodeId || SYSTEM_NODE_ID,\n              packets: [],\n              packetCount: 0,\n            },\n            {\n              nodeId: initialAgentNode.nodeId,\n              message: errorMsg,\n              type: \"error\",\n              files: aiMessageImages || [],\n              toolCall: null,\n              parentNodeId: initialUserNode.nodeId,\n              packets: [],\n              packetCount: 0,\n              stackTrace: stackTrace,\n              errorCode: errorCode,\n              isRetryable: isRetryable,\n              errorDetails: errorDetails,\n            },\n          ],\n          completeMessageTreeOverride: currentMessageTreeLocal,\n          chatSessionId: frozenSessionId,\n        });\n      }\n\n      resetRegenerationState(frozenSessionId);\n      setStreamingStartTime(frozenSessionId, null);\n      updateChatStateAction(frozenSessionId, \"input\");\n\n      // Name the chat now that we have the first AI response (navigation already happened before streaming)\n      if (shouldAutoNameChatSessionAfterResponse) {\n        handleNewSessionNaming(currChatSessionId);\n      }\n    },\n    [\n      // Narrow to stable fields from managers to avoid re-creation\n      filterManager.selectedSources,\n      filterManager.selectedDocumentSets,\n      filterManager.selectedTags,\n      filterManager.timeRange,\n      llmManager.currentLlm,\n      llmManager.temperature,\n      // Others that affect logic\n      liveAgent,\n      availableAgents,\n      existingChatSessionId,\n      selectedDocuments,\n      searchParams,\n      resetInputBar,\n      setSelectedAgentFromId,\n      updateSelectedNodeForDocDisplay,\n      currentMessageTree,\n      currentChatState,\n      // Ensure latest forced tools are used when submitting\n      forcedToolIds,\n      // Keep tool preference-derived values fresh\n      agentPreferences,\n      fetchProjects,\n      // For auto-pinning agents\n      pinnedAgents,\n      togglePinnedAgent,\n    ]\n  );\n\n  const handleMessageSpecificFileUpload = useCallback(\n    async (acceptedFiles: File[]) => {\n      const [_, llmModel] = getFinalLLM(\n        llmManager.llmProviders || [],\n        liveAgent || null,\n        llmManager.currentLlm\n      );\n      const llmAcceptsImages = modelSupportsImageInput(\n        llmManager.llmProviders || [],\n        llmModel\n      );\n\n      const imageFiles = acceptedFiles.filter((file) =>\n        file.type.startsWith(\"image/\")\n      );\n\n      if (imageFiles.length > 0 && !llmAcceptsImages) {\n        toast.error(\n          \"The current model does not support image input. Please select a model with Vision support.\"\n        );\n        return;\n      }\n      updateChatStateAction(getCurrentSessionId(), \"uploading\");\n      const uploadedMessageFiles = await beginUpload(\n        Array.from(acceptedFiles),\n        null\n      );\n      setCurrentMessageFiles((prev) => [...prev, ...uploadedMessageFiles]);\n      updateChatStateAction(getCurrentSessionId(), \"input\");\n    },\n    [liveAgent, llmManager, forcedToolIds]\n  );\n\n  useEffect(() => {\n    return () => {\n      // Cleanup which only runs when the component unmounts (i.e. when you navigate away).\n      const currentSession = getCurrentSessionId();\n      const abortController = sessions.get(currentSession)?.abortController;\n      if (abortController) {\n        abortController.abort();\n        setAbortController(currentSession, new AbortController());\n      }\n    };\n  }, [pathname]);\n\n  // update chosen assistant if we navigate between pages\n  useEffect(() => {\n    if (currentMessageHistory.length === 0 && existingChatSessionId === null) {\n      // Select from available assistants so shared assistants appear.\n      setSelectedAgentFromId(null);\n    }\n  }, [existingChatSessionId, availableAgents, currentMessageHistory.length]);\n\n  useEffect(() => {\n    const handleSlackChatRedirect = async () => {\n      const slackChatId = searchParams.get(\"slackChatId\");\n      if (!slackChatId) return;\n\n      // Set isReady to false before starting retrieval to display loading text\n      const currentSessionId = getCurrentSessionId();\n      if (currentSessionId) {\n        setIsReady(currentSessionId, false);\n      }\n\n      try {\n        const response = await fetch(\"/api/chat/seed-chat-session-from-slack\", {\n          method: \"POST\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n          body: JSON.stringify({\n            chat_session_id: slackChatId,\n          }),\n        });\n\n        if (!response.ok) {\n          throw new Error(\"Failed to seed chat from Slack\");\n        }\n\n        const data = await response.json();\n\n        router.push(data.redirect_url);\n      } catch (error) {\n        console.error(\"Error seeding chat from Slack:\", error);\n        toast.error(\"Failed to load chat from Slack\");\n      }\n    };\n\n    handleSlackChatRedirect();\n  }, [searchParams, router]);\n\n  // Available context tokens: if a chat session exists, fetch from the session\n  // API (dynamic per session/model). Otherwise derive from the persona's max\n  // document tokens. The backend already accounts for system prompt, tools,\n  // and user-message reservations.\n  const [availableContextTokens, setAvailableContextTokens] = useState<number>(\n    DEFAULT_CONTEXT_TOKENS\n  );\n\n  useEffect(() => {\n    if (!llmManager.hasAnyProvider) return;\n\n    let cancelled = false;\n\n    const setIfActive = (tokens: number) => {\n      if (!cancelled) setAvailableContextTokens(tokens);\n    };\n\n    // Prefer the Zustand session ID, but fall back to the URL-derived prop\n    // so we don't incorrectly take the persona path while the store is\n    // still initialising on navigation to an existing chat.\n    const sessionId = currentSessionId || existingChatSessionId;\n\n    (async () => {\n      try {\n        if (sessionId) {\n          const available = await getAvailableContextTokens(sessionId);\n          setIfActive(available ?? DEFAULT_CONTEXT_TOKENS);\n          return;\n        }\n\n        const personaId = liveAgent?.id;\n        if (personaId == null) {\n          setIfActive(DEFAULT_CONTEXT_TOKENS);\n          return;\n        }\n\n        const maxTokens = await getMaxSelectedDocumentTokens(personaId);\n        setIfActive(maxTokens ?? DEFAULT_CONTEXT_TOKENS);\n      } catch (e) {\n        console.error(\"Failed to fetch available context tokens:\", e);\n        setIfActive(DEFAULT_CONTEXT_TOKENS);\n      }\n    })();\n\n    return () => {\n      cancelled = true;\n    };\n  }, [\n    currentSessionId,\n    existingChatSessionId,\n    liveAgent?.id,\n    llmManager.hasAnyProvider,\n  ]);\n\n  // check if there's an image file in the message history so that we know\n  // which LLMs are available to use\n  const imageFileInMessageHistory = useMemo(() => {\n    return currentMessageHistory\n      .filter((message) => message.type === \"user\")\n      .some((message) =>\n        message.files.some((file) => file.type === ChatFileType.IMAGE)\n      );\n  }, [currentMessageHistory]);\n\n  useEffect(() => {\n    llmManager.updateImageFilesPresent(imageFileInMessageHistory);\n  }, [imageFileInMessageHistory]);\n\n  // set isReady once component is mounted\n  useEffect(() => {\n    const currentSessionId = getCurrentSessionId();\n    if (currentSessionId) {\n      setIsReady(currentSessionId, true);\n    }\n  }, []);\n\n  return {\n    // actions\n    onSubmit,\n    stopGenerating,\n    handleMessageSpecificFileUpload,\n    // data\n    availableContextTokens,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useChatSessionController.ts",
    "content": "\"use client\";\n\nimport { useEffect, useCallback, useState } from \"react\";\nimport { ReadonlyURLSearchParams } from \"next/navigation\";\nimport {\n  nameChatSession,\n  processRawChatHistory,\n  patchMessageToBeLatest,\n} from \"@/app/app/services/lib\";\nimport {\n  getLatestMessageChain,\n  setMessageAsLatest,\n} from \"@/app/app/services/messageTree\";\nimport {\n  BackendChatSession,\n  ChatSessionSharedStatus,\n} from \"@/app/app/interfaces\";\nimport {\n  SEARCH_PARAM_NAMES,\n  shouldSubmitOnLoad,\n} from \"@/app/app/services/searchParams\";\nimport { FilterManager } from \"@/lib/hooks\";\nimport { OnyxDocument } from \"@/lib/search/interfaces\";\nimport {\n  useChatSessionStore,\n  useCurrentMessageHistory,\n} from \"@/app/app/stores/useChatSessionStore\";\nimport { useForcedTools } from \"@/lib/hooks/useForcedTools\";\nimport { ProjectFile } from \"@/app/app/projects/projectsService\";\nimport { getSessionProjectTokenCount } from \"@/app/app/projects/projectsService\";\nimport { getProjectFilesForSession } from \"@/app/app/projects/projectsService\";\nimport { AppInputBarHandle } from \"@/sections/input/AppInputBar\";\n\ninterface UseChatSessionControllerProps {\n  existingChatSessionId: string | null;\n  searchParams: ReadonlyURLSearchParams;\n  filterManager: FilterManager;\n  firstMessage?: string;\n\n  // UI state setters\n  setSelectedAgentFromId: (agentId: number | null) => void;\n  setSelectedDocuments: (documents: OnyxDocument[]) => void;\n  setCurrentMessageFiles: (\n    files: ProjectFile[] | ((prev: ProjectFile[]) => ProjectFile[])\n  ) => void;\n\n  // Refs\n  chatSessionIdRef: React.RefObject<string | null>;\n  loadedIdSessionRef: React.RefObject<string | null>;\n  chatInputBarRef: React.RefObject<AppInputBarHandle | null>;\n  isInitialLoad: React.RefObject<boolean>;\n  submitOnLoadPerformed: React.RefObject<boolean>;\n\n  // Actions\n  refreshChatSessions: () => void;\n  onSubmit: (params: {\n    message: string;\n    currentMessageFiles: ProjectFile[];\n    deepResearch: boolean;\n    isSeededChat?: boolean;\n  }) => Promise<void>;\n}\n\nexport type SessionFetchError = {\n  type: \"not_found\" | \"access_denied\" | \"unknown\";\n  detail: string;\n} | null;\n\nexport default function useChatSessionController({\n  existingChatSessionId,\n  searchParams,\n  filterManager,\n  firstMessage,\n  setSelectedAgentFromId,\n  setSelectedDocuments,\n  setCurrentMessageFiles,\n  chatSessionIdRef,\n  loadedIdSessionRef,\n  chatInputBarRef,\n  isInitialLoad,\n  submitOnLoadPerformed,\n  refreshChatSessions,\n  onSubmit,\n}: UseChatSessionControllerProps) {\n  const [currentSessionFileTokenCount, setCurrentSessionFileTokenCount] =\n    useState<number>(0);\n  const [projectFiles, setProjectFiles] = useState<ProjectFile[]>([]);\n  const [sessionFetchError, setSessionFetchError] =\n    useState<SessionFetchError>(null);\n  // Store actions\n  const updateSessionAndMessageTree = useChatSessionStore(\n    (state) => state.updateSessionAndMessageTree\n  );\n  const updateSessionMessageTree = useChatSessionStore(\n    (state) => state.updateSessionMessageTree\n  );\n  const setIsFetchingChatMessages = useChatSessionStore(\n    (state) => state.setIsFetchingChatMessages\n  );\n  const setCurrentSession = useChatSessionStore(\n    (state) => state.setCurrentSession\n  );\n  const initializeSession = useChatSessionStore(\n    (state) => state.initializeSession\n  );\n  const updateCurrentChatSessionSharedStatus = useChatSessionStore(\n    (state) => state.updateCurrentChatSessionSharedStatus\n  );\n  const updateCurrentSelectedNodeForDocDisplay = useChatSessionStore(\n    (state) => state.updateCurrentSelectedNodeForDocDisplay\n  );\n  const currentChatState = useChatSessionStore(\n    (state) =>\n      state.sessions.get(state.currentSessionId || \"\")?.chatState || \"input\"\n  );\n  const currentChatHistory = useCurrentMessageHistory();\n  const chatSessions = useChatSessionStore((state) => state.sessions);\n  const { setForcedToolIds } = useForcedTools();\n\n  // Fetch chat messages for the chat session\n  useEffect(() => {\n    const priorChatSessionId = chatSessionIdRef.current;\n    const loadedSessionId = loadedIdSessionRef.current;\n    chatSessionIdRef.current = existingChatSessionId;\n    loadedIdSessionRef.current = existingChatSessionId;\n\n    chatInputBarRef.current?.focus();\n\n    const isCreatingNewSession =\n      priorChatSessionId === null && existingChatSessionId !== null;\n    const isSwitchingBetweenSessions =\n      priorChatSessionId !== null &&\n      existingChatSessionId !== priorChatSessionId;\n\n    // Clear uploaded files on any session change (they're already in context)\n    if (isCreatingNewSession || isSwitchingBetweenSessions) {\n      setCurrentMessageFiles([]);\n    }\n\n    // Only reset filters/selections when switching between existing sessions\n    if (isSwitchingBetweenSessions) {\n      setSelectedDocuments([]);\n      filterManager.setSelectedDocumentSets([]);\n      filterManager.setSelectedTags([]);\n      filterManager.setTimeRange(null);\n\n      // Remove uploaded files\n      setCurrentMessageFiles([]);\n\n      // If switching from one chat to another, then need to scroll again\n      // If we're creating a brand new chat, then don't need to scroll\n      if (priorChatSessionId !== null) {\n        setSelectedDocuments([]);\n\n        // Clear forced tool ids if and only if we're switching to a new chat session\n        setForcedToolIds([]);\n      }\n    }\n\n    async function initialSessionFetch() {\n      setSessionFetchError(null);\n\n      if (existingChatSessionId === null) {\n        // Clear the current session in the store to show intro messages\n        setCurrentSession(null);\n\n        // Reset the selected agent back to default\n        setSelectedAgentFromId(null);\n        updateCurrentChatSessionSharedStatus(ChatSessionSharedStatus.Private);\n\n        // If we're supposed to submit on initial load, then do that here\n        if (\n          shouldSubmitOnLoad(searchParams) &&\n          !submitOnLoadPerformed.current\n        ) {\n          submitOnLoadPerformed.current = true;\n          await onSubmit({\n            message: firstMessage || \"\",\n            currentMessageFiles: [],\n            deepResearch: false,\n          });\n        }\n        return;\n      }\n\n      // Set the current session first, then set fetching state to prevent intro flash\n      setCurrentSession(existingChatSessionId);\n      setIsFetchingChatMessages(existingChatSessionId, true);\n\n      let response: Response;\n      try {\n        response = await fetch(\n          `/api/chat/get-chat-session/${existingChatSessionId}`\n        );\n      } catch (error) {\n        setIsFetchingChatMessages(existingChatSessionId, false);\n        console.error(\"Failed to fetch chat session\", {\n          chatSessionId: existingChatSessionId,\n          error,\n        });\n        setSessionFetchError({\n          type: \"unknown\",\n          detail: \"Failed to load chat session. Please check your connection.\",\n        });\n        return;\n      }\n\n      if (!response.ok) {\n        setIsFetchingChatMessages(existingChatSessionId, false);\n        let detail = \"An unexpected error occurred.\";\n        try {\n          const errorBody = await response.json();\n          detail = errorBody.detail || detail;\n        } catch {\n          // ignore parse errors\n        }\n        const type =\n          response.status === 404\n            ? \"not_found\"\n            : response.status === 403\n              ? \"access_denied\"\n              : \"unknown\";\n        setSessionFetchError({ type, detail });\n        return;\n      }\n\n      const session = await response.json();\n      const chatSession = session as BackendChatSession;\n      setSelectedAgentFromId(chatSession.persona_id);\n\n      // Ensure the current session is set to the actual session ID from the response\n      setCurrentSession(chatSession.chat_session_id);\n\n      // Initialize session data including personaId\n      initializeSession(chatSession.chat_session_id, chatSession);\n\n      const newMessageMap = processRawChatHistory(\n        chatSession.messages,\n        chatSession.packets\n      );\n      const newMessageHistory = getLatestMessageChain(newMessageMap);\n\n      // Update message history except for edge where where\n      // last message is an error and we're on a new chat.\n      // This corresponds to a \"renaming\" of chat, which occurs after first message\n      // stream\n      if (\n        (newMessageHistory[newMessageHistory.length - 1]?.type !== \"error\" ||\n          loadedSessionId != null) &&\n        !(\n          currentChatState == \"toolBuilding\" ||\n          currentChatState == \"streaming\" ||\n          currentChatState == \"loading\"\n        )\n      ) {\n        updateCurrentSelectedNodeForDocDisplay(\n          newMessageHistory[newMessageHistory.length - 1]?.nodeId ?? null\n        );\n\n        updateSessionAndMessageTree(chatSession.chat_session_id, newMessageMap);\n        chatSessionIdRef.current = chatSession.chat_session_id;\n      }\n\n      setIsFetchingChatMessages(chatSession.chat_session_id, false);\n\n      // Fetch token count for this chat session's project (if any)\n      try {\n        if (chatSession.chat_session_id) {\n          const total = await getSessionProjectTokenCount(\n            chatSession.chat_session_id\n          );\n          setCurrentSessionFileTokenCount(total || 0);\n        } else {\n          setCurrentSessionFileTokenCount(0);\n        }\n      } catch (e) {\n        setCurrentSessionFileTokenCount(0);\n      }\n\n      // Fetch project files for this chat session (if any)\n      try {\n        if (chatSession.chat_session_id) {\n          const files = await getProjectFilesForSession(\n            chatSession.chat_session_id\n          );\n          setProjectFiles(files || []);\n        } else {\n          setProjectFiles([]);\n        }\n      } catch (e) {\n        setProjectFiles([]);\n      }\n\n      // If this is a seeded chat, then kick off the AI message generation\n      if (\n        newMessageHistory.length === 1 &&\n        !submitOnLoadPerformed.current &&\n        searchParams?.get(SEARCH_PARAM_NAMES.SEEDED) === \"true\"\n      ) {\n        submitOnLoadPerformed.current = true;\n\n        const seededMessage = newMessageHistory[0]?.message;\n        if (!seededMessage) {\n          return;\n        }\n\n        await onSubmit({\n          message: seededMessage,\n          isSeededChat: true,\n          currentMessageFiles: [],\n          deepResearch: false,\n        });\n        // Force re-name if the chat session doesn't have one\n        if (!chatSession.description) {\n          await nameChatSession(existingChatSessionId);\n          refreshChatSessions();\n        }\n      } else if (newMessageHistory.length >= 2 && !chatSession.description) {\n        await nameChatSession(existingChatSessionId);\n        refreshChatSessions();\n      }\n    }\n\n    // SKIP_RELOAD is used after completing the first message in a new session.\n    // We don't need to re-fetch at that point, we have everything we need.\n    // For safety, we should always re-fetch if there are no messages in the chat history.\n    if (\n      !searchParams?.get(SEARCH_PARAM_NAMES.SKIP_RELOAD) ||\n      currentChatHistory.length === 0\n    ) {\n      const existingChatSession = existingChatSessionId\n        ? chatSessions.get(existingChatSessionId)\n        : null;\n\n      if (\n        !existingChatSession?.chatState ||\n        existingChatSession.chatState === \"input\"\n      ) {\n        initialSessionFetch();\n      } else {\n        // no need to fetch if the chat session is currently streaming (it would be )\n        // out of date).\n        // this means that the user kicked off a message, switched to a different\n        // chat, and then switched back.\n        setCurrentSession(existingChatSessionId);\n      }\n    } else {\n      // Remove SKIP_RELOAD param without triggering a page reload\n      const currentSearchParams = new URLSearchParams(searchParams?.toString());\n      if (currentSearchParams.has(SEARCH_PARAM_NAMES.SKIP_RELOAD)) {\n        currentSearchParams.delete(SEARCH_PARAM_NAMES.SKIP_RELOAD);\n        const newUrl = `${window.location.pathname}${\n          currentSearchParams.toString()\n            ? \"?\" + currentSearchParams.toString()\n            : \"\"\n        }`;\n        window.history.replaceState({}, \"\", newUrl);\n      }\n    }\n  }, [\n    existingChatSessionId,\n    searchParams?.get(SEARCH_PARAM_NAMES.PERSONA_ID),\n    // Note: We're intentionally not including all dependencies to avoid infinite loops\n    // This effect should only run when existingChatSessionId or persona ID changes\n  ]);\n\n  const onMessageSelection = useCallback(\n    (nodeId: number) => {\n      updateCurrentSelectedNodeForDocDisplay(nodeId);\n      const currentMessageTree = useChatSessionStore\n        .getState()\n        .sessions.get(useChatSessionStore.getState().currentSessionId || \"\")\n        ?.messageTree;\n\n      if (currentMessageTree) {\n        const newMessageTree = setMessageAsLatest(currentMessageTree, nodeId);\n        const currentSessionId =\n          useChatSessionStore.getState().currentSessionId;\n        if (currentSessionId) {\n          updateSessionMessageTree(currentSessionId, newMessageTree);\n        }\n\n        const message = currentMessageTree.get(nodeId);\n\n        if (message?.messageId) {\n          // Makes actual API call to set message as latest in the DB so we can\n          // edit this message and so it sticks around on page reload\n          patchMessageToBeLatest(message.messageId);\n        } else {\n          console.error(\"Message has no messageId\", nodeId);\n        }\n      }\n    },\n    [updateCurrentSelectedNodeForDocDisplay, updateSessionMessageTree]\n  );\n\n  return {\n    currentSessionFileTokenCount,\n    onMessageSelection,\n    projectFiles,\n    sessionFetchError,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useChatSessions.ts",
    "content": "\"use client\";\n\nimport {\n  useCallback,\n  useEffect,\n  useMemo,\n  useState,\n  useSyncExternalStore,\n} from \"react\";\nimport useSWRInfinite from \"swr/infinite\";\nimport { ChatSession, ChatSessionSharedStatus } from \"@/app/app/interfaces\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport useAppFocus from \"./useAppFocus\";\nimport { useAgents } from \"./useAgents\";\nimport { DEFAULT_AGENT_ID } from \"@/lib/constants\";\n\nconst PAGE_SIZE = 50;\nconst MIN_LOADING_DURATION_MS = 500;\n\ninterface ChatSessionsResponse {\n  sessions: ChatSession[];\n  has_more: boolean;\n}\n\nexport interface PendingChatSessionParams {\n  chatSessionId: string;\n  personaId: number;\n  projectId?: number | null;\n}\n\ninterface UseChatSessionsOutput {\n  chatSessions: ChatSession[];\n  currentChatSessionId: string | null;\n  currentChatSession: ChatSession | null;\n  agentForCurrentChatSession: MinimalPersonaSnapshot | null;\n  isLoading: boolean;\n  error: any;\n  refreshChatSessions: () => Promise<ChatSessionsResponse[] | undefined>;\n  addPendingChatSession: (params: PendingChatSessionParams) => void;\n  removeSession: (sessionId: string) => void;\n  hasMore: boolean;\n  isLoadingMore: boolean;\n  loadMore: () => void;\n}\n\n// ---------------------------------------------------------------------------\n// Shared module-level store for pending chat sessions\n// ---------------------------------------------------------------------------\n// Pending sessions are optimistic new sessions shown in the sidebar before\n// the server returns them. This must be module-level so all hook instances\n// (sidebar, ChatButton, etc.) share the same state.\n\nconst pendingSessionsStore = {\n  sessions: new Map<string, ChatSession>(),\n  listeners: new Set<() => void>(),\n  cachedSnapshot: [] as ChatSession[],\n\n  add(session: ChatSession) {\n    this.sessions.set(session.id, session);\n    this.updateSnapshot();\n    this.notify();\n  },\n\n  remove(sessionId: string) {\n    if (this.sessions.delete(sessionId)) {\n      this.updateSnapshot();\n      this.notify();\n    }\n  },\n\n  has(sessionId: string): boolean {\n    return this.sessions.has(sessionId);\n  },\n\n  subscribe(listener: () => void) {\n    this.listeners.add(listener);\n    return () => this.listeners.delete(listener);\n  },\n\n  notify() {\n    this.listeners.forEach((listener) => listener());\n  },\n\n  updateSnapshot() {\n    this.cachedSnapshot = Array.from(this.sessions.values());\n  },\n\n  getSnapshot(): ChatSession[] {\n    return this.cachedSnapshot;\n  },\n};\n\n// Stable empty array for SSR\nconst EMPTY_SESSIONS: ChatSession[] = [];\n\nfunction usePendingSessions(): ChatSession[] {\n  return useSyncExternalStore(\n    (callback) => pendingSessionsStore.subscribe(callback),\n    () => pendingSessionsStore.getSnapshot(),\n    () => EMPTY_SESSIONS\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Helper hooks\n// ---------------------------------------------------------------------------\n\nfunction useFindAgentForCurrentChatSession(\n  currentChatSession: ChatSession | null\n): MinimalPersonaSnapshot | null {\n  const { agents } = useAgents();\n  const appFocus = useAppFocus();\n\n  let agentIdToFind: number;\n\n  // This could be an alreaady existing chat session.\n  if (currentChatSession) {\n    agentIdToFind = currentChatSession.persona_id;\n  }\n\n  // This could be a new chat-session. Therefore, `currentChatSession` is false, but there could still be some agent.\n  else if (appFocus.isNewSession()) {\n    agentIdToFind = DEFAULT_AGENT_ID;\n  }\n\n  // Or this could be a new chat-session with an agent.\n  else if (appFocus.isAgent()) {\n    agentIdToFind = Number.parseInt(appFocus.getId()!);\n  }\n\n  return agents.find((agent) => agent.id === agentIdToFind) ?? null;\n}\n\n// ---------------------------------------------------------------------------\n// Main hook\n// ---------------------------------------------------------------------------\n\nexport default function useChatSessions(): UseChatSessionsOutput {\n  const getKey = (\n    pageIndex: number,\n    previousPageData: ChatSessionsResponse | null\n  ): string | null => {\n    // No more pages\n    if (previousPageData && !previousPageData.has_more) return null;\n\n    // First page — no cursor\n    if (pageIndex === 0) {\n      return `${SWR_KEYS.chatSessions}?page_size=${PAGE_SIZE}`;\n    }\n\n    // Subsequent pages — cursor from the last session of the previous page\n    const lastSession =\n      previousPageData!.sessions[previousPageData!.sessions.length - 1];\n    if (!lastSession) return null;\n\n    const params = new URLSearchParams({\n      page_size: PAGE_SIZE.toString(),\n      before: lastSession.time_updated,\n    });\n    return `${SWR_KEYS.chatSessions}?${params.toString()}`;\n  };\n\n  const { data, error, setSize, mutate } = useSWRInfinite<ChatSessionsResponse>(\n    getKey,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      revalidateFirstPage: true,\n      revalidateAll: false,\n      dedupingInterval: 30000,\n    }\n  );\n\n  const appFocus = useAppFocus();\n  const pendingSessions = usePendingSessions();\n\n  // Flatten all pages into a single session list\n  const allFetchedSessions = useMemo(\n    () => (data ? data.flatMap((page) => page.sessions) : []),\n    [data]\n  );\n\n  // hasMore: check the last loaded page\n  const hasMore = useMemo(() => {\n    if (!data || data.length === 0) return false;\n    const lastPage = data[data.length - 1];\n    return lastPage ? lastPage.has_more : false;\n  }, [data]);\n\n  const [isLoadingMore, setIsLoadingMore] = useState(false);\n\n  const loadMore = useCallback(async () => {\n    if (isLoadingMore || !hasMore) return;\n\n    setIsLoadingMore(true);\n    const loadStart = Date.now();\n\n    try {\n      await setSize((s) => s + 1);\n\n      // Enforce minimum loading duration to avoid skeleton flash\n      const elapsed = Date.now() - loadStart;\n      if (elapsed < MIN_LOADING_DURATION_MS) {\n        await new Promise((r) =>\n          setTimeout(r, MIN_LOADING_DURATION_MS - elapsed)\n        );\n      }\n    } catch (err) {\n      console.error(\"Failed to load more chat sessions:\", err);\n    } finally {\n      setIsLoadingMore(false);\n    }\n  }, [isLoadingMore, hasMore, setSize]);\n\n  // Clean up pending sessions that now appear in fetched data\n  // (they now have messages and the server returns them)\n  useEffect(() => {\n    const fetchedIds = new Set(allFetchedSessions.map((s) => s.id));\n    pendingSessions.forEach((pending) => {\n      if (fetchedIds.has(pending.id)) {\n        pendingSessionsStore.remove(pending.id);\n      }\n    });\n  }, [allFetchedSessions, pendingSessions]);\n\n  // Merge fetched sessions with pending sessions.\n  // This ensures pending sessions persist across SWR revalidations.\n  const chatSessions = useMemo(() => {\n    const fetchedIds = new Set(allFetchedSessions.map((s) => s.id));\n\n    // Get pending sessions that are not yet in fetched data\n    const remainingPending = pendingSessions.filter(\n      (pending) => !fetchedIds.has(pending.id)\n    );\n\n    // Pending sessions go first (most recent), then fetched sessions\n    return [...remainingPending, ...allFetchedSessions];\n  }, [allFetchedSessions, pendingSessions]);\n\n  const currentChatSessionId = appFocus.isChat() ? appFocus.getId() : null;\n  const currentChatSession =\n    chatSessions.find(\n      (chatSession) => chatSession.id === currentChatSessionId\n    ) ?? null;\n\n  const agentForCurrentChatSession =\n    useFindAgentForCurrentChatSession(currentChatSession);\n\n  // Add a pending chat session that will persist across SWR revalidations.\n  // The session will be automatically removed once it appears in the server response.\n  const addPendingChatSession = useCallback(\n    ({ chatSessionId, personaId, projectId }: PendingChatSessionParams) => {\n      // Don't add sessions that belong to a project\n      if (projectId != null) return;\n\n      // Don't add if already in pending store (duplicates are also filtered during merge)\n      if (pendingSessionsStore.has(chatSessionId)) return;\n\n      const now = new Date().toISOString();\n      pendingSessionsStore.add({\n        id: chatSessionId,\n        name: \"\", // Empty name will display as \"New Chat\" via UNNAMED_CHAT constant\n        persona_id: personaId,\n        time_created: now,\n        time_updated: now,\n        shared_status: ChatSessionSharedStatus.Private,\n        project_id: projectId ?? null,\n        current_alternate_model: \"\",\n        current_temperature_override: null,\n      });\n    },\n    []\n  );\n\n  const removeSession = useCallback(\n    (sessionId: string) => {\n      pendingSessionsStore.remove(sessionId);\n      // Optimistically remove from all loaded pages\n      mutate(\n        (pages) =>\n          pages?.map((page) => ({\n            ...page,\n            sessions: page.sessions.filter((s) => s.id !== sessionId),\n          })),\n        { revalidate: false }\n      );\n    },\n    [mutate]\n  );\n\n  const refreshChatSessions = useCallback(() => mutate(), [mutate]);\n\n  return {\n    chatSessions,\n    currentChatSessionId,\n    currentChatSession,\n    agentForCurrentChatSession,\n    isLoading: !error && !data,\n    error,\n    refreshChatSessions,\n    addPendingChatSession,\n    removeSession,\n    hasMore,\n    isLoadingMore,\n    loadMore,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useClickOutside.ts",
    "content": "\"use client\";\n\nimport { useEffect, RefObject } from \"react\";\n\n/**\n * A generic hook that detects clicks outside of referenced element(s).\n *\n * @param ref - A ref or array of refs to monitor for outside clicks\n * @param callback - Function to call when a click outside is detected\n * @param enabled - Whether the hook is enabled. Defaults to true.\n *\n * @example\n * ```tsx\n * // Single ref example\n * const MyComponent = () => {\n *   const ref = useRef<HTMLDivElement>(null);\n *   const [isOpen, setIsOpen] = useState(false);\n *\n *   useClickOutside(ref, () => setIsOpen(false), isOpen);\n *\n *   return (\n *     <div ref={ref}>\n *       {isOpen && <div>Content</div>}\n *     </div>\n *   );\n * };\n * ```\n *\n * @example\n * ```tsx\n * // Single ref example with dropdown\n * const Dropdown = () => {\n *   const dropdownRef = useRef<HTMLDivElement>(null);\n *   const [isOpen, setIsOpen] = useState(false);\n *\n *   useClickOutside(dropdownRef, () => setIsOpen(false), isOpen);\n *\n *   return (\n *     <div>\n *       {isOpen && <div ref={dropdownRef}>Dropdown content</div>}\n *     </div>\n *   );\n * };\n * ```\n *\n * @example\n * ```tsx\n * // Multiple refs example - useful for combobox/dropdown with separate input and menu\n * const ComboBox = () => {\n *   const inputRef = useRef<HTMLInputElement>(null);\n *   const dropdownRef = useRef<HTMLDivElement>(null);\n *   const [isOpen, setIsOpen] = useState(false);\n *\n *   // Close dropdown only if click is outside BOTH input and dropdown\n *   useClickOutside([inputRef, dropdownRef], () => setIsOpen(false), isOpen);\n *\n *   return (\n *     <div>\n *       <input ref={inputRef} onClick={() => setIsOpen(true)} />\n *       {isOpen && (\n *         <div ref={dropdownRef}>\n *           <div>Option 1</div>\n *           <div>Option 2</div>\n *         </div>\n *       )}\n *     </div>\n *   );\n * };\n * ```\n */\nexport function useClickOutside<T extends HTMLElement>(\n  ref: RefObject<T> | RefObject<T>[] | null,\n  callback: () => void,\n  enabled: boolean = true\n): void {\n  useEffect(() => {\n    if (!enabled) {\n      return;\n    }\n\n    const handleClickOutside = (event: Event) => {\n      const target = event.target as Node;\n\n      // Normalize to array for consistent handling\n      const refs = Array.isArray(ref) ? ref : [ref];\n\n      // Check if click is outside all provided refs\n      const isOutside = refs.every(\n        (r) => !r?.current || !r.current.contains(target)\n      );\n\n      if (isOutside) {\n        callback();\n      }\n    };\n\n    document.addEventListener(\"mousedown\", handleClickOutside);\n\n    return () => {\n      document.removeEventListener(\"mousedown\", handleClickOutside);\n    };\n  }, [ref, callback, enabled]);\n}\n"
  },
  {
    "path": "web/src/hooks/useCloudSubscription.ts",
    "content": "import { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport { hasPaidSubscription } from \"@/lib/billing/interfaces\";\nimport { useBillingInformation } from \"@/hooks/useBillingInformation\";\n\n/**\n * Returns whether the current tenant has an active paid subscription on cloud.\n *\n * Self-hosted deployments always return true (no billing gate).\n * Cloud deployments check billing status via the billing API.\n * Returns true while loading to avoid flashing the upgrade prompt.\n */\nexport function useCloudSubscription(): boolean {\n  const { data: billingData, isLoading } = useBillingInformation();\n\n  if (!NEXT_PUBLIC_CLOUD_ENABLED) {\n    return true;\n  }\n\n  // Treat loading as subscribed to avoid UI flash\n  if (isLoading || billingData == null) {\n    return true;\n  }\n\n  return hasPaidSubscription(billingData);\n}\n"
  },
  {
    "path": "web/src/hooks/useCodeInterpreter.ts",
    "content": "import useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\n\nconst HEALTH_ENDPOINT = \"/api/admin/code-interpreter/health\";\nconst STATUS_ENDPOINT = \"/api/admin/code-interpreter\";\n\ninterface CodeInterpreterHealth {\n  healthy: boolean;\n}\n\ninterface CodeInterpreterStatus {\n  enabled: boolean;\n}\n\nexport default function useCodeInterpreter() {\n  const {\n    data: healthData,\n    error: healthError,\n    isLoading: isHealthLoading,\n    mutate: refetchHealth,\n  } = useSWR<CodeInterpreterHealth>(HEALTH_ENDPOINT, errorHandlingFetcher, {\n    refreshInterval: 30000,\n  });\n\n  const {\n    data: statusData,\n    error: statusError,\n    isLoading: isStatusLoading,\n    mutate: refetchStatus,\n  } = useSWR<CodeInterpreterStatus>(STATUS_ENDPOINT, errorHandlingFetcher);\n\n  function refetch() {\n    refetchHealth();\n    refetchStatus();\n  }\n\n  return {\n    isHealthy: healthData?.healthy ?? false,\n    isEnabled: statusData?.enabled ?? false,\n    isLoading: isHealthLoading || isStatusLoading,\n    error: healthError || statusError,\n    refetch,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useContainerCenter.ts",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { usePathname } from \"next/navigation\";\nimport useScreenSize from \"@/hooks/useScreenSize\";\n\nconst SELECTOR = \"[data-main-container]\";\n\ninterface ContainerCenter {\n  centerX: number | null;\n  centerY: number | null;\n  hasContainerCenter: boolean;\n}\n\nconst NULL_CENTER = { x: null, y: null } as const;\n\nfunction measure(el: HTMLElement): { x: number; y: number } | null {\n  if (!el.isConnected) return null;\n  const rect = el.getBoundingClientRect();\n  if (rect.width === 0 && rect.height === 0) return null;\n  return { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 };\n}\n\n/**\n * Tracks the center point of the `[data-main-container]` element so that\n * portaled overlays (modals, command menus) can center relative to the main\n * content area rather than the full viewport.\n *\n * Returns `{ centerX, centerY, hasContainerCenter }`.\n * When the container is not present (e.g. pages without `AppLayouts.Root`),\n * both center values are `null` and `hasContainerCenter` is `false`, allowing\n * callers to fall back to standard viewport centering.\n *\n * Uses a lazy `useState` initializer so the first render already has the\n * correct values (no flash), and a `ResizeObserver` to stay reactive when\n * the sidebar folds/unfolds. Re-subscribes on route changes because each\n * page renders its own `AppLayouts.Root`, replacing the DOM element.\n */\nexport default function useContainerCenter(): ContainerCenter {\n  const pathname = usePathname();\n  const { isMediumScreen } = useScreenSize();\n  const [center, setCenter] = useState<{ x: number | null; y: number | null }>(\n    () => {\n      if (typeof document === \"undefined\") return NULL_CENTER;\n      const el = document.querySelector<HTMLElement>(SELECTOR);\n      if (!el) return NULL_CENTER;\n      const m = measure(el);\n      return m ?? NULL_CENTER;\n    }\n  );\n\n  useEffect(() => {\n    const container = document.querySelector<HTMLElement>(SELECTOR);\n    if (!container) {\n      setCenter(NULL_CENTER);\n      return;\n    }\n\n    const update = () => {\n      const m = measure(container);\n      setCenter(m ?? NULL_CENTER);\n    };\n\n    update();\n    const observer = new ResizeObserver(update);\n    observer.observe(container);\n    return () => observer.disconnect();\n  }, [pathname]);\n\n  return {\n    centerX: isMediumScreen ? null : center.x,\n    centerY: isMediumScreen ? null : center.y,\n    hasContainerCenter: isMediumScreen\n      ? false\n      : center.x !== null && center.y !== null,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useContentSize.ts",
    "content": "\"use client\";\n\nimport { useRef, useEffect, useState } from \"react\";\n\ninterface ContentSize {\n  width: number;\n  height: number;\n}\n\n/**\n * A hook that measures the content size (scrollWidth/scrollHeight) of a DOM element.\n *\n * This hook measures the natural content size of an element including overflow,\n * which is useful for determining how much space content needs before wrapping\n * or being cut off. It can automatically track size changes via ResizeObserver\n * and/or re-measure when dependencies change.\n *\n * @param dependencies - Optional dependency array to trigger re-measurement when values change\n * @param observeResize - Whether to continuously observe size changes via ResizeObserver. Defaults to true.\n *\n * @returns A tuple containing:\n *   - `ref`: A ref object to attach to the element you want to measure\n *   - `size`: An object with `width` and `height` properties (in pixels)\n *\n * @example\n * ```tsx\n * // Basic usage - measure button content to determine if it needs to wrap\n * const MyButton = ({ children }) => {\n *   const [ref, { width }] = useContentSize();\n *\n *   return (\n *     <button ref={ref}>\n *       Content is {width}px wide\n *     </button>\n *   );\n * };\n * ```\n *\n * @example\n * ```tsx\n * // Measure content when it changes\n * const DynamicContent = ({ text }) => {\n *   const [ref, { width, height }] = useContentSize([text]);\n *\n *   return (\n *     <div ref={ref}>\n *       {text}\n *       <p>Size: {width}x{height}</p>\n *     </div>\n *   );\n * };\n * ```\n *\n * @example\n * ```tsx\n * // Measure once without observing resize (better performance)\n * const SelectButton = ({ children }) => {\n *   const content = useMemo(() => <span>{children}</span>, [children]);\n *   const [measureRef, { width: contentWidth }] = useContentSize([content], false);\n *\n *   return (\n *     <div>\n *       // Hidden element for measurement\n *       <div ref={measureRef} style={{ position: 'absolute', visibility: 'hidden' }}>\n *         {content}\n *       </div>\n *       // Actual button with calculated width\n *       <button style={{ width: contentWidth }}>\n *         {content}\n *       </button>\n *     </div>\n *   );\n * };\n * ```\n *\n * @example\n * ```tsx\n * // Auto-expanding textarea\n * const AutoExpandingTextarea = () => {\n *   const [value, setValue] = useState('');\n *   const [ref, { height }] = useContentSize([value]);\n *\n *   return (\n *     <textarea\n *       ref={ref}\n *       value={value}\n *       onChange={(e) => setValue(e.target.value)}\n *       style={{ height: `${height}px` }}\n *     />\n *   );\n * };\n * ```\n */\nexport function useContentSize(\n  dependencies?: React.DependencyList,\n  observeResize: boolean = true\n): [React.RefObject<HTMLDivElement | null>, ContentSize] {\n  const ref = useRef<HTMLDivElement>(null);\n  const [size, setSize] = useState<ContentSize>({ width: 0, height: 0 });\n\n  const measureSize = () => {\n    if (ref.current) {\n      const newSize: ContentSize = {\n        width: ref.current.scrollWidth,\n        height: ref.current.scrollHeight,\n      };\n      setSize(newSize);\n    }\n  };\n\n  // Measure on dependencies change\n  // We intentionally use the `dependencies` parameter directly as the dependency array.\n  // The exhaustive-deps rule is disabled because:\n  // 1. `measureSize` is stable (doesn't change) and doesn't need to be in the deps\n  // 2. We want to re-measure ONLY when the caller's dependencies change, not when measureSize changes\n  // 3. The caller passes their own dependency array to control when measurement happens\n  useEffect(() => {\n    measureSize();\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, dependencies);\n\n  // Observe resize if enabled\n  useEffect(() => {\n    if (!observeResize || !ref.current) return;\n\n    const resizeObserver = new ResizeObserver(() => {\n      // Use requestAnimationFrame to ensure measurements happen after the resize is complete\n      requestAnimationFrame(() => {\n        measureSize();\n      });\n    });\n\n    // Observe the container itself\n    resizeObserver.observe(ref.current);\n\n    // Also observe all descendant elements (like textareas)\n    const descendants = ref.current.querySelectorAll(\"*\");\n    descendants.forEach((el) => {\n      resizeObserver.observe(el);\n    });\n\n    return () => {\n      resizeObserver.disconnect();\n    };\n  }, [observeResize]);\n\n  return [ref, size];\n}\n"
  },
  {
    "path": "web/src/hooks/useCurrentUser.ts",
    "content": "import useSWR, { type KeyedMutator } from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { User } from \"@/lib/types\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\n/**\n * Fetches the current authenticated user via SWR (`/api/me`).\n *\n * This hook is intentionally configured with conservative revalidation\n * settings to avoid hammering the backend on every focus/reconnect event:\n *\n * - `revalidateOnFocus: false`      — tab switches won't trigger a refetch\n * - `revalidateOnReconnect: false`   — network recovery won't trigger a refetch\n * - `dedupingInterval: 30_000`       — duplicate requests within 30 s are deduped\n *\n * The returned `mutateUser` handle lets callers imperatively refetch (e.g.\n * after a token refresh) without changing the global SWR config.\n *\n * @example\n * ```ts\n * const { user, mutateUser, userError } = useCurrentUser();\n * ```\n */\nexport function useCurrentUser(): {\n  /** The authenticated user, or `undefined` while loading. */\n  user: User | undefined;\n  /** Imperatively revalidate / update the cached user. */\n  mutateUser: KeyedMutator<User>;\n  /** The error thrown by the fetcher, if any. */\n  userError: (Error & { status?: number }) | undefined;\n} {\n  const { data, mutate, error } = useSWR<User>(\n    SWR_KEYS.me,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateOnReconnect: false,\n      revalidateIfStale: false,\n      dedupingInterval: 30_000,\n    }\n  );\n\n  return { user: data, mutateUser: mutate, userError: error };\n}\n"
  },
  {
    "path": "web/src/hooks/useDeepResearchToggle.ts",
    "content": "\"use client\";\n\nimport { useState, useEffect, useRef, useCallback } from \"react\";\n\ninterface UseDeepResearchToggleProps {\n  chatSessionId: string | null;\n  agentId: number | undefined;\n}\n\n/**\n * Custom hook for managing the agent search (deep research) toggle state.\n * Automatically resets the toggle to false when:\n * - Switching between existing chat sessions\n * - The assistant changes\n * - The page is reloaded (since state initializes to false)\n *\n * The toggle is preserved when transitioning from no chat session to a new session.\n *\n * @param chatSessionId - The current chat session ID\n * @param agentId - The current agent ID\n * @returns An object containing the toggle state and toggle function\n */\nexport default function useDeepResearchToggle({\n  chatSessionId,\n  agentId,\n}: UseDeepResearchToggleProps) {\n  const [deepResearchEnabled, setDeepResearchEnabled] = useState(false);\n  const previousChatSessionId = useRef<string | null>(chatSessionId);\n\n  // Reset when switching chat sessions, but preserve when going from null to a new session\n  useEffect(() => {\n    const previousId = previousChatSessionId.current;\n    previousChatSessionId.current = chatSessionId;\n\n    // Only reset if we're switching between actual sessions (not from null to a new session)\n    if (previousId !== null && previousId !== chatSessionId) {\n      setDeepResearchEnabled(false);\n    }\n  }, [chatSessionId]);\n\n  // Reset when switching assistants\n  useEffect(() => {\n    setDeepResearchEnabled(false);\n  }, [agentId]);\n\n  const toggleDeepResearch = useCallback(() => {\n    setDeepResearchEnabled(!deepResearchEnabled);\n  }, [deepResearchEnabled]);\n\n  return {\n    deepResearchEnabled,\n    toggleDeepResearch,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useFederatedOAuthStatus.ts",
    "content": "\"use client\";\n\nimport { useMemo } from \"react\";\nimport useSWR from \"swr\";\nimport { FederatedConnectorOAuthStatus } from \"@/components/chat/FederatedOAuthModal\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\n\n/**\n * Hook for fetching federated OAuth connector authentication status.\n *\n * Retrieves the authentication status for all federated connectors (e.g., Gmail,\n * Google Drive, Slack) and provides utilities to identify which connectors need\n * OAuth authentication. Uses SWR for caching and automatic revalidation.\n *\n * @returns Object containing:\n *   - connectors: Array of all federated connector statuses\n *   - needsAuth: Array of connectors that lack OAuth tokens\n *   - hasUnauthenticatedConnectors: Boolean indicating if any connectors need auth\n *   - isLoading: Boolean indicating if data is being fetched\n *   - error: Error object if the fetch failed\n *   - refetch: Function to manually reload connector statuses\n *\n * @example\n * ```tsx\n * // Display connectors requiring authentication\n * const OAuthPrompt = () => {\n *   const { needsAuth, isLoading } = useFederatedOAuthStatus();\n *\n *   if (isLoading) return <Spinner />;\n *   if (needsAuth.length === 0) return null;\n *\n *   return (\n *     <div>\n *       <h3>Connect your accounts:</h3>\n *       {needsAuth.map(connector => (\n *         <ConnectButton key={connector.source} connector={connector} />\n *       ))}\n *     </div>\n *   );\n * };\n * ```\n *\n * @example\n * ```tsx\n * // Show warning banner if any connectors need authentication\n * const AuthWarningBanner = () => {\n *   const { hasUnauthenticatedConnectors } = useFederatedOAuthStatus();\n *\n *   if (!hasUnauthenticatedConnectors) return null;\n *\n *   return (\n *     <Banner variant=\"warning\">\n *       Some connectors need authentication to access your data.\n *     </Banner>\n *   );\n * };\n * ```\n *\n * @example\n * ```tsx\n * // List all connectors with their auth status\n * const ConnectorList = () => {\n *   const { connectors, refetch } = useFederatedOAuthStatus();\n *\n *   return (\n *     <div>\n *       {connectors.map(connector => (\n *         <ConnectorRow\n *           key={connector.source}\n *           connector={connector}\n *           authenticated={connector.has_oauth_token}\n *           onReconnect={refetch}\n *         />\n *       ))}\n *     </div>\n *   );\n * };\n * ```\n */\nexport default function useFederatedOAuthStatus() {\n  const { data, error, isLoading, mutate } = useSWR<\n    FederatedConnectorOAuthStatus[]\n  >(\"/api/federated/oauth-status\", errorHandlingFetcher);\n\n  const connectors = data ?? [];\n  const needsAuth = useMemo(\n    () => (data ?? []).filter((c) => !c.has_oauth_token),\n    [data]\n  );\n  const hasUnauthenticatedConnectors = needsAuth.length > 0;\n\n  return {\n    connectors,\n    needsAuth,\n    hasUnauthenticatedConnectors,\n    isLoading,\n    error,\n    refetch: mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useFeedbackController.ts",
    "content": "\"use client\";\n\nimport { useCallback } from \"react\";\nimport { useChatSessionStore } from \"@/app/app/stores/useChatSessionStore\";\nimport { FeedbackType } from \"@/app/app/interfaces\";\nimport { handleChatFeedback, removeChatFeedback } from \"@/app/app/services/lib\";\nimport { getMessageByMessageId } from \"@/app/app/services/messageTree\";\nimport { toast } from \"@/hooks/useToast\";\n\n/**\n * Hook for managing chat message feedback (like/dislike)\n *\n * Provides optimistic UI updates with automatic rollback on errors.\n * Handles both adding/updating feedback and removing existing feedback.\n *\n * @returns Object containing:\n *   - handleFeedbackChange: Function to submit feedback changes\n *\n * @example\n * ```tsx\n * const { handleFeedbackChange } = useFeedbackController();\n *\n * // Add positive feedback\n * await handleFeedbackChange(messageId, \"like\", \"Great response!\");\n *\n * // Remove feedback\n * await handleFeedbackChange(messageId, null);\n * ```\n */\nexport default function useFeedbackController() {\n  const updateCurrentMessageFeedback = useChatSessionStore(\n    (state) => state.updateCurrentMessageFeedback\n  );\n\n  const handleFeedbackChange = useCallback(\n    async (\n      messageId: number,\n      newFeedback: FeedbackType | null,\n      feedbackText?: string,\n      predefinedFeedback?: string\n    ): Promise<boolean> => {\n      // Get current feedback state for rollback on error\n      const { currentSessionId, sessions } = useChatSessionStore.getState();\n      const messageTree = currentSessionId\n        ? sessions.get(currentSessionId)?.messageTree\n        : undefined;\n      const previousFeedback = messageTree\n        ? getMessageByMessageId(messageTree, messageId)?.currentFeedback ?? null\n        : null;\n\n      // Optimistically update the UI\n      updateCurrentMessageFeedback(messageId, newFeedback);\n\n      try {\n        if (newFeedback === null) {\n          // Remove feedback\n          const response = await removeChatFeedback(messageId);\n          if (!response.ok) {\n            // Rollback on error\n            updateCurrentMessageFeedback(messageId, previousFeedback);\n            const errorData = await response.json();\n            toast.error(\n              `Failed to remove feedback - ${\n                errorData.detail || errorData.message\n              }`\n            );\n            return false;\n          }\n        } else {\n          // Add/update feedback\n          const response = await handleChatFeedback(\n            messageId,\n            newFeedback,\n            feedbackText || \"\",\n            predefinedFeedback\n          );\n          if (!response.ok) {\n            // Rollback on error\n            updateCurrentMessageFeedback(messageId, previousFeedback);\n            const errorData = await response.json();\n            toast.error(\n              `Failed to submit feedback - ${\n                errorData.detail || errorData.message\n              }`\n            );\n            return false;\n          }\n        }\n        return true;\n      } catch (error) {\n        // Rollback on network error\n        updateCurrentMessageFeedback(messageId, previousFeedback);\n        toast.error(\"Failed to submit feedback - network error\");\n        return false;\n      }\n    },\n    [updateCurrentMessageFeedback]\n  );\n\n  return { handleFeedbackChange };\n}\n"
  },
  {
    "path": "web/src/hooks/useFilter.ts",
    "content": "\"use client\";\n\nimport { useMemo, useState } from \"react\";\n\n/**\n * A generic filtering hook that filters an array of items based on a query string.\n *\n * The hook manages its own query state and uses an extractor function to convert\n * each item into a searchable string, then performs a case-insensitive substring\n * match against the query.\n *\n * @template T - The type of items being filtered\n * @param items - The array of items to filter\n * @param extractor - A function that extracts a searchable string from each item\n * @returns An object containing the query, setQuery function, and filtered items\n *\n * @example\n * ```tsx\n * function MyComponent() {\n *   const tools = [\n *     { name: \"File Reader\", description: \"Read files\" },\n *     { name: \"Web Search\", description: \"Search the web\" }\n *   ];\n *\n *   const { query, setQuery, filtered } = useFilter(\n *     tools,\n *     (tool) => `${tool.name} ${tool.description}`\n *   );\n *\n *   return (\n *     <>\n *       <input value={query} onChange={(e) => setQuery(e.target.value)} />\n *       {filtered.map(tool => <div key={tool.name}>{tool.name}</div>)}\n *     </>\n *   );\n * }\n * ```\n *\n * @remarks\n * - Returns all items if the query is empty or whitespace-only\n * - Performs case-insensitive matching\n * - Uses substring matching (includes)\n * - The extractor function is included in dependencies to prevent stale closures.\n *   For optimal performance, memoize the extractor with useCallback if it's expensive.\n */\nexport default function useFilter<T>(\n  items: T[],\n  extractor: (item: T) => string\n) {\n  const [query, setQuery] = useState(\"\");\n\n  const filtered = useMemo(() => {\n    const trimmedQuery = query.trim();\n\n    // Return all items if query is empty\n    if (!trimmedQuery) {\n      return items;\n    }\n\n    const lowerQuery = trimmedQuery.toLowerCase();\n\n    return items.filter((item) => {\n      const searchableText = extractor(item).toLowerCase();\n      return searchableText.includes(lowerQuery);\n    });\n  }, [query, items, extractor]);\n\n  return { query, setQuery, filtered };\n}\n"
  },
  {
    "path": "web/src/hooks/useGroups.ts",
    "content": "\"use client\";\n\nimport useSWR, { mutate } from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { UserGroup } from \"@/lib/types\";\nimport { useContext } from \"react\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\n/**\n * Fetches all user groups in the organization.\n *\n * Returns group information including group members, curators, and associated resources.\n * Use this for displaying group lists in sharing dialogs, admin panels, or permission\n * management interfaces.\n *\n * Note: This hook only returns data if enterprise features are enabled. In non-enterprise\n * environments, it returns an empty array.\n *\n * @returns Object containing:\n *   - data: Array of UserGroup objects, or undefined while loading\n *   - isLoading: Boolean indicating if data is being fetched\n *   - error: Any error that occurred during fetch\n *   - refreshGroups: Function to manually revalidate the data\n *\n * @example\n * // Fetch groups for sharing dialogs\n * const { data: groupsData, isLoading } = useGroups();\n * if (isLoading) return <Spinner />;\n * return <GroupList groups={groupsData ?? []} />;\n *\n * @example\n * // Fetch groups with manual refresh\n * const { data: groupsData, refreshGroups } = useGroups();\n * // Later...\n * await createNewGroup(...);\n * refreshGroups(); // Refresh the group list\n */\nexport default function useGroups() {\n  const combinedSettings = useContext(SettingsContext);\n  const settingsLoading = combinedSettings?.settingsLoading ?? false;\n  const isPaidEnterpriseFeaturesEnabled =\n    !settingsLoading &&\n    combinedSettings &&\n    combinedSettings.enterpriseSettings !== null;\n\n  const { data, error, isLoading } = useSWR<UserGroup[]>(\n    isPaidEnterpriseFeaturesEnabled ? SWR_KEYS.adminUserGroups : null,\n    errorHandlingFetcher\n  );\n\n  const refreshGroups = () => mutate(SWR_KEYS.adminUserGroups);\n\n  if (settingsLoading) {\n    return {\n      data: undefined,\n      isLoading: true,\n      error: undefined,\n      refreshGroups,\n    };\n  }\n\n  if (!isPaidEnterpriseFeaturesEnabled) {\n    return {\n      data: [],\n      isLoading: false,\n      error: undefined,\n      refreshGroups,\n    };\n  }\n\n  return {\n    data,\n    isLoading,\n    error,\n    refreshGroups,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useImageDropzone.ts",
    "content": "\"use client\";\n\nimport { useCallback } from \"react\";\nimport { useDropzone, DropzoneOptions, FileRejection } from \"react-dropzone\";\n\nconst ACCEPTED_IMAGE_TYPES = {\n  \"image/png\": [\".png\"],\n  \"image/jpeg\": [\".jpeg\", \".jpg\"],\n};\n\nexport interface UseImageDropzoneOptions {\n  /** Callback when a valid image file is dropped/selected */\n  onImageAccepted: (file: File) => void;\n  /** Callback when file is rejected (wrong type, too many files, etc.) */\n  onImageRejected?: (rejections: FileRejection[]) => void;\n  /** Whether dropzone is disabled */\n  disabled?: boolean;\n  /** Custom accepted file types - defaults to png, jpeg, jpg */\n  accept?: DropzoneOptions[\"accept\"];\n}\n\nexport interface UseImageDropzoneReturn {\n  /** Whether user is actively dragging files over the drop zone */\n  isDragActive: boolean;\n  /** Props to spread onto the drop zone container element */\n  getRootProps: ReturnType<typeof useDropzone>[\"getRootProps\"];\n  /** Props to spread onto a hidden input element */\n  getInputProps: ReturnType<typeof useDropzone>[\"getInputProps\"];\n  /** Programmatically open the file picker (for click-to-edit) */\n  openFilePicker: () => void;\n}\n\nexport function useImageDropzone({\n  onImageAccepted,\n  onImageRejected,\n  disabled = false,\n  accept = ACCEPTED_IMAGE_TYPES,\n}: UseImageDropzoneOptions): UseImageDropzoneReturn {\n  const onDrop = useCallback(\n    (acceptedFiles: File[], rejections: FileRejection[]) => {\n      if (rejections.length > 0) {\n        onImageRejected?.(rejections);\n        return;\n      }\n\n      const file = acceptedFiles[0];\n      if (file) {\n        onImageAccepted(file);\n      }\n    },\n    [onImageAccepted, onImageRejected]\n  );\n\n  const { getRootProps, getInputProps, open, isDragActive } = useDropzone({\n    onDrop,\n    accept,\n    multiple: false,\n    disabled,\n    noClick: true,\n    noKeyboard: true,\n  });\n\n  return {\n    isDragActive,\n    getRootProps,\n    getInputProps,\n    openFilePicker: open,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useIsDefaultAgent.ts",
    "content": "\"use client\";\n\nimport { useMemo } from \"react\";\nimport { useSearchParams } from \"next/navigation\";\nimport { SEARCH_PARAM_NAMES } from \"@/app/app/services/searchParams\";\nimport { CombinedSettings } from \"@/interfaces/settings\";\nimport { ChatSession } from \"@/app/app/interfaces\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { DEFAULT_AGENT_ID } from \"@/lib/constants\";\n\n/**\n * Determines if the current assistant is the default agent based on:\n * 1. Whether default agent is disabled in settings\n * 2. If URL has an agentId specified\n * 3. Based on the current chat session\n */\nexport default function useIsDefaultAgent({\n  liveAgent,\n  existingChatSessionId,\n  selectedChatSession,\n  settings,\n}: {\n  liveAgent: MinimalPersonaSnapshot | undefined;\n  existingChatSessionId: string | null;\n  selectedChatSession: ChatSession | undefined;\n  settings: CombinedSettings | null;\n}) {\n  const searchParams = useSearchParams();\n  const urlAssistantId = searchParams?.get(SEARCH_PARAM_NAMES.PERSONA_ID);\n\n  return useMemo(() => {\n    // If default agent is disabled, it can never be the default agent\n    if (settings?.settings?.disable_default_assistant) {\n      return false;\n    }\n\n    // If URL has an agentId, it's explicitly selected, not default\n    if (\n      urlAssistantId !== null &&\n      urlAssistantId !== DEFAULT_AGENT_ID.toString()\n    ) {\n      return false;\n    }\n\n    // If there's an existing chat session with a persona_id, it's not default\n    if (\n      existingChatSessionId &&\n      selectedChatSession?.persona_id !== DEFAULT_AGENT_ID\n    ) {\n      return false;\n    }\n\n    // If just on `/chat` page, it's the default agent\n    return true;\n  }, [\n    settings?.settings?.disable_default_assistant,\n    urlAssistantId,\n    existingChatSessionId,\n    selectedChatSession?.persona_id,\n    liveAgent?.id,\n  ]);\n}\n"
  },
  {
    "path": "web/src/hooks/useKeyPress.ts",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\n\nexport function useKeyPress(\n  callback: () => void,\n  key: string,\n  enabled: boolean = true\n) {\n  useEffect(() => {\n    if (!enabled) return;\n    function handleKeyDown(event: KeyboardEvent) {\n      if (event.key !== key) return;\n      event.preventDefault();\n      callback();\n    }\n    document.addEventListener(\"keydown\", handleKeyDown);\n    return () => {\n      document.removeEventListener(\"keydown\", handleKeyDown);\n    };\n  }, [callback, enabled, key]);\n}\n\n/**\n * Custom hook that listens for the \"Escape\" key and calls the provided callback.\n *\n * @param callback - Function to call when the Escape key is pressed\n * @param enabled - Optional boolean to enable/disable the hook (defaults to true)\n */\nexport function useEscape(callback: () => void, enabled: boolean = true) {\n  useKeyPress(callback, \"Escape\", enabled);\n}\n\n/**\n * Custom hook that listens for the \"Enter\" key and calls the provided callback.\n *\n * @param callback - Function to call when the Enter key is pressed\n * @param enabled - Optional boolean to enable/disable the hook (defaults to true)\n */\nexport function useEnter(callback: () => void, enabled: boolean = true) {\n  useKeyPress(callback, \"Enter\", enabled);\n}\n"
  },
  {
    "path": "web/src/hooks/useLLMProviders.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport {\n  LLMProviderDescriptor,\n  LLMProviderResponse,\n  LLMProviderView,\n  WellKnownLLMProviderDescriptor,\n} from \"@/interfaces/llm\";\n\n/**\n * Fetches configured LLM providers accessible to the current user.\n *\n * Hits the **non-admin** endpoints which return `LLMProviderDescriptor`\n * (no `id` or sensitive fields like `api_key`). Use this hook in\n * user-facing UI (chat, popovers, onboarding) where you need the list\n * of providers and their visible models but don't need admin-level details.\n *\n * The backend wraps the provider list in an `LLMProviderResponse` envelope\n * that also carries the global default text and vision models. This hook\n * unwraps `.providers` for convenience while still exposing the defaults.\n *\n * **Endpoints:**\n * - No `personaId` → `GET /api/llm/provider`\n *   Returns all public providers plus restricted providers the user can\n *   access via group membership.\n * - With `personaId` → `GET /api/llm/persona/{personaId}/providers`\n *   Returns providers scoped to a specific persona, respecting RBAC\n *   restrictions. Use this when displaying model options for a particular\n *   assistant.\n *\n * @param personaId - Optional persona ID for RBAC-scoped providers.\n *\n * @returns\n * - `llmProviders` — The array of provider descriptors, or `undefined`\n *    while loading.\n * - `defaultText` — The global (or persona-overridden) default text model.\n * - `defaultVision` — The global (or persona-overridden) default vision model.\n * - `isLoading` — `true` until the first successful response or error.\n * - `error` — The SWR error object, if any.\n * - `refetch` — SWR `mutate` function to trigger a revalidation.\n */\nexport function useLLMProviders(personaId?: number) {\n  const url =\n    personaId !== undefined\n      ? SWR_KEYS.llmProvidersForPersona(personaId)\n      : SWR_KEYS.llmProviders;\n\n  const { data, error, mutate } = useSWR<\n    LLMProviderResponse<LLMProviderDescriptor>\n  >(url, errorHandlingFetcher, {\n    revalidateOnFocus: false,\n    revalidateIfStale: false,\n    dedupingInterval: 60000,\n  });\n\n  return {\n    llmProviders: data?.providers,\n    defaultText: data?.default_text ?? null,\n    defaultVision: data?.default_vision ?? null,\n    isLoading: !error && !data,\n    error,\n    refetch: mutate,\n  };\n}\n\n/**\n * Fetches configured LLM providers via the **admin** endpoint.\n *\n * Hits `GET /api/admin/llm/provider` which returns `LLMProviderView` —\n * the full provider object including `id`, `api_key` (masked),\n * group/persona assignments, and all other admin-visible fields.\n *\n * Use this hook on admin pages (e.g. the LLM Configuration page) where\n * you need provider IDs for mutations (setting defaults, editing, deleting)\n * or need to display admin-only metadata. **Do not use in user-facing UI**\n * — use `useLLMProviders` instead.\n *\n * @returns\n * - `llmProviders` — The array of full provider views, or `undefined`\n *    while loading.\n * - `defaultText` — The global default text model.\n * - `defaultVision` — The global default vision model.\n * - `isLoading` — `true` until the first successful response or error.\n * - `error` — The SWR error object, if any.\n * - `refetch` — SWR `mutate` function to trigger a revalidation.\n */\nexport function useAdminLLMProviders() {\n  const { data, error, mutate } = useSWR<LLMProviderResponse<LLMProviderView>>(\n    SWR_KEYS.adminLlmProviders,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60000,\n    }\n  );\n\n  return {\n    llmProviders: data?.providers,\n    defaultText: data?.default_text ?? null,\n    defaultVision: data?.default_vision ?? null,\n    isLoading: !error && !data,\n    error,\n    refetch: mutate,\n  };\n}\n\n/**\n * Fetches the catalog of well-known (built-in) LLM providers.\n *\n * Hits `GET /api/admin/llm/built-in/options` which returns the static\n * list of provider descriptors that Onyx ships with out of the box\n * (OpenAI, Anthropic, Vertex AI, Bedrock, Azure, Ollama, OpenRouter,\n * etc.). Each descriptor includes the provider's known models and the\n * recommended default model.\n *\n * Used primarily on the LLM Configuration page and onboarding flows\n * to show which providers are available to set up, and to pre-populate\n * model lists before the user has entered credentials.\n *\n * @returns\n * - `wellKnownLLMProviders` — The array of built-in provider descriptors,\n *    or `null` while loading.\n * - `isLoading` — `true` until the first successful response or error.\n * - `error` — The SWR error object, if any.\n * - `mutate` — SWR `mutate` function to trigger a revalidation.\n */\n/**\n * Fetches the descriptor for a single well-known (built-in) LLM provider.\n *\n * Hits `GET /api/admin/llm/built-in/options/{providerEndpoint}` which returns\n * the provider descriptor including its known models and the recommended\n * default model.\n *\n * Used inside individual provider modals to pre-populate model lists\n * before the user has entered credentials.\n *\n * @param providerEndpoint - The provider's API endpoint name (e.g. \"openai\", \"anthropic\").\n *   Pass `null` to suppress the request.\n */\nexport function useWellKnownLLMProvider(providerEndpoint: string | null) {\n  const { data, error, isLoading } = useSWR<WellKnownLLMProviderDescriptor>(\n    providerEndpoint ? SWR_KEYS.wellKnownLlmProvider(providerEndpoint) : null,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60000,\n    }\n  );\n\n  return {\n    wellKnownLLMProvider: data ?? null,\n    isLoading,\n    error,\n  };\n}\n\nexport function useWellKnownLLMProviders() {\n  const {\n    data: wellKnownLLMProviders,\n    error,\n    isLoading,\n    mutate,\n  } = useSWR<WellKnownLLMProviderDescriptor[]>(\n    SWR_KEYS.wellKnownLlmProviders,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60000,\n    }\n  );\n\n  return {\n    wellKnownLLMProviders: wellKnownLLMProviders ?? null,\n    isLoading,\n    error,\n    mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useLicense.ts",
    "content": "import useSWR from \"swr\";\n\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { LicenseStatus } from \"@/lib/billing/interfaces\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\n/**\n * Hook to fetch license status for self-hosted deployments.\n *\n * Skips the fetch on cloud deployments (uses tenant auth instead).\n */\nexport function useLicense() {\n  const url = NEXT_PUBLIC_CLOUD_ENABLED ? null : SWR_KEYS.license;\n\n  const { data, error, mutate, isLoading } = useSWR<LicenseStatus>(\n    url,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateOnReconnect: false,\n      revalidateIfStale: false,\n      dedupingInterval: 30000,\n      shouldRetryOnError: false,\n      keepPreviousData: true,\n    }\n  );\n\n  if (!url) {\n    return {\n      data: undefined,\n      isLoading: false,\n      error: undefined,\n      refresh: () => Promise.resolve(undefined),\n    };\n  }\n\n  return { data, isLoading, error, refresh: mutate };\n}\n"
  },
  {
    "path": "web/src/hooks/useMcpServers.ts",
    "content": "\"use client\";\n\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { MCPServersResponse } from \"@/lib/tools/interfaces\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\n/**\n * Fetches MCP (Model Context Protocol) servers configuration.\n *\n * MCP servers provide additional tools and capabilities to agents through\n * the Model Context Protocol.\n *\n * @returns Object containing:\n *   - mcpData: MCPServersResponse data or null if not loaded\n *   - isLoading: Boolean indicating if data is being fetched\n *   - error: Any error that occurred during fetch\n *   - mutateMcpServers: Function to manually revalidate the data\n *\n * @example\n * const { mcpData, isLoading } = useMcpServers();\n * if (isLoading) return <Spinner />;\n * return <MCPServersList servers={mcpData} />;\n */\nexport default function useMcpServers() {\n  const {\n    data: mcpData,\n    error,\n    isLoading: isMcpLoading,\n    mutate: mutateMcpServers,\n  } = useSWR<MCPServersResponse>(\n    SWR_KEYS.adminMcpServers,\n    errorHandlingFetcher\n  );\n\n  return {\n    mcpData: mcpData ?? null,\n    isLoading: isMcpLoading,\n    error,\n    mutateMcpServers,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useMcpServersForAgentEditor.ts",
    "content": "\"use client\";\n\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { MCPServersResponse } from \"@/lib/tools/interfaces\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\n/**\n * Fetch MCP servers for non-admin UIs (e.g. agent editor).\n *\n * This endpoint is available to all authenticated users so basic users can\n * attach MCP actions to assistants.\n */\nexport default function useMcpServersForAgentEditor() {\n  const {\n    data: mcpData,\n    error,\n    isLoading: isMcpLoading,\n    mutate: mutateMcpServers,\n  } = useSWR<MCPServersResponse>(SWR_KEYS.mcpServers, errorHandlingFetcher);\n\n  return {\n    mcpData: mcpData ?? null,\n    isLoading: isMcpLoading,\n    error,\n    mutateMcpServers,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useMemoryManager.ts",
    "content": "import { useRef, useCallback, useEffect, useState } from \"react\";\nimport { MemoryItem } from \"@/lib/types\";\n\nexport interface LocalMemory {\n  id: number;\n  content: string;\n  isNew: boolean;\n}\n\nexport const MAX_MEMORY_LENGTH = 200;\nexport const MAX_MEMORY_COUNT = 10;\n\ninterface UseMemoryManagerArgs {\n  memories: MemoryItem[];\n  onSaveMemories: (memories: MemoryItem[]) => Promise<boolean>;\n  onNotify: (message: string, type: \"success\" | \"error\") => void;\n}\n\nexport function useMemoryManager({\n  memories,\n  onSaveMemories,\n  onNotify,\n}: UseMemoryManagerArgs) {\n  const [localMemories, setLocalMemories] = useState<LocalMemory[]>([]);\n  const [searchQuery, setSearchQuery] = useState(\"\");\n  const initialMemoriesRef = useRef<MemoryItem[]>([]);\n  const isSavingRef = useRef(false);\n\n  // Initialize local memories from props\n  useEffect(() => {\n    const existingMemories: LocalMemory[] = memories.map((mem, index) => ({\n      id: mem.id ?? -(index + 1),\n      content: mem.content,\n      isNew: mem.id === null,\n    }));\n\n    setLocalMemories((prev) => {\n      const emptyNewItems = prev.filter((m) => m.isNew && !m.content.trim());\n      const availableSlots = MAX_MEMORY_COUNT - existingMemories.length;\n      return [\n        ...emptyNewItems.slice(0, Math.max(0, availableSlots)),\n        ...existingMemories,\n      ];\n    });\n    initialMemoriesRef.current = memories;\n  }, [memories]);\n\n  const canAddMemory = localMemories.length < MAX_MEMORY_COUNT;\n\n  const handleAddMemory = useCallback((): number | null => {\n    if (localMemories.length >= MAX_MEMORY_COUNT) {\n      return null;\n    }\n\n    const existingEmpty = localMemories.find(\n      (m) => m.isNew && !m.content.trim()\n    );\n    if (existingEmpty) {\n      return existingEmpty.id;\n    }\n\n    // Save any unsaved new item with content before creating a new one\n    const unsavedNewItem = localMemories.find(\n      (m) => m.isNew && m.content.trim()\n    );\n    if (unsavedNewItem && !isSavingRef.current) {\n      const newMemories: MemoryItem[] = localMemories\n        .filter((m) => m.content.trim())\n        .map((m) => ({ id: m.isNew ? null : m.id, content: m.content }));\n\n      const memoriesChanged =\n        JSON.stringify(newMemories) !==\n        JSON.stringify(initialMemoriesRef.current);\n\n      if (memoriesChanged) {\n        isSavingRef.current = true;\n        onSaveMemories(newMemories).then((success) => {\n          isSavingRef.current = false;\n          if (success) {\n            initialMemoriesRef.current = newMemories;\n            onNotify(\"Memory saved\", \"success\");\n          } else {\n            onNotify(\"Failed to save memory\", \"error\");\n          }\n        });\n      }\n    }\n\n    const newId = Date.now();\n    setLocalMemories((prev) => [\n      { id: newId, content: \"\", isNew: true },\n      ...prev,\n    ]);\n    return newId;\n  }, [localMemories, onSaveMemories, onNotify]);\n\n  const handleUpdateMemory = useCallback((index: number, value: string) => {\n    setLocalMemories((prev) =>\n      prev.map((memory, i) =>\n        i === index ? { ...memory, content: value } : memory\n      )\n    );\n  }, []);\n\n  const handleRemoveMemory = useCallback(\n    async (index: number) => {\n      const memory = localMemories[index];\n      if (!memory) return;\n\n      if (memory.isNew) {\n        setLocalMemories((prev) => prev.filter((_, i) => i !== index));\n        return;\n      }\n\n      const newMemories: MemoryItem[] = localMemories\n        .filter((_, i) => i !== index)\n        .filter((m) => !m.isNew || m.content.trim())\n        .map((m) => ({ id: m.isNew ? null : m.id, content: m.content }));\n\n      const success = await onSaveMemories(newMemories);\n      if (success) {\n        onNotify(\"Memory deleted\", \"success\");\n      } else {\n        onNotify(\"Failed to delete memory\", \"error\");\n      }\n    },\n    [localMemories, onSaveMemories, onNotify]\n  );\n\n  const handleBlurMemory = useCallback(\n    async (index: number) => {\n      const memory = localMemories[index];\n      if (!memory || !memory.content.trim()) return;\n      if (isSavingRef.current) return;\n\n      const newMemories: MemoryItem[] = localMemories\n        .filter((m) => m.content.trim())\n        .map((m) => ({ id: m.isNew ? null : m.id, content: m.content }));\n\n      const memoriesChanged =\n        JSON.stringify(newMemories) !==\n        JSON.stringify(initialMemoriesRef.current);\n\n      if (!memoriesChanged) return;\n\n      isSavingRef.current = true;\n      const success = await onSaveMemories(newMemories);\n      isSavingRef.current = false;\n      if (success) {\n        initialMemoriesRef.current = newMemories;\n        onNotify(\"Memory saved\", \"success\");\n      } else {\n        onNotify(\"Failed to save memory\", \"error\");\n      }\n    },\n    [localMemories, onSaveMemories, onNotify]\n  );\n\n  const filteredMemories = localMemories\n    .map((memory, originalIndex) => ({ memory, originalIndex }))\n    .filter(({ memory }) => {\n      if (!searchQuery.trim()) return true;\n      return memory.content\n        .toLowerCase()\n        .includes(searchQuery.trim().toLowerCase());\n    });\n\n  const totalLineCount = localMemories.filter(\n    (m) => m.content.trim() || m.isNew\n  ).length;\n\n  return {\n    localMemories,\n    searchQuery,\n    setSearchQuery,\n    filteredMemories,\n    totalLineCount,\n    canAddMemory,\n    handleAddMemory,\n    handleUpdateMemory,\n    handleRemoveMemory,\n    handleBlurMemory,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useOnMount.ts",
    "content": "\"use client\";\n\nimport { useEffect, useState } from \"react\";\n\n/**\n * Hook that tracks whether the component has mounted on the client.\n *\n * Useful for avoiding hydration mismatches in SSR/SSG environments where\n * certain browser-only APIs (e.g., `window`, `localStorage`) are unavailable\n * on the server. By checking `isMounted`, you can defer rendering of\n * client-only content until after hydration.\n *\n * @param f - Optional callback to execute once on mount. This allows you to\n *            run initialization logic (e.g., setting up event listeners,\n *            fetching initial data) without needing a separate `useEffect`\n *            in the consuming component.\n * @returns `true` after the component has mounted, `false` during SSR and\n *          initial render.\n *\n * @example\n * ```tsx\n * function MyComponent() {\n *   const isMounted = useOnMount(() => {\n *     console.log(\"Component mounted!\");\n *   });\n *\n *   if (!isMounted) return null; // or a loading skeleton\n *\n *   return <div>Client-only content using window.innerWidth</div>;\n * }\n * ```\n */\nexport default function useOnMount(f?: React.EffectCallback): boolean {\n  const [mounted, setMounted] = useState(false);\n\n  useEffect(() => {\n    setMounted(true);\n    return f?.();\n  }, []);\n\n  return mounted;\n}\n"
  },
  {
    "path": "web/src/hooks/useOpenApiTools.ts",
    "content": "\"use client\";\n\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { ToolSnapshot } from \"@/lib/tools/interfaces\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\n/**\n * Fetches OpenAPI tools configuration.\n *\n * OpenAPI tools provide custom actions and integrations to agents through\n * OpenAPI specifications.\n *\n * @returns Object containing:\n *   - openApiTools: ToolSnapshot[] data or null if not loaded\n *   - isLoading: Boolean indicating if data is being fetched\n *   - error: Any error that occurred during fetch\n *   - mutateOpenApiTools: Function to manually revalidate the data\n *\n * @example\n * const { openApiTools, isLoading } = useOpenApiTools();\n * if (isLoading) return <Spinner />;\n * return <OpenApiToolsList tools={openApiTools} />;\n */\nexport default function useOpenApiTools() {\n  const {\n    data: openApiTools,\n    error,\n    isLoading: isOpenApiLoading,\n    mutate: mutateOpenApiTools,\n  } = useSWR<ToolSnapshot[]>(SWR_KEYS.openApiTools, errorHandlingFetcher);\n\n  return {\n    openApiTools: openApiTools ?? null,\n    isLoading: isOpenApiLoading,\n    error,\n    mutateOpenApiTools,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/usePaginatedFetch.ts",
    "content": "\"use client\";\n\nimport { useCallback, useEffect, useState, useRef, useMemo } from \"react\";\nimport { usePathname, useRouter, useSearchParams } from \"next/navigation\";\nimport type { Route } from \"next\";\n\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\n\n// Any type that has an id property\ntype PaginatedType = {\n  id: number | string;\n  [key: string]: any;\n};\n\ninterface PaginatedApiResponse<T extends PaginatedType> {\n  items: T[];\n  total_items: number;\n}\n\ninterface PaginationConfig {\n  itemsPerPage: number;\n  pagesPerBatch: number;\n  endpoint: string;\n  query?: string;\n  filter?: Record<string, string | boolean | number | string[] | Date>;\n  refreshIntervalInMs?: number;\n}\n\ninterface PaginatedHookReturnData<T extends PaginatedType> {\n  currentPageData: T[] | null;\n  isLoading: boolean;\n  error: Error | null;\n  currentPage: number;\n  totalPages: number;\n  totalItems: number;\n  goToPage: (page: number) => void;\n  refresh: () => Promise<void>;\n}\n\nfunction usePaginatedFetch<T extends PaginatedType>({\n  itemsPerPage,\n  pagesPerBatch,\n  endpoint,\n  query,\n  filter,\n  refreshIntervalInMs = 5000,\n}: PaginationConfig): PaginatedHookReturnData<T> {\n  const router = useRouter();\n  const currentPath = usePathname();\n  const searchParams = useSearchParams();\n\n  // State to initialize and hold the current page number\n  const [currentPage, setCurrentPage] = useState(() =>\n    parseInt(searchParams?.get(\"page\") || \"1\", 10)\n  );\n  const [currentPageData, setCurrentPageData] = useState<T[] | null>(null);\n  const [error, setError] = useState<Error | null>(null);\n  const [isLoading, setIsLoading] = useState<boolean>(false);\n  const [totalItems, setTotalItems] = useState<number>(0);\n  const [cachedBatches, setCachedBatches] = useState<{ [key: number]: T[][] }>(\n    {}\n  );\n\n  // Tracks ongoing requests to avoid duplicate requests, uses ref to persist across renders\n  const ongoingRequestsRef = useRef<Set<number>>(new Set());\n\n  const totalPages = useMemo(() => {\n    if (totalItems === 0) return 1;\n    return Math.ceil(totalItems / itemsPerPage);\n  }, [totalItems, itemsPerPage]);\n\n  // Calculates which batch we're in, and which page within that batch\n  const batchAndPageIndices = useMemo(() => {\n    const batchNum = Math.floor((currentPage - 1) / pagesPerBatch);\n    const batchPageNum = (currentPage - 1) % pagesPerBatch;\n    return { batchNum, batchPageNum };\n  }, [currentPage, pagesPerBatch]);\n\n  // Fetches a batch of data and stores it in the cache\n  const fetchBatchData = useCallback(\n    async (batchNum: number) => {\n      // Prevents duplicate requests\n      if (ongoingRequestsRef.current.has(batchNum)) {\n        return;\n      }\n      ongoingRequestsRef.current.add(batchNum);\n\n      try {\n        // Build query params\n        const params = new URLSearchParams({\n          page_num: batchNum.toString(),\n          page_size: (pagesPerBatch * itemsPerPage).toString(),\n        });\n\n        if (query) params.set(\"q\", query);\n\n        if (filter) {\n          for (const [key, value] of Object.entries(filter)) {\n            if (Array.isArray(value)) {\n              value.forEach((str) => params.append(key, str));\n            } else {\n              params.set(key, value.toString());\n            }\n          }\n        }\n\n        const url = `${endpoint}?${params.toString()}`;\n        const responseData =\n          await errorHandlingFetcher<PaginatedApiResponse<T>>(url);\n\n        // Validate response data structure\n        if (\n          !Array.isArray(\n            responseData.items || typeof responseData.total_items !== \"number\"\n          )\n        ) {\n          throw new Error(\n            \"Sorry, we encountered an issue with the data format. Please try again or contact support if the problem persists.\"\n          );\n        }\n\n        setTotalItems(responseData.total_items);\n\n        // Splits a batch into pages\n        const pagesInBatch = Array.from({ length: pagesPerBatch }, (_, i) => {\n          const startIndex = i * itemsPerPage;\n          return responseData.items.slice(\n            startIndex,\n            startIndex + itemsPerPage\n          );\n        });\n\n        setCachedBatches((prev) => ({\n          ...prev,\n          [batchNum]: pagesInBatch,\n        }));\n      } catch (error) {\n        setError(error instanceof Error ? error : new Error(String(error)));\n      } finally {\n        ongoingRequestsRef.current.delete(batchNum);\n      }\n    },\n    [endpoint, pagesPerBatch, itemsPerPage, query, filter]\n  );\n\n  // Updates the URL with the current page number\n  const updatePageUrl = useCallback(\n    (page: number) => {\n      if (currentPath && searchParams) {\n        const params = new URLSearchParams(searchParams);\n        params.set(\"page\", page.toString());\n        router.replace(`${currentPath}?${params.toString()}` as Route, {\n          scroll: false,\n        });\n      }\n    },\n    [currentPath, router, searchParams]\n  );\n\n  // Updates the current page\n  const goToPage = useCallback(\n    (newPage: number) => {\n      setCurrentPage(newPage);\n      updatePageUrl(newPage);\n    },\n    [updatePageUrl]\n  );\n\n  // Loads the current and adjacent batches\n  useEffect(() => {\n    const { batchNum } = batchAndPageIndices;\n    const nextBatchNum = batchNum + 1;\n    const prevBatchNum = Math.max(batchNum - 1, 0);\n\n    if (!cachedBatches[batchNum]) {\n      setIsLoading(true);\n      fetchBatchData(batchNum);\n    }\n\n    // Possible total number of items including the next batch\n    const totalItemsIncludingNextBatch =\n      nextBatchNum * pagesPerBatch * itemsPerPage;\n    // Preload next batch if we're not on the last batch\n    if (\n      totalItemsIncludingNextBatch <= totalItems &&\n      !cachedBatches[nextBatchNum]\n    ) {\n      fetchBatchData(nextBatchNum);\n    }\n\n    // Load previous batch if missing\n    if (!cachedBatches[prevBatchNum]) {\n      fetchBatchData(prevBatchNum);\n    }\n\n    // Ensure first batch is always loaded\n    if (!cachedBatches[0]) {\n      fetchBatchData(0);\n    }\n  }, [currentPage, cachedBatches, totalPages, pagesPerBatch, fetchBatchData]);\n\n  // Updates current page data from the cache\n  useEffect(() => {\n    const { batchNum, batchPageNum } = batchAndPageIndices;\n\n    const cachedBatch = cachedBatches[batchNum];\n    if (cachedBatch !== undefined) {\n      const cachedBatchPage = cachedBatch[batchPageNum];\n      if (cachedBatchPage !== undefined) {\n        setCurrentPageData(cachedBatchPage);\n        setIsLoading(false);\n      }\n    }\n  }, [currentPage, cachedBatches, pagesPerBatch]);\n\n  // Implements periodic refresh\n  useEffect(() => {\n    if (!refreshIntervalInMs) return;\n\n    const interval = setInterval(() => {\n      const { batchNum } = batchAndPageIndices;\n      fetchBatchData(batchNum);\n    }, refreshIntervalInMs);\n\n    return () => clearInterval(interval);\n  }, [currentPage, pagesPerBatch, refreshIntervalInMs, fetchBatchData]);\n\n  // Manually refreshes the current batch\n  const refresh = useCallback(async () => {\n    const { batchNum } = batchAndPageIndices;\n    await fetchBatchData(batchNum);\n  }, [currentPage, pagesPerBatch, fetchBatchData]);\n\n  // Cache invalidation\n  useEffect(() => {\n    setCachedBatches({});\n    setTotalItems(0);\n    goToPage(1);\n    setError(null);\n  }, [currentPath, query, filter]);\n\n  return {\n    currentPage,\n    currentPageData,\n    totalPages,\n    totalItems,\n    goToPage,\n    refresh,\n    isLoading,\n    error,\n  };\n}\n\nexport default usePaginatedFetch;\n"
  },
  {
    "path": "web/src/hooks/usePromptShortcuts.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { InputPrompt } from \"@/app/app/interfaces\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport default function usePromptShortcuts() {\n  const { data, error, isLoading, mutate } = useSWR<InputPrompt[]>(\n    SWR_KEYS.promptShortcuts,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60000,\n    }\n  );\n\n  const promptShortcuts = data ?? [];\n  const userPromptShortcuts = promptShortcuts.filter((p) => !p.is_public);\n  const activePromptShortcuts = promptShortcuts.filter((p) => p.active);\n\n  return {\n    promptShortcuts,\n    userPromptShortcuts,\n    activePromptShortcuts,\n    isLoading,\n    error,\n    refresh: mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useScimToken.ts",
    "content": "import useSWR from \"swr\";\n\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport type { ScimTokenResponse } from \"@/app/admin/scim/interfaces\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport function useScimToken() {\n  const { data, error, isLoading, mutate } = useSWR<ScimTokenResponse>(\n    SWR_KEYS.scimToken,\n    errorHandlingFetcher,\n    { shouldRetryOnError: false }\n  );\n\n  return { data, error, isLoading, mutate };\n}\n"
  },
  {
    "path": "web/src/hooks/useScreenSize.ts",
    "content": "\"use client\";\n\nimport {\n  DESKTOP_SMALL_BREAKPOINT_PX,\n  DESKTOP_MEDIUM_BREAKPOINT_PX,\n  MOBILE_SIDEBAR_BREAKPOINT_PX,\n} from \"@/lib/constants\";\nimport { useState, useCallback } from \"react\";\nimport useOnMount from \"@/hooks/useOnMount\";\n\nexport interface ScreenSize {\n  height: number;\n  width: number;\n  isMobile: boolean;\n  isSmallScreen: boolean;\n  isMediumScreen: boolean;\n}\n\nexport default function useScreenSize(): ScreenSize {\n  const [sizes, setSizes] = useState(() => ({\n    width: typeof window !== \"undefined\" ? window.innerWidth : 0,\n    height: typeof window !== \"undefined\" ? window.innerHeight : 0,\n  }));\n\n  const handleResize = useCallback(() => {\n    setSizes({\n      width: window.innerWidth,\n      height: window.innerHeight,\n    });\n  }, []);\n\n  const isMounted = useOnMount(() => {\n    window.addEventListener(\"resize\", handleResize);\n    return () => window.removeEventListener(\"resize\", handleResize);\n  });\n\n  const isMobile = sizes.width <= MOBILE_SIDEBAR_BREAKPOINT_PX;\n  const isSmall = sizes.width <= DESKTOP_SMALL_BREAKPOINT_PX;\n  const isMedium = sizes.width <= DESKTOP_MEDIUM_BREAKPOINT_PX;\n\n  return {\n    height: sizes.height,\n    width: sizes.width,\n    isMobile: isMounted && isMobile,\n    isSmallScreen: isMounted && isSmall,\n    isMediumScreen: isMounted && isMedium,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useServerTools.ts",
    "content": "\"use client\";\n\nimport useSWR, { KeyedMutator } from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { getActionIcon } from \"@/lib/tools/mcpUtils\";\nimport { MCPServer, MCPTool, ToolSnapshot } from \"@/lib/tools/interfaces\";\n\n/**\n * Return type for the useServerTools hook\n */\ninterface UseServerToolsReturn {\n  /** Array of tools available for the MCP server, formatted for UI display */\n  tools: MCPTool[];\n\n  /** Loading state - true when fetching tools from the API */\n  isLoading: boolean;\n\n  /** Error object if the fetch failed, undefined otherwise */\n  error: Error | undefined;\n\n  /** SWR mutate function for manually revalidating or updating the tools cache */\n  mutate: KeyedMutator<ToolSnapshot[]>;\n}\n\n/**\n * useServerTools\n *\n * A custom hook for lazily loading and managing tools for a specific MCP server.\n * This hook only fetches tools when the server is expanded, reducing unnecessary\n * API calls and improving performance.\n *\n * @param server - The MCP server object containing server metadata (id, url, name)\n * @param isExpanded - Boolean flag indicating whether the server card is expanded.\n *                     Tools are only fetched when this is true.\n *\n * @returns An object containing:\n *   - tools: Array of MCPTool objects formatted for UI display\n *   - isLoading: Boolean indicating if tools are currently being fetched\n *   - error: Error object if fetch failed\n *   - mutate: Function to manually revalidate or update the tools cache\n *\n * @example\n * ```tsx\n * function ServerCard({ server }) {\n *   const [isExpanded, setIsExpanded] = useState(false);\n *   const { tools, isLoading, error, mutate } = useServerTools(server, isExpanded);\n *\n *   if (isLoading) return <div>Loading tools...</div>;\n *   if (error) return <div>Failed to load tools</div>;\n *\n *   return (\n *     <div>\n *       <button onClick={() => setIsExpanded(!isExpanded)}>\n *         {isExpanded ? 'Collapse' : 'Expand'}\n *       </button>\n *       {isExpanded && tools.map(tool => (\n *         <ToolItem key={tool.id} {...tool} />\n *       ))}\n *     </div>\n *   );\n * }\n * ```\n *\n * @remarks\n * - Uses SWR for caching and automatic revalidation\n * - Automatically converts ToolSnapshot[] from API to MCPTool[] for UI\n * - Revalidation on focus and reconnect are disabled to reduce API calls\n * - The hook will not fetch if isExpanded is false (lazy loading)\n */\nexport default function useServerTools(\n  server: MCPServer,\n  isExpanded: boolean\n): UseServerToolsReturn {\n  const shouldFetch = isExpanded;\n\n  const {\n    data: toolsData,\n    isLoading,\n    error,\n    mutate,\n  } = useSWR<ToolSnapshot[]>(\n    shouldFetch\n      ? `/api/admin/mcp/server/${server.id}/tools/snapshots?source=db`\n      : null,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateOnReconnect: false,\n    }\n  );\n\n  // Convert ToolSnapshot[] to MCPTool[] format for UI consumption\n  const tools: MCPTool[] = toolsData\n    ? toolsData.map((tool) => ({\n        id: tool.id.toString(),\n        icon: getActionIcon(server.server_url, server.name),\n        name: tool.display_name || tool.name,\n        description: tool.description,\n        isAvailable: true,\n        isEnabled: tool.enabled,\n      }))\n    : [];\n\n  return {\n    tools,\n    isLoading: isLoading && shouldFetch,\n    error,\n    mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useSettings.test.ts",
    "content": "import useSWR from \"swr\";\nimport {\n  useSettings,\n  useEnterpriseSettings,\n  useCustomAnalyticsScript,\n} from \"@/hooks/useSettings\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { ApplicationStatus, QueryHistoryType } from \"@/interfaces/settings\";\n\njest.mock(\"swr\", () => ({\n  __esModule: true,\n  default: jest.fn(),\n}));\n\njest.mock(\"@/lib/fetcher\", () => ({\n  errorHandlingFetcher: jest.fn(),\n}));\n\njest.mock(\"@/lib/constants\", () => ({\n  EE_ENABLED: false,\n}));\n\nconst mockUseSWR = useSWR as jest.MockedFunction<typeof useSWR>;\n\ndescribe(\"useSettings\", () => {\n  beforeEach(() => {\n    mockUseSWR.mockReset();\n  });\n\n  test(\"returns DEFAULT_SETTINGS when SWR data is undefined\", () => {\n    mockUseSWR.mockReturnValue({\n      data: undefined,\n      error: undefined,\n      isLoading: true,\n      mutate: jest.fn(),\n      isValidating: false,\n    } as any);\n\n    const result = useSettings();\n\n    expect(result.settings).toEqual({\n      auto_scroll: true,\n      application_status: ApplicationStatus.ACTIVE,\n      gpu_enabled: false,\n      maximum_chat_retention_days: null,\n      notifications: [],\n      needs_reindexing: false,\n      anonymous_user_enabled: false,\n      invite_only_enabled: false,\n      deep_research_enabled: true,\n      temperature_override_enabled: true,\n      query_history_type: QueryHistoryType.NORMAL,\n    });\n    expect(result.isLoading).toBe(true);\n  });\n\n  test(\"returns fetched settings when SWR has data\", () => {\n    const mockSettings = {\n      auto_scroll: false,\n      application_status: ApplicationStatus.ACTIVE,\n      gpu_enabled: true,\n      maximum_chat_retention_days: 30,\n      notifications: [],\n      needs_reindexing: false,\n      anonymous_user_enabled: false,\n      invite_only_enabled: false,\n      deep_research_enabled: true,\n      temperature_override_enabled: true,\n      query_history_type: QueryHistoryType.NORMAL,\n    };\n\n    mockUseSWR.mockReturnValue({\n      data: mockSettings,\n      error: undefined,\n      isLoading: false,\n      mutate: jest.fn(),\n      isValidating: false,\n    } as any);\n\n    const result = useSettings();\n\n    expect(result.settings).toBe(mockSettings);\n    expect(result.isLoading).toBe(false);\n    expect(result.error).toBeUndefined();\n  });\n\n  test(\"fetches from /api/settings with correct SWR config\", () => {\n    mockUseSWR.mockReturnValue({\n      data: undefined,\n      error: undefined,\n      isLoading: true,\n      mutate: jest.fn(),\n      isValidating: false,\n    } as any);\n\n    useSettings();\n\n    expect(mockUseSWR).toHaveBeenCalledWith(\n      \"/api/settings\",\n      errorHandlingFetcher,\n      expect.objectContaining({\n        revalidateOnFocus: false,\n        revalidateOnReconnect: false,\n        dedupingInterval: 30_000,\n        errorRetryInterval: 5_000,\n      })\n    );\n  });\n});\n\ndescribe(\"useEnterpriseSettings\", () => {\n  beforeEach(() => {\n    mockUseSWR.mockReset();\n  });\n\n  test(\"passes null key when EE is disabled at both build and runtime\", () => {\n    mockUseSWR.mockReturnValue({\n      data: undefined,\n      error: undefined,\n      isLoading: false,\n      mutate: jest.fn(),\n      isValidating: false,\n    } as any);\n\n    const result = useEnterpriseSettings(false);\n\n    expect(mockUseSWR).toHaveBeenCalledWith(\n      null,\n      errorHandlingFetcher,\n      expect.any(Object)\n    );\n    expect(result.enterpriseSettings).toBeNull();\n    expect(result.isLoading).toBe(false);\n  });\n\n  test(\"fetches from /api/enterprise-settings when runtime EE is enabled\", () => {\n    mockUseSWR.mockReturnValue({\n      data: undefined,\n      error: undefined,\n      isLoading: true,\n      mutate: jest.fn(),\n      isValidating: false,\n    } as any);\n\n    useEnterpriseSettings(true);\n\n    expect(mockUseSWR).toHaveBeenCalledWith(\n      \"/api/enterprise-settings\",\n      errorHandlingFetcher,\n      expect.any(Object)\n    );\n  });\n\n  test(\"uses referential equality for compare to ensure logo cache-busters update\", () => {\n    mockUseSWR.mockReturnValue({\n      data: undefined,\n      error: undefined,\n      isLoading: true,\n      mutate: jest.fn(),\n      isValidating: false,\n    } as any);\n\n    useEnterpriseSettings(true);\n\n    const swrConfig = mockUseSWR.mock.calls[0]![2] as any;\n    expect(swrConfig.compare).toBeDefined();\n\n    // Same reference should be equal\n    const obj = { use_custom_logo: true };\n    expect(swrConfig.compare(obj, obj)).toBe(true);\n\n    // Different references with same values should NOT be equal\n    // (this is the key behavior — SWR's default deep compare would return true)\n    const a = { use_custom_logo: true };\n    const b = { use_custom_logo: true };\n    expect(swrConfig.compare(a, b)).toBe(false);\n  });\n\n  test(\"returns enterprise settings when SWR has data\", () => {\n    const mockEnterprise = {\n      application_name: \"Acme Corp\",\n      use_custom_logo: true,\n    };\n\n    mockUseSWR.mockReturnValue({\n      data: mockEnterprise,\n      error: undefined,\n      isLoading: false,\n      mutate: jest.fn(),\n      isValidating: false,\n    } as any);\n\n    const result = useEnterpriseSettings(true);\n\n    expect(result.enterpriseSettings).toBe(mockEnterprise);\n    expect(result.isLoading).toBe(false);\n  });\n});\n\ndescribe(\"useCustomAnalyticsScript\", () => {\n  beforeEach(() => {\n    mockUseSWR.mockReset();\n  });\n\n  test(\"returns null when EE is disabled\", () => {\n    mockUseSWR.mockReturnValue({\n      data: undefined,\n      error: undefined,\n      isLoading: false,\n      mutate: jest.fn(),\n      isValidating: false,\n    } as any);\n\n    const result = useCustomAnalyticsScript(false);\n\n    expect(mockUseSWR).toHaveBeenCalledWith(\n      null,\n      errorHandlingFetcher,\n      expect.any(Object)\n    );\n    expect(result).toBeNull();\n  });\n\n  test(\"returns script content when available\", () => {\n    const script = \"console.log('analytics');\";\n    mockUseSWR.mockReturnValue({\n      data: script,\n      error: undefined,\n      isLoading: false,\n      mutate: jest.fn(),\n      isValidating: false,\n    } as any);\n\n    const result = useCustomAnalyticsScript(true);\n\n    expect(result).toBe(script);\n  });\n});\n"
  },
  {
    "path": "web/src/hooks/useSettings.ts",
    "content": "import useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport {\n  Settings,\n  EnterpriseSettings,\n  ApplicationStatus,\n  QueryHistoryType,\n} from \"@/interfaces/settings\";\nimport { EE_ENABLED } from \"@/lib/constants\";\n\n// Longer retry delay for critical settings fetches — avoids rapid error→success\n// flicker in the SettingsProvider error boundary when there's a transient blip.\nconst SETTINGS_ERROR_RETRY_INTERVAL = 5_000;\n\nconst DEFAULT_SETTINGS = {\n  auto_scroll: true,\n  application_status: ApplicationStatus.ACTIVE,\n  gpu_enabled: false,\n  maximum_chat_retention_days: null,\n  notifications: [],\n  needs_reindexing: false,\n  anonymous_user_enabled: false,\n  invite_only_enabled: false,\n  deep_research_enabled: true,\n  temperature_override_enabled: true,\n  query_history_type: QueryHistoryType.NORMAL,\n} satisfies Settings;\n\nexport function useSettings(): {\n  settings: Settings;\n  isLoading: boolean;\n  error: Error | undefined;\n} {\n  const { data, error, isLoading } = useSWR<Settings>(\n    SWR_KEYS.settings,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateOnReconnect: false,\n      revalidateIfStale: false,\n      dedupingInterval: 30_000,\n      errorRetryInterval: SETTINGS_ERROR_RETRY_INTERVAL,\n    }\n  );\n\n  return {\n    settings: data ?? DEFAULT_SETTINGS,\n    isLoading,\n    error,\n  };\n}\n\nexport function useEnterpriseSettings(eeEnabledRuntime: boolean): {\n  enterpriseSettings: EnterpriseSettings | null;\n  isLoading: boolean;\n  error: Error | undefined;\n} {\n  // Gate on the build-time flag OR the runtime ee_features_enabled from\n  // /api/settings. The build-time flag (NEXT_PUBLIC_ENABLE_PAID_EE_FEATURES)\n  // may be unset even when the server enables EE via LICENSE_ENFORCEMENT_ENABLED,\n  // so the runtime check is needed as a fallback.\n  const shouldFetch = EE_ENABLED || eeEnabledRuntime;\n\n  const { data, error, isLoading } = useSWR<EnterpriseSettings>(\n    shouldFetch ? SWR_KEYS.enterpriseSettings : null,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateOnReconnect: false,\n      revalidateIfStale: false,\n      dedupingInterval: 30_000,\n      errorRetryInterval: SETTINGS_ERROR_RETRY_INTERVAL,\n      // Referential equality instead of SWR's default deep comparison.\n      // The logo image can change without the settings JSON changing\n      // (same use_custom_logo: true), so we need every mutate() call\n      // to propagate a new reference so cache-busters recalculate.\n      compare: (a, b) => a === b,\n    }\n  );\n\n  return {\n    enterpriseSettings: data ?? null,\n    isLoading: shouldFetch ? isLoading : false,\n    error,\n  };\n}\n\nexport function useCustomAnalyticsScript(\n  eeEnabledRuntime: boolean\n): string | null {\n  const shouldFetch = EE_ENABLED || eeEnabledRuntime;\n\n  const { data } = useSWR<string>(\n    shouldFetch ? SWR_KEYS.customAnalyticsScript : null,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateOnReconnect: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60_000,\n    }\n  );\n\n  return data ?? null;\n}\n"
  },
  {
    "path": "web/src/hooks/useShareableGroups.ts",
    "content": "\"use client\";\n\nimport useSWR, { mutate } from \"swr\";\nimport { useContext } from \"react\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport interface MinimalUserGroupSnapshot {\n  id: number;\n  name: string;\n}\n\n// TODO (@raunakab):\n// Refactor this hook to live inside of a special `ee` directory.\n\nexport default function useShareableGroups() {\n  const combinedSettings = useContext(SettingsContext);\n  const settingsLoading = combinedSettings?.settingsLoading ?? false;\n  const isPaidEnterpriseFeaturesEnabled =\n    !settingsLoading &&\n    combinedSettings &&\n    combinedSettings.enterpriseSettings !== null;\n\n  const { data, error, isLoading } = useSWR<MinimalUserGroupSnapshot[]>(\n    isPaidEnterpriseFeaturesEnabled ? SWR_KEYS.shareableGroups : null,\n    errorHandlingFetcher\n  );\n\n  const refreshShareableGroups = () => mutate(SWR_KEYS.shareableGroups);\n\n  if (settingsLoading) {\n    return {\n      data: undefined,\n      isLoading: true,\n      error: undefined,\n      refreshShareableGroups,\n    };\n  }\n\n  if (!isPaidEnterpriseFeaturesEnabled) {\n    return {\n      data: [],\n      isLoading: false,\n      error: undefined,\n      refreshShareableGroups,\n    };\n  }\n\n  return {\n    data,\n    isLoading,\n    error,\n    refreshShareableGroups,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useShareableUsers.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { MinimalUserSnapshot } from \"@/lib/types\";\n\nexport interface UseShareableUsersParams {\n  includeApiKeys: boolean;\n}\n\nexport default function useShareableUsers({\n  includeApiKeys,\n}: UseShareableUsersParams) {\n  const { data, error, mutate, isLoading } = useSWR<MinimalUserSnapshot[]>(\n    `/api/users?include_api_keys=${includeApiKeys}`,\n    errorHandlingFetcher\n  );\n\n  return {\n    data,\n    isLoading,\n    error,\n    refreshShareableUsers: mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useShowOnboarding.ts",
    "content": "\"use client\";\n\nimport { useReducer, useCallback, useEffect, useRef, useState } from \"react\";\nimport { onboardingReducer, initialState } from \"@/sections/onboarding/reducer\";\nimport {\n  OnboardingActions,\n  OnboardingActionType,\n  OnboardingData,\n  OnboardingState,\n  OnboardingStep,\n} from \"@/interfaces/onboarding\";\nimport { WellKnownLLMProviderDescriptor } from \"@/interfaces/llm\";\nimport { updateUserPersonalization } from \"@/lib/userSettings\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { useLLMProviders } from \"@/hooks/useLLMProviders\";\nimport { useProviderStatus } from \"@/components/chat/ProviderContext\";\n\nfunction getOnboardingCompletedKey(userId: string): string {\n  return `onyx:onboardingCompleted:${userId}`;\n}\n\nfunction useOnboardingState(liveAgent?: MinimalPersonaSnapshot): {\n  state: OnboardingState;\n  llmDescriptors: WellKnownLLMProviderDescriptor[];\n  actions: OnboardingActions;\n  isLoading: boolean;\n  hasProviders: boolean;\n} {\n  const [state, dispatch] = useReducer(onboardingReducer, initialState);\n  const { user, refreshUser } = useUser();\n\n  // Get provider data from ProviderContext instead of duplicating the call\n  const {\n    llmProviders,\n    isLoadingProviders,\n    hasProviders: hasLlmProviders,\n    providerOptions,\n    refreshProviderInfo,\n  } = useProviderStatus();\n\n  // Only fetch persona-specific providers (different endpoint)\n  const { refetch: refreshPersonaProviders } = useLLMProviders(liveAgent?.id);\n\n  const userName = user?.personalization?.name;\n  const llmDescriptors = providerOptions;\n\n  const nameUpdateTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(\n    null\n  );\n  const hasInitializedForUserRef = useRef<string | undefined>(undefined);\n\n  // Initialize onboarding to the earliest incomplete step — runs once per user\n  // after both user data and provider data have loaded.  After initialization,\n  // user actions (Next / Prev / goToStep) drive navigation; the effect never\n  // re-runs so it cannot override user-driven state (e.g. button active).\n  useEffect(() => {\n    if (\n      isLoadingProviders ||\n      !user ||\n      hasInitializedForUserRef.current === user.id\n    ) {\n      return;\n    }\n    hasInitializedForUserRef.current = user.id;\n\n    // Pre-populate state with existing data\n    if (userName) {\n      dispatch({\n        type: OnboardingActionType.UPDATE_DATA,\n        payload: { userName },\n      });\n    }\n    dispatch({\n      type: OnboardingActionType.UPDATE_DATA,\n      payload: { llmProviders: (llmProviders ?? []).map((p) => p.provider) },\n    });\n\n    // Determine the earliest incomplete step\n    // Name step is incomplete if userName is not set\n    if (!userName) {\n      // Stay at Welcome/Name step (no dispatch needed, this is the initial state)\n      return;\n    }\n\n    // LlmSetup step is incomplete if no LLM providers are configured\n    if (!hasLlmProviders) {\n      dispatch({\n        type: OnboardingActionType.SET_BUTTON_ACTIVE,\n        isButtonActive: false,\n      });\n      dispatch({\n        type: OnboardingActionType.GO_TO_STEP,\n        step: OnboardingStep.LlmSetup,\n      });\n      return;\n    }\n\n    // All steps complete - go to Complete step\n    dispatch({\n      type: OnboardingActionType.SET_BUTTON_ACTIVE,\n      isButtonActive: true,\n    });\n    dispatch({\n      type: OnboardingActionType.GO_TO_STEP,\n      step: OnboardingStep.Complete,\n    });\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, [llmProviders, isLoadingProviders, userName, hasLlmProviders, user]);\n\n  const nextStep = useCallback(() => {\n    dispatch({\n      type: OnboardingActionType.SET_BUTTON_ACTIVE,\n      isButtonActive: false,\n    });\n\n    if (state.currentStep === OnboardingStep.Name) {\n      const hasProviders = (state.data.llmProviders?.length ?? 0) > 0;\n      if (hasProviders) {\n        dispatch({\n          type: OnboardingActionType.SET_BUTTON_ACTIVE,\n          isButtonActive: true,\n        });\n      } else {\n        dispatch({\n          type: OnboardingActionType.SET_BUTTON_ACTIVE,\n          isButtonActive: false,\n        });\n      }\n    }\n\n    if (state.currentStep === OnboardingStep.LlmSetup) {\n      refreshProviderInfo();\n      if (liveAgent) {\n        refreshPersonaProviders();\n      }\n    }\n    dispatch({ type: OnboardingActionType.NEXT_STEP });\n  }, [state, refreshProviderInfo, refreshPersonaProviders, liveAgent]);\n\n  const prevStep = useCallback(() => {\n    dispatch({ type: OnboardingActionType.PREV_STEP });\n  }, []);\n\n  const goToStep = useCallback(\n    (step: OnboardingStep) => {\n      const hasProviders = (state.data.llmProviders?.length ?? 0) > 0;\n      if (step === OnboardingStep.LlmSetup && hasProviders) {\n        dispatch({\n          type: OnboardingActionType.SET_BUTTON_ACTIVE,\n          isButtonActive: true,\n        });\n      } else if (step === OnboardingStep.LlmSetup) {\n        dispatch({\n          type: OnboardingActionType.SET_BUTTON_ACTIVE,\n          isButtonActive: false,\n        });\n      }\n      dispatch({ type: OnboardingActionType.GO_TO_STEP, step });\n    },\n    [state]\n  );\n\n  const updateName = useCallback(\n    (name: string) => {\n      dispatch({\n        type: OnboardingActionType.UPDATE_DATA,\n        payload: { userName: name },\n      });\n\n      if (nameUpdateTimeoutRef.current) {\n        clearTimeout(nameUpdateTimeoutRef.current);\n      }\n\n      if (name === \"\") {\n        dispatch({\n          type: OnboardingActionType.SET_BUTTON_ACTIVE,\n          isButtonActive: false,\n        });\n      } else {\n        dispatch({\n          type: OnboardingActionType.SET_BUTTON_ACTIVE,\n          isButtonActive: true,\n        });\n      }\n\n      nameUpdateTimeoutRef.current = setTimeout(async () => {\n        try {\n          await updateUserPersonalization({ name });\n          await refreshUser();\n        } catch (_e) {\n          dispatch({\n            type: OnboardingActionType.SET_BUTTON_ACTIVE,\n            isButtonActive: false,\n          });\n          console.error(\"Error updating user name:\", _e);\n        } finally {\n          nameUpdateTimeoutRef.current = null;\n        }\n      }, 500);\n    },\n    [refreshUser]\n  );\n\n  const updateData = useCallback((data: Partial<OnboardingData>) => {\n    dispatch({ type: OnboardingActionType.UPDATE_DATA, payload: data });\n  }, []);\n\n  const setLoading = useCallback((isLoading: boolean) => {\n    dispatch({ type: OnboardingActionType.SET_LOADING, isLoading });\n  }, []);\n\n  const setButtonActive = useCallback((active: boolean) => {\n    dispatch({\n      type: OnboardingActionType.SET_BUTTON_ACTIVE,\n      isButtonActive: active,\n    });\n  }, []);\n\n  const setError = useCallback((error: string | undefined) => {\n    dispatch({ type: OnboardingActionType.SET_ERROR, error });\n  }, []);\n\n  const reset = useCallback(() => {\n    dispatch({ type: OnboardingActionType.RESET });\n  }, []);\n\n  useEffect(() => {\n    return () => {\n      if (nameUpdateTimeoutRef.current) {\n        clearTimeout(nameUpdateTimeoutRef.current);\n      }\n    };\n  }, []);\n\n  return {\n    state,\n    llmDescriptors,\n    actions: {\n      nextStep,\n      prevStep,\n      goToStep,\n      setButtonActive,\n      updateName,\n      updateData,\n      setLoading,\n      setError,\n      reset,\n    },\n    isLoading: isLoadingProviders,\n    hasProviders: hasLlmProviders,\n  };\n}\n\ninterface UseShowOnboardingParams {\n  liveAgent: MinimalPersonaSnapshot | undefined;\n  isLoadingChatSessions: boolean;\n  chatSessionsCount: number;\n  userId: string | undefined;\n}\n\nexport function useShowOnboarding({\n  liveAgent,\n  isLoadingChatSessions,\n  chatSessionsCount,\n  userId,\n}: UseShowOnboardingParams) {\n  const [showOnboarding, setShowOnboarding] = useState(false);\n  const [onboardingDismissed, setOnboardingDismissed] = useState(false);\n\n  // Read localStorage once userId is available to check if onboarding was dismissed\n  useEffect(() => {\n    if (userId === undefined) return;\n    const dismissed =\n      localStorage.getItem(getOnboardingCompletedKey(userId)) === \"true\";\n    setOnboardingDismissed(dismissed);\n  }, [userId]);\n\n  // Initialize onboarding state — single source of truth for provider data\n  const {\n    state: onboardingState,\n    actions: onboardingActions,\n    llmDescriptors,\n    isLoading: isLoadingOnboarding,\n    hasProviders: hasAnyProvider,\n  } = useOnboardingState(liveAgent);\n\n  const isLoadingProviders = isLoadingOnboarding;\n\n  // Track which user we've already evaluated onboarding for.\n  // Re-check when userId changes (logout/login, account switching without full reload).\n  const hasCheckedOnboardingForUserId = useRef<string | undefined>(undefined);\n\n  // Evaluate onboarding once per user after data loads.\n  // Show onboarding only if no LLM providers are configured.\n  // Skip entirely if user has existing chat sessions.\n  useEffect(() => {\n    // If onboarding was previously dismissed, never show it again\n    if (onboardingDismissed) {\n      setShowOnboarding(false);\n      return;\n    }\n\n    // Wait for data to load\n    if (isLoadingProviders || isLoadingChatSessions || userId === undefined) {\n      return;\n    }\n\n    // Only check once per user — but allow self-correction from true→false\n    // when provider data arrives (e.g. after a transient fetch error).\n    if (hasCheckedOnboardingForUserId.current === userId) {\n      if (showOnboarding && hasAnyProvider && onboardingState.stepIndex === 0) {\n        setShowOnboarding(false);\n      }\n      return;\n    }\n    hasCheckedOnboardingForUserId.current = userId;\n\n    // Skip onboarding if user has any chat sessions\n    if (chatSessionsCount > 0) {\n      setShowOnboarding(false);\n      return;\n    }\n\n    // Show onboarding if no LLM providers are configured.\n    setShowOnboarding(hasAnyProvider === false);\n  }, [\n    isLoadingProviders,\n    isLoadingChatSessions,\n    hasAnyProvider,\n    chatSessionsCount,\n    userId,\n    showOnboarding,\n    onboardingDismissed,\n    onboardingState.stepIndex,\n  ]);\n\n  const dismissOnboarding = useCallback(() => {\n    if (userId === undefined) return;\n    setShowOnboarding(false);\n    setOnboardingDismissed(true);\n    localStorage.setItem(getOnboardingCompletedKey(userId), \"true\");\n  }, [userId]);\n\n  const hideOnboarding = dismissOnboarding;\n  const finishOnboarding = dismissOnboarding;\n\n  return {\n    showOnboarding,\n    onboardingDismissed,\n    onboardingState,\n    onboardingActions,\n    llmDescriptors,\n    isLoadingOnboarding,\n    hideOnboarding,\n    finishOnboarding,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useTags.ts",
    "content": "import useSWR from \"swr\";\nimport { Tag } from \"@/lib/types\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\ninterface TagsResponse {\n  tags: Tag[];\n}\n\n/**\n * Fetches the set of valid tags from the server.\n *\n * Tags are deduplicated for 60 s and not re-fetched on window focus.\n *\n * @returns tags - The array of available {@link Tag} objects (empty while loading).\n * @returns isLoading - `true` until the first successful fetch or an error.\n * @returns error - The error object if the request failed.\n * @returns refresh - SWR mutate function to manually re-fetch.\n */\nexport default function useTags() {\n  const { data, error, mutate } = useSWR<TagsResponse>(\n    SWR_KEYS.tags,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60000,\n    }\n  );\n\n  return {\n    tags: data?.tags ?? [],\n    isLoading: !error && !data,\n    error,\n    refresh: mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useToast.ts",
    "content": "import { useEffect, useSyncExternalStore } from \"react\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\nexport type ToastLevel = \"success\" | \"error\" | \"warning\" | \"info\" | \"default\";\n\nexport interface ToastOptions {\n  message: string;\n  level?: ToastLevel;\n  description?: string;\n  duration?: number; // ms – default 4000, Infinity = persistent\n  dismissible?: boolean; // default true (shows close button)\n  actionLabel?: string;\n  onAction?: () => void;\n}\n\nexport interface Toast extends ToastOptions {\n  id: string;\n  createdAt: number;\n  leaving?: boolean; // true while exit‑animation plays\n}\n\n// ---------------------------------------------------------------------------\n// Constants\n// ---------------------------------------------------------------------------\n\nexport const MAX_VISIBLE_TOASTS = 3;\nconst DEFAULT_DURATION = 4000;\n\n// ---------------------------------------------------------------------------\n// Module‑level store (external to React)\n// ---------------------------------------------------------------------------\n\nlet toasts: Toast[] = [];\nconst subscribers = new Set<() => void>();\nconst timers = new Map<string, ReturnType<typeof setTimeout>>();\n\nlet nextId = 0;\n\nfunction notify() {\n  subscribers.forEach((cb) => cb());\n}\n\nfunction addToast(options: ToastOptions): string {\n  const id = `toast-${++nextId}-${Date.now()}`;\n  const duration = options.duration ?? DEFAULT_DURATION;\n\n  const entry: Toast = {\n    ...options,\n    id,\n    level: options.level ?? \"info\",\n    dismissible: options.dismissible ?? true,\n    createdAt: Date.now(),\n  };\n\n  toasts = [...toasts, entry];\n  notify();\n\n  if (duration !== Infinity) {\n    const timer = setTimeout(() => {\n      removeToast(id);\n    }, duration);\n    timers.set(id, timer);\n  }\n\n  return id;\n}\n\nfunction removeToast(id: string): void {\n  const timer = timers.get(id);\n  if (timer) {\n    clearTimeout(timer);\n    timers.delete(id);\n  }\n  toasts = toasts.filter((t) => t.id !== id);\n  notify();\n}\n\nfunction markLeaving(id: string): void {\n  toasts = toasts.map((t) => (t.id === id ? { ...t, leaving: true } : t));\n  notify();\n}\n\nfunction clearAll(): void {\n  timers.forEach((timer) => clearTimeout(timer));\n  timers.clear();\n  toasts = [];\n  notify();\n}\n\nfunction subscribe(cb: () => void): () => void {\n  subscribers.add(cb);\n  return () => {\n    subscribers.delete(cb);\n  };\n}\n\nfunction getSnapshot(): Toast[] {\n  return toasts;\n}\n\n// ---------------------------------------------------------------------------\n// Imperative API (works anywhere – components, hooks, plain .ts files)\n// ---------------------------------------------------------------------------\n\ninterface ToastFn {\n  (options: ToastOptions): string;\n  success: (\n    message: string,\n    opts?: Omit<ToastOptions, \"message\" | \"level\">\n  ) => string;\n  error: (\n    message: string,\n    opts?: Omit<ToastOptions, \"message\" | \"level\">\n  ) => string;\n  warning: (\n    message: string,\n    opts?: Omit<ToastOptions, \"message\" | \"level\">\n  ) => string;\n  info: (\n    message: string,\n    opts?: Omit<ToastOptions, \"message\" | \"level\">\n  ) => string;\n  dismiss: (id: string) => void;\n  clearAll: () => void;\n  /** @internal – used by ToastContainer for exit animation */\n  _markLeaving: (id: string) => void;\n}\n\nfunction toastBase(options: ToastOptions): string {\n  return addToast(options);\n}\n\nexport const toast: ToastFn = Object.assign(toastBase, {\n  success: (message: string, opts?: Omit<ToastOptions, \"message\" | \"level\">) =>\n    addToast({ ...opts, message, level: \"success\" }),\n  error: (message: string, opts?: Omit<ToastOptions, \"message\" | \"level\">) =>\n    addToast({ ...opts, message, level: \"error\" }),\n  warning: (message: string, opts?: Omit<ToastOptions, \"message\" | \"level\">) =>\n    addToast({ ...opts, message, level: \"warning\" }),\n  info: (message: string, opts?: Omit<ToastOptions, \"message\" | \"level\">) =>\n    addToast({ ...opts, message, level: \"info\" }),\n  dismiss: removeToast,\n  clearAll,\n  _markLeaving: markLeaving,\n});\n\n// ---------------------------------------------------------------------------\n// React hook (convenience wrapper)\n// ---------------------------------------------------------------------------\n\nexport function useToast() {\n  useSyncExternalStore(subscribe, getSnapshot, getSnapshot);\n  return { toast, dismiss: toast.dismiss, clearAll: toast.clearAll };\n}\n\n// ---------------------------------------------------------------------------\n// Query-param toast hook\n// ---------------------------------------------------------------------------\n\ninterface ToastFromQueryMessages {\n  [key: string]: {\n    message: string;\n    type?: ToastLevel | null;\n  };\n}\n\n/**\n * Reads a `?message=<key>` query param on mount, fires the matching toast,\n * and strips the param from the URL.\n */\nexport function useToastFromQuery(messages: ToastFromQueryMessages) {\n  useEffect(() => {\n    const searchParams = new URLSearchParams(window.location.search);\n    const messageValue = searchParams?.get(\"message\");\n\n    if (messageValue && messageValue in messages) {\n      searchParams.delete(\"message\");\n      const newSearch = searchParams.toString()\n        ? \"?\" + searchParams.toString()\n        : \"\";\n      window.history.replaceState(\n        null,\n        \"\",\n        window.location.pathname + newSearch\n      );\n      const spec = messages[messageValue];\n      if (spec !== undefined) {\n        toast({\n          message: spec.message,\n          level: spec.type ?? \"info\",\n        });\n      }\n    }\n  }, []);\n}\n\n// ---------------------------------------------------------------------------\n// Store accessors (used by ToastContainer)\n// ---------------------------------------------------------------------------\n\nexport const toastStore = {\n  subscribe,\n  getSnapshot,\n};\n"
  },
  {
    "path": "web/src/hooks/useTokenRefresh.ts",
    "content": "\"use client\";\n\nimport { useState, useEffect, useRef } from \"react\";\nimport { User } from \"@/lib/types\";\nimport { NO_AUTH_USER_ID } from \"@/lib/extension/constants\";\nimport { AuthTypeMetadata } from \"@/hooks/useAuthTypeMetadata\";\nimport { AuthType } from \"@/lib/constants\";\n\n// Refresh token every 10 minutes (600000ms)\n// This is shorter than the session expiry time to ensure tokens stay valid\nconst REFRESH_INTERVAL = 600000;\n\n//  Custom hook for handling JWT token refresh for current user\nexport function useTokenRefresh(\n  user: User | null,\n  authTypeMetadata: AuthTypeMetadata,\n  onRefreshFail: () => Promise<void>\n) {\n  // Track last refresh time to avoid unnecessary calls\n  const [lastTokenRefresh, setLastTokenRefresh] = useState<number>(Date.now());\n\n  // Use a ref to track first load\n  const isFirstLoad = useRef(true);\n\n  useEffect(() => {\n    if (\n      !user ||\n      user.id === NO_AUTH_USER_ID ||\n      authTypeMetadata.authType === AuthType.OIDC ||\n      authTypeMetadata.authType === AuthType.SAML\n    )\n      return;\n\n    const refreshTokenPeriodically = async () => {\n      try {\n        // Skip time check if this is first load - we always refresh on first load\n        const isTimeToRefresh =\n          isFirstLoad.current ||\n          Date.now() - lastTokenRefresh > REFRESH_INTERVAL - 60000;\n\n        if (!isTimeToRefresh) {\n          return;\n        }\n\n        // Reset first load flag\n        if (isFirstLoad.current) {\n          isFirstLoad.current = false;\n        }\n\n        const response = await fetch(\"/api/auth/refresh\", {\n          method: \"POST\",\n          credentials: \"include\",\n        });\n\n        if (response.ok) {\n          // Update last refresh time on success\n          setLastTokenRefresh(Date.now());\n          console.debug(\"Auth token refreshed successfully\");\n        } else {\n          console.warn(\"Failed to refresh auth token:\", response.status);\n          // If token refresh fails, try to get current user info\n          await onRefreshFail();\n        }\n      } catch (error) {\n        console.error(\"Error refreshing auth token:\", error);\n      }\n    };\n\n    // Always attempt to refresh on first component mount\n    // This helps ensure tokens are fresh, especially after browser refresh\n    refreshTokenPeriodically();\n\n    // Set up interval for periodic refreshes\n    const intervalId = setInterval(refreshTokenPeriodically, REFRESH_INTERVAL);\n\n    // Also refresh token on window focus, but no more than once per minute\n    const handleVisibilityChange = () => {\n      if (\n        document.visibilityState === \"visible\" &&\n        Date.now() - lastTokenRefresh > 60000\n      ) {\n        refreshTokenPeriodically();\n      }\n    };\n\n    document.addEventListener(\"visibilitychange\", handleVisibilityChange);\n\n    return () => {\n      clearInterval(intervalId);\n      document.removeEventListener(\"visibilitychange\", handleVisibilityChange);\n    };\n  }, [user, lastTokenRefresh, onRefreshFail]);\n\n  return { lastTokenRefresh };\n}\n"
  },
  {
    "path": "web/src/hooks/useUserCounts.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport type { InvitedUserSnapshot } from \"@/lib/types\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport type { StatusCountMap } from \"@/refresh-pages/admin/UsersPage/interfaces\";\n\ntype UserCountsResponse = {\n  role_counts: Record<string, number>;\n  status_counts: Record<string, number>;\n};\n\ntype UserCounts = {\n  activeCount: number | null;\n  invitedCount: number | null;\n  pendingCount: number | null;\n  roleCounts: Record<string, number>;\n  statusCounts: StatusCountMap;\n  refreshCounts: () => void;\n};\n\nexport default function useUserCounts(): UserCounts {\n  const { data: countsData, mutate: refreshCounts } =\n    useSWR<UserCountsResponse>(SWR_KEYS.userCounts, errorHandlingFetcher);\n\n  const { data: invitedUsers } = useSWR<InvitedUserSnapshot[]>(\n    SWR_KEYS.invitedUsers,\n    errorHandlingFetcher\n  );\n\n  const { data: pendingUsers } = useSWR<InvitedUserSnapshot[]>(\n    NEXT_PUBLIC_CLOUD_ENABLED ? SWR_KEYS.pendingTenantUsers : null,\n    errorHandlingFetcher\n  );\n\n  const activeCount = countsData?.status_counts?.active ?? null;\n  const inactiveCount = countsData?.status_counts?.inactive ?? null;\n\n  return {\n    activeCount,\n    invitedCount: invitedUsers?.length ?? null,\n    pendingCount: pendingUsers?.length ?? null,\n    roleCounts: countsData?.role_counts ?? {},\n    statusCounts: {\n      ...(activeCount !== null ? { active: activeCount } : {}),\n      ...(inactiveCount !== null ? { inactive: inactiveCount } : {}),\n      ...(invitedUsers ? { invited: invitedUsers.length } : {}),\n      ...(pendingUsers ? { requested: pendingUsers.length } : {}),\n    } satisfies StatusCountMap,\n    refreshCounts,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useUserPersonalization.ts",
    "content": "\"use client\";\n\nimport { useCallback, useEffect, useMemo, useState } from \"react\";\nimport { MemoryItem, User, UserPersonalization } from \"@/lib/types\";\n\nconst DEFAULT_PERSONALIZATION: UserPersonalization = {\n  name: \"\",\n  role: \"\",\n  memories: [],\n  use_memories: true,\n  enable_memory_tool: true,\n  user_preferences: \"\",\n};\n\nfunction derivePersonalizationFromUser(user: User | null): UserPersonalization {\n  if (!user?.personalization) {\n    return { ...DEFAULT_PERSONALIZATION, memories: [] };\n  }\n\n  return {\n    name: user.personalization.name ?? \"\",\n    role: user.personalization.role ?? \"\",\n    memories: [...(user.personalization.memories ?? [])],\n    use_memories:\n      user.personalization.use_memories ?? DEFAULT_PERSONALIZATION.use_memories,\n    enable_memory_tool:\n      user.personalization.enable_memory_tool ??\n      DEFAULT_PERSONALIZATION.enable_memory_tool,\n    user_preferences: user.personalization.user_preferences ?? \"\",\n  };\n}\n\ninterface UseUserPersonalizationOptions {\n  onSuccess?: (personalization: UserPersonalization) => void;\n  onError?: (error: unknown) => void;\n}\n\n/**\n * Hook for managing user personalization settings\n *\n * Handles user personalization data including name, role, and memories.\n * Provides state management and persistence for personalization fields with\n * optimistic updates and error handling.\n *\n * @param user - The current user object containing personalization data\n * @param persistPersonalization - Async function to persist personalization changes to the server\n * @param options - Optional callbacks for success and error handling\n * @param options.onSuccess - Callback invoked when personalization is successfully saved\n * @param options.onError - Callback invoked when personalization save fails\n * @returns Object containing personalization state and handler functions\n *\n * @example\n * ```tsx\n * import useUserPersonalization from \"@/hooks/useUserPersonalization\";\n * import { useUser } from \"@/providers/UserProvider\";\n *\n * function PersonalizationSettings() {\n *   const { user, updateUserPersonalization } = useUser();\n *   const {\n *     personalizationValues,\n *     updatePersonalizationField,\n *     toggleUseMemories,\n *     updateMemoryAtIndex,\n *     addMemory,\n *     handleSavePersonalization,\n *     isSavingPersonalization\n *   } = useUserPersonalization(user, updateUserPersonalization, {\n *     onSuccess: () => console.log(\"Saved!\"),\n *     onError: () => console.log(\"Failed!\")\n *   });\n *\n *   return (\n *     <div>\n *       <input\n *         value={personalizationValues.name}\n *         onChange={(e) => updatePersonalizationField(\"name\", e.target.value)}\n *       />\n *       <button\n *         onClick={handleSavePersonalization}\n *         disabled={isSavingPersonalization}\n *       >\n *         Save\n *       </button>\n *     </div>\n *   );\n * }\n * ```\n *\n * @remarks\n * - Changes are optimistic - UI updates immediately before server persistence\n * - On error, state reverts to the last known good value from the user object\n * - Memories are automatically trimmed and filtered (empty strings removed) on save\n * - The hook synchronizes with user prop changes to stay in sync with external updates\n */\nexport default function useUserPersonalization(\n  user: User | null,\n  persistPersonalization: (\n    personalization: UserPersonalization\n  ) => Promise<void>,\n  options?: UseUserPersonalizationOptions\n) {\n  const [personalizationValues, setPersonalizationValues] =\n    useState<UserPersonalization>(() => derivePersonalizationFromUser(user));\n  const [isSavingPersonalization, setIsSavingPersonalization] = useState(false);\n\n  const onSuccess = options?.onSuccess;\n  const onError = options?.onError;\n\n  const basePersonalization = useMemo(\n    () => derivePersonalizationFromUser(user),\n    [user]\n  );\n\n  useEffect(() => {\n    setPersonalizationValues(basePersonalization);\n  }, [basePersonalization]);\n\n  const updatePersonalizationField = useCallback(\n    (field: \"name\" | \"role\", value: string) => {\n      setPersonalizationValues((prev) => ({\n        ...prev,\n        [field]: value,\n      }));\n    },\n    []\n  );\n\n  const toggleUseMemories = useCallback((useMemories: boolean) => {\n    setPersonalizationValues((prev) => ({\n      ...prev,\n      use_memories: useMemories,\n    }));\n  }, []);\n\n  const toggleEnableMemoryTool = useCallback((enabled: boolean) => {\n    setPersonalizationValues((prev) => ({\n      ...prev,\n      enable_memory_tool: enabled,\n    }));\n  }, []);\n\n  const updateUserPreferences = useCallback((value: string) => {\n    setPersonalizationValues((prev) => ({\n      ...prev,\n      user_preferences: value,\n    }));\n  }, []);\n\n  const updateMemoryAtIndex = useCallback((index: number, value: string) => {\n    setPersonalizationValues((prev) => {\n      const updatedMemories = [...prev.memories];\n      const existing = updatedMemories[index];\n      if (existing) {\n        updatedMemories[index] = { ...existing, content: value };\n      }\n      return {\n        ...prev,\n        memories: updatedMemories,\n      };\n    });\n  }, []);\n\n  const addMemory = useCallback(() => {\n    setPersonalizationValues((prev) => ({\n      ...prev,\n      memories: [...prev.memories, { id: null, content: \"\" }],\n    }));\n  }, []);\n\n  const setMemories = useCallback((memories: MemoryItem[]) => {\n    setPersonalizationValues((prev) => ({\n      ...prev,\n      memories,\n    }));\n  }, []);\n\n  const handleSavePersonalization = useCallback(\n    async (overrides?: Partial<UserPersonalization>, silent?: boolean) => {\n      setIsSavingPersonalization(true);\n\n      const valuesToSave = { ...personalizationValues, ...overrides };\n      const trimmedMemories = valuesToSave.memories\n        .map((memory) => ({ ...memory, content: memory.content.trim() }))\n        .filter((memory) => memory.content.length > 0);\n\n      const updatedPersonalization: UserPersonalization = {\n        ...valuesToSave,\n        memories: trimmedMemories,\n      };\n\n      try {\n        await persistPersonalization(updatedPersonalization);\n        setPersonalizationValues(updatedPersonalization);\n        if (!silent) {\n          onSuccess?.(updatedPersonalization);\n        }\n        return updatedPersonalization;\n      } catch (error) {\n        setPersonalizationValues(basePersonalization);\n        if (!silent) {\n          onError?.(error);\n        }\n        return null;\n      } finally {\n        setIsSavingPersonalization(false);\n      }\n    },\n    [\n      basePersonalization,\n      onError,\n      onSuccess,\n      persistPersonalization,\n      personalizationValues,\n    ]\n  );\n\n  return {\n    personalizationValues,\n    updatePersonalizationField,\n    toggleUseMemories,\n    toggleEnableMemoryTool,\n    updateUserPreferences,\n    updateMemoryAtIndex,\n    addMemory,\n    setMemories,\n    handleSavePersonalization,\n    isSavingPersonalization,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useUsers.ts",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { AllUsersResponse } from \"@/lib/types\";\n\nexport interface UseUsersParams {\n  includeApiKeys: boolean;\n}\n\n/**\n * Fetches all users in the organization.\n *\n * Returns user information including accepted users, invited users, and optionally\n * API key users. Use this for displaying user lists in sharing dialogs, admin panels,\n * or permission management interfaces.\n *\n * @param params - Configuration object\n * @param params.includeApiKeys - Whether to include API key users in the response\n *\n * @returns Object containing:\n *   - data: AllUsersResponse containing accepted, invited, and API key users, or undefined while loading\n *   - isLoading: Boolean indicating if data is being fetched\n *   - error: Any error that occurred during fetch\n *   - refreshUsers: Function to manually revalidate the data\n *\n * @example\n * // Fetch users without API keys (for sharing dialogs)\n * const { data: usersData, isLoading } = useUsers({ includeApiKeys: false });\n * if (isLoading) return <Spinner />;\n * return <UserList users={usersData?.accepted ?? []} />;\n *\n * @example\n * // Fetch all users including API keys (for admin panel)\n * const { data: usersData, refreshUsers } = useUsers({ includeApiKeys: true });\n * // Later...\n * await createNewUser(...);\n * refreshUsers(); // Refresh the user list\n */\nexport default function useUsers({ includeApiKeys }: UseUsersParams) {\n  const { data, error, mutate, isLoading } = useSWR<AllUsersResponse>(\n    `/api/manage/users?include_api_keys=${includeApiKeys}`,\n    errorHandlingFetcher\n  );\n\n  return {\n    data,\n    isLoading,\n    error,\n    refreshUsers: mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useVoicePlayback.ts",
    "content": "import { useState, useRef, useCallback, useEffect } from \"react\";\nimport { StreamingTTSPlayer } from \"@/lib/streamingTTS\";\nimport { useVoiceMode } from \"@/providers/VoiceModeProvider\";\n\nexport interface UseVoicePlaybackReturn {\n  isPlaying: boolean;\n  isLoading: boolean;\n  error: string | null;\n  play: (text: string, voice?: string, speed?: number) => Promise<void>;\n  pause: () => void;\n  stop: () => void;\n}\n\nexport function useVoicePlayback(): UseVoicePlaybackReturn {\n  const [isPlaying, setIsPlaying] = useState(false);\n  const [isLoading, setIsLoading] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n\n  const playerRef = useRef<StreamingTTSPlayer | null>(null);\n  const suppressPlayerErrorsRef = useRef(false);\n  const { setManualTTSPlaying, isTTSMuted, registerManualTTSMuteHandler } =\n    useVoiceMode();\n\n  useEffect(() => {\n    registerManualTTSMuteHandler((muted) => {\n      playerRef.current?.setMuted(muted);\n    });\n    return () => {\n      registerManualTTSMuteHandler(null);\n    };\n  }, [registerManualTTSMuteHandler]);\n\n  const stop = useCallback(() => {\n    suppressPlayerErrorsRef.current = true;\n    if (playerRef.current) {\n      playerRef.current.stop();\n      playerRef.current = null;\n    }\n    setManualTTSPlaying(false);\n    setError(null);\n    setIsPlaying(false);\n    setIsLoading(false);\n  }, [setManualTTSPlaying]);\n\n  const pause = useCallback(() => {\n    // Streaming player currently supports stop/resume via restart, not true pause.\n    stop();\n  }, [stop]);\n\n  const play = useCallback(\n    async (text: string, voice?: string, speed?: number) => {\n      // Stop any existing playback\n      stop();\n      suppressPlayerErrorsRef.current = false;\n      setError(null);\n      setIsLoading(true);\n\n      try {\n        const player = new StreamingTTSPlayer({\n          onPlayingChange: (playing) => {\n            setIsPlaying(playing);\n            setManualTTSPlaying(playing);\n            if (playing) {\n              setIsLoading(false);\n            }\n          },\n          onError: (playbackError) => {\n            if (suppressPlayerErrorsRef.current) {\n              return;\n            }\n            console.error(\"Voice playback error:\", playbackError);\n            setManualTTSPlaying(false);\n            setError(playbackError);\n            setIsLoading(false);\n            setIsPlaying(false);\n          },\n        });\n        playerRef.current = player;\n        player.setMuted(isTTSMuted);\n\n        await player.speak(text, voice, speed);\n        setIsLoading(false);\n      } catch (err) {\n        if (err instanceof Error && err.name === \"AbortError\") {\n          // Request was cancelled, not an error\n          return;\n        }\n        const message =\n          err instanceof Error ? err.message : \"Speech synthesis failed\";\n        setError(message);\n        setIsLoading(false);\n        setIsPlaying(false);\n        setManualTTSPlaying(false);\n      }\n    },\n    [isTTSMuted, setManualTTSPlaying, stop]\n  );\n\n  return {\n    isPlaying,\n    isLoading,\n    error,\n    play,\n    pause,\n    stop,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useVoiceProviders.ts",
    "content": "import useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport interface VoiceProviderView {\n  id: number;\n  name: string;\n  provider_type: string;\n  is_default_stt: boolean;\n  is_default_tts: boolean;\n  stt_model: string | null;\n  tts_model: string | null;\n  default_voice: string | null;\n  has_api_key: boolean;\n  target_uri: string | null;\n}\n\nexport function useVoiceProviders() {\n  const { data, error, isLoading, mutate } = useSWR<VoiceProviderView[]>(\n    SWR_KEYS.voiceProviders,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60000,\n    }\n  );\n\n  return {\n    providers: data ?? [],\n    isLoading,\n    error,\n    refresh: mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useVoiceRecorder.ts",
    "content": "import { useState, useRef, useCallback, useEffect } from \"react\";\n\nimport { INTERNAL_URL, IS_DEV } from \"@/lib/constants\";\n\n// Target format for OpenAI Realtime API\nconst TARGET_SAMPLE_RATE = 24000;\nconst CHUNK_INTERVAL_MS = 250;\nconst DUPLICATE_FINAL_TRANSCRIPT_WINDOW_MS = 1500;\n// When VAD-based auto-stop is disabled, force-stop after this much silence as a fallback\nconst SILENCE_FALLBACK_TIMEOUT_MS = 10000;\n\ninterface TranscriptMessage {\n  type: \"transcript\" | \"error\";\n  text?: string;\n  message?: string;\n  is_final?: boolean;\n}\n\nexport interface UseVoiceRecorderOptions {\n  /** Called when VAD detects silence and final transcript is received */\n  onFinalTranscript?: (text: string) => void;\n  /** If true, automatically stop recording when VAD detects silence */\n  autoStopOnSilence?: boolean;\n}\n\nexport interface UseVoiceRecorderReturn {\n  isRecording: boolean;\n  isProcessing: boolean;\n  isMuted: boolean;\n  error: string | null;\n  liveTranscript: string;\n  /** Current microphone audio level (0-1, RMS-based) */\n  audioLevel: number;\n  startRecording: () => Promise<void>;\n  stopRecording: () => Promise<string | null>;\n  setMuted: (muted: boolean) => void;\n}\n\n/**\n * Encapsulates all browser resources for a voice recording session.\n * Manages WebSocket, Web Audio API, and audio buffering.\n */\nclass VoiceRecorderSession {\n  // Browser resources\n  private websocket: WebSocket | null = null;\n  private audioContext: AudioContext | null = null;\n  private scriptNode: ScriptProcessorNode | null = null;\n  private sourceNode: MediaStreamAudioSourceNode | null = null;\n  private mediaStream: MediaStream | null = null;\n  private sendInterval: NodeJS.Timeout | null = null;\n\n  // State\n  private audioBuffer: Float32Array[] = [];\n  private transcript = \"\";\n  private stopResolver: ((text: string | null) => void) | null = null;\n  private isActive = false;\n  // Guard: true once onFinalTranscript has fired for the current utterance.\n  // Prevents the same transcript from being delivered twice when VAD-triggered\n  // stop causes the server to echo the final transcript a second time.\n  private finalTranscriptDelivered = false;\n  private lastDeliveredFinalText: string | null = null;\n  private lastDeliveredFinalAtMs = 0;\n  // Fallback timer: force-stop after extended silence when VAD auto-stop is disabled\n  private silenceFallbackTimer: NodeJS.Timeout | null = null;\n\n  // Callbacks to update React state\n  private onTranscriptChange: (text: string) => void;\n  private onFinalTranscript: ((text: string) => void) | null;\n  private onError: (error: string) => void;\n  private onAudioLevel: (level: number) => void;\n  private onSilenceTimeout: (() => void) | null;\n  private onVADStop: (() => void) | null;\n  private autoStopOnSilence: boolean;\n\n  constructor(\n    onTranscriptChange: (text: string) => void,\n    onFinalTranscript: ((text: string) => void) | null,\n    onError: (error: string) => void,\n    onAudioLevel: (level: number) => void,\n    onSilenceTimeout?: () => void,\n    autoStopOnSilence?: boolean,\n    onVADStop?: () => void\n  ) {\n    this.onTranscriptChange = onTranscriptChange;\n    this.onFinalTranscript = onFinalTranscript;\n    this.onError = onError;\n    this.onAudioLevel = onAudioLevel;\n    this.onSilenceTimeout = onSilenceTimeout || null;\n    this.autoStopOnSilence = autoStopOnSilence ?? false;\n    this.onVADStop = onVADStop || null;\n  }\n\n  get recording(): boolean {\n    return this.isActive;\n  }\n\n  get currentTranscript(): string {\n    return this.transcript;\n  }\n\n  setMuted(muted: boolean): void {\n    if (this.mediaStream) {\n      this.mediaStream.getAudioTracks().forEach((track) => {\n        track.enabled = !muted;\n      });\n    }\n  }\n\n  async start(): Promise<void> {\n    if (this.isActive) return;\n\n    this.cleanup();\n    this.transcript = \"\";\n    this.audioBuffer = [];\n    this.finalTranscriptDelivered = false;\n    this.lastDeliveredFinalText = null;\n    this.lastDeliveredFinalAtMs = 0;\n\n    // Get microphone\n    this.mediaStream = await navigator.mediaDevices.getUserMedia({\n      audio: {\n        channelCount: 1,\n        sampleRate: { ideal: TARGET_SAMPLE_RATE },\n        echoCancellation: true,\n        noiseSuppression: true,\n      },\n    });\n\n    // Get WS token and connect WebSocket\n    const wsUrl = await this.getWebSocketUrl();\n    this.websocket = new WebSocket(wsUrl);\n    this.websocket.onmessage = this.handleMessage;\n    this.websocket.onerror = () => this.onError(\"Connection failed\");\n    this.websocket.onclose = () => {\n      if (this.stopResolver) {\n        this.stopResolver(this.transcript || null);\n        this.stopResolver = null;\n      }\n    };\n\n    await this.waitForConnection();\n\n    // Restore error handler after connection (waitForConnection overwrites it)\n    this.websocket.onerror = () => this.onError(\"Connection failed\");\n\n    // Set up audio capture\n    this.audioContext = new AudioContext({ sampleRate: TARGET_SAMPLE_RATE });\n    this.sourceNode = this.audioContext.createMediaStreamSource(\n      this.mediaStream\n    );\n    this.scriptNode = this.audioContext.createScriptProcessor(4096, 1, 1);\n\n    this.scriptNode.onaudioprocess = (event) => {\n      const inputData = event.inputBuffer.getChannelData(0);\n      this.audioBuffer.push(new Float32Array(inputData));\n\n      // Compute RMS audio level (0-1) for waveform visualization\n      let sum = 0;\n      for (let i = 0; i < inputData.length; i++) {\n        sum += inputData[i]! * inputData[i]!;\n      }\n      const rms = Math.sqrt(sum / inputData.length);\n      // Scale RMS to a more visible range (raw RMS is usually very small)\n      this.onAudioLevel(Math.min(1, rms * 5));\n    };\n\n    this.sourceNode.connect(this.scriptNode);\n    this.scriptNode.connect(this.audioContext.destination);\n\n    // Start sending audio chunks\n    this.sendInterval = setInterval(\n      () => this.sendAudioBuffer(),\n      CHUNK_INTERVAL_MS\n    );\n    this.isActive = true;\n  }\n\n  async stop(): Promise<string | null> {\n    if (!this.isActive) return this.transcript || null;\n\n    this.resetSilenceFallbackTimer();\n\n    // Stop audio capture\n    if (this.sendInterval) {\n      clearInterval(this.sendInterval);\n      this.sendInterval = null;\n    }\n    if (this.scriptNode) {\n      this.scriptNode.disconnect();\n      this.scriptNode = null;\n    }\n    if (this.sourceNode) {\n      this.sourceNode.disconnect();\n      this.sourceNode = null;\n    }\n    if (this.audioContext) {\n      this.audioContext.close();\n      this.audioContext = null;\n    }\n    if (this.mediaStream) {\n      this.mediaStream.getTracks().forEach((track) => track.stop());\n      this.mediaStream = null;\n    }\n\n    this.audioBuffer = [];\n    this.isActive = false;\n\n    // Get final transcript from server\n    if (this.websocket?.readyState === WebSocket.OPEN) {\n      return new Promise((resolve) => {\n        this.stopResolver = resolve;\n        this.websocket!.send(JSON.stringify({ type: \"end\" }));\n\n        // Timeout fallback\n        setTimeout(() => {\n          if (this.stopResolver) {\n            this.stopResolver(this.transcript || null);\n            this.stopResolver = null;\n          }\n        }, 3000);\n      });\n    }\n\n    return this.transcript || null;\n  }\n\n  cleanup(): void {\n    this.resetSilenceFallbackTimer();\n    if (this.sendInterval) clearInterval(this.sendInterval);\n    if (this.scriptNode) this.scriptNode.disconnect();\n    if (this.sourceNode) this.sourceNode.disconnect();\n    if (this.audioContext) this.audioContext.close();\n    if (this.mediaStream) this.mediaStream.getTracks().forEach((t) => t.stop());\n    if (this.websocket) this.websocket.close();\n\n    this.sendInterval = null;\n    this.scriptNode = null;\n    this.sourceNode = null;\n    this.audioContext = null;\n    this.mediaStream = null;\n    this.websocket = null;\n    this.isActive = false;\n  }\n\n  private async getWebSocketUrl(): Promise<string> {\n    // Fetch short-lived WS token\n    const tokenResponse = await fetch(\"/api/voice/ws-token\", {\n      method: \"POST\",\n      credentials: \"include\",\n    });\n    if (!tokenResponse.ok) {\n      throw new Error(\"Failed to get WebSocket authentication token\");\n    }\n    const { token } = await tokenResponse.json();\n\n    const protocol = window.location.protocol === \"https:\" ? \"wss:\" : \"ws:\";\n    const host = IS_DEV ? new URL(INTERNAL_URL).host : window.location.host;\n    const path = IS_DEV\n      ? \"/voice/transcribe/stream\"\n      : \"/api/voice/transcribe/stream\";\n    return `${protocol}//${host}${path}?token=${encodeURIComponent(token)}`;\n  }\n\n  private waitForConnection(): Promise<void> {\n    return new Promise((resolve, reject) => {\n      if (!this.websocket) return reject(new Error(\"No WebSocket\"));\n\n      const timeout = setTimeout(\n        () => reject(new Error(\"Connection timeout\")),\n        5000\n      );\n\n      this.websocket.onopen = () => {\n        clearTimeout(timeout);\n        resolve();\n      };\n      this.websocket.onerror = () => {\n        clearTimeout(timeout);\n        reject(new Error(\"Connection failed\"));\n      };\n    });\n  }\n\n  private resetSilenceFallbackTimer(): void {\n    if (this.silenceFallbackTimer) {\n      clearTimeout(this.silenceFallbackTimer);\n      this.silenceFallbackTimer = null;\n    }\n  }\n\n  private startSilenceFallbackTimer(): void {\n    this.resetSilenceFallbackTimer();\n    this.silenceFallbackTimer = setTimeout(() => {\n      // 10s of silence with no new speech — force-stop as a safety fallback\n      if (this.isActive && this.onVADStop) {\n        this.onVADStop();\n      }\n    }, SILENCE_FALLBACK_TIMEOUT_MS);\n  }\n\n  private handleMessage = (event: MessageEvent): void => {\n    try {\n      const data: TranscriptMessage = JSON.parse(event.data);\n\n      if (data.type === \"transcript\") {\n        if (data.text) {\n          this.transcript = data.text;\n          // Only push live updates to React while actively recording.\n          // After stop(), the final transcript is returned via stopResolver\n          // instead — this prevents stale text from reappearing in the\n          // input box when the user clears it and starts a new recording.\n          if (this.isActive) {\n            this.onTranscriptChange(data.text);\n          }\n        }\n\n        if (data.is_final && data.text) {\n          // Resolve stop promise if waiting — must run even after stop()\n          // so the caller receives the final transcript.\n          if (this.stopResolver) {\n            this.stopResolver(data.text);\n            this.stopResolver = null;\n          }\n\n          // Skip VAD logic if session is no longer active\n          if (!this.isActive) return;\n\n          if (this.autoStopOnSilence) {\n            // VAD detected silence — auto-stop and trigger callback\n            const now = Date.now();\n            const isLikelyDuplicateFinal =\n              this.lastDeliveredFinalText === data.text &&\n              now - this.lastDeliveredFinalAtMs <\n                DUPLICATE_FINAL_TRANSCRIPT_WINDOW_MS;\n\n            if (\n              this.onFinalTranscript &&\n              !this.finalTranscriptDelivered &&\n              !isLikelyDuplicateFinal\n            ) {\n              this.finalTranscriptDelivered = true;\n              this.lastDeliveredFinalText = data.text;\n              this.lastDeliveredFinalAtMs = now;\n              this.onFinalTranscript(data.text);\n            }\n\n            if (this.onVADStop) {\n              this.onVADStop();\n            }\n          } else {\n            // Auto-stop disabled (push-to-talk): ignore VAD, keep recording.\n            // Start/reset a 10s fallback timer — if no new speech arrives,\n            // force-stop to avoid recording silence indefinitely.\n            this.startSilenceFallbackTimer();\n          }\n        }\n      } else if (data.type === \"error\") {\n        this.onError(data.message || \"Transcription error\");\n      }\n    } catch (e) {\n      console.error(\"Failed to parse transcript message:\", e);\n    }\n  };\n\n  private resetBackendTranscript(): void {\n    if (this.websocket?.readyState === WebSocket.OPEN) {\n      this.websocket.send(JSON.stringify({ type: \"reset\" }));\n    }\n  }\n\n  private sendAudioBuffer(): void {\n    if (\n      !this.websocket ||\n      this.websocket.readyState !== WebSocket.OPEN ||\n      !this.audioContext ||\n      this.audioBuffer.length === 0\n    ) {\n      return;\n    }\n\n    // Concatenate buffered chunks\n    const totalLength = this.audioBuffer.reduce(\n      (sum, chunk) => sum + chunk.length,\n      0\n    );\n\n    // Prevent buffer overflow\n    if (totalLength > this.audioContext.sampleRate * 0.5 * 2) {\n      this.audioBuffer = this.audioBuffer.slice(-10);\n      return;\n    }\n\n    const concatenated = new Float32Array(totalLength);\n    let offset = 0;\n    for (const chunk of this.audioBuffer) {\n      concatenated.set(chunk, offset);\n      offset += chunk.length;\n    }\n    this.audioBuffer = [];\n\n    // Resample and convert to PCM16\n    const resampled = this.resampleAudio(\n      concatenated,\n      this.audioContext.sampleRate\n    );\n    const pcm16 = this.float32ToInt16(resampled);\n\n    this.websocket.send(pcm16.buffer);\n  }\n\n  private resampleAudio(input: Float32Array, inputRate: number): Float32Array {\n    if (inputRate === TARGET_SAMPLE_RATE) return input;\n\n    const ratio = inputRate / TARGET_SAMPLE_RATE;\n    const outputLength = Math.round(input.length / ratio);\n    const output = new Float32Array(outputLength);\n\n    for (let i = 0; i < outputLength; i++) {\n      const srcIndex = i * ratio;\n      const floor = Math.floor(srcIndex);\n      const ceil = Math.min(floor + 1, input.length - 1);\n      const fraction = srcIndex - floor;\n      output[i] = input[floor]! * (1 - fraction) + input[ceil]! * fraction;\n    }\n\n    return output;\n  }\n\n  private float32ToInt16(float32: Float32Array): Int16Array {\n    const int16 = new Int16Array(float32.length);\n    for (let i = 0; i < float32.length; i++) {\n      const s = Math.max(-1, Math.min(1, float32[i]!));\n      int16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;\n    }\n    return int16;\n  }\n}\n\n/**\n * Hook for voice recording with streaming transcription.\n */\nexport function useVoiceRecorder(\n  options?: UseVoiceRecorderOptions\n): UseVoiceRecorderReturn {\n  const [isRecording, setIsRecording] = useState(false);\n  const [isProcessing, setIsProcessing] = useState(false);\n  const [isMuted, setIsMutedState] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n  const [liveTranscript, setLiveTranscript] = useState(\"\");\n  const [audioLevel, setAudioLevel] = useState(0);\n\n  const sessionRef = useRef<VoiceRecorderSession | null>(null);\n  const onFinalTranscriptRef = useRef(options?.onFinalTranscript);\n  const autoStopOnSilenceRef = useRef(options?.autoStopOnSilence ?? true); // Default to true\n\n  // Keep callback ref in sync\n  useEffect(() => {\n    onFinalTranscriptRef.current = options?.onFinalTranscript;\n    autoStopOnSilenceRef.current = options?.autoStopOnSilence ?? true;\n  }, [options?.onFinalTranscript, options?.autoStopOnSilence]);\n\n  // Cleanup on unmount\n  useEffect(() => {\n    return () => {\n      sessionRef.current?.cleanup();\n    };\n  }, []);\n\n  const startRecording = useCallback(async () => {\n    if (sessionRef.current?.recording) return;\n\n    setError(null);\n    setLiveTranscript(\"\");\n\n    // Clear any stale, inactive session before starting a new one.\n    if (sessionRef.current && !sessionRef.current.recording) {\n      sessionRef.current.cleanup();\n      sessionRef.current = null;\n    }\n\n    // Create VAD stop handler that will stop the session\n    const currentSession = new VoiceRecorderSession(\n      setLiveTranscript,\n      (text) => onFinalTranscriptRef.current?.(text),\n      setError,\n      setAudioLevel,\n      undefined, // onSilenceTimeout\n      autoStopOnSilenceRef.current,\n      () => {\n        // Stop only this session instance, and only clear recording state if it\n        // is still the active session when stop resolves.\n        currentSession.stop().then(() => {\n          if (sessionRef.current === currentSession) {\n            setIsRecording(false);\n            setIsMutedState(false);\n            sessionRef.current = null;\n          }\n        });\n      }\n    );\n    sessionRef.current = currentSession;\n\n    try {\n      await currentSession.start();\n      if (sessionRef.current === currentSession) {\n        setIsRecording(true);\n      }\n    } catch (err) {\n      currentSession.cleanup();\n      setError(\n        err instanceof Error ? err.message : \"Failed to start recording\"\n      );\n      if (sessionRef.current === currentSession) {\n        sessionRef.current = null;\n      }\n      throw err;\n    }\n  }, []);\n\n  const stopRecording = useCallback(async (): Promise<string | null> => {\n    if (!sessionRef.current) return null;\n    const currentSession = sessionRef.current;\n\n    setIsProcessing(true);\n\n    try {\n      const transcript = await currentSession.stop();\n      return transcript;\n    } finally {\n      // Only clear state if this is still the active session.\n      if (sessionRef.current === currentSession) {\n        setIsRecording(false);\n        setIsMutedState(false); // Reset mute state when recording stops\n        sessionRef.current = null;\n      }\n      setIsProcessing(false);\n    }\n  }, []);\n\n  const setMuted = useCallback((muted: boolean) => {\n    setIsMutedState(muted);\n    sessionRef.current?.setMuted(muted);\n  }, []);\n\n  return {\n    isRecording,\n    isProcessing,\n    isMuted,\n    error,\n    liveTranscript,\n    audioLevel,\n    startRecording,\n    stopRecording,\n    setMuted,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useVoiceStatus.ts",
    "content": "import useSWR from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\ninterface VoiceStatus {\n  stt_enabled: boolean;\n  tts_enabled: boolean;\n}\n\nexport function useVoiceStatus() {\n  const { data, error, isLoading } = useSWR<VoiceStatus>(\n    SWR_KEYS.voiceStatus,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60000,\n    }\n  );\n\n  return {\n    sttEnabled: data?.stt_enabled ?? false,\n    ttsEnabled: data?.tts_enabled ?? false,\n    isLoading,\n    error,\n  };\n}\n"
  },
  {
    "path": "web/src/hooks/useWebSocket.ts",
    "content": "import { useState, useRef, useCallback, useEffect } from \"react\";\n\nexport type WebSocketStatus =\n  | \"connecting\"\n  | \"connected\"\n  | \"disconnected\"\n  | \"error\";\n\nexport interface UseWebSocketOptions<T> {\n  /** URL to connect to */\n  url: string;\n  /** Called when a message is received */\n  onMessage?: (data: T) => void;\n  /** Called when connection opens */\n  onOpen?: () => void;\n  /** Called when connection closes */\n  onClose?: () => void;\n  /** Called on error */\n  onError?: (error: Event) => void;\n  /** Auto-connect on mount */\n  autoConnect?: boolean;\n}\n\nexport interface UseWebSocketReturn<T> {\n  /** Current connection status */\n  status: WebSocketStatus;\n  /** Send JSON data */\n  sendJson: (data: T) => void;\n  /** Send binary data */\n  sendBinary: (data: Blob | ArrayBuffer) => void;\n  /** Connect to WebSocket */\n  connect: () => Promise<void>;\n  /** Disconnect from WebSocket */\n  disconnect: () => void;\n}\n\nexport function useWebSocket<TReceive = unknown, TSend = unknown>({\n  url,\n  onMessage,\n  onOpen,\n  onClose,\n  onError,\n  autoConnect = false,\n}: UseWebSocketOptions<TReceive>): UseWebSocketReturn<TSend> {\n  const [status, setStatus] = useState<WebSocketStatus>(\"disconnected\");\n  const wsRef = useRef<WebSocket | null>(null);\n  const onMessageRef = useRef(onMessage);\n  const onOpenRef = useRef(onOpen);\n  const onCloseRef = useRef(onClose);\n  const onErrorRef = useRef(onError);\n\n  // Keep refs updated\n  useEffect(() => {\n    onMessageRef.current = onMessage;\n    onOpenRef.current = onOpen;\n    onCloseRef.current = onClose;\n    onErrorRef.current = onError;\n  }, [onMessage, onOpen, onClose, onError]);\n\n  const connect = useCallback(async (): Promise<void> => {\n    if (\n      wsRef.current?.readyState === WebSocket.OPEN ||\n      wsRef.current?.readyState === WebSocket.CONNECTING\n    ) {\n      return;\n    }\n\n    setStatus(\"connecting\");\n\n    return new Promise((resolve, reject) => {\n      const ws = new WebSocket(url);\n      wsRef.current = ws;\n\n      const timeout = setTimeout(() => {\n        ws.close();\n        reject(new Error(\"WebSocket connection timeout\"));\n      }, 10000);\n\n      ws.onopen = () => {\n        clearTimeout(timeout);\n        setStatus(\"connected\");\n        onOpenRef.current?.();\n        resolve();\n      };\n\n      ws.onmessage = (event) => {\n        try {\n          const data = JSON.parse(event.data) as TReceive;\n          onMessageRef.current?.(data);\n        } catch {\n          // Non-JSON message, ignore or handle differently\n        }\n      };\n\n      ws.onclose = () => {\n        clearTimeout(timeout);\n        setStatus(\"disconnected\");\n        onCloseRef.current?.();\n        wsRef.current = null;\n      };\n\n      ws.onerror = (error) => {\n        clearTimeout(timeout);\n        setStatus(\"error\");\n        onErrorRef.current?.(error);\n        reject(new Error(\"WebSocket connection failed\"));\n      };\n    });\n  }, [url]);\n\n  const disconnect = useCallback(() => {\n    if (wsRef.current) {\n      wsRef.current.close();\n      wsRef.current = null;\n    }\n    setStatus(\"disconnected\");\n  }, []);\n\n  const sendJson = useCallback((data: TSend) => {\n    if (wsRef.current?.readyState === WebSocket.OPEN) {\n      wsRef.current.send(JSON.stringify(data));\n    }\n  }, []);\n\n  const sendBinary = useCallback((data: Blob | ArrayBuffer) => {\n    if (wsRef.current?.readyState === WebSocket.OPEN) {\n      wsRef.current.send(data);\n    }\n  }, []);\n\n  // Auto-connect if enabled\n  useEffect(() => {\n    if (autoConnect) {\n      connect().catch(() => {\n        // Error handled via onError callback\n      });\n    }\n    return () => {\n      disconnect();\n    };\n  }, [autoConnect, connect, disconnect]);\n\n  return {\n    status,\n    sendJson,\n    sendBinary,\n    connect,\n    disconnect,\n  };\n}\n"
  },
  {
    "path": "web/src/instrumentation-client.ts",
    "content": "// This file configures the initialization of Sentry on the client.\n// The added config here will be used whenever a users loads a page in their browser.\n// https://docs.sentry.io/platforms/javascript/guides/nextjs/\n\nimport * as Sentry from \"@sentry/nextjs\";\n\nif (process.env.NEXT_PUBLIC_SENTRY_DSN) {\n  Sentry.init({\n    dsn: process.env.NEXT_PUBLIC_SENTRY_DSN,\n    release: process.env.SENTRY_RELEASE,\n\n    // Setting this option to true will print useful information to the console while you're setting up Sentry.\n    debug: false,\n\n    integrations: [],\n\n    tracesSampleRate: 0.0,\n    profilesSampleRate: 0.0,\n  });\n}\n\n// This export will instrument router navigations, and is only relevant if you enable tracing.\n// `captureRouterTransitionStart` is available from SDK version 9.12.0 onwards\nexport const onRouterTransitionStart = Sentry.captureRouterTransitionStart;\n"
  },
  {
    "path": "web/src/instrumentation.ts",
    "content": "import * as Sentry from \"@sentry/nextjs\";\n\nexport async function register() {\n  if (process.env.NEXT_RUNTIME === \"nodejs\") {\n    await import(\"../sentry.server.config\");\n  }\n\n  if (process.env.NEXT_RUNTIME === \"edge\") {\n    await import(\"../sentry.edge.config\");\n  }\n}\n\nexport const onRequestError = Sentry.captureRequestError;\n"
  },
  {
    "path": "web/src/interfaces/llm.ts",
    "content": "import type {\n  OnboardingState,\n  OnboardingActions,\n} from \"@/interfaces/onboarding\";\n\nexport enum LLMProviderName {\n  OPENAI = \"openai\",\n  ANTHROPIC = \"anthropic\",\n  OLLAMA_CHAT = \"ollama_chat\",\n  LM_STUDIO = \"lm_studio\",\n  AZURE = \"azure\",\n  OPENROUTER = \"openrouter\",\n  VERTEX_AI = \"vertex_ai\",\n  BEDROCK = \"bedrock\",\n  LITELLM = \"litellm\",\n  LITELLM_PROXY = \"litellm_proxy\",\n  BIFROST = \"bifrost\",\n  CUSTOM = \"custom\",\n}\n\nexport interface ModelConfiguration {\n  name: string;\n  is_visible: boolean;\n  max_input_tokens: number | null;\n  supports_image_input: boolean;\n  supports_reasoning: boolean;\n  display_name?: string;\n  provider_display_name?: string;\n  vendor?: string;\n  version?: string;\n  region?: string;\n}\n\nexport interface SimpleKnownModel {\n  name: string;\n  display_name: string | null;\n}\n\nexport interface WellKnownLLMProviderDescriptor {\n  name: string;\n  known_models: ModelConfiguration[];\n  recommended_default_model: SimpleKnownModel | null;\n}\n\nexport interface LLMModelDescriptor {\n  modelName: string;\n  provider: string;\n  maxTokens: number;\n}\n\nexport interface LLMProviderView {\n  id: number;\n  name: string;\n  provider: string;\n  api_key: string | null;\n  api_base: string | null;\n  api_version: string | null;\n  custom_config: { [key: string]: string } | null;\n  is_public: boolean;\n  is_auto_mode: boolean;\n  groups: number[];\n  personas: number[];\n  deployment_name: string | null;\n  model_configurations: ModelConfiguration[];\n}\n\nexport interface VisionProvider extends LLMProviderView {\n  vision_models: string[];\n}\n\nexport interface LLMProviderDescriptor {\n  id: number;\n  name: string;\n  provider: string;\n  provider_display_name: string;\n  model_configurations: ModelConfiguration[];\n}\n\nexport interface OllamaModelResponse {\n  name: string;\n  display_name: string;\n  max_input_tokens: number | null;\n  supports_image_input: boolean;\n}\n\nexport interface OpenRouterModelResponse {\n  name: string;\n  display_name: string;\n  max_input_tokens: number | null;\n  supports_image_input: boolean;\n}\n\nexport interface BedrockModelResponse {\n  name: string;\n  display_name: string;\n  max_input_tokens: number;\n  supports_image_input: boolean;\n}\n\nexport interface LMStudioModelResponse {\n  name: string;\n  display_name: string;\n  max_input_tokens: number | null;\n  supports_image_input: boolean;\n  supports_reasoning: boolean;\n}\n\nexport interface DefaultModel {\n  provider_id: number;\n  model_name: string;\n}\n\nexport interface LLMProviderResponse<T> {\n  providers: T[];\n  default_text: DefaultModel | null;\n  default_vision: DefaultModel | null;\n}\n\nexport type LLMModalVariant = \"onboarding\" | \"llm-configuration\";\n\nexport interface LLMProviderFormProps {\n  variant?: LLMModalVariant;\n  existingLlmProvider?: LLMProviderView;\n  shouldMarkAsDefault?: boolean;\n  open?: boolean;\n  onOpenChange?: (open: boolean) => void;\n\n  /** The current default model name for this provider (from the global default). */\n  defaultModelName?: string;\n\n  // Onboarding-specific (only when variant === \"onboarding\")\n  onboardingState?: OnboardingState;\n  onboardingActions?: OnboardingActions;\n  llmDescriptor?: WellKnownLLMProviderDescriptor;\n}\n\n// Param types for model fetching functions - use snake_case to match API structure\nexport interface BedrockFetchParams {\n  aws_region_name: string;\n  aws_access_key_id?: string;\n  aws_secret_access_key?: string;\n  aws_bearer_token_bedrock?: string;\n  provider_name?: string;\n}\n\nexport interface OllamaFetchParams {\n  api_base?: string;\n  provider_name?: string;\n  signal?: AbortSignal;\n}\n\nexport interface OpenRouterFetchParams {\n  api_base?: string;\n  api_key?: string;\n  provider_name?: string;\n}\n\nexport interface LiteLLMProxyFetchParams {\n  api_base?: string;\n  api_key?: string;\n  provider_name?: string;\n  signal?: AbortSignal;\n}\n\nexport interface LiteLLMProxyModelResponse {\n  provider_name: string;\n  model_name: string;\n}\n\nexport interface BifrostFetchParams {\n  api_base?: string;\n  api_key?: string;\n  provider_name?: string;\n  signal?: AbortSignal;\n}\n\nexport interface BifrostModelResponse {\n  name: string;\n  display_name: string;\n  max_input_tokens: number | null;\n  supports_image_input: boolean;\n  supports_reasoning: boolean;\n}\n\nexport interface VertexAIFetchParams {\n  model_configurations?: ModelConfiguration[];\n}\n\nexport interface LMStudioFetchParams {\n  api_base?: string;\n  api_key?: string;\n  api_key_changed?: boolean;\n  provider_name?: string;\n  signal?: AbortSignal;\n}\n\nexport type FetchModelsParams =\n  | BedrockFetchParams\n  | OllamaFetchParams\n  | OpenRouterFetchParams\n  | LiteLLMProxyFetchParams\n  | BifrostFetchParams\n  | VertexAIFetchParams\n  | LMStudioFetchParams;\n"
  },
  {
    "path": "web/src/interfaces/onboarding.ts",
    "content": "import type { IconProps } from \"@opal/types\";\n\nexport enum OnboardingStep {\n  Welcome = \"welcome\",\n  Name = \"name\",\n  LlmSetup = \"llm-setup\",\n  Complete = \"complete\",\n}\n\nexport interface OnboardingData {\n  userName?: string;\n  llmProviders?: string[];\n  llmApiKey?: string;\n}\n\nexport interface OnboardingState {\n  currentStep: OnboardingStep;\n  stepIndex: number;\n  totalSteps: number;\n  data: OnboardingData;\n  isButtonActive: boolean;\n  isLoading?: boolean;\n  error?: string;\n}\n\nexport enum OnboardingActionType {\n  NEXT_STEP = \"NEXT_STEP\",\n  PREV_STEP = \"PREV_STEP\",\n  GO_TO_STEP = \"GO_TO_STEP\",\n  UPDATE_DATA = \"UPDATE_DATA\",\n  SET_BUTTON_ACTIVE = \"SET_BUTTON_ACTIVE\",\n  SET_LOADING = \"SET_LOADING\",\n  SET_ERROR = \"SET_ERROR\",\n  RESET = \"RESET\",\n}\n\nexport type OnboardingAction =\n  | { type: OnboardingActionType.NEXT_STEP }\n  | { type: OnboardingActionType.PREV_STEP }\n  | { type: OnboardingActionType.GO_TO_STEP; step: OnboardingStep }\n  | { type: OnboardingActionType.UPDATE_DATA; payload: Partial<OnboardingData> }\n  | { type: OnboardingActionType.SET_BUTTON_ACTIVE; isButtonActive: boolean }\n  | { type: OnboardingActionType.SET_LOADING; isLoading: boolean }\n  | { type: OnboardingActionType.SET_ERROR; error: string | undefined }\n  | { type: OnboardingActionType.RESET };\n\nexport type FinalStepItemProps = {\n  title: string;\n  description: string;\n  icon: React.FunctionComponent<IconProps>;\n  buttonText: string;\n  buttonHref: string;\n};\n\nexport type OnboardingActions = {\n  nextStep: () => void;\n  prevStep: () => void;\n  goToStep: (step: OnboardingStep) => void;\n  setButtonActive: (active: boolean) => void;\n  updateName: (name: string) => void;\n  updateData: (data: Partial<OnboardingData>) => void;\n  setLoading: (isLoading: boolean) => void;\n  setError: (error: string | undefined) => void;\n  reset: () => void;\n};\n"
  },
  {
    "path": "web/src/interfaces/settings.ts",
    "content": "export enum ApplicationStatus {\n  PAYMENT_REMINDER = \"payment_reminder\",\n  GATED_ACCESS = \"gated_access\",\n  ACTIVE = \"active\",\n  SEAT_LIMIT_EXCEEDED = \"seat_limit_exceeded\",\n}\n\nexport enum QueryHistoryType {\n  DISABLED = \"disabled\",\n  ANONYMIZED = \"anonymized\",\n  NORMAL = \"normal\",\n}\n\nexport interface Settings {\n  anonymous_user_enabled: boolean;\n  invite_only_enabled: boolean;\n  anonymous_user_path?: string;\n  maximum_chat_retention_days?: number | null;\n  company_name?: string | null;\n  company_description?: string | null;\n  notifications: Notification[];\n  needs_reindexing: boolean;\n  gpu_enabled: boolean;\n  application_status: ApplicationStatus;\n  auto_scroll: boolean;\n  temperature_override_enabled: boolean;\n  query_history_type: QueryHistoryType;\n\n  deep_research_enabled?: boolean;\n  search_ui_enabled?: boolean;\n\n  // Image processing settings\n  image_extraction_and_analysis_enabled?: boolean;\n  search_time_image_analysis_enabled?: boolean;\n  image_analysis_max_size_mb?: number | null;\n\n  // User Knowledge settings\n  user_knowledge_enabled?: boolean;\n  user_file_max_upload_size_mb?: number | null;\n  file_token_count_threshold_k?: number | null;\n\n  // Connector settings\n  show_extra_connectors?: boolean;\n\n  // Default Assistant settings\n  disable_default_assistant?: boolean;\n\n  // Onyx Craft (Build Mode) feature flag\n  onyx_craft_enabled?: boolean;\n\n  // Whether EE features are unlocked (user has a valid enterprise license).\n  // Controls UI visibility of EE features like user groups, analytics, RBAC.\n  ee_features_enabled?: boolean;\n\n  // Seat usage - populated when seat limit is exceeded\n  seat_count?: number | null;\n  used_seats?: number | null;\n\n  // OpenSearch migration\n  opensearch_indexing_enabled?: boolean;\n\n  // Vector DB availability flag - false when DISABLE_VECTOR_DB is set.\n  // When false, connectors, RAG search, document sets, and related features\n  // are unavailable.\n  vector_db_enabled?: boolean;\n\n  // True when hooks are available: single-tenant deployment with HOOK_ENABLED=true.\n  hooks_enabled?: boolean;\n\n  // Application version from the ONYX_VERSION env var on the server.\n  version?: string | null;\n  // Hard ceiling for user_file_max_upload_size_mb, derived from env var.\n  max_allowed_upload_size_mb?: number;\n\n  // Factory defaults for the restore button.\n  default_user_file_max_upload_size_mb?: number;\n  default_file_token_count_threshold_k?: number;\n}\n\nexport enum NotificationType {\n  PERSONA_SHARED = \"persona_shared\",\n  REINDEX = \"reindex\",\n  TRIAL_ENDS_TWO_DAYS = \"two_day_trial_ending\",\n  ASSISTANT_FILES_READY = \"assistant_files_ready\",\n  RELEASE_NOTES = \"release_notes\",\n  FEATURE_ANNOUNCEMENT = \"feature_announcement\",\n}\n\nexport interface Notification {\n  id: number;\n  notif_type: string;\n  title: string;\n  description: string | null;\n  dismissed: boolean;\n  first_shown: string;\n  last_shown: string;\n  additional_data?: {\n    persona_id?: number;\n    link?: string;\n    version?: string; // For release notes notifications\n    [key: string]: any;\n  };\n}\n\nexport interface NavigationItem {\n  link: string;\n  icon?: string;\n  svg_logo?: string;\n  title: string;\n}\n\nexport interface EnterpriseSettings {\n  application_name: string | null;\n  use_custom_logo: boolean;\n  use_custom_logotype: boolean;\n  logo_display_style: \"logo_and_name\" | \"logo_only\" | \"name_only\" | null;\n\n  // custom navigation\n  custom_nav_items: NavigationItem[];\n\n  // custom Chat components\n  custom_lower_disclaimer_content: string | null;\n  custom_header_content: string | null;\n  two_lines_for_chat_header: boolean | null;\n  custom_popup_header: string | null;\n  custom_popup_content: string | null;\n  enable_consent_screen: boolean | null;\n  consent_screen_prompt: string | null;\n  show_first_visit_notice: boolean | null;\n  custom_greeting_message: string | null;\n}\n\nexport interface CombinedSettings {\n  settings: Settings;\n  enterpriseSettings: EnterpriseSettings | null;\n  customAnalyticsScript: string | null;\n  isMobile?: boolean;\n  webVersion: string | null;\n  webDomain: string | null;\n\n  /**\n   * NOTE (@raunakab):\n   * Whether search mode is actually available to users.\n   *\n   * Prefer this over reading `settings.search_ui_enabled` directly.\n   * `search_ui_enabled` only reflects the admin's *preference* — it does not\n   * account for prerequisites like connectors being configured. This derived\n   * flag combines the admin setting with runtime checks (e.g. connectors\n   * exist) so consumers get a single, accurate boolean.\n   */\n  isSearchModeAvailable: boolean;\n  settingsLoading: boolean;\n}\n"
  },
  {
    "path": "web/src/layouts/actions-layouts.tsx",
    "content": "/**\n * Actions Layout Components\n *\n * A namespaced collection of components for building consistent action cards\n * (MCP servers, OpenAPI tools, etc.). These components provide a standardized\n * layout that separates presentation from business logic, making it easier to\n * build and maintain action-related UIs.\n *\n * Built on top of ExpandableCard layouts for the underlying card structure.\n *\n * @example\n * ```tsx\n * import * as ActionsLayouts from \"@/layouts/actions-layouts\";\n * import * as ExpandableCard from \"@/layouts/expandable-card-layouts\";\n * import { SvgServer } from \"@opal/icons\";\n * import Switch from \"@/components/ui/switch\";\n *\n * function MyActionCard() {\n *   return (\n *     <ExpandableCard.Root>\n *       <ActionsLayouts.Header\n *         title=\"My MCP Server\"\n *         description=\"A powerful MCP server for automation\"\n *         icon={SvgServer}\n *         rightChildren={\n *           <Button onClick={handleDisconnect}>Disconnect</Button>\n *         }\n *       />\n *       <ActionsLayouts.Content>\n *         <ActionsLayouts.Tool\n *           title=\"File Reader\"\n *           description=\"Read files from the filesystem\"\n *           icon={SvgFile}\n *           rightChildren={\n *             <Switch checked={enabled} onCheckedChange={setEnabled} />\n *           }\n *         />\n *         <ActionsLayouts.Tool\n *           title=\"Web Search\"\n *           description=\"Search the web\"\n *           icon={SvgGlobe}\n *           disabled={true}\n *           rightChildren={\n *             <Switch checked={false} disabled />\n *           }\n *         />\n *       </ActionsLayouts.Content>\n *     </ExpandableCard.Root>\n *   );\n * }\n * ```\n */\n\n\"use client\";\n\nimport React, { HtmlHTMLAttributes } from \"react\";\nimport type { IconProps } from \"@opal/types\";\nimport { WithoutStyles } from \"@/types\";\nimport { ContentAction } from \"@opal/layouts\";\nimport * as ExpandableCard from \"@/layouts/expandable-card-layouts\";\nimport { Card } from \"@/refresh-components/cards\";\nimport Label from \"@/refresh-components/form/Label\";\n\n/**\n * Actions Header Component\n *\n * The header section of an action card. Displays icon, title, description,\n * and optional right-aligned actions.\n *\n * Features:\n * - Icon, title, and description display\n * - Custom right-aligned actions via rightChildren\n * - Responsive layout with truncated text\n *\n * @example\n * ```tsx\n * // Basic header\n * <ActionsLayouts.Header\n *   title=\"File Server\"\n *   description=\"Manage local files\"\n *   icon={SvgFolder}\n * />\n *\n * // With actions\n * <ActionsLayouts.Header\n *   title=\"API Server\"\n *   description=\"RESTful API integration\"\n *   icon={SvgCloud}\n *   rightChildren={\n *     <div className=\"flex gap-2\">\n *       <Button onClick={handleEdit}>Edit</Button>\n *       <Button onClick={handleDelete}>Delete</Button>\n *     </div>\n *   }\n * />\n * ```\n */\nexport interface ActionsHeaderProps\n  extends WithoutStyles<HtmlHTMLAttributes<HTMLDivElement>> {\n  // Core content\n  name?: string;\n  title: string;\n  description?: string;\n  icon: React.FunctionComponent<IconProps>;\n\n  // Custom content\n  rightChildren?: React.ReactNode;\n}\nfunction ActionsHeader({\n  name,\n  title,\n  description,\n  icon: Icon,\n  rightChildren,\n  ...props\n}: ActionsHeaderProps) {\n  return (\n    <ExpandableCard.Header>\n      <div className=\"flex flex-col gap-2 pt-4 pb-2\">\n        <div className=\"px-4\">\n          <Label name={name}>\n            <ContentAction\n              icon={Icon}\n              title={title}\n              description={description}\n              sizePreset=\"section\"\n              variant=\"section\"\n              rightChildren={rightChildren}\n              paddingVariant=\"fit\"\n            />\n          </Label>\n        </div>\n        <div {...props} className=\"px-2\" />\n      </div>\n    </ExpandableCard.Header>\n  );\n}\n\n/**\n * Actions Content Component\n *\n * A container for the content area of an action card.\n * Use this to wrap tools, settings, or other expandable content.\n * Features a maximum height with scrollable overflow.\n *\n * IMPORTANT: Only ONE ActionsContent should be used within a single ExpandableCard.Root.\n * This component self-registers with the ActionsLayout context to inform\n * ActionsHeader whether content exists (for border-radius styling). Using\n * multiple ActionsContent components will cause incorrect unmount behavior -\n * when any one unmounts, it will incorrectly signal that no content exists,\n * even if other ActionsContent components remain mounted.\n *\n * @example\n * ```tsx\n * <ActionsLayouts.Content>\n *   <ActionsLayouts.Tool {...} />\n *   <ActionsLayouts.Tool {...} />\n * </ActionsLayouts.Content>\n * ```\n */\nfunction ActionsContent({\n  children,\n  ...props\n}: WithoutStyles<React.HTMLAttributes<HTMLDivElement>>) {\n  return (\n    <ExpandableCard.Content {...props}>\n      <div className=\"flex flex-col gap-2 p-2\">{children}</div>\n    </ExpandableCard.Content>\n  );\n}\n\n/**\n * Actions Tool Component\n *\n * Represents a single tool within an actions content area. Displays the tool's\n * title, description, and icon. The component provides a label wrapper for\n * custom right-aligned controls (like toggle switches).\n *\n * Features:\n * - Tool title and description\n * - Custom icon\n * - Disabled state (applies strikethrough to title)\n * - Custom right-aligned content via rightChildren\n * - Responsive layout with truncated text\n *\n * @example\n * ```tsx\n * // Basic tool with switch\n * <ActionsLayouts.Tool\n *   title=\"File Reader\"\n *   description=\"Read files from the filesystem\"\n *   icon={SvgFile}\n *   rightChildren={\n *     <Switch checked={enabled} onCheckedChange={setEnabled} />\n *   }\n * />\n *\n * // Disabled tool\n * <ActionsLayouts.Tool\n *   title=\"Premium Feature\"\n *   description=\"This feature requires a premium subscription\"\n *   icon={SvgLock}\n *   disabled={true}\n *   rightChildren={\n *     <Switch checked={false} disabled />\n *   }\n * />\n *\n * // Tool with custom action\n * <ActionsLayouts.Tool\n *   name=\"config_tool\"\n *   title=\"Configuration\"\n *   description=\"Configure system settings\"\n *   icon={SvgSettings}\n *   rightChildren={\n *     <Button onClick={openSettings}>Configure</Button>\n *   }\n * />\n * ```\n */\nexport type ActionsToolProps = WithoutStyles<{\n  // Core content\n  name?: string;\n  title: string;\n  description: string;\n  icon?: React.FunctionComponent<IconProps>;\n\n  // State\n  disabled?: boolean;\n  rightChildren?: React.ReactNode;\n}>;\nfunction ActionsTool({\n  name,\n  title,\n  description,\n  icon,\n  disabled,\n  rightChildren,\n}: ActionsToolProps) {\n  return (\n    <Card padding={0.75} variant={disabled ? \"disabled\" : undefined}>\n      <Label name={name} disabled={disabled}>\n        <ContentAction\n          icon={icon}\n          title={title}\n          description={description}\n          sizePreset=\"main-ui\"\n          variant=\"section\"\n          rightChildren={rightChildren}\n          paddingVariant=\"fit\"\n        />\n      </Label>\n    </Card>\n  );\n}\n\nexport {\n  ActionsHeader as Header,\n  ActionsContent as Content,\n  ActionsTool as Tool,\n};\n"
  },
  {
    "path": "web/src/layouts/app-layouts.tsx",
    "content": "/**\n * App Page Layout Components\n *\n * Provides the root layout, header, and footer for app pages.\n * AppRoot renders AppHeader and Footer by default (both can be disabled via props).\n *\n * @example\n * ```tsx\n * import * as AppLayouts from \"@/layouts/app-layouts\";\n *\n * export default function ChatPage() {\n *   return (\n *     <AppLayouts.Root>\n *       <ChatInterface />\n *     </AppLayouts.Root>\n *   );\n * }\n * ```\n */\n\n\"use client\";\n\nimport {\n  cn,\n  ensureHrefProtocol,\n  INTERACTIVE_SELECTOR,\n  noProp,\n} from \"@/lib/utils\";\nimport type { Components } from \"react-markdown\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { useCallback, useMemo, useRef, useState, useEffect } from \"react\";\nimport { useAppBackground } from \"@/providers/AppBackgroundProvider\";\nimport { useTheme } from \"next-themes\";\nimport ShareChatSessionModal from \"@/sections/modals/ShareChatSessionModal\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport useChatSessions from \"@/hooks/useChatSessions\";\nimport {\n  handleMoveOperation,\n  shouldShowMoveModal,\n  showErrorNotification,\n} from \"@/sections/sidebar/sidebarUtils\";\nimport { LOCAL_STORAGE_KEYS } from \"@/sections/sidebar/constants\";\nimport { deleteChatSession } from \"@/app/app/services/lib\";\nimport { useRouter } from \"next/navigation\";\nimport MoveCustomAgentChatModal from \"@/components/modals/MoveCustomAgentChatModal\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport FrostedDiv from \"@/refresh-components/FrostedDiv\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport { PopoverSearchInput } from \"@/sections/sidebar/ChatButton\";\nimport SimplePopover from \"@/refresh-components/SimplePopover\";\nimport { Interactive } from \"@opal/core\";\nimport { Button, OpenButton } from \"@opal/components\";\nimport { useAppSidebarContext } from \"@/providers/AppSidebarProvider\";\nimport useScreenSize from \"@/hooks/useScreenSize\";\nimport {\n  SvgBubbleText,\n  SvgFolderIn,\n  SvgMoreHorizontal,\n  SvgSearchMenu,\n  SvgShare,\n  SvgSidebar,\n  SvgTrash,\n} from \"@opal/icons\";\nimport MinimalMarkdown from \"@/components/chat/MinimalMarkdown\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport type { AppMode } from \"@/providers/QueryControllerProvider\";\nimport useAppFocus from \"@/hooks/useAppFocus\";\nimport { useQueryController } from \"@/providers/QueryControllerProvider\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport useBrowserInfo from \"@/hooks/useBrowserInfo\";\nimport { APP_SLOGAN } from \"@/lib/constants\";\n\n/**\n * App Header Component\n *\n * Renders the header for chat sessions with share, move, and delete actions.\n * Designed to be rendered inside ChatScrollContainer with sticky positioning.\n *\n * Features:\n * - Share chat functionality\n * - Move chat to project (with confirmation for custom agents)\n * - Delete chat with confirmation\n * - Mobile-responsive sidebar toggle\n * - Custom header content from enterprise settings\n * - App-Mode toggle (EE gated)\n */\nfunction Header() {\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n  const { state, setAppMode } = useQueryController();\n  const settings = useSettingsContext();\n  const { isMobile } = useScreenSize();\n  const { setFolded } = useAppSidebarContext();\n  const [showShareModal, setShowShareModal] = useState(false);\n  const [deleteModalOpen, setDeleteModalOpen] = useState(false);\n  const [showMoveCustomAgentModal, setShowMoveCustomAgentModal] =\n    useState(false);\n  const [pendingMoveProjectId, setPendingMoveProjectId] = useState<\n    number | null\n  >(null);\n  const [showMoveOptions, setShowMoveOptions] = useState(false);\n  const [searchTerm, setSearchTerm] = useState(\"\");\n  const [popoverOpen, setPopoverOpen] = useState(false);\n  const [popoverItems, setPopoverItems] = useState<React.ReactNode[]>([]);\n  const [modePopoverOpen, setModePopoverOpen] = useState(false);\n  const {\n    projects,\n    fetchProjects,\n    refreshCurrentProjectDetails,\n    currentProjectId,\n  } = useProjectsContext();\n  const { currentChatSession, refreshChatSessions, removeSession } =\n    useChatSessions();\n  const router = useRouter();\n  const appFocus = useAppFocus();\n\n  const customHeaderContent =\n    settings?.enterpriseSettings?.custom_header_content;\n  // Some pages don't want the custom header content, namely every page except Chat, Search, and\n  // NewSession. The header provides features such as the open sidebar button on mobile which pages\n  // without this content still use.\n  const pageWithHeaderContent = appFocus.isChat() || appFocus.isNewSession();\n\n  const effectiveMode: AppMode =\n    appFocus.isNewSession() && state.phase === \"idle\" ? state.appMode : \"chat\";\n\n  const availableProjects = useMemo(() => {\n    if (!projects) return [];\n    return projects.filter((project) => project.id !== currentProjectId);\n  }, [projects, currentProjectId]);\n\n  const filteredProjects = useMemo(() => {\n    if (!searchTerm) return availableProjects;\n    const term = searchTerm.toLowerCase();\n    return availableProjects.filter((project) =>\n      project.name.toLowerCase().includes(term)\n    );\n  }, [availableProjects, searchTerm]);\n\n  const resetMoveState = useCallback(() => {\n    setShowMoveOptions(false);\n    setSearchTerm(\"\");\n    setPendingMoveProjectId(null);\n    setShowMoveCustomAgentModal(false);\n  }, []);\n\n  const performMove = useCallback(\n    async (targetProjectId: number) => {\n      if (!currentChatSession) return;\n      try {\n        await handleMoveOperation({\n          chatSession: currentChatSession,\n          targetProjectId,\n          refreshChatSessions,\n          refreshCurrentProjectDetails,\n          fetchProjects,\n          currentProjectId,\n        });\n        resetMoveState();\n        setPopoverOpen(false);\n      } catch (error) {\n        console.error(\"Failed to move chat session:\", error);\n      }\n    },\n    [\n      currentChatSession,\n      refreshChatSessions,\n      refreshCurrentProjectDetails,\n      fetchProjects,\n      currentProjectId,\n      resetMoveState,\n    ]\n  );\n\n  const handleMoveClick = useCallback(\n    (projectId: number) => {\n      if (!currentChatSession) return;\n      if (shouldShowMoveModal(currentChatSession)) {\n        setPendingMoveProjectId(projectId);\n        setShowMoveCustomAgentModal(true);\n        return;\n      }\n      void performMove(projectId);\n    },\n    [currentChatSession, performMove]\n  );\n\n  const handleDeleteChat = useCallback(async () => {\n    if (!currentChatSession) return;\n    try {\n      const response = await deleteChatSession(currentChatSession.id);\n      if (!response.ok) {\n        throw new Error(\"Failed to delete chat session\");\n      }\n      removeSession(currentChatSession.id);\n      await Promise.all([refreshChatSessions(), fetchProjects()]);\n      router.replace(\"/app\");\n      setDeleteModalOpen(false);\n    } catch (error) {\n      console.error(\"Failed to delete chat:\", error);\n      showErrorNotification(\"Failed to delete chat. Please try again.\");\n    }\n  }, [\n    currentChatSession,\n    refreshChatSessions,\n    removeSession,\n    fetchProjects,\n    router,\n  ]);\n\n  const setDeleteConfirmationModalOpen = useCallback((open: boolean) => {\n    setDeleteModalOpen(open);\n    if (open) {\n      setPopoverOpen(false);\n    }\n  }, []);\n\n  useEffect(() => {\n    const items = showMoveOptions\n      ? [\n          <PopoverSearchInput\n            key=\"search\"\n            setShowMoveOptions={setShowMoveOptions}\n            onSearch={setSearchTerm}\n          />,\n          ...filteredProjects.map((project) => (\n            <LineItem\n              key={project.id}\n              icon={SvgFolderIn}\n              onClick={noProp(() => handleMoveClick(project.id))}\n            >\n              {project.name}\n            </LineItem>\n          )),\n        ]\n      : [\n          <LineItem\n            key=\"move\"\n            icon={SvgFolderIn}\n            onClick={noProp(() => setShowMoveOptions(true))}\n          >\n            Move to Project\n          </LineItem>,\n          <LineItem\n            key=\"delete\"\n            icon={SvgTrash}\n            onClick={noProp(() => setDeleteConfirmationModalOpen(true))}\n            danger\n          >\n            Delete\n          </LineItem>,\n        ];\n\n    setPopoverItems(items);\n  }, [\n    showMoveOptions,\n    filteredProjects,\n    currentChatSession,\n    setDeleteConfirmationModalOpen,\n    handleMoveClick,\n  ]);\n\n  return (\n    <>\n      {showShareModal && currentChatSession && (\n        <ShareChatSessionModal\n          chatSession={currentChatSession}\n          onClose={() => setShowShareModal(false)}\n        />\n      )}\n\n      {showMoveCustomAgentModal && (\n        <MoveCustomAgentChatModal\n          onCancel={resetMoveState}\n          onConfirm={async (doNotShowAgain: boolean) => {\n            if (doNotShowAgain && typeof window !== \"undefined\") {\n              window.localStorage.setItem(\n                LOCAL_STORAGE_KEYS.HIDE_MOVE_CUSTOM_AGENT_MODAL,\n                \"true\"\n              );\n            }\n            if (pendingMoveProjectId != null) {\n              await performMove(pendingMoveProjectId);\n            }\n          }}\n        />\n      )}\n\n      {deleteModalOpen && (\n        <ConfirmationModalLayout\n          title=\"Delete Chat\"\n          icon={SvgTrash}\n          onClose={() => setDeleteModalOpen(false)}\n          submit={\n            <Button variant=\"danger\" onClick={handleDeleteChat}>\n              Delete\n            </Button>\n          }\n        >\n          Are you sure you want to delete this chat? This action cannot be\n          undone.\n        </ConfirmationModalLayout>\n      )}\n\n      <div\n        className={cn(\n          \"w-full flex flex-row flex-wrap justify-center items-center px-4\",\n          // # Note (@raunakab):\n          //\n          // We add an additional top margin to align this header with the `LogoSection` inside of the App-Sidebar.\n          // For more information, check out `SidebarWrapper.tsx`.\n          \"mt-2\"\n        )}\n      >\n        {/*\n          Left:\n          - (mobile) sidebar toggle\n          - app-mode (for Unified S+C [EE gated])\n        */}\n        <div className=\"flex-1 flex flex-row items-center gap-2 h-[3.3rem]\">\n          {isMobile && (\n            <Button\n              prominence=\"internal\"\n              icon={SvgSidebar}\n              onClick={() => setFolded(false)}\n            />\n          )}\n          {isPaidEnterpriseFeaturesEnabled &&\n            settings.isSearchModeAvailable &&\n            appFocus.isNewSession() &&\n            state.phase === \"idle\" && (\n              <Popover open={modePopoverOpen} onOpenChange={setModePopoverOpen}>\n                <Popover.Trigger asChild>\n                  <OpenButton\n                    aria-label=\"Change app mode\"\n                    icon={\n                      effectiveMode === \"search\" ? SvgSearchMenu : SvgBubbleText\n                    }\n                  >\n                    {effectiveMode === \"search\" ? \"Search\" : \"Chat\"}\n                  </OpenButton>\n                </Popover.Trigger>\n                <Popover.Content align=\"start\" width=\"lg\">\n                  <Popover.Menu>\n                    <LineItem\n                      icon={SvgSearchMenu}\n                      selected={effectiveMode === \"search\"}\n                      description=\"Quick search for documents\"\n                      onClick={noProp(() => {\n                        setAppMode(\"search\");\n                        setModePopoverOpen(false);\n                      })}\n                    >\n                      Search\n                    </LineItem>\n                    <LineItem\n                      icon={SvgBubbleText}\n                      selected={effectiveMode === \"chat\"}\n                      description=\"Conversation and research\"\n                      onClick={noProp(() => {\n                        setAppMode(\"chat\");\n                        setModePopoverOpen(false);\n                      })}\n                    >\n                      Chat\n                    </LineItem>\n                  </Popover.Menu>\n                </Popover.Content>\n              </Popover>\n            )}\n        </div>\n\n        {/*\n          Center:\n          - custom-header-content\n          - Wraps to its own row below left/right on mobile when content is present\n        */}\n        <div\n          className={cn(\n            \"flex flex-col items-center overflow-hidden\",\n            pageWithHeaderContent && customHeaderContent\n              ? \"order-last basis-full py-2 sm:py-0 sm:order-none sm:basis-auto sm:flex-1\"\n              : \"flex-1\"\n          )}\n        >\n          <Text text03 className=\"text-center w-full\">\n            {pageWithHeaderContent && customHeaderContent}\n          </Text>\n        </div>\n\n        {/*\n          Right:\n          - share button\n          - more-options buttons\n        */}\n        <div className=\"flex flex-1 justify-end items-center h-[3.3rem]\">\n          {appFocus.isChat() && currentChatSession && (\n            <FrostedDiv className=\"flex shrink flex-row items-center\">\n              <Button\n                icon={SvgShare}\n                prominence=\"tertiary\"\n                interaction={showShareModal ? \"hover\" : \"rest\"}\n                responsiveHideText\n                onClick={() => setShowShareModal(true)}\n                aria-label=\"share-chat-button\"\n              >\n                Share\n              </Button>\n              <SimplePopover\n                trigger={\n                  /* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */\n                  <IconButton\n                    icon={SvgMoreHorizontal}\n                    className=\"ml-2\"\n                    transient={popoverOpen}\n                    tertiary\n                  />\n                }\n                onOpenChange={(state) => {\n                  setPopoverOpen(state);\n                  if (!state) setShowMoveOptions(false);\n                }}\n                side=\"bottom\"\n                align=\"end\"\n              >\n                <PopoverMenu>{popoverItems}</PopoverMenu>\n              </SimplePopover>\n            </FrostedDiv>\n          )}\n        </div>\n      </div>\n    </>\n  );\n}\n\nconst footerMarkdownComponents = {\n  p: ({ children }) => (\n    //dont remove the !my-0 class, it's important for the markdown to render without any alignment issues\n    <Text as=\"p\" text03 secondaryAction className=\"!my-0 text-center\">\n      {children}\n    </Text>\n  ),\n  a: ({ node, href, className, children, ...rest }) => {\n    const fullHref = ensureHrefProtocol(href);\n    return (\n      <a\n        href={fullHref}\n        target=\"_blank\"\n        rel=\"noopener noreferrer\"\n        {...rest}\n        className={cn(className, \"underline underline-offset-2\")}\n      >\n        <Text text03 secondaryAction>\n          {children}\n        </Text>\n      </a>\n    );\n  },\n} satisfies Partial<Components>;\n\nfunction Footer() {\n  const settings = useSettingsContext();\n  const appFocus = useAppFocus();\n\n  const customFooterContent =\n    settings?.enterpriseSettings?.custom_lower_disclaimer_content ||\n    `[Onyx ${\n      settings?.webVersion || \"dev\"\n    }](https://www.onyx.app/) - ${APP_SLOGAN}`;\n\n  return (\n    <footer\n      className={cn(\n        \"relative w-full flex flex-row justify-center items-center gap-2 px-2 mt-auto\",\n        // # Note (from @raunakab):\n        //\n        // The conditional rendering of vertical padding based on the current page is intentional.\n        // The `AppInputBar` has `shadow-01` applied, which extends ~14px below it.\n        // Because the content area in `Root` uses `overflow-auto`, the shadow would be\n        // clipped at the container boundary — causing a visible rendering artefact.\n        //\n        // To fix this, `AppPage.tsx` uses animated spacer divs around `AppInputBar` to\n        // give the shadow breathing room. However, that extra space adds visible gap\n        // between the input and the Footer. To compensate, we remove the Footer's top\n        // padding when `appFocus.isChat()`.\n        //\n        // There is a corresponding note inside `AppInputBar.tsx` and `AppPage.tsx`\n        // explaining this. Please refer to those notes as well.\n        appFocus.isChat() ? \"pb-2\" : \"py-2\"\n      )}\n    >\n      <MinimalMarkdown\n        content={customFooterContent}\n        className={cn(\"max-w-full text-center\")}\n        components={footerMarkdownComponents}\n      />\n    </footer>\n  );\n}\n\n/**\n * App Root Component\n *\n * Wraps app pages with header (AppHeader) and footer chrome.\n *\n * Layout Structure:\n * ```\n * ┌──────────────────────────────────┐\n * │ AppHeader                        │\n * ├──────────────────────────────────┤\n * │                                  │\n * │ Content Area (children)          │\n * │                                  │\n * ├──────────────────────────────────┤\n * │ Footer (custom disclaimer)       │\n * └──────────────────────────────────┘\n * ```\n *\n * @example\n * ```tsx\n * <AppLayouts.Root>\n *   <ChatInterface />\n * </AppLayouts.Root>\n * ```\n */\nexport interface AppRootProps {\n  /** Opt-in to render the user's custom background image */\n  enableBackground?: boolean;\n  children?: React.ReactNode;\n}\n\nfunction Root({ children, enableBackground }: AppRootProps) {\n  const { hasBackground, appBackgroundUrl } = useAppBackground();\n  const { resolvedTheme } = useTheme();\n  const appFocus = useAppFocus();\n  const { isSafari } = useBrowserInfo();\n  const isLightMode = resolvedTheme === \"light\";\n  const showBackground = hasBackground && enableBackground;\n\n  // Track whether the chat input was focused before a mousedown, so we can\n  // restore focus on mouseup if no text was selected. This preserves\n  // click-drag text selection while keeping the input focused on plain clicks.\n  const inputWasFocused = useRef(false);\n\n  const handleMouseDown = useCallback(\n    (event: React.MouseEvent<HTMLDivElement>) => {\n      const activeEl = document.activeElement;\n      const isFocused =\n        activeEl instanceof HTMLElement &&\n        activeEl.id === \"onyx-chat-input-textarea\";\n      const target = event.target;\n      const isInteractive =\n        target instanceof HTMLElement && !!target.closest(INTERACTIVE_SELECTOR);\n      inputWasFocused.current = isFocused && !isInteractive;\n    },\n    []\n  );\n\n  const handleMouseUp = useCallback(() => {\n    if (!inputWasFocused.current) return;\n    inputWasFocused.current = false;\n    const sel = window.getSelection();\n    if (sel && !sel.isCollapsed) return;\n    const textarea = document.getElementById(\"onyx-chat-input-textarea\");\n    // Only restore focus if no other element has grabbed it since mousedown.\n    if (textarea && document.activeElement !== textarea) {\n      textarea.focus();\n    }\n  }, []);\n  const horizontalBlurMask = `linear-gradient(\n    to right,\n    transparent 0%,\n    black max(0%, calc(50% - 25rem)),\n    black min(100%, calc(50% + 25rem)),\n    transparent 100%\n  )`;\n\n  return (\n    /* NOTE: Some elements, markdown tables in particular, refer to this `@container` in order to\n      breakout of their immediate containers using cqw units.\n      The `data-main-container` attribute is used by portaled elements (e.g. CommandMenu) to\n      render inside this container so they can be centered relative to the main content area\n      rather than the full viewport (which would include the sidebar).\n    */\n    <div\n      data-main-container\n      onMouseDown={handleMouseDown}\n      onMouseUp={handleMouseUp}\n      className={cn(\n        \"@container flex flex-col h-full w-full relative overflow-hidden\",\n        showBackground && \"bg-cover bg-center bg-fixed\"\n      )}\n      style={\n        showBackground\n          ? { backgroundImage: `url(${appBackgroundUrl})` }\n          : undefined\n      }\n    >\n      {/* Effect 1 */}\n      {/* Vignette overlay for custom backgrounds (disabled in light mode) */}\n      {showBackground && !isLightMode && (\n        <div\n          className=\"absolute z-0 inset-0 pointer-events-none\"\n          style={{\n            background: `\n              linear-gradient(to bottom, rgba(0, 0, 0, 0.4) 0%, transparent 4rem),\n              linear-gradient(to top, rgba(0, 0, 0, 0.4) 0%, transparent 4rem)\n            `,\n          }}\n        />\n      )}\n\n      {/* Effect 2 */}\n      {/* Semi-transparent overlay for readability when background is set */}\n      {showBackground && appFocus.isChat() && (\n        <>\n          <div className=\"absolute inset-0 backdrop-blur-[1px] pointer-events-none\" />\n          {isSafari ? (\n            <div\n              className=\"absolute z-0 inset-0 bg-cover bg-center bg-fixed pointer-events-none\"\n              style={{\n                backgroundImage: `url(${appBackgroundUrl})`,\n                filter: \"blur(16px)\",\n                maskImage: horizontalBlurMask,\n                WebkitMaskImage: horizontalBlurMask,\n              }}\n            />\n          ) : (\n            <div\n              className=\"absolute z-0 inset-0 backdrop-blur-md transition-all duration-600 pointer-events-none\"\n              style={{\n                maskImage: horizontalBlurMask,\n                WebkitMaskImage: horizontalBlurMask,\n              }}\n            />\n          )}\n        </>\n      )}\n\n      <div className=\"z-app-layout\">\n        {!appFocus.isSharedChat() && <Header />}\n      </div>\n      <div className=\"z-app-layout flex-1 overflow-auto h-full w-full\">\n        {children}\n      </div>\n      <div className=\"z-app-layout\">\n        <Footer />\n      </div>\n    </div>\n  );\n}\n\nexport { Root };\n"
  },
  {
    "path": "web/src/layouts/expandable-card-layouts.tsx",
    "content": "/**\n * Expandable Card Layout Components\n *\n * A namespaced collection of components for building expandable cards with\n * collapsible content sections. These provide the structural foundation\n * without opinionated content styling - just pure containers.\n *\n * Use these components when you need:\n * - A card with a header that can have expandable content below it\n * - Automatic border-radius handling based on whether content exists/is folded\n * - Controlled or uncontrolled folding state\n *\n * @example\n * ```tsx\n * import * as ExpandableCard from \"@/layouts/expandable-card-layouts\";\n *\n * // Uncontrolled — Root manages its own state\n * function MyCard() {\n *   return (\n *     <ExpandableCard.Root>\n *       <ExpandableCard.Header>\n *         <div className=\"p-4\">\n *           <h3>My Header</h3>\n *         </div>\n *       </ExpandableCard.Header>\n *       <ExpandableCard.Content>\n *         <div className=\"p-4\">\n *           <p>Expandable content goes here</p>\n *         </div>\n *       </ExpandableCard.Content>\n *     </ExpandableCard.Root>\n *   );\n * }\n *\n * // Controlled — consumer owns the state\n * function MyControlledCard() {\n *   const [isFolded, setIsFolded] = useState(false);\n *\n *   return (\n *     <ExpandableCard.Root isFolded={isFolded} onFoldedChange={setIsFolded}>\n *       <ExpandableCard.Header>\n *         <button onClick={() => setIsFolded(!isFolded)}>Toggle</button>\n *       </ExpandableCard.Header>\n *       <ExpandableCard.Content>\n *         <p>Content here</p>\n *       </ExpandableCard.Content>\n *     </ExpandableCard.Root>\n *   );\n * }\n * ```\n */\n\n\"use client\";\n\nimport React, {\n  createContext,\n  useContext,\n  useState,\n  useMemo,\n  useLayoutEffect,\n  Dispatch,\n  SetStateAction,\n} from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { WithoutStyles } from \"@/types\";\nimport ShadowDiv from \"@/refresh-components/ShadowDiv\";\nimport { Section, SectionProps } from \"@/layouts/general-layouts\";\nimport {\n  Collapsible,\n  CollapsibleContent,\n} from \"@/refresh-components/Collapsible\";\n\n/**\n * Expandable Card Context\n *\n * Provides folding state management for expandable cards without prop drilling.\n * Also tracks whether content is present via self-registration.\n */\ninterface ExpandableCardContextValue {\n  isFolded: boolean;\n  setIsFolded: Dispatch<SetStateAction<boolean>>;\n  hasContent: boolean;\n  registerContent: () => () => void;\n}\n\nconst ExpandableCardContext = createContext<\n  ExpandableCardContextValue | undefined\n>(undefined);\n\nfunction useExpandableCardContext() {\n  const context = useContext(ExpandableCardContext);\n  if (!context) {\n    throw new Error(\n      \"ExpandableCard components must be used within an ExpandableCard.Root\"\n    );\n  }\n  return context;\n}\n\n/**\n * Expandable Card Root Component\n *\n * The root container and context provider for an expandable card. Provides a\n * flex column layout with no gap or padding by default.\n *\n * Supports both controlled and uncontrolled folding state:\n * - **Uncontrolled**: Manages its own state. Use `defaultFolded` to set the\n *   initial folding state (defaults to `false`, i.e. expanded).\n * - **Controlled**: Pass `isFolded` and `onFoldedChange` to manage folding\n *   state externally.\n *\n * @example\n * ```tsx\n * // Uncontrolled\n * <ExpandableCard.Root>\n *   <ExpandableCard.Header>...</ExpandableCard.Header>\n *   <ExpandableCard.Content>...</ExpandableCard.Content>\n * </ExpandableCard.Root>\n *\n * // Uncontrolled, starts folded\n * <ExpandableCard.Root defaultFolded>\n *   ...\n * </ExpandableCard.Root>\n *\n * // Controlled\n * const [isFolded, setIsFolded] = useState(false);\n * <ExpandableCard.Root isFolded={isFolded} onFoldedChange={setIsFolded}>\n *   ...\n * </ExpandableCard.Root>\n * ```\n */\nexport interface ExpandableCardRootProps extends SectionProps {\n  /** Controlled folding state. When provided, the component is controlled. */\n  isFolded?: boolean;\n  /** Callback when folding state changes. Required for controlled usage. */\n  onFoldedChange?: Dispatch<SetStateAction<boolean>>;\n  /** Initial folding state for uncontrolled usage. Defaults to `false`. */\n  defaultFolded?: boolean;\n}\n\nfunction ExpandableCardRoot({\n  isFolded: controlledFolded,\n  onFoldedChange,\n  defaultFolded = false,\n  ...props\n}: ExpandableCardRootProps) {\n  const [uncontrolledFolded, setUncontrolledFolded] = useState(defaultFolded);\n  const isControlled = controlledFolded !== undefined;\n  const isFolded = isControlled ? controlledFolded : uncontrolledFolded;\n  const setIsFolded = isControlled\n    ? onFoldedChange ?? (() => {})\n    : setUncontrolledFolded;\n\n  const [hasContent, setHasContent] = useState(false);\n\n  // Registration function for Content to announce its presence\n  const registerContent = useMemo(\n    () => () => {\n      setHasContent(true);\n      return () => setHasContent(false);\n    },\n    []\n  );\n\n  const contextValue = useMemo(\n    () => ({ isFolded, setIsFolded, hasContent, registerContent }),\n    [isFolded, setIsFolded, hasContent, registerContent]\n  );\n\n  return (\n    <ExpandableCardContext.Provider value={contextValue}>\n      <Section gap={0} padding={0} {...props} />\n    </ExpandableCardContext.Provider>\n  );\n}\n\n/**\n * Expandable Card Header Component\n *\n * The header section of an expandable card. This is a pure container that:\n * - Has a border and neutral background\n * - Automatically handles border-radius based on content state:\n *   - Fully rounded when no content exists or when content is folded\n *   - Only top-rounded when content is visible\n *\n * You are responsible for adding your own padding, layout, and content inside.\n *\n * @example\n * ```tsx\n * <ExpandableCard.Header>\n *   <div className=\"flex items-center justify-between p-4\">\n *     <h3>My Title</h3>\n *     <button>Action</button>\n *   </div>\n * </ExpandableCard.Header>\n * ```\n */\nexport interface ExpandableCardHeaderProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {\n  children?: React.ReactNode;\n}\n\nfunction ExpandableCardHeader({\n  children,\n  ...props\n}: ExpandableCardHeaderProps) {\n  const { isFolded, hasContent } = useExpandableCardContext();\n\n  // Round all corners if there's no content, or if content exists but is folded\n  const shouldFullyRound = !hasContent || isFolded;\n\n  return (\n    <div\n      {...props}\n      className={cn(\n        \"border bg-background-neutral-00 w-full transition-[border-radius] duration-200 ease-out\",\n        shouldFullyRound ? \"rounded-16\" : \"rounded-t-16\"\n      )}\n    >\n      {children}\n    </div>\n  );\n}\n\n/**\n * Expandable Card Content Component\n *\n * The expandable content section of the card. This is a pure container that:\n * - Self-registers with context to inform Header about its presence\n * - Animates open/closed using Radix Collapsible (slide down/up)\n * - Has side and bottom borders that connect to the header\n * - Has a max-height with scrollable overflow via ShadowDiv\n *\n * You are responsible for adding your own content inside.\n *\n * IMPORTANT: Only ONE Content component should be used within a single Root.\n * This component self-registers with the context to inform Header whether\n * content exists (for border-radius styling). Using multiple Content components\n * will cause incorrect unmount behavior.\n *\n * @example\n * ```tsx\n * <ExpandableCard.Content>\n *   <div className=\"p-4\">\n *     <p>Your expandable content here</p>\n *   </div>\n * </ExpandableCard.Content>\n * ```\n */\nexport interface ExpandableCardContentProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {\n  children?: React.ReactNode;\n}\n\nfunction ExpandableCardContent({\n  children,\n  ...props\n}: ExpandableCardContentProps) {\n  const { isFolded, registerContent } = useExpandableCardContext();\n\n  // Self-register with context to inform Header that content exists\n  useLayoutEffect(() => {\n    return registerContent();\n  }, [registerContent]);\n\n  return (\n    <Collapsible open={!isFolded} className=\"w-full\">\n      <CollapsibleContent>\n        <div\n          className={cn(\n            \"border-x border-b rounded-b-16 overflow-hidden w-full transition-opacity duration-200 ease-out\",\n            isFolded ? \"opacity-0\" : \"opacity-100\"\n          )}\n        >\n          <ShadowDiv\n            className=\"flex flex-col rounded-b-16 max-h-[20rem]\"\n            {...props}\n          >\n            {children}\n          </ShadowDiv>\n        </div>\n      </CollapsibleContent>\n    </Collapsible>\n  );\n}\n\nexport {\n  ExpandableCardRoot as Root,\n  ExpandableCardHeader as Header,\n  ExpandableCardContent as Content,\n};\n"
  },
  {
    "path": "web/src/layouts/general-layouts.tsx",
    "content": "import { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport { WithoutStyles } from \"@/types\";\nimport { Content } from \"@opal/layouts\";\nimport { IconProps } from \"@opal/types\";\nimport React from \"react\";\n\nexport type FlexDirection = \"row\" | \"column\";\nexport type JustifyContent = \"start\" | \"center\" | \"end\" | \"between\";\nexport type AlignItems = \"start\" | \"center\" | \"end\" | \"stretch\";\nexport type Length = \"auto\" | \"fit\" | \"full\" | number;\n\nconst flexDirectionClassMap: Record<FlexDirection, string> = {\n  row: \"flex-row\",\n  column: \"flex-col\",\n};\nconst justifyClassMap: Record<JustifyContent, string> = {\n  start: \"justify-start\",\n  center: \"justify-center\",\n  end: \"justify-end\",\n  between: \"justify-between\",\n};\nconst alignClassMap: Record<AlignItems, string> = {\n  start: \"items-start\",\n  center: \"items-center\",\n  end: \"items-end\",\n  stretch: \"items-stretch\",\n};\nexport const widthClassmap: Record<Length, string> = {\n  auto: \"w-auto flex-shrink-0\",\n  fit: \"w-fit flex-shrink-0\",\n  full: \"w-full\",\n};\nexport const heightClassmap: Record<Length, string> = {\n  auto: \"h-auto\",\n  fit: \"h-fit\",\n  full: \"h-full min-h-0\",\n};\n\n/**\n * Section - A flexible container component for grouping related content\n *\n * Provides a standardized layout container with configurable direction and spacing.\n * Uses flexbox layout with customizable gap between children. Defaults to column layout.\n *\n * @param flexDirection - Flex direction. Default: \"column\".\n * @param justifyContent - Justify content along the main axis. Default: \"center\".\n * @param alignItems - Align items along the cross axis. Default: \"center\".\n * @param width - Width of the container: \"auto\", \"fit\", or \"full\". Default: \"full\".\n * @param height - Height of the container: \"auto\", \"fit\", or \"full\". Default: \"full\".\n * @param gap - Gap in REM units between children. Default: 1 (translates to gap-4 in Tailwind)\n * @param padding - Padding in REM units. Default: 0\n * @param wrap - If true, enables flex-wrap. Default: false\n * @param dbg - If true, adds a debug red border for visual debugging. Default: false\n *\n * @example\n * ```tsx\n * import * as GeneralLayouts from \"@/layouts/general-layouts\";\n *\n * // Column section with default gap - centered\n * <GeneralLayouts.Section>\n *   <Card>First item</Card>\n *   <Card>Second item</Card>\n * </GeneralLayouts.Section>\n *\n * // Row section aligned to the left and vertically centered\n * <GeneralLayouts.Section flexDirection=\"row\" justifyContent=\"start\" alignItems=\"center\">\n *   <Button>Cancel</Button>\n *   <Button>Save</Button>\n * </GeneralLayouts.Section>\n *\n * // Column section with items aligned to the right\n * <GeneralLayouts.Section alignItems=\"end\" gap={2}>\n *   <InputTypeIn label=\"Name\" />\n *   <InputTypeIn label=\"Email\" />\n * </GeneralLayouts.Section>\n *\n * // Row section centered both ways\n * <GeneralLayouts.Section flexDirection=\"row\" justifyContent=\"center\" alignItems=\"center\">\n *   <Text>Centered content</Text>\n * </GeneralLayouts.Section>\n *\n * // Section with fit width\n * <GeneralLayouts.Section width=\"fit\">\n *   <Button>Fit to content</Button>\n * </GeneralLayouts.Section>\n * ```\n *\n * @remarks\n * - The component defaults to column layout when no direction is specified\n * - Full width and height by default\n * - Accepts className for additional styling; style prop is not available\n * - Import using namespace import for consistent usage: `import * as GeneralLayouts from \"@/layouts/general-layouts\"`\n */\nexport interface SectionProps\n  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {\n  className?: string;\n  flexDirection?: FlexDirection;\n  justifyContent?: JustifyContent;\n  alignItems?: AlignItems;\n  width?: Length;\n  height?: Length;\n\n  gap?: number;\n  padding?: number;\n  wrap?: boolean;\n\n  // Debugging utilities\n  dbg?: boolean;\n\n  ref?: React.Ref<HTMLDivElement>;\n}\n\n/**\n * `<Disabled>` from `@opal/core` uses `display: contents` — it can safely\n * wrap a `Section` without affecting layout.\n */\nfunction Section({\n  className,\n  flexDirection = \"column\",\n  justifyContent = \"center\",\n  alignItems = \"center\",\n  width = \"full\",\n  height = \"full\",\n  gap = 1,\n  padding = 0,\n  wrap,\n  dbg,\n  ref,\n  ...rest\n}: SectionProps) {\n  return (\n    <div\n      ref={ref}\n      className={cn(\n        \"flex\",\n\n        flexDirectionClassMap[flexDirection],\n        justifyClassMap[justifyContent],\n        alignClassMap[alignItems],\n        typeof width === \"string\" && widthClassmap[width],\n        typeof height === \"string\" && heightClassmap[height],\n        typeof height === \"number\" && \"overflow-hidden\",\n\n        wrap && \"flex-wrap\",\n        dbg && \"dbg-red\",\n        className\n      )}\n      style={{\n        gap: `${gap}rem`,\n        padding: `${padding}rem`,\n        ...(typeof width === \"number\" && { width: `${width}rem` }),\n        ...(typeof height === \"number\" && { height: `${height}rem` }),\n      }}\n      {...rest}\n    />\n  );\n}\n\nexport interface AttachmentItemLayoutProps {\n  title: string;\n  description: string;\n  icon: React.FunctionComponent<IconProps>;\n  middleText?: string;\n  rightChildren?: React.ReactNode;\n}\nfunction AttachmentItemLayout({\n  title,\n  description,\n  icon: Icon,\n  middleText,\n  rightChildren,\n}: AttachmentItemLayoutProps) {\n  return (\n    <Section flexDirection=\"row\" gap={0.25} padding={0.25}>\n      <div className={cn(\"h-[2.25rem] aspect-square rounded-08\")}>\n        <Section>\n          <div\n            className=\"attachment-button__icon-wrapper\"\n            data-testid=\"attachment-item-icon-wrapper\"\n          >\n            <Icon className=\"attachment-button__icon\" />\n          </div>\n        </Section>\n      </div>\n      <Section\n        flexDirection=\"row\"\n        justifyContent=\"between\"\n        alignItems=\"center\"\n        gap={1.5}\n      >\n        <div data-testid=\"attachment-item-title\" className=\"flex-1 min-w-0\">\n          <Content\n            title={title}\n            description={description}\n            sizePreset=\"main-ui\"\n            variant=\"section\"\n            widthVariant=\"full\"\n          />\n        </div>\n        {middleText && (\n          <div className=\"flex-1 min-w-0\">\n            <Truncated text03 secondaryBody>\n              {middleText}\n            </Truncated>\n          </div>\n        )}\n        {rightChildren && (\n          <div className=\"flex-shrink-0 px-1\">{rightChildren}</div>\n        )}\n      </Section>\n    </Section>\n  );\n}\n\n/**\n * CardItemLayout - A layout for card headers with icon, title, description, and actions\n *\n * Structure:\n *   Column [\n *     Row [\n *       Row [ Icon (18px), Title ],\n *       rightChildren (action buttons)\n *     ],\n *     Description (optional, 2-line clamp)\n *   ]\n *\n * Used for card components that display an entity with:\n * - An icon on the left (18px, controlled by this layout)\n * - A title next to the icon\n * - Optional action buttons on the right\n * - Optional description below (2-line max)\n *\n * @param icon - Icon component to render on the left. Receives `size` prop from layout.\n *               Use a callback for custom components: `(props) => <AgentAvatar {...props} />`\n * @param title - The main title text\n * @param description - Optional description text below the title row (clamped to 2 lines)\n * @param rightChildren - Optional content on the right (typically action buttons)\n */\nexport interface CardItemLayoutProps {\n  icon: React.FunctionComponent<IconProps>;\n  title: string;\n  description?: string;\n  rightChildren?: React.ReactNode;\n}\nfunction CardItemLayout({\n  icon: Icon,\n  title,\n  description,\n  rightChildren,\n}: CardItemLayoutProps) {\n  return (\n    <div className=\"flex flex-col flex-1 self-stretch items-center gap-1 p-1\">\n      <div className=\"flex flex-row self-stretch items-center justify-between gap-1\">\n        <div className=\"flex flex-row items-center self-stretch p-1.5 gap-1.5\">\n          <div className=\"px-0.5\">\n            <Icon size={18} />\n          </div>\n          <Truncated mainContentBody>{title}</Truncated>\n        </div>\n\n        {rightChildren && (\n          <div className={cn(\"flex flex-row p-0.5 items-center\")}>\n            {rightChildren}\n          </div>\n        )}\n      </div>\n\n      {description && (\n        <div className=\"pb-1 px-2 flex self-stretch\">\n          <Text\n            as=\"p\"\n            secondaryBody\n            text03\n            className=\"line-clamp-2 truncate whitespace-normal h-[2.2rem] break-words\"\n          >\n            {description}\n          </Text>\n        </div>\n      )}\n    </div>\n  );\n}\nexport { Section, CardItemLayout, AttachmentItemLayout };\n"
  },
  {
    "path": "web/src/layouts/input-layouts.tsx",
    "content": "\"use client\";\n\nimport type { RichStr } from \"@opal/types\";\nimport { resolveStr } from \"@opal/components/text/InlineMarkdown\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgXOctagon, SvgAlertCircle } from \"@opal/icons\";\nimport { useField, useFormikContext } from \"formik\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { Content } from \"@opal/layouts\";\nimport Label from \"@/refresh-components/form/Label\";\n\ninterface OrientationLayoutProps {\n  name?: string;\n  disabled?: boolean;\n  nonInteractive?: boolean;\n  children?: React.ReactNode;\n  title: string | RichStr;\n  description?: string | RichStr;\n  suffix?: \"optional\" | (string & {});\n  sizePreset?: \"main-content\" | \"main-ui\";\n}\n\n/**\n * VerticalInputLayout - A layout component for form fields with vertical label arrangement\n *\n * Use this layout when you want the label, input, and error message stacked vertically.\n * Common for most form inputs where the label appears above the input field.\n *\n * Exported as `Vertical` for convenient usage.\n *\n * @example\n * ```tsx\n * import { Vertical } from \"@/layouts/input-layouts\";\n *\n * <Vertical\n *   name=\"email\"\n *   title=\"Email Address\"\n *   description=\"We'll never share your email\"\n *   optional\n * >\n *   <InputTypeIn name=\"email\" type=\"email\" />\n * </Vertical>\n * ```\n */\nexport interface VerticalLayoutProps extends OrientationLayoutProps {\n  subDescription?: string | RichStr;\n}\nfunction VerticalInputLayout({\n  name,\n  disabled,\n  nonInteractive,\n  children,\n  subDescription,\n  title,\n  description,\n  suffix,\n  sizePreset = \"main-content\",\n}: VerticalLayoutProps) {\n  const content = (\n    <Section gap={0.25} alignItems=\"start\">\n      <Content\n        title={title}\n        description={description}\n        suffix={suffix}\n        sizePreset={sizePreset}\n        variant=\"section\"\n      />\n      {children}\n      {name && <ErrorLayout name={name} />}\n      {subDescription && (\n        <Text secondaryBody text03>\n          {resolveStr(subDescription)}\n        </Text>\n      )}\n    </Section>\n  );\n\n  if (nonInteractive) return content;\n  return (\n    <Label name={name} disabled={disabled}>\n      {content}\n    </Label>\n  );\n}\n\n/**\n * HorizontalInputLayout - A layout component for form fields with horizontal label arrangement\n *\n * Use this layout when you want the label on the left and the input control on the right.\n * Commonly used for toggles, switches, and checkboxes where the label and control\n * should be side-by-side.\n *\n * Exported as `Horizontal` for convenient usage.\n *\n * @example\n * ```tsx\n * import { Horizontal } from \"@/layouts/input-layouts\";\n *\n * // Default behavior (top-aligned)\n * <Horizontal\n *   name=\"notifications\"\n *   title=\"Enable Notifications\"\n *   description=\"Receive updates about your account\"\n * >\n *   <Switch name=\"notifications\" />\n * </Horizontal>\n *\n * // Force center alignment (vertically centers input with label)\n * <Horizontal\n *   name=\"notifications\"\n *   title=\"Enable Notifications\"\n *   description=\"Receive updates about your account\"\n *   center\n * >\n *   <Switch name=\"notifications\" />\n * </Horizontal>\n * ```\n */\nexport interface HorizontalLayoutProps extends OrientationLayoutProps {\n  /** Align input to the center (middle) of the label/description */\n  center?: boolean;\n}\nfunction HorizontalInputLayout({\n  name,\n  disabled,\n  nonInteractive,\n  children,\n  center,\n  title,\n  description,\n  suffix,\n  sizePreset = \"main-content\",\n}: HorizontalLayoutProps) {\n  const content = (\n    <Section gap={0.25} alignItems=\"start\">\n      <Section\n        flexDirection=\"row\"\n        justifyContent=\"between\"\n        alignItems={center ? \"center\" : \"start\"}\n      >\n        <div className=\"flex flex-col flex-1 min-w-0 self-stretch\">\n          <Content\n            title={title}\n            description={description}\n            suffix={suffix}\n            sizePreset={sizePreset}\n            variant=\"section\"\n            widthVariant=\"full\"\n          />\n        </div>\n        <div className=\"flex flex-col items-end\">{children}</div>\n      </Section>\n      {name && <ErrorLayout name={name} />}\n    </Section>\n  );\n\n  if (nonInteractive) return content;\n  return (\n    <Label name={name} disabled={disabled}>\n      {content}\n    </Label>\n  );\n}\n\n/**\n * ErrorLayout - Displays Formik field validation errors\n *\n * Automatically shows error messages from Formik's validation state.\n * Only displays when the field has been touched and has an error.\n *\n * Exported as `Error` for convenient usage.\n *\n * @param name - The Formik field name to display errors for\n *\n * @example\n * ```tsx\n * import { Error } from \"@/layouts/input-layouts\";\n *\n * <InputTypeIn name=\"email\" />\n * <Error name=\"email\" />\n * ```\n *\n * @remarks\n * This component uses Formik's `useField` hook internally and requires\n * the component to be rendered within a Formik context.\n */\ninterface ErrorLayoutProps {\n  name: string;\n}\nfunction ErrorLayout({ name }: ErrorLayoutProps) {\n  const [, meta] = useField(name);\n  const { status } = useFormikContext();\n  const warning = status?.warnings?.[name];\n  if (warning && typeof warning !== \"string\")\n    throw new Error(\"The warning that is set must ALWAYS be a string\");\n\n  const hasError = meta.touched && meta.error;\n  const hasWarning = warning; // Don't require touched for warnings\n\n  // If `hasError` and `hasWarning` are both true at the same time, the error is prioritized and returned first.\n  if (hasError)\n    return <ErrorTextLayout type=\"error\">{meta.error}</ErrorTextLayout>;\n  else if (hasWarning)\n    return <ErrorTextLayout type=\"warning\">{warning}</ErrorTextLayout>;\n  else return null;\n}\n\nexport type ErrorTextType = \"error\" | \"warning\";\ninterface ErrorTextLayoutProps {\n  children?: React.ReactNode;\n  type?: ErrorTextType;\n}\nfunction ErrorTextLayout({ children, type = \"error\" }: ErrorTextLayoutProps) {\n  const Icon = type === \"error\" ? SvgXOctagon : SvgAlertCircle;\n  const colorClass =\n    type === \"error\" ? \"text-status-error-05\" : \"text-status-warning-05\";\n  const strokeClass =\n    type === \"error\" ? \"stroke-status-error-05\" : \"stroke-status-warning-05\";\n\n  return (\n    <div className=\"px-1\">\n      <Section flexDirection=\"row\" justifyContent=\"start\" gap={0.25}>\n        <Icon size={12} className={strokeClass} />\n        <Text secondaryBody className={colorClass} role=\"alert\">\n          {children}\n        </Text>\n      </Section>\n    </div>\n  );\n}\n\nexport {\n  VerticalInputLayout as Vertical,\n  HorizontalInputLayout as Horizontal,\n  ErrorLayout as Error,\n  ErrorTextLayout,\n};\n"
  },
  {
    "path": "web/src/layouts/settings-layouts.tsx",
    "content": "\"use client\";\n\n/**\n * Settings Page Layout Components\n *\n * A namespaced collection of components for building consistent settings pages.\n * These components provide a standardized layout with scroll-aware headers,\n * centered content containers, and automatic responsive behavior.\n *\n * @example\n * ```tsx\n * import SettingsLayouts from \"@/layouts/settings-layouts\";\n * import { SvgSettings } from \"@opal/icons\";\n *\n * function MySettingsPage() {\n *   return (\n *     <SettingsLayouts.Root>\n *       <SettingsLayouts.Header\n *         icon={SvgSettings}\n *         title=\"Account Settings\"\n *         description=\"Manage your account preferences and settings\"\n *         rightChildren={<Button>Save</Button>}\n *       >\n *         <InputTypeIn placeholder=\"Search settings...\" />\n *       </SettingsLayouts.Header>\n *\n *       <SettingsLayouts.Body>\n *         <Card>Settings content here</Card>\n *       </SettingsLayouts.Body>\n *     </SettingsLayouts.Root>\n *   );\n * }\n * ```\n */\n\nimport BackButton from \"@/refresh-components/buttons/BackButton\";\nimport { cn } from \"@/lib/utils\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { WithoutStyles } from \"@/types\";\nimport { IconFunctionComponent } from \"@opal/types\";\nimport { HtmlHTMLAttributes, useEffect, useRef, useState } from \"react\";\nimport { Content } from \"@opal/layouts\";\nimport Spacer from \"@/refresh-components/Spacer\";\n\nconst widthClasses = {\n  sm: \"w-[min(var(--container-sm),100%)]\",\n  \"sm-md\": \"w-[min(var(--container-sm-md),100%)]\",\n  md: \"w-[min(var(--container-md),100%)]\",\n  lg: \"w-[min(var(--container-lg),100%)]\",\n  full: \"w-[var(--container-full)]\",\n};\n\n/**\n * Settings Root Component\n *\n * Wrapper component that provides the base structure for settings pages.\n * Creates a centered, scrollable container with configurable width.\n *\n * Features:\n * - Full height container with centered content\n * - Automatic overflow-y scrolling\n * - Contains the scroll container ID that Settings.Header uses for shadow detection\n * - Configurable width via CSS variables defined in sizes.css:\n *   \"sm\" (672px), \"sm-md\" (752px), \"md\" (872px, default), \"lg\" (992px), \"full\" (100%)\n *\n * @example\n * ```tsx\n * // Default medium width (872px max)\n * <SettingsLayouts.Root>\n *   <SettingsLayouts.Header {...} />\n *   <SettingsLayouts.Body>...</SettingsLayouts.Body>\n * </SettingsLayouts.Root>\n *\n * // Large width (992px max)\n * <SettingsLayouts.Root width=\"lg\">\n *   <SettingsLayouts.Header {...} />\n *   <SettingsLayouts.Body>...</SettingsLayouts.Body>\n * </SettingsLayouts.Root>\n * ```\n */\ninterface SettingsRootProps\n  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {\n  width?: keyof typeof widthClasses;\n}\nfunction SettingsRoot({ width = \"md\", ...props }: SettingsRootProps) {\n  return (\n    <div\n      id=\"page-wrapper-scroll-container\"\n      className=\"w-full h-full flex flex-col items-center overflow-y-auto\"\n    >\n      {/* WARNING: The id=\"page-wrapper-scroll-container\" above is used by SettingsHeader\n          to detect scroll position and show/hide the scroll shadow.\n          DO NOT REMOVE this ID without updating SettingsHeader accordingly. */}\n      <div className={cn(\"h-full\", widthClasses[width])}>\n        <div {...props} />\n      </div>\n    </div>\n  );\n}\n\n/**\n * Settings Header Component\n *\n * Sticky header component for settings pages with icon, title, description,\n * and optional actions. Automatically shows a scroll shadow when the page\n * has been scrolled down.\n *\n * Features:\n * - Sticky positioning at the top of the page\n * - Icon display (1.75rem size)\n * - Title (headingH2 style)\n * - Optional description (string)\n * - Optional right-aligned action buttons via rightChildren\n * - Optional children content below title/description\n * - Optional back button\n * - Optional bottom separator\n * - Automatic scroll shadow effect\n *\n * @example\n * ```tsx\n * // Basic header\n * <SettingsLayouts.Header\n *   icon={SvgUser}\n *   title=\"Profile Settings\"\n *   description=\"Update your profile information\"\n * />\n *\n * // Without description\n * <SettingsLayouts.Header\n *   icon={SvgUser}\n *   title=\"Profile Settings\"\n * />\n *\n * // With action buttons\n * <SettingsLayouts.Header\n *   icon={SvgSettings}\n *   title=\"General Settings\"\n *   description=\"Configure your preferences\"\n *   rightChildren={\n *     <Button onClick={handleSave}>Save Changes</Button>\n *   }\n * />\n *\n * // With search/filter below and bottom separator\n * <SettingsLayouts.Header\n *   icon={SvgDatabase}\n *   title=\"Data Sources\"\n *   description=\"Manage your connected data sources\"\n *   separator\n * >\n *   <InputTypeIn placeholder=\"Search data sources...\" />\n * </SettingsLayouts.Header>\n *\n * // With back button\n * <SettingsLayouts.Header\n *   icon={SvgArrow}\n *   title=\"Advanced Settings\"\n *   description=\"Expert configuration options\"\n *   backButton\n * />\n *\n * // With string description\n * <SettingsLayouts.Header\n *   icon={SvgDatabase}\n *   title=\"API Keys\"\n *   description=\"Manage your API keys\"\n * />\n * ```\n */\nexport interface SettingsHeaderProps {\n  icon: IconFunctionComponent;\n  title: string;\n  description?: string;\n  children?: React.ReactNode;\n  rightChildren?: React.ReactNode;\n  backButton?: boolean;\n  onBack?: () => void;\n  separator?: boolean;\n}\nfunction SettingsHeader({\n  icon: Icon,\n  title,\n  description,\n  children,\n  rightChildren,\n  backButton,\n  onBack,\n  separator,\n}: SettingsHeaderProps) {\n  const [showShadow, setShowShadow] = useState(false);\n  const headerRef = useRef<HTMLDivElement>(null);\n\n  // # NOTE (@Subash-Mohan)\n  // Headers with actions are always sticky, others are not.\n  const isSticky = !!rightChildren;\n\n  useEffect(() => {\n    if (!isSticky) return;\n\n    // IMPORTANT: This component relies on SettingsRoot having the ID \"page-wrapper-scroll-container\"\n    // on its scrollable container. If that ID is removed or changed, the scroll shadow will not work.\n    const scrollContainer = document.getElementById(\n      \"page-wrapper-scroll-container\"\n    );\n    if (!scrollContainer) return;\n\n    const handleScroll = () => {\n      // Show shadow if the scroll container has been scrolled down\n      setShowShadow(scrollContainer.scrollTop > 0);\n    };\n\n    scrollContainer.addEventListener(\"scroll\", handleScroll);\n    handleScroll(); // Check initial state\n\n    return () => scrollContainer.removeEventListener(\"scroll\", handleScroll);\n  }, [isSticky]);\n\n  return (\n    <div\n      ref={headerRef}\n      className={cn(\n        \"w-full bg-background-tint-01\",\n        isSticky && \"sticky top-0 z-settings-header\",\n        backButton && \"md:pt-4\"\n      )}\n    >\n      {backButton && (\n        <div className=\"px-2\">\n          <BackButton behaviorOverride={onBack} />\n        </div>\n      )}\n\n      <Spacer vertical rem={2.5} />\n\n      <div className=\"flex flex-col gap-6 px-4\">\n        <div className=\"flex w-full justify-between\">\n          <div aria-label=\"admin-page-title\">\n            <Content\n              icon={Icon}\n              title={title}\n              description={description}\n              sizePreset=\"headline\"\n              variant=\"heading\"\n            />\n          </div>\n          {rightChildren}\n        </div>\n\n        {children}\n      </div>\n\n      {separator ? (\n        <>\n          <Spacer vertical rem={1.5} />\n          <Separator noPadding className=\"px-4\" />\n        </>\n      ) : (\n        <Spacer vertical rem={0.5} />\n      )}\n\n      {isSticky && (\n        <div\n          className={cn(\n            \"absolute left-0 right-0 h-[0.5rem] pointer-events-none transition-opacity duration-300 rounded-b-08 opacity-0\",\n            showShadow && \"opacity-100\"\n          )}\n          style={{\n            background:\n              \"linear-gradient(to bottom, var(--mask-02), transparent)\",\n          }}\n        />\n      )}\n    </div>\n  );\n}\n\n/**\n * Settings Body Component\n *\n * Content container for settings page body. Provides consistent padding\n * and vertical spacing for content sections.\n *\n * Features:\n * - Top padding: 1.5rem (pt-6)\n * - Bottom padding: 4.5rem (pb-[4.5rem])\n * - Horizontal padding: 1rem (px-4)\n * - Flex column layout with 2rem gap (gap-8)\n * - Full width container\n *\n * @example\n * ```tsx\n * <SettingsLayouts.Body>\n *   <Card>\n *     <h3>Section 1</h3>\n *     <p>Content here</p>\n *   </Card>\n *   <Card>\n *     <h3>Section 2</h3>\n *     <p>More content</p>\n *   </Card>\n * </SettingsLayouts.Body>\n * ```\n */\nfunction SettingsBody(\n  props: WithoutStyles<HtmlHTMLAttributes<HTMLDivElement>>\n) {\n  return (\n    <div\n      className=\"pt-6 pb-[4.5rem] px-4 flex flex-col gap-8 w-full\"\n      {...props}\n    />\n  );\n}\n\nexport { SettingsRoot as Root, SettingsHeader as Header, SettingsBody as Body };\n"
  },
  {
    "path": "web/src/layouts/table-layouts.tsx",
    "content": "import { cn } from \"@/lib/utils\";\nimport { WithoutStyles } from \"@/types\";\nimport React from \"react\";\n\n// ============================================================================\n// TABLE LAYOUTS - For building table-like structures without raw divs\n// ============================================================================\n\n/**\n * TableRow - A horizontal row layout for tables/lists\n *\n * @param selected - If true, applies selected background styling\n * @param onClick - Click handler for the row\n * @param children - Row content\n */\ninterface TableRowProps\n  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {\n  selected?: boolean;\n}\nfunction TableRow({ selected, children, onClick, ...rest }: TableRowProps) {\n  return (\n    <div\n      className={cn(\"table-row-layout\", onClick && \"cursor-pointer\")}\n      data-selected={selected ? \"true\" : undefined}\n      onClick={onClick}\n      {...rest}\n    >\n      {children}\n    </div>\n  );\n}\n\n/**\n * TableCell - A cell within a table row\n *\n * @param flex - If true, cell takes remaining space (flex: 1)\n * @param fixed - If true, cell has fixed width (doesn't shrink)\n * @param width - Optional fixed width in rem\n * @param children - Cell content\n */\ninterface TableCellProps\n  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {\n  flex?: boolean;\n  width?: number;\n}\nfunction TableCell({ flex, width, children, ...rest }: TableCellProps) {\n  return (\n    <div\n      className=\"table-cell-layout\"\n      data-flex={flex ? \"true\" : undefined}\n      data-fixed={width ? \"true\" : undefined}\n      style={width ? { width: `${width}rem` } : undefined}\n      {...rest}\n    >\n      {children}\n    </div>\n  );\n}\n\n/**\n * SidebarLayout - A fixed-width sidebar container\n *\n * @param children - Sidebar content\n */\ninterface SidebarLayoutProps\n  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {}\nfunction SidebarLayout({ children, ...rest }: SidebarLayoutProps) {\n  return (\n    <div className=\"sidebar-layout\" {...rest}>\n      {children}\n    </div>\n  );\n}\n\n/**\n * TwoColumnLayout - A two-column layout with sidebar and content\n *\n * @param children - Should contain sidebar and content sections\n */\ninterface TwoColumnLayoutProps\n  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {\n  minHeight?: number;\n}\nfunction TwoColumnLayout({\n  minHeight,\n  children,\n  ...rest\n}: TwoColumnLayoutProps) {\n  return (\n    <div\n      className=\"two-column-layout\"\n      style={minHeight ? { minHeight: `${minHeight}rem` } : undefined}\n      {...rest}\n    >\n      {children}\n    </div>\n  );\n}\n\n/**\n * ContentColumn - The main content area in a two-column layout\n */\ninterface ContentColumnProps\n  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {}\nfunction ContentColumn({ children, ...rest }: ContentColumnProps) {\n  return (\n    <div className=\"content-column-layout\" {...rest}>\n      {children}\n    </div>\n  );\n}\n\n/**\n * HiddenInput - A hidden input element (for file uploads, etc.)\n */\ninterface HiddenInputProps extends React.InputHTMLAttributes<HTMLInputElement> {\n  inputRef?: React.Ref<HTMLInputElement>;\n}\nfunction HiddenInput({ inputRef, ...rest }: HiddenInputProps) {\n  return <input ref={inputRef} className=\"hidden-input\" {...rest} />;\n}\n\n/**\n * CheckboxCell - A fixed-width cell for checkboxes in tables\n */\ninterface CheckboxCellProps\n  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {}\nfunction CheckboxCell({ children, ...rest }: CheckboxCellProps) {\n  return (\n    <div className=\"checkbox-cell-layout\" {...rest}>\n      {children}\n    </div>\n  );\n}\n\n/**\n * SourceIconsRow - A row of source icons\n */\ninterface SourceIconsRowProps\n  extends WithoutStyles<React.HtmlHTMLAttributes<HTMLDivElement>> {}\nfunction SourceIconsRow({ children, ...rest }: SourceIconsRowProps) {\n  return (\n    <div className=\"source-icons-layout\" {...rest}>\n      {children}\n    </div>\n  );\n}\n\nexport {\n  TableRow,\n  TableCell,\n  SidebarLayout,\n  TwoColumnLayout,\n  ContentColumn,\n  HiddenInput,\n  CheckboxCell,\n  SourceIconsRow,\n};\n"
  },
  {
    "path": "web/src/lib/admin/users/userMutationFetcher.ts",
    "content": "const userMutationFetcher = async (\n  url: string,\n  { arg }: { arg: { user_email: string; new_role?: string; method?: string } }\n) => {\n  const { method = \"PATCH\", ...body } = arg;\n  return fetch(url, {\n    method,\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(body),\n  }).then(async (res) => {\n    if (res.ok) return res.json();\n\n    const errorDetail = (await res.json()).detail;\n    throw Error(errorDetail);\n  });\n};\n\nexport default userMutationFetcher;\n"
  },
  {
    "path": "web/src/lib/admin/voice/svc.ts",
    "content": "const VOICE_PROVIDERS_URL = \"/api/admin/voice/providers\";\n\nexport async function activateVoiceProvider(\n  providerId: number,\n  mode: \"stt\" | \"tts\",\n  ttsModel?: string\n): Promise<Response> {\n  const url = new URL(\n    `${VOICE_PROVIDERS_URL}/${providerId}/activate-${mode}`,\n    window.location.origin\n  );\n  if (mode === \"tts\" && ttsModel) {\n    url.searchParams.set(\"tts_model\", ttsModel);\n  }\n  return fetch(url.toString(), { method: \"POST\" });\n}\n\nexport async function deactivateVoiceProvider(\n  providerId: number,\n  mode: \"stt\" | \"tts\"\n): Promise<Response> {\n  return fetch(`${VOICE_PROVIDERS_URL}/${providerId}/deactivate-${mode}`, {\n    method: \"POST\",\n  });\n}\n\nexport async function testVoiceProvider(request: {\n  provider_type: string;\n  api_key?: string;\n  target_uri?: string;\n  use_stored_key?: boolean;\n}): Promise<Response> {\n  return fetch(`${VOICE_PROVIDERS_URL}/test`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(request),\n  });\n}\n\nexport async function upsertVoiceProvider(\n  request: Record<string, unknown>\n): Promise<Response> {\n  return fetch(VOICE_PROVIDERS_URL, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(request),\n  });\n}\n\nexport async function fetchVoicesByType(\n  providerType: string\n): Promise<Response> {\n  return fetch(`/api/admin/voice/voices?provider_type=${providerType}`);\n}\n\nexport async function deleteVoiceProvider(\n  providerId: number\n): Promise<Response> {\n  return fetch(`${VOICE_PROVIDERS_URL}/${providerId}`, { method: \"DELETE\" });\n}\n\nexport async function fetchLLMProviders(): Promise<Response> {\n  return fetch(\"/api/admin/llm/provider\");\n}\n"
  },
  {
    "path": "web/src/lib/admin-routes.ts",
    "content": "import { IconFunctionComponent } from \"@opal/types\";\nimport {\n  SvgActions,\n  SvgActivity,\n  SvgArrowExchange,\n  SvgAudio,\n  SvgShareWebhook,\n  SvgBarChart,\n  SvgBookOpen,\n  SvgBubbleText,\n  SvgClipboard,\n  SvgCpu,\n  SvgDiscordMono,\n  SvgDownload,\n  SvgEmpty,\n  SvgFileText,\n  SvgFiles,\n  SvgGlobe,\n  SvgHistory,\n  SvgImage,\n  SvgMcp,\n  SvgNetworkGraph,\n  SvgOnyxOctagon,\n  SvgPaintBrush,\n  SvgProgressBars,\n  SvgSearchMenu,\n  SvgSlack,\n  SvgTerminal,\n  SvgThumbsUp,\n  SvgUploadCloud,\n  SvgUser,\n  SvgUserKey,\n  SvgUserSync,\n  SvgUsers,\n  SvgWallet,\n  SvgZoomIn,\n} from \"@opal/icons\";\n\nexport interface AdminRouteEntry {\n  path: string;\n  icon: IconFunctionComponent;\n  title: string;\n  sidebarLabel: string;\n}\n\n/**\n * Single source of truth for every admin route: path, icon, page-header\n * title, and sidebar label.\n */\nexport const ADMIN_ROUTES = {\n  INDEXING_STATUS: {\n    path: \"/admin/indexing/status\",\n    icon: SvgBookOpen,\n    title: \"Existing Connectors\",\n    sidebarLabel: \"Existing Connectors\",\n  },\n  ADD_CONNECTOR: {\n    path: \"/admin/add-connector\",\n    icon: SvgUploadCloud,\n    title: \"Add Connector\",\n    sidebarLabel: \"Add Connector\",\n  },\n  DOCUMENT_SETS: {\n    path: \"/admin/documents/sets\",\n    icon: SvgFiles,\n    title: \"Document Sets\",\n    sidebarLabel: \"Document Sets\",\n  },\n  DOCUMENT_EXPLORER: {\n    path: \"/admin/documents/explorer\",\n    icon: SvgZoomIn,\n    title: \"Document Explorer\",\n    sidebarLabel: \"Explorer\",\n  },\n  DOCUMENT_FEEDBACK: {\n    path: \"/admin/documents/feedback\",\n    icon: SvgThumbsUp,\n    title: \"Document Feedback\",\n    sidebarLabel: \"Feedback\",\n  },\n  AGENTS: {\n    path: \"/admin/agents\",\n    icon: SvgOnyxOctagon,\n    title: \"Agents\",\n    sidebarLabel: \"Agents\",\n  },\n  SLACK_BOTS: {\n    path: \"/admin/bots\",\n    icon: SvgSlack,\n    title: \"Slack Integration\",\n    sidebarLabel: \"Slack Integration\",\n  },\n  DISCORD_BOTS: {\n    path: \"/admin/discord-bot\",\n    icon: SvgDiscordMono,\n    title: \"Discord Integration\",\n    sidebarLabel: \"Discord Integration\",\n  },\n  MCP_ACTIONS: {\n    path: \"/admin/actions/mcp\",\n    icon: SvgMcp,\n    title: \"MCP Actions\",\n    sidebarLabel: \"MCP Actions\",\n  },\n  OPENAPI_ACTIONS: {\n    path: \"/admin/actions/open-api\",\n    icon: SvgActions,\n    title: \"OpenAPI Actions\",\n    sidebarLabel: \"OpenAPI Actions\",\n  },\n  STANDARD_ANSWERS: {\n    path: \"/admin/standard-answer\",\n    icon: SvgClipboard,\n    title: \"Standard Answers\",\n    sidebarLabel: \"Standard Answers\",\n  },\n  GROUPS: {\n    path: \"/admin/groups\",\n    icon: SvgUsers,\n    title: \"Manage User Groups\",\n    sidebarLabel: \"Groups\",\n  },\n  CHAT_PREFERENCES: {\n    path: \"/admin/configuration/chat-preferences\",\n    icon: SvgBubbleText,\n    title: \"Chat Preferences\",\n    sidebarLabel: \"Chat Preferences\",\n  },\n  LLM_MODELS: {\n    path: \"/admin/configuration/llm\",\n    icon: SvgCpu,\n    title: \"Language Models\",\n    sidebarLabel: \"Language Models\",\n  },\n  WEB_SEARCH: {\n    path: \"/admin/configuration/web-search\",\n    icon: SvgGlobe,\n    title: \"Web Search\",\n    sidebarLabel: \"Web Search\",\n  },\n  IMAGE_GENERATION: {\n    path: \"/admin/configuration/image-generation\",\n    icon: SvgImage,\n    title: \"Image Generation\",\n    sidebarLabel: \"Image Generation\",\n  },\n  VOICE: {\n    path: \"/admin/configuration/voice\",\n    icon: SvgAudio,\n    title: \"Voice\",\n    sidebarLabel: \"Voice\",\n  },\n  CODE_INTERPRETER: {\n    path: \"/admin/configuration/code-interpreter\",\n    icon: SvgTerminal,\n    title: \"Code Interpreter\",\n    sidebarLabel: \"Code Interpreter\",\n  },\n  INDEX_SETTINGS: {\n    path: \"/admin/configuration/search\",\n    icon: SvgSearchMenu,\n    title: \"Index Settings\",\n    sidebarLabel: \"Index Settings\",\n  },\n  DOCUMENT_PROCESSING: {\n    path: \"/admin/configuration/document-processing\",\n    icon: SvgFileText,\n    title: \"Document Processing\",\n    sidebarLabel: \"Document Processing\",\n  },\n  KNOWLEDGE_GRAPH: {\n    path: \"/admin/kg\",\n    icon: SvgNetworkGraph,\n    title: \"Knowledge Graph\",\n    sidebarLabel: \"Knowledge Graph\",\n  },\n  USERS: {\n    path: \"/admin/users\",\n    icon: SvgUser,\n    title: \"Users & Requests\",\n    sidebarLabel: \"Users\",\n  },\n  API_KEYS: {\n    path: \"/admin/service-accounts\",\n    icon: SvgUserKey,\n    title: \"Service Accounts\",\n    sidebarLabel: \"Service Accounts\",\n  },\n  TOKEN_RATE_LIMITS: {\n    path: \"/admin/token-rate-limits\",\n    icon: SvgProgressBars,\n    title: \"Spending Limits\",\n    sidebarLabel: \"Spending Limits\",\n  },\n  USAGE: {\n    path: \"/admin/performance/usage\",\n    icon: SvgActivity,\n    title: \"Usage Statistics\",\n    sidebarLabel: \"Usage Statistics\",\n  },\n  QUERY_HISTORY: {\n    path: \"/admin/performance/query-history\",\n    icon: SvgHistory,\n    title: \"Query History\",\n    sidebarLabel: \"Query History\",\n  },\n  CUSTOM_ANALYTICS: {\n    path: \"/admin/performance/custom-analytics\",\n    icon: SvgBarChart,\n    title: \"Custom Analytics\",\n    sidebarLabel: \"Custom Analytics\",\n  },\n  THEME: {\n    path: \"/admin/theme\",\n    icon: SvgPaintBrush,\n    title: \"Appearance & Theming\",\n    sidebarLabel: \"Appearance & Theming\",\n  },\n  BILLING: {\n    path: \"/admin/billing\",\n    icon: SvgWallet,\n    title: \"Plans & Billing\",\n    sidebarLabel: \"Plans & Billing\",\n  },\n  INDEX_MIGRATION: {\n    path: \"/admin/document-index-migration\",\n    icon: SvgArrowExchange,\n    title: \"Document Index Migration\",\n    sidebarLabel: \"Document Index Migration\",\n  },\n  HOOKS: {\n    path: \"/admin/hooks\",\n    icon: SvgShareWebhook,\n    title: \"Hook Extensions\",\n    sidebarLabel: \"Hook Extensions\",\n  },\n  SCIM: {\n    path: \"/admin/scim\",\n    icon: SvgUserSync,\n    title: \"SCIM\",\n    sidebarLabel: \"SCIM\",\n  },\n  DEBUG: {\n    path: \"/admin/debug\",\n    icon: SvgDownload,\n    title: \"Debug Logs\",\n    sidebarLabel: \"Debug Logs\",\n  },\n  // Prefix-only entries used for layout matching — not rendered as sidebar\n  // items or page headers.\n  DOCUMENTS: {\n    path: \"/admin/documents\",\n    icon: SvgEmpty,\n    title: \"\",\n    sidebarLabel: \"\",\n  },\n  PERFORMANCE: {\n    path: \"/admin/performance\",\n    icon: SvgEmpty,\n    title: \"\",\n    sidebarLabel: \"\",\n  },\n} as const satisfies Record<string, AdminRouteEntry>;\n\n/**\n * Helper that converts a route entry into the `{ name, icon, link }`\n * shape expected by the sidebar.\n */\nexport function sidebarItem(route: AdminRouteEntry) {\n  return { name: route.sidebarLabel, icon: route.icon, link: route.path };\n}\n"
  },
  {
    "path": "web/src/lib/agents.ts",
    "content": "import { MinimalPersonaSnapshot, Persona } from \"@/app/admin/agents/interfaces\";\nimport { User } from \"./types\";\nimport { checkUserIsNoAuthUser } from \"./user\";\nimport { personaComparator } from \"@/app/admin/agents/lib\";\n\n/**\n * Checks if the given user owns the specified assistant.\n *\n * @param user - The user to check ownership for, or null if no user is logged in\n * @param assistant - The assistant to check ownership of\n * @returns true if the user owns the agent (or no auth is required), false otherwise\n */\nexport function checkUserOwnsAgent(\n  user: User | null,\n  agent: MinimalPersonaSnapshot | Persona\n) {\n  return checkUserIdOwnsAgent(user?.id, agent);\n}\n\n/**\n * Checks if the given user ID owns the specified assistant.\n *\n * Returns true if a valid user ID is provided and any of the following conditions\n * are met (and the agent is not built-in):\n * - The user is a no-auth user (authentication is disabled)\n * - The user ID matches the agent owner's ID\n *\n * Returns false if userId is undefined (e.g., user is loading or unauthenticated)\n * to prevent granting ownership access prematurely.\n *\n * @param userId - The user ID to check ownership for\n * @param assistant - The assistant to check ownership of\n * @returns true if the user owns the agent, false otherwise\n */\nexport function checkUserIdOwnsAgent(\n  userId: string | undefined,\n  agent: MinimalPersonaSnapshot | Persona\n) {\n  return (\n    !!userId &&\n    (checkUserIsNoAuthUser(userId) || agent.owner?.id === userId) &&\n    !agent.builtin_persona\n  );\n}\n\n/**\n * Updates the user's pinned assistants with the given ordered list of agent IDs.\n *\n * @param pinnedAgentIds - Array of agent IDs in the desired pinned order\n * @throws Error if the API request fails\n */\nexport async function pinAgents(pinnedAgentIds: number[]) {\n  // TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766\n  const response = await fetch(`/api/user/pinned-assistants`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      ordered_assistant_ids: pinnedAgentIds, // TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766\n    }),\n  });\n  if (!response.ok) {\n    throw new Error(\"Failed to update pinned assistants\");\n  }\n}\n\n/**\n * Filters and sorts assistants based on visibility.\n *\n * Only returns assistants that are marked as visible, sorted using the persona comparator.\n *\n * @param assistants - Array of assistants to filter\n * @returns Filtered and sorted array of visible assistants\n */\nexport function filterAgents(\n  assistants: MinimalPersonaSnapshot[]\n): MinimalPersonaSnapshot[] {\n  let filteredAgents = assistants.filter((assistant) => assistant.is_listed);\n  return filteredAgents.sort(personaComparator);\n}\n\n/**\n * Deletes an agent by its ID.\n *\n * @param agentId - The ID of the agent to delete\n * @returns null on success, or an error message string on failure\n */\nexport async function deleteAgent(agentId: number): Promise<string | null> {\n  try {\n    const response = await fetch(`/api/persona/${agentId}`, {\n      method: \"DELETE\",\n    });\n\n    if (response.ok) {\n      return null;\n    }\n\n    const errorMessage = (await response.json()).detail || \"Unknown error\";\n    return errorMessage;\n  } catch (error) {\n    console.error(\"deleteAgent: Network error\", error);\n    return \"Network error. Please check your connection and try again.\";\n  }\n}\n\n/**\n * Updates agent sharing settings.\n *\n * For MIT versions, group_ids should not be sent since group-based sharing\n * is an EE-only feature.\n *\n * @param agentId - The ID of the agent to update\n * @param userIds - Array of user IDs to share with\n * @param groupIds - Array of group IDs to share with (ignored when isPaidEnterpriseFeaturesEnabled is false)\n * @param isPublic - Whether the agent should be public\n * @param isPaidEnterpriseFeaturesEnabled - Whether enterprise features are enabled\n * @returns null on success, or an error message string on failure\n *\n * @example\n * const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n * const error = await updateAgentSharedStatus(agentId, userIds, groupIds, isPublic, isPaidEnterpriseFeaturesEnabled);\n * if (error) console.error(error);\n */\nexport async function updateAgentSharedStatus(\n  agentId: number,\n  userIds: string[],\n  groupIds: number[],\n  isPublic: boolean | undefined,\n  isPaidEnterpriseFeaturesEnabled: boolean,\n  labelIds?: number[]\n): Promise<null | string> {\n  // MIT versions should not send group_ids - warn if caller provided non-empty groups\n  if (!isPaidEnterpriseFeaturesEnabled && groupIds.length > 0) {\n    console.error(\n      \"updateAgentSharedStatus: groupIds provided but enterprise features are disabled. \" +\n        \"Group sharing is an EE-only feature. Discarding groupIds.\"\n    );\n  }\n\n  try {\n    const response = await fetch(`/api/persona/${agentId}/share`, {\n      method: \"PATCH\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        user_ids: userIds,\n        // Only include group_ids for enterprise versions\n        group_ids: isPaidEnterpriseFeaturesEnabled ? groupIds : undefined,\n        is_public: isPublic,\n        label_ids: labelIds,\n      }),\n    });\n\n    if (response.ok) {\n      return null;\n    }\n\n    const errorMessage = (await response.json()).detail || \"Unknown error\";\n    return errorMessage;\n  } catch (error) {\n    console.error(\"updateAgentSharedStatus: Network error\", error);\n    return \"Network error. Please check your connection and try again.\";\n  }\n}\n\n/**\n * Updates the labels assigned to an agent via the share endpoint.\n *\n * @param agentId - The ID of the agent to update\n * @param labelIds - Array of label IDs to assign to the agent\n * @returns null on success, or an error message string on failure\n */\nexport async function updateAgentLabels(\n  agentId: number,\n  labelIds: number[]\n): Promise<string | null> {\n  try {\n    const response = await fetch(`/api/persona/${agentId}/share`, {\n      method: \"PATCH\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify({ label_ids: labelIds }),\n    });\n\n    if (response.ok) {\n      return null;\n    }\n\n    const errorMessage = (await response.json()).detail || \"Unknown error\";\n    return errorMessage;\n  } catch (error) {\n    console.error(\"updateAgentLabels: Network error\", error);\n    return \"Network error. Please check your connection and try again.\";\n  }\n}\n\n/**\n * Updates the featured (default) status of an agent.\n *\n * @param agentId - The ID of the agent to update\n * @param isFeatured - Whether the agent should be featured\n * @returns null on success, or an error message string on failure\n */\nexport async function updateAgentFeaturedStatus(\n  agentId: number,\n  isFeatured: boolean\n): Promise<string | null> {\n  try {\n    const response = await fetch(`/api/admin/persona/${agentId}/featured`, {\n      method: \"PATCH\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify({ is_featured: isFeatured }),\n    });\n\n    if (response.ok) {\n      return null;\n    }\n\n    const errorMessage = (await response.json()).detail || \"Unknown error\";\n    return errorMessage;\n  } catch (error) {\n    console.error(\"updateAgentFeaturedStatus: Network error\", error);\n    return \"Network error. Please check your connection and try again.\";\n  }\n}\n"
  },
  {
    "path": "web/src/lib/agentsSS.ts",
    "content": "import { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { fetchSS } from \"./utilsSS\";\n\nexport type FetchAgentsResponse = [MinimalPersonaSnapshot[], string | null];\n\n// Fetch agents server-side\nexport async function fetchAgentsSS(): Promise<FetchAgentsResponse> {\n  const response = await fetchSS(\"/persona\");\n  if (response.ok) {\n    return [(await response.json()) as MinimalPersonaSnapshot[], null];\n  }\n  return [[], (await response.json()).detail || \"Unknown Error\"];\n}\n"
  },
  {
    "path": "web/src/lib/analytics.ts",
    "content": "import posthog from \"posthog-js\";\n\n// ─── Event Registry ────────────────────────────────────────────────────────\n// All tracked event names. Add new events here to get type-safe tracking.\n\nexport enum AnalyticsEvent {\n  CONFIGURED_LLM_PROVIDER = \"configured_llm_provider\",\n  COMPLETED_CRAFT_ONBOARDING = \"completed_craft_onboarding\",\n  COMPLETED_CRAFT_USER_INFO = \"completed_craft_user_info\",\n  SENT_CRAFT_MESSAGE = \"sent_craft_message\",\n  SAW_CRAFT_INTRO = \"saw_craft_intro\",\n  CLICKED_GO_HOME = \"clicked_go_home\",\n  CLICKED_TRY_CRAFT = \"clicked_try_craft\",\n  CLICKED_CRAFT_IN_SIDEBAR = \"clicked_craft_in_sidebar\",\n  RELEASE_NOTIFICATION_CLICKED = \"release_notification_clicked\",\n  EXTENSION_CHAT_QUERY = \"extension_chat_query\",\n}\n\n// ─── Shared Enums ──────────────────────────────────────────────────────────\n\nexport enum LLMProviderConfiguredSource {\n  ADMIN_PAGE = \"admin_page\",\n  CHAT_ONBOARDING = \"chat_onboarding\",\n  CRAFT_ONBOARDING = \"craft_onboarding\",\n}\n\n// ─── Event Property Types ──────────────────────────────────────────────────\n// Maps each event to its required properties. Use `void` for events with no\n// properties — this makes the second argument to `track()` optional for those\n// events while requiring it for events that carry data.\n\ninterface AnalyticsEventProperties {\n  [AnalyticsEvent.CONFIGURED_LLM_PROVIDER]: {\n    provider: string;\n    is_creation: boolean;\n    source: LLMProviderConfiguredSource;\n  };\n  [AnalyticsEvent.COMPLETED_CRAFT_ONBOARDING]: void;\n  [AnalyticsEvent.COMPLETED_CRAFT_USER_INFO]: {\n    first_name: string;\n    last_name: string | undefined;\n    work_area: string | undefined;\n    level: string | undefined;\n  };\n  [AnalyticsEvent.SENT_CRAFT_MESSAGE]: void;\n  [AnalyticsEvent.SAW_CRAFT_INTRO]: void;\n  [AnalyticsEvent.CLICKED_GO_HOME]: void;\n  [AnalyticsEvent.CLICKED_TRY_CRAFT]: void;\n  [AnalyticsEvent.CLICKED_CRAFT_IN_SIDEBAR]: void;\n  [AnalyticsEvent.RELEASE_NOTIFICATION_CLICKED]: {\n    version: string | undefined;\n  };\n  [AnalyticsEvent.EXTENSION_CHAT_QUERY]: {\n    extension_context: string | null | undefined;\n    assistant_id: number | undefined;\n    has_files: boolean;\n    deep_research: boolean;\n  };\n}\n\n// ─── Typed Track Function ──────────────────────────────────────────────────\n\nexport function track<E extends AnalyticsEvent>(\n  ...args: AnalyticsEventProperties[E] extends void\n    ? [event: E]\n    : [event: E, properties: AnalyticsEventProperties[E]]\n): void {\n  const [event, properties] = args as [E, Record<string, unknown>?];\n  posthog.capture(event, properties ?? {});\n}\n"
  },
  {
    "path": "web/src/lib/appSidebarSS.ts",
    "content": "import { cookies } from \"next/headers\";\nimport { SIDEBAR_TOGGLED_COOKIE_NAME } from \"@/components/resizable/constants\";\nimport { User } from \"@/lib/types\";\n\nexport interface AppSidebarMetadata {\n  folded: boolean;\n}\n\nexport async function fetchAppSidebarMetadata(\n  user?: User | null\n): Promise<AppSidebarMetadata> {\n  const requestCookies = await cookies();\n  const sidebarToggled = requestCookies.get(SIDEBAR_TOGGLED_COOKIE_NAME);\n\n  const folded = !user?.is_anonymous_user && sidebarToggled?.value === \"true\";\n\n  return {\n    folded,\n  };\n}\n"
  },
  {
    "path": "web/src/lib/auth/redirectValidation.ts",
    "content": "/**\n * Validates a redirect URL to prevent Open Redirect vulnerabilities.\n * Only allows internal paths (relative URLs starting with /).\n *\n * @param url - The URL to validate (typically from query params like ?next=...)\n * @returns The validated URL if safe, otherwise null\n *\n * Security: Rejects:\n * - External URLs (https://evil.com)\n * - Protocol-relative URLs (//evil.com)\n * - JavaScript URLs (javascript:alert(1))\n * - Data URLs (data:text/html,...)\n * - Absolute URLs with protocols\n */\nexport function validateInternalRedirect(\n  url: string | null | undefined\n): string | null {\n  if (!url) {\n    return null;\n  }\n\n  // Trim whitespace\n  const trimmedUrl = url.trim();\n\n  // Must start with / (internal path)\n  if (!trimmedUrl.startsWith(\"/\")) {\n    return null;\n  }\n\n  // Reject protocol-relative URLs (//evil.com)\n  if (trimmedUrl.startsWith(\"//\")) {\n    return null;\n  }\n\n  // Reject URLs with protocol schemes in the path (before query/hash)\n  //\n  // Regex breakdown: /^[^?#]*:/\n  //   ^        - Start of string\n  //   [^?#]*   - Match any characters EXCEPT ? and # (zero or more times)\n  //              This matches everything before the query string or hash\n  //   :        - Match a literal colon\n  //\n  // This rejects: /javascript:alert(1), /http://evil.com, /data:text/html\n  // But allows:   /chat?time=12:30:00, /admin#section:1\n  //               (colons after ? or # are safe)\n  if (trimmedUrl.match(/^[^?#]*:/)) {\n    return null;\n  }\n\n  // Additional safety: check for backslash sequences that could bypass validation\n  if (trimmedUrl.includes(\"\\\\\")) {\n    return null;\n  }\n\n  return trimmedUrl;\n}\n"
  },
  {
    "path": "web/src/lib/auth/requireAuth.ts",
    "content": "import { User, UserRole } from \"@/lib/types\";\nimport {\n  AuthTypeMetadata,\n  getAuthTypeMetadataSS,\n  getCurrentUserSS,\n} from \"@/lib/userSS\";\nimport { AuthType } from \"@/lib/constants\";\n\n/**\n * Result of an authentication check.\n * If redirect is set, the caller should redirect immediately.\n */\nexport interface AuthCheckResult {\n  user: User | null;\n  authTypeMetadata: AuthTypeMetadata | null;\n  redirect?: string;\n}\n\n/**\n * Requires that the user is authenticated.\n * If not authenticated and auth is enabled, returns a redirect to login.\n * Also checks email verification if required.\n *\n * @returns AuthCheckResult with user, auth metadata, and optional redirect\n *\n * @example\n * ```typescript\n * const authResult = await requireAuth();\n * if (authResult.redirect) {\n *   return redirect(authResult.redirect);\n * }\n * // User is authenticated, proceed with logic\n * const { user } = authResult;\n * ```\n */\nexport async function requireAuth(): Promise<AuthCheckResult> {\n  // Fetch auth information\n  let user: User | null = null;\n  let authTypeMetadata: AuthTypeMetadata | null = null;\n\n  try {\n    [authTypeMetadata, user] = await Promise.all([\n      getAuthTypeMetadataSS(),\n      getCurrentUserSS(),\n    ]);\n  } catch (e) {\n    console.log(`Failed to fetch auth information - ${e}`);\n  }\n\n  // If user is not logged in, redirect to login\n  if (!user) {\n    return {\n      user,\n      authTypeMetadata,\n      redirect: \"/auth/login\",\n    };\n  }\n\n  // Check email verification if required\n  if (user && !user.is_verified && authTypeMetadata?.requiresVerification) {\n    return {\n      user,\n      authTypeMetadata,\n      redirect: \"/auth/waiting-on-verification\",\n    };\n  }\n\n  return {\n    user,\n    authTypeMetadata,\n  };\n}\n\n// Allowlist of roles that can access admin pages (all roles except BASIC)\nconst ADMIN_ALLOWED_ROLES = [\n  UserRole.ADMIN,\n  UserRole.CURATOR,\n  UserRole.GLOBAL_CURATOR,\n];\n\n/**\n * Requires that the user is authenticated AND has admin role.\n * If not authenticated, redirects to login.\n * If authenticated but not admin, redirects to /chat.\n * Also checks email verification if required.\n *\n * @returns AuthCheckResult with user, auth metadata, and optional redirect\n *\n * @example\n * ```typescript\n * const authResult = await requireAdminAuth();\n * if (authResult.redirect) {\n *   return redirect(authResult.redirect);\n * }\n * // User is authenticated admin, proceed with admin logic\n * const { user } = authResult;\n * ```\n */\nexport async function requireAdminAuth(): Promise<AuthCheckResult> {\n  const authResult = await requireAuth();\n\n  // If already has a redirect (not authenticated or not verified), return it\n  if (authResult.redirect) {\n    return authResult;\n  }\n\n  const { user, authTypeMetadata } = authResult;\n\n  // Check if user has an allowed role\n  if (user && !ADMIN_ALLOWED_ROLES.includes(user.role)) {\n    return {\n      user,\n      authTypeMetadata,\n      redirect: \"/app\",\n    };\n  }\n\n  return authResult;\n}\n"
  },
  {
    "path": "web/src/lib/azureTargetUri.ts",
    "content": "const getApiVersionParam = (url: URL): string => {\n  const directApiVersion = url.searchParams.get(\"api-version\");\n  if (directApiVersion?.trim()) {\n    return directApiVersion.trim();\n  }\n\n  let normalized: string | null = null;\n  url.searchParams.forEach((value, key) => {\n    if (normalized) {\n      return;\n    }\n    if (key.toLowerCase() === \"api-version\" && value?.trim()) {\n      normalized = value.trim();\n    }\n  });\n\n  return normalized ?? \"\";\n};\n\nconst getDeploymentNameParam = (url: URL): string => {\n  const match = url.pathname.match(/\\/openai\\/deployments\\/([^/]+)/i);\n  const deployment = match?.[1] ?? \"\";\n  return deployment ? deployment.toLowerCase() : \"\";\n};\n\nconst isResponsesPath = (url: URL): boolean =>\n  /\\/openai\\/responses/i.test(url.pathname);\n\nexport const parseAzureTargetUri = (\n  rawUri: string\n): {\n  url: URL;\n  apiVersion: string;\n  deploymentName: string;\n  isResponsesPath: boolean;\n} => {\n  const url = new URL(rawUri);\n  return {\n    url,\n    apiVersion: getApiVersionParam(url),\n    deploymentName: getDeploymentNameParam(url),\n    isResponsesPath: isResponsesPath(url),\n  };\n};\n\nexport const isValidAzureTargetUri = (rawUri: string): boolean => {\n  try {\n    const { apiVersion, deploymentName, isResponsesPath } =\n      parseAzureTargetUri(rawUri);\n\n    return Boolean(apiVersion) && (Boolean(deploymentName) || isResponsesPath);\n  } catch {\n    return false;\n  }\n};\n"
  },
  {
    "path": "web/src/lib/billing/index.ts",
    "content": "/**\n * Billing module - re-exports for convenience.\n */\n\n// Types and interfaces\nexport * from \"./interfaces\";\n\n// Service functions\nexport * from \"./svc\";\n\n// Hooks\nexport { useBillingInformation } from \"@/hooks/useBillingInformation\";\nexport { useLicense } from \"@/hooks/useLicense\";\n"
  },
  {
    "path": "web/src/lib/billing/interfaces.ts",
    "content": "/**\n * Billing and License interfaces.\n *\n * These types match the backend Pydantic models:\n * - LicenseStatusResponse (backend/ee/onyx/server/license/models.py)\n * - BillingInformationResponse (backend/ee/onyx/server/billing/models.py)\n */\n\n// ----------------------------------------------------------------------------\n// License Types (Self-hosted only)\n// ----------------------------------------------------------------------------\n\nexport type PlanType = \"monthly\" | \"annual\";\n\nexport type LicenseSource = \"auto_fetch\" | \"manual_upload\";\n\nexport type ApplicationStatus =\n  | \"active\"\n  | \"payment_reminder\"\n  | \"gated_access\"\n  | \"expired\"\n  | \"seat_limit_exceeded\";\n\n/**\n * Billing status from Stripe subscription.\n */\nexport enum BillingStatus {\n  TRIALING = \"trialing\",\n  ACTIVE = \"active\",\n  CANCELLED = \"cancelled\",\n  EXPIRED = \"expired\",\n  PAST_DUE = \"past_due\",\n  UNPAID = \"unpaid\",\n}\n\n/**\n * License status response from /api/license endpoint.\n * Only relevant for self-hosted deployments.\n */\nexport interface LicenseStatus {\n  has_license: boolean;\n  seats: number;\n  used_seats: number;\n  plan_type: PlanType | null;\n  issued_at: string | null;\n  expires_at: string | null;\n  grace_period_end: string | null;\n  status: ApplicationStatus | null;\n  source: LicenseSource | null;\n}\n\n// ----------------------------------------------------------------------------\n// Billing Types (Cloud and Self-hosted)\n// ----------------------------------------------------------------------------\n\n/**\n * Billing information from Stripe subscription.\n * Available for both cloud and self-hosted with active subscription.\n */\nexport interface BillingInformation {\n  tenant_id: string;\n  status: string | null;\n  plan_type: string | null;\n  seats: number | null;\n  billing_period: string | null;\n  current_period_start: string | null;\n  current_period_end: string | null;\n  cancel_at_period_end: boolean;\n  canceled_at: string | null;\n  trial_start: string | null;\n  trial_end: string | null;\n  payment_method_enabled: boolean;\n}\n\n/**\n * Response when no subscription exists.\n */\nexport interface SubscriptionStatus {\n  subscribed: boolean;\n}\n\n// ----------------------------------------------------------------------------\n// Checkout & Portal Types\n// ----------------------------------------------------------------------------\n\nexport interface CreateCheckoutSessionRequest {\n  billing_period?: \"monthly\" | \"annual\";\n  seats?: number;\n  email?: string;\n}\n\nexport interface CreateCheckoutSessionResponse {\n  stripe_checkout_url: string;\n}\n\nexport interface CreateCustomerPortalSessionRequest {\n  return_url?: string;\n}\n\nexport interface CreateCustomerPortalSessionResponse {\n  stripe_customer_portal_url: string;\n}\n\n// ----------------------------------------------------------------------------\n// Seat Management Types\n// ----------------------------------------------------------------------------\n\nexport interface SeatUpdateRequest {\n  new_seat_count: number;\n}\n\nexport interface SeatUpdateResponse {\n  success: boolean;\n  current_seats: number;\n  used_seats: number;\n  message: string | null;\n}\n\n// ----------------------------------------------------------------------------\n// Type Guards\n// ----------------------------------------------------------------------------\n\n/**\n * Check if the response indicates an active subscription.\n * Returns true only if the data is BillingInformation with a non-null status.\n */\nexport function hasActiveSubscription(\n  data: BillingInformation | SubscriptionStatus\n): data is BillingInformation {\n  // SubscriptionStatus (bare { subscribed: boolean }) is never BillingInformation\n  if (\"subscribed\" in data) {\n    return false;\n  }\n  return data.status !== null;\n}\n\n/**\n * Check if the response indicates an active *paid* subscription.\n * Returns true only for status === \"active\" (excludes trialing, past_due, etc.).\n */\nexport function hasPaidSubscription(\n  data: BillingInformation | SubscriptionStatus\n): data is BillingInformation {\n  if (\"subscribed\" in data) {\n    return false;\n  }\n  return data.status === BillingStatus.ACTIVE;\n}\n\n/**\n * Check if a license is valid and active.\n */\nexport function isLicenseValid(license: LicenseStatus): boolean {\n  return license.has_license && license.status === \"active\";\n}\n\n// ----------------------------------------------------------------------------\n// Display Utilities\n// ----------------------------------------------------------------------------\n\n/**\n * Convert status string to human-readable display format.\n */\nexport function statusToDisplay(status: string | null): string {\n  if (!status) return \"Unknown\";\n\n  switch (status) {\n    case \"trialing\":\n      return \"Trialing\";\n    case \"active\":\n      return \"Active\";\n    case \"canceled\":\n    case \"cancelled\":\n      return \"Canceled\";\n    case \"past_due\":\n      return \"Past Due\";\n    case \"unpaid\":\n      return \"Unpaid\";\n    case \"expired\":\n      return \"Expired\";\n    default:\n      return status.charAt(0).toUpperCase() + status.slice(1);\n  }\n}\n"
  },
  {
    "path": "web/src/lib/billing/svc.test.ts",
    "content": "/**\n * Tests for billing action functions.\n */\n\nimport {\n  createCheckoutSession,\n  createCustomerPortalSession,\n  updateSeatCount,\n  refreshLicenseCache,\n  uploadLicense,\n} from \"./svc\";\n\n// Mock NEXT_PUBLIC_CLOUD_ENABLED\njest.mock(\"@/lib/constants\", () => ({\n  NEXT_PUBLIC_CLOUD_ENABLED: false,\n}));\n\ndescribe(\"billing actions\", () => {\n  let fetchSpy: jest.SpyInstance;\n\n  beforeEach(() => {\n    fetchSpy = jest.spyOn(global, \"fetch\");\n  });\n\n  afterEach(() => {\n    fetchSpy.mockRestore();\n  });\n\n  describe(\"createCheckoutSession\", () => {\n    test(\"calls correct endpoint with request body\", async () => {\n      // Mock POST /api/admin/billing/create-checkout-session\n      fetchSpy.mockResolvedValueOnce({\n        ok: true,\n        json: async () => ({ url: \"https://checkout.stripe.com/session123\" }),\n      } as Response);\n\n      const result = await createCheckoutSession({\n        billing_period: \"monthly\",\n        email: \"test@example.com\",\n      });\n\n      expect(fetchSpy).toHaveBeenCalledWith(\n        \"/api/admin/billing/create-checkout-session\",\n        expect.objectContaining({\n          method: \"POST\",\n          headers: { \"Content-Type\": \"application/json\" },\n        })\n      );\n\n      const callArgs = fetchSpy.mock.calls[0];\n      const requestBody = JSON.parse(callArgs[1].body);\n      expect(requestBody).toEqual({\n        billing_period: \"monthly\",\n        email: \"test@example.com\",\n      });\n\n      expect(result).toEqual({ url: \"https://checkout.stripe.com/session123\" });\n    });\n\n    test(\"throws error on failed response\", async () => {\n      // Mock POST /api/admin/billing/create-checkout-session (error)\n      fetchSpy.mockResolvedValueOnce({\n        ok: false,\n        json: async () => ({ detail: \"Invalid request\" }),\n      } as Response);\n\n      await expect(createCheckoutSession()).rejects.toThrow(\"Invalid request\");\n    });\n\n    test(\"throws default error when no detail provided\", async () => {\n      // Mock POST /api/admin/billing/create-checkout-session (error, no detail)\n      fetchSpy.mockResolvedValueOnce({\n        ok: false,\n        json: async () => ({}),\n      } as Response);\n\n      await expect(createCheckoutSession()).rejects.toThrow(\n        \"Billing request failed\"\n      );\n    });\n  });\n\n  describe(\"createCustomerPortalSession\", () => {\n    test(\"calls correct endpoint and returns portal URL\", async () => {\n      // Mock POST /api/admin/billing/create-customer-portal-session\n      fetchSpy.mockResolvedValueOnce({\n        ok: true,\n        json: async () => ({ url: \"https://billing.stripe.com/portal123\" }),\n      } as Response);\n\n      const result = await createCustomerPortalSession({\n        return_url: \"https://example.com/billing\",\n      });\n\n      expect(fetchSpy).toHaveBeenCalledWith(\n        \"/api/admin/billing/create-customer-portal-session\",\n        expect.objectContaining({ method: \"POST\" })\n      );\n\n      expect(result).toEqual({ url: \"https://billing.stripe.com/portal123\" });\n    });\n  });\n\n  describe(\"updateSeatCount\", () => {\n    test(\"calls correct endpoint with seat count\", async () => {\n      // Mock POST /api/admin/billing/seats/update\n      fetchSpy.mockResolvedValueOnce({\n        ok: true,\n        json: async () => ({\n          success: true,\n          current_seats: 10,\n          used_seats: 5,\n          message: null,\n        }),\n      } as Response);\n\n      const result = await updateSeatCount({ new_seat_count: 10 });\n\n      expect(fetchSpy).toHaveBeenCalledWith(\n        \"/api/admin/billing/seats/update\",\n        expect.objectContaining({ method: \"POST\" })\n      );\n\n      const callArgs = fetchSpy.mock.calls[0];\n      const requestBody = JSON.parse(callArgs[1].body);\n      expect(requestBody).toEqual({ new_seat_count: 10 });\n\n      expect(result.current_seats).toBe(10);\n    });\n  });\n\n  describe(\"refreshLicenseCache (self-hosted only)\", () => {\n    test(\"calls license refresh endpoint\", async () => {\n      // Mock POST /api/license/refresh\n      fetchSpy.mockResolvedValueOnce({\n        ok: true,\n        json: async () => ({ success: true, message: \"Cache refreshed\" }),\n      } as Response);\n\n      const result = await refreshLicenseCache();\n\n      expect(fetchSpy).toHaveBeenCalledWith(\"/api/license/refresh\", {\n        method: \"POST\",\n      });\n\n      expect(result).toEqual({ success: true, message: \"Cache refreshed\" });\n    });\n  });\n\n  describe(\"uploadLicense (self-hosted only)\", () => {\n    test(\"calls license upload endpoint with FormData\", async () => {\n      // Mock POST /api/license/upload\n      fetchSpy.mockResolvedValueOnce({\n        ok: true,\n        json: async () => ({\n          success: true,\n          message:\n            \"License uploaded successfully. 10 seats, expires 2025-12-31\",\n        }),\n      } as Response);\n\n      const licenseKey = \"test-license-key-12345\";\n      const result = await uploadLicense(licenseKey);\n\n      expect(fetchSpy).toHaveBeenCalledWith(\n        \"/api/license/upload\",\n        expect.objectContaining({\n          method: \"POST\",\n        })\n      );\n\n      // Verify FormData was used\n      const callArgs = fetchSpy.mock.calls[0];\n      expect(callArgs[1].body).toBeInstanceOf(FormData);\n\n      expect(result).toEqual({\n        success: true,\n        message: \"License uploaded successfully. 10 seats, expires 2025-12-31\",\n      });\n    });\n\n    test(\"throws error on failed upload\", async () => {\n      // Mock POST /api/license/upload (error)\n      fetchSpy.mockResolvedValueOnce({\n        ok: false,\n        json: async () => ({ detail: \"Invalid license signature\" }),\n      } as Response);\n\n      await expect(uploadLicense(\"invalid-key\")).rejects.toThrow(\n        \"Invalid license signature\"\n      );\n    });\n  });\n});\n\ndescribe(\"billing actions (cloud mode)\", () => {\n  let fetchSpy: jest.SpyInstance;\n\n  beforeEach(() => {\n    fetchSpy = jest.spyOn(global, \"fetch\");\n    // Override to cloud mode\n    jest.resetModules();\n    jest.doMock(\"@/lib/constants\", () => ({\n      NEXT_PUBLIC_CLOUD_ENABLED: true,\n    }));\n  });\n\n  afterEach(() => {\n    fetchSpy.mockRestore();\n    jest.resetModules();\n  });\n\n  test(\"uses cloud endpoint for checkout session\", async () => {\n    // Re-import with cloud mode\n    const { createCheckoutSession: cloudCheckout } = await import(\"./svc\");\n\n    // Mock POST /api/tenants/create-checkout-session\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({ url: \"https://checkout.stripe.com/cloud123\" }),\n    } as Response);\n\n    await cloudCheckout();\n\n    expect(fetchSpy).toHaveBeenCalledWith(\n      \"/api/tenants/create-checkout-session\",\n      expect.any(Object)\n    );\n  });\n\n  test(\"uploadLicense throws error in cloud mode\", async () => {\n    // Re-import with cloud mode\n    const { uploadLicense: cloudUploadLicense } = await import(\"./svc\");\n\n    await expect(cloudUploadLicense(\"test-key\")).rejects.toThrow(\n      \"only available for self-hosted\"\n    );\n  });\n});\n"
  },
  {
    "path": "web/src/lib/billing/svc.ts",
    "content": "/**\n * Billing action functions for mutations.\n *\n * These are async functions for one-off actions like creating\n * checkout sessions or portal sessions. They don't need SWR caching.\n *\n * Endpoints:\n * - Cloud: /api/tenants/* (legacy, will migrate to /api/admin/billing/*)\n * - Self-hosted: /api/admin/billing/* (unified billing API)\n *\n * License actions (self-hosted only):\n * - /api/license/fetch - Fetch license from control plane after checkout\n * - /api/license/refresh - Refresh cached license data\n * - /api/license/upload - Upload license key manually (air-gapped deployments)\n */\n\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport {\n  CreateCheckoutSessionRequest,\n  CreateCheckoutSessionResponse,\n  CreateCustomerPortalSessionRequest,\n  CreateCustomerPortalSessionResponse,\n  SeatUpdateRequest,\n  SeatUpdateResponse,\n} from \"@/lib/billing/interfaces\";\n\nfunction getBillingBaseUrl(): string {\n  return NEXT_PUBLIC_CLOUD_ENABLED ? \"/api/tenants\" : \"/api/admin/billing\";\n}\n\nasync function billingPost<T>(endpoint: string, body?: unknown): Promise<T> {\n  const response = await fetch(`${getBillingBaseUrl()}${endpoint}`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(body ?? {}),\n  });\n\n  if (!response.ok) {\n    const error = await response.json().catch(() => ({}));\n    throw new Error(error.detail || \"Billing request failed\");\n  }\n\n  return response.json();\n}\n\nexport const createCheckoutSession = (request?: CreateCheckoutSessionRequest) =>\n  billingPost<CreateCheckoutSessionResponse>(\n    \"/create-checkout-session\",\n    request\n  );\n\nexport const createCustomerPortalSession = (\n  request?: CreateCustomerPortalSessionRequest\n) =>\n  billingPost<CreateCustomerPortalSessionResponse>(\n    \"/create-customer-portal-session\",\n    request\n  );\n\nexport const updateSeatCount = (request: SeatUpdateRequest) =>\n  billingPost<SeatUpdateResponse>(\"/seats/update\", request);\n\n/**\n * Reset the Stripe connection circuit breaker (self-hosted only).\n * Called when user clicks \"Connect to Stripe\" to retry after a previous failure.\n */\nexport const resetStripeConnection = () =>\n  billingPost<{ success: boolean; message: string }>(\"/reset-connection\");\n\n// Self-hosted only actions\nasync function selfHostedPost<T>(endpoint: string): Promise<T> {\n  if (NEXT_PUBLIC_CLOUD_ENABLED) {\n    throw new Error(`${endpoint} is only available for self-hosted`);\n  }\n\n  const response = await fetch(`/api/license${endpoint}`, {\n    method: \"POST\",\n  });\n\n  if (!response.ok) {\n    const error = await response.json().catch(() => ({}));\n    throw new Error(error.detail || \"License request failed\");\n  }\n\n  return response.json();\n}\n\n/**\n * Claim a license from the control plane (self-hosted only).\n *\n * Two modes:\n * - With sessionId: After Stripe checkout, exchange session_id for license\n * - Without sessionId: Re-claim using existing license for auth\n */\nexport const claimLicense = (sessionId?: string) =>\n  selfHostedPost<{ success: boolean; license?: unknown }>(\n    sessionId ? `/claim?session_id=${encodeURIComponent(sessionId)}` : \"/claim\"\n  );\n\n/**\n * Refresh the cached license data (self-hosted only).\n * Forces a re-read of the license and updates the cache.\n */\nexport const refreshLicenseCache = () =>\n  selfHostedPost<{ success: boolean; message?: string }>(\"/refresh\");\n\n/**\n * Upload a license key string (self-hosted only).\n * Used for air-gapped deployments where users paste license keys manually.\n */\nexport async function uploadLicense(\n  licenseKey: string\n): Promise<{ success: boolean; message?: string }> {\n  if (NEXT_PUBLIC_CLOUD_ENABLED) {\n    throw new Error(\"License upload is only available for self-hosted\");\n  }\n\n  // Create a file from the license key string\n  const blob = new Blob([licenseKey], { type: \"text/plain\" });\n  const formData = new FormData();\n  formData.append(\"license_file\", blob, \"license.txt\");\n\n  const response = await fetch(\"/api/license/upload\", {\n    method: \"POST\",\n    body: formData,\n  });\n\n  if (!response.ok) {\n    const error = await response.json().catch(() => ({}));\n    throw new Error(error.detail || \"License upload failed\");\n  }\n\n  return response.json();\n}\n"
  },
  {
    "path": "web/src/lib/browserUtilities.tsx",
    "content": "\"use client\";\n\nimport { MacIcon, WindowsIcon } from \"@/components/icons/icons\";\nimport { useState, useEffect } from \"react\";\n\nexport enum OperatingSystem {\n  Windows = \"Windows\",\n  Mac = \"Mac\",\n  Other = \"Other\",\n}\n\nexport const useOperatingSystem = (): OperatingSystem => {\n  const [os, setOS] = useState<OperatingSystem>(OperatingSystem.Other);\n\n  useEffect(() => {\n    const userAgent = window.navigator.userAgent.toLowerCase();\n    if (userAgent.includes(\"win\")) {\n      setOS(OperatingSystem.Windows);\n    } else if (userAgent.includes(\"mac\")) {\n      setOS(OperatingSystem.Mac);\n    }\n  }, []);\n\n  return os;\n};\n\n// Use this to handle the sidebar shortcut for the chat page\n// The shortcut is Ctrl+E on Windows/Linux and Cmd+E on Mac\n// This hook handles the keyboard event and toggles the sidebar\nexport const useSidebarShortcut = (router: any, toggleSidebar: () => void) => {\n  const os = useOperatingSystem();\n\n  useEffect(() => {\n    const handleKeyDown = (event: KeyboardEvent) => {\n      const isMac = os === OperatingSystem.Mac;\n      const modifierKey = isMac ? event.metaKey : event.ctrlKey;\n\n      if (modifierKey && event.key.toLowerCase() === \"e\") {\n        event.preventDefault();\n        toggleSidebar();\n      }\n    };\n\n    window.addEventListener(\"keydown\", handleKeyDown);\n    return () => {\n      window.removeEventListener(\"keydown\", handleKeyDown);\n    };\n  }, [router, toggleSidebar, os]);\n};\n\nconst KeyboardSymbol = () => {\n  const os = useOperatingSystem();\n\n  if (os === OperatingSystem.Windows) {\n    return <WindowsIcon size={12} />;\n  } else {\n    return <MacIcon size={12} />;\n  }\n};\n\nexport default KeyboardSymbol;\n"
  },
  {
    "path": "web/src/lib/build/client.ts",
    "content": "export interface CreateSessionRequest {\n  task: string;\n  available_sources?: string[];\n}\n\nexport interface CreateSessionResponse {\n  session_id: string;\n}\n\nexport interface ArtifactInfo {\n  artifact_type: \"webapp\" | \"file\" | \"markdown\" | \"image\";\n  path: string;\n  filename: string;\n  mime_type?: string;\n}\n\n// =============================================================================\n// ACP Event Types (from Agent Client Protocol)\n// =============================================================================\n\n/** Text or image content from the agent */\nexport interface AgentMessageChunkEvent {\n  sessionUpdate: \"agent_message_chunk\";\n  content: Array<{\n    type: \"text\" | \"image\";\n    text?: string;\n    image?: string;\n    mimeType?: string;\n  }>;\n}\n\n/** Agent's internal reasoning/thinking */\nexport interface AgentThoughtChunkEvent {\n  sessionUpdate: \"agent_thought_chunk\";\n  thought: string;\n}\n\n/** Tool invocation started */\nexport interface ToolCallStartEvent {\n  sessionUpdate: \"tool_call\";\n  toolCallId: string;\n  toolName: string;\n  toolInput?: Record<string, unknown>;\n}\n\n/** Tool execution progress/result */\nexport interface ToolCallProgressEvent {\n  sessionUpdate: \"tool_call_update\";\n  toolCallId: string;\n  content?: Array<{\n    type: \"text\" | \"image\";\n    text?: string;\n    image?: string;\n    mimeType?: string;\n  }>;\n  error?: string;\n  isComplete?: boolean;\n}\n\n/** Agent's execution plan */\nexport interface AgentPlanUpdateEvent {\n  sessionUpdate: \"plan\";\n  plan: Array<{\n    id: string;\n    description: string;\n    status: \"pending\" | \"in_progress\" | \"completed\" | \"failed\";\n  }>;\n}\n\n/** Agent mode change */\nexport interface CurrentModeUpdateEvent {\n  sessionUpdate: \"current_mode_update\";\n  mode: string;\n}\n\n/** Agent finished processing prompt */\nexport interface PromptResponseEvent {\n  stopReason?: string;\n  usage?: {\n    inputTokens?: number;\n    outputTokens?: number;\n  };\n}\n\n/** ACP error event */\nexport interface ACPErrorEvent {\n  code: number;\n  message: string;\n}\n\n/** File write event (custom Onyx extension) */\nexport interface FileWriteEvent {\n  path: string;\n  size_bytes?: number;\n  operation?: \"create\" | \"update\" | \"delete\";\n}\n\n// =============================================================================\n// Legacy Event Types (kept for backwards compatibility)\n// =============================================================================\n\nexport interface OutputEvent {\n  stream: \"stdout\" | \"stderr\";\n  data: string;\n}\n\nexport interface StatusEvent {\n  status: \"running\" | \"completed\" | \"failed\";\n  message?: string;\n}\n\nexport interface ArtifactEvent {\n  artifact_type: string;\n  path: string;\n  filename: string;\n}\n\nexport interface ErrorEvent {\n  message: string;\n}\n\nexport interface FileSystemEntry {\n  name: string;\n  path: string;\n  is_directory: boolean;\n  size: number | null;\n  mime_type: string | null;\n}\n\nexport interface DirectoryListing {\n  path: string;\n  entries: FileSystemEntry[];\n}\n\n// =============================================================================\n// Union Types\n// =============================================================================\n\n/** All possible ACP events from the agent */\nexport type ACPEvent =\n  | { type: \"agent_message_chunk\"; data: AgentMessageChunkEvent }\n  | { type: \"agent_thought_chunk\"; data: AgentThoughtChunkEvent }\n  | { type: \"tool_call\"; data: ToolCallStartEvent }\n  | { type: \"tool_call_update\"; data: ToolCallProgressEvent }\n  | { type: \"plan\"; data: AgentPlanUpdateEvent }\n  | { type: \"current_mode_update\"; data: CurrentModeUpdateEvent }\n  | { type: \"prompt_response\"; data: PromptResponseEvent }\n  | { type: \"error\"; data: ACPErrorEvent }\n  | { type: \"status\"; data: StatusEvent }\n  | { type: \"artifact\"; data: ArtifactEvent }\n  | { type: \"file_write\"; data: FileWriteEvent };\n\n/** Legacy BuildEvent type - alias for ACPEvent */\nexport type BuildEvent = ACPEvent;\n\nexport async function createSession(\n  request: CreateSessionRequest\n): Promise<CreateSessionResponse> {\n  const response = await fetch(\"/api/build/sessions\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(request),\n  });\n  if (!response.ok) {\n    throw new Error(`Failed to create session: ${response.statusText}`);\n  }\n  return response.json();\n}\n\nexport async function deleteSession(sessionId: string): Promise<void> {\n  const response = await fetch(`/api/build/sessions/${sessionId}`, {\n    method: \"DELETE\",\n  });\n  if (!response.ok) {\n    throw new Error(`Failed to delete session: ${response.statusText}`);\n  }\n}\n\nexport async function executeTask(\n  sessionId: string,\n  task: string,\n  context: string | undefined,\n  onEvent: (event: BuildEvent) => void,\n  onError: (error: Error) => void,\n  onComplete: () => void\n): Promise<void> {\n  try {\n    const response = await fetch(`/api/build/sessions/${sessionId}/execute`, {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n        Accept: \"text/event-stream\",\n      },\n      body: JSON.stringify({ task, context }),\n    });\n\n    if (!response.ok) {\n      throw new Error(`HTTP ${response.status}: ${response.statusText}`);\n    }\n\n    const reader = response.body?.getReader();\n    if (!reader) {\n      throw new Error(\"No response body\");\n    }\n\n    const decoder = new TextDecoder();\n    let buffer = \"\";\n\n    while (true) {\n      const { done, value } = await reader.read();\n      if (done) break;\n\n      buffer += decoder.decode(value, { stream: true });\n      const lines = buffer.split(\"\\n\");\n      buffer = lines.pop() || \"\";\n\n      let currentEventType = \"output\";\n      for (const line of lines) {\n        if (line.startsWith(\"event:\")) {\n          currentEventType = line.slice(6).trim();\n        } else if (line.startsWith(\"data:\")) {\n          const data = line.slice(5).trim();\n          if (data) {\n            try {\n              const parsed = JSON.parse(data);\n              onEvent({ type: currentEventType, data: parsed } as BuildEvent);\n            } catch {\n              // Skip malformed JSON\n            }\n          }\n        }\n      }\n    }\n\n    onComplete();\n  } catch (error) {\n    onError(error instanceof Error ? error : new Error(String(error)));\n  }\n}\n\n/**\n * Send a message to the build session using the new messages API endpoint.\n * This endpoint streams SSE events with message-prefixed packet types.\n */\nexport async function sendMessage(\n  sessionId: string,\n  message: string,\n  onEvent: (event: BuildEvent) => void,\n  onError: (error: Error) => void,\n  onComplete: () => void\n): Promise<void> {\n  try {\n    const response = await fetch(`/api/build/sessions/${sessionId}/messages`, {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n        Accept: \"text/event-stream\",\n      },\n      body: JSON.stringify({ content: message }),\n    });\n\n    if (!response.ok) {\n      const errorText = await response.text();\n      throw new Error(\n        `HTTP ${response.status}: ${errorText || response.statusText}`\n      );\n    }\n\n    const reader = response.body?.getReader();\n    if (!reader) {\n      throw new Error(\"No response body\");\n    }\n\n    const decoder = new TextDecoder();\n    let buffer = \"\";\n\n    while (true) {\n      const { done, value } = await reader.read();\n      if (done) break;\n\n      buffer += decoder.decode(value, { stream: true });\n      const lines = buffer.split(\"\\n\");\n      buffer = lines.pop() || \"\";\n\n      for (const line of lines) {\n        if (line.startsWith(\"event:\")) {\n          // Skip event type line (all events are \"message\")\n          continue;\n        } else if (line.startsWith(\"data:\")) {\n          const data = line.slice(5).trim();\n          if (data) {\n            try {\n              const parsed = JSON.parse(data);\n              // Map frontend packet types to BuildEvent types\n              const eventType = mapMessagePacketToEventType(parsed.type);\n              if (eventType) {\n                onEvent({ type: eventType, data: parsed } as BuildEvent);\n              }\n            } catch (err) {\n              console.error(\"Failed to parse SSE data:\", err);\n            }\n          }\n        }\n      }\n    }\n\n    onComplete();\n  } catch (error) {\n    onError(error instanceof Error ? error : new Error(String(error)));\n  }\n}\n\n/**\n * Map message API packet types to BuildEvent types.\n * Uses direct ACP event names from the backend, plus custom Onyx packet types.\n */\nfunction mapMessagePacketToEventType(packetType: string): string | null {\n  const mapping: Record<string, string> = {\n    // Direct ACP event types\n    agent_message_chunk: \"agent_message_chunk\",\n    agent_thought_chunk: \"agent_thought_chunk\",\n    tool_call_start: \"tool_call\",\n    tool_call_progress: \"tool_call_update\",\n    agent_plan_update: \"plan\",\n    current_mode_update: \"current_mode_update\",\n    prompt_response: \"prompt_response\",\n    error: \"error\",\n    // Custom Onyx packet types (extensions to ACP)\n    artifact_created: \"artifact\",\n    file_write: \"file_write\",\n  };\n  return mapping[packetType] || null;\n}\n\nexport async function listArtifacts(\n  sessionId: string\n): Promise<ArtifactInfo[]> {\n  const response = await fetch(`/api/build/sessions/${sessionId}/artifacts`);\n  if (!response.ok) {\n    throw new Error(`Failed to list artifacts: ${response.statusText}`);\n  }\n  return response.json();\n}\n\nexport function getArtifactUrl(sessionId: string, path: string): string {\n  return `/api/build/sessions/${sessionId}/artifacts/${path}`;\n}\n\nexport async function listDirectory(\n  sessionId: string,\n  path: string = \"\"\n): Promise<DirectoryListing> {\n  const url = path\n    ? `/api/build/sessions/${sessionId}/files?path=${encodeURIComponent(path)}`\n    : `/api/build/sessions/${sessionId}/files`;\n  const response = await fetch(url);\n  if (!response.ok) {\n    throw new Error(`Failed to list directory: ${response.statusText}`);\n  }\n  return response.json();\n}\n\nexport function getWebappUrl(sessionId: string, path: string = \"\"): string {\n  return `/api/build/sessions/${sessionId}/webapp${path ? `/${path}` : \"\"}`;\n}\n"
  },
  {
    "path": "web/src/lib/ccPair.ts",
    "content": "import { ConnectorCredentialPairStatus } from \"@/app/admin/connector/[ccPairId]/types\";\nimport { toast } from \"@/hooks/useToast\";\n\nexport async function setCCPairStatus(\n  ccPairId: number,\n  ccPairStatus: ConnectorCredentialPairStatus,\n  onUpdate?: () => void\n) {\n  try {\n    const response = await fetch(\n      `/api/manage/admin/cc-pair/${ccPairId}/status`,\n      {\n        method: \"PUT\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({ status: ccPairStatus }),\n      }\n    );\n\n    if (!response.ok) {\n      const { detail } = await response.json();\n      toast.error(`Failed to update connector status - ${detail}`);\n      return;\n    }\n\n    toast.success(\n      ccPairStatus === ConnectorCredentialPairStatus.ACTIVE\n        ? \"Enabled connector!\"\n        : \"Paused connector!\"\n    );\n\n    onUpdate && onUpdate();\n  } catch (error) {\n    console.error(\"Error updating CC pair status:\", error);\n    toast.error(\"Failed to update connector status\");\n  }\n}\n\nexport const getCCPairStatusMessage = (\n  isDisabled: boolean,\n  isIndexing: boolean,\n  ccPairStatus: ConnectorCredentialPairStatus\n) => {\n  if (ccPairStatus === ConnectorCredentialPairStatus.INVALID) {\n    return \"Connector is in an invalid state. Please update the credentials or configuration before re-indexing.\";\n  }\n  if (ccPairStatus === ConnectorCredentialPairStatus.DELETING) {\n    return \"Cannot index while connector is deleting\";\n  }\n  if (isIndexing) {\n    return \"Indexing is already in progress\";\n  }\n  if (isDisabled) {\n    return \"Connector must be re-enabled before indexing\";\n  }\n  return undefined;\n};\n"
  },
  {
    "path": "web/src/lib/chat/fetchAgentData.ts",
    "content": "import { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { filterAgents } from \"@/lib/agents\";\nimport { fetchAgentsSS } from \"@/lib/agentsSS\";\n\nexport async function fetchAgentData(): Promise<MinimalPersonaSnapshot[]> {\n  try {\n    // Fetch core assistants data\n    const [assistants, agentsFetchError] = await fetchAgentsSS();\n    if (agentsFetchError) {\n      // This is not a critical error and occurs when the user is not logged in\n      console.warn(`Failed to fetch agents - ${agentsFetchError}`);\n      return [];\n    }\n\n    return filterAgents(assistants);\n  } catch (error) {\n    console.error(\"Unexpected error in fetchAgentData:\", error);\n    return [];\n  }\n}\n"
  },
  {
    "path": "web/src/lib/chat/fetchBackendChatSessionSS.ts",
    "content": "import { BackendChatSession } from \"@/app/app/interfaces\";\nimport { fetchSS } from \"@/lib/utilsSS\";\n\nexport async function fetchBackendChatSessionSS(\n  chatId: string\n): Promise<BackendChatSession | null> {\n  const response = await fetchSS(`/chat/get-chat-session/${chatId}`);\n  if (!response.ok) return null;\n  return (await response.json()) as BackendChatSession;\n}\n"
  },
  {
    "path": "web/src/lib/chat/greetingMessages.ts",
    "content": "export const GREETING_MESSAGES = [\"How can I help?\", \"Let's get started.\"];\n\nexport function getRandomGreeting(): string {\n  return GREETING_MESSAGES[\n    Math.floor(Math.random() * GREETING_MESSAGES.length)\n  ] as string;\n}\n"
  },
  {
    "path": "web/src/lib/chat/svc.ts",
    "content": "const CHAT_FILE_PREFIX = \"/api/chat/file\";\n\n/**\n * Fetch a chat file by its ID, returning the raw Response.\n *\n * The caller is responsible for consuming the body (e.g. `.blob()`,\n * `.text()`) since different consumers need different formats.\n */\nexport async function fetchChatFile(fileId: string): Promise<Response> {\n  const response = await fetch(\n    `${CHAT_FILE_PREFIX}/${encodeURIComponent(fileId)}`,\n    {\n      method: \"GET\",\n      cache: \"force-cache\",\n    }\n  );\n\n  if (!response.ok) {\n    throw new Error(\"Failed to load document.\");\n  }\n\n  return response;\n}\n"
  },
  {
    "path": "web/src/lib/clipboard.test.ts",
    "content": "import { getPastedFilesIfNoText } from \"./clipboard\";\n\ntype MockClipboardData = Parameters<typeof getPastedFilesIfNoText>[0];\n\nfunction makeClipboardData({\n  textPlain = \"\",\n  text = \"\",\n  files = [],\n}: {\n  textPlain?: string;\n  text?: string;\n  files?: File[];\n}): MockClipboardData {\n  return {\n    items: files.map((file) => ({\n      kind: \"file\",\n      getAsFile: () => file,\n    })),\n    getData: (format: string) => {\n      if (format === \"text/plain\") {\n        return textPlain;\n      }\n\n      if (format === \"text\") {\n        return text;\n      }\n\n      return \"\";\n    },\n  };\n}\n\ndescribe(\"getPastedFilesIfNoText\", () => {\n  it(\"prefers plain text over pasted files when both are present\", () => {\n    const imageFile = new File([\"slide preview\"], \"slide.png\", {\n      type: \"image/png\",\n    });\n\n    expect(\n      getPastedFilesIfNoText(\n        makeClipboardData({\n          textPlain: \"Welcome to PowerPoint for Mac\",\n          files: [imageFile],\n        })\n      )\n    ).toEqual([]);\n  });\n\n  it(\"falls back to text data when text/plain is empty\", () => {\n    const imageFile = new File([\"slide preview\"], \"slide.png\", {\n      type: \"image/png\",\n    });\n\n    expect(\n      getPastedFilesIfNoText(\n        makeClipboardData({\n          text: \"Welcome to PowerPoint for Mac\",\n          files: [imageFile],\n        })\n      )\n    ).toEqual([]);\n  });\n\n  it(\"still returns files for image-only pastes\", () => {\n    const imageFile = new File([\"slide preview\"], \"slide.png\", {\n      type: \"image/png\",\n    });\n\n    expect(\n      getPastedFilesIfNoText(makeClipboardData({ files: [imageFile] }))\n    ).toEqual([imageFile]);\n  });\n\n  it(\"ignores whitespace-only text and keeps file pastes working\", () => {\n    const imageFile = new File([\"slide preview\"], \"slide.png\", {\n      type: \"image/png\",\n    });\n\n    expect(\n      getPastedFilesIfNoText(\n        makeClipboardData({\n          textPlain: \"   \",\n          text: \"\\n\",\n          files: [imageFile],\n        })\n      )\n    ).toEqual([imageFile]);\n  });\n});\n"
  },
  {
    "path": "web/src/lib/clipboard.ts",
    "content": "type ClipboardFileItem = {\n  kind: string;\n  getAsFile: () => File | null;\n};\n\ntype ClipboardDataLike = {\n  items?: ArrayLike<ClipboardFileItem> | null;\n  getData: (format: string) => string;\n};\n\nfunction getClipboardText(\n  clipboardData: ClipboardDataLike,\n  format: \"text/plain\" | \"text\"\n): string {\n  try {\n    return clipboardData.getData(format);\n  } catch {\n    return \"\";\n  }\n}\n\nexport function getPastedFilesIfNoText(\n  clipboardData?: ClipboardDataLike | null\n): File[] {\n  if (!clipboardData) {\n    return [];\n  }\n\n  const plainText = getClipboardText(clipboardData, \"text/plain\").trim();\n  const fallbackText = getClipboardText(clipboardData, \"text\").trim();\n\n  // Apps like PowerPoint on macOS can place both rendered image data and the\n  // original text on the clipboard. Prefer letting the textarea consume text.\n  if (plainText || fallbackText || !clipboardData.items) {\n    return [];\n  }\n\n  const pastedFiles: File[] = [];\n  for (let i = 0; i < clipboardData.items.length; i++) {\n    const item = clipboardData.items[i];\n    if (item?.kind !== \"file\") {\n      continue;\n    }\n\n    const file = item.getAsFile();\n    if (file) {\n      pastedFiles.push(file);\n    }\n  }\n\n  return pastedFiles;\n}\n"
  },
  {
    "path": "web/src/lib/connector.ts",
    "content": "import { ValidSources } from \"./types\";\nimport {\n  Connector,\n  ConnectorBase,\n  ConnectorSnapshot,\n} from \"./connectors/connectors\";\nasync function handleResponse(\n  response: Response\n): Promise<[string | null, any]> {\n  const responseJson = await response.json();\n  if (response.ok) {\n    return [null, responseJson];\n  }\n  return [responseJson.detail, null];\n}\n\nexport async function fetchConnectors(\n  credential_id: number\n): Promise<ConnectorSnapshot[]> {\n  const url = `/api/manage/admin/connector?credential=${credential_id}`;\n  const response = await fetch(url);\n  if (!response.ok) {\n    throw new Error(`Failed to fetch connectors: ${await response.text()}`);\n  }\n  const connectors: ConnectorSnapshot[] = await response.json();\n  return connectors;\n}\n\nexport async function createConnector<T>(\n  connector: ConnectorBase<T>\n): Promise<[string | null, Connector<T> | null]> {\n  const response = await fetch(`/api/manage/admin/connector`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(connector),\n  });\n  return handleResponse(response);\n}\n\nexport async function updateConnectorCredentialPairName(\n  ccPairId: number,\n  newName: string\n): Promise<Response> {\n  return fetch(\n    `/api/manage/admin/cc-pair/${ccPairId}/name?new_name=${encodeURIComponent(\n      newName\n    )}`,\n    {\n      method: \"PUT\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n    }\n  );\n}\n\nexport async function updateConnectorCredentialPairProperty(\n  ccPairId: number,\n  name: string,\n  value: string\n): Promise<Response> {\n  return fetch(`/api/manage/admin/cc-pair/${ccPairId}/property`, {\n    method: \"PUT\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      name: name,\n      value: value,\n    }),\n  });\n}\n\nexport async function updateConnector<T>(\n  connector: Connector<T>\n): Promise<Connector<T>> {\n  const response = await fetch(`/api/manage/admin/connector/${connector.id}`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(connector),\n  });\n  return await response.json();\n}\n\nexport async function deleteConnector(\n  connectorId: number\n): Promise<string | null> {\n  const response = await fetch(`/api/manage/admin/connector/${connectorId}`, {\n    method: \"DELETE\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n  if (response.ok) {\n    return null;\n  }\n  return (await response.json()).detail;\n}\n\nexport async function runConnector(\n  connectorId: number,\n  credentialIds: number[],\n  fromBeginning: boolean = false\n): Promise<string | null> {\n  const response = await fetch(\"/api/manage/admin/connector/run-once\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      connector_id: connectorId,\n      credentialIds,\n      from_beginning: fromBeginning,\n    }),\n  });\n  if (!response.ok) {\n    return (await response.json()).detail;\n  }\n  return null;\n}\n\nexport async function deleteConnectorIfExistsAndIsUnlinked({\n  source,\n  name,\n}: {\n  source: ValidSources;\n  name?: string;\n}): Promise<string | null> {\n  const connectorsResponse = await fetch(\"/api/manage/connector\");\n  if (connectorsResponse.ok) {\n    const connectors = (await connectorsResponse.json()) as Connector<any>[];\n    const matchingConnectors = connectors.filter(\n      (connector) =>\n        connector.source === source && (!name || connector.name === name)\n    );\n    if (\n      matchingConnectors.length > 0 &&\n      matchingConnectors[0] &&\n      matchingConnectors[0].credential_ids.length === 0\n    ) {\n      const errorMsg = await deleteConnector(matchingConnectors[0].id);\n      if (errorMsg) {\n        return errorMsg;\n      }\n    }\n  }\n  return null;\n}\n"
  },
  {
    "path": "web/src/lib/connectors/AutoSyncOptionFields.tsx",
    "content": "import { JSX } from \"react\";\nimport { ValidAutoSyncSource } from \"@/lib/types\";\n\n// The first key is the connector type, and the second key is the field name\nexport const autoSyncConfigBySource: Record<\n  ValidAutoSyncSource,\n  Record<\n    string,\n    {\n      label: string;\n      subtext: JSX.Element;\n    }\n  >\n> = {\n  confluence: {},\n  jira: {},\n  google_drive: {},\n  gmail: {},\n  github: {},\n  slack: {},\n  salesforce: {},\n  sharepoint: {},\n  teams: {},\n};\n"
  },
  {
    "path": "web/src/lib/connectors/connectors.tsx",
    "content": "import * as Yup from \"yup\";\nimport { ConfigurableSources, ValidInputTypes, ValidSources } from \"../types\";\nimport { AccessTypeGroupSelectorFormType } from \"@/components/admin/connectors/AccessTypeGroupSelector\";\nimport { Credential } from \"@/lib/connectors/credentials\"; // Import Credential type\nimport { DOCS_ADMINS_PATH } from \"@/lib/constants\";\n\nexport function isLoadState(connector_name: string): boolean {\n  // TODO: centralize connector metadata like this somewhere instead of hardcoding it here\n  const loadStateConnectors = [\"web\", \"xenforo\", \"file\", \"airtable\"];\n  if (loadStateConnectors.includes(connector_name)) {\n    return true;\n  }\n\n  return false;\n}\n\nexport type InputType =\n  | \"list\"\n  | \"text\"\n  | \"select\"\n  | \"multiselect\"\n  | \"boolean\"\n  | \"number\"\n  | \"file\";\n\nexport type StringWithDescription = {\n  value: string;\n  name: string;\n  description?: string;\n};\n\nexport interface Option {\n  label: string | ((currentCredential: Credential<any> | null) => string);\n  name: string;\n  description?:\n    | string\n    | ((currentCredential: Credential<any> | null) => string);\n  query?: string;\n  optional?: boolean;\n  hidden?: boolean;\n  visibleCondition?: (\n    values: any,\n    currentCredential: Credential<any> | null\n  ) => boolean;\n  wrapInCollapsible?: boolean;\n  disabled?: boolean | ((currentCredential: Credential<any> | null) => boolean);\n}\n\nexport interface SelectOption extends Option {\n  type: \"select\";\n  options?: StringWithDescription[];\n  default?: string;\n}\n\nexport interface MultiSelectOption extends Option {\n  type: \"multiselect\";\n  options?: StringWithDescription[];\n  default?: string[];\n}\n\nexport interface ListOption extends Option {\n  type: \"list\";\n  default?: string[];\n  transform?: (values: string[]) => string[];\n}\n\nexport interface TextOption extends Option {\n  type: \"text\";\n  default?: string;\n  initial?: string | ((currentCredential: Credential<any> | null) => string);\n  isTextArea?: boolean;\n}\n\nexport interface NumberOption extends Option {\n  type: \"number\";\n  default?: number;\n}\n\nexport interface BooleanOption extends Option {\n  type: \"checkbox\";\n  default?: boolean;\n}\n\nexport interface FileOption extends Option {\n  type: \"file\";\n  default?: string;\n}\n\nexport interface StringTabOption extends Option {\n  type: \"string_tab\";\n  default?: string;\n}\n\nexport interface TabOption extends Option {\n  type: \"tab\";\n  defaultTab?: string;\n  tabs: {\n    label: string;\n    value: string;\n    fields: (\n      | BooleanOption\n      | ListOption\n      | TextOption\n      | NumberOption\n      | SelectOption\n      | MultiSelectOption\n      | FileOption\n      | StringTabOption\n    )[];\n  }[];\n  default?: [];\n}\n\nexport interface ConnectionConfiguration {\n  description: string;\n  subtext?: string;\n  initialConnectorName?: string; // a key in the credential to prepopulate the connector name field\n  values: (\n    | BooleanOption\n    | ListOption\n    | TextOption\n    | NumberOption\n    | SelectOption\n    | MultiSelectOption\n    | FileOption\n    | TabOption\n  )[];\n  advanced_values: (\n    | BooleanOption\n    | ListOption\n    | TextOption\n    | NumberOption\n    | SelectOption\n    | MultiSelectOption\n    | FileOption\n    | TabOption\n  )[];\n  overrideDefaultFreq?: number;\n  advancedValuesVisibleCondition?: (\n    values: any,\n    currentCredential: Credential<any> | null\n  ) => boolean;\n}\n\nexport const connectorConfigs: Record<\n  ConfigurableSources,\n  ConnectionConfiguration\n> = {\n  web: {\n    description: \"Configure Web connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the website URL to scrape e.g. https://docs.onyx.app/:\",\n        label: \"Base URL\",\n        name: \"base_url\",\n        optional: false,\n      },\n      {\n        type: \"select\",\n        query: \"Select the web connector type:\",\n        label: \"Scrape Method\",\n        name: \"web_connector_type\",\n        options: [\n          { name: \"recursive\", value: \"recursive\" },\n          { name: \"single\", value: \"single\" },\n          { name: \"sitemap\", value: \"sitemap\" },\n        ],\n      },\n    ],\n    advanced_values: [\n      {\n        type: \"checkbox\",\n        query: \"Scroll before scraping:\",\n        label: \"Scroll before scraping\",\n        description:\n          \"Enable if the website requires scrolling for the desired content to load\",\n        name: \"scroll_before_scraping\",\n        optional: true,\n      },\n    ],\n    overrideDefaultFreq: 60 * 60 * 24,\n  },\n  github: {\n    description: \"Configure GitHub connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the GitHub username or organization:\",\n        label: \"Repository Owner\",\n        name: \"repo_owner\",\n        optional: false,\n      },\n      {\n        type: \"tab\",\n        name: \"github_mode\",\n        label: \"What should we index from GitHub?\",\n        optional: true,\n        tabs: [\n          {\n            value: \"repo\",\n            label: \"Specific Repository\",\n            fields: [\n              {\n                type: \"text\",\n                query: \"Enter the repository name(s):\",\n                label: \"Repository Name(s)\",\n                name: \"repositories\",\n                optional: false,\n                description:\n                  \"For multiple repositories, enter comma-separated names (e.g., repo1,repo2,repo3)\",\n              },\n            ],\n          },\n          {\n            value: \"everything\",\n            label: \"Everything\",\n            fields: [\n              {\n                type: \"string_tab\",\n                label: \"Everything\",\n                name: \"everything\",\n                description:\n                  \"This connector will index all repositories the provided credentials have access to!\",\n              },\n            ],\n          },\n        ],\n      },\n      {\n        type: \"checkbox\",\n        query: \"Include pull requests?\",\n        label: \"Include pull requests?\",\n        description: \"Index pull requests from repositories\",\n        name: \"include_prs\",\n        optional: true,\n      },\n      {\n        type: \"checkbox\",\n        query: \"Include issues?\",\n        label: \"Include Issues?\",\n        name: \"include_issues\",\n        description: \"Index issues from repositories\",\n        optional: true,\n      },\n    ],\n    advanced_values: [],\n  },\n  testrail: {\n    description: \"Configure TestRail connector\",\n    values: [\n      {\n        type: \"text\",\n        label: \"Project IDs\",\n        name: \"project_ids\",\n        optional: true,\n        description:\n          \"Comma-separated list of TestRail project IDs to index (e.g., 1 or 1,2,3). Leave empty to index all projects.\",\n      },\n    ],\n    advanced_values: [\n      {\n        type: \"number\",\n        label: \"Cases Page Size\",\n        name: \"cases_page_size\",\n        optional: true,\n        description:\n          \"Number of test cases to fetch per page from the TestRail API (default: 250)\",\n      },\n      {\n        type: \"number\",\n        label: \"Max Pages\",\n        name: \"max_pages\",\n        optional: true,\n        description:\n          \"Maximum number of pages to fetch to prevent infinite loops (default: 10000)\",\n      },\n      {\n        type: \"number\",\n        label: \"Skip Document Character Limit\",\n        name: \"skip_doc_absolute_chars\",\n        optional: true,\n        description:\n          \"Skip indexing test cases that exceed this character limit (default: 200000)\",\n      },\n    ],\n  },\n  gitlab: {\n    description: \"Configure GitLab connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the project owner:\",\n        label: \"Project Owner\",\n        name: \"project_owner\",\n        optional: false,\n      },\n      {\n        type: \"text\",\n        query: \"Enter the project name:\",\n        label: \"Project Name\",\n        name: \"project_name\",\n        optional: false,\n      },\n    ],\n    advanced_values: [\n      {\n        type: \"checkbox\",\n        query: \"Include merge requests?\",\n        label: \"Include MRs\",\n        name: \"include_mrs\",\n        description: \"Index merge requests from repositories\",\n        default: true,\n      },\n      {\n        type: \"checkbox\",\n        query: \"Include issues?\",\n        label: \"Include Issues\",\n        name: \"include_issues\",\n        description: \"Index issues from repositories\",\n        default: true,\n      },\n    ],\n  },\n  bitbucket: {\n    description: \"Configure Bitbucket connector\",\n    subtext:\n      \"Configure Bitbucket connector (Cloud only). You can index a workspace, specific projects or repositories.\",\n    values: [\n      {\n        type: \"text\",\n        label: \"Workspace\",\n        name: \"workspace\",\n        optional: false,\n        description: `The Bitbucket workspace to index (e.g., \"atlassian\" from https://bitbucket.org/atlassian/workspace ).`,\n      },\n      {\n        type: \"tab\",\n        name: \"bitbucket_mode\",\n        label: \"What should be indexed from Bitbucket?\",\n        optional: true,\n        tabs: [\n          {\n            value: \"repo\",\n            label: \"Specific Repositories\",\n            fields: [\n              {\n                type: \"text\",\n                label: \"Repository Slugs\",\n                name: \"repositories\",\n                optional: false,\n                description:\n                  \"For multiple repositories, enter comma-separated slugs (e.g., repo1,repo2,repo3)\",\n              },\n            ],\n          },\n          {\n            value: \"project\",\n            label: \"Project(s)\",\n            fields: [\n              {\n                type: \"text\",\n                label: \"Project Key(s)\",\n                name: \"projects\",\n                optional: false,\n                description:\n                  \"One or more Bitbucket Project Keys (comma-separated) to index all repositories in those projects (e.g., PROJ1,PROJ2)\",\n              },\n            ],\n          },\n          {\n            value: \"workspace\",\n            label: \"Workspace\",\n            fields: [\n              {\n                type: \"string_tab\",\n                label: \"Workspace\",\n                name: \"workspace_tab\",\n                description:\n                  \"This connector will index all repositories in the workspace.\",\n              },\n            ],\n          },\n        ],\n      },\n    ],\n    advanced_values: [],\n  },\n  gitbook: {\n    description: \"Configure GitBook connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the space ID:\",\n        label: \"Space ID\",\n        name: \"space_id\",\n        optional: false,\n        description:\n          \"The ID of the GitBook space to index. This can be found in the URL \" +\n          \"of a page in the space. For example, if your URL looks like \" +\n          \"`https://app.gitbook.com/o/ccLx08XZ5wZ54LwdP9QU/s/8JkzVx8QCIGRrmxhGHU8/`, \" +\n          \"then your space ID is `8JkzVx8QCIGRrmxhGHU8`.\",\n      },\n    ],\n    advanced_values: [],\n  },\n  google_drive: {\n    description: \"Configure Google Drive connector\",\n    values: [\n      {\n        type: \"tab\",\n        name: \"indexing_scope\",\n        label: \"How should we index your Google Drive?\",\n        optional: true,\n        tabs: [\n          {\n            value: \"general\",\n            label: \"General\",\n            fields: [\n              {\n                type: \"checkbox\",\n                label: \"Include shared drives?\",\n                description: (currentCredential) => {\n                  return currentCredential?.credential_json?.google_tokens\n                    ? \"This will allow Onyx to index everything in the shared drives you have access to.\"\n                    : \"This will allow Onyx to index everything in your Organization's shared drives.\";\n                },\n                name: \"include_shared_drives\",\n                default: false,\n              },\n              {\n                type: \"checkbox\",\n                label: (currentCredential) => {\n                  return currentCredential?.credential_json?.google_tokens\n                    ? \"Include My Drive?\"\n                    : \"Include Everyone's My Drive?\";\n                },\n                description: (currentCredential) => {\n                  return currentCredential?.credential_json?.google_tokens\n                    ? \"This will allow Onyx to index everything in your My Drive.\"\n                    : \"This will allow Onyx to index everything in everyone's My Drives.\";\n                },\n                name: \"include_my_drives\",\n                default: false,\n              },\n              {\n                type: \"checkbox\",\n                description:\n                  \"This will allow Onyx to index all files shared with you.\",\n                label: \"Include All Files Shared With You?\",\n                name: \"include_files_shared_with_me\",\n                visibleCondition: (values, currentCredential) =>\n                  currentCredential?.credential_json?.google_tokens,\n                default: false,\n              },\n            ],\n          },\n          {\n            value: \"specific\",\n            label: \"Specific\",\n            fields: [\n              {\n                type: \"text\",\n                description: (currentCredential) => {\n                  return currentCredential?.credential_json?.google_tokens\n                    ? \"Enter a comma separated list of the URLs for the shared drive you would like to index. You must have access to these shared drives.\"\n                    : \"Enter a comma separated list of the URLs for the shared drive you would like to index.\";\n                },\n                label: \"Shared Drive URLs\",\n                name: \"shared_drive_urls\",\n                default: \"\",\n                isTextArea: true,\n              },\n              {\n                type: \"text\",\n                description:\n                  \"Enter a comma separated list of the URLs of any folders you would like to index. The files located in these folders (and all subfolders) will be indexed.\",\n                label: \"Folder URLs\",\n                name: \"shared_folder_urls\",\n                default: \"\",\n                isTextArea: true,\n              },\n              {\n                type: \"text\",\n                description:\n                  \"Enter a comma separated list of the emails of the users whose MyDrive you want to index.\",\n                label: \"My Drive Emails\",\n                name: \"my_drive_emails\",\n                visibleCondition: (values, currentCredential) =>\n                  !currentCredential?.credential_json?.google_tokens,\n                default: \"\",\n                isTextArea: true,\n              },\n            ],\n          },\n        ],\n        defaultTab: \"general\",\n      },\n    ],\n    advanced_values: [\n      {\n        type: \"text\",\n        description:\n          \"Enter a comma separated list of specific user emails to index. This will only index files accessible to these users.\",\n        label: \"Specific User Emails\",\n        name: \"specific_user_emails\",\n        optional: true,\n        default: \"\",\n        isTextArea: true,\n        visibleCondition: (values, currentCredential) =>\n          !currentCredential?.credential_json?.google_tokens,\n      },\n      {\n        type: \"checkbox\",\n        label: \"Hide domain link-only files?\",\n        description:\n          \"When enabled, Onyx skips files that are shared broadly (domain or public) but require the link to access.\",\n        name: \"exclude_domain_link_only\",\n        optional: true,\n        default: false,\n      },\n    ],\n  },\n  gmail: {\n    description: \"Configure Gmail connector\",\n    values: [],\n    advanced_values: [],\n  },\n  bookstack: {\n    description: \"Configure Bookstack connector\",\n    values: [],\n    advanced_values: [],\n  },\n  outline: {\n    description: \"Configure Outline connector\",\n    values: [],\n    advanced_values: [],\n  },\n  confluence: {\n    description: \"Configure Confluence connector\",\n    initialConnectorName: \"cloud_name\",\n    values: [\n      {\n        type: \"checkbox\",\n        query: \"Is this a Confluence Cloud instance?\",\n        label: \"Is Cloud\",\n        name: \"is_cloud\",\n        optional: false,\n        default: true,\n        description:\n          \"Check if this is a Confluence Cloud instance, uncheck for Confluence Server/Data Center\",\n        disabled: (currentCredential) => {\n          if (currentCredential?.credential_json?.confluence_refresh_token) {\n            return true;\n          }\n          return false;\n        },\n      },\n      {\n        type: \"text\",\n        query: \"Enter the wiki base URL:\",\n        label: \"Wiki Base URL\",\n        name: \"wiki_base\",\n        optional: false,\n        initial: (currentCredential) => {\n          return currentCredential?.credential_json?.wiki_base ?? \"\";\n        },\n        disabled: (currentCredential) => {\n          if (currentCredential?.credential_json?.confluence_refresh_token) {\n            return true;\n          }\n          return false;\n        },\n        description:\n          \"The base URL of your Confluence instance (e.g., https://your-domain.atlassian.net/wiki)\",\n      },\n      {\n        type: \"checkbox\",\n        query: \"Using scoped token?\",\n        label: \"Using scoped token\",\n        name: \"scoped_token\",\n        optional: true,\n        default: false,\n      },\n      {\n        type: \"tab\",\n        name: \"indexing_scope\",\n        label: \"How Should We Index Your Confluence?\",\n        optional: true,\n        tabs: [\n          {\n            value: \"everything\",\n            label: \"Everything\",\n            fields: [\n              {\n                type: \"string_tab\",\n                label: \"Everything\",\n                name: \"everything\",\n                description:\n                  \"This connector will index all pages the provided credentials have access to!\",\n              },\n            ],\n          },\n          {\n            value: \"space\",\n            label: \"Space\",\n            fields: [\n              {\n                type: \"text\",\n                query: \"Enter the space:\",\n                label: \"Space Key\",\n                name: \"space\",\n                default: \"\",\n                description: \"The Confluence space key to index (e.g. `KB`).\",\n              },\n            ],\n          },\n          {\n            value: \"page\",\n            label: \"Page\",\n            fields: [\n              {\n                type: \"text\",\n                query: \"Enter the page ID:\",\n                label: \"Page ID\",\n                name: \"page_id\",\n                default: \"\",\n                description: \"Specific page ID to index (e.g. `131368`)\",\n              },\n              {\n                type: \"checkbox\",\n                query: \"Should index pages recursively?\",\n                label: \"Index Recursively\",\n                name: \"index_recursively\",\n                description:\n                  \"If this is set, we will index the page indicated by the Page ID as well as all of its children.\",\n                optional: false,\n                default: true,\n              },\n            ],\n          },\n          {\n            value: \"cql\",\n            label: \"CQL Query\",\n            fields: [\n              {\n                type: \"text\",\n                query: \"Enter the CQL query (optional):\",\n                label: \"CQL Query\",\n                name: \"cql_query\",\n                default: \"\",\n                description:\n                  \"IMPORTANT: We currently only support CQL queries that return objects of type 'page'. This means all CQL queries must contain 'type=page' as the only type filter. It is also important that no filters for 'lastModified' are used as it will cause issues with our connector polling logic. We will still get all attachments and comments for the pages returned by the CQL query. Any 'lastmodified' filters will be overwritten. See Atlassian's [CQL documentation](https://developer.atlassian.com/server/confluence/advanced-searching-using-cql/) for more details.\",\n              },\n            ],\n          },\n        ],\n        defaultTab: \"space\",\n      },\n    ],\n    advanced_values: [],\n  },\n  jira: {\n    description: \"Configure Jira connector\",\n    subtext: `Configure which Jira content to index. You can index everything or specify a particular project.`,\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the Jira base URL:\",\n        label: \"Jira Base URL\",\n        name: \"jira_base_url\",\n        optional: false,\n        description:\n          \"The base URL of your Jira instance (e.g., https://your-domain.atlassian.net)\",\n      },\n      {\n        type: \"checkbox\",\n        query: \"Using scoped token?\",\n        label: \"Using scoped token\",\n        name: \"scoped_token\",\n        optional: true,\n        default: false,\n      },\n      {\n        type: \"tab\",\n        name: \"indexing_scope\",\n        label: \"How Should We Index Your Jira?\",\n        optional: true,\n        tabs: [\n          {\n            value: \"everything\",\n            label: \"Everything\",\n            fields: [\n              {\n                type: \"string_tab\",\n                label: \"Everything\",\n                name: \"everything\",\n                description:\n                  \"This connector will index all issues the provided credentials have access to!\",\n              },\n            ],\n          },\n          {\n            value: \"project\",\n            label: \"Project\",\n            fields: [\n              {\n                type: \"text\",\n                query: \"Enter the project key:\",\n                label: \"Project Key\",\n                name: \"project_key\",\n                description:\n                  \"The key of a specific project to index (e.g., 'PROJ').\",\n              },\n            ],\n          },\n          {\n            value: \"jql\",\n            label: \"JQL Query\",\n            fields: [\n              {\n                type: \"text\",\n                query: \"Enter the JQL query:\",\n                label: \"JQL Query\",\n                name: \"jql_query\",\n                description:\n                  \"A custom JQL query to filter Jira issues.\" +\n                  \"\\n\\nIMPORTANT: Do not include any time-based filters in the JQL query as that will conflict with the connector's logic. Additionally, do not include ORDER BY clauses.\" +\n                  \"\\n\\nSee Atlassian's [JQL documentation](https://support.atlassian.com/jira-software-cloud/docs/advanced-search-reference-jql-fields/) for more details on syntax.\",\n              },\n            ],\n          },\n        ],\n        defaultTab: \"everything\",\n      },\n      {\n        type: \"list\",\n        query: \"Enter email addresses to blacklist from comments:\",\n        label: \"Comment Email Blacklist\",\n        name: \"comment_email_blacklist\",\n        description:\n          \"This is generally useful to ignore certain bots. Add user emails which comments should NOT be indexed.\",\n        optional: true,\n      },\n    ],\n    advanced_values: [],\n  },\n  salesforce: {\n    description: \"Configure Salesforce connector\",\n    values: [\n      {\n        type: \"tab\",\n        name: \"salesforce_config_type\",\n        label: \"Configuration Type\",\n        optional: true,\n        tabs: [\n          {\n            value: \"simple\",\n            label: \"Simple\",\n            fields: [\n              {\n                type: \"list\",\n                query: \"Enter requested objects:\",\n                label: \"Requested Objects\",\n                name: \"requested_objects\",\n                optional: true,\n                description:\n                  \"Specify the Salesforce object types you want us to index. If unsure, don't specify any objects and Onyx will default to indexing by 'Account'.\" +\n                  \"\\n\\nHint: Use the singular form of the object name (e.g., 'Opportunity' instead of 'Opportunities').\",\n              },\n            ],\n          },\n          {\n            value: \"advanced\",\n            label: \"Advanced\",\n            fields: [\n              {\n                type: \"text\",\n                query: \"Enter custom query config:\",\n                label: \"Custom Query Config\",\n                name: \"custom_query_config\",\n                optional: true,\n                isTextArea: true,\n                description:\n                  \"Enter a JSON configuration that precisely defines which fields and child objects to index. This gives you complete control over the data structure.\" +\n                  \"\\n\\nExample:\" +\n                  \"\\n{\" +\n                  '\\n  \"Account\": {' +\n                  '\\n    \"fields\": [\"Id\", \"Name\", \"Industry\"],' +\n                  '\\n    \"associations\": {' +\n                  '\\n      \"Contact\": [\"Id\", \"FirstName\", \"LastName\", \"Email\"]' +\n                  \"\\n    }\" +\n                  \"\\n  }\" +\n                  \"\\n}\" +\n                  `\\n\\n[See our docs](${DOCS_ADMINS_PATH}/connectors/official/salesforce) for more details.`,\n              },\n            ],\n          },\n        ],\n        defaultTab: \"simple\",\n      },\n    ],\n    advanced_values: [],\n  },\n  sharepoint: {\n    description: \"Configure SharePoint connector\",\n    values: [\n      {\n        type: \"list\",\n        query: \"Enter SharePoint sites:\",\n        label: \"Sites\",\n        name: \"sites\",\n        optional: true,\n        description: `• If no sites are specified, all sites in your organization will be indexed (Sites.Read.All permission required).\n• Specifying 'https://onyxai.sharepoint.com/sites/support' for example only indexes this site.\n• Specifying 'https://onyxai.sharepoint.com/sites/support/subfolder' for example only indexes this folder.\n• Specifying sites currently works for SharePoint instances using English, Spanish, or German. Contact the Onyx team if you need another language supported.\n`,\n      },\n    ],\n    advanced_values: [\n      {\n        type: \"checkbox\",\n        query: \"Index Documents:\",\n        label: \"Index Documents\",\n        name: \"include_site_documents\",\n        optional: true,\n        default: true,\n        description:\n          \"Index documents of all SharePoint libraries or folders defined above.\",\n      },\n      {\n        type: \"checkbox\",\n        query: \"Index ASPX Sites:\",\n        label: \"Index ASPX Sites\",\n        name: \"include_site_pages\",\n        optional: true,\n        default: true,\n        description:\n          \"Index aspx-pages of all SharePoint sites defined above, even if a library or folder is specified.\",\n      },\n      {\n        type: \"checkbox\",\n        label: \"Treat sharing links as public?\",\n        description:\n          \"When enabled, documents with a sharing link (anonymous or organization-wide) \" +\n          \"are treated as public (visible to all Onyx users). \" +\n          \"When disabled, only users and groups with explicit role assignments can see the document.\",\n        name: \"treat_sharing_link_as_public\",\n        optional: true,\n        default: false,\n      },\n      {\n        type: \"list\",\n        query: \"Enter site URLs to exclude:\",\n        label: \"Excluded Sites\",\n        name: \"excluded_sites\",\n        optional: true,\n        description:\n          \"Site URLs or glob patterns to exclude from indexing. \" +\n          \"Matched sites will never be indexed, even if they appear in the sites list above. \" +\n          \"Examples: 'https://contoso.sharepoint.com/sites/archive' (exact), \" +\n          \"'*://*/sites/archive-*' (glob pattern).\",\n      },\n      {\n        type: \"list\",\n        query: \"Enter file path patterns to exclude:\",\n        label: \"Excluded Paths\",\n        name: \"excluded_paths\",\n        optional: true,\n        description:\n          \"Glob patterns for file paths to exclude from indexing within document libraries. \" +\n          \"Patterns are matched against both the full relative path and the filename. \" +\n          \"Examples: '*.tmp' (temp files), '~$*' (Office lock files), 'Archive/*' (folder).\",\n      },\n      {\n        type: \"text\",\n        query: \"Microsoft Authority Host:\",\n        label: \"Authority Host\",\n        name: \"authority_host\",\n        optional: true,\n        default: \"https://login.microsoftonline.com\",\n        description:\n          \"The Microsoft identity authority host used for authentication. \" +\n          \"For most deployments, leave as default. \" +\n          \"For GCC High / DoD, use https://login.microsoftonline.us\",\n      },\n      {\n        type: \"text\",\n        query: \"Microsoft Graph API Host:\",\n        label: \"Graph API Host\",\n        name: \"graph_api_host\",\n        optional: true,\n        default: \"https://graph.microsoft.com\",\n        description:\n          \"The Microsoft Graph API host. \" +\n          \"For most deployments, leave as default. \" +\n          \"For GCC High / DoD, use https://graph.microsoft.us\",\n      },\n      {\n        type: \"text\",\n        query: \"SharePoint Domain Suffix:\",\n        label: \"SharePoint Domain Suffix\",\n        name: \"sharepoint_domain_suffix\",\n        optional: true,\n        default: \"sharepoint.com\",\n        description:\n          \"The domain suffix for SharePoint sites (e.g. sharepoint.com). \" +\n          \"For most deployments, leave as default. \" +\n          \"For GCC High, use sharepoint.us\",\n      },\n    ],\n  },\n  teams: {\n    description: \"Configure Teams connector\",\n    values: [\n      {\n        type: \"list\",\n        query: \"Enter Teams to include:\",\n        label: \"Teams\",\n        name: \"teams\",\n        optional: true,\n        description: `Specify 0 or more Teams to index. For example, specifying the Team 'Support' for the 'onyxai' Org will cause us to only index messages sent in channels belonging to the 'Support' Team. If no Teams are specified, all Teams in your organization will be indexed.`,\n      },\n    ],\n    advanced_values: [\n      {\n        type: \"text\",\n        query: \"Microsoft Authority Host:\",\n        label: \"Authority Host\",\n        name: \"authority_host\",\n        optional: true,\n        default: \"https://login.microsoftonline.com\",\n        description:\n          \"The Microsoft identity authority host used for authentication. \" +\n          \"For most deployments, leave as default. \" +\n          \"For GCC High / DoD, use https://login.microsoftonline.us\",\n      },\n      {\n        type: \"text\",\n        query: \"Microsoft Graph API Host:\",\n        label: \"Graph API Host\",\n        name: \"graph_api_host\",\n        optional: true,\n        default: \"https://graph.microsoft.com\",\n        description:\n          \"The Microsoft Graph API host. \" +\n          \"For most deployments, leave as default. \" +\n          \"For GCC High / DoD, use https://graph.microsoft.us\",\n      },\n    ],\n  },\n  discourse: {\n    description: \"Configure Discourse connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the base URL:\",\n        label: \"Base URL\",\n        name: \"base_url\",\n        optional: false,\n      },\n      {\n        type: \"list\",\n        query: \"Enter categories to include:\",\n        label: \"Categories\",\n        name: \"categories\",\n        optional: true,\n      },\n    ],\n    advanced_values: [],\n  },\n  drupal_wiki: {\n    description: \"Configure Drupal Wiki connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the base URL of the Drupal Wiki instance:\",\n        label: \"Base URL\",\n        name: \"base_url\",\n        optional: false,\n        description:\n          \"The base URL of your Drupal Wiki instance (e.g., https://help.drupal-wiki.com )\",\n      },\n      {\n        type: \"tab\",\n        name: \"indexing_scope\",\n        label: \"What should we index from Drupal Wiki?\",\n        optional: true,\n        tabs: [\n          {\n            value: \"everything\",\n            label: \"Everything\",\n            fields: [\n              {\n                type: \"string_tab\",\n                label: \"Everything\",\n                name: \"everything_description\",\n                description:\n                  \"This connector will index all spaces the provided credentials have access to!\",\n              },\n            ],\n          },\n          {\n            value: \"specific\",\n            label: \"Specific Spaces/Pages\",\n            fields: [\n              {\n                type: \"list\",\n                query: \"Enter space IDs to include:\",\n                label: \"Space IDs\",\n                name: \"spaces\",\n                description:\n                  \"Specify one or more space IDs to index. Only numeric values are allowed.\",\n                optional: true,\n                transform: (values: string[]) =>\n                  values.filter((value) => /^\\d+$/.test(value.trim())),\n              },\n              {\n                type: \"list\",\n                query: \"Enter page IDs to include:\",\n                label: \"Page IDs\",\n                name: \"pages\",\n                description:\n                  \"Specify one or more page IDs to index. Only numeric values are allowed.\",\n                optional: true,\n                transform: (values: string[]) =>\n                  values.filter((value) => /^\\d+$/.test(value.trim())),\n              },\n            ],\n          },\n        ],\n      },\n      {\n        type: \"checkbox\",\n        query: \"Include attachments?\",\n        label: \"Include Attachments\",\n        name: \"include_attachments\",\n        description:\n          \"Enable processing of page attachments including images and documents\",\n        default: false,\n      },\n    ],\n    advanced_values: [],\n  },\n  axero: {\n    description: \"Configure Axero connector\",\n    values: [\n      {\n        type: \"list\",\n        query: \"Enter spaces to include:\",\n        label: \"Spaces\",\n        name: \"spaces\",\n        optional: true,\n        description:\n          \"Specify zero or more Spaces to index (by the Space IDs). If no Space IDs are specified, all Spaces will be indexed.\",\n      },\n    ],\n    advanced_values: [],\n    overrideDefaultFreq: 60 * 60 * 24,\n  },\n  productboard: {\n    description: \"Configure Productboard connector\",\n    values: [],\n    advanced_values: [],\n  },\n  slack: {\n    description: \"Configure Slack connector\",\n    values: [],\n    advanced_values: [\n      {\n        type: \"list\",\n        query: \"Enter channels to include:\",\n        label: \"Channels\",\n        name: \"channels\",\n        description: `Specify 0 or more channels to index. For example, specifying the channel \"support\" will cause us to only index all content within the \"#support\" channel. If no channels are specified, all channels in your workspace will be indexed.`,\n        optional: true,\n        // Slack Channels can only be lowercase\n        transform: (values) => values.map((value) => value.toLowerCase()),\n      },\n      {\n        type: \"checkbox\",\n        query: \"Enable channel regex?\",\n        label: \"Enable Channel Regex\",\n        name: \"channel_regex_enabled\",\n        description: `If enabled, we will treat the \"channels\" specified above as regular expressions. A channel's messages will be pulled in by the connector if the name of the channel fully matches any of the specified regular expressions.\nFor example, specifying .*-support.* as a \"channel\" will cause the connector to include any channels with \"-support\" in the name.`,\n        optional: true,\n      },\n      {\n        type: \"checkbox\",\n        query: \"Include bot messages?\",\n        label: \"Include Bot Messages\",\n        name: \"include_bot_messages\",\n        description:\n          \"If enabled, messages from bots and apps will be indexed. Useful for channels that are primarily bot-driven feeds (e.g. CRM updates, automated notes).\",\n        optional: true,\n      },\n    ],\n  },\n  slab: {\n    description: \"Configure Slab connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the base URL:\",\n        label: \"Base URL\",\n        name: \"base_url\",\n        optional: false,\n        description: `Specify the base URL for your Slab team. This will look something like: https://onyx.slab.com/`,\n      },\n    ],\n    advanced_values: [],\n  },\n  guru: {\n    description: \"Configure Guru connector\",\n    values: [],\n    advanced_values: [],\n  },\n  gong: {\n    description: \"Configure Gong connector\",\n    values: [\n      {\n        type: \"list\",\n        query: \"Enter workspaces to include:\",\n        label: \"Workspaces\",\n        name: \"workspaces\",\n        optional: true,\n        description:\n          \"Specify 0 or more workspaces to index. Provide the workspace ID or the EXACT workspace name from Gong. If no workspaces are specified, transcripts from all workspaces will be indexed.\",\n      },\n    ],\n    advanced_values: [],\n  },\n  loopio: {\n    description: \"Configure Loopio connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the Loopio stack name\",\n        label: \"Loopio Stack Name\",\n        name: \"loopio_stack_name\",\n        description:\n          \"Must be exact match to the name in Library Management, leave this blank if you want to index all Stacks\",\n        optional: true,\n      },\n    ],\n    advanced_values: [],\n    overrideDefaultFreq: 60 * 60 * 24,\n  },\n  file: {\n    description: \"Configure File connector\",\n    values: [\n      {\n        type: \"file\",\n        query: \"Enter file locations:\",\n        label: \"Files\",\n        name: \"file_locations\",\n        optional: false,\n      },\n    ],\n    advanced_values: [],\n  },\n  zulip: {\n    description: \"Configure Zulip connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the realm name\",\n        label: \"Realm Name\",\n        name: \"realm_name\",\n        optional: false,\n      },\n      {\n        type: \"text\",\n        query: \"Enter the realm URL\",\n        label: \"Realm URL\",\n        name: \"realm_url\",\n        optional: false,\n      },\n    ],\n    advanced_values: [],\n  },\n  coda: {\n    description: \"Configure Coda connector\",\n    values: [],\n    advanced_values: [],\n  },\n  notion: {\n    description: \"Configure Notion connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the root page ID\",\n        label: \"Root Page ID\",\n        name: \"root_page_id\",\n        optional: true,\n        description:\n          \"If specified, will only index the specified page + all of its child pages. If left blank, will index all pages the integration has been given access to.\",\n      },\n    ],\n    advanced_values: [],\n  },\n  hubspot: {\n    description: \"Configure HubSpot connector\",\n    values: [\n      {\n        type: \"multiselect\",\n        query: \"Select which HubSpot objects to index:\",\n        label: \"Object Types\",\n        name: \"object_types\",\n        options: [\n          { name: \"Tickets\", value: \"tickets\" },\n          { name: \"Companies\", value: \"companies\" },\n          { name: \"Deals\", value: \"deals\" },\n          { name: \"Contacts\", value: \"contacts\" },\n        ],\n        default: [\"tickets\", \"companies\", \"deals\", \"contacts\"],\n        description:\n          \"Choose which HubSpot object types to index. All types are selected by default.\",\n        optional: false,\n      },\n    ],\n    advanced_values: [],\n  },\n  document360: {\n    description: \"Configure Document360 connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the workspace\",\n        label: \"Workspace\",\n        name: \"workspace\",\n        optional: false,\n      },\n      {\n        type: \"list\",\n        query: \"Enter categories to include\",\n        label: \"Categories\",\n        name: \"categories\",\n        optional: true,\n        description:\n          \"Specify 0 or more categories to index. For instance, specifying the category 'Help' will cause us to only index all content within the 'Help' category. If no categories are specified, all categories in your workspace will be indexed.\",\n      },\n    ],\n    advanced_values: [],\n  },\n  clickup: {\n    description: \"Configure ClickUp connector\",\n    values: [\n      {\n        type: \"select\",\n        query: \"Select the connector type:\",\n        label: \"Connector Type\",\n        name: \"connector_type\",\n        optional: false,\n        options: [\n          { name: \"list\", value: \"list\" },\n          { name: \"folder\", value: \"folder\" },\n          { name: \"space\", value: \"space\" },\n          { name: \"workspace\", value: \"workspace\" },\n        ],\n      },\n      {\n        type: \"list\",\n        query: \"Enter connector IDs:\",\n        label: \"Connector IDs\",\n        name: \"connector_ids\",\n        description: \"Specify 0 or more id(s) to index from.\",\n        optional: true,\n      },\n      {\n        type: \"checkbox\",\n        query: \"Retrieve task comments?\",\n        label: \"Retrieve Task Comments\",\n        name: \"retrieve_task_comments\",\n        description:\n          \"If checked, then all the comments for each task will also be retrieved and indexed.\",\n        optional: false,\n      },\n    ],\n    advanced_values: [],\n  },\n  google_sites: {\n    description: \"Configure Google Sites connector\",\n    values: [\n      {\n        type: \"file\",\n        query: \"Enter the zip path:\",\n        label: \"File Locations\",\n        name: \"file_locations\",\n        optional: false,\n        description:\n          \"Upload a zip file containing the HTML of your Google Site\",\n      },\n      {\n        type: \"text\",\n        query: \"Enter the base URL:\",\n        label: \"Base URL\",\n        name: \"base_url\",\n        optional: false,\n      },\n    ],\n    advanced_values: [],\n  },\n  zendesk: {\n    description: \"Configure Zendesk connector\",\n    values: [\n      {\n        type: \"select\",\n        query: \"Select the what content this connector will index:\",\n        label: \"Content Type\",\n        name: \"content_type\",\n        optional: false,\n        options: [\n          { name: \"articles\", value: \"articles\" },\n          { name: \"tickets\", value: \"tickets\" },\n        ],\n        default: \"articles\",\n      },\n    ],\n    advanced_values: [\n      {\n        type: \"number\",\n        label: \"API Calls per Minute\",\n        name: \"calls_per_minute\",\n        optional: true,\n        description:\n          \"Restricts how many Zendesk API calls this connector can make per minute (applies only to this connector). See defaults: https://developer.zendesk.com/api-reference/introduction/rate-limits/\",\n      },\n    ],\n  },\n  linear: {\n    description: \"Configure Linear connector\",\n    values: [],\n    advanced_values: [],\n  },\n  dropbox: {\n    description: \"Configure Dropbox connector\",\n    values: [],\n    advanced_values: [],\n  },\n  s3: {\n    description: \"Configure S3 connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the bucket name:\",\n        label: \"Bucket Name\",\n        name: \"bucket_name\",\n        optional: false,\n      },\n      {\n        type: \"text\",\n        query: \"Enter the prefix:\",\n        label: \"Prefix\",\n        name: \"prefix\",\n        optional: true,\n      },\n      {\n        type: \"text\",\n        label: \"Bucket Type\",\n        name: \"bucket_type\",\n        optional: false,\n        default: \"s3\",\n        hidden: true,\n      },\n    ],\n    advanced_values: [],\n    overrideDefaultFreq: 60 * 60 * 24,\n  },\n  r2: {\n    description: \"Configure R2 connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the bucket name:\",\n        label: \"Bucket Name\",\n        name: \"bucket_name\",\n        optional: false,\n      },\n      {\n        type: \"text\",\n        query: \"Enter the prefix:\",\n        label: \"Prefix\",\n        name: \"prefix\",\n        optional: true,\n      },\n      {\n        type: \"checkbox\",\n        label: \"EU Data Residency\",\n        name: \"european_residency\",\n        description:\n          \"Check this box if your bucket has EU data residency enabled.\",\n        optional: true,\n        default: false,\n      },\n      {\n        type: \"text\",\n        label: \"Bucket Type\",\n        name: \"bucket_type\",\n        optional: false,\n        default: \"r2\",\n        hidden: true,\n      },\n    ],\n    advanced_values: [],\n    overrideDefaultFreq: 60 * 60 * 24,\n  },\n  google_cloud_storage: {\n    description: \"Configure Google Cloud Storage connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the bucket name:\",\n        label: \"Bucket Name\",\n        name: \"bucket_name\",\n        optional: false,\n        description: \"Name of the GCS bucket to index, e.g. my-gcs-bucket\",\n      },\n      {\n        type: \"text\",\n        query: \"Enter the prefix:\",\n        label: \"Path Prefix\",\n        name: \"prefix\",\n        optional: true,\n      },\n      {\n        type: \"text\",\n        label: \"Bucket Type\",\n        name: \"bucket_type\",\n        optional: false,\n        default: \"google_cloud_storage\",\n        hidden: true,\n      },\n    ],\n    advanced_values: [],\n    overrideDefaultFreq: 60 * 60 * 24,\n  },\n  oci_storage: {\n    description: \"Configure OCI Storage connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the bucket name:\",\n        label: \"Bucket Name\",\n        name: \"bucket_name\",\n        optional: false,\n      },\n      {\n        type: \"text\",\n        query: \"Enter the prefix:\",\n        label: \"Prefix\",\n        name: \"prefix\",\n        optional: true,\n      },\n      {\n        type: \"text\",\n        label: \"Bucket Type\",\n        name: \"bucket_type\",\n        optional: false,\n        default: \"oci_storage\",\n        hidden: true,\n      },\n    ],\n    advanced_values: [],\n  },\n  wikipedia: {\n    description: \"Configure Wikipedia connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the language code:\",\n        label: \"Language Code\",\n        name: \"language_code\",\n        optional: false,\n        description: \"Input a valid Wikipedia language code (e.g. 'en', 'es')\",\n      },\n      {\n        type: \"list\",\n        query: \"Enter categories to include:\",\n        label: \"Categories to index\",\n        name: \"categories\",\n        description:\n          \"Specify 0 or more names of categories to index. For most Wikipedia sites, these are pages with a name of the form 'Category: XYZ', that are lists of other pages/categories. Only specify the name of the category, not its url.\",\n        optional: true,\n      },\n      {\n        type: \"list\",\n        query: \"Enter pages to include:\",\n        label: \"Pages\",\n        name: \"pages\",\n        optional: true,\n        description: \"Specify 0 or more names of pages to index.\",\n      },\n      {\n        type: \"number\",\n        query: \"Enter the recursion depth:\",\n        label: \"Recursion Depth\",\n        name: \"recurse_depth\",\n        description:\n          \"When indexing categories that have sub-categories, this will determine how may levels to index. Specify 0 to only index the category itself (i.e. no recursion). Specify -1 for unlimited recursion depth. Note, that in some rare instances, a category might contain itself in its dependencies, which will cause an infinite loop. Only use -1 if you confident that this will not happen.\",\n        optional: false,\n      },\n    ],\n    advanced_values: [],\n  },\n  xenforo: {\n    description: \"Configure Xenforo connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter forum or thread URL:\",\n        label: \"URL\",\n        name: \"base_url\",\n        optional: false,\n        description:\n          \"The XenForo v2.2 forum URL to index. Can be board or thread.\",\n      },\n    ],\n    advanced_values: [],\n  },\n  asana: {\n    description: \"Configure Asana connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter your Asana workspace ID:\",\n        label: \"Workspace ID\",\n        name: \"asana_workspace_id\",\n        optional: false,\n        description:\n          \"The ID of the Asana workspace to index. You can find this at https://app.asana.com/api/1.0/workspaces. It's a number that looks like 1234567890123456.\",\n      },\n      {\n        type: \"text\",\n        query: \"Enter project IDs to index (optional):\",\n        label: \"Project IDs\",\n        name: \"asana_project_ids\",\n        description:\n          \"IDs of specific Asana projects to index, separated by commas. Leave empty to index all projects in the workspace. Example: 1234567890123456,2345678901234567\",\n        optional: true,\n      },\n      {\n        type: \"text\",\n        query: \"Enter the Team ID (optional):\",\n        label: \"Team ID\",\n        name: \"asana_team_id\",\n        optional: true,\n        description:\n          \"ID of a team to use for accessing team-visible tasks. This allows indexing of team-visible tasks in addition to public tasks. Leave empty if you don't want to use this feature.\",\n      },\n    ],\n    advanced_values: [],\n  },\n  mediawiki: {\n    description: \"Configure MediaWiki connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the language code:\",\n        label: \"Language Code\",\n        name: \"language_code\",\n        optional: false,\n        description: \"Input a valid MediaWiki language code (e.g. 'en', 'es')\",\n      },\n      {\n        type: \"text\",\n        query: \"Enter the MediaWiki Site URL\",\n        label: \"MediaWiki Site URL\",\n        name: \"hostname\",\n        optional: false,\n      },\n      {\n        type: \"list\",\n        query: \"Enter categories to include:\",\n        label: \"Categories to index\",\n        name: \"categories\",\n        description:\n          \"Specify 0 or more names of categories to index. For most MediaWiki sites, these are pages with a name of the form 'Category: XYZ', that are lists of other pages/categories. Only specify the name of the category, not its url.\",\n        optional: true,\n      },\n      {\n        type: \"list\",\n        query: \"Enter pages to include:\",\n        label: \"Pages\",\n        name: \"pages\",\n        optional: true,\n        description:\n          \"Specify 0 or more names of pages to index. Only specify the name of the page, not its url.\",\n      },\n      {\n        type: \"number\",\n        query: \"Enter the recursion depth:\",\n        label: \"Recursion Depth\",\n        name: \"recurse_depth\",\n        description:\n          \"When indexing categories that have sub-categories, this will determine how may levels to index. Specify 0 to only index the category itself (i.e. no recursion). Specify -1 for unlimited recursion depth. Note, that in some rare instances, a category might contain itself in its dependencies, which will cause an infinite loop. Only use -1 if you confident that this will not happen.\",\n        optional: true,\n      },\n    ],\n    advanced_values: [],\n  },\n  discord: {\n    description: \"Configure Discord connector\",\n    values: [],\n    advanced_values: [\n      {\n        type: \"list\",\n        query: \"Enter Server IDs to include:\",\n        label: \"Server IDs\",\n        name: \"server_ids\",\n        description: `Specify 0 or more server ids to include. Only channels inside them will be used for indexing`,\n        optional: true,\n      },\n      {\n        type: \"list\",\n        query: \"Enter channel names to include:\",\n        label: \"Channels\",\n        name: \"channel_names\",\n        description: `Specify 0 or more channels to index. For example, specifying the channel \"support\" will cause us to only index all content within the \"#support\" channel. If no channels are specified, all channels the bot has access to will be indexed.`,\n        optional: true,\n      },\n      {\n        type: \"text\",\n        query: \"Enter the Start Date:\",\n        label: \"Start Date\",\n        name: \"start_date\",\n        description: `Only messages after this date will be indexed. Format: YYYY-MM-DD`,\n        optional: true,\n      },\n    ],\n  },\n  freshdesk: {\n    description: \"Configure Freshdesk connector\",\n    values: [],\n    advanced_values: [],\n  },\n  fireflies: {\n    description: \"Configure Fireflies connector\",\n    values: [],\n    advanced_values: [],\n  },\n  egnyte: {\n    description: \"Configure Egnyte connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter folder path to index:\",\n        label: \"Folder Path\",\n        name: \"folder_path\",\n        optional: true,\n        description:\n          \"The folder path to index (e.g., '/Shared/Documents'). Leave empty to index everything.\",\n      },\n    ],\n    advanced_values: [],\n  },\n  airtable: {\n    description: \"Configure Airtable connector\",\n    values: [\n      {\n        type: \"tab\",\n        name: \"airtable_scope\",\n        label: \"What should we index from Airtable?\",\n        optional: true,\n        tabs: [\n          {\n            value: \"everything\",\n            label: \"Everything\",\n            fields: [\n              {\n                type: \"string_tab\",\n                label: \"Everything\",\n                name: \"everything_description\",\n                description:\n                  \"This connector will automatically discover and index all bases and tables accessible by your API token.\",\n              },\n            ],\n          },\n          {\n            value: \"specific\",\n            label: \"Specific Table\",\n            fields: [\n              {\n                type: \"text\",\n                query: \"Paste the Airtable URL:\",\n                label: \"Airtable URL\",\n                name: \"airtable_url\",\n                optional: false,\n                description:\n                  \"Paste the URL from your browser when viewing the table, e.g. https://airtable.com/appXXX/tblYYY/viwZZZ\",\n              },\n              {\n                type: \"text\",\n                label: \"Share ID\",\n                name: \"share_id\",\n                optional: true,\n                description:\n                  \"Optional. If you want record links to use a shared view URL, put the share ID here e.g. shrkfjEzDmLaDtK83.\",\n              },\n            ],\n          },\n        ],\n      },\n      {\n        type: \"checkbox\",\n        label: \"Treat all fields except attachments as metadata\",\n        name: \"treat_all_non_attachment_fields_as_metadata\",\n        description:\n          \"Choose this if the primary content to index are attachments and all other columns are metadata for these attachments.\",\n        optional: false,\n      },\n    ],\n    advanced_values: [],\n    overrideDefaultFreq: 60 * 60 * 24,\n  },\n  highspot: {\n    description: \"Configure Highspot connector\",\n    values: [\n      {\n        type: \"tab\",\n        name: \"highspot_scope\",\n        label: \"What should we index from Highspot?\",\n        optional: true,\n        tabs: [\n          {\n            value: \"spots\",\n            label: \"Specific Spots\",\n            fields: [\n              {\n                type: \"list\",\n                query: \"Enter the spot name(s):\",\n                label: \"Spot Name(s)\",\n                name: \"spot_names\",\n                optional: false,\n                description: \"For multiple spots, enter your spot one by one.\",\n              },\n            ],\n          },\n          {\n            value: \"everything\",\n            label: \"Everything\",\n            fields: [\n              {\n                type: \"string_tab\",\n                label: \"Everything\",\n                name: \"everything\",\n                description:\n                  \"This connector will index all spots the provided credentials have access to!\",\n              },\n            ],\n          },\n        ],\n      },\n    ],\n    advanced_values: [],\n  },\n  imap: {\n    description: \"Configure Email connector\",\n    values: [\n      {\n        type: \"text\",\n        query: \"Enter the IMAP server host:\",\n        label: \"IMAP Server Host\",\n        name: \"host\",\n        optional: false,\n        description:\n          \"The IMAP server hostname (e.g., imap.gmail.com, outlook.office365.com)\",\n      },\n      {\n        type: \"number\",\n        query: \"Enter the IMAP server port:\",\n        label: \"IMAP Server Port\",\n        name: \"port\",\n        optional: true,\n        default: 993,\n        description: \"The IMAP server port (default: 993 for SSL)\",\n      },\n      {\n        type: \"list\",\n        query: \"Enter mailboxes to include:\",\n        label: \"Mailboxes\",\n        name: \"mailboxes\",\n        optional: true,\n        description:\n          \"Specify mailboxes to index (e.g., INBOX, Sent, Drafts). Leave empty to index all mailboxes.\",\n      },\n    ],\n    advanced_values: [],\n  },\n};\ntype ConnectorField = ConnectionConfiguration[\"values\"][number];\n\nconst buildInitialValuesForFields = (\n  fields: ConnectorField[]\n): Record<string, any> =>\n  fields.reduce(\n    (acc, field) => {\n      if (field.type === \"select\") {\n        acc[field.name] = null;\n      } else if (field.type === \"list\") {\n        acc[field.name] = field.default || [];\n      } else if (field.type === \"multiselect\") {\n        acc[field.name] = field.default || [];\n      } else if (field.type === \"checkbox\") {\n        acc[field.name] = field.default ?? false;\n      } else if (field.default !== undefined) {\n        acc[field.name] = field.default;\n      }\n      return acc;\n    },\n    {} as Record<string, any>\n  );\n\nexport function createConnectorInitialValues(\n  connector: ConfigurableSources\n): Record<string, any> & AccessTypeGroupSelectorFormType {\n  const configuration = connectorConfigs[connector];\n\n  return {\n    name: \"\",\n    groups: [],\n    access_type: \"public\",\n    ...buildInitialValuesForFields(configuration.values),\n    ...buildInitialValuesForFields(configuration.advanced_values),\n  };\n}\n\nexport function createConnectorValidationSchema(\n  connector: ConfigurableSources\n): Yup.ObjectSchema<Record<string, any>> {\n  const configuration = connectorConfigs[connector];\n\n  const object = Yup.object().shape({\n    access_type: Yup.string().required(\"Access Type is required\"),\n    name: Yup.string().required(\"Connector Name is required\"),\n    ...[...configuration.values, ...configuration.advanced_values].reduce(\n      (acc, field) => {\n        let schema: any =\n          field.type === \"select\"\n            ? Yup.string()\n            : field.type === \"list\"\n              ? Yup.array().of(Yup.string())\n              : field.type === \"multiselect\"\n                ? Yup.array().of(Yup.string())\n                : field.type === \"checkbox\"\n                  ? Yup.boolean()\n                  : field.type === \"file\"\n                    ? Yup.mixed()\n                    : Yup.string();\n\n        if (!field.optional) {\n          schema = schema.required(`${field.label} is required`);\n        }\n\n        acc[field.name] = schema;\n        return acc;\n      },\n      {} as Record<string, any>\n    ),\n    // These are advanced settings\n    indexingStart: Yup.string().nullable(),\n    pruneFreq: Yup.number().min(\n      0.083,\n      \"Prune frequency must be at least 0.083 hours (5 minutes)\"\n    ),\n    refreshFreq: Yup.number().min(\n      1,\n      \"Refresh frequency must be at least 1 minute\"\n    ),\n  });\n\n  return object;\n}\n\nexport const defaultPruneFreqHours = 720; // 30 days in hours\nexport const defaultRefreshFreqMinutes = 30; // 30 minutes\n\n// CONNECTORS\nexport interface ConnectorBase<T> {\n  name: string;\n  source: ValidSources;\n  input_type: ValidInputTypes;\n  connector_specific_config: T;\n  refresh_freq: number | null;\n  prune_freq: number | null;\n  indexing_start: Date | null;\n  access_type: string;\n  groups?: number[];\n  from_beginning?: boolean;\n}\n\nexport interface Connector<T> extends ConnectorBase<T> {\n  id: number;\n  credential_ids: number[];\n  time_created: string;\n  time_updated: string;\n}\n\nexport interface ConnectorSnapshot {\n  id: number;\n  name: string;\n  source: ValidSources;\n  input_type: ValidInputTypes;\n  // connector_specific_config\n  refresh_freq: number | null;\n  prune_freq: number | null;\n  credential_ids: number[];\n  indexing_start: number | null;\n  time_created: string;\n  time_updated: string;\n  from_beginning?: boolean;\n}\n\nexport interface WebConfig {\n  base_url: string;\n  web_connector_type?: \"recursive\" | \"single\" | \"sitemap\";\n}\n\nexport interface GithubConfig {\n  repo_owner: string;\n  repositories: string; // Comma-separated list of repository names\n  include_prs: boolean;\n  include_issues: boolean;\n}\n\nexport interface GitlabConfig {\n  project_owner: string;\n  project_name: string;\n  include_mrs: boolean;\n  include_issues: boolean;\n}\n\nexport interface BitbucketConfig {\n  workspace: string;\n  repositories?: string;\n  projects?: string;\n}\n\nexport interface GoogleDriveConfig {\n  include_shared_drives?: boolean;\n  shared_drive_urls?: string;\n  include_my_drives?: boolean;\n  my_drive_emails?: string;\n  shared_folder_urls?: string;\n}\n\nexport interface GmailConfig {}\n\nexport interface BookstackConfig {}\n\nexport interface OutlineConfig {}\n\nexport interface ConfluenceConfig {\n  wiki_base: string;\n  space?: string;\n  page_id?: string;\n  is_cloud?: boolean;\n  index_recursively?: boolean;\n  cql_query?: string;\n}\n\nexport interface JiraConfig {\n  jira_project_url: string;\n  project_key?: string;\n  comment_email_blacklist?: string[];\n  jql_query?: string;\n}\n\nexport interface SalesforceConfig {\n  requested_objects?: string[];\n}\n\nexport interface SharepointConfig {\n  sites?: string[];\n  include_site_pages?: boolean;\n  treat_sharing_link_as_public?: boolean;\n  include_site_documents?: boolean;\n  authority_host?: string;\n  graph_api_host?: string;\n  sharepoint_domain_suffix?: string;\n}\n\nexport interface TeamsConfig {\n  teams?: string[];\n  authority_host?: string;\n  graph_api_host?: string;\n}\n\nexport interface DiscourseConfig {\n  base_url: string;\n  categories?: string[];\n}\n\nexport interface AxeroConfig {\n  spaces?: string[];\n}\n\nexport interface DrupalWikiConfig {\n  base_url: string;\n  spaces?: string[];\n  pages?: string[];\n  include_attachments?: boolean;\n}\n\nexport interface ProductboardConfig {}\n\nexport interface SlackConfig {\n  workspace: string;\n  channels?: string[];\n  channel_regex_enabled?: boolean;\n  include_bot_messages?: boolean;\n}\n\nexport interface SlabConfig {\n  base_url: string;\n}\n\nexport interface GuruConfig {}\n\nexport interface GongConfig {\n  workspaces?: string[];\n}\n\nexport interface LoopioConfig {\n  loopio_stack_name?: string;\n}\n\nexport interface FileConfig {\n  file_locations: string[];\n  file_names: string[];\n  zip_metadata_file_id: string | null;\n}\n\nexport interface ZulipConfig {\n  realm_name: string;\n  realm_url: string;\n}\n\nexport interface CodaConfig {\n  workspace_id?: string;\n}\n\nexport interface NotionConfig {\n  root_page_id?: string;\n}\n\nexport interface HubSpotConfig {\n  object_types?: string[];\n}\n\nexport interface Document360Config {\n  workspace: string;\n  categories?: string[];\n}\n\nexport interface ClickupConfig {\n  connector_type: \"list\" | \"folder\" | \"space\" | \"workspace\";\n  connector_ids?: string[];\n  retrieve_task_comments: boolean;\n}\n\nexport interface GoogleSitesConfig {\n  zip_path: string;\n  base_url: string;\n}\n\nexport interface XenforoConfig {\n  base_url: string;\n}\n\nexport interface ZendeskConfig {\n  content_type?: \"articles\" | \"tickets\";\n  calls_per_minute?: number;\n}\n\nexport interface DropboxConfig {}\n\nexport interface S3Config {\n  bucket_type: \"s3\";\n  bucket_name: string;\n  prefix: string;\n}\n\nexport interface R2Config {\n  bucket_type: \"r2\";\n  bucket_name: string;\n  prefix: string;\n  european_residency?: boolean;\n}\n\nexport interface GCSConfig {\n  bucket_type: \"google_cloud_storage\";\n  bucket_name: string;\n  prefix: string;\n}\n\nexport interface OCIConfig {\n  bucket_type: \"oci_storage\";\n  bucket_name: string;\n  prefix: string;\n}\n\nexport interface MediaWikiBaseConfig {\n  connector_name: string;\n  language_code: string;\n  categories?: string[];\n  pages?: string[];\n  recurse_depth?: number;\n}\n\nexport interface AsanaConfig {\n  asana_workspace_id: string;\n  asana_project_ids?: string;\n  asana_team_id?: string;\n}\n\nexport interface FreshdeskConfig {}\n\nexport interface FirefliesConfig {}\n\nexport interface MediaWikiConfig extends MediaWikiBaseConfig {\n  hostname: string;\n}\n\nexport interface WikipediaConfig extends MediaWikiBaseConfig {}\n\nexport interface ImapConfig {\n  host: string;\n  port?: number;\n  mailboxes?: string[];\n}\n"
  },
  {
    "path": "web/src/lib/connectors/credentials.ts",
    "content": "import { ValidSources } from \"../types\";\nimport { TypedFile } from \"./fileTypes\";\n\nexport interface OAuthAdditionalKwargDescription {\n  name: string;\n  display_name: string;\n  description: string;\n}\n\nexport interface OAuthDetails {\n  oauth_enabled: boolean;\n  additional_kwargs: OAuthAdditionalKwargDescription[];\n}\nexport interface AuthMethodOption<TFields> {\n  value: string;\n  label: string;\n  fields: TFields;\n  description?: string;\n  // UI-only: if true, hide/disable the \"Auto Sync Permissions\" access type when this auth is used\n  disablePermSync?: boolean;\n}\nexport interface CredentialTemplateWithAuth<TFields> {\n  authentication_method?: string;\n  authMethods?: AuthMethodOption<Partial<TFields>>[];\n}\n\nexport interface CredentialBase<T> {\n  credential_json: T;\n  admin_public: boolean;\n  source: ValidSources;\n  name?: string;\n  curator_public?: boolean;\n  groups?: number[];\n}\n\nexport interface CredentialWithPrivateKey<T> extends CredentialBase<T> {\n  private_key: TypedFile;\n}\n\nexport interface Credential<T> extends CredentialBase<T> {\n  id: number;\n  user_id: string | null;\n  user_email: string | null;\n  time_created: string;\n  time_updated: string;\n}\nexport interface GithubCredentialJson {\n  github_access_token: string;\n}\n\nexport interface GitbookCredentialJson {\n  gitbook_api_key: string;\n}\n\nexport interface GitlabCredentialJson {\n  gitlab_url: string;\n  gitlab_access_token: string;\n}\n\nexport interface BitbucketCredentialJson {\n  bitbucket_email: string;\n  bitbucket_api_token: string;\n}\n\nexport interface BookstackCredentialJson {\n  bookstack_base_url: string;\n  bookstack_api_token_id: string;\n  bookstack_api_token_secret: string;\n}\n\nexport interface OutlineCredentialJson {\n  outline_base_url: string;\n  outline_api_token: string;\n}\n\nexport interface ConfluenceCredentialJson {\n  confluence_username: string;\n  confluence_access_token: string;\n}\n\nexport interface JiraCredentialJson {\n  jira_user_email: string | null;\n  jira_api_token: string;\n}\n\nexport interface JiraServerCredentialJson {\n  jira_api_token: string;\n}\n\nexport interface ProductboardCredentialJson {\n  productboard_access_token: string;\n}\n\nexport interface SlackCredentialJson {\n  slack_bot_token: string;\n}\n\nexport interface GmailCredentialJson {\n  google_tokens: string;\n  google_primary_admin: string;\n}\n\nexport interface GoogleDriveCredentialJson {\n  google_tokens: string;\n  google_primary_admin: string;\n  authentication_method?: string;\n}\n\nexport interface GmailServiceAccountCredentialJson {\n  google_service_account_key: string;\n  google_primary_admin: string;\n}\n\nexport interface GoogleDriveServiceAccountCredentialJson {\n  google_service_account_key: string;\n  google_primary_admin: string;\n  authentication_method?: string;\n}\n\nexport interface SlabCredentialJson {\n  slab_bot_token: string;\n}\n\nexport interface CodaCredentialJson {\n  coda_bearer_token: string;\n}\n\nexport interface NotionCredentialJson {\n  notion_integration_token: string;\n}\n\nexport interface ZulipCredentialJson {\n  zuliprc_content: string;\n}\n\nexport interface GuruCredentialJson {\n  guru_user: string;\n  guru_user_token: string;\n}\n\nexport interface GongCredentialJson {\n  gong_access_key: string;\n  gong_access_key_secret: string;\n}\n\nexport interface LoopioCredentialJson {\n  loopio_subdomain: string;\n  loopio_client_id: string;\n  loopio_client_token: string;\n}\n\nexport interface LinearCredentialJson {\n  linear_access_token: string;\n}\n\nexport interface HubSpotCredentialJson {\n  hubspot_access_token: string;\n}\n\nexport interface Document360CredentialJson {\n  portal_id: string;\n  document360_api_token: string;\n}\n\nexport interface ClickupCredentialJson {\n  clickup_api_token: string;\n  clickup_team_id: string;\n}\n\nexport interface ZendeskCredentialJson {\n  zendesk_subdomain: string;\n  zendesk_email: string;\n  zendesk_token: string;\n}\n\nexport interface DropboxCredentialJson {\n  dropbox_access_token: string;\n}\n\nexport interface R2CredentialJson {\n  account_id: string;\n  r2_access_key_id: string;\n  r2_secret_access_key: string;\n}\n\nexport interface S3CredentialJson {\n  aws_access_key_id?: string;\n  aws_secret_access_key?: string;\n  aws_role_arn?: string;\n}\n\nexport interface GCSCredentialJson {\n  access_key_id: string;\n  secret_access_key: string;\n}\n\nexport interface OCICredentialJson {\n  namespace: string;\n  region: string;\n  access_key_id: string;\n  secret_access_key: string;\n}\nexport interface SalesforceCredentialJson {\n  sf_username: string;\n  sf_password: string;\n  sf_security_token: string;\n  is_sandbox: boolean;\n}\n\nexport interface SharepointCredentialJson {\n  sp_client_id: string;\n  sp_client_secret?: string;\n  sp_directory_id: string;\n  sp_certificate_password?: string;\n  sp_private_key?: TypedFile;\n}\n\nexport interface AsanaCredentialJson {\n  asana_api_token_secret: string;\n}\n\nexport interface TeamsCredentialJson {\n  teams_client_id: string;\n  teams_client_secret: string;\n  teams_directory_id: string;\n}\n\nexport interface DiscourseCredentialJson {\n  discourse_api_key: string;\n  discourse_api_username: string;\n}\n\nexport interface AxeroCredentialJson {\n  base_url: string;\n  axero_api_token: string;\n}\n\nexport interface DiscordCredentialJson {\n  discord_bot_token: string;\n}\n\nexport interface FreshdeskCredentialJson {\n  freshdesk_domain: string;\n  freshdesk_api_key: string;\n}\n\nexport interface FirefliesCredentialJson {\n  fireflies_api_key: string;\n}\n\nexport interface MediaWikiCredentialJson {}\nexport interface WikipediaCredentialJson extends MediaWikiCredentialJson {}\n\nexport interface EgnyteCredentialJson {\n  domain: string;\n  access_token: string;\n}\n\nexport interface AirtableCredentialJson {\n  airtable_access_token: string;\n}\n\nexport interface HighspotCredentialJson {\n  highspot_url: string;\n  highspot_key: string;\n  highspot_secret: string;\n}\n\nexport interface DrupalWikiCredentialJson {\n  drupal_wiki_api_token: string;\n}\n\nexport interface ImapCredentialJson {\n  imap_username: string;\n  imap_password: string;\n}\n\nexport interface TestRailCredentialJson {\n  testrail_base_url: string;\n  testrail_username: string;\n  testrail_api_key: string;\n}\n\nexport const credentialTemplates: Record<ValidSources, any> = {\n  github: { github_access_token: \"\" } as GithubCredentialJson,\n  gitlab: {\n    gitlab_url: \"\",\n    gitlab_access_token: \"\",\n  } as GitlabCredentialJson,\n  bitbucket: {\n    bitbucket_email: \"\",\n    bitbucket_api_token: \"\",\n  } as BitbucketCredentialJson,\n  slack: { slack_bot_token: \"\" } as SlackCredentialJson,\n  bookstack: {\n    bookstack_base_url: \"\",\n    bookstack_api_token_id: \"\",\n    bookstack_api_token_secret: \"\",\n  } as BookstackCredentialJson,\n  outline: {\n    outline_base_url: \"\",\n    outline_api_token: \"\",\n  } as OutlineCredentialJson,\n  confluence: {\n    confluence_username: \"\",\n    confluence_access_token: \"\",\n  } as ConfluenceCredentialJson,\n  jira: {\n    jira_user_email: null,\n    jira_api_token: \"\",\n  } as JiraCredentialJson,\n  productboard: { productboard_access_token: \"\" } as ProductboardCredentialJson,\n  slab: { slab_bot_token: \"\" } as SlabCredentialJson,\n  coda: { coda_bearer_token: \"\" } as CodaCredentialJson,\n  notion: { notion_integration_token: \"\" } as NotionCredentialJson,\n  guru: { guru_user: \"\", guru_user_token: \"\" } as GuruCredentialJson,\n  gong: {\n    gong_access_key: \"\",\n    gong_access_key_secret: \"\",\n  } as GongCredentialJson,\n  zulip: { zuliprc_content: \"\" } as ZulipCredentialJson,\n  linear: { linear_access_token: \"\" } as LinearCredentialJson,\n  hubspot: { hubspot_access_token: \"\" } as HubSpotCredentialJson,\n  document360: {\n    portal_id: \"\",\n    document360_api_token: \"\",\n  } as Document360CredentialJson,\n  loopio: {\n    loopio_subdomain: \"\",\n    loopio_client_id: \"\",\n    loopio_client_token: \"\",\n  } as LoopioCredentialJson,\n  dropbox: { dropbox_access_token: \"\" } as DropboxCredentialJson,\n  salesforce: {\n    sf_username: \"\",\n    sf_password: \"\",\n    sf_security_token: \"\",\n    is_sandbox: false,\n  } as SalesforceCredentialJson,\n  sharepoint: {\n    authentication_method: \"client_credentials\",\n    authMethods: [\n      {\n        value: \"client_secret\",\n        label: \"Client Secret\",\n        fields: {\n          sp_client_id: \"\",\n          sp_client_secret: \"\",\n          sp_directory_id: \"\",\n        },\n        description:\n          \"If you select this mode, the SharePoint connector will use a client secret to authenticate. You will need to provide the client ID and client secret.\",\n        disablePermSync: true,\n      },\n      {\n        value: \"certificate\",\n        label: \"Certificate Authentication\",\n        fields: {\n          sp_client_id: \"\",\n          sp_directory_id: \"\",\n          sp_certificate_password: \"\",\n          sp_private_key: null,\n        },\n        description:\n          \"If you select this mode, the SharePoint connector will use a certificate to authenticate. You will need to provide the client ID, directory ID, certificate password, and PFX data.\",\n        disablePermSync: false,\n      },\n    ],\n  } as CredentialTemplateWithAuth<SharepointCredentialJson>,\n  asana: {\n    asana_api_token_secret: \"\",\n  } as AsanaCredentialJson,\n  teams: {\n    teams_client_id: \"\",\n    teams_client_secret: \"\",\n    teams_directory_id: \"\",\n  } as TeamsCredentialJson,\n  zendesk: {\n    zendesk_subdomain: \"\",\n    zendesk_email: \"\",\n    zendesk_token: \"\",\n  } as ZendeskCredentialJson,\n  discourse: {\n    discourse_api_key: \"\",\n    discourse_api_username: \"\",\n  } as DiscourseCredentialJson,\n  axero: {\n    base_url: \"\",\n    axero_api_token: \"\",\n  } as AxeroCredentialJson,\n  clickup: {\n    clickup_api_token: \"\",\n    clickup_team_id: \"\",\n  } as ClickupCredentialJson,\n\n  s3: {\n    authentication_method: \"access_key\",\n    authMethods: [\n      {\n        value: \"access_key\",\n        label: \"Access Key and Secret\",\n        fields: {\n          aws_access_key_id: \"\",\n          aws_secret_access_key: \"\",\n        },\n        disablePermSync: false,\n      },\n      {\n        value: \"iam_role\",\n        label: \"IAM Role\",\n        fields: {\n          aws_role_arn: \"\",\n        },\n        disablePermSync: false,\n      },\n      {\n        value: \"assume_role\",\n        label: \"Assume Role\",\n        fields: {},\n        description:\n          \"If you select this mode, the Amazon EC2 instance will assume its existing role to access S3. No additional credentials are required.\",\n        disablePermSync: false,\n      },\n    ],\n  } as CredentialTemplateWithAuth<S3CredentialJson>,\n  r2: {\n    account_id: \"\",\n    r2_access_key_id: \"\",\n    r2_secret_access_key: \"\",\n  } as R2CredentialJson,\n  google_cloud_storage: {\n    access_key_id: \"\",\n    secret_access_key: \"\",\n  } as GCSCredentialJson,\n  oci_storage: {\n    namespace: \"\",\n    region: \"\",\n    access_key_id: \"\",\n    secret_access_key: \"\",\n  } as OCICredentialJson,\n  freshdesk: {\n    freshdesk_domain: \"\",\n    freshdesk_api_key: \"\",\n  } as FreshdeskCredentialJson,\n  fireflies: {\n    fireflies_api_key: \"\",\n  } as FirefliesCredentialJson,\n  egnyte: {\n    domain: \"\",\n    access_token: \"\",\n  } as EgnyteCredentialJson,\n  airtable: {\n    airtable_access_token: \"\",\n  } as AirtableCredentialJson,\n  drupal_wiki: {\n    drupal_wiki_api_token: \"\",\n  } as DrupalWikiCredentialJson,\n  xenforo: null,\n  google_sites: null,\n  file: null,\n  user_file: null,\n  craft_file: null, // User Library - managed through dedicated UI\n  wikipedia: null,\n  mediawiki: null,\n  web: null,\n  not_applicable: null,\n  ingestion_api: null,\n  federated_slack: null,\n  discord: { discord_bot_token: \"\" } as DiscordCredentialJson,\n\n  // NOTE: These are Special Cases\n  google_drive: { google_tokens: \"\" } as GoogleDriveCredentialJson,\n  gmail: { google_tokens: \"\" } as GmailCredentialJson,\n  gitbook: {\n    gitbook_api_key: \"\",\n  } as GitbookCredentialJson,\n  highspot: {\n    highspot_url: \"\",\n    highspot_key: \"\",\n    highspot_secret: \"\",\n  } as HighspotCredentialJson,\n  imap: {\n    imap_username: \"\",\n    imap_password: \"\",\n  } as ImapCredentialJson,\n  testrail: {\n    testrail_base_url: \"\",\n    testrail_username: \"\",\n    testrail_api_key: \"\",\n  } as TestRailCredentialJson,\n};\n\nexport const credentialDisplayNames: Record<string, string> = {\n  // Github\n  github_access_token: \"GitHub Access Token\",\n\n  // Gitlab\n  gitlab_url: \"GitLab URL\",\n  gitlab_access_token: \"GitLab Access Token\",\n\n  // Bookstack\n  bookstack_base_url: \"Bookstack Base URL\",\n  bookstack_api_token_id: \"Bookstack API Token ID\",\n  bookstack_api_token_secret: \"Bookstack API Token Secret\",\n\n  // Outline\n  outline_base_url:\n    \"Outline Base URL (e.g. https://app.getoutline.com or your self-hosted URL)\",\n  outline_api_token: \"Outline API Token\",\n\n  // Confluence\n  confluence_username: \"Confluence Username\",\n  confluence_access_token: \"Confluence Access Token\",\n\n  // Jira\n  jira_user_email: \"Jira User Email (required for Jira Cloud)\",\n  jira_api_token: \"API or Personal Access Token\",\n\n  // Productboard\n  productboard_access_token: \"Productboard Access Token\",\n\n  // Slack\n  slack_bot_token: \"Slack Bot Token\",\n\n  // Discord\n  discord_bot_token: \"Discord Bot Token\",\n\n  // Gmail and Google Drive\n  google_tokens: \"Google Oauth Tokens\",\n  google_service_account_key: \"Google Service Account Key\",\n  google_primary_admin: \"Primary Admin Email\",\n\n  // Slab\n  slab_bot_token: \"Slab Bot Token\",\n\n  // Coda\n  coda_bearer_token: \"Coda Bearer Token\",\n\n  // Notion\n  notion_integration_token: \"Notion Integration Token\",\n\n  // Zulip\n  zuliprc_content: \"Zuliprc Content\",\n\n  // Guru\n  guru_user: \"Guru User\",\n  guru_user_token: \"Guru User Token\",\n\n  // Gong\n  gong_access_key: \"Gong Access Key\",\n  gong_access_key_secret: \"Gong Access Key Secret\",\n\n  // Loopio\n  loopio_subdomain: \"Loopio Subdomain\",\n  loopio_client_id: \"Loopio Client ID\",\n  loopio_client_token: \"Loopio Client Token\",\n\n  // Linear\n  linear_access_token: \"Linear Access Token\",\n\n  // HubSpot\n  hubspot_access_token: \"HubSpot Access Token\",\n  // Document360\n  portal_id: \"Document360 Portal ID\",\n  document360_api_token: \"Document360 API Token\",\n\n  // Clickup\n  clickup_api_token: \"ClickUp API Token\",\n  clickup_team_id: \"ClickUp Team ID\",\n\n  // Zendesk\n  zendesk_subdomain: \"Zendesk Subdomain\",\n  zendesk_email: \"Zendesk Email\",\n  zendesk_token: \"Zendesk Token\",\n\n  // Dropbox\n  dropbox_access_token: \"Dropbox API Key\",\n\n  // R2\n  account_id: \"R2 Account ID\",\n  r2_access_key_id: \"R2 Access Key ID\",\n  r2_secret_access_key: \"R2 Secret Access Key\",\n\n  // IMAP\n  imap_username: \"IMAP Username\",\n  imap_password: \"IMAP Password\",\n\n  // TestRail\n  testrail_base_url: \"TestRail Base URL (e.g. https://yourcompany.testrail.io)\",\n  testrail_username: \"TestRail Username or Email\",\n  testrail_api_key: \"TestRail API Key\",\n\n  // S3\n  aws_access_key_id: \"AWS Access Key ID\",\n  aws_secret_access_key: \"AWS Secret Access Key\",\n  aws_role_arn: \"AWS Role ARN\",\n  authentication_method: \"Authentication Method\",\n\n  // GCS\n  access_key_id: \"GCS Access Key ID\",\n  secret_access_key: \"GCS Secret Access Key\",\n\n  // OCI\n  namespace: \"OCI Namespace\",\n  region: \"OCI Region\",\n\n  // Salesforce\n  sf_username: \"Salesforce Username\",\n  sf_password: \"Salesforce Password\",\n  sf_security_token: \"Salesforce Security Token\",\n  is_sandbox: \"Is Sandbox Environment\",\n\n  // Sharepoint\n  sp_client_id: \"SharePoint Client ID\",\n  sp_client_secret: \"SharePoint Client Secret\",\n  sp_directory_id: \"SharePoint Directory ID\",\n  sp_certificate_password: \"SharePoint Certificate Password\",\n  sp_private_key: \"SharePoint Private Key\",\n\n  // Asana\n  asana_api_token_secret: \"Asana API Token\",\n\n  // Teams\n  teams_client_id: \"Microsoft Teams Client ID\",\n  teams_client_secret: \"Microsoft Teams Client Secret\",\n  teams_directory_id: \"Microsoft Teams Directory ID\",\n\n  // Discourse\n  discourse_api_key: \"Discourse API Key\",\n  discourse_api_username: \"Discourse API Username\",\n\n  // Axero\n  base_url: \"Axero Base URL\",\n  axero_api_token: \"Axero API Token\",\n\n  // Freshdesk\n  freshdesk_domain: \"Freshdesk Domain\",\n  freshdesk_api_key: \"Freshdesk API Key\",\n\n  // Fireflies\n  fireflies_api_key: \"Fireflies API Key\",\n\n  // GitBook\n  gitbook_space_id: \"GitBook Space ID\",\n  gitbook_api_key: \"GitBook API Key\",\n\n  //Highspot\n  highspot_url: \"Highspot URL\",\n  highspot_key: \"Highspot Key\",\n  highspot_secret: \"Highspot Secret\",\n\n  // Drupal Wiki\n  drupal_wiki_api_token: \"Drupal Wiki Personal Access Token\",\n\n  // Bitbucket\n  bitbucket_email: \"Bitbucket Account Email\",\n  bitbucket_api_token: \"Bitbucket API Token\",\n};\n\nexport function getDisplayNameForCredentialKey(key: string): string {\n  return credentialDisplayNames[key] || key;\n}\n"
  },
  {
    "path": "web/src/lib/connectors/fileTypes.ts",
    "content": "export enum FileTypeCategory {\n  SHAREPOINT_PFX_FILE = \"sharepoint_pfx_file\",\n}\n\nexport interface FileValidationRule {\n  maxSizeKB?: number;\n  allowedExtensions?: string[];\n  contentValidation?: (file: File) => Promise<boolean>;\n}\n\nexport interface FileTypeDefinition {\n  category: FileTypeCategory;\n  validation?: FileValidationRule;\n  description?: string;\n}\n\nexport const FILE_TYPE_DEFINITIONS: Record<\n  FileTypeCategory,\n  FileTypeDefinition\n> = {\n  [FileTypeCategory.SHAREPOINT_PFX_FILE]: {\n    category: FileTypeCategory.SHAREPOINT_PFX_FILE,\n    validation: {\n      maxSizeKB: 10,\n      allowedExtensions: [\".pfx\"],\n    },\n    description:\n      \"Please upload a .pfx file containing the private key for SharePoint. The file size must be under 10KB.\",\n  },\n};\n\nexport class TypedFile {\n  constructor(\n    public readonly file: File,\n    public readonly typeDefinition: FileTypeDefinition,\n    public readonly fieldKey: string\n  ) {}\n\n  async validate(): Promise<{ isValid: boolean; errors: string[] }> {\n    const errors: string[] = [];\n    const { validation } = this.typeDefinition;\n\n    if (!validation) {\n      return {\n        isValid: true,\n        errors: [],\n      };\n    }\n\n    // Size validation\n    if (validation.maxSizeKB && this.file.size > validation.maxSizeKB * 1024) {\n      errors.push(`File size must not exceed ${validation.maxSizeKB}KB`);\n    }\n\n    // Extension validation\n    if (validation.allowedExtensions) {\n      const extension = this.file.name.toLowerCase().split(\".\").pop();\n      if (\n        !extension ||\n        !validation.allowedExtensions.includes(`.${extension}`)\n      ) {\n        errors.push(\n          `File must have one of these extensions: ${validation.allowedExtensions.join(\n            \", \"\n          )}`\n        );\n      }\n    }\n\n    // Content validation\n    if (validation.contentValidation) {\n      try {\n        const isContentValid = await validation.contentValidation(this.file);\n        if (!isContentValid) {\n          errors.push(`File content validation failed`);\n        }\n      } catch (error) {\n        errors.push(\n          `Content validation error: ${\n            error instanceof Error ? error.message : \"Unknown error\"\n          }`\n        );\n      }\n    }\n\n    return {\n      isValid: errors.length === 0,\n      errors,\n    };\n  }\n}\n\nexport function createTypedFile(\n  file: File,\n  fieldKey: string,\n  typeDefinitionKey: FileTypeCategory\n): TypedFile {\n  const typeDefinition = FILE_TYPE_DEFINITIONS[typeDefinitionKey];\n  if (!typeDefinition) {\n    throw new Error(`Unknown file type definition: ${typeDefinitionKey}`);\n  }\n\n  return new TypedFile(file, typeDefinition, fieldKey);\n}\n\nexport function isTypedFileField(fieldKey: string): boolean {\n  // Define which fields should be typed files\n  const typedFileFields = new Set([\"sp_private_key\"]);\n  return typedFileFields.has(fieldKey);\n}\n\n// Get the appropriate file type definition for a field\nexport function getFileTypeDefinitionForField(\n  fieldKey: string\n): FileTypeCategory | null {\n  const fieldToTypeMap: Record<string, FileTypeCategory> = {\n    sp_private_key: FileTypeCategory.SHAREPOINT_PFX_FILE,\n  };\n\n  return fieldToTypeMap[fieldKey] || null;\n}\n"
  },
  {
    "path": "web/src/lib/connectors/oauth.ts",
    "content": "import useSWR from \"swr\";\nimport { ValidSources } from \"../types\";\nimport { OAuthDetails } from \"./credentials\";\nimport { errorHandlingFetcher } from \"../fetcher\";\n\nexport async function getConnectorOauthRedirectUrl(\n  connector: ValidSources,\n  additional_kwargs: Record<string, string>\n): Promise<string | null> {\n  const queryParams = new URLSearchParams({\n    desired_return_url: window.location.href,\n    ...additional_kwargs,\n  });\n  const response = await fetch(\n    `/api/connector/oauth/authorize/${connector}?${queryParams.toString()}`\n  );\n\n  if (!response.ok) {\n    console.error(`Failed to fetch OAuth redirect URL for ${connector}`);\n    return null;\n  }\n\n  const data = await response.json();\n  return data.redirect_url as string;\n}\n\nexport function useOAuthDetails(sourceType: ValidSources) {\n  return useSWR<OAuthDetails>(\n    `/api/connector/oauth/details/${sourceType}`,\n    errorHandlingFetcher,\n    {\n      shouldRetryOnError: false,\n    }\n  );\n}\n"
  },
  {
    "path": "web/src/lib/constants/chatBackgrounds.ts",
    "content": "// Default chat background images\n\nexport const CHAT_BACKGROUND_NONE = \"none\";\n\nexport interface ChatBackgroundOption {\n  id: string;\n  src: string;\n  thumbnail: string;\n  label: string;\n}\n\n// Curated collection of scenic backgrounds that work well as chat backgrounds\nexport const CHAT_BACKGROUND_OPTIONS: ChatBackgroundOption[] = [\n  {\n    id: \"none\",\n    src: CHAT_BACKGROUND_NONE,\n    thumbnail: CHAT_BACKGROUND_NONE,\n    label: \"None\",\n  },\n  {\n    id: \"clouds\",\n    src: \"/chat-backgrounds/clouds.jpg\",\n    thumbnail: \"/chat-backgrounds/thumbnails/clouds.jpg\",\n    label: \"Clouds\",\n  },\n  {\n    id: \"hills\",\n    src: \"/chat-backgrounds/hills.jpg\",\n    thumbnail: \"/chat-backgrounds/thumbnails/hills.jpg\",\n    label: \"Hills\",\n  },\n  {\n    id: \"plant\",\n    src: \"/chat-backgrounds/plant.jpg\",\n    thumbnail: \"/chat-backgrounds/thumbnails/plant.jpg\",\n    label: \"Plants\",\n  },\n  {\n    id: \"mountains\",\n    src: \"/chat-backgrounds/mountains.jpg\",\n    thumbnail: \"/chat-backgrounds/thumbnails/mountains.jpg\",\n    label: \"Mountains\",\n  },\n  {\n    id: \"night\",\n    src: \"/chat-backgrounds/night.jpg\",\n    thumbnail: \"/chat-backgrounds/thumbnails/night.jpg\",\n    label: \"Night\",\n  },\n];\n\nexport const getBackgroundById = (\n  id: string | null\n): ChatBackgroundOption | undefined => {\n  if (!id || id === CHAT_BACKGROUND_NONE) {\n    return CHAT_BACKGROUND_OPTIONS[0];\n  }\n  return CHAT_BACKGROUND_OPTIONS.find((bg) => bg.id === id);\n};\n"
  },
  {
    "path": "web/src/lib/constants.ts",
    "content": "export const IS_DEV = process.env.NODE_ENV === \"development\";\n\nexport enum AuthType {\n  BASIC = \"basic\",\n  GOOGLE_OAUTH = \"google_oauth\",\n  OIDC = \"oidc\",\n  SAML = \"saml\",\n  CLOUD = \"cloud\",\n}\n\nexport const HOST_URL = process.env.WEB_DOMAIN || \"http://localhost:3000\";\n\nexport const INTERNAL_URL = process.env.INTERNAL_URL || \"http://localhost:8080\";\n\n// Documentation URLs\nexport const DOCS_BASE_URL = \"https://docs.onyx.app\";\nexport const DOCS_ADMINS_PATH = `${DOCS_BASE_URL}/admins`;\n\nexport const MCP_INTERNAL_URL =\n  process.env.MCP_INTERNAL_URL || \"http://127.0.0.1:8090\";\n\n// NOTE: this should ONLY be used on the server-side (including middleware).\n// The AUTH_TYPE environment variable is set in the backend and shared with Next.js\nexport const SERVER_SIDE_ONLY__AUTH_TYPE = (process.env.AUTH_TYPE ||\n  AuthType.BASIC) as AuthType;\n\nexport const NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED =\n  process.env.NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED?.toLowerCase() ===\n  \"true\";\n\nexport const TENANT_ID_COOKIE_NAME = \"onyx_tid\";\n\nexport const SEARCH_TYPE_COOKIE_NAME = \"search_type\";\nexport const AGENTIC_SEARCH_TYPE_COOKIE_NAME = \"agentic_type\";\n\nexport const LOGOUT_DISABLED =\n  process.env.NEXT_PUBLIC_DISABLE_LOGOUT?.toLowerCase() === \"true\";\n\nexport const TOGGLED_CONNECTORS_COOKIE_NAME = \"toggled_connectors\";\n\n/* Enterprise-only settings */\nexport const NEXT_PUBLIC_CUSTOM_REFRESH_URL =\n  process.env.NEXT_PUBLIC_CUSTOM_REFRESH_URL;\n\n// NOTE: this should ONLY be used on the server-side. If used client side,\n// it will not be accurate (will always be false).\n// Mirrors backend logic: EE is enabled if EITHER the legacy flag OR license\n// enforcement is active. LICENSE_ENFORCEMENT_ENABLED defaults to true on the\n// backend, so we treat undefined as enabled here to match.\nexport const SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED =\n  process.env.ENABLE_PAID_ENTERPRISE_EDITION_FEATURES?.toLowerCase() ===\n    \"true\" ||\n  process.env.LICENSE_ENFORCEMENT_ENABLED?.toLowerCase() !== \"false\";\n// NOTE: since this is a `NEXT_PUBLIC_` variable, it will be set at\n// build-time\n// TODO: consider moving this to an API call so that the api_server\n// can be the single source of truth\nexport const EE_ENABLED =\n  process.env.NEXT_PUBLIC_ENABLE_PAID_EE_FEATURES?.toLowerCase() === \"true\";\n\nexport const CUSTOM_ANALYTICS_ENABLED = process.env.CUSTOM_ANALYTICS_SECRET_KEY\n  ? true\n  : false;\n\nexport const GTM_ENABLED =\n  process.env.NEXT_PUBLIC_GTM_ENABLED?.toLowerCase() === \"true\";\n\nexport const NEXT_PUBLIC_CLOUD_ENABLED =\n  process.env.NEXT_PUBLIC_CLOUD_ENABLED?.toLowerCase() === \"true\";\n\nexport const REGISTRATION_URL =\n  process.env.INTERNAL_URL || \"http://127.0.0.1:3001\";\n\nexport const SERVER_SIDE_ONLY__CLOUD_ENABLED =\n  process.env.NEXT_PUBLIC_CLOUD_ENABLED?.toLowerCase() === \"true\";\n\nexport const NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED =\n  process.env.NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED?.toLowerCase() === \"true\" &&\n  !NEXT_PUBLIC_CLOUD_ENABLED;\n\nexport const NEXT_PUBLIC_TEST_ENV =\n  process.env.NEXT_PUBLIC_TEST_ENV?.toLowerCase() === \"true\";\n\nexport const NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK =\n  process.env.NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK?.toLowerCase() ===\n  \"true\";\n\n// Restrict markdown links to safe protocols\nexport const ALLOWED_URL_PROTOCOLS = [\"http:\", \"https:\", \"mailto:\"] as const;\n\n// Agent/Persona related constants\nexport const MAX_CHARACTERS_PERSONA_DESCRIPTION = 5000000;\nexport const MAX_CHARACTERS_AGENT_DESCRIPTION = 500;\nexport const MAX_STARTER_MESSAGES = 4;\nexport const MAX_CHARACTERS_STARTER_MESSAGE = 200;\nexport const STARTER_MESSAGES_EXAMPLES = [\n  \"Give me an overview of some documents.\",\n  \"Find the latest sales report.\",\n  \"Compile a list of our engineering goals for this quarter.\",\n  \"Summarize my goals for today.\",\n];\n\n//Credential form data key constants\nexport const CREDENTIAL_NAME = \"name\";\nexport const CREDENTIAL_SOURCE = \"source\";\nexport const CREDENTIAL_UPLOADED_FILE = \"uploaded_file\";\nexport const CREDENTIAL_FIELD_KEY = \"field_key\";\nexport const CREDENTIAL_TYPE_DEFINITION_KEY = \"type_definition_key\";\nexport const CREDENTIAL_JSON = \"credential_json\";\n\nexport const MODAL_ROOT_ID = \"modal-root\";\n\nexport const UNNAMED_CHAT = \"New Chat\";\n\nexport const DEFAULT_AGENT_ID = 0;\nexport const GENERAL_ASSISTANT_ID = -1;\nexport const IMAGE_ASSISTANT_ID = -2;\nexport const ART_ASSISTANT_ID = -3;\n\n// Used in the File Picker to show a max number of files.\n// The rest will be hidden behind an \"All Recent Files\" button.\nexport const MAX_FILES_TO_SHOW = 3;\n\n// SIZES\nexport const MOBILE_SIDEBAR_BREAKPOINT_PX = 640;\nexport const DESKTOP_SMALL_BREAKPOINT_PX = 912;\nexport const DESKTOP_MEDIUM_BREAKPOINT_PX = 1232;\nexport const DEFAULT_AVATAR_SIZE_PX = 18;\nexport const HORIZON_DISTANCE_PX = 800;\nexport const DEFAULT_LOGO_SIZE_PX = 24;\n\nexport const DEFAULT_CONTEXT_TOKENS = 120_000;\nexport const MAX_CHUNKS_FED_TO_CHAT = 25;\n\nexport const APP_SLOGAN = \"Open Source AI Platform\";\n"
  },
  {
    "path": "web/src/lib/contains.ts",
    "content": "import { RefObject } from \"react\";\n\ninterface SomeNonNestedObject {\n  [key: string]: any;\n}\n\nexport function objectsAreEquivalent(\n  a: SomeNonNestedObject,\n  b: SomeNonNestedObject\n): boolean {\n  // NOTE: only works for non-nested objects\n  const aProps = Object.getOwnPropertyNames(a);\n  const bProps = Object.getOwnPropertyNames(b);\n\n  if (aProps.length !== bProps.length) {\n    return false;\n  }\n\n  for (let i = 0; i < aProps.length; i++) {\n    const propName = aProps[i];\n    if (propName === undefined) {\n      continue;\n    }\n\n    if (a[propName] !== b[propName]) {\n      return false;\n    }\n  }\n\n  return true;\n}\n\nexport function containsObject(\n  list: SomeNonNestedObject[],\n  obj: SomeNonNestedObject\n): boolean {\n  // NOTE: only works for non-nested objects\n  return list.some((item) => objectsAreEquivalent(item, obj));\n}\n\nexport function isEventWithinRef(\n  event: MouseEvent | TouchEvent,\n  ref: RefObject<HTMLElement>\n): boolean {\n  if (!ref.current) return false;\n\n  const rect = ref.current.getBoundingClientRect();\n\n  let clientX: number;\n  let clientY: number;\n  if (event instanceof TouchEvent) {\n    const touches_0 = event.touches[0];\n    if (touches_0 === undefined) {\n      throw new Error(\"Touch event must exist!\");\n    }\n    clientX = touches_0.clientX;\n    clientY = touches_0.clientY;\n  } else {\n    clientX = event.clientX;\n    clientY = event.clientY;\n  }\n\n  return (\n    clientX >= rect.left &&\n    clientX <= rect.right &&\n    clientY >= rect.top &&\n    clientY <= rect.bottom\n  );\n}\n"
  },
  {
    "path": "web/src/lib/credential.ts",
    "content": "import {\n  CredentialBase,\n  CredentialWithPrivateKey,\n} from \"./connectors/credentials\";\nimport { AccessType, ProcessingMode } from \"@/lib/types\";\nimport { TypedFile } from \"./connectors/fileTypes\";\nimport {\n  CREDENTIAL_NAME,\n  CREDENTIAL_SOURCE,\n  CREDENTIAL_UPLOADED_FILE,\n  CREDENTIAL_FIELD_KEY,\n  CREDENTIAL_TYPE_DEFINITION_KEY,\n  CREDENTIAL_JSON,\n} from \"./constants\";\n\nexport async function createCredential(credential: CredentialBase<any>) {\n  return await fetch(`/api/manage/credential`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(credential),\n  });\n}\n\nexport async function createCredentialWithPrivateKey(\n  credential: CredentialWithPrivateKey<any>\n) {\n  const formData = new FormData();\n  formData.append(CREDENTIAL_JSON, JSON.stringify(credential.credential_json));\n  formData.append(\"admin_public\", credential.admin_public.toString());\n  formData.append(\n    \"curator_public\",\n    credential.curator_public?.toString() || \"false\"\n  );\n  if (credential.groups && credential.groups.length > 0) {\n    credential.groups.forEach((group) => {\n      formData.append(\"groups\", String(group));\n    });\n  }\n  formData.append(CREDENTIAL_NAME, credential.name || \"\");\n  formData.append(CREDENTIAL_SOURCE, credential.source);\n  if (credential.private_key) {\n    formData.append(CREDENTIAL_UPLOADED_FILE, credential.private_key.file);\n    formData.append(CREDENTIAL_FIELD_KEY, credential.private_key.fieldKey);\n    formData.append(\n      CREDENTIAL_TYPE_DEFINITION_KEY,\n      credential.private_key.typeDefinition.category\n    );\n  }\n  return await fetch(`/api/manage/credential/private-key`, {\n    method: \"POST\",\n    body: formData,\n  });\n}\n\nexport async function adminDeleteCredential<T>(credentialId: number) {\n  return await fetch(`/api/manage/admin/credential/${credentialId}`, {\n    method: \"DELETE\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n}\n\nexport async function deleteCredential<T>(\n  credentialId: number,\n  force?: boolean\n) {\n  return await fetch(`/api/manage/credential/${credentialId}`, {\n    method: \"DELETE\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n}\n\nexport async function forceDeleteCredential<T>(credentialId: number) {\n  return await fetch(`/api/manage/credential/force/${credentialId}`, {\n    method: \"DELETE\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n}\n\nexport function linkCredential(\n  connectorId: number,\n  credentialId: number,\n  name: string,\n  accessType?: AccessType,\n  groups?: number[],\n  autoSyncOptions?: Record<string, any>,\n  processingMode?: ProcessingMode\n) {\n  return fetch(\n    `/api/manage/connector/${connectorId}/credential/${credentialId}`,\n    {\n      method: \"PUT\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        name,\n        access_type: accessType !== undefined ? accessType : \"public\",\n        groups: groups || null,\n        auto_sync_options: autoSyncOptions || null,\n        processing_mode: processingMode || \"REGULAR\",\n      }),\n    }\n  );\n}\n\nexport function updateCredential(credentialId: number, newDetails: any) {\n  const name = newDetails.name;\n  const details = Object.fromEntries(\n    Object.entries(newDetails).filter(\n      ([key, value]) => key !== CREDENTIAL_NAME && value !== \"\"\n    )\n  );\n  return fetch(`/api/manage/admin/credential/${credentialId}`, {\n    method: \"PUT\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      name: name,\n      credential_json: details,\n    }),\n  });\n}\n\nexport function updateCredentialWithPrivateKey(\n  credentialId: number,\n  newDetails: any,\n  privateKey: TypedFile\n) {\n  const name = newDetails.name;\n  const details = Object.fromEntries(\n    Object.entries(newDetails).filter(\n      ([key, value]) => key !== CREDENTIAL_NAME && value !== \"\"\n    )\n  );\n  const formData = new FormData();\n  formData.append(CREDENTIAL_NAME, name);\n  formData.append(CREDENTIAL_JSON, JSON.stringify(details));\n  formData.append(CREDENTIAL_UPLOADED_FILE, privateKey.file);\n  formData.append(CREDENTIAL_FIELD_KEY, privateKey.fieldKey);\n  formData.append(\n    CREDENTIAL_TYPE_DEFINITION_KEY,\n    privateKey.typeDefinition.category\n  );\n  return fetch(`/api/manage/admin/credential/private-key/${credentialId}`, {\n    method: \"PUT\",\n    body: formData,\n  });\n}\n\nexport function swapCredential(\n  newCredentialId: number,\n  connectorId: number,\n  accessType: AccessType\n) {\n  return fetch(`/api/manage/admin/credential/swap`, {\n    method: \"PUT\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      new_credential_id: newCredentialId,\n      connector_id: connectorId,\n      access_type: accessType,\n    }),\n  });\n}\n"
  },
  {
    "path": "web/src/lib/dateUtils.ts",
    "content": "\"use client\";\n\nimport { useEffect } from \"react\";\nimport { useState } from \"react\";\n\nexport const useNightTime = () => {\n  const [isNight, setIsNight] = useState(false);\n\n  useEffect(() => {\n    const checkNightTime = () => {\n      const currentHour = new Date().getHours();\n      setIsNight(currentHour >= 18 || currentHour < 6);\n    };\n\n    checkNightTime();\n    const interval = setInterval(checkNightTime, 60000); // Check every minute\n\n    return () => clearInterval(interval);\n  }, []);\n\n  return { isNight };\n};\n\nexport function getXDaysAgo(daysAgo: number) {\n  const today = new Date();\n  const daysAgoDate = new Date(today);\n  daysAgoDate.setDate(today.getDate() - daysAgo);\n  return daysAgoDate;\n}\n\nexport function getXYearsAgo(yearsAgo: number) {\n  const today = new Date();\n  const yearsAgoDate = new Date(today);\n  yearsAgoDate.setFullYear(yearsAgoDate.getFullYear() - yearsAgo);\n  return yearsAgoDate;\n}\n\nexport function normalizeDate(date: Date): Date {\n  const normalizedDate = new Date(date);\n  normalizedDate.setHours(0, 0, 0, 0);\n  return normalizedDate;\n}\n\nexport function isAfterDate(date: Date, maxDate: Date): boolean {\n  return normalizeDate(date).getTime() > normalizeDate(maxDate).getTime();\n}\n\nexport function isDateInFuture(date: Date): boolean {\n  return isAfterDate(date, new Date());\n}\n\nexport const timestampToDateString = (timestamp: string) => {\n  const date = new Date(timestamp);\n  const year = date.getFullYear();\n  const month = date.getMonth() + 1; // getMonth() is zero-based\n  const day = date.getDate();\n\n  const formattedDate = `${year}-${month.toString().padStart(2, \"0\")}-${day\n    .toString()\n    .padStart(2, \"0\")}`;\n  return formattedDate;\n};\n\n// Options for formatting the date\nconst dateOptions: Intl.DateTimeFormatOptions = {\n  year: \"numeric\",\n  month: \"2-digit\",\n  day: \"2-digit\",\n};\n\n// Options for formatting the time\nconst timeOptions: Intl.DateTimeFormatOptions = {\n  hour: \"numeric\",\n  minute: \"2-digit\",\n  hour12: true, // Use 12-hour format with AM/PM\n};\n\nexport const timestampToReadableDate = (timestamp: string) => {\n  const date = new Date(timestamp);\n  return (\n    date.toLocaleDateString(undefined, dateOptions) +\n    \", \" +\n    date.toLocaleTimeString(undefined, timeOptions)\n  );\n};\n\nexport const buildDateString = (date: Date | null) => {\n  return date\n    ? `${Math.round(\n        (new Date().getTime() - date.getTime()) / (1000 * 60 * 60 * 24)\n      )} days ago`\n    : \"Select a time range\";\n};\n\nexport const getFormattedDateRangeString = (\n  from: Date | null,\n  to: Date | null\n) => {\n  if (!from || !to) return null;\n\n  const options: Intl.DateTimeFormatOptions = {\n    month: \"short\",\n    day: \"numeric\",\n    year: \"numeric\",\n  };\n  const fromString = from.toLocaleDateString(\"en-US\", options);\n  const toString = to.toLocaleDateString(\"en-US\", options);\n\n  return `${fromString} - ${toString}`;\n};\n\nexport const getDateRangeString = (from: Date | null, to: Date | null) => {\n  if (!from || !to) return null;\n\n  const now = new Date();\n  const fromDiffMs = now.getTime() - from.getTime();\n  const toDiffMs = now.getTime() - to.getTime();\n\n  const fromDiffDays = Math.floor(fromDiffMs / (1000 * 60 * 60 * 24));\n  const toDiffDays = Math.floor(toDiffMs / (1000 * 60 * 60 * 24));\n\n  const fromString = getTimeAgoString(from);\n  const toString = getTimeAgoString(to);\n\n  if (fromString === toString) return fromString;\n\n  if (toDiffDays === 0) {\n    return `${fromString} - Today`;\n  }\n\n  return `${fromString} - ${toString}`;\n};\n\nexport const getTimeAgoString = (date: Date | null) => {\n  if (!date) return null;\n\n  const now = new Date();\n  const diffMs = now.getTime() - date.getTime();\n  const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));\n  const diffWeeks = Math.floor(diffDays / 7);\n  const diffMonths = Math.floor(diffDays / 30);\n\n  if (now.toDateString() === date.toDateString()) return \"Today\";\n  if (diffDays === 1) return \"Yesterday\";\n  if (diffDays < 7) return `${diffDays}d ago`;\n  if (diffDays < 30) return `${diffWeeks}w ago`;\n  return `${diffMonths}mo ago`;\n};\n\n/**\n * Format a date to short format like \"Jan 27, 2026\".\n * Always shows date, never time.\n */\nexport const formatDateShort = (dateStr: string | null | undefined): string => {\n  if (!dateStr) return \"—\";\n  return new Date(dateStr).toLocaleDateString(\"en-US\", {\n    month: \"short\",\n    day: \"numeric\",\n    year: \"numeric\",\n  });\n};\n\n/**\n * Format an ISO timestamp as \"YYYY/MM/DD HH:MM:SS\" (24-hour, local time).\n * Intended for log displays where full precision is needed.\n */\nexport function formatDateTimeLog(iso: string): string {\n  const d = new Date(iso);\n  const pad = (n: number) => String(n).padStart(2, \"0\");\n  return `${d.getFullYear()}/${pad(d.getMonth() + 1)}/${pad(d.getDate())} ${pad(\n    d.getHours()\n  )}:${pad(d.getMinutes())}:${pad(d.getSeconds())}`;\n}\n\n/**\n * Format an ISO timestamp as \"HH:MM:SS\" (24-hour, local time).\n * Intended for compact time-only displays.\n */\nexport function formatTimeOnly(iso: string): string {\n  return new Date(iso).toLocaleTimeString(undefined, {\n    hour: \"2-digit\",\n    minute: \"2-digit\",\n    second: \"2-digit\",\n    hour12: false,\n  });\n}\n\nexport function formatMmDdYyyy(d: string): string {\n  const date = new Date(d);\n  return `${date.getMonth() + 1}/${date.getDate()}/${date.getFullYear()}`;\n}\n\n/**\n * Format a duration in seconds as MM:SS (e.g. 65 → \"01:05\").\n */\nexport function formatElapsedTime(totalSeconds: number): string {\n  const minutes = Math.floor(totalSeconds / 60);\n  const seconds = totalSeconds % 60;\n  return `${minutes.toString().padStart(2, \"0\")}:${seconds\n    .toString()\n    .padStart(2, \"0\")}`;\n}\n\nexport const getFormattedDateTime = (date: Date | null) => {\n  if (!date) return null;\n\n  const now = new Date();\n  const isToday = now.toDateString() === date.toDateString();\n\n  if (isToday) {\n    // If it's today, return the time in format like \"3:45 PM\"\n    return date.toLocaleTimeString(\"en-US\", {\n      hour: \"numeric\",\n      minute: \"2-digit\",\n      hour12: true,\n    });\n  } else {\n    // Otherwise return the date in format like \"Jan 15, 2023\"\n    return date.toLocaleDateString(\"en-US\", {\n      month: \"short\",\n      day: \"numeric\",\n      year: \"numeric\",\n    });\n  }\n};\n"
  },
  {
    "path": "web/src/lib/documentDeletion.ts",
    "content": "import { toast } from \"@/hooks/useToast\";\nimport { DeletionAttemptSnapshot } from \"./types\";\n\nexport async function scheduleDeletionJobForConnector(\n  connectorId: number,\n  credentialId: number\n) {\n  // Will schedule a background job which will:\n  // 1. Remove all documents indexed by the connector / credential pair\n  // 2. Remove the connector (if this is the only pair using the connector)\n  const response = await fetch(`/api/manage/admin/deletion-attempt`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      connector_id: connectorId,\n      credential_id: credentialId,\n    }),\n  });\n  if (response.ok) {\n    return null;\n  }\n  return (await response.json()).detail;\n}\n\nexport async function deleteCCPair(\n  connectorId: number,\n  credentialId: number,\n  onCompletion?: () => void\n) {\n  const deletionScheduleError = await scheduleDeletionJobForConnector(\n    connectorId,\n    credentialId\n  );\n  if (deletionScheduleError) {\n    throw new Error(deletionScheduleError);\n  }\n  toast.success(\"Scheduled deletion of connector!\");\n  onCompletion?.();\n}\n\nexport function isCurrentlyDeleting(\n  deletionAttempt: DeletionAttemptSnapshot | null\n) {\n  if (!deletionAttempt) {\n    return false;\n  }\n\n  return (\n    deletionAttempt.status === \"PENDING\" || deletionAttempt.status === \"STARTED\"\n  );\n}\n"
  },
  {
    "path": "web/src/lib/documentUtils.ts",
    "content": "import { OnyxDocument } from \"./search/interfaces\";\n\nexport function removeDuplicateDocs(\n  documents: OnyxDocument[],\n  agentic?: boolean,\n  relevance?: any\n) {\n  const seen = new Set<string>();\n  const output: OnyxDocument[] = [];\n  documents.forEach((document) => {\n    if (\n      document.document_id &&\n      !seen.has(document.document_id) &&\n      (!agentic || (agentic && relevance && relevance[document.document_id]))\n    ) {\n      output.push(document);\n      seen.add(document.document_id);\n    }\n  });\n  return output;\n}\n"
  },
  {
    "path": "web/src/lib/download.ts",
    "content": "/**\n * Trigger a browser file download.\n *\n * Supports two modes:\n *  1. **From content** — pass `content` (string) and optional `mimeType`.\n *     A Blob is created, downloaded, and the object URL is revoked.\n *  2. **From URL** — pass `url` (string). The browser navigates to the\n *     URL with the `download` attribute set.\n */\nexport function downloadFile(\n  filename: string,\n  opts: { content: string; mimeType?: string } | { url: string }\n): void {\n  const a = document.createElement(\"a\");\n\n  if (\"content\" in opts) {\n    const blob = new Blob([opts.content], {\n      type: opts.mimeType ?? \"text/plain\",\n    });\n    const url = URL.createObjectURL(blob);\n    a.href = url;\n    a.download = filename;\n    document.body.appendChild(a);\n    a.click();\n    document.body.removeChild(a);\n    setTimeout(() => URL.revokeObjectURL(url), 0);\n  } else {\n    a.href = opts.url;\n    a.download = filename;\n    document.body.appendChild(a);\n    a.click();\n    document.body.removeChild(a);\n  }\n}\n"
  },
  {
    "path": "web/src/lib/drag/constants.ts",
    "content": "export const CHAT_SESSION_ID_KEY = \"chatSessionId\";\n"
  },
  {
    "path": "web/src/lib/error.ts",
    "content": "/**\n * Extract a human-readable error message from an SWR error object.\n * SWR errors from `errorHandlingFetcher` attach `info.message` or `info.detail`.\n */\nexport function getErrorMsg(\n  error: { info?: { message?: string; detail?: string } } | null | undefined,\n  fallback = \"An unknown error occurred\"\n): string {\n  return error?.info?.message || error?.info?.detail || fallback;\n}\n"
  },
  {
    "path": "web/src/lib/extension/constants.ts",
    "content": "export const darkExtensionImages = [\n  \"https://images.unsplash.com/photo-1692520883599-d543cfe6d43d?q=80&w=2666&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D\",\n  \"https://images.unsplash.com/photo-1520330461350-508fab483d6a?q=80&w=2723&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D\",\n];\n\nexport const lightExtensionImages = [\n  \"https://images.unsplash.com/photo-1473830439578-14e9a9e61d55?q=80&w=2670&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D\",\n  \"https://images.unsplash.com/photo-1500964757637-c85e8a162699?q=80&w=2703&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D\",\n  \"https://images.unsplash.com/photo-1475924156734-496f6cac6ec1?q=80&w=2670&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D\",\n];\n\n// Chrome message types\nexport const CHROME_MESSAGE = {\n  PREFERENCES_UPDATED: \"PREFERENCES_UPDATED\",\n  ONYX_APP_LOADED: \"ONYX_APP_LOADED\",\n  SET_DEFAULT_NEW_TAB: \"SET_DEFAULT_NEW_TAB\",\n  LOAD_NEW_CHAT_PAGE: \"LOAD_NEW_CHAT_PAGE\",\n  LOAD_NEW_PAGE: \"LOAD_NEW_PAGE\",\n  AUTH_REQUIRED: \"AUTH_REQUIRED\",\n  TAB_READING_ENABLED: \"TAB_READING_ENABLED\",\n  TAB_READING_DISABLED: \"TAB_READING_DISABLED\",\n  TAB_URL_UPDATED: \"TAB_URL_UPDATED\",\n};\n\nexport const SUBMIT_MESSAGE_TYPES = {\n  PAGE_CHANGE: \"PAGE_CHANGE\",\n};\n\nexport const LocalStorageKeys = {\n  THEME: \"onyxTheme\",\n  LIGHT_BG_URL: \"lightBgUrl\",\n  DARK_BG_URL: \"darkBgUrl\",\n  SHORTCUTS: \"shortCuts\",\n  SHOW_SHORTCUTS: \"showShortcuts\",\n  USE_ONYX_AS_NEW_TAB: \"useOnyxAsDefaultNewTab\",\n};\n\nexport const SEARCH_PARAMS = {\n  DEFAULT_SIDEBAR_OFF: \"defaultSidebarOff\",\n};\n\nexport const NO_AUTH_USER_ID = \"__no_auth_user__\";\n"
  },
  {
    "path": "web/src/lib/extension/utils.ts",
    "content": "import { useEffect } from \"react\";\nimport { CHROME_MESSAGE } from \"./constants\";\n\nexport type ExtensionContext = \"new_tab\" | \"side_panel\" | null;\n\n// Returns the origin of the Chrome extension panel (our parent frame).\n// window.location.ancestorOrigins is Chrome-specific and only populated\n// when the page is loaded inside an iframe (e.g. the Chrome extension panel).\n// Falls back to \"*\" in regular browser contexts (no parent frame).\nexport function getPanelOrigin(): string {\n  return window.location.ancestorOrigins?.[0] ?? \"*\";\n}\n\nexport function getExtensionContext(): {\n  isExtension: boolean;\n  context: ExtensionContext;\n} {\n  if (typeof window === \"undefined\")\n    return { isExtension: false, context: null };\n\n  const pathname = window.location.pathname;\n  if (pathname.includes(\"/nrf/side-panel\")) {\n    return { isExtension: true, context: \"side_panel\" };\n  }\n  if (pathname.includes(\"/nrf\")) {\n    return { isExtension: true, context: \"new_tab\" };\n  }\n  return { isExtension: false, context: null };\n}\nexport function sendSetDefaultNewTabMessage(value: boolean) {\n  if (typeof window !== \"undefined\" && window.parent !== window) {\n    window.parent.postMessage(\n      { type: CHROME_MESSAGE.SET_DEFAULT_NEW_TAB, value },\n      getPanelOrigin()\n    );\n  }\n}\n\nexport const sendAuthRequiredMessage = () => {\n  if (typeof window !== \"undefined\" && window.parent !== window) {\n    window.parent.postMessage(\n      { type: CHROME_MESSAGE.AUTH_REQUIRED },\n      getPanelOrigin()\n    );\n  }\n};\n\nexport const useSendAuthRequiredMessage = () => {\n  useEffect(() => {\n    sendAuthRequiredMessage();\n  }, []);\n};\n\nexport const sendMessageToParent = () => {\n  if (typeof window !== \"undefined\" && window.parent !== window) {\n    window.parent.postMessage(\n      { type: CHROME_MESSAGE.ONYX_APP_LOADED },\n      getPanelOrigin()\n    );\n  }\n};\nexport const useSendMessageToParent = () => {\n  useEffect(() => {\n    sendMessageToParent();\n  }, []);\n};\n"
  },
  {
    "path": "web/src/lib/fetchUtils.ts",
    "content": "export const getErrorMsg = async (response: Response) => {\n  if (response.ok) {\n    return null;\n  }\n  const responseJson = await response.json();\n  return responseJson.message || responseJson.detail || \"Unknown error\";\n};\n"
  },
  {
    "path": "web/src/lib/fetcher.ts",
    "content": "export class FetchError extends Error {\n  status: number;\n  info: any;\n  constructor(message: string, status: number, info: any) {\n    super(message);\n    this.status = status;\n    this.info = info;\n  }\n}\n\nexport class RedirectError extends FetchError {\n  constructor(message: string, status: number, info: any) {\n    super(message, status, info);\n  }\n}\n\nconst DEFAULT_AUTH_ERROR_MSG =\n  \"An error occurred while fetching the data, related to the user's authentication status.\";\n\nconst DEFAULT_ERROR_MSG = \"An error occurred while fetching the data.\";\n\n/**\n * SWR `onErrorRetry` callback that suppresses automatic retries for\n * authentication errors (401/403). Pass this to any SWR hook whose endpoint\n * requires auth so that unauthenticated pages don't spam the backend.\n */\nexport const skipRetryOnAuthError: NonNullable<\n  import(\"swr\").SWRConfiguration[\"onErrorRetry\"]\n> = (error, _key, _config, revalidate, { retryCount }) => {\n  if (\n    error instanceof FetchError &&\n    (error.status === 401 || error.status === 403)\n  )\n    return;\n  // For non-auth errors, retry with exponential backoff\n  if (\n    _config.errorRetryCount !== undefined &&\n    retryCount >= _config.errorRetryCount\n  )\n    return;\n  const delay = Math.min(2000 * 2 ** retryCount, 30000);\n  setTimeout(() => revalidate({ retryCount }), delay);\n};\n\nexport const errorHandlingFetcher = async <T>(url: string): Promise<T> => {\n  const res = await fetch(url);\n\n  if (res.status === 403) {\n    const redirect = new RedirectError(\n      DEFAULT_AUTH_ERROR_MSG,\n      res.status,\n      await res.json()\n    );\n    throw redirect;\n  }\n\n  if (!res.ok) {\n    const error = new FetchError(\n      DEFAULT_ERROR_MSG,\n      res.status,\n      await res.json()\n    );\n    throw error;\n  }\n\n  return res.json();\n};\n"
  },
  {
    "path": "web/src/lib/fileConnector.ts",
    "content": "export interface ConnectorFileInfo {\n  file_id: string;\n  file_name: string;\n  file_size?: number;\n  upload_date?: string;\n}\n\nexport interface ConnectorFilesResponse {\n  files: ConnectorFileInfo[];\n}\n\nexport interface FileUploadResponse {\n  file_paths: string[];\n  file_names: string[];\n  zip_metadata_file_id: string | null;\n}\n\nexport async function updateConnectorFiles(\n  connectorId: number,\n  fileIdsToRemove: string[],\n  filesToAdd: File[]\n): Promise<void> {\n  const formData = new FormData();\n\n  // Add files to remove as JSON\n  formData.append(\"file_ids_to_remove\", JSON.stringify(fileIdsToRemove));\n\n  // Add new files\n  filesToAdd.forEach((file) => {\n    formData.append(\"files\", file);\n  });\n\n  const response = await fetch(\n    `/api/manage/admin/connector/${connectorId}/files/update`,\n    {\n      method: \"POST\",\n      body: formData,\n    }\n  );\n\n  if (!response.ok) {\n    const error = await response.json();\n    throw new Error(\n      `Failed to update connector files (${response.status}): ${\n        error.detail || \"Unknown error\"\n      }`\n    );\n  }\n}\n"
  },
  {
    "path": "web/src/lib/filters.ts",
    "content": "import { Persona } from \"@/app/admin/agents/interfaces\";\nimport { DocumentSetSummary, ValidSources } from \"./types\";\nimport { getSourcesForPersona } from \"./sources\";\n\nexport function computeAvailableFilters({\n  selectedPersona,\n  availableSources,\n  availableDocumentSets,\n}: {\n  selectedPersona: Persona | undefined | null;\n  availableSources: ValidSources[];\n  availableDocumentSets: DocumentSetSummary[];\n}): [ValidSources[], DocumentSetSummary[]] {\n  const finalAvailableSources =\n    selectedPersona && selectedPersona.document_sets.length\n      ? getSourcesForPersona(selectedPersona)\n      : availableSources;\n\n  // only display document sets that are available to the persona\n  // in filters\n  const personaDocumentSetIds =\n    selectedPersona && selectedPersona.document_sets.length\n      ? selectedPersona.document_sets.map((documentSet) => documentSet.id)\n      : null;\n  const finalAvailableDocumentSets = personaDocumentSetIds\n    ? availableDocumentSets.filter((documentSet) =>\n        personaDocumentSetIds.includes(documentSet.id)\n      )\n    : availableDocumentSets;\n\n  return [finalAvailableSources, finalAvailableDocumentSets];\n}\n"
  },
  {
    "path": "web/src/lib/generated/README.md",
    "content": "- Generated Files\n* Generated files live here. This directory should be git ignored.\n"
  },
  {
    "path": "web/src/lib/gmail.ts",
    "content": "import { Credential } from \"./connectors/credentials\";\n\nexport const setupGmailOAuth = async ({\n  isAdmin,\n}: {\n  isAdmin: boolean;\n}): Promise<[string | null, string]> => {\n  const credentialCreationResponse = await fetch(\"/api/manage/credential\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      admin_public: isAdmin,\n      credential_json: {},\n      source: \"gmail\",\n    }),\n  });\n  if (!credentialCreationResponse.ok) {\n    return [\n      null,\n      `Failed to create credential - ${credentialCreationResponse.status}`,\n    ];\n  }\n  const credential =\n    (await credentialCreationResponse.json()) as Credential<{}>;\n\n  const authorizationUrlResponse = await fetch(\n    `/api/manage/connector/gmail/authorize/${credential.id}`\n  );\n  if (!authorizationUrlResponse.ok) {\n    return [\n      null,\n      `Failed to create credential - ${authorizationUrlResponse.status}`,\n    ];\n  }\n  const authorizationUrlJson = (await authorizationUrlResponse.json()) as {\n    auth_url: string;\n  };\n\n  return [authorizationUrlJson.auth_url, \"\"];\n};\n"
  },
  {
    "path": "web/src/lib/googleConnector.ts",
    "content": "import useSWR, { mutate } from \"swr\";\nimport { FetchError, errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { Credential } from \"@/lib/connectors/credentials\";\nimport { ConnectorSnapshot } from \"@/lib/connectors/connectors\";\nimport { ValidSources } from \"@/lib/types\";\nimport { buildSimilarCredentialInfoURL } from \"@/app/admin/connector/[ccPairId]/lib\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\n// Constants for service names to avoid typos\nexport const GOOGLE_SERVICES = {\n  GMAIL: \"gmail\",\n  GOOGLE_DRIVE: \"google-drive\",\n} as const;\n\nexport const useGoogleAppCredential = (service: \"gmail\" | \"google_drive\") => {\n  const endpoint = `/api/manage/admin/connector/${\n    service === \"gmail\" ? GOOGLE_SERVICES.GMAIL : GOOGLE_SERVICES.GOOGLE_DRIVE\n  }/app-credential`;\n\n  return useSWR<{ client_id: string }, FetchError>(\n    endpoint,\n    errorHandlingFetcher\n  );\n};\n\nexport const useGoogleServiceAccountKey = (\n  service: \"gmail\" | \"google_drive\"\n) => {\n  const endpoint = `/api/manage/admin/connector/${\n    service === \"gmail\" ? GOOGLE_SERVICES.GMAIL : GOOGLE_SERVICES.GOOGLE_DRIVE\n  }/service-account-key`;\n\n  return useSWR<{ service_account_email: string }, FetchError>(\n    endpoint,\n    errorHandlingFetcher\n  );\n};\n\nexport const useGoogleCredentials = (\n  source: ValidSources.Gmail | ValidSources.GoogleDrive\n) => {\n  return useSWR<Credential<any>[]>(\n    buildSimilarCredentialInfoURL(source),\n    errorHandlingFetcher,\n    { refreshInterval: 5000 }\n  );\n};\n\nexport const useConnectorsByCredentialId = (credential_id: number | null) => {\n  let url: string | null = null;\n  if (credential_id !== null) {\n    url = `/api/manage/admin/connector?credential=${credential_id}`;\n  }\n  const swrResponse = useSWR<ConnectorSnapshot[]>(url, errorHandlingFetcher);\n\n  return {\n    ...swrResponse,\n    refreshConnectorsByCredentialId: () => mutate(url),\n  };\n};\n\nexport const checkCredentialsFetched = (\n  appCredentialData: any,\n  appCredentialError: FetchError | undefined,\n  serviceAccountKeyData: any,\n  serviceAccountKeyError: FetchError | undefined\n) => {\n  const appCredentialSuccessfullyFetched =\n    appCredentialData ||\n    (appCredentialError && appCredentialError.status === 404);\n\n  const serviceAccountKeySuccessfullyFetched =\n    serviceAccountKeyData ||\n    (serviceAccountKeyError && serviceAccountKeyError.status === 404);\n\n  return {\n    appCredentialSuccessfullyFetched,\n    serviceAccountKeySuccessfullyFetched,\n  };\n};\n\nexport const filterUploadedCredentials = <\n  T extends { authentication_method?: string },\n>(\n  credentials: Credential<T>[] | undefined\n): { credential_id: number | null; uploadedCredentials: Credential<T>[] } => {\n  let credential_id = null;\n  let uploadedCredentials: Credential<T>[] = [];\n\n  if (credentials) {\n    uploadedCredentials = credentials.filter(\n      (credential) =>\n        credential.credential_json.authentication_method !== \"oauth_interactive\"\n    );\n\n    if (uploadedCredentials.length > 0 && uploadedCredentials[0]) {\n      credential_id = uploadedCredentials[0].id;\n    }\n  }\n\n  return { credential_id, uploadedCredentials };\n};\n\nexport const checkConnectorsExist = (\n  connectors: ConnectorSnapshot[] | undefined\n): boolean => {\n  return !!connectors && connectors.length > 0;\n};\n\nexport const refreshAllGoogleData = (\n  source: ValidSources.Gmail | ValidSources.GoogleDrive\n) => {\n  mutate(buildSimilarCredentialInfoURL(source));\n\n  const service =\n    source === ValidSources.Gmail\n      ? GOOGLE_SERVICES.GMAIL\n      : GOOGLE_SERVICES.GOOGLE_DRIVE;\n  mutate(SWR_KEYS.googleConnectorAppCredential(service));\n  mutate(SWR_KEYS.googleConnectorServiceAccountKey(service));\n};\n"
  },
  {
    "path": "web/src/lib/googleDrive.ts",
    "content": "import { Credential } from \"./connectors/credentials\";\n\nexport const setupGoogleDriveOAuth = async ({\n  isAdmin,\n  name,\n}: {\n  isAdmin: boolean;\n  name: string;\n}): Promise<[string | null, string]> => {\n  const credentialCreationResponse = await fetch(\"/api/manage/credential\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      admin_public: isAdmin,\n      credential_json: {},\n      source: \"google_drive\",\n      name: name,\n    }),\n  });\n\n  if (!credentialCreationResponse.ok) {\n    return [\n      null,\n      `Failed to create credential - ${credentialCreationResponse.status}`,\n    ];\n  }\n  const credential =\n    (await credentialCreationResponse.json()) as Credential<{}>;\n\n  const authorizationUrlResponse = await fetch(\n    `/api/manage/connector/google-drive/authorize/${credential.id}`\n  );\n  if (!authorizationUrlResponse.ok) {\n    return [\n      null,\n      `Failed to create credential - ${authorizationUrlResponse.status}`,\n    ];\n  }\n\n  const authorizationUrlJson = (await authorizationUrlResponse.json()) as {\n    auth_url: string;\n  };\n\n  return [authorizationUrlJson.auth_url, \"\"];\n};\n"
  },
  {
    "path": "web/src/lib/headers/fetchHeaderDataSS.ts",
    "content": "import { CombinedSettings } from \"@/interfaces/settings\";\nimport { ChatSession, toChatSession } from \"@/app/app/interfaces\";\nimport { fetchSettingsSS } from \"@/components/settings/lib\";\nimport { fetchBackendChatSessionSS } from \"@/lib/chat/fetchBackendChatSessionSS\";\n\nexport interface HeaderData {\n  settings: CombinedSettings | null;\n  chatSession: ChatSession | null;\n}\n\nexport async function fetchHeaderDataSS(\n  chatSessionId?: string\n): Promise<HeaderData> {\n  const [settings, backendChatSession] = await Promise.all([\n    fetchSettingsSS(),\n    chatSessionId\n      ? fetchBackendChatSessionSS(chatSessionId)\n      : Promise.resolve(null),\n  ]);\n  const chatSession = backendChatSession\n    ? toChatSession(backendChatSession)\n    : null;\n\n  return {\n    settings,\n    chatSession,\n  };\n}\n"
  },
  {
    "path": "web/src/lib/hierarchy/interfaces.ts",
    "content": "import { ValidSources } from \"@/lib/types\";\n\n// Sort options for document pagination\nexport type DocumentSortField = \"name\" | \"last_updated\";\nexport type DocumentSortDirection = \"asc\" | \"desc\";\nexport type FolderPosition = \"on_top\" | \"mixed\";\n\n// Hierarchy Node types matching backend models\nexport interface HierarchyNodeSummary {\n  id: number;\n  title: string;\n  link: string | null;\n  parent_id: number | null;\n}\n\nexport interface HierarchyNodesRequest {\n  source: ValidSources;\n}\n\nexport interface HierarchyNodesResponse {\n  nodes: HierarchyNodeSummary[];\n}\n\n// Document types for hierarchy\nexport interface DocumentPageCursor {\n  // Fields for last_updated sorting\n  last_modified?: string | null;\n  last_synced?: string | null;\n  // Field for name sorting\n  name?: string | null;\n  // Document ID for tie-breaking (always required)\n  document_id: string;\n}\n\nexport interface HierarchyNodeDocumentsRequest {\n  parent_hierarchy_node_id: number;\n  cursor?: DocumentPageCursor | null;\n  sort_field?: DocumentSortField;\n  sort_direction?: DocumentSortDirection;\n  folder_position?: FolderPosition;\n}\n\nexport interface DocumentSummary {\n  id: string;\n  title: string;\n  link: string | null;\n  parent_id: number | null;\n  last_modified: string | null;\n  last_synced: string | null;\n}\n\nexport interface HierarchyNodeDocumentsResponse {\n  documents: DocumentSummary[];\n  next_cursor: DocumentPageCursor | null;\n  page_size: number;\n  sort_field: DocumentSortField;\n  sort_direction: DocumentSortDirection;\n  folder_position: FolderPosition;\n}\n\n// Connected source type for display\nexport interface ConnectedSource {\n  source: ValidSources;\n  connectorCount: number;\n}\n\n// Union type for folders and documents in hierarchy tables\nexport type HierarchyItem =\n  | { type: \"folder\"; data: HierarchyNodeSummary }\n  | { type: \"document\"; data: DocumentSummary };\n\n// Props for hierarchy breadcrumb navigation\nexport interface HierarchyBreadcrumbProps {\n  source: ValidSources;\n  path: HierarchyNodeSummary[];\n  onNavigateToRoot: () => void;\n  onNavigateToNode: (node: HierarchyNodeSummary, index: number) => void;\n}\n"
  },
  {
    "path": "web/src/lib/hierarchy/svc.ts",
    "content": "import { ValidSources } from \"@/lib/types\";\nimport {\n  HierarchyNodesResponse,\n  HierarchyNodeDocumentsRequest,\n  HierarchyNodeDocumentsResponse,\n} from \"./interfaces\";\n\nconst HIERARCHY_NODES_PREFIX = \"/api/hierarchy-nodes\";\n\nasync function extractErrorDetail(\n  response: Response,\n  fallback: string\n): Promise<string> {\n  try {\n    const body = await response.json();\n    if (body.detail) return body.detail;\n  } catch {\n    // JSON parsing failed — fall through to fallback\n  }\n  return fallback;\n}\n\nexport async function fetchHierarchyNodes(\n  source: ValidSources\n): Promise<HierarchyNodesResponse> {\n  const response = await fetch(\n    `${HIERARCHY_NODES_PREFIX}?source=${encodeURIComponent(source)}`\n  );\n\n  if (!response.ok) {\n    const detail = await extractErrorDetail(\n      response,\n      `Failed to fetch hierarchy nodes: ${response.statusText}`\n    );\n    throw new Error(detail);\n  }\n\n  return response.json();\n}\n\nexport async function fetchHierarchyNodeDocuments(\n  request: HierarchyNodeDocumentsRequest\n): Promise<HierarchyNodeDocumentsResponse> {\n  const response = await fetch(`${HIERARCHY_NODES_PREFIX}/documents`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(request),\n  });\n\n  if (!response.ok) {\n    const detail = await extractErrorDetail(\n      response,\n      `Failed to fetch hierarchy node documents: ${response.statusText}`\n    );\n    throw new Error(detail);\n  }\n\n  return response.json();\n}\n"
  },
  {
    "path": "web/src/lib/hooks/useCaptcha.ts",
    "content": "/**\n * Hook for Google reCAPTCHA v3 integration.\n *\n * Usage:\n * 1. Add NEXT_PUBLIC_RECAPTCHA_SITE_KEY to your environment\n * 2. Include the reCAPTCHA script in your page/layout\n * 3. Use the hook to get a captcha token before form submission\n *\n * Example:\n * ```tsx\n * const { getCaptchaToken, isCaptchaEnabled } = useCaptcha();\n *\n * const handleSubmit = async () => {\n *   const token = await getCaptchaToken('signup');\n *   await basicSignup(email, password, referralSource, token);\n * };\n * ```\n */\n\nimport { useCallback, useEffect, useState } from \"react\";\n\n// Declare the global grecaptcha object\ndeclare global {\n  interface Window {\n    grecaptcha?: {\n      ready: (callback: () => void) => void;\n      execute: (\n        siteKey: string,\n        options: { action: string }\n      ) => Promise<string>;\n    };\n  }\n}\n\nconst RECAPTCHA_SITE_KEY = process.env.NEXT_PUBLIC_RECAPTCHA_SITE_KEY || \"\";\n\nexport function useCaptcha() {\n  const [isLoaded, setIsLoaded] = useState(false);\n\n  const isCaptchaEnabled = Boolean(RECAPTCHA_SITE_KEY);\n\n  useEffect(() => {\n    if (!isCaptchaEnabled) {\n      return;\n    }\n\n    const scriptSrc = `https://www.google.com/recaptcha/api.js?render=${RECAPTCHA_SITE_KEY}`;\n\n    // Check if the script is already loaded\n    if (window.grecaptcha) {\n      window.grecaptcha.ready(() => {\n        setIsLoaded(true);\n      });\n      return;\n    }\n\n    // Check if the script is already in the DOM (loading but not yet executed)\n    const existingScript = document.querySelector(`script[src=\"${scriptSrc}\"]`);\n    if (existingScript) {\n      // Script exists but hasn't loaded yet, wait for it\n      existingScript.addEventListener(\"load\", () => {\n        if (window.grecaptcha) {\n          window.grecaptcha.ready(() => {\n            setIsLoaded(true);\n          });\n        }\n      });\n      return;\n    }\n\n    // Load the reCAPTCHA script\n    const script = document.createElement(\"script\");\n    script.src = scriptSrc;\n    script.async = true;\n    script.defer = true;\n\n    script.onload = () => {\n      if (window.grecaptcha) {\n        window.grecaptcha.ready(() => {\n          setIsLoaded(true);\n        });\n      }\n    };\n\n    document.head.appendChild(script);\n\n    return () => {\n      // Cleanup is tricky with reCAPTCHA, so we leave the script in place\n    };\n  }, [isCaptchaEnabled]);\n\n  const getCaptchaToken = useCallback(\n    async (action: string = \"submit\"): Promise<string | undefined> => {\n      if (!isCaptchaEnabled) {\n        return undefined;\n      }\n\n      if (!isLoaded || !window.grecaptcha) {\n        console.warn(\"reCAPTCHA not loaded yet\");\n        return undefined;\n      }\n\n      try {\n        const token = await window.grecaptcha.execute(RECAPTCHA_SITE_KEY, {\n          action,\n        });\n        return token;\n      } catch (error) {\n        console.error(\"Failed to execute reCAPTCHA:\", error);\n        return undefined;\n      }\n    },\n    [isCaptchaEnabled, isLoaded]\n  );\n\n  return {\n    getCaptchaToken,\n    isCaptchaEnabled,\n    isLoaded,\n  };\n}\n"
  },
  {
    "path": "web/src/lib/hooks/useCustomAnalyticsEnabled.ts",
    "content": "import { CUSTOM_ANALYTICS_ENABLED } from \"@/lib/constants\";\n\nexport type CustomAnalyticsStatus = {\n  customAnalyticsEnabled: boolean;\n  isLoading: boolean;\n};\n\n/**\n * Hook to check if custom analytics is enabled.\n * Returns the status and loading state for consistency with other hooks.\n * Since this is based on an environment variable, there's no actual loading state.\n */\nexport function useCustomAnalyticsEnabled(): CustomAnalyticsStatus {\n  return {\n    customAnalyticsEnabled: CUSTOM_ANALYTICS_ENABLED,\n    isLoading: false,\n  };\n}\n"
  },
  {
    "path": "web/src/lib/hooks/useDocumentSets.ts",
    "content": "import useSWR from \"swr\";\nimport { DocumentSetSummary } from \"@/lib/types\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport function useDocumentSets() {\n  const { data, error, mutate } = useSWR<DocumentSetSummary[]>(\n    SWR_KEYS.documentSets,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 60000,\n    }\n  );\n\n  return {\n    documentSets: data ?? [],\n    isLoading: !error && !data,\n    error,\n    refresh: mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/lib/hooks/useForcedTools.ts",
    "content": "import { create } from \"zustand\";\n\ninterface ForcedToolsState {\n  forcedToolIds: number[];\n  setForcedToolIds: (ids: number[]) => void;\n  toggleForcedTool: (id: number) => void;\n  clearForcedTools: () => void;\n}\n\n/**\n * Zustand store for managing forced tool IDs.\n * This is local UI state - tools that are forced to be used in the next message.\n *\n * When a tool is \"forced\", it will be included in the next chat request\n * regardless of whether the LLM would normally choose to use it.\n */\nexport const useForcedTools = create<ForcedToolsState>((set, get) => ({\n  forcedToolIds: [],\n\n  setForcedToolIds: (ids) => set({ forcedToolIds: ids }),\n\n  toggleForcedTool: (id) => {\n    const { forcedToolIds } = get();\n    if (forcedToolIds.includes(id)) {\n      // If clicking already forced tool, clear all forced tools\n      set({ forcedToolIds: [] });\n    } else {\n      // Replace with single forced tool\n      set({ forcedToolIds: [id] });\n    }\n  },\n\n  clearForcedTools: () => set({ forcedToolIds: [] }),\n}));\n"
  },
  {
    "path": "web/src/lib/hooks/useLLMProviderOptions.ts",
    "content": "import useSWR from \"swr\";\nimport { WellKnownLLMProviderDescriptor } from \"@/interfaces/llm\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport function useLLMProviderOptions() {\n  const { data, error, mutate } = useSWR<\n    WellKnownLLMProviderDescriptor[] | undefined\n  >(SWR_KEYS.wellKnownLlmProviders, errorHandlingFetcher, {\n    revalidateOnFocus: false,\n    revalidateIfStale: false,\n    dedupingInterval: 60000,\n  });\n\n  return {\n    llmProviderOptions: data,\n    isLoading: !error && !data,\n    error,\n    refetch: mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/lib/hooks/useLLMProviders.test.ts",
    "content": "import useSWR from \"swr\";\nimport { useLLMProviders } from \"@/hooks/useLLMProviders\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\n\njest.mock(\"swr\", () => ({\n  __esModule: true,\n  default: jest.fn(),\n}));\n\njest.mock(\"@/lib/fetcher\", () => ({\n  errorHandlingFetcher: jest.fn(),\n}));\n\nconst mockUseSWR = useSWR as jest.MockedFunction<typeof useSWR>;\n\ndescribe(\"useLLMProviders\", () => {\n  beforeEach(() => {\n    mockUseSWR.mockReset();\n  });\n\n  test(\"uses public providers endpoint when personaId is not provided\", () => {\n    const mockMutate = jest.fn();\n    mockUseSWR.mockReturnValue({\n      data: undefined,\n      error: undefined,\n      mutate: mockMutate,\n      isValidating: false,\n    } as any);\n\n    const result = useLLMProviders();\n\n    expect(mockUseSWR).toHaveBeenCalledWith(\n      \"/api/llm/provider\",\n      errorHandlingFetcher,\n      expect.objectContaining({\n        revalidateOnFocus: false,\n        dedupingInterval: 60000,\n      })\n    );\n    expect(result.isLoading).toBe(true);\n    expect(result.refetch).toBe(mockMutate);\n  });\n\n  test(\"uses persona-specific providers endpoint when personaId is provided\", () => {\n    const mockMutate = jest.fn();\n    const providers = [{ name: \"Persona Provider\" }];\n    mockUseSWR.mockReturnValue({\n      data: { providers, default_text: null, default_vision: null },\n      error: undefined,\n      mutate: mockMutate,\n      isValidating: false,\n    } as any);\n\n    const result = useLLMProviders(42);\n\n    expect(mockUseSWR).toHaveBeenCalledWith(\n      \"/api/llm/persona/42/providers\",\n      errorHandlingFetcher,\n      expect.objectContaining({\n        revalidateOnFocus: false,\n        dedupingInterval: 60000,\n      })\n    );\n    expect(result.llmProviders).toBe(providers);\n    expect(result.isLoading).toBe(false);\n    expect(result.refetch).toBe(mockMutate);\n  });\n\n  test(\"reports not loading when SWR returns an error\", () => {\n    mockUseSWR.mockReturnValue({\n      data: undefined,\n      error: new Error(\"request failed\"),\n      mutate: jest.fn(),\n      isValidating: false,\n    } as any);\n\n    const result = useLLMProviders();\n\n    expect(result.isLoading).toBe(false);\n    expect(result.error).toBeInstanceOf(Error);\n  });\n});\n"
  },
  {
    "path": "web/src/lib/hooks/useProjects.ts",
    "content": "import useSWR from \"swr\";\nimport { Project } from \"@/app/app/projects/projectsService\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport function useProjects() {\n  const { data, error, mutate } = useSWR<Project[]>(\n    SWR_KEYS.userProjects,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      revalidateIfStale: false,\n      dedupingInterval: 30000,\n    }\n  );\n\n  return {\n    projects: data ?? [],\n    isLoading: !error && !data,\n    error,\n    refreshProjects: mutate,\n  };\n}\n"
  },
  {
    "path": "web/src/lib/hooks/useToolOAuthStatus.ts",
    "content": "import { useCallback, useEffect, useRef } from \"react\";\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher, skipRetryOnAuthError } from \"@/lib/fetcher\";\nimport { initiateOAuthFlow } from \"@/lib/oauth/api\";\nimport { OAuthTokenStatus, ToolSnapshot } from \"@/lib/tools/interfaces\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport interface ToolAuthStatus {\n  // whether or not the user has EVER auth'd\n  hasToken: boolean;\n  // whether or not the user's current token is expired\n  isTokenExpired: boolean;\n}\n\nexport function useToolOAuthStatus(agentId?: number) {\n  const {\n    data: oauthTokenStatuses = [],\n    isLoading: loading,\n    error: swrError,\n    mutate,\n  } = useSWR<OAuthTokenStatus[]>(\n    SWR_KEYS.oauthTokenStatus,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: false,\n      dedupingInterval: 60_000,\n      onErrorRetry: skipRetryOnAuthError,\n      onError: (err) =>\n        console.error(\"[useToolOAuthStatus] fetch failed:\", err),\n    }\n  );\n\n  const error: string | null = swrError\n    ? swrError instanceof Error\n      ? swrError.message\n      : \"An error occurred\"\n    : null;\n\n  // Re-validate when the active agent changes so the UI reflects fresh token\n  // state for the new agent's tools without waiting for the dedup interval.\n  const prevAgentIdRef = useRef(agentId);\n  useEffect(() => {\n    if (prevAgentIdRef.current !== agentId) {\n      prevAgentIdRef.current = agentId;\n      mutate();\n    }\n  }, [agentId, mutate]);\n\n  /**\n   * Get OAuth status for a specific tool\n   */\n  const getToolAuthStatus = useCallback(\n    (tool: ToolSnapshot): ToolAuthStatus | undefined => {\n      if (!tool.oauth_config_id) return undefined;\n\n      const status = oauthTokenStatuses.find(\n        (s) => s.oauth_config_id === tool.oauth_config_id\n      );\n\n      if (!status)\n        return {\n          hasToken: false,\n          isTokenExpired: false,\n        };\n\n      return {\n        hasToken: true,\n        isTokenExpired: status.is_expired,\n      };\n    },\n    [oauthTokenStatuses]\n  );\n\n  /**\n   * Initiate OAuth authentication flow for a tool\n   */\n  const authenticateTool = useCallback(\n    async (tool: ToolSnapshot): Promise<void> => {\n      if (!tool.oauth_config_id) {\n        throw new Error(\"Tool does not have OAuth configuration\");\n      }\n\n      try {\n        await initiateOAuthFlow(\n          tool.oauth_config_id,\n          window.location.pathname + window.location.search\n        );\n      } catch (err) {\n        console.error(\"Error initiating OAuth flow:\", err);\n        throw err;\n      }\n    },\n    []\n  );\n\n  /**\n   * Get all tools that need authentication from a list\n   */\n  const getToolsNeedingAuth = useCallback(\n    (tools: ToolSnapshot[]): ToolSnapshot[] => {\n      return tools.filter((tool) => !getToolAuthStatus(tool));\n    },\n    [getToolAuthStatus]\n  );\n\n  return {\n    oauthTokenStatuses,\n    loading,\n    error,\n    getToolAuthStatus,\n    authenticateTool,\n    getToolsNeedingAuth,\n    refetch: () => mutate(),\n  };\n}\n"
  },
  {
    "path": "web/src/lib/hooks.llmResolver.test.ts",
    "content": "import {\n  getDefaultLlmDescriptor,\n  getValidLlmDescriptorForProviders,\n} from \"@/lib/hooks\";\nimport { structureValue } from \"@/lib/llmConfig/utils\";\nimport { LLMProviderDescriptor } from \"@/interfaces/llm\";\nimport { makeProvider } from \"@tests/setup/llmProviderTestUtils\";\n\ndescribe(\"LLM resolver helpers\", () => {\n  test(\"chooses provider-specific descriptor when model names collide\", () => {\n    const sharedModel = \"shared-runtime-model\";\n    const providers: LLMProviderDescriptor[] = [\n      makeProvider({\n        id: 1,\n        name: \"OpenAI Provider\",\n        provider: \"openai\",\n        model_configurations: [\n          {\n            name: sharedModel,\n            is_visible: true,\n            max_input_tokens: null,\n            supports_image_input: false,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n      makeProvider({\n        id: 2,\n        name: \"Anthropic Provider\",\n        provider: \"anthropic\",\n        model_configurations: [\n          {\n            name: sharedModel,\n            is_visible: true,\n            max_input_tokens: null,\n            supports_image_input: false,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n    ];\n\n    const descriptor = getValidLlmDescriptorForProviders(\n      structureValue(\"Anthropic Provider\", \"anthropic\", sharedModel),\n      providers\n    );\n\n    expect(descriptor).toEqual({\n      name: \"Anthropic Provider\",\n      provider: \"anthropic\",\n      modelName: sharedModel,\n    });\n  });\n\n  test(\"falls back to default provider when model is unavailable\", () => {\n    const providers: LLMProviderDescriptor[] = [\n      makeProvider({\n        id: 10,\n        name: \"Default OpenAI\",\n        provider: \"openai\",\n        model_configurations: [\n          {\n            name: \"gpt-4o-mini\",\n            is_visible: true,\n            max_input_tokens: null,\n            supports_image_input: true,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n      makeProvider({\n        id: 20,\n        name: \"Anthropic Backup\",\n        provider: \"anthropic\",\n        model_configurations: [\n          {\n            name: \"claude-3-5-sonnet\",\n            is_visible: true,\n            max_input_tokens: null,\n            supports_image_input: true,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n    ];\n\n    const descriptor = getValidLlmDescriptorForProviders(\n      \"unknown-model-name\",\n      providers\n    );\n\n    expect(descriptor).toEqual({\n      name: \"Default OpenAI\",\n      provider: \"openai\",\n      modelName: \"gpt-4o-mini\",\n    });\n  });\n\n  test(\"prefers provider by name when multiple share the same type\", () => {\n    const providers: LLMProviderDescriptor[] = [\n      makeProvider({\n        id: 1,\n        name: \"Anthropic\",\n        provider: \"anthropic\",\n        model_configurations: [\n          {\n            name: \"claude-sonnet-4-5\",\n            is_visible: true,\n            max_input_tokens: null,\n            supports_image_input: false,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n      makeProvider({\n        id: 2,\n        name: \"PersonalAnthropicToken\",\n        provider: \"anthropic\",\n        model_configurations: [\n          {\n            name: \"claude-sonnet-4-5\",\n            is_visible: true,\n            max_input_tokens: null,\n            supports_image_input: false,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n    ];\n\n    const descriptor = getValidLlmDescriptorForProviders(\n      structureValue(\n        \"PersonalAnthropicToken\",\n        \"anthropic\",\n        \"claude-sonnet-4-5\"\n      ),\n      providers\n    );\n\n    expect(descriptor).toEqual({\n      name: \"PersonalAnthropicToken\",\n      provider: \"anthropic\",\n      modelName: \"claude-sonnet-4-5\",\n    });\n  });\n\n  test(\"uses first provider with models when no explicit default exists\", () => {\n    const providers: LLMProviderDescriptor[] = [\n      makeProvider({\n        id: 30,\n        name: \"First Provider\",\n        provider: \"openai\",\n        model_configurations: [\n          {\n            name: \"gpt-first\",\n            is_visible: true,\n            max_input_tokens: null,\n            supports_image_input: false,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n      makeProvider({\n        id: 40,\n        name: \"Second Provider\",\n        provider: \"anthropic\",\n        model_configurations: [\n          {\n            name: \"claude-second\",\n            is_visible: true,\n            max_input_tokens: null,\n            supports_image_input: false,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n    ];\n\n    expect(getDefaultLlmDescriptor(providers)).toEqual({\n      name: \"First Provider\",\n      provider: \"openai\",\n      modelName: \"gpt-first\",\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/lib/hooks.ts",
    "content": "\"use client\";\n\nimport {\n  DocumentBoostStatus,\n  Tag,\n  UserGroup,\n  ConnectorStatus,\n  CCPairBasicInfo,\n  FederatedConnectorDetail,\n  ValidSources,\n  ConnectorIndexingStatusLiteResponse,\n  IndexingStatusRequest,\n} from \"@/lib/types\";\nimport useSWR, { mutate, useSWRConfig } from \"swr\";\nimport { errorHandlingFetcher } from \"./fetcher\";\nimport {\n  useCallback,\n  useContext,\n  useEffect,\n  useMemo,\n  useRef,\n  useState,\n} from \"react\";\nimport { DateRangePickerValue } from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { SourceMetadata } from \"./search/interfaces\";\nimport { parseLlmDescriptor } from \"./llmConfig/utils\";\nimport { ChatSession } from \"@/app/app/interfaces\";\nimport { Credential } from \"./connectors/credentials\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport {\n  MinimalPersonaSnapshot,\n  PersonaLabel,\n} from \"@/app/admin/agents/interfaces\";\nimport { DefaultModel, LLMProviderDescriptor } from \"@/interfaces/llm\";\nimport { isAnthropic } from \"@/app/admin/configuration/llm/utils\";\nimport { getSourceMetadataForSources } from \"./sources\";\nimport { AuthType, NEXT_PUBLIC_CLOUD_ENABLED } from \"./constants\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { SEARCH_TOOL_ID } from \"@/app/app/components/tools/constants\";\nimport { updateTemperatureOverrideForChatSession } from \"@/app/app/services/lib\";\nimport { useLLMProviders } from \"@/hooks/useLLMProviders\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nexport const usePublicCredentials = () => {\n  const { mutate } = useSWRConfig();\n  const swrResponse = useSWR<Credential<any>[]>(\n    SWR_KEYS.adminCredentials,\n    errorHandlingFetcher\n  );\n\n  return {\n    ...swrResponse,\n    refreshCredentials: () => mutate(SWR_KEYS.adminCredentials),\n  };\n};\n\nconst buildReactedDocsUrl = (ascending: boolean, limit: number) => {\n  return `/api/manage/admin/doc-boosts?ascending=${ascending}&limit=${limit}`;\n};\n\nexport const useMostReactedToDocuments = (\n  ascending: boolean,\n  limit: number\n) => {\n  const url = buildReactedDocsUrl(ascending, limit);\n  const swrResponse = useSWR<DocumentBoostStatus[]>(url, errorHandlingFetcher);\n\n  return {\n    ...swrResponse,\n    refreshDocs: () => mutate(url),\n  };\n};\n\nexport const useObjectState = <T>(\n  initialValue: T\n): [T, (update: Partial<T>) => void] => {\n  const [state, setState] = useState<T>(initialValue);\n  const set = (update: Partial<T>) => {\n    setState((prevState) => {\n      return {\n        ...prevState,\n        ...update,\n      };\n    });\n  };\n  return [state, set];\n};\n\nexport const useConnectorIndexingStatusWithPagination = (\n  filters: Omit<IndexingStatusRequest, \"source\" | \"source_to_page\"> = {},\n  refreshInterval = 30000,\n  enabled: boolean = true\n) => {\n  const { mutate } = useSWRConfig();\n  //maintains the current page for each source\n  const [sourcePages, setSourcePages] = useState<Record<ValidSources, number>>(\n    {} as Record<ValidSources, number>\n  );\n  const [mergedData, setMergedData] = useState<\n    ConnectorIndexingStatusLiteResponse[]\n  >([]);\n  //maintains the loading state for each source\n  const [sourceLoadingStates, setSourceLoadingStates] = useState<\n    Record<ValidSources, boolean>\n  >({} as Record<ValidSources, boolean>);\n\n  //ref to maintain the current source pages for the main request\n  const sourcePagesRef = useRef(sourcePages);\n  sourcePagesRef.current = sourcePages;\n\n  // Main request that includes current pagination state\n  const mainRequest: IndexingStatusRequest = useMemo(\n    () => ({\n      secondary_index: false,\n      access_type_filters: [],\n      last_status_filters: [],\n      docs_count_operator: null,\n      docs_count_value: null,\n      ...filters,\n    }),\n    [filters]\n  );\n\n  const swrKey = enabled\n    ? [SWR_KEYS.indexingStatus, JSON.stringify(mainRequest)]\n    : null;\n\n  // Main data fetch with auto-refresh\n  const { data, isLoading, error } = useSWR<\n    ConnectorIndexingStatusLiteResponse[]\n  >(\n    swrKey,\n    () => fetchConnectorIndexingStatus(mainRequest, sourcePagesRef.current),\n    {\n      refreshInterval,\n    }\n  );\n\n  // Update merged data when main data changes\n  useEffect(() => {\n    if (data) {\n      setMergedData(data);\n    }\n  }, [data]);\n\n  // Function to handle page changes for a specific source\n  const handlePageChange = useCallback(\n    async (source: ValidSources, page: number) => {\n      // Update the source page state\n      setSourcePages((prev) => ({ ...prev, [source]: page }));\n\n      const sourceRequest: IndexingStatusRequest = {\n        ...filters,\n        source: source,\n        source_to_page: { [source]: page } as Record<ValidSources, number>,\n      };\n      setSourceLoadingStates((prev) => ({ ...prev, [source]: true }));\n\n      try {\n        const sourceData = await fetchConnectorIndexingStatus(sourceRequest);\n        if (sourceData && sourceData.length > 0) {\n          setMergedData((prevData) =>\n            prevData\n              .map((existingSource) =>\n                existingSource.source === source\n                  ? sourceData[0]\n                  : existingSource\n              )\n              .filter(\n                (item): item is ConnectorIndexingStatusLiteResponse =>\n                  item !== undefined\n              )\n          );\n        }\n      } catch (error) {\n        console.error(\n          `Failed to fetch page ${page} for source ${source}:`,\n          error\n        );\n      } finally {\n        setSourceLoadingStates((prev) => ({ ...prev, [source]: false }));\n      }\n    },\n    [filters]\n  );\n\n  // Function to refresh all data (maintains current pagination)\n  const refreshAllData = useCallback(() => {\n    if (swrKey) mutate(swrKey);\n  }, [mutate, swrKey]);\n\n  // Reset pagination when filters change (but not search)\n  const resetPagination = useCallback(() => {\n    setSourcePages({} as Record<ValidSources, number>);\n  }, []);\n\n  return {\n    data: mergedData,\n    isLoading,\n    error,\n    handlePageChange,\n    sourcePages,\n    sourceLoadingStates,\n    refreshAllData,\n    resetPagination,\n  };\n};\n\nexport const useConnectorStatus = (\n  refreshInterval = 30000,\n  enabled: boolean = true\n) => {\n  const { mutate } = useSWRConfig();\n  const url = SWR_KEYS.adminConnectorStatus;\n  const swrResponse = useSWR<ConnectorStatus<any, any>[]>(\n    enabled ? url : null,\n    errorHandlingFetcher,\n    { refreshInterval: refreshInterval }\n  );\n\n  return {\n    ...swrResponse,\n    refreshIndexingStatus: enabled ? () => mutate(url) : () => {},\n  };\n};\n\nexport const useBasicConnectorStatus = (enabled: boolean = true) => {\n  const url = SWR_KEYS.connectorStatus;\n  const swrResponse = useSWR<CCPairBasicInfo[]>(\n    enabled ? url : null,\n    errorHandlingFetcher\n  );\n  return {\n    ...swrResponse,\n    refreshIndexingStatus: enabled ? () => mutate(url) : () => {},\n  };\n};\n\nexport const useFederatedConnectors = () => {\n  const { mutate } = useSWRConfig();\n  const url = SWR_KEYS.federatedConnectors;\n  const swrResponse = useSWR<FederatedConnectorDetail[]>(\n    url,\n    errorHandlingFetcher\n  );\n\n  return {\n    ...swrResponse,\n    refreshFederatedConnectors: () => mutate(url),\n  };\n};\n\nexport const useLabels = () => {\n  const { mutate } = useSWRConfig();\n  const { data: labels, error } = useSWR<PersonaLabel[]>(\n    SWR_KEYS.personaLabels,\n    errorHandlingFetcher\n  );\n\n  const refreshLabels = async () => {\n    return mutate(SWR_KEYS.personaLabels);\n  };\n\n  const createLabel = async (name: string): Promise<PersonaLabel | null> => {\n    const response = await fetch(SWR_KEYS.personaLabels, {\n      method: \"POST\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify({ name }),\n    });\n\n    if (!response.ok) {\n      return null;\n    }\n\n    const newLabel: PersonaLabel = await response.json();\n    mutate(\n      SWR_KEYS.personaLabels,\n      (currentLabels: PersonaLabel[] | undefined) => [\n        ...(currentLabels || []),\n        newLabel,\n      ],\n      false\n    );\n    return newLabel;\n  };\n\n  const updateLabel = async (id: number, name: string) => {\n    const response = await fetch(`/api/admin/persona/label/${id}`, {\n      method: \"PATCH\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify({ label_name: name }),\n    });\n\n    if (response.ok) {\n      mutate(\n        SWR_KEYS.personaLabels,\n        labels?.map((label) => (label.id === id ? { ...label, name } : label)),\n        false\n      );\n    }\n\n    return response;\n  };\n\n  const deleteLabel = async (id: number) => {\n    const response = await fetch(`/api/admin/persona/label/${id}`, {\n      method: \"DELETE\",\n      headers: { \"Content-Type\": \"application/json\" },\n    });\n\n    if (response.ok) {\n      mutate(\n        SWR_KEYS.personaLabels,\n        labels?.filter((label) => label.id !== id),\n        false\n      );\n    }\n\n    return response;\n  };\n\n  return {\n    labels,\n    error,\n    refreshLabels,\n    createLabel,\n    updateLabel,\n    deleteLabel,\n  };\n};\n\nexport const useTimeRange = (initialValue?: DateRangePickerValue) => {\n  return useState<DateRangePickerValue | null>(null);\n};\n\nexport interface FilterManager {\n  timeRange: DateRangePickerValue | null;\n  setTimeRange: React.Dispatch<\n    React.SetStateAction<DateRangePickerValue | null>\n  >;\n  selectedSources: SourceMetadata[];\n  setSelectedSources: React.Dispatch<React.SetStateAction<SourceMetadata[]>>;\n  selectedDocumentSets: string[];\n  setSelectedDocumentSets: React.Dispatch<React.SetStateAction<string[]>>;\n  selectedTags: Tag[];\n  setSelectedTags: React.Dispatch<React.SetStateAction<Tag[]>>;\n  getFilterString: () => string;\n  buildFiltersFromQueryString: (\n    filterString: string,\n    availableSources: SourceMetadata[],\n    availableDocumentSets: string[],\n    availableTags: Tag[]\n  ) => void;\n  clearFilters: () => void;\n}\n\nexport function useFilters(): FilterManager {\n  const [timeRange, setTimeRange] = useTimeRange();\n  const [selectedSources, setSelectedSources] = useState<SourceMetadata[]>([]);\n  const [selectedDocumentSets, setSelectedDocumentSets] = useState<string[]>(\n    []\n  );\n  const [selectedTags, setSelectedTags] = useState<Tag[]>([]);\n\n  function getFilterString() {\n    const params = new URLSearchParams();\n\n    if (timeRange) {\n      params.set(\"from\", timeRange.from.toISOString());\n      params.set(\"to\", timeRange.to.toISOString());\n    }\n\n    if (selectedSources.length > 0) {\n      const sourcesParam = selectedSources\n        .map((source) => encodeURIComponent(source.internalName))\n        .join(\",\");\n      params.set(\"sources\", sourcesParam);\n    }\n\n    if (selectedDocumentSets.length > 0) {\n      const docSetsParam = selectedDocumentSets\n        .map((ds) => encodeURIComponent(ds))\n        .join(\",\");\n      params.set(\"documentSets\", docSetsParam);\n    }\n\n    if (selectedTags.length > 0) {\n      const tagsParam = selectedTags\n        .map((tag) => encodeURIComponent(tag.tag_value))\n        .join(\",\");\n      params.set(\"tags\", tagsParam);\n    }\n\n    const queryString = params.toString();\n    return queryString ? `&${queryString}` : \"\";\n  }\n\n  function clearFilters() {\n    setTimeRange(null);\n    setSelectedSources([]);\n    setSelectedDocumentSets([]);\n    setSelectedTags([]);\n  }\n\n  function buildFiltersFromQueryString(\n    filterString: string,\n    availableSources: SourceMetadata[],\n    availableDocumentSets: string[],\n    availableTags: Tag[]\n  ): void {\n    const params = new URLSearchParams(filterString);\n\n    // Parse the \"from\" parameter as a DateRangePickerValue\n    let newTimeRange: DateRangePickerValue | null = null;\n    const fromParam = params.get(\"from\");\n    const toParam = params.get(\"to\");\n    if (fromParam && toParam) {\n      const fromDate = new Date(fromParam);\n      const toDate = new Date(toParam);\n      if (!isNaN(fromDate.getTime()) && !isNaN(toDate.getTime())) {\n        newTimeRange = { from: fromDate, to: toDate, selectValue: \"\" };\n      }\n    }\n\n    // Parse sources\n    let newSelectedSources: SourceMetadata[] = [];\n    const sourcesParam = params.get(\"sources\");\n    if (sourcesParam) {\n      const sourceNames = sourcesParam.split(\",\").map(decodeURIComponent);\n      newSelectedSources = availableSources.filter((source) =>\n        sourceNames.includes(source.internalName)\n      );\n    }\n\n    // Parse document sets\n    let newSelectedDocSets: string[] = [];\n    const docSetsParam = params.get(\"documentSets\");\n    if (docSetsParam) {\n      const docSetNames = docSetsParam.split(\",\").map(decodeURIComponent);\n      newSelectedDocSets = availableDocumentSets.filter((ds) =>\n        docSetNames.includes(ds)\n      );\n    }\n\n    // Parse tags\n    let newSelectedTags: Tag[] = [];\n    const tagsParam = params.get(\"tags\");\n    if (tagsParam) {\n      const tagValues = tagsParam.split(\",\").map(decodeURIComponent);\n      newSelectedTags = availableTags.filter((tag) =>\n        tagValues.includes(tag.tag_value)\n      );\n    }\n\n    // Update filter manager's values instead of returning\n    setTimeRange(newTimeRange);\n    setSelectedSources(newSelectedSources);\n    setSelectedDocumentSets(newSelectedDocSets);\n    setSelectedTags(newSelectedTags);\n  }\n\n  return {\n    clearFilters,\n    timeRange,\n    setTimeRange,\n    selectedSources,\n    setSelectedSources,\n    selectedDocumentSets,\n    setSelectedDocumentSets,\n    selectedTags,\n    setSelectedTags,\n    getFilterString,\n    buildFiltersFromQueryString,\n  };\n}\n\nexport interface LlmDescriptor {\n  name: string;\n  provider: string;\n  modelName: string;\n}\n\nexport interface LlmManager {\n  currentLlm: LlmDescriptor;\n  updateCurrentLlm: (newOverride: LlmDescriptor) => void;\n  temperature: number;\n  updateTemperature: (temperature: number) => void;\n  updateModelOverrideBasedOnChatSession: (chatSession?: ChatSession) => void;\n  imageFilesPresent: boolean;\n  updateImageFilesPresent: (present: boolean) => void;\n  liveAgent: MinimalPersonaSnapshot | null;\n  maxTemperature: number;\n  llmProviders: LLMProviderDescriptor[] | undefined;\n  isLoadingProviders: boolean;\n  hasAnyProvider: boolean;\n}\n\n// Things to test\n// 1. User override\n// 2. User preference (defaults to system wide default if no preference set)\n// 3. Current assistant\n// 4. Current chat session\n// 5. Live assistant\n\n/*\nLLM Override is as follows (i.e. this order)\n- User override (explicitly set in the chat input bar)\n- User preference (defaults to system wide default if no preference set)\n\nOn switching to an existing or new chat session or a different assistant:\n- If we have a live assistant after any switch with a model override, use that- otherwise use the above hierarchy\n\nThus, the input should be\n- User preference\n- LLM Providers (which contain the system wide default)\n- Current assistant\n\nChanges take place as\n- liveAgent or currentChatSession changes (and the associated model override is set)\n- (updateCurrentLlm) User explicitly setting a model override (and we explicitly override and set the userSpecifiedOverride which we'll use in place of the user preferences unless overridden by an agent)\n\nIf we have a live assistant, we should use that model override\n\nRelevant test: `llm_ordering.spec.ts`.\n\nTemperature override is set as follows:\n- For existing chat sessions:\n  - If the user has previously overridden the temperature for a specific chat session,\n    that value is persisted and used when the user returns to that chat.\n  - This persistence applies even if the temperature was set before sending the first message in the chat.\n- For new chat sessions:\n  - If the search tool is available, the default temperature is set to 0.\n  - If the search tool is not available, the default temperature is set to 0.5.\n\nThis approach ensures that user preferences are maintained for existing chats while\nproviding appropriate defaults for new conversations based on the available tools.\n*/\n\nexport function getDefaultLlmDescriptor(\n  llmProviders: LLMProviderDescriptor[],\n  defaultText?: DefaultModel | null\n): LlmDescriptor | null {\n  if (defaultText) {\n    const provider = llmProviders.find((p) => p.id === defaultText.provider_id);\n    if (provider) {\n      return {\n        name: provider.name,\n        provider: provider.provider,\n        modelName: defaultText.model_name,\n      };\n    }\n  }\n  // Fallback: first provider with visible models\n  const firstLlmProvider = llmProviders.find(\n    (provider) => provider.model_configurations.length > 0\n  );\n  if (firstLlmProvider) {\n    const firstModel = firstLlmProvider.model_configurations.find(\n      (m) => m.is_visible\n    );\n    return {\n      name: firstLlmProvider.name,\n      provider: firstLlmProvider.provider,\n      modelName: firstModel?.name ?? \"\",\n    };\n  }\n  return null;\n}\n\nexport function getValidLlmDescriptorForProviders(\n  modelName: string | null | undefined,\n  llmProviders: LLMProviderDescriptor[] | undefined | null\n): LlmDescriptor {\n  // Return early if providers haven't loaded yet (undefined/null)\n  // Empty arrays are valid (user has no provider access for this assistant)\n  if (llmProviders === undefined || llmProviders === null) {\n    return { name: \"\", provider: \"\", modelName: \"\" };\n  }\n\n  if (modelName) {\n    const model = parseLlmDescriptor(modelName);\n    // If we have no parsed modelName, try to find the provider by the raw modelName string\n    if (!(model.modelName && model.modelName.length > 0)) {\n      const provider = llmProviders.find((p) =>\n        p.model_configurations\n          .map((modelConfiguration) => modelConfiguration.name)\n          .includes(modelName)\n      );\n      if (provider) {\n        return {\n          modelName: modelName,\n          name: provider.name,\n          provider: provider.provider,\n        };\n      }\n    }\n\n    // If we have parsed provider info, try to find that specific provider.\n    // This ensures we don't incorrectly match a model to the wrong provider\n    // when the same model name exists across multiple providers (e.g., gpt-5 in Azure and OpenAI)\n    if (model.provider && model.provider.length > 0) {\n      const hasModel = (p: LLMProviderDescriptor) =>\n        p.model_configurations.some((mc) => mc.name === model.modelName);\n      const typeMatches = llmProviders.filter(\n        (p) => p.provider === model.provider && hasModel(p)\n      );\n      // When multiple providers share the same type (e.g., two \"anthropic\"\n      // providers with different API keys), prefer the one whose name matches\n      // the user's explicit selection to avoid silently switching providers.\n      const matchingProvider =\n        typeMatches.find((p) => p.name === model.name) ?? typeMatches[0];\n      if (matchingProvider) {\n        return {\n          ...model,\n          name: matchingProvider.name,\n          provider: matchingProvider.provider,\n        };\n      }\n      // Provider info was present but not found - fall through to default\n    } else {\n      // Only search by model name when no provider info was parsed\n      const provider = llmProviders.find((p) =>\n        p.model_configurations\n          .map((modelConfiguration) => modelConfiguration.name)\n          .includes(model.modelName)\n      );\n\n      if (provider) {\n        return { ...model, provider: provider.provider, name: provider.name };\n      }\n    }\n  }\n\n  // Model not found in available providers - fall back to default model\n  return (\n    getDefaultLlmDescriptor(llmProviders) ?? {\n      name: \"\",\n      provider: \"\",\n      modelName: \"\",\n    }\n  );\n}\n\nexport function useLlmManager(\n  currentChatSession?: ChatSession,\n  liveAgent?: MinimalPersonaSnapshot\n): LlmManager {\n  const { user } = useUser();\n\n  // Get all user-accessible providers via SWR (general providers - no persona filter)\n  // This includes public + all restricted providers user can access via groups\n  const {\n    llmProviders: allUserProviders,\n    defaultText: allUserDefaultText,\n    isLoading: isLoadingAllProviders,\n  } = useLLMProviders();\n  // Fetch persona-specific providers to enforce RBAC restrictions per assistant\n  // Only fetch if we have an agent selected\n  const personaId = liveAgent?.id !== undefined ? liveAgent.id : undefined;\n  const {\n    llmProviders: personaProviders,\n    defaultText: personaDefaultText,\n    isLoading: isLoadingPersonaProviders,\n  } = useLLMProviders(personaId);\n\n  const llmProviders =\n    personaProviders !== undefined ? personaProviders : allUserProviders;\n  const defaultText =\n    personaProviders !== undefined ? personaDefaultText : allUserDefaultText;\n\n  const [userHasManuallyOverriddenLLM, setUserHasManuallyOverriddenLLM] =\n    useState(false);\n  const [chatSession, setChatSession] = useState<ChatSession | null>(null);\n  const [currentLlm, setCurrentLlm] = useState<LlmDescriptor>({\n    name: \"\",\n    provider: \"\",\n    modelName: \"\",\n  });\n\n  // Track the previous assistant ID to detect when it changes\n  const prevAgentIdRef = useRef<number | undefined>(undefined);\n\n  // Reset manual override when switching to a different assistant\n  useEffect(() => {\n    if (\n      liveAgent?.id !== undefined &&\n      prevAgentIdRef.current !== undefined &&\n      liveAgent.id !== prevAgentIdRef.current\n    ) {\n      // User switched to a different assistant - reset manual override\n      setUserHasManuallyOverriddenLLM(false);\n    }\n    prevAgentIdRef.current = liveAgent?.id;\n  }, [liveAgent?.id]);\n\n  const llmUpdate = () => {\n    /* Should be called when the live assistant or current chat session changes */\n\n    // Don't update if providers haven't loaded yet (undefined/null)\n    // Empty arrays are valid (user has no provider access for this assistant)\n    if (llmProviders === undefined || llmProviders === null) {\n      return;\n    }\n\n    // separate function so we can `return` to break out\n    const _llmUpdate = () => {\n      // if the user has overridden in this session and just switched to a brand\n      // new session, use their manually specified model\n      if (userHasManuallyOverriddenLLM && !currentChatSession) {\n        return;\n      }\n\n      if (currentChatSession?.current_alternate_model) {\n        setCurrentLlm(\n          getValidLlmDescriptor(currentChatSession.current_alternate_model)\n        );\n      } else if (liveAgent?.llm_model_version_override) {\n        setCurrentLlm(\n          getValidLlmDescriptor(liveAgent.llm_model_version_override)\n        );\n      } else if (userHasManuallyOverriddenLLM) {\n        // if the user has an override and there's nothing special about the\n        // current chat session, use the override\n        return;\n      } else if (user?.preferences?.default_model) {\n        setCurrentLlm(getValidLlmDescriptor(user.preferences.default_model));\n      } else {\n        const defaultLlm = getDefaultLlmDescriptor(llmProviders, defaultText);\n        if (defaultLlm) {\n          setCurrentLlm(defaultLlm);\n        }\n      }\n    };\n\n    _llmUpdate();\n    setChatSession(currentChatSession || null);\n  };\n\n  function getValidLlmDescriptor(\n    modelName: string | null | undefined\n  ): LlmDescriptor {\n    return getValidLlmDescriptorForProviders(modelName, llmProviders);\n  }\n\n  const [imageFilesPresent, setImageFilesPresent] = useState(false);\n\n  const updateImageFilesPresent = (present: boolean) => {\n    setImageFilesPresent(present);\n  };\n\n  // Manually set the LLM\n  const updateCurrentLlm = (newLlm: LlmDescriptor) => {\n    setCurrentLlm(newLlm);\n    setUserHasManuallyOverriddenLLM(true);\n  };\n\n  const updateCurrentLlmToModelName = (modelName: string) => {\n    setCurrentLlm(getValidLlmDescriptor(modelName));\n    setUserHasManuallyOverriddenLLM(true);\n  };\n\n  const updateModelOverrideBasedOnChatSession = (chatSession?: ChatSession) => {\n    if (chatSession && chatSession.current_alternate_model?.length > 0) {\n      setCurrentLlm(getValidLlmDescriptor(chatSession.current_alternate_model));\n    }\n  };\n\n  const [temperature, setTemperature] = useState<number>(() => {\n    if (currentChatSession?.current_temperature_override != null) {\n      // Derive Anthropic check from chat session since currentLlm isn't populated yet\n      const sessionModel = currentChatSession.current_alternate_model\n        ? parseLlmDescriptor(currentChatSession.current_alternate_model)\n        : null;\n      const isAnthropicModel = sessionModel\n        ? isAnthropic(sessionModel.provider, sessionModel.modelName)\n        : false;\n      return Math.min(\n        currentChatSession.current_temperature_override,\n        isAnthropicModel ? 1.0 : 2.0\n      );\n    } else if (liveAgent?.tools.some((tool) => tool.name === SEARCH_TOOL_ID)) {\n      return 0;\n    }\n    return 0.5;\n  });\n\n  const maxTemperature = useMemo(() => {\n    // Check currentLlm first, fall back to chat session model if currentLlm isn't populated\n    if (currentLlm.provider) {\n      return isAnthropic(currentLlm.provider, currentLlm.modelName) ? 1.0 : 2.0;\n    }\n    const sessionModel = currentChatSession?.current_alternate_model\n      ? parseLlmDescriptor(currentChatSession.current_alternate_model)\n      : null;\n    if (sessionModel?.provider) {\n      return isAnthropic(sessionModel.provider, sessionModel.modelName)\n        ? 1.0\n        : 2.0;\n    }\n    return 2.0; // Default max when no model info available\n  }, [currentLlm, currentChatSession]);\n\n  useEffect(() => {\n    if (isAnthropic(currentLlm.provider, currentLlm.modelName)) {\n      const newTemperature = Math.min(temperature, 1.0);\n      setTemperature(newTemperature);\n      if (chatSession?.id) {\n        updateTemperatureOverrideForChatSession(chatSession.id, newTemperature);\n      }\n    }\n  }, [currentLlm]);\n\n  useEffect(() => {\n    llmUpdate();\n\n    if (!chatSession && currentChatSession) {\n      if (temperature) {\n        updateTemperatureOverrideForChatSession(\n          currentChatSession.id,\n          temperature\n        );\n      }\n      return;\n    }\n\n    if (currentChatSession?.current_temperature_override) {\n      setTemperature(currentChatSession.current_temperature_override);\n    } else if (liveAgent?.tools.some((tool) => tool.name === SEARCH_TOOL_ID)) {\n      setTemperature(0);\n    } else {\n      setTemperature(0.5);\n    }\n  }, [\n    liveAgent,\n    currentChatSession,\n    llmProviders,\n    user?.preferences?.default_model,\n  ]);\n\n  const updateTemperature = (temperature: number) => {\n    const clampedTemp = isAnthropic(currentLlm.provider, currentLlm.modelName)\n      ? Math.min(temperature, 1.0)\n      : temperature;\n    setTemperature(clampedTemp);\n    if (chatSession) {\n      updateTemperatureOverrideForChatSession(chatSession.id, clampedTemp);\n    }\n  };\n\n  // Track if any provider exists for the current persona context.\n  // Uses the persona-aware list so chat input reflects actual access,\n  // falling back to the global list when no persona is selected.\n  const hasAnyProvider = (llmProviders?.length ?? 0) > 0;\n\n  return {\n    updateModelOverrideBasedOnChatSession,\n    currentLlm,\n    updateCurrentLlm,\n    temperature,\n    updateTemperature,\n    imageFilesPresent,\n    updateImageFilesPresent,\n    liveAgent: liveAgent ?? null,\n    maxTemperature,\n    llmProviders,\n    isLoadingProviders:\n      isLoadingAllProviders ||\n      (personaId !== undefined && isLoadingPersonaProviders),\n    hasAnyProvider,\n  };\n}\n\nexport function useAuthType(): AuthType | null {\n  const { data, error } = useSWR<{ auth_type: AuthType }>(\n    SWR_KEYS.authType,\n    errorHandlingFetcher\n  );\n\n  if (NEXT_PUBLIC_CLOUD_ENABLED) {\n    return AuthType.CLOUD;\n  }\n\n  if (error || !data) {\n    return null;\n  }\n\n  return data.auth_type;\n}\n\n/*\nEE Only APIs\n*/\n\nexport const useUserGroups = (): {\n  data: UserGroup[] | undefined;\n  isLoading: boolean;\n  error: string;\n  refreshUserGroups: () => void;\n} => {\n  const combinedSettings = useContext(SettingsContext);\n  const isLoading = combinedSettings?.settingsLoading ?? false;\n  const isPaidEnterpriseFeaturesEnabled =\n    !isLoading &&\n    combinedSettings &&\n    combinedSettings.enterpriseSettings !== null;\n\n  const swrResponse = useSWR<UserGroup[]>(\n    isPaidEnterpriseFeaturesEnabled ? SWR_KEYS.adminUserGroups : null,\n    errorHandlingFetcher\n  );\n\n  const refreshUserGroups = () => mutate(SWR_KEYS.adminUserGroups);\n\n  if (isLoading) {\n    return {\n      data: undefined,\n      isLoading: true,\n      error: \"\",\n      refreshUserGroups,\n    };\n  }\n\n  if (!isPaidEnterpriseFeaturesEnabled) {\n    return {\n      data: [],\n      isLoading: false,\n      error: \"\",\n      refreshUserGroups,\n    };\n  }\n\n  return {\n    ...swrResponse,\n    refreshUserGroups,\n  };\n};\n\nexport const fetchConnectorIndexingStatus = async (\n  request: IndexingStatusRequest = {},\n  sourcePages: Record<ValidSources, number> | null = null\n): Promise<ConnectorIndexingStatusLiteResponse[]> => {\n  const response = await fetch(SWR_KEYS.indexingStatus, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      secondary_index: false,\n      access_type_filters: [],\n      last_status_filters: [],\n      docs_count_operator: null,\n      docs_count_value: null,\n      source_to_page: sourcePages || {}, // Use current pagination state\n      ...request,\n    }),\n  });\n\n  if (!response.ok) {\n    throw new Error(`HTTP error! status: ${response.status}`);\n  }\n\n  return response.json();\n};\n\n// Get source metadata for configured sources - deduplicated by source type\nfunction getConfiguredSources(\n  availableSources: ValidSources[]\n): Array<SourceMetadata & { originalName: string; uniqueKey: string }> {\n  const allSources = getSourceMetadataForSources(availableSources);\n\n  const seenSources = new Set<string>();\n  const configuredSources: Array<\n    SourceMetadata & { originalName: string; uniqueKey: string }\n  > = [];\n\n  availableSources.forEach((sourceName) => {\n    // Handle federated connectors by removing the federated_ prefix\n    const cleanName = sourceName.replace(\"federated_\", \"\");\n    // Skip if we've already seen this source type\n    if (seenSources.has(cleanName)) return;\n    seenSources.add(cleanName);\n    const source = allSources.find(\n      (source) => source.internalName === cleanName\n    );\n    if (source) {\n      configuredSources.push({\n        ...source,\n        originalName: sourceName,\n        uniqueKey: cleanName,\n      });\n    }\n  });\n  return configuredSources;\n}\n\ninterface UseSourcePreferencesProps {\n  availableSources: ValidSources[];\n  selectedSources: SourceMetadata[];\n  setSelectedSources: (sources: SourceMetadata[]) => void;\n}\n\ninterface SourcePreferencesSnapshot {\n  sourcePreferences: Record<string, boolean>; // uniqueKey -> enabled status\n}\n\nconst LS_SELECTED_INTERNAL_SEARCH_SOURCES_KEY = \"selectedInternalSearchSources\";\n\nexport function useSourcePreferences({\n  availableSources,\n  selectedSources,\n  setSelectedSources,\n}: UseSourcePreferencesProps) {\n  const [sourcesInitialized, setSourcesInitialized] = useState(false);\n\n  const configuredSources = useMemo(\n    () => getConfiguredSources(availableSources),\n    [availableSources]\n  );\n\n  // Load saved source preferences from localStorage\n  const loadSavedSourcePreferences = (): SourcePreferencesSnapshot | null => {\n    if (typeof window === \"undefined\") return null;\n    const saved = localStorage.getItem(LS_SELECTED_INTERNAL_SEARCH_SOURCES_KEY);\n    if (!saved) return null;\n    try {\n      const res = JSON.parse(saved);\n\n      // Validate the snapshot structure\n      if (\n        typeof res !== \"object\" ||\n        res === null ||\n        typeof res.sourcePreferences !== \"object\" ||\n        res.sourcePreferences === null ||\n        Array.isArray(res.sourcePreferences)\n      ) {\n        return null;\n      }\n\n      // Validate that all values in sourcePreferences are booleans\n      for (const value of Object.values(res.sourcePreferences)) {\n        if (typeof value !== \"boolean\") {\n          return null;\n        }\n      }\n\n      return res as SourcePreferencesSnapshot;\n    } catch {\n      return null;\n    }\n  };\n\n  const persistSourcePreferencesState = (\n    enabledSources: SourceMetadata[],\n    allKnownSources: SourceMetadata[]\n  ) => {\n    if (typeof window === \"undefined\") return;\n\n    const enabledKeys = new Set(enabledSources.map((s) => s.uniqueKey));\n\n    const snapshot: SourcePreferencesSnapshot = {\n      sourcePreferences: Object.fromEntries(\n        allKnownSources\n          .filter((src) => src.uniqueKey !== undefined)\n          .map((src) => [src.uniqueKey, enabledKeys.has(src.uniqueKey)])\n      ),\n    };\n\n    localStorage.setItem(\n      LS_SELECTED_INTERNAL_SEARCH_SOURCES_KEY,\n      JSON.stringify(snapshot)\n    );\n  };\n\n  // Initialize sources - load from localStorage or enable all by default\n  useEffect(() => {\n    if (!sourcesInitialized && availableSources.length > 0) {\n      const savedSources = loadSavedSourcePreferences();\n\n      if (savedSources !== null) {\n        // Filter out saved sources that no longer exist\n        const { sourcePreferences } = savedSources;\n\n        // Helper to check if there is a preference for a key\n        const hasPref = (key: string) =>\n          Object.prototype.hasOwnProperty.call(sourcePreferences, key);\n\n        // Get sources with no preference\n        const newSources = configuredSources.filter((source) => {\n          return !hasPref(source.uniqueKey);\n        });\n\n        const enabledSources = configuredSources.filter((source) => {\n          return (\n            hasPref(source.uniqueKey) && sourcePreferences[source.uniqueKey]\n          );\n        });\n\n        // Merge valid saved sources with new sources (enable new sources by default)\n        const mergedSources = [...enabledSources, ...newSources];\n        setSelectedSources(mergedSources);\n\n        // Persist the merged state\n        persistSourcePreferencesState(mergedSources, configuredSources);\n      } else {\n        // First time user or invalid data - enable all sources by default\n        setSelectedSources(configuredSources);\n        persistSourcePreferencesState(configuredSources, configuredSources);\n      }\n      setSourcesInitialized(true);\n    }\n  }, [\n    availableSources,\n    configuredSources,\n    sourcesInitialized,\n    setSelectedSources,\n  ]);\n\n  const enableSources = (sources: SourceMetadata[]) => {\n    setSelectedSources([...sources]);\n    persistSourcePreferencesState(sources, configuredSources);\n  };\n\n  const enableAllSources = () => {\n    enableSources(configuredSources);\n  };\n\n  const disableAllSources = () => {\n    setSelectedSources([]);\n    persistSourcePreferencesState([], configuredSources);\n  };\n\n  const toggleSource = (sourceUniqueKey: string) => {\n    const configuredSource = configuredSources.find(\n      (s) => s.uniqueKey === sourceUniqueKey\n    );\n    if (!configuredSource) return;\n\n    const isCurrentlySelected = selectedSources.some(\n      (s) => s.uniqueKey === configuredSource.uniqueKey\n    );\n\n    let newSources: SourceMetadata[];\n    if (isCurrentlySelected) {\n      newSources = selectedSources.filter(\n        (s) => s.uniqueKey !== configuredSource.uniqueKey\n      );\n    } else {\n      newSources = [...selectedSources, configuredSource];\n    }\n\n    setSelectedSources(newSources);\n    persistSourcePreferencesState(newSources, configuredSources);\n  };\n\n  const isSourceEnabled = (sourceUniqueKey: string) => {\n    const configuredSource = configuredSources.find(\n      (s) => s.uniqueKey === sourceUniqueKey\n    );\n    if (!configuredSource) return false;\n    return selectedSources.some(\n      (s: SourceMetadata) => s.uniqueKey === configuredSource.uniqueKey\n    );\n  };\n\n  return {\n    sourcesInitialized,\n    enableSources,\n    enableAllSources,\n    disableAllSources,\n    toggleSource,\n    isSourceEnabled,\n  };\n}\n"
  },
  {
    "path": "web/src/lib/indexAttempt.ts",
    "content": "import { IndexAttemptSnapshot } from \"./types\";\n\nexport const getDocsProcessedPerMinute = (\n  indexAttempt: IndexAttemptSnapshot | null\n): number | null => {\n  if (\n    !indexAttempt ||\n    !indexAttempt.time_started ||\n    !indexAttempt.time_updated ||\n    indexAttempt.total_docs_indexed === 0\n  ) {\n    return null;\n  }\n\n  const timeStarted = new Date(indexAttempt.time_started);\n  const timeUpdated = new Date(indexAttempt.time_updated);\n  const timeDiff = timeUpdated.getTime() - timeStarted.getTime();\n  const seconds = timeDiff / 1000;\n  return (indexAttempt.total_docs_indexed / seconds) * 60;\n};\n"
  },
  {
    "path": "web/src/lib/languages.test.ts",
    "content": "import {\n  getCodeLanguage,\n  getDataLanguage,\n  getLanguageByMime,\n  isMarkdownFile,\n} from \"./languages\";\n\ndescribe(\"getCodeLanguage\", () => {\n  it.each([\n    [\"app.py\", \"python\"],\n    [\"index.ts\", \"typescript\"],\n    [\"main.go\", \"go\"],\n    [\"style.css\", \"css\"],\n    [\"page.html\", \"html\"],\n    [\"App.vue\", \"vue\"],\n    [\"lib.rs\", \"rust\"],\n    [\"main.cpp\", \"c++\"],\n    [\"util.c\", \"c\"],\n    [\"script.js\", \"javascript\"],\n  ])(\"%s → %s\", (filename, expected) => {\n    expect(getCodeLanguage(filename)).toBe(expected);\n  });\n\n  it.each([\n    [\".h\", \"c\"],\n    [\".inc\", \"php\"],\n    [\".m\", \"objective-c\"],\n    [\".re\", \"reason\"],\n  ])(\"override: %s → %s\", (ext, expected) => {\n    expect(getCodeLanguage(`file${ext}`)).toBe(expected);\n  });\n\n  it(\"resolves by exact filename when there is no extension\", () => {\n    expect(getCodeLanguage(\"Dockerfile\")).toBe(\"dockerfile\");\n    expect(getCodeLanguage(\"Makefile\")).toBe(\"makefile\");\n  });\n\n  it(\"is case-insensitive for filenames\", () => {\n    expect(getCodeLanguage(\"INDEX.JS\")).toBe(\"javascript\");\n    expect(getCodeLanguage(\"dockerfile\")).toBe(\"dockerfile\");\n  });\n\n  it(\"returns null for unknown extensions\", () => {\n    expect(getCodeLanguage(\"file.xyz123\")).toBeNull();\n  });\n\n  it(\"excludes markdown extensions\", () => {\n    expect(getCodeLanguage(\"README.md\")).toBeNull();\n    expect(getCodeLanguage(\"notes.markdown\")).toBeNull();\n  });\n});\n\ndescribe(\"getDataLanguage\", () => {\n  it.each([\n    [\"config.json\", \"json\"],\n    [\"config.yaml\", \"yaml\"],\n    [\"config.yml\", \"yaml\"],\n    [\"config.toml\", \"toml\"],\n    [\"data.xml\", \"xml\"],\n    [\"data.csv\", \"csv\"],\n  ])(\"%s → %s\", (filename, expected) => {\n    expect(getDataLanguage(filename)).toBe(expected);\n  });\n\n  it(\"returns null for code files\", () => {\n    expect(getDataLanguage(\"app.py\")).toBeNull();\n    expect(getDataLanguage(\"header.h\")).toBeNull();\n    expect(getDataLanguage(\"view.m\")).toBeNull();\n    expect(getDataLanguage(\"component.re\")).toBeNull();\n  });\n});\n\ndescribe(\"isMarkdownFile\", () => {\n  it(\"recognises markdown extensions\", () => {\n    expect(isMarkdownFile(\"README.md\")).toBe(true);\n    expect(isMarkdownFile(\"doc.markdown\")).toBe(true);\n  });\n\n  it(\"is case-insensitive\", () => {\n    expect(isMarkdownFile(\"NOTES.MD\")).toBe(true);\n  });\n\n  it(\"rejects non-markdown files\", () => {\n    expect(isMarkdownFile(\"app.py\")).toBe(false);\n    expect(isMarkdownFile(\"data.json\")).toBe(false);\n  });\n});\n\ndescribe(\"getLanguageByMime\", () => {\n  it(\"resolves known MIME types\", () => {\n    expect(getLanguageByMime(\"text/x-python\")).toBe(\"python\");\n    expect(getLanguageByMime(\"text/javascript\")).toBe(\"javascript\");\n  });\n\n  it(\"strips parameters before matching\", () => {\n    expect(getLanguageByMime(\"text/x-python; charset=utf-8\")).toBe(\"python\");\n  });\n\n  it(\"returns null for unknown MIME types\", () => {\n    expect(getLanguageByMime(\"application/x-unknown-thing\")).toBeNull();\n  });\n});\n"
  },
  {
    "path": "web/src/lib/languages.ts",
    "content": "import * as languages from \"linguist-languages\";\n\nconst LANGUAGE_EXT_PATTERN = /\\.[^.]+$/;\n\ninterface LinguistLanguage {\n  name: string;\n  type: string;\n  extensions?: string[];\n  filenames?: string[];\n  codemirrorMimeType?: string;\n}\n\ninterface LanguageMaps {\n  extensions: Map<string, string>;\n  filenames: Map<string, string>;\n}\n\n// Explicit winners for extensions claimed by multiple linguist-languages entries\n// where the \"most extensions\" heuristic below picks the wrong language.\nconst EXTENSION_OVERRIDES: Record<string, string> = {\n  \".h\": \"c\",\n  \".inc\": \"php\",\n  \".m\": \"objective-c\",\n  \".re\": \"reason\",\n  \".rs\": \"rust\",\n};\n\n// Sort so that languages with more extensions (i.e. more general-purpose) win\n// when multiple languages claim the same extension (e.g. Ecmarkup vs HTML both\n// claim .html — HTML should win because it's the canonical language for that\n// extension). Known mis-rankings are patched by EXTENSION_OVERRIDES above.\nconst allLanguages = (Object.values(languages) as LinguistLanguage[]).sort(\n  (a, b) => (b.extensions?.length ?? 0) - (a.extensions?.length ?? 0)\n);\n\n// Collect extensions that linguist-languages assigns to \"Markdown\" so we can\n// exclude them from the code-language map\nconst markdownExtensions = new Set(\n  allLanguages\n    .find((lang) => lang.name === \"Markdown\")\n    ?.extensions?.map((ext) => ext.toLowerCase()) ?? []\n);\n\nfunction buildLanguageMaps(\n  types: string[],\n  excludedExtensions?: Set<string>\n): LanguageMaps {\n  const typeSet = new Set(types);\n  const extensions = new Map<string, string>();\n  const filenames = new Map<string, string>();\n\n  if (typeSet.has(\"programming\") || typeSet.has(\"markup\")) {\n    for (const [ext, lang] of Object.entries(EXTENSION_OVERRIDES)) {\n      if (excludedExtensions?.has(ext.toLowerCase())) continue;\n      extensions.set(ext, lang);\n    }\n  }\n\n  for (const lang of allLanguages) {\n    if (!typeSet.has(lang.type)) continue;\n\n    const name = lang.name.toLowerCase();\n    for (const ext of lang.extensions ?? []) {\n      if (excludedExtensions?.has(ext.toLowerCase())) continue;\n      if (!extensions.has(ext)) {\n        extensions.set(ext, name);\n      }\n    }\n    for (const filename of lang.filenames ?? []) {\n      if (!filenames.has(filename.toLowerCase())) {\n        filenames.set(filename.toLowerCase(), name);\n      }\n    }\n  }\n\n  return { extensions, filenames };\n}\n\nfunction lookupLanguage(name: string, maps: LanguageMaps): string | null {\n  const lower = name.toLowerCase();\n  const ext = lower.match(LANGUAGE_EXT_PATTERN)?.[0];\n  return (ext && maps.extensions.get(ext)) ?? maps.filenames.get(lower) ?? null;\n}\n\nconst codeMaps = buildLanguageMaps(\n  [\"programming\", \"markup\"],\n  markdownExtensions\n);\nconst dataMaps = buildLanguageMaps([\"data\"]);\n\n/**\n * Returns the language name for a given file name, or null if it's not a\n * recognised code or markup file (programming + markup types from\n * linguist-languages, e.g. Python, HTML, CSS, Vue). Looks up by extension\n * first, then by exact filename (e.g. \"Dockerfile\", \"Makefile\"). Runs in O(1).\n */\nexport function getCodeLanguage(name: string): string | null {\n  return lookupLanguage(name, codeMaps);\n}\n\n/**\n * Returns the language name for a given file name if it's a recognised\n * \"data\" type in linguist-languages (e.g. JSON, YAML, TOML, XML).\n * Returns null otherwise. Runs in O(1).\n */\nexport function getDataLanguage(name: string): string | null {\n  return lookupLanguage(name, dataMaps);\n}\n\n/**\n * Returns true if the file name has a Markdown extension (as defined by\n * linguist-languages) and should be rendered as rich text rather than code.\n */\nexport function isMarkdownFile(name: string): boolean {\n  const ext = name.toLowerCase().match(LANGUAGE_EXT_PATTERN)?.[0];\n  return !!ext && markdownExtensions.has(ext);\n}\n\nconst mimeToLanguage = new Map<string, string>();\nfor (const lang of allLanguages) {\n  if (lang.codemirrorMimeType && !mimeToLanguage.has(lang.codemirrorMimeType)) {\n    mimeToLanguage.set(lang.codemirrorMimeType, lang.name.toLowerCase());\n  }\n}\n\n/**\n * Returns the language name for a given MIME type using the codemirrorMimeType\n * field from linguist-languages (~297 entries). Returns null if unrecognised.\n */\nexport function getLanguageByMime(mime: string): string | null {\n  const base = mime.split(\";\")[0];\n  if (!base) return null;\n  return mimeToLanguage.get(base.trim().toLowerCase()) ?? null;\n}\n"
  },
  {
    "path": "web/src/lib/llmConfig/cache.ts",
    "content": "import { ScopedMutator } from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nconst PERSONA_PROVIDER_ENDPOINT_PATTERN =\n  /^\\/api\\/llm\\/persona\\/\\d+\\/providers$/;\n\nexport async function refreshLlmProviderCaches(\n  mutate: ScopedMutator\n): Promise<void> {\n  await Promise.all([\n    mutate(SWR_KEYS.adminLlmProviders),\n    mutate(SWR_KEYS.llmProviders),\n    mutate(\n      (key) =>\n        typeof key === \"string\" && PERSONA_PROVIDER_ENDPOINT_PATTERN.test(key)\n    ),\n  ]);\n}\n"
  },
  {
    "path": "web/src/lib/llmConfig/constants.ts",
    "content": "export const LLM_ADMIN_URL = \"/api/admin/llm\";\nexport const LLM_PROVIDERS_ADMIN_URL = `${LLM_ADMIN_URL}/provider`;\nexport const LLM_CHAT_PROVIDERS_URL = \"/api/llm/provider\";\n\nexport const LLM_CONTEXTUAL_COST_ADMIN_URL =\n  \"/api/admin/llm/provider-contextual-cost\";\n\nexport const EMBEDDING_PROVIDERS_ADMIN_URL =\n  \"/api/admin/embedding/embedding-provider\";\n\nexport const EMBEDDING_MODELS_ADMIN_URL = \"/api/admin/embedding\";\n"
  },
  {
    "path": "web/src/lib/llmConfig/providers.ts",
    "content": "import type { IconFunctionComponent } from \"@opal/types\";\nimport {\n  SvgBifrost,\n  SvgCpu,\n  SvgOpenai,\n  SvgClaude,\n  SvgOllama,\n  SvgAws,\n  SvgOpenrouter,\n  SvgServer,\n  SvgAzure,\n  SvgGemini,\n  SvgLitellm,\n  SvgLmStudio,\n} from \"@opal/icons\";\nimport { LLMProviderName } from \"@/interfaces/llm\";\n\nconst PROVIDER_ICONS: Record<string, IconFunctionComponent> = {\n  [LLMProviderName.OPENAI]: SvgOpenai,\n  [LLMProviderName.ANTHROPIC]: SvgClaude,\n  [LLMProviderName.VERTEX_AI]: SvgGemini,\n  [LLMProviderName.BEDROCK]: SvgAws,\n  [LLMProviderName.AZURE]: SvgAzure,\n  [LLMProviderName.LITELLM]: SvgLitellm,\n  [LLMProviderName.LITELLM_PROXY]: SvgLitellm,\n  [LLMProviderName.OLLAMA_CHAT]: SvgOllama,\n  [LLMProviderName.OPENROUTER]: SvgOpenrouter,\n  [LLMProviderName.LM_STUDIO]: SvgLmStudio,\n  [LLMProviderName.BIFROST]: SvgBifrost,\n\n  // fallback\n  [LLMProviderName.CUSTOM]: SvgServer,\n};\n\nconst PROVIDER_PRODUCT_NAMES: Record<string, string> = {\n  [LLMProviderName.OPENAI]: \"GPT\",\n  [LLMProviderName.ANTHROPIC]: \"Claude\",\n  [LLMProviderName.VERTEX_AI]: \"Gemini\",\n  [LLMProviderName.BEDROCK]: \"Amazon Bedrock\",\n  [LLMProviderName.AZURE]: \"Azure OpenAI\",\n  [LLMProviderName.LITELLM]: \"LiteLLM\",\n  [LLMProviderName.LITELLM_PROXY]: \"LiteLLM Proxy\",\n  [LLMProviderName.OLLAMA_CHAT]: \"Ollama\",\n  [LLMProviderName.OPENROUTER]: \"OpenRouter\",\n  [LLMProviderName.LM_STUDIO]: \"LM Studio\",\n  [LLMProviderName.BIFROST]: \"Bifrost\",\n\n  // fallback\n  [LLMProviderName.CUSTOM]: \"Custom Models\",\n};\n\nconst PROVIDER_DISPLAY_NAMES: Record<string, string> = {\n  [LLMProviderName.OPENAI]: \"OpenAI\",\n  [LLMProviderName.ANTHROPIC]: \"Anthropic\",\n  [LLMProviderName.VERTEX_AI]: \"Google Cloud Vertex AI\",\n  [LLMProviderName.BEDROCK]: \"AWS\",\n  [LLMProviderName.AZURE]: \"Microsoft Azure\",\n  [LLMProviderName.LITELLM]: \"LiteLLM\",\n  [LLMProviderName.LITELLM_PROXY]: \"LiteLLM Proxy\",\n  [LLMProviderName.OLLAMA_CHAT]: \"Ollama\",\n  [LLMProviderName.OPENROUTER]: \"OpenRouter\",\n  [LLMProviderName.LM_STUDIO]: \"LM Studio\",\n  [LLMProviderName.BIFROST]: \"Bifrost\",\n\n  // fallback\n  [LLMProviderName.CUSTOM]: \"Other providers or self-hosted\",\n};\n\nexport function getProviderProductName(providerName: string): string {\n  return PROVIDER_PRODUCT_NAMES[providerName] ?? providerName;\n}\n\nexport function getProviderDisplayName(providerName: string): string {\n  return PROVIDER_DISPLAY_NAMES[providerName] ?? providerName;\n}\n\nexport function getProviderIcon(providerName: string): IconFunctionComponent {\n  return PROVIDER_ICONS[providerName] ?? SvgCpu;\n}\n"
  },
  {
    "path": "web/src/lib/llmConfig/svc.ts",
    "content": "/**\n * LLM action functions for mutations.\n *\n * These are async functions for one-off actions that don't need SWR caching.\n *\n * Endpoints:\n * - /api/admin/llm/test/default - Test the default LLM provider connection\n * - /api/admin/llm/default - Set the default LLM model\n * - /api/admin/llm/provider/{id} - Delete an LLM provider\n */\n\nimport {\n  LLM_ADMIN_URL,\n  LLM_PROVIDERS_ADMIN_URL,\n} from \"@/lib/llmConfig/constants\";\n\n/**\n * Test the default LLM provider.\n * Returns true if the default provider is configured and working, false otherwise.\n */\nexport async function testDefaultProvider(): Promise<boolean> {\n  try {\n    const response = await fetch(`${LLM_ADMIN_URL}/test/default`, {\n      method: \"POST\",\n    });\n    return response?.ok || false;\n  } catch {\n    return false;\n  }\n}\n\n/**\n * Set the default LLM model.\n * @param providerId - The provider ID\n * @param modelName - The model name within that provider\n * @throws Error with the detail message from the API on failure\n */\nexport async function setDefaultLlmModel(\n  providerId: number,\n  modelName: string\n): Promise<void> {\n  const response = await fetch(`${LLM_ADMIN_URL}/default`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      provider_id: providerId,\n      model_name: modelName,\n    }),\n  });\n\n  if (!response.ok) {\n    const errorMsg = (await response.json()).detail;\n    throw new Error(errorMsg);\n  }\n}\n\n/**\n * Delete an LLM provider.\n * @param providerId - The provider ID to delete\n * @throws Error with the detail message from the API on failure\n */\nexport async function deleteLlmProvider(providerId: number): Promise<void> {\n  const response = await fetch(`${LLM_PROVIDERS_ADMIN_URL}/${providerId}`, {\n    method: \"DELETE\",\n  });\n\n  if (!response.ok) {\n    const errorMsg = (await response.json()).detail;\n    throw new Error(errorMsg);\n  }\n}\n"
  },
  {
    "path": "web/src/lib/llmConfig/utils.ts",
    "content": "import { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport {\n  DefaultModel,\n  LLMProviderDescriptor,\n  ModelConfiguration,\n} from \"@/interfaces/llm\";\nimport { LlmDescriptor } from \"@/lib/hooks\";\n\nexport function getFinalLLM(\n  llmProviders: LLMProviderDescriptor[],\n  persona: MinimalPersonaSnapshot | null,\n  currentLlm: LlmDescriptor | null,\n  defaultText?: DefaultModel | null\n): [string, string] {\n  const defaultProvider = defaultText\n    ? llmProviders.find((p) => p.id === defaultText.provider_id)\n    : llmProviders.find((p) =>\n        p.model_configurations.some((m) => m.is_visible)\n      );\n\n  let provider = defaultProvider?.provider || \"\";\n  let model =\n    defaultText?.model_name ||\n    defaultProvider?.model_configurations.find((m) => m.is_visible)?.name ||\n    \"\";\n\n  if (persona) {\n    // Map \"provider override\" to actual LLLMProvider\n    if (persona.llm_model_provider_override) {\n      const underlyingProvider = llmProviders.find(\n        (item: LLMProviderDescriptor) =>\n          item.name === persona.llm_model_provider_override\n      );\n      provider = underlyingProvider?.provider || provider;\n    }\n    model = persona.llm_model_version_override || model;\n  }\n\n  if (currentLlm) {\n    provider = currentLlm.provider || provider;\n    model = currentLlm.modelName || model;\n  }\n\n  return [provider, model];\n}\n\nexport function getLLMProviderOverrideForPersona(\n  liveAgent: MinimalPersonaSnapshot,\n  llmProviders: LLMProviderDescriptor[]\n): LlmDescriptor | null {\n  const overrideProvider = liveAgent.llm_model_provider_override;\n  const overrideModel = liveAgent.llm_model_version_override;\n\n  if (!overrideModel) {\n    return null;\n  }\n\n  const matchingProvider = llmProviders.find(\n    (provider) =>\n      (overrideProvider ? provider.name === overrideProvider : true) &&\n      provider.model_configurations\n        .map((modelConfiguration) => modelConfiguration.name)\n        .includes(overrideModel)\n  );\n\n  if (matchingProvider) {\n    return {\n      name: matchingProvider.name,\n      provider: matchingProvider.provider,\n      modelName: overrideModel,\n    };\n  }\n\n  return null;\n}\n\nexport const structureValue = (\n  name: string,\n  provider: string,\n  modelName: string\n) => {\n  return `${name}__${provider}__${modelName}`;\n};\n\nexport const parseLlmDescriptor = (value: string): LlmDescriptor => {\n  const [displayName, provider, modelName] = value.split(\"__\");\n  if (displayName === undefined) {\n    return { name: \"Unknown\", provider: \"\", modelName: \"\" };\n  }\n\n  return {\n    name: displayName,\n    provider: provider ?? \"\",\n    modelName: modelName ?? \"\",\n  };\n};\n\nexport const findModelInModelConfigurations = (\n  modelConfigurations: ModelConfiguration[],\n  modelName: string\n): ModelConfiguration | null => {\n  return modelConfigurations.find((m) => m.name === modelName) || null;\n};\n\nexport const findModelConfiguration = (\n  llmProviders: LLMProviderDescriptor[],\n  modelName: string,\n  providerName: string | null = null\n): ModelConfiguration | null => {\n  if (providerName) {\n    const provider = llmProviders.find((p) => p.name === providerName);\n    return provider\n      ? findModelInModelConfigurations(provider.model_configurations, modelName)\n      : null;\n  }\n\n  for (const provider of llmProviders) {\n    const modelConfiguration = findModelInModelConfigurations(\n      provider.model_configurations,\n      modelName\n    );\n    if (modelConfiguration) {\n      return modelConfiguration;\n    }\n  }\n\n  return null;\n};\n\nexport const modelSupportsImageInput = (\n  llmProviders: LLMProviderDescriptor[],\n  modelName: string,\n  providerName: string | null = null\n): boolean => {\n  const modelConfiguration = findModelConfiguration(\n    llmProviders,\n    modelName,\n    providerName\n  );\n  return modelConfiguration?.supports_image_input || false;\n};\n\nexport function getDisplayName(\n  agent: MinimalPersonaSnapshot,\n  llmProviders: LLMProviderDescriptor[]\n): string | undefined {\n  const llmDescriptor = getLLMProviderOverrideForPersona(\n    agent,\n    llmProviders ?? []\n  );\n  const llmProvider = llmProviders?.find(\n    (llmProvider) => llmProvider.name === agent.llm_model_provider_override\n  );\n  const modelConfig = llmProvider?.model_configurations.find(\n    (modelConfig) => modelConfig.name === llmDescriptor?.modelName\n  );\n  return modelConfig?.display_name;\n}\n"
  },
  {
    "path": "web/src/lib/llmConfig/visionLLM.ts",
    "content": "import { LLMProviderResponse, VisionProvider } from \"@/interfaces/llm\";\nimport { LLM_ADMIN_URL } from \"@/lib/llmConfig/constants\";\n\nexport async function fetchVisionProviders(): Promise<VisionProvider[]> {\n  const response = await fetch(`${LLM_ADMIN_URL}/vision-providers`, {\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n  if (!response.ok) {\n    throw new Error(\n      `Failed to fetch vision providers: ${await response.text()}`\n    );\n  }\n  const data = (await response.json()) as LLMProviderResponse<VisionProvider>;\n  return data.providers;\n}\n\nexport async function setDefaultVisionProvider(\n  providerId: number,\n  visionModel: string\n): Promise<void> {\n  const response = await fetch(`${LLM_ADMIN_URL}/default-vision`, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      provider_id: providerId,\n      model_name: visionModel,\n    }),\n  });\n\n  if (!response.ok) {\n    const errorMsg = await response.text();\n    throw new Error(errorMsg);\n  }\n}\n"
  },
  {
    "path": "web/src/lib/oauth/api.ts",
    "content": "import {\n  OAuthConfig,\n  OAuthConfigCreate,\n  OAuthConfigUpdate,\n  OAuthTokenStatus,\n} from \"@/lib/tools/interfaces\";\n\n// Admin OAuth Config Management\n\nexport async function createOAuthConfig(\n  config: OAuthConfigCreate\n): Promise<OAuthConfig> {\n  const response = await fetch(\"/api/admin/oauth-config/create\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(config),\n  });\n\n  if (!response.ok) {\n    const errorData = await response.json().catch(() => ({}));\n    throw new Error(\n      errorData.detail ||\n        `Failed to create OAuth config: ${response.statusText}`\n    );\n  }\n\n  return await response.json();\n}\n\nexport async function getOAuthConfigs(): Promise<OAuthConfig[]> {\n  const response = await fetch(\"/api/admin/oauth-config\");\n\n  if (!response.ok) {\n    throw new Error(`Failed to fetch OAuth configs: ${response.statusText}`);\n  }\n\n  return await response.json();\n}\n\nexport async function getOAuthConfig(id: number): Promise<OAuthConfig> {\n  const response = await fetch(`/api/admin/oauth-config/${id}`);\n\n  if (!response.ok) {\n    const errorData = await response.json().catch(() => ({}));\n    throw new Error(\n      errorData.detail || `Failed to fetch OAuth config: ${response.statusText}`\n    );\n  }\n\n  return await response.json();\n}\n\nexport async function updateOAuthConfig(\n  id: number,\n  updates: OAuthConfigUpdate\n): Promise<OAuthConfig> {\n  const response = await fetch(`/api/admin/oauth-config/${id}`, {\n    method: \"PUT\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(updates),\n  });\n\n  if (!response.ok) {\n    const errorData = await response.json().catch(() => ({}));\n    throw new Error(\n      errorData.detail ||\n        `Failed to update OAuth config: ${response.statusText}`\n    );\n  }\n\n  return await response.json();\n}\n\nexport async function deleteOAuthConfig(id: number): Promise<void> {\n  const response = await fetch(`/api/admin/oauth-config/${id}`, {\n    method: \"DELETE\",\n  });\n\n  if (!response.ok) {\n    const errorData = await response.json().catch(() => ({}));\n    throw new Error(\n      errorData.detail ||\n        `Failed to delete OAuth config: ${response.statusText}`\n    );\n  }\n}\n\n// User OAuth Flow\n\nexport async function initiateOAuthFlow(\n  oauthConfigId: number,\n  returnPath: string = \"/app\"\n): Promise<void> {\n  const response = await fetch(\"/api/oauth-config/initiate\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      oauth_config_id: oauthConfigId,\n      return_path: returnPath,\n    }),\n  });\n\n  if (!response.ok) {\n    const errorData = await response.json().catch(() => ({}));\n    throw new Error(\n      errorData.detail ||\n        `Failed to initiate OAuth flow: ${response.statusText}`\n    );\n  }\n\n  const data = await response.json();\n  // Redirect to authorization URL\n  window.location.href = data.authorization_url;\n}\n\nexport async function handleOAuthCallback(\n  code: string,\n  state: string,\n  oauthConfigId: number\n): Promise<{ success: boolean; redirect_url: string; error?: string }> {\n  const response = await fetch(\"/api/oauth-config/callback\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      code,\n      state,\n      oauth_config_id: oauthConfigId,\n    }),\n  });\n\n  if (!response.ok) {\n    const errorData = await response.json().catch(() => ({}));\n    throw new Error(\n      errorData.detail || `OAuth callback failed: ${response.statusText}`\n    );\n  }\n\n  return await response.json();\n}\n\nexport async function getUserOAuthTokenStatus(): Promise<OAuthTokenStatus[]> {\n  const response = await fetch(\"/api/user-oauth-token/status\");\n\n  if (!response.ok) {\n    throw new Error(\n      `Failed to fetch OAuth token status: ${response.statusText}`\n    );\n  }\n\n  return await response.json();\n}\n\nexport async function revokeOAuthToken(oauthConfigId: number): Promise<void> {\n  const response = await fetch(`/api/oauth-config/${oauthConfigId}/token`, {\n    method: \"DELETE\",\n  });\n\n  if (!response.ok) {\n    const errorData = await response.json().catch(() => ({}));\n    throw new Error(\n      errorData.detail || `Failed to revoke OAuth token: ${response.statusText}`\n    );\n  }\n}\n"
  },
  {
    "path": "web/src/lib/oauth_utils.ts",
    "content": "import {\n  OAuthBaseCallbackResponse,\n  OAuthConfluenceFinalizeResponse,\n  OAuthConfluencePrepareFinalizationResponse,\n  OAuthPrepareAuthorizationResponse,\n  OAuthSlackCallbackResponse,\n} from \"./types\";\n\n// server side handler to help initiate the oauth authorization request\nexport async function prepareOAuthAuthorizationRequest(\n  connector: string,\n  finalRedirect: string | null // a redirect (not the oauth redirect) for the user to return to after oauth is complete)\n): Promise<OAuthPrepareAuthorizationResponse> {\n  let url = `/api/oauth/prepare-authorization-request?connector=${encodeURIComponent(\n    connector\n  )}`;\n\n  // Conditionally append the `redirect_on_success` parameter\n  if (finalRedirect) {\n    url += `&redirect_on_success=${encodeURIComponent(finalRedirect)}`;\n  }\n\n  const response = await fetch(url, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      connector: connector,\n      redirect_on_success: finalRedirect,\n    }),\n  });\n\n  if (!response.ok) {\n    throw new Error(\n      `Failed to prepare OAuth authorization request: ${response.status}`\n    );\n  }\n\n  // Parse the JSON response\n  const data = (await response.json()) as OAuthPrepareAuthorizationResponse;\n  return data;\n}\n\nexport async function handleOAuthAuthorizationResponse(\n  connector: string,\n  code: string,\n  state: string\n) {\n  if (connector === \"slack\") {\n    return handleOAuthSlackAuthorizationResponse(code, state);\n  }\n\n  if (connector === \"google-drive\") {\n    return handleOAuthGoogleDriveAuthorizationResponse(code, state);\n  }\n\n  if (connector === \"confluence\") {\n    return handleOAuthConfluenceAuthorizationResponse(code, state);\n  }\n\n  return;\n}\n\n// Handler for federated connector OAuth callbacks\nexport async function handleFederatedOAuthCallback(\n  federatedConnectorId: string,\n  code: string,\n  state: string\n): Promise<OAuthBaseCallbackResponse> {\n  // Use the generic callback endpoint - the connector ID will be extracted from the state parameter\n  const url = `/api/federated/callback?code=${encodeURIComponent(\n    code\n  )}&state=${encodeURIComponent(state)}`;\n\n  const response = await fetch(url, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n\n  if (!response.ok) {\n    let errorDetails = `Failed to handle federated OAuth callback: ${response.status}`;\n\n    try {\n      const responseBody = await response.text();\n      errorDetails += `\\nResponse Body: ${responseBody}`;\n    } catch (err) {\n      if (err instanceof Error) {\n        errorDetails += `\\nUnable to read response body: ${err.message}`;\n      } else {\n        errorDetails += `\\nUnable to read response body: Unknown error type`;\n      }\n    }\n\n    throw new Error(errorDetails);\n  }\n\n  // Parse the JSON response and extract the data field\n  const result = await response.json();\n\n  if (!result.success) {\n    throw new Error(result.message || \"OAuth callback failed\");\n  }\n\n  return {\n    success: true,\n    message: result.message || \"OAuth authorization successful\",\n    redirect_on_success: `/admin/federated/${federatedConnectorId}`,\n    finalize_url: null,\n  };\n}\n\n// server side handler to process the oauth redirect callback\n// https://api.slack.com/authentication/oauth-v2#exchanging\nexport async function handleOAuthSlackAuthorizationResponse(\n  code: string,\n  state: string\n): Promise<OAuthSlackCallbackResponse> {\n  const url = `/api/oauth/connector/slack/callback?code=${encodeURIComponent(\n    code\n  )}&state=${encodeURIComponent(state)}`;\n\n  const response = await fetch(url, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({ code, state }),\n  });\n\n  if (!response.ok) {\n    let errorDetails = `Failed to handle OAuth Slack authorization response: ${response.status}`;\n\n    try {\n      const responseBody = await response.text(); // Read the body as text\n      errorDetails += `\\nResponse Body: ${responseBody}`;\n    } catch (err) {\n      if (err instanceof Error) {\n        errorDetails += `\\nUnable to read response body: ${err.message}`;\n      } else {\n        errorDetails += `\\nUnable to read response body: Unknown error type`;\n      }\n    }\n\n    throw new Error(errorDetails);\n  }\n\n  // Parse the JSON response\n  const data = (await response.json()) as OAuthSlackCallbackResponse;\n  return data;\n}\n\nexport async function handleOAuthGoogleDriveAuthorizationResponse(\n  code: string,\n  state: string\n): Promise<OAuthBaseCallbackResponse> {\n  const url = `/api/oauth/connector/google-drive/callback?code=${encodeURIComponent(\n    code\n  )}&state=${encodeURIComponent(state)}`;\n\n  const response = await fetch(url, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({ code, state }),\n  });\n\n  if (!response.ok) {\n    let errorDetails = `Failed to handle OAuth Google Drive authorization response: ${response.status}`;\n\n    try {\n      const responseBody = await response.text(); // Read the body as text\n      errorDetails += `\\nResponse Body: ${responseBody}`;\n    } catch (err) {\n      if (err instanceof Error) {\n        errorDetails += `\\nUnable to read response body: ${err.message}`;\n      } else {\n        errorDetails += `\\nUnable to read response body: Unknown error type`;\n      }\n    }\n\n    throw new Error(errorDetails);\n  }\n\n  // Parse the JSON response\n  const data = (await response.json()) as OAuthBaseCallbackResponse;\n  return data;\n}\n\n// call server side helper\n// https://developer.atlassian.com/cloud/confluence/oauth-2-3lo-apps\nexport async function handleOAuthConfluenceAuthorizationResponse(\n  code: string,\n  state: string\n): Promise<OAuthBaseCallbackResponse> {\n  const url = `/api/oauth/connector/confluence/callback?code=${encodeURIComponent(\n    code\n  )}&state=${encodeURIComponent(state)}`;\n\n  const response = await fetch(url, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({ code, state }),\n  });\n\n  if (!response.ok) {\n    let errorDetails = `Failed to handle OAuth Confluence authorization response: ${response.status}`;\n\n    try {\n      const responseBody = await response.text(); // Read the body as text\n      errorDetails += `\\nResponse Body: ${responseBody}`;\n    } catch (err) {\n      if (err instanceof Error) {\n        errorDetails += `\\nUnable to read response body: ${err.message}`;\n      } else {\n        errorDetails += `\\nUnable to read response body: Unknown error type`;\n      }\n    }\n\n    throw new Error(errorDetails);\n  }\n\n  // Parse the JSON response\n  const data = (await response.json()) as OAuthBaseCallbackResponse;\n  return data;\n}\n\nexport async function handleOAuthPrepareFinalization(\n  connector: string,\n  credential: number\n) {\n  if (connector === \"confluence\") {\n    return handleOAuthConfluencePrepareFinalization(credential);\n  }\n\n  return;\n}\n\n// call server side helper\n// https://developer.atlassian.com/cloud/confluence/oauth-2-3lo-apps\nexport async function handleOAuthConfluencePrepareFinalization(\n  credential: number\n): Promise<OAuthConfluencePrepareFinalizationResponse> {\n  const url = `/api/oauth/connector/confluence/accessible-resources?credential_id=${encodeURIComponent(\n    credential\n  )}`;\n\n  const response = await fetch(url, {\n    method: \"GET\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n\n  if (!response.ok) {\n    let errorDetails = `Failed to handle OAuth Confluence prepare finalization response: ${response.status}`;\n\n    try {\n      const responseBody = await response.text(); // Read the body as text\n      errorDetails += `\\nResponse Body: ${responseBody}`;\n    } catch (err) {\n      if (err instanceof Error) {\n        errorDetails += `\\nUnable to read response body: ${err.message}`;\n      } else {\n        errorDetails += `\\nUnable to read response body: Unknown error type`;\n      }\n    }\n\n    throw new Error(errorDetails);\n  }\n\n  // Parse the JSON response\n  const data =\n    (await response.json()) as OAuthConfluencePrepareFinalizationResponse;\n  return data;\n}\n\nexport async function handleOAuthConfluenceFinalize(\n  credential_id: number,\n  cloud_id: string,\n  cloud_name: string,\n  cloud_url: string\n): Promise<OAuthConfluenceFinalizeResponse> {\n  const url = `/api/oauth/connector/confluence/finalize?credential_id=${encodeURIComponent(\n    credential_id\n  )}&cloud_id=${encodeURIComponent(cloud_id)}&cloud_name=${encodeURIComponent(\n    cloud_name\n  )}&cloud_url=${encodeURIComponent(cloud_url)}`;\n\n  const response = await fetch(url, {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n  });\n\n  if (!response.ok) {\n    let errorDetails = `Failed to handle OAuth Confluence finalization response: ${response.status}`;\n\n    try {\n      const responseBody = await response.text(); // Read the body as text\n      errorDetails += `\\nResponse Body: ${responseBody}`;\n    } catch (err) {\n      if (err instanceof Error) {\n        errorDetails += `\\nUnable to read response body: ${err.message}`;\n      } else {\n        errorDetails += `\\nUnable to read response body: Unknown error type`;\n      }\n    }\n\n    throw new Error(errorDetails);\n  }\n\n  // Parse the JSON response\n  const data = (await response.json()) as OAuthConfluenceFinalizeResponse;\n  return data;\n}\n"
  },
  {
    "path": "web/src/lib/redirectSS.ts",
    "content": "import { NextRequest } from \"next/server\";\n\nexport const getDomain = (request: NextRequest) => {\n  // Use the WEB_DOMAIN env variable if set (required in production).\n  // Never trust X-Forwarded-* headers from the request — they can be\n  // spoofed by an attacker to poison redirect URLs (host header poisoning).\n  if (process.env.WEB_DOMAIN) {\n    return process.env.WEB_DOMAIN;\n  }\n\n  // Fallback for local development: use Next.js's own origin.\n  return request.nextUrl.origin;\n};\n"
  },
  {
    "path": "web/src/lib/search/interfaces.ts",
    "content": "import { DateRangePickerValue } from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\nimport { Tag, ValidSources } from \"../types\";\nimport { Persona } from \"@/app/admin/agents/interfaces\";\n\nexport const FlowType = {\n  SEARCH: \"search\",\n  QUESTION_ANSWER: \"question-answer\",\n};\nexport type FlowType = (typeof FlowType)[keyof typeof FlowType];\nexport const SearchType = {\n  SEMANTIC: \"semantic\",\n  KEYWORD: \"keyword\",\n  AUTOMATIC: \"automatic\",\n  INTERNET: \"internet\",\n};\nexport type SearchType = (typeof SearchType)[keyof typeof SearchType];\n\nexport interface ToolResponse {\n  id?: string | null;\n  response?: any;\n}\nexport interface ExtendedToolResponse extends ToolResponse {\n  level: number;\n  level_question_num: number;\n}\n\nexport enum StreamStopReason {\n  CONTEXT_LENGTH = \"CONTEXT_LENGTH\",\n  CANCELLED = \"CANCELLED\",\n}\n\nexport interface StreamStopInfo {\n  stop_reason: StreamStopReason;\n  level?: number;\n  level_question_num?: number;\n  stream_type?: \"sub_answer\" | \"sub_questions\" | \"main_answer\";\n}\n\nexport interface ErrorMessagePacket {\n  error: string;\n}\n\nexport interface Quote {\n  quote: string;\n  document_id: string;\n  link: string | null;\n  source_type: ValidSources;\n  blurb: string;\n  semantic_identifier: string;\n}\n\nexport interface QuotesInfoPacket {\n  quotes: Quote[];\n}\nexport interface MinimalOnyxDocument {\n  document_id: string;\n  semantic_identifier: string | null;\n}\n\nexport interface OnyxDocument extends MinimalOnyxDocument {\n  link: string;\n  source_type: ValidSources;\n  blurb: string;\n  boost: number;\n  hidden: boolean;\n  score: number;\n  chunk_ind: number;\n  match_highlights: string[];\n  metadata: { [key: string]: string };\n  updated_at: string | null;\n  db_doc_id?: number;\n  is_internet: boolean;\n  validationState?: null | \"good\" | \"bad\";\n}\n\nexport interface LoadedOnyxDocument extends OnyxDocument {\n  icon: React.FC<{ size?: number; className?: string }>;\n}\n\nexport interface SearchOnyxDocument extends OnyxDocument {\n  is_relevant: boolean;\n  relevance_explanation: string;\n}\n\nexport interface FilteredOnyxDocument extends OnyxDocument {\n  included: boolean;\n}\nexport interface DocumentInfoPacket {\n  top_documents: OnyxDocument[];\n  predicted_flow: FlowType | null;\n  predicted_search: SearchType | null;\n  time_cutoff: string | null;\n  favor_recent: boolean;\n}\n\nexport interface DocumentRelevance {\n  relevant: boolean;\n  content: string;\n}\n\nexport interface Relevance {\n  [url: string]: DocumentRelevance;\n}\n\nexport interface RelevanceChunk {\n  relevance_summaries: Relevance;\n}\n\nexport interface SearchResponse {\n  suggestedSearchType: SearchType | null;\n  suggestedFlowType: FlowType | null;\n  answer: string | null;\n  quotes: Quote[] | null;\n  documents: SearchOnyxDocument[] | null;\n  selectedDocIndices: number[] | null;\n  error: string | null;\n  messageId: number | null;\n  additional_relevance?: Relevance;\n}\n\nexport enum SourceCategory {\n  Wiki = \"Knowledge Base & Wikis\",\n  Storage = \"Cloud Storage\",\n  TicketingAndTaskManagement = \"Ticketing & Task Management\",\n  Messaging = \"Messaging\",\n  Sales = \"Sales\",\n  CodeRepository = \"Code Repository\",\n  Other = \"Others\",\n}\n\nexport interface SourceMetadata {\n  icon: React.FC<{ size?: number; className?: string }>;\n  displayName: string;\n  category: SourceCategory;\n  shortDescription?: string;\n  internalName: ValidSources;\n  adminUrl: string;\n  isPopular?: boolean;\n  oauthSupported?: boolean;\n  federated?: boolean;\n  federatedTooltip?: string;\n  uniqueKey?: string;\n  // For federated connectors, this stores the base source type for the icon\n  baseSourceType?: ValidSources;\n  // For connectors that are always available (don't need connection setup)\n  // e.g., User Library (CraftFile) where users just upload files\n  alwaysConnected?: boolean;\n  // Custom description to show instead of status (e.g., \"Manage your uploaded files\")\n  customDescription?: string;\n}\n\nexport interface SearchDefaultOverrides {\n  forceDisplayQA: boolean;\n  offset: number;\n}\n\nexport interface Filters {\n  source_type: string[] | null;\n  document_set: string[] | null;\n  time_cutoff: Date | null;\n}\n\nexport interface SearchRequestArgs {\n  query: string;\n  agentic?: boolean;\n  sources: SourceMetadata[];\n  documentSets: string[];\n  timeRange: DateRangePickerValue | null;\n  tags: Tag[];\n  persona: Persona;\n  updateDocumentRelevance: (relevance: any) => void;\n  updateCurrentAnswer: (val: string) => void;\n  updateQuotes: (quotes: Quote[]) => void;\n  updateDocs: (documents: OnyxDocument[]) => void;\n  updateSelectedDocIndices: (docIndices: number[]) => void;\n  updateSuggestedSearchType: (searchType: SearchType) => void;\n  updateSuggestedFlowType: (flowType: FlowType) => void;\n  updateError: (error: string) => void;\n  updateMessageAndThreadId: (\n    messageId: number,\n    chat_session_id: string\n  ) => void;\n  finishedSearching: () => void;\n  updateComments: (comments: any) => void;\n  selectedSearchType: SearchType | null;\n}\n\nexport interface SearchRequestOverrides {\n  searchType?: SearchType;\n  offset?: number;\n  overrideMessage?: string;\n  agentic?: boolean;\n}\n\nexport interface ValidQuestionResponse {\n  reasoning: string | null;\n  error: string | null;\n}\n\n// ============================================================================\n// Classification API\n// ============================================================================\n\n/**\n * Request to classify a query as search or chat flow\n * POST /api/search/search-flow-classification\n */\nexport interface SearchFlowClassificationRequest {\n  user_query: string;\n}\n\n/**\n * Response from query classification\n */\nexport interface SearchFlowClassificationResponse {\n  is_search_flow: boolean;\n}\n\n// ============================================================================\n// Search API (Unified Search + Chat)\n// ============================================================================\n\n/**\n * Base filters for search queries\n * Matches backend/onyx/context/search/models.py BaseFilters\n */\nexport interface BaseFilters {\n  source_type?: ValidSources[] | null;\n  document_set?: string[] | null;\n  time_cutoff?: string | null; // ISO date string\n  tags?: Array<{ tag_key: string; tag_value: string }> | null;\n}\n\n/**\n * Request to perform a document search\n * POST /api/search/send-search-message\n */\nexport interface SendSearchQueryRequest {\n  search_query: string;\n  filters?: BaseFilters | null;\n  num_docs_fed_to_llm_selection?: number | null;\n  run_query_expansion?: boolean;\n  num_hits?: number; // default 30\n  include_content?: boolean;\n  stream?: boolean;\n}\n\n/**\n * Search document with optional content\n * Matches backend SearchDocWithContent\n */\nexport interface SearchDocWithContent {\n  document_id: string;\n  chunk_ind: number;\n  semantic_identifier: string;\n  link: string | null;\n  blurb: string;\n  source_type: ValidSources;\n  boost: number;\n  hidden: boolean;\n  metadata: Record<string, string | string[]>;\n  score: number | null;\n  is_relevant?: boolean | null;\n  relevance_explanation?: string | null;\n  match_highlights: string[];\n  updated_at: string | null; // ISO date string\n  primary_owners?: string[] | null;\n  secondary_owners?: string[] | null;\n  is_internet: boolean;\n  content?: string | null;\n}\n\n/**\n * Full response from a search query (non-streaming)\n */\nexport interface SearchFullResponse {\n  all_executed_queries: string[];\n  search_docs: SearchDocWithContent[];\n  doc_selection_reasoning?: string | null;\n  llm_selected_doc_ids?: string[] | null;\n  error?: string | null;\n}\n\n// ============================================================================\n// Search History API\n// ============================================================================\n\n/**\n * Single search query in history\n */\nexport interface SearchQueryResponse {\n  query: string;\n  query_expansions: string[] | null;\n  created_at: string; // ISO date string\n}\n\n/**\n * Response from search history endpoint\n * GET /api/search/search-history\n */\nexport interface SearchHistoryResponse {\n  search_queries: SearchQueryResponse[];\n}\n\n// ============================================================================\n// Streaming Packets (for stream=true)\n// ============================================================================\n\nexport interface SearchDocsPacket {\n  type: \"search_docs\";\n  search_docs: SearchDocWithContent[];\n}\n\nexport interface SearchErrorPacket {\n  type: \"search_error\";\n  error: string;\n}\n\nexport interface LLMSelectedDocsPacket {\n  type: \"llm_selected_docs\";\n  llm_selected_doc_ids: string[] | null;\n}\n\nexport interface QueryExpansionsPacket {\n  type: \"query_expansions\";\n  executed_queries: string[];\n}\n\nexport interface DocSelectionReasoningPacket {\n  type: \"doc_selection_reasoning\";\n  reasoning: string;\n}\n\nexport type SearchStreamPacket =\n  | SearchDocsPacket\n  | SearchErrorPacket\n  | LLMSelectedDocsPacket\n  | QueryExpansionsPacket\n  | DocSelectionReasoningPacket;\n"
  },
  {
    "path": "web/src/lib/search/streamingUtils.ts",
    "content": "import { PacketType } from \"@/app/app/services/lib\";\n\nexport async function* handleSSEStream<T extends PacketType>(\n  streamingResponse: Response,\n  signal?: AbortSignal\n): AsyncGenerator<T, void, unknown> {\n  const reader = streamingResponse.body?.getReader();\n  const decoder = new TextDecoder();\n  let buffer = \"\";\n  if (signal) {\n    signal.addEventListener(\"abort\", () => {\n      console.log(\"aborting\");\n      reader?.cancel();\n    });\n  }\n  while (true) {\n    const rawChunk = await reader?.read();\n    if (!rawChunk) {\n      throw new Error(\"Unable to process chunk\");\n    }\n    const { done, value } = rawChunk;\n    if (done) {\n      break;\n    }\n\n    buffer += decoder.decode(value, { stream: true });\n    const lines = buffer.split(\"\\n\");\n    buffer = lines.pop() || \"\";\n\n    for (const line of lines) {\n      if (line.trim() === \"\") continue;\n\n      try {\n        const data = JSON.parse(line) as T;\n        yield data;\n      } catch (error) {\n        console.error(\"Error parsing SSE data:\", error);\n\n        // Detect JSON objects (ie. check if parseable json has been accumulated)\n        const jsonObjects = line.match(/\\{[^{}]*\\}/g);\n        if (jsonObjects) {\n          for (const jsonObj of jsonObjects) {\n            try {\n              const data = JSON.parse(jsonObj) as T;\n              yield data;\n            } catch (innerError) {\n              console.error(\"Error parsing extracted JSON:\", innerError);\n            }\n          }\n        }\n      }\n    }\n  }\n\n  // Process any remaining data in the buffer\n  if (buffer.trim() !== \"\") {\n    try {\n      const data = JSON.parse(buffer) as T;\n      yield data;\n    } catch (error) {\n      console.error(\"Error parsing remaining buffer:\", error);\n    }\n  }\n}\n"
  },
  {
    "path": "web/src/lib/search/utils.ts",
    "content": "import { Tag, ValidSources } from \"../types\";\nimport {\n  Filters,\n  MinimalOnyxDocument,\n  OnyxDocument,\n  SourceMetadata,\n} from \"./interfaces\";\nimport { DateRangePickerValue } from \"@/components/dateRangeSelectors/AdminDateRangeSelector\";\n\nexport const buildFilters = (\n  sources: SourceMetadata[],\n  documentSets: string[],\n  timeRange: DateRangePickerValue | null,\n  tags: Tag[]\n): Filters => {\n  const filters = {\n    source_type:\n      sources.length > 0 ? sources.map((source) => source.internalName) : null,\n    document_set: documentSets.length > 0 ? documentSets : null,\n    time_cutoff: timeRange?.from ? timeRange.from : null,\n    tags: tags,\n  };\n\n  return filters;\n};\n\n// If we have a link, open it in a new tab (including if it's a file)\n// If above fails and we have a file, update the presenting document\nexport const openDocument = (\n  document: OnyxDocument,\n  updatePresentingDocument?: (document: MinimalOnyxDocument) => void\n) => {\n  if (document.link) {\n    window.open(document.link, \"_blank\");\n  } else if (document.source_type === ValidSources.File) {\n    updatePresentingDocument?.(document);\n  }\n};\n"
  },
  {
    "path": "web/src/lib/search/utilsSS.ts",
    "content": "import { DocumentSetSummary } from \"../types\";\nimport { fetchSS } from \"../utilsSS\";\nimport { Connector } from \"../connectors/connectors\";\n\nexport async function fetchValidFilterInfo() {\n  const [connectorsResponse, documentSetResponse] = await Promise.all([\n    fetchSS(\"/manage/connector\"),\n    fetchSS(\"/manage/document-set\"),\n  ]);\n\n  let connectors = [] as Connector<any>[];\n  if (connectorsResponse.ok) {\n    connectors = (await connectorsResponse.json()) as Connector<any>[];\n  } else {\n    console.log(\n      `Failed to fetch connectors - ${connectorsResponse.status} - ${connectorsResponse.statusText}`\n    );\n  }\n\n  let documentSets = [] as DocumentSetSummary[];\n  if (documentSetResponse.ok) {\n    documentSets = (await documentSetResponse.json()) as DocumentSetSummary[];\n  } else {\n    console.log(\n      `Failed to fetch document sets - ${documentSetResponse.status} - ${documentSetResponse.statusText}`\n    );\n  }\n\n  return { connectors, documentSets };\n}\n"
  },
  {
    "path": "web/src/lib/sources.ts",
    "content": "import {\n  AxeroIcon,\n  BookstackIcon,\n  OutlineIcon,\n  ClickupIcon,\n  ConfluenceIcon,\n  DiscourseIcon,\n  Document360Icon,\n  DropboxIcon,\n  GithubIcon,\n  GitlabIcon,\n  BitbucketIcon,\n  GmailIcon,\n  GongIcon,\n  GoogleDriveIcon,\n  GoogleSitesIcon,\n  GuruIcon,\n  HubSpotIcon,\n  JiraIcon,\n  LinearIcon,\n  LoopioIcon,\n  CodaIcon,\n  NotionIcon,\n  ProductboardIcon,\n  R2Icon,\n  SalesforceIcon,\n  SharepointIcon,\n  TeamsIcon,\n  SlabIcon,\n  ZendeskIcon,\n  ZulipIcon,\n  MediaWikiIcon,\n  WikipediaIcon,\n  AsanaIcon,\n  S3Icon,\n  OCIStorageIcon,\n  GoogleStorageIcon,\n  ColorSlackIcon,\n  XenforoIcon,\n  ColorDiscordIcon,\n  FreshdeskIcon,\n  FirefliesIcon,\n  EgnyteIcon,\n  AirtableIcon,\n  GitbookIcon,\n  HighspotIcon,\n  DrupalWikiIcon,\n  EmailIcon,\n  TestRailIcon,\n} from \"@/components/icons/icons\";\nimport { ValidSources } from \"./types\";\nimport { SourceCategory, SourceMetadata } from \"./search/interfaces\";\nimport { Persona } from \"@/app/admin/agents/interfaces\";\nimport React from \"react\";\nimport { DOCS_ADMINS_PATH } from \"./constants\";\nimport { SvgFileText, SvgGlobe } from \"@opal/icons\";\n\ninterface PartialSourceMetadata {\n  icon: React.FC<{ size?: number; className?: string }>;\n  displayName: string;\n  category: SourceCategory;\n  isPopular?: boolean;\n  docs?: string;\n  oauthSupported?: boolean;\n  federated?: boolean;\n  federatedTooltip?: string;\n  // federated connectors store the base source type if it's a source\n  // that has both indexed connectors and federated connectors\n  baseSourceType?: ValidSources;\n  // For connectors that are always available (don't need connection setup)\n  // e.g., User Library (CraftFile) where users just upload files\n  alwaysConnected?: boolean;\n  // Custom description to show instead of status (e.g., \"Manage your uploaded files\")\n  customDescription?: string;\n}\n\ntype SourceMap = {\n  [K in ValidSources | \"federated_slack\"]: PartialSourceMetadata;\n};\n\nconst slackMetadata = {\n  icon: ColorSlackIcon,\n  displayName: \"Slack\",\n  category: SourceCategory.Messaging,\n  isPopular: true,\n  docs: `${DOCS_ADMINS_PATH}/connectors/official/slack`,\n  oauthSupported: true,\n  // Federated Slack is available as an option but not the default\n  federated: true,\n  federatedTooltip:\n    \"⚠️ WARNING: Federated Slack results in significantly greater latency and lower search quality.\",\n  baseSourceType: \"slack\",\n};\n\nexport const SOURCE_METADATA_MAP: SourceMap = {\n  // Knowledge Base & Wikis\n  confluence: {\n    icon: ConfluenceIcon,\n    displayName: \"Confluence\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/confluence`,\n    oauthSupported: true,\n    isPopular: true,\n  },\n  sharepoint: {\n    icon: SharepointIcon,\n    displayName: \"Sharepoint\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/sharepoint`,\n    isPopular: true,\n  },\n  coda: {\n    icon: CodaIcon,\n    displayName: \"Coda\",\n    category: SourceCategory.Wiki,\n    docs: \"https://docs.onyx.app/connectors/coda\",\n  },\n  notion: {\n    icon: NotionIcon,\n    displayName: \"Notion\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/notion`,\n  },\n  bookstack: {\n    icon: BookstackIcon,\n    displayName: \"BookStack\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/bookstack`,\n  },\n  document360: {\n    icon: Document360Icon,\n    displayName: \"Document360\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/document360`,\n  },\n  discourse: {\n    icon: DiscourseIcon,\n    displayName: \"Discourse\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/discourse`,\n  },\n  gitbook: {\n    icon: GitbookIcon,\n    displayName: \"GitBook\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/gitbook`,\n  },\n  slab: {\n    icon: SlabIcon,\n    displayName: \"Slab\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/slab`,\n  },\n  outline: {\n    icon: OutlineIcon,\n    displayName: \"Outline\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/outline`,\n  },\n  google_sites: {\n    icon: GoogleSitesIcon,\n    displayName: \"Google Sites\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/google_sites`,\n  },\n  guru: {\n    icon: GuruIcon,\n    displayName: \"Guru\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/guru`,\n  },\n  mediawiki: {\n    icon: MediaWikiIcon,\n    displayName: \"MediaWiki\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/mediawiki`,\n  },\n  axero: {\n    icon: AxeroIcon,\n    displayName: \"Axero\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/axero`,\n  },\n  wikipedia: {\n    icon: WikipediaIcon,\n    displayName: \"Wikipedia\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/wikipedia`,\n  },\n\n  // Cloud Storage\n  google_drive: {\n    icon: GoogleDriveIcon,\n    displayName: \"Google Drive\",\n    category: SourceCategory.Storage,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/google_drive/overview`,\n    oauthSupported: true,\n    isPopular: true,\n  },\n  dropbox: {\n    icon: DropboxIcon,\n    displayName: \"Dropbox\",\n    category: SourceCategory.Storage,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/dropbox`,\n  },\n  s3: {\n    icon: S3Icon,\n    displayName: \"S3\",\n    category: SourceCategory.Storage,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/s3`,\n  },\n  google_cloud_storage: {\n    icon: GoogleStorageIcon,\n    displayName: \"Google Storage\",\n    category: SourceCategory.Storage,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/google_storage`,\n  },\n  egnyte: {\n    icon: EgnyteIcon,\n    displayName: \"Egnyte\",\n    category: SourceCategory.Storage,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/egnyte`,\n  },\n  oci_storage: {\n    icon: OCIStorageIcon,\n    displayName: \"Oracle Storage\",\n    category: SourceCategory.Storage,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/oci_storage`,\n  },\n  r2: {\n    icon: R2Icon,\n    displayName: \"R2\",\n    category: SourceCategory.Storage,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/r2`,\n  },\n\n  // Ticketing & Task Management\n  jira: {\n    icon: JiraIcon,\n    displayName: \"Jira\",\n    category: SourceCategory.TicketingAndTaskManagement,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/jira`,\n    isPopular: true,\n  },\n  zendesk: {\n    icon: ZendeskIcon,\n    displayName: \"Zendesk\",\n    category: SourceCategory.TicketingAndTaskManagement,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/zendesk`,\n    isPopular: true,\n  },\n  airtable: {\n    icon: AirtableIcon,\n    displayName: \"Airtable\",\n    category: SourceCategory.TicketingAndTaskManagement,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/airtable`,\n  },\n  linear: {\n    icon: LinearIcon,\n    displayName: \"Linear\",\n    category: SourceCategory.TicketingAndTaskManagement,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/linear`,\n  },\n  freshdesk: {\n    icon: FreshdeskIcon,\n    displayName: \"Freshdesk\",\n    category: SourceCategory.TicketingAndTaskManagement,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/freshdesk`,\n  },\n  asana: {\n    icon: AsanaIcon,\n    displayName: \"Asana\",\n    category: SourceCategory.TicketingAndTaskManagement,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/asana`,\n  },\n  clickup: {\n    icon: ClickupIcon,\n    displayName: \"Clickup\",\n    category: SourceCategory.TicketingAndTaskManagement,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/clickup`,\n  },\n  productboard: {\n    icon: ProductboardIcon,\n    displayName: \"Productboard\",\n    category: SourceCategory.TicketingAndTaskManagement,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/productboard`,\n  },\n  testrail: {\n    icon: TestRailIcon,\n    displayName: \"TestRail\",\n    category: SourceCategory.TicketingAndTaskManagement,\n  },\n\n  // Messaging\n  slack: slackMetadata,\n  federated_slack: slackMetadata,\n  teams: {\n    icon: TeamsIcon,\n    displayName: \"Teams\",\n    category: SourceCategory.Messaging,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/teams`,\n  },\n  gmail: {\n    icon: GmailIcon,\n    displayName: \"Gmail\",\n    category: SourceCategory.Messaging,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/gmail/overview`,\n  },\n  drupal_wiki: {\n    icon: DrupalWikiIcon,\n    displayName: \"Drupal Wiki\",\n    category: SourceCategory.Wiki,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/drupal_wiki`,\n  },\n  imap: {\n    icon: EmailIcon,\n    displayName: \"Email\",\n    category: SourceCategory.Messaging,\n  },\n  discord: {\n    icon: ColorDiscordIcon,\n    displayName: \"Discord\",\n    category: SourceCategory.Messaging,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/discord`,\n  },\n  xenforo: {\n    icon: XenforoIcon,\n    displayName: \"Xenforo\",\n    category: SourceCategory.Messaging,\n  },\n  zulip: {\n    icon: ZulipIcon,\n    displayName: \"Zulip\",\n    category: SourceCategory.Messaging,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/zulip`,\n  },\n\n  // Sales\n  salesforce: {\n    icon: SalesforceIcon,\n    displayName: \"Salesforce\",\n    category: SourceCategory.Sales,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/salesforce`,\n    isPopular: true,\n  },\n  hubspot: {\n    icon: HubSpotIcon,\n    displayName: \"HubSpot\",\n    category: SourceCategory.Sales,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/hubspot`,\n    isPopular: true,\n  },\n  gong: {\n    icon: GongIcon,\n    displayName: \"Gong\",\n    category: SourceCategory.Sales,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/gong`,\n    isPopular: true,\n  },\n  fireflies: {\n    icon: FirefliesIcon,\n    displayName: \"Fireflies\",\n    category: SourceCategory.Sales,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/fireflies`,\n  },\n  highspot: {\n    icon: HighspotIcon,\n    displayName: \"Highspot\",\n    category: SourceCategory.Sales,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/highspot`,\n  },\n  loopio: {\n    icon: LoopioIcon,\n    displayName: \"Loopio\",\n    category: SourceCategory.Sales,\n  },\n\n  // Code Repository\n  github: {\n    icon: GithubIcon,\n    displayName: \"Github\",\n    category: SourceCategory.CodeRepository,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/github`,\n    isPopular: true,\n  },\n  gitlab: {\n    icon: GitlabIcon,\n    displayName: \"Gitlab\",\n    category: SourceCategory.CodeRepository,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/gitlab`,\n  },\n  bitbucket: {\n    icon: BitbucketIcon,\n    displayName: \"Bitbucket\",\n    category: SourceCategory.CodeRepository,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/bitbucket`,\n  },\n\n  // Others\n  web: {\n    icon: SvgGlobe,\n    displayName: \"Web\",\n    category: SourceCategory.Other,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/web`,\n    isPopular: true,\n  },\n  file: {\n    icon: SvgFileText,\n    displayName: \"File\",\n    category: SourceCategory.Other,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/file`,\n    isPopular: true,\n  },\n  user_file: {\n    // TODO: write docs for projects and link them here\n    icon: SvgFileText,\n    displayName: \"File\",\n    category: SourceCategory.Other,\n    docs: `${DOCS_ADMINS_PATH}/connectors/official/file`,\n    isPopular: false, // Needs to be false to hide from the Add Connector page\n  },\n\n  // Other\n  ingestion_api: {\n    icon: SvgGlobe,\n    displayName: \"Ingestion\",\n    category: SourceCategory.Other,\n  },\n\n  // Craft-specific sources\n  craft_file: {\n    icon: SvgFileText,\n    displayName: \"Your Files\",\n    category: SourceCategory.Other,\n    isPopular: false, // Hidden from standard Add Connector page\n    alwaysConnected: true, // No setup required, just upload files\n    customDescription: \"Manage your uploaded files\",\n  },\n\n  // Placeholder (non-null default)\n  not_applicable: {\n    icon: SvgGlobe,\n    displayName: \"Not Applicable\",\n    category: SourceCategory.Other,\n  },\n  mock_connector: {\n    icon: SvgGlobe,\n    displayName: \"Mock Connector\",\n    category: SourceCategory.Other,\n  },\n} as SourceMap;\n\nfunction fillSourceMetadata(\n  partialMetadata: PartialSourceMetadata,\n  internalName: ValidSources\n): SourceMetadata {\n  return {\n    internalName: partialMetadata.baseSourceType || internalName,\n    ...partialMetadata,\n    adminUrl: `/admin/connectors/${internalName}`,\n  };\n}\n\nexport function getSourceMetadata(sourceType: ValidSources): SourceMetadata {\n  const partialMetadata = SOURCE_METADATA_MAP[sourceType];\n\n  // Fallback to not_applicable if sourceType not found in map\n  if (!partialMetadata) {\n    return fillSourceMetadata(\n      SOURCE_METADATA_MAP[ValidSources.NotApplicable],\n      ValidSources.NotApplicable\n    );\n  }\n\n  return fillSourceMetadata(partialMetadata, sourceType);\n}\n\nexport function listSourceMetadata(): SourceMetadata[] {\n  /* This gives back all the viewable / common sources, primarily for\n  display in the Add Connector page */\n  const entries = Object.entries(SOURCE_METADATA_MAP)\n    .filter(\n      ([source, _]) =>\n        source !== \"not_applicable\" &&\n        source !== \"ingestion_api\" &&\n        source !== \"mock_connector\" &&\n        // use the \"regular\" slack connector when listing\n        source !== \"federated_slack\" &&\n        // user_file is for internal use (projects), not the Add Connector page\n        source !== \"user_file\"\n    )\n    .map(([source, metadata]) => {\n      return fillSourceMetadata(metadata, source as ValidSources);\n    });\n  return entries;\n}\n\nexport function getSourceDocLink(sourceType: ValidSources): string | null {\n  return SOURCE_METADATA_MAP[sourceType].docs || null;\n}\n\nexport const isValidSource = (sourceType: string) => {\n  return Object.keys(SOURCE_METADATA_MAP).includes(sourceType);\n};\n\nexport function getSourceDisplayName(sourceType: ValidSources): string | null {\n  return getSourceMetadata(sourceType).displayName;\n}\n\nexport function getSourceMetadataForSources(sources: ValidSources[]) {\n  return sources.map((source) => getSourceMetadata(source));\n}\n\nexport function getSourcesForPersona(persona: Persona): ValidSources[] {\n  const personaSources: ValidSources[] = [];\n  persona.document_sets.forEach((documentSet) => {\n    documentSet.cc_pair_summaries.forEach((ccPair) => {\n      if (!personaSources.includes(ccPair.source)) {\n        personaSources.push(ccPair.source);\n      }\n    });\n  });\n  return personaSources;\n}\n\nexport async function fetchTitleFromUrl(url: string): Promise<string | null> {\n  try {\n    const response = await fetch(url, {\n      method: \"GET\",\n      // If the remote site has no CORS header, this may fail in the browser\n      mode: \"cors\",\n    });\n    if (!response.ok) {\n      // Non-200 response, treat as a failure\n      return null;\n    }\n    const html = await response.text();\n    const parser = new DOMParser();\n    const doc = parser.parseFromString(html, \"text/html\");\n    // If the site has <title>My Demo Page</title>, we retrieve \"My Demo Page\"\n    const pageTitle = doc.querySelector(\"title\")?.innerText.trim() ?? null;\n    return pageTitle;\n  } catch (error) {\n    console.error(\"Error fetching page title:\", error);\n    return null;\n  }\n}\n"
  },
  {
    "path": "web/src/lib/streamingTTS.ts",
    "content": "/**\n * Real-time streaming TTS using HTTP streaming with MediaSource Extensions.\n * Plays audio chunks as they arrive for smooth, low-latency playback.\n */\n\nimport { INTERNAL_URL, IS_DEV } from \"@/lib/constants\";\n\n/**\n * HTTPStreamingTTSPlayer - Uses HTTP streaming with MediaSource Extensions\n * for smooth, gapless audio playback. This is the recommended approach for\n * real-time TTS as it properly handles MP3 frame boundaries.\n */\nexport class HTTPStreamingTTSPlayer {\n  private mediaSource: MediaSource | null = null;\n  private mediaSourceUrl: string | null = null;\n  private sourceBuffer: SourceBuffer | null = null;\n  private audioElement: HTMLAudioElement | null = null;\n  private pendingChunks: Uint8Array[] = [];\n  private isAppending: boolean = false;\n  private isPlaying: boolean = false;\n  private streamComplete: boolean = false;\n  private onPlayingChange?: (playing: boolean) => void;\n  private onError?: (error: string) => void;\n  private abortController: AbortController | null = null;\n  private isMuted: boolean = false;\n\n  constructor(options?: {\n    onPlayingChange?: (playing: boolean) => void;\n    onError?: (error: string) => void;\n  }) {\n    this.onPlayingChange = options?.onPlayingChange;\n    this.onError = options?.onError;\n  }\n\n  private getAPIUrl(): string {\n    // Always go through the frontend proxy to ensure cookies are sent correctly\n    // The Next.js proxy at /api/* forwards to the backend\n    return \"/api/voice/synthesize\";\n  }\n\n  /**\n   * Speak text using HTTP streaming with real-time playback.\n   * Audio begins playing as soon as the first chunks arrive.\n   */\n  async speak(\n    text: string,\n    voice?: string,\n    speed: number = 1.0\n  ): Promise<void> {\n    // Cleanup any previous playback\n    this.cleanup();\n\n    // Create abort controller for this request\n    this.abortController = new AbortController();\n\n    // Build URL with query params\n    const params = new URLSearchParams();\n    params.set(\"text\", text);\n    if (voice) params.set(\"voice\", voice);\n    params.set(\"speed\", speed.toString());\n\n    const url = `${this.getAPIUrl()}?${params}`;\n\n    // Check if MediaSource is supported\n    if (!window.MediaSource || !MediaSource.isTypeSupported(\"audio/mpeg\")) {\n      // Fallback to simple buffered playback\n      return this.fallbackSpeak(url);\n    }\n\n    // Create MediaSource and audio element\n    this.mediaSource = new MediaSource();\n    this.audioElement = new Audio();\n    this.mediaSourceUrl = URL.createObjectURL(this.mediaSource);\n    this.audioElement.src = this.mediaSourceUrl;\n    this.audioElement.muted = this.isMuted;\n\n    // Set up audio element event handlers\n    this.audioElement.onplay = () => {\n      if (!this.isPlaying) {\n        this.isPlaying = true;\n        this.onPlayingChange?.(true);\n      }\n    };\n\n    this.audioElement.onended = () => {\n      this.isPlaying = false;\n      this.onPlayingChange?.(false);\n    };\n\n    this.audioElement.onerror = () => {\n      this.onError?.(\"Audio playback error\");\n      this.isPlaying = false;\n      this.onPlayingChange?.(false);\n    };\n\n    // Wait for MediaSource to be ready\n    await new Promise<void>((resolve, reject) => {\n      if (!this.mediaSource) {\n        reject(new Error(\"MediaSource not initialized\"));\n        return;\n      }\n\n      this.mediaSource.onsourceopen = () => {\n        try {\n          // Create SourceBuffer for MP3\n          this.sourceBuffer = this.mediaSource!.addSourceBuffer(\"audio/mpeg\");\n          this.sourceBuffer.mode = \"sequence\";\n\n          this.sourceBuffer.onupdateend = () => {\n            this.isAppending = false;\n            this.processNextChunk();\n          };\n\n          resolve();\n        } catch (err) {\n          reject(err);\n        }\n      };\n\n      // MediaSource doesn't have onerror in all browsers, use onsourceclose as fallback\n      this.mediaSource.onsourceclose = () => {\n        if (this.mediaSource?.readyState === \"closed\") {\n          reject(new Error(\"MediaSource closed unexpectedly\"));\n        }\n      };\n    });\n\n    // Start fetching and streaming audio\n    try {\n      const response = await fetch(url, {\n        method: \"POST\",\n        signal: this.abortController.signal,\n        credentials: \"include\", // Include cookies for authentication\n      });\n\n      if (!response.ok) {\n        const errorText = await response.text();\n        throw new Error(\n          `TTS request failed: ${response.status} - ${errorText}`\n        );\n      }\n\n      const reader = response.body?.getReader();\n      if (!reader) {\n        throw new Error(\"No response body\");\n      }\n\n      // Start playback as soon as we have some data\n      let firstChunk = true;\n\n      while (true) {\n        const { done, value } = await reader.read();\n\n        if (done) {\n          this.streamComplete = true;\n          // End the stream when all chunks are appended\n          this.finalizeStream();\n          break;\n        }\n\n        if (value) {\n          this.pendingChunks.push(value);\n          this.processNextChunk();\n\n          // Start playback after first chunk\n          if (firstChunk && this.audioElement) {\n            firstChunk = false;\n            // Small delay to buffer a bit before starting\n            setTimeout(() => {\n              this.audioElement?.play().catch(() => {\n                // Ignore playback start errors\n              });\n            }, 100);\n          }\n        }\n      }\n    } catch (err) {\n      if (err instanceof Error && err.name === \"AbortError\") {\n        return;\n      }\n      this.onError?.(err instanceof Error ? err.message : \"TTS error\");\n      throw err;\n    }\n  }\n\n  /**\n   * Process next chunk from the queue.\n   */\n  private processNextChunk(): void {\n    if (\n      this.isAppending ||\n      this.pendingChunks.length === 0 ||\n      !this.sourceBuffer ||\n      this.sourceBuffer.updating\n    ) {\n      return;\n    }\n\n    const chunk = this.pendingChunks.shift();\n    if (chunk) {\n      this.isAppending = true;\n      try {\n        // Use ArrayBuffer directly for better TypeScript compatibility\n        const buffer = chunk.buffer.slice(\n          chunk.byteOffset,\n          chunk.byteOffset + chunk.byteLength\n        ) as ArrayBuffer;\n        this.sourceBuffer.appendBuffer(buffer);\n      } catch {\n        this.isAppending = false;\n        // Try next chunk\n        this.processNextChunk();\n      }\n    }\n  }\n\n  /**\n   * Finalize the stream when all data has been received.\n   */\n  private finalizeStream(): void {\n    if (this.pendingChunks.length > 0 || this.isAppending) {\n      // Wait for remaining chunks to be appended\n      setTimeout(() => this.finalizeStream(), 50);\n      return;\n    }\n\n    if (\n      this.mediaSource &&\n      this.mediaSource.readyState === \"open\" &&\n      this.sourceBuffer &&\n      !this.sourceBuffer.updating\n    ) {\n      try {\n        this.mediaSource.endOfStream();\n      } catch {\n        // Ignore errors when ending stream\n      }\n    }\n  }\n\n  /**\n   * Fallback for browsers that don't support MediaSource Extensions.\n   * Buffers all audio before playing.\n   */\n  private async fallbackSpeak(url: string): Promise<void> {\n    const response = await fetch(url, {\n      method: \"POST\",\n      signal: this.abortController?.signal,\n      credentials: \"include\", // Include cookies for authentication\n    });\n\n    if (!response.ok) {\n      const errorText = await response.text();\n      throw new Error(`TTS request failed: ${response.status} - ${errorText}`);\n    }\n\n    const audioData = await response.arrayBuffer();\n\n    const blob = new Blob([audioData], { type: \"audio/mpeg\" });\n    const audioUrl = URL.createObjectURL(blob);\n\n    this.audioElement = new Audio(audioUrl);\n    this.audioElement.muted = this.isMuted;\n\n    this.audioElement.onplay = () => {\n      this.isPlaying = true;\n      this.onPlayingChange?.(true);\n    };\n\n    this.audioElement.onended = () => {\n      this.isPlaying = false;\n      this.onPlayingChange?.(false);\n      URL.revokeObjectURL(audioUrl);\n    };\n\n    this.audioElement.onerror = () => {\n      this.onError?.(\"Audio playback error\");\n    };\n\n    await this.audioElement.play();\n  }\n\n  /**\n   * Stop playback and cleanup resources.\n   */\n  stop(): void {\n    // Abort any ongoing request\n    if (this.abortController) {\n      this.abortController.abort();\n      this.abortController = null;\n    }\n\n    this.cleanup();\n  }\n\n  setMuted(muted: boolean): void {\n    this.isMuted = muted;\n    if (this.audioElement) {\n      this.audioElement.muted = muted;\n    }\n  }\n\n  /**\n   * Cleanup all resources.\n   */\n  private cleanup(): void {\n    // Revoke Object URL to prevent memory leak\n    if (this.mediaSourceUrl) {\n      URL.revokeObjectURL(this.mediaSourceUrl);\n      this.mediaSourceUrl = null;\n    }\n\n    // Stop and cleanup audio element\n    if (this.audioElement) {\n      this.audioElement.pause();\n      this.audioElement.src = \"\";\n      this.audioElement = null;\n    }\n\n    // Cleanup MediaSource\n    if (this.mediaSource && this.mediaSource.readyState === \"open\") {\n      try {\n        if (this.sourceBuffer) {\n          this.mediaSource.removeSourceBuffer(this.sourceBuffer);\n        }\n        this.mediaSource.endOfStream();\n      } catch {\n        // Ignore cleanup errors\n      }\n    }\n\n    this.mediaSource = null;\n    this.sourceBuffer = null;\n    this.pendingChunks = [];\n    this.isAppending = false;\n    this.streamComplete = false;\n\n    if (this.isPlaying) {\n      this.isPlaying = false;\n      this.onPlayingChange?.(false);\n    }\n  }\n\n  get playing(): boolean {\n    return this.isPlaying;\n  }\n}\n\n/**\n * WebSocketStreamingTTSPlayer - Uses WebSocket for bidirectional streaming.\n * Useful for scenarios where you want to stream text in and get audio out\n * incrementally (e.g., as LLM generates text).\n */\nexport class WebSocketStreamingTTSPlayer {\n  private websocket: WebSocket | null = null;\n  private mediaSource: MediaSource | null = null;\n  private mediaSourceUrl: string | null = null;\n  private sourceBuffer: SourceBuffer | null = null;\n  private audioElement: HTMLAudioElement | null = null;\n  private pendingChunks: Uint8Array[] = [];\n  private isAppending: boolean = false;\n  private isPlaying: boolean = false;\n  private onPlayingChange?: (playing: boolean) => void;\n  private onError?: (error: string) => void;\n  private hasStartedPlayback: boolean = false;\n\n  constructor(options?: {\n    onPlayingChange?: (playing: boolean) => void;\n    onError?: (error: string) => void;\n  }) {\n    this.onPlayingChange = options?.onPlayingChange;\n    this.onError = options?.onError;\n  }\n\n  private async getWebSocketUrl(): Promise<string> {\n    // Fetch short-lived WS token\n    const tokenResponse = await fetch(\"/api/voice/ws-token\", {\n      method: \"POST\",\n      credentials: \"include\",\n    });\n    if (!tokenResponse.ok) {\n      throw new Error(\"Failed to get WebSocket authentication token\");\n    }\n    const { token } = await tokenResponse.json();\n\n    const protocol = window.location.protocol === \"https:\" ? \"wss:\" : \"ws:\";\n    const host = IS_DEV ? new URL(INTERNAL_URL).host : window.location.host;\n    const path = IS_DEV\n      ? \"/voice/synthesize/stream\"\n      : \"/api/voice/synthesize/stream\";\n    return `${protocol}//${host}${path}?token=${encodeURIComponent(token)}`;\n  }\n\n  async connect(voice?: string, speed?: number): Promise<void> {\n    // Cleanup any previous connection\n    this.cleanup();\n\n    // Check MediaSource support\n    if (!window.MediaSource || !MediaSource.isTypeSupported(\"audio/mpeg\")) {\n      throw new Error(\"MediaSource Extensions not supported\");\n    }\n\n    // Create MediaSource and audio element\n    this.mediaSource = new MediaSource();\n    this.audioElement = new Audio();\n    this.mediaSourceUrl = URL.createObjectURL(this.mediaSource);\n    this.audioElement.src = this.mediaSourceUrl;\n\n    this.audioElement.onplay = () => {\n      if (!this.isPlaying) {\n        this.isPlaying = true;\n        this.onPlayingChange?.(true);\n      }\n    };\n\n    this.audioElement.onended = () => {\n      this.isPlaying = false;\n      this.onPlayingChange?.(false);\n    };\n\n    // Wait for MediaSource to be ready\n    await new Promise<void>((resolve, reject) => {\n      this.mediaSource!.onsourceopen = () => {\n        try {\n          this.sourceBuffer = this.mediaSource!.addSourceBuffer(\"audio/mpeg\");\n          this.sourceBuffer.mode = \"sequence\";\n          this.sourceBuffer.onupdateend = () => {\n            this.isAppending = false;\n            this.processNextChunk();\n          };\n          resolve();\n        } catch (err) {\n          reject(err);\n        }\n      };\n    });\n\n    // Connect WebSocket\n    const url = await this.getWebSocketUrl();\n    return new Promise((resolve, reject) => {\n      this.websocket = new WebSocket(url);\n\n      this.websocket.onopen = () => {\n        // Send initial config\n        this.websocket?.send(\n          JSON.stringify({\n            type: \"config\",\n            voice: voice,\n            speed: speed || 1.0,\n          })\n        );\n        resolve();\n      };\n\n      this.websocket.onerror = () => {\n        reject(new Error(\"WebSocket connection failed\"));\n      };\n\n      this.websocket.onmessage = async (event) => {\n        if (event.data instanceof Blob) {\n          // Audio chunk received\n          const arrayBuffer = await event.data.arrayBuffer();\n          this.pendingChunks.push(new Uint8Array(arrayBuffer));\n          this.processNextChunk();\n\n          // Start playback after first chunk\n          if (!this.hasStartedPlayback && this.audioElement) {\n            this.hasStartedPlayback = true;\n            setTimeout(() => {\n              this.audioElement?.play().catch(() => {\n                // Ignore playback errors\n              });\n            }, 100);\n          }\n        } else {\n          // JSON message\n          try {\n            const data = JSON.parse(event.data);\n            if (data.type === \"audio_done\") {\n              this.finalizeStream();\n            } else if (data.type === \"error\") {\n              this.onError?.(data.message);\n            }\n          } catch {\n            // Ignore parse errors\n          }\n        }\n      };\n\n      this.websocket.onclose = () => {\n        this.finalizeStream();\n      };\n    });\n  }\n\n  private processNextChunk(): void {\n    if (\n      this.isAppending ||\n      this.pendingChunks.length === 0 ||\n      !this.sourceBuffer ||\n      this.sourceBuffer.updating\n    ) {\n      return;\n    }\n\n    const chunk = this.pendingChunks.shift();\n    if (chunk) {\n      this.isAppending = true;\n      try {\n        // Use ArrayBuffer directly for better TypeScript compatibility\n        const buffer = chunk.buffer.slice(\n          chunk.byteOffset,\n          chunk.byteOffset + chunk.byteLength\n        ) as ArrayBuffer;\n        this.sourceBuffer.appendBuffer(buffer);\n      } catch {\n        this.isAppending = false;\n        this.processNextChunk();\n      }\n    }\n  }\n\n  private finalizeStream(): void {\n    if (this.pendingChunks.length > 0 || this.isAppending) {\n      setTimeout(() => this.finalizeStream(), 50);\n      return;\n    }\n\n    if (\n      this.mediaSource &&\n      this.mediaSource.readyState === \"open\" &&\n      this.sourceBuffer &&\n      !this.sourceBuffer.updating\n    ) {\n      try {\n        this.mediaSource.endOfStream();\n      } catch {\n        // Ignore\n      }\n    }\n  }\n\n  async speak(text: string): Promise<void> {\n    if (!this.websocket || this.websocket.readyState !== WebSocket.OPEN) {\n      throw new Error(\"WebSocket not connected\");\n    }\n\n    this.websocket.send(\n      JSON.stringify({\n        type: \"synthesize\",\n        text: text,\n      })\n    );\n  }\n\n  stop(): void {\n    this.cleanup();\n  }\n\n  disconnect(): void {\n    if (this.websocket && this.websocket.readyState === WebSocket.OPEN) {\n      this.websocket.send(JSON.stringify({ type: \"end\" }));\n      this.websocket.close();\n    }\n    this.cleanup();\n  }\n\n  private cleanup(): void {\n    if (this.websocket) {\n      this.websocket.close();\n      this.websocket = null;\n    }\n\n    // Revoke Object URL to prevent memory leak\n    if (this.mediaSourceUrl) {\n      URL.revokeObjectURL(this.mediaSourceUrl);\n      this.mediaSourceUrl = null;\n    }\n\n    if (this.audioElement) {\n      this.audioElement.pause();\n      this.audioElement.src = \"\";\n      this.audioElement = null;\n    }\n\n    if (this.mediaSource && this.mediaSource.readyState === \"open\") {\n      try {\n        if (this.sourceBuffer) {\n          this.mediaSource.removeSourceBuffer(this.sourceBuffer);\n        }\n        this.mediaSource.endOfStream();\n      } catch {\n        // Ignore\n      }\n    }\n\n    this.mediaSource = null;\n    this.sourceBuffer = null;\n    this.pendingChunks = [];\n    this.isAppending = false;\n    this.hasStartedPlayback = false;\n\n    if (this.isPlaying) {\n      this.isPlaying = false;\n      this.onPlayingChange?.(false);\n    }\n  }\n\n  get playing(): boolean {\n    return this.isPlaying;\n  }\n}\n\n// Export the HTTP player as the default/recommended option\nexport { HTTPStreamingTTSPlayer as StreamingTTSPlayer };\n"
  },
  {
    "path": "web/src/lib/swr-keys.ts",
    "content": "/**\n * Centralized SWR cache key registry.\n *\n * All useSWR calls and mutate() calls should reference these constants\n * instead of inline strings to prevent typos and make key usage greppable.\n *\n * For dynamic keys (e.g. per-ID endpoints), use the builder functions.\n */\nexport const SWR_KEYS = {\n  // ── User ──────────────────────────────────────────────────────────────────\n  me: \"/api/me\",\n\n  // ── Health ────────────────────────────────────────────────────────────────\n  health: \"/api/health\",\n\n  // ── Settings ──────────────────────────────────────────────────────────────\n  settings: \"/api/settings\",\n  enterpriseSettings: \"/api/enterprise-settings\",\n  customAnalyticsScript: \"/api/enterprise-settings/custom-analytics-script\",\n  authType: \"/api/auth/type\",\n\n  // ── Agents / Personas ─────────────────────────────────────────────────────\n  personas: \"/api/persona\",\n  persona: (id: number) => `/api/persona/${id}`,\n  agentPreferences: \"/api/user/assistant/preferences\",\n  defaultAssistantConfig: \"/api/admin/default-assistant/configuration\",\n  personaLabels: \"/api/persona/labels\",\n\n  // ── LLM Providers ─────────────────────────────────────────────────────────\n  llmProviders: \"/api/llm/provider\",\n  llmProvidersForPersona: (personaId: number) =>\n    `/api/llm/persona/${personaId}/providers`,\n  adminLlmProviders: \"/api/admin/llm/provider\",\n  llmProvidersWithImageGen: \"/api/admin/llm/provider?include_image_gen=true\",\n  wellKnownLlmProviders: \"/api/admin/llm/built-in/options\",\n  wellKnownLlmProvider: (providerEndpoint: string) =>\n    `/api/admin/llm/built-in/options/${providerEndpoint}`,\n\n  // ── Image Generation ──────────────────────────────────────────────────────\n  imageGenConfig: \"/api/admin/image-generation/config\",\n\n  // ── Documents ─────────────────────────────────────────────────────────────\n  documentSets: \"/api/manage/document-set\",\n  documentSetsEditable: \"/api/manage/document-set?get_editable=true\",\n  tags: \"/api/query/valid-tags\",\n  connectorStatus: \"/api/manage/connector-status\",\n\n  // ── Credentials & Connectors ──────────────────────────────────────────────\n  adminCredentials: \"/api/manage/admin/credential\",\n  indexingStatus: \"/api/manage/admin/connector/indexing-status\",\n  adminConnectorStatus: \"/api/manage/admin/connector/status\",\n  federatedConnectors: \"/api/federated\",\n\n  // ── Google Connectors ─────────────────────────────────────────────────────\n  googleConnectorAppCredential: (service: \"gmail\" | \"google-drive\") =>\n    `/api/manage/admin/connector/${service}/app-credential`,\n  googleConnectorServiceAccountKey: (service: \"gmail\" | \"google-drive\") =>\n    `/api/manage/admin/connector/${service}/service-account-key`,\n  googleConnectorCredentials: (service: \"gmail\" | \"google-drive\") =>\n    `/api/manage/admin/connector/${service}/credentials`,\n  googleConnectorPublicCredential: (service: \"gmail\" | \"google-drive\") =>\n    `/api/manage/admin/connector/${service}/public-credential`,\n  googleConnectorServiceAccountCredential: (\n    service: \"gmail\" | \"google-drive\"\n  ) => `/api/manage/admin/connector/${service}/service-account-credential`,\n\n  // ── Search Settings ───────────────────────────────────────────────────────\n  currentSearchSettings: \"/api/search-settings/get-current-search-settings\",\n  secondarySearchSettings: \"/api/search-settings/get-secondary-search-settings\",\n\n  // ── Chat Sessions ─────────────────────────────────────────────────────────\n  chatSessions: \"/api/chat/get-user-chat-sessions\",\n\n  // ── Projects & Files ──────────────────────────────────────────────────────\n  userProjects: \"/api/user/projects\",\n  recentFiles: \"/api/user/files/recent\",\n  userPats: \"/api/user/pats\",\n  notifications: \"/api/notifications\",\n\n  // ── Users ─────────────────────────────────────────────────────────────────\n  acceptedUsers: \"/api/manage/users/accepted/all\",\n  invitedUsers: \"/api/manage/users/invited\",\n  pendingTenantUsers: \"/api/tenants/users/pending\",\n  userCounts: \"/api/manage/users/counts\",\n\n  // ── API Keys ──────────────────────────────────────────────────────────────\n  adminApiKeys: \"/api/admin/api-key\",\n\n  // ── Groups ────────────────────────────────────────────────────────────────\n  adminUserGroups: \"/api/manage/admin/user-group\",\n  shareableGroups: \"/api/manage/user-groups/minimal\",\n  scimToken: \"/api/admin/enterprise-settings/scim/token\",\n\n  // ── MCP Servers ───────────────────────────────────────────────────────────\n  adminMcpServers: \"/api/admin/mcp/servers\",\n  mcpServers: \"/api/mcp/servers\",\n\n  // ── Tools ─────────────────────────────────────────────────────────────────\n  tools: \"/api/tool\",\n  openApiTools: \"/api/tool/openapi\",\n  oauthTokenStatus: \"/api/user-oauth-token/status\",\n\n  // ── Voice ─────────────────────────────────────────────────────────────────\n  voiceProviders: \"/api/admin/voice/providers\",\n  voiceStatus: \"/api/voice/status\",\n\n  // ── Build (Craft) ─────────────────────────────────────────────────────────\n  buildConnectors: \"/api/build/connectors\",\n  buildUserLibraryTree: \"/api/build/user-library/tree\",\n  buildSessionFiles: (sessionId: string) =>\n    `/api/build/sessions/${sessionId}/files?path=`,\n  buildSessionOutputFiles: (sessionId: string) =>\n    `/api/build/sessions/${sessionId}/files?path=outputs`,\n  buildSessionWebappInfo: (sessionId: string) =>\n    `/api/build/sessions/${sessionId}/webapp-info`,\n  buildSessionArtifacts: (sessionId: string) =>\n    `/api/build/sessions/${sessionId}/artifacts`,\n  buildSessionArtifactFile: (sessionId: string, filePath: string) =>\n    `/api/build/sessions/${sessionId}/artifacts/${filePath}`,\n  buildSessionPptxPreview: (sessionId: string, filePath: string) =>\n    `/api/build/sessions/${sessionId}/pptx-preview/${filePath}`,\n\n  // ── Knowledge Graph ───────────────────────────────────────────────────────\n  kgConfig: \"/api/admin/kg/config\",\n  kgEntityTypes: \"/api/admin/kg/entity-types\",\n  kgExposed: \"/api/admin/kg/exposed\",\n\n  // ── OpenSearch Migration ──────────────────────────────────────────────────\n  opensearchMigrationStatus: \"/api/admin/opensearch-migration/status\",\n  opensearchMigrationRetrieval: \"/api/admin/opensearch-migration/retrieval\",\n\n  // ── Token Rate Limits ─────────────────────────────────────────────────────\n  globalTokenRateLimits: \"/api/admin/token-rate-limits/global\",\n  userTokenRateLimits: \"/api/admin/token-rate-limits/users\",\n  userGroupTokenRateLimits: \"/api/admin/token-rate-limits/user-groups\",\n  userGroupTokenRateLimit: (groupId: number) =>\n    `/api/admin/token-rate-limits/user-group/${groupId}`,\n\n  // ── Usage Reports ─────────────────────────────────────────────────────────\n  usageReport: \"/api/admin/usage-report\",\n\n  // ── Web Search ────────────────────────────────────────────────────────────\n  webSearchContentProviders: \"/api/admin/web-search/content-providers\",\n  webSearchSearchProviders: \"/api/admin/web-search/search-providers\",\n\n  // ── Prompt shortcuts ──────────────────────────────────────────────────────\n  promptShortcuts: \"/api/input_prompt\",\n\n  // ── License & Billing ─────────────────────────────────────────────────────\n  license: \"/api/license\",\n  billingInformationCloud: \"/api/tenants/billing-information\",\n  billingInformationSelfHosted: \"/api/admin/billing/billing-information\",\n\n  // ── Admin ─────────────────────────────────────────────────────────────────\n  hooks: \"/api/admin/hooks\",\n  hookSpecs: \"/api/admin/hooks/specs\",\n\n  // ── Slack Bots ────────────────────────────────────────────────────────────\n  slackChannels: \"/api/manage/admin/slack-app/channel\",\n  slackBots: \"/api/manage/admin/slack-app/bots\",\n  slackBot: (botId: number) => `/api/manage/admin/slack-app/bots/${botId}`,\n  slackBotConfig: (botId: number) =>\n    `/api/manage/admin/slack-app/bots/${botId}/config`,\n\n  // ── Standard Answers (EE) ─────────────────────────────────────────────────\n  standardAnswerCategories: \"/api/manage/admin/standard-answer/category\",\n  standardAnswers: \"/api/manage/admin/standard-answer\",\n\n  // ── Query History (EE) ────────────────────────────────────────────────────\n  adminChatSessionHistory: \"/api/admin/chat-session-history\",\n  adminChatSession: (id: string) => `/api/admin/chat-session-history/${id}`,\n\n  // ── MCP Server (per-ID) ───────────────────────────────────────────────────\n  adminMcpServer: (id: number) => `/api/admin/mcp/servers/${id}`,\n\n  // ── Document Processing ───────────────────────────────────────────────────\n  unstructuredApiKeySet: \"/api/search-settings/unstructured-api-key-set\",\n\n  // ── Connectors ────────────────────────────────────────────────────────────\n  connector: \"/api/manage/connector\",\n} as const;\n"
  },
  {
    "path": "web/src/lib/time.ts",
    "content": "import { User } from \"@/lib/types\";\n\nconst conditionallyAddPlural = (noun: string, cnt: number) => {\n  if (cnt > 1) {\n    return `${noun}s`;\n  }\n  return noun;\n};\n\nexport const timeAgo = (\n  dateString: string | undefined | null\n): string | null => {\n  if (!dateString) {\n    return null;\n  }\n\n  const date = new Date(dateString);\n  const now = new Date();\n  const secondsDiff = Math.floor((now.getTime() - date.getTime()) / 1000);\n\n  if (secondsDiff < 60) {\n    return `${secondsDiff} ${conditionallyAddPlural(\n      \"second\",\n      secondsDiff\n    )} ago`;\n  }\n\n  const minutesDiff = Math.floor(secondsDiff / 60);\n  if (minutesDiff < 60) {\n    return `${minutesDiff} ${conditionallyAddPlural(\n      \"minute\",\n      secondsDiff\n    )} ago`;\n  }\n\n  const hoursDiff = Math.floor(minutesDiff / 60);\n  if (hoursDiff < 24) {\n    return `${hoursDiff} ${conditionallyAddPlural(\"hour\", hoursDiff)} ago`;\n  }\n\n  const daysDiff = Math.floor(hoursDiff / 24);\n  if (daysDiff < 30) {\n    return `${daysDiff} ${conditionallyAddPlural(\"day\", daysDiff)} ago`;\n  }\n\n  const weeksDiff = Math.floor(daysDiff / 7);\n  if (weeksDiff < 4) {\n    return `${weeksDiff} ${conditionallyAddPlural(\"week\", weeksDiff)} ago`;\n  }\n\n  const monthsDiff = Math.floor(daysDiff / 30);\n  if (monthsDiff < 12) {\n    return `${monthsDiff} ${conditionallyAddPlural(\"month\", monthsDiff)} ago`;\n  }\n\n  const yearsDiff = Math.floor(monthsDiff / 12);\n  return `${yearsDiff} ${conditionallyAddPlural(\"year\", yearsDiff)} ago`;\n};\n\nexport function localizeAndPrettify(dateString: string) {\n  const date = new Date(dateString);\n  return date.toLocaleString();\n}\n\nexport function humanReadableFormat(dateString: string): string {\n  // Create a Date object from the dateString\n  const date = new Date(dateString);\n\n  // Use Intl.DateTimeFormat to format the date\n  // Specify the locale as 'en-US' and options for month, day, and year\n  const formatter = new Intl.DateTimeFormat(\"en-US\", {\n    month: \"long\", // full month name\n    day: \"numeric\", // numeric day\n    year: \"numeric\", // numeric year\n  });\n\n  // Format the date and return it\n  return formatter.format(date);\n}\n\n/**\n * Format a date as \"Jan 15, 2025\" (short month name).\n */\nexport function humanReadableFormatShort(date: string | Date | null): string {\n  if (!date) return \"\";\n  const d = typeof date === \"string\" ? new Date(date) : date;\n  const formatter = new Intl.DateTimeFormat(\"en-US\", {\n    month: \"short\",\n    day: \"numeric\",\n    year: \"numeric\",\n  });\n  return formatter.format(d);\n}\n\nexport function humanReadableFormatWithTime(datetimeString: string): string {\n  // Create a Date object from the dateString\n  const date = new Date(datetimeString);\n\n  // Use Intl.DateTimeFormat to format the date\n  // Specify the locale as 'en-US' and options for month, day, and year\n  const formatter = new Intl.DateTimeFormat(\"en-US\", {\n    month: \"long\", // full month name\n    day: \"numeric\", // numeric day\n    year: \"numeric\", // numeric year\n    hour: \"numeric\",\n    minute: \"numeric\",\n  });\n  // Format the date and return it\n  return formatter.format(date);\n}\n\nexport function getSecondsUntilExpiration(\n  userInfo: User | null\n): number | null {\n  if (!userInfo) {\n    return null;\n  }\n\n  const { oidc_expiry, current_token_created_at, current_token_expiry_length } =\n    userInfo;\n\n  const now = new Date();\n\n  let secondsUntilTokenExpiration: number | null = null;\n  let secondsUntilOIDCExpiration: number | null = null;\n\n  if (current_token_created_at && current_token_expiry_length !== undefined) {\n    const createdAt = new Date(current_token_created_at);\n    const expiresAt = new Date(\n      createdAt.getTime() + current_token_expiry_length * 1000\n    );\n    secondsUntilTokenExpiration = Math.floor(\n      (expiresAt.getTime() - now.getTime()) / 1000\n    );\n  }\n\n  if (oidc_expiry) {\n    const expiresAtFromOIDC = new Date(oidc_expiry);\n    secondsUntilOIDCExpiration = Math.floor(\n      (expiresAtFromOIDC.getTime() - now.getTime()) / 1000\n    );\n  }\n\n  if (\n    secondsUntilTokenExpiration === null &&\n    secondsUntilOIDCExpiration === null\n  ) {\n    return null;\n  }\n\n  return Math.max(\n    0,\n    Math.min(\n      secondsUntilTokenExpiration ?? Infinity,\n      secondsUntilOIDCExpiration ?? Infinity\n    )\n  );\n}\n\nexport type TimeFilter = \"day\" | \"week\" | \"month\" | \"year\";\n\nexport function getTimeFilterDate(filter: TimeFilter): Date | null {\n  const now = new Date();\n  switch (filter) {\n    case \"day\":\n      return new Date(now.getTime() - 24 * 60 * 60 * 1000);\n    case \"week\":\n      return new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000);\n    case \"month\":\n      return new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);\n    case \"year\":\n      return new Date(now.getTime() - 365 * 24 * 60 * 60 * 1000);\n    default:\n      return null;\n  }\n}\n\nexport function formatDurationSeconds(seconds: number): string {\n  const totalSeconds = Math.ceil(seconds);\n  if (totalSeconds < 60) {\n    return `${totalSeconds}s`;\n  }\n  const mins = Math.floor(totalSeconds / 60);\n  const secs = totalSeconds % 60;\n  return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`;\n}\n"
  },
  {
    "path": "web/src/lib/tools/fetchTools.ts",
    "content": "import { ToolSnapshot } from \"./interfaces\";\nimport { fetchSS } from \"../utilsSS\";\n\nexport async function fetchToolsSS(): Promise<ToolSnapshot[] | null> {\n  try {\n    const response = await fetchSS(\"/tool\");\n    if (!response.ok) {\n      throw new Error(`Failed to fetch tools: ${await response.text()}`);\n    }\n    const tools: ToolSnapshot[] = await response.json();\n    return tools;\n  } catch (error) {\n    console.error(\"Error fetching tools:\", error);\n    return null;\n  }\n}\n\nexport async function fetchToolByIdSS(\n  toolId: string\n): Promise<ToolSnapshot | null> {\n  try {\n    const response = await fetchSS(`/tool/${toolId}`);\n    if (!response.ok) {\n      throw new Error(\n        `Failed to fetch tool with ID ${toolId}: ${await response.text()}`\n      );\n    }\n    const tool: ToolSnapshot = await response.json();\n    return tool;\n  } catch (error) {\n    console.error(`Error fetching tool with ID ${toolId}:`, error);\n    return null;\n  }\n}\n"
  },
  {
    "path": "web/src/lib/tools/interfaces.ts",
    "content": "import type React from \"react\";\nimport type { IconProps } from \"@opal/types\";\n\n// Generic action status for UI components\nexport enum ActionStatus {\n  CONNECTED = \"connected\",\n  PENDING = \"pending\",\n  DISCONNECTED = \"disconnected\",\n  FETCHING = \"fetching\",\n}\n\nexport enum MCPServerStatus {\n  CREATED = \"CREATED\",\n  AWAITING_AUTH = \"AWAITING_AUTH\",\n  FETCHING_TOOLS = \"FETCHING_TOOLS\",\n  CONNECTED = \"CONNECTED\",\n  DISCONNECTED = \"DISCONNECTED\",\n}\n\nexport interface MCPServer {\n  id: number;\n  name: string;\n  description?: string;\n  server_url: string;\n  owner: string;\n  transport?: MCPTransportType;\n  auth_type?: MCPAuthenticationType;\n  auth_performer?: MCPAuthenticationPerformer;\n  is_authenticated: boolean;\n  user_authenticated?: boolean;\n  auth_template?: any;\n  admin_credentials?: Record<string, string>;\n  user_credentials?: Record<string, string>;\n  status: MCPServerStatus;\n  last_refreshed_at?: string;\n  tool_count: number;\n}\n\nexport interface MCPServersResponse {\n  assistant_id?: string | null;\n  mcp_servers: MCPServer[];\n}\n\nexport interface MCPServerCreateRequest {\n  name: string;\n  description?: string;\n  server_url: string;\n}\n\nexport interface MCPServerUpdateRequest {\n  name?: string;\n  description?: string;\n  server_url?: string;\n}\n\nexport interface MCPTool {\n  id: string;\n  name: string;\n  description: string;\n  icon?: React.FunctionComponent<IconProps>;\n  isAvailable: boolean;\n  isEnabled: boolean;\n}\n\nexport interface MethodSpec {\n  /* Defines a single method that is part of a custom tool. Each method maps to a single\n  action that the LLM can choose to take. */\n  name: string;\n  summary: string;\n  path: string;\n  method: string;\n  spec: Record<string, any>;\n  custom_headers: { key: string; value: string }[];\n}\n\nexport interface ToolSnapshot {\n  id: number;\n  name: string;\n  display_name: string;\n  description: string;\n\n  // only specified for Custom Tools. OpenAPI schema which represents\n  // the tool's API.\n  definition: Record<string, any> | null;\n\n  // only specified for Custom Tools. Custom headers to add to the tool's API requests.\n  custom_headers: { key: string; value: string }[];\n\n  // only specified for Custom Tools. ID of the tool in the codebase.\n  in_code_tool_id: string | null;\n\n  // whether to pass through the user's OAuth token as Authorization header\n  passthrough_auth: boolean;\n\n  // OAuth configuration for this tool\n  oauth_config_id?: number | null;\n  oauth_config_name?: string | null;\n\n  // If this is an MCP tool, which server it belongs to\n  mcp_server_id?: number | null;\n  user_id?: string | null;\n\n  // Whether the tool is enabled\n  enabled: boolean;\n\n  // Visibility settings from backend TOOL_VISIBILITY_CONFIG\n  chat_selectable: boolean;\n  agent_creation_selectable: boolean;\n  default_enabled: boolean;\n}\n\nexport enum MCPAuthenticationType {\n  NONE = \"NONE\",\n  API_TOKEN = \"API_TOKEN\",\n  OAUTH = \"OAUTH\",\n  PT_OAUTH = \"PT_OAUTH\", // Pass-Through OAuth\n}\n\nexport enum MCPAuthenticationPerformer {\n  ADMIN = \"ADMIN\",\n  PER_USER = \"PER_USER\",\n}\n\nexport interface ApiResponse<T> {\n  data: T | null;\n  error: string | null;\n}\n\nexport interface OAuthConfig {\n  id: number;\n  name: string;\n  authorization_url: string;\n  token_url: string;\n  scopes: string[] | null;\n  has_client_credentials: boolean;\n  tool_count: number;\n  created_at: string;\n  updated_at: string;\n}\n\nexport enum MCPTransportType {\n  STDIO = \"STDIO\",\n  STREAMABLE_HTTP = \"STREAMABLE_HTTP\",\n  SSE = \"SSE\",\n}\n\nexport interface OAuthConfigCreate {\n  name: string;\n  authorization_url: string;\n  token_url: string;\n  client_id: string;\n  client_secret: string;\n  scopes?: string[];\n  additional_params?: Record<string, any>;\n}\n\nexport interface OAuthConfigUpdate {\n  name?: string;\n  authorization_url?: string;\n  token_url?: string;\n  client_id?: string;\n  client_secret?: string;\n  scopes?: string[];\n  additional_params?: Record<string, any>;\n}\n\nexport interface OAuthTokenStatus {\n  oauth_config_id: number;\n  expires_at: number | null;\n  is_expired: boolean;\n}\n"
  },
  {
    "path": "web/src/lib/tools/mcpService.ts",
    "content": "/**\n * Service layer for MCP (Model Context Protocol) related API calls\n */\n\nimport {\n  MCPServer,\n  MCPServerCreateRequest,\n  MCPServerUpdateRequest,\n  MCPServerStatus,\n  ApiResponse,\n  ToolSnapshot,\n  MCPAuthenticationType,\n  MCPAuthenticationPerformer,\n} from \"@/lib/tools/interfaces\";\nexport interface ToolStatusUpdateRequest {\n  tool_ids: number[];\n  enabled: boolean;\n}\n\nexport interface ToolStatusUpdateResponse {\n  updated_count: number;\n  tool_ids: number[];\n}\n\n/**\n * Delete an MCP server\n */\nexport async function deleteMCPServer(serverId: number): Promise<void> {\n  const response = await fetch(`/api/admin/mcp/server/${serverId}`, {\n    method: \"DELETE\",\n  });\n\n  if (!response.ok) {\n    const errorText = await response.text();\n    throw new Error(errorText || \"Failed to delete MCP server\");\n  }\n}\n\n/**\n * This performs actual discovery from the MCP server and syncs to DB\n */\nexport async function refreshMCPServerTools(\n  serverId: number\n): Promise<ToolSnapshot[]> {\n  // Discovers tools from MCP server, upserts to DB, and returns ToolSnapshot format\n  const response = await fetch(\n    `/api/admin/mcp/server/${serverId}/tools/snapshots?source=mcp`\n  );\n  if (!response.ok) {\n    const errorText = await response.text();\n    throw new Error(errorText || \"Failed to refresh tools\");\n  }\n\n  return await response.json();\n}\n\n/**\n * Update status (enable/disable) for one or more tools\n */\nexport async function updateToolsStatus(\n  toolIds: number[],\n  enabled: boolean\n): Promise<ToolStatusUpdateResponse> {\n  const response = await fetch(\"/api/admin/tool/status\", {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      tool_ids: toolIds,\n      enabled: enabled,\n    } as ToolStatusUpdateRequest),\n  });\n\n  if (!response.ok) {\n    const errorText = await response.text();\n    throw new Error(errorText || \"Failed to update tool status\");\n  }\n\n  return await response.json();\n}\n\n/**\n * Update status for a single tool\n */\nexport async function updateToolStatus(\n  toolId: number,\n  enabled: boolean\n): Promise<ToolStatusUpdateResponse> {\n  return updateToolsStatus([toolId], enabled);\n}\n\n/**\n * Disable all tools for a specific MCP server\n */\nexport async function disableAllServerTools(\n  toolIds: number[]\n): Promise<ToolStatusUpdateResponse> {\n  return updateToolsStatus(toolIds, false);\n}\n\n/**\n * Create a new MCP server with basic information\n */\nexport async function createMCPServer(\n  data: MCPServerCreateRequest\n): Promise<MCPServer> {\n  const response = await fetch(\"/api/admin/mcp/server\", {\n    method: \"POST\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(data),\n  });\n\n  if (!response.ok) {\n    const errorText = await response.text();\n    throw new Error(errorText || \"Failed to create MCP server\");\n  }\n\n  return await response.json();\n}\n\n/**\n * Update an existing MCP server\n */\nexport async function updateMCPServer(\n  serverId: number,\n  data: MCPServerUpdateRequest\n): Promise<MCPServer> {\n  const response = await fetch(`/api/admin/mcp/server/${serverId}`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(data),\n  });\n\n  if (!response.ok) {\n    const errorText = await response.text();\n    throw new Error(errorText || \"Failed to update MCP server\");\n  }\n\n  return await response.json();\n}\n\n/**\n * Update the status of an MCP server\n */\nexport async function updateMCPServerStatus(\n  serverId: number,\n  status: MCPServerStatus\n): Promise<void> {\n  const response = await fetch(\n    `/api/admin/mcp/server/${serverId}/status?status=${status}`,\n    {\n      method: \"PATCH\",\n    }\n  );\n\n  if (!response.ok) {\n    const errorText = await response.text();\n    throw new Error(errorText || \"Failed to update MCP server status\");\n  }\n}\n\ninterface UpsertMCPServerResponse {\n  server_id: number;\n  server_name: string;\n  server_url: string;\n  auth_type: string;\n  auth_performer: string;\n  is_authenticated: boolean;\n}\n\nexport async function upsertMCPServer(serverData: {\n  name: string;\n  description?: string;\n  server_url: string;\n  transport: string;\n  auth_type: MCPAuthenticationType;\n  auth_performer: MCPAuthenticationPerformer;\n  api_token?: string;\n  oauth_client_id?: string;\n  oauth_client_secret?: string;\n  auth_template?: any;\n  admin_credentials?: Record<string, string>;\n  existing_server_id?: number;\n}): Promise<ApiResponse<UpsertMCPServerResponse>> {\n  try {\n    const response = await fetch(\"/api/admin/mcp/servers/create\", {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify(serverData),\n    });\n\n    if (!response.ok) {\n      const errorDetail = (await response.json()).detail;\n      return {\n        data: null,\n        error: `Failed to create MCP server: ${errorDetail}`,\n      };\n    }\n\n    const result: UpsertMCPServerResponse = await response.json();\n    return { data: result, error: null };\n  } catch (error) {\n    console.error(\"Error creating MCP server:\", error);\n    return { data: null, error: `Error creating MCP server: ${error}` };\n  }\n}\n"
  },
  {
    "path": "web/src/lib/tools/mcpUtils.tsx",
    "content": "import { SOURCE_METADATA_MAP } from \"../sources\";\nimport { MCPServer } from \"./interfaces\";\nimport { DatabaseIcon, FileIcon } from \"@/components/icons/icons\";\nimport type { IconProps } from \"@opal/types\";\nimport { SvgServer } from \"@opal/icons\";\n\n/**\n * Get an appropriate icon for an MCP server based on its URL and name.\n * Leverages the existing SOURCE_METADATA_MAP for connector icons.\n */\nexport function getActionIcon(\n  serverUrl: string,\n  serverName: string\n): React.FunctionComponent<IconProps> {\n  const url = serverUrl.toLowerCase();\n  const name = serverName.toLowerCase();\n\n  for (const [sourceKey, metadata] of Object.entries(SOURCE_METADATA_MAP)) {\n    const keyword = sourceKey.toLowerCase();\n\n    if (url.includes(keyword) || name.includes(keyword)) {\n      const Icon = metadata.icon;\n      return Icon;\n    }\n  }\n\n  if (\n    url.includes(\"postgres\") ||\n    url.includes(\"mysql\") ||\n    url.includes(\"mongodb\") ||\n    url.includes(\"redis\")\n  ) {\n    return DatabaseIcon;\n  }\n  if (url.includes(\"filesystem\") || name.includes(\"file system\")) {\n    return FileIcon;\n  }\n\n  return SvgServer;\n}\n"
  },
  {
    "path": "web/src/lib/tools/openApiService.ts",
    "content": "import { MethodSpec, ApiResponse, ToolSnapshot } from \"@/lib/tools/interfaces\";\n\nconst SUPPORTED_HTTP_METHODS = new Set([\n  \"get\",\n  \"post\",\n  \"put\",\n  \"patch\",\n  \"delete\",\n  \"options\",\n  \"head\",\n]);\n\nconst isPlainRecord = (value: unknown): value is Record<string, any> =>\n  Boolean(value) && typeof value === \"object\" && !Array.isArray(value);\n\nexport function extractMethodSpecsFromDefinition(\n  definition?: Record<string, any> | null\n): MethodSpec[] {\n  if (!isPlainRecord(definition) || !isPlainRecord(definition.paths)) {\n    return [];\n  }\n\n  const pathEntries = Object.entries(definition.paths as Record<string, any>);\n  const methods: MethodSpec[] = [];\n\n  for (const [path, operations] of pathEntries) {\n    if (!isPlainRecord(operations)) {\n      continue;\n    }\n\n    for (const [methodName, spec] of Object.entries(operations)) {\n      if (!isPlainRecord(spec)) {\n        continue;\n      }\n\n      if (!SUPPORTED_HTTP_METHODS.has(methodName.toLowerCase())) {\n        continue;\n      }\n\n      const name = spec.operationId ?? spec.operationID;\n      const summary = spec.summary ?? spec.description;\n\n      if (!name || !summary) {\n        continue;\n      }\n\n      methods.push({\n        name,\n        summary,\n        path,\n        method: methodName.toUpperCase(),\n        spec,\n        custom_headers: [],\n      });\n    }\n  }\n\n  return methods;\n}\n\nexport async function validateToolDefinition(toolData: {\n  definition: Record<string, any>;\n}): Promise<ApiResponse<MethodSpec[]>> {\n  try {\n    const response = await fetch(`/api/admin/tool/custom/validate`, {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify(toolData),\n    });\n\n    if (!response.ok) {\n      const errorDetail = (await response.json()).detail;\n      return { data: null, error: errorDetail };\n    }\n\n    const responseJson = await response.json();\n    return { data: responseJson.methods, error: null };\n  } catch (error) {\n    console.error(\"Error validating tool:\", error);\n    return { data: null, error: \"Unexpected error validating tool definition\" };\n  }\n}\n\nexport async function createCustomTool(toolData: {\n  name: string;\n  description?: string;\n  definition: Record<string, any>;\n  custom_headers: { key: string; value: string }[];\n  passthrough_auth: boolean;\n}): Promise<ApiResponse<ToolSnapshot>> {\n  try {\n    const response = await fetch(\"/api/admin/tool/custom\", {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify(toolData),\n    });\n\n    if (!response.ok) {\n      const errorDetail = (await response.json()).detail;\n      return { data: null, error: `Failed to create tool: ${errorDetail}` };\n    }\n\n    const tool: ToolSnapshot = await response.json();\n    return { data: tool, error: null };\n  } catch (error) {\n    console.error(\"Error creating tool:\", error);\n    return { data: null, error: \"Error creating tool\" };\n  }\n}\n\ntype ToolUpdatePayload = {\n  name?: string;\n  description?: string;\n  definition?: Record<string, any>;\n  custom_headers?: { key: string; value: string }[] | null;\n  passthrough_auth?: boolean;\n  oauth_config_id?: number | null;\n};\n\nexport async function updateCustomTool(\n  toolId: number,\n  toolData: ToolUpdatePayload\n): Promise<ApiResponse<ToolSnapshot>> {\n  try {\n    const response = await fetch(`/api/admin/tool/custom/${toolId}`, {\n      method: \"PUT\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify(toolData),\n    });\n\n    if (!response.ok) {\n      const errorDetail = (await response.json()).detail;\n      return { data: null, error: `Failed to update tool: ${errorDetail}` };\n    }\n\n    const updatedTool: ToolSnapshot = await response.json();\n    return { data: updatedTool, error: null };\n  } catch (error) {\n    console.error(\"Error updating tool:\", error);\n    return { data: null, error: \"Error updating tool\" };\n  }\n}\n\nexport async function deleteCustomTool(\n  toolId: number\n): Promise<ApiResponse<boolean>> {\n  try {\n    const response = await fetch(`/api/admin/tool/custom/${toolId}`, {\n      method: \"DELETE\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n    });\n\n    if (!response.ok) {\n      const errorDetail = (await response.json()).detail;\n      return { data: false, error: `Failed to delete tool: ${errorDetail}` };\n    }\n\n    return { data: true, error: null };\n  } catch (error) {\n    console.error(\"Error deleting tool:\", error);\n    return { data: false, error: \"Error deleting tool\" };\n  }\n}\n"
  },
  {
    "path": "web/src/lib/types.ts",
    "content": "import { Persona } from \"@/app/admin/agents/interfaces\";\nimport { Credential } from \"./connectors/credentials\";\nimport { Connector } from \"./connectors/connectors\";\nimport { ConnectorCredentialPairStatus } from \"@/app/admin/connector/[ccPairId]/types\";\n\nexport interface UserSpecificAgentPreference {\n  disabled_tool_ids?: number[];\n}\n\nexport type UserSpecificAgentPreferences = Record<\n  number,\n  UserSpecificAgentPreference\n>;\n\nexport enum ThemePreference {\n  LIGHT = \"light\",\n  DARK = \"dark\",\n  SYSTEM = \"system\",\n}\n\ninterface UserPreferences {\n  // TODO: rename to agent — https://linear.app/onyx-app/issue/ENG-3766\n  chosen_assistants: number[] | null;\n  visible_assistants: number[];\n  hidden_assistants: number[];\n  pinned_assistants?: number[];\n  default_model: string | null;\n  recent_assistants: number[];\n  auto_scroll: boolean;\n  shortcut_enabled: boolean;\n  temperature_override_enabled: boolean;\n  theme_preference: ThemePreference | null;\n  chat_background: string | null;\n  default_app_mode: \"AUTO\" | \"CHAT\" | \"SEARCH\";\n  // Voice preferences\n  voice_auto_send?: boolean;\n  voice_auto_playback?: boolean;\n  voice_playback_speed?: number;\n}\n\nexport interface MemoryItem {\n  id: number | null;\n  content: string;\n}\n\nexport interface UserPersonalization {\n  name: string;\n  role: string;\n  memories: MemoryItem[];\n  use_memories: boolean;\n  enable_memory_tool: boolean;\n  user_preferences: string;\n}\n\nexport enum AccountType {\n  STANDARD = \"STANDARD\",\n  BOT = \"BOT\",\n  EXT_PERM_USER = \"EXT_PERM_USER\",\n  SERVICE_ACCOUNT = \"SERVICE_ACCOUNT\",\n  ANONYMOUS = \"ANONYMOUS\",\n}\n\nexport enum UserRole {\n  LIMITED = \"limited\",\n  BASIC = \"basic\",\n  ADMIN = \"admin\",\n  CURATOR = \"curator\",\n  GLOBAL_CURATOR = \"global_curator\",\n  EXT_PERM_USER = \"ext_perm_user\",\n  SLACK_USER = \"slack_user\",\n}\n\nexport const USER_ROLE_LABELS: Record<UserRole, string> = {\n  [UserRole.BASIC]: \"Basic\",\n  [UserRole.ADMIN]: \"Admin\",\n  [UserRole.GLOBAL_CURATOR]: \"Global Curator\",\n  [UserRole.CURATOR]: \"Curator\",\n  [UserRole.LIMITED]: \"Limited\",\n  [UserRole.EXT_PERM_USER]: \"External Permissioned User\",\n  [UserRole.SLACK_USER]: \"Slack User\",\n};\n\nexport enum UserStatus {\n  ACTIVE = \"active\",\n  INACTIVE = \"inactive\",\n  INVITED = \"invited\",\n  REQUESTED = \"requested\",\n}\n\nexport const USER_STATUS_LABELS: Record<UserStatus, string> = {\n  [UserStatus.ACTIVE]: \"Active\",\n  [UserStatus.INACTIVE]: \"Inactive\",\n  [UserStatus.INVITED]: \"Invite Pending\",\n  [UserStatus.REQUESTED]: \"Request to Join\",\n};\n\nexport const INVALID_ROLE_HOVER_TEXT: Partial<Record<UserRole, string>> = {\n  [UserRole.BASIC]: \"Basic users can't perform any admin actions\",\n  [UserRole.ADMIN]: \"Admin users can perform all admin actions\",\n  [UserRole.GLOBAL_CURATOR]:\n    \"Global Curator users can perform admin actions for all groups they are a member of\",\n  [UserRole.CURATOR]: \"Curator role must be assigned in the Groups tab\",\n  [UserRole.SLACK_USER]:\n    \"This role is automatically assigned to users who only use Onyx via Slack\",\n};\n\nexport interface User {\n  id: string;\n  email: string;\n  is_active: boolean;\n  is_superuser: boolean;\n  is_verified: boolean;\n  role: UserRole;\n  preferences: UserPreferences;\n  current_token_created_at?: Date;\n  current_token_expiry_length?: number;\n  oidc_expiry?: Date;\n  is_cloud_superuser?: boolean;\n  team_name: string | null;\n  is_anonymous_user?: boolean;\n  // If user does not have a configured password\n  // (i.e.) they are using an oauth flow\n  // or are in a no-auth situation\n  // we don't want to show them things like the reset password\n  // functionality\n  password_configured?: boolean;\n  tenant_info?: TenantInfo | null;\n  personalization?: UserPersonalization;\n}\n\nexport interface TenantInfo {\n  new_tenant?: NewTenantInfo | null;\n  invitation?: NewTenantInfo | null;\n}\n\nexport interface NewTenantInfo {\n  tenant_id: string;\n  number_of_users: number;\n}\n\nexport interface AllUsersResponse {\n  accepted: User[];\n  invited: User[];\n  slack_users: User[];\n  accepted_pages: number;\n  invited_pages: number;\n  slack_users_pages: number;\n}\n\nexport interface AcceptedUserSnapshot {\n  id: string;\n  email: string;\n  role: UserRole;\n  is_active: boolean;\n}\n\nexport interface InvitedUserSnapshot {\n  email: string;\n}\n\nexport interface MinimalUserSnapshot {\n  id: string;\n  email: string;\n}\n\nexport type ValidInputTypes =\n  | \"load_state\"\n  | \"poll\"\n  | \"event\"\n  | \"slim_retrieval\";\nexport type ValidStatuses =\n  | \"invalid\"\n  | \"success\"\n  | \"completed_with_errors\"\n  | \"canceled\"\n  | \"failed\"\n  | \"in_progress\"\n  | \"not_started\";\nexport type TaskStatus = \"PENDING\" | \"STARTED\" | \"SUCCESS\" | \"FAILURE\";\nexport type Feedback = \"like\" | \"dislike\" | \"mixed\";\nexport type AccessType = \"public\" | \"private\" | \"sync\";\nexport type ProcessingMode = \"REGULAR\" | \"FILE_SYSTEM\";\nexport type SessionType = \"Chat\" | \"Search\" | \"Slack\";\n\nexport interface DocumentBoostStatus {\n  document_id: string;\n  semantic_id: string;\n  link: string;\n  boost: number;\n  hidden: boolean;\n}\n\nexport interface FailedConnectorIndexingStatus {\n  cc_pair_id: number;\n  name: string;\n  error_msg: string | null;\n  is_deletable: boolean;\n  connector_id: number;\n  credential_id: number;\n}\n\nexport interface IndexAttemptSnapshot {\n  id: number;\n  status: ValidStatuses | null;\n  from_beginning: boolean;\n  new_docs_indexed: number;\n  docs_removed_from_index: number;\n  total_docs_indexed: number;\n  error_msg: string | null;\n  error_count: number;\n  full_exception_trace: string | null;\n  time_started: string | null;\n  time_updated: string;\n}\n\nexport interface ConnectorStatus<ConnectorConfigType, ConnectorCredentialType> {\n  cc_pair_id: number;\n  name: string;\n  connector: Connector<ConnectorConfigType>;\n  credential: Credential<ConnectorCredentialType>;\n  access_type: AccessType;\n  groups: number[];\n}\n\nexport interface ConnectorIndexingStatus<\n  ConnectorConfigType,\n  ConnectorCredentialType,\n> extends ConnectorStatus<ConnectorConfigType, ConnectorCredentialType> {\n  // Inlcude data only necessary for indexing statuses in admin page\n  last_success: string | null;\n  last_status: ValidStatuses | null;\n  last_finished_status: ValidStatuses | null;\n  cc_pair_status: ConnectorCredentialPairStatus;\n  in_repeated_error_state: boolean;\n  latest_index_attempt: IndexAttemptSnapshot | null;\n  docs_indexed: number;\n}\n\nexport interface ConnectorIndexingStatusLite {\n  cc_pair_id: number;\n  name: string;\n  source: ValidSources;\n  access_type: AccessType;\n  in_progress: boolean;\n  cc_pair_status: ConnectorCredentialPairStatus;\n  last_finished_status: ValidStatuses | null;\n  last_status: ValidStatuses | null;\n  last_success: string | null;\n  is_editable: boolean;\n  docs_indexed: number;\n  in_repeated_error_state: boolean;\n  latest_index_attempt_docs_indexed: number | null;\n}\n\nexport interface FederatedConnectorStatus {\n  id: number;\n  source: ValidSources;\n  name: string;\n}\n\nexport interface SourceSummary {\n  total_connectors: number;\n  active_connectors: number;\n  public_connectors: number;\n  total_docs_indexed: number;\n}\n\nexport interface ConnectorIndexingStatusLiteResponse {\n  source: ValidSources;\n  summary: SourceSummary;\n  current_page: number;\n  total_pages: number;\n  indexing_statuses: (ConnectorIndexingStatusLite | FederatedConnectorStatus)[];\n}\n\nexport interface FederatedConnectorDetail {\n  id: number;\n  source: ValidSources.FederatedSlack;\n  name: string;\n  credentials: Record<string, any>;\n  config: Record<string, any>;\n  oauth_token_exists: boolean;\n  oauth_token_expires_at: string | null;\n  document_sets: Array<{\n    id: number;\n    name: string;\n    entities: Record<string, any>;\n  }>;\n}\n\nexport interface OAuthPrepareAuthorizationResponse {\n  url: string;\n}\n\nexport interface OAuthBaseCallbackResponse {\n  success: boolean;\n  message: string;\n  finalize_url: string | null;\n  redirect_on_success: string;\n}\n\nexport interface OAuthSlackCallbackResponse extends OAuthBaseCallbackResponse {\n  team_id: string;\n  authed_user_id: string;\n}\n\nexport interface ConfluenceAccessibleResource {\n  id: string;\n  name: string;\n  url: string;\n  scopes: string[];\n  avatarUrl: string;\n}\n\nexport interface OAuthConfluencePrepareFinalizationResponse {\n  success: boolean;\n  message: string;\n  accessible_resources: ConfluenceAccessibleResource[];\n}\n\nexport interface OAuthConfluenceFinalizeResponse {\n  success: boolean;\n  message: string;\n  redirect_url: string;\n}\n\nexport interface CCPairBasicInfo {\n  has_successful_run: boolean;\n  source: ValidSources;\n  status: ConnectorCredentialPairStatus;\n}\n\nexport type ConnectorSummary = {\n  count: number;\n  active: number;\n  public: number;\n  totalDocsIndexed: number;\n  errors: number; // New field for error count\n};\n\nexport type GroupedConnectorSummaries = Record<ValidSources, ConnectorSummary>;\n\n// DELETION\n\nexport interface DeletionAttemptSnapshot {\n  connector_id: number;\n  credential_id: number;\n  status: TaskStatus;\n}\n\n// DOCUMENT SETS\nexport interface CCPairDescriptor<ConnectorType, CredentialType> {\n  id: number;\n  name: string;\n  connector: Connector<ConnectorType>;\n  credential: Credential<CredentialType>;\n  access_type: AccessType;\n}\n\nexport interface FederatedConnectorConfig {\n  federated_connector_id: number;\n  entities: Record<string, any>;\n}\n\nexport interface FederatedConnectorDescriptor {\n  id: number;\n  name: string;\n  source: string;\n  entities: Record<string, any>;\n}\n\n// Simplified interfaces with minimal data\nexport interface CCPairSummary {\n  id: number;\n  name: string;\n  source: ValidSources;\n  access_type: AccessType;\n}\n\nexport interface FederatedConnectorSummary {\n  id: number;\n  name: string;\n  source: string;\n  entities: Record<string, any>;\n}\n\nexport interface DocumentSetSummary {\n  id: number;\n  name: string;\n  description: string;\n  cc_pair_summaries: CCPairSummary[];\n  is_up_to_date: boolean;\n  is_public: boolean;\n  users: string[];\n  groups: number[];\n  federated_connector_summaries: FederatedConnectorSummary[];\n}\n\nexport interface Tag {\n  tag_key: string;\n  tag_value: string;\n  source: ValidSources;\n}\n\n// STANDARD ANSWERS\nexport interface StandardAnswerCategory {\n  id: number;\n  name: string;\n}\n\nexport interface StandardAnswer {\n  id: number;\n  keyword: string;\n  answer: string;\n  match_regex: boolean;\n  match_any_keywords: boolean;\n  categories: StandardAnswerCategory[];\n}\n\n// SLACK BOT CONFIGS\n\nexport type AnswerFilterOption =\n  | \"well_answered_postfilter\"\n  | \"questionmark_prefilter\";\n\nexport interface ChannelConfig {\n  channel_name: string;\n  respond_tag_only?: boolean;\n  respond_to_bots?: boolean;\n  is_ephemeral?: boolean;\n  show_continue_in_web_ui?: boolean;\n  respond_member_group_list?: string[];\n  answer_filters?: AnswerFilterOption[];\n  follow_up_tags?: string[];\n  disabled?: boolean;\n}\n\nexport type SlackBotResponseType = \"quotes\" | \"citations\";\n\nexport interface SlackChannelConfig {\n  id: number;\n  slack_bot_id: number;\n  persona_id: number | null;\n  persona: Persona | null;\n  channel_config: ChannelConfig;\n  enable_auto_filters: boolean;\n  standard_answer_categories: StandardAnswerCategory[];\n  is_default: boolean;\n}\n\nexport interface SlackChannelDescriptor {\n  id: string;\n  name: string;\n}\n\nexport type SlackBot = {\n  id: number;\n  name: string;\n  enabled: boolean;\n  configs_count: number;\n  slack_channel_configs: Array<{\n    id: number;\n    is_default: boolean;\n    channel_config: {\n      channel_name: string;\n    };\n  }>;\n  bot_token: string;\n  app_token: string;\n  user_token?: string;\n};\n\nexport interface SlackBotTokens {\n  bot_token: string;\n  app_token: string;\n  user_token?: string;\n}\n\n/* EE Only Types */\nexport interface UserGroup {\n  id: number;\n  name: string;\n  users: User[];\n  curator_ids: string[];\n  cc_pairs: CCPairDescriptor<any, any>[];\n  document_sets: DocumentSetSummary[];\n  personas: Persona[];\n  is_up_to_date: boolean;\n  is_up_for_deletion: boolean;\n  is_default: boolean;\n}\n\nexport enum ValidSources {\n  Web = \"web\",\n  GitHub = \"github\",\n  GitLab = \"gitlab\",\n  Slack = \"slack\",\n  GoogleDrive = \"google_drive\",\n  Gmail = \"gmail\",\n  Bookstack = \"bookstack\",\n  Outline = \"outline\",\n  Confluence = \"confluence\",\n  Jira = \"jira\",\n  Productboard = \"productboard\",\n  Slab = \"slab\",\n  Coda = \"coda\",\n  Notion = \"notion\",\n  Guru = \"guru\",\n  Gong = \"gong\",\n  Zulip = \"zulip\",\n  Linear = \"linear\",\n  Hubspot = \"hubspot\",\n  Document360 = \"document360\",\n  File = \"file\",\n  UserFile = \"user_file\",\n  GoogleSites = \"google_sites\",\n  Loopio = \"loopio\",\n  Dropbox = \"dropbox\",\n  Discord = \"discord\",\n  Salesforce = \"salesforce\",\n  Sharepoint = \"sharepoint\",\n  Teams = \"teams\",\n  Zendesk = \"zendesk\",\n  Discourse = \"discourse\",\n  Axero = \"axero\",\n  Clickup = \"clickup\",\n  Wikipedia = \"wikipedia\",\n  Mediawiki = \"mediawiki\",\n  Asana = \"asana\",\n  S3 = \"s3\",\n  R2 = \"r2\",\n  GoogleCloudStorage = \"google_cloud_storage\",\n  Xenforo = \"xenforo\",\n  OciStorage = \"oci_storage\",\n  NotApplicable = \"not_applicable\",\n  IngestionApi = \"ingestion_api\",\n  Freshdesk = \"freshdesk\",\n  Fireflies = \"fireflies\",\n  Egnyte = \"egnyte\",\n  Airtable = \"airtable\",\n  Gitbook = \"gitbook\",\n  Highspot = \"highspot\",\n  DrupalWiki = \"drupal_wiki\",\n  Imap = \"imap\",\n  Bitbucket = \"bitbucket\",\n  TestRail = \"testrail\",\n\n  // Craft-specific sources\n  CraftFile = \"craft_file\",\n\n  // Federated Connectors\n  FederatedSlack = \"federated_slack\",\n}\n\nexport const federatedSourceToRegularSource = (\n  maybeFederatedSource: ValidSources\n): ValidSources => {\n  if (maybeFederatedSource === ValidSources.FederatedSlack) {\n    return ValidSources.Slack;\n  }\n  return maybeFederatedSource;\n};\n\nexport const validAutoSyncSources = [\n  ValidSources.Confluence,\n  ValidSources.Jira,\n  ValidSources.GoogleDrive,\n  ValidSources.Gmail,\n  ValidSources.Slack,\n  ValidSources.Salesforce,\n  ValidSources.GitHub,\n  ValidSources.Sharepoint,\n  ValidSources.Teams,\n] as const;\n\n// Create a type from the array elements\nexport type ValidAutoSyncSource = (typeof validAutoSyncSources)[number];\n\nexport type ConfigurableSources = Exclude<\n  ValidSources,\n  | ValidSources.NotApplicable\n  | ValidSources.IngestionApi\n  | ValidSources.FederatedSlack // is part of ValiedSources.Slack\n  | ValidSources.UserFile\n  | ValidSources.CraftFile // User Library - managed through dedicated UI\n>;\n\nexport const oauthSupportedSources: ConfigurableSources[] = [\n  ValidSources.Slack,\n  // NOTE: temporarily disabled until our GDrive App is approved\n  // ValidSources.GoogleDrive,\n  ValidSources.Confluence,\n];\n\nexport type OAuthSupportedSource = (typeof oauthSupportedSources)[number];\n\n// Federated Connector Types\nexport interface CredentialFieldSpec {\n  type: string;\n  description: string;\n  required: boolean;\n  default?: any;\n  example?: any;\n  secret: boolean;\n}\n\nexport interface ConfigurationFieldSpec {\n  type: string;\n  description: string;\n  required: boolean;\n  default?: any;\n  example?: any;\n  secret: boolean;\n  hidden_when?: Record<string, any>;\n}\n\nexport interface CredentialSchemaResponse {\n  credentials: Record<string, CredentialFieldSpec>;\n}\n\nexport interface ConfigurationSchemaResponse {\n  configuration: Record<string, ConfigurationFieldSpec>;\n}\n\nexport interface FederatedConnectorCreateRequest {\n  source: string;\n  credentials: Record<string, any>;\n  config?: Record<string, any>;\n}\n\nexport interface FederatedConnectorCreateResponse {\n  id: number;\n  source: string;\n}\n\nexport interface IndexingStatusRequest {\n  secondary_index?: boolean;\n  access_type_filters?: string[];\n  last_status_filters?: string[];\n  docs_count_operator?: \">\" | \"<\" | \"=\" | null;\n  docs_count_value?: number | null;\n  source_to_page?: Record<ValidSources, number>;\n  source?: ValidSources;\n  get_all_connectors?: boolean;\n}\n"
  },
  {
    "path": "web/src/lib/typingUtils.ts",
    "content": "import { useEffect } from \"react\";\n\ntype Handler = (event: React.KeyboardEvent) => void;\n\nexport function handleKeyPress(\n  requestedKey: string,\n  callback?: Handler,\n  passthrough?: Handler\n): Handler {\n  return (event) => {\n    const func = event.key === requestedKey ? callback : passthrough;\n    func?.(event);\n  };\n}\n\nexport function handleEnterPress(\n  callback?: Handler,\n  passthrough?: Handler\n): Handler {\n  return handleKeyPress(\"Enter\", callback, passthrough);\n}\n\nexport function useEscapePress(callback: () => void, enabled?: boolean) {\n  useEffect(() => {\n    if (!enabled) return;\n\n    const handleEscape = (event: KeyboardEvent) => {\n      if (event.key === \"Escape\") {\n        callback();\n      }\n    };\n\n    document.addEventListener(\"keydown\", handleEscape);\n    return () => {\n      document.removeEventListener(\"keydown\", handleEscape);\n    };\n  }, [callback, enabled]);\n}\n"
  },
  {
    "path": "web/src/lib/updateSlackBotField.ts",
    "content": "import { SlackBot } from \"@/lib/types\";\n\nexport async function updateSlackBotField(\n  slackBot: SlackBot,\n  field: keyof SlackBot,\n  value: any\n): Promise<Response> {\n  return fetch(`/api/manage/admin/slack-app/bots/${slackBot.id}`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({\n      ...slackBot,\n      [field]: value,\n    }),\n  });\n}\n"
  },
  {
    "path": "web/src/lib/urlBuilder.ts",
    "content": "type QueryParams = {\n  [key: string]: string | number | boolean | null | undefined;\n};\n\nexport function buildApiPath(base: string, params?: QueryParams): string {\n  let queryString = \"\";\n  if (params) {\n    const entries = Object.entries(params)\n      .filter(([key, value]) => value !== null && value !== undefined)\n      .map(\n        ([key, value]) =>\n          `${encodeURIComponent(key)}=${encodeURIComponent(value!.toString())}`\n      );\n\n    if (entries.length > 0) {\n      queryString = `?${entries.join(\"&\")}`;\n    }\n  }\n\n  return `${base}${queryString}`;\n}\n"
  },
  {
    "path": "web/src/lib/user.test.ts",
    "content": "import { getUserInitials } from \"@/lib/user\";\n\ndescribe(\"getUserInitials\", () => {\n  it(\"returns first letters of first two name parts\", () => {\n    expect(getUserInitials(\"Alice Smith\", \"alice@example.com\")).toBe(\"AS\");\n  });\n\n  it(\"returns first two chars of a single-word name\", () => {\n    expect(getUserInitials(\"Alice\", \"alice@example.com\")).toBe(\"AL\");\n  });\n\n  it(\"handles three-word names (uses first two)\", () => {\n    expect(getUserInitials(\"Alice B. Smith\", \"alice@example.com\")).toBe(\"AB\");\n  });\n\n  it(\"falls back to email local part with dot separator\", () => {\n    expect(getUserInitials(null, \"alice.smith@example.com\")).toBe(\"AS\");\n  });\n\n  it(\"falls back to email local part with underscore separator\", () => {\n    expect(getUserInitials(null, \"alice_smith@example.com\")).toBe(\"AS\");\n  });\n\n  it(\"falls back to email local part with hyphen separator\", () => {\n    expect(getUserInitials(null, \"alice-smith@example.com\")).toBe(\"AS\");\n  });\n\n  it(\"uses first two chars of email local if no separator\", () => {\n    expect(getUserInitials(null, \"alice@example.com\")).toBe(\"AL\");\n  });\n\n  it(\"returns null for empty email local part\", () => {\n    expect(getUserInitials(null, \"@example.com\")).toBeNull();\n  });\n\n  it(\"uppercases the result\", () => {\n    expect(getUserInitials(\"john doe\", \"jd@test.com\")).toBe(\"JD\");\n  });\n\n  it(\"trims whitespace from name\", () => {\n    expect(getUserInitials(\"  Alice Smith  \", \"a@test.com\")).toBe(\"AS\");\n  });\n\n  it(\"returns null for numeric name parts\", () => {\n    expect(getUserInitials(\"Alice 1st\", \"x@test.com\")).toBeNull();\n  });\n\n  it(\"returns null for numeric email\", () => {\n    expect(getUserInitials(null, \"42@domain.com\")).toBeNull();\n  });\n\n  it(\"falls back to email when name has non-alpha chars\", () => {\n    expect(getUserInitials(\"A1\", \"alice@example.com\")).toBe(\"AL\");\n  });\n});\n"
  },
  {
    "path": "web/src/lib/user.ts",
    "content": "import { User } from \"@/lib/types\";\n\nexport const checkUserIsNoAuthUser = (userId: string) => {\n  return userId === \"__no_auth_user__\";\n};\n\nexport const getCurrentUser = async (): Promise<User | null> => {\n  const response = await fetch(\"/api/me\", {\n    credentials: \"include\",\n  });\n  if (!response.ok) {\n    return null;\n  }\n  const user = await response.json();\n  return user;\n};\n\nexport const logout = async (): Promise<Response> => {\n  const response = await fetch(\"/auth/logout\", {\n    method: \"POST\",\n    credentials: \"include\",\n  });\n  return response;\n};\n\nexport const basicLogin = async (\n  email: string,\n  password: string\n): Promise<Response> => {\n  const params = new URLSearchParams([\n    [\"username\", email],\n    [\"password\", password],\n  ]);\n\n  const response = await fetch(\"/api/auth/login\", {\n    method: \"POST\",\n    credentials: \"include\",\n    headers: {\n      \"Content-Type\": \"application/x-www-form-urlencoded\",\n    },\n    body: params,\n  });\n  return response;\n};\n\nexport const basicSignup = async (\n  email: string,\n  password: string,\n  referralSource?: string,\n  captchaToken?: string\n) => {\n  const headers: Record<string, string> = {\n    \"Content-Type\": \"application/json\",\n  };\n\n  // Add captcha token to headers if provided\n  if (captchaToken) {\n    headers[\"X-Captcha-Token\"] = captchaToken;\n  }\n\n  const response = await fetch(\"/api/auth/register\", {\n    method: \"POST\",\n    credentials: \"include\",\n    headers,\n    body: JSON.stringify({\n      email,\n      username: email,\n      password,\n      referral_source: referralSource,\n      captcha_token: captchaToken,\n    }),\n  });\n  return response;\n};\n\nexport interface CustomRefreshTokenResponse {\n  access_token: string;\n  refresh_token: string;\n  session: {\n    exp: number;\n  };\n  userinfo: {\n    sub: string;\n    familyName: string;\n    givenName: string;\n    fullName: string;\n    userId: string;\n    email: string;\n  };\n}\n\nexport async function refreshToken(\n  customRefreshUrl: string\n): Promise<CustomRefreshTokenResponse | null> {\n  try {\n    console.debug(\"Sending request to custom refresh URL\");\n    // support both absolute and relative\n    const url = customRefreshUrl.startsWith(\"http\")\n      ? new URL(customRefreshUrl)\n      : new URL(customRefreshUrl, window.location.origin);\n    url.searchParams.append(\"info\", \"json\");\n    url.searchParams.append(\"access_token_refresh_interval\", \"3600\");\n\n    const response = await fetch(url.toString());\n    if (!response.ok) {\n      console.error(`Failed to refresh token: ${await response.text()}`);\n      return null;\n    }\n\n    return await response.json();\n  } catch (error) {\n    console.error(\"Error refreshing token:\", error);\n    throw error;\n  }\n}\n\nexport function getUserDisplayName(user: User | null): string {\n  // Prioritize custom personal name, if set.\n  if (!!user?.personalization?.name) return user.personalization.name;\n\n  // Then, prioritize personal email.\n  if (!!user?.email) {\n    const atIndex = user.email.indexOf(\"@\");\n    if (atIndex > 0) {\n      return user.email.substring(0, atIndex);\n    }\n  }\n\n  // If nothing works, then fall back to anonymous user name\n  return \"Anonymous\";\n}\n\nexport function getUserEmail(user: User | null): string {\n  // Prioritize personal email.\n  if (!!user?.email) return user.email;\n\n  // If nothing works, then fall back to anonymous email.\n  return \"anonymous@email.com\";\n}\n\n/**\n * Derive display initials from a user's name or email.\n *\n * - If a name is provided, uses the first letter of the first two words.\n * - Falls back to the email local part, splitting on `.`, `_`, or `-`.\n * - Returns `null` when no valid alpha initials can be derived.\n */\nexport function getUserInitials(\n  name: string | null,\n  email: string\n): string | null {\n  if (name) {\n    const words = name.trim().split(/\\s+/);\n    if (words.length >= 2) {\n      const first = words[0]?.[0];\n      const second = words[1]?.[0];\n      if (first && second) {\n        const result = (first + second).toUpperCase();\n        if (/^[A-Z]{2}$/.test(result)) return result;\n      }\n      return null;\n    }\n    if (name.trim().length >= 1) {\n      const result = name.trim().slice(0, 2).toUpperCase();\n      if (/^[A-Z]{1,2}$/.test(result)) return result;\n    }\n  }\n\n  const local = email.split(\"@\")[0];\n  if (!local || local.length === 0) return null;\n  const parts = local.split(/[._-]/);\n  if (parts.length >= 2) {\n    const first = parts[0]?.[0];\n    const second = parts[1]?.[0];\n    if (first && second) {\n      const result = (first + second).toUpperCase();\n      if (/^[A-Z]{2}$/.test(result)) return result;\n    }\n    return null;\n  }\n  if (local.length >= 2) {\n    const result = local.slice(0, 2).toUpperCase();\n    if (/^[A-Z]{2}$/.test(result)) return result;\n  }\n  if (local.length === 1) {\n    const result = local.toUpperCase();\n    if (/^[A-Z]$/.test(result)) return result;\n  }\n  return null;\n}\n"
  },
  {
    "path": "web/src/lib/userSS.ts",
    "content": "import { cookies } from \"next/headers\";\nimport { User } from \"./types\";\nimport { buildUrl, UrlBuilder } from \"./utilsSS\";\nimport { ReadonlyRequestCookies } from \"next/dist/server/web/spec-extension/adapters/request-cookies\";\nimport { AuthType, NEXT_PUBLIC_CLOUD_ENABLED } from \"./constants\";\n\nexport interface AuthTypeMetadata {\n  authType: AuthType;\n  autoRedirect: boolean;\n  requiresVerification: boolean;\n  anonymousUserEnabled: boolean | null;\n  passwordMinLength: number;\n  hasUsers: boolean;\n  oauthEnabled: boolean;\n}\n\nexport const getAuthTypeMetadataSS = async (): Promise<AuthTypeMetadata> => {\n  const res = await fetch(buildUrl(\"/auth/type\"));\n  if (!res.ok) {\n    throw new Error(\"Failed to fetch data\");\n  }\n\n  const data: {\n    auth_type: string;\n    requires_verification: boolean;\n    anonymous_user_enabled: boolean | null;\n    password_min_length: number;\n    has_users: boolean;\n    oauth_enabled: boolean;\n  } = await res.json();\n\n  let authType: AuthType;\n\n  // Override fastapi users auth so we can use both\n  if (NEXT_PUBLIC_CLOUD_ENABLED) {\n    authType = AuthType.CLOUD;\n  } else {\n    authType = data.auth_type as AuthType;\n  }\n\n  // for SAML / OIDC, we auto-redirect the user to the IdP when the user visits\n  // Onyx in an un-authenticated state\n  if (authType === AuthType.OIDC || authType === AuthType.SAML) {\n    return {\n      authType,\n      autoRedirect: true,\n      requiresVerification: data.requires_verification,\n      anonymousUserEnabled: data.anonymous_user_enabled,\n      passwordMinLength: data.password_min_length,\n      hasUsers: data.has_users,\n      oauthEnabled: data.oauth_enabled,\n    };\n  }\n  return {\n    authType,\n    autoRedirect: false,\n    requiresVerification: data.requires_verification,\n    anonymousUserEnabled: data.anonymous_user_enabled,\n    passwordMinLength: data.password_min_length,\n    hasUsers: data.has_users,\n    oauthEnabled: data.oauth_enabled,\n  };\n};\n\nconst getOIDCAuthUrlSS = async (nextUrl: string | null): Promise<string> => {\n  const url = UrlBuilder.fromClientUrl(\"/api/auth/oidc/authorize\");\n  if (nextUrl) {\n    url.addParam(\"next\", nextUrl);\n  }\n  url.addParam(\"redirect\", true);\n\n  return url.toString();\n};\n\nconst getGoogleOAuthUrlSS = async (nextUrl: string | null): Promise<string> => {\n  const url = UrlBuilder.fromClientUrl(\"/api/auth/oauth/authorize\");\n  if (nextUrl) {\n    url.addParam(\"next\", nextUrl);\n  }\n  url.addParam(\"redirect\", true);\n\n  return url.toString();\n};\n\nconst getSAMLAuthUrlSS = async (nextUrl: string | null): Promise<string> => {\n  const url = UrlBuilder.fromInternalUrl(\"/auth/saml/authorize\");\n  if (nextUrl) {\n    url.addParam(\"next\", nextUrl);\n  }\n\n  const res = await fetch(url.toString());\n  if (!res.ok) {\n    throw new Error(\"Failed to fetch data\");\n  }\n\n  const data: { authorization_url: string } = await res.json();\n  return data.authorization_url;\n};\n\nexport const getAuthUrlSS = async (\n  authType: AuthType,\n  nextUrl: string | null\n): Promise<string> => {\n  // Returns the auth url for the given auth type\n\n  switch (authType) {\n    case AuthType.BASIC:\n      return \"\";\n    case AuthType.GOOGLE_OAUTH: {\n      return await getGoogleOAuthUrlSS(nextUrl);\n    }\n    case AuthType.CLOUD: {\n      return await getGoogleOAuthUrlSS(nextUrl);\n    }\n    case AuthType.SAML: {\n      return await getSAMLAuthUrlSS(nextUrl);\n    }\n    case AuthType.OIDC: {\n      return await getOIDCAuthUrlSS(nextUrl);\n    }\n  }\n};\n\nconst logoutStandardSS = async (headers: Headers): Promise<Response> => {\n  return await fetch(buildUrl(\"/auth/logout\"), {\n    method: \"POST\",\n    headers: headers,\n  });\n};\n\nconst logoutSAMLSS = async (headers: Headers): Promise<Response> => {\n  return await fetch(buildUrl(\"/auth/saml/logout\"), {\n    method: \"POST\",\n    headers: headers,\n  });\n};\n\nexport const logoutSS = async (\n  authType: AuthType,\n  headers: Headers\n): Promise<Response | null> => {\n  switch (authType) {\n    case AuthType.SAML: {\n      return await logoutSAMLSS(headers);\n    }\n    default: {\n      return await logoutStandardSS(headers);\n    }\n  }\n};\n\nexport const getCurrentUserSS = async (): Promise<User | null> => {\n  try {\n    const cookieString = processCookies(await cookies());\n\n    const response = await fetch(buildUrl(\"/me\"), {\n      credentials: \"include\",\n      next: { revalidate: 0 },\n      headers: {\n        cookie: cookieString,\n      },\n    });\n\n    if (!response.ok) {\n      return null;\n    }\n\n    const user = await response.json();\n    return user;\n  } catch (e) {\n    console.log(`Error fetching user: ${e}`);\n    return null;\n  }\n};\n\nexport const processCookies = (cookies: ReadonlyRequestCookies): string => {\n  let cookieString = cookies\n    .getAll()\n    .map((cookie) => `${cookie.name}=${cookie.value}`)\n    .join(\"; \");\n\n  // Inject debug auth cookie for local development against remote backend (only if not already present)\n  if (process.env.DEBUG_AUTH_COOKIE && process.env.NODE_ENV === \"development\") {\n    const hasAuthCookie = cookieString\n      .split(/;\\s*/)\n      .some((c) => c.startsWith(\"fastapiusersauth=\"));\n    if (!hasAuthCookie) {\n      const debugCookie = `fastapiusersauth=${process.env.DEBUG_AUTH_COOKIE}`;\n      cookieString = cookieString\n        ? `${cookieString}; ${debugCookie}`\n        : debugCookie;\n    }\n  }\n\n  return cookieString;\n};\n"
  },
  {
    "path": "web/src/lib/userSettings.ts",
    "content": "import { UserPersonalization } from \"@/lib/types\";\n\nexport async function setUserDefaultModel(\n  model: string | null\n): Promise<Response> {\n  const response = await fetch(`/api/user/default-model`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify({ default_model: model }),\n  });\n\n  return response;\n}\n\n/**\n * Update the current user's personalization settings.\n */\nexport async function updateUserPersonalization(\n  personalization: Partial<UserPersonalization>\n): Promise<Response> {\n  return fetch(`/api/user/personalization`, {\n    method: \"PATCH\",\n    headers: {\n      \"Content-Type\": \"application/json\",\n    },\n    body: JSON.stringify(personalization),\n  });\n}\n"
  },
  {
    "path": "web/src/lib/utils.test.ts",
    "content": "import { ensureHrefProtocol, transformLinkUri } from \"./utils\";\n\ndescribe(\"ensureHrefProtocol\", () => {\n  it(\"adds https protocol to bare domains\", () => {\n    expect(ensureHrefProtocol(\"anthropic.com\")).toBe(\"https://anthropic.com\");\n  });\n\n  it(\"preserves links that already include a protocol\", () => {\n    expect(ensureHrefProtocol(\"https://anthropic.com\")).toBe(\n      \"https://anthropic.com\"\n    );\n    expect(ensureHrefProtocol(\"mailto:support@anthropic.com\")).toBe(\n      \"mailto:support@anthropic.com\"\n    );\n  });\n\n  it(\"converts bare email addresses to mailto links\", () => {\n    expect(ensureHrefProtocol(\"support@anthropic.com\")).toBe(\n      \"mailto:support@anthropic.com\"\n    );\n  });\n});\n\ndescribe(\"transformLinkUri\", () => {\n  it(\"allows safe protocols\", () => {\n    expect(transformLinkUri(\"https://anthropic.com\")).toBe(\n      \"https://anthropic.com\"\n    );\n    expect(transformLinkUri(\"mailto:support@anthropic.com\")).toBe(\n      \"mailto:support@anthropic.com\"\n    );\n  });\n\n  it(\"converts bare email addresses to mailto links\", () => {\n    expect(transformLinkUri(\"support@anthropic.com\")).toBe(\n      \"mailto:support@anthropic.com\"\n    );\n  });\n\n  it(\"blocks unsafe protocols\", () => {\n    expect(transformLinkUri(\"javascript:alert(1)\")).toBeNull();\n  });\n});\n"
  },
  {
    "path": "web/src/lib/utils.ts",
    "content": "import type { ComponentType } from \"react\";\nimport { clsx, type ClassValue } from \"clsx\";\nimport { twMerge } from \"tailwind-merge\";\nimport type { IconProps } from \"@opal/types\";\nimport {\n  SvgImage,\n  SvgFileChartPie,\n  SvgFileBraces,\n  SvgFileText,\n} from \"@opal/icons\";\nimport { ALLOWED_URL_PROTOCOLS } from \"./constants\";\n\nconst URI_SCHEME_REGEX = /^[a-zA-Z][a-zA-Z\\d+.-]*:/;\nconst BARE_EMAIL_REGEX = /^[^\\s@/]+@[^\\s@/:]+\\.[^\\s@/:]+$/;\n\nexport const INTERACTIVE_SELECTOR =\n  \"a, button, input, textarea, select, label, [role='button'], [tabindex]:not([tabindex='-1']), [contenteditable]:not([contenteditable='false'])\";\n\nexport function cn(...inputs: ClassValue[]) {\n  return twMerge(clsx(inputs));\n}\n\nexport const truncateString = (str: string, maxLength: number) => {\n  return str.length > maxLength ? str.slice(0, maxLength - 1) + \"...\" : str;\n};\n\n/**\n * Ensures an href has a protocol, adding https:// only to bare domains.\n * Converts bare email addresses to mailto: links.\n * Preserves existing protocols, relative paths, and anchors.\n */\nexport function ensureHrefProtocol(\n  href: string | undefined\n): string | undefined {\n  if (!href) return href;\n  const trimmedHref = href.trim();\n  if (!trimmedHref) return href;\n\n  const needsProtocol =\n    !URI_SCHEME_REGEX.test(trimmedHref) &&\n    !trimmedHref.startsWith(\"/\") &&\n    !trimmedHref.startsWith(\"#\");\n  if (!needsProtocol) {\n    return trimmedHref;\n  }\n\n  if (BARE_EMAIL_REGEX.test(trimmedHref)) {\n    return `mailto:${trimmedHref}`;\n  }\n\n  return `https://${trimmedHref}`;\n}\n\n/**\n * Custom URL transformer function for ReactMarkdown.\n * Only allows a small, safe set of protocols and strips everything else.\n * Bare email addresses are normalized to mailto: links.\n * Returning null removes the href attribute entirely.\n */\nexport function transformLinkUri(href: string): string | null {\n  if (!href) return null;\n\n  const trimmedHref = href.trim();\n  if (!trimmedHref) return null;\n\n  try {\n    const parsedUrl = new URL(trimmedHref);\n    const protocol = parsedUrl.protocol.toLowerCase();\n\n    if (ALLOWED_URL_PROTOCOLS.some((allowed) => allowed === protocol)) {\n      return trimmedHref;\n    }\n\n    return null;\n  } catch {\n    if (BARE_EMAIL_REGEX.test(trimmedHref)) {\n      return `mailto:${trimmedHref}`;\n    }\n\n    // Allow relative URLs, but drop anything that looks like a protocol-prefixed link\n    if (URI_SCHEME_REGEX.test(trimmedHref)) {\n      return null;\n    }\n\n    return trimmedHref;\n  }\n}\n\nexport function isSubset(parent: string[], child: string[]): boolean {\n  const parentSet = new Set(parent);\n  return Array.from(new Set(child)).every((item) => parentSet.has(item));\n}\n\nexport function trinaryLogic<T>(\n  a: boolean | undefined,\n  b: boolean,\n  ifTrue: T,\n  ifFalse: T\n): T {\n  const condition = a !== undefined ? a : b;\n  return condition ? ifTrue : ifFalse;\n}\n\n// A convenience function to prevent propagation of click events to items higher up in the DOM tree.\n//\n// # Note:\n// This is a desired behaviour in MANY locations, since we have buttons nested within buttons.\n// When the nested button is pressed, the click event that triggered it should (in most scenarios) NOT trigger its parent button!\nexport function noProp(\n  f?: (event: React.MouseEvent) => void\n): React.MouseEventHandler {\n  return (event) => {\n    event.stopPropagation();\n    f?.(event);\n  };\n}\n\n/**\n * Extracts the file extension from a filename and returns it in uppercase.\n * Returns an empty string if no valid extension is found.\n */\nexport function getFileExtension(fileName: string): string {\n  const idx = fileName.lastIndexOf(\".\");\n  if (idx === -1) return \"\";\n  const ext = fileName.slice(idx + 1).toLowerCase();\n  if (ext === \"txt\") return \"PLAINTEXT\";\n  return ext.toUpperCase();\n}\n\n/**\n * Centralized list of image file extensions (lowercase, no leading dots)\n */\nexport const IMAGE_EXTENSIONS = [\n  \"png\",\n  \"jpg\",\n  \"jpeg\",\n  \"gif\",\n  \"webp\",\n  \"svg\",\n  \"bmp\",\n] as const;\n\nexport type ImageExtension = (typeof IMAGE_EXTENSIONS)[number];\n\n/**\n * Checks whether a provided extension string corresponds to an image extension.\n * Accepts values with any casing and without a leading dot.\n */\nexport function isImageExtension(\n  extension: string | null | undefined\n): boolean {\n  if (!extension) {\n    return false;\n  }\n  const normalized = extension.toLowerCase();\n  return (IMAGE_EXTENSIONS as readonly string[]).includes(normalized);\n}\n\n/**\n * Formats bytes to human-readable file size.\n */\nexport function formatBytes(\n  bytes: number | undefined,\n  decimals: number = 2\n): string {\n  if (bytes == null) return \"Unknown\";\n  if (bytes === 0) return \"0 Bytes\";\n\n  const k = 1024;\n  const dm = decimals < 0 ? 0 : decimals;\n  const sizes = [\"Bytes\", \"KB\", \"MB\", \"GB\", \"TB\"];\n\n  let unitIndex = Math.floor(Math.log(bytes) / Math.log(k));\n  if (unitIndex < 0) unitIndex = 0;\n  if (unitIndex >= sizes.length) unitIndex = sizes.length - 1;\n  return (\n    parseFloat((bytes / Math.pow(k, unitIndex)).toFixed(dm)) +\n    \" \" +\n    sizes[unitIndex]\n  );\n}\n\n/**\n * Checks if a filename represents an image file based on its extension.\n */\nexport function isImageFile(fileName: string | null | undefined): boolean {\n  if (!fileName) return false;\n  const lowerFileName = String(fileName).toLowerCase();\n  return IMAGE_EXTENSIONS.some((ext) => lowerFileName.endsWith(`.${ext}`));\n}\n\n/**\n * Typical code/config file extensions (lowercase, no leading dots)\n */\nexport const CODE_EXTENSIONS = [\n  \"ts\",\n  \"tsx\",\n  \"js\",\n  \"jsx\",\n  \"mjs\",\n  \"cjs\",\n  \"py\",\n  \"pyw\",\n  \"java\",\n  \"kt\",\n  \"kts\",\n  \"c\",\n  \"h\",\n  \"cpp\",\n  \"cc\",\n  \"cxx\",\n  \"hpp\",\n  \"cs\",\n  \"go\",\n  \"rs\",\n  \"rb\",\n  \"php\",\n  \"swift\",\n  \"scala\",\n  \"r\",\n  \"sql\",\n  \"sh\",\n  \"bash\",\n  \"zsh\",\n  \"yaml\",\n  \"yml\",\n  \"json\",\n  \"xml\",\n  \"html\",\n  \"htm\",\n  \"css\",\n  \"scss\",\n  \"sass\",\n  \"less\",\n  \"lua\",\n  \"pl\",\n  \"vue\",\n  \"svelte\",\n  \"m\",\n  \"mm\",\n  \"md\",\n  \"markdown\",\n] as const;\n\n/**\n * Checks if a filename represents a code/config file based on its extension.\n */\nexport function isCodeFile(fileName: string | null | undefined): boolean {\n  if (!fileName) return false;\n  const lowerFileName = String(fileName).toLowerCase();\n  return CODE_EXTENSIONS.some((ext) => lowerFileName.endsWith(`.${ext}`));\n}\n\n/**\n * Returns the icon component for a file based on its name/path.\n * Used for file tree and preview tab icons.\n */\nexport function getFileIcon(\n  fileName: string | null | undefined\n): ComponentType<IconProps> {\n  if (!fileName) return SvgFileText;\n  if (isImageFile(fileName)) return SvgImage;\n  if (/\\.pptx$/i.test(fileName)) return SvgFileChartPie;\n  if (/\\.pdf$/i.test(fileName)) return SvgFileText;\n  if (isCodeFile(fileName)) return SvgFileBraces;\n  return SvgFileText;\n}\n\n/**\n * Checks if a collection of files contains any non-image files.\n * Useful for determining whether image previews should be compact.\n */\nexport function hasNonImageFiles(\n  files: Array<{ name?: string | null }>\n): boolean {\n  return files.some((file) => !isImageFile(file.name));\n}\n\n/**\n * Merges multiple refs into a single callback ref.\n * Useful when a component needs both an internal ref and a forwarded ref.\n */\nexport function mergeRefs<T>(\n  ...refs: (React.Ref<T> | undefined)[]\n): React.RefCallback<T> {\n  return (node: T | null) => {\n    refs.forEach((ref) => {\n      if (typeof ref === \"function\") {\n        ref(node);\n      } else if (ref) {\n        (ref as React.MutableRefObject<T | null>).current = node;\n      }\n    });\n  };\n}\n"
  },
  {
    "path": "web/src/lib/utilsSS.ts",
    "content": "import { cookies } from \"next/headers\";\nimport { HOST_URL, INTERNAL_URL } from \"./constants\";\nimport { processCookies } from \"@/lib/userSS\";\n\nexport function buildClientUrl(path: string) {\n  if (path.startsWith(\"/\")) {\n    return `${HOST_URL}${path}`;\n  }\n  return `${HOST_URL}/${path}`;\n}\n\nexport function buildUrl(path: string) {\n  if (path.startsWith(\"/\")) {\n    return `${INTERNAL_URL}${path}`;\n  }\n  return `${INTERNAL_URL}/${path}`;\n}\n\nexport class UrlBuilder {\n  private url: URL;\n\n  constructor(baseUrl: string) {\n    try {\n      this.url = new URL(baseUrl);\n    } catch {\n      // Handle relative URLs by prepending a base\n      this.url = new URL(baseUrl, \"http://placeholder.com\");\n    }\n  }\n\n  addParam(key: string, value: string | number | boolean): UrlBuilder {\n    this.url.searchParams.set(key, String(value));\n    return this;\n  }\n\n  addParams(params: Record<string, string | number | boolean>): UrlBuilder {\n    Object.entries(params).forEach(([key, value]) => {\n      this.url.searchParams.set(key, String(value));\n    });\n    return this;\n  }\n\n  toString(): string {\n    // Extract just the path and query parts for relative URLs\n    if (this.url.origin === \"http://placeholder.com\") {\n      return `${this.url.pathname}${this.url.search}`;\n    }\n    return this.url.toString();\n  }\n\n  static fromInternalUrl(path: string): UrlBuilder {\n    return new UrlBuilder(buildUrl(path));\n  }\n\n  static fromClientUrl(path: string): UrlBuilder {\n    return new UrlBuilder(buildClientUrl(path));\n  }\n}\n\nexport async function fetchSS(url: string, options?: RequestInit) {\n  const cookieString = processCookies(await cookies());\n\n  const init: RequestInit = {\n    credentials: \"include\",\n    cache: \"no-store\",\n    ...options,\n    headers: {\n      ...options?.headers,\n      cookie: cookieString,\n    },\n  };\n\n  return fetch(buildUrl(url), init);\n}\n"
  },
  {
    "path": "web/src/lib/version.ts",
    "content": "import { buildUrl } from \"./utilsSS\";\n\n// Maybe improve type-safety by creating a 'VersionType' instead of generic string\nexport const getBackendVersion = async (): Promise<string | null> => {\n  try {\n    const res = await fetch(buildUrl(\"/version\"));\n    if (!res.ok) {\n      //throw new Error(\"Failed to fetch data\");\n      return null;\n    }\n\n    const data: { backend_version: string } = await res.json();\n    return data.backend_version as string;\n  } catch (e) {\n    console.log(`Error fetching backend version info: ${e}`);\n    return null;\n  }\n};\n\n// Frontend?\nexport const getWebVersion = (): string | null => {\n  return process.env.ONYX_VERSION || \"dev\";\n};\n"
  },
  {
    "path": "web/src/providers/AppBackgroundProvider.tsx",
    "content": "\"use client\";\n\nimport React, { createContext, useContext, useMemo } from \"react\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport {\n  CHAT_BACKGROUND_NONE,\n  getBackgroundById,\n  ChatBackgroundOption,\n} from \"@/lib/constants/chatBackgrounds\";\n\ninterface AppBackgroundContextType {\n  /** The full background option object, or undefined if none/invalid */\n  appBackground: ChatBackgroundOption | undefined;\n  /** The URL of the background image, or null if no background is set */\n  appBackgroundUrl: string | null;\n  /** Whether a background is currently active */\n  hasBackground: boolean;\n}\n\nconst AppBackgroundContext = createContext<\n  AppBackgroundContextType | undefined\n>(undefined);\n\nexport function AppBackgroundProvider({\n  children,\n}: {\n  children: React.ReactNode;\n}) {\n  const { user } = useUser();\n\n  const value = useMemo(() => {\n    const chatBackgroundId = user?.preferences?.chat_background;\n    const appBackground = getBackgroundById(chatBackgroundId ?? null);\n    const hasBackground =\n      !!appBackground && appBackground.src !== CHAT_BACKGROUND_NONE;\n    const appBackgroundUrl = hasBackground ? appBackground.src : null;\n\n    return {\n      appBackground,\n      appBackgroundUrl,\n      hasBackground,\n    };\n  }, [user?.preferences?.chat_background]);\n\n  return (\n    <AppBackgroundContext.Provider value={value}>\n      {children}\n    </AppBackgroundContext.Provider>\n  );\n}\n\nexport function useAppBackground() {\n  const context = useContext(AppBackgroundContext);\n  if (context === undefined) {\n    throw new Error(\n      \"useAppBackground must be used within an AppBackgroundProvider\"\n    );\n  }\n  return context;\n}\n"
  },
  {
    "path": "web/src/providers/AppProvider.tsx",
    "content": "/**\n * AppProvider - Root Provider Composition\n *\n * This component serves as a centralized wrapper that composes all of the\n * application's context providers into a single component. It is rendered\n * at the root layout level (`app/layout.tsx`) and provides global state\n * and functionality to the entire application.\n *\n * All data is fetched client-side by individual providers via SWR hooks,\n * eliminating server-side data fetching from the root layout and preventing\n * RSC prefetch amplification.\n *\n * ## Provider Hierarchy (outermost to innermost)\n *\n * 1. **SettingsProvider** - Application settings and feature flags\n * 2. **UserProvider** - Current user authentication and profile\n * 3. **AppBackgroundProvider** - App background image/URL based on user preferences\n * 4. **ProviderContextProvider** - LLM provider configuration\n * 5. **ModalProvider** - Global modal state management\n * 6. **AppSidebarProvider** - Sidebar open/closed state\n * 7. **QueryControllerProvider** - Search/Chat mode + query lifecycle\n */\n\"use client\";\n\nimport { UserProvider } from \"@/providers/UserProvider\";\nimport { ProviderContextProvider } from \"@/components/chat/ProviderContext\";\nimport { SettingsProvider } from \"@/providers/SettingsProvider\";\nimport { ModalProvider } from \"@/components/context/ModalContext\";\nimport { AppSidebarProvider } from \"@/providers/AppSidebarProvider\";\nimport { AppBackgroundProvider } from \"@/providers/AppBackgroundProvider\";\nimport { QueryControllerProvider } from \"@/providers/QueryControllerProvider\";\nimport ToastProvider from \"@/providers/ToastProvider\";\n\ninterface AppProviderProps {\n  children: React.ReactNode;\n}\n\nexport default function AppProvider({ children }: AppProviderProps) {\n  return (\n    <SettingsProvider>\n      <UserProvider>\n        <AppBackgroundProvider>\n          <ProviderContextProvider>\n            <ModalProvider>\n              <AppSidebarProvider>\n                <QueryControllerProvider>\n                  <ToastProvider>{children}</ToastProvider>\n                </QueryControllerProvider>\n              </AppSidebarProvider>\n            </ModalProvider>\n          </ProviderContextProvider>\n        </AppBackgroundProvider>\n      </UserProvider>\n    </SettingsProvider>\n  );\n}\n"
  },
  {
    "path": "web/src/providers/AppSidebarProvider.tsx",
    "content": "\"use client\";\n\nimport {\n  createContext,\n  useContext,\n  useState,\n  ReactNode,\n  Dispatch,\n  SetStateAction,\n  useEffect,\n} from \"react\";\nimport Cookies from \"js-cookie\";\nimport { SIDEBAR_TOGGLED_COOKIE_NAME } from \"@/components/resizable/constants\";\n\nfunction setFoldedCookie(folded: boolean) {\n  const foldedAsString = folded.toString();\n  Cookies.set(SIDEBAR_TOGGLED_COOKIE_NAME, foldedAsString, { expires: 365 });\n  if (typeof window !== \"undefined\") {\n    localStorage.setItem(SIDEBAR_TOGGLED_COOKIE_NAME, foldedAsString);\n  }\n}\n\nexport interface AppSidebarProviderProps {\n  children: ReactNode;\n}\n\nexport function AppSidebarProvider({ children }: AppSidebarProviderProps) {\n  const [folded, setFoldedInternal] = useState(false);\n\n  useEffect(() => {\n    const stored =\n      Cookies.get(SIDEBAR_TOGGLED_COOKIE_NAME) ??\n      localStorage.getItem(SIDEBAR_TOGGLED_COOKIE_NAME);\n    if (stored === \"true\") {\n      setFoldedInternal(true);\n    }\n  }, []);\n\n  const setFolded: Dispatch<SetStateAction<boolean>> = (value) => {\n    setFoldedInternal((prev) => {\n      const newState = typeof value === \"function\" ? value(prev) : value;\n      setFoldedCookie(newState);\n      return newState;\n    });\n  };\n\n  useEffect(() => {\n    function handleKeyDown(event: KeyboardEvent) {\n      const isMac = navigator.userAgent.toLowerCase().includes(\"mac\");\n      const isModifierPressed = isMac ? event.metaKey : event.ctrlKey;\n      if (!isModifierPressed || event.key !== \"e\") return;\n\n      event.preventDefault();\n      setFolded((prev) => !prev);\n    }\n\n    document.addEventListener(\"keydown\", handleKeyDown);\n    return () => {\n      document.removeEventListener(\"keydown\", handleKeyDown);\n    };\n  }, []);\n\n  return (\n    <AppSidebarContext.Provider\n      value={{\n        folded,\n        setFolded,\n      }}\n    >\n      {children}\n    </AppSidebarContext.Provider>\n  );\n}\n\nexport interface AppSidebarContextType {\n  folded: boolean;\n  setFolded: Dispatch<SetStateAction<boolean>>;\n}\n\nconst AppSidebarContext = createContext<AppSidebarContextType | undefined>(\n  undefined\n);\n\nexport function useAppSidebarContext() {\n  const context = useContext(AppSidebarContext);\n  if (context === undefined) {\n    throw new Error(\n      \"useAppSidebarContext must be used within an AppSidebarProvider\"\n    );\n  }\n  return context;\n}\n"
  },
  {
    "path": "web/src/providers/CustomAnalyticsScript.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useRef } from \"react\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\n\nexport default function CustomAnalyticsScript() {\n  const { customAnalyticsScript } = useSettingsContext();\n  const injectedRef = useRef(false);\n\n  useEffect(() => {\n    if (!customAnalyticsScript || injectedRef.current) return;\n    injectedRef.current = true;\n\n    const script = document.createElement(\"script\");\n    script.type = \"text/javascript\";\n    script.textContent = customAnalyticsScript;\n    document.head.appendChild(script);\n  }, [customAnalyticsScript]);\n\n  return null;\n}\n"
  },
  {
    "path": "web/src/providers/DynamicMetadata.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useMemo } from \"react\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\n\nexport default function DynamicMetadata() {\n  const { enterpriseSettings } = useSettingsContext();\n\n  useEffect(() => {\n    const title = enterpriseSettings?.application_name || \"Onyx\";\n    if (document.title !== title) {\n      document.title = title;\n    }\n  }, [enterpriseSettings]);\n\n  // Cache-buster so the favicon re-fetches after an admin uploads a new logo.\n  const cacheBuster = useMemo(\n    () => Date.now(),\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n    [enterpriseSettings]\n  );\n\n  const favicon = enterpriseSettings?.use_custom_logo\n    ? `/api/enterprise-settings/logo?v=${cacheBuster}`\n    : \"/onyx.ico\";\n\n  return <link rel=\"icon\" href={favicon} />;\n}\n"
  },
  {
    "path": "web/src/providers/ProductGatingWrapper.tsx",
    "content": "\"use client\";\n\nimport { ApplicationStatus } from \"@/interfaces/settings\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport GatedContentWrapper from \"@/components/GatedContentWrapper\";\n\nexport default function ProductGatingWrapper({\n  children,\n}: {\n  children: React.ReactNode;\n}) {\n  const { settings, settingsLoading } = useSettingsContext();\n  const status = settings.application_status;\n\n  if (settingsLoading) return null;\n\n  if (\n    status === ApplicationStatus.GATED_ACCESS ||\n    status === ApplicationStatus.SEAT_LIMIT_EXCEEDED\n  ) {\n    return <GatedContentWrapper>{children}</GatedContentWrapper>;\n  }\n\n  return children;\n}\n"
  },
  {
    "path": "web/src/providers/ProjectsContext.tsx",
    "content": "\"use client\";\n\nimport {\n  createContext,\n  useCallback,\n  useContext,\n  useEffect,\n  useMemo,\n  useState,\n  useRef,\n  ReactNode,\n  Dispatch,\n  SetStateAction,\n} from \"react\";\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher, skipRetryOnAuthError } from \"@/lib/fetcher\";\nimport type {\n  CategorizedFiles,\n  Project,\n  ProjectFile,\n  UserFileDeleteResult,\n} from \"@/app/app/projects/projectsService\";\nimport {\n  fetchProjects as svcFetchProjects,\n  createProject as svcCreateProject,\n  uploadFiles as svcUploadFiles,\n  getRecentFiles as svcGetRecentFiles,\n  getFilesInProject as svcGetFilesInProject,\n  getProject as svcGetProject,\n  getProjectInstructions as svcGetProjectInstructions,\n  upsertProjectInstructions as svcUpsertProjectInstructions,\n  getProjectDetails as svcGetProjectDetails,\n  ProjectDetails,\n  renameProject as svcRenameProject,\n  deleteProject as svcDeleteProject,\n  deleteUserFile as svcDeleteUserFile,\n  getUserFileStatuses as svcGetUserFileStatuses,\n  unlinkFileFromProject as svcUnlinkFileFromProject,\n  linkFileToProject as svcLinkFileToProject,\n  UserFileStatus,\n} from \"@/app/app/projects/projectsService\";\nimport { useSearchParams } from \"next/navigation\";\nimport { SEARCH_PARAM_NAMES } from \"@/app/app/services/searchParams\";\nimport { useAppRouter } from \"@/hooks/appNavigation\";\nimport { ChatFileType } from \"@/app/app/interfaces\";\nimport { toast } from \"@/hooks/useToast\";\nimport { useProjects } from \"@/lib/hooks/useProjects\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\n\nexport type { Project, ProjectFile } from \"@/app/app/projects/projectsService\";\n\n// Helper to generate unique temp IDs\nconst generateTempId = () => {\n  try {\n    return `temp_${crypto.randomUUID()}`;\n  } catch {\n    // Extremely unlikely fallback\n    return `temp_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;\n  }\n};\n\n// Create optimistic file from File object\nconst createOptimisticFile = (\n  file: File,\n  projectId: number | null = null\n): ProjectFile => {\n  const tempId = generateTempId();\n  return {\n    id: tempId, // Use temp ID as the actual ID initially\n    file_id: tempId,\n    name: file.name,\n    project_id: projectId,\n    user_id: null,\n    created_at: new Date().toISOString(),\n    status: UserFileStatus.UPLOADING,\n    file_type: file.type,\n    last_accessed_at: new Date().toISOString(),\n    chat_file_type: ChatFileType.DOCUMENT,\n    token_count: null,\n    chunk_count: null,\n    temp_id: tempId, // Store temp_id for mapping later\n  };\n};\n\nfunction buildFileKey(file: File): string {\n  const namePrefix = (file.name ?? \"\").slice(0, 50);\n  return `${file.size}|${namePrefix}`;\n}\n\ninterface ProjectsContextType {\n  projects: Project[];\n  recentFiles: ProjectFile[];\n  currentProjectDetails: ProjectDetails | null;\n  currentProjectId: number | null;\n  currentMessageFiles: ProjectFile[];\n  beginUpload: (\n    files: File[],\n    projectId?: number | null,\n    onSuccess?: (uploaded: CategorizedFiles) => void,\n    onFailure?: (failedTempIds: string[]) => void\n  ) => Promise<ProjectFile[]>;\n  allRecentFiles: ProjectFile[];\n  allCurrentProjectFiles: ProjectFile[];\n  isLoadingProjectDetails: boolean;\n  setCurrentMessageFiles: Dispatch<SetStateAction<ProjectFile[]>>;\n  upsertInstructions: (instructions: string) => Promise<void>;\n  fetchProjects: () => Promise<Project[]>;\n  createProject: (name: string) => Promise<Project>;\n  renameProject: (projectId: number, name: string) => Promise<Project>;\n  deleteProject: (projectId: number) => Promise<void>;\n  uploadFiles: (\n    files: File[],\n    projectId?: number | null\n  ) => Promise<CategorizedFiles>;\n  getRecentFiles: () => Promise<ProjectFile[]>;\n  getFilesInProject: (projectId: number) => Promise<ProjectFile[]>;\n  refreshCurrentProjectDetails: () => Promise<void>;\n  refreshRecentFiles: () => Promise<void>;\n  deleteUserFile: (fileId: string) => Promise<UserFileDeleteResult>;\n  unlinkFileFromProject: (projectId: number, fileId: string) => Promise<void>;\n  linkFileToProject?: (projectId: number, file: ProjectFile) => void;\n  lastFailedFiles: ProjectFile[];\n  clearLastFailedFiles: () => void;\n}\n\nconst ProjectsContext = createContext<ProjectsContextType | undefined>(\n  undefined\n);\n\ninterface ProjectsProviderProps {\n  children: ReactNode;\n}\n\nexport function ProjectsProvider({ children }: ProjectsProviderProps) {\n  // Use SWR hook for projects list - no more SSR initial data\n  const { projects, refreshProjects } = useProjects();\n  const [recentFiles, setRecentFiles] = useState<ProjectFile[]>([]);\n  const [currentProjectDetails, setCurrentProjectDetails] =\n    useState<ProjectDetails | null>(null);\n  const searchParams = useSearchParams();\n  const currentProjectIdRaw = searchParams.get(SEARCH_PARAM_NAMES.PROJECT_ID);\n  const currentProjectId = currentProjectIdRaw\n    ? Number.parseInt(currentProjectIdRaw)\n    : null;\n  const [currentMessageFiles, setCurrentMessageFiles] = useState<ProjectFile[]>(\n    []\n  );\n  const pollIntervalRef = useRef<number | null>(null);\n  const isPollingRef = useRef<boolean>(false);\n  const [lastFailedFiles, setLastFailedFiles] = useState<ProjectFile[]>([]);\n  const [trackedUploadIds, setTrackedUploadIds] = useState<Set<string>>(\n    new Set()\n  );\n  const [allRecentFiles, setAllRecentFiles] = useState<ProjectFile[]>([]);\n  const [allCurrentProjectFiles, setAllCurrentProjectFiles] = useState<\n    ProjectFile[]\n  >([]);\n  const [isLoadingProjectDetails, setIsLoadingProjectDetails] = useState(false);\n  const projectToUploadFilesMapRef = useRef<Map<number, ProjectFile[]>>(\n    new Map()\n  );\n  const route = useAppRouter();\n  const settingsContext = useContext(SettingsContext);\n\n  // SWR-backed fetch for recent files. Deduplicates across all mounts and\n  // handles React StrictMode double-invocation without firing duplicate requests.\n  const { data: recentFilesData, mutate: mutateRecentFiles } = useSWR<\n    ProjectFile[]\n  >(\"/api/user/files/recent\", errorHandlingFetcher, {\n    revalidateOnFocus: false,\n    dedupingInterval: 60_000,\n    onErrorRetry: skipRetryOnAuthError,\n    onError: (err) =>\n      console.error(\"[ProjectsContext] recent files fetch failed:\", err),\n  });\n  // Track whether allRecentFiles has been seeded from the initial server fetch.\n  // Subsequent updates come through the merge effect below, not a full reset.\n  const hasInitializedAllRecentFilesRef = useRef(false);\n\n  // Use SWR's mutate to refresh projects - returns the new data\n  const fetchProjects = useCallback(async (): Promise<Project[]> => {\n    try {\n      const result = await refreshProjects();\n      return result ?? [];\n    } catch (err) {\n      return [];\n    }\n  }, [refreshProjects]);\n\n  // Load full details for current project\n  const refreshCurrentProjectDetails = useCallback(async () => {\n    if (currentProjectId) {\n      setIsLoadingProjectDetails(true);\n      try {\n        const details = await svcGetProjectDetails(currentProjectId);\n        await fetchProjects();\n        setCurrentProjectDetails(details);\n        setAllCurrentProjectFiles(details.files || []);\n        if (projectToUploadFilesMapRef.current.has(currentProjectId)) {\n          setAllCurrentProjectFiles((prev) => [\n            ...prev,\n            ...(projectToUploadFilesMapRef.current.get(currentProjectId) || []),\n          ]);\n        }\n      } finally {\n        setIsLoadingProjectDetails(false);\n      }\n    }\n  }, [\n    fetchProjects,\n    currentProjectId,\n    setCurrentProjectDetails,\n    projectToUploadFilesMapRef,\n  ]);\n\n  const upsertInstructions = useCallback(\n    async (instructions: string) => {\n      if (!currentProjectId) {\n        throw new Error(\"No project selected\");\n      }\n      await svcUpsertProjectInstructions(currentProjectId, instructions);\n      await refreshCurrentProjectDetails();\n    },\n    [currentProjectId, refreshCurrentProjectDetails]\n  );\n\n  const createProject = useCallback(\n    async (name: string): Promise<Project> => {\n      try {\n        const project: Project = await svcCreateProject(name);\n        // Navigate to the newly created project's page\n        route({ projectId: project.id });\n        // Refresh list to keep order consistent with backend\n        await fetchProjects();\n        return project;\n      } catch (err) {\n        const message =\n          err instanceof Error ? err.message : \"Failed to create project\";\n        throw err;\n      }\n    },\n    [fetchProjects, route]\n  );\n\n  const renameProject = useCallback(\n    async (projectId: number, name: string): Promise<Project> => {\n      // Optimistically update project details UI if this is the current project\n      if (currentProjectId === projectId) {\n        setCurrentProjectDetails((prev) =>\n          prev ? { ...prev, project: { ...prev.project, name } } : prev\n        );\n      }\n\n      try {\n        const updated = await svcRenameProject(projectId, name);\n        // Refresh to get canonical state from server (SWR handles projects list)\n        await fetchProjects();\n        if (currentProjectId === projectId) {\n          await refreshCurrentProjectDetails();\n        }\n        return updated;\n      } catch (err) {\n        // Refresh to restore on failure\n        await fetchProjects();\n        if (currentProjectId === projectId) {\n          await refreshCurrentProjectDetails();\n        }\n        const message =\n          err instanceof Error ? err.message : \"Failed to rename project\";\n        throw err;\n      }\n    },\n    [fetchProjects, currentProjectId, refreshCurrentProjectDetails]\n  );\n\n  const deleteProject = useCallback(\n    async (projectId: number): Promise<void> => {\n      try {\n        await svcDeleteProject(projectId);\n        await fetchProjects();\n        if (currentProjectId === projectId) {\n          setCurrentProjectDetails(null);\n          setAllCurrentProjectFiles([]);\n          projectToUploadFilesMapRef.current.delete(projectId);\n          route();\n        }\n      } catch (err) {\n        throw err;\n      }\n    },\n    [fetchProjects, currentProjectId, projectToUploadFilesMapRef, route]\n  );\n\n  const getRecentFiles = useCallback(async (): Promise<ProjectFile[]> => {\n    try {\n      const data: ProjectFile[] = await svcGetRecentFiles();\n      return data;\n    } catch (err) {\n      const message =\n        err instanceof Error ? err.message : \"Failed to fetch recent files\";\n      return [];\n    }\n  }, []);\n\n  const refreshRecentFiles = useCallback(async () => {\n    await mutateRecentFiles();\n  }, [mutateRecentFiles]);\n\n  const getTempIdMap = (files: File[], optimisticFiles: ProjectFile[]) => {\n    const tempIdMap = new Map<string, string>();\n    for (const f of files) {\n      const tempId = optimisticFiles.find((o) => o.name === f.name)?.temp_id;\n      if (tempId) {\n        tempIdMap.set(buildFileKey(f), tempId);\n      }\n    }\n    return tempIdMap;\n  };\n\n  const removeOptimisticFilesByTempIds = useCallback(\n    (optimisticTempIds: Set<string>, projectId?: number | null) => {\n      // Remove from recent optimistic list\n      setAllRecentFiles((prev) =>\n        prev.filter((f) => !f.temp_id || !optimisticTempIds.has(f.temp_id))\n      );\n\n      // Remove from current message files if present\n      setCurrentMessageFiles((prev) =>\n        prev.filter((f) => !f.temp_id || !optimisticTempIds.has(f.temp_id))\n      );\n\n      // Remove from project optimistic list\n      if (projectId) {\n        setAllCurrentProjectFiles((prev) =>\n          prev.filter((f) => !f.temp_id || !optimisticTempIds.has(f.temp_id))\n        );\n\n        // Clear the tracked optimistic files for this project\n        let projectIdToFiles: ProjectFile[] =\n          projectToUploadFilesMapRef.current.get(projectId) || [];\n        projectIdToFiles = projectIdToFiles.filter(\n          (f: ProjectFile) => !f.temp_id || !optimisticTempIds.has(f.temp_id)\n        );\n        projectToUploadFilesMapRef.current.set(projectId, projectIdToFiles);\n      }\n    },\n    [projectToUploadFilesMapRef]\n  );\n\n  const beginUpload = useCallback(\n    async (\n      files: File[],\n      projectId?: number | null,\n      onSuccess?: (uploaded: CategorizedFiles) => void,\n      onFailure?: (failedTempIds: string[]) => void\n    ): Promise<ProjectFile[]> => {\n      const rawMax = settingsContext?.settings?.user_file_max_upload_size_mb;\n\n      const oversizedFiles =\n        rawMax && rawMax > 0\n          ? files.filter((file) => file.size > rawMax * 1024 * 1024)\n          : [];\n      const validFiles =\n        rawMax && rawMax > 0\n          ? files.filter((file) => file.size <= rawMax * 1024 * 1024)\n          : files;\n\n      if (oversizedFiles.length > 0) {\n        const skippedNames = oversizedFiles.map((file) => file.name).join(\", \");\n        toast.warning(\n          `Skipped ${oversizedFiles.length} oversized file(s) (>${rawMax} MB): ${skippedNames}`\n        );\n      }\n\n      if (validFiles.length === 0) {\n        onFailure?.([]);\n        return [];\n      }\n\n      const optimisticFiles = validFiles.map((f) =>\n        createOptimisticFile(f, projectId)\n      );\n      const tempIdMap = getTempIdMap(validFiles, optimisticFiles);\n      setAllRecentFiles((prev) => [...optimisticFiles, ...prev]);\n      if (projectId) {\n        setAllCurrentProjectFiles((prev) => [...optimisticFiles, ...prev]);\n        projectToUploadFilesMapRef.current.set(projectId, optimisticFiles);\n      }\n      svcUploadFiles(validFiles, projectId, tempIdMap)\n        .then((uploaded) => {\n          const uploadedFiles = uploaded.user_files || [];\n          const tempIdToUploadedFileMap = new Map(\n            uploadedFiles.map((f) => [f.temp_id, f])\n          );\n\n          setAllRecentFiles((prev) =>\n            prev.map((f) => {\n              if (f.temp_id) {\n                const u = tempIdToUploadedFileMap.get(f.temp_id);\n                return u ? { ...f, ...u } : f;\n              }\n              return f;\n            })\n          );\n          setCurrentMessageFiles((prev) =>\n            prev.map((f) => {\n              if (f.temp_id) {\n                const u = tempIdToUploadedFileMap.get(f.temp_id);\n                return u ? { ...f, ...u } : f;\n              }\n              return f;\n            })\n          );\n          if (projectId) {\n            setAllCurrentProjectFiles((prev) =>\n              prev.map((f) => {\n                if (f.temp_id) {\n                  const u = tempIdToUploadedFileMap.get(f.temp_id);\n                  return u ? { ...f, ...u } : f;\n                }\n                return f;\n              })\n            );\n            projectToUploadFilesMapRef.current.set(projectId, []);\n          }\n          const rejected_files = uploaded.rejected_files || [];\n\n          if (rejected_files.length > 0) {\n            const uniqueReasons = new Set(\n              rejected_files.map((rejected_file) => rejected_file.reason)\n            );\n            const detailsParts = Array.from(uniqueReasons);\n\n            toast.warning(\n              `Some files were not uploaded. ${detailsParts.join(\" | \")}`\n            );\n\n            const failedNameSet = new Set<string>(\n              rejected_files.map((file) => file.file_name)\n            );\n            const failedTempIds = Array.from(\n              new Set(\n                optimisticFiles\n                  .filter((f) => f.temp_id && failedNameSet.has(f.name))\n                  .map((f) => f.temp_id as string)\n              )\n            );\n            removeOptimisticFilesByTempIds(new Set(failedTempIds), projectId);\n            if (failedTempIds.length > 0) {\n              onFailure?.(failedTempIds);\n            }\n          }\n          if (uploadedFiles.length > 0) {\n            setTrackedUploadIds((prev) => {\n              const next = new Set(prev);\n              for (const f of uploadedFiles) next.add(f.id);\n              return next;\n            });\n          }\n          onSuccess?.(uploaded);\n        })\n        .catch((err) => {\n          // Roll back optimistic inserts on failure\n          const optimisticTempIds = new Set(\n            optimisticFiles\n              .map((f) => f.temp_id)\n              .filter((id): id is string => Boolean(id))\n          );\n\n          removeOptimisticFilesByTempIds(optimisticTempIds, projectId);\n\n          toast.error(\"Failed to upload files\");\n\n          onFailure?.(Array.from(optimisticTempIds));\n        })\n        .finally(() => {\n          if (projectId && currentProjectId === projectId) {\n            refreshCurrentProjectDetails();\n          }\n          refreshRecentFiles();\n        });\n      return optimisticFiles;\n    },\n    [\n      currentProjectId,\n      refreshCurrentProjectDetails,\n      refreshRecentFiles,\n      removeOptimisticFilesByTempIds,\n      settingsContext,\n    ]\n  );\n\n  const uploadFiles = useCallback(\n    async (\n      files: File[],\n      projectId?: number | null\n    ): Promise<CategorizedFiles> => {\n      try {\n        const uploaded: CategorizedFiles = await svcUploadFiles(\n          files,\n          projectId\n        );\n        const uploadedFiles = uploaded.user_files || [];\n        // Track these uploaded file IDs for targeted polling\n        if (uploadedFiles.length > 0) {\n          setTrackedUploadIds((prev) => {\n            const next = new Set(prev);\n            for (const f of uploadedFiles) next.add(f.id);\n            return next;\n          });\n        }\n\n        // Refresh canonical sources instead of manual merges\n        if (projectId && currentProjectId === projectId) {\n          await refreshCurrentProjectDetails();\n        }\n        await refreshRecentFiles();\n        return uploaded;\n      } catch (err) {\n        throw err;\n      }\n    },\n    [currentProjectId, refreshCurrentProjectDetails, refreshRecentFiles]\n  );\n\n  const getFilesInProject = useCallback(\n    async (projectId: number): Promise<ProjectFile[]> => {\n      try {\n        const data: ProjectFile[] = await svcGetFilesInProject(projectId);\n        return data;\n      } catch (err) {\n        const message =\n          err instanceof Error ? err.message : \"Failed to fetch project files\";\n        return [];\n      }\n    },\n    []\n  );\n\n  // Sync SWR-fetched recent files into local state. On first arrival, seed\n  // allRecentFiles as well; subsequent updates only touch recentFiles so the\n  // merge effect below can non-destructively apply them to allRecentFiles.\n  useEffect(() => {\n    if (!recentFilesData) return;\n    setRecentFiles(recentFilesData);\n    if (!hasInitializedAllRecentFilesRef.current) {\n      setAllRecentFiles(recentFilesData);\n      hasInitializedAllRecentFilesRef.current = true;\n    }\n  }, [recentFilesData]);\n\n  useEffect(() => {\n    setAllRecentFiles((prev) =>\n      prev.map((f) => {\n        const newFile = recentFiles.find((f2) => f2.id === f.id);\n        return newFile ? { ...f, ...newFile } : f;\n      })\n    );\n  }, [recentFiles]);\n\n  // Clear project details when switching projects to show skeleton\n  useEffect(() => {\n    setCurrentProjectDetails(null);\n    setAllCurrentProjectFiles([]);\n  }, [currentProjectId]);\n\n  useEffect(() => {\n    if (currentProjectId) {\n      refreshCurrentProjectDetails();\n    }\n  }, [currentProjectId, refreshCurrentProjectDetails]);\n\n  // Targeted polling for tracked uploaded files only\n  useEffect(() => {\n    const ids = Array.from(trackedUploadIds);\n    const shouldPoll = ids.length > 0;\n\n    const poll = async () => {\n      if (isPollingRef.current) return;\n      isPollingRef.current = true;\n      try {\n        const statuses = await svcGetUserFileStatuses(ids);\n        if (!statuses || statuses.length === 0) return;\n\n        // Build maps for quick lookup\n        const statusById = new Map(statuses.map((f) => [f.id, f]));\n\n        // Update currentMessageFiles inline based on polled statuses\n        setCurrentMessageFiles((prev) => {\n          let changed = false;\n          const next: ProjectFile[] = [];\n          const newlyFailedLocal: ProjectFile[] = [];\n          for (const f of prev) {\n            const latest = statusById.get(f.id);\n            if (latest) {\n              const latestStatus = String(latest.status).toLowerCase();\n              if (latestStatus === \"failed\") {\n                if (String(f.status).toLowerCase() !== \"failed\") {\n                  newlyFailedLocal.push(latest);\n                }\n                changed = true;\n                continue;\n              }\n              if (\n                latest.status !== f.status ||\n                latest.name !== f.name ||\n                latest.file_type !== f.file_type\n              ) {\n                next.push({ ...f, ...latest } as ProjectFile);\n                changed = true;\n                continue;\n              }\n            }\n            next.push(f);\n          }\n          if (newlyFailedLocal.length > 0) {\n            setLastFailedFiles(newlyFailedLocal);\n          }\n          return changed || next.length !== prev.length ? next : prev;\n        });\n\n        // Update currentProjectDetails.files with latest statuses\n        setCurrentProjectDetails((prev) => {\n          if (!prev || !prev.files || prev.files.length === 0) return prev;\n          let changed = false;\n          const nextFiles = prev.files.map((f) => {\n            const latest = statusById.get(f.id);\n            if (latest) {\n              if (\n                latest.status !== f.status ||\n                latest.name !== f.name ||\n                latest.file_type !== f.file_type\n              ) {\n                changed = true;\n                return { ...f, ...latest } as ProjectFile;\n              }\n            }\n            return f;\n          });\n          return changed\n            ? ({ ...prev, files: nextFiles } as ProjectDetails)\n            : prev;\n        });\n\n        // Update recent files list inline as well\n        setRecentFiles((prev) => {\n          if (prev.length === 0) return prev;\n          let changed = false;\n          const map = new Map(prev.map((f) => [f.id, f]));\n          for (const latest of statuses) {\n            const id = latest.id;\n            if (map.has(id)) {\n              const prevVal = map.get(id)!;\n              if (\n                latest.status !== prevVal.status ||\n                latest.name !== prevVal.name ||\n                latest.file_type !== prevVal.file_type\n              ) {\n                map.set(id, latest);\n                changed = true;\n              }\n            }\n          }\n          return changed ? Array.from(map.values()) : prev;\n        });\n\n        // Remove completed/skipped/failed from tracking\n        const remaining = new Set(trackedUploadIds);\n        const newlyFailed: ProjectFile[] = [];\n        for (const f of statuses) {\n          const s = String(f.status).toLowerCase();\n          if (s === \"completed\" || s === \"skipped\") {\n            remaining.delete(f.id);\n          } else if (s === \"failed\") {\n            remaining.delete(f.id);\n            newlyFailed.push(f);\n          }\n        }\n        if (newlyFailed.length > 0) {\n          setLastFailedFiles(newlyFailed);\n        }\n        const trackingChanged = remaining.size !== trackedUploadIds.size;\n        if (trackingChanged) {\n          setTrackedUploadIds(remaining);\n        }\n\n        // If all tracked uploads finished (completed or failed), do a single refresh\n        if (remaining.size === 0) {\n          if (currentProjectId) {\n            await refreshCurrentProjectDetails();\n          }\n          await refreshRecentFiles();\n        }\n      } finally {\n        isPollingRef.current = false;\n      }\n    };\n\n    if (shouldPoll && pollIntervalRef.current === null) {\n      // Kick once immediately, then start interval\n      poll();\n      pollIntervalRef.current = window.setInterval(poll, 3000);\n    }\n\n    if (!shouldPoll && pollIntervalRef.current !== null) {\n      window.clearInterval(pollIntervalRef.current);\n      pollIntervalRef.current = null;\n    }\n\n    return () => {\n      if (pollIntervalRef.current !== null) {\n        window.clearInterval(pollIntervalRef.current);\n        pollIntervalRef.current = null;\n      }\n    };\n  }, [\n    trackedUploadIds,\n    currentProjectId,\n    refreshCurrentProjectDetails,\n    refreshRecentFiles,\n  ]);\n\n  const value: ProjectsContextType = useMemo(\n    () => ({\n      projects,\n      recentFiles,\n      currentProjectDetails,\n      currentProjectId,\n      currentMessageFiles,\n      allRecentFiles,\n      allCurrentProjectFiles,\n      isLoadingProjectDetails,\n      beginUpload,\n      setCurrentMessageFiles,\n      upsertInstructions,\n      fetchProjects,\n      createProject,\n      renameProject,\n      deleteProject,\n      uploadFiles,\n      getRecentFiles,\n      getFilesInProject,\n      refreshCurrentProjectDetails,\n      refreshRecentFiles,\n      lastFailedFiles,\n      clearLastFailedFiles: () => setLastFailedFiles([]),\n      deleteUserFile: async (fileId: string) => {\n        const result = await svcDeleteUserFile(fileId);\n        // If no associations, backend enqueues deletion and status moves to DELETING; refresh lists\n        if (!result.has_associations) {\n          if (currentProjectId) {\n            await refreshCurrentProjectDetails();\n          }\n          await refreshRecentFiles();\n        }\n        return result;\n      },\n      unlinkFileFromProject: async (projectId: number, fileId: string) => {\n        const file = allCurrentProjectFiles.find((f) => f.id === fileId);\n        if (!file) return;\n        setAllCurrentProjectFiles((prev) =>\n          prev.filter((f) => f.id !== file.id)\n        );\n        svcUnlinkFileFromProject(projectId, file.id).then(async (result) => {\n          if (result.ok) {\n            if (currentProjectId === projectId) {\n              await refreshCurrentProjectDetails();\n            }\n            await refreshRecentFiles();\n          } else {\n            if (currentProjectId === projectId) {\n              setAllCurrentProjectFiles((prev) => [file, ...prev]);\n            }\n          }\n        });\n      },\n      linkFileToProject: async (projectId: number, file: ProjectFile) => {\n        const existing = allCurrentProjectFiles.find((f) => f.id === file.id);\n        if (existing) return;\n        setAllCurrentProjectFiles((prev) => [file, ...prev]);\n        svcLinkFileToProject(projectId, file.id).then(async (result) => {\n          if (result.ok) {\n            if (currentProjectId === projectId) {\n              await refreshCurrentProjectDetails();\n            }\n            await refreshRecentFiles();\n          } else {\n            if (currentProjectId === projectId) {\n              setAllCurrentProjectFiles((prev) =>\n                prev.filter((f) => f.id !== file.id)\n              );\n            }\n          }\n        });\n      },\n    }),\n    [\n      projects,\n      recentFiles,\n      currentProjectDetails,\n      currentProjectId,\n      currentMessageFiles,\n      allRecentFiles,\n      allCurrentProjectFiles,\n      isLoadingProjectDetails,\n      beginUpload,\n      setCurrentMessageFiles,\n      upsertInstructions,\n      fetchProjects,\n      createProject,\n      renameProject,\n      deleteProject,\n      uploadFiles,\n      getRecentFiles,\n      getFilesInProject,\n      refreshCurrentProjectDetails,\n      refreshRecentFiles,\n      lastFailedFiles,\n    ]\n  );\n\n  return (\n    <ProjectsContext.Provider value={value}>\n      {children}\n    </ProjectsContext.Provider>\n  );\n}\n\nexport function useProjectsContext(): ProjectsContextType {\n  const ctx = useContext(ProjectsContext);\n  if (!ctx) {\n    throw new Error(\n      \"useProjectsContext must be used within a ProjectsProvider\"\n    );\n  }\n  return ctx;\n}\n"
  },
  {
    "path": "web/src/providers/QueryControllerProvider.tsx",
    "content": "\"use client\";\n\nimport { createContext, useContext } from \"react\";\nimport { eeGated } from \"@/ce\";\nimport { QueryControllerProvider as EEQueryControllerProvider } from \"@/ee/providers/QueryControllerProvider\";\nimport { SearchDocWithContent, BaseFilters } from \"@/lib/search/interfaces\";\n\nexport type AppMode = \"auto\" | \"search\" | \"chat\";\n\nexport type QueryState =\n  | { phase: \"idle\"; appMode: AppMode }\n  | { phase: \"classifying\" }\n  | { phase: \"searching\" }\n  | { phase: \"search-results\" }\n  | { phase: \"chat\" };\n\nexport interface QueryControllerValue {\n  /** Single state variable encoding both the query lifecycle phase and (when idle) the user's mode selection. */\n  state: QueryState;\n  /** Update the app mode. Only takes effect when idle. No-op in CE or when search is unavailable. */\n  setAppMode: (mode: AppMode) => void;\n  /** Search results (empty if chat or not yet searched) */\n  searchResults: SearchDocWithContent[];\n  /** Document IDs selected by the LLM as most relevant */\n  llmSelectedDocIds: string[] | null;\n  /** User-facing error message from the last search or classification request, null when idle */\n  error: string | null;\n  /** Submit a query - routes to search or chat based on app mode */\n  submit: (\n    query: string,\n    onChat: (query: string) => void,\n    filters?: BaseFilters\n  ) => Promise<void>;\n  /** Re-run the current search query with updated server-side filters */\n  refineSearch: (filters: BaseFilters) => Promise<void>;\n  /** Reset all state to initial values */\n  reset: () => void;\n}\n\nexport const QueryControllerContext = createContext<QueryControllerValue>({\n  state: { phase: \"idle\", appMode: \"chat\" },\n  setAppMode: () => undefined,\n  searchResults: [],\n  llmSelectedDocIds: null,\n  error: null,\n  submit: async (_q, onChat) => {\n    onChat(_q);\n  },\n  refineSearch: async () => undefined,\n  reset: () => undefined,\n});\n\nexport function useQueryController(): QueryControllerValue {\n  return useContext(QueryControllerContext);\n}\n\nexport const QueryControllerProvider = eeGated(EEQueryControllerProvider);\n"
  },
  {
    "path": "web/src/providers/SWRConfigProvider.tsx",
    "content": "\"use client\";\n\nimport { SWRConfig } from \"swr\";\nimport { skipRetryOnAuthError } from \"@/lib/fetcher\";\n\nexport default function SWRConfigProvider({\n  children,\n}: {\n  children: React.ReactNode;\n}) {\n  return (\n    <SWRConfig value={{ onErrorRetry: skipRetryOnAuthError }}>\n      {children}\n    </SWRConfig>\n  );\n}\n"
  },
  {
    "path": "web/src/providers/SettingsProvider.tsx",
    "content": "\"use client\";\n\nimport { CombinedSettings } from \"@/interfaces/settings\";\nimport {\n  createContext,\n  useContext,\n  useEffect,\n  useState,\n  useMemo,\n  JSX,\n} from \"react\";\nimport useCCPairs from \"@/hooks/useCCPairs\";\nimport {\n  useSettings,\n  useEnterpriseSettings,\n  useCustomAnalyticsScript,\n} from \"@/hooks/useSettings\";\nimport { HOST_URL, NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport CloudError from \"@/components/errorPages/CloudErrorPage\";\nimport ErrorPage from \"@/components/errorPages/ErrorPage\";\nimport { FetchError } from \"@/lib/fetcher\";\n\nexport function SettingsProvider({\n  children,\n}: {\n  children: React.ReactNode | JSX.Element;\n}) {\n  const {\n    settings,\n    isLoading: coreSettingsLoading,\n    error: settingsError,\n  } = useSettings();\n\n  // Once core settings load, check if the backend reports EE as enabled.\n  // This handles deployments where NEXT_PUBLIC_ENABLE_PAID_EE_FEATURES is\n  // unset but LICENSE_ENFORCEMENT_ENABLED defaults to true on the server.\n  const eeEnabledRuntime =\n    !coreSettingsLoading &&\n    !settingsError &&\n    settings.ee_features_enabled !== false;\n\n  const {\n    enterpriseSettings,\n    isLoading: enterpriseSettingsLoading,\n    error: enterpriseSettingsError,\n  } = useEnterpriseSettings(eeEnabledRuntime);\n  const customAnalyticsScript = useCustomAnalyticsScript(eeEnabledRuntime);\n\n  const [isMobile, setIsMobile] = useState<boolean | undefined>();\n  const settingsLoading = coreSettingsLoading || enterpriseSettingsLoading;\n  const vectorDbEnabled =\n    !coreSettingsLoading &&\n    !settingsError &&\n    settings.vector_db_enabled !== false;\n  const { ccPairs } = useCCPairs(vectorDbEnabled);\n\n  useEffect(() => {\n    const checkMobile = () => {\n      setIsMobile(window.innerWidth < 768);\n    };\n\n    checkMobile();\n    window.addEventListener(\"resize\", checkMobile);\n    return () => window.removeEventListener(\"resize\", checkMobile);\n  }, []);\n\n  /**\n   * NOTE (@raunakab):\n   * Whether search mode is actually available to users.\n   *\n   * Prefer `isSearchModeAvailable` over `settings.search_ui_enabled`.\n   * The raw setting only captures the admin's *intent*. This derived value\n   * also checks runtime prerequisites (connectors must exist) so that\n   * consumers don't need to independently verify availability.\n   */\n  const isSearchModeAvailable = useMemo(\n    () => settings.search_ui_enabled !== false && ccPairs.length > 0,\n    [settings.search_ui_enabled, ccPairs.length]\n  );\n\n  const combinedSettings: CombinedSettings = useMemo(\n    () => ({\n      settings,\n      enterpriseSettings,\n      customAnalyticsScript,\n      webVersion: settings.version ?? null,\n      webDomain: HOST_URL,\n      isMobile,\n      isSearchModeAvailable,\n      settingsLoading,\n    }),\n    [\n      settings,\n      enterpriseSettings,\n      customAnalyticsScript,\n      isMobile,\n      isSearchModeAvailable,\n      settingsLoading,\n    ]\n  );\n\n  // Auth errors (401/403) are expected for unauthenticated users (e.g. login\n  // page). Fall through with default settings so the app can render normally.\n  const isAuthError = (err: Error | undefined) =>\n    err instanceof FetchError && (err.status === 401 || err.status === 403);\n\n  const hasFatalError =\n    (settingsError && !isAuthError(settingsError)) ||\n    (enterpriseSettingsError && !isAuthError(enterpriseSettingsError));\n\n  if (hasFatalError) {\n    return NEXT_PUBLIC_CLOUD_ENABLED ? <CloudError /> : <ErrorPage />;\n  }\n\n  return (\n    <SettingsContext.Provider value={combinedSettings}>\n      {children}\n    </SettingsContext.Provider>\n  );\n}\n\nexport const SettingsContext = createContext<CombinedSettings | null>(null);\n\nexport function useSettingsContext() {\n  const context = useContext(SettingsContext);\n  if (context === null) {\n    throw new Error(\n      \"useSettingsContext must be used within a SettingsProvider\"\n    );\n  }\n  return context;\n}\n\nexport function useVectorDbEnabled(): boolean {\n  const settings = useSettingsContext();\n  return settings.settings.vector_db_enabled !== false;\n}\n"
  },
  {
    "path": "web/src/providers/ToastProvider.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useSyncExternalStore } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Message from \"@/refresh-components/messages/Message\";\nimport { NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK } from \"@/lib/constants\";\nimport { toast, toastStore, MAX_VISIBLE_TOASTS } from \"@/hooks/useToast\";\nimport type { Toast, ToastLevel } from \"@/hooks/useToast\";\n\nconst ANIMATION_DURATION = 200; // matches tailwind fade-out-scale (0.2s)\nconst MAX_TOAST_MESSAGE_LENGTH = 150;\n\nfunction levelProps(level: ToastLevel): Record<string, boolean> {\n  switch (level) {\n    case \"success\":\n      return { success: true };\n    case \"error\":\n      return { error: true };\n    case \"warning\":\n      return { warning: true };\n    case \"info\":\n      return { info: true };\n    default:\n      return { default: true };\n  }\n}\n\nfunction buildDescription(t: Toast): string | undefined {\n  const parts: string[] = [];\n  if (t.description) parts.push(t.description);\n  if (t.level === \"error\" && NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK) {\n    parts.push(\n      \"Need help? Join our community at https://discord.gg/4NA5SbzrWb for support!\"\n    );\n  }\n  return parts.length > 0 ? parts.join(\" \") : undefined;\n}\n\nfunction ToastContainer() {\n  const allToasts = useSyncExternalStore(\n    toastStore.subscribe,\n    toastStore.getSnapshot,\n    toastStore.getSnapshot\n  );\n\n  const visible = allToasts.slice(-MAX_VISIBLE_TOASTS);\n\n  const handleClose = useCallback((id: string) => {\n    toast._markLeaving(id);\n    setTimeout(() => {\n      toast.dismiss(id);\n    }, ANIMATION_DURATION);\n  }, []);\n\n  if (visible.length === 0) return null;\n\n  return (\n    <div\n      data-testid=\"toast-container\"\n      className={cn(\n        \"fixed bottom-4 right-4 z-[10000]\",\n        \"flex flex-col gap-2 items-end\",\n        \"max-w-[420px]\"\n      )}\n    >\n      {visible.map((t) => {\n        const text =\n          t.message.length > MAX_TOAST_MESSAGE_LENGTH\n            ? t.message.slice(0, MAX_TOAST_MESSAGE_LENGTH) + \"…\"\n            : t.message;\n        return (\n          <div\n            key={t.id}\n            className={cn(\n              t.leaving ? \"animate-fade-out-scale\" : \"animate-fade-in-scale\"\n            )}\n          >\n            <Message\n              flash\n              medium\n              {...levelProps(t.level ?? \"info\")}\n              text={text}\n              description={buildDescription(t)}\n              close={t.dismissible}\n              onClose={() => handleClose(t.id)}\n              actions={t.actionLabel ? t.actionLabel : undefined}\n              onAction={t.onAction}\n            />\n          </div>\n        );\n      })}\n    </div>\n  );\n}\n\ninterface ToastProviderProps {\n  children: React.ReactNode;\n}\n\nexport default function ToastProvider({ children }: ToastProviderProps) {\n  return (\n    <>\n      {children}\n      <ToastContainer />\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/providers/UserProvider.tsx",
    "content": "\"use client\";\n\nimport React, {\n  createContext,\n  useCallback,\n  useContext,\n  useMemo,\n  useState,\n  useEffect,\n  useRef,\n} from \"react\";\nimport {\n  User,\n  UserPersonalization,\n  UserRole,\n  ThemePreference,\n} from \"@/lib/types\";\nimport { usePostHog } from \"posthog-js/react\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport { useTokenRefresh } from \"@/hooks/useTokenRefresh\";\nimport { useCurrentUser } from \"@/hooks/useCurrentUser\";\nimport {\n  useAuthTypeMetadata,\n  AuthTypeMetadata,\n} from \"@/hooks/useAuthTypeMetadata\";\nimport { updateUserPersonalization as persistPersonalization } from \"@/lib/userSettings\";\nimport { useTheme } from \"next-themes\";\n\ninterface UserContextType {\n  user: User | null;\n  isAdmin: boolean;\n  isCurator: boolean;\n  refreshUser: () => Promise<void>;\n  isCloudSuperuser: boolean;\n  authTypeMetadata: AuthTypeMetadata;\n  updateUserAutoScroll: (autoScroll: boolean) => Promise<void>;\n  updateUserShortcuts: (enabled: boolean) => Promise<void>;\n  toggleAgentPinnedStatus: (\n    currentPinnedAgentIDs: number[],\n    agentId: number,\n    isPinned: boolean\n  ) => Promise<boolean>;\n  updateUserTemperatureOverrideEnabled: (enabled: boolean) => Promise<void>;\n  updateUserPersonalization: (\n    personalization: UserPersonalization\n  ) => Promise<void>;\n  updateUserThemePreference: (\n    themePreference: ThemePreference\n  ) => Promise<void>;\n  updateUserChatBackground: (chatBackground: string | null) => Promise<void>;\n  updateUserDefaultModel: (defaultModel: string | null) => Promise<void>;\n  updateUserDefaultAppMode: (mode: \"CHAT\" | \"SEARCH\") => Promise<void>;\n  updateUserVoiceSettings: (settings: {\n    auto_send?: boolean;\n    auto_playback?: boolean;\n    playback_speed?: number;\n  }) => Promise<void>;\n}\n\nconst UserContext = createContext<UserContextType | undefined>(undefined);\n\nexport function UserProvider({ children }: { children: React.ReactNode }) {\n  const { user: fetchedUser, mutateUser } = useCurrentUser();\n  const { authTypeMetadata } = useAuthTypeMetadata();\n  const updatedSettings = useContext(SettingsContext);\n  const posthog = usePostHog();\n\n  // For auto_scroll and temperature_override_enabled:\n  // - If user has a preference set, use that\n  // - Otherwise, use the workspace setting if available\n  const mergeUserPreferences = useCallback(\n    (currentUser: User | null): User | null => {\n      if (!currentUser) return null;\n      return {\n        ...currentUser,\n        preferences: {\n          ...currentUser.preferences,\n          auto_scroll:\n            currentUser.preferences?.auto_scroll ??\n            updatedSettings?.settings?.auto_scroll ??\n            false,\n          temperature_override_enabled:\n            currentUser.preferences?.temperature_override_enabled ??\n            updatedSettings?.settings?.temperature_override_enabled ??\n            false,\n        },\n      };\n    },\n    [updatedSettings]\n  );\n\n  const [upToDateUser, setUpToDateUser] = useState<User | null>(null);\n\n  useEffect(() => {\n    setUpToDateUser(mergeUserPreferences(fetchedUser ?? null));\n  }, [fetchedUser, mergeUserPreferences]);\n\n  useEffect(() => {\n    if (!posthog) return;\n\n    if (fetchedUser?.id) {\n      const identifyData: Record<string, any> = {\n        email: fetchedUser.email,\n      };\n      if (fetchedUser.team_name) {\n        identifyData.team_name = fetchedUser.team_name;\n      }\n      posthog.identify(fetchedUser.id, identifyData);\n    } else {\n      posthog.reset();\n    }\n  }, [posthog, fetchedUser]);\n\n  // Use the custom token refresh hook — on refresh failure, revalidate via SWR\n  // so the result goes through mergeUserPreferences\n  const onRefreshFail = useCallback(async () => {\n    await mutateUser();\n  }, [mutateUser]);\n  useTokenRefresh(upToDateUser, authTypeMetadata, onRefreshFail);\n\n  // Sync user's theme preference from DB to next-themes on load\n  const { setTheme, theme } = useTheme();\n  const hasSyncedThemeRef = useRef(false);\n\n  useEffect(() => {\n    // Only sync once per session\n    if (hasSyncedThemeRef.current) return;\n\n    // Wait for next-themes to initialize\n    if (!theme) return;\n\n    // Wait for user data to load\n    if (!upToDateUser?.id) return;\n\n    // Only sync if user has a saved preference\n    const savedTheme = upToDateUser?.preferences?.theme_preference;\n    if (!savedTheme) return;\n\n    // Sync DB theme to localStorage\n    setTheme(savedTheme);\n    hasSyncedThemeRef.current = true;\n  }, [\n    upToDateUser?.id,\n    upToDateUser?.preferences?.theme_preference,\n    theme,\n    setTheme,\n  ]);\n\n  const updateUserTemperatureOverrideEnabled = async (enabled: boolean) => {\n    try {\n      setUpToDateUser((prevUser) => {\n        if (prevUser) {\n          return {\n            ...prevUser,\n            preferences: {\n              ...prevUser.preferences,\n              temperature_override_enabled: enabled,\n            },\n          };\n        }\n        return prevUser;\n      });\n\n      const response = await fetch(\n        `/api/temperature-override-enabled?temperature_override_enabled=${enabled}`,\n        {\n          method: \"PATCH\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n        }\n      );\n\n      if (!response.ok) {\n        await refreshUser();\n        throw new Error(\"Failed to update user temperature override setting\");\n      }\n    } catch (error) {\n      console.error(\"Error updating user temperature override setting:\", error);\n      throw error;\n    }\n  };\n\n  const updateUserShortcuts = async (enabled: boolean) => {\n    try {\n      setUpToDateUser((prevUser) => {\n        if (prevUser) {\n          return {\n            ...prevUser,\n            preferences: {\n              ...prevUser.preferences,\n              shortcut_enabled: enabled,\n            },\n          };\n        }\n        return prevUser;\n      });\n\n      const response = await fetch(\n        `/api/shortcut-enabled?shortcut_enabled=${enabled}`,\n        {\n          method: \"PATCH\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n        }\n      );\n\n      if (!response.ok) {\n        await refreshUser();\n        throw new Error(\"Failed to update user shortcut setting\");\n      }\n    } catch (error) {\n      console.error(\"Error updating user shortcut setting:\", error);\n      throw error;\n    }\n  };\n\n  const updateUserAutoScroll = async (autoScroll: boolean) => {\n    try {\n      const response = await fetch(\"/api/auto-scroll\", {\n        method: \"PATCH\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({ auto_scroll: autoScroll }),\n      });\n      setUpToDateUser((prevUser) => {\n        if (prevUser) {\n          return {\n            ...prevUser,\n            preferences: {\n              ...prevUser.preferences,\n              auto_scroll: autoScroll,\n            },\n          };\n        }\n        return prevUser;\n      });\n\n      if (!response.ok) {\n        throw new Error(\"Failed to update auto-scroll setting\");\n      }\n    } catch (error) {\n      console.error(\"Error updating auto-scroll setting:\", error);\n      throw error;\n    }\n  };\n\n  const updateUserPersonalization = async (\n    personalization: UserPersonalization\n  ) => {\n    try {\n      setUpToDateUser((prevUser) => {\n        if (!prevUser) {\n          return prevUser;\n        }\n\n        return {\n          ...prevUser,\n          personalization,\n        };\n      });\n\n      const response = await persistPersonalization(personalization);\n\n      if (!response.ok) {\n        await refreshUser();\n        throw new Error(\"Failed to update personalization settings\");\n      }\n\n      await refreshUser();\n    } catch (error) {\n      console.error(\"Error updating personalization settings:\", error);\n      throw error;\n    }\n  };\n\n  const toggleAgentPinnedStatus = async (\n    currentPinnedAgentIDs: number[],\n    agentId: number,\n    isPinned: boolean\n  ) => {\n    setUpToDateUser((prevUser) => {\n      if (!prevUser) return prevUser;\n      return {\n        ...prevUser,\n        preferences: {\n          ...prevUser.preferences,\n          pinned_assistants: isPinned\n            ? [...currentPinnedAgentIDs, agentId]\n            : currentPinnedAgentIDs.filter((id) => id !== agentId),\n        },\n      };\n    });\n\n    let updatedPinnedAgentsIds = isPinned\n      ? [...currentPinnedAgentIDs, agentId]\n      : currentPinnedAgentIDs.filter((id) => id !== agentId);\n    try {\n      const response = await fetch(`/api/user/pinned-assistants`, {\n        method: \"PATCH\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({\n          ordered_assistant_ids: updatedPinnedAgentsIds,\n        }),\n      });\n\n      if (!response.ok) {\n        throw new Error(\"Failed to update pinned assistants\");\n      }\n\n      await refreshUser();\n      return true;\n    } catch (error) {\n      console.error(\"Error updating pinned assistants:\", error);\n      return false;\n    }\n  };\n\n  const updateUserThemePreference = async (\n    themePreference: ThemePreference\n  ) => {\n    try {\n      setUpToDateUser((prevUser) => {\n        if (prevUser) {\n          return {\n            ...prevUser,\n            preferences: {\n              ...prevUser.preferences,\n              theme_preference: themePreference,\n            },\n          };\n        }\n        return prevUser;\n      });\n\n      const response = await fetch(`/api/user/theme-preference`, {\n        method: \"PATCH\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({ theme_preference: themePreference }),\n      });\n\n      if (!response.ok) {\n        await refreshUser();\n        throw new Error(\"Failed to update theme preference\");\n      }\n    } catch (error) {\n      console.error(\"Error updating theme preference:\", error);\n      throw error;\n    }\n  };\n\n  const updateUserChatBackground = async (chatBackground: string | null) => {\n    try {\n      setUpToDateUser((prevUser) => {\n        if (prevUser) {\n          return {\n            ...prevUser,\n            preferences: {\n              ...prevUser.preferences,\n              chat_background: chatBackground,\n            },\n          };\n        }\n        return prevUser;\n      });\n\n      const response = await fetch(`/api/user/chat-background`, {\n        method: \"PATCH\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({ chat_background: chatBackground }),\n      });\n\n      if (!response.ok) {\n        await refreshUser();\n        throw new Error(\"Failed to update chat background\");\n      }\n    } catch (error) {\n      console.error(\"Error updating chat background:\", error);\n      throw error;\n    }\n  };\n\n  const updateUserDefaultModel = async (defaultModel: string | null) => {\n    try {\n      setUpToDateUser((prevUser) => {\n        if (prevUser) {\n          return {\n            ...prevUser,\n            preferences: {\n              ...prevUser.preferences,\n              default_model: defaultModel,\n            },\n          };\n        }\n        return prevUser;\n      });\n\n      const response = await fetch(`/api/user/default-model`, {\n        method: \"PATCH\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({ default_model: defaultModel }),\n      });\n\n      if (!response.ok) {\n        await refreshUser();\n        throw new Error(\"Failed to update default model\");\n      }\n    } catch (error) {\n      console.error(\"Error updating default model:\", error);\n      throw error;\n    }\n  };\n\n  const updateUserDefaultAppMode = async (mode: \"CHAT\" | \"SEARCH\") => {\n    try {\n      setUpToDateUser((prevUser) => {\n        if (prevUser) {\n          return {\n            ...prevUser,\n            preferences: {\n              ...prevUser.preferences,\n              default_app_mode: mode,\n            },\n          };\n        }\n        return prevUser;\n      });\n\n      const response = await fetch(\"/api/user/default-app-mode\", {\n        method: \"PATCH\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({ default_app_mode: mode }),\n      });\n\n      if (!response.ok) {\n        await refreshUser();\n        throw new Error(\"Failed to update default app mode\");\n      }\n    } catch (error) {\n      console.error(\"Error updating default app mode:\", error);\n      throw error;\n    }\n  };\n\n  const updateUserVoiceSettings = async (settings: {\n    auto_send?: boolean;\n    auto_playback?: boolean;\n    playback_speed?: number;\n  }) => {\n    try {\n      setUpToDateUser((prevUser) => {\n        if (prevUser) {\n          return {\n            ...prevUser,\n            preferences: {\n              ...prevUser.preferences,\n              voice_auto_send:\n                settings.auto_send ?? prevUser.preferences.voice_auto_send,\n              voice_auto_playback:\n                settings.auto_playback ??\n                prevUser.preferences.voice_auto_playback,\n              voice_playback_speed:\n                settings.playback_speed ??\n                prevUser.preferences.voice_playback_speed,\n            },\n          };\n        }\n        return prevUser;\n      });\n\n      const response = await fetch(\"/api/voice/settings\", {\n        method: \"PATCH\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify(settings),\n      });\n\n      if (!response.ok) {\n        await refreshUser();\n        throw new Error(\"Failed to update voice settings\");\n      }\n    } catch (error) {\n      console.error(\"Error updating voice settings:\", error);\n      throw error;\n    }\n  };\n\n  const refreshUser = async () => {\n    await mutateUser();\n  };\n\n  return (\n    <UserContext.Provider\n      value={{\n        user: upToDateUser,\n        refreshUser,\n        authTypeMetadata,\n        updateUserAutoScroll,\n        updateUserShortcuts,\n        updateUserTemperatureOverrideEnabled,\n        updateUserPersonalization,\n        updateUserThemePreference,\n        updateUserChatBackground,\n        updateUserDefaultModel,\n        updateUserDefaultAppMode,\n        updateUserVoiceSettings,\n        toggleAgentPinnedStatus,\n        isAdmin: upToDateUser?.role === UserRole.ADMIN,\n        // Curator status applies for either global or basic curator\n        isCurator:\n          upToDateUser?.role === UserRole.CURATOR ||\n          upToDateUser?.role === UserRole.GLOBAL_CURATOR,\n        isCloudSuperuser: upToDateUser?.is_cloud_superuser ?? false,\n      }}\n    >\n      {children}\n    </UserContext.Provider>\n  );\n}\n\nexport function useUser() {\n  const context = useContext(UserContext);\n  if (context === undefined) {\n    throw new Error(\"useUser must be used within a UserProvider\");\n  }\n  return context;\n}\n"
  },
  {
    "path": "web/src/providers/VoiceModeProvider.tsx",
    "content": "\"use client\";\n\nimport React, {\n  createContext,\n  useContext,\n  useState,\n  useCallback,\n  useRef,\n  useEffect,\n} from \"react\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { useVoiceStatus } from \"@/hooks/useVoiceStatus\";\nimport { INTERNAL_URL, IS_DEV } from \"@/lib/constants\";\n\n// --- TTS Configuration Constants ---\n\n/** WebSocket path for TTS streaming (backend-direct, used in dev) */\nconst TTS_WS_PATH = \"/voice/synthesize/stream\";\n\n/** WebSocket path for TTS streaming (proxied, used in production) */\nconst TTS_WS_PATH_PROXIED = \"/api/voice/synthesize/stream\";\n\n/** API endpoint to fetch a short-lived WebSocket auth token */\nconst WS_TOKEN_ENDPOINT = \"/api/voice/ws-token\";\n\n/** Delay before starting audio playback to buffer initial chunks (ms) */\nconst AUDIO_START_DELAY_MS = 100;\n\n/** Interval for checking if audio playback has ended (ms) */\nconst END_CHECK_INTERVAL_MS = 200;\n\n/** Delay before retrying WebSocket end signal (ms) */\nconst WS_END_RETRY_DELAY_MS = 100;\n\n/** Delay before checking finalizeStream readiness (ms) */\nconst FINALIZE_RETRY_DELAY_MS = 50;\n\n/** Fast-start timer: how long to wait before sending first TTS chunk (ms) */\nconst FAST_START_DELAY_MS = 200;\n\n/** Flush timer: how long to wait after punctuation before flushing (ms) */\nconst FLUSH_DELAY_MS = 250;\n\n/** Safety timeout for TTS loading — resets state if generation stalls (ms) */\nconst TTS_LOADING_TIMEOUT_MS = 60_000;\n\n/** Hard safety timeout for entire TTS playback session (ms).\n *  Prevents stuck audio from blocking the UI indefinitely. */\nconst TTS_SESSION_TIMEOUT_MS = 5 * 60 * 1000;\n\n/** Characters revealed per second when audio duration is unknown */\nconst BASE_CHARS_PER_SECOND = 15;\n\n/** How far ahead (in seconds) text reveal leads audio playback */\nconst REVEAL_LEAD_SECONDS = 0.28;\n\n/** Max characters to reveal per animation frame (smooths catch-up) */\nconst MAX_CATCHUP_CHARS_PER_FRAME = 8;\n\ninterface VoiceModeContextType {\n  /** Whether TTS audio is currently playing */\n  isTTSPlaying: boolean;\n  /** Whether manual read-aloud playback is currently speaking */\n  isManualTTSPlaying: boolean;\n  /** Whether TTS is loading/generating audio */\n  isTTSLoading: boolean;\n  /** Text that has been spoken so far (for synced display) */\n  spokenText: string;\n  /** Node id of the assistant message currently being spoken */\n  activeMessageNodeId: number | null;\n  /** Stream text for TTS - speaks sentences as they complete */\n  streamTTS: (\n    text: string,\n    isComplete?: boolean,\n    messageNodeId?: number\n  ) => void;\n  /** Stop TTS playback */\n  stopTTS: (options?: { manual?: boolean }) => void;\n  /** Increments when TTS is manually stopped by the user */\n  manualStopCount: number;\n  /** Reset state for new message */\n  resetTTS: () => void;\n  /** Audio playback progress (0-1) based on currentTime vs estimated duration */\n  audioProgress: number;\n  /** Number of clean characters to reveal based on audio progress */\n  revealedCharCount: number;\n  /** Whether audio sync is active for progressive text reveal */\n  isAudioSyncActive: boolean;\n  /** Whether auto-playback is enabled in user preferences */\n  autoPlayback: boolean;\n  /** True after text is queued for autoplay but before audio starts playing */\n  isAwaitingAutoPlaybackStart: boolean;\n  /** Whether TTS audio is muted */\n  isTTSMuted: boolean;\n  /** Toggle TTS mute state */\n  toggleTTSMute: () => void;\n  /** Set manual read-aloud speaking state for shared UI (e.g., waveform) */\n  setManualTTSPlaying: (playing: boolean) => void;\n  /** Register manual read-aloud mute handler so shared mute controls affect it */\n  registerManualTTSMuteHandler: (\n    handler: ((muted: boolean) => void) | null\n  ) => void;\n}\n\nconst VoiceModeContext = createContext<VoiceModeContextType | null>(null);\n\n/**\n * Clean text for TTS - remove markdown formatting\n */\nfunction cleanTextForTTS(text: string): string {\n  return text\n    .replace(/\\*\\*/g, \"\") // Remove bold markers\n    .replace(/\\*/g, \"\") // Remove italic markers\n    .replace(/`{1,3}/g, \"\") // Remove code markers\n    .replace(/#{1,6}\\s*/g, \"\") // Remove headers\n    .replace(/\\[([^\\]]+)\\]\\([^)]+\\)/g, \"$1\") // Convert links to just text\n    .replace(/\\n+/g, \" \") // Replace newlines with spaces\n    .replace(/\\s+/g, \" \") // Normalize whitespace\n    .trim();\n}\n\n/**\n * Find the next natural chunk boundary in text.\n * Prefers sentence endings for natural speech rhythm.\n */\nfunction findChunkBoundary(text: string): number {\n  // Look for sentence endings (. ! ?) - these are natural speech breaks\n  const sentenceRegex = /[.!?](?:\\s|$)/g;\n  let match;\n  let lastSentenceEnd = -1;\n\n  while ((match = sentenceRegex.exec(text)) !== null) {\n    const endPos = match.index + 1;\n    if (endPos >= 10) {\n      lastSentenceEnd = endPos;\n      if (endPos >= 30) return endPos;\n    }\n  }\n\n  if (lastSentenceEnd > 0) return lastSentenceEnd;\n\n  // Only break at clauses for very long text (150+ chars)\n  if (text.length >= 150) {\n    const clauseRegex = /[,;:]\\s/g;\n    while ((match = clauseRegex.exec(text)) !== null) {\n      const endPos = match.index + 1;\n      if (endPos >= 70) return endPos;\n    }\n  }\n\n  // Break at word boundary for extremely long text (200+ chars)\n  if (text.length >= 200) {\n    const spaceIndex = text.lastIndexOf(\" \", 120);\n    if (spaceIndex > 80) return spaceIndex;\n  }\n\n  return -1;\n}\n\nexport function VoiceModeProvider({ children }: { children: React.ReactNode }) {\n  const { user } = useUser();\n  const { ttsEnabled } = useVoiceStatus();\n  const autoPlayback =\n    (user?.preferences?.voice_auto_playback ?? false) && ttsEnabled;\n  const playbackSpeed = user?.preferences?.voice_playback_speed ?? 1.0;\n\n  const [isTTSPlaying, setIsTTSPlaying] = useState(false);\n  const [isManualTTSPlaying, setIsManualTTSPlaying] = useState(false);\n  const [isTTSLoading, setIsTTSLoading] = useState(false);\n  const [spokenText, setSpokenText] = useState(\"\");\n  const [activeMessageNodeId, setActiveMessageNodeId] = useState<number | null>(\n    null\n  );\n  const [isAwaitingAutoPlaybackStart, setIsAwaitingAutoPlaybackStart] =\n    useState(false);\n  const [manualStopCount, setManualStopCount] = useState(0);\n  const [isTTSMuted, setIsTTSMuted] = useState(false);\n  const manualTTSMuteHandlerRef = useRef<((muted: boolean) => void) | null>(\n    null\n  );\n\n  // Audio progress tracking for progressive text reveal\n  const [audioProgress, setAudioProgress] = useState(0);\n  const [totalSpokenCharCount, setTotalSpokenCharCount] = useState(0);\n  const [revealedCharCount, setRevealedCharCount] = useState(0);\n\n  // WebSocket and audio state\n  const wsRef = useRef<WebSocket | null>(null);\n  const mediaSourceRef = useRef<MediaSource | null>(null);\n  const sourceBufferRef = useRef<SourceBuffer | null>(null);\n  const audioElementRef = useRef<HTMLAudioElement | null>(null);\n  const audioUrlRef = useRef<string | null>(null);\n  const pendingChunksRef = useRef<Uint8Array[]>([]);\n  const isAppendingRef = useRef(false);\n  const isPlayingRef = useRef(false);\n  const hasStartedPlaybackRef = useRef(false);\n\n  // Audio progress tracking refs\n  const totalBytesReceivedRef = useRef(0);\n  const animationFrameRef = useRef<number | null>(null);\n  const lastRevealedCharCountRef = useRef(0);\n\n  // Text tracking\n  const committedPositionRef = useRef(0);\n  const lastRawTextRef = useRef(\"\");\n  const pendingTextRef = useRef<string[]>([]);\n  const isConnectingRef = useRef(false);\n\n  // Timers\n  const flushTimerRef = useRef<NodeJS.Timeout | null>(null);\n  const fastStartTimerRef = useRef<NodeJS.Timeout | null>(null);\n  const loadingTimeoutRef = useRef<NodeJS.Timeout | null>(null);\n  const endCheckIntervalRef = useRef<NodeJS.Timeout | null>(null);\n  const sessionTimeoutRef = useRef<NodeJS.Timeout | null>(null);\n  const hasSpokenFirstChunkRef = useRef(false);\n  const hasSignaledEndRef = useRef(false);\n  const streamEndedRef = useRef(false);\n\n  // Process next chunk from the pending queue\n  const processNextChunk = useCallback(() => {\n    if (\n      isAppendingRef.current ||\n      pendingChunksRef.current.length === 0 ||\n      !sourceBufferRef.current ||\n      sourceBufferRef.current.updating\n    ) {\n      return;\n    }\n\n    const chunk = pendingChunksRef.current.shift();\n    if (chunk) {\n      isAppendingRef.current = true;\n      try {\n        const buffer = chunk.buffer.slice(\n          chunk.byteOffset,\n          chunk.byteOffset + chunk.byteLength\n        ) as ArrayBuffer;\n        sourceBufferRef.current.appendBuffer(buffer);\n      } catch {\n        isAppendingRef.current = false;\n        processNextChunk();\n      }\n    }\n  }, []);\n\n  // Finalize the media stream when done\n  const finalizeStream = useCallback(() => {\n    if (pendingChunksRef.current.length > 0 || isAppendingRef.current) {\n      setTimeout(() => finalizeStream(), FINALIZE_RETRY_DELAY_MS);\n      return;\n    }\n\n    streamEndedRef.current = true;\n\n    // Don't call endOfStream if no audio was received - it causes errors\n    if (totalBytesReceivedRef.current === 0) {\n      return;\n    }\n\n    if (\n      mediaSourceRef.current &&\n      mediaSourceRef.current.readyState === \"open\" &&\n      sourceBufferRef.current &&\n      !sourceBufferRef.current.updating\n    ) {\n      try {\n        mediaSourceRef.current.endOfStream();\n      } catch {\n        // Ignore endOfStream errors\n      }\n    }\n\n    // Clear any existing end check interval\n    if (endCheckIntervalRef.current) {\n      clearInterval(endCheckIntervalRef.current);\n      endCheckIntervalRef.current = null;\n    }\n\n    // More aggressive end detection: check every 200ms if audio has ended\n    // This handles cases where onended event doesn't fire with MediaSource\n    endCheckIntervalRef.current = setInterval(() => {\n      const audioEl = audioElementRef.current;\n\n      // If audio element is gone or stream was reset, clean up\n      if (!audioEl || !streamEndedRef.current) {\n        if (endCheckIntervalRef.current) {\n          clearInterval(endCheckIntervalRef.current);\n          endCheckIntervalRef.current = null;\n        }\n        return;\n      }\n\n      // Only check audio.ended - don't use duration comparison as it's unreliable\n      // with MediaSource streaming (duration updates as chunks arrive)\n      const hasEnded = audioEl.ended;\n\n      if (hasEnded && isPlayingRef.current) {\n        isPlayingRef.current = false;\n        setIsTTSPlaying(false);\n        setActiveMessageNodeId(null);\n        setIsAwaitingAutoPlaybackStart(false);\n        if (endCheckIntervalRef.current) {\n          clearInterval(endCheckIntervalRef.current);\n          endCheckIntervalRef.current = null;\n        }\n      }\n    }, END_CHECK_INTERVAL_MS);\n\n    // No fixed timeout fallback here.\n    // Long responses can legitimately continue playing well past 10s after stream end.\n    // We rely on onended / interval end detection instead.\n  }, []);\n\n  // Initialize MediaSource for streaming audio\n  const initMediaSource = useCallback(async () => {\n    // Check if MediaSource is supported\n    if (!window.MediaSource || !MediaSource.isTypeSupported(\"audio/mpeg\")) {\n      return false;\n    }\n\n    // Clean up any existing MediaSource before creating a new one\n    if (audioUrlRef.current) {\n      URL.revokeObjectURL(audioUrlRef.current);\n      audioUrlRef.current = null;\n    }\n    if (audioElementRef.current) {\n      audioElementRef.current.pause();\n      audioElementRef.current.src = \"\";\n      audioElementRef.current = null;\n    }\n    if (\n      mediaSourceRef.current &&\n      mediaSourceRef.current.readyState === \"open\"\n    ) {\n      try {\n        if (sourceBufferRef.current) {\n          mediaSourceRef.current.removeSourceBuffer(sourceBufferRef.current);\n        }\n        mediaSourceRef.current.endOfStream();\n      } catch {\n        // Ignore cleanup errors\n      }\n    }\n    mediaSourceRef.current = null;\n    sourceBufferRef.current = null;\n\n    // Create MediaSource and audio element\n    mediaSourceRef.current = new MediaSource();\n    audioElementRef.current = new Audio();\n    audioUrlRef.current = URL.createObjectURL(mediaSourceRef.current);\n    audioElementRef.current.src = audioUrlRef.current;\n\n    audioElementRef.current.onplay = () => {\n      if (!isPlayingRef.current) {\n        isPlayingRef.current = true;\n        setIsTTSPlaying(true);\n        setIsAwaitingAutoPlaybackStart(false);\n      }\n    };\n\n    audioElementRef.current.onended = () => {\n      isPlayingRef.current = false;\n      setIsTTSPlaying(false);\n      setActiveMessageNodeId(null);\n      setIsAwaitingAutoPlaybackStart(false);\n    };\n\n    audioElementRef.current.onerror = () => {\n      const audioEl = audioElementRef.current;\n      const mediaError = audioEl?.error;\n\n      // Ignore spurious errors with no actual error code (happens during cleanup)\n      if (!mediaError || mediaError.code === undefined) {\n        return;\n      }\n\n      isPlayingRef.current = false;\n      setIsTTSPlaying(false);\n      setActiveMessageNodeId(null);\n      setIsAwaitingAutoPlaybackStart(false);\n    };\n\n    // Wait for MediaSource to be ready\n    await new Promise<void>((resolve, reject) => {\n      if (!mediaSourceRef.current) {\n        reject(new Error(\"MediaSource not initialized\"));\n        return;\n      }\n\n      mediaSourceRef.current.onsourceopen = () => {\n        try {\n          sourceBufferRef.current =\n            mediaSourceRef.current!.addSourceBuffer(\"audio/mpeg\");\n          sourceBufferRef.current.mode = \"sequence\";\n\n          sourceBufferRef.current.onupdateend = () => {\n            isAppendingRef.current = false;\n            processNextChunk();\n          };\n\n          resolve();\n        } catch (err) {\n          reject(err);\n        }\n      };\n\n      mediaSourceRef.current.onsourceclose = () => {\n        if (mediaSourceRef.current?.readyState === \"closed\") {\n          reject(new Error(\"MediaSource closed unexpectedly\"));\n        }\n      };\n    });\n\n    return true;\n  }, [processNextChunk]);\n\n  // Handle incoming audio data from WebSocket\n  const handleAudioData = useCallback(\n    async (data: ArrayBuffer) => {\n      // Track total bytes for duration estimation\n      totalBytesReceivedRef.current += data.byteLength;\n\n      // If we are receiving audio bytes, playback startup is no longer pending.\n      // This avoids UI getting stuck in \"thinking\" when onplay is delayed.\n      setIsAwaitingAutoPlaybackStart(false);\n\n      pendingChunksRef.current.push(new Uint8Array(data));\n      processNextChunk();\n\n      // Start playback after first chunk\n      if (!hasStartedPlaybackRef.current && audioElementRef.current) {\n        // Small delay to buffer a bit before starting\n        setTimeout(() => {\n          const audioEl = audioElementRef.current;\n          if (!audioEl || hasStartedPlaybackRef.current) {\n            return;\n          }\n\n          audioEl\n            .play()\n            .then(() => {\n              hasStartedPlaybackRef.current = true;\n            })\n            .catch(() => {\n              // Keep hasStartedPlaybackRef as false so we retry on next audio chunk.\n            });\n        }, AUDIO_START_DELAY_MS);\n      }\n    },\n    [processNextChunk]\n  );\n\n  // Get WebSocket URL for TTS with authentication token\n  const getWebSocketUrl = useCallback(async () => {\n    // Fetch short-lived WS token\n    const tokenResponse = await fetch(WS_TOKEN_ENDPOINT, {\n      method: \"POST\",\n      credentials: \"include\",\n    });\n    if (!tokenResponse.ok) {\n      throw new Error(\"Failed to get WebSocket authentication token\");\n    }\n    const { token } = await tokenResponse.json();\n\n    // In development, the Next.js dev server (port 3000) does not proxy\n    // WebSocket connections, so we connect directly to the backend (port 8080).\n    // In production, the reverse proxy handles the /api prefix routing.\n    const protocol = window.location.protocol === \"https:\" ? \"wss:\" : \"ws:\";\n    const host = IS_DEV ? new URL(INTERNAL_URL).host : window.location.host;\n    const path = IS_DEV ? TTS_WS_PATH : TTS_WS_PATH_PROXIED;\n    // Auth: the token query param is validated server-side by\n    // current_user_from_websocket (single-use, 60s TTL, same checks as HTTP auth).\n    return `${protocol}//${host}${path}?token=${encodeURIComponent(token)}`;\n  }, []);\n\n  // Connect to WebSocket TTS\n  const connectWebSocket = useCallback(async () => {\n    // Skip if already connected, connecting, or in the process of connecting\n    if (\n      wsRef.current?.readyState === WebSocket.OPEN ||\n      wsRef.current?.readyState === WebSocket.CONNECTING ||\n      isConnectingRef.current\n    ) {\n      return;\n    }\n\n    // Set connecting flag to prevent concurrent connection attempts\n    isConnectingRef.current = true;\n\n    try {\n      // Initialize MediaSource first\n      const initialized = await initMediaSource();\n      if (!initialized) {\n        isConnectingRef.current = false;\n        return;\n      }\n\n      // Get WebSocket URL with auth token\n      const wsUrl = await getWebSocketUrl();\n\n      const ws = new WebSocket(wsUrl);\n\n      ws.onopen = () => {\n        isConnectingRef.current = false;\n        // Send initial config\n        ws.send(\n          JSON.stringify({\n            type: \"config\",\n            speed: playbackSpeed,\n          })\n        );\n\n        // Send any pending text\n        for (const text of pendingTextRef.current) {\n          ws.send(JSON.stringify({ type: \"synthesize\", text }));\n        }\n        pendingTextRef.current = [];\n      };\n\n      ws.onmessage = async (event) => {\n        if (event.data instanceof Blob) {\n          const arrayBuffer = await event.data.arrayBuffer();\n          handleAudioData(arrayBuffer);\n        } else if (typeof event.data === \"string\") {\n          try {\n            const msg = JSON.parse(event.data);\n            if (msg.type === \"audio_done\") {\n              if (loadingTimeoutRef.current) {\n                clearTimeout(loadingTimeoutRef.current);\n                loadingTimeoutRef.current = null;\n              }\n              setIsTTSLoading(false);\n              finalizeStream();\n            }\n          } catch {\n            // Ignore parse errors\n          }\n        }\n      };\n\n      ws.onerror = () => {\n        isConnectingRef.current = false;\n        setIsTTSLoading(false);\n        setIsAwaitingAutoPlaybackStart(false);\n      };\n\n      ws.onclose = () => {\n        wsRef.current = null;\n        isConnectingRef.current = false;\n        setIsTTSLoading(false);\n        setIsAwaitingAutoPlaybackStart(false);\n        finalizeStream();\n      };\n\n      wsRef.current = ws;\n    } catch {\n      isConnectingRef.current = false;\n    }\n  }, [\n    playbackSpeed,\n    handleAudioData,\n    getWebSocketUrl,\n    initMediaSource,\n    finalizeStream,\n  ]);\n\n  const stopTTS = useCallback((options?: { manual?: boolean }) => {\n    // Clear timers\n    if (flushTimerRef.current) {\n      clearTimeout(flushTimerRef.current);\n      flushTimerRef.current = null;\n    }\n    if (fastStartTimerRef.current) {\n      clearTimeout(fastStartTimerRef.current);\n      fastStartTimerRef.current = null;\n    }\n    if (loadingTimeoutRef.current) {\n      clearTimeout(loadingTimeoutRef.current);\n      loadingTimeoutRef.current = null;\n    }\n    if (endCheckIntervalRef.current) {\n      clearInterval(endCheckIntervalRef.current);\n      endCheckIntervalRef.current = null;\n    }\n    if (sessionTimeoutRef.current) {\n      clearTimeout(sessionTimeoutRef.current);\n      sessionTimeoutRef.current = null;\n    }\n\n    // Revoke blob URL to prevent memory leak\n    if (audioUrlRef.current) {\n      URL.revokeObjectURL(audioUrlRef.current);\n      audioUrlRef.current = null;\n    }\n\n    // Stop audio element\n    if (audioElementRef.current) {\n      audioElementRef.current.pause();\n      audioElementRef.current.src = \"\";\n      audioElementRef.current = null;\n    }\n\n    // Cleanup MediaSource\n    if (\n      mediaSourceRef.current &&\n      mediaSourceRef.current.readyState === \"open\"\n    ) {\n      try {\n        if (sourceBufferRef.current) {\n          mediaSourceRef.current.removeSourceBuffer(sourceBufferRef.current);\n        }\n        mediaSourceRef.current.endOfStream();\n      } catch {\n        // Ignore cleanup errors\n      }\n    }\n\n    mediaSourceRef.current = null;\n    sourceBufferRef.current = null;\n    pendingChunksRef.current = [];\n    isAppendingRef.current = false;\n    hasStartedPlaybackRef.current = false;\n    pendingTextRef.current = [];\n    isPlayingRef.current = false;\n    hasSignaledEndRef.current = false;\n    isConnectingRef.current = false;\n    streamEndedRef.current = false;\n\n    // Close WebSocket\n    if (wsRef.current) {\n      try {\n        wsRef.current.send(JSON.stringify({ type: \"end\" }));\n        wsRef.current.close();\n      } catch {\n        // Ignore\n      }\n      wsRef.current = null;\n    }\n\n    setIsTTSPlaying(false);\n    setIsTTSLoading(false);\n    setIsAwaitingAutoPlaybackStart(false);\n    if (options?.manual) {\n      setManualStopCount((count) => count + 1);\n    }\n  }, []);\n\n  // Send text to TTS via WebSocket\n  const sendTextToTTS = useCallback(\n    (text: string) => {\n      if (!text.trim()) return;\n\n      setIsTTSLoading(true);\n      setIsAwaitingAutoPlaybackStart(true);\n      setSpokenText((prev) => (prev ? prev + \" \" + text : text));\n\n      // Track character count for progressive text reveal\n      // Note: text is already cleaned (from cleanTextForTTS) when called from streamTTS\n      setTotalSpokenCharCount((prev) => prev + text.length);\n\n      // Set a timeout to reset loading state if TTS doesn't complete\n      if (loadingTimeoutRef.current) {\n        clearTimeout(loadingTimeoutRef.current);\n      }\n      loadingTimeoutRef.current = setTimeout(() => {\n        setIsTTSLoading(false);\n        setIsTTSPlaying(false);\n      }, TTS_LOADING_TIMEOUT_MS);\n\n      // Hard safety timeout: if the entire TTS session hasn't finished in 5 minutes,\n      // force cleanup to prevent the UI from being stuck indefinitely.\n      if (!sessionTimeoutRef.current) {\n        sessionTimeoutRef.current = setTimeout(() => {\n          sessionTimeoutRef.current = null;\n          stopTTS();\n        }, TTS_SESSION_TIMEOUT_MS);\n      }\n\n      if (wsRef.current?.readyState === WebSocket.OPEN) {\n        wsRef.current.send(JSON.stringify({ type: \"synthesize\", text }));\n      } else {\n        pendingTextRef.current.push(text);\n        connectWebSocket();\n      }\n    },\n    [connectWebSocket, stopTTS]\n  );\n\n  const streamTTS = useCallback(\n    (text: string, isComplete: boolean = false, messageNodeId?: number) => {\n      if (!autoPlayback) {\n        return;\n      }\n\n      if (typeof messageNodeId === \"number\") {\n        setActiveMessageNodeId((prev) =>\n          prev === messageNodeId ? prev : messageNodeId\n        );\n      }\n\n      // Skip if text hasn't changed\n      if (text === lastRawTextRef.current && !isComplete) return;\n      lastRawTextRef.current = text;\n\n      // Clear pending timers\n      if (flushTimerRef.current) {\n        clearTimeout(flushTimerRef.current);\n        flushTimerRef.current = null;\n      }\n      if (fastStartTimerRef.current) {\n        clearTimeout(fastStartTimerRef.current);\n        fastStartTimerRef.current = null;\n      }\n\n      // Clean the full text\n      const cleanedText = cleanTextForTTS(text);\n      const uncommittedText = cleanedText.slice(committedPositionRef.current);\n\n      // On completion, we must still signal \"end\" even if there's no new text.\n      // Otherwise ElevenLabs waits for more input and eventually times out.\n      if (uncommittedText.length === 0) {\n        if (isComplete && !hasSignaledEndRef.current) {\n          hasSignaledEndRef.current = true;\n\n          if (wsRef.current?.readyState === WebSocket.OPEN) {\n            wsRef.current.send(JSON.stringify({ type: \"end\" }));\n          } else {\n            const sendEnd = () => {\n              if (wsRef.current?.readyState === WebSocket.OPEN) {\n                if (pendingTextRef.current.length === 0) {\n                  wsRef.current.send(JSON.stringify({ type: \"end\" }));\n                } else {\n                  setTimeout(sendEnd, WS_END_RETRY_DELAY_MS);\n                }\n              } else if (wsRef.current?.readyState === WebSocket.CONNECTING) {\n                setTimeout(sendEnd, WS_END_RETRY_DELAY_MS);\n              }\n            };\n            setTimeout(sendEnd, WS_END_RETRY_DELAY_MS);\n          }\n        }\n        return;\n      }\n\n      // Find chunk boundaries and send immediately\n      let remaining = uncommittedText;\n      let offset = 0;\n\n      while (remaining.length > 0) {\n        const boundaryIndex = findChunkBoundary(remaining);\n\n        if (boundaryIndex > 0) {\n          const chunkText = remaining.slice(0, boundaryIndex).trim();\n          if (chunkText.length > 0) {\n            sendTextToTTS(chunkText);\n            hasSpokenFirstChunkRef.current = true;\n          }\n          offset += boundaryIndex;\n          remaining = remaining.slice(boundaryIndex).trim();\n        } else {\n          break;\n        }\n      }\n\n      committedPositionRef.current += offset;\n\n      // Handle remaining text when stream is complete\n      if (isComplete && remaining.trim().length > 0) {\n        sendTextToTTS(remaining.trim());\n        committedPositionRef.current = cleanedText.length;\n        hasSpokenFirstChunkRef.current = true;\n      }\n\n      // When streaming is complete, signal end to flush remaining audio\n      if (isComplete && !hasSignaledEndRef.current) {\n        hasSignaledEndRef.current = true;\n\n        if (wsRef.current?.readyState === WebSocket.OPEN) {\n          wsRef.current.send(JSON.stringify({ type: \"end\" }));\n        } else {\n          const sendEnd = () => {\n            if (wsRef.current?.readyState === WebSocket.OPEN) {\n              if (pendingTextRef.current.length === 0) {\n                wsRef.current.send(JSON.stringify({ type: \"end\" }));\n              } else {\n                setTimeout(sendEnd, WS_END_RETRY_DELAY_MS);\n              }\n            } else if (wsRef.current?.readyState === WebSocket.CONNECTING) {\n              setTimeout(sendEnd, WS_END_RETRY_DELAY_MS);\n            }\n          };\n          setTimeout(sendEnd, WS_END_RETRY_DELAY_MS);\n        }\n      }\n\n      const currentUncommitted = cleanedText\n        .slice(committedPositionRef.current)\n        .trim();\n\n      // Fast start: send the first TTS chunk as soon as we have enough text (20+ chars)\n      // without waiting for a full sentence boundary. This reduces perceived latency —\n      // the user hears audio begin within ~200ms of the first text arriving, rather than\n      // waiting for the LLM to produce a complete sentence.\n      if (\n        !hasSpokenFirstChunkRef.current &&\n        currentUncommitted.length >= 20 &&\n        !isComplete\n      ) {\n        fastStartTimerRef.current = setTimeout(() => {\n          if (hasSpokenFirstChunkRef.current) return;\n\n          const nowCleaned = cleanTextForTTS(lastRawTextRef.current);\n          const nowUncommitted = nowCleaned\n            .slice(committedPositionRef.current)\n            .trim();\n\n          if (nowUncommitted.length >= 20) {\n            // Find a reasonable break point\n            let breakPoint = nowUncommitted.length;\n            const spaceIdx = nowUncommitted.lastIndexOf(\" \", 50);\n            if (spaceIdx >= 15) breakPoint = spaceIdx;\n\n            const chunk = nowUncommitted.slice(0, breakPoint).trim();\n            if (chunk.length > 0) {\n              sendTextToTTS(chunk);\n              committedPositionRef.current += breakPoint;\n              hasSpokenFirstChunkRef.current = true;\n            }\n          }\n        }, FAST_START_DELAY_MS);\n      }\n\n      // Flush timer for text ending with punctuation\n      if (\n        currentUncommitted.length > 0 &&\n        !isComplete &&\n        /[.!?]$/.test(currentUncommitted)\n      ) {\n        flushTimerRef.current = setTimeout(() => {\n          const nowCleaned = cleanTextForTTS(lastRawTextRef.current);\n          const nowUncommitted = nowCleaned\n            .slice(committedPositionRef.current)\n            .trim();\n\n          if (nowUncommitted.length > 0) {\n            sendTextToTTS(nowUncommitted);\n            committedPositionRef.current = nowCleaned.length;\n            hasSpokenFirstChunkRef.current = true;\n          }\n        }, FLUSH_DELAY_MS);\n      }\n    },\n    [autoPlayback, sendTextToTTS]\n  );\n\n  const resetTTS = useCallback(() => {\n    stopTTS();\n    if (sessionTimeoutRef.current) {\n      clearTimeout(sessionTimeoutRef.current);\n      sessionTimeoutRef.current = null;\n    }\n    committedPositionRef.current = 0;\n    lastRawTextRef.current = \"\";\n    hasSpokenFirstChunkRef.current = false;\n    hasSignaledEndRef.current = false;\n    setSpokenText(\"\");\n    setActiveMessageNodeId(null);\n    setIsAwaitingAutoPlaybackStart(false);\n    setIsTTSMuted(false);\n    setIsManualTTSPlaying(false);\n\n    // Reset audio progress tracking\n    totalBytesReceivedRef.current = 0;\n    setAudioProgress(0);\n    setTotalSpokenCharCount(0);\n    setRevealedCharCount(0);\n    lastRevealedCharCountRef.current = 0;\n\n    // Cancel animation frame if running\n    if (animationFrameRef.current) {\n      cancelAnimationFrame(animationFrameRef.current);\n      animationFrameRef.current = null;\n    }\n  }, [stopTTS]);\n\n  // Toggle TTS mute state\n  const toggleTTSMute = useCallback(() => {\n    setIsTTSMuted((prev) => {\n      const newMuted = !prev;\n      if (audioElementRef.current) {\n        audioElementRef.current.muted = newMuted;\n      }\n      manualTTSMuteHandlerRef.current?.(newMuted);\n      return newMuted;\n    });\n  }, []);\n\n  const registerManualTTSMuteHandler = useCallback(\n    (handler: ((muted: boolean) => void) | null) => {\n      manualTTSMuteHandlerRef.current = handler;\n      if (handler) {\n        handler(isTTSMuted);\n      }\n    },\n    [isTTSMuted]\n  );\n\n  // Animation loop to track audio playback progress for progressive text reveal\n  useEffect(() => {\n    if (!isTTSPlaying || !audioElementRef.current) {\n      return;\n    }\n\n    const updateProgress = () => {\n      const audio = audioElementRef.current;\n      if (!audio) return;\n\n      // Use playback position + a small lead.\n      const effectiveSeconds = Math.max(\n        audio.currentTime + REVEAL_LEAD_SECONDS,\n        0\n      );\n      const hasDuration = Number.isFinite(audio.duration) && audio.duration > 0;\n      const rawTargetChars = hasDuration\n        ? Math.floor(\n            Math.min(effectiveSeconds / audio.duration, 1) *\n              totalSpokenCharCount\n          )\n        : Math.floor(effectiveSeconds * BASE_CHARS_PER_SECOND * playbackSpeed);\n      const targetChars = Math.max(\n        0,\n        Math.min(rawTargetChars, totalSpokenCharCount)\n      );\n\n      // Smooth catch-up to avoid sudden end-of-response jumps.\n      const prevChars = lastRevealedCharCountRef.current;\n      const nextChars =\n        targetChars > prevChars + MAX_CATCHUP_CHARS_PER_FRAME\n          ? prevChars + MAX_CATCHUP_CHARS_PER_FRAME\n          : targetChars;\n      lastRevealedCharCountRef.current = nextChars;\n      setRevealedCharCount(nextChars);\n\n      // Calculate progress as ratio of chars revealed to total\n      let progress = 0;\n      if (totalSpokenCharCount > 0) {\n        progress = Math.min(nextChars / totalSpokenCharCount, 1);\n      }\n\n      setAudioProgress(progress);\n\n      if (isTTSPlaying) {\n        animationFrameRef.current = requestAnimationFrame(updateProgress);\n      }\n    };\n\n    animationFrameRef.current = requestAnimationFrame(updateProgress);\n\n    return () => {\n      if (animationFrameRef.current) {\n        cancelAnimationFrame(animationFrameRef.current);\n        animationFrameRef.current = null;\n      }\n    };\n  }, [isTTSPlaying, totalSpokenCharCount]);\n\n  // Reset TTS state when voice auto-playback is disabled\n  // This prevents the mic button from being stuck disabled\n  const prevAutoPlaybackRef = useRef(autoPlayback);\n  useEffect(() => {\n    if (prevAutoPlaybackRef.current && !autoPlayback) {\n      // Auto-playback was just disabled, clean up TTS state\n      resetTTS();\n    }\n    prevAutoPlaybackRef.current = autoPlayback;\n  }, [autoPlayback, resetTTS]);\n\n  // Cleanup on unmount\n  useEffect(() => {\n    return () => {\n      if (flushTimerRef.current) clearTimeout(flushTimerRef.current);\n      if (fastStartTimerRef.current) clearTimeout(fastStartTimerRef.current);\n      if (loadingTimeoutRef.current) clearTimeout(loadingTimeoutRef.current);\n      if (endCheckIntervalRef.current)\n        clearInterval(endCheckIntervalRef.current);\n      if (animationFrameRef.current)\n        cancelAnimationFrame(animationFrameRef.current);\n      if (sessionTimeoutRef.current) clearTimeout(sessionTimeoutRef.current);\n      if (audioUrlRef.current) {\n        URL.revokeObjectURL(audioUrlRef.current);\n      }\n      if (wsRef.current) {\n        try {\n          wsRef.current.close();\n        } catch (err) {\n          // WebSocket may already be closed or in CLOSING state — non-critical\n          console.warn(\"Failed to close TTS WebSocket during cleanup:\", err);\n        }\n      }\n      if (audioElementRef.current) {\n        try {\n          audioElementRef.current.pause();\n          audioElementRef.current.src = \"\";\n        } catch {\n          // Ignore\n        }\n      }\n      if (\n        mediaSourceRef.current &&\n        mediaSourceRef.current.readyState === \"open\"\n      ) {\n        try {\n          mediaSourceRef.current.endOfStream();\n        } catch {\n          // Ignore\n        }\n      }\n    };\n  }, []);\n\n  const isAudioSyncActive = autoPlayback && (isTTSPlaying || isTTSLoading);\n\n  return (\n    <VoiceModeContext.Provider\n      value={{\n        isTTSPlaying,\n        isManualTTSPlaying,\n        isTTSLoading,\n        spokenText,\n        activeMessageNodeId,\n        streamTTS,\n        stopTTS,\n        manualStopCount,\n        resetTTS,\n        audioProgress,\n        revealedCharCount,\n        isAudioSyncActive,\n        autoPlayback,\n        isAwaitingAutoPlaybackStart,\n        isTTSMuted,\n        toggleTTSMute,\n        setManualTTSPlaying: setIsManualTTSPlaying,\n        registerManualTTSMuteHandler,\n      }}\n    >\n      {children}\n    </VoiceModeContext.Provider>\n  );\n}\n\nexport function useVoiceMode(): VoiceModeContextType {\n  const context = useContext(VoiceModeContext);\n  if (!context) {\n    throw new Error(\"useVoiceMode must be used within VoiceModeProvider\");\n  }\n  return context;\n}\n"
  },
  {
    "path": "web/src/providers/__tests__/ProjectsContext.test.tsx",
    "content": "import React, { PropsWithChildren } from \"react\";\nimport { act, renderHook } from \"@testing-library/react\";\nimport {\n  ProjectsProvider,\n  useProjectsContext,\n} from \"@/providers/ProjectsContext\";\nimport { SettingsContext } from \"@/providers/SettingsProvider\";\nimport { CombinedSettings } from \"@/interfaces/settings\";\nimport type { ProjectFile } from \"@/app/app/projects/projectsService\";\n\nconst mockUploadFiles = jest.fn();\nconst mockGetRecentFiles = jest.fn();\nconst mockToastWarning = jest.fn();\n\njest.mock(\"next/navigation\", () => ({\n  useSearchParams: () => ({\n    get: () => null,\n  }),\n}));\n\njest.mock(\"@/hooks/appNavigation\", () => ({\n  useAppRouter: () => jest.fn(),\n}));\n\njest.mock(\"@/lib/hooks/useProjects\", () => ({\n  useProjects: () => ({\n    projects: [],\n    refreshProjects: jest.fn().mockResolvedValue([]),\n  }),\n}));\n\njest.mock(\"@/hooks/useToast\", () => ({\n  toast: {\n    warning: (...args: unknown[]) => mockToastWarning(...args),\n    error: jest.fn(),\n    success: jest.fn(),\n  },\n}));\n\njest.mock(\"@/app/app/projects/projectsService\", () => {\n  const actual = jest.requireActual(\"@/app/app/projects/projectsService\");\n  return {\n    ...actual,\n    fetchProjects: jest.fn().mockResolvedValue([]),\n    createProject: jest.fn(),\n    uploadFiles: (...args: unknown[]) => mockUploadFiles(...args),\n    getRecentFiles: (...args: unknown[]) => mockGetRecentFiles(...args),\n    getFilesInProject: jest.fn().mockResolvedValue([]),\n    getProject: jest.fn(),\n    getProjectInstructions: jest.fn(),\n    upsertProjectInstructions: jest.fn(),\n    getProjectDetails: jest.fn(),\n    renameProject: jest.fn(),\n    deleteProject: jest.fn(),\n    deleteUserFile: jest.fn(),\n    getUserFileStatuses: jest.fn().mockResolvedValue([]),\n    unlinkFileFromProject: jest.fn(),\n    linkFileToProject: jest.fn(),\n  };\n});\n\nconst settingsValue: CombinedSettings = {\n  settings: {\n    user_file_max_upload_size_mb: 1,\n  } as CombinedSettings[\"settings\"],\n  enterpriseSettings: null,\n  customAnalyticsScript: null,\n  webVersion: null,\n  webDomain: null,\n  isSearchModeAvailable: true,\n  settingsLoading: false,\n};\n\nconst wrapper = ({ children }: PropsWithChildren) => (\n  <SettingsContext.Provider value={settingsValue}>\n    <ProjectsProvider>{children}</ProjectsProvider>\n  </SettingsContext.Provider>\n);\n\ndescribe(\"ProjectsContext beginUpload size precheck\", () => {\n  beforeEach(() => {\n    mockUploadFiles.mockReset();\n    mockGetRecentFiles.mockReset();\n    mockToastWarning.mockReset();\n\n    mockUploadFiles.mockResolvedValue({\n      user_files: [],\n      rejected_files: [],\n    });\n    mockGetRecentFiles.mockResolvedValue([]);\n  });\n\n  it(\"only sends valid files to the upload API when oversized files are present\", async () => {\n    const { result } = renderHook(() => useProjectsContext(), { wrapper });\n\n    const valid = new File([\"small\"], \"small.txt\", { type: \"text/plain\" });\n    const oversized = new File([new Uint8Array(2 * 1024 * 1024)], \"big.txt\", {\n      type: \"text/plain\",\n    });\n\n    let optimisticFiles: ProjectFile[] = [];\n    await act(async () => {\n      optimisticFiles = await result.current.beginUpload(\n        [valid, oversized],\n        null\n      );\n    });\n\n    expect(mockUploadFiles).toHaveBeenCalledTimes(1);\n    const [uploadedFiles] = mockUploadFiles.mock.calls[0];\n    expect((uploadedFiles as File[]).map((f) => f.name)).toEqual([\"small.txt\"]);\n    expect(optimisticFiles.map((f) => f.name)).toEqual([\"small.txt\"]);\n    expect(mockToastWarning).toHaveBeenCalledTimes(1);\n  });\n\n  it(\"uploads all files when none are oversized\", async () => {\n    const { result } = renderHook(() => useProjectsContext(), { wrapper });\n\n    const first = new File([\"small\"], \"first.txt\", { type: \"text/plain\" });\n    const second = new File([\"small\"], \"second.txt\", { type: \"text/plain\" });\n\n    let optimisticFiles: ProjectFile[] = [];\n    await act(async () => {\n      optimisticFiles = await result.current.beginUpload([first, second], null);\n    });\n\n    expect(mockUploadFiles).toHaveBeenCalledTimes(1);\n    const [uploadedFiles] = mockUploadFiles.mock.calls[0];\n    expect((uploadedFiles as File[]).map((f) => f.name)).toEqual([\n      \"first.txt\",\n      \"second.txt\",\n    ]);\n    expect(mockToastWarning).not.toHaveBeenCalled();\n    expect(optimisticFiles.map((f) => f.name)).toEqual([\n      \"first.txt\",\n      \"second.txt\",\n    ]);\n  });\n\n  it(\"does not call upload API when all files are oversized\", async () => {\n    const { result } = renderHook(() => useProjectsContext(), { wrapper });\n\n    const oversized = new File(\n      [new Uint8Array(2 * 1024 * 1024)],\n      \"too-big.txt\",\n      { type: \"text/plain\" }\n    );\n    const onSuccess = jest.fn();\n    const onFailure = jest.fn();\n\n    let optimisticFiles: ProjectFile[] = [];\n    await act(async () => {\n      optimisticFiles = await result.current.beginUpload(\n        [oversized],\n        null,\n        onSuccess,\n        onFailure\n      );\n    });\n\n    expect(mockUploadFiles).not.toHaveBeenCalled();\n    expect(optimisticFiles).toEqual([]);\n    expect(mockToastWarning).toHaveBeenCalledTimes(1);\n    expect(onSuccess).not.toHaveBeenCalled();\n    expect(onFailure).toHaveBeenCalledWith([]);\n  });\n});\n"
  },
  {
    "path": "web/src/proxy.ts",
    "content": "import { NextResponse } from \"next/server\";\nimport type { NextRequest } from \"next/server\";\nimport {\n  AuthType,\n  SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED,\n  SERVER_SIDE_ONLY__AUTH_TYPE,\n} from \"./lib/constants\";\n\n// Authentication cookie names (matches backend constants)\nconst FASTAPI_USERS_AUTH_COOKIE_NAME = \"fastapiusersauth\";\nconst ANONYMOUS_USER_COOKIE_NAME = \"onyx_anonymous_user\";\n\n// Protected route prefixes (require authentication)\nconst PROTECTED_ROUTES = [\"/app\", \"/admin\", \"/agents\", \"/connector\"];\n\n// Public route prefixes (no authentication required)\nconst PUBLIC_ROUTES = [\"/auth\", \"/anonymous\", \"/_next\", \"/api\"];\n\n// NOTE: have to have the \"/:path*\" here since NextJS doesn't allow any real JS to\n// be run before the config is defined e.g. if we try and do a .map it will complain\nexport const config = {\n  matcher: [\n    // Auth-protected routes (for middleware auth check)\n    \"/app/:path*\",\n    \"/admin/:path*\",\n    \"/agents/:path*\",\n    \"/connector/:path*\",\n\n    // Enterprise Edition routes (for /ee rewriting)\n    // These are ONLY the EE-specific routes that should be rewritten\n    \"/admin/groups/:path*\",\n    \"/admin/performance/usage/:path*\",\n    \"/admin/performance/query-history/:path*\",\n    \"/admin/theme/:path*\",\n    \"/admin/performance/custom-analytics/:path*\",\n    \"/admin/standard-answer/:path*\",\n    \"/agents/stats/:path*\",\n\n    // Cloud only\n    \"/admin/billing/:path*\",\n  ],\n};\n\n// Enterprise Edition specific routes (ONLY these get /ee rewriting)\nconst EE_ROUTES = [\n  \"/admin/groups\",\n  \"/admin/performance/usage\",\n  \"/admin/performance/query-history\",\n  \"/admin/theme\",\n  \"/admin/performance/custom-analytics\",\n  \"/admin/standard-answer\",\n  \"/agents/stats\",\n];\n\nexport async function proxy(request: NextRequest) {\n  const pathname = request.nextUrl.pathname;\n\n  // Auth Check: Fast-fail at edge if no cookie (defense in depth)\n  // Note: Layouts still do full verification (token validity, roles, etc.)\n  const isProtectedRoute = PROTECTED_ROUTES.some((route) =>\n    pathname.startsWith(route)\n  );\n  const isPublicRoute = PUBLIC_ROUTES.some((route) =>\n    pathname.startsWith(route)\n  );\n\n  if (isProtectedRoute && !isPublicRoute) {\n    const authCookie = request.cookies.get(FASTAPI_USERS_AUTH_COOKIE_NAME);\n    const anonymousCookie = request.cookies.get(ANONYMOUS_USER_COOKIE_NAME);\n\n    // Allow access if user has either a regular auth cookie or anonymous user cookie\n    if (!authCookie && !anonymousCookie) {\n      const loginUrl = new URL(\"/auth/login\", request.url);\n      // Preserve full URL including query params and hash for deep linking\n      const fullPath = pathname + request.nextUrl.search + request.nextUrl.hash;\n      loginUrl.searchParams.set(\"next\", fullPath);\n      return NextResponse.redirect(loginUrl);\n    }\n  }\n\n  // Enterprise Edition: Rewrite EE-specific routes to /ee prefix\n  if (SERVER_SIDE_ONLY__PAID_ENTERPRISE_FEATURES_ENABLED) {\n    if (EE_ROUTES.some((route) => pathname.startsWith(route))) {\n      const newUrl = new URL(`/ee${pathname}`, request.url);\n      return NextResponse.rewrite(newUrl);\n    }\n  }\n\n  return NextResponse.next();\n}\n"
  },
  {
    "path": "web/src/refresh-components/Attachment.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Attachment from \"./Attachment\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof Attachment> = {\n  title: \"refresh-components/Attachment\",\n  component: Attachment,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Attachment>;\n\nexport const Default: Story = {\n  args: {\n    fileName: \"quarterly-report.pdf\",\n  },\n};\n\nexport const WithOpenAction: Story = {\n  args: {\n    fileName: \"meeting-notes.docx\",\n    open: () => alert(\"Opening document\"),\n  },\n};\n\nexport const LongFileName: Story = {\n  args: {\n    fileName:\n      \"very-long-document-name-that-might-overflow-the-container-2024-Q4-final-draft.pdf\",\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/Attachment.tsx",
    "content": "import Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport { SvgFileText, SvgMaximize2 } from \"@opal/icons\";\nexport interface AttachmentsProps {\n  fileName: string;\n  open?: () => void;\n}\n\nexport default function Attachments({ fileName, open }: AttachmentsProps) {\n  return (\n    <div className=\"flex items-center border bg-background-tint-00 rounded-12 p-1 gap-1\">\n      <div className=\"p-2 bg-background-tint-01 rounded-08\">\n        <SvgFileText className=\"w-[1.25rem] h-[1.25rem] stroke-text-02\" />\n      </div>\n      <div className=\"flex flex-col px-2\">\n        <Text as=\"p\" secondaryAction>\n          {fileName}\n        </Text>\n        <Text as=\"p\" secondaryBody text03>\n          Document\n        </Text>\n      </div>\n\n      {open && (\n        <Button\n          aria-label=\"Expand document\"\n          onClick={open}\n          icon={SvgMaximize2}\n          prominence=\"tertiary\"\n          size=\"sm\"\n        />\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/Calendar.stories.tsx",
    "content": "import React from \"react\";\nimport type { Meta, StoryObj } from \"@storybook/react\";\nimport Calendar from \"./Calendar\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport type { DateRange } from \"react-day-picker\";\n\nconst meta: Meta<typeof Calendar> = {\n  title: \"refresh-components/Calendar\",\n  component: Calendar,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Calendar>;\n\n// ---------------------------------------------------------------------------\n// Single selection\n// ---------------------------------------------------------------------------\n\nfunction SingleSelectDemo() {\n  const [selected, setSelected] = React.useState<Date | undefined>(new Date());\n  return <Calendar mode=\"single\" selected={selected} onSelect={setSelected} />;\n}\n\nexport const SingleSelect: Story = {\n  render: () => <SingleSelectDemo />,\n};\n\n// ---------------------------------------------------------------------------\n// Range selection\n// ---------------------------------------------------------------------------\n\nfunction RangeSelectDemo() {\n  const [range, setRange] = React.useState<DateRange | undefined>({\n    from: new Date(2025, 2, 10),\n    to: new Date(2025, 2, 20),\n  });\n  return <Calendar mode=\"range\" selected={range} onSelect={setRange} />;\n}\n\nexport const RangeSelect: Story = {\n  render: () => <RangeSelectDemo />,\n};\n\n// ---------------------------------------------------------------------------\n// Without outside days\n// ---------------------------------------------------------------------------\n\nexport const NoOutsideDays: Story = {\n  args: {\n    mode: \"single\",\n    showOutsideDays: false,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/Calendar.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { DayButton, DayPicker, getDefaultClassNames } from \"react-day-picker\";\nimport { cn } from \"@/lib/utils\";\nimport { Button as OpalButton } from \"@opal/components\";\nimport { SvgChevronDown, SvgChevronLeft, SvgChevronRight } from \"@opal/icons\";\nimport Button from \"@/refresh-components/buttons/Button\";\n\nfunction CalendarDayButton({\n  className,\n  day,\n  modifiers,\n  ...props\n}: React.ComponentProps<typeof DayButton>) {\n  const ref = React.useRef<HTMLButtonElement>(null);\n  React.useEffect(() => {\n    if (modifiers.focused) ref.current?.focus();\n  }, [modifiers.focused]);\n\n  return (\n    // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n    <Button\n      ref={ref}\n      tertiary\n      className=\"w-full\"\n      transient={modifiers.selected}\n      data-day={day.date.toLocaleDateString()}\n      data-selected-single={\n        modifiers.selected &&\n        !modifiers.range_start &&\n        !modifiers.range_end &&\n        !modifiers.range_middle\n      }\n      data-range-start={modifiers.range_start}\n      data-range-end={modifiers.range_end}\n      data-range-middle={modifiers.range_middle}\n      {...props}\n    />\n  );\n}\n\nexport default function Calendar({\n  className,\n  classNames,\n  showOutsideDays = true,\n  captionLayout = \"label\",\n  formatters,\n  components,\n  ...props\n}: React.ComponentProps<typeof DayPicker>) {\n  const defaultClassNames = getDefaultClassNames();\n\n  return (\n    <DayPicker\n      showOutsideDays={showOutsideDays}\n      className={cn(\n        \"group/calendar p-0 [--cell-size:2rem] [[data-slot=card-content]_&]:bg-transparent [[data-slot=popover-content]_&]:bg-transparent\",\n        String.raw`rtl:**:[.rdp-button\\_next>svg]:rotate-180`,\n        String.raw`rtl:**:[.rdp-button\\_previous>svg]:rotate-180`,\n        className\n      )}\n      captionLayout={captionLayout}\n      formatters={{\n        formatMonthDropdown: (date) =>\n          date.toLocaleString(\"default\", { month: \"short\" }),\n        ...formatters,\n      }}\n      classNames={{\n        root: cn(\"w-fit\", defaultClassNames.root),\n        months: cn(\n          \"relative flex flex-col gap-4 md:flex-row\",\n          defaultClassNames.months\n        ),\n        month: cn(\"flex w-full flex-col gap-4\", defaultClassNames.month),\n        nav: cn(\n          \"absolute inset-x-0 top-0 flex w-full items-center justify-between gap-1\",\n          defaultClassNames.nav\n        ),\n        button_previous: cn(\n          \"h-[--cell-size] w-[--cell-size] select-none p-0 aria-disabled:opacity-50\",\n          defaultClassNames.button_previous\n        ),\n        button_next: cn(\n          \"h-[--cell-size] w-[--cell-size] select-none p-0 aria-disabled:opacity-50\",\n          defaultClassNames.button_next\n        ),\n        month_caption: cn(\n          \"flex h-[--cell-size] w-full items-center justify-center px-[--cell-size]\",\n          defaultClassNames.month_caption\n        ),\n        dropdowns: cn(\n          \"flex h-[--cell-size] w-full items-center justify-center gap-1.5 text-sm font-medium\",\n          defaultClassNames.dropdowns\n        ),\n        dropdown_root: cn(\n          \"has-focus:border-border-05 border-border-03 shadow-xs has-focus:ring-border-05/50 has-focus:ring-[3px] relative rounded-md border\",\n          defaultClassNames.dropdown_root\n        ),\n        dropdown: cn(\n          \"bg-background-neutral-00 absolute inset-0 opacity-0\",\n          defaultClassNames.dropdown\n        ),\n        caption_label: cn(\n          \"select-none font-medium\",\n          captionLayout === \"label\"\n            ? \"text-sm\"\n            : \"[&>svg]:text-text-03 flex h-8 items-center gap-1 rounded-md pl-2 pr-1 text-sm [&>svg]:size-3.5\",\n          defaultClassNames.caption_label\n        ),\n        table: \"w-full border-collapse\",\n        weekdays: cn(\"flex\", defaultClassNames.weekdays),\n        weekday: cn(\n          \"text-text-02 flex-1 select-none font-secondary-mono pb-2\",\n          defaultClassNames.weekday\n        ),\n        week: cn(\"flex w-full\", defaultClassNames.week),\n        day: cn(\n          \"group/day relative h-full w-full select-none\",\n          defaultClassNames.day\n        ),\n        // week_number_header: cn(defaultClassNames.week_number_header),\n        // week_number: cn(defaultClassNames.week_number),\n        // range_start: cn(defaultClassNames.range_start),\n        // range_middle: cn(defaultClassNames.range_middle),\n        // range_end: cn(defaultClassNames.range_end),\n        // today: cn(defaultClassNames.today),\n        // outside: cn(defaultClassNames.outside),\n        // disabled: cn(defaultClassNames.disabled),\n        // hidden: cn(defaultClassNames.hidden),\n        ...classNames,\n      }}\n      components={{\n        Root: ({ className, rootRef, ...props }) => {\n          return (\n            <div\n              data-slot=\"calendar\"\n              ref={rootRef}\n              className={cn(className)}\n              {...props}\n            />\n          );\n        },\n        Chevron: ({ className, orientation, size: _size, ...props }) => {\n          if (orientation === \"left\")\n            return (\n              <OpalButton\n                icon={SvgChevronLeft}\n                prominence=\"tertiary\"\n                {...props}\n              />\n            );\n          if (orientation === \"right\")\n            return (\n              <OpalButton\n                icon={SvgChevronRight}\n                prominence=\"tertiary\"\n                {...props}\n              />\n            );\n          return (\n            <OpalButton\n              icon={SvgChevronDown}\n              prominence=\"tertiary\"\n              {...props}\n            />\n          );\n        },\n        DayButton: CalendarDayButton,\n        WeekNumber: ({ children, ...props }) => {\n          return (\n            <td {...props}>\n              <div className=\"flex size-[--cell-size] items-center justify-center text-center\">\n                {children}\n              </div>\n            </td>\n          );\n        },\n        ...components,\n      }}\n      {...props}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/CharacterCount.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport CharacterCount from \"./CharacterCount\";\n\nconst meta: Meta<typeof CharacterCount> = {\n  title: \"refresh-components/CharacterCount\",\n  component: CharacterCount,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof CharacterCount>;\n\nexport const UnderLimit: Story = {\n  args: {\n    value: \"Hello world\",\n    limit: 100,\n  },\n};\n\nexport const NearLimit: Story = {\n  args: {\n    value: \"A\".repeat(95),\n    limit: 100,\n  },\n};\n\nexport const AtLimit: Story = {\n  args: {\n    value: \"A\".repeat(100),\n    limit: 100,\n  },\n};\n\nexport const Empty: Story = {\n  args: {\n    value: \"\",\n    limit: 256,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/CharacterCount.tsx",
    "content": "import Text from \"@/refresh-components/texts/Text\";\nexport interface CharacterCountProps {\n  value: string;\n  limit: number;\n}\nexport default function CharacterCount({ value, limit }: CharacterCountProps) {\n  const length = value?.length || 0;\n  return (\n    <Text text03 secondaryBody>\n      ({length}/{limit} characters)\n    </Text>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/Chip.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Chip from \"./Chip\";\nimport { SvgUser } from \"@opal/icons\";\n\nconst meta: Meta<typeof Chip> = {\n  title: \"refresh-components/Chip\",\n  component: Chip,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Chip>;\n\nexport const Default: Story = {\n  args: {\n    children: \"Tag Name\",\n  },\n};\n\nexport const WithIcon: Story = {\n  args: {\n    children: \"John Doe\",\n    icon: SvgUser,\n  },\n};\n\nexport const Removable: Story = {\n  args: {\n    children: \"Removable Tag\",\n    onRemove: () => alert(\"Removed!\"),\n  },\n};\n\nexport const WithIconAndRemove: Story = {\n  args: {\n    children: \"Jane Smith\",\n    icon: SvgUser,\n    onRemove: () => alert(\"Removed!\"),\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/Chip.tsx",
    "content": "import { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgX } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport type { IconProps } from \"@opal/types\";\n\nexport interface ChipProps {\n  children?: string;\n  icon?: React.FunctionComponent<IconProps>;\n  /** Icon rendered after the label (e.g. a warning indicator) */\n  rightIcon?: React.FunctionComponent<IconProps>;\n  onRemove?: () => void;\n  smallLabel?: boolean;\n  /** When true, applies warning-coloured styling to the right icon. */\n  error?: boolean;\n}\n\n/**\n * A simple chip/tag component for displaying metadata.\n * Supports an optional remove button via the `onRemove` prop.\n *\n * @example\n * ```tsx\n * <Chip>Tag Name</Chip>\n * <Chip icon={SvgUser}>John Doe</Chip>\n * <Chip onRemove={() => removeTag(id)}>Removable</Chip>\n * ```\n */\nexport default function Chip({\n  children,\n  icon: Icon,\n  rightIcon: RightIcon,\n  onRemove,\n  smallLabel = true,\n  error = false,\n}: ChipProps) {\n  return (\n    <div\n      className={cn(\n        \"flex items-center gap-1 px-1.5 py-0.5 rounded-08\",\n        \"bg-background-tint-02\"\n      )}\n    >\n      {Icon && <Icon size={12} className=\"text-text-03\" />}\n      {children && (\n        <Text figureSmallLabel={smallLabel} text03>\n          {children}\n        </Text>\n      )}\n      {RightIcon && (\n        <RightIcon\n          size={14}\n          className={cn(error ? \"text-status-warning-05\" : \"text-text-03\")}\n        />\n      )}\n      {onRemove && (\n        <Button\n          onClick={(e) => {\n            e.stopPropagation();\n            onRemove();\n          }}\n          prominence=\"tertiary\"\n          icon={SvgX}\n          size=\"xs\"\n        />\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/Code.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport Code from \"./Code\";\n\nconst meta: Meta<typeof Code> = {\n  title: \"refresh-components/Code\",\n  component: Code,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Code>;\n\nexport const Default: Story = {\n  args: {\n    children: `const greeting = \"Hello, world!\";\\nconsole.log(greeting);`,\n  },\n};\n\nexport const WithoutCopyButton: Story = {\n  args: {\n    children: `npm install @onyx/sdk`,\n    showCopyButton: false,\n  },\n};\n\nexport const MultiLine: Story = {\n  args: {\n    children: `function fibonacci(n: number): number {\n  if (n <= 1) return n;\n  return fibonacci(n - 1) + fibonacci(n - 2);\n}\n\nconsole.log(fibonacci(10));`,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/Code.tsx",
    "content": "import { WithoutStyles } from \"@/types\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\n\ninterface CodeProps extends WithoutStyles<React.HTMLAttributes<HTMLElement>> {\n  children: string;\n  showCopyButton?: boolean;\n}\n\nexport default function Code({\n  children,\n  showCopyButton = true,\n  ...props\n}: CodeProps) {\n  return (\n    <div className=\"relative code-wrapper\">\n      <code className=\"code-block\" {...props}>\n        {children}\n      </code>\n      {showCopyButton && (\n        <div className=\"code-copy-button\">\n          <CopyIconButton getCopyText={() => children} />\n        </div>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/Collapsible.stories.tsx",
    "content": "import React from \"react\";\nimport type { Meta, StoryObj } from \"@storybook/react\";\nimport {\n  Collapsible,\n  CollapsibleTrigger,\n  CollapsibleContent,\n} from \"./Collapsible\";\n\nconst meta: Meta<typeof Collapsible> = {\n  title: \"refresh-components/Collapsible\",\n  component: Collapsible,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Collapsible>;\n\nexport const Default: Story = {\n  render: () => (\n    <Collapsible defaultOpen={false}>\n      <CollapsibleTrigger asChild>\n        <button className=\"p-2 bg-background-tint-03 rounded-08 font-main-ui-action w-full text-left\">\n          Click to toggle\n        </button>\n      </CollapsibleTrigger>\n      <CollapsibleContent>\n        <div className=\"p-4 border border-border-01 rounded-08 mt-2\">\n          This content can be expanded and collapsed with a smooth animation.\n        </div>\n      </CollapsibleContent>\n    </Collapsible>\n  ),\n};\n\nexport const DefaultOpen: Story = {\n  render: () => (\n    <Collapsible defaultOpen>\n      <CollapsibleTrigger asChild>\n        <button className=\"p-2 bg-background-tint-03 rounded-08 font-main-ui-action w-full text-left\">\n          Already open — click to close\n        </button>\n      </CollapsibleTrigger>\n      <CollapsibleContent>\n        <div className=\"p-4 border border-border-01 rounded-08 mt-2\">\n          This section starts open by default.\n        </div>\n      </CollapsibleContent>\n    </Collapsible>\n  ),\n};\n\nfunction ControlledDemo() {\n  const [open, setOpen] = React.useState(false);\n  return (\n    <div style={{ width: 320 }}>\n      <Collapsible open={open} onOpenChange={setOpen}>\n        <CollapsibleTrigger asChild>\n          <button className=\"p-2 bg-background-tint-03 rounded-08 font-main-ui-action w-full text-left\">\n            {open ? \"Close\" : \"Open\"} (controlled)\n          </button>\n        </CollapsibleTrigger>\n        <CollapsibleContent>\n          <div className=\"p-4 border border-border-01 rounded-08 mt-2\">\n            Controlled collapsible content. Current state:{\" \"}\n            {open ? \"open\" : \"closed\"}.\n          </div>\n        </CollapsibleContent>\n      </Collapsible>\n    </div>\n  );\n}\n\nexport const Controlled: Story = {\n  render: () => <ControlledDemo />,\n};\n\nexport const MultipleCollapsibles: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-2\" style={{ width: 320 }}>\n      {[\"Section A\", \"Section B\", \"Section C\"].map((title) => (\n        <Collapsible key={title}>\n          <CollapsibleTrigger asChild>\n            <button className=\"p-2 bg-background-tint-03 rounded-08 font-main-ui-action w-full text-left\">\n              {title}\n            </button>\n          </CollapsibleTrigger>\n          <CollapsibleContent>\n            <div className=\"p-4 border border-border-01 rounded-08 mt-1\">\n              Content for {title}\n            </div>\n          </CollapsibleContent>\n        </Collapsible>\n      ))}\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/Collapsible.tsx",
    "content": "/**\n * Collapsible Components\n *\n * A set of components for creating expandable/collapsible sections.\n * Built on Radix UI Collapsible primitives with custom animations.\n *\n * Components:\n * - Collapsible: Root container that manages open/closed state\n * - CollapsibleTrigger: Interactive element that toggles the collapsible\n * - CollapsibleContent: Content area that expands/collapses with animation\n *\n * @example\n * ```tsx\n * import { Collapsible, CollapsibleTrigger, CollapsibleContent } from \"@/refresh-components/Collapsible\";\n *\n * // Basic usage\n * <Collapsible>\n *   <CollapsibleTrigger>\n *     <button>Toggle Content</button>\n *   </CollapsibleTrigger>\n *   <CollapsibleContent>\n *     <div className=\"p-4\">\n *       Your collapsible content here\n *     </div>\n *   </CollapsibleContent>\n * </Collapsible>\n *\n * // Controlled state\n * const [isOpen, setIsOpen] = useState(false);\n * <Collapsible open={isOpen} onOpenChange={setIsOpen}>\n *   <CollapsibleTrigger asChild>\n *     <button>{isOpen ? \"Close\" : \"Open\"}</button>\n *   </CollapsibleTrigger>\n *   <CollapsibleContent>\n *     <div>Content</div>\n *   </CollapsibleContent>\n * </Collapsible>\n * ```\n */\n\n\"use client\";\n\nimport { cn } from \"@/lib/utils\";\nimport * as CollapsiblePrimitive from \"@radix-ui/react-collapsible\";\nimport * as React from \"react\";\n\n/**\n * Collapsible Root Component\n *\n * The root container for a collapsible section. Manages the open/closed state\n * and provides context to trigger and content components.\n *\n * This is a re-export of Radix UI's Collapsible.Root component.\n *\n * @see https://www.radix-ui.com/primitives/docs/components/collapsible\n */\nconst Collapsible = CollapsiblePrimitive.Root;\n\n/**\n * Collapsible Trigger Component\n *\n * The interactive element that controls the open/closed state of the collapsible.\n * Typically wraps a button or other clickable element.\n *\n * Supports the `asChild` prop to merge props with a child element instead of\n * rendering a default button.\n *\n * This is a re-export of Radix UI's CollapsibleTrigger component.\n *\n * @see https://www.radix-ui.com/primitives/docs/components/collapsible\n */\nconst CollapsibleTrigger = CollapsiblePrimitive.CollapsibleTrigger;\n\n/**\n * Collapsible Content Component\n *\n * The expandable/collapsible content area. Automatically animates when\n * opening and closing based on the collapsible state.\n *\n * Features:\n * - Smooth slide-down animation when opening (animate-collapsible-down)\n * - Smooth slide-up animation when closing (animate-collapsible-up)\n * - Overflow hidden to prevent content bleeding during animation\n * - Supports custom className for additional styling\n *\n * Built on Radix UI's CollapsibleContent with custom animations.\n *\n * @see https://www.radix-ui.com/primitives/docs/components/collapsible\n */\nconst CollapsibleContent = React.forwardRef<\n  React.ElementRef<typeof CollapsiblePrimitive.CollapsibleContent>,\n  React.ComponentPropsWithoutRef<typeof CollapsiblePrimitive.CollapsibleContent>\n>(({ className, ...props }, ref) => (\n  <CollapsiblePrimitive.CollapsibleContent\n    ref={ref}\n    className={cn(\n      \"overflow-hidden data-[state=open]:animate-collapsible-down data-[state=closed]:animate-collapsible-up\",\n      className\n    )}\n    {...props}\n  />\n));\nCollapsibleContent.displayName = \"CollapsibleContent\";\n\nexport { Collapsible, CollapsibleContent, CollapsibleTrigger };\n"
  },
  {
    "path": "web/src/refresh-components/ColorSwatch.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport ColorSwatch from \"./ColorSwatch\";\n\nconst meta: Meta<typeof ColorSwatch> = {\n  title: \"refresh-components/ColorSwatch\",\n  component: ColorSwatch,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof ColorSwatch>;\n\nexport const Light: Story = {\n  args: {\n    light: true,\n  },\n};\n\nexport const Dark: Story = {\n  args: {\n    dark: true,\n  },\n};\n\nexport const SideBySide: Story = {\n  render: () => (\n    <div className=\"flex gap-4 items-center\">\n      <ColorSwatch light />\n      <ColorSwatch dark />\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/ColorSwatch.tsx",
    "content": "import \"@/app/css/color-swatch.css\";\n\n/**\n * A small color swatch chip component that displays a visual preview of light or dark color modes.\n * Shows \"Aa\" text sample with appropriate background and text colors.\n *\n * @param light - If true, displays light mode swatch with light background and dark text\n * @param dark - If true, displays dark mode swatch with dark background and light text\n *\n * @example\n * <ColorSwatch light />\n * <ColorSwatch dark />\n */\nexport interface ColorSwatchProps {\n  /** Display light mode variant */\n  light?: boolean;\n  /** Display dark mode variant */\n  dark?: boolean;\n}\n\nexport default function ColorSwatch({ light, dark }: ColorSwatchProps) {\n  const mode = light ? \"light\" : dark ? \"dark\" : \"light\";\n\n  return (\n    <div className=\"color-swatch\" data-state={mode}>\n      <div className=\"rounded-full h-[0.3rem] w-[0.3rem] bg-action-link-05\" />\n      <span className=\"color-swatch__text\">Aa</span>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/ConnectionProviderIcon.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport ConnectionProviderIcon from \"./ConnectionProviderIcon\";\nimport { SvgSettings, SvgStar } from \"@opal/icons\";\n\nconst meta: Meta<typeof ConnectionProviderIcon> = {\n  title: \"refresh-components/ConnectionProviderIcon\",\n  component: ConnectionProviderIcon,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof ConnectionProviderIcon>;\n\nexport const WithSettingsIcon: Story = {\n  args: {\n    icon: <SvgSettings className=\"w-5 h-5 stroke-text-04\" />,\n  },\n};\n\nexport const WithStarIcon: Story = {\n  args: {\n    icon: <SvgStar className=\"w-5 h-5 stroke-text-04\" />,\n  },\n};\n\nexport const WithCustomEmoji: Story = {\n  args: {\n    icon: <span className=\"text-lg\">📄</span>,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/ConnectionProviderIcon.tsx",
    "content": "import React, { memo } from \"react\";\nimport { SvgArrowExchange, SvgOnyxLogo } from \"@opal/icons\";\n\ntype ConnectionProviderIconProps = {\n  icon: React.ReactNode;\n};\n\nconst ConnectionProviderIcon = memo(({ icon }: ConnectionProviderIconProps) => {\n  return (\n    <div className=\"flex items-center gap-1\">\n      <div className=\"w-7 h-7 flex items-center justify-center\">{icon}</div>\n      <div className=\"w-4 h-4 flex items-center justify-center\">\n        <SvgArrowExchange className=\"w-3 h-3 stroke-text-04\" />\n      </div>\n      <div className=\"w-7 h-7 flex items-center justify-center\">\n        <SvgOnyxLogo size={24} className=\"fill-text-04\" />\n      </div>\n    </div>\n  );\n});\n\nConnectionProviderIcon.displayName = \"ConnectionProviderIcon\";\n\nexport default ConnectionProviderIcon;\n"
  },
  {
    "path": "web/src/refresh-components/Divider.stories.tsx",
    "content": "import React from \"react\";\nimport type { Meta, StoryObj } from \"@storybook/react\";\nimport Divider from \"./Divider\";\nimport { SvgSettings } from \"@opal/icons\";\n\nconst meta: Meta<typeof Divider> = {\n  title: \"refresh-components/Divider\",\n  component: Divider,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Divider>;\n\nexport const SimpleLine: Story = {\n  args: {},\n};\n\nexport const WithTitle: Story = {\n  args: {\n    showTitle: true,\n    text: \"Section Title\",\n  },\n};\n\nexport const WithTitleAndDescription: Story = {\n  args: {\n    showTitle: true,\n    text: \"Advanced Settings\",\n    description: \"Configure additional options for this section.\",\n    showDescription: true,\n  },\n};\n\nexport const WithInfoText: Story = {\n  args: {\n    showTitle: true,\n    text: \"Items\",\n    infoText: \"3 items\",\n    showInfo: true,\n  },\n};\n\nfunction FoldableDividerDemo() {\n  const [expanded, setExpanded] = React.useState(false);\n  return (\n    <div style={{ width: 400 }}>\n      <Divider\n        showTitle\n        text=\"Click to toggle\"\n        foldable\n        expanded={expanded}\n        onClick={() => setExpanded(!expanded)}\n      />\n      {expanded && (\n        <div style={{ padding: 12 }}>Expanded content goes here.</div>\n      )}\n    </div>\n  );\n}\n\nexport const Foldable: Story = {\n  render: () => <FoldableDividerDemo />,\n};\n\nexport const WithIcon: Story = {\n  args: {\n    showTitle: true,\n    text: \"Settings\",\n    icon: SvgSettings,\n  },\n};\n\nexport const Highlighted: Story = {\n  args: {\n    showTitle: true,\n    text: \"Active Section\",\n    foldable: true,\n    expanded: false,\n    isHighlighted: true,\n  },\n};\n\nexport const NoDividerLine: Story = {\n  args: {\n    showTitle: true,\n    text: \"No Lines\",\n    dividerLine: false,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/Divider.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { SvgChevronRight, SvgChevronDown, SvgInfoSmall } from \"@opal/icons\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport type { IconProps } from \"@opal/types\";\nimport Truncated from \"./texts/Truncated\";\n\nexport interface DividerProps\n  extends Omit<React.HTMLAttributes<HTMLDivElement>, \"title\"> {\n  /** Ref to the root element */\n  ref?: React.Ref<HTMLDivElement>;\n  /** Show title content instead of simple line */\n  showTitle?: boolean;\n  /** Title text */\n  text?: string;\n  /** Description text below title */\n  description?: string;\n  /** Show description */\n  showDescription?: boolean;\n  /** Enable foldable/collapsible behavior */\n  foldable?: boolean;\n  /** Controlled expanded state */\n  expanded?: boolean;\n  /** Callback when expanded changes */\n  onClick?: () => void;\n  /** Leading icon */\n  icon?: React.FunctionComponent<IconProps>;\n  /** Show info icon */\n  showInfo?: boolean;\n  /** Info text on right side */\n  infoText?: string;\n  /** Apply highlighted (hover) state styling */\n  isHighlighted?: boolean;\n  /** Show horizontal divider lines (default: true) */\n  dividerLine?: boolean;\n}\n\n/**\n * Divider Component\n *\n * A flexible divider component that supports two modes:\n * 1. Simple horizontal line divider\n * 2. Title divider with optional foldable/collapsible behavior, icons, and multiple interactive states\n *\n * @example\n * ```tsx\n * // Simple horizontal line divider\n * <Divider />\n *\n * // Title divider\n * <Divider showTitle text=\"Section Title\" />\n *\n * // Title divider with icon\n * <Divider showTitle text=\"Settings\" icon={SvgSettings} />\n *\n * // Foldable divider (collapsed)\n * <Divider showTitle text=\"Details\" foldable expanded={false} onClick={setExpanded} />\n *\n * // Foldable divider (expanded)\n * <Divider showTitle text=\"Details\" foldable expanded onClick={setExpanded} />\n *\n * // With info icon and text\n * <Divider showTitle text=\"Section\" showInfo infoText=\"3 items\" />\n *\n * // With description\n * <Divider showTitle text=\"Title\" description=\"Optional description\" showDescription />\n * ```\n */\nexport default function Divider({\n  ref,\n  showTitle,\n  text = \"Title\",\n  description,\n  showDescription,\n  foldable,\n  expanded,\n  onClick,\n  icon: Icon,\n  showInfo,\n  infoText,\n  isHighlighted,\n  dividerLine = true,\n  className,\n  ...props\n}: DividerProps) {\n  const handleClick = () => {\n    if (foldable && onClick) {\n      onClick();\n    }\n  };\n\n  // Simple horizontal line divider\n  if (!showTitle) {\n    return (\n      <div\n        ref={ref}\n        role=\"separator\"\n        className={cn(\"w-full py-1\", className)}\n        {...props}\n      >\n        <div className=\"h-px w-full bg-border-01\" />\n      </div>\n    );\n  }\n\n  // Title divider with optional features\n  return (\n    <div\n      ref={ref}\n      role={foldable ? \"button\" : \"separator\"}\n      aria-expanded={foldable ? expanded : undefined}\n      tabIndex={foldable ? 0 : undefined}\n      data-selected={isHighlighted ? \"true\" : undefined}\n      onClick={foldable ? handleClick : undefined}\n      onKeyDown={\n        foldable\n          ? (e) => {\n              if (e.key === \"Enter\" || e.key === \" \") {\n                e.preventDefault();\n                handleClick();\n              }\n            }\n          : undefined\n      }\n      className={cn(\n        \"w-full mt-1 py-0.5 rounded-08\",\n        foldable && \"group/divider cursor-pointer\",\n        foldable && !expanded && \"hover:bg-background-tint-02\",\n        foldable && !expanded && isHighlighted && \"bg-background-tint-02\",\n        foldable &&\n          expanded &&\n          \"bg-background-tint-01 hover:bg-background-tint-02\",\n        className\n      )}\n      {...props}\n    >\n      {/* Title line */}\n      <div\n        className={cn(\n          \"flex items-center py-1\",\n          !dividerLine && (foldable ? \"pl-1.5\" : \"px-2\"),\n          dividerLine && !foldable && \"pl-1.5\"\n        )}\n      >\n        {/* Left divider line (only for foldable dividers) */}\n        {dividerLine && foldable && (\n          <div className={cn(\"h-px bg-border-01 w-1.5\")} />\n        )}\n\n        {/* Content container */}\n        <div className=\"flex items-center gap-0.5 px-0.5\">\n          {/* Icon container */}\n          {Icon && (\n            <div className=\"flex items-center justify-center size-5 p-0.5\">\n              <Icon\n                className={cn(\n                  \"size-4 stroke-text-03\",\n                  foldable && \"group-hover/divider:stroke-text-04\",\n                  foldable && expanded && \"stroke-text-04\",\n                  foldable && isHighlighted && \"stroke-text-04\"\n                )}\n              />\n            </div>\n          )}\n\n          {/* Title text */}\n          <Text\n            secondaryBody\n            className={cn(\n              \"leading-4 truncate\",\n              !foldable && \"text-text-03\",\n              foldable &&\n                !expanded &&\n                \"text-text-03 group-hover/divider:text-text-04\",\n              foldable && expanded && \"text-text-04\",\n              foldable && isHighlighted && \"text-text-04\"\n            )}\n          >\n            {text}\n          </Text>\n\n          {/* Info icon */}\n          {showInfo && (\n            <div className=\"flex items-center justify-center size-5 p-0.5\">\n              <SvgInfoSmall\n                className={cn(\n                  \"size-3 stroke-text-03\",\n                  foldable && \"group-hover/divider:stroke-text-04\",\n                  foldable && expanded && \"stroke-text-04\",\n                  foldable && isHighlighted && \"stroke-text-04\"\n                )}\n              />\n            </div>\n          )}\n        </div>\n\n        {/* Center divider line (flex-1 to fill remaining space) */}\n        <div className={cn(\"flex-1\", dividerLine && \"h-px bg-border-01\")} />\n\n        {/* Info text on right side */}\n        {infoText && (\n          <>\n            <Text\n              secondaryBody\n              className={cn(\n                \"leading-4 px-0.5\",\n                !foldable && \"text-text-03\",\n                foldable &&\n                  !expanded &&\n                  \"text-text-03 group-hover/divider:text-text-04\",\n                foldable && expanded && \"text-text-04\",\n                foldable && isHighlighted && \"text-text-04\"\n              )}\n            >\n              {infoText}\n            </Text>\n            {/* Right divider line after info text */}\n            {dividerLine && (\n              <div\n                className={cn(\"h-px bg-border-01\", foldable ? \"w-1.5\" : \"w-2\")}\n              />\n            )}\n          </>\n        )}\n\n        {/* Chevron button for foldable */}\n        {foldable && (\n          <div className=\"flex items-center justify-center size-6\">\n            {expanded ? (\n              <SvgChevronDown\n                className={cn(\n                  \"size-4 stroke-text-03\",\n                  \"group-hover/divider:stroke-text-04\",\n                  expanded && \"stroke-text-04\",\n                  isHighlighted && \"stroke-text-04\"\n                )}\n              />\n            ) : (\n              <SvgChevronRight\n                className={cn(\n                  \"size-4 stroke-text-03\",\n                  \"group-hover/divider:stroke-text-04\",\n                  isHighlighted && \"stroke-text-04\"\n                )}\n              />\n            )}\n          </div>\n        )}\n      </div>\n\n      {/* Description line */}\n      {showDescription && description && (\n        <div className=\"flex items-center py-1 pl-2\">\n          <Truncated secondaryBody text03>\n            {description}\n          </Truncated>\n        </div>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/EmptyMessage.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport EmptyMessage from \"./EmptyMessage\";\nimport { SvgFileText, SvgUsers } from \"@opal/icons\";\n\nconst meta: Meta<typeof EmptyMessage> = {\n  title: \"refresh-components/messages/EmptyMessage\",\n  component: EmptyMessage,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof EmptyMessage>;\n\nexport const Default: Story = {\n  args: {\n    title: \"No items found\",\n  },\n};\n\nexport const WithDescription: Story = {\n  args: {\n    title: \"No connectors configured\",\n    description:\n      \"Set up a connector to start indexing documents from your data sources.\",\n  },\n};\n\nexport const WithCustomIcon: Story = {\n  args: {\n    icon: SvgFileText,\n    title: \"No documents available\",\n    description: \"Upload documents or connect a data source to get started.\",\n  },\n};\n\nexport const UsersEmpty: Story = {\n  args: {\n    icon: SvgUsers,\n    title: \"No users in this group\",\n    description: \"Add users to this group to grant them access.\",\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/EmptyMessage.tsx",
    "content": "/**\n * EmptyMessage - A component for displaying empty state messages\n *\n * Displays a translucent card with an icon and message text to indicate\n * when no data or content is available.\n *\n * Features:\n * - Translucent card background with dashed border\n * - Horizontal layout with icon on left, text on right\n * - 0.5rem gap between icon and text\n * - Accepts string children for the message text\n * - Customizable icon\n *\n * @example\n * ```tsx\n * import EmptyMessage from \"@/refresh-components/EmptyMessage\";\n * import { SvgActivity } from \"@opal/icons\";\n *\n * // Basic usage\n * <EmptyMessage icon={SvgActivity}>\n *   No connectors set up for your organization.\n * </EmptyMessage>\n *\n * // With different icon\n * <EmptyMessage icon={SvgFileText}>\n *   No documents available.\n * </EmptyMessage>\n * ```\n */\n\nimport { SvgEmpty } from \"@opal/icons\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Content } from \"@opal/layouts\";\nimport { IconProps } from \"@opal/types\";\n\nexport interface EmptyMessageProps {\n  icon?: React.FunctionComponent<IconProps>;\n  title: string;\n  description?: string;\n}\n\nexport default function EmptyMessage({\n  icon: Icon = SvgEmpty,\n  title,\n  description,\n}: EmptyMessageProps) {\n  return (\n    <Card variant=\"tertiary\">\n      <Content\n        icon={Icon}\n        title={title}\n        sizePreset=\"main-ui\"\n        variant=\"body\"\n        prominence=\"muted\"\n      />\n      {description && (\n        <Text secondaryBody text03>\n          {description}\n        </Text>\n      )}\n    </Card>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/EnabledCount.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport EnabledCount from \"./EnabledCount\";\n\nconst meta: Meta<typeof EnabledCount> = {\n  title: \"refresh-components/EnabledCount\",\n  component: EnabledCount,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof EnabledCount>;\n\nexport const Default: Story = {\n  args: {\n    enabledCount: 5,\n    totalCount: 12,\n  },\n};\n\nexport const WithName: Story = {\n  args: {\n    name: \"connector\",\n    enabledCount: 3,\n    totalCount: 10,\n  },\n};\n\nexport const AllEnabled: Story = {\n  args: {\n    name: \"source\",\n    enabledCount: 8,\n    totalCount: 8,\n  },\n};\n\nexport const NoneEnabled: Story = {\n  args: {\n    name: \"item\",\n    enabledCount: 0,\n    totalCount: 15,\n  },\n};\n\nexport const SingleItem: Story = {\n  args: {\n    name: \"document\",\n    enabledCount: 1,\n    totalCount: 1,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/EnabledCount.tsx",
    "content": "\"use client\";\n\nimport { memo } from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\n\ninterface EnabledCountProps {\n  name?: string;\n  enabledCount: number;\n  totalCount: number;\n}\n\nconst EnabledCount = memo(\n  ({ name, enabledCount, totalCount }: EnabledCountProps) => {\n    return (\n      <Text text03 mainUiBody>\n        <Text mainUiBody className=\"text-action-link-05\">\n          {enabledCount}\n        </Text>\n        {` of ${totalCount} ${name ?? \"\"}${\n          name && totalCount !== 1 ? \"s\" : \"\"\n        }`}\n      </Text>\n    );\n  }\n);\nEnabledCount.displayName = \"EnabledCount\";\n\nexport default EnabledCount;\n"
  },
  {
    "path": "web/src/refresh-components/FadingEdgeContainer.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport FadingEdgeContainer from \"./FadingEdgeContainer\";\n\nconst meta: Meta<typeof FadingEdgeContainer> = {\n  title: \"refresh-components/FadingEdgeContainer\",\n  component: FadingEdgeContainer,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof FadingEdgeContainer>;\n\nconst sampleItems = Array.from({ length: 20 }, (_, i) => (\n  <div key={i} className=\"p-2 border-b border-border-01\">\n    Item {i + 1}\n  </div>\n));\n\nexport const BottomFade: Story = {\n  args: {\n    direction: \"bottom\",\n    className: \"max-h-[200px] overflow-y-auto\",\n    children: sampleItems,\n  },\n};\n\nexport const TopFade: Story = {\n  args: {\n    direction: \"top\",\n    className: \"max-h-[200px] overflow-y-auto\",\n    children: sampleItems,\n  },\n};\n\nexport const CustomFadeHeight: Story = {\n  args: {\n    direction: \"bottom\",\n    className: \"max-h-[200px] overflow-y-auto\",\n    fadeClassName: \"h-16\",\n    children: sampleItems,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/FadingEdgeContainer.tsx",
    "content": "import React from \"react\";\nimport { cn } from \"@/lib/utils\";\n\ninterface FadingEdgeContainerProps {\n  /** Classes applied to the inner scrollable container */\n  className?: string;\n  /** Classes to customize the fade gradient (e.g., height, color) */\n  fadeClassName?: string;\n  children: React.ReactNode;\n  /** Which edge to show the fade on */\n  direction?: \"top\" | \"bottom\";\n}\n\n/**\n * A container that adds a gradient fade overlay at the top or bottom edge.\n *\n * Use this component to wrap scrollable content where you want to visually\n * indicate that more content exists beyond the visible area. The fade stays\n * fixed relative to the container bounds, not the scroll content.\n *\n * @example\n * // Bottom fade for a scrollable list\n * <FadingEdgeContainer\n *   direction=\"bottom\"\n *   className=\"max-h-[300px] overflow-y-auto\"\n * >\n *   {items.map(item => <Item key={item.id} />)}\n * </FadingEdgeContainer>\n *\n * @example\n * // Top fade with custom fade styling\n * <FadingEdgeContainer\n *   direction=\"top\"\n *   className=\"max-h-[200px] overflow-y-auto\"\n *   fadeClassName=\"h-12\"\n * >\n *   {content}\n * </FadingEdgeContainer>\n */\nconst FadingEdgeContainer: React.FC<FadingEdgeContainerProps> = ({\n  className,\n  fadeClassName,\n  children,\n  direction = \"top\",\n}) => {\n  const isTop = direction === \"top\";\n\n  return (\n    <div className=\"relative\">\n      <div className={className}>{children}</div>\n      <div\n        className={cn(\n          \"absolute inset-x-0 h-8 pointer-events-none z-10\",\n          isTop\n            ? \"top-0 bg-gradient-to-b from-background to-transparent\"\n            : \"bottom-0 bg-gradient-to-t from-background to-transparent\",\n          fadeClassName\n        )}\n      />\n    </div>\n  );\n};\n\nexport default FadingEdgeContainer;\n"
  },
  {
    "path": "web/src/refresh-components/FrostedDiv.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport FrostedDiv from \"./FrostedDiv\";\n\nconst meta: Meta<typeof FrostedDiv> = {\n  title: \"refresh-components/FrostedDiv\",\n  component: FrostedDiv,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n  decorators: [\n    (Story) => (\n      <div\n        className=\"p-12\"\n        style={{\n          background:\n            \"linear-gradient(135deg, #667eea 0%, #764ba2 50%, #f093fb 100%)\",\n        }}\n      >\n        <Story />\n      </div>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof FrostedDiv>;\n\nexport const Default: Story = {\n  args: {\n    className: \"p-4\",\n    children: (\n      <span className=\"text-text-04 font-main-ui-action\">\n        Frosted glass content\n      </span>\n    ),\n  },\n};\n\nexport const CustomBlur: Story = {\n  args: {\n    blur: \"30px\",\n    backdropBlur: \"10px\",\n    className: \"p-6\",\n    children: (\n      <span className=\"text-text-04 font-main-ui-action\">\n        Heavy blur effect\n      </span>\n    ),\n  },\n};\n\nexport const CustomBorderRadius: Story = {\n  args: {\n    borderRadius: \"0.5rem\",\n    className: \"p-4\",\n    children: (\n      <span className=\"text-text-04 font-main-ui-action\">Rounded corners</span>\n    ),\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/FrostedDiv.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { cn } from \"@/lib/utils\";\n\nexport interface FrostedDivProps extends React.HTMLAttributes<HTMLDivElement> {\n  /**\n   * Background color for the frost effect.\n   * Defaults to a semi-transparent white\n   */\n  backgroundColor?: string;\n\n  /**\n   * Blur amount for the frosted glass effect (filter blur).\n   * Defaults to \"20px\"\n   */\n  blur?: string;\n\n  /**\n   * Backdrop blur for the glass effect.\n   * Defaults to \"6px\"\n   */\n  backdropBlur?: string;\n\n  /**\n   * Border radius for the frost effect.\n   * Defaults to \"1rem\" (16px)\n   */\n  borderRadius?: string;\n\n  /**\n   * Additional classes for the frost overlay element itself\n   */\n  overlayClassName?: string;\n}\n\n/**\n * FrostedDiv - A wrapper that adds a frosted glass bloom effect behind its children\n *\n * This component wraps content and adds a frosted glass effect behind it.\n * The wrapper adds `relative` positioning - pass layout classes via `className`.\n *\n * @example\n * ```tsx\n * <FrostedDiv>\n *   <Button>Click me</Button>\n * </FrostedDiv>\n * ```\n *\n * @example\n * // Custom blur intensity and layout\n * <FrostedDiv blur=\"30px\" className=\"flex items-center gap-2 p-2\">\n *   <Button>One</Button>\n *   <Button>Two</Button>\n * </FrostedDiv>\n */\nexport default function FrostedDiv({\n  backgroundColor = \"var(--frost-overlay)\",\n  blur = \"20px\",\n  backdropBlur = \"6px\",\n  borderRadius = \"1rem\",\n  overlayClassName,\n  className,\n  style,\n  children,\n  ...props\n}: FrostedDivProps) {\n  return (\n    <div className=\"relative\">\n      {/* Frost effect overlay - positioned behind content with bloom extending outward */}\n      <div\n        className={cn(\"absolute pointer-events-none\", overlayClassName)}\n        style={{\n          borderRadius,\n          background: backgroundColor,\n          filter: `blur(${blur})`,\n          backdropFilter: `blur(${backdropBlur})`,\n        }}\n      />\n      {/* Content rendered above the frost effect */}\n      <div className={cn(\"relative\", className)} style={style} {...props}>\n        {children}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/InlineExternalLink.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport InlineExternalLink from \"./InlineExternalLink\";\n\nconst meta: Meta<typeof InlineExternalLink> = {\n  title: \"refresh-components/InlineExternalLink\",\n  component: InlineExternalLink,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InlineExternalLink>;\n\nexport const Default: Story = {\n  args: {\n    href: \"https://docs.onyx.app\",\n    children: \"Onyx Documentation\",\n  },\n};\n\nexport const CustomClassName: Story = {\n  args: {\n    href: \"https://github.com/onyx-dot-app/onyx\",\n    children: \"GitHub Repository\",\n    className: \"text-action-link-05 underline hover:opacity-80\",\n  },\n};\n\nexport const InContext: Story = {\n  render: () => (\n    <p className=\"font-main-content-body text-text-04\">\n      For more information, visit the{\" \"}\n      <InlineExternalLink href=\"https://docs.onyx.app\">\n        official documentation\n      </InlineExternalLink>{\" \"}\n      or check out the{\" \"}\n      <InlineExternalLink href=\"https://github.com/onyx-dot-app/onyx\">\n        source code\n      </InlineExternalLink>\n      .\n    </p>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/InlineExternalLink.tsx",
    "content": "import { ReactNode } from \"react\";\n\nexport interface InlineExternalLinkProps {\n  href: string;\n  children: ReactNode;\n  className?: string;\n}\n\nexport default function InlineExternalLink({\n  href,\n  children,\n  className,\n}: InlineExternalLinkProps) {\n  return (\n    <a\n      href={href}\n      target=\"_blank\"\n      rel=\"noopener noreferrer\"\n      className={className ?? \"underline\"}\n    >\n      {children}\n    </a>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/Logo.tsx",
    "content": "\"use client\";\n\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport {\n  DEFAULT_LOGO_SIZE_PX,\n  NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED,\n} from \"@/lib/constants\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport { useMemo } from \"react\";\nimport { SvgOnyxLogo, SvgOnyxLogoTyped } from \"@opal/icons\";\n\nexport interface LogoProps {\n  folded?: boolean;\n  size?: number;\n  className?: string;\n}\n\nexport default function Logo({ folded, size, className }: LogoProps) {\n  const resolvedSize = size ?? DEFAULT_LOGO_SIZE_PX;\n  const settings = useSettingsContext();\n  const logoDisplayStyle = settings.enterpriseSettings?.logo_display_style;\n  const applicationName = settings.enterpriseSettings?.application_name;\n\n  // Cache-buster: the logo URL never changes (/api/enterprise-settings/logo)\n  // so the browser serves the in-memory cached image even after an admin\n  // uploads a new one. Generating a fresh timestamp each time enterprise\n  // settings are revalidated by SWR appends a unique query param to force\n  // the browser to re-fetch the image.\n  const logoBuster = useMemo(\n    () => Date.now(),\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n    [settings.enterpriseSettings]\n  );\n\n  const logo = settings.enterpriseSettings?.use_custom_logo ? (\n    <div\n      className={cn(\n        \"aspect-square rounded-full overflow-hidden relative flex-shrink-0\",\n        className\n      )}\n      style={{ height: resolvedSize }}\n    >\n      {/* eslint-disable-next-line @next/next/no-img-element */}\n      <img\n        alt=\"Logo\"\n        src={`/api/enterprise-settings/logo?v=${logoBuster}`}\n        className=\"object-cover object-center w-full h-full\"\n      />\n    </div>\n  ) : (\n    <SvgOnyxLogo\n      size={resolvedSize}\n      className={cn(\"flex-shrink-0\", className)}\n    />\n  );\n\n  const renderNameAndPoweredBy = (opts: {\n    includeLogo: boolean;\n    includeName: boolean;\n  }) => {\n    return (\n      <div className=\"flex min-w-0 gap-2\">\n        {opts.includeLogo && logo}\n        {!folded && (\n          /* H3 text is 4px larger (28px) than the Logo icon (24px), so negative margin hack. */\n          <div className=\"flex flex-1 flex-col -mt-0.5\">\n            {opts.includeName && (\n              <Truncated headingH3>{applicationName}</Truncated>\n            )}\n            {!NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED && (\n              <Text\n                secondaryBody\n                text03\n                className={\"line-clamp-1 truncate\"}\n                nowrap\n              >\n                Powered by Onyx\n              </Text>\n            )}\n          </div>\n        )}\n      </div>\n    );\n  };\n\n  // Handle \"logo_only\" display style\n  if (logoDisplayStyle === \"logo_only\") {\n    return renderNameAndPoweredBy({ includeLogo: true, includeName: false });\n  }\n\n  // Handle \"name_only\" display style\n  if (logoDisplayStyle === \"name_only\") {\n    return renderNameAndPoweredBy({ includeLogo: false, includeName: true });\n  }\n\n  // Handle \"logo_and_name\" or default behavior\n  return applicationName ? (\n    renderNameAndPoweredBy({ includeLogo: true, includeName: true })\n  ) : folded ? (\n    <SvgOnyxLogo\n      size={resolvedSize}\n      className={cn(\"flex-shrink-0\", className)}\n    />\n  ) : (\n    <SvgOnyxLogoTyped size={resolvedSize} className={className} />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/Modal.stories.tsx",
    "content": "import React from \"react\";\nimport type { Meta, StoryObj } from \"@storybook/react\";\nimport Modal from \"./Modal\";\nimport { Button } from \"@opal/components\";\nimport { SvgInfoSmall } from \"@opal/icons\";\n\nconst meta: Meta<typeof Modal> = {\n  title: \"refresh-components/Modal\",\n  component: Modal,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"fullscreen\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Modal>;\n\nfunction ModalDemo() {\n  const [open, setOpen] = React.useState(false);\n  return (\n    <div style={{ padding: 32 }}>\n      <Button onClick={() => setOpen(true)}>Open Modal</Button>\n      <Modal open={open} onOpenChange={setOpen}>\n        <Modal.Content width=\"sm\" height=\"fit\">\n          <Modal.Header\n            icon={SvgInfoSmall}\n            title=\"Example Modal\"\n            description=\"This is a demo modal with header, body, and footer.\"\n            onClose={() => setOpen(false)}\n          />\n          <Modal.Body>\n            <div style={{ padding: 16 }}>\n              Some body content goes here. You can put forms, text, or anything\n              else inside the modal body.\n            </div>\n          </Modal.Body>\n          <Modal.Footer>\n            <Button\n              variant=\"default\"\n              prominence=\"secondary\"\n              onClick={() => setOpen(false)}\n            >\n              Cancel\n            </Button>\n            <Button\n              variant=\"action\"\n              prominence=\"primary\"\n              onClick={() => setOpen(false)}\n            >\n              Confirm\n            </Button>\n          </Modal.Footer>\n        </Modal.Content>\n      </Modal>\n    </div>\n  );\n}\n\nexport const Default: Story = {\n  render: () => <ModalDemo />,\n};\n\nfunction LargeModalDemo() {\n  const [open, setOpen] = React.useState(false);\n  return (\n    <div style={{ padding: 32 }}>\n      <Button onClick={() => setOpen(true)}>Open Large Modal</Button>\n      <Modal open={open} onOpenChange={setOpen}>\n        <Modal.Content width=\"full\" height=\"full\">\n          <Modal.Header\n            icon={SvgInfoSmall}\n            title=\"Large Modal\"\n            description=\"A large modal with full height.\"\n            onClose={() => setOpen(false)}\n          />\n          <Modal.Body>\n            <div style={{ padding: 16 }}>\n              {Array.from({ length: 20 }, (_, i) => (\n                <p key={i} style={{ marginBottom: 12 }}>\n                  Paragraph {i + 1}: Lorem ipsum dolor sit amet, consectetur\n                  adipiscing elit. Sed do eiusmod tempor incididunt ut labore et\n                  dolore magna aliqua.\n                </p>\n              ))}\n            </div>\n          </Modal.Body>\n          <Modal.Footer>\n            <Button\n              variant=\"default\"\n              prominence=\"secondary\"\n              onClick={() => setOpen(false)}\n            >\n              Close\n            </Button>\n          </Modal.Footer>\n        </Modal.Content>\n      </Modal>\n    </div>\n  );\n}\n\nexport const Large: Story = {\n  render: () => <LargeModalDemo />,\n};\n\nfunction GrayBackgroundDemo() {\n  const [open, setOpen] = React.useState(false);\n  return (\n    <div style={{ padding: 32 }}>\n      <Button onClick={() => setOpen(true)}>Open Gray Modal</Button>\n      <Modal open={open} onOpenChange={setOpen}>\n        <Modal.Content width=\"sm\" height=\"fit\" background=\"gray\">\n          <Modal.Header\n            icon={SvgInfoSmall}\n            title=\"Gray Background\"\n            description=\"This modal uses background='gray' for a tinted card.\"\n            onClose={() => setOpen(false)}\n          />\n          <Modal.Body>\n            <div style={{ padding: 16 }}>\n              The modal card background uses the tinted color variant.\n            </div>\n          </Modal.Body>\n          <Modal.Footer>\n            <Button\n              variant=\"default\"\n              prominence=\"secondary\"\n              onClick={() => setOpen(false)}\n            >\n              Close\n            </Button>\n          </Modal.Footer>\n        </Modal.Content>\n      </Modal>\n    </div>\n  );\n}\n\nexport const GrayBackground: Story = {\n  render: () => <GrayBackgroundDemo />,\n};\n\nfunction NoOverlayDemo() {\n  const [open, setOpen] = React.useState(false);\n  return (\n    <div style={{ padding: 32 }}>\n      <Button onClick={() => setOpen(true)}>Open Without Overlay</Button>\n      <Modal open={open} onOpenChange={setOpen}>\n        <Modal.Content width=\"sm\" height=\"fit\" skipOverlay>\n          <Modal.Header\n            icon={SvgInfoSmall}\n            title=\"No Overlay\"\n            description=\"This modal skips the backdrop overlay.\"\n            onClose={() => setOpen(false)}\n          />\n          <Modal.Body>\n            <div style={{ padding: 16 }}>\n              The page behind remains fully visible with no blur or mask.\n            </div>\n          </Modal.Body>\n          <Modal.Footer>\n            <Button\n              variant=\"default\"\n              prominence=\"secondary\"\n              onClick={() => setOpen(false)}\n            >\n              Close\n            </Button>\n          </Modal.Footer>\n        </Modal.Content>\n      </Modal>\n    </div>\n  );\n}\n\nexport const NoOverlay: Story = {\n  render: () => <NoOverlayDemo />,\n};\n"
  },
  {
    "path": "web/src/refresh-components/Modal.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport * as DialogPrimitive from \"@radix-ui/react-dialog\";\nimport { cn } from \"@/lib/utils\";\nimport type { IconFunctionComponent, RichStr } from \"@opal/types\";\nimport { Button } from \"@opal/components\";\nimport { Content } from \"@opal/layouts\";\nimport { toPlainString } from \"@opal/components/text/InlineMarkdown\";\nimport { SvgX } from \"@opal/icons\";\nimport { WithoutStyles } from \"@/types\";\nimport { Section, SectionProps } from \"@/layouts/general-layouts\";\nimport useContainerCenter from \"@/hooks/useContainerCenter\";\n\n/**\n * Modal Root Component\n *\n * Wrapper around Radix Dialog.Root for managing modal state.\n *\n * @example\n * ```tsx\n * <Modal open={isOpen} onOpenChange={setIsOpen}>\n *   <Modal.Content>\n *     {/* Modal content *\\/}\n *   </Modal.Content>\n * </Modal>\n * ```\n */\nconst ModalRoot = DialogPrimitive.Root;\n\n/**\n * Modal Overlay Component\n *\n * Backdrop overlay that appears behind the modal.\n *\n * @example\n * ```tsx\n * <Modal.Overlay />\n * ```\n */\nconst ModalOverlay = React.forwardRef<\n  React.ComponentRef<typeof DialogPrimitive.Overlay>,\n  WithoutStyles<React.ComponentPropsWithoutRef<typeof DialogPrimitive.Overlay>>\n>(({ ...props }, ref) => (\n  <DialogPrimitive.Overlay\n    ref={ref}\n    className={cn(\n      \"fixed inset-0 z-modal-overlay bg-mask-03 backdrop-blur-03 pointer-events-none\",\n      \"data-[state=open]:animate-in data-[state=closed]:animate-out\",\n      \"data-[state=open]:fade-in-0 data-[state=closed]:fade-out-0\"\n    )}\n    {...props}\n  />\n));\nModalOverlay.displayName = DialogPrimitive.Overlay.displayName;\n\n/**\n * Modal Context for managing close button ref, warning state, and height variant\n */\ninterface ModalContextValue {\n  closeButtonRef: React.RefObject<HTMLDivElement | null>;\n  hasAttemptedClose: boolean;\n  setHasAttemptedClose: (value: boolean) => void;\n  height: keyof typeof heightClasses;\n  hasDescription: boolean;\n  setHasDescription: (value: boolean) => void;\n}\n\nconst ModalContext = React.createContext<ModalContextValue | null>(null);\n\nconst useModalContext = () => {\n  const context = React.useContext(ModalContext);\n  if (!context) {\n    throw new Error(\"Modal compound components must be used within Modal\");\n  }\n  return context;\n};\n\nconst widthClasses = {\n  full: \"w-[80dvw]\",\n  xl: \"w-[60rem]\",\n  lg: \"w-[50rem]\",\n  md: \"w-[40rem]\",\n  sm: \"w-[30rem]\",\n};\n\nconst heightClasses = {\n  fit: \"h-fit\",\n  sm: \"max-h-[30rem] overflow-y-auto\",\n  lg: \"max-h-[calc(100dvh-4rem)] overflow-y-auto\",\n  full: \"h-[80dvh] overflow-y-auto\",\n};\n\n/**\n * Modal Content Component\n *\n * Main modal container with default styling.\n *\n * @example\n * ```tsx\n * // Using width and height props\n * <Modal.Content width=\"full\" height=\"full\">\n *   {/* Full modal: w-[80dvw] h-[80dvh] *\\/}\n * </Modal.Content>\n *\n * <Modal.Content width=\"xl\" height=\"fit\">\n *   {/* XL modal: w-[60rem] h-fit *\\/}\n * </Modal.Content>\n *\n * <Modal.Content width=\"sm\" height=\"sm\">\n *   {/* Small modal: w-[30rem] max-h-[30rem] *\\/}\n * </Modal.Content>\n *\n * <Modal.Content width=\"sm\" height=\"lg\">\n *   {/* Tall modal: w-[30rem] max-h-[calc(100dvh-4rem)] *\\/}\n * </Modal.Content>\n * ```\n */\nexport interface ModalContentProps\n  extends WithoutStyles<\n    React.ComponentPropsWithoutRef<typeof DialogPrimitive.Content>\n  > {\n  width?: keyof typeof widthClasses;\n  height?: keyof typeof heightClasses;\n  /** Vertical placement of the modal. `\"center\"` (default) centers in the\n   *  viewport/container. `\"top\"` pins the modal near the top of the viewport,\n   *  matching the position used by CommandMenu. */\n  position?: \"center\" | \"top\";\n  preventAccidentalClose?: boolean;\n  skipOverlay?: boolean;\n  background?: \"default\" | \"gray\";\n  /** Content rendered below the modal card, floating with gap-4 (1rem) separation.\n   *  Stays inside DialogPrimitive.Content for proper focus management. */\n  bottomSlot?: React.ReactNode;\n}\nconst ModalContent = React.forwardRef<\n  React.ComponentRef<typeof DialogPrimitive.Content>,\n  ModalContentProps\n>(\n  (\n    {\n      children,\n      width = \"xl\",\n      height = \"fit\",\n      position = \"center\",\n      preventAccidentalClose = true,\n      skipOverlay = false,\n      background = \"default\",\n      bottomSlot,\n      ...props\n    },\n    ref\n  ) => {\n    const closeButtonRef = React.useRef<HTMLDivElement>(null);\n    const [hasAttemptedClose, setHasAttemptedClose] = React.useState(false);\n    const [hasDescription, setHasDescription] = React.useState(false);\n    const hasUserTypedRef = React.useRef(false);\n\n    // Reset state when modal closes or opens\n    const resetState = React.useCallback(() => {\n      setHasAttemptedClose(false);\n      hasUserTypedRef.current = false;\n    }, []);\n\n    // Handle input events to detect typing\n    const handleInput = React.useCallback((e: Event) => {\n      // Early exit if already detected typing (performance optimization)\n      if (hasUserTypedRef.current) {\n        return;\n      }\n\n      // Only trust events triggered by actual user interaction\n      if (!e.isTrusted) {\n        return;\n      }\n\n      const target = e.target as HTMLElement;\n\n      // Only handle input and textarea elements\n      if (\n        !(\n          target instanceof HTMLInputElement ||\n          target instanceof HTMLTextAreaElement\n        )\n      ) {\n        return;\n      }\n\n      // Skip non-text inputs\n      if (\n        target.type === \"hidden\" ||\n        target.type === \"submit\" ||\n        target.type === \"button\" ||\n        target.type === \"checkbox\" ||\n        target.type === \"radio\"\n      ) {\n        return;\n      }\n      // Mark that user has typed something\n      hasUserTypedRef.current = true;\n    }, []);\n\n    // Keep track of the container node for cleanup\n    const containerNodeRef = React.useRef<HTMLDivElement | null>(null);\n\n    // Callback ref to attach event listener when element mounts\n    const contentRef = React.useCallback(\n      (node: HTMLDivElement | null) => {\n        // Cleanup previous listener if exists\n        if (containerNodeRef.current) {\n          containerNodeRef.current.removeEventListener(\n            \"input\",\n            handleInput,\n            true\n          );\n        }\n\n        // Attach new listener if node exists\n        if (node) {\n          node.addEventListener(\"input\", handleInput, true);\n          containerNodeRef.current = node;\n        } else {\n          containerNodeRef.current = null;\n        }\n      },\n      [handleInput]\n    );\n\n    // Check if user has typed anything\n    const hasModifiedInputs = React.useCallback(() => {\n      return hasUserTypedRef.current;\n    }, []);\n\n    // Handle escape key and outside clicks\n    const handleInteractOutside = React.useCallback(\n      (e: Event) => {\n        // If preventAccidentalClose is disabled, always allow immediate close\n        if (!preventAccidentalClose) {\n          setHasAttemptedClose(false);\n          return;\n        }\n\n        // If preventAccidentalClose is enabled, check if user has modified inputs\n        if (hasModifiedInputs()) {\n          if (!hasAttemptedClose) {\n            // First attempt: prevent close and focus the close button\n            e.preventDefault();\n            setHasAttemptedClose(true);\n            setTimeout(() => {\n              closeButtonRef.current?.focus();\n            }, 0);\n          } else {\n            // Second attempt: allow close\n            setHasAttemptedClose(false);\n          }\n        } else {\n          // No modified inputs: allow immediate close\n          setHasAttemptedClose(false);\n        }\n      },\n      [preventAccidentalClose, hasModifiedInputs, hasAttemptedClose]\n    );\n\n    const handleRef = (node: HTMLDivElement | null) => {\n      // Handle forwarded ref\n      if (typeof ref === \"function\") {\n        ref(node);\n      } else if (ref) {\n        ref.current = node;\n      }\n      // Handle content ref with event listener\n      contentRef(node);\n    };\n\n    const { centerX, centerY, hasContainerCenter } = useContainerCenter();\n\n    const isTop = position === \"top\";\n\n    const animationClasses = cn(\n      \"data-[state=open]:fade-in-0 data-[state=closed]:fade-out-0\",\n      \"data-[state=open]:zoom-in-95 data-[state=closed]:zoom-out-95\",\n      !isTop &&\n        \"data-[state=open]:slide-in-from-top-1/2 data-[state=closed]:slide-out-to-top-1/2\",\n      \"duration-200\"\n    );\n\n    const containerStyle: React.CSSProperties | undefined =\n      hasContainerCenter && !isTop\n        ? ({\n            left: centerX,\n            top: centerY,\n            \"--tw-enter-translate-x\": \"-50%\",\n            \"--tw-exit-translate-x\": \"-50%\",\n            \"--tw-enter-translate-y\": \"-50%\",\n            \"--tw-exit-translate-y\": \"-50%\",\n          } as React.CSSProperties)\n        : hasContainerCenter && isTop\n          ? ({\n              left: centerX,\n              \"--tw-enter-translate-x\": \"-50%\",\n              \"--tw-exit-translate-x\": \"-50%\",\n            } as React.CSSProperties)\n          : undefined;\n\n    const positionClasses = cn(\n      \"fixed -translate-x-1/2\",\n      isTop\n        ? cn(\"top-[72px]\", !hasContainerCenter && \"left-1/2\")\n        : cn(\"-translate-y-1/2\", !hasContainerCenter && \"left-1/2 top-1/2\")\n    );\n\n    const dialogEventHandlers = {\n      onOpenAutoFocus: (e: Event) => {\n        resetState();\n        props.onOpenAutoFocus?.(e);\n      },\n      onCloseAutoFocus: (e: Event) => {\n        resetState();\n        props.onCloseAutoFocus?.(e);\n      },\n      onEscapeKeyDown: handleInteractOutside,\n      onPointerDownOutside: handleInteractOutside,\n      ...(!hasDescription && { \"aria-describedby\": undefined }),\n      ...props,\n    };\n\n    const cardClasses = cn(\n      \"overflow-hidden\",\n      background === \"gray\" ? \"bg-background-tint-01\" : \"bg-background-tint-00\",\n      \"border rounded-16 shadow-2xl\",\n      \"flex flex-col\",\n      heightClasses[height]\n    );\n\n    return (\n      <ModalContext.Provider\n        value={{\n          closeButtonRef,\n          hasAttemptedClose,\n          setHasAttemptedClose,\n          height,\n          hasDescription,\n          setHasDescription,\n        }}\n      >\n        <DialogPrimitive.Portal>\n          {!skipOverlay && <ModalOverlay />}\n          {bottomSlot ? (\n            // With bottomSlot: use asChild to wrap card + slot in a flex column\n            <DialogPrimitive.Content\n              asChild\n              ref={handleRef}\n              {...dialogEventHandlers}\n            >\n              <div\n                style={containerStyle}\n                className={cn(\n                  positionClasses,\n                  \"z-modal\",\n                  \"flex flex-col gap-4 items-center\",\n                  \"max-w-[calc(100dvw-2rem)] max-h-[calc(100dvh-2rem)]\",\n                  animationClasses,\n                  widthClasses[width]\n                )}\n              >\n                <div className={cn(cardClasses, \"w-full min-h-0\")}>\n                  {children}\n                </div>\n                <div className=\"w-full flex-shrink-0\">{bottomSlot}</div>\n              </div>\n            </DialogPrimitive.Content>\n          ) : (\n            // Without bottomSlot: original single-element rendering\n            <DialogPrimitive.Content\n              ref={handleRef}\n              style={containerStyle}\n              className={cn(\n                positionClasses,\n                \"overflow-hidden\",\n                \"z-modal\",\n                background === \"gray\"\n                  ? \"bg-background-tint-01\"\n                  : \"bg-background-tint-00\",\n                \"border rounded-16 shadow-2xl\",\n                \"flex flex-col\",\n                \"max-w-[calc(100dvw-2rem)] max-h-[calc(100dvh-2rem)]\",\n                animationClasses,\n                widthClasses[width],\n                heightClasses[height]\n              )}\n              {...dialogEventHandlers}\n            >\n              {children}\n            </DialogPrimitive.Content>\n          )}\n        </DialogPrimitive.Portal>\n      </ModalContext.Provider>\n    );\n  }\n);\nModalContent.displayName = DialogPrimitive.Content.displayName;\n\n/**\n * Modal Header Component\n *\n * Container for header content with optional bottom shadow. All header visuals\n * (icon, title, description, close button) are now controlled via this single\n * component using props, so no additional subcomponents are required.\n *\n * When `icon` is omitted the header renders a minimal variant: just the\n * title + description with the close button inline to the right of the title.\n * This is JUST to be used for preview windows\n *\n * @example\n * ```tsx\n * <Modal.Header icon={SvgWarning} title=\"Confirm Action\" description=\"Are you sure?\" />\n *\n * // Minimal variant (no icon)\n * <Modal.Header title=\"Confirm Action\" description=\"Are you sure?\" />\n *\n * // With custom content\n * // Children render below the provided title/description stack.\n * <Modal.Header icon={SvgFile} title=\"Select Files\">\n *   <InputTypeIn placeholder=\"Search...\" />\n * </Modal.Header>\n * ```\n */\ninterface ModalHeaderProps extends Omit<WithoutStyles<SectionProps>, \"title\"> {\n  icon?: IconFunctionComponent;\n  moreIcon1?: IconFunctionComponent;\n  moreIcon2?: IconFunctionComponent;\n  title: string | RichStr;\n  description?: string | RichStr;\n  onClose?: () => void;\n}\nconst ModalHeader = React.forwardRef<HTMLDivElement, ModalHeaderProps>(\n  (\n    {\n      icon,\n      moreIcon1,\n      moreIcon2,\n      title,\n      description,\n      onClose,\n      children,\n      ...props\n    },\n    ref\n  ) => {\n    const { closeButtonRef, setHasDescription } = useModalContext();\n\n    React.useLayoutEffect(() => {\n      setHasDescription(!!description);\n    }, [description, setHasDescription]);\n\n    const closeButton = onClose && (\n      <div\n        tabIndex={-1}\n        ref={closeButtonRef as React.RefObject<HTMLDivElement>}\n        className=\"outline-none\"\n      >\n        <DialogPrimitive.Close asChild>\n          <Button\n            icon={SvgX}\n            prominence=\"tertiary\"\n            size=\"sm\"\n            onClick={onClose}\n          />\n        </DialogPrimitive.Close>\n      </div>\n    );\n\n    return (\n      <Section\n        ref={ref}\n        padding={0.5}\n        alignItems=\"start\"\n        height=\"fit\"\n        {...props}\n      >\n        <Section\n          flexDirection=\"row\"\n          justifyContent=\"between\"\n          alignItems=\"start\"\n          gap={0}\n          padding={0.5}\n        >\n          <div className=\"relative w-full\">\n            {/* Close button is absolutely positioned because:\n               1. Figma mocks place it overlapping the top-right of the content area\n               2. Using ContentAction with rightChildren causes the description\n                  to wrap to the second line early due to the button reserving space */}\n            <div className=\"absolute top-0 right-0\">{closeButton}</div>\n            <DialogPrimitive.Title asChild>\n              <div>\n                <Content\n                  icon={icon}\n                  moreIcon1={moreIcon1}\n                  moreIcon2={moreIcon2}\n                  title={title}\n                  description={description}\n                  sizePreset=\"section\"\n                  variant=\"heading\"\n                />\n                {description && (\n                  <DialogPrimitive.Description className=\"hidden\">\n                    {toPlainString(description)}\n                  </DialogPrimitive.Description>\n                )}\n              </div>\n            </DialogPrimitive.Title>\n          </div>\n        </Section>\n        {children}\n      </Section>\n    );\n  }\n);\nModalHeader.displayName = \"ModalHeader\";\n\n/**\n * Modal Body Component\n *\n * Content area for the main modal content.\n *\n * @example\n * ```tsx\n * <Modal.Body>\n *   {/* Content *\\/}\n * </Modal.Body>\n * ```\n */\ninterface ModalBodyProps extends WithoutStyles<SectionProps> {\n  twoTone?: boolean;\n}\nconst ModalBody = React.forwardRef<HTMLDivElement, ModalBodyProps>(\n  ({ twoTone = true, children, ...props }, ref) => {\n    return (\n      <div\n        ref={ref}\n        className={cn(\n          twoTone && \"bg-background-tint-01\",\n          \"flex-auto min-h-0 overflow-y-auto w-full\"\n        )}\n      >\n        <Section\n          height=\"auto\"\n          padding={1}\n          gap={1}\n          alignItems=\"start\"\n          {...props}\n        >\n          {children}\n        </Section>\n      </div>\n    );\n  }\n);\nModalBody.displayName = \"ModalBody\";\n\n/**\n * Modal Footer Component\n *\n * Footer section for actions/buttons.\n *\n * @example\n * ```tsx\n * // Right-aligned buttons\n * <Modal.Footer>\n *   <Button secondary>Cancel</Button>\n *   <Button primary>Confirm</Button>\n * </Modal.Footer>\n * ```\n */\nconst ModalFooter = React.forwardRef<\n  HTMLDivElement,\n  WithoutStyles<SectionProps>\n>(({ ...props }, ref) => {\n  return (\n    <Section\n      ref={ref}\n      flexDirection=\"row\"\n      justifyContent=\"end\"\n      gap={0.5}\n      padding={1}\n      height=\"fit\"\n      {...props}\n    />\n  );\n});\nModalFooter.displayName = \"ModalFooter\";\n\nexport default Object.assign(ModalRoot, {\n  Content: ModalContent,\n  Header: ModalHeader,\n  Body: ModalBody,\n  Footer: ModalFooter,\n});\n\n// ============================================================================\n// Common Layouts\n// ============================================================================\n\nexport interface BasicModalFooterProps {\n  left?: React.ReactNode;\n  cancel?: React.ReactNode;\n  submit?: React.ReactNode;\n}\n\nexport function BasicModalFooter({\n  left,\n  cancel,\n  submit,\n}: BasicModalFooterProps) {\n  return (\n    <>\n      {left && <Section alignItems=\"start\">{left}</Section>}\n      {(cancel || submit) && (\n        <Section flexDirection=\"row\" justifyContent=\"end\" gap={0.5}>\n          {cancel}\n          {submit}\n        </Section>\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/OverflowDiv.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport OverflowDiv from \"./OverflowDiv\";\n\nconst meta: Meta<typeof OverflowDiv> = {\n  title: \"refresh-components/OverflowDiv\",\n  component: OverflowDiv,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof OverflowDiv>;\n\nconst sampleItems = Array.from({ length: 25 }, (_, i) => (\n  <div key={i} className=\"p-2 border-b border-border-01\">\n    Sidebar item {i + 1}\n  </div>\n));\n\nexport const Default: Story = {\n  args: {\n    style: { width: 260, height: 300 },\n    children: sampleItems,\n  },\n};\n\nexport const MaskDisabled: Story = {\n  args: {\n    disableMask: true,\n    style: { width: 260, height: 300 },\n    children: sampleItems,\n  },\n};\n\nexport const CustomHeight: Story = {\n  args: {\n    height: \"4rem\",\n    style: { width: 260, height: 300 },\n    children: sampleItems,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/OverflowDiv.tsx",
    "content": "\"use client\";\n\nimport React, { useRef, useEffect, useLayoutEffect } from \"react\";\nimport { usePathname } from \"next/navigation\";\nimport { cn } from \"@/lib/utils\";\n\nexport interface VerticalShadowScrollerProps\n  extends React.HtmlHTMLAttributes<HTMLDivElement> {\n  // Mask related\n  disableMask?: boolean;\n  backgroundColor?: string;\n  height?: string;\n  /**\n   * Unique identifier for this scroll container to enable scroll position persistence across navigation.\n   *\n   * When provided, the scroll position will be saved to a global Map and restored when the pathname changes\n   * (e.g., navigating between admin pages). This prevents the sidebar from jumping to the top when clicking links.\n   *\n   * If not provided, scroll position will NOT be saved/restored (opt-out of scroll persistence).\n   *\n   * @example scrollKey=\"admin-sidebar\"\n   */\n  scrollKey?: string;\n}\n\nconst SCROLL_POSITION_PREFIX = \"onyx-scroll-\";\n\nexport default function OverflowDiv({\n  disableMask,\n  backgroundColor = \"var(--background-tint-02)\",\n  height: minHeight = \"2rem\",\n  scrollKey,\n\n  className,\n  ...rest\n}: VerticalShadowScrollerProps) {\n  const scrollRef = useRef<HTMLDivElement>(null);\n  const pathname = usePathname();\n\n  // Save scroll position on every scroll event (only if scrollKey is provided)\n  useEffect(() => {\n    if (!scrollKey) return; // Opt-out: no scroll persistence if scrollKey not provided\n\n    const scrollElement = scrollRef.current;\n    if (!scrollElement) return;\n\n    const storageKey = `${SCROLL_POSITION_PREFIX}${scrollKey}`;\n    const handleScroll = () => {\n      sessionStorage.setItem(storageKey, scrollElement.scrollTop.toString());\n    };\n\n    scrollElement.addEventListener(\"scroll\", handleScroll, { passive: true });\n    return () => scrollElement.removeEventListener(\"scroll\", handleScroll);\n  }, [scrollKey]);\n\n  // Restore scroll position immediately after pathname changes (before paint)\n  useLayoutEffect(() => {\n    if (!scrollKey) return; // Opt-out: no scroll restoration if scrollKey not provided\n\n    const scrollElement = scrollRef.current;\n    if (!scrollElement) return;\n\n    const storageKey = `${SCROLL_POSITION_PREFIX}${scrollKey}`;\n    const savedPosition = parseInt(\n      sessionStorage.getItem(storageKey) || \"0\",\n      10\n    );\n    scrollElement.scrollTop = savedPosition;\n  }, [pathname, scrollKey]);\n\n  return (\n    <div className=\"relative flex-1 min-h-0 overflow-y-hidden flex flex-col\">\n      <div\n        ref={scrollRef}\n        className=\"flex-1 min-h-0 overflow-y-auto flex flex-col\"\n      >\n        <div className={cn(\"flex-1 flex flex-col\", className)} {...rest} />\n        <div style={{ minHeight }} />\n      </div>\n      {!disableMask && (\n        <div\n          className=\"absolute bottom-0 left-0 right-0 h-[1rem] z-[20] pointer-events-none\"\n          style={{\n            background: `linear-gradient(to bottom, transparent, ${backgroundColor})`,\n          }}\n        />\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/Popover.stories.tsx",
    "content": "import React from \"react\";\nimport type { Meta, StoryObj } from \"@storybook/react\";\nimport Popover from \"./Popover\";\nimport { Button } from \"@opal/components\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof Popover> = {\n  title: \"refresh-components/Popover\",\n  component: Popover,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Popover>;\n\nexport const Default: Story = {\n  render: () => (\n    <Popover>\n      <Popover.Trigger asChild>\n        <Button>Open Popover</Button>\n      </Popover.Trigger>\n      <Popover.Content>\n        <div style={{ padding: 8 }}>\n          <p>Popover content goes here.</p>\n        </div>\n      </Popover.Content>\n    </Popover>\n  ),\n};\n\nexport const WidthVariants: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: 16 }}>\n      {([\"fit\", \"md\", \"lg\", \"xl\"] as const).map((width) => (\n        <Popover key={width}>\n          <Popover.Trigger asChild>\n            <Button prominence=\"secondary\">{width}</Button>\n          </Popover.Trigger>\n          <Popover.Content width={width}>\n            <div style={{ padding: 8 }}>\n              <p>Width: {width}</p>\n            </div>\n          </Popover.Content>\n        </Popover>\n      ))}\n    </div>\n  ),\n};\n\nexport const WithMenu: Story = {\n  render: () => (\n    <Popover>\n      <Popover.Trigger asChild>\n        <Button>Options</Button>\n      </Popover.Trigger>\n      <Popover.Content width=\"lg\">\n        <Popover.Menu>\n          <Popover.Close asChild>\n            <Button prominence=\"tertiary\" width=\"full\">\n              Edit\n            </Button>\n          </Popover.Close>\n          <Popover.Close asChild>\n            <Button prominence=\"tertiary\" width=\"full\">\n              Duplicate\n            </Button>\n          </Popover.Close>\n          {null}\n          <Popover.Close asChild>\n            <Button variant=\"danger\" prominence=\"tertiary\" width=\"full\">\n              Delete\n            </Button>\n          </Popover.Close>\n        </Popover.Menu>\n      </Popover.Content>\n    </Popover>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/Popover.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport * as PopoverPrimitive from \"@radix-ui/react-popover\";\nimport { cn } from \"@/lib/utils\";\nimport Separator from \"@/refresh-components/Separator\";\nimport ShadowDiv from \"@/refresh-components/ShadowDiv\";\nimport { WithoutStyles } from \"@/types\";\nimport { Section } from \"@/layouts/general-layouts\";\n\n/**\n * Popover Root Component\n *\n * Wrapper around Radix Popover.Root for managing popover state.\n *\n * @example\n * ```tsx\n * <Popover open={isOpen} onOpenChange={setIsOpen}>\n *   <Popover.Trigger>\n *     <button>Open</button>\n *   </Popover.Trigger>\n *   <Popover.Content>\n *     {/* Popover content *\\/}\n *   </Popover.Content>\n * </Popover>\n * ```\n */\nconst PopoverRoot = PopoverPrimitive.Root;\n\n/**\n * Popover Trigger Component\n *\n * Button or element that triggers the popover to open.\n *\n * @example\n * ```tsx\n * <Popover.Trigger asChild>\n *   <button>Click me</button>\n * </Popover.Trigger>\n * ```\n */\nconst PopoverTrigger = PopoverPrimitive.Trigger;\n\n/**\n * Popover Anchor Component\n *\n * An optional element to position the popover relative to.\n *\n * @example\n * ```tsx\n * <Popover>\n *   <Popover.Anchor asChild>\n *     <div>Anchor element</div>\n *   </Popover.Anchor>\n *   <Popover.Trigger>\n *     <button>Click me</button>\n *   </Popover.Trigger>\n *   <Popover.Content>\n *     {/* This will be positioned relative to the anchor *\\/}\n *   </Popover.Content>\n * </Popover>\n * ```\n */\nconst PopoverAnchor = PopoverPrimitive.Anchor;\n\n/**\n * Popover Close Component\n *\n * Element that closes the popover when clicked.\n *\n * @example\n * ```tsx\n * <Popover.Close asChild>\n *   <button>Close</button>\n * </Popover.Close>\n * ```\n */\nconst PopoverClose = PopoverPrimitive.Close;\n\n/**\n * Popover Content Component\n *\n * The main popover container with default styling.\n *\n * Widths:\n * - `fit`: Fits content width (default)\n * - `md`: Medium width (12rem)\n * - `lg`: Large width (15rem)\n * - `xl`: Extra large width (18rem)\n *\n * @param width - Width of the popover. Default: \"fit\"\n *\n * @example\n * ```tsx\n * <Popover.Content align=\"start\" sideOffset={8}>\n *   <div>Popover content here</div>\n * </Popover.Content>\n *\n * // Medium width\n * <Popover.Content width=\"md\">\n *   <div>Medium width content</div>\n * </Popover.Content>\n *\n * // Extra large width\n * <Popover.Content width=\"xl\">\n *   <div>Extra large width content</div>\n * </Popover.Content>\n * ```\n */\ntype PopoverWidths = \"fit\" | \"sm\" | \"md\" | \"lg\" | \"xl\" | \"trigger\";\nconst widthClasses: Record<PopoverWidths, string> = {\n  fit: \"w-fit\",\n  sm: \"w-[10rem]\",\n  md: \"w-[12rem]\",\n  lg: \"w-[15rem]\",\n  xl: \"w-[18rem]\",\n  trigger: \"w-[var(--radix-popover-trigger-width)]\",\n};\ninterface PopoverContentProps\n  extends WithoutStyles<\n    React.ComponentPropsWithoutRef<typeof PopoverPrimitive.Content>\n  > {\n  width?: PopoverWidths;\n  /** Portal container. Set to a DOM element to render inside it (e.g. inside a modal). */\n  container?: HTMLElement | null;\n  ref?: React.Ref<React.ComponentRef<typeof PopoverPrimitive.Content>>;\n}\nfunction PopoverContent({\n  width = \"fit\",\n  container,\n  align = \"center\",\n  sideOffset = 4,\n  ref,\n  ...props\n}: PopoverContentProps) {\n  return (\n    <PopoverPrimitive.Portal container={container}>\n      <PopoverPrimitive.Content\n        ref={ref}\n        align={align}\n        sideOffset={sideOffset}\n        collisionPadding={8}\n        className={cn(\n          \"bg-background-neutral-00 p-1 z-popover rounded-12 border shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2\",\n          \"flex flex-col\",\n          \"max-h-[var(--radix-popover-content-available-height)]\",\n          \"overflow-hidden\",\n          widthClasses[width]\n        )}\n        {...props}\n      />\n    </PopoverPrimitive.Portal>\n  );\n}\n\nexport default Object.assign(PopoverRoot, {\n  Trigger: PopoverTrigger,\n  Anchor: PopoverAnchor,\n  Content: PopoverContent,\n  Close: PopoverClose,\n  Menu: PopoverMenu,\n});\n\n// ============================================================================\n// Common Layouts\n// ============================================================================\n\nfunction SeparatorHelper() {\n  return <Separator className=\"py-0 px-2\" />;\n}\n\n/**\n * Popover Menu Component\n *\n * Converts a list of React nodes into a vertical menu with automatic separator handling.\n *\n * @remarks\n * - Treats `null` values as separator lines\n * - Filters out `undefined` and `false` values\n * - Removes separators at the beginning and end of the list\n *\n * @example\n * ```tsx\n * <Popover>\n *   <Popover.Trigger asChild>\n *     <button>Options</button>\n *   </Popover.Trigger>\n *   <Popover.Content>\n *     <Popover.Menu>\n *       <MenuItem>Option 1</MenuItem>\n *       <MenuItem>Option 2</MenuItem>\n *       {null}  {/* Separator line *\\/}\n *       <MenuItem>Option 3</MenuItem>\n *     </Popover.Menu>\n *   </Popover.Content>\n * </Popover>\n *\n * // With footer\n * <Popover.Menu\n *   footer={<Button>Apply</Button>}\n * >\n *   <MenuItem>Item 1</MenuItem>\n *   <MenuItem>Item 2</MenuItem>\n * </Popover.Menu>\n * ```\n */\nexport interface PopoverMenuProps {\n  children?: React.ReactNode[];\n  footer?: React.ReactNode;\n\n  // Ref for the scrollable container (useful for programmatic scrolling)\n  scrollContainerRef?: React.RefObject<HTMLDivElement | null>;\n}\nexport function PopoverMenu({\n  children,\n  footer,\n  scrollContainerRef,\n}: PopoverMenuProps) {\n  if (!children) return null;\n\n  const definedChildren = children.filter(\n    (child) => child !== undefined && child !== false\n  );\n  const filteredChildren = definedChildren.filter((child, index) => {\n    if (child !== null) return true;\n    return index !== 0 && index !== definedChildren.length - 1;\n  });\n\n  return (\n    <Section alignItems=\"stretch\" height=\"auto\" className=\"flex-1 min-h-0\">\n      <ShadowDiv\n        scrollContainerRef={scrollContainerRef}\n        className=\"flex flex-col gap-1 max-h-[20rem] w-full\"\n      >\n        {filteredChildren.map((child, index) => (\n          <div key={index}>\n            {child === undefined ? (\n              <></>\n            ) : child === null ? (\n              // Render `null`s as separator lines\n              <SeparatorHelper />\n            ) : (\n              child\n            )}\n          </div>\n        ))}\n      </ShadowDiv>\n      {footer && (\n        <>\n          <SeparatorHelper />\n          {footer}\n        </>\n      )}\n    </Section>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/PreviewImage.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport PreviewImage from \"./PreviewImage\";\n\nconst meta: Meta<typeof PreviewImage> = {\n  title: \"refresh-components/PreviewImage\",\n  component: PreviewImage,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof PreviewImage>;\n\nexport const Default: Story = {\n  args: {\n    src: \"https://placehold.co/400x300/EEE/31343C?text=Preview+Image\",\n    alt: \"Sample preview image\",\n  },\n};\n\nexport const WithCustomClass: Story = {\n  args: {\n    src: \"https://placehold.co/200x200/EEE/31343C?text=Square\",\n    alt: \"Square preview\",\n    className: \"w-[200px] h-[200px] rounded-12\",\n  },\n};\n\nexport const Landscape: Story = {\n  args: {\n    src: \"https://placehold.co/600x200/EEE/31343C?text=Landscape\",\n    alt: \"Landscape preview\",\n    className: \"max-w-[400px]\",\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/PreviewImage.tsx",
    "content": "import { cn } from \"@/lib/utils\";\n\ninterface PreviewImageProps {\n  src: string;\n  alt: string;\n  className?: string;\n}\n\nexport default function PreviewImage({\n  src,\n  alt,\n  className,\n}: PreviewImageProps) {\n  return (\n    <img\n      src={src}\n      alt={alt}\n      className={cn(\"object-contain object-center\", className)}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/ScrollIndicatorDiv.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport ScrollIndicatorDiv from \"./ScrollIndicatorDiv\";\n\nconst meta: Meta<typeof ScrollIndicatorDiv> = {\n  title: \"refresh-components/ScrollIndicatorDiv\",\n  component: ScrollIndicatorDiv,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof ScrollIndicatorDiv>;\n\nconst sampleItems = Array.from({ length: 30 }, (_, i) => (\n  <div key={i} className=\"p-2 border-b border-border-01\">\n    Scrollable item {i + 1}\n  </div>\n));\n\nexport const GradientVariant: Story = {\n  args: {\n    variant: \"gradient\",\n    style: { width: 300, height: 250 },\n    children: sampleItems,\n  },\n};\n\nexport const ShadowVariant: Story = {\n  args: {\n    variant: \"shadow\",\n    style: { width: 300, height: 250 },\n    children: sampleItems,\n  },\n};\n\nexport const DisabledIndicators: Story = {\n  args: {\n    disableIndicators: true,\n    style: { width: 300, height: 250 },\n    children: sampleItems,\n  },\n};\n\nexport const WithBottomSpacing: Story = {\n  args: {\n    variant: \"gradient\",\n    bottomSpacing: \"2rem\",\n    style: { width: 300, height: 250 },\n    children: sampleItems,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/ScrollIndicatorDiv.tsx",
    "content": "\"use client\";\n\nimport React, { useEffect, useRef, useState, useCallback } from \"react\";\nimport { cn } from \"@/lib/utils\";\n\n// Throttle interval for scroll events (~60fps)\nconst SCROLL_THROTTLE_MS = 16;\n\n/**\n * A scrollable container that shows gradient or shadow indicators when\n * content overflows above or below the visible area.\n *\n * HEIGHT CONSTRAINT REQUIREMENT\n *\n * This component relies on its inner scroll container having a smaller\n * clientHeight than its scrollHeight. For that to happen, the entire\n * ancestor chain must constrain height via flex sizing (flex-1 min-h-0),\n * NOT via percentage heights (h-full).\n *\n * height: 100% resolves to \"auto\" when the containing block's height is\n * determined by flex layout (flex-auto, flex-1) rather than an explicit\n * height property — this is per the CSS spec. When that happens, the\n * container grows to fit its content and scrollHeight === clientHeight,\n * making scroll indicators invisible.\n *\n * Correct pattern: every ancestor up to the nearest fixed-height boundary\n * must form an unbroken flex column chain using \"flex-1 min-h-0\":\n *\n *   fixed-height-ancestor  (e.g. h-[500px])\n *     flex flex-col flex-1 min-h-0   <-- use flex-1, NOT h-full\n *       ScrollIndicatorDiv\n *         ...tall content...\n *\n * Common mistakes:\n *  - Using h-full instead of flex-1 min-h-0 anywhere in the chain.\n *  - Placing this inside a parent with overflow-y: auto (e.g. Modal.Body),\n *    which becomes the scroll container instead of this component's inner div.\n */\nexport interface ScrollIndicatorDivProps\n  extends React.HTMLAttributes<HTMLDivElement> {\n  // Mask/Shadow options\n  disableIndicators?: boolean;\n  disableTopIndicator?: boolean;\n  disableBottomIndicator?: boolean;\n  backgroundColor?: string;\n  indicatorHeight?: string;\n\n  // Choose between gradient mask or box shadow\n  variant?: \"gradient\" | \"shadow\";\n\n  // Optional spacing at bottom (defaults to none)\n  bottomSpacing?: string;\n}\n\nexport default function ScrollIndicatorDiv({\n  disableIndicators = false,\n  disableTopIndicator = false,\n  disableBottomIndicator = false,\n  backgroundColor = \"var(--background-tint-02)\",\n  indicatorHeight = \"3rem\",\n  variant = \"gradient\",\n  bottomSpacing,\n\n  className,\n  children,\n  ...rest\n}: ScrollIndicatorDivProps) {\n  const scrollContainerRef = useRef<HTMLDivElement>(null);\n  const [showTopIndicator, setShowTopIndicator] = useState(false);\n  const [showBottomIndicator, setShowBottomIndicator] = useState(false);\n  const throttleTimeoutRef = useRef<number | null>(null);\n  const isThrottledRef = useRef(false);\n\n  const updateScrollIndicators = useCallback(() => {\n    const container = scrollContainerRef.current;\n    if (!container) return;\n\n    const { scrollTop, scrollHeight, clientHeight } = container;\n    const isScrollable = scrollHeight > clientHeight;\n\n    // Show top indicator if scrolled down from top\n    setShowTopIndicator(isScrollable && scrollTop > 0);\n\n    // Show bottom indicator if not scrolled to bottom\n    // Add small threshold (1px) to account for rounding errors\n    setShowBottomIndicator(\n      isScrollable && scrollTop < scrollHeight - clientHeight - 1\n    );\n  }, []);\n\n  // Throttled scroll handler for better performance\n  const handleScroll = useCallback(() => {\n    if (isThrottledRef.current) return;\n\n    isThrottledRef.current = true;\n    updateScrollIndicators();\n\n    throttleTimeoutRef.current = window.setTimeout(() => {\n      isThrottledRef.current = false;\n      updateScrollIndicators();\n    }, SCROLL_THROTTLE_MS);\n  }, [updateScrollIndicators]);\n\n  useEffect(() => {\n    const container = scrollContainerRef.current;\n    if (!container) return;\n\n    // Initial check\n    updateScrollIndicators();\n\n    // Update on scroll (throttled)\n    container.addEventListener(\"scroll\", handleScroll, { passive: true });\n\n    // Update when the container itself resizes\n    const resizeObserver = new ResizeObserver(updateScrollIndicators);\n    resizeObserver.observe(container);\n\n    // Update when descendants change (e.g. syntax highlighting mutates the\n    // DOM after initial render, which changes scrollHeight without firing\n    // resize or scroll events on the container).\n    const mutationObserver = new MutationObserver(handleScroll);\n\n    return () => {\n      container.removeEventListener(\"scroll\", handleScroll);\n      resizeObserver.disconnect();\n      mutationObserver.disconnect();\n      if (throttleTimeoutRef.current) {\n        clearTimeout(throttleTimeoutRef.current);\n      }\n    };\n  }, [updateScrollIndicators, handleScroll]);\n\n  // Update when children change\n  useEffect(() => {\n    updateScrollIndicators();\n  }, [children]);\n\n  const getIndicatorStyle = (direction: \"top\" | \"bottom\") => {\n    if (variant === \"shadow\") {\n      return {\n        height: \"2px\",\n        backgroundColor: backgroundColor,\n        boxShadow:\n          direction === \"top\"\n            ? \"0 -2px 12px 0 var(--shadow-02), 0 0 4px 1px var(--shadow-02)\"\n            : \"0 4px 24px 0 var(--shadow-02), 0 2px 8px 2px var(--shadow-02)\",\n      };\n    }\n\n    // Gradient variant - use full indicator height\n    return {\n      height: indicatorHeight,\n      background:\n        direction === \"top\"\n          ? `linear-gradient(to top, transparent, ${backgroundColor})`\n          : `linear-gradient(to bottom, transparent, ${backgroundColor})`,\n    };\n  };\n\n  return (\n    <div className=\"relative flex-1 min-h-0 overflow-y-hidden flex flex-col w-full\">\n      {/* Top indicator */}\n      {!disableIndicators && !disableTopIndicator && showTopIndicator && (\n        <div\n          className=\"absolute top-0 left-0 right-0 z-[20] pointer-events-none transition-opacity duration-200\"\n          style={getIndicatorStyle(\"top\")}\n        />\n      )}\n\n      {/* Scrollable content */}\n      <div\n        ref={scrollContainerRef}\n        className={cn(\n          \"flex-1 min-h-0 overflow-y-auto flex flex-col\",\n          className\n        )}\n        {...rest}\n      >\n        {children}\n        {bottomSpacing && <div style={{ minHeight: bottomSpacing }} />}\n      </div>\n\n      {/* Bottom indicator */}\n      {!disableIndicators && !disableBottomIndicator && showBottomIndicator && (\n        <div\n          className=\"absolute bottom-0 left-0 right-0 z-[20] pointer-events-none transition-opacity duration-200\"\n          style={getIndicatorStyle(\"bottom\")}\n        />\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/Separator.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Separator from \"./Separator\";\n\nconst meta: Meta<typeof Separator> = {\n  title: \"refresh-components/Separator\",\n  component: Separator,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Separator>;\n\nexport const Horizontal: Story = {\n  decorators: [\n    (Story) => (\n      <div style={{ width: 400 }}>\n        <div>Content above</div>\n        <Story />\n        <div>Content below</div>\n      </div>\n    ),\n  ],\n};\n\nexport const Vertical: Story = {\n  args: {\n    orientation: \"vertical\",\n  },\n  decorators: [\n    (Story) => (\n      <div style={{ display: \"flex\", alignItems: \"center\", height: 60 }}>\n        <span>Left</span>\n        <Story />\n        <span>Right</span>\n      </div>\n    ),\n  ],\n};\n\nexport const NoPadding: Story = {\n  args: {\n    noPadding: true,\n  },\n  decorators: [\n    (Story) => (\n      <div style={{ width: 400 }}>\n        <div>No padding above</div>\n        <Story />\n        <div>No padding below</div>\n      </div>\n    ),\n  ],\n};\n"
  },
  {
    "path": "web/src/refresh-components/Separator.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport * as SeparatorPrimitive from \"@radix-ui/react-separator\";\nimport { cn } from \"@/lib/utils\";\n\nexport interface SeparatorProps\n  extends React.ComponentPropsWithoutRef<typeof SeparatorPrimitive.Root> {\n  noPadding?: boolean;\n  /** Custom horizontal padding in rem. Overrides the default padding. */\n  paddingXRem?: number;\n  /** Custom vertical padding in rem. Overrides the default padding. */\n  paddingYRem?: number;\n}\n\n/**\n * Separator Component\n *\n * A visual divider that separates content either horizontally or vertically.\n * Built on Radix UI's Separator primitive.\n *\n * @example\n * ```tsx\n * // Horizontal separator (default)\n * <Separator />\n *\n * // Vertical separator\n * <Separator orientation=\"vertical\" />\n *\n * // With custom className\n * <Separator className=\"my-8\" />\n *\n * // Non-decorative (announced by screen readers)\n * <Separator decorative={false} />\n * ```\n */\nconst Separator = React.forwardRef(\n  (\n    {\n      noPadding,\n      paddingXRem,\n      paddingYRem,\n      className,\n      orientation = \"horizontal\",\n      decorative = true,\n      ...props\n    }: SeparatorProps,\n    ref: React.ForwardedRef<React.ComponentRef<typeof SeparatorPrimitive.Root>>\n  ) => {\n    const isHorizontal = orientation === \"horizontal\";\n\n    return (\n      <div\n        style={{\n          ...(paddingXRem != null\n            ? {\n                paddingLeft: `${paddingXRem}rem`,\n                paddingRight: `${paddingXRem}rem`,\n              }\n            : {}),\n          ...(paddingYRem != null\n            ? {\n                paddingTop: `${paddingYRem}rem`,\n                paddingBottom: `${paddingYRem}rem`,\n              }\n            : {}),\n        }}\n        className={cn(\n          isHorizontal ? \"w-full\" : \"h-full\",\n          paddingXRem == null && !noPadding && (isHorizontal ? \"py-4\" : \"px-4\"),\n          className\n        )}\n      >\n        <SeparatorPrimitive.Root\n          ref={ref}\n          decorative={decorative}\n          orientation={orientation}\n          className={cn(\n            \"bg-border-01\",\n            isHorizontal ? \"h-[1px] w-full\" : \"h-full w-[1px]\"\n          )}\n          {...props}\n        />\n      </div>\n    );\n  }\n);\nSeparator.displayName = SeparatorPrimitive.Root.displayName;\n\nexport default Separator;\n"
  },
  {
    "path": "web/src/refresh-components/ShadowDiv.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport ShadowDiv from \"./ShadowDiv\";\n\nconst meta: Meta<typeof ShadowDiv> = {\n  title: \"refresh-components/ShadowDiv\",\n  component: ShadowDiv,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof ShadowDiv>;\n\nconst sampleItems = Array.from({ length: 30 }, (_, i) => (\n  <div key={i} className=\"p-2 border-b border-border-01\">\n    Scrollable item {i + 1}\n  </div>\n));\n\nexport const Default: Story = {\n  args: {\n    className: \"max-h-[250px]\",\n    style: { width: 300 },\n    children: sampleItems,\n  },\n};\n\nexport const BottomOnly: Story = {\n  args: {\n    bottomOnly: true,\n    className: \"max-h-[250px]\",\n    style: { width: 300 },\n    children: sampleItems,\n  },\n};\n\nexport const TopOnly: Story = {\n  args: {\n    topOnly: true,\n    className: \"max-h-[250px]\",\n    style: { width: 300 },\n    children: sampleItems,\n  },\n};\n\nexport const CustomShadowHeight: Story = {\n  args: {\n    shadowHeight: \"3rem\",\n    className: \"max-h-[250px]\",\n    style: { width: 300 },\n    children: sampleItems,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/ShadowDiv.tsx",
    "content": "\"use client\";\n\nimport React, { useState, useEffect, useCallback } from \"react\";\nimport { cn } from \"@/lib/utils\";\n\nexport interface ShadowDivProps extends React.HTMLAttributes<HTMLDivElement> {\n  /**\n   * Background color to use for the shadow gradients.\n   * Defaults to --background-neutral-00\n   */\n  backgroundColor?: string;\n\n  /**\n   * Height of the shadow gradients.\n   * Defaults to 1.5rem (24px)\n   */\n  shadowHeight?: string;\n\n  /**\n   * Ref for the scrollable container (useful for programmatic scrolling)\n   */\n  scrollContainerRef?: React.RefObject<HTMLDivElement | null>;\n\n  /**\n   * Show only bottom shadow (similar to OverflowDiv behavior)\n   */\n  bottomOnly?: boolean;\n\n  /**\n   * Show only top shadow\n   */\n  topOnly?: boolean;\n}\n\n/**\n * ShadowDiv - A scrollable container with automatic top/bottom shadow indicators\n *\n * This component wraps content in a scrollable div and automatically displays\n * gradient shadows at the top and/or bottom to indicate there's more content\n * to scroll in those directions.\n *\n * @example\n * ```tsx\n * <ShadowDiv className=\"max-h-[20rem]\">\n *   <div>Long content...</div>\n *   <div>More content...</div>\n * </ShadowDiv>\n * ```\n *\n * @example\n * // Only show bottom shadow\n * <ShadowDiv bottomOnly className=\"max-h-[20rem]\">\n *   <div>Content...</div>\n * </ShadowDiv>\n */\nexport default function ShadowDiv({\n  backgroundColor = \"var(--background-neutral-00)\",\n  shadowHeight = \"1.5rem\",\n  scrollContainerRef,\n  bottomOnly = false,\n  topOnly = false,\n  className,\n  children,\n  ...props\n}: ShadowDivProps) {\n  const [showTopShadow, setShowTopShadow] = useState(false);\n  const [showBottomShadow, setShowBottomShadow] = useState(false);\n  const internalRef = React.useRef<HTMLDivElement>(null);\n  const containerRef = scrollContainerRef || internalRef;\n\n  const checkScroll = useCallback(() => {\n    const container = containerRef.current;\n    if (!container) return;\n\n    // Show top shadow if scrolled down\n    if (!bottomOnly) {\n      setShowTopShadow(container.scrollTop > 1);\n    }\n\n    // Show bottom shadow if there's more content to scroll down\n    if (!topOnly) {\n      const hasMoreBelow =\n        container.scrollHeight - container.scrollTop - container.clientHeight >\n        1;\n      setShowBottomShadow(hasMoreBelow);\n    }\n  }, [containerRef, bottomOnly, topOnly]);\n\n  useEffect(() => {\n    const container = containerRef.current;\n    if (!container) return;\n\n    // Check initial state\n    checkScroll();\n\n    container.addEventListener(\"scroll\", checkScroll);\n    // Also check on resize in case content changes\n    const resizeObserver = new ResizeObserver(checkScroll);\n    resizeObserver.observe(container);\n\n    return () => {\n      container.removeEventListener(\"scroll\", checkScroll);\n      resizeObserver.disconnect();\n    };\n  }, [containerRef, checkScroll]);\n\n  return (\n    <div className=\"relative min-h-0 flex flex-col\">\n      <div\n        ref={containerRef}\n        className={cn(\"overflow-y-auto\", className)}\n        {...props}\n      >\n        {children}\n      </div>\n\n      {/* Top scroll shadow indicator */}\n      {!bottomOnly && (\n        <div\n          className={cn(\n            \"absolute top-0 left-0 right-0 pointer-events-none transition-opacity duration-150\",\n            showTopShadow ? \"opacity-100\" : \"opacity-0\"\n          )}\n          style={{\n            height: shadowHeight,\n            background: `linear-gradient(to bottom, ${backgroundColor}, transparent)`,\n          }}\n        />\n      )}\n\n      {/* Bottom scroll shadow indicator */}\n      {!topOnly && (\n        <div\n          className={cn(\n            \"absolute bottom-0 left-0 right-0 pointer-events-none transition-opacity duration-150\",\n            showBottomShadow ? \"opacity-100\" : \"opacity-0\"\n          )}\n          style={{\n            height: shadowHeight,\n            background: `linear-gradient(to top, ${backgroundColor}, transparent)`,\n          }}\n        />\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/SimpleCollapsible.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport SimpleCollapsible from \"./SimpleCollapsible\";\n\nconst meta: Meta<typeof SimpleCollapsible> = {\n  title: \"refresh-components/SimpleCollapsible\",\n  component: SimpleCollapsible,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof SimpleCollapsible>;\n\nexport const DefaultOpen: Story = {\n  render: () => (\n    <SimpleCollapsible>\n      <SimpleCollapsible.Header\n        title=\"Section Title\"\n        description=\"This section is open by default.\"\n      />\n      <SimpleCollapsible.Content>\n        <div>Here is some collapsible content that starts expanded.</div>\n      </SimpleCollapsible.Content>\n    </SimpleCollapsible>\n  ),\n};\n\nexport const DefaultClosed: Story = {\n  render: () => (\n    <SimpleCollapsible defaultOpen={false}>\n      <SimpleCollapsible.Header\n        title=\"Initially Closed\"\n        description=\"Click the button to expand this section.\"\n      />\n      <SimpleCollapsible.Content>\n        <div>This content was hidden until you clicked expand.</div>\n      </SimpleCollapsible.Content>\n    </SimpleCollapsible>\n  ),\n};\n\nexport const TitleOnly: Story = {\n  render: () => (\n    <SimpleCollapsible>\n      <SimpleCollapsible.Header title=\"No Description\" />\n      <SimpleCollapsible.Content>\n        <div>Content with a header that has no description.</div>\n      </SimpleCollapsible.Content>\n    </SimpleCollapsible>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/SimpleCollapsible.tsx",
    "content": "/**\n * SimpleCollapsible - A collapsible container component\n *\n * Provides an expandable/collapsible section with a header and content area.\n * Supports both controlled and uncontrolled modes.\n *\n * @example\n * ```tsx\n * import SimpleCollapsible from \"@/refresh-components/SimpleCollapsible\";\n *\n * // Basic usage\n * <SimpleCollapsible>\n *   <SimpleCollapsible.Header\n *     title=\"Section Title\"\n *     description=\"Optional description\"\n *   />\n *   <SimpleCollapsible.Content>\n *     <div>Content goes here</div>\n *   </SimpleCollapsible.Content>\n * </SimpleCollapsible>\n *\n * // Controlled state\n * const [open, setOpen] = useState(true);\n * <SimpleCollapsible open={open} onOpenChange={setOpen}>\n *   <SimpleCollapsible.Header title=\"Controlled Section\" />\n *   <SimpleCollapsible.Content>\n *     <div>Content</div>\n *   </SimpleCollapsible.Content>\n * </SimpleCollapsible>\n *\n * // Default closed\n * <SimpleCollapsible defaultOpen={false}>\n *   <SimpleCollapsible.Header title=\"Initially Closed\" />\n *   <SimpleCollapsible.Content>\n *     <div>Content</div>\n *   </SimpleCollapsible.Content>\n * </SimpleCollapsible>\n * ```\n */\n\n\"use client\";\n\nimport * as React from \"react\";\nimport { useBoundingBox } from \"@/hooks/useBoundingBox\";\nimport {\n  Collapsible,\n  CollapsibleContent,\n  CollapsibleTrigger,\n} from \"@/refresh-components/Collapsible\";\nimport { Button } from \"@opal/components\";\nimport { Content } from \"@opal/layouts\";\nimport { SvgFold, SvgExpand } from \"@opal/icons\";\nimport { WithoutStyles } from \"@/types\";\n\n// Context for sharing state between compound components\ninterface SimpleCollapsibleContextValue {\n  open: boolean;\n}\nconst SimpleCollapsibleContext =\n  React.createContext<SimpleCollapsibleContextValue | null>(null);\nfunction useSimpleCollapsible() {\n  const context = React.useContext(SimpleCollapsibleContext);\n  if (!context) {\n    throw new Error(\n      \"SimpleCollapsible compound components must be used within SimpleCollapsible\"\n    );\n  }\n  return context;\n}\n\n/**\n * SimpleCollapsible Root Component\n *\n * A collapsible container with a header and expandable content area.\n * Built on Radix UI Collapsible primitives.\n *\n * @example\n * ```tsx\n * <SimpleCollapsible>\n *   <SimpleCollapsible.Header title=\"Settings\" description=\"Configure your preferences\" />\n *   <SimpleCollapsible.Content>\n *     <div>Content here</div>\n *   </SimpleCollapsible.Content>\n * </SimpleCollapsible>\n *\n * // Controlled state\n * <SimpleCollapsible open={isOpen} onOpenChange={setIsOpen}>\n *   <SimpleCollapsible.Header title=\"Controlled\" />\n *   <SimpleCollapsible.Content>\n *     <div>Content</div>\n *   </SimpleCollapsible.Content>\n * </SimpleCollapsible>\n *\n * // Default closed\n * <SimpleCollapsible defaultOpen={false}>\n *   <SimpleCollapsible.Header title=\"Initially Closed\" />\n *   <SimpleCollapsible.Content>\n *     <div>Content</div>\n *   </SimpleCollapsible.Content>\n * </SimpleCollapsible>\n * ```\n */\ninterface SimpleCollapsibleRootProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {\n  /** Controlled open state - when provided, component becomes controlled */\n  open?: boolean;\n  /** Default open state for uncontrolled mode (defaults to true) */\n  defaultOpen?: boolean;\n  /** Callback fired when the open state changes */\n  onOpenChange?: (open: boolean) => void;\n}\nconst Root = React.forwardRef<HTMLDivElement, SimpleCollapsibleRootProps>(\n  (\n    {\n      children,\n      open: controlledOpen,\n      defaultOpen = true,\n      onOpenChange,\n      ...props\n    },\n    ref\n  ) => {\n    const [internalOpen, setInternalOpen] = React.useState(defaultOpen);\n\n    const isControlled = controlledOpen !== undefined;\n    const open = isControlled ? controlledOpen : internalOpen;\n\n    const handleOpenChange = React.useCallback(\n      (newOpen: boolean) => {\n        onOpenChange?.(newOpen);\n        if (!isControlled) {\n          setInternalOpen(newOpen);\n        }\n      },\n      [isControlled, onOpenChange]\n    );\n\n    return (\n      <SimpleCollapsibleContext.Provider value={{ open }}>\n        <Collapsible\n          ref={ref}\n          open={open}\n          onOpenChange={handleOpenChange}\n          className=\"flex flex-col flex-1 self-stretch\"\n          {...props}\n        >\n          {children}\n        </Collapsible>\n      </SimpleCollapsibleContext.Provider>\n    );\n  }\n);\nRoot.displayName = \"SimpleCollapsible\";\n\n/**\n * SimpleCollapsible Header Component\n *\n * A pre-styled header component for the collapsible trigger.\n * Displays a title and optional description.\n *\n * @example\n * ```tsx\n * <SimpleCollapsible>\n *   <SimpleCollapsible.Header\n *     title=\"Advanced Settings\"\n *     description=\"Configure advanced options\"\n *   />\n *   <SimpleCollapsible.Content>\n *     <div>Content</div>\n *   </SimpleCollapsible.Content>\n * </SimpleCollapsible>\n *\n * // Title only\n * <SimpleCollapsible>\n *   <SimpleCollapsible.Header title=\"Quick Settings\" />\n *   <SimpleCollapsible.Content>\n *     <div>Content</div>\n *   </SimpleCollapsible.Content>\n * </SimpleCollapsible>\n * ```\n */\ninterface SimpleCollapsibleHeaderProps\n  extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {\n  /** The main heading text displayed in emphasized style */\n  title: string;\n  /** Optional secondary description text displayed below the title */\n  description?: string;\n}\nconst Header = React.forwardRef<HTMLDivElement, SimpleCollapsibleHeaderProps>(\n  ({ title, description, ...props }, ref) => {\n    const { open } = useSimpleCollapsible();\n    const { ref: boundingRef, inside } = useBoundingBox();\n\n    return (\n      <CollapsibleTrigger asChild>\n        <div\n          ref={ref}\n          className=\"flex flex-row items-center justify-between gap-4 cursor-pointer select-none\"\n          {...props}\n        >\n          <div ref={boundingRef} className=\"w-full\">\n            <Content\n              title={title}\n              description={description}\n              sizePreset=\"main-content\"\n              variant=\"section\"\n            />\n          </div>\n          <Button\n            icon={open ? SvgFold : SvgExpand}\n            prominence=\"tertiary\"\n            size=\"sm\"\n            interaction={inside ? \"hover\" : \"rest\"}\n            tooltip={open ? \"Fold\" : \"Expand\"}\n          />\n        </div>\n      </CollapsibleTrigger>\n    );\n  }\n);\nHeader.displayName = \"SimpleCollapsible.Header\";\n\n/**\n * SimpleCollapsible Content Component\n *\n * Container for the collapsible content area.\n *\n * @example\n * ```tsx\n * <SimpleCollapsible>\n *   <SimpleCollapsible.Header title=\"Settings\" />\n *   <SimpleCollapsible.Content>\n *     <div>Your content here</div>\n *   </SimpleCollapsible.Content>\n * </SimpleCollapsible>\n * ```\n */\nconst ContentPanel = React.forwardRef<\n  HTMLDivElement,\n  WithoutStyles<React.HTMLAttributes<HTMLDivElement>>\n>(({ children, ...props }, ref) => {\n  return (\n    <CollapsibleContent>\n      <div ref={ref} className=\"pt-4\" {...props}>\n        {children}\n      </div>\n    </CollapsibleContent>\n  );\n});\nContentPanel.displayName = \"SimpleCollapsible.Content\";\n\nexport default Object.assign(Root, {\n  Header,\n  Content: ContentPanel,\n});\n"
  },
  {
    "path": "web/src/refresh-components/SimplePopover.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport SimplePopover from \"./SimplePopover\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nconst meta: Meta<typeof SimplePopover> = {\n  title: \"refresh-components/modals/SimplePopover\",\n  component: SimplePopover,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof SimplePopover>;\n\nexport const Default: Story = {\n  args: {\n    trigger: <Button>Open Popover</Button>,\n    children: (\n      <div style={{ padding: 16 }}>\n        <Text mainUiBody text04>\n          Popover content goes here.\n        </Text>\n      </div>\n    ),\n  },\n};\n\nexport const WithRenderPropTrigger: Story = {\n  args: {\n    trigger: (open: boolean) => (\n      <Button>{`${open ? \"Close\" : \"Open\"} Popover`}</Button>\n    ),\n    children: (\n      <div style={{ padding: 16 }}>\n        <Text mainUiBody text04>\n          The trigger updates its label based on open state.\n        </Text>\n      </div>\n    ),\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/SimplePopover.tsx",
    "content": "\"use client\";\n\nimport React, { useState } from \"react\";\nimport Popover from \"@/refresh-components/Popover\";\n\nexport interface SimplePopoverProps\n  extends React.ComponentPropsWithoutRef<typeof Popover.Content> {\n  onOpenChange?: (open: boolean) => void;\n  trigger: React.ReactNode | ((open: boolean) => React.ReactNode);\n}\n\nexport default function SimplePopover({\n  trigger,\n  onOpenChange,\n  ...rest\n}: SimplePopoverProps) {\n  const [open, setOpen] = useState(false);\n\n  function handleOnOpenChange(state: boolean) {\n    setOpen(state);\n    onOpenChange?.(state);\n  }\n\n  return (\n    <Popover open={open} onOpenChange={handleOnOpenChange}>\n      <Popover.Trigger asChild>\n        <div>{typeof trigger === \"function\" ? trigger(open) : trigger}</div>\n      </Popover.Trigger>\n      <Popover.Content align=\"start\" side=\"top\" width=\"md\" {...rest} />\n    </Popover>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/SimpleTabs.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport SimpleTabs from \"./SimpleTabs\";\n\nconst meta: Meta<typeof SimpleTabs> = {\n  title: \"refresh-components/SimpleTabs\",\n  component: SimpleTabs,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof SimpleTabs>;\n\nexport const Default: Story = {\n  args: {\n    tabs: {\n      overview: {\n        name: \"Overview\",\n        content: <div style={{ padding: 16 }}>Overview content goes here.</div>,\n      },\n      settings: {\n        name: \"Settings\",\n        content: <div style={{ padding: 16 }}>Settings content goes here.</div>,\n      },\n      activity: {\n        name: \"Activity\",\n        content: <div style={{ padding: 16 }}>Activity content goes here.</div>,\n      },\n    },\n    defaultValue: \"overview\",\n  },\n};\n\nexport const TwoTabs: Story = {\n  args: {\n    tabs: {\n      users: {\n        name: \"Users\",\n        content: (\n          <div style={{ padding: 16 }}>User management panel content.</div>\n        ),\n      },\n      groups: {\n        name: \"Groups\",\n        content: (\n          <div style={{ padding: 16 }}>Group management panel content.</div>\n        ),\n      },\n    },\n    defaultValue: \"users\",\n  },\n};\n\nexport const WithDisabledTab: Story = {\n  args: {\n    tabs: {\n      active: {\n        name: \"Active\",\n        content: <div style={{ padding: 16 }}>This tab is active.</div>,\n      },\n      disabled: {\n        name: \"Disabled\",\n        content: <div style={{ padding: 16 }}>You should not see this.</div>,\n        disabled: true,\n      },\n      another: {\n        name: \"Another\",\n        content: <div style={{ padding: 16 }}>Another tab content.</div>,\n      },\n    },\n    defaultValue: \"active\",\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/SimpleTabs.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport Tabs from \"./Tabs\";\nimport { IconProps } from \"@opal/types\";\n\n/**\n * Tab Definition\n *\n * Defines a single tab with its trigger label and content.\n */\nexport interface TabDefinition {\n  /** Display name for the tab trigger */\n  name: string;\n  /** Content to render when this tab is active */\n  content: React.ReactNode;\n  /** Optional icon to display in the tab trigger */\n  icon?: React.FunctionComponent<IconProps>;\n  /** Optional tooltip text to display on hover */\n  tooltip?: string;\n  /** Optional tooltip side */\n  tooltipSide?: \"top\" | \"bottom\" | \"left\" | \"right\";\n  /** Whether the tab is disabled */\n  disabled?: boolean;\n}\n\n/**\n * Simple Tabs Props\n */\nexport interface SimpleTabsProps {\n  /** Record of tab definitions, where the key is the tab value */\n  tabs: Record<string, TabDefinition>;\n  /** The tab value that should be active by default */\n  defaultValue?: string;\n  /** The controlled active tab value */\n  value?: string;\n  /** Callback when the active tab changes */\n  onValueChange?: (value: string) => void;\n}\n\n/**\n * SimpleTabs Component\n *\n * A simplified API for creating tabs when you don't need granular control.\n * For complex use cases, use the base Tabs component with Tabs.List, Tabs.Trigger, and Tabs.Content.\n *\n * @example\n * ```tsx\n * const UserComponent = () => {\n *   const [count, setCount] = useState(0);\n *   return <div>User tab with state: {count}</div>;\n * };\n *\n * const AdminComponent = () => {\n *   return <div>Admin content</div>;\n * };\n *\n * <SimpleTabs\n *   tabs={{\n *     user: {\n *       name: \"Users\",\n *       content: <UserComponent />,\n *       icon: SvgUser,\n *       tooltip: \"Manage users\"\n *     },\n *     admin: {\n *       name: \"Admin\",\n *       content: <AdminComponent />,\n *       icon: SvgSettings\n *     }\n *   }}\n *   defaultValue=\"user\"\n * />\n * ```\n *\n * @remarks\n * - This is a convenience wrapper around the base Tabs component\n * - For complex layouts or custom styling, use Tabs.List, Tabs.Trigger, and Tabs.Content directly\n * - Tab keys become the tab values, so they should be stable and URL-friendly\n * - Content components can use React hooks and maintain their own state\n */\nexport default function SimpleTabs({\n  tabs,\n  defaultValue,\n  value,\n  onValueChange,\n}: SimpleTabsProps) {\n  const tabEntries = Object.entries(tabs);\n\n  // Use the first tab as default if none specified\n  const effectiveDefaultValue = defaultValue ?? tabEntries[0]?.[0];\n\n  return (\n    <Tabs\n      defaultValue={effectiveDefaultValue}\n      value={value}\n      onValueChange={onValueChange}\n    >\n      <Tabs.List>\n        {tabEntries.map(([key, tab]) => (\n          <Tabs.Trigger\n            key={key}\n            value={key}\n            icon={tab.icon}\n            tooltip={tab.tooltip}\n            tooltipSide={tab.tooltipSide}\n            disabled={tab.disabled}\n          >\n            {tab.name}\n          </Tabs.Trigger>\n        ))}\n      </Tabs.List>\n\n      {tabEntries.map(([key, tab]) => (\n        <Tabs.Content key={key} value={key}>\n          {tab.content}\n        </Tabs.Content>\n      ))}\n    </Tabs>\n  );\n}\n\n/**\n * Helper function to generate tab definitions with type safety\n *\n * This is optional but provides better autocomplete and type checking when defining tabs.\n *\n * @example\n * ```tsx\n * const pageTabs = SimpleTabs.generateTabs({\n *   userTab: {\n *     name: \"Some name\",\n *     content: <SomeComponent />\n *   },\n *   anothaOne: {\n *     name: \"DJ Khalid\",\n *     content: <AnothaOne />\n *   }\n * });\n *\n * <SimpleTabs tabs={pageTabs} />\n * ```\n */\nSimpleTabs.generateTabs = <T extends Record<string, TabDefinition>>(\n  tabs: T\n): T => tabs;\n"
  },
  {
    "path": "web/src/refresh-components/SimpleTooltip.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport SimpleTooltip from \"./SimpleTooltip\";\n\nconst meta: Meta<typeof SimpleTooltip> = {\n  title: \"refresh-components/SimpleTooltip\",\n  component: SimpleTooltip,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof SimpleTooltip>;\n\nexport const Default: Story = {\n  args: {\n    tooltip: \"This is a tooltip\",\n    children: <button>Hover me</button>,\n  },\n};\n\nexport const SideVariants: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: 24, padding: 48 }}>\n      {([\"top\", \"right\", \"bottom\", \"left\"] as const).map((side) => (\n        <SimpleTooltip key={side} tooltip={`Tooltip on ${side}`} side={side}>\n          <button>{side}</button>\n        </SimpleTooltip>\n      ))}\n    </div>\n  ),\n};\n\nexport const Disabled: Story = {\n  args: {\n    tooltip: \"You won't see this\",\n    disabled: true,\n    children: <button>Tooltip disabled</button>,\n  },\n};\n\nexport const StringChild: Story = {\n  render: () => (\n    <SimpleTooltip>\n      <span>String child auto-tooltips itself</span>\n    </SimpleTooltip>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/SimpleTooltip.tsx",
    "content": "/**\n * SimpleTooltip - A wrapper component for easily adding tooltips to elements.\n *\n * IMPORTANT: Children must be ref-compatible (either a DOM element or a component\n * that uses forwardRef). This is required because TooltipTrigger uses `asChild`\n * which needs to attach a ref to the child element for positioning.\n *\n * Valid children:\n * - DOM elements: <div>, <button>, <span>, etc.\n * - forwardRef components: Components wrapped with React.forwardRef()\n *\n * Invalid children (will cause errors or warnings):\n * - Fragments: <>{content}</>\n * - Regular function components that don't forward refs\n * - Multiple children\n *\n * @example\n * // Valid - DOM element\n * <SimpleTooltip tooltip=\"Hello\">\n *   <button>Hover me</button>\n * </SimpleTooltip>\n *\n * // Valid - forwardRef component\n * <SimpleTooltip tooltip=\"Card tooltip\">\n *   <Card>Content</Card>\n * </SimpleTooltip>\n *\n * // Invalid - will cause React warning\n * <SimpleTooltip tooltip=\"Won't work\">\n *   <NonForwardRefComponent />\n * </SimpleTooltip>\n */\n\n\"use client\";\n\nimport React from \"react\";\nimport {\n  Tooltip,\n  TooltipContent,\n  TooltipProvider,\n  TooltipTrigger,\n} from \"@/components/ui/tooltip\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nexport interface SimpleTooltipProps\n  extends React.ComponentPropsWithoutRef<typeof TooltipContent> {\n  disabled?: boolean;\n  tooltip?: React.ReactNode;\n  children?: React.ReactNode;\n  delayDuration?: number;\n}\n\nexport default function SimpleTooltip({\n  disabled = false,\n  tooltip,\n  className,\n  children,\n  side = \"right\",\n  delayDuration,\n  ...rest\n}: SimpleTooltipProps) {\n  // Determine hover content based on the logic:\n  // 1. If tooltip is defined, use tooltip\n  // 2. If tooltip is undefined and children is a string, use children\n  // 3. Otherwise, no tooltip\n  const hoverContent =\n    tooltip ?? (typeof children === \"string\" ? children : undefined);\n\n  // If no hover content, just render children without tooltip\n  if (!hoverContent) return children;\n\n  // Check if tooltip is a string to wrap in Text component, otherwise render as-is\n  const tooltipContent =\n    typeof hoverContent === \"string\" ? (\n      <Text as=\"p\" textLight05>\n        {hoverContent}\n      </Text>\n    ) : (\n      hoverContent\n    );\n\n  return (\n    <TooltipProvider delayDuration={delayDuration}>\n      <Tooltip>\n        <TooltipTrigger asChild>{children}</TooltipTrigger>\n        {!disabled && (\n          <TooltipContent side={side} className={className} {...rest}>\n            {tooltipContent}\n          </TooltipContent>\n        )}\n      </Tooltip>\n    </TooltipProvider>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/Spacer.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Spacer from \"./Spacer\";\n\nconst meta: Meta<typeof Spacer> = {\n  title: \"refresh-components/Spacer\",\n  component: Spacer,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Spacer>;\n\nexport const VerticalDefault: Story = {\n  render: () => (\n    <div className=\"flex flex-col items-start\">\n      <div className=\"p-2 bg-background-tint-03\">Above</div>\n      <Spacer />\n      <div className=\"p-2 bg-background-tint-03\">Below (1rem gap)</div>\n    </div>\n  ),\n};\n\nexport const VerticalCustomRem: Story = {\n  render: () => (\n    <div className=\"flex flex-col items-start\">\n      <div className=\"p-2 bg-background-tint-03\">Above</div>\n      <Spacer vertical rem={3} />\n      <div className=\"p-2 bg-background-tint-03\">Below (3rem gap)</div>\n    </div>\n  ),\n};\n\nexport const Horizontal: Story = {\n  render: () => (\n    <div className=\"flex flex-row items-center\">\n      <div className=\"p-2 bg-background-tint-03\">Left</div>\n      <Spacer horizontal rem={2} />\n      <div className=\"p-2 bg-background-tint-03\">Right (2rem gap)</div>\n    </div>\n  ),\n};\n\nexport const PixelBased: Story = {\n  render: () => (\n    <div className=\"flex flex-col items-start\">\n      <div className=\"p-2 bg-background-tint-03\">Above</div>\n      <Spacer pixels={48} />\n      <div className=\"p-2 bg-background-tint-03\">Below (48px gap)</div>\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/Spacer.tsx",
    "content": "type DirectionProps = {\n  vertical?: boolean;\n  horizontal?: boolean;\n};\n\nexport type SpacerProps = DirectionProps &\n  ({ rem?: number; pixels?: never } | { pixels: number; rem?: never });\n\nexport default function Spacer({\n  vertical,\n  horizontal,\n  rem = 1,\n  pixels,\n}: SpacerProps) {\n  const isVertical = vertical ? true : horizontal ? false : true;\n  const size = pixels !== undefined ? `${pixels}px` : `${rem}rem`;\n\n  return (\n    <div\n      style={{\n        height: isVertical ? size : undefined,\n        width: !isVertical ? size : undefined,\n      }}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/Tabs.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Tabs from \"./Tabs\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport { SvgSettings, SvgStar, SvgRefreshCw } from \"@opal/icons\";\n\nconst meta: Meta<typeof Tabs> = {\n  title: \"refresh-components/Tabs\",\n  component: Tabs,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"padded\",\n  },\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Tabs>;\n\n// ---------------------------------------------------------------------------\n// Contained variant (default)\n// ---------------------------------------------------------------------------\n\nexport const Contained: Story = {\n  render: () => (\n    <Tabs defaultValue=\"overview\">\n      <Tabs.List variant=\"contained\">\n        <Tabs.Trigger value=\"overview\">Overview</Tabs.Trigger>\n        <Tabs.Trigger value=\"details\">Details</Tabs.Trigger>\n        <Tabs.Trigger value=\"settings\">Settings</Tabs.Trigger>\n      </Tabs.List>\n      <Tabs.Content value=\"overview\">Overview tab content</Tabs.Content>\n      <Tabs.Content value=\"details\">Details tab content</Tabs.Content>\n      <Tabs.Content value=\"settings\">Settings tab content</Tabs.Content>\n    </Tabs>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// Pill variant\n// ---------------------------------------------------------------------------\n\nexport const Pill: Story = {\n  render: () => (\n    <Tabs defaultValue=\"all\">\n      <Tabs.List variant=\"pill\">\n        <Tabs.Trigger value=\"all\">All</Tabs.Trigger>\n        <Tabs.Trigger value=\"active\">Active</Tabs.Trigger>\n        <Tabs.Trigger value=\"archived\">Archived</Tabs.Trigger>\n      </Tabs.List>\n      <Tabs.Content value=\"all\">All items</Tabs.Content>\n      <Tabs.Content value=\"active\">Active items</Tabs.Content>\n      <Tabs.Content value=\"archived\">Archived items</Tabs.Content>\n    </Tabs>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// With icons\n// ---------------------------------------------------------------------------\n\nexport const WithIcons: Story = {\n  render: () => (\n    <Tabs defaultValue=\"general\">\n      <Tabs.List variant=\"contained\">\n        <Tabs.Trigger value=\"general\" icon={SvgSettings}>\n          General\n        </Tabs.Trigger>\n        <Tabs.Trigger value=\"favorites\" icon={SvgStar}>\n          Favorites\n        </Tabs.Trigger>\n        <Tabs.Trigger value=\"sync\" icon={SvgRefreshCw}>\n          Sync\n        </Tabs.Trigger>\n      </Tabs.List>\n      <Tabs.Content value=\"general\">General settings</Tabs.Content>\n      <Tabs.Content value=\"favorites\">Your favorites</Tabs.Content>\n      <Tabs.Content value=\"sync\">Sync configuration</Tabs.Content>\n    </Tabs>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// Pill with right content\n// ---------------------------------------------------------------------------\n\nexport const PillWithRightContent: Story = {\n  render: () => (\n    <Tabs defaultValue=\"users\">\n      <Tabs.List\n        variant=\"pill\"\n        rightContent={\n          <button className=\"px-3 py-1 text-sm bg-background-tint-03 rounded-08\">\n            Add New\n          </button>\n        }\n      >\n        <Tabs.Trigger value=\"users\">Users</Tabs.Trigger>\n        <Tabs.Trigger value=\"groups\">Groups</Tabs.Trigger>\n        <Tabs.Trigger value=\"roles\">Roles</Tabs.Trigger>\n      </Tabs.List>\n      <Tabs.Content value=\"users\">Users list</Tabs.Content>\n      <Tabs.Content value=\"groups\">Groups list</Tabs.Content>\n      <Tabs.Content value=\"roles\">Roles list</Tabs.Content>\n    </Tabs>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// With disabled and tooltip\n// ---------------------------------------------------------------------------\n\nexport const WithDisabledTab: Story = {\n  render: () => (\n    <Tabs defaultValue=\"active\">\n      <Tabs.List variant=\"contained\">\n        <Tabs.Trigger value=\"active\">Active</Tabs.Trigger>\n        <Tabs.Trigger value=\"pending\" disabled tooltip=\"Coming soon\">\n          Pending\n        </Tabs.Trigger>\n        <Tabs.Trigger value=\"completed\">Completed</Tabs.Trigger>\n      </Tabs.List>\n      <Tabs.Content value=\"active\">Active tasks</Tabs.Content>\n      <Tabs.Content value=\"completed\">Completed tasks</Tabs.Content>\n    </Tabs>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// Loading state\n// ---------------------------------------------------------------------------\n\nexport const LoadingTab: Story = {\n  render: () => (\n    <Tabs defaultValue=\"data\">\n      <Tabs.List variant=\"pill\">\n        <Tabs.Trigger value=\"data\" isLoading>\n          Loading Data\n        </Tabs.Trigger>\n        <Tabs.Trigger value=\"ready\">Ready</Tabs.Trigger>\n      </Tabs.List>\n      <Tabs.Content value=\"data\">Data is loading...</Tabs.Content>\n      <Tabs.Content value=\"ready\">Ready content</Tabs.Content>\n    </Tabs>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/Tabs.tsx",
    "content": "\"use client\";\n\nimport React, {\n  useRef,\n  useState,\n  useEffect,\n  useMemo,\n  useCallback,\n} from \"react\";\nimport * as TabsPrimitive from \"@radix-ui/react-tabs\";\nimport { cn, mergeRefs } from \"@/lib/utils\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { WithoutStyles } from \"@/types\";\nimport { Section, SectionProps } from \"@/layouts/general-layouts\";\nimport { IconProps } from \"@opal/types\";\nimport { SvgChevronLeft, SvgChevronRight } from \"@opal/icons\";\nimport Text from \"./texts/Text\";\nimport { Button } from \"@opal/components\";\n\n/* =============================================================================\n   CONTEXT\n   ============================================================================= */\n\ninterface TabsContextValue {\n  variant: \"contained\" | \"pill\";\n}\n\nconst TabsContext = React.createContext<TabsContextValue | undefined>(\n  undefined\n);\n\nconst useTabsContext = () => {\n  const context = React.useContext(TabsContext);\n  return context; // Returns undefined if used outside Tabs.List (allows explicit override)\n};\n\n/**\n * TABS COMPONENT VARIANTS\n *\n * Contained (default):\n * ┌─────────────────────────────────────────────────┐\n * │ ┌──────────┐ ╔══════════╗ ┌──────────┐          │\n * │ │   Tab 1  │ ║  Tab 2   ║ │   Tab 3  │          │  ← gray background\n * │ └──────────┘ ╚══════════╝ └──────────┘          │\n * └─────────────────────────────────────────────────┘\n *                 ↑ active tab (white bg, shadow)\n *\n * Pill:\n *    Tab 1      Tab 2      Tab 3          [Action]\n *              ╔═════╗\n *              ║     ║                        ↑ optional rightContent\n * ─────────────╨═════╨─────────────────────────────\n *              ↑ sliding indicator under active tab\n *\n * @example\n * <Tabs defaultValue=\"tab1\">\n *   <Tabs.List variant=\"pill\">\n *     <Tabs.Trigger value=\"tab1\">Overview</Tabs.Trigger>\n *     <Tabs.Trigger value=\"tab2\">Details</Tabs.Trigger>\n *   </Tabs.List>\n *   <Tabs.Content value=\"tab1\">Overview content</Tabs.Content>\n *   <Tabs.Content value=\"tab2\">Details content</Tabs.Content>\n * </Tabs>\n */\n\n/* =============================================================================\n   VARIANT STYLES\n   Centralized styling definitions for tabs variants.\n   ============================================================================= */\n\n/** Style classes for TabsList variants */\nconst listVariants = {\n  contained: \"grid w-full rounded-08 bg-background-tint-03\",\n  pill: \"relative flex w-full items-center pb-[5px] bg-background-tint-00 overflow-hidden\",\n} as const;\n\n/** Base style classes for TabsTrigger variants */\nconst triggerBaseStyles = {\n  contained: \"p-2 gap-2\",\n  pill: \"p-1 font-secondary-action transition-all duration-200 ease-out\",\n} as const;\n\n/** Icon style classes for TabsTrigger variants */\nconst iconVariants = {\n  contained: \"stroke-text-03\",\n  pill: \"stroke-current\",\n} as const;\n\n/* =============================================================================\n   CONSTANTS\n   ============================================================================= */\n\n/** Pixel tolerance for detecting scroll boundaries (accounts for rounding) */\nconst SCROLL_TOLERANCE_PX = 1;\n\n/** Pixel amount to scroll when clicking scroll arrows */\nconst SCROLL_AMOUNT_PX = 200;\n\n/* =============================================================================\n   HOOKS\n   ============================================================================= */\n\n/** Style properties for the pill indicator position */\ninterface IndicatorStyle {\n  left: number;\n  width: number;\n  opacity: number;\n}\n\n/**\n * Hook to track and animate a sliding indicator under the active tab.\n *\n * Uses MutationObserver to detect when the active tab changes (via data-state\n * attribute updates from Radix UI) and calculates the indicator position.\n *\n * @param listRef - Ref to the TabsList container element\n * @param enabled - Whether indicator tracking is enabled (only true for pill variant)\n * @returns Style object with left, width, and opacity for the indicator element\n */\nfunction usePillIndicator(\n  listRef: React.RefObject<HTMLElement | null>,\n  enabled: boolean,\n  scrollContainerRef?: React.RefObject<HTMLElement | null>\n): { style: IndicatorStyle; isScrolling: boolean } {\n  const [style, setStyle] = useState<IndicatorStyle>({\n    left: 0,\n    width: 0,\n    opacity: 0,\n  });\n  const [isScrolling, setIsScrolling] = useState(false);\n  const scrollTimeoutRef = useRef<NodeJS.Timeout | null>(null);\n\n  useEffect(() => {\n    if (!enabled) return;\n\n    const list = listRef.current;\n    if (!list) return;\n\n    const updateIndicator = () => {\n      const activeTab = list.querySelector<HTMLElement>(\n        '[data-state=\"active\"]'\n      );\n      if (activeTab) {\n        const listRect = list.getBoundingClientRect();\n        const tabRect = activeTab.getBoundingClientRect();\n        setStyle({\n          left: tabRect.left - listRect.left,\n          width: tabRect.width,\n          opacity: 1,\n        });\n      }\n    };\n\n    const handleScroll = () => {\n      setIsScrolling(true);\n      updateIndicator();\n\n      // Clear existing timeout\n      if (scrollTimeoutRef.current) {\n        clearTimeout(scrollTimeoutRef.current);\n      }\n      // Reset scrolling state after scroll ends\n      scrollTimeoutRef.current = setTimeout(() => {\n        setIsScrolling(false);\n      }, 150);\n    };\n\n    updateIndicator();\n\n    // Watch for size changes on ANY tab (sibling size changes affect active tab position)\n    const resizeObserver = new ResizeObserver(() => updateIndicator());\n    list.querySelectorAll<HTMLElement>('[role=\"tab\"]').forEach((tab) => {\n      resizeObserver.observe(tab);\n    });\n\n    // Watch for data-state changes (tab switches)\n    const mutationObserver = new MutationObserver(() => updateIndicator());\n    mutationObserver.observe(list, {\n      attributes: true,\n      subtree: true,\n      attributeFilter: [\"data-state\"],\n    });\n\n    // Listen for scroll events on scroll container\n    const scrollContainer = scrollContainerRef?.current;\n    if (scrollContainer) {\n      scrollContainer.addEventListener(\"scroll\", handleScroll);\n    }\n\n    return () => {\n      mutationObserver.disconnect();\n      resizeObserver.disconnect();\n      if (scrollContainer) {\n        scrollContainer.removeEventListener(\"scroll\", handleScroll);\n      }\n      if (scrollTimeoutRef.current) {\n        clearTimeout(scrollTimeoutRef.current);\n      }\n    };\n  }, [enabled, listRef, scrollContainerRef]);\n\n  return { style, isScrolling };\n}\n\n/** State for horizontal scroll arrows */\ninterface ScrollState {\n  canScrollLeft: boolean;\n  canScrollRight: boolean;\n  scrollLeft: () => void;\n  scrollRight: () => void;\n}\n\n/**\n * Hook to manage horizontal scrolling with arrow navigation.\n *\n * Tracks scroll position and overflow state of a container, providing\n * scroll functions and boolean flags for arrow visibility.\n *\n * @param containerRef - Ref to the scrollable container element\n * @param enabled - Whether scroll tracking is enabled\n * @returns Object with canScrollLeft, canScrollRight, and scroll functions\n */\nfunction useHorizontalScroll(\n  containerRef: React.RefObject<HTMLElement | null>,\n  enabled: boolean\n): ScrollState {\n  const [canScrollLeft, setCanScrollLeft] = useState(false);\n  const [canScrollRight, setCanScrollRight] = useState(false);\n\n  const updateScrollState = useCallback(() => {\n    const container = containerRef.current;\n    if (!container) return;\n\n    const { scrollLeft, scrollWidth, clientWidth } = container;\n    setCanScrollLeft(scrollLeft > 0);\n    setCanScrollRight(\n      scrollLeft + clientWidth < scrollWidth - SCROLL_TOLERANCE_PX\n    );\n  }, [containerRef]);\n\n  useEffect(() => {\n    if (!enabled) return;\n\n    const container = containerRef.current;\n    if (!container) return;\n\n    // Delay initial measurement until after layout\n    const rafId = requestAnimationFrame(() => {\n      updateScrollState();\n    });\n\n    container.addEventListener(\"scroll\", updateScrollState);\n\n    const resizeObserver = new ResizeObserver(updateScrollState);\n    resizeObserver.observe(container);\n\n    // Also observe children for size changes\n    Array.from(container.children).forEach((child) => {\n      resizeObserver.observe(child);\n    });\n\n    return () => {\n      cancelAnimationFrame(rafId);\n      container.removeEventListener(\"scroll\", updateScrollState);\n      resizeObserver.disconnect();\n    };\n  }, [enabled, containerRef, updateScrollState]);\n\n  const scrollLeft = useCallback(() => {\n    containerRef.current?.scrollBy({\n      left: -SCROLL_AMOUNT_PX,\n      behavior: \"smooth\",\n    });\n  }, [containerRef]);\n\n  const scrollRight = useCallback(() => {\n    containerRef.current?.scrollBy({\n      left: SCROLL_AMOUNT_PX,\n      behavior: \"smooth\",\n    });\n  }, [containerRef]);\n\n  return { canScrollLeft, canScrollRight, scrollLeft, scrollRight };\n}\n\n/* =============================================================================\n   SUB-COMPONENTS\n   ============================================================================= */\n\n/**\n * Renders the bottom line and sliding indicator for the pill variant.\n * The indicator animates smoothly when switching between tabs.\n *\n * @param style - Position and opacity for the sliding indicator\n * @param rightOffset - Distance from the right edge where the border line should stop (for rightContent)\n */\nfunction PillIndicator({\n  style,\n  rightOffset = 0,\n}: {\n  style: IndicatorStyle;\n  rightOffset?: number;\n}) {\n  return (\n    <>\n      <div\n        className=\"absolute bottom-0 left-0 h-px bg-border-02 pointer-events-none\"\n        style={{ right: rightOffset }}\n      />\n      <div\n        className=\"absolute bottom-0 h-[2px] bg-background-tint-inverted-03 z-10 pointer-events-none transition-all duration-200 ease-out\"\n        style={{\n          left: style.left,\n          width: style.width,\n          opacity: style.opacity,\n        }}\n      />\n    </>\n  );\n}\n\n/* =============================================================================\n   MAIN COMPONENTS\n   ============================================================================= */\n\n/**\n * Tabs Root Component\n *\n * Container for tab navigation and content. Manages the active tab state.\n * Supports both controlled and uncontrolled modes.\n *\n * @param defaultValue - The tab value that should be active by default (uncontrolled mode)\n * @param value - The controlled active tab value\n * @param onValueChange - Callback fired when the active tab changes\n */\nconst TabsRoot = React.forwardRef<\n  React.ElementRef<typeof TabsPrimitive.Root>,\n  WithoutStyles<React.ComponentPropsWithoutRef<typeof TabsPrimitive.Root>>\n>(({ ...props }, ref) => (\n  <TabsPrimitive.Root ref={ref} className=\"w-full\" {...props} />\n));\nTabsRoot.displayName = TabsPrimitive.Root.displayName;\n\n/* -------------------------------------------------------------------------- */\n\n/**\n * Tabs List Props\n */\ninterface TabsListProps\n  extends Omit<\n    React.ComponentPropsWithoutRef<typeof TabsPrimitive.List>,\n    \"style\"\n  > {\n  /**\n   * Visual variant of the tabs list.\n   *\n   * - `contained` (default): Rounded background with equal-width tabs in a grid.\n   *   Best for primary navigation where tabs should fill available space.\n   *\n   * - `pill`: Transparent background with a sliding underline indicator.\n   *   Best for secondary navigation or filter-style tabs with flexible widths.\n   */\n  variant?: \"contained\" | \"pill\";\n\n  /**\n   * Content to render on the right side of the tab list.\n   * Only applies to the `pill` variant (ignored for `contained`).\n   *\n   * @example\n   * ```tsx\n   * <Tabs.List variant=\"pill\" rightContent={<Button size=\"sm\">Add New</Button>}>\n   *   <Tabs.Trigger value=\"all\">All</Tabs.Trigger>\n   *   <Tabs.Trigger value=\"active\">Active</Tabs.Trigger>\n   * </Tabs.List>\n   * ```\n   */\n  rightContent?: React.ReactNode;\n\n  /**\n   * Enable horizontal scroll arrows when tabs overflow.\n   * Only applies to the `pill` variant.\n   * @default false\n   */\n  enableScrollArrows?: boolean;\n}\n\n/**\n * Tabs List Component\n *\n * Container for tab triggers. Renders as a horizontal list with automatic\n * keyboard navigation (arrow keys, Home/End) and accessibility attributes.\n *\n * @remarks\n * - **Contained**: Uses CSS Grid for equal-width tabs with rounded background\n * - **Pill**: Uses Flexbox for content-width tabs with animated bottom indicator\n * - The `variant` prop is automatically propagated to child `Tabs.Trigger` components via context\n */\nconst TabsList = React.forwardRef<\n  React.ElementRef<typeof TabsPrimitive.List>,\n  TabsListProps\n>(\n  (\n    {\n      variant = \"contained\",\n      rightContent,\n      enableScrollArrows = false,\n      children,\n      className,\n      ...props\n    },\n    ref\n  ) => {\n    const listRef = useRef<HTMLDivElement>(null);\n    const tabsContainerRef = useRef<HTMLDivElement>(null);\n    const scrollArrowsRef = useRef<HTMLDivElement>(null);\n    const rightContentRef = useRef<HTMLDivElement>(null);\n    const [rightOffset, setRightOffset] = useState(0);\n    const isPill = variant === \"pill\";\n    const { style: indicatorStyle } = usePillIndicator(\n      listRef,\n      isPill,\n      enableScrollArrows ? tabsContainerRef : undefined\n    );\n    const contextValue = useMemo(() => ({ variant }), [variant]);\n    const {\n      canScrollLeft,\n      canScrollRight,\n      scrollLeft: handleScrollLeft,\n      scrollRight: handleScrollRight,\n    } = useHorizontalScroll(tabsContainerRef, isPill && enableScrollArrows);\n\n    const showScrollArrows =\n      isPill && enableScrollArrows && (canScrollLeft || canScrollRight);\n\n    // Track right content and scroll arrows width to offset the border line\n    useEffect(() => {\n      if (!isPill) {\n        setRightOffset(0);\n        return;\n      }\n\n      const updateWidth = () => {\n        let totalWidth = 0;\n\n        // Add scroll arrows width if visible\n        if (scrollArrowsRef.current) {\n          totalWidth += scrollArrowsRef.current.offsetWidth;\n        }\n\n        // Add right content width if present\n        if (rightContentRef.current) {\n          totalWidth += rightContentRef.current.offsetWidth;\n        }\n\n        setRightOffset(totalWidth);\n      };\n\n      updateWidth();\n\n      const resizeObserver = new ResizeObserver(updateWidth);\n      if (scrollArrowsRef.current)\n        resizeObserver.observe(scrollArrowsRef.current);\n      if (rightContentRef.current)\n        resizeObserver.observe(rightContentRef.current);\n\n      return () => resizeObserver.disconnect();\n    }, [isPill, rightContent, showScrollArrows]);\n\n    return (\n      <TabsPrimitive.List\n        ref={mergeRefs(listRef, ref)}\n        className={cn(listVariants[variant], className)}\n        style={\n          variant === \"contained\"\n            ? {\n                gridTemplateColumns: `repeat(${React.Children.count(\n                  children\n                )}, 1fr)`,\n              }\n            : undefined\n        }\n        {...props}\n      >\n        <TabsContext.Provider value={contextValue}>\n          {isPill ? (\n            enableScrollArrows ? (\n              <div\n                ref={tabsContainerRef}\n                className=\"flex items-center gap-2 overflow-x-auto scrollbar-hide flex-1 min-w-0\"\n                style={{ scrollbarWidth: \"none\", msOverflowStyle: \"none\" }}\n              >\n                {children}\n              </div>\n            ) : (\n              <div className=\"flex items-center gap-2 pt-1\">{children}</div>\n            )\n          ) : (\n            children\n          )}\n\n          {showScrollArrows && (\n            <div\n              ref={scrollArrowsRef}\n              className=\"flex items-center gap-1 pl-2 flex-shrink-0\"\n            >\n              <Button\n                disabled={!canScrollLeft}\n                prominence=\"tertiary\"\n                size=\"sm\"\n                icon={SvgChevronLeft}\n                onClick={handleScrollLeft}\n                tooltip=\"Scroll tabs left\"\n              />\n              <Button\n                disabled={!canScrollRight}\n                prominence=\"tertiary\"\n                size=\"sm\"\n                icon={SvgChevronRight}\n                onClick={handleScrollRight}\n                tooltip=\"Scroll tabs right\"\n              />\n            </div>\n          )}\n\n          {isPill && rightContent && (\n            <div ref={rightContentRef} className=\"ml-auto flex-shrink-0\">\n              {rightContent}\n            </div>\n          )}\n\n          {isPill && (\n            <PillIndicator style={indicatorStyle} rightOffset={rightOffset} />\n          )}\n        </TabsContext.Provider>\n      </TabsPrimitive.List>\n    );\n  }\n);\nTabsList.displayName = TabsPrimitive.List.displayName;\n\n/* -------------------------------------------------------------------------- */\n\n/**\n * Tabs Trigger Props\n */\ninterface TabsTriggerProps\n  extends WithoutStyles<\n    Omit<\n      React.ComponentPropsWithoutRef<typeof TabsPrimitive.Trigger>,\n      \"children\"\n    >\n  > {\n  /**\n   * Visual variant of the tab trigger.\n   * Automatically inherited from the parent `Tabs.List` variant via context.\n   * Can be explicitly set to override the inherited value.\n   *\n   * - `contained` (default): White background with shadow when active\n   * - `pill`: Dark pill background when active, transparent when inactive\n   */\n  variant?: \"contained\" | \"pill\";\n\n  /** Optional tooltip text to display on hover */\n  tooltip?: string;\n\n  /** Side where tooltip appears. @default \"top\" */\n  tooltipSide?: \"top\" | \"bottom\" | \"left\" | \"right\";\n\n  /** Optional icon component to render before the label */\n  icon?: React.FunctionComponent<IconProps>;\n\n  /** Tab label - can be string or ReactNode for custom content */\n  children?: React.ReactNode;\n\n  /** Show loading spinner after label */\n  isLoading?: boolean;\n}\n\n/**\n * Tabs Trigger Component\n *\n * Individual tab button that switches the active tab when clicked.\n * Supports icons, tooltips, loading states, and disabled state.\n *\n * @remarks\n * - **Contained active**: White background with subtle shadow\n * - **Pill active**: Dark inverted background\n * - Tooltips work on disabled triggers via wrapper span technique\n * - Loading spinner appears after the label text\n */\nconst TabsTrigger = React.forwardRef<\n  React.ElementRef<typeof TabsPrimitive.Trigger>,\n  TabsTriggerProps\n>(\n  (\n    {\n      variant: variantProp,\n      tooltip,\n      tooltipSide = \"top\",\n      icon: Icon,\n      children,\n      disabled,\n      isLoading,\n      ...props\n    },\n    ref\n  ) => {\n    const context = useTabsContext();\n    const variant = variantProp ?? context?.variant ?? \"contained\";\n\n    const inner = (\n      <>\n        {Icon && (\n          <div className=\"p-0.5\">\n            <Icon size={14} className={cn(iconVariants[variant])} />\n          </div>\n        )}\n        {typeof children === \"string\" ? (\n          <div className=\"px-0.5\">\n            <Text>{children}</Text>\n          </div>\n        ) : (\n          children\n        )}\n        {isLoading && (\n          <span\n            className=\"inline-block w-3 h-3 border-2 border-current border-t-transparent rounded-full animate-spin ml-1\"\n            aria-label=\"Loading\"\n          />\n        )}\n      </>\n    );\n\n    const trigger = (\n      <TabsPrimitive.Trigger\n        ref={ref}\n        disabled={disabled}\n        className={cn(\n          \"inline-flex items-center justify-center whitespace-nowrap rounded-08\",\n          triggerBaseStyles[variant],\n          variant === \"contained\" && [\n            \"data-[state=active]:bg-background-neutral-00\",\n            \"data-[state=active]:text-text-04\",\n            \"data-[state=active]:shadow-01\",\n            \"data-[state=active]:border\",\n            \"data-[state=active]:border-border-01\",\n          ],\n          variant === \"pill\" && [\n            \"data-[state=active]:bg-background-tint-inverted-03\",\n            \"data-[state=active]:text-text-inverted-05\",\n          ],\n          variant === \"contained\" && [\n            \"data-[state=inactive]:text-text-03\",\n            \"data-[state=inactive]:bg-transparent\",\n            \"data-[state=inactive]:border\",\n            \"data-[state=inactive]:border-transparent\",\n          ],\n          variant === \"pill\" && [\n            \"data-[state=inactive]:bg-background-tint-00\",\n            \"data-[state=inactive]:text-text-03\",\n          ]\n        )}\n        {...props}\n      >\n        {tooltip && !disabled ? (\n          <SimpleTooltip tooltip={tooltip} side={tooltipSide}>\n            <span className=\"inline-flex items-center gap-inherit\">\n              {inner}\n            </span>\n          </SimpleTooltip>\n        ) : (\n          inner\n        )}\n      </TabsPrimitive.Trigger>\n    );\n\n    // Disabled native buttons don't emit pointer/focus events, so tooltips\n    // inside them won't trigger. Wrap the entire trigger with a neutral span\n    // only when disabled so layout stays unchanged for the enabled case.\n    if (tooltip && disabled) {\n      return (\n        <SimpleTooltip tooltip={tooltip} side={tooltipSide}>\n          <span className=\"flex-1 inline-flex align-middle justify-center\">\n            {trigger}\n          </span>\n        </SimpleTooltip>\n      );\n    }\n\n    return trigger;\n  }\n);\nTabsTrigger.displayName = TabsPrimitive.Trigger.displayName;\n\n/* -------------------------------------------------------------------------- */\n\n/**\n * Tabs Content Component\n *\n * Container for the content associated with each tab.\n * Only the content for the active tab is rendered and visible.\n *\n * @param value - The tab value this content is associated with (must match a Tabs.Trigger value)\n */\nconst TabsContent = React.forwardRef<\n  React.ElementRef<typeof TabsPrimitive.Content>,\n  SectionProps & { value: string }\n>(({ children, value, ...props }, ref) => (\n  <TabsPrimitive.Content\n    ref={ref}\n    value={value}\n    className=\"pt-4 focus:outline-none focus:border-theme-primary-05 w-full\"\n  >\n    <Section padding={0} {...props}>\n      {children}\n    </Section>\n  </TabsPrimitive.Content>\n));\nTabsContent.displayName = TabsPrimitive.Content.displayName;\n\n/* =============================================================================\n   EXPORTS\n   ============================================================================= */\n\nexport default Object.assign(TabsRoot, {\n  List: TabsList,\n  Trigger: TabsTrigger,\n  Content: TabsContent,\n});\n"
  },
  {
    "path": "web/src/refresh-components/TextSeparator.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport TextSeparator from \"./TextSeparator\";\n\nconst meta: Meta<typeof TextSeparator> = {\n  title: \"refresh-components/TextSeparator\",\n  component: TextSeparator,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"padded\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof TextSeparator>;\n\nexport const TextOnly: Story = {\n  args: {\n    text: \"Older messages\",\n  },\n};\n\nexport const WithCount: Story = {\n  args: {\n    text: \"results\",\n    count: 42,\n  },\n};\n\nexport const InContext: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-2\" style={{ width: 400 }}>\n      <div className=\"p-2 bg-background-tint-01 rounded-08\">Message 1</div>\n      <div className=\"p-2 bg-background-tint-01 rounded-08\">Message 2</div>\n      <TextSeparator text=\"older messages\" count={15} />\n      <div className=\"p-2 bg-background-tint-01 rounded-08\">Message 3</div>\n      <div className=\"p-2 bg-background-tint-01 rounded-08\">Message 4</div>\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/TextSeparator.tsx",
    "content": "import React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nexport interface TextSeparatorProps {\n  count?: number;\n  text: string;\n  className?: string;\n}\n\nexport default function TextSeparator({\n  count,\n  text,\n  className,\n}: TextSeparatorProps) {\n  return (\n    <div\n      className={cn(\"flex flex-row items-center w-full gap-2 px-4\", className)}\n    >\n      <div className=\"flex-1 h-px bg-border\" />\n      <div className=\"flex flex-row items-center gap-1 flex-shrink-0\">\n        {count !== undefined && (\n          <Text as=\"p\" secondaryBody text03>\n            {count}\n          </Text>\n        )}\n        <Text as=\"p\" secondaryBody text03>\n          {text}\n        </Text>\n      </div>\n      <div className=\"flex-1 h-px bg-border\" />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/avatars/AgentAvatar.tsx",
    "content": "\"use client\";\n\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { buildImgUrl } from \"@/app/app/components/files/images/utils\";\nimport { OnyxIcon } from \"@/components/icons/icons\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport { DEFAULT_AVATAR_SIZE_PX, DEFAULT_AGENT_ID } from \"@/lib/constants\";\nimport CustomAgentAvatar from \"@/refresh-components/avatars/CustomAgentAvatar\";\nimport Image from \"next/image\";\n\nexport interface AgentAvatarProps {\n  agent: MinimalPersonaSnapshot;\n  size?: number;\n}\n\nexport default function AgentAvatar({\n  agent,\n  size = DEFAULT_AVATAR_SIZE_PX,\n  ...props\n}: AgentAvatarProps) {\n  const settings = useSettingsContext();\n\n  if (agent.id === DEFAULT_AGENT_ID) {\n    return settings.enterpriseSettings?.use_custom_logo ? (\n      <div\n        className=\"aspect-square rounded-full overflow-hidden relative\"\n        style={{ height: size, width: size }}\n      >\n        <Image\n          alt=\"Logo\"\n          src=\"/api/enterprise-settings/logo\"\n          fill\n          className=\"object-cover object-center\"\n          sizes={`${size}px`}\n        />\n      </div>\n    ) : (\n      <OnyxIcon size={size} className=\"shrink-0\" />\n    );\n  }\n\n  return (\n    <CustomAgentAvatar\n      name={agent.name}\n      src={\n        agent.uploaded_image_id\n          ? buildImgUrl(agent.uploaded_image_id)\n          : undefined\n      }\n      iconName={agent.icon_name}\n      size={size}\n      {...props}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/avatars/CustomAgentAvatar.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport CustomAgentAvatar from \"./CustomAgentAvatar\";\n\nconst meta: Meta<typeof CustomAgentAvatar> = {\n  title: \"refresh-components/Avatars/CustomAgentAvatar\",\n  component: CustomAgentAvatar,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"centered\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof CustomAgentAvatar>;\n\n// ---------------------------------------------------------------------------\n// Default — falls back to letter from name\n// ---------------------------------------------------------------------------\n\nexport const WithName: Story = {\n  args: {\n    name: \"Research Assistant\",\n    size: 40,\n  },\n};\n\n// ---------------------------------------------------------------------------\n// Icon variants\n// ---------------------------------------------------------------------------\n\nexport const WithIconSearch: Story = {\n  args: {\n    name: \"Search Agent\",\n    iconName: \"Search\",\n    size: 40,\n  },\n};\n\nexport const WithIconTerminal: Story = {\n  args: {\n    name: \"Code Agent\",\n    iconName: \"Terminal\",\n    size: 40,\n  },\n};\n\nexport const WithIconPen: Story = {\n  args: {\n    name: \"Writer Agent\",\n    iconName: \"Pen\",\n    size: 40,\n  },\n};\n\nexport const WithIconBarChart: Story = {\n  args: {\n    name: \"Analytics Agent\",\n    iconName: \"BarChart\",\n    size: 40,\n  },\n};\n\n// ---------------------------------------------------------------------------\n// Fallback — no name, no icon\n// ---------------------------------------------------------------------------\n\nexport const NoNameNoIcon: Story = {\n  args: {\n    size: 40,\n  },\n};\n\n// ---------------------------------------------------------------------------\n// Sizes\n// ---------------------------------------------------------------------------\n\nexport const Small: Story = {\n  args: {\n    name: \"Tiny\",\n    iconName: \"Info\",\n    size: 24,\n  },\n};\n\nexport const Large: Story = {\n  args: {\n    name: \"Big Agent\",\n    iconName: \"BooksStack\",\n    size: 64,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/avatars/CustomAgentAvatar.tsx",
    "content": "\"use client\";\n\nimport { cn } from \"@/lib/utils\";\nimport type { IconProps } from \"@opal/types\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Image from \"next/image\";\nimport { DEFAULT_AVATAR_SIZE_PX } from \"@/lib/constants\";\nimport {\n  SvgActivitySmall,\n  SvgAudioEqSmall,\n  SvgBarChartSmall,\n  SvgBooksLineSmall,\n  SvgBooksStackSmall,\n  SvgCheckSmall,\n  SvgClockHandsSmall,\n  SvgFileSmall,\n  SvgHashSmall,\n  SvgImageSmall,\n  SvgInfoSmall,\n  SvgMusicSmall,\n  SvgOnyxOctagon,\n  SvgPenSmall,\n  SvgQuestionMarkSmall,\n  SvgSearchSmall,\n  SvgSlidersSmall,\n  SvgTerminalSmall,\n  SvgTextLinesSmall,\n  SvgTwoLineSmall,\n} from \"@opal/icons\";\n\ninterface IconConfig {\n  Icon: React.FunctionComponent<IconProps>;\n  className?: string;\n}\n\nexport const agentAvatarIconMap: Record<string, IconConfig> = {\n  Info: { Icon: SvgInfoSmall, className: \"stroke-theme-primary-05\" },\n  QuestionMark: {\n    Icon: SvgQuestionMarkSmall,\n    className: \"stroke-theme-primary-05\",\n  },\n\n  // blue\n  TextLines: { Icon: SvgTextLinesSmall, className: \"stroke-theme-blue-05\" },\n  Pen: { Icon: SvgPenSmall, className: \"stroke-theme-blue-05\" },\n  ClockHands: { Icon: SvgClockHandsSmall, className: \"stroke-theme-blue-05\" },\n  Hash: { Icon: SvgHashSmall, className: \"stroke-theme-blue-05\" },\n\n  // green\n  Search: { Icon: SvgSearchSmall, className: \"stroke-theme-green-05\" },\n  Check: { Icon: SvgCheckSmall, className: \"stroke-theme-green-05\" },\n  BarChart: { Icon: SvgBarChartSmall, className: \"stroke-theme-green-05\" },\n  Activity: { Icon: SvgActivitySmall, className: \"stroke-theme-green-05\" },\n\n  // purple\n  File: { Icon: SvgFileSmall, className: \"stroke-theme-purple-05\" },\n  Image: { Icon: SvgImageSmall, className: \"stroke-theme-purple-05\" },\n  BooksStack: { Icon: SvgBooksStackSmall, className: \"stroke-theme-purple-05\" },\n  BooksLine: { Icon: SvgBooksLineSmall, className: \"stroke-theme-purple-05\" },\n\n  // orange\n  Terminal: { Icon: SvgTerminalSmall, className: \"stroke-theme-orange-04\" },\n  Sliders: { Icon: SvgSlidersSmall, className: \"stroke-theme-orange-04\" },\n\n  // amber\n  AudioEq: { Icon: SvgAudioEqSmall, className: \"stroke-theme-amber-04\" },\n  Music: { Icon: SvgMusicSmall, className: \"stroke-theme-amber-04\" },\n};\n\ninterface SvgOctagonWrapperProps {\n  size: number;\n  children: React.ReactNode;\n}\n\nfunction SvgOctagonWrapper({ size, children }: SvgOctagonWrapperProps) {\n  return (\n    <div className=\"relative flex flex-col items-center justify-center\">\n      <div className=\"absolute inset-0 flex items-center justify-center\">\n        {children}\n      </div>\n      <SvgOnyxOctagon className=\"stroke-text-04\" height={size} width={size} />\n    </div>\n  );\n}\n\nexport interface CustomAgentAvatarProps {\n  name?: string;\n  src?: string;\n  iconName?: string;\n\n  size?: number;\n}\n\nexport default function CustomAgentAvatar({\n  name,\n  src,\n  iconName,\n\n  size = DEFAULT_AVATAR_SIZE_PX,\n}: CustomAgentAvatarProps) {\n  if (src) {\n    return (\n      <div\n        className=\"aspect-square rounded-full overflow-hidden relative\"\n        style={{ height: size, width: size }}\n      >\n        <Image\n          alt={name || \"Agent avatar\"}\n          src={src}\n          fill\n          className=\"object-cover object-center\"\n          sizes={`${size}px`}\n        />\n      </div>\n    );\n  }\n\n  const iconConfig = iconName && agentAvatarIconMap[iconName];\n  if (iconConfig) {\n    const { Icon, className } = iconConfig;\n    const multiplier = 0.7;\n    return (\n      <SvgOctagonWrapper size={size}>\n        <Icon\n          className={cn(\"stroke-text-04\", className)}\n          style={{ width: size * multiplier, height: size * multiplier }}\n        />\n      </SvgOctagonWrapper>\n    );\n  }\n\n  // Display first letter of name if available, otherwise fall back to two-line-small icon\n  const trimmedName = name?.trim();\n  const firstLetter =\n    trimmedName && trimmedName.length > 0\n      ? trimmedName[0]!.toUpperCase()\n      : undefined;\n  const validFirstLetter = !!firstLetter && /^[a-zA-Z]$/.test(firstLetter);\n  if (validFirstLetter) {\n    return (\n      <SvgOctagonWrapper size={size}>\n        <Text style={{ fontSize: size * 0.5 }}>{firstLetter}</Text>\n      </SvgOctagonWrapper>\n    );\n  }\n\n  return (\n    <SvgOctagonWrapper size={size}>\n      <SvgTwoLineSmall\n        className=\"stroke-text-04\"\n        style={{ width: size * 0.8, height: size * 0.8 }}\n      />\n    </SvgOctagonWrapper>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/avatars/UserAvatar.tsx",
    "content": "import { SvgUser } from \"@opal/icons\";\nimport { DEFAULT_AVATAR_SIZE_PX } from \"@/lib/constants\";\nimport { getUserEmail, getUserInitials } from \"@/lib/user\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport type { User } from \"@/lib/types\";\n\nexport interface UserAvatarProps {\n  user: User | null;\n  size?: number;\n}\n\nexport default function UserAvatar({\n  user,\n  size = DEFAULT_AVATAR_SIZE_PX,\n}: UserAvatarProps) {\n  const userEmail = getUserEmail(user);\n  const userInitials = getUserInitials(\n    user?.personalization?.name ?? null,\n    userEmail\n  );\n\n  if (!userInitials) {\n    return (\n      <div\n        role=\"img\"\n        aria-label={`${userEmail} avatar`}\n        className=\"flex items-center justify-center rounded-full bg-background-tint-01\"\n        style={{ width: size, height: size }}\n      >\n        <SvgUser size={size * 0.55} className=\"stroke-text-03\" aria-hidden />\n      </div>\n    );\n  }\n\n  return (\n    <div\n      role=\"img\"\n      aria-label={`${userEmail} avatar`}\n      className=\"flex items-center justify-center rounded-full bg-background-neutral-inverted-00\"\n      style={{ width: size, height: size }}\n    >\n      <Text\n        inverted\n        secondaryAction\n        text05\n        className=\"select-none\"\n        style={{ fontSize: size * 0.4 }}\n      >\n        {userInitials}\n      </Text>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/buttons/AttachmentButton.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport AttachmentButton from \"./AttachmentButton\";\nimport { SvgTextLines, SvgTrash, SvgFiles } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof AttachmentButton> = {\n  title: \"refresh-components/buttons/AttachmentButton\",\n  component: AttachmentButton,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 400 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof AttachmentButton>;\n\nexport const Default: Story = {\n  args: {\n    icon: SvgTextLines,\n    children: \"Project Proposal\",\n    description: \"document.pdf\",\n    rightText: \"2.4 MB\",\n  },\n};\n\nexport const Selected: Story = {\n  args: {\n    icon: SvgTextLines,\n    children: \"Project Proposal\",\n    description: \"document.pdf\",\n    rightText: \"2.4 MB\",\n    selected: true,\n  },\n};\n\nexport const Processing: Story = {\n  args: {\n    icon: SvgTextLines,\n    children: \"Project Proposal\",\n    description: \"Uploading...\",\n    rightText: \"45%\",\n    processing: true,\n  },\n};\n\nexport const WithViewButton: Story = {\n  args: {\n    icon: SvgTextLines,\n    children: \"Project Proposal\",\n    description: \"document.pdf\",\n    rightText: \"2.4 MB\",\n    onView: () => {},\n  },\n};\n\nexport const WithActionButton: Story = {\n  args: {\n    icon: SvgTextLines,\n    children: \"Project Proposal\",\n    description: \"document.pdf\",\n    rightText: \"2.4 MB\",\n    actionIcon: SvgTrash,\n    onAction: () => {},\n  },\n};\n\nexport const FileList: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", flexDirection: \"column\", gap: 4 }}>\n      <AttachmentButton\n        icon={SvgTextLines}\n        description=\"proposal.pdf\"\n        rightText=\"2.4 MB\"\n        onView={() => {}}\n      >\n        Project Proposal\n      </AttachmentButton>\n      <AttachmentButton\n        icon={SvgFiles}\n        description=\"report.xlsx\"\n        rightText=\"1.1 MB\"\n        selected\n      >\n        Quarterly Report\n      </AttachmentButton>\n      <AttachmentButton\n        icon={SvgTextLines}\n        description=\"Uploading...\"\n        rightText=\"72%\"\n        processing\n      >\n        Meeting Notes\n      </AttachmentButton>\n      <AttachmentButton\n        icon={SvgFiles}\n        description=\"readme.md\"\n        rightText=\"4 KB\"\n        actionIcon={SvgTrash}\n        onAction={() => {}}\n      >\n        README\n      </AttachmentButton>\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/buttons/AttachmentButton.tsx",
    "content": "/**\n * AttachmentButton - A button component for displaying file attachments or similar items\n *\n * Displays an attachment item with an icon, title, description, metadata text,\n * and optional action buttons. Commonly used for file lists, attachment pickers,\n * and similar UI patterns where items can be viewed or acted upon.\n *\n * Features:\n * - Three visual states: default, selected (shows checkbox), processing\n * - Left icon that changes to checkbox when selected\n * - Truncated title and description text\n * - Right-aligned metadata text (e.g., file size, date)\n * - Optional view button (external link icon) that appears on hover\n * - Optional action button (custom icon) that appears on hover\n * - Full-width button with hover states\n * - Prevents event bubbling for nested action buttons\n *\n * @example\n * ```tsx\n * import AttachmentButton from \"@/refresh-components/buttons/AttachmentButton\";\n * import { SvgFileText, SvgTrash } from \"@opal/icons\";\n *\n * // Basic attachment\n * <AttachmentButton\n *   icon={SvgFileText}\n *   description=\"document.pdf\"\n *   rightText=\"2.4 MB\"\n * >\n *   Project Proposal\n * </AttachmentButton>\n *\n * // Selected state with view button\n * <AttachmentButton\n *   icon={SvgFileText}\n *   selected\n *   description=\"document.pdf\"\n *   rightText=\"2.4 MB\"\n *   onView={() => window.open('/view/doc')}\n * >\n *   Project Proposal\n * </AttachmentButton>\n *\n * // With action button (delete)\n * <AttachmentButton\n *   icon={SvgFileText}\n *   description=\"document.pdf\"\n *   rightText=\"2.4 MB\"\n *   actionIcon={SvgTrash}\n *   onAction={() => handleDelete()}\n * >\n *   Project Proposal\n * </AttachmentButton>\n *\n * // Processing state\n * <AttachmentButton\n *   icon={SvgFileText}\n *   processing\n *   description=\"Uploading...\"\n *   rightText=\"45%\"\n * >\n *   Project Proposal\n * </AttachmentButton>\n * ```\n */\n\nimport React from \"react\";\nimport { noProp } from \"@/lib/utils\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport type { IconProps } from \"@opal/types\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport { SvgExternalLink } from \"@opal/icons\";\nimport { WithoutStyles } from \"@/types\";\n\nexport interface AttachmentProps\n  extends WithoutStyles<React.ButtonHTMLAttributes<HTMLButtonElement>> {\n  selected?: boolean;\n  processing?: boolean;\n\n  icon: React.FunctionComponent<IconProps>;\n  children: string;\n  description?: string;\n  rightText?: string;\n  onView?: () => void;\n\n  // Action button: An optional secondary action button that appears on hover.\n  // Commonly used for actions like delete, download, or remove.\n  // Both `actionIcon` and `onAction` must be provided for the button to appear.\n  actionIcon?: React.FunctionComponent<IconProps>;\n  onAction?: () => void;\n}\n\nexport default function AttachmentButton({\n  selected,\n  processing,\n  icon: Icon,\n  children,\n  description,\n  rightText,\n  onView,\n  actionIcon,\n  onAction,\n  ...props\n}: AttachmentProps) {\n  const state = selected ? \"selected\" : processing ? \"processing\" : \"default\";\n\n  return (\n    <button\n      type=\"button\"\n      className=\"attachment-button\"\n      data-state={state}\n      {...props}\n    >\n      <div className=\"attachment-button__content\">\n        <div className=\"attachment-button__icon-wrapper\">\n          {selected ? (\n            <Checkbox checked />\n          ) : (\n            <Icon className=\"attachment-button__icon\" />\n          )}\n        </div>\n        <div className=\"attachment-button__text-container\">\n          <div className=\"attachment-button__title-row\">\n            <div className=\"attachment-button__title-wrapper\">\n              <Truncated mainUiMuted text04 nowrap>\n                {children}\n              </Truncated>\n            </div>\n            {onView && (\n              // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n              <IconButton\n                icon={SvgExternalLink}\n                onClick={noProp(onView)}\n                internal\n                className=\"attachment-button__view-button\"\n              />\n            )}\n          </div>\n          {description && (\n            <Truncated secondaryBody text03 className=\"w-full\">\n              {description}\n            </Truncated>\n          )}\n        </div>\n      </div>\n\n      <div className=\"attachment-button__actions\">\n        {rightText && (\n          <Text as=\"p\" secondaryBody text03>\n            {rightText}\n          </Text>\n        )}\n        {actionIcon && onAction && (\n          <div className=\"attachment-button__action-button\">\n            <Button\n              icon={actionIcon}\n              onClick={noProp(onAction)}\n              prominence=\"tertiary\"\n              size=\"sm\"\n            />\n          </div>\n        )}\n      </div>\n    </button>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/buttons/BackButton.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport BackButton from \"./BackButton\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof BackButton> = {\n  title: \"refresh-components/buttons/BackButton\",\n  component: BackButton,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof BackButton>;\n\nexport const Default: Story = {};\n\nexport const WithBehaviorOverride: Story = {\n  args: {\n    behaviorOverride: () => {\n      console.log(\"Custom back behavior\");\n    },\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/buttons/BackButton.tsx",
    "content": "\"use client\";\n\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { Button } from \"@opal/components\";\nimport { SvgArrowLeft } from \"@opal/icons\";\n\nexport interface BackButtonProps {\n  behaviorOverride?: () => void;\n  routerOverride?: string;\n}\n\nexport default function BackButton({\n  behaviorOverride,\n  routerOverride,\n}: BackButtonProps) {\n  const router = useRouter();\n\n  return (\n    <Button\n      icon={SvgArrowLeft}\n      prominence=\"tertiary\"\n      onClick={() => {\n        if (behaviorOverride) {\n          behaviorOverride();\n        } else if (routerOverride) {\n          router.push(routerOverride as Route);\n        } else {\n          router.back();\n        }\n      }}\n    >\n      Back\n    </Button>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/buttons/Button.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Button from \"./Button\";\nimport { SvgPlus, SvgArrowRight } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof Button> = {\n  title: \"refresh-components/buttons/Button\",\n  component: Button,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Button>;\n\nexport const Default: Story = {\n  args: {\n    children: \"Button\",\n  },\n};\n\nexport const Variants: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: 12, alignItems: \"center\" }}>\n      <Button main>Main</Button>\n      <Button action>Action</Button>\n      <Button danger>Danger</Button>\n    </div>\n  ),\n};\n\nexport const Prominences: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: 12, alignItems: \"center\" }}>\n      <Button primary>Primary</Button>\n      <Button secondary>Secondary</Button>\n      <Button tertiary>Tertiary</Button>\n    </div>\n  ),\n};\n\nexport const WithIcons: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: 12, alignItems: \"center\" }}>\n      <Button leftIcon={SvgPlus}>With Left Icon</Button>\n      <Button rightIcon={SvgArrowRight}>With Right Icon</Button>\n    </div>\n  ),\n};\n\nexport const Small: Story = {\n  args: {\n    size: \"md\",\n    children: \"Small Button\",\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    disabled: true,\n    children: \"Disabled\",\n  },\n};\n\nexport const AsLink: Story = {\n  args: {\n    href: \"https://example.com\",\n    children: \"Link Button\",\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/buttons/Button.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Link from \"next/link\";\nimport type { Route } from \"next\";\nimport type { IconProps } from \"@opal/types\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nexport interface ButtonProps\n  extends React.ButtonHTMLAttributes<HTMLButtonElement> {\n  // Button variants:\n  main?: boolean;\n  action?: boolean;\n  danger?: boolean;\n\n  // Button subvariants:\n  primary?: boolean;\n  secondary?: boolean;\n  tertiary?: boolean;\n  internal?: boolean;\n\n  // Button states:\n  transient?: boolean;\n\n  // Button sizes:\n  size?: \"lg\" | \"md\";\n\n  // Icons:\n  leftIcon?: React.FunctionComponent<IconProps>;\n  rightIcon?: React.FunctionComponent<IconProps>;\n\n  href?: string;\n  target?: string;\n}\n\nconst BUTTON_SIZE_CLASS_MAP = {\n  lg: {\n    button: \"p-2 rounded-12 gap-1.5\",\n    content: {\n      left: \"pr-1\",\n      right: \"pl-1\",\n      none: \"\",\n    },\n  },\n  md: {\n    button: \"p-1 rounded-08 gap-0\",\n    content: {\n      left: \"pr-1 py-0.5\",\n      right: \"pl-1 py-0.5\",\n      none: \"py-0.5\",\n    },\n  },\n} as const;\n\nconst Button = React.forwardRef<HTMLButtonElement, ButtonProps>(\n  (\n    {\n      main,\n      action,\n      danger,\n\n      primary,\n      secondary,\n      tertiary,\n      internal,\n\n      disabled,\n      transient,\n      size = \"lg\",\n\n      leftIcon: LeftIcon,\n      rightIcon: RightIcon,\n\n      href,\n      target,\n      children,\n      className,\n      ...props\n    },\n    ref\n  ) => {\n    if (LeftIcon && RightIcon)\n      throw new Error(\n        \"The left and right icons cannot be both specified at the same time\"\n      );\n\n    const variant = main\n      ? \"main\"\n      : action\n        ? \"action\"\n        : danger\n          ? \"danger\"\n          : \"main\";\n    const subvariant = primary\n      ? \"primary\"\n      : secondary\n        ? \"secondary\"\n        : tertiary\n          ? \"tertiary\"\n          : internal\n            ? \"internal\"\n            : \"primary\";\n\n    const buttonClass = `button-${variant}-${subvariant}`;\n    const textClass = `button-${variant}-${subvariant}-text`;\n    const iconClass = `button-${variant}-${subvariant}-icon`;\n    const iconPlacement = LeftIcon ? \"left\" : RightIcon ? \"right\" : \"none\";\n    const sizeClasses = BUTTON_SIZE_CLASS_MAP[size];\n    const textSizeProps =\n      size === \"md\"\n        ? { secondaryAction: true as const }\n        : { mainUiBody: true as const };\n\n    const content = (\n      <button\n        ref={ref}\n        className={cn(\n          \"h-fit w-fit flex flex-row items-center justify-center\",\n          sizeClasses.button,\n          buttonClass,\n          className\n        )}\n        disabled={disabled}\n        data-state={transient ? \"transient\" : undefined}\n        type=\"button\"\n        {...props}\n      >\n        {LeftIcon && (\n          <div className=\"w-[1rem] h-[1rem] flex flex-col items-center justify-center\">\n            <LeftIcon className={cn(\"w-[1rem] h-[1rem]\", iconClass)} />\n          </div>\n        )}\n        {/* Buttons may conditionally pass text as children (e.g. responsive\n            breakpoints), so skip content padding when children is empty. */}\n        {children !== \"\" && (\n          <div\n            className={cn(\"leading-none\", sizeClasses.content[iconPlacement])}\n          >\n            {typeof children === \"string\" ? (\n              <Text\n                {...textSizeProps}\n                className={cn(\"whitespace-nowrap\", textClass)}\n              >\n                {children}\n              </Text>\n            ) : (\n              children\n            )}\n          </div>\n        )}\n        {RightIcon && (\n          <div className=\"w-[1rem] h-[1rem]\">\n            <RightIcon className={cn(\"w-[1rem] h-[1rem]\", iconClass)} />\n          </div>\n        )}\n      </button>\n    );\n\n    if (!href) return content;\n    return (\n      <Link\n        href={href as Route}\n        target={target}\n        rel={target === \"_blank\" ? \"noopener noreferrer\" : undefined}\n      >\n        {content}\n      </Link>\n    );\n  }\n);\nButton.displayName = \"Button\";\n\nexport default Button;\n"
  },
  {
    "path": "web/src/refresh-components/buttons/ButtonRenaming.stories.tsx",
    "content": "import React from \"react\";\nimport type { Meta, StoryObj } from \"@storybook/react\";\nimport ButtonRenaming from \"./ButtonRenaming\";\n\nconst noop = () => {};\n\nconst meta: Meta<typeof ButtonRenaming> = {\n  title: \"refresh-components/buttons/ButtonRenaming\",\n  component: ButtonRenaming,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <div\n        style={{\n          width: 260,\n          padding: 8,\n          background: \"var(--background-neutral-01)\",\n          borderRadius: 8,\n        }}\n      >\n        <Story />\n      </div>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof ButtonRenaming>;\n\nexport const Default: Story = {\n  args: {\n    initialName: \"My Chat Session\",\n    onRename: async () => {},\n    onClose: noop,\n  },\n};\n\nexport const EmptyName: Story = {\n  args: {\n    initialName: null,\n    onRename: async () => {},\n    onClose: noop,\n  },\n};\n\nexport const LongName: Story = {\n  args: {\n    initialName: \"This is a very long chat session name that should overflow\",\n    onRename: async () => {},\n    onClose: noop,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/buttons/ButtonRenaming.tsx",
    "content": "\"use client\";\n\nimport React, { useState } from \"react\";\nimport { handleEnterPress, useEscapePress } from \"@/lib/typingUtils\";\nimport { UNNAMED_CHAT } from \"@/lib/constants\";\nimport { cn } from \"@/lib/utils\";\n\ninterface ButtonRenamingProps {\n  initialName: string | null;\n  onRename: (newName: string) => Promise<void>;\n  onClose: () => void;\n  className?: string;\n}\n\nexport default function ButtonRenaming({\n  initialName,\n  onRename,\n  onClose,\n  className,\n}: ButtonRenamingProps) {\n  const [renamingValue, setRenamingValue] = useState(\n    initialName || UNNAMED_CHAT\n  );\n\n  useEscapePress(onClose, true);\n\n  async function submitRename() {\n    const newName = renamingValue.trim();\n    if (newName === \"\" || newName === initialName) {\n      onClose();\n      return;\n    }\n\n    // Close immediately for instant feedback\n    onClose();\n\n    // Proceed with the rename operation after closing\n    try {\n      await onRename(newName);\n    } catch (error) {\n      console.error(\"Failed to rename:\", error);\n    }\n  }\n\n  return (\n    <input\n      onBlur={onClose}\n      value={renamingValue}\n      className={cn(\n        \"bg-transparent outline-none w-full resize-none overflow-x-hidden overflow-y-hidden whitespace-nowrap no-scrollbar font-main-content-body\",\n        className\n      )}\n      onChange={(event) => setRenamingValue(event.target.value)}\n      onKeyDown={handleEnterPress(() => submitRename())}\n      autoFocus\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/buttons/CopyIconButton.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport CopyIconButton from \"./CopyIconButton\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof CopyIconButton> = {\n  title: \"refresh-components/buttons/CopyIconButton\",\n  component: CopyIconButton,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof CopyIconButton>;\n\nexport const Default: Story = {\n  args: {\n    getCopyText: () => \"Copied text!\",\n  },\n};\n\nexport const WithTooltip: Story = {\n  args: {\n    getCopyText: () => \"Copied text!\",\n    tooltip: \"Copy to clipboard\",\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/buttons/CopyIconButton.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useRef, useState } from \"react\";\nimport { Button, ButtonProps } from \"@opal/components\";\nimport { SvgAlertTriangle, SvgCheck, SvgCopy } from \"@opal/icons\";\n\ntype CopyState = \"idle\" | \"copied\" | \"error\";\n\n/** Omit that distributes over unions, preserving discriminated-union branches. */\ntype DistributiveOmit<T, K extends PropertyKey> = T extends unknown\n  ? Omit<T, K>\n  : never;\n\nexport type CopyIconButtonProps = DistributiveOmit<\n  ButtonProps,\n  \"variant\" | \"icon\" | \"onClick\"\n> & {\n  // Function that returns the text to copy to clipboard\n  getCopyText: () => string;\n  // Optional function to get HTML content for rich copy\n  getHtmlContent?: () => string;\n};\n\nexport default function CopyIconButton({\n  getCopyText,\n  getHtmlContent,\n  tooltip,\n  prominence = \"tertiary\",\n  ...iconButtonProps\n}: CopyIconButtonProps) {\n  const [copyState, setCopyState] = useState<CopyState>(\"idle\");\n  const copyTimeoutRef = useRef<NodeJS.Timeout | null>(null);\n\n  async function handleCopy() {\n    const text = getCopyText();\n\n    // Clear existing timeout if any\n    if (copyTimeoutRef.current) {\n      clearTimeout(copyTimeoutRef.current);\n    }\n\n    try {\n      // Check if Clipboard API is available\n      if (!navigator.clipboard) {\n        throw new Error(\"Clipboard API not available\");\n      }\n\n      // If HTML content getter is provided, copy both HTML and plain text\n      if (getHtmlContent) {\n        const htmlContent = getHtmlContent();\n        const clipboardItem = new ClipboardItem({\n          \"text/html\": new Blob([htmlContent], { type: \"text/html\" }),\n          \"text/plain\": new Blob([text], { type: \"text/plain\" }),\n        });\n        await navigator.clipboard.write([clipboardItem]);\n      }\n      // Default: plain text only\n      else {\n        await navigator.clipboard.writeText(text);\n      }\n\n      // Show \"copied\" state\n      setCopyState(\"copied\");\n    } catch (err) {\n      console.error(\"Failed to copy:\", err);\n\n      // Show \"error\" state\n      setCopyState(\"error\");\n    }\n\n    // Reset to normal state after 3 seconds\n    copyTimeoutRef.current = setTimeout(() => {\n      setCopyState(\"idle\");\n    }, 3000);\n  }\n\n  // Clean up timeout on unmount\n  useEffect(() => {\n    return () => {\n      if (copyTimeoutRef.current) {\n        clearTimeout(copyTimeoutRef.current);\n      }\n    };\n  }, []);\n\n  function getIcon() {\n    switch (copyState) {\n      case \"copied\":\n        return SvgCheck;\n      case \"error\":\n        return SvgAlertTriangle;\n      case \"idle\":\n      default:\n        return SvgCopy;\n    }\n  }\n\n  function getTooltip() {\n    switch (copyState) {\n      case \"copied\":\n        return \"Copied!\";\n      case \"error\":\n        return \"Failed to copy\";\n      case \"idle\":\n      default:\n        return tooltip || \"Copy\";\n    }\n  }\n\n  // Assertion is safe: CopyIconButton always supplies icon + onClick,\n  // satisfying Button's content union. Spread may override prominence.\n  const buttonProps = {\n    prominence,\n    ...iconButtonProps,\n    icon: getIcon(),\n    onClick: handleCopy,\n    tooltip: getTooltip(),\n  } as ButtonProps;\n\n  return <Button {...buttonProps} />;\n}\n"
  },
  {
    "path": "web/src/refresh-components/buttons/CreateButton.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport CreateButton from \"./CreateButton\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof CreateButton> = {\n  title: \"refresh-components/buttons/CreateButton\",\n  component: CreateButton,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof CreateButton>;\n\nexport const Default: Story = {};\n\nexport const CustomLabel: Story = {\n  args: {\n    children: \"New Document\",\n  },\n};\n\nexport const RightIcon: Story = {\n  args: {\n    rightIcon: true,\n    children: \"Add Item\",\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    disabled: true,\n  },\n};\n\nexport const AllVariants: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: 12, alignItems: \"center\" }}>\n      <CreateButton />\n      <CreateButton>New Document</CreateButton>\n      <CreateButton rightIcon>Add Item</CreateButton>\n      <CreateButton disabled />\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/buttons/CreateButton.tsx",
    "content": "\"use client\";\n\nimport Button, { ButtonProps } from \"@/refresh-components/buttons/Button\";\nimport { WithoutStyles } from \"@/types\";\nimport { SvgPlusCircle } from \"@opal/icons\";\n\nexport interface CreateButtonProps\n  extends Omit<WithoutStyles<ButtonProps>, \"leftIcon\" | \"rightIcon\"> {\n  rightIcon?: boolean;\n}\n\nexport default function CreateButton({\n  rightIcon,\n  children,\n  ...props\n}: CreateButtonProps) {\n  return (\n    <Button\n      secondary\n      leftIcon={rightIcon ? undefined : SvgPlusCircle}\n      rightIcon={rightIcon ? SvgPlusCircle : undefined}\n      {...props}\n    >\n      {children ?? \"Create\"}\n    </Button>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/buttons/IconButton.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport IconButton from \"./IconButton\";\nimport { SvgSettings, SvgPlus, SvgX } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof IconButton> = {\n  title: \"refresh-components/buttons/IconButton\",\n  component: IconButton,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof IconButton>;\n\nexport const Default: Story = {\n  args: {\n    icon: SvgSettings,\n  },\n};\n\nexport const Variants: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: 12, alignItems: \"center\" }}>\n      <IconButton main icon={SvgSettings} />\n      <IconButton action icon={SvgPlus} />\n      <IconButton danger icon={SvgX} />\n    </div>\n  ),\n};\n\nexport const Small: Story = {\n  args: {\n    icon: SvgSettings,\n    small: true,\n  },\n};\n\nexport const WithTooltip: Story = {\n  args: {\n    icon: SvgSettings,\n    tooltip: \"Settings\",\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    icon: SvgSettings,\n    disabled: true,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/buttons/IconButton.tsx",
    "content": "\"use client\";\n\nimport React, { useMemo } from \"react\";\nimport type { IconProps } from \"@opal/types\";\nimport { cn } from \"@/lib/utils\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\n\nconst buttonClasses = (transient: boolean | undefined) =>\n  ({\n    main: {\n      primary: {\n        enabled: [\n          \"bg-theme-primary-05\",\n          \"hover:bg-theme-primary-04\",\n          transient && \"bg-theme-primary-04\",\n          \"active:bg-theme-primary-06\",\n        ],\n        disabled: [\"bg-background-neutral-04\"],\n      },\n      secondary: {\n        enabled: [\n          \"bg-background-tint-02\",\n          \"hover:bg-background-tint-02\",\n          transient && \"bg-background-tint-02\",\n          \"active:bg-background-tint-00\",\n        ],\n        disabled: [\"bg-background-neutral-03\"],\n      },\n      tertiary: {\n        enabled: [\n          \"bg-transparent\",\n          \"hover:bg-background-tint-02\",\n          transient && \"bg-background-tint-02\",\n          \"active:bg-background-tint-00\",\n        ],\n        disabled: [\"bg-transparent\"],\n      },\n      internal: {\n        enabled: [\n          \"bg-transparent\",\n          \"hover:bg-background-tint-00\",\n          transient && \"bg-background-tint-00\",\n          \"active:bg-background-tint-00\",\n        ],\n        disabled: [\"bg-transparent\"],\n      },\n      small: {\n        enabled: [\n          \"bg-transparent\",\n          \"hover:bg-background-tint-00\",\n          transient && \"bg-background-tint-00\",\n          \"active:bg-background-tint-00\",\n        ],\n        disabled: [\"bg-transparent\"],\n      },\n    },\n    action: {\n      primary: {\n        enabled: [\n          \"bg-action-link-05\",\n          \"hover:bg-action-link-04\",\n          transient && \"bg-action-link-04\",\n          \"active:bg-action-link-06\",\n        ],\n        disabled: [\"bg-action-link-02\"],\n      },\n      secondary: {\n        enabled: [\n          \"bg-background-tint-02\",\n          \"hover:bg-background-tint-02\",\n          transient && \"bg-background-tint-02\",\n          \"active:bg-background-tint-00\",\n        ],\n        disabled: [\"bg-background-neutral-02\"],\n      },\n      tertiary: {\n        enabled: [\n          \"bg-transparent\",\n          \"hover:bg-background-tint-02\",\n          transient && \"bg-background-tint-02\",\n          \"active:bg-background-tint-00\",\n        ],\n        disabled: [\"bg-background-neutral-02\"],\n      },\n      internal: {\n        enabled: [\n          \"bg-transparent\",\n          \"hover:bg-background-tint-00\",\n          transient && \"bg-background-tint-00\",\n          \"active:bg-background-tint-00\",\n        ],\n        disabled: [\"bg-transparent\"],\n      },\n      small: {\n        enabled: [\n          \"bg-transparent\",\n          \"hover:bg-background-tint-00\",\n          transient && \"bg-background-tint-00\",\n          \"active:bg-background-tint-00\",\n        ],\n        disabled: [\"bg-transparent\"],\n      },\n    },\n    danger: {\n      primary: {\n        enabled: [\n          \"bg-action-danger-05\",\n          \"hover:bg-action-danger-04\",\n          transient && \"bg-action-danger-04\",\n          \"active:bg-action-danger-06\",\n        ],\n        disabled: [\"bg-action-danger-02\"],\n      },\n      secondary: {\n        enabled: [\n          \"bg-background-tint-02\",\n          \"hover:bg-background-tint-02\",\n          transient && \"bg-background-tint-02\",\n          \"active:bg-background-tint-00\",\n        ],\n        disabled: [\"bg-background-neutral-02\"],\n      },\n      tertiary: {\n        enabled: [\n          \"bg-transparent\",\n          \"hover:bg-background-tint-02\",\n          transient && \"bg-background-tint-02\",\n          \"active:bg-background-tint-00\",\n        ],\n        disabled: [\"bg-background-neutral-02\"],\n      },\n      internal: {\n        enabled: [\n          \"bg-transparent\",\n          \"hover:bg-background-tint-00\",\n          transient && \"bg-background-tint-00\",\n          \"active:bg-background-tint-00\",\n        ],\n        disabled: [\"bg-transparent\"],\n      },\n      small: {\n        enabled: [\n          \"bg-transparent\",\n          \"hover:bg-background-tint-00\",\n          transient && \"bg-background-tint-00\",\n          \"active:bg-background-tint-00\",\n        ],\n        disabled: [\"bg-transparent\"],\n      },\n    },\n  }) as const;\n\nconst iconClasses = (transient: boolean | undefined) =>\n  ({\n    main: {\n      primary: {\n        enabled: [\"stroke-text-inverted-05\"],\n        disabled: [\"stroke-text-inverted-05\"],\n      },\n      secondary: {\n        enabled: [\n          \"stroke-text-03\",\n          \"group-hover/IconButton:stroke-text-04\",\n          transient && \"stroke-text-04\",\n          \"group-active/IconButton:stroke-text-05\",\n        ],\n        disabled: [\"stroke-text-01\"],\n      },\n      tertiary: {\n        enabled: [\n          \"stroke-text-03\",\n          \"group-hover/IconButton:stroke-text-04\",\n          transient && \"stroke-text-04\",\n          \"group-active/IconButton:stroke-text-05\",\n        ],\n        disabled: [\"stroke-text-01\"],\n      },\n      internal: {\n        enabled: [\n          \"stroke-text-02\",\n          \"group-hover/IconButton:stroke-text-04\",\n          transient && \"stroke-text-04\",\n          \"group-active/IconButton:stroke-text-05\",\n        ],\n        disabled: [\"stroke-text-01\"],\n      },\n      small: {\n        enabled: [\n          \"stroke-text-02\",\n          \"group-hover/IconButton:stroke-text-04\",\n          transient && \"stroke-text-04\",\n          \"group-active/IconButton:stroke-text-05\",\n        ],\n        disabled: [\"stroke-text-01\"],\n      },\n    },\n    action: {\n      primary: {\n        enabled: [\"stroke-text-light-05\"],\n        disabled: [\"stroke-text-01\"],\n      },\n      secondary: {\n        enabled: [\n          \"stroke-action-link-05\",\n          \"group-hover/IconButton:stroke-action-link-05\",\n          transient && \"stroke-action-link-05\",\n          \"group-active/IconButton:stroke-action-link-06\",\n        ],\n        disabled: [\"stroke-action-link-02\"],\n      },\n      tertiary: {\n        enabled: [\n          \"stroke-action-link-05\",\n          \"group-hover/IconButton:stroke-action-link-05\",\n          transient && \"stroke-action-link-05\",\n          \"group-active/IconButton:stroke-action-link-06\",\n        ],\n        disabled: [\"stroke-action-link-02\"],\n      },\n      internal: {\n        enabled: [\n          \"stroke-action-link-05\",\n          \"group-hover/IconButton:stroke-action-link-05\",\n          transient && \"stroke-action-link-05\",\n          \"group-active/IconButton:stroke-action-link-06\",\n        ],\n        disabled: [\"stroke-action-link-02\"],\n      },\n      small: {\n        enabled: [\n          \"stroke-action-link-05\",\n          \"group-hover/IconButton:stroke-action-link-05\",\n          transient && \"stroke-action-link-05\",\n          \"group-active/IconButton:stroke-action-link-06\",\n        ],\n        disabled: [\"stroke-action-link-02\"],\n      },\n    },\n    danger: {\n      primary: {\n        enabled: [\"stroke-text-light-05\"],\n        disabled: [\"stroke-text-01\"],\n      },\n      secondary: {\n        enabled: [\n          \"stroke-action-danger-05\",\n          \"group-hover/IconButton:stroke-action-danger-05\",\n          transient && \"stroke-action-danger-05\",\n          \"group-active/IconButton:stroke-action-danger-06\",\n        ],\n        disabled: [\"stroke-action-danger-02\"],\n      },\n      tertiary: {\n        enabled: [\n          \"stroke-action-danger-05\",\n          \"group-hover/IconButton:stroke-action-danger-05\",\n          transient && \"stroke-action-danger-05\",\n          \"group-active/IconButton:stroke-action-danger-06\",\n        ],\n        disabled: [\"stroke-action-danger-02\"],\n      },\n      internal: {\n        enabled: [\n          \"stroke-action-danger-05\",\n          \"group-hover/IconButton:stroke-action-danger-05\",\n          transient && \"stroke-action-danger-05\",\n          \"group-active/IconButton:stroke-action-danger-06\",\n        ],\n        disabled: [\"stroke-action-danger-02\"],\n      },\n      small: {\n        enabled: [\n          \"stroke-action-danger-05\",\n          \"group-hover/IconButton:stroke-action-danger-05\",\n          transient && \"stroke-action-danger-05\",\n          \"group-active/IconButton:stroke-action-danger-06\",\n        ],\n        disabled: [\"stroke-action-danger-02\"],\n      },\n    },\n  }) as const;\n\nexport interface IconButtonProps\n  extends React.ButtonHTMLAttributes<HTMLButtonElement> {\n  // Top level button variants\n  main?: boolean;\n  action?: boolean;\n  danger?: boolean;\n\n  // Button sub-variants\n  primary?: boolean;\n  secondary?: boolean;\n  tertiary?: boolean;\n  internal?: boolean;\n\n  // Button size\n  small?: boolean;\n\n  // Button states\n  transient?: boolean;\n  disabled?: boolean;\n\n  // Button properties\n  onHover?: (isHovering: boolean) => void;\n  onClick?: React.MouseEventHandler<HTMLButtonElement>;\n  icon: React.FunctionComponent<IconProps>;\n  tooltip?: string;\n  toolTipPosition?: \"top\" | \"bottom\" | \"left\" | \"right\";\n  tooltipSize?: \"sm\" | \"md\" | \"lg\";\n  /** Additional className to apply to the icon element */\n  iconClassName?: string;\n}\n\nexport default function IconButton({\n  main,\n  action,\n  danger,\n\n  primary,\n  secondary,\n  tertiary,\n  internal,\n  small,\n\n  transient,\n  disabled,\n\n  onHover,\n  onClick,\n  icon: Icon,\n  className,\n  iconClassName,\n  tooltip,\n  toolTipPosition = \"top\",\n  tooltipSize = \"lg\",\n  ...props\n}: IconButtonProps) {\n  const variant = main\n    ? \"main\"\n    : action\n      ? \"action\"\n      : danger\n        ? \"danger\"\n        : \"main\";\n  const subvariant = primary\n    ? \"primary\"\n    : secondary\n      ? \"secondary\"\n      : tertiary\n        ? \"tertiary\"\n        : internal\n          ? \"internal\"\n          : small\n            ? \"small\"\n            : \"primary\";\n  const abled = disabled ? \"disabled\" : \"enabled\";\n\n  const buttonClass = useMemo(\n    () => buttonClasses(transient)[variant][subvariant][abled],\n    [transient, variant, subvariant, abled]\n  );\n  const iconClass = useMemo(\n    () => iconClasses(transient)[variant][subvariant][abled],\n    [transient, variant, subvariant, abled]\n  );\n\n  const buttonElement = (\n    <button\n      type=\"button\"\n      className={cn(\n        \"flex items-center justify-center h-fit w-fit group/IconButton\",\n        small || internal ? \"p-1\" : \"p-2\",\n        disabled && \"cursor-not-allowed\",\n        small || internal ? \"rounded-08\" : \"rounded-12\",\n        buttonClass,\n        className\n      )}\n      onClick={disabled ? undefined : onClick}\n      onMouseEnter={(e) => {\n        props.onMouseEnter?.(e);\n        if (!disabled) onHover?.(true);\n      }}\n      onMouseLeave={(e) => {\n        props.onMouseLeave?.(e);\n        if (!disabled) onHover?.(false);\n      }}\n      disabled={disabled}\n      {...props}\n    >\n      <Icon\n        className={cn(\n          small ? \"h-[0.75rem] w-[0.75rem]\" : \"h-[1rem] w-[1rem]\",\n          iconClass,\n          iconClassName\n        )}\n      />\n    </button>\n  );\n\n  if (!tooltip) return buttonElement;\n\n  return (\n    <SimpleTooltip side={toolTipPosition} size={tooltipSize} tooltip={tooltip}>\n      {buttonElement}\n    </SimpleTooltip>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/buttons/LineItem.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport LineItem from \"./LineItem\";\nimport {\n  SvgUser,\n  SvgSettings,\n  SvgTrash,\n  SvgFolder,\n  SvgCheck,\n  SvgSearch,\n} from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nconst meta: Meta<typeof LineItem> = {\n  title: \"refresh-components/buttons/LineItem\",\n  component: LineItem,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 300 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof LineItem>;\n\nexport const Default: Story = {\n  args: {\n    icon: SvgUser,\n    children: \"Profile Settings\",\n  },\n};\n\nexport const WithDescription: Story = {\n  args: {\n    icon: SvgSettings,\n    children: \"Settings\",\n    description: \"Manage your account settings\",\n  },\n};\n\nexport const Selected: Story = {\n  args: {\n    icon: SvgCheck,\n    children: \"Active Item\",\n    selected: true,\n  },\n};\n\nexport const SelectedEmphasized: Story = {\n  args: {\n    icon: SvgFolder,\n    children: \"Selected Folder\",\n    selected: true,\n    emphasized: true,\n  },\n};\n\nexport const Danger: Story = {\n  args: {\n    icon: SvgTrash,\n    children: \"Delete Account\",\n    danger: true,\n  },\n};\n\nexport const Action: Story = {\n  args: {\n    icon: SvgSearch,\n    children: \"Search Results\",\n    action: true,\n  },\n};\n\nexport const Muted: Story = {\n  args: {\n    icon: SvgFolder,\n    children: \"Secondary Item\",\n    muted: true,\n  },\n};\n\nexport const Strikethrough: Story = {\n  args: {\n    icon: SvgFolder,\n    children: \"Archived Feature\",\n    strikethrough: true,\n  },\n};\n\nexport const WithRightChildren: Story = {\n  args: {\n    icon: SvgSettings,\n    children: \"Keyboard Shortcuts\",\n    rightChildren: (\n      <Text as=\"p\" secondaryBody text03>\n        Cmd+K\n      </Text>\n    ),\n  },\n};\n\nexport const MenuExample: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", flexDirection: \"column\", gap: 2 }}>\n      <LineItem icon={SvgUser}>Profile</LineItem>\n      <LineItem icon={SvgSettings} description=\"Manage your preferences\">\n        Settings\n      </LineItem>\n      <LineItem icon={SvgFolder} selected emphasized>\n        Documents\n      </LineItem>\n      <LineItem icon={SvgSearch} action>\n        Search\n      </LineItem>\n      <LineItem icon={SvgFolder} muted>\n        Archived\n      </LineItem>\n      <LineItem icon={SvgTrash} danger>\n        Delete\n      </LineItem>\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/buttons/LineItem.tsx",
    "content": "import React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport type { IconProps } from \"@opal/types\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport Link from \"next/link\";\nimport type { Route } from \"next\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { WithoutStyles } from \"@/types\";\n\nconst buttonClassNames = {\n  main: {\n    normal: \"line-item-button-main\",\n    emphasized: \"line-item-button-main-emphasized\",\n  },\n  strikethrough: {\n    normal: \"line-item-button-strikethrough\",\n    emphasized: \"line-item-button-strikethrough-emphasized\",\n  },\n  disabled: {\n    normal: \"line-item-button-disabled\",\n    emphasized: \"line-item-button-disabled-emphasized\",\n  },\n  danger: {\n    normal: \"line-item-button-danger\",\n    emphasized: \"line-item-button-danger-emphasized\",\n  },\n  action: {\n    normal: \"line-item-button-action\",\n    emphasized: \"line-item-button-action-emphasized\",\n  },\n  muted: {\n    normal: \"line-item-button-muted\",\n    emphasized: \"line-item-button-muted-emphasized\",\n  },\n  skeleton: {\n    normal: \"line-item-button-skeleton\",\n    emphasized: \"line-item-button-skeleton-emphasized\",\n  },\n} as const;\n\nconst textClassNames = {\n  main: \"line-item-text-main\",\n  strikethrough: \"line-item-text-strikethrough\",\n  disabled: \"line-item-text-disabled\",\n  danger: \"line-item-text-danger\",\n  action: \"line-item-text-action\",\n  muted: \"line-item-text-muted\",\n  skeleton: \"line-item-text-skeleton\",\n} as const;\n\nconst iconClassNames = {\n  main: \"line-item-icon-main\",\n  strikethrough: \"line-item-icon-strikethrough\",\n  disabled: \"line-item-icon-disabled\",\n  danger: \"line-item-icon-danger\",\n  action: \"line-item-icon-action\",\n  muted: \"line-item-icon-muted\",\n  skeleton: \"line-item-icon-skeleton\",\n} as const;\n\nexport interface LineItemProps\n  extends Omit<\n    WithoutStyles<React.HTMLAttributes<HTMLDivElement>>,\n    \"children\"\n  > {\n  /**\n   * Whether the row should behave like a standalone interactive button.\n   * Set to false when nested inside another interactive primitive\n   * (e.g. Radix Select.Item) to avoid nested focus targets.\n   */\n  interactive?: boolean;\n  // line-item variants\n  strikethrough?: boolean;\n  disabled?: boolean;\n  danger?: boolean;\n  action?: boolean;\n  muted?: boolean;\n  skeleton?: boolean;\n\n  // modifier (makes the background more pronounced when selected).\n  emphasized?: boolean;\n\n  selected?: boolean;\n  icon?: React.FunctionComponent<IconProps>;\n  description?: string;\n  rightChildren?: React.ReactNode;\n  href?: string;\n  rel?: string;\n  target?: string;\n  ref?: React.Ref<HTMLDivElement>;\n  children?: React.ReactNode;\n}\n\n/**\n * LineItem Component\n *\n * A versatile menu item button component designed for use in dropdowns, sidebars, and menus.\n * Supports icons, descriptions, and multiple visual states.\n *\n * @example\n * ```tsx\n * // Basic usage\n * <LineItem icon={SvgUser}>Profile Settings</LineItem>\n *\n * // With selection state\n * <LineItem icon={SvgCheck} selected>Active Item</LineItem>\n *\n * // With emphasis (highlighted background)\n * <LineItem icon={SvgFolder} selected emphasized>\n *   Selected Folder\n * </LineItem>\n *\n * // Danger variant\n * <LineItem icon={SvgTrash} danger>Delete Account</LineItem>\n *\n * // With description\n * <LineItem icon={SvgSettings} description=\"Manage your account settings\">\n *   Settings\n * </LineItem>\n *\n * // With right content\n * <LineItem icon={SvgKey} rightChildren={<Text as=\"p\" text03>⌘K</Text>}>\n *   Keyboard Shortcuts\n * </LineItem>\n *\n * // As a link\n * <LineItem icon={SvgHome} href=\"/dashboard\">Dashboard</LineItem>\n *\n * // Strikethrough (disabled/deprecated items)\n * <LineItem icon={SvgArchive} strikethrough>\n *   Archived Feature\n * </LineItem>\n *\n * // Muted variant (less prominent items)\n * <LineItem icon={SvgFolder} muted>\n *   Secondary Item\n * </LineItem>\n * ```\n *\n * @remarks\n * - Variants are mutually exclusive: only one of `strikethrough`, `danger`, `action`, `muted`, or `skeleton` should be used\n * - The `selected` prop modifies text/icon colors for `main` and `danger` variants\n * - The `emphasized` prop adds background colors when combined with `selected`\n * - The component automatically adds a `data-selected=\"true\"` attribute for custom styling\n */\nexport default function LineItem({\n  interactive = true,\n  selected,\n  strikethrough,\n  disabled,\n  danger,\n  action,\n  muted,\n  skeleton,\n  emphasized,\n  icon: Icon,\n  description,\n  children,\n  rightChildren,\n  href,\n  rel,\n  target,\n  ref,\n  ...props\n}: LineItemProps) {\n  // Determine variant (mutually exclusive, with priority order: strikethrough > disabled > danger > action > muted > main)\n  const variant = strikethrough\n    ? \"strikethrough\"\n    : disabled\n      ? \"disabled\"\n      : danger\n        ? \"danger\"\n        : action\n          ? \"action\"\n          : muted\n            ? \"muted\"\n            : skeleton\n              ? \"skeleton\"\n              : \"main\";\n\n  const emphasisKey = emphasized ? \"emphasized\" : \"normal\";\n\n  const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {\n    if (disabled) {\n      e.preventDefault();\n      e.stopPropagation();\n      return;\n    }\n    props.onClick?.(e);\n  };\n\n  const handleKeyDown = (e: React.KeyboardEvent<HTMLDivElement>) => {\n    if (!interactive) {\n      props.onKeyDown?.(e);\n      return;\n    }\n\n    if (e.key === \"Enter\") {\n      e.preventDefault();\n      if (!disabled) {\n        (e.currentTarget as HTMLDivElement).click();\n      }\n    } else if (e.key === \" \") {\n      e.preventDefault();\n    }\n    props.onKeyDown?.(e);\n  };\n\n  const handleKeyUp = (e: React.KeyboardEvent<HTMLDivElement>) => {\n    if (!interactive) {\n      props.onKeyUp?.(e);\n      return;\n    }\n\n    if (e.key === \" \") {\n      e.preventDefault();\n      if (!disabled) {\n        (e.currentTarget as HTMLDivElement).click();\n      }\n    }\n    props.onKeyUp?.(e);\n  };\n\n  const content = (\n    <div\n      ref={ref}\n      role={interactive ? \"button\" : undefined}\n      tabIndex={interactive ? 0 : undefined}\n      aria-disabled={disabled || undefined}\n      className={cn(\n        \"flex flex-row w-full items-start p-2 rounded-08 group/LineItem gap-2\",\n        !!(children && description) ? \"items-start\" : \"items-center\",\n        buttonClassNames[variant][emphasisKey]\n      )}\n      data-selected={selected}\n      {...props}\n      onClick={handleClick}\n      onKeyDown={handleKeyDown}\n      onKeyUp={handleKeyUp}\n    >\n      {Icon && (\n        <div\n          className={cn(\n            \"flex flex-col justify-center items-center h-[1rem] min-w-[1rem]\",\n            !!(children && description) && \"mt-0.5\"\n          )}\n        >\n          <Icon className={cn(\"h-[1rem] w-[1rem]\", iconClassNames[variant])} />\n        </div>\n      )}\n      <Section alignItems=\"start\" gap={0}>\n        {children ? (\n          <>\n            <Section flexDirection=\"row\" gap={0.5}>\n              <Truncated\n                mainUiMuted\n                className={cn(\"text-left w-full\", textClassNames[variant])}\n              >\n                {children}\n              </Truncated>\n              {rightChildren && (\n                <Section alignItems=\"end\" width=\"fit\">\n                  {rightChildren}\n                </Section>\n              )}\n            </Section>\n            {description && (\n              <Truncated secondaryBody text03 className=\"text-left w-full\">\n                {description}\n              </Truncated>\n            )}\n          </>\n        ) : description ? (\n          <Section flexDirection=\"row\" gap={0.5}>\n            <Truncated secondaryBody text03 className=\"text-left w-full\">\n              {description}\n            </Truncated>\n            {rightChildren && (\n              <Section alignItems=\"end\" width=\"fit\">\n                {rightChildren}\n              </Section>\n            )}\n          </Section>\n        ) : null}\n      </Section>\n    </div>\n  );\n\n  if (!href) return content;\n  return (\n    <Link href={href as Route} rel={rel} target={target}>\n      {content}\n    </Link>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/buttons/SelectButton.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport SelectButton from \"./SelectButton\";\nimport { SvgFilter, SvgSettings } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof SelectButton> = {\n  title: \"refresh-components/buttons/SelectButton\",\n  component: SelectButton,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof SelectButton>;\n\nexport const Default: Story = {\n  args: {\n    children: \"Select Option\",\n  },\n};\n\nexport const MainVariant: Story = {\n  args: {\n    main: true,\n    children: \"Main Select\",\n    leftIcon: SvgFilter,\n  },\n};\n\nexport const ActionVariant: Story = {\n  args: {\n    action: true,\n    children: \"Action Select\",\n    leftIcon: SvgSettings,\n  },\n};\n\nexport const Engaged: Story = {\n  args: {\n    action: true,\n    engaged: true,\n    children: \"Engaged\",\n    leftIcon: SvgSettings,\n  },\n};\n\nexport const WithChevron: Story = {\n  args: {\n    main: true,\n    children: \"Dropdown\",\n    leftIcon: SvgFilter,\n    rightChevronIcon: true,\n  },\n};\n\nexport const Transient: Story = {\n  args: {\n    main: true,\n    transient: true,\n    children: \"Transient\",\n    leftIcon: SvgFilter,\n    rightChevronIcon: true,\n  },\n};\n\nexport const Folded: Story = {\n  args: {\n    main: true,\n    folded: true,\n    children: \"Folded Label\",\n    leftIcon: SvgFilter,\n  },\n};\n\nexport const FoldedAction: Story = {\n  args: {\n    action: true,\n    folded: true,\n    children: \"Set as Default\",\n    rightIcon: SvgSettings,\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    main: true,\n    disabled: true,\n    children: \"Disabled\",\n    leftIcon: SvgFilter,\n  },\n};\n\nexport const ActionDisabled: Story = {\n  args: {\n    action: true,\n    disabled: true,\n    children: \"Disabled Action\",\n    leftIcon: SvgSettings,\n  },\n};\n\nexport const AllStates: Story = {\n  render: () => (\n    <div\n      style={{\n        display: \"flex\",\n        gap: 16,\n        alignItems: \"center\",\n        flexWrap: \"wrap\",\n      }}\n    >\n      <SelectButton main leftIcon={SvgFilter}>\n        Main\n      </SelectButton>\n      <SelectButton action leftIcon={SvgSettings}>\n        Action\n      </SelectButton>\n      <SelectButton action engaged leftIcon={SvgSettings}>\n        Engaged\n      </SelectButton>\n      <SelectButton main transient leftIcon={SvgFilter} rightChevronIcon>\n        Transient\n      </SelectButton>\n      <SelectButton main disabled leftIcon={SvgFilter}>\n        Disabled\n      </SelectButton>\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/buttons/SelectButton.tsx",
    "content": "\"use client\";\n\nimport React, { useState, useMemo } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport type { IconProps } from \"@opal/types\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgChevronDownSmall } from \"@opal/icons\";\nimport { useContentSize } from \"@/hooks/useContentSize\";\n\nconst MARGIN = 5;\n\nconst baseClassNames = (engaged?: boolean, transient?: boolean) =>\n  ({\n    main: {\n      enabled: [\n        \"bg-transparent\",\n        \"hover:bg-background-tint-02\",\n        transient && \"bg-background-tint-02\",\n        \"active:bg-background-tint-00\",\n      ],\n      disabled: [\"bg-background-neutral-02\"],\n    },\n    action: {\n      enabled: [\n        engaged ? \"bg-action-link-01\" : \"bg-transparent\",\n        engaged ? \"hover:bg-action-link-01\" : \"hover:bg-background-tint-02\",\n        \"active:bg-background-tint-00\",\n      ],\n      disabled: [\"bg-background-neutral-02\"],\n    },\n  }) as const;\n\nconst iconClassNames = (engaged?: boolean, transient?: boolean) =>\n  ({\n    main: {\n      enabled: [\n        \"stroke-text-03\",\n        \"group-hover/SelectButton:stroke-text-04\",\n        transient && \"stroke-text-04\",\n        \"group-active/SelectButton:stroke-text-05\",\n      ],\n      disabled: [\"stroke-text-02\"],\n    },\n    action: {\n      enabled: [\n        engaged ? \"stroke-action-link-05\" : \"stroke-text-03\",\n        engaged\n          ? \"group-hover/SelectButton:stroke-action-link-05\"\n          : \"group-hover/SelectButton:stroke-text-04\",\n        engaged\n          ? \"group-active/SelectButton:stroke-action-link-06\"\n          : \"group-active/SelectButton:stroke-text-05\",\n      ],\n      disabled: [\"stroke-action-link-03\"],\n    },\n  }) as const;\n\nconst textClassNames = (engaged?: boolean, transient?: boolean) =>\n  ({\n    main: {\n      enabled: [\n        \"text-text-03\",\n        \"group-hover/SelectButton:text-text-04\",\n        transient && \"text-text-04\",\n        \"group-active/SelectButton:text-text-05\",\n      ],\n      disabled: [\"text-text-01\"],\n    },\n    action: {\n      enabled: [\n        engaged ? \"text-action-link-05\" : \"text-text-03\",\n        engaged\n          ? \"group-hover/SelectButton:text-action-link-05\"\n          : \"group-hover/SelectButton:text-text-04\",\n        engaged\n          ? \"group-active/SelectButton:text-action-link-06\"\n          : \"group-active/SelectButton:text-text-05\",\n      ],\n      disabled: [\"stroke-action-link-03\"],\n    },\n  }) as const;\n\nexport interface SelectButtonProps {\n  // Button variants\n  main?: boolean;\n  action?: boolean;\n\n  // Button states\n  transient?: boolean;\n  engaged?: boolean;\n  disabled?: boolean;\n  folded?: boolean;\n\n  // Content\n  children: string;\n  leftIcon?: React.FunctionComponent<IconProps>;\n  rightIcon?: React.FunctionComponent<IconProps>;\n  rightChevronIcon?: boolean;\n  onClick?: () => void;\n  className?: string;\n}\n\nexport default function SelectButton({\n  main,\n  action,\n\n  transient,\n  engaged,\n  disabled,\n  folded,\n\n  children,\n  leftIcon: LeftIcon,\n  rightIcon: RightIcon,\n  rightChevronIcon,\n  onClick,\n  className,\n}: SelectButtonProps) {\n  const hasRightIcon = !!RightIcon;\n  const hasLeftIcon = !!LeftIcon;\n  const variant = main ? \"main\" : action ? \"action\" : \"main\";\n  const state = disabled ? \"disabled\" : \"enabled\";\n\n  // Refs and state for measuring foldedContent width\n  const [hovered, setHovered] = useState<boolean>(false);\n\n  // Memoize class name invocations\n  const baseClasses = useMemo(\n    () => baseClassNames(engaged, transient)[variant][state],\n    [engaged, transient, variant, state]\n  );\n  const iconClasses = useMemo(\n    () => iconClassNames(engaged, transient)[variant][state],\n    [engaged, transient, variant, state]\n  );\n  const textClasses = useMemo(\n    () => textClassNames(engaged, transient)[variant][state],\n    [engaged, transient, variant, state]\n  );\n\n  const content = useMemo(\n    () => (\n      <div className=\"flex flex-row items-center justify-center\">\n        <Text as=\"p\" className={cn(\"whitespace-nowrap\", textClasses)}>\n          {children}\n        </Text>\n\n        {rightChevronIcon && (\n          <SvgChevronDownSmall\n            className={cn(\n              \"w-[1rem] h-[1rem] transition-all duration-300 ease-in-out\",\n              iconClasses,\n              transient && \"-rotate-180\"\n            )}\n          />\n        )}\n      </div>\n    ),\n    [textClasses, iconClasses, rightChevronIcon, children, transient]\n  );\n  const [measureRef, { width: foldedContentWidth }] = useContentSize([content]);\n\n  return (\n    <>\n      {/* Hidden element for measuring the natural width of the content */}\n      <div\n        ref={measureRef}\n        className=\"flex items-center w-auto h-fit absolute -left-[9999rem] opacity-0 pointer-events-none\"\n      >\n        {content}\n      </div>\n\n      <button\n        className={cn(\n          baseClasses,\n          \"group/SelectButton flex items-center px-2 py-2 rounded-12 h-fit w-fit\",\n          className\n        )}\n        onClick={disabled ? undefined : onClick}\n        disabled={disabled}\n        onMouseEnter={() => setHovered(true)}\n        onMouseOver={() => setHovered(true)}\n        onMouseLeave={() => setHovered(false)}\n      >\n        {/* Left icon */}\n        {hasLeftIcon && LeftIcon && (\n          <LeftIcon className={cn(\"w-[1rem] h-[1rem]\", iconClasses)} />\n        )}\n\n        {/* Animation component */}\n        <div\n          className={cn(\n            \"flex items-center transition-all duration-300 ease-in-out overflow-hidden\",\n            folded\n              ? engaged || transient || hovered\n                ? \"opacity-100\"\n                : \"opacity-0\"\n              : \"opacity-100\"\n          )}\n          style={{\n            width: folded\n              ? engaged || transient || hovered\n                ? `${foldedContentWidth}px`\n                : \"0px\"\n              : `${foldedContentWidth}px`,\n            margin: folded\n              ? engaged || transient || hovered\n                ? hasRightIcon\n                  ? `0px ${MARGIN}px 0px 0px`\n                  : `0px 0px 0px ${MARGIN}px`\n                : \"0px\"\n              : hasRightIcon\n                ? `0px ${MARGIN}px 0px 0px`\n                : `0px 0px 0px ${MARGIN}px`,\n          }}\n        >\n          {content}\n        </div>\n\n        {/* Right icon */}\n        {hasRightIcon && RightIcon && (\n          <RightIcon className={cn(\"w-[1rem] h-[1rem]\", iconClasses)} />\n        )}\n      </button>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/buttons/SquareButton.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport SquareButton from \"./SquareButton\";\nimport { SvgPlus, SvgSettings, SvgSearch, SvgX } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof SquareButton> = {\n  title: \"refresh-components/buttons/SquareButton\",\n  component: SquareButton,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof SquareButton>;\n\nexport const Default: Story = {\n  args: {\n    icon: SvgPlus,\n  },\n};\n\nexport const Transient: Story = {\n  args: {\n    icon: SvgSettings,\n    transient: true,\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    icon: SvgPlus,\n    disabled: true,\n  },\n};\n\nexport const AllVariants: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: 12, alignItems: \"center\" }}>\n      <SquareButton icon={SvgPlus} />\n      <SquareButton icon={SvgSettings} transient />\n      <SquareButton icon={SvgSearch} />\n      <SquareButton icon={SvgX} />\n      <SquareButton icon={SvgPlus} disabled />\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/buttons/SquareButton.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport type { IconProps } from \"@opal/types\";\n\nexport interface SquareButtonProps\n  extends Omit<React.ComponentPropsWithoutRef<\"button\">, \"children\"> {\n  transient?: boolean;\n  icon: React.FunctionComponent<IconProps>;\n}\n\nconst SquareButton = React.forwardRef<HTMLButtonElement, SquareButtonProps>(\n  ({ transient = false, icon: Icon, className, ...props }, ref) => {\n    return (\n      <button\n        ref={ref}\n        type=\"button\"\n        data-state={transient ? \"transient\" : \"normal\"}\n        className={cn(\"square-button rounded-08\", className)}\n        {...props}\n      >\n        <Icon className=\"h-5 w-5\" />\n      </button>\n    );\n  }\n);\nSquareButton.displayName = \"SquareButton\";\n\nexport default SquareButton;\n"
  },
  {
    "path": "web/src/refresh-components/buttons/Tag.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Tag from \"./Tag\";\nimport { SvgFilter, SvgUser, SvgFolder } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof Tag> = {\n  title: \"refresh-components/buttons/Tag\",\n  component: Tag,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Tag>;\n\nexport const Default: Story = {\n  args: {\n    label: \"Label\",\n  },\n};\n\nexport const DisplayVariant: Story = {\n  args: {\n    label: \"Display Tag\",\n    variant: \"display\",\n  },\n};\n\nexport const EditableVariant: Story = {\n  args: {\n    label: \"Editable Tag\",\n    variant: \"editable\",\n  },\n};\n\nexport const WithIcon: Story = {\n  args: {\n    label: \"With Icon\",\n    icon: SvgFilter,\n  },\n};\n\nexport const Removable: Story = {\n  args: {\n    label: \"Removable\",\n    variant: \"editable\",\n    onRemove: () => {},\n  },\n};\n\nexport const Clickable: Story = {\n  args: {\n    label: \"Click Me\",\n    onClick: () => {},\n  },\n};\n\nexport const WithIconAndRemove: Story = {\n  args: {\n    label: \"Filter: Active\",\n    variant: \"editable\",\n    icon: SvgFilter,\n    onRemove: () => {},\n  },\n};\n\nexport const TagGroup: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: 8, flexWrap: \"wrap\" }}>\n      <Tag label=\"React\" variant=\"display\" />\n      <Tag label=\"TypeScript\" variant=\"display\" icon={SvgFolder} />\n      <Tag\n        label=\"Active Filter\"\n        variant=\"editable\"\n        icon={SvgFilter}\n        onRemove={() => {}}\n      />\n      <Tag\n        label=\"John Doe\"\n        variant=\"editable\"\n        icon={SvgUser}\n        onRemove={() => {}}\n      />\n      <Tag label=\"Clickable\" onClick={() => {}} />\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/buttons/Tag.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgX } from \"@opal/icons\";\nimport type { IconProps } from \"@opal/types\";\n\nconst variantStyles = {\n  display: {\n    container: \"flex items-center p-1\",\n    icon: \"size-4 stroke-text-03\",\n    text: { secondaryBody: true, text03: true },\n  },\n  editable: {\n    container: \"flex items-center gap-1 px-2 py-1\",\n    icon: \"size-3 stroke-text-03\",\n    text: { mainUiBody: true, text04: true },\n  },\n} as const;\n\nexport interface TagProps {\n  label: string;\n  variant?: \"display\" | \"editable\";\n  icon?: React.FunctionComponent<IconProps>;\n  onRemove?: () => void;\n  onClick?: () => void;\n  className?: string;\n  ref?: React.Ref<HTMLDivElement>;\n}\n\nexport default function Tag({\n  label,\n  variant = \"display\",\n  icon: Icon,\n  onRemove,\n  onClick,\n  className,\n  ref,\n}: TagProps) {\n  const styles = variantStyles[variant];\n\n  return (\n    <div\n      ref={ref}\n      className={cn(\n        styles.container,\n        \"rounded-08\",\n        \"bg-background-tint-02 hover:bg-background-tint-03\",\n        \"focus-visible:shadow-[0_0_0_2px_var(--background-tint-04)]\",\n        \"outline-none transition-colors\",\n        onClick || variant === \"display\" ? \"cursor-pointer\" : undefined,\n        className\n      )}\n      onClick={onClick}\n      role={onClick ? \"button\" : undefined}\n      tabIndex={onClick ? 0 : undefined}\n      onKeyDown={\n        onClick\n          ? (e) => {\n              if (e.key === \"Enter\" || e.key === \" \") {\n                e.preventDefault();\n                onClick();\n              }\n            }\n          : undefined\n      }\n    >\n      {Icon && <Icon className={styles.icon} />}\n      <Text {...styles.text}>{label}</Text>\n      {onRemove && (\n        <button\n          type=\"button\"\n          onClick={(e) => {\n            e.stopPropagation();\n            onRemove();\n          }}\n          className=\"p-0.5 stroke-text-02 hover:stroke-text-03\"\n          aria-label={`Remove ${label} filter`}\n        >\n          <SvgX className=\"size-3\" />\n        </button>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/buttons/source-tag/SourceTag.tsx",
    "content": "\"use client\";\n\nimport {\n  memo,\n  useState,\n  useMemo,\n  useCallback,\n  useRef,\n  useLayoutEffect,\n} from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport {\n  Tooltip,\n  TooltipContent,\n  TooltipProvider,\n  TooltipTrigger,\n} from \"@/components/ui/tooltip\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport { WebResultIcon } from \"@/components/WebResultIcon\";\nimport { ValidSources } from \"@/lib/types\";\nimport SourceTagDetailsCard, {\n  SourceInfo,\n} from \"@/refresh-components/buttons/source-tag/SourceTagDetailsCard\";\n\nexport type { SourceInfo };\n\n// Variant-specific styles\nconst sizeClasses = {\n  inlineCitation: {\n    container: \"rounded-04 p-0.5 gap-0.5\",\n  },\n  tag: {\n    container: \"rounded-08 p-1 gap-1\",\n  },\n  button: {\n    container: \"rounded-08 h-[2.25rem] min-w-[2.25rem] p-2 gap-1\",\n  },\n} as const;\n\n/**\n * Hook to detect if text content is truncated within its container.\n *\n * Compares scrollWidth vs clientWidth to determine if CSS truncation is active.\n * Re-checks on window resize and when the text content changes.\n *\n * @param text - The text content to monitor for truncation\n * @returns Object containing:\n *   - `isTruncated`: Whether the text is currently truncated\n *   - `textRef`: Ref to attach to the text container element\n *\n * @example\n * ```tsx\n * const { isTruncated, textRef } = useIsTruncated(displayName);\n * return (\n *   <span ref={textRef} className=\"truncate\">\n *     {displayName}\n *   </span>\n * );\n * ```\n */\nfunction useIsTruncated(text: string) {\n  const [isTruncated, setIsTruncated] = useState(false);\n  const textRef = useRef<HTMLSpanElement>(null);\n\n  useLayoutEffect(() => {\n    function checkTruncation() {\n      if (textRef.current) {\n        setIsTruncated(\n          textRef.current.scrollWidth > textRef.current.clientWidth\n        );\n      }\n    }\n\n    const timeoutId = setTimeout(checkTruncation, 0);\n    window.addEventListener(\"resize\", checkTruncation);\n\n    return () => {\n      clearTimeout(timeoutId);\n      window.removeEventListener(\"resize\", checkTruncation);\n    };\n  }, [text]);\n\n  return { isTruncated, textRef };\n}\n\n/**\n * Generates a unique key for a source based on its icon type.\n *\n * Used to deduplicate sources with identical icons when displaying stacked icons.\n *\n * @param source - The source info object\n * @returns A unique string key based on:\n *   - Custom icon name if `source.icon` exists\n *   - Hostname from URL for web sources\n *   - Source type string for other sources\n */\nconst getIconKey = (source: SourceInfo): string => {\n  if (source.icon) return source.icon.name || \"custom\";\n  if (source.sourceType === ValidSources.Web && source.sourceUrl) {\n    try {\n      return new URL(source.sourceUrl).hostname;\n    } catch {\n      return source.sourceUrl;\n    }\n  }\n  return source.sourceType;\n};\n\n/**\n * Renders the appropriate icon for a source based on its type and properties.\n *\n * Icon selection priority:\n * 1. Custom icon component (`source.icon`) - rendered directly\n * 2. Web source with URL - renders favicon via `WebResultIcon`\n * 3. Default - renders standard `SourceIcon` for the source type\n *\n * @param props.source - The source info containing icon, sourceType, and optional sourceUrl\n */\nconst SourceIconRenderer = ({ source }: { source: SourceInfo }) => {\n  if (source.icon) {\n    return <source.icon size={12} />;\n  }\n  if (source.sourceType === ValidSources.Web && source.sourceUrl) {\n    return <WebResultIcon url={source.sourceUrl} size={12} />;\n  }\n  return <SourceIcon sourceType={source.sourceType} iconSize={12} />;\n};\n\n/**\n * Props for the IconStack sub-component.\n */\ninterface IconStackProps {\n  sources: SourceInfo[];\n  isQuery?: boolean;\n  isOpen: boolean;\n  showDetailsCard: boolean;\n}\n\n/**\n * Renders a horizontal stack of up to 3 source icons with overlapping layout.\n *\n * Icons are displayed with negative spacing to create a stacked/overlapping effect.\n * Each icon has a border that changes color based on open/hover states.\n *\n * @param props.sources - Array of sources to display (max 3 shown)\n * @param props.isQuery - When true, removes icon background\n * @param props.isOpen - Whether the details card is currently open\n * @param props.showDetailsCard - Whether hover interactions are enabled\n */\nconst IconStack = ({\n  sources,\n  isQuery,\n  isOpen,\n  showDetailsCard,\n}: IconStackProps) => (\n  <div className=\"flex items-center -space-x-1.5\">\n    {sources.slice(0, 3).map((source, index) => (\n      <div\n        key={source.id ?? `source-${index}`}\n        className={cn(\n          \"relative flex items-center justify-center p-0.5 rounded-04\",\n          !isQuery && \"bg-background-tint-00\",\n          \"border transition-colors duration-150\",\n          isOpen\n            ? \"border-background-tint-inverted-03\"\n            : \"border-background-tint-02\",\n          !showDetailsCard &&\n            !isQuery &&\n            \"group-hover:border-background-tint-inverted-03\"\n        )}\n        style={{ zIndex: sources.length - index }}\n      >\n        <SourceIconRenderer source={source} />\n      </div>\n    ))}\n  </div>\n);\n\n/**\n * Shared text styling props passed to Text and Truncated components.\n * Computed based on `inlineCitation` and `isOpen` state.\n */\ninterface TextStyleProps {\n  figureSmallValue?: boolean;\n  secondaryBody?: boolean;\n  text05?: boolean;\n  text03?: boolean;\n  text04?: boolean;\n  inverted?: boolean;\n}\n\n/**\n * Props for the QueryText sub-component.\n */\ninterface QueryTextProps {\n  expanded: boolean;\n  displayName: string;\n  tooltipText?: string;\n  isTruncated: boolean;\n  textRef: React.RefObject<HTMLSpanElement | null>;\n  textStyleProps: TextStyleProps;\n}\n\n/**\n * Renders query text with two display modes based on expansion state.\n *\n * **Collapsed mode** (default):\n * - Text is truncated at 10rem with CSS overflow\n * - Shows tooltip with full text when truncated\n * - Clicking expands to full width\n *\n * **Expanded mode**:\n * - Text displays at full width using `Truncated` component\n * - Provides its own overflow handling with tooltip\n *\n * @param props.expanded - Whether text is in expanded (full-width) mode\n * @param props.displayName - The text content to display\n * @param props.tooltipText - Custom tooltip text (defaults to displayName)\n * @param props.isTruncated - Whether the collapsed text is currently truncated\n * @param props.textRef - Ref for measuring text truncation in collapsed mode\n * @param props.textStyleProps - Shared text styling props (colors, typography)\n */\nconst QueryText = ({\n  expanded,\n  displayName,\n  tooltipText,\n  isTruncated,\n  textRef,\n  textStyleProps,\n}: QueryTextProps) => {\n  if (expanded) {\n    return (\n      <Truncated\n        {...textStyleProps}\n        className=\"max-w-full transition-colors duration-150\"\n      >\n        {displayName}\n      </Truncated>\n    );\n  }\n\n  return (\n    <TooltipProvider delayDuration={300}>\n      <Tooltip>\n        <TooltipTrigger asChild>\n          <span ref={textRef} className=\"max-w-[10rem] truncate block\">\n            <Text\n              as=\"span\"\n              {...textStyleProps}\n              className=\"transition-colors duration-150\"\n            >\n              {displayName}\n            </Text>\n          </span>\n        </TooltipTrigger>\n        {isTruncated && (\n          <TooltipContent\n            side=\"top\"\n            className=\"max-w-[400px] break-words whitespace-normal\"\n          >\n            <Text as=\"p\" textLight05>\n              {tooltipText ?? displayName}\n            </Text>\n          </TooltipContent>\n        )}\n      </Tooltip>\n    </TooltipProvider>\n  );\n};\n\n/**\n * Props for the SourceTag component.\n */\nexport interface SourceTagProps {\n  /** Sizing variant: \"inlineCitation\" for compact in-text use, \"button\" for interactive contexts, \"tag\" (default) for standard display */\n  variant?: \"inlineCitation\" | \"tag\" | \"button\";\n\n  /** Display name shown on the tag (e.g., \"Google Drive\", \"Business Insider\") */\n  displayName: string;\n\n  /** URL to display below name (for site type - shows domain) */\n  displayUrl?: string;\n\n  /** Array of sources for navigation in details card */\n  sources: SourceInfo[];\n\n  /** Callback when a source is clicked in the details card */\n  onSourceClick?: () => void;\n\n  /** Whether to show the details card on hover (defaults to true) */\n  showDetailsCard?: boolean;\n\n  /** Additional CSS classes */\n  className?: string;\n\n  /** When true, removes icon background and wraps displayName with Truncated */\n  isQuery?: boolean;\n\n  /** When true, hides icon, removes background, shows bg-background-tint-02 on hover */\n  isMore?: boolean;\n\n  /** When true, no details card, no background, tint-02 on hover */\n  toggleSource?: boolean;\n\n  /** Tooltip text shown when query is truncated (defaults to displayName) */\n  tooltipText?: string;\n}\n\n/**\n * A tag component for displaying source citations with multiple display modes.\n *\n * ## Display Modes\n *\n * **Standard Tag** (default):\n * - Shows stacked source icons + display name\n * - Hovering opens a details card with source navigation\n *\n * **Inline Citation** (`variant=\"inlineCitation\"`):\n * - Compact size for use within text content\n * - Shows \"+N\" count for multiple sources\n *\n * **Query Mode** (`isQuery`):\n * - No icon background, text-only appearance\n * - Truncated text expands on click\n * - Shows tooltip when truncated\n *\n * **More Mode** (`isMore`):\n * - Hides icons, shows only text\n * - No default background, shows tint on hover\n *\n * **Toggle Source** (`toggleSource`):\n * - No details card on hover\n * - No default background, shows tint on hover\n *\n * @example\n * ```tsx\n * // Standard tag with details card\n * <SourceTag\n *   displayName=\"Google Drive\"\n *   sources={[{ sourceType: ValidSources.GoogleDrive, ... }]}\n * />\n *\n * // Inline citation within text\n * <SourceTag\n *   variant=\"inlineCitation\"\n *   displayName=\"Source 1\"\n *   sources={multipleSources}\n * />\n *\n * // Query mode for search queries\n * <SourceTag\n *   isQuery\n *   displayName=\"What is the meaning of life?\"\n *   sources={[]}\n * />\n * ```\n */\nconst SourceTagInner = ({\n  variant = \"tag\",\n  displayName,\n  displayUrl,\n  sources,\n  onSourceClick,\n  showDetailsCard = true,\n  className,\n  isQuery,\n  isMore,\n  toggleSource,\n  tooltipText,\n}: SourceTagProps) => {\n  const inlineCitation = variant === \"inlineCitation\";\n\n  const [currentIndex, setCurrentIndex] = useState(0);\n  const [isOpen, setIsOpen] = useState(false);\n  const [expanded, setExpanded] = useState(false);\n  const { isTruncated, textRef } = useIsTruncated(displayName);\n\n  const uniqueSources = useMemo(\n    () =>\n      sources.filter(\n        (source, index, arr) =>\n          arr.findIndex((s) => getIconKey(s) === getIconKey(source)) === index\n      ),\n    [sources]\n  );\n\n  const extraCount = sources.length - 1;\n\n  const size = variant;\n  const styles = sizeClasses[size];\n\n  // Shared text styling props\n  const textStyleProps = useMemo<TextStyleProps>(\n    () => ({\n      figureSmallValue: inlineCitation,\n      secondaryBody: !inlineCitation,\n      text05: isOpen,\n      text03: !isOpen && inlineCitation,\n      text04: !isOpen && !inlineCitation,\n      inverted: isOpen,\n    }),\n    [inlineCitation, isOpen]\n  );\n\n  // Cursor class based on mode and state\n  const cursorClass = useMemo(() => {\n    if (!isQuery) return \"cursor-pointer\";\n    if (!isTruncated || expanded) return \"cursor-default\";\n    return \"cursor-pointer\";\n  }, [isQuery, isTruncated, expanded]);\n\n  // Background class based on mode and state\n  const backgroundClass = useMemo(() => {\n    if (isOpen) return \"bg-background-tint-inverted-03\";\n    if (isMore || toggleSource) return \"hover:bg-background-tint-02\";\n    if (!showDetailsCard && !isQuery)\n      return \"bg-background-tint-02 hover:bg-background-tint-inverted-03\";\n    return \"bg-background-tint-02\";\n  }, [isOpen, isMore, toggleSource, showDetailsCard, isQuery]);\n\n  const handlePrev = useCallback(() => {\n    setCurrentIndex((prev) => Math.max(0, prev - 1));\n  }, []);\n\n  const handleNext = useCallback(() => {\n    setCurrentIndex((prev) => Math.min(sources.length - 1, prev + 1));\n  }, [sources.length]);\n\n  // Reset to first source when tooltip closes\n  const handleOpenChange = useCallback((open: boolean) => {\n    setIsOpen(open);\n    if (!open) {\n      setCurrentIndex(0);\n    }\n  }, []);\n\n  const handleClick = useCallback(() => {\n    // Only expand if truncated\n    if (isQuery && !expanded && isTruncated) {\n      setExpanded(true);\n    }\n    onSourceClick?.();\n  }, [isQuery, expanded, isTruncated, onSourceClick]);\n\n  const buttonContent = (\n    <button\n      type=\"button\"\n      className={cn(\n        \"group inline-flex items-center transition-all duration-150\",\n        \"appearance-none border-none\",\n        backgroundClass,\n        styles.container,\n        isQuery && \"gap-0\",\n        isQuery && expanded && \"w-fit\",\n        cursorClass,\n        className\n      )}\n      onClick={handleClick}\n    >\n      {/* Stacked icons container - only for tag variant */}\n      {!inlineCitation && !isMore && (\n        <IconStack\n          sources={uniqueSources}\n          isQuery={isQuery}\n          isOpen={isOpen}\n          showDetailsCard={showDetailsCard}\n        />\n      )}\n\n      <div\n        className={cn(\n          \"flex items-baseline\",\n          !inlineCitation && \"pr-0.5\",\n          isQuery && expanded && \"w-fit\"\n        )}\n      >\n        {isQuery ? (\n          <QueryText\n            expanded={expanded}\n            displayName={displayName}\n            tooltipText={tooltipText}\n            isTruncated={isTruncated}\n            textRef={textRef}\n            textStyleProps={textStyleProps}\n          />\n        ) : (\n          <Text\n            {...textStyleProps}\n            className={cn(\n              \"max-w-[10rem] truncate transition-colors duration-150\",\n              !showDetailsCard &&\n                !isQuery &&\n                \"group-hover:text-text-inverted-05\"\n            )}\n          >\n            {displayName}\n          </Text>\n        )}\n\n        {/* Count - for inline citation */}\n        {inlineCitation && sources.length > 1 && (\n          <Text\n            figureSmallValue\n            text05={isOpen}\n            text03={!isOpen}\n            inverted={isOpen}\n            className={cn(\n              \"transition-colors duration-150\",\n              !showDetailsCard &&\n                !isQuery &&\n                \"group-hover:text-text-inverted-05\"\n            )}\n          >\n            +{extraCount}\n          </Text>\n        )}\n\n        {/* URL - for tag variant */}\n        {!inlineCitation && displayUrl && (\n          <Text\n            figureSmallValue\n            text05={isOpen}\n            text02={!isOpen}\n            inverted={isOpen}\n            className={cn(\n              \"max-w-[10rem] truncate transition-colors duration-150\",\n              !showDetailsCard &&\n                !isQuery &&\n                \"group-hover:text-text-inverted-05\"\n            )}\n          >\n            {displayUrl}\n          </Text>\n        )}\n      </div>\n    </button>\n  );\n\n  if (!showDetailsCard || toggleSource) {\n    return buttonContent;\n  }\n\n  return (\n    <TooltipProvider delayDuration={50}>\n      <Tooltip open={isOpen} onOpenChange={handleOpenChange}>\n        <TooltipTrigger asChild>{buttonContent}</TooltipTrigger>\n        <TooltipContent\n          side=\"bottom\"\n          align=\"start\"\n          sideOffset={4}\n          className=\"bg-transparent p-0 shadow-none border-none\"\n        >\n          <SourceTagDetailsCard\n            sources={sources}\n            currentIndex={currentIndex}\n            onPrev={handlePrev}\n            onNext={handleNext}\n          />\n        </TooltipContent>\n      </Tooltip>\n    </TooltipProvider>\n  );\n};\n\n/**\n * Memoized SourceTag component for displaying source citations.\n *\n * @see {@link SourceTagInner} for full documentation and examples.\n */\nconst SourceTag = memo(SourceTagInner);\nexport default SourceTag;\n"
  },
  {
    "path": "web/src/refresh-components/buttons/source-tag/SourceTagDetailsCard.tsx",
    "content": "\"use client\";\n\nimport React, { memo } from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport {\n  SvgArrowLeft,\n  SvgArrowRight,\n  SvgUser,\n  SvgQuestionMarkSmall,\n} from \"@opal/icons\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport { WebResultIcon } from \"@/components/WebResultIcon\";\nimport { ValidSources } from \"@/lib/types\";\nimport { timeAgo } from \"@/lib/time\";\nimport { IconProps } from \"@/components/icons/icons\";\nimport { SubQuestionDetail } from \"@/app/app/interfaces\";\n\nexport interface SourceInfo {\n  id: string;\n  title: string;\n  sourceType: ValidSources;\n  sourceUrl?: string;\n  description?: string;\n  metadata?: {\n    author?: string;\n    date?: string | Date;\n    tags?: string[];\n  };\n  icon?: React.FunctionComponent<IconProps>;\n  // Support for questions\n  isQuestion?: boolean;\n  questionData?: SubQuestionDetail;\n}\n\ninterface SourceTagDetailsCardProps {\n  sources: SourceInfo[];\n  currentIndex: number;\n  onPrev: () => void;\n  onNext: () => void;\n}\n\ninterface MetadataChipProps {\n  icon?: React.FunctionComponent<IconProps>;\n  text: string;\n}\n\nconst MetadataChip = memo(function MetadataChip({\n  icon: Icon,\n  text,\n}: MetadataChipProps) {\n  return (\n    <div className=\"flex items-center gap-0 bg-background-tint-02 rounded-08 p-1\">\n      {Icon && (\n        <div className=\"flex items-center justify-center p-0.5 w-4 h-4\">\n          <Icon className=\"w-3 h-3 stroke-text-03\" />\n        </div>\n      )}\n\n      <Text secondaryBody text03 className=\"px-0.5 max-w-[10rem] truncate\">\n        {text}\n      </Text>\n    </div>\n  );\n});\n\nconst SourceTagDetailsCardInner = ({\n  sources,\n  currentIndex,\n  onPrev,\n  onNext,\n}: SourceTagDetailsCardProps) => {\n  const currentSource = sources[currentIndex];\n  if (!currentSource) return null;\n\n  const showNavigation = sources.length > 1;\n  const isFirst = currentIndex === 0;\n  const isLast = currentIndex === sources.length - 1;\n  const isWebSource = currentSource.sourceType === \"web\";\n  const isQuestion = currentSource.isQuestion;\n  const relativeDate = timeAgo(\n    currentSource.metadata?.date instanceof Date\n      ? currentSource.metadata.date.toISOString()\n      : currentSource.metadata?.date\n  );\n\n  return (\n    <div className=\"w-[17.5rem] bg-background-neutral-00 border border-border-01 rounded-12 shadow-01 overflow-hidden\">\n      {/* Navigation header - only shown for multiple sources */}\n      {showNavigation && (\n        <div className=\"flex items-center justify-between p-2 bg-background-tint-01 border-b border-border-01\">\n          <div className=\"flex items-center gap-1\">\n            <Button\n              disabled={isFirst}\n              prominence=\"internal\"\n              icon={SvgArrowLeft}\n              onClick={onPrev}\n              size=\"sm\"\n            />\n            <Button\n              disabled={isLast}\n              prominence=\"internal\"\n              icon={SvgArrowRight}\n              onClick={onNext}\n              size=\"sm\"\n            />\n          </div>\n          <Text secondaryBody text03 className=\"px-1\">\n            {currentIndex + 1}/{sources.length}\n          </Text>\n        </div>\n      )}\n\n      <div className=\"p-1 flex flex-col gap-1\">\n        {/* Header with icon and title */}\n        <div className=\"flex items-start gap-1 p-0.5 min-h-[1.75rem] w-full text-left hover:bg-background-tint-01 rounded-08 transition-colors\">\n          <div className=\"flex items-center justify-center p-0.5 shrink-0 w-5 h-5\">\n            {isQuestion ? (\n              <SvgQuestionMarkSmall size={16} className=\"text-text-03\" />\n            ) : currentSource.icon ? (\n              <currentSource.icon size={16} />\n            ) : isWebSource && currentSource.sourceUrl ? (\n              <WebResultIcon url={currentSource.sourceUrl} size={16} />\n            ) : (\n              <SourceIcon\n                sourceType={\n                  currentSource.sourceType === \"web\"\n                    ? ValidSources.Web\n                    : currentSource.sourceType\n                }\n                iconSize={16}\n              />\n            )}\n          </div>\n          <div className=\"flex-1 min-w-0 px-0.5\">\n            <Text\n              mainUiAction\n              text04\n              className=\"truncate w-full block leading-5\"\n            >\n              {currentSource.title}\n            </Text>\n          </div>\n        </div>\n\n        {/* Metadata row */}\n        {(currentSource.metadata?.author ||\n          currentSource.metadata?.tags?.length ||\n          relativeDate) && (\n          <div className=\"flex flex-row items-center gap-2 \">\n            <div className=\"flex flex-wrap gap-1 items-center\">\n              {currentSource.metadata?.author && (\n                <MetadataChip\n                  icon={SvgUser}\n                  text={currentSource.metadata.author}\n                />\n              )}\n              {currentSource.metadata?.tags\n                ?.slice(0, 2)\n                .map((tag) => <MetadataChip key={tag} text={tag} />)}\n              {relativeDate && (\n                <Text secondaryBody text02>\n                  {relativeDate}\n                </Text>\n              )}\n            </div>\n          </div>\n        )}\n\n        {/* Description */}\n        {currentSource.description && (\n          <div className=\"px-1.5 pb-1\">\n            <Text secondaryBody text03 as=\"span\" className=\"line-clamp-4\">\n              {currentSource.description}\n            </Text>\n          </div>\n        )}\n      </div>\n    </div>\n  );\n};\n\nconst SourceTagDetailsCard = memo(SourceTagDetailsCardInner);\nexport default SourceTagDetailsCard;\n"
  },
  {
    "path": "web/src/refresh-components/buttons/source-tag/index.ts",
    "content": "export {\n  default as SourceTag,\n  type SourceTagProps,\n  type SourceInfo,\n} from \"./SourceTag\";\nexport { default as SourceTagDetailsCard } from \"./SourceTagDetailsCard\";\n"
  },
  {
    "path": "web/src/refresh-components/buttons/source-tag/sourceTagUtils.ts",
    "content": "import { OnyxDocument } from \"@/lib/search/interfaces\";\nimport { SubQuestionDetail } from \"@/app/app/interfaces\";\nimport { StreamingCitation } from \"@/app/app/services/streamingModels\";\nimport { ValidSources } from \"@/lib/types\";\nimport { getSourceDisplayName } from \"@/lib/sources\";\nimport { SourceInfo } from \"./SourceTagDetailsCard\";\n\nconst MAX_TITLE_LENGTH = 40;\n\nfunction truncateText(str: string, maxLength: number): string {\n  if (str.length <= maxLength) return str;\n  return str.slice(0, maxLength) + \"...\";\n}\n\n/**\n * Convert an OnyxDocument to a SourceInfo object for use with SourceTag\n */\nexport function documentToSourceInfo(doc: OnyxDocument): SourceInfo {\n  const sourceType = doc.source_type as ValidSources;\n\n  return {\n    id: doc.document_id,\n    title: doc.semantic_identifier || \"Unknown\",\n    sourceType,\n    sourceUrl: doc.link,\n    description: doc.blurb,\n    metadata: doc.updated_at\n      ? {\n          date: doc.updated_at,\n        }\n      : undefined,\n  };\n}\n\n/**\n * Convert a SubQuestionDetail to a SourceInfo object for use with SourceTag\n */\nexport function questionToSourceInfo(\n  question: SubQuestionDetail,\n  index: number\n): SourceInfo {\n  return {\n    id: `question-${question.level}-${question.level_question_num}`,\n    title: truncateText(question.question, MAX_TITLE_LENGTH),\n    sourceType: ValidSources.NotApplicable,\n    description: question.answer,\n    isQuestion: true,\n    questionData: question,\n  };\n}\n\n/**\n * Convert an array of citations and document map to SourceInfo array\n * Used for end-of-message Sources tag\n */\nexport function citationsToSourceInfoArray(\n  citations: StreamingCitation[],\n  documentMap: Map<string, OnyxDocument>\n): SourceInfo[] {\n  const sources: SourceInfo[] = [];\n  const seenDocIds = new Set<string>();\n\n  for (const citation of citations) {\n    if (seenDocIds.has(citation.document_id)) continue;\n\n    const doc = documentMap.get(citation.document_id);\n    if (doc) {\n      seenDocIds.add(citation.document_id);\n      sources.push(documentToSourceInfo(doc));\n    }\n  }\n\n  // Fallback: if no citations but we have documents, use first few documents\n  if (sources.length === 0 && documentMap.size > 0) {\n    const entries = Array.from(documentMap.entries());\n    for (const [, doc] of entries) {\n      sources.push(documentToSourceInfo(doc));\n      if (sources.length >= 3) break;\n    }\n  }\n\n  return sources;\n}\n\n/**\n * Get a display name for a source, used for inline citations\n */\nexport function getDisplayNameForSource(doc: OnyxDocument): string {\n  const sourceType = doc.source_type as ValidSources;\n\n  if (sourceType === ValidSources.Web || doc.is_internet) {\n    return truncateText(doc.semantic_identifier || \"\", MAX_TITLE_LENGTH);\n  }\n\n  return (\n    getSourceDisplayName(sourceType) ||\n    truncateText(doc.semantic_identifier || \"\", MAX_TITLE_LENGTH)\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/cards/Card.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Card from \"./Card\";\nimport Text from \"@/refresh-components/texts/Text\";\n\nconst meta: Meta<typeof Card> = {\n  title: \"refresh-components/cards/Card\",\n  component: Card,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Card>;\n\nexport const Primary: Story = {\n  args: {\n    variant: \"primary\",\n    children: (\n      <>\n        <Text as=\"p\" mainUiAction text05>\n          Card Title\n        </Text>\n        <Text as=\"p\" secondaryBody text03>\n          This is a primary card with some content inside.\n        </Text>\n      </>\n    ),\n  },\n};\n\nexport const Secondary: Story = {\n  args: {\n    variant: \"secondary\",\n    children: (\n      <>\n        <Text as=\"p\" mainUiAction text05>\n          Secondary Card\n        </Text>\n        <Text as=\"p\" secondaryBody text03>\n          Less prominent content or nested cards.\n        </Text>\n      </>\n    ),\n  },\n};\n\nexport const Tertiary: Story = {\n  args: {\n    variant: \"tertiary\",\n    children: (\n      <Text as=\"p\" secondaryBody text03>\n        Dashed border for placeholder or empty states.\n      </Text>\n    ),\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    variant: \"disabled\",\n    children: (\n      <>\n        <Text as=\"p\" mainUiAction text05>\n          Disabled Card\n        </Text>\n        <Text as=\"p\" secondaryBody text03>\n          This content is unavailable.\n        </Text>\n      </>\n    ),\n  },\n};\n\nexport const Borderless: Story = {\n  args: {\n    variant: \"borderless\",\n    children: (\n      <>\n        <Text as=\"p\" mainUiAction text05>\n          Borderless Card\n        </Text>\n        <Text as=\"p\" secondaryBody text03>\n          No border, solid background.\n        </Text>\n      </>\n    ),\n  },\n};\n\nexport const AllVariants: Story = {\n  render: () => (\n    <div\n      style={{\n        display: \"flex\",\n        flexDirection: \"column\",\n        gap: 16,\n        maxWidth: 400,\n      }}\n    >\n      <Card variant=\"primary\">\n        <Text as=\"p\" mainUiAction text05>\n          Primary\n        </Text>\n        <Text as=\"p\" secondaryBody text03>\n          Default card style\n        </Text>\n      </Card>\n      <Card variant=\"secondary\">\n        <Text as=\"p\" mainUiAction text05>\n          Secondary\n        </Text>\n        <Text as=\"p\" secondaryBody text03>\n          Transparent background\n        </Text>\n      </Card>\n      <Card variant=\"tertiary\">\n        <Text as=\"p\" mainUiAction text05>\n          Tertiary\n        </Text>\n        <Text as=\"p\" secondaryBody text03>\n          Dashed border\n        </Text>\n      </Card>\n      <Card variant=\"disabled\">\n        <Text as=\"p\" mainUiAction text05>\n          Disabled\n        </Text>\n        <Text as=\"p\" secondaryBody text03>\n          Dimmed / unavailable\n        </Text>\n      </Card>\n      <Card variant=\"borderless\">\n        <Text as=\"p\" mainUiAction text05>\n          Borderless\n        </Text>\n        <Text as=\"p\" secondaryBody text03>\n          No border\n        </Text>\n      </Card>\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/cards/Card.tsx",
    "content": "/**\n * Card - A styled container component\n *\n * Provides a consistent card-style container with background, padding, border, and rounded corners.\n * Uses a vertical flex layout with automatic gap spacing between children.\n *\n * Features:\n * - Padding: 1rem by default (configurable)\n * - Flex column layout with 1rem gap\n * - Rounded-16 corners\n * - Accepts optional className for custom styling\n * - Accepts all standard div HTML attributes except style (enforced by WithoutStyles)\n *\n * Variants:\n * - `primary`: Solid background with border. The default, most prominent card style.\n * - `secondary`: Transparent background with border. Use for less prominent content or nested cards.\n * - `tertiary`: Transparent background with dashed border. Use for placeholder or empty states.\n * - `disabled`: Dimmed primary style with reduced opacity. Indicates unavailable or locked content.\n * - `borderless`: Solid background without border. Use when cards are visually grouped or in tight layouts.\n *\n * @example\n * ```tsx\n * import { Card } from \"@/refresh-components/cards\";\n *\n * // Basic usage (primary variant)\n * <Card>\n *   <h2>Card Title</h2>\n *   <p>Card content goes here</p>\n * </Card>\n *\n * // Secondary variant for nested content\n * <Card variant=\"secondary\">\n *   <div>Less prominent content</div>\n * </Card>\n *\n * // Tertiary variant for empty states\n * <Card variant=\"tertiary\">\n *   <div>No items yet</div>\n * </Card>\n * ```\n */\n\nimport { Section, SectionProps } from \"@/layouts/general-layouts\";\nimport { cn } from \"@/lib/utils\";\n\ntype CardVariant =\n  // The main card variant.\n  | \"primary\"\n  // A background-colorless card variant.\n  | \"secondary\"\n  // A background-colorless card variant with a dashed border.\n  | \"tertiary\"\n  // A dimmed version of the primary variant (indicates that this card is unavailable).\n  | \"disabled\"\n  // A borderless version of the primary variant.\n  | \"borderless\";\n\nexport interface CardProps extends SectionProps {\n  // variants\n  variant?: CardVariant;\n  // Optional className to apply custom styles\n  className?: string;\n\n  ref?: React.Ref<HTMLDivElement>;\n}\n\nexport default function Card({\n  variant = \"primary\",\n  padding = 1,\n  className,\n  ref,\n  ...props\n}: CardProps) {\n  const dataProps: Record<string, unknown> = {};\n  const sectionProps: Record<string, unknown> = {};\n  for (const [key, value] of Object.entries(props)) {\n    if (key.startsWith(\"data-\")) {\n      dataProps[key] = value;\n    } else {\n      sectionProps[key] = value;\n    }\n  }\n\n  return (\n    <div\n      ref={ref}\n      className={cn(\"card\", className)}\n      data-variant={variant}\n      {...dataProps}\n    >\n      <Section\n        alignItems=\"start\"\n        padding={padding}\n        height=\"fit\"\n        {...sectionProps}\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/cards/index.ts",
    "content": "export { default as Card } from \"./Card\";\nexport type { CardProps } from \"./Card\";\n"
  },
  {
    "path": "web/src/refresh-components/commandmenu/CommandMenu.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { useState } from \"react\";\nimport CommandMenu from \"./CommandMenu\";\nimport {\n  SvgFileText,\n  SvgUsers,\n  SvgSettings,\n  SvgPlus,\n  SvgSearch,\n  SvgArrowRight,\n} from \"@opal/icons\";\n\nconst meta: Meta<typeof CommandMenu> = {\n  title: \"refresh-components/modals/CommandMenu\",\n  component: CommandMenu,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"fullscreen\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof CommandMenu>;\n\nexport const Default: Story = {\n  render: () => {\n    const [open, setOpen] = useState(true);\n    const [search, setSearch] = useState(\"\");\n\n    return (\n      <>\n        <button onClick={() => setOpen(true)}>Open Command Menu</button>\n        <CommandMenu open={open} onOpenChange={setOpen}>\n          <CommandMenu.Content>\n            <CommandMenu.Header\n              placeholder=\"Type a command or search...\"\n              value={search}\n              onValueChange={setSearch}\n              onClose={() => setOpen(false)}\n            />\n            <CommandMenu.List emptyMessage=\"No results found.\">\n              <CommandMenu.Item\n                value=\"documents\"\n                icon={SvgFileText}\n                onSelect={() => alert(\"Documents\")}\n              >\n                Search Documents\n              </CommandMenu.Item>\n              <CommandMenu.Item\n                value=\"people\"\n                icon={SvgUsers}\n                onSelect={() => alert(\"People\")}\n              >\n                Find People\n              </CommandMenu.Item>\n              <CommandMenu.Item\n                value=\"settings\"\n                icon={SvgSettings}\n                onSelect={() => alert(\"Settings\")}\n              >\n                Open Settings\n              </CommandMenu.Item>\n              <CommandMenu.Action\n                value=\"new-chat\"\n                icon={SvgPlus}\n                shortcut=\"⌘N\"\n                onSelect={() => alert(\"New chat\")}\n              >\n                New Chat\n              </CommandMenu.Action>\n            </CommandMenu.List>\n            <CommandMenu.Footer\n              leftActions={\n                <>\n                  <CommandMenu.FooterAction\n                    icon={SvgArrowRight}\n                    label=\"Select\"\n                  />\n                  <CommandMenu.FooterAction icon={SvgSearch} label=\"Search\" />\n                </>\n              }\n            />\n          </CommandMenu.Content>\n        </CommandMenu>\n      </>\n    );\n  },\n};\n\nexport const WithFilters: Story = {\n  render: () => {\n    const [open, setOpen] = useState(true);\n    const [search, setSearch] = useState(\"\");\n\n    return (\n      <>\n        <button onClick={() => setOpen(true)}>Open Command Menu</button>\n        <CommandMenu open={open} onOpenChange={setOpen}>\n          <CommandMenu.Content>\n            <CommandMenu.Header\n              placeholder=\"Search within filter...\"\n              value={search}\n              onValueChange={setSearch}\n              onClose={() => setOpen(false)}\n              filters={[{ id: \"docs\", label: \"Documents\", icon: SvgFileText }]}\n              onFilterRemove={(id) => alert(`Remove filter: ${id}`)}\n            />\n            <CommandMenu.List>\n              <CommandMenu.Filter\n                value=\"filter-docs\"\n                icon={SvgFileText}\n                isApplied\n              >\n                Documents\n              </CommandMenu.Filter>\n              <CommandMenu.Item value=\"doc-1\" onSelect={() => {}}>\n                Q3 Financial Report\n              </CommandMenu.Item>\n              <CommandMenu.Item value=\"doc-2\" onSelect={() => {}}>\n                Engineering Roadmap 2025\n              </CommandMenu.Item>\n              <CommandMenu.Item value=\"doc-3\" onSelect={() => {}}>\n                Onboarding Guide\n              </CommandMenu.Item>\n            </CommandMenu.List>\n          </CommandMenu.Content>\n        </CommandMenu>\n      </>\n    );\n  },\n};\n\nexport const EmptyState: Story = {\n  render: () => {\n    const [open, setOpen] = useState(true);\n\n    return (\n      <>\n        <button onClick={() => setOpen(true)}>Open Command Menu</button>\n        <CommandMenu open={open} onOpenChange={setOpen}>\n          <CommandMenu.Content>\n            <CommandMenu.Header\n              placeholder=\"Search...\"\n              onClose={() => setOpen(false)}\n            />\n            <CommandMenu.List emptyMessage=\"No commands match your search.\">\n              <div />\n            </CommandMenu.List>\n          </CommandMenu.Content>\n        </CommandMenu>\n      </>\n    );\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/commandmenu/CommandMenu.test.tsx",
    "content": "import React, { useState } from \"react\";\nimport { render, screen, fireEvent, waitFor } from \"@testing-library/react\";\nimport \"@testing-library/jest-dom\";\nimport userEvent from \"@testing-library/user-event\";\nimport CommandMenu, {\n  useCommandMenuContext,\n} from \"@/refresh-components/commandmenu/CommandMenu\";\n\n// Mock Radix Dialog portal to render inline for testing\njest.mock(\"@radix-ui/react-dialog\", () => {\n  const actual = jest.requireActual(\"@radix-ui/react-dialog\");\n  return {\n    ...actual,\n    Portal: ({ children }: { children: React.ReactNode }) => <>{children}</>,\n  };\n});\n\n// Mock scrollIntoView which is not available in jsdom\nElement.prototype.scrollIntoView = jest.fn();\n\n// Mock requestAnimationFrame for highlight updates\nconst originalRAF = global.requestAnimationFrame;\nbeforeAll(() => {\n  global.requestAnimationFrame = (cb: FrameRequestCallback) => {\n    cb(0);\n    return 0;\n  };\n});\nafterAll(() => {\n  global.requestAnimationFrame = originalRAF;\n});\n\nfunction setupUser() {\n  return userEvent.setup({ delay: null });\n}\n\n/**\n * Test wrapper for CommandMenu compound component\n */\nfunction TestCommandMenu({\n  open = true,\n  onOpenChange = jest.fn(),\n  includeFilter = false,\n  defaultHighlightAction = true,\n}: {\n  open?: boolean;\n  onOpenChange?: (open: boolean) => void;\n  includeFilter?: boolean;\n  defaultHighlightAction?: boolean;\n}) {\n  const [selected, setSelected] = useState<string | null>(null);\n\n  return (\n    <CommandMenu open={open} onOpenChange={onOpenChange}>\n      <CommandMenu.Content>\n        <CommandMenu.Header placeholder=\"Search...\" />\n        <CommandMenu.List>\n          <CommandMenu.Action\n            value=\"action-1\"\n            onSelect={() => setSelected(\"action-1\")}\n            defaultHighlight={defaultHighlightAction}\n          >\n            Action 1\n          </CommandMenu.Action>\n          {includeFilter && (\n            <CommandMenu.Filter value=\"filter-1\" onSelect={() => {}}>\n              Filter 1\n            </CommandMenu.Filter>\n          )}\n          <CommandMenu.Item\n            value=\"item-1\"\n            onSelect={() => setSelected(\"item-1\")}\n          >\n            Item 1\n          </CommandMenu.Item>\n          <CommandMenu.Item\n            value=\"item-2\"\n            onSelect={() => setSelected(\"item-2\")}\n          >\n            Item 2\n          </CommandMenu.Item>\n        </CommandMenu.List>\n        <CommandMenu.Footer\n          leftActions={\n            <CommandMenu.FooterAction\n              icon={() => <span>Icon</span>}\n              label=\"Select\"\n            />\n          }\n        />\n      </CommandMenu.Content>\n      {selected && <div data-testid=\"selected\">{selected}</div>}\n    </CommandMenu>\n  );\n}\n\n/**\n * Minimal test wrapper for context hook testing\n */\nfunction ContextTestComponent() {\n  const context = useCommandMenuContext();\n  return (\n    <div>\n      <div data-testid=\"highlighted-value\">\n        {context.highlightedValue ?? \"none\"}\n      </div>\n      <div data-testid=\"highlighted-type\">\n        {context.highlightedItemType ?? \"none\"}\n      </div>\n      <div data-testid=\"is-keyboard-nav\">\n        {context.isKeyboardNav ? \"true\" : \"false\"}\n      </div>\n    </div>\n  );\n}\n\nfunction TestCommandMenuWithContext({\n  open = true,\n  onOpenChange = jest.fn(),\n}: {\n  open?: boolean;\n  onOpenChange?: (open: boolean) => void;\n}) {\n  return (\n    <CommandMenu open={open} onOpenChange={onOpenChange}>\n      <CommandMenu.Content>\n        <CommandMenu.Header placeholder=\"Search...\" />\n        <CommandMenu.List>\n          <CommandMenu.Action value=\"action-1\" onSelect={() => {}}>\n            Action 1\n          </CommandMenu.Action>\n          <CommandMenu.Item value=\"item-1\" onSelect={() => {}}>\n            Item 1\n          </CommandMenu.Item>\n        </CommandMenu.List>\n        <ContextTestComponent />\n      </CommandMenu.Content>\n    </CommandMenu>\n  );\n}\n\ndescribe(\"CommandMenu\", () => {\n  describe(\"Rendering\", () => {\n    test(\"renders children when open\", () => {\n      render(<TestCommandMenu open={true} />);\n      expect(screen.getByPlaceholderText(\"Search...\")).toBeInTheDocument();\n      // Use getAllByText since Truncated component creates visible + hidden measurement elements\n      expect(screen.getAllByText(\"Action 1\").length).toBeGreaterThan(0);\n      expect(screen.getAllByText(\"Item 1\").length).toBeGreaterThan(0);\n      expect(screen.getAllByText(\"Item 2\").length).toBeGreaterThan(0);\n    });\n\n    test(\"does not render content when closed\", () => {\n      render(<TestCommandMenu open={false} />);\n      expect(\n        screen.queryByPlaceholderText(\"Search...\")\n      ).not.toBeInTheDocument();\n      expect(screen.queryByText(\"Action 1\")).not.toBeInTheDocument();\n    });\n\n    test(\"renders header with placeholder text\", () => {\n      render(<TestCommandMenu open={true} />);\n      const input = screen.getByPlaceholderText(\"Search...\");\n      expect(input).toBeInTheDocument();\n      expect(input).toHaveFocus();\n    });\n\n    test(\"renders filter items\", () => {\n      render(<TestCommandMenu open={true} includeFilter={true} />);\n      expect(screen.getByText(\"Filter 1\")).toBeInTheDocument();\n    });\n\n    test(\"renders action items\", () => {\n      render(<TestCommandMenu open={true} />);\n      // Use getAllByText since Truncated component creates visible + hidden measurement elements\n      expect(screen.getAllByText(\"Action 1\").length).toBeGreaterThan(0);\n      // Verify the item is registered\n      expect(\n        document.querySelector('[data-command-item=\"action-1\"]')\n      ).toBeInTheDocument();\n    });\n\n    test(\"renders regular items\", () => {\n      render(<TestCommandMenu open={true} />);\n      // Use getAllByText since Truncated component creates visible + hidden measurement elements\n      expect(screen.getAllByText(\"Item 1\").length).toBeGreaterThan(0);\n      expect(screen.getAllByText(\"Item 2\").length).toBeGreaterThan(0);\n      // Verify the items are registered\n      expect(\n        document.querySelector('[data-command-item=\"item-1\"]')\n      ).toBeInTheDocument();\n      expect(\n        document.querySelector('[data-command-item=\"item-2\"]')\n      ).toBeInTheDocument();\n    });\n\n    test(\"renders footer with actions\", () => {\n      render(<TestCommandMenu open={true} />);\n      expect(screen.getByText(\"Select\")).toBeInTheDocument();\n    });\n  });\n\n  describe(\"Keyboard Navigation\", () => {\n    test(\"ArrowDown highlights next item\", async () => {\n      const user = setupUser();\n      render(<TestCommandMenuWithContext open={true} />);\n\n      // Wait for initial highlight\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"action-1\"\n        );\n      });\n\n      await user.keyboard(\"{ArrowDown}\");\n\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"item-1\"\n        );\n      });\n    });\n\n    test(\"ArrowUp highlights previous item\", async () => {\n      const user = setupUser();\n      render(<TestCommandMenuWithContext open={true} />);\n\n      // Move down first\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"action-1\"\n        );\n      });\n\n      await user.keyboard(\"{ArrowDown}\");\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"item-1\"\n        );\n      });\n\n      await user.keyboard(\"{ArrowUp}\");\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"action-1\"\n        );\n      });\n    });\n\n    test(\"ArrowDown wraps to first item at end\", async () => {\n      const user = setupUser();\n      render(<TestCommandMenuWithContext open={true} />);\n\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"action-1\"\n        );\n      });\n\n      // Move through all items\n      await user.keyboard(\"{ArrowDown}\");\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"item-1\"\n        );\n      });\n\n      // Should wrap back to action-1\n      await user.keyboard(\"{ArrowDown}\");\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"action-1\"\n        );\n      });\n    });\n\n    test(\"ArrowUp wraps to last item at start\", async () => {\n      const user = setupUser();\n      render(<TestCommandMenuWithContext open={true} />);\n\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"action-1\"\n        );\n      });\n\n      // Going up from first should wrap to last\n      await user.keyboard(\"{ArrowUp}\");\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"item-1\"\n        );\n      });\n    });\n\n    test(\"Enter selects highlighted item\", async () => {\n      const user = setupUser();\n      const onOpenChange = jest.fn();\n      render(<TestCommandMenu open={true} onOpenChange={onOpenChange} />);\n\n      // Wait for initial highlight and then press Enter\n      await waitFor(() => {\n        const items = document.querySelectorAll(\"[data-command-item]\");\n        expect(items.length).toBeGreaterThan(0);\n      });\n\n      await user.keyboard(\"{ArrowDown}\"); // Move to item-1\n      await user.keyboard(\"{Enter}\");\n\n      // Menu should close after selecting a non-filter item\n      await waitFor(() => {\n        expect(onOpenChange).toHaveBeenCalledWith(false);\n      });\n    });\n\n    test(\"Escape closes menu\", async () => {\n      const user = setupUser();\n      const onOpenChange = jest.fn();\n      render(<TestCommandMenu open={true} onOpenChange={onOpenChange} />);\n\n      await user.keyboard(\"{Escape}\");\n\n      expect(onOpenChange).toHaveBeenCalledWith(false);\n    });\n\n    test(\"Enter on filter does not close menu\", async () => {\n      const user = setupUser();\n      const onOpenChange = jest.fn();\n      render(\n        <TestCommandMenu\n          open={true}\n          onOpenChange={onOpenChange}\n          includeFilter={true}\n        />\n      );\n\n      // Navigate to filter\n      await waitFor(() => {\n        const items = document.querySelectorAll(\"[data-command-item]\");\n        expect(items.length).toBeGreaterThan(0);\n      });\n\n      await user.keyboard(\"{ArrowDown}\"); // Move to filter-1\n      await user.keyboard(\"{Enter}\");\n\n      // Menu should NOT close after selecting a filter\n      await waitFor(() => {\n        // Give it time to potentially call onOpenChange incorrectly\n        return new Promise((r) => setTimeout(r, 100));\n      });\n\n      // onOpenChange should not have been called with false for filter selection\n      const closeCalls = onOpenChange.mock.calls.filter(\n        (call) => call[0] === false\n      );\n      expect(closeCalls.length).toBe(0);\n    });\n  });\n\n  describe(\"Mouse Interaction\", () => {\n    test(\"Mouse hover highlights item\", async () => {\n      render(<TestCommandMenuWithContext open={true} />);\n\n      // Use data-command-item selector directly\n      const itemContainer = document.querySelector(\n        '[data-command-item=\"item-1\"]'\n      );\n      expect(itemContainer).toBeInTheDocument();\n\n      // The LineItem component has a button inside that handles click events\n      const button = itemContainer!.querySelector('[role=\"button\"]');\n      expect(button).toBeInTheDocument();\n      fireEvent.mouseEnter(button!);\n\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"item-1\"\n        );\n      });\n    });\n\n    test(\"Click selects item\", async () => {\n      const user = setupUser();\n      const onOpenChange = jest.fn();\n      render(<TestCommandMenu open={true} onOpenChange={onOpenChange} />);\n\n      // Use data-command-item selector to find the clickable item container\n      const itemContainer = document.querySelector(\n        '[data-command-item=\"item-1\"]'\n      );\n      expect(itemContainer).toBeInTheDocument();\n\n      // The LineItem component has a button inside that handles click events\n      const button = itemContainer!.querySelector('[role=\"button\"]');\n      expect(button).toBeInTheDocument();\n      await user.click(button!);\n\n      await waitFor(() => {\n        expect(onOpenChange).toHaveBeenCalledWith(false);\n      });\n    });\n\n    test(\"Click on filter does not close menu\", async () => {\n      const user = setupUser();\n      const onOpenChange = jest.fn();\n      render(\n        <TestCommandMenu\n          open={true}\n          onOpenChange={onOpenChange}\n          includeFilter={true}\n        />\n      );\n\n      // Use data-command-item selector directly\n      const filterContainer = document.querySelector(\n        '[data-command-item=\"filter-1\"]'\n      );\n      expect(filterContainer).toBeInTheDocument();\n      await user.click(filterContainer!);\n\n      // Give it time to potentially call onOpenChange incorrectly\n      await waitFor(() => {\n        return new Promise((r) => setTimeout(r, 100));\n      });\n\n      // onOpenChange should not have been called with false for filter click\n      const closeCalls = onOpenChange.mock.calls.filter(\n        (call) => call[0] === false\n      );\n      expect(closeCalls.length).toBe(0);\n    });\n  });\n\n  describe(\"Item Registration\", () => {\n    test(\"Items with defaultHighlight=false are skipped in initial highlight\", async () => {\n      render(<TestCommandMenuWithContext open={true} />);\n\n      // The first selectable item (action-1) should be highlighted initially\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"action-1\"\n        );\n      });\n    });\n\n    test(\"First selectable item is highlighted on open\", async () => {\n      render(<TestCommandMenuWithContext open={true} />);\n\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"action-1\"\n        );\n      });\n    });\n\n    test(\"Non-default-highlight action is skipped for initial highlight\", async () => {\n      // Render with defaultHighlightAction=false, so action-1 should be skipped\n      render(<TestCommandMenu open={true} defaultHighlightAction={false} />);\n\n      // The item-1 should be highlighted instead (first item with defaultHighlight=true)\n      await waitFor(() => {\n        const highlightedItems = document.querySelectorAll(\n          '[aria-selected=\"true\"]'\n        );\n        expect(highlightedItems.length).toBeGreaterThan(0);\n        // Check that the highlighted item is item-1, not action-1\n        const highlightedValues = Array.from(highlightedItems).map((el) =>\n          el.getAttribute(\"data-command-item\")\n        );\n        expect(highlightedValues).toContain(\"item-1\");\n      });\n    });\n  });\n\n  describe(\"Context Hook\", () => {\n    test(\"useCommandMenuContext provides correct highlighted value\", async () => {\n      render(<TestCommandMenuWithContext open={true} />);\n\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"action-1\"\n        );\n      });\n    });\n\n    test(\"useCommandMenuContext provides correct highlighted item type\", async () => {\n      render(<TestCommandMenuWithContext open={true} />);\n\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-type\")).toHaveTextContent(\n          \"action\"\n        );\n      });\n\n      // Navigate to regular item\n      const user = setupUser();\n      await user.keyboard(\"{ArrowDown}\");\n\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-type\")).toHaveTextContent(\n          \"item\"\n        );\n      });\n    });\n\n    test(\"useCommandMenuContext throws when used outside CommandMenu\", () => {\n      // Suppress console.error for this test since we expect an error\n      const consoleSpy = jest\n        .spyOn(console, \"error\")\n        .mockImplementation(() => {});\n\n      expect(() => {\n        render(<ContextTestComponent />);\n      }).toThrow(\n        \"CommandMenu compound components must be used within CommandMenu\"\n      );\n\n      consoleSpy.mockRestore();\n    });\n\n    test(\"isKeyboardNav is true after keyboard navigation\", async () => {\n      const user = setupUser();\n      render(<TestCommandMenuWithContext open={true} />);\n\n      // Initially should not be keyboard nav\n      expect(screen.getByTestId(\"is-keyboard-nav\")).toHaveTextContent(\"false\");\n\n      await user.keyboard(\"{ArrowDown}\");\n\n      await waitFor(() => {\n        expect(screen.getByTestId(\"is-keyboard-nav\")).toHaveTextContent(\"true\");\n      });\n    });\n  });\n\n  describe(\"Menu State Reset\", () => {\n    test(\"highlight resets when menu closes and reopens\", async () => {\n      const { rerender } = render(<TestCommandMenuWithContext open={true} />);\n\n      // Wait for initial highlight\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"action-1\"\n        );\n      });\n\n      // Navigate to item-1\n      const user = setupUser();\n      await user.keyboard(\"{ArrowDown}\");\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"item-1\"\n        );\n      });\n\n      // Close menu\n      rerender(<TestCommandMenuWithContext open={false} />);\n\n      // Reopen menu\n      rerender(<TestCommandMenuWithContext open={true} />);\n\n      // Should reset to first item\n      await waitFor(() => {\n        expect(screen.getByTestId(\"highlighted-value\")).toHaveTextContent(\n          \"action-1\"\n        );\n      });\n    });\n  });\n\n  describe(\"Header Input Behavior\", () => {\n    test(\"typing in input does not trigger keyboard navigation\", async () => {\n      const user = setupUser();\n      const onValueChange = jest.fn();\n\n      render(\n        <CommandMenu open={true} onOpenChange={() => {}}>\n          <CommandMenu.Content>\n            <CommandMenu.Header\n              placeholder=\"Search...\"\n              value=\"\"\n              onValueChange={onValueChange}\n            />\n            <CommandMenu.List>\n              <CommandMenu.Item value=\"item-1\" onSelect={() => {}}>\n                Item 1\n              </CommandMenu.Item>\n            </CommandMenu.List>\n          </CommandMenu.Content>\n        </CommandMenu>\n      );\n\n      const input = screen.getByPlaceholderText(\"Search...\");\n      await user.type(input, \"test\");\n\n      expect(onValueChange).toHaveBeenCalledWith(\"t\");\n      expect(onValueChange).toHaveBeenCalledWith(\"e\");\n      expect(onValueChange).toHaveBeenCalledWith(\"s\");\n      expect(onValueChange).toHaveBeenCalledWith(\"t\");\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/refresh-components/commandmenu/CommandMenu.tsx",
    "content": "\"use client\";\n\nimport React, {\n  createContext,\n  useContext,\n  useEffect,\n  useCallback,\n  useRef,\n  useMemo,\n} from \"react\";\nimport * as DialogPrimitive from \"@radix-ui/react-dialog\";\nimport * as VisuallyHidden from \"@radix-ui/react-visually-hidden\";\nimport useContainerCenter from \"@/hooks/useContainerCenter\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport Tag from \"@/refresh-components/buttons/Tag\";\nimport { Button } from \"@opal/components\";\nimport ScrollIndicatorDiv from \"@/refresh-components/ScrollIndicatorDiv\";\nimport Divider from \"@/refresh-components/Divider\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { SvgSearch, SvgX } from \"@opal/icons\";\nimport type {\n  CommandMenuProps,\n  CommandMenuContentProps,\n  CommandMenuHeaderProps,\n  CommandMenuListProps,\n  CommandMenuFilterProps,\n  CommandMenuItemProps,\n  CommandMenuActionProps,\n  CommandMenuFooterProps,\n  CommandMenuFooterActionProps,\n  CommandMenuContextValue,\n} from \"./types\";\n\n// =============================================================================\n// Context\n// =============================================================================\n\nconst CommandMenuContext = createContext<CommandMenuContextValue | null>(null);\n\nfunction useCommandMenuContext() {\n  const context = useContext(CommandMenuContext);\n  if (!context) {\n    throw new Error(\n      \"CommandMenu compound components must be used within CommandMenu\"\n    );\n  }\n  return context;\n}\n\n// =============================================================================\n// CommandMenu Root\n// =============================================================================\n\n/**\n * Gets ordered items by querying DOM for data-command-item elements.\n * Safe to call in event handlers (after DOM is committed).\n */\nfunction getOrderedItems(): string[] {\n  const container = document.querySelector(\"[data-command-menu-list]\");\n  if (!container) return [];\n  const elements = container.querySelectorAll(\"[data-command-item]\");\n  return Array.from(elements)\n    .map((el) => el.getAttribute(\"data-command-item\"))\n    .filter((v): v is string => v !== null);\n}\n\n/**\n * CommandMenu Root Component\n *\n * Wrapper around Radix Dialog.Root for managing command menu state.\n * Centralizes all keyboard/selection logic - items only render and report mouse events.\n *\n * @example\n * ```tsx\n * <CommandMenu open={isOpen} onOpenChange={setIsOpen}>\n *   <CommandMenu.Content>\n *     <CommandMenu.Header placeholder=\"Search...\" />\n *     <CommandMenu.List>\n *       <CommandMenu.Item value=\"1\">Item 1</CommandMenu.Item>\n *     </CommandMenu.List>\n *     <CommandMenu.Footer />\n *   </CommandMenu.Content>\n * </CommandMenu>\n * ```\n */\nfunction CommandMenuRoot({ open, onOpenChange, children }: CommandMenuProps) {\n  const [highlightedValue, setHighlightedValue] = React.useState<string | null>(\n    null\n  );\n  const [isKeyboardNav, setIsKeyboardNav] = React.useState(false);\n  const [itemsRevision, setItemsRevision] = React.useState(0);\n\n  // Centralized callback registry - items register their onSelect callback, type, and defaultHighlight\n  const itemCallbacks = useRef<\n    Map<\n      string,\n      {\n        callback: () => void;\n        type: \"filter\" | \"item\" | \"action\";\n        defaultHighlight: boolean;\n      }\n    >\n  >(new Map());\n\n  // Track previous itemsRevision to detect when items actually change\n  const prevItemsRevisionRef = useRef(itemsRevision);\n\n  // Reset state when menu closes\n  useEffect(() => {\n    if (!open) {\n      setHighlightedValue(null);\n      setIsKeyboardNav(false);\n      itemCallbacks.current.clear();\n    }\n  }, [open]);\n\n  // Ensure valid highlight when menu is open and items change\n  useEffect(() => {\n    if (open) {\n      const frame = requestAnimationFrame(() => {\n        const items = getOrderedItems();\n        const currentEntry = highlightedValue\n          ? itemCallbacks.current.get(highlightedValue)\n          : null;\n\n        const itemsChanged = prevItemsRevisionRef.current !== itemsRevision;\n        prevItemsRevisionRef.current = itemsRevision;\n\n        // Re-evaluate if:\n        // 1. No highlight set\n        // 2. Current highlight is not in DOM\n        // 3. Items changed AND current highlight has defaultHighlight=false\n        const shouldReselect =\n          !highlightedValue ||\n          !items.includes(highlightedValue) ||\n          (itemsChanged && currentEntry?.defaultHighlight === false);\n\n        if (shouldReselect) {\n          // Find first item eligible for default highlight\n          const defaultItem = items.find((value) => {\n            const entry = itemCallbacks.current.get(value);\n            return entry?.defaultHighlight !== false;\n          });\n          // Use default item if found, otherwise fall back to first item\n          const targetItem = defaultItem || items[0];\n          setHighlightedValue(targetItem || null);\n        }\n      });\n      return () => cancelAnimationFrame(frame);\n    }\n  }, [open, highlightedValue, itemsRevision]);\n\n  // Registration functions (items call on mount)\n  const registerItem = useCallback(\n    (\n      value: string,\n      onSelect: () => void,\n      type: \"filter\" | \"item\" | \"action\" = \"item\",\n      defaultHighlight: boolean = true\n    ) => {\n      if (\n        process.env.NODE_ENV === \"development\" &&\n        itemCallbacks.current.has(value)\n      ) {\n        console.warn(\n          `[CommandMenu] Duplicate value \"${value}\" registered. ` +\n            `Values must be unique across all Filter, Item, and Action components.`\n        );\n      }\n      itemCallbacks.current.set(value, {\n        callback: onSelect,\n        type,\n        defaultHighlight,\n      });\n      setItemsRevision((r) => r + 1);\n    },\n    []\n  );\n\n  const unregisterItem = useCallback((value: string) => {\n    itemCallbacks.current.delete(value);\n    setItemsRevision((r) => r + 1);\n  }, []);\n\n  // Shared mouse handlers (items call on events)\n  const onItemMouseEnter = useCallback(\n    (value: string) => {\n      if (!isKeyboardNav) {\n        setHighlightedValue(value);\n      }\n    },\n    [isKeyboardNav]\n  );\n\n  const onItemMouseMove = useCallback(\n    (value: string) => {\n      if (isKeyboardNav) {\n        setIsKeyboardNav(false);\n      }\n      if (highlightedValue !== value) {\n        setHighlightedValue(value);\n      }\n    },\n    [isKeyboardNav, highlightedValue]\n  );\n\n  const onItemClick = useCallback(\n    (value: string) => {\n      const entry = itemCallbacks.current.get(value);\n      entry?.callback();\n      if (entry?.type !== \"filter\") {\n        onOpenChange(false);\n      }\n    },\n    [onOpenChange]\n  );\n\n  const onListMouseLeave = useCallback(() => {\n    if (!isKeyboardNav) {\n      setHighlightedValue(null);\n    }\n  }, [isKeyboardNav]);\n\n  // Compute the type of the currently highlighted item\n  const highlightedItemType = useMemo(() => {\n    if (!highlightedValue) return null;\n    return itemCallbacks.current.get(highlightedValue)?.type ?? null;\n  }, [highlightedValue]);\n\n  // Keyboard handler - centralized for all keys including Enter\n  const handleKeyDown = useCallback(\n    (e: React.KeyboardEvent) => {\n      switch (e.key) {\n        case \"ArrowDown\": {\n          e.preventDefault();\n          setIsKeyboardNav(true);\n          const items = getOrderedItems();\n          if (items.length === 0) return;\n\n          const currentIndex = highlightedValue\n            ? items.indexOf(highlightedValue)\n            : -1;\n          const nextIndex =\n            currentIndex < items.length - 1 ? currentIndex + 1 : 0;\n          const nextItem = items[nextIndex];\n          if (nextItem !== undefined) {\n            setHighlightedValue(nextItem);\n          }\n          break;\n        }\n        case \"ArrowUp\": {\n          e.preventDefault();\n          setIsKeyboardNav(true);\n          const items = getOrderedItems();\n          if (items.length === 0) return;\n\n          const currentIndex = highlightedValue\n            ? items.indexOf(highlightedValue)\n            : 0;\n          const prevIndex =\n            currentIndex > 0 ? currentIndex - 1 : items.length - 1;\n          const prevItem = items[prevIndex];\n          if (prevItem !== undefined) {\n            setHighlightedValue(prevItem);\n          }\n          break;\n        }\n        case \"Enter\": {\n          e.preventDefault();\n          e.stopPropagation();\n          if (highlightedValue) {\n            const entry = itemCallbacks.current.get(highlightedValue);\n            entry?.callback();\n            if (entry?.type !== \"filter\") {\n              onOpenChange(false);\n            }\n          }\n          break;\n        }\n        case \"Escape\": {\n          e.preventDefault();\n          onOpenChange(false);\n          break;\n        }\n      }\n    },\n    [highlightedValue, onOpenChange]\n  );\n\n  // Scroll highlighted item into view on keyboard nav\n  // Uses manual scroll calculation instead of scrollIntoView to only scroll\n  // the list container, not the modal or other ancestors\n  useEffect(() => {\n    if (isKeyboardNav && highlightedValue) {\n      const container = document.querySelector(\"[data-command-menu-list]\");\n      // Use safe attribute matching instead of direct selector interpolation\n      // to prevent CSS selector injection\n      const el = Array.from(\n        container?.querySelectorAll(\"[data-command-item]\") ?? []\n      ).find((e) => e.getAttribute(\"data-command-item\") === highlightedValue);\n\n      if (container && el instanceof HTMLElement) {\n        const containerRect = container.getBoundingClientRect();\n        const elRect = el.getBoundingClientRect();\n\n        const scrollMargin = 60;\n        if (elRect.top < containerRect.top + scrollMargin) {\n          container.scrollTop -= containerRect.top + scrollMargin - elRect.top;\n        } else if (elRect.bottom > containerRect.bottom) {\n          container.scrollTop += elRect.bottom - containerRect.bottom;\n        }\n      }\n    }\n  }, [highlightedValue, isKeyboardNav]);\n\n  const contextValue = useMemo<CommandMenuContextValue>(\n    () => ({\n      highlightedValue,\n      highlightedItemType,\n      isKeyboardNav,\n      registerItem,\n      unregisterItem,\n      onItemMouseEnter,\n      onItemMouseMove,\n      onItemClick,\n      onListMouseLeave,\n      handleKeyDown,\n    }),\n    [\n      highlightedValue,\n      highlightedItemType,\n      isKeyboardNav,\n      registerItem,\n      unregisterItem,\n      onItemMouseEnter,\n      onItemMouseMove,\n      onItemClick,\n      onListMouseLeave,\n      handleKeyDown,\n    ]\n  );\n\n  return (\n    <CommandMenuContext.Provider value={contextValue}>\n      <DialogPrimitive.Root open={open} onOpenChange={onOpenChange}>\n        {children}\n      </DialogPrimitive.Root>\n    </CommandMenuContext.Provider>\n  );\n}\n\n// =============================================================================\n// CommandMenu Content\n// =============================================================================\n\n/**\n * CommandMenu Content Component\n *\n * Modal container with overlay, sizing, and animations.\n * Keyboard handling is centralized in Root and accessed via context.\n */\nconst CommandMenuContent = React.forwardRef<\n  React.ComponentRef<typeof DialogPrimitive.Content>,\n  CommandMenuContentProps\n>(({ children }, ref) => {\n  const { handleKeyDown } = useCommandMenuContext();\n  const { centerX, hasContainerCenter } = useContainerCenter();\n\n  return (\n    <DialogPrimitive.Portal>\n      {/* Overlay - fixed to full viewport, hidden from assistive technology */}\n      <DialogPrimitive.Overlay\n        aria-hidden=\"true\"\n        className={cn(\n          \"fixed inset-0 z-modal-overlay bg-mask-03 backdrop-blur-03 pointer-events-none\",\n          \"data-[state=open]:animate-in data-[state=closed]:animate-out\",\n          \"data-[state=open]:fade-in-0 data-[state=closed]:fade-out-0\"\n        )}\n      />\n      {/* Content - centered within the main container when available,\n          otherwise falls back to viewport centering */}\n      <DialogPrimitive.Content\n        ref={ref}\n        onKeyDown={handleKeyDown}\n        style={\n          hasContainerCenter\n            ? ({\n                left: centerX,\n                \"--tw-enter-translate-x\": \"-50%\",\n                \"--tw-exit-translate-x\": \"-50%\",\n              } as React.CSSProperties)\n            : undefined\n        }\n        className={cn(\n          \"fixed top-[72px]\",\n          hasContainerCenter ? \"-translate-x-1/2\" : \"inset-x-0 mx-auto\",\n          \"z-modal\",\n          \"bg-background-tint-00 border rounded-16 shadow-2xl outline-none\",\n          \"flex flex-col overflow-hidden\",\n          \"max-w-[calc(100dvw-2rem)] max-h-[calc(100dvh-144px)]\",\n          \"data-[state=open]:animate-in data-[state=closed]:animate-out\",\n          \"data-[state=open]:fade-in-0 data-[state=closed]:fade-out-0\",\n          \"data-[state=open]:slide-in-from-bottom data-[state=open]:slide-in-from-left-0\",\n          \"data-[state=closed]:slide-out-to-bottom data-[state=closed]:slide-out-to-left-0\",\n          \"duration-200\",\n          \"w-[32rem]\",\n          \"min-h-[15rem]\"\n        )}\n      >\n        <VisuallyHidden.Root asChild>\n          <DialogPrimitive.Title>Command Menu</DialogPrimitive.Title>\n        </VisuallyHidden.Root>\n        {children}\n      </DialogPrimitive.Content>\n    </DialogPrimitive.Portal>\n  );\n});\nCommandMenuContent.displayName = \"CommandMenuContent\";\n\n// =============================================================================\n// CommandMenu Header\n// =============================================================================\n\n/**\n * CommandMenu Header Component\n *\n * Contains filter tags and search input.\n * Arrow keys preventDefault at input level (to stop cursor movement) then bubble to Content.\n */\nfunction CommandMenuHeader({\n  placeholder = \"Search...\",\n  filters = [],\n  value = \"\",\n  onValueChange,\n  onFilterRemove,\n  onClose,\n  onEmptyBackspace,\n}: CommandMenuHeaderProps) {\n  // Prevent default for arrow/enter keys so they don't move cursor or submit forms\n  // The actual handling happens in Root's centralized handler via event bubbling\n  const handleInputKeyDown = useCallback(\n    (e: React.KeyboardEvent<HTMLInputElement>) => {\n      if (e.key === \"ArrowDown\" || e.key === \"ArrowUp\" || e.key === \"Enter\") {\n        e.preventDefault();\n      }\n      // Handle backspace on empty input for navigation\n      if (e.key === \"Backspace\" && !value) {\n        onEmptyBackspace?.();\n      }\n    },\n    [value, onEmptyBackspace]\n  );\n\n  return (\n    <div className=\"flex-shrink-0\">\n      {/* Top row: Search icon, filters, close button */}\n      <div className=\"px-3 pt-3 flex flex-row justify-between items-center\">\n        <Section\n          flexDirection=\"row\"\n          justifyContent=\"start\"\n          gap={0.5}\n          width=\"fit\"\n        >\n          {/* Standalone search icon */}\n          <SvgSearch className=\"w-6 h-6 stroke-text-04\" />\n          {filters.map((filter) => (\n            <Tag\n              variant=\"editable\"\n              key={filter.id}\n              label={filter.label}\n              icon={filter.icon}\n              onRemove={\n                onFilterRemove ? () => onFilterRemove(filter.id) : undefined\n              }\n            />\n          ))}\n        </Section>\n        {onClose && (\n          <DialogPrimitive.Close asChild>\n            <Button\n              icon={SvgX}\n              prominence=\"tertiary\"\n              size=\"sm\"\n              onClick={onClose}\n              aria-label=\"Close menu\"\n            />\n          </DialogPrimitive.Close>\n        )}\n      </div>\n      {/* Search input - arrow/enter keys bubble up to Content for centralized handling */}\n      <div className=\"px-2 pb-2 pt-0.5\">\n        <InputTypeIn\n          placeholder={placeholder}\n          value={value}\n          onChange={(e) => onValueChange?.(e.target.value)}\n          onKeyDown={handleInputKeyDown}\n          autoFocus\n          className=\"w-full !bg-transparent !border-transparent [&:is(:hover,:active,:focus,:focus-within)]:!bg-background-neutral-00 [&:is(:hover)]:!border-border-01 [&:is(:focus,:focus-within)]:!shadow-none\"\n          showClearButton={false}\n        />\n      </div>\n    </div>\n  );\n}\n\n// =============================================================================\n// CommandMenu List\n// =============================================================================\n\n/**\n * CommandMenu List Component\n *\n * Scrollable container for menu items with scroll shadow indicators.\n * Uses ScrollIndicatorDiv for automatic scroll shadows.\n */\nfunction CommandMenuList({ children, emptyMessage }: CommandMenuListProps) {\n  const { isKeyboardNav, onListMouseLeave } = useCommandMenuContext();\n  const childCount = React.Children.count(children);\n\n  if (childCount === 0 && emptyMessage) {\n    return (\n      <div\n        className=\"bg-background-tint-01 p-4\"\n        role=\"status\"\n        aria-live=\"polite\"\n      >\n        <Text secondaryBody text03>\n          {emptyMessage}\n        </Text>\n      </div>\n    );\n  }\n\n  return (\n    <ScrollIndicatorDiv\n      role=\"listbox\"\n      aria-label=\"Command menu options\"\n      className=\"p-1 gap-1 max-h-[60vh] bg-background-tint-01\"\n      backgroundColor=\"var(--background-tint-01)\"\n      data-command-menu-list\n      data-keyboard-nav={isKeyboardNav ? \"true\" : undefined}\n      variant=\"shadow\"\n      onMouseLeave={onListMouseLeave}\n    >\n      {children}\n    </ScrollIndicatorDiv>\n  );\n}\n\n// =============================================================================\n// CommandMenu Filter\n// =============================================================================\n\n/**\n * CommandMenu Filter Component\n *\n * When `isApplied` is true, renders as a non-interactive group label.\n * Otherwise, renders as a selectable filter with a chevron indicator.\n * Dumb component - registers callback on mount, renders based on context state.\n */\nfunction CommandMenuFilter({\n  value,\n  children,\n  icon,\n  isApplied,\n  onSelect,\n}: CommandMenuFilterProps) {\n  const {\n    highlightedValue,\n    registerItem,\n    unregisterItem,\n    onItemMouseEnter,\n    onItemMouseMove,\n    onItemClick,\n  } = useCommandMenuContext();\n\n  // Register callback on mount - NO keyboard listener needed\n  useEffect(() => {\n    if (!isApplied && onSelect) {\n      registerItem(value, () => onSelect(), \"filter\");\n      return () => unregisterItem(value);\n    }\n  }, [value, isApplied, onSelect, registerItem, unregisterItem]);\n\n  // When filter is applied, show as group label (non-interactive)\n  if (isApplied) {\n    return (\n      <Divider\n        showTitle\n        text={children as string}\n        icon={icon}\n        dividerLine={false}\n      />\n    );\n  }\n\n  const isHighlighted = value === highlightedValue;\n\n  // Selectable filter - uses LineItem, delegates all events to context\n  return (\n    <div data-command-item={value} role=\"option\" aria-selected={isHighlighted}>\n      <Divider\n        showTitle\n        text={children as string}\n        icon={icon}\n        foldable\n        isHighlighted={isHighlighted}\n        onClick={() => onItemClick(value)}\n        onMouseEnter={() => onItemMouseEnter(value)}\n        onMouseMove={() => onItemMouseMove(value)}\n        dividerLine={false}\n      />\n    </div>\n  );\n}\n\n// =============================================================================\n// CommandMenu Item\n// =============================================================================\n\n/**\n * CommandMenu Item Component\n *\n * Dumb component - registers callback on mount, renders based on context state.\n * Use rightContent for timestamps, badges, etc.\n */\nfunction CommandMenuItem({\n  value,\n  icon,\n  rightContent,\n  onSelect,\n  children,\n}: CommandMenuItemProps) {\n  const {\n    highlightedValue,\n    registerItem,\n    unregisterItem,\n    onItemMouseEnter,\n    onItemMouseMove,\n    onItemClick,\n  } = useCommandMenuContext();\n\n  // Register callback on mount - NO keyboard listener needed\n  useEffect(() => {\n    registerItem(value, () => onSelect?.(value));\n    return () => unregisterItem(value);\n  }, [value, onSelect, registerItem, unregisterItem]);\n\n  const isHighlighted = value === highlightedValue;\n\n  // Resolve rightContent - supports both static ReactNode and render function\n  const resolvedRightContent =\n    typeof rightContent === \"function\"\n      ? rightContent({ isHighlighted })\n      : rightContent;\n\n  return (\n    <div data-command-item={value} role=\"option\" aria-selected={isHighlighted}>\n      <LineItem\n        muted\n        icon={icon}\n        rightChildren={resolvedRightContent}\n        emphasized={isHighlighted}\n        selected={isHighlighted}\n        onClick={() => onItemClick(value)}\n        onMouseEnter={() => onItemMouseEnter(value)}\n        onMouseMove={() => onItemMouseMove(value)}\n      >\n        {children}\n      </LineItem>\n    </div>\n  );\n}\n\n// =============================================================================\n// CommandMenu Action\n// =============================================================================\n\n/**\n * CommandMenu Action Component\n *\n * Dumb component - registers callback on mount, renders based on context state.\n * Uses LineItem with action variant for visual distinction.\n */\nfunction CommandMenuAction({\n  value,\n  icon,\n  shortcut,\n  onSelect,\n  children,\n  defaultHighlight = true,\n}: CommandMenuActionProps) {\n  const {\n    highlightedValue,\n    registerItem,\n    unregisterItem,\n    onItemMouseEnter,\n    onItemMouseMove,\n    onItemClick,\n  } = useCommandMenuContext();\n\n  // Register callback on mount - NO keyboard listener needed\n  useEffect(() => {\n    registerItem(value, () => onSelect?.(value), \"action\", defaultHighlight);\n    return () => unregisterItem(value);\n  }, [value, onSelect, defaultHighlight, registerItem, unregisterItem]);\n\n  const isHighlighted = value === highlightedValue;\n\n  return (\n    <div data-command-item={value} role=\"option\" aria-selected={isHighlighted}>\n      <LineItem\n        action\n        icon={icon}\n        rightChildren={\n          shortcut ? (\n            <Text figureKeystroke text02>\n              {shortcut}\n            </Text>\n          ) : undefined\n        }\n        emphasized={isHighlighted}\n        selected={isHighlighted}\n        onClick={() => onItemClick(value)}\n        onMouseEnter={() => onItemMouseEnter(value)}\n        onMouseMove={() => onItemMouseMove(value)}\n      >\n        {children}\n      </LineItem>\n    </div>\n  );\n}\n\n// =============================================================================\n// CommandMenu Footer\n// =============================================================================\n\n/**\n * CommandMenu Footer Component\n *\n * Footer section with keyboard hint actions.\n */\nfunction CommandMenuFooter({ leftActions }: CommandMenuFooterProps) {\n  return (\n    <div className=\"flex-shrink-0\">\n      <Section\n        flexDirection=\"row\"\n        justifyContent=\"start\"\n        gap={1}\n        padding={0.75}\n      >\n        {leftActions}\n      </Section>\n    </div>\n  );\n}\n\n// =============================================================================\n// CommandMenu Footer Action\n// =============================================================================\n\n/**\n * CommandMenu Footer Action Component\n *\n * Display-only visual hint showing a keyboard shortcut.\n */\nfunction CommandMenuFooterAction({\n  icon: Icon,\n  label,\n}: CommandMenuFooterActionProps) {\n  return (\n    <div className=\"flex items-center gap-1\" aria-label={label}>\n      <Icon\n        className=\"w-[0.875rem] h-[0.875rem] stroke-text-02\"\n        aria-hidden=\"true\"\n      />\n      <Text mainUiBody text03>\n        {label}\n      </Text>\n    </div>\n  );\n}\n\n// =============================================================================\n// Export Compound Component\n// =============================================================================\n\nexport { useCommandMenuContext };\n\nexport default Object.assign(CommandMenuRoot, {\n  Content: CommandMenuContent,\n  Header: CommandMenuHeader,\n  List: CommandMenuList,\n  Filter: CommandMenuFilter,\n  Item: CommandMenuItem,\n  Action: CommandMenuAction,\n  Footer: CommandMenuFooter,\n  FooterAction: CommandMenuFooterAction,\n});\n"
  },
  {
    "path": "web/src/refresh-components/commandmenu/types.ts",
    "content": "import type { IconProps } from \"@opal/types\";\n\n// =============================================================================\n// Filter Object (for header display)\n// =============================================================================\n\n/**\n * Filter object for CommandMenu header\n */\nexport interface CommandMenuFilter {\n  id: string;\n  label: string;\n  icon?: React.FunctionComponent<IconProps>;\n}\n\n/**\n * Props for CommandMenu root component\n */\nexport interface CommandMenuProps {\n  open: boolean;\n  onOpenChange: (open: boolean) => void;\n  children: React.ReactNode;\n}\n\n/**\n * Props for CommandMenu content (modal container)\n */\nexport interface CommandMenuContentProps {\n  children: React.ReactNode;\n}\n\n/**\n * Props for CommandMenu header with search and filters\n */\nexport interface CommandMenuHeaderProps {\n  placeholder?: string;\n  filters?: CommandMenuFilter[];\n  value?: string;\n  onValueChange?: (value: string) => void;\n  onFilterRemove?: (filterId: string) => void;\n  onClose?: () => void;\n  onEmptyBackspace?: () => void;\n}\n\n/**\n * Props for CommandMenu list container\n */\nexport interface CommandMenuListProps {\n  children: React.ReactNode;\n  emptyMessage?: string;\n}\n\n/**\n * Props for CommandMenu filter (selectable or as applied group label)\n */\nexport interface CommandMenuFilterProps {\n  /**\n   * Unique identifier for this item within the CommandMenu.\n   * Must be unique across all Filter, Item, and Action components.\n   * Used for keyboard navigation, selection callbacks, and highlight state.\n   */\n  value: string;\n  children: string;\n  icon?: React.FunctionComponent<IconProps>;\n  isApplied?: boolean; // When true, renders as non-interactive group label\n  onSelect?: () => void;\n}\n\n/**\n * Props for CommandMenu item\n */\nexport interface CommandMenuItemProps {\n  /**\n   * Unique identifier for this item within the CommandMenu.\n   * Must be unique across all Filter, Item, and Action components.\n   * Used for keyboard navigation, selection callbacks, and highlight state.\n   */\n  value: string;\n  icon?: React.FunctionComponent<IconProps>;\n  rightContent?:\n    | React.ReactNode\n    | ((params: { isHighlighted: boolean }) => React.ReactNode); // For timestamps, badges, etc.\n  onSelect?: (value: string) => void;\n  children: React.ReactNode;\n}\n\n/**\n * Props for CommandMenu action (quick actions with keyboard shortcuts)\n */\nexport interface CommandMenuActionProps {\n  /**\n   * Unique identifier for this item within the CommandMenu.\n   * Must be unique across all Filter, Item, and Action components.\n   * Used for keyboard navigation, selection callbacks, and highlight state.\n   */\n  value: string;\n  icon?: React.FunctionComponent<IconProps>;\n  shortcut?: string; // Keyboard shortcut like \"⌘N\", \"⌘P\"\n  onSelect?: (value: string) => void;\n  children: React.ReactNode;\n  /**\n   * Whether this action should be considered for initial highlight.\n   * Default: true. Set false to skip this item when determining initial highlight.\n   * Arrow key navigation still includes all items regardless of this setting.\n   */\n  defaultHighlight?: boolean;\n}\n\n/**\n * Props for CommandMenu footer\n */\nexport interface CommandMenuFooterProps {\n  leftActions?: React.ReactNode;\n}\n\n/**\n * Props for CommandMenu footer action hint\n */\nexport interface CommandMenuFooterActionProps {\n  icon: React.FunctionComponent<IconProps>;\n  label: string;\n}\n\n/**\n * Context value for CommandMenu keyboard navigation\n * Uses centralized control with callback registry - items are \"dumb\" renderers\n */\nexport interface CommandMenuContextValue {\n  // State\n  highlightedValue: string | null;\n  highlightedItemType: \"filter\" | \"item\" | \"action\" | null;\n  isKeyboardNav: boolean;\n\n  // Registration (items call on mount with their callback)\n  registerItem: (\n    value: string,\n    onSelect: () => void,\n    type?: \"filter\" | \"item\" | \"action\",\n    defaultHighlight?: boolean\n  ) => void;\n  unregisterItem: (value: string) => void;\n\n  // Mouse interaction (items call on events - centralized in root)\n  onItemMouseEnter: (value: string) => void;\n  onItemMouseMove: (value: string) => void;\n  onItemClick: (value: string) => void;\n  onListMouseLeave: () => void;\n\n  // Keyboard handler (Content attaches this to DialogPrimitive.Content)\n  handleKeyDown: (e: React.KeyboardEvent) => void;\n}\n"
  },
  {
    "path": "web/src/refresh-components/contexts/ModalContext.tsx",
    "content": "\"use client\";\n\nimport React, { createContext, useContext, useState, useCallback } from \"react\";\n\nconst ModalContext = createContext<ModalInterface | null>(null);\n\nexport interface ProviderProps {\n  children?: React.ReactNode;\n}\n\nexport interface ModalCreationInterface {\n  isOpen: boolean;\n  toggle: (state: boolean) => void;\n  Provider: React.FunctionComponent<ProviderProps>;\n}\n\nexport function useCreateModal(): ModalCreationInterface {\n  const [isOpen, setIsOpen] = useState(false);\n\n  const toggle = useCallback(\n    (state: boolean) => {\n      setIsOpen(state);\n    },\n    [setIsOpen]\n  );\n\n  const Provider: React.FunctionComponent<ProviderProps> = useCallback(\n    ({ children }: ProviderProps) => {\n      if (!isOpen) return null;\n\n      return (\n        <ModalContext.Provider value={{ isOpen, toggle }}>\n          {children}\n        </ModalContext.Provider>\n      );\n    },\n    [isOpen, toggle]\n  );\n\n  return { isOpen, toggle, Provider };\n}\n\nexport interface ModalInterface {\n  isOpen: boolean;\n  toggle: (state: boolean) => void;\n}\n\nexport function useModal(): ModalInterface {\n  const context = useContext(ModalContext);\n\n  if (!context) {\n    throw new Error(\n      \"useModal must be used within the `Modal` field returned by `useCreateModal`\"\n    );\n  }\n\n  return context;\n}\n\nexport function useModalClose(onClose?: () => void): (() => void) | undefined {\n  const context = useContext(ModalContext);\n\n  return context\n    ? () => {\n        context.toggle(false);\n        onClose?.();\n      }\n    : onClose;\n}\n"
  },
  {
    "path": "web/src/refresh-components/form/CheckboxField.tsx",
    "content": "\"use client\";\n\nimport { useField } from \"formik\";\nimport Checkbox, { CheckboxProps } from \"@/refresh-components/inputs/Checkbox\";\nimport { useOnChangeValue } from \"@/hooks/formHooks\";\n\ninterface CheckboxFieldProps extends Omit<CheckboxProps, \"checked\"> {\n  name: string;\n}\n\nexport default function UnlabeledCheckboxField({\n  name,\n  onCheckedChange,\n  ...props\n}: CheckboxFieldProps) {\n  const [field] = useField<boolean>({ name, type: \"checkbox\" });\n  const onChange = useOnChangeValue(name, onCheckedChange);\n\n  return (\n    <Checkbox checked={field.value} onCheckedChange={onChange} {...props} />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/form/FieldContext.tsx",
    "content": "\"use client\";\n\nimport { createContext, useContext } from \"react\";\nimport { FieldContextType } from \"./types\";\n\nexport const FieldContext = createContext<FieldContextType | undefined>(\n  undefined\n);\n\nexport const useFieldContext = () => {\n  const context = useContext(FieldContext);\n  if (context === undefined) {\n    throw new Error(\n      \"useFieldContext must be used within a FieldContextProvider\"\n    );\n  }\n  return context;\n};\n"
  },
  {
    "path": "web/src/refresh-components/form/FormField.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { FormField } from \"./FormField\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\n\nconst meta: Meta<typeof FormField> = {\n  title: \"refresh-components/form/FormField\",\n  component: FormField,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof FormField>;\n\nexport const Default: Story = {\n  render: () => (\n    <FormField state=\"idle\" name=\"email\">\n      <FormField.Label>Email Address</FormField.Label>\n      <FormField.Description>\n        We will never share your email with anyone.\n      </FormField.Description>\n      <FormField.Control>\n        <InputTypeIn placeholder=\"you@example.com\" />\n      </FormField.Control>\n    </FormField>\n  ),\n};\n\nexport const Required: Story = {\n  render: () => (\n    <FormField state=\"idle\" name=\"name\" required>\n      <FormField.Label required>Full Name</FormField.Label>\n      <FormField.Control>\n        <InputTypeIn placeholder=\"Jane Doe\" />\n      </FormField.Control>\n    </FormField>\n  ),\n};\n\nexport const Optional: Story = {\n  render: () => (\n    <FormField state=\"idle\" name=\"nickname\">\n      <FormField.Label optional>Nickname</FormField.Label>\n      <FormField.Control>\n        <InputTypeIn placeholder=\"Optional nickname\" />\n      </FormField.Control>\n    </FormField>\n  ),\n};\n\nexport const ErrorState: Story = {\n  render: () => (\n    <FormField state=\"error\" name=\"username\">\n      <FormField.Label>Username</FormField.Label>\n      <FormField.Control>\n        <InputTypeIn placeholder=\"Choose a username\" variant=\"error\" />\n      </FormField.Control>\n      <FormField.Message\n        messages={{ error: \"This username is already taken.\" }}\n      />\n    </FormField>\n  ),\n};\n\nexport const SuccessState: Story = {\n  render: () => (\n    <FormField state=\"success\" name=\"username\">\n      <FormField.Label>Username</FormField.Label>\n      <FormField.Control>\n        <InputTypeIn placeholder=\"Choose a username\" />\n      </FormField.Control>\n      <FormField.Message messages={{ success: \"Username is available!\" }} />\n    </FormField>\n  ),\n};\n\nexport const WithAPIMessage: Story = {\n  render: () => (\n    <FormField state=\"idle\" name=\"domain\">\n      <FormField.Label>Custom Domain</FormField.Label>\n      <FormField.Control>\n        <InputTypeIn placeholder=\"your-domain.com\" />\n      </FormField.Control>\n      <FormField.APIMessage\n        state=\"loading\"\n        messages={{ loading: \"Verifying DNS records...\" }}\n      />\n    </FormField>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/form/FormField.tsx",
    "content": "\"use client\";\n\nimport { cn } from \"@/lib/utils\";\nimport { FieldContext } from \"./FieldContext\";\nimport {\n  ControlProps,\n  DescriptionProps,\n  FieldContextType,\n  FormFieldRootProps,\n  LabelProps,\n  MessageProps,\n  APIMessageProps,\n} from \"./types\";\nimport React, { useId, useMemo } from \"react\";\nimport { useFieldContext } from \"./FieldContext\";\nimport { Slot } from \"@radix-ui/react-slot\";\nimport Text from \"../texts/Text\";\nimport { FieldMessage } from \"../messages/FieldMessage\";\n\nexport const FormFieldRoot: React.FC<FormFieldRootProps> = ({\n  id,\n  name,\n  state = \"idle\",\n  required,\n  className,\n  children,\n  ...props\n}) => {\n  const reactId = useId();\n  const baseId = id ?? `field_${reactId}`;\n\n  const describedByIds = useMemo(() => {\n    return [`${baseId}-desc`, `${baseId}-msg`, `${baseId}-api-msg`];\n  }, [baseId]);\n\n  const contextValue: FieldContextType = {\n    baseId,\n    name,\n    required,\n    state,\n    describedByIds,\n  };\n\n  return (\n    <FieldContext.Provider value={contextValue}>\n      <div\n        id={baseId}\n        className={cn(\"flex flex-col gap-y-1\", className)}\n        {...props}\n      >\n        {children}\n      </div>\n    </FieldContext.Provider>\n  );\n};\n\nexport const FormFieldLabel: React.FC<LabelProps> = ({\n  leftIcon,\n  rightIcon,\n  optional,\n  required,\n  rightAction,\n  className,\n  children,\n  ...props\n}) => {\n  const { baseId } = useFieldContext();\n  return (\n    <label\n      id={`${baseId}-label`}\n      htmlFor={`${baseId}-control`}\n      className={cn(\n        \"ml-0.5 text-text-04 font-main-ui-action flex flex-row items-center gap-1\",\n        className\n      )}\n      {...props}\n    >\n      {leftIcon && <span className=\"flex items-center\">{leftIcon}</span>}\n      {children}\n      {required ? (\n        <Text as=\"p\" text03 mainUiMuted className=\"mx-0.5\">\n          {\"(Required)\"}\n        </Text>\n      ) : optional ? (\n        <Text as=\"p\" text03 mainUiMuted className=\"mx-0.5\">\n          {\"(Optional)\"}\n        </Text>\n      ) : null}\n      {rightIcon && <span className=\"flex items-center\">{rightIcon}</span>}\n      {rightAction && (\n        <span className=\"ml-auto flex items-center\">{rightAction}</span>\n      )}\n    </label>\n  );\n};\n\nexport const FormFieldControl: React.FC<ControlProps> = ({\n  asChild,\n  children,\n}) => {\n  const { baseId, state, describedByIds, required } = useFieldContext();\n\n  const ariaAttributes = {\n    id: `${baseId}-control`,\n    \"aria-invalid\": state === \"error\",\n    \"aria-describedby\": describedByIds?.join(\" \"),\n    \"aria-required\": required,\n  };\n\n  if (asChild) {\n    return <Slot {...ariaAttributes}>{children}</Slot>;\n  }\n\n  if (React.isValidElement(children)) {\n    return React.cloneElement(children, {\n      ...ariaAttributes,\n      ...(children.props as any),\n    });\n  }\n\n  return <>{children}</>;\n};\n\nexport const FormFieldDescription: React.FC<DescriptionProps> = ({\n  className,\n  children,\n  ...props\n}) => {\n  const { baseId } = useFieldContext();\n  const content = children;\n  if (!content) return null;\n  return (\n    <Text\n      as=\"p\"\n      id={`${baseId}-desc`}\n      text03\n      secondaryBody\n      className={cn(\"ml-0.5\", className)}\n      {...props}\n    >\n      {content}\n    </Text>\n  );\n};\n\nexport const FormFieldMessage: React.FC<MessageProps> = ({\n  className,\n  messages,\n  render,\n}) => {\n  const { baseId, state } = useFieldContext();\n  let tempState = state;\n  let content = messages?.[tempState];\n  // If the state is success and there is no content, set the state to idle and use the idle message\n  if (tempState === \"success\" && !content) {\n    tempState = \"idle\";\n    content = messages?.idle;\n  }\n  return content ? (\n    <FieldMessage variant={tempState} className={className}>\n      <FieldMessage.Content id={`${baseId}-msg`}>\n        {content}\n      </FieldMessage.Content>\n    </FieldMessage>\n  ) : null;\n};\n\nexport const FormAPIFieldMessage: React.FC<APIMessageProps> = ({\n  className,\n  messages,\n  state = \"loading\",\n}) => {\n  const { baseId } = useFieldContext();\n  const content = messages?.[state];\n  return content ? (\n    <FieldMessage variant={state} className={className}>\n      <FieldMessage.Content id={`${baseId}-api-msg`}>\n        {content}\n      </FieldMessage.Content>\n    </FieldMessage>\n  ) : null;\n};\n\nexport const FormField = Object.assign(FormFieldRoot, {\n  Label: FormFieldLabel,\n  Control: FormFieldControl,\n  Description: FormFieldDescription,\n  Message: FormFieldMessage,\n  APIMessage: FormAPIFieldMessage,\n});\n"
  },
  {
    "path": "web/src/refresh-components/form/FormikField.tsx",
    "content": "\"use client\";\n\nimport {\n  useField,\n  FieldInputProps,\n  FieldHelperProps,\n  FieldMetaProps,\n} from \"formik\";\nimport { FormFieldState } from \"./types\";\nimport React, { useMemo, memo } from \"react\";\n\nexport type FormikFieldProps<T = any> = {\n  name: string;\n  render: (\n    field: FieldInputProps<T>,\n    helper: FieldHelperProps<T>,\n    meta: FieldMetaProps<T>,\n    status: FormFieldState\n  ) => React.ReactElement;\n};\n\nfunction FormikFieldComponent<T>({ name, render }: FormikFieldProps<T>) {\n  const [field, meta, helper] = useField<T>(name);\n\n  const state = useMemo(\n    (): FormFieldState =>\n      meta.touched ? (meta.error ? \"error\" : \"success\") : \"idle\",\n    [meta.touched, meta.error]\n  );\n\n  return render(field, helper, meta, state);\n}\n\nexport const FormikField = memo(\n  FormikFieldComponent\n) as typeof FormikFieldComponent;\n"
  },
  {
    "path": "web/src/refresh-components/form/FormikFields.stories.tsx",
    "content": "/**\n * Stories for Formik-connected form field components.\n *\n * All these components call `useField` from Formik internally, so every story\n * wraps the component in a minimal `<Formik>` provider. The forms are\n * non-submitting; they exist purely to demonstrate the field UI.\n *\n * Components covered:\n * - CheckboxField (unlabeled, from CheckboxField.tsx)\n * - LabeledCheckboxField (from LabeledCheckboxField.tsx)\n * - SwitchField\n * - InputTypeInField\n * - InputTextAreaField\n * - InputSelectField\n * - InputDatePickerField\n * - PasswordInputTypeInField\n */\n\nimport type { Meta, StoryObj } from \"@storybook/react\";\nimport { Formik, Form } from \"formik\";\nimport React from \"react\";\n\nimport UnlabeledCheckboxField from \"./CheckboxField\";\nimport { CheckboxField as LabeledCheckboxField } from \"./LabeledCheckboxField\";\nimport SwitchField from \"./SwitchField\";\nimport InputTypeInField from \"./InputTypeInField\";\nimport InputTextAreaField from \"./InputTextAreaField\";\nimport InputSelectField from \"./InputSelectField\";\nimport InputDatePickerField from \"./InputDatePickerField\";\nimport PasswordInputTypeInField from \"./PasswordInputTypeInField\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\n/** Minimal Formik wrapper that never submits. */\nfunction FormikWrapper({\n  initialValues,\n  children,\n}: {\n  initialValues: Record<string, unknown>;\n  children: React.ReactNode;\n}) {\n  return (\n    <Formik initialValues={initialValues} onSubmit={() => {}}>\n      <Form\n        style={{\n          display: \"flex\",\n          flexDirection: \"column\",\n          gap: 16,\n          maxWidth: 400,\n        }}\n      >\n        {children}\n      </Form>\n    </Formik>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Meta (we use a dummy component since this file covers multiple components)\n// ---------------------------------------------------------------------------\n\nconst meta: Meta = {\n  title: \"refresh-components/form/FormikFields\",\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj;\n\n// ---------------------------------------------------------------------------\n// CheckboxField (unlabeled)\n// ---------------------------------------------------------------------------\n\nexport const Checkbox: Story = {\n  name: \"CheckboxField (unlabeled)\",\n  render: () => (\n    <FormikWrapper initialValues={{ agree: false }}>\n      <UnlabeledCheckboxField name=\"agree\" />\n    </FormikWrapper>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// LabeledCheckboxField\n// ---------------------------------------------------------------------------\n\nexport const LabeledCheckbox: Story = {\n  name: \"LabeledCheckboxField\",\n  render: () => (\n    <FormikWrapper initialValues={{ terms: false }}>\n      <LabeledCheckboxField\n        name=\"terms\"\n        label=\"I agree to the terms and conditions\"\n        sublabel=\"You must accept before continuing.\"\n      />\n    </FormikWrapper>\n  ),\n};\n\nexport const LabeledCheckboxWithTooltip: Story = {\n  name: \"LabeledCheckboxField with tooltip\",\n  render: () => (\n    <FormikWrapper initialValues={{ newsletter: true }}>\n      <LabeledCheckboxField\n        name=\"newsletter\"\n        label=\"Subscribe to newsletter\"\n        tooltip=\"We send at most one email per week.\"\n      />\n    </FormikWrapper>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// SwitchField\n// ---------------------------------------------------------------------------\n\nexport const Switch: Story = {\n  name: \"SwitchField\",\n  render: () => (\n    <FormikWrapper initialValues={{ notifications: true }}>\n      <label htmlFor=\"notifications\" style={{ fontWeight: 500 }}>\n        Enable notifications\n      </label>\n      <SwitchField name=\"notifications\" />\n    </FormikWrapper>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// InputTypeInField\n// ---------------------------------------------------------------------------\n\nexport const TextInput: Story = {\n  name: \"InputTypeInField\",\n  render: () => (\n    <FormikWrapper initialValues={{ username: \"\" }}>\n      <InputTypeInField name=\"username\" placeholder=\"Enter your username\" />\n    </FormikWrapper>\n  ),\n};\n\nexport const TextInputDisabled: Story = {\n  name: \"InputTypeInField (disabled)\",\n  render: () => (\n    <FormikWrapper initialValues={{ locked: \"read-only value\" }}>\n      <InputTypeInField\n        name=\"locked\"\n        placeholder=\"Disabled\"\n        variant=\"disabled\"\n      />\n    </FormikWrapper>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// InputTextAreaField\n// ---------------------------------------------------------------------------\n\nexport const TextArea: Story = {\n  name: \"InputTextAreaField\",\n  render: () => (\n    <FormikWrapper initialValues={{ bio: \"\" }}>\n      <InputTextAreaField name=\"bio\" placeholder=\"Tell us about yourself...\" />\n    </FormikWrapper>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// InputSelectField\n// ---------------------------------------------------------------------------\n\nexport const Select: Story = {\n  name: \"InputSelectField\",\n  render: () => (\n    <FormikWrapper initialValues={{ role: \"\" }}>\n      <InputSelectField name=\"role\">\n        <InputSelect.Trigger placeholder=\"Select a role\" />\n        <InputSelect.Content>\n          <InputSelect.Item value=\"admin\">Admin</InputSelect.Item>\n          <InputSelect.Item value=\"editor\">Editor</InputSelect.Item>\n          <InputSelect.Item value=\"viewer\">Viewer</InputSelect.Item>\n        </InputSelect.Content>\n      </InputSelectField>\n    </FormikWrapper>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// InputDatePickerField\n// ---------------------------------------------------------------------------\n\nexport const DatePicker: Story = {\n  name: \"InputDatePickerField\",\n  render: () => (\n    <FormikWrapper initialValues={{ startDate: null }}>\n      <InputDatePickerField name=\"startDate\" />\n    </FormikWrapper>\n  ),\n};\n\n// ---------------------------------------------------------------------------\n// PasswordInputTypeInField\n// ---------------------------------------------------------------------------\n\nexport const PasswordInput: Story = {\n  name: \"PasswordInputTypeInField\",\n  render: () => (\n    <FormikWrapper initialValues={{ apiKey: \"\" }}>\n      <PasswordInputTypeInField name=\"apiKey\" placeholder=\"sk-...\" />\n    </FormikWrapper>\n  ),\n};\n\nexport const PasswordInputNoLabel: Story = {\n  name: \"PasswordInputTypeInField (no label)\",\n  render: () => (\n    <FormikWrapper initialValues={{ secret: \"\" }}>\n      <PasswordInputTypeInField name=\"secret\" placeholder=\"Enter secret\" />\n    </FormikWrapper>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/form/InputDatePickerField.tsx",
    "content": "\"use client\";\n\nimport { useField } from \"formik\";\nimport InputDatePicker, {\n  InputDatePickerProps,\n} from \"@/refresh-components/inputs/InputDatePicker\";\nimport { useOnChangeValue } from \"@/hooks/formHooks\";\n\ninterface InputDatePickerFieldProps\n  extends Omit<InputDatePickerProps, \"selectedDate\" | \"setSelectedDate\"> {\n  name: string;\n  setSelectedDate?: (date: Date | null) => void;\n}\n\nexport default function InputDatePickerField({\n  name,\n  setSelectedDate,\n  ...props\n}: InputDatePickerFieldProps) {\n  const [field] = useField<Date | null>(name);\n  const onChange = useOnChangeValue(name, setSelectedDate);\n\n  return (\n    <InputDatePicker\n      name={name}\n      selectedDate={field.value}\n      setSelectedDate={onChange}\n      {...props}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/form/InputSelectField.tsx",
    "content": "\"use client\";\n\nimport { useField } from \"formik\";\nimport InputSelect, {\n  InputSelectRootProps,\n} from \"@/refresh-components/inputs/InputSelect\";\nimport { useOnChangeValue } from \"@/hooks/formHooks\";\n\nexport interface InputSelectFieldProps\n  extends Omit<InputSelectRootProps, \"value\"> {\n  name: string;\n}\n\nexport default function InputSelectField({\n  name,\n  children,\n  onValueChange,\n  ...selectProps\n}: InputSelectFieldProps) {\n  const [field, meta] = useField(name);\n  const onChange = useOnChangeValue(name, onValueChange);\n  const hasError = meta.touched && meta.error;\n\n  return (\n    <InputSelect\n      name={name}\n      value={field.value}\n      onValueChange={onChange}\n      error={!!hasError}\n      {...selectProps}\n    >\n      {children}\n    </InputSelect>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/form/InputTextAreaField.tsx",
    "content": "\"use client\";\n\nimport { useField } from \"formik\";\nimport InputTextArea, {\n  InputTextAreaProps,\n} from \"@/refresh-components/inputs/InputTextArea\";\nimport { useOnChangeEvent, useOnBlurEvent } from \"@/hooks/formHooks\";\n\nexport interface InputTextAreaFieldProps\n  extends Omit<InputTextAreaProps, \"value\"> {\n  name: string;\n}\n\nexport default function InputTextAreaField({\n  name,\n  onChange: onChangeProp,\n  onBlur: onBlurProp,\n  ...textareaProps\n}: InputTextAreaFieldProps) {\n  const [field, meta] = useField(name);\n  const onChange = useOnChangeEvent(name, onChangeProp);\n  const onBlur = useOnBlurEvent(name, onBlurProp);\n  const hasError = meta.touched && meta.error;\n  const isNonEditable =\n    textareaProps.variant === \"disabled\" ||\n    textareaProps.variant === \"readOnly\";\n\n  return (\n    <InputTextArea\n      {...textareaProps}\n      id={name}\n      name={name}\n      value={field.value ?? \"\"}\n      onChange={onChange}\n      onBlur={onBlur}\n      variant={\n        isNonEditable\n          ? textareaProps.variant\n          : hasError\n            ? \"error\"\n            : textareaProps.variant\n      }\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/form/InputTypeInElementField.tsx",
    "content": "\"use client\";\n\nimport { useField } from \"formik\";\nimport InputTypeIn, {\n  InputTypeInProps,\n} from \"@/refresh-components/inputs/InputTypeIn\";\nimport { Button } from \"@opal/components\";\nimport { SvgMinusCircle } from \"@opal/icons\";\nimport { useOnChangeEvent, useOnBlurEvent } from \"@/hooks/formHooks\";\nimport { Section } from \"@/layouts/general-layouts\";\n\nexport interface InputTypeInElementFieldProps\n  extends Omit<InputTypeInProps, \"value\" | \"onClear\"> {\n  name: string;\n  onRemove?: () => void;\n}\n\n// This component should be used inside of a list in `formik`'s \"Form\" context.\nexport default function InputTypeInElementField({\n  name,\n  onRemove,\n  onChange: onChangeProp,\n  onBlur: onBlurProp,\n  ...inputProps\n}: InputTypeInElementFieldProps) {\n  const [field, meta] = useField(name);\n  const onChange = useOnChangeEvent(name, onChangeProp);\n  const onBlur = useOnBlurEvent(name, onBlurProp);\n  const hasError = meta.touched && meta.error;\n  const isEmpty = !field.value || field.value.trim() === \"\";\n  const isNonEditable =\n    inputProps.variant === \"disabled\" || inputProps.variant === \"readOnly\";\n\n  return (\n    <Section flexDirection=\"row\" gap={0.25}>\n      {/* Input */}\n      <InputTypeIn\n        {...inputProps}\n        id={name}\n        name={name}\n        value={field.value ?? \"\"}\n        onChange={onChange}\n        onBlur={onBlur}\n        variant={\n          isNonEditable\n            ? inputProps.variant\n            : hasError\n              ? \"error\"\n              : inputProps.variant\n        }\n        showClearButton={false}\n      />\n      <Button\n        disabled={!onRemove || isEmpty}\n        icon={SvgMinusCircle}\n        prominence=\"tertiary\"\n        onClick={onRemove}\n        tooltip=\"Remove\"\n      />\n    </Section>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/form/InputTypeInField.tsx",
    "content": "\"use client\";\n\nimport { useField } from \"formik\";\nimport InputTypeIn, {\n  InputTypeInProps,\n} from \"@/refresh-components/inputs/InputTypeIn\";\nimport { useOnChangeEvent, useOnBlurEvent } from \"@/hooks/formHooks\";\n\nexport interface InputTypeInFieldProps\n  extends Omit<InputTypeInProps, \"value\" | \"onClear\"> {\n  name: string;\n}\n\nexport default function InputTypeInField({\n  name,\n  onChange: onChangeProp,\n  onBlur: onBlurProp,\n  ...inputProps\n}: InputTypeInFieldProps) {\n  const [field, meta, helpers] = useField(name);\n  const onChange = useOnChangeEvent(name, onChangeProp);\n  const onBlur = useOnBlurEvent(name, onBlurProp);\n  const hasError = meta.touched && meta.error;\n  const isNonEditable =\n    inputProps.variant === \"disabled\" || inputProps.variant === \"readOnly\";\n\n  return (\n    <InputTypeIn\n      {...inputProps}\n      id={name}\n      name={name}\n      value={field.value ?? \"\"}\n      onChange={onChange}\n      onBlur={onBlur}\n      onClear={() => {\n        helpers.setValue(\"\");\n      }}\n      variant={\n        isNonEditable\n          ? inputProps.variant\n          : hasError\n            ? \"error\"\n            : inputProps.variant\n      }\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/form/Label.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Label from \"./Label\";\n\nconst meta: Meta<typeof Label> = {\n  title: \"refresh-components/form/Label\",\n  component: Label,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Label>;\n\nexport const Default: Story = {\n  args: {\n    children: \"Email Address\",\n    name: \"email\",\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    children: \"Disabled Label\",\n    name: \"disabled-input\",\n    disabled: true,\n  },\n};\n\nexport const NonInteractive: Story = {\n  args: {\n    children: \"Non-Interactive Label\",\n    name: \"readonly-input\",\n    nonInteractive: true,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/form/Label.tsx",
    "content": "\"use client\";\n\nimport { cn } from \"@/lib/utils\";\nimport { WithoutStyles } from \"@/types\";\n\n/**\n * Label - A form label component\n *\n * Renders a label element that associates with a form input via the `name` prop.\n *\n * @example\n * ```tsx\n * import Label from \"@/refresh-components/form/Label\";\n *\n * <Label name=\"email\">\n *   Email Address\n * </Label>\n * ```\n */\n\ninterface LabelProps\n  extends WithoutStyles<\n    // The `htmlFor` prop is instead renamed to `name?: string`.\n    Omit<React.LabelHTMLAttributes<HTMLLabelElement>, \"htmlFor\">\n  > {\n  /** The name/id of the form element this label is associated with */\n  name?: string;\n  /** Whether the associated input is disabled */\n  disabled?: boolean;\n  nonInteractive?: boolean;\n  ref?: React.Ref<HTMLLabelElement>;\n}\n\nexport default function Label({\n  name,\n  disabled,\n  nonInteractive,\n  ref,\n  ...props\n}: LabelProps) {\n  return (\n    <label\n      ref={ref}\n      data-non-interactive={nonInteractive ? \"true\" : undefined}\n      className={cn(\n        \"flex-1 self-stretch\",\n        \"peer-disabled:cursor-not-allowed data-[non-interactive=true]:cursor-default\",\n        disabled\n          ? \"cursor-not-allowed\"\n          : nonInteractive\n            ? undefined\n            : \"cursor-pointer\"\n      )}\n      htmlFor={name}\n      {...props}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/form/LabeledCheckboxField.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { useField } from \"formik\";\nimport { cn } from \"@/lib/utils\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\n\ninterface CheckboxFieldProps {\n  name: string;\n  label: string;\n  labelClassName?: string;\n  sublabel?: string;\n  size?: \"sm\" | \"md\" | \"lg\";\n  tooltip?: string;\n  onChange?: (checked: boolean) => void;\n  disabled?: boolean;\n}\n\nexport const CheckboxField: React.FC<CheckboxFieldProps> = ({\n  name,\n  label,\n  onChange,\n  sublabel,\n  size = \"md\",\n  tooltip,\n  labelClassName,\n  disabled,\n  ...props\n}) => {\n  const [field, , helpers] = useField<boolean>({ name, type: \"checkbox\" });\n\n  const sizeClasses = {\n    sm: \"h-2 w-2\",\n    md: \"h-3 w-3\",\n    lg: \"h-4 w-4\",\n  };\n\n  const handleClick = (e: React.MouseEvent<HTMLLabelElement>) => {\n    e.preventDefault();\n    const next = !field.value;\n    helpers.setValue(next);\n    onChange?.(next);\n  };\n\n  const labelId = `${name}-label`;\n\n  const checkboxContent = (\n    <div className=\"flex w-fit items-start space-x-2\">\n      <Checkbox\n        id={name}\n        aria-labelledby={labelId}\n        checked={field.value}\n        onCheckedChange={(checked) => {\n          helpers.setValue(Boolean(checked));\n          onChange?.(Boolean(checked));\n        }}\n        className={cn(sizeClasses[size])}\n        disabled={disabled}\n        {...props}\n      />\n      <div className=\"flex flex-col\">\n        <label\n          id={labelId}\n          htmlFor={name}\n          className=\"flex flex-col cursor-pointer\"\n          onClick={handleClick}\n        >\n          <span\n            className={cn(\n              \"text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70\",\n              labelClassName\n            )}\n          >\n            {label}\n          </span>\n          {sublabel && (\n            <span className=\"text-sm text-muted-foreground mt-1\">\n              {sublabel}\n            </span>\n          )}\n        </label>\n      </div>\n    </div>\n  );\n\n  return (\n    <SimpleTooltip tooltip={tooltip} side=\"top\" sideOffset={25}>\n      {checkboxContent}\n    </SimpleTooltip>\n  );\n};\n\nexport default CheckboxField;\n"
  },
  {
    "path": "web/src/refresh-components/form/PasswordInputTypeInField.tsx",
    "content": "\"use client\";\n\nimport { useField } from \"formik\";\nimport PasswordInputTypeIn, {\n  PasswordInputTypeInProps,\n} from \"@/refresh-components/inputs/PasswordInputTypeIn\";\nimport { useOnChangeEvent, useOnBlurEvent } from \"@/hooks/formHooks\";\n\nexport interface PasswordInputTypeInFieldProps\n  extends Omit<PasswordInputTypeInProps, \"value\"> {\n  name: string;\n}\n\nexport default function PasswordInputTypeInField({\n  name,\n  onChange: onChangeProp,\n  onBlur: onBlurProp,\n  ...inputProps\n}: PasswordInputTypeInFieldProps) {\n  const [field, meta] = useField(name);\n  const onChange = useOnChangeEvent(name, onChangeProp);\n  const onBlur = useOnBlurEvent(name, onBlurProp);\n  const hasError = meta.touched && meta.error;\n  const showError = hasError && !inputProps.disabled;\n\n  return (\n    <PasswordInputTypeIn\n      {...inputProps}\n      id={name}\n      name={name}\n      value={field.value ?? \"\"}\n      onChange={onChange}\n      onBlur={onBlur}\n      error={showError ? true : inputProps.error}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/form/SwitchField.tsx",
    "content": "\"use client\";\n\nimport { useField } from \"formik\";\nimport Switch, { SwitchProps } from \"@/refresh-components/inputs/Switch\";\nimport { useOnChangeValue } from \"@/hooks/formHooks\";\n\ninterface SwitchFieldProps extends Omit<SwitchProps, \"checked\"> {\n  name: string;\n}\n\nexport default function SwitchField({\n  name,\n  onCheckedChange,\n  ...props\n}: SwitchFieldProps) {\n  const [field] = useField<boolean>({ name, type: \"checkbox\" });\n  const onChange = useOnChangeValue(name, onCheckedChange);\n\n  return (\n    <Switch\n      id={name}\n      name={name}\n      checked={field.value}\n      onCheckedChange={onChange}\n      {...props}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/form/types.ts",
    "content": "import type React from \"react\";\nexport type FormFieldState = \"idle\" | \"success\" | \"error\";\nexport type APIFormFieldState = FormFieldState | \"loading\";\n\nexport interface FieldContextType {\n  baseId: string;\n  name?: string;\n  required?: boolean;\n  state: FormFieldState;\n  describedByIds: string[];\n}\n\nexport type FormFieldRootProps = React.HTMLAttributes<HTMLDivElement> & {\n  name?: string;\n  state?: FormFieldState;\n  required?: boolean;\n  id?: string;\n};\n\nexport type LabelProps = React.HTMLAttributes<HTMLLabelElement> & {\n  leftIcon?: React.ReactNode;\n  rightIcon?: React.ReactNode;\n  optional?: boolean;\n  required?: boolean;\n  rightAction?: React.ReactNode;\n};\n\nexport type ControlProps = React.PropsWithChildren<{\n  asChild?: boolean;\n}>;\n\nexport type DescriptionProps = React.HTMLAttributes<HTMLParagraphElement>;\nexport type MessageByState = Partial<\n  Record<FormFieldState, string | React.ReactNode>\n>;\nexport type APIMessageByState = Partial<\n  Record<FormFieldState | \"loading\", string>\n>;\n\nexport type MessageProps = React.HTMLAttributes<HTMLDivElement> & {\n  messages?: MessageByState;\n  render?: (state: FormFieldState) => React.ReactNode;\n};\n\nexport type APIMessageProps = React.HTMLAttributes<HTMLDivElement> & {\n  state?: APIFormFieldState;\n  messages?: APIMessageByState;\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/Checkbox.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Checkbox from \"./Checkbox\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof Checkbox> = {\n  title: \"refresh-components/inputs/Checkbox\",\n  component: Checkbox,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Checkbox>;\n\nexport const Default: Story = {\n  args: {},\n};\n\nexport const Checked: Story = {\n  args: {\n    checked: true,\n  },\n};\n\nexport const Indeterminate: Story = {\n  args: {\n    indeterminate: true,\n  },\n};\n\nexport const WithLabel: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", alignItems: \"center\", gap: 8 }}>\n      <Checkbox id=\"terms\" />\n      <label htmlFor=\"terms\" style={{ cursor: \"pointer\" }}>\n        Accept terms and conditions\n      </label>\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/Checkbox.test.tsx",
    "content": "import React from \"react\";\nimport { render, screen, fireEvent } from \"@testing-library/react\";\nimport \"@testing-library/jest-dom\";\nimport Checkbox from \"./Checkbox\";\n\ndescribe(\"Checkbox\", () => {\n  describe(\"Rendering\", () => {\n    test(\"renders unchecked by default\", () => {\n      const { container } = render(<Checkbox />);\n      const checkbox = screen.getByRole(\"checkbox\");\n      const input = container.querySelector(\n        'input[type=\"checkbox\"]'\n      ) as HTMLInputElement;\n      expect(checkbox).toHaveAttribute(\"aria-checked\", \"false\");\n      expect(input).not.toBeChecked();\n    });\n\n    test(\"renders checked when checked prop is true\", () => {\n      const { container } = render(\n        <Checkbox checked={true} onCheckedChange={() => {}} />\n      );\n      const checkbox = screen.getByRole(\"checkbox\");\n      const input = container.querySelector(\n        'input[type=\"checkbox\"]'\n      ) as HTMLInputElement;\n      expect(checkbox).toHaveAttribute(\"aria-checked\", \"true\");\n      expect(input).toBeChecked();\n    });\n\n    test(\"renders unchecked when checked prop is false\", () => {\n      const { container } = render(\n        <Checkbox checked={false} onCheckedChange={() => {}} />\n      );\n      const checkbox = screen.getByRole(\"checkbox\");\n      const input = container.querySelector(\n        'input[type=\"checkbox\"]'\n      ) as HTMLInputElement;\n      expect(checkbox).toHaveAttribute(\"aria-checked\", \"false\");\n      expect(input).not.toBeChecked();\n    });\n\n    test(\"renders with defaultChecked\", () => {\n      const { container } = render(<Checkbox defaultChecked={true} />);\n      const checkbox = screen.getByRole(\"checkbox\");\n      const input = container.querySelector(\n        'input[type=\"checkbox\"]'\n      ) as HTMLInputElement;\n      expect(checkbox).toHaveAttribute(\"aria-checked\", \"true\");\n      expect(input).toBeChecked();\n    });\n\n    test(\"applies custom className\", () => {\n      const { container } = render(<Checkbox className=\"custom-class\" />);\n      const visualCheckbox = container.querySelector(\".custom-class\");\n      expect(visualCheckbox).toBeInTheDocument();\n    });\n  });\n\n  describe(\"Controlled mode\", () => {\n    test(\"calls onCheckedChange when clicked\", () => {\n      const handleChange = jest.fn();\n      render(<Checkbox checked={false} onCheckedChange={handleChange} />);\n      const visualCheckbox = screen.getByRole(\"checkbox\");\n      fireEvent.click(visualCheckbox);\n      expect(handleChange).toHaveBeenCalledWith(true);\n    });\n\n    test(\"does not change state when controlled\", () => {\n      const handleChange = jest.fn();\n      const { container } = render(\n        <Checkbox checked={false} onCheckedChange={handleChange} />\n      );\n      const visualCheckbox = screen.getByRole(\"checkbox\");\n      const input = container.querySelector(\n        'input[type=\"checkbox\"]'\n      ) as HTMLInputElement;\n      fireEvent.click(visualCheckbox);\n      expect(input).not.toBeChecked(); // Should not change without parent updating prop\n    });\n\n    test(\"updates when checked prop changes\", () => {\n      const { rerender, container } = render(\n        <Checkbox checked={false} onCheckedChange={() => {}} />\n      );\n      let checkbox = screen.getByRole(\"checkbox\");\n      let input = container.querySelector(\n        'input[type=\"checkbox\"]'\n      ) as HTMLInputElement;\n      expect(checkbox).toHaveAttribute(\"aria-checked\", \"false\");\n      expect(input).not.toBeChecked();\n\n      rerender(<Checkbox checked={true} onCheckedChange={() => {}} />);\n      checkbox = screen.getByRole(\"checkbox\");\n      input = container.querySelector(\n        'input[type=\"checkbox\"]'\n      ) as HTMLInputElement;\n      expect(checkbox).toHaveAttribute(\"aria-checked\", \"true\");\n      expect(input).toBeChecked();\n    });\n  });\n\n  describe(\"Uncontrolled mode\", () => {\n    test(\"toggles state and calls onCheckedChange when clicked\", () => {\n      const handleChange = jest.fn();\n      const { container } = render(<Checkbox onCheckedChange={handleChange} />);\n      const visualCheckbox = screen.getByRole(\"checkbox\");\n      const input = container.querySelector(\n        'input[type=\"checkbox\"]'\n      ) as HTMLInputElement;\n\n      expect(visualCheckbox).toHaveAttribute(\"aria-checked\", \"false\");\n      expect(input).not.toBeChecked();\n      fireEvent.click(visualCheckbox);\n      expect(visualCheckbox).toHaveAttribute(\"aria-checked\", \"true\");\n      expect(input).toBeChecked();\n      expect(handleChange).toHaveBeenCalledWith(true);\n\n      fireEvent.click(visualCheckbox);\n      expect(visualCheckbox).toHaveAttribute(\"aria-checked\", \"false\");\n      expect(input).not.toBeChecked();\n      expect(handleChange).toHaveBeenCalledWith(false);\n    });\n  });\n\n  describe(\"Indeterminate state\", () => {\n    test(\"sets correct aria-checked values for all states\", () => {\n      const { rerender, container } = render(\n        <Checkbox checked={false} onCheckedChange={() => {}} />\n      );\n      let checkbox = screen.getByRole(\"checkbox\");\n      expect(checkbox).toHaveAttribute(\"aria-checked\", \"false\");\n\n      rerender(<Checkbox checked={true} onCheckedChange={() => {}} />);\n      checkbox = screen.getByRole(\"checkbox\");\n      expect(checkbox).toHaveAttribute(\"aria-checked\", \"true\");\n\n      rerender(<Checkbox indeterminate={true} />);\n      checkbox = screen.getByRole(\"checkbox\");\n      const input = container.querySelector(\n        'input[type=\"checkbox\"]'\n      ) as HTMLInputElement;\n      expect(checkbox).toHaveAttribute(\"aria-checked\", \"mixed\");\n      expect(input.indeterminate).toBe(true);\n    });\n  });\n\n  describe(\"Disabled state\", () => {\n    test(\"sets disabled attribute and prevents interaction\", () => {\n      const handleChange = jest.fn();\n      const { container } = render(\n        <Checkbox disabled={true} onCheckedChange={handleChange} />\n      );\n      const visualCheckbox = screen.getByRole(\"checkbox\");\n      const input = container.querySelector(\n        'input[type=\"checkbox\"]'\n      ) as HTMLInputElement;\n\n      expect(input).toBeDisabled();\n      expect(input).not.toBeChecked();\n\n      fireEvent.click(visualCheckbox);\n      expect(input).not.toBeChecked();\n      expect(handleChange).not.toHaveBeenCalled();\n    });\n  });\n\n  describe(\"Keyboard interaction\", () => {\n    test(\"toggles when spacebar is pressed on visual checkbox\", () => {\n      const { container } = render(<Checkbox />);\n      const visualCheckbox = screen.getByRole(\"checkbox\");\n      const input = container.querySelector(\n        'input[type=\"checkbox\"]'\n      ) as HTMLInputElement;\n\n      visualCheckbox.focus();\n      expect(input).not.toBeChecked();\n\n      fireEvent.keyDown(visualCheckbox, { key: \" \", code: \"Space\" });\n      expect(input).toBeChecked();\n    });\n\n    test(\"toggles when Enter is pressed on visual checkbox\", () => {\n      const { container } = render(<Checkbox />);\n      const visualCheckbox = screen.getByRole(\"checkbox\");\n      const input = container.querySelector(\n        'input[type=\"checkbox\"]'\n      ) as HTMLInputElement;\n\n      visualCheckbox.focus();\n      expect(input).not.toBeChecked();\n\n      fireEvent.keyDown(visualCheckbox, { key: \"Enter\", code: \"Enter\" });\n      expect(input).toBeChecked();\n    });\n  });\n\n  describe(\"onChange handler\", () => {\n    test(\"calls onChange when provided\", () => {\n      const handleChange = jest.fn();\n      render(<Checkbox onChange={handleChange} />);\n      const checkbox = screen.getByRole(\"checkbox\");\n\n      fireEvent.click(checkbox);\n      expect(handleChange).toHaveBeenCalled();\n    });\n\n    test(\"calls both onChange and onCheckedChange\", () => {\n      const handleChange = jest.fn();\n      const handleCheckedChange = jest.fn();\n      render(\n        <Checkbox\n          onChange={handleChange}\n          onCheckedChange={handleCheckedChange}\n        />\n      );\n      const checkbox = screen.getByRole(\"checkbox\");\n\n      fireEvent.click(checkbox);\n      expect(handleChange).toHaveBeenCalled();\n      expect(handleCheckedChange).toHaveBeenCalledWith(true);\n    });\n  });\n\n  describe(\"Ref forwarding\", () => {\n    test(\"forwards ref to input element\", () => {\n      const ref = React.createRef<HTMLInputElement>();\n      render(<Checkbox ref={ref} />);\n      expect(ref.current).toBeInstanceOf(HTMLInputElement);\n      expect(ref.current?.type).toBe(\"checkbox\");\n    });\n  });\n\n  describe(\"Accessibility\", () => {\n    test(\"has role checkbox\", () => {\n      render(<Checkbox />);\n      const checkbox = screen.getByRole(\"checkbox\");\n      expect(checkbox).toBeInTheDocument();\n    });\n\n    test(\"supports aria-label\", () => {\n      render(<Checkbox aria-label=\"Accept terms\" />);\n      const checkbox = screen.getByRole(\"checkbox\");\n      expect(checkbox).toHaveAttribute(\"aria-label\", \"Accept terms\");\n    });\n\n    test(\"supports aria-labelledby\", () => {\n      render(\n        <div>\n          <span id=\"checkbox-label\">Accept terms</span>\n          <Checkbox aria-labelledby=\"checkbox-label\" />\n        </div>\n      );\n      const checkbox = screen.getByRole(\"checkbox\");\n      expect(checkbox).toHaveAttribute(\"aria-labelledby\", \"checkbox-label\");\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/refresh-components/inputs/Checkbox.tsx",
    "content": "\"use client\";\n\nimport React, { useEffect, useRef, useState } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { SvgCheck, SvgMinus } from \"@opal/icons\";\nconst getRootClasses = (checked: boolean, indeterminate: boolean) => ({\n  main:\n    checked || indeterminate\n      ? [\n          \"bg-action-link-05\",\n          \"hover:bg-action-link-04\",\n          \"focus-visible:border-border-05\",\n          \"focus-visible:focus-shadow\",\n        ]\n      : [\n          \"bg-background-neutral-00\",\n          \"border\",\n          \"border-border-02\",\n          \"hover:border-border-03\",\n          \"focus-visible:border-border-05\",\n          \"focus-visible:focus-shadow\",\n        ],\n  disabled:\n    checked || indeterminate\n      ? [\"bg-background-neutral-04\"]\n      : [\"bg-background-neutral-03\", \"border\", \"border-border-02\"],\n});\n\nexport interface CheckboxProps\n  extends Omit<React.ComponentPropsWithoutRef<\"input\">, \"type\" | \"size\"> {\n  checked?: boolean;\n  defaultChecked?: boolean;\n  onCheckedChange?: (checked: boolean) => void;\n  indeterminate?: boolean;\n}\n\nfunction CheckboxInner(\n  {\n    checked: controlledChecked,\n    defaultChecked,\n    onCheckedChange,\n    indeterminate = false,\n    disabled,\n    className,\n    onChange,\n    id,\n    name,\n    \"aria-label\": ariaLabel,\n    \"aria-labelledby\": ariaLabelledby,\n    \"aria-describedby\": ariaDescribedby,\n    ...props\n  }: CheckboxProps,\n  ref: React.ForwardedRef<HTMLInputElement>\n) {\n  const [uncontrolledChecked, setUncontrolledChecked] = useState(\n    defaultChecked ?? false\n  );\n  const inputRef = useRef<HTMLInputElement>(null);\n\n  // Merge refs\n  useEffect(() => {\n    if (ref) {\n      if (typeof ref === \"function\") {\n        ref(inputRef.current);\n      } else {\n        ref.current = inputRef.current;\n      }\n    }\n\n    // Cleanup: clear ref on unmount\n    return () => {\n      if (ref) {\n        if (typeof ref === \"function\") {\n          ref(null);\n        } else {\n          ref.current = null;\n        }\n      }\n    };\n  }, [ref]);\n\n  const isControlled = controlledChecked !== undefined;\n  const checked = isControlled ? controlledChecked : uncontrolledChecked;\n\n  // Set indeterminate state on the DOM element\n  useEffect(() => {\n    if (inputRef.current) {\n      inputRef.current.indeterminate = indeterminate;\n    }\n  }, [indeterminate]);\n\n  function handleChange(event: React.ChangeEvent<HTMLInputElement>) {\n    if (disabled) return;\n\n    const newChecked = event.target.checked;\n\n    if (!isControlled) setUncontrolledChecked(newChecked);\n    onChange?.(event);\n    onCheckedChange?.(newChecked);\n  }\n\n  const variant = disabled ? \"disabled\" : \"main\";\n  const rootClasses = getRootClasses(checked, indeterminate);\n\n  return (\n    <div className=\"relative inline-flex shrink-0\">\n      {/*\n        Dual-element pattern for custom checkbox:\n        1. Hidden input: Maintains form state, enables form submission, supports indeterminate property\n        2. Visible div: Provides custom styling, handles user interaction, accessible via role=\"checkbox\"\n        The div's click handler triggers the input's native click, preserving standard checkbox behavior.\n      */}\n      <input\n        ref={inputRef}\n        id={id}\n        type=\"checkbox\"\n        role=\"presentation\"\n        className=\"sr-only peer\"\n        checked={checked}\n        disabled={disabled}\n        onChange={handleChange}\n        name={name}\n        {...props}\n      />\n      <div\n        role=\"checkbox\"\n        aria-checked={indeterminate ? \"mixed\" : checked}\n        aria-label={ariaLabel}\n        aria-labelledby={ariaLabelledby}\n        aria-describedby={ariaDescribedby}\n        tabIndex={disabled ? -1 : 0}\n        className={cn(\n          \"flex h-4 w-4 shrink-0 items-center justify-center rounded-04 transition-colors\",\n          disabled ? \"cursor-not-allowed\" : \"cursor-pointer\",\n          rootClasses[variant],\n          className\n        )}\n        onClick={(e) => {\n          if (!disabled && inputRef.current) {\n            inputRef.current.click();\n            e.preventDefault();\n          }\n        }}\n        onKeyDown={(e) => {\n          if (\n            !disabled &&\n            inputRef.current &&\n            (e.key === \" \" || e.key === \"Enter\")\n          ) {\n            e.preventDefault();\n            inputRef.current.click();\n          }\n        }}\n      >\n        {(checked || indeterminate) && (\n          <div>\n            {indeterminate ? (\n              <SvgMinus className=\"h-3 w-3 stroke-text-light-05\" />\n            ) : (\n              <SvgCheck className=\"h-3 w-3 stroke-text-light-05\" />\n            )}\n          </div>\n        )}\n      </div>\n    </div>\n  );\n}\n\nconst Checkbox = React.forwardRef(CheckboxInner);\nCheckbox.displayName = \"Checkbox\";\nexport default Checkbox;\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputAvatar.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport InputAvatar from \"./InputAvatar\";\nimport * as AvatarPrimitive from \"@radix-ui/react-avatar\";\n\nconst meta: Meta<typeof InputAvatar> = {\n  title: \"refresh-components/inputs/InputAvatar\",\n  component: InputAvatar,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <div\n        style={{\n          width: 320,\n          display: \"flex\",\n          justifyContent: \"center\",\n          padding: 24,\n        }}\n      >\n        <Story />\n      </div>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InputAvatar>;\n\nexport const WithImage: Story = {\n  render: () => (\n    <InputAvatar>\n      <AvatarPrimitive.Image\n        src=\"https://picsum.photos/80\"\n        alt=\"User avatar\"\n        className=\"h-full w-full object-cover\"\n      />\n      <AvatarPrimitive.Fallback className=\"flex h-full w-full items-center justify-center bg-background-tint-02 text-text-03 text-sm font-medium\">\n        AB\n      </AvatarPrimitive.Fallback>\n    </InputAvatar>\n  ),\n};\n\nexport const WithFallback: Story = {\n  render: () => (\n    <InputAvatar>\n      <AvatarPrimitive.Fallback className=\"flex h-full w-full items-center justify-center bg-background-tint-02 text-text-03 text-sm font-medium\">\n        JD\n      </AvatarPrimitive.Fallback>\n    </InputAvatar>\n  ),\n};\n\nexport const Empty: Story = {\n  render: () => (\n    <InputAvatar>\n      <AvatarPrimitive.Fallback className=\"flex h-full w-full items-center justify-center bg-background-tint-02 text-text-04 text-xs\">\n        ?\n      </AvatarPrimitive.Fallback>\n    </InputAvatar>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputAvatar.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport * as AvatarPrimitive from \"@radix-ui/react-avatar\";\nimport { cn } from \"@/lib/utils\";\nimport { wrapperClasses } from \"@/refresh-components/inputs/styles\";\n\nconst InputAvatar = React.forwardRef<\n  React.ElementRef<typeof AvatarPrimitive.Root>,\n  React.ComponentPropsWithoutRef<typeof AvatarPrimitive.Root>\n>(({ className, ...props }, ref) => (\n  <AvatarPrimitive.Root\n    ref={ref}\n    className={cn(\n      \"relative flex h-10 w-10 shrink-0 overflow-hidden rounded-full\",\n      wrapperClasses.primary,\n      className\n    )}\n    {...props}\n  />\n));\nInputAvatar.displayName = AvatarPrimitive.Root.displayName;\n\nexport default InputAvatar;\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputChipField.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport React from \"react\";\nimport InputChipField from \"./InputChipField\";\nimport type { ChipItem } from \"./InputChipField\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof InputChipField> = {\n  title: \"refresh-components/inputs/InputChipField\",\n  component: InputChipField,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 400 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InputChipField>;\n\nexport const Default: Story = {\n  render: function DefaultStory() {\n    const [chips, setChips] = React.useState<ChipItem[]>([]);\n    const [value, setValue] = React.useState(\"\");\n\n    return (\n      <InputChipField\n        chips={chips}\n        onRemoveChip={(id) => setChips((c) => c.filter((ch) => ch.id !== id))}\n        onAdd={(label) => {\n          setChips((c) => [...c, { id: crypto.randomUUID(), label }]);\n          setValue(\"\");\n        }}\n        value={value}\n        onChange={setValue}\n        placeholder=\"Type and press Enter...\"\n      />\n    );\n  },\n};\n\nexport const WithChips: Story = {\n  render: function WithChipsStory() {\n    const [chips, setChips] = React.useState<ChipItem[]>([\n      { id: \"1\", label: \"React\" },\n      { id: \"2\", label: \"TypeScript\" },\n      { id: \"3\", label: \"Tailwind\" },\n    ]);\n    const [value, setValue] = React.useState(\"\");\n\n    return (\n      <InputChipField\n        chips={chips}\n        onRemoveChip={(id) => setChips((c) => c.filter((ch) => ch.id !== id))}\n        onAdd={(label) => {\n          setChips((c) => [...c, { id: crypto.randomUUID(), label }]);\n          setValue(\"\");\n        }}\n        value={value}\n        onChange={setValue}\n        placeholder=\"Add tags...\"\n      />\n    );\n  },\n};\n\nexport const Disabled: Story = {\n  render: () => (\n    <InputChipField\n      chips={[\n        { id: \"1\", label: \"Locked\" },\n        { id: \"2\", label: \"Tag\" },\n      ]}\n      onRemoveChip={() => {}}\n      onAdd={() => {}}\n      value=\"\"\n      onChange={() => {}}\n      placeholder=\"Disabled\"\n      disabled\n    />\n  ),\n};\n\nexport const ErrorVariant: Story = {\n  render: function ErrorStory() {\n    const [chips, setChips] = React.useState<ChipItem[]>([\n      { id: \"1\", label: \"Invalid\" },\n    ]);\n    const [value, setValue] = React.useState(\"\");\n\n    return (\n      <InputChipField\n        chips={chips}\n        onRemoveChip={(id) => setChips((c) => c.filter((ch) => ch.id !== id))}\n        onAdd={(label) => {\n          setChips((c) => [...c, { id: crypto.randomUUID(), label }]);\n          setValue(\"\");\n        }}\n        value={value}\n        onChange={setValue}\n        placeholder=\"Add labels...\"\n        variant=\"error\"\n      />\n    );\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputChipField.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Chip from \"@/refresh-components/Chip\";\nimport {\n  innerClasses,\n  textClasses,\n  Variants,\n  wrapperClasses,\n} from \"@/refresh-components/inputs/styles\";\nimport { SvgAlertTriangle } from \"@opal/icons\";\nimport type { IconProps } from \"@opal/types\";\n\nexport interface ChipItem {\n  id: string;\n  label: string;\n  /** When true the chip shows a warning icon */\n  error?: boolean;\n}\n\nexport interface InputChipFieldProps {\n  chips: ChipItem[];\n  onRemoveChip: (id: string) => void;\n  onAdd: (value: string) => void;\n\n  value: string;\n  onChange: (value: string) => void;\n\n  placeholder?: string;\n  disabled?: boolean;\n  variant?: Variants;\n  icon?: React.FunctionComponent<IconProps>;\n  className?: string;\n  /** \"inline\" renders chips and input in one row; \"stacked\" puts chips above the input */\n  layout?: \"inline\" | \"stacked\";\n}\n\n/**\n * A tag/chip input field that renders chips inline alongside a text input.\n *\n * Pressing Enter adds a chip via `onAdd`. Pressing Backspace on an empty\n * input removes the last chip. Each chip has a remove button.\n *\n * @example\n * ```tsx\n * <InputChipField\n *   chips={[{ id: \"1\", label: \"Search\" }]}\n *   onRemoveChip={(id) => remove(id)}\n *   onAdd={(value) => add(value)}\n *   value={inputValue}\n *   onChange={setInputValue}\n *   placeholder=\"Add labels...\"\n *   icon={SvgTag}\n * />\n * ```\n */\nfunction InputChipField({\n  chips,\n  onRemoveChip,\n  onAdd,\n  value,\n  onChange,\n  placeholder,\n  disabled = false,\n  variant = \"primary\",\n  icon: Icon,\n  className,\n  layout = \"inline\",\n}: InputChipFieldProps) {\n  const inputRef = React.useRef<HTMLInputElement>(null);\n\n  function handleKeyDown(e: React.KeyboardEvent<HTMLInputElement>) {\n    if (disabled) {\n      return;\n    }\n\n    if (e.key === \"Enter\") {\n      e.preventDefault();\n      e.stopPropagation();\n      const trimmed = value.trim();\n      if (trimmed) {\n        onAdd(trimmed);\n      }\n    }\n    if (e.key === \"Backspace\" && value === \"\") {\n      const lastChip = chips[chips.length - 1];\n      if (lastChip) {\n        onRemoveChip(lastChip.id);\n      }\n    }\n  }\n\n  const chipElements =\n    chips.length > 0\n      ? chips.map((chip) => (\n          <Chip\n            key={chip.id}\n            onRemove={disabled ? undefined : () => onRemoveChip(chip.id)}\n            rightIcon={chip.error ? SvgAlertTriangle : undefined}\n            error={chip.error}\n            smallLabel={layout === \"stacked\"}\n          >\n            {chip.label}\n          </Chip>\n        ))\n      : null;\n\n  const inputElement = (\n    <>\n      {Icon && <Icon size={16} className=\"text-text-04 shrink-0\" />}\n      <input\n        ref={inputRef}\n        type=\"text\"\n        disabled={disabled}\n        value={value}\n        onChange={(e) => onChange(e.target.value)}\n        onKeyDown={handleKeyDown}\n        placeholder={placeholder}\n        className={cn(\n          \"flex-1 min-w-[80px] h-[1.5rem] bg-transparent p-0.5 focus:outline-none\",\n          innerClasses[variant],\n          textClasses[variant]\n        )}\n      />\n    </>\n  );\n\n  return (\n    <div\n      className={cn(\n        \"flex p-1.5 rounded-08 cursor-text w-full\",\n        layout === \"stacked\"\n          ? \"flex-col gap-1\"\n          : \"flex-row flex-wrap items-center gap-1\",\n        wrapperClasses[variant],\n        className\n      )}\n      onClick={() => inputRef.current?.focus()}\n    >\n      {layout === \"stacked\" ? (\n        <>\n          {chipElements && (\n            <div className=\"flex flex-row items-center flex-wrap gap-1\">\n              {chipElements}\n            </div>\n          )}\n          <div className=\"flex flex-row items-center gap-1\">{inputElement}</div>\n        </>\n      ) : (\n        <>\n          {chipElements}\n          {inputElement}\n        </>\n      )}\n    </div>\n  );\n}\n\nexport default InputChipField;\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputComboBox/InputComboBox.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport React from \"react\";\nimport InputComboBox from \"./InputComboBox\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof InputComboBox> = {\n  title: \"refresh-components/inputs/InputComboBox\",\n  component: InputComboBox,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 320 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InputComboBox>;\n\nconst fruitOptions = [\n  { value: \"apple\", label: \"Apple\" },\n  { value: \"banana\", label: \"Banana\" },\n  { value: \"cherry\", label: \"Cherry\" },\n  { value: \"dragonfruit\", label: \"Dragonfruit\" },\n  { value: \"elderberry\", label: \"Elderberry\" },\n];\n\nexport const Default: Story = {\n  render: function DefaultStory() {\n    const [value, setValue] = React.useState(\"\");\n    return (\n      <InputComboBox\n        placeholder=\"Type or select...\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n        options={fruitOptions}\n      />\n    );\n  },\n};\n\nexport const InputModeNoOptions: Story = {\n  render: function InputModeStory() {\n    const [value, setValue] = React.useState(\"\");\n    return (\n      <InputComboBox\n        placeholder=\"Type anything...\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n      />\n    );\n  },\n};\n\nexport const StrictMode: Story = {\n  render: function StrictStory() {\n    const [value, setValue] = React.useState(\"\");\n    return (\n      <InputComboBox\n        placeholder=\"Select a fruit (strict)\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n        options={fruitOptions}\n        strict\n      />\n    );\n  },\n};\n\nexport const WithPreselectedValue: Story = {\n  render: function PreselectedStory() {\n    const [value, setValue] = React.useState(\"cherry\");\n    return (\n      <InputComboBox\n        placeholder=\"Select a fruit\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n        onValueChange={setValue}\n        options={fruitOptions}\n      />\n    );\n  },\n};\n\nexport const Disabled: Story = {\n  render: () => (\n    <InputComboBox\n      placeholder=\"Cannot interact\"\n      value=\"banana\"\n      options={fruitOptions}\n      disabled\n    />\n  ),\n};\n\nexport const WithSearchIcon: Story = {\n  render: function SearchIconStory() {\n    const [value, setValue] = React.useState(\"\");\n    return (\n      <InputComboBox\n        placeholder=\"Search fruits...\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n        options={fruitOptions}\n        leftSearchIcon\n      />\n    );\n  },\n};\n\nexport const ErrorState: Story = {\n  render: function ErrorStory() {\n    const [value, setValue] = React.useState(\"invalid-value\");\n    return (\n      <InputComboBox\n        placeholder=\"Select a fruit\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n        options={fruitOptions}\n        isError\n      />\n    );\n  },\n};\n\nexport const WithOtherOptions: Story = {\n  render: function OtherOptionsStory() {\n    const [value, setValue] = React.useState(\"\");\n    return (\n      <InputComboBox\n        placeholder=\"Search or select...\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n        options={fruitOptions}\n        showOtherOptions\n        separatorLabel=\"Other fruits\"\n      />\n    );\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputComboBox/InputComboBox.test.tsx",
    "content": "import React from \"react\";\nimport { render, screen, fireEvent, waitFor } from \"@testing-library/react\";\nimport \"@testing-library/jest-dom\";\nimport userEvent from \"@testing-library/user-event\";\nimport InputComboBox from \"./InputComboBox\";\n\n// Mock createPortal for dropdown rendering\njest.mock(\"react-dom\", () => ({\n  ...jest.requireActual(\"react-dom\"),\n  createPortal: (node: React.ReactNode) => node,\n}));\n\n// Mock scrollIntoView which is not available in jsdom\nElement.prototype.scrollIntoView = jest.fn();\n\nconst mockOptions = [\n  { value: \"apple\", label: \"Apple\" },\n  { value: \"banana\", label: \"Banana\" },\n  { value: \"cherry\", label: \"Cherry\" },\n];\n\nconst mockOptionsWithDescriptions = [\n  { value: \"apple\", label: \"Apple\", description: \"A red fruit\" },\n  { value: \"banana\", label: \"Banana\", description: \"A yellow fruit\" },\n];\n\nfunction setupUser() {\n  return userEvent.setup({ delay: null });\n}\n\ndescribe(\"InputComboBox\", () => {\n  describe(\"Rendering\", () => {\n    test(\"renders with placeholder\", () => {\n      render(\n        <InputComboBox\n          placeholder=\"Select an option\"\n          value=\"\"\n          options={mockOptions}\n        />\n      );\n      const input = screen.getByPlaceholderText(\"Select an option\");\n      expect(input).toBeInTheDocument();\n    });\n\n    test(\"renders with initial value\", () => {\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"apple\"\n          options={mockOptions}\n        />\n      );\n      const input = screen.getByDisplayValue(\"Apple\");\n      expect(input).toBeInTheDocument();\n    });\n\n    test(\"renders without options (input mode)\", () => {\n      render(<InputComboBox placeholder=\"Type here\" value=\"\" options={[]} />);\n      const input = screen.getByPlaceholderText(\"Type here\");\n      expect(input).toBeInTheDocument();\n    });\n\n    test(\"renders disabled state\", () => {\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"\"\n          options={mockOptions}\n          disabled\n        />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n      expect(input).toBeDisabled();\n    });\n\n    test(\"renders with options that have descriptions\", () => {\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"\"\n          options={mockOptionsWithDescriptions}\n        />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n      fireEvent.focus(input);\n      expect(screen.getByText(\"A red fruit\")).toBeInTheDocument();\n    });\n  });\n\n  describe(\"Dropdown Behavior\", () => {\n    test(\"opens dropdown on focus when options exist\", () => {\n      render(\n        <InputComboBox placeholder=\"Select\" value=\"\" options={mockOptions} />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n      fireEvent.focus(input);\n      expect(screen.getByRole(\"listbox\")).toBeInTheDocument();\n    });\n\n    test(\"does not open dropdown on focus when no options\", () => {\n      render(<InputComboBox placeholder=\"Select\" value=\"\" options={[]} />);\n      const input = screen.getByPlaceholderText(\"Select\");\n      fireEvent.focus(input);\n      expect(screen.queryByRole(\"listbox\")).not.toBeInTheDocument();\n    });\n\n    test(\"closes dropdown on escape\", async () => {\n      const user = setupUser();\n      render(\n        <InputComboBox placeholder=\"Select\" value=\"\" options={mockOptions} />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.click(input);\n      expect(screen.getByRole(\"listbox\")).toBeInTheDocument();\n\n      await user.keyboard(\"{Escape}\");\n      expect(screen.queryByRole(\"listbox\")).not.toBeInTheDocument();\n    });\n\n    test(\"shows all options on focus when a value is already selected\", () => {\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"apple\"\n          options={mockOptions}\n        />\n      );\n      const input = screen.getByDisplayValue(\"Apple\");\n      fireEvent.focus(input);\n\n      const options = screen.getAllByRole(\"option\");\n      expect(options.length).toBe(3);\n    });\n\n    test(\"closes dropdown on tab\", async () => {\n      const user = setupUser();\n      render(\n        <InputComboBox placeholder=\"Select\" value=\"\" options={mockOptions} />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.click(input);\n      expect(screen.getByRole(\"listbox\")).toBeInTheDocument();\n\n      await user.tab();\n      expect(screen.queryByRole(\"listbox\")).not.toBeInTheDocument();\n    });\n  });\n\n  describe(\"Keyboard Navigation\", () => {\n    test(\"ArrowDown opens dropdown and highlights first option\", async () => {\n      const user = setupUser();\n      render(\n        <InputComboBox placeholder=\"Select\" value=\"\" options={mockOptions} />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.click(input);\n      await user.keyboard(\"{ArrowDown}\");\n\n      const listbox = screen.getByRole(\"listbox\");\n      expect(listbox).toBeInTheDocument();\n    });\n\n    test(\"ArrowUp moves highlight up through options\", async () => {\n      const user = setupUser();\n      render(\n        <InputComboBox placeholder=\"Select\" value=\"\" options={mockOptions} />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.click(input);\n      await user.keyboard(\"{ArrowDown}\");\n      await user.keyboard(\"{ArrowDown}\");\n      await user.keyboard(\"{ArrowUp}\");\n\n      // Highlight should have moved\n      expect(screen.getByRole(\"listbox\")).toBeInTheDocument();\n    });\n\n    test(\"Enter selects highlighted option\", async () => {\n      const handleValueChange = jest.fn();\n      const user = setupUser();\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"\"\n          options={mockOptions}\n          onValueChange={handleValueChange}\n        />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.click(input);\n      await user.keyboard(\"{ArrowDown}\");\n      await user.keyboard(\"{Enter}\");\n\n      expect(handleValueChange).toHaveBeenCalledWith(\"apple\");\n    });\n  });\n\n  describe(\"Filtering\", () => {\n    test(\"filters options based on input value\", async () => {\n      const user = setupUser();\n      render(\n        <InputComboBox placeholder=\"Select\" value=\"\" options={mockOptions} />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.type(input, \"app\");\n\n      // In non-strict mode, searching shows:\n      // 1) a create option for the current input and\n      // 2) matched options.\n      const options = screen.getAllByRole(\"option\");\n      expect(options.length).toBe(2);\n      expect(screen.getByLabelText('Create \"app\"')).toBeInTheDocument();\n      expect(\n        options.some((option) => option.textContent?.includes(\"Apple\"))\n      ).toBe(true);\n      expect(screen.queryByText(\"Banana\")).not.toBeInTheDocument();\n    });\n\n    test(\"shows 'No options found' when no matches and strict mode\", async () => {\n      const user = setupUser();\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"\"\n          options={mockOptions}\n          strict\n        />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.type(input, \"xyz\");\n\n      expect(screen.getByText(\"No options found\")).toBeInTheDocument();\n    });\n\n    test(\"shows separator between matched and unmatched options when enabled\", async () => {\n      const user = setupUser();\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"\"\n          options={mockOptions}\n          separatorLabel=\"Other fruits\"\n          showOtherOptions\n        />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.type(input, \"app\");\n\n      expect(screen.getByText(\"Other fruits\")).toBeInTheDocument();\n    });\n  });\n\n  describe(\"Selection\", () => {\n    test(\"clicking option selects it and closes dropdown\", async () => {\n      const handleChange = jest.fn();\n      const handleValueChange = jest.fn();\n      const user = setupUser();\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"\"\n          options={mockOptions}\n          onChange={handleChange}\n          onValueChange={handleValueChange}\n        />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.click(input);\n      const option = screen.getByText(\"Banana\");\n      await user.click(option);\n\n      expect(handleChange).toHaveBeenCalled();\n      expect(handleValueChange).toHaveBeenCalledWith(\"banana\");\n      expect(screen.queryByRole(\"listbox\")).not.toBeInTheDocument();\n    });\n\n    test(\"displays label instead of value when closed\", () => {\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"apple\"\n          options={mockOptions}\n        />\n      );\n      // Should show \"Apple\" (label) not \"apple\" (value)\n      expect(screen.getByDisplayValue(\"Apple\")).toBeInTheDocument();\n    });\n  });\n\n  describe(\"Strict Mode\", () => {\n    test(\"strict=true shows error when value not in options\", () => {\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"invalid\"\n          options={mockOptions}\n          strict\n        />\n      );\n      expect(\n        screen.getByText(\"Please select a valid option from the list\")\n      ).toBeInTheDocument();\n    });\n\n    test(\"strict=false allows custom values\", () => {\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"custom-value\"\n          options={mockOptions}\n          strict={false}\n        />\n      );\n      expect(\n        screen.queryByText(\"Please select a valid option from the list\")\n      ).not.toBeInTheDocument();\n    });\n\n    test(\"strict=false shows create option when no matches\", async () => {\n      const user = setupUser();\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"\"\n          options={mockOptions}\n          strict={false}\n        />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.type(input, \"newvalue\");\n\n      // Should show the create option with the typed value\n      expect(screen.getByText(\"newvalue\")).toBeInTheDocument();\n    });\n  });\n\n  describe(\"External Error State\", () => {\n    test(\"shows error styling when isError is true\", () => {\n      const { container } = render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"\"\n          options={mockOptions}\n          isError\n        />\n      );\n      // The input should have error styling applied\n      expect(container.querySelector(\"input\")).toBeInTheDocument();\n    });\n\n    test(\"does not show internal error when isError is provided\", () => {\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"invalid\"\n          options={mockOptions}\n          strict\n          isError={false}\n        />\n      );\n      // Internal validation error should not show when isError is explicitly false\n      expect(\n        screen.queryByText(\"Please select a valid option from the list\")\n      ).not.toBeInTheDocument();\n    });\n  });\n\n  describe(\"Accessibility\", () => {\n    test(\"has correct ARIA attributes\", () => {\n      render(\n        <InputComboBox placeholder=\"Select\" value=\"\" options={mockOptions} />\n      );\n      const input = screen.getByRole(\"combobox\");\n      expect(input).toHaveAttribute(\"aria-autocomplete\", \"list\");\n      expect(input).toHaveAttribute(\"aria-expanded\", \"false\");\n    });\n\n    test(\"aria-expanded is true when dropdown is open\", () => {\n      render(\n        <InputComboBox placeholder=\"Select\" value=\"\" options={mockOptions} />\n      );\n      const input = screen.getByRole(\"combobox\");\n      fireEvent.focus(input);\n      expect(input).toHaveAttribute(\"aria-expanded\", \"true\");\n    });\n\n    test(\"options have role option\", () => {\n      render(\n        <InputComboBox placeholder=\"Select\" value=\"\" options={mockOptions} />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n      fireEvent.focus(input);\n\n      const options = screen.getAllByRole(\"option\");\n      expect(options.length).toBe(3);\n    });\n\n    test(\"listbox has correct aria-label\", () => {\n      render(\n        <InputComboBox\n          placeholder=\"Select a fruit\"\n          value=\"\"\n          options={mockOptions}\n        />\n      );\n      const input = screen.getByPlaceholderText(\"Select a fruit\");\n      fireEvent.focus(input);\n\n      const listbox = screen.getByRole(\"listbox\");\n      expect(listbox).toHaveAttribute(\"aria-label\", \"Select a fruit\");\n    });\n  });\n\n  describe(\"Text Highlighting\", () => {\n    test(\"matching text is highlighted in option labels\", async () => {\n      const user = setupUser();\n      const { container } = render(\n        <InputComboBox placeholder=\"Select\" value=\"\" options={mockOptions} />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.type(input, \"app\");\n\n      // Look for the bold/highlighted text\n      const boldText = container.querySelector(\".font-semibold\");\n      expect(boldText).toBeInTheDocument();\n      expect(boldText?.textContent).toBe(\"App\");\n    });\n  });\n\n  describe(\"onChange vs onValueChange\", () => {\n    test(\"onChange is called on every keystroke\", async () => {\n      const handleChange = jest.fn();\n      const user = setupUser();\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"\"\n          options={mockOptions}\n          onChange={handleChange}\n        />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.type(input, \"abc\");\n\n      expect(handleChange).toHaveBeenCalledTimes(3);\n    });\n\n    test(\"onValueChange is only called on option select\", async () => {\n      const handleChange = jest.fn();\n      const handleValueChange = jest.fn();\n      const user = setupUser();\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"\"\n          options={mockOptions}\n          onChange={handleChange}\n          onValueChange={handleValueChange}\n        />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.type(input, \"app\");\n      expect(handleValueChange).not.toHaveBeenCalled();\n\n      // Get the Apple option by role and click it\n      const options = screen.getAllByRole(\"option\");\n      const appleOption = options.find((opt) => opt.textContent === \"Apple\");\n      expect(appleOption).toBeDefined();\n      await user.click(appleOption!);\n      expect(handleValueChange).toHaveBeenCalledWith(\"apple\");\n    });\n  });\n\n  describe(\"Disabled Options\", () => {\n    test(\"disabled options cannot be selected\", async () => {\n      const handleValueChange = jest.fn();\n      const user = setupUser();\n      const optionsWithDisabled = [\n        { value: \"apple\", label: \"Apple\" },\n        { value: \"banana\", label: \"Banana\", disabled: true },\n      ];\n      render(\n        <InputComboBox\n          placeholder=\"Select\"\n          value=\"\"\n          options={optionsWithDisabled}\n          onValueChange={handleValueChange}\n        />\n      );\n      const input = screen.getByPlaceholderText(\"Select\");\n\n      await user.click(input);\n      const disabledOption = screen.getByText(\"Banana\");\n      await user.click(disabledOption);\n\n      expect(handleValueChange).not.toHaveBeenCalled();\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputComboBox/InputComboBox.tsx",
    "content": "\"use client\";\n\n/**\n * InputComboBox - A flexible combo box component that combines input and select functionality\n *\n * Features:\n * - Dual mode: Acts as input when no options, acts as filterable select with options\n * - Automatic filtering based on user input\n * - Strict/non-strict mode: Controls whether only option values are allowed\n * - Built-in validation with inline error display\n * - Full accessibility with ARIA support\n * - Integrates with FormField and form libraries\n * - Based on InputTypeIn with dropdown functionality\n * - **InputSelect API compatible**: Can be used as a drop-in replacement for InputSelect\n *\n * @example Basic Usage - Input Mode (no options)\n * ```tsx\n * const [value, setValue] = useState(\"\");\n *\n * <InputComboBox\n *   placeholder=\"Enter or select\"\n *   value={value}\n *   onChange={(e) => setValue(e.target.value)}\n * />\n * ```\n *\n * @example Select Mode with Filtering\n * ```tsx\n * const options = [\n *   { value: \"apple\", label: \"Apple\" },\n *   { value: \"banana\", label: \"Banana\" },\n * ];\n *\n * <InputComboBox\n *   placeholder=\"Select fruit\"\n *   value={value}\n *   onChange={(e) => setValue(e.target.value)}\n *   options={options}\n *   strict={true}\n * />\n * ```\n *\n * @example InputSelect-compatible API (drop-in replacement)\n * ```tsx\n * // Works exactly like InputSelect but with filtering capability\n * // onValueChange is only called when user selects from dropdown\n * <InputComboBox\n *   value={model}\n *   onValueChange={(value) => {\n *     setModel(value);\n *     testApiKey(value); // Only called when option is selected\n *   }}\n *   options={modelOptions}\n *   placeholder=\"Select model\"\n *   isError={!!error}\n *   rightSection={<RefreshButton />}\n * />\n * ```\n *\n * @example With FormField Integration\n * ```tsx\n * <FormField state={error ? \"error\" : \"idle\"}>\n *   <FormField.Label>Country</FormField.Label>\n *   <FormField.Control asChild>\n *     <InputComboBox\n *       placeholder=\"Select or type country\"\n *       value={country}\n *       onChange={(e) => setCountry(e.target.value)}\n *       options={countryOptions}\n *       strict={false}\n *       onValidationError={setError}\n *     />\n *   </FormField.Control>\n * </FormField>\n * ```\n */\n\nimport React, {\n  useCallback,\n  useContext,\n  useMemo,\n  useRef,\n  useId,\n  useEffect,\n} from \"react\";\nimport {\n  useFloating,\n  autoUpdate,\n  flip,\n  offset,\n  shift,\n  size,\n} from \"@floating-ui/react-dom\";\nimport { cn, noProp } from \"@/lib/utils\";\nimport InputTypeIn from \"../InputTypeIn\";\nimport { FieldContext } from \"../../form/FieldContext\";\nimport { Button } from \"@opal/components\";\nimport { FieldMessage } from \"../../messages/FieldMessage\";\n\n// Hooks\nimport {\n  useComboBoxState,\n  useComboBoxKeyboard,\n  useOptionFiltering,\n} from \"./hooks\";\nimport { useClickOutside } from \"@/hooks/useClickOutside\";\nimport { useValidation } from \"./utils/validation\";\nimport { buildAriaAttributes } from \"./utils/aria\";\n\n// Components\nimport { ComboBoxDropdown } from \"./components/ComboBoxDropdown\";\n\n// Types\nimport { InputComboBoxProps, ComboBoxOption } from \"./types\";\nimport { SvgChevronDown, SvgChevronUp } from \"@opal/icons\";\nimport { WithoutStyles } from \"@/types\";\n\nconst InputComboBox = ({\n  value,\n  onChange,\n  onValueChange,\n  options = [],\n  strict = false,\n  disabled = false,\n  placeholder,\n  isError: externalIsError,\n  onValidationError,\n  name,\n  leftSearchIcon = false,\n  rightSection,\n  separatorLabel = \"Other options\",\n  showAddPrefix = false,\n  showOtherOptions = false,\n  ...rest\n}: WithoutStyles<InputComboBoxProps>) => {\n  const inputRef = useRef<HTMLInputElement>(null);\n  const dropdownRef = useRef<HTMLDivElement>(null);\n  const fieldContext = useContext(FieldContext);\n\n  const hasOptions = options.length > 0;\n\n  //State Management Hook\n  const {\n    isOpen,\n    setIsOpen,\n    inputValue,\n    setInputValue,\n    highlightedIndex,\n    setHighlightedIndex,\n    isKeyboardNav,\n    setIsKeyboardNav,\n  } = useComboBoxState({ value, options });\n\n  // Filtering Hook\n  const { matchedOptions, unmatchedOptions, hasSearchTerm } =\n    useOptionFiltering({ options, inputValue });\n  const visibleUnmatchedOptions =\n    hasSearchTerm && showOtherOptions ? unmatchedOptions : [];\n\n  // Whether to show the create option (always show when typing in non-strict mode)\n  const showCreateOption = !strict && hasSearchTerm && inputValue.trim() !== \"\";\n\n  // Combined list for keyboard navigation (includes create option when shown)\n  // Only show matched options when searching (hide unmatched)\n  const allVisibleOptions = useMemo(() => {\n    const baseOptions = [...matchedOptions, ...visibleUnmatchedOptions];\n    if (showCreateOption) {\n      // Prepend a synthetic option for the \"create new\" item\n      return [{ value: inputValue, label: inputValue }, ...baseOptions];\n    }\n    return baseOptions;\n  }, [matchedOptions, visibleUnmatchedOptions, showCreateOption, inputValue]);\n\n  // Floating UI for dropdown positioning\n  const { refs, floatingStyles } = useFloating({\n    open: isOpen,\n    placement: \"bottom-start\",\n    middleware: [\n      offset(4),\n      flip(),\n      shift({ padding: 8 }),\n      size({\n        apply({ rects, elements }) {\n          Object.assign(elements.floating.style, {\n            width: `${rects.reference.width}px`,\n          });\n        },\n      }),\n    ],\n    whileElementsMounted: autoUpdate,\n  });\n\n  // Check if an option is an exact match\n  const isExactMatch = useCallback(\n    (option: ComboBoxOption) => {\n      const currentValue = (inputValue || value || \"\").trim().toLowerCase();\n      if (!currentValue) return false;\n\n      return (\n        option.value.toLowerCase() === currentValue ||\n        option.label.toLowerCase() === currentValue\n      );\n    },\n    [inputValue, value]\n  );\n\n  // Validation Logic\n  const { isValid, errorMessage } = useValidation({\n    value,\n    options,\n    strict,\n    externalIsError,\n    onValidationError,\n  });\n\n  // Sync highlightedIndex with exact match when typing (not keyboard nav)\n  useEffect(() => {\n    // Skip if keyboard navigating or dropdown closed\n    if (isKeyboardNav || !isOpen) return;\n    if (!inputValue.trim()) return;\n\n    const exactMatchIndex = allVisibleOptions.findIndex(\n      (opt) =>\n        opt.value.toLowerCase() === inputValue.trim().toLowerCase() ||\n        opt.label.toLowerCase() === inputValue.trim().toLowerCase()\n    );\n\n    if (exactMatchIndex >= 0) {\n      setHighlightedIndex(exactMatchIndex);\n    }\n  }, [\n    inputValue,\n    allVisibleOptions,\n    isKeyboardNav,\n    isOpen,\n    setHighlightedIndex,\n  ]);\n\n  // Event Handlers\n  const handleInputChange = useCallback(\n    (e: React.ChangeEvent<HTMLInputElement>) => {\n      const newValue = e.target.value;\n      setInputValue(newValue);\n\n      // Only call onChange while typing (for controlled input behavior)\n      // onValueChange is only called when selecting from dropdown\n      onChange?.(e);\n\n      // Open dropdown when user starts typing and there are options\n      if (hasOptions && !isOpen) {\n        setIsOpen(true);\n      }\n\n      // Auto-highlight first match when typing\n      setHighlightedIndex(0);\n      setIsKeyboardNav(false); // Reset keyboard navigation mode when typing\n    },\n    [\n      onChange,\n      hasOptions,\n      isOpen,\n      setInputValue,\n      setIsOpen,\n      setHighlightedIndex,\n      setIsKeyboardNav,\n    ]\n  );\n\n  const handleOptionSelect = useCallback(\n    (option: ComboBoxOption) => {\n      if (option.disabled) return;\n\n      setInputValue(option.value);\n\n      // Support both onChange (event) and onValueChange (value) patterns\n      if (onChange) {\n        const syntheticEvent = {\n          target: { value: option.value },\n          currentTarget: { value: option.value },\n          type: \"change\",\n          bubbles: true,\n          cancelable: true,\n        } as React.ChangeEvent<HTMLInputElement>;\n        onChange(syntheticEvent);\n      }\n\n      onValueChange?.(option.value);\n\n      setIsOpen(false);\n      inputRef.current?.focus();\n    },\n    [onChange, onValueChange, setInputValue, setIsOpen]\n  );\n\n  // Keyboard Navigation Hook\n  const { handleKeyDown } = useComboBoxKeyboard({\n    isOpen,\n    setIsOpen,\n    highlightedIndex,\n    setHighlightedIndex,\n    setIsKeyboardNav,\n    allVisibleOptions,\n    onSelect: handleOptionSelect,\n    hasOptions,\n  });\n\n  // Click Outside Hook\n  useClickOutside<HTMLElement>(\n    [\n      inputRef as React.RefObject<HTMLElement>,\n      dropdownRef as React.RefObject<HTMLElement>,\n    ],\n    useCallback(() => {\n      setIsOpen(false);\n      setIsKeyboardNav(false);\n    }, [setIsOpen, setIsKeyboardNav]),\n    isOpen\n  );\n\n  const handleFocus = useCallback(() => {\n    if (hasOptions) {\n      setInputValue(\"\");\n      setIsOpen(true);\n      setHighlightedIndex(-1);\n      setIsKeyboardNav(false);\n    }\n  }, [\n    hasOptions,\n    setInputValue,\n    setIsOpen,\n    setHighlightedIndex,\n    setIsKeyboardNav,\n  ]);\n\n  const toggleDropdown = useCallback(() => {\n    if (!disabled && hasOptions) {\n      setIsOpen((prev) => {\n        const newOpen = !prev;\n        if (newOpen) {\n          setInputValue(\"\");\n          setHighlightedIndex(-1);\n        }\n        return newOpen;\n      });\n      inputRef.current?.focus();\n    }\n  }, [disabled, hasOptions, setIsOpen, setInputValue, setHighlightedIndex]);\n\n  const autoId = useId();\n  const fieldId = fieldContext?.baseId || name || `combo-box-${autoId}`;\n\n  // ARIA Attributes Builder\n  const ariaProps = buildAriaAttributes({\n    hasOptions,\n    isOpen,\n    isValid,\n    highlightedIndex,\n    fieldId,\n    allVisibleOptions,\n    placeholder,\n  });\n\n  // Get display label for the current value\n  const displayLabel = useMemo(() => {\n    // If dropdown is open, show what user is typing\n    if (isOpen) return inputValue;\n\n    // When closed, show the matched option label or the value\n    if (!value || !hasOptions) return inputValue;\n    const option = options.find((opt) => opt.value === value);\n    return option ? option.label : inputValue;\n  }, [isOpen, inputValue, value, options, hasOptions]);\n\n  return (\n    <div ref={refs.setReference} className=\"relative w-full\">\n      <>\n        <InputTypeIn\n          ref={inputRef}\n          placeholder={placeholder}\n          value={displayLabel}\n          onChange={handleInputChange}\n          onFocus={handleFocus}\n          onKeyDown={handleKeyDown}\n          variant={disabled ? \"disabled\" : !isValid ? \"error\" : undefined}\n          leftSearchIcon={leftSearchIcon}\n          showClearButton={false}\n          rightSection={\n            <>\n              {rightSection && (\n                <div\n                  className=\"flex items-center\"\n                  onPointerDown={(e) => {\n                    e.stopPropagation();\n                  }}\n                  onClick={(e) => {\n                    e.stopPropagation();\n                  }}\n                >\n                  {rightSection}\n                </div>\n              )}\n              {hasOptions && (\n                <Button\n                  disabled={disabled}\n                  prominence=\"tertiary\"\n                  size=\"sm\"\n                  onClick={noProp(toggleDropdown)}\n                  icon={isOpen ? SvgChevronUp : SvgChevronDown}\n                  aria-label={isOpen ? \"Close dropdown\" : \"Open dropdown\"}\n                  tabIndex={-1}\n                  type=\"button\"\n                />\n              )}\n            </>\n          }\n          {...ariaProps}\n          {...rest}\n        />\n\n        {/* Dropdown - Rendered in Portal */}\n        <ComboBoxDropdown\n          ref={dropdownRef}\n          isOpen={isOpen}\n          disabled={disabled}\n          floatingStyles={floatingStyles}\n          setFloatingRef={refs.setFloating}\n          fieldId={fieldId}\n          placeholder={placeholder}\n          matchedOptions={matchedOptions}\n          unmatchedOptions={visibleUnmatchedOptions}\n          hasSearchTerm={hasSearchTerm}\n          separatorLabel={separatorLabel}\n          value={value}\n          highlightedIndex={highlightedIndex}\n          onSelect={handleOptionSelect}\n          onMouseEnter={(index) => {\n            setIsKeyboardNav(false);\n            setHighlightedIndex(index);\n          }}\n          onMouseMove={() => {\n            if (isKeyboardNav) {\n              setIsKeyboardNav(false);\n            }\n          }}\n          isExactMatch={isExactMatch}\n          inputValue={inputValue}\n          allowCreate={!strict}\n          showCreateOption={showCreateOption}\n          showAddPrefix={showAddPrefix}\n        />\n      </>\n\n      {/* Error message - only show internal error messages when not using external isError */}\n      {!isValid && errorMessage && externalIsError === undefined && (\n        <FieldMessage variant=\"error\" className=\"ml-0.5 mt-1\">\n          <FieldMessage.Content\n            id={`${fieldId}-error`}\n            role=\"alert\"\n            className=\"ml-0.5\"\n          >\n            {errorMessage}\n          </FieldMessage.Content>\n        </FieldMessage>\n      )}\n    </div>\n  );\n};\n\nexport default InputComboBox;\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputComboBox/components/ComboBoxDropdown.tsx",
    "content": "import React, { useEffect, forwardRef } from \"react\";\nimport { createPortal } from \"react-dom\";\nimport { cn } from \"@/lib/utils\";\nimport { OptionsList } from \"./OptionsList\";\nimport { ComboBoxOption } from \"../types\";\n\ninterface ComboBoxDropdownProps {\n  isOpen: boolean;\n  disabled: boolean;\n  floatingStyles: React.CSSProperties;\n  setFloatingRef: (node: HTMLDivElement | null) => void;\n  fieldId: string;\n  placeholder: string;\n  matchedOptions: ComboBoxOption[];\n  unmatchedOptions: ComboBoxOption[];\n  hasSearchTerm: boolean;\n  separatorLabel: string;\n  value: string;\n  highlightedIndex: number;\n  onSelect: (option: ComboBoxOption) => void;\n  onMouseEnter: (index: number) => void;\n  onMouseMove: () => void;\n  isExactMatch: (option: ComboBoxOption) => boolean;\n  /** Current input value for creating new option */\n  inputValue: string;\n  /** Whether to show create option when no exact match */\n  allowCreate: boolean;\n  /** Whether to show create option (pre-computed by parent) */\n  showCreateOption: boolean;\n  /** Show \"Add\" prefix in create option */\n  showAddPrefix: boolean;\n}\n\n/**\n * Renders the dropdown menu in a portal\n * Handles scroll-into-view for highlighted options\n */\nexport const ComboBoxDropdown = forwardRef<\n  HTMLDivElement,\n  ComboBoxDropdownProps\n>(\n  (\n    {\n      isOpen,\n      disabled,\n      floatingStyles,\n      setFloatingRef,\n      fieldId,\n      placeholder,\n      matchedOptions,\n      unmatchedOptions,\n      hasSearchTerm,\n      separatorLabel,\n      value,\n      highlightedIndex,\n      onSelect,\n      onMouseEnter,\n      onMouseMove,\n      isExactMatch,\n      inputValue,\n      allowCreate,\n      showCreateOption,\n      showAddPrefix,\n    },\n    ref\n  ) => {\n    // Scroll highlighted option into view\n    useEffect(() => {\n      if (\n        isOpen &&\n        ref &&\n        typeof ref !== \"function\" &&\n        ref.current &&\n        highlightedIndex >= 0\n      ) {\n        const highlightedElement = ref.current.querySelector(\n          `[data-index=\"${highlightedIndex}\"]`\n        );\n        if (highlightedElement) {\n          highlightedElement.scrollIntoView({\n            block: \"nearest\",\n            behavior: \"instant\",\n          });\n        }\n      }\n    }, [highlightedIndex, isOpen, ref]);\n\n    if (!isOpen || disabled || typeof document === \"undefined\") {\n      return null;\n    }\n\n    return createPortal(\n      <div\n        ref={(node) => {\n          // Handle both the forwarded ref and the floating ref\n          setFloatingRef(node);\n          if (typeof ref === \"function\") {\n            ref(node);\n          } else if (ref) {\n            ref.current = node;\n          }\n        }}\n        id={`${fieldId}-listbox`}\n        role=\"listbox\"\n        aria-label={placeholder}\n        className={cn(\n          \"z-[10000] bg-background-neutral-00 border border-border-02 rounded-12 shadow-02 max-h-60 overflow-y-auto overflow-x-hidden p-1 pointer-events-auto touch-auto\"\n        )}\n        style={{\n          ...floatingStyles,\n          // Ensure the dropdown can scroll independently\n          overscrollBehavior: \"contain\",\n        }}\n        onWheel={(e) => {\n          // Prevent event from bubbling to prevent any parent scroll blocking\n          e.stopPropagation();\n        }}\n        onTouchMove={(e) => {\n          // Prevent event from bubbling for touch devices\n          e.stopPropagation();\n        }}\n      >\n        <OptionsList\n          matchedOptions={matchedOptions}\n          unmatchedOptions={unmatchedOptions}\n          hasSearchTerm={hasSearchTerm}\n          separatorLabel={separatorLabel}\n          value={value}\n          highlightedIndex={highlightedIndex}\n          fieldId={fieldId}\n          onSelect={onSelect}\n          onMouseEnter={onMouseEnter}\n          onMouseMove={onMouseMove}\n          isExactMatch={isExactMatch}\n          inputValue={inputValue}\n          allowCreate={allowCreate}\n          showCreateOption={showCreateOption}\n          showAddPrefix={showAddPrefix}\n        />\n      </div>,\n      document.body\n    );\n  }\n);\n\nComboBoxDropdown.displayName = \"ComboBoxDropdown\";\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputComboBox/components/OptionItem.tsx",
    "content": "import React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { ComboBoxOption } from \"../types\";\nimport { sanitizeOptionId } from \"../utils/aria\";\n\ninterface OptionItemProps {\n  option: ComboBoxOption;\n  index: number;\n  fieldId: string;\n  isHighlighted: boolean;\n  isSelected: boolean;\n  isExact: boolean;\n  onSelect: (option: ComboBoxOption) => void;\n  onMouseEnter: (index: number) => void;\n  onMouseMove: () => void;\n  /** Search term to highlight in the label */\n  searchTerm: string;\n}\n\n/**\n * Escapes special regex characters in a string\n */\nconst escapeRegex = (str: string) => str.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\");\n\n/**\n * Highlights matching text within a string\n */\nconst highlightMatch = (text: string, searchTerm: string): React.ReactNode => {\n  if (!searchTerm.trim()) return text;\n\n  const regex = new RegExp(`(${escapeRegex(searchTerm)})`, \"gi\");\n  const parts = text.split(regex);\n\n  if (parts.length === 1) return text;\n\n  return parts.map((part, i) =>\n    part.toLowerCase() === searchTerm.toLowerCase() ? (\n      <span key={i} className=\"font-semibold\">\n        {part}\n      </span>\n    ) : (\n      part\n    )\n  );\n};\n\n/**\n * Renders a single option item in the dropdown\n * Memoized to prevent unnecessary re-renders\n */\nexport const OptionItem = React.memo(\n  ({\n    option,\n    index,\n    fieldId,\n    isHighlighted,\n    isSelected,\n    isExact,\n    onSelect,\n    onMouseEnter,\n    onMouseMove,\n    searchTerm,\n  }: OptionItemProps) => {\n    return (\n      <div\n        id={`${fieldId}-option-${sanitizeOptionId(option.value)}`}\n        data-index={index}\n        role=\"option\"\n        aria-selected={isSelected}\n        aria-disabled={option.disabled}\n        onClick={(e) => {\n          e.stopPropagation();\n          onSelect(option);\n        }}\n        onMouseDown={(e) => {\n          e.preventDefault();\n        }}\n        onMouseEnter={() => onMouseEnter(index)}\n        onMouseMove={onMouseMove}\n        className={cn(\n          \"px-3 py-2 cursor-pointer transition-colors\",\n          \"flex flex-col rounded-08\",\n          isExact && \"bg-action-link-01\",\n          !isExact && isHighlighted && \"bg-background-tint-02\",\n          !isExact && isSelected && \"bg-background-tint-02\",\n          option.disabled &&\n            \"opacity-50 cursor-not-allowed bg-background-neutral-02\",\n          !option.disabled && !isExact && \"hover:bg-background-tint-02\"\n        )}\n      >\n        <span\n          className={cn(\n            \"font-main-ui-action\",\n            isExact && \"text-action-link-05 font-medium\",\n            !isExact && \"text-text-04\",\n            !isExact && isSelected && \"font-medium\"\n          )}\n        >\n          {highlightMatch(option.label, searchTerm)}\n        </span>\n        {option.description && (\n          <span\n            className={cn(\n              \"mt-0.5 font-secondary-body\",\n              isExact ? \"text-action-link-04\" : \"text-text-03\"\n            )}\n          >\n            {option.description}\n          </span>\n        )}\n      </div>\n    );\n  }\n);\n\nOptionItem.displayName = \"OptionItem\";\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputComboBox/components/OptionsList.tsx",
    "content": "import React from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { OptionItem } from \"./OptionItem\";\nimport { ComboBoxOption } from \"../types\";\nimport { cn } from \"@/lib/utils\";\nimport { SvgPlus } from \"@opal/icons\";\nimport { sanitizeOptionId } from \"../utils/aria\";\n\ninterface OptionsListProps {\n  matchedOptions: ComboBoxOption[];\n  unmatchedOptions: ComboBoxOption[];\n  hasSearchTerm: boolean;\n  separatorLabel: string;\n  value: string;\n  highlightedIndex: number;\n  fieldId: string;\n  onSelect: (option: ComboBoxOption) => void;\n  onMouseEnter: (index: number) => void;\n  onMouseMove: () => void;\n  isExactMatch: (option: ComboBoxOption) => boolean;\n  /** Current input value for creating new option */\n  inputValue: string;\n  /** Whether to show create option when no exact match */\n  allowCreate: boolean;\n  /** Whether to show create option (pre-computed by parent) */\n  showCreateOption: boolean;\n  /** Show \"Add\" prefix in create option */\n  showAddPrefix: boolean;\n}\n\n/**\n * Renders the list of options with matched/unmatched sections\n * Includes separator between sections when filtering\n */\nexport const OptionsList: React.FC<OptionsListProps> = ({\n  matchedOptions,\n  unmatchedOptions,\n  hasSearchTerm,\n  separatorLabel,\n  value,\n  highlightedIndex,\n  fieldId,\n  onSelect,\n  onMouseEnter,\n  onMouseMove,\n  isExactMatch,\n  inputValue,\n  allowCreate,\n  showCreateOption,\n  showAddPrefix,\n}) => {\n  // Index offset for other options when create option is shown\n  const indexOffset = showCreateOption ? 1 : 0;\n\n  if (\n    matchedOptions.length === 0 &&\n    unmatchedOptions.length === 0 &&\n    !showCreateOption\n  ) {\n    return (\n      <div className=\"px-3 py-2 text-text-02 font-secondary-body\">\n        No options found\n      </div>\n    );\n  }\n\n  return (\n    <>\n      {/* Create New Option */}\n      {showCreateOption && (\n        <div\n          id={`${fieldId}-option-${sanitizeOptionId(inputValue)}`}\n          data-index={0}\n          role=\"option\"\n          aria-selected={false}\n          aria-label={`${showAddPrefix ? \"Add\" : \"Create\"} \"${inputValue}\"`}\n          onClick={(e) => {\n            e.stopPropagation();\n            onSelect({ value: inputValue, label: inputValue });\n          }}\n          onMouseDown={(e) => {\n            e.preventDefault();\n          }}\n          onMouseEnter={() => onMouseEnter(0)}\n          onMouseMove={onMouseMove}\n          className={cn(\n            \"cursor-pointer transition-colors\",\n            \"flex items-center justify-between rounded-08\",\n            highlightedIndex === 0 && \"bg-background-tint-02\",\n            \"hover:bg-background-tint-02\",\n            showAddPrefix ? \"px-1.5 py-1.5\" : \"px-3 py-2\"\n          )}\n        >\n          <span\n            className={cn(\n              \"font-main-ui-action truncate min-w-0\",\n              showAddPrefix ? \"px-1\" : \"\"\n            )}\n          >\n            {showAddPrefix ? (\n              <>\n                <span className=\"text-text-03\">Add</span>\n                <span className=\"text-text-04\">{` ${inputValue}`}</span>\n              </>\n            ) : (\n              <span className=\"text-text-04\">{inputValue}</span>\n            )}\n          </span>\n          <SvgPlus\n            className={cn(\n              \"w-4 h-4 flex-shrink-0\",\n              showAddPrefix ? \"text-text-04 mx-1\" : \"text-text-03 ml-2\"\n            )}\n          />\n        </div>\n      )}\n\n      {/* Separator - show when there are options to display */}\n      {separatorLabel &&\n        (matchedOptions.length > 0 ||\n          (!hasSearchTerm && unmatchedOptions.length > 0)) && (\n          <div className=\"px-3 py-1\">\n            <Text as=\"p\" text03 secondaryBody>\n              {separatorLabel}\n            </Text>\n          </div>\n        )}\n\n      {/* Matched/Filtered Options */}\n      {matchedOptions.map((option, idx) => {\n        const globalIndex = idx + indexOffset;\n        // Only highlight first exact match, not all matches\n        const isExact = idx === 0 && isExactMatch(option);\n        return (\n          <OptionItem\n            key={option.value}\n            option={option}\n            index={globalIndex}\n            fieldId={fieldId}\n            isHighlighted={globalIndex === highlightedIndex}\n            isSelected={value === option.value}\n            isExact={isExact}\n            onSelect={onSelect}\n            onMouseEnter={onMouseEnter}\n            onMouseMove={onMouseMove}\n            searchTerm={inputValue}\n          />\n        );\n      })}\n\n      {/* Unmatched Options - only show when NOT searching */}\n      {!hasSearchTerm &&\n        unmatchedOptions.map((option, idx) => {\n          const globalIndex = matchedOptions.length + idx + indexOffset;\n          const isExact = isExactMatch(option);\n          return (\n            <OptionItem\n              key={option.value}\n              option={option}\n              index={globalIndex}\n              fieldId={fieldId}\n              isHighlighted={globalIndex === highlightedIndex}\n              isSelected={value === option.value}\n              isExact={isExact}\n              onSelect={onSelect}\n              onMouseEnter={onMouseEnter}\n              onMouseMove={onMouseMove}\n              searchTerm={inputValue}\n            />\n          );\n        })}\n    </>\n  );\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputComboBox/hooks.ts",
    "content": "import { useState, useEffect, useCallback, useMemo, useRef } from \"react\";\nimport { ComboBoxOption } from \"./types\";\n\n// =============================================================================\n// HOOK: useComboBoxState\n// =============================================================================\n\ninterface UseComboBoxStateProps {\n  value: string;\n  options: ComboBoxOption[];\n}\n\n/**\n * Manages the internal state of the ComboBox component\n * Handles state synchronization between external value prop and internal input state\n */\nexport function useComboBoxState({ value, options }: UseComboBoxStateProps) {\n  const [isOpen, setIsOpen] = useState(false);\n  const [inputValue, setInputValue] = useState(value);\n  const [highlightedIndex, setHighlightedIndex] = useState(-1);\n  const [isKeyboardNav, setIsKeyboardNav] = useState(false);\n  const prevIsOpenRef = useRef(false);\n\n  // Sync inputValue with the external value prop.\n  // When the dropdown is closed, always reflect the controlled value.\n  // When the dropdown is open, only sync if the *value prop itself* changes\n  // (e.g. parent programmatically updates it), not when inputValue changes\n  // (e.g. user clears the field on focus to browse all options).\n  useEffect(() => {\n    if (!isOpen) {\n      setInputValue(value);\n    }\n  }, [value, isOpen]);\n\n  useEffect(() => {\n    if (isOpen) {\n      const isExactOptionMatch = options.some((opt) => opt.value === value);\n      if (isExactOptionMatch) {\n        setInputValue(value);\n      }\n    }\n    // Only react to value prop changes while open, not inputValue changes\n  }, [value]);\n\n  // Reset highlight and keyboard nav when closing dropdown\n  useEffect(() => {\n    if (!isOpen) {\n      setHighlightedIndex(-1);\n      setIsKeyboardNav(false);\n    }\n  }, [isOpen]);\n\n  return {\n    isOpen,\n    setIsOpen,\n    inputValue,\n    setInputValue,\n    highlightedIndex,\n    setHighlightedIndex,\n    isKeyboardNav,\n    setIsKeyboardNav,\n  };\n}\n\n// =============================================================================\n// HOOK: useComboBoxKeyboard\n// =============================================================================\n\ninterface UseComboBoxKeyboardProps {\n  isOpen: boolean;\n  setIsOpen: (open: boolean) => void;\n  highlightedIndex: number;\n  setHighlightedIndex: (index: number | ((prev: number) => number)) => void;\n  setIsKeyboardNav: (isKeyboard: boolean) => void;\n  allVisibleOptions: ComboBoxOption[];\n  onSelect: (option: ComboBoxOption) => void;\n  hasOptions: boolean;\n}\n\n/**\n * Manages keyboard navigation for the ComboBox\n * Handles arrow keys, Enter, Escape, and Tab\n */\nexport function useComboBoxKeyboard({\n  isOpen,\n  setIsOpen,\n  highlightedIndex,\n  setHighlightedIndex,\n  setIsKeyboardNav,\n  allVisibleOptions,\n  onSelect,\n  hasOptions,\n}: UseComboBoxKeyboardProps) {\n  const handleKeyDown = useCallback(\n    (e: React.KeyboardEvent<HTMLInputElement>) => {\n      if (!hasOptions) return;\n\n      switch (e.key) {\n        case \"ArrowDown\":\n          e.preventDefault();\n          setIsKeyboardNav(true); // Mark as keyboard navigation\n          if (!isOpen) {\n            setIsOpen(true);\n            setHighlightedIndex(0);\n          } else {\n            setHighlightedIndex((prev) => {\n              // If no item highlighted yet (-1), start at 0\n              if (prev === -1) return 0;\n              // Otherwise move down if not at end\n              return prev < allVisibleOptions.length - 1 ? prev + 1 : prev;\n            });\n          }\n          break;\n        case \"ArrowUp\":\n          e.preventDefault();\n          setIsKeyboardNav(true); // Mark as keyboard navigation\n          if (isOpen) {\n            setHighlightedIndex((prev) => {\n              // If at first item or no highlight, don't go further up\n              if (prev <= 0) return -1;\n              return prev - 1;\n            });\n          }\n          break;\n        case \"Enter\":\n          // Always prevent default and stop propagation when dropdown is open\n          // to avoid bubbling to parent forms\n          if (isOpen) {\n            e.preventDefault();\n            e.stopPropagation();\n            if (highlightedIndex >= 0) {\n              const option = allVisibleOptions[highlightedIndex];\n              if (option) {\n                onSelect(option);\n              }\n            }\n          }\n          break;\n        case \"Escape\":\n          e.preventDefault();\n          setIsOpen(false);\n          setIsKeyboardNav(false);\n          break;\n        case \"Tab\":\n          setIsOpen(false);\n          setIsKeyboardNav(false);\n          break;\n      }\n    },\n    [\n      hasOptions,\n      isOpen,\n      allVisibleOptions,\n      highlightedIndex,\n      onSelect,\n      setIsOpen,\n      setHighlightedIndex,\n      setIsKeyboardNav,\n    ]\n  );\n\n  return { handleKeyDown };\n}\n\n// =============================================================================\n// HOOK: useOptionFiltering\n// =============================================================================\n\ninterface UseOptionFilteringProps {\n  options: ComboBoxOption[];\n  inputValue: string;\n}\n\ninterface FilterResult {\n  matchedOptions: ComboBoxOption[];\n  unmatchedOptions: ComboBoxOption[];\n  hasSearchTerm: boolean;\n}\n\n/**\n * Filters options based on input value\n * Splits options into matched and unmatched for better UX\n */\nexport function useOptionFiltering({\n  options,\n  inputValue,\n}: UseOptionFilteringProps): FilterResult {\n  return useMemo(() => {\n    if (!options.length) {\n      return { matchedOptions: [], unmatchedOptions: [], hasSearchTerm: false };\n    }\n\n    if (!inputValue || !inputValue.trim()) {\n      return {\n        matchedOptions: options,\n        unmatchedOptions: [],\n        hasSearchTerm: false,\n      };\n    }\n\n    const searchTerm = inputValue.toLowerCase().trim();\n    const matched: ComboBoxOption[] = [];\n    const unmatched: ComboBoxOption[] = [];\n\n    options.forEach((option) => {\n      const matchesLabel = option.label.toLowerCase().includes(searchTerm);\n      const matchesValue = option.value.toLowerCase().includes(searchTerm);\n\n      if (matchesLabel || matchesValue) {\n        matched.push(option);\n      } else {\n        unmatched.push(option);\n      }\n    });\n\n    return {\n      matchedOptions: matched,\n      unmatchedOptions: unmatched,\n      hasSearchTerm: true,\n    };\n  }, [options, inputValue]);\n}\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputComboBox/index.ts",
    "content": "export { default } from \"./InputComboBox\";\nexport type { InputComboBoxProps, ComboBoxOption } from \"./types\";\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputComboBox/types.ts",
    "content": "export type ComboBoxOption = {\n  value: string;\n  label: string;\n  description?: string;\n  disabled?: boolean;\n};\n\nexport interface InputComboBoxProps\n  extends Omit<\n    React.InputHTMLAttributes<HTMLInputElement>,\n    \"onChange\" | \"value\"\n  > {\n  /** Current value */\n  value: string;\n  /** Change handler (React event style) - Called on every keystroke */\n  onChange?: (e: React.ChangeEvent<HTMLInputElement>) => void;\n  /** Change handler (direct value style, for InputSelect compatibility) - Only called when option is selected from dropdown */\n  onValueChange?: (value: string) => void;\n  /** Array of options for select mode */\n  options?: ComboBoxOption[];\n  /**\n   * Strict mode:\n   * - true: Only option values allowed (if options exist)\n   * - false: User can type anything\n   */\n  strict?: boolean;\n  /** Disabled state */\n  disabled?: boolean;\n  /** Placeholder text */\n  placeholder: string;\n  /** External error state (for InputSelect compatibility) - overrides internal validation */\n  isError?: boolean;\n  /** Callback to handle validation errors - integrates with form libraries */\n  onValidationError?: (errorMessage: string | null) => void;\n  /** Optional name for the field (for accessibility) */\n  name?: string;\n  /** Left search icon */\n  leftSearchIcon?: boolean;\n  /** Right section for custom UI elements (e.g., refresh button) */\n  rightSection?: React.ReactNode;\n  /** Label for the separator between matched and unmatched options */\n  separatorLabel?: string;\n  /** Show \"Add\" prefix in create option (e.g., \"Add [value]\") */\n  showAddPrefix?: boolean;\n  /**\n   * When true, keep non-matching options visible under a separator while searching.\n   * Defaults to false so search results are strictly filtered.\n   */\n  showOtherOptions?: boolean;\n}\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputComboBox/utils/aria.ts",
    "content": "import { ComboBoxOption } from \"../types\";\n\n/**\n * Sanitizes a value for use in HTML element IDs.\n * Encodes characters that are invalid in IDs (spaces, special chars).\n */\nexport function sanitizeOptionId(value: string): string {\n  return `option-${encodeURIComponent(value)}`;\n}\n\ninterface BuildAriaAttributesProps {\n  hasOptions: boolean;\n  isOpen: boolean;\n  isValid: boolean;\n  highlightedIndex: number;\n  fieldId: string;\n  allVisibleOptions: ComboBoxOption[];\n  placeholder: string;\n}\n\n/**\n * Builds ARIA attributes for accessibility\n * Ensures proper screen reader support\n */\nexport function buildAriaAttributes({\n  hasOptions,\n  isOpen,\n  isValid,\n  highlightedIndex,\n  fieldId,\n  allVisibleOptions,\n  placeholder,\n}: BuildAriaAttributesProps) {\n  const activeOption =\n    hasOptions && isOpen && highlightedIndex >= 0\n      ? allVisibleOptions[highlightedIndex]\n      : undefined;\n\n  return {\n    \"aria-label\": placeholder,\n    \"aria-invalid\": !isValid,\n    \"aria-describedby\": !isValid ? `${fieldId}-error` : undefined,\n    \"aria-expanded\": hasOptions ? isOpen : undefined,\n    \"aria-haspopup\": hasOptions ? (\"listbox\" as const) : undefined,\n    \"aria-controls\": hasOptions ? `${fieldId}-listbox` : undefined,\n    \"aria-activedescendant\": activeOption\n      ? `${fieldId}-option-${sanitizeOptionId(activeOption.value)}`\n      : undefined,\n    \"aria-autocomplete\": hasOptions ? (\"list\" as const) : undefined,\n    role: hasOptions ? (\"combobox\" as const) : undefined,\n  };\n}\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputComboBox/utils/validation.ts",
    "content": "import { useMemo, useEffect } from \"react\";\nimport { ComboBoxOption } from \"../types\";\n\ninterface UseValidationProps {\n  value: string;\n  options: ComboBoxOption[];\n  strict: boolean;\n  externalIsError?: boolean;\n  onValidationError?: (errorMessage: string | null) => void;\n}\n\ninterface ValidationResult {\n  isValid: boolean;\n  errorMessage: string | null;\n}\n\n/**\n * Handles validation logic for the ComboBox\n * Supports both external error state and internal strict mode validation\n * external error state has precedence over internal validation.When we have external error, internal error is\n * not displayed we need to display external error separately\n */\nexport function useValidation({\n  value,\n  options,\n  strict,\n  externalIsError,\n  onValidationError,\n}: UseValidationProps): ValidationResult {\n  const hasOptions = options.length > 0;\n\n  // Validation logic - use external error if provided, otherwise use internal validation\n  const { isValid, errorMessage } = useMemo(() => {\n    // If external error is provided, use it\n    if (externalIsError !== undefined) {\n      return { isValid: !externalIsError, errorMessage: null };\n    }\n\n    // Otherwise use internal validation\n    if (!strict || !hasOptions || !value) {\n      return { isValid: true, errorMessage: null };\n    }\n\n    // In strict mode with options, value must be one of the option values\n    const isValidOption = options.some((opt) => opt.value === value);\n\n    if (!isValidOption) {\n      return {\n        isValid: false,\n        errorMessage: \"Please select a valid option from the list\",\n      };\n    }\n\n    return { isValid: true, errorMessage: null };\n  }, [externalIsError, strict, hasOptions, value, options]);\n\n  // Notify parent of error state\n  useEffect(() => {\n    onValidationError?.(errorMessage);\n  }, [errorMessage, onValidationError]);\n\n  return { isValid, errorMessage };\n}\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputDatePicker.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport React from \"react\";\nimport InputDatePicker from \"./InputDatePicker\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof InputDatePicker> = {\n  title: \"refresh-components/inputs/InputDatePicker\",\n  component: InputDatePicker,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 320 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InputDatePicker>;\n\nexport const Default: Story = {\n  render: function DefaultStory() {\n    const [date, setDate] = React.useState<Date | null>(null);\n    return <InputDatePicker selectedDate={date} setSelectedDate={setDate} />;\n  },\n};\n\nexport const WithSelectedDate: Story = {\n  render: function SelectedDateStory() {\n    const [date, setDate] = React.useState<Date | null>(new Date(2025, 0, 15));\n    return <InputDatePicker selectedDate={date} setSelectedDate={setDate} />;\n  },\n};\n\nexport const CustomStartYear: Story = {\n  render: function CustomStartYearStory() {\n    const [date, setDate] = React.useState<Date | null>(null);\n    return (\n      <InputDatePicker\n        selectedDate={date}\n        setSelectedDate={setDate}\n        startYear={2020}\n      />\n    );\n  },\n};\n\nexport const Disabled: Story = {\n  render: () => (\n    <InputDatePicker\n      selectedDate={new Date()}\n      setSelectedDate={() => {}}\n      disabled\n    />\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputDatePicker.tsx",
    "content": "\"use client\";\n\nimport { Button } from \"@opal/components\";\nimport { isAfterDate, normalizeDate } from \"@/lib/dateUtils\";\nimport Calendar from \"@/refresh-components/Calendar\";\nimport Popover from \"@/refresh-components/Popover\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport { useMemo, useState } from \"react\";\nimport { SvgCalendar } from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\n\nexport interface InputDatePickerProps {\n  name?: string;\n  selectedDate: Date | null;\n  setSelectedDate: (date: Date | null) => void;\n  startYear?: number;\n  disabled?: boolean;\n  maxDate?: Date;\n}\n\nfunction extractYear(date: Date | null): number {\n  return (date ?? new Date()).getFullYear();\n}\n\nfunction clampToMaxDate(date: Date, maxDate?: Date): Date {\n  if (!maxDate || !isAfterDate(date, maxDate)) {\n    return date;\n  }\n\n  return normalizeDate(maxDate);\n}\n\nexport default function InputDatePicker({\n  name,\n  selectedDate,\n  setSelectedDate,\n  startYear = 1970,\n  disabled = false,\n  maxDate,\n}: InputDatePickerProps) {\n  const validStartYear = Math.max(startYear, 1970);\n  const normalizedMaxDate = useMemo(\n    () => (maxDate ? normalizeDate(maxDate) : undefined),\n    [maxDate]\n  );\n  const currYear = Math.max(\n    validStartYear,\n    extractYear(normalizedMaxDate ?? new Date())\n  );\n  const years = useMemo(\n    () =>\n      Array(currYear - validStartYear + 1)\n        .fill(currYear)\n        .map((year, index) => year - index),\n    [currYear, validStartYear]\n  );\n  const [open, setOpen] = useState(false);\n  const [displayedMonth, setDisplayedMonth] = useState<Date>(\n    clampToMaxDate(\n      selectedDate ?? normalizedMaxDate ?? new Date(),\n      normalizedMaxDate\n    )\n  );\n\n  function handleDateSelection(date: Date) {\n    setSelectedDate(date);\n    setDisplayedMonth(date);\n    setOpen(false);\n  }\n\n  return (\n    <Popover open={open} onOpenChange={setOpen}>\n      <Popover.Trigger asChild id={name} name={name}>\n        <Button disabled={disabled} prominence=\"secondary\" icon={SvgCalendar}>\n          {selectedDate ? selectedDate.toLocaleDateString() : \"Select Date\"}\n        </Button>\n      </Popover.Trigger>\n      <Popover.Content>\n        <Section padding={0.25}>\n          <Section flexDirection=\"row\" gap={0.5}>\n            <InputSelect\n              value={`${extractYear(displayedMonth)}`}\n              onValueChange={(value) => {\n                const year = parseInt(value);\n                setDisplayedMonth(new Date(year, 0));\n              }}\n            >\n              <InputSelect.Trigger />\n              <InputSelect.Content>\n                {years.map((year) => (\n                  <InputSelect.Item key={year} value={`${year}`}>\n                    {`${year}`}\n                  </InputSelect.Item>\n                ))}\n              </InputSelect.Content>\n            </InputSelect>\n            <Button\n              onClick={() => {\n                const now = normalizedMaxDate ?? new Date();\n                setSelectedDate(now);\n                setDisplayedMonth(now);\n                setOpen(false);\n              }}\n            >\n              Today\n            </Button>\n          </Section>\n          <Calendar\n            mode=\"single\"\n            selected={selectedDate ?? undefined}\n            onSelect={(date) => {\n              if (date) {\n                handleDateSelection(date);\n              }\n            }}\n            month={displayedMonth}\n            onMonthChange={setDisplayedMonth}\n            disabled={\n              normalizedMaxDate ? [{ after: normalizedMaxDate }] : undefined\n            }\n            startMonth={new Date(validStartYear, 0)}\n            endMonth={normalizedMaxDate ?? new Date()}\n            showOutsideDays={false}\n          />\n        </Section>\n      </Popover.Content>\n    </Popover>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputFile.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport React from \"react\";\nimport InputFile from \"./InputFile\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof InputFile> = {\n  title: \"refresh-components/inputs/InputFile\",\n  component: InputFile,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 400 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InputFile>;\n\nexport const Default: Story = {\n  render: function DefaultStory() {\n    const [, setValue] = React.useState(\"\");\n    return (\n      <InputFile placeholder=\"Paste or attach a file...\" setValue={setValue} />\n    );\n  },\n};\n\nexport const WithAcceptFilter: Story = {\n  render: function AcceptFilterStory() {\n    const [, setValue] = React.useState(\"\");\n    return (\n      <InputFile\n        placeholder=\"JSON files only...\"\n        setValue={setValue}\n        accept=\"application/json,.json\"\n      />\n    );\n  },\n};\n\nexport const WithMaxSize: Story = {\n  render: function MaxSizeStory() {\n    const [, setValue] = React.useState(\"\");\n    return (\n      <InputFile\n        placeholder=\"Max 100KB...\"\n        setValue={setValue}\n        maxSizeKb={100}\n        onFileSizeExceeded={({ file, maxSizeKb }) =>\n          alert(`${file.name} exceeds ${maxSizeKb}KB limit`)\n        }\n      />\n    );\n  },\n};\n\nexport const Disabled: Story = {\n  render: () => (\n    <InputFile placeholder=\"Cannot upload\" setValue={() => {}} disabled />\n  ),\n};\n\nexport const ErrorState: Story = {\n  render: function ErrorStory() {\n    const [, setValue] = React.useState(\"\");\n    return (\n      <InputFile placeholder=\"Required file...\" setValue={setValue} error />\n    );\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputFile.tsx",
    "content": "\"use client\";\n\nimport React, { useRef, useState } from \"react\";\nimport InputTypeIn, {\n  InputTypeInProps,\n} from \"@/refresh-components/inputs/InputTypeIn\";\nimport { Button } from \"@opal/components\";\nimport { noProp } from \"@/lib/utils\";\nimport { SvgPaperclip } from \"@opal/icons\";\n\nexport interface InputFileProps\n  extends Omit<\n    InputTypeInProps,\n    \"type\" | \"rightSection\" | \"value\" | \"onChange\" | \"readOnly\" | \"onClear\"\n  > {\n  /**\n   * Whether the input is disabled.\n   */\n  disabled?: boolean;\n  /**\n   * Whether the input has an error.\n   */\n  error?: boolean;\n  // Receives the extracted file content (text) or pasted value\n  setValue: (value: string) => void;\n  // Called when a value is committed via file selection or paste (not on each keystroke)\n  onValueSet?: (value: string, source: \"file\" | \"paste\") => void;\n  // HTML accept attribute e.g. \"application/json\" or \".txt,.md\"\n  accept?: string;\n  // Maximum allowed file size in kilobytes. If exceeded, file is rejected.\n  maxSizeKb?: number;\n  // Optional callback when the selected file exceeds max size\n  onFileSizeExceeded?: (args: { file: File; maxSizeKb: number }) => void;\n}\n\nexport default function InputFile({\n  setValue,\n  onValueSet,\n  accept,\n  maxSizeKb,\n  onFileSizeExceeded,\n  disabled,\n  error,\n  variant,\n  placeholder,\n  className,\n  ...rest\n}: InputFileProps) {\n  const [displayValue, setDisplayValue] = useState<string>(\"\");\n  const [selectedFileName, setSelectedFileName] = useState<string | null>(null);\n  const [isFileMode, setIsFileMode] = useState<boolean>(false);\n  const fileInputRef = useRef<HTMLInputElement>(null);\n\n  // Derive disabled/readOnly state from either the boolean props or the variant\n  const isDisabled = disabled || variant === \"disabled\";\n  const isReadOnly = variant === \"readOnly\";\n  const isNonEditable = isDisabled || isReadOnly;\n\n  function openFilePicker() {\n    if (isNonEditable) return;\n    fileInputRef.current?.click();\n  }\n\n  function handleFileChange(e: React.ChangeEvent<HTMLInputElement>) {\n    const file = e.target.files?.[0];\n    if (!file) return;\n\n    // Enforce file size limit if provided\n    if (typeof maxSizeKb === \"number\" && maxSizeKb >= 0) {\n      const maxBytes = maxSizeKb * 1024;\n      if (file.size > maxBytes) {\n        onFileSizeExceeded?.({ file, maxSizeKb });\n        // Reset file input to allow re-selecting the same file\n        e.target.value = \"\";\n        return;\n      }\n    }\n\n    const reader = new FileReader();\n    reader.onload = () => {\n      const textContent =\n        typeof reader.result === \"string\" ? reader.result : \"\";\n      setValue(textContent);\n      setSelectedFileName(file.name);\n      setDisplayValue(file.name);\n      setIsFileMode(true);\n      onValueSet?.(textContent, \"file\");\n    };\n    reader.onerror = () => {\n      // Reset state on error\n      setSelectedFileName(null);\n      setDisplayValue(\"\");\n      setIsFileMode(false);\n      setValue(\"\");\n    };\n    reader.readAsText(file);\n    // clear the input value to allow re-selecting the same file if needed\n    e.target.value = \"\";\n  }\n\n  function handleClear() {\n    setSelectedFileName(null);\n    setDisplayValue(\"\");\n    setIsFileMode(false);\n    setValue(\"\");\n  }\n\n  function handleChangeWhenTyping(e: React.ChangeEvent<HTMLInputElement>) {\n    if (isFileMode) return; // ignore typing when file-mode is active\n    const next = e.target.value;\n    setDisplayValue(next);\n    setValue(next);\n  }\n\n  function handlePaste(e: React.ClipboardEvent<HTMLInputElement>) {\n    // Don't allow paste when non-editable\n    if (isNonEditable) return;\n    // Switch to editable mode and use pasted text as the value\n    const pastedText = e.clipboardData.getData(\"text\");\n    if (!pastedText) return;\n    e.preventDefault();\n    setIsFileMode(false);\n    setSelectedFileName(null);\n    setDisplayValue(pastedText);\n    setValue(pastedText);\n    onValueSet?.(pastedText, \"paste\");\n  }\n\n  const rightSection = (\n    <Button\n      disabled={isNonEditable}\n      icon={SvgPaperclip}\n      onClick={noProp(openFilePicker)}\n      type=\"button\"\n      prominence=\"tertiary\"\n      size=\"sm\"\n      aria-label=\"Attach file\"\n    />\n  );\n\n  return (\n    <>\n      <input\n        ref={fileInputRef}\n        type=\"file\"\n        accept={accept}\n        onChange={handleFileChange}\n        aria-hidden\n        className=\"hidden\"\n        tabIndex={-1}\n        disabled={isNonEditable}\n      />\n      <InputTypeIn\n        {...rest}\n        className={className}\n        placeholder={placeholder}\n        variant={isDisabled ? \"disabled\" : error ? \"error\" : variant}\n        value={displayValue}\n        onChange={handleChangeWhenTyping}\n        onPaste={handlePaste}\n        onClear={handleClear}\n        readOnly={isFileMode || isReadOnly}\n        rightSection={rightSection}\n      />\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputImage.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport React from \"react\";\nimport InputImage from \"./InputImage\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof InputImage> = {\n  title: \"refresh-components/inputs/InputImage\",\n  component: InputImage,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div\n          style={{\n            width: 320,\n            display: \"flex\",\n            justifyContent: \"center\",\n            padding: 24,\n          }}\n        >\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InputImage>;\n\nexport const Empty: Story = {\n  args: {\n    onDrop: () => {},\n  },\n};\n\nexport const WithImage: Story = {\n  args: {\n    src: \"https://picsum.photos/200\",\n    alt: \"Sample image\",\n    onEdit: () => {},\n    onRemove: () => {},\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    disabled: true,\n    onDrop: () => {},\n  },\n};\n\nexport const DisabledWithImage: Story = {\n  args: {\n    src: \"https://picsum.photos/200\",\n    alt: \"Cannot edit\",\n    disabled: true,\n  },\n};\n\nexport const CustomSize: Story = {\n  args: {\n    size: 80,\n    onDrop: () => {},\n  },\n};\n\nexport const LargeSize: Story = {\n  args: {\n    src: \"https://picsum.photos/300\",\n    alt: \"Large avatar\",\n    size: 160,\n    onEdit: () => {},\n    onRemove: () => {},\n  },\n};\n\nexport const NoEditOverlay: Story = {\n  args: {\n    src: \"https://picsum.photos/200\",\n    alt: \"No overlay\",\n    showEditOverlay: false,\n    onEdit: () => {},\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputImage.tsx",
    "content": "\"use client\";\n\nimport { cn, noProp } from \"@/lib/utils\";\nimport { SvgPlus, SvgX } from \"@opal/icons\";\nimport { Hoverable } from \"@opal/core\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { useImageDropzone } from \"@/hooks/useImageDropzone\";\n\ntype ImageState = \"empty\" | \"withImage\" | \"dragActive\";\ntype AbledState = \"enabled\" | \"disabled\";\n\nconst inputImageClasses = {\n  container: {\n    empty: {\n      enabled: [\n        \"bg-background-neutral-00\",\n        \"hover:bg-background-tint-02\",\n        \"active:bg-background-tint-00\",\n        \"focus-visible:bg-background-tint-01\",\n        \"focus-visible:hover:bg-background-tint-02\",\n        \"border-dashed\",\n        \"hover:border-solid\",\n        \"active:border-solid\",\n        \"border-border-01\",\n        \"hover:border-border-03\",\n        \"active:border-border-05\",\n        \"focus-visible:border-border-05\",\n      ],\n      disabled: [\n        \"bg-background-neutral-00\",\n        \"border-dashed\",\n        \"border-border-01\",\n        \"opacity-50\",\n        \"cursor-not-allowed\",\n      ],\n    },\n    withImage: {\n      enabled: [\n        \"bg-background-neutral-00\",\n        \"hover:bg-background-tint-02\",\n        \"active:bg-background-tint-00\",\n        \"focus-visible:bg-background-tint-01\",\n        \"focus-visible:hover:bg-background-tint-02\",\n        \"border-solid\",\n        \"border-border-01\",\n        \"hover:border-border-03\",\n        \"active:border-border-05\",\n        \"focus-visible:border-border-05\",\n      ],\n      disabled: [\n        \"bg-background-neutral-00\",\n        \"border-solid\",\n        \"border-border-01\",\n        \"opacity-50\",\n        \"cursor-not-allowed\",\n      ],\n    },\n    dragActive: {\n      enabled: [\n        \"bg-background-neutral-00\",\n        \"border-solid\",\n        \"border-2\",\n        \"border-action-link-05\",\n      ],\n      disabled: [\n        \"bg-background-neutral-00\",\n        \"border-solid\",\n        \"border-2\",\n        \"border-action-link-05\",\n        \"opacity-50\",\n        \"cursor-not-allowed\",\n      ],\n    },\n  },\n  placeholder: {\n    empty: {\n      enabled: [\n        \"stroke-text-02\",\n        \"group-hover:stroke-text-03\",\n        \"group-active:stroke-text-04\",\n        \"group-focus-visible:stroke-text-02\",\n        \"group-focus-visible:group-hover:stroke-text-03\",\n      ],\n      disabled: [\"stroke-text-01\"],\n    },\n    withImage: {\n      enabled: [],\n      disabled: [],\n    },\n    dragActive: {\n      enabled: [\"stroke-action-link-05\"],\n      disabled: [\"stroke-action-link-05\"],\n    },\n  },\n} as const;\n\nexport interface InputImageProps {\n  // State control\n  disabled?: boolean;\n\n  // Image source\n  src?: string;\n  alt?: string;\n\n  // Callbacks\n  onEdit?: () => void;\n  onRemove?: () => void;\n  /** Callback when image is dropped onto the component */\n  onDrop?: (file: File) => void;\n  /** Callback when file is rejected */\n  onDropRejected?: (reason: string) => void;\n\n  /** Whether to show the edit overlay on hover (default: true) */\n  showEditOverlay?: boolean;\n\n  // Size control\n  size?: number;\n\n  className?: string;\n}\n\nexport default function InputImage({\n  disabled = false,\n  src,\n  alt = \"Image\",\n  onEdit,\n  onRemove,\n  onDrop,\n  onDropRejected,\n  showEditOverlay = true,\n  size = 120,\n  className,\n}: InputImageProps) {\n  const isInteractive = !disabled && (onEdit || onDrop);\n  const hasImage = !!src;\n\n  const { isDragActive, getRootProps, getInputProps, openFilePicker } =\n    useImageDropzone({\n      onImageAccepted: (file) => {\n        onDrop?.(file);\n      },\n      onImageRejected: (rejections) => {\n        const firstRejection = rejections[0];\n        const reason = firstRejection?.errors[0]?.message || \"File rejected\";\n        onDropRejected?.(reason);\n      },\n      disabled: disabled || !onDrop,\n    });\n\n  const handleClick = () => {\n    if (disabled) return;\n    if (onEdit) {\n      onEdit();\n    } else if (onDrop) {\n      openFilePicker();\n    }\n  };\n\n  // Derive states once\n  const imageState: ImageState = isDragActive\n    ? \"dragActive\"\n    : hasImage\n      ? \"withImage\"\n      : \"empty\";\n  const abled: AbledState = disabled ? \"disabled\" : \"enabled\";\n\n  // Single lookup pattern for all classes\n  const containerClass = inputImageClasses.container[imageState][abled];\n  const placeholderClass = inputImageClasses.placeholder[imageState][abled];\n\n  const dropzoneProps = onDrop ? getRootProps() : {};\n\n  return (\n    <Hoverable.Root group=\"inputImage\" widthVariant=\"fit\">\n      <div\n        className={cn(\"relative\", className)}\n        style={{ width: size, height: size }}\n        {...dropzoneProps}\n      >\n        {/* Hidden input for file selection */}\n        {onDrop && <input {...getInputProps()} />}\n\n        {/* Main container */}\n        <button\n          type=\"button\"\n          onClick={handleClick}\n          disabled={disabled}\n          className={cn(\n            \"group relative w-full h-full rounded-full overflow-hidden\",\n            \"border flex items-center justify-center\",\n            \"transition-all duration-150\",\n            containerClass\n          )}\n          aria-label={\n            isInteractive\n              ? hasImage\n                ? \"Edit image\"\n                : \"Upload image\"\n              : undefined\n          }\n        >\n          {/* Content */}\n          {hasImage ? (\n            <img\n              src={src}\n              alt={alt}\n              className=\"absolute inset-0 w-full h-full object-cover pointer-events-none\"\n            />\n          ) : (\n            <SvgPlus\n              className={cn(\"w-6 h-6\", placeholderClass, \"pointer-events-none\")}\n            />\n          )}\n\n          {/* Drag overlay indicator */}\n          {isDragActive && (\n            <div className=\"absolute inset-0 bg-action-link-05/10 flex items-center justify-center rounded-full pointer-events-none\">\n              <SvgPlus className=\"w-8 h-8 stroke-action-link-05\" />\n            </div>\n          )}\n\n          {/* Edit overlay - shows on hover/focus when image is uploaded */}\n          {showEditOverlay && isInteractive && hasImage && !isDragActive && (\n            <div className=\"absolute bottom-0 left-0 right-0 pointer-events-none\">\n              <Hoverable.Item group=\"inputImage\" variant=\"opacity-on-hover\">\n                <div\n                  className={cn(\n                    \"flex items-center justify-center\",\n                    \"pb-2.5 pt-1.5\",\n                    \"backdrop-blur-sm bg-mask-01\",\n                    \"pointer-events-none\"\n                  )}\n                >\n                  <div className=\"pointer-events-auto\">\n                    <SimpleTooltip tooltip=\"Edit\" side=\"top\">\n                      <div\n                        className={cn(\n                          \"flex items-center justify-center\",\n                          \"px-1 py-0.5 rounded-08\"\n                        )}\n                      >\n                        <Text\n                          className=\"text-text-03 font-secondary-action\"\n                          style={{ fontSize: \"12px\", lineHeight: \"16px\" }}\n                        >\n                          Edit\n                        </Text>\n                      </div>\n                    </SimpleTooltip>\n                  </div>\n                </div>\n              </Hoverable.Item>\n            </div>\n          )}\n        </button>\n\n        {/* Remove button - top left corner (only when image is uploaded) */}\n        {isInteractive && hasImage && onRemove && (\n          <div className=\"absolute top-1 left-1\">\n            <Hoverable.Item group=\"inputImage\" variant=\"opacity-on-hover\">\n              {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n              <IconButton\n                icon={SvgX}\n                onClick={noProp(onRemove)}\n                type=\"button\"\n                primary\n                className=\"!w-5 !h-5 !p-0.5 !rounded-04\"\n                aria-label=\"Remove image\"\n              />\n            </Hoverable.Item>\n          </div>\n        )}\n      </div>\n    </Hoverable.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputKeyValue.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport React from \"react\";\nimport KeyValueInput from \"./InputKeyValue\";\nimport type { KeyValue } from \"./InputKeyValue\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof KeyValueInput> = {\n  title: \"refresh-components/inputs/InputKeyValue\",\n  component: KeyValueInput,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 400 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof KeyValueInput>;\n\nexport const Default: Story = {\n  render: function DefaultStory() {\n    const [items, setItems] = React.useState<KeyValue[]>([\n      { key: \"\", value: \"\" },\n    ]);\n    return (\n      <KeyValueInput\n        keyTitle=\"Key\"\n        valueTitle=\"Value\"\n        items={items}\n        onChange={setItems}\n      />\n    );\n  },\n};\n\nexport const WithValues: Story = {\n  render: function WithValuesStory() {\n    const [items, setItems] = React.useState<KeyValue[]>([\n      { key: \"API_KEY\", value: \"sk-abc123\" },\n      { key: \"BASE_URL\", value: \"https://api.example.com\" },\n    ]);\n    return (\n      <KeyValueInput\n        keyTitle=\"Variable Name\"\n        valueTitle=\"Value\"\n        items={items}\n        onChange={setItems}\n      />\n    );\n  },\n};\n\nexport const FixedLineMode: Story = {\n  render: function FixedLineStory() {\n    const [items, setItems] = React.useState<KeyValue[]>([\n      { key: \"Content-Type\", value: \"application/json\" },\n    ]);\n    return (\n      <KeyValueInput\n        keyTitle=\"Header\"\n        valueTitle=\"Value\"\n        items={items}\n        onChange={setItems}\n        mode=\"fixed-line\"\n      />\n    );\n  },\n};\n\nexport const KeyWideLayout: Story = {\n  render: function KeyWideStory() {\n    const [items, setItems] = React.useState<KeyValue[]>([\n      { key: \"Authorization\", value: \"Bearer token\" },\n    ]);\n    return (\n      <KeyValueInput\n        keyTitle=\"Header\"\n        valueTitle=\"Value\"\n        items={items}\n        onChange={setItems}\n        layout=\"key-wide\"\n      />\n    );\n  },\n};\n\nexport const Disabled: Story = {\n  render: () => (\n    <KeyValueInput\n      keyTitle=\"Key\"\n      valueTitle=\"Value\"\n      items={[{ key: \"LOCKED\", value: \"cannot-edit\" }]}\n      onChange={() => {}}\n      disabled\n    />\n  ),\n};\n\nexport const EmptyLineMode: Story = {\n  render: function EmptyStory() {\n    const [items, setItems] = React.useState<KeyValue[]>([]);\n    return (\n      <KeyValueInput\n        keyTitle=\"Key\"\n        valueTitle=\"Value\"\n        items={items}\n        onChange={setItems}\n        mode=\"line\"\n      />\n    );\n  },\n};\n\nexport const CustomAddLabel: Story = {\n  render: function CustomLabelStory() {\n    const [items, setItems] = React.useState<KeyValue[]>([\n      { key: \"\", value: \"\" },\n    ]);\n    return (\n      <KeyValueInput\n        keyTitle=\"Name\"\n        valueTitle=\"Endpoint\"\n        items={items}\n        onChange={setItems}\n        addButtonLabel=\"Add Endpoint\"\n      />\n    );\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputKeyValue.tsx",
    "content": "\"use client\";\n\n/**\n * KeyValueInput - A comprehensive key-value pair input component\n *\n * Features:\n * - Two modes: 'line' (can remove all) and 'fixed-line' (minimum 1 item)\n * - Built-in validation for duplicate keys and empty keys\n * - Full accessibility with ARIA support\n * - Integrates with Formik, FormField, and custom form libraries\n * - Inline error display with danger-colored borders\n *\n * @example Basic Usage\n * ```tsx\n * const [items, setItems] = useState([{ key: \"API_KEY\", value: \"value\" }]);\n *\n * <KeyValueInput\n *   keyTitle=\"Variable Name\"\n *   valueTitle=\"Value\"\n *   items={items}\n *   onChange={setItems}\n *   mode=\"line\"\n * />\n * ```\n *\n * @example With Formik Integration\n * ```tsx\n * <Formik initialValues={{ envVars: [] }}>\n *   {({ values, setFieldValue, setFieldError }) => (\n *     <FormField state={errors.envVars ? \"error\" : \"idle\"}>\n *       <FormField.Label>Environment Variables</FormField.Label>\n *       <FormField.Control asChild>\n *         <KeyValueInput\n *           keyTitle=\"Variable Name\"\n *           valueTitle=\"Value\"\n *           items={values.envVars}\n *           onChange={(items) => setFieldValue(\"envVars\", items)}\n *           onValidationError={(error) => {\n *             if (error) {\n *               setFieldError(\"envVars\", error);\n *             } else {\n *               setFieldError(\"envVars\", undefined);\n *             }\n *           }}\n *         />\n *       </FormField.Control>\n *     </FormField>\n *   )}\n * </Formik>\n * ```\n *\n * @example With Local Error State\n * ```tsx\n * const [error, setError] = useState<string | null>(null);\n *\n * <FormField state={error ? \"error\" : \"idle\"}>\n *   <FormField.Label>Headers</FormField.Label>\n *   <FormField.Control asChild>\n *     <KeyValueInput\n *       keyTitle=\"Header\"\n *       valueTitle=\"Value\"\n *       items={headers}\n *       onChange={setHeaders}\n *       onValidationError={setError}\n *     />\n *   </FormField.Control>\n * </FormField>\n * ```\n */\n\nimport React, {\n  useCallback,\n  useContext,\n  useEffect,\n  useMemo,\n  useId,\n  useRef,\n} from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport InputTypeIn from \"./InputTypeIn\";\nimport { Button, EmptyMessageCard } from \"@opal/components\";\nimport type { WithoutStyles } from \"@opal/types\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { FieldContext } from \"../form/FieldContext\";\nimport { FieldMessage } from \"../messages/FieldMessage\";\nimport { SvgMinusCircle, SvgPlusCircle } from \"@opal/icons\";\n\nexport type KeyValue = { key: string; value: string };\n\ntype KeyValueError = {\n  key?: string;\n  value?: string;\n};\n\n/*\n * CSS Grid is used instead of flexbox so that the key column, value column,\n * and remove button stay perfectly aligned across every row — including the\n * header titles. With flex + width restrictions each row is laid out\n * independently, so columns can drift when content (e.g. validation errors)\n * causes one cell to grow. Grid's shared column tracks prevent that.\n */\nconst GRID_COLS = {\n  equal: \"grid-cols-[1fr_1fr_2.25rem]\",\n  \"key-wide\": \"grid-cols-[3fr_2fr_2.25rem]\",\n} as const;\n\ninterface KeyValueInputItemProps {\n  item: KeyValue;\n  onChange: (next: KeyValue) => void;\n  disabled?: boolean;\n  onRemove: () => void;\n  keyPlaceholder?: string;\n  valuePlaceholder?: string;\n  error?: KeyValueError;\n  canRemove: boolean;\n  index: number;\n  fieldId: string;\n}\n\nfunction KeyValueInputItem({\n  item,\n  onChange,\n  disabled,\n  onRemove,\n  keyPlaceholder,\n  valuePlaceholder,\n  error,\n  canRemove,\n  index,\n  fieldId,\n}: KeyValueInputItemProps) {\n  return (\n    <>\n      <div className=\"flex flex-col gap-y-0.5\">\n        <InputTypeIn\n          placeholder={keyPlaceholder || \"Key\"}\n          value={item.key}\n          onChange={(e) => onChange({ ...item, key: e.target.value })}\n          aria-label={`${keyPlaceholder || \"Key\"} ${index + 1}`}\n          aria-invalid={!!error?.key}\n          aria-describedby={\n            error?.key ? `${fieldId}-key-error-${index}` : undefined\n          }\n          variant={disabled ? \"disabled\" : undefined}\n          showClearButton={false}\n        />\n        {error?.key && (\n          <FieldMessage variant=\"error\" className=\"ml-0.5\">\n            <FieldMessage.Content\n              id={`${fieldId}-key-error-${index}`}\n              role=\"alert\"\n              className=\"ml-0.5\"\n            >\n              {error.key}\n            </FieldMessage.Content>\n          </FieldMessage>\n        )}\n      </div>\n      <div className=\"flex flex-col gap-y-0.5\">\n        <InputTypeIn\n          placeholder={valuePlaceholder || \"Value\"}\n          value={item.value}\n          onChange={(e) => onChange({ ...item, value: e.target.value })}\n          aria-label={`${valuePlaceholder || \"Value\"} ${index + 1}`}\n          aria-invalid={!!error?.value}\n          aria-describedby={\n            error?.value ? `${fieldId}-value-error-${index}` : undefined\n          }\n          variant={disabled ? \"disabled\" : undefined}\n          showClearButton={false}\n        />\n        {error?.value && (\n          <FieldMessage variant=\"error\" className=\"ml-0.5\">\n            <FieldMessage.Content\n              id={`${fieldId}-value-error-${index}`}\n              role=\"alert\"\n              className=\"ml-0.5\"\n            >\n              {error.value}\n            </FieldMessage.Content>\n          </FieldMessage>\n        )}\n      </div>\n      <Button\n        disabled={disabled || !canRemove}\n        prominence=\"tertiary\"\n        icon={SvgMinusCircle}\n        onClick={onRemove}\n        aria-label={`Remove ${keyPlaceholder || \"key-value\"} pair ${index + 1}`}\n      />\n    </>\n  );\n}\n\nexport interface KeyValueInputProps\n  extends WithoutStyles<\n    Omit<React.HTMLAttributes<HTMLDivElement>, \"onChange\">\n  > {\n  /** Title for the key column */\n  keyTitle?: string;\n  /** Title for the value column */\n  valueTitle?: string;\n  /** Array of key-value pairs */\n  items: KeyValue[];\n  /** Callback when items change */\n  onChange: (nextItems: KeyValue[]) => void;\n  /** Custom add handler */\n  onAdd?: () => void;\n  /** Custom remove handler */\n  onRemove?: (index: number) => void;\n  /** Disabled state */\n  disabled?: boolean;\n  /** Mode: 'line' allows removing all items, 'fixed-line' requires at least one item */\n  mode?: \"line\" | \"fixed-line\";\n  /** Layout: 'equal' - both inputs same width, 'key-wide' - key input is wider (60/40 split) */\n  layout?: \"equal\" | \"key-wide\";\n  /** Callback when validation state changes */\n  onValidationChange?: (isValid: boolean, errors: KeyValueError[]) => void;\n  /** Callback to handle validation errors - integrates with Formik or custom error handling. Called with error message when invalid, null when valid */\n  onValidationError?: (errorMessage: string | null) => void;\n  /** Optional custom validator for the key field. Return { isValid, message } */\n  onKeyValidate?: (\n    key: string,\n    index: number,\n    item: KeyValue,\n    items: KeyValue[]\n  ) => { isValid: boolean; message?: string };\n  /** Optional custom validator for the value field. Return { isValid, message } */\n  onValueValidate?: (\n    value: string,\n    index: number,\n    item: KeyValue,\n    items: KeyValue[]\n  ) => { isValid: boolean; message?: string };\n  /** Whether to validate for duplicate keys */\n  validateDuplicateKeys?: boolean;\n  /** Whether to validate for empty keys */\n  validateEmptyKeys?: boolean;\n  /** Optional name for the field (for accessibility) */\n  name?: string;\n  /** Custom label for the add button (defaults to \"Add Line\") */\n  addButtonLabel?: string;\n}\n\nexport default function KeyValueInput({\n  keyTitle = \"Key\",\n  valueTitle = \"Value\",\n  items = [],\n  onChange,\n  onAdd,\n  onRemove,\n  disabled = false,\n  mode = \"line\",\n  layout = \"equal\",\n  onValidationChange,\n  onValidationError,\n  onKeyValidate,\n  onValueValidate,\n  validateDuplicateKeys = true,\n  validateEmptyKeys = true,\n  name,\n  addButtonLabel = \"Add Line\",\n  ...rest\n}: KeyValueInputProps) {\n  // Try to get field context if used within FormField (safe access)\n  const fieldContext = useContext(FieldContext);\n\n  // Validation logic\n  const errors = useMemo((): KeyValueError[] => {\n    if (!items || items.length === 0) return [];\n\n    const errorsList: KeyValueError[] = items.map(() => ({}));\n    const keyCount = new Map<string, number[]>();\n\n    items.forEach((item, index) => {\n      // Validate empty keys - only if value is filled (user is actively working on this row)\n      if (\n        validateEmptyKeys &&\n        item.key.trim() === \"\" &&\n        item.value.trim() !== \"\"\n      ) {\n        const error = errorsList[index];\n        if (error) {\n          error.key = \"Key cannot be empty\";\n        }\n      }\n\n      // Track key occurrences for duplicate validation\n      if (item.key.trim() !== \"\") {\n        const existing = keyCount.get(item.key) || [];\n        existing.push(index);\n        keyCount.set(item.key, existing);\n      }\n\n      // Custom key validation\n      if (onKeyValidate) {\n        const result = onKeyValidate(item.key, index, item, items);\n        if (result && result.isValid === false) {\n          const error = errorsList[index];\n          if (error) {\n            error.key = result.message || \"Invalid key\";\n          }\n        }\n      }\n\n      // Custom value validation\n      if (onValueValidate) {\n        const result = onValueValidate(item.value, index, item, items);\n        if (result && result.isValid === false) {\n          const error = errorsList[index];\n          if (error) {\n            error.value = result.message || \"Invalid value\";\n          }\n        }\n      }\n    });\n\n    // Validate duplicate keys\n    if (validateDuplicateKeys) {\n      keyCount.forEach((indices, key) => {\n        if (indices.length > 1) {\n          indices.forEach((index) => {\n            const error = errorsList[index];\n            if (error) {\n              error.key = \"Duplicate key\";\n            }\n          });\n        }\n      });\n    }\n\n    return errorsList;\n  }, [\n    items,\n    validateDuplicateKeys,\n    validateEmptyKeys,\n    onKeyValidate,\n    onValueValidate,\n  ]);\n\n  const isValid = useMemo(() => {\n    return errors.every((error) => !error.key && !error.value);\n  }, [errors]);\n\n  const hasAnyError = useMemo(() => {\n    return errors.some((error) => error.key || error.value);\n  }, [errors]);\n\n  // Generate error message for external form libraries (Formik, etc.)\n  const errorMessage = useMemo(() => {\n    if (!hasAnyError) return null;\n\n    const errorCount = errors.filter((e) => e.key || e.value).length;\n    const duplicateCount = errors.filter(\n      (e) => e.key === \"Duplicate key\"\n    ).length;\n    const emptyCount = errors.filter(\n      (e) => e.key === \"Key cannot be empty\"\n    ).length;\n\n    if (duplicateCount > 0) {\n      return `${duplicateCount} duplicate ${\n        duplicateCount === 1 ? \"key\" : \"keys\"\n      } found`;\n    } else if (emptyCount > 0) {\n      return `${emptyCount} empty ${emptyCount === 1 ? \"key\" : \"keys\"} found`;\n    }\n    return `${errorCount} validation ${\n      errorCount === 1 ? \"error\" : \"errors\"\n    } found`;\n  }, [hasAnyError, errors]);\n\n  // Notify parent of validation changes\n  const onValidationChangeRef = useRef(onValidationChange);\n  const onValidationErrorRef = useRef(onValidationError);\n\n  useEffect(() => {\n    onValidationChangeRef.current = onValidationChange;\n  }, [onValidationChange]);\n\n  useEffect(() => {\n    onValidationErrorRef.current = onValidationError;\n  }, [onValidationError]);\n\n  useEffect(() => {\n    onValidationChangeRef.current?.(isValid, errors);\n  }, [isValid, errors]);\n\n  // Notify parent of error state for form library integration\n  useEffect(() => {\n    onValidationErrorRef.current?.(errorMessage);\n  }, [errorMessage]);\n\n  const canRemoveItems = mode === \"line\" || items.length > 1;\n\n  const handleAdd = useCallback(() => {\n    if (onAdd) {\n      onAdd();\n      return;\n    }\n    onChange([...(items || []), { key: \"\", value: \"\" }]);\n  }, [onAdd, onChange, items]);\n\n  const handleRemove = useCallback(\n    (index: number) => {\n      if (!canRemoveItems && items.length === 1) return;\n\n      if (onRemove) {\n        onRemove(index);\n        return;\n      }\n      const next = (items || []).filter((_, i) => i !== index);\n      onChange(next);\n    },\n    [canRemoveItems, items, onRemove, onChange]\n  );\n\n  const handleItemChange = useCallback(\n    (index: number, nextItem: KeyValue) => {\n      const next = [...(items || [])];\n      next[index] = nextItem;\n      onChange(next);\n    },\n    [items, onChange]\n  );\n\n  // Initialize with at least one item for fixed-line mode\n  useEffect(() => {\n    if (mode === \"fixed-line\" && (!items || items.length === 0)) {\n      onChange([{ key: \"\", value: \"\" }]);\n    }\n  }, [mode]); // Only run on mode change\n\n  const autoId = useId();\n  const fieldId = fieldContext?.baseId || name || `key-value-input-${autoId}`;\n  const gridCols = GRID_COLS[layout];\n\n  return (\n    <div\n      className=\"w-full flex flex-col gap-y-2\"\n      role=\"group\"\n      aria-label={`${keyTitle} and ${valueTitle} pairs`}\n      {...rest}\n    >\n      {items && items.length > 0 ? (\n        <div className={cn(\"grid items-start gap-1\", gridCols)}>\n          {/*\n            # NOTE (@raunakab)\n            We add this space below the \"title\"-row to add some breathing room between the titles and the key-value items.\n            Since we're using a `grid` template, the padding below *one* item in a row applies additional height to *all* items in that row.\n          */}\n          <div className=\"pb-1\">\n            <Text mainUiAction>{keyTitle}</Text>\n          </div>\n          <Text mainUiAction>{valueTitle}</Text>\n          <div aria-hidden />\n\n          {items.map((item, index) => (\n            <KeyValueInputItem\n              key={index}\n              item={item}\n              onChange={(next) => handleItemChange(index, next)}\n              disabled={disabled}\n              onRemove={() => handleRemove(index)}\n              keyPlaceholder={keyTitle}\n              valuePlaceholder={valueTitle}\n              error={errors[index]}\n              canRemove={canRemoveItems}\n              index={index}\n              fieldId={fieldId}\n            />\n          ))}\n        </div>\n      ) : (\n        <EmptyMessageCard title=\"No items added yet.\" />\n      )}\n\n      <Button\n        disabled={disabled}\n        prominence=\"secondary\"\n        onClick={handleAdd}\n        icon={SvgPlusCircle}\n        aria-label={`Add ${keyTitle} and ${valueTitle} pair`}\n        type=\"button\"\n      >\n        {addButtonLabel}\n      </Button>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputNumber.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport InputNumber from \"./InputNumber\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof InputNumber> = {\n  title: \"refresh-components/inputs/InputNumber\",\n  component: InputNumber,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 200 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InputNumber>;\n\nexport const Default: Story = {\n  args: {\n    value: 5,\n    onChange: () => {},\n  },\n};\n\nexport const WithMinMax: Story = {\n  args: {\n    value: 50,\n    onChange: () => {},\n    min: 0,\n    max: 100,\n  },\n};\n\nexport const WithReset: Story = {\n  args: {\n    value: 42,\n    onChange: () => {},\n    showReset: true,\n    defaultValue: 10,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputNumber.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { Button } from \"@opal/components\";\nimport {\n  Variants,\n  wrapperClasses,\n  innerClasses,\n  textClasses,\n} from \"@/refresh-components/inputs/styles\";\nimport { SvgChevronUp, SvgChevronDown, SvgRevert } from \"@opal/icons\";\n\n/**\n * InputNumber Component\n *\n * A number input with increment/decrement stepper buttons and optional reset.\n *\n * @example\n * ```tsx\n * // Basic usage\n * <InputNumber\n *   value={count}\n *   onChange={setCount}\n *   min={0}\n *   max={100}\n * />\n *\n * // With reset button\n * <InputNumber\n *   value={count}\n *   onChange={setCount}\n *   defaultValue={10}\n *   showReset\n * />\n *\n * // With step\n * <InputNumber\n *   value={count}\n *   onChange={setCount}\n *   step={5}\n * />\n * ```\n */\nexport interface InputNumberProps {\n  value: number | null;\n  onChange: (value: number | null) => void;\n  min?: number;\n  max?: number;\n  step?: number;\n  defaultValue?: number;\n  showReset?: boolean;\n  variant?: Variants;\n  disabled?: boolean;\n  className?: string;\n  placeholder?: string;\n}\n\nexport default function InputNumber({\n  value,\n  onChange,\n  min,\n  max,\n  step = 1,\n  defaultValue,\n  showReset = false,\n  variant = \"primary\",\n  disabled = false,\n  className,\n  placeholder,\n}: InputNumberProps) {\n  const inputRef = React.useRef<HTMLInputElement | null>(null);\n  const [inputValue, setInputValue] = React.useState(\n    value === null ? \"\" : String(value)\n  );\n  const isDisabled = disabled || variant === \"disabled\";\n\n  // Sync input value when external value changes (e.g., from stepper buttons or reset)\n  React.useEffect(() => {\n    setInputValue(value === null ? \"\" : String(value));\n  }, [value]);\n\n  const effectiveValue = value ?? 0;\n  const canIncrement = max === undefined || effectiveValue < max;\n  const canDecrement =\n    value !== null && (min === undefined || effectiveValue > min);\n  const canReset =\n    showReset && defaultValue !== undefined && value !== defaultValue;\n\n  const handleIncrement = () => {\n    if (canIncrement) {\n      const newValue = effectiveValue + step;\n      onChange(max !== undefined ? Math.min(newValue, max) : newValue);\n    }\n  };\n\n  const handleDecrement = () => {\n    if (canDecrement) {\n      const newValue = effectiveValue - step;\n      onChange(min !== undefined ? Math.max(newValue, min) : newValue);\n    }\n  };\n\n  const handleReset = () => {\n    if (defaultValue !== undefined) {\n      onChange(defaultValue);\n    }\n  };\n\n  const handleBlur = () => {\n    // On blur, if empty, keep as null so placeholder shows\n    if (inputValue.trim() === \"\") {\n      onChange(null);\n    } else {\n      setInputValue(value === null ? \"\" : String(value));\n    }\n  };\n\n  const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {\n    const rawValue = e.target.value;\n\n    // Only allow digits (and empty string)\n    if (rawValue !== \"\" && !/^\\d+$/.test(rawValue)) {\n      return;\n    }\n\n    setInputValue(rawValue);\n\n    // Allow empty input while typing (fallback applied on blur)\n    if (rawValue === \"\") {\n      return;\n    }\n\n    const val = parseInt(rawValue, 10);\n    let newValue = val;\n    if (min !== undefined) newValue = Math.max(newValue, min);\n    if (max !== undefined) newValue = Math.min(newValue, max);\n    onChange(newValue);\n  };\n\n  return (\n    <div\n      className={cn(\n        \"flex flex-row items-center justify-between w-full h-fit pr-1.5 pl-1.5 rounded-08\",\n        wrapperClasses[variant],\n        className\n      )}\n      onClick={() => inputRef.current?.focus()}\n    >\n      <input\n        ref={inputRef}\n        type=\"text\"\n        inputMode=\"numeric\"\n        pattern=\"[0-9]*\"\n        disabled={isDisabled}\n        value={inputValue}\n        placeholder={placeholder}\n        onChange={handleInputChange}\n        onBlur={handleBlur}\n        className={cn(\n          \"w-full h-[1.5rem] bg-transparent p-0.5 focus:outline-none\",\n          innerClasses[variant],\n          textClasses[variant]\n        )}\n      />\n\n      <div className=\"flex flex-row items-center gap-1\">\n        {showReset && (\n          <Button\n            disabled={!canReset || isDisabled}\n            icon={SvgRevert}\n            onClick={handleReset}\n            prominence=\"tertiary\"\n          />\n        )}\n        <div className=\"flex flex-col\">\n          <button\n            type=\"button\"\n            onClick={handleIncrement}\n            disabled={!canIncrement || isDisabled}\n            className=\"p-0.5 text-text-03 hover:text-text-04 disabled:text-text-02 disabled:cursor-not-allowed transition-colors\"\n          >\n            <SvgChevronUp size={14} />\n          </button>\n          <button\n            type=\"button\"\n            onClick={handleDecrement}\n            disabled={!canDecrement || isDisabled}\n            className=\"p-0.5 text-text-03 hover:text-text-04 disabled:text-text-02 disabled:cursor-not-allowed transition-colors\"\n          >\n            <SvgChevronDown size={14} />\n          </button>\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputSearch.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport React from \"react\";\nimport InputSearch from \"./InputSearch\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof InputSearch> = {\n  title: \"refresh-components/inputs/InputSearch\",\n  component: InputSearch,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 320 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InputSearch>;\n\nexport const Default: Story = {\n  render: function DefaultStory() {\n    const [value, setValue] = React.useState(\"\");\n    return (\n      <InputSearch\n        placeholder=\"Search...\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n      />\n    );\n  },\n};\n\nexport const WithValue: Story = {\n  render: function WithValueStory() {\n    const [value, setValue] = React.useState(\"Search Value\");\n    return (\n      <InputSearch\n        placeholder=\"Search...\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n      />\n    );\n  },\n};\n\nexport const Disabled: Story = {\n  render: function DisabledStory() {\n    return (\n      <InputSearch\n        placeholder=\"Search...\"\n        value=\"\"\n        onChange={() => {}}\n        disabled\n      />\n    );\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputSearch.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport InputTypeIn, {\n  InputTypeInProps,\n} from \"@/refresh-components/inputs/InputTypeIn\";\n\n/**\n * InputSearch Component\n *\n * A subtle search input that follows the \"Subtle Input Styles\" spec:\n * no border by default, border appears on hover/focus/active.\n *\n * @example\n * ```tsx\n * // Basic usage\n * <InputSearch\n *   placeholder=\"Search...\"\n *   value={search}\n *   onChange={(e) => setSearch(e.target.value)}\n * />\n *\n * // Disabled state\n * <InputSearch\n *   disabled\n *   placeholder=\"Search...\"\n *   value=\"\"\n *   onChange={() => {}}\n * />\n * ```\n */\nexport interface InputSearchProps\n  extends Omit<InputTypeInProps, \"variant\" | \"leftSearchIcon\"> {\n  /**\n   * Ref to the underlying input element.\n   */\n  ref?: React.Ref<HTMLInputElement>;\n  /**\n   * Whether the input is disabled.\n   */\n  disabled?: boolean;\n}\n\nexport default function InputSearch({\n  ref,\n  disabled,\n  className,\n  ...props\n}: InputSearchProps) {\n  return (\n    <InputTypeIn\n      ref={ref}\n      variant={disabled ? \"disabled\" : \"internal\"}\n      leftSearchIcon\n      className={cn(\n        \"[&_input]:font-main-ui-muted [&_input]:text-text-02 [&_input]:placeholder:text-text-02\",\n        !disabled && [\n          \"border border-transparent\",\n          \"hover:border-border-03\",\n          \"active:border-border-05\",\n          \"focus-within:shadow-[0px_0px_0px_2px_var(--background-tint-04)]\",\n          \"focus-within:hover:border-border-03\",\n        ],\n        className\n      )}\n      {...props}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputSelect.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport React from \"react\";\nimport InputSelect from \"./InputSelect\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof InputSelect> = {\n  title: \"refresh-components/inputs/InputSelect\",\n  component: InputSelect,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 320 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InputSelect>;\n\nexport const Default: Story = {\n  render: () => (\n    <InputSelect defaultValue=\"option1\">\n      <InputSelect.Trigger placeholder=\"Select an option\" />\n      <InputSelect.Content>\n        <InputSelect.Item value=\"option1\">Option 1</InputSelect.Item>\n        <InputSelect.Item value=\"option2\">Option 2</InputSelect.Item>\n        <InputSelect.Item value=\"option3\">Option 3</InputSelect.Item>\n      </InputSelect.Content>\n    </InputSelect>\n  ),\n};\n\nexport const WithPlaceholder: Story = {\n  render: () => (\n    <InputSelect>\n      <InputSelect.Trigger placeholder=\"Choose a fruit...\" />\n      <InputSelect.Content>\n        <InputSelect.Item value=\"apple\">Apple</InputSelect.Item>\n        <InputSelect.Item value=\"banana\">Banana</InputSelect.Item>\n        <InputSelect.Item value=\"cherry\">Cherry</InputSelect.Item>\n      </InputSelect.Content>\n    </InputSelect>\n  ),\n};\n\nexport const Controlled: Story = {\n  render: function ControlledStory() {\n    const [value, setValue] = React.useState(\"b\");\n    return (\n      <InputSelect value={value} onValueChange={setValue}>\n        <InputSelect.Trigger placeholder=\"Select...\" />\n        <InputSelect.Content>\n          <InputSelect.Item value=\"a\">Alpha</InputSelect.Item>\n          <InputSelect.Item value=\"b\">Bravo</InputSelect.Item>\n          <InputSelect.Item value=\"c\">Charlie</InputSelect.Item>\n        </InputSelect.Content>\n      </InputSelect>\n    );\n  },\n};\n\nexport const Disabled: Story = {\n  render: () => (\n    <InputSelect defaultValue=\"option1\" disabled>\n      <InputSelect.Trigger placeholder=\"Select an option\" />\n      <InputSelect.Content>\n        <InputSelect.Item value=\"option1\">Option 1</InputSelect.Item>\n        <InputSelect.Item value=\"option2\">Option 2</InputSelect.Item>\n      </InputSelect.Content>\n    </InputSelect>\n  ),\n};\n\nexport const ErrorState: Story = {\n  render: () => (\n    <InputSelect error>\n      <InputSelect.Trigger placeholder=\"Required field\" />\n      <InputSelect.Content>\n        <InputSelect.Item value=\"x\">X</InputSelect.Item>\n        <InputSelect.Item value=\"y\">Y</InputSelect.Item>\n      </InputSelect.Content>\n    </InputSelect>\n  ),\n};\n\nexport const WithGroups: Story = {\n  render: () => (\n    <InputSelect defaultValue=\"gpt4o\">\n      <InputSelect.Trigger placeholder=\"Choose a model...\" />\n      <InputSelect.Content>\n        <InputSelect.Group>\n          <InputSelect.Label>OpenAI</InputSelect.Label>\n          <InputSelect.Item value=\"gpt4o\">GPT-4o</InputSelect.Item>\n          <InputSelect.Item value=\"gpt4o-mini\">GPT-4o Mini</InputSelect.Item>\n        </InputSelect.Group>\n        <InputSelect.Separator />\n        <InputSelect.Group>\n          <InputSelect.Label>Anthropic</InputSelect.Label>\n          <InputSelect.Item value=\"opus\">Claude Opus</InputSelect.Item>\n          <InputSelect.Item value=\"sonnet\">Claude Sonnet</InputSelect.Item>\n        </InputSelect.Group>\n      </InputSelect.Content>\n    </InputSelect>\n  ),\n};\n\nexport const WithDescription: Story = {\n  render: () => (\n    <InputSelect>\n      <InputSelect.Trigger placeholder=\"Select a plan...\" />\n      <InputSelect.Content>\n        <InputSelect.Item value=\"free\" description=\"Up to 5 users\">\n          Free\n        </InputSelect.Item>\n        <InputSelect.Item value=\"pro\" description=\"Unlimited users\">\n          Pro\n        </InputSelect.Item>\n        <InputSelect.Item value=\"enterprise\" description=\"Custom limits\">\n          Enterprise\n        </InputSelect.Item>\n      </InputSelect.Content>\n    </InputSelect>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputSelect.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport * as SelectPrimitive from \"@radix-ui/react-select\";\nimport { cn } from \"@/lib/utils\";\nimport LineItem, { LineItemProps } from \"@/refresh-components/buttons/LineItem\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport type { IconProps } from \"@opal/types\";\nimport {\n  iconClasses,\n  textClasses,\n  Variants,\n  wrapperClasses,\n} from \"@/refresh-components/inputs/styles\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport { SvgChevronDownSmall } from \"@opal/icons\";\nimport Separator, { SeparatorProps } from \"@/refresh-components/Separator\";\nimport { WithoutStyles } from \"@/types\";\n\n// ============================================================================\n// Context\n// ============================================================================\n\ninterface SelectedItemDisplay {\n  childrenRef: React.MutableRefObject<React.ReactNode>;\n  iconRef: React.MutableRefObject<\n    React.FunctionComponent<IconProps> | undefined\n  >;\n}\n\ninterface InputSelectContextValue {\n  variant: Variants;\n  currentValue?: string;\n  disabled?: boolean;\n  selectedItemDisplay: SelectedItemDisplay | null;\n  setSelectedItemDisplay: (display: SelectedItemDisplay | null) => void;\n}\n\nconst InputSelectContext = React.createContext<InputSelectContextValue | null>(\n  null\n);\n\nconst useInputSelectContext = () => {\n  const context = React.useContext(InputSelectContext);\n  if (!context) {\n    throw new Error(\n      \"InputSelect compound components must be used within InputSelect\"\n    );\n  }\n  return context;\n};\n\n// ============================================================================\n// InputSelect Root\n// ============================================================================\n\n/**\n * InputSelect Root Component\n *\n * A styled select/dropdown component built on Radix UI Select primitives.\n * Provides full control over trigger and content rendering.\n *\n * @example\n * ```tsx\n * <InputSelect defaultValue=\"option1\">\n *   <InputSelect.Trigger placeholder=\"Select an option\" />\n *   <InputSelect.Content>\n *     <InputSelect.Item value=\"option1\">Option 1</InputSelect.Item>\n *     <InputSelect.Item value=\"option2\">Option 2</InputSelect.Item>\n *   </InputSelect.Content>\n * </InputSelect>\n *\n * // Controlled\n * <InputSelect value={value} onValueChange={setValue}>\n *   <InputSelect.Trigger placeholder=\"Select...\" />\n *   <InputSelect.Content>\n *     <InputSelect.Item value=\"a\">A</InputSelect.Item>\n *   </InputSelect.Content>\n * </InputSelect>\n *\n * // With error state\n * <InputSelect error>\n *   <InputSelect.Trigger placeholder=\"Required field\" />\n *   <InputSelect.Content>\n *     <InputSelect.Item value=\"x\">X</InputSelect.Item>\n *   </InputSelect.Content>\n * </InputSelect>\n * ```\n */\ninterface InputSelectRootProps\n  extends WithoutStyles<\n    React.ComponentPropsWithoutRef<typeof SelectPrimitive.Root>\n  > {\n  /** Whether to show error styling */\n  error?: boolean;\n  /** Whether the select is disabled */\n  disabled?: boolean;\n  children: React.ReactNode;\n  ref?: React.Ref<HTMLDivElement>;\n}\nfunction InputSelectRoot({\n  disabled,\n  error,\n  value,\n  defaultValue,\n  onValueChange,\n  children,\n  ref,\n  ...props\n}: InputSelectRootProps) {\n  const variant: Variants = disabled ? \"disabled\" : error ? \"error\" : \"primary\";\n\n  // Support both controlled and uncontrolled modes\n  const isControlled = value !== undefined;\n  const [internalValue, setInternalValue] = React.useState<string | undefined>(\n    defaultValue\n  );\n  const currentValue = isControlled ? value : internalValue;\n\n  React.useEffect(() => {\n    if (isControlled) return;\n    setInternalValue(defaultValue);\n  }, [defaultValue, isControlled]);\n\n  const handleValueChange = React.useCallback(\n    (nextValue: string) => {\n      onValueChange?.(nextValue);\n\n      if (isControlled) return;\n      setInternalValue(nextValue);\n    },\n    [isControlled, onValueChange]\n  );\n\n  // Store the selected item's display data (children/icon refs)\n  // Only the currently selected item registers itself\n  const [selectedItemDisplay, setSelectedItemDisplay] =\n    React.useState<SelectedItemDisplay | null>(null);\n\n  React.useEffect(() => {\n    if (!currentValue) setSelectedItemDisplay(null);\n  }, [currentValue]);\n\n  const contextValue = React.useMemo<InputSelectContextValue>(\n    () => ({\n      variant,\n      currentValue,\n      disabled,\n      selectedItemDisplay,\n      setSelectedItemDisplay,\n    }),\n    [variant, currentValue, disabled, selectedItemDisplay]\n  );\n\n  return (\n    <div className=\"w-full min-w-[var(--block-width-form-input-min)] relative\">\n      <InputSelectContext.Provider value={contextValue}>\n        <SelectPrimitive.Root\n          {...(isControlled ? { value: currentValue } : { defaultValue })}\n          onValueChange={handleValueChange}\n          disabled={disabled}\n          {...props}\n        >\n          <div ref={ref} className=\"w-full\">\n            {children}\n          </div>\n        </SelectPrimitive.Root>\n      </InputSelectContext.Provider>\n    </div>\n  );\n}\n\n// ============================================================================\n// InputSelect Trigger\n// ============================================================================\n\n/**\n * InputSelect Trigger Component\n *\n * The clickable trigger that opens the dropdown.\n *\n * @example\n * ```tsx\n * // With placeholder\n * <InputSelect.Trigger placeholder=\"Select...\" />\n *\n * // With right section\n * <InputSelect.Trigger placeholder=\"Select...\" rightSection={<Badge>New</Badge>} />\n * ```\n */\ninterface InputSelectTriggerProps\n  extends WithoutStyles<React.ComponentProps<typeof SelectPrimitive.Trigger>> {\n  /** Placeholder when no value selected */\n  placeholder?: React.ReactNode;\n  /** Content to render on the right side of the trigger */\n  rightSection?: React.ReactNode;\n}\nfunction InputSelectTrigger({\n  placeholder,\n  rightSection,\n  children,\n  ref,\n  ...props\n}: InputSelectTriggerProps) {\n  const { variant, selectedItemDisplay } = useInputSelectContext();\n\n  // Don't memoize - we need to read the latest ref values on every render\n  let displayContent: React.ReactNode;\n\n  if (!selectedItemDisplay) {\n    displayContent = placeholder ? (\n      typeof placeholder === \"string\" ? (\n        <Text as=\"p\" text03>\n          {placeholder}\n        </Text>\n      ) : (\n        placeholder\n      )\n    ) : (\n      <Text as=\"p\" text03>\n        Select an option\n      </Text>\n    );\n  } else {\n    const Icon = selectedItemDisplay.iconRef.current;\n    displayContent = (\n      <div className=\"flex flex-row items-center gap-2 flex-1 w-full\">\n        {Icon && <Icon className={cn(\"h-4 w-4\", iconClasses[variant])} />}\n        <Truncated className={cn(textClasses[variant])}>\n          {selectedItemDisplay.childrenRef.current}\n        </Truncated>\n      </div>\n    );\n  }\n\n  return (\n    <SelectPrimitive.Trigger\n      ref={ref}\n      className={cn(\n        \"group/InputSelect flex w-full items-center justify-between p-1.5 rounded-08 focus:outline-none\",\n        wrapperClasses[variant],\n        variant === \"primary\" && \"data-[state=open]:border-border-05\"\n      )}\n      {...props}\n    >\n      <div className=\"flex flex-row items-center justify-between w-full p-0.5 gap-1\">\n        {children ?? displayContent}\n\n        <div className=\"flex flex-row items-center gap-1\">\n          {rightSection}\n\n          <SelectPrimitive.Icon asChild>\n            <SvgChevronDownSmall\n              className={cn(\n                \"h-4 w-4 transition-transform\",\n                iconClasses[variant],\n                \"group-data-[state=open]/InputSelect:-rotate-180\"\n              )}\n            />\n          </SelectPrimitive.Icon>\n        </div>\n      </div>\n    </SelectPrimitive.Trigger>\n  );\n}\n\n// ============================================================================\n// InputSelect Content\n// ============================================================================\n\n/**\n * InputSelect Content Component\n *\n * The dropdown content container with animations and styling.\n *\n * @example\n * ```tsx\n * <InputSelect.Content>\n *   <InputSelect.Item value=\"1\">Item 1</InputSelect.Item>\n *   <InputSelect.Item value=\"2\">Item 2</InputSelect.Item>\n * </InputSelect.Content>\n * ```\n */\nfunction InputSelectContent({\n  children,\n  ref,\n  ...props\n}: WithoutStyles<React.ComponentProps<typeof SelectPrimitive.Content>>) {\n  return (\n    <SelectPrimitive.Portal>\n      <SelectPrimitive.Content\n        ref={ref}\n        className={cn(\n          \"z-popover w-[var(--radix-select-trigger-width)] max-h-72 overflow-auto rounded-12 border bg-background-neutral-00 p-1\",\n          \"data-[state=open]:animate-in data-[state=closed]:animate-out\",\n          \"data-[state=open]:fade-in-0 data-[state=closed]:fade-out-0\",\n          \"data-[state=open]:zoom-in-95 data-[state=closed]:zoom-out-95\"\n        )}\n        sideOffset={4}\n        position=\"popper\"\n        onMouseDown={(e) => {\n          e.stopPropagation();\n          e.preventDefault();\n        }}\n        {...props}\n      >\n        <SelectPrimitive.Viewport className=\"flex flex-col gap-1\">\n          {children}\n        </SelectPrimitive.Viewport>\n      </SelectPrimitive.Content>\n    </SelectPrimitive.Portal>\n  );\n}\n\n// ============================================================================\n// InputSelect Item\n// ============================================================================\n\n/**\n * InputSelect Item Component\n *\n * Individual selectable option within the dropdown.\n *\n * @example\n * ```tsx\n * <InputSelect.Item value=\"option1\" icon={SvgIcon}>\n *   Option 1\n * </InputSelect.Item>\n *\n * <InputSelect.Item value=\"option2\" description=\"Additional info\">\n *   Option 2\n * </InputSelect.Item>\n * ```\n */\ninterface InputSelectItemProps\n  extends WithoutStyles<Omit<LineItemProps, \"heavyForced\" | \"ref\">> {\n  /** Unique value for this option */\n  value: string;\n  /** Optional callback when item is selected */\n  onClick?: (event: React.SyntheticEvent) => void;\n  ref?: React.Ref<React.ComponentRef<typeof SelectPrimitive.Item>>;\n}\nfunction InputSelectItem({\n  value,\n  children,\n  description,\n  onClick,\n  icon,\n  ref,\n  ...props\n}: InputSelectItemProps) {\n  const { currentValue, setSelectedItemDisplay } = useInputSelectContext();\n  const isSelected = value === currentValue;\n\n  // Use refs to hold latest children/icon - these are passed to the context\n  // so the trigger always reads current values without needing re-registration\n  const childrenRef = React.useRef(children);\n  const iconRef = React.useRef(icon);\n  childrenRef.current = children;\n  iconRef.current = icon;\n\n  // Only the selected item registers its display data\n  React.useEffect(() => {\n    if (!isSelected) return;\n    setSelectedItemDisplay({ childrenRef, iconRef });\n\n    // Clean up functions only need to return for items which are selected.\n    return () => setSelectedItemDisplay(null);\n  }, [isSelected]);\n\n  return (\n    <SelectPrimitive.Item\n      ref={ref}\n      value={value}\n      className=\"outline-none focus:outline-none rounded-08 data-[highlighted]:bg-background-tint-02\"\n      onSelect={onClick}\n    >\n      {/* Hidden ItemText for Radix to track selection */}\n      <span className=\"hidden\">\n        <SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>\n      </span>\n\n      <LineItem\n        {...props}\n        icon={icon}\n        selected={isSelected}\n        emphasized\n        description={description}\n        interactive={false}\n      >\n        {children}\n      </LineItem>\n    </SelectPrimitive.Item>\n  );\n}\n\n// ============================================================================\n// InputSelect Group\n// ============================================================================\n\n/**\n * InputSelect Group Component\n *\n * Groups related items together with an optional label.\n *\n * @example\n * ```tsx\n * <InputSelect.Group>\n *   <InputSelect.Label>Fruits</InputSelect.Label>\n *   <InputSelect.Item value=\"apple\">Apple</InputSelect.Item>\n *   <InputSelect.Item value=\"banana\">Banana</InputSelect.Item>\n * </InputSelect.Group>\n * ```\n */\nfunction InputSelectGroup({\n  ref,\n  ...props\n}: WithoutStyles<React.ComponentProps<typeof SelectPrimitive.Group>>) {\n  return <SelectPrimitive.Group ref={ref} {...props} />;\n}\n\n// ============================================================================\n// InputSelect Label\n// ============================================================================\n\n/**\n * InputSelect Label Component\n *\n * A label for a group of items.\n *\n * @example\n * ```tsx\n * <InputSelect.Label>Category Name</InputSelect.Label>\n * ```\n */\nfunction InputSelectLabel({\n  ref,\n  ...props\n}: WithoutStyles<React.ComponentProps<typeof SelectPrimitive.Label>>) {\n  return (\n    <SelectPrimitive.Label\n      ref={ref}\n      className=\"px-2 py-1.5 text-xs font-medium text-text-03 uppercase tracking-wide\"\n      {...props}\n    />\n  );\n}\n\n// ============================================================================\n// InputSelect Separator\n// ============================================================================\n\n/**\n * InputSelect Separator Component\n *\n * A visual divider between items in the dropdown.\n * Uses the app's standard Separator component with appropriate defaults for dropdown menus.\n *\n * @example\n * ```tsx\n * <InputSelect.Content>\n *   <InputSelect.Item value=\"1\">Option 1</InputSelect.Item>\n *   <InputSelect.Separator />\n *   <InputSelect.Item value=\"2\">Option 2</InputSelect.Item>\n * </InputSelect.Content>\n * ```\n */\nfunction InputSelectSeparator({\n  noPadding = true,\n  ref,\n  ...props\n}: WithoutStyles<SeparatorProps> & {\n  ref?: React.Ref<React.ComponentRef<typeof Separator>>;\n}) {\n  return (\n    <Separator\n      ref={ref}\n      noPadding={noPadding}\n      className=\"px-2 py-1\"\n      {...props}\n    />\n  );\n}\n\n// ============================================================================\n// Exports\n// ============================================================================\n\n/**\n * InputSelect - A styled select/dropdown component\n *\n * @example\n * ```tsx\n * import InputSelect from \"@/refresh-components/inputs/InputSelect\";\n *\n * <InputSelect defaultValue=\"1\">\n *   <InputSelect.Trigger placeholder=\"Choose...\" />\n *   <InputSelect.Content>\n *     <InputSelect.Item value=\"1\">Option 1</InputSelect.Item>\n *     <InputSelect.Item value=\"2\">Option 2</InputSelect.Item>\n *   </InputSelect.Content>\n * </InputSelect>\n *\n * // With groups\n * <InputSelect defaultValue=\"1\">\n *   <InputSelect.Trigger placeholder=\"Choose a model...\" />\n *   <InputSelect.Content>\n *     <InputSelect.Group>\n *       <InputSelect.Label>OpenAI</InputSelect.Label>\n *       <InputSelect.Item value=\"1\">GPT-4o Mini</InputSelect.Item>\n *       <InputSelect.Item value=\"2\">GPT-4o</InputSelect.Item>\n *     </InputSelect.Group>\n *     <InputSelect.Group>\n *       <InputSelect.Label>Anthropic</InputSelect.Label>\n *       <InputSelect.Item value=\"3\">Claude Opus 4.5</InputSelect.Item>\n *       <InputSelect.Item value=\"4\">Claude Sonnet 4.5</InputSelect.Item>\n *     </InputSelect.Group>\n *   </InputSelect.Content>\n * </InputSelect>\n * ```\n */\nexport default Object.assign(InputSelectRoot, {\n  Trigger: InputSelectTrigger,\n  Content: InputSelectContent,\n  Item: InputSelectItem,\n  Group: InputSelectGroup,\n  Label: InputSelectLabel,\n  Separator: InputSelectSeparator,\n});\n\nexport {\n  type InputSelectRootProps,\n  type InputSelectTriggerProps,\n  type InputSelectItemProps,\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputTextArea.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport InputTextArea from \"./InputTextArea\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof InputTextArea> = {\n  title: \"refresh-components/inputs/InputTextArea\",\n  component: InputTextArea,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 400 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InputTextArea>;\n\nexport const Default: Story = {\n  args: {\n    placeholder: \"Enter a description...\",\n  },\n};\n\nexport const AutoResize: Story = {\n  args: {\n    autoResize: true,\n    placeholder: \"This textarea grows as you type...\",\n  },\n};\n\nexport const WithMaxRows: Story = {\n  args: {\n    autoResize: true,\n    maxRows: 5,\n    placeholder: \"Grows up to 5 rows...\",\n  },\n};\n\nexport const Error: Story = {\n  args: {\n    variant: \"error\",\n    value: \"Invalid content\",\n    placeholder: \"Enter a description...\",\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    variant: \"disabled\",\n    value: \"Cannot edit this textarea\",\n  },\n};\n\nexport const ReadOnly: Story = {\n  args: {\n    variant: \"readOnly\",\n    value: \"This content is read-only and cannot be modified.\",\n  },\n};\n\nexport const NonResizable: Story = {\n  args: {\n    resizable: false,\n    placeholder: \"This textarea cannot be resized...\",\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputTextArea.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { cn, mergeRefs } from \"@/lib/utils\";\nimport {\n  innerClasses,\n  textClasses,\n  Variants,\n  wrapperClasses,\n} from \"@/refresh-components/inputs/styles\";\n\n/**\n * InputTextArea Component\n *\n * A styled textarea component with support for various states and auto-resize.\n *\n * @example\n * ```tsx\n * // Basic usage\n * <InputTextArea\n *   value={value}\n *   onChange={(e) => setValue(e.target.value)}\n *   placeholder=\"Enter description...\"\n * />\n *\n * // With error state\n * <InputTextArea\n *   variant=\"error\"\n *   value={value}\n *   onChange={(e) => setValue(e.target.value)}\n * />\n *\n * // Disabled state\n * <InputTextArea variant=\"disabled\" value=\"Cannot edit\" />\n *\n * // Read-only state (non-editable, minimal styling)\n * <InputTextArea variant=\"readOnly\" value=\"Read-only value\" />\n *\n * // Custom rows\n * <InputTextArea\n *   rows={8}\n *   value={value}\n *   onChange={(e) => setValue(e.target.value)}\n * />\n *\n * // Internal styling (no border)\n * <InputTextArea variant=\"internal\" value={value} onChange={handleChange} />\n * ```\n */\nexport interface InputTextAreaProps\n  extends Omit<React.TextareaHTMLAttributes<HTMLTextAreaElement>, \"disabled\"> {\n  variant?: Variants;\n  autoResize?: boolean;\n  maxRows?: number;\n  resizable?: boolean;\n  rightSection?: React.ReactNode;\n}\nconst InputTextArea = React.forwardRef<HTMLTextAreaElement, InputTextAreaProps>(\n  (\n    {\n      variant = \"primary\",\n      className,\n      rows = 4,\n      readOnly,\n      autoResize = false,\n      maxRows,\n      resizable = true,\n      rightSection,\n      ...props\n    },\n    ref\n  ) => {\n    const disabled = variant === \"disabled\";\n    const isReadOnlyVariant = variant === \"readOnly\";\n    const isReadOnly = isReadOnlyVariant || readOnly;\n\n    const internalRef = React.useRef<HTMLTextAreaElement | null>(null);\n    const cachedLineHeight = React.useRef<number | null>(null);\n\n    const adjustHeight = React.useCallback(() => {\n      const textarea = internalRef.current;\n      if (!textarea || !autoResize) return;\n\n      if (cachedLineHeight.current === null) {\n        cachedLineHeight.current =\n          parseFloat(getComputedStyle(textarea).lineHeight) || 20;\n      }\n      const lineHeight = cachedLineHeight.current;\n\n      // Reset to auto so scrollHeight reflects actual content\n      textarea.style.height = \"auto\";\n      textarea.style.overflowY = \"hidden\";\n\n      const minHeight = rows * lineHeight;\n      const maxHeight = maxRows ? maxRows * lineHeight : Infinity;\n\n      const contentHeight = textarea.scrollHeight;\n      const clampedHeight = Math.min(\n        Math.max(contentHeight, minHeight),\n        maxHeight\n      );\n\n      textarea.style.height = `${clampedHeight}px`;\n      textarea.style.overflowY = contentHeight > maxHeight ? \"auto\" : \"hidden\";\n    }, [autoResize, rows, maxRows]);\n\n    React.useEffect(() => {\n      adjustHeight();\n    }, [adjustHeight, props.value]);\n\n    const resizeClass = autoResize || !resizable ? \"resize-none\" : \"resize-y\";\n\n    return (\n      <div\n        className={cn(\n          wrapperClasses[variant],\n          \"flex flex-row items-start justify-between w-full h-fit p-1.5 rounded-08 relative\",\n          !isReadOnlyVariant && \"bg-background-neutral-00\",\n          className\n        )}\n      >\n        <textarea\n          ref={mergeRefs(internalRef, ref)}\n          disabled={disabled}\n          readOnly={isReadOnly}\n          className={cn(\n            \"w-full min-w-0 flex-1 min-h-[3rem] bg-transparent focus:outline-none p-0.5\",\n            resizeClass,\n            innerClasses[variant],\n            textClasses[variant]\n          )}\n          rows={rows}\n          {...props}\n        />\n        {rightSection && (\n          <div className=\"shrink-0 self-start -my-1 -mr-1 font-sans text-base\">\n            {rightSection}\n          </div>\n        )}\n      </div>\n    );\n  }\n);\nInputTextArea.displayName = \"InputTextArea\";\n\nexport default InputTextArea;\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputTypeIn.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport InputTypeIn from \"./InputTypeIn\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof InputTypeIn> = {\n  title: \"refresh-components/inputs/InputTypeIn\",\n  component: InputTypeIn,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 320 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InputTypeIn>;\n\nexport const Default: Story = {\n  args: {\n    placeholder: \"Enter text...\",\n  },\n};\n\nexport const WithPrefix: Story = {\n  args: {\n    prefixText: \"https://\",\n    placeholder: \"example.com\",\n  },\n};\n\nexport const WithSearchIcon: Story = {\n  args: {\n    leftSearchIcon: true,\n    placeholder: \"Search...\",\n  },\n};\n\nexport const WithClearButton: Story = {\n  args: {\n    showClearButton: true,\n    value: \"Some text to clear\",\n    onChange: () => {},\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    variant: \"disabled\",\n    value: \"Cannot edit\",\n  },\n};\n\nexport const Error: Story = {\n  args: {\n    variant: \"error\",\n    value: \"Invalid input\",\n    placeholder: \"Enter text...\",\n  },\n};\n\nexport const ReadOnly: Story = {\n  args: {\n    variant: \"readOnly\",\n    value: \"Read-only value\",\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/InputTypeIn.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport { cn, noProp } from \"@/lib/utils\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport { Button } from \"@opal/components\";\nimport {\n  innerClasses,\n  textClasses,\n  Variants,\n  wrapperClasses,\n} from \"@/refresh-components/inputs/styles\";\nimport { SvgSearch, SvgX } from \"@opal/icons\";\n\n/**\n * InputTypeIn Component\n *\n * A styled text input component with support for search icon, clear button,\n * and custom right section content.\n *\n * @example\n * ```tsx\n * // Basic usage\n * <InputTypeIn\n *   value={value}\n *   onChange={(e) => setValue(e.target.value)}\n *   placeholder=\"Enter text...\"\n * />\n *\n * // With search icon\n * <InputTypeIn\n *   leftSearchIcon\n *   value={search}\n *   onChange={(e) => setSearch(e.target.value)}\n *   placeholder=\"Search...\"\n * />\n *\n * // With error state\n * <InputTypeIn\n *   variant=\"error\"\n *   value={value}\n *   onChange={(e) => setValue(e.target.value)}\n * />\n *\n * // Disabled state\n * <InputTypeIn variant=\"disabled\" value=\"Cannot edit\" />\n *\n * // Read-only state (non-editable, minimal styling)\n * <InputTypeIn variant=\"readOnly\" value=\"Read-only value\" />\n *\n * // With custom right section\n * <InputTypeIn\n *   value={password}\n *   onChange={(e) => setPassword(e.target.value)}\n *   type={showPassword ? \"text\" : \"password\"}\n *   rightSection={<Button icon={SvgEye} onClick={togglePassword}/>}\n * />\n *\n * // Without clear button\n * <InputTypeIn\n *   showClearButton={false}\n *   value={value}\n *   onChange={(e) => setValue(e.target.value)}\n * />\n * ```\n */\nexport interface InputTypeInProps\n  extends Omit<React.InputHTMLAttributes<HTMLInputElement>, \"disabled\"> {\n  variant?: Variants;\n\n  prefixText?: string;\n  leftSearchIcon?: boolean;\n  rightSection?: React.ReactNode;\n  showClearButton?: boolean;\n  onClear?: () => void;\n}\nconst InputTypeIn = React.forwardRef<HTMLInputElement, InputTypeInProps>(\n  (\n    {\n      variant = \"primary\",\n      prefixText,\n      leftSearchIcon,\n      rightSection,\n      showClearButton = true,\n      onClear,\n      className,\n      value,\n      onChange,\n      readOnly,\n      ...props\n    },\n    ref\n  ) => {\n    const localInputRef = React.useRef<HTMLInputElement | null>(null);\n    const disabled = variant === \"disabled\";\n    const isReadOnlyVariant = variant === \"readOnly\";\n    const isReadOnly = isReadOnlyVariant || readOnly;\n\n    // Combine forwarded ref with local ref\n    const setInputRef = React.useCallback(\n      (node: HTMLInputElement | null) => {\n        localInputRef.current = node;\n        if (typeof ref === \"function\") {\n          ref(node);\n        } else if (ref) {\n          (ref as React.MutableRefObject<HTMLInputElement | null>).current =\n            node;\n        }\n      },\n      [ref]\n    );\n\n    const handleClear = React.useCallback(() => {\n      if (onClear) {\n        onClear();\n        return;\n      }\n\n      onChange?.({\n        target: { value: \"\" },\n        currentTarget: { value: \"\" },\n        type: \"change\",\n        bubbles: true,\n        cancelable: true,\n      } as React.ChangeEvent<HTMLInputElement>);\n    }, [onClear, onChange]);\n\n    return (\n      <div\n        className={cn(\n          \"flex flex-row items-center justify-between flex-1 h-fit p-1.5 rounded-08 relative w-full\",\n          wrapperClasses[variant],\n          className\n        )}\n        onClick={() => {\n          localInputRef.current?.focus();\n        }}\n      >\n        {leftSearchIcon && (\n          <div className=\"pr-2 pl-1\">\n            <div className=\"pl-[2px]\">\n              <SvgSearch className=\"w-[1rem] h-[1rem] stroke-text-02\" />\n            </div>\n          </div>\n        )}\n\n        {prefixText && (\n          <span className=\"select-none pointer-events-none text-text-02 pl-0.5\">\n            {prefixText}\n          </span>\n        )}\n\n        <input\n          ref={setInputRef}\n          type=\"text\"\n          disabled={disabled}\n          readOnly={isReadOnly}\n          value={value}\n          onChange={onChange}\n          className={cn(\n            \"w-full h-[1.5rem] bg-transparent p-0.5 focus:outline-none\",\n            innerClasses[variant],\n            textClasses[variant]\n          )}\n          {...props}\n        />\n\n        {showClearButton && !disabled && !isReadOnly && (\n          // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n          <IconButton\n            icon={SvgX}\n            disabled={disabled}\n            onClick={noProp(handleClear)}\n            type=\"button\"\n            internal\n            className={value ? \"\" : \"invisible\"}\n          />\n        )}\n\n        {rightSection}\n      </div>\n    );\n  }\n);\nInputTypeIn.displayName = \"InputTypeIn\";\n\nexport default InputTypeIn;\n"
  },
  {
    "path": "web/src/refresh-components/inputs/ListFieldInput.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport React from \"react\";\nimport { ListFieldInput } from \"./ListFieldInput\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof ListFieldInput> = {\n  title: \"refresh-components/inputs/ListFieldInput\",\n  component: ListFieldInput,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 400 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof ListFieldInput>;\n\nexport const Default: Story = {\n  render: function DefaultStory() {\n    const [values, setValues] = React.useState<string[]>([]);\n    return (\n      <ListFieldInput\n        values={values}\n        onChange={setValues}\n        placeholder=\"Type and press Enter...\"\n      />\n    );\n  },\n};\n\nexport const WithValues: Story = {\n  render: function WithValuesStory() {\n    const [values, setValues] = React.useState([\n      \"admin@example.com\",\n      \"user@example.com\",\n      \"dev@example.com\",\n    ]);\n    return (\n      <ListFieldInput\n        values={values}\n        onChange={setValues}\n        placeholder=\"Add email...\"\n      />\n    );\n  },\n};\n\nexport const Disabled: Story = {\n  render: () => (\n    <ListFieldInput\n      values={[\"locked-item\"]}\n      onChange={() => {}}\n      placeholder=\"Cannot edit\"\n      disabled\n    />\n  ),\n};\n\nexport const ErrorState: Story = {\n  render: function ErrorStory() {\n    const [values, setValues] = React.useState([\"invalid\"]);\n    return (\n      <ListFieldInput\n        values={values}\n        onChange={setValues}\n        placeholder=\"Add value...\"\n        error\n      />\n    );\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/ListFieldInput.tsx",
    "content": "import { useState, KeyboardEvent } from \"react\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { SvgX } from \"@opal/icons\";\ninterface ListFieldInputProps {\n  values: string[];\n  onChange: (values: string[]) => void;\n  placeholder?: string;\n  disabled?: boolean;\n  error?: boolean;\n}\n\n/**\n * ListFieldInput is a component that allows the user to input a list of values by typing and pressing Enter.\n * It displays the values in a list of chips, and allows the user to add and remove values.\n\n * @param values - The array of values to display in the input field.\n * @param onChange - The function to call when the value changes.\n * @param placeholder - The placeholder text to display in the input field.\n * @param disabled - Whether the input field is disabled.\n **/\nexport function ListFieldInput({\n  values,\n  onChange,\n  placeholder = \"\",\n  disabled = false,\n  error = false,\n}: ListFieldInputProps) {\n  const [inputValue, setInputValue] = useState(\"\");\n\n  const handleKeyDown = (e: KeyboardEvent<HTMLInputElement>) => {\n    if (e.key === \"Enter\" && inputValue.trim()) {\n      e.preventDefault();\n      const trimmedValue = inputValue.trim();\n\n      // Avoid duplicates\n      if (!values.includes(trimmedValue)) {\n        onChange([...values, trimmedValue]);\n      }\n\n      setInputValue(\"\");\n    }\n  };\n\n  const removeValue = (indexToRemove: number) => {\n    onChange(values.filter((_, index) => index !== indexToRemove));\n  };\n\n  return (\n    <div className=\"flex flex-col w-full space-y-2 mb-4\">\n      <InputTypeIn\n        placeholder={placeholder}\n        value={inputValue}\n        variant={disabled ? \"disabled\" : error ? \"error\" : undefined}\n        onChange={(e) => setInputValue(e.target.value)}\n        onKeyDown={handleKeyDown}\n      />\n\n      <div className=\"mt-3\">\n        <div className=\"flex flex-wrap gap-1.5\">\n          {values.map((value, index) => (\n            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n            <Button\n              key={index}\n              internal\n              secondary\n              type=\"button\"\n              aria-label={`Remove ${value}`}\n              onClick={() => removeValue(index)}\n              rightIcon={SvgX}\n              className=\"rounded h-8\"\n            >\n              {value}\n            </Button>\n          ))}\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/inputs/PasswordInputTypeIn.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport React from \"react\";\nimport PasswordInputTypeIn from \"./PasswordInputTypeIn\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof PasswordInputTypeIn> = {\n  title: \"refresh-components/inputs/PasswordInputTypeIn\",\n  component: PasswordInputTypeIn,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ width: 320 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof PasswordInputTypeIn>;\n\nexport const Default: Story = {\n  render: function DefaultStory() {\n    const [value, setValue] = React.useState(\"\");\n    return (\n      <PasswordInputTypeIn\n        placeholder=\"Enter password...\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n      />\n    );\n  },\n};\n\nexport const WithValue: Story = {\n  render: function WithValueStory() {\n    const [value, setValue] = React.useState(\"supersecret123\");\n    return (\n      <PasswordInputTypeIn\n        placeholder=\"Enter password...\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n      />\n    );\n  },\n};\n\nexport const NonRevealable: Story = {\n  render: function NonRevealableStory() {\n    const [value, setValue] = React.useState(\"stored-secret-value\");\n    return (\n      <PasswordInputTypeIn\n        placeholder=\"Stored secret\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n        isNonRevealable\n      />\n    );\n  },\n};\n\nexport const Disabled: Story = {\n  render: () => (\n    <PasswordInputTypeIn\n      placeholder=\"Cannot edit\"\n      value=\"disabled-password\"\n      onChange={() => {}}\n      disabled\n    />\n  ),\n};\n\nexport const ErrorState: Story = {\n  render: function ErrorStory() {\n    const [value, setValue] = React.useState(\"bad\");\n    return (\n      <PasswordInputTypeIn\n        placeholder=\"Enter password...\"\n        value={value}\n        onChange={(e) => setValue(e.target.value)}\n        error\n      />\n    );\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/PasswordInputTypeIn.test.ts",
    "content": "import { computeMaskedInputChange } from \"./PasswordInputTypeIn\";\n\ndescribe(\"computeMaskedInputChange\", () => {\n  const MASK = \"∗\"; // ASTERISK OPERATOR (U+2217)\n\n  test(\"handles typing at any position\", () => {\n    // Typing \"x\" in middle of \"abcd\" -> \"abxcd\"\n    const result = computeMaskedInputChange(\n      MASK.repeat(2) + \"x\" + MASK.repeat(2),\n      \"abcd\",\n      3,\n      { start: 2, end: 2 }\n    );\n    expect(result).toEqual({ newValue: \"abxcd\", cursorPosition: 3 });\n  });\n\n  test(\"handles deletion\", () => {\n    // Delete at position 1 of \"abcd\" -> \"acd\"\n    const result = computeMaskedInputChange(MASK.repeat(3), \"abcd\", 1, {\n      start: 1,\n      end: 1,\n    });\n    expect(result).toEqual({ newValue: \"acd\", cursorPosition: 1 });\n  });\n\n  test(\"handles selection replacement\", () => {\n    // Select \"bc\" in \"abcd\", type \"xyz\" -> \"axyzd\"\n    const result = computeMaskedInputChange(MASK + \"xyz\" + MASK, \"abcd\", 4, {\n      start: 1,\n      end: 3,\n    });\n    expect(result).toEqual({ newValue: \"axyzd\", cursorPosition: 4 });\n  });\n\n  test(\"handles clearing the field\", () => {\n    const result = computeMaskedInputChange(\"\", \"password\", 0, {\n      start: 0,\n      end: 8,\n    });\n    expect(result).toEqual({ newValue: \"\", cursorPosition: 0 });\n  });\n\n  test(\"preserves mask character in user input\", () => {\n    // Pasting \"∗∗\" to replace \"bc\" in \"abcd\" -> \"a∗∗d\"\n    const result = computeMaskedInputChange(\n      MASK.repeat(4), // display shows 4 masks\n      \"abcd\",\n      3,\n      { start: 1, end: 3 }\n    );\n    expect(result).toEqual({ newValue: \"a∗∗d\", cursorPosition: 3 });\n  });\n});\n"
  },
  {
    "path": "web/src/refresh-components/inputs/PasswordInputTypeIn.tsx",
    "content": "\"use client\";\n\nimport * as React from \"react\";\nimport InputTypeIn, {\n  InputTypeInProps,\n} from \"@/refresh-components/inputs/InputTypeIn\";\nimport { Button } from \"@opal/components\";\nimport { noProp } from \"@/lib/utils\";\nimport { SvgEye, SvgEyeClosed } from \"@opal/icons\";\n\n/**\n * Custom mask character for password display.\n *\n * We use ASTERISK OPERATOR (U+2217) instead of the browser's native password\n * masking (typically bullet •) to follow our design guidelines. This requires\n * custom change handling logic to track the real value while displaying masks.\n */\nconst MASK_CHARACTER = \"∗\";\n\n// Backend placeholder pattern - indicates a stored value that can't be revealed\nconst BACKEND_PLACEHOLDER_PATTERN = /^•+$/; // All bullet characters (U+2022)\n\n/**\n * Check if a value is a backend placeholder (all bullet characters).\n * The backend sends this to indicate a stored secret exists without revealing it.\n */\nfunction isBackendPlaceholder(value: string): boolean {\n  return !!value && BACKEND_PLACEHOLDER_PATTERN.test(value);\n}\n\nexport interface SelectionRange {\n  start: number;\n  end: number;\n}\n\nexport interface MaskedInputChangeResult {\n  newValue: string;\n  cursorPosition: number;\n}\n\n/**\n * Computes the real value from a masked input change event.\n *\n * Since we display mask characters (∗) instead of the actual password,\n * we need to reverse-engineer what the user typed/deleted by comparing\n * the new display value with the previous real value and selection state.\n *\n * @param newDisplayValue - The new value from the input (mix of masks and typed chars)\n * @param previousValue - The actual password value before the change\n * @param cursorPosition - Current cursor position after the change\n * @param previousSelection - Selection range before the change occurred\n * @returns The computed real value and where to place the cursor\n */\nexport function computeMaskedInputChange(\n  newDisplayValue: string,\n  previousValue: string,\n  cursorPosition: number,\n  previousSelection: SelectionRange\n): MaskedInputChangeResult {\n  const oldLength = previousValue.length;\n  const newLength = newDisplayValue.length;\n  const hadSelection = previousSelection.end > previousSelection.start;\n\n  // Field was cleared\n  if (newLength === 0) {\n    return { newValue: \"\", cursorPosition: 0 };\n  }\n\n  // Text was selected and replaced/deleted\n  if (hadSelection) {\n    const selectionLength = previousSelection.end - previousSelection.start;\n    const insertedLength = newLength - oldLength + selectionLength;\n\n    // Extract inserted characters from their position in the display value\n    const insertedChars = newDisplayValue.slice(\n      previousSelection.start,\n      previousSelection.start + insertedLength\n    );\n\n    const newValue =\n      previousValue.slice(0, previousSelection.start) +\n      insertedChars +\n      previousValue.slice(previousSelection.end);\n\n    return {\n      newValue,\n      cursorPosition: previousSelection.start + insertedChars.length,\n    };\n  }\n\n  // Characters were added (typed or pasted) without selection\n  if (newLength > oldLength) {\n    const charsAdded = newLength - oldLength;\n    const insertPos = cursorPosition - charsAdded;\n    const addedChars = newDisplayValue.slice(insertPos, cursorPosition);\n\n    return {\n      newValue:\n        previousValue.slice(0, insertPos) +\n        addedChars +\n        previousValue.slice(insertPos),\n      cursorPosition,\n    };\n  }\n\n  // Characters were deleted without selection\n  if (newLength < oldLength) {\n    const charsDeleted = oldLength - newLength;\n    const deleteEnd = cursorPosition + charsDeleted;\n\n    return {\n      newValue:\n        previousValue.slice(0, cursorPosition) + previousValue.slice(deleteEnd),\n      cursorPosition,\n    };\n  }\n\n  // Same length without selection - no change\n  return { newValue: previousValue, cursorPosition };\n}\n\nexport interface PasswordInputTypeInProps\n  extends Omit<\n    InputTypeInProps,\n    \"type\" | \"rightSection\" | \"leftSearchIcon\" | \"variant\"\n  > {\n  /**\n   * Ref to the input element.\n   */\n  ref?: React.Ref<HTMLInputElement>;\n  /**\n   * Whether the input is disabled.\n   */\n  disabled?: boolean;\n  /**\n   * Whether the input has an error.\n   */\n  error?: boolean;\n  /**\n   * When true, the reveal toggle is disabled.\n   * Use this when displaying a stored/masked value from the backend\n   * that cannot actually be revealed.\n   * The input remains editable so users can type a new value.\n   */\n  isNonRevealable?: boolean;\n}\n\n/**\n * PasswordInputTypeIn Component\n *\n * A password input with custom mask character (∗) and reveal/hide toggle.\n * Built on top of InputTypeIn for consistency.\n *\n * Features:\n * - Custom mask character (∗) instead of browser default\n * - Show/hide toggle button only visible when input has value or is focused\n * - When revealed, the toggle icon uses action style (more prominent)\n * - When hidden, the toggle icon uses internal style (muted)\n * - Optional `isNonRevealable` prop to disable reveal (for stored backend values)\n */\nexport default function PasswordInputTypeIn({\n  ref,\n  isNonRevealable = false,\n  value,\n  onChange,\n  onFocus,\n  onBlur,\n  disabled,\n  error,\n  showClearButton = false,\n  ...props\n}: PasswordInputTypeInProps) {\n  const [isPasswordVisible, setIsPasswordVisible] = React.useState(false);\n  const [isFocused, setIsFocused] = React.useState(false);\n  const containerRef = React.useRef<HTMLDivElement>(null);\n\n  // Track selection range before changes occur\n  const selectionRef = React.useRef<{ start: number; end: number }>({\n    start: 0,\n    end: 0,\n  });\n\n  const realValue = String(value || \"\");\n  const hasValue = realValue.length > 0;\n  const effectiveNonRevealable =\n    isNonRevealable || isBackendPlaceholder(realValue);\n  const isHidden = !isPasswordVisible || effectiveNonRevealable;\n\n  const getDisplayValue = (): string => {\n    if (isHidden) {\n      return MASK_CHARACTER.repeat(realValue.length);\n    }\n    return realValue;\n  };\n\n  const handleContainerFocus = React.useCallback(() => {\n    setIsFocused(true);\n  }, []);\n\n  const handleContainerBlur = React.useCallback(\n    (e: React.FocusEvent<HTMLDivElement>) => {\n      if (containerRef.current?.contains(e.relatedTarget as Node)) {\n        return;\n      }\n      setIsFocused(false);\n    },\n    []\n  );\n\n  const handleFocus = React.useCallback(\n    (e: React.FocusEvent<HTMLInputElement>) => {\n      onFocus?.(e);\n    },\n    [onFocus]\n  );\n\n  const handleBlur = React.useCallback(\n    (e: React.FocusEvent<HTMLInputElement>) => {\n      onBlur?.(e);\n    },\n    [onBlur]\n  );\n\n  // Track selection before any change occurs (used by both onSelect and onKeyDown)\n  const captureSelection = React.useCallback(\n    (e: React.SyntheticEvent<HTMLInputElement>) => {\n      const target = e.target as HTMLInputElement;\n      selectionRef.current = {\n        start: target.selectionStart ?? 0,\n        end: target.selectionEnd ?? 0,\n      };\n    },\n    []\n  );\n\n  const handleChange = React.useCallback(\n    (e: React.ChangeEvent<HTMLInputElement>) => {\n      // When visible, pass through directly - no masking needed\n      if (!isHidden) {\n        onChange?.(e);\n        return;\n      }\n\n      const input = e.target;\n      const cursorPos = input.selectionStart ?? input.value.length;\n\n      // Compute the real value from the masked input change\n      const result = computeMaskedInputChange(\n        input.value,\n        realValue,\n        cursorPos,\n        selectionRef.current\n      );\n\n      // Restore cursor position after React re-renders with new masked value\n      requestAnimationFrame(() => {\n        if (input && document.activeElement === input) {\n          input.setSelectionRange(result.cursorPosition, result.cursorPosition);\n        }\n      });\n\n      // Create synthetic event for Formik compatibility\n      const syntheticEvent = {\n        target: { name: input.name, value: result.newValue, type: \"text\" },\n        currentTarget: {\n          name: input.name,\n          value: result.newValue,\n          type: \"text\",\n        },\n        type: \"change\",\n        persist: () => {},\n      } as unknown as React.ChangeEvent<HTMLInputElement>;\n\n      onChange?.(syntheticEvent);\n    },\n    [isHidden, realValue, onChange]\n  );\n\n  const showToggleButton = hasValue || isFocused;\n  const isRevealed = isPasswordVisible && !effectiveNonRevealable;\n  const toggleLabel = effectiveNonRevealable\n    ? \"Value cannot be revealed\"\n    : isPasswordVisible\n      ? \"Hide password\"\n      : \"Show password\";\n\n  return (\n    <div\n      ref={containerRef}\n      className=\"contents\"\n      onFocus={handleContainerFocus}\n      onBlur={handleContainerBlur}\n    >\n      <InputTypeIn\n        ref={ref}\n        value={getDisplayValue()}\n        onChange={handleChange}\n        onFocus={handleFocus}\n        onBlur={handleBlur}\n        onSelect={captureSelection}\n        onKeyDown={captureSelection}\n        variant={disabled ? \"disabled\" : error ? \"error\" : undefined}\n        showClearButton={showClearButton}\n        autoComplete=\"off\"\n        data-ph-no-capture\n        rightSection={\n          showToggleButton ? (\n            <Button\n              disabled={disabled || effectiveNonRevealable}\n              icon={isRevealed ? SvgEye : SvgEyeClosed}\n              onClick={noProp(() => setIsPasswordVisible((v) => !v))}\n              type=\"button\"\n              variant={isRevealed ? \"action\" : undefined}\n              prominence=\"tertiary\"\n              size=\"sm\"\n              tooltipSide=\"left\"\n              tooltip={toggleLabel}\n              aria-label={toggleLabel}\n            />\n          ) : undefined\n        }\n        {...props}\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/inputs/Switch.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Switch from \"./Switch\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof Switch> = {\n  title: \"refresh-components/inputs/Switch\",\n  component: Switch,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Switch>;\n\nexport const Default: Story = {\n  args: {},\n};\n\nexport const Checked: Story = {\n  args: {\n    checked: true,\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    disabled: true,\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/inputs/Switch.tsx",
    "content": "\"use client\";\n\nimport React, { useState } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { WithoutStyles } from \"@/types\";\n\nexport interface SwitchProps\n  extends WithoutStyles<\n    Omit<React.ComponentPropsWithoutRef<\"button\">, \"onChange\">\n  > {\n  // Switch variants\n  disabled?: boolean;\n\n  checked?: boolean;\n  defaultChecked?: boolean;\n  onCheckedChange?: (checked: boolean) => void;\n}\n\nconst Switch = React.forwardRef<HTMLButtonElement, SwitchProps>(\n  (\n    {\n      disabled,\n\n      checked: controlledChecked,\n      defaultChecked,\n      onCheckedChange,\n\n      onClick,\n      ...props\n    },\n    ref\n  ) => {\n    const [uncontrolledChecked, setUncontrolledChecked] = useState(\n      defaultChecked ?? false\n    );\n\n    const isControlled = controlledChecked !== undefined;\n    const checked = isControlled ? controlledChecked : uncontrolledChecked;\n\n    function handleClick(event: React.MouseEvent<HTMLButtonElement>) {\n      if (disabled) return;\n\n      const newChecked = !checked;\n\n      if (!isControlled) setUncontrolledChecked(newChecked);\n      onClick?.(event);\n      onCheckedChange?.(newChecked);\n    }\n\n    return (\n      <button\n        ref={ref}\n        type=\"button\"\n        role=\"switch\"\n        aria-checked={checked}\n        className={cn(\n          \"peer inline-flex h-[1.125rem] w-[2rem] shrink-0 cursor-pointer items-center rounded-full transition-colors focus-visible:outline-none\",\n          disabled\n            ? checked\n              ? \"switch-disabled-checked\"\n              : \"switch-disabled\"\n            : checked\n              ? \"switch-normal-checked\"\n              : \"switch-normal\"\n        )}\n        disabled={disabled}\n        onClick={handleClick}\n        {...props}\n      >\n        <span\n          className={cn(\n            \"pointer-events-none block h-[0.875rem] w-[0.875rem] rounded-full ring-0 transition-transform\",\n            checked ? \"translate-x-[15px]\" : \"translate-x-[1px]\",\n            disabled ? \"switch-thumb-disabled\" : \"switch-thumb\"\n          )}\n        />\n      </button>\n    );\n  }\n);\nSwitch.displayName = \"Switch\";\n\nexport default Switch;\n"
  },
  {
    "path": "web/src/refresh-components/inputs/styles.ts",
    "content": "export type Variants =\n  | \"primary\"\n  | \"internal\"\n  | \"error\"\n  | \"disabled\"\n  | \"readOnly\";\n\ntype ClassNamesMap = Record<Variants, string | null>;\n\nexport const MIN_WIDTH_CLASS = \"min-w-[14rem]\";\n\nexport const wrapperClasses: ClassNamesMap = {\n  primary: \"input-normal\",\n  internal: null,\n  error: \"input-error\",\n  disabled: \"input-disabled\",\n  readOnly: \"bg-transparent border rounded-08\",\n} as const;\n\nexport const innerClasses: ClassNamesMap = {\n  primary:\n    \"text-text-04 placeholder:!font-secondary-body placeholder:text-text-02\",\n  internal: null,\n  error: null,\n  disabled: \"text-text-02\",\n  readOnly: null,\n} as const;\n\nexport const iconClasses: ClassNamesMap = {\n  primary: \"stroke-text-03\",\n  internal: \"stroke-text-03\",\n  error: \"stroke-text-03\",\n  disabled: \"stroke-text-01\",\n  readOnly: \"stroke-text-01\",\n} as const;\n\nexport const textClasses: ClassNamesMap = {\n  primary: \"text-text-04\",\n  internal: \"text-text-04\",\n  error: \"text-text-04\",\n  disabled: \"text-text-01\",\n  readOnly: \"text-text-01\",\n} as const;\n"
  },
  {
    "path": "web/src/refresh-components/layouts/ConfirmationModalLayout.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { useState } from \"react\";\nimport ConfirmationModalLayout from \"./ConfirmationModalLayout\";\nimport { SvgAlertTriangle, SvgTrash, SvgCheckCircle } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\n\nconst meta: Meta<typeof ConfirmationModalLayout> = {\n  title: \"refresh-components/modals/ConfirmationModalLayout\",\n  component: ConfirmationModalLayout,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"fullscreen\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof ConfirmationModalLayout>;\n\n/**\n * NOTE: ConfirmationModalLayout calls `useModalClose` internally, which reads\n * from ModalContext. Outside of that context, it falls back to the `onClose`\n * prop, so these stories work without wrapping in a ModalContext provider.\n */\n\nexport const DeleteConfirmation: Story = {\n  render: () => {\n    const [open, setOpen] = useState(true);\n    return (\n      <>\n        <button onClick={() => setOpen(true)}>Open Modal</button>\n        {open && (\n          <ConfirmationModalLayout\n            icon={SvgTrash}\n            title=\"Delete Item\"\n            description=\"Are you sure you want to delete this item? This action cannot be undone.\"\n            submit={\n              <Button variant=\"danger\" onClick={() => setOpen(false)}>\n                Delete\n              </Button>\n            }\n            onClose={() => setOpen(false)}\n          />\n        )}\n      </>\n    );\n  },\n};\n\nexport const WarningConfirmation: Story = {\n  render: () => {\n    const [open, setOpen] = useState(true);\n    return (\n      <>\n        <button onClick={() => setOpen(true)}>Open Modal</button>\n        {open && (\n          <ConfirmationModalLayout\n            icon={SvgAlertTriangle}\n            title=\"Proceed with Caution\"\n            description=\"This operation will affect all users in the organization.\"\n            submit={<Button onClick={() => setOpen(false)}>Confirm</Button>}\n            onClose={() => setOpen(false)}\n          />\n        )}\n      </>\n    );\n  },\n};\n\nexport const WithChildren: Story = {\n  render: () => {\n    const [open, setOpen] = useState(true);\n    return (\n      <>\n        <button onClick={() => setOpen(true)}>Open Modal</button>\n        {open && (\n          <ConfirmationModalLayout\n            icon={SvgCheckCircle}\n            title=\"Review Changes\"\n            description=\"Please review the following changes before confirming.\"\n            submit={<Button onClick={() => setOpen(false)}>Approve</Button>}\n            onClose={() => setOpen(false)}\n          >\n            <ul style={{ listStyle: \"disc\", paddingLeft: 20 }}>\n              <li>Updated email notification settings</li>\n              <li>Changed default connector timeout to 30s</li>\n              <li>Enabled automatic document syncing</li>\n            </ul>\n          </ConfirmationModalLayout>\n        )}\n      </>\n    );\n  },\n};\n\nexport const HiddenCancel: Story = {\n  render: () => {\n    const [open, setOpen] = useState(true);\n    return (\n      <>\n        <button onClick={() => setOpen(true)}>Open Modal</button>\n        {open && (\n          <ConfirmationModalLayout\n            icon={SvgCheckCircle}\n            title=\"Welcome!\"\n            description=\"Thanks for signing up. Let's get you started.\"\n            hideCancel\n            submit={<Button onClick={() => setOpen(false)}>Get Started</Button>}\n            onClose={() => setOpen(false)}\n          />\n        )}\n      </>\n    );\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/layouts/ConfirmationModalLayout.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport type { IconProps } from \"@opal/types\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { useModalClose } from \"../contexts/ModalContext\";\n\nexport interface ConfirmationModalProps {\n  icon: React.FunctionComponent<IconProps>;\n  title: string;\n  description?: string;\n  children?: React.ReactNode;\n\n  submit: React.ReactNode;\n  hideCancel?: boolean;\n  onClose?: () => void;\n  /** If false, removes the gray background from the body. Defaults to true. */\n  twoTone?: boolean;\n}\n\nexport default function ConfirmationModalLayout({\n  icon,\n  title,\n  description,\n  children,\n\n  submit,\n  hideCancel,\n  onClose: externalOnClose,\n  twoTone = true,\n}: ConfirmationModalProps) {\n  const onClose = useModalClose(externalOnClose);\n\n  return (\n    <Modal open onOpenChange={(isOpen) => !isOpen && onClose?.()}>\n      <Modal.Content width=\"sm\">\n        <Modal.Header\n          icon={icon}\n          title={title}\n          description={description}\n          onClose={onClose}\n        />\n        <Modal.Body twoTone={twoTone}>\n          {typeof children === \"string\" ? (\n            <Text as=\"p\" text03>\n              {children}\n            </Text>\n          ) : (\n            children\n          )}\n        </Modal.Body>\n        <Modal.Footer>\n          {!hideCancel && (\n            <Button prominence=\"secondary\" onClick={onClose}>\n              Cancel\n            </Button>\n          )}\n          {submit}\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/loaders/SimpleLoader.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport SimpleLoader from \"./SimpleLoader\";\n\nconst meta: Meta<typeof SimpleLoader> = {\n  title: \"refresh-components/loaders/SimpleLoader\",\n  component: SimpleLoader,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof SimpleLoader>;\n\nexport const Default: Story = {\n  args: {},\n};\n\nexport const Large: Story = {\n  args: {\n    className: \"h-8 w-8\",\n  },\n};\n\nexport const CustomColor: Story = {\n  args: {\n    className: \"h-6 w-6 stroke-text-05\",\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/loaders/SimpleLoader.tsx",
    "content": "import type { IconProps } from \"@opal/types\";\nimport { cn } from \"@/lib/utils\";\nimport { SvgLoader } from \"@opal/icons\";\n\nexport default function SimpleLoader({ className, ...props }: IconProps) {\n  return (\n    <SvgLoader\n      className={cn(\"h-[1rem] w-[1rem] animate-spin\", className)}\n      {...props}\n    />\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/messages/FieldMessage.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport { FieldMessage } from \"./FieldMessage\";\n\nconst meta: Meta<typeof FieldMessage> = {\n  title: \"refresh-components/messages/FieldMessage\",\n  component: FieldMessage,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof FieldMessage>;\n\nexport const Error: Story = {\n  args: {\n    variant: \"error\",\n    children: (\n      <FieldMessage.Content>This field is required.</FieldMessage.Content>\n    ),\n  },\n};\n\nexport const Success: Story = {\n  args: {\n    variant: \"success\",\n    children: (\n      <FieldMessage.Content>Username is available!</FieldMessage.Content>\n    ),\n  },\n};\n\nexport const Warning: Story = {\n  args: {\n    variant: \"warning\",\n    children: (\n      <FieldMessage.Content>This action cannot be undone.</FieldMessage.Content>\n    ),\n  },\n};\n\nexport const Loading: Story = {\n  args: {\n    variant: \"loading\",\n    children: (\n      <FieldMessage.Content>Checking availability...</FieldMessage.Content>\n    ),\n  },\n};\n\nexport const Info: Story = {\n  args: {\n    variant: \"info\",\n    children: (\n      <FieldMessage.Content>\n        Passwords must be at least 8 characters.\n      </FieldMessage.Content>\n    ),\n  },\n};\n\nexport const Idle: Story = {\n  args: {\n    variant: \"idle\",\n    children: (\n      <FieldMessage.Content>Enter your email address.</FieldMessage.Content>\n    ),\n  },\n};\n\nexport const AllVariants: Story = {\n  name: \"All Variants\",\n  render: () => (\n    <div style={{ display: \"flex\", flexDirection: \"column\", gap: 12 }}>\n      <FieldMessage variant=\"error\">\n        <FieldMessage.Content>Error message</FieldMessage.Content>\n      </FieldMessage>\n      <FieldMessage variant=\"success\">\n        <FieldMessage.Content>Success message</FieldMessage.Content>\n      </FieldMessage>\n      <FieldMessage variant=\"warning\">\n        <FieldMessage.Content>Warning message</FieldMessage.Content>\n      </FieldMessage>\n      <FieldMessage variant=\"loading\">\n        <FieldMessage.Content>Loading message</FieldMessage.Content>\n      </FieldMessage>\n      <FieldMessage variant=\"info\">\n        <FieldMessage.Content>Info message</FieldMessage.Content>\n      </FieldMessage>\n      <FieldMessage variant=\"idle\">\n        <FieldMessage.Content>Idle message</FieldMessage.Content>\n      </FieldMessage>\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/messages/FieldMessage.tsx",
    "content": "import { cn } from \"@/lib/utils\";\nimport React from \"react\";\nimport Text from \"../texts/Text\";\nimport {\n  SvgAlertCircle,\n  SvgCheckCircle,\n  SvgLoader,\n  SvgXOctagon,\n} from \"@opal/icons\";\ntype MessageVariant =\n  | \"error\"\n  | \"success\"\n  | \"loading\"\n  | \"warning\"\n  | \"info\"\n  | \"idle\";\n\nconst iconMap: Record<MessageVariant, React.ReactNode> = {\n  error: <SvgXOctagon className=\"h-3 w-3 stroke-status-error-05\" />,\n  success: <SvgCheckCircle className=\"h-3 w-3 stroke-status-success-05\" />,\n  loading: <SvgLoader className=\"h-3 w-3 stroke-text-02 animate-spin\" />,\n  warning: <SvgAlertCircle className=\"h-3 w-3 stroke-status-warning-05\" />,\n  info: <SvgAlertCircle className=\"h-3 w-3 stroke-text-03\" />,\n  idle: null,\n};\n\ninterface FieldMessageRootProps extends React.HTMLAttributes<HTMLDivElement> {\n  variant: MessageVariant;\n  children: React.ReactNode;\n}\n\nconst FieldMessageRoot: React.FC<FieldMessageRootProps> = ({\n  variant,\n  className,\n  children,\n  ...props\n}) => {\n  const icon = iconMap[variant];\n\n  return (\n    <div\n      className={cn(\"flex flex-row items-center gap-x-0.5\", className)}\n      {...props}\n    >\n      {icon !== null && (\n        <div className=\"w-4 h-4 flex items-center justify-center\">{icon}</div>\n      )}\n      {children}\n    </div>\n  );\n};\n\ninterface FieldMessageContentProps\n  extends React.HTMLAttributes<HTMLParagraphElement> {\n  children: React.ReactNode;\n}\n\nconst FieldMessageContent: React.FC<FieldMessageContentProps> = ({\n  className,\n  children,\n  ...props\n}) => {\n  return (\n    <Text\n      as=\"p\"\n      text03\n      secondaryBody\n      className={cn(\"ml-0.5\", className)}\n      {...props}\n    >\n      {children}\n    </Text>\n  );\n};\n\nexport const FieldMessage = Object.assign(FieldMessageRoot, {\n  Content: FieldMessageContent,\n});\n"
  },
  {
    "path": "web/src/refresh-components/messages/InfoBlock.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport InfoBlock from \"./InfoBlock\";\nimport { SvgAlertCircle, SvgCheckCircle, SvgSettings } from \"@opal/icons\";\n\nconst meta: Meta<typeof InfoBlock> = {\n  title: \"refresh-components/messages/InfoBlock\",\n  component: InfoBlock,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof InfoBlock>;\n\nexport const Default: Story = {\n  args: {\n    icon: SvgAlertCircle,\n    title: \"Important Notice\",\n    description: \"This is a description providing additional context.\",\n  },\n};\n\nexport const TitleOnly: Story = {\n  args: {\n    icon: SvgCheckCircle,\n    title: \"All systems operational\",\n  },\n};\n\nexport const WithCustomIcon: Story = {\n  args: {\n    icon: SvgSettings,\n    title: \"Configuration Required\",\n    description: \"Please update your settings before continuing.\",\n  },\n};\n\nexport const LongContent: Story = {\n  args: {\n    icon: SvgAlertCircle,\n    title:\n      \"This is a very long title that should get truncated when it exceeds the available width\",\n    description:\n      \"And this is a very long description that provides detailed context about the situation at hand and should also truncate gracefully.\",\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/messages/InfoBlock.tsx",
    "content": "\"use client\";\n\nimport React, { memo } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport type { IconProps } from \"@opal/types\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\n\nexport interface InfoBlockProps extends React.HTMLAttributes<HTMLDivElement> {\n  icon: React.FunctionComponent<IconProps>;\n  title: string;\n  description?: string;\n  iconClassName?: string;\n}\n\nconst InfoBlockInner = React.forwardRef<HTMLDivElement, InfoBlockProps>(\n  (\n    { icon: Icon, title, description, iconClassName, className, ...props },\n    ref\n  ) => {\n    return (\n      <div\n        ref={ref}\n        className={cn(\"flex flex-row items-start gap-1\", className)}\n        {...props}\n      >\n        {/* Icon Container */}\n        <div className=\"flex items-center justify-center p-0.5 size-5 shrink-0\">\n          <Icon className={cn(\"size-4 stroke-text-02\", iconClassName)} />\n        </div>\n\n        {/* Text Content */}\n        <div className=\"flex flex-col flex-1 items-start min-w-0\">\n          <Truncated mainUiAction text04>\n            {title}\n          </Truncated>\n          {description && (\n            <Truncated secondaryBody text03>\n              {description}\n            </Truncated>\n          )}\n        </div>\n      </div>\n    );\n  }\n);\nconst InfoBlock = memo(InfoBlockInner);\nInfoBlock.displayName = \"InfoBlock\";\n\nexport default InfoBlock;\n"
  },
  {
    "path": "web/src/refresh-components/messages/Message.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Message from \"./Message\";\n\nconst meta: Meta<typeof Message> = {\n  title: \"refresh-components/messages/Message\",\n  component: Message,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Message>;\n\nexport const Default: Story = {\n  args: {\n    text: \"This is a default message.\",\n  },\n};\n\nexport const FlashInfo: Story = {\n  args: {\n    flash: true,\n    info: true,\n    text: \"Your changes have been saved.\",\n    description: \"The settings will take effect immediately.\",\n  },\n};\n\nexport const FlashSuccess: Story = {\n  args: {\n    flash: true,\n    success: true,\n    text: \"Operation completed successfully!\",\n  },\n};\n\nexport const FlashWarning: Story = {\n  args: {\n    flash: true,\n    warning: true,\n    text: \"Your session is about to expire.\",\n    description: \"Please save your work before the session ends.\",\n  },\n};\n\nexport const FlashError: Story = {\n  args: {\n    flash: true,\n    error: true,\n    text: \"Something went wrong.\",\n    description: \"Please try again or contact support.\",\n  },\n};\n\nexport const StaticInfo: Story = {\n  args: {\n    static: true,\n    info: true,\n    text: \"This is informational.\",\n    description: \"Here is some extra context.\",\n  },\n};\n\nexport const StaticSuccess: Story = {\n  args: {\n    static: true,\n    success: true,\n    text: \"All checks passed.\",\n  },\n};\n\nexport const StaticWarning: Story = {\n  args: {\n    static: true,\n    warning: true,\n    text: \"Proceed with caution.\",\n  },\n};\n\nexport const StaticError: Story = {\n  args: {\n    static: true,\n    error: true,\n    text: \"Failed to load resource.\",\n  },\n};\n\nexport const MediumSize: Story = {\n  args: {\n    flash: true,\n    info: true,\n    medium: true,\n    text: \"Medium sized message.\",\n    description: \"Compact layout for tight spaces.\",\n  },\n};\n\nexport const WithAction: Story = {\n  args: {\n    flash: true,\n    warning: true,\n    text: \"Unsaved changes detected.\",\n    actions: \"Undo\",\n    onAction: () => alert(\"Action clicked\"),\n  },\n};\n\nexport const WithoutIcon: Story = {\n  args: {\n    flash: true,\n    info: true,\n    icon: false,\n    text: \"Message without an icon.\",\n  },\n};\n\nexport const WithoutCloseButton: Story = {\n  args: {\n    flash: true,\n    success: true,\n    close: false,\n    text: \"This message cannot be dismissed.\",\n  },\n};\n\nexport const AllLevels: Story = {\n  name: \"All Levels (Flash / Large)\",\n  render: () => (\n    <div style={{ display: \"flex\", flexDirection: \"column\", gap: 16 }}>\n      <Message flash default text=\"Default flash message\" />\n      <Message flash info text=\"Info flash message\" />\n      <Message flash success text=\"Success flash message\" />\n      <Message flash warning text=\"Warning flash message\" />\n      <Message flash error text=\"Error flash message\" />\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/messages/Message.tsx",
    "content": "\"use client\";\n\nimport React, { useMemo } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport {\n  SvgAlertCircle,\n  SvgAlertTriangle,\n  SvgCheckCircle,\n  SvgX,\n  SvgXOctagon,\n} from \"@opal/icons\";\nimport type { IconFunctionComponent } from \"@opal/types\";\n\nconst containerClasses = {\n  flash: {\n    default: {\n      large: [\n        \"bg-background-neutral-00\",\n        \"shadow-02\",\n        \"rounded-16\",\n        \"w-[40rem]\",\n      ],\n      medium: [\n        \"bg-background-neutral-00\",\n        \"shadow-02\",\n        \"rounded-12\",\n        \"w-[19.375rem]\",\n      ],\n    },\n    info: {\n      large: [\n        \"bg-status-info-00\",\n        \"border\",\n        \"border-status-info-05\",\n        \"rounded-16\",\n        \"w-[40rem]\",\n      ],\n      medium: [\n        \"bg-status-info-00\",\n        \"border\",\n        \"border-status-info-02\",\n        \"rounded-12\",\n        \"w-[19.375rem]\",\n      ],\n    },\n    success: {\n      large: [\n        \"bg-status-success-00\",\n        \"border\",\n        \"border-status-success-05\",\n        \"rounded-16\",\n        \"w-[40rem]\",\n      ],\n      medium: [\n        \"bg-status-success-00\",\n        \"border\",\n        \"border-status-success-02\",\n        \"rounded-12\",\n        \"w-[19.375rem]\",\n      ],\n    },\n    warning: {\n      large: [\n        \"bg-status-warning-00\",\n        \"border\",\n        \"border-status-warning-05\",\n        \"rounded-16\",\n        \"w-[40rem]\",\n      ],\n      medium: [\n        \"bg-status-warning-00\",\n        \"border\",\n        \"border-status-warning-02\",\n        \"rounded-12\",\n        \"w-[19.375rem]\",\n      ],\n    },\n    error: {\n      large: [\n        \"bg-status-error-00\",\n        \"border\",\n        \"border-status-error-05\",\n        \"rounded-16\",\n        \"w-[40rem]\",\n      ],\n      medium: [\n        \"bg-status-error-00\",\n        \"border\",\n        \"border-status-error-02\",\n        \"rounded-12\",\n        \"w-[19.375rem]\",\n      ],\n    },\n  },\n  static: {\n    default: {\n      large: [\n        \"bg-background-tint-01\",\n        \"border\",\n        \"border-border-01\",\n        \"rounded-16\",\n        \"w-[19.375rem]\",\n      ],\n      medium: [\n        \"bg-background-tint-01\",\n        \"border\",\n        \"border-border-01\",\n        \"rounded-12\",\n        \"w-[19.375rem]\",\n      ],\n    },\n    info: {\n      large: [\n        \"bg-status-info-00\",\n        \"border\",\n        \"border-status-info-02\",\n        \"rounded-16\",\n        \"w-[19.375rem]\",\n      ],\n      medium: [\n        \"bg-status-info-00\",\n        \"border\",\n        \"border-status-info-02\",\n        \"rounded-12\",\n        \"w-[19.375rem]\",\n      ],\n    },\n    success: {\n      large: [\n        \"bg-status-success-00\",\n        \"border\",\n        \"border-status-success-02\",\n        \"rounded-16\",\n        \"w-[19.375rem]\",\n      ],\n      medium: [\n        \"bg-status-success-00\",\n        \"border\",\n        \"border-status-success-02\",\n        \"rounded-12\",\n        \"w-[19.375rem]\",\n      ],\n    },\n    warning: {\n      large: [\n        \"bg-status-warning-00\",\n        \"border\",\n        \"border-status-warning-02\",\n        \"rounded-16\",\n        \"w-[19.375rem]\",\n      ],\n      medium: [\n        \"bg-status-warning-00\",\n        \"border\",\n        \"border-status-warning-02\",\n        \"rounded-12\",\n        \"w-[19.375rem]\",\n      ],\n    },\n    error: {\n      large: [\n        \"bg-status-error-00\",\n        \"border\",\n        \"border-status-error-02\",\n        \"rounded-16\",\n        \"w-[19.375rem]\",\n      ],\n      medium: [\n        \"bg-status-error-00\",\n        \"border\",\n        \"border-status-error-02\",\n        \"rounded-12\",\n        \"w-[19.375rem]\",\n      ],\n    },\n  },\n} as const;\n\nconst iconClasses = {\n  default: \"stroke-text-03\",\n  info: \"stroke-status-info-05\",\n  success: \"stroke-status-success-05\",\n  warning: \"stroke-status-warning-05\",\n  error: \"stroke-status-error-05\",\n} as const;\n\nconst textClasses = {\n  flash: {\n    text: \"font-main-ui-action text-text-04\",\n    description: \"font-secondary-body text-text-02\",\n  },\n  static: {\n    text: \"font-main-ui-body text-text-04\",\n    description: \"font-secondary-body text-text-02\",\n  },\n} as const;\n\nexport interface MessageProps extends React.HTMLAttributes<HTMLDivElement> {\n  // Type variants:\n  flash?: boolean;\n  static?: boolean;\n\n  // Level variants:\n  default?: boolean;\n  info?: boolean;\n  success?: boolean;\n  warning?: boolean;\n  error?: boolean;\n\n  // Size variants:\n  large?: boolean;\n  medium?: boolean;\n\n  // Content:\n  text: string;\n  description?: string;\n\n  // Features:\n  icon?: boolean;\n  iconComponent?: IconFunctionComponent;\n  actions?: boolean | string;\n  close?: boolean;\n\n  // Action button customization:\n  actionIcon?: IconFunctionComponent;\n  actionPrimary?: boolean;\n\n  // Callbacks:\n  onClose?: () => void;\n  onAction?: () => void;\n}\n\nfunction MessageInner(\n  {\n    flash,\n    static: staticProp,\n\n    default: defaultProp,\n    info,\n    success,\n    warning,\n    error,\n\n    large,\n    medium,\n\n    text,\n    description,\n\n    icon = true,\n    iconComponent,\n    actions,\n    close = true,\n\n    actionIcon,\n    actionPrimary,\n\n    onClose,\n    onAction,\n\n    className,\n    ...props\n  }: MessageProps,\n  ref: React.ForwardedRef<HTMLDivElement>\n) {\n  const type = flash ? \"flash\" : staticProp ? \"static\" : \"flash\";\n  const level = info\n    ? \"info\"\n    : success\n      ? \"success\"\n      : warning\n        ? \"warning\"\n        : error\n          ? \"error\"\n          : defaultProp\n            ? \"default\"\n            : \"default\";\n  const size = large ? \"large\" : medium ? \"medium\" : \"large\";\n\n  const containerClass = useMemo(\n    () => containerClasses[type][level][size],\n    [type, level, size]\n  );\n\n  const iconClass = useMemo(() => iconClasses[level], [level]);\n\n  const textClass = useMemo(() => textClasses[type].text, [type]);\n  const descriptionClass = useMemo(() => textClasses[type].description, [type]);\n\n  const IconComponent = iconComponent\n    ? iconComponent\n    : level === \"success\"\n      ? SvgCheckCircle\n      : level === \"warning\"\n        ? SvgAlertTriangle\n        : level === \"error\"\n          ? SvgXOctagon\n          : SvgAlertCircle;\n\n  const contentPadding = size === \"large\" ? \"p-2\" : \"p-1\";\n  const closeButtonSize =\n    size === \"large\" ? \"size-[2.25rem]\" : \"size-[1.75rem]\";\n\n  return (\n    <div\n      ref={ref}\n      className={cn(\n        \"flex flex-row items-start gap-1 p-1\",\n        containerClass,\n        className\n      )}\n      {...props}\n    >\n      {/* Content Container */}\n      <div\n        className={cn(\n          \"flex flex-1 flex-row items-start gap-1 min-w-0\",\n          contentPadding\n        )}\n      >\n        {/* Icon Container */}\n        {icon && (\n          <div className=\"flex items-center justify-center p-0.5 size-[1.25rem] shrink-0\">\n            <IconComponent className={cn(\"size-[1rem]\", iconClass)} />\n          </div>\n        )}\n\n        {/* Text Content */}\n        <div className=\"flex flex-col flex-1 items-start min-w-0 px-0.5\">\n          <Text as=\"p\" className={cn(\"w-full\", textClass)}>\n            {text}\n          </Text>\n          {description && (\n            <Text as=\"p\" className={cn(\"w-full\", descriptionClass)}>\n              {description}\n            </Text>\n          )}\n        </div>\n      </div>\n\n      {/* Actions */}\n      {actions && (\n        <div className=\"flex items-center justify-end shrink-0 self-center pr-2\">\n          <Button\n            prominence={actionPrimary ? \"primary\" : \"secondary\"}\n            icon={actionIcon}\n            onClick={onAction}\n            size={size === \"large\" ? \"lg\" : \"md\"}\n          >\n            {typeof actions === \"string\" ? actions : \"Cancel\"}\n          </Button>\n        </div>\n      )}\n\n      {/* Close Container */}\n      {close && (\n        <div className=\"flex items-center justify-center shrink-0\">\n          <div className={cn(\"flex items-start\", closeButtonSize)}>\n            <Button\n              prominence=\"internal\"\n              icon={SvgX}\n              onClick={onClose}\n              aria-label=\"Close\"\n              size={size === \"large\" ? \"lg\" : \"sm\"}\n            />\n          </div>\n        </div>\n      )}\n    </div>\n  );\n}\n\nconst Message = React.forwardRef<HTMLDivElement, MessageProps>(MessageInner);\nMessage.displayName = \"Message\";\n\nexport default Message;\n"
  },
  {
    "path": "web/src/refresh-components/modals/MemoriesModal.tsx",
    "content": "\"use client\";\n\nimport { Fragment, useState, useRef, useEffect, useCallback } from \"react\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport InputTextArea from \"@/refresh-components/inputs/InputTextArea\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport CharacterCount from \"@/refresh-components/CharacterCount\";\nimport Separator from \"@/refresh-components/Separator\";\nimport TextSeparator from \"@/refresh-components/TextSeparator\";\nimport { toast } from \"@/hooks/useToast\";\nimport { useModalClose } from \"@/refresh-components/contexts/ModalContext\";\nimport { SvgAddLines, SvgMinusCircle, SvgPlusCircle } from \"@opal/icons\";\nimport {\n  useMemoryManager,\n  MAX_MEMORY_LENGTH,\n  MAX_MEMORY_COUNT,\n  LocalMemory,\n} from \"@/hooks/useMemoryManager\";\nimport { cn } from \"@/lib/utils\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport useUserPersonalization from \"@/hooks/useUserPersonalization\";\nimport type { MemoryItem } from \"@/lib/types\";\n\ninterface MemoryItemProps {\n  memory: LocalMemory;\n  originalIndex: number;\n  onUpdate: (index: number, value: string) => void;\n  onBlur: (index: number) => void;\n  onRemove: (index: number) => void;\n  shouldFocus?: boolean;\n  onFocused?: () => void;\n  shouldHighlight?: boolean;\n  onHighlighted?: () => void;\n}\n\nfunction MemoryItem({\n  memory,\n  originalIndex,\n  onUpdate,\n  onBlur,\n  onRemove,\n  shouldFocus,\n  onFocused,\n  shouldHighlight,\n  onHighlighted,\n}: MemoryItemProps) {\n  const [isFocused, setIsFocused] = useState(false);\n  const [isHighlighting, setIsHighlighting] = useState(false);\n  const textareaRef = useRef<HTMLTextAreaElement>(null);\n  const wrapperRef = useRef<HTMLDivElement>(null);\n\n  useEffect(() => {\n    if (shouldFocus && textareaRef.current) {\n      const el = textareaRef.current;\n      el.focus();\n      el.selectionStart = el.selectionEnd = el.value.length;\n      onFocused?.();\n    }\n  }, [shouldFocus, onFocused]);\n\n  useEffect(() => {\n    if (!shouldHighlight) return;\n\n    wrapperRef.current?.scrollIntoView({\n      block: \"start\",\n      behavior: \"smooth\",\n    });\n    setIsHighlighting(true);\n\n    const timer = setTimeout(() => {\n      setIsHighlighting(false);\n      onHighlighted?.();\n    }, 1000);\n\n    return () => clearTimeout(timer);\n  }, [shouldHighlight, onHighlighted]);\n\n  return (\n    <div\n      ref={wrapperRef}\n      className={cn(\n        \"rounded-08 w-full p-0.5 border border-transparent\",\n        \"transition-colors \",\n        isHighlighting &&\n          \"bg-action-link-01 hover:bg-action-link-01 border-action-link-05 duration-700\"\n      )}\n    >\n      <Section gap={0.25} alignItems=\"start\">\n        <Section flexDirection=\"row\" alignItems=\"start\" gap={0.5}>\n          <InputTextArea\n            ref={textareaRef}\n            placeholder=\"Type or paste in a personal note or memory\"\n            value={memory.content}\n            onChange={(e) => onUpdate(originalIndex, e.target.value)}\n            onFocus={() => setIsFocused(true)}\n            onBlur={() => {\n              setIsFocused(false);\n              void onBlur(originalIndex);\n            }}\n            onKeyDown={(e) => {\n              if (\n                e.key === \"Enter\" &&\n                !e.shiftKey &&\n                !e.nativeEvent.isComposing\n              ) {\n                e.preventDefault();\n                textareaRef.current?.blur();\n              }\n            }}\n            rows={1}\n            autoResize\n            maxRows={3}\n            maxLength={MAX_MEMORY_LENGTH}\n            resizable={false}\n            className=\"bg-background-tint-01 hover:bg-background-tint-00 focus-within:bg-background-tint-00\"\n          />\n          <Button\n            disabled={!memory.content.trim() && memory.isNew}\n            prominence=\"tertiary\"\n            icon={SvgMinusCircle}\n            onClick={() => void onRemove(originalIndex)}\n            aria-label=\"Remove Line\"\n            tooltip=\"Remove Line\"\n          />\n        </Section>\n        <div\n          className={isFocused ? \"visible\" : \"invisible h-0 overflow-hidden\"}\n        >\n          <CharacterCount value={memory.content} limit={MAX_MEMORY_LENGTH} />\n        </div>\n      </Section>\n    </div>\n  );\n}\n\nfunction resolveTargetMemoryId(\n  targetMemoryId: number | null | undefined,\n  targetIndex: number | null | undefined,\n  memories: MemoryItem[]\n): number | null {\n  if (targetMemoryId != null) return targetMemoryId;\n\n  if (targetIndex != null && memories.length > 0) {\n    // Backend index is ASC (oldest-first), frontend displays DESC (newest-first)\n    const descIdx = memories.length - 1 - targetIndex;\n    return memories[descIdx]?.id ?? null;\n  }\n\n  return null;\n}\n\ninterface MemoriesModalProps {\n  memories?: MemoryItem[];\n  onSaveMemories?: (memories: MemoryItem[]) => Promise<boolean>;\n  onClose?: () => void;\n  initialTargetMemoryId?: number | null;\n  initialTargetIndex?: number | null;\n  highlightOnOpen?: boolean;\n  focusNewLine?: boolean;\n}\n\nexport default function MemoriesModal({\n  memories: memoriesProp,\n  onSaveMemories: onSaveMemoriesProp,\n  onClose,\n  initialTargetMemoryId,\n  initialTargetIndex,\n  highlightOnOpen = false,\n  focusNewLine = false,\n}: MemoriesModalProps) {\n  const close = useModalClose(onClose);\n  const [focusMemoryId, setFocusMemoryId] = useState<number | null>(null);\n\n  // Self-fetching: when no props provided, fetch from UserProvider\n  const { user, refreshUser, updateUserPersonalization } = useUser();\n  const { handleSavePersonalization } = useUserPersonalization(\n    user,\n    updateUserPersonalization,\n    {\n      onSuccess: () => toast.success(\"Preferences saved\"),\n      onError: () => toast.error(\"Failed to save preferences\"),\n    }\n  );\n\n  useEffect(() => {\n    if (memoriesProp === undefined) {\n      void refreshUser();\n    }\n    // Only run on mount\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, []);\n\n  const internalSaveMemories = useCallback(\n    async (newMemories: MemoryItem[]): Promise<boolean> => {\n      const result = await handleSavePersonalization(\n        { memories: newMemories },\n        true\n      );\n      return !!result;\n    },\n    [handleSavePersonalization]\n  );\n\n  const effectiveMemories =\n    memoriesProp ?? user?.personalization?.memories ?? [];\n  const effectiveSave = onSaveMemoriesProp ?? internalSaveMemories;\n\n  // Drives scroll-into-view + highlight when opening from a FileTile click\n  const [highlightMemoryId, setHighlightMemoryId] = useState<number | null>(\n    null\n  );\n\n  useEffect(() => {\n    const targetId = resolveTargetMemoryId(\n      initialTargetMemoryId,\n      initialTargetIndex,\n      effectiveMemories\n    );\n    if (targetId == null) return;\n\n    setFocusMemoryId(targetId);\n    if (highlightOnOpen) {\n      setHighlightMemoryId(targetId);\n    }\n  }, [initialTargetMemoryId, initialTargetIndex]);\n\n  const {\n    searchQuery,\n    setSearchQuery,\n    filteredMemories,\n    totalLineCount,\n    canAddMemory,\n    handleAddMemory,\n    handleUpdateMemory,\n    handleRemoveMemory,\n    handleBlurMemory,\n  } = useMemoryManager({\n    memories: effectiveMemories,\n    onSaveMemories: effectiveSave,\n    onNotify: (message, type) => toast[type](message),\n  });\n\n  // Always start with an empty card; optionally focus it (View/Add button)\n  const hasAddedEmptyRef = useRef(false);\n  useEffect(() => {\n    if (hasAddedEmptyRef.current) return;\n    hasAddedEmptyRef.current = true;\n\n    const id = handleAddMemory();\n    if (id !== null && focusNewLine) {\n      setFocusMemoryId(id);\n    }\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, []);\n\n  const onAddLine = () => {\n    const id = handleAddMemory();\n    if (id !== null) {\n      setFocusMemoryId(id);\n    }\n  };\n\n  return (\n    <Modal open onOpenChange={(open) => !open && close?.()}>\n      <Modal.Content width=\"sm\" height=\"lg\" position=\"top\">\n        <Modal.Header\n          icon={SvgAddLines}\n          title=\"Memory\"\n          description=\"Let Onyx reference these stored notes and memories in chats.\"\n          onClose={close}\n        >\n          <Section flexDirection=\"row\" gap={0.5}>\n            <InputTypeIn\n              placeholder=\"Search...\"\n              value={searchQuery}\n              onChange={(e) => setSearchQuery(e.target.value)}\n              leftSearchIcon\n              showClearButton={false}\n              className=\"w-full !bg-transparent !border-transparent [&:is(:hover,:active,:focus,:focus-within)]:!bg-background-neutral-00 [&:is(:hover)]:!border-border-01 [&:is(:focus,:focus-within)]:!shadow-none\"\n            />\n            <Button\n              disabled={!canAddMemory}\n              prominence=\"tertiary\"\n              onClick={onAddLine}\n              rightIcon={SvgPlusCircle}\n              title={\n                !canAddMemory\n                  ? `Maximum of ${MAX_MEMORY_COUNT} memories reached`\n                  : undefined\n              }\n            >\n              Add Line\n            </Button>\n          </Section>\n        </Modal.Header>\n\n        <Modal.Body padding={0.5}>\n          {filteredMemories.length === 0 ? (\n            <Section alignItems=\"center\" padding={2}>\n              <Text secondaryBody text03>\n                {searchQuery.trim()\n                  ? \"No memories match your search.\"\n                  : 'No memories yet. Click \"Add Line\" to get started.'}\n              </Text>\n            </Section>\n          ) : (\n            <Section gap={0.5}>\n              {filteredMemories.map(({ memory, originalIndex }) => (\n                <Fragment key={memory.id}>\n                  <MemoryItem\n                    memory={memory}\n                    originalIndex={originalIndex}\n                    onUpdate={handleUpdateMemory}\n                    onBlur={handleBlurMemory}\n                    onRemove={handleRemoveMemory}\n                    shouldFocus={memory.id === focusMemoryId}\n                    onFocused={() => setFocusMemoryId(null)}\n                    shouldHighlight={memory.id === highlightMemoryId}\n                    onHighlighted={() => {\n                      setHighlightMemoryId(null);\n                    }}\n                  />\n                  {memory.isNew && <Separator noPadding />}\n                </Fragment>\n              ))}\n            </Section>\n          )}\n          <TextSeparator\n            count={totalLineCount}\n            text={totalLineCount === 1 ? \"Line\" : \"Lines\"}\n          />\n        </Modal.Body>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/popovers/ActionsPopover/ActionLineItem.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { SEARCH_TOOL_ID } from \"@/app/app/components/tools/constants\";\nimport { ToolSnapshot } from \"@/lib/tools/interfaces\";\nimport { getIconForAction } from \"@/app/app/services/actionUtils\";\nimport { ToolAuthStatus } from \"@/lib/hooks/useToolOAuthStatus\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport { Button } from \"@opal/components\";\nimport { cn, noProp } from \"@/lib/utils\";\nimport type { IconProps } from \"@opal/types\";\nimport { SvgChevronRight, SvgKey, SvgSettings, SvgSlash } from \"@opal/icons\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport EnabledCount from \"@/refresh-components/EnabledCount\";\nimport { Section } from \"@/layouts/general-layouts\";\n\nexport interface ActionItemProps {\n  tool?: ToolSnapshot;\n  Icon?: React.FunctionComponent<IconProps>;\n  label?: string;\n  disabled: boolean;\n  isForced: boolean;\n  isUnavailable?: boolean;\n  tooltip?: string;\n  showAdminConfigure?: boolean;\n  adminConfigureHref?: string;\n  adminConfigureTooltip?: string;\n  onToggle: () => void;\n  onForceToggle: () => void;\n  onSourceManagementOpen?: () => void;\n  hasNoConnectors?: boolean;\n  toolAuthStatus?: ToolAuthStatus;\n  onOAuthAuthenticate?: () => void;\n  onClose?: () => void;\n  // Source counts for internal search tool\n  sourceCounts?: { enabled: number; total: number };\n}\n\nexport default function ActionLineItem({\n  tool,\n  Icon: ProvidedIcon,\n  label: providedLabel,\n  disabled,\n  isForced,\n  isUnavailable = false,\n  tooltip,\n  showAdminConfigure = false,\n  adminConfigureHref,\n  adminConfigureTooltip = \"Configure\",\n  onToggle,\n  onForceToggle,\n  onSourceManagementOpen,\n  hasNoConnectors = false,\n  toolAuthStatus,\n  onOAuthAuthenticate,\n  onClose,\n  sourceCounts,\n}: ActionItemProps) {\n  const router = useRouter();\n  const { currentProjectId } = useProjectsContext();\n\n  const Icon = tool ? getIconForAction(tool) : ProvidedIcon!;\n  const toolName = tool?.name || providedLabel || \"\";\n\n  let label = tool ? tool.display_name || tool.name : providedLabel!;\n  if (!!currentProjectId && tool?.in_code_tool_id === SEARCH_TOOL_ID) {\n    label = \"Project Search\";\n  }\n\n  const isSearchToolWithNoConnectors =\n    !currentProjectId &&\n    tool?.in_code_tool_id === SEARCH_TOOL_ID &&\n    hasNoConnectors;\n\n  const isSearchToolAndNotInProject =\n    tool?.in_code_tool_id === SEARCH_TOOL_ID && !currentProjectId;\n\n  // Show source count when: internal search is pinned, has some (but not all) sources enabled\n  const shouldShowSourceCount =\n    isSearchToolAndNotInProject &&\n    !isSearchToolWithNoConnectors &&\n    isForced &&\n    sourceCounts &&\n    sourceCounts.enabled > 0 &&\n    sourceCounts.enabled < sourceCounts.total;\n\n  const tooltipText = tooltip || tool?.description;\n\n  return (\n    <SimpleTooltip tooltip={tooltipText} className=\"max-w-[30rem]\">\n      <LineItem\n        data-testid={`tool-option-${toolName}`}\n        onClick={() => {\n          if (isUnavailable) {\n            onForceToggle();\n            return;\n          }\n          if (disabled) onToggle();\n          onForceToggle();\n          if (isSearchToolAndNotInProject && !isForced)\n            onSourceManagementOpen?.();\n          else onClose?.();\n        }}\n        selected={isForced}\n        disabled={isSearchToolWithNoConnectors || (isUnavailable && !isForced)}\n        muted={isUnavailable && isForced}\n        strikethrough={disabled}\n        icon={Icon}\n        rightChildren={\n          <Section gap={0.25} flexDirection=\"row\">\n            {!isUnavailable && tool?.oauth_config_id && toolAuthStatus && (\n              <Button\n                icon={SvgKey}\n                prominence=\"secondary\"\n                size=\"sm\"\n                onClick={noProp(() => {\n                  if (\n                    !toolAuthStatus.hasToken ||\n                    toolAuthStatus.isTokenExpired\n                  ) {\n                    onOAuthAuthenticate?.();\n                  }\n                })}\n              />\n            )}\n\n            {!isSearchToolWithNoConnectors && !isUnavailable && (\n              // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n              <IconButton\n                icon={SvgSlash}\n                onClick={noProp(onToggle)}\n                internal\n                aria-label={disabled ? \"Enable\" : \"Disable\"}\n                className={cn(\n                  !disabled && \"invisible group-hover/LineItem:visible\",\n                  // Hide when showing source count (it has its own hover behavior)\n                  shouldShowSourceCount && \"!hidden\"\n                )}\n                tooltip={disabled ? \"Enable\" : \"Disable\"}\n              />\n            )}\n\n            {isUnavailable && showAdminConfigure && adminConfigureHref && (\n              <Button\n                icon={SvgSettings}\n                onClick={noProp(() => {\n                  router.push(adminConfigureHref as Route);\n                  onClose?.();\n                })}\n                prominence=\"tertiary\"\n                size=\"sm\"\n                tooltip={adminConfigureTooltip}\n              />\n            )}\n\n            {/* Source count for internal search - show when some but not all sources selected AND tool is pinned */}\n            {shouldShowSourceCount && (\n              <span className=\"relative flex items-center whitespace-nowrap\">\n                {/* Show count normally, disable icon on hover - both in same space */}\n                <span className=\"group-hover/LineItem:invisible\">\n                  <EnabledCount\n                    enabledCount={sourceCounts.enabled}\n                    totalCount={sourceCounts.total}\n                  />\n                </span>\n                <span className=\"absolute inset-0 flex items-center justify-center invisible group-hover/LineItem:visible\">\n                  <Button\n                    icon={SvgSlash}\n                    onClick={noProp(onToggle)}\n                    prominence=\"tertiary\"\n                    size=\"sm\"\n                    tooltip={disabled ? \"Enable\" : \"Disable\"}\n                  />\n                </span>\n              </span>\n            )}\n\n            {isSearchToolAndNotInProject && (\n              <Button\n                aria-label={\n                  isSearchToolWithNoConnectors\n                    ? \"Add Connectors\"\n                    : \"Configure Connectors\"\n                }\n                icon={\n                  isSearchToolWithNoConnectors ? SvgSettings : SvgChevronRight\n                }\n                onClick={noProp(() => {\n                  if (isSearchToolWithNoConnectors)\n                    router.push(\"/admin/add-connector\");\n                  else onSourceManagementOpen?.();\n                })}\n                prominence=\"tertiary\"\n                size=\"sm\"\n                tooltip={\n                  isSearchToolWithNoConnectors\n                    ? \"Add Connectors\"\n                    : \"Configure Connectors\"\n                }\n              />\n            )}\n          </Section>\n        }\n      >\n        {label}\n      </LineItem>\n    </SimpleTooltip>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/popovers/ActionsPopover/MCPLineItem.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport {\n  MCPAuthenticationType,\n  MCPAuthenticationPerformer,\n  ToolSnapshot,\n} from \"@/lib/tools/interfaces\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { cn, noProp } from \"@/lib/utils\";\nimport type { IconProps } from \"@opal/types\";\nimport {\n  SvgCheck,\n  SvgChevronRight,\n  SvgKey,\n  SvgLock,\n  SvgServer,\n} from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { Button } from \"@opal/components\";\nimport EnabledCount from \"@/refresh-components/EnabledCount\";\n\nexport interface MCPServer {\n  id: number;\n  name: string;\n  owner_email: string;\n  server_url: string;\n  auth_type: MCPAuthenticationType;\n  auth_performer: MCPAuthenticationPerformer;\n  is_authenticated: boolean;\n  user_authenticated?: boolean;\n  auth_template?: any;\n  user_credentials?: Record<string, string>;\n}\n\nexport interface MCPLineItemProps {\n  server: MCPServer;\n  isActive: boolean;\n  onSelect: () => void;\n  onAuthenticate: () => void;\n  tools: ToolSnapshot[];\n  enabledTools: ToolSnapshot[];\n  isAuthenticated: boolean;\n  isLoading: boolean;\n}\n\nexport default function MCPLineItem({\n  server,\n  isActive,\n  onSelect,\n  onAuthenticate,\n  tools,\n  enabledTools,\n  isAuthenticated,\n  isLoading,\n}: MCPLineItemProps) {\n  const showAuthTrigger =\n    server.auth_performer === MCPAuthenticationPerformer.PER_USER &&\n    server.auth_type !== MCPAuthenticationType.NONE;\n\n  const canClickIntoServer = isAuthenticated && tools.length > 0;\n  const showInlineReauth = showAuthTrigger && canClickIntoServer;\n  const showReauthButton = showAuthTrigger && !showInlineReauth;\n\n  function getServerIcon(): React.FunctionComponent<IconProps> {\n    if (isLoading) return SimpleLoader;\n    if (isAuthenticated) {\n      return (({ className }) => (\n        <SvgCheck className={cn(className, \"stroke-status-success-05\")} />\n      )) as React.FunctionComponent<IconProps>;\n    }\n    if (server.auth_type === MCPAuthenticationType.NONE) return SvgServer;\n    if (server.auth_performer === MCPAuthenticationPerformer.PER_USER) {\n      return (({ className }) => (\n        <SvgKey className={cn(className, \"stroke-status-warning-05\")} />\n      )) as React.FunctionComponent<IconProps>;\n    }\n    return (({ className }) => (\n      <SvgLock className={cn(className, \"stroke-status-error-05\")} />\n    )) as React.FunctionComponent<IconProps>;\n  }\n\n  const handleClick = noProp(() => {\n    if (canClickIntoServer) {\n      onSelect();\n      return;\n    }\n    if (showAuthTrigger) {\n      onAuthenticate();\n    }\n  });\n\n  const allToolsDisabled = enabledTools.length === 0 && tools.length > 0;\n\n  return (\n    <LineItem\n      data-mcp-server-id={server.id}\n      data-mcp-server-name={server.name}\n      icon={getServerIcon()}\n      onClick={handleClick}\n      strikethrough={allToolsDisabled}\n      selected={isActive}\n      rightChildren={\n        <Section gap={0.25} flexDirection=\"row\">\n          {isAuthenticated &&\n            tools.length > 0 &&\n            enabledTools.length > 0 &&\n            tools.length !== enabledTools.length && (\n              <EnabledCount\n                enabledCount={enabledTools.length}\n                totalCount={tools.length}\n              />\n            )}\n          {canClickIntoServer && (\n            <Button\n              icon={SvgChevronRight}\n              prominence=\"tertiary\"\n              size=\"sm\"\n              onClick={onSelect}\n            />\n          )}\n          {showReauthButton && (\n            <Button\n              icon={SvgKey}\n              prominence=\"tertiary\"\n              size=\"sm\"\n              onClick={onAuthenticate}\n            />\n          )}\n        </Section>\n      }\n    >\n      {server.name}\n    </LineItem>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/popovers/ActionsPopover/SwitchList.tsx",
    "content": "\"use client\";\n\nimport React, { useMemo, useState } from \"react\";\nimport { Button } from \"@opal/components\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { PopoverMenu } from \"@/refresh-components/Popover\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport type { IconProps } from \"@opal/types\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport { SvgChevronLeft, SvgPlug, SvgUnplug } from \"@opal/icons\";\n\nexport interface SwitchListItem {\n  id: string;\n  label: string;\n  description?: string;\n  leading?: React.ReactNode;\n  isEnabled: boolean;\n  onToggle: () => void;\n  disabled?: boolean;\n  disabledTooltip?: string;\n}\n\nexport interface SwitchListProps {\n  items: SwitchListItem[];\n  searchPlaceholder: string;\n  allDisabled: boolean;\n  onDisableAll: () => void;\n  onEnableAll: () => void;\n  disableAllLabel: string;\n  enableAllLabel: string;\n  onBack: () => void;\n  footer?: React.ReactNode;\n}\n\nexport default function SwitchList({\n  items,\n  searchPlaceholder,\n  allDisabled,\n  onDisableAll,\n  onEnableAll,\n  onBack,\n  footer,\n}: SwitchListProps) {\n  const [searchTerm, setSearchTerm] = useState(\"\");\n  const filteredItems = useMemo(() => {\n    if (!searchTerm) return items;\n    const searchLower = searchTerm.toLowerCase();\n    return items.filter((item) => {\n      return (\n        item.label.toLowerCase().includes(searchLower) ||\n        (item.description &&\n          item.description.toLowerCase().includes(searchLower))\n      );\n    });\n  }, [items, searchTerm]);\n\n  return (\n    <PopoverMenu footer={footer}>\n      {[\n        <div className=\"flex items-center gap-1\" key=\"search\">\n          <Button\n            icon={SvgChevronLeft}\n            prominence=\"tertiary\"\n            size=\"sm\"\n            aria-label=\"Back\"\n            onClick={() => {\n              setSearchTerm(\"\");\n              onBack();\n            }}\n          />\n          <InputTypeIn\n            variant=\"internal\"\n            placeholder={searchPlaceholder}\n            value={searchTerm}\n            onChange={(e) => setSearchTerm(e.target.value)}\n            autoFocus\n          />\n        </div>,\n\n        <LineItem\n          key=\"enable-disable-all\"\n          icon={allDisabled ? SvgPlug : SvgUnplug}\n          onClick={allDisabled ? onEnableAll : onDisableAll}\n        >\n          {allDisabled ? \"Enable All\" : \"Disable All\"}\n        </LineItem>,\n\n        ...filteredItems.map((item) => {\n          const tooltip = item.disabled\n            ? item.disabledTooltip\n            : item.description;\n          return (\n            <SimpleTooltip\n              key={item.id}\n              tooltip={tooltip}\n              className=\"max-w-[30rem]\"\n            >\n              <LineItem\n                icon={\n                  item.leading\n                    ? ((() =>\n                        item.leading) as React.FunctionComponent<IconProps>)\n                    : undefined\n                }\n                rightChildren={\n                  <Switch\n                    checked={item.isEnabled}\n                    onCheckedChange={item.onToggle}\n                    aria-label={`Toggle ${item.label}`}\n                    disabled={item.disabled}\n                  />\n                }\n              >\n                {item.label}\n              </LineItem>\n            </SimpleTooltip>\n          );\n        }),\n      ]}\n    </PopoverMenu>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/popovers/ActionsPopover/index.tsx",
    "content": "\"use client\";\n\nimport {\n  FILE_READER_TOOL_ID,\n  IMAGE_GENERATION_TOOL_ID,\n  PYTHON_TOOL_ID,\n  SEARCH_TOOL_ID,\n  WEB_SEARCH_TOOL_ID,\n} from \"@/app/app/components/tools/constants\";\nimport { useState, useEffect, useMemo, useCallback, useRef } from \"react\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport SwitchList, {\n  SwitchListItem,\n} from \"@/refresh-components/popovers/ActionsPopover/SwitchList\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport {\n  MCPAuthenticationType,\n  MCPAuthenticationPerformer,\n  ToolSnapshot,\n} from \"@/lib/tools/interfaces\";\nimport { useForcedTools } from \"@/lib/hooks/useForcedTools\";\nimport useAgentPreferences from \"@/hooks/useAgentPreferences\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { FilterManager, useSourcePreferences } from \"@/lib/hooks\";\nimport { listSourceMetadata } from \"@/lib/sources\";\nimport MCPApiKeyModal from \"@/components/chat/MCPApiKeyModal\";\nimport { ValidSources } from \"@/lib/types\";\nimport { SourceMetadata } from \"@/lib/search/interfaces\";\nimport { SourceIcon } from \"@/components/SourceIcon\";\nimport { useAvailableTools } from \"@/hooks/useAvailableTools\";\nimport useCCPairs from \"@/hooks/useCCPairs\";\nimport { useLLMProviders } from \"@/hooks/useLLMProviders\";\nimport { useVectorDbEnabled } from \"@/providers/SettingsProvider\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { useToolOAuthStatus } from \"@/lib/hooks/useToolOAuthStatus\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport ActionLineItem from \"@/refresh-components/popovers/ActionsPopover/ActionLineItem\";\nimport MCPLineItem, {\n  MCPServer,\n} from \"@/refresh-components/popovers/ActionsPopover/MCPLineItem\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport { SvgActions, SvgChevronRight, SvgKey, SvgSliders } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\n\nfunction buildTooltipMessage(\n  actionDescription: string,\n  isConfigured: boolean,\n  canManageAction: boolean\n) {\n  const _CONFIGURE_MESSAGE = \"Press the settings cog to enable.\";\n  const _USER_NOT_ADMIN_MESSAGE = \"Ask an admin to configure.\";\n\n  if (isConfigured) {\n    return actionDescription;\n  }\n\n  if (canManageAction) {\n    return actionDescription + \" \" + _CONFIGURE_MESSAGE;\n  }\n\n  return actionDescription + \" \" + _USER_NOT_ADMIN_MESSAGE;\n}\n\nconst TOOL_DESCRIPTIONS: Record<string, string> = {\n  [SEARCH_TOOL_ID]: \"Search through connected knowledge to inform the answer.\",\n  [IMAGE_GENERATION_TOOL_ID]: \"Generate images based on a prompt.\",\n  [WEB_SEARCH_TOOL_ID]: \"Search the web for up-to-date information.\",\n  [PYTHON_TOOL_ID]: \"Execute code for complex analysis.\",\n};\n\nconst DEFAULT_TOOL_DESCRIPTION = \"This action is not configured yet.\";\n\nfunction getToolTooltip(\n  tool: ToolSnapshot,\n  isConfigured: boolean,\n  canManageAction: boolean\n): string {\n  const description =\n    (tool.in_code_tool_id && TOOL_DESCRIPTIONS[tool.in_code_tool_id]) ||\n    tool.description ||\n    DEFAULT_TOOL_DESCRIPTION;\n  return buildTooltipMessage(description, isConfigured, canManageAction);\n}\n\nconst ADMIN_CONFIG_LINKS: Record<string, { href: string; tooltip: string }> = {\n  [IMAGE_GENERATION_TOOL_ID]: {\n    href: \"/admin/configuration/image-generation\",\n    tooltip: \"Configure Image Generation\",\n  },\n  [WEB_SEARCH_TOOL_ID]: {\n    href: \"/admin/configuration/web-search\",\n    tooltip: \"Configure Web Search\",\n  },\n  [PYTHON_TOOL_ID]: {\n    href: \"/admin/configuration/code-interpreter\",\n    tooltip: \"Configure Code Interpreter\",\n  },\n  KnowledgeGraphTool: {\n    href: \"/admin/kg\",\n    tooltip: \"Configure Knowledge Graph\",\n  },\n};\n\nconst OPENAPI_ADMIN_CONFIG = {\n  href: \"/admin/actions/open-api\",\n  tooltip: \"Manage OpenAPI Actions\",\n};\n\nconst getAdminConfigureInfo = (\n  tool: ToolSnapshot\n): { href: string; tooltip: string } | null => {\n  if (tool.in_code_tool_id && ADMIN_CONFIG_LINKS[tool.in_code_tool_id]) {\n    return ADMIN_CONFIG_LINKS[tool.in_code_tool_id] ?? null;\n  }\n\n  if (!tool.in_code_tool_id && !tool.mcp_server_id) {\n    return OPENAPI_ADMIN_CONFIG;\n  }\n\n  return null;\n};\n\n// Get source metadata for configured sources - deduplicated by source type\nfunction getConfiguredSources(\n  availableSources: ValidSources[]\n): Array<SourceMetadata & { originalName: string; uniqueKey: string }> {\n  const allSources = listSourceMetadata();\n\n  const seenSources = new Set<string>();\n  const configuredSources: Array<\n    SourceMetadata & { originalName: string; uniqueKey: string }\n  > = [];\n\n  availableSources.forEach((sourceName) => {\n    // Handle federated connectors by removing the federated_ prefix\n    const cleanName = sourceName.replace(\"federated_\", \"\");\n    // Skip if we've already seen this source type\n    if (seenSources.has(cleanName)) return;\n    seenSources.add(cleanName);\n    const source = allSources.find(\n      (source) => source.internalName === cleanName\n    );\n    if (source) {\n      configuredSources.push({\n        ...source,\n        originalName: sourceName,\n        uniqueKey: cleanName,\n      });\n    }\n  });\n  return configuredSources;\n}\n\ntype SecondaryViewState =\n  | { type: \"sources\" }\n  | { type: \"mcp\"; serverId: number };\n\nexport interface ActionsPopoverProps {\n  selectedAgent: MinimalPersonaSnapshot;\n  filterManager: FilterManager;\n  availableSources?: ValidSources[];\n  disabled?: boolean;\n}\n\nexport default function ActionsPopover({\n  selectedAgent,\n  filterManager,\n  availableSources = [],\n  disabled = false,\n}: ActionsPopoverProps) {\n  const [open, setOpen] = useState(false);\n  const [secondaryView, setSecondaryView] = useState<SecondaryViewState | null>(\n    null\n  );\n  const [searchTerm, setSearchTerm] = useState(\"\");\n  // const [showFadeMask, setShowFadeMask] = useState(false);\n  // const [showTopShadow, setShowTopShadow] = useState(false);\n  const { selectedSources, setSelectedSources } = filterManager;\n  const [mcpServers, setMcpServers] = useState<MCPServer[]>([]);\n  const { llmProviders, isLoading: isLLMLoading } = useLLMProviders(\n    selectedAgent.id\n  );\n  const hasAnyProvider = !isLLMLoading && (llmProviders?.length ?? 0) > 0;\n\n  // Use the OAuth hook\n  const { getToolAuthStatus, authenticateTool } = useToolOAuthStatus(\n    selectedAgent.id\n  );\n\n  const {\n    sourcesInitialized,\n    enableSources,\n    enableAllSources: baseEnableAllSources,\n    disableAllSources: baseDisableAllSources,\n    toggleSource: baseToggleSource,\n    isSourceEnabled,\n  } = useSourcePreferences({\n    availableSources,\n    selectedSources,\n    setSelectedSources,\n  });\n\n  // Store previously enabled sources when search tool is disabled\n  const previouslyEnabledSourcesRef = useRef<SourceMetadata[]>([]);\n\n  const isDefaultAgent = selectedAgent.id === 0;\n\n  // Check if the search tool is explicitly enabled on this persona (admin enabled \"Use Knowledge\")\n  const hasSearchTool = selectedAgent.tools.some(\n    (tool) => tool.in_code_tool_id === SEARCH_TOOL_ID\n  );\n\n  // Get sources the agent has access to via document sets, hierarchy nodes, and attached documents\n  // Default agent has access to all sources\n  const agentAccessibleSources = useMemo(() => {\n    if (isDefaultAgent) {\n      return null; // null means \"all accessible\"\n    }\n\n    const sourceSet = new Set<string>();\n\n    // Add sources from document sets\n    selectedAgent.document_sets.forEach((docSet) => {\n      // Check cc_pair_summaries (regular connectors)\n      docSet.cc_pair_summaries?.forEach((ccPair) => {\n        // Normalize by removing federated_ prefix\n        const normalized = ccPair.source.replace(\"federated_\", \"\");\n        sourceSet.add(normalized);\n      });\n\n      // Check federated_connector_summaries (federated connectors)\n      docSet.federated_connector_summaries?.forEach((fedConnector) => {\n        // Normalize by removing federated_ prefix\n        const normalized = fedConnector.source.replace(\"federated_\", \"\");\n        sourceSet.add(normalized);\n      });\n    });\n\n    // Add sources from hierarchy nodes and attached documents (via knowledge_sources)\n    selectedAgent.knowledge_sources?.forEach((source) => {\n      // Normalize by removing federated_ prefix\n      const normalized = source.replace(\"federated_\", \"\");\n      sourceSet.add(normalized);\n    });\n\n    // If agent has search tool but no specific sources, it can search everything\n    if (sourceSet.size === 0 && hasSearchTool) {\n      return null;\n    }\n\n    return sourceSet;\n  }, [\n    isDefaultAgent,\n    selectedAgent.document_sets,\n    selectedAgent.knowledge_sources,\n    hasSearchTool,\n  ]);\n\n  // Check if non-default agent has no knowledge sources (Internal Search should be disabled)\n  // Knowledge sources include document sets, hierarchy nodes, and attached documents\n  // If the search tool is present, the admin intentionally enabled knowledge search\n  const hasNoKnowledgeSources =\n    !isDefaultAgent &&\n    !hasSearchTool &&\n    selectedAgent.document_sets.length === 0 &&\n    (selectedAgent.hierarchy_node_count ?? 0) === 0 &&\n    (selectedAgent.attached_document_count ?? 0) === 0;\n\n  // Store MCP server auth/loading state (tools are part of selectedAgent.tools)\n  const [mcpServerData, setMcpServerData] = useState<{\n    [serverId: number]: {\n      isAuthenticated: boolean;\n      isLoading: boolean;\n    };\n  }>({});\n\n  const [mcpApiKeyModal, setMcpApiKeyModal] = useState<{\n    isOpen: boolean;\n    serverId: number | null;\n    serverName: string;\n    authTemplate?: any;\n    onSuccess?: () => void;\n    isAuthenticated?: boolean;\n    existingCredentials?: Record<string, string>;\n  }>({\n    isOpen: false,\n    serverId: null,\n    serverName: \"\",\n    authTemplate: undefined,\n    onSuccess: undefined,\n    isAuthenticated: false,\n  });\n\n  // Get the agent preference for this assistant\n  const { agentPreferences, setSpecificAgentPreferences } =\n    useAgentPreferences();\n  const { forcedToolIds, setForcedToolIds } = useForcedTools();\n\n  // Reset state when assistant changes\n  useEffect(() => {\n    setForcedToolIds([]);\n  }, [selectedAgent.id, setForcedToolIds]);\n\n  const { isAdmin, isCurator } = useUser();\n  const vectorDbEnabled = useVectorDbEnabled();\n\n  const { tools: availableTools } = useAvailableTools();\n  const { ccPairs } = useCCPairs(vectorDbEnabled);\n  const { currentProjectId, allCurrentProjectFiles } = useProjectsContext();\n  const availableToolIdSet = new Set(availableTools.map((tool) => tool.id));\n\n  // Check if there are any connectors available\n  const hasNoConnectors = ccPairs.length === 0;\n\n  const agentPreference = agentPreferences?.[selectedAgent.id];\n  const disabledToolIds = agentPreference?.disabled_tool_ids || [];\n  const toggleToolForCurrentAgent = (toolId: number) => {\n    const disabled = disabledToolIds.includes(toolId);\n    setSpecificAgentPreferences(selectedAgent.id, {\n      disabled_tool_ids: disabled\n        ? disabledToolIds.filter((id) => id !== toolId)\n        : [...disabledToolIds, toolId],\n    });\n\n    // If we're disabling a tool that is currently forced, remove it from forced tools\n    if (!disabled && forcedToolIds.includes(toolId)) {\n      setForcedToolIds(forcedToolIds.filter((id) => id !== toolId));\n    }\n  };\n\n  const toggleForcedTool = (toolId: number) => {\n    if (forcedToolIds.includes(toolId)) {\n      // If clicking on already forced tool, unforce it\n      setForcedToolIds([]);\n    } else {\n      // If clicking on a new tool, replace any existing forced tools with just this one\n      setForcedToolIds([toolId]);\n    }\n  };\n\n  // Get internal search tool reference for auto-pin logic\n  const internalSearchTool = useMemo(\n    () =>\n      selectedAgent.tools.find(\n        (tool) => tool.in_code_tool_id === SEARCH_TOOL_ID && !tool.mcp_server_id\n      ),\n    [selectedAgent.tools]\n  );\n\n  // Handle explicit force toggle from ActionLineItem\n  const handleForceToggleWithTracking = useCallback(\n    (toolId: number, wasForced: boolean) => {\n      // If pinning internal search, enable all accessible sources\n      if (\n        !wasForced &&\n        internalSearchTool &&\n        toolId === internalSearchTool.id\n      ) {\n        const sources = getConfiguredSources(availableSources);\n        const accessibleSources = sources.filter(\n          (s) =>\n            agentAccessibleSources === null ||\n            agentAccessibleSources.has(s.uniqueKey)\n        );\n        setSelectedSources(accessibleSources);\n      }\n      toggleForcedTool(toolId);\n    },\n    [\n      toggleForcedTool,\n      internalSearchTool,\n      availableSources,\n      agentAccessibleSources,\n      setSelectedSources,\n    ]\n  );\n\n  // Wrapped source functions that auto-pin internal search when sources change\n  const enableAllSources = useCallback(() => {\n    // Only enable sources the agent has access to\n    const allConfiguredSources = getConfiguredSources(availableSources);\n    const accessibleSources = allConfiguredSources.filter(\n      (s) =>\n        agentAccessibleSources === null ||\n        agentAccessibleSources.has(s.uniqueKey)\n    );\n    setSelectedSources(accessibleSources);\n\n    if (internalSearchTool) {\n      setForcedToolIds([internalSearchTool.id]);\n    }\n  }, [\n    agentAccessibleSources,\n    availableSources,\n    setSelectedSources,\n    internalSearchTool,\n    setForcedToolIds,\n  ]);\n\n  const disableAllSources = useCallback(() => {\n    baseDisableAllSources();\n    const willUnpin =\n      internalSearchTool && forcedToolIds.includes(internalSearchTool.id);\n    if (willUnpin) {\n      setForcedToolIds([]);\n    }\n  }, [\n    baseDisableAllSources,\n    internalSearchTool,\n    forcedToolIds,\n    setForcedToolIds,\n  ]);\n\n  const toggleSource = useCallback(\n    (sourceUniqueKey: string) => {\n      const wasEnabled = isSourceEnabled(sourceUniqueKey);\n      baseToggleSource(sourceUniqueKey);\n\n      const configuredSources = getConfiguredSources(availableSources);\n\n      if (internalSearchTool) {\n        if (!wasEnabled) {\n          // Enabling a source - auto-pin internal search\n          setForcedToolIds([internalSearchTool.id]);\n        } else {\n          // Disabling a source - check if all sources will be disabled\n          const remainingEnabled = configuredSources.filter(\n            (s) =>\n              s.uniqueKey !== sourceUniqueKey && isSourceEnabled(s.uniqueKey)\n          );\n          if (\n            remainingEnabled.length === 0 &&\n            forcedToolIds.includes(internalSearchTool.id)\n          ) {\n            // All sources disabled - unpin\n            setForcedToolIds([]);\n          }\n        }\n      }\n    },\n    [\n      baseToggleSource,\n      internalSearchTool,\n      isSourceEnabled,\n      availableSources,\n      forcedToolIds,\n      setForcedToolIds,\n    ]\n  );\n\n  // Filter out MCP tools from the main list (they have mcp_server_id)\n  // Also filter out internal search tool for basic users when there are no connectors\n  // Also filter out tools that are not chat-selectable (e.g., OpenURL)\n  const displayTools = selectedAgent.tools.filter((tool) => {\n    // Filter out MCP tools\n    if (tool.mcp_server_id) return false;\n\n    // Filter out tools that are not chat-selectable (visibility set by backend)\n    if (!tool.chat_selectable) return false;\n\n    // Always hide File Reader from the actions popover\n    if (tool.in_code_tool_id === FILE_READER_TOOL_ID) return false;\n\n    // Special handling for Project Search\n    // Ensure Project Search is hidden if no files exist\n    if (tool.in_code_tool_id === SEARCH_TOOL_ID && !!currentProjectId) {\n      if (!allCurrentProjectFiles || allCurrentProjectFiles.length === 0) {\n        return false;\n      }\n      // If files exist, show it (even if backend thinks it's strictly unavailable due to no connectors)\n      return true;\n    }\n\n    // Advertise to admin/curator users that they can connect an internal search tool\n    // even if it's not available or has no connectors\n    if (tool.in_code_tool_id === SEARCH_TOOL_ID && (isAdmin || isCurator)) {\n      return true;\n    }\n\n    // Filter out internal search tool for non-admin/curator users when there are no connectors\n    if (\n      tool.in_code_tool_id === SEARCH_TOOL_ID &&\n      hasNoConnectors &&\n      !isAdmin &&\n      !isCurator\n    ) {\n      return false;\n    }\n\n    return true;\n  });\n\n  const searchToolId =\n    displayTools.find((tool) => tool.in_code_tool_id === SEARCH_TOOL_ID)?.id ??\n    null;\n\n  // Fetch MCP servers for the agent on mount\n  useEffect(() => {\n    if (selectedAgent == null || selectedAgent.id == null || !hasAnyProvider)\n      return;\n\n    const abortController = new AbortController();\n\n    const fetchMCPServers = async () => {\n      try {\n        const response = await fetch(\n          `/api/mcp/servers/persona/${selectedAgent.id}`,\n          {\n            signal: abortController.signal,\n          }\n        );\n        if (response.ok) {\n          const data = await response.json();\n          const servers = data.mcp_servers || [];\n          setMcpServers(servers);\n          // Seed auth/loading state based on response\n          setMcpServerData((prev) => {\n            const next = { ...prev } as any;\n            servers.forEach((s: any) => {\n              next[s.id as number] = {\n                isAuthenticated: !!s.user_authenticated || !!s.is_authenticated,\n                isLoading: false,\n              };\n            });\n            return next;\n          });\n        }\n      } catch (error) {\n        if (abortController.signal.aborted) {\n          return;\n        }\n        console.error(\"Error fetching MCP servers:\", error);\n      }\n    };\n\n    fetchMCPServers();\n\n    return () => {\n      abortController.abort();\n    };\n  }, [selectedAgent?.id, hasAnyProvider]);\n\n  // No separate MCP tool loading; tools already exist in selectedAgent.tools\n\n  // Handle MCP authentication\n  const handleMCPAuthenticate = async (\n    serverId: number,\n    authType: MCPAuthenticationType\n  ) => {\n    if (authType === MCPAuthenticationType.OAUTH) {\n      const updateLoadingState = (loading: boolean) => {\n        setMcpServerData((prev) => {\n          const previous = prev[serverId] ?? {\n            isAuthenticated: false,\n            isLoading: false,\n          };\n          return {\n            ...prev,\n            [serverId]: {\n              ...previous,\n              isLoading: loading,\n            },\n          };\n        });\n      };\n\n      updateLoadingState(true);\n      try {\n        const response = await fetch(\"/api/mcp/oauth/connect\", {\n          method: \"POST\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n          body: JSON.stringify({\n            server_id: serverId,\n            return_path: window.location.pathname + window.location.search,\n            include_resource_param: true,\n          }),\n        });\n\n        if (response.ok) {\n          const { oauth_url } = await response.json();\n          window.location.href = oauth_url;\n        } else {\n          updateLoadingState(false);\n        }\n      } catch (error) {\n        console.error(\"Error initiating OAuth:\", error);\n        updateLoadingState(false);\n      }\n    }\n  };\n\n  const handleMCPApiKeySubmit = async (serverId: number, apiKey: string) => {\n    try {\n      const response = await fetch(\"/api/mcp/user-credentials\", {\n        method: \"POST\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({\n          server_id: serverId,\n          credentials: { api_key: apiKey },\n          transport: \"streamable-http\",\n        }),\n      });\n\n      if (!response.ok) {\n        const errorData = await response.json().catch(() => ({}));\n        const errorMessage = errorData.detail || \"Failed to save API key\";\n        throw new Error(errorMessage);\n      }\n    } catch (error) {\n      console.error(\"Error saving API key:\", error);\n      throw error;\n    }\n  };\n\n  const handleMCPCredentialsSubmit = async (\n    serverId: number,\n    credentials: Record<string, string>\n  ) => {\n    try {\n      const response = await fetch(\"/api/mcp/user-credentials\", {\n        method: \"POST\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({\n          server_id: serverId,\n          credentials: credentials,\n          transport: \"streamable-http\",\n        }),\n      });\n\n      if (!response.ok) {\n        const errorData = await response.json().catch(() => ({}));\n        const errorMessage = errorData.detail || \"Failed to save credentials\";\n        throw new Error(errorMessage);\n      }\n    } catch (error) {\n      console.error(\"Error saving credentials:\", error);\n      throw error;\n    }\n  };\n\n  const handleServerAuthentication = (server: MCPServer) => {\n    const authType = server.auth_type;\n    const performer = server.auth_performer;\n\n    if (\n      authType === MCPAuthenticationType.NONE ||\n      performer === MCPAuthenticationPerformer.ADMIN\n    ) {\n      return;\n    }\n\n    if (authType === MCPAuthenticationType.OAUTH) {\n      handleMCPAuthenticate(server.id, MCPAuthenticationType.OAUTH);\n    } else if (authType === MCPAuthenticationType.API_TOKEN) {\n      setMcpApiKeyModal({\n        isOpen: true,\n        serverId: server.id,\n        serverName: server.name,\n        authTemplate: server.auth_template,\n        onSuccess: () => {\n          // Update the authentication state after successful credential submission\n          setMcpServerData((prev) => ({\n            ...prev,\n            [server.id]: {\n              ...prev[server.id],\n              isAuthenticated: true,\n              isLoading: false,\n            },\n          }));\n        },\n        isAuthenticated: server.user_authenticated,\n        existingCredentials: server.user_credentials,\n      });\n    }\n  };\n\n  // Filter tools based on search term\n  const filteredTools = displayTools.filter((tool) => {\n    if (!searchTerm) return true;\n    const searchLower = searchTerm.toLowerCase();\n    return (\n      tool.display_name?.toLowerCase().includes(searchLower) ||\n      tool.name.toLowerCase().includes(searchLower) ||\n      tool.description?.toLowerCase().includes(searchLower)\n    );\n  });\n\n  // Filter MCP servers based on search term\n  const filteredMCPServers = mcpServers.filter((server) => {\n    if (!searchTerm) return true;\n    const searchLower = searchTerm.toLowerCase();\n    return server.name.toLowerCase().includes(searchLower);\n  });\n\n  const selectedMcpServerId =\n    secondaryView?.type === \"mcp\" ? secondaryView.serverId : null;\n  const selectedMcpServer = selectedMcpServerId\n    ? mcpServers.find((server) => server.id === selectedMcpServerId)\n    : undefined;\n  const selectedMcpTools =\n    selectedMcpServerId !== null\n      ? selectedAgent.tools.filter(\n          (t) => t.mcp_server_id === Number(selectedMcpServerId)\n        )\n      : [];\n  const selectedMcpServerData = selectedMcpServer\n    ? mcpServerData[selectedMcpServer.id]\n    : undefined;\n  const isActiveServerAuthenticated =\n    selectedMcpServerData?.isAuthenticated ??\n    !!(\n      selectedMcpServer?.user_authenticated ||\n      selectedMcpServer?.is_authenticated\n    );\n  const showActiveReauthRow =\n    !!selectedMcpServer &&\n    selectedMcpTools.length > 0 &&\n    selectedMcpServer.auth_performer === MCPAuthenticationPerformer.PER_USER &&\n    selectedMcpServer.auth_type !== MCPAuthenticationType.NONE &&\n    isActiveServerAuthenticated;\n\n  const mcpToggleItems: SwitchListItem[] = selectedMcpTools.map((tool) => ({\n    id: tool.id.toString(),\n    label: tool.display_name || tool.name,\n    description: tool.description,\n    isEnabled: !disabledToolIds.includes(tool.id),\n    onToggle: () => toggleToolForCurrentAgent(tool.id),\n  }));\n\n  const mcpAllDisabled = selectedMcpTools.every((tool) =>\n    disabledToolIds.includes(tool.id)\n  );\n\n  const disableAllToolsForSelectedServer = () => {\n    if (!selectedMcpServer) return;\n    const serverToolIds = selectedMcpTools.map((tool) => tool.id);\n    const merged = Array.from(new Set([...disabledToolIds, ...serverToolIds]));\n    setSpecificAgentPreferences(selectedAgent.id, {\n      disabled_tool_ids: merged,\n    });\n    setForcedToolIds(forcedToolIds.filter((id) => !serverToolIds.includes(id)));\n  };\n\n  const enableAllToolsForSelectedServer = () => {\n    if (!selectedMcpServer) return;\n    const serverToolIdSet = new Set(selectedMcpTools.map((tool) => tool.id));\n    setSpecificAgentPreferences(selectedAgent.id, {\n      disabled_tool_ids: disabledToolIds.filter(\n        (id) => !serverToolIdSet.has(id)\n      ),\n    });\n  };\n\n  const handleFooterReauthClick = () => {\n    if (selectedMcpServer) {\n      handleServerAuthentication(selectedMcpServer);\n    }\n  };\n\n  const handleOpenChange = (newOpen: boolean) => {\n    setOpen(newOpen);\n    if (newOpen) {\n      setSecondaryView(null);\n      setSearchTerm(\"\");\n    }\n  };\n\n  const mcpFooter = showActiveReauthRow ? (\n    <LineItem\n      onClick={handleFooterReauthClick}\n      icon={selectedMcpServerData?.isLoading ? SimpleLoader : SvgKey}\n      rightChildren={\n        <Button icon={SvgChevronRight} prominence=\"tertiary\" size=\"sm\" />\n      }\n    >\n      Re-Authenticate\n    </LineItem>\n  ) : undefined;\n\n  const configuredSources = getConfiguredSources(availableSources);\n\n  const numSourcesEnabled = configuredSources.filter((source) =>\n    isSourceEnabled(source.uniqueKey)\n  ).length;\n  const searchToolDisabled =\n    searchToolId !== null && disabledToolIds.includes(searchToolId);\n\n  // Sync search tool state with sources on mount/when states change\n  useEffect(() => {\n    if (searchToolId === null || !sourcesInitialized) return;\n\n    const hasEnabledSources = numSourcesEnabled > 0;\n    if (hasEnabledSources && searchToolDisabled) {\n      // Sources are enabled but search tool is disabled - enable it\n      toggleToolForCurrentAgent(searchToolId);\n    } else if (!hasEnabledSources && !searchToolDisabled) {\n      // No sources enabled but search tool is enabled - disable it\n      toggleToolForCurrentAgent(searchToolId);\n    }\n  }, [\n    searchToolId,\n    numSourcesEnabled,\n    searchToolDisabled,\n    sourcesInitialized,\n    toggleToolForCurrentAgent,\n  ]);\n\n  // Set search tool to a specific enabled/disabled state (only toggles if needed)\n  const setSearchToolEnabled = (enabled: boolean) => {\n    if (searchToolId === null) return;\n\n    if (enabled && searchToolDisabled) {\n      toggleToolForCurrentAgent(searchToolId);\n    } else if (!enabled && !searchToolDisabled) {\n      toggleToolForCurrentAgent(searchToolId);\n    }\n  };\n\n  const handleSourceToggle = (sourceUniqueKey: string) => {\n    const willEnable = !isSourceEnabled(sourceUniqueKey);\n    const newEnabledCount = numSourcesEnabled + (willEnable ? 1 : -1);\n\n    toggleSource(sourceUniqueKey);\n    setSearchToolEnabled(newEnabledCount > 0);\n  };\n\n  const handleDisableAllSources = () => {\n    disableAllSources();\n    setSearchToolEnabled(false);\n  };\n\n  const handleEnableAllSources = () => {\n    enableAllSources();\n    setSearchToolEnabled(true);\n  };\n\n  const handleToggleTool = (toolId: number) => {\n    const wasDisabled = disabledToolIds.includes(toolId);\n    toggleToolForCurrentAgent(toolId);\n\n    if (toolId === searchToolId) {\n      if (wasDisabled) {\n        // Enabling - restore previous sources or enable all (persisted to localStorage)\n        const previous = previouslyEnabledSourcesRef.current;\n        if (previous.length > 0) {\n          enableSources(previous);\n        } else {\n          baseEnableAllSources();\n        }\n        previouslyEnabledSourcesRef.current = [];\n      } else {\n        // Disabling - store current sources then disable all (persisted to localStorage)\n        previouslyEnabledSourcesRef.current = [...selectedSources];\n        baseDisableAllSources();\n      }\n    }\n  };\n\n  // Only show sources the agent has access to\n  const accessibleConfiguredSources = configuredSources.filter(\n    (source) =>\n      agentAccessibleSources === null ||\n      agentAccessibleSources.has(source.uniqueKey)\n  );\n\n  const sourceToggleItems: SwitchListItem[] = accessibleConfiguredSources.map(\n    (source) => ({\n      id: source.uniqueKey,\n      label: source.displayName,\n      leading: <SourceIcon sourceType={source.internalName} iconSize={16} />,\n      isEnabled: isSourceEnabled(source.uniqueKey),\n      onToggle: () => handleSourceToggle(source.uniqueKey),\n    })\n  );\n\n  const allSourcesDisabled = configuredSources.every(\n    (source) => !isSourceEnabled(source.uniqueKey)\n  );\n\n  // Count enabled sources for display (only accessible sources)\n  const enabledSourceCount = accessibleConfiguredSources.filter((source) =>\n    isSourceEnabled(source.uniqueKey)\n  ).length;\n  const totalSourceCount = accessibleConfiguredSources.length;\n\n  const primaryView = (\n    <PopoverMenu>\n      {[\n        <InputTypeIn\n          key=\"search\"\n          placeholder=\"Search Actions\"\n          value={searchTerm}\n          onChange={(event) => setSearchTerm(event.target.value)}\n          autoFocus\n          variant=\"internal\"\n        />,\n\n        // Actions\n        ...filteredTools.map((tool) =>\n          (() => {\n            const isToolAvailable = availableToolIdSet.has(tool.id);\n            const isUnavailable =\n              !isToolAvailable && tool.in_code_tool_id !== SEARCH_TOOL_ID;\n            const canAdminConfigure = isAdmin || isCurator;\n            const adminConfigureInfo =\n              isUnavailable && canAdminConfigure\n                ? getAdminConfigureInfo(tool)\n                : null;\n            return (\n              <ActionLineItem\n                key={tool.id}\n                tool={tool}\n                disabled={disabledToolIds.includes(tool.id)}\n                isForced={forcedToolIds.includes(tool.id)}\n                isUnavailable={isUnavailable}\n                tooltip={getToolTooltip(\n                  tool,\n                  isToolAvailable,\n                  canAdminConfigure\n                )}\n                showAdminConfigure={!!adminConfigureInfo}\n                adminConfigureHref={adminConfigureInfo?.href}\n                adminConfigureTooltip={adminConfigureInfo?.tooltip}\n                onToggle={() => handleToggleTool(tool.id)}\n                onForceToggle={() =>\n                  handleForceToggleWithTracking(\n                    tool.id,\n                    forcedToolIds.includes(tool.id)\n                  )\n                }\n                onSourceManagementOpen={() =>\n                  setSecondaryView({ type: \"sources\" })\n                }\n                hasNoConnectors={hasNoConnectors}\n                toolAuthStatus={getToolAuthStatus(tool)}\n                onOAuthAuthenticate={() => authenticateTool(tool)}\n                onClose={() => setOpen(false)}\n                sourceCounts={{\n                  enabled: enabledSourceCount,\n                  total: totalSourceCount,\n                }}\n              />\n            );\n          })()\n        ),\n\n        // MCP Servers\n        ...filteredMCPServers.map((server) => {\n          const serverData = mcpServerData[server.id] || {\n            isAuthenticated:\n              !!server.user_authenticated || !!server.is_authenticated,\n            isLoading: false,\n          };\n\n          // Tools for this server come from assistant.tools\n          const serverTools = selectedAgent.tools.filter(\n            (t) => t.mcp_server_id === Number(server.id)\n          );\n          const enabledTools = serverTools.filter(\n            (t) => !disabledToolIds.includes(t.id)\n          );\n\n          return (\n            <MCPLineItem\n              key={server.id}\n              server={server}\n              isActive={selectedMcpServerId === server.id}\n              tools={serverTools}\n              enabledTools={enabledTools}\n              isAuthenticated={serverData.isAuthenticated}\n              isLoading={serverData.isLoading}\n              onSelect={() =>\n                setSecondaryView({\n                  type: \"mcp\",\n                  serverId: server.id,\n                })\n              }\n              onAuthenticate={() => handleServerAuthentication(server)}\n            />\n          );\n        }),\n\n        null,\n\n        (isAdmin || isCurator) && (\n          <LineItem href=\"/admin/actions\" icon={SvgActions} key=\"more-actions\">\n            More Actions\n          </LineItem>\n        ),\n      ]}\n    </PopoverMenu>\n  );\n\n  const toolsView = (\n    <SwitchList\n      items={sourceToggleItems}\n      searchPlaceholder=\"Search Filters\"\n      allDisabled={allSourcesDisabled}\n      onDisableAll={handleDisableAllSources}\n      onEnableAll={handleEnableAllSources}\n      disableAllLabel=\"Disable All Sources\"\n      enableAllLabel=\"Enable All Sources\"\n      onBack={() => setSecondaryView(null)}\n    />\n  );\n\n  const mcpView = (\n    <SwitchList\n      items={mcpToggleItems}\n      searchPlaceholder={`Search ${selectedMcpServer?.name ?? \"server\"} tools`}\n      allDisabled={mcpAllDisabled}\n      onDisableAll={disableAllToolsForSelectedServer}\n      onEnableAll={enableAllToolsForSelectedServer}\n      disableAllLabel=\"Disable All Tools\"\n      enableAllLabel=\"Enable All Tools\"\n      onBack={() => setSecondaryView(null)}\n      footer={mcpFooter}\n    />\n  );\n\n  // If no tools or MCP servers are available, don't render the component\n  if (displayTools.length === 0 && mcpServers.length === 0) return null;\n\n  return (\n    <>\n      <Popover open={open} onOpenChange={handleOpenChange}>\n        <Popover.Trigger asChild>\n          <div data-testid=\"action-management-toggle\">\n            <Button\n              disabled={disabled}\n              icon={SvgSliders}\n              interaction={open ? \"hover\" : \"rest\"}\n              prominence=\"tertiary\"\n              tooltip=\"Manage Actions\"\n            />\n          </div>\n        </Popover.Trigger>\n        <Popover.Content side=\"bottom\" align=\"start\" width=\"lg\">\n          <div data-testid=\"tool-options\">\n            {secondaryView\n              ? secondaryView.type === \"mcp\"\n                ? mcpView\n                : toolsView\n              : primaryView}\n          </div>\n        </Popover.Content>\n      </Popover>\n\n      {/* MCP API Key Modal */}\n      {mcpApiKeyModal.isOpen && (\n        <MCPApiKeyModal\n          isOpen={mcpApiKeyModal.isOpen}\n          onClose={() =>\n            setMcpApiKeyModal({\n              isOpen: false,\n              serverId: null,\n              serverName: \"\",\n              authTemplate: undefined,\n              onSuccess: undefined,\n              isAuthenticated: false,\n              existingCredentials: undefined,\n            })\n          }\n          serverName={mcpApiKeyModal.serverName}\n          serverId={mcpApiKeyModal.serverId ?? 0}\n          authTemplate={mcpApiKeyModal.authTemplate}\n          onSubmit={handleMCPApiKeySubmit}\n          onSubmitCredentials={handleMCPCredentialsSubmit}\n          onSuccess={mcpApiKeyModal.onSuccess}\n          isAuthenticated={mcpApiKeyModal.isAuthenticated}\n          existingCredentials={mcpApiKeyModal.existingCredentials}\n        />\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/popovers/FilePickerPopover.tsx",
    "content": "\"use client\";\n\nimport React, { useEffect, useMemo, useRef, useState } from \"react\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport { cn, noProp } from \"@/lib/utils\";\nimport UserFilesModal from \"@/components/modals/UserFilesModal\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport {\n  ProjectFile,\n  UserFileStatus,\n} from \"@/app/app/projects/projectsService\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport { toast } from \"@/hooks/useToast\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { MAX_FILES_TO_SHOW } from \"@/lib/constants\";\nimport { isImageFile } from \"@/lib/utils\";\nimport {\n  SvgExternalLink,\n  SvgFileText,\n  SvgImage,\n  SvgLoader,\n  SvgMoreHorizontal,\n  SvgPaperclip,\n} from \"@opal/icons\";\nconst getFileExtension = (fileName: string): string => {\n  const idx = fileName.lastIndexOf(\".\");\n  if (idx === -1) return \"\";\n  const ext = fileName.slice(idx + 1).toLowerCase();\n  if (ext === \"txt\") return \"PLAINTEXT\";\n  return ext.toUpperCase();\n};\n\ninterface FileLineItemProps {\n  projectFile: ProjectFile;\n  onPickRecent: (file: ProjectFile) => void;\n  onFileClick: (file: ProjectFile) => void;\n}\n\nfunction FileLineItem({\n  projectFile,\n  onPickRecent,\n  onFileClick,\n}: FileLineItemProps) {\n  const showLoader = useMemo(\n    () =>\n      String(projectFile.status) === UserFileStatus.PROCESSING ||\n      String(projectFile.status) === UserFileStatus.UPLOADING ||\n      String(projectFile.status) === UserFileStatus.DELETING,\n    [projectFile.status]\n  );\n\n  const disableActionButton = useMemo(\n    () =>\n      String(projectFile.status) === UserFileStatus.UPLOADING ||\n      String(projectFile.status) === UserFileStatus.DELETING,\n    [projectFile.status]\n  );\n\n  return (\n    <LineItem\n      key={projectFile.id}\n      onClick={noProp(() => onPickRecent(projectFile))}\n      icon={\n        showLoader\n          ? ({ className }) => (\n              <SvgLoader className={cn(className, \"animate-spin\")} />\n            )\n          : isImageFile(projectFile.name)\n            ? SvgImage\n            : SvgFileText\n      }\n      rightChildren={\n        <div className=\"h-[1rem] flex flex-col justify-center\">\n          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n          <IconButton\n            icon={SvgExternalLink}\n            onClick={noProp(() => onFileClick(projectFile))}\n            tooltip=\"View File\"\n            disabled={disableActionButton}\n            internal\n            className=\"hidden group-hover/LineItem:flex\"\n          />\n          <Text\n            as=\"p\"\n            className=\"flex group-hover/LineItem:hidden\"\n            secondaryBody\n            text03\n          >\n            {getFileExtension(projectFile.name)}\n          </Text>\n        </div>\n      }\n    >\n      {projectFile.name}\n    </LineItem>\n  );\n}\n\ninterface FilePickerPopoverContentsProps {\n  recentFiles: ProjectFile[];\n  onPickRecent: (file: ProjectFile) => void;\n  onFileClick: (file: ProjectFile) => void;\n  triggerUploadPicker: () => void;\n  openRecentFilesModal: () => void;\n}\n\nfunction FilePickerPopoverContents({\n  recentFiles,\n  onPickRecent,\n  onFileClick,\n  triggerUploadPicker,\n  openRecentFilesModal,\n}: FilePickerPopoverContentsProps) {\n  // These are the \"quick\" files that we show. Essentially \"speed dial\", but for files.\n  // The rest of the files will be hidden behind the \"All Recent Files\" button, should there be more files left to show!\n  const hasFiles = recentFiles.length > 0;\n  const shouldShowMoreFilesButton = recentFiles.length > MAX_FILES_TO_SHOW;\n  const quickAccessFiles = recentFiles.slice(0, MAX_FILES_TO_SHOW);\n\n  return (\n    <PopoverMenu>\n      {[\n        // Action button to upload more files\n        <LineItem\n          key=\"upload-files\"\n          icon={SvgPaperclip}\n          description=\"Upload a file from your device\"\n          onClick={triggerUploadPicker}\n        >\n          Upload Files\n        </LineItem>,\n\n        // Separator\n        null,\n\n        // Title\n        hasFiles && (\n          <div key=\"recent-files\" className=\"pt-1\">\n            <Text as=\"p\" text02 secondaryBody className=\"py-1 px-3\">\n              Recent Files\n            </Text>\n          </div>\n        ),\n\n        // Quick access files\n        ...quickAccessFiles.map((projectFile) => (\n          <FileLineItem\n            key={projectFile.id}\n            projectFile={projectFile}\n            onPickRecent={onPickRecent}\n            onFileClick={onFileClick}\n          />\n        )),\n\n        // Rest of the files\n        shouldShowMoreFilesButton && (\n          <LineItem icon={SvgMoreHorizontal} onClick={openRecentFilesModal}>\n            All Recent Files\n          </LineItem>\n        ),\n      ]}\n    </PopoverMenu>\n  );\n}\n\nexport interface FilePickerPopoverProps {\n  onPickRecent?: (file: ProjectFile) => void;\n  onUnpickRecent?: (file: ProjectFile) => void;\n  onFileClick?: (file: ProjectFile) => void;\n  handleUploadChange: (e: React.ChangeEvent<HTMLInputElement>) => void;\n  trigger?: React.ReactNode | ((open: boolean) => React.ReactNode);\n  selectedFileIds?: string[];\n}\n\nexport default function FilePickerPopover({\n  onPickRecent,\n  onUnpickRecent,\n  onFileClick,\n  handleUploadChange,\n  trigger,\n  selectedFileIds,\n}: FilePickerPopoverProps) {\n  const { allRecentFiles } = useProjectsContext();\n  const fileInputRef = useRef<HTMLInputElement | null>(null);\n  const recentFilesModal = useCreateModal();\n  const [open, setOpen] = useState(false);\n  // Snapshot of recent files to avoid re-arranging when the modal is open\n  const [recentFilesSnapshot, setRecentFilesSnapshot] = useState<ProjectFile[]>(\n    []\n  );\n  const { deleteUserFile, setCurrentMessageFiles } = useProjectsContext();\n  const [deletedFileIds, setDeletedFileIds] = useState<string[]>([]);\n\n  const triggerUploadPicker = () => fileInputRef.current?.click();\n\n  useEffect(() => {\n    setRecentFilesSnapshot(\n      allRecentFiles.slice().filter((f) => !deletedFileIds.includes(f.id))\n    );\n  }, [allRecentFiles]);\n\n  const handleDeleteFile = (file: ProjectFile) => {\n    const lastStatus = file.status;\n    setRecentFilesSnapshot((prev) =>\n      prev.map((f) =>\n        f.id === file.id ? { ...f, status: UserFileStatus.DELETING } : f\n      )\n    );\n    deleteUserFile(file.id)\n      .then((result) => {\n        if (!result.has_associations) {\n          toast.success(\"File deleted successfully\");\n          setCurrentMessageFiles((prev) =>\n            prev.filter((f) => f.id !== file.id)\n          );\n          setDeletedFileIds((prev) => [...prev, file.id]);\n          setRecentFilesSnapshot((prev) => prev.filter((f) => f.id != file.id));\n        } else {\n          setRecentFilesSnapshot((prev) =>\n            prev.map((f) =>\n              f.id === file.id ? { ...f, status: lastStatus } : f\n            )\n          );\n          let projects = result.project_names.join(\", \");\n          let assistants = result.assistant_names.join(\", \");\n          let message = \"Cannot delete file. It is associated with\";\n          if (projects) {\n            message += ` projects: ${projects}`;\n          }\n          if (projects && assistants) {\n            message += \" and \";\n          }\n          if (assistants) {\n            message += `assistants: ${assistants}`;\n          }\n\n          toast.error(message);\n        }\n      })\n      .catch((error) => {\n        // Revert status and show error if the delete request fails\n        setRecentFilesSnapshot((prev) =>\n          prev.map((f) => (f.id === file.id ? { ...f, status: lastStatus } : f))\n        );\n        toast.error(\"Failed to delete file. Please try again.\");\n        // Useful for debugging; safe in client components\n        console.error(\"Failed to delete file\", error);\n      });\n  };\n\n  return (\n    <>\n      <input\n        ref={fileInputRef}\n        type=\"file\"\n        className=\"hidden\"\n        multiple\n        onChange={handleUploadChange}\n        accept={\"*/*\"}\n      />\n\n      <recentFilesModal.Provider>\n        <UserFilesModal\n          title=\"Recent Files\"\n          description=\"Upload files or pick from your recent files.\"\n          recentFiles={recentFilesSnapshot}\n          onPickRecent={(file) => {\n            onPickRecent && onPickRecent(file);\n          }}\n          onUnpickRecent={(file) => {\n            onUnpickRecent && onUnpickRecent(file);\n          }}\n          handleUploadChange={handleUploadChange}\n          onView={onFileClick}\n          selectedFileIds={selectedFileIds}\n          onDelete={handleDeleteFile}\n        />\n      </recentFilesModal.Provider>\n\n      <Popover open={open} onOpenChange={setOpen}>\n        <Popover.Trigger asChild>\n          {typeof trigger === \"function\" ? trigger(open) : trigger}\n        </Popover.Trigger>\n        <Popover.Content align=\"start\" side=\"bottom\" width=\"lg\">\n          <FilePickerPopoverContents\n            recentFiles={recentFilesSnapshot}\n            onPickRecent={(file) => {\n              onPickRecent && onPickRecent(file);\n              setOpen(false);\n            }}\n            onFileClick={(file) => {\n              onFileClick && onFileClick(file);\n              setOpen(false);\n            }}\n            triggerUploadPicker={() => {\n              triggerUploadPicker();\n              setOpen(false);\n            }}\n            openRecentFilesModal={() => {\n              recentFilesModal.toggle(true);\n              // Close the small popover when opening the dialog\n              setOpen(false);\n            }}\n          />\n        </Popover.Content>\n      </Popover>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/popovers/LLMPopover.test.tsx",
    "content": "import { buildLlmOptions, groupLlmOptions } from \"./LLMPopover\";\nimport { LLMOption } from \"./interfaces\";\nimport { LLMProviderDescriptor } from \"@/interfaces/llm\";\nimport { makeProvider } from \"@tests/setup/llmProviderTestUtils\";\n\ndescribe(\"LLMPopover helpers\", () => {\n  test(\"deduplicates identical provider+model combinations across provider entries\", () => {\n    const providers: LLMProviderDescriptor[] = [\n      makeProvider({\n        name: \"OpenAI A\",\n        provider: \"openai\",\n        model_configurations: [\n          {\n            name: \"shared-model\",\n            is_visible: true,\n            max_input_tokens: null,\n            supports_image_input: false,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n      makeProvider({\n        name: \"OpenAI B\",\n        provider: \"openai\",\n        model_configurations: [\n          {\n            name: \"shared-model\",\n            is_visible: true,\n            max_input_tokens: null,\n            supports_image_input: false,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n      makeProvider({\n        name: \"Anthropic A\",\n        provider: \"anthropic\",\n        model_configurations: [\n          {\n            name: \"shared-model\",\n            is_visible: true,\n            max_input_tokens: null,\n            supports_image_input: false,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n    ];\n\n    const options = buildLlmOptions(providers);\n    const sharedModelOptions = options.filter(\n      (o) => o.modelName === \"shared-model\"\n    );\n\n    expect(sharedModelOptions).toHaveLength(2);\n    expect(sharedModelOptions.map((o) => o.provider).sort()).toEqual([\n      \"anthropic\",\n      \"openai\",\n    ]);\n  });\n\n  test(\"includes currently selected hidden model in options\", () => {\n    const providers: LLMProviderDescriptor[] = [\n      makeProvider({\n        name: \"OpenAI A\",\n        provider: \"openai\",\n        model_configurations: [\n          {\n            name: \"hidden-selected-model\",\n            is_visible: false,\n            max_input_tokens: null,\n            supports_image_input: false,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n    ];\n\n    const options = buildLlmOptions(providers, \"hidden-selected-model\");\n    expect(options.map((o) => o.modelName)).toContain(\"hidden-selected-model\");\n  });\n\n  test(\"groups aggregator options by provider/vendor and sorts by display name\", () => {\n    const options: LLMOption[] = [\n      {\n        name: \"Bedrock Provider\",\n        provider: \"bedrock\",\n        providerDisplayName: \"Amazon Bedrock\",\n        modelName: \"claude-3-5-sonnet\",\n        displayName: \"Claude 3.5 Sonnet\",\n        vendor: \"anthropic\",\n      },\n      {\n        name: \"OpenAI Provider\",\n        provider: \"openai\",\n        providerDisplayName: \"ChatGPT (OpenAI)\",\n        modelName: \"gpt-4o-mini\",\n        displayName: \"GPT-4o Mini\",\n        vendor: null,\n      },\n    ];\n\n    const grouped = groupLlmOptions(options);\n\n    expect(grouped.map((group) => group.key)).toEqual([\n      \"bedrock/anthropic\",\n      \"openai\",\n    ]);\n    expect(grouped[0]?.displayName).toBe(\"Amazon Bedrock/Anthropic\");\n    expect(grouped[1]?.displayName).toBe(\"ChatGPT (OpenAI)\");\n    expect(grouped[0]?.options).toHaveLength(1);\n    expect(grouped[1]?.options).toHaveLength(1);\n  });\n});\n"
  },
  {
    "path": "web/src/refresh-components/popovers/LLMPopover.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useCallback, useMemo, useRef } from \"react\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport { LlmDescriptor, LlmManager } from \"@/lib/hooks\";\nimport { structureValue } from \"@/lib/llmConfig/utils\";\nimport {\n  getProviderIcon,\n  AGGREGATOR_PROVIDERS,\n} from \"@/app/admin/configuration/llm/utils\";\nimport { LLMProviderDescriptor } from \"@/interfaces/llm\";\nimport { Slider } from \"@/components/ui/slider\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport {\n  Accordion,\n  AccordionContent,\n  AccordionItem,\n  AccordionTrigger,\n} from \"@/components/ui/accordion\";\nimport {\n  SvgCheck,\n  SvgChevronDown,\n  SvgChevronRight,\n  SvgRefreshCw,\n} from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { OpenButton } from \"@opal/components\";\nimport { LLMOption, LLMOptionGroup } from \"./interfaces\";\n\nexport interface LLMPopoverProps {\n  llmManager: LlmManager;\n  requiresImageInput?: boolean;\n  foldable?: boolean;\n  onSelect?: (value: string) => void;\n  currentModelName?: string;\n  disabled?: boolean;\n}\n\nexport function buildLlmOptions(\n  llmProviders: LLMProviderDescriptor[] | undefined,\n  currentModelName?: string\n): LLMOption[] {\n  if (!llmProviders) {\n    return [];\n  }\n\n  // Track seen combinations of provider + exact model name to avoid true duplicates\n  // (same model appearing from multiple LLM provider configs with same provider type)\n  const seenKeys = new Set<string>();\n  const options: LLMOption[] = [];\n\n  llmProviders.forEach((llmProvider) => {\n    llmProvider.model_configurations\n      .filter(\n        (modelConfiguration) =>\n          modelConfiguration.is_visible ||\n          modelConfiguration.name === currentModelName\n      )\n      .forEach((modelConfiguration) => {\n        // Deduplicate by exact provider + model name combination\n        const key = `${llmProvider.provider}:${modelConfiguration.name}`;\n        if (seenKeys.has(key)) {\n          return;\n        }\n        seenKeys.add(key);\n\n        options.push({\n          name: llmProvider.name,\n          provider: llmProvider.provider,\n          providerDisplayName:\n            llmProvider.provider_display_name || llmProvider.provider,\n          modelName: modelConfiguration.name,\n          displayName:\n            modelConfiguration.display_name || modelConfiguration.name,\n          vendor: modelConfiguration.vendor || null,\n          maxInputTokens: modelConfiguration.max_input_tokens,\n          region: modelConfiguration.region || null,\n          version: modelConfiguration.version || null,\n          supportsReasoning: modelConfiguration.supports_reasoning || false,\n          supportsImageInput: modelConfiguration.supports_image_input || false,\n        });\n      });\n  });\n\n  return options;\n}\n\nexport function groupLlmOptions(\n  filteredOptions: LLMOption[]\n): LLMOptionGroup[] {\n  const groups = new Map<string, Omit<LLMOptionGroup, \"key\">>();\n\n  filteredOptions.forEach((option) => {\n    const provider = option.provider.toLowerCase();\n    const isAggregator = AGGREGATOR_PROVIDERS.has(provider);\n    const groupKey =\n      isAggregator && option.vendor\n        ? `${provider}/${option.vendor.toLowerCase()}`\n        : provider;\n\n    if (!groups.has(groupKey)) {\n      let displayName: string;\n\n      if (isAggregator && option.vendor) {\n        const vendorDisplayName =\n          option.vendor.charAt(0).toUpperCase() + option.vendor.slice(1);\n        displayName = `${option.providerDisplayName}/${vendorDisplayName}`;\n      } else {\n        displayName = option.providerDisplayName;\n      }\n\n      groups.set(groupKey, {\n        displayName,\n        options: [],\n        Icon: getProviderIcon(provider),\n      });\n    }\n\n    groups.get(groupKey)!.options.push(option);\n  });\n\n  const sortedKeys = Array.from(groups.keys()).sort((a, b) =>\n    groups.get(a)!.displayName.localeCompare(groups.get(b)!.displayName)\n  );\n\n  return sortedKeys.map((key) => {\n    const group = groups.get(key)!;\n    return {\n      key,\n      displayName: group.displayName,\n      options: group.options,\n      Icon: group.Icon,\n    };\n  });\n}\n\nexport default function LLMPopover({\n  llmManager,\n  requiresImageInput,\n  foldable,\n  onSelect,\n  currentModelName,\n  disabled = false,\n}: LLMPopoverProps) {\n  const llmProviders = llmManager.llmProviders;\n  const isLoadingProviders = llmManager.isLoadingProviders;\n\n  const [open, setOpen] = useState(false);\n  const [searchQuery, setSearchQuery] = useState(\"\");\n  const { user } = useUser();\n\n  const [localTemperature, setLocalTemperature] = useState(\n    llmManager.temperature ?? 0.5\n  );\n\n  useEffect(() => {\n    setLocalTemperature(llmManager.temperature ?? 0.5);\n  }, [llmManager.temperature]);\n\n  const searchInputRef = useRef<HTMLInputElement>(null);\n  const scrollContainerRef = useRef<HTMLDivElement>(null);\n  const selectedItemRef = useRef<HTMLDivElement>(null);\n\n  const handleGlobalTemperatureChange = useCallback((value: number[]) => {\n    const value_0 = value[0];\n    if (value_0 !== undefined) {\n      setLocalTemperature(value_0);\n    }\n  }, []);\n\n  const handleGlobalTemperatureCommit = useCallback(\n    (value: number[]) => {\n      const value_0 = value[0];\n      if (value_0 !== undefined) {\n        llmManager.updateTemperature(value_0);\n      }\n    },\n    [llmManager]\n  );\n\n  const llmOptions = useMemo(\n    () => buildLlmOptions(llmProviders, currentModelName),\n    [llmProviders, currentModelName]\n  );\n\n  // Filter options by vision capability (when images are uploaded) and search query\n  const filteredOptions = useMemo(() => {\n    let result = llmOptions;\n    if (requiresImageInput) {\n      result = result.filter((opt) => opt.supportsImageInput);\n    }\n    if (searchQuery.trim()) {\n      const query = searchQuery.toLowerCase();\n      result = result.filter(\n        (opt) =>\n          opt.displayName.toLowerCase().includes(query) ||\n          opt.modelName.toLowerCase().includes(query) ||\n          (opt.vendor && opt.vendor.toLowerCase().includes(query))\n      );\n    }\n    return result;\n  }, [llmOptions, searchQuery, requiresImageInput]);\n\n  // Group options by provider using backend-provided display names and ordering\n  // For aggregator providers (bedrock, openrouter, vertex_ai), flatten to \"Provider/Vendor\" format\n  const groupedOptions = useMemo(\n    () => groupLlmOptions(filteredOptions),\n    [filteredOptions]\n  );\n\n  // Get display name for the model to show in the button\n  // Use currentModelName prop if provided (e.g., for regenerate showing the model used),\n  // otherwise fall back to the globally selected model\n  const currentLlmDisplayName = useMemo(() => {\n    // Only use currentModelName if it's a non-empty string\n    const currentModel =\n      currentModelName && currentModelName.trim()\n        ? currentModelName\n        : llmManager.currentLlm.modelName;\n    if (!llmProviders) return currentModel;\n\n    for (const provider of llmProviders) {\n      const config = provider.model_configurations.find(\n        (m) => m.name === currentModel\n      );\n      if (config) {\n        return config.display_name || config.name;\n      }\n    }\n    return currentModel;\n  }, [llmProviders, currentModelName, llmManager.currentLlm.modelName]);\n\n  // Determine which group the current model belongs to (for auto-expand)\n  const currentGroupKey = useMemo(() => {\n    const currentModel = llmManager.currentLlm.modelName;\n    const currentProvider = llmManager.currentLlm.provider;\n    // Match by both modelName AND provider to handle same model name across providers\n    const option = llmOptions.find(\n      (o) => o.modelName === currentModel && o.provider === currentProvider\n    );\n    if (!option) return \"openai\";\n\n    const provider = option.provider.toLowerCase();\n    const isAggregator = AGGREGATOR_PROVIDERS.has(provider);\n\n    if (isAggregator && option.vendor) {\n      return `${provider}/${option.vendor.toLowerCase()}`;\n    }\n    return provider;\n  }, [\n    llmOptions,\n    llmManager.currentLlm.modelName,\n    llmManager.currentLlm.provider,\n  ]);\n\n  // Track expanded groups - initialize with current model's group\n  const [expandedGroups, setExpandedGroups] = useState<string[]>([\n    currentGroupKey,\n  ]);\n\n  // Reset state when popover closes/opens\n  useEffect(() => {\n    if (!open) {\n      setSearchQuery(\"\");\n    } else {\n      // Reset expanded groups to only show the selected model's group\n      setExpandedGroups([currentGroupKey]);\n    }\n  }, [open, currentGroupKey]);\n\n  // Auto-scroll to selected model when popover opens\n  useEffect(() => {\n    if (open) {\n      // Small delay to let accordion content render\n      const timer = setTimeout(() => {\n        selectedItemRef.current?.scrollIntoView({\n          behavior: \"instant\",\n          block: \"center\",\n        });\n      }, 50);\n      return () => clearTimeout(timer);\n    }\n  }, [open]);\n\n  const isSearching = searchQuery.trim().length > 0;\n\n  // Compute final expanded groups\n  const effectiveExpandedGroups = useMemo(() => {\n    if (isSearching) {\n      // Force expand all when searching\n      return groupedOptions.map((g) => g.key);\n    }\n    return expandedGroups;\n  }, [isSearching, groupedOptions, expandedGroups]);\n\n  // Handler for accordion changes\n  const handleAccordionChange = (value: string[]) => {\n    // Only update state when not searching (force-expanding)\n    if (!isSearching) {\n      setExpandedGroups(value);\n    }\n  };\n\n  const handleSelectModel = (option: LLMOption) => {\n    llmManager.updateCurrentLlm({\n      modelName: option.modelName,\n      provider: option.provider,\n      name: option.name,\n    } as LlmDescriptor);\n    onSelect?.(structureValue(option.name, option.provider, option.modelName));\n    setOpen(false);\n  };\n\n  const renderModelItem = (option: LLMOption) => {\n    const isSelected =\n      option.modelName === llmManager.currentLlm.modelName &&\n      option.provider === llmManager.currentLlm.provider;\n\n    const capabilities: string[] = [];\n    if (option.supportsReasoning) {\n      capabilities.push(\"Reasoning\");\n    }\n    if (option.supportsImageInput) {\n      capabilities.push(\"Vision\");\n    }\n    const description =\n      capabilities.length > 0 ? capabilities.join(\", \") : undefined;\n\n    return (\n      <div\n        key={`${option.name}-${option.modelName}`}\n        ref={isSelected ? selectedItemRef : undefined}\n      >\n        <LineItem\n          selected={isSelected}\n          description={description}\n          onClick={() => handleSelectModel(option)}\n          rightChildren={\n            isSelected ? (\n              <SvgCheck className=\"h-4 w-4 stroke-action-link-05 shrink-0\" />\n            ) : null\n          }\n        >\n          {option.displayName}\n        </LineItem>\n      </div>\n    );\n  };\n\n  return (\n    <Popover open={open} onOpenChange={setOpen}>\n      <div data-testid=\"llm-popover-trigger\">\n        <Popover.Trigger asChild disabled={disabled}>\n          <OpenButton\n            disabled={disabled}\n            icon={\n              foldable\n                ? SvgRefreshCw\n                : getProviderIcon(\n                    llmManager.currentLlm.provider,\n                    llmManager.currentLlm.modelName\n                  )\n            }\n            foldable={foldable}\n          >\n            {currentLlmDisplayName}\n          </OpenButton>\n        </Popover.Trigger>\n      </div>\n\n      <Popover.Content side=\"top\" align=\"end\" width=\"xl\">\n        <Section gap={0.5}>\n          {/* Search Input */}\n          <InputTypeIn\n            ref={searchInputRef}\n            leftSearchIcon\n            variant=\"internal\"\n            value={searchQuery}\n            onChange={(e) => setSearchQuery(e.target.value)}\n            placeholder=\"Search models...\"\n          />\n\n          {/* Model List with Vendor Groups */}\n          <PopoverMenu scrollContainerRef={scrollContainerRef}>\n            {isLoadingProviders\n              ? [\n                  <div key=\"loading\" className=\"flex items-center gap-2 py-3\">\n                    <SimpleLoader />\n                    <Text secondaryBody text03>\n                      Loading models...\n                    </Text>\n                  </div>,\n                ]\n              : groupedOptions.length === 0\n                ? [\n                    <div key=\"empty\" className=\"py-3\">\n                      <Text secondaryBody text03>\n                        No models found\n                      </Text>\n                    </div>,\n                  ]\n                : groupedOptions.length === 1\n                  ? // Single provider - show models directly without accordion\n                    [\n                      <div\n                        key=\"single-provider\"\n                        className=\"flex flex-col gap-1\"\n                      >\n                        {groupedOptions[0]!.options.map(renderModelItem)}\n                      </div>,\n                    ]\n                  : // Multiple providers - show accordion with groups\n                    [\n                      <Accordion\n                        key=\"accordion\"\n                        type=\"multiple\"\n                        value={effectiveExpandedGroups}\n                        onValueChange={handleAccordionChange}\n                        className=\"w-full flex flex-col\"\n                      >\n                        {groupedOptions.map((group) => {\n                          const isExpanded = effectiveExpandedGroups.includes(\n                            group.key\n                          );\n                          return (\n                            <AccordionItem\n                              key={group.key}\n                              value={group.key}\n                              className=\"border-none pt-1\"\n                            >\n                              {/* Group Header */}\n                              <AccordionTrigger className=\"flex items-center rounded-08 hover:no-underline hover:bg-background-tint-02 group [&>svg]:hidden w-full py-1\">\n                                <div className=\"flex items-center gap-1 shrink-0\">\n                                  <div className=\"flex items-center justify-center size-5 shrink-0\">\n                                    <group.Icon size={16} />\n                                  </div>\n                                  <Text\n                                    secondaryBody\n                                    text03\n                                    nowrap\n                                    className=\"px-0.5\"\n                                  >\n                                    {group.displayName}\n                                  </Text>\n                                </div>\n                                <div className=\"flex-1\" />\n                                <div className=\"flex items-center justify-center size-6 shrink-0\">\n                                  {isExpanded ? (\n                                    <SvgChevronDown className=\"h-4 w-4 stroke-text-04 shrink-0\" />\n                                  ) : (\n                                    <SvgChevronRight className=\"h-4 w-4 stroke-text-04 shrink-0\" />\n                                  )}\n                                </div>\n                              </AccordionTrigger>\n\n                              {/* Model Items - full width highlight */}\n                              <AccordionContent className=\"pb-0 pt-0\">\n                                <div className=\"flex flex-col gap-1\">\n                                  {group.options.map(renderModelItem)}\n                                </div>\n                              </AccordionContent>\n                            </AccordionItem>\n                          );\n                        })}\n                      </Accordion>,\n                    ]}\n          </PopoverMenu>\n\n          {/* Global Temperature Slider (shown if enabled in user prefs) */}\n          {user?.preferences?.temperature_override_enabled && (\n            <>\n              <div className=\"border-t border-border-02 mx-2\" />\n              <div className=\"flex flex-col w-full py-2 gap-2\">\n                <Slider\n                  value={[localTemperature]}\n                  max={llmManager.maxTemperature}\n                  min={0}\n                  step={0.01}\n                  onValueChange={handleGlobalTemperatureChange}\n                  onValueCommit={handleGlobalTemperatureCommit}\n                  className=\"w-full\"\n                />\n                <div className=\"flex flex-row items-center justify-between\">\n                  <Text secondaryBody text03>\n                    Temperature (creativity)\n                  </Text>\n                  <Text secondaryBody text03>\n                    {localTemperature.toFixed(1)}\n                  </Text>\n                </div>\n              </div>\n            </>\n          )}\n        </Section>\n      </Popover.Content>\n    </Popover>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/popovers/interfaces.ts",
    "content": "import { IconProps } from \"@/components/icons/icons\";\nimport { FunctionComponent } from \"react\";\n\nexport interface LLMOption {\n  name: string;\n  provider: string;\n  providerDisplayName: string;\n  modelName: string;\n  displayName: string;\n  description?: string;\n  vendor: string | null;\n  maxInputTokens?: number | null;\n  region?: string | null;\n  version?: string | null;\n  supportsReasoning?: boolean;\n  supportsImageInput?: boolean;\n}\n\nexport interface LLMOptionGroup {\n  key: string;\n  displayName: string;\n  options: LLMOption[];\n  Icon: FunctionComponent<IconProps>;\n}\n"
  },
  {
    "path": "web/src/refresh-components/skeletons/ChatSessionSkeleton.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport ChatSessionSkeleton from \"./ChatSessionSkeleton\";\n\nconst meta: Meta<typeof ChatSessionSkeleton> = {\n  title: \"refresh-components/Skeletons/ChatSessionSkeleton\",\n  component: ChatSessionSkeleton,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"padded\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof ChatSessionSkeleton>;\n\nexport const Default: Story = {};\n\nexport const Multiple: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-1\" style={{ width: 300 }}>\n      <ChatSessionSkeleton />\n      <ChatSessionSkeleton />\n      <ChatSessionSkeleton />\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/skeletons/ChatSessionSkeleton.tsx",
    "content": "export default function ChatSessionSkeleton() {\n  return (\n    <div className=\"w-full rounded-08 py-2 p-1.5\">\n      <div className=\"flex gap-3 min-w-0 w-full\">\n        <div className=\"flex h-full w-fit pt-1 pl-1\">\n          <div className=\"h-4 w-4 rounded-full bg-background-tint-02 animate-pulse\" />\n        </div>\n        <div className=\"flex flex-col w-full gap-1\">\n          <div className=\"h-5 w-2/3 rounded bg-background-tint-02 animate-pulse\" />\n          <div className=\"h-4 w-1/2 rounded bg-background-tint-02 animate-pulse\" />\n        </div>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/skeletons/SidebarTabSkeleton.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport SidebarTabSkeleton from \"./SidebarTabSkeleton\";\n\nconst meta: Meta<typeof SidebarTabSkeleton> = {\n  title: \"refresh-components/Skeletons/SidebarTabSkeleton\",\n  component: SidebarTabSkeleton,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"padded\",\n  },\n};\n\nexport default meta;\ntype Story = StoryObj<typeof SidebarTabSkeleton>;\n\nexport const Default: Story = {};\n\nexport const NarrowText: Story = {\n  args: {\n    textWidth: \"w-1/3\",\n  },\n};\n\nexport const WideText: Story = {\n  args: {\n    textWidth: \"w-full\",\n  },\n};\n\nexport const Multiple: Story = {\n  render: () => (\n    <div className=\"flex flex-col gap-1\" style={{ width: 260 }}>\n      <SidebarTabSkeleton textWidth=\"w-3/4\" />\n      <SidebarTabSkeleton textWidth=\"w-1/2\" />\n      <SidebarTabSkeleton textWidth=\"w-2/3\" />\n      <SidebarTabSkeleton textWidth=\"w-1/3\" />\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/skeletons/SidebarTabSkeleton.tsx",
    "content": "import { cn } from \"@/lib/utils\";\n\ninterface SidebarTabSkeletonProps {\n  textWidth?: string;\n}\n\nexport default function SidebarTabSkeleton({\n  textWidth = \"w-2/3\",\n}: SidebarTabSkeletonProps) {\n  return (\n    <div className=\"w-full rounded-08 p-1.5\">\n      <div className=\"h-[1.5rem] flex flex-row items-center px-1 py-0.5\">\n        <div\n          className={cn(\n            \"h-3 rounded bg-background-tint-04 animate-pulse\",\n            textWidth\n          )}\n        />\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/texts/ExpandableTextDisplay.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nimport ExpandableTextDisplay from \"./ExpandableTextDisplay\";\n\nconst meta: Meta<typeof ExpandableTextDisplay> = {\n  title: \"refresh-components/texts/ExpandableTextDisplay\",\n  component: ExpandableTextDisplay,\n  tags: [\"autodocs\"],\n  parameters: {\n    layout: \"padded\",\n  },\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <Story />\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof ExpandableTextDisplay>;\n\nconst shortContent =\n  \"This is a short piece of content that fits within the default line clamp.\";\n\nconst longContent = Array.from(\n  { length: 30 },\n  (_, i) =>\n    `Line ${i + 1}: Lorem ipsum dolor sit amet, consectetur adipiscing elit.`\n).join(\"\\n\");\n\nexport const ShortContent: Story = {\n  args: {\n    title: \"Short Content\",\n    content: shortContent,\n  },\n};\n\nexport const LongContent: Story = {\n  args: {\n    title: \"Log Output\",\n    content: longContent,\n  },\n};\n\nexport const CustomMaxLines: Story = {\n  args: {\n    title: \"Compact View\",\n    content: longContent,\n    maxLines: 3,\n  },\n};\n\nexport const WithSubtitle: Story = {\n  args: {\n    title: \"Build Log\",\n    content: longContent,\n    subtitle: \"2.4 KB - 30 lines\",\n  },\n};\n\nexport const StreamingMode: Story = {\n  args: {\n    title: \"Live Output\",\n    content: longContent,\n    isStreaming: true,\n    maxLines: 5,\n  },\n};\n\nexport const WithCustomRenderer: Story = {\n  args: {\n    title: \"Formatted Content\",\n    content:\n      \"# Hello World\\n\\nThis is **bold** and this is *italic*.\\n\\n- Item 1\\n- Item 2\\n- Item 3\",\n    renderContent: (content: string) => (\n      <pre\n        style={{\n          whiteSpace: \"pre-wrap\",\n          fontFamily: \"monospace\",\n          fontSize: 13,\n        }}\n      >\n        {content}\n      </pre>\n    ),\n  },\n};\n"
  },
  {
    "path": "web/src/refresh-components/texts/ExpandableTextDisplay.tsx",
    "content": "\"use client\";\n\nimport { useState, useMemo, useRef, useEffect, useLayoutEffect } from \"react\";\nimport * as DialogPrimitive from \"@radix-ui/react-dialog\";\nimport Modal from \"@/refresh-components/Modal\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgDownload, SvgMaximize2, SvgX } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport { cn } from \"@/lib/utils\";\n\nexport interface ExpandableTextDisplayProps {\n  /** Title shown in header and modal */\n  title: string;\n  /** The full text content to display (used in modal and for copy/download) */\n  content: string;\n  /** Optional content to display in collapsed view (e.g., for streaming animation). Falls back to `content`. */\n  displayContent?: string;\n  /** Subtitle text (e.g., file size). If not provided, calculates from content */\n  subtitle?: string;\n  /** Maximum lines to show in collapsed state (1-10). Values outside this range default to 8. */\n  maxLines?: 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10;\n  /** Additional className for the container */\n  className?: string;\n  /** Optional custom renderer for content (e.g., markdown). Falls back to plain text.\n   * @param content - The text content to render\n   * @param isExpanded - Whether the content is being rendered in expanded (modal) view\n   */\n  renderContent?: (content: string, isExpanded: boolean) => React.ReactNode;\n  /** When true, shows last N lines with top-truncation (ellipsis at top) instead of bottom-truncation */\n  isStreaming?: boolean;\n}\n\n/** Calculate content size in human-readable format */\nfunction getContentSize(text: string): string {\n  const bytes = new Blob([text]).size;\n  if (bytes < 1024) return `${bytes} Bytes`;\n  return `${(bytes / 1024).toFixed(2)} KB`;\n}\n\n/** Count lines in text */\nfunction getLineCount(text: string): number {\n  return text.split(\"\\n\").length;\n}\n\n/** Extract the last N lines from text for streaming display.\n * When truncated, returns (maxLines - 1) lines to leave room for ellipsis.\n */\nfunction getLastLines(\n  text: string,\n  maxLines: number\n): { lines: string; hasTruncation: boolean } {\n  const allLines = text.split(\"\\n\");\n  if (allLines.length <= maxLines) {\n    return { lines: text, hasTruncation: false };\n  }\n  // Reserve one line for ellipsis, show last (maxLines - 1) content lines\n  const linesToShow = maxLines - 1;\n  if (linesToShow <= 0) {\n    return { lines: \"\", hasTruncation: true };\n  }\n  return {\n    lines: allLines.slice(-linesToShow).join(\"\\n\"),\n    hasTruncation: true,\n  };\n}\n\n/** Download content as a .txt file */\nfunction downloadAsTxt(content: string, filename: string) {\n  const blob = new Blob([content], { type: \"text/plain\" });\n  const url = URL.createObjectURL(blob);\n  try {\n    const a = document.createElement(\"a\");\n    a.href = url;\n    a.download = `${filename}.txt`;\n    document.body.appendChild(a);\n    a.click();\n    document.body.removeChild(a);\n  } finally {\n    URL.revokeObjectURL(url);\n  }\n}\n\n/** Block-level HTML tags used by the snap algorithm to recurse into containers. */\nconst CONTAINER_TAGS = new Set([\n  \"UL\",\n  \"OL\",\n  \"LI\",\n  \"BLOCKQUOTE\",\n  \"DIV\",\n  \"DL\",\n  \"DD\",\n  \"TABLE\",\n  \"TBODY\",\n  \"THEAD\",\n  \"TR\",\n  \"TH\",\n  \"TD\",\n  \"SECTION\",\n  \"DETAILS\",\n  \"PRE\",\n  \"FIGURE\",\n  \"FIGCAPTION\",\n  \"ARTICLE\",\n  \"ASIDE\",\n  \"HEADER\",\n  \"FOOTER\",\n  \"MAIN\",\n  \"NAV\",\n]);\n\nexport default function ExpandableTextDisplay({\n  title,\n  content,\n  displayContent,\n  subtitle,\n  maxLines = 8,\n  className,\n  renderContent,\n  isStreaming = false,\n}: ExpandableTextDisplayProps) {\n  const [isModalOpen, setIsModalOpen] = useState(false);\n  const [isTruncated, setIsTruncated] = useState(false);\n  const scrollRef = useRef<HTMLDivElement>(null);\n  const contentInnerRef = useRef<HTMLDivElement>(null);\n\n  const lineCount = useMemo(() => getLineCount(content), [content]);\n  const contentSize = useMemo(() => getContentSize(content), [content]);\n  const displaySubtitle = subtitle ?? contentSize;\n\n  // Truncation detection (read-only, doesn't need to block paint)\n  useEffect(() => {\n    if (renderContent && scrollRef.current) {\n      setIsTruncated(\n        scrollRef.current.scrollHeight > scrollRef.current.clientHeight\n      );\n    } else if (isStreaming) {\n      const textToCheck = displayContent ?? content;\n      setIsTruncated(getLineCount(textToCheck) > maxLines);\n    } else if (scrollRef.current) {\n      setIsTruncated(\n        scrollRef.current.scrollHeight > scrollRef.current.clientHeight\n      );\n    }\n  }, [isStreaming, renderContent, content, displayContent, maxLines]);\n\n  // Shift content upward during streaming for renderContent mode,\n  // snapping to element boundaries so blocks are never partially clipped.\n  // Must block paint to avoid flicker.\n  useLayoutEffect(() => {\n    if (\n      !isStreaming ||\n      !renderContent ||\n      !scrollRef.current ||\n      !contentInnerRef.current\n    ) {\n      return;\n    }\n\n    const containerHeight = scrollRef.current.clientHeight;\n    const contentHeight = contentInnerRef.current.scrollHeight;\n    let overflow = Math.max(0, contentHeight - containerHeight);\n\n    if (overflow > 0) {\n      let blockParent: Element = contentInnerRef.current;\n      while (\n        blockParent.children.length === 1 &&\n        blockParent.children[0]!.children.length > 0\n      ) {\n        blockParent = blockParent.children[0]!;\n      }\n\n      contentInnerRef.current.style.transform = \"translateY(0)\";\n      const refTop = contentInnerRef.current.getBoundingClientRect().top;\n\n      let snapParent: Element = blockParent;\n      let snap = overflow;\n      while (true) {\n        let found = false;\n        for (let i = 0; i < snapParent.children.length; i++) {\n          const child = snapParent.children[i] as HTMLElement;\n          const rect = child.getBoundingClientRect();\n          const top = rect.top - refTop;\n          const bottom = top + rect.height;\n          if (top < snap && snap < bottom) {\n            if (\n              child.children.length > 0 &&\n              CONTAINER_TAGS.has(child.tagName)\n            ) {\n              snapParent = child;\n              found = true;\n              break;\n            }\n            snap = bottom;\n            found = true;\n            break;\n          }\n        }\n        if (!found) break;\n        if (snap !== overflow) break;\n      }\n      overflow = snap;\n    }\n\n    contentInnerRef.current.style.transform =\n      overflow > 0 ? `translateY(-${overflow}px)` : \"translateY(0)\";\n  }, [isStreaming, renderContent, content, displayContent, maxLines]);\n\n  const handleDownload = () => {\n    const sanitizedTitle = title.replace(/[^a-z0-9]/gi, \"_\").toLowerCase();\n    downloadAsTxt(content, sanitizedTitle);\n  };\n\n  // Map maxLines to Tailwind line-clamp classes (fallback to 8 for invalid runtime values)\n  const lineClampClass =\n    {\n      1: \"line-clamp-1\",\n      2: \"line-clamp-2\",\n      3: \"line-clamp-3\",\n      4: \"line-clamp-4\",\n      5: \"line-clamp-5\",\n      6: \"line-clamp-6\",\n      7: \"line-clamp-7\",\n      8: \"line-clamp-8\",\n      9: \"line-clamp-9\",\n      10: \"line-clamp-10\",\n    }[maxLines] ?? \"line-clamp-8\";\n\n  // Single container for renderContent mode (both streaming and static)\n  // Keeps scrollRef alive across the streaming → static transition\n  const renderContentWithRef = () => {\n    const textToDisplay = displayContent ?? content;\n\n    if (isStreaming) {\n      // During streaming: use max-height with overflow-hidden and CSS transform to shift\n      // content upward, showing the latest content from the bottom without scroll jitter.\n      // Line height is approximately 1.5rem (24px) for body text.\n      // We show a top ellipsis indicator when content is truncated.\n      return (\n        <div>\n          {isTruncated && (\n            <Text as=\"p\" text03 mainUiMuted className=\"!my-0\">\n              …\n            </Text>\n          )}\n          <div\n            ref={scrollRef}\n            className=\"overflow-hidden\"\n            style={{ maxHeight: `calc(${maxLines} * 1.5rem)` }}\n          >\n            <div ref={contentInnerRef}>\n              {renderContent!(textToDisplay, false)}\n            </div>\n          </div>\n        </div>\n      );\n    }\n\n    // Static mode: use CSS line-clamp for bottom truncation\n    return (\n      <div ref={scrollRef} className={cn(\"overflow-hidden\", lineClampClass)}>\n        {renderContent!(textToDisplay, false)}\n      </div>\n    );\n  };\n\n  // Render plain text streaming (top-truncation with last N lines)\n  const renderPlainTextStreaming = () => {\n    const textToDisplay = displayContent ?? content;\n    const { lines, hasTruncation } = getLastLines(textToDisplay, maxLines);\n\n    return (\n      <div ref={scrollRef} className=\"overflow-hidden\">\n        {hasTruncation && (\n          <Text as=\"span\" mainUiMuted text03>\n            …{\"\\n\"}\n          </Text>\n        )}\n        <Text as=\"p\" mainUiMuted text03 className=\"whitespace-pre-wrap\">\n          {lines}\n        </Text>\n      </div>\n    );\n  };\n\n  // Render plain text static (CSS line-clamp + scroll-based truncation detection)\n  const renderPlainTextStatic = () => (\n    <div ref={scrollRef} className={cn(\"overflow-hidden\", lineClampClass)}>\n      <Text as=\"span\" mainUiMuted text03 className=\"whitespace-pre-wrap\">\n        {displayContent ?? content}\n      </Text>\n    </div>\n  );\n\n  return (\n    <>\n      {/* Collapsed View */}\n      <div className={cn(\"w-full flex\", className)}>\n        <div className=\"flex-1 min-w-0\">\n          {renderContent\n            ? renderContentWithRef()\n            : isStreaming\n              ? renderPlainTextStreaming()\n              : renderPlainTextStatic()}\n        </div>\n\n        {/* Expand button - only show when content is truncated */}\n\n        <div className=\"flex justify-end self-end mt-1 w-8\">\n          {isTruncated && (\n            <Button\n              prominence=\"tertiary\"\n              size=\"sm\"\n              icon={SvgMaximize2}\n              tooltip=\"View Full Text\"\n              onClick={() => setIsModalOpen(true)}\n            />\n          )}\n        </div>\n      </div>\n\n      {/* Expanded Modal */}\n      <Modal open={isModalOpen} onOpenChange={setIsModalOpen}>\n        <Modal.Content height=\"lg\" width=\"lg\" preventAccidentalClose={false}>\n          {/* Header */}\n          <div className=\"flex items-start justify-between px-4 py-3\">\n            <div className=\"flex flex-col\">\n              <DialogPrimitive.Title asChild>\n                <Text as=\"span\" text04 headingH3>\n                  {title}\n                </Text>\n              </DialogPrimitive.Title>\n              <DialogPrimitive.Description asChild>\n                <Text as=\"span\" text03 secondaryBody>\n                  {displaySubtitle}\n                </Text>\n              </DialogPrimitive.Description>\n            </div>\n            <DialogPrimitive.Close asChild>\n              <Button\n                icon={SvgX}\n                prominence=\"tertiary\"\n                size=\"sm\"\n                onClick={() => setIsModalOpen(false)}\n              />\n            </DialogPrimitive.Close>\n          </div>\n\n          {/* Body */}\n          <Modal.Body>\n            {renderContent ? (\n              renderContent(content, true)\n            ) : (\n              <Text as=\"p\" mainUiMuted text03 className=\"whitespace-pre-wrap\">\n                {content}\n              </Text>\n            )}\n          </Modal.Body>\n\n          {/* Footer */}\n          <div className=\"flex items-center justify-between p-2 bg-background-tint-01\">\n            <div className=\"px-2\">\n              <Text as=\"span\" mainUiMuted text03>\n                {lineCount} {lineCount === 1 ? \"line\" : \"lines\"}\n              </Text>\n            </div>\n            <div className=\"flex items-center gap-1 bg-background-tint-00 p-1 rounded-12\">\n              <CopyIconButton\n                prominence=\"tertiary\"\n                size=\"sm\"\n                getCopyText={() => content}\n                tooltip=\"Copy\"\n              />\n              <Button\n                prominence=\"tertiary\"\n                size=\"sm\"\n                icon={SvgDownload}\n                tooltip=\"Download\"\n                onClick={handleDownload}\n              />\n            </div>\n          </div>\n        </Modal.Content>\n      </Modal>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/texts/Text.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Text from \"./Text\";\n\nconst meta: Meta<typeof Text> = {\n  title: \"refresh-components/texts/Text\",\n  component: Text,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Text>;\n\nexport const Default: Story = {\n  args: {\n    children: \"Hello, this is some default text.\",\n  },\n};\n\nexport const Colors: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", flexDirection: \"column\", gap: 8 }}>\n      <Text text01 mainUiBody>\n        text01 — Primary text color\n      </Text>\n      <Text text02 mainUiBody>\n        text02 — Secondary text color\n      </Text>\n      <Text text03 mainUiBody>\n        text03 — Tertiary text color\n      </Text>\n      <Text text04 mainUiBody>\n        text04 — Quaternary text color\n      </Text>\n      <Text text05 mainUiBody>\n        text05 — Quinary text color\n      </Text>\n    </div>\n  ),\n};\n\nexport const Typography: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", flexDirection: \"column\", gap: 12 }}>\n      <Text headingH2>Heading H2</Text>\n      <Text mainContentBody>Main Content Body</Text>\n      <Text mainUiBody>Main UI Body</Text>\n      <Text secondaryBody>Secondary Body</Text>\n    </div>\n  ),\n};\n\nexport const Emphasis: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", flexDirection: \"column\", gap: 8 }}>\n      <Text mainContentEmphasis>Main Content Emphasis</Text>\n      <Text mainUiAction>Main UI Action</Text>\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/texts/Text.tsx",
    "content": "import type { HTMLAttributes } from \"react\";\n\nimport { cn } from \"@/lib/utils\";\n\nconst fonts = {\n  // Heading\n  headingH1: \"font-heading-h1\",\n  headingH2: \"font-heading-h2\",\n  headingH3: \"font-heading-h3\",\n  headingH3Muted: \"font-heading-h3-muted\",\n\n  // Main Content\n  mainContentBody: \"font-main-content-body\",\n  mainContentMuted: \"font-main-content-muted\",\n  mainContentEmphasis: \"font-main-content-emphasis\",\n  mainContentMono: \"font-main-content-mono\",\n\n  // Main UI\n  mainUiBody: \"font-main-ui-body\",\n  mainUiMuted: \"font-main-ui-muted\",\n  mainUiAction: \"font-main-ui-action\",\n  mainUiMono: \"font-main-ui-mono\",\n\n  // Secondary\n  secondaryBody: \"font-secondary-body\",\n  secondaryAction: \"font-secondary-action\",\n  secondaryMono: \"font-secondary-mono\",\n  secondaryMonoLabel: \"font-secondary-mono-label\",\n\n  // Figure\n  figureSmallLabel: \"font-figure-small-label\",\n  figureSmallValue: \"font-figure-small-value\",\n  figureKeystroke: \"font-figure-keystroke\",\n};\n\nconst colors = {\n  text05: \"text-text-05\",\n  text04: \"text-text-04\",\n  text03: \"text-text-03\",\n  text02: \"text-text-02\",\n  text01: \"text-text-01\",\n  textLight03: \"text-text-light-03\",\n  textLight05: \"text-text-light-05\",\n  textDark03: \"text-text-dark-03\",\n  textDark05: \"text-text-dark-05\",\n\n  inverted: {\n    text05: \"text-text-inverted-05\",\n    text04: \"text-text-inverted-04\",\n    text03: \"text-text-inverted-03\",\n    text02: \"text-text-inverted-02\",\n    text01: \"text-text-inverted-01\",\n    textLight03: \"text-text-light-03\",\n    textLight05: \"text-text-light-05\",\n    textDark03: \"text-text-dark-03\",\n    textDark05: \"text-text-dark-05\",\n  },\n};\n\nexport interface TextProps extends Omit<HTMLAttributes<HTMLElement>, \"as\"> {\n  nowrap?: boolean;\n\n  // Fonts\n  headingH1?: boolean;\n  headingH2?: boolean;\n  headingH3?: boolean;\n  headingH3Muted?: boolean;\n  mainContentBody?: boolean;\n  mainContentMuted?: boolean;\n  mainContentEmphasis?: boolean;\n  mainContentMono?: boolean;\n  mainUiBody?: boolean;\n  mainUiMuted?: boolean;\n  mainUiAction?: boolean;\n  mainUiMono?: boolean;\n  secondaryBody?: boolean;\n  secondaryAction?: boolean;\n  secondaryMono?: boolean;\n  secondaryMonoLabel?: boolean;\n  figureSmallLabel?: boolean;\n  figureSmallValue?: boolean;\n  figureKeystroke?: boolean;\n\n  // Colors\n  text05?: boolean;\n  text04?: boolean;\n  text03?: boolean;\n  text02?: boolean;\n  text01?: boolean;\n  inverted?: boolean;\n  textLight03?: boolean;\n  textLight05?: boolean;\n  textDark03?: boolean;\n  textDark05?: boolean;\n\n  // Tag type override\n  as?: \"p\" | \"span\" | \"li\";\n}\n\nexport default function Text({\n  nowrap,\n  headingH1,\n  headingH2,\n  headingH3,\n  headingH3Muted,\n  mainContentBody,\n  mainContentMuted,\n  mainContentEmphasis,\n  mainContentMono,\n  mainUiBody,\n  mainUiMuted,\n  mainUiAction,\n  mainUiMono,\n  secondaryBody,\n  secondaryAction,\n  secondaryMono,\n  secondaryMonoLabel,\n  figureSmallLabel,\n  figureSmallValue,\n  figureKeystroke,\n  text05,\n  text04,\n  text03,\n  text02,\n  text01,\n  inverted,\n  textLight03,\n  textLight05,\n  textDark03,\n  textDark05,\n  children,\n  className,\n  as,\n  ...rest\n}: TextProps) {\n  const font = headingH1\n    ? \"headingH1\"\n    : headingH2\n      ? \"headingH2\"\n      : headingH3\n        ? \"headingH3\"\n        : headingH3Muted\n          ? \"headingH3Muted\"\n          : mainContentBody\n            ? \"mainContentBody\"\n            : mainContentMuted\n              ? \"mainContentMuted\"\n              : mainContentEmphasis\n                ? \"mainContentEmphasis\"\n                : mainContentMono\n                  ? \"mainContentMono\"\n                  : mainUiBody\n                    ? \"mainUiBody\"\n                    : mainUiMuted\n                      ? \"mainUiMuted\"\n                      : mainUiAction\n                        ? \"mainUiAction\"\n                        : mainUiMono\n                          ? \"mainUiMono\"\n                          : secondaryBody\n                            ? \"secondaryBody\"\n                            : secondaryAction\n                              ? \"secondaryAction\"\n                              : secondaryMono\n                                ? \"secondaryMono\"\n                                : secondaryMonoLabel\n                                  ? \"secondaryMonoLabel\"\n                                  : figureSmallLabel\n                                    ? \"figureSmallLabel\"\n                                    : figureSmallValue\n                                      ? \"figureSmallValue\"\n                                      : figureKeystroke\n                                        ? \"figureKeystroke\"\n                                        : \"mainUiBody\";\n\n  const color = text01\n    ? \"text01\"\n    : text02\n      ? \"text02\"\n      : text03\n        ? \"text03\"\n        : text04\n          ? \"text04\"\n          : text05\n            ? \"text05\"\n            : textLight03\n              ? \"textLight03\"\n              : textLight05\n                ? \"textLight05\"\n                : textDark03\n                  ? \"textDark03\"\n                  : textDark05\n                    ? \"textDark05\"\n                    : \"text05\";\n\n  const Tag = as ?? \"span\";\n\n  return (\n    <Tag\n      {...rest}\n      className={cn(\n        fonts[font],\n        inverted ? colors.inverted[color] : colors[color],\n        nowrap && \"whitespace-nowrap\",\n        className\n      )}\n    >\n      {children}\n    </Tag>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/texts/Truncated.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport Truncated from \"./Truncated\";\n\nconst meta: Meta<typeof Truncated> = {\n  title: \"refresh-components/texts/Truncated\",\n  component: Truncated,\n  tags: [\"autodocs\"],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof Truncated>;\n\nexport const ShortText: Story = {\n  args: {\n    children: \"Short text that fits.\",\n    mainUiBody: true,\n    text04: true,\n  },\n  decorators: [\n    (Story) => (\n      <div style={{ width: 300 }}>\n        <Story />\n      </div>\n    ),\n  ],\n};\n\nexport const LongText: Story = {\n  args: {\n    children:\n      \"This is a very long piece of text that will definitely get truncated because it exceeds the width of the container and should show a tooltip on hover.\",\n    mainUiBody: true,\n    text04: true,\n  },\n  decorators: [\n    (Story) => (\n      <div style={{ width: 200 }}>\n        <Story />\n      </div>\n    ),\n  ],\n};\n\nexport const TooltipDisabled: Story = {\n  args: {\n    children:\n      \"Long text but tooltip is disabled so it won't appear even when truncated.\",\n    mainUiBody: true,\n    text03: true,\n    disable: true,\n  },\n  decorators: [\n    (Story) => (\n      <div style={{ width: 200 }}>\n        <Story />\n      </div>\n    ),\n  ],\n};\n\nexport const CustomTooltipSide: Story = {\n  args: {\n    children:\n      \"Hover to see the tooltip appear on the right side instead of the default top.\",\n    mainUiBody: true,\n    text04: true,\n    side: \"right\",\n  },\n  decorators: [\n    (Story) => (\n      <div style={{ width: 200, paddingTop: 40 }}>\n        <Story />\n      </div>\n    ),\n  ],\n};\n"
  },
  {
    "path": "web/src/refresh-components/texts/Truncated.tsx",
    "content": "\"use client\";\n\nimport React, { useState, useRef, useCallback, useLayoutEffect } from \"react\";\nimport { TextProps } from \"@/refresh-components/texts/Text\";\nimport {\n  Tooltip,\n  TooltipContent,\n  TooltipTrigger,\n  TooltipProvider,\n} from \"@/components/ui/tooltip\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\n\n/**\n * Hook to detect if text is truncated by comparing visible width vs full width\n */\nfunction useTruncated(children: React.ReactNode) {\n  const [isTruncated, setIsTruncated] = useState(false);\n  const visibleRef = useRef<HTMLDivElement>(null);\n  const hiddenRef = useRef<HTMLDivElement>(null);\n\n  useLayoutEffect(() => {\n    function checkTruncation() {\n      if (visibleRef.current && hiddenRef.current) {\n        const visibleWidth = visibleRef.current.offsetWidth;\n        const fullTextWidth = hiddenRef.current.offsetWidth;\n        setIsTruncated(fullTextWidth > visibleWidth);\n      }\n    }\n\n    // Use a small delay to ensure DOM is ready\n    const timeoutId = setTimeout(checkTruncation, 0);\n\n    window.addEventListener(\"resize\", checkTruncation);\n    return () => {\n      clearTimeout(timeoutId);\n      window.removeEventListener(\"resize\", checkTruncation);\n    };\n  }, [children]);\n\n  return { isTruncated, visibleRef, hiddenRef };\n}\n\nexport interface TruncatedProps extends TextProps {\n  side?: \"top\" | \"right\" | \"bottom\" | \"left\";\n  sideOffset?: number;\n  disable?: boolean;\n}\n\n/**\n * Renders passed in text on a single line. If text is truncated,\n * shows a tooltip on hover with the full text.\n */\nexport default function Truncated({\n  side = \"top\",\n  sideOffset,\n  disable,\n  children,\n  className,\n  ...rest\n}: TruncatedProps) {\n  const { isTruncated, visibleRef, hiddenRef } = useTruncated(children);\n\n  const text = (\n    <Text\n      as=\"p\"\n      className={cn(\"line-clamp-1 break-all text-left\", className)}\n      {...rest}\n    >\n      {children}\n    </Text>\n  );\n\n  const showTooltip = !disable && isTruncated;\n\n  // Radix's composeEventHandlers skips its internal handler when\n  // event.defaultPrevented is true. When there is nothing to show we\n  // block onPointerMove so the inner Tooltip never starts its open-delay\n  // timer and therefore never dispatches the global \"tooltip.open\" custom\n  // event that would close any *outer* tooltip wrapping this component.\n  const blockPointerWhenInert = useCallback(\n    (e: React.PointerEvent) => {\n      if (!showTooltip) e.preventDefault();\n    },\n    [showTooltip]\n  );\n\n  return (\n    <>\n      <TooltipProvider>\n        <Tooltip>\n          <div\n            ref={visibleRef}\n            className=\"flex-grow overflow-hidden text-left w-full\"\n          >\n            <TooltipTrigger asChild>\n              <div onPointerMove={blockPointerWhenInert}>{text}</div>\n            </TooltipTrigger>\n          </div>\n\n          {showTooltip && (\n            <TooltipContent\n              side={side}\n              sideOffset={sideOffset}\n              className=\"max-w-[400px] break-words whitespace-normal\"\n            >\n              {typeof children === \"string\" ? (\n                <Text as=\"p\" textLight05>\n                  {children}\n                </Text>\n              ) : (\n                children\n              )}\n            </TooltipContent>\n          )}\n        </Tooltip>\n      </TooltipProvider>\n\n      {/*\n        Hide offscreen to measure full text width\n\n        # Note\n\n        The placement of this `div` *after* the above `TooltipProvider` is *VERY* important to our tests!\n        If the bottom `div` were placed first, any tests that try locating the string that the `Truncated` component is trying to render would find the bottom div first.\n        This can break expectations (since it's supposed to be hidden in the first place).\n\n        All in all, keep the below `div` *below* the above `TooltipProvider`.\n\n        - @raunakab\n      */}\n      <div\n        ref={hiddenRef}\n        className=\"fixed left-[-9999px] top-[0rem] whitespace-nowrap pointer-events-none opacity-0\"\n        aria-hidden=\"true\"\n      >\n        {text}\n      </div>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/tiles/ButtonTile.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport ButtonTile from \"./ButtonTile\";\nimport { SvgArrowRight, SvgPlus, SvgSettings, SvgSearch } from \"@opal/icons\";\n\nconst meta: Meta<typeof ButtonTile> = {\n  title: \"refresh-components/tiles/ButtonTile\",\n  component: ButtonTile,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <div style={{ maxWidth: 300 }}>\n        <Story />\n      </div>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof ButtonTile>;\n\nexport const Default: Story = {\n  args: {\n    title: \"Create New\",\n    description: \"Start from scratch\",\n    icon: SvgArrowRight,\n    onClick: () => {},\n  },\n};\n\nexport const TitleOnly: Story = {\n  args: {\n    title: \"Quick Action\",\n    icon: SvgPlus,\n    onClick: () => {},\n  },\n};\n\nexport const DescriptionOnly: Story = {\n  args: {\n    description: \"Click to configure settings\",\n    icon: SvgSettings,\n    onClick: () => {},\n  },\n};\n\nexport const NoIcon: Story = {\n  args: {\n    title: \"Simple Tile\",\n    description: \"Without an icon\",\n    onClick: () => {},\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    title: \"Unavailable\",\n    description: \"This feature is not enabled\",\n    icon: SvgSettings,\n    disabled: true,\n  },\n};\n\nexport const TileGrid: Story = {\n  render: () => (\n    <div\n      style={{\n        display: \"grid\",\n        gridTemplateColumns: \"1fr 1fr\",\n        gap: 8,\n        maxWidth: 500,\n      }}\n    >\n      <ButtonTile\n        title=\"Search\"\n        description=\"Find documents\"\n        icon={SvgSearch}\n        onClick={() => {}}\n      />\n      <ButtonTile\n        title=\"Create\"\n        description=\"New document\"\n        icon={SvgPlus}\n        onClick={() => {}}\n      />\n      <ButtonTile\n        title=\"Settings\"\n        description=\"Configure\"\n        icon={SvgSettings}\n        onClick={() => {}}\n      />\n      <ButtonTile\n        title=\"Disabled\"\n        description=\"Not available\"\n        icon={SvgArrowRight}\n        disabled\n      />\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/tiles/ButtonTile.tsx",
    "content": "import type { FunctionComponent } from \"react\";\n\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Interactive } from \"@opal/core\";\nimport type { IconProps } from \"@opal/types\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface ButtonTileProps {\n  title?: string;\n  description?: string;\n  icon?: FunctionComponent<IconProps>;\n  onClick?: () => void;\n  disabled?: boolean;\n}\n\n// ---------------------------------------------------------------------------\n// ButtonTile\n// ---------------------------------------------------------------------------\n\nexport default function ButtonTile({\n  title,\n  description,\n  icon,\n  onClick,\n  disabled,\n}: ButtonTileProps) {\n  const Icon = icon;\n\n  return (\n    <Interactive.Stateless\n      variant=\"default\"\n      prominence=\"secondary\"\n      group=\"group/Tile\"\n      disabled={disabled}\n      onClick={onClick}\n    >\n      <div className={cn(\"rounded-08 p-1.5\", \"flex flex-row gap-2\")}>\n        {(title || description) && (\n          <div className=\"min-w-0 flex flex-col px-0.5\">\n            {title && (\n              <Text\n                secondaryAction\n                text02={disabled}\n                text04={!disabled}\n                className=\"truncate\"\n              >\n                {title}\n              </Text>\n            )}\n            {description && (\n              <Text secondaryBody text02={disabled} text03={!disabled}>\n                {description}\n              </Text>\n            )}\n          </div>\n        )}\n\n        {Icon && (\n          <div className=\"flex items-start justify-center\">\n            <Icon\n              size={16}\n              className={cn(\n                disabled\n                  ? \"stroke-text-01\"\n                  : \"stroke-text-03 group-hover/Tile:stroke-text-04\"\n              )}\n            />\n          </div>\n        )}\n      </div>\n    </Interactive.Stateless>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-components/tiles/FileTile.stories.tsx",
    "content": "import type { Meta, StoryObj } from \"@storybook/react\";\nimport FileTile from \"./FileTile\";\nimport { SvgTextLines, SvgFiles } from \"@opal/icons\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\n\nconst meta: Meta<typeof FileTile> = {\n  title: \"refresh-components/tiles/FileTile\",\n  component: FileTile,\n  tags: [\"autodocs\"],\n  decorators: [\n    (Story) => (\n      <TooltipPrimitive.Provider>\n        <div style={{ maxWidth: 300 }}>\n          <Story />\n        </div>\n      </TooltipPrimitive.Provider>\n    ),\n  ],\n};\n\nexport default meta;\ntype Story = StoryObj<typeof FileTile>;\n\nexport const Default: Story = {\n  args: {\n    title: \"document.pdf\",\n    description: \"Project proposal document\",\n    icon: SvgTextLines,\n  },\n};\n\nexport const WithOpen: Story = {\n  args: {\n    title: \"report.xlsx\",\n    description: \"Quarterly report\",\n    icon: SvgFiles,\n    onOpen: () => {},\n  },\n};\n\nexport const WithRemove: Story = {\n  args: {\n    title: \"notes.md\",\n    description: \"Meeting notes\",\n    icon: SvgTextLines,\n    onRemove: () => {},\n  },\n};\n\nexport const Processing: Story = {\n  args: {\n    title: \"uploading.pdf\",\n    description: \"Processing...\",\n    icon: SvgTextLines,\n    state: \"processing\",\n  },\n};\n\nexport const Disabled: Story = {\n  args: {\n    title: \"locked.pdf\",\n    description: \"Access denied\",\n    icon: SvgFiles,\n    state: \"disabled\",\n  },\n};\n\nexport const TitleOnly: Story = {\n  args: {\n    title: \"image.png\",\n    icon: SvgFiles,\n  },\n};\n\nexport const DefaultIcon: Story = {\n  args: {\n    title: \"unknown-file\",\n    description: \"Uses default text lines icon\",\n  },\n};\n\nexport const FileList: Story = {\n  render: () => (\n    <div style={{ display: \"flex\", gap: 8, flexWrap: \"wrap\" }}>\n      <FileTile\n        title=\"proposal.pdf\"\n        description=\"2.4 MB\"\n        icon={SvgTextLines}\n        onOpen={() => {}}\n        onRemove={() => {}}\n      />\n      <FileTile\n        title=\"report.xlsx\"\n        description=\"1.1 MB\"\n        icon={SvgFiles}\n        onOpen={() => {}}\n      />\n      <FileTile\n        title=\"uploading.doc\"\n        description=\"Processing...\"\n        icon={SvgTextLines}\n        state=\"processing\"\n      />\n      <FileTile\n        title=\"locked.pdf\"\n        description=\"No access\"\n        icon={SvgFiles}\n        state=\"disabled\"\n      />\n    </div>\n  ),\n};\n"
  },
  {
    "path": "web/src/refresh-components/tiles/FileTile.tsx",
    "content": "import type { FunctionComponent } from \"react\";\n\nimport { cn, noProp } from \"@/lib/utils\";\nimport { SvgMaximize2, SvgTextLines, SvgX } from \"@opal/icons\";\nimport type { IconProps } from \"@opal/types\";\nimport { Hoverable } from \"@opal/core\";\nimport IconButton from \"../buttons/IconButton\";\nimport Text from \"../texts/Text\";\nimport Truncated from \"../texts/Truncated\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\nexport type FileTileState = \"default\" | \"processing\" | \"disabled\";\n\ninterface FileTileProps {\n  title?: string;\n  description?: string;\n  icon?: FunctionComponent<IconProps>;\n  onRemove?: () => void;\n  onOpen?: () => void;\n  state?: FileTileState;\n}\n\n// ---------------------------------------------------------------------------\n// RemoveButton (internal)\n// ---------------------------------------------------------------------------\n\ninterface RemoveButtonProps {\n  onRemove: () => void;\n}\n\nfunction RemoveButton({ onRemove }: RemoveButtonProps) {\n  return (\n    <div\n      className={cn(\n        \"absolute -left-1 -top-1 z-10\",\n        \"pointer-events-none focus-within:pointer-events-auto\"\n      )}\n    >\n      <Hoverable.Item group=\"fileTile\" variant=\"opacity-on-hover\">\n        <button\n          type=\"button\"\n          onClick={(e) => {\n            e.stopPropagation();\n            onRemove();\n          }}\n          title=\"Remove\"\n          aria-label=\"Remove\"\n          className={cn(\n            \"h-4 w-4\",\n            \"flex items-center justify-center\",\n            \"rounded-full bg-theme-primary-05 text-text-inverted-05\",\n            \"pointer-events-auto\"\n          )}\n        >\n          <SvgX size={10} />\n        </button>\n      </Hoverable.Item>\n    </div>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// FileTile\n// ---------------------------------------------------------------------------\n\nexport default function FileTile({\n  title,\n  description,\n  icon,\n  onRemove,\n  onOpen,\n  state = \"default\",\n}: FileTileProps) {\n  const Icon = icon ?? SvgTextLines;\n  const isMuted = state === \"processing\" || state === \"disabled\";\n\n  return (\n    <Hoverable.Root group=\"fileTile\" widthVariant=\"fit\">\n      <div\n        onClick={onOpen && state !== \"disabled\" ? () => onOpen() : undefined}\n        className={cn(\n          \"relative min-w-[7.5rem] max-w-[15rem] h-full\",\n          \"border rounded-12 p-1\",\n          \"flex flex-row items-center\",\n          \"transition-colors duration-150\",\n          // Outer container bg + border per state\n          isMuted\n            ? \"bg-background-neutral-02 border-border-01\"\n            : \"bg-background-tint-00 border-border-01\",\n          // Hover overrides (disabled gets none)\n          state !== \"disabled\" && \"hover:border-border-02\",\n          state === \"default\" && \"hover:bg-background-tint-02\",\n          // Clickable cursor when onOpen is provided and not disabled\n          onOpen && state !== \"disabled\" && \"cursor-pointer\"\n        )}\n      >\n        {onRemove && <RemoveButton onRemove={onRemove} />}\n\n        <div\n          className={cn(\n            \"shrink-0 h-9 w-9 rounded-08\",\n            \"flex items-center justify-center\",\n            isMuted ? \"bg-background-neutral-03\" : \"bg-background-tint-01\"\n          )}\n        >\n          <Icon\n            size={16}\n            className={cn(isMuted ? \"stroke-text-01\" : \"stroke-text-02\")}\n          />\n        </div>\n\n        {(title || description || onOpen) && (\n          <div className=\"min-w-0 flex pl-1 w-full justify-between h-full\">\n            {isMuted ? (\n              <div className=\"flex flex-col min-w-0\">\n                {title && (\n                  <Truncated\n                    secondaryAction\n                    text02\n                    className={cn(\n                      \"truncate\",\n                      state === \"processing\" && \"hover:text-text-03\"\n                    )}\n                  >\n                    {title}\n                  </Truncated>\n                )}\n                {description && (\n                  <Text\n                    secondaryBody\n                    text02\n                    className={cn(\n                      \"line-clamp-2\",\n                      state === \"processing\" && \"hover:text-text-03\"\n                    )}\n                  >\n                    {description}\n                  </Text>\n                )}\n              </div>\n            ) : (\n              <div className=\"flex flex-col min-w-0\">\n                {title && (\n                  <Truncated secondaryAction text04 className=\"truncate\">\n                    {title}\n                  </Truncated>\n                )}\n                {description && (\n                  <Text secondaryBody text03 className=\"line-clamp-2\">\n                    {description}\n                  </Text>\n                )}\n              </div>\n            )}\n            {onOpen && (\n              <div className=\"h-full\">\n                <IconButton\n                  small\n                  icon={SvgMaximize2}\n                  onClick={noProp(onOpen)}\n                />\n              </div>\n            )}\n          </div>\n        )}\n      </div>\n    </Hoverable.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/AgentEditorPage.tsx",
    "content": "\"use client\";\n\nimport { useState, useRef, useEffect, useCallback } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport * as GeneralLayouts from \"@/layouts/general-layouts\";\nimport Button from \"@/refresh-components/buttons/Button\";\nimport { Button as OpalButton } from \"@opal/components\";\nimport { Hoverable } from \"@opal/core\";\nimport { FullPersona } from \"@/app/admin/agents/interfaces\";\nimport { buildImgUrl } from \"@/app/app/components/files/images/utils\";\nimport { Formik, Form, FieldArray } from \"formik\";\nimport * as Yup from \"yup\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport InputTextAreaField from \"@/refresh-components/form/InputTextAreaField\";\nimport InputTypeInElementField from \"@/refresh-components/form/InputTypeInElementField\";\nimport InputDatePickerField from \"@/refresh-components/form/InputDatePickerField\";\nimport Message from \"@/refresh-components/messages/Message\";\nimport Separator from \"@/refresh-components/Separator\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport { useFormikContext } from \"formik\";\nimport LLMSelector from \"@/components/llm/LLMSelector\";\nimport { parseLlmDescriptor, structureValue } from \"@/lib/llmConfig/utils\";\nimport { useLLMProviders } from \"@/hooks/useLLMProviders\";\nimport {\n  STARTER_MESSAGES_EXAMPLES,\n  MAX_CHARACTERS_STARTER_MESSAGE,\n  MAX_CHARACTERS_AGENT_DESCRIPTION,\n} from \"@/lib/constants\";\nimport {\n  IMAGE_GENERATION_TOOL_ID,\n  WEB_SEARCH_TOOL_ID,\n  PYTHON_TOOL_ID,\n  SEARCH_TOOL_ID,\n  OPEN_URL_TOOL_ID,\n} from \"@/app/app/components/tools/constants\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Card } from \"@/refresh-components/cards\";\nimport SimpleCollapsible from \"@/refresh-components/SimpleCollapsible\";\nimport SwitchField from \"@/refresh-components/form/SwitchField\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { useDocumentSets } from \"@/app/admin/documents/sets/hooks\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport { toast } from \"@/hooks/useToast\";\nimport UserFilesModal from \"@/components/modals/UserFilesModal\";\nimport {\n  ProjectFile,\n  UserFileStatus,\n} from \"@/app/app/projects/projectsService\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport {\n  SvgActions,\n  SvgExpand,\n  SvgFold,\n  SvgImage,\n  SvgLock,\n  SvgOnyxOctagon,\n  SvgSliders,\n  SvgUsers,\n  SvgTrash,\n} from \"@opal/icons\";\nimport CustomAgentAvatar, {\n  agentAvatarIconMap,\n} from \"@/refresh-components/avatars/CustomAgentAvatar\";\nimport InputAvatar from \"@/refresh-components/inputs/InputAvatar\";\nimport SquareButton from \"@/refresh-components/buttons/SquareButton\";\nimport { useAgents } from \"@/hooks/useAgents\";\nimport {\n  createPersona,\n  updatePersona,\n  PersonaUpsertParameters,\n} from \"@/app/admin/agents/lib\";\nimport useMcpServersForAgentEditor from \"@/hooks/useMcpServersForAgentEditor\";\nimport useOpenApiTools from \"@/hooks/useOpenApiTools\";\nimport { useAvailableTools } from \"@/hooks/useAvailableTools\";\nimport * as ActionsLayouts from \"@/layouts/actions-layouts\";\nimport * as ExpandableCard from \"@/layouts/expandable-card-layouts\";\nimport { getActionIcon } from \"@/lib/tools/mcpUtils\";\nimport { MCPServer, MCPTool, ToolSnapshot } from \"@/lib/tools/interfaces\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport useFilter from \"@/hooks/useFilter\";\nimport EnabledCount from \"@/refresh-components/EnabledCount\";\nimport { useAppRouter } from \"@/hooks/appNavigation\";\nimport { isDateInFuture } from \"@/lib/dateUtils\";\nimport {\n  deleteAgent,\n  updateAgentFeaturedStatus,\n  updateAgentSharedStatus,\n} from \"@/lib/agents\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport ShareAgentModal from \"@/sections/modals/ShareAgentModal\";\nimport AgentKnowledgePane from \"@/sections/knowledge/AgentKnowledgePane\";\nimport { ValidSources } from \"@/lib/types\";\nimport { useVectorDbEnabled } from \"@/providers/SettingsProvider\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\n\ninterface AgentIconEditorProps {\n  existingAgent?: FullPersona | null;\n}\n\nfunction FormWarningsEffect() {\n  const { values, setStatus } = useFormikContext<{\n    web_search: boolean;\n    open_url: boolean;\n  }>();\n\n  useEffect(() => {\n    const warnings: Record<string, string> = {};\n    if (values.web_search && !values.open_url) {\n      warnings.open_url =\n        \"Web Search without the ability to open URLs can lead to significantly worse web based results.\";\n    }\n    setStatus({ warnings });\n  }, [values.web_search, values.open_url, setStatus]);\n\n  return null;\n}\n\nfunction AgentIconEditor({ existingAgent }: AgentIconEditorProps) {\n  const { values, setFieldValue } = useFormikContext<{\n    name: string;\n    icon_name: string | null;\n    uploaded_image_id: string | null;\n    remove_image: boolean | null;\n  }>();\n  const [uploadedImagePreview, setUploadedImagePreview] = useState<\n    string | null\n  >(null);\n  const [popoverOpen, setPopoverOpen] = useState(false);\n  const fileInputRef = useRef<HTMLInputElement | null>(null);\n\n  async function handleImageUpload(e: React.ChangeEvent<HTMLInputElement>) {\n    const file = e.target.files?.[0];\n    if (!file) return;\n\n    // Clear previous preview to free memory\n    setUploadedImagePreview(null);\n\n    // Clear selected icon and remove_image flag when uploading an image\n    setFieldValue(\"icon_name\", null);\n    setFieldValue(\"remove_image\", false);\n\n    // Show preview immediately\n    const reader = new FileReader();\n    reader.onloadend = () => {\n      setUploadedImagePreview(reader.result as string);\n    };\n    reader.readAsDataURL(file);\n\n    // Upload the file\n    try {\n      const formData = new FormData();\n      formData.append(\"file\", file);\n      const response = await fetch(\"/api/admin/persona/upload-image\", {\n        method: \"POST\",\n        body: formData,\n      });\n\n      if (!response.ok) {\n        console.error(\"Failed to upload image\");\n        setUploadedImagePreview(null);\n        return;\n      }\n\n      const { file_id } = await response.json();\n      setFieldValue(\"uploaded_image_id\", file_id);\n      setPopoverOpen(false);\n    } catch (error) {\n      console.error(\"Upload error:\", error);\n      setUploadedImagePreview(null);\n    }\n  }\n\n  const imageSrc = uploadedImagePreview\n    ? uploadedImagePreview\n    : values.uploaded_image_id\n      ? buildImgUrl(values.uploaded_image_id)\n      : values.icon_name\n        ? undefined\n        : values.remove_image\n          ? undefined\n          : existingAgent?.uploaded_image_id\n            ? buildImgUrl(existingAgent.uploaded_image_id)\n            : undefined;\n\n  function handleIconClick(iconName: string | null) {\n    setFieldValue(\"icon_name\", iconName);\n    setFieldValue(\"uploaded_image_id\", null);\n    setFieldValue(\"remove_image\", true);\n    setUploadedImagePreview(null);\n    setPopoverOpen(false);\n\n    // Reset the file input so the same file can be uploaded again later\n    if (fileInputRef.current) {\n      fileInputRef.current.value = \"\";\n    }\n  }\n\n  return (\n    <>\n      <input\n        ref={fileInputRef}\n        type=\"file\"\n        accept=\"image/*\"\n        onChange={handleImageUpload}\n        className=\"hidden\"\n      />\n\n      <Popover open={popoverOpen} onOpenChange={setPopoverOpen}>\n        <Popover.Trigger asChild>\n          <Hoverable.Root group=\"inputAvatar\" widthVariant=\"fit\">\n            <InputAvatar className=\"relative flex flex-col items-center justify-center h-[7.5rem] w-[7.5rem]\">\n              {/* We take the `InputAvatar`'s height/width (in REM) and multiply it by 16 (the REM -> px conversion factor). */}\n              <CustomAgentAvatar\n                size={imageSrc ? 7.5 * 16 : 40}\n                src={imageSrc}\n                iconName={values.icon_name ?? undefined}\n                name={values.name}\n              />\n              {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n              <div className=\"absolute bottom-0 left-1/2 -translate-x-1/2 mb-2\">\n                <Hoverable.Item group=\"inputAvatar\" variant=\"opacity-on-hover\">\n                  <Button className=\"h-[1.75rem]\" secondary>\n                    Edit\n                  </Button>\n                </Hoverable.Item>\n              </div>\n            </InputAvatar>\n          </Hoverable.Root>\n        </Popover.Trigger>\n        <Popover.Content>\n          <PopoverMenu>\n            {[\n              <LineItem\n                key=\"upload-image\"\n                icon={SvgImage}\n                onClick={() => fileInputRef.current?.click()}\n                emphasized\n              >\n                Upload Image\n              </LineItem>,\n              null,\n              <div className=\"grid grid-cols-4 gap-1\">\n                <SquareButton\n                  key=\"default-icon\"\n                  icon={() => (\n                    <CustomAgentAvatar name={values.name} size={30} />\n                  )}\n                  onClick={() => handleIconClick(null)}\n                  transient={!imageSrc && values.icon_name === null}\n                />\n                {Object.keys(agentAvatarIconMap).map((iconName) => (\n                  <SquareButton\n                    key={iconName}\n                    onClick={() => handleIconClick(iconName)}\n                    icon={() => (\n                      <CustomAgentAvatar iconName={iconName} size={30} />\n                    )}\n                    transient={values.icon_name === iconName}\n                  />\n                ))}\n              </div>,\n            ]}\n          </PopoverMenu>\n        </Popover.Content>\n      </Popover>\n    </>\n  );\n}\n\ninterface OpenApiToolCardProps {\n  tool: ToolSnapshot;\n}\n\nfunction OpenApiToolCard({ tool }: OpenApiToolCardProps) {\n  const toolFieldName = `openapi_tool_${tool.id}`;\n\n  return (\n    <ExpandableCard.Root defaultFolded>\n      <ActionsLayouts.Header\n        title={tool.display_name || tool.name}\n        description={tool.description}\n        icon={SvgActions}\n        rightChildren={<SwitchField name={toolFieldName} />}\n      />\n    </ExpandableCard.Root>\n  );\n}\n\ninterface MCPServerCardProps {\n  server: MCPServer;\n  tools: MCPTool[];\n  isLoading: boolean;\n}\n\nfunction MCPServerCard({\n  server,\n  tools: enabledTools,\n  isLoading,\n}: MCPServerCardProps) {\n  const [isFolded, setIsFolded] = useState(false);\n  const { values, setFieldValue, getFieldMeta } = useFormikContext<any>();\n  const serverFieldName = `mcp_server_${server.id}`;\n  const isServerEnabled = values[serverFieldName]?.enabled ?? false;\n  const {\n    query,\n    setQuery,\n    filtered: filteredTools,\n  } = useFilter(enabledTools, (tool) => `${tool.name} ${tool.description}`);\n\n  // Calculate enabled and total tool counts\n  const enabledCount = enabledTools.filter((tool) => {\n    const toolFieldValue = values[serverFieldName]?.[`tool_${tool.id}`];\n    return toolFieldValue === true;\n  }).length;\n\n  return (\n    <ExpandableCard.Root isFolded={isFolded} onFoldedChange={setIsFolded}>\n      <ActionsLayouts.Header\n        title={server.name}\n        description={server.description}\n        icon={getActionIcon(server.server_url, server.name)}\n        rightChildren={\n          <GeneralLayouts.Section\n            flexDirection=\"row\"\n            gap={0.5}\n            alignItems=\"start\"\n          >\n            <EnabledCount\n              enabledCount={enabledCount}\n              totalCount={enabledTools.length}\n            />\n            <SwitchField\n              name={`${serverFieldName}.enabled`}\n              onCheckedChange={(checked) => {\n                enabledTools.forEach((tool) => {\n                  setFieldValue(`${serverFieldName}.tool_${tool.id}`, checked);\n                });\n                if (!checked) return;\n                setIsFolded(false);\n              }}\n            />\n          </GeneralLayouts.Section>\n        }\n      >\n        <GeneralLayouts.Section flexDirection=\"row\" gap={0.5}>\n          <InputTypeIn\n            placeholder=\"Search tools...\"\n            variant=\"internal\"\n            leftSearchIcon\n            value={query}\n            onChange={(e) => setQuery(e.target.value)}\n          />\n          {enabledTools.length > 0 && (\n            <OpalButton\n              prominence=\"internal\"\n              rightIcon={isFolded ? SvgExpand : SvgFold}\n              onClick={() => setIsFolded((prev) => !prev)}\n            >\n              {isFolded ? \"Expand\" : \"Fold\"}\n            </OpalButton>\n          )}\n        </GeneralLayouts.Section>\n      </ActionsLayouts.Header>\n      {isLoading ? (\n        <ActionsLayouts.Content>\n          <GeneralLayouts.Section padding={1}>\n            <SimpleLoader />\n          </GeneralLayouts.Section>\n        </ActionsLayouts.Content>\n      ) : (\n        enabledTools.length > 0 &&\n        filteredTools.length > 0 && (\n          <ActionsLayouts.Content>\n            {filteredTools.map((tool) => (\n              <ActionsLayouts.Tool\n                key={tool.id}\n                name={`${serverFieldName}.tool_${tool.id}`}\n                title={tool.name}\n                description={tool.description}\n                icon={tool.icon ?? SvgSliders}\n                disabled={\n                  !tool.isAvailable ||\n                  !getFieldMeta<boolean>(`${serverFieldName}.enabled`).value\n                }\n                rightChildren={\n                  <SwitchField\n                    name={`${serverFieldName}.tool_${tool.id}`}\n                    disabled={!isServerEnabled}\n                  />\n                }\n              />\n            ))}\n          </ActionsLayouts.Content>\n        )\n      )}\n    </ExpandableCard.Root>\n  );\n}\n\nfunction StarterMessages() {\n  const max_starters = STARTER_MESSAGES_EXAMPLES.length;\n\n  const { values } = useFormikContext<{\n    starter_messages: string[];\n  }>();\n\n  const starters = values.starter_messages || [];\n\n  // Count how many non-empty starters we have\n  const filledStarters = starters.filter((s) => s).length;\n  const canAddMore = filledStarters < max_starters;\n\n  // Show at least 1, or all filled ones, or filled + 1 empty (up to max)\n  const visibleCount = Math.min(\n    max_starters,\n    Math.max(\n      1,\n      filledStarters === 0 ? 1 : filledStarters + (canAddMore ? 1 : 0)\n    )\n  );\n\n  return (\n    <FieldArray name=\"starter_messages\">\n      {(arrayHelpers) => (\n        <GeneralLayouts.Section gap={0.5}>\n          {Array.from({ length: visibleCount }, (_, i) => (\n            <InputTypeInElementField\n              key={`starter_messages.${i}`}\n              name={`starter_messages.${i}`}\n              placeholder={\n                STARTER_MESSAGES_EXAMPLES[i] ||\n                \"Enter a conversation starter...\"\n              }\n              onRemove={() => arrayHelpers.remove(i)}\n            />\n          ))}\n        </GeneralLayouts.Section>\n      )}\n    </FieldArray>\n  );\n}\n\nexport interface AgentEditorPageProps {\n  agent?: FullPersona;\n  refreshAgent?: () => void;\n}\n\nexport default function AgentEditorPage({\n  agent: existingAgent,\n  refreshAgent,\n}: AgentEditorPageProps) {\n  const router = useRouter();\n  const appRouter = useAppRouter();\n  const { refresh: refreshAgents } = useAgents();\n  const shareAgentModal = useCreateModal();\n  const deleteAgentModal = useCreateModal();\n  const { isAdmin, isCurator } = useUser();\n  const canUpdateFeaturedStatus = isAdmin || isCurator;\n  const vectorDbEnabled = useVectorDbEnabled();\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n\n  // LLM Model Selection\n  const getCurrentLlm = useCallback(\n    (values: any, llmProviders: any) =>\n      values.llm_model_version_override && values.llm_model_provider_override\n        ? (() => {\n            const provider = llmProviders?.find(\n              (p: any) => p.name === values.llm_model_provider_override\n            );\n            return structureValue(\n              values.llm_model_provider_override,\n              provider?.provider || \"\",\n              values.llm_model_version_override\n            );\n          })()\n        : null,\n    []\n  );\n\n  const onLlmSelect = useCallback(\n    (selected: string | null, setFieldValue: any) => {\n      if (selected === null) {\n        setFieldValue(\"llm_model_version_override\", null);\n        setFieldValue(\"llm_model_provider_override\", null);\n      } else {\n        const { modelName, name } = parseLlmDescriptor(selected);\n        if (modelName && name) {\n          setFieldValue(\"llm_model_version_override\", modelName);\n          setFieldValue(\"llm_model_provider_override\", name);\n        }\n      }\n    },\n    []\n  );\n\n  // Hooks for Knowledge section\n  const { allRecentFiles, beginUpload } = useProjectsContext();\n  const { data: documentSets } = useDocumentSets();\n  const userFilesModal = useCreateModal();\n  const [presentingDocument, setPresentingDocument] = useState<{\n    document_id: string;\n    semantic_identifier: string;\n  } | null>(null);\n\n  const { mcpData, isLoading: isMcpLoading } = useMcpServersForAgentEditor();\n  const { openApiTools: openApiToolsRaw, isLoading: isOpenApiLoading } =\n    useOpenApiTools();\n  const { llmProviders } = useLLMProviders(existingAgent?.id);\n  const mcpServers = mcpData?.mcp_servers ?? [];\n  const openApiTools = openApiToolsRaw ?? [];\n\n  // Check if the *BUILT-IN* tools are available.\n  // The built-in tools are:\n  // - image-gen\n  // - web-search\n  // - code-interpreter\n  const { tools: availableTools, isLoading: isToolsLoading } =\n    useAvailableTools();\n  const searchTool = availableTools?.find(\n    (t) => t.in_code_tool_id === SEARCH_TOOL_ID\n  );\n  const imageGenTool = availableTools?.find(\n    (t) => t.in_code_tool_id === IMAGE_GENERATION_TOOL_ID\n  );\n  const webSearchTool = availableTools?.find(\n    (t) => t.in_code_tool_id === WEB_SEARCH_TOOL_ID\n  );\n  const openURLTool = availableTools?.find(\n    (t) => t.in_code_tool_id === OPEN_URL_TOOL_ID\n  );\n  const codeInterpreterTool = availableTools?.find(\n    (t) => t.in_code_tool_id === PYTHON_TOOL_ID\n  );\n  const isImageGenerationAvailable = !!imageGenTool;\n  const imageGenerationDisabledTooltip = isImageGenerationAvailable\n    ? undefined\n    : \"Image generation requires a configured model. If you have access, set one up under Settings > Image Generation, or ask an admin.\";\n\n  // Group MCP server tools from availableTools by server ID\n  const mcpServersWithTools = mcpServers.map((server) => {\n    const serverTools: MCPTool[] = (availableTools || [])\n      .filter((tool) => tool.mcp_server_id === server.id)\n      .map((tool) => ({\n        id: tool.id.toString(),\n        icon: getActionIcon(server.server_url, server.name),\n        name: tool.display_name || tool.name,\n        description: tool.description,\n        isAvailable: true,\n        isEnabled: tool.enabled,\n      }));\n\n    return { server, tools: serverTools, isLoading: false };\n  });\n\n  const initialValues = {\n    // General\n    icon_name: existingAgent?.icon_name ?? null,\n    uploaded_image_id: existingAgent?.uploaded_image_id ?? null,\n    remove_image: false,\n    name: existingAgent?.name ?? \"\",\n    description: existingAgent?.description ?? \"\",\n\n    // Prompts\n    instructions: existingAgent?.system_prompt ?? \"\",\n    starter_messages: Array.from(\n      { length: STARTER_MESSAGES_EXAMPLES.length },\n      (_, i) => existingAgent?.starter_messages?.[i]?.message ?? \"\"\n    ),\n\n    // Knowledge - enabled if agent has any knowledge sources attached\n    enable_knowledge:\n      (existingAgent?.document_sets?.length ?? 0) > 0 ||\n      (existingAgent?.hierarchy_nodes?.length ?? 0) > 0 ||\n      (existingAgent?.attached_documents?.length ?? 0) > 0 ||\n      (existingAgent?.user_file_ids?.length ?? 0) > 0,\n    document_set_ids: existingAgent?.document_sets?.map((ds) => ds.id) ?? [],\n    // Individual document IDs from hierarchy browsing\n    document_ids: existingAgent?.attached_documents?.map((doc) => doc.id) ?? [],\n    // Hierarchy node IDs (folders/spaces/channels) for scoped search\n    hierarchy_node_ids:\n      existingAgent?.hierarchy_nodes?.map((node) => node.id) ?? [],\n    user_file_ids: existingAgent?.user_file_ids ?? [],\n    // Selected sources for the new knowledge UI - derived from document sets\n    selected_sources: [] as ValidSources[],\n\n    // Advanced\n    llm_model_provider_override:\n      existingAgent?.llm_model_provider_override ?? null,\n    llm_model_version_override:\n      existingAgent?.llm_model_version_override ?? null,\n    knowledge_cutoff_date: existingAgent?.search_start_date\n      ? new Date(existingAgent.search_start_date)\n      : null,\n    replace_base_system_prompt:\n      existingAgent?.replace_base_system_prompt ?? false,\n    reminders: existingAgent?.task_prompt ?? \"\",\n    // For new agents, default to false for optional tools to avoid\n    // \"Tool not available\" errors when the tool isn't configured.\n    // For existing agents, preserve the current tool configuration.\n    image_generation:\n      !!imageGenTool &&\n      (existingAgent?.tools?.some(\n        (tool) => tool.in_code_tool_id === IMAGE_GENERATION_TOOL_ID\n      ) ??\n        false),\n    web_search:\n      !!webSearchTool &&\n      (existingAgent?.tools?.some(\n        (tool) => tool.in_code_tool_id === WEB_SEARCH_TOOL_ID\n      ) ??\n        false),\n    open_url:\n      !!openURLTool &&\n      (existingAgent?.tools?.some(\n        (tool) => tool.in_code_tool_id === OPEN_URL_TOOL_ID\n      ) ??\n        false),\n    code_interpreter:\n      !!codeInterpreterTool &&\n      (existingAgent?.tools?.some(\n        (tool) => tool.in_code_tool_id === PYTHON_TOOL_ID\n      ) ??\n        false),\n    // MCP servers - dynamically add fields for each server with nested tool fields\n    ...Object.fromEntries(\n      mcpServersWithTools.map(({ server, tools }) => {\n        // Find all tools from existingAgent that belong to this MCP server\n        const serverToolsFromAgent =\n          existingAgent?.tools?.filter(\n            (tool) => tool.mcp_server_id === server.id\n          ) ?? [];\n\n        // Build the tool field object with tool_{id} for ALL available tools\n        const toolFields: Record<string, boolean> = {};\n        tools.forEach((tool) => {\n          // Set to true if this tool was enabled in existingAgent, false otherwise\n          toolFields[`tool_${tool.id}`] = serverToolsFromAgent.some(\n            (t) => t.id === Number(tool.id)\n          );\n        });\n\n        return [\n          `mcp_server_${server.id}`,\n          {\n            enabled: serverToolsFromAgent.length > 0, // Server is enabled if it has any tools\n            ...toolFields, // Add individual tool states for ALL tools\n          },\n        ];\n      })\n    ),\n\n    // OpenAPI tools - add a boolean field for each tool\n    ...Object.fromEntries(\n      openApiTools.map((openApiTool) => [\n        `openapi_tool_${openApiTool.id}`,\n        existingAgent?.tools?.some((t) => t.id === openApiTool.id) ?? false,\n      ])\n    ),\n\n    // Sharing\n    shared_user_ids: existingAgent?.users?.map((user) => user.id) ?? [],\n    shared_group_ids: existingAgent?.groups ?? [],\n    is_public: existingAgent?.is_public ?? false,\n    label_ids: existingAgent?.labels?.map((l) => l.id) ?? [],\n    is_featured: existingAgent?.is_featured ?? false,\n  };\n\n  const validationSchema = Yup.object().shape({\n    // General\n    icon_name: Yup.string().nullable(),\n    remove_image: Yup.boolean().optional(),\n    uploaded_image_id: Yup.string().nullable(),\n    name: Yup.string().required(\"Agent name is required.\"),\n    description: Yup.string()\n      .max(\n        MAX_CHARACTERS_AGENT_DESCRIPTION,\n        `Description must be ${MAX_CHARACTERS_AGENT_DESCRIPTION} characters or less`\n      )\n      .optional(),\n\n    // Prompts\n    instructions: Yup.string().optional(),\n    starter_messages: Yup.array().of(\n      Yup.string().max(\n        MAX_CHARACTERS_STARTER_MESSAGE,\n        `Conversation starter must be ${MAX_CHARACTERS_STARTER_MESSAGE} characters or less`\n      )\n    ),\n\n    // Knowledge\n    enable_knowledge: Yup.boolean(),\n    document_set_ids: Yup.array().of(Yup.number()),\n    document_ids: Yup.array().of(Yup.string()),\n    hierarchy_node_ids: Yup.array().of(Yup.number()),\n    user_file_ids: Yup.array().of(Yup.string()),\n    selected_sources: Yup.array().of(Yup.string()),\n\n    // Advanced\n    llm_model_provider_override: Yup.string().nullable().optional(),\n    llm_model_version_override: Yup.string().nullable().optional(),\n    knowledge_cutoff_date: Yup.date()\n      .nullable()\n      .optional()\n      .test(\n        \"knowledge-cutoff-date-not-in-future\",\n        \"Knowledge cutoff date must be today or earlier.\",\n        (value) => !value || !isDateInFuture(value)\n      ),\n    replace_base_system_prompt: Yup.boolean(),\n    reminders: Yup.string().optional(),\n\n    // MCP servers - dynamically add validation for each server with nested tool validation\n    ...Object.fromEntries(\n      mcpServers.map((server) => [\n        `mcp_server_${server.id}`,\n        Yup.object(), // Allow any nested tool fields as booleans\n      ])\n    ),\n\n    // OpenAPI tools - add boolean validation for each tool\n    ...Object.fromEntries(\n      openApiTools.map((openApiTool) => [\n        `openapi_tool_${openApiTool.id}`,\n        Yup.boolean(),\n      ])\n    ),\n  });\n\n  async function handleSubmit(values: typeof initialValues) {\n    try {\n      // Map conversation starters\n      const starterMessages = values.starter_messages\n        .filter((message: string) => message.trim() !== \"\")\n        .map((message: string) => ({\n          message: message,\n          name: message,\n        }));\n\n      // Send null instead of empty array if no starter messages\n      const finalStarterMessages =\n        starterMessages.length > 0 ? starterMessages : null;\n\n      // Always look up tools in availableTools to ensure we can find all tools\n\n      const toolIds = [];\n      if (values.enable_knowledge) {\n        if (vectorDbEnabled && searchTool) {\n          toolIds.push(searchTool.id);\n        }\n      }\n      if (values.image_generation && imageGenTool) {\n        toolIds.push(imageGenTool.id);\n      }\n      if (values.web_search && webSearchTool) {\n        toolIds.push(webSearchTool.id);\n      }\n      if (values.open_url && openURLTool) {\n        toolIds.push(openURLTool.id);\n      }\n      if (values.code_interpreter && codeInterpreterTool) {\n        toolIds.push(codeInterpreterTool.id);\n      }\n\n      // Collect enabled MCP tool IDs\n      mcpServers.forEach((server) => {\n        const serverFieldName = `mcp_server_${server.id}`;\n        const serverData = (values as any)[serverFieldName];\n\n        if (\n          serverData &&\n          typeof serverData === \"object\" &&\n          serverData.enabled\n        ) {\n          // Server is enabled, collect all enabled tools\n          Object.keys(serverData).forEach((key) => {\n            if (key.startsWith(\"tool_\") && serverData[key] === true) {\n              // Extract tool ID from key (e.g., \"tool_123\" -> 123)\n              const toolId = parseInt(key.replace(\"tool_\", \"\"), 10);\n              if (!isNaN(toolId)) {\n                toolIds.push(toolId);\n              }\n            }\n          });\n        }\n      });\n\n      // Collect enabled OpenAPI tool IDs\n      openApiTools.forEach((openApiTool) => {\n        const toolFieldName = `openapi_tool_${openApiTool.id}`;\n        if ((values as any)[toolFieldName] === true) {\n          toolIds.push(openApiTool.id);\n        }\n      });\n\n      // Build submission data\n      const submissionData: PersonaUpsertParameters = {\n        name: values.name,\n        description: values.description,\n        document_set_ids: values.enable_knowledge\n          ? values.document_set_ids\n          : [],\n        is_public: values.is_public,\n        llm_model_provider_override: values.llm_model_provider_override || null,\n        llm_model_version_override: values.llm_model_version_override || null,\n        starter_messages: finalStarterMessages,\n        users: values.shared_user_ids,\n        groups: values.shared_group_ids,\n        tool_ids: toolIds,\n        // uploaded_image: null, // Already uploaded separately\n        remove_image: values.remove_image ?? false,\n        uploaded_image_id: values.uploaded_image_id,\n        icon_name: values.icon_name,\n        search_start_date: values.knowledge_cutoff_date || null,\n        label_ids: values.label_ids,\n        is_featured: values.is_featured,\n        // display_priority: ...,\n\n        user_file_ids: values.enable_knowledge ? values.user_file_ids : [],\n        hierarchy_node_ids: values.enable_knowledge\n          ? values.hierarchy_node_ids\n          : [],\n        document_ids: values.enable_knowledge ? values.document_ids : [],\n\n        system_prompt: values.instructions,\n        replace_base_system_prompt: values.replace_base_system_prompt,\n        task_prompt: values.reminders || \"\",\n        datetime_aware: false,\n      };\n\n      // Call API\n      let personaResponse;\n      if (!!existingAgent) {\n        personaResponse = await updatePersona(existingAgent.id, submissionData);\n      } else {\n        personaResponse = await createPersona(submissionData);\n      }\n\n      // Handle response\n      if (!personaResponse || !personaResponse.ok) {\n        const error = personaResponse\n          ? await personaResponse.text()\n          : \"No response received\";\n        toast.error(\n          `Failed to ${existingAgent ? \"update\" : \"create\"} agent - ${error}`\n        );\n        return;\n      }\n\n      // Success\n      const agent = await personaResponse.json();\n      toast.success(\n        `Agent \"${agent.name}\" ${\n          existingAgent ? \"updated\" : \"created\"\n        } successfully`\n      );\n\n      // Refresh agents list and the specific agent\n      await refreshAgents();\n      if (refreshAgent) {\n        refreshAgent();\n      }\n\n      // Immediately start a chat with this agent.\n      appRouter({ agentId: agent.id });\n    } catch (error) {\n      console.error(\"Submit error:\", error);\n      toast.error(`An error occurred: ${error}`);\n    }\n  }\n\n  // Delete agent handler\n  async function handleDeleteAgent() {\n    if (!existingAgent) return;\n\n    const error = await deleteAgent(existingAgent.id);\n\n    if (error) {\n      toast.error(`Failed to delete agent: ${error}`);\n    } else {\n      toast.success(\"Agent deleted successfully\");\n\n      deleteAgentModal.toggle(false);\n      await refreshAgents();\n      router.push(\"/app/agents\");\n    }\n  }\n\n  // FilePickerPopover callbacks for Knowledge section\n  function handlePickRecentFile(\n    file: ProjectFile,\n    currentFileIds: string[],\n    setFieldValue: (field: string, value: unknown) => void\n  ) {\n    if (!currentFileIds.includes(file.id)) {\n      setFieldValue(\"user_file_ids\", [...currentFileIds, file.id]);\n    }\n  }\n\n  function handleUnpickRecentFile(\n    file: ProjectFile,\n    currentFileIds: string[],\n    setFieldValue: (field: string, value: unknown) => void\n  ) {\n    setFieldValue(\n      \"user_file_ids\",\n      currentFileIds.filter((id) => id !== file.id)\n    );\n  }\n\n  function handleFileClick(file: ProjectFile) {\n    setPresentingDocument({\n      document_id: `project_file__${file.file_id}`,\n      semantic_identifier: file.name,\n    });\n  }\n\n  async function handleUploadChange(\n    e: React.ChangeEvent<HTMLInputElement>,\n    currentFileIds: string[],\n    setFieldValue: (field: string, value: unknown) => void\n  ) {\n    const files = e.target.files;\n    if (!files || files.length === 0) return;\n    try {\n      let selectedIds = [...(currentFileIds || [])];\n      const optimistic = await beginUpload(\n        Array.from(files),\n        null,\n        (result) => {\n          const uploadedFiles = result.user_files || [];\n          if (uploadedFiles.length === 0) return;\n          const tempToFinal = new Map(\n            uploadedFiles\n              .filter((f) => f.temp_id)\n              .map((f) => [f.temp_id as string, f.id])\n          );\n          const replaced = (selectedIds || []).map(\n            (id: string) => tempToFinal.get(id) ?? id\n          );\n          selectedIds = replaced;\n          setFieldValue(\"user_file_ids\", replaced);\n        }\n      );\n      if (optimistic) {\n        const optimisticIds = optimistic.map((f) => f.id);\n        selectedIds = [...selectedIds, ...optimisticIds];\n        setFieldValue(\"user_file_ids\", selectedIds);\n      }\n    } catch (error) {\n      console.error(\"Upload error:\", error);\n    }\n  }\n\n  // Wait for async tool data before rendering the form. Formik captures\n  // initialValues on mount — if tools haven't loaded yet, the initial values\n  // won't include MCP tool fields. Later, toggling those fields would make\n  // the form permanently dirty since they have no baseline to compare against.\n  if (isToolsLoading || isMcpLoading || isOpenApiLoading) {\n    return null;\n  }\n\n  return (\n    <>\n      <div\n        data-testid=\"AgentsEditorPage/container\"\n        aria-label=\"Agents Editor Page\"\n        className=\"h-full w-full\"\n      >\n        <Formik\n          initialValues={initialValues}\n          validationSchema={validationSchema}\n          onSubmit={handleSubmit}\n          validateOnChange\n          validateOnBlur\n          validateOnMount\n          initialTouched={{\n            description:\n              initialValues.description.length >\n              MAX_CHARACTERS_AGENT_DESCRIPTION,\n            starter_messages: initialValues.starter_messages.map(\n              (msg) => msg.length > MAX_CHARACTERS_STARTER_MESSAGE\n            ) as unknown as boolean,\n          }}\n          initialStatus={{ warnings: {} }}\n        >\n          {({ isSubmitting, isValid, dirty, values, setFieldValue }) => {\n            const fileStatusMap = new Map(\n              allRecentFiles.map((f) => [f.id, f.status])\n            );\n\n            const hasUploadingFiles = values.user_file_ids.some(\n              (fileId: string) => {\n                const status = fileStatusMap.get(fileId);\n                if (status === undefined) {\n                  return fileId.startsWith(\"temp_\");\n                }\n                return status === UserFileStatus.UPLOADING;\n              }\n            );\n\n            const hasProcessingFiles = values.user_file_ids.some(\n              (fileId: string) =>\n                fileStatusMap.get(fileId) === UserFileStatus.PROCESSING\n            );\n            const isShared =\n              values.is_public ||\n              values.shared_user_ids.length > 0 ||\n              values.shared_group_ids.length > 0;\n\n            return (\n              <>\n                <FormWarningsEffect />\n\n                <userFilesModal.Provider>\n                  <UserFilesModal\n                    title=\"User Files\"\n                    description=\"All files selected for this agent\"\n                    recentFiles={values.user_file_ids\n                      .map((userFileId: string) => {\n                        const rf = allRecentFiles.find(\n                          (f) => f.id === userFileId\n                        );\n                        if (rf) return rf;\n                        return {\n                          id: userFileId,\n                          name: `File ${userFileId.slice(0, 8)}`,\n                          status: UserFileStatus.COMPLETED,\n                          file_id: userFileId,\n                          created_at: new Date().toISOString(),\n                          project_id: null,\n                          user_id: null,\n                          file_type: \"\",\n                          last_accessed_at: new Date().toISOString(),\n                          chat_file_type: \"file\" as const,\n                        } as unknown as ProjectFile;\n                      })\n                      .filter((f): f is ProjectFile => f !== null)}\n                    selectedFileIds={values.user_file_ids}\n                    onPickRecent={(file: ProjectFile) => {\n                      if (!values.user_file_ids.includes(file.id)) {\n                        setFieldValue(\"user_file_ids\", [\n                          ...values.user_file_ids,\n                          file.id,\n                        ]);\n                      }\n                    }}\n                    onUnpickRecent={(file: ProjectFile) => {\n                      setFieldValue(\n                        \"user_file_ids\",\n                        values.user_file_ids.filter((id) => id !== file.id)\n                      );\n                    }}\n                    onView={(file: ProjectFile) => {\n                      setPresentingDocument({\n                        document_id: `project_file__${file.file_id}`,\n                        semantic_identifier: file.name,\n                      });\n                    }}\n                  />\n                </userFilesModal.Provider>\n\n                <shareAgentModal.Provider>\n                  <ShareAgentModal\n                    agentId={existingAgent?.id}\n                    userIds={values.shared_user_ids}\n                    groupIds={values.shared_group_ids}\n                    isPublic={values.is_public}\n                    isFeatured={values.is_featured}\n                    labelIds={values.label_ids}\n                    onShare={async (\n                      userIds,\n                      groupIds,\n                      isPublic,\n                      isFeatured,\n                      labelIds\n                    ) => {\n                      if (!existingAgent) {\n                        // New agents are not persisted until the main Create action.\n                        setFieldValue(\"shared_user_ids\", userIds);\n                        setFieldValue(\"shared_group_ids\", groupIds);\n                        setFieldValue(\"is_public\", isPublic);\n                        setFieldValue(\"is_featured\", isFeatured);\n                        setFieldValue(\"label_ids\", labelIds);\n                        shareAgentModal.toggle(false);\n                        return;\n                      }\n\n                      const applySharingFields = () => {\n                        setFieldValue(\"shared_user_ids\", userIds);\n                        setFieldValue(\"shared_group_ids\", groupIds);\n                        setFieldValue(\"is_public\", isPublic);\n                        setFieldValue(\"label_ids\", labelIds);\n                      };\n\n                      const refreshSharedUi = async () => {\n                        try {\n                          await refreshAgents();\n                          refreshAgent?.();\n                        } catch (error) {\n                          console.error(\n                            \"Refresh failed after successful share:\",\n                            error\n                          );\n                          toast.error(\n                            \"Agent sharing was saved, but failed to refresh. Please reload.\"\n                          );\n                        }\n                      };\n\n                      let shareError: string | null;\n                      try {\n                        shareError = await updateAgentSharedStatus(\n                          existingAgent.id,\n                          userIds,\n                          groupIds,\n                          isPublic,\n                          isPaidEnterpriseFeaturesEnabled,\n                          labelIds\n                        );\n                      } catch (error) {\n                        console.error(\n                          \"Share agent mutation failed unexpectedly:\",\n                          error\n                        );\n                        toast.error(\"Failed to share agent. Please try again.\");\n                        return;\n                      }\n\n                      if (shareError) {\n                        toast.error(`Failed to share agent: ${shareError}`);\n                        return;\n                      }\n\n                      if (canUpdateFeaturedStatus) {\n                        let featuredError: string | null;\n                        try {\n                          featuredError = await updateAgentFeaturedStatus(\n                            existingAgent.id,\n                            isFeatured\n                          );\n                        } catch (error) {\n                          console.error(\n                            \"Featured mutation failed unexpectedly:\",\n                            error\n                          );\n                          // Share succeeded; sync form and UI before returning.\n                          applySharingFields();\n                          await refreshSharedUi();\n                          toast.error(\n                            \"Failed to update featured status. Please try again.\"\n                          );\n                          return;\n                        }\n\n                        if (featuredError) {\n                          // Share succeeded, featured failed: keep modal open for retry.\n                          applySharingFields();\n                          await refreshSharedUi();\n                          toast.error(\n                            `Failed to update featured status: ${featuredError}`\n                          );\n                          return;\n                        }\n\n                        applySharingFields();\n                        setFieldValue(\"is_featured\", isFeatured);\n                        shareAgentModal.toggle(false);\n                        await refreshSharedUi();\n                        return;\n                      }\n\n                      applySharingFields();\n                      shareAgentModal.toggle(false);\n                      await refreshSharedUi();\n                    }}\n                  />\n                </shareAgentModal.Provider>\n                <deleteAgentModal.Provider>\n                  {deleteAgentModal.isOpen && (\n                    <ConfirmationModalLayout\n                      icon={SvgTrash}\n                      title=\"Delete Agent\"\n                      submit={\n                        <OpalButton\n                          variant=\"danger\"\n                          onClick={handleDeleteAgent}\n                        >\n                          Delete Agent\n                        </OpalButton>\n                      }\n                      onClose={() => deleteAgentModal.toggle(false)}\n                    >\n                      <GeneralLayouts.Section alignItems=\"start\" gap={0.5}>\n                        <Text>\n                          Anyone using this agent will no longer be able to\n                          access it. Deletion cannot be undone.\n                        </Text>\n                        <Text>Are you sure you want to delete this agent?</Text>\n                      </GeneralLayouts.Section>\n                    </ConfirmationModalLayout>\n                  )}\n                </deleteAgentModal.Provider>\n\n                <Form className=\"h-full w-full\">\n                  <SettingsLayouts.Root>\n                    <SettingsLayouts.Header\n                      icon={SvgOnyxOctagon}\n                      title={existingAgent ? \"Edit Agent\" : \"Create Agent\"}\n                      rightChildren={\n                        <div className=\"flex gap-2\">\n                          <OpalButton\n                            prominence=\"secondary\"\n                            type=\"button\"\n                            onClick={() => router.back()}\n                          >\n                            Cancel\n                          </OpalButton>\n                          <SimpleTooltip\n                            tooltip={\n                              isSubmitting\n                                ? \"Saving changes...\"\n                                : !isValid\n                                  ? \"Please fix the errors in the form before saving.\"\n                                  : !dirty\n                                    ? \"No changes have been made.\"\n                                    : hasUploadingFiles\n                                      ? \"Please wait for files to finish uploading.\"\n                                      : undefined\n                            }\n                            side=\"bottom\"\n                          >\n                            <OpalButton\n                              disabled={\n                                isSubmitting ||\n                                !isValid ||\n                                !dirty ||\n                                hasUploadingFiles\n                              }\n                              type=\"submit\"\n                            >\n                              {existingAgent ? \"Save\" : \"Create\"}\n                            </OpalButton>\n                          </SimpleTooltip>\n                        </div>\n                      }\n                      backButton\n                      separator\n                    />\n\n                    {/* Agent Form Content */}\n                    <SettingsLayouts.Body>\n                      <GeneralLayouts.Section\n                        flexDirection=\"row\"\n                        gap={2.5}\n                        alignItems=\"start\"\n                      >\n                        <GeneralLayouts.Section>\n                          <InputLayouts.Vertical name=\"name\" title=\"Name\">\n                            <InputTypeInField\n                              name=\"name\"\n                              placeholder=\"Name your agent\"\n                            />\n                          </InputLayouts.Vertical>\n\n                          <InputLayouts.Vertical\n                            name=\"description\"\n                            title=\"Description\"\n                            suffix=\"optional\"\n                          >\n                            <InputTextAreaField\n                              name=\"description\"\n                              placeholder=\"What does this agent do?\"\n                            />\n                          </InputLayouts.Vertical>\n                        </GeneralLayouts.Section>\n\n                        <GeneralLayouts.Section width=\"fit\">\n                          <InputLayouts.Vertical\n                            name=\"agent_avatar\"\n                            title=\"Agent Avatar\"\n                          >\n                            <AgentIconEditor existingAgent={existingAgent} />\n                          </InputLayouts.Vertical>\n                        </GeneralLayouts.Section>\n                      </GeneralLayouts.Section>\n\n                      <Separator noPadding />\n\n                      <GeneralLayouts.Section>\n                        <InputLayouts.Vertical\n                          name=\"instructions\"\n                          title=\"Instructions\"\n                          suffix=\"optional\"\n                          description=\"Add instructions to tailor the response for this agent.\"\n                        >\n                          <InputTextAreaField\n                            name=\"instructions\"\n                            placeholder=\"Think step by step and show reasoning for complex problems. Use specific examples. Emphasize action items, and leave blanks for the human to fill in when you have unknown. Use a polite enthusiastic tone.\"\n                          />\n                        </InputLayouts.Vertical>\n\n                        <InputLayouts.Vertical\n                          name=\"starter_messages\"\n                          title=\"Conversation Starters\"\n                          description=\"Example messages that help users understand what this agent can do and how to interact with it effectively.\"\n                          suffix=\"optional\"\n                        >\n                          <StarterMessages />\n                        </InputLayouts.Vertical>\n                      </GeneralLayouts.Section>\n\n                      <Separator noPadding />\n\n                      <AgentKnowledgePane\n                        enableKnowledge={values.enable_knowledge}\n                        onEnableKnowledgeChange={(enabled) =>\n                          setFieldValue(\"enable_knowledge\", enabled)\n                        }\n                        selectedSources={values.selected_sources}\n                        onSourcesChange={(sources) =>\n                          setFieldValue(\"selected_sources\", sources)\n                        }\n                        documentSets={documentSets ?? []}\n                        selectedDocumentSetIds={values.document_set_ids}\n                        onDocumentSetIdsChange={(ids) =>\n                          setFieldValue(\"document_set_ids\", ids)\n                        }\n                        selectedDocumentIds={values.document_ids}\n                        onDocumentIdsChange={(ids) =>\n                          setFieldValue(\"document_ids\", ids)\n                        }\n                        selectedFolderIds={values.hierarchy_node_ids}\n                        onFolderIdsChange={(ids) =>\n                          setFieldValue(\"hierarchy_node_ids\", ids)\n                        }\n                        selectedFileIds={values.user_file_ids}\n                        onFileIdsChange={(ids) =>\n                          setFieldValue(\"user_file_ids\", ids)\n                        }\n                        allRecentFiles={allRecentFiles}\n                        onFileClick={handleFileClick}\n                        onUploadChange={(e) =>\n                          handleUploadChange(\n                            e,\n                            values.user_file_ids,\n                            setFieldValue\n                          )\n                        }\n                        hasProcessingFiles={hasProcessingFiles}\n                        initialAttachedDocuments={\n                          existingAgent?.attached_documents\n                        }\n                        initialHierarchyNodes={existingAgent?.hierarchy_nodes}\n                        vectorDbEnabled={vectorDbEnabled}\n                      />\n\n                      <Separator noPadding />\n\n                      <SimpleCollapsible>\n                        <SimpleCollapsible.Header\n                          title=\"Actions\"\n                          description=\"Tools and capabilities available for this agent to use.\"\n                        />\n                        <SimpleCollapsible.Content>\n                          <GeneralLayouts.Section gap={0.5}>\n                            <SimpleTooltip\n                              tooltip={imageGenerationDisabledTooltip}\n                              side=\"top\"\n                            >\n                              <Card\n                                variant={\n                                  isImageGenerationAvailable\n                                    ? undefined\n                                    : \"disabled\"\n                                }\n                              >\n                                <InputLayouts.Horizontal\n                                  name=\"image_generation\"\n                                  title=\"Image Generation\"\n                                  description=\"Generate and manipulate images using AI-powered tools.\"\n                                  disabled={!isImageGenerationAvailable}\n                                >\n                                  <SwitchField\n                                    name=\"image_generation\"\n                                    disabled={!isImageGenerationAvailable}\n                                  />\n                                </InputLayouts.Horizontal>\n                              </Card>\n                            </SimpleTooltip>\n\n                            <Card\n                              variant={!!webSearchTool ? undefined : \"disabled\"}\n                            >\n                              <InputLayouts.Horizontal\n                                name=\"web_search\"\n                                title=\"Web Search\"\n                                description=\"Search the web for real-time information and up-to-date results.\"\n                                disabled={!webSearchTool}\n                              >\n                                <SwitchField\n                                  name=\"web_search\"\n                                  disabled={!webSearchTool}\n                                />\n                              </InputLayouts.Horizontal>\n                            </Card>\n\n                            <Card\n                              variant={!!openURLTool ? undefined : \"disabled\"}\n                            >\n                              <InputLayouts.Horizontal\n                                name=\"open_url\"\n                                title=\"Open URL\"\n                                description=\"Fetch and read content from web URLs.\"\n                                disabled={!openURLTool}\n                              >\n                                <SwitchField\n                                  name=\"open_url\"\n                                  disabled={!openURLTool}\n                                />\n                              </InputLayouts.Horizontal>\n                            </Card>\n\n                            <Card\n                              variant={\n                                !!codeInterpreterTool ? undefined : \"disabled\"\n                              }\n                            >\n                              <InputLayouts.Horizontal\n                                name=\"code_interpreter\"\n                                title=\"Code Interpreter\"\n                                description=\"Generate and run code.\"\n                                disabled={!codeInterpreterTool}\n                              >\n                                <SwitchField\n                                  name=\"code_interpreter\"\n                                  disabled={!codeInterpreterTool}\n                                />\n                              </InputLayouts.Horizontal>\n                            </Card>\n\n                            {/* Tools */}\n                            <>\n                              {/* render the separator if there is at least one mcp-server or open-api-tool */}\n                              {(mcpServers.length > 0 ||\n                                openApiTools.length > 0) && (\n                                <Separator noPadding className=\"py-1\" />\n                              )}\n\n                              {/* MCP tools */}\n                              {mcpServersWithTools.length > 0 && (\n                                <GeneralLayouts.Section gap={0.5}>\n                                  {mcpServersWithTools.map(\n                                    ({ server, tools, isLoading }) => (\n                                      <MCPServerCard\n                                        key={server.id}\n                                        server={server}\n                                        tools={tools}\n                                        isLoading={isLoading}\n                                      />\n                                    )\n                                  )}\n                                </GeneralLayouts.Section>\n                              )}\n\n                              {/* OpenAPI tools */}\n                              {openApiTools.length > 0 && (\n                                <GeneralLayouts.Section gap={0.5}>\n                                  {openApiTools.map((tool) => (\n                                    <OpenApiToolCard\n                                      key={tool.id}\n                                      tool={tool}\n                                    />\n                                  ))}\n                                </GeneralLayouts.Section>\n                              )}\n                            </>\n                          </GeneralLayouts.Section>\n                        </SimpleCollapsible.Content>\n                      </SimpleCollapsible>\n\n                      <Separator noPadding />\n\n                      <SimpleCollapsible>\n                        <SimpleCollapsible.Header\n                          title=\"Advanced Options\"\n                          description=\"Fine-tune agent prompts and knowledge.\"\n                        />\n                        <SimpleCollapsible.Content>\n                          <GeneralLayouts.Section>\n                            <Card>\n                              <InputLayouts.Horizontal\n                                title=\"Share This Agent\"\n                                description=\"with other users, groups, or everyone in your organization.\"\n                                center\n                              >\n                                <OpalButton\n                                  prominence=\"secondary\"\n                                  icon={isShared ? SvgUsers : SvgLock}\n                                  onClick={() => shareAgentModal.toggle(true)}\n                                >\n                                  Share\n                                </OpalButton>\n                              </InputLayouts.Horizontal>\n                              {canUpdateFeaturedStatus && (\n                                <>\n                                  <InputLayouts.Horizontal\n                                    name=\"is_featured\"\n                                    title=\"Feature This Agent\"\n                                    description=\"Show this agent at the top of the explore agents list and automatically pin it to the sidebar for new users with access.\"\n                                  >\n                                    <SwitchField name=\"is_featured\" />\n                                  </InputLayouts.Horizontal>\n                                  {values.is_featured && !isShared && (\n                                    <Message\n                                      static\n                                      close={false}\n                                      className=\"w-full\"\n                                      text=\"This agent is private to you and will only be featured for yourself.\"\n                                    />\n                                  )}\n                                </>\n                              )}\n                            </Card>\n\n                            <Card>\n                              <InputLayouts.Horizontal\n                                name=\"llm_model\"\n                                title=\"Default Model\"\n                                description=\"This model will be used by Onyx by default in your chats.\"\n                              >\n                                <LLMSelector\n                                  name=\"llm_model\"\n                                  llmProviders={llmProviders ?? []}\n                                  currentLlm={getCurrentLlm(\n                                    values,\n                                    llmProviders\n                                  )}\n                                  onSelect={(selected) =>\n                                    onLlmSelect(selected, setFieldValue)\n                                  }\n                                />\n                              </InputLayouts.Horizontal>\n                              <InputLayouts.Horizontal\n                                name=\"knowledge_cutoff_date\"\n                                title=\"Knowledge Cutoff Date\"\n                                suffix=\"optional\"\n                                description=\"Documents with a last-updated date prior to this will be ignored.\"\n                              >\n                                <InputDatePickerField\n                                  name=\"knowledge_cutoff_date\"\n                                  maxDate={new Date()}\n                                />\n                              </InputLayouts.Horizontal>\n                              <InputLayouts.Horizontal\n                                name=\"replace_base_system_prompt\"\n                                title=\"Overwrite System Prompt\"\n                                suffix=\"(Not Recommended)\"\n                                description='Remove the base system prompt which includes useful instructions (e.g. \"You can use Markdown tables\"). This may affect response quality.'\n                              >\n                                <SwitchField name=\"replace_base_system_prompt\" />\n                              </InputLayouts.Horizontal>\n                            </Card>\n\n                            <GeneralLayouts.Section gap={0.25}>\n                              <InputLayouts.Vertical\n                                name=\"reminders\"\n                                title=\"Reminders\"\n                                suffix=\"optional\"\n                              >\n                                <InputTextAreaField\n                                  name=\"reminders\"\n                                  placeholder=\"Remember, I want you to always format your response as a numbered list.\"\n                                />\n                              </InputLayouts.Vertical>\n                              <Text text03 secondaryBody>\n                                Append a brief reminder to the prompt messages.\n                                Use this to remind the agent if you find that it\n                                tends to forget certain instructions as the chat\n                                progresses. This should be brief and not\n                                interfere with the user messages.\n                              </Text>\n                            </GeneralLayouts.Section>\n                          </GeneralLayouts.Section>\n                        </SimpleCollapsible.Content>\n                      </SimpleCollapsible>\n\n                      {existingAgent && (\n                        <>\n                          <Separator noPadding />\n\n                          <Card>\n                            <InputLayouts.Horizontal\n                              title=\"Delete This Agent\"\n                              description=\"Anyone using this agent will no longer be able to access it.\"\n                              center\n                            >\n                              <OpalButton\n                                variant=\"danger\"\n                                prominence=\"secondary\"\n                                onClick={() => deleteAgentModal.toggle(true)}\n                              >\n                                Delete Agent\n                              </OpalButton>\n                            </InputLayouts.Horizontal>\n                          </Card>\n                        </>\n                      )}\n                    </SettingsLayouts.Body>\n                  </SettingsLayouts.Root>\n                </Form>\n              </>\n            );\n          }}\n        </Formik>\n      </div>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/AgentsNavigationPage.tsx",
    "content": "\"use client\";\n\nimport { useMemo, useState, useRef, useEffect } from \"react\";\nimport AgentCard from \"@/sections/cards/AgentCard\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { checkUserOwnsAgent as checkUserOwnsAgent } from \"@/lib/agents\";\nimport { useAgents } from \"@/hooks/useAgents\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport TextSeparator from \"@/refresh-components/TextSeparator\";\nimport Tabs from \"@/refresh-components/Tabs\";\nimport { FilterButton } from \"@opal/components\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { Button } from \"@opal/components\";\nimport {\n  SEARCH_TOOL_ID,\n  IMAGE_GENERATION_TOOL_ID,\n  OPEN_URL_TOOL_ID,\n  OPEN_URL_TOOL_NAME,\n  WEB_SEARCH_TOOL_ID,\n  SYSTEM_TOOL_ICONS,\n} from \"@/app/app/components/tools/constants\";\nimport {\n  SvgActions,\n  SvgCheck,\n  SvgOnyxOctagon,\n  SvgPlus,\n  SvgUser,\n} from \"@opal/icons\";\nimport useOnMount from \"@/hooks/useOnMount\";\n\ninterface AgentsSectionProps {\n  title: string;\n  description?: string;\n  agents: MinimalPersonaSnapshot[];\n}\n\nfunction AgentsSection({ title, description, agents }: AgentsSectionProps) {\n  if (agents.length === 0) return null;\n\n  return (\n    <div className=\"flex flex-col gap-4\">\n      <div>\n        <Text as=\"p\" headingH3>\n          {title}\n        </Text>\n        <Text as=\"p\" secondaryBody text03>\n          {description}\n        </Text>\n      </div>\n      <div className=\"w-full grid grid-cols-1 md:grid-cols-2 gap-2\">\n        {agents\n          .sort((a, b) => b.id - a.id)\n          .map((agent) => (\n            <AgentCard key={agent.id} agent={agent} />\n          ))}\n      </div>\n    </div>\n  );\n}\n\nexport default function AgentsNavigationPage() {\n  const { agents } = useAgents();\n  const [creatorFilterOpen, setCreatorFilterOpen] = useState(false);\n  const [actionsFilterOpen, setActionsFilterOpen] = useState(false);\n  const { user } = useUser();\n  const [searchQuery, setSearchQuery] = useState(\"\");\n  const [activeTab, setActiveTab] = useState<\"all\" | \"your\">(\"all\");\n  const [selectedCreatorIds, setSelectedCreatorIds] = useState<Set<string>>(\n    new Set()\n  );\n  const [selectedActionIds, setSelectedActionIds] = useState<Set<number>>(\n    new Set()\n  );\n  const [selectedMcpServerIds, setSelectedMcpServerIds] = useState<Set<number>>(\n    new Set()\n  );\n  const [creatorSearchQuery, setCreatorSearchQuery] = useState(\"\");\n  const [actionsSearchQuery, setActionsSearchQuery] = useState(\"\");\n  const [mcpServersMap, setMcpServersMap] = useState<\n    Map<number, { id: number; name: string }>\n  >(new Map());\n  const searchInputRef = useRef<HTMLInputElement>(null);\n\n  useOnMount(() => {\n    // Focus the search input when the page loads\n    searchInputRef.current?.focus();\n  });\n\n  // Fetch all MCP servers used by agents\n  useEffect(() => {\n    const fetchMCPServers = async () => {\n      const serverIds = new Set<number>();\n      agents.forEach((agent) => {\n        agent.tools.forEach((tool) => {\n          if (tool.mcp_server_id !== null && tool.mcp_server_id !== undefined) {\n            serverIds.add(tool.mcp_server_id);\n          }\n        });\n      });\n\n      if (serverIds.size === 0) return;\n\n      const serversMap = new Map<number, { id: number; name: string }>();\n\n      // Fetch server data for each unique server ID\n      for (const serverId of Array.from(serverIds)) {\n        try {\n          // We need to fetch from an agent that has this server\n          const agentWithServer = agents.find((agent) =>\n            agent.tools.some((tool) => tool.mcp_server_id === serverId)\n          );\n\n          if (agentWithServer) {\n            const response = await fetch(\n              `/api/mcp/servers/persona/${agentWithServer.id}`\n            );\n            if (response.ok) {\n              const data = await response.json();\n              const server = data.mcp_servers?.find(\n                (s: any) => s.id === serverId\n              );\n              if (server) {\n                serversMap.set(serverId, { id: server.id, name: server.name });\n              }\n            }\n          }\n        } catch (error) {\n          console.error(`Error fetching MCP server ${serverId}:`, error);\n        }\n      }\n\n      setMcpServersMap(serversMap);\n    };\n\n    fetchMCPServers();\n  }, [agents]);\n\n  const uniqueCreators = useMemo(() => {\n    const creatorsMap = new Map<string, { id: string; email: string }>();\n    agents.forEach((agent) => {\n      if (agent.owner) {\n        creatorsMap.set(agent.owner.id, agent.owner);\n      }\n    });\n\n    let creators = Array.from(creatorsMap.values()).sort((a, b) =>\n      a.email.localeCompare(b.email)\n    );\n\n    // Add current user if not in the list, and put them first\n    if (user) {\n      const hasCurrentUser = creators.some((c) => c.id === user.id);\n\n      if (!hasCurrentUser) {\n        creators = [{ id: user.id, email: user.email }, ...creators];\n      } else {\n        // Sort to put current user first\n        creators = creators.sort((a, b) => {\n          if (a.id === user.id) return -1;\n          if (b.id === user.id) return 1;\n          return 0;\n        });\n      }\n    }\n\n    return creators;\n  }, [agents, user]);\n\n  const filteredCreators = useMemo(() => {\n    if (!creatorSearchQuery) return uniqueCreators;\n\n    return uniqueCreators.filter((creator) =>\n      creator.email.toLowerCase().includes(creatorSearchQuery.toLowerCase())\n    );\n  }, [uniqueCreators, creatorSearchQuery]);\n\n  const uniqueActions = useMemo(() => {\n    const actionsMap = new Map<\n      number,\n      {\n        id: number;\n        name: string;\n        display_name: string;\n        mcp_server_id?: number | null;\n      }\n    >();\n    agents.forEach((agent) => {\n      agent.tools.forEach((tool) => {\n        if (\n          tool.in_code_tool_id === OPEN_URL_TOOL_ID ||\n          tool.name === OPEN_URL_TOOL_ID ||\n          tool.name === OPEN_URL_TOOL_NAME\n        ) {\n          return;\n        }\n        actionsMap.set(tool.id, {\n          id: tool.id,\n          name: tool.name,\n          display_name: tool.display_name,\n          mcp_server_id: tool.mcp_server_id,\n        });\n      });\n    });\n\n    const systemToolIds = [\n      SEARCH_TOOL_ID,\n      IMAGE_GENERATION_TOOL_ID,\n      WEB_SEARCH_TOOL_ID,\n    ];\n\n    const allActions = Array.from(actionsMap.values());\n    const systemTools = allActions.filter((action) =>\n      systemToolIds.includes(action.name)\n    );\n    const otherTools = allActions.filter(\n      (action) => !systemToolIds.includes(action.name)\n    );\n\n    // Sort each group by display name\n    systemTools.sort((a, b) => a.display_name.localeCompare(b.display_name));\n    otherTools.sort((a, b) => a.display_name.localeCompare(b.display_name));\n\n    // Group ALL tools by mcp_server_id (both system and other)\n    const mcpGroupsMap = new Map<number, typeof allActions>();\n    const nonMcpSystemTools: typeof systemTools = [];\n    const nonMcpOtherTools: typeof otherTools = [];\n\n    // Group system tools by MCP server\n    systemTools.forEach((tool) => {\n      if (tool.mcp_server_id !== null && tool.mcp_server_id !== undefined) {\n        const group = mcpGroupsMap.get(tool.mcp_server_id) || [];\n        group.push(tool);\n        mcpGroupsMap.set(tool.mcp_server_id, group);\n      } else {\n        nonMcpSystemTools.push(tool);\n      }\n    });\n\n    // Group other tools by MCP server\n    otherTools.forEach((tool) => {\n      if (tool.mcp_server_id !== null && tool.mcp_server_id !== undefined) {\n        const group = mcpGroupsMap.get(tool.mcp_server_id) || [];\n        group.push(tool);\n        mcpGroupsMap.set(tool.mcp_server_id, group);\n      } else {\n        nonMcpOtherTools.push(tool);\n      }\n    });\n\n    // Create grouped action items\n    type ActionItem =\n      | {\n          type: \"tool\";\n          id: number;\n          name: string;\n          display_name: string;\n          mcp_server_id?: number | null;\n        }\n      | {\n          type: \"mcp_group\";\n          mcp_server_id: number;\n          server_name: string;\n          tools: Array<{ id: number; name: string; display_name: string }>;\n        };\n\n    const mcpGroupItems: ActionItem[] = Array.from(mcpGroupsMap.entries()).map(\n      ([serverId, tools]) => {\n        const serverInfo = mcpServersMap.get(serverId);\n        return {\n          type: \"mcp_group\" as const,\n          mcp_server_id: serverId,\n          server_name: serverInfo?.name || `MCP Server ${serverId}`,\n          tools: tools.map((t) => ({\n            id: t.id,\n            name: t.name,\n            display_name: t.display_name,\n          })),\n        };\n      }\n    );\n\n    const nonMcpSystemToolItems: ActionItem[] = nonMcpSystemTools.map(\n      (tool) => ({ type: \"tool\" as const, ...tool })\n    );\n    const nonMcpOtherToolItems: ActionItem[] = nonMcpOtherTools.map((tool) => ({\n      type: \"tool\" as const,\n      ...tool,\n    }));\n\n    // Return non-MCP system tools first, then MCP groups, then non-MCP other tools\n    return [\n      ...nonMcpSystemToolItems,\n      ...mcpGroupItems,\n      ...nonMcpOtherToolItems,\n    ];\n  }, [agents, mcpServersMap]);\n\n  const filteredActions = useMemo(() => {\n    if (!actionsSearchQuery) return uniqueActions;\n\n    const query = actionsSearchQuery.toLowerCase();\n    return uniqueActions.filter((action) => {\n      if (action.type === \"tool\") {\n        return action.display_name.toLowerCase().includes(query);\n      } else {\n        // For MCP groups, search through all tool names in the group\n        return action.tools.some((tool) =>\n          tool.display_name.toLowerCase().includes(query)\n        );\n      }\n    });\n  }, [uniqueActions, actionsSearchQuery]);\n\n  const memoizedCurrentlyVisibleAgents = useMemo(() => {\n    return agents.filter((agent) => {\n      const nameMatches = agent.name\n        .toLowerCase()\n        .includes(searchQuery.toLowerCase());\n      const labelMatches = agent.labels?.some((label) =>\n        label.name.toLowerCase().includes(searchQuery.toLowerCase())\n      );\n\n      const mineFilter =\n        activeTab === \"your\" ? checkUserOwnsAgent(user, agent) : true;\n      const isNotUnifiedAgent = agent.id !== 0;\n\n      const creatorFilter =\n        selectedCreatorIds.size === 0 ||\n        (agent.owner && selectedCreatorIds.has(agent.owner.id));\n\n      const actionsFilter =\n        (selectedActionIds.size === 0 && selectedMcpServerIds.size === 0) ||\n        agent.tools.some(\n          (tool) =>\n            selectedActionIds.has(tool.id) ||\n            (tool.mcp_server_id !== null &&\n              tool.mcp_server_id !== undefined &&\n              selectedMcpServerIds.has(tool.mcp_server_id))\n        );\n\n      return (\n        (nameMatches || labelMatches) &&\n        mineFilter &&\n        isNotUnifiedAgent &&\n        creatorFilter &&\n        actionsFilter\n      );\n    });\n  }, [\n    agents,\n    searchQuery,\n    activeTab,\n    user,\n    selectedCreatorIds,\n    selectedActionIds,\n    selectedMcpServerIds,\n  ]);\n\n  const featuredAgents = [\n    ...memoizedCurrentlyVisibleAgents.filter((agent) => agent.is_featured),\n  ];\n  const allAgents = memoizedCurrentlyVisibleAgents.filter(\n    (agent) => !agent.is_featured\n  );\n\n  const agentCount = featuredAgents.length + allAgents.length;\n\n  const creatorFilterButtonText = useMemo(() => {\n    if (selectedCreatorIds.size === 0) {\n      return \"Everyone\";\n    } else if (selectedCreatorIds.size === 1) {\n      const selectedId = Array.from(selectedCreatorIds)[0];\n      const creator = uniqueCreators.find((c) => c.id === selectedId);\n      return `By ${creator?.email}` || \"Everyone\";\n    } else {\n      return `${selectedCreatorIds.size} people`;\n    }\n  }, [selectedCreatorIds, uniqueCreators]);\n\n  const actionsFilterButtonText = useMemo(() => {\n    const totalSelected = selectedActionIds.size + selectedMcpServerIds.size;\n\n    if (totalSelected === 0) {\n      return \"All Actions\";\n    } else if (totalSelected === 1) {\n      // Check if it's a single tool\n      if (selectedActionIds.size === 1) {\n        const selectedId = Array.from(selectedActionIds)[0];\n        for (const action of uniqueActions) {\n          if (action.type === \"tool\" && action.id === selectedId) {\n            return action.display_name;\n          }\n        }\n      }\n\n      // Check if it's a single MCP server\n      if (selectedMcpServerIds.size === 1) {\n        const selectedServerId = Array.from(selectedMcpServerIds)[0];\n        for (const action of uniqueActions) {\n          if (\n            action.type === \"mcp_group\" &&\n            action.mcp_server_id === selectedServerId\n          ) {\n            return action.server_name;\n          }\n        }\n      }\n\n      return \"All Actions\";\n    } else {\n      return `${totalSelected} selected`;\n    }\n  }, [selectedActionIds, selectedMcpServerIds, uniqueActions]);\n\n  return (\n    <SettingsLayouts.Root\n      data-testid=\"AgentsPage/container\"\n      aria-label=\"Agents Page\"\n    >\n      <SettingsLayouts.Header\n        icon={SvgOnyxOctagon}\n        title=\"Agents\"\n        description=\"Customize AI behavior and knowledge for you and your team's use cases.\"\n        rightChildren={\n          <Button\n            href=\"/app/agents/create\"\n            icon={SvgPlus}\n            aria-label=\"AgentsPage/new-agent-button\"\n          >\n            New Agent\n          </Button>\n        }\n      >\n        <div className=\"flex flex-col gap-2\">\n          <div className=\"flex flex-row items-center gap-2\">\n            <div className=\"flex-[2]\">\n              <InputTypeIn\n                ref={searchInputRef}\n                placeholder=\"Search agents...\"\n                value={searchQuery}\n                onChange={(event) => setSearchQuery(event.target.value)}\n                leftSearchIcon\n              />\n            </div>\n            <div className=\"flex-1\">\n              <Tabs\n                value={activeTab}\n                onValueChange={(value) => setActiveTab(value as \"all\" | \"your\")}\n              >\n                <Tabs.List>\n                  <Tabs.Trigger value=\"all\">All Agents</Tabs.Trigger>\n                  <Tabs.Trigger value=\"your\">Your Agents</Tabs.Trigger>\n                </Tabs.List>\n              </Tabs>\n            </div>\n          </div>\n          <div className=\"flex flex-row gap-2\">\n            <Popover\n              open={creatorFilterOpen}\n              onOpenChange={setCreatorFilterOpen}\n            >\n              <Popover.Trigger asChild>\n                <FilterButton\n                  icon={SvgUser}\n                  active={selectedCreatorIds.size > 0}\n                  onClear={() => setSelectedCreatorIds(new Set())}\n                >\n                  {creatorFilterButtonText}\n                </FilterButton>\n              </Popover.Trigger>\n              <Popover.Content align=\"start\">\n                <PopoverMenu>\n                  {[\n                    <InputTypeIn\n                      key=\"created-by\"\n                      placeholder=\"Created by...\"\n                      variant=\"internal\"\n                      leftSearchIcon\n                      value={creatorSearchQuery}\n                      onChange={(e) => setCreatorSearchQuery(e.target.value)}\n                    />,\n                    ...filteredCreators.flatMap((creator, index) => {\n                      const isSelected = selectedCreatorIds.has(creator.id);\n                      const isCurrentUser = user && creator.id === user.id;\n\n                      // Check if we need to add a separator after this item\n                      const nextCreator = filteredCreators[index + 1];\n                      const nextIsCurrentUser =\n                        user && nextCreator && nextCreator.id === user.id;\n                      const needsSeparator =\n                        isCurrentUser && nextCreator && !nextIsCurrentUser;\n\n                      // Determine icon: Check if selected, User icon if current user, otherwise no icon\n                      const icon = isCurrentUser\n                        ? SvgUser\n                        : isSelected\n                          ? SvgCheck\n                          : () => null;\n\n                      const lineItem = (\n                        <LineItem\n                          key={creator.id}\n                          icon={icon}\n                          selected={isSelected}\n                          emphasized\n                          onClick={() => {\n                            setSelectedCreatorIds((prev) => {\n                              const newSet = new Set(prev);\n                              if (newSet.has(creator.id)) {\n                                newSet.delete(creator.id);\n                              } else {\n                                newSet.add(creator.id);\n                              }\n                              return newSet;\n                            });\n                          }}\n                        >\n                          {creator.email}\n                        </LineItem>\n                      );\n\n                      // Return the line item, and optionally a separator\n                      return needsSeparator ? [lineItem, null] : [lineItem];\n                    }),\n                  ]}\n                </PopoverMenu>\n              </Popover.Content>\n            </Popover>\n            <Popover\n              open={actionsFilterOpen}\n              onOpenChange={setActionsFilterOpen}\n            >\n              <Popover.Trigger asChild>\n                <FilterButton\n                  icon={SvgActions}\n                  active={\n                    selectedActionIds.size > 0 || selectedMcpServerIds.size > 0\n                  }\n                  onClear={() => {\n                    setSelectedActionIds(new Set());\n                    setSelectedMcpServerIds(new Set());\n                  }}\n                >\n                  {actionsFilterButtonText}\n                </FilterButton>\n              </Popover.Trigger>\n              <Popover.Content align=\"start\">\n                <PopoverMenu>\n                  {[\n                    <InputTypeIn\n                      key=\"actions\"\n                      placeholder=\"Filter actions...\"\n                      variant=\"internal\"\n                      leftSearchIcon\n                      value={actionsSearchQuery}\n                      onChange={(e) => setActionsSearchQuery(e.target.value)}\n                    />,\n                    ...filteredActions.flatMap((action, index) => {\n                      if (action.type === \"tool\") {\n                        const isSelected = selectedActionIds.has(action.id);\n                        const systemIcon = SYSTEM_TOOL_ICONS[action.name];\n                        const isSystemTool = !!systemIcon;\n\n                        // Check if we need to add a separator after this item\n                        const nextAction = filteredActions[index + 1];\n                        const nextIsSystemTool =\n                          nextAction && nextAction.type === \"tool\"\n                            ? !!SYSTEM_TOOL_ICONS[nextAction.name]\n                            : false;\n                        const needsSeparator =\n                          isSystemTool && nextAction && !nextIsSystemTool;\n\n                        // Determine icon: system icon if available, otherwise Actions icon\n                        const icon = systemIcon ? systemIcon : SvgActions;\n\n                        const lineItem = (\n                          <LineItem\n                            key={action.id}\n                            icon={icon}\n                            selected={isSelected}\n                            emphasized\n                            onClick={() => {\n                              setSelectedActionIds((prev) => {\n                                const newSet = new Set(prev);\n                                if (newSet.has(action.id)) {\n                                  newSet.delete(action.id);\n                                } else {\n                                  newSet.add(action.id);\n                                }\n                                return newSet;\n                              });\n                            }}\n                          >\n                            {action.display_name}\n                          </LineItem>\n                        );\n\n                        return needsSeparator ? [lineItem, null] : [lineItem];\n                      } else {\n                        // MCP Group - render only the server name, not individual tools\n                        const groupKey = `mcp-group-${action.mcp_server_id}`;\n                        const isSelected = selectedMcpServerIds.has(\n                          action.mcp_server_id\n                        );\n\n                        const lineItem = (\n                          <LineItem\n                            key={groupKey}\n                            icon={SvgActions}\n                            selected={isSelected}\n                            emphasized\n                            onClick={() => {\n                              setSelectedMcpServerIds((prev) => {\n                                const newSet = new Set(prev);\n                                if (newSet.has(action.mcp_server_id)) {\n                                  newSet.delete(action.mcp_server_id);\n                                } else {\n                                  newSet.add(action.mcp_server_id);\n                                }\n                                return newSet;\n                              });\n                            }}\n                          >\n                            {action.server_name}\n                          </LineItem>\n                        );\n\n                        return [lineItem];\n                      }\n                    }),\n                  ]}\n                </PopoverMenu>\n              </Popover.Content>\n            </Popover>\n          </div>\n        </div>\n      </SettingsLayouts.Header>\n\n      {/* Agents List */}\n      <SettingsLayouts.Body>\n        {agentCount === 0 ? (\n          <Text\n            as=\"p\"\n            className=\"w-full h-full flex flex-col items-center justify-center py-12\"\n            text03\n          >\n            No Agents found\n          </Text>\n        ) : (\n          <>\n            <AgentsSection\n              title=\"Featured Agents\"\n              description=\"Curated by your team\"\n              agents={featuredAgents}\n            />\n            <AgentsSection title=\"All Agents\" agents={allAgents} />\n            <TextSeparator\n              count={agentCount}\n              text={agentCount === 1 ? \"Agent\" : \"Agents\"}\n            />\n          </>\n        )}\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/AppPage.tsx",
    "content": "\"use client\";\n\nimport { redirect, useRouter, useSearchParams } from \"next/navigation\";\nimport { personaIncludesRetrieval } from \"@/app/app/services/lib\";\nimport { useCallback, useEffect, useMemo, useRef, useState } from \"react\";\nimport { toast, useToastFromQuery } from \"@/hooks/useToast\";\nimport { SEARCH_PARAM_NAMES } from \"@/app/app/services/searchParams\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { useFederatedConnectors, useFilters, useLlmManager } from \"@/lib/hooks\";\nimport { useForcedTools } from \"@/lib/hooks/useForcedTools\";\nimport OnyxInitializingLoader from \"@/components/OnyxInitializingLoader\";\nimport { OnyxDocument, MinimalOnyxDocument } from \"@/lib/search/interfaces\";\nimport {\n  useSettingsContext,\n  useVectorDbEnabled,\n} from \"@/providers/SettingsProvider\";\nimport Dropzone from \"react-dropzone\";\nimport AppInputBar, { AppInputBarHandle } from \"@/sections/input/AppInputBar\";\nimport useChatSessions from \"@/hooks/useChatSessions\";\nimport useCCPairs from \"@/hooks/useCCPairs\";\nimport useTags from \"@/hooks/useTags\";\nimport { useDocumentSets } from \"@/lib/hooks/useDocumentSets\";\nimport { useAgents } from \"@/hooks/useAgents\";\nimport { AppPopup } from \"@/app/app/components/AppPopup\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport NoAgentModal from \"@/components/modals/NoAgentModal\";\nimport PreviewModal from \"@/sections/modals/PreviewModal\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { useSendMessageToParent } from \"@/lib/extension/utils\";\nimport { SUBMIT_MESSAGE_TYPES } from \"@/lib/extension/constants\";\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport { SourceMetadata } from \"@/lib/search/interfaces\";\nimport { FederatedConnectorDetail, UserRole, ValidSources } from \"@/lib/types\";\nimport DocumentsSidebar from \"@/sections/document-sidebar/DocumentsSidebar\";\nimport useChatController from \"@/hooks/useChatController\";\nimport useAgentController from \"@/hooks/useAgentController\";\nimport useChatSessionController from \"@/hooks/useChatSessionController\";\nimport useDeepResearchToggle from \"@/hooks/useDeepResearchToggle\";\nimport useIsDefaultAgent from \"@/hooks/useIsDefaultAgent\";\nimport AgentDescription from \"@/app/app/components/AgentDescription\";\nimport {\n  useChatSessionStore,\n  useCurrentMessageHistory,\n} from \"@/app/app/stores/useChatSessionStore\";\nimport {\n  useCurrentChatState,\n  useIsReady,\n  useDocumentSidebarVisible,\n} from \"@/app/app/stores/useChatSessionStore\";\nimport FederatedOAuthModal from \"@/components/chat/FederatedOAuthModal\";\nimport ChatScrollContainer, {\n  ChatScrollContainerHandle,\n} from \"@/sections/chat/ChatScrollContainer\";\nimport ProjectContextPanel from \"@/app/app/components/projects/ProjectContextPanel\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport { getProjectTokenCount } from \"@/app/app/projects/projectsService\";\nimport ProjectChatSessionList from \"@/app/app/components/projects/ProjectChatSessionList\";\nimport { cn } from \"@/lib/utils\";\nimport Suggestions from \"@/sections/Suggestions\";\nimport OnboardingFlow from \"@/sections/onboarding/OnboardingFlow\";\nimport { OnboardingStep } from \"@/interfaces/onboarding\";\nimport { useShowOnboarding } from \"@/hooks/useShowOnboarding\";\nimport * as AppLayouts from \"@/layouts/app-layouts\";\nimport { SvgChevronDown, SvgFileText } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport { IllustrationContent } from \"@opal/layouts\";\nimport SvgNotFound from \"@opal/illustrations/not-found\";\nimport SvgNoAccess from \"@opal/illustrations/no-access\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport useAppFocus from \"@/hooks/useAppFocus\";\nimport { useQueryController } from \"@/providers/QueryControllerProvider\";\nimport WelcomeMessage from \"@/app/app/components/WelcomeMessage\";\nimport ChatUI from \"@/sections/chat/ChatUI\";\nimport { eeGated } from \"@/ce\";\nimport EESearchUI from \"@/ee/sections/SearchUI\";\nconst SearchUI = eeGated(EESearchUI);\nimport { motion, AnimatePresence } from \"motion/react\";\n\ninterface FadeProps {\n  show: boolean;\n  children?: React.ReactNode;\n  className?: string;\n}\n\nfunction Fade({ show, children, className }: FadeProps) {\n  return (\n    <AnimatePresence>\n      {show && (\n        <motion.div\n          initial={{ opacity: 0 }}\n          animate={{ opacity: 1 }}\n          exit={{ opacity: 0 }}\n          transition={{ duration: 0.15 }}\n          className={className}\n        >\n          {children}\n        </motion.div>\n      )}\n    </AnimatePresence>\n  );\n}\n\nexport interface ChatPageProps {\n  firstMessage?: string;\n}\n\nexport default function AppPage({ firstMessage }: ChatPageProps) {\n  // Performance tracking\n  // Keeping this here in case we need to track down slow renders in the future\n  // const renderCount = useRef(0);\n  // renderCount.current++;\n  // const renderStartTime = performance.now();\n\n  // useEffect(() => {\n  //   const renderTime = performance.now() - renderStartTime;\n  //   if (renderTime > 10) {\n  //     console.log(\n  //       `[ChatPage] Slow render #${renderCount.current}: ${renderTime.toFixed(\n  //         2\n  //       )}ms`\n  //     );\n  //   }\n  // });\n\n  const router = useRouter();\n  const appFocus = useAppFocus();\n\n  useToastFromQuery({\n    oauth_connected: {\n      message: \"Authentication successful\",\n      type: \"success\",\n    },\n  });\n  const searchParams = useSearchParams();\n\n  // Use SWR hooks for data fetching\n  const {\n    chatSessions,\n    refreshChatSessions,\n    currentChatSession,\n    currentChatSessionId,\n    isLoading: isLoadingChatSessions,\n  } = useChatSessions();\n  // handle redirect if chat page is disabled\n  // NOTE: this must be done here, in a client component since\n  // settings are passed in via Context and therefore aren't\n  // available in server-side components\n  const settings = useSettingsContext();\n  const vectorDbEnabled = useVectorDbEnabled();\n  const { ccPairs } = useCCPairs(vectorDbEnabled);\n  const { tags } = useTags();\n  const { documentSets } = useDocumentSets();\n  const {\n    currentMessageFiles,\n    setCurrentMessageFiles,\n    currentProjectId,\n    currentProjectDetails,\n    lastFailedFiles,\n    clearLastFailedFiles,\n  } = useProjectsContext();\n\n  // When changing from project chat to main chat (or vice-versa), clear forced tools\n  const { setForcedToolIds } = useForcedTools();\n  useEffect(() => {\n    setForcedToolIds([]);\n  }, [currentProjectId, setForcedToolIds]);\n\n  const isInitialLoad = useRef(true);\n\n  const { agents, isLoading: isLoadingAgents } = useAgents();\n\n  // Also fetch federated connectors for the sources list\n  const { data: federatedConnectorsData } = useFederatedConnectors();\n\n  const { user } = useUser();\n\n  function processSearchParamsAndSubmitMessage(searchParamsString: string) {\n    const newSearchParams = new URLSearchParams(searchParamsString);\n    const message = newSearchParams?.get(\"user-prompt\");\n\n    filterManager.buildFiltersFromQueryString(\n      newSearchParams.toString(),\n      sources,\n      documentSets.map((ds) => ds.name),\n      tags\n    );\n\n    newSearchParams.delete(SEARCH_PARAM_NAMES.SEND_ON_LOAD);\n\n    router.replace(`?${newSearchParams.toString()}`, { scroll: false });\n\n    // If there's a message, submit it\n    if (message) {\n      onSubmit({\n        message,\n        currentMessageFiles,\n        deepResearch: deepResearchEnabledForCurrentWorkflow,\n      });\n    }\n  }\n\n  const { selectedAgent, setSelectedAgentFromId, liveAgent } =\n    useAgentController({\n      selectedChatSession: currentChatSession,\n      onAgentSelect: () => {\n        // Only remove project context if user explicitly selected an agent\n        // (i.e., agentId is present). Avoid clearing project when agentId was removed.\n        const newSearchParams = new URLSearchParams(\n          searchParams?.toString() || \"\"\n        );\n        if (newSearchParams.has(SEARCH_PARAM_NAMES.PERSONA_ID)) {\n          newSearchParams.delete(SEARCH_PARAM_NAMES.PROJECT_ID);\n          router.replace(`?${newSearchParams.toString()}`, { scroll: false });\n        }\n      },\n    });\n\n  const { deepResearchEnabled, toggleDeepResearch } = useDeepResearchToggle({\n    chatSessionId: currentChatSessionId,\n    agentId: selectedAgent?.id,\n  });\n  const deepResearchEnabledForCurrentWorkflow =\n    currentProjectId === null && deepResearchEnabled;\n\n  const [presentingDocument, setPresentingDocument] =\n    useState<MinimalOnyxDocument | null>(null);\n\n  const llmManager = useLlmManager(currentChatSession ?? undefined, liveAgent);\n\n  const {\n    showOnboarding,\n    onboardingDismissed,\n    onboardingState,\n    onboardingActions,\n    llmDescriptors,\n    isLoadingOnboarding,\n    finishOnboarding,\n    hideOnboarding,\n  } = useShowOnboarding({\n    liveAgent,\n    isLoadingChatSessions,\n    chatSessionsCount: chatSessions.length,\n    userId: user?.id,\n  });\n\n  const noAgents = liveAgent === null || liveAgent === undefined;\n\n  const availableSources: ValidSources[] = useMemo(() => {\n    return ccPairs.map((ccPair) => ccPair.source);\n  }, [ccPairs]);\n\n  const sources: SourceMetadata[] = useMemo(() => {\n    const uniqueSources = Array.from(new Set(availableSources));\n    const regularSources = uniqueSources.map((source) =>\n      getSourceMetadata(source)\n    );\n\n    // Add federated connectors as sources\n    const federatedSources =\n      federatedConnectorsData?.map((connector: FederatedConnectorDetail) => {\n        return getSourceMetadata(connector.source);\n      }) || [];\n\n    // Combine sources and deduplicate based on internalName\n    const allSources = [...regularSources, ...federatedSources];\n    const deduplicatedSources = allSources.reduce((acc, source) => {\n      const existing = acc.find((s) => s.internalName === source.internalName);\n      if (!existing) {\n        acc.push(source);\n      }\n      return acc;\n    }, [] as SourceMetadata[]);\n\n    return deduplicatedSources;\n  }, [availableSources, federatedConnectorsData]);\n\n  // Show toast if any files failed in ProjectsContext reconciliation\n  useEffect(() => {\n    if (lastFailedFiles && lastFailedFiles.length > 0) {\n      const names = lastFailedFiles.map((f) => f.name).join(\", \");\n      toast.error(\n        lastFailedFiles.length === 1\n          ? `File failed and was removed: ${names}`\n          : `Files failed and were removed: ${names}`\n      );\n      clearLastFailedFiles();\n    }\n  }, [lastFailedFiles, clearLastFailedFiles]);\n\n  const chatInputBarRef = useRef<AppInputBarHandle>(null);\n\n  const filterManager = useFilters();\n\n  const isDefaultAgent = useIsDefaultAgent({\n    liveAgent,\n    existingChatSessionId: currentChatSessionId,\n    selectedChatSession: currentChatSession ?? undefined,\n    settings,\n  });\n\n  const scrollContainerRef = useRef<ChatScrollContainerHandle>(null);\n  const [showScrollButton, setShowScrollButton] = useState(false);\n\n  // Reset scroll button when session changes\n  useEffect(() => {\n    setShowScrollButton(false);\n  }, [currentChatSessionId]);\n\n  const handleScrollToBottom = useCallback(() => {\n    scrollContainerRef.current?.scrollToBottom();\n  }, []);\n\n  const resetInputBar = useCallback(() => {\n    chatInputBarRef.current?.reset();\n    setCurrentMessageFiles([]);\n  }, [setCurrentMessageFiles]);\n\n  // Add refs needed by useChatSessionController\n  const chatSessionIdRef = useRef<string | null>(currentChatSessionId);\n  const loadedIdSessionRef = useRef<string | null>(currentChatSessionId);\n  const submitOnLoadPerformed = useRef<boolean>(false);\n\n  function loadNewPageLogic(event: MessageEvent) {\n    if (event.data.type === SUBMIT_MESSAGE_TYPES.PAGE_CHANGE) {\n      try {\n        const url = new URL(event.data.href);\n        processSearchParamsAndSubmitMessage(url.searchParams.toString());\n      } catch (error) {\n        console.error(\"Error parsing URL:\", error);\n      }\n    }\n  }\n\n  // Equivalent to `loadNewPageLogic`\n  useEffect(() => {\n    if (searchParams?.get(SEARCH_PARAM_NAMES.SEND_ON_LOAD)) {\n      processSearchParamsAndSubmitMessage(searchParams.toString());\n    }\n  }, [searchParams, router]);\n\n  useEffect(() => {\n    window.addEventListener(\"message\", loadNewPageLogic);\n\n    return () => {\n      window.removeEventListener(\"message\", loadNewPageLogic);\n    };\n  }, []);\n\n  const [selectedDocuments, setSelectedDocuments] = useState<OnyxDocument[]>(\n    []\n  );\n\n  // Access chat state directly from the store\n  const currentChatState = useCurrentChatState();\n  const isReady = useIsReady();\n  const documentSidebarVisible = useDocumentSidebarVisible();\n  const updateCurrentDocumentSidebarVisible = useChatSessionStore(\n    (state) => state.updateCurrentDocumentSidebarVisible\n  );\n  const messageHistory = useCurrentMessageHistory();\n\n  // Determine anchor: second-to-last message (last user message before current response)\n  const anchorMessage = messageHistory.at(-2) ?? messageHistory[0];\n  const anchorNodeId = anchorMessage?.nodeId;\n  const anchorSelector = anchorNodeId ? `#message-${anchorNodeId}` : undefined;\n\n  // Auto-scroll preference from user settings\n  const autoScrollEnabled = user?.preferences?.auto_scroll !== false;\n  const isStreaming = currentChatState === \"streaming\";\n\n  const {\n    onSubmit,\n    stopGenerating,\n    handleMessageSpecificFileUpload,\n    availableContextTokens,\n  } = useChatController({\n    filterManager,\n    llmManager,\n    availableAgents: agents,\n    liveAgent,\n    existingChatSessionId: currentChatSessionId,\n    selectedDocuments,\n    searchParams,\n    resetInputBar,\n    setSelectedAgentFromId,\n  });\n\n  const {\n    onMessageSelection,\n    currentSessionFileTokenCount,\n    sessionFetchError,\n  } = useChatSessionController({\n    existingChatSessionId: currentChatSessionId,\n    searchParams,\n    filterManager,\n    firstMessage,\n    setSelectedAgentFromId,\n    setSelectedDocuments,\n    setCurrentMessageFiles,\n    chatSessionIdRef,\n    loadedIdSessionRef,\n    chatInputBarRef,\n    isInitialLoad,\n    submitOnLoadPerformed,\n    refreshChatSessions,\n    onSubmit,\n  });\n\n  useSendMessageToParent();\n\n  const retrievalEnabled = useMemo(() => {\n    if (liveAgent) {\n      return personaIncludesRetrieval(liveAgent);\n    }\n    return false;\n  }, [liveAgent]);\n\n  useEffect(() => {\n    if (\n      (!personaIncludesRetrieval &&\n        (!selectedDocuments || selectedDocuments.length === 0) &&\n        documentSidebarVisible) ||\n      !currentChatSessionId\n    ) {\n      updateCurrentDocumentSidebarVisible(false);\n    }\n  }, [currentChatSessionId]);\n\n  const handleResubmitLastMessage = useCallback(() => {\n    // Grab the last user-type message\n    const lastUserMsg = messageHistory\n      .slice()\n      .reverse()\n      .find((m) => m.type === \"user\");\n    if (!lastUserMsg) {\n      toast.error(\"No previously-submitted user message found.\");\n      return;\n    }\n\n    // We call onSubmit, passing a `messageOverride`\n    onSubmit({\n      message: lastUserMsg.message,\n      currentMessageFiles: currentMessageFiles,\n      deepResearch: deepResearchEnabledForCurrentWorkflow,\n      messageIdToResend: lastUserMsg.messageId,\n    });\n  }, [\n    messageHistory,\n    onSubmit,\n    currentMessageFiles,\n    deepResearchEnabledForCurrentWorkflow,\n  ]);\n\n  const toggleDocumentSidebar = useCallback(() => {\n    if (!documentSidebarVisible) {\n      updateCurrentDocumentSidebarVisible(true);\n    } else {\n      updateCurrentDocumentSidebarVisible(false);\n    }\n  }, [documentSidebarVisible, updateCurrentDocumentSidebarVisible]);\n\n  if (!user) {\n    redirect(\"/auth/login\");\n  }\n\n  const onChat = useCallback(\n    (message: string) => {\n      onSubmit({\n        message,\n        currentMessageFiles,\n        deepResearch: deepResearchEnabledForCurrentWorkflow,\n      });\n      if (showOnboarding || !onboardingDismissed) {\n        finishOnboarding();\n      }\n    },\n    [\n      onSubmit,\n      currentMessageFiles,\n      deepResearchEnabledForCurrentWorkflow,\n      showOnboarding,\n      onboardingDismissed,\n      finishOnboarding,\n    ]\n  );\n  const { submit: submitQuery, state, setAppMode } = useQueryController();\n\n  const defaultAppMode =\n    (user?.preferences?.default_app_mode?.toLowerCase() as \"chat\" | \"search\") ??\n    \"chat\";\n\n  const isNewSession = appFocus.isNewSession();\n\n  const isSearch =\n    state.phase === \"searching\" || state.phase === \"search-results\";\n\n  // 1. Reset the app-mode back to the user's default when navigating back to the \"New Sessions\" tab.\n  // 2. If we're navigating away from the \"New Session\" tab after performing a search, we reset the app-input-bar.\n  useEffect(() => {\n    if (isNewSession) setAppMode(defaultAppMode);\n    if (!isNewSession && isSearch) resetInputBar();\n  }, [isNewSession, defaultAppMode, isSearch, resetInputBar, setAppMode]);\n\n  const handleSearchDocumentClick = useCallback(\n    (doc: MinimalOnyxDocument) => setPresentingDocument(doc),\n    []\n  );\n\n  const handleAppInputBarSubmit = useCallback(\n    async (message: string) => {\n      // If we're in an existing chat session, always use chat mode\n      // (appMode only applies to new sessions)\n      if (currentChatSessionId) {\n        onSubmit({\n          message,\n          currentMessageFiles,\n          deepResearch: deepResearchEnabledForCurrentWorkflow,\n        });\n        if (showOnboarding || !onboardingDismissed) {\n          finishOnboarding();\n        }\n        return;\n      }\n\n      // For new sessions, let the query controller handle routing.\n      // resetInputBar is called inside useChatController.onSubmit for chat-routed queries.\n      // For search-routed queries, the input bar is intentionally kept\n      // so the user can see and refine their search query.\n      await submitQuery(message, onChat);\n    },\n    [\n      currentChatSessionId,\n      submitQuery,\n      onChat,\n      onSubmit,\n      currentMessageFiles,\n      deepResearchEnabledForCurrentWorkflow,\n      showOnboarding,\n      onboardingDismissed,\n      finishOnboarding,\n    ]\n  );\n\n  // Memoized callbacks for DocumentsSidebar\n  const handleMobileDocumentSidebarClose = useCallback(() => {\n    updateCurrentDocumentSidebarVisible(false);\n  }, [updateCurrentDocumentSidebarVisible]);\n\n  const handleDesktopDocumentSidebarClose = useCallback(() => {\n    setTimeout(() => updateCurrentDocumentSidebarVisible(false), 300);\n  }, [updateCurrentDocumentSidebarVisible]);\n\n  const desktopDocumentSidebar =\n    retrievalEnabled && !settings.isMobile ? (\n      <div\n        className={cn(\n          \"flex-shrink-0 overflow-hidden transition-all duration-300 ease-in-out\",\n          documentSidebarVisible ? \"w-[25rem]\" : \"w-[0rem]\"\n        )}\n      >\n        <div className=\"h-full w-[25rem]\">\n          <DocumentsSidebar\n            setPresentingDocument={setPresentingDocument}\n            modal={false}\n            closeSidebar={handleDesktopDocumentSidebarClose}\n            selectedDocuments={selectedDocuments}\n          />\n        </div>\n      </div>\n    ) : null;\n\n  // When no chat session exists but a project is selected, fetch the\n  // total tokens for the project's files so upload UX can compare\n  // against available context similar to session-based flows.\n  const [projectContextTokenCount, setProjectContextTokenCount] = useState(0);\n  // Fetch project-level token count when no chat session exists.\n  // Note: useEffect cannot be async, so we define an inner async function (run)\n  // and invoke it. The `cancelled` guard prevents setting state after the\n  // component unmounts or when the dependencies change and a newer effect run\n  // supersedes an older in-flight request.\n  useEffect(() => {\n    let cancelled = false;\n    async function run() {\n      if (!currentChatSessionId && currentProjectId !== null) {\n        try {\n          const total = await getProjectTokenCount(currentProjectId);\n          if (!cancelled) setProjectContextTokenCount(total || 0);\n        } catch {\n          if (!cancelled) setProjectContextTokenCount(0);\n        }\n      } else {\n        setProjectContextTokenCount(0);\n      }\n    }\n    run();\n    return () => {\n      cancelled = true;\n    };\n  }, [currentChatSessionId, currentProjectId, currentProjectDetails?.files]);\n\n  // handle error case where no assistants are available\n  // Only show this after agents have loaded to prevent flash during initial load\n  if (noAgents && !isLoadingAgents) {\n    return <NoAgentModal />;\n  }\n\n  const hasStarterMessages = (liveAgent?.starter_messages?.length ?? 0) > 0;\n\n  const gridStyle = {\n    gridTemplateColumns: \"1fr\",\n    gridTemplateRows: isSearch\n      ? \"0fr auto 1fr\"\n      : appFocus.isChat()\n        ? \"1fr auto 0fr\"\n        : appFocus.isProject()\n          ? \"auto auto 1fr\"\n          : \"1fr auto 1fr\",\n  };\n\n  if (!isReady) return <OnyxInitializingLoader />;\n\n  return (\n    <>\n      <AppPopup />\n\n      {retrievalEnabled && documentSidebarVisible && settings.isMobile && (\n        <div className=\"md:hidden\">\n          <Modal\n            open\n            onOpenChange={() => updateCurrentDocumentSidebarVisible(false)}\n          >\n            <Modal.Content>\n              <Modal.Header\n                icon={SvgFileText}\n                title=\"Sources\"\n                onClose={() => updateCurrentDocumentSidebarVisible(false)}\n              />\n              <Modal.Body>\n                {/* IMPORTANT: this is a memoized component, and it's very important\n                for performance reasons that this stays true. MAKE SURE that all function\n                props are wrapped in useCallback. */}\n                <DocumentsSidebar\n                  setPresentingDocument={setPresentingDocument}\n                  modal\n                  closeSidebar={handleMobileDocumentSidebarClose}\n                  selectedDocuments={selectedDocuments}\n                />\n              </Modal.Body>\n            </Modal.Content>\n          </Modal>\n        </div>\n      )}\n\n      {presentingDocument && (\n        <PreviewModal\n          presentingDocument={presentingDocument}\n          onClose={() => setPresentingDocument(null)}\n        />\n      )}\n\n      <FederatedOAuthModal />\n\n      <AppLayouts.Root enableBackground={!appFocus.isProject()}>\n        <Dropzone\n          onDrop={(acceptedFiles) =>\n            handleMessageSpecificFileUpload(acceptedFiles)\n          }\n          noClick\n        >\n          {({ getRootProps }) => (\n            <div\n              className=\"h-full w-full flex flex-col items-center outline-none relative\"\n              {...getRootProps({ tabIndex: -1 })}\n            >\n              {/* Main content grid — 3 rows, animated */}\n              <div\n                className=\"flex-1 w-full grid min-h-0 transition-[grid-template-rows] duration-150 ease-in-out\"\n                style={gridStyle}\n              >\n                {/* ── Top row: ChatUI / WelcomeMessage / ProjectUI ── */}\n                <div className=\"row-start-1 min-h-0 overflow-hidden flex flex-col items-center\">\n                  {/* ChatUI */}\n                  <Fade\n                    show={\n                      appFocus.isChat() &&\n                      !!currentChatSessionId &&\n                      !!liveAgent &&\n                      !sessionFetchError\n                    }\n                    className=\"h-full w-full flex flex-col items-center\"\n                  >\n                    <ChatScrollContainer\n                      ref={scrollContainerRef}\n                      sessionId={currentChatSessionId!}\n                      anchorSelector={anchorSelector}\n                      autoScroll={autoScrollEnabled}\n                      isStreaming={isStreaming}\n                      onScrollButtonVisibilityChange={setShowScrollButton}\n                    >\n                      <ChatUI\n                        liveAgent={liveAgent!}\n                        llmManager={llmManager}\n                        deepResearchEnabled={\n                          deepResearchEnabledForCurrentWorkflow\n                        }\n                        currentMessageFiles={currentMessageFiles}\n                        setPresentingDocument={setPresentingDocument}\n                        onSubmit={onSubmit}\n                        onMessageSelection={onMessageSelection}\n                        stopGenerating={stopGenerating}\n                        onResubmit={handleResubmitLastMessage}\n                        anchorNodeId={anchorNodeId}\n                      />\n                    </ChatScrollContainer>\n                  </Fade>\n\n                  {/* Session fetch error (404 / 403) */}\n                  <Fade\n                    show={appFocus.isChat() && sessionFetchError !== null}\n                    className=\"h-full w-full flex flex-col items-center justify-center\"\n                  >\n                    {sessionFetchError && (\n                      <Section\n                        flexDirection=\"column\"\n                        alignItems=\"center\"\n                        gap={1}\n                      >\n                        <IllustrationContent\n                          illustration={\n                            sessionFetchError.type === \"access_denied\"\n                              ? SvgNoAccess\n                              : SvgNotFound\n                          }\n                          title={\n                            sessionFetchError.type === \"not_found\"\n                              ? \"Chat not found\"\n                              : sessionFetchError.type === \"access_denied\"\n                                ? \"Access denied\"\n                                : \"Something went wrong\"\n                          }\n                          description={\n                            sessionFetchError.type === \"not_found\"\n                              ? \"This chat session doesn't exist or has been deleted.\"\n                              : sessionFetchError.type === \"access_denied\"\n                                ? \"You don't have permission to view this chat session.\"\n                                : sessionFetchError.detail\n                          }\n                        />\n                        <Button href=\"/app\" prominence=\"secondary\">\n                          Start a new chat\n                        </Button>\n                      </Section>\n                    )}\n                  </Fade>\n\n                  {/* ProjectUI */}\n                  {appFocus.isProject() && (\n                    <div className=\"w-full max-h-[50vh] overflow-y-auto overscroll-y-none\">\n                      <ProjectContextPanel\n                        projectTokenCount={projectContextTokenCount}\n                        availableContextTokens={availableContextTokens}\n                        setPresentingDocument={setPresentingDocument}\n                      />\n                    </div>\n                  )}\n\n                  {/* WelcomeMessageUI */}\n                  <Fade\n                    show={\n                      (appFocus.isNewSession() || appFocus.isAgent()) &&\n                      (state.phase === \"idle\" || state.phase === \"classifying\")\n                    }\n                    className=\"w-full flex-1 flex flex-col items-center justify-end\"\n                  >\n                    <WelcomeMessage\n                      agent={liveAgent}\n                      isDefaultAgent={isDefaultAgent}\n                    />\n                    <Spacer rem={1.5} />\n                  </Fade>\n                </div>\n\n                {/* ── Middle-center: AppInputBar ── */}\n                <div\n                  className={cn(\n                    \"row-start-2 flex flex-col items-center px-4\",\n                    sessionFetchError && \"hidden\"\n                  )}\n                >\n                  <div className=\"relative w-full max-w-[var(--app-page-main-content-width)] flex flex-col\">\n                    {/* Scroll to bottom button - positioned absolutely above AppInputBar */}\n                    {appFocus.isChat() && showScrollButton && (\n                      <div className=\"absolute top-[-3.5rem] self-center\">\n                        <Button\n                          icon={SvgChevronDown}\n                          onClick={handleScrollToBottom}\n                          aria-label=\"Scroll to bottom\"\n                          prominence=\"secondary\"\n                        />\n                      </div>\n                    )}\n\n                    {/* OnboardingUI */}\n                    {(appFocus.isNewSession() || appFocus.isAgent()) &&\n                      (state.phase === \"idle\" ||\n                        state.phase === \"classifying\") &&\n                      (showOnboarding || !user?.personalization?.name) &&\n                      !onboardingDismissed && (\n                        <OnboardingFlow\n                          showOnboarding={showOnboarding}\n                          handleHideOnboarding={hideOnboarding}\n                          handleFinishOnboarding={finishOnboarding}\n                          state={onboardingState}\n                          actions={onboardingActions}\n                          llmDescriptors={llmDescriptors}\n                        />\n                      )}\n\n                    {/*\n                      # Note (@raunakab)\n\n                      `shadow-01` on AppInputBar extends ~14px below the element\n                      (2px offset + 12px blur). Because the content area in `Root`\n                      (app-layouts.tsx) uses `overflow-auto`, shadows that exceed\n                      the container bounds are clipped.\n\n                      The animated spacer divs above and below the AppInputBar\n                      provide 14px of breathing room so the shadow renders fully.\n                      They transition between h-0 and h-[14px] depending on whether\n                      the classification is \"search\" (spacer above) or \"chat\"\n                      (spacer below).\n\n                      There is a corresponding note inside `app-layouts.tsx`\n                      (Footer) that explains why the Footer removes its top\n                      padding during chat to compensate for this extra space.\n                    */}\n                    <div>\n                      <div\n                        className={cn(\n                          \"transition-all duration-150 ease-in-out overflow-hidden\",\n                          isSearch ? \"h-[14px]\" : \"h-0\"\n                        )}\n                      />\n                      <AppInputBar\n                        ref={chatInputBarRef}\n                        deepResearchEnabled={\n                          deepResearchEnabledForCurrentWorkflow\n                        }\n                        toggleDeepResearch={toggleDeepResearch}\n                        filterManager={filterManager}\n                        llmManager={llmManager}\n                        initialMessage={\n                          searchParams?.get(SEARCH_PARAM_NAMES.USER_PROMPT) ||\n                          \"\"\n                        }\n                        stopGenerating={stopGenerating}\n                        onSubmit={handleAppInputBarSubmit}\n                        chatState={currentChatState}\n                        currentSessionFileTokenCount={\n                          currentChatSessionId\n                            ? currentSessionFileTokenCount\n                            : projectContextTokenCount\n                        }\n                        availableContextTokens={availableContextTokens}\n                        selectedAgent={selectedAgent || liveAgent}\n                        handleFileUpload={handleMessageSpecificFileUpload}\n                        setPresentingDocument={setPresentingDocument}\n                        // Intentionally enabled during name-only onboarding (showOnboarding=false)\n                        // since LLM providers are already configured and the user can chat.\n                        disabled={\n                          (!llmManager.isLoadingProviders &&\n                            llmManager.hasAnyProvider === false) ||\n                          (showOnboarding &&\n                            !isLoadingOnboarding &&\n                            onboardingState.currentStep !==\n                              OnboardingStep.Complete)\n                        }\n                      />\n                      <div\n                        className={cn(\n                          \"transition-all duration-150 ease-in-out overflow-hidden\",\n                          appFocus.isChat() ? \"h-[14px]\" : \"h-0\"\n                        )}\n                      />\n                    </div>\n                  </div>\n                </div>\n\n                {/* ── Bottom: SearchResults + SourceFilter / Suggestions / ProjectChatList ── */}\n                <div className=\"row-start-3 min-h-0 overflow-hidden flex flex-col items-center w-full px-4\">\n                  {/* Agent description below input */}\n                  {(appFocus.isNewSession() || appFocus.isAgent()) &&\n                    !isDefaultAgent && (\n                      <>\n                        <Spacer rem={1} />\n                        <AgentDescription agent={liveAgent} />\n                        <Spacer rem={1.5} />\n                      </>\n                    )}\n                  {/* ProjectChatSessionList */}\n                  {appFocus.isProject() && (\n                    <div className=\"w-full max-w-[var(--app-page-main-content-width)] h-full overflow-y-auto overscroll-y-none mx-auto\">\n                      <ProjectChatSessionList />\n                    </div>\n                  )}\n\n                  {/* SuggestionsUI */}\n                  <Fade\n                    show={\n                      (appFocus.isNewSession() || appFocus.isAgent()) &&\n                      hasStarterMessages\n                    }\n                    className=\"h-full flex-1 w-full max-w-[var(--app-page-main-content-width)]\"\n                  >\n                    <Spacer rem={0.5} />\n                    <Suggestions onSubmit={onSubmit} />\n                  </Fade>\n\n                  {/* SearchUI */}\n                  <Fade\n                    show={isSearch}\n                    className=\"h-full flex-1 w-full max-w-[var(--app-page-main-content-width)] px-1 flex flex-col\"\n                  >\n                    <Spacer rem={0.75} />\n                    <SearchUI onDocumentClick={handleSearchDocumentClick} />\n                  </Fade>\n                </div>\n              </div>\n            </div>\n          )}\n        </Dropzone>\n      </AppLayouts.Root>\n\n      {desktopDocumentSidebar}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/SettingsPage.tsx",
    "content": "\"use client\";\n\nimport { useRef, useCallback, useEffect, useState } from \"react\";\nimport { usePathname, useRouter } from \"next/navigation\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport { Section, AttachmentItemLayout } from \"@/layouts/general-layouts\";\nimport { Content, ContentAction } from \"@opal/layouts\";\nimport { Formik, Form } from \"formik\";\nimport * as Yup from \"yup\";\nimport {\n  SvgArrowExchange,\n  SvgKey,\n  SvgLock,\n  SvgMinusCircle,\n  SvgTrash,\n  SvgUnplug,\n} from \"@opal/icons\";\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport PasswordInputTypeIn from \"@/refresh-components/inputs/PasswordInputTypeIn\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport InputTextArea from \"@/refresh-components/inputs/InputTextArea\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { useTheme } from \"next-themes\";\nimport { MemoryItem, ThemePreference } from \"@/lib/types\";\nimport useUserPersonalization from \"@/hooks/useUserPersonalization\";\nimport { toast } from \"@/hooks/useToast\";\nimport LLMPopover from \"@/refresh-components/popovers/LLMPopover\";\nimport { deleteAllChatSessions } from \"@/app/app/services/lib\";\nimport { useAuthType, useLlmManager } from \"@/lib/hooks\";\nimport useChatSessions from \"@/hooks/useChatSessions\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport useFilter from \"@/hooks/useFilter\";\nimport CreateButton from \"@/refresh-components/buttons/CreateButton\";\nimport { Button } from \"@opal/components\";\nimport useFederatedOAuthStatus from \"@/hooks/useFederatedOAuthStatus\";\nimport useCCPairs from \"@/hooks/useCCPairs\";\nimport { ValidSources } from \"@/lib/types\";\nimport { ConnectorCredentialPairStatus } from \"@/app/admin/connector/[ccPairId]/types\";\nimport Separator from \"@/refresh-components/Separator\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport Code from \"@/refresh-components/Code\";\nimport CharacterCount from \"@/refresh-components/CharacterCount\";\nimport { InputPrompt } from \"@/app/app/interfaces\";\nimport usePromptShortcuts from \"@/hooks/usePromptShortcuts\";\nimport ColorSwatch from \"@/refresh-components/ColorSwatch\";\nimport EmptyMessage from \"@/refresh-components/EmptyMessage\";\nimport Memories from \"@/sections/settings/Memories\";\nimport { FederatedConnectorOAuthStatus } from \"@/components/chat/FederatedOAuthModal\";\nimport {\n  CHAT_BACKGROUND_OPTIONS,\n  CHAT_BACKGROUND_NONE,\n} from \"@/lib/constants/chatBackgrounds\";\nimport { SvgCheck } from \"@opal/icons\";\nimport { cn } from \"@/lib/utils\";\nimport { Interactive } from \"@opal/core\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { useCloudSubscription } from \"@/hooks/useCloudSubscription\";\n\ninterface PAT {\n  id: number;\n  name: string;\n  token_display: string;\n  created_at: string;\n  expires_at: string | null;\n  last_used_at: string | null;\n}\n\ninterface CreatedTokenState {\n  id: number;\n  token: string;\n  name: string;\n}\n\ninterface PATModalProps {\n  isCreating: boolean;\n  newTokenName: string;\n  setNewTokenName: (name: string) => void;\n  expirationDays: string;\n  setExpirationDays: (days: string) => void;\n  onClose: () => void;\n  onCreate: () => void;\n  createdToken: CreatedTokenState | null;\n}\n\nfunction PATModal({\n  isCreating,\n  newTokenName,\n  setNewTokenName,\n  expirationDays,\n  setExpirationDays,\n  onClose,\n  onCreate,\n  createdToken,\n}: PATModalProps) {\n  return (\n    <ConfirmationModalLayout\n      icon={SvgKey}\n      title=\"Create Access Token\"\n      description=\"All API requests using this token will inherit your access permissions and be attributed to you as an individual.\"\n      onClose={onClose}\n      submit={\n        !!createdToken?.token ? (\n          <Button onClick={onClose}>Done</Button>\n        ) : (\n          <Button\n            disabled={isCreating || !newTokenName.trim()}\n            onClick={onCreate}\n          >\n            {isCreating ? \"Creating Token...\" : \"Create Token\"}\n          </Button>\n        )\n      }\n      hideCancel={!!createdToken}\n    >\n      <Section gap={1}>\n        {/* Token Creation*/}\n        {!!createdToken?.token ? (\n          <InputLayouts.Vertical title=\"Token Value\">\n            <Code>{createdToken.token}</Code>\n          </InputLayouts.Vertical>\n        ) : (\n          <>\n            <InputLayouts.Vertical title=\"Token Name\">\n              <InputTypeIn\n                placeholder=\"Name your token\"\n                value={newTokenName}\n                onChange={(e) => setNewTokenName(e.target.value)}\n                variant={isCreating ? \"disabled\" : undefined}\n                autoComplete=\"new-password\"\n              />\n            </InputLayouts.Vertical>\n            <InputLayouts.Vertical\n              title=\"Expires in\"\n              subDescription={\n                expirationDays === \"null\"\n                  ? undefined\n                  : (() => {\n                      const expiryDate = new Date();\n                      expiryDate.setUTCDate(\n                        expiryDate.getUTCDate() + parseInt(expirationDays)\n                      );\n                      expiryDate.setUTCHours(23, 59, 59, 999);\n                      return `This token will expire at: ${expiryDate\n                        .toISOString()\n                        .replace(\"T\", \" \")\n                        .replace(\".999Z\", \" UTC\")}`;\n                    })()\n              }\n            >\n              <InputSelect\n                value={expirationDays}\n                onValueChange={setExpirationDays}\n                disabled={isCreating}\n              >\n                <InputSelect.Trigger placeholder=\"Select expiration\" />\n                <InputSelect.Content>\n                  <InputSelect.Item value=\"7\">7 days</InputSelect.Item>\n                  <InputSelect.Item value=\"30\">30 days</InputSelect.Item>\n                  <InputSelect.Item value=\"365\">365 days</InputSelect.Item>\n                  <InputSelect.Item value=\"null\">\n                    No expiration\n                  </InputSelect.Item>\n                </InputSelect.Content>\n              </InputSelect>\n            </InputLayouts.Vertical>\n          </>\n        )}\n      </Section>\n    </ConfirmationModalLayout>\n  );\n}\n\nfunction GeneralSettings() {\n  const {\n    user,\n    updateUserPersonalization,\n    updateUserThemePreference,\n    updateUserChatBackground,\n  } = useUser();\n  const { theme, setTheme, systemTheme } = useTheme();\n  const { refreshChatSessions } = useChatSessions();\n  const router = useRouter();\n  const pathname = usePathname();\n  const [isDeleting, setIsDeleting] = useState(false);\n  const [showDeleteConfirmation, setShowDeleteConfirmation] = useState(false);\n\n  const {\n    personalizationValues,\n    updatePersonalizationField,\n    handleSavePersonalization,\n  } = useUserPersonalization(user, updateUserPersonalization, {\n    onSuccess: () => toast.success(\"Personalization updated successfully\"),\n    onError: () => toast.error(\"Failed to update personalization\"),\n  });\n\n  // Track initial values to detect changes\n  const initialNameRef = useRef(personalizationValues.name);\n  const initialRoleRef = useRef(personalizationValues.role);\n\n  // Update refs when personalization values change from external source\n  useEffect(() => {\n    initialNameRef.current = personalizationValues.name;\n    initialRoleRef.current = personalizationValues.role;\n  }, [user?.personalization]);\n\n  const handleDeleteAllChats = useCallback(async () => {\n    setIsDeleting(true);\n    try {\n      const response = await deleteAllChatSessions();\n      if (response.ok) {\n        toast.success(\"All your chat sessions have been deleted.\");\n        await refreshChatSessions();\n        setShowDeleteConfirmation(false);\n      } else {\n        throw new Error(\"Failed to delete all chat sessions\");\n      }\n    } catch (error) {\n      toast.error(\"Failed to delete all chat sessions\");\n    } finally {\n      setIsDeleting(false);\n    }\n  }, [pathname, router, refreshChatSessions]);\n\n  return (\n    <>\n      {showDeleteConfirmation && (\n        <ConfirmationModalLayout\n          icon={SvgTrash}\n          title=\"Delete All Chats\"\n          onClose={() => setShowDeleteConfirmation(false)}\n          submit={\n            <Button\n              disabled={isDeleting}\n              variant=\"danger\"\n              onClick={() => {\n                void handleDeleteAllChats();\n              }}\n            >\n              {isDeleting ? \"Deleting...\" : \"Delete\"}\n            </Button>\n          }\n        >\n          <Section gap={0.5} alignItems=\"start\">\n            <Text>\n              All your chat sessions and history will be permanently deleted.\n              Deletion cannot be undone.\n            </Text>\n            <Text>Are you sure you want to delete all chats?</Text>\n          </Section>\n        </ConfirmationModalLayout>\n      )}\n\n      <Section gap={2}>\n        <Section gap={0.75}>\n          <Content\n            title=\"Profile\"\n            sizePreset=\"main-content\"\n            variant=\"section\"\n            widthVariant=\"full\"\n          />\n          <Card>\n            <InputLayouts.Horizontal\n              title=\"Full Name\"\n              description=\"We'll display this name in the app.\"\n              center\n            >\n              <InputTypeIn\n                placeholder=\"Your name\"\n                value={personalizationValues.name}\n                onChange={(e) =>\n                  updatePersonalizationField(\"name\", e.target.value)\n                }\n                onKeyDown={(e) => {\n                  if (e.key === \"Enter\") {\n                    e.currentTarget.blur();\n                  }\n                }}\n                onBlur={() => {\n                  // Only save if the value has changed\n                  if (personalizationValues.name !== initialNameRef.current) {\n                    void handleSavePersonalization();\n                    initialNameRef.current = personalizationValues.name;\n                  }\n                }}\n              />\n            </InputLayouts.Horizontal>\n            <InputLayouts.Horizontal\n              title=\"Work Role\"\n              description=\"Share your role to better tailor responses.\"\n              center\n            >\n              <InputTypeIn\n                placeholder=\"Your role\"\n                value={personalizationValues.role}\n                onChange={(e) =>\n                  updatePersonalizationField(\"role\", e.target.value)\n                }\n                onKeyDown={(e) => {\n                  if (e.key === \"Enter\") {\n                    e.currentTarget.blur();\n                  }\n                }}\n                onBlur={() => {\n                  // Only save if the value has changed\n                  if (personalizationValues.role !== initialRoleRef.current) {\n                    void handleSavePersonalization();\n                    initialRoleRef.current = personalizationValues.role;\n                  }\n                }}\n              />\n            </InputLayouts.Horizontal>\n          </Card>\n        </Section>\n\n        <Section gap={0.75}>\n          <Content\n            title=\"Appearance\"\n            sizePreset=\"main-content\"\n            variant=\"section\"\n            widthVariant=\"full\"\n          />\n          <Card>\n            <InputLayouts.Horizontal\n              title=\"Color Mode\"\n              description=\"Select your preferred color mode for the UI.\"\n              center\n            >\n              <InputSelect\n                value={theme}\n                onValueChange={(value) => {\n                  setTheme(value);\n                  updateUserThemePreference(value as ThemePreference);\n                }}\n              >\n                <InputSelect.Trigger />\n                <InputSelect.Content>\n                  <InputSelect.Item\n                    value={ThemePreference.SYSTEM}\n                    icon={() => (\n                      <ColorSwatch\n                        light={systemTheme === \"light\"}\n                        dark={systemTheme === \"dark\"}\n                      />\n                    )}\n                    description={\n                      systemTheme\n                        ? systemTheme.charAt(0).toUpperCase() +\n                          systemTheme.slice(1)\n                        : undefined\n                    }\n                  >\n                    Auto\n                  </InputSelect.Item>\n                  <InputSelect.Separator />\n                  <InputSelect.Item\n                    value={ThemePreference.LIGHT}\n                    icon={() => <ColorSwatch light />}\n                  >\n                    Light\n                  </InputSelect.Item>\n                  <InputSelect.Item\n                    value={ThemePreference.DARK}\n                    icon={() => <ColorSwatch dark />}\n                  >\n                    Dark\n                  </InputSelect.Item>\n                </InputSelect.Content>\n              </InputSelect>\n            </InputLayouts.Horizontal>\n            <InputLayouts.Vertical title=\"Chat Background\">\n              <div className=\"flex flex-wrap gap-2\">\n                {CHAT_BACKGROUND_OPTIONS.map((bg) => {\n                  const currentBackgroundId =\n                    user?.preferences?.chat_background ?? \"none\";\n                  const isSelected = currentBackgroundId === bg.id;\n                  const isNone = bg.src === CHAT_BACKGROUND_NONE;\n\n                  return (\n                    <button\n                      key={bg.id}\n                      onClick={() =>\n                        updateUserChatBackground(\n                          bg.id === CHAT_BACKGROUND_NONE ? null : bg.id\n                        )\n                      }\n                      className=\"relative overflow-hidden rounded-lg transition-all w-[90px] h-[68px] cursor-pointer border-none p-0 bg-transparent group\"\n                      title={bg.label}\n                      aria-label={`${bg.label} background${\n                        isSelected ? \" (selected)\" : \"\"\n                      }`}\n                    >\n                      {isNone ? (\n                        <div className=\"absolute inset-0 bg-background flex items-center justify-center\">\n                          <span className=\"text-xs text-text-02\">None</span>\n                        </div>\n                      ) : (\n                        <div\n                          className=\"absolute inset-0 bg-cover bg-center transition-transform duration-300 group-hover:scale-105\"\n                          style={{ backgroundImage: `url(${bg.thumbnail})` }}\n                        />\n                      )}\n                      <div\n                        className={cn(\n                          \"absolute inset-0 transition-all rounded-lg\",\n                          isSelected\n                            ? \"ring-2 ring-inset ring-theme-primary-05\"\n                            : \"ring-1 ring-inset ring-border-02 group-hover:ring-border-03\"\n                        )}\n                      />\n                      {isSelected && (\n                        <div className=\"absolute top-1.5 right-1.5 w-4 h-4 rounded-full bg-theme-primary-05 flex items-center justify-center\">\n                          <SvgCheck className=\"w-2.5 h-2.5 stroke-text-inverted-05\" />\n                        </div>\n                      )}\n                    </button>\n                  );\n                })}\n              </div>\n            </InputLayouts.Vertical>\n          </Card>\n        </Section>\n\n        <Separator noPadding />\n\n        <Section gap={0.75}>\n          <Content\n            title=\"Danger Zone\"\n            sizePreset=\"main-content\"\n            variant=\"section\"\n            widthVariant=\"full\"\n          />\n          <Card>\n            <InputLayouts.Horizontal\n              title=\"Delete All Chats\"\n              description=\"Permanently delete all your chat sessions.\"\n              center\n            >\n              <Button\n                variant=\"danger\"\n                prominence=\"secondary\"\n                onClick={() => setShowDeleteConfirmation(true)}\n                icon={SvgTrash}\n                interaction={showDeleteConfirmation ? \"hover\" : \"rest\"}\n              >\n                Delete All Chats\n              </Button>\n            </InputLayouts.Horizontal>\n          </Card>\n        </Section>\n      </Section>\n    </>\n  );\n}\n\ninterface LocalShortcut extends InputPrompt {\n  isNew: boolean;\n}\n\nfunction PromptShortcuts() {\n  const { promptShortcuts, isLoading, error, refresh } = usePromptShortcuts();\n  const [shortcuts, setShortcuts] = useState<LocalShortcut[]>([]);\n  const [isInitialLoad, setIsInitialLoad] = useState(true);\n\n  // Initialize shortcuts when input prompts are loaded\n  useEffect(() => {\n    if (isLoading || error) return;\n\n    // Convert InputPrompt[] to LocalShortcut[] with isNew: false for existing items\n    // Sort by id to maintain stable ordering when editing\n    const existingShortcuts: LocalShortcut[] = promptShortcuts\n      .map((shortcut) => ({\n        ...shortcut,\n        isNew: false,\n      }))\n      .sort((a, b) => a.id - b.id);\n\n    // Always ensure there's at least one empty row\n    setShortcuts([\n      ...existingShortcuts,\n      {\n        id: Date.now(),\n        prompt: \"\",\n        content: \"\",\n        active: true,\n        is_public: false,\n        isNew: true,\n      },\n    ]);\n    setIsInitialLoad(false);\n  }, [promptShortcuts, isLoading, error]);\n\n  // Show error popup if fetch fails\n  useEffect(() => {\n    if (!error) return;\n    toast.error(\"Failed to load shortcuts\");\n  }, [error]);\n\n  const handleUpdateShortcut = useCallback(\n    (index: number, field: \"prompt\" | \"content\", value: string) => {\n      setShortcuts((prev) => {\n        const next = prev.map((shortcut, i) =>\n          i === index ? { ...shortcut, [field]: value } : shortcut\n        );\n\n        const isEmptyNew = (s: LocalShortcut) =>\n          s.isNew && !s.prompt.trim() && !s.content.trim();\n\n        const emptyCount = next.filter(isEmptyNew).length;\n\n        if (emptyCount === 0) {\n          return [\n            ...next,\n            {\n              id: Date.now(),\n              prompt: \"\",\n              content: \"\",\n              active: true,\n              is_public: false,\n              isNew: true,\n            },\n          ];\n        }\n\n        if (emptyCount > 1) {\n          const userRow = next[index];\n          const userRowEmpty = userRow !== undefined && isEmptyNew(userRow);\n          let keepIndex = -1;\n          if (userRowEmpty) {\n            keepIndex = index;\n          } else {\n            for (let i = next.length - 1; i >= 0; i--) {\n              const row = next[i];\n              if (row !== undefined && isEmptyNew(row)) {\n                keepIndex = i;\n                break;\n              }\n            }\n          }\n          return next.filter((s, i) => !isEmptyNew(s) || i === keepIndex);\n        }\n\n        return next;\n      });\n    },\n    []\n  );\n\n  const handleRemoveShortcut = useCallback(\n    async (index: number) => {\n      const shortcut = shortcuts[index];\n      if (!shortcut) return;\n\n      // If it's a new shortcut, just remove from state\n      if (shortcut.isNew) {\n        setShortcuts((prev) => prev.filter((_, i) => i !== index));\n        return;\n      }\n\n      // Otherwise, delete from backend\n      try {\n        const response = await fetch(`/api/input_prompt/${shortcut.id}`, {\n          method: \"DELETE\",\n        });\n\n        if (response.ok) {\n          setShortcuts((prev) => prev.filter((_, i) => i !== index));\n          await refresh();\n          toast.success(\"Shortcut deleted\");\n        } else {\n          throw new Error(\"Failed to delete shortcut\");\n        }\n      } catch (error) {\n        toast.error(\"Failed to delete shortcut\");\n      }\n    },\n    [shortcuts, refresh]\n  );\n\n  const handleSaveShortcut = useCallback(\n    async (index: number) => {\n      const shortcut = shortcuts[index];\n      if (!shortcut || !shortcut.prompt.trim() || !shortcut.content.trim()) {\n        toast.error(\"Both shortcut and expansion are required\");\n        return;\n      }\n\n      try {\n        if (shortcut.isNew) {\n          // Create new shortcut\n          const response = await fetch(\"/api/input_prompt\", {\n            method: \"POST\",\n            headers: { \"Content-Type\": \"application/json\" },\n            body: JSON.stringify({\n              prompt: shortcut.prompt,\n              content: shortcut.content,\n              active: true,\n              is_public: false,\n            }),\n          });\n\n          if (response.ok) {\n            await refresh();\n            toast.success(\"Shortcut created\");\n          } else {\n            throw new Error(\"Failed to create shortcut\");\n          }\n        } else {\n          // Update existing shortcut\n          const response = await fetch(`/api/input_prompt/${shortcut.id}`, {\n            method: \"PATCH\",\n            headers: { \"Content-Type\": \"application/json\" },\n            body: JSON.stringify({\n              prompt: shortcut.prompt,\n              content: shortcut.content,\n              active: true,\n              is_public: false,\n            }),\n          });\n\n          if (response.ok) {\n            await refresh();\n            toast.success(\"Shortcut updated\");\n          } else {\n            throw new Error(\"Failed to update shortcut\");\n          }\n        }\n      } catch (error) {\n        toast.error(\"Failed to save shortcut\");\n      }\n    },\n    [shortcuts, refresh]\n  );\n\n  const handleBlurShortcut = useCallback(\n    async (index: number) => {\n      const shortcut = shortcuts[index];\n      if (!shortcut) return;\n\n      const hasPrompt = shortcut.prompt.trim();\n      const hasContent = shortcut.content.trim();\n\n      // Both fields are filled - save/update the shortcut\n      if (hasPrompt && hasContent) {\n        await handleSaveShortcut(index);\n      }\n      // For existing shortcuts with incomplete fields, error state will be shown in UI\n      // User must use the delete button to remove them\n    },\n    [shortcuts, handleSaveShortcut]\n  );\n\n  return (\n    <>\n      {shortcuts.length > 0 && (\n        <Section gap={0.75}>\n          {shortcuts.map((shortcut, index) => {\n            const isEmpty = !shortcut.prompt.trim() && !shortcut.content.trim();\n            const isExisting = !shortcut.isNew;\n            const hasPrompt = shortcut.prompt.trim();\n            const hasContent = shortcut.content.trim();\n\n            // Show error for existing shortcuts with incomplete fields\n            // (either one field empty or both fields empty)\n            const showPromptError = isExisting && !hasPrompt;\n            const showContentError = isExisting && !hasContent;\n\n            return (\n              <div\n                key={shortcut.id}\n                className=\"w-full grid grid-cols-[1fr_min-content] gap-x-1 gap-y-1\"\n              >\n                <InputTypeIn\n                  prefixText=\"/\"\n                  placeholder=\"Summarize\"\n                  value={shortcut.prompt}\n                  onChange={(e) =>\n                    handleUpdateShortcut(index, \"prompt\", e.target.value)\n                  }\n                  onBlur={\n                    shortcut.is_public\n                      ? undefined\n                      : () => void handleBlurShortcut(index)\n                  }\n                  variant={\n                    shortcut.is_public\n                      ? \"readOnly\"\n                      : showPromptError\n                        ? \"error\"\n                        : undefined\n                  }\n                />\n                <Section>\n                  <Button\n                    disabled={(shortcut.isNew && isEmpty) || shortcut.is_public}\n                    icon={SvgMinusCircle}\n                    onClick={() => void handleRemoveShortcut(index)}\n                    prominence=\"tertiary\"\n                    aria-label=\"Remove shortcut\"\n                    tooltip={\n                      shortcut.is_public\n                        ? \"Cannot delete public prompt-shortcuts.\"\n                        : undefined\n                    }\n                  />\n                </Section>\n                <InputTextArea\n                  placeholder=\"Provide a concise 1–2 sentence summary of the following:\"\n                  value={shortcut.content}\n                  onChange={(e) =>\n                    handleUpdateShortcut(index, \"content\", e.target.value)\n                  }\n                  onBlur={\n                    shortcut.is_public\n                      ? undefined\n                      : () => void handleBlurShortcut(index)\n                  }\n                  variant={\n                    shortcut.is_public\n                      ? \"readOnly\"\n                      : showContentError\n                        ? \"error\"\n                        : undefined\n                  }\n                  rows={3}\n                />\n                <div />\n              </div>\n            );\n          })}\n        </Section>\n      )}\n    </>\n  );\n}\n\nfunction ChatPreferencesSettings() {\n  const {\n    user,\n    updateUserPersonalization,\n    updateUserAutoScroll,\n    updateUserShortcuts,\n    updateUserDefaultModel,\n    updateUserDefaultAppMode,\n    updateUserVoiceSettings,\n  } = useUser();\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n  const settings = useSettingsContext();\n  const { isSearchModeAvailable: searchUiEnabled } = settings;\n  const llmManager = useLlmManager();\n\n  const {\n    personalizationValues,\n    toggleUseMemories,\n    toggleEnableMemoryTool,\n    updateUserPreferences,\n    handleSavePersonalization,\n  } = useUserPersonalization(user, updateUserPersonalization, {\n    onSuccess: () => toast.success(\"Preferences saved\"),\n    onError: () => toast.error(\"Failed to save preferences\"),\n  });\n  const [draftVoicePlaybackSpeed, setDraftVoicePlaybackSpeed] = useState(\n    user?.preferences.voice_playback_speed ?? 1\n  );\n\n  useEffect(() => {\n    setDraftVoicePlaybackSpeed(user?.preferences.voice_playback_speed ?? 1);\n  }, [user?.preferences.voice_playback_speed]);\n\n  const saveVoiceSettings = useCallback(\n    async (settings: {\n      auto_send?: boolean;\n      auto_playback?: boolean;\n      playback_speed?: number;\n    }) => {\n      try {\n        await updateUserVoiceSettings(settings);\n        toast.success(\"Preferences saved\");\n      } catch {\n        toast.error(\"Failed to save preferences\");\n      }\n    },\n    [updateUserVoiceSettings]\n  );\n\n  const commitVoicePlaybackSpeed = useCallback(() => {\n    const currentSpeed = user?.preferences.voice_playback_speed ?? 1;\n    if (Math.abs(currentSpeed - draftVoicePlaybackSpeed) < 0.001) {\n      return;\n    }\n    void saveVoiceSettings({\n      playback_speed: draftVoicePlaybackSpeed,\n    });\n  }, [\n    draftVoicePlaybackSpeed,\n    saveVoiceSettings,\n    user?.preferences.voice_playback_speed,\n  ]);\n\n  // Wrapper to save memories and return success/failure\n  const handleSaveMemories = useCallback(\n    async (newMemories: MemoryItem[]): Promise<boolean> => {\n      const result = await handleSavePersonalization(\n        { memories: newMemories },\n        true\n      );\n      return !!result;\n    },\n    [handleSavePersonalization]\n  );\n\n  return (\n    <Section gap={2}>\n      <Section gap={0.75}>\n        <Content\n          title=\"Chats\"\n          sizePreset=\"main-content\"\n          variant=\"section\"\n          widthVariant=\"full\"\n        />\n        <Card>\n          <InputLayouts.Horizontal\n            title=\"Default Model\"\n            description=\"This model will be used by Onyx by default in your chats.\"\n          >\n            <LLMPopover\n              llmManager={llmManager}\n              onSelect={(selected) => {\n                void updateUserDefaultModel(selected);\n              }}\n            />\n          </InputLayouts.Horizontal>\n\n          <InputLayouts.Horizontal\n            title=\"Chat Auto-scroll\"\n            description=\"Automatically scroll to new content as chat generates response.\"\n          >\n            <Switch\n              checked={user?.preferences.auto_scroll}\n              onCheckedChange={(checked) => {\n                updateUserAutoScroll(checked);\n              }}\n            />\n          </InputLayouts.Horizontal>\n\n          {isPaidEnterpriseFeaturesEnabled && (\n            <SimpleTooltip\n              tooltip={\n                searchUiEnabled\n                  ? undefined\n                  : \"Search UI is disabled and can only be enabled by an admin.\"\n              }\n              side=\"top\"\n            >\n              <InputLayouts.Horizontal\n                title=\"Default App Mode\"\n                description=\"Choose whether new sessions start in Search or Chat mode.\"\n                center\n                disabled={!searchUiEnabled}\n              >\n                <InputSelect\n                  value={user?.preferences.default_app_mode ?? \"CHAT\"}\n                  onValueChange={(value) => {\n                    void updateUserDefaultAppMode(value as \"CHAT\" | \"SEARCH\");\n                  }}\n                  disabled={!searchUiEnabled}\n                >\n                  <InputSelect.Trigger />\n                  <InputSelect.Content>\n                    <InputSelect.Item value=\"CHAT\">Chat</InputSelect.Item>\n                    <InputSelect.Item value=\"SEARCH\">Search</InputSelect.Item>\n                  </InputSelect.Content>\n                </InputSelect>\n              </InputLayouts.Horizontal>\n            </SimpleTooltip>\n          )}\n        </Card>\n      </Section>\n\n      <Section gap={0.75}>\n        <InputLayouts.Vertical\n          title=\"Personal Preferences\"\n          description=\"Provide your custom preferences in natural language.\"\n        >\n          <InputTextArea\n            placeholder=\"Describe how you want the system to behave and the tone it should use.\"\n            value={personalizationValues.user_preferences}\n            onChange={(e) => updateUserPreferences(e.target.value)}\n            onBlur={() => void handleSavePersonalization()}\n            rows={4}\n            maxRows={10}\n            autoResize\n            maxLength={500}\n          />\n          <CharacterCount\n            value={personalizationValues.user_preferences || \"\"}\n            limit={500}\n          />\n        </InputLayouts.Vertical>\n        <Content\n          title=\"Memory\"\n          sizePreset=\"main-content\"\n          variant=\"section\"\n          widthVariant=\"full\"\n        />\n        <Card>\n          <InputLayouts.Horizontal\n            title=\"Reference Stored Memories\"\n            description=\"Let Onyx reference stored memories in chats.\"\n          >\n            <Switch\n              checked={personalizationValues.use_memories}\n              onCheckedChange={(checked) => {\n                toggleUseMemories(checked);\n                void handleSavePersonalization({ use_memories: checked });\n              }}\n            />\n          </InputLayouts.Horizontal>\n          <InputLayouts.Horizontal\n            title=\"Update Memories\"\n            description=\"Let Onyx generate and update stored memories.\"\n          >\n            <Switch\n              checked={personalizationValues.enable_memory_tool}\n              onCheckedChange={(checked) => {\n                toggleEnableMemoryTool(checked);\n                void handleSavePersonalization({\n                  enable_memory_tool: checked,\n                });\n              }}\n            />\n          </InputLayouts.Horizontal>\n\n          {(personalizationValues.use_memories ||\n            personalizationValues.enable_memory_tool ||\n            personalizationValues.memories.length > 0) && (\n            <Memories\n              memories={personalizationValues.memories}\n              onSaveMemories={handleSaveMemories}\n            />\n          )}\n        </Card>\n      </Section>\n\n      <Section gap={0.75}>\n        <Content\n          title=\"Prompt Shortcuts\"\n          sizePreset=\"main-content\"\n          variant=\"section\"\n          widthVariant=\"full\"\n        />\n        <Card>\n          <InputLayouts.Horizontal\n            title=\"Use Prompt Shortcuts\"\n            description=\"Enable shortcuts to quickly insert common prompts.\"\n          >\n            <Switch\n              checked={user?.preferences?.shortcut_enabled}\n              onCheckedChange={(checked) => {\n                updateUserShortcuts(checked);\n              }}\n            />\n          </InputLayouts.Horizontal>\n\n          {user?.preferences?.shortcut_enabled && <PromptShortcuts />}\n        </Card>\n      </Section>\n\n      <Section gap={0.75}>\n        <Content\n          title=\"Voice\"\n          sizePreset=\"main-content\"\n          variant=\"section\"\n          widthVariant=\"full\"\n        />\n        <Card>\n          <InputLayouts.Horizontal\n            title=\"Auto-Send on Pause\"\n            description=\"Automatically send voice input when you stop speaking.\"\n          >\n            <Switch\n              checked={user?.preferences.voice_auto_send ?? false}\n              onCheckedChange={(checked) => {\n                void saveVoiceSettings({ auto_send: checked });\n              }}\n            />\n          </InputLayouts.Horizontal>\n\n          <InputLayouts.Horizontal\n            title=\"Auto-Playback\"\n            description=\"Automatically play voice responses.\"\n          >\n            <Switch\n              checked={user?.preferences.voice_auto_playback ?? false}\n              onCheckedChange={(checked) => {\n                void saveVoiceSettings({ auto_playback: checked });\n              }}\n            />\n          </InputLayouts.Horizontal>\n\n          <InputLayouts.Horizontal\n            title=\"Playback Speed\"\n            description=\"Adjust the speed of voice playback.\"\n          >\n            <div className=\"flex items-center gap-3\">\n              <input\n                type=\"range\"\n                min=\"0.5\"\n                max=\"2\"\n                step=\"0.1\"\n                value={draftVoicePlaybackSpeed}\n                onChange={(e) => {\n                  setDraftVoicePlaybackSpeed(parseFloat(e.target.value));\n                }}\n                onMouseUp={commitVoicePlaybackSpeed}\n                onTouchEnd={commitVoicePlaybackSpeed}\n                onKeyUp={(e) => {\n                  if (e.key === \"ArrowLeft\" || e.key === \"ArrowRight\") {\n                    commitVoicePlaybackSpeed();\n                  }\n                }}\n                className=\"w-24 h-2 rounded-lg appearance-none cursor-pointer bg-background-neutral-02\"\n              />\n              <span className=\"text-sm text-text-02 w-10\">\n                {draftVoicePlaybackSpeed.toFixed(1)}x\n              </span>\n            </div>\n          </InputLayouts.Horizontal>\n        </Card>\n      </Section>\n    </Section>\n  );\n}\n\nfunction AccountsAccessSettings() {\n  const { user, authTypeMetadata } = useUser();\n  const authType = useAuthType();\n  const [showPasswordModal, setShowPasswordModal] = useState(false);\n\n  const passwordValidationSchema = Yup.object().shape({\n    currentPassword: Yup.string().required(\"Current password is required\"),\n    newPassword: Yup.string()\n      .min(\n        authTypeMetadata.passwordMinLength,\n        `Password must be at least ${authTypeMetadata.passwordMinLength} characters`\n      )\n      .required(\"New password is required\"),\n    confirmPassword: Yup.string()\n      .oneOf([Yup.ref(\"newPassword\")], \"Passwords do not match\")\n      .required(\"Please confirm your new password\"),\n  });\n\n  // PAT state\n  const [showCreateModal, setShowCreateModal] = useState(false);\n  const [isCreating, setIsCreating] = useState(false);\n  const [newTokenName, setNewTokenName] = useState(\"\");\n  const [expirationDays, setExpirationDays] = useState<string>(\"30\");\n  const [newlyCreatedToken, setNewlyCreatedToken] =\n    useState<CreatedTokenState | null>(null);\n  const [tokenToDelete, setTokenToDelete] = useState<PAT | null>(null);\n\n  const canCreateTokens = useCloudSubscription();\n\n  const showPasswordSection = Boolean(user?.password_configured);\n  const showTokensSection = authType !== null;\n\n  // Fetch PATs with SWR\n  const {\n    data: pats = [],\n    mutate,\n    error,\n    isLoading,\n  } = useSWR<PAT[]>(\n    showTokensSection ? SWR_KEYS.userPats : null,\n    errorHandlingFetcher,\n    {\n      revalidateOnFocus: true,\n      dedupingInterval: 2000,\n      fallbackData: [],\n    }\n  );\n\n  // Use filter hook for searching tokens\n  const {\n    query,\n    setQuery,\n    filtered: filteredPats,\n  } = useFilter(pats, (pat) => `${pat.name} ${pat.token_display}`);\n\n  // Show error popup if SWR fetch fails\n  useEffect(() => {\n    if (error) {\n      toast.error(\"Failed to load tokens\");\n    }\n  }, [error]);\n\n  const createPAT = useCallback(async () => {\n    if (!newTokenName.trim()) {\n      toast.error(\"Token name is required\");\n      return;\n    }\n\n    setIsCreating(true);\n    try {\n      const response = await fetch(\"/api/user/pats\", {\n        method: \"POST\",\n        headers: { \"Content-Type\": \"application/json\" },\n        body: JSON.stringify({\n          name: newTokenName,\n          expiration_days:\n            expirationDays === \"null\" ? null : parseInt(expirationDays),\n        }),\n      });\n\n      if (response.ok) {\n        const data = await response.json();\n        // Store the newly created token - modal will switch to display view\n        setNewlyCreatedToken({\n          id: data.id,\n          token: data.token,\n          name: newTokenName,\n        });\n        toast.success(\"Token created successfully\");\n        // Revalidate the token list\n        await mutate();\n      } else {\n        const errorData = await response.json();\n        toast.error(errorData.detail || \"Failed to create token\");\n      }\n    } catch (error) {\n      toast.error(\"Network error creating token\");\n    } finally {\n      setIsCreating(false);\n    }\n  }, [newTokenName, expirationDays, mutate]);\n\n  const deletePAT = useCallback(\n    async (patId: number) => {\n      try {\n        const response = await fetch(`/api/user/pats/${patId}`, {\n          method: \"DELETE\",\n        });\n\n        if (response.ok) {\n          // Clear the newly created token if it's the one being deleted\n          if (newlyCreatedToken?.id === patId) {\n            setNewlyCreatedToken(null);\n          }\n          await mutate();\n          toast.success(\"Token deleted successfully\");\n          setTokenToDelete(null);\n        } else {\n          toast.error(\"Failed to delete token\");\n        }\n      } catch (error) {\n        toast.error(\"Network error deleting token\");\n      }\n    },\n    [newlyCreatedToken, mutate]\n  );\n\n  const handleChangePassword = useCallback(\n    async (values: {\n      currentPassword: string;\n      newPassword: string;\n      confirmPassword: string;\n    }) => {\n      try {\n        const response = await fetch(\"/api/password/change-password\", {\n          method: \"POST\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n          body: JSON.stringify({\n            old_password: values.currentPassword,\n            new_password: values.newPassword,\n          }),\n        });\n\n        if (response.ok) {\n          toast.success(\"Password updated successfully\");\n          setShowPasswordModal(false);\n        } else {\n          const errorData = await response.json();\n          toast.error(errorData.detail || \"Failed to change password\");\n        }\n      } catch (error) {\n        toast.error(\"An error occurred while changing the password\");\n      }\n    },\n    []\n  );\n\n  return (\n    <>\n      {showCreateModal && (\n        <PATModal\n          isCreating={isCreating}\n          newTokenName={newTokenName}\n          setNewTokenName={setNewTokenName}\n          expirationDays={expirationDays}\n          setExpirationDays={setExpirationDays}\n          onClose={() => {\n            setShowCreateModal(false);\n            setNewTokenName(\"\");\n            setExpirationDays(\"30\");\n            setNewlyCreatedToken(null);\n          }}\n          onCreate={createPAT}\n          createdToken={newlyCreatedToken}\n        />\n      )}\n\n      {tokenToDelete && (\n        <ConfirmationModalLayout\n          icon={SvgTrash}\n          title=\"Revoke Access Token\"\n          onClose={() => setTokenToDelete(null)}\n          submit={\n            <Button\n              variant=\"danger\"\n              onClick={() => deletePAT(tokenToDelete.id)}\n            >\n              Revoke\n            </Button>\n          }\n        >\n          <Section gap={0.5} alignItems=\"start\">\n            <Text>\n              Any application using the token{\" \"}\n              <Text className=\"!font-bold\">{tokenToDelete.name}</Text>{\" \"}\n              <Text secondaryMono>({tokenToDelete.token_display})</Text> will\n              lose access to Onyx. This action cannot be undone.\n            </Text>\n            <Text>Are you sure you want to revoke this token?</Text>\n          </Section>\n        </ConfirmationModalLayout>\n      )}\n\n      {showPasswordModal && (\n        <Formik\n          initialValues={{\n            currentPassword: \"\",\n            newPassword: \"\",\n            confirmPassword: \"\",\n          }}\n          validationSchema={passwordValidationSchema}\n          validateOnChange={true}\n          validateOnBlur={true}\n          onSubmit={() => undefined}\n        >\n          {({\n            values,\n            handleChange,\n            handleBlur,\n            isSubmitting,\n            dirty,\n            isValid,\n            errors,\n            touched,\n            setSubmitting,\n          }) => (\n            <Form>\n              <ConfirmationModalLayout\n                icon={SvgLock}\n                title=\"Change Password\"\n                submit={\n                  <Button\n                    disabled={isSubmitting || !dirty || !isValid}\n                    onClick={async () => {\n                      setSubmitting(true);\n                      try {\n                        await handleChangePassword(values);\n                      } finally {\n                        setSubmitting(false);\n                      }\n                    }}\n                  >\n                    {isSubmitting ? \"Updating...\" : \"Update\"}\n                  </Button>\n                }\n                onClose={() => {\n                  setShowPasswordModal(false);\n                }}\n              >\n                <Section gap={1}>\n                  <Section gap={0.25} alignItems=\"start\">\n                    <InputLayouts.Vertical\n                      name=\"currentPassword\"\n                      title=\"Current Password\"\n                    >\n                      <PasswordInputTypeIn\n                        name=\"currentPassword\"\n                        value={values.currentPassword}\n                        onChange={handleChange}\n                        onBlur={handleBlur}\n                        error={\n                          touched.currentPassword && !!errors.currentPassword\n                        }\n                      />\n                    </InputLayouts.Vertical>\n                  </Section>\n                  <Section gap={0.25} alignItems=\"start\">\n                    <InputLayouts.Vertical\n                      name=\"newPassword\"\n                      title=\"New Password\"\n                    >\n                      <PasswordInputTypeIn\n                        name=\"newPassword\"\n                        value={values.newPassword}\n                        onChange={handleChange}\n                        onBlur={handleBlur}\n                        error={touched.newPassword && !!errors.newPassword}\n                      />\n                    </InputLayouts.Vertical>\n                  </Section>\n                  <Section gap={0.25} alignItems=\"start\">\n                    <InputLayouts.Vertical\n                      name=\"confirmPassword\"\n                      title=\"Confirm New Password\"\n                    >\n                      <PasswordInputTypeIn\n                        name=\"confirmPassword\"\n                        value={values.confirmPassword}\n                        onChange={handleChange}\n                        onBlur={handleBlur}\n                        error={\n                          touched.confirmPassword && !!errors.confirmPassword\n                        }\n                      />\n                    </InputLayouts.Vertical>\n                  </Section>\n                </Section>\n              </ConfirmationModalLayout>\n            </Form>\n          )}\n        </Formik>\n      )}\n\n      <Section gap={2}>\n        <Section gap={0.75}>\n          <Content\n            title=\"Accounts\"\n            sizePreset=\"main-content\"\n            variant=\"section\"\n            widthVariant=\"full\"\n          />\n          <Card>\n            <InputLayouts.Horizontal\n              title=\"Email\"\n              description=\"Your account email address.\"\n              center\n              nonInteractive\n            >\n              <Text>{user?.email ?? \"anonymous\"}</Text>\n            </InputLayouts.Horizontal>\n\n            {showPasswordSection && (\n              <InputLayouts.Horizontal\n                title=\"Password\"\n                description=\"Update your account password.\"\n                center\n              >\n                <Button\n                  prominence=\"secondary\"\n                  icon={SvgLock}\n                  onClick={() => setShowPasswordModal(true)}\n                  interaction={showPasswordModal ? \"hover\" : \"rest\"}\n                >\n                  Change Password\n                </Button>\n              </InputLayouts.Horizontal>\n            )}\n          </Card>\n        </Section>\n\n        {showTokensSection && (\n          <Section gap={0.75}>\n            <Content\n              title=\"Access Tokens\"\n              sizePreset=\"main-content\"\n              variant=\"section\"\n              widthVariant=\"full\"\n            />\n            {canCreateTokens ? (\n              <Card padding={0.25}>\n                <Section gap={0}>\n                  <Section flexDirection=\"row\" padding={0.25} gap={0.5}>\n                    {pats.length === 0 ? (\n                      <Section padding={0.5} alignItems=\"start\">\n                        <Text text03 secondaryBody>\n                          {isLoading\n                            ? \"Loading tokens...\"\n                            : \"No access tokens created.\"}\n                        </Text>\n                      </Section>\n                    ) : (\n                      <InputTypeIn\n                        placeholder=\"Search...\"\n                        value={query}\n                        onChange={(e) => setQuery(e.target.value)}\n                        leftSearchIcon\n                        variant=\"internal\"\n                      />\n                    )}\n                    <CreateButton\n                      onClick={() => setShowCreateModal(true)}\n                      secondary={false}\n                      internal\n                      transient={showCreateModal}\n                      rightIcon\n                    >\n                      New Access Token\n                    </CreateButton>\n                  </Section>\n\n                  <Section gap={0.25}>\n                    {filteredPats.map((pat) => {\n                      const now = new Date();\n                      const createdDate = new Date(pat.created_at);\n                      const daysSinceCreation = Math.floor(\n                        (now.getTime() - createdDate.getTime()) /\n                          (1000 * 60 * 60 * 24)\n                      );\n\n                      let expiryText = \"Never expires\";\n                      if (pat.expires_at) {\n                        const expiresDate = new Date(pat.expires_at);\n                        const daysUntilExpiry = Math.ceil(\n                          (expiresDate.getTime() - now.getTime()) /\n                            (1000 * 60 * 60 * 24)\n                        );\n                        expiryText = `Expires in ${daysUntilExpiry} day${\n                          daysUntilExpiry === 1 ? \"\" : \"s\"\n                        }`;\n                      }\n\n                      const middleText = `Created ${daysSinceCreation} day${\n                        daysSinceCreation === 1 ? \"\" : \"s\"\n                      } ago - ${expiryText}`;\n\n                      return (\n                        <Interactive.Container\n                          key={pat.id}\n                          heightVariant=\"fit\"\n                          widthVariant=\"full\"\n                        >\n                          <div className=\"w-full bg-background-tint-01\">\n                            <AttachmentItemLayout\n                              icon={SvgKey}\n                              title={pat.name}\n                              description={pat.token_display}\n                              middleText={middleText}\n                              rightChildren={\n                                <Button\n                                  icon={SvgTrash}\n                                  onClick={() => setTokenToDelete(pat)}\n                                  prominence=\"tertiary\"\n                                  size=\"sm\"\n                                  aria-label={`Delete token ${pat.name}`}\n                                />\n                              }\n                            />\n                          </div>\n                        </Interactive.Container>\n                      );\n                    })}\n                  </Section>\n                </Section>\n              </Card>\n            ) : (\n              <Card>\n                <Section flexDirection=\"row\" justifyContent=\"between\">\n                  <Text text03 secondaryBody>\n                    Access tokens require an active paid subscription.\n                  </Text>\n                  <Button prominence=\"secondary\" href=\"/admin/billing\">\n                    Upgrade Plan\n                  </Button>\n                </Section>\n              </Card>\n            )}\n          </Section>\n        )}\n      </Section>\n    </>\n  );\n}\n\ninterface IndexedConnectorCardProps {\n  source: ValidSources;\n  isActive: boolean;\n}\n\nfunction IndexedConnectorCard({ source, isActive }: IndexedConnectorCardProps) {\n  const sourceMetadata = getSourceMetadata(source);\n\n  return (\n    <Card>\n      <Content\n        icon={sourceMetadata.icon}\n        title={sourceMetadata.displayName}\n        description={isActive ? \"Connected\" : \"Paused\"}\n        sizePreset=\"main-content\"\n        variant=\"section\"\n      />\n    </Card>\n  );\n}\n\ninterface FederatedConnectorCardProps {\n  connector: FederatedConnectorOAuthStatus;\n  onDisconnectSuccess: () => void;\n}\n\nfunction FederatedConnectorCard({\n  connector,\n  onDisconnectSuccess,\n}: FederatedConnectorCardProps) {\n  const [isDisconnecting, setIsDisconnecting] = useState(false);\n  const [showDisconnectConfirmation, setShowDisconnectConfirmation] =\n    useState(false);\n  const sourceMetadata = getSourceMetadata(connector.source as ValidSources);\n\n  const handleDisconnect = useCallback(async () => {\n    setIsDisconnecting(true);\n    try {\n      const response = await fetch(\n        `/api/federated/${connector.federated_connector_id}/oauth`,\n        { method: \"DELETE\" }\n      );\n\n      if (response.ok) {\n        toast.success(\"Disconnected successfully\");\n        setShowDisconnectConfirmation(false);\n        onDisconnectSuccess();\n      } else {\n        throw new Error(\"Failed to disconnect\");\n      }\n    } catch (error) {\n      toast.error(\"Failed to disconnect\");\n    } finally {\n      setIsDisconnecting(false);\n    }\n  }, [connector.federated_connector_id, onDisconnectSuccess]);\n\n  return (\n    <>\n      {showDisconnectConfirmation && (\n        <ConfirmationModalLayout\n          icon={SvgUnplug}\n          title={`Disconnect ${sourceMetadata.displayName}`}\n          onClose={() => setShowDisconnectConfirmation(false)}\n          submit={\n            <Button\n              disabled={isDisconnecting}\n              variant=\"danger\"\n              onClick={() => void handleDisconnect()}\n            >\n              {isDisconnecting ? \"Disconnecting...\" : \"Disconnect\"}\n            </Button>\n          }\n        >\n          <Section gap={0.5} alignItems=\"start\">\n            <Text>\n              Onyx will no longer be able to access or search content from your{\" \"}\n              <Text className=\"!font-bold\">{sourceMetadata.displayName}</Text>{\" \"}\n              account.\n            </Text>\n            <Text>\n              You can still continue existing sessions referencing{\" \"}\n              {sourceMetadata.displayName} content.\n            </Text>\n          </Section>\n        </ConfirmationModalLayout>\n      )}\n\n      <Card padding={0.5}>\n        <ContentAction\n          icon={sourceMetadata.icon}\n          title={sourceMetadata.displayName}\n          description={\n            connector.has_oauth_token ? \"Connected\" : \"Not connected\"\n          }\n          sizePreset=\"main-content\"\n          variant=\"section\"\n          paddingVariant=\"sm\"\n          rightChildren={\n            connector.has_oauth_token ? (\n              <Button\n                disabled={isDisconnecting}\n                icon={SvgUnplug}\n                prominence=\"tertiary\"\n                size=\"sm\"\n                onClick={() => setShowDisconnectConfirmation(true)}\n              />\n            ) : connector.authorize_url ? (\n              <Button\n                prominence=\"internal\"\n                href={connector.authorize_url}\n                target=\"_blank\"\n                rightIcon={SvgArrowExchange}\n              >\n                Connect\n              </Button>\n            ) : undefined\n          }\n        />\n      </Card>\n    </>\n  );\n}\n\nfunction ConnectorsSettings() {\n  const {\n    connectors: federatedConnectors,\n    refetch: refetchFederatedConnectors,\n  } = useFederatedOAuthStatus();\n  const { ccPairs } = useCCPairs();\n\n  const ACTIVE_STATUSES: ConnectorCredentialPairStatus[] = [\n    ConnectorCredentialPairStatus.ACTIVE,\n    ConnectorCredentialPairStatus.SCHEDULED,\n    ConnectorCredentialPairStatus.INITIAL_INDEXING,\n  ];\n\n  // Group indexed connectors by source\n  const groupedConnectors = ccPairs.reduce(\n    (acc, ccPair) => {\n      if (!acc[ccPair.source]) {\n        acc[ccPair.source] = {\n          source: ccPair.source,\n          hasActiveConnector: false,\n        };\n      }\n      if (ACTIVE_STATUSES.includes(ccPair.status)) {\n        acc[ccPair.source]!.hasActiveConnector = true;\n      }\n      return acc;\n    },\n    {} as Record<\n      string,\n      {\n        source: ValidSources;\n        hasActiveConnector: boolean;\n      }\n    >\n  );\n\n  const hasConnectors =\n    Object.keys(groupedConnectors).length > 0 || federatedConnectors.length > 0;\n\n  return (\n    <Section gap={2}>\n      <Section gap={0.75} justifyContent=\"start\">\n        <Content\n          title=\"Connectors\"\n          sizePreset=\"main-content\"\n          variant=\"section\"\n          widthVariant=\"full\"\n        />\n        {hasConnectors ? (\n          <>\n            {/* Indexed Connectors */}\n            {Object.values(groupedConnectors).map((connector) => (\n              <IndexedConnectorCard\n                key={connector.source}\n                source={connector.source}\n                isActive={connector.hasActiveConnector}\n              />\n            ))}\n\n            {/* Federated Connectors */}\n            {federatedConnectors.map((connector) => (\n              <FederatedConnectorCard\n                key={connector.federated_connector_id}\n                connector={connector}\n                onDisconnectSuccess={() => refetchFederatedConnectors?.()}\n              />\n            ))}\n          </>\n        ) : (\n          <EmptyMessage title=\"No connectors set up for your organization.\" />\n        )}\n      </Section>\n    </Section>\n  );\n}\n\nexport {\n  GeneralSettings,\n  ChatPreferencesSettings,\n  AccountsAccessSettings,\n  ConnectorsSettings,\n};\n"
  },
  {
    "path": "web/src/refresh-pages/admin/AgentsPage/AgentRowActions.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useState } from \"react\";\nimport { Button } from \"@opal/components\";\n// TODO(@raunakab): migrate to Opal LineItemButton once it supports danger variant\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { cn } from \"@opal/utils\";\nimport {\n  SvgMoreHorizontal,\n  SvgEdit,\n  SvgEye,\n  SvgEyeOff,\n  SvgStar,\n  SvgStarOff,\n  SvgShare,\n  SvgBarChart,\n  SvgTrash,\n} from \"@opal/icons\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { toast } from \"@/hooks/useToast\";\nimport { useRouter } from \"next/navigation\";\nimport {\n  deleteAgent,\n  toggleAgentFeatured,\n  toggleAgentListed,\n} from \"@/refresh-pages/admin/AgentsPage/svc\";\nimport type { AgentRow } from \"@/refresh-pages/admin/AgentsPage/interfaces\";\nimport type { Route } from \"next\";\nimport ShareAgentModal from \"@/sections/modals/ShareAgentModal\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport { useAgent } from \"@/hooks/useAgents\";\nimport {\n  updateAgentSharedStatus,\n  updateAgentFeaturedStatus,\n} from \"@/lib/agents\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { useUser } from \"@/providers/UserProvider\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface AgentRowActionsProps {\n  agent: AgentRow;\n  onMutate: () => void;\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nexport default function AgentRowActions({\n  agent,\n  onMutate,\n}: AgentRowActionsProps) {\n  const router = useRouter();\n  const { isAdmin, isCurator } = useUser();\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n  const canUpdateFeaturedStatus = isAdmin || isCurator;\n  const { agent: fullAgent, refresh: refreshAgent } = useAgent(agent.id);\n  const shareModal = useCreateModal();\n\n  const [popoverOpen, setPopoverOpen] = useState(false);\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const [deleteOpen, setDeleteOpen] = useState(false);\n  const [featuredOpen, setFeaturedOpen] = useState(false);\n  const [unlistOpen, setUnlistOpen] = useState(false);\n\n  async function handleAction(action: () => Promise<void>, close: () => void) {\n    setIsSubmitting(true);\n    try {\n      await action();\n      onMutate();\n      toast.success(`${agent.name} updated successfully.`);\n      close();\n    } catch (err) {\n      toast.error(err instanceof Error ? err.message : \"An error occurred\");\n    } finally {\n      setIsSubmitting(false);\n    }\n  }\n\n  const handleShare = useCallback(\n    async (\n      userIds: string[],\n      groupIds: number[],\n      isPublic: boolean,\n      isFeatured: boolean,\n      labelIds: number[]\n    ) => {\n      const shareError = await updateAgentSharedStatus(\n        agent.id,\n        userIds,\n        groupIds,\n        isPublic,\n        isPaidEnterpriseFeaturesEnabled,\n        labelIds\n      );\n\n      if (shareError) {\n        toast.error(`Failed to share agent: ${shareError}`);\n        return;\n      }\n\n      if (canUpdateFeaturedStatus) {\n        const featuredError = await updateAgentFeaturedStatus(\n          agent.id,\n          isFeatured\n        );\n        if (featuredError) {\n          toast.error(`Failed to update featured status: ${featuredError}`);\n          refreshAgent();\n          return;\n        }\n      }\n\n      refreshAgent();\n      onMutate();\n      shareModal.toggle(false);\n    },\n    [\n      agent.id,\n      isPaidEnterpriseFeaturesEnabled,\n      canUpdateFeaturedStatus,\n      refreshAgent,\n      onMutate,\n    ]\n  );\n\n  return (\n    <>\n      <shareModal.Provider>\n        <ShareAgentModal\n          agentId={agent.id}\n          userIds={fullAgent?.users?.map((u) => u.id) ?? []}\n          groupIds={fullAgent?.groups ?? []}\n          isPublic={fullAgent?.is_public ?? false}\n          isFeatured={fullAgent?.is_featured ?? false}\n          labelIds={fullAgent?.labels?.map((l) => l.id) ?? []}\n          onShare={handleShare}\n        />\n      </shareModal.Provider>\n\n      <div className=\"flex items-center gap-0.5\">\n        {/* TODO(@raunakab): abstract a more standardized way of doing this\n            opacity-on-hover animation. Making Hoverable more extensible\n            (e.g. supporting table row groups) would let us use it here\n            instead of raw Tailwind group-hover. */}\n        {!agent.builtin_persona && (\n          <div className=\"opacity-0 group-hover/row:opacity-100 transition-opacity\">\n            <Button\n              prominence=\"tertiary\"\n              icon={SvgEdit}\n              tooltip=\"Edit Agent\"\n              onClick={() =>\n                router.push(\n                  `/app/agents/edit/${\n                    agent.id\n                  }?u=${Date.now()}&admin=true` as Route\n                )\n              }\n            />\n          </div>\n        )}\n        {!agent.is_listed ? (\n          <Button\n            prominence=\"tertiary\"\n            icon={SvgEyeOff}\n            tooltip=\"Re-list Agent\"\n            onClick={() =>\n              handleAction(\n                () => toggleAgentListed(agent.id, agent.is_listed),\n                () => {}\n              )\n            }\n          />\n        ) : (\n          <div\n            className={cn(\n              !agent.is_featured &&\n                \"opacity-0 group-hover/row:opacity-100 transition-opacity\"\n            )}\n          >\n            <Button\n              prominence=\"tertiary\"\n              icon={SvgStar}\n              interaction={featuredOpen ? \"hover\" : \"rest\"}\n              tooltip={\n                agent.is_featured ? \"Remove Featured\" : \"Set as Featured\"\n              }\n              onClick={() => {\n                setPopoverOpen(false);\n                setFeaturedOpen(true);\n              }}\n            />\n          </div>\n        )}\n\n        {/* Overflow menu */}\n        <Popover open={popoverOpen} onOpenChange={setPopoverOpen}>\n          <div\n            className={cn(\n              !popoverOpen &&\n                \"opacity-0 group-hover/row:opacity-100 transition-opacity\"\n            )}\n          >\n            <Popover.Trigger asChild>\n              <Button prominence=\"tertiary\" icon={SvgMoreHorizontal} />\n            </Popover.Trigger>\n          </div>\n          <Popover.Content align=\"end\" width=\"sm\">\n            <PopoverMenu>\n              {[\n                <LineItem\n                  key=\"visibility\"\n                  icon={agent.is_listed ? SvgEyeOff : SvgEye}\n                  onClick={() => {\n                    setPopoverOpen(false);\n                    if (agent.is_listed) {\n                      setUnlistOpen(true);\n                    } else {\n                      handleAction(\n                        () => toggleAgentListed(agent.id, agent.is_listed),\n                        () => {}\n                      );\n                    }\n                  }}\n                >\n                  {agent.is_listed ? \"Unlist Agent\" : \"List Agent\"}\n                </LineItem>,\n                <LineItem\n                  key=\"share\"\n                  icon={SvgShare}\n                  onClick={() => {\n                    setPopoverOpen(false);\n                    shareModal.toggle(true);\n                  }}\n                >\n                  Share\n                </LineItem>,\n                isPaidEnterpriseFeaturesEnabled ? (\n                  <LineItem\n                    key=\"stats\"\n                    icon={SvgBarChart}\n                    onClick={() => {\n                      setPopoverOpen(false);\n                      router.push(`/ee/agents/stats/${agent.id}` as Route);\n                    }}\n                  >\n                    Stats\n                  </LineItem>\n                ) : undefined,\n                !agent.builtin_persona ? null : undefined,\n                !agent.builtin_persona ? (\n                  <LineItem\n                    key=\"delete\"\n                    icon={SvgTrash}\n                    danger\n                    onClick={() => {\n                      setPopoverOpen(false);\n                      setDeleteOpen(true);\n                    }}\n                  >\n                    Delete\n                  </LineItem>\n                ) : undefined,\n              ]}\n            </PopoverMenu>\n          </Popover.Content>\n        </Popover>\n      </div>\n\n      {deleteOpen && (\n        <ConfirmationModalLayout\n          icon={SvgTrash}\n          title=\"Delete Agent\"\n          onClose={isSubmitting ? undefined : () => setDeleteOpen(false)}\n          submit={\n            <Button\n              disabled={isSubmitting}\n              variant=\"danger\"\n              onClick={() => {\n                handleAction(\n                  () => deleteAgent(agent.id),\n                  () => setDeleteOpen(false)\n                );\n              }}\n            >\n              Delete\n            </Button>\n          }\n        >\n          <Text as=\"p\" text03>\n            Are you sure you want to delete{\" \"}\n            <Text as=\"span\" text05>\n              {agent.name}\n            </Text>\n            ? This action cannot be undone.\n          </Text>\n        </ConfirmationModalLayout>\n      )}\n\n      {featuredOpen && (\n        <ConfirmationModalLayout\n          icon={agent.is_featured ? SvgStarOff : SvgStar}\n          title={\n            agent.is_featured\n              ? `Remove ${agent.name} from Featured`\n              : `Feature ${agent.name}`\n          }\n          onClose={isSubmitting ? undefined : () => setFeaturedOpen(false)}\n          submit={\n            <Button\n              disabled={isSubmitting}\n              onClick={() => {\n                handleAction(\n                  () => toggleAgentFeatured(agent.id, agent.is_featured),\n                  () => setFeaturedOpen(false)\n                );\n              }}\n            >\n              {agent.is_featured ? \"Unfeature\" : \"Feature\"}\n            </Button>\n          }\n        >\n          <div className=\"flex flex-col gap-2\">\n            <Text as=\"p\" text03>\n              {agent.is_featured\n                ? `This will remove ${agent.name} from the featured section on top of the explore agents list. New users will no longer see it pinned to their sidebar, but existing pins are unaffected.`\n                : \"Featured agents appear at the top of the explore agents list and are automatically pinned to the sidebar for new users with access. Use this to highlight recommended agents across your organization.\"}\n            </Text>\n            <Text as=\"p\" text03>\n              This does not change who can access this agent.\n            </Text>\n          </div>\n        </ConfirmationModalLayout>\n      )}\n\n      {unlistOpen && (\n        <ConfirmationModalLayout\n          icon={SvgEyeOff}\n          title={`Unlist ${agent.name}`}\n          onClose={isSubmitting ? undefined : () => setUnlistOpen(false)}\n          submit={\n            <Button\n              disabled={isSubmitting}\n              onClick={() => {\n                handleAction(\n                  () => toggleAgentListed(agent.id, agent.is_listed),\n                  () => setUnlistOpen(false)\n                );\n              }}\n            >\n              Unlist\n            </Button>\n          }\n        >\n          <div className=\"flex flex-col gap-2\">\n            <Text as=\"p\" text03>\n              Unlisted agents don&apos;t appear in the explore agents list but\n              remain accessible via direct link, and to users who have\n              previously used or pinned them.\n            </Text>\n            <Text as=\"p\" text03>\n              This does not change who can access this agent.\n            </Text>\n          </div>\n        </ConfirmationModalLayout>\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/AgentsPage/AgentsTable.tsx",
    "content": "\"use client\";\n\nimport { useMemo, useState } from \"react\";\nimport { Table, createTableColumns } from \"@opal/components\";\nimport { Content, IllustrationContent } from \"@opal/layouts\";\nimport SvgNoResult from \"@opal/illustrations/no-result\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport type { MinimalUserSnapshot } from \"@/lib/types\";\nimport AgentAvatar from \"@/refresh-components/avatars/AgentAvatar\";\nimport type { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { useAdminPersonas } from \"@/hooks/useAdminPersonas\";\nimport { toast } from \"@/hooks/useToast\";\nimport AgentRowActions from \"@/refresh-pages/admin/AgentsPage/AgentRowActions\";\nimport { updateAgentDisplayPriorities } from \"@/refresh-pages/admin/AgentsPage/svc\";\nimport type { AgentRow } from \"@/refresh-pages/admin/AgentsPage/interfaces\";\nimport type { Persona } from \"@/app/admin/agents/interfaces\";\nimport { SvgUser } from \"@opal/icons\";\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\nfunction toAgentRow(persona: Persona): AgentRow {\n  return {\n    id: persona.id,\n    name: persona.name,\n    description: persona.description,\n    is_public: persona.is_public,\n    is_listed: persona.is_listed,\n    is_featured: persona.is_featured,\n    builtin_persona: persona.builtin_persona,\n    display_priority: persona.display_priority,\n    owner: persona.owner,\n    groups: persona.groups,\n    users: persona.users,\n    uploaded_image_id: persona.uploaded_image_id,\n    icon_name: persona.icon_name,\n  };\n}\n\n// ---------------------------------------------------------------------------\n// Column renderers\n// ---------------------------------------------------------------------------\n\nfunction renderCreatedByColumn(\n  _value: MinimalUserSnapshot | null,\n  row: AgentRow\n) {\n  return (\n    <Content\n      sizePreset=\"main-ui\"\n      variant=\"section\"\n      icon={SvgUser}\n      title={row.builtin_persona ? \"System\" : row.owner?.email ?? \"\\u2014\"}\n    />\n  );\n}\n\nfunction getAccessTitle(row: AgentRow): string {\n  if (row.is_public) return \"Public\";\n  if (row.groups.length > 0 || row.users.length > 0) return \"Shared\";\n  return \"Private\";\n}\n\nfunction renderAccessColumn(_isPublic: boolean, row: AgentRow) {\n  return (\n    <Content\n      sizePreset=\"main-ui\"\n      variant=\"section\"\n      title={getAccessTitle(row)}\n      description={\n        !row.is_listed ? \"Unlisted\" : row.is_featured ? \"Featured\" : undefined\n      }\n    />\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Columns\n// ---------------------------------------------------------------------------\n\nconst tc = createTableColumns<AgentRow>();\n\nfunction buildColumns(onMutate: () => void) {\n  return [\n    tc.qualifier({\n      content: \"icon\",\n      background: true,\n      getContent: (row) => (props) => (\n        <AgentAvatar\n          agent={row as unknown as MinimalPersonaSnapshot}\n          size={props.size}\n        />\n      ),\n    }),\n    tc.column(\"name\", {\n      header: \"Name\",\n      weight: 25,\n      cell: (value) => (\n        <Text as=\"span\" mainUiBody text05>\n          {value}\n        </Text>\n      ),\n    }),\n    tc.column(\"description\", {\n      header: \"Description\",\n      weight: 35,\n      cell: (value) => (\n        <Text as=\"span\" mainUiBody text03>\n          {value || \"\\u2014\"}\n        </Text>\n      ),\n    }),\n    tc.column(\"owner\", {\n      header: \"Created By\",\n      weight: 20,\n      cell: renderCreatedByColumn,\n    }),\n    tc.column(\"is_public\", {\n      header: \"Access\",\n      weight: 12,\n      cell: renderAccessColumn,\n    }),\n    tc.actions({\n      cell: (row) => <AgentRowActions agent={row} onMutate={onMutate} />,\n    }),\n  ];\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nconst PAGE_SIZE = 10;\n\nexport default function AgentsTable() {\n  const [searchTerm, setSearchTerm] = useState(\"\");\n\n  const { personas, isLoading, error, refresh } = useAdminPersonas();\n\n  const columns = useMemo(() => buildColumns(refresh), [refresh]);\n\n  const agentRows: AgentRow[] = useMemo(\n    () => personas.filter((p) => !p.builtin_persona).map(toAgentRow),\n    [personas]\n  );\n\n  const handleReorder = async (\n    _orderedIds: string[],\n    changedOrders: Record<string, number>\n  ) => {\n    try {\n      await updateAgentDisplayPriorities(changedOrders);\n      refresh();\n    } catch (err) {\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to update agent order\"\n      );\n      refresh();\n    }\n  };\n\n  if (isLoading) {\n    return (\n      <div className=\"flex justify-center py-12\">\n        <SimpleLoader className=\"h-6 w-6\" />\n      </div>\n    );\n  }\n\n  if (error) {\n    console.error(\"Failed to load agents:\", error);\n    return (\n      <Text as=\"p\" secondaryBody text03>\n        Failed to load agents. Please try refreshing the page.\n      </Text>\n    );\n  }\n\n  return (\n    <div className=\"flex flex-col gap-3\">\n      <InputTypeIn\n        value={searchTerm}\n        onChange={(e) => setSearchTerm(e.target.value)}\n        placeholder=\"Search agents...\"\n        leftSearchIcon\n      />\n      <Table\n        data={agentRows}\n        columns={columns}\n        getRowId={(row) => String(row.id)}\n        pageSize={PAGE_SIZE}\n        searchTerm={searchTerm}\n        draggable={{\n          onReorder: handleReorder,\n        }}\n        emptyState={\n          <IllustrationContent\n            illustration={SvgNoResult}\n            title=\"No agents found\"\n            description=\"No agents match the current search.\"\n          />\n        }\n        footer={{}}\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/AgentsPage/interfaces.ts",
    "content": "import type { MinimalUserSnapshot } from \"@/lib/types\";\n\nexport interface AgentRow {\n  id: number;\n  name: string;\n  description: string;\n  is_public: boolean;\n  is_listed: boolean;\n  is_featured: boolean;\n  builtin_persona: boolean;\n  display_priority: number | null;\n  owner: MinimalUserSnapshot | null;\n  groups: number[];\n  users: MinimalUserSnapshot[];\n  uploaded_image_id?: string;\n  icon_name?: string;\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/AgentsPage/svc.ts",
    "content": "async function parseErrorDetail(\n  res: Response,\n  fallback: string\n): Promise<string> {\n  try {\n    const body = await res.json();\n    return body?.detail ?? fallback;\n  } catch (err) {\n    console.error(\"Failed to parse error response:\", err);\n    return fallback;\n  }\n}\n\nexport async function deleteAgent(agentId: number): Promise<void> {\n  const res = await fetch(`/api/persona/${agentId}`, {\n    method: \"DELETE\",\n    credentials: \"include\",\n  });\n  if (!res.ok) {\n    throw new Error(await parseErrorDetail(res, \"Failed to delete agent\"));\n  }\n}\n\nexport async function toggleAgentFeatured(\n  agentId: number,\n  currentlyFeatured: boolean\n): Promise<void> {\n  const res = await fetch(`/api/admin/persona/${agentId}/featured`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ is_featured: !currentlyFeatured }),\n    credentials: \"include\",\n  });\n  if (!res.ok) {\n    throw new Error(\n      await parseErrorDetail(res, \"Failed to toggle featured status\")\n    );\n  }\n}\n\nexport async function toggleAgentListed(\n  agentId: number,\n  currentlyListed: boolean\n): Promise<void> {\n  const res = await fetch(`/api/admin/persona/${agentId}/listed`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ is_listed: !currentlyListed }),\n    credentials: \"include\",\n  });\n  if (!res.ok) {\n    throw new Error(await parseErrorDetail(res, \"Failed to toggle visibility\"));\n  }\n}\n\nexport async function updateAgentDisplayPriorities(\n  displayPriorityMap: Record<string, number>\n): Promise<void> {\n  const res = await fetch(\"/api/admin/agents/display-priorities\", {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ display_priority_map: displayPriorityMap }),\n  });\n  if (!res.ok) {\n    throw new Error(\n      await parseErrorDetail(res, \"Failed to update agent order\")\n    );\n  }\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/AgentsPage.tsx",
    "content": "\"use client\";\n\nimport { SvgOnyxOctagon, SvgPlus } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport Link from \"next/link\";\n\nimport AgentsTable from \"./AgentsPage/AgentsTable\";\n\n// ---------------------------------------------------------------------------\n// Page\n// ---------------------------------------------------------------------------\n\nexport default function AgentsPage() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        title=\"Agents\"\n        description=\"Customize AI behavior and knowledge with agents. Manage agents in your organization.\"\n        icon={SvgOnyxOctagon}\n        rightChildren={\n          <Button href=\"/app/agents/create?admin=true\" icon={SvgPlus}>\n            New Agent\n          </Button>\n        }\n      />\n      <SettingsLayouts.Body>\n        <AgentsTable />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ChatPreferencesPage.tsx",
    "content": "\"use client\";\n\nimport { markdown } from \"@opal/utils\";\nimport React, { useCallback, useRef, useState } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport { Formik, Form, useFormikContext } from \"formik\";\nimport useSWR, { mutate } from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport Separator from \"@/refresh-components/Separator\";\nimport SimpleCollapsible from \"@/refresh-components/SimpleCollapsible\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport SwitchField from \"@/refresh-components/form/SwitchField\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport InputTextAreaField from \"@/refresh-components/form/InputTextAreaField\";\nimport InputSelectField from \"@/refresh-components/form/InputSelectField\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport {\n  SvgAddLines,\n  SvgActions,\n  SvgExpand,\n  SvgFold,\n  SvgExternalLink,\n  SvgAlertCircle,\n  SvgRefreshCw,\n} from \"@opal/icons\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport { Content } from \"@opal/layouts\";\nimport {\n  useSettingsContext,\n  useVectorDbEnabled,\n} from \"@/providers/SettingsProvider\";\nimport useCCPairs from \"@/hooks/useCCPairs\";\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport EmptyMessage from \"@/refresh-components/EmptyMessage\";\nimport { Settings } from \"@/interfaces/settings\";\nimport { toast } from \"@/hooks/useToast\";\nimport { useAvailableTools } from \"@/hooks/useAvailableTools\";\nimport {\n  SEARCH_TOOL_ID,\n  IMAGE_GENERATION_TOOL_ID,\n  WEB_SEARCH_TOOL_ID,\n  PYTHON_TOOL_ID,\n  OPEN_URL_TOOL_ID,\n} from \"@/app/app/components/tools/constants\";\nimport { Button, Text, Card as OpalCard } from \"@opal/components\";\nimport Modal from \"@/refresh-components/Modal\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport useMcpServersForAgentEditor from \"@/hooks/useMcpServersForAgentEditor\";\nimport useOpenApiTools from \"@/hooks/useOpenApiTools\";\nimport * as ExpandableCard from \"@/layouts/expandable-card-layouts\";\nimport * as ActionsLayouts from \"@/layouts/actions-layouts\";\nimport { getActionIcon } from \"@/lib/tools/mcpUtils\";\nimport { Disabled, Hoverable } from \"@opal/core\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport useFilter from \"@/hooks/useFilter\";\nimport { MCPServer } from \"@/lib/tools/interfaces\";\nimport type { IconProps } from \"@opal/types\";\n\nconst route = ADMIN_ROUTES.CHAT_PREFERENCES;\n\ninterface DefaultAgentConfiguration {\n  tool_ids: number[];\n  system_prompt: string | null;\n  default_system_prompt: string;\n}\n\ninterface ChatPreferencesFormValues {\n  // Features\n  search_ui_enabled: boolean;\n  deep_research_enabled: boolean;\n  auto_scroll: boolean;\n\n  // Team context\n  company_name: string;\n  company_description: string;\n\n  // Advanced\n  maximum_chat_retention_days: string;\n  anonymous_user_enabled: boolean;\n  disable_default_assistant: boolean;\n\n  // File limits\n  user_file_max_upload_size_mb: string;\n  file_token_count_threshold_k: string;\n}\n\ninterface MCPServerCardTool {\n  id: number;\n  icon: React.FunctionComponent<IconProps>;\n  name: string;\n  description: string;\n}\n\ninterface MCPServerCardProps {\n  server: MCPServer;\n  tools: MCPServerCardTool[];\n  isToolEnabled: (toolDbId: number) => boolean;\n  onToggleTool: (toolDbId: number, enabled: boolean) => void;\n  onToggleTools: (toolDbIds: number[], enabled: boolean) => void;\n}\n\nfunction MCPServerCard({\n  server,\n  tools,\n  isToolEnabled,\n  onToggleTool,\n  onToggleTools,\n}: MCPServerCardProps) {\n  const [isFolded, setIsFolded] = useState(true);\n  const {\n    query,\n    setQuery,\n    filtered: filteredTools,\n  } = useFilter(tools, (tool) => `${tool.name} ${tool.description}`);\n\n  const allToolIds = tools.map((t) => t.id);\n  const serverEnabled =\n    tools.length > 0 && tools.some((t) => isToolEnabled(t.id));\n  const needsAuth = !server.is_authenticated;\n  const authTooltip = needsAuth\n    ? \"Authenticate this MCP server before enabling its tools.\"\n    : undefined;\n\n  return (\n    <ExpandableCard.Root isFolded={isFolded} onFoldedChange={setIsFolded}>\n      <ActionsLayouts.Header\n        title={server.name}\n        description={server.description}\n        icon={getActionIcon(server.server_url, server.name)}\n        rightChildren={\n          <SimpleTooltip tooltip={authTooltip} side=\"top\">\n            <Switch\n              checked={serverEnabled}\n              onCheckedChange={(checked) => onToggleTools(allToolIds, checked)}\n              disabled={needsAuth}\n            />\n          </SimpleTooltip>\n        }\n      >\n        {tools.length > 0 && (\n          <Section flexDirection=\"row\" gap={0.5}>\n            <InputTypeIn\n              placeholder=\"Search tools...\"\n              variant=\"internal\"\n              leftSearchIcon\n              value={query}\n              onChange={(e) => setQuery(e.target.value)}\n            />\n            <Button\n              rightIcon={isFolded ? SvgExpand : SvgFold}\n              onClick={() => setIsFolded((prev) => !prev)}\n              prominence=\"internal\"\n              size=\"lg\"\n            >\n              {isFolded ? \"Expand\" : \"Fold\"}\n            </Button>\n          </Section>\n        )}\n      </ActionsLayouts.Header>\n      {tools.length > 0 && filteredTools.length > 0 && (\n        <ActionsLayouts.Content>\n          <div className=\"flex flex-col gap-2\">\n            {filteredTools.map((tool) => (\n              <ActionsLayouts.Tool\n                key={tool.id}\n                title={tool.name}\n                description={tool.description}\n                icon={tool.icon}\n                rightChildren={\n                  <SimpleTooltip tooltip={authTooltip} side=\"top\">\n                    <Switch\n                      checked={isToolEnabled(tool.id)}\n                      onCheckedChange={(checked) =>\n                        onToggleTool(tool.id, checked)\n                      }\n                      disabled={needsAuth}\n                    />\n                  </SimpleTooltip>\n                }\n              />\n            ))}\n          </div>\n        </ActionsLayouts.Content>\n      )}\n    </ExpandableCard.Root>\n  );\n}\n\ntype FileLimitFieldName =\n  | \"user_file_max_upload_size_mb\"\n  | \"file_token_count_threshold_k\";\n\ninterface NumericLimitFieldProps {\n  name: FileLimitFieldName;\n  defaultValue: string;\n  saveSettings: (updates: Partial<Settings>) => Promise<void>;\n  maxValue?: number;\n  allowZero?: boolean;\n}\n\nfunction NumericLimitField({\n  name,\n  defaultValue,\n  saveSettings,\n  maxValue,\n  allowZero = false,\n}: NumericLimitFieldProps) {\n  const { values, setFieldValue } =\n    useFormikContext<ChatPreferencesFormValues>();\n  const initialValue = useRef(values[name]);\n  const restoringRef = useRef(false);\n  const value = values[name];\n\n  const parsed = parseInt(value, 10);\n  const isOverMax =\n    maxValue !== undefined && !isNaN(parsed) && parsed > maxValue;\n\n  const handleRestore = () => {\n    restoringRef.current = true;\n    initialValue.current = defaultValue;\n    void setFieldValue(name, defaultValue);\n    void saveSettings({ [name]: parseInt(defaultValue, 10) });\n  };\n\n  const handleBlur = () => {\n    // The restore button triggers a blur — skip since handleRestore already saved.\n    if (restoringRef.current) {\n      restoringRef.current = false;\n      return;\n    }\n\n    const parsed = parseInt(value, 10);\n    const isValid = !isNaN(parsed) && (allowZero ? parsed >= 0 : parsed > 0);\n\n    // Revert invalid input (empty, NaN, negative).\n    if (!isValid) {\n      if (allowZero) {\n        // Empty/invalid means \"no limit\" — persist 0 and clear the field.\n        void setFieldValue(name, \"\");\n        void saveSettings({ [name]: 0 });\n        initialValue.current = \"\";\n      } else {\n        void setFieldValue(name, initialValue.current);\n      }\n      return;\n    }\n\n    // Block save when the value exceeds the hard ceiling.\n    if (maxValue !== undefined && parsed > maxValue) {\n      return;\n    }\n\n    // For allowZero fields, 0 means \"no limit\" — clear the display\n    // so the \"No limit\" placeholder is visible, but still persist 0.\n    if (allowZero && parsed === 0) {\n      void setFieldValue(name, \"\");\n      if (initialValue.current !== \"\") {\n        void saveSettings({ [name]: 0 });\n        initialValue.current = \"\";\n      }\n      return;\n    }\n\n    const normalizedDisplay = String(parsed);\n\n    // Update the display to the canonical form (e.g. strip leading zeros).\n    if (value !== normalizedDisplay) {\n      void setFieldValue(name, normalizedDisplay);\n    }\n\n    // Persist only when the value actually changed.\n    if (normalizedDisplay !== initialValue.current) {\n      void saveSettings({ [name]: parsed });\n      initialValue.current = normalizedDisplay;\n    }\n  };\n\n  return (\n    <Hoverable.Root group=\"numericLimit\" widthVariant=\"full\">\n      <InputTypeInField\n        name={name}\n        inputMode=\"numeric\"\n        showClearButton={false}\n        pattern=\"[0-9]*\"\n        placeholder={allowZero ? \"No limit\" : `Default: ${defaultValue}`}\n        variant={isOverMax ? \"error\" : undefined}\n        rightSection={\n          (value || \"\") !== defaultValue ? (\n            <Hoverable.Item group=\"numericLimit\" variant=\"opacity-on-hover\">\n              <IconButton\n                icon={SvgRefreshCw}\n                tooltip=\"Restore default\"\n                internal\n                type=\"button\"\n                onClick={handleRestore}\n              />\n            </Hoverable.Item>\n          ) : undefined\n        }\n        onBlur={handleBlur}\n      />\n    </Hoverable.Root>\n  );\n}\n\ninterface FileSizeLimitFieldsProps {\n  saveSettings: (updates: Partial<Settings>) => Promise<void>;\n  defaultUploadSizeMb: string;\n  defaultTokenThresholdK: string;\n  maxAllowedUploadSizeMb?: number;\n}\n\nfunction FileSizeLimitFields({\n  saveSettings,\n  defaultUploadSizeMb,\n  defaultTokenThresholdK,\n  maxAllowedUploadSizeMb,\n}: FileSizeLimitFieldsProps) {\n  return (\n    <div className=\"flex gap-4 w-full items-start\">\n      <div className=\"flex-1\">\n        <InputLayouts.Vertical\n          title=\"File Size Limit (MB)\"\n          subDescription={\n            maxAllowedUploadSizeMb\n              ? `Max: ${maxAllowedUploadSizeMb} MB`\n              : undefined\n          }\n          nonInteractive\n        >\n          <NumericLimitField\n            name=\"user_file_max_upload_size_mb\"\n            defaultValue={defaultUploadSizeMb}\n            saveSettings={saveSettings}\n            maxValue={maxAllowedUploadSizeMb}\n          />\n        </InputLayouts.Vertical>\n      </div>\n      <div className=\"flex-1\">\n        <InputLayouts.Vertical\n          title=\"File Token Limit (thousand tokens)\"\n          nonInteractive\n        >\n          <NumericLimitField\n            name=\"file_token_count_threshold_k\"\n            defaultValue={defaultTokenThresholdK}\n            saveSettings={saveSettings}\n            allowZero\n          />\n        </InputLayouts.Vertical>\n      </div>\n    </div>\n  );\n}\n\n/**\n * Inner form component that uses useFormikContext to access values\n * and create save handlers for settings fields.\n */\nfunction ChatPreferencesForm() {\n  const router = useRouter();\n  const settings = useSettingsContext();\n  const { values } = useFormikContext<ChatPreferencesFormValues>();\n\n  // Track initial text values to avoid unnecessary saves on blur\n  const initialCompanyName = useRef(values.company_name);\n  const initialCompanyDescription = useRef(values.company_description);\n\n  // Tools availability\n  const { tools: availableTools } = useAvailableTools();\n  const vectorDbEnabled = useVectorDbEnabled();\n\n  const searchTool = availableTools.find(\n    (t) => t.in_code_tool_id === SEARCH_TOOL_ID\n  );\n  const imageGenTool = availableTools.find(\n    (t) => t.in_code_tool_id === IMAGE_GENERATION_TOOL_ID\n  );\n  const webSearchTool = availableTools.find(\n    (t) => t.in_code_tool_id === WEB_SEARCH_TOOL_ID\n  );\n  const openURLTool = availableTools.find(\n    (t) => t.in_code_tool_id === OPEN_URL_TOOL_ID\n  );\n  const codeInterpreterTool = availableTools.find(\n    (t) => t.in_code_tool_id === PYTHON_TOOL_ID\n  );\n\n  // Connectors\n  const { ccPairs } = useCCPairs();\n  const uniqueSources = Array.from(new Set(ccPairs.map((p) => p.source)));\n\n  // MCP servers and OpenAPI tools\n  const { mcpData } = useMcpServersForAgentEditor();\n  const { openApiTools: openApiToolsRaw } = useOpenApiTools();\n  const mcpServers = mcpData?.mcp_servers ?? [];\n  const openApiTools = openApiToolsRaw ?? [];\n\n  const mcpServersWithTools = mcpServers.map((server) => ({\n    server,\n    tools: availableTools\n      .filter((tool) => tool.mcp_server_id === server.id)\n      .map((tool) => ({\n        id: tool.id,\n        icon: getActionIcon(server.server_url, server.name),\n        name: tool.display_name || tool.name,\n        description: tool.description,\n      })),\n  }));\n\n  // Default agent configuration (system prompt)\n  const { data: defaultAgentConfig, mutate: mutateDefaultAgent } =\n    useSWR<DefaultAgentConfiguration>(\n      SWR_KEYS.defaultAssistantConfig,\n      errorHandlingFetcher\n    );\n\n  const enabledToolIds = defaultAgentConfig?.tool_ids ?? [];\n\n  const isToolEnabled = useCallback(\n    (toolDbId: number) => enabledToolIds.includes(toolDbId),\n    [enabledToolIds]\n  );\n\n  const saveToolIds = useCallback(\n    async (newToolIds: number[]) => {\n      // Optimistic update so subsequent toggles read fresh state\n      const optimisticData = defaultAgentConfig\n        ? { ...defaultAgentConfig, tool_ids: newToolIds }\n        : undefined;\n      try {\n        await mutateDefaultAgent(\n          async () => {\n            const response = await fetch(\"/api/admin/default-assistant\", {\n              method: \"PATCH\",\n              headers: { \"Content-Type\": \"application/json\" },\n              body: JSON.stringify({ tool_ids: newToolIds }),\n            });\n            if (!response.ok) {\n              const errorMsg = (await response.json()).detail;\n              throw new Error(errorMsg);\n            }\n            return optimisticData;\n          },\n          { optimisticData, revalidate: true }\n        );\n        toast.success(\"Tools updated\");\n      } catch {\n        toast.error(\"Failed to update tools\");\n      }\n    },\n    [defaultAgentConfig, mutateDefaultAgent]\n  );\n\n  const toggleTool = useCallback(\n    (toolDbId: number, enabled: boolean) => {\n      const newToolIds = enabled\n        ? [...enabledToolIds, toolDbId]\n        : enabledToolIds.filter((id) => id !== toolDbId);\n      void saveToolIds(newToolIds);\n    },\n    [enabledToolIds, saveToolIds]\n  );\n\n  const toggleTools = useCallback(\n    (toolDbIds: number[], enabled: boolean) => {\n      const idsSet = new Set(toolDbIds);\n      const withoutIds = enabledToolIds.filter((id) => !idsSet.has(id));\n      const newToolIds = enabled ? [...withoutIds, ...toolDbIds] : withoutIds;\n      void saveToolIds(newToolIds);\n    },\n    [enabledToolIds, saveToolIds]\n  );\n\n  // System prompt modal state\n  const [systemPromptModalOpen, setSystemPromptModalOpen] = useState(false);\n\n  const saveSettings = useCallback(\n    async (updates: Partial<Settings>) => {\n      const currentSettings = settings?.settings;\n      if (!currentSettings) return;\n\n      const newSettings = { ...currentSettings, ...updates };\n\n      try {\n        const response = await fetch(\"/api/admin/settings\", {\n          method: \"PUT\",\n          headers: { \"Content-Type\": \"application/json\" },\n          body: JSON.stringify(newSettings),\n        });\n\n        if (!response.ok) {\n          const errorMsg = (await response.json()).detail;\n          throw new Error(errorMsg);\n        }\n\n        router.refresh();\n        await mutate(SWR_KEYS.settings);\n        toast.success(\"Settings updated\");\n      } catch (error) {\n        toast.error(\"Failed to update settings\");\n      }\n    },\n    [settings, router]\n  );\n\n  return (\n    <>\n      <SettingsLayouts.Root>\n        <SettingsLayouts.Header\n          icon={route.icon}\n          title={route.title}\n          description=\"Organization-wide chat settings and defaults. Users can override some of these in their personal settings.\"\n          separator\n        />\n\n        <SettingsLayouts.Body>\n          {/* Team Context */}\n          <Section gap={1}>\n            <InputLayouts.Vertical\n              title=\"Team Name\"\n              subDescription=\"This is added to all chat sessions as additional context to provide a richer/customized experience.\"\n            >\n              <InputTypeInField\n                name=\"company_name\"\n                placeholder=\"Enter team name\"\n                onBlur={() => {\n                  if (values.company_name !== initialCompanyName.current) {\n                    void saveSettings({\n                      company_name: values.company_name || null,\n                    });\n                    initialCompanyName.current = values.company_name;\n                  }\n                }}\n              />\n            </InputLayouts.Vertical>\n\n            <InputLayouts.Vertical\n              title=\"Team Context\"\n              subDescription=\"Users can also provide additional individual context in their personal settings.\"\n            >\n              <InputTextAreaField\n                name=\"company_description\"\n                placeholder=\"Describe your team and how Onyx should behave.\"\n                rows={4}\n                maxRows={10}\n                autoResize\n                onBlur={() => {\n                  if (\n                    values.company_description !==\n                    initialCompanyDescription.current\n                  ) {\n                    void saveSettings({\n                      company_description: values.company_description || null,\n                    });\n                    initialCompanyDescription.current =\n                      values.company_description;\n                  }\n                }}\n              />\n            </InputLayouts.Vertical>\n          </Section>\n\n          <InputLayouts.Horizontal\n            title=\"System Prompt\"\n            description=\"Base prompt for all chats, agents, and projects. Modify with caution: Significant changes may degrade response quality.\"\n          >\n            <Button\n              prominence=\"tertiary\"\n              icon={SvgAddLines}\n              onClick={() => setSystemPromptModalOpen(true)}\n            >\n              Modify Prompt\n            </Button>\n          </InputLayouts.Horizontal>\n\n          <Separator noPadding />\n\n          {/* Features */}\n          <Section gap={0.75}>\n            <Content\n              title=\"Features\"\n              sizePreset=\"main-content\"\n              variant=\"section\"\n            />\n            <Card>\n              <SimpleTooltip\n                tooltip={\n                  uniqueSources.length === 0\n                    ? \"Set up connectors to use Search Mode\"\n                    : undefined\n                }\n                side=\"top\"\n              >\n                <Disabled disabled={uniqueSources.length === 0} allowClick>\n                  <div className=\"w-full\">\n                    <InputLayouts.Horizontal\n                      title=\"Search Mode\"\n                      description=\"UI mode for quick document search across your organization.\"\n                      disabled={uniqueSources.length === 0}\n                    >\n                      <SwitchField\n                        name=\"search_ui_enabled\"\n                        onCheckedChange={(checked) => {\n                          void saveSettings({ search_ui_enabled: checked });\n                        }}\n                        disabled={uniqueSources.length === 0}\n                      />\n                    </InputLayouts.Horizontal>\n                  </div>\n                </Disabled>\n              </SimpleTooltip>\n              <InputLayouts.Horizontal\n                title=\"Deep Research\"\n                description=\"Agentic research system that works across the web and connected sources. Uses significantly more tokens per query.\"\n              >\n                <SwitchField\n                  name=\"deep_research_enabled\"\n                  onCheckedChange={(checked) => {\n                    void saveSettings({ deep_research_enabled: checked });\n                  }}\n                />\n              </InputLayouts.Horizontal>\n              <InputLayouts.Horizontal\n                title=\"Chat Auto-Scroll\"\n                description=\"Automatically scroll to new content as chat generates response. Users can override this in their personal settings.\"\n              >\n                <SwitchField\n                  name=\"auto_scroll\"\n                  onCheckedChange={(checked) => {\n                    void saveSettings({ auto_scroll: checked });\n                  }}\n                />\n              </InputLayouts.Horizontal>\n            </Card>\n          </Section>\n\n          <Separator noPadding />\n\n          <Disabled disabled={values.disable_default_assistant}>\n            <div>\n              <Section gap={1.5}>\n                {/* Connectors */}\n                <Section gap={0.75}>\n                  <Content\n                    title=\"Connectors\"\n                    sizePreset=\"main-content\"\n                    variant=\"section\"\n                  />\n\n                  <Section\n                    flexDirection=\"row\"\n                    justifyContent=\"between\"\n                    alignItems=\"center\"\n                    gap={0.25}\n                  >\n                    {uniqueSources.length === 0 ? (\n                      <EmptyMessage title=\"No connectors set up\" />\n                    ) : (\n                      <>\n                        <Section\n                          flexDirection=\"row\"\n                          justifyContent=\"start\"\n                          alignItems=\"center\"\n                          gap={0.25}\n                        >\n                          {uniqueSources.slice(0, 3).map((source) => {\n                            const meta = getSourceMetadata(source);\n                            return (\n                              <Card\n                                key={source}\n                                padding={0.75}\n                                className=\"w-[10rem]\"\n                              >\n                                <Content\n                                  icon={meta.icon}\n                                  title={meta.displayName}\n                                  sizePreset=\"main-ui\"\n                                />\n                              </Card>\n                            );\n                          })}\n                        </Section>\n\n                        <Button\n                          href=\"/admin/indexing/status\"\n                          prominence=\"tertiary\"\n                          rightIcon={SvgExternalLink}\n                        >\n                          Manage All\n                        </Button>\n                      </>\n                    )}\n                  </Section>\n                </Section>\n\n                {/* Actions & Tools */}\n                <SimpleCollapsible>\n                  <SimpleCollapsible.Header\n                    title=\"Actions & Tools\"\n                    description=\"Tools and capabilities available for chat to use. This does not apply to agents.\"\n                  />\n                  <SimpleCollapsible.Content>\n                    <Section gap={0.5}>\n                      {vectorDbEnabled && searchTool && (\n                        <Card>\n                          <InputLayouts.Horizontal\n                            title=\"Internal Search\"\n                            description=\"Search through your organization's connected knowledge base and documents.\"\n                          >\n                            <Switch\n                              checked={isToolEnabled(searchTool.id)}\n                              onCheckedChange={(checked) =>\n                                void toggleTool(searchTool.id, checked)\n                              }\n                            />\n                          </InputLayouts.Horizontal>\n                        </Card>\n                      )}\n\n                      <SimpleTooltip\n                        tooltip={\n                          imageGenTool\n                            ? undefined\n                            : \"Image generation requires a configured model. Set one up under Configuration > Image Generation, or ask an admin.\"\n                        }\n                        side=\"top\"\n                      >\n                        <Card variant={imageGenTool ? undefined : \"disabled\"}>\n                          <InputLayouts.Horizontal\n                            title=\"Image Generation\"\n                            description=\"Generate and manipulate images using AI-powered tools.\"\n                            disabled={!imageGenTool}\n                          >\n                            <Switch\n                              checked={\n                                imageGenTool\n                                  ? isToolEnabled(imageGenTool.id)\n                                  : false\n                              }\n                              onCheckedChange={(checked) =>\n                                imageGenTool &&\n                                void toggleTool(imageGenTool.id, checked)\n                              }\n                              disabled={!imageGenTool}\n                            />\n                          </InputLayouts.Horizontal>\n                        </Card>\n                      </SimpleTooltip>\n\n                      <Card variant={webSearchTool ? undefined : \"disabled\"}>\n                        <InputLayouts.Horizontal\n                          title=\"Web Search\"\n                          description=\"Search the web for real-time information and up-to-date results.\"\n                          disabled={!webSearchTool}\n                        >\n                          <Switch\n                            checked={\n                              webSearchTool\n                                ? isToolEnabled(webSearchTool.id)\n                                : false\n                            }\n                            onCheckedChange={(checked) =>\n                              webSearchTool &&\n                              void toggleTool(webSearchTool.id, checked)\n                            }\n                            disabled={!webSearchTool}\n                          />\n                        </InputLayouts.Horizontal>\n                      </Card>\n\n                      <Card variant={openURLTool ? undefined : \"disabled\"}>\n                        <InputLayouts.Horizontal\n                          title=\"Open URL\"\n                          description=\"Fetch and read content from web URLs.\"\n                          disabled={!openURLTool}\n                        >\n                          <Switch\n                            checked={\n                              openURLTool\n                                ? isToolEnabled(openURLTool.id)\n                                : false\n                            }\n                            onCheckedChange={(checked) =>\n                              openURLTool &&\n                              void toggleTool(openURLTool.id, checked)\n                            }\n                            disabled={!openURLTool}\n                          />\n                        </InputLayouts.Horizontal>\n                      </Card>\n\n                      <Card\n                        variant={codeInterpreterTool ? undefined : \"disabled\"}\n                      >\n                        <InputLayouts.Horizontal\n                          title=\"Code Interpreter\"\n                          description=\"Generate and run code.\"\n                          disabled={!codeInterpreterTool}\n                        >\n                          <Switch\n                            checked={\n                              codeInterpreterTool\n                                ? isToolEnabled(codeInterpreterTool.id)\n                                : false\n                            }\n                            onCheckedChange={(checked) =>\n                              codeInterpreterTool &&\n                              void toggleTool(codeInterpreterTool.id, checked)\n                            }\n                            disabled={!codeInterpreterTool}\n                          />\n                        </InputLayouts.Horizontal>\n                      </Card>\n                    </Section>\n\n                    {/* Separator between built-in tools and MCP/OpenAPI tools */}\n                    {(mcpServersWithTools.length > 0 ||\n                      openApiTools.length > 0) && (\n                      <Separator noPadding className=\"py-3\" />\n                    )}\n\n                    {/* MCP Servers & OpenAPI Tools */}\n                    <Section gap={0.5}>\n                      {mcpServersWithTools.map(({ server, tools }) => (\n                        <MCPServerCard\n                          key={server.id}\n                          server={server}\n                          tools={tools}\n                          isToolEnabled={isToolEnabled}\n                          onToggleTool={toggleTool}\n                          onToggleTools={toggleTools}\n                        />\n                      ))}\n                      {openApiTools.map((tool) => (\n                        <ExpandableCard.Root key={tool.id} defaultFolded>\n                          <ActionsLayouts.Header\n                            title={tool.display_name || tool.name}\n                            description={tool.description}\n                            icon={SvgActions}\n                            rightChildren={\n                              <Switch\n                                checked={isToolEnabled(tool.id)}\n                                onCheckedChange={(checked) =>\n                                  toggleTool(tool.id, checked)\n                                }\n                              />\n                            }\n                          />\n                        </ExpandableCard.Root>\n                      ))}\n                    </Section>\n                  </SimpleCollapsible.Content>\n                </SimpleCollapsible>\n              </Section>\n            </div>\n          </Disabled>\n\n          <Separator noPadding />\n\n          {/* Advanced Options */}\n          <SimpleCollapsible defaultOpen={false}>\n            <SimpleCollapsible.Header title=\"Advanced Options\" />\n            <SimpleCollapsible.Content>\n              <Section gap={1}>\n                <Card>\n                  <InputLayouts.Horizontal\n                    title=\"Keep Chat History\"\n                    description=\"Specify how long Onyx should retain chats in your organization.\"\n                  >\n                    <InputSelectField\n                      name=\"maximum_chat_retention_days\"\n                      onValueChange={(value) => {\n                        void saveSettings({\n                          maximum_chat_retention_days:\n                            value === \"forever\" ? null : parseInt(value, 10),\n                        });\n                      }}\n                    >\n                      <InputSelect.Trigger />\n                      <InputSelect.Content>\n                        <InputSelect.Item value=\"forever\">\n                          Forever\n                        </InputSelect.Item>\n                        <InputSelect.Item value=\"7\">7 days</InputSelect.Item>\n                        <InputSelect.Item value=\"30\">30 days</InputSelect.Item>\n                        <InputSelect.Item value=\"90\">90 days</InputSelect.Item>\n                        <InputSelect.Item value=\"365\">\n                          365 days\n                        </InputSelect.Item>\n                      </InputSelect.Content>\n                    </InputSelectField>\n                  </InputLayouts.Horizontal>\n                </Card>\n\n                <Card>\n                  <InputLayouts.Vertical\n                    title=\"File Attachment Size Limit\"\n                    description=\"Files attached in chats and projects must fit within both limits to be accepted. Larger files increase latency, memory usage, and token costs.\"\n                  >\n                    <FileSizeLimitFields\n                      saveSettings={saveSettings}\n                      defaultUploadSizeMb={\n                        settings?.settings.default_user_file_max_upload_size_mb?.toString() ??\n                        \"100\"\n                      }\n                      defaultTokenThresholdK={\n                        settings?.settings.default_file_token_count_threshold_k?.toString() ??\n                        \"200\"\n                      }\n                      maxAllowedUploadSizeMb={\n                        settings?.settings.max_allowed_upload_size_mb\n                      }\n                    />\n                  </InputLayouts.Vertical>\n                </Card>\n\n                <Card>\n                  <InputLayouts.Horizontal\n                    title=\"Allow Anonymous Users\"\n                    description=\"Allow anyone to start chats without logging in. They do not see any other chats and cannot create agents or update settings.\"\n                  >\n                    <SwitchField\n                      name=\"anonymous_user_enabled\"\n                      onCheckedChange={(checked) => {\n                        void saveSettings({ anonymous_user_enabled: checked });\n                      }}\n                    />\n                  </InputLayouts.Horizontal>\n\n                  <InputLayouts.Horizontal\n                    title=\"Always Start with an Agent\"\n                    description=\"This removes the default chat. Users will always start in an agent, and new chats will be created in their last active agent. Set featured agents to help new users get started.\"\n                  >\n                    <SwitchField\n                      name=\"disable_default_assistant\"\n                      onCheckedChange={(checked) => {\n                        void saveSettings({\n                          disable_default_assistant: checked,\n                        });\n                      }}\n                    />\n                  </InputLayouts.Horizontal>\n                </Card>\n              </Section>\n            </SimpleCollapsible.Content>\n          </SimpleCollapsible>\n        </SettingsLayouts.Body>\n      </SettingsLayouts.Root>\n\n      <Modal\n        open={systemPromptModalOpen}\n        onOpenChange={setSystemPromptModalOpen}\n      >\n        <Modal.Content width=\"xl\" height=\"fit\">\n          <Formik\n            initialValues={{\n              system_prompt:\n                defaultAgentConfig?.system_prompt ??\n                defaultAgentConfig?.default_system_prompt ??\n                \"\",\n            }}\n            onSubmit={async ({ system_prompt }) => {\n              try {\n                const response = await fetch(\"/api/admin/default-assistant\", {\n                  method: \"PATCH\",\n                  headers: { \"Content-Type\": \"application/json\" },\n                  body: JSON.stringify({ system_prompt }),\n                });\n                if (!response.ok) {\n                  const errorMsg = (await response.json()).detail;\n                  throw new Error(errorMsg);\n                }\n                await mutateDefaultAgent();\n                setSystemPromptModalOpen(false);\n                toast.success(\"System prompt updated\");\n              } catch {\n                toast.error(\"Failed to update system prompt\");\n              }\n            }}\n          >\n            {({ dirty, isSubmitting, submitForm }) => (\n              <Form>\n                <Modal.Header\n                  icon={SvgAddLines}\n                  title=\"System Prompt\"\n                  description=\"This base prompt is prepended to all chats, agents, and projects.\"\n                  onClose={() => setSystemPromptModalOpen(false)}\n                />\n                <Modal.Body>\n                  <Section gap={0.25} alignItems=\"start\">\n                    <InputTextAreaField\n                      name=\"system_prompt\"\n                      placeholder=\"Enter your system prompt...\"\n                      rows={8}\n                      maxRows={20}\n                      autoResize\n                    />\n                    <Text font=\"secondary-body\" color=\"text-03\">\n                      {markdown(\n                        \"You can use the following placeholders in your prompt:\\n`{{CURRENT_DATETIME}}` - Current date and day of the week in a human-readable format.\\n`{{CITATION_GUIDANCE}}` - Instructions for providing citations when facts are retrieved from search tools.\\nOnly included when search tools are used.\"\n                      )}\n                    </Text>\n                  </Section>\n                  <OpalCard background=\"none\" border=\"solid\" padding=\"sm\">\n                    <Content\n                      sizePreset=\"main-ui\"\n                      icon={SvgAlertCircle}\n                      title=\"Modify with caution.\"\n                      description=\"System prompt affects all chats, agents, and projects. Significant changes may degrade response quality.\"\n                    />\n                  </OpalCard>\n                </Modal.Body>\n                <Modal.Footer>\n                  <Button\n                    prominence=\"secondary\"\n                    onClick={() => setSystemPromptModalOpen(false)}\n                  >\n                    Cancel\n                  </Button>\n                  <Button\n                    prominence=\"primary\"\n                    onClick={submitForm}\n                    disabled={!dirty || isSubmitting}\n                  >\n                    Save\n                  </Button>\n                </Modal.Footer>\n              </Form>\n            )}\n          </Formik>\n        </Modal.Content>\n      </Modal>\n    </>\n  );\n}\n\nexport default function ChatPreferencesPage() {\n  const settings = useSettingsContext();\n\n  const initialValues: ChatPreferencesFormValues = {\n    // Features\n    search_ui_enabled: settings.settings.search_ui_enabled ?? false,\n    deep_research_enabled: settings.settings.deep_research_enabled ?? true,\n    auto_scroll: settings.settings.auto_scroll ?? false,\n\n    // Team context\n    company_name: settings.settings.company_name ?? \"\",\n    company_description: settings.settings.company_description ?? \"\",\n\n    // Advanced\n    maximum_chat_retention_days:\n      settings.settings.maximum_chat_retention_days?.toString() ?? \"forever\",\n    anonymous_user_enabled: settings.settings.anonymous_user_enabled ?? false,\n    disable_default_assistant:\n      settings.settings.disable_default_assistant ?? false,\n\n    // File limits — for upload size: 0/null means \"use default\";\n    // for token threshold: null means \"use default\", 0 means \"no limit\".\n    user_file_max_upload_size_mb:\n      (settings.settings.user_file_max_upload_size_mb ?? 0) <= 0\n        ? settings.settings.default_user_file_max_upload_size_mb?.toString() ??\n          \"100\"\n        : settings.settings.user_file_max_upload_size_mb!.toString(),\n    file_token_count_threshold_k:\n      settings.settings.file_token_count_threshold_k == null\n        ? settings.settings.default_file_token_count_threshold_k?.toString() ??\n          \"200\"\n        : settings.settings.file_token_count_threshold_k === 0\n          ? \"\"\n          : settings.settings.file_token_count_threshold_k.toString(),\n  };\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      onSubmit={() => {}}\n      enableReinitialize\n    >\n      <Form className=\"h-full w-full\">\n        <ChatPreferencesForm />\n      </Form>\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/CodeInterpreterPage/index.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport {\n  SvgArrowExchange,\n  SvgCheckCircle,\n  SvgRefreshCw,\n  SvgTerminal,\n  SvgUnplug,\n  SvgXOctagon,\n} from \"@opal/icons\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { Button, SelectCard } from \"@opal/components\";\nimport { CardHeaderLayout } from \"@opal/layouts\";\nimport { Disabled, Hoverable } from \"@opal/core\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport useCodeInterpreter from \"@/hooks/useCodeInterpreter\";\nimport { updateCodeInterpreter } from \"@/refresh-pages/admin/CodeInterpreterPage/svc\";\nimport { toast } from \"@/hooks/useToast\";\n\nconst route = ADMIN_ROUTES.CODE_INTERPRETER;\n\n// ---------------------------------------------------------------------------\n// Sub-components\n// ---------------------------------------------------------------------------\n\nfunction CheckingStatus() {\n  return (\n    <Section\n      flexDirection=\"row\"\n      justifyContent=\"end\"\n      alignItems=\"center\"\n      gap={0.25}\n      padding={0.5}\n    >\n      <Text mainUiAction text03>\n        Checking...\n      </Text>\n      <SimpleLoader />\n    </Section>\n  );\n}\n\ninterface ConnectionStatusProps {\n  healthy: boolean;\n  isLoading: boolean;\n}\n\nfunction ConnectionStatus({ healthy, isLoading }: ConnectionStatusProps) {\n  if (isLoading) {\n    return <CheckingStatus />;\n  }\n\n  const label = healthy ? \"Connected\" : \"Connection Lost\";\n  const Icon = healthy ? SvgCheckCircle : SvgXOctagon;\n  const iconColor = healthy ? \"text-status-success-05\" : \"text-status-error-05\";\n\n  return (\n    <Section\n      flexDirection=\"row\"\n      justifyContent=\"end\"\n      alignItems=\"center\"\n      gap={0.25}\n      padding={0.5}\n    >\n      <Text mainUiAction text03>\n        {label}\n      </Text>\n      <Icon size={16} className={iconColor} />\n    </Section>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Page\n// ---------------------------------------------------------------------------\n\nexport default function CodeInterpreterPage() {\n  const { isHealthy, isEnabled, isLoading, refetch } = useCodeInterpreter();\n  const [showDisconnectModal, setShowDisconnectModal] = useState(false);\n  const [isReconnecting, setIsReconnecting] = useState(false);\n\n  async function handleToggle(enabled: boolean) {\n    const action = enabled ? \"reconnect\" : \"disconnect\";\n    setIsReconnecting(enabled);\n    try {\n      const response = await updateCodeInterpreter({ enabled });\n      if (!response.ok) {\n        toast.error(`Failed to ${action} Code Interpreter`);\n        return;\n      }\n      setShowDisconnectModal(false);\n      refetch();\n    } finally {\n      setIsReconnecting(false);\n    }\n  }\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title={route.title}\n        description=\"Safe and sandboxed Python runtime available to your LLM. See docs for more details.\"\n        separator\n      />\n\n      <SettingsLayouts.Body>\n        {isEnabled || isLoading ? (\n          <Hoverable.Root group=\"code-interpreter/Card\">\n            <SelectCard state=\"filled\" padding=\"sm\" rounding=\"lg\">\n              <CardHeaderLayout\n                sizePreset=\"main-ui\"\n                variant=\"section\"\n                icon={SvgTerminal}\n                title=\"Code Interpreter\"\n                description=\"Built-in Python runtime\"\n                rightChildren={\n                  <ConnectionStatus healthy={isHealthy} isLoading={isLoading} />\n                }\n                bottomRightChildren={\n                  <Section\n                    flexDirection=\"row\"\n                    justifyContent=\"end\"\n                    alignItems=\"center\"\n                    gap={0.25}\n                    padding={0.25}\n                  >\n                    <Disabled disabled={isLoading}>\n                      <Hoverable.Item group=\"code-interpreter/Card\">\n                        <Button\n                          prominence=\"tertiary\"\n                          size=\"sm\"\n                          icon={SvgUnplug}\n                          onClick={() => setShowDisconnectModal(true)}\n                          tooltip=\"Disconnect\"\n                        />\n                      </Hoverable.Item>\n                    </Disabled>\n                    <Button\n                      disabled={isLoading}\n                      prominence=\"tertiary\"\n                      size=\"sm\"\n                      icon={SvgRefreshCw}\n                      onClick={refetch}\n                      tooltip=\"Refresh\"\n                    />\n                  </Section>\n                }\n              />\n            </SelectCard>\n          </Hoverable.Root>\n        ) : (\n          <SelectCard\n            state=\"empty\"\n            padding=\"sm\"\n            rounding=\"lg\"\n            onClick={() => handleToggle(true)}\n          >\n            <CardHeaderLayout\n              sizePreset=\"main-ui\"\n              variant=\"section\"\n              icon={SvgTerminal}\n              title=\"Code Interpreter (Disconnected)\"\n              description=\"Built-in Python runtime\"\n              rightChildren={\n                <Section flexDirection=\"row\" alignItems=\"center\" padding={0.5}>\n                  {isReconnecting ? (\n                    <CheckingStatus />\n                  ) : (\n                    <Button\n                      prominence=\"tertiary\"\n                      rightIcon={SvgArrowExchange}\n                      onClick={(e) => {\n                        e.stopPropagation();\n                        handleToggle(true);\n                      }}\n                    >\n                      Reconnect\n                    </Button>\n                  )}\n                </Section>\n              }\n            />\n          </SelectCard>\n        )}\n      </SettingsLayouts.Body>\n\n      {showDisconnectModal && (\n        <ConfirmationModalLayout\n          icon={SvgUnplug}\n          title=\"Disconnect Code Interpreter\"\n          onClose={() => setShowDisconnectModal(false)}\n          submit={\n            <Button variant=\"danger\" onClick={() => handleToggle(false)}>\n              Disconnect\n            </Button>\n          }\n        >\n          <Text as=\"p\" text03>\n            All running sessions connected to{\" \"}\n            <Text as=\"span\" mainContentEmphasis text03>\n              Code Interpreter\n            </Text>{\" \"}\n            will stop working. Note that this will not remove any data from your\n            runtime. You can reconnect to this runtime later if needed.\n          </Text>\n        </ConfirmationModalLayout>\n      )}\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/CodeInterpreterPage/svc.ts",
    "content": "const UPDATE_ENDPOINT = \"/api/admin/code-interpreter\";\n\ninterface CodeInterpreterUpdateRequest {\n  enabled: boolean;\n}\n\nexport async function updateCodeInterpreter(\n  request: CodeInterpreterUpdateRequest\n): Promise<Response> {\n  return fetch(UPDATE_ENDPOINT, {\n    method: \"PUT\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(request),\n  });\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/CreateGroupPage.tsx",
    "content": "\"use client\";\n\nimport { useMemo, useState } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport useSWR from \"swr\";\nimport { Table, Button } from \"@opal/components\";\nimport { IllustrationContent } from \"@opal/layouts\";\nimport { SvgUsers } from \"@opal/icons\";\nimport SvgNoResult from \"@opal/illustrations/no-result\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { toast } from \"@/hooks/useToast\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport useAdminUsers from \"@/hooks/useAdminUsers\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport type { ApiKeyDescriptor, MemberRow } from \"./interfaces\";\nimport {\n  createGroup,\n  updateAgentGroupSharing,\n  updateDocSetGroupSharing,\n  saveTokenLimits,\n} from \"./svc\";\nimport { apiKeyToMemberRow, memberTableColumns, PAGE_SIZE } from \"./shared\";\nimport SharedGroupResources from \"@/refresh-pages/admin/GroupsPage/SharedGroupResources\";\nimport TokenLimitSection from \"./TokenLimitSection\";\nimport type { TokenLimit } from \"./TokenLimitSection\";\n\nfunction CreateGroupPage() {\n  const router = useRouter();\n  const [groupName, setGroupName] = useState(\"\");\n  const [selectedUserIds, setSelectedUserIds] = useState<string[]>([]);\n  const [searchTerm, setSearchTerm] = useState(\"\");\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const [selectedCcPairIds, setSelectedCcPairIds] = useState<number[]>([]);\n  const [selectedDocSetIds, setSelectedDocSetIds] = useState<number[]>([]);\n  const [selectedAgentIds, setSelectedAgentIds] = useState<number[]>([]);\n  const [tokenLimits, setTokenLimits] = useState<TokenLimit[]>([\n    { tokenBudget: null, periodHours: null },\n  ]);\n\n  const { users, isLoading: usersLoading, error: usersError } = useAdminUsers();\n\n  const {\n    data: apiKeys,\n    isLoading: apiKeysLoading,\n    error: apiKeysError,\n  } = useSWR<ApiKeyDescriptor[]>(SWR_KEYS.adminApiKeys, errorHandlingFetcher);\n\n  const isLoading = usersLoading || apiKeysLoading;\n  const error = usersError ?? apiKeysError;\n\n  const allRows: MemberRow[] = useMemo(() => {\n    const activeUsers = users.filter((u) => u.is_active);\n    const serviceAccountRows = (apiKeys ?? []).map(apiKeyToMemberRow);\n    return [...activeUsers, ...serviceAccountRows];\n  }, [users, apiKeys]);\n\n  async function handleCreate() {\n    const trimmed = groupName.trim();\n    if (!trimmed) {\n      toast.error(\"Group name is required\");\n      return;\n    }\n\n    setIsSubmitting(true);\n    try {\n      const groupId = await createGroup(\n        trimmed,\n        selectedUserIds,\n        selectedCcPairIds\n      );\n      await updateAgentGroupSharing(groupId, [], selectedAgentIds);\n      await updateDocSetGroupSharing(groupId, [], selectedDocSetIds);\n      await saveTokenLimits(groupId, tokenLimits, []);\n      toast.success(`Group \"${trimmed}\" created`);\n      router.push(\"/admin/groups\");\n    } catch (e) {\n      toast.error(e instanceof Error ? e.message : \"Failed to create group\");\n    } finally {\n      setIsSubmitting(false);\n    }\n  }\n\n  const headerActions = (\n    <Section flexDirection=\"row\" gap={0.5} width=\"auto\" height=\"auto\">\n      <Button\n        prominence=\"secondary\"\n        onClick={() => router.push(\"/admin/groups\")}\n      >\n        Cancel\n      </Button>\n      <Button\n        onClick={handleCreate}\n        disabled={!groupName.trim() || isSubmitting}\n      >\n        Create\n      </Button>\n    </Section>\n  );\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={SvgUsers}\n        title=\"Create Group\"\n        separator\n        rightChildren={headerActions}\n      />\n\n      <SettingsLayouts.Body>\n        {/* Group Name */}\n        <Section\n          gap={0.5}\n          height=\"auto\"\n          alignItems=\"stretch\"\n          justifyContent=\"start\"\n        >\n          <Text mainUiBody text04>\n            Group Name\n          </Text>\n          <InputTypeIn\n            placeholder=\"Name your group\"\n            value={groupName}\n            onChange={(e) => setGroupName(e.target.value)}\n          />\n        </Section>\n\n        <Separator noPadding />\n\n        {/* Members table */}\n        {isLoading && <SimpleLoader />}\n\n        {error && (\n          <Text as=\"p\" secondaryBody text03>\n            Failed to load users.\n          </Text>\n        )}\n\n        {!isLoading && !error && (\n          <Section\n            gap={0.75}\n            height=\"auto\"\n            alignItems=\"stretch\"\n            justifyContent=\"start\"\n          >\n            <InputTypeIn\n              value={searchTerm}\n              onChange={(e) => setSearchTerm(e.target.value)}\n              placeholder=\"Search users and accounts...\"\n              leftSearchIcon\n            />\n            <Table\n              data={allRows}\n              columns={memberTableColumns}\n              getRowId={(row) => row.id ?? row.email}\n              pageSize={PAGE_SIZE}\n              searchTerm={searchTerm}\n              selectionBehavior=\"multi-select\"\n              onSelectionChange={setSelectedUserIds}\n              footer={{}}\n              emptyState={\n                <IllustrationContent\n                  illustration={SvgNoResult}\n                  title=\"No users found\"\n                  description=\"No users match your search.\"\n                />\n              }\n            />\n          </Section>\n        )}\n        <SharedGroupResources\n          selectedCcPairIds={selectedCcPairIds}\n          onCcPairIdsChange={setSelectedCcPairIds}\n          selectedDocSetIds={selectedDocSetIds}\n          onDocSetIdsChange={setSelectedDocSetIds}\n          selectedAgentIds={selectedAgentIds}\n          onAgentIdsChange={setSelectedAgentIds}\n        />\n\n        <TokenLimitSection\n          limits={tokenLimits}\n          onLimitsChange={setTokenLimits}\n        />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n\nexport default CreateGroupPage;\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/EditGroupPage.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useEffect, useMemo, useRef, useState } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport useSWR, { useSWRConfig } from \"swr\";\nimport { Table, Button } from \"@opal/components\";\nimport { IllustrationContent } from \"@opal/layouts\";\nimport { SvgUsers, SvgTrash, SvgMinusCircle, SvgPlusCircle } from \"@opal/icons\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport SvgNoResult from \"@opal/illustrations/no-result\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { toast } from \"@/hooks/useToast\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport useAdminUsers from \"@/hooks/useAdminUsers\";\nimport type { UserGroup } from \"@/lib/types\";\nimport type {\n  ApiKeyDescriptor,\n  MemberRow,\n  TokenRateLimitDisplay,\n} from \"./interfaces\";\nimport {\n  apiKeyToMemberRow,\n  baseColumns,\n  memberTableColumns,\n  tc,\n  PAGE_SIZE,\n} from \"./shared\";\nimport {\n  renameGroup,\n  updateGroup,\n  deleteGroup,\n  updateAgentGroupSharing,\n  updateDocSetGroupSharing,\n  saveTokenLimits,\n} from \"./svc\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport SharedGroupResources from \"@/refresh-pages/admin/GroupsPage/SharedGroupResources\";\nimport TokenLimitSection from \"./TokenLimitSection\";\nimport type { TokenLimit } from \"./TokenLimitSection\";\n\nconst addModeColumns = memberTableColumns;\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\ninterface EditGroupPageProps {\n  groupId: number;\n}\n\nfunction EditGroupPage({ groupId }: EditGroupPageProps) {\n  const router = useRouter();\n  const { mutate } = useSWRConfig();\n\n  // Fetch the group data — poll every 5s while syncing so the UI updates\n  // automatically when the backend finishes processing the previous edit.\n  const {\n    data: groups,\n    isLoading: groupLoading,\n    error: groupError,\n  } = useSWR<UserGroup[]>(SWR_KEYS.adminUserGroups, errorHandlingFetcher, {\n    refreshInterval: (latestData) => {\n      const g = latestData?.find((g) => g.id === groupId);\n      return g && !g.is_up_to_date ? 5000 : 0;\n    },\n  });\n\n  const group = useMemo(\n    () => groups?.find((g) => g.id === groupId) ?? null,\n    [groups, groupId]\n  );\n\n  const isSyncing = group != null && !group.is_up_to_date;\n\n  // Fetch token rate limits for this group\n  const { data: tokenRateLimits, isLoading: tokenLimitsLoading } = useSWR<\n    TokenRateLimitDisplay[]\n  >(SWR_KEYS.userGroupTokenRateLimit(groupId), errorHandlingFetcher);\n\n  // Form state\n  const [groupName, setGroupName] = useState(\"\");\n  const [selectedUserIds, setSelectedUserIds] = useState<string[]>([]);\n  const [searchTerm, setSearchTerm] = useState(\"\");\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const isSubmittingRef = useRef(false);\n  const [selectedCcPairIds, setSelectedCcPairIds] = useState<number[]>([]);\n  const [selectedDocSetIds, setSelectedDocSetIds] = useState<number[]>([]);\n  const [selectedAgentIds, setSelectedAgentIds] = useState<number[]>([]);\n  const [tokenLimits, setTokenLimits] = useState<TokenLimit[]>([\n    { tokenBudget: null, periodHours: null },\n  ]);\n  const [showDeleteModal, setShowDeleteModal] = useState(false);\n  const [isDeleting, setIsDeleting] = useState(false);\n  const [initialized, setInitialized] = useState(false);\n  const [isAddingMembers, setIsAddingMembers] = useState(false);\n  const initialAgentIdsRef = useRef<number[]>([]);\n  const initialDocSetIdsRef = useRef<number[]>([]);\n\n  // Users and API keys\n  const { users, isLoading: usersLoading, error: usersError } = useAdminUsers();\n\n  const {\n    data: apiKeys,\n    isLoading: apiKeysLoading,\n    error: apiKeysError,\n  } = useSWR<ApiKeyDescriptor[]>(SWR_KEYS.adminApiKeys, errorHandlingFetcher);\n\n  const isLoading =\n    groupLoading || usersLoading || apiKeysLoading || tokenLimitsLoading;\n  const error = groupError ?? usersError ?? apiKeysError;\n\n  // Pre-populate form when group data loads\n  useEffect(() => {\n    if (group && !initialized) {\n      setGroupName(group.name);\n      setSelectedUserIds(group.users.map((u) => u.id));\n      setSelectedCcPairIds(group.cc_pairs.map((cc) => cc.id));\n      const docSetIds = group.document_sets.map((ds) => ds.id);\n      setSelectedDocSetIds(docSetIds);\n      initialDocSetIdsRef.current = docSetIds;\n      const agentIds = group.personas.map((p) => p.id);\n      setSelectedAgentIds(agentIds);\n      initialAgentIdsRef.current = agentIds;\n      setInitialized(true);\n    }\n  }, [group, initialized]);\n\n  // Pre-populate token limits when fetched\n  useEffect(() => {\n    if (tokenRateLimits && tokenRateLimits.length > 0) {\n      setTokenLimits(\n        tokenRateLimits.map((trl) => ({\n          tokenBudget: trl.token_budget,\n          periodHours: trl.period_hours,\n        }))\n      );\n    }\n  }, [tokenRateLimits]);\n\n  const allRows = useMemo(() => {\n    const activeUsers = users.filter((u) => u.is_active);\n    const serviceAccountRows = (apiKeys ?? []).map(apiKeyToMemberRow);\n    return [...activeUsers, ...serviceAccountRows];\n  }, [users, apiKeys]);\n\n  const memberRows = useMemo(() => {\n    const selected = new Set(selectedUserIds);\n    return allRows.filter((r) => selected.has(r.id ?? r.email));\n  }, [allRows, selectedUserIds]);\n\n  const currentRowSelection = useMemo(() => {\n    const sel: Record<string, boolean> = {};\n    for (const id of selectedUserIds) sel[id] = true;\n    return sel;\n  }, [selectedUserIds]);\n\n  const handleRemoveMember = useCallback((userId: string) => {\n    setSelectedUserIds((prev) => prev.filter((id) => id !== userId));\n  }, []);\n\n  const memberColumns = useMemo(\n    () => [\n      ...baseColumns,\n      tc.actions({\n        showSorting: false,\n        showColumnVisibility: false,\n        cell: (row: MemberRow) => (\n          <IconButton\n            icon={SvgMinusCircle}\n            tertiary\n            onClick={(e) => {\n              e.stopPropagation();\n              handleRemoveMember(row.id ?? row.email);\n            }}\n          />\n        ),\n      }),\n    ],\n    [handleRemoveMember]\n  );\n\n  // IDs of members not visible in the add-mode table (e.g. inactive users).\n  // We preserve these so they aren't silently removed when the table fires\n  // onSelectionChange with only the visible rows.\n  const hiddenMemberIds = useMemo(() => {\n    const visibleIds = new Set(allRows.map((r) => r.id ?? r.email));\n    return selectedUserIds.filter((id) => !visibleIds.has(id));\n  }, [allRows, selectedUserIds]);\n\n  // Guard onSelectionChange: ignore updates until the form is fully initialized.\n  // Without this, TanStack fires onSelectionChange before all rows are loaded,\n  // which overwrites selectedUserIds with a partial set.\n  const handleSelectionChange = useCallback(\n    (ids: string[]) => {\n      if (!initialized) return;\n      setSelectedUserIds([...ids, ...hiddenMemberIds]);\n    },\n    [initialized, hiddenMemberIds]\n  );\n\n  async function handleSave() {\n    if (isSubmittingRef.current) return;\n\n    const trimmed = groupName.trim();\n    if (!trimmed) {\n      toast.error(\"Group name is required\");\n      return;\n    }\n\n    // Re-fetch group to check sync status before saving\n    const freshGroups = await fetch(SWR_KEYS.adminUserGroups).then((r) =>\n      r.json()\n    );\n    const freshGroup = freshGroups.find((g: UserGroup) => g.id === groupId);\n    if (freshGroup && !freshGroup.is_up_to_date) {\n      toast.error(\n        \"This group is currently syncing. Please wait a moment and try again.\"\n      );\n      return;\n    }\n\n    isSubmittingRef.current = true;\n    setIsSubmitting(true);\n    try {\n      // Rename if name changed\n      if (group && trimmed !== group.name) {\n        await renameGroup(group.id, trimmed);\n      }\n\n      // Update members and cc_pairs\n      await updateGroup(groupId, selectedUserIds, selectedCcPairIds);\n\n      // Update agent sharing (add/remove this group from changed agents)\n      await updateAgentGroupSharing(\n        groupId,\n        initialAgentIdsRef.current,\n        selectedAgentIds\n      );\n\n      // Update document set sharing (add/remove this group from changed doc sets)\n      await updateDocSetGroupSharing(\n        groupId,\n        initialDocSetIdsRef.current,\n        selectedDocSetIds\n      );\n\n      // Save token rate limits (create/update/delete)\n      await saveTokenLimits(groupId, tokenLimits, tokenRateLimits ?? []);\n\n      // Update refs so subsequent saves diff correctly\n      initialAgentIdsRef.current = selectedAgentIds;\n      initialDocSetIdsRef.current = selectedDocSetIds;\n\n      mutate(SWR_KEYS.adminUserGroups);\n      mutate(SWR_KEYS.userGroupTokenRateLimit(groupId));\n      toast.success(`Group \"${trimmed}\" updated`);\n      router.push(\"/admin/groups\");\n    } catch (e) {\n      toast.error(e instanceof Error ? e.message : \"Failed to update group\");\n    } finally {\n      isSubmittingRef.current = false;\n      setIsSubmitting(false);\n    }\n  }\n\n  async function handleDelete() {\n    setIsDeleting(true);\n    try {\n      await deleteGroup(groupId);\n      mutate(SWR_KEYS.adminUserGroups);\n      toast.success(`Group \"${group?.name}\" deleted`);\n      router.push(\"/admin/groups\");\n    } catch (e) {\n      toast.error(e instanceof Error ? e.message : \"Failed to delete group\");\n    } finally {\n      setIsDeleting(false);\n      setShowDeleteModal(false);\n    }\n  }\n\n  // 404 state\n  if (!isLoading && !error && !group) {\n    return (\n      <SettingsLayouts.Root>\n        <SettingsLayouts.Header\n          icon={SvgUsers}\n          title=\"Group Not Found\"\n          separator\n        />\n        <SettingsLayouts.Body>\n          <IllustrationContent\n            illustration={SvgNoResult}\n            title=\"Group not found\"\n            description=\"This group doesn't exist or may have been deleted.\"\n          />\n        </SettingsLayouts.Body>\n      </SettingsLayouts.Root>\n    );\n  }\n\n  const headerActions = (\n    <Section flexDirection=\"row\" gap={0.5} width=\"auto\" height=\"auto\">\n      <Button\n        prominence=\"secondary\"\n        onClick={() => router.push(\"/admin/groups\")}\n      >\n        Cancel\n      </Button>\n      <Button\n        onClick={handleSave}\n        disabled={!groupName.trim() || isSubmitting || isSyncing}\n        tooltip={\n          isSyncing\n            ? \"Document embeddings are being updated due to recent changes to this group.\"\n            : undefined\n        }\n      >\n        {isSubmitting ? \"Saving...\" : isSyncing ? \"Syncing...\" : \"Save Changes\"}\n      </Button>\n    </Section>\n  );\n\n  return (\n    <>\n      <SettingsLayouts.Root>\n        <SettingsLayouts.Header\n          icon={SvgUsers}\n          title=\"Edit Group\"\n          separator\n          rightChildren={headerActions}\n        />\n\n        <SettingsLayouts.Body>\n          {isLoading && <SimpleLoader />}\n\n          {error && (\n            <Text as=\"p\" secondaryBody text03>\n              Failed to load group data.\n            </Text>\n          )}\n\n          {!isLoading && !error && group && (\n            <>\n              {/* Group Name */}\n              <Section\n                gap={0.5}\n                height=\"auto\"\n                alignItems=\"stretch\"\n                justifyContent=\"start\"\n              >\n                <Text mainUiBody text04>\n                  Group Name\n                </Text>\n                <InputTypeIn\n                  placeholder=\"Name your group\"\n                  value={groupName}\n                  onChange={(e) => setGroupName(e.target.value)}\n                />\n              </Section>\n\n              <Separator noPadding />\n\n              {/* Members table */}\n              <Section\n                gap={0.75}\n                height=\"auto\"\n                alignItems=\"stretch\"\n                justifyContent=\"start\"\n              >\n                <Section\n                  flexDirection=\"row\"\n                  gap={0.5}\n                  height=\"auto\"\n                  alignItems=\"center\"\n                  justifyContent=\"start\"\n                >\n                  <InputTypeIn\n                    value={searchTerm}\n                    onChange={(e) => setSearchTerm(e.target.value)}\n                    placeholder={\n                      isAddingMembers\n                        ? \"Search users and accounts...\"\n                        : \"Search members...\"\n                    }\n                    leftSearchIcon\n                    className=\"flex-1\"\n                  />\n                  {isAddingMembers ? (\n                    <Button\n                      prominence=\"secondary\"\n                      onClick={() => setIsAddingMembers(false)}\n                    >\n                      Done\n                    </Button>\n                  ) : (\n                    <Button\n                      prominence=\"tertiary\"\n                      icon={SvgPlusCircle}\n                      onClick={() => setIsAddingMembers(true)}\n                    >\n                      Add\n                    </Button>\n                  )}\n                </Section>\n\n                {isAddingMembers ? (\n                  <Table\n                    key=\"add-members\"\n                    data={allRows as MemberRow[]}\n                    columns={addModeColumns}\n                    getRowId={(row) => row.id ?? row.email}\n                    pageSize={PAGE_SIZE}\n                    searchTerm={searchTerm}\n                    selectionBehavior=\"multi-select\"\n                    initialRowSelection={currentRowSelection}\n                    onSelectionChange={handleSelectionChange}\n                    footer={{}}\n                    emptyState={\n                      <IllustrationContent\n                        illustration={SvgNoResult}\n                        title=\"No users found\"\n                        description=\"No users match your search.\"\n                      />\n                    }\n                  />\n                ) : (\n                  <Table\n                    data={memberRows}\n                    columns={memberColumns}\n                    getRowId={(row) => row.id ?? row.email}\n                    pageSize={PAGE_SIZE}\n                    searchTerm={searchTerm}\n                    footer={{}}\n                    emptyState={\n                      <IllustrationContent\n                        illustration={SvgNoResult}\n                        title=\"No members\"\n                        description=\"Add members to this group.\"\n                      />\n                    }\n                  />\n                )}\n              </Section>\n\n              <SharedGroupResources\n                selectedCcPairIds={selectedCcPairIds}\n                onCcPairIdsChange={setSelectedCcPairIds}\n                selectedDocSetIds={selectedDocSetIds}\n                onDocSetIdsChange={setSelectedDocSetIds}\n                selectedAgentIds={selectedAgentIds}\n                onAgentIdsChange={setSelectedAgentIds}\n              />\n\n              <TokenLimitSection\n                limits={tokenLimits}\n                onLimitsChange={setTokenLimits}\n              />\n\n              {/* Delete This Group */}\n              <Card>\n                <InputLayouts.Horizontal\n                  title=\"Delete This Group\"\n                  description=\"Members will lose access to any resources shared with this group.\"\n                  center\n                  nonInteractive\n                >\n                  <Button\n                    variant=\"danger\"\n                    prominence=\"secondary\"\n                    icon={SvgTrash}\n                    onClick={() => setShowDeleteModal(true)}\n                  >\n                    Delete Group\n                  </Button>\n                </InputLayouts.Horizontal>\n              </Card>\n            </>\n          )}\n        </SettingsLayouts.Body>\n      </SettingsLayouts.Root>\n\n      {showDeleteModal && (\n        <ConfirmationModalLayout\n          icon={SvgTrash}\n          title=\"Delete Group\"\n          onClose={() => setShowDeleteModal(false)}\n          submit={\n            <Button\n              variant=\"danger\"\n              onClick={handleDelete}\n              disabled={isDeleting}\n            >\n              {isDeleting ? \"Deleting...\" : \"Delete\"}\n            </Button>\n          }\n        >\n          <Text as=\"p\" text03>\n            Members of group{\" \"}\n            <Text as=\"span\" text05>\n              {group?.name}\n            </Text>{\" \"}\n            will lose access to any resources shared with this group, unless\n            they have been granted access directly. Deletion cannot be undone.\n          </Text>\n        </ConfirmationModalLayout>\n      )}\n    </>\n  );\n}\n\nexport default EditGroupPage;\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/GroupCard.tsx",
    "content": "\"use client\";\n\nimport type { Route } from \"next\";\nimport { useRouter } from \"next/navigation\";\nimport type { UserGroup } from \"@/lib/types\";\nimport { SvgChevronRight, SvgUserManage, SvgUsers } from \"@opal/icons\";\nimport { ContentAction } from \"@opal/layouts\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport {\n  isBuiltInGroup,\n  buildGroupDescription,\n  formatMemberCount,\n} from \"./utils\";\nimport { renameGroup } from \"./svc\";\nimport { toast } from \"@/hooks/useToast\";\nimport { useSWRConfig } from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\ninterface GroupCardProps {\n  group: UserGroup;\n}\n\nfunction GroupCard({ group }: GroupCardProps) {\n  const router = useRouter();\n  const { mutate } = useSWRConfig();\n  const builtIn = isBuiltInGroup(group);\n  const isAdmin = group.name === \"Admin\";\n  const isBasic = group.name === \"Basic\";\n  const isSyncing = !group.is_up_to_date;\n\n  async function handleRename(newName: string) {\n    try {\n      await renameGroup(group.id, newName);\n      mutate(SWR_KEYS.adminUserGroups);\n      toast.success(`Group renamed to \"${newName}\"`);\n    } catch (e) {\n      console.error(\"Failed to rename group:\", e);\n      toast.error(e instanceof Error ? e.message : \"Failed to rename group\");\n    }\n  }\n\n  return (\n    <Card padding={0.5} data-card>\n      <ContentAction\n        icon={isAdmin ? SvgUserManage : SvgUsers}\n        title={group.name}\n        description={buildGroupDescription(group)}\n        sizePreset=\"main-content\"\n        variant=\"section\"\n        tag={isBasic ? { title: \"Default\" } : undefined}\n        editable={!builtIn && !isSyncing}\n        onTitleChange={!builtIn && !isSyncing ? handleRename : undefined}\n        rightChildren={\n          <Section flexDirection=\"row\" alignItems=\"start\" gap={0}>\n            <div className=\"py-1\">\n              <Text mainUiBody text03>\n                {formatMemberCount(\n                  group.users.filter((u) => u.is_active).length\n                )}\n              </Text>\n            </div>\n            <Button\n              icon={SvgChevronRight}\n              prominence=\"tertiary\"\n              tooltip=\"View group\"\n              aria-label=\"View group\"\n              onClick={() => router.push(`/admin/groups/${group.id}` as Route)}\n            />\n          </Section>\n        }\n      />\n    </Card>\n  );\n}\n\nexport default GroupCard;\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/GroupsList.tsx",
    "content": "\"use client\";\n\nimport { useMemo } from \"react\";\nimport type { UserGroup } from \"@/lib/types\";\nimport Separator from \"@/refresh-components/Separator\";\nimport GroupCard from \"./GroupCard\";\nimport { isBuiltInGroup } from \"./utils\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { IllustrationContent } from \"@opal/layouts\";\nimport SvgNoResult from \"@opal/illustrations/no-result\";\n\ninterface GroupsListProps {\n  groups: UserGroup[];\n  searchQuery: string;\n}\n\nfunction GroupsList({ groups, searchQuery }: GroupsListProps) {\n  const filtered = useMemo(() => {\n    if (!searchQuery.trim()) return groups;\n    const q = searchQuery.toLowerCase();\n    return groups.filter((g) => g.name.toLowerCase().includes(q));\n  }, [groups, searchQuery]);\n\n  if (filtered.length === 0) {\n    return (\n      <IllustrationContent\n        illustration={SvgNoResult}\n        title=\"No groups found\"\n        description={`No groups matching \"${searchQuery}\"`}\n      />\n    );\n  }\n\n  const builtInGroups = filtered.filter(isBuiltInGroup);\n  const customGroups = filtered.filter((g) => !isBuiltInGroup(g));\n\n  return (\n    <Section flexDirection=\"column\" gap={0.5}>\n      {builtInGroups.map((group) => (\n        <GroupCard key={group.id} group={group} />\n      ))}\n\n      {builtInGroups.length > 0 && customGroups.length > 0 && (\n        <Separator paddingYRem={0.5} />\n      )}\n\n      {customGroups.map((group) => (\n        <GroupCard key={group.id} group={group} />\n      ))}\n    </Section>\n  );\n}\n\nexport default GroupsList;\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/SharedGroupResources/ResourceContent.tsx",
    "content": "\"use client\";\n\nimport type { ReactNode } from \"react\";\nimport { SvgX } from \"@opal/icons\";\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport { Content } from \"@opal/layouts\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\n\ninterface ResourceContentProps {\n  /** SVG icon for connectors/doc sets. */\n  icon?: IconFunctionComponent;\n  /** Custom ReactNode icon (e.g. AgentAvatar). Takes priority over `icon`. */\n  leftContent?: ReactNode;\n  title: string;\n  description?: string;\n  /** Inline info rendered after description (e.g. source icon stack). */\n  infoContent?: ReactNode;\n  onRemove: () => void;\n}\n\nfunction ResourceContent({\n  icon,\n  leftContent,\n  title,\n  description,\n  infoContent,\n  onRemove,\n}: ResourceContentProps) {\n  return (\n    <div className=\"flex flex-1 gap-0.5 items-start p-1.5 rounded-08 bg-background-tint-01 min-w-[240px] max-w-[302px]\">\n      <div className=\"flex flex-1 gap-1 p-0.5 items-center min-w-0\">\n        {leftContent ? (\n          <>\n            {leftContent}\n            <div className=\"flex-1 min-w-0\">\n              <Content\n                title={title}\n                description={description}\n                sizePreset=\"main-ui\"\n                variant=\"section\"\n              />\n            </div>\n          </>\n        ) : (\n          <div className=\"flex-1 min-w-0\">\n            <Content\n              icon={icon}\n              title={title}\n              description={description}\n              sizePreset=\"main-ui\"\n              variant=\"section\"\n            />\n          </div>\n        )}\n      </div>\n      {infoContent}\n      <IconButton small icon={SvgX} onClick={onRemove} className=\"shrink-0\" />\n    </div>\n  );\n}\n\nexport default ResourceContent;\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/SharedGroupResources/ResourcePopover.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { SvgEmpty } from \"@opal/icons\";\nimport { Content } from \"@opal/layouts\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Popover from \"@/refresh-components/Popover\";\nimport Separator from \"@/refresh-components/Separator\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\nimport type { ResourcePopoverProps } from \"@/refresh-pages/admin/GroupsPage/SharedGroupResources/interfaces\";\n\nfunction ResourcePopover({\n  placeholder,\n  searchValue,\n  onSearchChange,\n  sections,\n}: ResourcePopoverProps) {\n  const [open, setOpen] = useState(false);\n\n  const totalItems = sections.reduce((sum, s) => sum + s.items.length, 0);\n\n  return (\n    <Popover open={open} onOpenChange={setOpen}>\n      <Popover.Anchor>\n        <InputTypeIn\n          placeholder={placeholder}\n          value={searchValue}\n          onChange={(e) => {\n            onSearchChange(e.target.value);\n            if (!open) setOpen(true);\n          }}\n          onFocus={() => setOpen(true)}\n        />\n      </Popover.Anchor>\n      <Popover.Content\n        width=\"trigger\"\n        align=\"start\"\n        sideOffset={4}\n        onOpenAutoFocus={(e) => e.preventDefault()}\n      >\n        <div className=\"flex flex-col gap-1 max-h-64 overflow-y-auto\">\n          {totalItems === 0 ? (\n            <div className=\"px-3 py-3\">\n              <Content\n                icon={SvgEmpty}\n                title=\"No results found\"\n                sizePreset=\"secondary\"\n                variant=\"section\"\n              />\n            </div>\n          ) : (\n            sections.map(\n              (section, idx) =>\n                section.items.length > 0 && (\n                  <div key={section.label ?? `section-${idx}`}>\n                    {section.label && (\n                      <Section\n                        flexDirection=\"row\"\n                        gap={0.25}\n                        padding={0}\n                        height=\"auto\"\n                        alignItems=\"center\"\n                        justifyContent=\"start\"\n                        className=\"px-2 pt-2 pb-1\"\n                      >\n                        <Text secondaryBody text03 className=\"shrink-0\">\n                          {section.label}\n                        </Text>\n                        <Separator noPadding className=\"flex-1\" />\n                      </Section>\n                    )}\n                    <Section\n                      gap={0.25}\n                      alignItems=\"stretch\"\n                      justifyContent=\"start\"\n                    >\n                      {section.items.map((item) => (\n                        <div\n                          key={item.key}\n                          className={cn(\n                            \"rounded-08 cursor-pointer\",\n                            item.disabled\n                              ? \"bg-background-tint-02\"\n                              : \"hover:bg-background-tint-02 transition-colors\"\n                          )}\n                          onClick={() => {\n                            item.onSelect();\n                          }}\n                        >\n                          {item.render(!!item.disabled)}\n                        </div>\n                      ))}\n                    </Section>\n                  </div>\n                )\n            )\n          )}\n        </div>\n      </Popover.Content>\n    </Popover>\n  );\n}\n\nexport default ResourcePopover;\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/SharedGroupResources/index.tsx",
    "content": "\"use client\";\n\nimport { useState, useMemo } from \"react\";\nimport { SvgEmpty, SvgFiles, SvgXOctagon } from \"@opal/icons\";\nimport { Content } from \"@opal/layouts\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Separator from \"@/refresh-components/Separator\";\nimport SimpleCollapsible from \"@/refresh-components/SimpleCollapsible\";\nimport AgentAvatar from \"@/refresh-components/avatars/AgentAvatar\";\nimport { useConnectorStatus } from \"@/lib/hooks\";\nimport { useDocumentSets } from \"@/lib/hooks/useDocumentSets\";\nimport { useAgents } from \"@/hooks/useAgents\";\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport type { ValidSources } from \"@/lib/types\";\nimport ResourceContent from \"@/refresh-pages/admin/GroupsPage/SharedGroupResources/ResourceContent\";\nimport ResourcePopover from \"@/refresh-pages/admin/GroupsPage/SharedGroupResources/ResourcePopover\";\nimport type { PopoverSection } from \"@/refresh-pages/admin/GroupsPage/SharedGroupResources/interfaces\";\n\ninterface SharedGroupResourcesProps {\n  selectedCcPairIds: number[];\n  onCcPairIdsChange: (ids: number[]) => void;\n  selectedDocSetIds: number[];\n  onDocSetIdsChange: (ids: number[]) => void;\n  selectedAgentIds: number[];\n  onAgentIdsChange: (ids: number[]) => void;\n}\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\nfunction SharedBadge() {\n  return (\n    <Text as=\"span\" secondaryBody text03>\n      Shared\n    </Text>\n  );\n}\n\ninterface SourceIconStackProps {\n  sources: { source: ValidSources }[];\n}\n\nfunction SourceIconStack({ sources }: SourceIconStackProps) {\n  if (sources.length === 0) return null;\n\n  const unique = Array.from(\n    new Map(sources.map((s) => [s.source, s])).values()\n  ).slice(0, 3);\n\n  return (\n    <Section\n      flexDirection=\"row\"\n      alignItems=\"center\"\n      width=\"auto\"\n      height=\"auto\"\n      gap={0}\n      className=\"shrink-0 p-0.5\"\n    >\n      {unique.map((s, i) => {\n        const Icon = getSourceMetadata(s.source).icon;\n        return (\n          <div\n            key={s.source}\n            className=\"flex items-center justify-center size-4 rounded-04 bg-background-tint-00 border border-border-01 overflow-hidden [&_img]:!size-4 [&_img]:!m-0 [&_svg]:size-4\"\n            style={{ zIndex: unique.length - i, marginLeft: i > 0 ? -6 : 0 }}\n          >\n            <Icon />\n          </div>\n        );\n      })}\n    </Section>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nfunction SharedGroupResources({\n  selectedCcPairIds,\n  onCcPairIdsChange,\n  selectedDocSetIds,\n  onDocSetIdsChange,\n  selectedAgentIds,\n  onAgentIdsChange,\n}: SharedGroupResourcesProps) {\n  const [connectorSearch, setConnectorSearch] = useState(\"\");\n  const [agentSearch, setAgentSearch] = useState(\"\");\n\n  const { data: connectors = [] } = useConnectorStatus();\n  const { documentSets } = useDocumentSets();\n  const { agents } = useAgents();\n\n  // --- Derived data ---\n\n  const selectedCcPairSet = useMemo(\n    () => new Set(selectedCcPairIds),\n    [selectedCcPairIds]\n  );\n  const selectedDocSetSet = useMemo(\n    () => new Set(selectedDocSetIds),\n    [selectedDocSetIds]\n  );\n  const selectedAgentSet = useMemo(\n    () => new Set(selectedAgentIds),\n    [selectedAgentIds]\n  );\n\n  const selectedPairs = useMemo(\n    () => connectors.filter((p) => selectedCcPairSet.has(p.cc_pair_id)),\n    [connectors, selectedCcPairSet]\n  );\n  const selectedDocSets = useMemo(\n    () => documentSets.filter((ds) => selectedDocSetSet.has(ds.id)),\n    [documentSets, selectedDocSetSet]\n  );\n  const selectedAgentObjects = useMemo(\n    () => agents.filter((a) => selectedAgentSet.has(a.id)),\n    [agents, selectedAgentSet]\n  );\n\n  // --- Popover sections ---\n\n  const connectorDocSetSections: PopoverSection[] = useMemo(() => {\n    const q = connectorSearch.toLowerCase();\n\n    const connectorItems = connectors\n      .filter((p) => !q || (p.name ?? \"\").toLowerCase().includes(q))\n      .map((p) => {\n        const isSelected = selectedCcPairSet.has(p.cc_pair_id);\n        return {\n          key: `c-${p.cc_pair_id}`,\n          disabled: isSelected,\n          onSelect: () =>\n            isSelected\n              ? onCcPairIdsChange(\n                  selectedCcPairIds.filter((id) => id !== p.cc_pair_id)\n                )\n              : onCcPairIdsChange([...selectedCcPairIds, p.cc_pair_id]),\n          render: (dimmed: boolean) => (\n            <LineItem\n              interactive={!dimmed}\n              muted={dimmed}\n              icon={getSourceMetadata(p.connector.source).icon}\n              rightChildren={\n                p.groups.length > 0 || dimmed ? <SharedBadge /> : undefined\n              }\n            >\n              {p.name ?? `Connector #${p.cc_pair_id}`}\n            </LineItem>\n          ),\n        };\n      });\n\n    const docSetItems = documentSets\n      .filter((ds) => !q || ds.name.toLowerCase().includes(q))\n      .map((ds) => {\n        const isSelected = selectedDocSetSet.has(ds.id);\n        return {\n          key: `d-${ds.id}`,\n          disabled: isSelected,\n          onSelect: () =>\n            isSelected\n              ? onDocSetIdsChange(\n                  selectedDocSetIds.filter((id) => id !== ds.id)\n                )\n              : onDocSetIdsChange([...selectedDocSetIds, ds.id]),\n          render: (dimmed: boolean) => (\n            <LineItem\n              interactive={!dimmed}\n              muted={dimmed}\n              icon={SvgFiles}\n              rightChildren={\n                ds.groups.length > 0 || dimmed ? <SharedBadge /> : undefined\n              }\n            >\n              {ds.name}\n            </LineItem>\n          ),\n        };\n      });\n\n    return [\n      ...(connectorItems.length > 0\n        ? [{ label: \"Connectors\", items: connectorItems }]\n        : []),\n      ...(docSetItems.length > 0\n        ? [{ label: \"Document Sets\", items: docSetItems }]\n        : []),\n    ];\n  }, [\n    connectors,\n    documentSets,\n    connectorSearch,\n    selectedCcPairSet,\n    selectedDocSetSet,\n    selectedCcPairIds,\n    selectedDocSetIds,\n    onCcPairIdsChange,\n    onDocSetIdsChange,\n  ]);\n\n  const agentSections: PopoverSection[] = useMemo(() => {\n    const q = agentSearch.toLowerCase();\n\n    const items = agents\n      .filter((a) => !q || a.name.toLowerCase().includes(q))\n      .map((a) => {\n        const isSelected = selectedAgentSet.has(a.id);\n        return {\n          key: `a-${a.id}`,\n          disabled: isSelected,\n          onSelect: () =>\n            isSelected\n              ? onAgentIdsChange(selectedAgentIds.filter((id) => id !== a.id))\n              : onAgentIdsChange([...selectedAgentIds, a.id]),\n          render: (dimmed: boolean) => (\n            <LineItem\n              interactive={!dimmed}\n              muted={dimmed}\n              icon={(_props) => <AgentAvatar agent={a} size={16} />}\n              description=\"agent\"\n              rightChildren={\n                !a.is_public || dimmed ? <SharedBadge /> : undefined\n              }\n            >\n              {a.name}\n            </LineItem>\n          ),\n        };\n      });\n\n    return items.length > 0 ? [{ items }] : [];\n  }, [\n    agents,\n    agentSearch,\n    selectedAgentSet,\n    selectedAgentIds,\n    onAgentIdsChange,\n  ]);\n\n  // --- Handlers ---\n\n  function removeConnector(id: number) {\n    onCcPairIdsChange(selectedCcPairIds.filter((cid) => cid !== id));\n  }\n\n  function removeDocSet(id: number) {\n    onDocSetIdsChange(selectedDocSetIds.filter((did) => did !== id));\n  }\n\n  function removeAgent(id: number) {\n    onAgentIdsChange(selectedAgentIds.filter((aid) => aid !== id));\n  }\n\n  const hasSelectedResources =\n    selectedPairs.length > 0 || selectedDocSets.length > 0;\n\n  return (\n    <SimpleCollapsible>\n      <SimpleCollapsible.Header\n        title=\"Shared with This Group\"\n        description=\"Share connectors, document sets, agents with members of this group.\"\n      />\n      <SimpleCollapsible.Content>\n        <Card>\n          <Section\n            gap={1}\n            height=\"auto\"\n            alignItems=\"stretch\"\n            justifyContent=\"start\"\n            width=\"full\"\n          >\n            {/* Connectors & Document Sets */}\n            <Section\n              gap={0.5}\n              height=\"auto\"\n              alignItems=\"stretch\"\n              justifyContent=\"start\"\n            >\n              <Section\n                gap={0.25}\n                height=\"auto\"\n                alignItems=\"stretch\"\n                justifyContent=\"start\"\n              >\n                <Text mainUiAction text04>\n                  Connectors & Document Sets\n                </Text>\n                <ResourcePopover\n                  placeholder=\"Add connectors, document sets\"\n                  searchValue={connectorSearch}\n                  onSearchChange={setConnectorSearch}\n                  sections={connectorDocSetSections}\n                />\n              </Section>\n              {hasSelectedResources ? (\n                <Section\n                  flexDirection=\"row\"\n                  wrap\n                  gap={0.25}\n                  height=\"auto\"\n                  alignItems=\"start\"\n                  justifyContent=\"start\"\n                >\n                  {selectedPairs.map((pair) => (\n                    <ResourceContent\n                      key={`c-${pair.cc_pair_id}`}\n                      icon={getSourceMetadata(pair.connector.source).icon}\n                      title={pair.name ?? `Connector #${pair.cc_pair_id}`}\n                      description=\"Connector\"\n                      onRemove={() => removeConnector(pair.cc_pair_id)}\n                    />\n                  ))}\n                  {selectedDocSets.map((ds) => (\n                    <ResourceContent\n                      key={`d-${ds.id}`}\n                      icon={SvgFiles}\n                      title={ds.name}\n                      description=\"Document Set\"\n                      infoContent={\n                        <SourceIconStack sources={ds.cc_pair_summaries} />\n                      }\n                      onRemove={() => removeDocSet(ds.id)}\n                    />\n                  ))}\n                </Section>\n              ) : (\n                <Content\n                  icon={SvgEmpty}\n                  title=\"No connectors or document sets added\"\n                  description=\"Add connectors or document set to share with this group.\"\n                  sizePreset=\"secondary\"\n                  variant=\"section\"\n                />\n              )}\n            </Section>\n\n            <Separator noPadding />\n\n            {/* Agents */}\n            <Section\n              gap={0.5}\n              height=\"auto\"\n              alignItems=\"stretch\"\n              justifyContent=\"start\"\n            >\n              <Section\n                gap={0.25}\n                height=\"auto\"\n                alignItems=\"stretch\"\n                justifyContent=\"start\"\n              >\n                <Text mainUiAction text04>\n                  Agents\n                </Text>\n                <ResourcePopover\n                  placeholder=\"Add agents\"\n                  searchValue={agentSearch}\n                  onSearchChange={setAgentSearch}\n                  sections={agentSections}\n                />\n              </Section>\n              {selectedAgentObjects.length > 0 ? (\n                <Section\n                  flexDirection=\"row\"\n                  wrap\n                  gap={0.25}\n                  height=\"auto\"\n                  alignItems=\"start\"\n                  justifyContent=\"start\"\n                >\n                  {selectedAgentObjects.map((agent) => (\n                    <ResourceContent\n                      key={agent.id}\n                      leftContent={\n                        <div className=\"flex items-center justify-center shrink-0 size-5 p-0.5 rounded-04\">\n                          <AgentAvatar agent={agent} size={16} />\n                        </div>\n                      }\n                      title={agent.name}\n                      description=\"agent\"\n                      onRemove={() => removeAgent(agent.id)}\n                    />\n                  ))}\n                </Section>\n              ) : (\n                <Content\n                  icon={SvgXOctagon}\n                  title=\"No agents added\"\n                  description=\"Add agents to share with this group.\"\n                  sizePreset=\"secondary\"\n                  variant=\"section\"\n                />\n              )}\n            </Section>\n          </Section>\n        </Card>\n      </SimpleCollapsible.Content>\n    </SimpleCollapsible>\n  );\n}\n\nexport default SharedGroupResources;\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/SharedGroupResources/interfaces.ts",
    "content": "export interface PopoverItem {\n  key: string;\n  render: (disabled: boolean) => React.ReactNode;\n  onSelect: () => void;\n  /** When true, the item is already selected — shown dimmed with bg-tint-02. */\n  disabled?: boolean;\n}\n\nexport interface PopoverSection {\n  label?: string;\n  items: PopoverItem[];\n}\n\nexport interface ResourcePopoverProps {\n  placeholder: string;\n  searchValue: string;\n  onSearchChange: (value: string) => void;\n  sections: PopoverSection[];\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/TokenLimitSection.tsx",
    "content": "\"use client\";\n\nimport { useRef } from \"react\";\nimport { SvgPlusCircle, SvgMinusCircle } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport InputNumber from \"@/refresh-components/inputs/InputNumber\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport SimpleCollapsible from \"@/refresh-components/SimpleCollapsible\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\nexport interface TokenLimit {\n  tokenBudget: number | null;\n  periodHours: number | null;\n}\n\ninterface TokenLimitSectionProps {\n  limits: TokenLimit[];\n  onLimitsChange: (limits: TokenLimit[]) => void;\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nfunction TokenLimitSection({ limits, onLimitsChange }: TokenLimitSectionProps) {\n  const nextKeyRef = useRef(limits.length);\n  const keysRef = useRef<number[]>(limits.map((_, i) => i));\n\n  // Sync keys if the parent provides a different number of limits externally\n  // (e.g. loaded from server after initial mount).\n  if (keysRef.current.length < limits.length) {\n    while (keysRef.current.length < limits.length) {\n      keysRef.current.push(nextKeyRef.current++);\n    }\n  } else if (keysRef.current.length > limits.length) {\n    keysRef.current = keysRef.current.slice(0, limits.length);\n  }\n\n  function addLimit() {\n    const emptyIndex = limits.findIndex(\n      (l) => l.tokenBudget === null && l.periodHours === null\n    );\n    if (emptyIndex !== -1) return;\n    const key = nextKeyRef.current++;\n    keysRef.current = [...keysRef.current, key];\n    onLimitsChange([...limits, { tokenBudget: null, periodHours: null }]);\n  }\n\n  function removeLimit(index: number) {\n    keysRef.current = keysRef.current.filter((_, i) => i !== index);\n    onLimitsChange(limits.filter((_, i) => i !== index));\n  }\n\n  function updateLimit(\n    index: number,\n    field: keyof TokenLimit,\n    value: number | null\n  ) {\n    onLimitsChange(\n      limits.map((l, i) => (i === index ? { ...l, [field]: value } : l))\n    );\n  }\n\n  return (\n    <SimpleCollapsible>\n      <SimpleCollapsible.Header\n        title=\"Token Rate Limit\"\n        description=\"Limit number of tokens this group can use within a given time period.\"\n      />\n      <SimpleCollapsible.Content>\n        <Card>\n          <Section\n            gap={0.5}\n            height=\"auto\"\n            alignItems=\"stretch\"\n            justifyContent=\"start\"\n            width=\"full\"\n          >\n            {/* Column headers */}\n            <div className=\"flex flex-wrap items-center gap-1 pr-[40px]\">\n              <div className=\"flex-1 flex items-center min-w-[160px]\">\n                <Text mainUiAction text04>\n                  Token Limit\n                </Text>\n                <Text mainUiMuted text03 className=\"ml-0.5\">\n                  (thousand tokens)\n                </Text>\n              </div>\n              <div className=\"flex-1 flex items-center min-w-[160px]\">\n                <Text mainUiAction text04>\n                  Time Window\n                </Text>\n                <Text mainUiMuted text03 className=\"ml-0.5\">\n                  (hours)\n                </Text>\n              </div>\n            </div>\n\n            {/* Limit rows */}\n            {limits.map((limit, i) => (\n              <div key={keysRef.current[i]} className=\"flex items-center gap-1\">\n                <div className=\"flex-1\">\n                  <InputNumber\n                    value={limit.tokenBudget}\n                    onChange={(v) => updateLimit(i, \"tokenBudget\", v)}\n                    min={0}\n                    placeholder=\"Token limit in thousands\"\n                  />\n                </div>\n                <div className=\"flex-1\">\n                  <InputNumber\n                    value={limit.periodHours}\n                    onChange={(v) => updateLimit(i, \"periodHours\", v)}\n                    min={1}\n                    placeholder=\"24\"\n                  />\n                </div>\n                <IconButton\n                  small\n                  icon={SvgMinusCircle}\n                  onClick={() => removeLimit(i)}\n                />\n              </div>\n            ))}\n\n            {/* Add button */}\n            <Button\n              icon={SvgPlusCircle}\n              prominence=\"secondary\"\n              size=\"md\"\n              onClick={addLimit}\n            >\n              Add Limit\n            </Button>\n          </Section>\n        </Card>\n      </SimpleCollapsible.Content>\n    </SimpleCollapsible>\n  );\n}\n\nexport default TokenLimitSection;\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/index.tsx",
    "content": "\"use client\";\n\nimport type { Route } from \"next\";\nimport { useState } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport useSWR from \"swr\";\nimport { SvgUsers } from \"@opal/icons\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport type { UserGroup } from \"@/lib/types\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport GroupsList from \"./GroupsList\";\nimport AdminListHeader from \"@/sections/admin/AdminListHeader\";\nimport { IllustrationContent } from \"@opal/layouts\";\nimport SvgNoResult from \"@opal/illustrations/no-result\";\n\nfunction GroupsPage() {\n  const router = useRouter();\n  const [searchQuery, setSearchQuery] = useState(\"\");\n\n  const {\n    data: groups,\n    error,\n    isLoading,\n  } = useSWR<UserGroup[]>(SWR_KEYS.adminUserGroups, errorHandlingFetcher);\n\n  return (\n    <SettingsLayouts.Root>\n      <div data-testid=\"groups-page-heading\">\n        <SettingsLayouts.Header icon={SvgUsers} title=\"Groups\" separator />\n      </div>\n\n      <SettingsLayouts.Body>\n        <AdminListHeader\n          hasItems={!isLoading && !error && (groups?.length ?? 0) > 0}\n          searchQuery={searchQuery}\n          onSearchQueryChange={setSearchQuery}\n          placeholder=\"Search groups...\"\n          emptyStateText=\"Create groups to organize users and manage access.\"\n          onAction={() => router.push(\"/admin/groups/create\" as Route)}\n          actionLabel=\"New Group\"\n        />\n\n        {isLoading && <SimpleLoader />}\n\n        {error && (\n          <IllustrationContent\n            illustration={SvgNoResult}\n            title=\"Failed to load groups.\"\n            description=\"Please check the console for more details.\"\n          />\n        )}\n\n        {!isLoading && !error && groups && (\n          <GroupsList groups={groups} searchQuery={searchQuery} />\n        )}\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n\nexport default GroupsPage;\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/interfaces.ts",
    "content": "import type { UserRole } from \"@/lib/types\";\nimport type { UserRow } from \"@/refresh-pages/admin/UsersPage/interfaces\";\n\nexport interface ApiKeyDescriptor {\n  api_key_id: number;\n  api_key_display: string;\n  api_key_name: string | null;\n  api_key_role: UserRole;\n  user_id: string;\n}\n\n/** Extends UserRow with an optional API key display for service accounts. */\nexport interface MemberRow extends UserRow {\n  api_key_display?: string;\n}\n\nexport interface TokenRateLimitDisplay {\n  token_id: number;\n  enabled: boolean;\n  token_budget: number;\n  period_hours: number;\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/shared.tsx",
    "content": "import { createTableColumns } from \"@opal/components\";\nimport { Content } from \"@opal/layouts\";\nimport { SvgUser, SvgUserManage, SvgGlobe, SvgSlack } from \"@opal/icons\";\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { UserRole, UserStatus, USER_ROLE_LABELS } from \"@/lib/types\";\nimport type { ApiKeyDescriptor, MemberRow } from \"./interfaces\";\n\n// ---------------------------------------------------------------------------\n// Constants\n// ---------------------------------------------------------------------------\n\nexport const PAGE_SIZE = 10;\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\nexport function apiKeyToMemberRow(key: ApiKeyDescriptor): MemberRow {\n  return {\n    id: key.user_id,\n    email: \"Service Account\",\n    role: key.api_key_role,\n    status: UserStatus.ACTIVE,\n    is_active: true,\n    is_scim_synced: false,\n    personal_name: key.api_key_name ?? \"Unnamed Key\",\n    created_at: null,\n    updated_at: null,\n    groups: [],\n    api_key_display: key.api_key_display,\n  };\n}\n\n// ---------------------------------------------------------------------------\n// Role icon mapping (mirrors UsersPage/UserRoleCell)\n// ---------------------------------------------------------------------------\n\nconst ROLE_ICONS: Partial<Record<UserRole, IconFunctionComponent>> = {\n  [UserRole.ADMIN]: SvgUserManage,\n  [UserRole.GLOBAL_CURATOR]: SvgGlobe,\n  [UserRole.SLACK_USER]: SvgSlack,\n};\n\n// ---------------------------------------------------------------------------\n// Column renderers\n// ---------------------------------------------------------------------------\n\nfunction renderNameColumn(email: string, row: MemberRow) {\n  return (\n    <Content\n      sizePreset=\"main-ui\"\n      variant=\"section\"\n      title={row.personal_name ?? email}\n      description={row.personal_name ? email : undefined}\n    />\n  );\n}\n\nfunction renderAccountTypeColumn(_value: unknown, row: MemberRow) {\n  const Icon = (row.role && ROLE_ICONS[row.role]) || SvgUser;\n  return (\n    <div className=\"flex flex-row items-center gap-1\">\n      <Icon className=\"w-4 h-4 text-text-03\" />\n      <Text as=\"span\" mainUiBody text03>\n        {row.role ? USER_ROLE_LABELS[row.role] ?? row.role : \"\\u2014\"}\n      </Text>\n    </div>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Columns\n// ---------------------------------------------------------------------------\n\nexport const tc = createTableColumns<MemberRow>();\n\nexport const baseColumns = [\n  tc.qualifier(),\n  tc.column(\"email\", {\n    header: \"Name\",\n    weight: 25,\n    cell: renderNameColumn,\n  }),\n  tc.column(\"api_key_display\", {\n    header: \"\",\n    weight: 15,\n    enableSorting: false,\n    cell: (value) =>\n      value ? (\n        <Text as=\"span\" secondaryBody text03>\n          {value}\n        </Text>\n      ) : null,\n  }),\n  tc.column(\"role\", {\n    header: \"Account Type\",\n    weight: 15,\n    cell: renderAccountTypeColumn,\n  }),\n];\n\nexport const memberTableColumns = [\n  ...baseColumns,\n  tc.actions({ showSorting: false }),\n];\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/svc.ts",
    "content": "/** API helpers for the Groups pages. */\n\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\n\nconst USER_GROUP_URL = SWR_KEYS.adminUserGroups;\n\nasync function renameGroup(groupId: number, newName: string): Promise<void> {\n  const res = await fetch(`${USER_GROUP_URL}/rename`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ id: groupId, name: newName }),\n  });\n  if (!res.ok) {\n    const detail = await res.json().catch(() => null);\n    throw new Error(\n      detail?.detail ?? `Failed to rename group: ${res.statusText}`\n    );\n  }\n}\n\nasync function createGroup(\n  name: string,\n  userIds: string[],\n  ccPairIds: number[] = []\n): Promise<number> {\n  const res = await fetch(USER_GROUP_URL, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      name,\n      user_ids: userIds,\n      cc_pair_ids: ccPairIds,\n    }),\n  });\n  if (!res.ok) {\n    const detail = await res.json().catch(() => null);\n    throw new Error(\n      detail?.detail ?? `Failed to create group: ${res.statusText}`\n    );\n  }\n  const group = await res.json();\n  return group.id;\n}\n\nasync function updateGroup(\n  groupId: number,\n  userIds: string[],\n  ccPairIds: number[]\n): Promise<void> {\n  const res = await fetch(`${USER_GROUP_URL}/${groupId}`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      user_ids: userIds,\n      cc_pair_ids: ccPairIds,\n    }),\n  });\n  if (!res.ok) {\n    const detail = await res.json().catch(() => null);\n    throw new Error(\n      detail?.detail ?? `Failed to update group: ${res.statusText}`\n    );\n  }\n}\n\nasync function deleteGroup(groupId: number): Promise<void> {\n  const res = await fetch(`${USER_GROUP_URL}/${groupId}`, {\n    method: \"DELETE\",\n  });\n  if (!res.ok) {\n    const detail = await res.json().catch(() => null);\n    throw new Error(\n      detail?.detail ?? `Failed to delete group: ${res.statusText}`\n    );\n  }\n}\n\n// ---------------------------------------------------------------------------\n// Agent (persona) sharing — managed from the persona side\n// ---------------------------------------------------------------------------\n\nasync function updateAgentGroupSharing(\n  groupId: number,\n  initialAgentIds: number[],\n  currentAgentIds: number[]\n): Promise<void> {\n  const initialSet = new Set(initialAgentIds);\n  const currentSet = new Set(currentAgentIds);\n\n  const added_agent_ids = currentAgentIds.filter((id) => !initialSet.has(id));\n  const removed_agent_ids = initialAgentIds.filter((id) => !currentSet.has(id));\n\n  if (added_agent_ids.length === 0 && removed_agent_ids.length === 0) return;\n\n  const res = await fetch(`${USER_GROUP_URL}/${groupId}/agents`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ added_agent_ids, removed_agent_ids }),\n  });\n  if (!res.ok) {\n    const detail = await res.json().catch(() => null);\n    throw new Error(\n      detail?.detail ?? `Failed to update agent sharing: ${res.statusText}`\n    );\n  }\n}\n\n// ---------------------------------------------------------------------------\n// Document set sharing — managed from the document set side\n// ---------------------------------------------------------------------------\n\ninterface DocumentSetSummary {\n  id: number;\n  description: string;\n  cc_pair_summaries: { id: number }[];\n  federated_connector_summaries: { id: number }[];\n  is_public: boolean;\n  users: string[];\n  groups: number[];\n}\n\nasync function updateDocSetGroupSharing(\n  groupId: number,\n  initialDocSetIds: number[],\n  currentDocSetIds: number[]\n): Promise<void> {\n  const initialSet = new Set(initialDocSetIds);\n  const currentSet = new Set(currentDocSetIds);\n\n  const added = currentDocSetIds.filter((id) => !initialSet.has(id));\n  const removed = initialDocSetIds.filter((id) => !currentSet.has(id));\n\n  if (added.length === 0 && removed.length === 0) return;\n\n  // Fetch all document sets to get their current state\n  const allRes = await fetch(\"/api/manage/document-set\");\n  if (!allRes.ok) {\n    throw new Error(\"Failed to fetch document sets\");\n  }\n  const allDocSets: DocumentSetSummary[] = await allRes.json();\n  const docSetMap = new Map(allDocSets.map((ds) => [ds.id, ds]));\n\n  for (const dsId of added) {\n    const ds = docSetMap.get(dsId);\n    if (!ds) {\n      throw new Error(`Document set ${dsId} not found`);\n    }\n    const updatedGroups = ds.groups.includes(groupId)\n      ? ds.groups\n      : [...ds.groups, groupId];\n    const res = await fetch(\"/api/manage/admin/document-set\", {\n      method: \"PATCH\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify({\n        id: ds.id,\n        description: ds.description,\n        cc_pair_ids: ds.cc_pair_summaries.map((cc) => cc.id),\n        federated_connectors: ds.federated_connector_summaries.map((fc) => ({\n          federated_connector_id: fc.id,\n        })),\n        is_public: ds.is_public,\n        users: ds.users,\n        groups: updatedGroups,\n      }),\n    });\n    if (!res.ok) {\n      throw new Error(`Failed to add group to document set ${dsId}`);\n    }\n  }\n\n  for (const dsId of removed) {\n    const ds = docSetMap.get(dsId);\n    if (!ds) {\n      throw new Error(`Document set ${dsId} not found`);\n    }\n    const updatedGroups = ds.groups.filter((id) => id !== groupId);\n    const res = await fetch(\"/api/manage/admin/document-set\", {\n      method: \"PATCH\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify({\n        id: ds.id,\n        description: ds.description,\n        cc_pair_ids: ds.cc_pair_summaries.map((cc) => cc.id),\n        federated_connectors: ds.federated_connector_summaries.map((fc) => ({\n          federated_connector_id: fc.id,\n        })),\n        is_public: ds.is_public,\n        users: ds.users,\n        groups: updatedGroups,\n      }),\n    });\n    if (!res.ok) {\n      throw new Error(`Failed to remove group from document set ${dsId}`);\n    }\n  }\n}\n\n// ---------------------------------------------------------------------------\n// Token rate limits — create / update / delete\n// ---------------------------------------------------------------------------\n\ninterface TokenLimitPayload {\n  tokenBudget: number | null;\n  periodHours: number | null;\n}\n\ninterface ExistingTokenLimit {\n  token_id: number;\n  enabled: boolean;\n  token_budget: number;\n  period_hours: number;\n}\n\nasync function saveTokenLimits(\n  groupId: number,\n  limits: TokenLimitPayload[],\n  existing: ExistingTokenLimit[]\n): Promise<void> {\n  // Filter to only valid (non-null) limits\n  const validLimits = limits.filter(\n    (l): l is { tokenBudget: number; periodHours: number } =>\n      l.tokenBudget != null && l.periodHours != null\n  );\n\n  // Update existing limits (match by index position)\n  const toUpdate = Math.min(validLimits.length, existing.length);\n  for (let i = 0; i < toUpdate; i++) {\n    const limit = validLimits[i]!;\n    const existingLimit = existing[i]!;\n    const updateRes = await fetch(\n      `/api/admin/token-rate-limits/rate-limit/${existingLimit.token_id}`,\n      {\n        method: \"PUT\",\n        headers: { \"Content-Type\": \"application/json\" },\n        body: JSON.stringify({\n          enabled: existingLimit.enabled,\n          token_budget: limit.tokenBudget,\n          period_hours: limit.periodHours,\n        }),\n      }\n    );\n    if (!updateRes.ok) {\n      throw new Error(\n        `Failed to update token rate limit ${existingLimit.token_id}`\n      );\n    }\n  }\n\n  // Create new limits beyond existing count\n  for (let i = toUpdate; i < validLimits.length; i++) {\n    const limit = validLimits[i]!;\n    const createRes = await fetch(\n      `/api/admin/token-rate-limits/user-group/${groupId}`,\n      {\n        method: \"POST\",\n        headers: { \"Content-Type\": \"application/json\" },\n        body: JSON.stringify({\n          enabled: true,\n          token_budget: limit.tokenBudget,\n          period_hours: limit.periodHours,\n        }),\n      }\n    );\n    if (!createRes.ok) {\n      throw new Error(\"Failed to create token rate limit\");\n    }\n  }\n\n  // Delete excess existing limits\n  for (let i = toUpdate; i < existing.length; i++) {\n    const existingLimit = existing[i]!;\n    const deleteRes = await fetch(\n      `/api/admin/token-rate-limits/rate-limit/${existingLimit.token_id}`,\n      { method: \"DELETE\" }\n    );\n    if (!deleteRes.ok) {\n      throw new Error(\n        `Failed to delete token rate limit ${existingLimit.token_id}`\n      );\n    }\n  }\n}\n\nexport {\n  renameGroup,\n  createGroup,\n  updateGroup,\n  deleteGroup,\n  updateAgentGroupSharing,\n  updateDocSetGroupSharing,\n  saveTokenLimits,\n};\n"
  },
  {
    "path": "web/src/refresh-pages/admin/GroupsPage/utils.ts",
    "content": "import type { UserGroup } from \"@/lib/types\";\n\n/** Whether this group is a system default group (Admin, Basic). */\nexport function isBuiltInGroup(group: UserGroup): boolean {\n  return group.is_default;\n}\n\n/** Human-readable description for built-in groups. */\nconst BUILT_IN_DESCRIPTIONS: Record<string, string> = {\n  Basic: \"Default group for all users with basic permissions.\",\n  Admin: \"Built-in admin group with full access to manage all permissions.\",\n};\n\n/**\n * Build the description line(s) shown beneath the group name.\n *\n * Built-in groups use a fixed label.\n * Custom groups list resource counts (\"3 connectors · 2 document sets · 2 agents\")\n * or fall back to \"No private connectors / document sets / agents\".\n */\nexport function buildGroupDescription(group: UserGroup): string {\n  if (isBuiltInGroup(group)) {\n    return BUILT_IN_DESCRIPTIONS[group.name] ?? \"\";\n  }\n\n  const parts: string[] = [];\n  if (group.cc_pairs.length > 0) {\n    parts.push(\n      `${group.cc_pairs.length} connector${\n        group.cc_pairs.length !== 1 ? \"s\" : \"\"\n      }`\n    );\n  }\n  if (group.document_sets.length > 0) {\n    parts.push(\n      `${group.document_sets.length} document set${\n        group.document_sets.length !== 1 ? \"s\" : \"\"\n      }`\n    );\n  }\n  if (group.personas.length > 0) {\n    parts.push(\n      `${group.personas.length} agent${group.personas.length !== 1 ? \"s\" : \"\"}`\n    );\n  }\n\n  return parts.length > 0\n    ? parts.join(\" · \")\n    : \"No private connectors / document sets / agents\";\n}\n\n/** Format the member count badge, e.g. \"306 Members\" or \"1 Member\". */\nexport function formatMemberCount(count: number): string {\n  return `${count} ${count === 1 ? \"Member\" : \"Members\"}`;\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ImageGenerationPage/ImageGenerationContent.tsx",
    "content": "\"use client\";\n\nimport { useState, useMemo, useEffect } from \"react\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport { toast } from \"@/hooks/useToast\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { LLMProviderResponse, LLMProviderView } from \"@/interfaces/llm\";\nimport {\n  IMAGE_PROVIDER_GROUPS,\n  ImageProvider,\n} from \"@/refresh-pages/admin/ImageGenerationPage/constants\";\nimport {\n  ImageGenerationConfigView,\n  setDefaultImageGenerationConfig,\n  unsetDefaultImageGenerationConfig,\n  deleteImageGenerationConfig,\n} from \"@/refresh-pages/admin/ImageGenerationPage/svc\";\nimport { ProviderIcon } from \"@/app/admin/configuration/llm/ProviderIcon\";\nimport Message from \"@/refresh-components/messages/Message\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport { Button, SelectCard, Text } from \"@opal/components\";\nimport { Content, CardHeaderLayout } from \"@opal/layouts\";\nimport { Hoverable } from \"@opal/core\";\nimport {\n  SvgArrowExchange,\n  SvgArrowRightCircle,\n  SvgCheckSquare,\n  SvgSettings,\n  SvgSlash,\n  SvgUnplug,\n} from \"@opal/icons\";\nimport { markdown } from \"@opal/utils\";\nimport { getImageGenForm } from \"@/refresh-pages/admin/ImageGenerationPage/forms\";\n\nconst NO_DEFAULT_VALUE = \"__none__\";\n\nconst STATUS_TO_STATE = {\n  disconnected: \"empty\",\n  connected: \"filled\",\n  selected: \"selected\",\n} as const;\n\nexport default function ImageGenerationContent() {\n  const {\n    data: llmProviderResponse,\n    error: llmError,\n    mutate: refetchProviders,\n  } = useSWR<LLMProviderResponse<LLMProviderView>>(\n    SWR_KEYS.llmProvidersWithImageGen,\n    errorHandlingFetcher\n  );\n  const llmProviders = llmProviderResponse?.providers ?? [];\n\n  const {\n    data: configs = [],\n    error: configError,\n    mutate: refetchConfigs,\n  } = useSWR<ImageGenerationConfigView[]>(\n    SWR_KEYS.imageGenConfig,\n    errorHandlingFetcher\n  );\n\n  const modal = useCreateModal();\n  const [activeProvider, setActiveProvider] = useState<ImageProvider | null>(\n    null\n  );\n  const [editConfig, setEditConfig] =\n    useState<ImageGenerationConfigView | null>(null);\n  const [disconnectProvider, setDisconnectProvider] =\n    useState<ImageProvider | null>(null);\n  const [replacementProviderId, setReplacementProviderId] = useState<\n    string | null\n  >(null);\n\n  const connectedProviderIds = useMemo(() => {\n    return new Set(configs.map((c) => c.image_provider_id));\n  }, [configs]);\n\n  const defaultConfig = useMemo(() => {\n    return configs.find((c) => c.is_default);\n  }, [configs]);\n\n  const getStatus = (\n    provider: ImageProvider\n  ): \"disconnected\" | \"connected\" | \"selected\" => {\n    if (defaultConfig?.image_provider_id === provider.image_provider_id)\n      return \"selected\";\n    if (connectedProviderIds.has(provider.image_provider_id))\n      return \"connected\";\n    return \"disconnected\";\n  };\n\n  const handleConnect = (provider: ImageProvider) => {\n    setEditConfig(null);\n    setActiveProvider(provider);\n    modal.toggle(true);\n  };\n\n  const handleSelect = async (provider: ImageProvider) => {\n    const config = configs.find(\n      (c) => c.image_provider_id === provider.image_provider_id\n    );\n    if (config) {\n      try {\n        await setDefaultImageGenerationConfig(config.image_provider_id);\n        toast.success(`${provider.title} set as default`);\n        refetchConfigs();\n      } catch (error) {\n        toast.error(\n          error instanceof Error ? error.message : \"Failed to set default\"\n        );\n      }\n    }\n  };\n\n  const handleDeselect = async (provider: ImageProvider) => {\n    const config = configs.find(\n      (c) => c.image_provider_id === provider.image_provider_id\n    );\n    if (config) {\n      try {\n        await unsetDefaultImageGenerationConfig(config.image_provider_id);\n        toast.success(`${provider.title} deselected`);\n        refetchConfigs();\n      } catch (error) {\n        toast.error(\n          error instanceof Error ? error.message : \"Failed to deselect\"\n        );\n      }\n    }\n  };\n\n  const handleEdit = (provider: ImageProvider) => {\n    const config = configs.find(\n      (c) => c.image_provider_id === provider.image_provider_id\n    );\n    setEditConfig(config || null);\n    setActiveProvider(provider);\n    modal.toggle(true);\n  };\n\n  const handleDisconnect = async () => {\n    if (!disconnectProvider) return;\n    try {\n      // If a replacement was selected (not \"No Default\"), activate it first\n      if (replacementProviderId && replacementProviderId !== NO_DEFAULT_VALUE) {\n        await setDefaultImageGenerationConfig(replacementProviderId);\n      }\n\n      await deleteImageGenerationConfig(disconnectProvider.image_provider_id);\n      toast.success(`${disconnectProvider.title} disconnected`);\n      refetchConfigs();\n      refetchProviders();\n    } catch (error) {\n      console.error(\"Failed to disconnect image generation provider:\", error);\n      toast.error(\n        error instanceof Error ? error.message : \"Failed to disconnect\"\n      );\n    } finally {\n      setDisconnectProvider(null);\n      setReplacementProviderId(null);\n    }\n  };\n\n  const handleModalSuccess = () => {\n    toast.success(\"Provider configured successfully\");\n    setEditConfig(null);\n    refetchConfigs();\n    refetchProviders();\n  };\n\n  if (llmError || configError) {\n    return (\n      <div className=\"text-error\">\n        Failed to load configuration. Please refresh the page.\n      </div>\n    );\n  }\n\n  // Compute replacement options when disconnecting an active provider\n  const isDisconnectingDefault =\n    disconnectProvider &&\n    defaultConfig?.image_provider_id === disconnectProvider.image_provider_id;\n\n  // Group connected replacement models by provider (excluding the model being disconnected)\n  const replacementGroups = useMemo(() => {\n    if (!disconnectProvider) return [];\n    return IMAGE_PROVIDER_GROUPS.map((group) => ({\n      ...group,\n      providers: group.providers.filter(\n        (p) =>\n          p.image_provider_id !== disconnectProvider.image_provider_id &&\n          connectedProviderIds.has(p.image_provider_id)\n      ),\n    })).filter((g) => g.providers.length > 0);\n  }, [disconnectProvider, connectedProviderIds]);\n\n  const needsReplacement = !!isDisconnectingDefault;\n  const hasReplacements = replacementGroups.length > 0;\n\n  // Auto-select first replacement when modal opens\n  useEffect(() => {\n    if (needsReplacement && !replacementProviderId && hasReplacements) {\n      const firstGroup = replacementGroups[0];\n      const firstModel = firstGroup?.providers[0];\n      if (firstModel) setReplacementProviderId(firstModel.image_provider_id);\n    }\n  }, [disconnectProvider]); // eslint-disable-line react-hooks/exhaustive-deps\n\n  return (\n    <>\n      <div className=\"flex flex-col gap-4\">\n        <Content\n          title=\"Image Generation Model\"\n          description=\"Select a model to generate images in chat.\"\n          sizePreset=\"main-content\"\n          variant=\"section\"\n        />\n\n        {connectedProviderIds.size === 0 && (\n          <Message\n            info\n            static\n            large\n            close={false}\n            text=\"Connect an image generation model to use in chat.\"\n            className=\"w-full\"\n          />\n        )}\n\n        {/* Provider Groups */}\n        {IMAGE_PROVIDER_GROUPS.map((group) => (\n          <div key={group.name} className=\"flex flex-col gap-2\">\n            <Content title={group.name} sizePreset=\"secondary\" variant=\"body\" />\n            {group.providers.map((provider) => {\n              const status = getStatus(provider);\n              const isDisconnected = status === \"disconnected\";\n              const isConnected = status === \"connected\";\n              const isSelected = status === \"selected\";\n\n              return (\n                <Hoverable.Root\n                  key={provider.image_provider_id}\n                  group=\"image-gen/ProviderCard\"\n                >\n                  <SelectCard\n                    state={STATUS_TO_STATE[status]}\n                    padding=\"sm\"\n                    rounding=\"lg\"\n                    aria-label={`image-gen-provider-${provider.image_provider_id}`}\n                    onClick={\n                      isDisconnected\n                        ? () => handleConnect(provider)\n                        : isSelected\n                          ? () => handleDeselect(provider)\n                          : undefined\n                    }\n                  >\n                    <CardHeaderLayout\n                      sizePreset=\"main-ui\"\n                      variant=\"section\"\n                      icon={() => (\n                        <ProviderIcon\n                          provider={provider.provider_name}\n                          size={16}\n                        />\n                      )}\n                      title={provider.title}\n                      description={provider.description}\n                      rightChildren={\n                        isDisconnected ? (\n                          <Button\n                            prominence=\"tertiary\"\n                            rightIcon={SvgArrowExchange}\n                            onClick={(e) => {\n                              e.stopPropagation();\n                              handleConnect(provider);\n                            }}\n                          >\n                            Connect\n                          </Button>\n                        ) : isConnected ? (\n                          <Button\n                            prominence=\"tertiary\"\n                            rightIcon={SvgArrowRightCircle}\n                            onClick={(e) => {\n                              e.stopPropagation();\n                              handleSelect(provider);\n                            }}\n                          >\n                            Set as Default\n                          </Button>\n                        ) : isSelected ? (\n                          <div className=\"p-2\">\n                            <Content\n                              title=\"Current Default\"\n                              sizePreset=\"main-ui\"\n                              variant=\"section\"\n                              icon={SvgCheckSquare}\n                            />\n                          </div>\n                        ) : undefined\n                      }\n                      bottomRightChildren={\n                        !isDisconnected ? (\n                          <div className=\"flex flex-row px-1 pb-1\">\n                            <Hoverable.Item group=\"image-gen/ProviderCard\">\n                              <Button\n                                icon={SvgUnplug}\n                                tooltip=\"Disconnect\"\n                                aria-label={`Disconnect ${provider.title}`}\n                                prominence=\"tertiary\"\n                                onClick={(e) => {\n                                  e.stopPropagation();\n                                  setDisconnectProvider(provider);\n                                }}\n                                size=\"md\"\n                              />\n                            </Hoverable.Item>\n                            <Button\n                              icon={SvgSettings}\n                              tooltip=\"Edit\"\n                              aria-label={`Edit ${provider.title}`}\n                              prominence=\"tertiary\"\n                              onClick={(e) => {\n                                e.stopPropagation();\n                                handleEdit(provider);\n                              }}\n                              size=\"md\"\n                            />\n                          </div>\n                        ) : undefined\n                      }\n                    />\n                  </SelectCard>\n                </Hoverable.Root>\n              );\n            })}\n          </div>\n        ))}\n      </div>\n\n      {disconnectProvider && (\n        <ConfirmationModalLayout\n          icon={SvgUnplug}\n          title={`Disconnect ${disconnectProvider.title}`}\n          description=\"This will remove the stored credentials for this provider.\"\n          onClose={() => {\n            setDisconnectProvider(null);\n            setReplacementProviderId(null);\n          }}\n          submit={\n            <Button\n              variant=\"danger\"\n              onClick={() => void handleDisconnect()}\n              disabled={\n                needsReplacement && hasReplacements && !replacementProviderId\n              }\n            >\n              Disconnect\n            </Button>\n          }\n        >\n          {needsReplacement ? (\n            hasReplacements ? (\n              <Section alignItems=\"start\">\n                <Text as=\"p\" color=\"text-03\">\n                  {markdown(\n                    `**${disconnectProvider.title}** is currently the default image generation model. Session history will be preserved.`\n                  )}\n                </Text>\n                <Section alignItems=\"start\" gap={0.25}>\n                  <Text as=\"p\" color=\"text-04\">\n                    Set New Default\n                  </Text>\n                  <InputSelect\n                    value={replacementProviderId ?? undefined}\n                    onValueChange={(v) => setReplacementProviderId(v)}\n                  >\n                    <InputSelect.Trigger placeholder=\"Select a replacement model\" />\n                    <InputSelect.Content>\n                      {replacementGroups.map((group) => (\n                        <InputSelect.Group key={group.name}>\n                          <InputSelect.Label>{group.name}</InputSelect.Label>\n                          {group.providers.map((p) => (\n                            <InputSelect.Item\n                              key={p.image_provider_id}\n                              value={p.image_provider_id}\n                              icon={() => (\n                                <ProviderIcon\n                                  provider={p.provider_name}\n                                  size={16}\n                                />\n                              )}\n                            >\n                              {p.title}\n                            </InputSelect.Item>\n                          ))}\n                        </InputSelect.Group>\n                      ))}\n                      <InputSelect.Separator />\n                      <InputSelect.Item\n                        value={NO_DEFAULT_VALUE}\n                        icon={SvgSlash}\n                      >\n                        <span>\n                          <b>No Default</b>\n                          <span className=\"text-text-03\">\n                            {\" \"}\n                            (Disable Image Generation)\n                          </span>\n                        </span>\n                      </InputSelect.Item>\n                    </InputSelect.Content>\n                  </InputSelect>\n                </Section>\n              </Section>\n            ) : (\n              <>\n                <Text as=\"p\" color=\"text-03\">\n                  {markdown(\n                    `**${disconnectProvider.title}** is currently the default image generation model.`\n                  )}\n                </Text>\n                <Text as=\"p\" color=\"text-03\">\n                  Connect another provider to continue using image generation.\n                </Text>\n              </>\n            )\n          ) : (\n            <>\n              <Text as=\"p\" color=\"text-03\">\n                {markdown(\n                  `**${disconnectProvider.title}** models will no longer be used to generate images.`\n                )}\n              </Text>\n              <Text as=\"p\" color=\"text-03\">\n                Session history will be preserved.\n              </Text>\n            </>\n          )}\n        </ConfirmationModalLayout>\n      )}\n\n      {activeProvider && (\n        <modal.Provider>\n          {getImageGenForm({\n            modal: modal,\n            imageProvider: activeProvider,\n            existingProviders: llmProviders,\n            existingConfig: editConfig || undefined,\n            onSuccess: handleModalSuccess,\n          })}\n        </modal.Provider>\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ImageGenerationPage/constants.ts",
    "content": "export interface ImageProvider {\n  image_provider_id: string; // Static unique key for UI-DB mapping\n  model_name: string; // Actual model name for LLM API\n  provider_name: string;\n  title: string;\n  description: string;\n}\n\nexport interface ProviderGroup {\n  name: string;\n  providers: ImageProvider[];\n}\n\nexport const IMAGE_PROVIDER_GROUPS: ProviderGroup[] = [\n  {\n    name: \"OpenAI\",\n    providers: [\n      {\n        image_provider_id: \"openai_gpt_image_1_5\",\n        model_name: \"gpt-image-1.5\",\n        provider_name: \"openai\",\n        title: \"GPT Image 1.5\",\n        description:\n          \"OpenAI's latest Image Generation model with the highest prompt fidelity.\",\n      },\n      {\n        image_provider_id: \"openai_gpt_image_1\",\n        model_name: \"gpt-image-1\",\n        provider_name: \"openai\",\n        title: \"GPT Image 1\",\n        description:\n          \"A capable image generation model from OpenAI with strong prompt adherence.\",\n      },\n      {\n        image_provider_id: \"openai_dalle_3\",\n        model_name: \"dall-e-3\",\n        provider_name: \"openai\",\n        title: \"DALL-E 3\",\n        description:\n          \"OpenAI image generation model capable of generating rich and expressive images.\",\n      },\n    ],\n  },\n  {\n    name: \"Azure OpenAI\",\n    providers: [\n      {\n        image_provider_id: \"azure_gpt_image_1_5\",\n        model_name: \"\", // Extracted from deployment in target URI\n        provider_name: \"azure\",\n        title: \"Azure OpenAI GPT Image 1.5\",\n        description:\n          \"GPT Image 1.5 image generation model hosted on Microsoft Azure.\",\n      },\n      {\n        image_provider_id: \"azure_gpt_image_1\",\n        model_name: \"\", // Extracted from deployment in target URI\n        provider_name: \"azure\",\n        title: \"Azure OpenAI GPT Image 1\",\n        description:\n          \"GPT Image 1 image generation model hosted on Microsoft Azure.\",\n      },\n      {\n        image_provider_id: \"azure_dalle_3\",\n        model_name: \"\", // Extracted from deployment in target URI\n        provider_name: \"azure\",\n        title: \"Azure OpenAI DALL-E 3\",\n        description:\n          \"DALL-E 3 image generation model hosted on Microsoft Azure.\",\n      },\n    ],\n  },\n  {\n    name: \"Google Cloud Vertex AI\",\n    providers: [\n      {\n        image_provider_id: \"gemini-2.5-flash-image\",\n        model_name: \"gemini-2.5-flash-image\",\n        provider_name: \"vertex_ai\",\n        title: \"Gemini 2.5 Flash Image\",\n        description:\n          \"Gemini 2.5 Flash Image (Nano Banana) model is designed for speed and efficiency.\",\n      },\n      {\n        image_provider_id: \"gemini-3-pro-image-preview\",\n        model_name: \"gemini-3-pro-image-preview\",\n        provider_name: \"vertex_ai\",\n        title: \"Gemini 3 Pro Image Preview\",\n        description:\n          \"Gemini 3 Pro Image Preview (Nano Banana Pro) is designed for professional asset production.\",\n      },\n    ],\n  },\n];\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ImageGenerationPage/forms/AzureImageGenForm.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport * as Yup from \"yup\";\nimport { FormikField } from \"@/refresh-components/form/FormikField\";\nimport { FormField } from \"@/refresh-components/form/FormField\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport InputComboBox from \"@/refresh-components/inputs/InputComboBox\";\nimport PasswordInputTypeIn from \"@/refresh-components/inputs/PasswordInputTypeIn\";\nimport { ImageGenFormWrapper } from \"@/refresh-pages/admin/ImageGenerationPage/forms/ImageGenFormWrapper\";\nimport {\n  ImageGenFormBaseProps,\n  ImageGenFormChildProps,\n  ImageGenSubmitPayload,\n} from \"@/refresh-pages/admin/ImageGenerationPage/forms/types\";\nimport { ImageGenerationCredentials } from \"@/refresh-pages/admin/ImageGenerationPage/svc\";\nimport { ImageProvider } from \"@/refresh-pages/admin/ImageGenerationPage/constants\";\nimport {\n  parseAzureTargetUri,\n  isValidAzureTargetUri,\n} from \"@/lib/azureTargetUri\";\n\n// Azure form values - target URI and API key\ninterface AzureFormValues {\n  target_uri: string;\n  api_key: string;\n}\n\nconst initialValues: AzureFormValues = {\n  target_uri: \"\",\n  api_key: \"\",\n};\n\nconst validationSchema = Yup.object().shape({\n  target_uri: Yup.string()\n    .required(\"Target URI is required\")\n    .test(\n      \"valid-target-uri\",\n      \"Target URI must be a valid URL with api-version and deployment name\",\n      (value) => (value ? isValidAzureTargetUri(value) : false)\n    ),\n  api_key: Yup.string().required(\"API Key is required\"),\n});\n\nfunction AzureFormFields(props: ImageGenFormChildProps<AzureFormValues>) {\n  const {\n    formikProps,\n    apiStatus,\n    showApiMessage,\n    errorMessage,\n    disabled,\n    isLoadingCredentials,\n    apiKeyOptions,\n    resetApiState,\n    imageProvider,\n  } = props;\n\n  return (\n    <>\n      {/* Target URI field */}\n      <FormikField<string>\n        name=\"target_uri\"\n        render={(field, helper, meta, state) => (\n          <FormField name=\"target_uri\" state={state} className=\"w-full\">\n            <FormField.Label>Target URI</FormField.Label>\n            <FormField.Control>\n              <InputTypeIn\n                {...field}\n                placeholder=\"https://your-resource.cognitiveservices.azure.com/openai/deployments/deployment-name/images/generations?api-version=2025-01-01-preview\"\n                showClearButton={false}\n                variant={disabled ? \"disabled\" : undefined}\n              />\n            </FormField.Control>\n            <FormField.Message\n              messages={{\n                idle: (\n                  <>\n                    Paste your endpoint target URI from{\" \"}\n                    <a\n                      href=\"https://oai.azure.com\"\n                      target=\"_blank\"\n                      rel=\"noopener noreferrer\"\n                      className=\"underline\"\n                    >\n                      Azure OpenAI\n                    </a>{\" \"}\n                    (including API endpoint base, deployment name, and API\n                    version).\n                  </>\n                ),\n                error: meta.error,\n              }}\n            />\n          </FormField>\n        )}\n      />\n\n      {/* API Key field */}\n      <FormikField<string>\n        name=\"api_key\"\n        render={(field, helper, meta, state) => (\n          <FormField\n            name=\"api_key\"\n            state={apiStatus === \"error\" ? \"error\" : state}\n            className=\"w-full\"\n          >\n            <FormField.Label>API Key</FormField.Label>\n            <FormField.Control>\n              {apiKeyOptions.length > 0 ? (\n                <InputComboBox\n                  value={field.value}\n                  onChange={(e) => {\n                    helper.setValue(e.target.value);\n                    resetApiState();\n                  }}\n                  onValueChange={(value) => {\n                    helper.setValue(value);\n                    resetApiState();\n                  }}\n                  onBlur={field.onBlur}\n                  options={apiKeyOptions}\n                  placeholder={\n                    isLoadingCredentials\n                      ? \"Loading...\"\n                      : \"Enter new API key or select existing provider\"\n                  }\n                  disabled={disabled || !formikProps.values.target_uri?.trim()}\n                  isError={apiStatus === \"error\"}\n                />\n              ) : (\n                <PasswordInputTypeIn\n                  {...field}\n                  onChange={(e) => {\n                    field.onChange(e);\n                    resetApiState();\n                  }}\n                  placeholder={\n                    isLoadingCredentials ? \"Loading...\" : \"Enter your API key\"\n                  }\n                  showClearButton={false}\n                  disabled={disabled || !formikProps.values.target_uri?.trim()}\n                  error={apiStatus === \"error\"}\n                />\n              )}\n            </FormField.Control>\n            {showApiMessage ? (\n              <FormField.APIMessage\n                state={apiStatus}\n                messages={{\n                  loading: `Testing API key with ${imageProvider.title}...`,\n                  success: \"API key is valid. Configuration saved.\",\n                  error: errorMessage || \"Invalid API key\",\n                }}\n              />\n            ) : (\n              <FormField.Message\n                messages={{\n                  idle: (\n                    <>\n                      {\"Paste your \"}\n                      <a\n                        href=\"https://oai.azure.com\"\n                        target=\"_blank\"\n                        rel=\"noopener noreferrer\"\n                        className=\"underline\"\n                      >\n                        API key\n                      </a>\n                      {\" from Azure OpenAI to access your models.\"}\n                    </>\n                  ),\n                  error: meta.error,\n                }}\n              />\n            )}\n          </FormField>\n        )}\n      />\n    </>\n  );\n}\n\nfunction getInitialValuesFromCredentials(\n  credentials: ImageGenerationCredentials,\n  imageProvider: ImageProvider\n): Partial<AzureFormValues> {\n  // Reconstruct target_uri from credentials\n  let targetUri = \"\";\n  if (credentials.api_base && credentials.api_version) {\n    const deployment = credentials.deployment_name || imageProvider.model_name;\n    targetUri = `${credentials.api_base}/openai/deployments/${deployment}/images/generations?api-version=${credentials.api_version}`;\n  }\n\n  return {\n    api_key: credentials.api_key || \"\",\n    target_uri: targetUri,\n  };\n}\n\nfunction transformValues(\n  values: AzureFormValues,\n  imageProvider: ImageProvider\n): ImageGenSubmitPayload {\n  // Parse target_uri to extract api_base, api_version, deployment_name\n  let apiBase: string | undefined;\n  let apiVersion: string | undefined;\n  let deploymentName: string | undefined;\n  let modelName = imageProvider.model_name;\n\n  if (values.target_uri) {\n    try {\n      const parsed = parseAzureTargetUri(values.target_uri);\n      apiBase = parsed.url.origin;\n      apiVersion = parsed.apiVersion;\n      deploymentName = parsed.deploymentName || undefined;\n      // For Azure, use deployment name as model name\n      modelName = deploymentName || imageProvider.model_name;\n    } catch (error) {\n      console.error(\"Failed to parse target_uri:\", error);\n    }\n  }\n\n  return {\n    modelName,\n    imageProviderId: imageProvider.image_provider_id,\n    provider: \"azure\",\n    apiKey: values.api_key,\n    apiBase,\n    apiVersion,\n    deploymentName,\n  };\n}\n\nexport function AzureImageGenForm(props: ImageGenFormBaseProps) {\n  const { imageProvider, existingConfig } = props;\n\n  return (\n    <ImageGenFormWrapper<AzureFormValues>\n      {...props}\n      title={\n        existingConfig\n          ? `Edit ${imageProvider.title}`\n          : `Connect ${imageProvider.title}`\n      }\n      description={imageProvider.description}\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      getInitialValuesFromCredentials={getInitialValuesFromCredentials}\n      transformValues={(values) => transformValues(values, imageProvider)}\n    >\n      {(childProps) => <AzureFormFields {...childProps} />}\n    </ImageGenFormWrapper>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ImageGenerationPage/forms/ImageGenFormWrapper.tsx",
    "content": "\"use client\";\n\nimport React, { useState, useMemo, useEffect } from \"react\";\nimport { Form, Formik, FormikProps } from \"formik\";\nimport ProviderModal from \"@/components/modals/ProviderModal\";\nimport { ProviderIcon } from \"@/app/admin/configuration/llm/ProviderIcon\";\nimport ConnectionProviderIcon from \"@/refresh-components/ConnectionProviderIcon\";\nimport {\n  testImageGenerationApiKey,\n  createImageGenerationConfig,\n  updateImageGenerationConfig,\n  fetchImageGenerationCredentials,\n} from \"@/refresh-pages/admin/ImageGenerationPage/svc\";\nimport { APIFormFieldState } from \"@/refresh-components/form/types\";\nimport {\n  ImageGenFormWrapperProps,\n  ImageGenFormChildProps,\n  ImageGenSubmitPayload,\n  FormValues,\n} from \"@/refresh-pages/admin/ImageGenerationPage/forms/types\";\nimport { toast } from \"@/hooks/useToast\";\n\nexport function ImageGenFormWrapper<T extends FormValues>({\n  modal,\n  imageProvider,\n  existingProviders,\n  existingConfig,\n  onSuccess,\n  title,\n  description,\n  initialValues,\n  validationSchema,\n  children,\n  transformValues,\n  getInitialValuesFromCredentials,\n}: ImageGenFormWrapperProps<T>) {\n  // State management\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const [apiStatus, setApiStatus] = useState<APIFormFieldState>(\"idle\");\n  const [showApiMessage, setShowApiMessage] = useState(false);\n  const [errorMessage, setErrorMessage] = useState(\"\");\n  const [isLoadingCredentials, setIsLoadingCredentials] = useState(false);\n\n  // Track merged initial values with fetched credentials\n  const [mergedInitialValues, setMergedInitialValues] =\n    useState<T>(initialValues);\n\n  const isEditMode = !!existingConfig;\n\n  // Compute API key options from existing providers matching this image provider\n  // API keys from LLM providers are already masked by backend (first 4 + **** + last 4)\n  const apiKeyOptions = useMemo(() => {\n    return existingProviders\n      .filter((p) => p.provider === imageProvider.provider_name)\n      .map((provider) => ({\n        value: `existing:${provider.id}:${provider.name}`,\n        label: provider.api_key || \"****\",\n      }));\n  }, [existingProviders, imageProvider.provider_name]);\n\n  // Fetch credentials when modal opens in edit mode\n  useEffect(() => {\n    if (existingConfig && modal.isOpen) {\n      setIsLoadingCredentials(true);\n      fetchImageGenerationCredentials(existingConfig.image_provider_id)\n        .then((creds) => {\n          if (getInitialValuesFromCredentials) {\n            const credValues = getInitialValuesFromCredentials(\n              creds,\n              imageProvider\n            );\n            setMergedInitialValues((prev) => ({ ...prev, ...credValues }));\n          }\n        })\n        .catch((err) => {\n          console.error(\"Failed to fetch credentials:\", err);\n        })\n        .finally(() => {\n          setIsLoadingCredentials(false);\n        });\n    } else if (!modal.isOpen) {\n      // Reset when modal closes\n      setMergedInitialValues(initialValues);\n      setApiStatus(\"idle\");\n      setShowApiMessage(false);\n      setErrorMessage(\"\");\n    }\n  }, [\n    existingConfig,\n    modal.isOpen,\n    getInitialValuesFromCredentials,\n    imageProvider,\n    initialValues,\n  ]);\n\n  // Close modal after successful connection (1 second delay)\n  useEffect(() => {\n    if (apiStatus === \"success\" && !isSubmitting) {\n      const timer = setTimeout(() => {\n        onSuccess();\n        modal.toggle(false);\n      }, 1000);\n      return () => clearTimeout(timer);\n    }\n  }, [apiStatus, isSubmitting, modal, onSuccess]);\n\n  const resetApiState = () => {\n    if (showApiMessage) {\n      setShowApiMessage(false);\n      setApiStatus(\"idle\");\n      setErrorMessage(\"\");\n    }\n  };\n\n  const handleSubmit = async (values: T) => {\n    setIsSubmitting(true);\n    setShowApiMessage(true);\n    setApiStatus(\"loading\");\n\n    try {\n      // Get the submit payload from transformValues or use defaults\n      const payload: ImageGenSubmitPayload = transformValues\n        ? transformValues(values)\n        : {\n            modelName: imageProvider.model_name,\n            imageProviderId: imageProvider.image_provider_id,\n            provider: imageProvider.provider_name,\n            apiKey: (values as Record<string, unknown>).api_key as\n              | string\n              | undefined,\n          };\n\n      // Check if user selected existing provider (clone mode)\n      const apiKeyValue = (values as Record<string, unknown>).api_key as string;\n      const isCloneMode = apiKeyValue?.startsWith(\"existing:\");\n\n      if (isCloneMode) {\n        const parts = apiKeyValue.split(\":\");\n        const providerIdStr = parts[1];\n        if (!providerIdStr) {\n          throw new Error(\"Invalid provider selection\");\n        }\n        const providerId = parseInt(providerIdStr, 10);\n\n        // Test API key from existing provider before creating config\n        const result = await testImageGenerationApiKey(payload.modelName, {\n          sourceLlmProviderId: providerId,\n          apiBase: payload.apiBase,\n          apiVersion: payload.apiVersion,\n          deploymentName: payload.deploymentName,\n          customConfig: payload.customConfig,\n        });\n\n        if (!result.ok) {\n          setApiStatus(\"error\");\n          setErrorMessage(result.errorMessage || \"API key validation failed\");\n          setIsSubmitting(false);\n          return;\n        }\n\n        // Test passed - now create/update config\n        if (isEditMode && existingConfig) {\n          await updateImageGenerationConfig(existingConfig.image_provider_id, {\n            modelName: payload.modelName,\n            sourceLlmProviderId: providerId,\n            apiBase: payload.apiBase,\n            apiVersion: payload.apiVersion,\n            deploymentName: payload.deploymentName,\n            customConfig: payload.customConfig,\n          });\n        } else {\n          await createImageGenerationConfig({\n            imageProviderId: payload.imageProviderId,\n            modelName: payload.modelName,\n            sourceLlmProviderId: providerId,\n            apiBase: payload.apiBase,\n            apiVersion: payload.apiVersion,\n            deploymentName: payload.deploymentName,\n            customConfig: payload.customConfig,\n            isDefault: true,\n          });\n        }\n      } else {\n        // New credentials mode - check if API key was changed from masked value\n        // A masked key contains \"****\", so if present, user hasn't entered a new key\n        const apiKeyChanged = !apiKeyValue?.includes(\"****\");\n\n        // Test the API key first (only if changed or creating new config)\n        if (apiKeyChanged) {\n          const result = await testImageGenerationApiKey(payload.modelName, {\n            provider: payload.provider,\n            apiKey: payload.apiKey,\n            apiBase: payload.apiBase,\n            apiVersion: payload.apiVersion,\n            deploymentName: payload.deploymentName,\n            customConfig: payload.customConfig,\n          });\n\n          if (!result.ok) {\n            setApiStatus(\"error\");\n            setErrorMessage(result.errorMessage || \"API key validation failed\");\n            setIsSubmitting(false);\n            return;\n          }\n        }\n\n        // Create or update config\n        if (isEditMode && existingConfig) {\n          await updateImageGenerationConfig(existingConfig.image_provider_id, {\n            modelName: payload.modelName,\n            provider: payload.provider,\n            apiKey: payload.apiKey,\n            apiBase: payload.apiBase,\n            apiVersion: payload.apiVersion,\n            deploymentName: payload.deploymentName,\n            customConfig: payload.customConfig,\n            apiKeyChanged,\n          });\n        } else {\n          await createImageGenerationConfig({\n            imageProviderId: payload.imageProviderId,\n            modelName: payload.modelName,\n            provider: payload.provider,\n            apiKey: payload.apiKey,\n            apiBase: payload.apiBase,\n            apiVersion: payload.apiVersion,\n            deploymentName: payload.deploymentName,\n            customConfig: payload.customConfig,\n            isDefault: true,\n          });\n        }\n      }\n\n      setApiStatus(\"success\");\n      setErrorMessage(\"\");\n      setIsSubmitting(false);\n    } catch (error) {\n      const message =\n        error instanceof Error ? error.message : \"Unknown error occurred\";\n      setApiStatus(\"error\");\n      setErrorMessage(message);\n      toast.error(message);\n      setIsSubmitting(false);\n    }\n  };\n\n  const icon = () => (\n    <ConnectionProviderIcon\n      icon={<ProviderIcon provider={imageProvider.provider_name} size={24} />}\n    />\n  );\n\n  // Create child props\n  const createChildProps = (\n    formikProps: FormikProps<T>\n  ): ImageGenFormChildProps<T> => ({\n    formikProps,\n    apiStatus,\n    setApiStatus,\n    showApiMessage,\n    setShowApiMessage,\n    errorMessage,\n    setErrorMessage,\n    isSubmitting,\n    disabled: isSubmitting || isLoadingCredentials,\n    isEditMode,\n    isLoadingCredentials,\n    apiKeyOptions,\n    resetApiState,\n    imageProvider,\n  });\n\n  return (\n    <Formik<T>\n      initialValues={mergedInitialValues}\n      onSubmit={handleSubmit}\n      validationSchema={validationSchema}\n      enableReinitialize\n    >\n      {(formikProps) => {\n        const childProps = createChildProps(formikProps);\n\n        return (\n          <ProviderModal\n            open={modal.isOpen}\n            onOpenChange={modal.toggle}\n            title={title}\n            description={description}\n            icon={icon}\n            onSubmit={formikProps.submitForm}\n            submitDisabled={\n              !formikProps.isValid ||\n              (!isEditMode && !formikProps.dirty) ||\n              isSubmitting\n            }\n            isSubmitting={isSubmitting}\n          >\n            <Form className=\"flex flex-col gap-0 bg-background-tint-01 w-full\">\n              <div className=\"flex flex-col gap-4 w-full\">\n                {children(childProps)}\n              </div>\n            </Form>\n          </ProviderModal>\n        );\n      }}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ImageGenerationPage/forms/OpenAIImageGenForm.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport * as Yup from \"yup\";\nimport { FormikField } from \"@/refresh-components/form/FormikField\";\nimport { FormField } from \"@/refresh-components/form/FormField\";\nimport InputComboBox from \"@/refresh-components/inputs/InputComboBox\";\nimport PasswordInputTypeIn from \"@/refresh-components/inputs/PasswordInputTypeIn\";\nimport { ImageGenFormWrapper } from \"@/refresh-pages/admin/ImageGenerationPage/forms/ImageGenFormWrapper\";\nimport {\n  ImageGenFormBaseProps,\n  ImageGenFormChildProps,\n  ImageGenSubmitPayload,\n} from \"@/refresh-pages/admin/ImageGenerationPage/forms/types\";\nimport { ImageGenerationCredentials } from \"@/refresh-pages/admin/ImageGenerationPage/svc\";\nimport { ImageProvider } from \"@/refresh-pages/admin/ImageGenerationPage/constants\";\n\n// OpenAI form values - just API key\ninterface OpenAIFormValues {\n  api_key: string;\n}\n\nconst initialValues: OpenAIFormValues = {\n  api_key: \"\",\n};\n\nconst validationSchema = Yup.object().shape({\n  api_key: Yup.string().required(\"API Key is required\"),\n});\n\nfunction OpenAIFormFields(props: ImageGenFormChildProps<OpenAIFormValues>) {\n  const {\n    apiStatus,\n    showApiMessage,\n    errorMessage,\n    disabled,\n    isLoadingCredentials,\n    apiKeyOptions,\n    resetApiState,\n    imageProvider,\n  } = props;\n\n  return (\n    <FormikField<string>\n      name=\"api_key\"\n      render={(field, helper, meta, state) => (\n        <FormField\n          name=\"api_key\"\n          state={apiStatus === \"error\" ? \"error\" : state}\n          className=\"w-full\"\n        >\n          <FormField.Label>API Key</FormField.Label>\n          <FormField.Control>\n            {apiKeyOptions.length > 0 ? (\n              <InputComboBox\n                value={field.value}\n                onChange={(e) => {\n                  helper.setValue(e.target.value);\n                  resetApiState();\n                }}\n                onValueChange={(value) => {\n                  helper.setValue(value);\n                  resetApiState();\n                }}\n                onBlur={field.onBlur}\n                options={apiKeyOptions}\n                placeholder={\n                  isLoadingCredentials\n                    ? \"Loading...\"\n                    : \"Enter new API key or select existing provider\"\n                }\n                disabled={disabled}\n                isError={apiStatus === \"error\"}\n              />\n            ) : (\n              <PasswordInputTypeIn\n                {...field}\n                onChange={(e) => {\n                  field.onChange(e);\n                  resetApiState();\n                }}\n                placeholder={\n                  isLoadingCredentials ? \"Loading...\" : \"Enter your API key\"\n                }\n                showClearButton={false}\n                disabled={disabled}\n                error={apiStatus === \"error\"}\n              />\n            )}\n          </FormField.Control>\n          {showApiMessage ? (\n            <FormField.APIMessage\n              state={apiStatus}\n              messages={{\n                loading: `Testing API key with ${imageProvider.title}...`,\n                success: \"API key is valid. Configuration saved.\",\n                error: errorMessage || \"Invalid API key\",\n              }}\n            />\n          ) : (\n            <FormField.Message\n              messages={{\n                idle: \"Enter a new API key or select an existing provider.\",\n                error: meta.error,\n              }}\n            />\n          )}\n        </FormField>\n      )}\n    />\n  );\n}\n\nfunction getInitialValuesFromCredentials(\n  credentials: ImageGenerationCredentials,\n  _imageProvider: ImageProvider\n): Partial<OpenAIFormValues> {\n  return {\n    api_key: credentials.api_key || \"\",\n  };\n}\n\nfunction transformValues(\n  values: OpenAIFormValues,\n  imageProvider: ImageProvider\n): ImageGenSubmitPayload {\n  return {\n    modelName: imageProvider.model_name,\n    imageProviderId: imageProvider.image_provider_id,\n    provider: \"openai\",\n    apiKey: values.api_key,\n  };\n}\n\nexport function OpenAIImageGenForm(props: ImageGenFormBaseProps) {\n  const { imageProvider, existingConfig } = props;\n\n  return (\n    <ImageGenFormWrapper<OpenAIFormValues>\n      {...props}\n      title={\n        existingConfig\n          ? `Edit ${imageProvider.title}`\n          : `Connect ${imageProvider.title}`\n      }\n      description={imageProvider.description}\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      getInitialValuesFromCredentials={getInitialValuesFromCredentials}\n      transformValues={(values) => transformValues(values, imageProvider)}\n    >\n      {(childProps) => <OpenAIFormFields {...childProps} />}\n    </ImageGenFormWrapper>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ImageGenerationPage/forms/VertexImageGenForm.tsx",
    "content": "\"use client\";\n\nimport * as Yup from \"yup\";\nimport { FormikField } from \"@/refresh-components/form/FormikField\";\nimport { FormField } from \"@/refresh-components/form/FormField\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport InputFile from \"@/refresh-components/inputs/InputFile\";\nimport InlineExternalLink from \"@/refresh-components/InlineExternalLink\";\nimport { ImageGenFormWrapper } from \"@/refresh-pages/admin/ImageGenerationPage/forms/ImageGenFormWrapper\";\nimport {\n  ImageGenFormBaseProps,\n  ImageGenFormChildProps,\n  ImageGenSubmitPayload,\n} from \"@/refresh-pages/admin/ImageGenerationPage/forms/types\";\nimport { ImageProvider } from \"@/refresh-pages/admin/ImageGenerationPage/constants\";\nimport { ImageGenerationCredentials } from \"@/refresh-pages/admin/ImageGenerationPage/svc\";\n\nconst VERTEXAI_PROVIDER_NAME = \"vertex_ai\";\nconst VERTEXAI_DEFAULT_LOCATION = \"global\";\n\n// Vertex form values\ninterface VertexImageGenFormValues {\n  custom_config: {\n    vertex_credentials: string;\n    vertex_location: string;\n  };\n}\n\nconst initialValues: VertexImageGenFormValues = {\n  custom_config: {\n    vertex_credentials: \"\",\n    vertex_location: VERTEXAI_DEFAULT_LOCATION,\n  },\n};\n\nconst validationSchema = Yup.object().shape({\n  custom_config: Yup.object().shape({\n    vertex_credentials: Yup.string().required(\"Credentials file is required\"),\n    vertex_location: Yup.string().required(\"Location is required\"),\n  }),\n});\n\nfunction getInitialValuesFromCredentials(\n  credentials: ImageGenerationCredentials,\n  _imageProvider: ImageProvider\n): Partial<VertexImageGenFormValues> {\n  return {\n    custom_config: {\n      vertex_credentials: credentials.custom_config?.vertex_credentials || \"\",\n      vertex_location:\n        credentials.custom_config?.vertex_location || VERTEXAI_DEFAULT_LOCATION,\n    },\n  };\n}\n\nfunction transformValues(\n  values: VertexImageGenFormValues,\n  imageProvider: ImageProvider\n): ImageGenSubmitPayload {\n  return {\n    modelName: imageProvider.model_name,\n    imageProviderId: imageProvider.image_provider_id,\n    provider: VERTEXAI_PROVIDER_NAME,\n    customConfig: {\n      vertex_credentials: values.custom_config.vertex_credentials,\n      vertex_location: values.custom_config.vertex_location,\n    },\n  };\n}\n\nfunction VertexFormFields(\n  props: ImageGenFormChildProps<VertexImageGenFormValues>\n) {\n  const { apiStatus, showApiMessage, errorMessage, disabled, imageProvider } =\n    props;\n\n  return (\n    <>\n      {/* Credentials File field */}\n      <FormikField<string>\n        name=\"custom_config.vertex_credentials\"\n        render={(field, helper, meta, state) => (\n          <FormField\n            name=\"custom_config.vertex_credentials\"\n            state={apiStatus === \"error\" ? \"error\" : state}\n            className=\"w-full\"\n          >\n            <FormField.Label>Credentials File</FormField.Label>\n            <FormField.Control>\n              <InputFile\n                setValue={(value) => helper.setValue(value)}\n                error={apiStatus === \"error\"}\n                onBlur={field.onBlur}\n                showClearButton={true}\n                disabled={disabled}\n                accept=\"application/json\"\n                placeholder=\"Upload or paste your credentials\"\n              />\n            </FormField.Control>\n            {showApiMessage ? (\n              <FormField.APIMessage\n                state={apiStatus}\n                messages={{\n                  loading: `Testing credentials with ${imageProvider.title}...`,\n                  success: \"Credentials valid. Configuration saved.\",\n                  error: errorMessage || \"Invalid credentials\",\n                }}\n              />\n            ) : (\n              <FormField.Message\n                messages={{\n                  idle: (\n                    <>\n                      {\"Upload or paste your \"}\n                      <InlineExternalLink href=\"https://console.cloud.google.com/projectselector2/iam-admin/serviceaccounts?supportedpurview=project\">\n                        service account credentials\n                      </InlineExternalLink>\n                      {\" from Google Cloud.\"}\n                    </>\n                  ),\n                  error: meta.error,\n                }}\n              />\n            )}\n          </FormField>\n        )}\n      />\n\n      {/* Location field */}\n      <FormikField<string>\n        name=\"custom_config.vertex_location\"\n        render={(field, helper, meta, state) => (\n          <FormField\n            name=\"custom_config.vertex_location\"\n            state={state}\n            className=\"w-full\"\n          >\n            <FormField.Label>Location</FormField.Label>\n            <FormField.Control>\n              <InputTypeIn\n                value={field.value}\n                onChange={(e) => helper.setValue(e.target.value)}\n                onBlur={field.onBlur}\n                placeholder=\"global\"\n                showClearButton={false}\n                variant={disabled ? \"disabled\" : undefined}\n              />\n            </FormField.Control>\n            <FormField.Message\n              messages={{\n                idle: (\n                  <>\n                    {\"The Google Cloud region for your Vertex AI models. See \"}\n                    <InlineExternalLink href=\"https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations\">\n                      Google&apos;s documentation\n                    </InlineExternalLink>\n                    {\" for available regions.\"}\n                  </>\n                ),\n                error: meta.error,\n              }}\n            />\n          </FormField>\n        )}\n      />\n    </>\n  );\n}\n\nexport function VertexImageGenForm(props: ImageGenFormBaseProps) {\n  const { imageProvider, existingConfig } = props;\n\n  return (\n    <ImageGenFormWrapper<VertexImageGenFormValues>\n      {...props}\n      title={\n        existingConfig\n          ? `Edit ${imageProvider.title}`\n          : `Connect ${imageProvider.title}`\n      }\n      description={imageProvider.description}\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      getInitialValuesFromCredentials={getInitialValuesFromCredentials}\n      transformValues={(values) => transformValues(values, imageProvider)}\n    >\n      {(childProps) => <VertexFormFields {...childProps} />}\n    </ImageGenFormWrapper>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ImageGenerationPage/forms/getImageGenForm.tsx",
    "content": "import React from \"react\";\nimport { ImageGenFormBaseProps } from \"@/refresh-pages/admin/ImageGenerationPage/forms/types\";\nimport { OpenAIImageGenForm } from \"@/refresh-pages/admin/ImageGenerationPage/forms/OpenAIImageGenForm\";\nimport { AzureImageGenForm } from \"@/refresh-pages/admin/ImageGenerationPage/forms/AzureImageGenForm\";\nimport { VertexImageGenForm } from \"@/refresh-pages/admin/ImageGenerationPage/forms/VertexImageGenForm\";\n\n/**\n * Factory function that routes to the correct provider-specific form\n * based on the imageProvider.provider_name.\n */\nexport function getImageGenForm(props: ImageGenFormBaseProps): React.ReactNode {\n  const providerName = props.imageProvider.provider_name;\n\n  switch (providerName) {\n    case \"openai\":\n      return <OpenAIImageGenForm {...props} />;\n    case \"azure\":\n      return <AzureImageGenForm {...props} />;\n    case \"vertex_ai\":\n      return <VertexImageGenForm {...props} />;\n    default:\n      // Fallback to OpenAI form for unknown providers\n      console.warn(\n        `Unknown image provider: ${providerName}, falling back to OpenAI form`\n      );\n      return <OpenAIImageGenForm {...props} />;\n  }\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ImageGenerationPage/forms/index.ts",
    "content": "export * from \"@/refresh-pages/admin/ImageGenerationPage/forms/types\";\nexport { ImageGenFormWrapper } from \"@/refresh-pages/admin/ImageGenerationPage/forms/ImageGenFormWrapper\";\nexport { OpenAIImageGenForm } from \"@/refresh-pages/admin/ImageGenerationPage/forms/OpenAIImageGenForm\";\nexport { AzureImageGenForm } from \"@/refresh-pages/admin/ImageGenerationPage/forms/AzureImageGenForm\";\nexport { getImageGenForm } from \"@/refresh-pages/admin/ImageGenerationPage/forms/getImageGenForm\";\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ImageGenerationPage/forms/types.ts",
    "content": "import { FormikProps } from \"formik\";\nimport { ImageProvider } from \"@/refresh-pages/admin/ImageGenerationPage/constants\";\nimport { LLMProviderView } from \"@/interfaces/llm\";\nimport {\n  ImageGenerationConfigView,\n  ImageGenerationCredentials,\n} from \"@/refresh-pages/admin/ImageGenerationPage/svc\";\nimport { ModalCreationInterface } from \"@/refresh-components/contexts/ModalContext\";\nimport { APIFormFieldState } from \"@/refresh-components/form/types\";\n\n// Base props for all image generation forms\nexport interface ImageGenFormBaseProps {\n  modal: ModalCreationInterface;\n  imageProvider: ImageProvider;\n  existingProviders: LLMProviderView[];\n  existingConfig?: ImageGenerationConfigView;\n  onSuccess: () => void;\n}\n\n// Base type for form values - allows any object structure\n// eslint-disable-next-line @typescript-eslint/no-explicit-any\nexport type FormValues = Record<string, any>;\n\n// Props for the generic wrapper component\nexport interface ImageGenFormWrapperProps<T extends FormValues>\n  extends ImageGenFormBaseProps {\n  title: string;\n  description: string;\n  initialValues: T;\n  validationSchema: unknown;\n  children: (props: ImageGenFormChildProps<T>) => React.ReactNode;\n  transformValues?: (values: T) => ImageGenSubmitPayload;\n  getInitialValuesFromCredentials?: (\n    credentials: ImageGenerationCredentials,\n    imageProvider: ImageProvider\n  ) => Partial<T>;\n}\n\n// Props passed to form field children\nexport interface ImageGenFormChildProps<T extends FormValues> {\n  formikProps: FormikProps<T>;\n  apiStatus: APIFormFieldState;\n  setApiStatus: (status: APIFormFieldState) => void;\n  showApiMessage: boolean;\n  setShowApiMessage: (show: boolean) => void;\n  errorMessage: string;\n  setErrorMessage: (message: string) => void;\n  isSubmitting: boolean;\n  disabled: boolean;\n  isEditMode: boolean;\n  isLoadingCredentials: boolean;\n  apiKeyOptions: { value: string; label: string }[];\n  resetApiState: () => void;\n  imageProvider: ImageProvider;\n}\n\n// Payload for submitting image generation config\nexport interface ImageGenSubmitPayload {\n  modelName: string;\n  imageProviderId: string;\n  isDefault?: boolean;\n\n  // Clone mode - reuse credentials from existing LLM provider\n  sourceLlmProviderId?: number;\n\n  // New credentials mode\n  provider?: string;\n  apiKey?: string;\n  apiBase?: string;\n  apiVersion?: string;\n  deploymentName?: string;\n  customConfig?: Record<string, string>;\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ImageGenerationPage/index.tsx",
    "content": "\"use client\";\n\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport ImageGenerationContent from \"@/refresh-pages/admin/ImageGenerationPage/ImageGenerationContent\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\nconst route = ADMIN_ROUTES.IMAGE_GENERATION;\n\nexport default function ImageGenerationPage() {\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title={route.title}\n        description=\"Settings for in-chat image generation.\"\n        separator\n      />\n      <SettingsLayouts.Body>\n        <ImageGenerationContent />\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ImageGenerationPage/svc.ts",
    "content": "/**\n * Image Generation Configuration Service\n * API functions for managing image generation configurations\n */\n\n// Types\nexport interface ImageGenerationConfigView {\n  image_provider_id: string; // Primary key\n  model_configuration_id: number;\n  model_name: string;\n  llm_provider_id: number;\n  llm_provider_name: string;\n  is_default: boolean;\n}\n\nexport interface TestApiKeyResult {\n  ok: boolean;\n  errorMessage?: string;\n}\n\nexport interface ImageGenerationCredentials {\n  api_key: string | null;\n  api_base: string | null;\n  api_version: string | null;\n  deployment_name: string | null;\n  custom_config: Record<string, string> | null;\n}\n\n// Creation options - either clone from existing provider or use new credentials\nexport interface ImageGenerationConfigCreateOptions {\n  imageProviderId: string;\n  modelName: string;\n  isDefault?: boolean;\n\n  // Option 1: Clone mode - use credentials from existing provider\n  sourceLlmProviderId?: number;\n\n  // Option 2: New credentials mode\n  provider?: string;\n  apiKey?: string;\n  apiBase?: string;\n  apiVersion?: string;\n  deploymentName?: string;\n  customConfig?: Record<string, string>;\n}\n\n// API Endpoints\nconst IMAGE_GEN_CONFIG_URL = \"/api/admin/image-generation/config\";\nconst IMAGE_GEN_TEST_URL = \"/api/admin/image-generation/test\";\n\n/**\n * Test API key for image generation provider\n *\n * Two modes:\n * 1. Direct: provider + apiKey provided\n * 2. From existing provider: sourceLlmProviderId provided (backend fetches API key)\n */\nexport async function testImageGenerationApiKey(\n  modelName: string,\n  options: {\n    // Option 1: Direct API key\n    provider?: string;\n    apiKey?: string;\n    // Option 2: Use existing provider\n    sourceLlmProviderId?: number;\n    // Additional fields\n    apiBase?: string;\n    apiVersion?: string;\n    deploymentName?: string;\n    customConfig?: Record<string, string>;\n  }\n): Promise<TestApiKeyResult> {\n  try {\n    const response = await fetch(IMAGE_GEN_TEST_URL, {\n      method: \"POST\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify({\n        model_name: modelName,\n        provider: options.provider || null,\n        api_key: options.apiKey || null,\n        source_llm_provider_id: options.sourceLlmProviderId || null,\n        api_base: options.apiBase || null,\n        api_version: options.apiVersion || null,\n        deployment_name: options.deploymentName || null,\n        custom_config: options.customConfig || null,\n      }),\n    });\n\n    if (!response.ok) {\n      const error = await response.json();\n      return {\n        ok: false,\n        errorMessage: error.detail || \"API key validation failed\",\n      };\n    }\n\n    return { ok: true };\n  } catch (error) {\n    return {\n      ok: false,\n      errorMessage:\n        error instanceof Error ? error.message : \"An error occurred\",\n    };\n  }\n}\n\n/**\n * Fetch all image generation configurations\n */\nexport async function fetchImageGenerationConfigs(): Promise<\n  ImageGenerationConfigView[]\n> {\n  const response = await fetch(IMAGE_GEN_CONFIG_URL);\n  if (!response.ok) {\n    throw new Error(\"Failed to fetch image generation configs\");\n  }\n  return response.json();\n}\n\n/**\n * Fetch credentials for an image generation config (for edit mode)\n */\nexport async function fetchImageGenerationCredentials(\n  imageProviderId: string\n): Promise<ImageGenerationCredentials> {\n  const response = await fetch(\n    `${IMAGE_GEN_CONFIG_URL}/${imageProviderId}/credentials`\n  );\n  if (!response.ok) {\n    throw new Error(\"Failed to fetch credentials\");\n  }\n  return response.json();\n}\n\n/**\n * Create image generation configuration\n * Backend creates new LLM provider + model config + image config\n */\nexport async function createImageGenerationConfig(\n  options: ImageGenerationConfigCreateOptions\n): Promise<ImageGenerationConfigView> {\n  const response = await fetch(IMAGE_GEN_CONFIG_URL, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      image_provider_id: options.imageProviderId,\n      model_name: options.modelName,\n      is_default: options.isDefault ?? false,\n      // Clone mode\n      source_llm_provider_id: options.sourceLlmProviderId,\n      // New credentials mode\n      provider: options.provider,\n      api_key: options.apiKey,\n      api_base: options.apiBase,\n      api_version: options.apiVersion,\n      deployment_name: options.deploymentName,\n      custom_config: options.customConfig,\n    }),\n  });\n\n  if (!response.ok) {\n    const error = await response.json();\n    throw new Error(error.detail || \"Failed to create config\");\n  }\n\n  return response.json();\n}\n\n// Update options - same structure but without isDefault\nexport interface ImageGenerationConfigUpdateOptions {\n  modelName: string;\n\n  // Option 1: Clone mode - use credentials from existing provider\n  sourceLlmProviderId?: number;\n\n  // Option 2: New credentials mode\n  provider?: string;\n  apiKey?: string;\n  apiBase?: string;\n  apiVersion?: string;\n  deploymentName?: string;\n  customConfig?: Record<string, string>;\n\n  // If true, apiKey was changed by user; if false, backend preserves existing key\n  apiKeyChanged?: boolean;\n}\n\n/**\n * Update image generation configuration\n * Backend deletes old LLM provider and creates new one\n */\nexport async function updateImageGenerationConfig(\n  imageProviderId: string,\n  options: ImageGenerationConfigUpdateOptions\n): Promise<ImageGenerationConfigView> {\n  const response = await fetch(`${IMAGE_GEN_CONFIG_URL}/${imageProviderId}`, {\n    method: \"PUT\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      model_name: options.modelName,\n      // Clone mode\n      source_llm_provider_id: options.sourceLlmProviderId,\n      // New credentials mode\n      provider: options.provider,\n      api_key: options.apiKey,\n      api_base: options.apiBase,\n      api_version: options.apiVersion,\n      deployment_name: options.deploymentName,\n      custom_config: options.customConfig,\n      // If false, backend preserves existing API key\n      api_key_changed: options.apiKeyChanged ?? false,\n    }),\n  });\n\n  if (!response.ok) {\n    const error = await response.json();\n    throw new Error(error.detail || \"Failed to update config\");\n  }\n\n  return response.json();\n}\n\n/**\n * Set image generation config as default\n */\nexport async function setDefaultImageGenerationConfig(\n  imageProviderId: string\n): Promise<void> {\n  const response = await fetch(\n    `${IMAGE_GEN_CONFIG_URL}/${imageProviderId}/default`,\n    {\n      method: \"POST\",\n    }\n  );\n\n  if (!response.ok) {\n    const error = await response.json();\n    throw new Error(error.detail || \"Failed to set default\");\n  }\n}\n\n/**\n * Unset image generation config as default\n */\nexport async function unsetDefaultImageGenerationConfig(\n  imageProviderId: string\n): Promise<void> {\n  const response = await fetch(\n    `${IMAGE_GEN_CONFIG_URL}/${imageProviderId}/default`,\n    {\n      method: \"DELETE\",\n    }\n  );\n\n  if (!response.ok) {\n    const error = await response.json();\n    throw new Error(error.detail || \"Failed to unset default\");\n  }\n}\n\n/**\n * Delete image generation configuration\n */\nexport async function deleteImageGenerationConfig(\n  imageProviderId: string\n): Promise<void> {\n  const response = await fetch(`${IMAGE_GEN_CONFIG_URL}/${imageProviderId}`, {\n    method: \"DELETE\",\n  });\n\n  if (!response.ok) {\n    const error = await response.json();\n    throw new Error(error.detail || \"Failed to delete config\");\n  }\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/LLMConfigurationPage.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  useAdminLLMProviders,\n  useWellKnownLLMProviders,\n} from \"@/hooks/useLLMProviders\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { Content, CardHeaderLayout } from \"@opal/layouts\";\nimport { Button, SelectCard } from \"@opal/components\";\nimport { Hoverable } from \"@opal/core\";\nimport { SvgArrowExchange, SvgSettings, SvgTrash } from \"@opal/icons\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport * as GeneralLayouts from \"@/layouts/general-layouts\";\nimport {\n  getProviderDisplayName,\n  getProviderIcon,\n  getProviderProductName,\n} from \"@/lib/llmConfig/providers\";\nimport { refreshLlmProviderCaches } from \"@/lib/llmConfig/cache\";\nimport { deleteLlmProvider, setDefaultLlmModel } from \"@/lib/llmConfig/svc\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Horizontal as HorizontalInput } from \"@/layouts/input-layouts\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport Message from \"@/refresh-components/messages/Message\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport Separator from \"@/refresh-components/Separator\";\nimport {\n  LLMProviderView,\n  WellKnownLLMProviderDescriptor,\n} from \"@/interfaces/llm\";\nimport { getModalForExistingProvider } from \"@/sections/modals/llmConfig/getModal\";\nimport OpenAIModal from \"@/sections/modals/llmConfig/OpenAIModal\";\nimport AnthropicModal from \"@/sections/modals/llmConfig/AnthropicModal\";\nimport OllamaModal from \"@/sections/modals/llmConfig/OllamaModal\";\nimport AzureModal from \"@/sections/modals/llmConfig/AzureModal\";\nimport BedrockModal from \"@/sections/modals/llmConfig/BedrockModal\";\nimport VertexAIModal from \"@/sections/modals/llmConfig/VertexAIModal\";\nimport OpenRouterModal from \"@/sections/modals/llmConfig/OpenRouterModal\";\nimport CustomModal from \"@/sections/modals/llmConfig/CustomModal\";\nimport LMStudioForm from \"@/sections/modals/llmConfig/LMStudioForm\";\nimport LiteLLMProxyModal from \"@/sections/modals/llmConfig/LiteLLMProxyModal\";\nimport BifrostModal from \"@/sections/modals/llmConfig/BifrostModal\";\nimport { Section } from \"@/layouts/general-layouts\";\n\nconst route = ADMIN_ROUTES.LLM_MODELS;\n\n// ============================================================================\n// Provider form mapping (keyed by provider name from the API)\n// ============================================================================\n\n// Client-side ordering for the \"Add Provider\" cards. The backend may return\n// wellKnownLLMProviders in an arbitrary order, so we sort explicitly here.\nconst PROVIDER_DISPLAY_ORDER: string[] = [\n  \"openai\",\n  \"anthropic\",\n  \"vertex_ai\",\n  \"bedrock\",\n  \"azure\",\n  \"litellm_proxy\",\n  \"ollama_chat\",\n  \"openrouter\",\n  \"lm_studio\",\n  \"bifrost\",\n];\n\nconst PROVIDER_MODAL_MAP: Record<\n  string,\n  (\n    shouldMarkAsDefault: boolean,\n    open: boolean,\n    onOpenChange: (open: boolean) => void\n  ) => React.ReactNode\n> = {\n  openai: (d, open, onOpenChange) => (\n    <OpenAIModal\n      shouldMarkAsDefault={d}\n      open={open}\n      onOpenChange={onOpenChange}\n    />\n  ),\n  anthropic: (d, open, onOpenChange) => (\n    <AnthropicModal\n      shouldMarkAsDefault={d}\n      open={open}\n      onOpenChange={onOpenChange}\n    />\n  ),\n  ollama_chat: (d, open, onOpenChange) => (\n    <OllamaModal\n      shouldMarkAsDefault={d}\n      open={open}\n      onOpenChange={onOpenChange}\n    />\n  ),\n  azure: (d, open, onOpenChange) => (\n    <AzureModal\n      shouldMarkAsDefault={d}\n      open={open}\n      onOpenChange={onOpenChange}\n    />\n  ),\n  bedrock: (d, open, onOpenChange) => (\n    <BedrockModal\n      shouldMarkAsDefault={d}\n      open={open}\n      onOpenChange={onOpenChange}\n    />\n  ),\n  vertex_ai: (d, open, onOpenChange) => (\n    <VertexAIModal\n      shouldMarkAsDefault={d}\n      open={open}\n      onOpenChange={onOpenChange}\n    />\n  ),\n  openrouter: (d, open, onOpenChange) => (\n    <OpenRouterModal\n      shouldMarkAsDefault={d}\n      open={open}\n      onOpenChange={onOpenChange}\n    />\n  ),\n  lm_studio: (d, open, onOpenChange) => (\n    <LMStudioForm\n      shouldMarkAsDefault={d}\n      open={open}\n      onOpenChange={onOpenChange}\n    />\n  ),\n  litellm_proxy: (d, open, onOpenChange) => (\n    <LiteLLMProxyModal\n      shouldMarkAsDefault={d}\n      open={open}\n      onOpenChange={onOpenChange}\n    />\n  ),\n  bifrost: (d, open, onOpenChange) => (\n    <BifrostModal\n      shouldMarkAsDefault={d}\n      open={open}\n      onOpenChange={onOpenChange}\n    />\n  ),\n};\n\n// ============================================================================\n// ExistingProviderCard — card for configured (existing) providers\n// ============================================================================\n\ninterface ExistingProviderCardProps {\n  provider: LLMProviderView;\n  isDefault: boolean;\n  isLastProvider: boolean;\n  defaultModelName?: string;\n}\n\nfunction ExistingProviderCard({\n  provider,\n  isDefault,\n  isLastProvider,\n  defaultModelName,\n}: ExistingProviderCardProps) {\n  const { mutate } = useSWRConfig();\n  const [isOpen, setIsOpen] = useState(false);\n  const deleteModal = useCreateModal();\n\n  const handleDelete = async () => {\n    try {\n      await deleteLlmProvider(provider.id);\n      await refreshLlmProviderCaches(mutate);\n      deleteModal.toggle(false);\n      toast.success(\"Provider deleted successfully!\");\n    } catch (e) {\n      const message = e instanceof Error ? e.message : \"Unknown error\";\n      toast.error(`Failed to delete provider: ${message}`);\n    }\n  };\n\n  return (\n    <>\n      {deleteModal.isOpen && (\n        <ConfirmationModalLayout\n          icon={SvgTrash}\n          title={`Delete ${provider.name}`}\n          onClose={() => deleteModal.toggle(false)}\n          submit={\n            <Button variant=\"danger\" onClick={handleDelete}>\n              Delete\n            </Button>\n          }\n        >\n          <Section alignItems=\"start\" gap={0.5}>\n            <Text text03>\n              All LLM models from provider <b>{provider.name}</b> will be\n              removed and unavailable for future chats. Chat history will be\n              preserved.\n            </Text>\n            {isLastProvider && (\n              <Text text03>\n                Connect another provider to continue using chats.\n              </Text>\n            )}\n          </Section>\n        </ConfirmationModalLayout>\n      )}\n\n      <Hoverable.Root group=\"ExistingProviderCard\">\n        <SelectCard\n          state=\"filled\"\n          padding=\"sm\"\n          rounding=\"lg\"\n          onClick={() => setIsOpen(true)}\n        >\n          <CardHeaderLayout\n            icon={getProviderIcon(provider.provider)}\n            title={provider.name}\n            description={getProviderDisplayName(provider.provider)}\n            sizePreset=\"main-ui\"\n            variant=\"section\"\n            tag={isDefault ? { title: \"Default\", color: \"blue\" } : undefined}\n            rightChildren={\n              <div className=\"flex flex-row\">\n                <Hoverable.Item\n                  group=\"ExistingProviderCard\"\n                  variant=\"opacity-on-hover\"\n                >\n                  <Button\n                    icon={SvgTrash}\n                    prominence=\"tertiary\"\n                    aria-label={`Delete ${provider.name}`}\n                    onClick={(e) => {\n                      e.stopPropagation();\n                      deleteModal.toggle(true);\n                    }}\n                  />\n                </Hoverable.Item>\n                <Button\n                  icon={SvgSettings}\n                  prominence=\"tertiary\"\n                  aria-label={`Edit ${provider.name}`}\n                  onClick={(e) => {\n                    e.stopPropagation();\n                    setIsOpen(true);\n                  }}\n                />\n              </div>\n            }\n          />\n          {getModalForExistingProvider(\n            provider,\n            isOpen,\n            setIsOpen,\n            defaultModelName\n          )}\n        </SelectCard>\n      </Hoverable.Root>\n    </>\n  );\n}\n\n// ============================================================================\n// NewProviderCard — card for the \"Add Provider\" list\n// ============================================================================\n\ninterface NewProviderCardProps {\n  provider: WellKnownLLMProviderDescriptor;\n  isFirstProvider: boolean;\n  formFn: (\n    shouldMarkAsDefault: boolean,\n    open: boolean,\n    onOpenChange: (open: boolean) => void\n  ) => React.ReactNode;\n}\n\nfunction NewProviderCard({\n  provider,\n  isFirstProvider,\n  formFn,\n}: NewProviderCardProps) {\n  const [isOpen, setIsOpen] = useState(false);\n\n  return (\n    <SelectCard\n      state=\"empty\"\n      padding=\"sm\"\n      rounding=\"lg\"\n      onClick={() => setIsOpen(true)}\n    >\n      <CardHeaderLayout\n        icon={getProviderIcon(provider.name)}\n        title={getProviderProductName(provider.name)}\n        description={getProviderDisplayName(provider.name)}\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        rightChildren={\n          <Button\n            rightIcon={SvgArrowExchange}\n            prominence=\"tertiary\"\n            onClick={(e) => {\n              e.stopPropagation();\n              setIsOpen(true);\n            }}\n          >\n            Connect\n          </Button>\n        }\n      />\n      {formFn(isFirstProvider, isOpen, setIsOpen)}\n    </SelectCard>\n  );\n}\n\n// ============================================================================\n// NewCustomProviderCard — card for adding a custom LLM provider\n// ============================================================================\n\ninterface NewCustomProviderCardProps {\n  isFirstProvider: boolean;\n}\n\nfunction NewCustomProviderCard({\n  isFirstProvider,\n}: NewCustomProviderCardProps) {\n  const [isOpen, setIsOpen] = useState(false);\n\n  return (\n    <SelectCard\n      state=\"empty\"\n      padding=\"sm\"\n      rounding=\"lg\"\n      onClick={() => setIsOpen(true)}\n    >\n      <CardHeaderLayout\n        icon={getProviderIcon(\"custom\")}\n        title={getProviderProductName(\"custom\")}\n        description={getProviderDisplayName(\"custom\")}\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        rightChildren={\n          <Button\n            rightIcon={SvgArrowExchange}\n            prominence=\"tertiary\"\n            onClick={(e) => {\n              e.stopPropagation();\n              setIsOpen(true);\n            }}\n          >\n            Set Up\n          </Button>\n        }\n      />\n      <CustomModal\n        shouldMarkAsDefault={isFirstProvider}\n        open={isOpen}\n        onOpenChange={setIsOpen}\n      />\n    </SelectCard>\n  );\n}\n\n// ============================================================================\n// LLMConfigurationPage — main page component\n// ============================================================================\n\nexport default function LLMConfigurationPage() {\n  const { mutate } = useSWRConfig();\n  const { llmProviders: existingLlmProviders, defaultText } =\n    useAdminLLMProviders();\n  const { wellKnownLLMProviders } = useWellKnownLLMProviders();\n\n  if (!existingLlmProviders) {\n    return <ThreeDotsLoader />;\n  }\n\n  const hasProviders = existingLlmProviders.length > 0;\n  const isFirstProvider = !hasProviders;\n\n  // Pre-sort providers so the default appears first\n  const sortedProviders = [...existingLlmProviders].sort((a, b) => {\n    const aIsDefault = defaultText?.provider_id === a.id;\n    const bIsDefault = defaultText?.provider_id === b.id;\n    if (aIsDefault && !bIsDefault) return -1;\n    if (!aIsDefault && bIsDefault) return 1;\n    return 0;\n  });\n\n  // Pre-filter to providers that have at least one visible model\n  const providersWithVisibleModels = existingLlmProviders\n    .map((provider) => ({\n      provider,\n      visibleModels: provider.model_configurations.filter((m) => m.is_visible),\n    }))\n    .filter(({ visibleModels }) => visibleModels.length > 0);\n\n  // Default model logic — use the global default from the API response\n  const currentDefaultValue = defaultText\n    ? `${defaultText.provider_id}:${defaultText.model_name}`\n    : undefined;\n\n  async function handleDefaultModelChange(compositeValue: string) {\n    const separatorIndex = compositeValue.indexOf(\":\");\n    const providerId = Number(compositeValue.slice(0, separatorIndex));\n    const modelName = compositeValue.slice(separatorIndex + 1);\n\n    try {\n      await setDefaultLlmModel(providerId, modelName);\n      await refreshLlmProviderCaches(mutate);\n      toast.success(\"Default model updated successfully!\");\n    } catch (e) {\n      const message = e instanceof Error ? e.message : \"Unknown error\";\n      toast.error(`Failed to set default model: ${message}`);\n    }\n  }\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header icon={route.icon} title={route.title} separator />\n\n      <SettingsLayouts.Body>\n        {hasProviders ? (\n          <Card>\n            <HorizontalInput\n              title=\"Default Model\"\n              description=\"This model will be used by Onyx by default in your chats.\"\n              nonInteractive\n              center\n            >\n              <InputSelect\n                value={currentDefaultValue}\n                onValueChange={handleDefaultModelChange}\n              >\n                <InputSelect.Trigger placeholder=\"Select a default model\" />\n                <InputSelect.Content>\n                  {providersWithVisibleModels.map(\n                    ({ provider, visibleModels }) => (\n                      <InputSelect.Group key={provider.id}>\n                        <InputSelect.Label>{provider.name}</InputSelect.Label>\n                        {visibleModels.map((model) => (\n                          <InputSelect.Item\n                            key={`${provider.id}:${model.name}`}\n                            value={`${provider.id}:${model.name}`}\n                          >\n                            {model.display_name || model.name}\n                          </InputSelect.Item>\n                        ))}\n                      </InputSelect.Group>\n                    )\n                  )}\n                </InputSelect.Content>\n              </InputSelect>\n            </HorizontalInput>\n          </Card>\n        ) : (\n          <Message\n            info\n            large\n            icon\n            close={false}\n            text=\"Set up an LLM provider to start chatting.\"\n            className=\"w-full\"\n          />\n        )}\n\n        {/* ── Available Providers (only when providers exist) ── */}\n        {hasProviders && (\n          <>\n            <GeneralLayouts.Section\n              gap={0.75}\n              height=\"fit\"\n              alignItems=\"stretch\"\n              justifyContent=\"start\"\n            >\n              <Content\n                title=\"Available Providers\"\n                sizePreset=\"main-content\"\n                variant=\"section\"\n              />\n\n              <div className=\"flex flex-col gap-2\">\n                {sortedProviders.map((provider) => (\n                  <ExistingProviderCard\n                    key={provider.id}\n                    provider={provider}\n                    isDefault={defaultText?.provider_id === provider.id}\n                    isLastProvider={sortedProviders.length === 1}\n                    defaultModelName={\n                      defaultText?.provider_id === provider.id\n                        ? defaultText.model_name\n                        : undefined\n                    }\n                  />\n                ))}\n              </div>\n            </GeneralLayouts.Section>\n\n            <Separator noPadding />\n          </>\n        )}\n\n        {/* ── Add Provider (always visible) ── */}\n        <GeneralLayouts.Section\n          gap={0.75}\n          height=\"fit\"\n          alignItems=\"stretch\"\n          justifyContent=\"start\"\n        >\n          <Content\n            title=\"Add Provider\"\n            description=\"Onyx supports both popular providers and self-hosted models.\"\n            sizePreset=\"main-content\"\n            variant=\"section\"\n          />\n\n          <div className=\"grid grid-cols-2 gap-2\">\n            {[...(wellKnownLLMProviders ?? [])]\n              .sort((a, b) => {\n                const aIndex = PROVIDER_DISPLAY_ORDER.indexOf(a.name);\n                const bIndex = PROVIDER_DISPLAY_ORDER.indexOf(b.name);\n                return (\n                  (aIndex === -1 ? Infinity : aIndex) -\n                  (bIndex === -1 ? Infinity : bIndex)\n                );\n              })\n              .map((provider) => {\n                const formFn = PROVIDER_MODAL_MAP[provider.name];\n                if (!formFn) {\n                  toast.error(\n                    `No modal mapping for provider \"${provider.name}\".`\n                  );\n                  return null;\n                }\n                return (\n                  <NewProviderCard\n                    key={provider.name}\n                    provider={provider}\n                    isFirstProvider={isFirstProvider}\n                    formFn={formFn}\n                  />\n                );\n              })}\n            <NewCustomProviderCard isFirstProvider={isFirstProvider} />\n          </div>\n        </GeneralLayouts.Section>\n      </SettingsLayouts.Body>\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ServiceAccountsPage/ApiKeyFormModal.tsx",
    "content": "\"use client\";\n\nimport { Form, Formik } from \"formik\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  createApiKey,\n  updateApiKey,\n} from \"@/refresh-pages/admin/ServiceAccountsPage/svc\";\nimport type { APIKey } from \"@/refresh-pages/admin/ServiceAccountsPage/interfaces\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport { FormikField } from \"@/refresh-components/form/FormikField\";\nimport { Vertical as VerticalInput } from \"@/layouts/input-layouts\";\nimport { USER_ROLE_LABELS, UserRole } from \"@/lib/types\";\nimport { SvgKey, SvgLock, SvgUser, SvgUserManage } from \"@opal/icons\";\n\ninterface ApiKeyFormModalProps {\n  onClose: () => void;\n  onCreateApiKey: (apiKey: APIKey) => void;\n  apiKey?: APIKey;\n}\n\nexport default function ApiKeyFormModal({\n  onClose,\n  onCreateApiKey,\n  apiKey,\n}: ApiKeyFormModalProps) {\n  const isUpdate = apiKey !== undefined;\n\n  return (\n    <Modal open onOpenChange={onClose}>\n      <Modal.Content width=\"sm\" height=\"lg\">\n        <Modal.Header\n          icon={SvgKey}\n          title={isUpdate ? \"Update Service Account\" : \"Create Service Account\"}\n          description={\n            isUpdate\n              ? undefined\n              : \"Use service account API key to programmatically access Onyx API with user-level permissions. You can modify the account details later.\"\n          }\n          onClose={onClose}\n        />\n        <Formik\n          initialValues={{\n            name: apiKey?.api_key_name || \"\",\n            role: apiKey?.api_key_role || UserRole.BASIC.toString(),\n          }}\n          onSubmit={async (values, formikHelpers) => {\n            formikHelpers.setSubmitting(true);\n\n            const payload = {\n              ...values,\n              role: values.role as UserRole,\n            };\n\n            try {\n              let response;\n              if (isUpdate) {\n                response = await updateApiKey(apiKey.api_key_id, payload);\n              } else {\n                response = await createApiKey(payload);\n              }\n              if (response.ok) {\n                toast.success(\n                  isUpdate\n                    ? \"Successfully updated service account!\"\n                    : \"Successfully created service account!\"\n                );\n                if (!isUpdate) {\n                  onCreateApiKey(await response.json());\n                }\n                onClose();\n              } else {\n                const responseJson = await response.json();\n                const errorMsg = responseJson.detail || responseJson.message;\n                toast.error(\n                  isUpdate\n                    ? `Error updating service account - ${errorMsg}`\n                    : `Error creating service account - ${errorMsg}`\n                );\n              }\n            } catch (e) {\n              toast.error(\n                e instanceof Error ? e.message : \"An unexpected error occurred.\"\n              );\n            } finally {\n              formikHelpers.setSubmitting(false);\n            }\n          }}\n        >\n          {({ isSubmitting, values }) => (\n            <Form className=\"w-full overflow-visible\">\n              <Modal.Body>\n                <VerticalInput\n                  name=\"name\"\n                  title=\"Name\"\n                  nonInteractive\n                  sizePreset=\"main-ui\"\n                >\n                  <FormikField<string>\n                    name=\"name\"\n                    render={(field, helper) => (\n                      <InputTypeIn\n                        {...field}\n                        placeholder=\"Enter a name\"\n                        onClear={() => helper.setValue(\"\")}\n                        showClearButton={false}\n                      />\n                    )}\n                  />\n                </VerticalInput>\n\n                <VerticalInput\n                  name=\"role\"\n                  title=\"Account Permissions\"\n                  nonInteractive\n                  sizePreset=\"main-ui\"\n                >\n                  <FormikField<string>\n                    name=\"role\"\n                    render={(field, helper) => (\n                      <InputSelect\n                        value={field.value}\n                        onValueChange={(value) => helper.setValue(value)}\n                      >\n                        <InputSelect.Trigger placeholder=\"Select permissions\" />\n                        <InputSelect.Content>\n                          <InputSelect.Item\n                            value={UserRole.ADMIN.toString()}\n                            icon={SvgUserManage}\n                            description=\"Unrestricted admin access to all endpoints.\"\n                          >\n                            {USER_ROLE_LABELS[UserRole.ADMIN]}\n                          </InputSelect.Item>\n                          <InputSelect.Item\n                            value={UserRole.BASIC.toString()}\n                            icon={SvgUser}\n                            description=\"Standard user-level access to non-admin endpoints.\"\n                          >\n                            {USER_ROLE_LABELS[UserRole.BASIC]}\n                          </InputSelect.Item>\n                          <InputSelect.Item\n                            value={UserRole.LIMITED.toString()}\n                            icon={SvgLock}\n                            description=\"For agents: chat posting and read-only access to other endpoints.\"\n                          >\n                            {USER_ROLE_LABELS[UserRole.LIMITED]}\n                          </InputSelect.Item>\n                        </InputSelect.Content>\n                      </InputSelect>\n                    )}\n                  />\n                </VerticalInput>\n              </Modal.Body>\n\n              <Modal.Footer>\n                <Button prominence=\"secondary\" type=\"button\" onClick={onClose}>\n                  Cancel\n                </Button>\n                <Button\n                  disabled={isSubmitting || !values.name.trim()}\n                  type=\"submit\"\n                >\n                  {isUpdate ? \"Update\" : \"Create Account\"}\n                </Button>\n              </Modal.Footer>\n            </Form>\n          )}\n        </Formik>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ServiceAccountsPage/index.tsx",
    "content": "\"use client\";\n\nimport { useMemo, useState } from \"react\";\nimport useSWR, { mutate } from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { toast } from \"@/hooks/useToast\";\nimport { Button, Text } from \"@opal/components\";\nimport { Content, IllustrationContent } from \"@opal/layouts\";\nimport SvgNoResult from \"@opal/illustrations/no-result\";\nimport {\n  SvgDownload,\n  SvgKey,\n  SvgLock,\n  SvgMoreHorizontal,\n  SvgRefreshCw,\n  SvgTrash,\n  SvgUser,\n  SvgUserEdit,\n  SvgUserKey,\n  SvgUserManage,\n} from \"@opal/icons\";\nimport { USER_ROLE_LABELS, UserRole } from \"@/lib/types\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport AdminListHeader from \"@/sections/admin/AdminListHeader\";\nimport Modal, { BasicModalFooter } from \"@/refresh-components/Modal\";\nimport Code from \"@/refresh-components/Code\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport { markdown } from \"@opal/utils\";\nimport Message from \"@/refresh-components/messages/Message\";\n\nimport { useBillingInformation } from \"@/hooks/useBillingInformation\";\nimport { BillingStatus, hasActiveSubscription } from \"@/lib/billing/interfaces\";\nimport {\n  deleteApiKey,\n  regenerateApiKey,\n  updateApiKey,\n} from \"@/refresh-pages/admin/ServiceAccountsPage/svc\";\nimport type { APIKey } from \"@/refresh-pages/admin/ServiceAccountsPage/interfaces\";\nimport { DISCORD_SERVICE_API_KEY_NAME } from \"@/refresh-pages/admin/ServiceAccountsPage/interfaces\";\nimport ApiKeyFormModal from \"@/refresh-pages/admin/ServiceAccountsPage/ApiKeyFormModal\";\nimport { Table } from \"@opal/components\";\nimport { createTableColumns } from \"@opal/components/table/columns\";\nimport { Section } from \"@/layouts/general-layouts\";\n\nconst API_KEY_SWR_KEY = SWR_KEYS.adminApiKeys;\nconst route = ADMIN_ROUTES.API_KEYS;\n\nconst tc = createTableColumns<APIKey>();\n\n// ---------------------------------------------------------------------------\n// Page\n// ---------------------------------------------------------------------------\n\nexport default function ServiceAccountsPage() {\n  const {\n    data: apiKeys,\n    isLoading,\n    error,\n  } = useSWR<APIKey[]>(API_KEY_SWR_KEY, errorHandlingFetcher);\n\n  const { data: billingData } = useBillingInformation();\n  const isTrialing =\n    billingData !== undefined &&\n    hasActiveSubscription(billingData) &&\n    billingData.status === BillingStatus.TRIALING;\n\n  const [fullApiKey, setFullApiKey] = useState<string | null>(null);\n  const [showCreateUpdateForm, setShowCreateUpdateForm] = useState(false);\n  const [selectedApiKey, setSelectedApiKey] = useState<APIKey | undefined>();\n  const [search, setSearch] = useState(\"\");\n  const [regenerateTarget, setRegenerateTarget] = useState<APIKey | null>(null);\n  const [deleteTarget, setDeleteTarget] = useState<APIKey | null>(null);\n\n  const visibleApiKeys = (apiKeys ?? []).filter(\n    (key) => key.api_key_name !== DISCORD_SERVICE_API_KEY_NAME\n  );\n\n  const filteredApiKeys = visibleApiKeys.filter(\n    (key) =>\n      !search ||\n      (key.api_key_name ?? \"\").toLowerCase().includes(search.toLowerCase()) ||\n      key.api_key_display.toLowerCase().includes(search.toLowerCase())\n  );\n\n  const handleRoleChange = async (apiKey: APIKey, newRole: UserRole) => {\n    try {\n      const response = await updateApiKey(apiKey.api_key_id, {\n        name: apiKey.api_key_name ?? undefined,\n        role: newRole,\n      });\n      if (!response.ok) {\n        const errorMsg = await response.text();\n        toast.error(`Failed to update role: ${errorMsg}`);\n        return;\n      }\n      mutate(API_KEY_SWR_KEY);\n      toast.success(\"Role updated.\");\n    } catch {\n      toast.error(\"Failed to update role.\");\n    }\n  };\n\n  const handleRegenerate = async (apiKey: APIKey) => {\n    try {\n      const response = await regenerateApiKey(apiKey);\n      if (!response.ok) {\n        const errorMsg = await response.text();\n        toast.error(`Failed to regenerate API Key: ${errorMsg}`);\n        return;\n      }\n      const newKey = (await response.json()) as APIKey;\n      setFullApiKey(newKey.api_key);\n      mutate(API_KEY_SWR_KEY);\n    } catch (e) {\n      toast.error(\n        e instanceof Error ? e.message : \"Failed to regenerate API Key.\"\n      );\n    }\n  };\n\n  const handleDelete = async (apiKey: APIKey) => {\n    try {\n      const response = await deleteApiKey(apiKey.api_key_id);\n      if (!response.ok) {\n        const errorMsg = await response.text();\n        toast.error(`Failed to delete API Key: ${errorMsg}`);\n        return;\n      }\n      mutate(API_KEY_SWR_KEY);\n    } catch (e) {\n      toast.error(e instanceof Error ? e.message : \"Failed to delete API Key.\");\n    }\n  };\n\n  const columns = useMemo(\n    () => [\n      tc.qualifier({\n        content: \"icon\",\n        getContent: () => SvgUserKey,\n      }),\n      tc.column(\"api_key_name\", {\n        header: \"Name\",\n        weight: 25,\n        cell: (value) => (\n          <Content\n            title={value || \"Unnamed\"}\n            sizePreset=\"main-ui\"\n            variant=\"body\"\n          />\n        ),\n      }),\n      tc.column(\"api_key_display\", {\n        header: \"API Key\",\n        weight: 30,\n        cell: (value) => (\n          <Text font=\"secondary-mono\" color=\"text-03\">\n            {value}\n          </Text>\n        ),\n      }),\n      tc.displayColumn({\n        id: \"account_type\",\n        header: \"Account Type\",\n        width: { weight: 25, minWidth: 160 },\n        cell: (row) => (\n          <InputSelect\n            value={row.api_key_role}\n            onValueChange={(value) => handleRoleChange(row, value as UserRole)}\n          >\n            <InputSelect.Trigger />\n            <InputSelect.Content>\n              <InputSelect.Item\n                value={UserRole.ADMIN.toString()}\n                icon={SvgUserManage}\n                description=\"Unrestricted admin access to all endpoints.\"\n              >\n                {USER_ROLE_LABELS[UserRole.ADMIN]}\n              </InputSelect.Item>\n              <InputSelect.Item\n                value={UserRole.BASIC.toString()}\n                icon={SvgUser}\n                description=\"Standard user-level access to non-admin endpoints.\"\n              >\n                {USER_ROLE_LABELS[UserRole.BASIC]}\n              </InputSelect.Item>\n              <InputSelect.Item\n                value={UserRole.LIMITED.toString()}\n                icon={SvgLock}\n                description=\"For agents: chat posting and read-only access to other endpoints.\"\n              >\n                {USER_ROLE_LABELS[UserRole.LIMITED]}\n              </InputSelect.Item>\n            </InputSelect.Content>\n          </InputSelect>\n        ),\n      }),\n      tc.actions({\n        cell: (row) => (\n          <div className=\"flex flex-row gap-1\">\n            <Button\n              icon={SvgRefreshCw}\n              prominence=\"tertiary\"\n              tooltip=\"Regenerate\"\n              onClick={() => setRegenerateTarget(row)}\n            />\n            <Popover>\n              <Popover.Trigger asChild>\n                <Button\n                  icon={SvgMoreHorizontal}\n                  prominence=\"tertiary\"\n                  tooltip=\"More\"\n                />\n              </Popover.Trigger>\n              <Popover.Content side=\"bottom\" align=\"end\" width=\"md\">\n                <PopoverMenu>\n                  <LineItem\n                    icon={SvgUserEdit}\n                    onClick={() => {\n                      setSelectedApiKey(row);\n                      setShowCreateUpdateForm(true);\n                    }}\n                  >\n                    Edit Account\n                  </LineItem>\n                  <LineItem\n                    icon={SvgTrash}\n                    danger\n                    onClick={() => setDeleteTarget(row)}\n                  >\n                    Delete Account\n                  </LineItem>\n                </PopoverMenu>\n              </Popover.Content>\n            </Popover>\n          </div>\n        ),\n      }),\n    ],\n    [] // eslint-disable-line react-hooks/exhaustive-deps\n  );\n\n  if (error) {\n    return (\n      <SettingsLayouts.Root>\n        <SettingsLayouts.Header\n          title={route.title}\n          icon={route.icon}\n          description=\"Use service accounts to programmatically access Onyx API.\"\n          separator\n        />\n        <SettingsLayouts.Body>\n          <IllustrationContent\n            illustration={SvgNoResult}\n            title=\"Failed to load service accounts.\"\n            description=\"Please check the console for more details.\"\n          />\n        </SettingsLayouts.Body>\n      </SettingsLayouts.Root>\n    );\n  }\n\n  if (isLoading) {\n    return (\n      <SettingsLayouts.Root>\n        <SettingsLayouts.Header\n          title={route.title}\n          icon={route.icon}\n          description=\"Use service accounts to programmatically access Onyx API.\"\n          separator\n        />\n        <SettingsLayouts.Body>\n          <SimpleLoader />\n        </SettingsLayouts.Body>\n      </SettingsLayouts.Root>\n    );\n  }\n\n  const hasKeys = visibleApiKeys.length > 0;\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        title={route.title}\n        icon={route.icon}\n        description=\"Use service accounts to programmatically access Onyx API.\"\n        separator\n      />\n\n      <SettingsLayouts.Body>\n        {isTrialing && (\n          <Message\n            static\n            warning\n            close={false}\n            className=\"w-full\"\n            text=\"Upgrade to a paid plan to create API keys.\"\n            description=\"Trial accounts do not include API key access — purchase a paid subscription to unlock this feature.\"\n          />\n        )}\n\n        <div className=\"flex flex-col\">\n          <AdminListHeader\n            hasItems={hasKeys}\n            searchQuery={search}\n            onSearchQueryChange={setSearch}\n            placeholder=\"Search service accounts...\"\n            emptyStateText=\"Create service account API keys with user-level access.\"\n            onAction={() => {\n              setSelectedApiKey(undefined);\n              setShowCreateUpdateForm(true);\n            }}\n            actionLabel=\"New Service Account\"\n          />\n\n          {hasKeys && (\n            <Table\n              data={filteredApiKeys}\n              getRowId={(row) => String(row.api_key_id)}\n              columns={columns}\n              searchTerm={search}\n            />\n          )}\n        </div>\n      </SettingsLayouts.Body>\n\n      <Modal open={!!fullApiKey}>\n        <Modal.Content width=\"sm\" height=\"sm\">\n          <Modal.Header\n            title=\"Service Account API Key\"\n            icon={SvgKey}\n            onClose={() => setFullApiKey(null)}\n            description=\"Save this key before continuing. It won't be shown again.\"\n          />\n          <Modal.Body>\n            <Code showCopyButton={false}>{fullApiKey ?? \"\"}</Code>\n          </Modal.Body>\n          <Modal.Footer>\n            <BasicModalFooter\n              left={\n                <Button\n                  prominence=\"secondary\"\n                  icon={SvgDownload}\n                  onClick={() => {\n                    if (!fullApiKey) return;\n                    const blob = new Blob([fullApiKey], {\n                      type: \"text/plain\",\n                    });\n                    const url = URL.createObjectURL(blob);\n                    const a = document.createElement(\"a\");\n                    a.href = url;\n                    a.download = \"onyx-api-key.txt\";\n                    a.click();\n                    URL.revokeObjectURL(url);\n                  }}\n                >\n                  Download\n                </Button>\n              }\n              submit={\n                // TODO(@raunakab): Create an opalified copy-button and replace it here\n                <Button\n                  onClick={() => {\n                    if (fullApiKey) {\n                      navigator.clipboard.writeText(fullApiKey);\n                      toast.success(\"API key copied to clipboard.\");\n                    }\n                  }}\n                >\n                  Copy API Key\n                </Button>\n              }\n            />\n          </Modal.Footer>\n        </Modal.Content>\n      </Modal>\n\n      {showCreateUpdateForm && (\n        <ApiKeyFormModal\n          onCreateApiKey={(apiKey) => {\n            setFullApiKey(apiKey.api_key);\n          }}\n          onClose={() => {\n            setShowCreateUpdateForm(false);\n            setSelectedApiKey(undefined);\n            mutate(API_KEY_SWR_KEY);\n          }}\n          apiKey={selectedApiKey}\n        />\n      )}\n\n      {regenerateTarget && (\n        <ConfirmationModalLayout\n          icon={SvgRefreshCw}\n          title=\"Regenerate API Key\"\n          onClose={() => setRegenerateTarget(null)}\n          submit={\n            <Button\n              variant=\"danger\"\n              onClick={async () => {\n                const target = regenerateTarget;\n                setRegenerateTarget(null);\n                await handleRegenerate(target);\n              }}\n            >\n              Regenerate Key\n            </Button>\n          }\n        >\n          <Text as=\"p\" color=\"text-03\">\n            {markdown(\n              `Your current API key *${\n                regenerateTarget.api_key_name || \"Unnamed\"\n              }* (\\`${\n                regenerateTarget.api_key_display\n              }\\`) will be revoked and a new key will be generated. You will need to update any applications using this key with the new one.`\n            )}\n          </Text>\n        </ConfirmationModalLayout>\n      )}\n\n      {deleteTarget && (\n        <ConfirmationModalLayout\n          icon={SvgTrash}\n          title=\"Delete Account\"\n          onClose={() => setDeleteTarget(null)}\n          submit={\n            <Button\n              variant=\"danger\"\n              onClick={async () => {\n                await handleDelete(deleteTarget);\n                setDeleteTarget(null);\n              }}\n            >\n              Delete\n            </Button>\n          }\n        >\n          <Section alignItems=\"start\" gap={0.5}>\n            <Text as=\"p\" color=\"text-03\">\n              {markdown(\n                `Any application using the API key of account *${\n                  deleteTarget.api_key_name || \"Unnamed\"\n                }* (\\`${\n                  deleteTarget.api_key_display\n                }\\`) will lose access to Onyx.`\n              )}\n            </Text>\n            <Text as=\"p\" color=\"text-03\">\n              Deletion cannot be undone.\n            </Text>\n          </Section>\n        </ConfirmationModalLayout>\n      )}\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ServiceAccountsPage/interfaces.ts",
    "content": "import { UserRole } from \"@/lib/types\";\n\nexport const DISCORD_SERVICE_API_KEY_NAME = \"discord-bot-service\";\n\nexport interface APIKey {\n  api_key_id: number;\n  api_key_display: string;\n  api_key: string | null;\n  api_key_name: string | null;\n  api_key_role: UserRole;\n  user_id: string;\n}\n\nexport interface APIKeyArgs {\n  name?: string;\n  role: UserRole;\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/ServiceAccountsPage/svc.ts",
    "content": "import type {\n  APIKeyArgs,\n  APIKey,\n} from \"@/refresh-pages/admin/ServiceAccountsPage/interfaces\";\n\nconst API_KEY_URL = \"/api/admin/api-key\";\n\nexport async function createApiKey(args: APIKeyArgs): Promise<Response> {\n  return fetch(API_KEY_URL, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(args),\n  });\n}\n\nexport async function regenerateApiKey(apiKey: APIKey): Promise<Response> {\n  return fetch(`${API_KEY_URL}/${apiKey.api_key_id}/regenerate`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n  });\n}\n\nexport async function updateApiKey(\n  apiKeyId: number,\n  args: APIKeyArgs\n): Promise<Response> {\n  return fetch(`${API_KEY_URL}/${apiKeyId}`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(args),\n  });\n}\n\nexport async function deleteApiKey(apiKeyId: number): Promise<Response> {\n  return fetch(`${API_KEY_URL}/${apiKeyId}`, {\n    method: \"DELETE\",\n  });\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/UsersPage/EditUserModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useMemo, useCallback } from \"react\";\nimport { Button } from \"@opal/components\";\nimport { SvgUsers, SvgUser, SvgLogOut, SvgCheck } from \"@opal/icons\";\nimport { ContentAction } from \"@opal/layouts\";\nimport Modal from \"@/refresh-components/Modal\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport Popover from \"@/refresh-components/Popover\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport Separator from \"@/refresh-components/Separator\";\nimport ShadowDiv from \"@/refresh-components/ShadowDiv\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { toast } from \"@/hooks/useToast\";\nimport { UserRole, USER_ROLE_LABELS } from \"@/lib/types\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport useGroups from \"@/hooks/useGroups\";\nimport { addUserToGroup, removeUserFromGroup, setUserRole } from \"./svc\";\nimport type { UserRow } from \"./interfaces\";\nimport { cn } from \"../../../lib/utils\";\n\n// ---------------------------------------------------------------------------\n// Constants\n// ---------------------------------------------------------------------------\n\nconst ASSIGNABLE_ROLES: UserRole[] = [\n  UserRole.ADMIN,\n  UserRole.GLOBAL_CURATOR,\n  UserRole.BASIC,\n];\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface EditUserModalProps {\n  user: UserRow & { id: string };\n  onClose: () => void;\n  onMutate: () => void;\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nexport default function EditUserModal({\n  user,\n  onClose,\n  onMutate,\n}: EditUserModalProps) {\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n  const { data: allGroups, isLoading: groupsLoading } = useGroups();\n  const [searchTerm, setSearchTerm] = useState(\"\");\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const [popoverOpen, setPopoverOpen] = useState(false);\n  const [selectedRole, setSelectedRole] = useState<UserRole | \"\">(\n    user.role ?? \"\"\n  );\n\n  const initialMemberGroupIds = useMemo(\n    () => new Set(user.groups.map((g) => g.id)),\n    [user.groups]\n  );\n  const [memberGroupIds, setMemberGroupIds] = useState<Set<number>>(\n    () => new Set(initialMemberGroupIds)\n  );\n\n  // Dropdown shows all groups filtered by search term\n  const dropdownGroups = useMemo(() => {\n    if (!allGroups) return [];\n    if (searchTerm.length === 0) return allGroups;\n    const lower = searchTerm.toLowerCase();\n    return allGroups.filter((g) => g.name.toLowerCase().includes(lower));\n  }, [allGroups, searchTerm]);\n\n  // Joined groups shown in the modal body\n  const joinedGroups = useMemo(() => {\n    if (!allGroups) return [];\n    return allGroups.filter((g) => memberGroupIds.has(g.id));\n  }, [allGroups, memberGroupIds]);\n\n  const hasGroupChanges = useMemo(() => {\n    if (memberGroupIds.size !== initialMemberGroupIds.size) return true;\n    return Array.from(memberGroupIds).some(\n      (id) => !initialMemberGroupIds.has(id)\n    );\n  }, [memberGroupIds, initialMemberGroupIds]);\n\n  const visibleRoles = isPaidEnterpriseFeaturesEnabled\n    ? ASSIGNABLE_ROLES\n    : ASSIGNABLE_ROLES.filter((r) => r !== UserRole.GLOBAL_CURATOR);\n\n  const hasRoleChange =\n    user.role !== null && selectedRole !== \"\" && selectedRole !== user.role;\n  const hasChanges = hasGroupChanges || hasRoleChange;\n\n  const toggleGroup = (groupId: number) => {\n    setMemberGroupIds((prev) => {\n      const next = new Set(prev);\n      if (next.has(groupId)) {\n        next.delete(groupId);\n      } else {\n        next.add(groupId);\n      }\n      return next;\n    });\n  };\n\n  const handleSave = async () => {\n    setIsSubmitting(true);\n    try {\n      const toAdd = Array.from(memberGroupIds).filter(\n        (id) => !initialMemberGroupIds.has(id)\n      );\n      const toRemove = Array.from(initialMemberGroupIds).filter(\n        (id) => !memberGroupIds.has(id)\n      );\n\n      if (user.id) {\n        for (const groupId of toAdd) {\n          await addUserToGroup(groupId, user.id);\n        }\n        for (const groupId of toRemove) {\n          const group = allGroups?.find((g) => g.id === groupId);\n          if (group) {\n            const currentUserIds = group.users.map((u) => u.id);\n            const ccPairIds = group.cc_pairs.map((cc) => cc.id);\n            await removeUserFromGroup(\n              groupId,\n              currentUserIds,\n              user.id,\n              ccPairIds\n            );\n          }\n        }\n      }\n\n      if (\n        user.role !== null &&\n        selectedRole !== \"\" &&\n        selectedRole !== user.role\n      ) {\n        await setUserRole(user.email, selectedRole);\n      }\n\n      onMutate();\n      toast.success(\"User updated\");\n      onClose();\n    } catch (err) {\n      onMutate(); // refresh to show partially-applied state\n      toast.error(err instanceof Error ? err.message : \"An error occurred\");\n    } finally {\n      setIsSubmitting(false);\n    }\n  };\n\n  const displayName = user.personal_name ?? user.email;\n  const [contentEl, setContentEl] = useState<HTMLDivElement | null>(null);\n  const contentRef = useCallback((node: HTMLDivElement | null) => {\n    setContentEl(node);\n  }, []);\n\n  return (\n    <Modal\n      open\n      onOpenChange={(isOpen) => !isOpen && !isSubmitting && onClose()}\n    >\n      <Modal.Content width=\"sm\" ref={contentRef}>\n        <Modal.Header\n          icon={SvgUsers}\n          title=\"Edit User's Groups & Roles\"\n          description={\n            user.personal_name\n              ? `${user.personal_name} (${user.email})`\n              : user.email\n          }\n          onClose={isSubmitting ? undefined : onClose}\n        />\n        <Modal.Body twoTone>\n          <Section padding={0} height=\"auto\" alignItems=\"stretch\">\n            <Section\n              gap={0.5}\n              padding={0.25}\n              height={joinedGroups.length === 0 && !popoverOpen ? \"auto\" : 14.5}\n              alignItems=\"stretch\"\n              justifyContent=\"start\"\n              className=\"bg-background-tint-02 rounded-08\"\n            >\n              <Popover open={popoverOpen} onOpenChange={setPopoverOpen}>\n                <Popover.Trigger asChild>\n                  {/* asChild merges trigger props onto this div instead of rendering a <button>.\n                     Without it, the trigger <button> would nest around InputTypeIn's\n                     internal IconButton <button>, causing a hydration error. */}\n                  <div>\n                    <InputTypeIn\n                      value={searchTerm}\n                      onChange={(e) => setSearchTerm(e.target.value)}\n                      placeholder=\"Search groups to join...\"\n                      leftSearchIcon\n                    />\n                  </div>\n                </Popover.Trigger>\n                <Popover.Content\n                  width=\"trigger\"\n                  align=\"start\"\n                  container={contentEl}\n                >\n                  {groupsLoading ? (\n                    <LineItem skeleton description=\"Loading groups...\">\n                      Loading...\n                    </LineItem>\n                  ) : dropdownGroups.length === 0 ? (\n                    <LineItem\n                      skeleton\n                      description=\"Try a different search term.\"\n                    >\n                      No groups found\n                    </LineItem>\n                  ) : (\n                    <ShadowDiv\n                      shadowHeight=\"0.75rem\"\n                      className={cn(\n                        \"flex flex-col gap-1 max-h-[15rem] rounded-08\"\n                      )}\n                    >\n                      {dropdownGroups.map((group) => {\n                        const isMember = memberGroupIds.has(group.id);\n                        return (\n                          <LineItem\n                            key={group.id}\n                            icon={isMember ? SvgCheck : SvgUsers}\n                            description={`${group.users.length} ${\n                              group.users.length === 1 ? \"user\" : \"users\"\n                            }`}\n                            selected={isMember}\n                            emphasized={isMember}\n                            onClick={() => toggleGroup(group.id)}\n                          >\n                            {group.name}\n                          </LineItem>\n                        );\n                      })}\n                    </ShadowDiv>\n                  )}\n                </Popover.Content>\n              </Popover>\n\n              <ShadowDiv\n                className={cn(\" max-h-[11rem] flex flex-col gap-1 rounded-08\")}\n                shadowHeight=\"0.75rem\"\n              >\n                {joinedGroups.length === 0 ? (\n                  <LineItem\n                    icon={SvgUsers}\n                    skeleton\n                    interactive={false}\n                    description={`${displayName} is not in any groups.`}\n                  >\n                    No groups found\n                  </LineItem>\n                ) : (\n                  joinedGroups.map((group) => (\n                    <div\n                      key={group.id}\n                      className=\"bg-background-tint-01 rounded-08\"\n                    >\n                      <LineItem\n                        key={group.id}\n                        icon={SvgUsers}\n                        description={`${group.users.length} ${\n                          group.users.length === 1 ? \"user\" : \"users\"\n                        }`}\n                        rightChildren={\n                          <SimpleTooltip\n                            tooltip=\"Remove from group\"\n                            side=\"left\"\n                          >\n                            <SvgLogOut height={16} width={16} />\n                          </SimpleTooltip>\n                        }\n                        onClick={() => toggleGroup(group.id)}\n                      >\n                        {group.name}\n                      </LineItem>\n                    </div>\n                  ))\n                )}\n              </ShadowDiv>\n            </Section>\n            {user.role && (\n              <>\n                <Separator noPadding />\n\n                <ContentAction\n                  title=\"User Role\"\n                  description=\"This controls their general permissions.\"\n                  sizePreset=\"main-ui\"\n                  variant=\"section\"\n                  paddingVariant=\"fit\"\n                  rightChildren={\n                    <InputSelect\n                      value={selectedRole}\n                      onValueChange={(v) => setSelectedRole(v as UserRole)}\n                    >\n                      <InputSelect.Trigger />\n                      <InputSelect.Content>\n                        {user.role && !visibleRoles.includes(user.role) && (\n                          <InputSelect.Item\n                            key={user.role}\n                            value={user.role}\n                            icon={SvgUser}\n                          >\n                            {USER_ROLE_LABELS[user.role]}\n                          </InputSelect.Item>\n                        )}\n                        {visibleRoles.map((role) => (\n                          <InputSelect.Item\n                            key={role}\n                            value={role}\n                            icon={SvgUser}\n                          >\n                            {USER_ROLE_LABELS[role]}\n                          </InputSelect.Item>\n                        ))}\n                      </InputSelect.Content>\n                    </InputSelect>\n                  }\n                />\n              </>\n            )}\n          </Section>\n        </Modal.Body>\n\n        <Modal.Footer>\n          <Button\n            prominence=\"secondary\"\n            onClick={isSubmitting ? undefined : onClose}\n          >\n            Cancel\n          </Button>\n          <Button disabled={isSubmitting || !hasChanges} onClick={handleSave}>\n            Save Changes\n          </Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/UsersPage/GroupsCell.tsx",
    "content": "\"use client\";\n\nimport {\n  useState,\n  useRef,\n  useLayoutEffect,\n  useCallback,\n  useEffect,\n} from \"react\";\nimport { Hoverable } from \"@opal/core\";\nimport { SvgEdit } from \"@opal/icons\";\nimport { Button, Tag } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport EditUserModal from \"./EditUserModal\";\nimport type { UserRow, UserGroupInfo } from \"./interfaces\";\n\ninterface GroupsCellProps {\n  groups: UserGroupInfo[];\n  user: UserRow;\n  onMutate: () => void;\n}\n\n/**\n * Measures how many Tag pills fit in the container, accounting for a \"+N\"\n * overflow counter when not all tags are visible. Uses a two-phase render:\n * first renders all tags (clipped by overflow:hidden) for measurement, then\n * re-renders with only the visible subset + \"+N\".\n *\n * Hovering the cell shows a tooltip with ALL groups. Clicking opens the\n * edit groups modal.\n */\nexport default function GroupsCell({\n  groups,\n  user,\n  onMutate,\n}: GroupsCellProps) {\n  const [showModal, setShowModal] = useState(false);\n  const [visibleCount, setVisibleCount] = useState<number | null>(null);\n  const containerRef = useRef<HTMLDivElement>(null);\n\n  const computeVisibleCount = useCallback(() => {\n    const container = containerRef.current;\n    if (!container || groups.length <= 1) {\n      setVisibleCount(groups.length);\n      return;\n    }\n\n    const tags = container.querySelectorAll<HTMLElement>(\"[data-group-tag]\");\n    if (tags.length === 0) return;\n\n    const containerWidth = container.clientWidth;\n    const gap = 4; // gap-1\n    const counterWidth = 32; // \"+N\" Tag approximate width\n\n    let used = 0;\n    let count = 0;\n\n    for (let i = 0; i < tags.length; i++) {\n      const tagWidth = tags[i]!.offsetWidth;\n      const gapBefore = count > 0 ? gap : 0;\n      const hasMore = i < tags.length - 1;\n      const reserve = hasMore ? gap + counterWidth : 0;\n\n      if (used + gapBefore + tagWidth + reserve <= containerWidth) {\n        used += gapBefore + tagWidth;\n        count++;\n      } else {\n        break;\n      }\n    }\n\n    setVisibleCount(Math.max(1, count));\n  }, [groups]);\n\n  // Reset to measurement phase when groups change\n  useLayoutEffect(() => {\n    setVisibleCount(null);\n  }, [groups]);\n\n  // Measure after the \"show all\" render\n  useLayoutEffect(() => {\n    if (visibleCount !== null) return;\n    computeVisibleCount();\n  }, [visibleCount, computeVisibleCount]);\n\n  // Re-measure when the container width changes (e.g. window resize).\n  // Track width so height-only changes (from the measurement cycle toggling\n  // visible tags) don't cause an infinite render loop.\n  const lastWidthRef = useRef(0);\n\n  useEffect(() => {\n    const node = containerRef.current;\n    if (!node) return;\n\n    const observer = new ResizeObserver((entries) => {\n      const width = entries[0]?.contentRect.width ?? 0;\n      if (Math.abs(width - lastWidthRef.current) < 1) return;\n      lastWidthRef.current = width;\n      setVisibleCount(null);\n    });\n    observer.observe(node);\n\n    return () => observer.disconnect();\n  }, [groups]);\n\n  const isMeasuring = visibleCount === null;\n  const effectiveVisible = visibleCount ?? groups.length;\n  const overflowCount = groups.length - effectiveVisible;\n  const hasOverflow = !isMeasuring && overflowCount > 0;\n\n  const allGroupsTooltip = (\n    <div className=\"flex flex-wrap gap-1 max-w-[14rem]\">\n      {groups.map((g) => (\n        <div key={g.id} className=\"max-w-[10rem]\">\n          <Tag title={g.name} size=\"md\" />\n        </div>\n      ))}\n    </div>\n  );\n\n  const tagsContent = (\n    <>\n      {(isMeasuring ? groups : groups.slice(0, effectiveVisible)).map((g) => (\n        <div key={g.id} data-group-tag className=\"flex-shrink-0\">\n          <Tag title={g.name} size=\"md\" />\n        </div>\n      ))}\n      {hasOverflow && (\n        <div className=\"flex-shrink-0\">\n          <Tag title={`+${overflowCount}`} size=\"md\" />\n        </div>\n      )}\n    </>\n  );\n\n  return (\n    <>\n      <Hoverable.Root group=\"tags\">\n        <div\n          className={`relative flex justify-between items-center w-full min-w-0 ${\n            user.id ? \"cursor-pointer\" : \"\"\n          }`}\n          onClick={user.id ? () => setShowModal(true) : undefined}\n        >\n          {groups.length === 0 ? (\n            <div\n              ref={containerRef}\n              className=\"flex items-center gap-1 overflow-hidden flex-nowrap min-w-0 -mr-7\"\n            >\n              <Text as=\"span\" secondaryBody text03>\n                —\n              </Text>\n            </div>\n          ) : (\n            <SimpleTooltip\n              side=\"bottom\"\n              align=\"start\"\n              tooltip={allGroupsTooltip}\n              disabled={!hasOverflow}\n              className=\"bg-background-neutral-01 shadow-sm\"\n              delayDuration={200}\n            >\n              <div\n                ref={containerRef}\n                className=\"flex items-center gap-1 overflow-hidden flex-nowrap min-w-0 -mr-7\"\n              >\n                {tagsContent}\n              </div>\n            </SimpleTooltip>\n          )}\n          {user.id && (\n            <Hoverable.Item group=\"tags\" variant=\"opacity-on-hover\">\n              <Button\n                icon={SvgEdit}\n                prominence=\"tertiary\"\n                tooltip=\"Edit\"\n                tooltipSide=\"left\"\n                onClick={(e) => {\n                  e.stopPropagation();\n                  setShowModal(true);\n                }}\n              />\n            </Hoverable.Item>\n          )}\n        </div>\n      </Hoverable.Root>\n      {showModal && user.id != null && (\n        <EditUserModal\n          user={{ ...user, id: user.id }}\n          onClose={() => setShowModal(false)}\n          onMutate={onMutate}\n        />\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/UsersPage/InviteUsersModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useCallback } from \"react\";\nimport { Button } from \"@opal/components\";\nimport { SvgUsers, SvgAlertTriangle } from \"@opal/icons\";\nimport Modal, { BasicModalFooter } from \"@/refresh-components/Modal\";\nimport InputChipField from \"@/refresh-components/inputs/InputChipField\";\nimport type { ChipItem } from \"@/refresh-components/inputs/InputChipField\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { toast } from \"@/hooks/useToast\";\nimport { inviteUsers } from \"./svc\";\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\nconst EMAIL_REGEX = /^[^\\s@]+@[^\\s@]+\\.[^\\s@]+$/;\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\ninterface InviteUsersModalProps {\n  open: boolean;\n  onOpenChange: (open: boolean) => void;\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nexport default function InviteUsersModal({\n  open,\n  onOpenChange,\n}: InviteUsersModalProps) {\n  const [chips, setChips] = useState<ChipItem[]>([]);\n  const [inputValue, setInputValue] = useState(\"\");\n  const [isSubmitting, setIsSubmitting] = useState(false);\n\n  /** Parse a comma-separated string into de-duped ChipItems */\n  function parseEmails(value: string, existing: ChipItem[]): ChipItem[] {\n    const entries = value\n      .split(\",\")\n      .map((e) => e.trim().toLowerCase())\n      .filter(Boolean);\n\n    const newChips: ChipItem[] = [];\n    for (const email of entries) {\n      const alreadyAdded =\n        existing.some((c) => c.label === email) ||\n        newChips.some((c) => c.label === email);\n      if (!alreadyAdded) {\n        newChips.push({\n          id: email,\n          label: email,\n          error: !EMAIL_REGEX.test(email),\n        });\n      }\n    }\n    return newChips;\n  }\n\n  function addEmail(value: string) {\n    const newChips = parseEmails(value, chips);\n    if (newChips.length > 0) {\n      setChips((prev) => [...prev, ...newChips]);\n    }\n    setInputValue(\"\");\n  }\n\n  function removeChip(id: string) {\n    setChips((prev) => prev.filter((c) => c.id !== id));\n  }\n\n  const handleClose = useCallback(() => {\n    onOpenChange(false);\n    // Reset state after close animation\n    setTimeout(() => {\n      setChips([]);\n      setInputValue(\"\");\n      setIsSubmitting(false);\n    }, 200);\n  }, [onOpenChange]);\n\n  /** Intercept backdrop/ESC closes so state is always reset */\n  const handleOpenChange = useCallback(\n    (next: boolean) => {\n      if (!next) {\n        if (!isSubmitting) handleClose();\n      } else {\n        onOpenChange(next);\n      }\n    },\n    [handleClose, isSubmitting, onOpenChange]\n  );\n\n  async function handleInvite() {\n    // Flush any pending text in the input into chips synchronously\n    const pending = inputValue.trim();\n    const allChips = pending\n      ? [...chips, ...parseEmails(pending, chips)]\n      : chips;\n\n    if (pending) {\n      setChips(allChips);\n      setInputValue(\"\");\n    }\n\n    const validEmails = allChips.filter((c) => !c.error).map((c) => c.label);\n\n    if (validEmails.length === 0) {\n      toast.error(\"Please add at least one valid email address\");\n      return;\n    }\n\n    setIsSubmitting(true);\n    try {\n      await inviteUsers(validEmails);\n      toast.success(\n        `Invited ${validEmails.length} user${validEmails.length > 1 ? \"s\" : \"\"}`\n      );\n      handleClose();\n    } catch (err) {\n      toast.error(\n        err instanceof Error ? err.message : \"Failed to invite users\"\n      );\n    } finally {\n      setIsSubmitting(false);\n    }\n  }\n\n  return (\n    <Modal open={open} onOpenChange={handleOpenChange}>\n      <Modal.Content width=\"sm\" height=\"fit\">\n        <Modal.Header\n          icon={SvgUsers}\n          title=\"Invite Users\"\n          onClose={isSubmitting ? undefined : handleClose}\n        />\n\n        <Modal.Body>\n          <InputChipField\n            chips={chips}\n            onRemoveChip={removeChip}\n            onAdd={addEmail}\n            value={inputValue}\n            onChange={setInputValue}\n            placeholder=\"Add an email and press enter\"\n            layout=\"stacked\"\n          />\n          {chips.some((c) => c.error) && (\n            <div className=\"flex items-center gap-1 pt-1\">\n              <SvgAlertTriangle\n                size={14}\n                className=\"text-status-warning-05 shrink-0\"\n              />\n              <Text secondaryBody text03>\n                Some email addresses are invalid and will be skipped.\n              </Text>\n            </div>\n          )}\n        </Modal.Body>\n\n        <Modal.Footer>\n          <BasicModalFooter\n            cancel={\n              <Button\n                disabled={isSubmitting}\n                prominence=\"tertiary\"\n                onClick={handleClose}\n              >\n                Cancel\n              </Button>\n            }\n            submit={\n              <Button\n                disabled={\n                  isSubmitting ||\n                  chips.length === 0 ||\n                  chips.every((c) => c.error)\n                }\n                onClick={handleInvite}\n              >\n                Invite\n              </Button>\n            }\n          />\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/UsersPage/UserActionModals.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { Button } from \"@opal/components\";\nimport { SvgUserPlus, SvgUserX, SvgXCircle, SvgKey } from \"@opal/icons\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  deactivateUser,\n  activateUser,\n  deleteUser,\n  cancelInvite,\n  resetPassword,\n} from \"./svc\";\n\n// ---------------------------------------------------------------------------\n// Shared helper\n// ---------------------------------------------------------------------------\n\nasync function runAction(\n  action: () => Promise<void>,\n  successMessage: string,\n  onDone: () => void,\n  setIsSubmitting: (v: boolean) => void\n) {\n  setIsSubmitting(true);\n  try {\n    await action();\n    onDone();\n    toast.success(successMessage);\n  } catch (err) {\n    toast.error(err instanceof Error ? err.message : \"An error occurred\");\n  } finally {\n    setIsSubmitting(false);\n  }\n}\n\n// ---------------------------------------------------------------------------\n// Cancel Invite Modal\n// ---------------------------------------------------------------------------\n\ninterface CancelInviteModalProps {\n  email: string;\n  onClose: () => void;\n  onMutate: () => void;\n}\n\nexport function CancelInviteModal({\n  email,\n  onClose,\n  onMutate,\n}: CancelInviteModalProps) {\n  const [isSubmitting, setIsSubmitting] = useState(false);\n\n  return (\n    <ConfirmationModalLayout\n      icon={(props) => (\n        <SvgUserX {...props} className=\"text-action-danger-05\" />\n      )}\n      title=\"Cancel Invite\"\n      onClose={isSubmitting ? undefined : onClose}\n      submit={\n        <Button\n          disabled={isSubmitting}\n          variant=\"danger\"\n          onClick={() =>\n            runAction(\n              () => cancelInvite(email),\n              \"Invite cancelled\",\n              () => {\n                onMutate();\n                onClose();\n              },\n              setIsSubmitting\n            )\n          }\n        >\n          Cancel Invite\n        </Button>\n      }\n    >\n      <Text as=\"p\" text03>\n        <Text as=\"span\" text05>\n          {email}\n        </Text>{\" \"}\n        will no longer be able to join Onyx with this invite.\n      </Text>\n    </ConfirmationModalLayout>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Deactivate User Modal\n// ---------------------------------------------------------------------------\n\ninterface DeactivateUserModalProps {\n  email: string;\n  onClose: () => void;\n  onMutate: () => void;\n}\n\nexport function DeactivateUserModal({\n  email,\n  onClose,\n  onMutate,\n}: DeactivateUserModalProps) {\n  const [isSubmitting, setIsSubmitting] = useState(false);\n\n  return (\n    <ConfirmationModalLayout\n      icon={(props) => (\n        <SvgUserX {...props} className=\"text-action-danger-05\" />\n      )}\n      title=\"Deactivate User\"\n      onClose={isSubmitting ? undefined : onClose}\n      submit={\n        <Button\n          disabled={isSubmitting}\n          variant=\"danger\"\n          onClick={() =>\n            runAction(\n              () => deactivateUser(email),\n              \"User deactivated\",\n              () => {\n                onMutate();\n                onClose();\n              },\n              setIsSubmitting\n            )\n          }\n        >\n          Deactivate\n        </Button>\n      }\n    >\n      <Text as=\"p\" text03>\n        <Text as=\"span\" text05>\n          {email}\n        </Text>{\" \"}\n        will immediately lose access to Onyx. Their sessions and agents will be\n        preserved. Their license seat will be freed. You can reactivate this\n        account later.\n      </Text>\n    </ConfirmationModalLayout>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Activate User Modal\n// ---------------------------------------------------------------------------\n\ninterface ActivateUserModalProps {\n  email: string;\n  onClose: () => void;\n  onMutate: () => void;\n}\n\nexport function ActivateUserModal({\n  email,\n  onClose,\n  onMutate,\n}: ActivateUserModalProps) {\n  const [isSubmitting, setIsSubmitting] = useState(false);\n\n  return (\n    <ConfirmationModalLayout\n      icon={SvgUserPlus}\n      title=\"Activate User\"\n      onClose={isSubmitting ? undefined : onClose}\n      submit={\n        <Button\n          disabled={isSubmitting}\n          onClick={() =>\n            runAction(\n              () => activateUser(email),\n              \"User activated\",\n              () => {\n                onMutate();\n                onClose();\n              },\n              setIsSubmitting\n            )\n          }\n        >\n          Activate\n        </Button>\n      }\n    >\n      <Text as=\"p\" text03>\n        <Text as=\"span\" text05>\n          {email}\n        </Text>{\" \"}\n        will regain access to Onyx.\n      </Text>\n    </ConfirmationModalLayout>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Delete User Modal\n// ---------------------------------------------------------------------------\n\ninterface DeleteUserModalProps {\n  email: string;\n  onClose: () => void;\n  onMutate: () => void;\n}\n\nexport function DeleteUserModal({\n  email,\n  onClose,\n  onMutate,\n}: DeleteUserModalProps) {\n  const [isSubmitting, setIsSubmitting] = useState(false);\n\n  return (\n    <ConfirmationModalLayout\n      icon={(props) => (\n        <SvgUserX {...props} className=\"text-action-danger-05\" />\n      )}\n      title=\"Delete User\"\n      onClose={isSubmitting ? undefined : onClose}\n      submit={\n        <Button\n          disabled={isSubmitting}\n          variant=\"danger\"\n          onClick={() =>\n            runAction(\n              () => deleteUser(email),\n              \"User deleted\",\n              () => {\n                onMutate();\n                onClose();\n              },\n              setIsSubmitting\n            )\n          }\n        >\n          Delete\n        </Button>\n      }\n    >\n      <Text as=\"p\" text03>\n        <Text as=\"span\" text05>\n          {email}\n        </Text>{\" \"}\n        will be permanently removed from Onyx. All of their session history will\n        be deleted. Deletion cannot be undone.\n      </Text>\n    </ConfirmationModalLayout>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Reset Password Modal\n// ---------------------------------------------------------------------------\n\ninterface ResetPasswordModalProps {\n  email: string;\n  onClose: () => void;\n}\n\nexport function ResetPasswordModal({\n  email,\n  onClose,\n}: ResetPasswordModalProps) {\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const [newPassword, setNewPassword] = useState<string | null>(null);\n\n  const handleClose = () => {\n    onClose();\n    setNewPassword(null);\n  };\n\n  return (\n    <ConfirmationModalLayout\n      icon={SvgKey}\n      title={newPassword ? \"Password Reset\" : \"Reset Password\"}\n      onClose={isSubmitting ? undefined : handleClose}\n      submit={\n        newPassword ? (\n          <Button onClick={handleClose}>Done</Button>\n        ) : (\n          <Button\n            disabled={isSubmitting}\n            variant=\"danger\"\n            onClick={async () => {\n              setIsSubmitting(true);\n              try {\n                const result = await resetPassword(email);\n                setNewPassword(result.new_password);\n              } catch (err) {\n                toast.error(\n                  err instanceof Error\n                    ? err.message\n                    : \"Failed to reset password\"\n                );\n              } finally {\n                setIsSubmitting(false);\n              }\n            }}\n          >\n            Reset Password\n          </Button>\n        )\n      }\n    >\n      {newPassword ? (\n        <div className=\"flex flex-col gap-2\">\n          <Text as=\"p\" text03>\n            The password for{\" \"}\n            <Text as=\"span\" text05>\n              {email}\n            </Text>{\" \"}\n            has been reset. Copy the new password below — it will not be shown\n            again.\n          </Text>\n          <code className=\"rounded-sm bg-background-neutral-02 px-3 py-2 text-sm select-all\">\n            {newPassword}\n          </code>\n        </div>\n      ) : (\n        <Text as=\"p\" text03>\n          This will generate a new random password for{\" \"}\n          <Text as=\"span\" text05>\n            {email}\n          </Text>\n          . Their current password will stop working immediately.\n        </Text>\n      )}\n    </ConfirmationModalLayout>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/UsersPage/UserFilters.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport {\n  SvgCheck,\n  SvgSlack,\n  SvgUser,\n  SvgUserManage,\n  SvgUsers,\n} from \"@opal/icons\";\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport { FilterButton } from \"@opal/components\";\nimport Popover from \"@/refresh-components/Popover\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport ShadowDiv from \"@/refresh-components/ShadowDiv\";\nimport {\n  UserRole,\n  UserStatus,\n  USER_ROLE_LABELS,\n  USER_STATUS_LABELS,\n} from \"@/lib/types\";\nimport { NEXT_PUBLIC_CLOUD_ENABLED } from \"@/lib/constants\";\nimport type { GroupOption, StatusFilter, StatusCountMap } from \"./interfaces\";\n\n// ---------------------------------------------------------------------------\n// Constants\n// ---------------------------------------------------------------------------\n\nconst VISIBLE_FILTER_ROLES: UserRole[] = [\n  UserRole.ADMIN,\n  UserRole.GLOBAL_CURATOR,\n  UserRole.BASIC,\n  UserRole.SLACK_USER,\n];\n\nconst FILTERABLE_ROLES = VISIBLE_FILTER_ROLES.map(\n  (role) => [role, USER_ROLE_LABELS[role]] as [UserRole, string]\n);\n\nconst FILTERABLE_STATUSES = (\n  Object.entries(USER_STATUS_LABELS) as [UserStatus, string][]\n).filter(\n  ([value]) => value !== UserStatus.REQUESTED || NEXT_PUBLIC_CLOUD_ENABLED\n);\n\nconst ROLE_ICONS: Partial<Record<UserRole, IconFunctionComponent>> = {\n  [UserRole.ADMIN]: SvgUserManage,\n  [UserRole.SLACK_USER]: SvgSlack,\n};\n\n/** Map UserStatus enum values to the keys returned by the counts endpoint. */\nconst STATUS_COUNT_KEY: Record<UserStatus, keyof StatusCountMap> = {\n  [UserStatus.ACTIVE]: \"active\",\n  [UserStatus.INACTIVE]: \"inactive\",\n  [UserStatus.INVITED]: \"invited\",\n  [UserStatus.REQUESTED]: \"requested\",\n};\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\nfunction CountBadge({ count }: { count: number | undefined }) {\n  return (\n    <Text as=\"span\" secondaryBody text03>\n      {count ?? 0}\n    </Text>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\ninterface UserFiltersProps {\n  selectedRoles: UserRole[];\n  onRolesChange: (roles: UserRole[]) => void;\n  selectedGroups: number[];\n  onGroupsChange: (groupIds: number[]) => void;\n  groups: GroupOption[];\n  selectedStatuses: StatusFilter;\n  onStatusesChange: (statuses: StatusFilter) => void;\n  roleCounts: Record<string, number>;\n  statusCounts: StatusCountMap;\n}\n\nexport default function UserFilters({\n  selectedRoles,\n  onRolesChange,\n  selectedGroups,\n  onGroupsChange,\n  groups,\n  selectedStatuses,\n  onStatusesChange,\n  roleCounts,\n  statusCounts,\n}: UserFiltersProps) {\n  const hasRoleFilter = selectedRoles.length > 0;\n  const hasGroupFilter = selectedGroups.length > 0;\n  const hasStatusFilter = selectedStatuses.length > 0;\n  const [groupSearch, setGroupSearch] = useState(\"\");\n  const [groupPopoverOpen, setGroupPopoverOpen] = useState(false);\n\n  const toggleRole = (role: UserRole) => {\n    if (selectedRoles.includes(role)) {\n      onRolesChange(selectedRoles.filter((r) => r !== role));\n    } else {\n      onRolesChange([...selectedRoles, role]);\n    }\n  };\n\n  const toggleGroup = (groupId: number) => {\n    if (selectedGroups.includes(groupId)) {\n      onGroupsChange(selectedGroups.filter((id) => id !== groupId));\n    } else {\n      onGroupsChange([...selectedGroups, groupId]);\n    }\n  };\n\n  const toggleStatus = (status: UserStatus) => {\n    if (selectedStatuses.includes(status)) {\n      onStatusesChange(selectedStatuses.filter((s) => s !== status));\n    } else {\n      onStatusesChange([...selectedStatuses, status]);\n    }\n  };\n\n  const roleLabel = hasRoleFilter\n    ? FILTERABLE_ROLES.filter(([role]) => selectedRoles.includes(role))\n        .map(([, label]) => label)\n        .slice(0, 2)\n        .join(\", \") +\n      (selectedRoles.length > 2 ? `, +${selectedRoles.length - 2}` : \"\")\n    : \"All Account Types\";\n\n  const groupLabel = hasGroupFilter\n    ? groups\n        .filter((g) => selectedGroups.includes(g.id))\n        .map((g) => g.name)\n        .slice(0, 2)\n        .join(\", \") +\n      (selectedGroups.length > 2 ? `, +${selectedGroups.length - 2}` : \"\")\n    : \"All Groups\";\n\n  const statusLabel = hasStatusFilter\n    ? FILTERABLE_STATUSES.filter(([status]) =>\n        selectedStatuses.includes(status)\n      )\n        .map(([, label]) => label)\n        .slice(0, 2)\n        .join(\", \") +\n      (selectedStatuses.length > 2 ? `, +${selectedStatuses.length - 2}` : \"\")\n    : \"All Status\";\n\n  const filteredGroups = groupSearch\n    ? groups.filter((g) =>\n        g.name.toLowerCase().includes(groupSearch.toLowerCase())\n      )\n    : groups;\n\n  return (\n    <div className=\"flex gap-2\">\n      {/* Role filter */}\n      <Popover>\n        <Popover.Trigger asChild>\n          <FilterButton\n            aria-label=\"Filter by role\"\n            icon={SvgUsers}\n            active={hasRoleFilter}\n            onClear={() => onRolesChange([])}\n          >\n            {roleLabel}\n          </FilterButton>\n        </Popover.Trigger>\n        <Popover.Content align=\"start\">\n          <div className=\"flex flex-col gap-1 p-1 min-w-[200px]\">\n            <LineItem\n              icon={!hasRoleFilter ? SvgCheck : SvgUsers}\n              selected={!hasRoleFilter}\n              emphasized={!hasRoleFilter}\n              onClick={() => onRolesChange([])}\n            >\n              All Account Types\n            </LineItem>\n            {FILTERABLE_ROLES.map(([role, label]) => {\n              const isSelected = selectedRoles.includes(role);\n              const roleIcon = ROLE_ICONS[role] ?? SvgUser;\n              return (\n                <LineItem\n                  key={role}\n                  icon={isSelected ? SvgCheck : roleIcon}\n                  selected={isSelected}\n                  emphasized={isSelected}\n                  onClick={() => toggleRole(role)}\n                  rightChildren={<CountBadge count={roleCounts[role]} />}\n                >\n                  {label}\n                </LineItem>\n              );\n            })}\n          </div>\n        </Popover.Content>\n      </Popover>\n\n      {/* Groups filter */}\n      <Popover\n        open={groupPopoverOpen}\n        onOpenChange={(open) => {\n          setGroupPopoverOpen(open);\n          if (!open) setGroupSearch(\"\");\n        }}\n      >\n        <Popover.Trigger asChild>\n          <FilterButton\n            aria-label=\"Filter by group\"\n            icon={SvgUsers}\n            active={hasGroupFilter}\n            onClear={() => onGroupsChange([])}\n          >\n            {groupLabel}\n          </FilterButton>\n        </Popover.Trigger>\n        <Popover.Content align=\"start\">\n          <div className=\"flex flex-col gap-1 p-1 min-w-[200px]\">\n            <InputTypeIn\n              value={groupSearch}\n              onChange={(e) => setGroupSearch(e.target.value)}\n              placeholder=\"Search groups...\"\n              leftSearchIcon\n              variant=\"internal\"\n            />\n            <LineItem\n              icon={!hasGroupFilter ? SvgCheck : SvgUsers}\n              selected={!hasGroupFilter}\n              emphasized={!hasGroupFilter}\n              onClick={() => onGroupsChange([])}\n            >\n              All Groups\n            </LineItem>\n            <ShadowDiv className=\"flex flex-col gap-1 max-h-[240px]\">\n              {filteredGroups.map((group) => {\n                const isSelected = selectedGroups.includes(group.id);\n                return (\n                  <LineItem\n                    key={group.id}\n                    icon={isSelected ? SvgCheck : SvgUsers}\n                    selected={isSelected}\n                    emphasized={isSelected}\n                    onClick={() => toggleGroup(group.id)}\n                    rightChildren={<CountBadge count={group.memberCount} />}\n                  >\n                    {group.name}\n                  </LineItem>\n                );\n              })}\n              {filteredGroups.length === 0 && (\n                <Text as=\"span\" secondaryBody text03 className=\"px-2 py-1.5\">\n                  No groups found\n                </Text>\n              )}\n            </ShadowDiv>\n          </div>\n        </Popover.Content>\n      </Popover>\n\n      {/* Status filter */}\n      <Popover>\n        <Popover.Trigger asChild>\n          <FilterButton\n            aria-label=\"Filter by status\"\n            icon={SvgUsers}\n            active={hasStatusFilter}\n            onClear={() => onStatusesChange([])}\n          >\n            {statusLabel}\n          </FilterButton>\n        </Popover.Trigger>\n        <Popover.Content align=\"start\">\n          <div className=\"flex flex-col gap-1 p-1 min-w-[200px]\">\n            <LineItem\n              icon={!hasStatusFilter ? SvgCheck : SvgUser}\n              selected={!hasStatusFilter}\n              emphasized={!hasStatusFilter}\n              onClick={() => onStatusesChange([])}\n            >\n              All Status\n            </LineItem>\n            {FILTERABLE_STATUSES.map(([status, label]) => {\n              const isSelected = selectedStatuses.includes(status);\n              const countKey = STATUS_COUNT_KEY[status];\n              return (\n                <LineItem\n                  key={status}\n                  icon={isSelected ? SvgCheck : SvgUser}\n                  selected={isSelected}\n                  emphasized={isSelected}\n                  onClick={() => toggleStatus(status)}\n                  rightChildren={<CountBadge count={statusCounts[countKey]} />}\n                >\n                  {label}\n                </LineItem>\n              );\n            })}\n          </div>\n        </Popover.Content>\n      </Popover>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/UsersPage/UserRoleCell.tsx",
    "content": "\"use client\";\n\nimport { useState, useRef } from \"react\";\nimport { UserRole, USER_ROLE_LABELS } from \"@/lib/types\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { OpenButton } from \"@opal/components\";\nimport { Disabled } from \"@opal/core\";\nimport {\n  SvgCheck,\n  SvgGlobe,\n  SvgUser,\n  SvgSlack,\n  SvgUserManage,\n} from \"@opal/icons\";\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Popover from \"@/refresh-components/Popover\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { toast } from \"@/hooks/useToast\";\nimport { setUserRole } from \"./svc\";\nimport type { UserRow } from \"./interfaces\";\n\nconst ROLE_ICONS: Partial<Record<UserRole, IconFunctionComponent>> = {\n  [UserRole.ADMIN]: SvgUserManage,\n  [UserRole.GLOBAL_CURATOR]: SvgGlobe,\n  [UserRole.SLACK_USER]: SvgSlack,\n};\n\nconst SELECTABLE_ROLES = [\n  UserRole.ADMIN,\n  UserRole.GLOBAL_CURATOR,\n  UserRole.BASIC,\n] as const;\n\ninterface UserRoleCellProps {\n  user: UserRow;\n  onMutate: () => void;\n}\n\nexport default function UserRoleCell({ user, onMutate }: UserRoleCellProps) {\n  const [isUpdating, setIsUpdating] = useState(false);\n  const [open, setOpen] = useState(false);\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n  const isUpdatingRef = useRef(false);\n\n  if (!user.role) {\n    return (\n      <Text as=\"span\" secondaryBody text03>\n        —\n      </Text>\n    );\n  }\n\n  const applyRole = async (newRole: UserRole) => {\n    if (isUpdatingRef.current) return;\n    isUpdatingRef.current = true;\n    setIsUpdating(true);\n    try {\n      await setUserRole(user.email, newRole);\n      toast.success(\"Role updated\");\n      onMutate();\n    } catch (err) {\n      toast.error(err instanceof Error ? err.message : \"Failed to update role\");\n      onMutate();\n    } finally {\n      setIsUpdating(false);\n      isUpdatingRef.current = false;\n    }\n  };\n\n  const handleSelect = (role: UserRole) => {\n    if (role === user.role) {\n      setOpen(false);\n      return;\n    }\n    setOpen(false);\n    void applyRole(role);\n  };\n\n  const currentIcon = ROLE_ICONS[user.role] ?? SvgUser;\n\n  const visibleRoles = isPaidEnterpriseFeaturesEnabled\n    ? SELECTABLE_ROLES\n    : SELECTABLE_ROLES.filter((r) => r !== UserRole.GLOBAL_CURATOR);\n\n  const roleItems = visibleRoles.map((role) => {\n    const isSelected = user.role === role;\n    const icon = ROLE_ICONS[role] ?? SvgUser;\n    return (\n      <LineItem\n        key={role}\n        icon={isSelected ? SvgCheck : icon}\n        selected={isSelected}\n        emphasized={isSelected}\n        onClick={() => handleSelect(role)}\n      >\n        {USER_ROLE_LABELS[role]}\n      </LineItem>\n    );\n  });\n\n  return (\n    <Disabled disabled={isUpdating}>\n      <Popover open={open} onOpenChange={setOpen}>\n        <Popover.Trigger asChild>\n          <OpenButton\n            icon={currentIcon}\n            variant=\"select-tinted\"\n            width=\"full\"\n            justifyContent=\"between\"\n            roundingVariant=\"sm\"\n          >\n            {USER_ROLE_LABELS[user.role]}\n          </OpenButton>\n        </Popover.Trigger>\n        <Popover.Content align=\"start\">\n          <div className=\"flex flex-col gap-1 p-1 min-w-[160px]\">\n            {roleItems}\n          </div>\n        </Popover.Content>\n      </Popover>\n    </Disabled>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/UsersPage/UserRowActions.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { Button } from \"@opal/components\";\nimport {\n  SvgMoreHorizontal,\n  SvgUsers,\n  SvgXCircle,\n  SvgUserCheck,\n  SvgUserPlus,\n  SvgUserX,\n  SvgKey,\n} from \"@opal/icons\";\nimport { Disabled } from \"@opal/core\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport Popover from \"@/refresh-components/Popover\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { UserStatus } from \"@/lib/types\";\nimport { toast } from \"@/hooks/useToast\";\nimport { approveRequest } from \"./svc\";\nimport EditUserModal from \"./EditUserModal\";\nimport {\n  CancelInviteModal,\n  DeactivateUserModal,\n  ActivateUserModal,\n  DeleteUserModal,\n  ResetPasswordModal,\n} from \"./UserActionModals\";\nimport type { UserRow } from \"./interfaces\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\nenum Modal {\n  DEACTIVATE = \"deactivate\",\n  ACTIVATE = \"activate\",\n  DELETE = \"delete\",\n  CANCEL_INVITE = \"cancelInvite\",\n  EDIT_GROUPS = \"editGroups\",\n  RESET_PASSWORD = \"resetPassword\",\n}\n\ninterface UserRowActionsProps {\n  user: UserRow;\n  onMutate: () => void;\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nexport default function UserRowActions({\n  user,\n  onMutate,\n}: UserRowActionsProps) {\n  const [modal, setModal] = useState<Modal | null>(null);\n  const [popoverOpen, setPopoverOpen] = useState(false);\n\n  const openModal = (type: Modal) => {\n    setPopoverOpen(false);\n    setModal(type);\n  };\n\n  const closeModal = () => setModal(null);\n\n  const closeAndMutate = () => {\n    setModal(null);\n    onMutate();\n  };\n\n  // Status-aware action menus\n  const actionButtons = (() => {\n    // SCIM-managed users get limited actions — most changes would be\n    // overwritten on the next IdP sync.\n    if (user.is_scim_synced) {\n      return (\n        <>\n          {user.id && (\n            <LineItem\n              icon={SvgUsers}\n              onClick={() => openModal(Modal.EDIT_GROUPS)}\n            >\n              Groups &amp; Roles\n            </LineItem>\n          )}\n          <Disabled disabled>\n            <LineItem danger icon={SvgUserX}>\n              Deactivate User\n            </LineItem>\n          </Disabled>\n          <Separator paddingXRem={0.5} />\n          <Text as=\"p\" secondaryBody text03 className=\"px-3 py-1\">\n            This is a synced SCIM user managed by your identity provider.\n          </Text>\n        </>\n      );\n    }\n\n    switch (user.status) {\n      case UserStatus.INVITED:\n        return (\n          <LineItem\n            danger\n            icon={SvgXCircle}\n            onClick={() => openModal(Modal.CANCEL_INVITE)}\n          >\n            Cancel Invite\n          </LineItem>\n        );\n\n      case UserStatus.REQUESTED:\n        return (\n          <LineItem\n            icon={SvgUserCheck}\n            onClick={() => {\n              setPopoverOpen(false);\n              void (async () => {\n                try {\n                  await approveRequest(user.email);\n                  onMutate();\n                  toast.success(\"Request approved\");\n                } catch (err) {\n                  toast.error(\n                    err instanceof Error ? err.message : \"An error occurred\"\n                  );\n                }\n              })();\n            }}\n          >\n            Approve\n          </LineItem>\n        );\n\n      case UserStatus.ACTIVE:\n        return (\n          <>\n            {user.id && (\n              <LineItem\n                icon={SvgUsers}\n                onClick={() => openModal(Modal.EDIT_GROUPS)}\n              >\n                Groups &amp; Roles\n              </LineItem>\n            )}\n            <LineItem\n              icon={SvgKey}\n              onClick={() => openModal(Modal.RESET_PASSWORD)}\n            >\n              Reset Password\n            </LineItem>\n            <Separator paddingXRem={0.5} />\n            <LineItem\n              danger\n              icon={SvgUserX}\n              onClick={() => openModal(Modal.DEACTIVATE)}\n            >\n              Deactivate User\n            </LineItem>\n          </>\n        );\n\n      case UserStatus.INACTIVE:\n        return (\n          <>\n            {user.id && (\n              <LineItem\n                icon={SvgUsers}\n                onClick={() => openModal(Modal.EDIT_GROUPS)}\n              >\n                Groups &amp; Roles\n              </LineItem>\n            )}\n            <LineItem\n              icon={SvgKey}\n              onClick={() => openModal(Modal.RESET_PASSWORD)}\n            >\n              Reset Password\n            </LineItem>\n            <Separator paddingXRem={0.5} />\n            <LineItem\n              icon={SvgUserPlus}\n              onClick={() => openModal(Modal.ACTIVATE)}\n            >\n              Activate User\n            </LineItem>\n            <Separator paddingXRem={0.5} />\n            <LineItem\n              danger\n              icon={SvgUserX}\n              onClick={() => openModal(Modal.DELETE)}\n            >\n              Delete User\n            </LineItem>\n          </>\n        );\n\n      default: {\n        const _exhaustive: never = user.status;\n        return null;\n      }\n    }\n  })();\n\n  return (\n    <>\n      <Popover open={popoverOpen} onOpenChange={setPopoverOpen}>\n        <Popover.Trigger asChild>\n          <Button prominence=\"tertiary\" icon={SvgMoreHorizontal} />\n        </Popover.Trigger>\n        <Popover.Content align=\"end\" width=\"sm\">\n          <Section\n            gap={0.5}\n            height=\"auto\"\n            alignItems=\"stretch\"\n            justifyContent=\"start\"\n          >\n            {actionButtons}\n          </Section>\n        </Popover.Content>\n      </Popover>\n\n      {modal === Modal.EDIT_GROUPS && user.id && (\n        <EditUserModal\n          user={user as UserRow & { id: string }}\n          onClose={closeModal}\n          onMutate={onMutate}\n        />\n      )}\n\n      {modal === Modal.CANCEL_INVITE && (\n        <CancelInviteModal\n          email={user.email}\n          onClose={closeModal}\n          onMutate={onMutate}\n        />\n      )}\n\n      {modal === Modal.DEACTIVATE && (\n        <DeactivateUserModal\n          email={user.email}\n          onClose={closeModal}\n          onMutate={onMutate}\n        />\n      )}\n\n      {modal === Modal.ACTIVATE && (\n        <ActivateUserModal\n          email={user.email}\n          onClose={closeModal}\n          onMutate={onMutate}\n        />\n      )}\n\n      {modal === Modal.DELETE && (\n        <DeleteUserModal\n          email={user.email}\n          onClose={closeModal}\n          onMutate={onMutate}\n        />\n      )}\n\n      {modal === Modal.RESET_PASSWORD && (\n        <ResetPasswordModal email={user.email} onClose={closeModal} />\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/UsersPage/UsersSummary.tsx",
    "content": "import { SvgArrowUpRight, SvgFilterPlus, SvgUserSync } from \"@opal/icons\";\nimport { ContentAction } from \"@opal/layouts\";\nimport { Button } from \"@opal/components\";\nimport { Hoverable } from \"@opal/core\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Card from \"@/refresh-components/cards/Card\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Link from \"next/link\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\n\n// ---------------------------------------------------------------------------\n// Stats cell — number + label + hover filter icon\n// ---------------------------------------------------------------------------\n\ntype StatCellProps = {\n  value: number | null;\n  label: string;\n  onFilter?: () => void;\n};\n\nfunction StatCell({ value, label, onFilter }: StatCellProps) {\n  const display = value === null ? \"\\u2014\" : value.toLocaleString();\n\n  return (\n    <Hoverable.Root group=\"stat\" widthVariant=\"full\">\n      <div\n        className={`relative flex flex-col items-start gap-0.5 w-full p-2 rounded-08 transition-colors ${\n          onFilter ? \"cursor-pointer hover:bg-background-tint-02\" : \"\"\n        }`}\n        onClick={onFilter}\n      >\n        <Text as=\"span\" mainUiAction text04>\n          {display}\n        </Text>\n        <Text as=\"span\" secondaryBody text03>\n          {label}\n        </Text>\n        {onFilter && (\n          <div className=\"absolute right-1 top-1\">\n            <Hoverable.Item group=\"stat\" variant=\"opacity-on-hover\">\n              <IconButton\n                tertiary\n                icon={SvgFilterPlus}\n                tooltip=\"Add Filter\"\n                toolTipPosition=\"left\"\n                tooltipSize=\"sm\"\n                onClick={(e) => {\n                  e.stopPropagation();\n                  onFilter();\n                }}\n              />\n            </Hoverable.Item>\n          </div>\n        )}\n      </div>\n    </Hoverable.Root>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// SCIM card\n// ---------------------------------------------------------------------------\n\nfunction ScimCard() {\n  return (\n    <Card gap={0.5} padding={0.75}>\n      <ContentAction\n        icon={SvgUserSync}\n        title=\"SCIM Sync\"\n        description=\"Users are synced from your identity provider.\"\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        paddingVariant=\"fit\"\n        rightChildren={\n          <Link href={ADMIN_ROUTES.SCIM.path}>\n            <Button prominence=\"tertiary\" rightIcon={SvgArrowUpRight} size=\"sm\">\n              Manage\n            </Button>\n          </Link>\n        }\n      />\n    </Card>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Stats bar — layout varies by SCIM status\n// ---------------------------------------------------------------------------\n\ntype UsersSummaryProps = {\n  activeUsers: number | null;\n  pendingInvites: number | null;\n  requests: number | null;\n  showScim: boolean;\n  onFilterActive?: () => void;\n  onFilterInvites?: () => void;\n  onFilterRequests?: () => void;\n};\n\nexport default function UsersSummary({\n  activeUsers,\n  pendingInvites,\n  requests,\n  showScim,\n  onFilterActive,\n  onFilterInvites,\n  onFilterRequests,\n}: UsersSummaryProps) {\n  const showRequests = requests !== null && requests > 0;\n\n  const statsCard = (\n    <Card padding={0.5}>\n      <Section flexDirection=\"row\" gap={0}>\n        <StatCell\n          value={activeUsers}\n          label=\"active users\"\n          onFilter={onFilterActive}\n        />\n        <StatCell\n          value={pendingInvites}\n          label=\"pending invites\"\n          onFilter={onFilterInvites}\n        />\n        {showRequests && (\n          <StatCell\n            value={requests}\n            label=\"requests to join\"\n            onFilter={onFilterRequests}\n          />\n        )}\n      </Section>\n    </Card>\n  );\n\n  if (showScim) {\n    return (\n      <Section\n        flexDirection=\"row\"\n        justifyContent=\"start\"\n        alignItems=\"stretch\"\n        gap={0.5}\n      >\n        {statsCard}\n        <ScimCard />\n      </Section>\n    );\n  }\n\n  // No SCIM — each stat gets its own card\n  return (\n    <Section flexDirection=\"row\" gap={0.5}>\n      <Card padding={0.5}>\n        <StatCell\n          value={activeUsers}\n          label=\"active users\"\n          onFilter={onFilterActive}\n        />\n      </Card>\n      <Card padding={0.5}>\n        <StatCell\n          value={pendingInvites}\n          label=\"pending invites\"\n          onFilter={onFilterInvites}\n        />\n      </Card>\n      {showRequests && (\n        <Card padding={0.5}>\n          <StatCell\n            value={requests}\n            label=\"requests to join\"\n            onFilter={onFilterRequests}\n          />\n        </Card>\n      )}\n    </Section>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/UsersPage/UsersTable.tsx",
    "content": "\"use client\";\n\nimport { useMemo, useState } from \"react\";\nimport { Table, createTableColumns } from \"@opal/components\";\nimport { Content } from \"@opal/layouts\";\nimport { Button } from \"@opal/components\";\nimport { SvgDownload } from \"@opal/icons\";\nimport SvgNoResult from \"@opal/illustrations/no-result\";\nimport { IllustrationContent } from \"@opal/layouts\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { UserRole, UserStatus, USER_STATUS_LABELS } from \"@/lib/types\";\nimport { timeAgo } from \"@/lib/time\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { toast } from \"@/hooks/useToast\";\nimport useAdminUsers from \"@/hooks/useAdminUsers\";\nimport useGroups from \"@/hooks/useGroups\";\nimport { downloadUsersCsv } from \"./svc\";\nimport UserFilters from \"./UserFilters\";\nimport GroupsCell from \"./GroupsCell\";\nimport UserRowActions from \"./UserRowActions\";\nimport UserRoleCell from \"./UserRoleCell\";\nimport type {\n  UserRow,\n  GroupOption,\n  StatusFilter,\n  StatusCountMap,\n} from \"./interfaces\";\nimport UserAvatar from \"@/refresh-components/avatars/UserAvatar\";\nimport type { User } from \"@/lib/types\";\n\n// ---------------------------------------------------------------------------\n// Column renderers\n// ---------------------------------------------------------------------------\n\nfunction renderNameColumn(email: string, row: UserRow) {\n  return (\n    <Content\n      sizePreset=\"main-ui\"\n      variant=\"section\"\n      title={row.personal_name ?? email}\n      description={row.personal_name ? email : undefined}\n    />\n  );\n}\n\nfunction renderStatusColumn(value: UserStatus, row: UserRow) {\n  return (\n    <div className=\"flex flex-col\">\n      <Text as=\"span\" mainUiBody text03>\n        {USER_STATUS_LABELS[value] ?? value}\n      </Text>\n      {row.is_scim_synced && (\n        <Text as=\"span\" secondaryBody text03>\n          SCIM synced\n        </Text>\n      )}\n    </div>\n  );\n}\n\nfunction renderLastUpdatedColumn(value: string | null) {\n  return (\n    <Text as=\"span\" secondaryBody text03>\n      {value ? timeAgo(value) ?? \"\\u2014\" : \"\\u2014\"}\n    </Text>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Columns\n// ---------------------------------------------------------------------------\n\nconst tc = createTableColumns<UserRow>();\n\nfunction buildColumns(onMutate: () => void) {\n  return [\n    tc.qualifier({\n      content: \"icon\",\n      iconSize: \"lg\",\n      getContent: (row) => {\n        const user = {\n          email: row.email,\n          personalization: row.personal_name\n            ? { name: row.personal_name }\n            : undefined,\n        } as User;\n        return (props) => <UserAvatar user={user} size={props.size} />;\n      },\n    }),\n    tc.column(\"email\", {\n      header: \"Name\",\n      weight: 22,\n      cell: renderNameColumn,\n    }),\n    tc.column(\"groups\", {\n      header: \"Groups\",\n      weight: 24,\n      enableSorting: false,\n      cell: (value, row) => (\n        <GroupsCell groups={value} user={row} onMutate={onMutate} />\n      ),\n    }),\n    tc.column(\"role\", {\n      header: \"Account Type\",\n      weight: 16,\n      cell: (_value, row) => <UserRoleCell user={row} onMutate={onMutate} />,\n    }),\n    tc.column(\"status\", {\n      header: \"Status\",\n      weight: 14,\n      cell: renderStatusColumn,\n    }),\n    tc.column(\"updated_at\", {\n      header: \"Last Updated\",\n      weight: 14,\n      cell: renderLastUpdatedColumn,\n    }),\n    tc.actions({\n      cell: (row) => <UserRowActions user={row} onMutate={onMutate} />,\n    }),\n  ];\n}\n\n// ---------------------------------------------------------------------------\n// Component\n// ---------------------------------------------------------------------------\n\nconst PAGE_SIZE = 8;\n\ninterface UsersTableProps {\n  selectedStatuses: StatusFilter;\n  onStatusesChange: (statuses: StatusFilter) => void;\n  roleCounts: Record<string, number>;\n  statusCounts: StatusCountMap;\n}\n\nexport default function UsersTable({\n  selectedStatuses,\n  onStatusesChange,\n  roleCounts,\n  statusCounts,\n}: UsersTableProps) {\n  const [searchTerm, setSearchTerm] = useState(\"\");\n  const [selectedRoles, setSelectedRoles] = useState<UserRole[]>([]);\n  const [selectedGroups, setSelectedGroups] = useState<number[]>([]);\n\n  const { data: allGroups } = useGroups();\n\n  const groupOptions: GroupOption[] = useMemo(\n    () =>\n      (allGroups ?? []).map((g) => ({\n        id: g.id,\n        name: g.name,\n        memberCount: g.users.length,\n      })),\n    [allGroups]\n  );\n\n  const { users, isLoading, error, refresh } = useAdminUsers();\n\n  const columns = useMemo(() => buildColumns(refresh), [refresh]);\n\n  // Client-side filtering\n  const filteredUsers = useMemo(() => {\n    let result = users;\n\n    if (selectedRoles.length > 0) {\n      result = result.filter(\n        (u) => u.role !== null && selectedRoles.includes(u.role)\n      );\n    }\n\n    if (selectedStatuses.length > 0) {\n      result = result.filter((u) => selectedStatuses.includes(u.status));\n    }\n\n    if (selectedGroups.length > 0) {\n      result = result.filter((u) =>\n        u.groups.some((g) => selectedGroups.includes(g.id))\n      );\n    }\n\n    return result;\n  }, [users, selectedRoles, selectedStatuses, selectedGroups]);\n\n  if (isLoading) {\n    return (\n      <div className=\"flex justify-center py-12\">\n        <SimpleLoader className=\"h-6 w-6\" />\n      </div>\n    );\n  }\n\n  if (error) {\n    return (\n      <Text as=\"p\" secondaryBody text03>\n        Failed to load users. Please try refreshing the page.\n      </Text>\n    );\n  }\n\n  return (\n    <div className=\"flex flex-col gap-3\">\n      <InputTypeIn\n        value={searchTerm}\n        onChange={(e) => setSearchTerm(e.target.value)}\n        placeholder=\"Search users...\"\n        leftSearchIcon\n      />\n      <UserFilters\n        selectedRoles={selectedRoles}\n        onRolesChange={setSelectedRoles}\n        selectedGroups={selectedGroups}\n        onGroupsChange={setSelectedGroups}\n        groups={groupOptions}\n        selectedStatuses={selectedStatuses}\n        onStatusesChange={onStatusesChange}\n        roleCounts={roleCounts}\n        statusCounts={statusCounts}\n      />\n      <Table\n        data={filteredUsers}\n        columns={columns}\n        getRowId={(row) => row.id ?? row.email}\n        pageSize={PAGE_SIZE}\n        searchTerm={searchTerm}\n        emptyState={\n          <IllustrationContent\n            illustration={SvgNoResult}\n            title=\"No users found\"\n            description=\"No users match the current filters.\"\n          />\n        }\n        footer={{\n          leftExtra: (\n            <Button\n              icon={SvgDownload}\n              prominence=\"tertiary\"\n              size=\"sm\"\n              tooltip=\"Download CSV\"\n              aria-label=\"Download CSV\"\n              onClick={() => {\n                downloadUsersCsv().catch((err) => {\n                  toast.error(\n                    err instanceof Error\n                      ? err.message\n                      : \"Failed to download CSV\"\n                  );\n                });\n              }}\n            />\n          ),\n        }}\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/UsersPage/index.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { SvgUser, SvgUserPlus } from \"@opal/icons\";\nimport { Button } from \"@opal/components\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { useScimToken } from \"@/hooks/useScimToken\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport useUserCounts from \"@/hooks/useUserCounts\";\nimport { UserStatus } from \"@/lib/types\";\nimport type { StatusFilter } from \"./interfaces\";\n\nimport UsersSummary from \"./UsersSummary\";\nimport UsersTable from \"./UsersTable\";\nimport InviteUsersModal from \"./InviteUsersModal\";\n\n// ---------------------------------------------------------------------------\n// Users page content\n// ---------------------------------------------------------------------------\n\nfunction UsersContent() {\n  const isEe = usePaidEnterpriseFeaturesEnabled();\n\n  const { data: scimToken } = useScimToken();\n  const showScim = isEe && !!scimToken;\n\n  const { activeCount, invitedCount, pendingCount, roleCounts, statusCounts } =\n    useUserCounts();\n\n  const [selectedStatuses, setSelectedStatuses] = useState<StatusFilter>([]);\n\n  const toggleStatus = (target: UserStatus) => {\n    setSelectedStatuses((prev) =>\n      prev.includes(target)\n        ? prev.filter((s) => s !== target)\n        : [...prev, target]\n    );\n  };\n\n  return (\n    <>\n      <UsersSummary\n        activeUsers={activeCount}\n        pendingInvites={invitedCount}\n        requests={pendingCount}\n        showScim={showScim}\n        onFilterActive={() => toggleStatus(UserStatus.ACTIVE)}\n        onFilterInvites={() => toggleStatus(UserStatus.INVITED)}\n        onFilterRequests={() => toggleStatus(UserStatus.REQUESTED)}\n      />\n\n      <UsersTable\n        selectedStatuses={selectedStatuses}\n        onStatusesChange={setSelectedStatuses}\n        roleCounts={roleCounts}\n        statusCounts={statusCounts}\n      />\n    </>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Page\n// ---------------------------------------------------------------------------\n\nexport default function UsersPage() {\n  const [inviteOpen, setInviteOpen] = useState(false);\n\n  return (\n    <SettingsLayouts.Root width=\"lg\">\n      <SettingsLayouts.Header\n        title=\"Users & Requests\"\n        icon={SvgUser}\n        rightChildren={\n          <Button icon={SvgUserPlus} onClick={() => setInviteOpen(true)}>\n            Invite Users\n          </Button>\n        }\n      />\n      <SettingsLayouts.Body>\n        <UsersContent />\n      </SettingsLayouts.Body>\n\n      <InviteUsersModal open={inviteOpen} onOpenChange={setInviteOpen} />\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/UsersPage/interfaces.ts",
    "content": "import type { UserRole, UserStatus } from \"@/lib/types\";\n\nexport interface UserGroupInfo {\n  id: number;\n  name: string;\n}\n\nexport interface UserRow {\n  id: string | null;\n  email: string;\n  role: UserRole | null;\n  status: UserStatus;\n  is_active: boolean;\n  is_scim_synced: boolean;\n  personal_name: string | null;\n  created_at: string | null;\n  updated_at: string | null;\n  groups: UserGroupInfo[];\n}\n\nexport interface GroupOption {\n  id: number;\n  name: string;\n  memberCount?: number;\n}\n\n/** Empty array = no filter (show all). */\nexport type StatusFilter = UserStatus[];\n\n/** Keys match the UserStatus-derived labels used in filter badges. */\nexport type StatusCountMap = {\n  active?: number;\n  inactive?: number;\n  invited?: number;\n  requested?: number;\n};\n"
  },
  {
    "path": "web/src/refresh-pages/admin/UsersPage/svc.ts",
    "content": "import { UserRole } from \"@/lib/types\";\n\nasync function parseErrorDetail(\n  res: Response,\n  fallback: string\n): Promise<string> {\n  try {\n    const body = await res.json();\n    return body?.detail ?? fallback;\n  } catch {\n    return fallback;\n  }\n}\n\nexport async function deactivateUser(email: string): Promise<void> {\n  const res = await fetch(\"/api/manage/admin/deactivate-user\", {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ user_email: email }),\n  });\n  if (!res.ok) {\n    throw new Error(await parseErrorDetail(res, \"Failed to deactivate user\"));\n  }\n}\n\nexport async function activateUser(email: string): Promise<void> {\n  const res = await fetch(\"/api/manage/admin/activate-user\", {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ user_email: email }),\n  });\n  if (!res.ok) {\n    throw new Error(await parseErrorDetail(res, \"Failed to activate user\"));\n  }\n}\n\nexport async function deleteUser(email: string): Promise<void> {\n  const res = await fetch(\"/api/manage/admin/delete-user\", {\n    method: \"DELETE\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ user_email: email }),\n  });\n  if (!res.ok) {\n    throw new Error(await parseErrorDetail(res, \"Failed to delete user\"));\n  }\n}\n\nexport async function setUserRole(\n  email: string,\n  newRole: UserRole\n): Promise<void> {\n  const res = await fetch(\"/api/manage/set-user-role\", {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ user_email: email, new_role: newRole }),\n  });\n  if (!res.ok) {\n    throw new Error(await parseErrorDetail(res, \"Failed to update user role\"));\n  }\n}\n\nexport async function addUserToGroup(\n  groupId: number,\n  userId: string\n): Promise<void> {\n  const res = await fetch(`/api/manage/admin/user-group/${groupId}/add-users`, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ user_ids: [userId] }),\n  });\n  if (!res.ok) {\n    throw new Error(await parseErrorDetail(res, \"Failed to add user to group\"));\n  }\n}\n\nexport async function removeUserFromGroup(\n  groupId: number,\n  currentUserIds: string[],\n  userIdToRemove: string,\n  ccPairIds: number[]\n): Promise<void> {\n  const res = await fetch(`/api/manage/admin/user-group/${groupId}`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({\n      user_ids: currentUserIds.filter((id) => id !== userIdToRemove),\n      cc_pair_ids: ccPairIds,\n    }),\n  });\n  if (!res.ok) {\n    throw new Error(\n      await parseErrorDetail(res, \"Failed to remove user from group\")\n    );\n  }\n}\n\nexport async function cancelInvite(email: string): Promise<void> {\n  const res = await fetch(\"/api/manage/admin/remove-invited-user\", {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ user_email: email }),\n  });\n  if (!res.ok) {\n    throw new Error(await parseErrorDetail(res, \"Failed to cancel invite\"));\n  }\n}\n\nexport async function approveRequest(email: string): Promise<void> {\n  const res = await fetch(\"/api/tenants/users/invite/approve\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ email }),\n  });\n  if (!res.ok) {\n    throw new Error(await parseErrorDetail(res, \"Failed to approve request\"));\n  }\n}\n\nexport async function inviteUsers(emails: string[]): Promise<void> {\n  const res = await fetch(\"/api/manage/admin/users\", {\n    method: \"PUT\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ emails }),\n  });\n  if (!res.ok) {\n    throw new Error(await parseErrorDetail(res, \"Failed to invite users\"));\n  }\n}\n\nexport async function resetPassword(\n  email: string\n): Promise<{ user_id: string; new_password: string }> {\n  const res = await fetch(\"/api/password/reset_password\", {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ user_email: email }),\n  });\n  if (!res.ok) {\n    throw new Error(await parseErrorDetail(res, \"Failed to reset password\"));\n  }\n  return res.json();\n}\n\nexport async function downloadUsersCsv(): Promise<void> {\n  const res = await fetch(\"/api/manage/users/download\");\n  if (!res.ok) {\n    throw new Error(\n      await parseErrorDetail(res, \"Failed to download users CSV\")\n    );\n  }\n  const blob = await res.blob();\n  const url = URL.createObjectURL(blob);\n  const a = document.createElement(\"a\");\n  a.href = url;\n  const ts = new Date().toISOString().replace(/[:.]/g, \"-\").slice(0, 19);\n  a.download = `onyx_users_${ts}.csv`;\n  a.click();\n  URL.revokeObjectURL(url);\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/VoiceConfigurationPage.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useMemo, useState } from \"react\";\nimport {\n  AzureIcon,\n  ElevenLabsIcon,\n  IconProps,\n  OpenAIIcon,\n} from \"@/components/icons/icons\";\nimport ProviderCard from \"@/sections/admin/ProviderCard\";\nimport Message from \"@/refresh-components/messages/Message\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { FetchError } from \"@/lib/fetcher\";\nimport {\n  useVoiceProviders,\n  VoiceProviderView,\n} from \"@/hooks/useVoiceProviders\";\nimport {\n  activateVoiceProvider,\n  deactivateVoiceProvider,\n  deleteVoiceProvider,\n} from \"@/lib/admin/voice/svc\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { toast } from \"@/hooks/useToast\";\nimport { Callout } from \"@/components/ui/callout\";\nimport { Content } from \"@opal/layouts\";\nimport { SvgMicrophone, SvgSlash, SvgUnplug } from \"@opal/icons\";\nimport { Button, Text } from \"@opal/components\";\nimport { markdown } from \"@opal/utils\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport VoiceProviderSetupModal from \"@/app/admin/configuration/voice/VoiceProviderSetupModal\";\n\ninterface ModelDetails {\n  id: string;\n  label: string;\n  subtitle: string;\n  providerType: string;\n}\n\ninterface ProviderGroup {\n  providerType: string;\n  providerLabel: string;\n  models: ModelDetails[];\n}\n\n// STT Models - individual cards\nconst STT_MODELS: ModelDetails[] = [\n  {\n    id: \"whisper\",\n    label: \"Whisper\",\n    subtitle: \"OpenAI's general purpose speech recognition model.\",\n    providerType: \"openai\",\n  },\n  {\n    id: \"azure-speech-stt\",\n    label: \"Azure Speech\",\n    subtitle: \"Speech to text in Microsoft Foundry Tools.\",\n    providerType: \"azure\",\n  },\n  {\n    id: \"elevenlabs-stt\",\n    label: \"ElevenAPI\",\n    subtitle: \"ElevenLabs Speech to Text API.\",\n    providerType: \"elevenlabs\",\n  },\n];\n\n// TTS Models - grouped by provider\nconst TTS_PROVIDER_GROUPS: ProviderGroup[] = [\n  {\n    providerType: \"openai\",\n    providerLabel: \"OpenAI\",\n    models: [\n      {\n        id: \"tts-1\",\n        label: \"TTS-1\",\n        subtitle: \"OpenAI's text-to-speech model optimized for speed.\",\n        providerType: \"openai\",\n      },\n      {\n        id: \"tts-1-hd\",\n        label: \"TTS-1 HD\",\n        subtitle: \"OpenAI's text-to-speech model optimized for quality.\",\n        providerType: \"openai\",\n      },\n    ],\n  },\n  {\n    providerType: \"azure\",\n    providerLabel: \"Azure\",\n    models: [\n      {\n        id: \"azure-speech-tts\",\n        label: \"Azure Speech\",\n        subtitle: \"Text to speech in Microsoft Foundry Tools.\",\n        providerType: \"azure\",\n      },\n    ],\n  },\n  {\n    providerType: \"elevenlabs\",\n    providerLabel: \"ElevenLabs\",\n    models: [\n      {\n        id: \"elevenlabs-tts\",\n        label: \"ElevenAPI\",\n        subtitle: \"ElevenLabs Text to Speech API.\",\n        providerType: \"elevenlabs\",\n      },\n    ],\n  },\n];\n\nconst FallbackMicrophoneIcon = ({ size, className }: IconProps) => (\n  <SvgMicrophone size={size} className={className} />\n);\n\nfunction getProviderIcon(\n  providerType: string\n): React.FunctionComponent<IconProps> {\n  switch (providerType) {\n    case \"openai\":\n      return OpenAIIcon;\n    case \"azure\":\n      return AzureIcon;\n    case \"elevenlabs\":\n      return ElevenLabsIcon;\n    default:\n      return FallbackMicrophoneIcon;\n  }\n}\n\ntype ProviderMode = \"stt\" | \"tts\";\n\nfunction getProviderLabel(providerType: string): string {\n  switch (providerType) {\n    case \"openai\":\n      return \"OpenAI\";\n    case \"azure\":\n      return \"Azure\";\n    case \"elevenlabs\":\n      return \"ElevenLabs\";\n    default:\n      return providerType;\n  }\n}\n\nconst NO_DEFAULT_VALUE = \"__none__\";\n\nconst route = ADMIN_ROUTES.VOICE;\nconst pageDescription =\n  \"Configure speech-to-text and text-to-speech providers for voice input and spoken responses.\";\n\ninterface VoiceDisconnectModalProps {\n  disconnectTarget: {\n    providerId: number;\n    providerLabel: string;\n    providerType: string;\n  };\n  providers: VoiceProviderView[];\n  replacementProviderId: string | null;\n  onReplacementChange: (id: string | null) => void;\n  onClose: () => void;\n  onDisconnect: () => void;\n}\n\nfunction VoiceDisconnectModal({\n  disconnectTarget,\n  providers,\n  replacementProviderId,\n  onReplacementChange,\n  onClose,\n  onDisconnect,\n}: VoiceDisconnectModalProps) {\n  const targetProvider = providers.find(\n    (p) => p.id === disconnectTarget.providerId\n  );\n  const isActive =\n    (targetProvider?.is_default_stt ?? false) ||\n    (targetProvider?.is_default_tts ?? false);\n\n  // Find other configured providers that could serve as replacements\n  const replacementOptions = providers.filter(\n    (p) => p.id !== disconnectTarget.providerId && p.has_api_key\n  );\n\n  const needsReplacement = isActive;\n  const hasReplacements = replacementOptions.length > 0;\n\n  // Auto-select first replacement when modal opens\n  useEffect(() => {\n    if (needsReplacement && hasReplacements && !replacementProviderId) {\n      const first = replacementOptions[0];\n      if (first) onReplacementChange(String(first.id));\n    }\n  }, []); // eslint-disable-line react-hooks/exhaustive-deps\n\n  return (\n    <ConfirmationModalLayout\n      icon={SvgUnplug}\n      title={`Disconnect ${disconnectTarget.providerLabel}`}\n      description=\"Voice models\"\n      onClose={onClose}\n      submit={\n        <Button\n          variant=\"danger\"\n          onClick={onDisconnect}\n          disabled={\n            needsReplacement && hasReplacements && !replacementProviderId\n          }\n        >\n          Disconnect\n        </Button>\n      }\n    >\n      {needsReplacement ? (\n        hasReplacements ? (\n          <Section alignItems=\"start\">\n            <Text as=\"p\" color=\"text-03\">\n              {markdown(\n                `**${disconnectTarget.providerLabel}** models will no longer be used for speech-to-text or text-to-speech, and it will no longer be your default. Session history will be preserved.`\n              )}\n            </Text>\n            <Section alignItems=\"start\" gap={0.25}>\n              <Text as=\"p\" color=\"text-04\">\n                Set New Default\n              </Text>\n              <InputSelect\n                value={replacementProviderId ?? undefined}\n                onValueChange={(v) => onReplacementChange(v)}\n              >\n                <InputSelect.Trigger placeholder=\"Select a replacement provider\" />\n                <InputSelect.Content>\n                  {replacementOptions.map((p) => (\n                    <InputSelect.Item\n                      key={p.id}\n                      value={String(p.id)}\n                      icon={getProviderIcon(p.provider_type)}\n                    >\n                      {getProviderLabel(p.provider_type)}\n                    </InputSelect.Item>\n                  ))}\n                  <InputSelect.Separator />\n                  <InputSelect.Item value={NO_DEFAULT_VALUE} icon={SvgSlash}>\n                    <span>\n                      <b>No Default</b>\n                      <span className=\"text-text-03\"> (Disable Voice)</span>\n                    </span>\n                  </InputSelect.Item>\n                </InputSelect.Content>\n              </InputSelect>\n            </Section>\n          </Section>\n        ) : (\n          <>\n            <Text as=\"p\" color=\"text-03\">\n              {markdown(\n                `**${disconnectTarget.providerLabel}** models will no longer be used for speech-to-text or text-to-speech, and it will no longer be your default.`\n              )}\n            </Text>\n            <Text as=\"p\" color=\"text-03\">\n              Connect another provider to continue using voice.\n            </Text>\n          </>\n        )\n      ) : (\n        <>\n          <Text as=\"p\" color=\"text-03\">\n            {markdown(\n              `**${disconnectTarget.providerLabel}** models will no longer be available for voice.`\n            )}\n          </Text>\n          <Text as=\"p\" color=\"text-03\">\n            Session history will be preserved.\n          </Text>\n        </>\n      )}\n    </ConfirmationModalLayout>\n  );\n}\n\nexport default function VoiceConfigurationPage() {\n  const [modalOpen, setModalOpen] = useState(false);\n  const [selectedProvider, setSelectedProvider] = useState<string | null>(null);\n  const [editingProvider, setEditingProvider] =\n    useState<VoiceProviderView | null>(null);\n  const [modalMode, setModalMode] = useState<ProviderMode>(\"stt\");\n  const [selectedModelId, setSelectedModelId] = useState<string | null>(null);\n  const [sttActivationError, setSTTActivationError] = useState<string | null>(\n    null\n  );\n  const [ttsActivationError, setTTSActivationError] = useState<string | null>(\n    null\n  );\n  const [disconnectTarget, setDisconnectTarget] = useState<{\n    providerId: number;\n    providerLabel: string;\n    providerType: string;\n  } | null>(null);\n  const [replacementProviderId, setReplacementProviderId] = useState<\n    string | null\n  >(null);\n\n  const { providers, error, isLoading, refresh: mutate } = useVoiceProviders();\n\n  const handleConnect = (\n    providerType: string,\n    mode: ProviderMode,\n    modelId?: string\n  ) => {\n    setSelectedProvider(providerType);\n    setEditingProvider(null);\n    setModalMode(mode);\n    setSelectedModelId(modelId ?? null);\n    setModalOpen(true);\n    setSTTActivationError(null);\n    setTTSActivationError(null);\n  };\n\n  const handleEdit = (\n    provider: VoiceProviderView,\n    mode: ProviderMode,\n    modelId?: string\n  ) => {\n    setSelectedProvider(provider.provider_type);\n    setEditingProvider(provider);\n    setModalMode(mode);\n    setSelectedModelId(modelId ?? null);\n    setModalOpen(true);\n    setSTTActivationError(null);\n    setTTSActivationError(null);\n  };\n\n  const handleSetDefault = async (\n    providerId: number,\n    mode: ProviderMode,\n    modelId?: string\n  ) => {\n    const setError =\n      mode === \"stt\" ? setSTTActivationError : setTTSActivationError;\n    setError(null);\n    try {\n      const response = await activateVoiceProvider(providerId, mode, modelId);\n      if (!response.ok) {\n        const errorBody = await response.json().catch(() => ({}));\n        throw new Error(\n          typeof errorBody?.detail === \"string\"\n            ? errorBody.detail\n            : `Failed to set provider as default ${mode.toUpperCase()}.`\n        );\n      }\n      await mutate();\n    } catch (err) {\n      const message =\n        err instanceof Error ? err.message : \"Unexpected error occurred.\";\n      setError(message);\n    }\n  };\n\n  const handleDeactivate = async (providerId: number, mode: ProviderMode) => {\n    const setError =\n      mode === \"stt\" ? setSTTActivationError : setTTSActivationError;\n    setError(null);\n    try {\n      const response = await deactivateVoiceProvider(providerId, mode);\n      if (!response.ok) {\n        const errorBody = await response.json().catch(() => ({}));\n        throw new Error(\n          typeof errorBody?.detail === \"string\"\n            ? errorBody.detail\n            : `Failed to deactivate ${mode.toUpperCase()} provider.`\n        );\n      }\n      await mutate();\n    } catch (err) {\n      const message =\n        err instanceof Error ? err.message : \"Unexpected error occurred.\";\n      setError(message);\n    }\n  };\n\n  const handleModalClose = () => {\n    setModalOpen(false);\n    setSelectedProvider(null);\n    setEditingProvider(null);\n    setSelectedModelId(null);\n  };\n\n  const handleModalSuccess = () => {\n    mutate();\n    handleModalClose();\n  };\n\n  const handleDisconnect = async () => {\n    if (!disconnectTarget) return;\n    try {\n      const targetProvider = providers.find(\n        (p) => p.id === disconnectTarget.providerId\n      );\n\n      // If a replacement was selected (not \"No Default\"), activate it for each\n      // mode the disconnected provider was default for\n      if (replacementProviderId && replacementProviderId !== NO_DEFAULT_VALUE) {\n        const repId = Number(replacementProviderId);\n\n        if (targetProvider?.is_default_stt) {\n          const resp = await activateVoiceProvider(repId, \"stt\");\n          if (!resp.ok) {\n            const errorBody = await resp.json().catch(() => ({}));\n            throw new Error(\n              typeof errorBody?.detail === \"string\"\n                ? errorBody.detail\n                : \"Failed to activate replacement STT provider.\"\n            );\n          }\n        }\n\n        if (targetProvider?.is_default_tts) {\n          const resp = await activateVoiceProvider(repId, \"tts\");\n          if (!resp.ok) {\n            const errorBody = await resp.json().catch(() => ({}));\n            throw new Error(\n              typeof errorBody?.detail === \"string\"\n                ? errorBody.detail\n                : \"Failed to activate replacement TTS provider.\"\n            );\n          }\n        }\n      }\n\n      const response = await deleteVoiceProvider(disconnectTarget.providerId);\n      if (!response.ok) {\n        const errorBody = await response.json().catch(() => ({}));\n        throw new Error(\n          typeof errorBody?.detail === \"string\"\n            ? errorBody.detail\n            : \"Failed to disconnect provider.\"\n        );\n      }\n      await mutate();\n      toast.success(`${disconnectTarget.providerLabel} disconnected`);\n    } catch (err) {\n      console.error(\"Failed to disconnect voice provider:\", err);\n      toast.error(\n        err instanceof Error ? err.message : \"Unexpected error occurred.\"\n      );\n    } finally {\n      setDisconnectTarget(null);\n      setReplacementProviderId(null);\n    }\n  };\n\n  const isProviderConfigured = (provider?: VoiceProviderView): boolean => {\n    return !!provider?.has_api_key;\n  };\n\n  const providersByType = useMemo(() => {\n    return new Map((providers ?? []).map((p) => [p.provider_type, p] as const));\n  }, [providers]);\n\n  const hasActiveSTTProvider =\n    providers?.some((p) => p.is_default_stt) ?? false;\n  const hasActiveTTSProvider =\n    providers?.some((p) => p.is_default_tts) ?? false;\n\n  const getModelStatus = (\n    model: ModelDetails,\n    mode: ProviderMode\n  ): \"disconnected\" | \"connected\" | \"selected\" => {\n    const provider = providersByType.get(model.providerType);\n    if (!provider || !isProviderConfigured(provider)) return \"disconnected\";\n\n    const isActive =\n      mode === \"stt\"\n        ? provider.is_default_stt\n        : provider.is_default_tts && provider.tts_model === model.id;\n\n    if (isActive) return \"selected\";\n    return \"connected\";\n  };\n\n  const renderModelSelect = (model: ModelDetails, mode: ProviderMode) => {\n    const provider = providersByType.get(model.providerType);\n    const status = getModelStatus(model, mode);\n    const Icon = getProviderIcon(model.providerType);\n\n    return (\n      <ProviderCard\n        key={`${mode}-${model.id}`}\n        aria-label={`voice-${mode}-${model.id}`}\n        icon={Icon}\n        title={model.label}\n        description={model.subtitle}\n        status={status}\n        onConnect={() => handleConnect(model.providerType, mode, model.id)}\n        onSelect={() => {\n          if (provider?.id) handleSetDefault(provider.id, mode, model.id);\n        }}\n        onDeselect={() => {\n          if (provider?.id) handleDeactivate(provider.id, mode);\n        }}\n        onEdit={() => {\n          if (provider) handleEdit(provider, mode, model.id);\n        }}\n        onDisconnect={\n          status !== \"disconnected\" && provider\n            ? () =>\n                setDisconnectTarget({\n                  providerId: provider.id,\n                  providerLabel: getProviderLabel(model.providerType),\n                  providerType: model.providerType,\n                })\n            : undefined\n        }\n      />\n    );\n  };\n\n  if (error) {\n    const message = error?.message || \"Unable to load voice configuration.\";\n    const detail =\n      error instanceof FetchError && typeof error.info?.detail === \"string\"\n        ? error.info.detail\n        : undefined;\n\n    return (\n      <SettingsLayouts.Root>\n        <SettingsLayouts.Header\n          icon={route.icon}\n          title={route.title}\n          description={pageDescription}\n        />\n        <SettingsLayouts.Body>\n          <Callout type=\"danger\" title=\"Failed to load voice settings\">\n            {message}\n            {detail && (\n              <Text as=\"p\" font=\"main-content-body\" color=\"text-03\">\n                {detail}\n              </Text>\n            )}\n          </Callout>\n        </SettingsLayouts.Body>\n      </SettingsLayouts.Root>\n    );\n  }\n\n  if (isLoading) {\n    return (\n      <SettingsLayouts.Root>\n        <SettingsLayouts.Header\n          icon={route.icon}\n          title={route.title}\n          description={pageDescription}\n        />\n        <SettingsLayouts.Body>\n          <ThreeDotsLoader />\n        </SettingsLayouts.Body>\n      </SettingsLayouts.Root>\n    );\n  }\n\n  return (\n    <SettingsLayouts.Root>\n      <SettingsLayouts.Header\n        icon={route.icon}\n        title={route.title}\n        description={pageDescription}\n      />\n      <SettingsLayouts.Body>\n        <div className=\"flex flex-col gap-6\">\n          <Content\n            title=\"Speech to Text\"\n            description=\"Select a model to transcribe speech to text in chats.\"\n            sizePreset=\"main-content\"\n            variant=\"section\"\n          />\n\n          {sttActivationError && (\n            <Callout type=\"danger\" title=\"Unable to update STT provider\">\n              {sttActivationError}\n            </Callout>\n          )}\n\n          {!hasActiveSTTProvider && (\n            <Message\n              info\n              static\n              large\n              close={false}\n              text=\"Connect a speech to text provider to use in chat.\"\n              className=\"w-full\"\n            />\n          )}\n\n          <div className=\"flex flex-col gap-2\">\n            {STT_MODELS.map((model) => renderModelSelect(model, \"stt\"))}\n          </div>\n        </div>\n\n        <div className=\"flex flex-col gap-6\">\n          <Content\n            title=\"Text to Speech\"\n            description=\"Select a model to speak out chat responses.\"\n            sizePreset=\"main-content\"\n            variant=\"section\"\n          />\n\n          {ttsActivationError && (\n            <Callout type=\"danger\" title=\"Unable to update TTS provider\">\n              {ttsActivationError}\n            </Callout>\n          )}\n\n          {!hasActiveTTSProvider && (\n            <Message\n              info\n              static\n              large\n              close={false}\n              text=\"Connect a text to speech provider to use in chat.\"\n              className=\"w-full\"\n            />\n          )}\n\n          {TTS_PROVIDER_GROUPS.map((group) => (\n            <div key={group.providerType} className=\"flex flex-col gap-2\">\n              <Text font=\"secondary-body\" color=\"text-03\">\n                {group.providerLabel}\n              </Text>\n              <div className=\"flex flex-col gap-2\">\n                {group.models.map((model) => renderModelSelect(model, \"tts\"))}\n              </div>\n            </div>\n          ))}\n        </div>\n      </SettingsLayouts.Body>\n\n      {disconnectTarget && (\n        <VoiceDisconnectModal\n          disconnectTarget={disconnectTarget}\n          providers={providers}\n          replacementProviderId={replacementProviderId}\n          onReplacementChange={setReplacementProviderId}\n          onClose={() => {\n            setDisconnectTarget(null);\n            setReplacementProviderId(null);\n          }}\n          onDisconnect={() => void handleDisconnect()}\n        />\n      )}\n\n      {modalOpen && selectedProvider && (\n        <VoiceProviderSetupModal\n          providerType={selectedProvider}\n          existingProvider={editingProvider}\n          mode={modalMode}\n          defaultModelId={selectedModelId}\n          onClose={handleModalClose}\n          onSuccess={handleModalSuccess}\n        />\n      )}\n    </SettingsLayouts.Root>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/WebSearchPage/WebProviderModalReducer.ts",
    "content": "export type WebProviderModalState = {\n  /** Provider type currently being configured in the modal (null when closed). */\n  providerType: string | null;\n\n  /** Existing provider ID when editing (null for new providers). */\n  existingProviderId: number | null;\n\n  /** Raw API key input value (may be the masked placeholder). */\n  apiKeyValue: string;\n  /** Single provider-specific config field value (e.g. cx / base URL). */\n  configValue: string;\n\n  /** Request phase for disabling inputs/buttons. */\n  phase: \"idle\" | \"validating\" | \"saving\";\n\n  /**\n   * UI message shown in the modal helper region.\n   * - kind=error: red error message\n   * - kind=status: neutral/green status message\n   */\n  message: { kind: \"status\" | \"error\"; text: string } | null;\n};\n\nexport type WebProviderModalAction =\n  | {\n      type: \"OPEN\";\n      providerType: string;\n      existingProviderId: number | null;\n      initialApiKeyValue: string;\n      initialConfigValue: string;\n    }\n  | { type: \"CLOSE\" }\n  | { type: \"SET_API_KEY\"; value: string }\n  | { type: \"SET_CONFIG_VALUE\"; value: string }\n  | { type: \"SET_PHASE\"; phase: \"idle\" | \"validating\" | \"saving\" }\n  | { type: \"SET_STATUS_MESSAGE\"; text: string }\n  | { type: \"SET_ERROR_MESSAGE\"; text: string }\n  | { type: \"CLEAR_MESSAGE\" };\n\nexport const initialWebProviderModalState: WebProviderModalState = {\n  providerType: null,\n  existingProviderId: null,\n  apiKeyValue: \"\",\n  configValue: \"\",\n  phase: \"idle\",\n  message: null,\n};\n\nexport const MASKED_API_KEY_PLACEHOLDER = \"••••••••••••••••\";\n\nexport function WebProviderModalReducer(\n  state: WebProviderModalState,\n  action: WebProviderModalAction\n): WebProviderModalState {\n  switch (action.type) {\n    case \"OPEN\":\n      return {\n        ...state,\n        providerType: action.providerType,\n        existingProviderId: action.existingProviderId,\n        apiKeyValue: action.initialApiKeyValue,\n        configValue: action.initialConfigValue,\n        phase: \"idle\",\n        message: null,\n      };\n    case \"CLOSE\":\n      return {\n        ...state,\n        providerType: null,\n        existingProviderId: null,\n        apiKeyValue: \"\",\n        configValue: \"\",\n        phase: \"idle\",\n        message: null,\n      };\n    case \"SET_API_KEY\": {\n      return {\n        ...state,\n        apiKeyValue: action.value,\n      };\n    }\n    case \"SET_CONFIG_VALUE\":\n      return {\n        ...state,\n        configValue: action.value,\n      };\n    case \"SET_PHASE\":\n      return {\n        ...state,\n        phase: action.phase,\n      };\n    case \"SET_STATUS_MESSAGE\":\n      return {\n        ...state,\n        message: { kind: \"status\", text: action.text },\n      };\n    case \"SET_ERROR_MESSAGE\":\n      return {\n        ...state,\n        phase: \"idle\",\n        message: { kind: \"error\", text: action.text },\n      };\n    case \"CLEAR_MESSAGE\":\n      return {\n        ...state,\n        message: null,\n      };\n    default:\n      return state;\n  }\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/WebSearchPage/WebProviderSetupModal.tsx",
    "content": "\"use client\";\n\nimport { memo, useMemo, type ReactNode, type FunctionComponent } from \"react\";\n\nimport { FormField } from \"@/refresh-components/form/FormField\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport PasswordInputTypeIn from \"@/refresh-components/inputs/PasswordInputTypeIn\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\n\nimport { SvgArrowExchange, SvgOnyxLogo } from \"@opal/icons\";\nimport type { IconProps } from \"@opal/types\";\n\nexport type WebProviderSetupModalProps = {\n  isOpen: boolean;\n  onClose: () => void;\n  providerLabel: string;\n  providerLogo: ReactNode;\n  description: string;\n  apiKeyValue: string;\n  onApiKeyChange: (value: string) => void;\n  /**\n   * When true, the API key is a stored/masked value from the backend\n   * that cannot actually be revealed. The reveal toggle will be disabled.\n   */\n  isStoredApiKey?: boolean;\n  optionalField?: {\n    label: string;\n    value: string;\n    onChange: (value: string) => void;\n    placeholder: string;\n    description?: ReactNode;\n    showFirst?: boolean;\n  };\n  helperMessage: ReactNode;\n  helperClass: string;\n  isProcessing: boolean;\n  canConnect: boolean;\n  onConnect: () => void;\n  apiKeyAutoFocus?: boolean;\n  hideApiKey?: boolean;\n};\n\nexport const WebProviderSetupModal = memo(\n  ({\n    isOpen,\n    onClose,\n    providerLabel,\n    providerLogo,\n    description,\n    apiKeyValue,\n    onApiKeyChange,\n    isStoredApiKey = false,\n    optionalField,\n    helperMessage,\n    helperClass,\n    isProcessing,\n    canConnect,\n    onConnect,\n    apiKeyAutoFocus = true,\n    hideApiKey = false,\n  }: WebProviderSetupModalProps) => {\n    const LogoArrangement = useMemo(() => {\n      const Component: FunctionComponent<IconProps> = () => (\n        <div className=\"flex items-center gap-1\">\n          {providerLogo}\n          <div className=\"flex items-center justify-center size-4 p-0.5 shrink-0\">\n            <SvgArrowExchange className=\"size-3 text-text-04\" />\n          </div>\n          <div className=\"flex items-center justify-center size-7 p-0.5 shrink-0 overflow-clip\">\n            <SvgOnyxLogo size={24} className=\"shrink-0\" />\n          </div>\n        </div>\n      );\n      return Component;\n    }, [providerLogo]);\n\n    return (\n      <Modal open={isOpen} onOpenChange={(open) => !open && onClose()}>\n        <Modal.Content width=\"sm\" preventAccidentalClose>\n          <Modal.Header\n            icon={LogoArrangement}\n            title={`Set up ${providerLabel}`}\n            description={description}\n            onClose={onClose}\n          />\n          <Modal.Body>\n            {optionalField?.showFirst && (\n              <FormField\n                name={optionalField.label.toLowerCase().replace(/\\s+/g, \"_\")}\n                state=\"idle\"\n                className=\"w-full\"\n              >\n                <FormField.Label>{optionalField.label}</FormField.Label>\n                <FormField.Control asChild>\n                  <InputTypeIn\n                    placeholder={optionalField.placeholder}\n                    value={optionalField.value}\n                    onChange={(event) =>\n                      optionalField.onChange(event.target.value)\n                    }\n                  />\n                </FormField.Control>\n                {optionalField.description && (\n                  <FormField.Description>\n                    {optionalField.description}\n                  </FormField.Description>\n                )}\n              </FormField>\n            )}\n\n            {!hideApiKey && (\n              <FormField\n                name=\"api_key\"\n                state={\n                  helperClass.includes(\"status-error\") ||\n                  helperClass.includes(\"error\")\n                    ? \"error\"\n                    : helperClass.includes(\"green\")\n                      ? \"success\"\n                      : \"idle\"\n                }\n                className=\"w-full\"\n              >\n                <FormField.Label>API Key</FormField.Label>\n                <FormField.Control asChild>\n                  <PasswordInputTypeIn\n                    data-testid=\"web-provider-api-key-input\"\n                    placeholder=\"Enter API key\"\n                    value={apiKeyValue}\n                    autoFocus={apiKeyAutoFocus}\n                    isNonRevealable={isStoredApiKey}\n                    onFocus={(e) => {\n                      if (isStoredApiKey) {\n                        e.target.select();\n                      }\n                    }}\n                    onChange={(event) => onApiKeyChange(event.target.value)}\n                    showClearButton={false}\n                  />\n                </FormField.Control>\n                {isProcessing ? (\n                  <FormField.APIMessage\n                    state=\"loading\"\n                    messages={{\n                      loading:\n                        typeof helperMessage === \"string\"\n                          ? helperMessage\n                          : \"Validating API key...\",\n                    }}\n                  />\n                ) : typeof helperMessage === \"string\" ? (\n                  <FormField.Message\n                    messages={{\n                      idle:\n                        helperClass.includes(\"status-error\") ||\n                        helperClass.includes(\"error\")\n                          ? \"\"\n                          : helperClass.includes(\"green\")\n                            ? \"\"\n                            : helperMessage,\n                      error:\n                        helperClass.includes(\"status-error\") ||\n                        helperClass.includes(\"error\")\n                          ? helperMessage\n                          : \"\",\n                      success: helperClass.includes(\"green\")\n                        ? helperMessage\n                        : \"\",\n                    }}\n                  />\n                ) : (\n                  <FormField.Description className={helperClass}>\n                    {helperMessage}\n                  </FormField.Description>\n                )}\n              </FormField>\n            )}\n\n            {optionalField && !optionalField.showFirst && (\n              <FormField\n                name={optionalField.label.toLowerCase().replace(/\\s+/g, \"_\")}\n                state={\n                  hideApiKey &&\n                  (helperClass.includes(\"status-error\") ||\n                    helperClass.includes(\"error\"))\n                    ? \"error\"\n                    : \"idle\"\n                }\n                className=\"w-full\"\n              >\n                <FormField.Label>{optionalField.label}</FormField.Label>\n                <FormField.Control asChild>\n                  <InputTypeIn\n                    placeholder={optionalField.placeholder}\n                    value={optionalField.value}\n                    onChange={(event) =>\n                      optionalField.onChange(event.target.value)\n                    }\n                  />\n                </FormField.Control>\n                {optionalField.description && (\n                  <FormField.Description>\n                    {optionalField.description}\n                  </FormField.Description>\n                )}\n\n                {hideApiKey && (\n                  <>\n                    {isProcessing ? (\n                      <FormField.APIMessage\n                        state=\"loading\"\n                        messages={{\n                          loading:\n                            typeof helperMessage === \"string\"\n                              ? helperMessage\n                              : \"Testing connection...\",\n                        }}\n                      />\n                    ) : typeof helperMessage === \"string\" ? (\n                      <FormField.Message\n                        messages={{\n                          idle:\n                            helperClass.includes(\"status-error\") ||\n                            helperClass.includes(\"error\")\n                              ? \"\"\n                              : helperClass.includes(\"green\")\n                                ? \"\"\n                                : \"\",\n                          error:\n                            helperClass.includes(\"status-error\") ||\n                            helperClass.includes(\"error\")\n                              ? helperMessage\n                              : \"\",\n                          success: helperClass.includes(\"green\")\n                            ? helperMessage\n                            : \"\",\n                        }}\n                      />\n                    ) : null}\n                  </>\n                )}\n              </FormField>\n            )}\n          </Modal.Body>\n          <Modal.Footer>\n            <Button prominence=\"secondary\" type=\"button\" onClick={onClose}>\n              Cancel\n            </Button>\n            <Button\n              disabled={!canConnect || isProcessing}\n              type=\"button\"\n              onClick={onConnect}\n            >\n              {isProcessing ? \"Connecting...\" : \"Connect\"}\n            </Button>\n          </Modal.Footer>\n        </Modal.Content>\n      </Modal>\n    );\n  }\n);\n\nWebProviderSetupModal.displayName = \"WebProviderSetupModal\";\n"
  },
  {
    "path": "web/src/refresh-pages/admin/WebSearchPage/connectProviderFlow.ts",
    "content": "export type ProviderTestPayload = {\n  provider_type: string;\n  api_key: string | null;\n  use_stored_key: boolean;\n  config: Record<string, string>;\n};\n\nexport type ProviderUpsertPayload = {\n  id: number | null;\n  name: string;\n  provider_type: string;\n  api_key: string | null;\n  api_key_changed: boolean;\n  config: Record<string, string>;\n  activate: boolean;\n};\n\nconst WEB_SEARCH_PROVIDER_ENDPOINTS = {\n  search: {\n    upsertUrl: \"/api/admin/web-search/search-providers\",\n    testUrl: \"/api/admin/web-search/search-providers/test\",\n  },\n  content: {\n    upsertUrl: \"/api/admin/web-search/content-providers\",\n    testUrl: \"/api/admin/web-search/content-providers/test\",\n  },\n} as const;\n\n/**\n * Which web-search provider category we are configuring.\n * - `search`: search engine provider\n * - `content`: crawler/content provider used to fetch full pages\n */\nexport type WebProviderCategory = keyof typeof WEB_SEARCH_PROVIDER_ENDPOINTS;\n\nexport type ConnectProviderFlowArgs = {\n  category: WebProviderCategory;\n  providerType: string;\n\n  existingProviderId: number | null;\n  existingProviderName: string | null;\n  existingProviderHasApiKey: boolean;\n\n  displayName: string;\n\n  providerRequiresApiKey: boolean;\n  apiKeyChangedForProvider: boolean;\n  apiKey: string;\n\n  config: Record<string, string>;\n  configChanged: boolean;\n\n  onValidating: (message: string) => void;\n  onSaving: (message: string) => void;\n  onError: (message: string) => void;\n  onClose: () => void;\n\n  mutate: () => Promise<unknown>;\n};\n\nexport async function connectProviderFlow({\n  category,\n  providerType,\n  existingProviderId,\n  existingProviderName,\n  existingProviderHasApiKey,\n  displayName,\n  providerRequiresApiKey,\n  apiKeyChangedForProvider,\n  apiKey,\n  config,\n  configChanged,\n  onValidating,\n  onSaving,\n  onError,\n  onClose,\n  mutate,\n}: ConnectProviderFlowArgs): Promise<void> {\n  const { testUrl, upsertUrl } = WEB_SEARCH_PROVIDER_ENDPOINTS[category];\n  const isNewProvider = existingProviderId == null;\n  const needsValidation =\n    isNewProvider || apiKeyChangedForProvider || configChanged;\n  const msg = {\n    validating: \"Validating configuration...\",\n    activating: \"Activating provider...\",\n    validatedThenActivating: \"Configuration validated. Activating provider...\",\n    validationFailedFallback: \"Failed to validate configuration.\",\n    activateFailedFallback: \"Failed to activate provider.\",\n  };\n\n  if (providerRequiresApiKey) {\n    if (isNewProvider && !apiKey) {\n      return;\n    }\n    if (apiKeyChangedForProvider && !apiKey) {\n      return;\n    }\n  }\n\n  try {\n    if (needsValidation) {\n      onValidating(msg.validating);\n\n      const testPayload: ProviderTestPayload = {\n        provider_type: providerType,\n        api_key: apiKeyChangedForProvider ? apiKey : null,\n        use_stored_key:\n          providerRequiresApiKey &&\n          !apiKeyChangedForProvider &&\n          existingProviderHasApiKey,\n        config,\n      };\n\n      const testResponse = await fetch(testUrl, {\n        method: \"POST\",\n        headers: { \"Content-Type\": \"application/json\" },\n        body: JSON.stringify(testPayload),\n      });\n\n      if (!testResponse.ok) {\n        const errorBody = await testResponse.json().catch(() => ({}));\n        throw new Error(\n          typeof (errorBody as any)?.detail === \"string\"\n            ? (errorBody as any).detail\n            : msg.validationFailedFallback\n        );\n      }\n\n      onSaving(msg.validatedThenActivating);\n    } else {\n      onSaving(msg.activating);\n    }\n\n    const upsertPayload: ProviderUpsertPayload = {\n      id: existingProviderId,\n      name: existingProviderName ?? displayName,\n      provider_type: providerType,\n      api_key: apiKeyChangedForProvider ? apiKey : null,\n      api_key_changed: apiKeyChangedForProvider,\n      config,\n      activate: true,\n    };\n\n    const upsertResponse = await fetch(upsertUrl, {\n      method: \"POST\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify(upsertPayload),\n    });\n\n    if (!upsertResponse.ok) {\n      const errorBody = await upsertResponse.json().catch(() => ({}));\n      throw new Error(\n        typeof (errorBody as any)?.detail === \"string\"\n          ? (errorBody as any).detail\n          : msg.activateFailedFallback\n      );\n    }\n\n    await mutate();\n    onClose();\n  } catch (e) {\n    const message =\n      e instanceof Error ? e.message : \"Unexpected error occurred.\";\n    onError(message);\n  }\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/WebSearchPage/contentProviderUtils.ts",
    "content": "export type WebContentProviderType =\n  | \"firecrawl\"\n  | \"onyx_web_crawler\"\n  | \"exa\"\n  | (string & {});\n\nexport const CONTENT_PROVIDER_DETAILS: Record<\n  string,\n  { label: string; subtitle: string; description: string; logoSrc?: string }\n> = {\n  onyx_web_crawler: {\n    label: \"Onyx Web Crawler\",\n    subtitle:\n      \"Built-in web crawler. Works for most pages but less performant in edge cases.\",\n    description:\n      \"Onyx's built-in crawler processes URLs returned by your search engine.\",\n  },\n  firecrawl: {\n    label: \"Firecrawl\",\n    subtitle: \"Leading open-source crawler.\",\n    description:\n      \"Connect Firecrawl to fetch and summarize page content from search results.\",\n    logoSrc: \"/firecrawl.svg\",\n  },\n  exa: {\n    label: \"Exa\",\n    subtitle: \"Exa.ai\",\n    description:\n      \"Use Exa to fetch and summarize page content from search results.\",\n    logoSrc: \"/Exa.svg\",\n  },\n};\n\n/**\n * Display order for built-in providers.\n * Derived from insertion order of `CONTENT_PROVIDER_DETAILS` for a single source of truth.\n */\nexport const CONTENT_PROVIDER_ORDER = Object.keys(\n  CONTENT_PROVIDER_DETAILS\n) as WebContentProviderType[];\n\nexport type ContentProviderConfig = Record<string, string> | null | undefined;\n\nexport type ContentProviderLike =\n  | {\n      has_api_key: boolean;\n      config: ContentProviderConfig;\n    }\n  | null\n  | undefined;\n\ntype ContentProviderCapabilities = {\n  requiresApiKey: boolean;\n  requiredConfigKeys: string[];\n  storedConfigAliases?: Record<string, string[]>;\n};\n\nconst CONTENT_PROVIDER_CAPABILITIES: Record<\n  string,\n  ContentProviderCapabilities\n> = {\n  onyx_web_crawler: {\n    requiresApiKey: false,\n    requiredConfigKeys: [],\n  },\n  firecrawl: {\n    requiresApiKey: true,\n    requiredConfigKeys: [\"base_url\"],\n    storedConfigAliases: {\n      base_url: [\"base_url\", \"api_base_url\"],\n    },\n  },\n  // exa uses default capabilities\n};\n\nconst DEFAULT_CONTENT_PROVIDER_CAPABILITIES: ContentProviderCapabilities = {\n  requiresApiKey: true,\n  requiredConfigKeys: [],\n};\n\nfunction getCapabilities(\n  providerType: WebContentProviderType\n): ContentProviderCapabilities {\n  return (\n    CONTENT_PROVIDER_CAPABILITIES[providerType as string] ??\n    DEFAULT_CONTENT_PROVIDER_CAPABILITIES\n  );\n}\n\nfunction getStoredContentConfigValue(\n  providerType: WebContentProviderType,\n  canonicalKey: string,\n  config: ContentProviderConfig\n): string {\n  const caps = getCapabilities(providerType);\n  const aliases = caps.storedConfigAliases?.[canonicalKey] ?? [canonicalKey];\n\n  const safeConfig = config ?? {};\n  for (const key of aliases) {\n    const value = safeConfig[key];\n    if (typeof value === \"string\" && value.length > 0) {\n      return value;\n    }\n  }\n  return \"\";\n}\n\nexport function isContentProviderConfigured(\n  providerType: WebContentProviderType,\n  provider: ContentProviderLike\n): boolean {\n  const caps = getCapabilities(providerType);\n\n  if (caps.requiresApiKey && !(provider?.has_api_key ?? false)) {\n    return false;\n  }\n\n  for (const requiredKey of caps.requiredConfigKeys) {\n    const value = getStoredContentConfigValue(\n      providerType,\n      requiredKey,\n      provider?.config\n    );\n    if (!value) {\n      return false;\n    }\n  }\n\n  return true;\n}\n\nexport function getCurrentContentProviderType(\n  providers: Array<{\n    is_active: boolean;\n    provider_type: WebContentProviderType;\n  }>\n): WebContentProviderType {\n  return (\n    providers.find((p) => p.is_active && p.provider_type !== \"onyx_web_crawler\")\n      ?.provider_type ??\n    providers.find((p) => p.is_active)?.provider_type ??\n    \"onyx_web_crawler\"\n  );\n}\n\nexport function buildContentProviderConfig(\n  providerType: WebContentProviderType,\n  baseUrl: string\n): Record<string, string> {\n  const caps = getCapabilities(providerType);\n  const trimmed = baseUrl.trim();\n  const config: Record<string, string> = {};\n\n  if (caps.requiredConfigKeys.length === 0 || !trimmed) {\n    return config;\n  }\n\n  const requiredKey = caps.requiredConfigKeys[0];\n  if (!requiredKey) {\n    return config;\n  }\n\n  config[requiredKey] = trimmed;\n  return config;\n}\n\nexport function canConnectContentProvider(\n  providerType: WebContentProviderType,\n  apiKey: string,\n  baseUrl: string\n): boolean {\n  const caps = getCapabilities(providerType);\n\n  if (caps.requiresApiKey && apiKey.trim().length === 0) {\n    return false;\n  }\n\n  if (caps.requiredConfigKeys.length > 0 && baseUrl.trim().length === 0) {\n    return false;\n  }\n\n  return true;\n}\n\nexport function getSingleContentConfigFieldValueForForm(\n  providerType: WebContentProviderType,\n  provider: ContentProviderLike,\n  defaultValue = \"\"\n): string {\n  const caps = getCapabilities(providerType);\n  if (caps.requiredConfigKeys.length === 0) {\n    return defaultValue;\n  }\n\n  const requiredKey = caps.requiredConfigKeys[0];\n  if (!requiredKey) {\n    return defaultValue;\n  }\n\n  return (\n    getStoredContentConfigValue(providerType, requiredKey, provider?.config) ||\n    defaultValue\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/WebSearchPage/index.tsx",
    "content": "\"use client\";\n\nimport Image from \"next/image\";\nimport { useEffect, useMemo, useState, useReducer } from \"react\";\nimport { InfoIcon } from \"@/components/icons/icons\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport * as SettingsLayouts from \"@/layouts/settings-layouts\";\nimport { Content, CardHeaderLayout } from \"@opal/layouts\";\nimport useSWR from \"swr\";\nimport { errorHandlingFetcher, FetchError } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { ThreeDotsLoader } from \"@/components/Loading\";\nimport { Callout } from \"@/components/ui/callout\";\nimport { cn } from \"@/lib/utils\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  SvgArrowExchange,\n  SvgArrowRightCircle,\n  SvgCheckSquare,\n  SvgGlobe,\n  SvgOnyxLogo,\n  SvgSettings,\n  SvgSlash,\n  SvgUnplug,\n} from \"@opal/icons\";\nimport { Button, SelectCard } from \"@opal/components\";\nimport { Hoverable } from \"@opal/core\";\nimport { ADMIN_ROUTES } from \"@/lib/admin-routes\";\nimport { WebProviderSetupModal } from \"@/refresh-pages/admin/WebSearchPage/WebProviderSetupModal\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport {\n  SEARCH_PROVIDER_DETAILS,\n  SEARCH_PROVIDER_ORDER,\n  getSearchProviderDisplayLabel,\n  buildSearchProviderConfig,\n  canConnectSearchProvider,\n  getSingleConfigFieldValueForForm,\n  isBuiltInSearchProviderType,\n  isSearchProviderConfigured,\n  searchProviderRequiresApiKey,\n  type WebSearchProviderType,\n} from \"@/refresh-pages/admin/WebSearchPage/searchProviderUtils\";\nimport {\n  CONTENT_PROVIDER_DETAILS,\n  CONTENT_PROVIDER_ORDER,\n  buildContentProviderConfig,\n  canConnectContentProvider,\n  getSingleContentConfigFieldValueForForm,\n  getCurrentContentProviderType,\n  isContentProviderConfigured,\n  type WebContentProviderType,\n} from \"@/refresh-pages/admin/WebSearchPage/contentProviderUtils\";\nimport {\n  initialWebProviderModalState,\n  WebProviderModalReducer,\n  MASKED_API_KEY_PLACEHOLDER,\n} from \"@/refresh-pages/admin/WebSearchPage/WebProviderModalReducer\";\nimport { connectProviderFlow } from \"@/refresh-pages/admin/WebSearchPage/connectProviderFlow\";\nimport {\n  activateSearchProvider,\n  deactivateSearchProvider,\n  activateContentProvider,\n  deactivateContentProvider,\n  disconnectProvider,\n} from \"@/refresh-pages/admin/WebSearchPage/svc\";\nimport type {\n  WebSearchProviderView,\n  WebContentProviderView,\n  DisconnectTargetState,\n} from \"@/refresh-pages/admin/WebSearchPage/interfaces\";\n\nconst NO_DEFAULT_VALUE = \"__none__\";\n\nconst route = ADMIN_ROUTES.WEB_SEARCH;\n\n// ---------------------------------------------------------------------------\n// WebSearchDisconnectModal\n// ---------------------------------------------------------------------------\n\nfunction WebSearchDisconnectModal({\n  disconnectTarget,\n  searchProviders,\n  contentProviders,\n  replacementProviderId,\n  onReplacementChange,\n  onClose,\n  onDisconnect,\n}: {\n  disconnectTarget: DisconnectTargetState;\n  searchProviders: WebSearchProviderView[];\n  contentProviders: WebContentProviderView[];\n  replacementProviderId: string | null;\n  onReplacementChange: (id: string | null) => void;\n  onClose: () => void;\n  onDisconnect: () => void;\n}) {\n  const isSearch = disconnectTarget.category === \"search\";\n\n  // Determine if the target is currently the active/selected provider\n  const isActive = isSearch\n    ? searchProviders.find((p) => p.id === disconnectTarget.id)?.is_active ??\n      false\n    : contentProviders.find((p) => p.id === disconnectTarget.id)?.is_active ??\n      false;\n\n  // Find other configured providers as replacements\n  const replacementOptions = isSearch\n    ? searchProviders.filter(\n        (p) => p.id !== disconnectTarget.id && p.id > 0 && p.has_api_key\n      )\n    : contentProviders.filter(\n        (p) =>\n          p.id !== disconnectTarget.id &&\n          p.provider_type !== \"onyx_web_crawler\" &&\n          p.id > 0 &&\n          p.has_api_key\n      );\n\n  const needsReplacement = isActive;\n  const hasReplacements = replacementOptions.length > 0;\n\n  const getLabel = (p: { name: string; provider_type: string }) => {\n    if (isSearch) {\n      const details =\n        SEARCH_PROVIDER_DETAILS[p.provider_type as WebSearchProviderType];\n      return details?.label ?? p.name ?? p.provider_type;\n    }\n    const details = CONTENT_PROVIDER_DETAILS[p.provider_type];\n    return details?.label ?? p.name ?? p.provider_type;\n  };\n\n  const categoryLabel = isSearch ? \"search engine\" : \"web crawler\";\n  const featureLabel = isSearch ? \"web search\" : \"web crawling\";\n  const disableLabel = isSearch ? \"Disable Web Search\" : \"Disable Web Crawling\";\n\n  // Auto-select first replacement when modal opens\n  useEffect(() => {\n    if (needsReplacement && hasReplacements && !replacementProviderId) {\n      const first = replacementOptions[0];\n      if (first) onReplacementChange(String(first.id));\n    }\n  }, []); // eslint-disable-line react-hooks/exhaustive-deps\n\n  return (\n    <ConfirmationModalLayout\n      icon={SvgUnplug}\n      title={`Disconnect ${disconnectTarget.label}`}\n      description=\"This will remove the stored credentials for this provider.\"\n      onClose={onClose}\n      submit={\n        <Button\n          variant=\"danger\"\n          onClick={onDisconnect}\n          disabled={\n            needsReplacement && hasReplacements && !replacementProviderId\n          }\n        >\n          Disconnect\n        </Button>\n      }\n    >\n      {needsReplacement ? (\n        hasReplacements ? (\n          <Section alignItems=\"start\">\n            <Text as=\"p\" text03>\n              <b>{disconnectTarget.label}</b> is currently the active{\" \"}\n              {categoryLabel}. Search history will be preserved.\n            </Text>\n            <Section alignItems=\"start\" gap={0.25}>\n              <Text as=\"p\" secondaryBody text03>\n                Set New Default\n              </Text>\n              <InputSelect\n                value={replacementProviderId ?? undefined}\n                onValueChange={(v) => onReplacementChange(v)}\n              >\n                <InputSelect.Trigger placeholder=\"Select a replacement provider\" />\n                <InputSelect.Content>\n                  {replacementOptions.map((p) => (\n                    <InputSelect.Item key={p.id} value={String(p.id)}>\n                      {getLabel(p)}\n                    </InputSelect.Item>\n                  ))}\n                  <InputSelect.Separator />\n                  <InputSelect.Item value={NO_DEFAULT_VALUE} icon={SvgSlash}>\n                    <span>\n                      <b>No Default</b>\n                      <span className=\"text-text-03\"> ({disableLabel})</span>\n                    </span>\n                  </InputSelect.Item>\n                </InputSelect.Content>\n              </InputSelect>\n            </Section>\n          </Section>\n        ) : (\n          <>\n            <Text as=\"p\" text03>\n              <b>{disconnectTarget.label}</b> is currently the active{\" \"}\n              {categoryLabel}.\n            </Text>\n            <Text as=\"p\" text03>\n              Connect another provider to continue using {featureLabel}.\n            </Text>\n          </>\n        )\n      ) : (\n        <>\n          <Text as=\"p\" text03>\n            {isSearch ? \"Web search\" : \"Web crawling\"} will no longer be routed\n            through <b>{disconnectTarget.label}</b>.\n          </Text>\n          <Text as=\"p\" text03>\n            Search history will be preserved.\n          </Text>\n        </>\n      )}\n    </ConfirmationModalLayout>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// ProviderCard — uses SelectCard for stateful interactive provider cards\n// ---------------------------------------------------------------------------\n\ntype ProviderStatus = \"disconnected\" | \"connected\" | \"selected\";\n\ninterface ProviderCardProps {\n  icon: React.FunctionComponent<{ size?: number; className?: string }>;\n  title: string;\n  description: string;\n  status: ProviderStatus;\n  onConnect?: () => void;\n  onSelect?: () => void;\n  onDeselect?: () => void;\n  onEdit?: () => void;\n  onDisconnect?: () => void;\n  selectedLabel?: string;\n}\n\nconst STATUS_TO_STATE = {\n  disconnected: \"empty\",\n  connected: \"filled\",\n  selected: \"selected\",\n} as const;\n\nfunction ProviderCard({\n  icon,\n  title,\n  description,\n  status,\n  onConnect,\n  onSelect,\n  onDeselect,\n  onEdit,\n  onDisconnect,\n  selectedLabel = \"Current Default\",\n}: ProviderCardProps) {\n  const isDisconnected = status === \"disconnected\";\n  const isConnected = status === \"connected\";\n  const isSelected = status === \"selected\";\n\n  return (\n    <Hoverable.Root group=\"web-search/ProviderCard\">\n      <SelectCard\n        state={STATUS_TO_STATE[status]}\n        padding=\"sm\"\n        rounding=\"lg\"\n        onClick={\n          isDisconnected && onConnect\n            ? onConnect\n            : isSelected && onDeselect\n              ? onDeselect\n              : undefined\n        }\n      >\n        <CardHeaderLayout\n          sizePreset=\"main-ui\"\n          variant=\"section\"\n          icon={icon}\n          title={title}\n          description={description}\n          rightChildren={\n            isDisconnected && onConnect ? (\n              <Button\n                prominence=\"tertiary\"\n                rightIcon={SvgArrowExchange}\n                onClick={(e) => {\n                  e.stopPropagation();\n                  onConnect();\n                }}\n              >\n                Connect\n              </Button>\n            ) : isConnected && onSelect ? (\n              <Button\n                prominence=\"tertiary\"\n                rightIcon={SvgArrowRightCircle}\n                onClick={(e) => {\n                  e.stopPropagation();\n                  onSelect();\n                }}\n              >\n                Set as Default\n              </Button>\n            ) : isSelected ? (\n              <div className=\"p-2\">\n                <Content\n                  title={selectedLabel}\n                  sizePreset=\"main-ui\"\n                  variant=\"section\"\n                  icon={SvgCheckSquare}\n                />\n              </div>\n            ) : undefined\n          }\n          bottomRightChildren={\n            !isDisconnected ? (\n              <div className=\"flex flex-row px-1 pb-1\">\n                {onDisconnect && (\n                  <Hoverable.Item group=\"web-search/ProviderCard\">\n                    <Button\n                      icon={SvgUnplug}\n                      tooltip=\"Disconnect\"\n                      aria-label={`Disconnect ${title}`}\n                      prominence=\"tertiary\"\n                      onClick={(e) => {\n                        e.stopPropagation();\n                        onDisconnect();\n                      }}\n                      size=\"md\"\n                    />\n                  </Hoverable.Item>\n                )}\n                {onEdit && (\n                  <Button\n                    icon={SvgSettings}\n                    tooltip=\"Edit\"\n                    aria-label={`Edit ${title}`}\n                    prominence=\"tertiary\"\n                    onClick={(e) => {\n                      e.stopPropagation();\n                      onEdit();\n                    }}\n                    size=\"md\"\n                  />\n                )}\n              </div>\n            ) : undefined\n          }\n        />\n      </SelectCard>\n    </Hoverable.Root>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Page\n// ---------------------------------------------------------------------------\n\nexport default function WebSearchPage() {\n  const [searchModal, dispatchSearchModal] = useReducer(\n    WebProviderModalReducer,\n    initialWebProviderModalState\n  );\n  const [disconnectTarget, setDisconnectTarget] =\n    useState<DisconnectTargetState | null>(null);\n  const [replacementProviderId, setReplacementProviderId] = useState<\n    string | null\n  >(null);\n  const [contentModal, dispatchContentModal] = useReducer(\n    WebProviderModalReducer,\n    initialWebProviderModalState\n  );\n  const [activationError, setActivationError] = useState<string | null>(null);\n  const [contentActivationError, setContentActivationError] = useState<\n    string | null\n  >(null);\n  const {\n    data: searchProvidersData,\n    error: searchProvidersError,\n    isLoading: isLoadingSearchProviders,\n    mutate: mutateSearchProviders,\n  } = useSWR<WebSearchProviderView[]>(\n    SWR_KEYS.webSearchSearchProviders,\n    errorHandlingFetcher\n  );\n\n  const {\n    data: contentProvidersData,\n    error: contentProvidersError,\n    isLoading: isLoadingContentProviders,\n    mutate: mutateContentProviders,\n  } = useSWR<WebContentProviderView[]>(\n    SWR_KEYS.webSearchContentProviders,\n    errorHandlingFetcher\n  );\n\n  const searchProviders = searchProvidersData ?? [];\n  const contentProviders = contentProvidersData ?? [];\n\n  const isLoading = isLoadingSearchProviders || isLoadingContentProviders;\n\n  // Exa shares API key between search and content providers\n  const exaSearchProvider = searchProviders.find(\n    (p) => p.provider_type === \"exa\"\n  );\n  const exaContentProvider = contentProviders.find(\n    (p) => p.provider_type === \"exa\"\n  );\n  const hasSharedExaKey =\n    (exaSearchProvider?.has_api_key || exaContentProvider?.has_api_key) ??\n    false;\n\n  // Modal form state is owned by reducers\n\n  const openSearchModal = (\n    providerType: WebSearchProviderType,\n    provider?: WebSearchProviderView\n  ) => {\n    const requiresApiKey = searchProviderRequiresApiKey(providerType);\n    const hasStoredKey = provider?.has_api_key ?? false;\n\n    // For Exa search provider, check if we can use the shared Exa key\n    const isExa = providerType === \"exa\";\n    const canUseSharedExaKey = isExa && hasSharedExaKey && !hasStoredKey;\n\n    dispatchSearchModal({\n      type: \"OPEN\",\n      providerType,\n      existingProviderId: provider?.id ?? null,\n      initialApiKeyValue:\n        requiresApiKey && (hasStoredKey || canUseSharedExaKey)\n          ? MASKED_API_KEY_PLACEHOLDER\n          : \"\",\n      initialConfigValue: getSingleConfigFieldValueForForm(\n        providerType,\n        provider\n      ),\n    });\n  };\n\n  const openContentModal = (\n    providerType: WebContentProviderType,\n    provider?: WebContentProviderView\n  ) => {\n    const hasStoredKey = provider?.has_api_key ?? false;\n    const defaultFirecrawlBaseUrl = \"https://api.firecrawl.dev/v2/scrape\";\n\n    // For Exa content provider, check if we can use the shared Exa key\n    const isExa = providerType === \"exa\";\n    const canUseSharedExaKey = isExa && hasSharedExaKey && !hasStoredKey;\n\n    dispatchContentModal({\n      type: \"OPEN\",\n      providerType,\n      existingProviderId: provider?.id ?? null,\n      initialApiKeyValue:\n        hasStoredKey || canUseSharedExaKey ? MASKED_API_KEY_PLACEHOLDER : \"\",\n      initialConfigValue:\n        providerType === \"firecrawl\"\n          ? getSingleContentConfigFieldValueForForm(\n              providerType,\n              provider,\n              defaultFirecrawlBaseUrl\n            )\n          : \"\",\n    });\n  };\n\n  const hasActiveSearchProvider = searchProviders.some(\n    (provider) => provider.is_active\n  );\n\n  const hasConfiguredSearchProvider = searchProviders.some((provider) =>\n    isSearchProviderConfigured(provider.provider_type, provider)\n  );\n\n  const combinedSearchProviders = useMemo(() => {\n    const byType = new Map(\n      searchProviders.map((p) => [p.provider_type, p] as const)\n    );\n\n    const ordered = SEARCH_PROVIDER_ORDER.map((providerType) => {\n      const provider = byType.get(providerType);\n      const details = SEARCH_PROVIDER_DETAILS[providerType];\n      return {\n        key: provider?.id ?? providerType,\n        providerType,\n        label: getSearchProviderDisplayLabel(providerType, provider?.name),\n        subtitle: details.subtitle,\n        logoSrc: details.logoSrc,\n        provider,\n      };\n    });\n\n    const additional = searchProviders\n      .filter((p) => !SEARCH_PROVIDER_ORDER.includes(p.provider_type))\n      .map((provider) => ({\n        key: provider.id,\n        providerType: provider.provider_type,\n        label: getSearchProviderDisplayLabel(\n          provider.provider_type,\n          provider.name\n        ),\n        subtitle: \"Custom integration\",\n        logoSrc: undefined,\n        provider,\n      }));\n\n    return [...ordered, ...additional];\n  }, [searchProviders]);\n\n  const selectedProviderType =\n    searchModal.providerType as WebSearchProviderType | null;\n  const selectedContentProviderType =\n    contentModal.providerType as WebContentProviderType | null;\n\n  const providerLabel = selectedProviderType\n    ? getSearchProviderDisplayLabel(selectedProviderType)\n    : \"\";\n  const searchProviderValues = useMemo(\n    () => ({\n      apiKey: searchModal.apiKeyValue.trim(),\n      config: searchModal.configValue.trim(),\n    }),\n    [searchModal.apiKeyValue, searchModal.configValue]\n  );\n  const canConnect =\n    !!selectedProviderType &&\n    canConnectSearchProvider(\n      selectedProviderType,\n      searchProviderValues.apiKey,\n      searchProviderValues.config\n    );\n  const contentProviderLabel = selectedContentProviderType\n    ? CONTENT_PROVIDER_DETAILS[selectedContentProviderType]?.label ||\n      selectedContentProviderType\n    : \"\";\n  const contentProviderValues = useMemo(\n    () => ({\n      apiKey: contentModal.apiKeyValue.trim(),\n      config: contentModal.configValue.trim(),\n    }),\n    [contentModal.apiKeyValue, contentModal.configValue]\n  );\n  const canConnectContent =\n    !!selectedContentProviderType &&\n    canConnectContentProvider(\n      selectedContentProviderType,\n      contentProviderValues.apiKey,\n      contentProviderValues.config\n    );\n\n  const renderLogo = ({\n    logoSrc,\n    alt,\n    fallback,\n    size = 16,\n    containerSize,\n  }: {\n    logoSrc?: string;\n    alt: string;\n    fallback?: React.ReactNode;\n    size?: number;\n    containerSize?: number;\n  }) => {\n    const containerSizeClass =\n      size === 24 || containerSize === 28 ? \"size-7\" : \"size-5\";\n\n    return (\n      <div\n        className={cn(\n          \"flex items-center justify-center px-0.5 py-0 shrink-0 overflow-clip\",\n          containerSizeClass\n        )}\n      >\n        {logoSrc ? (\n          <Image src={logoSrc} alt={alt} width={size} height={size} />\n        ) : fallback ? (\n          fallback\n        ) : (\n          <SvgGlobe size={size} className=\"text-text-02\" />\n        )}\n      </div>\n    );\n  };\n\n  const combinedContentProviders = useMemo(() => {\n    const byType = new Map(\n      contentProviders.map((p) => [p.provider_type, p] as const)\n    );\n\n    // Always include our built-in providers in a stable order. If missing, inject\n    // a virtual placeholder so the UI can still render/activate it.\n    const ordered = CONTENT_PROVIDER_ORDER.map((providerType) => {\n      const existing = byType.get(providerType);\n      if (existing) return existing;\n\n      if (providerType === \"onyx_web_crawler\") {\n        return {\n          id: -1,\n          name: \"Onyx Web Crawler\",\n          provider_type: \"onyx_web_crawler\",\n          is_active: true,\n          config: null,\n          has_api_key: true,\n        } satisfies WebContentProviderView;\n      }\n\n      if (providerType === \"firecrawl\") {\n        return {\n          id: -2,\n          name: \"Firecrawl\",\n          provider_type: \"firecrawl\",\n          is_active: false,\n          config: null,\n          has_api_key: false,\n        } satisfies WebContentProviderView;\n      }\n\n      if (providerType === \"exa\") {\n        return {\n          id: -3,\n          name: \"Exa\",\n          provider_type: \"exa\",\n          is_active: false,\n          config: null,\n          has_api_key: hasSharedExaKey,\n        } satisfies WebContentProviderView;\n      }\n\n      return null;\n    }).filter(Boolean) as WebContentProviderView[];\n\n    const additional = contentProviders.filter(\n      (p) => !CONTENT_PROVIDER_ORDER.includes(p.provider_type)\n    );\n\n    return [...ordered, ...additional];\n  }, [contentProviders, hasSharedExaKey]);\n\n  const currentContentProviderType =\n    getCurrentContentProviderType(contentProviders);\n\n  if (searchProvidersError || contentProvidersError) {\n    const message =\n      searchProvidersError?.message ||\n      contentProvidersError?.message ||\n      \"Unable to load web search configuration.\";\n\n    const detail =\n      (searchProvidersError instanceof FetchError &&\n      typeof searchProvidersError.info?.detail === \"string\"\n        ? searchProvidersError.info.detail\n        : undefined) ||\n      (contentProvidersError instanceof FetchError &&\n      typeof contentProvidersError.info?.detail === \"string\"\n        ? contentProvidersError.info.detail\n        : undefined);\n\n    return (\n      <SettingsLayouts.Root>\n        <SettingsLayouts.Header\n          icon={route.icon}\n          title={route.title}\n          description=\"Search settings for external search across the internet.\"\n          separator\n        />\n        <SettingsLayouts.Body>\n          <Callout type=\"danger\" title=\"Failed to load web search settings\">\n            {message}\n            {detail && (\n              <Text as=\"p\" className=\"mt-2 text-text-03\" mainContentBody text03>\n                {detail}\n              </Text>\n            )}\n          </Callout>\n        </SettingsLayouts.Body>\n      </SettingsLayouts.Root>\n    );\n  }\n\n  if (isLoading) {\n    return (\n      <SettingsLayouts.Root>\n        <SettingsLayouts.Header\n          icon={route.icon}\n          title={route.title}\n          description=\"Search settings for external search across the internet.\"\n          separator\n        />\n        <SettingsLayouts.Body>\n          <ThreeDotsLoader />\n        </SettingsLayouts.Body>\n      </SettingsLayouts.Root>\n    );\n  }\n\n  const handleSearchConnect = async () => {\n    if (!selectedProviderType) {\n      return;\n    }\n\n    const config = buildSearchProviderConfig(\n      selectedProviderType,\n      searchProviderValues.config\n    );\n\n    const existingProviderId = searchModal.existingProviderId;\n    const existingProvider = existingProviderId\n      ? searchProviders.find((p) => p.id === existingProviderId)\n      : null;\n\n    const providerRequiresApiKey =\n      searchProviderRequiresApiKey(selectedProviderType);\n    const apiKeyChangedForProvider =\n      providerRequiresApiKey &&\n      searchModal.apiKeyValue !== MASKED_API_KEY_PLACEHOLDER &&\n      searchProviderValues.apiKey.length > 0;\n\n    const storedConfigValue = getSingleConfigFieldValueForForm(\n      selectedProviderType,\n      existingProvider\n    );\n    const configChanged =\n      Object.keys(config).length > 0 &&\n      storedConfigValue !== searchProviderValues.config;\n\n    dispatchSearchModal({ type: \"SET_PHASE\", phase: \"saving\" });\n    dispatchSearchModal({ type: \"CLEAR_MESSAGE\" });\n    setActivationError(null);\n\n    await connectProviderFlow({\n      category: \"search\",\n      providerType: selectedProviderType,\n      existingProviderId: existingProvider?.id ?? null,\n      existingProviderName: existingProvider?.name ?? null,\n      existingProviderHasApiKey: existingProvider?.has_api_key ?? false,\n      displayName:\n        SEARCH_PROVIDER_DETAILS[selectedProviderType]?.label ??\n        selectedProviderType,\n      providerRequiresApiKey,\n      apiKeyChangedForProvider,\n      apiKey: searchProviderValues.apiKey,\n      config,\n      configChanged,\n      onValidating: (message) => (\n        dispatchSearchModal({ type: \"SET_PHASE\", phase: \"validating\" }),\n        dispatchSearchModal({ type: \"SET_STATUS_MESSAGE\", text: message })\n      ),\n      onSaving: (message) => (\n        dispatchSearchModal({ type: \"SET_PHASE\", phase: \"saving\" }),\n        dispatchSearchModal({ type: \"SET_STATUS_MESSAGE\", text: message })\n      ),\n      onError: (message) =>\n        dispatchSearchModal({ type: \"SET_ERROR_MESSAGE\", text: message }),\n      onClose: () => {\n        dispatchSearchModal({ type: \"CLOSE\" });\n      },\n      mutate: async () => {\n        await mutateSearchProviders();\n        if (selectedProviderType === \"exa\") {\n          await mutateContentProviders();\n        }\n      },\n    });\n  };\n\n  const handleActivateSearchProvider = async (providerId: number) => {\n    setActivationError(null);\n    try {\n      await activateSearchProvider(providerId);\n      await mutateSearchProviders();\n    } catch (error) {\n      const message =\n        error instanceof Error ? error.message : \"Unexpected error occurred.\";\n      setActivationError(message);\n    }\n  };\n\n  const handleDeactivateSearchProvider = async (providerId: number) => {\n    setActivationError(null);\n    try {\n      await deactivateSearchProvider(providerId);\n      await mutateSearchProviders();\n    } catch (error) {\n      const message =\n        error instanceof Error ? error.message : \"Unexpected error occurred.\";\n      setActivationError(message);\n    }\n  };\n\n  const handleActivateContentProvider = async (\n    provider: WebContentProviderView\n  ) => {\n    setContentActivationError(null);\n    try {\n      await activateContentProvider(provider);\n      await mutateContentProviders();\n    } catch (error) {\n      const message =\n        error instanceof Error ? error.message : \"Unexpected error occurred.\";\n      setContentActivationError(message);\n    }\n  };\n\n  const handleDeactivateContentProvider = async (\n    providerId: number,\n    providerType: string\n  ) => {\n    setContentActivationError(null);\n    try {\n      await deactivateContentProvider(providerId, providerType);\n      await mutateContentProviders();\n    } catch (error) {\n      const message =\n        error instanceof Error ? error.message : \"Unexpected error occurred.\";\n      setContentActivationError(message);\n    }\n  };\n\n  const handleContentConnect = async () => {\n    if (!selectedContentProviderType) {\n      return;\n    }\n\n    const config = buildContentProviderConfig(\n      selectedContentProviderType,\n      contentProviderValues.config\n    );\n\n    const existingProviderId = contentModal.existingProviderId;\n    const existingProvider = existingProviderId\n      ? contentProviders.find((p) => p.id === existingProviderId)\n      : null;\n\n    const storedBaseUrl = getSingleContentConfigFieldValueForForm(\n      selectedContentProviderType,\n      existingProvider,\n      \"https://api.firecrawl.dev/v2/scrape\"\n    );\n    const configChanged =\n      selectedContentProviderType === \"firecrawl\" &&\n      storedBaseUrl !== contentProviderValues.config;\n\n    dispatchContentModal({ type: \"SET_PHASE\", phase: \"saving\" });\n    dispatchContentModal({ type: \"CLEAR_MESSAGE\" });\n\n    const apiKeyChangedForContentProvider =\n      contentModal.apiKeyValue !== MASKED_API_KEY_PLACEHOLDER &&\n      contentProviderValues.apiKey.length > 0;\n\n    await connectProviderFlow({\n      category: \"content\",\n      providerType: selectedContentProviderType,\n      existingProviderId: existingProvider?.id ?? null,\n      existingProviderName: existingProvider?.name ?? null,\n      existingProviderHasApiKey: existingProvider?.has_api_key ?? false,\n      displayName:\n        CONTENT_PROVIDER_DETAILS[selectedContentProviderType]?.label ??\n        selectedContentProviderType,\n      providerRequiresApiKey: true,\n      apiKeyChangedForProvider: apiKeyChangedForContentProvider,\n      apiKey: contentProviderValues.apiKey,\n      config,\n      configChanged,\n      onValidating: (message) => (\n        dispatchContentModal({ type: \"SET_PHASE\", phase: \"validating\" }),\n        dispatchContentModal({ type: \"SET_STATUS_MESSAGE\", text: message })\n      ),\n      onSaving: (message) => (\n        dispatchContentModal({ type: \"SET_PHASE\", phase: \"saving\" }),\n        dispatchContentModal({ type: \"SET_STATUS_MESSAGE\", text: message })\n      ),\n      onError: (message) =>\n        dispatchContentModal({ type: \"SET_ERROR_MESSAGE\", text: message }),\n      onClose: () => {\n        dispatchContentModal({ type: \"CLOSE\" });\n      },\n      mutate: async () => {\n        await mutateContentProviders();\n        if (selectedContentProviderType === \"exa\") {\n          await mutateSearchProviders();\n        }\n      },\n    });\n  };\n\n  const getContentProviderHelperMessage = () => {\n    if (contentModal.message?.kind === \"error\") {\n      return contentModal.message.text;\n    }\n    if (contentModal.message?.kind === \"status\") {\n      return contentModal.message.text;\n    }\n    if (\n      contentModal.phase === \"validating\" ||\n      contentModal.phase === \"saving\"\n    ) {\n      return \"Validating API key...\";\n    }\n\n    const providerName = selectedContentProviderType\n      ? CONTENT_PROVIDER_DETAILS[selectedContentProviderType]?.label ||\n        selectedContentProviderType\n      : \"\";\n\n    if (selectedContentProviderType === \"exa\") {\n      return (\n        <>\n          Paste your{\" \"}\n          <a\n            href=\"https://dashboard.exa.ai/api-keys\"\n            target=\"_blank\"\n            rel=\"noopener noreferrer\"\n            className=\"underline\"\n          >\n            API key\n          </a>{\" \"}\n          from Exa to enable crawling.\n        </>\n      );\n    }\n\n    return selectedContentProviderType === \"firecrawl\" ? (\n      <>\n        Paste your <span className=\"underline\">API key</span> from Firecrawl to\n        access your search engine.\n      </>\n    ) : (\n      `Paste your API key from ${providerName} to enable crawling.`\n    );\n  };\n\n  const getContentProviderHelperClass = () => {\n    if (contentModal.message?.kind === \"error\") return \"text-status-error-05\";\n    if (contentModal.message?.kind === \"status\") {\n      return contentModal.message.text.toLowerCase().includes(\"validated\")\n        ? \"text-green-500\"\n        : \"text-text-03\";\n    }\n    return \"text-text-03\";\n  };\n\n  const handleDisconnectProvider = async () => {\n    if (!disconnectTarget) return;\n    const { id, category } = disconnectTarget;\n\n    try {\n      await disconnectProvider(id, category, replacementProviderId);\n      toast.success(`${disconnectTarget.label} disconnected`);\n      await mutateSearchProviders();\n      await mutateContentProviders();\n    } catch (error) {\n      console.error(\"Failed to disconnect web search provider:\", error);\n      const message =\n        error instanceof Error ? error.message : \"Unexpected error occurred.\";\n      if (category === \"search\") {\n        setActivationError(message);\n      } else {\n        setContentActivationError(message);\n      }\n    } finally {\n      setDisconnectTarget(null);\n      setReplacementProviderId(null);\n    }\n  };\n\n  return (\n    <>\n      <SettingsLayouts.Root>\n        <SettingsLayouts.Header\n          icon={route.icon}\n          title={route.title}\n          description=\"Search settings for external search across the internet.\"\n          separator\n        />\n\n        <SettingsLayouts.Body>\n          <div className=\"flex w-full flex-col gap-3\">\n            <Content\n              title=\"Search Engine\"\n              description=\"External search engine API used for web search result URLs, snippets, and metadata.\"\n              sizePreset=\"main-content\"\n              variant=\"section\"\n            />\n\n            {activationError && (\n              <Callout type=\"danger\" title=\"Unable to update default provider\">\n                {activationError}\n              </Callout>\n            )}\n\n            {!hasActiveSearchProvider && (\n              <div\n                className=\"flex items-start rounded-16 border p-1\"\n                style={{\n                  backgroundColor: \"var(--status-info-00)\",\n                  borderColor: \"var(--status-info-02)\",\n                }}\n              >\n                <div className=\"flex items-start gap-1 p-2\">\n                  <div\n                    className=\"flex size-5 items-center justify-center rounded-full p-0.5\"\n                    style={{\n                      backgroundColor: \"var(--status-info-01)\",\n                    }}\n                  >\n                    <div style={{ color: \"var(--status-text-info-05)\" }}>\n                      <InfoIcon size={16} />\n                    </div>\n                  </div>\n                  <Text as=\"p\" className=\"flex-1 px-0.5\" mainUiBody text04>\n                    {hasConfiguredSearchProvider\n                      ? \"Select a search engine to enable web search.\"\n                      : \"Connect a search engine to set up web search.\"}\n                  </Text>\n                </div>\n              </div>\n            )}\n\n            <div className=\"flex flex-col gap-2\">\n              {combinedSearchProviders.map(\n                ({ key, providerType, label, subtitle, logoSrc, provider }) => {\n                  const isConfigured = isSearchProviderConfigured(\n                    providerType,\n                    provider\n                  );\n                  const isActive = provider?.is_active ?? false;\n                  const providerId = provider?.id;\n                  const canOpenModal =\n                    isBuiltInSearchProviderType(providerType);\n\n                  const status: \"disconnected\" | \"connected\" | \"selected\" =\n                    !isConfigured\n                      ? \"disconnected\"\n                      : isActive\n                        ? \"selected\"\n                        : \"connected\";\n\n                  return (\n                    <ProviderCard\n                      key={`${key}-${providerType}`}\n                      icon={() =>\n                        logoSrc ? (\n                          <Image\n                            src={logoSrc}\n                            alt={`${label} logo`}\n                            width={16}\n                            height={16}\n                          />\n                        ) : (\n                          <SvgGlobe size={16} />\n                        )\n                      }\n                      title={label}\n                      description={subtitle}\n                      status={status}\n                      onConnect={\n                        canOpenModal\n                          ? () => {\n                              openSearchModal(providerType, provider);\n                              setActivationError(null);\n                            }\n                          : undefined\n                      }\n                      onSelect={\n                        providerId\n                          ? () => {\n                              void handleActivateSearchProvider(providerId);\n                            }\n                          : undefined\n                      }\n                      onDeselect={\n                        providerId\n                          ? () => {\n                              void handleDeactivateSearchProvider(providerId);\n                            }\n                          : undefined\n                      }\n                      onEdit={\n                        isConfigured && canOpenModal\n                          ? () => {\n                              openSearchModal(\n                                providerType as WebSearchProviderType,\n                                provider\n                              );\n                            }\n                          : undefined\n                      }\n                      onDisconnect={\n                        isConfigured && provider && provider.id > 0\n                          ? () =>\n                              setDisconnectTarget({\n                                id: provider.id,\n                                label,\n                                category: \"search\",\n                                providerType,\n                              })\n                          : undefined\n                      }\n                    />\n                  );\n                }\n              )}\n            </div>\n          </div>\n\n          <div className=\"flex w-full flex-col gap-3\">\n            <Content\n              title=\"Web Crawler\"\n              description=\"Used to read the full contents of search result pages.\"\n              sizePreset=\"main-content\"\n              variant=\"section\"\n            />\n\n            {contentActivationError && (\n              <Callout type=\"danger\" title=\"Unable to update crawler\">\n                {contentActivationError}\n              </Callout>\n            )}\n\n            <div className=\"flex flex-col gap-2\">\n              {combinedContentProviders.map((provider) => {\n                const label =\n                  provider.name ||\n                  CONTENT_PROVIDER_DETAILS[provider.provider_type]?.label ||\n                  provider.provider_type;\n\n                const subtitle =\n                  CONTENT_PROVIDER_DETAILS[provider.provider_type]?.subtitle ||\n                  provider.provider_type;\n\n                const providerId = provider.id;\n                const isConfigured = isContentProviderConfigured(\n                  provider.provider_type,\n                  provider\n                );\n                const isCurrentCrawler =\n                  provider.provider_type === currentContentProviderType;\n\n                const status: \"disconnected\" | \"connected\" | \"selected\" =\n                  !isConfigured\n                    ? \"disconnected\"\n                    : isCurrentCrawler\n                      ? \"selected\"\n                      : \"connected\";\n\n                const canActivate =\n                  providerId > 0 ||\n                  provider.provider_type === \"onyx_web_crawler\" ||\n                  isConfigured;\n\n                const contentLogoSrc =\n                  CONTENT_PROVIDER_DETAILS[provider.provider_type]?.logoSrc;\n\n                return (\n                  <ProviderCard\n                    key={`${provider.provider_type}-${provider.id}`}\n                    icon={() =>\n                      contentLogoSrc ? (\n                        <Image\n                          src={contentLogoSrc}\n                          alt={`${label} logo`}\n                          width={16}\n                          height={16}\n                        />\n                      ) : provider.provider_type === \"onyx_web_crawler\" ? (\n                        <SvgOnyxLogo size={16} />\n                      ) : (\n                        <SvgGlobe size={16} />\n                      )\n                    }\n                    title={label}\n                    description={subtitle}\n                    status={status}\n                    selectedLabel=\"Current Crawler\"\n                    onConnect={() => {\n                      openContentModal(provider.provider_type, provider);\n                      setContentActivationError(null);\n                    }}\n                    onSelect={\n                      canActivate\n                        ? () => {\n                            void handleActivateContentProvider(provider);\n                          }\n                        : undefined\n                    }\n                    onDeselect={() => {\n                      void handleDeactivateContentProvider(\n                        providerId,\n                        provider.provider_type\n                      );\n                    }}\n                    onEdit={\n                      provider.provider_type !== \"onyx_web_crawler\" &&\n                      isConfigured\n                        ? () => {\n                            openContentModal(provider.provider_type, provider);\n                          }\n                        : undefined\n                    }\n                    onDisconnect={\n                      provider.provider_type !== \"onyx_web_crawler\" &&\n                      isConfigured &&\n                      provider.id > 0\n                        ? () =>\n                            setDisconnectTarget({\n                              id: provider.id,\n                              label,\n                              category: \"content\",\n                              providerType: provider.provider_type,\n                            })\n                        : undefined\n                    }\n                  />\n                );\n              })}\n            </div>\n          </div>\n        </SettingsLayouts.Body>\n      </SettingsLayouts.Root>\n\n      {disconnectTarget && (\n        <WebSearchDisconnectModal\n          disconnectTarget={disconnectTarget}\n          searchProviders={searchProviders}\n          contentProviders={combinedContentProviders}\n          replacementProviderId={replacementProviderId}\n          onReplacementChange={setReplacementProviderId}\n          onClose={() => {\n            setDisconnectTarget(null);\n            setReplacementProviderId(null);\n          }}\n          onDisconnect={() => void handleDisconnectProvider()}\n        />\n      )}\n\n      <WebProviderSetupModal\n        isOpen={selectedProviderType !== null}\n        onClose={() => {\n          dispatchSearchModal({ type: \"CLOSE\" });\n        }}\n        providerLabel={providerLabel}\n        providerLogo={renderLogo({\n          logoSrc: selectedProviderType\n            ? SEARCH_PROVIDER_DETAILS[selectedProviderType]?.logoSrc\n            : undefined,\n          alt: `${providerLabel} logo`,\n          size: 24,\n          containerSize: 28,\n        })}\n        description={\n          selectedProviderType\n            ? SEARCH_PROVIDER_DETAILS[selectedProviderType]?.helper ??\n              SEARCH_PROVIDER_DETAILS[selectedProviderType]?.subtitle ??\n              \"\"\n            : \"\"\n        }\n        apiKeyValue={searchModal.apiKeyValue}\n        onApiKeyChange={(value) =>\n          dispatchSearchModal({ type: \"SET_API_KEY\", value })\n        }\n        isStoredApiKey={searchModal.apiKeyValue === MASKED_API_KEY_PLACEHOLDER}\n        optionalField={\n          selectedProviderType === \"google_pse\"\n            ? {\n                label: \"Search Engine ID\",\n                value: searchModal.configValue,\n                onChange: (value) =>\n                  dispatchSearchModal({ type: \"SET_CONFIG_VALUE\", value }),\n                placeholder: \"Enter search engine ID\",\n                description: (\n                  <>\n                    Paste your{\" \"}\n                    <a\n                      href=\"https://programmablesearchengine.google.com/controlpanel/all\"\n                      target=\"_blank\"\n                      rel=\"noopener noreferrer\"\n                      className=\"underline\"\n                    >\n                      search engine ID\n                    </a>{\" \"}\n                    you want to use for web search.\n                  </>\n                ),\n              }\n            : selectedProviderType === \"searxng\"\n              ? {\n                  label: \"SearXNG Base URL\",\n                  value: searchModal.configValue,\n                  onChange: (value) =>\n                    dispatchSearchModal({ type: \"SET_CONFIG_VALUE\", value }),\n                  placeholder: \"https://your-searxng-instance.com\",\n                  description: (\n                    <>\n                      Paste the base URL of your{\" \"}\n                      <a\n                        href=\"https://docs.searxng.org/admin/installation.html\"\n                        target=\"_blank\"\n                        rel=\"noopener noreferrer\"\n                        className=\"underline\"\n                      >\n                        private SearXNG instance\n                      </a>\n                      .\n                    </>\n                  ),\n                }\n              : undefined\n        }\n        helperMessage={\n          searchModal.message?.kind === \"error\" ? (\n            searchModal.message.text\n          ) : searchModal.phase === \"validating\" ||\n            searchModal.phase === \"saving\" ? (\n            \"Checking connection...\"\n          ) : (\n            <>\n              Paste your{\" \"}\n              <a\n                href={\n                  (selectedProviderType\n                    ? SEARCH_PROVIDER_DETAILS[selectedProviderType]?.apiKeyUrl\n                    : undefined) ?? \"#\"\n                }\n                target=\"_blank\"\n                rel=\"noopener noreferrer\"\n                className=\"underline\"\n              >\n                API key\n              </a>{\" \"}\n              to access your search engine.\n            </>\n          )\n        }\n        helperClass={\n          searchModal.message?.kind === \"error\"\n            ? \"text-status-error-05\"\n            : searchModal.phase === \"validating\" ||\n                searchModal.phase === \"saving\"\n              ? \"text-text-03\"\n              : \"text-text-03\"\n        }\n        isProcessing={\n          searchModal.phase === \"validating\" || searchModal.phase === \"saving\"\n        }\n        canConnect={canConnect}\n        onConnect={() => {\n          void handleSearchConnect();\n        }}\n        hideApiKey={\n          !!selectedProviderType &&\n          !searchProviderRequiresApiKey(selectedProviderType)\n        }\n      />\n\n      <WebProviderSetupModal\n        isOpen={selectedContentProviderType !== null}\n        onClose={() => {\n          dispatchContentModal({ type: \"CLOSE\" });\n        }}\n        providerLabel={contentProviderLabel}\n        providerLogo={renderLogo({\n          logoSrc: selectedContentProviderType\n            ? CONTENT_PROVIDER_DETAILS[selectedContentProviderType]?.logoSrc\n            : undefined,\n          alt: `${\n            contentProviderLabel || selectedContentProviderType || \"provider\"\n          } logo`,\n          fallback:\n            selectedContentProviderType === \"onyx_web_crawler\" ? (\n              <SvgOnyxLogo size={24} />\n            ) : undefined,\n          size: 24,\n          containerSize: 28,\n        })}\n        description={\n          selectedContentProviderType\n            ? CONTENT_PROVIDER_DETAILS[selectedContentProviderType]\n                ?.description ||\n              CONTENT_PROVIDER_DETAILS[selectedContentProviderType]?.subtitle ||\n              `Provide credentials for ${contentProviderLabel} to enable crawling.`\n            : \"\"\n        }\n        apiKeyValue={contentModal.apiKeyValue}\n        onApiKeyChange={(value) =>\n          dispatchContentModal({ type: \"SET_API_KEY\", value })\n        }\n        isStoredApiKey={contentModal.apiKeyValue === MASKED_API_KEY_PLACEHOLDER}\n        optionalField={\n          selectedContentProviderType === \"firecrawl\"\n            ? {\n                label: \"API Base URL\",\n                value: contentModal.configValue,\n                onChange: (value) =>\n                  dispatchContentModal({ type: \"SET_CONFIG_VALUE\", value }),\n                placeholder: \"https://\",\n                description: \"Your Firecrawl API base URL.\",\n                showFirst: true,\n              }\n            : undefined\n        }\n        helperMessage={getContentProviderHelperMessage()}\n        helperClass={getContentProviderHelperClass()}\n        isProcessing={\n          contentModal.phase === \"validating\" || contentModal.phase === \"saving\"\n        }\n        canConnect={canConnectContent}\n        onConnect={() => {\n          void handleContentConnect();\n        }}\n        apiKeyAutoFocus={\n          !selectedContentProviderType ||\n          selectedContentProviderType !== \"firecrawl\"\n        }\n      />\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/WebSearchPage/interfaces.ts",
    "content": "import type { WebSearchProviderType } from \"@/refresh-pages/admin/WebSearchPage/searchProviderUtils\";\nimport type { WebContentProviderType } from \"@/refresh-pages/admin/WebSearchPage/contentProviderUtils\";\n\nexport interface WebSearchProviderView {\n  id: number;\n  name: string;\n  provider_type: WebSearchProviderType;\n  is_active: boolean;\n  config: Record<string, string> | null;\n  has_api_key: boolean;\n}\n\nexport interface WebContentProviderView {\n  id: number;\n  name: string;\n  provider_type: WebContentProviderType;\n  is_active: boolean;\n  config: Record<string, string> | null;\n  has_api_key: boolean;\n}\n\nexport interface DisconnectTargetState {\n  id: number;\n  label: string;\n  category: \"search\" | \"content\";\n  providerType: string;\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/WebSearchPage/searchProviderUtils.ts",
    "content": "export type WebSearchProviderType =\n  | \"google_pse\"\n  | \"serper\"\n  | \"exa\"\n  | \"searxng\"\n  | \"brave\";\n\nexport const SEARCH_PROVIDER_DETAILS: Record<\n  WebSearchProviderType,\n  {\n    label: string;\n    subtitle: string;\n    helper: string;\n    logoSrc?: string;\n    apiKeyUrl?: string;\n  }\n> = {\n  exa: {\n    label: \"Exa\",\n    subtitle: \"Exa.ai\",\n    helper: \"Connect to Exa to set up web search.\",\n    logoSrc: \"/Exa.svg\",\n    apiKeyUrl: \"https://dashboard.exa.ai/api-keys\",\n  },\n  serper: {\n    label: \"Serper\",\n    subtitle: \"Serper.dev\",\n    helper: \"Connect to Serper to set up web search.\",\n    logoSrc: \"/Serper.svg\",\n    apiKeyUrl: \"https://serper.dev/api-key\",\n  },\n  brave: {\n    label: \"Brave\",\n    subtitle: \"Brave Search API\",\n    helper: \"Connect to Brave Search API to set up web search.\",\n    logoSrc: \"/Brave.svg\",\n    apiKeyUrl:\n      \"https://api-dashboard.search.brave.com/app/documentation/web-search/get-started\",\n  },\n  google_pse: {\n    label: \"Google PSE\",\n    subtitle: \"Google\",\n    helper: \"Connect to Google PSE to set up web search.\",\n    logoSrc: \"/Google.svg\",\n    apiKeyUrl: \"https://programmablesearchengine.google.com/controlpanel/all\",\n  },\n  searxng: {\n    label: \"SearXNG\",\n    subtitle: \"SearXNG\",\n    helper: \"Connect to SearXNG to set up web search.\",\n    logoSrc: \"/SearXNG.svg\",\n  },\n};\n\n/**\n * Display order for built-in providers.\n * Derived from insertion order of `SEARCH_PROVIDER_DETAILS` for a single source of truth.\n */\nexport const SEARCH_PROVIDER_ORDER = Object.keys(\n  SEARCH_PROVIDER_DETAILS\n) as WebSearchProviderType[];\n\nexport function getSearchProviderDisplayLabel(\n  providerType: string,\n  providerName?: string | null\n): string {\n  if (providerName) return providerName;\n  return (\n    (SEARCH_PROVIDER_DETAILS as Record<string, { label: string }>)[providerType]\n      ?.label ?? providerType\n  );\n}\n\nexport type SearchProviderConfig = Record<string, string> | null | undefined;\n\nexport type SearchProviderLike =\n  | {\n      has_api_key: boolean;\n      config: SearchProviderConfig;\n    }\n  | null\n  | undefined;\n\ntype SearchProviderCapabilities = {\n  requiresApiKey: boolean;\n  /** Keys required in `config` to consider the provider configured / connectable. */\n  requiredConfigKeys: string[];\n  /**\n   * Some providers historically stored config under different keys.\n   * When reading stored config, we consider these aliases equivalent.\n   */\n  storedConfigAliases?: Record<string, string[]>;\n};\n\nconst SEARCH_PROVIDER_CAPABILITIES: Record<\n  WebSearchProviderType,\n  SearchProviderCapabilities\n> = {\n  exa: {\n    requiresApiKey: true,\n    requiredConfigKeys: [],\n  },\n  serper: {\n    requiresApiKey: true,\n    requiredConfigKeys: [],\n  },\n  brave: {\n    requiresApiKey: true,\n    requiredConfigKeys: [],\n  },\n  google_pse: {\n    requiresApiKey: true,\n    requiredConfigKeys: [\"search_engine_id\"],\n    storedConfigAliases: {\n      search_engine_id: [\"search_engine_id\", \"cx\", \"search_engine\"],\n    },\n  },\n  searxng: {\n    requiresApiKey: false,\n    requiredConfigKeys: [\"searxng_base_url\"],\n    storedConfigAliases: {\n      searxng_base_url: [\"searxng_base_url\"],\n    },\n  },\n};\n\nconst DEFAULT_SEARCH_PROVIDER_CAPABILITIES: SearchProviderCapabilities = {\n  requiresApiKey: true,\n  requiredConfigKeys: [],\n};\n\nfunction getCapabilities(providerType: string): SearchProviderCapabilities {\n  return (\n    (\n      SEARCH_PROVIDER_CAPABILITIES as Record<string, SearchProviderCapabilities>\n    )[providerType] ?? DEFAULT_SEARCH_PROVIDER_CAPABILITIES\n  );\n}\n\nexport function isBuiltInSearchProviderType(\n  providerType: string\n): providerType is WebSearchProviderType {\n  return Object.prototype.hasOwnProperty.call(\n    SEARCH_PROVIDER_DETAILS,\n    providerType\n  );\n}\n\nexport function searchProviderRequiresApiKey(providerType: string): boolean {\n  return getCapabilities(providerType).requiresApiKey;\n}\n\nfunction getStoredConfigValue(\n  providerType: string,\n  canonicalKey: string,\n  config: SearchProviderConfig\n): string {\n  const caps = getCapabilities(providerType);\n  const aliases = caps.storedConfigAliases?.[canonicalKey] ?? [canonicalKey];\n\n  const safeConfig = config ?? {};\n  for (const key of aliases) {\n    const value = safeConfig[key];\n    if (typeof value === \"string\" && value.length > 0) {\n      return value;\n    }\n  }\n  return \"\";\n}\n\n/** True when the provider has all required credentials/config to be usable. */\nexport function isSearchProviderConfigured(\n  providerType: string,\n  provider: SearchProviderLike\n): boolean {\n  const caps = getCapabilities(providerType);\n\n  if (caps.requiresApiKey && !(provider?.has_api_key ?? false)) {\n    return false;\n  }\n\n  for (const requiredKey of caps.requiredConfigKeys) {\n    const value = getStoredConfigValue(\n      providerType,\n      requiredKey,\n      provider?.config\n    );\n    if (!value) {\n      return false;\n    }\n  }\n\n  return true;\n}\n\nexport function canConnectSearchProvider(\n  providerType: string,\n  apiKey: string,\n  searchEngineIdOrBaseUrl: string\n): boolean {\n  const caps = getCapabilities(providerType);\n\n  if (caps.requiresApiKey && apiKey.trim().length === 0) {\n    return false;\n  }\n\n  // Today, all config-driven search providers only expose a single required string field.\n  if (\n    caps.requiredConfigKeys.length > 0 &&\n    searchEngineIdOrBaseUrl.trim().length === 0\n  ) {\n    return false;\n  }\n\n  return true;\n}\n\n/** Build the `config` payload to send to the backend for a provider. */\nexport function buildSearchProviderConfig(\n  providerType: string,\n  searchEngineIdOrBaseUrl: string\n): Record<string, string> {\n  const caps = getCapabilities(providerType);\n  const value = searchEngineIdOrBaseUrl.trim();\n\n  const config: Record<string, string> = {};\n  if (!value || caps.requiredConfigKeys.length === 0) {\n    return config;\n  }\n\n  // Only one required key for now.\n  const requiredKey = caps.requiredConfigKeys[0];\n  if (!requiredKey) {\n    return config;\n  }\n  config[requiredKey] = value;\n  return config;\n}\n\n/**\n * For providers that have a single required config field, return that stored value for form prefilling.\n */\nexport function getSingleConfigFieldValueForForm(\n  providerType: string,\n  provider: SearchProviderLike\n): string {\n  const caps = getCapabilities(providerType);\n  if (caps.requiredConfigKeys.length === 0) {\n    return \"\";\n  }\n\n  const requiredKey = caps.requiredConfigKeys[0];\n  if (!requiredKey) {\n    return \"\";\n  }\n  return getStoredConfigValue(providerType, requiredKey, provider?.config);\n}\n"
  },
  {
    "path": "web/src/refresh-pages/admin/WebSearchPage/svc.ts",
    "content": "import { CONTENT_PROVIDER_DETAILS } from \"@/refresh-pages/admin/WebSearchPage/contentProviderUtils\";\nimport type { WebContentProviderView } from \"@/refresh-pages/admin/WebSearchPage/interfaces\";\n\nasync function parseErrorDetail(\n  res: Response,\n  fallback: string\n): Promise<string> {\n  try {\n    const body = await res.json();\n    return body?.detail ?? fallback;\n  } catch {\n    return fallback;\n  }\n}\n\nexport async function activateSearchProvider(\n  providerId: number\n): Promise<void> {\n  const res = await fetch(\n    `/api/admin/web-search/search-providers/${providerId}/activate`,\n    {\n      method: \"POST\",\n      headers: { \"Content-Type\": \"application/json\" },\n    }\n  );\n  if (!res.ok) {\n    throw new Error(\n      await parseErrorDetail(res, \"Failed to set provider as default.\")\n    );\n  }\n}\n\nexport async function deactivateSearchProvider(\n  providerId: number\n): Promise<void> {\n  const res = await fetch(\n    `/api/admin/web-search/search-providers/${providerId}/deactivate`,\n    {\n      method: \"POST\",\n      headers: { \"Content-Type\": \"application/json\" },\n    }\n  );\n  if (!res.ok) {\n    throw new Error(\n      await parseErrorDetail(res, \"Failed to deactivate provider.\")\n    );\n  }\n}\n\nexport async function activateContentProvider(\n  provider: WebContentProviderView\n): Promise<void> {\n  if (provider.provider_type === \"onyx_web_crawler\") {\n    const res = await fetch(\n      \"/api/admin/web-search/content-providers/reset-default\",\n      {\n        method: \"POST\",\n        headers: { \"Content-Type\": \"application/json\" },\n      }\n    );\n    if (!res.ok) {\n      throw new Error(\n        await parseErrorDetail(res, \"Failed to set crawler as default.\")\n      );\n    }\n  } else if (provider.id > 0) {\n    const res = await fetch(\n      `/api/admin/web-search/content-providers/${provider.id}/activate`,\n      {\n        method: \"POST\",\n        headers: { \"Content-Type\": \"application/json\" },\n      }\n    );\n    if (!res.ok) {\n      throw new Error(\n        await parseErrorDetail(res, \"Failed to set crawler as default.\")\n      );\n    }\n  } else {\n    const payload = {\n      id: null,\n      name:\n        provider.name ||\n        CONTENT_PROVIDER_DETAILS[provider.provider_type]?.label ||\n        provider.provider_type,\n      provider_type: provider.provider_type,\n      api_key: null,\n      api_key_changed: false,\n      config: provider.config ?? null,\n      activate: true,\n    };\n\n    const res = await fetch(\"/api/admin/web-search/content-providers\", {\n      method: \"POST\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify(payload),\n    });\n    if (!res.ok) {\n      throw new Error(\n        await parseErrorDetail(res, \"Failed to set crawler as default.\")\n      );\n    }\n  }\n}\n\nexport async function deactivateContentProvider(\n  providerId: number,\n  providerType: string\n): Promise<void> {\n  const endpoint =\n    providerType === \"onyx_web_crawler\" || providerId < 0\n      ? \"/api/admin/web-search/content-providers/reset-default\"\n      : `/api/admin/web-search/content-providers/${providerId}/deactivate`;\n\n  const res = await fetch(endpoint, {\n    method: \"POST\",\n    headers: { \"Content-Type\": \"application/json\" },\n  });\n  if (!res.ok) {\n    throw new Error(\n      await parseErrorDetail(res, \"Failed to deactivate provider.\")\n    );\n  }\n}\n\nexport async function disconnectProvider(\n  id: number,\n  category: \"search\" | \"content\",\n  replacementProviderId: string | null\n): Promise<void> {\n  // If a replacement was selected (not \"No Default\"), activate it first\n  if (replacementProviderId && replacementProviderId !== \"__none__\") {\n    const repId = Number(replacementProviderId);\n    const activateEndpoint =\n      category === \"search\"\n        ? `/api/admin/web-search/search-providers/${repId}/activate`\n        : `/api/admin/web-search/content-providers/${repId}/activate`;\n    const activateRes = await fetch(activateEndpoint, {\n      method: \"POST\",\n      headers: { \"Content-Type\": \"application/json\" },\n    });\n    if (!activateRes.ok) {\n      throw new Error(\n        await parseErrorDetail(\n          activateRes,\n          \"Failed to activate replacement provider.\"\n        )\n      );\n    }\n  }\n\n  const res = await fetch(`/api/admin/web-search/${category}-providers/${id}`, {\n    method: \"DELETE\",\n  });\n  if (!res.ok) {\n    throw new Error(\n      await parseErrorDetail(res, \"Failed to disconnect provider.\")\n    );\n  }\n}\n"
  },
  {
    "path": "web/src/sections/AppHealthBanner.tsx",
    "content": "\"use client\";\n\nimport { errorHandlingFetcher, RedirectError } from \"@/lib/fetcher\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { useCallback, useEffect, useState, useRef } from \"react\";\nimport { getSecondsUntilExpiration } from \"@/lib/time\";\nimport { refreshToken } from \"@/lib/user\";\nimport { NEXT_PUBLIC_CUSTOM_REFRESH_URL } from \"@/lib/constants\";\nimport { Button } from \"@opal/components\";\nimport { logout } from \"@/lib/user\";\nimport { usePathname, useRouter } from \"next/navigation\";\nimport { SvgAlertTriangle, SvgLogOut } from \"@opal/icons\";\nimport { Content } from \"@opal/layouts\";\nimport { useCurrentUser } from \"@/hooks/useCurrentUser\";\nimport { getExtensionContext } from \"@/lib/extension/utils\";\n\nexport default function AppHealthBanner() {\n  const router = useRouter();\n  const { error } = useSWR(SWR_KEYS.health, errorHandlingFetcher);\n  const [expired, setExpired] = useState(false);\n  const [showLoggedOutModal, setShowLoggedOutModal] = useState(false);\n  const pathname = usePathname();\n  const expirationTimeoutRef = useRef<NodeJS.Timeout | null>(null);\n  const refreshIntervalRef = useRef<NodeJS.Timer | null>(null);\n\n  const { user, mutateUser, userError } = useCurrentUser();\n\n  // Handle 403 errors from the /api/me endpoint.\n  // Skip entirely on auth pages — the user isn't logged in yet, so there's\n  // nothing to \"log out\" of and hitting /auth/logout just creates noise.\n  useEffect(() => {\n    if (userError && userError.status === 403 && !pathname?.includes(\"/auth\")) {\n      logout().then(() => {\n        setShowLoggedOutModal(true);\n      });\n    }\n  }, [userError, pathname]);\n\n  // Function to handle the \"Log in\" button click\n  function handleLogin() {\n    setShowLoggedOutModal(false);\n    const { isExtension } = getExtensionContext();\n    if (isExtension) {\n      // In the Chrome extension, open login in a new tab so OAuth popups\n      // work correctly (the extension iframe has no navigable URL origin).\n      window.open(\n        window.location.origin + \"/auth/login\",\n        \"_blank\",\n        \"noopener,noreferrer\"\n      );\n    } else {\n      router.push(\"/auth/login\");\n    }\n  }\n\n  // Function to set up expiration timeout\n  const setupExpirationTimeout = useCallback(\n    (secondsUntilExpiration: number) => {\n      // Clear any existing timeout\n      if (expirationTimeoutRef.current) {\n        clearTimeout(expirationTimeoutRef.current);\n      }\n\n      // Set timeout to show logout modal when session expires\n      const timeUntilExpire = (secondsUntilExpiration + 10) * 1000;\n      expirationTimeoutRef.current = setTimeout(() => {\n        setExpired(true);\n\n        if (!pathname?.includes(\"/auth\")) {\n          setShowLoggedOutModal(true);\n        }\n      }, timeUntilExpire);\n    },\n    [pathname]\n  );\n\n  // Clean up any timeouts/intervals when component unmounts\n  useEffect(() => {\n    return () => {\n      if (expirationTimeoutRef.current) {\n        clearTimeout(expirationTimeoutRef.current);\n      }\n\n      if (refreshIntervalRef.current) {\n        clearInterval(refreshIntervalRef.current);\n      }\n    };\n  }, []);\n\n  // Set up token refresh logic if custom refresh URL exists\n  useEffect(() => {\n    if (!user) return;\n\n    const secondsUntilExpiration = getSecondsUntilExpiration(user);\n    if (secondsUntilExpiration === null) return;\n\n    // Set up expiration timeout based on current user data\n    setupExpirationTimeout(secondsUntilExpiration);\n\n    if (NEXT_PUBLIC_CUSTOM_REFRESH_URL) {\n      const refreshUrl = NEXT_PUBLIC_CUSTOM_REFRESH_URL;\n\n      const attemptTokenRefresh = async () => {\n        let retryCount = 0;\n        const maxRetries = 3;\n\n        while (retryCount < maxRetries) {\n          try {\n            const refreshTokenData = await refreshToken(refreshUrl);\n            if (!refreshTokenData) {\n              throw new Error(\"Failed to refresh token\");\n            }\n\n            const response = await fetch(\n              \"/api/enterprise-settings/refresh-token\",\n              {\n                method: \"POST\",\n                headers: {\n                  \"Content-Type\": \"application/json\",\n                },\n                body: JSON.stringify(refreshTokenData),\n              }\n            );\n            if (!response.ok) {\n              throw new Error(`HTTP error! status: ${response.status}`);\n            }\n\n            // Wait for backend to process the token\n            await new Promise((resolve) => setTimeout(resolve, 4000));\n\n            // Get updated user data\n            const updatedUser = await mutateUser();\n\n            if (updatedUser) {\n              // Reset expiration timeout with new expiration time\n              const newSecondsUntilExpiration =\n                getSecondsUntilExpiration(updatedUser);\n              if (newSecondsUntilExpiration !== null) {\n                setupExpirationTimeout(newSecondsUntilExpiration);\n                console.debug(\n                  `Token refreshed, new expiration in ${newSecondsUntilExpiration} seconds`\n                );\n              }\n            }\n\n            break; // Success - exit the retry loop\n          } catch (error) {\n            console.error(\n              `Error refreshing token (attempt ${\n                retryCount + 1\n              }/${maxRetries}):`,\n              error\n            );\n            retryCount++;\n\n            if (retryCount === maxRetries) {\n              console.error(\"Max retry attempts reached\");\n            } else {\n              // Wait before retrying (exponential backoff)\n              await new Promise((resolve) =>\n                setTimeout(resolve, Math.pow(2, retryCount) * 1000)\n              );\n            }\n          }\n        }\n      };\n\n      // Set up refresh interval\n      const refreshInterval = 60 * 15; // 15 mins\n\n      // Clear any existing interval\n      if (refreshIntervalRef.current) {\n        clearInterval(refreshIntervalRef.current);\n      }\n\n      refreshIntervalRef.current = setInterval(\n        attemptTokenRefresh,\n        refreshInterval * 1000\n      );\n\n      // If we're going to expire before the next refresh, kick off a refresh now\n      if (secondsUntilExpiration < refreshInterval) {\n        attemptTokenRefresh();\n      }\n    }\n  }, [user, setupExpirationTimeout, mutateUser]);\n\n  // Logged out modal\n  if (showLoggedOutModal) {\n    return (\n      <Modal open>\n        <Modal.Content width=\"sm\" height=\"sm\">\n          <Modal.Header icon={SvgLogOut} title=\"You Have Been Logged Out\" />\n          <Modal.Body>\n            <p className=\"text-sm\">\n              Your session has expired. Please log in again to continue.\n            </p>\n          </Modal.Body>\n          <Modal.Footer>\n            <Button onClick={handleLogin}>Log In</Button>\n          </Modal.Footer>\n        </Modal.Content>\n      </Modal>\n    );\n  }\n\n  if (!error && !expired) {\n    return null;\n  }\n\n  if (error instanceof RedirectError || expired) {\n    if (!pathname?.includes(\"/auth\")) {\n      setShowLoggedOutModal(true);\n    }\n    return null;\n  } else {\n    return (\n      <div className=\"fixed top-0 left-0 z-[101] w-full bg-status-error-01 p-3\">\n        <Content\n          icon={SvgAlertTriangle}\n          title=\"The backend is currently unavailable\"\n          description=\"If this is your initial setup or you just updated your Onyx deployment, this is likely because the backend is still starting up. Give it a minute or two, and then refresh the page. If that does not work, make sure the backend is setup and/or contact an administrator.\"\n          sizePreset=\"main-content\"\n          variant=\"section\"\n        />\n      </div>\n    );\n  }\n}\n"
  },
  {
    "path": "web/src/sections/Suggestions.tsx",
    "content": "\"use client\";\n\nimport { OnSubmitProps } from \"@/hooks/useChatController\";\nimport { useCurrentAgent } from \"@/hooks/useAgents\";\nimport { Interactive } from \"@opal/core\";\nimport { Content } from \"@opal/layouts\";\n\nexport interface SuggestionsProps {\n  onSubmit: (props: OnSubmitProps) => void;\n}\n\nexport default function Suggestions({ onSubmit }: SuggestionsProps) {\n  const currentAgent = useCurrentAgent();\n\n  if (\n    !currentAgent ||\n    !currentAgent.starter_messages ||\n    currentAgent.starter_messages.length === 0\n  )\n    return null;\n\n  const handleSuggestionClick = (suggestion: string) => {\n    onSubmit({\n      message: suggestion,\n      currentMessageFiles: [],\n      deepResearch: false,\n    });\n  };\n\n  return (\n    <div className=\"max-w-[var(--app-page-main-content-width)] flex flex-col w-full p-1\">\n      {currentAgent.starter_messages.map(({ message }, index) => (\n        <Interactive.Stateless\n          key={index}\n          variant=\"default\"\n          prominence=\"tertiary\"\n          onClick={() => handleSuggestionClick(message)}\n        >\n          <Interactive.Container\n            widthVariant=\"full\"\n            roundingVariant=\"sm\"\n            heightVariant=\"lg\"\n          >\n            <Content\n              title={message}\n              sizePreset=\"main-ui\"\n              variant=\"body\"\n              widthVariant=\"full\"\n              prominence=\"muted\"\n            />\n          </Interactive.Container>\n        </Interactive.Stateless>\n      ))}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/actions/ActionCard.tsx",
    "content": "\"use client\";\n\nimport React, { useState, useEffect, useRef } from \"react\";\nimport ActionCardHeader from \"@/sections/actions/ActionCardHeader\";\nimport ToolsSection from \"@/sections/actions/ToolsSection\";\nimport { cn } from \"@/lib/utils\";\nimport { ActionStatus } from \"@/lib/tools/interfaces\";\nimport type { IconProps } from \"@opal/types\";\nimport { SvgServer } from \"@opal/icons\";\nimport {\n  ActionCardProvider,\n  ActionCardContextValue,\n} from \"@/sections/actions/ActionCardContext\";\n\nexport interface ActionCardProps {\n  // Core content\n  title: string;\n  description: string;\n  icon?: React.FunctionComponent<IconProps>;\n\n  // Status\n  status: ActionStatus;\n\n  // Header actions (right side of header)\n  actions: React.ReactNode;\n\n  // Edit handler for header\n  onEdit?: () => void;\n\n  // Rename handler for header\n  onRename?: (newName: string) => Promise<void>;\n\n  // Expansion control (can be controlled or uncontrolled)\n  initialExpanded?: boolean;\n  isExpanded?: boolean;\n  onExpandedChange?: (expanded: boolean) => void;\n\n  // Search functionality\n  enableSearch?: boolean;\n  searchQuery?: string;\n  onSearchQueryChange?: (query: string) => void;\n\n  // Tools section actions\n  onFold?: () => void;\n\n  // Content\n  children?: React.ReactNode;\n\n  // Accessibility\n  ariaLabel?: string;\n\n  // Optional styling\n  className?: string;\n}\n\n// Main Component\nexport default function ActionCard({\n  title,\n  description,\n  icon,\n  status,\n  actions,\n  onEdit,\n  onRename,\n  initialExpanded = false,\n  isExpanded: controlledIsExpanded,\n  onExpandedChange,\n  enableSearch = false,\n  searchQuery = \"\",\n  onSearchQueryChange,\n  onFold,\n  children,\n  ariaLabel,\n  className,\n}: ActionCardProps) {\n  // Internal state for uncontrolled mode\n  const [internalExpanded, setInternalExpanded] = useState(initialExpanded);\n\n  const hasInitializedExpansion = useRef(false);\n  const [isHovered, setIsHovered] = useState(false);\n\n  // Determine if we're in controlled mode\n  const isControlled = controlledIsExpanded !== undefined;\n  const isExpandedActual = isControlled\n    ? controlledIsExpanded\n    : internalExpanded;\n\n  // Apply initial expansion only once per component lifetime (uncontrolled mode)\n  useEffect(() => {\n    if (!isControlled && initialExpanded && !hasInitializedExpansion.current) {\n      setInternalExpanded(true);\n      hasInitializedExpansion.current = true;\n    }\n  }, [initialExpanded, isControlled]);\n\n  const isConnected = status === ActionStatus.CONNECTED;\n  const isDisconnected = status === ActionStatus.DISCONNECTED;\n\n  const backgroundColor = isConnected\n    ? \"bg-background-tint-00\"\n    : isDisconnected\n      ? \"bg-background-neutral-02\"\n      : \"\";\n\n  const contextValue: ActionCardContextValue = { isHovered };\n\n  return (\n    <ActionCardProvider value={contextValue}>\n      <div\n        className={cn(\n          \"w-full\",\n          backgroundColor,\n          \"border border-border-01 rounded-16\",\n          \"transition-shadow duration-200\",\n          isHovered && \"shadow-00\",\n          className\n        )}\n        role=\"article\"\n        aria-label={ariaLabel || `${title} action card`}\n        onMouseEnter={() => setIsHovered(true)}\n        onMouseLeave={() => setIsHovered(false)}\n      >\n        <div className=\"flex flex-col w-full\">\n          {/* Header Section */}\n          <div className=\"flex items-start justify-between gap-2 p-3 w-full\">\n            <ActionCardHeader\n              title={title}\n              description={description}\n              icon={icon || SvgServer}\n              status={status}\n              onEdit={onEdit}\n              onRename={onRename}\n            />\n\n            {/* Action Buttons */}\n            <div className=\"shrink-0 flex items-start\">{actions}</div>\n          </div>\n\n          {/* Tools Section (Only when expanded and search is enabled) */}\n          {isExpandedActual && enableSearch && (\n            <ToolsSection\n              onFold={onFold}\n              searchQuery={searchQuery}\n              onSearchQueryChange={onSearchQueryChange || (() => {})}\n            />\n          )}\n        </div>\n\n        {/* Content Area - Only render when expanded */}\n        {isExpandedActual && children && (\n          <div className=\"animate-in fade-in slide-in-from-top-2 duration-300 p-2 border-t border-border-01\">\n            {children}\n          </div>\n        )}\n      </div>\n    </ActionCardProvider>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/actions/ActionCardContext.tsx",
    "content": "\"use client\";\n\nimport { createContext, ReactNode, useContext } from \"react\";\n\nexport interface ActionCardContextValue {\n  isHovered: boolean;\n}\n\nconst defaultValue: ActionCardContextValue = {\n  isHovered: false,\n};\n\nconst ActionCardContext = createContext<ActionCardContextValue>(defaultValue);\n\ninterface ActionCardProviderProps {\n  value: ActionCardContextValue;\n  children: ReactNode;\n}\n\nexport function ActionCardProvider({\n  value,\n  children,\n}: ActionCardProviderProps) {\n  return (\n    <ActionCardContext.Provider value={value}>\n      {children}\n    </ActionCardContext.Provider>\n  );\n}\n\nexport function useActionCardContext() {\n  return useContext(ActionCardContext);\n}\n"
  },
  {
    "path": "web/src/sections/actions/ActionCardHeader.tsx",
    "content": "\"use client\";\n\nimport React, { useState } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { ActionStatus } from \"@/lib/tools/interfaces\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport ButtonRenaming from \"@/refresh-components/buttons/ButtonRenaming\";\nimport type { IconProps } from \"@opal/types\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport { SvgEdit } from \"@opal/icons\";\nimport { useActionCardContext } from \"@/sections/actions/ActionCardContext\";\n\ninterface ActionCardHeaderProps {\n  title: string;\n  description: string;\n  icon: React.FunctionComponent<IconProps>;\n  status: ActionStatus;\n  onEdit?: () => void;\n  onRename?: (newName: string) => Promise<void>;\n}\n\nfunction ActionCardHeader({\n  title,\n  description,\n  icon: Icon,\n  status,\n  onEdit,\n  onRename,\n}: ActionCardHeaderProps) {\n  const [isRenaming, setIsRenaming] = useState(false);\n  const { isHovered } = useActionCardContext();\n\n  const isConnected = status === ActionStatus.CONNECTED;\n  const isPending = status === ActionStatus.PENDING;\n  const isDisconnected = status === ActionStatus.DISCONNECTED;\n  const isFetching = status === ActionStatus.FETCHING;\n\n  const showRenameIcon = onRename && isHovered && !isRenaming;\n\n  const handleRename = async (newName: string) => {\n    if (onRename) {\n      await onRename(newName);\n    }\n    setIsRenaming(false);\n  };\n\n  const handleRenameClick = () => {\n    if (onRename) {\n      setIsRenaming(true);\n    }\n  };\n\n  return (\n    <div className=\"flex gap-2 items-start flex-1 min-w-0 mr-2\">\n      <div\n        className={cn(\n          \"flex items-center px-0 py-0.5 shrink-0\",\n          isConnected && \"h-7 w-7 justify-center p-1\"\n        )}\n      >\n        <Icon size={20} className=\"h-5 w-5 stroke-text-04\" />\n      </div>\n\n      <div className=\"flex flex-col items-start flex-1 min-w-0 overflow-hidden\">\n        <div className=\"flex items-center gap-1 min-w-0 w-full\">\n          {isRenaming ? (\n            <ButtonRenaming\n              initialName={title}\n              onRename={handleRename}\n              onClose={() => setIsRenaming(false)}\n              className={cn(\n                \"font-main-content-emphasis\",\n                isConnected || isFetching\n                  ? \"text-text-04\"\n                  : isDisconnected\n                    ? \"text-text-03\"\n                    : \"text-text-04\"\n              )}\n            />\n          ) : (\n            <div className=\"min-w-0 shrink overflow-hidden\">\n              <Truncated\n                mainContentEmphasis\n                className={cn(\n                  \"truncate\",\n                  isConnected || isFetching\n                    ? \"text-text-04\"\n                    : isDisconnected\n                      ? \"text-text-03 line-through\"\n                      : \"text-text-04\"\n                )}\n              >\n                {title}\n              </Truncated>\n            </div>\n          )}\n          {isPending && !isRenaming && (\n            <Text\n              as=\"p\"\n              mainUiMuted\n              text03\n              className=\"shrink-0 whitespace-nowrap\"\n            >\n              (Not Authenticated)\n            </Text>\n          )}\n          {isDisconnected && !isRenaming && (\n            <Text\n              as=\"p\"\n              mainUiMuted\n              text02\n              className=\"shrink-0 whitespace-nowrap\"\n            >\n              (Disconnected)\n            </Text>\n          )}\n          {showRenameIcon && (\n            // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n            <IconButton\n              icon={SvgEdit}\n              tooltip=\"Rename\"\n              internal\n              tertiary\n              onClick={handleRenameClick}\n              className=\"h-6 w-6 opacity-70 hover:opacity-100\"\n              aria-label={`Rename ${title}`}\n            />\n          )}\n        </div>\n\n        {isConnected ? (\n          <Text as=\"p\" secondaryBody text03 className=\"w-full\">\n            {description}\n          </Text>\n        ) : (\n          <Text as=\"p\" secondaryBody text02 className=\"w-full\">\n            {description}\n          </Text>\n        )}\n      </div>\n    </div>\n  );\n}\n\nexport default ActionCardHeader;\n"
  },
  {
    "path": "web/src/sections/actions/Actions.tsx",
    "content": "\"use client\";\nimport { ActionStatus } from \"@/lib/tools/interfaces\";\nimport React from \"react\";\nimport { Button } from \"@opal/components\";\nimport {\n  SvgArrowExchange,\n  SvgChevronDown,\n  SvgPlug,\n  SvgSettings,\n  SvgTrash,\n  SvgUnplug,\n} from \"@opal/icons\";\nimport { useActionCardContext } from \"@/sections/actions/ActionCardContext\";\nimport { cn } from \"@/lib/utils\";\n\ninterface ActionsProps {\n  status: ActionStatus;\n  serverName: string;\n  onDisconnect?: () => void;\n  onManage?: () => void;\n  onAuthenticate?: () => void;\n  onReconnect?: () => void;\n  onDelete?: () => void;\n  toolCount?: number;\n  isToolsExpanded?: boolean;\n  onToggleTools?: () => void;\n}\n\nconst Actions = React.memo(\n  ({\n    status,\n    serverName,\n    onDisconnect,\n    onManage,\n    onAuthenticate,\n    onReconnect,\n    onDelete,\n    toolCount,\n    isToolsExpanded,\n    onToggleTools,\n  }: ActionsProps) => {\n    const { isHovered: isParentHovered } = useActionCardContext();\n    const showViewToolsButton =\n      (status === ActionStatus.CONNECTED ||\n        status === ActionStatus.FETCHING ||\n        status === ActionStatus.DISCONNECTED) &&\n      !isToolsExpanded &&\n      onToggleTools;\n\n    // Connected state\n    if (status === ActionStatus.CONNECTED || status === ActionStatus.FETCHING) {\n      return (\n        <div className=\"flex flex-col gap-1 items-end\">\n          <div className=\"flex items-center\">\n            {onDisconnect && (\n              <div\n                className={cn(\n                  \"inline-flex transition-all duration-200 ease-out\",\n                  isParentHovered\n                    ? \"opacity-100 translate-x-0 pointer-events-auto\"\n                    : \"opacity-0 translate-x-2 pointer-events-none\"\n                )}\n              >\n                <Button\n                  icon={SvgUnplug}\n                  tooltip=\"Disconnect Server\"\n                  prominence=\"tertiary\"\n                  onClick={onDisconnect}\n                  aria-label={`Disconnect ${serverName} server`}\n                />\n              </div>\n            )}\n            {onManage && (\n              <Button\n                icon={SvgSettings}\n                tooltip=\"Manage Server\"\n                prominence=\"tertiary\"\n                onClick={onManage}\n                aria-label={`Manage ${serverName} server`}\n              />\n            )}\n          </div>\n          {showViewToolsButton && (\n            <Button\n              prominence=\"tertiary\"\n              onClick={onToggleTools}\n              rightIcon={SvgChevronDown}\n              aria-label={`View tools for ${serverName}`}\n            >\n              {status === ActionStatus.FETCHING\n                ? \"Fetching tools...\"\n                : `View ${toolCount ?? 0} tool${toolCount !== 1 ? \"s\" : \"\"}`}\n            </Button>\n          )}\n        </div>\n      );\n    }\n\n    // Pending state\n    if (status === ActionStatus.PENDING) {\n      return (\n        <div className=\"flex flex-col gap-1 items-end shrink-0\">\n          {onAuthenticate && (\n            <Button\n              prominence=\"tertiary\"\n              onClick={onAuthenticate}\n              rightIcon={SvgArrowExchange}\n              aria-label={`Authenticate and connect to ${serverName}`}\n            >\n              Authenticate\n            </Button>\n          )}\n          <div\n            className={cn(\n              \"flex gap-1 items-center transition-opacity duration-200 ease-out\",\n              isParentHovered\n                ? \"opacity-100 pointer-events-auto\"\n                : \"opacity-0 pointer-events-none\"\n            )}\n          >\n            {onDelete && (\n              <Button\n                icon={SvgTrash}\n                tooltip=\"Delete Server\"\n                prominence=\"tertiary\"\n                onClick={onDelete}\n                aria-label={`Delete ${serverName} server`}\n              />\n            )}\n            {onManage && (\n              <Button\n                icon={SvgSettings}\n                tooltip=\"Manage Server\"\n                prominence=\"tertiary\"\n                onClick={onManage}\n                aria-label={`Manage ${serverName} server`}\n              />\n            )}\n          </div>\n        </div>\n      );\n    }\n\n    // Disconnected state\n    return (\n      <div className=\"flex flex-col gap-1 items-end shrink-0\">\n        <div className=\"flex gap-1 items-end\">\n          {onReconnect && (\n            <Button\n              prominence=\"secondary\"\n              onClick={onReconnect}\n              rightIcon={SvgPlug}\n              aria-label={`Reconnect to ${serverName}`}\n            >\n              Reconnect\n            </Button>\n          )}\n          {onManage && (\n            <Button\n              icon={SvgSettings}\n              tooltip=\"Manage Server\"\n              prominence=\"tertiary\"\n              onClick={onManage}\n              aria-label={`Manage ${serverName} server`}\n            />\n          )}\n        </div>\n        {showViewToolsButton && (\n          <Button\n            disabled\n            prominence=\"tertiary\"\n            onClick={onToggleTools}\n            rightIcon={SvgChevronDown}\n            aria-label={`View tools for ${serverName}`}\n          >\n            {`View ${toolCount ?? 0} tool${toolCount !== 1 ? \"s\" : \"\"}`}\n          </Button>\n        )}\n      </div>\n    );\n  }\n);\nActions.displayName = \"Actions\";\n\nexport default Actions;\n"
  },
  {
    "path": "web/src/sections/actions/MCPActionCard.tsx",
    "content": "\"use client\";\n\nimport React, {\n  useState,\n  useMemo,\n  useEffect,\n  useRef,\n  useCallback,\n} from \"react\";\nimport ActionCard from \"@/sections/actions/ActionCard\";\nimport Actions from \"@/sections/actions/Actions\";\nimport ToolItem from \"@/sections/actions/ToolItem\";\nimport ToolsList from \"@/sections/actions/ToolsList\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport {\n  ActionStatus,\n  ToolSnapshot,\n  MCPServerStatus,\n  MCPServer,\n} from \"@/lib/tools/interfaces\";\nimport useServerTools from \"@/hooks/useServerTools\";\nimport { KeyedMutator } from \"swr\";\nimport type { IconProps } from \"@opal/types\";\nimport { SvgRefreshCw, SvgServer, SvgTrash } from \"@opal/icons\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { timeAgo } from \"@/lib/time\";\nimport { cn } from \"@/lib/utils\";\nimport Modal from \"@/refresh-components/layouts/ConfirmationModalLayout\";\n\nexport interface MCPActionCardProps {\n  // Server identification\n  serverId: number;\n  server: MCPServer;\n\n  // Core content\n  title: string;\n  description: string;\n  logo?: React.FunctionComponent<IconProps>;\n\n  // Status\n  status: ActionStatus;\n\n  // Initial expanded state\n  initialExpanded?: boolean;\n\n  // Tool count (only for connected state)\n  toolCount?: number;\n\n  // Actions\n  onDisconnect?: () => void;\n  onManage?: () => void;\n  onEdit?: () => void;\n  onDelete?: () => Promise<void> | void;\n  onAuthenticate?: () => void; // For pending state\n  onReconnect?: () => void; // For disconnected state\n  onRename?: (serverId: number, newName: string) => Promise<void>; // For renaming\n\n  // Tool-related actions (now includes SWR mutate function for optimistic updates)\n  onToolToggle?: (\n    serverId: number,\n    toolId: string,\n    enabled: boolean,\n    mutate: KeyedMutator<ToolSnapshot[]>\n  ) => void;\n  onRefreshTools?: (\n    serverId: number,\n    mutate: KeyedMutator<ToolSnapshot[]>\n  ) => void;\n  onUpdateToolsStatus?: (\n    serverId: number,\n    toolIds: number[],\n    enabled: boolean,\n    mutate: KeyedMutator<ToolSnapshot[]>\n  ) => void;\n\n  // Optional styling\n  className?: string;\n}\n\n// Main Component\nexport default function MCPActionCard({\n  serverId,\n  server,\n  title,\n  description,\n  logo,\n  status,\n  initialExpanded = false,\n  toolCount,\n  onDisconnect,\n  onManage,\n  onEdit,\n  onDelete,\n  onAuthenticate,\n  onReconnect,\n  onRename,\n  onToolToggle,\n  onRefreshTools,\n  onUpdateToolsStatus,\n  className,\n}: MCPActionCardProps) {\n  const [isToolsExpanded, setIsToolsExpanded] = useState(initialExpanded);\n  const [searchQuery, setSearchQuery] = useState(\"\");\n  const [showOnlyEnabled, setShowOnlyEnabled] = useState(false);\n  const [isToolsRefreshing, setIsToolsRefreshing] = useState(false);\n  const deleteModal = useCreateModal();\n\n  // Update expanded state when initialExpanded changes\n  const hasInitializedExpansion = useRef(false);\n  const previousStatus = useRef<MCPServerStatus>(server.status);\n  const hasRetriedTools = useRef(false);\n\n  // Apply initial expansion only once per component lifetime\n  useEffect(() => {\n    if (initialExpanded && !hasInitializedExpansion.current) {\n      setIsToolsExpanded(true);\n      hasInitializedExpansion.current = true;\n    }\n  }, [initialExpanded]);\n\n  // Collapse tools when server becomes disconnected or awaiting auth\n  useEffect(() => {\n    if (\n      server.status === MCPServerStatus.DISCONNECTED ||\n      server.status === MCPServerStatus.AWAITING_AUTH\n    ) {\n      setIsToolsExpanded(false);\n    }\n  }, [server.status]);\n\n  // Lazy load tools only when expanded\n  const { tools, isLoading, mutate } = useServerTools(server, isToolsExpanded);\n\n  // Retry tools fetch when server transitions from FETCHING_TOOLS to CONNECTED\n  useEffect(() => {\n    const statusChanged =\n      previousStatus.current === MCPServerStatus.FETCHING_TOOLS &&\n      server.status === MCPServerStatus.CONNECTED;\n\n    if (statusChanged && tools.length === 0 && !hasRetriedTools.current) {\n      console.log(\n        \"Server status changed to CONNECTED with empty tools, retrying fetch\"\n      );\n      hasRetriedTools.current = true;\n      mutate();\n    }\n\n    // Update previous status\n    previousStatus.current = server.status;\n  }, [server.status, tools.length, mutate]);\n\n  const isNotAuthenticated = status === ActionStatus.PENDING;\n\n  // Filter tools based on search query and enabled status\n  const filteredTools = useMemo(() => {\n    if (!tools) return [];\n\n    let filtered = tools;\n\n    // Filter by enabled status if showOnlyEnabled is true\n    if (showOnlyEnabled) {\n      filtered = filtered.filter((tool) => tool.isEnabled);\n    }\n\n    // Filter by search query\n    if (searchQuery.trim()) {\n      const query = searchQuery.toLowerCase();\n      filtered = filtered.filter(\n        (tool) =>\n          tool.name.toLowerCase().includes(query) ||\n          tool.description.toLowerCase().includes(query)\n      );\n    }\n\n    return filtered;\n  }, [tools, searchQuery, showOnlyEnabled]);\n\n  const icon = isNotAuthenticated ? SvgServer : logo;\n\n  const handleToggleTools = useCallback(() => {\n    setIsToolsExpanded((prev) => !prev);\n    if (isToolsExpanded) {\n      setSearchQuery(\"\");\n    }\n  }, [isToolsExpanded]);\n\n  const handleFold = () => {\n    setIsToolsExpanded(false);\n    setSearchQuery(\"\");\n    setShowOnlyEnabled(false);\n  };\n\n  const handleToggleShowOnlyEnabled = () => {\n    setShowOnlyEnabled((prev) => !prev);\n  };\n\n  // Build the actions component\n  const actionsComponent = useMemo(\n    () => (\n      <Actions\n        status={status}\n        serverName={title}\n        onDisconnect={onDisconnect}\n        onManage={onManage}\n        onAuthenticate={onAuthenticate}\n        onReconnect={onReconnect}\n        onDelete={onDelete ? () => deleteModal.toggle(true) : undefined}\n        toolCount={toolCount}\n        isToolsExpanded={isToolsExpanded}\n        onToggleTools={handleToggleTools}\n      />\n    ),\n    [\n      deleteModal,\n      handleToggleTools,\n      isToolsExpanded,\n      onAuthenticate,\n      onDelete,\n      onDisconnect,\n      onManage,\n      onReconnect,\n      status,\n      title,\n      toolCount,\n    ]\n  );\n\n  const handleRename = async (newName: string) => {\n    if (onRename) {\n      await onRename(serverId, newName);\n    }\n  };\n\n  const handleRefreshTools = () => {\n    setIsToolsRefreshing(true);\n    onRefreshTools?.(serverId, mutate);\n    setTimeout(() => {\n      setIsToolsRefreshing(false);\n    }, 1000);\n  };\n\n  // Left action for ToolsList footer\n  const leftAction = useMemo(() => {\n    const lastRefreshedText = timeAgo(server.last_refreshed_at);\n\n    return (\n      <div className=\"flex items-center gap-2\">\n        <Button\n          icon={isToolsRefreshing ? SimpleLoader : SvgRefreshCw}\n          prominence=\"internal\"\n          onClick={handleRefreshTools}\n          tooltip=\"Refresh tools\"\n          aria-label=\"Refresh tools\"\n        />\n        {lastRefreshedText && (\n          <Text as=\"p\" text03 mainUiBody className=\"whitespace-nowrap\">\n            Tools last refreshed {lastRefreshedText}\n          </Text>\n        )}\n      </div>\n    );\n  }, [\n    server.last_refreshed_at,\n    serverId,\n    mutate,\n    onRefreshTools,\n    isToolsRefreshing,\n  ]);\n\n  return (\n    <>\n      <ActionCard\n        title={title}\n        description={description}\n        icon={icon}\n        status={status}\n        actions={actionsComponent}\n        onEdit={onEdit}\n        onRename={handleRename}\n        isExpanded={isToolsExpanded}\n        onExpandedChange={setIsToolsExpanded}\n        enableSearch={true}\n        searchQuery={searchQuery}\n        onSearchQueryChange={setSearchQuery}\n        onFold={handleFold}\n        className={className}\n        ariaLabel={`${title} MCP server card`}\n      >\n        <ToolsList\n          isFetching={\n            server.status === MCPServerStatus.FETCHING_TOOLS || isLoading\n          }\n          totalCount={tools.length}\n          enabledCount={tools.filter((tool) => tool.isEnabled).length}\n          showOnlyEnabled={showOnlyEnabled}\n          onToggleShowOnlyEnabled={handleToggleShowOnlyEnabled}\n          onUpdateToolsStatus={(enabled) => {\n            const toolIds = tools.map((tool) => parseInt(tool.id));\n            onUpdateToolsStatus?.(serverId, toolIds, enabled, mutate);\n          }}\n          isEmpty={filteredTools.length === 0}\n          searchQuery={searchQuery}\n          emptyMessage=\"No tools available\"\n          emptySearchMessage=\"No tools found\"\n          leftAction={leftAction}\n        >\n          {filteredTools.map((tool) => (\n            <ToolItem\n              key={tool.id}\n              name={tool.name}\n              description={tool.description}\n              icon={tool.icon}\n              isAvailable={tool.isAvailable}\n              isEnabled={tool.isEnabled}\n              onToggle={(enabled) =>\n                onToolToggle?.(serverId, tool.id, enabled, mutate)\n              }\n              variant=\"mcp\"\n            />\n          ))}\n        </ToolsList>\n      </ActionCard>\n\n      {deleteModal.isOpen && (\n        <Modal\n          icon={({ className }) => (\n            <SvgTrash className={cn(className, \"stroke-action-danger-05\")} />\n          )}\n          title=\"Delete MCP server\"\n          onClose={() => deleteModal.toggle(false)}\n          submit={\n            <Button\n              variant=\"danger\"\n              onClick={async () => {\n                if (!onDelete) return;\n                try {\n                  await onDelete();\n                  deleteModal.toggle(false);\n                } catch (error) {\n                  // Keep modal open if deletion fails; caller should surface error feedback.\n                  console.error(\"Failed to delete MCP server\", error);\n                }\n              }}\n            >\n              Delete\n            </Button>\n          }\n        >\n          <div className=\"flex flex-col gap-4\">\n            <Text as=\"p\" text03>\n              All tools connected to <b>{title}</b> will be removed. Deletion is\n              irreversible.\n            </Text>\n            <Text as=\"p\" text03>\n              Are you sure you want to delete this MCP server?\n            </Text>\n          </div>\n        </Modal>\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/actions/MCPPageContent.tsx",
    "content": "\"use client\";\n\nimport { useState, useCallback, useMemo, useEffect } from \"react\";\nimport { KeyedMutator } from \"swr\";\nimport MCPActionCard from \"@/sections/actions/MCPActionCard\";\nimport AdminListHeader from \"@/sections/admin/AdminListHeader\";\nimport ActionCardSkeleton from \"@/sections/actions/skeleton/ActionCardSkeleton\";\nimport { getActionIcon } from \"@/lib/tools/mcpUtils\";\nimport {\n  ActionStatus,\n  MCPServerStatus,\n  MCPServer,\n  ToolSnapshot,\n} from \"@/lib/tools/interfaces\";\nimport { toast } from \"@/hooks/useToast\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport MCPAuthenticationModal from \"@/sections/actions/modals/MCPAuthenticationModal\";\nimport AddMCPServerModal from \"@/sections/actions/modals/AddMCPServerModal\";\nimport DisconnectEntityModal from \"./modals/DisconnectEntityModal\";\nimport {\n  deleteMCPServer,\n  refreshMCPServerTools,\n  updateToolStatus,\n  updateMCPServerStatus,\n  updateMCPServer,\n  updateToolsStatus,\n} from \"@/lib/tools/mcpService\";\nimport { useSearchParams } from \"next/navigation\";\nimport { useRouter } from \"next/navigation\";\nimport useMcpServers from \"@/hooks/useMcpServers\";\n\nexport default function MCPPageContent() {\n  // Data fetching\n  const {\n    mcpData,\n    isLoading: isMcpLoading,\n    mutateMcpServers,\n  } = useMcpServers();\n\n  // Modal management\n  const authModal = useCreateModal();\n  const disconnectModal = useCreateModal();\n  const manageServerModal = useCreateModal();\n\n  // Local state\n  const [activeServer, setActiveServer] = useState<MCPServer | null>(null);\n  const [serverToExpand, setServerToExpand] = useState<number | null>(null);\n  const [isDisconnecting, setIsDisconnecting] = useState(false);\n  const [showSharedOverlay, setShowSharedOverlay] = useState(false);\n  const [fetchingToolsServerIds, setFetchingToolsServerIds] = useState<\n    number[]\n  >([]);\n  const [searchQuery, setSearchQuery] = useState(\"\");\n\n  const mcpServers = useMemo(\n    () => (mcpData?.mcp_servers || []) as MCPServer[],\n    [mcpData?.mcp_servers]\n  );\n  const isLoading = isMcpLoading;\n\n  const searchParams = useSearchParams();\n  const router = useRouter();\n\n  useEffect(() => {\n    const serverId = searchParams.get(\"server_id\");\n    const triggerFetch = searchParams.get(\"trigger_fetch\");\n\n    // Only process if we have a server_id and trigger_fetch flag\n    if (\n      serverId &&\n      triggerFetch === \"true\" &&\n      !fetchingToolsServerIds.includes(parseInt(serverId))\n    ) {\n      const serverIdInt = parseInt(serverId);\n\n      const handleFetchingTools = async () => {\n        try {\n          await updateMCPServerStatus(\n            serverIdInt,\n            MCPServerStatus.FETCHING_TOOLS\n          );\n\n          await mutateMcpServers();\n\n          router.replace(\"/admin/actions/mcp\");\n\n          // Automatically expand the tools for this server\n          setServerToExpand(serverIdInt);\n\n          await refreshMCPServerTools(serverIdInt);\n\n          toast.success(\"Successfully connected and fetched tools\");\n\n          await mutateMcpServers();\n        } catch (error) {\n          console.error(\"Failed to fetch tools:\", error);\n          toast.error(\n            `Failed to fetch tools: ${\n              error instanceof Error ? error.message : \"Unknown error\"\n            }`\n          );\n          await mutateMcpServers();\n        }\n      };\n\n      handleFetchingTools();\n    }\n  }, [\n    searchParams,\n    router,\n    fetchingToolsServerIds,\n    mutateMcpServers,\n    setServerToExpand,\n  ]);\n\n  // Track fetching tools server IDs\n  useEffect(() => {\n    if (mcpServers) {\n      const fetchingIds = mcpServers\n        .filter((server) => server.status === MCPServerStatus.FETCHING_TOOLS)\n        .map((server) => server.id);\n      setFetchingToolsServerIds(fetchingIds);\n    }\n  }, [mcpServers]);\n\n  // Track if any modal is open to manage the shared overlay\n  useEffect(() => {\n    const anyModalOpen =\n      authModal.isOpen || disconnectModal.isOpen || manageServerModal.isOpen;\n    setShowSharedOverlay(anyModalOpen);\n  }, [authModal.isOpen, disconnectModal.isOpen, manageServerModal.isOpen]);\n\n  // Determine action status based on server status field\n  const getActionStatusForServer = useCallback(\n    (server: MCPServer): ActionStatus => {\n      if (server.status === MCPServerStatus.CONNECTED) {\n        return ActionStatus.CONNECTED;\n      } else if (\n        server.status === MCPServerStatus.AWAITING_AUTH ||\n        server.status === MCPServerStatus.CREATED\n      ) {\n        return ActionStatus.PENDING;\n      } else if (server.status === MCPServerStatus.FETCHING_TOOLS) {\n        return ActionStatus.FETCHING;\n      }\n      return ActionStatus.DISCONNECTED;\n    },\n    []\n  );\n\n  // Handler callbacks\n  const handleDisconnect = useCallback(\n    (serverId: number) => {\n      const server = mcpServers.find((s) => s.id === serverId);\n      if (server) {\n        setActiveServer(server);\n        disconnectModal.toggle(true);\n      }\n    },\n    [mcpServers, disconnectModal]\n  );\n\n  const handleConfirmDisconnect = useCallback(async () => {\n    if (!activeServer) return;\n\n    setIsDisconnecting(true);\n    try {\n      await updateMCPServerStatus(\n        activeServer.id,\n        MCPServerStatus.DISCONNECTED\n      );\n\n      toast.success(\"MCP Server disconnected successfully\");\n\n      await mutateMcpServers();\n      disconnectModal.toggle(false);\n      setActiveServer(null);\n    } catch (error) {\n      console.error(\"Error disconnecting server:\", error);\n      toast.error(\n        error instanceof Error\n          ? error.message\n          : \"Failed to disconnect MCP Server\"\n      );\n    } finally {\n      setIsDisconnecting(false);\n    }\n  }, [activeServer, mutateMcpServers, disconnectModal]);\n\n  const handleConfirmDisconnectAndDelete = useCallback(async () => {\n    if (!activeServer) return;\n\n    setIsDisconnecting(true);\n    try {\n      await deleteMCPServer(activeServer.id);\n\n      toast.success(\"MCP Server deleted successfully\");\n\n      await mutateMcpServers();\n      disconnectModal.toggle(false);\n      setActiveServer(null);\n    } catch (error) {\n      console.error(\"Error deleting server:\", error);\n      toast.error(\n        error instanceof Error ? error.message : \"Failed to delete MCP Server\"\n      );\n    } finally {\n      setIsDisconnecting(false);\n    }\n  }, [activeServer, mutateMcpServers, disconnectModal]);\n\n  const openManageServerModal = useCallback(\n    (serverId: number) => {\n      const server = mcpServers.find((s) => s.id === serverId);\n      if (server) {\n        setActiveServer(server);\n        manageServerModal.toggle(true);\n      }\n    },\n    [mcpServers, manageServerModal]\n  );\n\n  const handleManage = useCallback(\n    (serverId: number) => {\n      openManageServerModal(serverId);\n    },\n    [openManageServerModal]\n  );\n\n  const handleEdit = useCallback(\n    (serverId: number) => {\n      openManageServerModal(serverId);\n    },\n    [openManageServerModal]\n  );\n\n  const handleDelete = useCallback(\n    async (serverId: number) => {\n      try {\n        await deleteMCPServer(serverId);\n\n        toast.success(\"MCP Server deleted successfully\");\n\n        await mutateMcpServers();\n      } catch (error) {\n        console.error(\"Error deleting server:\", error);\n        toast.error(\n          error instanceof Error ? error.message : \"Failed to delete MCP Server\"\n        );\n      }\n    },\n    [mutateMcpServers]\n  );\n\n  const handleAuthenticate = useCallback(\n    (serverId: number) => {\n      const server = mcpServers.find((s) => s.id === serverId);\n      if (server) {\n        setActiveServer(server);\n        authModal.toggle(true);\n      }\n    },\n    [mcpServers, authModal]\n  );\n\n  const triggerFetchToolsInPlace = useCallback(\n    async (serverId: number) => {\n      if (fetchingToolsServerIds.includes(serverId)) {\n        return;\n      }\n\n      try {\n        // Expand tools list immediately so the user sees the skeleton\n        setServerToExpand(serverId);\n\n        await updateMCPServerStatus(serverId, MCPServerStatus.FETCHING_TOOLS);\n        await mutateMcpServers();\n\n        await refreshMCPServerTools(serverId);\n\n        toast.success(\"Successfully connected and fetched tools\");\n\n        await mutateMcpServers();\n      } catch (error) {\n        console.error(\"Failed to fetch tools:\", error);\n        toast.error(\n          `Failed to fetch tools: ${\n            error instanceof Error ? error.message : \"Unknown error\"\n          }`\n        );\n        await mutateMcpServers();\n      }\n    },\n    [fetchingToolsServerIds, mutateMcpServers, setServerToExpand]\n  );\n\n  const handleReconnect = useCallback(\n    async (serverId: number) => {\n      try {\n        await updateMCPServerStatus(serverId, MCPServerStatus.CONNECTED);\n\n        toast.success(\"MCP Server reconnected successfully\");\n\n        await mutateMcpServers();\n      } catch (error) {\n        console.error(\"Error reconnecting server:\", error);\n        toast.error(\n          error instanceof Error\n            ? error.message\n            : \"Failed to reconnect MCP Server\"\n        );\n      }\n    },\n    [mutateMcpServers]\n  );\n\n  const handleToolToggle = useCallback(\n    async (\n      serverId: number,\n      toolId: string,\n      enabled: boolean,\n      mutateServerTools: KeyedMutator<ToolSnapshot[]>\n    ) => {\n      try {\n        // Optimistically update the UI\n        await mutateServerTools(\n          async (currentTools) => {\n            if (!currentTools) return currentTools;\n            return currentTools.map((tool) =>\n              tool.id.toString() === toolId ? { ...tool, enabled } : tool\n            );\n          },\n          { revalidate: false }\n        );\n\n        await updateToolStatus(parseInt(toolId), enabled);\n\n        // Revalidate to get fresh data from server\n        await mutateServerTools();\n\n        toast.success(`Tool ${enabled ? \"enabled\" : \"disabled\"} successfully`);\n      } catch (error) {\n        console.error(\"Error toggling tool:\", error);\n\n        // Revert on error by revalidating\n        await mutateServerTools();\n\n        toast.error(\n          error instanceof Error ? error.message : \"Failed to update tool\"\n        );\n      }\n    },\n    []\n  );\n\n  const handleRefreshTools = useCallback(\n    async (\n      serverId: number,\n      mutateServerTools: KeyedMutator<ToolSnapshot[]>\n    ) => {\n      try {\n        // Refresh tools for this specific server (discovers from MCP and syncs to DB)\n        await refreshMCPServerTools(serverId);\n\n        // Update the local cache with fresh data\n        await mutateServerTools();\n\n        // Also refresh the servers list to update tool counts\n        await mutateMcpServers();\n\n        toast.success(\"Tools refreshed successfully\");\n      } catch (error) {\n        console.error(\"Error refreshing tools:\", error);\n        toast.error(\n          error instanceof Error ? error.message : \"Failed to refresh tools\"\n        );\n      }\n    },\n    [mutateMcpServers]\n  );\n\n  const handleUpdateToolsStatus = useCallback(\n    async (\n      serverId: number,\n      toolIds: number[],\n      enabled: boolean,\n      mutateServerTools: KeyedMutator<ToolSnapshot[]>\n    ) => {\n      try {\n        if (toolIds.length === 0) {\n          toast.info(\"No tools to disable\");\n          return;\n        }\n\n        // Optimistically update - disable all tools in the UI\n        await mutateServerTools(\n          async (currentTools) => {\n            if (!currentTools) return currentTools;\n            return currentTools.map((tool) =>\n              toolIds.includes(tool.id) ? { ...tool, enabled } : tool\n            );\n          },\n          { revalidate: false }\n        );\n\n        const result = await updateToolsStatus(toolIds, enabled);\n\n        // Revalidate to get fresh data from server\n        await mutateServerTools();\n\n        toast.success(\n          `${result.updated_count} tool${\n            result.updated_count !== 1 ? \"s\" : \"\"\n          } ${enabled ? \"enabled\" : \"disabled\"} successfully`\n        );\n      } catch (error) {\n        console.error(\n          `Error ${enabled ? \"enabling\" : \"disabling\"} all tools:`,\n          error\n        );\n\n        // Revert on error by revalidating\n        await mutateServerTools();\n\n        toast.error(\n          error instanceof Error\n            ? error.message\n            : `Failed to ${enabled ? \"enable\" : \"disable\"} all tools`\n        );\n      }\n    },\n    []\n  );\n\n  const onServerCreated = useCallback(\n    (server: MCPServer) => {\n      setActiveServer(server);\n      authModal.toggle(true);\n    },\n    [authModal]\n  );\n\n  const handleAddServer = useCallback(() => {\n    setActiveServer(null);\n    manageServerModal.toggle(true);\n  }, [manageServerModal]);\n\n  const handleRenameServer = useCallback(\n    async (serverId: number, newName: string) => {\n      try {\n        await updateMCPServer(serverId, { name: newName });\n        toast.success(\"MCP Server renamed successfully\");\n        await mutateMcpServers();\n      } catch (error) {\n        console.error(\"Error renaming server:\", error);\n        toast.error(\n          error instanceof Error ? error.message : \"Failed to rename MCP Server\"\n        );\n        throw error; // Re-throw so ButtonRenaming can handle it\n      }\n    },\n    [mutateMcpServers]\n  );\n\n  // Filter servers based on search query\n  const filteredServers = useMemo(() => {\n    if (!searchQuery.trim()) return mcpServers;\n\n    const query = searchQuery.toLowerCase();\n    return mcpServers.filter(\n      (server) =>\n        server.name.toLowerCase().includes(query) ||\n        server.description?.toLowerCase().includes(query) ||\n        server.server_url.toLowerCase().includes(query)\n    );\n  }, [mcpServers, searchQuery]);\n\n  return (\n    <div className=\"flex flex-col h-full overflow-hidden\">\n      {/* Shared overlay that persists across modal transitions */}\n      {showSharedOverlay && (\n        <div\n          className=\"fixed inset-0 z-modal-overlay bg-mask-03 backdrop-blur-03 pointer-events-none data-[state=open]:animate-in data-[state=open]:fade-in-0\"\n          data-state=\"open\"\n          aria-hidden=\"true\"\n        />\n      )}\n\n      <div className=\"flex-shrink-0 mb-4\">\n        <AdminListHeader\n          hasItems={isLoading || mcpServers.length > 0}\n          searchQuery={searchQuery}\n          onSearchQueryChange={setSearchQuery}\n          onAction={handleAddServer}\n          actionLabel=\"Add MCP Server\"\n          emptyStateText=\"Connect MCP server to add custom actions.\"\n        />\n      </div>\n\n      <div className=\"flex-1 overflow-y-auto min-h-0\">\n        <div className=\"flex flex-col gap-4 w-full pb-4\">\n          {isLoading ? (\n            <>\n              <ActionCardSkeleton />\n              <ActionCardSkeleton />\n            </>\n          ) : (\n            filteredServers.map((server) => {\n              const status = getActionStatusForServer(server);\n\n              return (\n                <MCPActionCard\n                  key={server.id}\n                  serverId={server.id}\n                  server={server}\n                  title={server.name}\n                  description={server.description || server.server_url}\n                  logo={getActionIcon(server.server_url, server.name)}\n                  status={status}\n                  toolCount={server.tool_count}\n                  initialExpanded={server.id === serverToExpand}\n                  onDisconnect={() => handleDisconnect(server.id)}\n                  onManage={() => handleManage(server.id)}\n                  onEdit={() => handleEdit(server.id)}\n                  onDelete={() => handleDelete(server.id)}\n                  onAuthenticate={() => handleAuthenticate(server.id)}\n                  onReconnect={() => handleReconnect(server.id)}\n                  onRename={handleRenameServer}\n                  onToolToggle={handleToolToggle}\n                  onRefreshTools={handleRefreshTools}\n                  onUpdateToolsStatus={handleUpdateToolsStatus}\n                />\n              );\n            })\n          )}\n        </div>\n      </div>\n\n      <authModal.Provider>\n        <MCPAuthenticationModal\n          mcpServer={activeServer}\n          skipOverlay\n          onTriggerFetchTools={triggerFetchToolsInPlace}\n          mutateMcpServers={mutateMcpServers}\n        />\n      </authModal.Provider>\n\n      <manageServerModal.Provider>\n        <AddMCPServerModal\n          skipOverlay\n          activeServer={activeServer}\n          setActiveServer={setActiveServer}\n          disconnectModal={disconnectModal}\n          manageServerModal={manageServerModal}\n          onServerCreated={onServerCreated}\n          handleAuthenticate={handleAuthenticate}\n          mutateMcpServers={async () => {\n            await mutateMcpServers();\n          }}\n        />\n      </manageServerModal.Provider>\n\n      <DisconnectEntityModal\n        isOpen={disconnectModal.isOpen}\n        onClose={() => {\n          disconnectModal.toggle(false);\n          setActiveServer(null);\n        }}\n        name={activeServer?.name ?? null}\n        onConfirmDisconnect={handleConfirmDisconnect}\n        onConfirmDisconnectAndDelete={handleConfirmDisconnectAndDelete}\n        isDisconnecting={isDisconnecting}\n        skipOverlay\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/actions/OpenApiActionCard.tsx",
    "content": "\"use client\";\n\nimport React, { useCallback, useEffect, useMemo, useState } from \"react\";\nimport { toast } from \"@/hooks/useToast\";\nimport ActionCard from \"@/sections/actions/ActionCard\";\nimport Actions from \"@/sections/actions/Actions\";\nimport ToolsList from \"@/sections/actions/ToolsList\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport { ToolSnapshot, ActionStatus, MethodSpec } from \"@/lib/tools/interfaces\";\nimport ToolItem from \"@/sections/actions/ToolItem\";\nimport { extractMethodSpecsFromDefinition } from \"@/lib/tools/openApiService\";\nimport { updateToolStatus } from \"@/lib/tools/mcpService\";\nimport { SvgServer, SvgTrash } from \"@opal/icons\";\nimport Modal from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\n\nexport interface OpenApiActionCardProps {\n  tool: ToolSnapshot;\n  onAuthenticate: (tool: ToolSnapshot) => void;\n  onManage?: (tool: ToolSnapshot) => void;\n  onDelete?: (tool: ToolSnapshot) => Promise<void> | void;\n  onRename?: (toolId: number, newName: string) => Promise<void>;\n  mutateOpenApiTools: () => Promise<unknown> | void;\n  onOpenDisconnectModal?: (tool: ToolSnapshot) => void;\n}\n\nexport default function OpenApiActionCard({\n  tool,\n  onAuthenticate,\n  onManage,\n  onDelete,\n  onRename,\n  mutateOpenApiTools,\n  onOpenDisconnectModal,\n}: OpenApiActionCardProps) {\n  const [isToolsExpanded, setIsToolsExpanded] = useState(false);\n  const [searchQuery, setSearchQuery] = useState(\"\");\n  const [updatingStatus, setUpdatingStatus] = useState(false);\n  const deleteModal = useCreateModal();\n\n  const methodSpecs = useMemo<MethodSpec[]>(() => {\n    try {\n      return extractMethodSpecsFromDefinition(tool.definition) ?? [];\n    } catch (error) {\n      console.error(\"Failed to parse OpenAPI definition\", error);\n      return [];\n    }\n  }, [tool.definition]);\n\n  const filteredTools = useMemo(() => {\n    if (!searchQuery.trim()) return methodSpecs;\n\n    const query = searchQuery.toLowerCase();\n    return methodSpecs.filter((method) => {\n      const name = method.name?.toLowerCase() ?? \"\";\n      const summary = method.summary?.toLowerCase() ?? \"\";\n      return name.includes(query) || summary.includes(query);\n    });\n  }, [methodSpecs, searchQuery]);\n\n  const hasCustomHeaders =\n    Array.isArray(tool.custom_headers) && tool.custom_headers.length > 0;\n  const hasAuthConfigured =\n    Boolean(tool.oauth_config_id) ||\n    Boolean(tool.passthrough_auth) ||\n    hasCustomHeaders;\n  const isDisconnected = !tool.enabled;\n\n  // Compute generic ActionStatus for the OpenAPI tool\n  const status = isDisconnected\n    ? ActionStatus.DISCONNECTED\n    : hasAuthConfigured\n      ? ActionStatus.CONNECTED\n      : ActionStatus.PENDING;\n\n  const handleConnectionUpdate = useCallback(\n    async (shouldEnable: boolean) => {\n      if (updatingStatus || tool.enabled === shouldEnable) {\n        return;\n      }\n\n      try {\n        setUpdatingStatus(true);\n        await updateToolStatus(tool.id, shouldEnable);\n        await mutateOpenApiTools();\n      } catch (error) {\n        console.error(\"Failed to update OpenAPI tool status\", error);\n      } finally {\n        setUpdatingStatus(false);\n      }\n    },\n    [updatingStatus, mutateOpenApiTools, tool.enabled, tool.id]\n  );\n\n  const handleToggleTools = useCallback(() => {\n    setIsToolsExpanded((prev) => !prev);\n    if (isToolsExpanded) {\n      setSearchQuery(\"\");\n    }\n  }, [isToolsExpanded]);\n\n  useEffect(() => {\n    if (isDisconnected) {\n      setIsToolsExpanded(false);\n    }\n  }, [isDisconnected]);\n\n  const handleFold = () => {\n    setIsToolsExpanded(false);\n    setSearchQuery(\"\");\n  };\n\n  // Build the actions component\n  const actionsComponent = useMemo(\n    () => (\n      <Actions\n        status={status}\n        serverName={tool.name}\n        toolCount={methodSpecs.length}\n        isToolsExpanded={isToolsExpanded}\n        onToggleTools={methodSpecs.length ? handleToggleTools : undefined}\n        onDisconnect={() => onOpenDisconnectModal?.(tool)}\n        onManage={onManage ? () => onManage(tool) : undefined}\n        onAuthenticate={() => {\n          onAuthenticate(tool);\n        }}\n        onReconnect={() => handleConnectionUpdate(true)}\n        onDelete={onDelete ? () => deleteModal.toggle(true) : undefined}\n      />\n    ),\n    [\n      deleteModal,\n      handleConnectionUpdate,\n      handleToggleTools,\n      isToolsExpanded,\n      methodSpecs.length,\n      onAuthenticate,\n      onDelete,\n      onManage,\n      onOpenDisconnectModal,\n      status,\n      tool,\n    ]\n  );\n\n  const handleRename = async (newName: string) => {\n    if (onRename) {\n      await onRename(tool.id, newName);\n    }\n  };\n\n  return (\n    <>\n      <ActionCard\n        title={tool.name}\n        description={tool.description}\n        icon={SvgServer}\n        status={status}\n        actions={actionsComponent}\n        onRename={handleRename}\n        isExpanded={isToolsExpanded}\n        onExpandedChange={setIsToolsExpanded}\n        enableSearch={true}\n        searchQuery={searchQuery}\n        onSearchQueryChange={setSearchQuery}\n        onFold={handleFold}\n        ariaLabel={`${tool.name} OpenAPI action card`}\n      >\n        <ToolsList\n          isEmpty={filteredTools.length === 0}\n          searchQuery={searchQuery}\n          emptyMessage=\"No actions defined for this OpenAPI schema\"\n          emptySearchMessage=\"No actions match your search\"\n          className=\"gap-2\"\n        >\n          {filteredTools.map((method) => (\n            <ToolItem\n              key={`${tool.id}-${method.method}-${method.path}-${method.name}`}\n              name={method.name}\n              description={method.summary || \"No summary provided\"}\n              variant=\"openapi\"\n              openApiMetadata={{\n                method: method.method,\n                path: method.path,\n              }}\n            />\n          ))}\n        </ToolsList>\n      </ActionCard>\n\n      {deleteModal.isOpen && onDelete && (\n        <Modal\n          icon={({ className }) => (\n            <SvgTrash className={cn(className, \"stroke-action-danger-05\")} />\n          )}\n          title=\"Delete OpenAPI action\"\n          onClose={() => deleteModal.toggle(false)}\n          submit={\n            <Button\n              variant=\"danger\"\n              onClick={async () => {\n                await onDelete(tool);\n                deleteModal.toggle(false);\n              }}\n            >\n              Delete\n            </Button>\n          }\n        >\n          <div className=\"flex flex-col gap-4\">\n            <Text as=\"p\" text03>\n              This will permanently delete the OpenAPI action <b>{tool.name}</b>{\" \"}\n              and its configuration.\n            </Text>\n            <Text as=\"p\" text03>\n              Are you sure you want to delete this OpenAPI action?\n            </Text>\n          </div>\n        </Modal>\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/actions/OpenApiPageContent.tsx",
    "content": "\"use client\";\n\nimport { ToolSnapshot } from \"@/lib/tools/interfaces\";\nimport { useCallback, useEffect, useMemo, useState } from \"react\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport OpenAPIAuthenticationModal, {\n  AuthMethod,\n  OpenAPIAuthFormValues,\n} from \"./modals/OpenAPIAuthenticationModal\";\nimport AddOpenAPIActionModal from \"./modals/AddOpenAPIActionModal\";\nimport AdminListHeader from \"@/sections/admin/AdminListHeader\";\nimport { toast } from \"@/hooks/useToast\";\nimport OpenApiActionCard from \"./OpenApiActionCard\";\nimport { createOAuthConfig, updateOAuthConfig } from \"@/lib/oauth/api\";\nimport { updateCustomTool, deleteCustomTool } from \"@/lib/tools/openApiService\";\nimport { updateToolStatus } from \"@/lib/tools/mcpService\";\nimport DisconnectEntityModal from \"./modals/DisconnectEntityModal\";\nimport ActionCardSkeleton from \"./skeleton/ActionCardSkeleton\";\nimport useOpenApiTools from \"@/hooks/useOpenApiTools\";\n\nexport default function OpenApiPageContent() {\n  const {\n    openApiTools,\n    mutateOpenApiTools,\n    isLoading: isOpenApiLoading,\n  } = useOpenApiTools();\n  const addOpenAPIActionModal = useCreateModal();\n  const openAPIAuthModal = useCreateModal();\n  const disconnectModal = useCreateModal();\n  const [selectedTool, setSelectedTool] = useState<ToolSnapshot | null>(null);\n  const [toolBeingEdited, setToolBeingEdited] = useState<ToolSnapshot | null>(\n    null\n  );\n  const [toolPendingDisconnect, setToolPendingDisconnect] =\n    useState<ToolSnapshot | null>(null);\n  const [isDisconnecting, setIsDisconnecting] = useState(false);\n  const [isDeleting, setIsDeleting] = useState(false);\n  const [searchQuery, setSearchQuery] = useState(\"\");\n  const [showSharedOverlay, setShowSharedOverlay] = useState(false);\n\n  useEffect(() => {\n    const anyModalOpen =\n      addOpenAPIActionModal.isOpen ||\n      openAPIAuthModal.isOpen ||\n      disconnectModal.isOpen;\n    setShowSharedOverlay(anyModalOpen);\n  }, [\n    addOpenAPIActionModal.isOpen,\n    openAPIAuthModal.isOpen,\n    disconnectModal.isOpen,\n  ]);\n\n  const handleOpenAuthModal = useCallback(\n    (tool: ToolSnapshot) => {\n      setSelectedTool(tool);\n      openAPIAuthModal.toggle(true);\n    },\n    [openAPIAuthModal]\n  );\n\n  const resetAuthModal = useCallback(() => {\n    setSelectedTool(null);\n    openAPIAuthModal.toggle(false);\n  }, [openAPIAuthModal]);\n\n  const handleConnect = useCallback(\n    async (values: OpenAPIAuthFormValues) => {\n      if (!selectedTool) {\n        throw new Error(\"No OpenAPI action selected for authentication.\");\n      }\n\n      try {\n        if (values.authMethod === \"oauth\") {\n          const parsedScopes = values.scopes\n            .split(\",\")\n            .map((scope) => scope.trim())\n            .filter(Boolean);\n          const trimmedClientId = values.clientId.trim();\n          const trimmedClientSecret = values.clientSecret.trim();\n\n          let oauthConfigId = selectedTool.oauth_config_id ?? null;\n\n          if (oauthConfigId) {\n            await updateOAuthConfig(oauthConfigId, {\n              authorization_url: values.authorizationUrl,\n              token_url: values.tokenUrl,\n              scopes: parsedScopes,\n              ...(trimmedClientId ? { client_id: trimmedClientId } : {}),\n              ...(trimmedClientSecret\n                ? { client_secret: trimmedClientSecret }\n                : {}),\n            });\n          } else {\n            const oauthConfig = await createOAuthConfig({\n              name: `${selectedTool.name} OAuth`,\n              authorization_url: values.authorizationUrl,\n              token_url: values.tokenUrl,\n              client_id: trimmedClientId,\n              client_secret: trimmedClientSecret,\n              scopes: parsedScopes.length ? parsedScopes : undefined,\n            });\n            oauthConfigId = oauthConfig.id;\n          }\n\n          const response = await updateCustomTool(selectedTool.id, {\n            custom_headers: [],\n            passthrough_auth: false,\n            oauth_config_id: oauthConfigId,\n          });\n\n          if (response.error) {\n            throw new Error(response.error);\n          }\n\n          toast.success(\n            `${selectedTool.name} authentication ${\n              selectedTool.oauth_config_id ? \"updated\" : \"saved\"\n            } successfully.`\n          );\n        } else if (values.authMethod === \"custom-header\") {\n          const customHeaders = values.headers\n            .map(({ key, value }) => ({\n              key: key.trim(),\n              value: value.trim(),\n            }))\n            .filter(({ key, value }) => key && value);\n\n          const response = await updateCustomTool(selectedTool.id, {\n            custom_headers: customHeaders,\n            passthrough_auth: false,\n            oauth_config_id: null,\n          });\n\n          if (response.error) {\n            throw new Error(response.error);\n          }\n\n          toast.success(\n            `${selectedTool.name} authentication headers saved successfully.`\n          );\n        } else if (values.authMethod === \"pt-oauth\") {\n          const response = await updateCustomTool(selectedTool.id, {\n            passthrough_auth: true,\n            oauth_config_id: null,\n            custom_headers: [],\n          });\n          if (response.error) {\n            throw new Error(response.error);\n          }\n          toast.success(\n            `${selectedTool.name} authentication passthrough saved successfully.`\n          );\n        }\n\n        await mutateOpenApiTools();\n        setSelectedTool(null);\n      } catch (error) {\n        const message =\n          error instanceof Error\n            ? error.message\n            : \"Failed to save authentication settings.\";\n        toast.error(message);\n        throw error;\n      }\n    },\n    [selectedTool, mutateOpenApiTools]\n  );\n\n  const handleManageTool = useCallback(\n    (tool: ToolSnapshot) => {\n      setToolBeingEdited(tool);\n      addOpenAPIActionModal.toggle(true);\n    },\n    [addOpenAPIActionModal]\n  );\n\n  const handleEditAuthenticationFromModal = useCallback(\n    (tool: ToolSnapshot) => {\n      setSelectedTool(tool);\n      openAPIAuthModal.toggle(true);\n    },\n    [openAPIAuthModal]\n  );\n\n  const handleDisableTool = useCallback(\n    async (tool: ToolSnapshot) => {\n      try {\n        await updateToolStatus(tool.id, false);\n\n        toast.success(`${tool.name} has been disconnected.`);\n\n        await mutateOpenApiTools();\n      } catch (error) {\n        const message =\n          error instanceof Error\n            ? error.message\n            : \"Failed to disconnect OpenAPI action.\";\n        toast.error(message);\n        throw error instanceof Error\n          ? error\n          : new Error(\"Failed to disconnect OpenAPI action.\");\n      }\n    },\n    [mutateOpenApiTools]\n  );\n\n  const handleOpenDisconnectModal = useCallback(\n    (tool: ToolSnapshot) => {\n      setToolPendingDisconnect(tool);\n      addOpenAPIActionModal.toggle(false);\n      disconnectModal.toggle(true);\n    },\n    [disconnectModal, addOpenAPIActionModal]\n  );\n\n  const handleConfirmDisconnectFromModal = useCallback(async () => {\n    if (!toolPendingDisconnect) {\n      return;\n    }\n\n    try {\n      setIsDisconnecting(true);\n      await handleDisableTool(toolPendingDisconnect);\n    } finally {\n      setIsDisconnecting(false);\n      disconnectModal.toggle(false);\n      setToolPendingDisconnect(null);\n    }\n  }, [disconnectModal, handleDisableTool, toolPendingDisconnect]);\n\n  const executeDeleteTool = useCallback(\n    async (tool: ToolSnapshot) => {\n      try {\n        setIsDeleting(true);\n        const response = await deleteCustomTool(tool.id);\n        if (response.data) {\n          toast.success(`${tool.name} deleted successfully.`);\n          await mutateOpenApiTools();\n        } else {\n          throw new Error(response.error || \"Failed to delete tool.\");\n        }\n      } catch (error) {\n        console.error(\"Failed to delete OpenAPI tool\", error);\n        toast.error(\n          error instanceof Error\n            ? error.message\n            : \"An unexpected error occurred while deleting the tool.\"\n        );\n        throw error;\n      } finally {\n        setIsDeleting(false);\n      }\n    },\n    [mutateOpenApiTools]\n  );\n\n  const handleDeleteToolFromModal = useCallback(async () => {\n    if (!toolPendingDisconnect || isDeleting) {\n      return;\n    }\n\n    try {\n      await executeDeleteTool(toolPendingDisconnect);\n    } finally {\n      disconnectModal.toggle(false);\n      setToolPendingDisconnect(null);\n    }\n  }, [disconnectModal, executeDeleteTool, isDeleting, toolPendingDisconnect]);\n\n  const handleDeleteTool = useCallback(\n    async (tool: ToolSnapshot) => {\n      if (isDeleting) return;\n      await executeDeleteTool(tool);\n    },\n    [executeDeleteTool, isDeleting]\n  );\n\n  const handleAddAction = useCallback(() => {\n    setToolBeingEdited(null);\n    addOpenAPIActionModal.toggle(true);\n  }, [addOpenAPIActionModal]);\n\n  const handleAddModalClose = useCallback(() => {\n    setToolBeingEdited(null);\n  }, []);\n\n  const handleRenameTool = useCallback(\n    async (toolId: number, newName: string) => {\n      try {\n        const response = await updateCustomTool(toolId, { name: newName });\n        if (response.error) {\n          throw new Error(response.error);\n        }\n        toast.success(\"OpenAPI action renamed successfully\");\n        await mutateOpenApiTools();\n      } catch (error) {\n        console.error(\"Error renaming tool:\", error);\n        toast.error(\n          error instanceof Error\n            ? error.message\n            : \"Failed to rename OpenAPI action\"\n        );\n        throw error; // Re-throw so ButtonRenaming can handle it\n      }\n    },\n    [mutateOpenApiTools]\n  );\n\n  const authenticationModalTitle = useMemo(() => {\n    if (!selectedTool) {\n      return \"Authenticate OpenAPI Action\";\n    }\n    const hasExistingAuth =\n      Boolean(selectedTool.oauth_config_id) ||\n      Boolean(selectedTool.custom_headers?.length);\n    const prefix = hasExistingAuth\n      ? \"Update authentication for\"\n      : \"Authenticate\";\n    return `${prefix} ${selectedTool.name}`;\n  }, [selectedTool]);\n\n  const authenticationDefaultMethod = useMemo<AuthMethod>(() => {\n    if (!selectedTool) {\n      return \"oauth\";\n    }\n    return selectedTool.custom_headers?.length ? \"custom-header\" : \"oauth\";\n  }, [selectedTool]);\n\n  // Filter tools based on search query\n  const filteredTools = useMemo(() => {\n    if (!openApiTools) return [];\n    if (!searchQuery.trim()) return openApiTools;\n\n    const query = searchQuery.toLowerCase();\n    return openApiTools.filter(\n      (tool) =>\n        tool.name.toLowerCase().includes(query) ||\n        tool.description?.toLowerCase().includes(query)\n    );\n  }, [openApiTools, searchQuery]);\n\n  return (\n    <div className=\"flex flex-col h-full overflow-hidden\">\n      {showSharedOverlay && (\n        <div\n          className=\"fixed inset-0 z-modal-overlay bg-mask-03 backdrop-blur-03 pointer-events-none data-[state=open]:animate-in data-[state=open]:fade-in-0\"\n          data-state=\"open\"\n          aria-hidden=\"true\"\n        />\n      )}\n\n      <div className=\"flex-shrink-0 mb-4\">\n        <AdminListHeader\n          hasItems={isOpenApiLoading || (openApiTools?.length ?? 0) > 0}\n          searchQuery={searchQuery}\n          onSearchQueryChange={setSearchQuery}\n          onAction={handleAddAction}\n          actionLabel=\"Add OpenAPI Action\"\n          emptyStateText=\"Add custom actions from OpenAPI schemas.\"\n        />\n      </div>\n\n      <div className=\"flex-1 overflow-y-auto min-h-0\">\n        <div className=\"flex flex-col gap-4 w-full pb-4\">\n          {isOpenApiLoading ? (\n            <>\n              <ActionCardSkeleton />\n              <ActionCardSkeleton />\n            </>\n          ) : (\n            filteredTools.map((tool) => (\n              <OpenApiActionCard\n                key={tool.id}\n                tool={tool}\n                onAuthenticate={handleOpenAuthModal}\n                onManage={handleManageTool}\n                onDelete={handleDeleteTool}\n                onRename={handleRenameTool}\n                mutateOpenApiTools={mutateOpenApiTools}\n                onOpenDisconnectModal={handleOpenDisconnectModal}\n              />\n            ))\n          )}\n        </div>\n      </div>\n\n      <addOpenAPIActionModal.Provider>\n        <AddOpenAPIActionModal\n          skipOverlay\n          existingTool={toolBeingEdited}\n          onEditAuthentication={handleEditAuthenticationFromModal}\n          onDisconnectTool={(tool: ToolSnapshot) => {\n            handleOpenDisconnectModal(tool);\n            resetAuthModal();\n          }}\n          onSuccess={(tool) => {\n            setSelectedTool(tool);\n            openAPIAuthModal.toggle(true);\n            mutateOpenApiTools();\n          }}\n          onUpdate={() => {\n            mutateOpenApiTools();\n          }}\n          onClose={handleAddModalClose}\n        />\n      </addOpenAPIActionModal.Provider>\n      <openAPIAuthModal.Provider>\n        <OpenAPIAuthenticationModal\n          isOpen={openAPIAuthModal.isOpen}\n          skipOverlay\n          onClose={resetAuthModal}\n          title={authenticationModalTitle}\n          entityName={selectedTool?.name ?? null}\n          defaultMethod={authenticationDefaultMethod}\n          oauthConfigId={selectedTool?.oauth_config_id ?? null}\n          initialHeaders={selectedTool?.custom_headers ?? null}\n          passthroughOAuthEnabled={selectedTool?.passthrough_auth ?? false}\n          onConnect={handleConnect}\n          onSkip={resetAuthModal}\n        />\n      </openAPIAuthModal.Provider>\n\n      <DisconnectEntityModal\n        isOpen={disconnectModal.isOpen}\n        onClose={() => {\n          disconnectModal.toggle(false);\n          setToolPendingDisconnect(null);\n        }}\n        name={toolPendingDisconnect?.name ?? null}\n        onConfirmDisconnect={handleConfirmDisconnectFromModal}\n        onConfirmDisconnectAndDelete={handleDeleteToolFromModal}\n        isDisconnecting={isDisconnecting || isDeleting}\n        skipOverlay\n      />\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/actions/PerUserAuthConfig.tsx",
    "content": "\"use client\";\n\nimport { useEffect, useState } from \"react\";\nimport { FormField } from \"@/refresh-components/form/FormField\";\nimport InputKeyValue, {\n  KeyValue,\n} from \"@/refresh-components/inputs/InputKeyValue\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Separator from \"@/refresh-components/Separator\";\nimport type { MCPAuthFormValues } from \"@/sections/actions/modals/MCPAuthenticationModal\";\nimport { SvgUser } from \"@opal/icons\";\n\ninterface PerUserAuthConfigProps {\n  values: MCPAuthFormValues;\n  setFieldValue: (\n    field: keyof MCPAuthFormValues | string,\n    value: unknown\n  ) => void;\n}\n\nexport function PerUserAuthConfig({\n  values,\n  setFieldValue,\n}: PerUserAuthConfigProps) {\n  // Use draft state for KeyValue array (like in LLMConnectionFieldsCustom)\n  const [headersDraft, setHeadersDraft] = useState<KeyValue[]>(\n    Object.entries(values.auth_template?.headers || {}).map(([key, value]) => ({\n      key,\n      value: String(value),\n    }))\n  );\n\n  // Initialize auth template if not exists\n  useEffect(() => {\n    if (!values.auth_template) {\n      const initialHeaders = { Authorization: \"Bearer {api_key}\" };\n      setFieldValue(\"auth_template\", {\n        headers: initialHeaders,\n        required_fields: [\"api_key\"],\n      });\n      setHeadersDraft([{ key: \"Authorization\", value: \"Bearer {api_key}\" }]);\n    }\n  }, [values.auth_template, setFieldValue]);\n\n  // Update headers from KeyValue array\n  const handleHeadersChange = (items: KeyValue[]) => {\n    // Update draft state first\n    setHeadersDraft(items);\n\n    // Convert KeyValue[] to Record<string, string> for form value\n    const headersObject: Record<string, string> = {};\n    items.forEach((item) => {\n      if (item.key.trim()) {\n        headersObject[item.key] = item.value;\n      }\n    });\n    setFieldValue(\"auth_template.headers\", headersObject);\n    updateRequiredFields(headersObject);\n  };\n\n  const computeRequiredFieldsFromHeaders = (\n    headers: Record<string, string>\n  ): string[] => {\n    const placeholderRegex = /\\{([^}]+)\\}/g;\n    const requiredFields = new Set<string>();\n\n    Object.values(headers).forEach((value) => {\n      const matches = value.match(placeholderRegex);\n      if (matches) {\n        matches.forEach((match: string) => {\n          const field = match.slice(1, -1);\n          if (field !== \"user_email\") {\n            // user_email is automatically provided\n            requiredFields.add(field);\n          }\n        });\n      }\n    });\n    return Array.from(requiredFields);\n  };\n\n  // Extract required fields from placeholders in header values\n  const updateRequiredFields = (headers: Record<string, string>) => {\n    const requiredFields = computeRequiredFieldsFromHeaders(headers);\n    setFieldValue(\"auth_template.required_fields\", requiredFields);\n  };\n\n  // Update user credential value\n  const updateUserCredential = (field: string, value: string) => {\n    const currentCreds = values.user_credentials || {};\n    setFieldValue(\"user_credentials\", {\n      ...currentCreds,\n      [field]: value,\n    });\n  };\n\n  const requiredFields: string[] = values.auth_template?.required_fields?.length\n    ? values.auth_template.required_fields\n    : computeRequiredFieldsFromHeaders(values.auth_template?.headers || {});\n  const userCredentials = values.user_credentials || {};\n\n  return (\n    <div className=\"flex flex-col gap-4 -mx-2 px-2 py-2 bg-background-tint-00 rounded-12\">\n      {/* Authentication Headers */}\n      <FormField name=\"auth_template.headers\" state=\"idle\">\n        <FormField.Label>Authentication Headers</FormField.Label>\n        <FormField.Control asChild>\n          <InputKeyValue\n            keyTitle=\"Header Name\"\n            valueTitle=\"Header Value\"\n            items={headersDraft}\n            onChange={handleHeadersChange}\n            mode=\"fixed-line\"\n            layout=\"equal\"\n            addButtonLabel=\"Add Header\"\n          />\n        </FormField.Control>\n        <FormField.Description>\n          Format headers for each user to fill in their individual credentials.\n          Use placeholders like{\" \"}\n          <Text text03 secondaryMono className=\"inline\">\n            {\"{api_key}\"}\n          </Text>{\" \"}\n          or{\" \"}\n          <Text text03 secondaryMono className=\"inline\">\n            {\"{user_email}\"}\n          </Text>\n          . Users will be prompted to provide values for placeholders (except\n          user_email).\n        </FormField.Description>\n      </FormField>\n\n      {/* Only show user credentials section if there are required fields */}\n      {requiredFields.length > 0 && (\n        <>\n          <Separator className=\"-my-2\" />\n\n          <div className=\"flex flex-col gap-4\">\n            <div className=\"flex items-start gap-1\">\n              <SvgUser className=\"w-4 h-4 stroke-text-04 mt-0.5\" />\n              <div className=\"flex flex-col gap-1\">\n                <Text text04 secondaryAction as=\"p\">\n                  Only for your own account\n                </Text>\n                <Text text03 secondaryBody as=\"p\">\n                  The following credentials will not be shared with your\n                  organization.\n                </Text>\n              </div>\n            </div>\n\n            {/* User Credentials Fields */}\n            <div className=\"flex flex-col gap-3\">\n              {requiredFields.map((field: string) => {\n                const isSecretField =\n                  field.toLowerCase().includes(\"key\") ||\n                  field.toLowerCase().includes(\"token\") ||\n                  field.toLowerCase().includes(\"secret\") ||\n                  field.toLowerCase().includes(\"password\");\n\n                return (\n                  <FormField\n                    key={field}\n                    name={`user_credentials.${field}`}\n                    state=\"idle\"\n                  >\n                    <FormField.Label>\n                      {field\n                        .replace(/_/g, \" \")\n                        .replace(/\\b\\w/g, (l) => l.toUpperCase())}\n                    </FormField.Label>\n                    <FormField.Control asChild>\n                      <InputTypeIn\n                        name={`user_credentials.${field}`}\n                        type={isSecretField ? \"password\" : \"text\"}\n                        value={userCredentials[field] || \"\"}\n                        onChange={(e) =>\n                          updateUserCredential(field, e.target.value)\n                        }\n                        placeholder={`Enter ${field.replace(/_/g, \" \")}`}\n                        showClearButton={false}\n                      />\n                    </FormField.Control>\n                  </FormField>\n                );\n              })}\n            </div>\n          </div>\n        </>\n      )}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/actions/ToolItem.tsx",
    "content": "\"use client\";\n\nimport React, { useMemo } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport type { IconProps } from \"@opal/types\";\nimport {\n  SvgAlertTriangle,\n  SvgArrowLeftDot,\n  SvgArrowRightDot,\n  SvgCornerRightUpDot,\n  SvgMinusCircle,\n} from \"@opal/icons\";\n\ntype ToolItemVariant = \"mcp\" | \"openapi\";\n\ninterface OpenApiMetadata {\n  method?: string;\n  path?: string;\n}\n\nconst METHOD_ICON_MAP: Record<string, React.ReactNode> = {\n  GET: <SvgArrowLeftDot className=\"size-4 stroke-status-success-05\" />,\n  POST: <SvgArrowRightDot className=\"size-4 stroke-status-info-05\" />,\n  PUT: <SvgCornerRightUpDot className=\"size-4 stroke-status-info-05\" />,\n  PATCH: <SvgCornerRightUpDot className=\"size-4 stroke-status-warning-05\" />,\n  DELETE: <SvgMinusCircle className=\"size-4 stroke-status-error-05\" />,\n};\nconst METHOD_STYLE_MAP: Record<string, { bg: string; text: string }> = {\n  GET: { bg: \"bg-status-success-00\", text: \"text-status-success-05\" },\n  POST: { bg: \"bg-status-info-00\", text: \"text-status-info-05\" },\n  PUT: { bg: \"bg-status-info-00\", text: \"text-status-info-05\" },\n  PATCH: { bg: \"bg-status-warning-00\", text: \"text-status-warning-05\" },\n  DELETE: { bg: \"bg-status-error-00\", text: \"text-status-error-05\" },\n};\n\nfunction getMethodStyles(method?: string) {\n  if (!method) {\n    return {\n      label: undefined,\n      bg: \"bg-background-neutral-01\",\n      text: \"text-text-03\",\n    };\n  }\n\n  const upperMethod = method.toUpperCase();\n  const styles = METHOD_STYLE_MAP[upperMethod] ?? {\n    bg: \"bg-background-neutral-01\",\n    text: \"text-text-03\",\n  };\n\n  return {\n    label: upperMethod,\n    ...styles,\n  };\n}\n\nexport interface ToolItemProps {\n  // Tool information\n  name: string;\n  description: string;\n  icon?: React.FunctionComponent<IconProps>;\n\n  // Tool state\n  isAvailable?: boolean;\n  isEnabled?: boolean;\n\n  // Variant\n  variant?: ToolItemVariant;\n  openApiMetadata?: OpenApiMetadata;\n\n  // Handlers\n  onToggle?: (enabled: boolean) => void;\n\n  // Optional styling\n  className?: string;\n}\n\nconst ToolItem: React.FC<ToolItemProps> = ({\n  name,\n  description,\n  icon: Icon,\n  isAvailable = true,\n  isEnabled = true,\n  variant = \"mcp\",\n  openApiMetadata,\n  onToggle,\n  className,\n}) => {\n  const isMcpVariant = variant === \"mcp\";\n\n  const unavailableStyles =\n    isMcpVariant && !isAvailable\n      ? \"bg-background-neutral-02\"\n      : \"bg-background-tint-00\";\n\n  const textOpacity = isMcpVariant && !isAvailable ? \"opacity-50\" : \"\";\n\n  const {\n    label: methodLabel,\n    bg: methodBg,\n    text: methodText,\n  } = isMcpVariant\n    ? { label: undefined, bg: \"\", text: \"\" }\n    : getMethodStyles(openApiMetadata?.method);\n\n  const highlightedPathContent = useMemo(() => {\n    if (!openApiMetadata?.path) {\n      return null;\n    }\n\n    // Example: \"/repos/{owner}/{repo}\" => plain spans for static segments,\n    // colored spans for \"{owner}\" and \"{repo}\".\n    const path = openApiMetadata.path;\n    const segments: React.ReactNode[] = [];\n    const paramRegex = /\\{[^}]+\\}/g;\n    let lastIndex = 0;\n    let match: RegExpExecArray | null;\n    const highlightClass = methodText || \"text-text-03\";\n\n    while ((match = paramRegex.exec(path)) !== null) {\n      // Push plain text before the param, then the colored \"{param}\" segment.\n      if (match.index > lastIndex) {\n        segments.push(\n          <span key={`text-${match.index}`}>\n            {path.slice(lastIndex, match.index)}\n          </span>\n        );\n      }\n\n      segments.push(\n        <span key={`param-${match.index}`} className={highlightClass}>\n          {match[0]}\n        </span>\n      );\n\n      lastIndex = paramRegex.lastIndex;\n    }\n\n    if (lastIndex < path.length) {\n      segments.push(<span key=\"text-end\">{path.slice(lastIndex)}</span>);\n    }\n\n    return segments;\n  }, [openApiMetadata?.path, methodText]);\n\n  return (\n    <div\n      className={cn(\n        \"flex items-start justify-between w-full p-2 rounded-08 border border-border-01 gap-2\",\n        unavailableStyles,\n        className\n      )}\n    >\n      {/* Left Section: Icon and Content */}\n      <div className=\"flex gap-1 items-start flex-1 min-w-0 pr-2\">\n        {/* Icon Container */}\n        {Icon ? (\n          <div\n            className={cn(\n              \"flex items-center justify-center shrink-0\",\n              textOpacity\n            )}\n          >\n            <Icon size={20} className=\"h-5 w-5 stroke-text-04\" />\n          </div>\n        ) : (\n          <div className=\"flex items-center justify-center h-5 w-5\">\n            {METHOD_ICON_MAP[openApiMetadata?.method?.toUpperCase() ?? \"\"]}\n          </div>\n        )}\n\n        {/* Content Container */}\n        <div className=\"flex flex-col items-start flex-1 min-w-0\">\n          {/* Tool Name */}\n          <div className=\"flex items-center w-full min-h-[20px] px-0.5\">\n            <Truncated\n              mainUiAction\n              text04\n              className={cn(\n                \"truncate\",\n                textOpacity,\n                !isAvailable && \"line-through\"\n              )}\n            >\n              {name}\n            </Truncated>\n          </div>\n\n          {/* Description */}\n          <div className=\"px-0.5 w-full\">\n            <Truncated\n              text03\n              secondaryBody\n              className={cn(\"whitespace-pre-wrap\", textOpacity)}\n            >\n              {description}\n            </Truncated>\n          </div>\n        </div>\n      </div>\n\n      {/* Right Section */}\n      {isMcpVariant ? (\n        <div className=\"flex gap-2 items-start justify-end shrink-0\">\n          {/* Unavailable Badge */}\n          {!isAvailable && (\n            <div className=\"flex items-center min-h-[20px] px-0 py-0.5\">\n              <div className=\"flex gap-0.5 items-center\">\n                <div className=\"flex items-center px-0.5\">\n                  <Text as=\"p\" text03 secondaryBody className=\"text-right\">\n                    Tool unavailable\n                  </Text>\n                </div>\n                <div className=\"flex items-center justify-center p-0.5 w-4 h-4\">\n                  <SvgAlertTriangle className=\"w-3 h-3 stroke-status-warning-05\" />\n                </div>\n              </div>\n            </div>\n          )}\n\n          {/* Switch */}\n          <div className=\"flex items-center justify-center gap-1 h-5 px-0.5 py-0.5\">\n            <Switch\n              checked={isEnabled}\n              onCheckedChange={onToggle}\n              disabled={!isAvailable}\n              aria-label={`tool-toggle-${name}`}\n            />\n          </div>\n        </div>\n      ) : (\n        <div className=\"flex flex-col items-end justify-center\">\n          {methodLabel && (\n            <div\n              className={cn(\"rounded-04 border border-transparent\", methodBg)}\n            >\n              <Text\n                as=\"p\"\n                figureSmallLabel\n                className={cn(\"uppercase tracking-wide p-0.5 \", methodText)}\n              >\n                {methodLabel}\n              </Text>\n            </div>\n          )}\n\n          {openApiMetadata?.path && (\n            <Truncated secondaryMono text03 className=\"text-right truncate\">\n              {highlightedPathContent}\n            </Truncated>\n          )}\n        </div>\n      )}\n    </div>\n  );\n};\n\nToolItem.displayName = \"ToolItem\";\nexport default ToolItem;\n"
  },
  {
    "path": "web/src/sections/actions/ToolsList.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport FadingEdgeContainer from \"@/refresh-components/FadingEdgeContainer\";\nimport ToolItemSkeleton from \"@/sections/actions/skeleton/ToolItemSkeleton\";\nimport EnabledCount from \"@/refresh-components/EnabledCount\";\nimport { SvgEye, SvgXCircle } from \"@opal/icons\";\n\nexport interface ToolsListProps {\n  // Loading state\n  isFetching?: boolean;\n\n  // Tool count for footer\n  totalCount?: number;\n  enabledCount?: number;\n  showOnlyEnabled?: boolean;\n  onToggleShowOnlyEnabled?: () => void;\n  onUpdateToolsStatus?: (enabled: boolean) => void;\n\n  // Empty state of filtered tools\n  isEmpty?: boolean;\n  searchQuery?: string;\n  emptyMessage?: string;\n  emptySearchMessage?: string;\n\n  // Content\n  children?: React.ReactNode;\n\n  // Left action (for refresh button and last verified text)\n  leftAction?: React.ReactNode;\n\n  // Styling\n  className?: string;\n}\n\nconst ToolsList: React.FC<ToolsListProps> = ({\n  isFetching = false,\n  totalCount,\n  enabledCount = 0,\n  showOnlyEnabled = false,\n  onToggleShowOnlyEnabled,\n  onUpdateToolsStatus,\n  isEmpty = false,\n  searchQuery,\n  emptyMessage = \"No tools available\",\n  emptySearchMessage = \"No tools found\",\n  children,\n  leftAction,\n  className,\n}) => {\n  const showFooter =\n    totalCount !== undefined && enabledCount !== undefined && totalCount > 0;\n\n  return (\n    <>\n      <FadingEdgeContainer\n        direction=\"bottom\"\n        className={cn(\n          \"flex flex-col gap-1 items-start max-h-[30vh] overflow-y-auto\",\n          className\n        )}\n      >\n        {isFetching ? (\n          Array.from({ length: 5 }).map((_, index) => (\n            <ToolItemSkeleton key={`skeleton-${index}`} />\n          ))\n        ) : isEmpty ? (\n          <div className=\"flex items-center justify-center w-full py-8\">\n            <Text as=\"p\" text03 mainUiBody>\n              {searchQuery ? emptySearchMessage : emptyMessage}\n            </Text>\n          </div>\n        ) : (\n          children\n        )}\n      </FadingEdgeContainer>\n\n      {/* Footer showing enabled tool count with filter toggle */}\n      {showFooter && !(totalCount === 0) && !isFetching && (\n        <div className=\"pt-2 px-2\">\n          <div className=\"flex items-center justify-between gap-2 w-full\">\n            {/* Left action area */}\n            {leftAction}\n\n            {/* Right action area */}\n            <div className=\"flex items-center gap-1 ml-auto\">\n              {enabledCount > 0 && (\n                <EnabledCount\n                  enabledCount={enabledCount}\n                  totalCount={totalCount}\n                  name=\"tool\"\n                />\n              )}\n              {onToggleShowOnlyEnabled && enabledCount > 0 && (\n                <Button\n                  icon={SvgEye}\n                  prominence=\"tertiary\"\n                  size=\"sm\"\n                  onClick={onToggleShowOnlyEnabled}\n                  interaction={showOnlyEnabled ? \"hover\" : \"rest\"}\n                  tooltip={\n                    showOnlyEnabled ? \"Show all tools\" : \"Show only enabled\"\n                  }\n                  aria-label={\n                    showOnlyEnabled\n                      ? \"Show all tools\"\n                      : \"Show only enabled tools\"\n                  }\n                />\n              )}\n              {onUpdateToolsStatus && enabledCount > 0 && (\n                <Button\n                  icon={SvgXCircle}\n                  prominence=\"tertiary\"\n                  size=\"sm\"\n                  onClick={() => onUpdateToolsStatus(false)}\n                  tooltip=\"Disable all tools\"\n                  aria-label=\"Disable all tools\"\n                />\n              )}\n              {onUpdateToolsStatus && enabledCount === 0 && (\n                <Button\n                  prominence=\"tertiary\"\n                  onClick={() => onUpdateToolsStatus(true)}\n                >\n                  Enable all\n                </Button>\n              )}\n            </div>\n          </div>\n        </div>\n      )}\n    </>\n  );\n};\nToolsList.displayName = \"ToolsList\";\n\nexport default ToolsList;\n"
  },
  {
    "path": "web/src/sections/actions/ToolsSection.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { Button } from \"@opal/components\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { SvgFold } from \"@opal/icons\";\ninterface ToolsSectionProps {\n  onFold?: () => void;\n  searchQuery: string;\n  onSearchQueryChange: (query: string) => void;\n  className?: string;\n}\n\nconst ToolsSection: React.FC<ToolsSectionProps> = ({\n  onFold,\n  searchQuery,\n  onSearchQueryChange,\n  className,\n}) => {\n  const handleSearchChange = (e: React.ChangeEvent<HTMLInputElement>) => {\n    onSearchQueryChange(e.target.value);\n  };\n\n  return (\n    <div className={cn(\"w-full\", className)}>\n      <div className=\"flex gap-1 items-center w-full transition-all duration-300 ease-in-out px-2 pb-2\">\n        {/* Search Bar */}\n        <div className=\"flex-1 min-w-[160px]\">\n          <InputTypeIn\n            placeholder=\"Search tools…\"\n            aria-label=\"Search tools\"\n            value={searchQuery}\n            onChange={handleSearchChange}\n            leftSearchIcon\n            showClearButton\n            className=\"w-full\"\n          />\n        </div>\n\n        {/* Actions */}\n        <div className=\"flex gap-1 items-center p-1\">\n          {/* Fold Button */}\n          {onFold && (\n            <Button prominence=\"tertiary\" onClick={onFold} rightIcon={SvgFold}>\n              Fold\n            </Button>\n          )}\n        </div>\n      </div>\n    </div>\n  );\n};\n\nToolsSection.displayName = \"ToolsSection\";\nexport default ToolsSection;\n"
  },
  {
    "path": "web/src/sections/actions/modals/AddMCPServerModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { Formik, Form } from \"formik\";\nimport * as Yup from \"yup\";\nimport Modal from \"@/refresh-components/Modal\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport InputTextAreaField from \"@/refresh-components/form/InputTextAreaField\";\nimport { createMCPServer, updateMCPServer } from \"@/lib/tools/mcpService\";\nimport {\n  MCPServerCreateRequest,\n  MCPServerStatus,\n  MCPServer,\n} from \"@/lib/tools/interfaces\";\nimport { useModal } from \"@/refresh-components/contexts/ModalContext\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { Button } from \"@opal/components\";\nimport { toast } from \"@/hooks/useToast\";\nimport { ModalCreationInterface } from \"@/refresh-components/contexts/ModalContext\";\nimport { SvgCheckCircle, SvgServer, SvgUnplug } from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\n\ninterface AddMCPServerModalProps {\n  skipOverlay?: boolean;\n  activeServer: MCPServer | null;\n  setActiveServer: (server: MCPServer | null) => void;\n  disconnectModal: ModalCreationInterface;\n  manageServerModal: ModalCreationInterface;\n  onServerCreated?: (server: MCPServer) => void;\n  handleAuthenticate: (serverId: number) => void;\n  mutateMcpServers?: () => Promise<void>;\n}\n\nconst validationSchema = Yup.object().shape({\n  name: Yup.string().required(\"Server name is required\"),\n  description: Yup.string(),\n  server_url: Yup.string()\n    .url(\"Must be a valid URL\")\n    .required(\"Server URL is required\"),\n});\n\nexport default function AddMCPServerModal({\n  skipOverlay = false,\n  activeServer,\n  disconnectModal,\n  manageServerModal,\n  onServerCreated,\n  handleAuthenticate,\n  mutateMcpServers,\n}: AddMCPServerModalProps) {\n  const { isOpen, toggle } = useModal();\n  const [isSubmitting, setIsSubmitting] = useState(false);\n\n  // Use activeServer from props\n  const server = activeServer;\n\n  // Handler for disconnect button\n  const handleDisconnectClick = () => {\n    if (activeServer) {\n      // Server stays the same, just toggle modals\n      manageServerModal.toggle(false);\n      disconnectModal.toggle(true);\n    }\n  };\n\n  // Determine if we're in edit mode\n  const isEditMode = !!server;\n\n  const initialValues: MCPServerCreateRequest = {\n    name: server?.name || \"\",\n    description: server?.description || \"\",\n    server_url: server?.server_url || \"\",\n  };\n\n  const handleSubmit = async (values: MCPServerCreateRequest) => {\n    setIsSubmitting(true);\n\n    try {\n      if (isEditMode && server) {\n        // Update existing server\n        await updateMCPServer(server.id, values);\n        toast.success(\"MCP Server updated successfully\");\n        await mutateMcpServers?.();\n      } else {\n        // Create new server\n        const createdServer = await createMCPServer(values);\n\n        toast.success(\"MCP Server created successfully\");\n\n        await mutateMcpServers?.();\n\n        if (onServerCreated) {\n          onServerCreated(createdServer);\n        }\n      }\n      // Close modal. Do NOT clear `activeServer` here because this modal\n      // frequently transitions to other modals (authenticate/disconnect), and\n      // clearing would race those flows.\n      toggle(false);\n    } catch (error) {\n      console.error(\n        `Error ${isEditMode ? \"updating\" : \"creating\"} MCP server:`,\n        error\n      );\n      toast.error(\n        error instanceof Error\n          ? error.message\n          : `Failed to ${isEditMode ? \"update\" : \"create\"} MCP server`\n      );\n    } finally {\n      setIsSubmitting(false);\n    }\n  };\n\n  // Handle modal close to clear server state\n  const handleModalClose = (open: boolean) => {\n    toggle(open);\n  };\n\n  return (\n    <Modal open={isOpen} onOpenChange={handleModalClose}>\n      <Modal.Content\n        width=\"sm\"\n        height=\"lg\"\n        preventAccidentalClose={false}\n        skipOverlay={skipOverlay}\n      >\n        <Formik\n          initialValues={initialValues}\n          validationSchema={validationSchema}\n          onSubmit={handleSubmit}\n        >\n          {({ isValid, dirty }) => (\n            <Form>\n              <Modal.Header\n                icon={SvgServer}\n                title={isEditMode ? \"Manage MCP Server\" : \"Add MCP Server\"}\n                description={\n                  isEditMode\n                    ? \"Update your MCP server configuration and manage authentication.\"\n                    : \"Connect MCP (Model Context Protocol) server to add custom actions.\"\n                }\n                onClose={() => handleModalClose(false)}\n              />\n\n              <Modal.Body>\n                <InputLayouts.Vertical name=\"name\" title=\"Server Name\">\n                  <InputTypeInField\n                    name=\"name\"\n                    placeholder=\"Name your MCP server\"\n                    autoFocus\n                  />\n                </InputLayouts.Vertical>\n\n                <InputLayouts.Vertical\n                  name=\"description\"\n                  title=\"Description\"\n                  suffix=\"optional\"\n                >\n                  <InputTextAreaField\n                    name=\"description\"\n                    placeholder=\"More details about the MCP server\"\n                    rows={3}\n                  />\n                </InputLayouts.Vertical>\n\n                <Separator noPadding />\n\n                <InputLayouts.Vertical\n                  name=\"server_url\"\n                  title=\"MCP Server URL\"\n                  subDescription=\"Only connect to servers you trust. You are responsible for actions taken with this connection and keeping your tools updated.\"\n                >\n                  <InputTypeInField\n                    name=\"server_url\"\n                    placeholder=\"https://your-mcp-server.com/mcp\"\n                  />\n                </InputLayouts.Vertical>\n\n                {/* Authentication Status Section - Only show in edit mode when authenticated */}\n                {isEditMode &&\n                  server?.is_authenticated &&\n                  server?.status === MCPServerStatus.CONNECTED && (\n                    <Section\n                      flexDirection=\"row\"\n                      justifyContent=\"between\"\n                      alignItems=\"start\"\n                      gap={1}\n                    >\n                      <Section gap={0.25} alignItems=\"start\">\n                        <Section\n                          flexDirection=\"row\"\n                          gap={0.5}\n                          alignItems=\"center\"\n                          width=\"fit\"\n                        >\n                          <SvgCheckCircle className=\"w-4 h-4 stroke-status-success-05\" />\n                          <Text>Authenticated &amp; Connected</Text>\n                        </Section>\n                        <Text secondaryBody text03>\n                          {server.auth_type === \"OAUTH\"\n                            ? `OAuth connected to ${server.owner}`\n                            : server.auth_type === \"API_TOKEN\"\n                              ? \"API token configured\"\n                              : \"Connected\"}\n                        </Text>\n                      </Section>\n                      <Section\n                        flexDirection=\"row\"\n                        gap={0.5}\n                        alignItems=\"center\"\n                        width=\"fit\"\n                      >\n                        <Button\n                          icon={SvgUnplug}\n                          prominence=\"tertiary\"\n                          type=\"button\"\n                          tooltip=\"Disconnect Server\"\n                          onClick={handleDisconnectClick}\n                        />\n                        <Button\n                          prominence=\"secondary\"\n                          type=\"button\"\n                          onClick={() => {\n                            // Close this modal and open the auth modal for this server\n                            toggle(false);\n                            handleAuthenticate(server.id);\n                          }}\n                        >\n                          Edit Configs\n                        </Button>\n                      </Section>\n                    </Section>\n                  )}\n              </Modal.Body>\n\n              <Modal.Footer>\n                <Button\n                  disabled={isSubmitting}\n                  prominence=\"secondary\"\n                  type=\"button\"\n                  onClick={() => handleModalClose(false)}\n                >\n                  Cancel\n                </Button>\n                <Button\n                  disabled={isSubmitting || !isValid || !dirty}\n                  type=\"submit\"\n                >\n                  {isSubmitting\n                    ? isEditMode\n                      ? \"Saving...\"\n                      : \"Adding...\"\n                    : isEditMode\n                      ? \"Save Changes\"\n                      : \"Add Server\"}\n                </Button>\n              </Modal.Footer>\n            </Form>\n          )}\n        </Formik>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/actions/modals/AddOpenAPIActionModal.tsx",
    "content": "\"use client\";\n\nimport { markdown } from \"@opal/utils\";\nimport Link from \"next/link\";\nimport Modal from \"@/refresh-components/Modal\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport InputTextAreaField from \"@/refresh-components/form/InputTextAreaField\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { useCallback, useEffect, useMemo, useState } from \"react\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport { Button } from \"@opal/components\";\nimport { Hoverable } from \"@opal/core\";\nimport { MethodSpec, ToolSnapshot } from \"@/lib/tools/interfaces\";\nimport {\n  validateToolDefinition,\n  createCustomTool,\n  updateCustomTool,\n} from \"@/lib/tools/openApiService\";\nimport ToolItem from \"@/sections/actions/ToolItem\";\nimport debounce from \"lodash/debounce\";\nimport { DOCS_ADMINS_PATH } from \"@/lib/constants\";\nimport { useModal } from \"@/refresh-components/contexts/ModalContext\";\nimport { Formik, Form, useFormikContext } from \"formik\";\nimport * as Yup from \"yup\";\nimport { toast } from \"@/hooks/useToast\";\nimport {\n  SvgActions,\n  SvgBracketCurly,\n  SvgCheckCircle,\n  SvgAlertCircle,\n  SvgUnplug,\n} from \"@opal/icons\";\nimport InfoBlock from \"@/refresh-components/messages/InfoBlock\";\nimport { getActionIcon } from \"@/lib/tools/mcpUtils\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport EmptyMessage from \"@/refresh-components/EmptyMessage\";\n\ninterface AddOpenAPIActionModalProps {\n  skipOverlay?: boolean;\n  onSuccess?: (tool: ToolSnapshot) => void;\n  onUpdate?: (tool: ToolSnapshot) => void;\n  existingTool?: ToolSnapshot | null;\n  onClose?: () => void;\n  onEditAuthentication?: (tool: ToolSnapshot) => void;\n  onDisconnectTool?: (tool: ToolSnapshot) => Promise<void> | void;\n}\n\ninterface OpenAPIActionFormValues {\n  definition: string;\n}\n\nconst validationSchema = Yup.object().shape({\n  definition: Yup.string().required(\"OpenAPI schema definition is required\"),\n});\n\nfunction parseJsonWithTrailingCommas(jsonString: string) {\n  // Regular expression to remove trailing commas before } or ]\n  let cleanedJsonString = jsonString.replace(/,\\s*([}\\]])/g, \"$1\");\n  // Replace True with true, False with false, and None with null\n  cleanedJsonString = cleanedJsonString\n    .replace(/\\bTrue\\b/g, \"true\")\n    .replace(/\\bFalse\\b/g, \"false\")\n    .replace(/\\bNone\\b/g, \"null\");\n  // Now parse the cleaned JSON string\n  return JSON.parse(cleanedJsonString);\n}\n\nfunction prettifyDefinition(definition: any) {\n  return JSON.stringify(definition, null, 2);\n}\n\ninterface FormContentProps {\n  handleClose: () => void;\n  existingTool: ToolSnapshot | null;\n  onEditAuthentication?: (tool: ToolSnapshot) => void;\n  onDisconnectTool?: (tool: ToolSnapshot) => Promise<void> | void;\n}\n\nfunction FormContent({\n  handleClose,\n  existingTool,\n  onEditAuthentication,\n  onDisconnectTool,\n}: FormContentProps) {\n  const { values, setFieldValue, setFieldError, dirty, isSubmitting } =\n    useFormikContext<OpenAPIActionFormValues>();\n\n  const [methodSpecs, setMethodSpecs] = useState<MethodSpec[] | null>(null);\n  const [name, setName] = useState<string | null>(null);\n  const [description, setDescription] = useState<string | undefined>(undefined);\n  const [url, setUrl] = useState<string | undefined>(undefined);\n\n  const isEditMode = Boolean(existingTool);\n\n  const handleFormat = useCallback(() => {\n    if (!values.definition.trim()) {\n      return;\n    }\n\n    try {\n      const formatted = prettifyDefinition(\n        parseJsonWithTrailingCommas(values.definition)\n      );\n      setFieldValue(\"definition\", formatted);\n      setFieldError(\"definition\", \"\");\n    } catch {\n      setFieldError(\"definition\", \"Invalid JSON format\");\n    }\n  }, [values.definition, setFieldValue, setFieldError]);\n\n  const validateDefinition = useCallback(\n    async (\n      rawDefinition: string,\n      setFieldError: (field: string, message: string) => void\n    ) => {\n      if (!rawDefinition.trim()) {\n        setMethodSpecs(null);\n        setFieldError(\"definition\", \"\");\n        return;\n      }\n\n      try {\n        const parsedDefinition = parseJsonWithTrailingCommas(rawDefinition);\n        const derivedName = parsedDefinition?.info?.title;\n        const derivedDescription = parsedDefinition?.info?.description;\n        const derivedUrl = parsedDefinition?.servers?.[0]?.url;\n\n        setName(derivedName);\n        setDescription(derivedDescription);\n        setUrl(derivedUrl);\n\n        const response = await validateToolDefinition({\n          definition: parsedDefinition,\n        });\n\n        if (response.error) {\n          setMethodSpecs(null);\n          setFieldError(\"definition\", response.error);\n        } else {\n          setMethodSpecs(response.data ?? []);\n          setFieldError(\"definition\", \"\");\n        }\n      } catch {\n        setMethodSpecs(null);\n        setFieldError(\"definition\", \"Invalid JSON format\");\n      }\n    },\n    []\n  );\n\n  const debouncedValidateDefinition = useMemo(\n    () => debounce(validateDefinition, 300),\n    [validateDefinition]\n  );\n\n  const modalTitle = isEditMode ? \"Edit OpenAPI action\" : \"Add OpenAPI action\";\n  const modalDescription = isEditMode\n    ? \"Update the OpenAPI schema for this action.\"\n    : \"Add OpenAPI schema to add custom actions.\";\n  const primaryButtonLabel = isSubmitting\n    ? isEditMode\n      ? \"Saving...\"\n      : \"Adding...\"\n    : isEditMode\n      ? \"Save Changes\"\n      : \"Add Action\";\n\n  const hasOAuthConfig = Boolean(existingTool?.oauth_config_id);\n  const hasCustomHeaders =\n    Array.isArray(existingTool?.custom_headers) &&\n    (existingTool?.custom_headers?.length ?? 0) > 0;\n  const hasPassthroughAuth = Boolean(existingTool?.passthrough_auth);\n  const hasAuthenticationConfigured =\n    hasOAuthConfig || hasCustomHeaders || hasPassthroughAuth;\n  const authenticationDescription = useMemo(() => {\n    if (!existingTool) {\n      return \"\";\n    }\n    if (hasOAuthConfig) {\n      return existingTool.oauth_config_name\n        ? `OAuth connected via ${existingTool.oauth_config_name}`\n        : \"OAuth authentication configured\";\n    }\n    if (hasCustomHeaders) {\n      return \"Custom authentication headers configured\";\n    }\n    if (hasPassthroughAuth) {\n      return \"Passthrough authentication enabled\";\n    }\n    return \"\";\n  }, [existingTool, hasOAuthConfig, hasCustomHeaders, hasPassthroughAuth]);\n\n  const showAuthenticationStatus = Boolean(\n    isEditMode && existingTool?.enabled && hasAuthenticationConfigured\n  );\n\n  const handleEditAuthenticationClick = useCallback(() => {\n    if (!existingTool || !onEditAuthentication) {\n      return;\n    }\n    handleClose();\n    onEditAuthentication(existingTool);\n  }, [existingTool, onEditAuthentication, handleClose]);\n\n  useEffect(() => {\n    if (!values.definition.trim()) {\n      setMethodSpecs(null);\n      setFieldError(\"definition\", \"\");\n      debouncedValidateDefinition.cancel();\n      return () => {\n        debouncedValidateDefinition.cancel();\n      };\n    }\n\n    debouncedValidateDefinition(values.definition, setFieldError);\n\n    return () => {\n      debouncedValidateDefinition.cancel();\n    };\n  }, [\n    values.definition,\n    debouncedValidateDefinition,\n    setFieldError,\n    setMethodSpecs,\n  ]);\n\n  return (\n    <Form>\n      <Modal.Header\n        icon={SvgActions}\n        title={modalTitle}\n        description={modalDescription}\n        onClose={handleClose}\n      />\n\n      <Modal.Body>\n        <InputLayouts.Vertical\n          name=\"definition\"\n          title=\"OpenAPI Schema Definition\"\n          subDescription={markdown(\n            `Specify an OpenAPI schema that defines the APIs you want to make available as part of this action. Learn more about [OpenAPI actions](${DOCS_ADMINS_PATH}/actions/openapi).`\n          )}\n        >\n          <Hoverable.Root group=\"definitionField\" widthVariant=\"full\">\n            <div className=\"relative w-full\">\n              {values.definition.trim() && (\n                <div className=\"absolute z-[100000] top-2 right-2 bg-background-tint-00\">\n                  <Hoverable.Item\n                    group=\"definitionField\"\n                    variant=\"opacity-on-hover\"\n                  >\n                    <div className=\"flex\">\n                      <CopyIconButton\n                        prominence=\"tertiary\"\n                        size=\"sm\"\n                        getCopyText={() => values.definition}\n                        tooltip=\"Copy definition\"\n                      />\n                      <Button\n                        prominence=\"tertiary\"\n                        size=\"sm\"\n                        icon={SvgBracketCurly}\n                        tooltip=\"Format definition\"\n                        onClick={handleFormat}\n                      />\n                    </div>\n                  </Hoverable.Item>\n                </div>\n              )}\n              <InputTextAreaField\n                name=\"definition\"\n                rows={14}\n                placeholder=\"Enter your OpenAPI schema here\"\n                className=\"font-main-ui-mono\"\n              />\n            </div>\n          </Hoverable.Root>\n        </InputLayouts.Vertical>\n\n        <Separator noPadding />\n\n        {methodSpecs && methodSpecs.length > 0 ? (\n          <>\n            {name && (\n              <InfoBlock\n                icon={getActionIcon(url || \"\", name || \"\")}\n                title={name}\n                description={description}\n              />\n            )}\n            {url && (\n              <InfoBlock\n                icon={SvgAlertCircle}\n                title={url || \"\"}\n                description=\"URL found in the schema. Only connect to servers you trust.\"\n              />\n            )}\n            <Separator noPadding />\n            <Section gap={0.5}>\n              {methodSpecs.map((method) => (\n                <ToolItem\n                  key={`${method.method}-${method.path}-${method.name}`}\n                  name={method.name}\n                  description={method.summary || \"No summary provided\"}\n                  variant=\"openapi\"\n                  openApiMetadata={{\n                    method: method.method,\n                    path: method.path,\n                  }}\n                />\n              ))}\n            </Section>\n          </>\n        ) : (\n          <EmptyMessage\n            title=\"No Actions Found\"\n            icon={SvgActions}\n            description=\"Provide OpenAPI schema to preview actions here.\"\n          />\n        )}\n\n        {showAuthenticationStatus && (\n          <Section\n            flexDirection=\"row\"\n            justifyContent=\"between\"\n            alignItems=\"start\"\n            gap={1}\n          >\n            <Section gap={0.25} alignItems=\"start\">\n              <Section\n                flexDirection=\"row\"\n                gap={0.5}\n                alignItems=\"center\"\n                width=\"fit\"\n              >\n                <SvgCheckCircle className=\"w-4 h-4 stroke-status-success-05\" />\n                <Text>\n                  {existingTool?.enabled\n                    ? \"Authenticated & Enabled\"\n                    : \"Authentication configured\"}\n                </Text>\n              </Section>\n              {authenticationDescription && (\n                <Text secondaryBody text03 className=\"pl-5\">\n                  {authenticationDescription}\n                </Text>\n              )}\n            </Section>\n            <Section\n              flexDirection=\"row\"\n              gap={0.5}\n              alignItems=\"center\"\n              width=\"fit\"\n            >\n              <Button\n                icon={SvgUnplug}\n                prominence=\"tertiary\"\n                type=\"button\"\n                tooltip=\"Disable action\"\n                onClick={() => {\n                  if (!existingTool || !onDisconnectTool) {\n                    return;\n                  }\n                  onDisconnectTool(existingTool);\n                }}\n              />\n              <Button\n                disabled={!onEditAuthentication}\n                prominence=\"secondary\"\n                type=\"button\"\n                onClick={handleEditAuthenticationClick}\n              >\n                Edit Configs\n              </Button>\n            </Section>\n          </Section>\n        )}\n      </Modal.Body>\n\n      <Modal.Footer>\n        <Button\n          disabled={isSubmitting}\n          prominence=\"secondary\"\n          type=\"button\"\n          onClick={handleClose}\n        >\n          Cancel\n        </Button>\n        <Button disabled={isSubmitting || !dirty} type=\"submit\">\n          {primaryButtonLabel}\n        </Button>\n      </Modal.Footer>\n    </Form>\n  );\n}\n\nexport default function AddOpenAPIActionModal({\n  skipOverlay = false,\n  onSuccess,\n  onUpdate,\n  existingTool = null,\n  onClose,\n  onEditAuthentication,\n  onDisconnectTool,\n}: AddOpenAPIActionModalProps) {\n  const { isOpen, toggle } = useModal();\n\n  const handleModalClose = useCallback(\n    (open: boolean) => {\n      toggle(open);\n      if (!open) {\n        onClose?.();\n      }\n    },\n    [toggle, onClose]\n  );\n\n  const handleClose = useCallback(() => {\n    handleModalClose(false);\n  }, [handleModalClose]);\n\n  const initialValues: OpenAPIActionFormValues = useMemo(\n    () => ({\n      definition: existingTool?.definition\n        ? prettifyDefinition(existingTool.definition)\n        : \"\",\n    }),\n    [existingTool]\n  );\n\n  const handleSubmit = async (values: OpenAPIActionFormValues) => {\n    let parsedDefinition;\n    try {\n      parsedDefinition = parseJsonWithTrailingCommas(values.definition);\n    } catch (error) {\n      console.error(\"Error parsing OpenAPI definition:\", error);\n      toast.error(\"Invalid JSON format in OpenAPI schema definition\");\n      return;\n    }\n\n    const derivedName = parsedDefinition?.info?.title;\n    const derivedDescription = parsedDefinition?.info?.description;\n\n    if (existingTool) {\n      try {\n        const updatePayload: {\n          name?: string;\n          description?: string;\n          definition: Record<string, any>;\n          custom_headers?: { key: string; value: string }[];\n          passthrough_auth?: boolean;\n          oauth_config_id?: number | null;\n        } = {\n          definition: parsedDefinition,\n          custom_headers: existingTool.custom_headers,\n          passthrough_auth: existingTool.passthrough_auth,\n          oauth_config_id: existingTool.oauth_config_id,\n        };\n\n        if (derivedName) {\n          updatePayload.name = derivedName;\n        }\n\n        if (derivedDescription) {\n          updatePayload.description = derivedDescription;\n        }\n\n        const response = await updateCustomTool(existingTool.id, updatePayload);\n\n        if (response.error) {\n          toast.error(response.error);\n        } else {\n          toast.success(\"OpenAPI action updated successfully\");\n          handleClose();\n          if (response.data && onUpdate) {\n            onUpdate(response.data);\n          }\n        }\n      } catch (error) {\n        console.error(\"Error updating OpenAPI action:\", error);\n        toast.error(\"Failed to update OpenAPI action\");\n      }\n      return;\n    }\n\n    try {\n      const response = await createCustomTool({\n        name: derivedName,\n        description: derivedDescription || undefined,\n        definition: parsedDefinition,\n        custom_headers: [],\n        passthrough_auth: false,\n      });\n\n      if (response.error) {\n        toast.error(response.error);\n      } else {\n        toast.success(\"OpenAPI action created successfully\");\n        handleClose();\n        if (response.data && onSuccess) {\n          onSuccess(response.data);\n        }\n      }\n    } catch (error) {\n      console.error(\"Error creating OpenAPI action:\", error);\n      toast.error(\"Failed to create OpenAPI action\");\n    }\n  };\n\n  return (\n    <Modal open={isOpen} onOpenChange={handleModalClose}>\n      <Modal.Content width=\"sm\" height=\"lg\" skipOverlay={skipOverlay}>\n        <Formik\n          initialValues={initialValues}\n          validationSchema={validationSchema}\n          onSubmit={handleSubmit}\n          enableReinitialize\n        >\n          <FormContent\n            handleClose={handleClose}\n            existingTool={existingTool}\n            onEditAuthentication={onEditAuthentication}\n            onDisconnectTool={onDisconnectTool}\n          />\n        </Formik>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/actions/modals/DisconnectEntityModal.tsx",
    "content": "\"use client\";\n\nimport { useRef } from \"react\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\nimport { SvgUnplug } from \"@opal/icons\";\ninterface DisconnectEntityModalProps {\n  isOpen: boolean;\n  onClose: () => void;\n  name: string | null;\n  onConfirmDisconnect: () => void;\n  onConfirmDisconnectAndDelete?: () => void;\n  isDisconnecting?: boolean;\n  skipOverlay?: boolean;\n}\n\nexport default function DisconnectEntityModal({\n  isOpen,\n  onClose,\n  name,\n  onConfirmDisconnect,\n  onConfirmDisconnectAndDelete,\n  isDisconnecting = false,\n  skipOverlay = false,\n}: DisconnectEntityModalProps) {\n  const disconnectButtonRef = useRef<HTMLButtonElement>(null);\n\n  if (!name) return null;\n\n  return (\n    <Modal\n      open={isOpen}\n      onOpenChange={(open) => {\n        if (!open) {\n          onClose();\n        }\n      }}\n    >\n      <Modal.Content\n        width=\"sm\"\n        preventAccidentalClose={false}\n        skipOverlay={skipOverlay}\n        onOpenAutoFocus={(e) => {\n          e.preventDefault();\n          disconnectButtonRef.current?.focus();\n        }}\n      >\n        <Modal.Header\n          icon={({ className }) => (\n            <SvgUnplug className={cn(className, \"stroke-action-danger-05\")} />\n          )}\n          title={`Disconnect ${name}`}\n          onClose={onClose}\n        />\n\n        <Modal.Body>\n          <Text as=\"p\" text03 mainUiBody>\n            All tools connected to {name} will stop working. You can reconnect\n            to this server later if needed.\n          </Text>\n          <Text as=\"p\" text03 mainUiBody>\n            Are you sure you want to proceed?\n          </Text>\n        </Modal.Body>\n\n        <Modal.Footer>\n          <Button\n            disabled={isDisconnecting}\n            prominence=\"secondary\"\n            onClick={onClose}\n          >\n            Cancel\n          </Button>\n          {onConfirmDisconnectAndDelete && (\n            <Button\n              disabled={isDisconnecting}\n              variant=\"danger\"\n              prominence=\"secondary\"\n              onClick={onConfirmDisconnectAndDelete}\n            >\n              Disconnect &amp; Delete\n            </Button>\n          )}\n          <Button\n            disabled={isDisconnecting}\n            variant=\"danger\"\n            onClick={onConfirmDisconnect}\n            ref={disconnectButtonRef}\n          >\n            {isDisconnecting ? \"Disconnecting...\" : \"Disconnect\"}\n          </Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/actions/modals/MCPAuthenticationModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useMemo, useEffect } from \"react\";\nimport useSWR, { KeyedMutator } from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { FormField } from \"@/refresh-components/form/FormField\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport PasswordInputTypeIn from \"@/refresh-components/inputs/PasswordInputTypeIn\";\nimport { Button } from \"@opal/components\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Formik, Form } from \"formik\";\nimport * as Yup from \"yup\";\nimport { useModal } from \"@/refresh-components/contexts/ModalContext\";\nimport {\n  MCPAuthenticationPerformer,\n  MCPAuthenticationType,\n  MCPTransportType,\n  MCPServerStatus,\n  MCPServer,\n  MCPServersResponse,\n} from \"@/lib/tools/interfaces\";\nimport Separator from \"@/refresh-components/Separator\";\nimport Tabs from \"@/refresh-components/Tabs\";\nimport { PerUserAuthConfig } from \"@/sections/actions/PerUserAuthConfig\";\nimport { updateMCPServerStatus, upsertMCPServer } from \"@/lib/tools/mcpService\";\nimport Message from \"@/refresh-components/messages/Message\";\nimport { toast } from \"@/hooks/useToast\";\nimport { SvgArrowExchange } from \"@opal/icons\";\nimport { useAuthType } from \"@/lib/hooks\";\nimport { AuthType } from \"@/lib/constants\";\n\ninterface MCPAuthenticationModalProps {\n  mcpServer: MCPServer | null;\n  skipOverlay?: boolean;\n  onTriggerFetchTools?: (serverId: number) => Promise<void> | void;\n  mutateMcpServers: KeyedMutator<MCPServersResponse>;\n}\n\ninterface MCPAuthTemplate {\n  headers: Record<string, string>;\n  required_fields: string[];\n}\n\nexport interface MCPAuthFormValues {\n  transport: MCPTransportType;\n  auth_type: MCPAuthenticationType;\n  auth_performer: MCPAuthenticationPerformer;\n  api_token: string;\n  auth_template: MCPAuthTemplate;\n  user_credentials: Record<string, string>;\n  oauth_client_id: string;\n  oauth_client_secret: string;\n}\n\nconst validationSchema = Yup.object().shape({\n  transport: Yup.string()\n    .oneOf([MCPTransportType.STREAMABLE_HTTP, MCPTransportType.SSE])\n    .required(\"Transport is required\"),\n  auth_type: Yup.string()\n    .oneOf([\n      MCPAuthenticationType.NONE,\n      MCPAuthenticationType.API_TOKEN,\n      MCPAuthenticationType.OAUTH,\n      MCPAuthenticationType.PT_OAUTH,\n    ])\n    .required(\"Authentication type is required\"),\n  auth_performer: Yup.string().when(\"auth_type\", {\n    is: (auth_type: string) => auth_type !== MCPAuthenticationType.NONE,\n    then: (schema) =>\n      schema\n        .oneOf([\n          MCPAuthenticationPerformer.ADMIN,\n          MCPAuthenticationPerformer.PER_USER,\n        ])\n        .required(\"Authentication performer is required\"),\n    otherwise: (schema) => schema.notRequired(),\n  }),\n  api_token: Yup.string().when([\"auth_type\", \"auth_performer\"], {\n    is: (auth_type: string, auth_performer: string) =>\n      auth_type === MCPAuthenticationType.API_TOKEN &&\n      auth_performer === MCPAuthenticationPerformer.ADMIN,\n    then: (schema) => schema.required(\"API token is required\"),\n    otherwise: (schema) => schema.notRequired(),\n  }),\n  oauth_client_id: Yup.string().when(\"auth_type\", {\n    is: MCPAuthenticationType.OAUTH,\n    then: (schema) => schema.notRequired(),\n    otherwise: (schema) => schema.notRequired(),\n  }),\n  oauth_client_secret: Yup.string().when(\"auth_type\", {\n    is: MCPAuthenticationType.OAUTH,\n    then: (schema) => schema.notRequired(),\n    otherwise: (schema) => schema.notRequired(),\n  }),\n});\n\nexport default function MCPAuthenticationModal({\n  mcpServer,\n  skipOverlay = false,\n  onTriggerFetchTools,\n  mutateMcpServers,\n}: MCPAuthenticationModalProps) {\n  const { isOpen, toggle } = useModal();\n  const [activeAuthTab, setActiveAuthTab] = useState<\"per-user\" | \"admin\">(\n    \"per-user\"\n  );\n  const [isSubmitting, setIsSubmitting] = useState(false);\n\n  // Check if OAuth is enabled for the Onyx instance\n  const authType = useAuthType();\n  const isOAuthEnabled =\n    authType === AuthType.OIDC || authType === AuthType.GOOGLE_OAUTH;\n\n  const redirectUri = useMemo(() => {\n    if (typeof window === \"undefined\") {\n      return \"https://{YOUR_DOMAIN}/mcp/oauth/callback\";\n    }\n    return `${window.location.origin}/mcp/oauth/callback`;\n  }, []);\n\n  // Get the current frontend URL for redirect URI\n  const { data: fullServer } = useSWR<MCPServer>(\n    mcpServer ? SWR_KEYS.adminMcpServer(mcpServer.id) : null,\n    errorHandlingFetcher\n  );\n\n  // Set the initial active tab based on the server configuration\n  useEffect(() => {\n    if (fullServer) {\n      if (\n        fullServer.auth_performer === MCPAuthenticationPerformer.ADMIN ||\n        fullServer.auth_type === MCPAuthenticationType.NONE\n      ) {\n        setActiveAuthTab(\"admin\");\n      } else {\n        setActiveAuthTab(\"per-user\");\n      }\n    }\n  }, [fullServer]);\n\n  // Helper function to determine transport from URL\n  const getTransportFromUrl = (url: string): MCPTransportType => {\n    const lowerUrl = url.toLowerCase();\n    if (lowerUrl.endsWith(\"sse\")) {\n      return MCPTransportType.SSE;\n    } else if (lowerUrl.endsWith(\"mcp\")) {\n      return MCPTransportType.STREAMABLE_HTTP;\n    }\n    // Default to STREAMABLE_HTTP\n    return MCPTransportType.STREAMABLE_HTTP;\n  };\n\n  const initialValues = useMemo<MCPAuthFormValues>(() => {\n    if (!fullServer) {\n      return {\n        transport: mcpServer?.server_url\n          ? getTransportFromUrl(mcpServer.server_url)\n          : MCPTransportType.STREAMABLE_HTTP,\n        auth_type: MCPAuthenticationType.OAUTH,\n        auth_performer: MCPAuthenticationPerformer.PER_USER,\n        api_token: \"\",\n        auth_template: {\n          headers: { Authorization: \"Bearer {api_key}\" },\n          required_fields: [\"api_key\"],\n        },\n        user_credentials: {},\n        oauth_client_id: \"\",\n        oauth_client_secret: \"\",\n      };\n    }\n\n    return {\n      transport: fullServer.server_url\n        ? getTransportFromUrl(fullServer.server_url)\n        : (fullServer.transport as MCPTransportType) ||\n          MCPTransportType.STREAMABLE_HTTP,\n      auth_type:\n        (fullServer.auth_type as MCPAuthenticationType) ||\n        MCPAuthenticationType.OAUTH,\n      auth_performer:\n        (fullServer.auth_performer as MCPAuthenticationPerformer) ||\n        MCPAuthenticationPerformer.PER_USER,\n      // Admin API Token\n      api_token: fullServer.admin_credentials?.api_key || \"\",\n      // OAuth Credentials\n      oauth_client_id: fullServer.admin_credentials?.client_id || \"\",\n      oauth_client_secret: fullServer.admin_credentials?.client_secret || \"\",\n      // Auth Template\n      auth_template: (fullServer.auth_template as MCPAuthTemplate) || {\n        headers: { Authorization: \"Bearer {api_key}\" },\n        required_fields: [\"api_key\"],\n      },\n      // User Credentials (substitutions)\n      user_credentials:\n        (fullServer.user_credentials as Record<string, string>) || {},\n    };\n  }, [fullServer, mcpServer?.server_url]);\n\n  const constructServerData = (values: MCPAuthFormValues) => {\n    if (!mcpServer) return null;\n    const authType = values.auth_type;\n\n    return {\n      name: mcpServer.name,\n      description: mcpServer.description || undefined,\n      server_url: mcpServer.server_url,\n      transport: values.transport,\n      auth_type: values.auth_type,\n      auth_performer: values.auth_performer,\n      api_token:\n        authType === MCPAuthenticationType.API_TOKEN &&\n        values.auth_performer === MCPAuthenticationPerformer.ADMIN\n          ? values.api_token\n          : undefined,\n      auth_template:\n        values.auth_performer === MCPAuthenticationPerformer.PER_USER &&\n        authType === MCPAuthenticationType.API_TOKEN\n          ? values.auth_template\n          : undefined,\n      admin_credentials:\n        values.auth_performer === MCPAuthenticationPerformer.PER_USER &&\n        authType === MCPAuthenticationType.API_TOKEN\n          ? values.user_credentials || {}\n          : undefined,\n      oauth_client_id:\n        authType === MCPAuthenticationType.OAUTH\n          ? values.oauth_client_id\n          : undefined,\n      oauth_client_secret:\n        authType === MCPAuthenticationType.OAUTH\n          ? values.oauth_client_secret\n          : undefined,\n      existing_server_id: mcpServer.id,\n    };\n  };\n\n  const handleSubmit = async (values: MCPAuthFormValues) => {\n    const serverData = constructServerData(values);\n    if (!serverData || !mcpServer) return;\n\n    setIsSubmitting(true);\n\n    try {\n      const authType = values.auth_type;\n      // Step 1: Save the authentication configuration to the MCP server\n      const { data: serverResult, error: serverError } =\n        await upsertMCPServer(serverData);\n\n      if (serverError || !serverResult) {\n        throw new Error(serverError || \"Failed to save server configuration\");\n      }\n\n      // Step 2: Update status to AWAITING_AUTH after successful config save\n      if (authType === MCPAuthenticationType.OAUTH) {\n        await updateMCPServerStatus(\n          mcpServer.id,\n          MCPServerStatus.AWAITING_AUTH\n        );\n      }\n\n      // Step 3: For OAuth, initiate the OAuth flow\n      if (authType === MCPAuthenticationType.OAUTH) {\n        const oauthResponse = await fetch(\"/api/admin/mcp/oauth/connect\", {\n          method: \"POST\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n          body: JSON.stringify({\n            server_id: mcpServer.id.toString(),\n            oauth_client_id: values.oauth_client_id,\n            oauth_client_secret: values.oauth_client_secret,\n            return_path: `/admin/actions/mcp/?server_id=${mcpServer.id}&trigger_fetch=true`,\n            include_resource_param: true,\n          }),\n        });\n\n        if (!oauthResponse.ok) {\n          const error = await oauthResponse.json();\n          // Refresh server list so latest status is visible after auth failure\n          await mutateMcpServers();\n          toggle(false);\n          throw new Error(\"Failed to initiate OAuth: \" + error.detail);\n        }\n\n        const { oauth_url } = await oauthResponse.json();\n        window.location.href = oauth_url;\n      } else {\n        // For non-OAuth authentication, trigger tools fetch in-place (no hard navigation)\n        if (onTriggerFetchTools) {\n          onTriggerFetchTools(mcpServer.id);\n        } else {\n          // Fallback to previous behavior if parent didn't provide handler\n          window.location.href = `/admin/actions/mcp/?server_id=${mcpServer.id}&trigger_fetch=true`;\n        }\n        toggle(false);\n      }\n    } catch (error) {\n      console.error(\"Error saving authentication:\", error);\n      // Ensure UI reflects latest status after any auth/config failure\n      await mutateMcpServers();\n      toast.error(\n        error instanceof Error\n          ? error.message\n          : \"Failed to save authentication configuration\"\n      );\n    } finally {\n      setIsSubmitting(false);\n    }\n  };\n\n  return (\n    <Modal open={isOpen} onOpenChange={toggle}>\n      <Modal.Content width=\"sm\" height=\"lg\" skipOverlay={skipOverlay}>\n        <Modal.Header\n          icon={SvgArrowExchange}\n          title={`Authenticate ${mcpServer?.name || \"MCP Server\"}`}\n          description=\"Authenticate your connection to start using the MCP server.\"\n        />\n\n        <Formik<MCPAuthFormValues>\n          initialValues={initialValues}\n          validationSchema={validationSchema}\n          onSubmit={handleSubmit}\n          enableReinitialize\n        >\n          {({\n            values,\n            handleChange,\n            setFieldValue,\n            errors,\n            touched,\n            isValid,\n            dirty,\n          }) => {\n            // Auto-populate transport based on URL\n            useEffect(() => {\n              if (mcpServer?.server_url) {\n                const transport = getTransportFromUrl(mcpServer.server_url);\n                setFieldValue(\"transport\", transport);\n              }\n            }, [mcpServer?.server_url, setFieldValue]);\n\n            return (\n              <Form className=\"flex flex-col h-full\">\n                <Modal.Body>\n                  <div className=\"flex flex-col gap-4 p-2\">\n                    {/* Authentication Type */}\n                    <FormField\n                      name=\"auth_type\"\n                      state={\n                        errors.auth_type && touched.auth_type\n                          ? \"error\"\n                          : touched.auth_type\n                            ? \"success\"\n                            : \"idle\"\n                      }\n                    >\n                      <FormField.Label>Authentication Method</FormField.Label>\n                      <FormField.Control asChild>\n                        <InputSelect\n                          value={values.auth_type}\n                          onValueChange={(value) => {\n                            setFieldValue(\"auth_type\", value);\n                            // For OAuth + OAuth pass-through, we only support per-user auth\n                            if (\n                              value === MCPAuthenticationType.OAUTH ||\n                              value === MCPAuthenticationType.PT_OAUTH\n                            ) {\n                              setFieldValue(\n                                \"auth_performer\",\n                                MCPAuthenticationPerformer.PER_USER\n                              );\n                            } else if (\n                              value === MCPAuthenticationType.API_TOKEN\n                            ) {\n                              // Keep auth_performer in sync with the selected API token tab\n                              setFieldValue(\n                                \"auth_performer\",\n                                activeAuthTab === \"admin\"\n                                  ? MCPAuthenticationPerformer.ADMIN\n                                  : MCPAuthenticationPerformer.PER_USER\n                              );\n                            }\n                          }}\n                        >\n                          <InputSelect.Trigger\n                            placeholder=\"Select method\"\n                            data-testid=\"mcp-auth-method-select\"\n                          />\n                          <InputSelect.Content>\n                            <InputSelect.Item\n                              value={MCPAuthenticationType.OAUTH}\n                              description=\"Each user need to authenticate via OAuth with their own credentials.\"\n                            >\n                              OAuth\n                            </InputSelect.Item>\n                            {isOAuthEnabled && (\n                              <InputSelect.Item\n                                value={MCPAuthenticationType.PT_OAUTH}\n                                description=\"Forward the user's OAuth access token used to authenticate Onyx.\"\n                              >\n                                OAuth Pass-through\n                              </InputSelect.Item>\n                            )}\n                            <InputSelect.Item\n                              value={MCPAuthenticationType.API_TOKEN}\n                              description=\"Use per-user individual API key or organization-wide shared API key.\"\n                            >\n                              API Key\n                            </InputSelect.Item>\n                            <InputSelect.Item\n                              value={MCPAuthenticationType.NONE}\n                              description=\"Not Recommended\"\n                            >\n                              None\n                            </InputSelect.Item>\n                          </InputSelect.Content>\n                        </InputSelect>\n                      </FormField.Control>\n                      <FormField.Message\n                        messages={{\n                          error: errors.auth_type,\n                        }}\n                      />\n                    </FormField>\n                    <Separator className=\"py-0\" />\n                  </div>\n\n                  {/* OAuth Section */}\n                  {values.auth_type === MCPAuthenticationType.OAUTH && (\n                    <div className=\"flex flex-col gap-4 px-2 py-2 bg-background-tint-00 rounded-12\">\n                      {/* OAuth Client ID */}\n                      <FormField\n                        name=\"oauth_client_id\"\n                        state={\n                          errors.oauth_client_id && touched.oauth_client_id\n                            ? \"error\"\n                            : touched.oauth_client_id\n                              ? \"success\"\n                              : \"idle\"\n                        }\n                      >\n                        <FormField.Label optional>Client ID</FormField.Label>\n                        <FormField.Control asChild>\n                          <InputTypeIn\n                            name=\"oauth_client_id\"\n                            value={values.oauth_client_id}\n                            onChange={handleChange}\n                            placeholder=\" \"\n                            showClearButton={false}\n                          />\n                        </FormField.Control>\n                        <FormField.Message\n                          messages={{\n                            error: errors.oauth_client_id,\n                          }}\n                        />\n                      </FormField>\n                      {/* OAuth Client Secret */}\n                      <FormField\n                        name=\"oauth_client_secret\"\n                        state={\n                          errors.oauth_client_secret &&\n                          touched.oauth_client_secret\n                            ? \"error\"\n                            : touched.oauth_client_secret\n                              ? \"success\"\n                              : \"idle\"\n                        }\n                      >\n                        <FormField.Label optional>\n                          Client Secret\n                        </FormField.Label>\n                        <FormField.Control asChild>\n                          <PasswordInputTypeIn\n                            name=\"oauth_client_secret\"\n                            value={values.oauth_client_secret}\n                            onChange={handleChange}\n                            placeholder=\" \"\n                            showClearButton={false}\n                          />\n                        </FormField.Control>\n                        <FormField.Message\n                          messages={{\n                            error: errors.oauth_client_secret,\n                          }}\n                        />\n                      </FormField>\n\n                      {/* Info Text */}\n                      <div className=\"flex flex-col gap-2\">\n                        <Text as=\"p\" text03 secondaryBody>\n                          Client ID and secret are optional if the server\n                          connection supports Dynamic Client Registration (DCR).\n                        </Text>\n                        <Text as=\"p\" text03 secondaryBody>\n                          If your server does not support DCR, you need register\n                          your Onyx instance with the server provider to obtain\n                          these credentials first. Make sure to grant Onyx\n                          necessary scopes/permissions for your actions.\n                        </Text>\n\n                        {/* Redirect URI */}\n                        <div className=\"flex items-center gap-1 w-full\">\n                          <Text\n                            as=\"p\"\n                            text03\n                            secondaryBody\n                            className=\"whitespace-nowrap\"\n                          >\n                            Use{\" \"}\n                            <span className=\"font-secondary-action\">\n                              redirect URI\n                            </span>\n                            :\n                          </Text>\n                          <Text\n                            as=\"p\"\n                            text04\n                            className=\"font-mono text-[12px] leading-[16px] truncate\"\n                          >\n                            {redirectUri}\n                          </Text>\n                          <CopyIconButton\n                            getCopyText={() => redirectUri}\n                            tooltip=\"Copy redirect URI\"\n                            prominence=\"tertiary\"\n                            size=\"sm\"\n                          />\n                        </div>\n                      </div>\n                    </div>\n                  )}\n\n                  {/* API Key Section with Tabs */}\n                  {values.auth_type === MCPAuthenticationType.API_TOKEN && (\n                    <div className=\"flex flex-col gap-4 px-2 py-2 bg-background-tint-00 rounded-12\">\n                      <Tabs\n                        value={activeAuthTab}\n                        onValueChange={(value) => {\n                          setActiveAuthTab(value as \"per-user\" | \"admin\");\n                          // Update auth_performer based on tab selection\n                          setFieldValue(\n                            \"auth_performer\",\n                            value === \"per-user\"\n                              ? MCPAuthenticationPerformer.PER_USER\n                              : MCPAuthenticationPerformer.ADMIN\n                          );\n                        }}\n                      >\n                        <Tabs.List>\n                          <Tabs.Trigger value=\"per-user\">\n                            Individual Key (Per User)\n                          </Tabs.Trigger>\n                          <Tabs.Trigger value=\"admin\">\n                            Shared Key (Admin)\n                          </Tabs.Trigger>\n                        </Tabs.List>\n\n                        {/* Per-user Tab Content */}\n                        <Tabs.Content value=\"per-user\">\n                          <PerUserAuthConfig\n                            values={values}\n                            setFieldValue={setFieldValue}\n                          />\n                        </Tabs.Content>\n\n                        {/* Admin Tab Content */}\n                        <Tabs.Content value=\"admin\">\n                          <div className=\"flex flex-col gap-4 px-2 py-2 bg-background-tint-00 rounded-12\">\n                            <FormField\n                              name=\"api_token\"\n                              state={\n                                errors.api_token && touched.api_token\n                                  ? \"error\"\n                                  : touched.api_token\n                                    ? \"success\"\n                                    : \"idle\"\n                              }\n                            >\n                              <FormField.Label>API Key</FormField.Label>\n                              <FormField.Control asChild>\n                                <PasswordInputTypeIn\n                                  name=\"api_token\"\n                                  value={values.api_token}\n                                  onChange={handleChange}\n                                  placeholder=\"Shared API key for your organization\"\n                                  showClearButton={false}\n                                />\n                              </FormField.Control>\n                              <FormField.Description>\n                                Do not use your personal API key. Make sure this\n                                key is appropriate to share with everyone in\n                                your organization.\n                              </FormField.Description>\n                              <FormField.Message\n                                messages={{\n                                  error: errors.api_token,\n                                }}\n                              />\n                            </FormField>\n                          </div>\n                        </Tabs.Content>\n                      </Tabs>\n                    </div>\n                  )}\n                  {values.auth_type === MCPAuthenticationType.NONE && (\n                    <Message\n                      text=\"No authentication for this MCP server\"\n                      description=\"No authentication will be used for this connection. Make sure you trust this server. You are responsible for actions taken with this connection.\"\n                      default\n                      medium\n                      static\n                      className=\"w-full\"\n                      close={false}\n                    />\n                  )}\n                  {values.auth_type === MCPAuthenticationType.PT_OAUTH && (\n                    <Message\n                      text=\"Use pass-through for services with shared identity provider.\"\n                      description=\"Onyx will forward the user's OAuth access token directly to the server as an Authorization header. Make sure the server supports authentication with the same provider.\"\n                      default\n                      medium\n                      static\n                      className=\"w-full\"\n                      close={false}\n                    />\n                  )}\n                </Modal.Body>\n\n                <Modal.Footer>\n                  <Button\n                    prominence=\"tertiary\"\n                    type=\"button\"\n                    onClick={() => toggle(false)}\n                  >\n                    Cancel\n                  </Button>\n                  <Button\n                    disabled={!isValid || isSubmitting}\n                    type=\"submit\"\n                    data-testid=\"mcp-auth-connect-button\"\n                  >\n                    {isSubmitting ? \"Connecting...\" : \"Connect\"}\n                  </Button>\n                </Modal.Footer>\n              </Form>\n            );\n          }}\n        </Formik>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/actions/modals/OpenAPIAuthenticationModal.tsx",
    "content": "\"use client\";\n\nimport React, { useCallback, useEffect, useMemo, useState } from \"react\";\nimport { Formik, Form, FormikHelpers } from \"formik\";\nimport * as Yup from \"yup\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport PasswordInputTypeIn from \"@/refresh-components/inputs/PasswordInputTypeIn\";\nimport { FormField } from \"@/refresh-components/form/FormField\";\nimport Separator from \"@/refresh-components/Separator\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport KeyValueInput, {\n  KeyValue,\n} from \"@/refresh-components/inputs/InputKeyValue\";\nimport { OAuthConfig } from \"@/lib/tools/interfaces\";\nimport { getOAuthConfig } from \"@/lib/oauth/api\";\nimport { SvgArrowExchange } from \"@opal/icons\";\nimport { useAuthType } from \"@/lib/hooks\";\nimport { AuthType } from \"@/lib/constants\";\nimport Message from \"@/refresh-components/messages/Message\";\n\nexport type AuthMethod = \"oauth\" | \"custom-header\" | \"pt-oauth\";\n\nexport interface OpenAPIAuthFormValues {\n  authMethod: AuthMethod;\n  authorizationUrl: string;\n  tokenUrl: string;\n  clientId: string;\n  clientSecret: string;\n  scopes: string;\n  headers: KeyValue[];\n}\n\ninterface OpenAPIAuthenticationModalProps {\n  isOpen: boolean;\n  onClose: () => void;\n  title: string;\n  description?: string;\n  skipOverlay?: boolean;\n  defaultMethod?: AuthMethod;\n  oauthConfigId?: number | null;\n  initialHeaders?: KeyValue[] | null;\n  onConnect?: (values: OpenAPIAuthFormValues) => Promise<void> | void;\n  onSkip?: () => void;\n  entityName?: string | null;\n  passthroughOAuthEnabled?: boolean;\n}\n\nconst MASKED_CREDENTIAL_VALUE = \"********\";\n\nconst defaultValues: OpenAPIAuthFormValues = {\n  authMethod: \"oauth\",\n  authorizationUrl: \"\",\n  tokenUrl: \"\",\n  clientId: \"\",\n  clientSecret: \"\",\n  scopes: \"\",\n  headers: [\n    {\n      key: \"Authorization\",\n      value: \"\",\n    },\n  ],\n};\n\nexport default function OpenAPIAuthenticationModal({\n  isOpen,\n  onClose,\n  title,\n  description = \"Authenticate your connection to start using the OpenAPI actions.\",\n  skipOverlay = false,\n  defaultMethod = \"oauth\",\n  oauthConfigId = null,\n  initialHeaders = null,\n  passthroughOAuthEnabled = false,\n  onConnect,\n  onSkip,\n  entityName = null,\n}: OpenAPIAuthenticationModalProps) {\n  const authType = useAuthType();\n  const isOAuthEnabled =\n    authType === AuthType.OIDC || authType === AuthType.GOOGLE_OAUTH;\n  const [existingOAuthConfig, setExistingOAuthConfig] =\n    useState<OAuthConfig | null>(null);\n  const [isLoadingOAuthConfig, setIsLoadingOAuthConfig] = useState(false);\n  const [oauthConfigError, setOAuthConfigError] = useState<string | null>(null);\n\n  const isEditingOAuthConfig = Boolean(oauthConfigId);\n  const hasInitialHeaders =\n    Array.isArray(initialHeaders) && initialHeaders.length > 0;\n  const isEditMode = isEditingOAuthConfig || hasInitialHeaders;\n  const shouldDisableForm =\n    isEditingOAuthConfig &&\n    isLoadingOAuthConfig &&\n    !existingOAuthConfig &&\n    !oauthConfigError;\n\n  const redirectUri = useMemo(() => {\n    if (typeof window === \"undefined\") {\n      return \"https://{YOUR_DOMAIN}/oauth-config/callback\";\n    }\n    return `${window.location.origin}/oauth-config/callback`;\n  }, []);\n\n  useEffect(() => {\n    let isActive = true;\n\n    if (!isOpen || !oauthConfigId) {\n      setExistingOAuthConfig(null);\n      setOAuthConfigError(null);\n      setIsLoadingOAuthConfig(false);\n      return () => {\n        isActive = false;\n      };\n    }\n\n    const fetchConfig = async () => {\n      setIsLoadingOAuthConfig(true);\n      setOAuthConfigError(null);\n      try {\n        const config = await getOAuthConfig(oauthConfigId);\n        if (!isActive) {\n          return;\n        }\n        setExistingOAuthConfig(config);\n      } catch (error) {\n        console.error(\"Failed to load OAuth configuration\", error);\n        if (isActive) {\n          setExistingOAuthConfig(null);\n          setOAuthConfigError(\n            \"Failed to load existing OAuth configuration. Re-enter the details to update it.\"\n          );\n        }\n      } finally {\n        if (isActive) {\n          setIsLoadingOAuthConfig(false);\n        }\n      }\n    };\n\n    fetchConfig();\n\n    return () => {\n      isActive = false;\n    };\n  }, [isOpen, oauthConfigId]);\n\n  const dynamicValidationSchema = useMemo(\n    () =>\n      Yup.object({\n        authMethod: Yup.mixed<AuthMethod>()\n          .oneOf([\"oauth\", \"pt-oauth\", \"custom-header\"])\n          .required(\"Authentication method is required\"),\n        authorizationUrl: Yup.string()\n          .url(\"Enter a valid URL\")\n          .when(\"authMethod\", {\n            is: \"oauth\",\n            then: (schema) => schema.required(\"Authorization URL is required\"),\n            otherwise: (schema) => schema.notRequired(),\n          }),\n        tokenUrl: Yup.string()\n          .url(\"Enter a valid URL\")\n          .when(\"authMethod\", {\n            is: \"oauth\",\n            then: (schema) => schema.required(\"Token URL is required\"),\n            otherwise: (schema) => schema.notRequired(),\n          }),\n        clientId: Yup.string().when(\"authMethod\", {\n          is: \"oauth\",\n          then: (schema) =>\n            isEditingOAuthConfig\n              ? schema.optional()\n              : schema.required(\"Client ID is required\"),\n          otherwise: (schema) => schema.notRequired(),\n        }),\n        clientSecret: Yup.string().when(\"authMethod\", {\n          is: \"oauth\",\n          then: (schema) =>\n            isEditingOAuthConfig\n              ? schema.optional()\n              : schema.required(\"Client secret is required\"),\n          otherwise: (schema) => schema.notRequired(),\n        }),\n        scopes: Yup.string().notRequired(),\n        headers: Yup.array().when(\"authMethod\", {\n          is: \"custom-header\",\n          then: () =>\n            Yup.array()\n              .of(\n                Yup.object({\n                  key: Yup.string().required(\"Header key is required\"),\n                  value: Yup.string().required(\"Header value is required\"),\n                })\n              )\n              .min(1, \"Add at least one authentication header\"),\n          otherwise: () =>\n            Yup.array().of(\n              Yup.object({\n                key: Yup.string(),\n                value: Yup.string(),\n              })\n            ),\n        }),\n      }),\n    [isEditingOAuthConfig]\n  );\n\n  const computedInitialValues = useMemo<OpenAPIAuthFormValues>(() => {\n    const baseHeaders =\n      hasInitialHeaders && initialHeaders\n        ? initialHeaders.map((header) => ({ ...header }))\n        : defaultValues.headers.map((header) => ({ ...header }));\n\n    if (isEditingOAuthConfig) {\n      const shouldMaskCredentials = Boolean(\n        existingOAuthConfig?.has_client_credentials\n      );\n      return {\n        authMethod: \"oauth\",\n        authorizationUrl:\n          existingOAuthConfig?.authorization_url ||\n          defaultValues.authorizationUrl,\n        tokenUrl: existingOAuthConfig?.token_url || defaultValues.tokenUrl,\n        clientId: shouldMaskCredentials ? MASKED_CREDENTIAL_VALUE : \"\",\n        clientSecret: shouldMaskCredentials ? MASKED_CREDENTIAL_VALUE : \"\",\n        scopes: existingOAuthConfig?.scopes?.join(\", \") || \"\",\n        headers: baseHeaders,\n      };\n    }\n\n    if (hasInitialHeaders && initialHeaders) {\n      return {\n        ...defaultValues,\n        authMethod: \"custom-header\",\n        headers: baseHeaders,\n      };\n    }\n\n    if (passthroughOAuthEnabled) {\n      return {\n        ...defaultValues,\n        authMethod: \"pt-oauth\",\n      };\n    }\n\n    return {\n      ...defaultValues,\n      authMethod: defaultMethod,\n      headers: baseHeaders,\n    };\n  }, [\n    defaultMethod,\n    existingOAuthConfig,\n    hasInitialHeaders,\n    initialHeaders,\n    isEditingOAuthConfig,\n    passthroughOAuthEnabled,\n  ]);\n\n  const handleSubmit = useCallback(\n    async (\n      values: OpenAPIAuthFormValues,\n      formikHelpers: FormikHelpers<OpenAPIAuthFormValues>\n    ) => {\n      if (shouldDisableForm) {\n        formikHelpers.setSubmitting(false);\n        return;\n      }\n      const sanitizeCredentials = (\n        formValues: OpenAPIAuthFormValues\n      ): OpenAPIAuthFormValues => {\n        if (!isEditingOAuthConfig || formValues.authMethod !== \"oauth\") {\n          return formValues;\n        }\n\n        const sanitizeValue = (value: string) =>\n          value === MASKED_CREDENTIAL_VALUE ? \"\" : value;\n\n        return {\n          ...formValues,\n          clientId: sanitizeValue(formValues.clientId),\n          clientSecret: sanitizeValue(formValues.clientSecret),\n        };\n      };\n\n      try {\n        const sanitizedValues = sanitizeCredentials(values);\n        await onConnect?.(sanitizedValues);\n        onClose();\n      } finally {\n        formikHelpers.setSubmitting(false);\n      }\n    },\n    [onConnect, onClose, shouldDisableForm]\n  );\n\n  const handleSkip = useCallback(() => {\n    if (onSkip) {\n      onSkip();\n    } else {\n      onClose();\n    }\n  }, [onSkip, onClose]);\n\n  return (\n    <Modal\n      open={isOpen}\n      onOpenChange={(open) => {\n        if (!open) {\n          onClose();\n        }\n      }}\n    >\n      <Modal.Content width=\"sm\" height=\"lg\" skipOverlay={skipOverlay}>\n        <Modal.Header\n          icon={SvgArrowExchange}\n          title={title}\n          description={description}\n          onClose={onClose}\n        />\n\n        <Formik\n          initialValues={computedInitialValues}\n          validationSchema={dynamicValidationSchema}\n          validateOnMount\n          enableReinitialize\n          onSubmit={handleSubmit}\n        >\n          {({\n            values,\n            errors,\n            touched,\n            handleChange,\n            setFieldValue,\n            setFieldError,\n            isSubmitting,\n            isValid,\n            dirty,\n          }) => (\n            <Form className=\"flex flex-col h-full\">\n              <Modal.Body>\n                {oauthConfigError && (\n                  <div className=\"mb-3\">\n                    <Text\n                      as=\"p\"\n                      mainUiBody\n                      className=\"text-action-text-danger-05\"\n                    >\n                      {oauthConfigError}\n                    </Text>\n                  </div>\n                )}\n\n                {shouldDisableForm ? (\n                  <div className=\"flex min-h-[220px] items-center justify-center rounded-12 border border-border-01 bg-background-tint-00\">\n                    <Text as=\"p\" secondaryBody text03>\n                      Loading existing configuration...\n                    </Text>\n                  </div>\n                ) : (\n                  <>\n                    <div className=\"flex flex-col gap-4 px-2 pt-2\">\n                      <FormField\n                        name=\"authMethod\"\n                        state={\n                          errors.authMethod && touched.authMethod\n                            ? \"error\"\n                            : touched.authMethod\n                              ? \"success\"\n                              : \"idle\"\n                        }\n                      >\n                        <FormField.Label>Authentication Method</FormField.Label>\n                        <FormField.Control asChild>\n                          <InputSelect\n                            value={values.authMethod}\n                            onValueChange={(value) =>\n                              setFieldValue(\"authMethod\", value)\n                            }\n                          >\n                            <InputSelect.Trigger placeholder=\"Select method\" />\n                            <InputSelect.Content>\n                              <InputSelect.Item\n                                value=\"oauth\"\n                                description=\"Each user authenticates via OAuth with their own credentials.\"\n                              >\n                                OAuth\n                              </InputSelect.Item>\n                              {isOAuthEnabled && (\n                                <InputSelect.Item\n                                  value=\"pt-oauth\"\n                                  description=\"Forward the user's OAuth access token used to authenticate Onyx.\"\n                                >\n                                  OAuth Pass-through\n                                </InputSelect.Item>\n                              )}\n                              <InputSelect.Item\n                                value=\"custom-header\"\n                                description=\"Send custom headers with every request.\"\n                              >\n                                Custom Authorization Header\n                              </InputSelect.Item>\n                            </InputSelect.Content>\n                          </InputSelect>\n                        </FormField.Control>\n                        <FormField.Message\n                          messages={{\n                            error: errors.authMethod,\n                          }}\n                        />\n                      </FormField>\n                    </div>\n\n                    <Separator className=\"py-0\" />\n\n                    {values.authMethod === \"oauth\" && (\n                      <section className=\"flex flex-col gap-4 rounded-12 bg-background-tint-00 border border-border-01 p-4\">\n                        <FormField\n                          name=\"authorizationUrl\"\n                          state={\n                            errors.authorizationUrl && touched.authorizationUrl\n                              ? \"error\"\n                              : touched.authorizationUrl\n                                ? \"success\"\n                                : \"idle\"\n                          }\n                        >\n                          <FormField.Label>Authorization URL</FormField.Label>\n                          <FormField.Control asChild>\n                            <InputTypeIn\n                              name=\"authorizationUrl\"\n                              value={values.authorizationUrl}\n                              onChange={handleChange}\n                              placeholder=\"https://example.com/oauth/authorize\"\n                              showClearButton={false}\n                            />\n                          </FormField.Control>\n                          <FormField.Message\n                            messages={{\n                              error: errors.authorizationUrl,\n                            }}\n                          />\n                        </FormField>\n\n                        <FormField\n                          name=\"tokenUrl\"\n                          state={\n                            errors.tokenUrl && touched.tokenUrl\n                              ? \"error\"\n                              : touched.tokenUrl\n                                ? \"success\"\n                                : \"idle\"\n                          }\n                        >\n                          <FormField.Label>Token URL</FormField.Label>\n                          <FormField.Control asChild>\n                            <InputTypeIn\n                              name=\"tokenUrl\"\n                              value={values.tokenUrl}\n                              onChange={handleChange}\n                              placeholder=\"https://example.com/oauth/access_token\"\n                              showClearButton={false}\n                            />\n                          </FormField.Control>\n                          <FormField.Message\n                            messages={{\n                              error: errors.tokenUrl,\n                            }}\n                          />\n                        </FormField>\n\n                        <FormField\n                          name=\"clientId\"\n                          state={\n                            errors.clientId && touched.clientId\n                              ? \"error\"\n                              : touched.clientId\n                                ? \"success\"\n                                : \"idle\"\n                          }\n                        >\n                          <FormField.Label>OAuth Client ID</FormField.Label>\n                          <FormField.Control asChild>\n                            <InputTypeIn\n                              name=\"clientId\"\n                              value={values.clientId}\n                              onChange={handleChange}\n                              placeholder=\" \"\n                              showClearButton={false}\n                            />\n                          </FormField.Control>\n                          {isEditingOAuthConfig && (\n                            <FormField.Description>\n                              Leave blank to keep the current client ID.\n                            </FormField.Description>\n                          )}\n                          <FormField.Message\n                            messages={{\n                              error: errors.clientId,\n                            }}\n                          />\n                        </FormField>\n\n                        <FormField\n                          name=\"clientSecret\"\n                          state={\n                            errors.clientSecret && touched.clientSecret\n                              ? \"error\"\n                              : touched.clientSecret\n                                ? \"success\"\n                                : \"idle\"\n                          }\n                        >\n                          <FormField.Label>OAuth Client Secret</FormField.Label>\n                          <FormField.Control asChild>\n                            <PasswordInputTypeIn\n                              name=\"clientSecret\"\n                              value={values.clientSecret}\n                              onChange={handleChange}\n                              placeholder=\" \"\n                              showClearButton={false}\n                            />\n                          </FormField.Control>\n                          {isEditingOAuthConfig && (\n                            <FormField.Description>\n                              Leave blank to keep the current client secret.\n                            </FormField.Description>\n                          )}\n                          <FormField.Message\n                            messages={{\n                              error: errors.clientSecret,\n                            }}\n                          />\n                        </FormField>\n\n                        <FormField\n                          name=\"scopes\"\n                          state={\n                            errors.scopes && touched.scopes\n                              ? \"error\"\n                              : touched.scopes\n                                ? \"success\"\n                                : \"idle\"\n                          }\n                        >\n                          <FormField.Label>\n                            Scopes{\" \"}\n                            <span className=\"text-text-03\">(Optional)</span>\n                          </FormField.Label>\n                          <FormField.Control asChild>\n                            <InputTypeIn\n                              name=\"scopes\"\n                              value={values.scopes}\n                              onChange={handleChange}\n                              placeholder=\"e.g. repo, user\"\n                              showClearButton={false}\n                            />\n                          </FormField.Control>\n                          <FormField.Description>\n                            Comma-separated list of OAuth scopes to request.\n                          </FormField.Description>\n                          <FormField.Message\n                            messages={{\n                              error: errors.scopes,\n                            }}\n                          />\n                        </FormField>\n\n                        <div className=\"flex flex-col gap-3 rounded-12 bg-background-tint-01 p-3\">\n                          <Text as=\"p\" text03 secondaryBody>\n                            OAuth passthrough is only available if you enable\n                            OIDC or OAuth authentication.\n                          </Text>\n                          <div className=\"flex flex-col gap-2 w-full\">\n                            <Text\n                              as=\"p\"\n                              text03\n                              secondaryBody\n                              className=\"flex flex-wrap gap-1\"\n                            >\n                              Use{\" \"}\n                              <span className=\"font-secondary-action\">\n                                redirect URI\n                              </span>\n                              :\n                            </Text>\n                            <div className=\"flex items-center gap-2 rounded-08 border border-border-01 bg-background-tint-00 px-3 py-2\">\n                              <Text\n                                as=\"p\"\n                                text04\n                                className=\"font-mono text-[12px] leading-[16px] truncate flex-1\"\n                              >\n                                {redirectUri}\n                              </Text>\n                              <CopyIconButton\n                                getCopyText={() => redirectUri}\n                                tooltip=\"Copy redirect URI\"\n                                prominence=\"tertiary\"\n                                size=\"sm\"\n                              />\n                            </div>\n                          </div>\n                        </div>\n                      </section>\n                    )}\n                    {values.authMethod === \"custom-header\" && (\n                      <section className=\"flex flex-col gap-4 rounded-12 bg-background-tint-00 border border-border-01 p-4\">\n                        <div className=\"flex flex-col gap-2\">\n                          <Text as=\"p\" mainUiAction text04>\n                            Authentication Headers\n                          </Text>\n                          <Text as=\"p\" secondaryBody text03>\n                            Specify custom headers for all requests sent to this\n                            action&apos;s API endpoint.\n                          </Text>\n                        </div>\n                        <FormField\n                          name=\"headers\"\n                          state={errors.headers ? \"error\" : \"idle\"}\n                        >\n                          <FormField.Control asChild>\n                            <KeyValueInput\n                              keyTitle=\"Header\"\n                              valueTitle=\"Value\"\n                              items={values.headers}\n                              onChange={(items) =>\n                                setFieldValue(\"headers\", items)\n                              }\n                              addButtonLabel=\"Add Header\"\n                              onValidationError={(message) =>\n                                setFieldError(\"headers\", message || undefined)\n                              }\n                              layout=\"equal\"\n                            />\n                          </FormField.Control>\n                          <FormField.Message\n                            messages={{\n                              error:\n                                typeof errors.headers === \"string\"\n                                  ? errors.headers\n                                  : undefined,\n                            }}\n                          />\n                        </FormField>\n                      </section>\n                    )}\n                    {values.authMethod === \"pt-oauth\" && (\n                      <Message\n                        text=\"Use pass-through for services with shared identity provider.\"\n                        description=\"Onyx will forward the user's OAuth access token directly to the server as an Authorization header. Make sure the server supports authentication with the same provider.\"\n                        default\n                        medium\n                        static\n                        className=\"w-full\"\n                        close={false}\n                      />\n                    )}\n                  </>\n                )}\n              </Modal.Body>\n\n              <Modal.Footer>\n                <Button\n                  prominence=\"tertiary\"\n                  type=\"button\"\n                  onClick={handleSkip}\n                >\n                  Cancel\n                </Button>\n                <Button\n                  disabled={\n                    !isValid || isSubmitting || shouldDisableForm || !dirty\n                  }\n                  type=\"submit\"\n                >\n                  {isSubmitting ? \"Connecting...\" : \"Connect\"}\n                </Button>\n              </Modal.Footer>\n            </Form>\n          )}\n        </Formik>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/actions/skeleton/ActionCardSkeleton.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { cn } from \"@/lib/utils\";\n\ninterface ActionCardSkeletonProps {\n  className?: string;\n}\n\nconst ActionCardSkeleton: React.FC<ActionCardSkeletonProps> = ({\n  className,\n}) => {\n  return (\n    <div\n      className={cn(\n        \"w-full border border-border-01 rounded-16 bg-background-tint-00\",\n        className\n      )}\n      role=\"status\"\n      aria-label=\"Loading action card\"\n    >\n      <div className=\"flex flex-col w-full\">\n        {/* Header Section */}\n        <div className=\"flex items-start justify-between gap-2 p-3 w-full\">\n          {/* Left: Icon + Title / Description */}\n          <div className=\"flex gap-2 items-start flex-1 min-w-0 mr-2\">\n            {/* Icon */}\n            <div className=\"flex items-center px-0 py-0.5 shrink-0\">\n              <div className=\"h-7 w-7 rounded-12 bg-background-tint-02 animate-pulse\" />\n            </div>\n\n            {/* Title & Description */}\n            <div className=\"flex flex-col items-start flex-1 min-w-0 gap-2\">\n              <div className=\"h-4 w-1/3 rounded bg-background-tint-02 animate-pulse\" />\n              <div className=\"h-3 w-2/3 rounded bg-background-tint-02 animate-pulse\" />\n            </div>\n          </div>\n\n          {/* Right: Actions / View tools button */}\n          <div className=\"flex flex-col gap-2 items-end shrink-0\">\n            {/* Top row: icon buttons / status */}\n            <div className=\"flex items-center gap-2\">\n              <div className=\"h-8 w-8 rounded-full bg-background-tint-02 animate-pulse\" />\n              <div className=\"h-8 w-8 rounded-full bg-background-tint-02 animate-pulse\" />\n            </div>\n\n            {/* View tools button placeholder */}\n            <div className=\"h-8 w-32 rounded-full bg-background-tint-02 animate-pulse\" />\n          </div>\n        </div>\n      </div>\n    </div>\n  );\n};\n\nActionCardSkeleton.displayName = \"ActionCardSkeleton\";\n\nexport default ActionCardSkeleton;\n"
  },
  {
    "path": "web/src/sections/actions/skeleton/ToolItemSkeleton.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport { cn } from \"@/lib/utils\";\n\ninterface ToolItemSkeletonProps {\n  className?: string;\n}\n\nconst ToolItemSkeleton: React.FC<ToolItemSkeletonProps> = ({ className }) => {\n  return (\n    <div\n      className={cn(\n        \"flex items-start justify-between w-full p-2 rounded-08 border border-border-01 bg-background-tint-00\",\n        className\n      )}\n    >\n      {/* Left Section: Icon and Content */}\n      <div className=\"flex gap-1 items-start flex-1 min-w-0 pr-2\">\n        {/* Icon Container Skeleton */}\n        <div className=\"flex items-center justify-center shrink-0\">\n          <div className=\"h-5 w-5 rounded bg-background-tint-02 animate-pulse\" />\n        </div>\n\n        {/* Content Container */}\n        <div className=\"flex flex-col items-start flex-1 min-w-0 gap-1\">\n          {/* Tool Name Skeleton */}\n          <div className=\"flex items-center w-full min-h-[20px] px-0.5\">\n            <div className=\"h-4 w-1/3 rounded bg-background-tint-02 animate-pulse\" />\n          </div>\n\n          {/* Description Skeleton */}\n          <div className=\"px-0.5 w-full space-y-1\">\n            <div className=\"h-3 w-full rounded bg-background-tint-02 animate-pulse\" />\n            <div className=\"h-3 w-2/3 rounded bg-background-tint-02 animate-pulse\" />\n          </div>\n        </div>\n      </div>\n\n      {/* Right Section: Switch Skeleton */}\n      <div className=\"flex gap-2 items-start justify-end shrink-0\">\n        <div className=\"flex items-center justify-center gap-1 h-5 px-0.5 py-0.5\">\n          <div className=\"h-5 w-9 rounded-full bg-background-tint-02 animate-pulse\" />\n        </div>\n      </div>\n    </div>\n  );\n};\n\nToolItemSkeleton.displayName = \"ToolItemSkeleton\";\nexport default ToolItemSkeleton;\n"
  },
  {
    "path": "web/src/sections/admin/AdminListHeader.tsx",
    "content": "\"use client\";\n\nimport { Button, Card } from \"@opal/components\";\nimport { Content } from \"@opal/layouts\";\nimport { SvgPlusCircle } from \"@opal/icons\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\n\ninterface AdminListHeaderProps {\n  /** Whether items exist — controls search bar vs empty-state card. */\n  hasItems: boolean;\n  /** Current search query. */\n  searchQuery: string;\n  /** Called when the search query changes. */\n  onSearchQueryChange: (query: string) => void;\n  /** Search input placeholder. */\n  placeholder?: string;\n  /** Text shown in the empty-state card when no items exist. */\n  emptyStateText: string;\n  /** Called when the action button is clicked. */\n  onAction: () => void;\n  /** Label for the action button. */\n  actionLabel: string;\n}\n\n/**\n * AdminListHeader — the top bar for simple admin list pages.\n *\n * Handles two states:\n *\n * 1. **Items exist** (`hasItems = true`): renders a search input on the left\n *    with a primary action button on the right.\n * 2. **No items** (`hasItems = false`): renders a bordered card with\n *    descriptive text on the left and the same action button on the right.\n *\n * The action button always renders with a `SvgPlusCircle` right icon.\n *\n * Used on admin pages that have a flat list of items with no advanced\n * filtering — e.g. Service Accounts, Groups, OpenAPI Actions, MCP Servers.\n *\n * @example\n * ```tsx\n * <AdminListHeader\n *   hasItems={items.length > 0}\n *   searchQuery={search}\n *   onSearchQueryChange={setSearch}\n *   placeholder=\"Search service accounts...\"\n *   emptyStateText=\"Create service account API keys with user-level access.\"\n *   onAction={handleCreate}\n *   actionLabel=\"New Service Account\"\n * />\n * ```\n */\nexport default function AdminListHeader({\n  hasItems,\n  searchQuery,\n  onSearchQueryChange,\n  placeholder = \"Search...\",\n  emptyStateText,\n  onAction,\n  actionLabel,\n}: AdminListHeaderProps) {\n  const actionButton = (\n    <Button rightIcon={SvgPlusCircle} onClick={onAction}>\n      {actionLabel}\n    </Button>\n  );\n\n  if (!hasItems) {\n    return (\n      <Card rounding=\"lg\" border=\"solid\">\n        <div className=\"flex flex-row items-center justify-between gap-3\">\n          <Content\n            title={emptyStateText}\n            sizePreset=\"main-ui\"\n            variant=\"body\"\n            prominence=\"muted\"\n            widthVariant=\"fit\"\n          />\n          {actionButton}\n        </div>\n      </Card>\n    );\n  }\n\n  return (\n    <div className=\"flex flex-row gap-3 items-center px-2 pb-3\">\n      <InputTypeIn\n        variant=\"internal\"\n        leftSearchIcon\n        placeholder={placeholder}\n        value={searchQuery}\n        onChange={(e) => onSearchQueryChange(e.target.value)}\n        showClearButton={false}\n      />\n      {actionButton}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/admin/ProviderCard.tsx",
    "content": "\"use client\";\n\nimport type { IconFunctionComponent } from \"@opal/types\";\nimport { Button, SelectCard } from \"@opal/components\";\nimport { Content, CardHeaderLayout } from \"@opal/layouts\";\nimport {\n  SvgArrowExchange,\n  SvgArrowRightCircle,\n  SvgCheckSquare,\n  SvgSettings,\n  SvgUnplug,\n} from \"@opal/icons\";\n\n/**\n * ProviderCard — a stateful card for selecting / connecting / disconnecting\n * an external service provider (LLM, search engine, voice model, etc.).\n *\n * Built on opal `SelectCard` + `CardHeaderLayout`. Maps a three-state\n * status model to the `SelectCard` state system:\n *\n * | Status         | SelectCard state | Right action           |\n * |----------------|------------------|------------------------|\n * | `disconnected` | `empty`          | \"Connect\" button       |\n * | `connected`    | `filled`         | \"Set as Default\" button|\n * | `selected`     | `selected`       | \"Current Default\" label|\n *\n * Bottom-right actions (Disconnect, Edit) are always visible when the\n * provider is connected or selected.\n *\n * Used on admin configuration pages: Web Search, Image Generation,\n * Voice, and LLM Configuration.\n *\n * @example\n * ```tsx\n * <ProviderCard\n *   icon={SvgGlobe}\n *   title=\"Exa\"\n *   description=\"Exa.ai\"\n *   status=\"connected\"\n *   onConnect={() => openModal()}\n *   onSelect={() => setDefault(id)}\n *   onEdit={() => openEditModal()}\n *   onDisconnect={() => confirmDisconnect(id)}\n * />\n * ```\n */\n\ntype ProviderStatus = \"disconnected\" | \"connected\" | \"selected\";\n\ninterface ProviderCardProps {\n  icon: IconFunctionComponent;\n  title: string;\n  description: string;\n  status: ProviderStatus;\n  onConnect?: () => void;\n  onSelect?: () => void;\n  onDeselect?: () => void;\n  onEdit?: () => void;\n  onDisconnect?: () => void;\n  selectedLabel?: string;\n  \"aria-label\"?: string;\n}\n\nconst STATUS_TO_STATE = {\n  disconnected: \"empty\",\n  connected: \"filled\",\n  selected: \"selected\",\n} as const;\n\nexport default function ProviderCard({\n  icon,\n  title,\n  description,\n  status,\n  onConnect,\n  onSelect,\n  onDeselect,\n  onEdit,\n  onDisconnect,\n  selectedLabel = \"Current Default\",\n  \"aria-label\": ariaLabel,\n}: ProviderCardProps) {\n  const isDisconnected = status === \"disconnected\";\n  const isConnected = status === \"connected\";\n  const isSelected = status === \"selected\";\n\n  return (\n    <SelectCard\n      state={STATUS_TO_STATE[status]}\n      padding=\"sm\"\n      rounding=\"lg\"\n      aria-label={ariaLabel}\n      onClick={isDisconnected && onConnect ? onConnect : undefined}\n    >\n      <CardHeaderLayout\n        sizePreset=\"main-ui\"\n        variant=\"section\"\n        icon={icon}\n        title={title}\n        description={description}\n        rightChildren={\n          isDisconnected && onConnect ? (\n            <Button\n              prominence=\"tertiary\"\n              rightIcon={SvgArrowExchange}\n              onClick={(e) => {\n                e.stopPropagation();\n                onConnect();\n              }}\n            >\n              Connect\n            </Button>\n          ) : isConnected && onSelect ? (\n            <Button\n              prominence=\"tertiary\"\n              rightIcon={SvgArrowRightCircle}\n              onClick={(e) => {\n                e.stopPropagation();\n                onSelect();\n              }}\n            >\n              Set as Default\n            </Button>\n          ) : isSelected ? (\n            <div className=\"p-2\">\n              <Content\n                title={selectedLabel}\n                sizePreset=\"main-ui\"\n                variant=\"section\"\n                icon={SvgCheckSquare}\n              />\n            </div>\n          ) : undefined\n        }\n        bottomRightChildren={\n          !isDisconnected ? (\n            <div className=\"flex flex-row px-1 pb-1\">\n              {onDisconnect && (\n                <Button\n                  icon={SvgUnplug}\n                  tooltip=\"Disconnect\"\n                  aria-label={`Disconnect ${title}`}\n                  prominence=\"tertiary\"\n                  onClick={(e) => {\n                    e.stopPropagation();\n                    onDisconnect();\n                  }}\n                  size=\"md\"\n                />\n              )}\n              {onEdit && (\n                <Button\n                  icon={SvgSettings}\n                  tooltip=\"Edit\"\n                  aria-label={`Edit ${title}`}\n                  prominence=\"tertiary\"\n                  onClick={(e) => {\n                    e.stopPropagation();\n                    onEdit();\n                  }}\n                  size=\"md\"\n                />\n              )}\n            </div>\n          ) : undefined\n        }\n      />\n    </SelectCard>\n  );\n}\n\nexport type { ProviderCardProps, ProviderStatus };\n"
  },
  {
    "path": "web/src/sections/cards/AgentCard.tsx",
    "content": "\"use client\";\n\nimport { useMemo, useCallback } from \"react\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport AgentAvatar from \"@/refresh-components/avatars/AgentAvatar\";\nimport { Button } from \"@opal/components\";\nimport { useAppRouter } from \"@/hooks/appNavigation\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport { usePinnedAgents, useAgent } from \"@/hooks/useAgents\";\nimport { cn, noProp } from \"@/lib/utils\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport {\n  checkUserOwnsAgent,\n  updateAgentSharedStatus,\n  updateAgentFeaturedStatus,\n} from \"@/lib/agents\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport {\n  SvgActions,\n  SvgBarChart,\n  SvgBubbleText,\n  SvgEdit,\n  SvgPin,\n  SvgPinned,\n  SvgShare,\n  SvgUser,\n} from \"@opal/icons\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport ShareAgentModal from \"@/sections/modals/ShareAgentModal\";\nimport AgentViewerModal from \"@/sections/modals/AgentViewerModal\";\nimport { toast } from \"@/hooks/useToast\";\nimport { CardItemLayout } from \"@/layouts/general-layouts\";\nimport { Content } from \"@opal/layouts\";\nimport { Interactive } from \"@opal/core\";\nimport { Card } from \"@/refresh-components/cards\";\n\nexport interface AgentCardProps {\n  agent: MinimalPersonaSnapshot;\n}\n\nexport default function AgentCard({ agent }: AgentCardProps) {\n  const route = useAppRouter();\n  const router = useRouter();\n  const { pinnedAgents, togglePinnedAgent } = usePinnedAgents();\n  const pinned = useMemo(\n    () => pinnedAgents.some((pinnedAgent) => pinnedAgent.id === agent.id),\n    [agent.id, pinnedAgents]\n  );\n  const { user, isAdmin, isCurator } = useUser();\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n  const canUpdateFeaturedStatus = isAdmin || isCurator;\n  const isOwnedByUser = checkUserOwnsAgent(user, agent);\n  const shareAgentModal = useCreateModal();\n  const agentViewerModal = useCreateModal();\n  const { agent: fullAgent, refresh: refreshAgent } = useAgent(agent.id);\n\n  // Start chat and auto-pin unpinned agents to the sidebar\n  const handleStartChat = useCallback(() => {\n    if (!pinned) {\n      togglePinnedAgent(agent, true);\n    }\n    route({ agentId: agent.id });\n  }, [pinned, togglePinnedAgent, agent, route]);\n\n  const handleShare = useCallback(\n    async (\n      userIds: string[],\n      groupIds: number[],\n      isPublic: boolean,\n      isFeatured: boolean,\n      labelIds: number[]\n    ) => {\n      const shareError = await updateAgentSharedStatus(\n        agent.id,\n        userIds,\n        groupIds,\n        isPublic,\n        isPaidEnterpriseFeaturesEnabled,\n        labelIds\n      );\n\n      if (shareError) {\n        toast.error(`Failed to share agent: ${shareError}`);\n        return;\n      }\n\n      if (canUpdateFeaturedStatus) {\n        const featuredError = await updateAgentFeaturedStatus(\n          agent.id,\n          isFeatured\n        );\n        if (featuredError) {\n          toast.error(`Failed to update featured status: ${featuredError}`);\n          refreshAgent();\n          return;\n        }\n      }\n\n      refreshAgent();\n      shareAgentModal.toggle(false);\n    },\n    [\n      agent.id,\n      canUpdateFeaturedStatus,\n      isPaidEnterpriseFeaturesEnabled,\n      refreshAgent,\n    ]\n  );\n\n  return (\n    <>\n      <shareAgentModal.Provider>\n        <ShareAgentModal\n          agentId={agent.id}\n          userIds={fullAgent?.users?.map((u) => u.id) ?? []}\n          groupIds={fullAgent?.groups ?? []}\n          isPublic={fullAgent?.is_public ?? false}\n          isFeatured={fullAgent?.is_featured ?? false}\n          labelIds={fullAgent?.labels?.map((l) => l.id) ?? []}\n          onShare={handleShare}\n        />\n      </shareAgentModal.Provider>\n\n      <agentViewerModal.Provider>\n        {fullAgent && <AgentViewerModal agent={fullAgent} />}\n      </agentViewerModal.Provider>\n\n      <Interactive.Simple\n        onClick={() => agentViewerModal.toggle(true)}\n        group=\"group/AgentCard\"\n      >\n        <Card\n          padding={0}\n          gap={0}\n          height=\"full\"\n          className=\"radial-00 hover:shadow-00\"\n        >\n          <div className=\"flex self-stretch h-[6rem]\">\n            <CardItemLayout\n              icon={(props) => <AgentAvatar agent={agent} {...props} />}\n              title={agent.name}\n              description={agent.description}\n              rightChildren={\n                <>\n                  {isOwnedByUser && isPaidEnterpriseFeaturesEnabled && (\n                    // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n                    <IconButton\n                      icon={SvgBarChart}\n                      tertiary\n                      onClick={noProp(() =>\n                        router.push(`/ee/agents/stats/${agent.id}` as Route)\n                      )}\n                      tooltip=\"View Agent Stats\"\n                      className=\"hidden group-hover/AgentCard:flex\"\n                    />\n                  )}\n                  {isOwnedByUser && (\n                    // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n                    <IconButton\n                      icon={SvgEdit}\n                      tertiary\n                      onClick={noProp(() =>\n                        router.push(`/app/agents/edit/${agent.id}` as Route)\n                      )}\n                      tooltip=\"Edit Agent\"\n                      className=\"hidden group-hover/AgentCard:flex\"\n                    />\n                  )}\n                  {isOwnedByUser && (\n                    // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n                    <IconButton\n                      icon={SvgShare}\n                      tertiary\n                      onClick={noProp(() => shareAgentModal.toggle(true))}\n                      tooltip=\"Share Agent\"\n                      className=\"hidden group-hover/AgentCard:flex\"\n                    />\n                  )}\n                  {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n                  <IconButton\n                    icon={pinned ? SvgPinned : SvgPin}\n                    tertiary\n                    onClick={noProp(() => togglePinnedAgent(agent, !pinned))}\n                    tooltip={pinned ? \"Unpin from Sidebar\" : \"Pin to Sidebar\"}\n                    className={cn(\n                      !pinned && \"hidden group-hover/AgentCard:flex\"\n                    )}\n                  />\n                </>\n              }\n            />\n          </div>\n\n          {/* Footer section - bg-background-tint-01 */}\n          <div className=\"bg-background-tint-01 p-1 flex flex-row items-end justify-between w-full\">\n            {/* Left side - creator and actions */}\n            <div className=\"flex flex-col gap-1 py-1 px-2\">\n              <Content\n                icon={SvgUser}\n                title={agent.owner?.email || \"Onyx\"}\n                sizePreset=\"secondary\"\n                variant=\"body\"\n                prominence=\"muted\"\n              />\n              <Content\n                icon={SvgActions}\n                title={\n                  agent.tools.length > 0\n                    ? `${agent.tools.length} Action${\n                        agent.tools.length > 1 ? \"s\" : \"\"\n                      }`\n                    : \"No Actions\"\n                }\n                sizePreset=\"secondary\"\n                variant=\"body\"\n                prominence=\"muted\"\n              />\n            </div>\n\n            {/* Right side - Start Chat button */}\n            <div className=\"p-0.5\">\n              <Button\n                prominence=\"tertiary\"\n                rightIcon={SvgBubbleText}\n                onClick={noProp(handleStartChat)}\n              >\n                Start Chat\n              </Button>\n            </div>\n          </div>\n        </Card>\n      </Interactive.Simple>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/cards/DocumentSetCard.tsx",
    "content": "\"use client\";\n\nimport { DocumentSetSummary } from \"@/lib/types\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport SimpleTooltip from \"@/refresh-components/SimpleTooltip\";\nimport { SvgFiles } from \"@opal/icons\";\nimport { Interactive } from \"@opal/core\";\nimport { AttachmentItemLayout } from \"@/layouts/general-layouts\";\nimport Spacer from \"@/refresh-components/Spacer\";\n\nexport interface DocumentSetCardProps {\n  documentSet: DocumentSetSummary;\n  isSelected?: boolean;\n  onSelectToggle?: (isSelected: boolean) => void;\n  disabled?: boolean;\n  disabledTooltip?: string;\n}\n\nexport default function DocumentSetCard({\n  documentSet,\n  isSelected,\n  onSelectToggle,\n  disabled,\n  disabledTooltip,\n}: DocumentSetCardProps) {\n  return (\n    <SimpleTooltip\n      tooltip={disabled && disabledTooltip ? disabledTooltip : undefined}\n      disabled={!disabled || !disabledTooltip}\n    >\n      <div className=\"max-w-[12rem]\">\n        <Interactive.Simple\n          onClick={\n            disabled || isSelected === undefined\n              ? undefined\n              : () => onSelectToggle?.(!isSelected)\n          }\n        >\n          <Interactive.Container\n            data-testid={`document-set-card-${documentSet.id}`}\n            border\n            heightVariant=\"fit\"\n          >\n            <AttachmentItemLayout\n              icon={SvgFiles}\n              title={documentSet.name}\n              description={documentSet.description}\n              rightChildren={\n                isSelected === undefined ? undefined : (\n                  <div onClick={(e) => e.stopPropagation()}>\n                    <Checkbox\n                      checked={isSelected}\n                      disabled={disabled}\n                      onCheckedChange={\n                        disabled\n                          ? undefined\n                          : () => onSelectToggle?.(!isSelected)\n                      }\n                    />\n                  </div>\n                )\n              }\n            />\n            <Spacer horizontal rem={0.5} />\n          </Interactive.Container>\n        </Interactive.Simple>\n      </div>\n    </SimpleTooltip>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/cards/FileCard.tsx",
    "content": "\"use client\";\n\nimport { useMemo, useState } from \"react\";\nimport type { ProjectFile } from \"@/app/app/projects/projectsService\";\nimport { UserFileStatus } from \"@/app/app/projects/projectsService\";\nimport { cn, isImageFile } from \"@/lib/utils\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { SvgFileText, SvgX } from \"@opal/icons\";\nimport { Interactive, Hoverable } from \"@opal/core\";\nimport { AttachmentItemLayout } from \"@/layouts/general-layouts\";\nimport Spacer from \"@/refresh-components/Spacer\";\n\ninterface RemovableProps {\n  onRemove?: () => void;\n  children: React.ReactNode;\n}\n\nfunction Removable({ onRemove, children }: RemovableProps) {\n  if (!onRemove) {\n    return <>{children}</>;\n  }\n\n  return (\n    <Hoverable.Root group=\"fileCard\" widthVariant=\"fit\">\n      <div className=\"relative\">\n        <div\n          className={cn(\n            \"absolute -left-2 -top-2 z-10\",\n            \"pointer-events-none focus-within:pointer-events-auto\"\n          )}\n        >\n          <Hoverable.Item group=\"fileCard\" variant=\"opacity-on-hover\">\n            <button\n              type=\"button\"\n              onClick={(e) => {\n                e.stopPropagation();\n                onRemove();\n              }}\n              title=\"Remove\"\n              aria-label=\"Remove\"\n              className={cn(\n                \"h-4 w-4\",\n                \"flex items-center justify-center\",\n                \"rounded-04 border border-border text-[11px]\",\n                \"bg-background-neutral-inverted-01 text-text-inverted-05 shadow-sm\",\n                \"pointer-events-auto\",\n                \"hover:opacity-90\"\n              )}\n            >\n              <SvgX className=\"h-3 w-3 stroke-text-inverted-03\" />\n            </button>\n          </Hoverable.Item>\n        </div>\n        {children}\n      </div>\n    </Hoverable.Root>\n  );\n}\n\ninterface ImageFileCardProps {\n  file: ProjectFile;\n  imageUrl: string | null;\n  removeFile?: (fileId: string) => void;\n  onFileClick?: (file: ProjectFile) => void;\n  isProcessing?: boolean;\n  compact?: boolean;\n}\nfunction ImageFileCard({\n  file,\n  imageUrl,\n  removeFile,\n  onFileClick,\n  isProcessing = false,\n  compact = false,\n}: ImageFileCardProps) {\n  const sizeClass = compact ? \"h-11 w-11\" : \"h-20 w-20\";\n  const loaderSize = compact ? \"h-5 w-5\" : \"h-8 w-8\";\n  const iconSize = compact ? \"h-5 w-5\" : \"h-8 w-8\";\n  const [imgError, setImgError] = useState(false);\n\n  const doneUploading = String(file.status) !== UserFileStatus.UPLOADING;\n\n  return (\n    <Removable\n      onRemove={\n        removeFile && doneUploading ? () => removeFile(file.id) : undefined\n      }\n    >\n      <div\n        className={cn(\n          sizeClass,\n          \"rounded-08 border border-border-01\",\n          isProcessing && \"bg-background-neutral-02\",\n          onFileClick && !isProcessing && \"cursor-pointer hover:opacity-90\"\n        )}\n        onClick={() => {\n          if (onFileClick && !isProcessing) {\n            onFileClick(file);\n          }\n        }}\n      >\n        {!doneUploading || !imageUrl ? (\n          <div className=\"h-full w-full flex items-center justify-center\">\n            <SimpleLoader className={loaderSize} />\n          </div>\n        ) : imgError ? (\n          <div className=\"h-full w-full flex items-center justify-center\">\n            <SvgFileText className={iconSize} />\n          </div>\n        ) : (\n          <img\n            src={imageUrl}\n            alt={file.name}\n            className=\"h-full w-full object-cover rounded-08\"\n            onError={() => setImgError(true)}\n          />\n        )}\n      </div>\n    </Removable>\n  );\n}\n\nexport interface FileCardProps {\n  file: ProjectFile;\n  removeFile?: (fileId: string) => void;\n  hideProcessingState?: boolean;\n  onFileClick?: (file: ProjectFile) => void;\n  compactImages?: boolean;\n}\nexport function FileCard({\n  file,\n  removeFile,\n  hideProcessingState = false,\n  onFileClick,\n  compactImages = false,\n}: FileCardProps) {\n  const typeLabel = useMemo(() => {\n    const name = String(file.name || \"\");\n    const lastDotIndex = name.lastIndexOf(\".\");\n    if (lastDotIndex <= 0 || lastDotIndex === name.length - 1) {\n      return \"\";\n    }\n    return name.slice(lastDotIndex + 1).toUpperCase();\n  }, [file.name]);\n\n  const isImage = useMemo(() => {\n    return isImageFile(file.name);\n  }, [file.name]);\n\n  const imageUrl = useMemo(() => {\n    if (isImage && file.file_id) {\n      return `/api/chat/file/${file.file_id}`;\n    }\n    return null;\n  }, [isImage, file.file_id]);\n\n  const isActuallyProcessing =\n    String(file.status) === UserFileStatus.UPLOADING ||\n    String(file.status) === UserFileStatus.PROCESSING;\n\n  // When hideProcessingState is true, we treat processing files as completed for display purposes\n  const isProcessing = hideProcessingState ? false : isActuallyProcessing;\n\n  const doneUploading = String(file.status) !== UserFileStatus.UPLOADING;\n\n  // For images, always show the larger preview layout (even while processing)\n  if (isImage) {\n    return (\n      <ImageFileCard\n        file={file}\n        imageUrl={imageUrl}\n        removeFile={removeFile}\n        onFileClick={onFileClick}\n        isProcessing={isProcessing}\n        compact={compactImages}\n      />\n    );\n  }\n\n  return (\n    <Removable\n      onRemove={\n        removeFile && doneUploading ? () => removeFile(file.id) : undefined\n      }\n    >\n      <div className=\"min-w-0 max-w-[12rem]\">\n        <Interactive.Container border heightVariant=\"fit\">\n          <div className=\"[&_.opal-content-md-title-row]:min-w-0 [&_.opal-content-md-title]:break-all\">\n            <AttachmentItemLayout\n              icon={isProcessing ? SimpleLoader : SvgFileText}\n              title={file.name}\n              description={\n                isProcessing\n                  ? file.status === UserFileStatus.UPLOADING\n                    ? \"Uploading...\"\n                    : \"Processing...\"\n                  : typeLabel\n              }\n            />\n          </div>\n          <Spacer horizontal rem={0.5} />\n        </Interactive.Container>\n      </div>\n    </Removable>\n  );\n}\n\n// Skeleton loading component for file cards\nexport function FileCardSkeleton() {\n  return (\n    <div className=\"min-w-[120px] max-w-[240px] h-11 rounded-08 bg-background-tint-02 animate-pulse\" />\n  );\n}\n"
  },
  {
    "path": "web/src/sections/cards/README.md",
    "content": "# Cards\n\nThis directory contains feature-specific card components.\n\nCards are self-contained UI components that display information about a specific entity (e.g., an agent, a document set, a connector) in a visually distinct, bounded container. They typically include:\n\n- Entity identification (name, avatar, icon)\n- Summary information\n- Quick actions (buttons, menus)\n\n## Guidelines\n\n- Each card should be focused on a single entity type\n- Cards should be reusable across different pages/contexts\n- Keep card-specific logic within the card component\n- Use shared components from `@/refresh-components` for common UI elements\n"
  },
  {
    "path": "web/src/sections/chat/ChatScrollContainer.tsx",
    "content": "\"use client\";\n\nimport React, {\n  ForwardedRef,\n  useCallback,\n  useEffect,\n  useImperativeHandle,\n  useRef,\n  useState,\n} from \"react\";\nimport { ScrollContainerProvider } from \"@/components/chat/ScrollContainerContext\";\nimport { cn } from \"@/lib/utils\";\n\n// Size constants\nconst DEFAULT_ANCHOR_OFFSET_PX = 16; // 1rem\nconst DEFAULT_FADE_THRESHOLD_PX = 80; // 5rem\nconst DEFAULT_BUTTON_THRESHOLD_PX = 32; // 2rem\n\n// Fade configuration\nconst TOP_FADE_HEIGHT = \"1rem\";\nconst BOTTOM_FADE_HEIGHT = \"1rem\";\n\nexport interface ScrollState {\n  isAtBottom: boolean;\n  hasContentAbove: boolean;\n  hasContentBelow: boolean;\n}\n\nexport interface ChatScrollContainerHandle {\n  scrollToBottom: (behavior?: ScrollBehavior) => void;\n}\n\nexport interface ChatScrollContainerProps {\n  children: React.ReactNode;\n\n  /**\n   * CSS selector for the anchor element (e.g., \"#message-123\")\n   * Used to scroll to a specific message position\n   */\n  anchorSelector?: string;\n\n  /** Enable auto-scroll behavior (follow new content) */\n  autoScroll?: boolean;\n\n  /** Whether content is currently streaming (affects scroll button visibility) */\n  isStreaming?: boolean;\n\n  /** Callback when scroll button visibility should change */\n  onScrollButtonVisibilityChange?: (visible: boolean) => void;\n\n  /** Session ID - resets scroll state when changed */\n  sessionId?: string;\n\n  /** Hide the scrollbar (scroll still works, just invisible) */\n  hideScrollbar?: boolean;\n}\n\n// Build a CSS mask that fades content opacity at top/bottom edges\nfunction buildContentMask(): string {\n  // Mask uses black = visible, transparent = hidden\n  // Top: fades from transparent to visible over 1rem\n  // Bottom: fades from visible to transparent over 1rem\n  return `linear-gradient(to bottom, transparent 0%, transparent 0rem, black ${TOP_FADE_HEIGHT}, black calc(100% - ${BOTTOM_FADE_HEIGHT}), transparent 100%)`;\n}\n\nconst ChatScrollContainer = React.memo(\n  React.forwardRef(\n    (\n      {\n        children,\n        anchorSelector,\n        autoScroll = true,\n        isStreaming = false,\n        onScrollButtonVisibilityChange,\n        sessionId,\n        hideScrollbar = false,\n      }: ChatScrollContainerProps,\n      ref: ForwardedRef<ChatScrollContainerHandle>\n    ) => {\n      const anchorOffsetPx = DEFAULT_ANCHOR_OFFSET_PX;\n      const fadeThresholdPx = DEFAULT_FADE_THRESHOLD_PX;\n      const buttonThresholdPx = DEFAULT_BUTTON_THRESHOLD_PX;\n      const scrollContainerRef = useRef<HTMLDivElement>(null);\n      const contentWrapperRef = useRef<HTMLDivElement>(null);\n      const spacerHeightRef = useRef(0);\n      const endDivRef = useRef<HTMLDivElement>(null);\n      const scrolledForSessionRef = useRef<string | null>(null);\n      const prevAnchorSelectorRef = useRef<string | null>(null);\n\n      const [hasContentAbove, setHasContentAbove] = useState(false);\n      const [hasContentBelow, setHasContentBelow] = useState(false);\n      const [isAtBottom, setIsAtBottom] = useState(true);\n      const isAtBottomRef = useRef(true); // Ref for use in callbacks\n      const isAutoScrollingRef = useRef(false); // Prevent handleScroll from interfering during auto-scroll\n      const prevScrollTopRef = useRef(0); // Track scroll position to detect scroll direction\n      const [isScrollReady, setIsScrollReady] = useState(false);\n\n      // Use refs for values that change during streaming to prevent effect re-runs\n      const onScrollButtonVisibilityChangeRef = useRef(\n        onScrollButtonVisibilityChange\n      );\n      onScrollButtonVisibilityChangeRef.current =\n        onScrollButtonVisibilityChange;\n      const autoScrollRef = useRef(autoScroll);\n      autoScrollRef.current = autoScroll;\n      const isStreamingRef = useRef(isStreaming);\n      isStreamingRef.current = isStreaming;\n\n      // Get current scroll state\n      const getScrollState = useCallback((): ScrollState => {\n        const container = scrollContainerRef.current;\n        if (!container || !endDivRef.current) {\n          return {\n            isAtBottom: true,\n            hasContentAbove: false,\n            hasContentBelow: false,\n          };\n        }\n\n        // Exclude the dynamic spacer — it's cosmetic (push-up effect) and\n        // shouldn't make the system think there's real content below the viewport.\n        const contentEnd =\n          endDivRef.current.offsetTop - spacerHeightRef.current;\n        const viewportBottom = container.scrollTop + container.clientHeight;\n        const contentBelowViewport = contentEnd - viewportBottom;\n\n        return {\n          isAtBottom: contentBelowViewport <= buttonThresholdPx,\n          hasContentAbove: container.scrollTop > fadeThresholdPx,\n          hasContentBelow: contentBelowViewport > fadeThresholdPx,\n        };\n      }, [buttonThresholdPx, fadeThresholdPx]);\n\n      // Update scroll state and notify parent about button visibility\n      const updateScrollState = useCallback(() => {\n        const state = getScrollState();\n        setIsAtBottom(state.isAtBottom);\n        isAtBottomRef.current = state.isAtBottom; // Keep ref in sync\n        setHasContentAbove(state.hasContentAbove);\n        setHasContentBelow(state.hasContentBelow);\n\n        // Show button when user is not at bottom (e.g., scrolled up)\n        onScrollButtonVisibilityChangeRef.current?.(!state.isAtBottom);\n      }, [getScrollState]);\n\n      // Scroll to bottom of content\n      const scrollToBottom = useCallback(\n        (behavior: ScrollBehavior = \"smooth\") => {\n          const container = scrollContainerRef.current;\n          if (!container || !endDivRef.current) return;\n\n          // Mark as auto-scrolling to prevent handleScroll interference\n          isAutoScrollingRef.current = true;\n\n          // Use scrollTo instead of scrollIntoView for better cross-browser support\n          const targetScrollTop =\n            container.scrollHeight - container.clientHeight;\n          container.scrollTo({ top: targetScrollTop, behavior });\n\n          // Update tracking refs\n          prevScrollTopRef.current = targetScrollTop;\n          isAtBottomRef.current = true;\n\n          // For smooth scrolling, keep isAutoScrollingRef true longer\n          if (behavior === \"smooth\") {\n            // Clear after animation likely completes (Safari smooth scroll is ~500ms)\n            setTimeout(() => {\n              isAutoScrollingRef.current = false;\n              if (container) {\n                prevScrollTopRef.current = container.scrollTop;\n              }\n              // Refresh scroll state so the scroll-to-bottom button hides\n              updateScrollState();\n            }, 600);\n          } else {\n            isAutoScrollingRef.current = false;\n          }\n        },\n        [updateScrollState]\n      );\n\n      // Expose scrollToBottom via ref\n      useImperativeHandle(ref, () => ({ scrollToBottom }), [scrollToBottom]);\n\n      // Re-evaluate button visibility when at-bottom state changes\n      useEffect(() => {\n        onScrollButtonVisibilityChangeRef.current?.(!isAtBottom);\n      }, [isAtBottom]);\n\n      // Handle scroll events (user scrolls)\n      const handleScroll = useCallback(() => {\n        const container = scrollContainerRef.current;\n        if (!container) return;\n\n        // Skip if this scroll was triggered by auto-scroll\n        if (isAutoScrollingRef.current) return;\n\n        const currentScrollTop = container.scrollTop;\n        const scrolledUp = currentScrollTop < prevScrollTopRef.current - 5; // 5px threshold to ignore micro-movements\n        prevScrollTopRef.current = currentScrollTop;\n\n        // Only update isAtBottomRef when user explicitly scrolls UP\n        // This prevents content growth or programmatic scrolls from disabling auto-scroll\n        if (scrolledUp) {\n          updateScrollState();\n        } else {\n          // Still update fade overlays, but preserve isAtBottomRef\n          const state = getScrollState();\n          setHasContentAbove(state.hasContentAbove);\n          setHasContentBelow(state.hasContentBelow);\n          // Update button visibility based on actual position\n          onScrollButtonVisibilityChangeRef.current?.(!state.isAtBottom);\n        }\n      }, [updateScrollState, getScrollState]);\n\n      // Watch for content changes (MutationObserver + ResizeObserver)\n      useEffect(() => {\n        const container = scrollContainerRef.current;\n        if (!container) return;\n\n        let rafId: number | null = null;\n\n        const onContentChange = () => {\n          if (rafId) return;\n          rafId = requestAnimationFrame(() => {\n            rafId = null;\n\n            // Capture whether we were at bottom BEFORE content changed\n            const wasAtBottom = isAtBottomRef.current;\n\n            // Auto-scroll: follow content if we were at bottom.\n            // Skip instant auto-scroll during DynamicBottomSpacer's smooth\n            // scroll to avoid competing scroll commands.\n            if (\n              autoScrollRef.current &&\n              wasAtBottom &&\n              container.dataset.smoothScrollActive !== \"true\"\n            ) {\n              // scrollToBottom handles isAutoScrollingRef and ref updates\n              scrollToBottom(\"instant\");\n            }\n\n            updateScrollState();\n          });\n        };\n\n        // MutationObserver for content changes\n        const mutationObserver = new MutationObserver(onContentChange);\n        mutationObserver.observe(container, {\n          childList: true,\n          subtree: true,\n          characterData: true,\n        });\n\n        // ResizeObserver for container size changes\n        const resizeObserver = new ResizeObserver(onContentChange);\n        resizeObserver.observe(container);\n\n        return () => {\n          mutationObserver.disconnect();\n          resizeObserver.disconnect();\n          if (rafId) cancelAnimationFrame(rafId);\n        };\n      }, [updateScrollState, scrollToBottom]);\n\n      // Handle session changes and anchor changes\n      useEffect(() => {\n        const container = scrollContainerRef.current;\n        if (!container) return;\n\n        const isNewSession =\n          scrolledForSessionRef.current !== null &&\n          scrolledForSessionRef.current !== sessionId;\n        const isNewAnchor = prevAnchorSelectorRef.current !== anchorSelector;\n\n        // Reset on session change\n        if (isNewSession) {\n          scrolledForSessionRef.current = null;\n          setIsScrollReady(false);\n          prevScrollTopRef.current = 0;\n          isAtBottomRef.current = true;\n        }\n\n        const shouldScroll =\n          (scrolledForSessionRef.current !== sessionId || isNewAnchor) &&\n          anchorSelector;\n\n        if (!shouldScroll) {\n          prevAnchorSelectorRef.current = anchorSelector ?? null;\n          return;\n        }\n\n        const anchorElement = container.querySelector(\n          anchorSelector!\n        ) as HTMLElement;\n        if (!anchorElement || !endDivRef.current) {\n          setIsScrollReady(true);\n          scrolledForSessionRef.current = sessionId ?? null;\n          prevAnchorSelectorRef.current = anchorSelector ?? null;\n          return;\n        }\n\n        // Determine scroll behavior\n        // New session with existing content = instant, new anchor = smooth\n        const isLoadingExistingContent =\n          isNewSession || scrolledForSessionRef.current === null;\n        const behavior: ScrollBehavior = isLoadingExistingContent\n          ? \"instant\"\n          : \"smooth\";\n\n        // Defer scroll to next tick for layout to settle\n        const timeoutId = setTimeout(() => {\n          let targetScrollTop: number;\n\n          // When loading an existing conversation, scroll to bottom\n          // Otherwise (e.g., anchor change during conversation), scroll to anchor\n          if (isLoadingExistingContent) {\n            targetScrollTop = container.scrollHeight - container.clientHeight;\n          } else {\n            targetScrollTop = Math.max(\n              0,\n              anchorElement.offsetTop - anchorOffsetPx\n            );\n          }\n\n          container.scrollTo({ top: targetScrollTop, behavior });\n\n          // Update prevScrollTopRef so scroll direction is measured from new position\n          prevScrollTopRef.current = targetScrollTop;\n\n          updateScrollState();\n\n          // Mark as \"at bottom\" after scrolling to bottom so auto-scroll continues\n          if (isLoadingExistingContent || autoScrollRef.current) {\n            isAtBottomRef.current = true;\n          }\n\n          setIsScrollReady(true);\n          scrolledForSessionRef.current = sessionId ?? null;\n          prevAnchorSelectorRef.current = anchorSelector ?? null;\n        }, 0);\n\n        return () => clearTimeout(timeoutId);\n      }, [sessionId, anchorSelector, anchorOffsetPx, updateScrollState]);\n\n      // Build mask to fade content opacity at edges\n      const contentMask = buildContentMask();\n\n      return (\n        <div className=\"flex flex-col flex-1 min-h-0 w-full relative overflow-hidden mb-1\">\n          <div\n            key={sessionId}\n            ref={scrollContainerRef}\n            data-testid=\"chat-scroll-container\"\n            className={cn(\n              \"flex flex-col flex-1 min-h-0 overflow-y-auto overflow-x-hidden\",\n              hideScrollbar ? \"no-scrollbar\" : \"default-scrollbar\"\n            )}\n            onScroll={handleScroll}\n            style={{\n              scrollbarGutter: \"stable both-edges\",\n              // Apply mask to fade content opacity at edges\n              maskImage: contentMask,\n              WebkitMaskImage: contentMask,\n            }}\n          >\n            <div\n              ref={contentWrapperRef}\n              className=\"w-full flex-1 flex flex-col items-center px-4\"\n              data-scroll-ready={isScrollReady}\n              style={{\n                visibility: isScrollReady ? \"visible\" : \"hidden\",\n              }}\n            >\n              <ScrollContainerProvider\n                scrollContainerRef={scrollContainerRef}\n                contentWrapperRef={contentWrapperRef}\n                spacerHeightRef={spacerHeightRef}\n              >\n                {children}\n              </ScrollContainerProvider>\n\n              {/* End marker to measure content end */}\n              <div ref={endDivRef} />\n            </div>\n          </div>\n        </div>\n      );\n    }\n  )\n);\n\nChatScrollContainer.displayName = \"ChatScrollContainer\";\n\nexport default ChatScrollContainer;\n"
  },
  {
    "path": "web/src/sections/chat/ChatUI.tsx",
    "content": "\"use client\";\n\nimport React, { useCallback, useMemo, useRef } from \"react\";\nimport { Message } from \"@/app/app/interfaces\";\nimport { OnyxDocument, MinimalOnyxDocument } from \"@/lib/search/interfaces\";\nimport HumanMessage from \"@/app/app/message/HumanMessage\";\nimport { ErrorBanner } from \"@/app/app/message/Resubmit\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { LlmDescriptor, LlmManager } from \"@/lib/hooks\";\nimport AgentMessage from \"@/app/app/message/messageComponents/AgentMessage\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport DynamicBottomSpacer from \"@/components/chat/DynamicBottomSpacer\";\nimport {\n  useCurrentMessageHistory,\n  useCurrentMessageTree,\n  useLoadingError,\n  useUncaughtError,\n} from \"@/app/app/stores/useChatSessionStore\";\n\nexport interface ChatUIProps {\n  liveAgent: MinimalPersonaSnapshot;\n  llmManager: LlmManager;\n  setPresentingDocument: (doc: MinimalOnyxDocument | null) => void;\n  onMessageSelection: (nodeId: number) => void;\n  stopGenerating: () => void;\n\n  // Submit handlers\n  onSubmit: (args: {\n    message: string;\n    messageIdToResend?: number;\n    currentMessageFiles: any[];\n    deepResearch: boolean;\n    modelOverride?: LlmDescriptor;\n    regenerationRequest?: {\n      messageId: number;\n      parentMessage: Message;\n      forceSearch?: boolean;\n    };\n    forceSearch?: boolean;\n  }) => Promise<void>;\n  deepResearchEnabled: boolean;\n  currentMessageFiles: any[];\n\n  onResubmit: () => void;\n\n  /**\n   * Node ID of the message to use as scroll anchor.\n   * Used by DynamicBottomSpacer to position the push-up effect.\n   */\n  anchorNodeId?: number;\n}\n\nconst ChatUI = React.memo(\n  ({\n    liveAgent,\n    llmManager,\n    setPresentingDocument,\n    onMessageSelection,\n    stopGenerating,\n    onSubmit,\n    deepResearchEnabled,\n    currentMessageFiles,\n    onResubmit,\n    anchorNodeId,\n  }: ChatUIProps) => {\n    // Get messages and error state from store\n    const messages = useCurrentMessageHistory();\n    const messageTree = useCurrentMessageTree();\n    const error = useUncaughtError();\n    const loadError = useLoadingError();\n    // Stable fallbacks to avoid changing prop identities on each render\n    const emptyDocs = useMemo<OnyxDocument[]>(() => [], []);\n    const emptyChildrenIds = useMemo<number[]>(() => [], []);\n\n    // Use refs to keep callbacks stable while always using latest values\n    const onSubmitRef = useRef(onSubmit);\n    const deepResearchEnabledRef = useRef(deepResearchEnabled);\n    const currentMessageFilesRef = useRef(currentMessageFiles);\n    onSubmitRef.current = onSubmit;\n    deepResearchEnabledRef.current = deepResearchEnabled;\n    currentMessageFilesRef.current = currentMessageFiles;\n\n    const createRegenerator = useCallback(\n      (regenerationRequest: {\n        messageId: number;\n        parentMessage: Message;\n        forceSearch?: boolean;\n      }) => {\n        return async function (modelOverride: LlmDescriptor) {\n          return await onSubmitRef.current({\n            message: regenerationRequest.parentMessage.message,\n            currentMessageFiles: currentMessageFilesRef.current,\n            deepResearch: deepResearchEnabledRef.current,\n            modelOverride,\n            messageIdToResend: regenerationRequest.parentMessage.messageId,\n            regenerationRequest,\n            forceSearch: regenerationRequest.forceSearch,\n          });\n        };\n      },\n      []\n    );\n\n    const handleEditWithMessageId = useCallback(\n      (editedContent: string, msgId: number) => {\n        onSubmitRef.current({\n          message: editedContent,\n          messageIdToResend: msgId,\n          currentMessageFiles: [],\n          deepResearch: deepResearchEnabledRef.current,\n        });\n      },\n      []\n    );\n\n    return (\n      <>\n        <div className=\"flex flex-col w-full max-w-[var(--app-page-main-content-width)] h-full pt-4 pb-8 pr-1 gap-12\">\n          {messages.map((message, i) => {\n            const messageReactComponentKey = `message-${message.nodeId}`;\n            const parentMessage = message.parentNodeId\n              ? messageTree?.get(message.parentNodeId)\n              : null;\n            if (message.type === \"user\") {\n              const nextMessage =\n                messages.length > i + 1 ? messages[i + 1] : null;\n\n              return (\n                <div\n                  id={messageReactComponentKey}\n                  key={messageReactComponentKey}\n                >\n                  <HumanMessage\n                    disableSwitchingForStreaming={\n                      (nextMessage && nextMessage.is_generating) || false\n                    }\n                    stopGenerating={stopGenerating}\n                    content={message.message}\n                    files={message.files}\n                    messageId={message.messageId}\n                    nodeId={message.nodeId}\n                    onEdit={handleEditWithMessageId}\n                    otherMessagesCanSwitchTo={\n                      parentMessage?.childrenNodeIds ?? emptyChildrenIds\n                    }\n                    onMessageSelection={onMessageSelection}\n                  />\n                </div>\n              );\n            } else if (message.type === \"assistant\") {\n              if ((error || loadError) && i === messages.length - 1) {\n                return (\n                  <div key={`error-${message.nodeId}`} className=\"p-4\">\n                    <ErrorBanner\n                      resubmit={onResubmit}\n                      error={error || loadError || \"\"}\n                      errorCode={message.errorCode || undefined}\n                      isRetryable={message.isRetryable ?? true}\n                      details={message.errorDetails || undefined}\n                      stackTrace={message.stackTrace || undefined}\n                    />\n                  </div>\n                );\n              }\n\n              const previousMessage = i !== 0 ? messages[i - 1] : null;\n              const chatStateData = {\n                agent: liveAgent,\n                docs: message.documents ?? emptyDocs,\n                citations: message.citations,\n                setPresentingDocument,\n                overriddenModel: llmManager.currentLlm?.modelName,\n                researchType: message.researchType,\n              };\n\n              return (\n                <div\n                  id={`message-${message.nodeId}`}\n                  key={messageReactComponentKey}\n                >\n                  <AgentMessage\n                    rawPackets={message.packets}\n                    packetCount={message.packetCount}\n                    chatState={chatStateData}\n                    nodeId={message.nodeId}\n                    messageId={message.messageId}\n                    currentFeedback={message.currentFeedback}\n                    llmManager={llmManager}\n                    otherMessagesCanSwitchTo={\n                      parentMessage?.childrenNodeIds ?? emptyChildrenIds\n                    }\n                    onMessageSelection={onMessageSelection}\n                    onRegenerate={createRegenerator}\n                    parentMessage={previousMessage}\n                    processingDurationSeconds={\n                      message.processingDurationSeconds\n                    }\n                  />\n                </div>\n              );\n            }\n            return null;\n          })}\n\n          {/* Error banner when last message is user message or error type */}\n          {(((error !== null || loadError !== null) &&\n            messages[messages.length - 1]?.type === \"user\") ||\n            messages[messages.length - 1]?.type === \"error\") && (\n            <div className=\"p-4\">\n              <ErrorBanner\n                resubmit={onResubmit}\n                error={error || loadError || \"\"}\n                errorCode={\n                  messages[messages.length - 1]?.errorCode || undefined\n                }\n                isRetryable={messages[messages.length - 1]?.isRetryable ?? true}\n                details={\n                  messages[messages.length - 1]?.errorDetails || undefined\n                }\n                stackTrace={\n                  messages[messages.length - 1]?.stackTrace || undefined\n                }\n              />\n            </div>\n          )}\n        </div>\n        {/* Dynamic spacer for \"fresh chat\" effect - pushes content up when new message is sent */}\n        <DynamicBottomSpacer anchorNodeId={anchorNodeId} />\n      </>\n    );\n  }\n);\nChatUI.displayName = \"ChatUI\";\n\nexport default ChatUI;\n"
  },
  {
    "path": "web/src/sections/document-sidebar/ChatDocumentDisplay.tsx",
    "content": "import { SourceIcon } from \"@/components/SourceIcon\";\nimport { MinimalOnyxDocument, OnyxDocument } from \"@/lib/search/interfaces\";\nimport { FiTag } from \"react-icons/fi\";\nimport { buildDocumentSummaryDisplay } from \"@/components/search/DocumentDisplay\";\nimport { DocumentUpdatedAtBadge } from \"@/components/search/DocumentUpdatedAtBadge\";\nimport { MetadataBadge } from \"@/components/MetadataBadge\";\nimport { WebResultIcon } from \"@/components/WebResultIcon\";\nimport { Dispatch, SetStateAction, useMemo } from \"react\";\nimport { openDocument } from \"@/lib/search/utils\";\nimport { ValidSources } from \"@/lib/types\";\nimport { cn } from \"@/lib/utils\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport Text from \"@/refresh-components/texts/Text\";\n\ninterface DocumentMetadataBlockProps {\n  modal?: boolean;\n  document: OnyxDocument;\n}\n\nfunction DocumentMetadataBlock({\n  modal,\n  document,\n}: DocumentMetadataBlockProps) {\n  const MAX_METADATA_ITEMS = 3;\n  const metadataEntries = Object.entries(document.metadata);\n\n  return (\n    <div className=\"flex items-center overflow-hidden\">\n      {document.updated_at && (\n        <DocumentUpdatedAtBadge updatedAt={document.updated_at} modal={modal} />\n      )}\n\n      {metadataEntries.length > 0 && (\n        <>\n          <div className=\"flex items-center overflow-hidden\">\n            {metadataEntries\n              .slice(0, MAX_METADATA_ITEMS)\n              .map(([key, value], index) => (\n                <MetadataBadge\n                  key={index}\n                  icon={FiTag}\n                  value={`${key}=${value}`}\n                />\n              ))}\n            {metadataEntries.length > MAX_METADATA_ITEMS && (\n              <span className=\"ml-1 text-xs text-text-500\">...</span>\n            )}\n          </div>\n        </>\n      )}\n    </div>\n  );\n}\n\nexport interface ChatDocumentDisplayProps {\n  document: OnyxDocument;\n  modal?: boolean;\n  isSelected: boolean;\n  setPresentingDocument: Dispatch<SetStateAction<MinimalOnyxDocument | null>>;\n}\n\nexport default function ChatDocumentDisplay({\n  document,\n  modal,\n  isSelected,\n  setPresentingDocument,\n}: ChatDocumentDisplayProps) {\n  const isInternet = document.is_internet;\n  const title = useMemo(\n    () => document.semantic_identifier || document.document_id,\n    [document.semantic_identifier, document.document_id]\n  );\n\n  if (document.score === null) {\n    return null;\n  }\n\n  const hasMetadata =\n    document.updated_at || Object.keys(document.metadata).length > 0;\n\n  return (\n    <div\n      onClick={() => openDocument(document, setPresentingDocument)}\n      className={cn(\n        \"flex w-full flex-col p-3 gap-2 rounded-12 hover:bg-background-tint-00 cursor-pointer\",\n        isSelected && \"bg-action-link-02\"\n      )}\n    >\n      <div className=\"flex items-center gap-2\">\n        {document.is_internet || document.source_type === ValidSources.Web ? (\n          <WebResultIcon url={document.link} />\n        ) : (\n          <SourceIcon sourceType={document.source_type} iconSize={18} />\n        )}\n        <Truncated className=\"line-clamp-2\" side=\"left\">\n          {title}\n        </Truncated>\n      </div>\n\n      {hasMetadata && (\n        <DocumentMetadataBlock modal={modal} document={document} />\n      )}\n\n      <Text as=\"p\" className=\"line-clamp-2 text-left\" secondaryBody text03>\n        {buildDocumentSummaryDisplay(document.match_highlights, document.blurb)}\n      </Text>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/document-sidebar/DocumentsSidebar.tsx",
    "content": "\"use client\";\n\nimport { MinimalOnyxDocument, OnyxDocument } from \"@/lib/search/interfaces\";\nimport ChatDocumentDisplay from \"@/sections/document-sidebar/ChatDocumentDisplay\";\nimport { removeDuplicateDocs } from \"@/lib/documentUtils\";\nimport { Dispatch, SetStateAction, useMemo, memo } from \"react\";\nimport { getCitations } from \"@/app/app/services/packetUtils\";\nimport {\n  useCurrentMessageTree,\n  useSelectedNodeForDocDisplay,\n} from \"@/app/app/stores/useChatSessionStore\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport { SvgSearchMenu, SvgX } from \"@opal/icons\";\nimport Separator from \"@/refresh-components/Separator\";\n\n// Build an OnyxDocument from basic file info\nconst buildOnyxDocumentFromFile = (\n  id: string,\n  name?: string | null,\n  appendProjectPrefix?: boolean\n): OnyxDocument => {\n  const document_id = appendProjectPrefix ? `project_file__${id}` : id;\n  return {\n    document_id,\n    semantic_identifier: name || id,\n    link: \"\",\n    source_type: \"file\" as any,\n    blurb: \"\",\n    boost: 0,\n    hidden: false,\n    score: 1,\n    chunk_ind: 0,\n    match_highlights: [],\n    metadata: {},\n    updated_at: null,\n    is_internet: false,\n  } as any;\n};\n\ninterface HeaderProps {\n  children: string;\n  onClose: () => void;\n}\n\nfunction Header({ children, onClose }: HeaderProps) {\n  return (\n    <div className=\"sticky top-0 z-sticky bg-background-tint-01\">\n      <div className=\"flex flex-row w-full items-center justify-between gap-2 py-3\">\n        <div className=\"flex items-center gap-2 w-full px-3\">\n          <SvgSearchMenu className=\"w-[1.3rem] h-[1.3rem] stroke-text-03\" />\n          <Text as=\"p\" headingH3 text03>\n            {children}\n          </Text>\n        </div>\n        <Button\n          icon={SvgX}\n          prominence=\"tertiary\"\n          onClick={onClose}\n          tooltip=\"Close Sidebar\"\n        />\n      </div>\n      <Separator noPadding />\n    </div>\n  );\n}\n\ninterface ChatDocumentDisplayWrapperProps {\n  children?: React.ReactNode;\n}\n\nfunction ChatDocumentDisplayWrapper({\n  children,\n}: ChatDocumentDisplayWrapperProps) {\n  return (\n    <div className=\"flex flex-col gap-1 items-center justify-center\">\n      {children}\n    </div>\n  );\n}\n\ninterface DocumentsSidebarProps {\n  closeSidebar: () => void;\n  selectedDocuments: OnyxDocument[] | null;\n  modal: boolean;\n  setPresentingDocument: Dispatch<SetStateAction<MinimalOnyxDocument | null>>;\n}\n\nconst DocumentsSidebar = memo(\n  ({\n    closeSidebar,\n    modal,\n    selectedDocuments,\n    setPresentingDocument,\n  }: DocumentsSidebarProps) => {\n    const idOfMessageToDisplay = useSelectedNodeForDocDisplay();\n    const currentMessageTree = useCurrentMessageTree();\n\n    const selectedMessage = idOfMessageToDisplay\n      ? currentMessageTree?.get(idOfMessageToDisplay)\n      : null;\n\n    // Get citations in order and build a set of cited document IDs\n    const { citedDocumentIds, citationOrder } = useMemo(() => {\n      if (!selectedMessage) {\n        return {\n          citedDocumentIds: new Set<string>(),\n          citationOrder: new Map<string, number>(),\n        };\n      }\n\n      const citedDocumentIds = new Set<string>();\n      const citationOrder = new Map<string, number>();\n      const citations = getCitations(selectedMessage.packets);\n      citations.forEach((citation, index) => {\n        citedDocumentIds.add(citation.document_id);\n        // Only set the order for the first occurrence\n        if (!citationOrder.has(citation.document_id)) {\n          citationOrder.set(citation.document_id, index);\n        }\n      });\n      return { citedDocumentIds, citationOrder };\n    }, [idOfMessageToDisplay, selectedMessage?.packets.length]);\n\n    // if these are missing for some reason, then nothing we can do. Just\n    // don't render.\n    // TODO: improve this display\n    if (!selectedMessage || !currentMessageTree) return null;\n\n    const humanMessage = selectedMessage.parentNodeId\n      ? currentMessageTree.get(selectedMessage.parentNodeId)\n      : null;\n    const humanFileDescriptors = humanMessage?.files.filter(\n      (file) => file.user_file_id !== null\n    );\n    const selectedDocumentIds =\n      selectedDocuments?.map((document) => document.document_id) || [];\n    const currentDocuments = selectedMessage.documents || null;\n    const dedupedDocuments = removeDuplicateDocs(currentDocuments || []);\n    const citedDocuments = dedupedDocuments\n      .filter(\n        (doc) =>\n          doc.document_id !== null &&\n          doc.document_id !== undefined &&\n          citedDocumentIds.has(doc.document_id)\n      )\n      .sort((a, b) => {\n        // Sort by citation order (order citations appeared in the answer)\n        const orderA = citationOrder.get(a.document_id) ?? Infinity;\n        const orderB = citationOrder.get(b.document_id) ?? Infinity;\n        return orderA - orderB;\n      });\n    const otherDocuments = dedupedDocuments.filter(\n      (doc) =>\n        doc.document_id === null ||\n        doc.document_id === undefined ||\n        !citedDocumentIds.has(doc.document_id)\n    );\n    const hasCited = citedDocuments.length > 0;\n    const hasOther = otherDocuments.length > 0;\n\n    return (\n      <div\n        id=\"onyx-chat-sidebar\"\n        className=\"bg-background-tint-01 overflow-y-scroll h-full w-full border-l\"\n      >\n        <div className=\"flex flex-col px-3 gap-6\">\n          {hasCited && (\n            <div>\n              <Header onClose={closeSidebar}>Cited Sources</Header>\n              <ChatDocumentDisplayWrapper>\n                {citedDocuments.map((document) => (\n                  <ChatDocumentDisplay\n                    key={document.document_id}\n                    setPresentingDocument={setPresentingDocument}\n                    modal={modal}\n                    document={document}\n                    isSelected={selectedDocumentIds.includes(\n                      document.document_id\n                    )}\n                  />\n                ))}\n              </ChatDocumentDisplayWrapper>\n            </div>\n          )}\n\n          {hasOther && (\n            <div>\n              <Header onClose={closeSidebar}>\n                {citedDocuments.length > 0 ? \"More\" : \"Found Sources\"}\n              </Header>\n              <ChatDocumentDisplayWrapper>\n                {otherDocuments.map((document) => (\n                  <ChatDocumentDisplay\n                    key={document.document_id}\n                    setPresentingDocument={setPresentingDocument}\n                    modal={modal}\n                    document={document}\n                    isSelected={selectedDocumentIds.includes(\n                      document.document_id\n                    )}\n                  />\n                ))}\n              </ChatDocumentDisplayWrapper>\n            </div>\n          )}\n\n          {humanFileDescriptors && humanFileDescriptors.length > 0 && (\n            <div>\n              <Header onClose={closeSidebar}>User Files</Header>\n              <ChatDocumentDisplayWrapper>\n                {humanFileDescriptors.map((file) => (\n                  <ChatDocumentDisplay\n                    key={file.id}\n                    setPresentingDocument={setPresentingDocument}\n                    modal={modal}\n                    document={buildOnyxDocumentFromFile(\n                      file.id,\n                      file.name,\n                      false\n                    )}\n                    isSelected={false}\n                  />\n                ))}\n              </ChatDocumentDisplayWrapper>\n            </div>\n          )}\n        </div>\n      </div>\n    );\n  }\n);\nDocumentsSidebar.displayName = \"DocumentsSidebar\";\n\nexport default DocumentsSidebar;\n"
  },
  {
    "path": "web/src/sections/input/AppInputBar.tsx",
    "content": "\"use client\";\n\nimport React, {\n  useCallback,\n  useContext,\n  useEffect,\n  useMemo,\n  useRef,\n  useState,\n} from \"react\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport LLMPopover from \"@/refresh-components/popovers/LLMPopover\";\nimport { InputPrompt } from \"@/app/app/interfaces\";\nimport { FilterManager, LlmManager, useFederatedConnectors } from \"@/lib/hooks\";\nimport usePromptShortcuts from \"@/hooks/usePromptShortcuts\";\nimport useFilter from \"@/hooks/useFilter\";\nimport useCCPairs from \"@/hooks/useCCPairs\";\nimport { MinimalOnyxDocument } from \"@/lib/search/interfaces\";\nimport { ChatState } from \"@/app/app/interfaces\";\nimport { useForcedTools } from \"@/lib/hooks/useForcedTools\";\nimport useAppFocus from \"@/hooks/useAppFocus\";\nimport { getPastedFilesIfNoText } from \"@/lib/clipboard\";\nimport { cn, isImageFile } from \"@/lib/utils\";\nimport { Disabled } from \"@opal/core\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport {\n  SettingsContext,\n  useVectorDbEnabled,\n} from \"@/providers/SettingsProvider\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport { FileCard } from \"@/sections/cards/FileCard\";\nimport {\n  ProjectFile,\n  UserFileStatus,\n} from \"@/app/app/projects/projectsService\";\nimport FilePickerPopover from \"@/refresh-components/popovers/FilePickerPopover\";\nimport ActionsPopover from \"@/refresh-components/popovers/ActionsPopover\";\nimport {\n  getIconForAction,\n  hasSearchToolsAvailable,\n} from \"@/app/app/services/actionUtils\";\nimport {\n  SvgArrowUp,\n  SvgGlobe,\n  SvgHourglass,\n  SvgMicrophone,\n  SvgPlus,\n  SvgPlusCircle,\n  SvgSearch,\n  SvgStop,\n  SvgX,\n} from \"@opal/icons\";\nimport { Button, SelectButton } from \"@opal/components\";\nimport Popover from \"@/refresh-components/Popover\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { useQueryController } from \"@/providers/QueryControllerProvider\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport MicrophoneButton from \"@/sections/input/MicrophoneButton\";\nimport Waveform from \"@/components/voice/Waveform\";\nimport { useVoiceMode } from \"@/providers/VoiceModeProvider\";\nimport { useVoiceStatus } from \"@/hooks/useVoiceStatus\";\n\nconst MIN_INPUT_HEIGHT = 44;\nconst MAX_INPUT_HEIGHT = 200;\n\nexport interface AppInputBarHandle {\n  reset: () => void;\n  focus: () => void;\n}\n\nexport interface AppInputBarProps {\n  initialMessage?: string;\n  stopGenerating: () => void;\n  onSubmit: (message: string) => void;\n  llmManager: LlmManager;\n  chatState: ChatState;\n  currentSessionFileTokenCount: number;\n  availableContextTokens: number;\n\n  // agents\n  selectedAgent: MinimalPersonaSnapshot | undefined;\n\n  handleFileUpload: (files: File[]) => void;\n  filterManager: FilterManager;\n  deepResearchEnabled: boolean;\n  setPresentingDocument?: (document: MinimalOnyxDocument) => void;\n  toggleDeepResearch: () => void;\n  disabled: boolean;\n  ref?: React.Ref<AppInputBarHandle>;\n  // Side panel tab reading\n  tabReadingEnabled?: boolean;\n  currentTabUrl?: string | null;\n  onToggleTabReading?: () => void;\n}\n\nconst AppInputBar = React.memo(\n  ({\n    filterManager,\n    initialMessage = \"\",\n    stopGenerating,\n    onSubmit,\n    chatState,\n    currentSessionFileTokenCount,\n    availableContextTokens,\n    selectedAgent,\n\n    handleFileUpload,\n    llmManager,\n    deepResearchEnabled,\n    toggleDeepResearch,\n    setPresentingDocument,\n    disabled,\n    ref,\n    tabReadingEnabled,\n    currentTabUrl,\n    onToggleTabReading,\n  }: AppInputBarProps) => {\n    // Internal message state - kept local to avoid parent re-renders on every keystroke\n    const [message, setMessage] = useState(initialMessage);\n    const [isRecording, setIsRecording] = useState(false);\n    const [recordingCycleCount, setRecordingCycleCount] = useState(0);\n    const [isMuted, setIsMuted] = useState(false);\n    const [audioLevel, setAudioLevel] = useState(0);\n    const stopRecordingRef = useRef<(() => Promise<string | null>) | null>(\n      null\n    );\n    const setMutedRef = useRef<((muted: boolean) => void) | null>(null);\n    const textAreaRef = useRef<HTMLTextAreaElement>(null);\n    const textAreaWrapperRef = useRef<HTMLDivElement>(null);\n    const filesWrapperRef = useRef<HTMLDivElement>(null);\n    const filesContentRef = useRef<HTMLDivElement>(null);\n    const containerRef = useRef<HTMLDivElement>(null);\n    const { user, isAdmin } = useUser();\n    const { state } = useQueryController();\n    const isClassifying = state.phase === \"classifying\";\n    const isSearchActive =\n      state.phase === \"searching\" || state.phase === \"search-results\";\n    const {\n      stopTTS,\n      isTTSPlaying,\n      isManualTTSPlaying,\n      isTTSLoading,\n      isAwaitingAutoPlaybackStart,\n      isTTSMuted,\n      toggleTTSMute,\n    } = useVoiceMode();\n    const { sttEnabled } = useVoiceStatus();\n    // Show mic button: always if STT configured, or greyed-out for admins to prompt setup\n    const showMicButton = sttEnabled || isAdmin;\n    const isVoicePlaybackActive =\n      isTTSPlaying || isTTSLoading || isAwaitingAutoPlaybackStart;\n    const isVoicePlaybackControllable = isVoicePlaybackActive && !isRecording;\n    const isTTSActuallySpeaking = isTTSPlaying || isManualTTSPlaying;\n    const appFocus = useAppFocus();\n    const isNewSession = appFocus.isNewSession();\n    const appMode = state.phase === \"idle\" ? state.appMode : undefined;\n    const isSearchMode =\n      (isNewSession && appMode === \"search\") || isSearchActive;\n\n    const handleRecordingChange = useCallback((nextIsRecording: boolean) => {\n      setIsRecording((prevIsRecording) => {\n        if (!prevIsRecording && nextIsRecording) {\n          setRecordingCycleCount((count) => count + 1);\n        }\n        return nextIsRecording;\n      });\n    }, []);\n\n    // Wrapper for onSubmit that stops TTS first to prevent overlapping voices\n    const handleSubmit = useCallback(\n      (text: string) => {\n        stopTTS();\n        onSubmit(text);\n      },\n      [stopTTS, onSubmit]\n    );\n    const submitMessage = useCallback(\n      (text: string) => {\n        if (!text.trim()) {\n          return;\n        }\n        handleSubmit(text);\n      },\n      [handleSubmit]\n    );\n\n    // Expose reset and focus methods to parent via ref\n    React.useImperativeHandle(ref, () => ({\n      reset: () => {\n        setMessage(\"\");\n      },\n      focus: () => {\n        textAreaRef.current?.focus();\n      },\n    }));\n\n    // Sync non-empty prop changes to internal state (e.g. NRFPage reads URL params\n    // after mount). Intentionally skips empty strings — clearing is handled via the\n    // imperative ref.reset() method, not by passing initialMessage=\"\".\n    useEffect(() => {\n      if (initialMessage) {\n        setMessage(initialMessage);\n      }\n    }, [initialMessage]);\n    const shouldShowRecordingWaveformBelow =\n      isRecording &&\n      !isVoicePlaybackActive &&\n      (isNewSession || recordingCycleCount === 1);\n\n    useEffect(() => {\n      if (isNewSession && !initialMessage) {\n        setMessage(\"\");\n      }\n    }, [isNewSession, initialMessage]);\n\n    const { forcedToolIds, setForcedToolIds } = useForcedTools();\n    const { currentMessageFiles, setCurrentMessageFiles, currentProjectId } =\n      useProjectsContext();\n\n    const currentIndexingFiles = useMemo(() => {\n      return currentMessageFiles.filter(\n        (file) => file.status === UserFileStatus.PROCESSING\n      );\n    }, [currentMessageFiles]);\n\n    const hasUploadingFiles = useMemo(() => {\n      return currentMessageFiles.some(\n        (file) => file.status === UserFileStatus.UPLOADING\n      );\n    }, [currentMessageFiles]);\n\n    // Convert ProjectFile to MinimalOnyxDocument format for viewing\n    const handleFileClick = useCallback(\n      (file: ProjectFile) => {\n        if (!setPresentingDocument) return;\n\n        const documentForViewer: MinimalOnyxDocument = {\n          document_id: `project_file__${file.file_id}`,\n          semantic_identifier: file.name,\n        };\n\n        setPresentingDocument(documentForViewer);\n      },\n      [setPresentingDocument]\n    );\n\n    const handleUploadChange = useCallback(\n      async (e: React.ChangeEvent<HTMLInputElement>) => {\n        const files = e.target.files;\n        if (!files || files.length === 0) return;\n        handleFileUpload(Array.from(files));\n        e.target.value = \"\";\n      },\n      [handleFileUpload]\n    );\n\n    const combinedSettings = useContext(SettingsContext);\n\n    // TODO(@raunakab): Replace this useEffect with CSS `field-sizing: content` once\n    // Firefox ships it unflagged (currently behind `layout.css.field-sizing.enabled`).\n    // Auto-resize textarea based on content (chat mode only).\n    // Reset to min-height first so scrollHeight reflects actual content size,\n    // then clamp between min and max. This handles both growing and shrinking.\n    useEffect(() => {\n      const wrapper = textAreaWrapperRef.current;\n      const textarea = textAreaRef.current;\n      if (!wrapper || !textarea) return;\n\n      // Reset so scrollHeight reflects actual content size\n      wrapper.style.height = `${MIN_INPUT_HEIGHT}px`;\n\n      // scrollHeight doesn't include the wrapper's padding, so add it back\n      const wrapperStyle = getComputedStyle(wrapper);\n      const paddingTop = parseFloat(wrapperStyle.paddingTop);\n      const paddingBottom = parseFloat(wrapperStyle.paddingBottom);\n      const contentHeight = textarea.scrollHeight + paddingTop + paddingBottom;\n\n      wrapper.style.height = `${Math.min(\n        Math.max(contentHeight, MIN_INPUT_HEIGHT),\n        MAX_INPUT_HEIGHT\n      )}px`;\n    }, [message, isSearchMode]);\n\n    // Animate attached files wrapper to its content height so CSS transitions\n    // can interpolate between concrete pixel values (0px ↔ Npx).\n    const showFiles = !isSearchMode && currentMessageFiles.length > 0;\n    useEffect(() => {\n      const wrapper = filesWrapperRef.current;\n      const content = filesContentRef.current;\n      if (!wrapper || !content) return;\n\n      if (showFiles) {\n        // Measure the inner content's actual height, then add padding (p-1 = 8px total)\n        const PADDING = 8;\n        wrapper.style.height = `${content.offsetHeight + PADDING}px`;\n      } else {\n        wrapper.style.height = \"0px\";\n      }\n    }, [showFiles, currentMessageFiles]);\n\n    function handlePaste(event: React.ClipboardEvent) {\n      const pastedFiles = getPastedFilesIfNoText(event.clipboardData);\n      if (pastedFiles.length > 0) {\n        event.preventDefault();\n        handleFileUpload(pastedFiles);\n      }\n    }\n\n    const handleRemoveMessageFile = useCallback(\n      (fileId: string) => {\n        setCurrentMessageFiles((prev) => prev.filter((f) => f.id !== fileId));\n      },\n      [setCurrentMessageFiles]\n    );\n\n    const { activePromptShortcuts } = usePromptShortcuts();\n    const vectorDbEnabled = useVectorDbEnabled();\n    const { ccPairs, isLoading: ccPairsLoading } = useCCPairs(vectorDbEnabled);\n    const { data: federatedConnectorsData, isLoading: federatedLoading } =\n      useFederatedConnectors();\n\n    // Bottom controls are hidden until all data is loaded\n    const controlsLoading =\n      ccPairsLoading ||\n      federatedLoading ||\n      !selectedAgent ||\n      llmManager.isLoadingProviders;\n    const [showPrompts, setShowPrompts] = useState(false);\n\n    // Memoize availableSources to prevent unnecessary re-renders\n    const memoizedAvailableSources = useMemo(\n      () => [\n        ...ccPairs.map((ccPair) => ccPair.source),\n        ...(federatedConnectorsData?.map((connector) => connector.source) ||\n          []),\n      ],\n      [ccPairs, federatedConnectorsData]\n    );\n\n    const [tabbingIconIndex, setTabbingIconIndex] = useState(0);\n\n    const hidePrompts = useCallback(() => {\n      setTimeout(() => {\n        setShowPrompts(false);\n      }, 50);\n      setTabbingIconIndex(0);\n    }, []);\n\n    function updateInputPrompt(prompt: InputPrompt) {\n      hidePrompts();\n      setMessage(`${prompt.content}`);\n    }\n\n    const { filtered: filteredPrompts, setQuery: setPromptFilterQuery } =\n      useFilter(activePromptShortcuts, (prompt) => prompt.prompt);\n\n    // Memoize sorted prompts to avoid re-sorting on every render\n    const sortedFilteredPrompts = useMemo(\n      () => [...filteredPrompts].sort((a, b) => a.id - b.id),\n      [filteredPrompts]\n    );\n\n    // Reset tabbingIconIndex when filtered prompts change to avoid out-of-bounds\n    useEffect(() => {\n      setTabbingIconIndex(0);\n    }, [filteredPrompts]);\n\n    const handlePromptInput = useCallback(\n      (text: string) => {\n        if (text.startsWith(\"/\")) {\n          setShowPrompts(true);\n        } else {\n          hidePrompts();\n        }\n      },\n      [hidePrompts]\n    );\n\n    const handleInputChange = useCallback(\n      (event: React.ChangeEvent<HTMLTextAreaElement>) => {\n        const text = event.target.value;\n        setMessage(text);\n        handlePromptInput(text);\n\n        const promptFilterQuery = text.startsWith(\"/\") ? text.slice(1) : \"\";\n        setPromptFilterQuery(promptFilterQuery);\n      },\n      [setMessage, handlePromptInput, setPromptFilterQuery]\n    );\n\n    // Determine if we should hide processing state based on context limits\n    const hideProcessingState = useMemo(() => {\n      if (currentMessageFiles.length > 0 && currentIndexingFiles.length > 0) {\n        const currentFilesTokenTotal = currentMessageFiles.reduce(\n          (acc, file) => acc + (file.token_count || 0),\n          0\n        );\n        const totalTokens =\n          (currentSessionFileTokenCount || 0) + currentFilesTokenTotal;\n        // Hide processing state when files are within context limits\n        return totalTokens < availableContextTokens;\n      }\n      return false;\n    }, [\n      currentMessageFiles,\n      currentSessionFileTokenCount,\n      currentIndexingFiles,\n      availableContextTokens,\n    ]);\n\n    const shouldCompactImages = useMemo(() => {\n      return currentMessageFiles.length > 1;\n    }, [currentMessageFiles]);\n\n    const hasImageFiles = useMemo(\n      () => currentMessageFiles.some((f) => isImageFile(f.name)),\n      [currentMessageFiles]\n    );\n\n    // Check if the agent has search tools available (internal search or web search)\n    // AND if deep research is globally enabled in admin settings\n    const showDeepResearch = useMemo(() => {\n      const deepResearchGloballyEnabled =\n        combinedSettings?.settings?.deep_research_enabled ?? true;\n      const isProjectWorkflow = currentProjectId !== null;\n\n      // TODO(@yuhong): Re-enable Deep Research in Projects workflow once it is fully supported.\n      // https://linear.app/onyx-app/issue/ENG-3818/re-enable-deep-research-in-projects\n      return (\n        !isProjectWorkflow &&\n        deepResearchGloballyEnabled &&\n        hasSearchToolsAvailable(selectedAgent?.tools || [])\n      );\n    }, [\n      selectedAgent?.tools,\n      combinedSettings?.settings?.deep_research_enabled,\n      currentProjectId,\n    ]);\n\n    function handleKeyDownForPromptShortcuts(\n      e: React.KeyboardEvent<HTMLTextAreaElement>\n    ) {\n      if (!user?.preferences?.shortcut_enabled || !showPrompts) return;\n\n      if (e.key === \"Enter\") {\n        e.preventDefault();\n        if (tabbingIconIndex === sortedFilteredPrompts.length) {\n          // \"Create a new prompt\" is selected\n          window.open(\"/app/settings/chat-preferences\", \"_self\");\n        } else {\n          const selectedPrompt = sortedFilteredPrompts[tabbingIconIndex];\n          if (selectedPrompt) {\n            updateInputPrompt(selectedPrompt);\n          }\n        }\n      } else if (e.key === \"Tab\" && e.shiftKey) {\n        // Shift+Tab: cycle backward\n        e.preventDefault();\n        setTabbingIconIndex((prev) => Math.max(prev - 1, 0));\n      } else if (e.key === \"Tab\") {\n        // Tab: cycle forward\n        e.preventDefault();\n        setTabbingIconIndex((prev) =>\n          Math.min(prev + 1, sortedFilteredPrompts.length)\n        );\n      } else if (e.key === \"ArrowDown\") {\n        e.preventDefault();\n        setTabbingIconIndex((prev) =>\n          Math.min(prev + 1, sortedFilteredPrompts.length)\n        );\n      } else if (e.key === \"ArrowUp\") {\n        e.preventDefault();\n        setTabbingIconIndex((prev) => Math.max(prev - 1, 0));\n      }\n    }\n\n    const chatControls = (\n      <div\n        {...(isSearchMode ? { inert: true } : {})}\n        className={cn(\n          \"flex justify-between items-center w-full\",\n          isSearchMode\n            ? \"opacity-0 p-0 h-0 overflow-hidden pointer-events-none\"\n            : \"opacity-100 p-1 h-[2.75rem] pointer-events-auto\",\n          \"transition-all duration-150\"\n        )}\n      >\n        {/* Bottom left controls */}\n        <div className=\"flex flex-row items-center\">\n          {/* (+) button - always visible */}\n          <FilePickerPopover\n            onFileClick={handleFileClick}\n            onPickRecent={(file: ProjectFile) => {\n              // Check if file with same ID already exists\n              if (\n                !currentMessageFiles.some(\n                  (existingFile) => existingFile.file_id === file.file_id\n                )\n              ) {\n                setCurrentMessageFiles((prev) => [...prev, file]);\n              }\n            }}\n            onUnpickRecent={(file: ProjectFile) => {\n              setCurrentMessageFiles((prev) =>\n                prev.filter(\n                  (existingFile) => existingFile.file_id !== file.file_id\n                )\n              );\n            }}\n            handleUploadChange={handleUploadChange}\n            trigger={(open) => (\n              <Button\n                disabled={disabled}\n                icon={SvgPlusCircle}\n                tooltip=\"Attach Files\"\n                interaction={open ? \"hover\" : \"rest\"}\n                prominence=\"tertiary\"\n              />\n            )}\n            selectedFileIds={currentMessageFiles.map((f) => f.id)}\n          />\n\n          {/* Controls that load in when data is ready */}\n          <div\n            data-testid=\"actions-container\"\n            className={cn(\n              \"flex flex-row items-center\",\n              controlsLoading && \"invisible\"\n            )}\n          >\n            {selectedAgent && selectedAgent.tools.length > 0 && (\n              <ActionsPopover\n                selectedAgent={selectedAgent}\n                filterManager={filterManager}\n                availableSources={memoizedAvailableSources}\n                disabled={disabled}\n              />\n            )}\n            {onToggleTabReading ? (\n              <SelectButton\n                disabled={disabled}\n                icon={SvgGlobe}\n                onClick={onToggleTabReading}\n                state={tabReadingEnabled ? \"selected\" : \"empty\"}\n              >\n                {tabReadingEnabled\n                  ? currentTabUrl\n                    ? (() => {\n                        try {\n                          return new URL(currentTabUrl).hostname;\n                        } catch {\n                          return currentTabUrl;\n                        }\n                      })()\n                    : \"Reading tab...\"\n                  : \"Read this tab\"}\n              </SelectButton>\n            ) : (\n              showDeepResearch && (\n                <SelectButton\n                  disabled={disabled}\n                  variant=\"select-light\"\n                  icon={SvgHourglass}\n                  onClick={toggleDeepResearch}\n                  state={deepResearchEnabled ? \"selected\" : \"empty\"}\n                  foldable={!deepResearchEnabled}\n                >\n                  Deep Research\n                </SelectButton>\n              )\n            )}\n\n            {selectedAgent &&\n              forcedToolIds.length > 0 &&\n              forcedToolIds.map((toolId) => {\n                const tool = selectedAgent.tools.find(\n                  (tool) => tool.id === toolId\n                );\n                if (!tool) {\n                  return null;\n                }\n                return (\n                  <Disabled disabled={disabled} key={toolId}>\n                    <SelectButton\n                      variant=\"select-light\"\n                      icon={getIconForAction(tool)}\n                      onClick={() => {\n                        setForcedToolIds(\n                          forcedToolIds.filter((id) => id !== toolId)\n                        );\n                      }}\n                      state=\"selected\"\n                    >\n                      {tool.display_name}\n                    </SelectButton>\n                  </Disabled>\n                );\n              })}\n          </div>\n        </div>\n\n        {/* Bottom right controls */}\n        <div className=\"flex flex-row items-center gap-1\">\n          <div\n            data-testid=\"AppInputBar/llm-popover-trigger\"\n            className={cn(controlsLoading && \"invisible\")}\n          >\n            <LLMPopover\n              llmManager={llmManager}\n              requiresImageInput={hasImageFiles}\n              disabled={disabled}\n            />\n          </div>\n          {showMicButton &&\n            (sttEnabled ? (\n              <MicrophoneButton\n                onTranscription={(text) => setMessage(text)}\n                disabled={disabled || chatState === \"streaming\"}\n                autoSend={user?.preferences?.voice_auto_send ?? false}\n                autoListen={user?.preferences?.voice_auto_playback ?? false}\n                isNewSession={isNewSession}\n                chatState={chatState}\n                onRecordingChange={handleRecordingChange}\n                stopRecordingRef={stopRecordingRef}\n                currentMessage={message}\n                onRecordingStart={() => {}}\n                onAutoSend={(text) => {\n                  submitMessage(text);\n                }}\n                onMuteChange={setIsMuted}\n                setMutedRef={setMutedRef}\n                onAudioLevel={setAudioLevel}\n              />\n            ) : (\n              <Button\n                disabled\n                icon={SvgMicrophone}\n                aria-label=\"Set up voice\"\n                prominence=\"tertiary\"\n                tooltip=\"Voice not configured. Set up in admin settings.\"\n              />\n            ))}\n\n          <Button\n            disabled={\n              (chatState === \"input\" &&\n                !isVoicePlaybackControllable &&\n                !message) ||\n              hasUploadingFiles ||\n              isClassifying\n            }\n            id=\"onyx-chat-input-send-button\"\n            icon={\n              isClassifying\n                ? SimpleLoader\n                : chatState === \"streaming\" || isVoicePlaybackControllable\n                  ? SvgStop\n                  : SvgArrowUp\n            }\n            onClick={() => {\n              if (chatState == \"streaming\") {\n                stopTTS({ manual: true });\n                stopGenerating();\n              } else if (isVoicePlaybackControllable) {\n                stopTTS({ manual: true });\n              } else if (message) {\n                submitMessage(message);\n              }\n            }}\n          />\n        </div>\n      </div>\n    );\n\n    return (\n      <Disabled disabled={disabled} allowClick>\n        <div\n          ref={containerRef}\n          id=\"onyx-chat-input\"\n          className={cn(\n            \"relative w-full flex flex-col shadow-01 bg-background-neutral-00 rounded-16\"\n            // # Note (from @raunakab):\n            //\n            // `shadow-01` extends ~14px below the element (2px offset + 12px blur).\n            // Because the content area in `Root` (app-layouts.tsx) uses `overflow-auto`,\n            // shadows that exceed the container bounds are clipped.\n            //\n            // The 14px breathing room is now applied externally via animated spacer\n            // divs in `AppPage.tsx` (above and below the AppInputBar) so that the\n            // spacing can transition smoothly when switching between search and chat\n            // modes. See the corresponding note there for details.\n          )}\n        >\n          {/* Voice waveform overlay (positioned outside normal flow to avoid resizing input) */}\n          {isTTSActuallySpeaking ? (\n            <div className=\"absolute bottom-full mb-1 left-1 z-10\">\n              <Waveform\n                variant=\"speaking\"\n                isActive={isTTSActuallySpeaking}\n                isMuted={isTTSMuted}\n                onMuteToggle={toggleTTSMute}\n              />\n            </div>\n          ) : isRecording &&\n            !isVoicePlaybackActive &&\n            !shouldShowRecordingWaveformBelow ? (\n            <div className=\"absolute bottom-full mb-1 left-1 right-1 z-10\">\n              <Waveform\n                variant=\"recording\"\n                isActive={isRecording}\n                isMuted={isMuted}\n                audioLevel={audioLevel}\n                onMuteToggle={() => {\n                  setMutedRef.current?.(!isMuted);\n                }}\n              />\n            </div>\n          ) : null}\n\n          {/* Attached Files */}\n          <div\n            ref={filesWrapperRef}\n            {...(!showFiles ? { inert: true } : {})}\n            className={cn(\n              \"transition-all duration-150\",\n              showFiles\n                ? \"opacity-100 p-1\"\n                : \"opacity-0 p-0 overflow-hidden pointer-events-none\"\n            )}\n          >\n            <div ref={filesContentRef} className=\"flex flex-wrap gap-1\">\n              {currentMessageFiles.map((file) => (\n                <FileCard\n                  key={file.id}\n                  file={file}\n                  removeFile={handleRemoveMessageFile}\n                  hideProcessingState={hideProcessingState}\n                  onFileClick={handleFileClick}\n                  compactImages={shouldCompactImages}\n                />\n              ))}\n            </div>\n          </div>\n\n          <div className=\"flex flex-row items-center w-full\">\n            <Popover\n              open={user?.preferences?.shortcut_enabled && showPrompts}\n              onOpenChange={setShowPrompts}\n            >\n              <Popover.Anchor asChild>\n                <div\n                  ref={textAreaWrapperRef}\n                  className=\"px-3 py-2 flex-1 flex h-[2.75rem]\"\n                >\n                  <textarea\n                    id=\"onyx-chat-input-textarea\"\n                    role=\"textarea\"\n                    ref={textAreaRef}\n                    onPaste={handlePaste}\n                    onKeyDownCapture={handleKeyDownForPromptShortcuts}\n                    onChange={handleInputChange}\n                    className={cn(\n                      \"p-[2px] w-full h-full outline-none bg-transparent resize-none placeholder:text-text-03 whitespace-pre-wrap break-words\",\n                      \"overflow-y-auto\"\n                    )}\n                    autoFocus\n                    rows={1}\n                    style={{ scrollbarWidth: \"thin\" }}\n                    aria-multiline={true}\n                    placeholder={\n                      isRecording\n                        ? \"Listening...\"\n                        : isVoicePlaybackActive\n                          ? \"Onyx is speaking...\"\n                          : isSearchMode\n                            ? \"Search connected sources\"\n                            : \"How can I help you today?\"\n                    }\n                    value={message}\n                    onKeyDown={(event) => {\n                      if (\n                        event.key === \"Enter\" &&\n                        !showPrompts &&\n                        !event.shiftKey &&\n                        !(event.nativeEvent as any).isComposing\n                      ) {\n                        event.preventDefault();\n                        if (\n                          message &&\n                          !disabled &&\n                          !isClassifying &&\n                          !hasUploadingFiles\n                        ) {\n                          submitMessage(message);\n                        }\n                      }\n                    }}\n                    suppressContentEditableWarning={true}\n                    disabled={disabled}\n                  />\n                </div>\n              </Popover.Anchor>\n\n              <Popover.Content\n                side=\"top\"\n                align=\"start\"\n                onOpenAutoFocus={(e) => e.preventDefault()}\n                width=\"xl\"\n              >\n                <Popover.Menu>\n                  {[\n                    ...sortedFilteredPrompts.map((prompt, index) => (\n                      <LineItem\n                        key={prompt.id}\n                        selected={tabbingIconIndex === index}\n                        emphasized={tabbingIconIndex === index}\n                        description={prompt.content?.trim()}\n                        onClick={() => updateInputPrompt(prompt)}\n                      >\n                        {prompt.prompt}\n                      </LineItem>\n                    )),\n                    sortedFilteredPrompts.length > 0 ? null : undefined,\n                    <LineItem\n                      key=\"create-new\"\n                      href=\"/app/settings/chat-preferences\"\n                      icon={SvgPlus}\n                      selected={\n                        tabbingIconIndex === sortedFilteredPrompts.length\n                      }\n                      emphasized={\n                        tabbingIconIndex === sortedFilteredPrompts.length\n                      }\n                    >\n                      Create New Prompt\n                    </LineItem>,\n                  ]}\n                </Popover.Menu>\n              </Popover.Content>\n            </Popover>\n\n            {isSearchMode && (\n              <Section flexDirection=\"row\" width=\"fit\" gap={0}>\n                <Button\n                  disabled={!message || isClassifying}\n                  icon={SvgX}\n                  onClick={() => setMessage(\"\")}\n                  prominence=\"tertiary\"\n                />\n                <Button\n                  disabled={!message || isClassifying || hasUploadingFiles}\n                  id=\"onyx-chat-input-send-button\"\n                  icon={isClassifying ? SimpleLoader : SvgSearch}\n                  onClick={() => {\n                    if (chatState == \"streaming\") {\n                      stopGenerating();\n                    } else if (message) {\n                      submitMessage(message);\n                    }\n                  }}\n                  prominence=\"tertiary\"\n                />\n                <Spacer horizontal rem={0.25} />\n              </Section>\n            )}\n          </div>\n\n          {chatControls}\n\n          {/* First recording cycle waveform below input */}\n          {shouldShowRecordingWaveformBelow && (\n            <div className=\"absolute top-full mt-1 left-1 right-1 z-10\">\n              <Waveform\n                variant=\"recording\"\n                isActive={isRecording}\n                isMuted={isMuted}\n                audioLevel={audioLevel}\n                onMuteToggle={() => {\n                  setMutedRef.current?.(!isMuted);\n                }}\n              />\n            </div>\n          )}\n        </div>\n      </Disabled>\n    );\n  }\n);\nAppInputBar.displayName = \"AppInputBar\";\n\nexport default AppInputBar;\n"
  },
  {
    "path": "web/src/sections/input/MicrophoneButton.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useEffect, useRef } from \"react\";\nimport { Button } from \"@opal/components\";\nimport { SvgMicrophone } from \"@opal/icons\";\nimport { useVoiceRecorder } from \"@/hooks/useVoiceRecorder\";\nimport { useVoiceMode } from \"@/providers/VoiceModeProvider\";\nimport { toast } from \"@/hooks/useToast\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { ChatState } from \"@/app/app/interfaces\";\n\ninterface MicrophoneButtonProps {\n  onTranscription: (text: string) => void;\n  disabled?: boolean;\n  autoSend?: boolean;\n  /** Called with transcribed text when autoSend is enabled */\n  onAutoSend?: (text: string) => void;\n  /**\n   * Internal prop: auto-start listening when TTS finishes or chat response completes.\n   * Tied to voice_auto_playback user preference.\n   * Enables conversation flow: speak → AI responds → auto-listen again.\n   * Note: autoSend is separate - it controls whether message auto-submits after recording.\n   */\n  autoListen?: boolean;\n  /** Current chat state - used to detect when response streaming finishes */\n  chatState?: ChatState;\n  /** Called when recording state changes */\n  onRecordingChange?: (isRecording: boolean) => void;\n  /** Ref to expose stop recording function to parent */\n  stopRecordingRef?: React.MutableRefObject<\n    (() => Promise<string | null>) | null\n  >;\n  /** Called when recording starts */\n  onRecordingStart?: () => void;\n  /** Existing message text to prepend to transcription (append mode) */\n  currentMessage?: string;\n  /** Called when mute state changes */\n  onMuteChange?: (isMuted: boolean) => void;\n  /** Ref to expose setMuted function to parent */\n  setMutedRef?: React.MutableRefObject<((muted: boolean) => void) | null>;\n  /** Called with current microphone audio level (0-1) for waveform visualization */\n  onAudioLevel?: (level: number) => void;\n  /** Whether current chat is a new session (used to reset auto-listen arming) */\n  isNewSession?: boolean;\n}\n\nfunction MicrophoneButton({\n  onTranscription,\n  disabled = false,\n  autoSend = false,\n  onAutoSend,\n  autoListen = false,\n  chatState,\n  onRecordingChange,\n  stopRecordingRef,\n  onRecordingStart,\n  currentMessage = \"\",\n  onMuteChange,\n  setMutedRef,\n  onAudioLevel,\n  isNewSession = false,\n}: MicrophoneButtonProps) {\n  const {\n    isTTSPlaying,\n    isTTSLoading,\n    isAwaitingAutoPlaybackStart,\n    manualStopCount,\n  } = useVoiceMode();\n\n  // Refs for tracking state across renders\n  // Track whether TTS was actually playing audio (not just loading)\n  const wasTTSActuallyPlayingRef = useRef(false);\n  const manualStopRequestedRef = useRef(false);\n  const lastHandledManualStopCountRef = useRef(manualStopCount);\n  const autoListenCooldownTimerRef = useRef<NodeJS.Timeout | null>(null);\n  const hasManualRecordStartRef = useRef(false);\n  // Prevent late transcript events from repopulating input after auto-send.\n  const suppressTranscriptUpdatesRef = useRef(false);\n  // Snapshot of existing message text when recording starts (for append mode)\n  const messagePrefixRef = useRef(\"\");\n  const currentMessageRef = useRef(currentMessage);\n\n  useEffect(() => {\n    currentMessageRef.current = currentMessage;\n  }, [currentMessage]);\n\n  // Helper to combine prefix with new transcript\n  const withPrefix = useCallback((text: string) => {\n    const prefix = messagePrefixRef.current;\n    if (!prefix) return text;\n    return prefix + (prefix.endsWith(\" \") ? \"\" : \" \") + text;\n  }, []);\n\n  // Handler for VAD (Voice Activity Detection) triggered auto-send.\n  // VAD runs server-side in the STT provider and detects when the user stops speaking.\n  const handleFinalTranscript = useCallback(\n    (text: string) => {\n      const combined = withPrefix(text);\n      if (!suppressTranscriptUpdatesRef.current) {\n        onTranscription(combined);\n      }\n      const isManualStop = manualStopRequestedRef.current;\n      // Only auto-send if chat is ready for input (not streaming)\n      if (!isManualStop && autoSend && onAutoSend && chatState === \"input\") {\n        suppressTranscriptUpdatesRef.current = true;\n        onAutoSend(combined);\n        // Clear prefix after send to prevent stale text in next auto-listen cycle\n        messagePrefixRef.current = \"\";\n      }\n    },\n    [onTranscription, autoSend, onAutoSend, chatState, withPrefix]\n  );\n\n  const {\n    isRecording,\n    isProcessing,\n    isMuted,\n    error,\n    liveTranscript,\n    audioLevel,\n    startRecording,\n    stopRecording,\n    setMuted,\n  } = useVoiceRecorder({\n    onFinalTranscript: handleFinalTranscript,\n    autoStopOnSilence: autoSend,\n  });\n\n  // Expose stopRecording to parent\n  useEffect(() => {\n    if (stopRecordingRef) {\n      stopRecordingRef.current = stopRecording;\n    }\n  }, [stopRecording, stopRecordingRef]);\n\n  // Expose setMuted to parent\n  useEffect(() => {\n    if (setMutedRef) {\n      setMutedRef.current = setMuted;\n    }\n  }, [setMuted, setMutedRef]);\n\n  // Notify parent when mute state changes\n  useEffect(() => {\n    onMuteChange?.(isMuted);\n  }, [isMuted, onMuteChange]);\n\n  // Forward audio level to parent for waveform visualization\n  useEffect(() => {\n    onAudioLevel?.(audioLevel);\n  }, [audioLevel, onAudioLevel]);\n\n  // Notify parent when recording state changes\n  useEffect(() => {\n    onRecordingChange?.(isRecording);\n  }, [isRecording, onRecordingChange]);\n\n  // Update input with live transcript as user speaks (appending to existing text)\n  useEffect(() => {\n    if (\n      isRecording &&\n      liveTranscript &&\n      !suppressTranscriptUpdatesRef.current\n    ) {\n      onTranscription(withPrefix(liveTranscript));\n    }\n  }, [isRecording, liveTranscript, onTranscription, withPrefix]);\n\n  const handleClick = useCallback(async () => {\n    if (isRecording) {\n      // When recording, clicking the mic button stops recording\n      manualStopRequestedRef.current = true;\n      try {\n        const finalTranscript = await stopRecording();\n        if (finalTranscript) {\n          const combined = withPrefix(finalTranscript);\n          onTranscription(combined);\n          if (\n            autoSend &&\n            onAutoSend &&\n            chatState === \"input\" &&\n            combined.trim()\n          ) {\n            onAutoSend(combined);\n          }\n        }\n        messagePrefixRef.current = \"\";\n      } finally {\n        manualStopRequestedRef.current = false;\n      }\n    } else {\n      try {\n        // Snapshot existing text so transcription can append to it\n        suppressTranscriptUpdatesRef.current = false;\n        messagePrefixRef.current = currentMessage;\n        onRecordingStart?.();\n        await startRecording();\n        // Arm auto-listen only after first manual mic start in this session.\n        hasManualRecordStartRef.current = true;\n      } catch (err) {\n        console.error(\"Microphone access failed:\", err);\n        toast.error(\"Could not access microphone\");\n      }\n    }\n  }, [\n    isRecording,\n    startRecording,\n    stopRecording,\n    onRecordingStart,\n    onTranscription,\n    autoSend,\n    onAutoSend,\n    chatState,\n    currentMessage,\n    withPrefix,\n  ]);\n\n  // Auto-start listening shortly after TTS finishes (only if autoListen is enabled).\n  // Small cooldown reduces playback bleed being re-captured by the microphone.\n  // IMPORTANT: Only trigger auto-listen if TTS was actually playing audio,\n  // not just loading. This prevents auto-listen from triggering when TTS fails.\n  useEffect(() => {\n    if (autoListenCooldownTimerRef.current) {\n      clearTimeout(autoListenCooldownTimerRef.current);\n      autoListenCooldownTimerRef.current = null;\n    }\n\n    const stoppedManually =\n      manualStopCount !== lastHandledManualStopCountRef.current;\n\n    // Only trigger auto-listen if TTS was actually playing (not just loading)\n    if (\n      wasTTSActuallyPlayingRef.current &&\n      !isTTSPlaying &&\n      !isTTSLoading &&\n      !isAwaitingAutoPlaybackStart &&\n      autoListen &&\n      hasManualRecordStartRef.current &&\n      !disabled &&\n      !isRecording &&\n      !stoppedManually\n    ) {\n      autoListenCooldownTimerRef.current = setTimeout(() => {\n        autoListenCooldownTimerRef.current = null;\n        if (\n          !autoListen ||\n          disabled ||\n          isRecording ||\n          isTTSPlaying ||\n          isTTSLoading ||\n          isAwaitingAutoPlaybackStart\n        ) {\n          return;\n        }\n        messagePrefixRef.current = currentMessageRef.current;\n        startRecording().catch((err) => {\n          console.error(\"Auto-start microphone failed:\", err);\n          toast.error(\"Could not auto-start microphone\");\n        });\n      }, 400);\n    }\n\n    if (stoppedManually) {\n      lastHandledManualStopCountRef.current = manualStopCount;\n    }\n\n    // Only track actual playback - not loading states\n    // This ensures auto-listen only triggers after audio actually played\n    if (isTTSPlaying) {\n      wasTTSActuallyPlayingRef.current = true;\n    } else if (!isTTSPlaying && !isTTSLoading && !isAwaitingAutoPlaybackStart) {\n      // Reset when TTS is completely done\n      wasTTSActuallyPlayingRef.current = false;\n    }\n  }, [\n    isTTSPlaying,\n    isTTSLoading,\n    isAwaitingAutoPlaybackStart,\n    autoListen,\n    disabled,\n    isRecording,\n    startRecording,\n    manualStopCount,\n  ]);\n\n  // New sessions must start with an explicit manual mic press.\n  useEffect(() => {\n    if (isNewSession) {\n      hasManualRecordStartRef.current = false;\n      suppressTranscriptUpdatesRef.current = false;\n    }\n  }, [isNewSession]);\n\n  useEffect(() => {\n    if (!isRecording) {\n      suppressTranscriptUpdatesRef.current = false;\n    }\n  }, [isRecording]);\n\n  useEffect(() => {\n    return () => {\n      if (autoListenCooldownTimerRef.current) {\n        clearTimeout(autoListenCooldownTimerRef.current);\n        autoListenCooldownTimerRef.current = null;\n      }\n    };\n  }, []);\n\n  useEffect(() => {\n    if (error) {\n      console.error(\"Voice recorder error:\", error);\n      toast.error(error);\n    }\n  }, [error]);\n\n  // Icon: show loader when processing, otherwise mic\n  const icon = isProcessing ? SimpleLoader : SvgMicrophone;\n\n  // Disable when processing or TTS is playing (don't want to pick up TTS audio)\n  const isDisabled =\n    disabled ||\n    isProcessing ||\n    isTTSPlaying ||\n    isTTSLoading ||\n    isAwaitingAutoPlaybackStart;\n\n  // Recording = darkened (primary), not recording = light (tertiary)\n  const prominence = isRecording ? \"primary\" : \"tertiary\";\n\n  return (\n    <Button\n      disabled={isDisabled}\n      icon={icon}\n      onClick={handleClick}\n      aria-label={isRecording ? \"Stop recording\" : \"Start recording\"}\n      prominence={prominence}\n    />\n  );\n}\n\nexport default MicrophoneButton;\n"
  },
  {
    "path": "web/src/sections/input/SharedAppInputBar.tsx",
    "content": "\"use client\";\n\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button, OpenButton, SelectButton } from \"@opal/components\";\nimport { OpenAISVG } from \"@/components/icons/icons\";\nimport {\n  SvgPlusCircle,\n  SvgArrowUp,\n  SvgSliders,\n  SvgHourglass,\n  SvgEditBig,\n} from \"@opal/icons\";\n\nexport default function SharedAppInputBar() {\n  return (\n    <div className=\"relative w-full\">\n      <div className=\"w-full flex flex-col shadow-01 bg-background-neutral-00 rounded-16\">\n        {/* Textarea area */}\n        <div className=\"flex flex-row items-center w-full\">\n          <Text text03 className=\"w-full px-3 pt-3 pb-2 select-none\">\n            How can Onyx help you today\n          </Text>\n        </div>\n\n        {/* Bottom toolbar */}\n        <div className=\"flex justify-between items-center w-full p-1 min-h-[40px]\">\n          {/* Left side controls */}\n          <div className=\"flex flex-row items-center\">\n            <Button disabled icon={SvgPlusCircle} prominence=\"tertiary\" />\n            <Button disabled icon={SvgSliders} prominence=\"tertiary\" />\n            <SelectButton disabled icon={SvgHourglass} />\n          </div>\n\n          {/* Right side controls */}\n          <div className=\"flex flex-row items-center gap-1\">\n            <OpenButton disabled icon={OpenAISVG}>\n              GPT-4o\n            </OpenButton>\n            <Button disabled icon={SvgArrowUp} />\n          </div>\n        </div>\n      </div>\n\n      {/* Fade overlay */}\n      <div className=\"absolute inset-0 rounded-16 backdrop-blur-sm bg-background-neutral-00/50\" />\n\n      {/* CTA button */}\n      <div className=\"absolute inset-0 flex items-center justify-center\">\n        <Button prominence=\"secondary\" icon={SvgEditBig} href=\"/app\">\n          Start New Session\n        </Button>\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/knowledge/AgentKnowledgePane.tsx",
    "content": "\"use client\";\n\nimport React, {\n  useState,\n  useMemo,\n  useRef,\n  memo,\n  useCallback,\n  useEffect,\n} from \"react\";\nimport * as GeneralLayouts from \"@/layouts/general-layouts\";\nimport { Content } from \"@opal/layouts\";\nimport * as TableLayouts from \"@/layouts/table-layouts\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport { Card } from \"@/refresh-components/cards\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport Separator from \"@/refresh-components/Separator\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport {\n  SvgPlusCircle,\n  SvgArrowUpRight,\n  SvgFiles,\n  SvgFolder,\n} from \"@opal/icons\";\nimport type { CCPairSummary } from \"@/lib/types\";\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport { ValidSources, DocumentSetSummary } from \"@/lib/types\";\nimport useCCPairs from \"@/hooks/useCCPairs\";\nimport { ConnectedSource } from \"@/lib/hierarchy/interfaces\";\nimport { ProjectFile } from \"@/app/app/projects/projectsService\";\nimport {\n  AttachedDocumentSnapshot,\n  HierarchyNodeSnapshot,\n} from \"@/app/admin/agents/interfaces\";\nimport { timeAgo } from \"@/lib/time\";\nimport Spacer from \"@/refresh-components/Spacer\";\nimport { Disabled } from \"@opal/core\";\nimport SourceHierarchyBrowser from \"./SourceHierarchyBrowser\";\n\n// Knowledge pane view states\ntype KnowledgeView = \"main\" | \"add\" | \"document-sets\" | \"sources\" | \"recent\";\n\n// ============================================================================\n// KNOWLEDGE SIDEBAR - Left column showing all knowledge categories\n// ============================================================================\n\ninterface KnowledgeSidebarProps {\n  activeView: KnowledgeView;\n  activeSource?: ValidSources;\n  connectedSources: ConnectedSource[];\n  selectedSources: ValidSources[];\n  selectedDocumentSetIds: number[];\n  selectedFileIds: string[];\n  sourceSelectionCounts: Map<ValidSources, number>;\n  onNavigateToRecent: () => void;\n  onNavigateToDocumentSets: () => void;\n  onNavigateToSource: (source: ValidSources) => void;\n  vectorDbEnabled: boolean;\n}\n\nfunction KnowledgeSidebar({\n  activeView,\n  activeSource,\n  connectedSources,\n  selectedSources,\n  selectedDocumentSetIds,\n  selectedFileIds,\n  sourceSelectionCounts,\n  onNavigateToRecent,\n  onNavigateToDocumentSets,\n  onNavigateToSource,\n  vectorDbEnabled,\n}: KnowledgeSidebarProps) {\n  return (\n    <TableLayouts.SidebarLayout aria-label=\"knowledge-sidebar\">\n      <LineItem\n        icon={SvgFiles}\n        onClick={onNavigateToRecent}\n        selected={activeView === \"recent\"}\n        emphasized={activeView === \"recent\" || selectedFileIds.length > 0}\n        aria-label=\"knowledge-sidebar-files\"\n        rightChildren={\n          selectedFileIds.length > 0 ? (\n            <Text mainUiAction className=\"text-action-link-05\">\n              {selectedFileIds.length}\n            </Text>\n          ) : undefined\n        }\n      >\n        Your Files\n      </LineItem>\n\n      {vectorDbEnabled && (\n        <>\n          <LineItem\n            icon={SvgFolder}\n            onClick={onNavigateToDocumentSets}\n            selected={activeView === \"document-sets\"}\n            emphasized={\n              activeView === \"document-sets\" ||\n              selectedDocumentSetIds.length > 0\n            }\n            aria-label=\"knowledge-sidebar-document-sets\"\n            rightChildren={\n              selectedDocumentSetIds.length > 0 ? (\n                <Text mainUiAction className=\"text-action-link-05\">\n                  {selectedDocumentSetIds.length}\n                </Text>\n              ) : undefined\n            }\n          >\n            Document Set\n          </LineItem>\n\n          <Separator noPadding />\n\n          {connectedSources.map((connectedSource) => {\n            const sourceMetadata = getSourceMetadata(connectedSource.source);\n            const isSelected = selectedSources.includes(connectedSource.source);\n            const isActive =\n              activeView === \"sources\" &&\n              activeSource === connectedSource.source;\n            const selectionCount =\n              sourceSelectionCounts.get(connectedSource.source) ?? 0;\n\n            return (\n              <LineItem\n                key={connectedSource.source}\n                icon={sourceMetadata.icon}\n                onClick={() => onNavigateToSource(connectedSource.source)}\n                selected={isActive}\n                emphasized={isActive || isSelected || selectionCount > 0}\n                aria-label={`knowledge-sidebar-source-${connectedSource.source}`}\n                rightChildren={\n                  selectionCount > 0 ? (\n                    <Text mainUiAction className=\"text-action-link-05\">\n                      {selectionCount}\n                    </Text>\n                  ) : undefined\n                }\n              >\n                {sourceMetadata.displayName}\n              </LineItem>\n            );\n          })}\n        </>\n      )}\n    </TableLayouts.SidebarLayout>\n  );\n}\n\n// ============================================================================\n// KNOWLEDGE TABLE - Generic table component for knowledge items\n// ============================================================================\n\ninterface KnowledgeTableColumn<T> {\n  key: string;\n  header: string;\n  sortable?: boolean;\n  width?: number; // Width in rem\n  render: (item: T) => React.ReactNode;\n}\n\ninterface KnowledgeTableProps<T> {\n  items: T[];\n  columns: KnowledgeTableColumn<T>[];\n  getItemId: (item: T) => string | number;\n  selectedIds: (string | number)[];\n  onToggleItem: (id: string | number) => void;\n  searchValue?: string;\n  onSearchChange?: (value: string) => void;\n  searchPlaceholder?: string;\n  headerActions?: React.ReactNode;\n  emptyMessage?: string;\n}\n\nfunction KnowledgeTable<T>({\n  items,\n  columns,\n  getItemId,\n  selectedIds,\n  onToggleItem,\n  searchValue,\n  onSearchChange,\n  searchPlaceholder = \"Search...\",\n  headerActions,\n  emptyMessage = \"No items available.\",\n  ariaLabelPrefix,\n}: KnowledgeTableProps<T> & { ariaLabelPrefix?: string }) {\n  return (\n    <GeneralLayouts.Section gap={0} alignItems=\"stretch\" justifyContent=\"start\">\n      {/* Header with search and actions */}\n      <GeneralLayouts.Section\n        flexDirection=\"row\"\n        justifyContent=\"start\"\n        alignItems=\"center\"\n        gap={0.5}\n        height=\"auto\"\n      >\n        {onSearchChange !== undefined && (\n          <GeneralLayouts.Section height=\"auto\">\n            <InputTypeIn\n              leftSearchIcon\n              value={searchValue ?? \"\"}\n              onChange={(e) => onSearchChange?.(e.target.value)}\n              placeholder={searchPlaceholder}\n              variant=\"internal\"\n            />\n          </GeneralLayouts.Section>\n        )}\n        {headerActions}\n      </GeneralLayouts.Section>\n\n      <Spacer rem={0.5} />\n\n      {/* Table header */}\n      <TableLayouts.TableRow>\n        <TableLayouts.CheckboxCell />\n        {columns.map((column) => (\n          <TableLayouts.TableCell\n            key={column.key}\n            flex={!column.width}\n            width={column.width}\n          >\n            <GeneralLayouts.Section\n              flexDirection=\"row\"\n              justifyContent=\"start\"\n              alignItems=\"center\"\n              gap={0.25}\n              height=\"auto\"\n            >\n              <Text secondaryBody text03>\n                {column.header}\n              </Text>\n            </GeneralLayouts.Section>\n          </TableLayouts.TableCell>\n        ))}\n      </TableLayouts.TableRow>\n\n      <Separator noPadding />\n\n      {/* Table body */}\n      {items.length === 0 ? (\n        <GeneralLayouts.Section height=\"auto\" padding={1}>\n          <Text text03 secondaryBody>\n            {emptyMessage}\n          </Text>\n        </GeneralLayouts.Section>\n      ) : (\n        <GeneralLayouts.Section gap={0} alignItems=\"stretch\" height=\"auto\">\n          {items.map((item) => {\n            const id = getItemId(item);\n            const isSelected = selectedIds.includes(id);\n\n            return (\n              <TableLayouts.TableRow\n                key={String(id)}\n                selected={isSelected}\n                onClick={() => onToggleItem(id)}\n                aria-label={\n                  ariaLabelPrefix ? `${ariaLabelPrefix}-${id}` : undefined\n                }\n              >\n                <TableLayouts.CheckboxCell>\n                  <Checkbox\n                    checked={isSelected}\n                    onCheckedChange={() => onToggleItem(id)}\n                  />\n                </TableLayouts.CheckboxCell>\n                {columns.map((column) => (\n                  <TableLayouts.TableCell\n                    key={column.key}\n                    flex={!column.width}\n                    width={column.width}\n                  >\n                    {column.render(item)}\n                  </TableLayouts.TableCell>\n                ))}\n              </TableLayouts.TableRow>\n            );\n          })}\n        </GeneralLayouts.Section>\n      )}\n    </GeneralLayouts.Section>\n  );\n}\n\n// ============================================================================\n// DOCUMENT SETS TABLE - Table content for document sets view\n// ============================================================================\n\ninterface DocumentSetsTableContentProps {\n  documentSets: DocumentSetSummary[];\n  selectedDocumentSetIds: number[];\n  onDocumentSetToggle: (documentSetId: number) => void;\n}\n\nfunction DocumentSetsTableContent({\n  documentSets,\n  selectedDocumentSetIds,\n  onDocumentSetToggle,\n}: DocumentSetsTableContentProps) {\n  const [searchValue, setSearchValue] = useState(\"\");\n\n  const filteredDocumentSets = useMemo(() => {\n    if (!searchValue) return documentSets;\n    const lower = searchValue.toLowerCase();\n    return documentSets.filter((ds) => ds.name.toLowerCase().includes(lower));\n  }, [documentSets, searchValue]);\n\n  const columns: KnowledgeTableColumn<DocumentSetSummary>[] = [\n    {\n      key: \"name\",\n      header: \"Name\",\n      sortable: true,\n      render: (ds) => (\n        <Content\n          icon={SvgFolder}\n          title={ds.name}\n          sizePreset=\"main-ui\"\n          variant=\"section\"\n        />\n      ),\n    },\n    {\n      key: \"sources\",\n      header: \"Sources\",\n      width: 8,\n      render: (ds) => (\n        <TableLayouts.SourceIconsRow>\n          {ds.cc_pair_summaries\n            ?.slice(0, 4)\n            .map((summary: CCPairSummary, idx: number) => {\n              const sourceMetadata = getSourceMetadata(summary.source);\n              return <sourceMetadata.icon key={idx} size={16} />;\n            })}\n          {(ds.cc_pair_summaries?.length ?? 0) > 4 && (\n            <Text text03 secondaryBody>\n              +{(ds.cc_pair_summaries?.length ?? 0) - 4}\n            </Text>\n          )}\n        </TableLayouts.SourceIconsRow>\n      ),\n    },\n  ];\n\n  return (\n    <KnowledgeTable\n      items={filteredDocumentSets}\n      columns={columns}\n      getItemId={(ds) => ds.id}\n      selectedIds={selectedDocumentSetIds}\n      onToggleItem={(id) => onDocumentSetToggle(id as number)}\n      searchValue={searchValue}\n      onSearchChange={setSearchValue}\n      searchPlaceholder=\"Search document sets...\"\n      emptyMessage=\"No document sets available.\"\n      ariaLabelPrefix=\"document-set-row\"\n    />\n  );\n}\n\ninterface SourcesTableContentProps {\n  source: ValidSources;\n  selectedDocumentIds: string[];\n  onToggleDocument: (documentId: string) => void;\n  onSetDocumentIds: (ids: string[]) => void;\n  selectedFolderIds: number[];\n  onToggleFolder: (folderId: number) => void;\n  onSetFolderIds: (ids: number[]) => void;\n  onDeselectAllDocuments: () => void;\n  onDeselectAllFolders: () => void;\n  initialAttachedDocuments?: AttachedDocumentSnapshot[];\n  onSelectionCountChange?: (source: ValidSources, count: number) => void;\n}\n\nfunction SourcesTableContent({\n  source,\n  selectedDocumentIds,\n  onToggleDocument,\n  onSetDocumentIds,\n  selectedFolderIds,\n  onToggleFolder,\n  onSetFolderIds,\n  onDeselectAllDocuments,\n  onDeselectAllFolders,\n  initialAttachedDocuments,\n  onSelectionCountChange,\n}: SourcesTableContentProps) {\n  return (\n    <GeneralLayouts.Section gap={0.5} alignItems=\"stretch\">\n      {/* Hierarchy browser */}\n      <SourceHierarchyBrowser\n        source={source}\n        selectedDocumentIds={selectedDocumentIds}\n        onToggleDocument={onToggleDocument}\n        onSetDocumentIds={onSetDocumentIds}\n        selectedFolderIds={selectedFolderIds}\n        onToggleFolder={onToggleFolder}\n        onSetFolderIds={onSetFolderIds}\n        initialAttachedDocuments={initialAttachedDocuments}\n        onDeselectAllDocuments={onDeselectAllDocuments}\n        onDeselectAllFolders={onDeselectAllFolders}\n        onSelectionCountChange={onSelectionCountChange}\n      />\n    </GeneralLayouts.Section>\n  );\n}\n\n// ============================================================================\n// RECENT FILES TABLE - Table content for user files view\n// ============================================================================\n\ninterface RecentFilesTableContentProps {\n  allRecentFiles: ProjectFile[];\n  selectedFileIds: string[];\n  onToggleFile: (fileId: string) => void;\n  onUploadChange: (e: React.ChangeEvent<HTMLInputElement>) => void;\n  hasProcessingFiles: boolean;\n}\n\nfunction RecentFilesTableContent({\n  allRecentFiles,\n  selectedFileIds,\n  onToggleFile,\n  onUploadChange,\n  hasProcessingFiles,\n}: RecentFilesTableContentProps) {\n  const [searchValue, setSearchValue] = useState(\"\");\n\n  const filteredFiles = useMemo(() => {\n    if (!searchValue) return allRecentFiles;\n    const lower = searchValue.toLowerCase();\n    return allRecentFiles.filter((f) => f.name.toLowerCase().includes(lower));\n  }, [allRecentFiles, searchValue]);\n\n  const columns: KnowledgeTableColumn<ProjectFile>[] = [\n    {\n      key: \"name\",\n      header: \"Name\",\n      sortable: true,\n      render: (file) => (\n        <Content\n          icon={SvgFiles}\n          title={file.name}\n          sizePreset=\"main-ui\"\n          variant=\"section\"\n        />\n      ),\n    },\n    {\n      key: \"lastUpdated\",\n      header: \"Last Updated\",\n      sortable: true,\n      width: 8,\n      render: (file) => (\n        <Text text03 secondaryBody>\n          {timeAgo(file.last_accessed_at || file.created_at)}\n        </Text>\n      ),\n    },\n  ];\n\n  const fileInputRef = React.useRef<HTMLInputElement>(null);\n\n  return (\n    <GeneralLayouts.Section gap={0.5} alignItems=\"stretch\">\n      <TableLayouts.HiddenInput\n        inputRef={fileInputRef}\n        type=\"file\"\n        multiple\n        onChange={onUploadChange}\n      />\n\n      <KnowledgeTable\n        items={filteredFiles}\n        columns={columns}\n        getItemId={(file) => file.id}\n        selectedIds={selectedFileIds}\n        onToggleItem={(id) => onToggleFile(id as string)}\n        searchValue={searchValue}\n        onSearchChange={setSearchValue}\n        searchPlaceholder=\"Search files...\"\n        ariaLabelPrefix=\"user-file-row\"\n        headerActions={\n          <Button\n            prominence=\"internal\"\n            icon={SvgPlusCircle}\n            onClick={() => fileInputRef.current?.click()}\n          >\n            Add File\n          </Button>\n        }\n        emptyMessage=\"No files available. Upload files to get started.\"\n      />\n\n      {hasProcessingFiles && (\n        <GeneralLayouts.Section height=\"auto\" alignItems=\"start\">\n          <Text as=\"p\" text03 secondaryBody>\n            Onyx is still processing your uploaded files. You can create the\n            agent now, but it will not have access to all files until processing\n            completes.\n          </Text>\n        </GeneralLayouts.Section>\n      )}\n    </GeneralLayouts.Section>\n  );\n}\n\n// ============================================================================\n// TWO-COLUMN LAYOUT - Sidebar + Table for detailed views\n// ============================================================================\n\ninterface KnowledgeTwoColumnViewProps {\n  activeView: KnowledgeView;\n  activeSource?: ValidSources;\n  connectedSources: ConnectedSource[];\n  selectedSources: ValidSources[];\n  selectedDocumentSetIds: number[];\n  selectedFileIds: string[];\n  selectedDocumentIds: string[];\n  selectedFolderIds: number[];\n  sourceSelectionCounts: Map<ValidSources, number>;\n  documentSets: DocumentSetSummary[];\n  allRecentFiles: ProjectFile[];\n  onNavigateToRecent: () => void;\n  onNavigateToDocumentSets: () => void;\n  onNavigateToSource: (source: ValidSources) => void;\n  onDocumentSetToggle: (id: number) => void;\n  onSourceToggle: (source: ValidSources) => void;\n  onFileToggle: (fileId: string) => void;\n  onToggleDocument: (documentId: string) => void;\n  onToggleFolder: (folderId: number) => void;\n  onSetDocumentIds: (ids: string[]) => void;\n  onSetFolderIds: (ids: number[]) => void;\n  onDeselectAllDocuments: () => void;\n  onDeselectAllFolders: () => void;\n  onUploadChange: (e: React.ChangeEvent<HTMLInputElement>) => void;\n  hasProcessingFiles: boolean;\n  initialAttachedDocuments?: AttachedDocumentSnapshot[];\n  onSelectionCountChange: (source: ValidSources, count: number) => void;\n  vectorDbEnabled: boolean;\n}\n\nconst KnowledgeTwoColumnView = memo(function KnowledgeTwoColumnView({\n  activeView,\n  activeSource,\n  connectedSources,\n  selectedSources,\n  selectedDocumentSetIds,\n  selectedFileIds,\n  selectedDocumentIds,\n  selectedFolderIds,\n  sourceSelectionCounts,\n  documentSets,\n  allRecentFiles,\n  onNavigateToRecent,\n  onNavigateToDocumentSets,\n  onNavigateToSource,\n  onDocumentSetToggle,\n  onSourceToggle,\n  onFileToggle,\n  onToggleDocument,\n  onToggleFolder,\n  onSetDocumentIds,\n  onSetFolderIds,\n  onDeselectAllDocuments,\n  onDeselectAllFolders,\n  onUploadChange,\n  hasProcessingFiles,\n  initialAttachedDocuments,\n  onSelectionCountChange,\n  vectorDbEnabled,\n}: KnowledgeTwoColumnViewProps) {\n  return (\n    <TableLayouts.TwoColumnLayout minHeight={18.75}>\n      <KnowledgeSidebar\n        activeView={activeView}\n        activeSource={activeSource}\n        connectedSources={connectedSources}\n        selectedSources={selectedSources}\n        selectedDocumentSetIds={selectedDocumentSetIds}\n        selectedFileIds={selectedFileIds}\n        sourceSelectionCounts={sourceSelectionCounts}\n        onNavigateToRecent={onNavigateToRecent}\n        onNavigateToDocumentSets={onNavigateToDocumentSets}\n        onNavigateToSource={onNavigateToSource}\n        vectorDbEnabled={vectorDbEnabled}\n      />\n\n      <TableLayouts.ContentColumn>\n        {activeView === \"document-sets\" && (\n          <DocumentSetsTableContent\n            documentSets={documentSets}\n            selectedDocumentSetIds={selectedDocumentSetIds}\n            onDocumentSetToggle={onDocumentSetToggle}\n          />\n        )}\n        {activeView === \"sources\" && activeSource && (\n          <SourcesTableContent\n            source={activeSource}\n            selectedDocumentIds={selectedDocumentIds}\n            onToggleDocument={onToggleDocument}\n            onSetDocumentIds={onSetDocumentIds}\n            selectedFolderIds={selectedFolderIds}\n            onToggleFolder={onToggleFolder}\n            onSetFolderIds={onSetFolderIds}\n            onDeselectAllDocuments={onDeselectAllDocuments}\n            onDeselectAllFolders={onDeselectAllFolders}\n            initialAttachedDocuments={initialAttachedDocuments}\n            onSelectionCountChange={onSelectionCountChange}\n          />\n        )}\n        {activeView === \"recent\" && (\n          <RecentFilesTableContent\n            allRecentFiles={allRecentFiles}\n            selectedFileIds={selectedFileIds}\n            onToggleFile={onFileToggle}\n            onUploadChange={onUploadChange}\n            hasProcessingFiles={hasProcessingFiles}\n          />\n        )}\n      </TableLayouts.ContentColumn>\n    </TableLayouts.TwoColumnLayout>\n  );\n});\n\n// ============================================================================\n// KNOWLEDGE ADD VIEW - Initial pill selection view\n// ============================================================================\n\ninterface KnowledgeAddViewProps {\n  connectedSources: ConnectedSource[];\n  onNavigateToDocumentSets: () => void;\n  onNavigateToRecent: () => void;\n  onNavigateToSource: (source: ValidSources) => void;\n  selectedDocumentSetIds: number[];\n  selectedFileIds: string[];\n  selectedSources: ValidSources[];\n  sourceSelectionCounts: Map<ValidSources, number>;\n  vectorDbEnabled: boolean;\n}\n\nconst KnowledgeAddView = memo(function KnowledgeAddView({\n  connectedSources,\n  onNavigateToDocumentSets,\n  onNavigateToRecent,\n  onNavigateToSource,\n  selectedDocumentSetIds,\n  selectedFileIds,\n  selectedSources,\n  sourceSelectionCounts,\n  vectorDbEnabled,\n}: KnowledgeAddViewProps) {\n  return (\n    <GeneralLayouts.Section\n      gap={0.5}\n      alignItems=\"start\"\n      height=\"auto\"\n      aria-label=\"knowledge-add-view\"\n    >\n      <GeneralLayouts.Section\n        flexDirection=\"row\"\n        justifyContent=\"start\"\n        gap={0.5}\n        height=\"auto\"\n        wrap\n      >\n        {vectorDbEnabled && (\n          <LineItem\n            icon={SvgFolder}\n            onClick={onNavigateToDocumentSets}\n            emphasized={selectedDocumentSetIds.length > 0}\n            aria-label=\"knowledge-add-document-sets\"\n            rightChildren={\n              selectedDocumentSetIds.length > 0 ? (\n                <Text mainUiAction className=\"text-action-link-05\">\n                  {selectedDocumentSetIds.length}\n                </Text>\n              ) : undefined\n            }\n          >\n            Document Sets\n          </LineItem>\n        )}\n\n        <LineItem\n          icon={SvgFiles}\n          description=\"Recent or new uploads\"\n          onClick={onNavigateToRecent}\n          emphasized={selectedFileIds.length > 0}\n          aria-label=\"knowledge-add-files\"\n          rightChildren={\n            selectedFileIds.length > 0 ? (\n              <Text mainUiAction className=\"text-action-link-05\">\n                {selectedFileIds.length}\n              </Text>\n            ) : undefined\n          }\n        >\n          Your Files\n        </LineItem>\n      </GeneralLayouts.Section>\n\n      {vectorDbEnabled && connectedSources.length > 0 && (\n        <>\n          <Text as=\"p\" text03 secondaryBody>\n            Connected Sources\n          </Text>\n          {connectedSources.map((connectedSource) => {\n            const sourceMetadata = getSourceMetadata(connectedSource.source);\n            const isSelected = selectedSources.includes(connectedSource.source);\n            const selectionCount =\n              sourceSelectionCounts.get(connectedSource.source) ?? 0;\n            return (\n              <LineItem\n                key={connectedSource.source}\n                icon={sourceMetadata.icon}\n                onClick={() => onNavigateToSource(connectedSource.source)}\n                emphasized={isSelected || selectionCount > 0}\n                aria-label={`knowledge-add-source-${connectedSource.source}`}\n                rightChildren={\n                  selectionCount > 0 ? (\n                    <Text mainUiAction className=\"text-action-link-05\">\n                      {selectionCount}\n                    </Text>\n                  ) : undefined\n                }\n              >\n                {sourceMetadata.displayName}\n              </LineItem>\n            );\n          })}\n        </>\n      )}\n    </GeneralLayouts.Section>\n  );\n});\n\n// ============================================================================\n// KNOWLEDGE MAIN CONTENT - Empty state and preview\n// ============================================================================\n\ninterface KnowledgeMainContentProps {\n  hasAnyKnowledge: boolean;\n  selectedDocumentSetIds: number[];\n  selectedDocumentIds: string[];\n  selectedFolderIds: number[];\n  selectedFileIds: string[];\n  selectedSources: ValidSources[];\n  documentSets: DocumentSetSummary[];\n  allRecentFiles: ProjectFile[];\n  connectedSources: ConnectedSource[];\n  onAddKnowledge: () => void;\n  onViewEdit: () => void;\n  onFileClick?: (file: ProjectFile) => void;\n}\n\nconst KnowledgeMainContent = memo(function KnowledgeMainContent({\n  hasAnyKnowledge,\n  selectedDocumentSetIds,\n  selectedDocumentIds,\n  selectedFolderIds,\n  selectedFileIds,\n  selectedSources,\n  documentSets,\n  allRecentFiles,\n  connectedSources,\n  onAddKnowledge,\n  onViewEdit,\n  onFileClick,\n}: KnowledgeMainContentProps) {\n  if (!hasAnyKnowledge) {\n    return (\n      <GeneralLayouts.Section\n        flexDirection=\"row\"\n        justifyContent=\"between\"\n        alignItems=\"center\"\n        height=\"auto\"\n      >\n        <Text text03 secondaryBody>\n          Add documents or connected sources to use for this agent.\n        </Text>\n        <Button\n          icon={SvgPlusCircle}\n          onClick={onAddKnowledge}\n          prominence=\"tertiary\"\n          aria-label=\"knowledge-add-button\"\n        />\n      </GeneralLayouts.Section>\n    );\n  }\n\n  // Has knowledge - show preview with count\n  const totalSelected =\n    selectedDocumentSetIds.length +\n    selectedDocumentIds.length +\n    selectedFolderIds.length +\n    selectedFileIds.length +\n    selectedSources.length;\n\n  return (\n    <GeneralLayouts.Section\n      flexDirection=\"row\"\n      justifyContent=\"between\"\n      alignItems=\"center\"\n      height=\"auto\"\n    >\n      <Text as=\"p\" text03 secondaryBody>\n        {totalSelected} knowledge source{totalSelected !== 1 ? \"s\" : \"\"}{\" \"}\n        selected\n      </Text>\n      <Button\n        prominence=\"internal\"\n        icon={SvgArrowUpRight}\n        onClick={onViewEdit}\n        aria-label=\"knowledge-view-edit\"\n      >\n        View / Edit\n      </Button>\n    </GeneralLayouts.Section>\n  );\n});\n\n// ============================================================================\n// MAIN COMPONENT - AgentKnowledgePane\n// ============================================================================\n\ninterface AgentKnowledgePaneProps {\n  enableKnowledge: boolean;\n  onEnableKnowledgeChange: (enabled: boolean) => void;\n  selectedSources: ValidSources[];\n  onSourcesChange: (sources: ValidSources[]) => void;\n  documentSets: DocumentSetSummary[];\n  selectedDocumentSetIds: number[];\n  onDocumentSetIdsChange: (ids: number[]) => void;\n  selectedDocumentIds: string[];\n  onDocumentIdsChange: (ids: string[]) => void;\n  selectedFolderIds: number[];\n  onFolderIdsChange: (ids: number[]) => void;\n  selectedFileIds: string[];\n  onFileIdsChange: (ids: string[]) => void;\n  allRecentFiles: ProjectFile[];\n  onFileClick?: (file: ProjectFile) => void;\n  onUploadChange: (e: React.ChangeEvent<HTMLInputElement>) => void;\n  hasProcessingFiles: boolean;\n  // Initial attached documents for existing agents (to populate selectedDocumentDetails)\n  initialAttachedDocuments?: AttachedDocumentSnapshot[];\n  // Initial hierarchy nodes for existing agents (to calculate per-source counts)\n  initialHierarchyNodes?: HierarchyNodeSnapshot[];\n  // When false, hides document sets, connected sources, and hierarchy nodes\n  // (these require a vector DB). User files are still shown.\n  vectorDbEnabled?: boolean;\n}\n\nexport default function AgentKnowledgePane({\n  enableKnowledge,\n  onEnableKnowledgeChange,\n  selectedSources,\n  onSourcesChange,\n  documentSets,\n  selectedDocumentSetIds,\n  onDocumentSetIdsChange,\n  selectedDocumentIds,\n  onDocumentIdsChange,\n  selectedFolderIds,\n  onFolderIdsChange,\n  selectedFileIds,\n  onFileIdsChange,\n  allRecentFiles,\n  onFileClick,\n  onUploadChange,\n  hasProcessingFiles,\n  initialAttachedDocuments,\n  initialHierarchyNodes,\n  vectorDbEnabled = true,\n}: AgentKnowledgePaneProps) {\n  // View state\n  const [view, setView] = useState<KnowledgeView>(\"main\");\n  const [activeSource, setActiveSource] = useState<ValidSources | undefined>();\n\n  // Reset view to main when knowledge is disabled\n  useEffect(() => {\n    if (!enableKnowledge) {\n      setView(\"main\");\n    }\n  }, [enableKnowledge]);\n\n  // Get connected sources from CC pairs\n  const { ccPairs } = useCCPairs(vectorDbEnabled);\n  const connectedSources: ConnectedSource[] = useMemo(() => {\n    if (!ccPairs || ccPairs.length === 0) return [];\n    const sourceSet = new Set<ValidSources>();\n    ccPairs.forEach((pair) => sourceSet.add(pair.source));\n    return Array.from(sourceSet).map((source) => ({\n      source,\n      connectorCount: ccPairs.filter((p) => p.source === source).length,\n    }));\n  }, [ccPairs]);\n\n  // Track per-source selection counts\n  // Initialized from initialHierarchyNodes and initialAttachedDocuments\n  const [sourceSelectionCounts, setSourceSelectionCounts] = useState<\n    Map<ValidSources, number>\n  >(() => {\n    const counts = new Map<ValidSources, number>();\n\n    // Count folders from initialHierarchyNodes (which have source info)\n    if (initialHierarchyNodes) {\n      for (const node of initialHierarchyNodes) {\n        const current = counts.get(node.source) ?? 0;\n        counts.set(node.source, current + 1);\n      }\n    }\n\n    // Count documents from initialAttachedDocuments (which now include source)\n    if (initialAttachedDocuments) {\n      for (const doc of initialAttachedDocuments) {\n        if (doc.source) {\n          const current = counts.get(doc.source) ?? 0;\n          counts.set(doc.source, current + 1);\n        }\n      }\n    }\n\n    return counts;\n  });\n\n  // Handler for selection count changes from SourceHierarchyBrowser\n  const handleSelectionCountChange = useCallback(\n    (source: ValidSources, count: number) => {\n      setSourceSelectionCounts((prev) => {\n        const newCounts = new Map(prev);\n        if (count === 0) {\n          newCounts.delete(source);\n        } else {\n          newCounts.set(source, count);\n        }\n        return newCounts;\n      });\n    },\n    []\n  );\n\n  // Check if any knowledge is selected\n  const hasAnyKnowledge =\n    selectedDocumentSetIds.length > 0 ||\n    selectedDocumentIds.length > 0 ||\n    selectedFolderIds.length > 0 ||\n    selectedFileIds.length > 0 ||\n    selectedSources.length > 0;\n\n  // Navigation handlers - memoized to prevent unnecessary re-renders\n  const handleNavigateToAdd = useCallback(() => setView(\"add\"), []);\n  const handleNavigateToMain = useCallback(() => setView(\"main\"), []);\n  const handleNavigateToDocumentSets = useCallback(\n    () => setView(\"document-sets\"),\n    []\n  );\n  const handleNavigateToRecent = useCallback(() => setView(\"recent\"), []);\n  const handleNavigateToSource = useCallback((source: ValidSources) => {\n    setActiveSource(source);\n    setView(\"sources\");\n  }, []);\n\n  // Toggle handlers - memoized to prevent unnecessary re-renders\n  const handleDocumentSetToggle = useCallback(\n    (documentSetId: number) => {\n      const newIds = selectedDocumentSetIds.includes(documentSetId)\n        ? selectedDocumentSetIds.filter((id) => id !== documentSetId)\n        : [...selectedDocumentSetIds, documentSetId];\n      onDocumentSetIdsChange(newIds);\n    },\n    [selectedDocumentSetIds, onDocumentSetIdsChange]\n  );\n\n  const handleSourceToggle = useCallback(\n    (source: ValidSources) => {\n      const newSources = selectedSources.includes(source)\n        ? selectedSources.filter((s) => s !== source)\n        : [...selectedSources, source];\n      onSourcesChange(newSources);\n    },\n    [selectedSources, onSourcesChange]\n  );\n\n  const handleFileToggle = useCallback(\n    (fileId: string) => {\n      const newIds = selectedFileIds.includes(fileId)\n        ? selectedFileIds.filter((id) => id !== fileId)\n        : [...selectedFileIds, fileId];\n      onFileIdsChange(newIds);\n    },\n    [selectedFileIds, onFileIdsChange]\n  );\n\n  const handleDocumentToggle = useCallback(\n    (documentId: string) => {\n      const newIds = selectedDocumentIds.includes(documentId)\n        ? selectedDocumentIds.filter((id) => id !== documentId)\n        : [...selectedDocumentIds, documentId];\n      onDocumentIdsChange(newIds);\n    },\n    [selectedDocumentIds, onDocumentIdsChange]\n  );\n\n  const handleFolderToggle = useCallback(\n    (folderId: number) => {\n      const newIds = selectedFolderIds.includes(folderId)\n        ? selectedFolderIds.filter((id) => id !== folderId)\n        : [...selectedFolderIds, folderId];\n      onFolderIdsChange(newIds);\n    },\n    [selectedFolderIds, onFolderIdsChange]\n  );\n\n  const handleDeselectAllDocuments = useCallback(() => {\n    onDocumentIdsChange([]);\n  }, [onDocumentIdsChange]);\n\n  const handleDeselectAllFolders = useCallback(() => {\n    onFolderIdsChange([]);\n  }, [onFolderIdsChange]);\n\n  // Memoized content based on view - prevents unnecessary re-renders\n  const renderedContent = useMemo(() => {\n    switch (view) {\n      case \"main\":\n        return (\n          <KnowledgeMainContent\n            hasAnyKnowledge={hasAnyKnowledge}\n            selectedDocumentSetIds={selectedDocumentSetIds}\n            selectedDocumentIds={selectedDocumentIds}\n            selectedFolderIds={selectedFolderIds}\n            selectedFileIds={selectedFileIds}\n            selectedSources={selectedSources}\n            documentSets={documentSets}\n            allRecentFiles={allRecentFiles}\n            connectedSources={connectedSources}\n            onAddKnowledge={handleNavigateToAdd}\n            onViewEdit={handleNavigateToAdd}\n            onFileClick={onFileClick}\n          />\n        );\n\n      case \"add\":\n        return (\n          <KnowledgeAddView\n            connectedSources={connectedSources}\n            onNavigateToDocumentSets={handleNavigateToDocumentSets}\n            onNavigateToRecent={handleNavigateToRecent}\n            onNavigateToSource={handleNavigateToSource}\n            selectedDocumentSetIds={selectedDocumentSetIds}\n            selectedFileIds={selectedFileIds}\n            selectedSources={selectedSources}\n            sourceSelectionCounts={sourceSelectionCounts}\n            vectorDbEnabled={vectorDbEnabled}\n          />\n        );\n\n      case \"document-sets\":\n      case \"sources\":\n      case \"recent\":\n        return (\n          <KnowledgeTwoColumnView\n            activeView={view}\n            activeSource={activeSource}\n            connectedSources={connectedSources}\n            selectedSources={selectedSources}\n            selectedDocumentSetIds={selectedDocumentSetIds}\n            selectedFileIds={selectedFileIds}\n            selectedDocumentIds={selectedDocumentIds}\n            selectedFolderIds={selectedFolderIds}\n            sourceSelectionCounts={sourceSelectionCounts}\n            documentSets={documentSets}\n            allRecentFiles={allRecentFiles}\n            onNavigateToRecent={handleNavigateToRecent}\n            onNavigateToDocumentSets={handleNavigateToDocumentSets}\n            onNavigateToSource={handleNavigateToSource}\n            onDocumentSetToggle={handleDocumentSetToggle}\n            onSourceToggle={handleSourceToggle}\n            onFileToggle={handleFileToggle}\n            onToggleDocument={handleDocumentToggle}\n            onToggleFolder={handleFolderToggle}\n            onSetDocumentIds={onDocumentIdsChange}\n            onSetFolderIds={onFolderIdsChange}\n            onDeselectAllDocuments={handleDeselectAllDocuments}\n            onDeselectAllFolders={handleDeselectAllFolders}\n            onUploadChange={onUploadChange}\n            hasProcessingFiles={hasProcessingFiles}\n            initialAttachedDocuments={initialAttachedDocuments}\n            onSelectionCountChange={handleSelectionCountChange}\n            vectorDbEnabled={vectorDbEnabled}\n          />\n        );\n\n      default:\n        return null;\n    }\n  }, [\n    view,\n    activeSource,\n    hasAnyKnowledge,\n    selectedDocumentSetIds,\n    selectedDocumentIds,\n    selectedFolderIds,\n    selectedFileIds,\n    selectedSources,\n    sourceSelectionCounts,\n    documentSets,\n    allRecentFiles,\n    connectedSources,\n    hasProcessingFiles,\n    initialAttachedDocuments,\n    vectorDbEnabled,\n    onFileClick,\n    onUploadChange,\n    onDocumentIdsChange,\n    onFolderIdsChange,\n    handleNavigateToAdd,\n    handleNavigateToDocumentSets,\n    handleNavigateToRecent,\n    handleNavigateToSource,\n    handleDocumentSetToggle,\n    handleSourceToggle,\n    handleFileToggle,\n    handleDocumentToggle,\n    handleFolderToggle,\n    handleDeselectAllDocuments,\n    handleDeselectAllFolders,\n    handleSelectionCountChange,\n  ]);\n\n  return (\n    <GeneralLayouts.Section gap={0.5} alignItems=\"stretch\" height=\"auto\">\n      <Content\n        title=\"Knowledge\"\n        description=\"Add specific connectors and documents for this agent to use to inform its responses.\"\n        sizePreset=\"main-content\"\n        variant=\"section\"\n      />\n\n      <Card>\n        <GeneralLayouts.Section gap={0.5} alignItems=\"stretch\" height=\"auto\">\n          <InputLayouts.Horizontal\n            title=\"Use Knowledge\"\n            description=\"Let this agent reference these documents to inform its responses.\"\n          >\n            <Switch\n              name=\"enable_knowledge\"\n              checked={enableKnowledge}\n              onCheckedChange={onEnableKnowledgeChange}\n            />\n          </InputLayouts.Horizontal>\n\n          <Disabled disabled={!enableKnowledge}>\n            <GeneralLayouts.Section alignItems=\"stretch\" height=\"auto\">\n              {renderedContent}\n            </GeneralLayouts.Section>\n          </Disabled>\n        </GeneralLayouts.Section>\n      </Card>\n    </GeneralLayouts.Section>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/knowledge/SourceHierarchyBrowser.tsx",
    "content": "\"use client\";\n\nimport React, {\n  useState,\n  useMemo,\n  useEffect,\n  useCallback,\n  useRef,\n} from \"react\";\nimport * as GeneralLayouts from \"@/layouts/general-layouts\";\nimport * as TableLayouts from \"@/layouts/table-layouts\";\nimport { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport Separator from \"@/refresh-components/Separator\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport Popover from \"@/refresh-components/Popover\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport SelectButton from \"@/refresh-components/buttons/SelectButton\";\nimport Divider from \"@/refresh-components/Divider\";\nimport {\n  SvgFolder,\n  SvgChevronRight,\n  SvgFileText,\n  SvgEye,\n  SvgXCircle,\n  SvgCheck,\n  SvgArrowUpDown,\n} from \"@opal/icons\";\nimport { getSourceMetadata } from \"@/lib/sources\";\nimport { ValidSources } from \"@/lib/types\";\nimport {\n  HierarchyNodeSummary,\n  DocumentSummary,\n  DocumentPageCursor,\n  HierarchyItem,\n  HierarchyBreadcrumbProps,\n  DocumentSortField,\n  DocumentSortDirection,\n  FolderPosition,\n} from \"@/lib/hierarchy/interfaces\";\nimport {\n  fetchHierarchyNodes,\n  fetchHierarchyNodeDocuments,\n} from \"@/lib/hierarchy/svc\";\nimport { AttachedDocumentSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { timeAgo } from \"@/lib/time\";\nimport Spacer from \"@/refresh-components/Spacer\";\n\n// ============================================================================\n// HIERARCHY BREADCRUMB - Navigation path for folder hierarchy\n// ============================================================================\n\nfunction HierarchyBreadcrumb({\n  source,\n  path,\n  onNavigateToRoot,\n  onNavigateToNode,\n}: HierarchyBreadcrumbProps) {\n  const sourceMetadata = getSourceMetadata(source);\n  const MAX_VISIBLE_SEGMENTS = 3;\n\n  // Determine which segments to show\n  const shouldCollapse = path.length > MAX_VISIBLE_SEGMENTS;\n  const visiblePath = shouldCollapse\n    ? path.slice(path.length - MAX_VISIBLE_SEGMENTS + 1)\n    : path;\n  const collapsedCount = shouldCollapse\n    ? path.length - MAX_VISIBLE_SEGMENTS + 1\n    : 0;\n\n  return (\n    <GeneralLayouts.Section\n      flexDirection=\"row\"\n      justifyContent=\"start\"\n      alignItems=\"center\"\n      gap={0.25}\n      height=\"auto\"\n    >\n      {/* Root source link */}\n      {path.length > 0 ? (\n        <Button prominence=\"tertiary\" onClick={onNavigateToRoot}>\n          {sourceMetadata.displayName}\n        </Button>\n      ) : (\n        <Text text03>{sourceMetadata.displayName}</Text>\n      )}\n\n      {/* Collapsed indicator */}\n      {shouldCollapse && (\n        <>\n          <SvgChevronRight size={12} className=\"stroke-text-04\" />\n          <Text text03 secondaryBody>\n            ...\n          </Text>\n        </>\n      )}\n\n      {/* Visible path segments */}\n      {visiblePath.map((node, visibleIndex) => {\n        const actualIndex = shouldCollapse\n          ? collapsedCount + visibleIndex\n          : visibleIndex;\n        const isLast = actualIndex === path.length - 1;\n\n        return (\n          <React.Fragment key={node.id}>\n            <SvgChevronRight size={12} className=\"stroke-text-04\" />\n            {isLast ? (\n              <Text text03>{node.title}</Text>\n            ) : (\n              <Button\n                prominence=\"tertiary\"\n                onClick={() => onNavigateToNode(node, actualIndex)}\n              >\n                {node.title}\n              </Button>\n            )}\n          </React.Fragment>\n        );\n      })}\n    </GeneralLayouts.Section>\n  );\n}\n\n// ============================================================================\n// SOURCE HIERARCHY BROWSER - Browsable folder/document hierarchy for a source\n// ============================================================================\n\nexport interface SourceHierarchyBrowserProps {\n  source: ValidSources;\n  selectedDocumentIds: string[];\n  onToggleDocument: (documentId: string) => void;\n  onSetDocumentIds: (ids: string[]) => void;\n  selectedFolderIds: number[];\n  onToggleFolder: (folderId: number) => void;\n  onSetFolderIds: (ids: number[]) => void;\n  onDeselectAllDocuments: () => void;\n  onDeselectAllFolders: () => void;\n  initialAttachedDocuments?: AttachedDocumentSnapshot[];\n  // Callback to report selection count changes for this source\n  onSelectionCountChange?: (source: ValidSources, count: number) => void;\n}\n\nexport default function SourceHierarchyBrowser({\n  source,\n  selectedDocumentIds,\n  onToggleDocument,\n  onSetDocumentIds,\n  selectedFolderIds,\n  onToggleFolder,\n  onSetFolderIds,\n  onDeselectAllDocuments,\n  onDeselectAllFolders,\n  initialAttachedDocuments,\n  onSelectionCountChange,\n}: SourceHierarchyBrowserProps) {\n  // State for hierarchy nodes (loaded once per source)\n  const [allNodes, setAllNodes] = useState<HierarchyNodeSummary[]>([]);\n  const [isLoadingNodes, setIsLoadingNodes] = useState(false);\n  const [nodesError, setNodesError] = useState<string | null>(null);\n\n  // State for current navigation path\n  const [path, setPath] = useState<HierarchyNodeSummary[]>([]);\n\n  // State for documents (paginated)\n  const [documents, setDocuments] = useState<DocumentSummary[]>([]);\n  const [nextCursor, setNextCursor] = useState<DocumentPageCursor | null>(null);\n  const [isLoadingDocuments, setIsLoadingDocuments] = useState(false);\n  const [hasMoreDocuments, setHasMoreDocuments] = useState(true);\n\n  // Search state\n  const [searchValue, setSearchValue] = useState(\"\");\n\n  // Sort state\n  const [sortField, setSortField] = useState<DocumentSortField>(\"last_updated\");\n  const [sortDirection, setSortDirection] =\n    useState<DocumentSortDirection>(\"desc\");\n  const [folderPosition, setFolderPosition] =\n    useState<FolderPosition>(\"on_top\");\n  const [sortDropdownOpen, setSortDropdownOpen] = useState(false);\n\n  // View selected only filter state\n  const [viewSelectedOnly, setViewSelectedOnly] = useState(false);\n\n  // Store path before entering view selected mode so we can restore it\n  const [savedPath, setSavedPath] = useState<HierarchyNodeSummary[]>([]);\n\n  // Store selected document details (for showing all selected documents in view selected mode)\n  // Note: useState (not useMemo) because this is modified independently when users select/deselect documents\n  const [selectedDocumentDetails, setSelectedDocumentDetails] = useState<\n    Map<string, DocumentSummary>\n  >(() => new Map(initialAttachedDocuments?.map((doc) => [doc.id, doc]) ?? []));\n\n  // Ref for scroll container\n  const scrollContainerRef = useRef<HTMLDivElement>(null);\n\n  // Get current parent node ID (null for root)\n  const lastPathNode = path[path.length - 1];\n  const currentParentId = lastPathNode ? lastPathNode.id : null;\n\n  // Load hierarchy nodes when source changes\n  useEffect(() => {\n    const loadNodes = async () => {\n      setIsLoadingNodes(true);\n      setNodesError(null);\n      setAllNodes([]);\n      setPath([]);\n      setDocuments([]);\n      setNextCursor(null);\n      setHasMoreDocuments(true);\n\n      try {\n        const response = await fetchHierarchyNodes(source);\n        setAllNodes(response.nodes);\n      } catch (error) {\n        setNodesError(\n          error instanceof Error ? error.message : \"Failed to load folders\"\n        );\n      } finally {\n        setIsLoadingNodes(false);\n      }\n    };\n\n    loadNodes();\n  }, [source]);\n\n  // Load documents when current path or sort options change\n  useEffect(() => {\n    const loadDocuments = async () => {\n      // Skip if no nodes loaded yet (still loading hierarchy)\n      if (allNodes.length === 0 && !nodesError) return;\n\n      setIsLoadingDocuments(true);\n      setDocuments([]);\n      setNextCursor(null);\n      setHasMoreDocuments(true);\n\n      try {\n        // We need a parent hierarchy node to fetch documents\n        // For root level, we need to find the root node(s)\n        const parentNodeId = currentParentId;\n        if (parentNodeId === null) {\n          // At root level - find root nodes (nodes with no parent)\n          const rootNodes = allNodes.filter((n) => n.parent_id === null);\n          if (rootNodes.length === 0) {\n            setHasMoreDocuments(false);\n            return;\n          }\n          // For now, just don't load documents at root level\n          // Documents are always children of a hierarchy node\n          setHasMoreDocuments(false);\n          return;\n        }\n\n        const response = await fetchHierarchyNodeDocuments({\n          parent_hierarchy_node_id: parentNodeId,\n          cursor: null,\n          sort_field: sortField,\n          sort_direction: sortDirection,\n          folder_position: folderPosition,\n        });\n\n        setDocuments(response.documents);\n        setNextCursor(response.next_cursor);\n        setHasMoreDocuments(response.next_cursor !== null);\n      } catch (error) {\n        console.error(\"Failed to load documents:\", error);\n      } finally {\n        setIsLoadingDocuments(false);\n      }\n    };\n\n    loadDocuments();\n  }, [\n    currentParentId,\n    allNodes,\n    nodesError,\n    sortField,\n    sortDirection,\n    folderPosition,\n  ]);\n\n  // Load more documents (for infinite scroll)\n  const loadMoreDocuments = useCallback(async () => {\n    if (!hasMoreDocuments || isLoadingDocuments || !nextCursor) return;\n    if (currentParentId === null) return;\n\n    setIsLoadingDocuments(true);\n\n    try {\n      const response = await fetchHierarchyNodeDocuments({\n        parent_hierarchy_node_id: currentParentId,\n        cursor: nextCursor,\n        sort_field: sortField,\n        sort_direction: sortDirection,\n        folder_position: folderPosition,\n      });\n\n      setDocuments((prev) => [...prev, ...response.documents]);\n      setNextCursor(response.next_cursor);\n      setHasMoreDocuments(response.next_cursor !== null);\n    } catch (error) {\n      console.error(\"Failed to load more documents:\", error);\n    } finally {\n      setIsLoadingDocuments(false);\n    }\n  }, [\n    currentParentId,\n    nextCursor,\n    hasMoreDocuments,\n    isLoadingDocuments,\n    sortField,\n    sortDirection,\n    folderPosition,\n  ]);\n\n  // Infinite scroll handler\n  const handleScroll = useCallback(() => {\n    const container = scrollContainerRef.current;\n    if (!container) return;\n\n    const { scrollTop, scrollHeight, clientHeight } = container;\n    const scrollThreshold = 100; // Load more when within 100px of bottom\n\n    if (scrollHeight - scrollTop - clientHeight < scrollThreshold) {\n      loadMoreDocuments();\n    }\n  }, [loadMoreDocuments]);\n\n  // Populate selectedDocumentDetails for any documents that are already selected\n  // but don't have their details stored (e.g., when editing an existing agent)\n  useEffect(() => {\n    if (documents.length === 0) return;\n\n    const missingDetails = documents.filter(\n      (doc) =>\n        selectedDocumentIds.includes(doc.id) &&\n        !selectedDocumentDetails.has(doc.id)\n    );\n\n    if (missingDetails.length > 0) {\n      setSelectedDocumentDetails((prev) => {\n        const updated = new Map(prev);\n        missingDetails.forEach((doc) => updated.set(doc.id, doc));\n        return updated;\n      });\n    }\n  }, [documents, selectedDocumentIds, selectedDocumentDetails]);\n\n  // Get child folders of the current path\n  const childFolders = useMemo(() => {\n    return allNodes.filter((node) => node.parent_id === currentParentId);\n  }, [allNodes, currentParentId]);\n\n  // Combine folders and documents into items list\n  const items: HierarchyItem[] = useMemo(() => {\n    const folderItems: HierarchyItem[] = childFolders.map((node) => ({\n      type: \"folder\",\n      data: node,\n    }));\n    const documentItems: HierarchyItem[] = documents.map((doc) => ({\n      type: \"document\",\n      data: doc,\n    }));\n\n    // Sort folders based on the sort field and direction\n    const sortedFolders = [...folderItems].sort((a, b) => {\n      const aTitle = a.data.title.toLowerCase();\n      const bTitle = b.data.title.toLowerCase();\n      if (sortField === \"name\") {\n        return sortDirection === \"asc\"\n          ? aTitle.localeCompare(bTitle)\n          : bTitle.localeCompare(aTitle);\n      }\n      // For last_updated, folders don't have timestamps, so sort by name\n      return aTitle.localeCompare(bTitle);\n    });\n\n    // Handle folder position\n    if (folderPosition === \"on_top\") {\n      return [...sortedFolders, ...documentItems];\n    }\n\n    // Mixed: interleave folders with documents based on sort order\n    // Since folders don't have last_modified, we treat them as coming first in the sort\n    // when sorting by last_updated, or we sort them alphabetically with docs by name\n    if (sortField === \"name\") {\n      const combined = [...sortedFolders, ...documentItems];\n      return combined.sort((a, b) => {\n        const aTitle = a.data.title.toLowerCase();\n        const bTitle = b.data.title.toLowerCase();\n        return sortDirection === \"asc\"\n          ? aTitle.localeCompare(bTitle)\n          : bTitle.localeCompare(aTitle);\n      });\n    }\n\n    // For last_updated with mixed, put folders at the end since they don't have timestamps\n    return [...documentItems, ...sortedFolders];\n  }, [childFolders, documents, sortField, sortDirection, folderPosition]);\n\n  // Filter items by search and view selected mode\n  const filteredItems = useMemo(() => {\n    let result: HierarchyItem[];\n\n    if (viewSelectedOnly) {\n      // In view selected mode, show selected items from THIS source only\n      // allNodes is already source-specific, so filtering against it gives us source-specific folders\n      const selectedFolders: HierarchyItem[] = allNodes\n        .filter((node) => selectedFolderIds.includes(node.id))\n        .map((node) => ({ type: \"folder\" as const, data: node }));\n\n      // Create a set of node IDs from this source to filter documents\n      const nodeIdsInSource = new Set(allNodes.map((node) => node.id));\n\n      // Only include documents whose parent belongs to this source\n      const selectedDocs: HierarchyItem[] = selectedDocumentIds\n        .map((docId) => selectedDocumentDetails.get(docId))\n        .filter((doc): doc is DocumentSummary => doc !== undefined)\n        .filter(\n          (doc) => doc.parent_id !== null && nodeIdsInSource.has(doc.parent_id)\n        )\n        .map((doc) => ({ type: \"document\" as const, data: doc }));\n\n      result = [...selectedFolders, ...selectedDocs];\n    } else {\n      // Normal mode: show items from current folder\n      result = items;\n    }\n\n    // Filter by search\n    if (searchValue) {\n      const lower = searchValue.toLowerCase();\n      result = result.filter((item) =>\n        item.data.title.toLowerCase().includes(lower)\n      );\n    }\n\n    return result;\n  }, [\n    items,\n    searchValue,\n    viewSelectedOnly,\n    selectedFolderIds,\n    selectedDocumentIds,\n    allNodes,\n    selectedDocumentDetails,\n  ]);\n\n  // Count selected items for this source only\n  const currentSourceSelectedCount = useMemo(() => {\n    // Folders: count how many selectedFolderIds are in allNodes (source-specific)\n    const folderCount = allNodes.filter((node) =>\n      selectedFolderIds.includes(node.id)\n    ).length;\n\n    // Documents: count how many selected documents have parent in this source\n    const nodeIdsInSource = new Set(allNodes.map((node) => node.id));\n    const docCount = selectedDocumentIds.filter((docId) => {\n      const doc = selectedDocumentDetails.get(docId);\n      return (\n        doc && doc.parent_id !== null && nodeIdsInSource.has(doc.parent_id)\n      );\n    }).length;\n\n    return folderCount + docCount;\n  }, [\n    allNodes,\n    selectedFolderIds,\n    selectedDocumentIds,\n    selectedDocumentDetails,\n  ]);\n\n  // Report selection count changes to parent\n  useEffect(() => {\n    onSelectionCountChange?.(source, currentSourceSelectedCount);\n  }, [source, currentSourceSelectedCount, onSelectionCountChange]);\n\n  // Header checkbox state: count how many visible items are selected\n  const visibleSelectedCount = useMemo(() => {\n    return filteredItems.filter((item) => {\n      const isFolder = item.type === \"folder\";\n      if (isFolder) {\n        return selectedFolderIds.includes(item.data.id as number);\n      }\n      return selectedDocumentIds.includes(item.data.id as string);\n    }).length;\n  }, [filteredItems, selectedFolderIds, selectedDocumentIds]);\n\n  const allVisibleSelected =\n    filteredItems.length > 0 && visibleSelectedCount === filteredItems.length;\n  const someVisibleSelected =\n    visibleSelectedCount > 0 && visibleSelectedCount < filteredItems.length;\n\n  // Handler for header checkbox click\n  const handleHeaderCheckboxClick = () => {\n    // Get visible folders and documents\n    const visibleFolders = filteredItems.filter(\n      (item) => item.type === \"folder\"\n    );\n    const visibleDocs = filteredItems.filter(\n      (item) => item.type === \"document\"\n    );\n    const visibleFolderIds = visibleFolders.map(\n      (item) => item.data.id as number\n    );\n    const visibleDocumentIds = visibleDocs.map(\n      (item) => item.data.id as string\n    );\n\n    if (allVisibleSelected) {\n      // Deselect all visible items by removing them from the selected arrays\n      const newFolderIds = selectedFolderIds.filter(\n        (id) => !visibleFolderIds.includes(id)\n      );\n      const newDocumentIds = selectedDocumentIds.filter(\n        (id) => !visibleDocumentIds.includes(id)\n      );\n      onSetFolderIds(newFolderIds);\n      onSetDocumentIds(newDocumentIds);\n\n      // Remove deselected documents from details map\n      setSelectedDocumentDetails((prev) => {\n        const updated = new Map(prev);\n        visibleDocumentIds.forEach((id) => updated.delete(id));\n        return updated;\n      });\n\n      // If we deselected everything, exit view selected mode\n      if (newFolderIds.length === 0 && newDocumentIds.length === 0) {\n        setViewSelectedOnly(false);\n      }\n    } else {\n      // Select all visible items by adding them to the selected arrays\n      const newFolderIds = [\n        ...selectedFolderIds,\n        ...visibleFolderIds.filter((id) => !selectedFolderIds.includes(id)),\n      ];\n      const newDocumentIds = [\n        ...selectedDocumentIds,\n        ...visibleDocumentIds.filter((id) => !selectedDocumentIds.includes(id)),\n      ];\n      onSetFolderIds(newFolderIds);\n      onSetDocumentIds(newDocumentIds);\n\n      // Store details for newly selected documents\n      setSelectedDocumentDetails((prev) => {\n        const updated = new Map(prev);\n        visibleDocs.forEach((item) => {\n          const docId = item.data.id as string;\n          if (!prev.has(docId)) {\n            updated.set(docId, item.data as DocumentSummary);\n          }\n        });\n        return updated;\n      });\n    }\n  };\n\n  // Navigation handlers\n  const handleNavigateToRoot = () => setPath([]);\n\n  const handleNavigateToNode = (node: HierarchyNodeSummary, index: number) => {\n    setPath((prev) => prev.slice(0, index + 1));\n  };\n\n  const handleClickIntoFolder = (folder: HierarchyNodeSummary) => {\n    if (viewSelectedOnly) {\n      // Exit view selected mode and navigate to the folder\n      // We need to build the path to this folder from root\n      const buildPathToFolder = (\n        targetId: number\n      ): HierarchyNodeSummary[] | null => {\n        const node = allNodes.find((n) => n.id === targetId);\n        if (!node) return null;\n        if (node.parent_id === null) return [node];\n        const parentPath = buildPathToFolder(node.parent_id);\n        if (!parentPath) return null;\n        return [...parentPath, node];\n      };\n      const pathToFolder = buildPathToFolder(folder.id);\n      if (pathToFolder) {\n        setPath(pathToFolder);\n      } else {\n        // Fallback: just set the folder as the path\n        setPath([folder]);\n      }\n      setViewSelectedOnly(false);\n    } else {\n      setPath((prev) => [...prev, folder]);\n    }\n  };\n\n  // Handler for deselecting all items\n  const handleDeselectAll = () => {\n    onDeselectAllDocuments();\n    onDeselectAllFolders();\n    setSelectedDocumentDetails(new Map());\n    setViewSelectedOnly(false);\n  };\n\n  // Handler for toggling view selected mode\n  const handleToggleViewSelected = () => {\n    setViewSelectedOnly((prev) => {\n      if (!prev) {\n        // Entering view selected mode - save current path\n        setSavedPath(path);\n      } else {\n        // Exiting view selected mode - restore saved path\n        setPath(savedPath);\n      }\n      return !prev;\n    });\n  };\n\n  // Handler for clicking a row (folder or document)\n  const handleItemClick = (item: HierarchyItem) => {\n    if (item.type === \"folder\") {\n      onToggleFolder(item.data.id);\n      return;\n    }\n    const docId = item.data.id;\n    const isCurrentlySelected = selectedDocumentIds.includes(docId);\n    if (isCurrentlySelected) {\n      setSelectedDocumentDetails((prev) => {\n        const updated = new Map(prev);\n        updated.delete(docId);\n        return updated;\n      });\n    } else {\n      setSelectedDocumentDetails((prev) => {\n        const updated = new Map(prev);\n        updated.set(docId, item.data);\n        return updated;\n      });\n    }\n    onToggleDocument(docId);\n  };\n\n  // Get the icon for a hierarchy item row\n  const getItemIcon = (item: HierarchyItem, isSelected: boolean) => {\n    if (item.type === \"folder\") {\n      return <SvgFolder size={16} />;\n    }\n    if (isSelected) {\n      return <Checkbox checked={true} />;\n    }\n    return <SvgFileText size={16} />;\n  };\n\n  // Render loading state\n  if (isLoadingNodes) {\n    return (\n      <GeneralLayouts.Section height=\"auto\" padding={1}>\n        <Text text03 secondaryBody>\n          Loading folders...\n        </Text>\n      </GeneralLayouts.Section>\n    );\n  }\n\n  // Render error state\n  if (nodesError) {\n    return (\n      <GeneralLayouts.Section height=\"auto\" padding={1}>\n        <Text text03 secondaryBody>\n          {nodesError}\n        </Text>\n      </GeneralLayouts.Section>\n    );\n  }\n\n  return (\n    <GeneralLayouts.Section gap={0} alignItems=\"stretch\" justifyContent=\"start\">\n      {/* Header with search */}\n      <GeneralLayouts.Section\n        flexDirection=\"row\"\n        justifyContent=\"start\"\n        alignItems=\"center\"\n        gap={0.5}\n        height=\"auto\"\n      >\n        <GeneralLayouts.Section height=\"auto\" width=\"fit\">\n          <InputTypeIn\n            leftSearchIcon\n            value={searchValue}\n            onChange={(e) => setSearchValue(e.target.value)}\n            placeholder=\"Search...\"\n            variant=\"internal\"\n          />\n        </GeneralLayouts.Section>\n      </GeneralLayouts.Section>\n\n      {/* Breadcrumb OR \"Selected items\" pill - mutually exclusive */}\n      {viewSelectedOnly ? (\n        <>\n          <Spacer rem={0.5} />\n          <Button\n            variant=\"action\"\n            prominence=\"tertiary\"\n            onClick={handleToggleViewSelected}\n          >\n            Selected items\n          </Button>\n        </>\n      ) : (\n        (path.length > 0 || allNodes.length > 0) && (\n          <>\n            <Spacer rem={0.5} />\n            <HierarchyBreadcrumb\n              source={source}\n              path={path}\n              onNavigateToRoot={handleNavigateToRoot}\n              onNavigateToNode={handleNavigateToNode}\n            />\n          </>\n        )\n      )}\n\n      <Spacer rem={0.5} />\n\n      {/* Table header */}\n      <TableLayouts.TableRow>\n        <TableLayouts.CheckboxCell>\n          {filteredItems.length > 0 && (\n            <Checkbox\n              checked={allVisibleSelected}\n              indeterminate={someVisibleSelected}\n              onCheckedChange={handleHeaderCheckboxClick}\n            />\n          )}\n        </TableLayouts.CheckboxCell>\n        <TableLayouts.TableCell flex>\n          <Text secondaryBody text03>\n            Name\n          </Text>\n        </TableLayouts.TableCell>\n        <TableLayouts.TableCell width={8}>\n          <Popover open={sortDropdownOpen} onOpenChange={setSortDropdownOpen}>\n            <Popover.Trigger asChild>\n              <div>\n                <SelectButton\n                  rightIcon={SvgArrowUpDown}\n                  transient={sortDropdownOpen}\n                  onClick={() => setSortDropdownOpen(true)}\n                >\n                  {sortField === \"name\" ? \"Name\" : \"Last Updated\"}\n                </SelectButton>\n              </div>\n            </Popover.Trigger>\n            <Popover.Content align=\"end\" sideOffset={4} width=\"lg\">\n              <Popover.Menu>\n                {/* Sort by section */}\n                <Divider showTitle text=\"Sort by\" dividerLine={false} />\n                <LineItem\n                  selected={sortField === \"name\"}\n                  onClick={() => setSortField(\"name\")}\n                  rightChildren={\n                    sortField === \"name\" ? <SvgCheck size={16} /> : undefined\n                  }\n                >\n                  Name\n                </LineItem>\n                <LineItem\n                  selected={sortField === \"last_updated\"}\n                  onClick={() => setSortField(\"last_updated\")}\n                  rightChildren={\n                    sortField === \"last_updated\" ? (\n                      <SvgCheck size={16} />\n                    ) : undefined\n                  }\n                >\n                  Last Updated\n                </LineItem>\n                {/* Sorting Order section */}\n                <Divider showTitle text=\"Sorting Order\" dividerLine={false} />\n                <LineItem\n                  selected={sortDirection === \"desc\"}\n                  onClick={() => setSortDirection(\"desc\")}\n                  rightChildren={\n                    sortDirection === \"desc\" ? (\n                      <SvgCheck size={16} />\n                    ) : undefined\n                  }\n                >\n                  {sortField === \"name\" ? \"Z to A\" : \"Recent to Old\"}\n                </LineItem>\n                <LineItem\n                  selected={sortDirection === \"asc\"}\n                  onClick={() => setSortDirection(\"asc\")}\n                  rightChildren={\n                    sortDirection === \"asc\" ? <SvgCheck size={16} /> : undefined\n                  }\n                >\n                  {sortField === \"name\" ? \"A to Z\" : \"Old to Recent\"}\n                </LineItem>\n                {/* Folders section */}\n                <Divider showTitle text=\"Folders\" dividerLine={false} />\n                <LineItem\n                  selected={folderPosition === \"on_top\"}\n                  onClick={() => setFolderPosition(\"on_top\")}\n                  rightChildren={\n                    folderPosition === \"on_top\" ? (\n                      <SvgCheck size={16} />\n                    ) : undefined\n                  }\n                >\n                  On top\n                </LineItem>\n                <LineItem\n                  selected={folderPosition === \"mixed\"}\n                  onClick={() => setFolderPosition(\"mixed\")}\n                  rightChildren={\n                    folderPosition === \"mixed\" ? (\n                      <SvgCheck size={16} />\n                    ) : undefined\n                  }\n                >\n                  Mixed with Files\n                </LineItem>\n              </Popover.Menu>\n            </Popover.Content>\n          </Popover>\n        </TableLayouts.TableCell>\n      </TableLayouts.TableRow>\n\n      <Separator noPadding />\n\n      {/* Scrollable table body */}\n      <div\n        ref={scrollContainerRef}\n        onScroll={handleScroll}\n        className=\"overflow-y-auto max-h-[20rem]\"\n      >\n        {filteredItems.length === 0 && !isLoadingDocuments ? (\n          <GeneralLayouts.Section height=\"auto\" padding={1}>\n            <Text text03 secondaryBody>\n              {path.length === 0\n                ? \"Select a folder to browse documents.\"\n                : \"No items in this folder.\"}\n            </Text>\n          </GeneralLayouts.Section>\n        ) : (\n          <GeneralLayouts.Section gap={0} alignItems=\"stretch\" height=\"auto\">\n            {filteredItems.map((item) => {\n              const isFolder = item.type === \"folder\";\n              const id = isFolder ? `folder-${item.data.id}` : item.data.id;\n              const isSelected = isFolder\n                ? selectedFolderIds.includes(item.data.id as number)\n                : selectedDocumentIds.includes(item.data.id as string);\n\n              return (\n                <TableLayouts.TableRow\n                  key={id}\n                  selected={isSelected}\n                  onClick={() => handleItemClick(item)}\n                >\n                  <TableLayouts.CheckboxCell>\n                    {getItemIcon(item, isSelected)}\n                  </TableLayouts.CheckboxCell>\n                  <TableLayouts.TableCell flex>\n                    <GeneralLayouts.Section\n                      flexDirection=\"row\"\n                      justifyContent=\"start\"\n                      alignItems=\"center\"\n                      gap={0.25}\n                      height=\"auto\"\n                      width=\"fit\"\n                    >\n                      <Truncated>{item.data.title}</Truncated>\n                      {isFolder && (\n                        <Button\n                          icon={SvgChevronRight}\n                          prominence=\"tertiary\"\n                          size=\"sm\"\n                          onClick={(e) => {\n                            e.stopPropagation();\n                            handleClickIntoFolder(\n                              item.data as HierarchyNodeSummary\n                            );\n                          }}\n                        />\n                      )}\n                    </GeneralLayouts.Section>\n                  </TableLayouts.TableCell>\n                  <TableLayouts.TableCell width={8}>\n                    <Text text03 secondaryBody>\n                      {isFolder\n                        ? \"—\"\n                        : timeAgo(\n                            (item.data as DocumentSummary).last_modified\n                          ) || \"—\"}\n                    </Text>\n                  </TableLayouts.TableCell>\n                </TableLayouts.TableRow>\n              );\n            })}\n\n            {/* Loading more indicator */}\n            {isLoadingDocuments && documents.length > 0 && (\n              <GeneralLayouts.Section height=\"auto\" padding={0.5}>\n                <Text text03 secondaryBody>\n                  Loading more...\n                </Text>\n              </GeneralLayouts.Section>\n            )}\n          </GeneralLayouts.Section>\n        )}\n      </div>\n\n      {/* Table footer - only show when items are selected for this source */}\n      {currentSourceSelectedCount > 0 && (\n        <>\n          <Spacer rem={0.5} />\n          <GeneralLayouts.Section\n            flexDirection=\"row\"\n            justifyContent=\"start\"\n            alignItems=\"center\"\n            gap={0.5}\n            height=\"auto\"\n          >\n            <Text text03 secondaryBody>\n              {currentSourceSelectedCount}{\" \"}\n              {currentSourceSelectedCount === 1 ? \"item\" : \"items\"} selected\n            </Text>\n            <Button\n              icon={SvgEye}\n              variant={viewSelectedOnly ? \"action\" : undefined}\n              prominence=\"tertiary\"\n              size={viewSelectedOnly ? undefined : \"sm\"}\n              onClick={handleToggleViewSelected}\n            />\n            <Button\n              icon={SvgXCircle}\n              prominence=\"tertiary\"\n              size=\"sm\"\n              onClick={handleDeselectAll}\n            />\n          </GeneralLayouts.Section>\n        </>\n      )}\n    </GeneralLayouts.Section>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/AgentViewerModal.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useMemo, useState } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport { FullPersona } from \"@/app/admin/agents/interfaces\";\nimport { useModal } from \"@/refresh-components/contexts/ModalContext\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { Content, ContentAction } from \"@opal/layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport AgentAvatar from \"@/refresh-components/avatars/AgentAvatar\";\nimport Separator from \"@/refresh-components/Separator\";\nimport SimpleCollapsible from \"@/refresh-components/SimpleCollapsible\";\nimport {\n  SvgActions,\n  SvgBubbleText,\n  SvgExpand,\n  SvgFold,\n  SvgOrganization,\n  SvgStar,\n  SvgUser,\n} from \"@opal/icons\";\nimport * as ExpandableCard from \"@/layouts/expandable-card-layouts\";\nimport * as ActionsLayouts from \"@/layouts/actions-layouts\";\nimport useMcpServersForAgentEditor from \"@/hooks/useMcpServersForAgentEditor\";\nimport { getActionIcon } from \"@/lib/tools/mcpUtils\";\nimport { MCPServer, ToolSnapshot } from \"@/lib/tools/interfaces\";\nimport EmptyMessage from \"@/refresh-components/EmptyMessage\";\nimport { Horizontal } from \"@/layouts/input-layouts\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport { Button } from \"@opal/components\";\nimport { SEARCH_PARAM_NAMES } from \"@/app/app/services/searchParams\";\nimport AppInputBar from \"@/sections/input/AppInputBar\";\nimport { useFilters, useLlmManager } from \"@/lib/hooks\";\nimport { formatMmDdYyyy } from \"@/lib/dateUtils\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport { FileCard } from \"@/sections/cards/FileCard\";\nimport DocumentSetCard from \"@/sections/cards/DocumentSetCard\";\nimport { getDisplayName } from \"@/lib/llmConfig/utils\";\nimport { useLLMProviders } from \"@/hooks/useLLMProviders\";\nimport { Interactive } from \"@opal/core\";\n\n/**\n * Read-only MCP Server card for the viewer modal.\n * Displays the server header with its tools listed in the expandable content area.\n */\ninterface ViewerMCPServerCardProps {\n  server: MCPServer;\n  tools: ToolSnapshot[];\n}\n\nfunction ViewerMCPServerCard({ server, tools }: ViewerMCPServerCardProps) {\n  const [folded, setFolded] = useState(false);\n  const serverIcon = getActionIcon(server.server_url, server.name);\n\n  return (\n    <ExpandableCard.Root isFolded={folded} onFoldedChange={setFolded}>\n      <ExpandableCard.Header>\n        <div className=\"p-2\">\n          <ContentAction\n            icon={serverIcon}\n            title={server.name}\n            description={server.description}\n            sizePreset=\"main-ui\"\n            variant=\"section\"\n            rightChildren={\n              <Button\n                prominence=\"internal\"\n                rightIcon={folded ? SvgExpand : SvgFold}\n                onClick={() => setFolded((prev) => !prev)}\n              >\n                {folded ? \"Expand\" : \"Fold\"}\n              </Button>\n            }\n          />\n        </div>\n      </ExpandableCard.Header>\n      {tools.length > 0 && (\n        <ActionsLayouts.Content>\n          {tools.map((tool) => (\n            <Section key={tool.id} padding={0.25}>\n              <Content\n                title={tool.display_name}\n                description={tool.description}\n                sizePreset=\"main-ui\"\n                variant=\"section\"\n              />\n            </Section>\n          ))}\n        </ActionsLayouts.Content>\n      )}\n    </ExpandableCard.Root>\n  );\n}\n\n/**\n * Read-only OpenAPI tool card for the viewer modal.\n * Displays just the tool header (no expandable content).\n */\nfunction ViewerOpenApiToolCard({ tool }: { tool: ToolSnapshot }) {\n  return (\n    <ExpandableCard.Root>\n      <ExpandableCard.Header>\n        <div className=\"p-2\">\n          <Content\n            icon={SvgActions}\n            title={tool.display_name}\n            description={tool.description}\n            sizePreset=\"main-ui\"\n            variant=\"section\"\n          />\n        </div>\n      </ExpandableCard.Header>\n    </ExpandableCard.Root>\n  );\n}\n\n/**\n * Floating ChatInputBar below the AgentViewerModal.\n * On submit, navigates to the agent's chat with the message pre-filled.\n */\ninterface AgentChatInputProps {\n  agent: FullPersona;\n  onSubmit: (message: string) => void;\n}\nfunction AgentChatInput({ agent, onSubmit }: AgentChatInputProps) {\n  const llmManager = useLlmManager(undefined, agent);\n  const filterManager = useFilters();\n\n  return (\n    <AppInputBar\n      onSubmit={onSubmit}\n      llmManager={llmManager}\n      chatState=\"input\"\n      filterManager={filterManager}\n      selectedAgent={agent}\n      stopGenerating={() => {}}\n      handleFileUpload={() => {}}\n      currentSessionFileTokenCount={0}\n      availableContextTokens={Infinity}\n      deepResearchEnabled={false}\n      toggleDeepResearch={() => {}}\n      disabled={false}\n    />\n  );\n}\n\n/**\n * AgentViewerModal - A read-only view of an agent's configuration\n *\n * This modal is the view-only counterpart to `AgentEditorPage.tsx`. While\n * AgentEditorPage allows creating and editing agents with forms and inputs,\n * AgentViewerModal displays the same information in a read-only format.\n *\n * Key differences from AgentEditorPage:\n * - Modal presentation instead of full page\n * - Read-only display (no form inputs, switches, or editable fields)\n * - Static text/badges instead of form controls\n * - Designed to be opened from AgentCard when clicking on the card body\n *\n * Sections displayed (mirroring AgentEditorPage):\n * - Agent info: name, description, avatar\n * - Instructions (system prompt)\n * - Conversation starters\n * - Knowledge configuration\n * - Actions/tools\n * - Advanced options (model, sharing status)\n */\nexport interface AgentViewerModalProps {\n  agent: FullPersona;\n}\nexport default function AgentViewerModal({ agent }: AgentViewerModalProps) {\n  const agentViewerModal = useModal();\n  const router = useRouter();\n  const { allRecentFiles } = useProjectsContext();\n  const { llmProviders } = useLLMProviders(agent.id);\n\n  const handleStartChat = useCallback(\n    (message: string) => {\n      const params = new URLSearchParams({\n        [SEARCH_PARAM_NAMES.PERSONA_ID]: String(agent.id),\n        [SEARCH_PARAM_NAMES.USER_PROMPT]: message,\n        [SEARCH_PARAM_NAMES.SEND_ON_LOAD]: \"true\",\n      });\n      router.push(`/app?${params.toString()}` as Route);\n      agentViewerModal.toggle(false);\n    },\n    [agent.id, router, agentViewerModal]\n  );\n\n  const hasKnowledge =\n    (agent.document_sets && agent.document_sets.length > 0) ||\n    (agent.hierarchy_nodes && agent.hierarchy_nodes.length > 0) ||\n    (agent.user_file_ids && agent.user_file_ids.length > 0);\n\n  // Categorize tools into MCP, OpenAPI, and built-in\n  const mcpToolsByServerId = useMemo(() => {\n    const map = new Map<number, ToolSnapshot[]>();\n    agent.tools.forEach((tool) => {\n      if (tool.mcp_server_id != null) {\n        const existing = map.get(tool.mcp_server_id) || [];\n        existing.push(tool);\n        map.set(tool.mcp_server_id, existing);\n      }\n    });\n    return map;\n  }, [agent.tools]);\n\n  const openApiTools = useMemo(\n    () =>\n      agent.tools.filter((t) => !t.in_code_tool_id && t.mcp_server_id == null),\n    [agent.tools]\n  );\n\n  // Fetch MCP server metadata for display\n  const { mcpData } = useMcpServersForAgentEditor();\n  const mcpServers = mcpData?.mcp_servers ?? [];\n\n  const mcpServersWithTools = useMemo(\n    () =>\n      mcpServers\n        .filter((server) => mcpToolsByServerId.has(server.id))\n        .map((server) => ({\n          server,\n          tools: mcpToolsByServerId.get(server.id)!,\n        })),\n    [mcpServers, mcpToolsByServerId]\n  );\n\n  const hasActions = mcpServersWithTools.length > 0 || openApiTools.length > 0;\n  const defaultModel = getDisplayName(agent, llmProviders ?? []);\n\n  return (\n    <Modal\n      open={agentViewerModal.isOpen}\n      onOpenChange={agentViewerModal.toggle}\n    >\n      <Modal.Content\n        width=\"lg\"\n        height=\"lg\"\n        bottomSlot={<AgentChatInput agent={agent} onSubmit={handleStartChat} />}\n      >\n        <Modal.Header\n          icon={(props) => <AgentAvatar agent={agent} {...props} size={24} />}\n          title={agent.name}\n          onClose={() => agentViewerModal.toggle(false)}\n        />\n\n        <Modal.Body>\n          {/* Metadata */}\n          <Section flexDirection=\"row\" justifyContent=\"start\">\n            {agent.is_featured && (\n              <Content\n                icon={SvgStar}\n                title=\"Featured\"\n                sizePreset=\"main-ui\"\n                variant=\"body\"\n                widthVariant=\"fit\"\n              />\n            )}\n            <Content\n              icon={SvgUser}\n              title={agent.owner?.email ?? \"Onyx\"}\n              sizePreset=\"main-ui\"\n              variant=\"body\"\n              prominence=\"muted\"\n              widthVariant=\"fit\"\n            />\n            {agent.is_public && (\n              <Content\n                icon={SvgOrganization}\n                title=\"Public to your organization\"\n                sizePreset=\"main-ui\"\n                variant=\"body\"\n                prominence=\"muted\"\n                widthVariant=\"fit\"\n              />\n            )}\n          </Section>\n\n          {/* Description */}\n          {agent.description && <Text text03>{agent.description}</Text>}\n\n          {/* Knowledge */}\n          <Separator noPadding />\n          <Section gap={0.5} alignItems=\"start\">\n            <Content\n              title=\"Knowledge\"\n              sizePreset=\"main-content\"\n              variant=\"section\"\n            />\n            {hasKnowledge ? (\n              <Section\n                gap={0.5}\n                flexDirection=\"row\"\n                justifyContent=\"start\"\n                wrap\n                alignItems=\"start\"\n              >\n                {agent.document_sets?.map((docSet) => (\n                  <DocumentSetCard key={docSet.id} documentSet={docSet} />\n                ))}\n                {agent.user_file_ids?.map((fileId) => {\n                  const file = allRecentFiles.find((f) => f.id === fileId);\n                  if (!file) return null;\n                  return <FileCard key={fileId} file={file} />;\n                })}\n              </Section>\n            ) : (\n              <EmptyMessage title=\"No Knowledge\" />\n            )}\n          </Section>\n\n          {/* Actions & Tools */}\n          <SimpleCollapsible>\n            <SimpleCollapsible.Header title=\"Actions & Tools\" />\n            <SimpleCollapsible.Content>\n              {hasActions ? (\n                <Section gap={0.5} alignItems=\"start\">\n                  {mcpServersWithTools.map(({ server, tools }) => (\n                    <ViewerMCPServerCard\n                      key={server.id}\n                      server={server}\n                      tools={tools}\n                    />\n                  ))}\n                  {openApiTools.map((tool) => (\n                    <ViewerOpenApiToolCard key={tool.id} tool={tool} />\n                  ))}\n                </Section>\n              ) : (\n                <EmptyMessage title=\"No Actions\" />\n              )}\n            </SimpleCollapsible.Content>\n          </SimpleCollapsible>\n\n          {/* More Info (Collapsible) */}\n          <Separator noPadding />\n          <SimpleCollapsible>\n            <SimpleCollapsible.Header title=\"More Info\" />\n            <SimpleCollapsible.Content>\n              <Section gap={0.5} alignItems=\"start\">\n                {agent.system_prompt && (\n                  <Content\n                    title=\"Instructions\"\n                    description={agent.system_prompt}\n                    sizePreset=\"main-ui\"\n                    variant=\"section\"\n                  />\n                )}\n                {defaultModel && (\n                  <Horizontal\n                    title=\"Default Model\"\n                    description=\"This model will be used by Onyx by default in your chats.\"\n                    nonInteractive\n                    sizePreset=\"main-ui\"\n                  >\n                    <Text>{defaultModel}</Text>\n                  </Horizontal>\n                )}\n                {agent.search_start_date && (\n                  <Horizontal\n                    title=\"Knowledge Cutoff Date\"\n                    description=\"Documents with a last-updated date prior to this will be ignored.\"\n                    nonInteractive\n                    sizePreset=\"main-ui\"\n                  >\n                    <Text mainUiMono>\n                      {formatMmDdYyyy(agent.search_start_date)}\n                    </Text>\n                  </Horizontal>\n                )}\n                <Horizontal\n                  title=\"Overwrite System Prompts\"\n                  description='Remove the base system prompt which includes useful instructions (e.g. \"You can use Markdown tables\"). This may affect response quality.'\n                  nonInteractive\n                  sizePreset=\"main-ui\"\n                >\n                  <Switch disabled checked={agent.replace_base_system_prompt} />\n                </Horizontal>\n              </Section>\n            </SimpleCollapsible.Content>\n          </SimpleCollapsible>\n\n          {/* Prompt Reminders */}\n          {agent.task_prompt && (\n            <>\n              <Separator noPadding />\n              <Content\n                title=\"Prompt Reminders\"\n                description={agent.task_prompt}\n                sizePreset=\"main-content\"\n                variant=\"section\"\n              />\n            </>\n          )}\n\n          {/* Conversation Starters */}\n          {agent.starter_messages && agent.starter_messages.length > 0 && (\n            <>\n              <Separator noPadding />\n              <Content\n                title=\"Conversation Starters\"\n                sizePreset=\"main-content\"\n                variant=\"section\"\n              />\n              <div className=\"grid grid-cols-2 gap-1 w-full\">\n                {agent.starter_messages.map((starter, index) => (\n                  <Interactive.Stateless\n                    key={index}\n                    onClick={() => handleStartChat(starter.message)}\n                    prominence=\"tertiary\"\n                  >\n                    <Interactive.Container>\n                      <Content\n                        icon={SvgBubbleText}\n                        title={starter.message}\n                        sizePreset=\"main-ui\"\n                        variant=\"body\"\n                        prominence=\"muted\"\n                        widthVariant=\"full\"\n                      />\n                    </Interactive.Container>\n                  </Interactive.Stateless>\n                ))}\n              </div>\n            </>\n          )}\n        </Modal.Body>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/FeedbackModal.tsx",
    "content": "\"use client\";\n\nimport { FeedbackType } from \"@/app/app/interfaces\";\nimport { Button } from \"@opal/components\";\nimport useFeedbackController from \"@/hooks/useFeedbackController\";\nimport { useModal } from \"@/refresh-components/contexts/ModalContext\";\nimport { SvgThumbsDown, SvgThumbsUp } from \"@opal/icons\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Formik } from \"formik\";\nimport * as Yup from \"yup\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport InputTextAreaField from \"@/refresh-components/form/InputTextAreaField\";\n\nexport interface FeedbackModalProps {\n  feedbackType: FeedbackType;\n  messageId: number;\n}\n\ninterface FeedbackFormValues {\n  additional_feedback: string;\n}\n\nexport default function FeedbackModal({\n  feedbackType,\n  messageId,\n}: FeedbackModalProps) {\n  const modal = useModal();\n  const { handleFeedbackChange } = useFeedbackController();\n\n  const initialValues: FeedbackFormValues = {\n    additional_feedback: \"\",\n  };\n\n  const validationSchema = Yup.object({\n    additional_feedback:\n      feedbackType === \"dislike\"\n        ? Yup.string().trim().required(\"Feedback is required\")\n        : Yup.string().trim(),\n  });\n\n  async function handleSubmit(values: FeedbackFormValues) {\n    const feedbackText = values.additional_feedback;\n\n    const success = await handleFeedbackChange(\n      messageId,\n      feedbackType,\n      feedbackText,\n      undefined\n    );\n\n    // Only close modal if submission was successful\n    if (success) {\n      modal.toggle(false);\n    }\n  }\n\n  return (\n    <>\n      <Modal open={modal.isOpen} onOpenChange={modal.toggle}>\n        <Modal.Content width=\"sm\">\n          <Modal.Header\n            icon={feedbackType === \"like\" ? SvgThumbsUp : SvgThumbsDown}\n            title=\"Feedback\"\n            onClose={() => modal.toggle(false)}\n          />\n          <Formik\n            initialValues={initialValues}\n            validationSchema={validationSchema}\n            onSubmit={handleSubmit}\n          >\n            {({\n              isSubmitting,\n              handleSubmit: formikHandleSubmit,\n              dirty,\n              isValid,\n            }) => (\n              <>\n                <Modal.Body>\n                  <InputLayouts.Vertical\n                    name=\"additional_feedback\"\n                    title=\"Provide Additional Details\"\n                    suffix={feedbackType === \"like\" ? \"optional\" : undefined}\n                  >\n                    <InputTextAreaField\n                      name=\"additional_feedback\"\n                      placeholder={`What did you ${feedbackType} about this response?`}\n                    />\n                  </InputLayouts.Vertical>\n                </Modal.Body>\n\n                <Modal.Footer>\n                  <Button\n                    prominence=\"secondary\"\n                    onClick={() => modal.toggle(false)}\n                    type=\"button\"\n                  >\n                    Cancel\n                  </Button>\n                  <Button\n                    disabled={\n                      isSubmitting ||\n                      (feedbackType === \"dislike\" && (!dirty || !isValid))\n                    }\n                    onClick={() => formikHandleSubmit()}\n                  >\n                    {isSubmitting ? \"Submitting...\" : \"Submit\"}\n                  </Button>\n                </Modal.Footer>\n              </>\n            )}\n          </Formik>\n        </Modal.Content>\n      </Modal>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/NewTenantModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport Modal, { BasicModalFooter } from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport { toast } from \"@/hooks/useToast\";\nimport { SvgArrowRight, SvgUsers, SvgX } from \"@opal/icons\";\nimport { logout } from \"@/lib/user\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { NewTenantInfo } from \"@/lib/types\";\nimport { useRouter } from \"next/navigation\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { ErrorTextLayout } from \"@/layouts/input-layouts\";\n\n// App domain should not be hardcoded\nconst APP_DOMAIN = process.env.NEXT_PUBLIC_APP_DOMAIN || \"onyx.app\";\n\nexport interface NewTenantModalProps {\n  tenantInfo: NewTenantInfo;\n  isInvite?: boolean;\n  onClose?: () => void;\n}\n\nexport default function NewTenantModal({\n  tenantInfo,\n  isInvite = false,\n  onClose,\n}: NewTenantModalProps) {\n  const router = useRouter();\n  const { user } = useUser();\n  const [isLoading, setIsLoading] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n\n  async function handleJoinTenant() {\n    setIsLoading(true);\n    setError(null);\n\n    try {\n      if (isInvite) {\n        // Accept the invitation through the API\n        const response = await fetch(\"/api/tenants/users/invite/accept\", {\n          method: \"POST\",\n          headers: {\n            \"Content-Type\": \"application/json\",\n          },\n          body: JSON.stringify({ tenant_id: tenantInfo.tenant_id }),\n        });\n\n        if (!response.ok) {\n          const errorData = await response.json().catch(() => ({}));\n          throw new Error(\n            errorData.detail ||\n              errorData.message ||\n              \"Failed to accept invitation\"\n          );\n        }\n\n        toast.success(\"You have accepted the invitation.\");\n      } else {\n        // For non-invite flow, just show success message\n        toast.success(\"Processing your team join request...\");\n      }\n\n      // Common logout and redirect for both flows\n      await logout();\n      router.push(`/auth/join?email=${encodeURIComponent(user?.email || \"\")}`);\n      onClose?.();\n    } catch (error) {\n      const message =\n        error instanceof Error\n          ? error.message\n          : \"Failed to join the team. Please try again.\";\n\n      setError(message);\n      toast.error(message);\n    } finally {\n      setIsLoading(false);\n    }\n  }\n\n  async function handleRejectInvite() {\n    if (!isInvite) return;\n\n    setIsLoading(true);\n    setError(null);\n\n    try {\n      // Deny the invitation through the API\n      const response = await fetch(\"/api/tenants/users/invite/deny\", {\n        method: \"POST\",\n        headers: {\n          \"Content-Type\": \"application/json\",\n        },\n        body: JSON.stringify({ tenant_id: tenantInfo.tenant_id }),\n      });\n\n      if (!response.ok) {\n        const errorData = await response.json().catch(() => ({}));\n        throw new Error(\n          errorData.detail ||\n            errorData.message ||\n            \"Failed to decline invitation\"\n        );\n      }\n\n      toast.info(\"You have declined the invitation.\");\n      onClose?.();\n    } catch (error) {\n      const message =\n        error instanceof Error\n          ? error.message\n          : \"Failed to decline the invitation. Please try again.\";\n\n      setError(message);\n      toast.error(message);\n    } finally {\n      setIsLoading(false);\n    }\n  }\n\n  const title = isInvite\n    ? `You have been invited to join ${\n        tenantInfo.number_of_users\n      } other teammate${\n        tenantInfo.number_of_users === 1 ? \"\" : \"s\"\n      } of ${APP_DOMAIN}.`\n    : `Your request to join ${tenantInfo.number_of_users} other users of ${APP_DOMAIN} has been approved.`;\n\n  const description = isInvite\n    ? `By accepting this invitation, you will join the existing ${APP_DOMAIN} team and lose access to your current team. Note: you will lose access to your current agents, prompts, chats, and connected sources.`\n    : `To finish joining your team, please reauthenticate with ${user?.email}.`;\n\n  return (\n    <Modal open>\n      <Modal.Content width=\"sm\" height=\"sm\" preventAccidentalClose={false}>\n        <Modal.Header icon={SvgUsers} title={title} onClose={onClose} />\n\n        <Modal.Body>\n          <Text>{description}</Text>\n          {error && <ErrorTextLayout>{error}</ErrorTextLayout>}\n        </Modal.Body>\n\n        <Modal.Footer>\n          <BasicModalFooter\n            cancel={\n              isInvite ? (\n                <Button\n                  disabled={isLoading}\n                  prominence=\"secondary\"\n                  onClick={handleRejectInvite}\n                  icon={SvgX}\n                >\n                  Decline\n                </Button>\n              ) : undefined\n            }\n            submit={\n              <Button\n                disabled={isLoading}\n                onClick={handleJoinTenant}\n                rightIcon={SvgArrowRight}\n              >\n                {isLoading\n                  ? isInvite\n                    ? \"Accepting...\"\n                    : \"Joining...\"\n                  : isInvite\n                    ? \"Accept Invitation\"\n                    : \"Reauthenticate\"}\n              </Button>\n            }\n          />\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/ExceptionTraceModal.tsx",
    "content": "import Modal from \"@/refresh-components/Modal\";\nimport { SvgAlertTriangle } from \"@opal/icons\";\nimport { CodePreview } from \"@/sections/modals/PreviewModal/variants/CodePreview\";\nimport { CopyButton } from \"@/sections/modals/PreviewModal/variants/shared\";\nimport FloatingFooter from \"@/sections/modals/PreviewModal/FloatingFooter\";\n\ninterface ExceptionTraceModalProps {\n  onOutsideClick: () => void;\n  exceptionTrace: string;\n  language?: string;\n}\n\nexport default function ExceptionTraceModal({\n  onOutsideClick,\n  exceptionTrace,\n  language = \"python\",\n}: ExceptionTraceModalProps) {\n  return (\n    <Modal open onOpenChange={onOutsideClick}>\n      <Modal.Content width=\"full\" height=\"full\">\n        <Modal.Header\n          icon={SvgAlertTriangle}\n          title=\"Full Exception Trace\"\n          onClose={onOutsideClick}\n          height=\"fit\"\n        />\n\n        <div className=\"flex flex-col flex-1 min-h-0 overflow-hidden w-full bg-background-tint-01\">\n          <CodePreview content={exceptionTrace} language={language} normalize />\n        </div>\n\n        <FloatingFooter\n          right={<CopyButton getText={() => exceptionTrace} />}\n          codeBackground\n        />\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/FloatingFooter.tsx",
    "content": "import { cn } from \"@/lib/utils\";\nimport { ReactNode } from \"react\";\n\ninterface FloatingFooterProps {\n  left?: ReactNode;\n  right?: ReactNode;\n  codeBackground?: boolean;\n}\n\nexport default function FloatingFooter({\n  left,\n  right,\n  codeBackground,\n}: FloatingFooterProps) {\n  return (\n    <div\n      className={cn(\n        \"absolute bottom-0 left-0 right-0\",\n        \"flex items-center justify-between\",\n        \"p-4 pointer-events-none w-full\"\n      )}\n      style={{\n        background: `linear-gradient(to top, var(--background-${\n          codeBackground ? \"code-01\" : \"tint-01\"\n        }) 40%, transparent)`,\n      }}\n    >\n      {/* Left slot */}\n      <div className=\"pointer-events-auto\">{left}</div>\n\n      {/* Right slot */}\n      {right ? (\n        <div className=\"pointer-events-auto rounded-12 bg-background-tint-00 p-1 shadow-lg\">\n          {right}\n        </div>\n      ) : null}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/PreviewModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useCallback, useMemo } from \"react\";\nimport { MinimalOnyxDocument } from \"@/lib/search/interfaces\";\nimport Modal from \"@/refresh-components/Modal\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport FloatingFooter from \"@/sections/modals/PreviewModal/FloatingFooter\";\nimport mime from \"mime\";\nimport {\n  getCodeLanguage,\n  getDataLanguage,\n  getLanguageByMime,\n} from \"@/lib/languages\";\nimport { fetchChatFile } from \"@/lib/chat/svc\";\nimport { PreviewContext } from \"@/sections/modals/PreviewModal/interfaces\";\nimport { resolveVariant } from \"@/sections/modals/PreviewModal/variants\";\n\ninterface PreviewModalProps {\n  presentingDocument: MinimalOnyxDocument;\n  onClose: () => void;\n}\n\nexport default function PreviewModal({\n  presentingDocument,\n  onClose,\n}: PreviewModalProps) {\n  const [fileContent, setFileContent] = useState(\"\");\n  const [fileUrl, setFileUrl] = useState(\"\");\n  const [fileName, setFileName] = useState(\"\");\n  const [isLoading, setIsLoading] = useState(true);\n  const [loadError, setLoadError] = useState<string | null>(null);\n  const [mimeType, setMimeType] = useState(\"application/octet-stream\");\n  const [zoom, setZoom] = useState(100);\n\n  const variant = useMemo(\n    () => resolveVariant(presentingDocument.semantic_identifier, mimeType),\n    [presentingDocument.semantic_identifier, mimeType]\n  );\n\n  const language = useMemo(\n    () =>\n      getCodeLanguage(presentingDocument.semantic_identifier || \"\") ||\n      getLanguageByMime(mimeType) ||\n      getDataLanguage(presentingDocument.semantic_identifier || \"\") ||\n      \"plaintext\",\n    [mimeType, presentingDocument.semantic_identifier]\n  );\n\n  const lineCount = useMemo(() => {\n    if (!fileContent) return 0;\n    return fileContent.split(\"\\n\").length;\n  }, [fileContent]);\n\n  const fileSize = useMemo(() => {\n    if (!fileContent) return \"\";\n    const bytes = new TextEncoder().encode(fileContent).length;\n    if (bytes < 1024) return `${bytes} B`;\n    const kb = bytes / 1024;\n    if (kb < 1024) return `${kb.toFixed(2)} KB`;\n    const mb = kb / 1024;\n    return `${mb.toFixed(2)} MB`;\n  }, [fileContent]);\n\n  const fetchFile = useCallback(async () => {\n    setIsLoading(true);\n    setLoadError(null);\n    setFileContent(\"\");\n    const fileIdLocal =\n      presentingDocument.document_id.split(\"__\")[1] ||\n      presentingDocument.document_id;\n\n    try {\n      const response = await fetchChatFile(fileIdLocal);\n\n      const blob = await response.blob();\n      const url = window.URL.createObjectURL(blob);\n      setFileUrl((prev) => {\n        if (prev) window.URL.revokeObjectURL(prev);\n        return url;\n      });\n\n      const originalFileName =\n        presentingDocument.semantic_identifier || \"document\";\n      setFileName(originalFileName);\n\n      const rawContentType =\n        response.headers.get(\"Content-Type\") || \"application/octet-stream\";\n      const resolvedMime =\n        rawContentType === \"application/octet-stream\"\n          ? mime.getType(originalFileName) ?? rawContentType\n          : rawContentType;\n      setMimeType(resolvedMime);\n\n      const resolved = resolveVariant(\n        presentingDocument.semantic_identifier,\n        resolvedMime\n      );\n      if (resolved.needsTextContent) {\n        setFileContent(await blob.text());\n      }\n    } catch {\n      setLoadError(\"Failed to load document.\");\n    } finally {\n      setIsLoading(false);\n    }\n  }, [presentingDocument]);\n\n  useEffect(() => {\n    fetchFile();\n  }, [fetchFile]);\n\n  useEffect(() => {\n    return () => {\n      if (fileUrl) window.URL.revokeObjectURL(fileUrl);\n    };\n  }, [fileUrl]);\n\n  const handleZoomIn = useCallback(\n    () => setZoom((prev) => Math.min(prev + 25, 200)),\n    []\n  );\n  const handleZoomOut = useCallback(\n    () => setZoom((prev) => Math.max(prev - 25, 25)),\n    []\n  );\n\n  const ctx: PreviewContext = useMemo(\n    () => ({\n      fileContent,\n      fileUrl,\n      fileName,\n      language,\n      lineCount,\n      fileSize,\n      zoom,\n      onZoomIn: handleZoomIn,\n      onZoomOut: handleZoomOut,\n    }),\n    [\n      fileContent,\n      fileUrl,\n      fileName,\n      language,\n      lineCount,\n      fileSize,\n      zoom,\n      handleZoomIn,\n      handleZoomOut,\n    ]\n  );\n\n  return (\n    <Modal\n      open\n      onOpenChange={(open) => {\n        if (!open) onClose();\n      }}\n    >\n      <Modal.Content\n        width={variant.width}\n        height={variant.height}\n        preventAccidentalClose={false}\n        onOpenAutoFocus={(e) => e.preventDefault()}\n      >\n        <Modal.Header\n          title={fileName || \"Document\"}\n          description={variant.headerDescription(ctx)}\n          onClose={onClose}\n        />\n\n        {/* Body — uses flex-1/min-h-0/overflow-hidden (not Modal.Body)\n            so that child ScrollIndicatorDivs become the actual scroll\n            container instead of the body stealing it via overflow-y-auto. */}\n        <div className=\"flex flex-col flex-1 min-h-0 overflow-hidden w-full bg-background-tint-01\">\n          {isLoading ? (\n            <Section>\n              <SimpleLoader className=\"h-8 w-8\" />\n            </Section>\n          ) : loadError ? (\n            <Section padding={1}>\n              <Text text03 mainUiBody>\n                {loadError}\n              </Text>\n            </Section>\n          ) : (\n            variant.renderContent(ctx)\n          )}\n        </div>\n\n        {!isLoading && !loadError && (\n          <FloatingFooter\n            left={variant.renderFooterLeft(ctx)}\n            right={variant.renderFooterRight(ctx)}\n            codeBackground={variant.codeBackground}\n          />\n        )}\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/index.ts",
    "content": "export { default } from \"@/sections/modals/PreviewModal/PreviewModal\";\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/interfaces.ts",
    "content": "import React from \"react\";\nimport { ModalContentProps } from \"@/refresh-components/Modal\";\n\nexport interface PreviewContext {\n  fileContent: string;\n  fileUrl: string;\n  fileName: string;\n  language: string;\n  lineCount: number;\n  fileSize: string;\n  zoom: number;\n  onZoomIn: () => void;\n  onZoomOut: () => void;\n}\n\nexport interface PreviewVariant\n  extends Required<Pick<ModalContentProps, \"width\" | \"height\">> {\n  /** Return true if this variant should handle the given file. */\n  matches: (semanticIdentifier: string | null, mimeType: string) => boolean;\n  /** Whether the fetcher should read the blob as text. */\n  needsTextContent: boolean;\n  /** Whether the variant renders on a code-style background (bg-background-code-01). */\n  codeBackground: boolean;\n  /** String shown below the title in the modal header. */\n  headerDescription: (ctx: PreviewContext) => string;\n  /** Body content. */\n  renderContent: (ctx: PreviewContext) => React.ReactNode;\n  /** Left side of the floating footer (e.g. line count text, zoom controls). Return null for nothing. */\n  renderFooterLeft: (ctx: PreviewContext) => React.ReactNode;\n  /** Right side of the floating footer (e.g. copy + download buttons). */\n  renderFooterRight: (ctx: PreviewContext) => React.ReactNode;\n}\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/variants/CodePreview.tsx",
    "content": "\"use client\";\n\nimport MinimalMarkdown from \"@/components/chat/MinimalMarkdown\";\nimport ScrollIndicatorDiv from \"@/refresh-components/ScrollIndicatorDiv\";\nimport { cn } from \"@/lib/utils\";\nimport \"@/app/app/message/custom-code-styles.css\";\n\ninterface CodePreviewProps {\n  content: string;\n  language?: string | null;\n  normalize?: boolean;\n}\n\nexport function CodePreview({\n  content,\n  language,\n  normalize,\n}: CodePreviewProps) {\n  // Wrap raw content in a fenced code block for syntax highlighting. Uses ~~~\n  // instead of ``` to avoid conflicts with backticks in the content. Any literal\n  // ~~~ sequences in the content are escaped so they don't accidentally close the fence.\n  const markdownContent = normalize\n    ? `~~~${language || \"\"}\\n${content.replace(/~~~/g, \"\\\\~\\\\~\\\\~\")}\\n~~~`\n    : content;\n\n  return (\n    <ScrollIndicatorDiv\n      className={cn(\"p-4\", normalize && \"bg-background-code-01\")}\n      backgroundColor={normalize ? \"var(--background-code-01)\" : undefined}\n      variant=\"shadow\"\n      bottomSpacing=\"2rem\"\n      disableBottomIndicator\n    >\n      <MinimalMarkdown content={markdownContent} showHeader={false} />\n    </ScrollIndicatorDiv>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/variants/codeVariant.tsx",
    "content": "import Text from \"@/refresh-components/texts/Text\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { getCodeLanguage } from \"@/lib/languages\";\nimport { PreviewVariant } from \"@/sections/modals/PreviewModal/interfaces\";\nimport { CodePreview } from \"@/sections/modals/PreviewModal/variants/CodePreview\";\nimport {\n  CopyButton,\n  DownloadButton,\n} from \"@/sections/modals/PreviewModal/variants/shared\";\n\nexport const codeVariant: PreviewVariant = {\n  matches: (name) => !!getCodeLanguage(name || \"\"),\n  width: \"xl\",\n  height: \"lg\",\n  needsTextContent: true,\n  codeBackground: true,\n\n  headerDescription: (ctx) =>\n    ctx.fileContent\n      ? `${ctx.language} - ${ctx.lineCount} ${\n          ctx.lineCount === 1 ? \"line\" : \"lines\"\n        } · ${ctx.fileSize}`\n      : \"\",\n\n  renderContent: (ctx) => (\n    <CodePreview normalize content={ctx.fileContent} language={ctx.language} />\n  ),\n\n  renderFooterLeft: (ctx) => (\n    <Text text03 mainUiBody className=\"select-none\">\n      {ctx.lineCount} {ctx.lineCount === 1 ? \"line\" : \"lines\"}\n    </Text>\n  ),\n\n  renderFooterRight: (ctx) => (\n    <Section flexDirection=\"row\" width=\"fit\">\n      <CopyButton getText={() => ctx.fileContent} />\n      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />\n    </Section>\n  ),\n};\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/variants/csvVariant.tsx",
    "content": "import {\n  Table,\n  TableBody,\n  TableCell,\n  TableHead,\n  TableHeader,\n  TableRow,\n} from \"@/components/ui/table\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { PreviewVariant } from \"@/sections/modals/PreviewModal/interfaces\";\nimport {\n  CopyButton,\n  DownloadButton,\n} from \"@/sections/modals/PreviewModal/variants/shared\";\nimport TextSeparator from \"@/refresh-components/TextSeparator\";\n\ninterface CsvData {\n  headers: string[];\n  rows: string[][];\n}\n\nfunction parseCsv(content: string): CsvData {\n  const lines = content.split(/\\r?\\n/).filter((l) => l.length > 0);\n  const headers = lines.length > 0 ? lines[0]?.split(\",\") ?? [] : [];\n  const rows = lines.slice(1).map((line) => line.split(\",\"));\n  return { headers, rows };\n}\n\nexport const csvVariant: PreviewVariant = {\n  matches: (name, mime) =>\n    mime.startsWith(\"text/csv\") || (name || \"\").toLowerCase().endsWith(\".csv\"),\n  width: \"full\",\n  height: \"full\",\n  needsTextContent: true,\n  codeBackground: false,\n  headerDescription: (ctx) => {\n    if (!ctx.fileContent) return \"\";\n    const { rows } = parseCsv(ctx.fileContent);\n    return `CSV - ${rows.length} rows • ${ctx.fileSize}`;\n  },\n\n  renderContent: (ctx) => {\n    if (!ctx.fileContent) return null;\n    const { headers, rows } = parseCsv(ctx.fileContent);\n    return (\n      <Section justifyContent=\"start\" alignItems=\"start\" padding={1}>\n        <Table>\n          <TableHeader className=\"sticky top-0 z-sticky bg-background-tint-01\">\n            <TableRow noHover>\n              {headers.map((h: string, i: number) => (\n                <TableHead key={i}>\n                  <Text as=\"p\" className=\"line-clamp-2\" text04 secondaryAction>\n                    {h}\n                  </Text>\n                </TableHead>\n              ))}\n            </TableRow>\n          </TableHeader>\n          <TableBody>\n            {rows.map((row: string[], rIdx: number) => (\n              <TableRow key={rIdx} noHover>\n                {headers.map((_: string, cIdx: number) => (\n                  <TableCell\n                    key={cIdx}\n                    className={cn(\n                      cIdx === 0 && \"sticky left-0 bg-background-tint-01\",\n                      \"py-4 px-4 whitespace-normal break-words\"\n                    )}\n                  >\n                    <Text\n                      as=\"p\"\n                      {...(cIdx === 0\n                        ? { text04: true, secondaryAction: true }\n                        : { text03: true, secondaryBody: true })}\n                    >\n                      {row?.[cIdx] ?? \"\"}\n                    </Text>\n                  </TableCell>\n                ))}\n              </TableRow>\n            ))}\n          </TableBody>\n        </Table>\n        <TextSeparator\n          count={rows.length}\n          text={rows.length === 1 ? \"row\" : \"rows\"}\n        />\n      </Section>\n    );\n  },\n\n  renderFooterLeft: (ctx) => {\n    if (!ctx.fileContent) return null;\n    const { headers, rows } = parseCsv(ctx.fileContent);\n    return (\n      <Text text03 mainUiBody className=\"select-none\">\n        {headers.length} {headers.length === 1 ? \"column\" : \"columns\"} •{\" \"}\n        {rows.length} {rows.length === 1 ? \"row\" : \"rows\"}\n      </Text>\n    );\n  },\n  renderFooterRight: (ctx) => (\n    <Section flexDirection=\"row\" width=\"fit\">\n      <CopyButton getText={() => ctx.fileContent} />\n      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />\n    </Section>\n  ),\n};\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/variants/dataVariant.tsx",
    "content": "import Text from \"@/refresh-components/texts/Text\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { getDataLanguage, getLanguageByMime } from \"@/lib/languages\";\nimport { PreviewVariant } from \"@/sections/modals/PreviewModal/interfaces\";\nimport { CodePreview } from \"@/sections/modals/PreviewModal/variants/CodePreview\";\nimport {\n  CopyButton,\n  DownloadButton,\n} from \"@/sections/modals/PreviewModal/variants/shared\";\n\nfunction formatContent(language: string, content: string): string {\n  if (language === \"json\") {\n    try {\n      return JSON.stringify(JSON.parse(content), null, 2);\n    } catch {\n      return content;\n    }\n  }\n  return content;\n}\n\nexport const dataVariant: PreviewVariant = {\n  matches: (name, mime) =>\n    !!getDataLanguage(name || \"\") || !!getLanguageByMime(mime),\n  width: \"xl\",\n  height: \"lg\",\n  needsTextContent: true,\n  codeBackground: true,\n\n  headerDescription: (ctx) =>\n    ctx.fileContent\n      ? `${ctx.language} - ${ctx.lineCount} ${\n          ctx.lineCount === 1 ? \"line\" : \"lines\"\n        } · ${ctx.fileSize}`\n      : \"\",\n\n  renderContent: (ctx) => {\n    const formatted = formatContent(ctx.language, ctx.fileContent);\n    return (\n      <CodePreview normalize content={formatted} language={ctx.language} />\n    );\n  },\n\n  renderFooterLeft: (ctx) => (\n    <Text text03 mainUiBody className=\"select-none\">\n      {ctx.lineCount} {ctx.lineCount === 1 ? \"line\" : \"lines\"}\n    </Text>\n  ),\n\n  renderFooterRight: (ctx) => (\n    <Section flexDirection=\"row\" width=\"fit\">\n      <CopyButton getText={() => ctx.fileContent} />\n      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />\n    </Section>\n  ),\n};\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/variants/docxVariant.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect, useRef } from \"react\";\nimport { renderAsync } from \"docx-preview\";\nimport ScrollIndicatorDiv from \"@/refresh-components/ScrollIndicatorDiv\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { PreviewContext } from \"@/sections/modals/PreviewModal/interfaces\";\nimport { PreviewVariant } from \"@/sections/modals/PreviewModal/interfaces\";\nimport {\n  CopyButton,\n  DownloadButton,\n} from \"@/sections/modals/PreviewModal/variants/shared\";\n\nconst DOCX_MIMES = [\n  \"application/vnd.openxmlformats-officedocument.wordprocessingml.document\",\n  \"application/msword\",\n];\n\nfunction isLegacyDoc(fileName: string): boolean {\n  const lower = fileName.toLowerCase();\n  return lower.endsWith(\".doc\") && !lower.endsWith(\".docx\");\n}\n\ninterface DocxLoadResult {\n  plainText: string;\n  wordCount: number;\n}\n\ninterface DocxPreviewProps {\n  fileUrl: string;\n  onLoad: (result: DocxLoadResult) => void;\n}\n\nfunction DocxPreview({ fileUrl, onLoad }: DocxPreviewProps) {\n  const [isLoading, setIsLoading] = useState(true);\n  const [error, setError] = useState<string | null>(null);\n  const bodyRef = useRef<HTMLDivElement>(null);\n  const styleRef = useRef<HTMLDivElement>(null);\n  const onLoadRef = useRef(onLoad);\n  onLoadRef.current = onLoad;\n\n  useEffect(() => {\n    async function loadDocument() {\n      setIsLoading(true);\n      setError(null);\n      try {\n        const response = await fetch(fileUrl);\n        if (!response.ok) {\n          throw new Error(`Failed to fetch document: ${response.status}`);\n        }\n        const buffer = await response.arrayBuffer();\n\n        // Render the DOCX with full layout fidelity\n        if (bodyRef.current && styleRef.current) {\n          bodyRef.current.innerHTML = \"\";\n          styleRef.current.innerHTML = \"\";\n\n          await renderAsync(buffer, bodyRef.current, styleRef.current, {\n            className: \"docx\",\n            inWrapper: false,\n            ignoreWidth: false,\n            ignoreHeight: false,\n            ignoreFonts: false,\n            breakPages: true,\n            useBase64URL: true,\n            renderHeaders: true,\n            renderFooters: true,\n            renderFootnotes: true,\n            renderEndnotes: true,\n          });\n        }\n\n        // Extract plain text from the rendered DOM\n        const text = bodyRef.current?.innerText ?? \"\";\n        const words = text\n          .split(/\\s+/)\n          .filter((w: string) => w.length > 0).length;\n\n        onLoadRef.current({ plainText: text, wordCount: words });\n      } catch {\n        setError(\n          \"Could not preview this document. Download the file to view it.\"\n        );\n      } finally {\n        setIsLoading(false);\n      }\n    }\n    loadDocument();\n  }, [fileUrl]);\n\n  if (error) {\n    return (\n      <Section justifyContent=\"center\" alignItems=\"center\" padding={1.5}>\n        <Text text03 mainUiBody>\n          {error}\n        </Text>\n      </Section>\n    );\n  }\n\n  return (\n    <ScrollIndicatorDiv\n      className=\"flex-1 min-h-0 bg-background-tint-00\"\n      variant=\"shadow\"\n    >\n      {isLoading && (\n        <Section>\n          <SimpleLoader className=\"h-8 w-8\" />\n        </Section>\n      )}\n      {/* Style container for docx-preview generated styles */}\n      <div ref={styleRef} />\n      {/* Body container where docx-preview renders the document */}\n      <div ref={bodyRef} className=\"docx-host px-32 pb-16\" />\n    </ScrollIndicatorDiv>\n  );\n}\n\n// Store parsed result outside the variant so footer can access it\nlet lastDocxResult: DocxLoadResult | null = null;\n\nexport const docxVariant: PreviewVariant = {\n  matches: (name, mime) => {\n    if (DOCX_MIMES.some((m) => mime === m)) return true;\n    const lower = (name || \"\").toLowerCase();\n    return lower.endsWith(\".docx\") || lower.endsWith(\".doc\");\n  },\n  width: \"full\",\n  height: \"full\",\n  needsTextContent: false,\n  codeBackground: false,\n  headerDescription: () => {\n    if (lastDocxResult) {\n      const count = lastDocxResult.wordCount;\n      return `Word Document • ${count.toLocaleString()} ${\n        count === 1 ? \"word\" : \"words\"\n      }`;\n    }\n    return \"Word Document\";\n  },\n\n  renderContent: (ctx: PreviewContext) => {\n    if (isLegacyDoc(ctx.fileName)) {\n      lastDocxResult = null;\n      return (\n        <Section justifyContent=\"center\" alignItems=\"center\" padding={1.5}>\n          <Text text03 mainUiBody>\n            Legacy .doc format cannot be previewed. Download the file to view\n            it.\n          </Text>\n        </Section>\n      );\n    }\n    return (\n      <DocxPreview\n        fileUrl={ctx.fileUrl}\n        onLoad={(result) => {\n          lastDocxResult = result;\n        }}\n      />\n    );\n  },\n\n  renderFooterLeft: () => null,\n  renderFooterRight: (ctx: PreviewContext) => (\n    <Section flexDirection=\"row\" width=\"fit\">\n      {lastDocxResult && (\n        <CopyButton getText={() => lastDocxResult?.plainText ?? \"\"} />\n      )}\n      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />\n    </Section>\n  ),\n};\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/variants/imageVariant.tsx",
    "content": "import { Section } from \"@/layouts/general-layouts\";\nimport PreviewImage from \"@/refresh-components/PreviewImage\";\nimport { PreviewVariant } from \"@/sections/modals/PreviewModal/interfaces\";\nimport {\n  DownloadButton,\n  ZoomControls,\n} from \"@/sections/modals/PreviewModal/variants/shared\";\n\nexport const imageVariant: PreviewVariant = {\n  matches: (_name, mime) => mime.startsWith(\"image/\"),\n  width: \"full\",\n  height: \"full\",\n  needsTextContent: false,\n  codeBackground: false,\n  headerDescription: () => \"\",\n\n  renderContent: (ctx) => (\n    <div\n      className=\"flex flex-1 min-h-0 items-center justify-center p-4 transition-transform duration-300 ease-in-out\"\n      style={{\n        transform: `scale(${ctx.zoom / 100})`,\n        transformOrigin: \"center\",\n      }}\n    >\n      <PreviewImage\n        src={ctx.fileUrl}\n        alt={ctx.fileName}\n        className=\"max-w-full max-h-full\"\n      />\n    </div>\n  ),\n\n  renderFooterLeft: (ctx) => (\n    <ZoomControls\n      zoom={ctx.zoom}\n      onZoomIn={ctx.onZoomIn}\n      onZoomOut={ctx.onZoomOut}\n    />\n  ),\n\n  renderFooterRight: (ctx) => (\n    <Section flexDirection=\"row\" width=\"fit\">\n      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />\n    </Section>\n  ),\n};\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/variants/index.ts",
    "content": "import { PreviewVariant } from \"@/sections/modals/PreviewModal/interfaces\";\nimport { codeVariant } from \"@/sections/modals/PreviewModal/variants/codeVariant\";\nimport { imageVariant } from \"@/sections/modals/PreviewModal/variants/imageVariant\";\nimport { pdfVariant } from \"@/sections/modals/PreviewModal/variants/pdfVariant\";\nimport { csvVariant } from \"@/sections/modals/PreviewModal/variants/csvVariant\";\nimport { markdownVariant } from \"@/sections/modals/PreviewModal/variants/markdownVariant\";\nimport { dataVariant } from \"@/sections/modals/PreviewModal/variants/dataVariant\";\nimport { textVariant } from \"@/sections/modals/PreviewModal/variants/textVariant\";\nimport { unsupportedVariant } from \"@/sections/modals/PreviewModal/variants/unsupportedVariant\";\nimport { docxVariant } from \"@/sections/modals/PreviewModal/variants/docxVariant\";\n\n// Note: Order does matter for the order that filters that are hit\nconst PREVIEW_VARIANTS: PreviewVariant[] = [\n  codeVariant,\n  imageVariant,\n  pdfVariant,\n  csvVariant,\n  markdownVariant,\n  docxVariant,\n  textVariant,\n  dataVariant,\n];\n\nexport function resolveVariant(\n  semanticIdentifier: string | null,\n  mimeType: string\n): PreviewVariant {\n  return (\n    PREVIEW_VARIANTS.find((v) => v.matches(semanticIdentifier, mimeType)) ??\n    unsupportedVariant\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/variants/markdownVariant.tsx",
    "content": "import { Section } from \"@/layouts/general-layouts\";\nimport { isMarkdownFile } from \"@/lib/languages\";\nimport { PreviewVariant } from \"@/sections/modals/PreviewModal/interfaces\";\nimport { CodePreview } from \"@/sections/modals/PreviewModal/variants/CodePreview\";\nimport {\n  CopyButton,\n  DownloadButton,\n} from \"@/sections/modals/PreviewModal/variants/shared\";\n\nconst MARKDOWN_MIMES = [\n  \"text/markdown\",\n  \"text/x-markdown\",\n  \"text/x-rst\",\n  \"text/x-org\",\n];\n\nexport const markdownVariant: PreviewVariant = {\n  matches: (name, mime) => {\n    if (MARKDOWN_MIMES.some((m) => mime.startsWith(m))) return true;\n    return isMarkdownFile(name || \"\");\n  },\n  width: \"full\",\n  height: \"full\",\n  needsTextContent: true,\n  codeBackground: false,\n  headerDescription: () => \"\",\n\n  renderContent: (ctx) => (\n    <CodePreview content={ctx.fileContent} language={ctx.language} />\n  ),\n\n  renderFooterLeft: () => null,\n\n  renderFooterRight: (ctx) => (\n    <Section flexDirection=\"row\" width=\"fit\">\n      <CopyButton getText={() => ctx.fileContent} />\n      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />\n    </Section>\n  ),\n};\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/variants/pdfVariant.tsx",
    "content": "import { Section } from \"@/layouts/general-layouts\";\nimport { PreviewVariant } from \"@/sections/modals/PreviewModal/interfaces\";\nimport { DownloadButton } from \"@/sections/modals/PreviewModal/variants/shared\";\n\nexport const pdfVariant: PreviewVariant = {\n  matches: (_name, mime) => mime === \"application/pdf\",\n  width: \"full\",\n  height: \"full\",\n  needsTextContent: false,\n  codeBackground: false,\n  headerDescription: () => \"\",\n\n  renderContent: (ctx) => (\n    <iframe\n      src={`${ctx.fileUrl}#toolbar=0`}\n      className=\"w-full h-full flex-1 min-h-0 border-none\"\n      title=\"PDF Viewer\"\n    />\n  ),\n\n  renderFooterLeft: () => null,\n  renderFooterRight: (ctx) => (\n    <Section flexDirection=\"row\" width=\"fit\">\n      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />\n    </Section>\n  ),\n};\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/variants/shared.tsx",
    "content": "import { Button } from \"@opal/components\";\nimport { SvgDownload, SvgZoomIn, SvgZoomOut } from \"@opal/icons\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport { Section } from \"@/layouts/general-layouts\";\n\ninterface DownloadButtonProps {\n  fileUrl: string;\n  fileName: string;\n}\n\nexport function DownloadButton({ fileUrl, fileName }: DownloadButtonProps) {\n  return (\n    <a href={fileUrl} download={fileName}>\n      <Button\n        prominence=\"tertiary\"\n        size=\"sm\"\n        icon={SvgDownload}\n        tooltip=\"Download\"\n      />\n    </a>\n  );\n}\n\ninterface CopyButtonProps {\n  getText: () => string;\n}\n\nexport function CopyButton({ getText }: CopyButtonProps) {\n  return (\n    <CopyIconButton getCopyText={getText} tooltip=\"Copy content\" size=\"sm\" />\n  );\n}\n\ninterface ZoomControlsProps {\n  zoom: number;\n  onZoomIn: () => void;\n  onZoomOut: () => void;\n}\n\nexport function ZoomControls({ zoom, onZoomIn, onZoomOut }: ZoomControlsProps) {\n  return (\n    <div className=\"rounded-12 bg-background-tint-00 p-1 shadow-lg\">\n      <Section flexDirection=\"row\" width=\"fit\">\n        <Button\n          prominence=\"tertiary\"\n          size=\"sm\"\n          icon={SvgZoomOut}\n          onClick={onZoomOut}\n          tooltip=\"Zoom Out\"\n        />\n        <Text mainUiMono text03>\n          {zoom}%\n        </Text>\n        <Button\n          prominence=\"tertiary\"\n          size=\"sm\"\n          icon={SvgZoomIn}\n          onClick={onZoomIn}\n          tooltip=\"Zoom In\"\n        />\n      </Section>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/variants/textVariant.tsx",
    "content": "import Text from \"@/refresh-components/texts/Text\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { PreviewVariant } from \"@/sections/modals/PreviewModal/interfaces\";\nimport { CodePreview } from \"@/sections/modals/PreviewModal/variants/CodePreview\";\nimport {\n  CopyButton,\n  DownloadButton,\n} from \"@/sections/modals/PreviewModal/variants/shared\";\n\nconst TEXT_MIMES = [\n  \"text/plain\",\n  \"text/x-log\",\n  \"text/x-config\",\n  \"text/tab-separated-values\",\n];\n\nconst TEXT_EXTENSIONS = [\".txt\", \".log\", \".conf\", \".tsv\"];\n\nexport const textVariant: PreviewVariant = {\n  matches: (name, mime) => {\n    if (TEXT_MIMES.some((supportedMime) => mime.startsWith(supportedMime))) {\n      return true;\n    }\n\n    const lowerName = (name || \"\").toLowerCase();\n    return TEXT_EXTENSIONS.some((extension) => lowerName.endsWith(extension));\n  },\n  width: \"xl\",\n  height: \"lg\",\n  needsTextContent: true,\n  codeBackground: true,\n  headerDescription: (ctx) =>\n    ctx.fileContent\n      ? `${ctx.lineCount} ${ctx.lineCount === 1 ? \"line\" : \"lines\"} · ${\n          ctx.fileSize\n        }`\n      : \"\",\n\n  renderContent: (ctx) => (\n    <CodePreview normalize content={ctx.fileContent} language={ctx.language} />\n  ),\n\n  renderFooterLeft: (ctx) => (\n    <Text text03 mainUiBody className=\"select-none\">\n      {ctx.lineCount} {ctx.lineCount === 1 ? \"line\" : \"lines\"}\n    </Text>\n  ),\n\n  renderFooterRight: (ctx) => (\n    <Section flexDirection=\"row\" width=\"fit\">\n      <CopyButton getText={() => ctx.fileContent} />\n      <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />\n    </Section>\n  ),\n};\n"
  },
  {
    "path": "web/src/sections/modals/PreviewModal/variants/unsupportedVariant.tsx",
    "content": "import { Button } from \"@opal/components\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { PreviewVariant } from \"@/sections/modals/PreviewModal/interfaces\";\nimport { DownloadButton } from \"@/sections/modals/PreviewModal/variants/shared\";\n\nexport const unsupportedVariant: PreviewVariant = {\n  matches: () => true,\n  width: \"xl\",\n  height: \"full\",\n  needsTextContent: false,\n  codeBackground: false,\n  headerDescription: () => \"\",\n\n  renderContent: (ctx) => (\n    <div className=\"flex flex-col items-center justify-center flex-1 w-full min-h-0 gap-4 p-6\">\n      <Text as=\"p\" text03 mainUiBody>\n        This file format is not supported for preview.\n      </Text>\n      <a href={ctx.fileUrl} download={ctx.fileName}>\n        <Button>Download File</Button>\n      </a>\n    </div>\n  ),\n\n  renderFooterLeft: () => null,\n  renderFooterRight: (ctx) => (\n    <DownloadButton fileUrl={ctx.fileUrl} fileName={ctx.fileName} />\n  ),\n};\n"
  },
  {
    "path": "web/src/sections/modals/ShareAgentModal.test.tsx",
    "content": "import React, { useEffect } from \"react\";\nimport { render, screen, waitFor } from \"@tests/setup/test-utils\";\nimport ShareAgentModal, { ShareAgentModalProps } from \"./ShareAgentModal\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\n\njest.mock(\"@/hooks/useShareableUsers\", () => ({\n  __esModule: true,\n  default: jest.fn(() => ({ data: [] })),\n}));\n\njest.mock(\"@/hooks/useShareableGroups\", () => ({\n  __esModule: true,\n  default: jest.fn(() => ({ data: [] })),\n}));\n\njest.mock(\"@/hooks/useAgents\", () => ({\n  useAgent: jest.fn(() => ({ agent: null })),\n}));\n\njest.mock(\"@/lib/hooks\", () => ({\n  useLabels: jest.fn(() => ({\n    labels: [],\n    createLabel: jest.fn(),\n  })),\n}));\n\nfunction ModalHarness(props: ShareAgentModalProps) {\n  const modal = useCreateModal();\n\n  useEffect(() => {\n    modal.toggle(true);\n  }, [modal]);\n\n  return (\n    <modal.Provider>\n      <ShareAgentModal {...props} />\n    </modal.Provider>\n  );\n}\n\nfunction renderShareAgentModal(overrides: Partial<ShareAgentModalProps> = {}) {\n  const props: ShareAgentModalProps = {\n    userIds: [],\n    groupIds: [],\n    isPublic: false,\n    isFeatured: false,\n    labelIds: [],\n    ...overrides,\n  };\n\n  return render(<ModalHarness {...props} />);\n}\n\ndescribe(\"ShareAgentModal\", () => {\n  it(\"defaults to Users & Groups when the agent is private\", async () => {\n    renderShareAgentModal({ isPublic: false });\n\n    await waitFor(() =>\n      expect(\n        screen.getByRole(\"tab\", { name: \"Users & Groups\" })\n      ).toHaveAttribute(\"data-state\", \"active\")\n    );\n\n    expect(\n      screen.getByRole(\"tab\", { name: \"Your Organization\" })\n    ).toHaveAttribute(\"data-state\", \"inactive\");\n  });\n\n  it(\"defaults to Your Organization when the agent is public\", async () => {\n    renderShareAgentModal({ isPublic: true });\n\n    await waitFor(() =>\n      expect(\n        screen.getByRole(\"tab\", { name: \"Your Organization\" })\n      ).toHaveAttribute(\"data-state\", \"active\")\n    );\n\n    expect(screen.getByRole(\"tab\", { name: \"Users & Groups\" })).toHaveAttribute(\n      \"data-state\",\n      \"inactive\"\n    );\n  });\n});\n"
  },
  {
    "path": "web/src/sections/modals/ShareAgentModal.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useEffect, useMemo, useRef, useState } from \"react\";\nimport Modal, { BasicModalFooter } from \"@/refresh-components/Modal\";\nimport {\n  SvgLink,\n  SvgOrganization,\n  SvgShare,\n  SvgTag,\n  SvgUser,\n  SvgUsers,\n  SvgX,\n} from \"@opal/icons\";\nimport InputChipField from \"@/refresh-components/inputs/InputChipField\";\nimport Message from \"@/refresh-components/messages/Message\";\nimport Tabs from \"@/refresh-components/Tabs\";\nimport { Card } from \"@/refresh-components/cards\";\nimport InputComboBox from \"@/refresh-components/inputs/InputComboBox/InputComboBox\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport SwitchField from \"@/refresh-components/form/SwitchField\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport useShareableUsers from \"@/hooks/useShareableUsers\";\nimport useShareableGroups from \"@/hooks/useShareableGroups\";\nimport { useModal } from \"@/refresh-components/contexts/ModalContext\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { Formik, useFormikContext } from \"formik\";\nimport { useAgent } from \"@/hooks/useAgents\";\nimport { Button } from \"@opal/components\";\nimport { useLabels } from \"@/lib/hooks\";\nimport { PersonaLabel } from \"@/app/admin/agents/interfaces\";\n\nconst YOUR_ORGANIZATION_TAB = \"Your Organization\";\nconst USERS_AND_GROUPS_TAB = \"Users & Groups\";\n\n// ============================================================================\n// Types\n// ============================================================================\n\ninterface ShareAgentFormValues {\n  selectedUserIds: string[];\n  selectedGroupIds: number[];\n  isPublic: boolean;\n  isFeatured: boolean;\n  labelIds: number[];\n}\n\n// ============================================================================\n// ShareAgentFormContent\n// ============================================================================\n\ninterface ShareAgentFormContentProps {\n  agentId?: number;\n}\n\nfunction ShareAgentFormContent({ agentId }: ShareAgentFormContentProps) {\n  const { values, setFieldValue, handleSubmit, dirty, isSubmitting } =\n    useFormikContext<ShareAgentFormValues>();\n  const { data: usersData } = useShareableUsers({ includeApiKeys: true });\n  const { data: groupsData } = useShareableGroups();\n  const { user: currentUser, isAdmin, isCurator } = useUser();\n  const { agent: fullAgent } = useAgent(agentId ?? null);\n  const shareAgentModal = useModal();\n  const { labels: allLabels, createLabel } = useLabels();\n  const [labelInputValue, setLabelInputValue] = useState(\"\");\n\n  const acceptedUsers = usersData ?? [];\n  const groups = groupsData ?? [];\n  const canUpdateFeaturedStatus = isAdmin || isCurator;\n\n  // Create options for InputComboBox from all accepted users and groups\n  const comboBoxOptions = useMemo(() => {\n    const userOptions = acceptedUsers\n      .filter((user) => user.id !== currentUser?.id)\n      .map((user) => ({\n        value: `user-${user.id}`,\n        label: user.email,\n      }));\n\n    const groupOptions = groups.map((group) => ({\n      value: `group-${group.id}`,\n      label: group.name,\n    }));\n\n    return [...userOptions, ...groupOptions];\n  }, [acceptedUsers, groups, currentUser?.id]);\n\n  // Compute owner and displayed users\n  const ownerId = fullAgent?.owner?.id;\n  const owner = ownerId\n    ? acceptedUsers.find((user) => user.id === ownerId)\n    : acceptedUsers.find((user) => user.id === currentUser?.id);\n  const otherUsers = owner\n    ? acceptedUsers.filter(\n        (user) =>\n          user.id !== owner.id && values.selectedUserIds.includes(user.id)\n      )\n    : acceptedUsers;\n  const displayedUsers = [...(owner ? [owner] : []), ...otherUsers];\n\n  // Compute displayed groups based on current form values\n  const displayedGroups = groups.filter((group) =>\n    values.selectedGroupIds.includes(group.id)\n  );\n\n  // Handlers\n  function handleClose() {\n    shareAgentModal.toggle(false);\n  }\n\n  function handleCopyLink() {\n    if (!agentId) return;\n    const url = `${window.location.origin}/chat?agentId=${agentId}`;\n    navigator.clipboard.writeText(url);\n  }\n\n  function handleComboBoxSelect(selectedValue: string) {\n    if (selectedValue.startsWith(\"user-\")) {\n      const userId = selectedValue.replace(\"user-\", \"\");\n      if (!values.selectedUserIds.includes(userId)) {\n        setFieldValue(\"selectedUserIds\", [...values.selectedUserIds, userId]);\n      }\n    } else if (selectedValue.startsWith(\"group-\")) {\n      const groupId = parseInt(selectedValue.replace(\"group-\", \"\"));\n      if (!values.selectedGroupIds.includes(groupId)) {\n        setFieldValue(\"selectedGroupIds\", [\n          ...values.selectedGroupIds,\n          groupId,\n        ]);\n      }\n    }\n  }\n\n  function handleRemoveUser(userId: string) {\n    setFieldValue(\n      \"selectedUserIds\",\n      values.selectedUserIds.filter((id) => id !== userId)\n    );\n  }\n\n  function handleRemoveGroup(groupId: number) {\n    setFieldValue(\n      \"selectedGroupIds\",\n      values.selectedGroupIds.filter((id) => id !== groupId)\n    );\n  }\n\n  const selectedLabels: PersonaLabel[] = useMemo(() => {\n    if (!allLabels) return [];\n    return allLabels.filter((label) => values.labelIds.includes(label.id));\n  }, [allLabels, values.labelIds]);\n\n  function handleRemoveLabel(labelId: number) {\n    setFieldValue(\n      \"labelIds\",\n      values.labelIds.filter((id) => id !== labelId)\n    );\n  }\n\n  const addLabel = useCallback(\n    async (name: string) => {\n      const trimmed = name.trim();\n      if (!trimmed) return;\n\n      const existing = allLabels?.find(\n        (l) => l.name.toLowerCase() === trimmed.toLowerCase()\n      );\n      if (existing) {\n        if (!values.labelIds.includes(existing.id)) {\n          setFieldValue(\"labelIds\", [...values.labelIds, existing.id]);\n        }\n      } else {\n        const newLabel = await createLabel(trimmed);\n        if (newLabel) {\n          setFieldValue(\"labelIds\", [...values.labelIds, newLabel.id]);\n        }\n      }\n      setLabelInputValue(\"\");\n    },\n    [allLabels, values.labelIds, setFieldValue, createLabel]\n  );\n\n  const chipItems = useMemo(\n    () =>\n      selectedLabels.map((label) => ({\n        id: String(label.id),\n        label: label.name,\n      })),\n    [selectedLabels]\n  );\n\n  return (\n    <Modal.Content width=\"sm\" height=\"lg\">\n      <Modal.Header icon={SvgShare} title=\"Share Agent\" onClose={handleClose} />\n\n      <Modal.Body padding={0.5}>\n        <Card variant=\"borderless\" padding={0.5}>\n          <Tabs\n            defaultValue={\n              values.isPublic ? YOUR_ORGANIZATION_TAB : USERS_AND_GROUPS_TAB\n            }\n          >\n            <Tabs.List>\n              <Tabs.Trigger icon={SvgUsers} value={USERS_AND_GROUPS_TAB}>\n                {USERS_AND_GROUPS_TAB}\n              </Tabs.Trigger>\n              <Tabs.Trigger\n                icon={SvgOrganization}\n                value={YOUR_ORGANIZATION_TAB}\n              >\n                {YOUR_ORGANIZATION_TAB}\n              </Tabs.Trigger>\n            </Tabs.List>\n\n            <Tabs.Content value={USERS_AND_GROUPS_TAB}>\n              <Section gap={0.5} alignItems=\"start\">\n                <InputComboBox\n                  placeholder=\"Add users and groups\"\n                  value=\"\"\n                  onChange={() => {}}\n                  onValueChange={handleComboBoxSelect}\n                  options={comboBoxOptions}\n                  strict\n                />\n                {(displayedUsers.length > 0 || displayedGroups.length > 0) && (\n                  <Section gap={0} alignItems=\"stretch\">\n                    {/* Shared Users */}\n                    {displayedUsers.map((user) => {\n                      const isOwner = fullAgent?.owner?.id === user.id;\n                      const isCurrentUser = currentUser?.id === user.id;\n\n                      return (\n                        <LineItem\n                          key={`user-${user.id}`}\n                          icon={SvgUser}\n                          description={isCurrentUser ? \"You\" : undefined}\n                          rightChildren={\n                            isOwner || (isCurrentUser && !agentId) ? (\n                              // Owner will always have the agent \"shared\" with it.\n                              // Therefore, we never render any `IconButton SvgX` to remove it.\n                              //\n                              // Note:\n                              // This user, during creation, is assumed to be the \"owner\".\n                              // That is why the `(isCurrentUser && !agent)` condition exists.\n                              <Text secondaryBody text03>\n                                Owner\n                              </Text>\n                            ) : (\n                              // For all other cases (including for \"self-unsharing\"),\n                              // we render an `IconButton SvgX` to remove a person from the list.\n                              <Button\n                                prominence=\"tertiary\"\n                                size=\"sm\"\n                                icon={SvgX}\n                                onClick={() => handleRemoveUser(user.id)}\n                              />\n                            )\n                          }\n                        >\n                          {user.email}\n                        </LineItem>\n                      );\n                    })}\n\n                    {/* Shared Groups */}\n                    {displayedGroups.map((group) => (\n                      <LineItem\n                        key={`group-${group.id}`}\n                        icon={SvgUsers}\n                        rightChildren={\n                          <Button\n                            prominence=\"tertiary\"\n                            size=\"sm\"\n                            icon={SvgX}\n                            onClick={() => handleRemoveGroup(group.id)}\n                          />\n                        }\n                      >\n                        {group.name}\n                      </LineItem>\n                    ))}\n                  </Section>\n                )}\n              </Section>\n              {values.isPublic && (\n                <Section>\n                  <Message\n                    iconComponent={SvgOrganization}\n                    close={false}\n                    static\n                    className=\"w-full\"\n                    text=\"This agent is public to your organization.\"\n                    description=\"Everyone in your organization has access to this agent.\"\n                  />\n                </Section>\n              )}\n            </Tabs.Content>\n\n            <Tabs.Content value={YOUR_ORGANIZATION_TAB} padding={0.5}>\n              <Section gap={1} alignItems=\"stretch\">\n                <InputLayouts.Horizontal\n                  title=\"Publish This Agent\"\n                  description=\"Make this agent available to everyone in your organization.\"\n                >\n                  <SwitchField name=\"isPublic\" />\n                </InputLayouts.Horizontal>\n\n                {canUpdateFeaturedStatus && (\n                  <>\n                    <div className=\"border-t border-border-02\" />\n\n                    <InputLayouts.Horizontal\n                      title=\"Feature This Agent\"\n                      description=\"Show this agent at the top of the explore agents list and automatically pin it to the sidebar for new users with access.\"\n                    >\n                      <SwitchField name=\"isFeatured\" />\n                    </InputLayouts.Horizontal>\n                  </>\n                )}\n\n                <InputChipField\n                  chips={chipItems}\n                  onRemoveChip={(id) => handleRemoveLabel(Number(id))}\n                  onAdd={addLabel}\n                  value={labelInputValue}\n                  onChange={setLabelInputValue}\n                  placeholder=\"Add labels...\"\n                  icon={SvgTag}\n                />\n                <Text secondaryBody text04>\n                  Add labels and categories to help people better discover this\n                  agent.\n                </Text>\n              </Section>\n            </Tabs.Content>\n          </Tabs>\n        </Card>\n      </Modal.Body>\n\n      <Modal.Footer>\n        <BasicModalFooter\n          left={\n            agentId ? (\n              <Button\n                prominence=\"secondary\"\n                icon={SvgLink}\n                onClick={handleCopyLink}\n              >\n                Copy Link\n              </Button>\n            ) : undefined\n          }\n          cancel={\n            <Button\n              disabled={isSubmitting}\n              prominence=\"secondary\"\n              onClick={handleClose}\n            >\n              Cancel\n            </Button>\n          }\n          submit={\n            <Button\n              disabled={!dirty || isSubmitting}\n              onClick={() => handleSubmit()}\n            >\n              Save\n            </Button>\n          }\n        />\n      </Modal.Footer>\n    </Modal.Content>\n  );\n}\n\n// ============================================================================\n// ShareAgentModal\n// ============================================================================\n\nexport interface ShareAgentModalProps {\n  agentId?: number;\n  userIds: string[];\n  groupIds: number[];\n  isPublic: boolean;\n  isFeatured: boolean;\n  labelIds: number[];\n  onShare?: (\n    userIds: string[],\n    groupIds: number[],\n    isPublic: boolean,\n    isFeatured: boolean,\n    labelIds: number[]\n  ) => Promise<void> | void;\n}\n\nexport default function ShareAgentModal({\n  agentId,\n  userIds,\n  groupIds,\n  isPublic,\n  isFeatured,\n  labelIds,\n  onShare,\n}: ShareAgentModalProps) {\n  const shareAgentModal = useModal();\n\n  const initialValues = useMemo(\n    (): ShareAgentFormValues => ({\n      selectedUserIds: userIds,\n      selectedGroupIds: groupIds,\n      isPublic: isPublic,\n      isFeatured: isFeatured,\n      labelIds: labelIds,\n    }),\n    [userIds, groupIds, isPublic, isFeatured, labelIds]\n  );\n  const [modalInitialValues, setModalInitialValues] =\n    useState<ShareAgentFormValues>(initialValues);\n  const wasOpenRef = useRef(false);\n\n  useEffect(() => {\n    // Capture fresh props exactly when the modal opens, then keep them stable\n    // while open so in-flight parent updates don't reset form state.\n    if (shareAgentModal.isOpen && !wasOpenRef.current) {\n      setModalInitialValues(initialValues);\n    }\n    wasOpenRef.current = shareAgentModal.isOpen;\n  }, [shareAgentModal.isOpen, initialValues]);\n\n  async function handleSubmit(values: ShareAgentFormValues) {\n    await onShare?.(\n      values.selectedUserIds,\n      values.selectedGroupIds,\n      values.isPublic,\n      values.isFeatured,\n      values.labelIds\n    );\n  }\n\n  return (\n    <Modal open={shareAgentModal.isOpen} onOpenChange={shareAgentModal.toggle}>\n      <Formik\n        initialValues={modalInitialValues}\n        onSubmit={handleSubmit}\n        enableReinitialize\n      >\n        <ShareAgentFormContent agentId={agentId} />\n      </Formik>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/ShareChatSessionModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { ChatSession, ChatSessionSharedStatus } from \"@/app/app/interfaces\";\nimport { toast } from \"@/hooks/useToast\";\nimport { useChatSessionStore } from \"@/app/app/stores/useChatSessionStore\";\nimport { copyAll } from \"@/app/app/message/copyingUtils\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Modal from \"@/refresh-components/Modal\";\nimport { Button } from \"@opal/components\";\nimport CopyIconButton from \"@/refresh-components/buttons/CopyIconButton\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { SvgLink, SvgShare, SvgUsers } from \"@opal/icons\";\nimport SvgCheck from \"@opal/icons/check\";\nimport SvgLock from \"@opal/icons/lock\";\n\nimport type { IconProps } from \"@opal/types\";\nimport useChatSessions from \"@/hooks/useChatSessions\";\n\nfunction buildShareLink(chatSessionId: string) {\n  const baseUrl = `${window.location.protocol}//${window.location.host}`;\n  return `${baseUrl}/app/shared/${chatSessionId}`;\n}\n\nasync function generateShareLink(chatSessionId: string) {\n  const response = await fetch(`/api/chat/chat-session/${chatSessionId}`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ sharing_status: \"public\" }),\n  });\n\n  if (response.ok) {\n    return buildShareLink(chatSessionId);\n  }\n  return null;\n}\n\nasync function deleteShareLink(chatSessionId: string) {\n  const response = await fetch(`/api/chat/chat-session/${chatSessionId}`, {\n    method: \"PATCH\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify({ sharing_status: \"private\" }),\n  });\n\n  return response.ok;\n}\n\ninterface PrivacyOptionProps {\n  icon: React.FunctionComponent<IconProps>;\n  title: string;\n  description: string;\n  selected: boolean;\n  onClick: () => void;\n  ariaLabel?: string;\n}\n\nfunction PrivacyOption({\n  icon: Icon,\n  title,\n  description,\n  selected,\n  onClick,\n  ariaLabel,\n}: PrivacyOptionProps) {\n  return (\n    <div\n      className={cn(\n        \"p-1.5 rounded-08 cursor-pointer \",\n        selected ? \"bg-background-tint-00\" : \"bg-transparent\",\n        \"hover:bg-background-tint-02\"\n      )}\n      onClick={onClick}\n      aria-label={ariaLabel}\n    >\n      <div className=\"flex flex-row gap-1 items-center\">\n        <div className=\"flex w-5 p-[2px] self-stretch justify-center\">\n          <Icon\n            size={16}\n            className={cn(selected ? \"stroke-text-05\" : \"stroke-text-03\")}\n          />\n        </div>\n        <div className=\"flex flex-col flex-1 px-0.5\">\n          <Text mainUiBody text05={selected} text03={!selected}>\n            {title}\n          </Text>\n          <Text secondaryBody text03>\n            {description}\n          </Text>\n        </div>\n        {selected && (\n          <div className=\"flex w-5 self-stretch justify-center\">\n            <SvgCheck size={16} className=\"stroke-action-link-05\" />\n          </div>\n        )}\n      </div>\n    </div>\n  );\n}\n\ninterface ShareChatSessionModalProps {\n  chatSession: ChatSession;\n  onClose: () => void;\n}\n\nexport default function ShareChatSessionModal({\n  chatSession,\n  onClose,\n}: ShareChatSessionModalProps) {\n  const isCurrentlyPublic =\n    chatSession.shared_status === ChatSessionSharedStatus.Public;\n\n  const [selectedPrivacy, setSelectedPrivacy] = useState<\"private\" | \"public\">(\n    isCurrentlyPublic ? \"public\" : \"private\"\n  );\n  const [shareLink, setShareLink] = useState<string>(\n    isCurrentlyPublic ? buildShareLink(chatSession.id) : \"\"\n  );\n  const [isLoading, setIsLoading] = useState(false);\n  const updateCurrentChatSessionSharedStatus = useChatSessionStore(\n    (state) => state.updateCurrentChatSessionSharedStatus\n  );\n  const { refreshChatSessions } = useChatSessions();\n\n  const wantsPublic = selectedPrivacy === \"public\";\n\n  const isShared = shareLink && selectedPrivacy === \"public\";\n\n  let submitButtonText = \"Done\";\n  if (wantsPublic && !isCurrentlyPublic && !shareLink) {\n    submitButtonText = \"Create Share Link\";\n  } else if (!wantsPublic && isCurrentlyPublic) {\n    submitButtonText = \"Make Private\";\n  } else if (isShared) {\n    submitButtonText = \"Copy Link\";\n  }\n\n  async function handleSubmit() {\n    setIsLoading(true);\n    try {\n      if (wantsPublic && !isCurrentlyPublic && !shareLink) {\n        const link = await generateShareLink(chatSession.id);\n        if (link) {\n          setShareLink(link);\n          updateCurrentChatSessionSharedStatus(ChatSessionSharedStatus.Public);\n          await refreshChatSessions();\n          copyAll(link);\n          toast.success(\"Share link copied to clipboard!\");\n        } else {\n          toast.error(\"Failed to generate share link\");\n        }\n      } else if (!wantsPublic && isCurrentlyPublic) {\n        const success = await deleteShareLink(chatSession.id);\n        if (success) {\n          setShareLink(\"\");\n          updateCurrentChatSessionSharedStatus(ChatSessionSharedStatus.Private);\n          await refreshChatSessions();\n          toast.success(\"Chat is now private\");\n          onClose();\n        } else {\n          toast.error(\"Failed to make chat private\");\n        }\n      } else if (wantsPublic && shareLink) {\n        copyAll(shareLink);\n        toast.success(\"Share link copied to clipboard!\");\n      } else {\n        onClose();\n      }\n    } catch (e) {\n      console.error(e);\n      toast.error(\"An error occurred\");\n    } finally {\n      setIsLoading(false);\n    }\n  }\n\n  return (\n    <Modal open onOpenChange={(isOpen) => !isOpen && onClose()}>\n      <Modal.Content width=\"sm\">\n        <Modal.Header\n          icon={SvgShare}\n          title={isShared ? \"Chat shared\" : \"Share this chat\"}\n          description=\"All existing and future messages in this chat will be shared.\"\n          onClose={onClose}\n        />\n        <Modal.Body twoTone>\n          <Section\n            justifyContent=\"start\"\n            alignItems=\"stretch\"\n            height=\"auto\"\n            gap={0.12}\n          >\n            <PrivacyOption\n              icon={SvgLock}\n              title=\"Private\"\n              description=\"Only you have access to this chat.\"\n              selected={selectedPrivacy === \"private\"}\n              onClick={() => setSelectedPrivacy(\"private\")}\n              ariaLabel=\"share-modal-option-private\"\n            />\n            <PrivacyOption\n              icon={SvgUsers}\n              title=\"Your Organization\"\n              description=\"Anyone in your organization can view this chat.\"\n              selected={selectedPrivacy === \"public\"}\n              onClick={() => setSelectedPrivacy(\"public\")}\n              ariaLabel=\"share-modal-option-public\"\n            />\n          </Section>\n\n          {isShared && (\n            <InputTypeIn\n              aria-label=\"share-modal-link-input\"\n              readOnly\n              value={shareLink}\n              rightSection={\n                <CopyIconButton\n                  getCopyText={() => shareLink}\n                  tooltip=\"Copy link\"\n                  size=\"sm\"\n                  aria-label=\"share-modal-copy-link\"\n                />\n              }\n            />\n          )}\n        </Modal.Body>\n        <Modal.Footer>\n          {!isShared && (\n            <Button\n              prominence=\"secondary\"\n              onClick={onClose}\n              aria-label=\"share-modal-cancel\"\n            >\n              Cancel\n            </Button>\n          )}\n          <Button\n            disabled={isLoading}\n            onClick={handleSubmit}\n            icon={isShared ? SvgLink : undefined}\n            width={isShared ? \"full\" : undefined}\n            aria-label=\"share-modal-submit\"\n          >\n            {submitButtonText}\n          </Button>\n        </Modal.Footer>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/AnthropicModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport { Formik } from \"formik\";\nimport { LLMProviderFormProps } from \"@/interfaces/llm\";\nimport * as Yup from \"yup\";\nimport { useWellKnownLLMProvider } from \"@/hooks/useLLMProviders\";\nimport {\n  buildDefaultInitialValues,\n  buildDefaultValidationSchema,\n  buildAvailableModelConfigurations,\n  buildOnboardingInitialValues,\n} from \"@/sections/modals/llmConfig/utils\";\nimport {\n  submitLLMProvider,\n  submitOnboardingProvider,\n} from \"@/sections/modals/llmConfig/svc\";\nimport {\n  APIKeyField,\n  ModelsField,\n  DisplayNameField,\n  ModelsAccessField,\n  FieldSeparator,\n  SingleDefaultModelField,\n  LLMConfigurationModalWrapper,\n} from \"@/sections/modals/llmConfig/shared\";\n\nconst ANTHROPIC_PROVIDER_NAME = \"anthropic\";\nconst DEFAULT_DEFAULT_MODEL_NAME = \"claude-sonnet-4-5\";\n\nexport default function AnthropicModal({\n  variant = \"llm-configuration\",\n  existingLlmProvider,\n  shouldMarkAsDefault,\n  open,\n  onOpenChange,\n  defaultModelName,\n  onboardingState,\n  onboardingActions,\n  llmDescriptor,\n}: LLMProviderFormProps) {\n  const isOnboarding = variant === \"onboarding\";\n  const [isTesting, setIsTesting] = useState(false);\n  const { mutate } = useSWRConfig();\n  const { wellKnownLLMProvider } = useWellKnownLLMProvider(\n    ANTHROPIC_PROVIDER_NAME\n  );\n\n  if (open === false) return null;\n\n  const onClose = () => onOpenChange?.(false);\n\n  const modelConfigurations = buildAvailableModelConfigurations(\n    existingLlmProvider,\n    wellKnownLLMProvider ?? llmDescriptor\n  );\n\n  const initialValues = isOnboarding\n    ? {\n        ...buildOnboardingInitialValues(),\n        name: ANTHROPIC_PROVIDER_NAME,\n        provider: ANTHROPIC_PROVIDER_NAME,\n        api_key: \"\",\n        default_model_name: DEFAULT_DEFAULT_MODEL_NAME,\n      }\n    : {\n        ...buildDefaultInitialValues(\n          existingLlmProvider,\n          modelConfigurations,\n          defaultModelName\n        ),\n        api_key: existingLlmProvider?.api_key ?? \"\",\n        api_base: existingLlmProvider?.api_base ?? undefined,\n        default_model_name:\n          (defaultModelName &&\n          modelConfigurations.some((m) => m.name === defaultModelName)\n            ? defaultModelName\n            : undefined) ??\n          wellKnownLLMProvider?.recommended_default_model?.name ??\n          DEFAULT_DEFAULT_MODEL_NAME,\n        is_auto_mode: existingLlmProvider?.is_auto_mode ?? true,\n      };\n\n  const validationSchema = isOnboarding\n    ? Yup.object().shape({\n        api_key: Yup.string().required(\"API Key is required\"),\n        default_model_name: Yup.string().required(\"Model name is required\"),\n      })\n    : buildDefaultValidationSchema().shape({\n        api_key: Yup.string().required(\"API Key is required\"),\n      });\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      validateOnMount={true}\n      onSubmit={async (values, { setSubmitting }) => {\n        if (isOnboarding && onboardingState && onboardingActions) {\n          const modelConfigsToUse =\n            (wellKnownLLMProvider ?? llmDescriptor)?.known_models ?? [];\n\n          await submitOnboardingProvider({\n            providerName: ANTHROPIC_PROVIDER_NAME,\n            payload: {\n              ...values,\n              model_configurations: modelConfigsToUse,\n              is_auto_mode:\n                values.default_model_name === DEFAULT_DEFAULT_MODEL_NAME,\n            },\n            onboardingState,\n            onboardingActions,\n            isCustomProvider: false,\n            onClose,\n            setIsSubmitting: setSubmitting,\n          });\n        } else {\n          await submitLLMProvider({\n            providerName: ANTHROPIC_PROVIDER_NAME,\n            values,\n            initialValues,\n            modelConfigurations,\n            existingLlmProvider,\n            shouldMarkAsDefault,\n            setIsTesting,\n            mutate,\n            onClose,\n            setSubmitting,\n          });\n        }\n      }}\n    >\n      {(formikProps) => (\n        <LLMConfigurationModalWrapper\n          providerEndpoint={ANTHROPIC_PROVIDER_NAME}\n          existingProviderName={existingLlmProvider?.name}\n          onClose={onClose}\n          isFormValid={formikProps.isValid}\n          isDirty={formikProps.dirty}\n          isTesting={isTesting}\n          isSubmitting={formikProps.isSubmitting}\n        >\n          <APIKeyField providerName=\"Anthropic\" />\n\n          {!isOnboarding && (\n            <>\n              <FieldSeparator />\n              <DisplayNameField disabled={!!existingLlmProvider} />\n            </>\n          )}\n\n          <FieldSeparator />\n          {isOnboarding ? (\n            <SingleDefaultModelField placeholder=\"E.g. claude-sonnet-4-5\" />\n          ) : (\n            <ModelsField\n              modelConfigurations={modelConfigurations}\n              formikProps={formikProps}\n              recommendedDefaultModel={\n                wellKnownLLMProvider?.recommended_default_model ?? null\n              }\n              shouldShowAutoUpdateToggle={true}\n            />\n          )}\n\n          {!isOnboarding && (\n            <>\n              <FieldSeparator />\n              <ModelsAccessField formikProps={formikProps} />\n            </>\n          )}\n        </LLMConfigurationModalWrapper>\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/AzureModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport { Formik } from \"formik\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport {\n  LLMProviderFormProps,\n  LLMProviderView,\n  ModelConfiguration,\n} from \"@/interfaces/llm\";\nimport * as Yup from \"yup\";\nimport { useWellKnownLLMProvider } from \"@/hooks/useLLMProviders\";\nimport {\n  buildDefaultInitialValues,\n  buildDefaultValidationSchema,\n  buildAvailableModelConfigurations,\n  buildOnboardingInitialValues,\n  BaseLLMFormValues,\n} from \"@/sections/modals/llmConfig/utils\";\nimport {\n  submitLLMProvider,\n  submitOnboardingProvider,\n} from \"@/sections/modals/llmConfig/svc\";\nimport {\n  APIKeyField,\n  DisplayNameField,\n  FieldSeparator,\n  FieldWrapper,\n  ModelsAccessField,\n  ModelsField,\n  SingleDefaultModelField,\n  LLMConfigurationModalWrapper,\n} from \"@/sections/modals/llmConfig/shared\";\nimport {\n  isValidAzureTargetUri,\n  parseAzureTargetUri,\n} from \"@/lib/azureTargetUri\";\nimport { toast } from \"@/hooks/useToast\";\n\nconst AZURE_PROVIDER_NAME = \"azure\";\n\ninterface AzureModalValues extends BaseLLMFormValues {\n  api_key: string;\n  target_uri: string;\n  api_base?: string;\n  api_version?: string;\n  deployment_name?: string;\n}\n\nfunction buildTargetUri(existingLlmProvider?: LLMProviderView): string {\n  if (!existingLlmProvider?.api_base || !existingLlmProvider?.api_version) {\n    return \"\";\n  }\n\n  const deploymentName =\n    existingLlmProvider.deployment_name || \"your-deployment\";\n  return `${existingLlmProvider.api_base}/openai/deployments/${deploymentName}/chat/completions?api-version=${existingLlmProvider.api_version}`;\n}\n\nconst processValues = (values: AzureModalValues): AzureModalValues => {\n  let processedValues = { ...values };\n  if (values.target_uri) {\n    try {\n      const { url, apiVersion, deploymentName } = parseAzureTargetUri(\n        values.target_uri\n      );\n      processedValues = {\n        ...processedValues,\n        api_base: url.origin,\n        api_version: apiVersion,\n        deployment_name: deploymentName || processedValues.deployment_name,\n      };\n    } catch {\n      toast.warning(\"Failed to parse target URI — using original values.\");\n    }\n  }\n  return processedValues;\n};\n\nexport default function AzureModal({\n  variant = \"llm-configuration\",\n  existingLlmProvider,\n  shouldMarkAsDefault,\n  open,\n  onOpenChange,\n  defaultModelName,\n  onboardingState,\n  onboardingActions,\n  llmDescriptor,\n}: LLMProviderFormProps) {\n  const isOnboarding = variant === \"onboarding\";\n  const [isTesting, setIsTesting] = useState(false);\n  const { mutate } = useSWRConfig();\n  const { wellKnownLLMProvider } = useWellKnownLLMProvider(AZURE_PROVIDER_NAME);\n\n  const [addedModels, setAddedModels] = useState<ModelConfiguration[]>([]);\n\n  if (open === false) return null;\n\n  const onClose = () => {\n    setAddedModels([]);\n    onOpenChange?.(false);\n  };\n\n  const baseModelConfigurations = buildAvailableModelConfigurations(\n    existingLlmProvider,\n    wellKnownLLMProvider ?? llmDescriptor\n  );\n\n  // Merge base models with any user-added models (dedup by name)\n  const existingNames = new Set(baseModelConfigurations.map((m) => m.name));\n  const modelConfigurations = [\n    ...baseModelConfigurations,\n    ...addedModels.filter((m) => !existingNames.has(m.name)),\n  ];\n\n  const initialValues: AzureModalValues = isOnboarding\n    ? ({\n        ...buildOnboardingInitialValues(),\n        name: AZURE_PROVIDER_NAME,\n        provider: AZURE_PROVIDER_NAME,\n        api_key: \"\",\n        target_uri: \"\",\n        default_model_name: \"\",\n      } as AzureModalValues)\n    : {\n        ...buildDefaultInitialValues(\n          existingLlmProvider,\n          modelConfigurations,\n          defaultModelName\n        ),\n        api_key: existingLlmProvider?.api_key ?? \"\",\n        target_uri: buildTargetUri(existingLlmProvider),\n      };\n\n  const validationSchema = isOnboarding\n    ? Yup.object().shape({\n        api_key: Yup.string().required(\"API Key is required\"),\n        target_uri: Yup.string()\n          .required(\"Target URI is required\")\n          .test(\n            \"valid-target-uri\",\n            \"Target URI must be a valid URL with api-version query parameter and either a deployment name in the path or /openai/responses\",\n            (value) => (value ? isValidAzureTargetUri(value) : false)\n          ),\n        default_model_name: Yup.string().required(\"Model name is required\"),\n      })\n    : buildDefaultValidationSchema().shape({\n        api_key: Yup.string().required(\"API Key is required\"),\n        target_uri: Yup.string()\n          .required(\"Target URI is required\")\n          .test(\n            \"valid-target-uri\",\n            \"Target URI must be a valid URL with api-version query parameter and either a deployment name in the path or /openai/responses\",\n            (value) => (value ? isValidAzureTargetUri(value) : false)\n          ),\n      });\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      validateOnMount={true}\n      onSubmit={async (values, { setSubmitting }) => {\n        const processedValues = processValues(values);\n\n        if (isOnboarding && onboardingState && onboardingActions) {\n          const modelConfigsToUse =\n            (wellKnownLLMProvider ?? llmDescriptor)?.known_models ?? [];\n\n          await submitOnboardingProvider({\n            providerName: AZURE_PROVIDER_NAME,\n            payload: {\n              ...processedValues,\n              model_configurations: modelConfigsToUse,\n            },\n            onboardingState,\n            onboardingActions,\n            isCustomProvider: false,\n            onClose,\n            setIsSubmitting: setSubmitting,\n          });\n        } else {\n          await submitLLMProvider({\n            providerName: AZURE_PROVIDER_NAME,\n            values: processedValues,\n            initialValues,\n            modelConfigurations,\n            existingLlmProvider,\n            shouldMarkAsDefault,\n            setIsTesting,\n            mutate,\n            onClose,\n            setSubmitting,\n          });\n        }\n      }}\n    >\n      {(formikProps) => (\n        <LLMConfigurationModalWrapper\n          providerEndpoint={AZURE_PROVIDER_NAME}\n          existingProviderName={existingLlmProvider?.name}\n          onClose={onClose}\n          isFormValid={formikProps.isValid}\n          isDirty={formikProps.dirty}\n          isTesting={isTesting}\n          isSubmitting={formikProps.isSubmitting}\n        >\n          <FieldWrapper>\n            <InputLayouts.Vertical\n              name=\"target_uri\"\n              title=\"Target URI\"\n              subDescription=\"Paste your endpoint target URI from Azure OpenAI (including API endpoint base, deployment name, and API version).\"\n            >\n              <InputTypeInField\n                name=\"target_uri\"\n                placeholder=\"https://your-resource.cognitiveservices.azure.com/openai/deployments/deployment-name/chat/completions?api-version=2025-01-01-preview\"\n              />\n            </InputLayouts.Vertical>\n          </FieldWrapper>\n\n          <APIKeyField providerName=\"Azure\" />\n\n          {!isOnboarding && (\n            <>\n              <FieldSeparator />\n              <DisplayNameField disabled={!!existingLlmProvider} />\n            </>\n          )}\n\n          <FieldSeparator />\n\n          {isOnboarding ? (\n            <SingleDefaultModelField placeholder=\"E.g. gpt-4o\" />\n          ) : (\n            <ModelsField\n              modelConfigurations={modelConfigurations}\n              formikProps={formikProps}\n              recommendedDefaultModel={null}\n              shouldShowAutoUpdateToggle={false}\n              onAddModel={(modelName) => {\n                const newModel: ModelConfiguration = {\n                  name: modelName,\n                  is_visible: true,\n                  max_input_tokens: null,\n                  supports_image_input: false,\n                  supports_reasoning: false,\n                };\n                setAddedModels((prev) => [...prev, newModel]);\n                const currentSelected =\n                  formikProps.values.selected_model_names ?? [];\n                formikProps.setFieldValue(\"selected_model_names\", [\n                  ...currentSelected,\n                  modelName,\n                ]);\n                if (!formikProps.values.default_model_name) {\n                  formikProps.setFieldValue(\"default_model_name\", modelName);\n                }\n              }}\n            />\n          )}\n\n          {!isOnboarding && (\n            <>\n              <FieldSeparator />\n              <ModelsAccessField formikProps={formikProps} />\n            </>\n          )}\n        </LLMConfigurationModalWrapper>\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/BedrockModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport { Formik, FormikProps } from \"formik\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport InputSelectField from \"@/refresh-components/form/InputSelectField\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport PasswordInputTypeInField from \"@/refresh-components/form/PasswordInputTypeInField\";\nimport {\n  LLMProviderFormProps,\n  LLMProviderView,\n  ModelConfiguration,\n} from \"@/interfaces/llm\";\nimport * as Yup from \"yup\";\nimport { useWellKnownLLMProvider } from \"@/hooks/useLLMProviders\";\nimport {\n  buildDefaultInitialValues,\n  buildDefaultValidationSchema,\n  buildAvailableModelConfigurations,\n  buildOnboardingInitialValues,\n  BaseLLMFormValues,\n} from \"@/sections/modals/llmConfig/utils\";\nimport {\n  submitLLMProvider,\n  submitOnboardingProvider,\n} from \"@/sections/modals/llmConfig/svc\";\nimport {\n  ModelsField,\n  DisplayNameField,\n  FieldSeparator,\n  FieldWrapper,\n  ModelsAccessField,\n  SingleDefaultModelField,\n  LLMConfigurationModalWrapper,\n} from \"@/sections/modals/llmConfig/shared\";\nimport { fetchBedrockModels } from \"@/app/admin/configuration/llm/utils\";\nimport { Card } from \"@opal/components\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { SvgAlertCircle } from \"@opal/icons\";\nimport { Content } from \"@opal/layouts\";\nimport { toast } from \"@/hooks/useToast\";\nimport useOnMount from \"@/hooks/useOnMount\";\n\nconst BEDROCK_PROVIDER_NAME = \"bedrock\";\nconst AWS_REGION_OPTIONS = [\n  { name: \"us-east-1\", value: \"us-east-1\" },\n  { name: \"us-east-2\", value: \"us-east-2\" },\n  { name: \"us-west-2\", value: \"us-west-2\" },\n  { name: \"us-gov-east-1\", value: \"us-gov-east-1\" },\n  { name: \"us-gov-west-1\", value: \"us-gov-west-1\" },\n  { name: \"ap-northeast-1\", value: \"ap-northeast-1\" },\n  { name: \"ap-south-1\", value: \"ap-south-1\" },\n  { name: \"ap-southeast-1\", value: \"ap-southeast-1\" },\n  { name: \"ap-southeast-2\", value: \"ap-southeast-2\" },\n  { name: \"ap-east-1\", value: \"ap-east-1\" },\n  { name: \"ca-central-1\", value: \"ca-central-1\" },\n  { name: \"eu-central-1\", value: \"eu-central-1\" },\n  { name: \"eu-west-2\", value: \"eu-west-2\" },\n];\nconst AUTH_METHOD_IAM = \"iam\";\nconst AUTH_METHOD_ACCESS_KEY = \"access_key\";\nconst AUTH_METHOD_LONG_TERM_API_KEY = \"long_term_api_key\";\nconst FIELD_AWS_REGION_NAME = \"custom_config.AWS_REGION_NAME\";\nconst FIELD_BEDROCK_AUTH_METHOD = \"custom_config.BEDROCK_AUTH_METHOD\";\nconst FIELD_AWS_ACCESS_KEY_ID = \"custom_config.AWS_ACCESS_KEY_ID\";\nconst FIELD_AWS_SECRET_ACCESS_KEY = \"custom_config.AWS_SECRET_ACCESS_KEY\";\nconst FIELD_AWS_BEARER_TOKEN_BEDROCK = \"custom_config.AWS_BEARER_TOKEN_BEDROCK\";\n\ninterface BedrockModalValues extends BaseLLMFormValues {\n  custom_config: {\n    AWS_REGION_NAME: string;\n    BEDROCK_AUTH_METHOD?: string;\n    AWS_ACCESS_KEY_ID?: string;\n    AWS_SECRET_ACCESS_KEY?: string;\n    AWS_BEARER_TOKEN_BEDROCK?: string;\n  };\n}\n\ninterface BedrockModalInternalsProps {\n  formikProps: FormikProps<BedrockModalValues>;\n  existingLlmProvider: LLMProviderView | undefined;\n  fetchedModels: ModelConfiguration[];\n  setFetchedModels: (models: ModelConfiguration[]) => void;\n  modelConfigurations: ModelConfiguration[];\n  isTesting: boolean;\n  onClose: () => void;\n  isOnboarding: boolean;\n}\n\nfunction BedrockModalInternals({\n  formikProps,\n  existingLlmProvider,\n  fetchedModels,\n  setFetchedModels,\n  modelConfigurations,\n  isTesting,\n  onClose,\n  isOnboarding,\n}: BedrockModalInternalsProps) {\n  const authMethod = formikProps.values.custom_config?.BEDROCK_AUTH_METHOD;\n\n  useEffect(() => {\n    if (authMethod === AUTH_METHOD_IAM) {\n      formikProps.setFieldValue(FIELD_AWS_ACCESS_KEY_ID, \"\");\n      formikProps.setFieldValue(FIELD_AWS_SECRET_ACCESS_KEY, \"\");\n      formikProps.setFieldValue(FIELD_AWS_BEARER_TOKEN_BEDROCK, \"\");\n    } else if (authMethod === AUTH_METHOD_ACCESS_KEY) {\n      formikProps.setFieldValue(FIELD_AWS_BEARER_TOKEN_BEDROCK, \"\");\n    } else if (authMethod === AUTH_METHOD_LONG_TERM_API_KEY) {\n      formikProps.setFieldValue(FIELD_AWS_ACCESS_KEY_ID, \"\");\n      formikProps.setFieldValue(FIELD_AWS_SECRET_ACCESS_KEY, \"\");\n    }\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, [authMethod]);\n\n  const currentModels =\n    fetchedModels.length > 0\n      ? fetchedModels\n      : existingLlmProvider?.model_configurations || modelConfigurations;\n\n  const isAuthComplete =\n    authMethod === AUTH_METHOD_IAM ||\n    (authMethod === AUTH_METHOD_ACCESS_KEY &&\n      formikProps.values.custom_config?.AWS_ACCESS_KEY_ID &&\n      formikProps.values.custom_config?.AWS_SECRET_ACCESS_KEY) ||\n    (authMethod === AUTH_METHOD_LONG_TERM_API_KEY &&\n      formikProps.values.custom_config?.AWS_BEARER_TOKEN_BEDROCK);\n\n  const isFetchDisabled =\n    !formikProps.values.custom_config?.AWS_REGION_NAME || !isAuthComplete;\n\n  const handleFetchModels = async () => {\n    const { models, error } = await fetchBedrockModels({\n      aws_region_name: formikProps.values.custom_config?.AWS_REGION_NAME ?? \"\",\n      aws_access_key_id: formikProps.values.custom_config?.AWS_ACCESS_KEY_ID,\n      aws_secret_access_key:\n        formikProps.values.custom_config?.AWS_SECRET_ACCESS_KEY,\n      aws_bearer_token_bedrock:\n        formikProps.values.custom_config?.AWS_BEARER_TOKEN_BEDROCK,\n      provider_name: existingLlmProvider?.name,\n    });\n    if (error) {\n      throw new Error(error);\n    }\n    setFetchedModels(models);\n  };\n\n  // Auto-fetch models on initial load when editing an existing provider\n  useOnMount(() => {\n    if (existingLlmProvider && !isFetchDisabled) {\n      handleFetchModels().catch((err) => {\n        toast.error(\n          err instanceof Error ? err.message : \"Failed to fetch models\"\n        );\n      });\n    }\n  });\n\n  return (\n    <LLMConfigurationModalWrapper\n      providerEndpoint={BEDROCK_PROVIDER_NAME}\n      existingProviderName={existingLlmProvider?.name}\n      onClose={onClose}\n      isFormValid={formikProps.isValid}\n      isDirty={formikProps.dirty}\n      isTesting={isTesting}\n      isSubmitting={formikProps.isSubmitting}\n    >\n      <FieldWrapper>\n        <Section gap={1}>\n          <InputLayouts.Vertical\n            name={FIELD_AWS_REGION_NAME}\n            title=\"AWS Region\"\n            subDescription=\"Region where your Amazon Bedrock models are hosted.\"\n          >\n            <InputSelectField name={FIELD_AWS_REGION_NAME}>\n              <InputSelect.Trigger placeholder=\"Select a region\" />\n              <InputSelect.Content>\n                {AWS_REGION_OPTIONS.map((option) => (\n                  <InputSelect.Item key={option.value} value={option.value}>\n                    {option.name}\n                  </InputSelect.Item>\n                ))}\n              </InputSelect.Content>\n            </InputSelectField>\n          </InputLayouts.Vertical>\n\n          <InputLayouts.Vertical\n            name={FIELD_BEDROCK_AUTH_METHOD}\n            title=\"Authentication Method\"\n            subDescription=\"Choose how Onyx should authenticate with Bedrock.\"\n          >\n            <InputSelect\n              value={authMethod || AUTH_METHOD_ACCESS_KEY}\n              onValueChange={(value) =>\n                formikProps.setFieldValue(FIELD_BEDROCK_AUTH_METHOD, value)\n              }\n            >\n              <InputSelect.Trigger defaultValue={AUTH_METHOD_IAM} />\n              <InputSelect.Content>\n                <InputSelect.Item\n                  value={AUTH_METHOD_IAM}\n                  description=\"Recommended for AWS environments\"\n                >\n                  Environment IAM Role\n                </InputSelect.Item>\n                <InputSelect.Item\n                  value={AUTH_METHOD_ACCESS_KEY}\n                  description=\"For non-AWS environments\"\n                >\n                  Access Key\n                </InputSelect.Item>\n                <InputSelect.Item\n                  value={AUTH_METHOD_LONG_TERM_API_KEY}\n                  description=\"For non-AWS environments\"\n                >\n                  Long-term API Key\n                </InputSelect.Item>\n              </InputSelect.Content>\n            </InputSelect>\n          </InputLayouts.Vertical>\n        </Section>\n      </FieldWrapper>\n\n      {authMethod === AUTH_METHOD_ACCESS_KEY && (\n        <Card background=\"light\" border=\"none\" padding=\"sm\">\n          <Section gap={1}>\n            <InputLayouts.Vertical\n              name={FIELD_AWS_ACCESS_KEY_ID}\n              title=\"AWS Access Key ID\"\n            >\n              <InputTypeInField\n                name={FIELD_AWS_ACCESS_KEY_ID}\n                placeholder=\"AKIAIOSFODNN7EXAMPLE\"\n              />\n            </InputLayouts.Vertical>\n            <InputLayouts.Vertical\n              name={FIELD_AWS_SECRET_ACCESS_KEY}\n              title=\"AWS Secret Access Key\"\n            >\n              <PasswordInputTypeInField\n                name={FIELD_AWS_SECRET_ACCESS_KEY}\n                placeholder=\"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY\"\n              />\n            </InputLayouts.Vertical>\n          </Section>\n        </Card>\n      )}\n\n      {authMethod === AUTH_METHOD_IAM && (\n        <FieldWrapper>\n          <Card background=\"none\" border=\"solid\" padding=\"sm\">\n            <Content\n              icon={SvgAlertCircle}\n              title=\"Onyx will use the IAM role attached to the environment it’s running in to authenticate.\"\n              variant=\"body\"\n              sizePreset=\"main-ui\"\n            />\n          </Card>\n        </FieldWrapper>\n      )}\n\n      {authMethod === AUTH_METHOD_LONG_TERM_API_KEY && (\n        <Card background=\"light\" border=\"none\" padding=\"sm\">\n          <Section gap={0.5}>\n            <InputLayouts.Vertical\n              name={FIELD_AWS_BEARER_TOKEN_BEDROCK}\n              title=\"Long-term API Key\"\n            >\n              <PasswordInputTypeInField\n                name={FIELD_AWS_BEARER_TOKEN_BEDROCK}\n                placeholder=\"Your long-term API key\"\n              />\n            </InputLayouts.Vertical>\n          </Section>\n        </Card>\n      )}\n\n      {!isOnboarding && (\n        <>\n          <FieldSeparator />\n          <DisplayNameField disabled={!!existingLlmProvider} />\n        </>\n      )}\n\n      <FieldSeparator />\n\n      {isOnboarding ? (\n        <SingleDefaultModelField placeholder=\"E.g. us.anthropic.claude-sonnet-4-5-v1\" />\n      ) : (\n        <ModelsField\n          modelConfigurations={currentModels}\n          formikProps={formikProps}\n          recommendedDefaultModel={null}\n          shouldShowAutoUpdateToggle={false}\n          onRefetch={isFetchDisabled ? undefined : handleFetchModels}\n        />\n      )}\n\n      {!isOnboarding && (\n        <>\n          <FieldSeparator />\n          <ModelsAccessField formikProps={formikProps} />\n        </>\n      )}\n    </LLMConfigurationModalWrapper>\n  );\n}\n\nexport default function BedrockModal({\n  variant = \"llm-configuration\",\n  existingLlmProvider,\n  shouldMarkAsDefault,\n  open,\n  onOpenChange,\n  defaultModelName,\n  onboardingState,\n  onboardingActions,\n  llmDescriptor,\n}: LLMProviderFormProps) {\n  const [fetchedModels, setFetchedModels] = useState<ModelConfiguration[]>([]);\n  const [isTesting, setIsTesting] = useState(false);\n  const isOnboarding = variant === \"onboarding\";\n  const { mutate } = useSWRConfig();\n  const { wellKnownLLMProvider } = useWellKnownLLMProvider(\n    BEDROCK_PROVIDER_NAME\n  );\n\n  if (open === false) return null;\n\n  const onClose = () => onOpenChange?.(false);\n\n  const modelConfigurations = buildAvailableModelConfigurations(\n    existingLlmProvider,\n    wellKnownLLMProvider ?? llmDescriptor\n  );\n\n  const initialValues: BedrockModalValues = isOnboarding\n    ? ({\n        ...buildOnboardingInitialValues(),\n        name: BEDROCK_PROVIDER_NAME,\n        provider: BEDROCK_PROVIDER_NAME,\n        default_model_name: \"\",\n        custom_config: {\n          AWS_REGION_NAME: \"\",\n          BEDROCK_AUTH_METHOD: \"access_key\",\n          AWS_ACCESS_KEY_ID: \"\",\n          AWS_SECRET_ACCESS_KEY: \"\",\n          AWS_BEARER_TOKEN_BEDROCK: \"\",\n        },\n      } as BedrockModalValues)\n    : {\n        ...buildDefaultInitialValues(\n          existingLlmProvider,\n          modelConfigurations,\n          defaultModelName\n        ),\n        custom_config: {\n          AWS_REGION_NAME:\n            (existingLlmProvider?.custom_config?.AWS_REGION_NAME as string) ??\n            \"\",\n          BEDROCK_AUTH_METHOD:\n            (existingLlmProvider?.custom_config\n              ?.BEDROCK_AUTH_METHOD as string) ?? \"access_key\",\n          AWS_ACCESS_KEY_ID:\n            (existingLlmProvider?.custom_config?.AWS_ACCESS_KEY_ID as string) ??\n            \"\",\n          AWS_SECRET_ACCESS_KEY:\n            (existingLlmProvider?.custom_config\n              ?.AWS_SECRET_ACCESS_KEY as string) ?? \"\",\n          AWS_BEARER_TOKEN_BEDROCK:\n            (existingLlmProvider?.custom_config\n              ?.AWS_BEARER_TOKEN_BEDROCK as string) ?? \"\",\n        },\n      };\n\n  const validationSchema = isOnboarding\n    ? Yup.object().shape({\n        default_model_name: Yup.string().required(\"Model name is required\"),\n        custom_config: Yup.object({\n          AWS_REGION_NAME: Yup.string().required(\"AWS Region is required\"),\n        }),\n      })\n    : buildDefaultValidationSchema().shape({\n        custom_config: Yup.object({\n          AWS_REGION_NAME: Yup.string().required(\"AWS Region is required\"),\n        }),\n      });\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      validateOnMount={true}\n      onSubmit={async (values, { setSubmitting }) => {\n        const filteredCustomConfig = Object.fromEntries(\n          Object.entries(values.custom_config || {}).filter(([, v]) => v !== \"\")\n        );\n\n        const submitValues = {\n          ...values,\n          custom_config:\n            Object.keys(filteredCustomConfig).length > 0\n              ? filteredCustomConfig\n              : undefined,\n        };\n\n        if (isOnboarding && onboardingState && onboardingActions) {\n          const modelConfigsToUse =\n            fetchedModels.length > 0 ? fetchedModels : [];\n\n          await submitOnboardingProvider({\n            providerName: BEDROCK_PROVIDER_NAME,\n            payload: {\n              ...submitValues,\n              model_configurations: modelConfigsToUse,\n            },\n            onboardingState,\n            onboardingActions,\n            isCustomProvider: false,\n            onClose,\n            setIsSubmitting: setSubmitting,\n          });\n        } else {\n          await submitLLMProvider({\n            providerName: BEDROCK_PROVIDER_NAME,\n            values: submitValues,\n            initialValues,\n            modelConfigurations:\n              fetchedModels.length > 0 ? fetchedModels : modelConfigurations,\n            existingLlmProvider,\n            shouldMarkAsDefault,\n            setIsTesting,\n            mutate,\n            onClose,\n            setSubmitting,\n          });\n        }\n      }}\n    >\n      {(formikProps) => (\n        <BedrockModalInternals\n          formikProps={formikProps}\n          existingLlmProvider={existingLlmProvider}\n          fetchedModels={fetchedModels}\n          setFetchedModels={setFetchedModels}\n          modelConfigurations={modelConfigurations}\n          isTesting={isTesting}\n          onClose={onClose}\n          isOnboarding={isOnboarding}\n        />\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/BifrostModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { markdown } from \"@opal/utils\";\nimport { useSWRConfig } from \"swr\";\nimport { Formik, FormikProps } from \"formik\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport PasswordInputTypeInField from \"@/refresh-components/form/PasswordInputTypeInField\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport {\n  LLMProviderFormProps,\n  LLMProviderName,\n  LLMProviderView,\n  ModelConfiguration,\n} from \"@/interfaces/llm\";\nimport { fetchBifrostModels } from \"@/app/admin/configuration/llm/utils\";\nimport * as Yup from \"yup\";\nimport { useWellKnownLLMProvider } from \"@/hooks/useLLMProviders\";\nimport {\n  buildDefaultInitialValues,\n  buildDefaultValidationSchema,\n  buildAvailableModelConfigurations,\n  buildOnboardingInitialValues,\n  BaseLLMFormValues,\n} from \"@/sections/modals/llmConfig/utils\";\nimport {\n  submitLLMProvider,\n  submitOnboardingProvider,\n} from \"@/sections/modals/llmConfig/svc\";\nimport {\n  ModelsField,\n  DisplayNameField,\n  ModelsAccessField,\n  FieldSeparator,\n  FieldWrapper,\n  SingleDefaultModelField,\n  LLMConfigurationModalWrapper,\n} from \"@/sections/modals/llmConfig/shared\";\nimport { toast } from \"@/hooks/useToast\";\n\nconst BIFROST_PROVIDER_NAME = LLMProviderName.BIFROST;\nconst DEFAULT_API_BASE = \"\";\n\ninterface BifrostModalValues extends BaseLLMFormValues {\n  api_key: string;\n  api_base: string;\n}\n\ninterface BifrostModalInternalsProps {\n  formikProps: FormikProps<BifrostModalValues>;\n  existingLlmProvider: LLMProviderView | undefined;\n  fetchedModels: ModelConfiguration[];\n  setFetchedModels: (models: ModelConfiguration[]) => void;\n  modelConfigurations: ModelConfiguration[];\n  isTesting: boolean;\n  onClose: () => void;\n  isOnboarding: boolean;\n}\n\nfunction BifrostModalInternals({\n  formikProps,\n  existingLlmProvider,\n  fetchedModels,\n  setFetchedModels,\n  modelConfigurations,\n  isTesting,\n  onClose,\n  isOnboarding,\n}: BifrostModalInternalsProps) {\n  const currentModels =\n    fetchedModels.length > 0\n      ? fetchedModels\n      : existingLlmProvider?.model_configurations || modelConfigurations;\n\n  const isFetchDisabled = !formikProps.values.api_base;\n\n  const handleFetchModels = async () => {\n    const { models, error } = await fetchBifrostModels({\n      api_base: formikProps.values.api_base,\n      api_key: formikProps.values.api_key || undefined,\n      provider_name: existingLlmProvider?.name,\n    });\n    if (error) {\n      throw new Error(error);\n    }\n    setFetchedModels(models);\n  };\n\n  // Auto-fetch models on initial load when editing an existing provider\n  useEffect(() => {\n    if (existingLlmProvider && !isFetchDisabled) {\n      handleFetchModels().catch((err) => {\n        console.error(\"Failed to fetch Bifrost models:\", err);\n        toast.error(\n          err instanceof Error ? err.message : \"Failed to fetch models\"\n        );\n      });\n    }\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, []);\n\n  return (\n    <LLMConfigurationModalWrapper\n      providerEndpoint={LLMProviderName.BIFROST}\n      existingProviderName={existingLlmProvider?.name}\n      onClose={onClose}\n      isFormValid={formikProps.isValid}\n      isDirty={formikProps.dirty}\n      isTesting={isTesting}\n      isSubmitting={formikProps.isSubmitting}\n    >\n      <FieldWrapper>\n        <InputLayouts.Vertical\n          name=\"api_base\"\n          title=\"API Base URL\"\n          subDescription=\"Paste your Bifrost gateway endpoint URL (including API version).\"\n        >\n          <InputTypeInField\n            name=\"api_base\"\n            placeholder=\"https://your-bifrost-gateway.com/v1\"\n          />\n        </InputLayouts.Vertical>\n      </FieldWrapper>\n\n      <FieldWrapper>\n        <InputLayouts.Vertical\n          name=\"api_key\"\n          title=\"API Key\"\n          suffix=\"optional\"\n          subDescription={markdown(\n            \"Paste your API key from [Bifrost](https://docs.getbifrost.ai/overview) to access your models.\"\n          )}\n        >\n          <PasswordInputTypeInField name=\"api_key\" placeholder=\"API Key\" />\n        </InputLayouts.Vertical>\n      </FieldWrapper>\n\n      {!isOnboarding && (\n        <>\n          <FieldSeparator />\n          <DisplayNameField disabled={!!existingLlmProvider} />\n        </>\n      )}\n\n      <FieldSeparator />\n\n      {isOnboarding ? (\n        <SingleDefaultModelField placeholder=\"E.g. anthropic/claude-sonnet-4-6\" />\n      ) : (\n        <ModelsField\n          modelConfigurations={currentModels}\n          formikProps={formikProps}\n          recommendedDefaultModel={null}\n          shouldShowAutoUpdateToggle={false}\n          onRefetch={isFetchDisabled ? undefined : handleFetchModels}\n        />\n      )}\n\n      {!isOnboarding && (\n        <>\n          <FieldSeparator />\n          <ModelsAccessField formikProps={formikProps} />\n        </>\n      )}\n    </LLMConfigurationModalWrapper>\n  );\n}\n\nexport default function BifrostModal({\n  variant = \"llm-configuration\",\n  existingLlmProvider,\n  shouldMarkAsDefault,\n  open,\n  onOpenChange,\n  defaultModelName,\n  onboardingState,\n  onboardingActions,\n  llmDescriptor,\n}: LLMProviderFormProps) {\n  const [fetchedModels, setFetchedModels] = useState<ModelConfiguration[]>([]);\n  const [isTesting, setIsTesting] = useState(false);\n  const isOnboarding = variant === \"onboarding\";\n  const { mutate } = useSWRConfig();\n  const { wellKnownLLMProvider } = useWellKnownLLMProvider(\n    BIFROST_PROVIDER_NAME\n  );\n\n  if (open === false) return null;\n\n  const onClose = () => onOpenChange?.(false);\n\n  const modelConfigurations = buildAvailableModelConfigurations(\n    existingLlmProvider,\n    wellKnownLLMProvider ?? llmDescriptor\n  );\n\n  const initialValues: BifrostModalValues = isOnboarding\n    ? ({\n        ...buildOnboardingInitialValues(),\n        name: BIFROST_PROVIDER_NAME,\n        provider: BIFROST_PROVIDER_NAME,\n        api_key: \"\",\n        api_base: DEFAULT_API_BASE,\n        default_model_name: \"\",\n      } as BifrostModalValues)\n    : {\n        ...buildDefaultInitialValues(\n          existingLlmProvider,\n          modelConfigurations,\n          defaultModelName\n        ),\n        api_key: existingLlmProvider?.api_key ?? \"\",\n        api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,\n      };\n\n  const validationSchema = isOnboarding\n    ? Yup.object().shape({\n        api_base: Yup.string().required(\"API Base URL is required\"),\n        default_model_name: Yup.string().required(\"Model name is required\"),\n      })\n    : buildDefaultValidationSchema().shape({\n        api_base: Yup.string().required(\"API Base URL is required\"),\n      });\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      validateOnMount={true}\n      onSubmit={async (values, { setSubmitting }) => {\n        if (isOnboarding && onboardingState && onboardingActions) {\n          const modelConfigsToUse =\n            fetchedModels.length > 0 ? fetchedModels : [];\n\n          await submitOnboardingProvider({\n            providerName: BIFROST_PROVIDER_NAME,\n            payload: {\n              ...values,\n              model_configurations: modelConfigsToUse,\n            },\n            onboardingState,\n            onboardingActions,\n            isCustomProvider: false,\n            onClose,\n            setIsSubmitting: setSubmitting,\n          });\n        } else {\n          await submitLLMProvider({\n            providerName: BIFROST_PROVIDER_NAME,\n            values,\n            initialValues,\n            modelConfigurations:\n              fetchedModels.length > 0 ? fetchedModels : modelConfigurations,\n            existingLlmProvider,\n            shouldMarkAsDefault,\n            setIsTesting,\n            mutate,\n            onClose,\n            setSubmitting,\n          });\n        }\n      }}\n    >\n      {(formikProps) => (\n        <BifrostModalInternals\n          formikProps={formikProps}\n          existingLlmProvider={existingLlmProvider}\n          fetchedModels={fetchedModels}\n          setFetchedModels={setFetchedModels}\n          modelConfigurations={modelConfigurations}\n          isTesting={isTesting}\n          onClose={onClose}\n          isOnboarding={isOnboarding}\n        />\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/CustomModal.test.tsx",
    "content": "/**\n * Integration Test: Custom LLM Provider Configuration Workflow\n *\n * Tests the complete user journey for configuring a custom LLM provider.\n * This tests the full workflow: open modal → form fill → test config → save → set as default\n */\n\nimport { render, screen, setupUser, waitFor } from \"@tests/setup/test-utils\";\nimport CustomModal from \"@/sections/modals/llmConfig/CustomModal\";\nimport { toast } from \"@/hooks/useToast\";\n\n// Mock SWR's mutate function and useSWR\nconst mockMutate = jest.fn();\njest.mock(\"swr\", () => {\n  const actual = jest.requireActual(\"swr\");\n  return {\n    ...actual,\n    useSWRConfig: () => ({ mutate: mockMutate }),\n    __esModule: true,\n    default: () => ({ data: undefined, error: undefined, isLoading: false }),\n  };\n});\n\n// Mock toast\njest.mock(\"@/hooks/useToast\", () => {\n  const success = jest.fn();\n  const error = jest.fn();\n  const toastFn = Object.assign(jest.fn(), {\n    success,\n    error,\n    info: jest.fn(),\n    warning: jest.fn(),\n    dismiss: jest.fn(),\n    clearAll: jest.fn(),\n    _markLeaving: jest.fn(),\n  });\n  return {\n    toast: toastFn,\n    useToast: () => ({\n      toast: toastFn,\n      dismiss: toastFn.dismiss,\n      clearAll: toastFn.clearAll,\n    }),\n  };\n});\n\n// Mock usePaidEnterpriseFeaturesEnabled\njest.mock(\"@/components/settings/usePaidEnterpriseFeaturesEnabled\", () => ({\n  usePaidEnterpriseFeaturesEnabled: () => false,\n}));\n\ndescribe(\"Custom LLM Provider Configuration Workflow\", () => {\n  let fetchSpy: jest.SpyInstance;\n\n  beforeEach(() => {\n    jest.clearAllMocks();\n    fetchSpy = jest.spyOn(global, \"fetch\");\n  });\n\n  afterEach(() => {\n    fetchSpy.mockRestore();\n  });\n\n  async function fillBasicFields(\n    user: ReturnType<typeof setupUser>,\n    options: {\n      name: string;\n      provider: string;\n      modelName: string;\n    }\n  ) {\n    const nameInput = screen.getByPlaceholderText(\"Display Name\");\n    const providerInput = screen.getByPlaceholderText(\"Provider Name\");\n\n    await user.type(nameInput, options.name);\n    await user.type(providerInput, options.provider);\n\n    // Fill in model name (first model row)\n    const modelNameInput = screen.getByPlaceholderText(\"Model name\");\n    await user.type(modelNameInput, options.modelName);\n  }\n\n  test(\"creates a new custom LLM provider successfully\", async () => {\n    const user = setupUser();\n\n    // Mock POST /api/admin/llm/test\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({}),\n    } as Response);\n\n    // Mock PUT /api/admin/llm/provider?is_creation=true\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({\n        id: 1,\n        name: \"My Custom Provider\",\n        provider: \"openai\",\n      }),\n    } as Response);\n\n    render(<CustomModal open={true} onOpenChange={() => {}} />);\n\n    await fillBasicFields(user, {\n      name: \"My Custom Provider\",\n      provider: \"openai\",\n      modelName: \"gpt-4\",\n    });\n\n    // Submit the form\n    const submitButton = screen.getByRole(\"button\", { name: /connect/i });\n    await user.click(submitButton);\n\n    // Verify test API was called first\n    await waitFor(() => {\n      expect(fetchSpy).toHaveBeenCalledWith(\n        \"/api/admin/llm/test\",\n        expect.objectContaining({\n          method: \"POST\",\n          headers: { \"Content-Type\": \"application/json\" },\n        })\n      );\n    });\n\n    // Verify create API was called\n    await waitFor(() => {\n      expect(fetchSpy).toHaveBeenCalledWith(\n        \"/api/admin/llm/provider?is_creation=true\",\n        expect.objectContaining({\n          method: \"PUT\",\n          headers: { \"Content-Type\": \"application/json\" },\n        })\n      );\n    });\n\n    // Verify success toast\n    await waitFor(() => {\n      expect(toast.success).toHaveBeenCalledWith(\n        \"Provider enabled successfully!\"\n      );\n    });\n\n    // Verify SWR cache was invalidated\n    expect(mockMutate).toHaveBeenCalledWith(\"/api/admin/llm/provider\");\n    expect(mockMutate).toHaveBeenCalledWith(\"/api/llm/provider\");\n\n    const personaProvidersMutateCall = mockMutate.mock.calls.find(\n      ([key]) => typeof key === \"function\"\n    );\n    expect(personaProvidersMutateCall).toBeDefined();\n\n    const personaProviderFilter = personaProvidersMutateCall?.[0] as (\n      key: unknown\n    ) => boolean;\n    expect(personaProviderFilter(\"/api/llm/persona/42/providers\")).toBe(true);\n    expect(personaProviderFilter(\"/api/llm/provider\")).toBe(false);\n  });\n\n  test(\"shows error when test configuration fails\", async () => {\n    const user = setupUser();\n\n    // Mock POST /api/admin/llm/test (failure)\n    fetchSpy.mockResolvedValueOnce({\n      ok: false,\n      status: 400,\n      json: async () => ({ detail: \"Invalid API key\" }),\n    } as Response);\n\n    render(<CustomModal open={true} onOpenChange={() => {}} />);\n\n    await fillBasicFields(user, {\n      name: \"Bad Provider\",\n      provider: \"openai\",\n      modelName: \"gpt-4\",\n    });\n\n    // Submit the form\n    const submitButton = screen.getByRole(\"button\", { name: /connect/i });\n    await user.click(submitButton);\n\n    // Verify test API was called\n    await waitFor(() => {\n      expect(fetchSpy).toHaveBeenCalledWith(\n        \"/api/admin/llm/test\",\n        expect.objectContaining({\n          method: \"POST\",\n        })\n      );\n    });\n\n    // Verify error toast is displayed with the API error message\n    await waitFor(() => {\n      expect(toast.error).toHaveBeenCalledWith(\"Invalid API key\");\n    });\n\n    // Verify create API was NOT called\n    expect(\n      fetchSpy.mock.calls.find((call) =>\n        call[0].includes(\"/api/admin/llm/provider\")\n      )\n    ).toBeUndefined();\n  });\n\n  test(\"updates an existing LLM provider\", async () => {\n    const user = setupUser();\n\n    const existingProvider = {\n      id: 1,\n      name: \"Existing Provider\",\n      provider: \"anthropic\",\n      api_key: \"old-key\",\n      api_base: \"\",\n      api_version: \"\",\n      model_configurations: [\n        {\n          name: \"claude-3-opus\",\n          display_name: \"\",\n          is_visible: true,\n          max_input_tokens: null,\n          supports_image_input: false,\n          supports_reasoning: false,\n        },\n      ],\n      custom_config: {},\n      is_public: true,\n      is_auto_mode: false,\n      groups: [],\n      personas: [],\n      deployment_name: null,\n    };\n\n    // Mock POST /api/admin/llm/test\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({}),\n    } as Response);\n\n    // Mock PUT /api/admin/llm/provider (update, no is_creation param)\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({ ...existingProvider }),\n    } as Response);\n\n    render(\n      <CustomModal\n        existingLlmProvider={existingProvider}\n        open={true}\n        onOpenChange={() => {}}\n      />\n    );\n\n    // Make a change to dirty the form (Update is disabled until dirty)\n    const modelInputs = screen.getAllByPlaceholderText(\"Model name\");\n    await user.type(modelInputs[0]!, \"-updated\");\n\n    // Submit — button says \"Update\" for existing providers\n    const submitButton = screen.getByRole(\"button\", { name: /update/i });\n    await user.click(submitButton);\n\n    // Verify test was called\n    await waitFor(() => {\n      expect(fetchSpy).toHaveBeenCalledWith(\n        \"/api/admin/llm/test\",\n        expect.any(Object)\n      );\n    });\n\n    // Verify update API was called (without is_creation param)\n    await waitFor(() => {\n      expect(fetchSpy).toHaveBeenCalledWith(\n        \"/api/admin/llm/provider\",\n        expect.objectContaining({\n          method: \"PUT\",\n        })\n      );\n    });\n\n    // Verify success message says \"updated\"\n    await waitFor(() => {\n      expect(toast.success).toHaveBeenCalledWith(\n        \"Provider updated successfully!\"\n      );\n    });\n  });\n\n  test(\"preserves additional models when updating a provider\", async () => {\n    const user = setupUser();\n\n    const existingProvider = {\n      id: 7,\n      name: \"ArcAI\",\n      provider: \"openai\",\n      api_key: \"old-key\",\n      api_base: \"https://example-openai-compatible.local/v1\",\n      api_version: \"\",\n      model_configurations: [\n        {\n          name: \"gpt-oss-20b-bw-failover\",\n          display_name: \"\",\n          is_visible: true,\n          max_input_tokens: null,\n          supports_image_input: false,\n          supports_reasoning: false,\n        },\n      ],\n      custom_config: {},\n      is_public: true,\n      is_auto_mode: false,\n      groups: [],\n      personas: [],\n      deployment_name: null,\n    };\n\n    // Mock POST /api/admin/llm/test\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({}),\n    } as Response);\n\n    // Mock PUT /api/admin/llm/provider\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({\n        ...existingProvider,\n        model_configurations: [\n          ...existingProvider.model_configurations,\n          {\n            name: \"nemotron\",\n            display_name: \"\",\n            is_visible: true,\n            max_input_tokens: null,\n            supports_image_input: false,\n            supports_reasoning: false,\n          },\n        ],\n      }),\n    } as Response);\n\n    render(\n      <CustomModal\n        existingLlmProvider={existingProvider}\n        open={true}\n        onOpenChange={() => {}}\n      />\n    );\n\n    // Add a new model\n    const addModelButton = screen.getByRole(\"button\", { name: /add model/i });\n    await user.click(addModelButton);\n\n    // Fill in second model name\n    const modelInputs = screen.getAllByPlaceholderText(\"Model name\");\n    await user.type(modelInputs[1]!, \"nemotron\");\n\n    const submitButton = screen.getByRole(\"button\", { name: /update/i });\n    await user.click(submitButton);\n\n    await waitFor(() => {\n      expect(fetchSpy).toHaveBeenCalledWith(\n        \"/api/admin/llm/provider\",\n        expect.objectContaining({\n          method: \"PUT\",\n        })\n      );\n    });\n\n    const updateCall = fetchSpy.mock.calls.find(\n      (call) =>\n        call[0] === \"/api/admin/llm/provider\" &&\n        call[1]?.method?.toUpperCase() === \"PUT\"\n    );\n    expect(updateCall).toBeDefined();\n\n    const requestBody = JSON.parse(updateCall![1].body as string);\n    expect(requestBody.model_configurations).toHaveLength(2);\n    expect(requestBody.model_configurations).toEqual(\n      expect.arrayContaining([\n        expect.objectContaining({ name: \"gpt-oss-20b-bw-failover\" }),\n        expect.objectContaining({ name: \"nemotron\" }),\n      ])\n    );\n  });\n\n  test(\"sets provider as default when shouldMarkAsDefault is true\", async () => {\n    const user = setupUser();\n\n    // Mock POST /api/admin/llm/test\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({}),\n    } as Response);\n\n    // Mock PUT /api/admin/llm/provider?is_creation=true\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({\n        id: 5,\n        name: \"New Default Provider\",\n        provider: \"openai\",\n      }),\n    } as Response);\n\n    // Mock POST /api/admin/llm/default\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({}),\n    } as Response);\n\n    render(\n      <CustomModal\n        shouldMarkAsDefault={true}\n        open={true}\n        onOpenChange={() => {}}\n      />\n    );\n\n    await fillBasicFields(user, {\n      name: \"New Default Provider\",\n      provider: \"openai\",\n      modelName: \"gpt-4\",\n    });\n\n    // Submit\n    const submitButton = screen.getByRole(\"button\", { name: /connect/i });\n    await user.click(submitButton);\n\n    // Verify set as default API was called with correct endpoint and body\n    await waitFor(() => {\n      const defaultCall = fetchSpy.mock.calls.find(\n        ([url]) => url === \"/api/admin/llm/default\"\n      );\n      expect(defaultCall).toBeDefined();\n\n      const [, options] = defaultCall!;\n      expect(options.method).toBe(\"POST\");\n      expect(options.headers).toEqual({ \"Content-Type\": \"application/json\" });\n\n      const body = JSON.parse(options.body);\n      expect(body.provider_id).toBe(5);\n      expect(body).toHaveProperty(\"model_name\");\n    });\n  });\n\n  test(\"shows error when provider creation fails\", async () => {\n    const user = setupUser();\n\n    // Mock POST /api/admin/llm/test\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({}),\n    } as Response);\n\n    // Mock PUT /api/admin/llm/provider?is_creation=true (failure)\n    fetchSpy.mockResolvedValueOnce({\n      ok: false,\n      status: 500,\n      json: async () => ({ detail: \"Database error\" }),\n    } as Response);\n\n    render(<CustomModal open={true} onOpenChange={() => {}} />);\n\n    await fillBasicFields(user, {\n      name: \"Test Provider\",\n      provider: \"openai\",\n      modelName: \"gpt-4\",\n    });\n\n    // Submit\n    const submitButton = screen.getByRole(\"button\", { name: /connect/i });\n    await user.click(submitButton);\n\n    // Verify error toast\n    await waitFor(() => {\n      expect(toast.error).toHaveBeenCalledWith(\n        \"Failed to enable provider: Database error\"\n      );\n    });\n  });\n\n  test(\"adds custom configuration key-value pairs\", async () => {\n    const user = setupUser();\n\n    // Mock POST /api/admin/llm/test\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({}),\n    } as Response);\n\n    // Mock PUT /api/admin/llm/provider?is_creation=true\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({ id: 1, name: \"Provider with Custom Config\" }),\n    } as Response);\n\n    render(<CustomModal open={true} onOpenChange={() => {}} />);\n\n    // Fill basic fields\n    const nameInput = screen.getByPlaceholderText(\"Display Name\");\n    await user.type(nameInput, \"Cloudflare Provider\");\n\n    const providerInput = screen.getByPlaceholderText(\"Provider Name\");\n    await user.type(providerInput, \"cloudflare\");\n\n    // Click \"Add Line\" button for custom config (aria-label from KeyValueInput)\n    const addLineButton = screen.getByRole(\"button\", {\n      name: /add key and value pair/i,\n    });\n    await user.click(addLineButton);\n\n    // Fill in custom config key-value pair\n    const keyInputs = screen.getAllByPlaceholderText(\"Key\");\n    const valueInputs = screen.getAllByPlaceholderText(\"Value\");\n\n    await user.type(keyInputs[0]!, \"CLOUDFLARE_ACCOUNT_ID\");\n    await user.type(valueInputs[0]!, \"my-account-id-123\");\n\n    // Fill in model name\n    const modelNameInput = screen.getByPlaceholderText(\"Model name\");\n    await user.type(modelNameInput, \"@cf/meta/llama-2-7b-chat-int8\");\n\n    // Submit\n    const submitButton = screen.getByRole(\"button\", { name: /connect/i });\n    await user.click(submitButton);\n\n    // Verify the custom config was included in the request\n    await waitFor(() => {\n      const createCall = fetchSpy.mock.calls.find((call) =>\n        call[0].includes(\"/api/admin/llm/provider\")\n      );\n      expect(createCall).toBeDefined();\n\n      const requestBody = JSON.parse(createCall![1].body);\n      expect(requestBody.custom_config).toEqual({\n        CLOUDFLARE_ACCOUNT_ID: \"my-account-id-123\",\n      });\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/CustomModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport { Formik, FormikProps } from \"formik\";\nimport { LLMProviderFormProps, ModelConfiguration } from \"@/interfaces/llm\";\nimport * as Yup from \"yup\";\nimport {\n  buildDefaultInitialValues,\n  buildOnboardingInitialValues,\n} from \"@/sections/modals/llmConfig/utils\";\nimport {\n  submitLLMProvider,\n  submitOnboardingProvider,\n} from \"@/sections/modals/llmConfig/svc\";\nimport {\n  DisplayNameField,\n  FieldSeparator,\n  ModelsAccessField,\n  LLMConfigurationModalWrapper,\n  FieldWrapper,\n} from \"@/sections/modals/llmConfig/shared\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport KeyValueInput, {\n  KeyValue,\n} from \"@/refresh-components/inputs/InputKeyValue\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button, Card, EmptyMessageCard } from \"@opal/components\";\nimport { SvgMinusCircle, SvgPlusCircle } from \"@opal/icons\";\nimport { toast } from \"@/hooks/useToast\";\nimport { Content } from \"@opal/layouts\";\nimport { Section } from \"@/layouts/general-layouts\";\n\n// ─── Model Configuration List ─────────────────────────────────────────────────\n\nconst MODEL_GRID_COLS = \"grid-cols-[2fr_2fr_minmax(10rem,1fr)_1fr_2.25rem]\";\n\ntype CustomModelConfiguration = Pick<\n  ModelConfiguration,\n  \"name\" | \"max_input_tokens\" | \"supports_image_input\"\n> & {\n  display_name: string;\n};\n\ninterface ModelConfigurationItemProps {\n  model: CustomModelConfiguration;\n  onChange: (next: CustomModelConfiguration) => void;\n  onRemove: () => void;\n  canRemove: boolean;\n}\n\nfunction ModelConfigurationItem({\n  model,\n  onChange,\n  onRemove,\n  canRemove,\n}: ModelConfigurationItemProps) {\n  return (\n    <>\n      <InputTypeIn\n        placeholder=\"Model name\"\n        value={model.name}\n        onChange={(e) => onChange({ ...model, name: e.target.value })}\n        showClearButton={false}\n      />\n      <InputTypeIn\n        placeholder=\"Display name\"\n        value={model.display_name}\n        onChange={(e) => onChange({ ...model, display_name: e.target.value })}\n        showClearButton={false}\n      />\n      <InputSelect\n        value={model.supports_image_input ? \"text-image\" : \"text-only\"}\n        onValueChange={(value) =>\n          onChange({ ...model, supports_image_input: value === \"text-image\" })\n        }\n      >\n        <InputSelect.Trigger placeholder=\"Input type\" />\n        <InputSelect.Content>\n          <InputSelect.Item value=\"text-only\">Text Only</InputSelect.Item>\n          <InputSelect.Item value=\"text-image\">Text & Image</InputSelect.Item>\n        </InputSelect.Content>\n      </InputSelect>\n      <InputTypeIn\n        placeholder=\"Default\"\n        value={model.max_input_tokens?.toString() ?? \"\"}\n        onChange={(e) =>\n          onChange({\n            ...model,\n            max_input_tokens:\n              e.target.value === \"\" ? null : Number(e.target.value),\n          })\n        }\n        showClearButton={false}\n        type=\"number\"\n      />\n      <Button\n        disabled={!canRemove}\n        prominence=\"tertiary\"\n        icon={SvgMinusCircle}\n        onClick={onRemove}\n      />\n    </>\n  );\n}\n\ninterface ModelConfigurationListProps {\n  formikProps: FormikProps<{\n    model_configurations: CustomModelConfiguration[];\n  }>;\n}\n\nfunction ModelConfigurationList({ formikProps }: ModelConfigurationListProps) {\n  const models = formikProps.values.model_configurations;\n\n  function handleChange(index: number, next: CustomModelConfiguration) {\n    const updated = [...models];\n    updated[index] = next;\n    formikProps.setFieldValue(\"model_configurations\", updated);\n  }\n\n  function handleRemove(index: number) {\n    formikProps.setFieldValue(\n      \"model_configurations\",\n      models.filter((_, i) => i !== index)\n    );\n  }\n\n  function handleAdd() {\n    formikProps.setFieldValue(\"model_configurations\", [\n      ...models,\n      {\n        name: \"\",\n        display_name: \"\",\n        max_input_tokens: null,\n        supports_image_input: false,\n      },\n    ]);\n  }\n\n  return (\n    <div className=\"w-full flex flex-col gap-y-2\">\n      {models.length > 0 ? (\n        <div className={`grid items-center gap-1 ${MODEL_GRID_COLS}`}>\n          <div className=\"pb-1\">\n            <Text mainUiAction>Model Name</Text>\n          </div>\n          <Text mainUiAction>Display Name</Text>\n          <Text mainUiAction>Input Type</Text>\n          <Text mainUiAction>Max Tokens</Text>\n          <div aria-hidden />\n\n          {models.map((model, index) => (\n            <ModelConfigurationItem\n              key={index}\n              model={model}\n              onChange={(next) => handleChange(index, next)}\n              onRemove={() => handleRemove(index)}\n              canRemove={models.length > 1}\n            />\n          ))}\n        </div>\n      ) : (\n        <EmptyMessageCard title=\"No models added yet.\" padding=\"sm\" />\n      )}\n\n      <Button\n        prominence=\"secondary\"\n        icon={SvgPlusCircle}\n        onClick={handleAdd}\n        type=\"button\"\n      >\n        Add Model\n      </Button>\n    </div>\n  );\n}\n\n// ─── Custom Config Processing ─────────────────────────────────────────────────\n\nfunction customConfigProcessing(items: KeyValue[]) {\n  const customConfig: { [key: string]: string } = {};\n  items.forEach(({ key, value }) => {\n    customConfig[key] = value;\n  });\n  return customConfig;\n}\n\nexport default function CustomModal({\n  variant = \"llm-configuration\",\n  existingLlmProvider,\n  shouldMarkAsDefault,\n  open,\n  onOpenChange,\n  defaultModelName,\n  onboardingState,\n  onboardingActions,\n}: LLMProviderFormProps) {\n  const isOnboarding = variant === \"onboarding\";\n  const [isTesting, setIsTesting] = useState(false);\n  const { mutate } = useSWRConfig();\n\n  if (open === false) return null;\n\n  const onClose = () => onOpenChange?.(false);\n\n  const initialValues = {\n    ...buildDefaultInitialValues(\n      existingLlmProvider,\n      undefined,\n      defaultModelName\n    ),\n    ...(isOnboarding ? buildOnboardingInitialValues() : {}),\n    provider: existingLlmProvider?.provider ?? \"\",\n    model_configurations: existingLlmProvider?.model_configurations.map(\n      (mc) => ({\n        name: mc.name,\n        display_name: mc.display_name ?? \"\",\n        max_input_tokens: mc.max_input_tokens ?? null,\n        supports_image_input: mc.supports_image_input,\n      })\n    ) ?? [\n      {\n        name: \"\",\n        display_name: \"\",\n        max_input_tokens: null,\n        supports_image_input: false,\n      },\n    ],\n    custom_config_list: existingLlmProvider?.custom_config\n      ? Object.entries(existingLlmProvider.custom_config).map(\n          ([key, value]) => ({ key, value: String(value) })\n        )\n      : [],\n  };\n\n  const modelConfigurationSchema = Yup.object({\n    name: Yup.string().required(\"Model name is required\"),\n    max_input_tokens: Yup.number()\n      .transform((value, originalValue) =>\n        originalValue === \"\" || originalValue === undefined ? null : value\n      )\n      .nullable()\n      .optional(),\n  });\n\n  const validationSchema = isOnboarding\n    ? Yup.object().shape({\n        provider: Yup.string().required(\"Provider Name is required\"),\n        model_configurations: Yup.array(modelConfigurationSchema),\n      })\n    : Yup.object().shape({\n        name: Yup.string().required(\"Display Name is required\"),\n        provider: Yup.string().required(\"Provider Name is required\"),\n        model_configurations: Yup.array(modelConfigurationSchema),\n      });\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      validateOnMount={true}\n      onSubmit={async (values, { setSubmitting }) => {\n        setSubmitting(true);\n\n        const modelConfigurations = values.model_configurations\n          .filter((mc) => mc.name.trim() !== \"\")\n          .map((mc) => ({\n            name: mc.name,\n            display_name: mc.display_name || undefined,\n            is_visible: true,\n            max_input_tokens: mc.max_input_tokens ?? null,\n            supports_image_input: mc.supports_image_input,\n            supports_reasoning: false,\n          }));\n\n        if (modelConfigurations.length === 0) {\n          toast.error(\"At least one model name is required\");\n          setSubmitting(false);\n          return;\n        }\n\n        if (isOnboarding && onboardingState && onboardingActions) {\n          await submitOnboardingProvider({\n            providerName: values.provider,\n            payload: {\n              ...values,\n              model_configurations: modelConfigurations,\n              custom_config: customConfigProcessing(values.custom_config_list),\n            },\n            onboardingState,\n            onboardingActions,\n            isCustomProvider: true,\n            onClose,\n            setIsSubmitting: setSubmitting,\n          });\n        } else {\n          const selectedModelNames = modelConfigurations.map(\n            (config) => config.name\n          );\n\n          await submitLLMProvider({\n            providerName: values.provider,\n            values: {\n              ...values,\n              selected_model_names: selectedModelNames,\n              custom_config: customConfigProcessing(values.custom_config_list),\n            },\n            initialValues: {\n              ...initialValues,\n              custom_config: customConfigProcessing(\n                initialValues.custom_config_list\n              ),\n            },\n            modelConfigurations,\n            existingLlmProvider,\n            shouldMarkAsDefault,\n            setIsTesting,\n            mutate,\n            onClose,\n            setSubmitting,\n          });\n        }\n      }}\n    >\n      {(formikProps) => (\n        <LLMConfigurationModalWrapper\n          providerEndpoint=\"custom\"\n          existingProviderName={existingLlmProvider?.name}\n          onClose={onClose}\n          isFormValid={formikProps.isValid}\n          isDirty={formikProps.dirty}\n          isTesting={isTesting}\n          isSubmitting={formikProps.isSubmitting}\n        >\n          {!isOnboarding && (\n            <Section gap={0}>\n              <DisplayNameField disabled={!!existingLlmProvider} />\n\n              <FieldWrapper>\n                <InputLayouts.Vertical\n                  name=\"provider\"\n                  title=\"Provider Name\"\n                  subDescription=\"Should be one of the providers listed at https://docs.litellm.ai/docs/providers.\"\n                >\n                  <InputTypeInField\n                    name=\"provider\"\n                    placeholder=\"Provider Name\"\n                    variant={existingLlmProvider ? \"disabled\" : undefined}\n                  />\n                </InputLayouts.Vertical>\n              </FieldWrapper>\n            </Section>\n          )}\n\n          <FieldSeparator />\n\n          <FieldWrapper>\n            <Section gap={0.75}>\n              <Content\n                title=\"Provider Configs\"\n                description=\"Add properties as needed by the model provider. This is passed to LiteLLM completion() call as arguments in the environment variable. See LiteLLM documentation for more instructions.\"\n                widthVariant=\"full\"\n                variant=\"section\"\n                sizePreset=\"main-content\"\n              />\n\n              <KeyValueInput\n                items={formikProps.values.custom_config_list}\n                onChange={(items) =>\n                  formikProps.setFieldValue(\"custom_config_list\", items)\n                }\n                addButtonLabel=\"Add Line\"\n              />\n            </Section>\n          </FieldWrapper>\n\n          <FieldSeparator />\n\n          <Section gap={0.5}>\n            <FieldWrapper>\n              <Content\n                title=\"Models\"\n                description=\"List LLM models you wish to use and their configurations for this provider. See full list of models at LiteLLM.\"\n                variant=\"section\"\n                sizePreset=\"main-content\"\n                widthVariant=\"full\"\n              />\n            </FieldWrapper>\n\n            <Card padding=\"sm\">\n              <ModelConfigurationList formikProps={formikProps as any} />\n            </Card>\n          </Section>\n\n          {!isOnboarding && (\n            <>\n              <FieldSeparator />\n              <ModelsAccessField formikProps={formikProps} />\n            </>\n          )}\n        </LLMConfigurationModalWrapper>\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/LMStudioForm.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useEffect, useMemo, useState } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport { Formik, FormikProps } from \"formik\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport PasswordInputTypeInField from \"@/refresh-components/form/PasswordInputTypeInField\";\nimport {\n  LLMProviderFormProps,\n  LLMProviderName,\n  LLMProviderView,\n  ModelConfiguration,\n} from \"@/interfaces/llm\";\nimport * as Yup from \"yup\";\nimport { useWellKnownLLMProvider } from \"@/hooks/useLLMProviders\";\nimport {\n  buildDefaultInitialValues,\n  buildDefaultValidationSchema,\n  buildAvailableModelConfigurations,\n  buildOnboardingInitialValues,\n  BaseLLMFormValues,\n} from \"@/sections/modals/llmConfig/utils\";\nimport {\n  submitLLMProvider,\n  submitOnboardingProvider,\n} from \"@/sections/modals/llmConfig/svc\";\nimport {\n  ModelsField,\n  DisplayNameField,\n  ModelsAccessField,\n  FieldSeparator,\n  FieldWrapper,\n  SingleDefaultModelField,\n  LLMConfigurationModalWrapper,\n} from \"@/sections/modals/llmConfig/shared\";\nimport { fetchModels } from \"@/app/admin/configuration/llm/utils\";\nimport debounce from \"lodash/debounce\";\nimport { toast } from \"@/hooks/useToast\";\n\nconst DEFAULT_API_BASE = \"http://localhost:1234\";\n\ninterface LMStudioFormValues extends BaseLLMFormValues {\n  api_base: string;\n  custom_config: {\n    LM_STUDIO_API_KEY?: string;\n  };\n}\n\ninterface LMStudioFormInternalsProps {\n  formikProps: FormikProps<LMStudioFormValues>;\n  existingLlmProvider: LLMProviderView | undefined;\n  fetchedModels: ModelConfiguration[];\n  setFetchedModels: (models: ModelConfiguration[]) => void;\n  isTesting: boolean;\n  onClose: () => void;\n  isOnboarding: boolean;\n}\n\nfunction LMStudioFormInternals({\n  formikProps,\n  existingLlmProvider,\n  fetchedModels,\n  setFetchedModels,\n  isTesting,\n  onClose,\n  isOnboarding,\n}: LMStudioFormInternalsProps) {\n  const initialApiKey =\n    (existingLlmProvider?.custom_config?.LM_STUDIO_API_KEY as string) ?? \"\";\n\n  const doFetchModels = useCallback(\n    (apiBase: string, apiKey: string | undefined, signal: AbortSignal) => {\n      fetchModels(\n        LLMProviderName.LM_STUDIO,\n        {\n          api_base: apiBase,\n          custom_config: apiKey ? { LM_STUDIO_API_KEY: apiKey } : {},\n          api_key_changed: apiKey !== initialApiKey,\n          name: existingLlmProvider?.name,\n        },\n        signal\n      ).then((data) => {\n        if (signal.aborted) return;\n        if (data.error) {\n          toast.error(data.error);\n          setFetchedModels([]);\n          return;\n        }\n        setFetchedModels(data.models);\n      });\n    },\n    [existingLlmProvider?.name, initialApiKey, setFetchedModels]\n  );\n\n  const debouncedFetchModels = useMemo(\n    () => debounce(doFetchModels, 500),\n    [doFetchModels]\n  );\n\n  const apiBase = formikProps.values.api_base;\n  const apiKey = formikProps.values.custom_config?.LM_STUDIO_API_KEY;\n\n  useEffect(() => {\n    if (apiBase) {\n      const controller = new AbortController();\n      debouncedFetchModels(apiBase, apiKey, controller.signal);\n      return () => {\n        debouncedFetchModels.cancel();\n        controller.abort();\n      };\n    } else {\n      setFetchedModels([]);\n    }\n  }, [apiBase, apiKey, debouncedFetchModels, setFetchedModels]);\n\n  const currentModels =\n    fetchedModels.length > 0\n      ? fetchedModels\n      : existingLlmProvider?.model_configurations || [];\n\n  return (\n    <LLMConfigurationModalWrapper\n      providerEndpoint={LLMProviderName.LM_STUDIO}\n      existingProviderName={existingLlmProvider?.name}\n      onClose={onClose}\n      isFormValid={formikProps.isValid}\n      isDirty={formikProps.dirty}\n      isTesting={isTesting}\n      isSubmitting={formikProps.isSubmitting}\n    >\n      <FieldWrapper>\n        <InputLayouts.Vertical\n          name=\"api_base\"\n          title=\"API Base URL\"\n          subDescription=\"The base URL for your LM Studio server.\"\n        >\n          <InputTypeInField\n            name=\"api_base\"\n            placeholder=\"Your LM Studio API base URL\"\n          />\n        </InputLayouts.Vertical>\n      </FieldWrapper>\n\n      <FieldWrapper>\n        <InputLayouts.Vertical\n          name=\"custom_config.LM_STUDIO_API_KEY\"\n          title=\"API Key\"\n          subDescription=\"Optional API key if your LM Studio server requires authentication.\"\n          suffix=\"optional\"\n        >\n          <PasswordInputTypeInField\n            name=\"custom_config.LM_STUDIO_API_KEY\"\n            placeholder=\"API Key\"\n          />\n        </InputLayouts.Vertical>\n      </FieldWrapper>\n\n      {!isOnboarding && (\n        <>\n          <FieldSeparator />\n          <DisplayNameField disabled={!!existingLlmProvider} />\n        </>\n      )}\n\n      <FieldSeparator />\n\n      {isOnboarding ? (\n        <SingleDefaultModelField placeholder=\"E.g. llama3.1\" />\n      ) : (\n        <ModelsField\n          modelConfigurations={currentModels}\n          formikProps={formikProps}\n          recommendedDefaultModel={null}\n          shouldShowAutoUpdateToggle={false}\n        />\n      )}\n\n      {!isOnboarding && (\n        <>\n          <FieldSeparator />\n          <ModelsAccessField formikProps={formikProps} />\n        </>\n      )}\n    </LLMConfigurationModalWrapper>\n  );\n}\n\nexport default function LMStudioForm({\n  variant = \"llm-configuration\",\n  existingLlmProvider,\n  shouldMarkAsDefault,\n  open,\n  onOpenChange,\n  defaultModelName,\n  onboardingState,\n  onboardingActions,\n  llmDescriptor,\n}: LLMProviderFormProps) {\n  const [fetchedModels, setFetchedModels] = useState<ModelConfiguration[]>([]);\n  const [isTesting, setIsTesting] = useState(false);\n  const isOnboarding = variant === \"onboarding\";\n  const { mutate } = useSWRConfig();\n  const { wellKnownLLMProvider } = useWellKnownLLMProvider(\n    LLMProviderName.LM_STUDIO\n  );\n\n  if (open === false) return null;\n\n  const onClose = () => onOpenChange?.(false);\n\n  const modelConfigurations = buildAvailableModelConfigurations(\n    existingLlmProvider,\n    wellKnownLLMProvider ?? llmDescriptor\n  );\n\n  const initialValues: LMStudioFormValues = isOnboarding\n    ? ({\n        ...buildOnboardingInitialValues(),\n        name: LLMProviderName.LM_STUDIO,\n        provider: LLMProviderName.LM_STUDIO,\n        api_base: DEFAULT_API_BASE,\n        default_model_name: \"\",\n        custom_config: {\n          LM_STUDIO_API_KEY: \"\",\n        },\n      } as LMStudioFormValues)\n    : {\n        ...buildDefaultInitialValues(\n          existingLlmProvider,\n          modelConfigurations,\n          defaultModelName\n        ),\n        api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,\n        custom_config: {\n          LM_STUDIO_API_KEY:\n            (existingLlmProvider?.custom_config?.LM_STUDIO_API_KEY as string) ??\n            \"\",\n        },\n      };\n\n  const validationSchema = isOnboarding\n    ? Yup.object().shape({\n        api_base: Yup.string().required(\"API Base URL is required\"),\n        default_model_name: Yup.string().required(\"Model name is required\"),\n      })\n    : buildDefaultValidationSchema().shape({\n        api_base: Yup.string().required(\"API Base URL is required\"),\n      });\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      validateOnMount={true}\n      onSubmit={async (values, { setSubmitting }) => {\n        const filteredCustomConfig = Object.fromEntries(\n          Object.entries(values.custom_config || {}).filter(([, v]) => v !== \"\")\n        );\n\n        const submitValues = {\n          ...values,\n          custom_config:\n            Object.keys(filteredCustomConfig).length > 0\n              ? filteredCustomConfig\n              : undefined,\n        };\n\n        if (isOnboarding && onboardingState && onboardingActions) {\n          const modelConfigsToUse =\n            fetchedModels.length > 0 ? fetchedModels : [];\n\n          await submitOnboardingProvider({\n            providerName: LLMProviderName.LM_STUDIO,\n            payload: {\n              ...submitValues,\n              model_configurations: modelConfigsToUse,\n            },\n            onboardingState,\n            onboardingActions,\n            isCustomProvider: false,\n            onClose,\n            setIsSubmitting: setSubmitting,\n          });\n        } else {\n          await submitLLMProvider({\n            providerName: LLMProviderName.LM_STUDIO,\n            values: submitValues,\n            initialValues,\n            modelConfigurations:\n              fetchedModels.length > 0 ? fetchedModels : modelConfigurations,\n            existingLlmProvider,\n            shouldMarkAsDefault,\n            setIsTesting,\n            mutate,\n            onClose,\n            setSubmitting,\n          });\n        }\n      }}\n    >\n      {(formikProps) => (\n        <LMStudioFormInternals\n          formikProps={formikProps}\n          existingLlmProvider={existingLlmProvider}\n          fetchedModels={fetchedModels}\n          setFetchedModels={setFetchedModels}\n          isTesting={isTesting}\n          onClose={onClose}\n          isOnboarding={isOnboarding}\n        />\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/LiteLLMProxyModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport { Formik, FormikProps } from \"formik\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport {\n  LLMProviderFormProps,\n  LLMProviderName,\n  LLMProviderView,\n  ModelConfiguration,\n} from \"@/interfaces/llm\";\nimport { fetchLiteLLMProxyModels } from \"@/app/admin/configuration/llm/utils\";\nimport * as Yup from \"yup\";\nimport { useWellKnownLLMProvider } from \"@/hooks/useLLMProviders\";\nimport {\n  buildDefaultInitialValues,\n  buildDefaultValidationSchema,\n  buildAvailableModelConfigurations,\n  buildOnboardingInitialValues,\n  BaseLLMFormValues,\n} from \"@/sections/modals/llmConfig/utils\";\nimport {\n  submitLLMProvider,\n  submitOnboardingProvider,\n} from \"@/sections/modals/llmConfig/svc\";\nimport {\n  APIKeyField,\n  ModelsField,\n  DisplayNameField,\n  ModelsAccessField,\n  FieldSeparator,\n  FieldWrapper,\n  SingleDefaultModelField,\n  LLMConfigurationModalWrapper,\n} from \"@/sections/modals/llmConfig/shared\";\nimport { toast } from \"@/hooks/useToast\";\n\nconst DEFAULT_API_BASE = \"http://localhost:4000\";\n\ninterface LiteLLMProxyModalValues extends BaseLLMFormValues {\n  api_key: string;\n  api_base: string;\n}\n\ninterface LiteLLMProxyModalInternalsProps {\n  formikProps: FormikProps<LiteLLMProxyModalValues>;\n  existingLlmProvider: LLMProviderView | undefined;\n  fetchedModels: ModelConfiguration[];\n  setFetchedModels: (models: ModelConfiguration[]) => void;\n  modelConfigurations: ModelConfiguration[];\n  isTesting: boolean;\n  onClose: () => void;\n  isOnboarding: boolean;\n}\n\nfunction LiteLLMProxyModalInternals({\n  formikProps,\n  existingLlmProvider,\n  fetchedModels,\n  setFetchedModels,\n  modelConfigurations,\n  isTesting,\n  onClose,\n  isOnboarding,\n}: LiteLLMProxyModalInternalsProps) {\n  const currentModels =\n    fetchedModels.length > 0\n      ? fetchedModels\n      : existingLlmProvider?.model_configurations || modelConfigurations;\n\n  const isFetchDisabled =\n    !formikProps.values.api_base || !formikProps.values.api_key;\n\n  const handleFetchModels = async () => {\n    const { models, error } = await fetchLiteLLMProxyModels({\n      api_base: formikProps.values.api_base,\n      api_key: formikProps.values.api_key,\n      provider_name: existingLlmProvider?.name,\n    });\n    if (error) {\n      throw new Error(error);\n    }\n    setFetchedModels(models);\n  };\n\n  // Auto-fetch models on initial load when editing an existing provider\n  useEffect(() => {\n    if (existingLlmProvider && !isFetchDisabled) {\n      handleFetchModels().catch((err) => {\n        toast.error(\n          err instanceof Error ? err.message : \"Failed to fetch models\"\n        );\n      });\n    }\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, []);\n\n  return (\n    <LLMConfigurationModalWrapper\n      providerEndpoint={LLMProviderName.LITELLM_PROXY}\n      existingProviderName={existingLlmProvider?.name}\n      onClose={onClose}\n      isFormValid={formikProps.isValid}\n      isDirty={formikProps.dirty}\n      isTesting={isTesting}\n      isSubmitting={formikProps.isSubmitting}\n    >\n      <FieldWrapper>\n        <InputLayouts.Vertical\n          name=\"api_base\"\n          title=\"API Base URL\"\n          subDescription=\"The base URL for your LiteLLM Proxy server.\"\n        >\n          <InputTypeInField\n            name=\"api_base\"\n            placeholder=\"https://your-litellm-proxy.com\"\n          />\n        </InputLayouts.Vertical>\n      </FieldWrapper>\n\n      <APIKeyField providerName=\"LiteLLM Proxy\" />\n\n      {!isOnboarding && (\n        <>\n          <FieldSeparator />\n          <DisplayNameField disabled={!!existingLlmProvider} />\n        </>\n      )}\n\n      <FieldSeparator />\n\n      {isOnboarding ? (\n        <SingleDefaultModelField placeholder=\"E.g. gpt-4o\" />\n      ) : (\n        <ModelsField\n          modelConfigurations={currentModels}\n          formikProps={formikProps}\n          recommendedDefaultModel={null}\n          shouldShowAutoUpdateToggle={false}\n          onRefetch={isFetchDisabled ? undefined : handleFetchModels}\n        />\n      )}\n\n      {!isOnboarding && (\n        <>\n          <FieldSeparator />\n          <ModelsAccessField formikProps={formikProps} />\n        </>\n      )}\n    </LLMConfigurationModalWrapper>\n  );\n}\n\nexport default function LiteLLMProxyModal({\n  variant = \"llm-configuration\",\n  existingLlmProvider,\n  shouldMarkAsDefault,\n  open,\n  onOpenChange,\n  defaultModelName,\n  onboardingState,\n  onboardingActions,\n  llmDescriptor,\n}: LLMProviderFormProps) {\n  const [fetchedModels, setFetchedModels] = useState<ModelConfiguration[]>([]);\n  const [isTesting, setIsTesting] = useState(false);\n  const isOnboarding = variant === \"onboarding\";\n  const { mutate } = useSWRConfig();\n  const { wellKnownLLMProvider } = useWellKnownLLMProvider(\n    LLMProviderName.LITELLM_PROXY\n  );\n\n  if (open === false) return null;\n\n  const onClose = () => onOpenChange?.(false);\n\n  const modelConfigurations = buildAvailableModelConfigurations(\n    existingLlmProvider,\n    wellKnownLLMProvider ?? llmDescriptor\n  );\n\n  const initialValues: LiteLLMProxyModalValues = isOnboarding\n    ? ({\n        ...buildOnboardingInitialValues(),\n        name: LLMProviderName.LITELLM_PROXY,\n        provider: LLMProviderName.LITELLM_PROXY,\n        api_key: \"\",\n        api_base: DEFAULT_API_BASE,\n        default_model_name: \"\",\n      } as LiteLLMProxyModalValues)\n    : {\n        ...buildDefaultInitialValues(\n          existingLlmProvider,\n          modelConfigurations,\n          defaultModelName\n        ),\n        api_key: existingLlmProvider?.api_key ?? \"\",\n        api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,\n      };\n\n  const validationSchema = isOnboarding\n    ? Yup.object().shape({\n        api_key: Yup.string().required(\"API Key is required\"),\n        api_base: Yup.string().required(\"API Base URL is required\"),\n        default_model_name: Yup.string().required(\"Model name is required\"),\n      })\n    : buildDefaultValidationSchema().shape({\n        api_key: Yup.string().required(\"API Key is required\"),\n        api_base: Yup.string().required(\"API Base URL is required\"),\n      });\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      validateOnMount={true}\n      onSubmit={async (values, { setSubmitting }) => {\n        if (isOnboarding && onboardingState && onboardingActions) {\n          const modelConfigsToUse =\n            fetchedModels.length > 0 ? fetchedModels : [];\n\n          await submitOnboardingProvider({\n            providerName: LLMProviderName.LITELLM_PROXY,\n            payload: {\n              ...values,\n              model_configurations: modelConfigsToUse,\n            },\n            onboardingState,\n            onboardingActions,\n            isCustomProvider: false,\n            onClose,\n            setIsSubmitting: setSubmitting,\n          });\n        } else {\n          await submitLLMProvider({\n            providerName: LLMProviderName.LITELLM_PROXY,\n            values,\n            initialValues,\n            modelConfigurations:\n              fetchedModels.length > 0 ? fetchedModels : modelConfigurations,\n            existingLlmProvider,\n            shouldMarkAsDefault,\n            setIsTesting,\n            mutate,\n            onClose,\n            setSubmitting,\n          });\n        }\n      }}\n    >\n      {(formikProps) => (\n        <LiteLLMProxyModalInternals\n          formikProps={formikProps}\n          existingLlmProvider={existingLlmProvider}\n          fetchedModels={fetchedModels}\n          setFetchedModels={setFetchedModels}\n          modelConfigurations={modelConfigurations}\n          isTesting={isTesting}\n          onClose={onClose}\n          isOnboarding={isOnboarding}\n        />\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/OllamaModal.tsx",
    "content": "\"use client\";\n\nimport { useCallback, useEffect, useMemo, useRef, useState } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport { Formik, FormikProps } from \"formik\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport PasswordInputTypeInField from \"@/refresh-components/form/PasswordInputTypeInField\";\nimport {\n  LLMProviderFormProps,\n  LLMProviderView,\n  ModelConfiguration,\n} from \"@/interfaces/llm\";\nimport * as Yup from \"yup\";\nimport { useWellKnownLLMProvider } from \"@/hooks/useLLMProviders\";\nimport {\n  buildDefaultInitialValues,\n  buildDefaultValidationSchema,\n  buildAvailableModelConfigurations,\n  buildOnboardingInitialValues,\n  BaseLLMFormValues,\n} from \"@/sections/modals/llmConfig/utils\";\nimport {\n  submitLLMProvider,\n  submitOnboardingProvider,\n} from \"@/sections/modals/llmConfig/svc\";\nimport {\n  ModelsField,\n  DisplayNameField,\n  ModelsAccessField,\n  FieldSeparator,\n  SingleDefaultModelField,\n  LLMConfigurationModalWrapper,\n} from \"@/sections/modals/llmConfig/shared\";\nimport { fetchOllamaModels } from \"@/app/admin/configuration/llm/utils\";\nimport debounce from \"lodash/debounce\";\nimport Tabs from \"@/refresh-components/Tabs\";\nimport { Card } from \"@opal/components\";\nimport { toast } from \"@/hooks/useToast\";\n\nconst OLLAMA_PROVIDER_NAME = \"ollama_chat\";\nconst DEFAULT_API_BASE = \"http://127.0.0.1:11434\";\nconst TAB_SELF_HOSTED = \"self-hosted\";\nconst TAB_CLOUD = \"cloud\";\n\ninterface OllamaModalValues extends BaseLLMFormValues {\n  api_base: string;\n  custom_config: {\n    OLLAMA_API_KEY?: string;\n  };\n}\n\ninterface OllamaModalInternalsProps {\n  formikProps: FormikProps<OllamaModalValues>;\n  existingLlmProvider: LLMProviderView | undefined;\n  fetchedModels: ModelConfiguration[];\n  setFetchedModels: (models: ModelConfiguration[]) => void;\n  isTesting: boolean;\n  onClose: () => void;\n  isOnboarding: boolean;\n}\n\nfunction OllamaModalInternals({\n  formikProps,\n  existingLlmProvider,\n  fetchedModels,\n  setFetchedModels,\n  isTesting,\n  onClose,\n  isOnboarding,\n}: OllamaModalInternalsProps) {\n  const isInitialMount = useRef(true);\n\n  const doFetchModels = useCallback(\n    (apiBase: string, signal: AbortSignal) => {\n      fetchOllamaModels({\n        api_base: apiBase,\n        provider_name: existingLlmProvider?.name,\n        signal,\n      }).then((data) => {\n        if (signal.aborted) return;\n        if (data.error) {\n          toast.error(data.error);\n          setFetchedModels([]);\n          return;\n        }\n        setFetchedModels(data.models);\n      });\n    },\n    [existingLlmProvider?.name, setFetchedModels]\n  );\n\n  const debouncedFetchModels = useMemo(\n    () => debounce(doFetchModels, 500),\n    [doFetchModels]\n  );\n\n  // Skip the initial fetch for new providers — api_base starts with a default\n  // value, which would otherwise trigger a fetch before the user has done\n  // anything. Existing providers should still auto-fetch on mount.\n  useEffect(() => {\n    if (isInitialMount.current) {\n      isInitialMount.current = false;\n      if (!existingLlmProvider) return;\n    }\n\n    if (formikProps.values.api_base) {\n      const controller = new AbortController();\n      debouncedFetchModels(formikProps.values.api_base, controller.signal);\n      return () => {\n        debouncedFetchModels.cancel();\n        controller.abort();\n      };\n    } else {\n      setFetchedModels([]);\n    }\n  }, [\n    formikProps.values.api_base,\n    debouncedFetchModels,\n    setFetchedModels,\n    existingLlmProvider,\n  ]);\n\n  const currentModels =\n    fetchedModels.length > 0\n      ? fetchedModels\n      : existingLlmProvider?.model_configurations || [];\n\n  const hasApiKey = !!formikProps.values.custom_config?.OLLAMA_API_KEY;\n  const defaultTab =\n    existingLlmProvider && hasApiKey ? TAB_CLOUD : TAB_SELF_HOSTED;\n\n  return (\n    <LLMConfigurationModalWrapper\n      providerEndpoint={OLLAMA_PROVIDER_NAME}\n      existingProviderName={existingLlmProvider?.name}\n      onClose={onClose}\n      isFormValid={formikProps.isValid}\n      isDirty={formikProps.dirty}\n      isTesting={isTesting}\n      isSubmitting={formikProps.isSubmitting}\n    >\n      <Card background=\"light\" border=\"none\" padding=\"sm\">\n        <Tabs defaultValue={defaultTab}>\n          <Tabs.List>\n            <Tabs.Trigger value={TAB_SELF_HOSTED}>\n              Self-hosted Ollama\n            </Tabs.Trigger>\n            <Tabs.Trigger value={TAB_CLOUD}>Ollama Cloud</Tabs.Trigger>\n          </Tabs.List>\n          <Tabs.Content value={TAB_SELF_HOSTED}>\n            <InputLayouts.Vertical\n              name=\"api_base\"\n              title=\"API Base URL\"\n              subDescription=\"The base URL for your Ollama instance.\"\n            >\n              <InputTypeInField\n                name=\"api_base\"\n                placeholder=\"Your Ollama API base URL\"\n              />\n            </InputLayouts.Vertical>\n          </Tabs.Content>\n\n          <Tabs.Content value={TAB_CLOUD}>\n            <InputLayouts.Vertical\n              name=\"custom_config.OLLAMA_API_KEY\"\n              title=\"API Key\"\n              subDescription=\"Your Ollama Cloud API key.\"\n            >\n              <PasswordInputTypeInField\n                name=\"custom_config.OLLAMA_API_KEY\"\n                placeholder=\"API Key\"\n              />\n            </InputLayouts.Vertical>\n          </Tabs.Content>\n        </Tabs>\n      </Card>\n\n      {!isOnboarding && (\n        <>\n          <FieldSeparator />\n          <DisplayNameField disabled={!!existingLlmProvider} />\n        </>\n      )}\n\n      <FieldSeparator />\n\n      {isOnboarding ? (\n        <SingleDefaultModelField placeholder=\"E.g. llama3.1\" />\n      ) : (\n        <ModelsField\n          modelConfigurations={currentModels}\n          formikProps={formikProps}\n          recommendedDefaultModel={null}\n          shouldShowAutoUpdateToggle={false}\n        />\n      )}\n\n      {!isOnboarding && (\n        <>\n          <FieldSeparator />\n          <ModelsAccessField formikProps={formikProps} />\n        </>\n      )}\n    </LLMConfigurationModalWrapper>\n  );\n}\n\nexport default function OllamaModal({\n  variant = \"llm-configuration\",\n  existingLlmProvider,\n  shouldMarkAsDefault,\n  open,\n  onOpenChange,\n  defaultModelName,\n  onboardingState,\n  onboardingActions,\n  llmDescriptor,\n}: LLMProviderFormProps) {\n  const [fetchedModels, setFetchedModels] = useState<ModelConfiguration[]>([]);\n  const [isTesting, setIsTesting] = useState(false);\n  const isOnboarding = variant === \"onboarding\";\n  const { mutate } = useSWRConfig();\n  const { wellKnownLLMProvider } =\n    useWellKnownLLMProvider(OLLAMA_PROVIDER_NAME);\n\n  if (open === false) return null;\n\n  const onClose = () => onOpenChange?.(false);\n\n  const modelConfigurations = buildAvailableModelConfigurations(\n    existingLlmProvider,\n    wellKnownLLMProvider ?? llmDescriptor\n  );\n\n  const initialValues: OllamaModalValues = isOnboarding\n    ? ({\n        ...buildOnboardingInitialValues(),\n        name: OLLAMA_PROVIDER_NAME,\n        provider: OLLAMA_PROVIDER_NAME,\n        api_base: DEFAULT_API_BASE,\n        default_model_name: \"\",\n        custom_config: {\n          OLLAMA_API_KEY: \"\",\n        },\n      } as OllamaModalValues)\n    : {\n        ...buildDefaultInitialValues(\n          existingLlmProvider,\n          modelConfigurations,\n          defaultModelName\n        ),\n        api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,\n        custom_config: {\n          OLLAMA_API_KEY:\n            (existingLlmProvider?.custom_config?.OLLAMA_API_KEY as string) ??\n            \"\",\n        },\n      };\n\n  const validationSchema = isOnboarding\n    ? Yup.object().shape({\n        api_base: Yup.string().required(\"API Base URL is required\"),\n        default_model_name: Yup.string().required(\"Model name is required\"),\n      })\n    : buildDefaultValidationSchema().shape({\n        api_base: Yup.string().required(\"API Base URL is required\"),\n      });\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      validateOnMount={true}\n      onSubmit={async (values, { setSubmitting }) => {\n        const filteredCustomConfig = Object.fromEntries(\n          Object.entries(values.custom_config || {}).filter(([, v]) => v !== \"\")\n        );\n\n        const submitValues = {\n          ...values,\n          custom_config:\n            Object.keys(filteredCustomConfig).length > 0\n              ? filteredCustomConfig\n              : undefined,\n        };\n\n        if (isOnboarding && onboardingState && onboardingActions) {\n          const modelConfigsToUse =\n            fetchedModels.length > 0 ? fetchedModels : [];\n\n          await submitOnboardingProvider({\n            providerName: OLLAMA_PROVIDER_NAME,\n            payload: {\n              ...submitValues,\n              model_configurations: modelConfigsToUse,\n            },\n            onboardingState,\n            onboardingActions,\n            isCustomProvider: false,\n            onClose,\n            setIsSubmitting: setSubmitting,\n          });\n        } else {\n          await submitLLMProvider({\n            providerName: OLLAMA_PROVIDER_NAME,\n            values: submitValues,\n            initialValues,\n            modelConfigurations:\n              fetchedModels.length > 0 ? fetchedModels : modelConfigurations,\n            existingLlmProvider,\n            shouldMarkAsDefault,\n            setIsTesting,\n            mutate,\n            onClose,\n            setSubmitting,\n          });\n        }\n      }}\n    >\n      {(formikProps) => (\n        <OllamaModalInternals\n          formikProps={formikProps}\n          existingLlmProvider={existingLlmProvider}\n          fetchedModels={fetchedModels}\n          setFetchedModels={setFetchedModels}\n          isTesting={isTesting}\n          onClose={onClose}\n          isOnboarding={isOnboarding}\n        />\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/OpenAIModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport { Formik } from \"formik\";\nimport { LLMProviderFormProps } from \"@/interfaces/llm\";\nimport * as Yup from \"yup\";\nimport { useWellKnownLLMProvider } from \"@/hooks/useLLMProviders\";\nimport {\n  buildDefaultInitialValues,\n  buildDefaultValidationSchema,\n  buildAvailableModelConfigurations,\n  buildOnboardingInitialValues,\n} from \"@/sections/modals/llmConfig/utils\";\nimport {\n  submitLLMProvider,\n  submitOnboardingProvider,\n} from \"@/sections/modals/llmConfig/svc\";\nimport {\n  APIKeyField,\n  ModelsField,\n  DisplayNameField,\n  FieldSeparator,\n  ModelsAccessField,\n  SingleDefaultModelField,\n  LLMConfigurationModalWrapper,\n} from \"@/sections/modals/llmConfig/shared\";\n\nconst OPENAI_PROVIDER_NAME = \"openai\";\nconst DEFAULT_DEFAULT_MODEL_NAME = \"gpt-5.2\";\n\nexport default function OpenAIModal({\n  variant = \"llm-configuration\",\n  existingLlmProvider,\n  shouldMarkAsDefault,\n  open,\n  onOpenChange,\n  defaultModelName,\n  onboardingState,\n  onboardingActions,\n  llmDescriptor,\n}: LLMProviderFormProps) {\n  const isOnboarding = variant === \"onboarding\";\n  const [isTesting, setIsTesting] = useState(false);\n  const { mutate } = useSWRConfig();\n  const { wellKnownLLMProvider } =\n    useWellKnownLLMProvider(OPENAI_PROVIDER_NAME);\n\n  if (open === false) return null;\n\n  const onClose = () => onOpenChange?.(false);\n\n  const modelConfigurations = buildAvailableModelConfigurations(\n    existingLlmProvider,\n    wellKnownLLMProvider ?? llmDescriptor\n  );\n\n  const initialValues = isOnboarding\n    ? {\n        ...buildOnboardingInitialValues(),\n        name: OPENAI_PROVIDER_NAME,\n        provider: OPENAI_PROVIDER_NAME,\n        api_key: \"\",\n        default_model_name: DEFAULT_DEFAULT_MODEL_NAME,\n      }\n    : {\n        ...buildDefaultInitialValues(\n          existingLlmProvider,\n          modelConfigurations,\n          defaultModelName\n        ),\n        api_key: existingLlmProvider?.api_key ?? \"\",\n        default_model_name:\n          (defaultModelName &&\n          modelConfigurations.some((m) => m.name === defaultModelName)\n            ? defaultModelName\n            : undefined) ??\n          wellKnownLLMProvider?.recommended_default_model?.name ??\n          DEFAULT_DEFAULT_MODEL_NAME,\n        is_auto_mode: existingLlmProvider?.is_auto_mode ?? true,\n      };\n\n  const validationSchema = isOnboarding\n    ? Yup.object().shape({\n        api_key: Yup.string().required(\"API Key is required\"),\n        default_model_name: Yup.string().required(\"Model name is required\"),\n      })\n    : buildDefaultValidationSchema().shape({\n        api_key: Yup.string().required(\"API Key is required\"),\n      });\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      validateOnMount={true}\n      onSubmit={async (values, { setSubmitting }) => {\n        if (isOnboarding && onboardingState && onboardingActions) {\n          const modelConfigsToUse =\n            (wellKnownLLMProvider ?? llmDescriptor)?.known_models ?? [];\n\n          await submitOnboardingProvider({\n            providerName: OPENAI_PROVIDER_NAME,\n            payload: {\n              ...values,\n              model_configurations: modelConfigsToUse,\n              is_auto_mode:\n                values.default_model_name === DEFAULT_DEFAULT_MODEL_NAME,\n            },\n            onboardingState,\n            onboardingActions,\n            isCustomProvider: false,\n            onClose,\n            setIsSubmitting: setSubmitting,\n          });\n        } else {\n          await submitLLMProvider({\n            providerName: OPENAI_PROVIDER_NAME,\n            values,\n            initialValues,\n            modelConfigurations,\n            existingLlmProvider,\n            shouldMarkAsDefault,\n            setIsTesting,\n            mutate,\n            onClose,\n            setSubmitting,\n          });\n        }\n      }}\n    >\n      {(formikProps) => (\n        <LLMConfigurationModalWrapper\n          providerEndpoint={OPENAI_PROVIDER_NAME}\n          existingProviderName={existingLlmProvider?.name}\n          onClose={onClose}\n          isFormValid={formikProps.isValid}\n          isDirty={formikProps.dirty}\n          isTesting={isTesting}\n          isSubmitting={formikProps.isSubmitting}\n        >\n          <APIKeyField providerName=\"OpenAI\" />\n\n          {!isOnboarding && (\n            <>\n              <FieldSeparator />\n              <DisplayNameField disabled={!!existingLlmProvider} />\n            </>\n          )}\n\n          <FieldSeparator />\n          {isOnboarding ? (\n            <SingleDefaultModelField placeholder=\"E.g. gpt-5.2\" />\n          ) : (\n            <ModelsField\n              modelConfigurations={modelConfigurations}\n              formikProps={formikProps}\n              recommendedDefaultModel={\n                wellKnownLLMProvider?.recommended_default_model ?? null\n              }\n              shouldShowAutoUpdateToggle={true}\n            />\n          )}\n\n          {!isOnboarding && (\n            <>\n              <FieldSeparator />\n              <ModelsAccessField formikProps={formikProps} />\n            </>\n          )}\n        </LLMConfigurationModalWrapper>\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/OpenRouterModal.tsx",
    "content": "\"use client\";\n\nimport { useState, useEffect } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport { Formik, FormikProps } from \"formik\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport {\n  LLMProviderFormProps,\n  LLMProviderView,\n  ModelConfiguration,\n} from \"@/interfaces/llm\";\nimport { fetchOpenRouterModels } from \"@/app/admin/configuration/llm/utils\";\nimport * as Yup from \"yup\";\nimport { useWellKnownLLMProvider } from \"@/hooks/useLLMProviders\";\nimport {\n  buildDefaultInitialValues,\n  buildDefaultValidationSchema,\n  buildAvailableModelConfigurations,\n  buildOnboardingInitialValues,\n  BaseLLMFormValues,\n} from \"@/sections/modals/llmConfig/utils\";\nimport {\n  submitLLMProvider,\n  submitOnboardingProvider,\n} from \"@/sections/modals/llmConfig/svc\";\nimport {\n  APIKeyField,\n  ModelsField,\n  DisplayNameField,\n  ModelsAccessField,\n  FieldSeparator,\n  FieldWrapper,\n  SingleDefaultModelField,\n  LLMConfigurationModalWrapper,\n} from \"@/sections/modals/llmConfig/shared\";\nimport { toast } from \"@/hooks/useToast\";\n\nconst OPENROUTER_PROVIDER_NAME = \"openrouter\";\nconst DEFAULT_API_BASE = \"https://openrouter.ai/api/v1\";\ninterface OpenRouterModalValues extends BaseLLMFormValues {\n  api_key: string;\n  api_base: string;\n}\n\ninterface OpenRouterModalInternalsProps {\n  formikProps: FormikProps<OpenRouterModalValues>;\n  existingLlmProvider: LLMProviderView | undefined;\n  fetchedModels: ModelConfiguration[];\n  setFetchedModels: (models: ModelConfiguration[]) => void;\n  modelConfigurations: ModelConfiguration[];\n  isTesting: boolean;\n  onClose: () => void;\n  isOnboarding: boolean;\n}\n\nfunction OpenRouterModalInternals({\n  formikProps,\n  existingLlmProvider,\n  fetchedModels,\n  setFetchedModels,\n  modelConfigurations,\n  isTesting,\n  onClose,\n  isOnboarding,\n}: OpenRouterModalInternalsProps) {\n  const currentModels =\n    fetchedModels.length > 0\n      ? fetchedModels\n      : existingLlmProvider?.model_configurations || modelConfigurations;\n\n  const isFetchDisabled =\n    !formikProps.values.api_base || !formikProps.values.api_key;\n\n  const handleFetchModels = async () => {\n    const { models, error } = await fetchOpenRouterModels({\n      api_base: formikProps.values.api_base,\n      api_key: formikProps.values.api_key,\n      provider_name: existingLlmProvider?.name,\n    });\n    if (error) {\n      throw new Error(error);\n    }\n    setFetchedModels(models);\n  };\n\n  // Auto-fetch models on initial load when editing an existing provider\n  useEffect(() => {\n    if (existingLlmProvider && !isFetchDisabled) {\n      handleFetchModels().catch((err) => {\n        toast.error(\n          err instanceof Error ? err.message : \"Failed to fetch models\"\n        );\n      });\n    }\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, []);\n\n  return (\n    <LLMConfigurationModalWrapper\n      providerEndpoint={OPENROUTER_PROVIDER_NAME}\n      existingProviderName={existingLlmProvider?.name}\n      onClose={onClose}\n      isFormValid={formikProps.isValid}\n      isDirty={formikProps.dirty}\n      isTesting={isTesting}\n      isSubmitting={formikProps.isSubmitting}\n    >\n      <FieldWrapper>\n        <InputLayouts.Vertical\n          name=\"api_base\"\n          title=\"API Base URL\"\n          subDescription=\"Paste your OpenRouter-compatible endpoint URL or use OpenRouter API directly.\"\n        >\n          <InputTypeInField\n            name=\"api_base\"\n            placeholder=\"Your OpenRouter base URL\"\n          />\n        </InputLayouts.Vertical>\n      </FieldWrapper>\n\n      <APIKeyField providerName=\"OpenRouter\" />\n\n      {!isOnboarding && (\n        <>\n          <FieldSeparator />\n          <DisplayNameField disabled={!!existingLlmProvider} />\n        </>\n      )}\n\n      <FieldSeparator />\n\n      {isOnboarding ? (\n        <SingleDefaultModelField placeholder=\"E.g. openai/gpt-4o\" />\n      ) : (\n        <ModelsField\n          modelConfigurations={currentModels}\n          formikProps={formikProps}\n          recommendedDefaultModel={null}\n          shouldShowAutoUpdateToggle={false}\n          onRefetch={isFetchDisabled ? undefined : handleFetchModels}\n        />\n      )}\n\n      {!isOnboarding && (\n        <>\n          <FieldSeparator />\n          <ModelsAccessField formikProps={formikProps} />\n        </>\n      )}\n    </LLMConfigurationModalWrapper>\n  );\n}\n\nexport default function OpenRouterModal({\n  variant = \"llm-configuration\",\n  existingLlmProvider,\n  shouldMarkAsDefault,\n  open,\n  onOpenChange,\n  defaultModelName,\n  onboardingState,\n  onboardingActions,\n  llmDescriptor,\n}: LLMProviderFormProps) {\n  const [fetchedModels, setFetchedModels] = useState<ModelConfiguration[]>([]);\n  const [isTesting, setIsTesting] = useState(false);\n  const isOnboarding = variant === \"onboarding\";\n  const { mutate } = useSWRConfig();\n  const { wellKnownLLMProvider } = useWellKnownLLMProvider(\n    OPENROUTER_PROVIDER_NAME\n  );\n\n  if (open === false) return null;\n\n  const onClose = () => onOpenChange?.(false);\n\n  const modelConfigurations = buildAvailableModelConfigurations(\n    existingLlmProvider,\n    wellKnownLLMProvider ?? llmDescriptor\n  );\n\n  const initialValues: OpenRouterModalValues = isOnboarding\n    ? ({\n        ...buildOnboardingInitialValues(),\n        name: OPENROUTER_PROVIDER_NAME,\n        provider: OPENROUTER_PROVIDER_NAME,\n        api_key: \"\",\n        api_base: DEFAULT_API_BASE,\n        default_model_name: \"\",\n      } as OpenRouterModalValues)\n    : {\n        ...buildDefaultInitialValues(\n          existingLlmProvider,\n          modelConfigurations,\n          defaultModelName\n        ),\n        api_key: existingLlmProvider?.api_key ?? \"\",\n        api_base: existingLlmProvider?.api_base ?? DEFAULT_API_BASE,\n      };\n\n  const validationSchema = isOnboarding\n    ? Yup.object().shape({\n        api_key: Yup.string().required(\"API Key is required\"),\n        api_base: Yup.string().required(\"API Base URL is required\"),\n        default_model_name: Yup.string().required(\"Model name is required\"),\n      })\n    : buildDefaultValidationSchema().shape({\n        api_key: Yup.string().required(\"API Key is required\"),\n        api_base: Yup.string().required(\"API Base URL is required\"),\n      });\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      validateOnMount={true}\n      onSubmit={async (values, { setSubmitting }) => {\n        if (isOnboarding && onboardingState && onboardingActions) {\n          const modelConfigsToUse =\n            fetchedModels.length > 0 ? fetchedModels : [];\n\n          await submitOnboardingProvider({\n            providerName: OPENROUTER_PROVIDER_NAME,\n            payload: {\n              ...values,\n              model_configurations: modelConfigsToUse,\n            },\n            onboardingState,\n            onboardingActions,\n            isCustomProvider: false,\n            onClose,\n            setIsSubmitting: setSubmitting,\n          });\n        } else {\n          await submitLLMProvider({\n            providerName: OPENROUTER_PROVIDER_NAME,\n            values,\n            initialValues,\n            modelConfigurations:\n              fetchedModels.length > 0 ? fetchedModels : modelConfigurations,\n            existingLlmProvider,\n            shouldMarkAsDefault,\n            setIsTesting,\n            mutate,\n            onClose,\n            setSubmitting,\n          });\n        }\n      }}\n    >\n      {(formikProps) => (\n        <OpenRouterModalInternals\n          formikProps={formikProps}\n          existingLlmProvider={existingLlmProvider}\n          fetchedModels={fetchedModels}\n          setFetchedModels={setFetchedModels}\n          modelConfigurations={modelConfigurations}\n          isTesting={isTesting}\n          onClose={onClose}\n          isOnboarding={isOnboarding}\n        />\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/VertexAIModal.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { useSWRConfig } from \"swr\";\nimport { Formik } from \"formik\";\nimport { FileUploadFormField } from \"@/components/Field\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport { LLMProviderFormProps } from \"@/interfaces/llm\";\nimport * as Yup from \"yup\";\nimport { useWellKnownLLMProvider } from \"@/hooks/useLLMProviders\";\nimport {\n  buildDefaultInitialValues,\n  buildDefaultValidationSchema,\n  buildAvailableModelConfigurations,\n  buildOnboardingInitialValues,\n  BaseLLMFormValues,\n} from \"@/sections/modals/llmConfig/utils\";\nimport {\n  submitLLMProvider,\n  submitOnboardingProvider,\n} from \"@/sections/modals/llmConfig/svc\";\nimport {\n  ModelsField,\n  DisplayNameField,\n  FieldSeparator,\n  FieldWrapper,\n  ModelsAccessField,\n  SingleDefaultModelField,\n  LLMConfigurationModalWrapper,\n} from \"@/sections/modals/llmConfig/shared\";\n\nconst VERTEXAI_PROVIDER_NAME = \"vertex_ai\";\nconst VERTEXAI_DISPLAY_NAME = \"Google Cloud Vertex AI\";\nconst VERTEXAI_DEFAULT_MODEL = \"gemini-2.5-pro\";\nconst VERTEXAI_DEFAULT_LOCATION = \"global\";\n\ninterface VertexAIModalValues extends BaseLLMFormValues {\n  custom_config: {\n    vertex_credentials: string;\n    vertex_location: string;\n  };\n}\n\nexport default function VertexAIModal({\n  variant = \"llm-configuration\",\n  existingLlmProvider,\n  shouldMarkAsDefault,\n  open,\n  onOpenChange,\n  defaultModelName,\n  onboardingState,\n  onboardingActions,\n  llmDescriptor,\n}: LLMProviderFormProps) {\n  const isOnboarding = variant === \"onboarding\";\n  const [isTesting, setIsTesting] = useState(false);\n  const { mutate } = useSWRConfig();\n  const { wellKnownLLMProvider } = useWellKnownLLMProvider(\n    VERTEXAI_PROVIDER_NAME\n  );\n\n  if (open === false) return null;\n\n  const onClose = () => onOpenChange?.(false);\n\n  const modelConfigurations = buildAvailableModelConfigurations(\n    existingLlmProvider,\n    wellKnownLLMProvider ?? llmDescriptor\n  );\n\n  const initialValues: VertexAIModalValues = isOnboarding\n    ? ({\n        ...buildOnboardingInitialValues(),\n        name: VERTEXAI_PROVIDER_NAME,\n        provider: VERTEXAI_PROVIDER_NAME,\n        default_model_name: VERTEXAI_DEFAULT_MODEL,\n        custom_config: {\n          vertex_credentials: \"\",\n          vertex_location: VERTEXAI_DEFAULT_LOCATION,\n        },\n      } as VertexAIModalValues)\n    : {\n        ...buildDefaultInitialValues(\n          existingLlmProvider,\n          modelConfigurations,\n          defaultModelName\n        ),\n        default_model_name:\n          (defaultModelName &&\n          modelConfigurations.some((m) => m.name === defaultModelName)\n            ? defaultModelName\n            : undefined) ??\n          wellKnownLLMProvider?.recommended_default_model?.name ??\n          VERTEXAI_DEFAULT_MODEL,\n        is_auto_mode: existingLlmProvider?.is_auto_mode ?? true,\n        custom_config: {\n          vertex_credentials:\n            (existingLlmProvider?.custom_config\n              ?.vertex_credentials as string) ?? \"\",\n          vertex_location:\n            (existingLlmProvider?.custom_config?.vertex_location as string) ??\n            VERTEXAI_DEFAULT_LOCATION,\n        },\n      };\n\n  const validationSchema = isOnboarding\n    ? Yup.object().shape({\n        default_model_name: Yup.string().required(\"Model name is required\"),\n        custom_config: Yup.object({\n          vertex_credentials: Yup.string().required(\n            \"Credentials file is required\"\n          ),\n          vertex_location: Yup.string(),\n        }),\n      })\n    : buildDefaultValidationSchema().shape({\n        custom_config: Yup.object({\n          vertex_credentials: Yup.string().required(\n            \"Credentials file is required\"\n          ),\n          vertex_location: Yup.string(),\n        }),\n      });\n\n  return (\n    <Formik\n      initialValues={initialValues}\n      validationSchema={validationSchema}\n      validateOnMount={true}\n      onSubmit={async (values, { setSubmitting }) => {\n        const filteredCustomConfig = Object.fromEntries(\n          Object.entries(values.custom_config || {}).filter(\n            ([key, v]) => key === \"vertex_credentials\" || v !== \"\"\n          )\n        );\n\n        const submitValues = {\n          ...values,\n          custom_config:\n            Object.keys(filteredCustomConfig).length > 0\n              ? filteredCustomConfig\n              : undefined,\n        };\n\n        if (isOnboarding && onboardingState && onboardingActions) {\n          const modelConfigsToUse =\n            (wellKnownLLMProvider ?? llmDescriptor)?.known_models ?? [];\n\n          await submitOnboardingProvider({\n            providerName: VERTEXAI_PROVIDER_NAME,\n            payload: {\n              ...submitValues,\n              model_configurations: modelConfigsToUse,\n              is_auto_mode:\n                values.default_model_name === VERTEXAI_DEFAULT_MODEL,\n            },\n            onboardingState,\n            onboardingActions,\n            isCustomProvider: false,\n            onClose,\n            setIsSubmitting: setSubmitting,\n          });\n        } else {\n          await submitLLMProvider({\n            providerName: VERTEXAI_PROVIDER_NAME,\n            values: submitValues,\n            initialValues,\n            modelConfigurations,\n            existingLlmProvider,\n            shouldMarkAsDefault,\n            setIsTesting,\n            mutate,\n            onClose,\n            setSubmitting,\n          });\n        }\n      }}\n    >\n      {(formikProps) => (\n        <LLMConfigurationModalWrapper\n          providerEndpoint={VERTEXAI_PROVIDER_NAME}\n          providerName={VERTEXAI_DISPLAY_NAME}\n          existingProviderName={existingLlmProvider?.name}\n          onClose={onClose}\n          isFormValid={formikProps.isValid}\n          isDirty={formikProps.dirty}\n          isTesting={isTesting}\n          isSubmitting={formikProps.isSubmitting}\n        >\n          <FieldWrapper>\n            <InputLayouts.Vertical\n              name=\"custom_config.vertex_location\"\n              title=\"Google Cloud Region Name\"\n              subDescription=\"Region where your Google Vertex AI models are hosted. See full list of regions supported at Google Cloud.\"\n            >\n              <InputTypeInField\n                name=\"custom_config.vertex_location\"\n                placeholder={VERTEXAI_DEFAULT_LOCATION}\n              />\n            </InputLayouts.Vertical>\n          </FieldWrapper>\n\n          <FieldWrapper>\n            <InputLayouts.Vertical\n              name=\"custom_config.vertex_credentials\"\n              title=\"API Key\"\n              subDescription=\"Attach your API key JSON from Google Cloud to access your models.\"\n            >\n              <FileUploadFormField\n                name=\"custom_config.vertex_credentials\"\n                label=\"\"\n              />\n            </InputLayouts.Vertical>\n          </FieldWrapper>\n\n          <FieldSeparator />\n\n          {!isOnboarding && (\n            <DisplayNameField disabled={!!existingLlmProvider} />\n          )}\n\n          <FieldSeparator />\n\n          {isOnboarding ? (\n            <SingleDefaultModelField placeholder=\"E.g. gemini-2.5-pro\" />\n          ) : (\n            <ModelsField\n              modelConfigurations={modelConfigurations}\n              formikProps={formikProps}\n              recommendedDefaultModel={\n                wellKnownLLMProvider?.recommended_default_model ?? null\n              }\n              shouldShowAutoUpdateToggle={true}\n            />\n          )}\n\n          {!isOnboarding && <ModelsAccessField formikProps={formikProps} />}\n        </LLMConfigurationModalWrapper>\n      )}\n    </Formik>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/getModal.tsx",
    "content": "import { LLMProviderName, LLMProviderView } from \"@/interfaces/llm\";\nimport AnthropicModal from \"@/sections/modals/llmConfig/AnthropicModal\";\nimport OpenAIModal from \"@/sections/modals/llmConfig/OpenAIModal\";\nimport OllamaModal from \"@/sections/modals/llmConfig/OllamaModal\";\nimport AzureModal from \"@/sections/modals/llmConfig/AzureModal\";\nimport VertexAIModal from \"@/sections/modals/llmConfig/VertexAIModal\";\nimport OpenRouterModal from \"@/sections/modals/llmConfig/OpenRouterModal\";\nimport CustomModal from \"@/sections/modals/llmConfig/CustomModal\";\nimport BedrockModal from \"@/sections/modals/llmConfig/BedrockModal\";\nimport LMStudioForm from \"@/sections/modals/llmConfig/LMStudioForm\";\nimport LiteLLMProxyModal from \"@/sections/modals/llmConfig/LiteLLMProxyModal\";\nimport BifrostModal from \"@/sections/modals/llmConfig/BifrostModal\";\n\nfunction detectIfRealOpenAIProvider(provider: LLMProviderView) {\n  return (\n    provider.provider === LLMProviderName.OPENAI &&\n    provider.api_key &&\n    !provider.api_base &&\n    Object.keys(provider.custom_config || {}).length === 0\n  );\n}\n\nexport function getModalForExistingProvider(\n  provider: LLMProviderView,\n  open?: boolean,\n  onOpenChange?: (open: boolean) => void,\n  defaultModelName?: string\n) {\n  const props = {\n    existingLlmProvider: provider,\n    open,\n    onOpenChange,\n    defaultModelName,\n  };\n\n  switch (provider.provider) {\n    case LLMProviderName.OPENAI:\n      // \"openai\" as a provider name can be used for litellm proxy / any OpenAI-compatible provider\n      if (detectIfRealOpenAIProvider(provider)) {\n        return <OpenAIModal {...props} />;\n      } else {\n        return <CustomModal {...props} />;\n      }\n    case LLMProviderName.ANTHROPIC:\n      return <AnthropicModal {...props} />;\n    case LLMProviderName.OLLAMA_CHAT:\n      return <OllamaModal {...props} />;\n    case LLMProviderName.AZURE:\n      return <AzureModal {...props} />;\n    case LLMProviderName.VERTEX_AI:\n      return <VertexAIModal {...props} />;\n    case LLMProviderName.BEDROCK:\n      return <BedrockModal {...props} />;\n    case LLMProviderName.OPENROUTER:\n      return <OpenRouterModal {...props} />;\n    case LLMProviderName.LM_STUDIO:\n      return <LMStudioForm {...props} />;\n    case LLMProviderName.LITELLM_PROXY:\n      return <LiteLLMProxyModal {...props} />;\n    case LLMProviderName.BIFROST:\n      return <BifrostModal {...props} />;\n    default:\n      return <CustomModal {...props} />;\n  }\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/shared.tsx",
    "content": "\"use client\";\n\nimport { ReactNode, useState } from \"react\";\nimport { Form, FormikProps } from \"formik\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { useAgents } from \"@/hooks/useAgents\";\nimport { useUserGroups } from \"@/lib/hooks\";\nimport { ModelConfiguration, SimpleKnownModel } from \"@/interfaces/llm\";\nimport * as InputLayouts from \"@/layouts/input-layouts\";\nimport Checkbox from \"@/refresh-components/inputs/Checkbox\";\nimport InputTypeInField from \"@/refresh-components/form/InputTypeInField\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport InputComboBox from \"@/refresh-components/inputs/InputComboBox\";\nimport InputSelect from \"@/refresh-components/inputs/InputSelect\";\nimport PasswordInputTypeInField from \"@/refresh-components/form/PasswordInputTypeInField\";\nimport Switch from \"@/refresh-components/inputs/Switch\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button, LineItemButton, Tag } from \"@opal/components\";\nimport { BaseLLMFormValues } from \"@/sections/modals/llmConfig/utils\";\nimport { WithoutStyles } from \"@opal/types\";\nimport Separator from \"@/refresh-components/Separator\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { Hoverable } from \"@opal/core\";\nimport { Content } from \"@opal/layouts\";\nimport {\n  SvgArrowExchange,\n  SvgOnyxOctagon,\n  SvgOrganization,\n  SvgPlusCircle,\n  SvgRefreshCw,\n  SvgSparkle,\n  SvgUserManage,\n  SvgUsers,\n  SvgX,\n} from \"@opal/icons\";\nimport SvgOnyxLogo from \"@opal/icons/onyx-logo\";\nimport { Card, EmptyMessageCard } from \"@opal/components\";\nimport { ContentAction } from \"@opal/layouts\";\nimport AgentAvatar from \"@/refresh-components/avatars/AgentAvatar\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport useUsers from \"@/hooks/useUsers\";\nimport { toast } from \"@/hooks/useToast\";\nimport { UserRole } from \"@/lib/types\";\nimport Modal from \"@/refresh-components/Modal\";\nimport {\n  getProviderIcon,\n  getProviderDisplayName,\n  getProviderProductName,\n} from \"@/lib/llmConfig/providers\";\n\nexport function FieldSeparator() {\n  return <Separator noPadding className=\"px-2\" />;\n}\n\nexport type FieldWrapperProps = WithoutStyles<\n  React.HTMLAttributes<HTMLDivElement>\n>;\n\nexport function FieldWrapper(props: FieldWrapperProps) {\n  return <div {...props} className=\"p-2 w-full\" />;\n}\n\n// ─── DisplayNameField ────────────────────────────────────────────────────────\n\nexport interface DisplayNameFieldProps {\n  disabled?: boolean;\n}\n\nexport function DisplayNameField({ disabled = false }: DisplayNameFieldProps) {\n  return (\n    <FieldWrapper>\n      <InputLayouts.Vertical\n        name=\"name\"\n        title=\"Display Name\"\n        subDescription=\"Used to identify this provider in the app.\"\n      >\n        <InputTypeInField\n          name=\"name\"\n          placeholder=\"Display Name\"\n          variant={disabled ? \"disabled\" : undefined}\n        />\n      </InputLayouts.Vertical>\n    </FieldWrapper>\n  );\n}\n\n// ─── APIKeyField ─────────────────────────────────────────────────────────────\n\nexport interface APIKeyFieldProps {\n  optional?: boolean;\n  providerName?: string;\n}\n\nexport function APIKeyField({\n  optional = false,\n  providerName,\n}: APIKeyFieldProps) {\n  return (\n    <FieldWrapper>\n      <InputLayouts.Vertical\n        name=\"api_key\"\n        title=\"API Key\"\n        subDescription={\n          providerName\n            ? `Paste your API key from ${providerName} to access your models.`\n            : \"Paste your API key to access your models.\"\n        }\n        suffix={optional ? \"optional\" : undefined}\n      >\n        <PasswordInputTypeInField name=\"api_key\" placeholder=\"API Key\" />\n      </InputLayouts.Vertical>\n    </FieldWrapper>\n  );\n}\n\n// ─── SingleDefaultModelField ─────────────────────────────────────────────────\n\nexport interface SingleDefaultModelFieldProps {\n  placeholder?: string;\n}\n\nexport function SingleDefaultModelField({\n  placeholder = \"E.g. gpt-4o\",\n}: SingleDefaultModelFieldProps) {\n  return (\n    <InputLayouts.Vertical\n      name=\"default_model_name\"\n      title=\"Default Model\"\n      description=\"The model to use by default for this provider unless otherwise specified.\"\n    >\n      <InputTypeInField name=\"default_model_name\" placeholder={placeholder} />\n    </InputLayouts.Vertical>\n  );\n}\n\n// ─── ModelsAccessField ──────────────────────────────────────────────────────\n\n/** Prefix used to distinguish group IDs from agent IDs in the combobox. */\nconst GROUP_PREFIX = \"group:\";\nconst AGENT_PREFIX = \"agent:\";\n\ninterface ModelsAccessFieldProps<T> {\n  formikProps: FormikProps<T>;\n}\n\nexport function ModelsAccessField<T extends BaseLLMFormValues>({\n  formikProps,\n}: ModelsAccessFieldProps<T>) {\n  const { agents } = useAgents();\n  const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups();\n  const { data: usersData } = useUsers({ includeApiKeys: false });\n  const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled();\n\n  const adminCount =\n    usersData?.accepted.filter((u) => u.role === UserRole.ADMIN).length ?? 0;\n\n  const isPublic = formikProps.values.is_public;\n  const selectedGroupIds = formikProps.values.groups ?? [];\n  const selectedAgentIds = formikProps.values.personas ?? [];\n\n  // Build a flat list of combobox options from groups + agents\n  const groupOptions =\n    isPaidEnterpriseFeaturesEnabled && !userGroupsIsLoading && userGroups\n      ? userGroups.map((g) => ({\n          value: `${GROUP_PREFIX}${g.id}`,\n          label: g.name,\n          description: \"Group\",\n        }))\n      : [];\n\n  const agentOptions = agents.map((a) => ({\n    value: `${AGENT_PREFIX}${a.id}`,\n    label: a.name,\n    description: \"Agent\",\n  }));\n\n  // Exclude already-selected items from the dropdown\n  const selectedKeys = new Set([\n    ...selectedGroupIds.map((id) => `${GROUP_PREFIX}${id}`),\n    ...selectedAgentIds.map((id) => `${AGENT_PREFIX}${id}`),\n  ]);\n\n  const availableOptions = [...groupOptions, ...agentOptions].filter(\n    (opt) => !selectedKeys.has(opt.value)\n  );\n\n  // Resolve selected IDs back to full objects for display\n  const groupById = new Map((userGroups ?? []).map((g) => [g.id, g]));\n  const agentMap = new Map(agents.map((a) => [a.id, a]));\n\n  function handleAccessChange(value: string) {\n    if (value === \"public\") {\n      formikProps.setFieldValue(\"is_public\", true);\n      formikProps.setFieldValue(\"groups\", []);\n      formikProps.setFieldValue(\"personas\", []);\n    } else {\n      formikProps.setFieldValue(\"is_public\", false);\n    }\n  }\n\n  function handleSelect(compositeValue: string) {\n    if (compositeValue.startsWith(GROUP_PREFIX)) {\n      const id = Number(compositeValue.slice(GROUP_PREFIX.length));\n      if (!selectedGroupIds.includes(id)) {\n        formikProps.setFieldValue(\"groups\", [...selectedGroupIds, id]);\n      }\n    } else if (compositeValue.startsWith(AGENT_PREFIX)) {\n      const id = Number(compositeValue.slice(AGENT_PREFIX.length));\n      if (!selectedAgentIds.includes(id)) {\n        formikProps.setFieldValue(\"personas\", [...selectedAgentIds, id]);\n      }\n    }\n  }\n\n  function handleRemoveGroup(id: number) {\n    formikProps.setFieldValue(\n      \"groups\",\n      selectedGroupIds.filter((gid) => gid !== id)\n    );\n  }\n\n  function handleRemoveAgent(id: number) {\n    formikProps.setFieldValue(\n      \"personas\",\n      selectedAgentIds.filter((aid) => aid !== id)\n    );\n  }\n\n  return (\n    <div className=\"flex flex-col w-full\">\n      <FieldWrapper>\n        <InputLayouts.Horizontal\n          name=\"is_public\"\n          title=\"Models Access\"\n          description=\"Who can access this provider.\"\n        >\n          <InputSelect\n            value={isPublic ? \"public\" : \"private\"}\n            onValueChange={handleAccessChange}\n          >\n            <InputSelect.Trigger placeholder=\"Select access level\" />\n            <InputSelect.Content>\n              <InputSelect.Item value=\"public\" icon={SvgOrganization}>\n                All Users & Agents\n              </InputSelect.Item>\n              <InputSelect.Item value=\"private\" icon={SvgUsers}>\n                Named Groups & Agents\n              </InputSelect.Item>\n            </InputSelect.Content>\n          </InputSelect>\n        </InputLayouts.Horizontal>\n      </FieldWrapper>\n\n      {!isPublic && (\n        <Card background=\"light\" border=\"none\" padding=\"sm\">\n          <Section gap={0.5}>\n            <InputComboBox\n              placeholder=\"Add groups and agents\"\n              value=\"\"\n              onChange={() => {}}\n              onValueChange={handleSelect}\n              options={availableOptions}\n              strict\n              leftSearchIcon\n            />\n\n            <Card background=\"heavy\" border=\"none\" padding=\"sm\">\n              <ContentAction\n                icon={SvgUserManage}\n                title=\"Admin\"\n                description={`${adminCount} ${\n                  adminCount === 1 ? \"member\" : \"members\"\n                }`}\n                sizePreset=\"main-ui\"\n                variant=\"section\"\n                rightChildren={\n                  <Text secondaryBody text03>\n                    Always shared\n                  </Text>\n                }\n                paddingVariant=\"fit\"\n              />\n            </Card>\n            {selectedGroupIds.length > 0 && (\n              <div className=\"grid grid-cols-2 gap-1 w-full\">\n                {selectedGroupIds.map((id) => {\n                  const group = groupById.get(id);\n                  const memberCount = group?.users.length ?? 0;\n                  return (\n                    <div key={`group-${id}`} className=\"min-w-0\">\n                      <Card background=\"heavy\" border=\"none\" padding=\"sm\">\n                        <ContentAction\n                          icon={SvgUsers}\n                          title={group?.name ?? `Group ${id}`}\n                          description={`${memberCount} ${\n                            memberCount === 1 ? \"member\" : \"members\"\n                          }`}\n                          sizePreset=\"main-ui\"\n                          variant=\"section\"\n                          rightChildren={\n                            <Button\n                              size=\"sm\"\n                              prominence=\"internal\"\n                              icon={SvgX}\n                              onClick={() => handleRemoveGroup(id)}\n                              type=\"button\"\n                            />\n                          }\n                          paddingVariant=\"fit\"\n                        />\n                      </Card>\n                    </div>\n                  );\n                })}\n              </div>\n            )}\n\n            <FieldSeparator />\n\n            {selectedAgentIds.length > 0 ? (\n              <div className=\"grid grid-cols-2 gap-1 w-full\">\n                {selectedAgentIds.map((id) => {\n                  const agent = agentMap.get(id);\n                  return (\n                    <div key={`agent-${id}`} className=\"min-w-0\">\n                      <Card background=\"heavy\" border=\"none\" padding=\"sm\">\n                        <ContentAction\n                          icon={\n                            agent\n                              ? () => <AgentAvatar agent={agent} size={20} />\n                              : SvgSparkle\n                          }\n                          title={agent?.name ?? `Agent ${id}`}\n                          description=\"Agent\"\n                          sizePreset=\"main-ui\"\n                          variant=\"section\"\n                          rightChildren={\n                            <Button\n                              size=\"sm\"\n                              prominence=\"internal\"\n                              icon={SvgX}\n                              onClick={() => handleRemoveAgent(id)}\n                              type=\"button\"\n                            />\n                          }\n                          paddingVariant=\"fit\"\n                        />\n                      </Card>\n                    </div>\n                  );\n                })}\n              </div>\n            ) : (\n              <div className=\"w-full p-2\">\n                <Content\n                  icon={SvgOnyxOctagon}\n                  title=\"No agents added\"\n                  description=\"This provider will not be used by any agents.\"\n                  variant=\"section\"\n                  sizePreset=\"main-ui\"\n                />\n              </div>\n            )}\n          </Section>\n        </Card>\n      )}\n    </div>\n  );\n}\n\n// ─── ModelsField ─────────────────────────────────────────────────────\n\nexport interface ModelsFieldProps<T> {\n  formikProps: FormikProps<T>;\n  modelConfigurations: ModelConfiguration[];\n  recommendedDefaultModel: SimpleKnownModel | null;\n  shouldShowAutoUpdateToggle: boolean;\n  /** Called when the user clicks the refresh button to re-fetch models. */\n  onRefetch?: () => Promise<void> | void;\n  /** Called when the user adds a custom model by name. Enables the \"Add Model\" input. */\n  onAddModel?: (modelName: string) => void;\n}\n\nexport function ModelsField<T extends BaseLLMFormValues>({\n  formikProps,\n  modelConfigurations,\n  recommendedDefaultModel,\n  shouldShowAutoUpdateToggle,\n  onRefetch,\n  onAddModel,\n}: ModelsFieldProps<T>) {\n  const [newModelName, setNewModelName] = useState(\"\");\n  const isAutoMode = formikProps.values.is_auto_mode;\n  const selectedModels = formikProps.values.selected_model_names ?? [];\n  const defaultModel = formikProps.values.default_model_name;\n\n  function handleCheckboxChange(modelName: string, checked: boolean) {\n    // Read current values inside the handler to avoid stale closure issues\n    const currentSelected = formikProps.values.selected_model_names ?? [];\n    const currentDefault = formikProps.values.default_model_name;\n\n    if (checked) {\n      const newSelected = [...currentSelected, modelName];\n      formikProps.setFieldValue(\"selected_model_names\", newSelected);\n      // If this is the first model, set it as default\n      if (currentSelected.length === 0) {\n        formikProps.setFieldValue(\"default_model_name\", modelName);\n      }\n    } else {\n      const newSelected = currentSelected.filter((name) => name !== modelName);\n      formikProps.setFieldValue(\"selected_model_names\", newSelected);\n      // If removing the default, set the first remaining model as default\n      if (currentDefault === modelName && newSelected.length > 0) {\n        formikProps.setFieldValue(\"default_model_name\", newSelected[0]);\n      } else if (newSelected.length === 0) {\n        formikProps.setFieldValue(\"default_model_name\", undefined);\n      }\n    }\n  }\n\n  function handleSetDefault(modelName: string) {\n    formikProps.setFieldValue(\"default_model_name\", modelName);\n  }\n\n  function handleToggleAutoMode(nextIsAutoMode: boolean) {\n    formikProps.setFieldValue(\"is_auto_mode\", nextIsAutoMode);\n    formikProps.setFieldValue(\n      \"selected_model_names\",\n      modelConfigurations.filter((m) => m.is_visible).map((m) => m.name)\n    );\n    formikProps.setFieldValue(\n      \"default_model_name\",\n      recommendedDefaultModel?.name ?? undefined\n    );\n  }\n\n  const allSelected =\n    modelConfigurations.length > 0 &&\n    modelConfigurations.every((m) => selectedModels.includes(m.name));\n\n  function handleToggleSelectAll() {\n    if (allSelected) {\n      formikProps.setFieldValue(\"selected_model_names\", []);\n      formikProps.setFieldValue(\"default_model_name\", undefined);\n    } else {\n      const allNames = modelConfigurations.map((m) => m.name);\n      formikProps.setFieldValue(\"selected_model_names\", allNames);\n      if (!formikProps.values.default_model_name && allNames.length > 0) {\n        formikProps.setFieldValue(\"default_model_name\", allNames[0]);\n      }\n    }\n  }\n\n  const visibleModels = modelConfigurations.filter((m) => m.is_visible);\n\n  return (\n    <Card background=\"light\" border=\"none\" padding=\"sm\">\n      <Section gap={0.5}>\n        <InputLayouts.Horizontal\n          title=\"Models\"\n          description=\"Select models to make available for this provider.\"\n          nonInteractive\n          center\n        >\n          <Section flexDirection=\"row\" gap={0}>\n            <Button\n              disabled={isAutoMode || modelConfigurations.length === 0}\n              prominence=\"tertiary\"\n              size=\"md\"\n              onClick={handleToggleSelectAll}\n            >\n              {allSelected ? \"Unselect All\" : \"Select All\"}\n            </Button>\n            {onRefetch && (\n              <Button\n                prominence=\"tertiary\"\n                icon={SvgRefreshCw}\n                onClick={async () => {\n                  try {\n                    await onRefetch();\n                  } catch (err) {\n                    toast.error(\n                      err instanceof Error\n                        ? err.message\n                        : \"Failed to fetch models\"\n                    );\n                  }\n                }}\n              />\n            )}\n          </Section>\n        </InputLayouts.Horizontal>\n\n        {modelConfigurations.length === 0 ? (\n          <EmptyMessageCard title=\"No models available.\" padding=\"sm\" />\n        ) : (\n          <Section gap={0.25}>\n            {isAutoMode\n              ? // Auto mode: read-only display\n                visibleModels.map((model) => (\n                  <Hoverable.Root\n                    key={model.name}\n                    group=\"LLMConfigurationButton\"\n                    widthVariant=\"full\"\n                  >\n                    <LineItemButton\n                      variant=\"section\"\n                      sizePreset=\"main-ui\"\n                      selectVariant=\"select-heavy\"\n                      state=\"selected\"\n                      icon={() => <Checkbox checked />}\n                      title={model.display_name || model.name}\n                      rightChildren={\n                        model.name === defaultModel ? (\n                          <Section>\n                            <Tag title=\"Default Model\" color=\"blue\" />\n                          </Section>\n                        ) : undefined\n                      }\n                    />\n                  </Hoverable.Root>\n                ))\n              : // Manual mode: checkbox selection\n                modelConfigurations.map((modelConfiguration) => {\n                  const isSelected = selectedModels.includes(\n                    modelConfiguration.name\n                  );\n                  const isDefault = defaultModel === modelConfiguration.name;\n\n                  return (\n                    <Hoverable.Root\n                      key={modelConfiguration.name}\n                      group=\"LLMConfigurationButton\"\n                      widthVariant=\"full\"\n                    >\n                      <LineItemButton\n                        variant=\"section\"\n                        sizePreset=\"main-ui\"\n                        selectVariant=\"select-heavy\"\n                        state={isSelected ? \"selected\" : \"empty\"}\n                        icon={() => <Checkbox checked={isSelected} />}\n                        title={modelConfiguration.name}\n                        onClick={() =>\n                          handleCheckboxChange(\n                            modelConfiguration.name,\n                            !isSelected\n                          )\n                        }\n                        rightChildren={\n                          isSelected ? (\n                            isDefault ? (\n                              <Section>\n                                <Tag color=\"blue\" title=\"Default Model\" />\n                              </Section>\n                            ) : (\n                              <Hoverable.Item\n                                group=\"LLMConfigurationButton\"\n                                variant=\"opacity-on-hover\"\n                              >\n                                <Button\n                                  size=\"sm\"\n                                  prominence=\"internal\"\n                                  onClick={(e) => {\n                                    e.stopPropagation();\n                                    handleSetDefault(modelConfiguration.name);\n                                  }}\n                                  type=\"button\"\n                                >\n                                  Set as default\n                                </Button>\n                              </Hoverable.Item>\n                            )\n                          ) : undefined\n                        }\n                      />\n                    </Hoverable.Root>\n                  );\n                })}\n          </Section>\n        )}\n\n        {onAddModel && !isAutoMode && (\n          <Section flexDirection=\"row\" gap={0.5}>\n            <div className=\"flex-1\">\n              <InputTypeIn\n                placeholder=\"Enter model name\"\n                value={newModelName}\n                onChange={(e) => setNewModelName(e.target.value)}\n                onKeyDown={(e) => {\n                  if (e.key === \"Enter\" && newModelName.trim()) {\n                    e.preventDefault();\n                    const trimmed = newModelName.trim();\n                    if (!modelConfigurations.some((m) => m.name === trimmed)) {\n                      onAddModel(trimmed);\n                      setNewModelName(\"\");\n                    }\n                  }\n                }}\n                showClearButton={false}\n              />\n            </div>\n            <Button\n              prominence=\"secondary\"\n              icon={SvgPlusCircle}\n              type=\"button\"\n              disabled={\n                !newModelName.trim() ||\n                modelConfigurations.some((m) => m.name === newModelName.trim())\n              }\n              onClick={() => {\n                const trimmed = newModelName.trim();\n                if (\n                  trimmed &&\n                  !modelConfigurations.some((m) => m.name === trimmed)\n                ) {\n                  onAddModel(trimmed);\n                  setNewModelName(\"\");\n                }\n              }}\n            >\n              Add Model\n            </Button>\n          </Section>\n        )}\n\n        {shouldShowAutoUpdateToggle && (\n          <InputLayouts.Horizontal\n            title=\"Auto Update\"\n            description=\"Update the available models when new models are released.\"\n          >\n            <Switch\n              checked={isAutoMode}\n              onCheckedChange={handleToggleAutoMode}\n            />\n          </InputLayouts.Horizontal>\n        )}\n      </Section>\n    </Card>\n  );\n}\n\n// ============================================================================\n// LLMConfigurationModalWrapper\n// ============================================================================\n\ninterface LLMConfigurationModalWrapperProps {\n  providerEndpoint: string;\n  providerName?: string;\n  existingProviderName?: string;\n  onClose: () => void;\n  isFormValid: boolean;\n  isDirty?: boolean;\n  isTesting?: boolean;\n  isSubmitting?: boolean;\n  children: ReactNode;\n}\n\nexport function LLMConfigurationModalWrapper({\n  providerEndpoint,\n  providerName,\n  existingProviderName,\n  onClose,\n  isFormValid,\n  isDirty,\n  isTesting,\n  isSubmitting,\n  children,\n}: LLMConfigurationModalWrapperProps) {\n  const busy = isTesting || isSubmitting;\n  const providerIcon = getProviderIcon(providerEndpoint);\n  const providerDisplayName =\n    providerName ?? getProviderDisplayName(providerEndpoint);\n  const providerProductName = getProviderProductName(providerEndpoint);\n\n  const title = existingProviderName\n    ? `Configure \"${existingProviderName}\"`\n    : `Set up ${providerProductName}`;\n  const description = `Connect to ${providerDisplayName} and set up your ${providerProductName} models.`;\n\n  return (\n    <Modal open onOpenChange={onClose}>\n      <Modal.Content width=\"lg\" height=\"lg\">\n        <Form className=\"flex flex-col h-full min-h-0\">\n          <Modal.Header\n            icon={providerIcon}\n            moreIcon1={SvgArrowExchange}\n            moreIcon2={SvgOnyxLogo}\n            title={title}\n            description={description}\n            onClose={onClose}\n          />\n          <Modal.Body padding={0.5} gap={0.5}>\n            {children}\n          </Modal.Body>\n          <Modal.Footer>\n            <Button prominence=\"secondary\" onClick={onClose} type=\"button\">\n              Cancel\n            </Button>\n            <Button\n              disabled={\n                !isFormValid || busy || (!!existingProviderName && !isDirty)\n              }\n              type=\"submit\"\n              icon={busy ? SimpleLoader : undefined}\n            >\n              {existingProviderName\n                ? busy\n                  ? \"Updating\"\n                  : \"Update\"\n                : busy\n                  ? \"Connecting\"\n                  : \"Connect\"}\n            </Button>\n          </Modal.Footer>\n        </Form>\n      </Modal.Content>\n    </Modal>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/svc.ts",
    "content": "import {\n  LLMProviderName,\n  LLMProviderView,\n  ModelConfiguration,\n} from \"@/interfaces/llm\";\nimport {\n  LLM_ADMIN_URL,\n  LLM_PROVIDERS_ADMIN_URL,\n} from \"@/lib/llmConfig/constants\";\nimport { refreshLlmProviderCaches } from \"@/lib/llmConfig/cache\";\nimport { toast } from \"@/hooks/useToast\";\nimport isEqual from \"lodash/isEqual\";\nimport { parseAzureTargetUri } from \"@/lib/azureTargetUri\";\nimport {\n  track,\n  AnalyticsEvent,\n  LLMProviderConfiguredSource,\n} from \"@/lib/analytics\";\nimport {\n  BaseLLMFormValues,\n  SubmitLLMProviderParams,\n  SubmitOnboardingProviderParams,\n  TestApiKeyResult,\n  filterModelConfigurations,\n  getAutoModeModelConfigurations,\n} from \"@/sections/modals/llmConfig/utils\";\n\nconst submitLlmTestRequest = async (\n  payload: Record<string, unknown>,\n  fallbackErrorMessage: string\n): Promise<TestApiKeyResult> => {\n  try {\n    const response = await fetch(\"/api/admin/llm/test\", {\n      method: \"POST\",\n      headers: { \"Content-Type\": \"application/json\" },\n      body: JSON.stringify(payload),\n    });\n\n    if (!response.ok) {\n      const errorMsg = (await response.json()).detail;\n      return { ok: false, errorMessage: errorMsg };\n    }\n\n    return { ok: true };\n  } catch {\n    return {\n      ok: false,\n      errorMessage: fallbackErrorMessage,\n    };\n  }\n};\n\nexport const submitLLMProvider = async <T extends BaseLLMFormValues>({\n  providerName,\n  values,\n  initialValues,\n  modelConfigurations,\n  existingLlmProvider,\n  shouldMarkAsDefault,\n  hideSuccess,\n  setIsTesting,\n  mutate,\n  onClose,\n  setSubmitting,\n}: SubmitLLMProviderParams<T>): Promise<void> => {\n  setSubmitting(true);\n\n  const { selected_model_names: visibleModels, api_key, ...rest } = values;\n\n  // In auto mode, use recommended models from descriptor\n  // In manual mode, use user's selection\n  let filteredModelConfigurations: ModelConfiguration[];\n  let finalDefaultModelName = rest.default_model_name;\n\n  if (values.is_auto_mode) {\n    filteredModelConfigurations =\n      getAutoModeModelConfigurations(modelConfigurations);\n\n    // In auto mode, use the first recommended model as default if current default isn't in the list\n    const visibleModelNames = new Set(\n      filteredModelConfigurations.map((m) => m.name)\n    );\n    if (\n      finalDefaultModelName &&\n      !visibleModelNames.has(finalDefaultModelName)\n    ) {\n      finalDefaultModelName = filteredModelConfigurations[0]?.name ?? \"\";\n    }\n  } else {\n    filteredModelConfigurations = filterModelConfigurations(\n      modelConfigurations,\n      visibleModels,\n      rest.default_model_name as string | undefined\n    );\n  }\n\n  const customConfigChanged = !isEqual(\n    values.custom_config,\n    initialValues.custom_config\n  );\n\n  const normalizedApiBase =\n    typeof rest.api_base === \"string\" && rest.api_base.trim() === \"\"\n      ? undefined\n      : rest.api_base;\n\n  const finalValues = {\n    ...rest,\n    api_base: normalizedApiBase,\n    default_model_name: finalDefaultModelName,\n    api_key,\n    api_key_changed: api_key !== (initialValues.api_key as string | undefined),\n    custom_config_changed: customConfigChanged,\n    model_configurations: filteredModelConfigurations,\n  };\n\n  // Test the configuration\n  if (!isEqual(finalValues, initialValues)) {\n    setIsTesting(true);\n\n    const response = await fetch(\"/api/admin/llm/test\", {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        provider: providerName,\n        ...finalValues,\n        model: finalDefaultModelName,\n        id: existingLlmProvider?.id,\n      }),\n    });\n    setIsTesting(false);\n\n    if (!response.ok) {\n      const errorMsg = (await response.json()).detail;\n      toast.error(errorMsg);\n      setSubmitting(false);\n      return;\n    }\n  }\n\n  const response = await fetch(\n    `${LLM_PROVIDERS_ADMIN_URL}${\n      existingLlmProvider ? \"\" : \"?is_creation=true\"\n    }`,\n    {\n      method: \"PUT\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        provider: providerName,\n        ...finalValues,\n        id: existingLlmProvider?.id,\n      }),\n    }\n  );\n\n  if (!response.ok) {\n    const errorMsg = (await response.json()).detail;\n    const fullErrorMsg = existingLlmProvider\n      ? `Failed to update provider: ${errorMsg}`\n      : `Failed to enable provider: ${errorMsg}`;\n    toast.error(fullErrorMsg);\n    return;\n  }\n\n  if (shouldMarkAsDefault) {\n    const newLlmProvider = (await response.json()) as LLMProviderView;\n    const setDefaultResponse = await fetch(`${LLM_ADMIN_URL}/default`, {\n      method: \"POST\",\n      headers: {\n        \"Content-Type\": \"application/json\",\n      },\n      body: JSON.stringify({\n        provider_id: newLlmProvider.id,\n        model_name: finalDefaultModelName,\n      }),\n    });\n    if (!setDefaultResponse.ok) {\n      const errorMsg = (await setDefaultResponse.json()).detail;\n      toast.error(`Failed to set provider as default: ${errorMsg}`);\n      return;\n    }\n  }\n\n  await refreshLlmProviderCaches(mutate);\n  onClose();\n\n  if (!hideSuccess) {\n    const successMsg = existingLlmProvider\n      ? \"Provider updated successfully!\"\n      : \"Provider enabled successfully!\";\n    toast.success(successMsg);\n  }\n\n  const knownProviders = new Set<string>(Object.values(LLMProviderName));\n  track(AnalyticsEvent.CONFIGURED_LLM_PROVIDER, {\n    provider: knownProviders.has(providerName) ? providerName : \"custom\",\n    is_creation: !existingLlmProvider,\n    source: LLMProviderConfiguredSource.ADMIN_PAGE,\n  });\n\n  setSubmitting(false);\n};\n\nexport const testApiKeyHelper = async (\n  providerName: string,\n  formValues: Record<string, unknown>,\n  apiKey?: string,\n  modelName?: string,\n  customConfigOverride?: Record<string, unknown>\n): Promise<TestApiKeyResult> => {\n  let finalApiBase = formValues?.api_base;\n  let finalApiVersion = formValues?.api_version;\n  let finalDeploymentName = formValues?.deployment_name;\n\n  if (providerName === \"azure\" && formValues?.target_uri) {\n    try {\n      const { url, apiVersion, deploymentName } = parseAzureTargetUri(\n        formValues.target_uri as string\n      );\n      finalApiBase = url.origin;\n      finalApiVersion = apiVersion;\n      finalDeploymentName = deploymentName || \"\";\n    } catch {\n      // leave defaults so validation can surface errors upstream\n    }\n  }\n\n  const payload = {\n    api_key: apiKey ?? formValues?.api_key,\n    api_base: finalApiBase,\n    api_version: finalApiVersion,\n    deployment_name: finalDeploymentName,\n    provider: providerName,\n    api_key_changed: true,\n    custom_config_changed: true,\n    custom_config: {\n      ...((formValues?.custom_config as Record<string, unknown>) ?? {}),\n      ...(customConfigOverride ?? {}),\n    },\n    model: modelName ?? (formValues?.default_model_name as string) ?? \"\",\n  };\n\n  return await submitLlmTestRequest(\n    payload,\n    \"An error occurred while testing the API key.\"\n  );\n};\n\nexport const testCustomProvider = async (\n  formValues: Record<string, unknown>\n): Promise<TestApiKeyResult> => {\n  return await submitLlmTestRequest(\n    { ...formValues },\n    \"An error occurred while testing the custom provider.\"\n  );\n};\n\nexport const submitOnboardingProvider = async ({\n  providerName,\n  payload,\n  onboardingState,\n  onboardingActions,\n  isCustomProvider,\n  onClose,\n  setIsSubmitting,\n}: SubmitOnboardingProviderParams): Promise<void> => {\n  setIsSubmitting(true);\n\n  // Test credentials\n  let result: TestApiKeyResult;\n  if (isCustomProvider) {\n    result = await testCustomProvider(payload);\n  } else {\n    result = await testApiKeyHelper(providerName, payload);\n  }\n\n  if (!result.ok) {\n    toast.error(result.errorMessage);\n    setIsSubmitting(false);\n    return;\n  }\n\n  // Create provider\n  const response = await fetch(`${LLM_PROVIDERS_ADMIN_URL}?is_creation=true`, {\n    method: \"PUT\",\n    headers: { \"Content-Type\": \"application/json\" },\n    body: JSON.stringify(payload),\n  });\n\n  if (!response.ok) {\n    const errorMsg = (await response.json()).detail;\n    toast.error(errorMsg);\n    setIsSubmitting(false);\n    return;\n  }\n\n  // Set as default if first provider\n  if (\n    onboardingState?.data?.llmProviders == null ||\n    onboardingState.data.llmProviders.length === 0\n  ) {\n    try {\n      const newLlmProvider = await response.json();\n      if (newLlmProvider?.id != null) {\n        const defaultModelName =\n          (payload as Record<string, string>).default_model_name ??\n          (payload as Record<string, ModelConfiguration[]>)\n            .model_configurations?.[0]?.name ??\n          \"\";\n\n        if (defaultModelName) {\n          const setDefaultResponse = await fetch(`${LLM_ADMIN_URL}/default`, {\n            method: \"POST\",\n            headers: { \"Content-Type\": \"application/json\" },\n            body: JSON.stringify({\n              provider_id: newLlmProvider.id,\n              model_name: defaultModelName,\n            }),\n          });\n          if (!setDefaultResponse.ok) {\n            const err = await setDefaultResponse.json().catch(() => ({}));\n            toast.error(err?.detail ?? \"Failed to set provider as default\");\n            setIsSubmitting(false);\n            return;\n          }\n        }\n      }\n    } catch (_e) {\n      toast.error(\"Failed to set new provider as default\");\n    }\n  }\n\n  track(AnalyticsEvent.CONFIGURED_LLM_PROVIDER, {\n    provider: isCustomProvider ? \"custom\" : providerName,\n    is_creation: true,\n    source: LLMProviderConfiguredSource.CHAT_ONBOARDING,\n  });\n\n  // Update onboarding state\n  onboardingActions.updateData({\n    llmProviders: [\n      ...(onboardingState?.data.llmProviders ?? []),\n      isCustomProvider ? \"custom\" : providerName,\n    ],\n  });\n  onboardingActions.setButtonActive(true);\n\n  setIsSubmitting(false);\n  onClose();\n};\n"
  },
  {
    "path": "web/src/sections/modals/llmConfig/utils.ts",
    "content": "import {\n  LLMProviderView,\n  ModelConfiguration,\n  WellKnownLLMProviderDescriptor,\n} from \"@/interfaces/llm\";\nimport * as Yup from \"yup\";\nimport { ScopedMutator } from \"swr\";\nimport { OnboardingActions, OnboardingState } from \"@/interfaces/onboarding\";\n\n// Common class names for the Form component across all LLM provider forms\nexport const LLM_FORM_CLASS_NAME = \"flex flex-col gap-y-4 items-stretch mt-6\";\n\nexport const buildDefaultInitialValues = (\n  existingLlmProvider?: LLMProviderView,\n  modelConfigurations?: ModelConfiguration[],\n  currentDefaultModelName?: string\n) => {\n  const defaultModelName =\n    (currentDefaultModelName &&\n    existingLlmProvider?.model_configurations?.some(\n      (m) => m.name === currentDefaultModelName\n    )\n      ? currentDefaultModelName\n      : undefined) ??\n    existingLlmProvider?.model_configurations?.[0]?.name ??\n    modelConfigurations?.[0]?.name ??\n    \"\";\n\n  // Auto mode must be explicitly enabled by the user\n  // Default to false for new providers, preserve existing value when editing\n  const isAutoMode = existingLlmProvider?.is_auto_mode ?? false;\n\n  return {\n    name: existingLlmProvider?.name || \"\",\n    default_model_name: defaultModelName,\n    is_public: existingLlmProvider?.is_public ?? true,\n    is_auto_mode: isAutoMode,\n    groups: existingLlmProvider?.groups ?? [],\n    personas: existingLlmProvider?.personas ?? [],\n    selected_model_names: existingLlmProvider\n      ? existingLlmProvider.model_configurations\n          .filter((modelConfiguration) => modelConfiguration.is_visible)\n          .map((modelConfiguration) => modelConfiguration.name)\n      : modelConfigurations\n          ?.filter((modelConfiguration) => modelConfiguration.is_visible)\n          .map((modelConfiguration) => modelConfiguration.name) ?? [],\n  };\n};\n\nexport const buildDefaultValidationSchema = () => {\n  return Yup.object({\n    name: Yup.string().required(\"Display Name is required\"),\n    default_model_name: Yup.string().required(\"Model name is required\"),\n    is_public: Yup.boolean().required(),\n    is_auto_mode: Yup.boolean().required(),\n    groups: Yup.array().of(Yup.number()),\n    personas: Yup.array().of(Yup.number()),\n    selected_model_names: Yup.array().of(Yup.string()),\n  });\n};\n\nexport const buildAvailableModelConfigurations = (\n  existingLlmProvider?: LLMProviderView,\n  wellKnownLLMProvider?: WellKnownLLMProviderDescriptor\n): ModelConfiguration[] => {\n  const existingModels = existingLlmProvider?.model_configurations ?? [];\n  const wellKnownModels = wellKnownLLMProvider?.known_models ?? [];\n\n  // Create a map to deduplicate by model name, preferring existing models\n  const modelMap = new Map<string, ModelConfiguration>();\n\n  // Add well-known models first\n  wellKnownModels.forEach((model) => {\n    modelMap.set(model.name, model);\n  });\n\n  // Override with existing models (they take precedence)\n  existingModels.forEach((model) => {\n    modelMap.set(model.name, model);\n  });\n\n  return Array.from(modelMap.values());\n};\n\n// Base form values that all provider forms share\nexport interface BaseLLMFormValues {\n  name: string;\n  api_key?: string;\n  api_base?: string;\n  default_model_name?: string;\n  is_public: boolean;\n  is_auto_mode: boolean;\n  groups: number[];\n  personas: number[];\n  selected_model_names: string[];\n  custom_config?: Record<string, string>;\n}\n\nexport interface SubmitLLMProviderParams<\n  T extends BaseLLMFormValues = BaseLLMFormValues,\n> {\n  providerName: string;\n  values: T;\n  initialValues: T;\n  modelConfigurations: ModelConfiguration[];\n  existingLlmProvider?: LLMProviderView;\n  shouldMarkAsDefault?: boolean;\n  hideSuccess?: boolean;\n  setIsTesting: (testing: boolean) => void;\n  mutate: ScopedMutator;\n  onClose: () => void;\n  setSubmitting: (submitting: boolean) => void;\n}\n\nexport const filterModelConfigurations = (\n  currentModelConfigurations: ModelConfiguration[],\n  visibleModels: string[],\n  defaultModelName?: string\n): ModelConfiguration[] => {\n  return currentModelConfigurations\n    .map(\n      (modelConfiguration): ModelConfiguration => ({\n        name: modelConfiguration.name,\n        is_visible: visibleModels.includes(modelConfiguration.name),\n        max_input_tokens: modelConfiguration.max_input_tokens ?? null,\n        supports_image_input: modelConfiguration.supports_image_input,\n        supports_reasoning: modelConfiguration.supports_reasoning,\n        display_name: modelConfiguration.display_name,\n      })\n    )\n    .filter(\n      (modelConfiguration) =>\n        modelConfiguration.name === defaultModelName ||\n        modelConfiguration.is_visible\n    );\n};\n\n// Helper to get model configurations for auto mode\n// In auto mode, we include ALL models but preserve their visibility status\n// Models in the auto config are visible, others are created but not visible\nexport const getAutoModeModelConfigurations = (\n  modelConfigurations: ModelConfiguration[]\n): ModelConfiguration[] => {\n  return modelConfigurations.map(\n    (modelConfiguration): ModelConfiguration => ({\n      name: modelConfiguration.name,\n      is_visible: modelConfiguration.is_visible,\n      max_input_tokens: modelConfiguration.max_input_tokens ?? null,\n      supports_image_input: modelConfiguration.supports_image_input,\n      supports_reasoning: modelConfiguration.supports_reasoning,\n      display_name: modelConfiguration.display_name,\n    })\n  );\n};\n\nexport type TestApiKeyResult =\n  | { ok: true }\n  | { ok: false; errorMessage: string };\n\nexport const getModelOptions = (\n  fetchedModelConfigurations: Array<{ name: string }>\n) => {\n  return fetchedModelConfigurations.map((model) => ({\n    label: model.name,\n    value: model.name,\n  }));\n};\n\n/** Initial values used by onboarding forms (flat shape, always creating new). */\nexport const buildOnboardingInitialValues = () => ({\n  name: \"\",\n  provider: \"\",\n  api_key: \"\",\n  api_base: \"\",\n  api_version: \"\",\n  default_model_name: \"\",\n  model_configurations: [] as ModelConfiguration[],\n  custom_config: {} as Record<string, string>,\n  api_key_changed: true,\n  groups: [] as number[],\n  is_public: true,\n  is_auto_mode: false,\n  personas: [] as number[],\n  selected_model_names: [] as string[],\n  deployment_name: \"\",\n  target_uri: \"\",\n});\n\nexport interface SubmitOnboardingProviderParams {\n  providerName: string;\n  payload: Record<string, unknown>;\n  onboardingState: OnboardingState;\n  onboardingActions: OnboardingActions;\n  isCustomProvider: boolean;\n  onClose: () => void;\n  setIsSubmitting: (submitting: boolean) => void;\n}\n"
  },
  {
    "path": "web/src/sections/onboarding/OnboardingFlow.tsx",
    "content": "\"use client\";\n\nimport { memo } from \"react\";\nimport OnboardingHeader from \"./components/OnboardingHeader\";\nimport NameStep from \"./steps/NameStep\";\nimport LLMStep from \"./steps/LLMStep\";\nimport FinalStep from \"./steps/FinalStep\";\nimport {\n  OnboardingActions,\n  OnboardingState,\n  OnboardingStep,\n} from \"@/interfaces/onboarding\";\nimport { WellKnownLLMProviderDescriptor } from \"@/interfaces/llm\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { UserRole } from \"@/lib/types\";\nimport NonAdminStep from \"./components/NonAdminStep\";\n\ntype OnboardingFlowProps = {\n  showOnboarding: boolean;\n  handleHideOnboarding: () => void;\n  handleFinishOnboarding: () => void;\n  state: OnboardingState;\n  actions: OnboardingActions;\n  llmDescriptors: WellKnownLLMProviderDescriptor[];\n};\n\nconst OnboardingFlowInner = ({\n  showOnboarding,\n  handleHideOnboarding,\n  handleFinishOnboarding,\n  state: onboardingState,\n  actions: onboardingActions,\n  llmDescriptors,\n}: OnboardingFlowProps) => {\n  const { user } = useUser();\n\n  if (!user) return null;\n\n  const hasStarted = onboardingState.currentStep !== OnboardingStep.Welcome;\n\n  return user.role === UserRole.ADMIN ? (\n    showOnboarding ? (\n      <div\n        className=\"flex flex-col items-center justify-center w-full max-w-[var(--app-page-main-content-width)] gap-2 mb-4\"\n        aria-label=\"onboarding-flow\"\n      >\n        <OnboardingHeader\n          state={onboardingState}\n          actions={onboardingActions}\n          handleHideOnboarding={handleHideOnboarding}\n          handleFinishOnboarding={handleFinishOnboarding}\n        />\n        {hasStarted && (\n          <div className=\"relative w-full overflow-hidden\">\n            <div className=\"flex flex-col gap-2 animate-in slide-in-from-right duration-500 ease-out\">\n              <NameStep state={onboardingState} actions={onboardingActions} />\n              <LLMStep\n                state={onboardingState}\n                actions={onboardingActions}\n                llmDescriptors={llmDescriptors}\n                disabled={\n                  onboardingState.currentStep !== OnboardingStep.LlmSetup\n                }\n              />\n              <div\n                className={\n                  \"transition-all duration-500 ease-out \" +\n                  (onboardingState.currentStep === OnboardingStep.Complete\n                    ? \"opacity-100 translate-x-0\"\n                    : \"opacity-0 translate-x-full\")\n                }\n              >\n                {onboardingState.currentStep === OnboardingStep.Complete && (\n                  <FinalStep />\n                )}\n              </div>\n            </div>\n          </div>\n        )}\n      </div>\n    ) : (\n      // When showOnboarding is false, the parent only renders this component\n      // if the admin hasn't set their name.\n      <NonAdminStep />\n    )\n  ) : !user.personalization?.name ? (\n    <NonAdminStep />\n  ) : null;\n};\n\nconst OnboardingFlow = memo(OnboardingFlowInner);\nexport default OnboardingFlow;\n"
  },
  {
    "path": "web/src/sections/onboarding/__tests__/onboardingReducer.test.ts",
    "content": "import { onboardingReducer, initialState } from \"../reducer\";\nimport {\n  OnboardingActionType,\n  OnboardingStep,\n  OnboardingState,\n} from \"@/interfaces/onboarding\";\n\ndescribe(\"onboardingReducer\", () => {\n  describe(\"initial state\", () => {\n    it(\"starts at Welcome step with default values\", () => {\n      expect(initialState).toEqual({\n        currentStep: OnboardingStep.Welcome,\n        stepIndex: 0,\n        totalSteps: 3,\n        data: {},\n        isButtonActive: true,\n        isLoading: false,\n      });\n    });\n  });\n\n  describe(\"NEXT_STEP\", () => {\n    it(\"advances Welcome -> Name\", () => {\n      const result = onboardingReducer(initialState, {\n        type: OnboardingActionType.NEXT_STEP,\n      });\n      expect(result.currentStep).toBe(OnboardingStep.Name);\n      expect(result.stepIndex).toBe(1);\n    });\n\n    it(\"advances Name -> LlmSetup\", () => {\n      const state: OnboardingState = {\n        ...initialState,\n        currentStep: OnboardingStep.Name,\n        stepIndex: 1,\n      };\n      const result = onboardingReducer(state, {\n        type: OnboardingActionType.NEXT_STEP,\n      });\n      expect(result.currentStep).toBe(OnboardingStep.LlmSetup);\n      expect(result.stepIndex).toBe(2);\n    });\n\n    it(\"advances LlmSetup -> Complete and sets isButtonActive to true\", () => {\n      const state: OnboardingState = {\n        ...initialState,\n        currentStep: OnboardingStep.LlmSetup,\n        stepIndex: 2,\n        isButtonActive: false,\n      };\n      const result = onboardingReducer(state, {\n        type: OnboardingActionType.NEXT_STEP,\n      });\n      expect(result.currentStep).toBe(OnboardingStep.Complete);\n      expect(result.stepIndex).toBe(3);\n      expect(result.isButtonActive).toBe(true);\n    });\n\n    it(\"is a no-op when already at Complete\", () => {\n      const state: OnboardingState = {\n        ...initialState,\n        currentStep: OnboardingStep.Complete,\n        stepIndex: 3,\n      };\n      const result = onboardingReducer(state, {\n        type: OnboardingActionType.NEXT_STEP,\n      });\n      expect(result).toBe(state);\n    });\n  });\n\n  describe(\"PREV_STEP\", () => {\n    it(\"goes Complete -> LlmSetup\", () => {\n      const state: OnboardingState = {\n        ...initialState,\n        currentStep: OnboardingStep.Complete,\n        stepIndex: 3,\n      };\n      const result = onboardingReducer(state, {\n        type: OnboardingActionType.PREV_STEP,\n      });\n      expect(result.currentStep).toBe(OnboardingStep.LlmSetup);\n      expect(result.stepIndex).toBe(2);\n    });\n\n    it(\"goes LlmSetup -> Name\", () => {\n      const state: OnboardingState = {\n        ...initialState,\n        currentStep: OnboardingStep.LlmSetup,\n        stepIndex: 2,\n      };\n      const result = onboardingReducer(state, {\n        type: OnboardingActionType.PREV_STEP,\n      });\n      expect(result.currentStep).toBe(OnboardingStep.Name);\n      expect(result.stepIndex).toBe(1);\n    });\n\n    it(\"is a no-op when already at Welcome\", () => {\n      const result = onboardingReducer(initialState, {\n        type: OnboardingActionType.PREV_STEP,\n      });\n      expect(result).toBe(initialState);\n    });\n  });\n\n  describe(\"GO_TO_STEP\", () => {\n    it(\"jumps directly to any step\", () => {\n      const result = onboardingReducer(initialState, {\n        type: OnboardingActionType.GO_TO_STEP,\n        step: OnboardingStep.LlmSetup,\n      });\n      expect(result.currentStep).toBe(OnboardingStep.LlmSetup);\n      expect(result.stepIndex).toBe(2);\n    });\n\n    it(\"sets isButtonActive to true when jumping to Complete\", () => {\n      const state: OnboardingState = {\n        ...initialState,\n        isButtonActive: false,\n      };\n      const result = onboardingReducer(state, {\n        type: OnboardingActionType.GO_TO_STEP,\n        step: OnboardingStep.Complete,\n      });\n      expect(result.isButtonActive).toBe(true);\n      expect(result.stepIndex).toBe(3);\n    });\n\n    it(\"preserves isButtonActive when jumping to non-Complete step\", () => {\n      const state: OnboardingState = {\n        ...initialState,\n        isButtonActive: false,\n      };\n      const result = onboardingReducer(state, {\n        type: OnboardingActionType.GO_TO_STEP,\n        step: OnboardingStep.Name,\n      });\n      expect(result.isButtonActive).toBe(false);\n    });\n  });\n\n  describe(\"UPDATE_DATA\", () => {\n    it(\"merges userName into data\", () => {\n      const result = onboardingReducer(initialState, {\n        type: OnboardingActionType.UPDATE_DATA,\n        payload: { userName: \"Alice\" },\n      });\n      expect(result.data.userName).toBe(\"Alice\");\n    });\n\n    it(\"merges llmProviders into data\", () => {\n      const result = onboardingReducer(initialState, {\n        type: OnboardingActionType.UPDATE_DATA,\n        payload: { llmProviders: [\"openai\", \"anthropic\"] },\n      });\n      expect(result.data.llmProviders).toEqual([\"openai\", \"anthropic\"]);\n    });\n\n    it(\"preserves existing data fields when merging new ones\", () => {\n      const state: OnboardingState = {\n        ...initialState,\n        data: { userName: \"Alice\" },\n      };\n      const result = onboardingReducer(state, {\n        type: OnboardingActionType.UPDATE_DATA,\n        payload: { llmProviders: [\"openai\"] },\n      });\n      expect(result.data.userName).toBe(\"Alice\");\n      expect(result.data.llmProviders).toEqual([\"openai\"]);\n    });\n  });\n\n  describe(\"SET_BUTTON_ACTIVE\", () => {\n    it(\"sets isButtonActive to false\", () => {\n      const result = onboardingReducer(initialState, {\n        type: OnboardingActionType.SET_BUTTON_ACTIVE,\n        isButtonActive: false,\n      });\n      expect(result.isButtonActive).toBe(false);\n    });\n\n    it(\"sets isButtonActive to true\", () => {\n      const state: OnboardingState = {\n        ...initialState,\n        isButtonActive: false,\n      };\n      const result = onboardingReducer(state, {\n        type: OnboardingActionType.SET_BUTTON_ACTIVE,\n        isButtonActive: true,\n      });\n      expect(result.isButtonActive).toBe(true);\n    });\n  });\n\n  describe(\"SET_LOADING\", () => {\n    it(\"sets isLoading to true\", () => {\n      const result = onboardingReducer(initialState, {\n        type: OnboardingActionType.SET_LOADING,\n        isLoading: true,\n      });\n      expect(result.isLoading).toBe(true);\n    });\n\n    it(\"sets isLoading to false\", () => {\n      const state: OnboardingState = {\n        ...initialState,\n        isLoading: true,\n      };\n      const result = onboardingReducer(state, {\n        type: OnboardingActionType.SET_LOADING,\n        isLoading: false,\n      });\n      expect(result.isLoading).toBe(false);\n    });\n  });\n\n  describe(\"RESET\", () => {\n    it(\"returns to initial state\", () => {\n      const state: OnboardingState = {\n        currentStep: OnboardingStep.Complete,\n        stepIndex: 3,\n        totalSteps: 3,\n        data: { userName: \"Alice\", llmProviders: [\"openai\"] },\n        isButtonActive: false,\n        isLoading: true,\n        error: \"some error\",\n      };\n      const result = onboardingReducer(state, {\n        type: OnboardingActionType.RESET,\n      });\n      expect(result).toEqual(initialState);\n    });\n  });\n\n  describe(\"unknown action\", () => {\n    it(\"returns state unchanged for unknown action type\", () => {\n      const result = onboardingReducer(initialState, {\n        type: \"UNKNOWN_ACTION\" as OnboardingActionType,\n      } as any);\n      expect(result).toBe(initialState);\n    });\n  });\n});\n"
  },
  {
    "path": "web/src/sections/onboarding/components/LLMProviderCard.tsx",
    "content": "\"use client\";\n\nimport { memo, useCallback, useState } from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport { cn, noProp } from \"@/lib/utils\";\nimport { Disabled } from \"@opal/core\";\nimport {\n  SvgArrowExchange,\n  SvgCheckCircle,\n  SvgServer,\n  SvgSettings,\n} from \"@opal/icons\";\nimport { ProviderIcon } from \"@/app/admin/configuration/llm/ProviderIcon\";\n\nexport interface LLMProviderCardProps {\n  title: string;\n  subtitle: string;\n  providerName?: string;\n  disabled?: boolean;\n  isConnected?: boolean;\n  onClick: () => void;\n}\n\nfunction LLMProviderCardInner({\n  title,\n  subtitle,\n  providerName,\n  disabled,\n  isConnected,\n  onClick,\n}: LLMProviderCardProps) {\n  const [isHovered, setIsHovered] = useState(false);\n\n  const handleCardClick = useCallback(() => {\n    if (disabled) {\n      return;\n    }\n\n    if (isConnected) {\n      // If connected, redirect to admin page\n      window.location.href = \"/admin/configuration/llm\";\n      return;\n    }\n\n    // If not connected, call onClick to open the form\n    onClick();\n  }, [disabled, isConnected, onClick]);\n\n  const handleSettingsClick = useCallback(\n    noProp(() => (window.location.href = \"/admin/configuration/llm\")),\n    []\n  );\n\n  return (\n    <Disabled disabled={disabled} allowClick>\n      <div\n        role=\"button\"\n        tabIndex={0}\n        onClick={handleCardClick}\n        onKeyDown={(e) => {\n          if (!disabled && (e.key === \"Enter\" || e.key === \" \")) {\n            e.preventDefault();\n            handleCardClick();\n          }\n        }}\n        onMouseEnter={() => setIsHovered(true)}\n        onMouseLeave={() => setIsHovered(false)}\n        className={cn(\n          \"flex justify-between h-full w-full p-1 rounded-12 border border-border-01 bg-background-neutral-01 transition-colors text-left\",\n          !disabled && \"hover:bg-background-neutral-02 cursor-pointer\"\n        )}\n      >\n        <div className=\"flex gap-1 p-1 flex-1 min-w-0\">\n          <div className=\"flex items-start h-full pt-0.5\">\n            {providerName ? (\n              <ProviderIcon provider={providerName} size={16} className=\"\" />\n            ) : (\n              <SvgServer className=\"w-4 h-4 stroke-text-04\" />\n            )}\n          </div>\n          <div className=\"min-w-0 flex flex-col justify-center\">\n            <Text as=\"p\" text04 mainUiAction>\n              {title}\n            </Text>\n            <Truncated text03 secondaryBody>\n              {subtitle}\n            </Truncated>\n          </div>\n        </div>\n        {isConnected ? (\n          <div className=\"flex items-start gap-1 p-1\">\n            {isHovered && (\n              // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n              <IconButton\n                internal\n                icon={SvgSettings}\n                disabled={disabled}\n                onClick={handleSettingsClick}\n                className=\"hover:bg-transparent\"\n              />\n            )}\n            <div className=\"p-1\">\n              <SvgCheckCircle className=\"w-4 h-4 stroke-status-success-05\" />\n            </div>\n          </div>\n        ) : (\n          <div className=\"flex items-start p-1\">\n            <div className=\"flex items-center gap-0.5\">\n              <Text as=\"p\" text03 secondaryAction>\n                Connect\n              </Text>\n              <div className=\"p-0.5\">\n                <SvgArrowExchange className=\"w-4 h-4 stroke-text-03\" />\n              </div>\n            </div>\n          </div>\n        )}\n      </div>\n    </Disabled>\n  );\n}\n\nconst LLMProviderCard = memo(LLMProviderCardInner);\nexport default LLMProviderCard;\n"
  },
  {
    "path": "web/src/sections/onboarding/components/NonAdminStep.tsx",
    "content": "\"use client\";\n\nimport React, { useRef, useState, useEffect } from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { updateUserPersonalization } from \"@/lib/userSettings\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { toast } from \"@/hooks/useToast\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport { Button } from \"@opal/components\";\nimport InputAvatar from \"@/refresh-components/inputs/InputAvatar\";\nimport { cn } from \"@/lib/utils\";\nimport { SvgCheckCircle, SvgEdit, SvgUser, SvgX } from \"@opal/icons\";\nimport { ContentAction } from \"@opal/layouts\";\nimport { Hoverable } from \"@opal/core\";\n\nexport default function NonAdminStep() {\n  const inputRef = useRef<HTMLInputElement>(null);\n  const { user, refreshUser } = useUser();\n  const [name, setName] = useState(\"\");\n  const [showHeader, setShowHeader] = useState(false);\n  const [isEditing, setIsEditing] = useState(true);\n  const [savedName, setSavedName] = useState(\"\");\n\n  // Initialize name from user if available\n  useEffect(() => {\n    if (user?.personalization?.name && !savedName) {\n      setSavedName(user.personalization.name);\n      setIsEditing(false);\n    }\n  }, [user?.personalization?.name, savedName]);\n\n  const containerClasses = cn(\n    \"flex items-center justify-between w-full p-3 bg-background-tint-00 rounded-16 border border-border-01 mb-4\"\n  );\n\n  const handleSave = () => {\n    updateUserPersonalization({ name })\n      .then(() => {\n        setSavedName(name);\n        setShowHeader(true);\n        setIsEditing(false);\n        // Don't call refreshUser() here — it would cause OnboardingFlow to\n        // unmount this component (since user.personalization.name becomes set),\n        // hiding the confirmation banner before the user sees it.\n        // refreshUser() is called in handleDismissConfirmation instead.\n      })\n      .catch((error) => {\n        toast.error(\"Failed to save name. Please try again.\");\n        console.error(error);\n      });\n  };\n\n  const handleDismissConfirmation = () => {\n    setShowHeader(false);\n    refreshUser();\n  };\n\n  return (\n    <>\n      {showHeader && (\n        <div\n          className=\"flex items-center justify-between w-full min-h-11 py-1 pl-3 pr-2 bg-background-tint-00 rounded-16 shadow-01 mb-2\"\n          aria-label=\"non-admin-confirmation\"\n        >\n          <ContentAction\n            icon={({ className, ...props }) => (\n              <SvgCheckCircle\n                className={cn(className, \"stroke-status-success-05\")}\n                {...props}\n              />\n            )}\n            title=\"You're all set!\"\n            sizePreset=\"main-ui\"\n            variant=\"body\"\n            prominence=\"muted\"\n            paddingVariant=\"fit\"\n            rightChildren={\n              <Button\n                prominence=\"tertiary\"\n                size=\"sm\"\n                icon={SvgX}\n                onClick={handleDismissConfirmation}\n              />\n            }\n          />\n        </div>\n      )}\n      {isEditing ? (\n        <div\n          className={containerClasses}\n          onClick={() => inputRef.current?.focus()}\n          role=\"group\"\n          aria-label=\"non-admin-name-prompt\"\n        >\n          <ContentAction\n            icon={SvgUser}\n            title=\"What should Onyx call you?\"\n            description=\"We will display this name in the app.\"\n            sizePreset=\"main-ui\"\n            variant=\"section\"\n            paddingVariant=\"fit\"\n            rightChildren={\n              <div className=\"flex items-center justify-end gap-2\">\n                <InputTypeIn\n                  ref={inputRef}\n                  placeholder=\"Your name\"\n                  value={name || \"\"}\n                  onChange={(e: React.ChangeEvent<HTMLInputElement>) =>\n                    setName(e.target.value)\n                  }\n                  onKeyDown={(e) => {\n                    if (e.key === \"Enter\" && name && name.trim().length > 0) {\n                      e.preventDefault();\n                      handleSave();\n                    }\n                  }}\n                  className=\"w-[26%] min-w-40\"\n                />\n                <Button disabled={name === \"\"} onClick={handleSave}>\n                  Save\n                </Button>\n              </div>\n            }\n          />\n        </div>\n      ) : (\n        <Hoverable.Root group=\"nonAdminName\" widthVariant=\"full\">\n          <div\n            className={containerClasses}\n            aria-label=\"Edit display name\"\n            role=\"button\"\n            tabIndex={0}\n            onClick={() => {\n              setIsEditing(true);\n              setName(savedName);\n            }}\n          >\n            <div className=\"flex items-center gap-1\">\n              <InputAvatar\n                className={cn(\n                  \"flex items-center justify-center bg-background-neutral-inverted-00\",\n                  \"w-5 h-5\"\n                )}\n              >\n                <Text as=\"p\" inverted secondaryBody>\n                  {savedName?.[0]?.toUpperCase()}\n                </Text>\n              </InputAvatar>\n              <Text as=\"p\" text04 mainUiAction>\n                {savedName}\n              </Text>\n            </div>\n            <div className=\"p-1 flex items-center gap-1\">\n              {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n              <Hoverable.Item group=\"nonAdminName\" variant=\"opacity-on-hover\">\n                <IconButton internal icon={SvgEdit} tooltip=\"Edit\" />\n              </Hoverable.Item>\n              <SvgCheckCircle className=\"w-4 h-4 stroke-status-success-05\" />\n            </div>\n          </div>\n        </Hoverable.Root>\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/onboarding/components/OnboardingHeader.tsx",
    "content": "import React from \"react\";\nimport { STEP_CONFIG } from \"@/sections/onboarding/constants\";\nimport {\n  OnboardingActions,\n  OnboardingState,\n  OnboardingStep,\n} from \"@/interfaces/onboarding\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport { SvgProgressCircle, SvgX } from \"@opal/icons\";\nimport { Card } from \"@/refresh-components/cards\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { ContentAction } from \"@opal/layouts\";\n\ninterface OnboardingHeaderProps {\n  state: OnboardingState;\n  actions: OnboardingActions;\n  handleHideOnboarding: () => void;\n  handleFinishOnboarding: () => void;\n}\nconst OnboardingHeader = React.memo(\n  ({\n    state: onboardingState,\n    actions: onboardingActions,\n    handleHideOnboarding,\n    handleFinishOnboarding,\n  }: OnboardingHeaderProps) => {\n    const iconPercentage =\n      STEP_CONFIG[onboardingState.currentStep].iconPercentage;\n    const stepButtonText = STEP_CONFIG[onboardingState.currentStep].buttonText;\n    const isWelcomeStep =\n      onboardingState.currentStep === OnboardingStep.Welcome;\n    const isCompleteStep =\n      onboardingState.currentStep === OnboardingStep.Complete;\n\n    function handleButtonClick() {\n      if (isCompleteStep) handleFinishOnboarding();\n      else onboardingActions.nextStep();\n    }\n\n    return (\n      <Card padding={0.5} data-label=\"onboarding-header\">\n        <ContentAction\n          icon={(props) => (\n            <SvgProgressCircle value={iconPercentage} {...props} />\n          )}\n          title={STEP_CONFIG[onboardingState.currentStep].title}\n          sizePreset=\"main-ui\"\n          variant=\"body\"\n          prominence=\"muted\"\n          paddingVariant=\"sm\"\n          rightChildren={\n            stepButtonText ? (\n              <Section flexDirection=\"row\">\n                {!isWelcomeStep && (\n                  <Text as=\"p\" text03 mainUiBody>\n                    Step {onboardingState.stepIndex} of{\" \"}\n                    {onboardingState.totalSteps}\n                  </Text>\n                )}\n                <Button\n                  disabled={!onboardingState.isButtonActive}\n                  onClick={handleButtonClick}\n                >\n                  {stepButtonText}\n                </Button>\n              </Section>\n            ) : (\n              <Button\n                prominence=\"tertiary\"\n                size=\"sm\"\n                icon={SvgX}\n                onClick={handleHideOnboarding}\n              />\n            )\n          }\n        />\n      </Card>\n    );\n  }\n);\nOnboardingHeader.displayName = \"OnboardingHeader\";\n\nexport default OnboardingHeader;\n"
  },
  {
    "path": "web/src/sections/onboarding/constants.ts",
    "content": "import { OnboardingStep, FinalStepItemProps } from \"@/interfaces/onboarding\";\nimport { SvgGlobe, SvgImage, SvgUsers } from \"@opal/icons\";\n\ntype StepConfig = {\n  index: number;\n  title: string;\n  buttonText: string;\n  iconPercentage: number;\n};\n\nexport const STEP_CONFIG: Record<OnboardingStep, StepConfig> = {\n  [OnboardingStep.Welcome]: {\n    index: 0,\n    title: \"Let's take a moment to get you set up.\",\n    buttonText: \"Let's Go\",\n    iconPercentage: 10,\n  },\n  [OnboardingStep.Name]: {\n    index: 1,\n    title: \"Let's take a moment to get you set up.\",\n    buttonText: \"Next\",\n    iconPercentage: 40,\n  },\n  [OnboardingStep.LlmSetup]: {\n    index: 2,\n    title: \"Almost there! Connect your models to start chatting.\",\n    buttonText: \"Next\",\n    iconPercentage: 70,\n  },\n  [OnboardingStep.Complete]: {\n    index: 3,\n    title: \"You're all set, review the optional settings or click Finish Setup\",\n    buttonText: \"Finish Setup\",\n    iconPercentage: 100,\n  },\n} as const;\n\nexport const TOTAL_STEPS = 3;\n\nexport const STEP_NAVIGATION: Record<\n  OnboardingStep,\n  { next?: OnboardingStep; prev?: OnboardingStep }\n> = {\n  [OnboardingStep.Welcome]: { next: OnboardingStep.Name },\n  [OnboardingStep.Name]: {\n    next: OnboardingStep.LlmSetup,\n    prev: OnboardingStep.Welcome,\n  },\n  [OnboardingStep.LlmSetup]: {\n    next: OnboardingStep.Complete,\n    prev: OnboardingStep.Name,\n  },\n  [OnboardingStep.Complete]: { prev: OnboardingStep.LlmSetup },\n};\n\nexport const FINAL_SETUP_CONFIG: FinalStepItemProps[] = [\n  {\n    title: \"Select web search provider\",\n    description: \"Enable Onyx to search the internet for information.\",\n    icon: SvgGlobe,\n    buttonText: \"Web Search\",\n    buttonHref: \"/admin/configuration/web-search\",\n  },\n  {\n    title: \"Enable image generation\",\n    description: \"Set up models to create images in your chats.\",\n    icon: SvgImage,\n    buttonText: \"Image Generation\",\n    buttonHref: \"/admin/configuration/image-generation\",\n  },\n  {\n    title: \"Invite your team\",\n    description: \"Manage users and permissions for your team\",\n    icon: SvgUsers,\n    buttonText: \"Manage Users\",\n    buttonHref: \"/admin/users\",\n  },\n];\n"
  },
  {
    "path": "web/src/sections/onboarding/forms/getOnboardingForm.tsx",
    "content": "import React from \"react\";\nimport {\n  WellKnownLLMProviderDescriptor,\n  LLMProviderName,\n} from \"@/interfaces/llm\";\nimport { OnboardingActions, OnboardingState } from \"@/interfaces/onboarding\";\nimport OpenAIModal from \"@/sections/modals/llmConfig/OpenAIModal\";\nimport AnthropicModal from \"@/sections/modals/llmConfig/AnthropicModal\";\nimport OllamaModal from \"@/sections/modals/llmConfig/OllamaModal\";\nimport AzureModal from \"@/sections/modals/llmConfig/AzureModal\";\nimport BedrockModal from \"@/sections/modals/llmConfig/BedrockModal\";\nimport VertexAIModal from \"@/sections/modals/llmConfig/VertexAIModal\";\nimport OpenRouterModal from \"@/sections/modals/llmConfig/OpenRouterModal\";\nimport CustomModal from \"@/sections/modals/llmConfig/CustomModal\";\nimport LMStudioForm from \"@/sections/modals/llmConfig/LMStudioForm\";\nimport LiteLLMProxyModal from \"@/sections/modals/llmConfig/LiteLLMProxyModal\";\n\n// Display info for LLM provider cards - title is the product name, displayName is the company/platform\nconst PROVIDER_DISPLAY_INFO: Record<\n  string,\n  { title: string; displayName: string }\n> = {\n  [LLMProviderName.OPENAI]: { title: \"GPT\", displayName: \"OpenAI\" },\n  [LLMProviderName.ANTHROPIC]: { title: \"Claude\", displayName: \"Anthropic\" },\n  [LLMProviderName.OLLAMA_CHAT]: { title: \"Ollama\", displayName: \"Ollama\" },\n  [LLMProviderName.AZURE]: {\n    title: \"Azure OpenAI\",\n    displayName: \"Microsoft Azure Cloud\",\n  },\n  [LLMProviderName.BEDROCK]: {\n    title: \"Amazon Bedrock\",\n    displayName: \"AWS\",\n  },\n  [LLMProviderName.VERTEX_AI]: {\n    title: \"Gemini\",\n    displayName: \"Google Cloud Vertex AI\",\n  },\n  [LLMProviderName.OPENROUTER]: {\n    title: \"OpenRouter\",\n    displayName: \"OpenRouter\",\n  },\n  [LLMProviderName.LM_STUDIO]: {\n    title: \"LM Studio\",\n    displayName: \"LM Studio\",\n  },\n  [LLMProviderName.LITELLM_PROXY]: {\n    title: \"LiteLLM Proxy\",\n    displayName: \"LiteLLM Proxy\",\n  },\n};\n\nexport function getProviderDisplayInfo(providerName: string): {\n  title: string;\n  displayName: string;\n} {\n  return (\n    PROVIDER_DISPLAY_INFO[providerName] ?? {\n      title: providerName,\n      displayName: providerName,\n    }\n  );\n}\n\nexport interface OnboardingFormProps {\n  llmDescriptor?: WellKnownLLMProviderDescriptor;\n  isCustomProvider?: boolean;\n  onboardingState: OnboardingState;\n  onboardingActions: OnboardingActions;\n  open: boolean;\n  onOpenChange: (open: boolean) => void;\n}\n\nexport function getOnboardingForm({\n  llmDescriptor,\n  isCustomProvider,\n  onboardingState,\n  onboardingActions,\n  open,\n  onOpenChange,\n}: OnboardingFormProps): React.ReactNode {\n  const sharedProps = {\n    variant: \"onboarding\" as const,\n    onboardingState,\n    onboardingActions,\n    open,\n    onOpenChange,\n  };\n\n  // Handle custom provider\n  if (isCustomProvider || !llmDescriptor) {\n    return <CustomModal {...sharedProps} />;\n  }\n\n  const providerProps = {\n    ...sharedProps,\n    llmDescriptor,\n  };\n\n  switch (llmDescriptor.name) {\n    case LLMProviderName.OPENAI:\n      return <OpenAIModal {...providerProps} />;\n\n    case LLMProviderName.ANTHROPIC:\n      return <AnthropicModal {...providerProps} />;\n\n    case LLMProviderName.OLLAMA_CHAT:\n      return <OllamaModal {...providerProps} />;\n\n    case LLMProviderName.AZURE:\n      return <AzureModal {...providerProps} />;\n\n    case LLMProviderName.BEDROCK:\n      return <BedrockModal {...providerProps} />;\n\n    case LLMProviderName.VERTEX_AI:\n      return <VertexAIModal {...providerProps} />;\n\n    case LLMProviderName.OPENROUTER:\n      return <OpenRouterModal {...providerProps} />;\n\n    case LLMProviderName.LM_STUDIO:\n      return <LMStudioForm {...providerProps} />;\n\n    case LLMProviderName.LITELLM_PROXY:\n      return <LiteLLMProxyModal {...providerProps} />;\n\n    default:\n      return <CustomModal {...sharedProps} />;\n  }\n}\n"
  },
  {
    "path": "web/src/sections/onboarding/reducer.ts",
    "content": "import {\n  OnboardingState,\n  OnboardingAction,\n  OnboardingActionType,\n  OnboardingStep,\n} from \"@/interfaces/onboarding\";\nimport { STEP_NAVIGATION, STEP_CONFIG, TOTAL_STEPS } from \"./constants\";\n\nexport const initialState: OnboardingState = {\n  currentStep: OnboardingStep.Welcome,\n  stepIndex: 0,\n  totalSteps: TOTAL_STEPS,\n  data: {},\n  isButtonActive: true,\n  isLoading: false,\n};\n\nexport function onboardingReducer(\n  state: OnboardingState,\n  action: OnboardingAction\n): OnboardingState {\n  switch (action.type) {\n    case OnboardingActionType.NEXT_STEP: {\n      const nextStep = STEP_NAVIGATION[state.currentStep].next;\n      if (!nextStep) return state;\n      return {\n        ...state,\n        currentStep: nextStep,\n        stepIndex: STEP_CONFIG[nextStep].index,\n        isButtonActive:\n          nextStep === OnboardingStep.Complete ? true : state.isButtonActive,\n        error: undefined,\n      };\n    }\n\n    case OnboardingActionType.PREV_STEP: {\n      const prevStep = STEP_NAVIGATION[state.currentStep].prev;\n      if (!prevStep) return state;\n\n      return {\n        ...state,\n        currentStep: prevStep,\n        stepIndex: STEP_CONFIG[prevStep].index,\n        error: undefined,\n      };\n    }\n\n    case OnboardingActionType.GO_TO_STEP:\n      return {\n        ...state,\n        currentStep: action.step,\n        stepIndex: STEP_CONFIG[action.step].index,\n        isButtonActive:\n          action.step === OnboardingStep.Complete ? true : state.isButtonActive,\n        error: undefined,\n      };\n\n    case OnboardingActionType.UPDATE_DATA:\n      return {\n        ...state,\n        data: { ...state.data, ...action.payload },\n      };\n\n    case OnboardingActionType.SET_BUTTON_ACTIVE:\n      return {\n        ...state,\n        isButtonActive: action.isButtonActive,\n      };\n\n    case OnboardingActionType.SET_LOADING:\n      return {\n        ...state,\n        isLoading: action.isLoading,\n      };\n\n    case OnboardingActionType.SET_ERROR:\n      return {\n        ...state,\n        error: action.error,\n      };\n\n    case OnboardingActionType.RESET:\n      return initialState;\n\n    default:\n      return state;\n  }\n}\n"
  },
  {
    "path": "web/src/sections/onboarding/steps/FinalStep.tsx",
    "content": "import React from \"react\";\nimport Link from \"next/link\";\nimport type { Route } from \"next\";\nimport { Button } from \"@opal/components\";\nimport { FINAL_SETUP_CONFIG } from \"@/sections/onboarding/constants\";\nimport { FinalStepItemProps } from \"@/interfaces/onboarding\";\nimport { SvgExternalLink } from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { ContentAction } from \"@opal/layouts\";\nimport { Card } from \"@/refresh-components/cards\";\n\nconst FinalStepItem = React.memo(\n  ({\n    title,\n    description,\n    icon: Icon,\n    buttonText,\n    buttonHref,\n  }: FinalStepItemProps) => {\n    const isExternalLink = buttonHref.startsWith(\"http\");\n    const linkProps = isExternalLink\n      ? { target: \"_blank\", rel: \"noopener noreferrer\" }\n      : {};\n\n    return (\n      <Card padding={0.25} variant=\"secondary\">\n        <ContentAction\n          icon={Icon}\n          title={title}\n          description={description}\n          sizePreset=\"main-ui\"\n          variant=\"section\"\n          paddingVariant=\"sm\"\n          rightChildren={\n            <Link href={buttonHref as Route} {...linkProps}>\n              <Button prominence=\"tertiary\" rightIcon={SvgExternalLink}>\n                {buttonText}\n              </Button>\n            </Link>\n          }\n        />\n      </Card>\n    );\n  }\n);\nFinalStepItem.displayName = \"FinalStepItem\";\n\nexport default function FinalStep() {\n  return (\n    <Section gap={0.5}>\n      {FINAL_SETUP_CONFIG.map((item) => (\n        <FinalStepItem key={item.title} {...item} />\n      ))}\n    </Section>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/onboarding/steps/LLMStep.tsx",
    "content": "\"use client\";\n\nimport { memo, useState, useCallback } from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { Button } from \"@opal/components\";\nimport Separator from \"@/refresh-components/Separator\";\nimport LLMProviderCard from \"../components/LLMProviderCard\";\nimport {\n  OnboardingActions,\n  OnboardingState,\n  OnboardingStep,\n} from \"@/interfaces/onboarding\";\nimport { WellKnownLLMProviderDescriptor } from \"@/interfaces/llm\";\nimport {\n  getOnboardingForm,\n  getProviderDisplayInfo,\n} from \"../forms/getOnboardingForm\";\nimport { Disabled } from \"@opal/core\";\nimport { ProviderIcon } from \"@/app/admin/configuration/llm/ProviderIcon\";\nimport { SvgCheckCircle, SvgCpu, SvgExternalLink } from \"@opal/icons\";\nimport { ContentAction } from \"@opal/layouts\";\n\ntype LLMStepProps = {\n  state: OnboardingState;\n  actions: OnboardingActions;\n  llmDescriptors: WellKnownLLMProviderDescriptor[];\n  disabled?: boolean;\n};\n\ninterface SelectedProvider {\n  llmDescriptor?: WellKnownLLMProviderDescriptor;\n  isCustomProvider: boolean;\n}\n\nconst LLMProviderSkeleton = () => {\n  return (\n    <div className=\"flex justify-between h-full w-full p-1 rounded-12 border border-border-01 bg-background-neutral-01 animate-pulse\">\n      <div className=\"flex gap-1 p-1 flex-1 min-w-0\">\n        <div className=\"h-full p-0.5\">\n          <div className=\"w-4 h-4 rounded-full bg-neutral-200\" />\n        </div>\n        <div className=\"min-w-0 flex-1\">\n          <div className=\"h-3 w-1/2 bg-neutral-200 rounded\" />\n          <div className=\"mt-2 h-2 w-3/4 bg-neutral-200 rounded\" />\n        </div>\n      </div>\n      <div className=\"h-6 w-16 bg-neutral-200 rounded\" />\n    </div>\n  );\n};\n\ntype StackedProviderIconsProps = {\n  providers: string[];\n};\n\nconst StackedProviderIcons = ({ providers }: StackedProviderIconsProps) => {\n  if (!providers || providers.length === 0) {\n    return null;\n  }\n\n  return (\n    <div className=\"flex items-center\">\n      {providers.slice(0, 3).map((provider, index) => (\n        <div\n          key={provider}\n          className=\"relative flex items-center justify-center w-6 h-6 rounded-04 bg-background-neutral-01 border border-border-01\"\n          style={{\n            marginLeft: index > 0 ? \"-8px\" : \"0\",\n            zIndex: providers.length - index,\n          }}\n        >\n          <ProviderIcon provider={provider} size={16} />\n        </div>\n      ))}\n      {providers.length > 3 && (\n        <div\n          className=\"relative flex items-center justify-center w-6 h-6 rounded-04 bg-background-neutral-01 border border-border-01\"\n          style={{\n            marginLeft: \"-8px\",\n            zIndex: 0,\n          }}\n        >\n          <Text as=\"p\" text03 secondaryBody>\n            +{providers.length - 3}\n          </Text>\n        </div>\n      )}\n    </div>\n  );\n};\n\nconst LLMStepInner = ({\n  state: onboardingState,\n  actions: onboardingActions,\n  llmDescriptors,\n  disabled,\n}: LLMStepProps) => {\n  const isLoading = !llmDescriptors || llmDescriptors.length === 0;\n\n  const [selectedProvider, setSelectedProvider] =\n    useState<SelectedProvider | null>(null);\n  const [isModalOpen, setIsModalOpen] = useState(false);\n\n  const handleProviderClick = useCallback(\n    (\n      llmDescriptor?: WellKnownLLMProviderDescriptor,\n      isCustomProvider: boolean = false\n    ) => {\n      setSelectedProvider({ llmDescriptor, isCustomProvider });\n      setIsModalOpen(true);\n    },\n    []\n  );\n\n  const handleModalClose = useCallback((open: boolean) => {\n    setIsModalOpen(open);\n    if (!open) {\n      setSelectedProvider(null);\n    }\n  }, []);\n\n  if (\n    onboardingState.currentStep === OnboardingStep.LlmSetup ||\n    onboardingState.currentStep === OnboardingStep.Name\n  ) {\n    return (\n      <Disabled disabled={disabled} allowClick>\n        <div\n          className=\"flex flex-col items-center justify-between w-full p-1 rounded-16 border border-border-01 bg-background-tint-00\"\n          aria-label=\"onboarding-llm-step\"\n        >\n          <ContentAction\n            icon={SvgCpu}\n            title=\"Connect your LLM models\"\n            description=\"Onyx supports both self-hosted models and popular providers.\"\n            sizePreset=\"main-ui\"\n            variant=\"section\"\n            paddingVariant=\"lg\"\n            rightChildren={\n              <Button\n                disabled={disabled}\n                prominence=\"tertiary\"\n                rightIcon={SvgExternalLink}\n                href=\"/admin/configuration/llm\"\n              >\n                View in Admin Panel\n              </Button>\n            }\n          />\n          <Separator />\n          <div className=\"flex flex-wrap gap-1 [&>*:last-child:nth-child(odd)]:basis-full\">\n            {isLoading ? (\n              Array.from({ length: 8 }).map((_, idx) => (\n                <div\n                  key={idx}\n                  className=\"basis-[calc(50%-theme(spacing.1)/2)] grow\"\n                >\n                  <LLMProviderSkeleton />\n                </div>\n              ))\n            ) : (\n              <>\n                {/* Render the selected provider form */}\n                {selectedProvider &&\n                  getOnboardingForm({\n                    llmDescriptor: selectedProvider.llmDescriptor,\n                    isCustomProvider: selectedProvider.isCustomProvider,\n                    onboardingState,\n                    onboardingActions,\n                    open: isModalOpen,\n                    onOpenChange: handleModalClose,\n                  })}\n\n                {/* Render provider cards */}\n                {llmDescriptors.map((llmDescriptor) => {\n                  const displayInfo = getProviderDisplayInfo(\n                    llmDescriptor.name\n                  );\n                  return (\n                    <div\n                      key={llmDescriptor.name}\n                      className=\"basis-[calc(50%-theme(spacing.1)/2)] grow\"\n                    >\n                      <LLMProviderCard\n                        title={displayInfo.title}\n                        subtitle={displayInfo.displayName}\n                        providerName={llmDescriptor.name}\n                        disabled={disabled}\n                        isConnected={onboardingState.data.llmProviders?.some(\n                          (provider) => provider === llmDescriptor.name\n                        )}\n                        onClick={() =>\n                          handleProviderClick(llmDescriptor, false)\n                        }\n                      />\n                    </div>\n                  );\n                })}\n\n                {/* Custom provider card */}\n                <div className=\"basis-[calc(50%-theme(spacing.1)/2)] grow\">\n                  <LLMProviderCard\n                    title=\"Custom LLM Provider\"\n                    subtitle=\"LiteLLM Compatible APIs\"\n                    disabled={disabled}\n                    isConnected={onboardingState.data.llmProviders?.some(\n                      (provider) => provider === \"custom\"\n                    )}\n                    onClick={() => handleProviderClick(undefined, true)}\n                  />\n                </div>\n              </>\n            )}\n          </div>\n        </div>\n      </Disabled>\n    );\n  } else {\n    return (\n      <button\n        type=\"button\"\n        className=\"flex items-center justify-between w-full p-3 bg-background-tint-00 rounded-16 border border-border-01 opacity-50\"\n        onClick={() => {\n          onboardingActions.setButtonActive(true);\n          onboardingActions.goToStep(OnboardingStep.LlmSetup);\n        }}\n        aria-label=\"Edit LLM providers\"\n      >\n        <div className=\"flex items-center gap-1\">\n          <StackedProviderIcons\n            providers={onboardingState.data.llmProviders || []}\n          />\n          <Text as=\"p\" text04 mainUiAction>\n            {onboardingState.data.llmProviders?.length || 0}{\" \"}\n            {(onboardingState.data.llmProviders?.length || 0) === 1\n              ? \"model\"\n              : \"models\"}{\" \"}\n            connected\n          </Text>\n        </div>\n        <div className=\"p-1\">\n          <SvgCheckCircle className=\"w-4 h-4 stroke-status-success-05\" />\n        </div>\n      </button>\n    );\n  }\n};\n\nconst LLMStep = memo(LLMStepInner);\nexport default LLMStep;\n"
  },
  {
    "path": "web/src/sections/onboarding/steps/NameStep.tsx",
    "content": "\"use client\";\n\nimport React, { useRef } from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport {\n  OnboardingState,\n  OnboardingActions,\n  OnboardingStep,\n} from \"@/interfaces/onboarding\";\nimport InputAvatar from \"@/refresh-components/inputs/InputAvatar\";\nimport { cn } from \"@/lib/utils\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport { SvgCheckCircle, SvgEdit, SvgUser } from \"@opal/icons\";\nimport { ContentAction } from \"@opal/layouts\";\nimport { Hoverable } from \"@opal/core\";\n\nexport interface NameStepProps {\n  state: OnboardingState;\n  actions: OnboardingActions;\n}\n\nconst NameStep = React.memo(\n  ({ state: onboardingState, actions: onboardingActions }: NameStepProps) => {\n    const { userName } = onboardingState.data;\n    const { updateName, goToStep, setButtonActive, nextStep } =\n      onboardingActions;\n\n    const isActive = onboardingState.currentStep === OnboardingStep.Name;\n    const containerClasses = cn(\n      \"flex items-center justify-between w-full p-3 bg-background-tint-00 rounded-16 border border-border-01\"\n    );\n\n    const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {\n      if (e.key === \"Enter\" && userName && userName.trim().length > 0) {\n        e.preventDefault();\n        nextStep();\n      }\n    };\n\n    const inputRef = useRef<HTMLInputElement>(null);\n    return isActive ? (\n      <div\n        className={containerClasses}\n        onClick={() => inputRef.current?.focus()}\n        role=\"group\"\n        aria-label=\"onboarding-name-step\"\n      >\n        <ContentAction\n          icon={SvgUser}\n          title=\"What should Onyx call you?\"\n          description=\"We will display this name in the app.\"\n          sizePreset=\"main-ui\"\n          variant=\"section\"\n          paddingVariant=\"fit\"\n          rightChildren={\n            <InputTypeIn\n              ref={inputRef}\n              placeholder=\"Your name\"\n              value={userName || \"\"}\n              onChange={(e) => updateName(e.target.value)}\n              onKeyDown={handleKeyDown}\n              className=\"max-w-60\"\n            />\n          }\n        />\n      </div>\n    ) : (\n      <Hoverable.Root group=\"nameStep\" widthVariant=\"full\">\n        <div\n          className={containerClasses}\n          onClick={() => {\n            setButtonActive(true);\n            goToStep(OnboardingStep.Name);\n          }}\n          aria-label=\"Edit display name\"\n          role=\"button\"\n          tabIndex={0}\n        >\n          <div\n            className={cn(\"flex items-center gap-1\", !isActive && \"opacity-50\")}\n          >\n            <InputAvatar\n              className={cn(\n                \"flex items-center justify-center bg-background-neutral-inverted-00\",\n                \"w-5 h-5\"\n              )}\n            >\n              <Text as=\"p\" inverted secondaryBody>\n                {userName?.[0]?.toUpperCase()}\n              </Text>\n            </InputAvatar>\n            <Text as=\"p\" text04 mainUiAction>\n              {userName}\n            </Text>\n          </div>\n          <div className=\"p-1 flex items-center gap-1\">\n            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n            <Hoverable.Item group=\"nameStep\" variant=\"opacity-on-hover\">\n              <IconButton internal icon={SvgEdit} tooltip=\"Edit\" />\n            </Hoverable.Item>\n            <SvgCheckCircle\n              className={cn(\n                \"w-4 h-4 stroke-status-success-05\",\n                !isActive && \"opacity-50\"\n              )}\n            />\n          </div>\n        </div>\n      </Hoverable.Root>\n    );\n  }\n);\nNameStep.displayName = \"NameStep\";\n\nexport default NameStep;\n"
  },
  {
    "path": "web/src/sections/settings/Memories.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport FileTile from \"@/refresh-components/tiles/FileTile\";\nimport ButtonTile from \"@/refresh-components/tiles/ButtonTile\";\nimport { SvgAddLines, SvgFilter, SvgMenu, SvgPlusCircle } from \"@opal/icons\";\nimport MemoriesModal from \"@/refresh-components/modals/MemoriesModal\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { Button } from \"@opal/components\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport { MemoryItem } from \"@/lib/types\";\n\ninterface MemoriesProps {\n  memories: MemoryItem[];\n  onSaveMemories: (memories: MemoryItem[]) => Promise<boolean>;\n}\n\nexport default function Memories({ memories, onSaveMemories }: MemoriesProps) {\n  const memoriesModal = useCreateModal();\n  const [targetMemoryId, setTargetMemoryId] = useState<number | null>(null);\n\n  return (\n    <>\n      {memories.length === 0 ? (\n        <LineItem\n          skeleton\n          description=\"Add personal note or memory that Onyx should remember.\"\n          onClick={() => {\n            setTargetMemoryId(null);\n            memoriesModal.toggle(true);\n          }}\n          rightChildren={\n            <Button\n              prominence=\"internal\"\n              icon={SvgPlusCircle}\n              onClick={() => {\n                setTargetMemoryId(null);\n                memoriesModal.toggle(true);\n              }}\n            />\n          }\n        />\n      ) : (\n        <div className=\"self-stretch flex flex-row items-center justify-between gap-2\">\n          <div className=\"flex flex-row items-center gap-2\">\n            {memories.slice(0, 2).map((memory, index) => (\n              <FileTile\n                key={memory.id ?? index}\n                description={memory.content}\n                onOpen={() => {\n                  setTargetMemoryId(memory.id);\n                  memoriesModal.toggle(true);\n                }}\n              />\n            ))}\n          </div>\n          <ButtonTile\n            title=\"View/Add\"\n            description=\"All Memories\"\n            icon={SvgAddLines}\n            onClick={() => {\n              setTargetMemoryId(null);\n              memoriesModal.toggle(true);\n            }}\n          />\n        </div>\n      )}\n\n      <memoriesModal.Provider>\n        <MemoriesModal\n          memories={memories}\n          onSaveMemories={onSaveMemories}\n          initialTargetMemoryId={targetMemoryId}\n          focusNewLine={targetMemoryId === null}\n        />\n      </memoriesModal.Provider>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/AdminSidebar.tsx",
    "content": "\"use client\";\n\nimport { useCallback } from \"react\";\nimport { usePathname } from \"next/navigation\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport SidebarSection from \"@/sections/sidebar/SidebarSection\";\nimport SidebarWrapper from \"@/sections/sidebar/SidebarWrapper\";\nimport { useIsKGExposed } from \"@/app/admin/kg/utils\";\nimport { useCustomAnalyticsEnabled } from \"@/lib/hooks/useCustomAnalyticsEnabled\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { UserRole } from \"@/lib/types\";\nimport { usePaidEnterpriseFeaturesEnabled } from \"@/components/settings/usePaidEnterpriseFeaturesEnabled\";\nimport { CombinedSettings } from \"@/interfaces/settings\";\nimport { SidebarTab } from \"@opal/components\";\nimport SidebarBody from \"@/sections/sidebar/SidebarBody\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { Disabled } from \"@opal/core\";\nimport { SvgArrowUpCircle, SvgUserManage, SvgX } from \"@opal/icons\";\nimport {\n  useBillingInformation,\n  useLicense,\n  hasActiveSubscription,\n} from \"@/lib/billing\";\nimport { Content } from \"@opal/layouts\";\nimport { ADMIN_ROUTES, sidebarItem } from \"@/lib/admin-routes\";\nimport useFilter from \"@/hooks/useFilter\";\nimport { IconFunctionComponent } from \"@opal/types\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { getUserDisplayName } from \"@/lib/user\";\nimport { APP_SLOGAN } from \"@/lib/constants\";\n\nconst SECTIONS = {\n  UNLABELED: \"\",\n  AGENTS_AND_ACTIONS: \"Agents & Actions\",\n  DOCUMENTS_AND_KNOWLEDGE: \"Documents & Knowledge\",\n  INTEGRATIONS: \"Integrations\",\n  PERMISSIONS: \"Permissions\",\n  ORGANIZATION: \"Organization\",\n  USAGE: \"Usage\",\n} as const;\n\ninterface SidebarItemEntry {\n  section: string;\n  name: string;\n  icon: IconFunctionComponent;\n  link: string;\n  error?: boolean;\n  disabled?: boolean;\n}\n\nfunction buildItems(\n  isCurator: boolean,\n  enableCloud: boolean,\n  enableEnterprise: boolean,\n  settings: CombinedSettings | null,\n  kgExposed: boolean,\n  customAnalyticsEnabled: boolean,\n  hasSubscription: boolean,\n  hooksEnabled: boolean\n): SidebarItemEntry[] {\n  const vectorDbEnabled = settings?.settings.vector_db_enabled !== false;\n  const items: SidebarItemEntry[] = [];\n\n  const add = (section: string, route: Parameters<typeof sidebarItem>[0]) => {\n    items.push({ ...sidebarItem(route), section });\n  };\n\n  const addDisabled = (\n    section: string,\n    route: Parameters<typeof sidebarItem>[0],\n    isDisabled: boolean\n  ) => {\n    items.push({ ...sidebarItem(route), section, disabled: isDisabled });\n  };\n\n  // 1. No header — core configuration (admin only)\n  if (!isCurator) {\n    add(SECTIONS.UNLABELED, ADMIN_ROUTES.LLM_MODELS);\n    add(SECTIONS.UNLABELED, ADMIN_ROUTES.WEB_SEARCH);\n    add(SECTIONS.UNLABELED, ADMIN_ROUTES.IMAGE_GENERATION);\n    add(SECTIONS.UNLABELED, ADMIN_ROUTES.VOICE);\n    add(SECTIONS.UNLABELED, ADMIN_ROUTES.CODE_INTERPRETER);\n    add(SECTIONS.UNLABELED, ADMIN_ROUTES.CHAT_PREFERENCES);\n\n    if (vectorDbEnabled && kgExposed) {\n      add(SECTIONS.UNLABELED, ADMIN_ROUTES.KNOWLEDGE_GRAPH);\n    }\n\n    if (!enableCloud && customAnalyticsEnabled) {\n      addDisabled(\n        SECTIONS.UNLABELED,\n        ADMIN_ROUTES.CUSTOM_ANALYTICS,\n        !enableEnterprise\n      );\n    }\n  }\n\n  // 2. Agents & Actions\n  add(SECTIONS.AGENTS_AND_ACTIONS, ADMIN_ROUTES.AGENTS);\n  add(SECTIONS.AGENTS_AND_ACTIONS, ADMIN_ROUTES.MCP_ACTIONS);\n  add(SECTIONS.AGENTS_AND_ACTIONS, ADMIN_ROUTES.OPENAPI_ACTIONS);\n\n  // 3. Documents & Knowledge\n  if (vectorDbEnabled) {\n    add(SECTIONS.DOCUMENTS_AND_KNOWLEDGE, ADMIN_ROUTES.INDEXING_STATUS);\n    add(SECTIONS.DOCUMENTS_AND_KNOWLEDGE, ADMIN_ROUTES.ADD_CONNECTOR);\n    add(SECTIONS.DOCUMENTS_AND_KNOWLEDGE, ADMIN_ROUTES.DOCUMENT_SETS);\n    if (!isCurator && !enableCloud) {\n      items.push({\n        ...sidebarItem(ADMIN_ROUTES.INDEX_SETTINGS),\n        section: SECTIONS.DOCUMENTS_AND_KNOWLEDGE,\n        error: settings?.settings.needs_reindexing,\n      });\n    }\n    if (!isCurator && settings?.settings.opensearch_indexing_enabled) {\n      add(SECTIONS.DOCUMENTS_AND_KNOWLEDGE, ADMIN_ROUTES.INDEX_MIGRATION);\n    }\n  }\n\n  // 4. Integrations (admin only)\n  if (!isCurator) {\n    add(SECTIONS.INTEGRATIONS, ADMIN_ROUTES.API_KEYS);\n    add(SECTIONS.INTEGRATIONS, ADMIN_ROUTES.SLACK_BOTS);\n    add(SECTIONS.INTEGRATIONS, ADMIN_ROUTES.DISCORD_BOTS);\n    if (hooksEnabled) {\n      add(SECTIONS.INTEGRATIONS, ADMIN_ROUTES.HOOKS);\n    }\n  }\n\n  // 5. Permissions\n  if (!isCurator) {\n    add(SECTIONS.PERMISSIONS, ADMIN_ROUTES.USERS);\n    addDisabled(SECTIONS.PERMISSIONS, ADMIN_ROUTES.GROUPS, !enableEnterprise);\n    addDisabled(SECTIONS.PERMISSIONS, ADMIN_ROUTES.SCIM, !enableEnterprise);\n  } else if (enableEnterprise) {\n    add(SECTIONS.PERMISSIONS, ADMIN_ROUTES.GROUPS);\n  }\n\n  // 6. Organization (admin only)\n  if (!isCurator) {\n    if (hasSubscription) {\n      add(SECTIONS.ORGANIZATION, ADMIN_ROUTES.BILLING);\n    } else {\n      items.push({\n        section: SECTIONS.ORGANIZATION,\n        name: \"Upgrade Plan\",\n        icon: SvgArrowUpCircle,\n        link: ADMIN_ROUTES.BILLING.path,\n      });\n    }\n    add(SECTIONS.ORGANIZATION, ADMIN_ROUTES.TOKEN_RATE_LIMITS);\n    addDisabled(SECTIONS.ORGANIZATION, ADMIN_ROUTES.THEME, !enableEnterprise);\n  }\n\n  // 7. Usage (admin only)\n  if (!isCurator) {\n    addDisabled(SECTIONS.USAGE, ADMIN_ROUTES.USAGE, !enableEnterprise);\n    if (settings?.settings.query_history_type !== \"disabled\") {\n      addDisabled(\n        SECTIONS.USAGE,\n        ADMIN_ROUTES.QUERY_HISTORY,\n        !enableEnterprise\n      );\n    }\n  }\n\n  return items;\n}\n\n/** Preserve section ordering while grouping consecutive items by section. */\nfunction groupBySection(items: SidebarItemEntry[]) {\n  const groups: { section: string; items: SidebarItemEntry[] }[] = [];\n  for (const item of items) {\n    const last = groups[groups.length - 1];\n    if (last && last.section === item.section) {\n      last.items.push(item);\n    } else {\n      groups.push({ section: item.section, items: [item] });\n    }\n  }\n  return groups;\n}\n\ninterface AdminSidebarProps {\n  enableCloudSS: boolean;\n}\n\nexport default function AdminSidebar({ enableCloudSS }: AdminSidebarProps) {\n  const { kgExposed } = useIsKGExposed();\n  const pathname = usePathname();\n  const { customAnalyticsEnabled } = useCustomAnalyticsEnabled();\n  const { user } = useUser();\n  const settings = useSettingsContext();\n  const enableEnterprise = usePaidEnterpriseFeaturesEnabled();\n  const { data: billingData, isLoading: billingLoading } =\n    useBillingInformation();\n  const { data: licenseData, isLoading: licenseLoading } = useLicense();\n  const isCurator =\n    user?.role === UserRole.CURATOR || user?.role === UserRole.GLOBAL_CURATOR;\n  // Default to true while loading to avoid flashing \"Upgrade Plan\"\n  const hasSubscriptionOrLicense =\n    billingLoading || licenseLoading\n      ? true\n      : Boolean(\n          (billingData && hasActiveSubscription(billingData)) ||\n            licenseData?.has_license\n        );\n  const hooksEnabled =\n    enableEnterprise && (settings?.settings.hooks_enabled ?? false);\n\n  const allItems = buildItems(\n    isCurator,\n    enableCloudSS,\n    enableEnterprise,\n    settings,\n    kgExposed,\n    customAnalyticsEnabled,\n    hasSubscriptionOrLicense,\n    hooksEnabled\n  );\n\n  const itemExtractor = useCallback((item: SidebarItemEntry) => item.name, []);\n\n  const { query, setQuery, filtered } = useFilter(allItems, itemExtractor);\n\n  const groups = groupBySection(filtered);\n\n  return (\n    <SidebarWrapper>\n      <SidebarBody\n        scrollKey=\"admin-sidebar\"\n        pinnedContent={\n          <div className=\"flex flex-col w-full\">\n            <SidebarTab\n              icon={({ className }) => <SvgX className={className} size={16} />}\n              href=\"/app\"\n              variant=\"sidebar-light\"\n            >\n              Exit Admin Panel\n            </SidebarTab>\n            <InputTypeIn\n              variant=\"internal\"\n              leftSearchIcon\n              placeholder=\"Search...\"\n              value={query}\n              onChange={(e) => setQuery(e.target.value)}\n            />\n          </div>\n        }\n        footer={\n          <Section gap={0} height=\"fit\" alignItems=\"start\">\n            <div className=\"p-[0.38rem] w-full\">\n              <Content\n                icon={SvgUserManage}\n                title={getUserDisplayName(user)}\n                sizePreset=\"main-ui\"\n                variant=\"body\"\n                prominence=\"muted\"\n                widthVariant=\"full\"\n              />\n            </div>\n            <div className=\"flex flex-row gap-1 p-[0.38rem] w-full\">\n              <Text text03 secondaryAction>\n                <a\n                  className=\"underline\"\n                  href=\"https://onyx.app\"\n                  target=\"_blank\"\n                >\n                  Onyx\n                </a>\n              </Text>\n              <Text text03 secondaryBody>\n                |\n              </Text>\n              {settings.webVersion ? (\n                <Text text03 secondaryBody>\n                  {settings.webVersion}\n                </Text>\n              ) : (\n                <Text text03 secondaryBody>\n                  {APP_SLOGAN}\n                </Text>\n              )}\n            </div>\n          </Section>\n        }\n      >\n        {groups.map((group, groupIndex) => {\n          const tabs = group.items.map(({ link, icon, name, disabled }) => (\n            <Disabled key={link} disabled={disabled}>\n              {/*\n                # NOTE (@raunakab)\n                We intentionally add a `div` intermediary here.\n                Without it, the disabled styling that is default provided by the `Disabled` component (which we want here) would be overridden by the custom disabled styling provided by the `SidebarTab`.\n                Therefore, in order to avoid that overriding, we add a layer of indirection.\n              */}\n              <div>\n                <SidebarTab\n                  disabled={disabled}\n                  icon={icon}\n                  href={disabled ? undefined : link}\n                  selected={pathname.startsWith(link)}\n                >\n                  {name}\n                </SidebarTab>\n              </div>\n            </Disabled>\n          ));\n\n          if (!group.section) {\n            return <div key={groupIndex}>{tabs}</div>;\n          }\n\n          return (\n            <SidebarSection key={groupIndex} title={group.section}>\n              {tabs}\n            </SidebarSection>\n          );\n        })}\n      </SidebarBody>\n    </SidebarWrapper>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/AgentButton.tsx",
    "content": "\"use client\";\n\nimport React, { memo } from \"react\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport { usePinnedAgents, useCurrentAgent } from \"@/hooks/useAgents\";\nimport { cn, noProp } from \"@/lib/utils\";\nimport { SidebarTab } from \"@opal/components\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport { useSortable } from \"@dnd-kit/sortable\";\nimport { CSS } from \"@dnd-kit/utilities\";\nimport useOnMount from \"@/hooks/useOnMount\";\nimport AgentAvatar from \"@/refresh-components/avatars/AgentAvatar\";\nimport { SvgPin, SvgX } from \"@opal/icons\";\n\ninterface SortableItemProps {\n  id: number;\n  children?: React.ReactNode;\n}\n\nfunction SortableItem({ id, children }: SortableItemProps) {\n  const isMounted = useOnMount();\n  const { attributes, listeners, setNodeRef, transform, isDragging } =\n    useSortable({ id });\n\n  if (!isMounted) {\n    return <div className=\"flex items-center group\">{children}</div>;\n  }\n\n  return (\n    <div\n      ref={setNodeRef}\n      style={{\n        transform: CSS.Transform.toString(transform),\n        ...(isDragging && { zIndex: 1000, position: \"relative\" as const }),\n      }}\n      {...attributes}\n      {...listeners}\n      className=\"flex items-center group\"\n    >\n      {children}\n    </div>\n  );\n}\n\nexport interface AgentButtonProps {\n  agent: MinimalPersonaSnapshot;\n}\n\nconst AgentButton = memo(({ agent }: AgentButtonProps) => {\n  const currentAgent = useCurrentAgent();\n  const { pinnedAgents, togglePinnedAgent } = usePinnedAgents();\n  const isActuallyPinned = pinnedAgents.some((a) => a.id === agent.id);\n  const isCurrentAgent = currentAgent?.id === agent.id;\n\n  const handleClick = async () => {\n    if (!isActuallyPinned) {\n      await togglePinnedAgent(agent, true);\n    }\n  };\n\n  return (\n    <SortableItem id={agent.id}>\n      <div className=\"flex flex-col w-full h-full\">\n        <SidebarTab\n          key={agent.id}\n          icon={() => <AgentAvatar agent={agent} />}\n          href={`/app?agentId=${agent.id}`}\n          onClick={handleClick}\n          selected={isCurrentAgent}\n          rightChildren={\n            // Hide unpin button for current agent since auto-pin would immediately re-pin\n            isCurrentAgent ? null : (\n              // TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved\n              <IconButton\n                icon={\n                  SvgX /* We only show the unpin button for pinned agents */\n                }\n                internal\n                onClick={noProp(() => togglePinnedAgent(agent, false))}\n                className={cn(\"hidden group-hover/SidebarTab:flex\")}\n                tooltip={\"Unpin Agent\"}\n              />\n            )\n          }\n        >\n          {agent.name}\n        </SidebarTab>\n      </div>\n    </SortableItem>\n  );\n});\nAgentButton.displayName = \"AgentButton\";\n\nexport default AgentButton;\n"
  },
  {
    "path": "web/src/sections/sidebar/AppSidebar.tsx",
    "content": "\"use client\";\n\nimport { useCallback, memo, useMemo, useState, useEffect, useRef } from \"react\";\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { useRouter } from \"next/navigation\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport { MinimalPersonaSnapshot } from \"@/app/admin/agents/interfaces\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport ChatButton from \"@/sections/sidebar/ChatButton\";\nimport AgentButton from \"@/sections/sidebar/AgentButton\";\nimport { DragEndEvent } from \"@dnd-kit/core\";\nimport {\n  DndContext,\n  closestCenter,\n  KeyboardSensor,\n  PointerSensor,\n  useSensor,\n  useSensors,\n  pointerWithin,\n} from \"@dnd-kit/core\";\nimport {\n  arrayMove,\n  SortableContext,\n  sortableKeyboardCoordinates,\n  verticalListSortingStrategy,\n} from \"@dnd-kit/sortable\";\nimport { useDroppable } from \"@dnd-kit/core\";\nimport {\n  restrictToFirstScrollableAncestor,\n  restrictToVerticalAxis,\n} from \"@dnd-kit/modifiers\";\nimport SidebarSection from \"@/sections/sidebar/SidebarSection\";\nimport useChatSessions from \"@/hooks/useChatSessions\";\nimport { useProjects } from \"@/lib/hooks/useProjects\";\nimport { useAgents, useCurrentAgent, usePinnedAgents } from \"@/hooks/useAgents\";\nimport { useAppSidebarContext } from \"@/providers/AppSidebarProvider\";\nimport ProjectFolderButton from \"@/sections/sidebar/ProjectFolderButton\";\nimport CreateProjectModal from \"@/components/modals/CreateProjectModal\";\nimport MoveCustomAgentChatModal from \"@/components/modals/MoveCustomAgentChatModal\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport { removeChatSessionFromProject } from \"@/app/app/projects/projectsService\";\nimport type { Project } from \"@/app/app/projects/projectsService\";\nimport SidebarWrapper from \"@/sections/sidebar/SidebarWrapper\";\nimport { Button as OpalButton } from \"@opal/components\";\nimport { cn } from \"@/lib/utils\";\nimport {\n  DRAG_TYPES,\n  DEFAULT_PERSONA_ID,\n  FEATURE_FLAGS,\n  LOCAL_STORAGE_KEYS,\n} from \"@/sections/sidebar/constants\";\nimport { showErrorNotification, handleMoveOperation } from \"./sidebarUtils\";\nimport { SidebarTab } from \"@opal/components\";\nimport { ChatSession } from \"@/app/app/interfaces\";\nimport SidebarBody from \"@/sections/sidebar/SidebarBody\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport useAppFocus from \"@/hooks/useAppFocus\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport { useModalContext } from \"@/components/context/ModalContext\";\nimport useScreenSize from \"@/hooks/useScreenSize\";\nimport {\n  SvgDevKit,\n  SvgEditBig,\n  SvgFolderPlus,\n  SvgMoreHorizontal,\n  SvgOnyxOctagon,\n  SvgSearchMenu,\n  SvgSettings,\n} from \"@opal/icons\";\nimport SidebarTabSkeleton from \"@/refresh-components/skeletons/SidebarTabSkeleton\";\nimport BuildModeIntroBackground from \"@/app/craft/components/IntroBackground\";\nimport BuildModeIntroContent from \"@/app/craft/components/IntroContent\";\nimport { CRAFT_PATH } from \"@/app/craft/v1/constants\";\nimport { usePostHog } from \"posthog-js/react\";\nimport { track, AnalyticsEvent } from \"@/lib/analytics\";\nimport { motion, AnimatePresence } from \"motion/react\";\nimport { Notification, NotificationType } from \"@/interfaces/settings\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport UserAvatarPopover from \"@/sections/sidebar/UserAvatarPopover\";\nimport ChatSearchCommandMenu from \"@/sections/sidebar/ChatSearchCommandMenu\";\nimport { useQueryController } from \"@/providers/QueryControllerProvider\";\n\n// Visible-agents = pinned-agents + current-agent (if current-agent not in pinned-agents)\n// OR Visible-agents = pinned-agents (if current-agent in pinned-agents)\nfunction buildVisibleAgents(\n  pinnedAgents: MinimalPersonaSnapshot[],\n  currentAgent: MinimalPersonaSnapshot | null\n): [MinimalPersonaSnapshot[], boolean] {\n  /* NOTE: The unified agent (id = 0) is not visible in the sidebar,\n  so we filter it out. */\n  if (!currentAgent)\n    return [pinnedAgents.filter((agent) => agent.id !== 0), false];\n  const currentAgentIsPinned = pinnedAgents.some(\n    (pinnedAgent) => pinnedAgent.id === currentAgent.id\n  );\n  const visibleAgents = (\n    currentAgentIsPinned ? pinnedAgents : [...pinnedAgents, currentAgent]\n  ).filter((agent) => agent.id !== 0);\n\n  return [visibleAgents, currentAgentIsPinned];\n}\n\nconst SKELETON_WIDTHS_BASE = [\"w-4/5\", \"w-4/5\", \"w-3/5\"];\n\nfunction shuffleWidths(): string[] {\n  return [...SKELETON_WIDTHS_BASE].sort(() => Math.random() - 0.5);\n}\n\ninterface RecentsSectionProps {\n  chatSessions: ChatSession[];\n  hasMore: boolean;\n  isLoadingMore: boolean;\n  onLoadMore: () => void;\n}\n\nfunction RecentsSection({\n  chatSessions,\n  hasMore,\n  isLoadingMore,\n  onLoadMore,\n}: RecentsSectionProps) {\n  const { setNodeRef, isOver } = useDroppable({\n    id: DRAG_TYPES.RECENTS,\n    data: {\n      type: DRAG_TYPES.RECENTS,\n    },\n  });\n\n  // Re-shuffle skeleton widths each time loaded session count changes\n  const skeletonWidths = useMemo(shuffleWidths, [chatSessions.length]);\n\n  // Sentinel ref for IntersectionObserver-based infinite scroll\n  const sentinelRef = useRef<HTMLDivElement | null>(null);\n  const onLoadMoreRef = useRef(onLoadMore);\n  onLoadMoreRef.current = onLoadMore;\n\n  useEffect(() => {\n    if (!hasMore || isLoadingMore) return;\n\n    const sentinel = sentinelRef.current;\n    if (!sentinel) return;\n\n    const observer = new IntersectionObserver(\n      (entries) => {\n        if (entries[0]?.isIntersecting) {\n          onLoadMoreRef.current();\n        }\n      },\n      { threshold: 0 }\n    );\n\n    observer.observe(sentinel);\n    return () => observer.disconnect();\n  }, [hasMore, isLoadingMore]);\n\n  return (\n    <div\n      ref={setNodeRef}\n      className={cn(\n        \"transition-colors duration-200 rounded-08 h-full\",\n        isOver && \"bg-background-tint-03\"\n      )}\n    >\n      <SidebarSection title=\"Recents\">\n        {chatSessions.length === 0 ? (\n          <Text as=\"p\" text01 className=\"px-3\">\n            Try sending a message! Your chat history will appear here.\n          </Text>\n        ) : (\n          <>\n            {chatSessions.map((chatSession) => (\n              <ChatButton\n                key={chatSession.id}\n                chatSession={chatSession}\n                draggable\n              />\n            ))}\n            {hasMore &&\n              skeletonWidths.map((width, i) => (\n                <div\n                  key={i}\n                  ref={i === 0 ? sentinelRef : undefined}\n                  className={cn(\n                    \"transition-opacity duration-300\",\n                    isLoadingMore ? \"opacity-100\" : \"opacity-40\"\n                  )}\n                >\n                  <SidebarTabSkeleton textWidth={width} />\n                </div>\n              ))}\n          </>\n        )}\n      </SidebarSection>\n    </div>\n  );\n}\n\ninterface AppSidebarInnerProps {\n  folded: boolean;\n  onFoldClick: () => void;\n}\n\nconst MemoizedAppSidebarInner = memo(\n  ({ folded, onFoldClick }: AppSidebarInnerProps) => {\n    const router = useRouter();\n    const combinedSettings = useSettingsContext();\n    const posthog = usePostHog();\n    const { newTenantInfo, invitationInfo } = useModalContext();\n    const { setAppMode, reset } = useQueryController();\n\n    // Use SWR hooks for data fetching\n    const {\n      chatSessions,\n      refreshChatSessions,\n      isLoading: isLoadingChatSessions,\n      hasMore,\n      isLoadingMore,\n      loadMore,\n    } = useChatSessions();\n    const {\n      projects,\n      refreshProjects,\n      isLoading: isLoadingProjects,\n    } = useProjects();\n    const { isLoading: isLoadingAgents } = useAgents();\n    const currentAgent = useCurrentAgent();\n    const {\n      pinnedAgents,\n      updatePinnedAgents,\n      isLoading: isLoadingPinnedAgents,\n    } = usePinnedAgents();\n\n    // Wait for ALL dynamic data before showing any sections\n    const isLoadingDynamicContent =\n      isLoadingChatSessions ||\n      isLoadingProjects ||\n      isLoadingAgents ||\n      isLoadingPinnedAgents;\n\n    // Still need some context for stateful operations\n    const { refreshCurrentProjectDetails, currentProjectId } =\n      useProjectsContext();\n\n    // State for custom agent modal\n    const [pendingMoveChatSession, setPendingMoveChatSession] =\n      useState<ChatSession | null>(null);\n    const [pendingMoveProjectId, setPendingMoveProjectId] = useState<\n      number | null\n    >(null);\n    const [showMoveCustomAgentModal, setShowMoveCustomAgentModal] =\n      useState(false);\n\n    // Fetch notifications for build mode intro\n    const { data: notifications, mutate: mutateNotifications } = useSWR<\n      Notification[]\n    >(SWR_KEYS.notifications, errorHandlingFetcher);\n\n    // Check if Onyx Craft is enabled via settings (backed by PostHog feature flag)\n    // Only explicit true enables the feature; false or undefined = disabled\n    const isOnyxCraftEnabled =\n      combinedSettings?.settings?.onyx_craft_enabled === true;\n\n    // Find build_mode feature announcement notification (only if Onyx Craft is enabled)\n    const buildModeNotification = isOnyxCraftEnabled\n      ? notifications?.find(\n          (n) =>\n            n.notif_type === NotificationType.FEATURE_ANNOUNCEMENT &&\n            n.additional_data?.feature === \"build_mode\" &&\n            !n.dismissed\n        )\n      : undefined;\n\n    // State for intro animation overlay\n    const [showIntroAnimation, setShowIntroAnimation] = useState(false);\n    // Track if auto-trigger has fired (prevents race condition during dismiss)\n    const hasAutoTriggeredRef = useRef(false);\n\n    // Auto-show intro once when there's an undismissed notification\n    // Don't show if tenant/invitation modal is open (e.g., \"join existing team\" modal)\n    // Gated by PostHog feature flag: if `craft-animation-disabled` is true (or\n    // PostHog is unavailable), skip the auto-show entirely.\n    const isCraftAnimationDisabled =\n      posthog?.isFeatureEnabled(FEATURE_FLAGS.CRAFT_ANIMATION_DISABLED) ?? true;\n    const hasTenantModal = !!(newTenantInfo || invitationInfo);\n    useEffect(() => {\n      if (\n        isOnyxCraftEnabled &&\n        buildModeNotification &&\n        !hasAutoTriggeredRef.current &&\n        !hasTenantModal &&\n        !isCraftAnimationDisabled\n      ) {\n        hasAutoTriggeredRef.current = true;\n        setShowIntroAnimation(true);\n      }\n    }, [\n      buildModeNotification,\n      isOnyxCraftEnabled,\n      hasTenantModal,\n      isCraftAnimationDisabled,\n    ]);\n\n    // Dismiss the build mode notification\n    const dismissBuildModeNotification = useCallback(async () => {\n      if (!buildModeNotification) return;\n      try {\n        await fetch(`/api/notifications/${buildModeNotification.id}/dismiss`, {\n          method: \"POST\",\n        });\n        mutateNotifications();\n      } catch (error) {\n        console.error(\"Error dismissing notification:\", error);\n      }\n    }, [buildModeNotification, mutateNotifications]);\n\n    const [visibleAgents, currentAgentIsPinned] = useMemo(\n      () => buildVisibleAgents(pinnedAgents, currentAgent),\n      [pinnedAgents, currentAgent]\n    );\n    const visibleAgentIds = useMemo(\n      () => visibleAgents.map((agent) => agent.id),\n      [visibleAgents]\n    );\n\n    const sensors = useSensors(\n      useSensor(PointerSensor, {\n        activationConstraint: {\n          distance: 8,\n        },\n      }),\n      useSensor(KeyboardSensor, {\n        coordinateGetter: sortableKeyboardCoordinates,\n      })\n    );\n\n    // Handle agent drag and drop\n    const handleAgentDragEnd = useCallback(\n      (event: DragEndEvent) => {\n        const { active, over } = event;\n        if (!over) return;\n        if (active.id === over.id) return;\n\n        const activeIndex = visibleAgentIds.findIndex(\n          (agentId) => agentId === active.id\n        );\n        const overIndex = visibleAgentIds.findIndex(\n          (agentId) => agentId === over.id\n        );\n\n        let newPinnedAgents: MinimalPersonaSnapshot[];\n\n        if (currentAgent && !currentAgentIsPinned) {\n          // This is the case in which the user is dragging the UNPINNED agent and moving it to somewhere else in the list.\n          // This is an indication that we WANT to pin this agent!\n          if (activeIndex === visibleAgentIds.length - 1) {\n            const pinnedWithCurrent = [...pinnedAgents, currentAgent];\n            newPinnedAgents = arrayMove(\n              pinnedWithCurrent,\n              activeIndex,\n              overIndex\n            );\n          } else {\n            // Use visibleAgents to ensure the indices match with `visibleAgentIds`\n            newPinnedAgents = arrayMove(visibleAgents, activeIndex, overIndex);\n          }\n        } else {\n          // Use visibleAgents to ensure the indices match with `visibleAgentIds`\n          newPinnedAgents = arrayMove(visibleAgents, activeIndex, overIndex);\n        }\n\n        updatePinnedAgents(newPinnedAgents);\n      },\n      [\n        visibleAgentIds,\n        visibleAgents,\n        pinnedAgents,\n        updatePinnedAgents,\n        currentAgent,\n        currentAgentIsPinned,\n      ]\n    );\n\n    // Perform the actual move\n    async function performChatMove(\n      targetProjectId: number,\n      chatSession: ChatSession\n    ) {\n      try {\n        await handleMoveOperation({\n          chatSession,\n          targetProjectId,\n          refreshChatSessions,\n          refreshCurrentProjectDetails,\n          fetchProjects: refreshProjects,\n          currentProjectId,\n        });\n        const projectRefreshPromise = currentProjectId\n          ? refreshCurrentProjectDetails()\n          : refreshProjects();\n        await Promise.all([refreshChatSessions(), projectRefreshPromise]);\n      } catch (error) {\n        console.error(\"Failed to move chat:\", error);\n        throw error;\n      }\n    }\n\n    // Handle chat to project drag and drop\n    const handleChatProjectDragEnd = useCallback(\n      async (event: DragEndEvent) => {\n        const { active, over } = event;\n        if (!over) return;\n\n        const activeData = active.data.current;\n        const overData = over.data.current;\n\n        if (!activeData || !overData) {\n          return;\n        }\n\n        // Check if we're dragging a chat onto a project\n        if (\n          activeData?.type === DRAG_TYPES.CHAT &&\n          overData?.type === DRAG_TYPES.PROJECT\n        ) {\n          const chatSession = activeData.chatSession as ChatSession;\n          const targetProject = overData.project as Project;\n          const sourceProjectId = activeData.projectId;\n\n          // Don't do anything if dropping on the same project\n          if (sourceProjectId === targetProject.id) {\n            return;\n          }\n\n          const hideModal =\n            typeof window !== \"undefined\" &&\n            window.localStorage.getItem(\n              LOCAL_STORAGE_KEYS.HIDE_MOVE_CUSTOM_AGENT_MODAL\n            ) === \"true\";\n\n          const isChatUsingDefaultAgent =\n            chatSession.persona_id === DEFAULT_PERSONA_ID;\n\n          if (!isChatUsingDefaultAgent && !hideModal) {\n            setPendingMoveChatSession(chatSession);\n            setPendingMoveProjectId(targetProject.id);\n            setShowMoveCustomAgentModal(true);\n            return;\n          }\n\n          try {\n            await performChatMove(targetProject.id, chatSession);\n          } catch (error) {\n            showErrorNotification(\"Failed to move chat. Please try again.\");\n          }\n        }\n\n        // Check if we're dragging a chat from a project to the Recents section\n        if (\n          activeData?.type === DRAG_TYPES.CHAT &&\n          overData?.type === DRAG_TYPES.RECENTS\n        ) {\n          const chatSession = activeData.chatSession as ChatSession;\n          const sourceProjectId = activeData.projectId;\n\n          // Only remove from project if it was in a project\n          if (sourceProjectId) {\n            try {\n              await removeChatSessionFromProject(chatSession.id);\n              const projectRefreshPromise = currentProjectId\n                ? refreshCurrentProjectDetails()\n                : refreshProjects();\n              await Promise.all([refreshChatSessions(), projectRefreshPromise]);\n            } catch (error) {\n              console.error(\"Failed to remove chat from project:\", error);\n            }\n          }\n        }\n      },\n      [\n        currentProjectId,\n        refreshChatSessions,\n        refreshCurrentProjectDetails,\n        refreshProjects,\n      ]\n    );\n\n    const { isAdmin, isCurator, user } = useUser();\n    const activeSidebarTab = useAppFocus();\n    const createProjectModal = useCreateModal();\n    const defaultAppMode =\n      (user?.preferences?.default_app_mode?.toLowerCase() as\n        | \"chat\"\n        | \"search\") ?? \"chat\";\n    const newSessionButton = useMemo(() => {\n      const href =\n        combinedSettings?.settings?.disable_default_assistant && currentAgent\n          ? `/app?agentId=${currentAgent.id}`\n          : \"/app\";\n      return (\n        <div data-testid=\"AppSidebar/new-session\">\n          <SidebarTab\n            icon={SvgEditBig}\n            folded={folded}\n            href={href}\n            selected={activeSidebarTab.isNewSession()}\n            onClick={() => {\n              if (!activeSidebarTab.isNewSession()) return;\n              setAppMode(defaultAppMode);\n              reset();\n            }}\n          >\n            New Session\n          </SidebarTab>\n        </div>\n      );\n    }, [\n      folded,\n      activeSidebarTab,\n      combinedSettings,\n      currentAgent,\n      defaultAppMode,\n    ]);\n\n    const buildButton = useMemo(\n      () => (\n        <div data-testid=\"AppSidebar/build\">\n          <SidebarTab\n            icon={SvgDevKit}\n            folded={folded}\n            href={CRAFT_PATH}\n            onClick={() => track(AnalyticsEvent.CLICKED_CRAFT_IN_SIDEBAR)}\n          >\n            Craft\n          </SidebarTab>\n        </div>\n      ),\n      [folded, posthog]\n    );\n\n    const searchChatsButton = useMemo(\n      () => (\n        <ChatSearchCommandMenu\n          trigger={\n            <SidebarTab icon={SvgSearchMenu} folded={folded}>\n              Search Chats\n            </SidebarTab>\n          }\n        />\n      ),\n      [folded]\n    );\n    const moreAgentsButton = useMemo(\n      () => (\n        <div data-testid=\"AppSidebar/more-agents\">\n          <SidebarTab\n            icon={\n              folded || visibleAgents.length === 0\n                ? SvgOnyxOctagon\n                : SvgMoreHorizontal\n            }\n            href=\"/app/agents\"\n            folded={folded}\n            selected={activeSidebarTab.isMoreAgents()}\n            variant={folded ? \"sidebar-heavy\" : \"sidebar-light\"}\n          >\n            {visibleAgents.length === 0 ? \"Explore Agents\" : \"More Agents\"}\n          </SidebarTab>\n        </div>\n      ),\n      [folded, activeSidebarTab, visibleAgents]\n    );\n    const newProjectButton = useMemo(\n      () => (\n        <SidebarTab\n          icon={SvgFolderPlus}\n          onClick={() => createProjectModal.toggle(true)}\n          selected={createProjectModal.isOpen}\n          folded={folded}\n          variant={folded ? \"sidebar-heavy\" : \"sidebar-light\"}\n        >\n          New Project\n        </SidebarTab>\n      ),\n      [folded, createProjectModal.toggle, createProjectModal.isOpen]\n    );\n    const handleShowBuildIntro = useCallback(() => {\n      setShowIntroAnimation(true);\n    }, []);\n\n    const settingsButton = useMemo(\n      () => (\n        <div>\n          {(isAdmin || isCurator) && (\n            <SidebarTab\n              href={isCurator ? \"/admin/agents\" : \"/admin/configuration/llm\"}\n              icon={SvgSettings}\n              folded={folded}\n            >\n              {isAdmin ? \"Admin Panel\" : \"Curator Panel\"}\n            </SidebarTab>\n          )}\n          <UserAvatarPopover\n            folded={folded}\n            onShowBuildIntro={\n              isOnyxCraftEnabled ? handleShowBuildIntro : undefined\n            }\n          />\n        </div>\n      ),\n      [folded, isAdmin, isCurator, handleShowBuildIntro, isOnyxCraftEnabled]\n    );\n\n    return (\n      <>\n        <createProjectModal.Provider>\n          <CreateProjectModal />\n        </createProjectModal.Provider>\n\n        {showMoveCustomAgentModal && (\n          <MoveCustomAgentChatModal\n            onCancel={() => {\n              setShowMoveCustomAgentModal(false);\n              setPendingMoveChatSession(null);\n              setPendingMoveProjectId(null);\n            }}\n            onConfirm={async (doNotShowAgain: boolean) => {\n              if (doNotShowAgain && typeof window !== \"undefined\") {\n                window.localStorage.setItem(\n                  LOCAL_STORAGE_KEYS.HIDE_MOVE_CUSTOM_AGENT_MODAL,\n                  \"true\"\n                );\n              }\n              const chat = pendingMoveChatSession;\n              const target = pendingMoveProjectId;\n              setShowMoveCustomAgentModal(false);\n              setPendingMoveChatSession(null);\n              setPendingMoveProjectId(null);\n              if (chat && target != null) {\n                try {\n                  await performChatMove(target, chat);\n                } catch (error) {\n                  showErrorNotification(\n                    \"Failed to move chat. Please try again.\"\n                  );\n                }\n              }\n            }}\n          />\n        )}\n\n        {/* Intro animation overlay */}\n        <AnimatePresence>\n          {showIntroAnimation && (\n            <motion.div\n              className=\"fixed inset-0 z-[9999]\"\n              initial={{ opacity: 0 }}\n              animate={{ opacity: 1 }}\n              exit={{ opacity: 0 }}\n              transition={{ duration: 0.5 }}\n            >\n              <BuildModeIntroBackground />\n              <BuildModeIntroContent\n                onClose={() => {\n                  setShowIntroAnimation(false);\n                  dismissBuildModeNotification();\n                }}\n                onTryBuildMode={() => {\n                  setShowIntroAnimation(false);\n                  dismissBuildModeNotification();\n                  router.push(CRAFT_PATH);\n                }}\n              />\n            </motion.div>\n          )}\n        </AnimatePresence>\n\n        <SidebarWrapper folded={folded} onFoldClick={onFoldClick}>\n          <SidebarBody\n            scrollKey=\"app-sidebar\"\n            footer={settingsButton}\n            pinnedContent={\n              <div className=\"flex flex-col\">\n                {newSessionButton}\n                {searchChatsButton}\n                {isOnyxCraftEnabled && buildButton}\n                {folded && moreAgentsButton}\n                {folded && newProjectButton}\n              </div>\n            }\n          >\n            {/* When folded, all nav buttons are in pinnedContent — nothing here */}\n            {folded ? null : isLoadingDynamicContent ? null : (\n              <>\n                {/* Agents */}\n                <DndContext\n                  sensors={sensors}\n                  collisionDetection={closestCenter}\n                  onDragEnd={handleAgentDragEnd}\n                >\n                  <SidebarSection title=\"Agents\">\n                    <SortableContext\n                      items={visibleAgentIds}\n                      strategy={verticalListSortingStrategy}\n                    >\n                      {visibleAgents.map((visibleAgent) => (\n                        <AgentButton\n                          key={visibleAgent.id}\n                          agent={visibleAgent}\n                        />\n                      ))}\n                    </SortableContext>\n                    {moreAgentsButton}\n                  </SidebarSection>\n                </DndContext>\n\n                {/* Wrap Projects and Recents in a shared DndContext for chat-to-project drag */}\n                <DndContext\n                  sensors={sensors}\n                  collisionDetection={pointerWithin}\n                  modifiers={[\n                    restrictToFirstScrollableAncestor,\n                    restrictToVerticalAxis,\n                  ]}\n                  onDragEnd={handleChatProjectDragEnd}\n                >\n                  {/* Projects */}\n                  <SidebarSection\n                    title=\"Projects\"\n                    action={\n                      <OpalButton\n                        icon={SvgFolderPlus}\n                        prominence=\"tertiary\"\n                        size=\"sm\"\n                        tooltip=\"New Project\"\n                        onClick={() => createProjectModal.toggle(true)}\n                      />\n                    }\n                  >\n                    {projects.map((project) => (\n                      <ProjectFolderButton key={project.id} project={project} />\n                    ))}\n                    {projects.length === 0 && newProjectButton}\n                  </SidebarSection>\n\n                  {/* Recents */}\n                  <RecentsSection\n                    chatSessions={chatSessions}\n                    hasMore={hasMore}\n                    isLoadingMore={isLoadingMore}\n                    onLoadMore={loadMore}\n                  />\n                </DndContext>\n              </>\n            )}\n          </SidebarBody>\n        </SidebarWrapper>\n      </>\n    );\n  }\n);\nMemoizedAppSidebarInner.displayName = \"AppSidebar\";\n\nexport default function AppSidebar() {\n  const { folded, setFolded } = useAppSidebarContext();\n  const { isMobile } = useScreenSize();\n\n  if (!isMobile)\n    return (\n      <MemoizedAppSidebarInner\n        folded={folded}\n        onFoldClick={() => setFolded((prev) => !prev)}\n      />\n    );\n\n  return (\n    <>\n      <div\n        className={cn(\n          \"fixed inset-y-0 left-0 z-50 transition-transform duration-200\",\n          folded ? \"-translate-x-full\" : \"translate-x-0\"\n        )}\n      >\n        <MemoizedAppSidebarInner\n          folded={false}\n          onFoldClick={() => setFolded(true)}\n        />\n      </div>\n\n      {/* Hitbox to close the sidebar if anything outside of it is touched */}\n      <div\n        className={cn(\n          \"fixed inset-0 z-40 bg-mask-03 backdrop-blur-03 transition-opacity duration-200\",\n          folded\n            ? \"opacity-0 pointer-events-none\"\n            : \"opacity-100 pointer-events-auto\"\n        )}\n        onClick={() => setFolded(true)}\n      />\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/ChatButton.tsx",
    "content": "\"use client\";\n\nimport React, { useState, memo, useMemo, useEffect } from \"react\";\nimport { useDraggable } from \"@dnd-kit/core\";\nimport useChatSessions from \"@/hooks/useChatSessions\";\nimport { deleteChatSession, renameChatSession } from \"@/app/app/services/lib\";\nimport { ChatSession } from \"@/app/app/interfaces\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport { cn, noProp } from \"@/lib/utils\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport { useAppRouter } from \"@/hooks/appNavigation\";\nimport {\n  Project,\n  removeChatSessionFromProject,\n  createProject as createProjectService,\n} from \"@/app/app/projects/projectsService\";\nimport { useProjectsContext } from \"@/providers/ProjectsContext\";\nimport MoveCustomAgentChatModal from \"@/components/modals/MoveCustomAgentChatModal\";\nimport { UNNAMED_CHAT } from \"@/lib/constants\";\nimport ShareChatSessionModal from \"@/sections/modals/ShareChatSessionModal\";\nimport { SidebarTab } from \"@opal/components\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport { Button } from \"@opal/components\";\nimport InputTypeIn from \"@/refresh-components/inputs/InputTypeIn\";\nimport { DRAG_TYPES, LOCAL_STORAGE_KEYS } from \"@/sections/sidebar/constants\";\nimport {\n  shouldShowMoveModal,\n  showErrorNotification,\n  handleMoveOperation,\n} from \"@/sections/sidebar/sidebarUtils\";\nimport ButtonRenaming from \"@/refresh-components/buttons/ButtonRenaming\";\nimport useAppFocus from \"@/hooks/useAppFocus\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport {\n  SvgChevronLeft,\n  SvgEdit,\n  SvgFolder,\n  SvgFolderIn,\n  SvgFolderPlus,\n  SvgMoreHorizontal,\n  SvgShare,\n  SvgTrash,\n} from \"@opal/icons\";\nimport useOnMount from \"@/hooks/useOnMount\";\nimport { useAgents, usePinnedAgents } from \"@/hooks/useAgents\";\n\nexport interface PopoverSearchInputProps {\n  setShowMoveOptions: (show: boolean) => void;\n  onSearch: (term: string) => void;\n}\n\nexport function PopoverSearchInput({\n  setShowMoveOptions,\n  onSearch,\n}: PopoverSearchInputProps) {\n  const [searchTerm, setSearchTerm] = useState(\"\");\n\n  const handleChange = (e: React.ChangeEvent<HTMLInputElement>) => {\n    const value = e.target.value;\n    setSearchTerm(value);\n    onSearch(value);\n  };\n  const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {\n    if (e.key === \"Escape\") {\n      setShowMoveOptions(false);\n    }\n  };\n\n  const handleClickBackButton = (e: React.MouseEvent<HTMLButtonElement>) => {\n    e.stopPropagation();\n    setShowMoveOptions(false);\n    setSearchTerm(\"\");\n  };\n\n  return (\n    <div className=\"flex flex-row items-center\">\n      <Button\n        icon={SvgChevronLeft}\n        onClick={handleClickBackButton}\n        prominence=\"tertiary\"\n        size=\"sm\"\n      />\n      <InputTypeIn\n        type=\"text\"\n        value={searchTerm}\n        onChange={handleChange}\n        onKeyDown={handleKeyDown}\n        placeholder=\"Search Projects\"\n        onClick={noProp()}\n        variant=\"internal\"\n        autoFocus\n      />\n    </div>\n  );\n}\n\nexport interface ChatButtonProps {\n  chatSession: ChatSession;\n  project?: Project;\n  draggable?: boolean;\n}\n\nconst ChatButton = memo(\n  ({ chatSession, project, draggable = false }: ChatButtonProps) => {\n    const route = useAppRouter();\n    const activeSidebarTab = useAppFocus();\n    const active = useMemo(\n      () =>\n        activeSidebarTab.isChat() &&\n        activeSidebarTab.getId() === chatSession.id,\n      [activeSidebarTab, chatSession.id]\n    );\n    const mounted = useOnMount();\n    const [displayName, setDisplayName] = useState(\n      chatSession.name || UNNAMED_CHAT\n    );\n    const [renaming, setRenaming] = useState(false);\n    const [deleteConfirmationModalOpen, setDeleteConfirmationModalOpen] =\n      useState(false);\n    const [showMoveOptions, setShowMoveOptions] = useState(false);\n    const [showShareModal, setShowShareModal] = useState(false);\n    const [searchTerm, setSearchTerm] = useState(\"\");\n    const [popoverItems, setPopoverItems] = useState<React.ReactNode[]>([]);\n    const { refreshChatSessions, removeSession } = useChatSessions();\n    const {\n      refreshCurrentProjectDetails,\n      projects,\n      fetchProjects,\n      currentProjectId,\n      createProject,\n    } = useProjectsContext();\n    const { agents } = useAgents();\n    const { pinnedAgents, togglePinnedAgent } = usePinnedAgents();\n    const [popoverOpen, setPopoverOpen] = useState(false);\n    const [pendingMoveProjectId, setPendingMoveProjectId] = useState<\n      number | null\n    >(null);\n    const [showMoveCustomAgentModal, setShowMoveCustomAgentModal] =\n      useState(false);\n    const [navigateAfterMoveProjectId, setNavigateAfterMoveProjectId] =\n      useState<number | null>(null);\n\n    // Drag and drop setup for chat sessions\n    const dragId = `${DRAG_TYPES.CHAT}-${chatSession.id}`;\n    const { attributes, listeners, setNodeRef, transform, isDragging } =\n      useDraggable({\n        id: dragId,\n        data: {\n          type: DRAG_TYPES.CHAT,\n          chatSession,\n          projectId: project?.id,\n        },\n        disabled: !draggable || renaming,\n      });\n\n    // Sync local name state when chatSession.name changes (e.g., after auto-naming)\n    useEffect(() => {\n      const newName = chatSession.name || UNNAMED_CHAT;\n      const oldName = displayName;\n\n      // Only animate if transitioning from UNNAMED_CHAT to a real name\n      if (oldName === UNNAMED_CHAT && newName !== UNNAMED_CHAT && mounted) {\n        // Type out the name character by character\n        let currentIndex = 0;\n        const typingInterval = setInterval(() => {\n          currentIndex++;\n          setDisplayName(newName.slice(0, currentIndex));\n\n          if (currentIndex >= newName.length) {\n            clearInterval(typingInterval);\n          }\n        }, 30); // 30ms per character\n\n        return () => clearInterval(typingInterval);\n      } else {\n        // No animation for other changes (manual rename, initial load, etc.)\n        setDisplayName(newName);\n      }\n    }, [chatSession.name, mounted]);\n\n    const filteredProjects = useMemo(() => {\n      if (!searchTerm) return projects;\n      const term = searchTerm.toLowerCase();\n      return projects.filter((project) =>\n        project.name.toLowerCase().includes(term)\n      );\n    }, [projects, searchTerm]);\n\n    useEffect(() => {\n      if (!showMoveOptions) {\n        const popoverItems = [\n          <LineItem\n            key=\"share\"\n            icon={SvgShare}\n            onClick={noProp(() => setShowShareModal(true))}\n          >\n            Share\n          </LineItem>,\n          <LineItem\n            key=\"rename\"\n            icon={SvgEdit}\n            onClick={noProp(() => setRenaming(true))}\n          >\n            Rename\n          </LineItem>,\n          <LineItem\n            key=\"move\"\n            icon={SvgFolderIn}\n            onClick={noProp(() => setShowMoveOptions(true))}\n          >\n            Move to Project\n          </LineItem>,\n          project && (\n            <LineItem\n              key=\"remove\"\n              icon={SvgFolder}\n              onClick={noProp(() => handleRemoveFromProject())}\n            >\n              {`Remove from ${project.name}`}\n            </LineItem>\n          ),\n          null,\n          <LineItem\n            key=\"delete\"\n            icon={SvgTrash}\n            danger\n            onClick={noProp(() => setDeleteConfirmationModalOpen(true))}\n          >\n            Delete\n          </LineItem>,\n        ];\n        setPopoverItems(popoverItems);\n      } else {\n        const availableProjects = filteredProjects.filter(\n          (candidateProject) => candidateProject.id !== project?.id\n        );\n\n        const popoverItems = [\n          <PopoverSearchInput\n            key=\"search\"\n            setShowMoveOptions={setShowMoveOptions}\n            onSearch={setSearchTerm}\n          />,\n          ...availableProjects.map((targetProject) => (\n            <LineItem\n              key={targetProject.id}\n              icon={SvgFolder}\n              onClick={noProp(() => handleChatMove(targetProject))}\n            >\n              {targetProject.name}\n            </LineItem>\n          )),\n          // Show \"Create New Project\" option when no projects match the search\n          ...(availableProjects.length === 0 && searchTerm.trim() !== \"\"\n            ? [\n                null,\n                <LineItem\n                  key=\"create-new\"\n                  icon={SvgFolderPlus}\n                  onClick={noProp(() =>\n                    handleCreateProjectAndMove(searchTerm.trim())\n                  )}\n                >\n                  {`Create ${searchTerm.trim()}`}\n                </LineItem>,\n              ]\n            : []),\n        ];\n        setPopoverItems(popoverItems);\n      }\n    }, [\n      showMoveOptions,\n      filteredProjects,\n      refreshChatSessions,\n      fetchProjects,\n      currentProjectId,\n      refreshCurrentProjectDetails,\n      project,\n      chatSession.id,\n      searchTerm,\n      createProject,\n    ]);\n\n    // Pin the chat's agent when clicking on the conversation\n    async function handleClick() {\n      const agent = agents.find((a) => a.id === chatSession.persona_id);\n      if (agent) {\n        const isAlreadyPinned = pinnedAgents.some((a) => a.id === agent.id);\n        if (!isAlreadyPinned) {\n          await togglePinnedAgent(agent, true);\n        }\n      }\n    }\n\n    async function handleRename(newName: string) {\n      setDisplayName(newName);\n      await renameChatSession(chatSession.id, newName);\n      await refreshChatSessions();\n    }\n\n    async function handleChatDelete() {\n      try {\n        await deleteChatSession(chatSession.id);\n        removeSession(chatSession.id);\n\n        if (project) {\n          await fetchProjects();\n          await refreshCurrentProjectDetails();\n\n          // Only route if the deleted chat is the currently opened chat session\n          if (active) {\n            route({ projectId: project.id });\n          }\n        }\n        await refreshChatSessions();\n      } catch (error) {\n        console.error(\"Failed to delete chat:\", error);\n        showErrorNotification(\"Failed to delete chat. Please try again.\");\n      }\n    }\n\n    async function performMove(targetProjectId: number) {\n      try {\n        await handleMoveOperation({\n          chatSession,\n          targetProjectId,\n          refreshChatSessions,\n          refreshCurrentProjectDetails,\n          fetchProjects,\n          currentProjectId,\n        });\n        setShowMoveOptions(false);\n        setSearchTerm(\"\");\n      } catch (error) {\n        // handleMoveOperation already handles error notification\n        console.error(\"Failed to move chat:\", error);\n      }\n    }\n\n    async function handleChatMove(targetProject: Project) {\n      if (shouldShowMoveModal(chatSession)) {\n        setPendingMoveProjectId(targetProject.id);\n        setShowMoveCustomAgentModal(true);\n        return;\n      }\n      await performMove(targetProject.id);\n    }\n\n    async function handleRemoveFromProject() {\n      try {\n        await removeChatSessionFromProject(chatSession.id);\n        const projectRefreshPromise = currentProjectId\n          ? refreshCurrentProjectDetails()\n          : fetchProjects();\n        await Promise.all([refreshChatSessions(), projectRefreshPromise]);\n        setShowMoveOptions(false);\n        setSearchTerm(\"\");\n      } catch (error) {\n        console.error(\"Failed to remove chat from project:\", error);\n      }\n    }\n\n    async function handleCreateProjectAndMove(projectName: string) {\n      try {\n        // Create the new project using the service directly (without navigation)\n        const newProject = await createProjectService(projectName);\n\n        // Refresh projects list to include the new project\n        await fetchProjects();\n\n        // Mark that we want to navigate to this project after moving\n        setNavigateAfterMoveProjectId(newProject.id);\n\n        // Check if we should show the move modal for custom agents\n        if (shouldShowMoveModal(chatSession)) {\n          setPendingMoveProjectId(newProject.id);\n          setShowMoveCustomAgentModal(true);\n          setShowMoveOptions(false);\n          setSearchTerm(\"\");\n          return;\n        }\n\n        // Move the chat to the newly created project\n        await performMove(newProject.id);\n\n        // Navigate to the new project to see the chat\n        route({ projectId: newProject.id });\n        setNavigateAfterMoveProjectId(null);\n      } catch (error) {\n        console.error(\"Failed to create project and move chat:\", error);\n        showErrorNotification(\"Failed to create project. Please try again.\");\n        setNavigateAfterMoveProjectId(null);\n      }\n    }\n\n    const rightMenu = (\n      <>\n        <Popover.Trigger asChild onClick={noProp()}>\n          <div>\n            {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n            <IconButton\n              icon={SvgMoreHorizontal}\n              className={cn(\n                !popoverOpen && \"hidden\",\n                !renaming && \"group-hover/SidebarTab:flex\"\n              )}\n              transient={popoverOpen}\n              internal\n            />\n          </div>\n        </Popover.Trigger>\n        <Popover.Content side=\"right\" align=\"start\" width=\"md\">\n          <PopoverMenu>{popoverItems}</PopoverMenu>\n        </Popover.Content>\n      </>\n    );\n\n    const popover = (\n      <Popover\n        onOpenChange={(state) => {\n          setPopoverOpen(state);\n          if (!state) {\n            setShowMoveOptions(false);\n            setSearchTerm(\"\");\n          }\n        }}\n      >\n        <Popover.Anchor>\n          <SidebarTab\n            href={isDragging ? undefined : `/app?chatId=${chatSession.id}`}\n            onClick={handleClick}\n            selected={active}\n            rightChildren={rightMenu}\n            nested={!!project}\n          >\n            {renaming ? (\n              <ButtonRenaming\n                initialName={chatSession.name}\n                onRename={handleRename}\n                onClose={() => setRenaming(false)}\n              />\n            ) : (\n              displayName\n            )}\n          </SidebarTab>\n        </Popover.Anchor>\n      </Popover>\n    );\n\n    return (\n      <>\n        {deleteConfirmationModalOpen && (\n          <ConfirmationModalLayout\n            title=\"Delete Chat\"\n            icon={SvgTrash}\n            onClose={() => setDeleteConfirmationModalOpen(false)}\n            submit={\n              <Button\n                variant=\"danger\"\n                onClick={() => {\n                  setDeleteConfirmationModalOpen(false);\n                  handleChatDelete();\n                }}\n              >\n                Delete\n              </Button>\n            }\n          >\n            Are you sure you want to delete this chat? This action cannot be\n            undone.\n          </ConfirmationModalLayout>\n        )}\n\n        {showMoveCustomAgentModal && (\n          <MoveCustomAgentChatModal\n            onCancel={() => {\n              setShowMoveCustomAgentModal(false);\n              setPendingMoveProjectId(null);\n              setNavigateAfterMoveProjectId(null);\n            }}\n            onConfirm={async (doNotShowAgain: boolean) => {\n              if (doNotShowAgain && typeof window !== \"undefined\") {\n                window.localStorage.setItem(\n                  LOCAL_STORAGE_KEYS.HIDE_MOVE_CUSTOM_AGENT_MODAL,\n                  \"true\"\n                );\n              }\n              const target = pendingMoveProjectId;\n              const shouldNavigate = navigateAfterMoveProjectId;\n              setShowMoveCustomAgentModal(false);\n              setPendingMoveProjectId(null);\n              if (target != null) {\n                await performMove(target);\n                // Navigate if this was triggered by creating a new project\n                if (shouldNavigate != null) {\n                  route({ projectId: shouldNavigate });\n                  setNavigateAfterMoveProjectId(null);\n                }\n              }\n            }}\n          />\n        )}\n\n        {showShareModal && (\n          <ShareChatSessionModal\n            chatSession={chatSession}\n            onClose={() => setShowShareModal(false)}\n          />\n        )}\n\n        {draggable ? (\n          <div\n            ref={setNodeRef}\n            style={{\n              transform: transform\n                ? `translate3d(0px, ${transform.y}px, 0)`\n                : undefined,\n              opacity: isDragging ? 0.5 : 1,\n            }}\n            {...(mounted ? attributes : {})}\n            {...(mounted ? listeners : {})}\n          >\n            {popover}\n          </div>\n        ) : (\n          popover\n        )}\n      </>\n    );\n  }\n);\nChatButton.displayName = \"ChatButton\";\n\nexport default ChatButton;\n"
  },
  {
    "path": "web/src/sections/sidebar/ChatSearchCommandMenu.tsx",
    "content": "\"use client\";\n\nimport React, { useState, useMemo, useCallback } from \"react\";\nimport { useRouter } from \"next/navigation\";\nimport type { Route } from \"next\";\nimport CommandMenu, {\n  useCommandMenuContext,\n} from \"@/refresh-components/commandmenu/CommandMenu\";\nimport { useProjects } from \"@/lib/hooks/useProjects\";\nimport { useCreateModal } from \"@/refresh-components/contexts/ModalContext\";\nimport CreateProjectModal from \"@/components/modals/CreateProjectModal\";\nimport {\n  formatDisplayTime,\n  highlightMatch,\n} from \"@/sections/sidebar/chatSearchUtils\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\nimport { useCurrentAgent } from \"@/hooks/useAgents\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport {\n  useChatSearchOptimistic,\n  FilterableChat,\n} from \"./useChatSearchOptimistic\";\nimport {\n  SvgEditBig,\n  SvgFolder,\n  SvgFolderPlus,\n  SvgBubbleText,\n  SvgArrowUpDown,\n  SvgKeystroke,\n} from \"@opal/icons\";\nimport TextSeparator from \"@/refresh-components/TextSeparator\";\n\n/**\n * Dynamic footer that shows contextual action labels based on highlighted item type\n */\nfunction DynamicFooter() {\n  const { highlightedItemType } = useCommandMenuContext();\n\n  // \"Show all\" for filters, \"Open\" for everything else (items, actions, or no highlight)\n  const actionLabel = highlightedItemType === \"filter\" ? \"Show all\" : \"Open\";\n\n  return (\n    <CommandMenu.Footer\n      leftActions={\n        <>\n          <CommandMenu.FooterAction icon={SvgArrowUpDown} label=\"Select\" />\n          <CommandMenu.FooterAction icon={SvgKeystroke} label={actionLabel} />\n        </>\n      }\n    />\n  );\n}\n\ninterface ChatSearchCommandMenuProps {\n  trigger: React.ReactNode;\n}\n\ninterface FilterableProject {\n  id: number;\n  label: string;\n  description: string | null;\n  time: string;\n}\n\nexport default function ChatSearchCommandMenu({\n  trigger,\n}: ChatSearchCommandMenuProps) {\n  const [open, setOpen] = useState(false);\n  const [searchValue, setSearchValue] = useState(\"\");\n  const [activeFilter, setActiveFilter] = useState<\n    \"all\" | \"chats\" | \"projects\"\n  >(\"all\");\n  const [initialProjectName, setInitialProjectName] = useState<\n    string | undefined\n  >();\n  const router = useRouter();\n\n  // Data hooks\n  const { projects } = useProjects();\n  const combinedSettings = useSettingsContext();\n  const currentAgent = useCurrentAgent();\n  const createProjectModal = useCreateModal();\n\n  // Constants for preview limits\n  const PREVIEW_CHATS_LIMIT = 4;\n  const PREVIEW_PROJECTS_LIMIT = 3;\n\n  // Determine if we should enable optimistic search (when searching or viewing chats filter)\n  const shouldUseOptimisticSearch =\n    searchValue.trim().length > 0 || activeFilter === \"chats\";\n\n  // Use optimistic search hook for chat sessions (includes fallback from useChatSessions + useProjects)\n  const {\n    results: filteredChats,\n    isSearching,\n    hasMore,\n    isLoadingMore,\n    sentinelRef,\n  } = useChatSearchOptimistic({\n    searchQuery: searchValue,\n    enabled: shouldUseOptimisticSearch,\n  });\n\n  // Transform and filter projects (sorted by latest first)\n  const filteredProjects = useMemo<FilterableProject[]>(() => {\n    const projectList = projects\n      .map((project) => ({\n        id: project.id,\n        label: project.name,\n        description: project.description,\n        time: project.created_at,\n      }))\n      .sort((a, b) => new Date(b.time).getTime() - new Date(a.time).getTime());\n\n    if (!searchValue.trim()) return projectList;\n\n    const term = searchValue.toLowerCase();\n    return projectList.filter(\n      (project) =>\n        project.label.toLowerCase().includes(term) ||\n        project.description?.toLowerCase().includes(term)\n    );\n  }, [projects, searchValue]);\n\n  // Compute displayed items based on filter state\n  const displayedChats = useMemo(() => {\n    if (activeFilter === \"all\" && !searchValue.trim()) {\n      return filteredChats.slice(0, PREVIEW_CHATS_LIMIT);\n    }\n    return filteredChats;\n  }, [filteredChats, activeFilter, searchValue]);\n\n  const displayedProjects = useMemo(() => {\n    if (activeFilter === \"all\" && !searchValue.trim()) {\n      return filteredProjects.slice(0, PREVIEW_PROJECTS_LIMIT);\n    }\n    return filteredProjects;\n  }, [filteredProjects, activeFilter, searchValue]);\n\n  // Header filters for showing active filter as a chip\n  const headerFilters = useMemo(() => {\n    if (activeFilter === \"chats\") {\n      return [{ id: \"chats\", label: \"Sessions\" }];\n    }\n    if (activeFilter === \"projects\") {\n      return [{ id: \"projects\", label: \"Projects\" }];\n    }\n    return [];\n  }, [activeFilter]);\n\n  const handleFilterRemove = useCallback(() => {\n    setActiveFilter(\"all\");\n  }, []);\n\n  // Navigation handlers\n  const handleNewSession = useCallback(() => {\n    const href =\n      combinedSettings?.settings?.disable_default_assistant && currentAgent\n        ? `/app?agentId=${currentAgent.id}`\n        : \"/app\";\n    router.push(href as Route);\n    setOpen(false);\n  }, [router, combinedSettings, currentAgent]);\n\n  const handleChatSelect = useCallback(\n    (chatId: string) => {\n      router.push(`/chat?chatId=${chatId}` as Route);\n      setOpen(false);\n    },\n    [router]\n  );\n\n  const handleProjectSelect = useCallback(\n    (projectId: number) => {\n      router.push(`/chat?projectId=${projectId}` as Route);\n      setOpen(false);\n    },\n    [router]\n  );\n\n  const handleNewProject = useCallback(\n    (initialName?: string) => {\n      setInitialProjectName(initialName);\n      setOpen(false);\n      createProjectModal.toggle(true);\n    },\n    [createProjectModal]\n  );\n\n  const handleOpenChange = useCallback((newOpen: boolean) => {\n    setOpen(newOpen);\n    if (!newOpen) {\n      setSearchValue(\"\");\n      setActiveFilter(\"all\");\n    }\n  }, []);\n\n  const handleEmptyBackspace = useCallback(() => {\n    if (activeFilter !== \"all\") {\n      // Remove active filter, return to root menu\n      setActiveFilter(\"all\");\n    } else {\n      // No filter active, close the menu\n      setOpen(false);\n    }\n  }, [activeFilter]);\n\n  const hasSearchValue = searchValue.trim().length > 0;\n\n  return (\n    <>\n      <div aria-label=\"Open chat search\" onClick={() => setOpen(true)}>\n        {trigger}\n      </div>\n\n      <CommandMenu open={open} onOpenChange={handleOpenChange}>\n        <CommandMenu.Content>\n          <CommandMenu.Header\n            placeholder=\"Search chat sessions, projects...\"\n            value={searchValue}\n            onValueChange={setSearchValue}\n            filters={headerFilters}\n            onFilterRemove={handleFilterRemove}\n            onClose={() => setOpen(false)}\n            onEmptyBackspace={handleEmptyBackspace}\n          />\n\n          <CommandMenu.List\n            emptyMessage={\n              hasSearchValue ? \"No results found\" : \"No chats or projects yet\"\n            }\n          >\n            {/* New Session action - always visible in \"all\" filter, even during search */}\n            {activeFilter === \"all\" && (\n              <CommandMenu.Action\n                value=\"new-session\"\n                icon={SvgEditBig}\n                onSelect={handleNewSession}\n                defaultHighlight={!hasSearchValue}\n              >\n                New Session\n              </CommandMenu.Action>\n            )}\n\n            {/* Recent Sessions section - show if filter is 'all' or 'chats' */}\n            {(activeFilter === \"all\" || activeFilter === \"chats\") &&\n              displayedChats.length > 0 && (\n                <>\n                  {searchValue.trim().length === 0 && (\n                    <CommandMenu.Filter\n                      value=\"recent-sessions\"\n                      onSelect={() => setActiveFilter(\"chats\")}\n                      isApplied={\n                        activeFilter === \"chats\" ||\n                        filteredChats.length <= PREVIEW_CHATS_LIMIT\n                      }\n                    >\n                      {activeFilter === \"chats\" ? \"Recent\" : \"Recent Sessions\"}\n                    </CommandMenu.Filter>\n                  )}\n                  {displayedChats.map((chat) => (\n                    <CommandMenu.Item\n                      key={chat.id}\n                      value={`chat-${chat.id}`}\n                      icon={SvgBubbleText}\n                      rightContent={({ isHighlighted }) =>\n                        isHighlighted ? (\n                          <Text figureKeystroke text02>\n                            ↵\n                          </Text>\n                        ) : (\n                          <Text secondaryBody text03>\n                            {formatDisplayTime(chat.time)}\n                          </Text>\n                        )\n                      }\n                      onSelect={() => handleChatSelect(chat.id)}\n                    >\n                      {highlightMatch(chat.label, searchValue)}\n                    </CommandMenu.Item>\n                  ))}\n                  {/* Infinite scroll sentinel and loading indicator for chats */}\n                  {activeFilter === \"chats\" && hasMore && (\n                    <div ref={sentinelRef} className=\"h-1\" aria-hidden=\"true\" />\n                  )}\n                  {activeFilter === \"chats\" &&\n                    (isLoadingMore || isSearching) && (\n                      <div className=\"flex justify-center items-center py-3\">\n                        <div className=\"h-5 w-5 animate-spin rounded-full border-2 border-solid border-text-04 border-t-text-02\" />\n                      </div>\n                    )}\n                </>\n              )}\n\n            {/* Projects section - show if filter is 'all' or 'projects' */}\n            {(activeFilter === \"all\" || activeFilter === \"projects\") && (\n              <>\n                <CommandMenu.Filter\n                  value=\"projects\"\n                  onSelect={() => setActiveFilter(\"projects\")}\n                  isApplied={\n                    activeFilter === \"projects\" ||\n                    filteredProjects.length <= PREVIEW_PROJECTS_LIMIT\n                  }\n                >\n                  Projects\n                </CommandMenu.Filter>\n                {/* New Project action - shown after Projects filter when no search term */}\n                {!hasSearchValue && activeFilter === \"all\" && (\n                  <CommandMenu.Action\n                    value=\"new-project\"\n                    icon={SvgFolderPlus}\n                    onSelect={() => handleNewProject()}\n                  >\n                    New Project\n                  </CommandMenu.Action>\n                )}\n                {displayedProjects.map((project) => (\n                  <CommandMenu.Item\n                    key={project.id}\n                    value={`project-${project.id}`}\n                    icon={SvgFolder}\n                    rightContent={({ isHighlighted }) =>\n                      isHighlighted ? (\n                        <Text figureKeystroke text02>\n                          ↵\n                        </Text>\n                      ) : (\n                        <Text secondaryBody text03>\n                          {formatDisplayTime(project.time)}\n                        </Text>\n                      )\n                    }\n                    onSelect={() => handleProjectSelect(project.id)}\n                  >\n                    {highlightMatch(project.label, searchValue)}\n                  </CommandMenu.Item>\n                ))}\n              </>\n            )}\n\n            {/* Create New Project with search term - shown at bottom when searching */}\n            {hasSearchValue &&\n              (activeFilter === \"all\" || activeFilter === \"projects\") && (\n                <CommandMenu.Action\n                  value=\"create-project-with-name\"\n                  icon={SvgFolderPlus}\n                  onSelect={() => handleNewProject(searchValue.trim())}\n                >\n                  <>\n                    Create New Project \"\n                    <span className=\"text-text-05\">{searchValue.trim()}</span>\"\n                  </>\n                </CommandMenu.Action>\n              )}\n\n            {/* No more results separator - shown when no results for the active filter */}\n            {((activeFilter === \"chats\" && displayedChats.length === 0) ||\n              (activeFilter === \"projects\" && displayedProjects.length === 0) ||\n              (activeFilter === \"all\" &&\n                displayedChats.length === 0 &&\n                displayedProjects.length === 0)) && (\n              <TextSeparator text=\"No more results\" className=\"mt-auto mb-2\" />\n            )}\n          </CommandMenu.List>\n\n          <DynamicFooter />\n        </CommandMenu.Content>\n      </CommandMenu>\n\n      {/* Project creation modal */}\n      <createProjectModal.Provider>\n        <CreateProjectModal initialProjectName={initialProjectName} />\n      </createProjectModal.Provider>\n    </>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/CreateConnectorSidebar.tsx",
    "content": "import { useFormContext } from \"@/components/context/FormContext\";\nimport { credentialTemplates } from \"@/lib/connectors/credentials\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport StepSidebar from \"@/sections/sidebar/StepSidebarWrapper\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport { SvgSettings } from \"@opal/icons\";\n\nexport default function Sidebar() {\n  const { formStep, setFormStep, connector, allowAdvanced, allowCreate } =\n    useFormContext();\n  const noCredential = credentialTemplates[connector] == null;\n\n  const { isAdmin } = useUser();\n  const buttonName = isAdmin ? \"Admin Page\" : \"Curator Page\";\n\n  const settingSteps = [\n    ...(!noCredential ? [\"Credential\"] : []),\n    \"Connector\",\n    ...(connector == \"file\" ? [] : [\"Advanced (optional)\"]),\n  ];\n\n  return (\n    <StepSidebar\n      buttonName={buttonName}\n      buttonIcon={SvgSettings}\n      buttonHref=\"/admin/add-connector\"\n    >\n      <div className=\"relative\">\n        {connector != \"file\" && (\n          <div className=\"absolute h-[85%] left-[6px] top-[8px] bottom-0 w-0.5 bg-background-tint-04\"></div>\n        )}\n        {settingSteps.map((step, index) => {\n          const allowed =\n            (step == \"Connector\" && allowCreate) ||\n            (step == \"Advanced (optional)\" && allowAdvanced) ||\n            index <= formStep;\n\n          return (\n            <div\n              key={index}\n              className={`flex items-center mb-6 relative ${\n                !allowed ? \"cursor-not-allowed\" : \"cursor-pointer\"\n              }`}\n              onClick={() => {\n                if (allowed) {\n                  setFormStep(index - (noCredential ? 1 : 0));\n                }\n              }}\n            >\n              <div className=\"flex-shrink-0 mr-4 z-10\">\n                <div\n                  className={`rounded-full h-3.5 w-3.5 flex items-center justify-center ${\n                    allowed ? \"bg-blue-500\" : \"bg-background-tint-04\"\n                  }`}\n                >\n                  {formStep === index && (\n                    <div className=\"h-2 w-2 rounded-full bg-white\"></div>\n                  )}\n                </div>\n              </div>\n              <Text as=\"p\" text04={index <= formStep} text02={index > formStep}>\n                {step}\n              </Text>\n            </div>\n          );\n        })}\n      </div>\n    </StepSidebar>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/NotificationsPopover.tsx",
    "content": "\"use client\";\n\nimport useSWR from \"swr\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { useRouter } from \"next/navigation\";\nimport { Route } from \"next\";\nimport { track, AnalyticsEvent } from \"@/lib/analytics\";\nimport { Notification, NotificationType } from \"@/interfaces/settings\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport { SvgSparkle, SvgRefreshCw, SvgX } from \"@opal/icons\";\nimport { IconProps } from \"@opal/types\";\nimport { Button } from \"@opal/components\";\nimport SimpleLoader from \"@/refresh-components/loaders/SimpleLoader\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport Separator from \"@/refresh-components/Separator\";\n\nfunction getNotificationIcon(\n  notifType: string\n): React.FunctionComponent<IconProps> {\n  switch (notifType) {\n    case NotificationType.REINDEX:\n      return SvgRefreshCw;\n    default:\n      return SvgSparkle;\n  }\n}\n\ninterface NotificationsPopoverProps {\n  onClose: () => void;\n  onNavigate: () => void;\n  onShowBuildIntro?: () => void;\n}\n\nexport default function NotificationsPopover({\n  onClose,\n  onNavigate,\n  onShowBuildIntro,\n}: NotificationsPopoverProps) {\n  const router = useRouter();\n  const {\n    data: notifications,\n    mutate,\n    isLoading,\n  } = useSWR<Notification[]>(SWR_KEYS.notifications, errorHandlingFetcher);\n\n  const handleNotificationClick = (notification: Notification) => {\n    // Handle build_mode feature announcement specially - show intro animation\n    if (\n      notification.notif_type === NotificationType.FEATURE_ANNOUNCEMENT &&\n      notification.additional_data?.feature === \"build_mode\" &&\n      onShowBuildIntro\n    ) {\n      onNavigate();\n      onShowBuildIntro();\n      return;\n    }\n\n    const link = notification.additional_data?.link;\n    if (!link) return;\n\n    // Track release notes clicks\n    if (notification.notif_type === NotificationType.RELEASE_NOTES) {\n      track(AnalyticsEvent.RELEASE_NOTIFICATION_CLICKED, {\n        version: notification.additional_data?.version,\n      });\n    }\n\n    // External links open in new tab\n    if (link.startsWith(\"http://\") || link.startsWith(\"https://\")) {\n      if (!notification.dismissed) {\n        handleDismiss(notification.id);\n      }\n      window.open(link, \"_blank\", \"noopener,noreferrer\");\n      return;\n    }\n\n    // Relative links navigate internally\n    onNavigate();\n    router.push(link as Route);\n  };\n\n  const handleDismiss = async (\n    notificationId: number,\n    e?: React.MouseEvent\n  ) => {\n    e?.stopPropagation(); // Prevent triggering the LineItem onClick\n    try {\n      const response = await fetch(\n        `/api/notifications/${notificationId}/dismiss`,\n        {\n          method: \"POST\",\n        }\n      );\n      if (response.ok) {\n        mutate(); // Refresh the notifications list\n      }\n    } catch (error) {\n      console.error(\"Error dismissing notification:\", error);\n    }\n  };\n\n  return (\n    <Section gap={0.5} padding={0.25}>\n      <Section flexDirection=\"row\" justifyContent=\"between\" padding={0.5}>\n        <Text headingH3>Notifications</Text>\n        <Button icon={SvgX} prominence=\"tertiary\" size=\"sm\" onClick={onClose} />\n      </Section>\n\n      <Separator noPadding className=\"px-2\" />\n\n      <Section>\n        {isLoading ? (\n          <div className=\"h-48\">\n            <Section>\n              <SimpleLoader />\n            </Section>\n          </div>\n        ) : !notifications || notifications.length === 0 ? (\n          <div className=\"h-48\">\n            <Section>\n              <Text as=\"p\" text03>\n                No notifications\n              </Text>\n            </Section>\n          </div>\n        ) : (\n          <div className=\"max-h-96 overflow-y-auto w-full\">\n            <Section alignItems=\"stretch\" gap={0}>\n              {notifications.map((notification) => (\n                <LineItem\n                  key={notification.id}\n                  icon={getNotificationIcon(notification.notif_type)}\n                  description={notification.description ?? undefined}\n                  onClick={() => handleNotificationClick(notification)}\n                  strikethrough={notification.dismissed}\n                  rightChildren={\n                    !notification.dismissed ? (\n                      <Button\n                        prominence=\"tertiary\"\n                        size=\"sm\"\n                        icon={SvgX}\n                        onClick={(e) => handleDismiss(notification.id, e)}\n                        tooltip=\"Dismiss\"\n                      />\n                    ) : undefined\n                  }\n                >\n                  {notification.title}\n                </LineItem>\n              ))}\n            </Section>\n          </div>\n        )}\n      </Section>\n    </Section>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/ProjectFolderButton.tsx",
    "content": "\"use client\";\n\nimport React, { useState, memo } from \"react\";\nimport { Project, useProjectsContext } from \"@/providers/ProjectsContext\";\nimport { useDroppable } from \"@dnd-kit/core\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport ConfirmationModalLayout from \"@/refresh-components/layouts/ConfirmationModalLayout\";\nimport ChatButton from \"@/sections/sidebar/ChatButton\";\nimport { useAppRouter } from \"@/hooks/appNavigation\";\nimport { cn, noProp } from \"@/lib/utils\";\nimport { DRAG_TYPES } from \"./constants\";\nimport { SidebarTab } from \"@opal/components\";\nimport IconButton from \"@/refresh-components/buttons/IconButton\";\nimport Truncated from \"@/refresh-components/texts/Truncated\";\nimport { Button } from \"@opal/components\";\nimport ButtonRenaming from \"@/refresh-components/buttons/ButtonRenaming\";\nimport type { IconProps } from \"@opal/types\";\nimport useAppFocus from \"@/hooks/useAppFocus\";\nimport {\n  SvgEdit,\n  SvgFolder,\n  SvgFolderOpen,\n  SvgFolderPartialOpen,\n  SvgMoreHorizontal,\n  SvgTrash,\n} from \"@opal/icons\";\n\nexport interface ProjectFolderButtonProps {\n  project: Project;\n}\n\nconst ProjectFolderButton = memo(({ project }: ProjectFolderButtonProps) => {\n  const route = useAppRouter();\n  const [open, setOpen] = useState(false);\n  const [deleteConfirmationModalOpen, setDeleteConfirmationModalOpen] =\n    useState(false);\n  const { renameProject, deleteProject } = useProjectsContext();\n  const [isEditing, setIsEditing] = useState(false);\n  const [popoverOpen, setPopoverOpen] = useState(false);\n  const [isHoveringIcon, setIsHoveringIcon] = useState(false);\n  const [allowHoverEffect, setAllowHoverEffect] = useState(true);\n  const activeSidebar = useAppFocus();\n\n  // Make project droppable\n  const dropId = `project-${project.id}`;\n  const { setNodeRef, isOver } = useDroppable({\n    id: dropId,\n    data: {\n      type: DRAG_TYPES.PROJECT,\n      project,\n    },\n  });\n\n  function getFolderIcon(): React.FunctionComponent<IconProps> {\n    if (open) {\n      return SvgFolderOpen;\n    } else {\n      return isHoveringIcon && allowHoverEffect\n        ? SvgFolderPartialOpen\n        : SvgFolder;\n    }\n  }\n\n  function handleIconClick() {\n    setOpen((prev) => !prev);\n    setAllowHoverEffect(false);\n  }\n\n  function handleIconHover(hovering: boolean) {\n    setIsHoveringIcon(hovering);\n    // Re-enable hover effects when cursor leaves the icon\n    if (!hovering) {\n      setAllowHoverEffect(true);\n    }\n  }\n\n  function handleTextClick() {\n    route({ projectId: project.id });\n  }\n\n  async function handleRename(newName: string) {\n    await renameProject(project.id, newName);\n  }\n\n  const popoverItems = [\n    <LineItem\n      key=\"rename-project\"\n      icon={SvgEdit}\n      onClick={noProp(() => setIsEditing(true))}\n    >\n      Rename Project\n    </LineItem>,\n    null,\n    <LineItem\n      key=\"delete-project\"\n      icon={SvgTrash}\n      onClick={noProp(() => setDeleteConfirmationModalOpen(true))}\n      danger\n    >\n      Delete Project\n    </LineItem>,\n  ];\n\n  return (\n    <div\n      ref={setNodeRef}\n      className={cn(\n        \"transition-colors duration-200\",\n        isOver && \"bg-background-tint-03 rounded-08\"\n      )}\n    >\n      {/* Confirmation Modal (only for deletion) */}\n      {deleteConfirmationModalOpen && (\n        <ConfirmationModalLayout\n          title=\"Delete Project\"\n          icon={SvgTrash}\n          onClose={() => setDeleteConfirmationModalOpen(false)}\n          submit={\n            <Button\n              variant=\"danger\"\n              onClick={() => {\n                setDeleteConfirmationModalOpen(false);\n                deleteProject(project.id);\n              }}\n            >\n              Delete\n            </Button>\n          }\n        >\n          Are you sure you want to delete this project? This action cannot be\n          undone.\n        </ConfirmationModalLayout>\n      )}\n\n      {/* Project Folder */}\n      <Popover onOpenChange={setPopoverOpen}>\n        <Popover.Anchor>\n          <SidebarTab\n            icon={() => (\n              <Button\n                onMouseEnter={() => handleIconHover(true)}\n                onMouseLeave={() => handleIconHover(false)}\n                icon={getFolderIcon()}\n                prominence=\"tertiary\"\n                size=\"sm\"\n                onClick={noProp(handleIconClick)}\n              />\n            )}\n            selected={\n              activeSidebar.isProject() &&\n              activeSidebar.getId() === String(project.id)\n            }\n            onClick={noProp(handleTextClick)}\n            rightChildren={\n              <>\n                <Popover.Trigger asChild onClick={noProp()}>\n                  <div>\n                    {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}\n                    <IconButton\n                      icon={SvgMoreHorizontal}\n                      className={cn(\n                        !popoverOpen && \"hidden\",\n                        !isEditing && \"group-hover/SidebarTab:flex\"\n                      )}\n                      transient={popoverOpen}\n                      internal\n                    />\n                  </div>\n                </Popover.Trigger>\n\n                <Popover.Content side=\"right\" align=\"end\" width=\"md\">\n                  <PopoverMenu>{popoverItems}</PopoverMenu>\n                </Popover.Content>\n              </>\n            }\n          >\n            {isEditing ? (\n              <ButtonRenaming\n                initialName={project.name}\n                onRename={handleRename}\n                onClose={() => setIsEditing(false)}\n              />\n            ) : (\n              <Truncated text03>{project.name}</Truncated>\n            )}\n          </SidebarTab>\n        </Popover.Anchor>\n      </Popover>\n\n      {/* Project Chat-Sessions */}\n      {open &&\n        project.chat_sessions.map((chatSession) => (\n          <ChatButton\n            key={chatSession.id}\n            chatSession={chatSession}\n            project={project}\n            draggable\n          />\n        ))}\n    </div>\n  );\n});\nProjectFolderButton.displayName = \"ProjectFolderButton\";\n\nexport default ProjectFolderButton;\n"
  },
  {
    "path": "web/src/sections/sidebar/SidebarBody.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport OverflowDiv from \"@/refresh-components/OverflowDiv\";\n\nexport interface SidebarBodyProps {\n  pinnedContent?: React.ReactNode;\n  children?: React.ReactNode;\n  footer?: React.ReactNode;\n  /**\n   * Unique key to enable scroll position persistence across navigation.\n   * Pass this through from parent sidebar components (e.g., \"admin-sidebar\", \"app-sidebar\").\n   */\n  scrollKey: string;\n}\n\nexport default function SidebarBody({\n  pinnedContent,\n  children,\n  footer,\n  scrollKey,\n}: SidebarBodyProps) {\n  return (\n    <div className=\"flex flex-col min-h-0 h-full gap-3\">\n      {pinnedContent && <div className=\"px-2\">{pinnedContent}</div>}\n      <OverflowDiv className=\"gap-3 px-2\" scrollKey={scrollKey}>\n        {children}\n      </OverflowDiv>\n      {footer && <div className=\"px-2\">{footer}</div>}\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/SidebarSection.tsx",
    "content": "\"use client\";\n\nimport React from \"react\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport { cn } from \"@/lib/utils\";\n\nexport interface SidebarSectionProps {\n  title: string;\n  children?: React.ReactNode;\n  action?: React.ReactNode;\n  className?: string;\n}\n\nexport default function SidebarSection({\n  title,\n  children,\n  action,\n  className,\n}: SidebarSectionProps) {\n  return (\n    <div className={cn(\"flex flex-col group/SidebarSection\", className)}>\n      <div className=\"pl-2 pr-1.5 py-1 sticky top-[0rem] bg-background-tint-02 z-10 flex flex-row items-center justify-between min-h-[2rem]\">\n        <Text as=\"p\" secondaryBody text02>\n          {title}\n        </Text>\n        {action && (\n          <div className=\"flex-shrink-0 opacity-0 group-hover/SidebarSection:opacity-100 transition-opacity\">\n            {action}\n          </div>\n        )}\n      </div>\n      <div>{children}</div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/SidebarWrapper.tsx",
    "content": "import React, { useMemo } from \"react\";\nimport { cn } from \"@/lib/utils\";\nimport { Button } from \"@opal/components\";\nimport Logo from \"@/refresh-components/Logo\";\nimport { SvgSidebar } from \"@opal/icons\";\nimport { useSettingsContext } from \"@/providers/SettingsProvider\";\n\ninterface LogoSectionProps {\n  folded?: boolean;\n  onFoldClick?: () => void;\n}\n\nfunction LogoSection({ folded, onFoldClick }: LogoSectionProps) {\n  const settings = useSettingsContext();\n  const logoDisplayStyle = settings.enterpriseSettings?.logo_display_style;\n\n  const logo = useMemo(\n    () => (\n      <div className=\"px-1\">\n        <Logo folded={folded} size={28} />\n      </div>\n    ),\n    [folded]\n  );\n  const closeButton = useMemo(\n    () => (\n      <div className=\"px-1\">\n        <Button\n          icon={SvgSidebar}\n          prominence=\"tertiary\"\n          tooltip=\"Close Sidebar\"\n          size=\"md\"\n          onClick={onFoldClick}\n        />\n      </div>\n    ),\n    [onFoldClick]\n  );\n\n  return (\n    <div className=\"flex flex-row justify-between items-start pt-3 px-2\">\n      {folded === undefined ? (\n        logo\n      ) : folded && logoDisplayStyle !== \"name_only\" ? (\n        <>\n          <div className=\"group-hover/SidebarWrapper:hidden\">{logo}</div>\n          <div className=\"hidden group-hover/SidebarWrapper:flex\">\n            {closeButton}\n          </div>\n        </>\n      ) : folded ? (\n        closeButton\n      ) : (\n        <>\n          {logo}\n          {closeButton}\n        </>\n      )}\n    </div>\n  );\n}\n\nexport interface SidebarWrapperProps {\n  folded?: boolean;\n  onFoldClick?: () => void;\n  children?: React.ReactNode;\n}\n\nexport default function SidebarWrapper({\n  folded,\n  onFoldClick,\n  children,\n}: SidebarWrapperProps) {\n  return (\n    // This extra `div` wrapping needs to be present (for some reason).\n    // Without, the widths of the sidebars don't properly get set to the explicitly declared widths (i.e., `4rem` folded and `15rem` unfolded).\n    <div>\n      <div\n        className={cn(\n          \"h-screen flex flex-col bg-background-tint-02 py-2 gap-4 group/SidebarWrapper transition-width duration-200 ease-in-out\",\n          folded ? \"w-[3.25rem]\" : \"w-[15rem]\"\n        )}\n      >\n        <LogoSection folded={folded} onFoldClick={onFoldClick} />\n        {children}\n      </div>\n    </div>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/StepSidebarWrapper.tsx",
    "content": "import { ReactNode } from \"react\";\nimport type { IconProps } from \"@opal/types\";\nimport { SidebarTab } from \"@opal/components\";\nimport SidebarWrapper from \"@/sections/sidebar/SidebarWrapper\";\n\nexport interface StepSidebarProps {\n  children: ReactNode;\n  buttonName: string;\n  buttonIcon: React.FunctionComponent<IconProps>;\n  buttonHref: string;\n}\n\nexport default function StepSidebar({\n  children,\n  buttonName,\n  buttonIcon,\n  buttonHref,\n}: StepSidebarProps) {\n  return (\n    <SidebarWrapper>\n      <div className=\"px-2\">\n        <SidebarTab icon={buttonIcon} href={buttonHref}>\n          {buttonName}\n        </SidebarTab>\n      </div>\n\n      <div className=\"h-full w-full px-4\">{children}</div>\n    </SidebarWrapper>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/UpsertEmbeddingSidebar.tsx",
    "content": "import { useEmbeddingFormContext } from \"@/components/context/EmbeddingContext\";\nimport Text from \"@/refresh-components/texts/Text\";\nimport StepSidebar from \"@/sections/sidebar/StepSidebarWrapper\";\nimport { SvgSettings } from \"@opal/icons\";\nexport default function EmbeddingSidebar() {\n  const { formStep, setFormStep } = useEmbeddingFormContext();\n\n  const settingSteps = [\"Embedding Model\", \"Reranking Model\", \"Advanced\"];\n\n  return (\n    <StepSidebar\n      buttonName=\"Index Settings\"\n      buttonIcon={SvgSettings}\n      buttonHref=\"/admin/configuration/search\"\n    >\n      <div className=\"relative\">\n        <div className=\"absolute h-[85%] left-[6px] top-[8px] bottom-0 w-0.5 bg-background-tint-04\"></div>\n        {settingSteps.map((step, index) => {\n          const allowed = true; // All steps are always allowed for embedding configuration\n\n          return (\n            <div\n              key={index}\n              className={`flex items-center mb-6 relative ${\n                !allowed ? \"cursor-not-allowed\" : \"cursor-pointer\"\n              }`}\n              onClick={() => {\n                if (allowed) {\n                  setFormStep(index);\n                }\n              }}\n            >\n              <div className=\"flex-shrink-0 mr-4 z-10\">\n                <div\n                  className={`rounded-full h-3.5 w-3.5 flex items-center justify-center ${\n                    allowed ? \"bg-blue-500\" : \"bg-background-tint-04\"\n                  }`}\n                >\n                  {formStep === index && (\n                    <div className=\"h-2 w-2 rounded-full bg-white\"></div>\n                  )}\n                </div>\n              </div>\n              <Text as=\"p\" text04={index <= formStep} text02={index > formStep}>\n                {step}\n              </Text>\n            </div>\n          );\n        })}\n      </div>\n    </StepSidebar>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/UserAvatarPopover.tsx",
    "content": "\"use client\";\n\nimport { useState } from \"react\";\nimport { LOGOUT_DISABLED } from \"@/lib/constants\";\nimport { Notification } from \"@/interfaces/settings\";\nimport useSWR, { preload } from \"swr\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { SWR_KEYS } from \"@/lib/swr-keys\";\nimport { checkUserIsNoAuthUser, getUserDisplayName, logout } from \"@/lib/user\";\nimport { useUser } from \"@/providers/UserProvider\";\nimport LineItem from \"@/refresh-components/buttons/LineItem\";\nimport Popover, { PopoverMenu } from \"@/refresh-components/Popover\";\nimport { usePathname, useRouter, useSearchParams } from \"next/navigation\";\nimport { SidebarTab } from \"@opal/components\";\nimport NotificationsPopover from \"@/sections/sidebar/NotificationsPopover\";\nimport {\n  SvgBell,\n  SvgExternalLink,\n  SvgLogOut,\n  SvgUser,\n  SvgNotificationBubble,\n} from \"@opal/icons\";\nimport { Section } from \"@/layouts/general-layouts\";\nimport { toast } from \"@/hooks/useToast\";\nimport useAppFocus from \"@/hooks/useAppFocus\";\nimport { useVectorDbEnabled } from \"@/providers/SettingsProvider\";\nimport UserAvatar from \"@/refresh-components/avatars/UserAvatar\";\n\ninterface SettingsPopoverProps {\n  onUserSettingsClick: () => void;\n  onOpenNotifications: () => void;\n}\n\nfunction SettingsPopover({\n  onUserSettingsClick,\n  onOpenNotifications,\n}: SettingsPopoverProps) {\n  const { user } = useUser();\n  const { data: notifications } = useSWR<Notification[]>(\n    SWR_KEYS.notifications,\n    errorHandlingFetcher,\n    { revalidateOnFocus: false }\n  );\n  const router = useRouter();\n  const pathname = usePathname();\n  const searchParams = useSearchParams();\n  const undismissedCount =\n    notifications?.filter((n) => !n.dismissed).length ?? 0;\n  const isAnonymousUser =\n    user?.is_anonymous_user || checkUserIsNoAuthUser(user?.id ?? \"\");\n  const showLogout = user && !isAnonymousUser && !LOGOUT_DISABLED;\n  const showLogin = isAnonymousUser;\n\n  const handleLogin = () => {\n    const currentUrl = `${pathname}${\n      searchParams?.toString() ? `?${searchParams.toString()}` : \"\"\n    }`;\n    const encodedRedirect = encodeURIComponent(currentUrl);\n    router.push(`/auth/login?next=${encodedRedirect}`);\n  };\n\n  const handleLogout = () => {\n    logout()\n      .then((response) => {\n        if (!response?.ok) {\n          alert(\"Failed to logout\");\n          return;\n        }\n\n        const currentUrl = `${pathname}${\n          searchParams?.toString() ? `?${searchParams.toString()}` : \"\"\n        }`;\n\n        const encodedRedirect = encodeURIComponent(currentUrl);\n\n        router.push(\n          `/auth/login?disableAutoRedirect=true&next=${encodedRedirect}`\n        );\n      })\n\n      .catch(() => {\n        toast.error(\"Failed to logout\");\n      });\n  };\n\n  return (\n    <>\n      <PopoverMenu>\n        {[\n          <div key=\"user-settings\" data-testid=\"Settings/user-settings\">\n            <LineItem\n              icon={SvgUser}\n              href=\"/app/settings\"\n              onClick={onUserSettingsClick}\n            >\n              User Settings\n            </LineItem>\n          </div>,\n          <LineItem\n            key=\"notifications\"\n            icon={SvgBell}\n            onClick={onOpenNotifications}\n          >\n            {`Notifications${\n              undismissedCount > 0 ? ` (${undismissedCount})` : \"\"\n            }`}\n          </LineItem>,\n          <LineItem\n            key=\"help-faq\"\n            icon={SvgExternalLink}\n            href=\"https://docs.onyx.app\"\n            target=\"_blank\"\n            rel=\"noopener noreferrer\"\n          >\n            Help & FAQ\n          </LineItem>,\n          null,\n          showLogin && (\n            <LineItem key=\"log-in\" icon={SvgUser} onClick={handleLogin}>\n              Log in\n            </LineItem>\n          ),\n          showLogout && (\n            <LineItem\n              key=\"log-out\"\n              icon={SvgLogOut}\n              danger\n              onClick={handleLogout}\n            >\n              Log out\n            </LineItem>\n          ),\n        ]}\n      </PopoverMenu>\n    </>\n  );\n}\n\nexport interface SettingsProps {\n  folded?: boolean;\n  onShowBuildIntro?: () => void;\n}\n\nexport default function UserAvatarPopover({\n  folded,\n  onShowBuildIntro,\n}: SettingsProps) {\n  const [popupState, setPopupState] = useState<\n    \"Settings\" | \"Notifications\" | undefined\n  >(undefined);\n  const { user } = useUser();\n  const appFocus = useAppFocus();\n  const vectorDbEnabled = useVectorDbEnabled();\n\n  // Fetch notifications for display\n  // The GET endpoint also triggers a refresh if release notes are stale\n  const { data: notifications } = useSWR<Notification[]>(\n    SWR_KEYS.notifications,\n    errorHandlingFetcher\n  );\n\n  const userDisplayName = getUserDisplayName(user);\n  const undismissedCount =\n    notifications?.filter((n) => !n.dismissed).length ?? 0;\n  const hasNotifications = undismissedCount > 0;\n\n  const handlePopoverOpen = (state: boolean) => {\n    if (state) {\n      // Prefetch user settings data when popover opens for instant modal display\n      preload(\"/api/user/pats\", errorHandlingFetcher);\n      preload(\"/api/federated/oauth-status\", errorHandlingFetcher);\n      if (vectorDbEnabled) {\n        preload(\"/api/manage/connector-status\", errorHandlingFetcher);\n      }\n      preload(\"/api/llm/provider\", errorHandlingFetcher);\n      setPopupState(\"Settings\");\n    } else {\n      setPopupState(undefined);\n    }\n  };\n\n  return (\n    <Popover open={!!popupState} onOpenChange={handlePopoverOpen}>\n      <Popover.Trigger asChild>\n        <div id=\"onyx-user-dropdown\">\n          <SidebarTab\n            icon={() => (\n              <div className=\"w-[16px] flex flex-col justify-center items-center\">\n                <UserAvatar user={user} size={18} />\n              </div>\n            )}\n            rightChildren={\n              hasNotifications ? (\n                <Section padding={0.5}>\n                  <SvgNotificationBubble size={6} />\n                </Section>\n              ) : undefined\n            }\n            type=\"button\"\n            selected={!!popupState || appFocus.isUserSettings()}\n            folded={folded}\n          >\n            {userDisplayName}\n          </SidebarTab>\n        </div>\n      </Popover.Trigger>\n\n      <Popover.Content\n        align=\"end\"\n        side=\"right\"\n        width={popupState === \"Notifications\" ? \"xl\" : \"md\"}\n      >\n        {popupState === \"Settings\" && (\n          <SettingsPopover\n            onUserSettingsClick={() => {\n              setPopupState(undefined);\n            }}\n            onOpenNotifications={() => setPopupState(\"Notifications\")}\n          />\n        )}\n        {popupState === \"Notifications\" && (\n          <NotificationsPopover\n            onClose={() => setPopupState(\"Settings\")}\n            onNavigate={() => setPopupState(undefined)}\n            onShowBuildIntro={onShowBuildIntro}\n          />\n        )}\n      </Popover.Content>\n    </Popover>\n  );\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/chatSearchUtils.ts",
    "content": "import React from \"react\";\n\n/**\n * Escapes special regex characters in a string\n */\nfunction escapeRegex(str: string): string {\n  return str.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\");\n}\n\n/**\n * Highlights matched portions of text based on search query.\n * Matched portions get text-05 (highlighted), non-matched stay as default.\n */\nexport function highlightMatch(text: string, query: string): React.ReactNode {\n  if (!query.trim()) return text;\n\n  const escapedQuery = escapeRegex(query.trim());\n  const regex = new RegExp(`(${escapedQuery})`, \"gi\");\n  const parts = text.split(regex);\n\n  if (parts.length === 1) return text; // No matches\n\n  return parts.map((part, i) =>\n    i % 2 === 1\n      ? React.createElement(\"span\", { key: i, className: \"text-text-05\" }, part)\n      : React.createElement(React.Fragment, { key: i }, part)\n  );\n}\n\n/**\n * Formats a date string for display in the chat search menu.\n * Examples: \"just now\", \"5 mins ago\", \"3 hours ago\", \"yesterday\", \"3 days ago\", \"October 23\"\n */\nexport function formatDisplayTime(isoDate: string): string {\n  const date = new Date(isoDate);\n  const now = new Date();\n  const diffMs = now.getTime() - date.getTime();\n\n  if (diffMs < 0) {\n    return \"just now\";\n  }\n\n  const diffMins = Math.floor(diffMs / (1000 * 60));\n  const diffHours = Math.floor(diffMs / (1000 * 60 * 60));\n  const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));\n\n  // Just now (less than 1 minute)\n  if (diffMins < 1) {\n    return \"just now\";\n  }\n\n  // X mins ago (1-59 minutes)\n  if (diffMins < 60) {\n    return `${diffMins} ${diffMins === 1 ? \"min\" : \"mins\"} ago`;\n  }\n\n  // X hours ago (1-23 hours)\n  if (diffHours < 24) {\n    return `${diffHours} ${diffHours === 1 ? \"hour\" : \"hours\"} ago`;\n  }\n\n  // Check if yesterday\n  const yesterday = new Date(now);\n  yesterday.setDate(yesterday.getDate() - 1);\n  if (\n    date.getDate() === yesterday.getDate() &&\n    date.getMonth() === yesterday.getMonth() &&\n    date.getFullYear() === yesterday.getFullYear()\n  ) {\n    return \"yesterday\";\n  }\n\n  // X days ago (2-7 days)\n  if (diffDays <= 7) {\n    return `${diffDays} ${diffDays === 1 ? \"day\" : \"days\"} ago`;\n  }\n\n  // Month Day format (e.g., \"October 23\")\n  return date.toLocaleDateString(\"en-US\", {\n    month: \"long\",\n    day: \"numeric\",\n  });\n}\n"
  },
  {
    "path": "web/src/sections/sidebar/constants.ts",
    "content": "export const DRAG_TYPES = {\n  CHAT: \"chat\",\n  PROJECT: \"project\",\n  RECENTS: \"recents\",\n} as const;\n\nexport const LOCAL_STORAGE_KEYS = {\n  HIDE_MOVE_CUSTOM_AGENT_MODAL: \"onyx:hideMoveCustomAgentModal\",\n} as const;\n\nexport const DEFAULT_PERSONA_ID = 0;\n\nexport const FEATURE_FLAGS = {\n  CRAFT_ANIMATION_DISABLED: \"craft-animation-disabled\",\n} as const;\n"
  },
  {
    "path": "web/src/sections/sidebar/sidebarUtils.ts",
    "content": "import { ChatSession } from \"@/app/app/interfaces\";\nimport { LOCAL_STORAGE_KEYS, DEFAULT_PERSONA_ID } from \"./constants\";\nimport { moveChatSession } from \"@/app/app/projects/projectsService\";\nimport { toast } from \"@/hooks/useToast\";\n\nexport const shouldShowMoveModal = (chatSession: ChatSession): boolean => {\n  const hideModal =\n    typeof window !== \"undefined\" &&\n    window.localStorage.getItem(\n      LOCAL_STORAGE_KEYS.HIDE_MOVE_CUSTOM_AGENT_MODAL\n    ) === \"true\";\n\n  return !hideModal && chatSession.persona_id !== DEFAULT_PERSONA_ID;\n};\n\nexport const showErrorNotification = (message: string) => {\n  toast.error(message);\n};\n\nexport interface MoveOperationParams {\n  chatSession: ChatSession;\n  targetProjectId: number;\n  refreshChatSessions: () => Promise<any>;\n  refreshCurrentProjectDetails: () => Promise<any>;\n  fetchProjects: () => Promise<any>;\n  currentProjectId: number | null;\n}\n\nexport const handleMoveOperation = async ({\n  chatSession,\n  targetProjectId,\n  refreshChatSessions,\n  refreshCurrentProjectDetails,\n  fetchProjects,\n  currentProjectId,\n}: MoveOperationParams) => {\n  try {\n    await moveChatSession(targetProjectId, chatSession.id);\n    const projectRefreshPromise = currentProjectId\n      ? refreshCurrentProjectDetails()\n      : fetchProjects();\n    await Promise.all([refreshChatSessions(), projectRefreshPromise]);\n  } catch (error) {\n    console.error(\"Failed to perform move operation:\", error);\n    toast.error(\"Failed to move chat. Please try again.\");\n    throw error;\n  }\n};\n"
  },
  {
    "path": "web/src/sections/sidebar/useChatSearchOptimistic.ts",
    "content": "import { useState, useEffect, useCallback, useMemo, useRef } from \"react\";\nimport useSWRInfinite from \"swr/infinite\";\nimport useChatSessions from \"@/hooks/useChatSessions\";\nimport { useProjects } from \"@/lib/hooks/useProjects\";\nimport { errorHandlingFetcher } from \"@/lib/fetcher\";\nimport { ChatSearchResponse } from \"@/app/app/interfaces\";\nimport { UNNAMED_CHAT } from \"@/lib/constants\";\n\nexport interface FilterableChat {\n  id: string;\n  label: string;\n  time: string;\n}\n\ninterface UseChatSearchOptimisticOptions {\n  searchQuery: string;\n  enabled?: boolean;\n}\n\ninterface UseChatSearchOptimisticResult {\n  results: FilterableChat[];\n  isSearching: boolean;\n  hasMore: boolean;\n  fetchMore: () => Promise<void>;\n  isLoadingMore: boolean;\n  sentinelRef: React.RefObject<HTMLDivElement | null>;\n}\n\nconst PAGE_SIZE = 20;\nconst DEBOUNCE_MS = 300;\n\n// --- Helper Functions ---\n\nfunction transformApiResponse(response: ChatSearchResponse): FilterableChat[] {\n  const chats: FilterableChat[] = [];\n  for (const group of response.groups) {\n    for (const chat of group.chats) {\n      chats.push({\n        id: chat.id,\n        label: chat.name || UNNAMED_CHAT,\n        time: chat.time_created,\n      });\n    }\n  }\n  return chats;\n}\n\nfunction filterLocalSessions(\n  sessions: FilterableChat[],\n  searchQuery: string\n): FilterableChat[] {\n  if (!searchQuery.trim()) {\n    return sessions;\n  }\n  const term = searchQuery.toLowerCase();\n  return sessions.filter((chat) => chat.label.toLowerCase().includes(term));\n}\n\n// --- Hook ---\n\nexport function useChatSearchOptimistic(\n  options: UseChatSearchOptimisticOptions\n): UseChatSearchOptimisticResult {\n  const { searchQuery, enabled = true } = options;\n\n  // Debounced search query for API calls\n  const [debouncedQuery, setDebouncedQuery] = useState(searchQuery);\n\n  // Ref for infinite scroll sentinel\n  const sentinelRef = useRef<HTMLDivElement | null>(null);\n\n  // 1. Get already-cached data from existing hooks\n  const { chatSessions } = useChatSessions();\n  const { projects } = useProjects();\n\n  // 2. Build combined fallback data (instant display)\n  const fallbackSessions = useMemo<FilterableChat[]>(() => {\n    const chatMap = new Map<string, FilterableChat>();\n\n    // Add regular chats from useChatSessions\n    for (const chat of chatSessions) {\n      chatMap.set(chat.id, {\n        id: chat.id,\n        label: chat.name || UNNAMED_CHAT,\n        time: chat.time_updated || chat.time_created,\n      });\n    }\n\n    // Add project chats from useProjects\n    for (const project of projects) {\n      for (const chat of project.chat_sessions) {\n        chatMap.set(chat.id, {\n          id: chat.id,\n          label: chat.name || UNNAMED_CHAT,\n          time: chat.time_updated || chat.time_created,\n        });\n      }\n    }\n\n    // Sort by most recent\n    return Array.from(chatMap.values()).sort(\n      (a, b) => new Date(b.time).getTime() - new Date(a.time).getTime()\n    );\n  }, [chatSessions, projects]);\n\n  // Debounce the search query\n  useEffect(() => {\n    const timer = setTimeout(() => setDebouncedQuery(searchQuery), DEBOUNCE_MS);\n    return () => clearTimeout(timer);\n  }, [searchQuery]);\n\n  // 3. SWR key generator for infinite scroll\n  const getKey = useCallback(\n    (pageIndex: number, previousPageData: ChatSearchResponse | null) => {\n      // Don't fetch if not enabled\n      if (!enabled) return null;\n\n      // Reached the end\n      if (previousPageData && !previousPageData.has_more) return null;\n\n      const page = pageIndex + 1;\n      const params = new URLSearchParams();\n      params.set(\"page\", page.toString());\n      params.set(\"page_size\", PAGE_SIZE.toString());\n\n      if (debouncedQuery.trim()) {\n        params.set(\"query\", debouncedQuery);\n      }\n\n      return `/api/chat/search?${params.toString()}`;\n    },\n    [enabled, debouncedQuery]\n  );\n\n  // 4. Use SWR for paginated data (replaces fallback after fetch)\n  const { data, size, setSize, isValidating } =\n    useSWRInfinite<ChatSearchResponse>(getKey, errorHandlingFetcher, {\n      revalidateOnFocus: false,\n      dedupingInterval: 30000,\n      revalidateFirstPage: false,\n      persistSize: true,\n    });\n\n  // Transform SWR data to FilterableChat[]\n  const swrResults = useMemo<FilterableChat[]>(() => {\n    if (!data || data.length === 0) return [];\n\n    const allChats: FilterableChat[] = [];\n    for (const page of data) {\n      allChats.push(...transformApiResponse(page));\n    }\n\n    // Deduplicate by id (keep first occurrence)\n    const seen = new Set<string>();\n    return allChats.filter((chat) => {\n      if (seen.has(chat.id)) return false;\n      seen.add(chat.id);\n      return true;\n    });\n  }, [data]);\n\n  // Determine if we have more pages\n  const hasMore = useMemo(() => {\n    if (!data || data.length === 0) return true;\n    const lastPage = data[data.length - 1];\n    return lastPage?.has_more ?? false;\n  }, [data]);\n\n  // 5. Return fallback if no SWR data yet, otherwise return SWR data\n  const results = useMemo<FilterableChat[]>(() => {\n    // If SWR has data, use it (paginated, searchable)\n    if (swrResults.length > 0) {\n      return swrResults;\n    }\n\n    // Otherwise use fallback (already-cached data)\n    // Apply local filtering if there's a search query\n    if (searchQuery.trim()) {\n      return filterLocalSessions(fallbackSessions, searchQuery);\n    }\n\n    return fallbackSessions;\n  }, [swrResults, fallbackSessions, searchQuery]);\n\n  // Loading states\n  const isSearching = isValidating && size === 1;\n  const isLoadingMore = isValidating && size > 1;\n\n  // Fetch more results for infinite scroll\n  const fetchMore = useCallback(async () => {\n    if (!enabled || isValidating || !hasMore) {\n      return;\n    }\n    await setSize(size + 1);\n  }, [enabled, isValidating, hasMore, setSize, size]);\n\n  // IntersectionObserver for infinite scroll\n  useEffect(() => {\n    const sentinel = sentinelRef.current;\n    if (!sentinel || !enabled) return;\n\n    const observer = new IntersectionObserver(\n      (entries) => {\n        const entry = entries[0];\n        if (entry?.isIntersecting && hasMore && !isValidating) {\n          fetchMore();\n        }\n      },\n      {\n        root: null,\n        rootMargin: \"100px\",\n        threshold: 0,\n      }\n    );\n\n    observer.observe(sentinel);\n\n    return () => {\n      observer.disconnect();\n    };\n  }, [enabled, hasMore, isValidating, fetchMore]);\n\n  return {\n    results,\n    isSearching,\n    hasMore,\n    fetchMore,\n    isLoadingMore,\n    sentinelRef,\n  };\n}\n"
  },
  {
    "path": "web/src/types.ts",
    "content": "/**\n * Utility type that removes style override properties from a component's props.\n *\n * This type omits `className` and `style` properties from type `T`, preventing\n * external style customization. Useful for enforcing consistent design system\n * styling and preventing arbitrary style overrides.\n *\n * @template T - The base type to remove style properties from\n *\n * @example\n * ```tsx\n * // Create a button that doesn't allow style overrides\n * interface ButtonProps extends WithoutStyles<React.ComponentProps<\"button\">> {\n *   variant: \"primary\" | \"secondary\";\n * }\n *\n * function Button({ variant, ...props }: ButtonProps) {\n *   // Users cannot pass className or style props\n *   return <button {...props} className={getVariantClass(variant)} />;\n * }\n *\n * // ✅ Valid\n * <Button variant=\"primary\" onClick={handleClick} />\n *\n * // ❌ TypeScript error - className not allowed\n * <Button variant=\"primary\" className=\"custom-class\" />\n * ```\n */\nexport type WithoutStyles<T> = Omit<T, \"className\" | \"style\">;\n"
  },
  {
    "path": "web/tailwind-themes/tailwind.config.js",
    "content": "const plugin = require(\"tailwindcss/plugin\");\n\n/** @type {import('tailwindcss').Config} */\n\nmodule.exports = {\n  darkMode: \"class\",\n  content: [\"./src/**/*.{js,jsx,ts,tsx}\", \"./lib/opal/**/*.{js,jsx,ts,tsx}\"],\n  theme: {\n    container: {\n      center: true,\n    },\n    transparent: \"transparent\",\n    current: \"currentColor\",\n    extend: {\n      lineClamp: {\n        7: \"7\",\n        8: \"8\",\n        9: \"9\",\n        10: \"10\",\n      },\n      transitionProperty: {\n        spacing: \"margin, padding\",\n      },\n      keyframes: {\n        shimmer: {\n          \"0%\": { backgroundPosition: \"100% 0\" },\n          \"100%\": { backgroundPosition: \"-100% 0\" },\n        },\n        \"subtle-pulse\": {\n          \"0%, 100%\": { opacity: 0.9 },\n          \"50%\": { opacity: 0.5 },\n        },\n        pulse: {\n          \"0%, 100%\": { opacity: 0.9 },\n          \"50%\": { opacity: 0.4 },\n        },\n        \"fade-in-scale\": {\n          \"0%\": { opacity: \"0\", transform: \"scale(0.95)\" },\n          \"100%\": { opacity: \"1\", transform: \"scale(1)\" },\n        },\n        \"fade-out-scale\": {\n          \"0%\": { opacity: \"1\", transform: \"scale(1)\" },\n          \"100%\": { opacity: \"0\", transform: \"scale(0.95)\" },\n        },\n        \"collapsible-down\": {\n          from: { height: \"0\" },\n          to: { height: \"var(--radix-collapsible-content-height)\" },\n        },\n        \"collapsible-up\": {\n          from: { height: \"var(--radix-collapsible-content-height)\" },\n          to: { height: \"0\" },\n        },\n      },\n      animation: {\n        shimmer: \"shimmer 1.8s ease-out infinite\",\n        \"fade-in-up\": \"fadeInUp 0.5s ease-out\",\n        \"subtle-pulse\": \"subtle-pulse 2s ease-in-out infinite\",\n        pulse: \"pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite\",\n        \"fade-in-scale\": \"fade-in-scale 0.2s ease-out forwards\",\n        \"fade-out-scale\": \"fade-out-scale 0.2s ease-in forwards\",\n        \"collapsible-down\": \"collapsible-down 0.2s ease-out\",\n        \"collapsible-up\": \"collapsible-up 0.2s ease-out\",\n      },\n      gradientColorStops: {\n        \"neutral-10\": \"var(--neutral-10) 5%\",\n      },\n      screens: {\n        \"2xl\": \"1420px\",\n        \"3xl\": \"1700px\",\n        \"4xl\": \"2000px\",\n        mobile: { max: \"767px\" },\n        desktop: \"768px\",\n        tall: { raw: \"(min-height: 800px)\" },\n        short: { raw: \"(max-height: 799px)\" },\n        \"very-short\": { raw: \"(max-height: 600px)\" },\n      },\n      fontFamily: {\n        sans: [\"Hanken Grotesk\", \"sans-serif\"],\n        hanken: [\"Hanken Grotesk\", \"sans-serif\"],\n      },\n      width: {\n        \"message-xs\": \"450px\",\n        \"message-sm\": \"550px\",\n        \"message-default\": \"740px\",\n        \"searchbar-xs\": \"560px\",\n        \"searchbar-sm\": \"660px\",\n        searchbar: \"850px\",\n        \"document-sidebar\": \"800px\",\n        \"document-sidebar-large\": \"1000px\",\n        \"searchbar-max\": \"60px\",\n      },\n      maxWidth: {\n        \"document-sidebar\": \"1000px\",\n        \"message-max\": \"850px\",\n        \"content-max\": \"725px\",\n        \"searchbar-max\": \"800px\",\n      },\n      colors: {\n        // New and updated Figma stylings\n        \"text-05\": \"var(--text-05)\",\n        \"text-04\": \"var(--text-04)\",\n        \"text-03\": \"var(--text-03)\",\n        \"text-02\": \"var(--text-02)\",\n        \"text-01\": \"var(--text-01)\",\n        \"text-inverted-01\": \"var(--text-inverted-01)\",\n        \"text-inverted-02\": \"var(--text-inverted-02)\",\n        \"text-inverted-03\": \"var(--text-inverted-03)\",\n        \"text-inverted-04\": \"var(--text-inverted-04)\",\n        \"text-inverted-05\": \"var(--text-inverted-05)\",\n        \"text-light-03\": \"var(--text-light-03)\",\n        \"text-light-05\": \"var(--text-light-05)\",\n        \"text-dark-03\": \"var(--text-dark-03)\",\n        \"text-dark-05\": \"var(--text-dark-05)\",\n        \"background-neutral-00\": \"var(--background-neutral-00)\",\n        \"background-neutral-01\": \"var(--background-neutral-01)\",\n        \"background-neutral-02\": \"var(--background-neutral-02)\",\n        \"background-neutral-03\": \"var(--background-neutral-03)\",\n        \"background-neutral-04\": \"var(--background-neutral-04)\",\n        \"background-neutral-light-00\": \"var(--background-neutral-light-00)\",\n        \"background-neutral-light-03\": \"var(--background-neutral-light-03)\",\n        \"background-neutral-dark-03\": \"var(--background-neutral-dark-03)\",\n        \"background-neutral-inverted-04\":\n          \"var(--background-neutral-inverted-04)\",\n        \"background-neutral-inverted-03\":\n          \"var(--background-neutral-inverted-03)\",\n        \"background-neutral-inverted-02\":\n          \"var(--background-neutral-inverted-02)\",\n        \"background-neutral-inverted-01\":\n          \"var(--background-neutral-inverted-01)\",\n        \"background-neutral-inverted-00\":\n          \"var(--background-neutral-inverted-00)\",\n        \"background-tint-00\": \"var(--background-tint-00)\",\n        \"background-tint-01\": \"var(--background-tint-01)\",\n        \"background-tint-02\": \"var(--background-tint-02)\",\n        \"background-tint-03\": \"var(--background-tint-03)\",\n        \"background-tint-04\": \"var(--background-tint-04)\",\n        \"background-tint-inverted-04\": \"var(--background-tint-inverted-04)\",\n        \"background-tint-inverted-03\": \"var(--background-tint-inverted-03)\",\n        \"background-tint-inverted-02\": \"var(--background-tint-inverted-02)\",\n        \"background-tint-inverted-01\": \"var(--background-tint-inverted-01)\",\n        \"background-tint-inverted-00\": \"var(--background-tint-inverted-00)\",\n        \"border-01\": \"var(--border-01)\",\n        \"border-02\": \"var(--border-02)\",\n        \"border-03\": \"var(--border-03)\",\n        \"border-04\": \"var(--border-04)\",\n        \"border-05\": \"var(--border-05)\",\n        \"border-inverted-05\": \"var(--border-inverted-05)\",\n        \"border-inverted-04\": \"var(--border-inverted-04)\",\n        \"border-inverted-03\": \"var(--border-inverted-03)\",\n        \"border-inverted-02\": \"var(--border-inverted-02)\",\n        \"border-inverted-01\": \"var(--border-inverted-01)\",\n        \"action-link-06\": \"var(--action-link-06)\",\n        \"action-link-05\": \"var(--action-link-05)\",\n        \"action-link-04\": \"var(--action-link-04)\",\n        \"action-link-03\": \"var(--action-link-03)\",\n        \"action-link-02\": \"var(--action-link-02)\",\n        \"action-link-01\": \"var(--action-link-01)\",\n        \"action-link-00\": \"var(--action-link-00)\",\n        \"action-danger-06\": \"var(--action-danger-06)\",\n        \"action-danger-05\": \"var(--action-danger-05)\",\n        \"action-danger-04\": \"var(--action-danger-04)\",\n        \"action-danger-03\": \"var(--action-danger-03)\",\n        \"action-danger-02\": \"var(--action-danger-02)\",\n        \"action-danger-01\": \"var(--action-danger-01)\",\n        \"action-text-link-05\": \"var(--action-text-link-05)\",\n        \"action-text-danger-05\": \"var(--action-text-danger-05)\",\n        \"highlight-match\": \"var(--highlight-match)\",\n        \"highlight-selection\": \"var(--highlight-selection)\",\n        \"highlight-active\": \"var(--highlight-active)\",\n        \"highlight-accent\": \"var(--highlight-accent)\",\n        \"theme-primary-06\": \"var(--theme-primary-06)\",\n        \"theme-primary-05\": \"var(--theme-primary-05)\",\n        \"theme-primary-04\": \"var(--theme-primary-04)\",\n        \"theme-gradient-05\": \"var(--theme-gradient-05)\",\n        \"theme-gradient-00\": \"var(--theme-gradient-00)\",\n        \"theme-red-05\": \"var(--theme-red-05)\",\n        \"theme-red-04\": \"var(--theme-red-04)\",\n        \"theme-red-02\": \"var(--theme-red-02)\",\n        \"theme-red-01\": \"var(--theme-red-01)\",\n        \"theme-orange-05\": \"var(--theme-orange-05)\",\n        \"theme-orange-04\": \"var(--theme-orange-04)\",\n        \"theme-orange-02\": \"var(--theme-orange-02)\",\n        \"theme-orange-01\": \"var(--theme-orange-01)\",\n        \"theme-amber-05\": \"var(--theme-amber-05)\",\n        \"theme-amber-04\": \"var(--theme-amber-04)\",\n        \"theme-amber-02\": \"var(--theme-amber-02)\",\n        \"theme-amber-01\": \"var(--theme-amber-01)\",\n        \"theme-yellow-05\": \"var(--theme-yellow-05)\",\n        \"theme-yellow-02\": \"var(--theme-yellow-02)\",\n        \"theme-yellow-01\": \"var(--theme-yellow-01)\",\n        \"theme-green-05\": \"var(--theme-green-05)\",\n        \"theme-green-02\": \"var(--theme-green-02)\",\n        \"theme-green-01\": \"var(--theme-green-01)\",\n        \"theme-lime-05\": \"var(--theme-lime-05)\",\n        \"theme-lime-02\": \"var(--theme-lime-02)\",\n        \"theme-lime-01\": \"var(--theme-lime-01)\",\n        \"theme-cyan-05\": \"var(--theme-cyan-05)\",\n        \"theme-cyan-02\": \"var(--theme-cyan-02)\",\n        \"theme-cyan-01\": \"var(--theme-cyan-01)\",\n        \"theme-sky-05\": \"var(--theme-sky-05)\",\n        \"theme-sky-02\": \"var(--theme-sky-02)\",\n        \"theme-sky-01\": \"var(--theme-sky-01)\",\n        \"theme-blue-05\": \"var(--theme-blue-05)\",\n        \"theme-blue-02\": \"var(--theme-blue-02)\",\n        \"theme-blue-01\": \"var(--theme-blue-01)\",\n        \"theme-purple-05\": \"var(--theme-purple-05)\",\n        \"theme-purple-02\": \"var(--theme-purple-02)\",\n        \"theme-purple-01\": \"var(--theme-purple-01)\",\n        \"theme-magenta-05\": \"var(--theme-magenta-05)\",\n        \"theme-magenta-02\": \"var(--theme-magenta-02)\",\n        \"theme-magenta-01\": \"var(--theme-magenta-01)\",\n        \"onyx-ink-100\": \"var(--onyx-ink-100)\",\n        \"onyx-ink-95\": \"var(--onyx-ink-95)\",\n        \"onyx-ink-90\": \"var(--onyx-ink-90)\",\n        \"onyx-chrome-20\": \"var(--onyx-chrome-20)\",\n        \"onyx-chrome-10\": \"var(--onyx-chrome-10)\",\n        \"onyx-chrome-00\": \"var(--onyx-chrome-00)\",\n        \"tint-98\": \"var(--tint-98)\",\n        \"tint-95\": \"var(--tint-95)\",\n        \"tint-90\": \"var(--tint-90)\",\n        \"tint-85\": \"var(--tint-85)\",\n        \"tint-80\": \"var(--tint-80)\",\n        \"tint-60\": \"var(--tint-60)\",\n        \"tint-50\": \"var(--tint-50)\",\n        \"tint-40\": \"var(--tint-40)\",\n        \"tint-20\": \"var(--tint-20)\",\n        \"tint-10\": \"var(--tint-10)\",\n        \"tint-05\": \"var(--tint-05)\",\n        \"tint-02\": \"var(--tint-02)\",\n        \"shadow-01\": \"var(--shadow-01)\",\n        \"shadow-02\": \"var(--shadow-02)\",\n        \"shadow-03\": \"var(--shadow-03)\",\n        \"mask-01\": \"var(--mask-01)\",\n        \"mask-02\": \"var(--mask-02)\",\n        \"mask-03\": \"var(--mask-03)\",\n        \"status-info-05\": \"var(--status-info-05)\",\n        \"status-info-02\": \"var(--status-info-02)\",\n        \"status-info-01\": \"var(--status-info-01)\",\n        \"status-info-00\": \"var(--status-info-00)\",\n        \"status-success-05\": \"var(--status-success-05)\",\n        \"status-success-02\": \"var(--status-success-02)\",\n        \"status-success-01\": \"var(--status-success-01)\",\n        \"status-success-00\": \"var(--status-success-00)\",\n        \"status-warning-05\": \"var(--status-warning-05)\",\n        \"status-warning-02\": \"var(--status-warning-02)\",\n        \"status-warning-01\": \"var(--status-warning-01)\",\n        \"status-warning-00\": \"var(--status-warning-00)\",\n        \"status-error-05\": \"var(--status-error-05)\",\n        \"status-error-02\": \"var(--status-error-02)\",\n        \"status-error-01\": \"var(--status-error-01)\",\n        \"status-error-00\": \"var(--status-error-00)\",\n        \"status-text-success-05\": \"var(--status-text-success-05)\",\n        \"status-text-info-05\": \"var(--status-text-info-05)\",\n        \"status-text-warning-05\": \"var(--status-text-warning-05)\",\n        \"status-text-error-05\": \"var(--status-text-error-05)\",\n\n        \"code-code\": \"var(--code-code)\",\n        \"code-comment\": \"var(--code-comment)\",\n        \"code-keyword\": \"var(--code-keyword)\",\n        \"code-string\": \"var(--code-string)\",\n        \"code-number\": \"var(--code-number)\",\n        \"code-definition\": \"var(--code-definition)\",\n        \"background-code-01\": \"var(--background-code-01)\",\n\n        // Shimmer colors for loading animations\n        \"shimmer-base\": \"var(--shimmer-base)\",\n        \"shimmer-highlight\": \"var(--shimmer-highlight)\",\n\n        // Tailwind defaults\n        background: \"var(--background-tint-01)\",\n        foreground: \"var(--background-tint-inverted-01)\",\n        border: \"var(--border-01)\",\n        text: \"var(--text-04)\",\n      },\n      borderRadius: {\n        \"02\": \"var(--border-radius-02)\",\n        \"04\": \"var(--border-radius-04)\",\n        \"08\": \"var(--border-radius-08)\",\n        12: \"var(--border-radius-12)\",\n        16: \"var(--border-radius-16)\",\n        full: \"var(--border-radius-full)\",\n      },\n      fontSize: {\n        \"2xs\": \"0.625rem\",\n        \"code-sm\": \"small\",\n      },\n      fontWeight: {\n        description: \"375\",\n        \"token-bold\": \"bold\",\n      },\n      fontStyle: {\n        \"token-italic\": \"italic\",\n      },\n      backdropBlur: {\n        \"01\": \"var(--backdrop-blur-01)\",\n        \"02\": \"var(--backdrop-blur-02)\",\n        \"03\": \"var(--backdrop-blur-03)\",\n      },\n      calendar: {\n        // Light mode\n        \"bg-selected\": \"var(--calendar-bg-selected)\",\n        \"bg-outside-selected\": \"var(--calendar-bg-outside-selected)\",\n        \"text-muted\": \"var(--calendar-text-muted)\",\n        \"text-selected\": \"var(--calendar-text-selected)\",\n        \"range-start\": \"var(--calendar-range-start)\",\n        \"range-middle\": \"var(--calendar-range-middle)\",\n        \"range-end\": \"var(--calendar-range-end)\",\n        \"text-in-range\": \"var(--calendar-text-in-range)\",\n\n        // Dark mode\n        \"bg-selected-dark\": \"var(--calendar-bg-selected-dark)\",\n        \"bg-outside-selected-dark\": \"var(--calendar-bg-outside-selected-dark)\",\n        \"text-muted-dark\": \"var(--calendar-text-muted-dark)\",\n        \"text-selected-dark\": \"var(--calendar-text-selected-dark)\",\n        \"range-start-dark\": \"var(--calendar-range-start-dark)\",\n        \"range-middle-dark\": \"var(--calendar-range-middle-dark)\",\n        \"range-end-dark\": \"var(--calendar-range-end-dark)\",\n        \"text-in-range-dark\": \"var(--calendar-text-in-range-dark)\",\n\n        // Hover effects\n        \"hover-bg\": \"var(--calendar-hover-bg)\",\n        \"hover-bg-dark\": \"var(--calendar-hover-bg-dark)\",\n        \"hover-text\": \"var(--calendar-hover-text)\",\n        \"hover-text-dark\": \"var(--calendar-hover-text-dark)\",\n\n        // Today's date\n        \"today-bg\": \"var(--calendar-today-bg)\",\n        \"today-bg-dark\": \"var(--calendar-today-bg-dark)\",\n        \"today-text\": \"var(--calendar-today-text)\",\n        \"today-text-dark\": \"var(--calendar-today-text-dark)\",\n      },\n    },\n  },\n  safelist: [\n    {\n      pattern:\n        /^(bg-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,\n      variants: [\"hover\", \"ui-selected\"],\n    },\n    {\n      pattern:\n        /^(text-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,\n      variants: [\"hover\", \"ui-selected\"],\n    },\n    {\n      pattern:\n        /^(border-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,\n      variants: [\"hover\", \"ui-selected\"],\n    },\n    {\n      pattern:\n        /^(ring-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,\n    },\n    {\n      pattern:\n        /^(stroke-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,\n    },\n    {\n      pattern:\n        /^(fill-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,\n    },\n  ],\n  // Note: @tailwindcss/container-queries plugin is not needed here.\n  // Container queries (@container, cqw units) are native in Tailwind v4+.\n  // After upgrading to Tailwind 4, this comment can be removed.\n  plugins: [\n    require(\"@tailwindcss/typography\"),\n    require(\"@headlessui/tailwindcss\"),\n    require(\"tailwindcss-animate\"),\n    require(\"@tailwindcss/container-queries\"),\n    plugin(({ addVariant }) => {\n      addVariant(\"focus-within-nonactive\", \"&:focus-within:not(:active)\");\n    }),\n    plugin(({ addUtilities }) => {\n      addUtilities({\n        \".break-anywhere\": {\n          \"overflow-wrap\": \"anywhere\",\n        },\n      });\n    }),\n  ],\n};\n"
  },
  {
    "path": "web/tailwind.config.js",
    "content": "var merge = require(\"lodash/merge\");\nvar path = require(\"path\");\nvar fs = require(\"fs\");\nvar { createRequire } = require(\"module\");\n\n// Use relative paths for imports\nconst baseThemes = require(\"./tailwind-themes/tailwind.config.js\");\n\nlet customThemes = null;\n\n// Determine which theme to load: custom theme if specified, otherwise default\nconst themeName = process.env.NEXT_PUBLIC_THEME || \"default\";\nconst customThemePath = path.join(\n  __dirname,\n  \"tailwind-themes/custom\",\n  themeName,\n  \"tailwind.config.js\"\n);\n\nif (fs.existsSync(customThemePath)) {\n  // Use createRequire to avoid bundler static analysis without using eval\n  const dynamicRequire = createRequire(__filename);\n  customThemes = dynamicRequire(customThemePath);\n}\n\n/** @type {import('tailwindcss').Config} */\nmodule.exports = customThemes ? merge(baseThemes, customThemes) : baseThemes;\n"
  },
  {
    "path": "web/tests/README.md",
    "content": "# React Integration Testing Guide\n\nComprehensive guide for writing integration tests in the Onyx web application using Jest and React Testing Library.\n\n## Table of Contents\n\n- [Running Tests](#running-tests)\n- [Core Concepts](#core-concepts)\n- [Writing Tests](#writing-tests)\n- [Query Selectors](#query-selectors)\n- [User Interactions](#user-interactions)\n- [Async Operations](#async-operations)\n- [Mocking](#mocking)\n- [Common Patterns](#common-patterns)\n- [Testing Philosophy](#testing-philosophy)\n- [Troubleshooting](#troubleshooting)\n\n## Running Tests\n\n```bash\n# Run all tests\nnpm test\n\n# Run specific test file\nnpm test -- EmailPasswordForm.test\n\n# Run tests matching pattern\nnpm test -- --testPathPattern=\"auth\"\n\n# Run without coverage\nnpm test -- --no-coverage\n\n# Run in watch mode\nnpm test -- --watch\n\n# Run with verbose output\nnpm test -- --verbose\n```\n\n## Core Concepts\n\n### Test Structure\n\nTests are **co-located** with source files for easy discovery and maintenance:\n\n```\nsrc/app/auth/login/\n├── EmailPasswordForm.tsx\n└── EmailPasswordForm.test.tsx\n```\n\n### Test Anatomy\n\nEvery test follows this structure:\n\n```typescript\nimport { render, screen, setupUser, waitFor } from \"@tests/setup/test-utils\";\nimport MyComponent from \"./MyComponent\";\n\ntest(\"descriptive test name explaining user behavior\", async () => {\n  // 1. Setup - Create user, mock APIs\n  const user = setupUser();\n  const fetchSpy = jest.spyOn(global, \"fetch\");\n\n  fetchSpy.mockResolvedValueOnce({\n    ok: true,\n    json: async () => ({ data: \"value\" }),\n  } as Response);\n\n  // 2. Render - Display the component\n  render(<MyComponent />);\n\n  // 3. Act - Simulate user interactions\n  await user.type(screen.getByRole(\"textbox\"), \"test input\");\n  await user.click(screen.getByRole(\"button\", { name: /submit/i }));\n\n  // 4. Assert - Verify expected outcomes\n  await waitFor(() => {\n    expect(screen.getByText(/success/i)).toBeInTheDocument();\n  });\n\n  // 5. Cleanup - Restore mocks\n  fetchSpy.mockRestore();\n});\n```\n\n### setupUser() - Automatic act() Wrapping\n\n**ALWAYS use `setupUser()` instead of `userEvent.setup()`**\n\n```typescript\n// ✅ Correct - Automatic act() wrapping\nconst user = setupUser();\nawait user.click(button);\nawait user.type(input, \"text\");\n\n// ❌ Wrong - Manual act() required, verbose\nconst user = userEvent.setup();\nawait act(async () => {\n  await user.click(button);\n});\n```\n\nThe `setupUser()` helper automatically wraps all user interactions in React's `act()` to prevent warnings and ensure proper state updates.\n\n## Writing Tests\n\n### Query Selectors\n\nUse queries in this priority order (most accessible first):\n\n#### 1. Role Queries (Preferred)\n\n```typescript\n// Buttons\nscreen.getByRole(\"button\", { name: /submit/i });\nscreen.getByRole(\"button\", { name: /cancel/i });\n\n// Text inputs\nscreen.getByRole(\"textbox\", { name: /email/i });\n\n// Checkboxes\nscreen.getByRole(\"checkbox\", { name: /remember me/i });\n\n// Links\nscreen.getByRole(\"link\", { name: /learn more/i });\n\n// Headings\nscreen.getByRole(\"heading\", { name: /welcome/i });\n```\n\n#### 2. Label Queries\n\n```typescript\n// For form inputs with labels\nscreen.getByLabelText(/password/i);\nscreen.getByLabelText(/email address/i);\n```\n\n#### 3. Placeholder Queries\n\n```typescript\n// When no label exists\nscreen.getByPlaceholderText(/enter email/i);\n```\n\n#### 4. Text Queries\n\n```typescript\n// For non-interactive text\nscreen.getByText(/welcome back/i);\nscreen.getByText(/error occurred/i);\n```\n\n#### Query Variants\n\n```typescript\n// getBy - Throws error if not found (immediate)\nscreen.getByRole(\"button\");\n\n// queryBy - Returns null if not found (checking absence)\nexpect(screen.queryByText(/error/i)).not.toBeInTheDocument();\n\n// findBy - Returns promise, waits for element (async)\nexpect(await screen.findByText(/success/i)).toBeInTheDocument();\n\n// getAllBy - Returns array of all matches\nconst inputs = screen.getAllByRole(\"textbox\");\n```\n\n### Query Selectors: The Wrong Way\n\n**❌ Avoid these anti-patterns:**\n\n```typescript\n// DON'T query by test IDs\nscreen.getByTestId(\"submit-button\");\n\n// DON'T query by class names\ncontainer.querySelector(\".submit-btn\");\n\n// DON'T query by element types\ncontainer.querySelector(\"button\");\n```\n\n## User Interactions\n\n### Basic Interactions\n\n```typescript\nconst user = setupUser();\n\n// Click\nawait user.click(screen.getByRole(\"button\", { name: /submit/i }));\n\n// Type text\nawait user.type(screen.getByRole(\"textbox\"), \"test input\");\n\n// Clear and type\nawait user.clear(input);\nawait user.type(input, \"new value\");\n\n// Check/uncheck checkbox\nawait user.click(screen.getByRole(\"checkbox\"));\n\n// Select from dropdown\nawait user.selectOptions(screen.getByRole(\"combobox\"), \"option-value\");\n\n// Upload file\nconst file = new File([\"content\"], \"test.txt\", { type: \"text/plain\" });\nconst input = screen.getByLabelText(/upload/i);\nawait user.upload(input, file);\n```\n\n### Form Interactions\n\n```typescript\ntest(\"user can fill and submit form\", async () => {\n  const user = setupUser();\n\n  render(<ContactForm />);\n\n  await user.type(screen.getByLabelText(/name/i), \"John Doe\");\n  await user.type(screen.getByLabelText(/email/i), \"john@example.com\");\n  await user.type(screen.getByLabelText(/message/i), \"Hello!\");\n  await user.click(screen.getByRole(\"button\", { name: /send/i }));\n\n  await waitFor(() => {\n    expect(screen.getByText(/message sent/i)).toBeInTheDocument();\n  });\n});\n```\n\n## Async Operations\n\n### Handling Async State Updates\n\n**Rule**: After triggering state changes, always wait for UI updates before asserting.\n\n#### Pattern 1: findBy Queries (Simplest)\n\n```typescript\n// Element appears after async operation\nawait user.click(createButton);\nexpect(await screen.findByRole(\"textbox\")).toBeInTheDocument();\n```\n\n#### Pattern 2: waitFor (Complex Assertions)\n\n```typescript\nawait user.click(submitButton);\n\nawait waitFor(() => {\n  expect(screen.getByText(\"Success\")).toBeInTheDocument();\n  expect(screen.getByText(\"Count: 5\")).toBeInTheDocument();\n});\n```\n\n#### Pattern 3: waitForElementToBeRemoved\n\n```typescript\nawait user.click(deleteButton);\n\nawait waitForElementToBeRemoved(() => screen.queryByText(/item name/i));\n```\n\n### Common Async Mistakes\n\n```typescript\n// ❌ Wrong - getBy immediately after state change\nawait user.click(button);\nexpect(screen.getByText(\"Updated\")).toBeInTheDocument(); // May fail!\n\n// ✅ Correct - Wait for state update\nawait user.click(button);\nexpect(await screen.findByText(\"Updated\")).toBeInTheDocument();\n\n// ❌ Wrong - Multiple getBy calls without waiting\nawait user.click(button);\nexpect(screen.getByText(\"Success\")).toBeInTheDocument();\nexpect(screen.getByText(\"Data loaded\")).toBeInTheDocument();\n\n// ✅ Correct - Single waitFor with multiple assertions\nawait user.click(button);\nawait waitFor(() => {\n  expect(screen.getByText(\"Success\")).toBeInTheDocument();\n  expect(screen.getByText(\"Data loaded\")).toBeInTheDocument();\n});\n```\n\n## Mocking\n\n### Mocking fetch API\n\n**IMPORTANT**: Always document which endpoint each mock corresponds to using comments.\n\n```typescript\nlet fetchSpy: jest.SpyInstance;\n\nbeforeEach(() => {\n  fetchSpy = jest.spyOn(global, \"fetch\");\n});\n\nafterEach(() => {\n  fetchSpy.mockRestore();\n});\n\ntest(\"fetches data successfully\", async () => {\n  // Mock GET /api/data\n  fetchSpy.mockResolvedValueOnce({\n    ok: true,\n    json: async () => ({ data: [1, 2, 3] }),\n  } as Response);\n\n  render(<MyComponent />);\n\n  await waitFor(() => {\n    expect(fetchSpy).toHaveBeenCalledWith(\"/api/data\");\n  });\n});\n```\n\n**Why comment the endpoint?** Sequential mocks can be confusing. Comments make it clear which API call each mock corresponds to, making tests easier to understand and maintain.\n\n### Multiple API Calls\n\n**Pattern**: Document each endpoint with a comment, then verify it was called correctly.\n\n```typescript\ntest(\"handles multiple API calls\", async () => {\n  const user = setupUser();\n\n  // Mock GET /api/items\n  fetchSpy.mockResolvedValueOnce({\n    ok: true,\n    json: async () => ({ items: [] }),\n  } as Response);\n\n  // Mock POST /api/items\n  fetchSpy.mockResolvedValueOnce({\n    ok: true,\n    json: async () => ({ id: 1, name: \"New Item\" }),\n  } as Response);\n\n  render(<MyComponent />);\n\n  // Verify GET was called\n  await waitFor(() => {\n    expect(fetchSpy).toHaveBeenCalledWith(\"/api/items\");\n  });\n\n  await user.click(screen.getByRole(\"button\", { name: /create/i }));\n\n  // Verify POST was called\n  await waitFor(() => {\n    expect(fetchSpy).toHaveBeenCalledWith(\n      \"/api/items\",\n      expect.objectContaining({ method: \"POST\" })\n    );\n  });\n});\n```\n\n**Three API calls example:**\n\n```typescript\ntest(\"test, create, and set as default\", async () => {\n  const user = setupUser();\n\n  // Mock POST /api/llm/test\n  fetchSpy.mockResolvedValueOnce({\n    ok: true,\n    json: async () => ({}),\n  } as Response);\n\n  // Mock PUT /api/llm/provider?is_creation=true\n  fetchSpy.mockResolvedValueOnce({\n    ok: true,\n    json: async () => ({ id: 5, name: \"New Provider\" }),\n  } as Response);\n\n  // Mock POST /api/llm/provider/5/default\n  fetchSpy.mockResolvedValueOnce({\n    ok: true,\n    json: async () => ({}),\n  } as Response);\n\n  render(<MyForm />);\n\n  await user.type(screen.getByLabelText(/name/i), \"New Provider\");\n  await user.click(screen.getByRole(\"button\", { name: /create/i }));\n\n  // Verify all three endpoints were called\n  await waitFor(() => {\n    expect(fetchSpy).toHaveBeenCalledWith(\n      \"/api/llm/test\",\n      expect.objectContaining({ method: \"POST\" })\n    );\n    expect(fetchSpy).toHaveBeenCalledWith(\n      \"/api/llm/provider\",\n      expect.objectContaining({ method: \"PUT\" })\n    );\n    expect(fetchSpy).toHaveBeenCalledWith(\n      \"/api/llm/provider/5/default\",\n      expect.objectContaining({ method: \"POST\" })\n    );\n  });\n});\n```\n\n### Verifying Request Body\n\n```typescript\ntest(\"sends correct data\", async () => {\n  const user = setupUser();\n\n  fetchSpy.mockResolvedValueOnce({\n    ok: true,\n    json: async () => ({}),\n  } as Response);\n\n  render(<MyForm />);\n\n  await user.type(screen.getByLabelText(/name/i), \"Test\");\n  await user.click(screen.getByRole(\"button\", { name: /submit/i }));\n\n  await waitFor(() => {\n    expect(fetchSpy).toHaveBeenCalled();\n  });\n\n  const callArgs = fetchSpy.mock.calls[0];\n  const requestBody = JSON.parse(callArgs[1].body);\n\n  expect(requestBody).toEqual({\n    name: \"Test\",\n    active: true,\n  });\n});\n```\n\n### Mocking Errors\n\n```typescript\ntest(\"displays error message on failure\", async () => {\n  // Mock GET /api/data (network error)\n  fetchSpy.mockRejectedValueOnce(new Error(\"Network error\"));\n\n  render(<MyComponent />);\n\n  await waitFor(() => {\n    expect(screen.getByText(/failed to load/i)).toBeInTheDocument();\n  });\n});\n\ntest(\"handles API error response\", async () => {\n  // Mock POST /api/items (server error)\n  fetchSpy.mockResolvedValueOnce({\n    ok: false,\n    status: 500,\n  } as Response);\n\n  render(<MyComponent />);\n\n  await waitFor(() => {\n    expect(screen.getByText(/something went wrong/i)).toBeInTheDocument();\n  });\n});\n```\n\n### Mocking Next.js Router\n\n```typescript\n// At top of test file\njest.mock(\"next/navigation\", () => ({\n  useRouter: () => ({\n    push: jest.fn(),\n    back: jest.fn(),\n    refresh: jest.fn(),\n  }),\n  usePathname: () => \"/current-path\",\n}));\n```\n\n## Common Patterns\n\n### Testing CRUD Operations\n\n```typescript\ndescribe(\"User Management\", () => {\n  test(\"creates new user\", async () => {\n    const user = setupUser();\n\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({ id: 1, name: \"New User\" }),\n    } as Response);\n\n    render(<UserForm />);\n\n    await user.type(screen.getByLabelText(/name/i), \"New User\");\n    await user.click(screen.getByRole(\"button\", { name: /create/i }));\n\n    await waitFor(() => {\n      expect(screen.getByText(/user created/i)).toBeInTheDocument();\n    });\n  });\n\n  test(\"edits existing user\", async () => {\n    const user = setupUser();\n\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({ id: 1, name: \"Updated User\" }),\n    } as Response);\n\n    render(<UserForm initialData={{ id: 1, name: \"Old Name\" }} />);\n\n    await user.clear(screen.getByLabelText(/name/i));\n    await user.type(screen.getByLabelText(/name/i), \"Updated User\");\n    await user.click(screen.getByRole(\"button\", { name: /save/i }));\n\n    await waitFor(() => {\n      expect(screen.getByText(/user updated/i)).toBeInTheDocument();\n    });\n  });\n\n  test(\"deletes user\", async () => {\n    const user = setupUser();\n\n    fetchSpy.mockResolvedValueOnce({\n      ok: true,\n      json: async () => ({}),\n    } as Response);\n\n    render(<UserList />);\n\n    await waitFor(() => {\n      expect(screen.getByText(\"John Doe\")).toBeInTheDocument();\n    });\n\n    await user.click(screen.getByRole(\"button\", { name: /delete/i }));\n\n    await waitFor(() => {\n      expect(screen.queryByText(\"John Doe\")).not.toBeInTheDocument();\n    });\n  });\n});\n```\n\n### Testing Conditional Rendering\n\n```typescript\ntest(\"shows edit form when edit button clicked\", async () => {\n  const user = setupUser();\n\n  render(<MyComponent />);\n\n  expect(screen.queryByRole(\"textbox\")).not.toBeInTheDocument();\n\n  await user.click(screen.getByRole(\"button\", { name: /edit/i }));\n\n  expect(await screen.findByRole(\"textbox\")).toBeInTheDocument();\n});\n\ntest(\"toggles between states\", async () => {\n  const user = setupUser();\n\n  render(<Toggle />);\n\n  const button = screen.getByRole(\"button\", { name: /show details/i });\n\n  await user.click(button);\n  expect(await screen.findByText(/details content/i)).toBeInTheDocument();\n\n  await user.click(button);\n  expect(screen.queryByText(/details content/i)).not.toBeInTheDocument();\n});\n```\n\n### Testing Lists and Tables\n\n```typescript\ntest(\"displays list of items\", async () => {\n  fetchSpy.mockResolvedValueOnce({\n    ok: true,\n    json: async () => ({\n      items: [\n        { id: 1, name: \"Item 1\" },\n        { id: 2, name: \"Item 2\" },\n        { id: 3, name: \"Item 3\" },\n      ],\n    }),\n  } as Response);\n\n  render(<ItemList />);\n\n  await waitFor(() => {\n    expect(screen.getByText(\"Item 1\")).toBeInTheDocument();\n    expect(screen.getByText(\"Item 2\")).toBeInTheDocument();\n    expect(screen.getByText(\"Item 3\")).toBeInTheDocument();\n  });\n});\n\ntest(\"filters items\", async () => {\n  const user = setupUser();\n\n  render(<FilterableList items={mockItems} />);\n\n  await user.type(screen.getByRole(\"searchbox\"), \"specific\");\n\n  await waitFor(() => {\n    expect(screen.getByText(\"Specific Item\")).toBeInTheDocument();\n    expect(screen.queryByText(\"Other Item\")).not.toBeInTheDocument();\n  });\n});\n```\n\n### Testing Validation\n\n```typescript\ntest(\"shows validation errors\", async () => {\n  const user = setupUser();\n\n  render(<LoginForm />);\n\n  await user.click(screen.getByRole(\"button\", { name: /submit/i }));\n\n  await waitFor(() => {\n    expect(screen.getByText(/email is required/i)).toBeInTheDocument();\n    expect(screen.getByText(/password is required/i)).toBeInTheDocument();\n  });\n});\n\ntest(\"clears validation on valid input\", async () => {\n  const user = setupUser();\n\n  render(<LoginForm />);\n\n  await user.click(screen.getByRole(\"button\", { name: /submit/i }));\n\n  await waitFor(() => {\n    expect(screen.getByText(/email is required/i)).toBeInTheDocument();\n  });\n\n  await user.type(screen.getByLabelText(/email/i), \"valid@email.com\");\n\n  await waitFor(() => {\n    expect(screen.queryByText(/email is required/i)).not.toBeInTheDocument();\n  });\n});\n```\n\n## Testing Philosophy\n\n### What to Test\n\n**✅ Test user-visible behavior:**\n\n- Forms can be filled and submitted\n- Buttons trigger expected actions\n- Success/error messages appear\n- Navigation works correctly\n- Data is displayed after loading\n- Validation errors show and clear appropriately\n\n**✅ Test integration points:**\n\n- API calls are made with correct parameters\n- Responses are handled properly\n- Error states are handled\n- Loading states appear\n\n**❌ Don't test implementation details:**\n\n- Internal state values\n- Component lifecycle methods\n- CSS class names\n- Specific React hooks being used\n\n### Test Naming\n\nWrite test names that describe user behavior:\n\n```typescript\n// ✅ Good - Describes what user can do\ntest(\"user can create new prompt\", async () => {});\ntest(\"shows error when API call fails\", async () => {});\ntest(\"filters items by search term\", async () => {});\n\n// ❌ Bad - Implementation-focused\ntest(\"handleSubmit is called\", async () => {});\ntest(\"state updates correctly\", async () => {});\ntest(\"renders without crashing\", async () => {});\n```\n\n### Minimal Mocking\n\nOnly mock external dependencies:\n\n```typescript\n// ✅ Mock external APIs\njest.spyOn(global, \"fetch\");\n\n// ✅ Mock Next.js router\njest.mock(\"next/navigation\");\n\n// ✅ Mock problematic packages\n// (configured in tests/setup/__mocks__)\n\n// ❌ Don't mock application code\n// ❌ Don't mock component internals\n// ❌ Don't mock utility functions\n```\n\n## Troubleshooting\n\n### \"Not wrapped in act()\" Warning\n\n**Solution**: Always use `setupUser()` instead of `userEvent.setup()`\n\n```typescript\n// ✅ Correct\nconst user = setupUser();\n\n// ❌ Wrong\nconst user = userEvent.setup();\n```\n\n### \"Unable to find element\" Error\n\n**Solution**: Element hasn't appeared yet, use `findBy` or `waitFor`\n\n```typescript\n// ❌ Wrong - getBy doesn't wait\nawait user.click(button);\nexpect(screen.getByText(\"Success\")).toBeInTheDocument();\n\n// ✅ Correct - findBy waits\nawait user.click(button);\nexpect(await screen.findByText(\"Success\")).toBeInTheDocument();\n```\n\n### \"Multiple elements found\" Error\n\n**Solution**: Be more specific with your query\n\n```typescript\n// ❌ Too broad\nscreen.getByRole(\"button\");\n\n// ✅ Specific\nscreen.getByRole(\"button\", { name: /submit/i });\n```\n\n### Test Times Out\n\n**Causes**:\n\n1. Async operation never completes\n2. Waiting for element that never appears\n3. Missing mock for API call\n\n**Solutions**:\n\n```typescript\n// Check fetch is mocked\nexpect(fetchSpy).toHaveBeenCalled()\n\n// Use queryBy to check if element exists\nexpect(screen.queryByText(\"Text\")).toBeInTheDocument()\n\n// Verify mock is set up before render\nfetchSpy.mockResolvedValueOnce(...)\nrender(<Component />)\n```\n\n## Examples\n\nSee comprehensive test examples:\n\n- `src/app/auth/login/EmailPasswordForm.test.tsx` - Login/signup workflows, validation\n- `src/app/chat/input-prompts/InputPrompts.test.tsx` - CRUD operations, conditional rendering\n- `src/app/admin/configuration/llm/CustomLLMProviderUpdateForm.test.tsx` - Complex forms, multi-step workflows\n\n## Built-in Mocks\n\nOnly essential mocks in `tests/setup/__mocks__/`:\n\n- `UserProvider` - Removes auth requirement for tests\n- `react-markdown` / `remark-gfm` - ESM compatibility\n\nSee `tests/setup/__mocks__/README.md` for details.\n"
  },
  {
    "path": "web/tests/e2e/admin/admin_auth.setup.ts",
    "content": "// dependency for all admin user tests\nimport { test as setup } from \"@playwright/test\";\n\nsetup(\"authenticate as admin\", async ({ browser }) => {\n  const context = await browser.newContext({ storageState: \"admin_auth.json\" });\n  const page = await context.newPage();\n  await page.goto(\"/app\");\n  await page.waitForURL(\"/app\");\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/admin_oauth_redirect_uri.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\n\ntest.use({ storageState: \"admin_auth.json\" });\n\ntest(\"Admin - OAuth Redirect - Missing Code\", async ({ page }) => {\n  await page.goto(\"/admin/connectors/slack/oauth/callback?state=xyz\");\n\n  await expect(page.locator(\"p.text-text-500\")).toHaveText(\n    \"Missing authorization code.\"\n  );\n});\n\ntest(\"Admin - OAuth Redirect - Missing State\", async ({ page }) => {\n  await page.goto(\"/admin/connectors/slack/oauth/callback?code=123\");\n\n  await expect(page.locator(\"p.text-text-500\")).toHaveText(\n    \"Missing state parameter.\"\n  );\n});\n\ntest(\"Admin - OAuth Redirect - Invalid Connector\", async ({ page }) => {\n  await page.goto(\n    \"/admin/connectors/invalid-connector/oauth/callback?code=123&state=xyz\"\n  );\n\n  await expect(page.locator(\"p.text-text-500\")).toHaveText(\n    \"invalid_connector is not a valid source type.\"\n  );\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/admin_pages.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport type { Page } from \"@playwright/test\";\nimport { THEMES, setThemeBeforeNavigation } from \"@tests/e2e/utils/theme\";\nimport { expectScreenshot } from \"@tests/e2e/utils/visualRegression\";\n\ntest.use({ storageState: \"admin_auth.json\" });\ntest.describe.configure({ mode: \"parallel\" });\n\n/**\n * Discover all navigable admin pages by collecting links from the sidebar.\n * The sidebar is rendered on every `/admin/*` page, so we visit one admin\n * route and scrape the `<a>` elements that are present for the current\n * user / feature-flag configuration.\n */\nasync function discoverAdminPages(page: Page): Promise<string[]> {\n  await page.goto(\"/admin/configuration/llm\");\n  await page.waitForLoadState(\"networkidle\");\n\n  return page.evaluate(() => {\n    const sidebar = document.querySelector('[class*=\"group/SidebarWrapper\"]');\n    if (!sidebar) return [];\n\n    const hrefs = new Set<string>();\n    sidebar\n      .querySelectorAll<HTMLAnchorElement>('a[href^=\"/admin/\"]')\n      .forEach((a) => hrefs.add(a.getAttribute(\"href\")!));\n    return Array.from(hrefs);\n  });\n}\n\nfor (const theme of THEMES) {\n  test(`Admin pages – ${theme} mode`, async ({ page }) => {\n    await setThemeBeforeNavigation(page, theme);\n\n    const adminHrefs = await discoverAdminPages(page);\n    expect(\n      adminHrefs.length,\n      \"Expected to discover at least one admin page from the sidebar\"\n    ).toBeGreaterThan(0);\n\n    for (const href of adminHrefs) {\n      const slug = href.replace(/^\\/admin\\//, \"\").replace(/\\//g, \"--\");\n\n      await test.step(\n        slug,\n        async () => {\n          await page.goto(href);\n\n          try {\n            await expect(\n              page.locator('[aria-label=\"admin-page-title\"]')\n            ).toBeVisible({ timeout: 10000 });\n          } catch (error) {\n            console.error(`Failed to find admin-page-title for \"${href}\"`);\n            throw error;\n          }\n\n          await page.waitForLoadState(\"networkidle\");\n\n          await expectScreenshot(page, {\n            name: `admin-${theme}-${slug}`,\n            mask: [\n              '[data-testid=\"admin-date-range-selector-button\"]',\n              '[data-column-id=\"updated_at\"]',\n            ],\n          });\n        },\n        { box: true }\n      );\n    }\n  });\n}\n"
  },
  {
    "path": "web/tests/e2e/admin/code-interpreter/code_interpreter.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport type { Page } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\n\nconst CODE_INTERPRETER_URL = \"/admin/configuration/code-interpreter\";\nconst API_STATUS_URL = \"**/api/admin/code-interpreter\";\nconst API_HEALTH_URL = \"**/api/admin/code-interpreter/health\";\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Intercept the status (GET /) and health (GET /health) endpoints with the\n * given values so the page renders deterministically.\n *\n * Also handles PUT requests — by default they succeed (200). Pass\n * `putStatus` to simulate failures.\n */\nasync function mockCodeInterpreterApi(\n  page: Page,\n  opts: { enabled: boolean; healthy: boolean; putStatus?: number }\n) {\n  const putStatus = opts.putStatus ?? 200;\n\n  await page.route(API_HEALTH_URL, async (route) => {\n    await route.fulfill({\n      status: 200,\n      contentType: \"application/json\",\n      body: JSON.stringify({ healthy: opts.healthy }),\n    });\n  });\n\n  await page.route(API_STATUS_URL, async (route) => {\n    if (route.request().method() === \"PUT\") {\n      await route.fulfill({\n        status: putStatus,\n        contentType: \"application/json\",\n        body:\n          putStatus >= 400\n            ? JSON.stringify({ detail: \"Server Error\" })\n            : JSON.stringify(null),\n      });\n    } else {\n      await route.fulfill({\n        status: 200,\n        contentType: \"application/json\",\n        body: JSON.stringify({ enabled: opts.enabled }),\n      });\n    }\n  });\n}\n\n/**\n * The disconnect icon button is an icon-only opal Button whose tooltip text\n * is not exposed as an accessible name. Locate it by finding the first\n * icon-only button (no label span) inside the card area.\n */\nfunction getDisconnectIconButton(page: Page) {\n  return page\n    .locator(\"button:has(.interactive-foreground-icon):not(:has(span))\")\n    .first();\n}\n\n// ---------------------------------------------------------------------------\n// Tests\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Code Interpreter Admin Page\", () => {\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n  });\n\n  test(\"page loads with header and description\", async ({ page }) => {\n    await mockCodeInterpreterApi(page, { enabled: true, healthy: true });\n    await page.goto(CODE_INTERPRETER_URL);\n\n    await expect(page.locator('[aria-label=\"admin-page-title\"]')).toHaveText(\n      /^Code Interpreter/,\n      { timeout: 10000 }\n    );\n\n    await expect(page.getByText(\"Built-in Python runtime\")).toBeVisible();\n  });\n\n  test(\"shows Connected status when enabled and healthy\", async ({ page }) => {\n    await mockCodeInterpreterApi(page, { enabled: true, healthy: true });\n    await page.goto(CODE_INTERPRETER_URL);\n\n    await expect(page.getByText(\"Connected\")).toBeVisible({ timeout: 10000 });\n  });\n\n  test(\"shows Connection Lost when enabled but unhealthy\", async ({ page }) => {\n    await mockCodeInterpreterApi(page, { enabled: true, healthy: false });\n    await page.goto(CODE_INTERPRETER_URL);\n\n    await expect(page.getByText(\"Connection Lost\")).toBeVisible({\n      timeout: 10000,\n    });\n  });\n\n  test(\"shows Reconnect button when disabled\", async ({ page }) => {\n    await mockCodeInterpreterApi(page, { enabled: false, healthy: false });\n    await page.goto(CODE_INTERPRETER_URL);\n\n    await expect(page.getByRole(\"button\", { name: \"Reconnect\" })).toBeVisible({\n      timeout: 10000,\n    });\n    await expect(page.getByText(\"(Disconnected)\")).toBeVisible();\n  });\n\n  test(\"disconnect flow opens modal and sends PUT request\", async ({\n    page,\n  }) => {\n    await mockCodeInterpreterApi(page, { enabled: true, healthy: true });\n    await page.goto(CODE_INTERPRETER_URL);\n\n    await expect(page.getByText(\"Connected\")).toBeVisible({ timeout: 10000 });\n\n    // Click the disconnect icon button\n    await getDisconnectIconButton(page).click();\n\n    // Modal should appear\n    await expect(page.getByText(\"Disconnect Code Interpreter\")).toBeVisible();\n    await expect(\n      page.getByText(\"All running sessions connected to\")\n    ).toBeVisible();\n\n    // Click the danger Disconnect button in the modal\n    const modal = page.getByRole(\"dialog\");\n    await modal.getByRole(\"button\", { name: \"Disconnect\" }).click();\n\n    // Modal should close after successful disconnect\n    await expect(page.getByText(\"Disconnect Code Interpreter\")).not.toBeVisible(\n      { timeout: 5000 }\n    );\n  });\n\n  test(\"disconnect modal can be closed without disconnecting\", async ({\n    page,\n  }) => {\n    await mockCodeInterpreterApi(page, { enabled: true, healthy: true });\n    await page.goto(CODE_INTERPRETER_URL);\n\n    await expect(page.getByText(\"Connected\")).toBeVisible({ timeout: 10000 });\n\n    // Open modal\n    await getDisconnectIconButton(page).click();\n    await expect(page.getByText(\"Disconnect Code Interpreter\")).toBeVisible();\n\n    // Close modal via Cancel button\n    const modal = page.getByRole(\"dialog\");\n    await modal.getByRole(\"button\", { name: \"Cancel\" }).click();\n\n    // Modal should be gone, page still shows Connected\n    await expect(\n      page.getByText(\"Disconnect Code Interpreter\")\n    ).not.toBeVisible();\n    await expect(page.getByText(\"Connected\")).toBeVisible();\n  });\n\n  test(\"reconnect flow sends PUT with enabled=true\", async ({ page }) => {\n    await mockCodeInterpreterApi(page, { enabled: false, healthy: false });\n    await page.goto(CODE_INTERPRETER_URL);\n\n    await expect(page.getByRole(\"button\", { name: \"Reconnect\" })).toBeVisible({\n      timeout: 10000,\n    });\n\n    // Intercept the PUT and verify the payload\n    const putPromise = page.waitForRequest(\n      (req) =>\n        req.url().includes(\"/api/admin/code-interpreter\") &&\n        req.method() === \"PUT\"\n    );\n\n    await page.getByRole(\"button\", { name: \"Reconnect\" }).click();\n\n    const putReq = await putPromise;\n    expect(putReq.postDataJSON()).toEqual({ enabled: true });\n  });\n\n  test(\"shows Checking... while reconnect is in progress\", async ({ page }) => {\n    // Use a single route handler that delays PUT responses\n    await page.route(API_HEALTH_URL, async (route) => {\n      await route.fulfill({\n        status: 200,\n        contentType: \"application/json\",\n        body: JSON.stringify({ healthy: false }),\n      });\n    });\n\n    await page.route(API_STATUS_URL, async (route) => {\n      if (route.request().method() === \"PUT\") {\n        await new Promise((resolve) => setTimeout(resolve, 2000));\n        await route.fulfill({\n          status: 200,\n          contentType: \"application/json\",\n          body: JSON.stringify(null),\n        });\n      } else {\n        await route.fulfill({\n          status: 200,\n          contentType: \"application/json\",\n          body: JSON.stringify({ enabled: false }),\n        });\n      }\n    });\n\n    await page.goto(CODE_INTERPRETER_URL);\n\n    await expect(page.getByRole(\"button\", { name: \"Reconnect\" })).toBeVisible({\n      timeout: 10000,\n    });\n\n    await page.getByRole(\"button\", { name: \"Reconnect\" }).click();\n\n    // Should show Checking... while the request is in flight\n    await expect(page.getByText(\"Checking...\")).toBeVisible({ timeout: 3000 });\n  });\n\n  test(\"shows error toast when disconnect fails\", async ({ page }) => {\n    await mockCodeInterpreterApi(page, {\n      enabled: true,\n      healthy: true,\n      putStatus: 500,\n    });\n    await page.goto(CODE_INTERPRETER_URL);\n\n    await expect(page.getByText(\"Connected\")).toBeVisible({ timeout: 10000 });\n\n    // Open modal and click disconnect\n    await getDisconnectIconButton(page).click();\n    const modal = page.getByRole(\"dialog\");\n    await modal.getByRole(\"button\", { name: \"Disconnect\" }).click();\n\n    // Error toast should appear\n    await expect(\n      page.getByText(\"Failed to disconnect Code Interpreter\")\n    ).toBeVisible({ timeout: 5000 });\n  });\n\n  test(\"shows error toast when reconnect fails\", async ({ page }) => {\n    await mockCodeInterpreterApi(page, {\n      enabled: false,\n      healthy: false,\n      putStatus: 500,\n    });\n    await page.goto(CODE_INTERPRETER_URL);\n\n    await expect(page.getByRole(\"button\", { name: \"Reconnect\" })).toBeVisible({\n      timeout: 10000,\n    });\n\n    await page.getByRole(\"button\", { name: \"Reconnect\" }).click();\n\n    // Error toast should appear\n    await expect(\n      page.getByText(\"Failed to reconnect Code Interpreter\")\n    ).toBeVisible({ timeout: 5000 });\n\n    // Reconnect button should reappear (not stuck in Checking...)\n    await expect(page.getByRole(\"button\", { name: \"Reconnect\" })).toBeVisible({\n      timeout: 5000,\n    });\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/default-agent.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport type { Page, Locator } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport {\n  TOOL_IDS,\n  waitForUnifiedGreeting,\n  openActionManagement,\n} from \"@tests/e2e/utils/tools\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\n/**\n * Locate the Switch toggle for a built-in tool by its display name.\n * Each tool sits inside its own `<label>` wrapper created by InputLayouts.Horizontal.\n */\nfunction getToolSwitch(page: Page, toolName: string): Locator {\n  return page\n    .locator(\"label\")\n    .filter({ has: page.getByText(toolName, { exact: true }) })\n    .locator('button[role=\"switch\"]')\n    .first();\n}\n\n/**\n * Click a button and wait for the PATCH response to complete.\n * Uses waitForResponse set up *before* the click to avoid race conditions.\n */\nasync function clickAndWaitForPatch(\n  page: Page,\n  buttonLocator: Locator\n): Promise<void> {\n  const patchPromise = page.waitForResponse(\n    (r) =>\n      r.url().includes(\"/api/admin/default-assistant\") &&\n      r.request().method() === \"PATCH\",\n    { timeout: 8000 }\n  );\n  await buttonLocator.click();\n  await patchPromise;\n}\n\ntest.describe(\"Chat Preferences Admin Page\", () => {\n  let testCcPairId: number | null = null;\n  let webSearchProviderId: number | null = null;\n  let imageGenConfigId: string | null = null;\n\n  test.beforeEach(async ({ page }) => {\n    // Log in as admin\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n\n    const apiClient = new OnyxApiClient(page.request);\n\n    // Create a connector so Internal Search tool becomes available\n    testCcPairId = await apiClient.createFileConnector(\n      `Test Connector ${Date.now()}`\n    );\n\n    // Create providers for Web Search and Image Generation tools\n    try {\n      webSearchProviderId = await apiClient.createWebSearchProvider(\n        \"exa\",\n        `Test Web Search Provider ${Date.now()}`\n      );\n      imageGenConfigId = await apiClient.createImageGenerationConfig(\n        `test-image-gen-${Date.now()}`\n      );\n    } catch (error) {\n      console.warn(`Failed to create tool providers: ${error}`);\n    }\n\n    // Navigate to chat preferences\n    await page.goto(\"/admin/configuration/chat-preferences\");\n    await page.waitForURL(\"**/admin/configuration/chat-preferences**\");\n\n    // Attach basic API logging for this spec\n    page.on(\"response\", async (resp) => {\n      const url = resp.url();\n      if (\n        url.includes(\"/api/admin/default-assistant\") ||\n        url.includes(\"/api/admin/settings\")\n      ) {\n        const method = resp.request().method();\n        const status = resp.status();\n        let body = \"\";\n        try {\n          body = await resp.text();\n        } catch {}\n        console.log(\n          `[api:response] ${method} ${url} => ${status} body=${body?.slice(\n            0,\n            300\n          )}`\n        );\n      }\n    });\n\n    // Proactively log tool availability and current config\n    try {\n      const baseURL = process.env.BASE_URL || \"http://localhost:3000\";\n      const toolsResp = await page.request.get(`${baseURL}/api/tool`);\n      const cfgResp = await page.request.get(\n        `${baseURL}/api/admin/default-assistant/configuration`\n      );\n      console.log(\n        `[/api/tool] status=${toolsResp.status()} body=${(\n          await toolsResp.text()\n        ).slice(0, 400)}`\n      );\n      console.log(\n        `[/configuration] status=${cfgResp.status()} body=${(\n          await cfgResp.text()\n        ).slice(0, 400)}`\n      );\n    } catch (e) {\n      console.log(`[setup] Failed to fetch initial admin config: ${String(e)}`);\n    }\n  });\n\n  test.afterEach(async ({ page }) => {\n    const apiClient = new OnyxApiClient(page.request);\n\n    // Clean up the test connector\n    if (testCcPairId !== null) {\n      try {\n        await apiClient.deleteCCPair(testCcPairId);\n        testCcPairId = null;\n      } catch (error) {\n        console.warn(\n          `Failed to delete test connector ${testCcPairId}: ${error}`\n        );\n      }\n    }\n\n    // Clean up web search provider\n    if (webSearchProviderId !== null) {\n      try {\n        await apiClient.deleteWebSearchProvider(webSearchProviderId);\n        webSearchProviderId = null;\n      } catch (error) {\n        console.warn(\n          `Failed to delete web search provider ${webSearchProviderId}: ${error}`\n        );\n      }\n    }\n\n    // Clean up image gen config\n    if (imageGenConfigId !== null) {\n      try {\n        await apiClient.deleteImageGenerationConfig(imageGenConfigId);\n        imageGenConfigId = null;\n      } catch (error) {\n        console.warn(\n          `Failed to delete image gen config ${imageGenConfigId}: ${error}`\n        );\n      }\n    }\n  });\n\n  test(\"should load chat preferences page for admin users\", async ({\n    page,\n  }) => {\n    // Verify page loads with expected content\n    await expect(page.locator('[aria-label=\"admin-page-title\"]')).toHaveText(\n      /^Chat Preferences/\n    );\n    await expect(page.getByText(\"Actions & Tools\")).toBeVisible();\n  });\n\n  test(\"should toggle Internal Search tool on and off\", async ({ page }) => {\n    await page.waitForSelector(\"text=Internal Search\", { timeout: 10000 });\n\n    const searchSwitch = getToolSwitch(page, \"Internal Search\");\n\n    // Get initial state\n    const initialState = await searchSwitch.getAttribute(\"aria-checked\");\n    console.log(\n      `[toggle] Internal Search initial aria-checked=${initialState}`\n    );\n\n    // Set up response listener before the click to avoid race conditions\n    const patchRespPromise = page.waitForResponse(\n      (r) =>\n        r.url().includes(\"/api/admin/default-assistant\") &&\n        r.request().method() === \"PATCH\",\n      { timeout: 8000 }\n    );\n\n    // Toggle it — auto-saves immediately\n    await searchSwitch.click();\n\n    // Wait for PATCH to complete\n    const patchResp = await patchRespPromise;\n    console.log(\n      `[toggle] Internal Search PATCH status=${patchResp.status()} body=${(\n        await patchResp.text()\n      ).slice(0, 300)}`\n    );\n\n    // Wait for success toast\n    await expect(page.getByText(\"Tools updated\").first()).toBeVisible({\n      timeout: 5000,\n    });\n\n    // Refresh page to verify persistence\n    await page.reload();\n    await page.waitForSelector(\"text=Internal Search\", { timeout: 10000 });\n\n    // Wait for SWR data to load and React to re-render with the persisted state\n    const expectedState = initialState === \"true\" ? \"false\" : \"true\";\n    await expect(searchSwitch).toHaveAttribute(\"aria-checked\", expectedState, {\n      timeout: 10000,\n    });\n    console.log(\n      `[toggle] Internal Search after reload aria-checked=${expectedState}`\n    );\n\n    // Toggle back to original state\n    await clickAndWaitForPatch(page, searchSwitch);\n  });\n\n  test(\"should toggle Web Search tool on and off\", async ({ page }) => {\n    await page.waitForSelector(\"text=Web Search\", { timeout: 10000 });\n\n    const webSearchSwitch = getToolSwitch(page, \"Web Search\");\n\n    // Get initial state\n    const initialState = await webSearchSwitch.getAttribute(\"aria-checked\");\n    console.log(`[toggle] Web Search initial aria-checked=${initialState}`);\n\n    // Set up response listener before the click to avoid race conditions\n    const patchRespPromise = page.waitForResponse(\n      (r) =>\n        r.url().includes(\"/api/admin/default-assistant\") &&\n        r.request().method() === \"PATCH\",\n      { timeout: 8000 }\n    );\n\n    // Toggle it\n    await webSearchSwitch.click();\n\n    // Wait for PATCH to complete\n    const patchResp = await patchRespPromise;\n    console.log(\n      `[toggle] Web Search PATCH status=${patchResp.status()} body=${(\n        await patchResp.text()\n      ).slice(0, 300)}`\n    );\n\n    // Wait for success toast\n    await expect(page.getByText(\"Tools updated\").first()).toBeVisible({\n      timeout: 5000,\n    });\n\n    // Refresh page to verify persistence\n    await page.reload();\n    await page.waitForSelector(\"text=Web Search\", { timeout: 10000 });\n\n    // Wait for SWR data to load and React to re-render with the persisted state\n    const expectedState = initialState === \"true\" ? \"false\" : \"true\";\n    await expect(webSearchSwitch).toHaveAttribute(\n      \"aria-checked\",\n      expectedState,\n      { timeout: 10000 }\n    );\n    console.log(\n      `[toggle] Web Search after reload aria-checked=${expectedState}`\n    );\n\n    // Toggle back to original state\n    await clickAndWaitForPatch(page, webSearchSwitch);\n  });\n\n  test(\"should toggle Image Generation tool on and off\", async ({ page }) => {\n    await page.waitForSelector(\"text=Image Generation\", { timeout: 10000 });\n\n    const imageGenSwitch = getToolSwitch(page, \"Image Generation\");\n\n    // Get initial state\n    const initialState = await imageGenSwitch.getAttribute(\"aria-checked\");\n    console.log(\n      `[toggle] Image Generation initial aria-checked=${initialState}`\n    );\n\n    // Set up response listener before the click to avoid race conditions\n    const patchRespPromise = page.waitForResponse(\n      (r) =>\n        r.url().includes(\"/api/admin/default-assistant\") &&\n        r.request().method() === \"PATCH\",\n      { timeout: 8000 }\n    );\n\n    // Toggle it\n    await imageGenSwitch.click();\n\n    // Wait for PATCH to complete\n    const patchResp = await patchRespPromise;\n    console.log(\n      `[toggle] Image Generation PATCH status=${patchResp.status()} body=${(\n        await patchResp.text()\n      ).slice(0, 300)}`\n    );\n\n    // Wait for success toast\n    await expect(page.getByText(\"Tools updated\").first()).toBeVisible({\n      timeout: 5000,\n    });\n\n    // Refresh page to verify persistence\n    await page.reload();\n    await page.waitForSelector(\"text=Image Generation\", { timeout: 10000 });\n\n    // Wait for SWR data to load and React to re-render with the persisted state\n    const expectedState = initialState === \"true\" ? \"false\" : \"true\";\n    await expect(imageGenSwitch).toHaveAttribute(\n      \"aria-checked\",\n      expectedState,\n      { timeout: 10000 }\n    );\n    console.log(\n      `[toggle] Image Generation after reload aria-checked=${expectedState}`\n    );\n\n    // Toggle back to original state\n    await clickAndWaitForPatch(page, imageGenSwitch);\n  });\n\n  test(\"should edit and save system prompt\", async ({ page }) => {\n    // Click \"Modify Prompt\" to open the system prompt modal\n    await page.getByText(\"Modify Prompt\").click();\n\n    // Wait for modal to appear\n    const modal = page.getByRole(\"dialog\");\n    await expect(modal).toBeVisible({ timeout: 5000 });\n\n    // Fill textarea with random suffix to ensure uniqueness\n    const testPrompt = `This is a test system prompt for the E2E test. ${Math.floor(\n      Math.random() * 1000000\n    )}`;\n    const textarea = modal.getByPlaceholder(\"Enter your system prompt...\");\n    await textarea.fill(testPrompt);\n\n    // Click Save and wait for PATCH to complete\n    await clickAndWaitForPatch(\n      page,\n      modal.getByRole(\"button\", { name: \"Save\" })\n    );\n\n    // Modal should close after save\n    await expect(modal).not.toBeVisible();\n\n    // Refresh page to verify persistence\n    await page.reload();\n    await page.waitForLoadState(\"networkidle\");\n\n    // Reopen modal and verify\n    await page.getByText(\"Modify Prompt\").click();\n    const modalAfter = page.getByRole(\"dialog\");\n    await expect(modalAfter).toBeVisible({ timeout: 5000 });\n    await expect(\n      modalAfter.getByPlaceholder(\"Enter your system prompt...\")\n    ).toHaveValue(testPrompt);\n\n    // Close modal without saving to clean up\n    await modalAfter.getByRole(\"button\", { name: \"Cancel\" }).click();\n  });\n\n  test(\"should allow empty system prompt\", async ({ page }) => {\n    // Open system prompt modal\n    await page.getByText(\"Modify Prompt\").click();\n    const modal = page.getByRole(\"dialog\");\n    await expect(modal).toBeVisible({ timeout: 5000 });\n\n    const textarea = modal.getByPlaceholder(\"Enter your system prompt...\");\n\n    // Get initial value to restore later\n    const initialValue = await textarea.inputValue();\n\n    // If already empty, add some text first\n    if (initialValue === \"\") {\n      await textarea.fill(\"Temporary text\");\n      await clickAndWaitForPatch(\n        page,\n        modal.getByRole(\"button\", { name: \"Save\" })\n      );\n      // Reopen modal\n      await page.getByText(\"Modify Prompt\").click();\n      await expect(modal).toBeVisible({ timeout: 5000 });\n    }\n\n    // Clear the textarea\n    await textarea.fill(\"\");\n\n    // Save\n    await clickAndWaitForPatch(\n      page,\n      modal.getByRole(\"button\", { name: \"Save\" })\n    );\n\n    // Refresh page to verify persistence\n    await page.reload();\n    await page.waitForLoadState(\"networkidle\");\n\n    // Reopen modal and check\n    await page.getByText(\"Modify Prompt\").click();\n    const modalAfter = page.getByRole(\"dialog\");\n    await expect(modalAfter).toBeVisible({ timeout: 5000 });\n\n    // The modal pre-populates with default prompt when system_prompt is empty/null,\n    // so we just verify the modal opens without error\n    const textareaAfter = modalAfter.getByPlaceholder(\n      \"Enter your system prompt...\"\n    );\n    await expect(textareaAfter).toBeVisible();\n\n    // Restore original value if it wasn't already empty\n    if (initialValue !== \"\") {\n      await textareaAfter.fill(initialValue);\n      await clickAndWaitForPatch(\n        page,\n        modalAfter.getByRole(\"button\", { name: \"Save\" })\n      );\n    } else {\n      await modalAfter.getByRole(\"button\", { name: \"Cancel\" }).click();\n    }\n  });\n\n  test(\"should handle very long system prompt gracefully\", async ({ page }) => {\n    // Open system prompt modal\n    await page.getByText(\"Modify Prompt\").click();\n    const modal = page.getByRole(\"dialog\");\n    await expect(modal).toBeVisible({ timeout: 5000 });\n\n    const textarea = modal.getByPlaceholder(\"Enter your system prompt...\");\n\n    // Get initial value to restore later\n    const initialValue = await textarea.inputValue();\n\n    // Create a very long prompt (~4800 characters)\n    const longPrompt = \"This is a test. \".repeat(300);\n\n    await textarea.fill(longPrompt);\n\n    // Save\n    await clickAndWaitForPatch(\n      page,\n      modal.getByRole(\"button\", { name: \"Save\" })\n    );\n\n    // Verify persistence after reload\n    await page.reload();\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByText(\"Modify Prompt\").click();\n    const modalAfter = page.getByRole(\"dialog\");\n    await expect(modalAfter).toBeVisible({ timeout: 5000 });\n    await expect(\n      modalAfter.getByPlaceholder(\"Enter your system prompt...\")\n    ).toHaveValue(longPrompt);\n\n    // Restore original value\n    if (initialValue !== longPrompt) {\n      const restoreTextarea = modalAfter.getByPlaceholder(\n        \"Enter your system prompt...\"\n      );\n      await restoreTextarea.fill(initialValue);\n      await clickAndWaitForPatch(\n        page,\n        modalAfter.getByRole(\"button\", { name: \"Save\" })\n      );\n    } else {\n      await modalAfter.getByRole(\"button\", { name: \"Cancel\" }).click();\n    }\n  });\n\n  test(\"should reject invalid tool IDs via API\", async ({ page }) => {\n    // Use browser console to send invalid tool IDs\n    // This simulates what would happen if someone tried to bypass the UI\n    const response = await page.evaluate(async () => {\n      const res = await fetch(\"/api/admin/default-assistant\", {\n        method: \"PATCH\",\n        headers: { \"Content-Type\": \"application/json\" },\n        body: JSON.stringify({\n          tool_ids: [\"InvalidTool\", \"AnotherInvalidTool\"],\n        }),\n      });\n      return {\n        ok: res.ok,\n        status: res.status,\n        body: await res.text(),\n      };\n    });\n    // Also try via page.request (uses storageState) to capture status in case page fetch fails\n    try {\n      const baseURL = process.env.BASE_URL || \"http://localhost:3000\";\n      const alt = await page.request.patch(\n        `${baseURL}/api/admin/default-assistant`,\n        {\n          data: { tool_ids: [\"InvalidTool\", \"AnotherInvalidTool\"] },\n          headers: { \"Content-Type\": \"application/json\" },\n        }\n      );\n      console.log(\n        `[invalid-tools] page.request.patch status=${alt.status()} body=${(\n          await alt.text()\n        ).slice(0, 300)}`\n      );\n    } catch (e) {\n      console.log(`[invalid-tools] page.request.patch error: ${String(e)}`);\n    }\n\n    // Check that the request failed with 400 or 422 (validation error)\n    expect(response.ok).toBe(false);\n    expect([400, 422].includes(response.status)).toBe(true);\n    // The error message should indicate invalid tool IDs\n    if (response.status === 400) {\n      expect(response.body).toContain(\"Invalid tool IDs\");\n    }\n  });\n\n  test(\"should toggle all tools and verify in chat\", async ({ page }) => {\n    // Providers are now created in beforeEach, so all tools should be available\n\n    // Wait for ALL three tools to be visible in the UI\n    await page.waitForSelector(\"text=Internal Search\", { timeout: 10000 });\n    await page.waitForSelector(\"text=Web Search\", { timeout: 10000 });\n    await page.waitForSelector(\"text=Image Generation\", { timeout: 10000 });\n\n    // Wait for form to fully initialize\n    await page.waitForTimeout(2000);\n\n    // Store initial states\n    const toolStates: Record<string, string | null> = {};\n\n    // Capture current states (we'll restore these at the end)\n    for (const toolName of [\n      \"Internal Search\",\n      \"Web Search\",\n      \"Image Generation\",\n    ]) {\n      const toolSwitch = getToolSwitch(page, toolName);\n      const state = await toolSwitch.getAttribute(\"aria-checked\");\n      toolStates[toolName] = state;\n      console.log(`[toggle-all] Initial state for ${toolName}: ${state}`);\n    }\n\n    // Disable all tools\n    for (const toolName of [\n      \"Internal Search\",\n      \"Web Search\",\n      \"Image Generation\",\n    ]) {\n      const toolSwitch = getToolSwitch(page, toolName);\n      const currentState = await toolSwitch.getAttribute(\"aria-checked\");\n      if (currentState === \"true\") {\n        await clickAndWaitForPatch(page, toolSwitch);\n        const newState = await toolSwitch.getAttribute(\"aria-checked\");\n        console.log(`[toggle-all] Clicked ${toolName}, new state=${newState}`);\n      }\n    }\n\n    // Navigate to app to verify tools are disabled and initial load greeting\n    await page.goto(\"/app\");\n    await waitForUnifiedGreeting(page);\n\n    // Go back and re-enable all tools\n    await page.goto(\"/admin/configuration/chat-preferences\");\n    await page.waitForLoadState(\"networkidle\");\n    // Reload to ensure the page has the updated tools list (after providers were created)\n    await page.reload();\n    await page.waitForLoadState(\"networkidle\");\n    await page.waitForSelector(\"text=Internal Search\", { timeout: 10000 });\n\n    for (const toolName of [\n      \"Internal Search\",\n      \"Web Search\",\n      \"Image Generation\",\n    ]) {\n      const toolSwitch = getToolSwitch(page, toolName);\n      const currentState = await toolSwitch.getAttribute(\"aria-checked\");\n      if (currentState === \"false\") {\n        await clickAndWaitForPatch(page, toolSwitch);\n        const newState = await toolSwitch.getAttribute(\"aria-checked\");\n        console.log(`[toggle-all] Clicked ${toolName}, new state=${newState}`);\n      }\n    }\n\n    // Navigate to app and verify the Action Management toggle and actions exist\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // Wait a bit for backend to process the changes\n    await page.waitForTimeout(2000);\n\n    // Reload to ensure ChatContext has fresh tool data after providers were created\n    await page.reload();\n    await page.waitForLoadState(\"networkidle\");\n\n    // Debug: Check what tools are available via API\n    try {\n      const baseURL = process.env.BASE_URL || \"http://localhost:3000\";\n      const toolsResp = await page.request.get(`${baseURL}/api/tool`);\n      const toolsData = await toolsResp.json();\n      console.log(\n        `[toggle-all] Available tools from API: ${JSON.stringify(\n          toolsData.map((t: any) => ({\n            name: t.name,\n            display_name: t.display_name,\n            in_code_tool_id: t.in_code_tool_id,\n          }))\n        )}`\n      );\n    } catch (e) {\n      console.warn(`[toggle-all] Failed to fetch tools: ${e}`);\n    }\n\n    // Debug: Check assistant configuration\n    try {\n      const baseURL = process.env.BASE_URL || \"http://localhost:3000\";\n      const configResp = await page.request.get(\n        `${baseURL}/api/admin/default-assistant/configuration`\n      );\n      const configData = await configResp.json();\n      console.log(\n        `[toggle-all] Default agent config: ${JSON.stringify(configData)}`\n      );\n    } catch (e) {\n      console.warn(`[toggle-all] Failed to fetch config: ${e}`);\n    }\n\n    await waitForUnifiedGreeting(page);\n    await expect(page.locator(TOOL_IDS.actionToggle)).toBeVisible();\n    await openActionManagement(page);\n\n    // Debug: Check what's actually in the popover\n    const popover = page.locator(TOOL_IDS.options);\n    const popoverText = await popover.textContent();\n    console.log(`[toggle-all] Popover text: ${popoverText}`);\n\n    // Verify at least Internal Search is visible (it should always be enabled)\n    await expect(page.locator(TOOL_IDS.searchOption)).toBeVisible({\n      timeout: 10000,\n    });\n\n    // Check if other tools are visible (they might not be if there's a form state issue)\n    const webSearchVisible = await page\n      .locator(TOOL_IDS.webSearchOption)\n      .isVisible()\n      .catch(() => false);\n    const imageGenVisible = await page\n      .locator(TOOL_IDS.imageGenerationOption)\n      .isVisible()\n      .catch(() => false);\n    console.log(\n      `[toggle-all] Tools visible in chat: Internal Search=true, Web Search=${webSearchVisible}, Image Gen=${imageGenVisible}`\n    );\n\n    // NOTE: Only Internal Search is verified as visible due to a known issue with\n    // Web Search and Image Generation form state when providers are created in beforeEach.\n    // This is being tracked separately as a potential Formik/form state bug.\n\n    await page.goto(\"/admin/configuration/chat-preferences\");\n\n    // Restore original states\n    let needsSave = false;\n    for (const toolName of [\n      \"Internal Search\",\n      \"Web Search\",\n      \"Image Generation\",\n    ]) {\n      const toolSwitch = getToolSwitch(page, toolName);\n      const currentState = await toolSwitch.getAttribute(\"aria-checked\");\n      const originalState = toolStates[toolName];\n\n      if (currentState !== originalState) {\n        await clickAndWaitForPatch(page, toolSwitch);\n        needsSave = true;\n      }\n    }\n  });\n});\n\ntest.describe(\"Chat Preferences Non-Admin Access\", () => {\n  test(\"should redirect non-authenticated users\", async ({ page }) => {\n    // Clear cookies to ensure we're not authenticated\n    await page.context().clearCookies();\n\n    // Try to navigate directly to chat preferences without logging in\n    await page.goto(\"/admin/configuration/chat-preferences\");\n\n    // Wait for navigation to settle\n    await page.waitForTimeout(2000);\n\n    // Should be redirected away from admin page\n    const url = page.url();\n    expect(!url.includes(\"/admin/configuration/chat-preferences\")).toBe(true);\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/disable_default_agent.spec.ts",
    "content": "import { test, expect, Page } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { createAgent } from \"@tests/e2e/utils/agentUtils\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\nconst MAX_SETTING_SAVE_ATTEMPTS = 5;\nconst SETTING_SAVE_RETRY_DELAY_MS = 750;\n\n/**\n * Expand the \"Advanced Options\" collapsible section on the Chat Preferences page.\n * The section is closed by default (`defaultOpen={false}`).\n * Only expands if not already open (checks for the switch element visibility).\n */\nasync function expandAdvancedOptions(page: Page): Promise<void> {\n  // Wait for the page title to be visible, signalling the form has loaded\n  await expect(page.locator('[aria-label=\"admin-page-title\"]')).toBeVisible({\n    timeout: 10000,\n  });\n\n  // Check if the switch is already visible (section already expanded)\n  const switchEl = page.locator(\"#disable_default_assistant\");\n  const alreadyVisible = await switchEl.isVisible().catch(() => false);\n  if (alreadyVisible) return;\n\n  const header = page.getByText(\"Advanced Options\", { exact: true });\n  await expect(header).toBeVisible({ timeout: 10000 });\n  await header.scrollIntoViewIfNeeded();\n  await header.click();\n\n  // Wait for the collapsible content to expand and switch to appear\n  await expect(switchEl).toBeVisible({ timeout: 5000 });\n}\n\n/**\n * Toggle the \"Always Start with an Agent\" setting (formerly \"Disable Default Agent\")\n * on the Chat Preferences page. Uses auto-save via the SwitchField.\n *\n * The switch is a SwitchField with name=\"disable_default_assistant\" which renders\n * `<button role=\"switch\" id=\"disable_default_assistant\" aria-checked=\"...\">`.\n */\nasync function setDisableDefaultAssistantSetting(\n  page: Page,\n  isDisabled: boolean\n): Promise<void> {\n  let lastCheckedState = false;\n\n  for (let attempt = 0; attempt < MAX_SETTING_SAVE_ATTEMPTS; attempt += 1) {\n    await page.goto(\"/admin/configuration/chat-preferences\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // Expand \"Advanced Options\" collapsible (closed by default)\n    await expandAdvancedOptions(page);\n\n    const switchEl = page.locator(\"#disable_default_assistant\");\n    await expect(switchEl).toBeVisible({ timeout: 5000 });\n\n    const currentState = await switchEl.getAttribute(\"aria-checked\");\n    lastCheckedState = currentState === \"true\";\n\n    if (lastCheckedState === isDisabled) {\n      return;\n    }\n\n    // Toggle the switch\n    await switchEl.click();\n\n    // Wait for auto-save toast\n    await expect(page.getByText(\"Settings updated\")).toBeVisible({\n      timeout: 5000,\n    });\n\n    await page.waitForTimeout(SETTING_SAVE_RETRY_DELAY_MS);\n\n    // Verify persistence after reload\n    await page.reload();\n    await page.waitForLoadState(\"networkidle\");\n\n    // Re-expand Advanced Options (closed by default after reload)\n    await expandAdvancedOptions(page);\n\n    const newState = await switchEl.getAttribute(\"aria-checked\");\n    lastCheckedState = newState === \"true\";\n\n    if (lastCheckedState === isDisabled) {\n      return;\n    }\n  }\n\n  throw new Error(\n    `Failed to persist Always Start with an Agent setting after ${MAX_SETTING_SAVE_ATTEMPTS} attempts (expected ${isDisabled}, last=${lastCheckedState}).`\n  );\n}\n\ntest.describe(\"Disable Default Agent Setting @exclusive\", () => {\n  let createdAssistantId: number | null = null;\n\n  test.beforeEach(async ({ page }) => {\n    // Log in as admin\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n  });\n\n  test.afterEach(async ({ page }) => {\n    // Clean up any assistant created during the test\n    if (createdAssistantId !== null) {\n      const client = new OnyxApiClient(page.request);\n      await client.deleteAgent(createdAssistantId);\n      createdAssistantId = null;\n    }\n\n    // Ensure default agent is enabled (switch unchecked) after each test\n    // to avoid interfering with other tests\n    await setDisableDefaultAssistantSetting(page, false);\n  });\n\n  test(\"admin can enable and disable the setting in chat preferences\", async ({\n    page,\n  }) => {\n    await setDisableDefaultAssistantSetting(page, true);\n    await setDisableDefaultAssistantSetting(page, false);\n    await setDisableDefaultAssistantSetting(page, true);\n  });\n\n  test(\"new session button uses current agent when setting is enabled\", async ({\n    page,\n  }) => {\n    // First enable the setting\n    await setDisableDefaultAssistantSetting(page, true);\n\n    // Navigate to app and create a new assistant to ensure there's one besides the default\n    await page.goto(\"/app\");\n    const agentName = `Test Assistant ${Date.now()}`;\n    await createAgent(page, {\n      name: agentName,\n      description: \"Test assistant for new session button test\",\n      instructions: \"You are a helpful test assistant.\",\n    });\n\n    // Extract the assistant ID from the URL\n    const currentUrl = page.url();\n    const agentIdMatch = currentUrl.match(/agentId=(\\d+)/);\n    expect(agentIdMatch).toBeTruthy();\n\n    // Store for cleanup\n    if (agentIdMatch) {\n      createdAssistantId = Number(agentIdMatch[1]);\n    }\n\n    // Click the \"New Session\" button\n    const newSessionButton = page.locator(\n      '[data-testid=\"AppSidebar/new-session\"]'\n    );\n    await newSessionButton.click();\n\n    // Verify the WelcomeMessage shown is NOT from the default agent\n    // Default agent shows onyx-logo, custom agents show agent-name-display\n    await expect(page.locator('[data-testid=\"onyx-logo\"]')).not.toBeVisible();\n    await expect(\n      page.locator('[data-testid=\"agent-name-display\"]')\n    ).toBeVisible();\n  });\n\n  test(\"direct navigation to /app uses first pinned assistant when setting is enabled\", async ({\n    page,\n  }) => {\n    // First enable the setting\n    await setDisableDefaultAssistantSetting(page, true);\n\n    // Navigate directly to /app\n    await page.goto(\"/app\");\n\n    // Verify that we didn't land on the default agent (ID 0)\n    // The assistant selection should be a pinned or available assistant (not ID 0)\n    const currentUrl = page.url();\n    // If agentId is in URL, it should not be 0\n    if (currentUrl.includes(\"agentId=\")) {\n      expect(currentUrl).not.toContain(\"agentId=0\");\n    }\n  });\n\n  test(\"chat preferences shows disabled state when setting is enabled\", async ({\n    page,\n  }) => {\n    // First enable the setting\n    await setDisableDefaultAssistantSetting(page, true);\n\n    // Navigate to chat preferences configuration page\n    await page.goto(\"/admin/configuration/chat-preferences\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // Wait for the page to fully render (page title signals form is loaded)\n    await expect(page.locator('[aria-label=\"admin-page-title\"]')).toHaveText(\n      /^Chat Preferences/,\n      { timeout: 10000 }\n    );\n\n    // The new page wraps Connectors + Actions & Tools in <Disabled disabled={values.disable_default_assistant}>\n    // When disabled, the section should have reduced opacity / disabled styling\n    // The \"Modify Prompt\" button should still be accessible (it's outside the Disabled wrapper)\n    // Use text locator (Opal Button wraps text in Interactive.Base > Slot which may\n    // not expose role=\"button\" to Playwright's getByRole)\n    await expect(page.getByText(\"Modify Prompt\")).toBeVisible({\n      timeout: 5000,\n    });\n\n    // The \"Actions & Tools\" section text should still be present but visually disabled\n    await expect(page.getByText(\"Actions & Tools\")).toBeVisible();\n  });\n\n  test(\"chat preferences shows full configuration UI when setting is disabled\", async ({\n    page,\n  }) => {\n    // Ensure setting is disabled\n    await setDisableDefaultAssistantSetting(page, false);\n\n    // Navigate to chat preferences configuration page\n    await page.goto(\"/admin/configuration/chat-preferences\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // Verify configuration UI is shown (Actions & Tools section should be visible and enabled)\n    await expect(page.getByText(\"Actions & Tools\")).toBeVisible({\n      timeout: 10000,\n    });\n\n    // Verify the page title\n    await expect(page.locator('[aria-label=\"admin-page-title\"]')).toHaveText(\n      /^Chat Preferences/\n    );\n  });\n\n  test(\"default agent is available again when setting is disabled\", async ({\n    page,\n  }) => {\n    // Navigate to settings and ensure setting is disabled\n    await setDisableDefaultAssistantSetting(page, false);\n\n    // Navigate directly to /app without parameters\n    await page.goto(\"/app\");\n\n    // The default agent (ID 0) should be available\n    // We can verify this by checking that the app loads successfully\n    // and doesn't force navigation to a specific assistant\n    expect(page.url()).toContain(\"/app\");\n\n    // Verify the new session button navigates to /app without agentId\n    const newSessionButton = page.locator(\n      '[data-testid=\"AppSidebar/new-session\"]'\n    );\n    await newSessionButton.click();\n\n    // Should navigate to /app without agentId parameter\n    const newUrl = page.url();\n    expect(newUrl).toContain(\"/app\");\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/discord-bot/admin-workflows.spec.ts",
    "content": "/**\n * E2E tests for Discord bot admin workflow flows.\n *\n * These tests verify complete user journeys that span multiple pages/components.\n * Individual component tests are in their respective spec files.\n */\n\nimport {\n  test,\n  expect,\n  gotoDiscordBotPage,\n  gotoGuildDetailPage,\n} from \"@tests/e2e/admin/discord-bot/fixtures\";\n\n// Disable retries for Discord bot tests - attempt once at most\ntest.describe.configure({ retries: 0 });\n\ntest.describe(\"Admin Workflow E2E Flows\", () => {\n  test(\"complete setup and configuration flow\", async ({\n    adminPage,\n    mockRegisteredGuild,\n    mockBotConfigured: _mockBotConfigured,\n  }) => {\n    // Start at list page\n    await gotoDiscordBotPage(adminPage);\n\n    // Verify list page loads\n    await expect(\n      adminPage\n        .locator('[aria-label=\"admin-page-title\"]')\n        .getByText(\"Discord Integration\")\n    ).toBeVisible();\n    await expect(\n      adminPage.locator(\"text=Server Configurations\").first()\n    ).toBeVisible();\n\n    // Navigate to guild detail page\n    const guildButton = adminPage.locator(\n      `button:has-text(\"${mockRegisteredGuild.name}\")`\n    );\n    await expect(guildButton).toBeVisible({ timeout: 10000 });\n    await guildButton.click();\n\n    // Verify detail page loads\n    await expect(adminPage).toHaveURL(\n      new RegExp(`/admin/discord-bot/${mockRegisteredGuild.id}`)\n    );\n    await expect(\n      adminPage.locator(\"text=Channel Configuration\").first()\n    ).toBeVisible();\n\n    // Configure a channel: toggle enabled, show unsaved changes, save\n    const channelRow = adminPage.locator(\"tbody tr\").first();\n    await expect(channelRow).toBeVisible();\n\n    const enableToggle = channelRow.locator('[role=\"switch\"]').first();\n    if (await enableToggle.isVisible()) {\n      const initialState = await enableToggle.getAttribute(\"aria-checked\");\n      await enableToggle.click();\n\n      await expect(enableToggle).toHaveAttribute(\n        \"aria-checked\",\n        initialState === \"true\" ? \"false\" : \"true\"\n      );\n    }\n\n    // Verify unsaved changes indicator\n    await expect(\n      adminPage.locator(\"text=You have unsaved changes\")\n    ).toBeVisible({ timeout: 5000 });\n\n    // Save changes - wait for the bulk update API call\n    // Update button is now in the header\n    const updateButton = adminPage.locator(\n      'button:has-text(\"Update Configuration\")'\n    );\n    // Verify button is visible and enabled before clicking\n    await expect(updateButton).toBeEnabled({ timeout: 5000 });\n\n    const bulkUpdatePromise = adminPage.waitForResponse(\n      (response) =>\n        response\n          .url()\n          .includes(\n            `/api/manage/admin/discord-bot/guilds/${mockRegisteredGuild.id}/channels`\n          ) && response.request().method() === \"PATCH\"\n    );\n\n    await updateButton.click();\n    await bulkUpdatePromise;\n\n    // Verify success toast\n    const successToast = adminPage.locator(\"text=/updated/i\");\n    await expect(successToast).toBeVisible({ timeout: 5000 });\n\n    // Navigate back to list\n    const backButton = adminPage.locator(\n      'button:has-text(\"Back\"), a:has-text(\"Back\"), button[aria-label*=\"back\" i]'\n    );\n    if (await backButton.isVisible({ timeout: 5000 }).catch(() => false)) {\n      await backButton.click();\n      await expect(adminPage).toHaveURL(/\\/admin\\/discord-bot$/);\n    }\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/discord-bot/bot-config.spec.ts",
    "content": "/**\n * E2E tests for Discord bot configuration page.\n *\n * Tests the bot token configuration card which allows admins to:\n * - Enter and save a Discord bot token\n * - View configuration status (Configured/Not Configured badge)\n * - Delete the bot token configuration\n */\n\nimport {\n  test,\n  expect,\n  gotoDiscordBotPage,\n} from \"@tests/e2e/admin/discord-bot/fixtures\";\n\n// Disable retries for Discord bot tests - attempt once at most\ntest.describe.configure({ retries: 0 });\n\ntest.describe(\"Bot Configuration Page\", () => {\n  test(\"bot config page loads\", async ({ adminPage }) => {\n    await gotoDiscordBotPage(adminPage);\n\n    // Page should load without errors\n    await expect(adminPage).toHaveURL(/\\/admin\\/discord-bot/);\n    // Page title should contain \"Discord\"\n    await expect(\n      adminPage\n        .locator('[aria-label=\"admin-page-title\"]')\n        .getByText(\"Discord Integration\")\n    ).toBeVisible();\n  });\n\n  test(\"bot config shows token input when not configured\", async ({\n    adminPage,\n  }) => {\n    await gotoDiscordBotPage(adminPage);\n\n    // When not configured, should show:\n    // - \"Not Configured\" badge OR\n    // - Token input field with \"Save Token\" button\n    const notConfiguredBadge = adminPage.locator(\"text=Not Configured\");\n    const tokenInput = adminPage.locator('input[placeholder*=\"token\" i]');\n    const saveTokenButton = adminPage.locator('button:has-text(\"Save Token\")');\n\n    // Either not configured state with input, or already configured\n    const configuredBadge = adminPage.locator(\"text=Configured\").first();\n\n    // Check that at least one of the states is visible\n    // Check configured state first, then fall back to not configured state\n    const isConfigured = await configuredBadge\n      .isVisible({ timeout: 5000 })\n      .catch(() => false);\n\n    if (isConfigured) {\n      // Bot is configured - verify configured badge is visible\n      await expect(configuredBadge).toBeVisible();\n    } else {\n      // Bot is not configured - verify not configured badge and input are visible\n      await expect(notConfiguredBadge).toBeVisible({ timeout: 10000 });\n      await expect(tokenInput).toBeVisible();\n      await expect(saveTokenButton).toBeVisible();\n    }\n  });\n\n  test(\"bot config save token validation\", async ({ adminPage }) => {\n    await gotoDiscordBotPage(adminPage);\n\n    const tokenInput = adminPage.locator('input[placeholder*=\"token\" i]');\n    const saveTokenButton = adminPage.locator('button:has-text(\"Save Token\")');\n\n    // Only run if token input is visible (not already configured)\n    if (await tokenInput.isVisible({ timeout: 5000 }).catch(() => false)) {\n      // Save button should be disabled when input is empty\n      await expect(saveTokenButton).toBeDisabled();\n\n      // Enter a token\n      await tokenInput.fill(\"test_bot_token_12345\");\n\n      // Save button should now be enabled\n      await expect(saveTokenButton).toBeEnabled();\n\n      // Clear input\n      await tokenInput.clear();\n\n      // Button should be disabled again\n      await expect(saveTokenButton).toBeDisabled();\n    }\n  });\n\n  test(\"bot config shows configured state\", async ({\n    adminPage,\n    mockBotConfigured,\n  }) => {\n    await gotoDiscordBotPage(adminPage);\n\n    // With mockBotConfigured, should show configured state\n    const configuredBadge = adminPage.locator(\"text=Configured\").first();\n    const deleteButton = adminPage.locator(\n      'button:has-text(\"Delete Discord Token\")'\n    );\n\n    // Should show configured badge\n    await expect(configuredBadge).toBeVisible({ timeout: 10000 });\n\n    // Should show delete button when configured\n    await expect(deleteButton).toBeVisible();\n  });\n\n  test(\"bot config delete shows confirmation modal\", async ({\n    adminPage,\n    mockBotConfigured,\n  }) => {\n    await gotoDiscordBotPage(adminPage);\n\n    // Wait for configured state to be visible\n    const configuredBadge = adminPage.locator(\"text=Configured\").first();\n    await expect(configuredBadge).toBeVisible({ timeout: 10000 });\n\n    // Find and click delete button\n    const deleteButton = adminPage.locator(\n      'button:has-text(\"Delete Discord Token\")'\n    );\n    await expect(deleteButton).toBeVisible();\n    await deleteButton.click();\n\n    // Confirmation modal should appear\n    const modal = adminPage.locator('[role=\"dialog\"]');\n    await expect(modal).toBeVisible({ timeout: 10000 });\n\n    // Modal should have cancel and confirm buttons\n    const cancelButton = adminPage.locator('button:has-text(\"Cancel\")');\n    const confirmButton = adminPage.locator(\n      'button:has-text(\"Delete\"), button:has-text(\"Confirm\")'\n    );\n\n    // At least one of these buttons should be visible\n    await expect(cancelButton.or(confirmButton).first()).toBeVisible({\n      timeout: 5000,\n    });\n\n    // Cancel to avoid actually deleting\n    if (await cancelButton.isVisible({ timeout: 5000 }).catch(() => false)) {\n      await cancelButton.click();\n      await expect(modal).not.toBeVisible({ timeout: 5000 });\n    }\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/discord-bot/channel-config.spec.ts",
    "content": "/**\n * E2E tests for Discord guild detail page and channel configuration.\n *\n * Tests the guild detail page which includes:\n * - Guild enabled/disabled toggle\n * - Default Agent (persona) selector\n * - Channel Configuration section with:\n *   - List of channels with icons (text/forum)\n *   - Enabled toggle per channel\n *   - Require @mention toggle\n *   - Thread Only Mode toggle\n *   - Agent Override dropdown\n */\n\nimport {\n  test,\n  expect,\n  gotoGuildDetailPage,\n} from \"@tests/e2e/admin/discord-bot/fixtures\";\n\n// Disable retries for Discord bot tests - attempt once at most\ntest.describe.configure({ retries: 0 });\n\ntest.describe(\"Guild Detail Page & Channel Configuration\", () => {\n  test(\"guild detail page loads\", async ({\n    adminPage,\n    mockRegisteredGuild,\n  }) => {\n    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);\n\n    // Page should load with guild info\n    await expect(adminPage).toHaveURL(\n      new RegExp(`/admin/discord-bot/${mockRegisteredGuild.id}`)\n    );\n\n    // Should show the guild name in the header\n    await expect(\n      adminPage.locator(`text=${mockRegisteredGuild.name}`)\n    ).toBeVisible();\n  });\n\n  test(\"guild default agent dropdown shows options\", async ({\n    adminPage,\n    mockRegisteredGuild,\n  }) => {\n    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);\n\n    // Should show \"Default Agent\" section\n    await expect(adminPage.locator(\"text=Default Agent\").first()).toBeVisible({\n      timeout: 10000,\n    });\n\n    // Find the persona/agent dropdown (InputSelect)\n    const agentDropdown = adminPage.locator('button:has-text(\"Default Agent\")');\n\n    if (await agentDropdown.isVisible({ timeout: 5000 }).catch(() => false)) {\n      await agentDropdown.click();\n\n      // Dropdown should show available options\n      const options = adminPage.locator('[role=\"option\"]');\n      await expect(options.first()).toBeVisible({ timeout: 5000 });\n    }\n  });\n});\n\ntest.describe(\"Channel Configuration\", () => {\n  test(\"channels table displays with action buttons\", async ({\n    adminPage,\n    mockRegisteredGuild,\n  }) => {\n    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);\n\n    // Channel list table should be visible\n    const channelTable = adminPage.locator(\"table\");\n    await expect(channelTable).toBeVisible({ timeout: 10000 });\n\n    // Should show our mock channels\n    await expect(adminPage.locator(\"text=general\")).toBeVisible();\n    await expect(adminPage.locator(\"text=help-forum\")).toBeVisible();\n    await expect(adminPage.locator(\"text=private-support\")).toBeVisible();\n\n    // Should show action buttons\n    await expect(\n      adminPage.locator('button:has-text(\"Enable All\")')\n    ).toBeVisible();\n    await expect(\n      adminPage.locator('button:has-text(\"Disable All\")')\n    ).toBeVisible();\n    // Update button is now in the header, not in the channel config section\n    await expect(\n      adminPage.locator('button:has-text(\"Update Configuration\")')\n    ).toBeVisible();\n  });\n\n  test(\"channels table has correct columns\", async ({\n    adminPage,\n    mockRegisteredGuild,\n  }) => {\n    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);\n\n    // Table headers should be visible\n    await expect(adminPage.locator(\"th:has-text('Channel')\")).toBeVisible();\n    await expect(adminPage.locator(\"th:has-text('Enabled')\")).toBeVisible();\n    await expect(\n      adminPage.locator(\"th:has-text('Require @mention')\")\n    ).toBeVisible();\n    await expect(\n      adminPage.locator(\"th:has-text('Thread Only Mode')\")\n    ).toBeVisible();\n    await expect(\n      adminPage.locator(\"th:has-text('Agent Override')\")\n    ).toBeVisible();\n  });\n\n  test(\"channel enabled toggle updates state\", async ({\n    adminPage,\n    mockRegisteredGuild,\n  }) => {\n    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);\n\n    // Find the row for \"general\" channel\n    const generalRow = adminPage.locator(\"tr\").filter({\n      hasText: \"general\",\n    });\n\n    // Find the first switch in that row (Enabled toggle)\n    const enabledToggle = generalRow.locator('[role=\"switch\"]').first();\n    await expect(enabledToggle).toBeVisible({ timeout: 10000 });\n\n    // Get initial state\n    const initialState = await enabledToggle.getAttribute(\"aria-checked\");\n\n    // Click to toggle\n    await enabledToggle.click();\n\n    // State should change (local state update)\n    await expect(enabledToggle).toHaveAttribute(\n      \"aria-checked\",\n      initialState === \"true\" ? \"false\" : \"true\"\n    );\n  });\n\n  test(\"channel require mention toggle works\", async ({\n    adminPage,\n    mockRegisteredGuild,\n  }) => {\n    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);\n\n    // Find the row for \"general\" channel\n    const generalRow = adminPage.locator(\"tr\").filter({\n      hasText: \"general\",\n    });\n\n    // Find switches - second one should be \"require @mention\"\n    const switches = generalRow.locator('[role=\"switch\"]');\n    const requireMentionToggle = switches.nth(1);\n\n    await expect(requireMentionToggle).toBeVisible({ timeout: 10000 });\n\n    // Get initial state\n    const initialState =\n      await requireMentionToggle.getAttribute(\"aria-checked\");\n\n    // Click to toggle\n    await requireMentionToggle.click();\n\n    // State should change\n    await expect(requireMentionToggle).toHaveAttribute(\n      \"aria-checked\",\n      initialState === \"true\" ? \"false\" : \"true\"\n    );\n  });\n\n  test(\"channel thread only mode toggle works for text channels\", async ({\n    adminPage,\n    mockRegisteredGuild,\n  }) => {\n    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);\n\n    // Find the row for \"general\" channel (text type)\n    const generalRow = adminPage.locator(\"tr\").filter({\n      hasText: \"general\",\n    });\n\n    // Find switches - third one should be \"thread only mode\"\n    const switches = generalRow.locator('[role=\"switch\"]');\n    const threadOnlyToggle = switches.nth(2);\n\n    await expect(threadOnlyToggle).toBeVisible({ timeout: 10000 });\n\n    // Toggle should be clickable for text channels\n    await threadOnlyToggle.click();\n\n    // Verify it changed\n    const newState = await threadOnlyToggle.getAttribute(\"aria-checked\");\n    expect(newState).toBe(\"true\");\n  });\n\n  test(\"forum channels do not show thread only toggle\", async ({\n    adminPage,\n    mockRegisteredGuild,\n  }) => {\n    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);\n\n    // Find the row for \"help-forum\" channel (forum type)\n    const forumRow = adminPage.locator(\"tr\").filter({\n      hasText: \"help-forum\",\n    });\n\n    // Forum channels should only have 2 switches (Enabled, Require @mention)\n    // Thread Only Mode is not applicable to forums\n    const switches = forumRow.locator('[role=\"switch\"]');\n    const count = await switches.count();\n\n    // Should have fewer switches than text channels (2 vs 3)\n    expect(count).toBe(2);\n  });\n\n  test(\"enable all button works\", async ({\n    adminPage,\n    mockRegisteredGuild,\n  }) => {\n    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);\n\n    const enableAllButton = adminPage.locator('button:has-text(\"Enable All\")');\n    await expect(enableAllButton).toBeVisible({ timeout: 10000 });\n    await enableAllButton.click();\n\n    // Wait for UI to update - all enabled toggles should be checked\n    const rows = adminPage.locator(\"tbody tr\");\n    const rowCount = await rows.count();\n\n    for (let i = 0; i < rowCount; i++) {\n      const toggle = rows.nth(i).locator('[role=\"switch\"]').first();\n      if (await toggle.isVisible()) {\n        await expect(toggle).toHaveAttribute(\"aria-checked\", \"true\");\n      }\n    }\n  });\n\n  test(\"disable all button works\", async ({\n    adminPage,\n    mockRegisteredGuild,\n  }) => {\n    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);\n\n    const disableAllButton = adminPage.locator(\n      'button:has-text(\"Disable All\")'\n    );\n    await expect(disableAllButton).toBeVisible({ timeout: 10000 });\n    await disableAllButton.click();\n\n    // Wait for UI to update - all enabled toggles should be unchecked\n    const rows = adminPage.locator(\"tbody tr\");\n    const rowCount = await rows.count();\n\n    for (let i = 0; i < rowCount; i++) {\n      const toggle = rows.nth(i).locator('[role=\"switch\"]').first();\n      if (await toggle.isVisible()) {\n        await expect(toggle).toHaveAttribute(\"aria-checked\", \"false\");\n      }\n    }\n  });\n\n  test(\"unsaved changes indicator appears\", async ({\n    adminPage,\n    mockRegisteredGuild,\n  }) => {\n    await gotoGuildDetailPage(adminPage, mockRegisteredGuild.id);\n\n    // Find the unsaved changes message container (always in DOM, hidden with opacity-0)\n    const unsavedMessage = adminPage.locator(\"text=You have unsaved changes\");\n    // The container div has class \"sticky\" and controls visibility via opacity\n    const messageContainer = adminPage\n      .locator(\"div.sticky\")\n      .filter({ has: unsavedMessage })\n      .first();\n\n    // Initially hidden (opacity-0)\n    await expect(messageContainer).toHaveCSS(\"opacity\", \"0\");\n\n    // Make a change\n    const generalRow = adminPage.locator(\"tr\").filter({\n      hasText: \"general\",\n    });\n    const enabledToggle = generalRow.locator('[role=\"switch\"]').first();\n    await enabledToggle.click();\n\n    // Unsaved changes indicator should appear (opacity-100)\n    await expect(messageContainer).toHaveCSS(\"opacity\", \"1\", { timeout: 5000 });\n    await expect(unsavedMessage).toBeVisible({ timeout: 5000 });\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/discord-bot/fixtures.ts",
    "content": "/**\n * Playwright fixtures for Discord bot admin UI tests.\n *\n * These fixtures provide:\n * - Authenticated admin page\n * - API client for backend operations\n * - Mock data for guilds and channels (since real Discord integration isn't available in tests)\n */\n\nimport { test as base, expect, Page } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\n/**\n * Mock data types matching backend response schemas\n */\ninterface MockGuild {\n  id: number;\n  guild_id: string | null;\n  guild_name: string | null;\n  registration_key: string;\n  registered_at: string | null;\n  enabled: boolean;\n  default_persona_id: number | null;\n}\n\ninterface MockChannel {\n  id: number;\n  channel_id: string;\n  channel_name: string;\n  channel_type: \"text\" | \"forum\";\n  is_private: boolean;\n  enabled: boolean;\n  require_bot_invocation: boolean;\n  thread_only_mode: boolean;\n  persona_override_id: number | null;\n}\n\n/**\n * Constants for mock data\n */\nconst MOCK_GUILD_ID = 999;\n\n/**\n * Helper to authenticate and clear cookies\n */\nasync function authenticateAdmin(page: Page): Promise<void> {\n  await page.context().clearCookies();\n  await loginAs(page, \"admin\");\n}\n\n/**\n * Helper to create JSON response\n */\nfunction jsonResponse(data: unknown, status = 200) {\n  return {\n    status,\n    contentType: \"application/json\",\n    body: JSON.stringify(data),\n  };\n}\n\n/**\n * Creates mock channel data for a registered guild\n */\nfunction createMockChannels(): MockChannel[] {\n  return [\n    {\n      id: 1,\n      channel_id: \"1234567890123456789\",\n      channel_name: \"general\",\n      channel_type: \"text\",\n      is_private: false,\n      enabled: true,\n      require_bot_invocation: false,\n      thread_only_mode: false,\n      persona_override_id: null,\n    },\n    {\n      id: 2,\n      channel_id: \"1234567890123456790\",\n      channel_name: \"help-forum\",\n      channel_type: \"forum\",\n      is_private: false,\n      enabled: false,\n      require_bot_invocation: true,\n      thread_only_mode: false,\n      persona_override_id: null,\n    },\n    {\n      id: 3,\n      channel_id: \"1234567890123456791\",\n      channel_name: \"private-support\",\n      channel_type: \"text\",\n      is_private: true,\n      enabled: true,\n      require_bot_invocation: true,\n      thread_only_mode: true,\n      persona_override_id: null,\n    },\n  ];\n}\n\n/**\n * Creates a mock registered guild\n */\nfunction createMockRegisteredGuild(id: number): MockGuild {\n  return {\n    id,\n    guild_id: \"987654321098765432\",\n    guild_name: \"Test Discord Server\",\n    registration_key: \"test-key-12345\",\n    registered_at: new Date().toISOString(),\n    enabled: true,\n    default_persona_id: null,\n  };\n}\n\n/**\n * Creates a mock pending guild (not yet registered)\n */\nfunction createMockPendingGuild(id: number): MockGuild {\n  return {\n    id,\n    guild_id: null,\n    guild_name: null,\n    registration_key: \"pending-key-67890\",\n    registered_at: null,\n    enabled: false,\n    default_persona_id: null,\n  };\n}\n\n// Extend base test with Discord bot fixtures\nexport const test = base.extend<{\n  adminPage: Page;\n  apiClient: OnyxApiClient;\n  seededGuild: { id: number; name: string; registrationKey: string };\n  mockRegisteredGuild: {\n    id: number;\n    name: string;\n    guild: MockGuild;\n    channels: MockChannel[];\n  };\n  mockBotConfigured: boolean;\n}>({\n  // Admin page fixture - ensures proper authentication before each test\n  adminPage: async ({ page }, use) => {\n    await authenticateAdmin(page);\n    await use(page);\n  },\n\n  // API client fixture - provides access to OnyxApiClient for backend operations\n  apiClient: async ({ page }, use) => {\n    await authenticateAdmin(page);\n    const client = new OnyxApiClient(page.request);\n    await use(client);\n  },\n\n  // Seeded guild fixture - creates a real pending guild via API\n  seededGuild: async ({ page }, use) => {\n    await authenticateAdmin(page);\n\n    const apiClient = new OnyxApiClient(page.request);\n    const guild = await apiClient.createDiscordGuild();\n\n    await use({\n      id: guild.id,\n      name: guild.guild_name || \"Pending\",\n      registrationKey: guild.registration_key,\n    });\n\n    // Cleanup\n    await apiClient.deleteDiscordGuild(guild.id);\n  },\n\n  // Mock registered guild fixture - provides a fully mocked registered guild with channels\n  // This intercepts API calls to simulate a registered guild without needing Discord\n  mockRegisteredGuild: async ({ page }, use) => {\n    await authenticateAdmin(page);\n\n    // Use a mutable object so we can update it when PATCH requests come in\n    let mockGuild = createMockRegisteredGuild(MOCK_GUILD_ID);\n    const mockChannels = createMockChannels();\n\n    // Mock the guild list endpoint\n    await page.route(\n      \"**/api/manage/admin/discord-bot/guilds\",\n      async (route) => {\n        const method = route.request().method();\n        if (method === \"GET\") {\n          await route.fulfill(jsonResponse([mockGuild]));\n        } else if (method === \"POST\") {\n          // Allow creating new guilds - return a new pending guild\n          const newGuild = createMockPendingGuild(MOCK_GUILD_ID + 1);\n          await route.fulfill(jsonResponse(newGuild));\n        } else {\n          await route.continue();\n        }\n      }\n    );\n\n    // Mock the specific guild endpoint\n    await page.route(\n      `**/api/manage/admin/discord-bot/guilds/${MOCK_GUILD_ID}`,\n      async (route) => {\n        const method = route.request().method();\n        if (method === \"GET\") {\n          await route.fulfill(jsonResponse(mockGuild));\n        } else if (method === \"PATCH\") {\n          // Handle updates - merge with current state and update mockGuild\n          const body = (await route.request().postDataJSON()) || {};\n          mockGuild = { ...mockGuild, ...body };\n          await route.fulfill(jsonResponse(mockGuild));\n        } else if (method === \"DELETE\") {\n          await route.fulfill({ status: 204, body: \"\" });\n        } else {\n          await route.continue();\n        }\n      }\n    );\n\n    // Mock the channels endpoint for this guild\n    await page.route(\n      `**/api/manage/admin/discord-bot/guilds/${MOCK_GUILD_ID}/channels`,\n      async (route) => {\n        await route.fulfill(jsonResponse(mockChannels));\n      }\n    );\n\n    // Mock channel update endpoint\n    await page.route(\n      `**/api/manage/admin/discord-bot/guilds/${MOCK_GUILD_ID}/channels/*`,\n      async (route) => {\n        if (route.request().method() === \"PATCH\") {\n          const body = (await route.request().postDataJSON()) || {};\n          // Extract channel ID from URL: .../channels/{id}\n          const urlMatch = route\n            .request()\n            .url()\n            .match(/\\/channels\\/(\\d+)/);\n          const channelIdStr = urlMatch?.[1];\n          const channelId = channelIdStr ? parseInt(channelIdStr, 10) : null;\n          const channel = channelId\n            ? mockChannels.find((c) => c.id === channelId)\n            : null;\n\n          if (channel) {\n            const updatedChannel = { ...channel, ...body };\n            await route.fulfill(jsonResponse(updatedChannel));\n          } else {\n            await route.fulfill(\n              jsonResponse({ error: \"Channel not found\" }, 404)\n            );\n          }\n        } else {\n          await route.continue();\n        }\n      }\n    );\n\n    await use({\n      id: MOCK_GUILD_ID,\n      name: mockGuild.guild_name!,\n      guild: mockGuild,\n      channels: mockChannels,\n    });\n\n    // No cleanup needed - routes are automatically cleared when page closes\n  },\n\n  // Mock bot configuration state\n  mockBotConfigured: async ({ page }, use) => {\n    const configResponse = {\n      configured: true,\n      created_at: new Date().toISOString(),\n    };\n\n    await page.route(\n      \"**/api/manage/admin/discord-bot/config\",\n      async (route) => {\n        const method = route.request().method();\n        if (method === \"GET\" || method === \"POST\") {\n          await route.fulfill(jsonResponse(configResponse));\n        } else if (method === \"DELETE\") {\n          await route.fulfill({ status: 204, body: \"\" });\n        } else {\n          await route.continue();\n        }\n      }\n    );\n\n    await use(true);\n  },\n});\n\nexport { expect };\n\n/**\n * Navigation helpers for Discord bot pages.\n * These wait for specific UI elements that indicate the page has loaded.\n */\nexport async function gotoDiscordBotPage(adminPage: Page): Promise<void> {\n  await adminPage.goto(\"/admin/discord-bot\");\n  await adminPage.waitForLoadState(\"networkidle\");\n  // Wait for the page title\n  await adminPage.waitForSelector(\"text=Discord Integration\", {\n    timeout: 15000,\n  });\n}\n\nexport async function gotoGuildDetailPage(\n  adminPage: Page,\n  guildId: number\n): Promise<void> {\n  await adminPage.goto(`/admin/discord-bot/${guildId}`);\n  await adminPage.waitForLoadState(\"networkidle\");\n  // Wait for Channel Configuration section (the main content area on guild detail page)\n  await adminPage.waitForSelector(\"text=Channel Configuration\", {\n    timeout: 15000,\n  });\n}\n"
  },
  {
    "path": "web/tests/e2e/admin/discord-bot/guilds-list.spec.ts",
    "content": "/**\n * E2E tests for Discord guilds list page.\n *\n * Tests the server configurations table which shows:\n * - List of registered and pending Discord servers\n * - Status badges (Registered/Pending)\n * - Enabled/Disabled status\n * - Add Server and Delete actions\n */\n\nimport {\n  test,\n  expect,\n  gotoDiscordBotPage,\n} from \"@tests/e2e/admin/discord-bot/fixtures\";\n\n// Disable retries for Discord bot tests - attempt once at most\ntest.describe.configure({ retries: 0 });\n\ntest.describe(\"Guilds List Page\", () => {\n  test(\"guilds page shows server configurations\", async ({ adminPage }) => {\n    await gotoDiscordBotPage(adminPage);\n\n    // Should show Server Configurations section\n    // Use .first() to avoid strict mode violation if it appears in multiple places\n    const serverConfigSection = adminPage\n      .locator(\"text=Server Configurations\")\n      .first();\n    await expect(serverConfigSection).toBeVisible({ timeout: 10000 });\n  });\n\n  test(\"guilds page empty state\", async ({ adminPage }) => {\n    await gotoDiscordBotPage(adminPage);\n\n    // Should show either:\n    // - \"No Discord servers configured yet\" empty message\n    // - OR a table with servers\n    // - OR Add Server button\n    const emptyState = adminPage.locator(\n      \"text=No Discord servers configured yet\"\n    );\n    const addButton = adminPage.locator('button:has-text(\"Add Server\")');\n    const serverTable = adminPage.locator(\"table\");\n\n    // Check each state separately to avoid strict mode violation\n    // (empty state and add button can both be visible when bot not configured)\n    const hasEmptyState = await emptyState\n      .isVisible({ timeout: 5000 })\n      .catch(() => false);\n    const hasAddButton = await addButton\n      .isVisible({ timeout: 5000 })\n      .catch(() => false);\n    const hasTable = await serverTable\n      .isVisible({ timeout: 5000 })\n      .catch(() => false);\n\n    expect(hasEmptyState || hasAddButton || hasTable).toBe(true);\n  });\n\n  test(\"guilds page shows mock registered guild\", async ({\n    adminPage,\n    mockRegisteredGuild,\n  }) => {\n    await gotoDiscordBotPage(adminPage);\n\n    // Mock guild should appear in the list\n    const guildName = adminPage.locator(`text=${mockRegisteredGuild.name}`);\n    await expect(guildName).toBeVisible({ timeout: 10000 });\n\n    // Find the table row containing the guild to scope badges\n    const tableRow = adminPage.locator(\"tr\").filter({\n      hasText: mockRegisteredGuild.name,\n    });\n\n    // Should show Registered badge in the guild's row\n    const registeredBadge = tableRow.locator(\"text=Registered\");\n    await expect(registeredBadge).toBeVisible();\n\n    // Should show enabled toggle switch in the guild's row (in Enabled column)\n    const enabledSwitch = tableRow.locator('[role=\"switch\"]').first();\n    await expect(enabledSwitch).toBeVisible();\n    await expect(enabledSwitch).toHaveAttribute(\"aria-checked\", \"true\");\n  });\n\n  test(\"guild enabled toggle works in table\", async ({\n    adminPage,\n    mockRegisteredGuild,\n    mockBotConfigured: _mockBotConfigured,\n  }) => {\n    await gotoDiscordBotPage(adminPage);\n\n    // Find the table row containing the guild\n    const tableRow = adminPage.locator(\"tr\").filter({\n      hasText: mockRegisteredGuild.name,\n    });\n    await expect(tableRow).toBeVisible({ timeout: 10000 });\n\n    // Find the enabled toggle switch in that row\n    const enabledSwitch = tableRow.locator('[role=\"switch\"]').first();\n    await expect(enabledSwitch).toBeVisible({ timeout: 10000 });\n    await expect(enabledSwitch).toHaveAttribute(\"aria-checked\", \"true\");\n    await expect(enabledSwitch).toBeEnabled();\n\n    const initialState = await enabledSwitch.getAttribute(\"aria-checked\");\n    const expectedState = initialState === \"true\" ? \"false\" : \"true\";\n    const guildUrl = `/api/manage/admin/discord-bot/guilds/${mockRegisteredGuild.id}`;\n    const guildsListUrl = `/api/manage/admin/discord-bot/guilds`;\n\n    // Set up response waiters before clicking\n    const patchPromise = adminPage.waitForResponse(\n      (response) =>\n        response.url().includes(guildUrl) &&\n        response.request().method() === \"PATCH\"\n    );\n\n    // refreshGuilds() calls the list endpoint, not the individual guild endpoint\n    const getPromise = adminPage.waitForResponse(\n      (response) =>\n        response.url().includes(guildsListUrl) &&\n        response.request().method() === \"GET\"\n    );\n\n    await enabledSwitch.click();\n\n    // Wait for PATCH then GET (refreshGuilds) to complete\n    await patchPromise;\n    await getPromise;\n\n    // Verify the toggle state changed\n    await expect(enabledSwitch).toHaveAttribute(\"aria-checked\", expectedState);\n  });\n\n  test(\"guilds page add server modal and copy key\", async ({ adminPage }) => {\n    await gotoDiscordBotPage(adminPage);\n\n    const addButton = adminPage.locator('button:has-text(\"Add Server\")');\n\n    if (await addButton.isVisible({ timeout: 5000 }).catch(() => false)) {\n      // Button might be disabled if bot not configured\n      if (await addButton.isEnabled()) {\n        await addButton.click();\n\n        // Should show modal with registration key\n        const modal = adminPage.locator('[role=\"dialog\"]');\n        await expect(modal).toBeVisible({ timeout: 10000 });\n\n        // Modal should show \"Registration Key\" title\n        await expect(modal.getByText(\"Registration Key\")).toBeVisible();\n\n        // Should show the !register command (scoped to modal)\n        await expect(modal.getByText(\"!register\")).toBeVisible();\n\n        // Find and click copy button\n        const copyButton = adminPage.locator(\"button\").filter({\n          has: adminPage.locator(\"svg\"),\n        });\n\n        const copyButtons = await copyButton.all();\n        for (const btn of copyButtons) {\n          const ariaLabel = await btn.getAttribute(\"aria-label\");\n          if (ariaLabel?.toLowerCase().includes(\"copy\")) {\n            await btn.click();\n\n            // Toast notification should appear\n            const toast = adminPage.locator(\"text=/copied/i\");\n            await expect(toast).toBeVisible({ timeout: 5000 });\n            break;\n          }\n        }\n      }\n    }\n  });\n\n  test(\"guilds page delete shows confirmation\", async ({\n    adminPage,\n    mockRegisteredGuild,\n    mockBotConfigured: _mockBotConfigured,\n  }) => {\n    await gotoDiscordBotPage(adminPage);\n\n    // Wait for table to load with mock guild\n    await expect(\n      adminPage.locator(`text=${mockRegisteredGuild.name}`)\n    ).toBeVisible({ timeout: 10000 });\n\n    // Wait for table to be fully loaded and stable\n    await adminPage.waitForLoadState(\"networkidle\");\n\n    // Find the table row containing the guild\n    const tableRow = adminPage.locator(\"tr\").filter({\n      hasText: mockRegisteredGuild.name,\n    });\n    await expect(tableRow).toBeVisible({ timeout: 10000 });\n\n    // Find delete button in that row - it's an IconButton (last button in Actions column)\n    // The DeleteButton uses IconButton with tooltip=\"Delete\" and SvgTrash icon\n    const deleteButton = tableRow.locator(\"button\").last();\n\n    if (await deleteButton.isVisible({ timeout: 5000 }).catch(() => false)) {\n      // Ensure the button is visible and scrolled into view\n      await deleteButton.scrollIntoViewIfNeeded();\n      await deleteButton.waitFor({ state: \"visible\" });\n\n      // Wait for any animations/transitions to complete\n      await adminPage.waitForTimeout(300);\n\n      // Use force click to bypass any overlay/interception issues\n      // The SettingsLayouts.Body div may be intercepting pointer events\n      await deleteButton.click({ force: true });\n\n      // Confirmation modal should appear\n      const modal = adminPage.locator('[role=\"dialog\"]');\n      await expect(modal).toBeVisible({ timeout: 10000 });\n\n      // Cancel to avoid actually deleting\n      const cancelButton = adminPage.locator('button:has-text(\"Cancel\")');\n      if (await cancelButton.isVisible({ timeout: 5000 }).catch(() => false)) {\n        await cancelButton.click();\n        await expect(modal).not.toBeVisible({ timeout: 5000 });\n      }\n    }\n  });\n\n  test(\"guilds page navigate to guild detail\", async ({\n    adminPage,\n    mockRegisteredGuild,\n    mockBotConfigured: _mockBotConfigured,\n  }) => {\n    // Wait for bot config API to complete to ensure Card is enabled\n    // The Card is disabled when bot is not configured\n    // Set up the wait BEFORE navigation so we can catch the response\n    const configResponsePromise = adminPage.waitForResponse(\n      (response) =>\n        response.url().includes(\"/api/manage/admin/discord-bot/config\") &&\n        response.request().method() === \"GET\"\n    );\n\n    await gotoDiscordBotPage(adminPage);\n    await configResponsePromise;\n\n    // Wait for table to load with mock guild\n    const guildButton = adminPage.locator(\n      `button:has-text(\"${mockRegisteredGuild.name}\")`\n    );\n    await expect(guildButton).toBeVisible({ timeout: 10000 });\n\n    // Ensure button is enabled (it's disabled if bot not configured or guild not registered)\n    // mockBotConfigured ensures bot is configured, mockRegisteredGuild ensures guild is registered\n    await expect(guildButton).toBeEnabled();\n\n    // Click on the guild name to navigate to detail page\n    await guildButton.click();\n\n    // Should navigate to guild detail page\n    await expect(adminPage).toHaveURL(\n      new RegExp(`/admin/discord-bot/${mockRegisteredGuild.id}`)\n    );\n\n    // Verify detail page loaded correctly\n    // \"Channel Configuration\" is in a LineItemLayout in the body content, not the page title\n    await expect(\n      adminPage.locator(\"text=Channel Configuration\").first()\n    ).toBeVisible();\n  });\n\n  test(\"loading state shows loader\", async ({ adminPage }) => {\n    // Intercept API to delay response\n    await adminPage.route(\n      \"**/api/manage/admin/discord-bot/**\",\n      async (route) => {\n        await new Promise((r) => setTimeout(r, 1000));\n        await route.continue();\n      }\n    );\n\n    await adminPage.goto(\"/admin/discord-bot\");\n\n    // Should show loading indicator (ThreeDotsLoader)\n    // The loader should appear while data is being fetched\n    // ThreeDotsLoader uses react-loader-spinner's ThreeDots with ariaLabel=\"grid-loading\"\n    const loader = adminPage.locator('[aria-label=\"grid-loading\"]');\n    // Give it a moment to appear\n    await expect(loader).toBeVisible({ timeout: 5000 });\n\n    // Wait for page to finish loading\n    await adminPage.waitForLoadState(\"networkidle\");\n\n    // After loading, page title should be visible\n    await expect(\n      adminPage\n        .locator('[aria-label=\"admin-page-title\"]')\n        .getByText(\"Discord Integration\")\n    ).toBeVisible();\n  });\n\n  test(\"error state shows error message\", async ({ adminPage }) => {\n    // Intercept API to return error\n    await adminPage.route(\"**/api/manage/admin/discord-bot/guilds\", (route) => {\n      route.fulfill({\n        status: 500,\n        contentType: \"application/json\",\n        body: JSON.stringify({ detail: \"Internal Server Error\" }),\n      });\n    });\n\n    await adminPage.goto(\"/admin/discord-bot\");\n    await adminPage.waitForLoadState(\"networkidle\");\n\n    // Should show error message from ErrorCallout\n    // ErrorCallout shows both title (\"Failed to load Discord servers\") and detail (\"Internal Server Error\")\n    // Use .first() to get the first matching element (the title)\n    const errorMessage = adminPage.locator(\"text=/failed|error/i\").first();\n    await expect(errorMessage).toBeVisible({ timeout: 10000 });\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/ee_feature_redirect.spec.ts",
    "content": "import { test, expect } from \"@tests/e2e/fixtures/eeFeatures\";\n\ntest.describe(\"EE Feature Redirect\", () => {\n  test(\"redirects to /chat with toast when EE features are not licensed\", async ({\n    page,\n    eeEnabled,\n  }) => {\n    test.skip(eeEnabled, \"Redirect only happens without Enterprise license\");\n\n    await page.goto(\"/admin/theme\");\n\n    await expect(page).toHaveURL(/\\/chat/, { timeout: 10_000 });\n\n    const toastContainer = page.getByTestId(\"toast-container\");\n    await expect(toastContainer).toBeVisible({ timeout: 5_000 });\n    await expect(\n      toastContainer.getByText(/only accessible with a paid license/i)\n    ).toBeVisible();\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/groups/GroupsAdminPage.ts",
    "content": "/**\n * Page Object Model for the Admin Groups page (/admin/groups).\n *\n * Covers the list page, create page, and edit page interactions.\n */\n\nimport { type Page, type Locator, expect } from \"@playwright/test\";\n\n/** URL pattern that matches the groups data fetch. */\nconst GROUPS_API = /\\/api\\/manage\\/admin\\/user-group/;\n\nexport class GroupsAdminPage {\n  readonly page: Page;\n\n  constructor(page: Page) {\n    this.page = page;\n  }\n\n  // ---------------------------------------------------------------------------\n  // Navigation\n  // ---------------------------------------------------------------------------\n\n  async goto() {\n    await this.page.goto(\"/admin/groups\");\n    await expect(this.newGroupButton).toBeVisible({ timeout: 15000 });\n  }\n\n  async gotoCreate() {\n    await this.page.goto(\"/admin/groups/create\");\n    await expect(this.page.getByText(\"Create Group\")).toBeVisible({\n      timeout: 15000,\n    });\n  }\n\n  async gotoEdit(groupId: number) {\n    await this.page.goto(`/admin/groups/${groupId}`);\n    // Wait for the form to be ready — avoids networkidle hanging due to SWR polling.\n    await expect(this.groupNameInput).toBeVisible({ timeout: 15000 });\n  }\n\n  // ---------------------------------------------------------------------------\n  // List page\n  // ---------------------------------------------------------------------------\n\n  /** The Groups page heading container (unique to the list page). */\n  get pageHeading(): Locator {\n    return this.page.getByTestId(\"groups-page-heading\");\n  }\n\n  /** The search input on the list page. */\n  get listSearchInput(): Locator {\n    return this.page.getByPlaceholder(\"Search groups...\");\n  }\n\n  /** The \"New Group\" button on the list page header. */\n  get newGroupButton(): Locator {\n    return this.page.getByRole(\"button\", { name: \"New Group\" });\n  }\n\n  /** Returns all group cards on the list page. */\n  get groupCards(): Locator {\n    return this.page.locator(\"[data-card]\");\n  }\n\n  /**\n   * Returns a group card by name.\n   * Cards use ContentAction which renders the title as text — match by content.\n   */\n  getGroupCard(name: string): Locator {\n    return this.page.locator(\"[data-card]\").filter({ hasText: name });\n  }\n\n  /** Click into a group's edit page from the list. */\n  async openGroup(name: string) {\n    const card = this.getGroupCard(name);\n    await card.getByRole(\"button\", { name: \"View group\" }).click();\n    await expect(this.groupNameInput).toBeVisible({ timeout: 15000 });\n  }\n\n  /** Search groups on the list page. */\n  async searchGroups(term: string) {\n    await this.listSearchInput.fill(term);\n  }\n\n  /** Click \"New Group\" to navigate to the create page. */\n  async clickNewGroup() {\n    await this.newGroupButton.click();\n    await expect(this.page.getByText(\"Create Group\")).toBeVisible({\n      timeout: 15000,\n    });\n  }\n\n  // ---------------------------------------------------------------------------\n  // Create page\n  // ---------------------------------------------------------------------------\n\n  /** The group name input on create/edit pages. */\n  get groupNameInput(): Locator {\n    return this.page.getByPlaceholder(\"Name your group\");\n  }\n\n  /** The member search input on create/edit pages. */\n  get memberSearchInput(): Locator {\n    return this.page.getByPlaceholder(\"Search users and accounts...\");\n  }\n\n  /** The \"Create\" button on the create page. */\n  get createButton(): Locator {\n    return this.page.getByRole(\"button\", { name: \"Create\", exact: true });\n  }\n\n  /** The \"Cancel\" button on create/edit pages. */\n  get cancelButton(): Locator {\n    return this.page.getByRole(\"button\", { name: \"Cancel\" });\n  }\n\n  /** Fill in the group name on create/edit pages. */\n  async setGroupName(name: string) {\n    await this.groupNameInput.fill(name);\n  }\n\n  /** Search for members in the members table. */\n  async searchMembers(term: string) {\n    await this.memberSearchInput.fill(term);\n  }\n\n  /** Select a member row by checking their checkbox (create page / add mode). */\n  async selectMember(emailOrName: string) {\n    const row = this.page.getByRole(\"row\").filter({ hasText: emailOrName });\n    const checkbox = row.getByRole(\"checkbox\");\n    await checkbox.click();\n  }\n\n  /** Submit the create form. */\n  async submitCreate() {\n    await this.createButton.click();\n  }\n\n  // ---------------------------------------------------------------------------\n  // Edit page\n  // ---------------------------------------------------------------------------\n\n  /** The \"Save Changes\" button on the edit page. */\n  get saveButton(): Locator {\n    return this.page.getByRole(\"button\", { name: \"Save Changes\" });\n  }\n\n  /** The \"Add\" button to enter add-members mode. */\n  get addMembersButton(): Locator {\n    return this.page.getByRole(\"button\", { name: \"Add\", exact: true });\n  }\n\n  /** The \"Done\" button to exit add-members mode. */\n  get doneAddingButton(): Locator {\n    return this.page.getByRole(\"button\", { name: \"Done\" });\n  }\n\n  /** The \"Delete Group\" button in the danger zone card. */\n  get deleteGroupButton(): Locator {\n    return this.page.getByRole(\"button\", { name: \"Delete Group\" });\n  }\n\n  /** Enter add-members mode on the edit page. */\n  async startAddingMembers() {\n    await this.addMembersButton.click();\n    await expect(this.doneAddingButton).toBeVisible();\n  }\n\n  /** Exit add-members mode. */\n  async finishAddingMembers() {\n    await this.doneAddingButton.click();\n    await expect(this.addMembersButton).toBeVisible();\n  }\n\n  /**\n   * Remove a member from the member view via the minus button.\n   * Only works in member view (not add mode).\n   */\n  async removeMember(emailOrName: string) {\n    const row = this.page.getByRole(\"row\").filter({ hasText: emailOrName });\n    // The remove button is an IconButton with SvgMinusCircle in the actions column\n    await row.getByRole(\"button\").last().click();\n  }\n\n  /** Save the edit form. */\n  async submitEdit() {\n    await this.saveButton.click();\n  }\n\n  // ---------------------------------------------------------------------------\n  // Delete flow\n  // ---------------------------------------------------------------------------\n\n  /** Click \"Delete Group\" to open the confirmation modal. */\n  async clickDeleteGroup() {\n    await this.deleteGroupButton.click();\n  }\n\n  /** The delete confirmation modal. */\n  get deleteModal(): Locator {\n    return this.page.getByRole(\"dialog\");\n  }\n\n  /** Confirm deletion in the modal. */\n  async confirmDelete() {\n    await this.deleteModal.getByRole(\"button\", { name: \"Delete\" }).click();\n  }\n\n  /** Cancel deletion in the modal. */\n  async cancelDelete() {\n    // The modal close button (X icon) or clicking outside\n    await this.deleteModal\n      .getByRole(\"button\")\n      .filter({ hasText: /close|cancel/i })\n      .first()\n      .click();\n  }\n\n  // ---------------------------------------------------------------------------\n  // Assertions\n  // ---------------------------------------------------------------------------\n\n  async expectToast(message: string | RegExp) {\n    await expect(this.page.getByText(message)).toBeVisible({ timeout: 10000 });\n  }\n\n  /** Assert a group card exists on the list page. */\n  async expectGroupVisible(name: string) {\n    await expect(this.getGroupCard(name)).toBeVisible({ timeout: 10000 });\n  }\n\n  /** Assert a group card does NOT exist on the list page. */\n  async expectGroupNotVisible(name: string) {\n    await expect(this.getGroupCard(name)).not.toBeVisible({ timeout: 10000 });\n  }\n\n  /** Assert we navigated back to the groups list. */\n  async expectOnListPage() {\n    await expect(this.page).toHaveURL(/\\/admin\\/groups\\/?$/);\n    await expect(this.newGroupButton).toBeVisible();\n  }\n\n  /** Assert we are on the edit page for a specific group. */\n  async expectOnEditPage(groupId: number) {\n    await expect(this.page).toHaveURL(`/admin/groups/${groupId}`);\n  }\n\n  /** Wait for the groups API response after a mutation. */\n  async waitForGroupsRefresh() {\n    await this.page.waitForResponse(GROUPS_API);\n  }\n}\n"
  },
  {
    "path": "web/tests/e2e/admin/groups/fixtures.ts",
    "content": "/**\n * Playwright fixtures for Admin Groups page tests.\n *\n * Provides:\n * - Authenticated admin page\n * - OnyxApiClient for API-level setup/teardown\n * - GroupsAdminPage page object\n */\n\nimport { test as base, expect, type Page } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\nimport { GroupsAdminPage } from \"./GroupsAdminPage\";\n\nexport const test = base.extend<{\n  adminPage: Page;\n  api: OnyxApiClient;\n  groupsPage: GroupsAdminPage;\n}>({\n  adminPage: async ({ page }, use) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n    await use(page);\n  },\n\n  api: async ({ adminPage }, use) => {\n    const client = new OnyxApiClient(adminPage.request);\n    await use(client);\n  },\n\n  groupsPage: async ({ adminPage }, use) => {\n    const groupsPage = new GroupsAdminPage(adminPage);\n    await use(groupsPage);\n  },\n});\n\nexport { expect };\n"
  },
  {
    "path": "web/tests/e2e/admin/groups/groups.spec.ts",
    "content": "/**\n * E2E Tests: Admin Groups Page\n *\n * Tests the full groups management page — list, create, edit, delete.\n *\n * Uses the GroupsAdminPage POM for all interactions. Groups are created via\n * OnyxApiClient for setup and cleaned up in afterAll/afterEach.\n */\n\nimport { test, expect } from \"./fixtures\";\nimport type { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\nimport type { Browser } from \"@playwright/test\";\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\nfunction uniqueGroupName(prefix: string): string {\n  return `e2e-${prefix}-${Date.now()}`;\n}\n\n/** Best-effort cleanup — logs failures instead of silently swallowing them. */\nasync function softCleanup(fn: () => Promise<unknown>): Promise<void> {\n  await fn().catch((e) => console.warn(\"cleanup:\", e));\n}\n\n/**\n * Creates an authenticated API context for beforeAll/afterAll hooks.\n */\nasync function withApiContext(\n  browser: Browser,\n  fn: (api: OnyxApiClient) => Promise<void>\n): Promise<void> {\n  const context = await browser.newContext({\n    storageState: \"admin_auth.json\",\n  });\n  try {\n    const { OnyxApiClient } = await import(\"@tests/e2e/utils/onyxApiClient\");\n    const api = new OnyxApiClient(context.request);\n    await fn(api);\n  } finally {\n    await context.close();\n  }\n}\n\n// ---------------------------------------------------------------------------\n// List page\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Groups page — layout\", () => {\n  let adminGroupId: number;\n  let basicGroupId: number;\n  let layoutGroupId: number;\n  const layoutGroupName = uniqueGroupName(\"layout\");\n\n  test.beforeAll(async ({ browser }) => {\n    await withApiContext(browser, async (api) => {\n      const groups = await api.getUserGroups();\n      const adminGroup = groups.find((g) => g.name === \"Admin\" && g.is_default);\n      const basicGroup = groups.find((g) => g.name === \"Basic\" && g.is_default);\n      if (!adminGroup || !basicGroup) {\n        throw new Error(\"Default Admin/Basic groups not found\");\n      }\n      adminGroupId = adminGroup.id;\n      basicGroupId = basicGroup.id;\n\n      // Create a custom group so the list is non-empty (default groups are\n      // excluded from the API response by default).\n      layoutGroupId = await api.createUserGroup(layoutGroupName);\n      await api.waitForGroupSync(layoutGroupId);\n    });\n  });\n\n  test.afterAll(async ({ browser }) => {\n    await withApiContext(browser, async (api) => {\n      await softCleanup(() => api.deleteUserGroup(layoutGroupId));\n    });\n  });\n\n  test(\"renders page title, search, and new group button\", async ({\n    groupsPage,\n  }) => {\n    await groupsPage.goto();\n\n    await expect(groupsPage.pageHeading).toBeVisible();\n    await expect(groupsPage.listSearchInput).toBeVisible();\n    await expect(groupsPage.newGroupButton).toBeVisible();\n  });\n\n  test.skip(\"shows built-in groups (Admin, Basic)\", async ({ groupsPage }) => {\n    // TODO: Enable once default groups are shown via include_default=true\n    await groupsPage.goto();\n\n    await groupsPage.expectGroupVisible(\"Admin\");\n    await groupsPage.expectGroupVisible(\"Basic\");\n  });\n\n  test(\"search filters groups by name\", async ({ groupsPage, api }) => {\n    const name = uniqueGroupName(\"search\");\n    const groupId = await api.createUserGroup(name);\n    await api.waitForGroupSync(groupId);\n\n    try {\n      await groupsPage.goto();\n      await groupsPage.expectGroupVisible(name);\n\n      await groupsPage.searchGroups(\"zzz-nonexistent-zzz\");\n      await groupsPage.expectGroupNotVisible(name);\n\n      await groupsPage.searchGroups(name);\n      await groupsPage.expectGroupVisible(name);\n    } finally {\n      await softCleanup(() => api.deleteUserGroup(groupId));\n    }\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Create flow\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Groups page — create\", () => {\n  test(\"navigates to create page via New Group button\", async ({\n    groupsPage,\n  }) => {\n    await groupsPage.goto();\n    await groupsPage.clickNewGroup();\n\n    await expect(groupsPage.page).toHaveURL(/\\/admin\\/groups\\/create/);\n    await expect(groupsPage.groupNameInput).toBeVisible();\n  });\n\n  test(\"creates a group and redirects to list\", async ({ groupsPage, api }) => {\n    const name = uniqueGroupName(\"create\");\n    let groupId: number | undefined;\n\n    try {\n      await groupsPage.gotoCreate();\n      await groupsPage.setGroupName(name);\n      await groupsPage.submitCreate();\n\n      await groupsPage.expectToast(`Group \"${name}\" created`);\n      await groupsPage.expectOnListPage();\n\n      // Find the group ID for cleanup via the authenticated page context\n      const res = await groupsPage.page.request.get(\n        \"/api/manage/admin/user-group\"\n      );\n      const groups = await res.json();\n      const group = groups.find(\n        (g: { name: string; id: number }) => g.name === name\n      );\n      groupId = group?.id;\n    } finally {\n      if (groupId !== undefined) {\n        await softCleanup(() => api.deleteUserGroup(groupId!));\n      }\n    }\n  });\n\n  test(\"cancel returns to list without creating\", async ({ groupsPage }) => {\n    await groupsPage.gotoCreate();\n    await groupsPage.setGroupName(\"should-not-be-created\");\n    await groupsPage.cancelButton.click();\n\n    await groupsPage.expectOnListPage();\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Edit flow\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Groups page — edit @exclusive\", () => {\n  let groupId: number;\n  const groupName = uniqueGroupName(\"edit\");\n\n  test.beforeAll(async ({ browser }) => {\n    await withApiContext(browser, async (api) => {\n      groupId = await api.createUserGroup(groupName);\n      await api.waitForGroupSync(groupId);\n    });\n  });\n\n  test.afterAll(async ({ browser }) => {\n    await withApiContext(browser, async (api) => {\n      await softCleanup(() => api.deleteUserGroup(groupId));\n    });\n  });\n\n  test(\"navigates to edit page from list\", async ({ groupsPage }) => {\n    await groupsPage.goto();\n    await groupsPage.openGroup(groupName);\n\n    await groupsPage.expectOnEditPage(groupId);\n    await expect(groupsPage.saveButton).toBeVisible();\n  });\n\n  test(\"edit page shows group name and save/cancel buttons\", async ({\n    groupsPage,\n  }) => {\n    await groupsPage.gotoEdit(groupId);\n\n    await expect(groupsPage.groupNameInput).toHaveValue(groupName);\n    await expect(groupsPage.saveButton).toBeVisible();\n    await expect(groupsPage.cancelButton).toBeVisible();\n  });\n\n  test(\"can toggle add-members mode\", async ({ groupsPage }) => {\n    await groupsPage.gotoEdit(groupId);\n\n    await expect(groupsPage.addMembersButton).toBeVisible();\n    await groupsPage.startAddingMembers();\n    await expect(groupsPage.doneAddingButton).toBeVisible();\n    await groupsPage.finishAddingMembers();\n    await expect(groupsPage.addMembersButton).toBeVisible();\n  });\n\n  test(\"cancel returns to list without saving\", async ({ groupsPage }) => {\n    await groupsPage.gotoEdit(groupId);\n    await groupsPage.cancelButton.click();\n\n    await groupsPage.expectOnListPage();\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Delete flow\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Groups page — delete\", () => {\n  test(\"delete group via edit page\", async ({ groupsPage, api }) => {\n    const name = uniqueGroupName(\"delete\");\n    const groupId = await api.createUserGroup(name);\n    await api.waitForGroupSync(groupId);\n\n    await groupsPage.gotoEdit(groupId);\n    await groupsPage.clickDeleteGroup();\n\n    // Modal should show the group name\n    await expect(groupsPage.deleteModal).toBeVisible();\n    await expect(groupsPage.deleteModal.getByText(name)).toBeVisible();\n\n    await groupsPage.confirmDelete();\n    await groupsPage.expectToast(`Group \"${name}\" deleted`);\n    await groupsPage.expectOnListPage();\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Sync status (No Vector DB)\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Groups page — sync @lite\", () => {\n  test.beforeAll(async ({ browser }) => {\n    const context = await browser.newContext({\n      storageState: \"admin_auth.json\",\n    });\n    try {\n      const { OnyxApiClient } = await import(\"@tests/e2e/utils/onyxApiClient\");\n      const client = new OnyxApiClient(context.request);\n      const vectorDbEnabled = await client.isVectorDbEnabled();\n      test.skip(\n        vectorDbEnabled,\n        \"Skipped: vector DB is enabled in this deployment\"\n      );\n    } finally {\n      await context.close();\n    }\n  });\n\n  test(\"newly created group syncs immediately\", async ({ groupsPage, api }) => {\n    const name = uniqueGroupName(\"sync\");\n    let groupId: number | undefined;\n\n    try {\n      // Create via API and verify sync completes\n      groupId = await api.createUserGroup(name);\n      await api.waitForGroupSync(groupId);\n\n      // Navigate to edit page and verify it loads without error\n      await groupsPage.gotoEdit(groupId);\n      await expect(groupsPage.groupNameInput).toHaveValue(name);\n    } finally {\n      if (groupId !== undefined) {\n        await softCleanup(() => api.deleteUserGroup(groupId!));\n      }\n    }\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/image-generation/disconnect-provider.spec.ts",
    "content": "import { test, expect, Page, Locator } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { expectElementScreenshot } from \"@tests/e2e/utils/visualRegression\";\n\nconst IMAGE_GENERATION_URL = \"/admin/configuration/image-generation\";\n\nconst FAKE_CONNECTED_CONFIG = {\n  image_provider_id: \"openai_dalle_3\",\n  model_configuration_id: 100,\n  model_name: \"dall-e-3\",\n  llm_provider_id: 100,\n  llm_provider_name: \"openai-dalle3\",\n  is_default: false,\n};\n\nconst FAKE_DEFAULT_CONFIG = {\n  image_provider_id: \"openai_gpt_image_1\",\n  model_configuration_id: 101,\n  model_name: \"gpt-image-1\",\n  llm_provider_id: 101,\n  llm_provider_name: \"openai-gpt-image-1\",\n  is_default: true,\n};\n\nfunction getProviderCard(page: Page, providerId: string): Locator {\n  return page.getByLabel(`image-gen-provider-${providerId}`, { exact: true });\n}\n\nfunction mainContainer(page: Page): Locator {\n  return page.locator(\"[data-main-container]\");\n}\n\n/**\n * Sets up route mocks so the page sees configured providers\n * without needing real API keys.\n */\nasync function mockImageGenApis(\n  page: Page,\n  configs: (typeof FAKE_CONNECTED_CONFIG)[]\n) {\n  await page.route(\"**/api/admin/image-generation/config\", async (route) => {\n    if (route.request().method() === \"GET\") {\n      await route.fulfill({ status: 200, json: configs });\n    } else {\n      await route.continue();\n    }\n  });\n\n  await page.route(\n    \"**/api/admin/llm/provider?include_image_gen=true\",\n    async (route) => {\n      await route.fulfill({ status: 200, json: { providers: [] } });\n    }\n  );\n}\n\ntest.describe(\"Image Generation Provider Disconnect\", () => {\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n  });\n\n  test(\"should disconnect a connected (non-default) provider\", async ({\n    page,\n  }) => {\n    const configs = [{ ...FAKE_CONNECTED_CONFIG }, { ...FAKE_DEFAULT_CONFIG }];\n    await mockImageGenApis(page, configs);\n\n    await page.goto(IMAGE_GENERATION_URL);\n    await page.waitForSelector(\"text=Image Generation Model\", {\n      timeout: 20000,\n    });\n\n    const card = getProviderCard(page, \"openai_dalle_3\");\n    await card.waitFor({ state: \"visible\", timeout: 10000 });\n\n    await expectElementScreenshot(mainContainer(page), {\n      name: \"image-gen-disconnect-non-default-before\",\n    });\n\n    // Hover to reveal disconnect button, then verify\n    await card.hover();\n    const disconnectButton = card.getByRole(\"button\", {\n      name: \"Disconnect DALL-E 3\",\n    });\n    await expect(disconnectButton).toBeVisible();\n    await expect(disconnectButton).toBeEnabled();\n\n    // Mock the DELETE to succeed and update the config list\n    await page.route(\n      \"**/api/admin/image-generation/config/openai_dalle_3\",\n      async (route) => {\n        if (route.request().method() === \"DELETE\") {\n          // Update the GET mock to return only the default config\n          await page.unroute(\"**/api/admin/image-generation/config\");\n          await page.route(\n            \"**/api/admin/image-generation/config\",\n            async (route) => {\n              if (route.request().method() === \"GET\") {\n                await route.fulfill({\n                  status: 200,\n                  json: [{ ...FAKE_DEFAULT_CONFIG }],\n                });\n              } else {\n                await route.continue();\n              }\n            }\n          );\n          await route.fulfill({ status: 200, json: {} });\n        } else {\n          await route.continue();\n        }\n      }\n    );\n\n    // Click disconnect\n    await disconnectButton.click();\n\n    // Verify confirmation modal appears\n    const confirmDialog = page.getByRole(\"dialog\");\n    await expect(confirmDialog).toBeVisible({ timeout: 5000 });\n    await expect(confirmDialog).toContainText(\"Disconnect DALL-E 3\");\n\n    await expectElementScreenshot(confirmDialog, {\n      name: \"image-gen-disconnect-non-default-modal\",\n    });\n\n    // Click Disconnect in the confirmation modal\n    const confirmButton = confirmDialog.getByRole(\"button\", {\n      name: \"Disconnect\",\n    });\n    await confirmButton.click();\n\n    // Verify the card reverts to disconnected state (shows \"Connect\" button)\n    await expect(card.getByRole(\"button\", { name: \"Connect\" })).toBeVisible({\n      timeout: 10000,\n    });\n\n    await expectElementScreenshot(mainContainer(page), {\n      name: \"image-gen-disconnect-non-default-after\",\n    });\n  });\n\n  test(\"should show replacement dropdown when disconnecting default provider with alternatives\", async ({\n    page,\n  }) => {\n    const configs = [{ ...FAKE_CONNECTED_CONFIG }, { ...FAKE_DEFAULT_CONFIG }];\n    await mockImageGenApis(page, configs);\n\n    await page.goto(IMAGE_GENERATION_URL);\n    await page.waitForSelector(\"text=Image Generation Model\", {\n      timeout: 20000,\n    });\n\n    const defaultCard = getProviderCard(page, \"openai_gpt_image_1\");\n    await defaultCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n    // Hover to reveal disconnect button\n    await defaultCard.hover();\n    const disconnectButton = defaultCard.getByRole(\"button\", {\n      name: \"Disconnect GPT Image 1\",\n    });\n    await expect(disconnectButton).toBeVisible();\n    await expect(disconnectButton).toBeEnabled();\n\n    await disconnectButton.click();\n\n    const confirmDialog = page.getByRole(\"dialog\");\n    await expect(confirmDialog).toBeVisible({ timeout: 5000 });\n\n    // Should show replacement dropdown since there's an alternative\n    await expect(\n      confirmDialog.getByText(\"Session history will be preserved\")\n    ).toBeVisible();\n\n    // Disconnect button should be enabled because first replacement is auto-selected\n    const confirmButton = confirmDialog.getByRole(\"button\", {\n      name: \"Disconnect\",\n    });\n    await expect(confirmButton).toBeEnabled();\n\n    await expectElementScreenshot(confirmDialog, {\n      name: \"image-gen-disconnect-default-with-alt-modal\",\n    });\n  });\n\n  test(\"should show connect message when disconnecting default provider with no alternatives\", async ({\n    page,\n  }) => {\n    // Only the default config — no other providers configured\n    await mockImageGenApis(page, [{ ...FAKE_DEFAULT_CONFIG }]);\n\n    await page.goto(IMAGE_GENERATION_URL);\n    await page.waitForSelector(\"text=Image Generation Model\", {\n      timeout: 20000,\n    });\n\n    const defaultCard = getProviderCard(page, \"openai_gpt_image_1\");\n    await defaultCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n    await defaultCard.hover();\n    const disconnectButton = defaultCard.getByRole(\"button\", {\n      name: \"Disconnect GPT Image 1\",\n    });\n    await disconnectButton.click();\n\n    const confirmDialog = page.getByRole(\"dialog\");\n    await expect(confirmDialog).toBeVisible({ timeout: 5000 });\n\n    // Should show message about connecting another provider\n    await expect(\n      confirmDialog.getByText(\"Connect another provider\")\n    ).toBeVisible();\n\n    // Disconnect button should be enabled\n    const confirmButton = confirmDialog.getByRole(\"button\", {\n      name: \"Disconnect\",\n    });\n    await expect(confirmButton).toBeEnabled();\n\n    await expectElementScreenshot(confirmDialog, {\n      name: \"image-gen-disconnect-no-alt-modal\",\n    });\n  });\n\n  test(\"should not show disconnect button for unconfigured providers\", async ({\n    page,\n  }) => {\n    await mockImageGenApis(page, [{ ...FAKE_DEFAULT_CONFIG }]);\n\n    await page.goto(IMAGE_GENERATION_URL);\n    await page.waitForSelector(\"text=Image Generation Model\", {\n      timeout: 20000,\n    });\n\n    // DALL-E 3 is not configured — should not have a disconnect button\n    const card = getProviderCard(page, \"openai_dalle_3\");\n    await card.waitFor({ state: \"visible\", timeout: 10000 });\n\n    const disconnectButton = card.getByRole(\"button\", {\n      name: \"Disconnect DALL-E 3\",\n    });\n    await expect(disconnectButton).not.toBeVisible();\n\n    await expectElementScreenshot(mainContainer(page), {\n      name: \"image-gen-disconnect-unconfigured\",\n    });\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/image-generation/image-generation-content.spec.ts",
    "content": "import { test, expect, Page, Locator } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\nconst IMAGE_GENERATION_URL =\n  \"http://localhost:3000/admin/configuration/image-generation\";\n\n// Provider IDs matching constants.ts\nconst PROVIDERS = [\n  { id: \"openai_gpt_image_1_5\", title: \"GPT Image 1.5\" },\n  { id: \"openai_gpt_image_1\", title: \"GPT Image 1\" },\n  { id: \"openai_dalle_3\", title: \"DALL-E 3\" },\n  { id: \"azure_dalle_3\", title: \"Azure OpenAI DALL-E 3\" },\n];\n\n// Helper to find a provider card by its aria-label\nfunction getProviderCard(page: Page, providerId: string): Locator {\n  return page.getByLabel(`image-gen-provider-${providerId}`, { exact: true });\n}\n\n// Helper to open the provider connection modal\nasync function openProviderModal(\n  page: Page,\n  providerId: string\n): Promise<void> {\n  const card = getProviderCard(page, providerId);\n  await card.waitFor({ state: \"visible\", timeout: 10000 });\n\n  // Click the Connect button within the card\n  const connectButton = card.getByRole(\"button\", { name: \"Connect\" });\n  await connectButton.waitFor({ state: \"visible\", timeout: 5000 });\n  await connectButton.click();\n}\n\ntest.describe(\"Image Generation Provider Configuration\", () => {\n  test.beforeEach(async ({ page }) => {\n    // Log in as admin before each test\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n\n    // Navigate to image generation config page\n    await page.goto(IMAGE_GENERATION_URL);\n    await page.waitForLoadState(\"networkidle\");\n\n    // Wait for page to fully load - look for the section heading\n    await page.waitForSelector(\"text=Image Generation Model\", {\n      timeout: 20000,\n    });\n\n    console.log(\"[image-gen-test] Page loaded successfully\");\n  });\n\n  test(\"should open connection modal for all image generation providers\", async ({\n    page,\n  }) => {\n    for (const provider of PROVIDERS) {\n      console.log(\n        `[image-gen-test] Testing modal open for provider: ${provider.title}`\n      );\n\n      // Click Connect on provider card using aria-label\n      await openProviderModal(page, provider.id);\n\n      // Verify modal opens with correct title\n      // Modal title is \"Connect {providerTitle}\" for new connections\n      const modalDialog = page.getByRole(\"dialog\", {\n        name: new RegExp(`connect ${provider.title}`, \"i\"),\n      });\n      await expect(modalDialog).toBeVisible({ timeout: 10000 });\n\n      console.log(`[image-gen-test] Modal opened for ${provider.title}`);\n\n      // Close modal by pressing Escape\n      await page.keyboard.press(\"Escape\");\n      await expect(modalDialog).not.toBeVisible({ timeout: 5000 });\n\n      console.log(`[image-gen-test] Modal closed for ${provider.title}`);\n    }\n\n    console.log(\n      \"[image-gen-test] All provider modals opened and closed successfully\"\n    );\n  });\n\n  test.describe(\"OpenAI DALL-E 3 Configuration\", () => {\n    const OPENAI_API_KEY = process.env.OPENAI_API_KEY;\n\n    test.skip(!OPENAI_API_KEY, \"OPENAI_API_KEY environment variable not set\");\n\n    test.afterEach(async ({ page }) => {\n      // Clean up the image generation config created during the test\n      const apiClient = new OnyxApiClient(page.request);\n      try {\n        await apiClient.deleteImageGenerationConfig(\"openai_dalle_3\");\n        console.log(\"[image-gen-test] Cleaned up DALL-E 3 config\");\n      } catch (error) {\n        console.warn(\n          `[image-gen-test] Failed to clean up DALL-E 3 config: ${error}`\n        );\n      }\n    });\n\n    test.skip(\"should configure DALL-E 3 with API key\", async ({ page }) => {\n      // Click Connect on DALL-E 3 card using aria-label\n      await openProviderModal(page, \"openai_dalle_3\");\n\n      // Wait for modal to open\n      const modalDialog = page.getByRole(\"dialog\", {\n        name: /connect dall-e 3/i,\n      });\n      await expect(modalDialog).toBeVisible({ timeout: 10000 });\n\n      // Enter API key - use getByRole(\"combobox\") to target only the input, not the listbox\n      const apiKeyInput = modalDialog.getByRole(\"combobox\");\n      await apiKeyInput.waitFor({ state: \"visible\", timeout: 5000 });\n      await apiKeyInput.clear();\n      await apiKeyInput.fill(OPENAI_API_KEY!);\n\n      // Close the dropdown by pressing Escape - it intercepts clicks on the Connect button\n      await page.keyboard.press(\"Escape\");\n\n      // Click Connect button in modal - scope to the dialog to avoid matching other buttons\n      const modalConnectButton = modalDialog.getByRole(\"button\", {\n        name: \"Connect\",\n        exact: true,\n      });\n      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });\n      await modalConnectButton.click();\n\n      console.log(\n        \"[image-gen-test] Clicked Connect, waiting for validation...\"\n      );\n\n      // Wait for modal to close (indicates success)\n      await expect(modalDialog).not.toBeVisible({ timeout: 30000 });\n\n      console.log(\n        \"[image-gen-test] Modal closed, verifying provider is configured...\"\n      );\n\n      // Wait for page to update\n      await page.waitForLoadState(\"networkidle\");\n\n      // Verify DALL-E 3 is now configured - should show \"Current Default\"\n      const dalleCard = getProviderCard(page, \"openai_dalle_3\");\n      await expect(\n        dalleCard.getByRole(\"button\", { name: \"Current Default\" })\n      ).toBeVisible({ timeout: 15000 });\n\n      console.log(\"[image-gen-test] DALL-E 3 configured successfully\");\n    });\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/llm_provider_setup.spec.ts",
    "content": "import { expect, test } from \"@playwright/test\";\nimport type { Locator, Page } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\nconst LLM_SETUP_URL = \"/admin/configuration/llm\";\nconst BASE_URL = process.env.BASE_URL || \"http://localhost:3000\";\nconst PROVIDER_API_KEY =\n  process.env.E2E_LLM_PROVIDER_API_KEY ||\n  process.env.OPENAI_API_KEY ||\n  \"e2e-placeholder-api-key-not-used\";\n\ntype AdminLLMProvider = {\n  id: number;\n  name: string;\n  is_auto_mode: boolean;\n};\n\ntype DefaultModelInfo = {\n  provider_id: number;\n  model_name: string;\n} | null;\n\ntype ProviderModelConfig = {\n  name: string;\n  is_visible: boolean;\n};\n\nfunction uniqueName(prefix: string): string {\n  return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;\n}\n\nfunction normalizeAlphaNum(input: string): string {\n  return input.toLowerCase().replace(/[^a-z0-9]/g, \"\");\n}\n\nfunction modelTokenVariants(modelName: string): string[][] {\n  return modelName\n    .toLowerCase()\n    .split(/[^a-z0-9]+/)\n    .filter((token) => token.length > 0)\n    .map((token) => {\n      // Display names may shorten long numeric segments to suffixes.\n      if (/^\\d+$/.test(token) && token.length > 5) {\n        return [token, token.slice(-5)];\n      }\n      return [token];\n    });\n}\n\nfunction textMatchesModel(modelName: string, candidateText: string): boolean {\n  const normalizedCandidate = normalizeAlphaNum(candidateText);\n  if (!normalizedCandidate) {\n    return false;\n  }\n\n  const tokenVariants = modelTokenVariants(modelName);\n  return tokenVariants.every((variants) =>\n    variants.some((variant) =>\n      normalizedCandidate.includes(normalizeAlphaNum(variant))\n    )\n  );\n}\n\nasync function getAdminLLMProviderResponse(page: Page) {\n  const response = await page.request.get(`${BASE_URL}/api/admin/llm/provider`);\n  expect(response.ok()).toBeTruthy();\n  return (await response.json()) as {\n    providers: AdminLLMProvider[];\n    default_text: DefaultModelInfo;\n    default_vision: DefaultModelInfo;\n  };\n}\n\nasync function listAdminLLMProviders(page: Page): Promise<AdminLLMProvider[]> {\n  const data = await getAdminLLMProviderResponse(page);\n  return data.providers;\n}\n\nasync function getDefaultTextModel(page: Page): Promise<DefaultModelInfo> {\n  const data = await getAdminLLMProviderResponse(page);\n  return data.default_text ?? null;\n}\n\nasync function createPublicProvider(\n  page: Page,\n  providerName: string,\n  modelName: string = \"gpt-4o\"\n): Promise<number> {\n  return createPublicProviderWithModels(page, providerName, [\n    { name: modelName, is_visible: true },\n  ]);\n}\n\nasync function createPublicProviderWithModels(\n  page: Page,\n  providerName: string,\n  modelConfigurations: ProviderModelConfig[]\n): Promise<number> {\n  expect(modelConfigurations.length).toBeGreaterThan(0);\n\n  const response = await page.request.put(\n    `${BASE_URL}/api/admin/llm/provider?is_creation=true`,\n    {\n      data: {\n        name: providerName,\n        provider: \"openai\",\n        api_key: PROVIDER_API_KEY,\n        is_public: true,\n        groups: [],\n        personas: [],\n        model_configurations: modelConfigurations,\n      },\n    }\n  );\n  expect(response.ok()).toBeTruthy();\n  const data = (await response.json()) as { id: number };\n  return data.id;\n}\n\nasync function navigateToAdminLlmPageFromChat(page: Page): Promise<void> {\n  await page.goto(LLM_SETUP_URL);\n  await page.waitForURL(\"**/admin/configuration/llm**\");\n  await expect(page.getByLabel(\"admin-page-title\")).toHaveText(\n    /^Language Models/\n  );\n}\n\nasync function exitAdminToChat(page: Page): Promise<void> {\n  await page.goto(\"/app\");\n  await page.waitForURL(\"**/app**\");\n  await page\n    .locator(\"#onyx-chat-input-textarea\")\n    .waitFor({ state: \"visible\", timeout: 15000 });\n}\n\nasync function isModelVisibleInChatProviders(\n  page: Page,\n  modelName: string\n): Promise<boolean> {\n  const response = await page.request.get(`${BASE_URL}/api/llm/provider`);\n  expect(response.ok()).toBeTruthy();\n\n  const data = (await response.json()) as {\n    providers: {\n      model_configurations: { name: string; is_visible: boolean }[];\n    }[];\n  };\n\n  return data.providers.some((provider) =>\n    provider.model_configurations.some(\n      (model) => model.name === modelName && model.is_visible\n    )\n  );\n}\n\nasync function expectModelVisibilityInChatProviders(\n  page: Page,\n  modelName: string,\n  expectedVisible: boolean\n): Promise<void> {\n  await expect\n    .poll(() => isModelVisibleInChatProviders(page, modelName), {\n      timeout: 30000,\n    })\n    .toBe(expectedVisible);\n}\n\nasync function getModelCountInChatSelector(\n  page: Page,\n  modelName: string\n): Promise<number> {\n  const dialog = page.locator('[role=\"dialog\"]').first();\n\n  // When used in expect.poll retries, a previous attempt may leave the\n  // popover open. Ensure a clean state before toggling it.\n  if (await dialog.isVisible()) {\n    await page.keyboard.press(\"Escape\");\n    await dialog.waitFor({ state: \"hidden\", timeout: 5000 });\n  }\n\n  await page.getByTestId(\"AppInputBar/llm-popover-trigger\").click();\n  await dialog.waitFor({ state: \"visible\", timeout: 10000 });\n\n  await dialog.getByPlaceholder(\"Search models...\").fill(modelName);\n  const optionButtons = dialog.getByRole(\"button\");\n  const optionTexts = await optionButtons.allTextContents();\n  const uniqueOptionTexts = Array.from(\n    new Set(optionTexts.map((text) => text.trim()))\n  );\n  const count = uniqueOptionTexts.filter((text) =>\n    textMatchesModel(modelName, text)\n  ).length;\n\n  await page.keyboard.press(\"Escape\");\n  await dialog.waitFor({ state: \"hidden\", timeout: 10000 });\n\n  return count;\n}\n\nasync function getProviderByName(\n  page: Page,\n  providerName: string\n): Promise<AdminLLMProvider | null> {\n  const providers = await listAdminLLMProviders(page);\n  return providers.find((provider) => provider.name === providerName) ?? null;\n}\n\nasync function findProviderCard(\n  page: Page,\n  providerName: string\n): Promise<Locator> {\n  return page\n    .locator(\"div.rounded-16\")\n    .filter({ hasText: providerName })\n    .first();\n}\n\nasync function openOpenAiSetupModal(page: Page): Promise<Locator> {\n  const openAiCard = page\n    .locator(\"div.rounded-16\")\n    .filter({ hasText: \"OpenAI\" })\n    .filter({ has: page.getByRole(\"button\", { name: \"Connect\" }) })\n    .first();\n\n  await expect(openAiCard).toBeVisible({ timeout: 10000 });\n  await openAiCard.getByRole(\"button\", { name: \"Connect\" }).click();\n\n  const modal = page.getByRole(\"dialog\", { name: /set up gpt/i });\n  await expect(modal).toBeVisible({ timeout: 10000 });\n  return modal;\n}\n\nasync function openProviderEditModal(\n  page: Page,\n  providerName: string\n): Promise<Locator> {\n  const providerCard = await findProviderCard(page, providerName);\n  await expect(providerCard).toBeVisible({ timeout: 10000 });\n  await providerCard.getByRole(\"button\", { name: /^Edit/ }).click();\n\n  const modal = page.getByRole(\"dialog\", { name: /configure/i });\n  await expect(modal).toBeVisible({ timeout: 10000 });\n  return modal;\n}\n\ntest.describe(\"LLM Provider Setup @exclusive\", () => {\n  let providersToCleanup: number[] = [];\n\n  test.beforeEach(async ({ page }) => {\n    providersToCleanup = [];\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n    await page.goto(LLM_SETUP_URL);\n    await page.waitForLoadState(\"networkidle\");\n    await expect(page.getByLabel(\"admin-page-title\")).toHaveText(\n      /^Language Models/\n    );\n  });\n\n  test.afterEach(async ({ page }) => {\n    const apiClient = new OnyxApiClient(page.request);\n    const uniqueIds = Array.from(new Set(providersToCleanup));\n\n    for (const providerId of uniqueIds) {\n      try {\n        await apiClient.deleteProvider(providerId);\n      } catch (error) {\n        console.warn(\n          `Cleanup failed for provider ${providerId}: ${String(error)}`\n        );\n      }\n    }\n  });\n\n  test(\"admin can create, edit, and delete a provider from the LLM setup page\", async ({\n    page,\n  }) => {\n    // Keep this flow deterministic without external LLM connectivity.\n    await page.route(\"**/api/admin/llm/test\", async (route) => {\n      await route.fulfill({\n        status: 200,\n        contentType: \"application/json\",\n        body: JSON.stringify({ success: true }),\n      });\n    });\n\n    const providerName = uniqueName(\"PW OpenAI Provider\");\n    const apiKey = PROVIDER_API_KEY;\n\n    const setupModal = await openOpenAiSetupModal(page);\n    await setupModal.getByLabel(\"Display Name\").fill(providerName);\n    await setupModal.getByLabel(\"API Key\").fill(apiKey);\n\n    const enableButton = setupModal.getByRole(\"button\", { name: \"Connect\" });\n    await expect(enableButton).toBeEnabled({ timeout: 10000 });\n    await enableButton.click();\n    await expect(setupModal).not.toBeVisible({ timeout: 30000 });\n\n    await expect\n      .poll(\n        async () => (await getProviderByName(page, providerName))?.id ?? null\n      )\n      .not.toBeNull();\n\n    const createdProvider = await getProviderByName(page, providerName);\n    expect(createdProvider).not.toBeNull();\n    providersToCleanup.push(createdProvider!.id);\n\n    const editModal = await openProviderEditModal(page, providerName);\n    const autoUpdateSwitch = editModal.getByRole(\"switch\").first();\n    const initialAutoModeState =\n      (await autoUpdateSwitch.getAttribute(\"aria-checked\")) === \"true\";\n    await autoUpdateSwitch.click();\n\n    const updateButton = editModal.getByRole(\"button\", { name: \"Update\" });\n    await expect(updateButton).toBeEnabled({ timeout: 10000 });\n    await updateButton.click();\n    await expect(editModal).not.toBeVisible({ timeout: 30000 });\n\n    await expect\n      .poll(async () => {\n        const provider = await getProviderByName(page, providerName);\n        return provider?.is_auto_mode;\n      })\n      .toBe(!initialAutoModeState);\n\n    const providerCard = await findProviderCard(page, providerName);\n    await providerCard.hover();\n    await providerCard.getByRole(\"button\", { name: /^Delete/ }).click();\n    const confirmationModal = page.getByRole(\"dialog\");\n    await expect(confirmationModal).toBeVisible({ timeout: 10000 });\n    await confirmationModal.getByRole(\"button\", { name: \"Delete\" }).click();\n    await expect(confirmationModal).not.toBeVisible({ timeout: 15000 });\n\n    await expect\n      .poll(\n        async () => (await getProviderByName(page, providerName))?.id ?? null\n      )\n      .toBeNull();\n\n    providersToCleanup = providersToCleanup.filter(\n      (providerId) => providerId !== createdProvider!.id\n    );\n  });\n\n  test(\"admin can switch the default model via the default model dropdown\", async ({\n    page,\n  }) => {\n    const apiClient = new OnyxApiClient(page.request);\n    const initialDefault = await getDefaultTextModel(page);\n\n    const firstProviderName = uniqueName(\"PW Baseline Provider\");\n    const secondProviderName = uniqueName(\"PW Target Provider\");\n    const firstModelName = \"gpt-4o\";\n    const secondModelName = \"gpt-4o-mini\";\n\n    const firstProviderId = await createPublicProvider(\n      page,\n      firstProviderName,\n      firstModelName\n    );\n    const secondProviderId = await createPublicProvider(\n      page,\n      secondProviderName,\n      secondModelName\n    );\n    providersToCleanup.push(firstProviderId, secondProviderId);\n\n    try {\n      await apiClient.setProviderAsDefault(firstProviderId, firstModelName);\n\n      await page.reload();\n      await page.waitForLoadState(\"networkidle\");\n\n      // Open the Default Model dropdown and select the model from the\n      // second provider's group (scoped to avoid picking a same-named model\n      // from another provider).\n      await page.getByRole(\"combobox\").click();\n      const targetGroup = page\n        .locator('[role=\"group\"]')\n        .filter({ hasText: secondProviderName });\n      const defaultResponsePromise = page.waitForResponse(\n        (response) =>\n          response.url().includes(\"/api/admin/llm/default\") &&\n          response.request().method() === \"POST\"\n      );\n      await targetGroup.locator('[role=\"option\"]').click();\n      await defaultResponsePromise;\n\n      // Verify the default switched to the second provider\n      await expect\n        .poll(async () => {\n          const defaultText = await getDefaultTextModel(page);\n          return defaultText?.provider_id;\n        })\n        .toBe(secondProviderId);\n    } finally {\n      if (initialDefault) {\n        try {\n          await apiClient.setProviderAsDefault(\n            initialDefault.provider_id,\n            initialDefault.model_name\n          );\n        } catch (error) {\n          console.warn(`Failed to restore initial default: ${String(error)}`);\n        }\n      }\n    }\n  });\n\n  test(\"adding a hidden model on an existing provider shows it in chat after one save\", async ({\n    page,\n  }) => {\n    await page.route(\"**/api/admin/llm/test\", async (route) => {\n      await route.fulfill({\n        status: 200,\n        contentType: \"application/json\",\n        body: JSON.stringify({ success: true }),\n      });\n    });\n\n    const providerName = uniqueName(\"PW Provider Add Model\");\n    const ts = Date.now();\n    const alwaysVisibleModel = `pw-visible-${ts}-base`;\n    const modelToEnable = `pw-hidden-${ts}-to-enable`;\n\n    const providerId = await createPublicProviderWithModels(\n      page,\n      providerName,\n      [\n        { name: alwaysVisibleModel, is_visible: true },\n        { name: modelToEnable, is_visible: false },\n      ]\n    );\n    providersToCleanup.push(providerId);\n    await expectModelVisibilityInChatProviders(page, modelToEnable, false);\n\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n    await page\n      .locator(\"#onyx-chat-input-textarea\")\n      .waitFor({ state: \"visible\", timeout: 15000 });\n\n    await expect\n      .poll(() => getModelCountInChatSelector(page, modelToEnable), {\n        timeout: 15000,\n      })\n      .toBe(0);\n\n    await navigateToAdminLlmPageFromChat(page);\n\n    const editModal = await openProviderEditModal(page, providerName);\n    await editModal.getByText(modelToEnable, { exact: true }).click();\n\n    const updateButton = editModal.getByRole(\"button\", { name: \"Update\" });\n    const providerUpdateResponsePromise = page.waitForResponse(\n      (response) =>\n        response.url().includes(\"/api/admin/llm/provider\") &&\n        response.request().method() === \"PUT\"\n    );\n    await expect(updateButton).toBeEnabled({ timeout: 10000 });\n    await updateButton.click();\n    await providerUpdateResponsePromise;\n    await expect(editModal).not.toBeVisible({ timeout: 30000 });\n    await expectModelVisibilityInChatProviders(page, modelToEnable, true);\n\n    await exitAdminToChat(page);\n    await expect\n      .poll(() => getModelCountInChatSelector(page, modelToEnable), {\n        timeout: 15000,\n      })\n      .toBe(1);\n  });\n\n  test(\"removing a visible model on an existing provider hides it in chat after one save\", async ({\n    page,\n  }) => {\n    await page.route(\"**/api/admin/llm/test\", async (route) => {\n      await route.fulfill({\n        status: 200,\n        contentType: \"application/json\",\n        body: JSON.stringify({ success: true }),\n      });\n    });\n\n    const providerName = uniqueName(\"PW Provider Remove Model\");\n    const ts = Date.now();\n    const alwaysVisibleModel = `pw-visible-${ts}-base`;\n    const modelToDisable = `pw-visible-${ts}-to-disable`;\n\n    const providerId = await createPublicProviderWithModels(\n      page,\n      providerName,\n      [\n        { name: alwaysVisibleModel, is_visible: true },\n        { name: modelToDisable, is_visible: true },\n      ]\n    );\n    providersToCleanup.push(providerId);\n    await expectModelVisibilityInChatProviders(page, modelToDisable, true);\n\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n    await page\n      .locator(\"#onyx-chat-input-textarea\")\n      .waitFor({ state: \"visible\", timeout: 15000 });\n\n    await expect\n      .poll(() => getModelCountInChatSelector(page, modelToDisable), {\n        timeout: 15000,\n      })\n      .toBe(1);\n\n    await navigateToAdminLlmPageFromChat(page);\n\n    const editModal = await openProviderEditModal(page, providerName);\n    await editModal.getByText(modelToDisable, { exact: true }).click();\n\n    const updateButton = editModal.getByRole(\"button\", { name: \"Update\" });\n    const providerUpdateResponsePromise = page.waitForResponse(\n      (response) =>\n        response.url().includes(\"/api/admin/llm/provider\") &&\n        response.request().method() === \"PUT\"\n    );\n    await expect(updateButton).toBeEnabled({ timeout: 10000 });\n    await updateButton.click();\n    await providerUpdateResponsePromise;\n    await expect(editModal).not.toBeVisible({ timeout: 30000 });\n    await expectModelVisibilityInChatProviders(page, modelToDisable, false);\n\n    await exitAdminToChat(page);\n    await expect\n      .poll(() => getModelCountInChatSelector(page, modelToDisable), {\n        timeout: 15000,\n      })\n      .toBe(0);\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/oauth_config/test_tool_oauth.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { Page, Browser } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\n// --- Locator Helper Functions ---\nconst getAuthorizationUrlInput = (page: Page) =>\n  page.locator('input[name=\"authorizationUrl\"]');\nconst getTokenUrlInput = (page: Page) => page.locator('input[name=\"tokenUrl\"]');\nconst getClientIdInput = (page: Page) => page.locator('input[name=\"clientId\"]');\nconst getClientSecretInput = (page: Page) =>\n  page.locator('input[name=\"clientSecret\"]');\nconst getScopesInput = (page: Page) => page.locator('input[name=\"scopes\"]');\nconst getConnectButton = (page: Page) =>\n  page.getByRole(\"button\", { name: \"Connect\" });\nconst getDefinitionTextarea = (page: Page) =>\n  page.locator('textarea[name=\"definition\"]');\nconst getAddActionButton = (page: Page) =>\n  page.getByRole(\"button\", { name: \"Add Action\" });\nconst getAddOpenAPIActionButton = (page: Page) =>\n  page.getByRole(\"button\", { name: \"Add OpenAPI Action\" });\n\n// Simple OpenAPI schema for testing\nconst SIMPLE_OPENAPI_SCHEMA = `{\n  \"openapi\": \"3.0.0\",\n  \"info\": {\n    \"title\": \"Test API\",\n    \"version\": \"1.0.0\",\n    \"description\": \"A test API for OAuth tool selection\"\n  },\n  \"servers\": [\n    {\n      \"url\": \"https://api.example.com\"\n    }\n  ],\n  \"paths\": {\n    \"/test\": {\n      \"get\": {\n        \"operationId\": \"test_operation\",\n        \"summary\": \"Test operation\",\n        \"description\": \"A test operation\",\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Success\"\n          }\n        }\n      }\n    }\n  }\n}`;\n\nlet createdAssistantId: number | null = null;\nlet createdToolName: string | null = null;\n\ntest.afterAll(async ({ browser }: { browser: Browser }) => {\n  const context = await browser.newContext({\n    storageState: \"admin_auth.json\",\n  });\n  const page = await context.newPage();\n  const client = new OnyxApiClient(page.request);\n\n  // Delete the assistant first (it references the tool)\n  if (createdAssistantId !== null) {\n    await client.deleteAgent(createdAssistantId);\n  }\n\n  // Then delete the tool\n  if (createdToolName !== null) {\n    const tool = await client.findToolByName(createdToolName);\n    if (tool) {\n      await client.deleteCustomTool(tool.id);\n    }\n  }\n\n  await context.close();\n});\n\ntest(\"Tool OAuth Configuration: Creation, Selection, and Assistant Integration\", async ({\n  page,\n}) => {\n  await page.context().clearCookies();\n  await loginAs(page, \"admin\");\n\n  // --- Step 1: Navigate to OpenAPI Actions Page and Open Add Modal ---\n  const toolName = `Test API ${Date.now()}`;\n  const authorizationUrl = \"https://github.com/login/oauth/authorize\";\n  const tokenUrl = \"https://github.com/login/oauth/access_token\";\n  const clientId = \"test_client_id_456\";\n  const clientSecret = \"test_client_secret_789\";\n  const scopes = \"repo, user\";\n\n  // Create a unique OpenAPI schema with the unique tool name\n  const uniqueOpenAPISchema = SIMPLE_OPENAPI_SCHEMA.replace(\n    '\"title\": \"Test API\"',\n    `\"title\": \"${toolName}\"`\n  );\n\n  await page.goto(\"/admin/actions/open-api\");\n  await page.waitForLoadState(\"networkidle\");\n\n  // Click \"Add OpenAPI Action\" button to open modal\n  const addOpenAPIActionButton = getAddOpenAPIActionButton(page);\n  await addOpenAPIActionButton.click();\n\n  // Wait for modal to appear\n  await expect(\n    page.getByRole(\"dialog\", { name: \"Add OpenAPI action\" })\n  ).toBeVisible({ timeout: 5000 });\n\n  // Fill in the OpenAPI definition in the modal\n  const definitionTextarea = getDefinitionTextarea(page);\n  await definitionTextarea.fill(uniqueOpenAPISchema);\n\n  // Wait for validation to complete (debounced, can take a few seconds)\n  // The tool name appears in the modal after successful validation\n  await expect(page.getByText(toolName)).toBeVisible({\n    timeout: 15000,\n  });\n\n  // --- Step 2: Submit the OpenAPI Action Creation ---\n  const addActionButton = getAddActionButton(page);\n  await addActionButton.scrollIntoViewIfNeeded();\n  await addActionButton.click();\n\n  // --- Step 3: Configure OAuth in Authentication Modal ---\n  // Wait for the authentication modal to appear\n  await expect(page.getByText(\"Authentication Method\")).toBeVisible({\n    timeout: 5000,\n  });\n\n  // Store tool name for cleanup now that the tool is confirmed created\n  createdToolName = toolName;\n\n  // OAuth should be selected by default, fill in OAuth config details\n  await getAuthorizationUrlInput(page).fill(authorizationUrl);\n  await getTokenUrlInput(page).fill(tokenUrl);\n  await getClientIdInput(page).fill(clientId);\n  await getClientSecretInput(page).fill(clientSecret);\n  await getScopesInput(page).fill(scopes);\n\n  // Submit the authentication form\n  const connectButton = getConnectButton(page);\n  await connectButton.click();\n\n  // Wait for authentication to complete and return to the actions list\n  await page.waitForTimeout(2000);\n\n  // --- Step 4: Verify Tool Was Created with OAuth Config ---\n  // We should be on the OpenAPI actions page\n  await page.waitForLoadState(\"networkidle\");\n\n  // Verify we're on the open-api page\n  expect(page.url()).toContain(\"/admin/actions/open-api\");\n\n  // The tool should appear in the actions list - look for our unique tool name\n  await expect(page.getByText(toolName, { exact: false }).first()).toBeVisible({\n    timeout: 20000,\n  });\n\n  // --- Step 5: Verify OAuth Config Persists in Edit Mode ---\n  // Find the action card with our tool and click the manage button\n  const actionCard = page.locator(`[aria-label*=\"${toolName}\"]`).first();\n  await expect(actionCard).toBeVisible({ timeout: 5000 });\n\n  // Click the manage button (gear icon) on the card\n  const manageButton = actionCard\n    .getByRole(\"button\", { name: /manage/i })\n    .or(actionCard.locator('button[aria-label*=\"anage\"]'))\n    .first();\n  await manageButton.click();\n\n  // Wait for the edit modal to appear\n  const editDialog = page.getByRole(\"dialog\", { name: \"Edit OpenAPI action\" });\n  await expect(editDialog).toBeVisible({ timeout: 5000 });\n\n  // Wait for the definition textarea to be visible (indicates modal is loaded)\n  await expect(editDialog.locator('textarea[name=\"definition\"]')).toBeVisible({\n    timeout: 10000,\n  });\n\n  // Verify authentication status is shown (indicates OAuth is configured)\n  await expect(editDialog.getByText(\"Authenticated & Enabled\")).toBeVisible({\n    timeout: 5000,\n  });\n\n  // Verify the \"Edit Configs\" button is visible (confirms OAuth config persists)\n  const editConfigsButton = editDialog.getByRole(\"button\", {\n    name: \"Edit Configs\",\n  });\n  await expect(editConfigsButton).toBeVisible({ timeout: 5000 });\n\n  // Close the modal\n  const closeButton = page\n    .locator('button[aria-label*=\"lose\"]')\n    .or(page.getByRole(\"button\", { name: \"Cancel\" }))\n    .first();\n  await closeButton.click();\n\n  // Wait for modal to close\n  await page.waitForTimeout(500);\n\n  // Test complete for steps 1-5! We've verified:\n  // 1. OpenAPI action can be created via modal\n  // 2. OAuth config is created and applied during action creation\n  // 3. The tool is created and authenticated with the OAuth config\n  // 4. The OAuth config persists when editing the tool\n\n  // --- Step 6: Create Assistant and Verify Tool Availability ---\n  // Navigate to the assistant creation page\n  await page.goto(\"/app/agents/create\");\n  await page.waitForLoadState(\"networkidle\");\n\n  // Fill in basic assistant details\n  const agentName = `Test Assistant ${Date.now()}`;\n  const agentDescription = \"Assistant with OAuth tool\";\n  const assistantInstructions = \"Use the tool when needed\";\n\n  await page.locator('input[name=\"name\"]').fill(agentName);\n  await page.locator('textarea[name=\"description\"]').fill(agentDescription);\n  await page\n    .locator('textarea[name=\"instructions\"]')\n    .fill(assistantInstructions);\n\n  // Scroll down to the Actions section (tools are listed there)\n  const actionsHeading = page.locator(\"text=Actions\").first();\n  await expect(actionsHeading).toBeVisible({ timeout: 10000 });\n  await actionsHeading.scrollIntoViewIfNeeded();\n\n  // Look for our tool in the list\n  // The tool display_name is the tool name we created\n  const toolLabel = page.locator(`label:has-text(\"${toolName}\")`);\n  await expect(toolLabel).toBeVisible({ timeout: 10000 });\n  await toolLabel.scrollIntoViewIfNeeded();\n\n  // Turn it on\n  await toolLabel.click();\n\n  // Submit the assistant creation form\n  const createButton = page.locator('button[type=\"submit\"]:has-text(\"Create\")');\n  await createButton.scrollIntoViewIfNeeded();\n  await createButton.click();\n\n  // Verify redirection to app page with the new assistant ID\n  await page.waitForURL(/.*\\/app\\?agentId=\\d+.*/, { timeout: 10000 });\n  const assistantUrl = page.url();\n  const agentIdMatch = assistantUrl.match(/agentId=(\\d+)/);\n  expect(agentIdMatch).toBeTruthy();\n\n  // Store assistant ID for cleanup\n  if (agentIdMatch) {\n    createdAssistantId = Number(agentIdMatch[1]);\n  }\n\n  // Test complete! We've verified:\n  // 5. The tool with OAuth config is available in assistant creation\n  // 6. The tool can be selected and the assistant can be created successfully\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/scim/fixtures.ts",
    "content": "/**\n * Playwright fixtures for SCIM admin UI tests.\n *\n * Provides:\n * - Authenticated admin page\n * - Stateful mock for the SCIM token endpoint\n *   (GET starts as 404; POST creates a token and flips GET to 200)\n */\n\nimport { test as base, expect, Page } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport type { ScimTokenResponse } from \"@/app/admin/scim/interfaces\";\n\n// ---------------------------------------------------------------------------\n// Fixture control interface\n// ---------------------------------------------------------------------------\n\ninterface MockTokenControl {\n  /** Pre-seed the mock so GET returns an existing token (200). */\n  seedToken: () => ScimTokenResponse;\n}\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\nasync function authenticateAdmin(page: Page): Promise<void> {\n  await page.context().clearCookies();\n  await loginAs(page, \"admin\");\n}\n\nfunction jsonResponse(data: unknown, status = 200) {\n  return {\n    status,\n    contentType: \"application/json\",\n    body: JSON.stringify(data),\n  };\n}\n\n// ---------------------------------------------------------------------------\n// Extended test fixture\n// ---------------------------------------------------------------------------\n\nexport const test = base.extend<{\n  adminPage: Page;\n  mockTokenEndpoint: MockTokenControl;\n}>({\n  adminPage: async ({ page }, use) => {\n    await authenticateAdmin(page);\n    await use(page);\n  },\n\n  mockTokenEndpoint: async ({ adminPage }, use) => {\n    let currentToken: ScimTokenResponse | null = null;\n    let tokenCounter = 0;\n\n    function makeToken(): { token: ScimTokenResponse; rawToken: string } {\n      tokenCounter++;\n      const rawToken = `scim_test_token_${tokenCounter}_${Date.now()}`;\n      const token: ScimTokenResponse = {\n        id: tokenCounter,\n        name: \"default\",\n        token_display: rawToken.slice(0, 16) + \"...\",\n        is_active: true,\n        created_at: new Date().toISOString(),\n        last_used_at: null,\n        idp_domain: null,\n      };\n      return { token, rawToken };\n    }\n\n    await adminPage.route(\n      \"**/api/admin/enterprise-settings/scim/token\",\n      async (route) => {\n        const method = route.request().method();\n\n        if (method === \"GET\") {\n          if (currentToken) {\n            await route.fulfill(jsonResponse(currentToken));\n          } else {\n            await route.fulfill(jsonResponse({ detail: \"Not found\" }, 404));\n          }\n        } else if (method === \"POST\") {\n          const { token, rawToken } = makeToken();\n          currentToken = token;\n          await route.fulfill(jsonResponse({ ...token, raw_token: rawToken }));\n        } else {\n          await route.continue();\n        }\n      }\n    );\n\n    await use({\n      seedToken: () => {\n        const { token } = makeToken();\n        currentToken = token;\n        return token;\n      },\n    });\n  },\n});\n\nexport { expect };\n\n// ---------------------------------------------------------------------------\n// Navigation helper\n// ---------------------------------------------------------------------------\n\nexport async function gotoScimPage(adminPage: Page): Promise<void> {\n  await adminPage.goto(\"/admin/scim\");\n  await expect(adminPage.getByText(\"SCIM Sync\")).toBeVisible({\n    timeout: 15000,\n  });\n}\n"
  },
  {
    "path": "web/tests/e2e/admin/scim/scim.spec.ts",
    "content": "/**\n * E2E Tests: SCIM Token Management\n *\n * Tests the full lifecycle of SCIM tokens — generation, clipboard copy,\n * file download, and regeneration with confirmation.\n */\n\nimport { test, expect, gotoScimPage } from \"./fixtures\";\n\ntest.describe(\"SCIM Token Management\", () => {\n  test(\"generate token, copy, and download\", async ({\n    adminPage,\n    mockTokenEndpoint: _mockTokenEndpoint,\n  }) => {\n    await gotoScimPage(adminPage);\n\n    // No token yet — click generate\n    await adminPage\n      .getByRole(\"button\", { name: \"Generate SCIM Token\" })\n      .click();\n\n    // Token modal opens (.first() to skip hidden Radix aria-describedby element)\n    await expect(\n      adminPage.getByText(\"Save this key before continuing\").first()\n    ).toBeVisible({ timeout: 10000 });\n\n    // Grab the raw token from the textarea\n    const textarea = adminPage.locator(\"textarea\");\n    await textarea.waitFor({ state: \"visible\" });\n    const tokenValue = await textarea.inputValue();\n    expect(tokenValue).toContain(\"scim_test_token_\");\n\n    // Copy to clipboard\n    await adminPage\n      .context()\n      .grantPermissions([\"clipboard-read\", \"clipboard-write\"]);\n    await adminPage.getByRole(\"button\", { name: \"Copy Token\" }).click();\n    await expect(adminPage.getByText(\"Token copied to clipboard\")).toBeVisible({\n      timeout: 5000,\n    });\n    const clipboardText = await adminPage.evaluate(() =>\n      navigator.clipboard.readText()\n    );\n    expect(clipboardText).toBe(tokenValue);\n\n    // Download\n    const downloadPromise = adminPage.waitForEvent(\"download\");\n    await adminPage.getByRole(\"button\", { name: \"Download\" }).click();\n    const download = await downloadPromise;\n    expect(download.suggestedFilename()).toMatch(/^onyx-scim-token-\\d+\\.txt$/);\n  });\n\n  test(\"regenerate token\", async ({ adminPage, mockTokenEndpoint }) => {\n    // Start with an existing token so the card shows \"Regenerate\"\n    mockTokenEndpoint.seedToken();\n    await gotoScimPage(adminPage);\n\n    // Click regenerate on the card\n    await adminPage.getByRole(\"button\", { name: \"Regenerate Token\" }).click();\n\n    // Confirmation modal appears\n    await expect(adminPage.getByText(\"Regenerate SCIM Token\")).toBeVisible();\n    await expect(\n      adminPage.getByText(\"Your current SCIM token will be revoked\")\n    ).toBeVisible();\n\n    // Confirm via the danger button inside the dialog\n    const dialog = adminPage.locator('[role=\"dialog\"]');\n    await dialog.getByRole(\"button\", { name: \"Regenerate Token\" }).click();\n\n    // Token display modal replaces the confirmation modal\n    await expect(\n      adminPage.getByText(\"Save this key before continuing\").first()\n    ).toBeVisible({ timeout: 10000 });\n\n    const textarea = adminPage.locator(\"textarea\");\n    await textarea.waitFor({ state: \"visible\" });\n    const tokenValue = await textarea.inputValue();\n    expect(tokenValue).toContain(\"scim_test_token_\");\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/theme/appearance_theme_settings.spec.ts",
    "content": "import { test, expect } from \"@tests/e2e/fixtures/eeFeatures\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\n\ntest.describe(\"Appearance Theme Settings @exclusive\", () => {\n  const TEST_VALUES = {\n    applicationName: `TestApp${Date.now()}`,\n    greetingMessage: \"Welcome to our test application\",\n    chatHeader: \"Test Header Content\",\n    chatFooter: \"Test Footer Disclaimer\",\n    noticeHeader: \"Important Notice\",\n    noticeContent: \"Please read and agree to continue\",\n    consentPrompt: \"I agree to the terms\",\n  };\n\n  test.beforeEach(async ({ page, eeEnabled }) => {\n    test.skip(\n      !eeEnabled,\n      \"Enterprise license not active — skipping theme tests\"\n    );\n\n    // Fresh session — the eeEnabled fixture already logged in to check the\n    // setting, so clear cookies and re-login for a clean test state.\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n\n    await page.goto(\"/admin/theme\");\n    await expect(\n      page.locator('[data-label=\"application-name-input\"]')\n    ).toBeVisible({ timeout: 10_000 });\n\n    // Clear localStorage to ensure consent modal shows\n    await page.evaluate(() => {\n      localStorage.removeItem(\"allUsersInitialPopupFlowCompleted\");\n    });\n  });\n\n  test.afterEach(async ({ page }) => {\n    // Reset settings to defaults\n    await page.goto(\"/admin/theme\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // If the form isn't visible (e.g. EE license not active, or test failed\n    // before navigating here), skip cleanup — there's nothing to reset.\n    const appNameInput = page.locator('[data-label=\"application-name-input\"]');\n    if (!(await appNameInput.isVisible({ timeout: 3000 }).catch(() => false))) {\n      return;\n    }\n\n    // Clear form fields\n    await appNameInput.clear();\n\n    const greetingInput = page.locator('[data-label=\"greeting-message-input\"]');\n    await greetingInput.clear();\n\n    const headerInput = page.locator('[data-label=\"chat-header-input\"]');\n    await headerInput.clear();\n\n    const footerTextarea = page.locator('[data-label=\"chat-footer-textarea\"]');\n    await footerTextarea.clear();\n\n    // Disable notice toggle if enabled\n    const noticeToggle = page.locator(\n      '[data-label=\"first-visit-notice-toggle\"]'\n    );\n    const isChecked = await noticeToggle.getAttribute(\"aria-checked\");\n    if (isChecked === \"true\") {\n      await noticeToggle.click();\n      await page.waitForTimeout(300);\n    }\n\n    // Save reset\n    const saveButton = page.getByRole(\"button\", { name: \"Apply Changes\" });\n    if (await saveButton.isEnabled()) {\n      await saveButton.click();\n      await page.waitForResponse(\n        (r) =>\n          r.url().includes(\"/api/admin/enterprise-settings\") &&\n          r.request().method() === \"PUT\"\n      );\n    }\n\n    // Clear localStorage\n    await page.evaluate(() => {\n      localStorage.removeItem(\"allUsersInitialPopupFlowCompleted\");\n    });\n  });\n\n  test(\"admin configures branding and verifies across pages\", async ({\n    page,\n  }) => {\n    // 1. Fill in Application Name (page already navigated in beforeEach)\n    const appNameInput = page.locator('[data-label=\"application-name-input\"]');\n    await appNameInput.fill(TEST_VALUES.applicationName);\n\n    // 3. Fill in Greeting Message\n    const greetingInput = page.locator('[data-label=\"greeting-message-input\"]');\n    await greetingInput.fill(TEST_VALUES.greetingMessage);\n\n    // 4. Fill in Chat Header\n    const headerInput = page.locator('[data-label=\"chat-header-input\"]');\n    await headerInput.fill(TEST_VALUES.chatHeader);\n\n    // 5. Fill in Chat Footer\n    const footerTextarea = page.locator('[data-label=\"chat-footer-textarea\"]');\n    await footerTextarea.fill(TEST_VALUES.chatFooter);\n\n    // 6. Enable First Visit Notice\n    const noticeToggle = page.locator(\n      '[data-label=\"first-visit-notice-toggle\"]'\n    );\n    await noticeToggle.click();\n\n    // 7. Fill Notice Header (wait for it to be visible first)\n    const noticeHeaderInput = page.locator(\n      '[data-label=\"notice-header-input\"]'\n    );\n    await expect(noticeHeaderInput).toBeVisible({ timeout: 5000 });\n    await noticeHeaderInput.fill(TEST_VALUES.noticeHeader);\n\n    // 8. Fill Notice Content\n    const noticeContentTextarea = page.locator(\n      '[data-label=\"notice-content-textarea\"]'\n    );\n    await noticeContentTextarea.fill(TEST_VALUES.noticeContent);\n\n    // 9. Enable Consent Requirement (only if not already enabled)\n    const consentToggle = page.locator('[data-label=\"require-consent-toggle\"]');\n    const consentState = await consentToggle.getAttribute(\"aria-checked\");\n    if (consentState !== \"true\") {\n      await consentToggle.click();\n    }\n\n    // 10. Fill Consent Prompt (wait for it to be visible first)\n    const consentPromptTextarea = page.locator(\n      '[data-label=\"consent-prompt-textarea\"]'\n    );\n    await expect(consentPromptTextarea).toBeVisible({ timeout: 5000 });\n    await consentPromptTextarea.fill(TEST_VALUES.consentPrompt);\n\n    // 11. Click Apply Changes\n    const saveButton = page.getByRole(\"button\", { name: \"Apply Changes\" });\n    await expect(saveButton).toBeEnabled();\n    await saveButton.click();\n\n    // 12. Wait for API response\n    const response = await page.waitForResponse(\n      (r) =>\n        r.url().includes(\"/api/admin/enterprise-settings\") &&\n        r.request().method() === \"PUT\",\n      { timeout: 10000 }\n    );\n    expect(response.status()).toBe(200);\n\n    // 13. Wait for success message\n    await expect(page.getByText(/successfully/i)).toBeVisible({\n      timeout: 5000,\n    });\n\n    // 14. Verify admin sidebar has branding (application name)\n    await expect(\n      page.getByText(TEST_VALUES.applicationName).first()\n    ).toBeVisible({\n      timeout: 5000,\n    });\n\n    // 15. Navigate to chat page\n    // Clear localStorage again right before navigation to ensure consent modal shows\n    await page.evaluate(() => {\n      localStorage.removeItem(\"allUsersInitialPopupFlowCompleted\");\n    });\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // 16. Handle consent modal\n    const modal = page.getByRole(\"dialog\");\n    await expect(modal).toBeVisible({ timeout: 15000 });\n\n    // Verify notice header and content\n    await expect(\n      modal.getByText(TEST_VALUES.noticeHeader).first()\n    ).toBeVisible();\n    await expect(\n      modal.getByText(TEST_VALUES.noticeContent).first()\n    ).toBeVisible();\n\n    // Check consent checkbox\n    const checkbox = modal.getByLabel(\"Consent checkbox\");\n    await checkbox.click();\n\n    // Click Start button\n    const startButton = modal.getByRole(\"button\", { name: \"Start\" });\n    await startButton.click();\n\n    // Wait for modal to close\n    await expect(modal).not.toBeVisible({ timeout: 5000 });\n\n    // 17. Verify sidebar branding on chat page\n    await expect(\n      page.getByText(TEST_VALUES.applicationName).first()\n    ).toBeVisible();\n\n    // 18. Verify greeting message on welcome screen\n    await expect(page.getByText(TEST_VALUES.greetingMessage)).toBeVisible();\n\n    // 19. Verify chat header content\n    await expect(page.getByText(TEST_VALUES.chatHeader)).toBeVisible();\n\n    // 20. Verify chat footer content\n    await expect(page.getByText(TEST_VALUES.chatFooter)).toBeVisible();\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/users/UsersAdminPage.ts",
    "content": "/**\n * Page Object Model for the Admin Users page (/admin/users).\n *\n * Encapsulates all locators and interactions so specs remain declarative.\n */\n\nimport { type Page, type Locator, expect } from \"@playwright/test\";\n\n/** URL pattern that matches the users data fetch. */\nconst USERS_API = /\\/api\\/manage\\/users\\/(accepted\\/all|invited)/;\n\nexport class UsersAdminPage {\n  readonly page: Page;\n\n  // Top-level elements\n  readonly inviteButton: Locator;\n  readonly searchInput: Locator;\n\n  // Filter buttons\n  readonly accountTypesFilter: Locator;\n  readonly groupsFilter: Locator;\n  readonly statusFilter: Locator;\n\n  // Table\n  readonly table: Locator;\n  readonly tableRows: Locator;\n\n  // Pagination & footer\n  readonly paginationSummary: Locator;\n  readonly downloadCsvButton: Locator;\n\n  constructor(page: Page) {\n    this.page = page;\n    this.inviteButton = page.getByRole(\"button\", { name: \"Invite Users\" });\n    this.searchInput = page.getByPlaceholder(\"Search users...\");\n\n    this.accountTypesFilter = page.getByLabel(\"Filter by role\");\n    this.groupsFilter = page.getByLabel(\"Filter by group\");\n    this.statusFilter = page.getByLabel(\"Filter by status\");\n\n    this.table = page.getByRole(\"table\");\n    this.tableRows = page.getByRole(\"table\").locator(\"tbody tr\");\n\n    this.paginationSummary = page.getByText(/Showing \\d/);\n    this.downloadCsvButton = page.getByRole(\"button\", {\n      name: \"Download CSV\",\n    });\n  }\n\n  // ---------------------------------------------------------------------------\n  // Popover helper\n  // ---------------------------------------------------------------------------\n\n  /**\n   * Returns a locator for the currently open popover / filter dropdown.\n   * Radix Popover renders its content with `role=\"dialog\"`. Using\n   * `getByRole(\"dialog\").first()` targets the oldest open dialog, which is\n   * always the popover during row-action or filter flows (confirmation\n   * modals open later and would be `.last()`).\n   */\n  get popover(): Locator {\n    return this.page.getByRole(\"dialog\").first();\n  }\n\n  // ---------------------------------------------------------------------------\n  // Navigation\n  // ---------------------------------------------------------------------------\n\n  async goto() {\n    await this.page.goto(\"/admin/users\");\n    await expect(this.page.getByText(\"Users & Requests\")).toBeVisible({\n      timeout: 15000,\n    });\n    // Wait for the table to finish loading (pagination summary only appears\n    // after the async data fetch completes).\n    await expect(this.paginationSummary).toBeVisible({ timeout: 15000 });\n  }\n\n  // ---------------------------------------------------------------------------\n  // Waiting helpers\n  // ---------------------------------------------------------------------------\n\n  /** Wait for the users API response that follows a table-refreshing action. */\n  private async waitForTableRefresh(): Promise<void> {\n    await this.page.waitForResponse(USERS_API);\n  }\n\n  // ---------------------------------------------------------------------------\n  // Search\n  // ---------------------------------------------------------------------------\n\n  async search(term: string) {\n    await this.searchInput.fill(term);\n  }\n\n  async clearSearch() {\n    await this.searchInput.fill(\"\");\n  }\n\n  // ---------------------------------------------------------------------------\n  // Filters\n  // ---------------------------------------------------------------------------\n\n  async openAccountTypesFilter() {\n    await this.accountTypesFilter.click();\n    await expect(this.popover).toBeVisible();\n  }\n\n  async selectAccountType(label: string) {\n    await this.popover.getByText(label, { exact: false }).first().click();\n  }\n\n  async openStatusFilter() {\n    await this.statusFilter.click();\n    await expect(this.popover).toBeVisible();\n  }\n\n  async selectStatus(label: string) {\n    await this.popover.getByText(label, { exact: false }).first().click();\n  }\n\n  async openGroupsFilter() {\n    await this.groupsFilter.click();\n    await expect(this.popover).toBeVisible();\n  }\n\n  async selectGroup(label: string) {\n    await this.popover.getByText(label, { exact: false }).first().click();\n  }\n\n  async closePopover() {\n    await this.page.keyboard.press(\"Escape\");\n    await expect(this.page.getByRole(\"dialog\")).not.toBeVisible();\n  }\n\n  // ---------------------------------------------------------------------------\n  // Table interactions\n  // ---------------------------------------------------------------------------\n\n  async getVisibleRowCount(): Promise<number> {\n    return await this.tableRows.count();\n  }\n\n  /**\n   * Returns the text content of a specific column across all visible rows.\n   * Column indices: 0=Name, 1=Groups, 2=Account Type, 3=Status, 4=Last Updated.\n   */\n  async getColumnTexts(columnIndex: number): Promise<string[]> {\n    const cells = this.tableRows.locator(`td:nth-child(${columnIndex + 2})`);\n    const count = await cells.count();\n    const texts: string[] = [];\n    for (let i = 0; i < count; i++) {\n      const text = await cells.nth(i).textContent();\n      if (text) texts.push(text.trim());\n    }\n    return texts;\n  }\n\n  getRowByEmail(email: string): Locator {\n    return this.table.getByRole(\"row\").filter({ hasText: email });\n  }\n\n  /** Click the sort button on a column header. */\n  async sortByColumn(columnName: string) {\n    // Column headers are <th> elements. The sort button is a child <button>\n    // that only appears on hover — hover first to reveal it.\n    const header = this.table.locator(\"th\").filter({ hasText: columnName });\n    await header.hover();\n    await header.locator(\"button\").first().click();\n  }\n\n  // ---------------------------------------------------------------------------\n  // Pagination\n  // ---------------------------------------------------------------------------\n\n  /** Click a numbered page button in the table footer. */\n  async goToPage(pageNumber: number) {\n    const footer = this.page.locator(\".table-footer\");\n    await footer\n      .getByRole(\"button\")\n      .filter({ hasText: String(pageNumber) })\n      .click();\n  }\n\n  // ---------------------------------------------------------------------------\n  // Row actions\n  // ---------------------------------------------------------------------------\n\n  async openRowActions(email: string) {\n    const row = this.getRowByEmail(email);\n    const actionsButton = row.getByRole(\"button\").last();\n    await actionsButton.click();\n    await expect(this.popover).toBeVisible();\n  }\n\n  async clickRowAction(actionName: string) {\n    await this.popover.getByText(actionName).first().click();\n  }\n\n  // ---------------------------------------------------------------------------\n  // Confirmation modals\n  // ---------------------------------------------------------------------------\n\n  /**\n   * Returns the most recently opened dialog (modal).\n   * Uses `.last()` because confirmation modals are portaled after row-action\n   * popovers, and a closing popover (role=\"dialog\") may briefly remain in the\n   * DOM during its exit animation.\n   */\n  get dialog(): Locator {\n    return this.page.getByRole(\"dialog\").last();\n  }\n\n  async confirmModalAction(buttonName: string) {\n    await this.dialog.getByRole(\"button\", { name: buttonName }).first().click();\n  }\n\n  async cancelModal() {\n    await this.dialog.getByRole(\"button\", { name: \"Cancel\" }).first().click();\n  }\n\n  async expectToast(message: string | RegExp) {\n    await expect(this.page.getByText(message)).toBeVisible();\n  }\n\n  // ---------------------------------------------------------------------------\n  // Invite modal\n  // ---------------------------------------------------------------------------\n\n  /** The email input inside the invite modal. */\n  get inviteEmailInput(): Locator {\n    return this.dialog.getByPlaceholder(\"Add an email and press enter\");\n  }\n\n  async openInviteModal() {\n    await this.inviteButton.click();\n    await expect(this.dialog.getByText(\"Invite Users\")).toBeVisible();\n  }\n\n  async addInviteEmail(email: string) {\n    await this.inviteEmailInput.pressSequentially(email, { delay: 20 });\n    await this.inviteEmailInput.press(\"Enter\");\n    // Wait for the chip to appear in the dialog\n    await expect(this.dialog.getByText(email)).toBeVisible();\n  }\n\n  async submitInvite() {\n    await this.dialog.getByRole(\"button\", { name: \"Invite\" }).click();\n  }\n\n  // ---------------------------------------------------------------------------\n  // Inline role editing (Popover + OpenButton + LineItem)\n  // ---------------------------------------------------------------------------\n\n  async openRoleDropdown(email: string) {\n    const row = this.getRowByEmail(email);\n    const roleButton = row\n      .locator(\"button\")\n      .filter({ hasText: /Basic|Admin|Global Curator|Slack User/ });\n    await roleButton.click();\n    await expect(this.popover).toBeVisible();\n  }\n\n  async selectRole(roleName: string) {\n    await this.popover.getByText(roleName).first().click();\n    await this.waitForTableRefresh();\n  }\n\n  // ---------------------------------------------------------------------------\n  // Edit groups modal\n  // ---------------------------------------------------------------------------\n\n  /**\n   * Stable locator for the edit-groups modal.\n   *\n   * We can't use the generic `dialog` getter (`.last()`) here because the\n   * groups search opens a Radix Popover (also `role=\"dialog\"`) inside the\n   * modal, which shifts what `.last()` resolves to.  Targeting by accessible\n   * name keeps the reference pinned to the modal itself.\n   */\n  get editGroupsDialog(): Locator {\n    return this.page.getByRole(\"dialog\", { name: /Edit User/ });\n  }\n\n  /** The search input inside the edit groups modal. */\n  get groupSearchInput(): Locator {\n    return this.editGroupsDialog.getByPlaceholder(\"Search groups to join...\");\n  }\n\n  async openEditGroupsModal(email: string) {\n    await this.openRowActions(email);\n    await this.clickRowAction(\"Groups\");\n    await expect(\n      this.editGroupsDialog.getByText(\"Edit User's Groups & Roles\")\n    ).toBeVisible();\n  }\n\n  async searchGroupsInModal(term: string) {\n    // Click the input first to open the popover (Radix Popover.Trigger\n    // wraps the input — fill() alone bypasses the trigger's click handler).\n    await this.groupSearchInput.click();\n    await this.groupSearchInput.fill(term);\n    // The group name appears in the popover dropdown (nested dialog).\n    // Use page-level search since the popover may be portaled.\n    await expect(this.page.getByText(term).first()).toBeVisible();\n  }\n\n  async toggleGroupInModal(groupName: string) {\n    // LineItem renders as a <div>, not <button>.\n    // The popover dropdown is a nested dialog inside the modal.\n    await this.page\n      .getByRole(\"dialog\")\n      .last()\n      .getByText(groupName)\n      .first()\n      .click();\n  }\n\n  async saveGroupsModal() {\n    await this.editGroupsDialog\n      .getByRole(\"button\", { name: \"Save Changes\" })\n      .click();\n  }\n}\n"
  },
  {
    "path": "web/tests/e2e/admin/users/fixtures.ts",
    "content": "/**\n * Playwright fixtures for Admin Users page tests.\n *\n * Provides:\n * - Authenticated admin page\n * - OnyxApiClient for API-level setup/teardown\n * - UsersAdminPage page object\n */\n\nimport { test as base, expect, type Page } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\nimport { UsersAdminPage } from \"./UsersAdminPage\";\n\nexport const test = base.extend<{\n  adminPage: Page;\n  api: OnyxApiClient;\n  usersPage: UsersAdminPage;\n}>({\n  adminPage: async ({ page }, use) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n    await use(page);\n  },\n\n  api: async ({ adminPage }, use) => {\n    const client = new OnyxApiClient(adminPage.request);\n    await use(client);\n  },\n\n  usersPage: async ({ adminPage }, use) => {\n    const usersPage = new UsersAdminPage(adminPage);\n    await use(usersPage);\n  },\n});\n\nexport { expect };\n"
  },
  {
    "path": "web/tests/e2e/admin/users/users.spec.ts",
    "content": "/**\n * E2E Tests: Admin Users Page\n *\n * Tests the full users management page — search, filters, sorting,\n * inline role editing, row actions, invite modal, and group management.\n *\n * Read-only tests (layout, search, filters, sorting, pagination) run against\n * whatever users already exist in the database (at minimum 10 from global-setup:\n * 2 admins + 8 workers). Mutation tests create their own ephemeral users.\n */\n\nimport { test, expect } from \"./fixtures\";\nimport { TEST_ADMIN_CREDENTIALS } from \"@tests/e2e/constants\";\nimport type { Browser } from \"@playwright/test\";\nimport type { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\n\nfunction uniqueEmail(prefix: string): string {\n  return `e2e-${prefix}-${Date.now()}@test.onyx`;\n}\n\nconst TEST_PASSWORD = \"TestPassword123!\";\n\n/** Best-effort cleanup — logs failures instead of silently swallowing them. */\nasync function softCleanup(fn: () => Promise<unknown>): Promise<void> {\n  await fn().catch((e) => console.warn(\"cleanup:\", e));\n}\n\n/**\n * Creates an authenticated API context for beforeAll/afterAll hooks.\n * Handles browser context lifecycle so callers only write the setup logic.\n */\nasync function withApiContext(\n  browser: Browser,\n  fn: (api: OnyxApiClient) => Promise<void>\n): Promise<void> {\n  const context = await browser.newContext({\n    storageState: \"admin_auth.json\",\n  });\n  try {\n    const { OnyxApiClient } = await import(\"@tests/e2e/utils/onyxApiClient\");\n    const api = new OnyxApiClient(context.request);\n    await fn(api);\n  } finally {\n    await context.close();\n  }\n}\n\n// ---------------------------------------------------------------------------\n// Page load & layout\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Users page — layout\", () => {\n  test(\"renders page title, invite button, search, and stats bar\", async ({\n    usersPage,\n  }) => {\n    await usersPage.goto();\n\n    await expect(usersPage.page.getByText(\"Users & Requests\")).toBeVisible();\n    await expect(usersPage.inviteButton).toBeVisible();\n    await expect(usersPage.searchInput).toBeVisible();\n    // Stats bar renders number and label as separate elements\n    await expect(usersPage.page.getByText(\"active users\")).toBeVisible();\n  });\n\n  test(\"table renders with correct column headers\", async ({ usersPage }) => {\n    await usersPage.goto();\n\n    for (const header of [\n      \"Name\",\n      \"Groups\",\n      \"Account Type\",\n      \"Status\",\n      \"Last Updated\",\n    ]) {\n      await expect(\n        usersPage.table.locator(\"th\").filter({ hasText: header })\n      ).toBeVisible();\n    }\n  });\n\n  test(\"pagination shows summary and controls\", async ({ usersPage }) => {\n    await usersPage.goto();\n\n    await expect(usersPage.paginationSummary).toBeVisible();\n    await expect(usersPage.paginationSummary).toContainText(\"Showing\");\n  });\n\n  test(\"CSV download button is visible in footer\", async ({ usersPage }) => {\n    await usersPage.goto();\n    await expect(usersPage.downloadCsvButton).toBeVisible();\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Search (uses existing DB users — at least admin_user@example.com)\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Users page — search\", () => {\n  test(\"search filters table rows by email\", async ({ usersPage }) => {\n    await usersPage.goto();\n    await usersPage.search(TEST_ADMIN_CREDENTIALS.email);\n\n    const row = usersPage.getRowByEmail(TEST_ADMIN_CREDENTIALS.email);\n    await expect(row).toBeVisible();\n\n    const rowCount = await usersPage.getVisibleRowCount();\n    expect(rowCount).toBeGreaterThanOrEqual(1);\n  });\n\n  test(\"search with no results shows empty state\", async ({ usersPage }) => {\n    await usersPage.goto();\n    await usersPage.search(\"zzz-no-match-exists-xyz@nowhere.invalid\");\n\n    await expect(usersPage.page.getByText(\"No users found\")).toBeVisible();\n  });\n\n  test(\"clearing search restores all results\", async ({ usersPage }) => {\n    await usersPage.goto();\n\n    await usersPage.search(\"zzz-no-match-exists-xyz@nowhere.invalid\");\n    await expect(usersPage.page.getByText(\"No users found\")).toBeVisible();\n\n    await usersPage.clearSearch();\n\n    await expect(usersPage.table).toBeVisible();\n    const rowCount = await usersPage.getVisibleRowCount();\n    expect(rowCount).toBeGreaterThan(0);\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Filters (uses existing DB users)\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Users page — filters\", () => {\n  test(\"account types filter shows expected roles\", async ({ usersPage }) => {\n    await usersPage.goto();\n    await usersPage.openAccountTypesFilter();\n\n    await expect(\n      usersPage.popover.getByText(\"All Account Types\").first()\n    ).toBeVisible();\n    await expect(usersPage.popover.getByText(\"Admin\").first()).toBeVisible();\n    await expect(usersPage.popover.getByText(\"Basic\").first()).toBeVisible();\n\n    await usersPage.closePopover();\n  });\n\n  test(\"filtering by Admin role shows only admin users\", async ({\n    usersPage,\n  }) => {\n    await usersPage.goto();\n    await usersPage.openAccountTypesFilter();\n    await usersPage.selectAccountType(\"Admin\");\n    await usersPage.closePopover();\n\n    await expect(usersPage.accountTypesFilter).toContainText(\"Admin\");\n\n    const rowCount = await usersPage.getVisibleRowCount();\n    expect(rowCount).toBeGreaterThan(0);\n\n    // Every visible row's Account Type column must say \"Admin\"\n    const roleTexts = await usersPage.getColumnTexts(2);\n    for (const role of roleTexts) {\n      expect(role).toBe(\"Admin\");\n    }\n  });\n\n  test(\"status filter for Active shows only active users\", async ({\n    usersPage,\n  }) => {\n    await usersPage.goto();\n    await usersPage.openStatusFilter();\n    await usersPage.selectStatus(\"Active\");\n    await usersPage.closePopover();\n\n    await expect(usersPage.statusFilter).toContainText(\"Active\");\n\n    const rowCount = await usersPage.getVisibleRowCount();\n    expect(rowCount).toBeGreaterThan(0);\n\n    // Every visible row's Status column must say \"Active\"\n    const statusTexts = await usersPage.getColumnTexts(3);\n    for (const status of statusTexts) {\n      expect(status).toBe(\"Active\");\n    }\n  });\n\n  test(\"resetting filter shows all users again\", async ({ usersPage }) => {\n    await usersPage.goto();\n\n    await usersPage.openStatusFilter();\n    await usersPage.selectStatus(\"Active\");\n    await usersPage.closePopover();\n    const filteredCount = await usersPage.getVisibleRowCount();\n\n    await usersPage.openStatusFilter();\n    await usersPage.selectStatus(\"All Status\");\n    await usersPage.closePopover();\n    const allCount = await usersPage.getVisibleRowCount();\n\n    expect(allCount).toBeGreaterThanOrEqual(filteredCount);\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Sorting (uses existing DB users)\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Users page — sorting\", () => {\n  test(\"clicking Name sort twice reverses row order\", async ({ usersPage }) => {\n    await usersPage.goto();\n\n    const firstRowBefore = await usersPage.tableRows.first().textContent();\n\n    // Click twice — first click may match default order; second guarantees reversal\n    await usersPage.sortByColumn(\"Name\");\n    await usersPage.sortByColumn(\"Name\");\n\n    const firstRowAfter = await usersPage.tableRows.first().textContent();\n    expect(firstRowAfter).not.toBe(firstRowBefore);\n  });\n\n  test(\"clicking Account Type sort twice reorders rows\", async ({\n    usersPage,\n  }) => {\n    await usersPage.goto();\n\n    const rolesBefore = await usersPage.getColumnTexts(2);\n\n    // Click twice to guarantee a different order from default\n    await usersPage.sortByColumn(\"Account Type\");\n    await usersPage.sortByColumn(\"Account Type\");\n\n    const rolesAfter = await usersPage.getColumnTexts(2);\n    expect(rolesAfter.length).toBeGreaterThan(0);\n    expect(rolesAfter).not.toEqual(rolesBefore);\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Pagination (uses existing DB users — need > 8 for multi-page)\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Users page — pagination\", () => {\n  test(\"clicking page 2 navigates to second page\", async ({ usersPage }) => {\n    await usersPage.goto();\n\n    const summaryBefore = await usersPage.paginationSummary.textContent();\n\n    // With 10+ users and page size 8, page 2 should exist\n    await usersPage.goToPage(2);\n\n    await expect(usersPage.paginationSummary).not.toHaveText(summaryBefore!);\n\n    // Go back to page 1\n    await usersPage.goToPage(1);\n    await expect(usersPage.paginationSummary).toHaveText(summaryBefore!);\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Invite users (creates ephemeral data)\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Users page — invite users\", () => {\n  test(\"invite modal opens with correct structure\", async ({ usersPage }) => {\n    await usersPage.goto();\n    await usersPage.openInviteModal();\n\n    await expect(usersPage.dialog.getByText(\"Invite Users\")).toBeVisible();\n    await expect(usersPage.inviteEmailInput).toBeVisible();\n\n    await usersPage.cancelModal();\n    await expect(usersPage.dialog).not.toBeVisible();\n  });\n\n  test(\"invite a user and verify Invite Pending status\", async ({\n    usersPage,\n    api,\n  }) => {\n    const email = uniqueEmail(\"invite\");\n\n    await usersPage.goto();\n    await usersPage.openInviteModal();\n    await usersPage.addInviteEmail(email);\n    await usersPage.submitInvite();\n\n    await usersPage.expectToast(/Invited 1 user/);\n\n    // Reload and search\n    await usersPage.goto();\n    await usersPage.search(email);\n\n    const row = usersPage.getRowByEmail(email);\n    await expect(row).toBeVisible();\n    await expect(row).toContainText(\"Invite Pending\");\n\n    // Cleanup\n    await api.cancelInvite(email);\n  });\n\n  test(\"invite multiple users at once\", async ({ usersPage, api }) => {\n    const email1 = uniqueEmail(\"multi1\");\n    const email2 = uniqueEmail(\"multi2\");\n\n    await usersPage.goto();\n    await usersPage.openInviteModal();\n\n    await usersPage.addInviteEmail(email1);\n    await usersPage.addInviteEmail(email2);\n\n    await usersPage.submitInvite();\n    await usersPage.expectToast(/Invited 2 users/);\n\n    // Cleanup\n    await api.cancelInvite(email1);\n    await api.cancelInvite(email2);\n  });\n\n  test(\"invite modal shows error icon for invalid emails\", async ({\n    usersPage,\n  }) => {\n    await usersPage.goto();\n    await usersPage.openInviteModal();\n\n    await usersPage.addInviteEmail(\"not-an-email\");\n\n    // The chip should be rendered with an error state\n    await expect(usersPage.dialog.getByText(\"not-an-email\")).toBeVisible();\n\n    await usersPage.cancelModal();\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Row actions — deactivate / activate (creates ephemeral data)\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Users page — deactivate & activate\", () => {\n  let testUserEmail: string;\n\n  test.beforeAll(async ({ browser }) => {\n    testUserEmail = uniqueEmail(\"deact\");\n    await withApiContext(browser, async (api) => {\n      await api.registerUser(testUserEmail, TEST_PASSWORD);\n    });\n  });\n\n  test(\"deactivate and then reactivate a user\", async ({ usersPage }) => {\n    await usersPage.goto();\n    await usersPage.search(testUserEmail);\n\n    const row = usersPage.getRowByEmail(testUserEmail);\n    await expect(row).toBeVisible();\n    await expect(row).toContainText(\"Active\");\n\n    // Deactivate\n    await usersPage.openRowActions(testUserEmail);\n    await usersPage.clickRowAction(\"Deactivate User\");\n\n    await expect(usersPage.dialog.getByText(\"Deactivate User\")).toBeVisible();\n    await expect(usersPage.dialog.getByText(testUserEmail)).toBeVisible();\n    await expect(\n      usersPage.dialog.getByText(\"will immediately lose access\")\n    ).toBeVisible();\n\n    await usersPage.confirmModalAction(\"Deactivate\");\n    await usersPage.expectToast(\"User deactivated\");\n\n    // Verify Inactive\n    await usersPage.goto();\n    await usersPage.search(testUserEmail);\n    const inactiveRow = usersPage.getRowByEmail(testUserEmail);\n    await expect(inactiveRow).toContainText(\"Inactive\");\n\n    // Reactivate\n    await usersPage.openRowActions(testUserEmail);\n    await usersPage.clickRowAction(\"Activate User\");\n\n    await expect(usersPage.dialog.getByText(\"Activate User\")).toBeVisible();\n\n    await usersPage.confirmModalAction(\"Activate\");\n    await usersPage.expectToast(\"User activated\");\n\n    // Verify Active again\n    await usersPage.goto();\n    await usersPage.search(testUserEmail);\n    const reactivatedRow = usersPage.getRowByEmail(testUserEmail);\n    await expect(reactivatedRow).toContainText(\"Active\");\n  });\n\n  test.afterAll(async ({ browser }) => {\n    await withApiContext(browser, async (api) => {\n      await softCleanup(() => api.deactivateUser(testUserEmail));\n      await softCleanup(() => api.deleteUser(testUserEmail));\n    });\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Row actions — delete user (creates ephemeral data)\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Users page — delete user\", () => {\n  test(\"delete an inactive user\", async ({ usersPage, api }) => {\n    const email = uniqueEmail(\"delete\");\n    await api.registerUser(email, TEST_PASSWORD);\n    await api.deactivateUser(email);\n\n    await usersPage.goto();\n    await usersPage.search(email);\n\n    const row = usersPage.getRowByEmail(email);\n    await expect(row).toBeVisible();\n    await expect(row).toContainText(\"Inactive\");\n\n    await usersPage.openRowActions(email);\n    await usersPage.clickRowAction(\"Delete User\");\n\n    await expect(usersPage.dialog.getByText(\"Delete User\")).toBeVisible();\n    await expect(\n      usersPage.dialog.getByText(\"will be permanently removed\")\n    ).toBeVisible();\n\n    await usersPage.confirmModalAction(\"Delete\");\n    await usersPage.expectToast(\"User deleted\");\n\n    // User gone\n    await usersPage.goto();\n    await usersPage.search(email);\n    await expect(usersPage.page.getByText(\"No users found\")).toBeVisible();\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Row actions — cancel invite (creates ephemeral data)\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Users page — cancel invite\", () => {\n  test(\"cancel a pending invite\", async ({ usersPage, api }) => {\n    const email = uniqueEmail(\"cancel-inv\");\n    await api.inviteUsers([email]);\n\n    await usersPage.goto();\n    await usersPage.search(email);\n\n    const row = usersPage.getRowByEmail(email);\n    await expect(row).toBeVisible();\n    await expect(row).toContainText(\"Invite Pending\");\n\n    await usersPage.openRowActions(email);\n    await usersPage.clickRowAction(\"Cancel Invite\");\n\n    await expect(\n      usersPage.dialog.getByText(\"Cancel Invite\").first()\n    ).toBeVisible();\n\n    await usersPage.confirmModalAction(\"Cancel Invite\");\n    await usersPage.expectToast(\"Invite cancelled\");\n\n    // User gone\n    await usersPage.goto();\n    await usersPage.search(email);\n    await expect(usersPage.page.getByText(\"No users found\")).toBeVisible();\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Inline role editing (creates ephemeral data)\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Users page — inline role editing\", () => {\n  let testUserEmail: string;\n\n  test.beforeAll(async ({ browser }) => {\n    testUserEmail = uniqueEmail(\"role\");\n    await withApiContext(browser, async (api) => {\n      await api.registerUser(testUserEmail, TEST_PASSWORD);\n    });\n  });\n\n  test(\"change user role from Basic to Admin and back\", async ({\n    usersPage,\n  }) => {\n    await usersPage.goto();\n    await usersPage.search(testUserEmail);\n\n    const row = usersPage.getRowByEmail(testUserEmail);\n    await expect(row).toBeVisible();\n\n    // Initially Basic\n    await expect(row.getByText(\"Basic\")).toBeVisible();\n\n    // Change to Admin\n    await usersPage.openRoleDropdown(testUserEmail);\n    await usersPage.selectRole(\"Admin\");\n    await expect(row.getByText(\"Admin\")).toBeVisible();\n\n    // Change back to Basic\n    await usersPage.openRoleDropdown(testUserEmail);\n    await usersPage.selectRole(\"Basic\");\n    await expect(row.getByText(\"Basic\")).toBeVisible();\n  });\n\n  test.afterAll(async ({ browser }) => {\n    await withApiContext(browser, async (api) => {\n      await softCleanup(() => api.deactivateUser(testUserEmail));\n      await softCleanup(() => api.deleteUser(testUserEmail));\n    });\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Group management (creates ephemeral data)\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Users page — group management\", () => {\n  let testUserEmail: string;\n  let testGroupId: number;\n  const groupName = `E2E-UsersTest-${Date.now()}`;\n\n  test.beforeAll(async ({ browser }) => {\n    testUserEmail = uniqueEmail(\"grp\");\n    await withApiContext(browser, async (api) => {\n      await api.registerUser(testUserEmail, TEST_PASSWORD);\n      testGroupId = await api.createUserGroup(groupName);\n      await api.waitForGroupSync(testGroupId);\n    });\n  });\n\n  test(\"add user to group via edit groups modal\", async ({ usersPage }) => {\n    await usersPage.goto();\n    await usersPage.search(testUserEmail);\n\n    const row = usersPage.getRowByEmail(testUserEmail);\n    await expect(row).toBeVisible();\n\n    await usersPage.openEditGroupsModal(testUserEmail);\n    await usersPage.searchGroupsInModal(groupName);\n    await usersPage.toggleGroupInModal(groupName);\n    await usersPage.saveGroupsModal();\n    await usersPage.expectToast(\"User updated\");\n\n    // Verify group shows in the row\n    await usersPage.goto();\n    await usersPage.search(testUserEmail);\n    const rowWithGroup = usersPage.getRowByEmail(testUserEmail);\n    await expect(rowWithGroup).toContainText(groupName);\n  });\n\n  test(\"remove user from group via edit groups modal\", async ({\n    usersPage,\n  }) => {\n    await usersPage.goto();\n    await usersPage.search(testUserEmail);\n\n    const row = usersPage.getRowByEmail(testUserEmail);\n    await expect(row).toBeVisible();\n\n    await usersPage.openEditGroupsModal(testUserEmail);\n\n    // Group shows as joined — click to remove\n    await usersPage.toggleGroupInModal(groupName);\n    await usersPage.saveGroupsModal();\n    await usersPage.expectToast(\"User updated\");\n\n    // Verify group removed\n    await usersPage.goto();\n    await usersPage.search(testUserEmail);\n    await expect(usersPage.getRowByEmail(testUserEmail)).not.toContainText(\n      groupName\n    );\n  });\n\n  test.afterAll(async ({ browser }) => {\n    await withApiContext(browser, async (api) => {\n      await softCleanup(() => api.deleteUserGroup(testGroupId));\n      await softCleanup(() => api.deactivateUser(testUserEmail));\n      await softCleanup(() => api.deleteUser(testUserEmail));\n    });\n  });\n});\n\n// ---------------------------------------------------------------------------\n// Stats bar\n// ---------------------------------------------------------------------------\n\ntest.describe(\"Users page — stats bar\", () => {\n  test(\"stats bar shows active users count\", async ({ usersPage }) => {\n    await usersPage.goto();\n    // Number and label are separate elements; check for the label\n    await expect(usersPage.page.getByText(\"active users\")).toBeVisible();\n  });\n\n  test(\"stats bar updates after inviting a user\", async ({\n    usersPage,\n    api,\n  }) => {\n    const email = uniqueEmail(\"stats\");\n\n    await usersPage.goto();\n\n    await usersPage.openInviteModal();\n    await usersPage.addInviteEmail(email);\n    await usersPage.submitInvite();\n    await usersPage.expectToast(/Invited 1 user/);\n\n    // Stats bar should reflect the new invite\n    await usersPage.goto();\n    await expect(usersPage.page.getByText(\"pending invites\")).toBeVisible();\n\n    // Cleanup\n    await api.cancelInvite(email);\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/voice/disconnect-provider.spec.ts",
    "content": "import { test, expect, Page, Locator } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { expectElementScreenshot } from \"@tests/e2e/utils/visualRegression\";\n\nconst VOICE_URL = \"/admin/configuration/voice\";\n\nconst FAKE_PROVIDERS = {\n  openai_active_stt: {\n    id: 1,\n    name: \"openai\",\n    provider_type: \"openai\",\n    is_default_stt: true,\n    is_default_tts: false,\n    stt_model: \"whisper\",\n    tts_model: null,\n    default_voice: null,\n    has_api_key: true,\n    target_uri: null,\n  },\n  openai_active_both: {\n    id: 1,\n    name: \"openai\",\n    provider_type: \"openai\",\n    is_default_stt: true,\n    is_default_tts: true,\n    stt_model: \"whisper\",\n    tts_model: \"tts-1\",\n    default_voice: \"alloy\",\n    has_api_key: true,\n    target_uri: null,\n  },\n  openai_connected: {\n    id: 1,\n    name: \"openai\",\n    provider_type: \"openai\",\n    is_default_stt: false,\n    is_default_tts: false,\n    stt_model: null,\n    tts_model: null,\n    default_voice: null,\n    has_api_key: true,\n    target_uri: null,\n  },\n  elevenlabs_connected: {\n    id: 2,\n    name: \"elevenlabs\",\n    provider_type: \"elevenlabs\",\n    is_default_stt: false,\n    is_default_tts: false,\n    stt_model: null,\n    tts_model: null,\n    default_voice: null,\n    has_api_key: true,\n    target_uri: null,\n  },\n};\n\nfunction findModelCard(page: Page, ariaLabel: string): Locator {\n  return page.getByLabel(ariaLabel, { exact: true });\n}\n\nfunction mainContainer(page: Page): Locator {\n  return page.locator(\"[data-main-container]\");\n}\n\nasync function mockVoiceApis(\n  page: Page,\n  providers: (typeof FAKE_PROVIDERS)[keyof typeof FAKE_PROVIDERS][]\n) {\n  await page.route(\"**/api/admin/voice/providers\", async (route) => {\n    if (route.request().method() === \"GET\") {\n      await route.fulfill({ status: 200, json: providers });\n    } else {\n      await route.continue();\n    }\n  });\n}\n\ntest.describe(\"Voice Provider Disconnect\", () => {\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n  });\n\n  test(\"should disconnect a non-active provider and affect both STT and TTS cards\", async ({\n    page,\n  }) => {\n    const providers = [\n      { ...FAKE_PROVIDERS.openai_connected },\n      { ...FAKE_PROVIDERS.elevenlabs_connected },\n    ];\n    await mockVoiceApis(page, providers);\n\n    await page.goto(VOICE_URL);\n    await page.waitForSelector(\"text=Speech to Text\", { timeout: 20000 });\n\n    const whisperCard = findModelCard(page, \"voice-stt-whisper\");\n    await whisperCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n    await expectElementScreenshot(mainContainer(page), {\n      name: \"voice-disconnect-non-active-before\",\n    });\n\n    const disconnectButton = whisperCard.getByRole(\"button\", {\n      name: \"Disconnect Whisper\",\n    });\n    await expect(disconnectButton).toBeVisible();\n    await expect(disconnectButton).toBeEnabled();\n\n    // Mock DELETE to succeed and remove OpenAI from provider list\n    await page.route(\"**/api/admin/voice/providers/1\", async (route) => {\n      if (route.request().method() === \"DELETE\") {\n        await page.unroute(\"**/api/admin/voice/providers\");\n        await page.route(\"**/api/admin/voice/providers\", async (route) => {\n          if (route.request().method() === \"GET\") {\n            await route.fulfill({\n              status: 200,\n              json: [{ ...FAKE_PROVIDERS.elevenlabs_connected }],\n            });\n          } else {\n            await route.continue();\n          }\n        });\n        await route.fulfill({ status: 200, json: {} });\n      } else {\n        await route.continue();\n      }\n    });\n\n    await disconnectButton.click();\n\n    // Modal shows provider name, not model name\n    const confirmDialog = page.getByRole(\"dialog\");\n    await expect(confirmDialog).toBeVisible({ timeout: 5000 });\n    await expect(confirmDialog).toContainText(\"Disconnect OpenAI\");\n\n    await expectElementScreenshot(confirmDialog, {\n      name: \"voice-disconnect-non-active-modal\",\n    });\n\n    const confirmButton = confirmDialog.getByRole(\"button\", {\n      name: \"Disconnect\",\n    });\n    await confirmButton.click();\n\n    // Both STT and TTS cards for OpenAI revert to disconnected\n    await expect(\n      whisperCard.getByRole(\"button\", { name: \"Connect\" })\n    ).toBeVisible({ timeout: 10000 });\n\n    const tts1Card = findModelCard(page, \"voice-tts-tts-1\");\n    await expect(tts1Card.getByRole(\"button\", { name: \"Connect\" })).toBeVisible(\n      { timeout: 10000 }\n    );\n\n    await expectElementScreenshot(mainContainer(page), {\n      name: \"voice-disconnect-non-active-after\",\n    });\n  });\n\n  test(\"should show replacement dropdown when disconnecting active provider with alternatives\", async ({\n    page,\n  }) => {\n    // OpenAI is active for STT, ElevenLabs is also configured\n    const providers = [\n      { ...FAKE_PROVIDERS.openai_active_stt },\n      { ...FAKE_PROVIDERS.elevenlabs_connected },\n    ];\n    await mockVoiceApis(page, providers);\n\n    await page.goto(VOICE_URL);\n    await page.waitForSelector(\"text=Speech to Text\", { timeout: 20000 });\n\n    const whisperCard = findModelCard(page, \"voice-stt-whisper\");\n    await whisperCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n    await expectElementScreenshot(mainContainer(page), {\n      name: \"voice-disconnect-active-with-alt-before\",\n    });\n\n    const disconnectButton = whisperCard.getByRole(\"button\", {\n      name: \"Disconnect Whisper\",\n    });\n    await disconnectButton.click();\n\n    const confirmDialog = page.getByRole(\"dialog\");\n    await expect(confirmDialog).toBeVisible({ timeout: 5000 });\n    await expect(confirmDialog).toContainText(\"Disconnect OpenAI\");\n\n    // Should show replacement text and dropdown\n    await expect(\n      confirmDialog.getByText(\"Session history will be preserved\")\n    ).toBeVisible();\n\n    // Disconnect button should be enabled because first replacement is auto-selected\n    const confirmButton = confirmDialog.getByRole(\"button\", {\n      name: \"Disconnect\",\n    });\n    await expect(confirmButton).toBeEnabled();\n\n    await expectElementScreenshot(confirmDialog, {\n      name: \"voice-disconnect-active-with-alt-modal\",\n    });\n  });\n\n  test(\"should show replacement when provider is default for both STT and TTS\", async ({\n    page,\n  }) => {\n    // OpenAI is default for both modes, ElevenLabs also configured\n    const providers = [\n      { ...FAKE_PROVIDERS.openai_active_both },\n      { ...FAKE_PROVIDERS.elevenlabs_connected },\n    ];\n    await mockVoiceApis(page, providers);\n\n    await page.goto(VOICE_URL);\n    await page.waitForSelector(\"text=Speech to Text\", { timeout: 20000 });\n\n    const whisperCard = findModelCard(page, \"voice-stt-whisper\");\n    await whisperCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n    await expectElementScreenshot(mainContainer(page), {\n      name: \"voice-disconnect-both-modes-before\",\n    });\n\n    const disconnectButton = whisperCard.getByRole(\"button\", {\n      name: \"Disconnect Whisper\",\n    });\n    await disconnectButton.click();\n\n    const confirmDialog = page.getByRole(\"dialog\");\n    await expect(confirmDialog).toBeVisible({ timeout: 5000 });\n    await expect(confirmDialog).toContainText(\"Disconnect OpenAI\");\n\n    // Should mention both modes\n    await expect(\n      confirmDialog.getByText(\"speech-to-text or text-to-speech\")\n    ).toBeVisible();\n\n    // Should show replacement dropdown\n    await expect(\n      confirmDialog.getByText(\"Session history will be preserved\")\n    ).toBeVisible();\n\n    const confirmButton = confirmDialog.getByRole(\"button\", {\n      name: \"Disconnect\",\n    });\n    await expect(confirmButton).toBeEnabled();\n\n    await expectElementScreenshot(confirmDialog, {\n      name: \"voice-disconnect-both-modes-modal\",\n    });\n  });\n\n  test(\"should show connect message when disconnecting active provider with no alternatives\", async ({\n    page,\n  }) => {\n    // Only OpenAI configured, active for STT — no other providers\n    const providers = [{ ...FAKE_PROVIDERS.openai_active_stt }];\n    await mockVoiceApis(page, providers);\n\n    await page.goto(VOICE_URL);\n    await page.waitForSelector(\"text=Speech to Text\", { timeout: 20000 });\n\n    const whisperCard = findModelCard(page, \"voice-stt-whisper\");\n    await whisperCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n    await expectElementScreenshot(mainContainer(page), {\n      name: \"voice-disconnect-no-alt-before\",\n    });\n\n    const disconnectButton = whisperCard.getByRole(\"button\", {\n      name: \"Disconnect Whisper\",\n    });\n    await disconnectButton.click();\n\n    const confirmDialog = page.getByRole(\"dialog\");\n    await expect(confirmDialog).toBeVisible({ timeout: 5000 });\n    await expect(confirmDialog).toContainText(\"Disconnect OpenAI\");\n\n    // Should show message about connecting another provider\n    await expect(\n      confirmDialog.getByText(\"Connect another provider\")\n    ).toBeVisible();\n\n    // Disconnect button should be enabled\n    const confirmButton = confirmDialog.getByRole(\"button\", {\n      name: \"Disconnect\",\n    });\n    await expect(confirmButton).toBeEnabled();\n\n    await expectElementScreenshot(confirmDialog, {\n      name: \"voice-disconnect-no-alt-modal\",\n    });\n  });\n\n  test(\"should not show disconnect button for unconfigured provider\", async ({\n    page,\n  }) => {\n    await mockVoiceApis(page, []);\n\n    await page.goto(VOICE_URL);\n    await page.waitForSelector(\"text=Speech to Text\", { timeout: 20000 });\n\n    const whisperCard = findModelCard(page, \"voice-stt-whisper\");\n    await whisperCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n    const disconnectButton = whisperCard.getByRole(\"button\", {\n      name: \"Disconnect Whisper\",\n    });\n    await expect(disconnectButton).not.toBeVisible();\n\n    await expectElementScreenshot(mainContainer(page), {\n      name: \"voice-disconnect-unconfigured\",\n    });\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/web-search/disconnect-provider.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { expectElementScreenshot } from \"@tests/e2e/utils/visualRegression\";\nimport {\n  WEB_SEARCH_URL,\n  FAKE_SEARCH_PROVIDERS,\n  FAKE_CONTENT_PROVIDERS,\n  findProviderCard,\n  mainContainer,\n  mockWebSearchApis,\n} from \"./svc\";\n\ntest.describe(\"Web Search Provider Disconnect\", () => {\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n  });\n\n  test.describe(\"Search Engine Providers\", () => {\n    test(\"should disconnect a connected (non-active) search provider\", async ({\n      page,\n    }) => {\n      const searchProviders = [\n        { ...FAKE_SEARCH_PROVIDERS.exa },\n        { ...FAKE_SEARCH_PROVIDERS.brave },\n      ];\n      await mockWebSearchApis(page, searchProviders, []);\n\n      await page.goto(WEB_SEARCH_URL);\n      await page.waitForSelector(\"text=Search Engine\", { timeout: 20000 });\n\n      const braveCard = findProviderCard(page, \"Brave\");\n      await braveCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      await expectElementScreenshot(mainContainer(page), {\n        name: \"web-search-disconnect-non-active-before\",\n      });\n\n      await braveCard.hover();\n      const disconnectButton = braveCard.getByRole(\"button\", {\n        name: \"Disconnect Brave\",\n      });\n      await expect(disconnectButton).toBeVisible();\n      await expect(disconnectButton).toBeEnabled();\n\n      // Mock the DELETE to succeed\n      await page.route(\n        \"**/api/admin/web-search/search-providers/2\",\n        async (route) => {\n          if (route.request().method() === \"DELETE\") {\n            await page.unroute(\"**/api/admin/web-search/search-providers\");\n            await page.route(\n              \"**/api/admin/web-search/search-providers\",\n              async (route) => {\n                if (route.request().method() === \"GET\") {\n                  await route.fulfill({\n                    status: 200,\n                    json: [{ ...FAKE_SEARCH_PROVIDERS.exa }],\n                  });\n                } else {\n                  await route.continue();\n                }\n              }\n            );\n            await route.fulfill({ status: 200, json: {} });\n          } else {\n            await route.continue();\n          }\n        }\n      );\n\n      await disconnectButton.click();\n\n      const confirmDialog = page.getByRole(\"dialog\");\n      await expect(confirmDialog).toBeVisible({ timeout: 5000 });\n      await expect(confirmDialog).toContainText(\"Disconnect Brave\");\n\n      await expectElementScreenshot(confirmDialog, {\n        name: \"web-search-disconnect-non-active-modal\",\n      });\n\n      const confirmButton = confirmDialog.getByRole(\"button\", {\n        name: \"Disconnect\",\n      });\n      await confirmButton.click();\n\n      await expect(\n        braveCard.getByRole(\"button\", { name: \"Connect\" })\n      ).toBeVisible({ timeout: 10000 });\n\n      await expectElementScreenshot(mainContainer(page), {\n        name: \"web-search-disconnect-non-active-after\",\n      });\n    });\n\n    test(\"should show replacement dropdown when disconnecting active search provider with alternatives\", async ({\n      page,\n    }) => {\n      // Exa is active, Brave is also configured\n      const searchProviders = [\n        { ...FAKE_SEARCH_PROVIDERS.exa },\n        { ...FAKE_SEARCH_PROVIDERS.brave },\n      ];\n      await mockWebSearchApis(page, searchProviders, []);\n\n      await page.goto(WEB_SEARCH_URL);\n      await page.waitForSelector(\"text=Search Engine\", { timeout: 20000 });\n\n      const exaCard = findProviderCard(page, \"Exa\");\n      await exaCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      await exaCard.hover();\n      const disconnectButton = exaCard.getByRole(\"button\", {\n        name: \"Disconnect Exa\",\n      });\n      await expect(disconnectButton).toBeVisible();\n      await expect(disconnectButton).toBeEnabled();\n\n      await disconnectButton.click();\n\n      const confirmDialog = page.getByRole(\"dialog\");\n      await expect(confirmDialog).toBeVisible({ timeout: 5000 });\n      await expect(confirmDialog).toContainText(\"Disconnect Exa\");\n\n      // Should show replacement dropdown\n      await expect(\n        confirmDialog.getByText(\"Search history will be preserved\")\n      ).toBeVisible();\n\n      // Disconnect button should be enabled because first replacement is auto-selected\n      const confirmButton = confirmDialog.getByRole(\"button\", {\n        name: \"Disconnect\",\n      });\n      await expect(confirmButton).toBeEnabled();\n\n      await expectElementScreenshot(confirmDialog, {\n        name: \"web-search-disconnect-active-with-alt-modal\",\n      });\n    });\n\n    test(\"should show connect message when disconnecting active search provider with no alternatives\", async ({\n      page,\n    }) => {\n      // Only Exa configured and active\n      await mockWebSearchApis(page, [{ ...FAKE_SEARCH_PROVIDERS.exa }], []);\n\n      await page.goto(WEB_SEARCH_URL);\n      await page.waitForSelector(\"text=Search Engine\", { timeout: 20000 });\n\n      const exaCard = findProviderCard(page, \"Exa\");\n      await exaCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      await exaCard.hover();\n      const disconnectButton = exaCard.getByRole(\"button\", {\n        name: \"Disconnect Exa\",\n      });\n      await disconnectButton.click();\n\n      const confirmDialog = page.getByRole(\"dialog\");\n      await expect(confirmDialog).toBeVisible({ timeout: 5000 });\n\n      // Should show message about connecting another provider\n      await expect(\n        confirmDialog.getByText(\"Connect another provider\")\n      ).toBeVisible();\n\n      // Disconnect button should be enabled\n      const confirmButton = confirmDialog.getByRole(\"button\", {\n        name: \"Disconnect\",\n      });\n      await expect(confirmButton).toBeEnabled();\n\n      await expectElementScreenshot(confirmDialog, {\n        name: \"web-search-disconnect-no-alt-modal\",\n      });\n    });\n\n    test(\"should not show disconnect button for unconfigured search provider\", async ({\n      page,\n    }) => {\n      await mockWebSearchApis(page, [{ ...FAKE_SEARCH_PROVIDERS.exa }], []);\n\n      await page.goto(WEB_SEARCH_URL);\n      await page.waitForSelector(\"text=Search Engine\", { timeout: 20000 });\n\n      const braveCard = findProviderCard(page, \"Brave\");\n      await braveCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      const disconnectButton = braveCard.getByRole(\"button\", {\n        name: \"Disconnect Brave\",\n      });\n      await expect(disconnectButton).not.toBeVisible();\n\n      await expectElementScreenshot(mainContainer(page), {\n        name: \"web-search-disconnect-unconfigured\",\n      });\n    });\n  });\n\n  test.describe(\"Web Crawler (Content) Providers\", () => {\n    test(\"should disconnect a connected (non-active) content provider\", async ({\n      page,\n    }) => {\n      // Firecrawl connected but not active, Exa is active\n      const contentProviders = [\n        { ...FAKE_CONTENT_PROVIDERS.firecrawl, is_active: false },\n        { ...FAKE_CONTENT_PROVIDERS.exa, is_active: true },\n      ];\n      await mockWebSearchApis(page, [], contentProviders);\n\n      await page.goto(WEB_SEARCH_URL);\n      await page.waitForSelector(\"text=Web Crawler\", { timeout: 20000 });\n\n      const firecrawlCard = findProviderCard(page, \"Firecrawl\");\n      await firecrawlCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      await firecrawlCard.hover();\n      const disconnectButton = firecrawlCard.getByRole(\"button\", {\n        name: \"Disconnect Firecrawl\",\n      });\n      await expect(disconnectButton).toBeVisible();\n      await expect(disconnectButton).toBeEnabled();\n\n      // Mock the DELETE to succeed\n      await page.route(\n        \"**/api/admin/web-search/content-providers/10\",\n        async (route) => {\n          if (route.request().method() === \"DELETE\") {\n            await page.unroute(\"**/api/admin/web-search/content-providers\");\n            await page.route(\n              \"**/api/admin/web-search/content-providers\",\n              async (route) => {\n                if (route.request().method() === \"GET\") {\n                  await route.fulfill({\n                    status: 200,\n                    json: [{ ...FAKE_CONTENT_PROVIDERS.exa, is_active: true }],\n                  });\n                } else {\n                  await route.continue();\n                }\n              }\n            );\n            await route.fulfill({ status: 200, json: {} });\n          } else {\n            await route.continue();\n          }\n        }\n      );\n\n      await disconnectButton.click();\n\n      const confirmDialog = page.getByRole(\"dialog\");\n      await expect(confirmDialog).toBeVisible({ timeout: 5000 });\n      await expect(confirmDialog).toContainText(\"Disconnect Firecrawl\");\n\n      await expectElementScreenshot(confirmDialog, {\n        name: \"web-search-disconnect-content-non-active-modal\",\n      });\n\n      const confirmButton = confirmDialog.getByRole(\"button\", {\n        name: \"Disconnect\",\n      });\n      await confirmButton.click();\n\n      await expect(\n        firecrawlCard.getByRole(\"button\", { name: \"Connect\" })\n      ).toBeVisible({ timeout: 10000 });\n    });\n\n    test(\"should show replacement dropdown when disconnecting active content provider with alternatives\", async ({\n      page,\n    }) => {\n      // Firecrawl is active, Exa is also configured\n      const contentProviders = [\n        { ...FAKE_CONTENT_PROVIDERS.firecrawl },\n        { ...FAKE_CONTENT_PROVIDERS.exa },\n      ];\n      await mockWebSearchApis(page, [], contentProviders);\n\n      await page.goto(WEB_SEARCH_URL);\n      await page.waitForSelector(\"text=Web Crawler\", { timeout: 20000 });\n\n      const firecrawlCard = findProviderCard(page, \"Firecrawl\");\n      await firecrawlCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      await firecrawlCard.hover();\n      const disconnectButton = firecrawlCard.getByRole(\"button\", {\n        name: \"Disconnect Firecrawl\",\n      });\n      await disconnectButton.click();\n\n      const confirmDialog = page.getByRole(\"dialog\");\n      await expect(confirmDialog).toBeVisible({ timeout: 5000 });\n\n      // Should show replacement dropdown\n      await expect(\n        confirmDialog.getByText(\"Search history will be preserved\")\n      ).toBeVisible();\n\n      // Disconnect should be enabled because first replacement is auto-selected\n      const confirmButton = confirmDialog.getByRole(\"button\", {\n        name: \"Disconnect\",\n      });\n      await expect(confirmButton).toBeEnabled();\n\n      await expectElementScreenshot(confirmDialog, {\n        name: \"web-search-disconnect-content-active-with-alt-modal\",\n      });\n    });\n\n    test(\"should not show disconnect for Onyx Web Crawler (built-in)\", async ({\n      page,\n    }) => {\n      await mockWebSearchApis(page, [], []);\n\n      await page.goto(WEB_SEARCH_URL);\n      await page.waitForSelector(\"text=Web Crawler\", { timeout: 20000 });\n\n      const onyxCard = findProviderCard(page, \"Onyx Web Crawler\");\n      await onyxCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      const disconnectButton = onyxCard.getByRole(\"button\", {\n        name: \"Disconnect Onyx Web Crawler\",\n      });\n      await expect(disconnectButton).not.toBeVisible();\n    });\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/web-search/svc.ts",
    "content": "import type { Page, Locator } from \"@playwright/test\";\n\nexport const WEB_SEARCH_URL = \"/admin/configuration/web-search\";\n\nexport const FAKE_SEARCH_PROVIDERS = {\n  exa: {\n    id: 1,\n    name: \"Exa\",\n    provider_type: \"exa\",\n    is_active: true,\n    config: null,\n    has_api_key: true,\n  },\n  brave: {\n    id: 2,\n    name: \"Brave\",\n    provider_type: \"brave\",\n    is_active: false,\n    config: null,\n    has_api_key: true,\n  },\n};\n\nexport const FAKE_CONTENT_PROVIDERS = {\n  firecrawl: {\n    id: 10,\n    name: \"Firecrawl\",\n    provider_type: \"firecrawl\",\n    is_active: true,\n    config: { base_url: \"https://api.firecrawl.dev/v2/scrape\" },\n    has_api_key: true,\n  },\n  exa: {\n    id: 11,\n    name: \"Exa\",\n    provider_type: \"exa\",\n    is_active: false,\n    config: null,\n    has_api_key: true,\n  },\n};\n\nexport function findProviderCard(page: Page, providerLabel: string): Locator {\n  return page\n    .locator(\"div.rounded-16\")\n    .filter({ hasText: providerLabel })\n    .first();\n}\n\nexport function mainContainer(page: Page): Locator {\n  return page.locator(\"[data-main-container]\");\n}\n\nexport async function openProviderModal(\n  page: Page,\n  providerLabel: string\n): Promise<void> {\n  const card = findProviderCard(page, providerLabel);\n  await card.waitFor({ state: \"visible\", timeout: 10000 });\n\n  // First try to find the Connect button\n  const connectButton = card.getByRole(\"button\", { name: \"Connect\" });\n  if (await connectButton.isVisible({ timeout: 1000 }).catch(() => false)) {\n    await connectButton.click();\n    return;\n  }\n\n  // If no Connect button, click the Edit icon button to update credentials\n  const editButton = card.getByRole(\"button\", { name: /^Edit / });\n  await editButton.waitFor({ state: \"visible\", timeout: 5000 });\n  await editButton.click();\n}\n\nexport async function mockWebSearchApis(\n  page: Page,\n  searchProviders: (typeof FAKE_SEARCH_PROVIDERS)[keyof typeof FAKE_SEARCH_PROVIDERS][],\n  contentProviders: (typeof FAKE_CONTENT_PROVIDERS)[keyof typeof FAKE_CONTENT_PROVIDERS][]\n): Promise<void> {\n  await page.route(\n    \"**/api/admin/web-search/search-providers\",\n    async (route) => {\n      if (route.request().method() === \"GET\") {\n        await route.fulfill({ status: 200, json: searchProviders });\n      } else {\n        await route.continue();\n      }\n    }\n  );\n\n  await page.route(\n    \"**/api/admin/web-search/content-providers\",\n    async (route) => {\n      if (route.request().method() === \"GET\") {\n        await route.fulfill({ status: 200, json: contentProviders });\n      } else {\n        await route.continue();\n      }\n    }\n  );\n}\n"
  },
  {
    "path": "web/tests/e2e/admin/web-search/web_content_providers.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { WEB_SEARCH_URL, findProviderCard, openProviderModal } from \"./svc\";\n\ntest.describe(\"Web Content Provider Configuration\", () => {\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n\n    await page.goto(WEB_SEARCH_URL);\n    await page.waitForLoadState(\"networkidle\");\n\n    // Wait for page to fully load\n    await page.waitForSelector(\"text=Web Crawler\", { timeout: 20000 });\n\n    console.log(\"[web-content-test] Page loaded successfully\");\n  });\n\n  test.describe(\"Firecrawl Provider\", () => {\n    const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;\n\n    test.skip(\n      !FIRECRAWL_API_KEY,\n      \"FIRECRAWL_API_KEY environment variable not set\"\n    );\n\n    test(\"should configure Firecrawl as web crawler\", async ({ page }) => {\n      // Click Connect on the Firecrawl card (or key icon if already configured)\n      await openProviderModal(page, \"Firecrawl\");\n\n      const modalDialog = page.getByRole(\"dialog\");\n      await expect(modalDialog).toBeVisible({ timeout: 10000 });\n      await expect(\n        page.getByText(\"Set up Firecrawl\", { exact: false })\n      ).toBeVisible();\n\n      // Firecrawl has a base URL field (shown first) and API key\n      const baseUrlInput = page.locator('input[placeholder=\"https://\"]');\n      await baseUrlInput.waitFor({ state: \"visible\", timeout: 5000 });\n      // Don't check value - it might have a custom value from previous config\n\n      // Enter API key - clear first in case modal opened with masked credentials.\n      const apiKeyInput = modalDialog.getByTestId(\"web-provider-api-key-input\");\n      await apiKeyInput.waitFor({ state: \"visible\", timeout: 5000 });\n      await apiKeyInput.clear();\n      await apiKeyInput.fill(FIRECRAWL_API_KEY!);\n\n      const modalConnectButton = modalDialog.getByRole(\"button\", {\n        name: \"Connect\",\n        exact: true,\n      });\n      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });\n      await modalConnectButton.click();\n\n      console.log(\n        \"[web-content-test] Clicked Connect, waiting for validation...\"\n      );\n\n      await expect(modalDialog).not.toBeVisible({ timeout: 30000 });\n\n      console.log(\n        \"[web-content-test] Modal closed, verifying Firecrawl is active...\"\n      );\n\n      await page.waitForLoadState(\"networkidle\");\n\n      const firecrawlCard = findProviderCard(page, \"Firecrawl\");\n      await expect(\n        firecrawlCard.getByRole(\"button\", { name: \"Current Crawler\" })\n      ).toBeVisible({ timeout: 15000 });\n\n      console.log(\"[web-content-test] Firecrawl configured successfully\");\n    });\n\n    test(\"should switch back to Onyx Web Crawler from Firecrawl\", async ({\n      page,\n    }) => {\n      // First, ensure Firecrawl is configured and active\n      const firecrawlCard = findProviderCard(page, \"Firecrawl\");\n      await firecrawlCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      const connectButton = firecrawlCard.getByRole(\"button\", {\n        name: \"Connect\",\n      });\n      const setDefaultButton = firecrawlCard.getByRole(\"button\", {\n        name: \"Set as Default\",\n      });\n\n      // Only configure if Connect button is visible (not already configured)\n      if (await connectButton.isVisible()) {\n        await connectButton.click();\n\n        const modalDialog = page.getByRole(\"dialog\");\n        await expect(modalDialog).toBeVisible({ timeout: 10000 });\n        await expect(\n          page.getByText(\"Set up Firecrawl\", { exact: false })\n        ).toBeVisible();\n\n        // Enter API key - clear first in case modal opened with masked credentials.\n        const apiKeyInput = modalDialog.getByTestId(\n          \"web-provider-api-key-input\"\n        );\n        await apiKeyInput.waitFor({ state: \"visible\", timeout: 5000 });\n        await apiKeyInput.clear();\n        await apiKeyInput.fill(FIRECRAWL_API_KEY!);\n\n        await modalDialog\n          .getByRole(\"button\", { name: \"Connect\", exact: true })\n          .click();\n        await expect(modalDialog).not.toBeVisible({ timeout: 60000 });\n        await page.waitForLoadState(\"networkidle\");\n      } else if (await setDefaultButton.isVisible()) {\n        // If already configured but not active, set as default\n        await setDefaultButton.click();\n        await page.waitForLoadState(\"networkidle\");\n      }\n\n      // Verify Firecrawl is now the current crawler\n      const updatedFirecrawlCard = findProviderCard(page, \"Firecrawl\");\n      await expect(\n        updatedFirecrawlCard.getByRole(\"button\", { name: \"Current Crawler\" })\n      ).toBeVisible({ timeout: 15000 });\n\n      console.log(\n        \"[web-content-test] Firecrawl configured, now switching to Onyx Web Crawler...\"\n      );\n\n      // Switch to Onyx Web Crawler\n      const onyxCrawlerCard = findProviderCard(page, \"Onyx Web Crawler\");\n      await onyxCrawlerCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      const onyxSetDefault = onyxCrawlerCard.getByRole(\"button\", {\n        name: \"Set as Default\",\n      });\n\n      if (await onyxSetDefault.isVisible()) {\n        await onyxSetDefault.click();\n        await page.waitForLoadState(\"networkidle\");\n      }\n\n      await expect(\n        onyxCrawlerCard.getByRole(\"button\", { name: \"Current Crawler\" })\n      ).toBeVisible({ timeout: 15000 });\n\n      console.log(\"[web-content-test] Switched back to Onyx Web Crawler\");\n    });\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/admin/web-search/web_search_providers.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { WEB_SEARCH_URL, findProviderCard, openProviderModal } from \"./svc\";\n\ntest.describe(\"Web Search Provider Configuration\", () => {\n  test.beforeEach(async ({ page }) => {\n    // Log in as admin before each test\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n\n    // Navigate to web search config page\n    await page.goto(WEB_SEARCH_URL);\n    await page.waitForLoadState(\"networkidle\");\n\n    // Wait for page to fully load - look for the Search Engine section heading\n    await page.waitForSelector(\"text=Search Engine\", { timeout: 20000 });\n\n    console.log(\"[web-search-test] Page loaded successfully\");\n  });\n\n  test.describe(\"Exa Provider\", () => {\n    const EXA_API_KEY = process.env.EXA_API_KEY;\n\n    test.skip(!EXA_API_KEY, \"EXA_API_KEY environment variable not set\");\n\n    test.skip(\"should configure Exa as web search provider\", async ({\n      page,\n    }) => {\n      // Click Connect on the Exa card (or key icon if already configured)\n      await openProviderModal(page, \"Exa\");\n\n      // Wait for modal to open - Modal uses Radix Dialog with role=\"dialog\"\n      const modalDialog = page.getByRole(\"dialog\", { name: /set up exa/i });\n      await expect(modalDialog).toBeVisible({ timeout: 10000 });\n\n      // Enter API key - clear first in case modal opened with masked credentials\n      // Note: PasswordInputTypeIn uses type=\"text\" with custom ∗ masking per design guidelines\n      const apiKeyInput = modalDialog.getByLabel(/api key/i);\n      await apiKeyInput.waitFor({ state: \"visible\", timeout: 5000 });\n      await apiKeyInput.clear();\n      await apiKeyInput.fill(EXA_API_KEY!);\n\n      // Click Connect in modal - scope to the dialog to avoid matching other Connect buttons\n      const modalConnectButton = modalDialog.getByRole(\"button\", {\n        name: \"Connect\",\n        exact: true,\n      });\n      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });\n      await modalConnectButton.click();\n\n      console.log(\n        \"[web-search-test] Clicked Connect, waiting for validation...\"\n      );\n\n      // Wait for modal to close\n      await expect(modalDialog).not.toBeVisible({ timeout: 30000 });\n\n      console.log(\n        \"[web-search-test] Modal closed, verifying provider is active...\"\n      );\n\n      // Wait for page to update\n      await page.waitForLoadState(\"networkidle\");\n\n      // Verify Exa is now the current default - look for \"Current Default\" button in the Exa card\n      const exaCard = findProviderCard(page, \"Exa\");\n      await expect(\n        exaCard.getByRole(\"button\", { name: \"Current Default\" })\n      ).toBeVisible({ timeout: 15000 });\n\n      console.log(\"[web-search-test] Exa provider configured successfully\");\n    });\n  });\n\n  test.describe(\"Google PSE Provider\", () => {\n    const GOOGLE_PSE_API_KEY = process.env.GOOGLE_PSE_API_KEY;\n    const GOOGLE_PSE_SEARCH_ENGINE_ID = process.env.GOOGLE_PSE_SEARCH_ENGINE_ID;\n\n    test.skip(\n      !GOOGLE_PSE_API_KEY || !GOOGLE_PSE_SEARCH_ENGINE_ID,\n      \"GOOGLE_PSE_API_KEY or GOOGLE_PSE_SEARCH_ENGINE_ID environment variable not set\"\n    );\n\n    test(\"should configure Google PSE as web search provider\", async ({\n      page,\n    }) => {\n      // Click Connect on the Google PSE card\n      await openProviderModal(page, \"Google PSE\");\n\n      // Wait for modal to open\n      const modalDialog = page.getByRole(\"dialog\", {\n        name: /set up google pse/i,\n      });\n      await expect(modalDialog).toBeVisible({ timeout: 10000 });\n\n      // Google PSE requires both Search Engine ID and API key\n      // Enter Search Engine ID\n      const searchEngineIdInput = page.locator(\n        'input[placeholder=\"Enter search engine ID\"]'\n      );\n      await searchEngineIdInput.waitFor({ state: \"visible\", timeout: 5000 });\n      await searchEngineIdInput.fill(GOOGLE_PSE_SEARCH_ENGINE_ID!);\n\n      // Enter API key\n      const apiKeyInput = modalDialog.getByLabel(/api key/i);\n      await apiKeyInput.waitFor({ state: \"visible\", timeout: 5000 });\n      await apiKeyInput.fill(GOOGLE_PSE_API_KEY!);\n\n      // Click Connect in modal\n      const modalConnectButton = modalDialog.getByRole(\"button\", {\n        name: \"Connect\",\n        exact: true,\n      });\n      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });\n      await modalConnectButton.click();\n\n      console.log(\n        \"[web-search-test] Clicked Connect for Google PSE, waiting for validation...\"\n      );\n\n      // Wait for modal to close\n      await expect(modalDialog).not.toBeVisible({ timeout: 30000 });\n\n      console.log(\n        \"[web-search-test] Modal closed, verifying Google PSE is active...\"\n      );\n\n      // Wait for page to update\n      await page.waitForLoadState(\"networkidle\");\n\n      // Verify Google PSE is now the current default\n      const googleCard = findProviderCard(page, \"Google PSE\");\n      await expect(\n        googleCard.getByRole(\"button\", { name: \"Current Default\" })\n      ).toBeVisible({ timeout: 15000 });\n\n      console.log(\n        \"[web-search-test] Google PSE provider configured successfully\"\n      );\n    });\n\n    test(\"should reconnect with stored API key using update key button\", async ({\n      page,\n    }) => {\n      // First, configure Google PSE if not already configured\n      const googleCard = findProviderCard(page, \"Google PSE\");\n      await googleCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      const connectButton = googleCard.getByRole(\"button\", { name: \"Connect\" });\n\n      // Only configure if Connect button is visible (not already configured)\n      if (await connectButton.isVisible()) {\n        await connectButton.click();\n        const setupDialog = page.getByRole(\"dialog\", {\n          name: /set up google pse/i,\n        });\n        await expect(setupDialog).toBeVisible({ timeout: 10000 });\n\n        const searchEngineIdInput = page.locator(\n          'input[placeholder=\"Enter search engine ID\"]'\n        );\n        await searchEngineIdInput.waitFor({ state: \"visible\", timeout: 5000 });\n        await searchEngineIdInput.fill(GOOGLE_PSE_SEARCH_ENGINE_ID!);\n\n        const apiKeyInput = setupDialog.getByLabel(/api key/i);\n        await apiKeyInput.waitFor({ state: \"visible\", timeout: 5000 });\n        await apiKeyInput.fill(GOOGLE_PSE_API_KEY!);\n\n        await setupDialog\n          .getByRole(\"button\", { name: \"Connect\", exact: true })\n          .click();\n        await expect(setupDialog).not.toBeVisible({ timeout: 30000 });\n        await page.waitForLoadState(\"networkidle\");\n      }\n\n      console.log(\n        \"[web-search-test] Google PSE configured, now testing update key button...\"\n      );\n\n      // Now click the Edit icon button\n      const updatedGoogleCard = findProviderCard(page, \"Google PSE\");\n      const editButton = updatedGoogleCard.getByRole(\"button\", {\n        name: /^Edit /,\n      });\n      await expect(editButton).toBeVisible({ timeout: 10000 });\n      await editButton.click();\n\n      // Modal should open with masked API key\n      const modalDialog = page.getByRole(\"dialog\", {\n        name: /set up google pse/i,\n      });\n      await expect(modalDialog).toBeVisible({ timeout: 10000 });\n\n      // Verify the API key input shows masked value\n      // PasswordInputTypeIn displays stored values with ∗ (ASTERISK OPERATOR) per design guidelines\n      const apiKeyInput = modalDialog.getByLabel(/api key/i);\n      await apiKeyInput.waitFor({ state: \"visible\", timeout: 5000 });\n      await expect(apiKeyInput).toHaveValue(\"∗∗∗∗∗∗∗∗∗∗∗∗∗∗∗∗\");\n\n      // Immediately click Connect without changing anything\n      const modalConnectButton = modalDialog.getByRole(\"button\", {\n        name: \"Connect\",\n        exact: true,\n      });\n      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });\n      await modalConnectButton.click();\n\n      console.log(\n        \"[web-search-test] Clicked Connect with stored key, waiting for success...\"\n      );\n\n      // Wait for modal to close (success)\n      await expect(modalDialog).not.toBeVisible({ timeout: 30000 });\n\n      console.log(\n        \"[web-search-test] Modal closed, verifying Google PSE is still active...\"\n      );\n\n      // Wait for page to update\n      await page.waitForLoadState(\"networkidle\");\n\n      // Verify Google PSE is still the current default\n      const finalGoogleCard = findProviderCard(page, \"Google PSE\");\n      await expect(\n        finalGoogleCard.getByRole(\"button\", { name: \"Current Default\" })\n      ).toBeVisible({ timeout: 15000 });\n\n      console.log(\n        \"[web-search-test] Successfully reconnected with stored API key\"\n      );\n    });\n\n    test(\"should fail when changing search engine ID with stored API key\", async ({\n      page,\n    }) => {\n      // First, configure Google PSE if not already configured\n      const googleCard = findProviderCard(page, \"Google PSE\");\n      await googleCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      const connectButton = googleCard.getByRole(\"button\", { name: \"Connect\" });\n\n      // Only configure if Connect button is visible (not already configured)\n      if (await connectButton.isVisible()) {\n        await connectButton.click();\n        const setupDialog = page.getByRole(\"dialog\", {\n          name: /set up google pse/i,\n        });\n        await expect(setupDialog).toBeVisible({ timeout: 10000 });\n\n        const searchEngineIdInput = page.locator(\n          'input[placeholder=\"Enter search engine ID\"]'\n        );\n        await searchEngineIdInput.waitFor({ state: \"visible\", timeout: 5000 });\n        await searchEngineIdInput.fill(GOOGLE_PSE_SEARCH_ENGINE_ID!);\n\n        const apiKeyInput = setupDialog.getByLabel(/api key/i);\n        await apiKeyInput.waitFor({ state: \"visible\", timeout: 5000 });\n        await apiKeyInput.fill(GOOGLE_PSE_API_KEY!);\n\n        await setupDialog\n          .getByRole(\"button\", { name: \"Connect\", exact: true })\n          .click();\n        await expect(setupDialog).not.toBeVisible({ timeout: 30000 });\n        await page.waitForLoadState(\"networkidle\");\n      }\n\n      console.log(\n        \"[web-search-test] Google PSE configured, now testing invalid search engine ID change...\"\n      );\n\n      // Now click the Edit icon button\n      const updatedGoogleCard = findProviderCard(page, \"Google PSE\");\n      const editButton = updatedGoogleCard.getByRole(\"button\", {\n        name: /^Edit /,\n      });\n      await expect(editButton).toBeVisible({ timeout: 10000 });\n      await editButton.click();\n\n      // Modal should open with masked API key\n      const modalDialog = page.getByRole(\"dialog\", {\n        name: /set up google pse/i,\n      });\n      await expect(modalDialog).toBeVisible({ timeout: 10000 });\n\n      // Change the search engine ID to an invalid value\n      const searchEngineIdInput = page.locator(\n        'input[placeholder=\"Enter search engine ID\"]'\n      );\n      await searchEngineIdInput.waitFor({ state: \"visible\", timeout: 5000 });\n      await searchEngineIdInput.clear();\n      await searchEngineIdInput.fill(\"invalid-search-engine-id\");\n\n      // Do NOT change the API key - keep the masked value\n      // PasswordInputTypeIn displays stored values with ∗ (ASTERISK OPERATOR) per design guidelines\n      const apiKeyInput = modalDialog.getByLabel(/api key/i);\n      await expect(apiKeyInput).toHaveValue(\"∗∗∗∗∗∗∗∗∗∗∗∗∗∗∗∗\");\n\n      // Click Connect - should fail because search engine ID doesn't match the stored API key\n      const modalConnectButton = modalDialog.getByRole(\"button\", {\n        name: \"Connect\",\n        exact: true,\n      });\n      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });\n      await modalConnectButton.click();\n\n      console.log(\n        \"[web-search-test] Clicked Connect with invalid search engine ID, waiting for error...\"\n      );\n\n      // Should show error message\n      await expect(page.getByText(/failed|invalid|error/i).first()).toBeVisible(\n        { timeout: 20000 }\n      );\n\n      console.log(\n        \"[web-search-test] Error message displayed as expected for mismatched search engine ID\"\n      );\n    });\n  });\n\n  test.describe(\"Brave Provider\", () => {\n    const BRAVE_SEARCH_API_KEY = process.env.BRAVE_SEARCH_API_KEY;\n\n    test.skip(\n      !BRAVE_SEARCH_API_KEY,\n      \"BRAVE_SEARCH_API_KEY environment variable not set\"\n    );\n\n    test(\"should configure Brave as web search provider\", async ({ page }) => {\n      await openProviderModal(page, \"Brave\");\n\n      const modalDialog = page.getByRole(\"dialog\", { name: /set up brave/i });\n      await expect(modalDialog).toBeVisible({ timeout: 10000 });\n\n      const apiKeyInput = modalDialog.getByLabel(/api key/i);\n      await apiKeyInput.waitFor({ state: \"visible\", timeout: 5000 });\n      await apiKeyInput.clear();\n      await apiKeyInput.fill(BRAVE_SEARCH_API_KEY!);\n\n      const modalConnectButton = modalDialog.getByRole(\"button\", {\n        name: \"Connect\",\n        exact: true,\n      });\n      await expect(modalConnectButton).toBeEnabled({ timeout: 5000 });\n      await modalConnectButton.click();\n\n      await expect(modalDialog).not.toBeVisible({ timeout: 30000 });\n      await page.waitForLoadState(\"networkidle\");\n\n      const braveCard = findProviderCard(page, \"Brave\");\n      await expect(\n        braveCard.getByRole(\"button\", { name: \"Current Default\" })\n      ).toBeVisible({ timeout: 15000 });\n    });\n  });\n\n  test.describe(\"Provider Switching\", () => {\n    // These tests require both providers to be configured\n    const EXA_API_KEY = process.env.EXA_API_KEY;\n    const GOOGLE_PSE_API_KEY = process.env.GOOGLE_PSE_API_KEY;\n    const GOOGLE_PSE_SEARCH_ENGINE_ID = process.env.GOOGLE_PSE_SEARCH_ENGINE_ID;\n\n    test.skip(\n      !EXA_API_KEY || !GOOGLE_PSE_API_KEY || !GOOGLE_PSE_SEARCH_ENGINE_ID,\n      \"Both EXA and Google PSE credentials required\"\n    );\n\n    test(\"should switch between configured providers\", async ({ page }) => {\n      // First, configure Exa if needed\n      const exaCard = findProviderCard(page, \"Exa\");\n      await exaCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      let connectButton = exaCard.getByRole(\"button\", { name: \"Connect\" });\n\n      // Only configure if Connect button is visible (not already configured)\n      if (await connectButton.isVisible()) {\n        await connectButton.click();\n        const exaDialog = page.getByRole(\"dialog\", { name: /set up exa/i });\n        await expect(exaDialog).toBeVisible({ timeout: 10000 });\n\n        const apiKeyInput = exaDialog.getByLabel(/api key/i);\n        await apiKeyInput.waitFor({ state: \"visible\", timeout: 5000 });\n        await apiKeyInput.fill(EXA_API_KEY!);\n\n        await exaDialog\n          .getByRole(\"button\", { name: \"Connect\", exact: true })\n          .click();\n        await expect(exaDialog).not.toBeVisible({ timeout: 30000 });\n        await page.waitForLoadState(\"networkidle\");\n      }\n\n      // Configure Google PSE if needed\n      const googleCard = findProviderCard(page, \"Google PSE\");\n      await googleCard.waitFor({ state: \"visible\", timeout: 10000 });\n\n      connectButton = googleCard.getByRole(\"button\", { name: \"Connect\" });\n\n      if (await connectButton.isVisible()) {\n        await connectButton.click();\n        const googleDialog = page.getByRole(\"dialog\", {\n          name: /set up google pse/i,\n        });\n        await expect(googleDialog).toBeVisible({ timeout: 10000 });\n\n        const searchEngineIdInput = page.locator(\n          'input[placeholder=\"Enter search engine ID\"]'\n        );\n        await searchEngineIdInput.waitFor({ state: \"visible\", timeout: 5000 });\n        await searchEngineIdInput.fill(GOOGLE_PSE_SEARCH_ENGINE_ID!);\n\n        const apiKeyInput = googleDialog.getByLabel(/api key/i);\n        await apiKeyInput.waitFor({ state: \"visible\", timeout: 5000 });\n        await apiKeyInput.fill(GOOGLE_PSE_API_KEY!);\n\n        await googleDialog\n          .getByRole(\"button\", { name: \"Connect\", exact: true })\n          .click();\n        await expect(googleDialog).not.toBeVisible({ timeout: 30000 });\n        await page.waitForLoadState(\"networkidle\");\n      }\n\n      // Now test switching - click \"Set as Default\" on whichever is not current\n      const exaSetDefault = exaCard.getByRole(\"button\", {\n        name: \"Set as Default\",\n      });\n      const googleSetDefault = googleCard.getByRole(\"button\", {\n        name: \"Set as Default\",\n      });\n\n      if (await exaSetDefault.isVisible()) {\n        console.log(\"[web-search-test] Switching to Exa as default...\");\n        await exaSetDefault.click();\n        await page.waitForLoadState(\"networkidle\");\n        await expect(\n          exaCard.getByRole(\"button\", { name: \"Current Default\" })\n        ).toBeVisible({ timeout: 15000 });\n        console.log(\"[web-search-test] Successfully switched to Exa\");\n      } else if (await googleSetDefault.isVisible()) {\n        console.log(\"[web-search-test] Switching to Google PSE as default...\");\n        await googleSetDefault.click();\n        await page.waitForLoadState(\"networkidle\");\n        await expect(\n          googleCard.getByRole(\"button\", { name: \"Current Default\" })\n        ).toBeVisible({ timeout: 15000 });\n        console.log(\"[web-search-test] Successfully switched to Google PSE\");\n      }\n    });\n  });\n\n  // TODO: @jessica - add Serper provider tests\n});\n"
  },
  {
    "path": "web/tests/e2e/agents/create_and_edit_agent.spec.ts",
    "content": "import { test, expect, Page, Browser } from \"@playwright/test\";\nimport { loginAs, loginAsWorkerUser } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\nimport { expectScreenshot } from \"@tests/e2e/utils/visualRegression\";\n\n// --- Locator Helper Functions ---\nconst getNameInput = (page: Page) => page.locator('input[name=\"name\"]');\nconst getDescriptionInput = (page: Page) =>\n  page.locator('textarea[name=\"description\"]');\nconst getInstructionsTextarea = (page: Page) =>\n  page.locator('textarea[name=\"instructions\"]');\nconst getReminderTextarea = (page: Page) =>\n  page.locator('textarea[name=\"reminders\"]');\nconst getKnowledgeToggle = (page: Page) =>\n  page.locator('button[role=\"switch\"][name=\"enable_knowledge\"]');\n\n// Helper function to set date using InputDatePicker (sets to today's date)\nconst setKnowledgeCutoffDate = async (page: Page) => {\n  // Find and click the date picker button within the Knowledge Cutoff Date section\n  const datePickerButton = page\n    .locator('label:has-text(\"Knowledge Cutoff Date\")')\n    .locator(\"..\")\n    .locator('button:has-text(\"Select Date\"), button:has-text(\"/\")');\n\n  await datePickerButton.click();\n\n  // Wait for the popover to open\n  await page.waitForSelector('[role=\"dialog\"]', {\n    state: \"visible\",\n    timeout: 5000,\n  });\n\n  // Click the \"Today\" button to set to today's date\n  const todayButton = page\n    .locator('[role=\"dialog\"]')\n    .getByRole(\"button\", { name: \"Today\" })\n    .first();\n  await todayButton.click();\n\n  // The popover should close automatically after selection\n  await page.waitForSelector('[role=\"dialog\"]', {\n    state: \"hidden\",\n    timeout: 5000,\n  });\n};\nconst getStarterMessageInput = (page: Page, index: number = 0) =>\n  page.locator(`input[name=\"starter_messages.${index}\"]`);\nconst getCreateSubmitButton = (page: Page) =>\n  page.locator('button[type=\"submit\"]:has-text(\"Create\")');\nconst getUpdateSubmitButton = (page: Page) =>\n  page.locator('button[type=\"submit\"]:has-text(\"Save\")');\n\n// Helper to navigate to document sets view in the new Knowledge UI\nconst navigateToDocumentSetsView = async (page: Page) => {\n  // First, check if we need to click \"View / Edit\" or \"Add\" button to open the knowledge panel\n  const viewEditButton = page.getByLabel(\"knowledge-view-edit\");\n  const addButton = page.getByLabel(\"knowledge-add-button\");\n\n  if (await viewEditButton.isVisible()) {\n    await viewEditButton.click();\n  } else if (await addButton.isVisible()) {\n    await addButton.click();\n  }\n\n  // Now click on \"Document Sets\" in the add view or sidebar\n  const documentSetsButton = page.getByLabel(\"knowledge-add-document-sets\");\n  if (await documentSetsButton.isVisible()) {\n    await documentSetsButton.click();\n  } else {\n    // Try the sidebar version\n    const sidebarDocumentSets = page.getByLabel(\n      \"knowledge-sidebar-document-sets\"\n    );\n    if (await sidebarDocumentSets.isVisible()) {\n      await sidebarDocumentSets.click();\n    }\n  }\n\n  // Wait for the document sets table to appear\n  await page.waitForTimeout(500);\n};\n\n// Helper to select a document set by ID in the new Knowledge UI\nconst selectDocumentSet = async (page: Page, documentSetId: number) => {\n  const documentSetRow = page.getByLabel(`document-set-row-${documentSetId}`);\n  await expect(documentSetRow).toBeVisible({ timeout: 5000 });\n  await documentSetRow.click();\n};\n\n// Helper to navigate to files view in the new Knowledge UI\nconst navigateToFilesView = async (page: Page) => {\n  // First, check if we need to click \"View / Edit\" or \"Add\" button to open the knowledge panel\n  const viewEditButton = page.getByLabel(\"knowledge-view-edit\");\n  const addButton = page.getByLabel(\"knowledge-add-button\");\n\n  if (await viewEditButton.isVisible()) {\n    await viewEditButton.click();\n  } else if (await addButton.isVisible()) {\n    await addButton.click();\n  }\n\n  // Now click on \"Your Files\" in the add view or sidebar\n  const filesButton = page.getByLabel(\"knowledge-add-files\");\n  if (await filesButton.isVisible()) {\n    await filesButton.click();\n  } else {\n    // Try the sidebar version\n    const sidebarFiles = page.getByLabel(\"knowledge-sidebar-files\");\n    if (await sidebarFiles.isVisible()) {\n      await sidebarFiles.click();\n    }\n  }\n\n  // Wait for the files table to appear\n  await page.waitForTimeout(500);\n};\n\ntest.describe(\"Assistant Creation and Edit Verification\", () => {\n  // Configure this entire suite to run serially\n  test.describe.configure({ mode: \"serial\" });\n\n  test.describe(\"User Files Only\", () => {\n    let userFilesAssistantId: number | null = null;\n\n    test.afterAll(async ({ browser }: { browser: Browser }) => {\n      if (userFilesAssistantId !== null) {\n        const context = await browser.newContext({\n          storageState: \"admin_auth.json\",\n        });\n        const page = await context.newPage();\n        const cleanupClient = new OnyxApiClient(page.request);\n        await cleanupClient.deleteAgent(userFilesAssistantId);\n        await context.close();\n        console.log(\n          \"[test] Cleanup completed - deleted User Files Only assistant\"\n        );\n      }\n    });\n\n    test(\"should create assistant with user files when no connectors exist @exclusive\", async ({\n      page,\n    }, testInfo) => {\n      await page.context().clearCookies();\n      await loginAsWorkerUser(page, testInfo.workerIndex);\n\n      const agentName = \"E2E User Files Assistant\";\n      const agentDescription = \"Testing user file uploads without connectors\";\n      const assistantInstructions = \"Help users with their documents.\";\n\n      await page.goto(\"/app/agents/create\");\n\n      // Fill in basic assistant details\n      await getNameInput(page).fill(agentName);\n      await getDescriptionInput(page).fill(agentDescription);\n      await getInstructionsTextarea(page).fill(assistantInstructions);\n\n      // Enable Knowledge toggle\n      const knowledgeToggle = getKnowledgeToggle(page);\n      await knowledgeToggle.scrollIntoViewIfNeeded();\n      await expect(knowledgeToggle).toHaveAttribute(\"aria-checked\", \"false\");\n      await knowledgeToggle.click();\n\n      // Navigate to files view in the new Knowledge UI\n      await navigateToFilesView(page);\n\n      // Verify \"Add File\" button is visible in the new UI\n      const addFileButton = page.getByRole(\"button\", {\n        name: /add file/i,\n      });\n      await expect(addFileButton).toBeVisible();\n\n      // Submit the assistant creation form\n      await getCreateSubmitButton(page).click();\n\n      // Verify redirection to chat page with the new assistant\n      await page.waitForURL(/.*\\/app\\?agentId=\\d+.*/);\n      const url = page.url();\n      const agentIdMatch = url.match(/agentId=(\\d+)/);\n      expect(agentIdMatch).toBeTruthy();\n\n      // Store assistant ID for cleanup\n      if (agentIdMatch) {\n        userFilesAssistantId = Number(agentIdMatch[1]);\n      }\n\n      console.log(\n        `[test] Successfully created assistant without connectors: ${agentName}`\n      );\n    });\n  });\n\n  test.describe(\"With Knowledge\", () => {\n    let ccPairId: number;\n    let documentSetId: number;\n    let knowledgeAssistantId: number | null = null;\n\n    test.afterAll(async ({ browser }: { browser: Browser }) => {\n      // Cleanup using browser fixture (worker-scoped) to avoid per-test fixture limitation\n      const context = await browser.newContext({\n        storageState: \"admin_auth.json\",\n      });\n      const page = await context.newPage();\n      const cleanupClient = new OnyxApiClient(page.request);\n\n      if (knowledgeAssistantId !== null) {\n        await cleanupClient.deleteAgent(knowledgeAssistantId);\n      }\n      if (ccPairId && documentSetId) {\n        await cleanupClient.deleteDocumentSet(documentSetId);\n        await cleanupClient.deleteCCPair(ccPairId);\n      }\n\n      await context.close();\n      console.log(\n        \"[test] Cleanup completed - deleted assistant, connector, and document set\"\n      );\n    });\n\n    test(\"should create and edit assistant with Knowledge enabled\", async ({\n      page,\n    }, testInfo) => {\n      // Login as admin to create connector and document set (requires admin permissions)\n      await page.context().clearCookies();\n      await loginAs(page, \"admin\");\n\n      // Create a connector and document set to enable the Knowledge toggle\n      const onyxApiClient = new OnyxApiClient(page.request);\n      ccPairId = await onyxApiClient.createFileConnector(\"Test Connector\");\n      documentSetId = await onyxApiClient.createDocumentSet(\n        \"Test Document Set\",\n        [ccPairId]\n      );\n\n      // Navigate to a page to ensure session is fully established\n      await page.goto(\"/app\");\n      await page.waitForLoadState(\"networkidle\");\n\n      // Now login as a regular user to test the assistant creation\n      await page.context().clearCookies();\n      await loginAsWorkerUser(page, testInfo.workerIndex);\n\n      // --- Initial Values ---\n      const agentName = \"Test Assistant 1\";\n      const agentDescription = \"This is a test assistant description.\";\n      const assistantInstructions = \"These are the test instructions.\";\n      const assistantReminder = \"Initial reminder.\";\n      const assistantStarterMessage = \"Initial starter message?\";\n\n      // --- Edited Values ---\n      const editedAssistantName = \"Edited Assistant\";\n      const editedAssistantDescription = \"This is the edited description.\";\n      const editedAssistantInstructions = \"These are the edited instructions.\";\n      const editedAssistantReminder = \"Edited reminder.\";\n      const editedAssistantStarterMessage = \"Edited starter message?\";\n\n      // Navigate to the assistant creation page\n      await page.goto(\"/app/agents/create\");\n\n      // --- Fill in Initial Assistant Details ---\n      await getNameInput(page).fill(agentName);\n      await getDescriptionInput(page).fill(agentDescription);\n      await getInstructionsTextarea(page).fill(assistantInstructions);\n\n      // Reminder\n      await getReminderTextarea(page).fill(assistantReminder);\n\n      // Knowledge Cutoff Date\n      await setKnowledgeCutoffDate(page);\n\n      // Enable Knowledge toggle (should now be enabled due to connector)\n      const knowledgeToggle = getKnowledgeToggle(page);\n      await knowledgeToggle.scrollIntoViewIfNeeded();\n\n      // Verify toggle is NOT disabled\n      await expect(knowledgeToggle).not.toBeDisabled();\n      await knowledgeToggle.click();\n\n      // Navigate to document sets view and select the document set\n      await navigateToDocumentSetsView(page);\n      await selectDocumentSet(page, documentSetId);\n\n      // Starter Message\n      await getStarterMessageInput(page).fill(assistantStarterMessage);\n\n      // Submit the creation form\n      await getCreateSubmitButton(page).click();\n\n      // Verify redirection to chat page with the new assistant ID\n      await page.waitForURL(/.*\\/app\\?agentId=\\d+.*/);\n      const url = page.url();\n      const agentIdMatch = url.match(/agentId=(\\d+)/);\n      expect(agentIdMatch).toBeTruthy();\n      const agentId = agentIdMatch ? agentIdMatch[1] : null;\n      expect(agentId).not.toBeNull();\n      await expectScreenshot(page, {\n        name: \"welcome-page-with-assistant\",\n        hide: [\"[data-testid='AppInputBar/llm-popover-trigger']\"],\n      });\n\n      // Store assistant ID for cleanup\n      knowledgeAssistantId = Number(agentId);\n\n      // Navigate directly to the edit page\n      await page.goto(`/app/agents/edit/${agentId}`);\n      await page.waitForURL(`**/app/agents/edit/${agentId}`);\n\n      // Verify basic fields\n      await expect(getNameInput(page)).toHaveValue(agentName);\n      await expect(getDescriptionInput(page)).toHaveValue(agentDescription);\n      await expect(getInstructionsTextarea(page)).toHaveValue(\n        assistantInstructions\n      );\n\n      // Verify advanced fields\n      await expect(getReminderTextarea(page)).toHaveValue(assistantReminder);\n      // Knowledge toggle should be enabled since we have a connector\n      await expect(getKnowledgeToggle(page)).toHaveAttribute(\n        \"aria-checked\",\n        \"true\"\n      );\n      // Verify document set is selected by navigating to the document sets view\n      await navigateToDocumentSetsView(page);\n      const documentSetRow = page.getByLabel(\n        `document-set-row-${documentSetId}`\n      );\n      await expect(documentSetRow).toBeVisible();\n      // The row should have a checked checkbox (data-selected attribute)\n      await expect(documentSetRow).toHaveAttribute(\"data-selected\", \"true\");\n\n      await expect(getStarterMessageInput(page)).toHaveValue(\n        assistantStarterMessage\n      );\n\n      // --- Edit Assistant Details ---\n      await getNameInput(page).fill(editedAssistantName);\n      await getDescriptionInput(page).fill(editedAssistantDescription);\n      await getInstructionsTextarea(page).fill(editedAssistantInstructions);\n      await getReminderTextarea(page).fill(editedAssistantReminder);\n      await setKnowledgeCutoffDate(page);\n      await getStarterMessageInput(page).fill(editedAssistantStarterMessage);\n\n      // Submit the edit form\n      await getUpdateSubmitButton(page).click();\n\n      // Verify redirection back to the chat page\n      await page.waitForURL(/.*\\/app\\?agentId=\\d+.*/);\n      expect(page.url()).toContain(`agentId=${agentId}`);\n\n      // --- Navigate to Edit Page Again and Verify Edited Values ---\n      await page.goto(`/app/agents/edit/${agentId}`);\n      await page.waitForURL(`**/app/agents/edit/${agentId}`);\n\n      // Verify basic fields\n      await expect(getNameInput(page)).toHaveValue(editedAssistantName);\n      await expect(getDescriptionInput(page)).toHaveValue(\n        editedAssistantDescription\n      );\n      await expect(getInstructionsTextarea(page)).toHaveValue(\n        editedAssistantInstructions\n      );\n\n      // Verify advanced fields\n      await expect(getReminderTextarea(page)).toHaveValue(\n        editedAssistantReminder\n      );\n      await expect(getKnowledgeToggle(page)).toHaveAttribute(\n        \"aria-checked\",\n        \"true\"\n      );\n      // Verify document set is still selected after edit\n      await navigateToDocumentSetsView(page);\n      const documentSetRowAfterEdit = page.getByLabel(\n        `document-set-row-${documentSetId}`\n      );\n      await expect(documentSetRowAfterEdit).toBeVisible();\n      await expect(documentSetRowAfterEdit).toHaveAttribute(\n        \"data-selected\",\n        \"true\"\n      );\n\n      await expect(getStarterMessageInput(page)).toHaveValue(\n        editedAssistantStarterMessage\n      );\n\n      console.log(\n        `[test] Successfully tested Knowledge-enabled assistant: ${agentName}`\n      );\n    });\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/agents/llm_provider_rbac.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { Page } from \"@playwright/test\";\nimport { loginAsRandomUser, loginAs } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\n/**\n * This test verifies that LLM Provider RBAC works correctly in the assistant editor.\n *\n * Test scenario:\n * 1. Create a restricted LLM provider (not public, assigned to specific group)\n * 2. Create a user who doesn't have access to the restricted provider\n * 3. Navigate to assistant creation page\n * 4. Verify the restricted provider doesn't appear in the LLM selector\n */\n\nconst getDefaultModelSelector = (page: Page) =>\n  page\n    .locator(\n      'button:has-text(\"User Default\"), button:has-text(\"System Default\")'\n    )\n    .first();\n\nconst getLLMProviderOptions = async (page: Page) => {\n  // Click the selector to open the dropdown\n  await getDefaultModelSelector(page).click();\n\n  // Wait for the dropdown to be visible\n  await page.waitForSelector('[role=\"option\"]', { state: \"visible\" });\n\n  // Get all visible options\n  const options = await page.locator('[role=\"option\"]').allTextContents();\n\n  // Close the dropdown by clicking elsewhere\n  await page.keyboard.press(\"Escape\");\n\n  return options;\n};\n\ntest(\"Restricted LLM Provider should not appear for unauthorized users\", async ({\n  page,\n}) => {\n  await page.context().clearCookies();\n\n  // Step 1: Login as admin to create test fixtures\n  await loginAs(page, \"admin\");\n  await page.waitForLoadState(\"networkidle\");\n\n  // Step 2: Create a user group that will have access to the restricted provider\n  const restrictedGroupName = `Restricted Group ${Date.now()}`;\n  let groupId: number | null = null;\n  let providerId: number | null = null;\n\n  const client = new OnyxApiClient(page.request);\n\n  try {\n    groupId = await client.createUserGroup(restrictedGroupName);\n    console.log(`Created user group with ID: ${groupId}`);\n\n    // Step 3: Create a restricted LLM provider assigned to that group\n    const restrictedProviderName = `Restricted Provider ${Date.now()}`;\n    providerId = await client.createRestrictedProvider(\n      restrictedProviderName,\n      groupId\n    );\n    console.log(\n      `Created restricted provider \"${restrictedProviderName}\" with ID: ${providerId}`\n    );\n\n    // Step 4: Logout and login as a random user (who won't be in the restricted group)\n    await page.context().clearCookies();\n    await loginAsRandomUser(page);\n\n    // Step 5: Navigate to the assistant creation page\n    await page.goto(\"/app/agents/create\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // Step 6: Scroll to the Default Model section\n    const defaultModelSection = page.locator(\"text=Default Model\").first();\n    await defaultModelSection.scrollIntoViewIfNeeded();\n\n    // Step 7: Get all available LLM provider options\n    const llmOptions = await getLLMProviderOptions(page);\n\n    // Step 8: Verify that we have some options (at least the default provider)\n    expect(llmOptions.length).toBeGreaterThan(0);\n\n    // Step 9: Verify the restricted provider does NOT appear\n    const hasRestrictedProvider = llmOptions.some((option) =>\n      option.includes(restrictedProviderName)\n    );\n    expect(hasRestrictedProvider).toBe(false);\n\n    // Step 10: Verify that default/public providers DO appear\n    const hasDefaultOption = llmOptions.some(\n      (option) =>\n        option.includes(\"Default\") ||\n        option.includes(\"GPT\") ||\n        option.includes(\"Claude\")\n    );\n    expect(hasDefaultOption).toBe(true);\n\n    console.log(\n      `✓ Verified restricted provider \"${restrictedProviderName}\" does not appear for unauthorized user`\n    );\n  } finally {\n    // Cleanup: Login as admin again to delete test fixtures\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n    await page.waitForLoadState(\"networkidle\");\n\n    if (providerId) {\n      await client.deleteProvider(providerId);\n      console.log(`Deleted provider with ID: ${providerId}`);\n    }\n\n    if (groupId) {\n      await client.deleteUserGroup(groupId);\n      console.log(`Deleted user group with ID: ${groupId}`);\n    }\n  }\n});\n\ntest(\"Default Model selector shows available models\", async ({ page }) => {\n  await page.context().clearCookies();\n  await loginAsRandomUser(page);\n\n  // Navigate to the assistant creation page\n  await page.goto(\"/app/agents/create\");\n  await page.waitForLoadState(\"networkidle\");\n\n  // Scroll to the Default Model section\n  const defaultModelSection = page.locator(\"text=Default Model\").first();\n  await defaultModelSection.scrollIntoViewIfNeeded();\n\n  // Open the model selector\n  await getDefaultModelSelector(page).click();\n  await page.waitForSelector('[role=\"option\"]', { state: \"visible\" });\n\n  // Get all options\n  const options = await page.locator('[role=\"option\"]').allTextContents();\n\n  // Close dropdown\n  await page.keyboard.press(\"Escape\");\n\n  // Verify we have at least the default option\n  expect(options.length).toBeGreaterThan(0);\n\n  // Verify the default/system default option exists\n  const hasDefaultOption = options.some((option) =>\n    option.toLowerCase().includes(\"default\")\n  );\n  expect(hasDefaultOption).toBeTruthy();\n});\n"
  },
  {
    "path": "web/tests/e2e/agents/user_file_attachment.spec.ts",
    "content": "import { test, expect, Page } from \"@playwright/test\";\nimport { loginAsRandomUser } from \"@tests/e2e/utils/auth\";\n\n/**\n * E2E test to verify user files are properly attached to assistants.\n *\n * This test prevents a regression where user_file_ids were not being saved\n * when creating an assistant, causing uploaded files to not be associated\n * with the persona in the database.\n */\n\n// --- Locator Helper Functions ---\nconst getNameInput = (page: Page) => page.locator('input[name=\"name\"]');\nconst getDescriptionInput = (page: Page) =>\n  page.locator('textarea[name=\"description\"]');\nconst getInstructionsTextarea = (page: Page) =>\n  page.locator('textarea[name=\"instructions\"]');\nconst getKnowledgeToggle = (page: Page) =>\n  page.locator('button[role=\"switch\"][name=\"enable_knowledge\"]');\nconst getCreateSubmitButton = (page: Page) =>\n  page.locator('button[type=\"submit\"]:has-text(\"Create\")');\n\nconst extractAssistantIdFromCreateResponse = (\n  payload: Record<string, unknown> | null\n): number | null => {\n  if (!payload) {\n    return null;\n  }\n  const rawId = payload.id ?? payload.assistant_id ?? payload.persona_id;\n  if (typeof rawId === \"number\" && Number.isFinite(rawId)) {\n    return rawId;\n  }\n  if (typeof rawId === \"string\") {\n    const parsed = Number(rawId);\n    if (Number.isFinite(parsed)) {\n      return parsed;\n    }\n  }\n  return null;\n};\n\nconst createAgentAndGetId = async (page: Page): Promise<number> => {\n  const createResponsePromise = page.waitForResponse(\n    (response) => {\n      if (response.request().method() !== \"POST\" || !response.ok()) {\n        return false;\n      }\n      try {\n        const pathname = new URL(response.url()).pathname;\n        return /^\\/api\\/persona\\/?$/.test(pathname);\n      } catch {\n        return false;\n      }\n    },\n    { timeout: 30000 }\n  );\n\n  await getCreateSubmitButton(page).click();\n\n  const createResponse = await createResponsePromise;\n\n  await page.waitForURL(\n    (url) => {\n      const href = typeof url === \"string\" ? url : url.toString();\n      return /\\/app\\?agentId=\\d+/.test(href) || /\\/app\\?chatId=/.test(href);\n    },\n    { timeout: 20000 }\n  );\n\n  const agentIdFromUrl = page.url().match(/agentId=(\\d+)/);\n  if (agentIdFromUrl?.[1]) {\n    return Number(agentIdFromUrl[1]);\n  }\n\n  const createPayload = (await createResponse\n    .json()\n    .catch(() => null)) as Record<string, unknown> | null;\n  const agentIdFromResponse =\n    extractAssistantIdFromCreateResponse(createPayload);\n  if (agentIdFromResponse !== null) {\n    return agentIdFromResponse;\n  }\n\n  throw new Error(\n    `Assistant ID missing from URL (${page.url()}) and create response payload`\n  );\n};\n\n// Helper to navigate to files view in the Knowledge UI\nconst navigateToFilesView = async (page: Page) => {\n  // Check if we need to click \"View / Edit\" or \"Add\" button to open the knowledge panel\n  const viewEditButton = page.getByLabel(\"knowledge-view-edit\");\n  const addButton = page.getByLabel(\"knowledge-add-button\");\n\n  if (await viewEditButton.isVisible()) {\n    await viewEditButton.click();\n  } else if (await addButton.isVisible()) {\n    await addButton.click();\n  }\n\n  // Click on \"Your Files\" in the add view or sidebar\n  const filesButton = page.getByLabel(\"knowledge-add-files\");\n  if (await filesButton.isVisible()) {\n    await filesButton.click();\n  } else {\n    // Try the sidebar version\n    const sidebarFiles = page.getByLabel(\"knowledge-sidebar-files\");\n    if (await sidebarFiles.isVisible()) {\n      await sidebarFiles.click();\n    }\n  }\n\n  // Wait for the files table to appear\n  await page.waitForTimeout(500);\n};\n\n// Helper to upload a file through the knowledge panel\nasync function uploadTestFile(\n  page: Page,\n  fileName: string,\n  content: string,\n  maxRetries: number = 3\n): Promise<string> {\n  const buffer = Buffer.from(content, \"utf-8\");\n\n  for (let attempt = 1; attempt <= maxRetries; attempt++) {\n    try {\n      console.log(`[test] Upload attempt ${attempt} for ${fileName}`);\n\n      // Find the Add File button\n      const addFileButton = page.getByRole(\"button\", { name: /add file/i });\n      await expect(addFileButton).toBeVisible({ timeout: 5000 });\n      await expect(addFileButton).toBeEnabled({ timeout: 5000 });\n\n      // Set up file chooser listener before clicking\n      const fileChooserPromise = page.waitForEvent(\"filechooser\", {\n        timeout: 5000,\n      });\n      await addFileButton.click();\n      const fileChooser = await fileChooserPromise;\n\n      // Wait for upload API completion to avoid racing the UI refresh.\n      const uploadResponsePromise = page.waitForResponse(\n        (response) =>\n          response.url().includes(\"/api/user/projects/file/upload\") &&\n          response.request().method() === \"POST\",\n        { timeout: 15000 }\n      );\n\n      // Upload the file\n      await fileChooser.setFiles({\n        name: fileName,\n        mimeType: \"text/plain\",\n        buffer: buffer,\n      });\n      const uploadResponse = await uploadResponsePromise;\n      expect(uploadResponse.ok()).toBeTruthy();\n\n      // Wait for network to settle after upload\n      await page.waitForLoadState(\"networkidle\", { timeout: 10000 });\n\n      // Wait a moment for the UI to update\n      await page.waitForTimeout(500);\n\n      // Wait for the uploaded file row to appear.\n      const fileRow = page\n        .locator('[aria-label^=\"user-file-row-\"]')\n        .filter({ hasText: fileName })\n        .first();\n      await expect(fileRow).toBeVisible({ timeout: 10000 });\n\n      console.log(`[test] Successfully uploaded ${fileName}`);\n\n      // Return the file name for verification later\n      return fileName;\n    } catch (error) {\n      console.log(\n        `[test] Upload attempt ${attempt} failed: ${\n          error instanceof Error ? error.message : \"unknown error\"\n        }`\n      );\n      if (attempt === maxRetries) {\n        throw error;\n      }\n      await page.waitForTimeout(1000);\n    }\n  }\n\n  throw new Error(\n    `Failed to upload file ${fileName} after ${maxRetries} attempts`\n  );\n}\n\n// Helper to select a file by clicking its row\nasync function selectFileByName(page: Page, fileName: string): Promise<void> {\n  const fileNameWithoutExt = fileName.replace(\".txt\", \"\");\n\n  // Try to find and click the row containing the file name\n  // First try by aria-label\n  let fileRow = page.locator(`[aria-label^=\"user-file-row-\"]`, {\n    has: page.locator(`text=${fileNameWithoutExt}`),\n  });\n\n  if ((await fileRow.count()) === 0) {\n    // Fall back to finding by table-row-layout class\n    fileRow = page.locator(\"[data-selected]\", {\n      has: page.locator(`text=${fileNameWithoutExt}`),\n    });\n  }\n\n  if ((await fileRow.count()) === 0) {\n    // Last resort: find any clickable row with the file name\n    fileRow = page\n      .locator(\"div\", {\n        has: page.locator(`text=${fileNameWithoutExt}`),\n      })\n      .filter({\n        has: page.locator('[role=\"checkbox\"], input[type=\"checkbox\"]'),\n      })\n      .first();\n  }\n\n  if ((await fileRow.count()) > 0) {\n    await fileRow.click();\n  } else {\n    // Just click on the file name text itself\n    await page.locator(`text=${fileNameWithoutExt}`).first().click();\n  }\n\n  // Wait for the selection to register\n  await page.waitForTimeout(300);\n  console.log(`[test] Selected file: ${fileName}`);\n}\n\ntest.describe(\"User File Attachment to Assistant\", () => {\n  // Run serially to avoid session conflicts between parallel workers\n  test.describe.configure({ mode: \"serial\", retries: 1 });\n\n  test(\"should persist user file attachment after creating assistant\", async ({\n    page,\n  }: {\n    page: Page;\n  }) => {\n    // Login as a random user (no admin needed for user files)\n    await page.context().clearCookies();\n    await loginAsRandomUser(page);\n\n    const agentName = `User File Test ${Date.now()}`;\n    const agentDescription = \"Testing user file persistence\";\n    const assistantInstructions = \"Help users with their uploaded files.\";\n    const testFileName = `test-file-${Date.now()}.txt`;\n    const testFileContent =\n      \"This is test content for the user file attachment test.\";\n\n    // Navigate to assistant creation page\n    await page.goto(\"/app/agents/create\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // Fill in basic assistant details\n    await getNameInput(page).fill(agentName);\n    await getDescriptionInput(page).fill(agentDescription);\n    await getInstructionsTextarea(page).fill(assistantInstructions);\n\n    // Enable Knowledge toggle\n    const knowledgeToggle = getKnowledgeToggle(page);\n    await knowledgeToggle.scrollIntoViewIfNeeded();\n    await expect(knowledgeToggle).toHaveAttribute(\"aria-checked\", \"false\");\n    await knowledgeToggle.click();\n    await expect(knowledgeToggle).toHaveAttribute(\"aria-checked\", \"true\");\n\n    // Navigate to files view in the Knowledge UI\n    await navigateToFilesView(page);\n\n    // Upload a test file - this automatically adds it to user_file_ids\n    await uploadTestFile(page, testFileName, testFileContent);\n\n    // NOTE: We do NOT call selectFileByName here because uploadTestFile\n    // already adds the file to user_file_ids. Clicking again would toggle it OFF.\n\n    // Verify file appears in the UI (use first() since file may appear in multiple places)\n    const fileText = page.getByText(testFileName).first();\n    await expect(fileText).toBeVisible();\n\n    // Submit the assistant creation form and resolve assistant ID from URL or API response.\n    const agentId = await createAgentAndGetId(page);\n\n    console.log(\n      `[test] Created assistant ${agentName} with ID ${agentId}, now verifying file persistence...`\n    );\n\n    // Navigate to the edit page for the assistant\n    await page.goto(`/app/agents/edit/${agentId}`);\n    await page.waitForURL(`**/app/agents/edit/${agentId}`);\n    await page.waitForLoadState(\"networkidle\");\n\n    // Verify knowledge toggle is still enabled\n    await expect(getKnowledgeToggle(page)).toHaveAttribute(\n      \"aria-checked\",\n      \"true\"\n    );\n\n    // Navigate to files view\n    await navigateToFilesView(page);\n\n    // Wait for files to load\n    await page.waitForTimeout(1000);\n\n    // Verify the uploaded file still appears and is selected\n    const fileNameWithoutExt = testFileName.replace(\".txt\", \"\");\n    const fileTextAfterEdit = page\n      .locator(`text=${fileNameWithoutExt}`)\n      .first();\n    await expect(fileTextAfterEdit).toBeVisible({ timeout: 10000 });\n\n    // Wait for UI to fully render the selection state\n    await page.waitForTimeout(500);\n\n    // Verify the file row has data-selected=\"true\" (indicating it's attached to the assistant)\n    // This confirms: user_file_ids were saved when creating the assistant,\n    // and they're correctly loaded and displayed when editing\n    const fileRowAfterEdit = page.locator(\"[data-selected='true']\", {\n      has: page.locator(`text=${fileNameWithoutExt}`),\n    });\n\n    await expect(fileRowAfterEdit).toBeVisible({ timeout: 5000 });\n\n    console.log(\n      `[test] Successfully verified user file ${testFileName} is persisted and selected for assistant ${agentName}`\n    );\n  });\n\n  test(\"should persist multiple user files after editing assistant\", async ({\n    page,\n  }: {\n    page: Page;\n  }) => {\n    // Login as a random user\n    await page.context().clearCookies();\n    await loginAsRandomUser(page);\n\n    const agentName = `Multi-File Test ${Date.now()}`;\n    const testFileName1 = `test-file-1-${Date.now()}.txt`;\n    const testFileName2 = `test-file-2-${Date.now()}.txt`;\n    const testFileContent = \"Test content for multi-file test.\";\n\n    // Navigate to assistant creation page\n    await page.goto(\"/app/agents/create\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // Fill in basic assistant details\n    await getNameInput(page).fill(agentName);\n    await getDescriptionInput(page).fill(\"Testing multiple user files\");\n    await getInstructionsTextarea(page).fill(\"Help with multiple files.\");\n\n    // Enable Knowledge toggle\n    const knowledgeToggle = getKnowledgeToggle(page);\n    await knowledgeToggle.scrollIntoViewIfNeeded();\n    await knowledgeToggle.click();\n\n    // Navigate to files view\n    await navigateToFilesView(page);\n\n    // Upload first file - automatically adds to user_file_ids\n    await uploadTestFile(page, testFileName1, testFileContent);\n\n    // Upload second file - automatically adds to user_file_ids\n    await uploadTestFile(page, testFileName2, testFileContent);\n\n    // NOTE: We do NOT call selectFileByName because uploadTestFile\n    // already adds files to user_file_ids. Clicking would toggle them OFF.\n\n    // Create the assistant and resolve assistant ID from URL or API response.\n    const agentId = await createAgentAndGetId(page);\n\n    // Go to edit page\n    await page.goto(`/app/agents/edit/${agentId}`);\n    await page.waitForLoadState(\"networkidle\");\n\n    // Navigate to files view\n    await navigateToFilesView(page);\n\n    // Wait for files to load\n    await page.waitForTimeout(1000);\n\n    // Verify both files are visible and selected\n    // This confirms: user_file_ids were saved when creating the assistant,\n    // and they're correctly loaded and displayed when editing\n    for (const fileName of [testFileName1, testFileName2]) {\n      const fileNameWithoutExt = fileName.replace(\".txt\", \"\");\n      const fileText = page.locator(`text=${fileNameWithoutExt}`).first();\n      await expect(fileText).toBeVisible({ timeout: 10000 });\n\n      // Verify the file is selected (data-selected=\"true\")\n      const fileRow = page.locator(\"[data-selected='true']\", {\n        has: page.locator(`text=${fileNameWithoutExt}`),\n      });\n      await expect(fileRow).toBeVisible({ timeout: 5000 });\n    }\n\n    console.log(\n      `[test] Successfully verified multiple user files are persisted for assistant ${agentName}`\n    );\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/auth/email_verification.spec.ts",
    "content": "/**\n * E2E Test: Email Verification Success Flow\n * Tests that the login page displays verification success message when redirected from email verification\n */\nimport { test, expect } from \"@playwright/test\";\n\ntest(\"Login page shows verification success message after email verification\", async ({\n  page,\n}) => {\n  // Clear cookies so we hit the login page as an unauthenticated user\n  await page.context().clearCookies();\n\n  // Navigate to login page with verified=true query param (simulating redirect from email verification)\n  await page.goto(\"/auth/login?verified=true\");\n  await page.waitForLoadState(\"networkidle\");\n\n  // Verify the success message is visible\n  await expect(\n    page.getByText(\"Your email has been verified! Please sign in to continue.\")\n  ).toBeVisible();\n\n  // Verify normal login page elements are still present\n  await expect(page.getByTestId(\"email\")).toBeVisible();\n  await expect(page.getByTestId(\"password\")).toBeVisible();\n});\n"
  },
  {
    "path": "web/tests/e2e/auth/login.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport {\n  TEST_ADMIN_CREDENTIALS,\n  workerUserCredentials,\n} from \"@tests/e2e/constants\";\nimport { expectScreenshot } from \"@tests/e2e/utils/visualRegression\";\n\n// These tests exercise the browser login UI.\n// They clear cookies to start unauthenticated, then drive the login form.\n\ntest.describe(\"Login flow\", () => {\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n  });\n\n  test(\"Login page renders email and password fields\", async ({ page }) => {\n    await page.goto(\"/auth/login\");\n    await page.waitForLoadState(\"networkidle\");\n\n    await expect(page.getByTestId(\"email\")).toBeVisible();\n    await expect(page.getByTestId(\"password\")).toBeVisible();\n    await expect(page.getByRole(\"button\", { name: \"Sign In\" })).toBeVisible();\n\n    // Capture the login page UI\n    await expectScreenshot(page, { name: \"login-page-initial\" });\n  });\n\n  test(\"User can log in with valid credentials\", async ({ page }) => {\n    const { email, password } = TEST_ADMIN_CREDENTIALS;\n\n    await page.goto(\"/auth/login\");\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByTestId(\"email\").fill(email);\n    await page.getByTestId(\"password\").fill(password);\n    await page.getByRole(\"button\", { name: \"Sign In\" }).click();\n\n    await expect(page).toHaveURL(/\\/app/);\n\n    // Verify the session is valid\n    const me = await page.request.get(\"/api/me\");\n    expect(me.ok()).toBe(true);\n    const body = await me.json();\n    expect(body.email).toBe(email);\n  });\n\n  test(\"Login fails with invalid password\", async ({ page }) => {\n    await page.goto(\"/auth/login\");\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByTestId(\"email\").fill(workerUserCredentials(0).email);\n    await page.getByTestId(\"password\").fill(\"WrongPassword123!\");\n    await page.getByRole(\"button\", { name: \"Sign In\" }).click();\n\n    // Wait for error message to appear (use exact match to avoid duplicate selector)\n    await expect(\n      page.getByText(\"Invalid email or password\", { exact: true })\n    ).toBeVisible();\n\n    // Capture the error state\n    await expectScreenshot(page, { name: \"login-invalid-password-error\" });\n\n    // Should stay on the login page\n    await expect(page).toHaveURL(/\\/auth\\/login/);\n\n    // Should not be authenticated\n    const me = await page.request.get(\"/api/me\");\n    expect(me.ok()).toBe(false);\n  });\n\n  test(\"Login fails with non-existent user\", async ({ page }) => {\n    await page.goto(\"/auth/login\");\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByTestId(\"email\").fill(\"nonexistent@example.com\");\n    await page.getByTestId(\"password\").fill(\"SomePassword123!\");\n    await page.getByRole(\"button\", { name: \"Sign In\" }).click();\n\n    // Wait for error message to appear (use exact match to avoid duplicate selector)\n    await expect(\n      page.getByText(\"Invalid email or password\", { exact: true })\n    ).toBeVisible();\n\n    // Capture the error state\n    await expectScreenshot(page, { name: \"login-nonexistent-user-error\" });\n\n    // Should stay on the login page\n    await expect(page).toHaveURL(/\\/auth\\/login/);\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/auth/password_managements.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { loginAsRandomUser, loginAs } from \"@tests/e2e/utils/auth\";\nimport {\n  TEST_ADMIN2_CREDENTIALS,\n  TEST_ADMIN_CREDENTIALS,\n} from \"@tests/e2e/constants\";\n\n// test(\"User changes password and logs in with new password\", async ({\n\n// Skip this test for now\ntest.skip(\"User changes password and logs in with new password\", async ({\n  page,\n}) => {\n  // Clear browser context before starting the test\n  await page.context().clearCookies();\n  await page.context().clearPermissions();\n\n  const { email: uniqueEmail, password: initialPassword } =\n    await loginAsRandomUser(page);\n  const newPassword = \"newPassword456!\";\n\n  // Navigate to user settings\n  await page.click(\"#onyx-user-dropdown\");\n  await page.getByText(\"User Settings\").click();\n  await page.getByRole(\"button\", { name: \"Password\" }).click();\n\n  // Change password\n  await page.getByLabel(\"Current Password\").fill(initialPassword);\n  await page.getByLabel(\"New Password\", { exact: true }).fill(newPassword);\n  await page.getByLabel(\"Confirm New Password\").fill(newPassword);\n  await page.getByRole(\"button\", { name: \"Change Password\" }).click();\n\n  // Verify password change success message\n  await expect(page.getByText(\"Password changed successfully\")).toBeVisible();\n\n  // Log out\n  await page.getByRole(\"button\", { name: \"Close modal\", exact: true }).click();\n  await page.click(\"#onyx-user-dropdown\");\n  await page.getByText(\"Log out\").click();\n\n  // Log in with new password\n  await page.goto(\"/auth/login\");\n  await page.getByTestId(\"email\").fill(uniqueEmail);\n  await page.getByTestId(\"password\").fill(newPassword);\n  await page.getByRole(\"button\", { name: \"Log In\" }).click();\n\n  // Verify successful login\n  await expect(page).toHaveURL(\"http://localhost:3000/app\");\n  await expect(page.getByText(\"Explore Agents\")).toBeVisible();\n});\n\ntest.use({ storageState: \"admin2_auth.json\" });\n\n// Skip this test for now\ntest.skip(\"Admin resets own password and logs in with new password\", async ({\n  page,\n}) => {\n  const { email: adminEmail, password: adminPassword } =\n    TEST_ADMIN2_CREDENTIALS;\n  // Navigate to admin panel\n  await page.goto(\"/admin/indexing/status\");\n\n  // Check if redirected to login page\n  if (page.url().includes(\"/auth/login\")) {\n    await loginAs(page, \"admin2\");\n  }\n\n  // Navigate to Users page in admin panel\n  await page.goto(\"/admin/users\");\n\n  await page.waitForTimeout(500);\n  // Find the admin user and click on it\n  // Log current URL\n  console.log(\"Current URL:\", page.url());\n  // Log current rows\n  const rows = await page.$$eval(\"tr\", (rows) =>\n    rows.map((row) => row.textContent)\n  );\n  console.log(\"Current rows:\", rows);\n\n  // Log admin email we're looking for\n  console.log(\"Admin email:\", adminEmail);\n\n  // Attempt to find and click the row\n  await page\n    .getByRole(\"row\", { name: adminEmail + \" Active\" })\n    .getByRole(\"button\")\n    .click();\n\n  await page.waitForTimeout(500);\n  // Reset password\n  await page.getByRole(\"button\", { name: \"Reset Password\" }).click();\n  await page.getByRole(\"button\", { name: \"Reset Password\" }).click();\n\n  // Copy the new password\n  const newPasswordElement = page.getByTestId(\"new-password\");\n  const newPassword = await newPasswordElement.textContent();\n  if (!newPassword) {\n    throw new Error(\"New password not found\");\n  }\n\n  // Close the modal\n  await page.getByLabel(\"Close modal\").click();\n\n  // Log out\n  await page.click(\"#onyx-user-dropdown\");\n  await page.getByText(\"Log out\").click();\n\n  // Log in with new password\n  await page.goto(\"/auth/login\");\n  await page.getByTestId(\"email\").fill(adminEmail);\n  await page.getByTestId(\"password\").fill(newPassword);\n\n  await page.getByRole(\"button\", { name: \"Log In\" }).click();\n\n  // Verify successful login\n  await expect(page).toHaveURL(\"http://localhost:3000/app\");\n  await expect(page.getByText(\"Explore Agents\")).toBeVisible();\n});\n"
  },
  {
    "path": "web/tests/e2e/auth/pat_management.spec.ts",
    "content": "/**\n * E2E Test: Personal Access Token (PAT) Management\n * Tests complete user flow: login → create → authenticate → delete\n */\nimport { test, expect } from \"@playwright/test\";\nimport { loginAsRandomUser } from \"@tests/e2e/utils/auth\";\n\ntest(\"PAT Complete Workflow\", async ({ page }, testInfo) => {\n  // Skip in admin project - we test with fresh user auth\n  test.skip(\n    testInfo.project.name === \"admin\",\n    \"Test requires clean user auth state\"\n  );\n\n  await page.context().clearCookies();\n  const { email } = await loginAsRandomUser(page);\n\n  await page.goto(\"/app\");\n  await page.waitForLoadState(\"networkidle\");\n\n  // Click on user dropdown and open settings (same pattern as other tests)\n  await page.locator(\"#onyx-user-dropdown\").click();\n  await page.getByText(\"User Settings\").first().click();\n\n  // Wait for settings modal to appear (first page has \"Full Name\" section)\n  await expect(page.getByText(\"Full Name\")).toBeVisible();\n\n  await page\n    .locator('a[href=\"/app/settings/accounts-access\"]')\n    .click({ force: true });\n\n  // Wait for PAT page to load (button is unique to the PAT section)\n  await expect(page.locator('button:has-text(\"New Access Token\")')).toBeVisible(\n    {\n      timeout: 10000,\n    }\n  );\n\n  await page.locator('button:has-text(\"New Access Token\")').first().click();\n\n  const tokenName = `E2E Test Token ${Date.now()}`;\n  const nameInput = page\n    .locator('input[placeholder*=\"Name your token\"]')\n    .first();\n  await nameInput.fill(tokenName);\n\n  // Click the Radix UI combobox for expiration (not a select element)\n  const expirationCombobox = page.locator(\n    'button[role=\"combobox\"][aria-label*=\"expiration\"]'\n  );\n  if (await expirationCombobox.isVisible()) {\n    await expirationCombobox.click();\n    // Wait for dropdown and select 7 days option using role=option\n    await page.getByRole(\"option\", { name: \"7 days\" }).click();\n  }\n\n  await page.locator('button:has-text(\"Create Token\")').first().click();\n\n  const tokenDisplay = page\n    .locator(\"code\")\n    .filter({ hasText: \"onyx_pat_\" })\n    .first();\n  await tokenDisplay.waitFor({ state: \"visible\", timeout: 5000 });\n\n  const tokenValue = await tokenDisplay.textContent();\n  expect(tokenValue).toContain(\"onyx_pat_\");\n\n  // Grant clipboard permissions before copying\n  await page.context().grantPermissions([\"clipboard-read\", \"clipboard-write\"]);\n\n  // Copy the newly created token (button is inside .code-copy-button)\n  await page.locator(\".code-copy-button button\").click();\n\n  // Wait a moment for clipboard to be written and verify\n  await page.waitForTimeout(500);\n  const clipboardText = await page.evaluate(() =>\n    navigator.clipboard.readText()\n  );\n  expect(clipboardText).toBe(tokenValue);\n\n  await page.locator('button:has-text(\"Done\")').first().click();\n  await expect(page.getByText(tokenName).first()).toBeVisible({\n    timeout: 5000,\n  });\n\n  // Test the PAT token works by making an API request in a new context (no session cookies)\n  const testContext = await page.context().browser()!.newContext();\n  const apiResponse = await testContext.request.get(\n    \"http://localhost:3000/api/me\",\n    {\n      headers: {\n        Authorization: `Bearer ${tokenValue}`,\n      },\n    }\n  );\n  expect(apiResponse.ok()).toBeTruthy();\n  const userData = await apiResponse.json();\n  expect(userData.email).toBe(email);\n  await testContext.close();\n\n  // Find and click the delete button using the aria-label with token name\n  const deleteButton = page.locator(\n    `button[aria-label=\"Delete token ${tokenName}\"]`\n  );\n  await deleteButton.click();\n\n  const confirmButton = page.locator('button:has-text(\"Revoke\")').first();\n  await confirmButton.waitFor({ state: \"visible\", timeout: 3000 });\n  await confirmButton.click();\n\n  // Wait for the modal to close (it contains the token name in its text)\n  await expect(confirmButton).not.toBeVisible({ timeout: 3000 });\n\n  // Now verify the token is no longer in the list\n  await expect(page.locator(`p:text-is(\"${tokenName}\")`)).not.toBeVisible({\n    timeout: 5000,\n  });\n\n  // Create a new context without cookies to test the revoked token\n  const newContext = await page.context().browser()!.newContext();\n  const revokedApiResponse = await newContext.request.get(\n    \"http://localhost:3000/api/me\",\n    {\n      headers: {\n        Authorization: `Bearer ${tokenValue}`,\n      },\n    }\n  );\n  await newContext.close();\n  // Revoked tokens return 403 Forbidden (as per backend tests)\n  expect(revokedApiResponse.status()).toBe(403);\n});\n\ntest(\"PAT Multiple Tokens Management\", async ({ page }, testInfo) => {\n  // Skip in admin project - we test with fresh user auth\n  test.skip(\n    testInfo.project.name === \"admin\",\n    \"Test requires clean user auth state\"\n  );\n\n  await page.context().clearCookies();\n  await loginAsRandomUser(page);\n\n  await page.goto(\"/app\");\n  await page.waitForLoadState(\"networkidle\");\n\n  // Click on user dropdown and open settings (same pattern as other tests)\n  await page.locator(\"#onyx-user-dropdown\").click();\n  await page.getByText(\"User Settings\").first().click();\n\n  // Wait for settings modal to appear (first page has \"Full Name\" section)\n  await expect(page.getByText(\"Full Name\")).toBeVisible();\n\n  await page\n    .locator('a[href=\"/app/settings/accounts-access\"]')\n    .click({ force: true });\n\n  // Wait for PAT page to load (button is unique to the PAT section)\n  await expect(page.locator('button:has-text(\"New Access Token\")')).toBeVisible(\n    {\n      timeout: 10000,\n    }\n  );\n\n  const tokens = [\n    { name: `Token 1 - ${Date.now()}`, expiration: \"7 days\" },\n    { name: `Token 2 - ${Date.now() + 1}`, expiration: \"30 days\" },\n    { name: `Token 3 - ${Date.now() + 2}`, expiration: \"No expiration\" },\n  ];\n\n  for (const token of tokens) {\n    // Click \"New Access Token\" button to open the modal\n    await page.locator('button:has-text(\"New Access Token\")').first().click();\n\n    // Fill in the token name\n    const nameInput = page\n      .locator('input[placeholder*=\"Name your token\"]')\n      .first();\n    await nameInput.fill(token.name);\n\n    // Click the Radix UI combobox for expiration (not a select element)\n    const expirationCombobox = page.locator(\n      'button[role=\"combobox\"][aria-label*=\"expiration\"]'\n    );\n    if (await expirationCombobox.isVisible()) {\n      await expirationCombobox.click();\n      // Wait for dropdown and select the option using role=option\n      await page.getByRole(\"option\", { name: token.expiration }).click();\n    }\n\n    // Create the token\n    await page.locator('button:has-text(\"Create Token\")').first().click();\n\n    // Wait for token to be created (code block with token appears)\n    await page\n      .locator(\"code\")\n      .filter({ hasText: \"onyx_pat_\" })\n      .first()\n      .waitFor({ state: \"visible\", timeout: 5000 });\n\n    // Close the modal by clicking \"Done\"\n    await page.locator('button:has-text(\"Done\")').first().click();\n\n    // Wait for token to appear in the list\n    await expect(page.getByText(token.name).first()).toBeVisible({\n      timeout: 5000,\n    });\n  }\n\n  // Verify all tokens are visible in the list\n  for (const token of tokens) {\n    await expect(page.getByText(token.name).first()).toBeVisible();\n  }\n\n  // Delete the second token using its aria-label\n  const deleteButton = page.locator(\n    `button[aria-label=\"Delete token ${tokens[1]!.name}\"]`\n  );\n  await deleteButton.click();\n\n  // Click \"Revoke\" to confirm deletion\n  const confirmButton = page.locator('button:has-text(\"Revoke\")').first();\n  await confirmButton.waitFor({ state: \"visible\", timeout: 3000 });\n  await confirmButton.click();\n\n  // Wait for the modal to close\n  await expect(confirmButton).not.toBeVisible({ timeout: 3000 });\n\n  // Now verify the deleted token is no longer in the list\n  await expect(page.getByText(tokens[1]!.name)).not.toBeVisible({\n    timeout: 5000,\n  });\n\n  // Verify the other two tokens are still visible\n  await expect(page.getByText(tokens[0]!.name).first()).toBeVisible();\n  await expect(page.getByText(tokens[2]!.name).first()).toBeVisible();\n});\n"
  },
  {
    "path": "web/tests/e2e/auth/signup.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { expectScreenshot } from \"@tests/e2e/utils/visualRegression\";\n\n// These tests exercise the signup (user registration) flow.\n// They clear cookies to start unauthenticated, then drive the signup form.\n\ntest.describe(\"Signup flow\", () => {\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n  });\n\n  test(\"Signup page renders correctly\", async ({ page }) => {\n    await page.goto(\"/auth/signup\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // Verify form elements are present\n    await expect(page.getByTestId(\"email\")).toBeVisible();\n    await expect(page.getByTestId(\"password\")).toBeVisible();\n    await expect(\n      page.getByRole(\"button\", { name: \"Create account\" })\n    ).toBeVisible();\n\n    // Capture the initial signup page\n    await expectScreenshot(page, { name: \"signup-page-initial\" });\n  });\n\n  test(\"User can create a new account\", async ({ page }) => {\n    // Generate a unique email for this test\n    const uniqueEmail = `testuser_${Date.now()}@example.com`;\n    const password = \"NewUserPassword123!\";\n\n    await page.goto(\"/auth/signup\");\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByTestId(\"email\").fill(uniqueEmail);\n    await page.getByTestId(\"password\").fill(password);\n    await page.getByRole(\"button\", { name: \"Create account\" }).click();\n\n    // Should redirect to the app page after successful signup\n    await expect(page).toHaveURL(/\\/app/, { timeout: 10000 });\n\n    // Verify the session is valid and user is logged in\n    const me = await page.request.get(\"/api/me\");\n    expect(me.ok()).toBe(true);\n    const body = await me.json();\n    expect(body.email).toBe(uniqueEmail);\n  });\n\n  test(\"Signup fails with weak password\", async ({ page }) => {\n    await page.goto(\"/auth/signup\");\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByTestId(\"email\").fill(\"newuser@example.com\");\n    await page.getByTestId(\"password\").fill(\"weak\"); // Too short\n\n    // Trigger validation by blurring the password field\n    await page.getByTestId(\"password\").blur();\n\n    // Wait for validation error to appear\n    await expect(\n      page.getByText(/must be at least 8 characters/i)\n    ).toBeVisible();\n\n    // Verify submit button is disabled\n    await expect(\n      page.getByRole(\"button\", { name: \"Create account\" })\n    ).toBeDisabled();\n\n    // Capture the validation error state\n    await expectScreenshot(page, { name: \"signup-weak-password-error\" });\n\n    // Should stay on the signup page\n    await expect(page).toHaveURL(/\\/auth\\/signup/);\n  });\n\n  test(\"Signup fails with existing email\", async ({ page }) => {\n    // Use an email that already exists (from global-setup)\n    const existingEmail = \"admin_user@example.com\";\n\n    await page.goto(\"/auth/signup\");\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByTestId(\"email\").fill(existingEmail);\n    await page.getByTestId(\"password\").fill(\"SomePassword123!\");\n    await page.getByRole(\"button\", { name: \"Create account\" }).click();\n\n    // Wait for error message to appear\n    await expect(\n      page.getByText(\"An account already exists with the specified email.\", {\n        exact: true,\n      })\n    ).toBeVisible();\n\n    // Capture the error state\n    await expectScreenshot(page, { name: \"signup-existing-email-error\" });\n\n    // Should stay on the signup page\n    await expect(page).toHaveURL(/\\/auth\\/signup/);\n\n    // Should not be authenticated as the existing user\n    const me = await page.request.get(\"/api/me\");\n    expect(me.ok()).toBe(false);\n  });\n\n  test(\"Signup fails with invalid email format\", async ({ page }) => {\n    await page.goto(\"/auth/signup\");\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByTestId(\"email\").fill(\"notavalidemail\");\n    await page.getByTestId(\"password\").fill(\"ValidPassword123!\");\n\n    // Trigger validation by blurring the email field\n    await page.getByTestId(\"email\").blur();\n\n    // Verify submit button is disabled\n    await expect(\n      page.getByRole(\"button\", { name: \"Create account\" })\n    ).toBeDisabled();\n\n    // Capture the validation error state\n    await expectScreenshot(page, { name: \"signup-invalid-email-error\" });\n\n    // Should stay on the signup page\n    await expect(page).toHaveURL(/\\/auth\\/signup/);\n  });\n\n  test(\"Signup fails with disposable email address\", async ({ page }) => {\n    // Use a disposable email domain from the fallback list\n    const disposableEmail = `testuser_${Date.now()}@mailinator.com`;\n\n    await page.goto(\"/auth/signup\");\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByTestId(\"email\").fill(disposableEmail);\n    await page.getByTestId(\"password\").fill(\"ValidPassword123!\");\n    await page.getByRole(\"button\", { name: \"Create account\" }).click();\n\n    // Wait for error message to appear\n    await expect(\n      page.getByText(\"Disposable email addresses are not allowed\").first()\n    ).toBeVisible();\n\n    // Capture the error state with hidden email to avoid non-deterministic diffs\n    await expectScreenshot(page, {\n      name: \"signup-disposable-email-error\",\n      mask: [\"[data-testid='email']\"],\n    });\n\n    // Should stay on the signup page\n    await expect(page).toHaveURL(/\\/auth\\/signup/);\n\n    // Should not be authenticated\n    const me = await page.request.get(\"/api/me\");\n    expect(me.ok()).toBe(false);\n  });\n\n  test(\"Login link navigates to login page\", async ({ page }) => {\n    await page.goto(\"/auth/signup\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // Find and click the login link\n    const loginLink = page.getByRole(\"link\", { name: /sign in/i });\n    await expect(loginLink).toBeVisible();\n    await loginLink.click();\n\n    // Should navigate to login page\n    await expect(page).toHaveURL(/\\/auth\\/login/);\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/actions_popover.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport {\n  TOOL_IDS,\n  openActionManagement,\n  openSourceManagement,\n  toggleToolDisabled,\n  getSourceToggle,\n} from \"@tests/e2e/utils/tools\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\nconst LOCAL_STORAGE_KEY = \"selectedInternalSearchSources\";\n\ntest.describe(\"ActionsPopover Tool Toggles\", () => {\n  test.describe.configure({ mode: \"serial\" });\n\n  let ccPairId: number | null = null;\n  let webSearchProviderId: number | null = null;\n  let imageGenConfigId: string | null = null;\n\n  test.beforeAll(async ({ browser }) => {\n    const ctx = await browser.newContext({ storageState: \"admin_auth.json\" });\n    const page = await ctx.newPage();\n    await page.goto(\"http://localhost:3000/app\");\n    await page.waitForLoadState(\"networkidle\");\n\n    const apiClient = new OnyxApiClient(page.request);\n\n    // Create a file connector so internal search tool is available\n    ccPairId = await apiClient.createFileConnector(\n      `actions-popover-test-${Date.now()}`\n    );\n\n    // Create providers for web search and image generation (best-effort)\n    try {\n      webSearchProviderId = await apiClient.createWebSearchProvider(\n        \"exa\",\n        `actions-popover-web-search-${Date.now()}`\n      );\n    } catch (error) {\n      console.warn(`Failed to create web search provider: ${error}`);\n    }\n\n    try {\n      imageGenConfigId = await apiClient.createImageGenerationConfig(\n        `actions-popover-image-gen-${Date.now()}`\n      );\n    } catch (error) {\n      console.warn(`Failed to create image gen config: ${error}`);\n    }\n\n    // Ensure all tools are enabled on the default agent\n    const toolsResp = await page.request.get(\"/api/tool\");\n    const allTools = await toolsResp.json();\n    const toolIdsByCodeId: Record<string, number> = {};\n    allTools.forEach((t: any) => {\n      if (t.in_code_tool_id) toolIdsByCodeId[t.in_code_tool_id] = t.id;\n    });\n\n    const configResp = await page.request.get(\n      \"/api/admin/default-assistant/configuration\"\n    );\n    const currentConfig = await configResp.json();\n\n    const desiredToolIds = [\n      toolIdsByCodeId[\"SearchTool\"],\n      toolIdsByCodeId[\"WebSearchTool\"],\n      toolIdsByCodeId[\"ImageGenerationTool\"],\n    ].filter(Boolean);\n\n    const uniqueToolIds = Array.from(\n      new Set([...(currentConfig.tool_ids || []), ...desiredToolIds])\n    );\n\n    await page.request.patch(\"/api/admin/default-assistant\", {\n      data: { tool_ids: uniqueToolIds },\n    });\n\n    await ctx.close();\n  });\n\n  test.afterAll(async ({ browser }) => {\n    const ctx = await browser.newContext({ storageState: \"admin_auth.json\" });\n    const page = await ctx.newPage();\n    await page.goto(\"http://localhost:3000/app\");\n    await page.waitForLoadState(\"networkidle\");\n\n    const apiClient = new OnyxApiClient(page.request);\n\n    if (ccPairId !== null) {\n      try {\n        await apiClient.deleteCCPair(ccPairId);\n      } catch (error) {\n        console.warn(`Cleanup: failed to delete connector: ${error}`);\n      }\n    }\n    if (webSearchProviderId !== null) {\n      try {\n        await apiClient.deleteWebSearchProvider(webSearchProviderId);\n      } catch (error) {\n        console.warn(`Cleanup: failed to delete web search provider: ${error}`);\n      }\n    }\n    if (imageGenConfigId !== null) {\n      try {\n        await apiClient.deleteImageGenerationConfig(imageGenConfigId);\n      } catch (error) {\n        console.warn(`Cleanup: failed to delete image gen config: ${error}`);\n      }\n    }\n\n    await ctx.close();\n  });\n\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n    // Clear source preferences for a clean slate\n    await page.evaluate(\n      (key) => localStorage.removeItem(key),\n      LOCAL_STORAGE_KEY\n    );\n  });\n\n  test(\"should show internal search and other tools in popover\", async ({\n    page,\n  }) => {\n    await openActionManagement(page);\n\n    // Internal search must be visible (connector was created in beforeAll)\n    await expect(page.locator(TOOL_IDS.searchOption)).toBeVisible({\n      timeout: 10000,\n    });\n\n    // Soft-check other tools (depend on provider setup success)\n    const webVisible = await page\n      .locator(TOOL_IDS.webSearchOption)\n      .isVisible()\n      .catch(() => false);\n    const imgVisible = await page\n      .locator(TOOL_IDS.imageGenerationOption)\n      .isVisible()\n      .catch(() => false);\n    console.log(`[tools] web_search=${webVisible}, image_gen=${imgVisible}`);\n  });\n\n  test(\"source preferences should persist to localStorage and survive reload\", async ({\n    page,\n  }) => {\n    await openActionManagement(page);\n    await expect(page.locator(TOOL_IDS.searchOption)).toBeVisible({\n      timeout: 10000,\n    });\n    await openSourceManagement(page);\n\n    // Find the first source switch\n    const switches = page.locator('[role=\"switch\"]');\n    await expect(switches.first()).toBeVisible({ timeout: 5000 });\n\n    const firstSwitch = switches.first();\n    const ariaLabel = await firstSwitch.getAttribute(\"aria-label\");\n    const sourceName = ariaLabel?.replace(\"Toggle \", \"\") || \"\";\n    expect(sourceName).toBeTruthy();\n\n    // Ensure it's enabled, then disable it\n    if ((await firstSwitch.getAttribute(\"aria-checked\")) === \"false\") {\n      await firstSwitch.click();\n      await expect(firstSwitch).toHaveAttribute(\"aria-checked\", \"true\");\n    }\n    await firstSwitch.click();\n    await expect(firstSwitch).toHaveAttribute(\"aria-checked\", \"false\");\n\n    // Verify localStorage was updated\n    const stored = await page.evaluate(\n      (key) => localStorage.getItem(key),\n      LOCAL_STORAGE_KEY\n    );\n    expect(stored).toBeTruthy();\n    expect(JSON.parse(stored!).sourcePreferences).toBeDefined();\n\n    // Reload and verify persistence\n    await page.reload();\n    await page.waitForLoadState(\"networkidle\");\n\n    await openActionManagement(page);\n    await openSourceManagement(page);\n\n    const sourceToggle = getSourceToggle(page, sourceName);\n    await expect(sourceToggle).toHaveAttribute(\"aria-checked\", \"false\", {\n      timeout: 10000,\n    });\n  });\n\n  test(\"disabling search tool clears sources, re-enabling restores them\", async ({\n    page,\n  }) => {\n    await openActionManagement(page);\n    await expect(page.locator(TOOL_IDS.searchOption)).toBeVisible({\n      timeout: 10000,\n    });\n\n    // Open source management and count enabled sources\n    await openSourceManagement(page);\n    const switches = page.locator('[role=\"switch\"]');\n    await expect(switches.first()).toBeVisible({ timeout: 5000 });\n\n    const totalSources = await switches.count();\n    let enabledBefore = 0;\n    for (let i = 0; i < totalSources; i++) {\n      if ((await switches.nth(i).getAttribute(\"aria-checked\")) === \"true\") {\n        enabledBefore++;\n      }\n    }\n    expect(enabledBefore).toBeGreaterThan(0);\n\n    // Go back to primary view\n    await page.locator('button[aria-label=\"Back\"]').click();\n    await expect(page.locator(TOOL_IDS.searchOption)).toBeVisible();\n\n    // Disable the search tool\n    await toggleToolDisabled(page, TOOL_IDS.searchOption);\n\n    // Verify localStorage was written (the fix being tested)\n    const stored = await page.evaluate(\n      (key) => localStorage.getItem(key),\n      LOCAL_STORAGE_KEY\n    );\n    expect(stored).toBeTruthy();\n\n    // Re-enable the search tool\n    await toggleToolDisabled(page, TOOL_IDS.searchOption);\n\n    // Verify sources were restored\n    await openSourceManagement(page);\n    const switchesAfter = page.locator('[role=\"switch\"]');\n    const totalAfter = await switchesAfter.count();\n    let enabledAfter = 0;\n    for (let i = 0; i < totalAfter; i++) {\n      if (\n        (await switchesAfter.nth(i).getAttribute(\"aria-checked\")) === \"true\"\n      ) {\n        enabledAfter++;\n      }\n    }\n    expect(enabledAfter).toBe(enabledBefore);\n  });\n\n  test(\"tool enabled and disabled states both persist across reload\", async ({\n    page,\n  }) => {\n    await openActionManagement(page);\n    const searchOption = page.locator(TOOL_IDS.searchOption);\n    await expect(searchOption).toBeVisible({ timeout: 10000 });\n\n    // The slash button says \"Disable\" when the tool is enabled\n    await searchOption.hover();\n    const slashButton = searchOption.locator(\n      'button[aria-label=\"Disable\"], button[aria-label=\"Enable\"]'\n    );\n    await expect(slashButton.first()).toHaveAttribute(\"aria-label\", \"Disable\");\n\n    // Reload — enabled state should persist\n    await page.reload();\n    await page.waitForLoadState(\"networkidle\");\n    await openActionManagement(page);\n    await page.locator(TOOL_IDS.searchOption).hover();\n    await expect(\n      page\n        .locator(TOOL_IDS.searchOption)\n        .locator('button[aria-label=\"Disable\"], button[aria-label=\"Enable\"]')\n        .first()\n    ).toHaveAttribute(\"aria-label\", \"Disable\");\n\n    // Disable the search tool\n    await toggleToolDisabled(page, TOOL_IDS.searchOption);\n\n    // Verify it's now disabled (slash button says \"Enable\")\n    await page.locator(TOOL_IDS.searchOption).hover();\n    await expect(\n      page\n        .locator(TOOL_IDS.searchOption)\n        .locator('button[aria-label=\"Disable\"], button[aria-label=\"Enable\"]')\n        .first()\n    ).toHaveAttribute(\"aria-label\", \"Enable\");\n\n    // Reload — disabled state should also persist (saved to DB)\n    await page.reload();\n    await page.waitForLoadState(\"networkidle\");\n    await openActionManagement(page);\n    await page.locator(TOOL_IDS.searchOption).hover();\n    await expect(\n      page\n        .locator(TOOL_IDS.searchOption)\n        .locator('button[aria-label=\"Disable\"], button[aria-label=\"Enable\"]')\n        .first()\n    ).toHaveAttribute(\"aria-label\", \"Enable\");\n\n    // Re-enable the tool for cleanup (serial tests follow)\n    await toggleToolDisabled(page, TOOL_IDS.searchOption);\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/chat-search-command-menu.spec.ts",
    "content": "import { test, expect, Page, Locator } from \"@playwright/test\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\nimport { loginAsWorkerUser } from \"@tests/e2e/utils/auth\";\nimport { expectScreenshot } from \"@tests/e2e/utils/visualRegression\";\n\n// Test data storage\nconst TEST_PREFIX = \"E2E-CMD\";\nlet chatSessionIds: string[] = [];\nlet projectIds: number[] = [];\n\n/**\n * Helper to get the command menu dialog locator (using the content wrapper)\n */\nfunction getCommandMenuContent(page: Page): Locator {\n  return page.locator('[role=\"dialog\"]:has([data-command-menu-list])');\n}\n\n/**\n * Helper to open the command menu and return a scoped locator\n */\nasync function openCommandMenu(page: Page): Promise<Locator> {\n  await page.getByLabel(\"Open chat search\").click();\n  const dialog = getCommandMenuContent(page);\n  await expect(\n    dialog.getByPlaceholder(\"Search chat sessions, projects...\")\n  ).toBeVisible();\n  return dialog;\n}\n\ntest.describe(\"Chat Search Command Menu\", () => {\n  test.beforeAll(async ({ browser }, workerInfo) => {\n    const context = await browser.newContext();\n    const page = await context.newPage();\n    await loginAsWorkerUser(page, workerInfo.workerIndex);\n    const client = new OnyxApiClient(page.request);\n\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n\n    for (let i = 1; i <= 5; i++) {\n      const id = await client.createChatSession(`${TEST_PREFIX} Chat ${i}`);\n      chatSessionIds.push(id);\n    }\n\n    for (let i = 1; i <= 4; i++) {\n      const id = await client.createProject(`${TEST_PREFIX} Project ${i}`);\n      projectIds.push(id);\n    }\n\n    await context.close();\n  });\n\n  test.afterAll(async ({ browser }, workerInfo) => {\n    const context = await browser.newContext();\n    const page = await context.newPage();\n    await loginAsWorkerUser(page, workerInfo.workerIndex);\n    const client = new OnyxApiClient(page.request);\n\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n\n    for (const id of chatSessionIds) {\n      await client.deleteChatSession(id);\n    }\n    for (const id of projectIds) {\n      await client.deleteProject(id);\n    }\n\n    await context.close();\n  });\n\n  test.beforeEach(async ({ page }, testInfo) => {\n    await page.context().clearCookies();\n    await loginAsWorkerUser(page, testInfo.workerIndex);\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n  });\n\n  // -- Opening --\n\n  test(\"Opens with search input, New Session action, and correct positioning\", async ({\n    page,\n  }) => {\n    const dialog = await openCommandMenu(page);\n\n    await expect(\n      dialog.getByPlaceholder(\"Search chat sessions, projects...\")\n    ).toBeFocused();\n    await expect(\n      dialog.locator('[data-command-item=\"new-session\"]')\n    ).toBeVisible();\n\n    await expectScreenshot(page, { name: \"command-menu-default-open\" });\n  });\n\n  // -- Preview limits --\n\n  test(\"Shows at most 4 chats and 3 projects in preview\", async ({ page }) => {\n    const dialog = await openCommandMenu(page);\n\n    const chatCount = await dialog\n      .locator('[data-command-item^=\"chat-\"]')\n      .count();\n    expect(chatCount).toBeLessThanOrEqual(4);\n\n    const projectCount = await dialog\n      .locator('[data-command-item^=\"project-\"]')\n      .count();\n    expect(projectCount).toBeLessThanOrEqual(3);\n  });\n\n  test('Shows \"Recent Sessions\", \"Projects\" filters and \"New Project\" action', async ({\n    page,\n  }) => {\n    const dialog = await openCommandMenu(page);\n\n    await expect(\n      dialog.locator('[data-command-item=\"recent-sessions\"]')\n    ).toBeVisible();\n    await expect(\n      dialog.locator('[data-command-item=\"projects\"]')\n    ).toBeVisible();\n    await expect(\n      dialog.locator('[data-command-item=\"new-project\"]')\n    ).toBeVisible();\n  });\n\n  // -- Filter expansion --\n\n  test('\"Recent Sessions\" filter expands to show all 5 chats', async ({\n    page,\n  }) => {\n    const dialog = await openCommandMenu(page);\n    await dialog.locator('[data-command-item=\"recent-sessions\"]').click();\n\n    await page.waitForTimeout(500);\n\n    for (let i = 1; i <= 5; i++) {\n      await expect(\n        dialog.locator(`[data-command-item=\"chat-${chatSessionIds[i - 1]}\"]`)\n      ).toBeVisible();\n    }\n\n    await expect(dialog.getByText(\"Sessions\")).toBeVisible();\n  });\n\n  test('\"Projects\" filter expands to show all 4 projects', async ({ page }) => {\n    const dialog = await openCommandMenu(page);\n    await dialog.locator('[data-command-item=\"projects\"]').click();\n\n    await page.waitForTimeout(500);\n\n    for (let i = 1; i <= 4; i++) {\n      await expect(\n        dialog.locator(`[data-command-item=\"project-${projectIds[i - 1]}\"]`)\n      ).toBeVisible();\n    }\n\n    await expectScreenshot(page, { name: \"command-menu-projects-filter\" });\n  });\n\n  test(\"Filter chip X removes filter and returns to all\", async ({ page }) => {\n    const dialog = await openCommandMenu(page);\n    await dialog.locator('[data-command-item=\"recent-sessions\"]').click();\n    await expect(dialog.getByText(\"Sessions\")).toBeVisible();\n\n    await dialog.locator('button[aria-label=\"Remove Sessions filter\"]').click();\n\n    await expect(\n      dialog.locator('[data-command-item=\"new-session\"]')\n    ).toBeVisible();\n  });\n\n  test(\"Backspace on empty input removes active filter\", async ({ page }) => {\n    const dialog = await openCommandMenu(page);\n    await dialog.locator('[data-command-item=\"recent-sessions\"]').click();\n    await expect(dialog.getByText(\"Sessions\")).toBeVisible();\n\n    const input = dialog.getByPlaceholder(\"Search chat sessions, projects...\");\n    await input.focus();\n    await page.keyboard.press(\"Backspace\");\n\n    await expect(\n      dialog.locator('[data-command-item=\"new-session\"]')\n    ).toBeVisible();\n  });\n\n  test(\"Backspace on empty input with no filter closes menu\", async ({\n    page,\n  }) => {\n    await openCommandMenu(page);\n    await page.keyboard.press(\"Backspace\");\n    await expect(getCommandMenuContent(page)).not.toBeVisible();\n  });\n\n  // -- Search --\n\n  test(\"Search finds matching chat session\", async ({ page }) => {\n    const dialog = await openCommandMenu(page);\n\n    const input = dialog.getByPlaceholder(\"Search chat sessions, projects...\");\n    await input.fill(`${TEST_PREFIX} Chat 3`);\n    await page.waitForTimeout(500);\n\n    await expect(\n      dialog.locator(`[data-command-item=\"chat-${chatSessionIds[2]}\"]`)\n    ).toBeVisible();\n\n    await expectScreenshot(page, { name: \"command-menu-search-results\" });\n  });\n\n  test(\"Search finds matching project\", async ({ page }) => {\n    const dialog = await openCommandMenu(page);\n\n    const input = dialog.getByPlaceholder(\"Search chat sessions, projects...\");\n    await input.fill(`${TEST_PREFIX} Project 2`);\n    await page.waitForTimeout(500);\n\n    await expect(\n      dialog.locator(`[data-command-item=\"project-${projectIds[1]}\"]`)\n    ).toBeVisible();\n  });\n\n  test('Search shows \"Create New Project\" action with typed name', async ({\n    page,\n  }) => {\n    const dialog = await openCommandMenu(page);\n\n    const input = dialog.getByPlaceholder(\"Search chat sessions, projects...\");\n    await input.fill(\"my custom project name\");\n\n    await expect(\n      dialog.locator('[data-command-item=\"create-project-with-name\"]')\n    ).toBeVisible();\n  });\n\n  test(\"Search with no results shows empty state\", async ({ page }) => {\n    const dialog = await openCommandMenu(page);\n\n    const input = dialog.getByPlaceholder(\"Search chat sessions, projects...\");\n    await input.fill(\"xyz123nonexistent9999\");\n    await page.waitForTimeout(500);\n\n    const noResults = dialog.getByText(\"No results found\");\n    const noMore = dialog.getByText(\"No more results\");\n    await expect(noResults.or(noMore)).toBeVisible();\n\n    await expectScreenshot(page, { name: \"command-menu-no-results\" });\n  });\n\n  // -- Navigation --\n\n  test('\"New Session\" navigates to /app', async ({ page }) => {\n    // Start from /chat so navigation is observable\n    await page.goto(\"/chat\");\n    await page.waitForLoadState(\"networkidle\");\n\n    const dialog = await openCommandMenu(page);\n    await dialog.locator('[data-command-item=\"new-session\"]').click();\n\n    await page.waitForURL(/\\/app/);\n    expect(page.url()).toContain(\"/app\");\n  });\n\n  test(\"Clicking a chat session navigates to its URL\", async ({ page }) => {\n    const dialog = await openCommandMenu(page);\n\n    const input = dialog.getByPlaceholder(\"Search chat sessions, projects...\");\n    await input.fill(`${TEST_PREFIX} Chat 1`);\n    await page.waitForTimeout(500);\n\n    await dialog\n      .locator(`[data-command-item=\"chat-${chatSessionIds[0]}\"]`)\n      .click();\n\n    await page.waitForURL(/chatId=/);\n    expect(page.url()).toContain(`chatId=${chatSessionIds[0]}`);\n  });\n\n  test(\"Clicking a project navigates to its URL\", async ({ page }) => {\n    const dialog = await openCommandMenu(page);\n\n    const input = dialog.getByPlaceholder(\"Search chat sessions, projects...\");\n    await input.fill(`${TEST_PREFIX} Project 1`);\n    await page.waitForTimeout(500);\n\n    await dialog\n      .locator(`[data-command-item=\"project-${projectIds[0]}\"]`)\n      .click();\n\n    await page.waitForURL(/projectId=/);\n    expect(page.url()).toContain(`projectId=${projectIds[0]}`);\n  });\n\n  test('\"New Project\" opens create project modal', async ({ page }) => {\n    const dialog = await openCommandMenu(page);\n    await dialog.locator('[data-command-item=\"new-project\"]').click();\n    await expect(page.getByText(\"Create New Project\")).toBeVisible();\n  });\n\n  // -- Menu state --\n\n  test(\"Menu closes after selecting an item\", async ({ page }) => {\n    const dialog = await openCommandMenu(page);\n    await dialog.locator('[data-command-item=\"new-session\"]').click();\n    await expect(getCommandMenuContent(page)).not.toBeVisible();\n  });\n\n  test(\"Escape closes menu\", async ({ page }) => {\n    await openCommandMenu(page);\n    await page.keyboard.press(\"Escape\");\n    await expect(getCommandMenuContent(page)).not.toBeVisible();\n  });\n\n  test(\"Menu state resets when reopened\", async ({ page }) => {\n    let dialog = await openCommandMenu(page);\n    await dialog.locator('[data-command-item=\"recent-sessions\"]').click();\n    await expect(dialog.getByText(\"Sessions\")).toBeVisible();\n\n    const input = dialog.getByPlaceholder(\"Search chat sessions, projects...\");\n    await input.fill(\"test query\");\n\n    await page.keyboard.press(\"Escape\");\n    await expect(getCommandMenuContent(page)).not.toBeVisible();\n\n    dialog = await openCommandMenu(page);\n\n    await expect(\n      dialog.getByPlaceholder(\"Search chat sessions, projects...\")\n    ).toHaveValue(\"\");\n    await expect(\n      dialog.locator('[data-command-item=\"new-session\"]')\n    ).toBeVisible();\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/chat_message_rendering.spec.ts",
    "content": "import { expect, Page, test } from \"@playwright/test\";\nimport { loginAsWorkerUser } from \"@tests/e2e/utils/auth\";\nimport { sendMessage } from \"@tests/e2e/utils/chatActions\";\nimport { THEMES, setThemeBeforeNavigation } from \"@tests/e2e/utils/theme\";\nimport { expectElementScreenshot } from \"@tests/e2e/utils/visualRegression\";\n\nconst SHORT_USER_MESSAGE = \"What is Onyx?\";\n\nconst LONG_WORD_USER_MESSAGE =\n  \"Please look into this issue: __________________________________________ and also this token: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA and this URL: https://example.com/a/very/long/path/that/keeps/going/and/going/and/going/without/any/breaks/whatsoever/to/test/overflow\";\n\nconst LONG_USER_MESSAGE = `I've been evaluating several enterprise search and AI platforms for our organization, and I have a number of detailed questions about Onyx that I'd like to understand before we make a decision.\n\nFirst, can you explain how Onyx handles document indexing across multiple data sources? We currently use Confluence, Google Drive, Slack, and GitHub, and we need to ensure that all of these can be indexed simultaneously without performance degradation.\n\nSecond, I'm interested in understanding the security model. Specifically, how does Onyx handle document-level permissions when syncing from sources that have their own ACL systems? Does it respect the original source permissions, or does it create its own permission layer?\n\nThird, we have a requirement for real-time or near-real-time indexing. What is the typical latency between a document being updated in a source system and it becoming searchable in Onyx?\n\nFinally, could you walk me through the architecture of the AI chat system? How does it decide which documents to reference when answering a question, and how does it handle cases where the retrieved documents might contain conflicting information?`;\n\nconst SHORT_AI_RESPONSE =\n  \"Onyx is an open-source AI-powered enterprise search platform that connects to your company's documents, apps, and people.\";\n\nconst LONG_AI_RESPONSE = `Onyx is an open-source Gen-AI and Enterprise Search platform designed to connect to your company's documents, applications, and people. Let me address each of your questions in detail.\n\n## Document Indexing\n\nOnyx uses a **connector-based architecture** where each data source has a dedicated connector. These connectors run as background workers and can index simultaneously without interfering with each other. The supported connectors include:\n\n- **Confluence** — Full page and space indexing with attachment support\n- **Google Drive** — File and folder indexing with shared drive support\n- **Slack** — Channel message indexing with thread support\n- **GitHub** — Repository, issue, and pull request indexing\n\nEach connector runs on its own schedule and can be configured independently for polling frequency.\n\n## Security Model\n\nOnyx implements a **document-level permission system** that syncs with source ACLs. When documents are indexed, their permissions are preserved:\n\n\\`\\`\\`\nSource Permission → Onyx ACL Sync → Query-time Filtering\n\\`\\`\\`\n\nThis means that when a user searches, they only see documents they have access to in the original source system. The permission sync runs periodically to stay up to date.\n\n## Indexing Latency\n\nThe typical indexing latency depends on your configuration:\n\n1. **Polling mode**: Documents are picked up on the next polling cycle (configurable, default 10 minutes)\n2. **Webhook mode**: Near real-time, typically under 30 seconds\n3. **Manual trigger**: Immediate indexing on demand\n\n## AI Chat Architecture\n\nThe chat system uses a **Retrieval-Augmented Generation (RAG)** pipeline:\n\n1. User query is analyzed and expanded\n2. Relevant documents are retrieved from the vector database (Vespa)\n3. Documents are ranked and filtered by relevance and permissions\n4. The LLM generates a response grounded in the retrieved documents\n5. Citations are attached to specific claims in the response\n\nWhen documents contain conflicting information, the system presents the most relevant and recent information first, and includes citations so users can verify the source material themselves.`;\n\nconst MARKDOWN_AI_RESPONSE = `Here's a quick overview with various formatting:\n\n### Key Features\n\n| Feature | Status | Notes |\n|---------|--------|-------|\n| Enterprise Search | ✅ Available | Full-text and semantic |\n| AI Chat | ✅ Available | Multi-model support |\n| Connectors | ✅ Available | 30+ integrations |\n| Permissions | ✅ Available | Source ACL sync |\n\n### Code Example\n\n\\`\\`\\`python\nfrom onyx import OnyxClient\n\nclient = OnyxClient(api_key=\"your-key\")\nresults = client.search(\"quarterly revenue report\")\n\nfor doc in results:\n    print(f\"{doc.title}: {doc.score:.2f}\")\n\\`\\`\\`\n\n> **Note**: Onyx supports both cloud and self-hosted deployments. The self-hosted option gives you full control over your data.\n\nKey benefits include:\n\n- **Privacy**: Your data stays within your infrastructure\n- **Flexibility**: Connect any data source via custom connectors\n- **Extensibility**: Open-source codebase with active community`;\n\nconst LATEX_AI_RESPONSE = `Here is a mix of math and plain text:\n\nInline math should render cleanly: \\\\(E = mc^2\\\\).\n\nDisplay math should render on its own line:\n\\\\[\n\\\\int_0^1 x^2 \\\\, dx = \\\\frac{1}{3}\n\\\\]\n\nThis currency value should stay plain text: $100.\n\nAnd this LaTeX source should remain a code block:\n\\`\\`\\`latex\n\\\\int_0^1 x^2 \\\\, dx = \\\\frac{1}{3}\n\\`\\`\\``;\n\ninterface MockDocument {\n  document_id: string;\n  semantic_identifier: string;\n  link: string;\n  source_type: string;\n  blurb: string;\n  is_internet: boolean;\n}\n\ninterface SearchMockOptions {\n  content: string;\n  queries: string[];\n  documents: MockDocument[];\n  /** Maps citation number -> document_id */\n  citations: Record<number, string>;\n  isInternetSearch?: boolean;\n}\n\nlet turnCounter = 0;\n\nfunction buildMockStream(content: string): string {\n  turnCounter += 1;\n  const userMessageId = turnCounter * 100 + 1;\n  const agentMessageId = turnCounter * 100 + 2;\n\n  const packets = [\n    {\n      user_message_id: userMessageId,\n      reserved_assistant_message_id: agentMessageId,\n    },\n    {\n      placement: { turn_index: 0, tab_index: 0 },\n      obj: {\n        type: \"message_start\",\n        id: `mock-${agentMessageId}`,\n        content,\n        final_documents: null,\n      },\n    },\n    {\n      placement: { turn_index: 0, tab_index: 0 },\n      obj: { type: \"stop\", stop_reason: \"finished\" },\n    },\n    {\n      message_id: agentMessageId,\n      citations: {},\n      files: [],\n    },\n  ];\n\n  return `${packets.map((p) => JSON.stringify(p)).join(\"\\n\")}\\n`;\n}\n\nfunction buildMockSearchStream(options: SearchMockOptions): string {\n  turnCounter += 1;\n  const userMessageId = turnCounter * 100 + 1;\n  const agentMessageId = turnCounter * 100 + 2;\n\n  const fullDocs = options.documents.map((doc) => ({\n    ...doc,\n    boost: 0,\n    hidden: false,\n    score: 0.95,\n    chunk_ind: 0,\n    match_highlights: [],\n    metadata: {},\n    updated_at: null,\n  }));\n\n  // Turn 0: search tool\n  // Turn 1: answer + citations\n  const packets: Record<string, unknown>[] = [\n    {\n      user_message_id: userMessageId,\n      reserved_assistant_message_id: agentMessageId,\n    },\n    {\n      placement: { turn_index: 0, tab_index: 0 },\n      obj: {\n        type: \"search_tool_start\",\n        ...(options.isInternetSearch !== undefined && {\n          is_internet_search: options.isInternetSearch,\n        }),\n      },\n    },\n    {\n      placement: { turn_index: 0, tab_index: 0 },\n      obj: { type: \"search_tool_queries_delta\", queries: options.queries },\n    },\n    {\n      placement: { turn_index: 0, tab_index: 0 },\n      obj: { type: \"search_tool_documents_delta\", documents: fullDocs },\n    },\n    {\n      placement: { turn_index: 0, tab_index: 0 },\n      obj: { type: \"section_end\" },\n    },\n    {\n      placement: { turn_index: 1, tab_index: 0 },\n      obj: {\n        type: \"message_start\",\n        id: `mock-${agentMessageId}`,\n        content: options.content,\n        final_documents: fullDocs,\n      },\n    },\n    ...Object.entries(options.citations).map(([num, docId]) => ({\n      placement: { turn_index: 1, tab_index: 0 },\n      obj: {\n        type: \"citation_info\",\n        citation_number: Number(num),\n        document_id: docId,\n      },\n    })),\n    {\n      placement: { turn_index: 1, tab_index: 0 },\n      obj: { type: \"stop\", stop_reason: \"finished\" },\n    },\n    {\n      message_id: agentMessageId,\n      citations: options.citations,\n      files: [],\n    },\n  ];\n\n  return `${packets.map((p) => JSON.stringify(p)).join(\"\\n\")}\\n`;\n}\n\nasync function openChat(page: Page): Promise<void> {\n  await page.goto(\"/app\");\n  await page.waitForLoadState(\"networkidle\");\n  await page.waitForSelector(\"#onyx-chat-input-textarea\", { timeout: 15000 });\n}\n\nasync function mockChatEndpoint(\n  page: Page,\n  responseContent: string\n): Promise<void> {\n  await page.route(\"**/api/chat/send-chat-message\", async (route) => {\n    await route.fulfill({\n      status: 200,\n      contentType: \"text/plain\",\n      body: buildMockStream(responseContent),\n    });\n  });\n}\n\nasync function mockChatEndpointSequence(\n  page: Page,\n  responses: string[]\n): Promise<void> {\n  let callIndex = 0;\n  await page.route(\"**/api/chat/send-chat-message\", async (route) => {\n    const content =\n      responses[Math.min(callIndex, responses.length - 1)] ??\n      responses[responses.length - 1]!;\n    callIndex += 1;\n    await route.fulfill({\n      status: 200,\n      contentType: \"text/plain\",\n      body: buildMockStream(content),\n    });\n  });\n}\n\nasync function scrollChatTo(\n  page: Page,\n  position: \"top\" | \"bottom\"\n): Promise<void> {\n  const scrollContainer = page.getByTestId(\"chat-scroll-container\");\n  await scrollContainer.evaluate(async (el, pos) => {\n    el.scrollTo({ top: pos === \"top\" ? 0 : el.scrollHeight });\n    await new Promise<void>((r) => requestAnimationFrame(() => r()));\n  }, position);\n}\n\nasync function screenshotChatContainer(\n  page: Page,\n  name: string\n): Promise<void> {\n  const container = page.locator(\"[data-main-container]\");\n  await expect(container).toBeVisible();\n  await scrollChatTo(page, \"bottom\");\n  await expectElementScreenshot(container, { name });\n}\n\n/**\n * Captures two screenshots of the chat container for long-content tests:\n * one scrolled to the top and one scrolled to the bottom. Both are captured\n * for the current theme, ensuring consistent scroll positions regardless of\n * whether the page was just navigated to (top) or just finished streaming (bottom).\n */\nasync function screenshotChatContainerTopAndBottom(\n  page: Page,\n  name: string\n): Promise<void> {\n  const container = page.locator(\"[data-main-container]\");\n  await expect(container).toBeVisible();\n\n  await scrollChatTo(page, \"top\");\n  await expectElementScreenshot(container, { name: `${name}-top` });\n\n  await scrollChatTo(page, \"bottom\");\n  await expectElementScreenshot(container, { name: `${name}-bottom` });\n}\n\nfor (const theme of THEMES) {\n  test.describe(`Chat Message Rendering (${theme} mode)`, () => {\n    test.beforeEach(async ({ page }, testInfo) => {\n      turnCounter = 0;\n      await page.context().clearCookies();\n      await setThemeBeforeNavigation(page, theme);\n      await loginAsWorkerUser(page, testInfo.workerIndex);\n    });\n\n    test.describe(\"Short Messages\", () => {\n      test(\"short user message with short AI response renders correctly\", async ({\n        page,\n      }) => {\n        await openChat(page);\n        await mockChatEndpoint(page, SHORT_AI_RESPONSE);\n\n        await sendMessage(page, SHORT_USER_MESSAGE);\n\n        const userMessage = page.locator(\"#onyx-human-message\").first();\n        await expect(userMessage).toContainText(SHORT_USER_MESSAGE);\n\n        const aiMessage = page.getByTestId(\"onyx-ai-message\").first();\n        await expect(aiMessage).toContainText(\"open-source AI-powered\");\n\n        await screenshotChatContainer(\n          page,\n          `chat-short-message-short-response-${theme}`\n        );\n      });\n    });\n\n    test.describe(\"Long Messages\", () => {\n      test(\"long user message renders without truncation\", async ({ page }) => {\n        await openChat(page);\n        await mockChatEndpoint(page, SHORT_AI_RESPONSE);\n\n        await sendMessage(page, LONG_USER_MESSAGE);\n\n        const userMessage = page.locator(\"#onyx-human-message\").first();\n        await expect(userMessage).toContainText(\"document indexing\");\n        await expect(userMessage).toContainText(\"security model\");\n        await expect(userMessage).toContainText(\"real-time or near-real-time\");\n        await expect(userMessage).toContainText(\"architecture of the AI chat\");\n\n        await screenshotChatContainer(\n          page,\n          `chat-long-user-message-short-response-${theme}`\n        );\n      });\n\n      test(\"long AI response with markdown renders correctly\", async ({\n        page,\n      }) => {\n        await openChat(page);\n        await mockChatEndpoint(page, LONG_AI_RESPONSE);\n\n        await sendMessage(page, SHORT_USER_MESSAGE);\n\n        const aiMessage = page.getByTestId(\"onyx-ai-message\").first();\n        await expect(aiMessage).toContainText(\"Document Indexing\");\n        await expect(aiMessage).toContainText(\"Security Model\");\n        await expect(aiMessage).toContainText(\"Indexing Latency\");\n        await expect(aiMessage).toContainText(\"AI Chat Architecture\");\n\n        await screenshotChatContainerTopAndBottom(\n          page,\n          `chat-short-message-long-response-${theme}`\n        );\n      });\n\n      test(\"user message with very long words wraps without overflowing\", async ({\n        page,\n      }) => {\n        await openChat(page);\n        await mockChatEndpoint(page, SHORT_AI_RESPONSE);\n\n        await sendMessage(page, LONG_WORD_USER_MESSAGE);\n\n        const userMessage = page.locator(\"#onyx-human-message\").first();\n        await expect(userMessage).toContainText(\"__________\");\n\n        await screenshotChatContainer(\n          page,\n          `chat-long-word-user-message-${theme}`\n        );\n\n        // Assert the message bubble does not overflow horizontally.\n        const overflows = await userMessage.evaluate((el) => {\n          const bubble = el.querySelector<HTMLElement>(\n            \".whitespace-break-spaces\"\n          );\n          if (!bubble)\n            throw new Error(\n              \"Expected human message bubble (.whitespace-break-spaces) to exist\"\n            );\n          return bubble.scrollWidth > bubble.offsetWidth;\n        });\n        expect(overflows).toBe(false);\n      });\n\n      test(\"long user message with long AI response renders correctly\", async ({\n        page,\n      }) => {\n        await openChat(page);\n        await mockChatEndpoint(page, LONG_AI_RESPONSE);\n\n        await sendMessage(page, LONG_USER_MESSAGE);\n\n        const userMessage = page.locator(\"#onyx-human-message\").first();\n        await expect(userMessage).toContainText(\"document indexing\");\n\n        const aiMessage = page.getByTestId(\"onyx-ai-message\").first();\n        await expect(aiMessage).toContainText(\"Retrieval-Augmented Generation\");\n\n        await screenshotChatContainerTopAndBottom(\n          page,\n          `chat-long-message-long-response-${theme}`\n        );\n      });\n    });\n\n    test.describe(\"Markdown and Code Rendering\", () => {\n      test(\"AI response with tables and code blocks renders correctly\", async ({\n        page,\n      }) => {\n        await openChat(page);\n        await mockChatEndpoint(page, MARKDOWN_AI_RESPONSE);\n\n        await sendMessage(page, \"Give me an overview of Onyx features\");\n\n        const aiMessage = page.getByTestId(\"onyx-ai-message\").first();\n        await expect(aiMessage).toContainText(\"Key Features\");\n        await expect(aiMessage).toContainText(\"OnyxClient\");\n        await expect(aiMessage).toContainText(\"Privacy\");\n\n        await screenshotChatContainer(\n          page,\n          `chat-markdown-code-response-${theme}`\n        );\n      });\n\n      test(\"AI response with LaTeX math renders correctly\", async ({\n        page,\n      }) => {\n        await openChat(page);\n        await mockChatEndpoint(page, LATEX_AI_RESPONSE);\n\n        await sendMessage(page, \"Show me inline and block math\");\n\n        const aiMessage = page.getByTestId(\"onyx-ai-message\").first();\n\n        await screenshotChatContainer(\n          page,\n          `chat-latex-math-response-${theme}`\n        );\n\n        await expect(aiMessage).toContainText(\"Inline math should render\");\n        await expect(aiMessage).toContainText(\n          \"This currency value should stay plain text: $100.\"\n        );\n        await expect(aiMessage.locator(\".katex\")).toHaveCount(2);\n        await expect(aiMessage.locator(\".katex-display\")).toBeVisible();\n        await expect(aiMessage.getByRole(\"code\")).toContainText(\n          \"\\\\int_0^1 x^2 \\\\, dx = \\\\frac{1}{3}\"\n        );\n      });\n    });\n\n    test.describe(\"Multi-Turn Conversation\", () => {\n      test(\"multi-turn conversation renders all messages correctly\", async ({\n        page,\n      }) => {\n        await openChat(page);\n\n        const responses = [\n          SHORT_AI_RESPONSE,\n          \"Yes, Onyx supports over 30 data source connectors including Confluence, Google Drive, Slack, GitHub, Jira, Notion, and many more.\",\n          \"To get started, you can deploy Onyx using Docker Compose with a single command. The setup takes about 5 minutes.\",\n        ];\n\n        await mockChatEndpointSequence(page, responses);\n\n        await sendMessage(page, SHORT_USER_MESSAGE);\n        await expect(page.getByTestId(\"onyx-ai-message\").first()).toContainText(\n          \"open-source AI-powered\"\n        );\n\n        await sendMessage(page, \"What connectors does it support?\");\n        await expect(page.getByTestId(\"onyx-ai-message\")).toHaveCount(2, {\n          timeout: 30000,\n        });\n\n        await sendMessage(page, \"How do I get started?\");\n        await expect(page.getByTestId(\"onyx-ai-message\")).toHaveCount(3, {\n          timeout: 30000,\n        });\n\n        const userMessages = page.locator(\"#onyx-human-message\");\n        await expect(userMessages).toHaveCount(3);\n\n        await screenshotChatContainerTopAndBottom(\n          page,\n          `chat-multi-turn-conversation-${theme}`\n        );\n      });\n\n      test(\"multi-turn with mixed message lengths renders correctly\", async ({\n        page,\n      }) => {\n        await openChat(page);\n\n        const responses = [LONG_AI_RESPONSE, SHORT_AI_RESPONSE];\n\n        await mockChatEndpointSequence(page, responses);\n\n        await sendMessage(page, LONG_USER_MESSAGE);\n        await expect(page.getByTestId(\"onyx-ai-message\").first()).toContainText(\n          \"Document Indexing\"\n        );\n\n        await sendMessage(page, SHORT_USER_MESSAGE);\n        await expect(page.getByTestId(\"onyx-ai-message\")).toHaveCount(2, {\n          timeout: 30000,\n        });\n\n        await screenshotChatContainerTopAndBottom(\n          page,\n          `chat-multi-turn-mixed-lengths-${theme}`\n        );\n      });\n    });\n\n    test.describe(\"Web Search with Citations\", () => {\n      const TOOLBAR_BUTTONS = [\n        \"AgentMessage/copy-button\",\n        \"AgentMessage/like-button\",\n        \"AgentMessage/dislike-button\",\n      ] as const;\n\n      async function screenshotToolbarButtonHoverStates(\n        page: Page,\n        namePrefix: string\n      ): Promise<void> {\n        const aiMessage = page.getByTestId(\"onyx-ai-message\").first();\n        const toolbar = aiMessage.getByTestId(\"AgentMessage/toolbar\");\n        await expect(toolbar).toBeVisible({ timeout: 10000 });\n\n        await toolbar.scrollIntoViewIfNeeded();\n        await page.evaluate(\n          () => new Promise<void>((r) => requestAnimationFrame(() => r()))\n        );\n\n        for (const buttonTestId of TOOLBAR_BUTTONS) {\n          const button = aiMessage.getByTestId(buttonTestId);\n          await button.hover();\n          const buttonSlug = buttonTestId.split(\"/\")[1];\n          await expectElementScreenshot(toolbar, {\n            name: `${namePrefix}-toolbar-${buttonSlug}-hover-${theme}`,\n          });\n        }\n\n        // Sources tag is located by role+name since SourceTag has no testid.\n        const sourcesButton = toolbar.getByRole(\"button\", { name: \"Sources\" });\n        if (await sourcesButton.isVisible()) {\n          await sourcesButton.hover();\n          await expectElementScreenshot(toolbar, {\n            name: `${namePrefix}-toolbar-sources-hover-${theme}`,\n          });\n        }\n\n        // LLMPopover trigger is only rendered when the regenerate action is\n        // available (requires onRegenerate + parentMessage + llmManager props).\n        const llmTrigger = aiMessage.getByTestId(\"llm-popover-trigger\");\n        if (await llmTrigger.isVisible()) {\n          await llmTrigger.hover();\n          await expectElementScreenshot(toolbar, {\n            name: `${namePrefix}-toolbar-llm-popover-hover-${theme}`,\n          });\n        }\n      }\n\n      const WEB_SEARCH_DOCUMENTS: MockDocument[] = [\n        {\n          document_id: \"web-doc-1\",\n          semantic_identifier: \"Onyx Documentation - Getting Started\",\n          link: \"https://docs.onyx.app/getting-started\",\n          source_type: \"web\",\n          blurb:\n            \"Onyx is an open-source enterprise search and AI platform. Deploy in minutes with Docker Compose.\",\n          is_internet: true,\n        },\n        {\n          document_id: \"web-doc-2\",\n          semantic_identifier: \"Onyx GitHub Repository\",\n          link: \"https://github.com/onyx-dot-app/onyx\",\n          source_type: \"web\",\n          blurb:\n            \"Open-source Gen-AI platform with 30+ connectors. MIT licensed community edition.\",\n          is_internet: true,\n        },\n        {\n          document_id: \"web-doc-3\",\n          semantic_identifier: \"Enterprise Search Comparison 2025\",\n          link: \"https://example.com/enterprise-search-comparison\",\n          source_type: \"web\",\n          blurb:\n            \"Comparing top enterprise search platforms including Onyx, Glean, and Coveo.\",\n          is_internet: true,\n        },\n      ];\n\n      const WEB_SEARCH_RESPONSE = `Based on my web search, here's what I found about Onyx:\n\nOnyx is an open-source enterprise search and AI platform that can be deployed in minutes using Docker Compose [[D1]](https://docs.onyx.app/getting-started). The project is hosted on GitHub and is MIT licensed for the community edition, with over 30 connectors available [[D2]](https://github.com/onyx-dot-app/onyx).\n\nIn comparisons with other enterprise search platforms, Onyx stands out for its open-source nature and self-hosted deployment option [[D3]](https://example.com/enterprise-search-comparison). Unlike proprietary alternatives, you maintain full control over your data and infrastructure.\n\nKey advantages include:\n\n- **Self-hosted**: Deploy on your own infrastructure\n- **Open source**: Full visibility into the codebase [[D2]](https://github.com/onyx-dot-app/onyx)\n- **Quick setup**: Get running in under 5 minutes [[D1]](https://docs.onyx.app/getting-started)\n- **Extensible**: 30+ pre-built connectors with custom connector support`;\n\n      test(\"web search response with citations renders correctly\", async ({\n        page,\n      }) => {\n        await openChat(page);\n\n        await page.route(\"**/api/chat/send-chat-message\", async (route) => {\n          await route.fulfill({\n            status: 200,\n            contentType: \"text/plain\",\n            body: buildMockSearchStream({\n              content: WEB_SEARCH_RESPONSE,\n              queries: [\"Onyx enterprise search platform overview\"],\n              documents: WEB_SEARCH_DOCUMENTS,\n              citations: {\n                1: \"web-doc-1\",\n                2: \"web-doc-2\",\n                3: \"web-doc-3\",\n              },\n              isInternetSearch: true,\n            }),\n          });\n        });\n\n        await sendMessage(page, \"Search the web for information about Onyx\");\n\n        const aiMessage = page.getByTestId(\"onyx-ai-message\").first();\n        await expect(aiMessage).toContainText(\"open-source enterprise search\");\n        await expect(aiMessage).toContainText(\"Docker Compose\");\n        await expect(aiMessage).toContainText(\"MIT licensed\");\n\n        await screenshotChatContainer(\n          page,\n          `chat-web-search-with-citations-${theme}`\n        );\n\n        await screenshotToolbarButtonHoverStates(page, \"chat-web-search\");\n      });\n\n      test(\"internal document search response renders correctly\", async ({\n        page,\n      }) => {\n        const internalDocs: MockDocument[] = [\n          {\n            document_id: \"confluence-doc-1\",\n            semantic_identifier: \"Q3 2025 Engineering Roadmap\",\n            link: \"https://company.atlassian.net/wiki/spaces/ENG/pages/123\",\n            source_type: \"confluence\",\n            blurb:\n              \"Engineering priorities for Q3 include platform stability, new connector integrations, and performance improvements.\",\n            is_internet: false,\n          },\n          {\n            document_id: \"gdrive-doc-1\",\n            semantic_identifier: \"Platform Architecture Overview.pdf\",\n            link: \"https://drive.google.com/file/d/abc123\",\n            source_type: \"google_drive\",\n            blurb:\n              \"Onyx platform architecture document covering microservices, data flow, and deployment topology.\",\n            is_internet: false,\n          },\n        ];\n\n        const internalResponse = `Based on your company's internal documents, here is the engineering roadmap:\n\nThe Q3 2025 priorities focus on three main areas [[D1]](https://company.atlassian.net/wiki/spaces/ENG/pages/123):\n\n1. **Platform stability** — Improving error handling and retry mechanisms across all connectors\n2. **New integrations** — Adding support for ServiceNow and Zendesk connectors\n3. **Performance** — Optimizing vector search latency and reducing indexing time\n\nThe platform architecture document provides additional context on how these improvements fit into the overall system design [[D2]](https://drive.google.com/file/d/abc123). The microservices architecture allows each component to be scaled independently.`;\n\n        await openChat(page);\n\n        await page.route(\"**/api/chat/send-chat-message\", async (route) => {\n          await route.fulfill({\n            status: 200,\n            contentType: \"text/plain\",\n            body: buildMockSearchStream({\n              content: internalResponse,\n              queries: [\"Q3 engineering roadmap priorities\"],\n              documents: internalDocs,\n              citations: {\n                1: \"confluence-doc-1\",\n                2: \"gdrive-doc-1\",\n              },\n              isInternetSearch: false,\n            }),\n          });\n        });\n\n        await sendMessage(page, \"What are our engineering priorities for Q3?\");\n\n        const aiMessage = page.getByTestId(\"onyx-ai-message\").first();\n        await expect(aiMessage).toContainText(\"Platform stability\");\n        await expect(aiMessage).toContainText(\"New integrations\");\n        await expect(aiMessage).toContainText(\"Performance\");\n\n        await screenshotChatContainer(\n          page,\n          `chat-internal-search-with-citations-${theme}`\n        );\n\n        await screenshotToolbarButtonHoverStates(page, \"chat-internal-search\");\n      });\n    });\n\n    test.describe(\"Header Levels\", () => {\n      const HEADINGS_RESPONSE = `# Getting Started\n\nThis is the introductory paragraph.\n\n## Installing the \\`onyx-sdk\\`\n\nFollow these steps to install the SDK.\n\n### Configuration Options\n\nSome details about configuration.\n\n#### The \\`max_results\\` Parameter\n\nSet \\`max_results\\` to limit the number of returned documents.`;\n\n      test(\"h1 through h4 headings with inline code render correctly\", async ({\n        page,\n      }) => {\n        await openChat(page);\n        await mockChatEndpoint(page, HEADINGS_RESPONSE);\n\n        await sendMessage(page, \"Show me all heading levels\");\n\n        const aiMessage = page.getByTestId(\"onyx-ai-message\").first();\n\n        await expect(aiMessage.locator(\"h1\")).toContainText(\"Getting Started\");\n        await expect(aiMessage.locator(\"h2\")).toContainText(\"Installing the\");\n        await expect(\n          aiMessage.locator(\"h2\").locator('[data-testid=\"code-block\"]')\n        ).toContainText(\"onyx-sdk\");\n        await expect(aiMessage.locator(\"h3\")).toContainText(\n          \"Configuration Options\"\n        );\n        await expect(aiMessage.locator(\"h4\")).toContainText(\"Parameter\");\n        await expect(\n          aiMessage.locator(\"h4\").locator('[data-testid=\"code-block\"]')\n        ).toContainText(\"max_results\");\n\n        await expect(aiMessage.locator(\"h1\")).toHaveCount(1);\n        await expect(aiMessage.locator(\"h2\")).toHaveCount(1);\n        await expect(aiMessage.locator(\"h3\")).toHaveCount(1);\n        await expect(aiMessage.locator(\"h4\")).toHaveCount(1);\n\n        await screenshotChatContainer(\n          page,\n          `chat-heading-levels-h1-h4-${theme}`\n        );\n      });\n    });\n\n    test.describe(\"Message Interaction States\", () => {\n      test(\"hovering over user message shows action buttons\", async ({\n        page,\n      }) => {\n        await openChat(page);\n        await mockChatEndpoint(page, SHORT_AI_RESPONSE);\n\n        await sendMessage(page, SHORT_USER_MESSAGE);\n\n        const userMessage = page.locator(\"#onyx-human-message\").first();\n        await userMessage.hover();\n\n        const editButton = userMessage.getByTestId(\"HumanMessage/edit-button\");\n        await expect(editButton).toBeVisible({ timeout: 5000 });\n\n        await screenshotChatContainer(\n          page,\n          `chat-user-message-hover-state-${theme}`\n        );\n      });\n\n      test(\"AI message toolbar is visible after response completes\", async ({\n        page,\n      }) => {\n        await openChat(page);\n        await mockChatEndpoint(page, SHORT_AI_RESPONSE);\n\n        await sendMessage(page, SHORT_USER_MESSAGE);\n\n        const aiMessage = page.getByTestId(\"onyx-ai-message\").first();\n\n        const copyButton = aiMessage.getByTestId(\"AgentMessage/copy-button\");\n        const likeButton = aiMessage.getByTestId(\"AgentMessage/like-button\");\n        const dislikeButton = aiMessage.getByTestId(\n          \"AgentMessage/dislike-button\"\n        );\n\n        await expect(copyButton).toBeVisible({ timeout: 10000 });\n        await expect(likeButton).toBeVisible();\n        await expect(dislikeButton).toBeVisible();\n\n        await screenshotChatContainer(\n          page,\n          `chat-ai-message-with-toolbar-${theme}`\n        );\n      });\n    });\n  });\n}\n"
  },
  {
    "path": "web/tests/e2e/chat/chat_session_not_found.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { THEMES, setThemeBeforeNavigation } from \"@tests/e2e/utils/theme\";\nimport { expectElementScreenshot } from \"@tests/e2e/utils/visualRegression\";\n\nconst NON_EXISTENT_CHAT_ID = \"00000000-0000-0000-0000-000000000000\";\n\nfor (const theme of THEMES) {\n  test.describe(`Chat session not found (${theme} mode)`, () => {\n    test.beforeEach(async ({ page }) => {\n      await setThemeBeforeNavigation(page, theme);\n    });\n\n    test(\"should show 404 page for a non-existent chat session\", async ({\n      page,\n    }) => {\n      await page.goto(`/app?chatId=${NON_EXISTENT_CHAT_ID}`);\n\n      await expect(page.getByText(\"Chat not found\")).toBeVisible({\n        timeout: 10000,\n      });\n      await expect(\n        page.getByText(\"This chat session doesn't exist or has been deleted.\")\n      ).toBeVisible();\n      await expect(\n        page.getByRole(\"link\", { name: \"Start a new chat\" })\n      ).toBeVisible();\n\n      // Sidebar should still be visible\n      await expect(page.getByTestId(\"AppSidebar/new-session\")).toBeVisible();\n\n      const container = page.locator(\"[data-main-container]\");\n      await expect(container).toBeVisible();\n      await expectElementScreenshot(container, {\n        name: `chat-session-not-found-${theme}`,\n      });\n    });\n\n    test(\"should navigate to /app when clicking Start a new chat\", async ({\n      page,\n    }) => {\n      await page.goto(`/app?chatId=${NON_EXISTENT_CHAT_ID}`);\n\n      await expect(page.getByText(\"Chat not found\")).toBeVisible({\n        timeout: 10000,\n      });\n\n      await page.getByRole(\"link\", { name: \"Start a new chat\" }).click();\n      await page.waitForLoadState(\"networkidle\");\n\n      await expect(page).toHaveURL(\"/app\");\n      await expect(page.getByText(\"Chat not found\")).toBeHidden();\n    });\n  });\n}\n"
  },
  {
    "path": "web/tests/e2e/chat/current_agent.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { dragElementAbove, dragElementBelow } from \"@tests/e2e/utils/dragUtils\";\nimport { loginAsRandomUser } from \"@tests/e2e/utils/auth\";\nimport { createAgent, pinAgentByName } from \"@tests/e2e/utils/agentUtils\";\n\n// TODO (chris): figure out why this test is flakey\ntest.skip(\"Assistant Drag and Drop\", async ({ page }) => {\n  await page.context().clearCookies();\n  await loginAsRandomUser(page);\n\n  // Navigate to the chat page\n  await page.goto(\"/app\");\n\n  // Ensure at least two assistants exist for drag-and-drop\n  const ts = Date.now();\n  const nameA = `E2E Assistant A ${ts}`;\n  const nameB = `E2E Assistant B ${ts}`;\n  const nameC = `E2E Assistant C ${ts}`;\n  await createAgent(page, {\n    name: nameA,\n    description: \"E2E-created assistant A\",\n    instructions: \"Assistant A instructions\",\n  });\n  await pinAgentByName(page, nameA);\n  await expect(\n    page.locator('[data-testid^=\"assistant-[\"]').filter({ hasText: nameA })\n  ).toBeVisible();\n\n  await createAgent(page, {\n    name: nameB,\n    description: \"E2E-created assistant B\",\n    instructions: \"Assistant B instructions\",\n  });\n  await pinAgentByName(page, nameB);\n  await expect(\n    page.locator('[data-testid^=\"assistant-[\"]').filter({ hasText: nameB })\n  ).toBeVisible();\n\n  await createAgent(page, {\n    name: nameC,\n    description: \"E2E-created assistant C\",\n    instructions: \"Assistant C instructions\",\n  });\n  await pinAgentByName(page, nameC);\n  await expect(\n    page.locator('[data-testid^=\"assistant-[\"]').filter({ hasText: nameC })\n  ).toBeVisible();\n\n  // Helper function to get the current order of assistants\n  const getAssistantOrder = async () => {\n    const assistants = await page.$$('[data-testid^=\"assistant-[\"]');\n    const names = await Promise.all(\n      assistants.map(async (assistant) => {\n        const nameEl = await assistant.$(\"span.line-clamp-1\");\n        const txt = nameEl ? await nameEl.textContent() : null;\n        return (txt || \"\").trim();\n      })\n    );\n    return names;\n  };\n\n  // Get the initial order\n  const initialOrder = await getAssistantOrder();\n\n  // Drag second assistant above first\n  const secondAssistant = page.locator('[data-testid^=\"assistant-[\"]').nth(1);\n  const firstAssistant = page.locator('[data-testid^=\"assistant-[\"]').nth(0);\n\n  await dragElementAbove(secondAssistant, firstAssistant, page);\n\n  // Check new order\n  // wait a second to make sure that the order has been applied\n  await page.waitForTimeout(500);\n  const orderAfterDragUp = await getAssistantOrder();\n  expect(orderAfterDragUp[0]).toBe(initialOrder[1]);\n  expect(orderAfterDragUp[1]).toBe(initialOrder[0]);\n\n  // Drag last assistant to second position\n  const assistants = page.locator('[data-testid^=\"assistant-[\"]');\n  const lastIndex = (await assistants.count()) - 1;\n  const lastAssistant = assistants.nth(lastIndex);\n  const secondPosition = assistants.nth(1);\n\n  await page.waitForTimeout(3000);\n  await dragElementBelow(lastAssistant, secondPosition, page);\n\n  // Check new order\n  // wait a second to make sure that the order has been applied\n  await page.waitForTimeout(500);\n  const orderAfterDragDown = await getAssistantOrder();\n  expect(orderAfterDragDown[1]).toBe(initialOrder[lastIndex]);\n\n  // Refresh and verify order\n  await page.reload();\n  const orderAfterRefresh = await getAssistantOrder();\n  expect(orderAfterRefresh).toEqual(orderAfterDragDown);\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/default_agent.spec.ts",
    "content": "import { GREETING_MESSAGES } from \"@/lib/chat/greetingMessages\";\nimport { test, expect } from \"@playwright/test\";\nimport { loginAsRandomUser, loginAs } from \"@tests/e2e/utils/auth\";\nimport {\n  sendMessage,\n  startNewChat,\n  verifyAgentIsChosen,\n  verifyDefaultAgentIsChosen,\n} from \"@tests/e2e/utils/chatActions\";\nimport {\n  TOOL_IDS,\n  openActionManagement,\n  waitForUnifiedGreeting,\n} from \"@tests/e2e/utils/tools\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\n// Tool-related test selectors now imported from shared utils\n\ntest.describe(\"Default Agent Tests\", () => {\n  let imageGenConfigId: string | null = null;\n\n  test.beforeAll(async ({ browser }) => {\n    // Create image generation config as admin so ImageGenerationTool becomes available\n    // This is needed because the Create Agent form enables Image Generation by default\n    const adminContext = await browser.newContext({\n      storageState: \"admin_auth.json\",\n    });\n    const adminPage = await adminContext.newPage();\n    await adminPage.goto(\"http://localhost:3000/app\");\n    await adminPage.waitForLoadState(\"networkidle\");\n\n    const apiClient = new OnyxApiClient(adminPage.request);\n    try {\n      imageGenConfigId = await apiClient.createImageGenerationConfig(\n        `test-default-assistant-${Date.now()}`\n      );\n    } catch (error) {\n      console.warn(`Failed to create image generation config: ${error}`);\n    }\n\n    await adminContext.close();\n  });\n\n  test.afterAll(async ({ browser }) => {\n    // Cleanup the image generation config\n    if (imageGenConfigId) {\n      const adminContext = await browser.newContext({\n        storageState: \"admin_auth.json\",\n      });\n      const adminPage = await adminContext.newPage();\n      await adminPage.goto(\"http://localhost:3000/app\");\n      await adminPage.waitForLoadState(\"networkidle\");\n\n      const apiClient = new OnyxApiClient(adminPage.request);\n      await apiClient.deleteImageGenerationConfig(imageGenConfigId);\n\n      await adminContext.close();\n    }\n  });\n\n  test.beforeEach(async ({ page }) => {\n    // Clear cookies and log in as a random user\n    await page.context().clearCookies();\n    await loginAsRandomUser(page);\n\n    // Navigate to the chat page\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n  });\n\n  test.describe(\"Greeting Message Display\", () => {\n    test(\"should display greeting message when opening new chat with default agent\", async ({\n      page,\n    }) => {\n      // Look for greeting message - should be one from the predefined list\n      const greeting = await waitForUnifiedGreeting(page);\n      expect(GREETING_MESSAGES).toContain(greeting.trim());\n    });\n\n    test(\"greeting message should remain consistent during session\", async ({\n      page,\n    }) => {\n      // Get initial greeting\n      const initialGreeting = await waitForUnifiedGreeting(page);\n\n      // Reload the page\n      await page.reload();\n      await page.waitForLoadState(\"networkidle\");\n\n      // Get greeting after reload\n      const greetingAfterReload = await waitForUnifiedGreeting(page);\n\n      // Both greetings should be valid but might differ after reload\n      expect(GREETING_MESSAGES).toContain(initialGreeting?.trim());\n      expect(GREETING_MESSAGES).toContain(greetingAfterReload?.trim());\n    });\n\n    test(\"greeting should only appear for default agent\", async ({ page }) => {\n      // First verify greeting appears for default agent\n      const greetingElement = await page.waitForSelector(\n        '[data-testid=\"onyx-logo\"]',\n        { timeout: 5000 }\n      );\n      expect(greetingElement).toBeTruthy();\n\n      // Create a custom agent to test non-default behavior\n      await page.getByTestId(\"AppSidebar/more-agents\").click();\n      await page.getByLabel(\"AgentsPage/new-agent-button\").click();\n      await page\n        .locator('input[name=\"name\"]')\n        .waitFor({ state: \"visible\", timeout: 10000 });\n      await page.locator('input[name=\"name\"]').fill(\"Custom Test Agent\");\n      await page\n        .locator('textarea[name=\"description\"]')\n        .fill(\"Test Description\");\n      await page\n        .locator('textarea[name=\"instructions\"]')\n        .fill(\"Test Instructions\");\n      await page.getByRole(\"button\", { name: \"Create\" }).click();\n\n      // Wait for agent to be created and selected\n      await verifyAgentIsChosen(page, \"Custom Test Agent\");\n\n      // Greeting should NOT appear for custom agent\n      const customGreeting = await page.$('[data-testid=\"onyx-logo\"]');\n      expect(customGreeting).toBeNull();\n    });\n  });\n\n  test.describe(\"Default Agent Branding\", () => {\n    test(\"should display Onyx logo for default agent\", async ({ page }) => {\n      // Look for Onyx logo\n      const logoElement = await page.waitForSelector(\n        '[data-testid=\"onyx-logo\"]',\n        { timeout: 5000 }\n      );\n      expect(logoElement).toBeTruthy();\n\n      // Should NOT show agent name for default agent\n      const agentNameElement = await page.$(\n        '[data-testid=\"agent-name-display\"]'\n      );\n      expect(agentNameElement).toBeNull();\n    });\n\n    test(\"custom agents should show name and icon instead of logo\", async ({\n      page,\n    }) => {\n      // Create a custom agent\n      await page.getByTestId(\"AppSidebar/more-agents\").click();\n      await page.getByLabel(\"AgentsPage/new-agent-button\").click();\n      await page\n        .locator('input[name=\"name\"]')\n        .waitFor({ state: \"visible\", timeout: 10000 });\n      await page.locator('input[name=\"name\"]').fill(\"Custom Agent\");\n      await page\n        .locator('textarea[name=\"description\"]')\n        .fill(\"Test Description\");\n      await page\n        .locator('textarea[name=\"instructions\"]')\n        .fill(\"Test Instructions\");\n      await page.getByRole(\"button\", { name: \"Create\" }).click();\n\n      // Wait for agent to be created and selected\n      await verifyAgentIsChosen(page, \"Custom Agent\");\n\n      // Should show agent name and icon, not Onyx logo\n      const agentNameElement = await page.waitForSelector(\n        '[data-testid=\"agent-name-display\"]',\n        { timeout: 5000 }\n      );\n      const nameText = await agentNameElement.textContent();\n      expect(nameText).toContain(\"Custom Agent\");\n\n      // Onyx logo should NOT be shown\n      const logoElement = await page.$('[data-testid=\"onyx-logo\"]');\n      expect(logoElement).toBeNull();\n    });\n  });\n\n  test.describe(\"Starter Messages\", () => {\n    test(\"default agent should NOT have starter messages\", async ({ page }) => {\n      // Check that starter messages container does not exist for default agent\n      const starterMessagesContainer = await page.$(\n        '[data-testid=\"starter-messages\"]'\n      );\n      expect(starterMessagesContainer).toBeNull();\n\n      // Verify no starter message buttons exist\n      const starterButtons = await page.$$('[data-testid^=\"starter-message-\"]');\n      expect(starterButtons.length).toBe(0);\n    });\n\n    test(\"custom agents should display starter messages\", async ({ page }) => {\n      // Create a custom agent with starter messages\n      await page.getByTestId(\"AppSidebar/more-agents\").click();\n      await page.getByLabel(\"AgentsPage/new-agent-button\").click();\n      await page\n        .locator('input[name=\"name\"]')\n        .waitFor({ state: \"visible\", timeout: 10000 });\n      await page.locator('input[name=\"name\"]').fill(\"Test Agent with Starters\");\n      await page\n        .locator('textarea[name=\"description\"]')\n        .fill(\"Test Description\");\n      await page\n        .locator('textarea[name=\"instructions\"]')\n        .fill(\"Test Instructions\");\n\n      // Add starter messages (if the UI supports it)\n      // For now, we'll create without starter messages and check the behavior\n      await page.getByRole(\"button\", { name: \"Create\" }).click();\n\n      // Wait for assistant to be created and selected\n      await verifyAgentIsChosen(page, \"Test Agent with Starters\");\n\n      // Starter messages container might exist but be empty for custom agents\n      const starterMessagesContainer = await page.$(\n        '[data-testid=\"starter-messages\"]'\n      );\n      // It's okay if it exists but has no messages, or doesn't exist at all\n      if (starterMessagesContainer) {\n        const starterButtons = await page.$$(\n          '[data-testid^=\"starter-message-\"]'\n        );\n        // Custom agent without configured starter messages should have none\n        expect(starterButtons.length).toBe(0);\n      }\n    });\n  });\n\n  test.describe(\"Agent Selection\", () => {\n    test(\"default agent should be selected for new chats\", async ({ page }) => {\n      // Verify the input placeholder indicates default agent (Onyx)\n      await verifyDefaultAgentIsChosen(page);\n    });\n\n    test(\"default agent should NOT appear in agent selector\", async ({\n      page,\n    }) => {\n      // Open agent selector\n      await page.getByTestId(\"AppSidebar/more-agents\").click();\n\n      // Wait for modal or assistant list to appear\n      // The selector might be in a modal or dropdown.\n      await page\n        .getByLabel(\"AgentsPage/new-agent-button\")\n        .waitFor({ state: \"visible\", timeout: 5000 });\n\n      // Look for default agent by name - it should NOT be there\n      const assistantElements = await page.$$('[data-testid^=\"agent-\"]');\n      const assistantTexts = await Promise.all(\n        assistantElements.map((el) => el.textContent())\n      );\n\n      // Check that the default agent is not in the list\n      const hasDefaultAssistant = assistantTexts.some(\n        (text) =>\n          text?.includes(\"Assistant\") &&\n          !text?.includes(\"Test\") &&\n          !text?.includes(\"Custom\")\n      );\n      expect(hasDefaultAssistant).toBe(false);\n\n      // Close the modal/selector\n      await page.keyboard.press(\"Escape\");\n    });\n\n    test(\"should be able to switch from default to custom agent\", async ({\n      page,\n    }) => {\n      // Create a custom agent\n      await page.getByTestId(\"AppSidebar/more-agents\").click();\n      await page.getByLabel(\"AgentsPage/new-agent-button\").click();\n      await page\n        .locator('input[name=\"name\"]')\n        .waitFor({ state: \"visible\", timeout: 10000 });\n      await page.locator('input[name=\"name\"]').fill(\"Switch Test Agent\");\n      await page\n        .locator('textarea[name=\"description\"]')\n        .fill(\"Test Description\");\n      await page\n        .locator('textarea[name=\"instructions\"]')\n        .fill(\"Test Instructions\");\n      await page.getByRole(\"button\", { name: \"Create\" }).click();\n\n      // Verify switched to custom agent\n      await verifyAgentIsChosen(page, \"Switch Test Agent\");\n\n      // Start new chat to go back to default\n      await startNewChat(page);\n\n      // Should be back to default agent\n      await verifyDefaultAgentIsChosen(page);\n    });\n  });\n\n  test.describe(\"Action Management Toggle\", () => {\n    let imageGenConfigId: string | null = null;\n\n    test.beforeAll(async ({ browser }) => {\n      // Create image generation config as admin so ImageGenerationTool becomes available\n      // Use saved admin auth state instead of logging in again\n      const adminContext = await browser.newContext({\n        storageState: \"admin_auth.json\",\n      });\n      const adminPage = await adminContext.newPage();\n      await adminPage.goto(\"http://localhost:3000/app\");\n      await adminPage.waitForLoadState(\"networkidle\");\n\n      const apiClient = new OnyxApiClient(adminPage.request);\n      try {\n        imageGenConfigId = await apiClient.createImageGenerationConfig(\n          `test-action-toggle-${Date.now()}`\n        );\n      } catch (error) {\n        console.warn(`Failed to create image generation config: ${error}`);\n      }\n\n      await adminContext.close();\n    });\n\n    test.afterAll(async ({ browser }) => {\n      // Cleanup the image generation config\n      if (imageGenConfigId) {\n        const adminContext = await browser.newContext({\n          storageState: \"admin_auth.json\",\n        });\n        const adminPage = await adminContext.newPage();\n        await adminPage.goto(\"http://localhost:3000/app\");\n        await adminPage.waitForLoadState(\"networkidle\");\n\n        const apiClient = new OnyxApiClient(adminPage.request);\n        await apiClient.deleteImageGenerationConfig(imageGenConfigId);\n\n        await adminContext.close();\n      }\n    });\n\n    test(\"should display action management toggle\", async ({ page }) => {\n      // Look for action management toggle button\n      const actionToggle = await page.waitForSelector(TOOL_IDS.actionToggle, {\n        timeout: 5000,\n      });\n      expect(actionToggle).toBeTruthy();\n    });\n\n    test(\"should show web-search + image-generation tools options when clicked\", async ({\n      page,\n    }) => {\n      // This test requires admin permissions to create web search provider\n      // Note: Image generation config is already created by beforeAll\n      await page.context().clearCookies();\n      await loginAs(page, \"admin\");\n      await page.goto(\"/app\");\n      await page.waitForLoadState(\"domcontentloaded\");\n\n      const apiClient = new OnyxApiClient(page.request);\n      let webSearchProviderId: number | null = null;\n\n      try {\n        // Set up a web search provider so the tool is available\n        webSearchProviderId = await apiClient.createWebSearchProvider(\n          \"exa\",\n          `Test Web Search Provider ${Date.now()}`\n        );\n      } catch (error) {\n        console.warn(\n          `Failed to create web search provider for test: ${error}. Test may fail.`\n        );\n      }\n\n      // Enable the tools in default agent config via API\n      // Get current tools to find their IDs\n      const toolsListResp = await page.request.get(\n        \"http://localhost:3000/api/tool\"\n      );\n      const allTools = await toolsListResp.json();\n      const toolIdsByCodeId: { [key: string]: number } = {};\n      allTools.forEach((tool: any) => {\n        if (tool.in_code_tool_id) {\n          toolIdsByCodeId[tool.in_code_tool_id] = tool.id;\n        }\n      });\n\n      // Get current config\n      const currentConfigResp = await page.request.get(\n        \"http://localhost:3000/api/admin/default-assistant/configuration\"\n      );\n      const currentConfig = await currentConfigResp.json();\n\n      // Add Web Search and Image Generation tool IDs\n      const toolIdsToEnable = [\n        ...(currentConfig.tool_ids || []),\n        toolIdsByCodeId[\"WebSearchTool\"],\n        toolIdsByCodeId[\"ImageGenerationTool\"],\n      ].filter((id) => id !== undefined);\n\n      // Deduplicate\n      const uniqueToolIds = Array.from(new Set(toolIdsToEnable));\n\n      // Update config via API\n      await page.request.patch(\n        \"http://localhost:3000/api/admin/default-assistant\",\n        {\n          data: { tool_ids: uniqueToolIds },\n        }\n      );\n\n      console.log(`[test] Enabled tools via API: ${uniqueToolIds}`);\n\n      // Go back to chat\n      await page.goto(\"/app\");\n      await page.waitForLoadState(\"domcontentloaded\");\n\n      // Will NOT show the `internal-search` option since that will be excluded when there are no connectors connected.\n      // (Since we removed pre-seeded docs, we will have NO connectors connected on a fresh install; therefore, `internal-search` will not be available.)\n      await openActionManagement(page);\n      await expect(page.locator(TOOL_IDS.webSearchOption)).toBeVisible({\n        timeout: 10000,\n      });\n      await expect(page.locator(TOOL_IDS.imageGenerationOption)).toBeVisible({\n        timeout: 10000,\n      });\n\n      // Clean up web search provider only (image gen config is managed by beforeAll/afterAll)\n      if (webSearchProviderId !== null) {\n        try {\n          await apiClient.deleteWebSearchProvider(webSearchProviderId);\n        } catch (error) {\n          console.warn(\n            `Failed to delete web search provider ${webSearchProviderId}: ${error}`\n          );\n        }\n      }\n    });\n\n    test(\"should be able to toggle tools on and off\", async ({ page }) => {\n      // Click action management toggle\n      await page.click(TOOL_IDS.actionToggle);\n\n      // Wait for tool options\n      await page.waitForSelector(TOOL_IDS.options, {\n        timeout: 5000,\n      });\n\n      // Find a checkbox/toggle within the image-generation tool option\n      const imageGenerationToolOption = await page.$(\n        TOOL_IDS.imageGenerationOption\n      );\n      expect(imageGenerationToolOption).toBeTruthy();\n\n      // Look for a checkbox or switch within the tool option\n      const imageGenerationToggle = await imageGenerationToolOption?.$(\n        TOOL_IDS.toggleInput\n      );\n\n      if (imageGenerationToggle) {\n        const initialState = await imageGenerationToggle.isChecked();\n        await imageGenerationToggle.click();\n\n        // Verify state changed\n        const newState = await imageGenerationToggle.isChecked();\n        expect(newState).toBe(!initialState);\n\n        // Toggle it back\n        await imageGenerationToggle.click();\n        const finalState = await imageGenerationToggle.isChecked();\n        expect(finalState).toBe(initialState);\n      } else {\n        // If no toggle found, just click the option itself\n        await imageGenerationToolOption?.click();\n        // Check if the option has some visual state change\n        // This is a fallback behavior if toggles work differently\n      }\n    });\n\n    test(\"tool toggle state should persist across page refresh\", async ({\n      page,\n    }) => {\n      // Click action management toggle\n      await page.click(TOOL_IDS.actionToggle);\n\n      // Wait for tool options\n      await page.waitForSelector(TOOL_IDS.options, {\n        timeout: 5000,\n      });\n\n      // Find the internet image-generation tool option and its toggle\n      const imageGenerationToolOption = await page.$(\n        TOOL_IDS.imageGenerationOption\n      );\n      expect(imageGenerationToolOption).toBeTruthy();\n\n      const imageGenerationToggle = await imageGenerationToolOption?.$(\n        TOOL_IDS.toggleInput\n      );\n\n      let toggledState = false;\n      if (imageGenerationToggle) {\n        await imageGenerationToggle.click();\n        toggledState = await imageGenerationToggle.isChecked();\n      } else {\n        // Click the option itself if no toggle found\n        await imageGenerationToolOption?.click();\n        // Assume toggled if clicked\n        toggledState = true;\n      }\n\n      // Reload page\n      await page.reload();\n      await page.waitForLoadState(\"networkidle\");\n\n      // Open action management again\n      await page.click(TOOL_IDS.actionToggle);\n      await page.waitForSelector(TOOL_IDS.options, {\n        timeout: 5000,\n      });\n\n      // Check if state persisted\n      const imageGenerationToolOptionAfterReload = await page.$(\n        TOOL_IDS.imageGenerationOption\n      );\n      const imageGenerationToggleAfterReload =\n        await imageGenerationToolOptionAfterReload?.$(TOOL_IDS.toggleInput);\n\n      if (imageGenerationToggleAfterReload) {\n        const stateAfterReload =\n          await imageGenerationToggleAfterReload.isChecked();\n        expect(stateAfterReload).toBe(toggledState);\n      }\n    });\n  });\n});\n\ntest.describe(\"End-to-End Default Agent Flow\", () => {\n  let imageGenConfigId: string | null = null;\n\n  test.beforeAll(async ({ browser }) => {\n    // Create image generation config as admin so ImageGenerationTool becomes available\n    // Use saved admin auth state instead of logging in again\n    const adminContext = await browser.newContext({\n      storageState: \"admin_auth.json\",\n    });\n    const adminPage = await adminContext.newPage();\n    await adminPage.goto(\"http://localhost:3000/app\");\n    await adminPage.waitForLoadState(\"networkidle\");\n\n    const apiClient = new OnyxApiClient(adminPage.request);\n    try {\n      imageGenConfigId = await apiClient.createImageGenerationConfig(\n        `test-e2e-journey-${Date.now()}`\n      );\n    } catch (error) {\n      console.warn(`Failed to create image generation config: ${error}`);\n    }\n\n    await adminContext.close();\n  });\n\n  test.afterAll(async ({ browser }) => {\n    // Cleanup the image generation config\n    if (imageGenConfigId) {\n      const adminContext = await browser.newContext({\n        storageState: \"admin_auth.json\",\n      });\n      const adminPage = await adminContext.newPage();\n      await adminPage.goto(\"http://localhost:3000/app\");\n      await adminPage.waitForLoadState(\"networkidle\");\n\n      const apiClient = new OnyxApiClient(adminPage.request);\n      await apiClient.deleteImageGenerationConfig(imageGenConfigId);\n\n      await adminContext.close();\n    }\n  });\n\n  test(\"complete user journey with default agent\", async ({ page }) => {\n    // Clear cookies and log in as a random user\n    await page.context().clearCookies();\n    await loginAsRandomUser(page);\n\n    // Navigate to the chat page\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // Verify greeting message appears\n    await expect(page.locator('[data-testid=\"onyx-logo\"]')).toBeVisible();\n\n    // Verify Onyx logo is displayed\n    await expect(page.locator('[data-testid=\"onyx-logo\"]')).toBeVisible();\n\n    // Send a message using the chat input\n    await sendMessage(page, \"Hello, can you help me?\");\n\n    // Open action management and verify tools\n    await openActionManagement(page);\n\n    // Close action management\n    await page.keyboard.press(\"Escape\");\n\n    // Start a new chat\n    await startNewChat(page);\n\n    // Verify we're back to default agent with greeting\n    await expect(page.locator('[data-testid=\"onyx-logo\"]')).toBeVisible();\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/default_app_mode.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\ntest.describe(\"Default App Mode\", () => {\n  test(\"loads persisted Search mode after refresh\", async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n\n    // Arrange\n    const apiClient = new OnyxApiClient(page.request);\n    const ccPairId = await apiClient.createFileConnector(\n      \"Default App Mode Test Connector\"\n    );\n    await apiClient.setDefaultAppMode(\"SEARCH\");\n\n    try {\n      // Act\n      await page.goto(\"/app\");\n      await page.waitForLoadState(\"networkidle\");\n\n      // Assert\n      const appModeButton = page.getByLabel(\"Change app mode\");\n      await appModeButton.waitFor({ state: \"visible\", timeout: 10000 });\n      await expect(appModeButton).toHaveText(/Search/);\n    } finally {\n      await apiClient.setDefaultAppMode(\"CHAT\");\n      await apiClient.deleteCCPair(ccPairId);\n    }\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/file_preview_modal.spec.ts",
    "content": "import { test, expect, Page } from \"@playwright/test\";\nimport { loginAsRandomUser } from \"../utils/auth\";\nimport * as fs from \"fs\";\nimport * as path from \"path\";\n\n/**\n * Builds a newline-delimited JSON stream body matching the packet\n * format that useChatController expects:\n *\n * 1. MessageResponseIDInfo — identifies the user/assistant messages\n * 2. Packet-wrapped streaming objects ({placement, obj}) — the actual content\n * 3. BackendMessage — the final completed message\n *\n * Each line is a raw JSON object parsed by handleSSEStream.\n */\nfunction buildMockStream(messageContent: string): string {\n  const packets = [\n    // 1. Message ID info — tells the frontend the message IDs\n    JSON.stringify({\n      user_message_id: 1,\n      reserved_assistant_message_id: 2,\n    }),\n    // 2. Streaming content packets wrapped in {placement, obj}\n    JSON.stringify({\n      placement: { turn_index: 0 },\n      obj: {\n        type: \"message_start\",\n        id: \"mock-message-id\",\n        content: \"\",\n        final_documents: null,\n      },\n    }),\n    JSON.stringify({\n      placement: { turn_index: 0 },\n      obj: {\n        type: \"message_delta\",\n        content: messageContent,\n      },\n    }),\n    JSON.stringify({\n      placement: { turn_index: 0 },\n      obj: {\n        type: \"message_end\",\n      },\n    }),\n    JSON.stringify({\n      placement: { turn_index: 0 },\n      obj: {\n        type: \"stop\",\n        stop_reason: \"finished\",\n      },\n    }),\n    // 3. Final BackendMessage — the completed message record\n    JSON.stringify({\n      message_id: 2,\n      message_type: \"assistant\",\n      research_type: null,\n      parent_message: 1,\n      latest_child_message: null,\n      message: messageContent,\n      rephrased_query: null,\n      context_docs: null,\n      time_sent: new Date().toISOString(),\n      citations: {},\n      files: [],\n      tool_call: null,\n      overridden_model: null,\n    }),\n  ];\n  return packets.join(\"\\n\") + \"\\n\";\n}\n\n/**\n * Sends a message while intercepting the backend response with\n * a controlled mock stream. Returns once the AI message renders.\n */\nasync function sendMessageWithMockResponse(\n  page: Page,\n  userMessage: string,\n  mockResponseContent: string\n) {\n  const existingMessageCount = await page\n    .locator('[data-testid=\"onyx-ai-message\"]')\n    .count();\n\n  // Intercept the send-chat-message endpoint and return our mock stream\n  await page.route(\"**/api/chat/send-chat-message\", async (route) => {\n    await route.fulfill({\n      status: 200,\n      contentType: \"application/json\",\n      body: buildMockStream(mockResponseContent),\n    });\n  });\n\n  await page.locator(\"#onyx-chat-input-textarea\").click();\n  await page.locator(\"#onyx-chat-input-textarea\").fill(userMessage);\n  await page.locator(\"#onyx-chat-input-send-button\").click();\n\n  // Wait for the AI message to appear\n  await expect(page.locator('[data-testid=\"onyx-ai-message\"]')).toHaveCount(\n    existingMessageCount + 1,\n    { timeout: 30000 }\n  );\n\n  // Unroute so future requests go through normally\n  await page.unroute(\"**/api/chat/send-chat-message\");\n}\n\nconst MOCK_FILE_ID = \"00000000-0000-0000-0000-000000000001\";\n\ntest.describe(\"File preview modal from chat file links\", () => {\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAsRandomUser(page);\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n  });\n\n  test(\"clicking a text file link opens the TextViewModal\", async ({\n    page,\n  }) => {\n    const mockContent = `Here is your file: [notes.txt](/api/chat/file/${MOCK_FILE_ID})`;\n\n    // Mock the file endpoint to return text content\n    await page.route(`**/api/chat/file/${MOCK_FILE_ID}`, async (route) => {\n      await route.fulfill({\n        status: 200,\n        contentType: \"text/plain\",\n        body: \"Hello from the mock file!\",\n      });\n    });\n\n    await sendMessageWithMockResponse(page, \"Give me the file\", mockContent);\n\n    // Find the link in the AI message and click it\n    const aiMessage = page.getByTestId(\"onyx-ai-message\").last();\n    const fileLink = aiMessage.locator(\"a\").filter({ hasText: \"notes.txt\" });\n    await expect(fileLink).toBeVisible({ timeout: 5000 });\n    await fileLink.click();\n\n    // Verify the modal opens\n    const modal = page.getByRole(\"dialog\");\n    await expect(modal).toBeVisible({ timeout: 5000 });\n\n    // Verify the file name is shown in the header\n    await expect(modal.getByText(\"notes.txt\")).toBeVisible();\n\n    // Verify the download link exists\n    await expect(modal.locator(\"a[download]\")).toBeVisible();\n\n    // Verify the file content is rendered\n    await expect(modal.getByText(\"Hello from the mock file!\")).toBeVisible();\n  });\n\n  test(\"clicking a code file link opens the PreviewModal with syntax highlighting\", async ({\n    page,\n  }) => {\n    const mockContent = `Here is your script: [app.py](/api/chat/file/${MOCK_FILE_ID})`;\n    const pythonCode = 'def hello():\\n    print(\"Hello, world!\")';\n\n    // Mock the file endpoint to return Python code\n    await page.route(`**/api/chat/file/${MOCK_FILE_ID}`, async (route) => {\n      await route.fulfill({\n        status: 200,\n        contentType: \"application/octet-stream\",\n        body: pythonCode,\n      });\n    });\n\n    await sendMessageWithMockResponse(page, \"Give me the script\", mockContent);\n\n    // Find the link in the AI message and click it\n    const aiMessage = page.getByTestId(\"onyx-ai-message\").last();\n    const fileLink = aiMessage.locator(\"a\").filter({ hasText: \"app.py\" });\n    await expect(fileLink).toBeVisible({ timeout: 5000 });\n    await fileLink.click();\n\n    // Verify the PreviewModal opens\n    const modal = page.getByRole(\"dialog\");\n    await expect(modal).toBeVisible({ timeout: 5000 });\n\n    // Verify the file name is shown in the header\n    await expect(modal.getByText(\"app.py\")).toBeVisible();\n\n    // Verify the header description shows language and line info\n    await expect(\n      modal\n        .locator(\"div\")\n        .filter({ hasText: /python/i })\n        .first()\n    ).toBeVisible();\n    await expect(\n      modal\n        .locator(\"div\")\n        .filter({ hasText: /2 lines/ })\n        .first()\n    ).toBeVisible();\n\n    // Verify the code content is rendered\n    await expect(modal.getByText(\"Hello, world!\")).toBeVisible();\n\n    // Verify the download icon button exists (tooltip-only, no visible text)\n    const downloadButton = modal.locator(\"button\").last();\n    await expect(downloadButton).toBeVisible();\n\n    // Hover to verify the download tooltip appears\n    await downloadButton.hover();\n    await expect(page.getByText(\"Download\")).toBeVisible({ timeout: 3000 });\n  });\n\n  test(\"download button triggers file download\", async ({ page }) => {\n    const mockContent = `Here: [data.csv](/api/chat/file/${MOCK_FILE_ID})`;\n\n    await page.route(`**/api/chat/file/${MOCK_FILE_ID}`, async (route) => {\n      await route.fulfill({\n        status: 200,\n        contentType: \"text/csv\",\n        body: \"name,age\\nAlice,30\\nBob,25\",\n      });\n    });\n\n    await sendMessageWithMockResponse(page, \"Give me the csv\", mockContent);\n\n    const aiMessage = page.getByTestId(\"onyx-ai-message\").last();\n    const fileLink = aiMessage.locator(\"a\").filter({ hasText: \"data.csv\" });\n    await expect(fileLink).toBeVisible({ timeout: 5000 });\n    await fileLink.click();\n\n    const modal = page.getByRole(\"dialog\");\n    await expect(modal).toBeVisible({ timeout: 5000 });\n\n    // Click the download link and verify a download starts\n    const downloadPromise = page.waitForEvent(\"download\");\n    await modal.locator(\"a[download]\").last().click();\n    const download = await downloadPromise;\n\n    expect(download.suggestedFilename()).toContain(\"data.csv\");\n  });\n\n  test(\"clicking a .docx file link opens the preview modal and renders content\", async ({\n    page,\n  }) => {\n    const mockContent = `Here is your document: [report.docx](/api/chat/file/${MOCK_FILE_ID})`;\n\n    // Serve a real .docx fixture so docx-preview can parse it\n    const docxBuffer = fs.readFileSync(\n      path.join(__dirname, \"../fixtures/three_images.docx\")\n    );\n\n    await page.route(`**/api/chat/file/${MOCK_FILE_ID}`, async (route) => {\n      await route.fulfill({\n        status: 200,\n        contentType:\n          \"application/vnd.openxmlformats-officedocument.wordprocessingml.document\",\n        body: docxBuffer,\n      });\n    });\n\n    await sendMessageWithMockResponse(\n      page,\n      \"Give me the document\",\n      mockContent\n    );\n\n    const aiMessage = page.getByTestId(\"onyx-ai-message\").last();\n    const fileLink = aiMessage.locator(\"a\").filter({ hasText: \"report.docx\" });\n    await expect(fileLink).toBeVisible({ timeout: 5000 });\n    await fileLink.click();\n\n    const modal = page.getByRole(\"dialog\");\n    await expect(modal).toBeVisible({ timeout: 5000 });\n\n    // Verify the file name is shown in the header\n    await expect(modal.getByText(\"report.docx\")).toBeVisible();\n\n    // Verify the header describes it as a Word Document\n    await expect(\n      modal\n        .locator(\"div\")\n        .filter({ hasText: /Word Document/ })\n        .first()\n    ).toBeVisible();\n\n    // Verify docx-preview rendered content into the body container\n    await expect(modal.locator(\".docx-host\")).toBeVisible({ timeout: 10000 });\n\n    // Verify the download button exists\n    await expect(modal.locator(\"a[download]\")).toBeVisible();\n  });\n\n  test(\"clicking a legacy .doc file link shows unsupported message\", async ({\n    page,\n  }) => {\n    const mockContent = `Here is your document: [old_report.doc](/api/chat/file/${MOCK_FILE_ID})`;\n\n    await page.route(`**/api/chat/file/${MOCK_FILE_ID}`, async (route) => {\n      await route.fulfill({\n        status: 200,\n        contentType: \"application/msword\",\n        body: \"fake binary content\",\n      });\n    });\n\n    await sendMessageWithMockResponse(\n      page,\n      \"Give me the old document\",\n      mockContent\n    );\n\n    const aiMessage = page.getByTestId(\"onyx-ai-message\").last();\n    const fileLink = aiMessage\n      .locator(\"a\")\n      .filter({ hasText: \"old_report.doc\" });\n    await expect(fileLink).toBeVisible({ timeout: 5000 });\n    await fileLink.click();\n\n    const modal = page.getByRole(\"dialog\");\n    await expect(modal).toBeVisible({ timeout: 5000 });\n\n    // Verify the file name is shown\n    await expect(modal.getByText(\"old_report.doc\")).toBeVisible();\n\n    // Verify the legacy .doc message is shown\n    await expect(\n      modal.getByText(/Legacy .doc format cannot be previewed/)\n    ).toBeVisible();\n\n    // Verify download button is still available\n    await expect(modal.locator(\"a[download]\")).toBeVisible();\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/input_focus_retention.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { loginAsWorkerUser } from \"@tests/e2e/utils/auth\";\n\ntest.describe(`Chat Input Focus Retention`, () => {\n  test.beforeEach(async ({ page }, testInfo) => {\n    await page.context().clearCookies();\n    await loginAsWorkerUser(page, testInfo.workerIndex);\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n  });\n\n  test(\"clicking empty space retains focus on chat input\", async ({ page }) => {\n    const textarea = page.locator(\"#onyx-chat-input-textarea\");\n    await textarea.waitFor({ state: \"visible\", timeout: 10000 });\n\n    // Focus the textarea and type something\n    await textarea.focus();\n    await textarea.fill(\"test message\");\n    await expect(textarea).toBeFocused();\n\n    // Click on the main container's empty space (top-left corner)\n    const container = page.locator(\"[data-main-container]\");\n    await container.click({ position: { x: 10, y: 10 } });\n\n    // Focus should remain on the textarea\n    await expect(textarea).toBeFocused();\n  });\n\n  test(\"clicking interactive elements still moves focus away\", async ({\n    page,\n  }) => {\n    const textarea = page.locator(\"#onyx-chat-input-textarea\");\n    await textarea.waitFor({ state: \"visible\", timeout: 10000 });\n\n    // Focus the textarea\n    await textarea.focus();\n    await expect(textarea).toBeFocused();\n\n    // Click on an interactive element inside the container\n    const button = page.locator(\"[data-main-container] button\").first();\n    await button.waitFor({ state: \"visible\", timeout: 5000 });\n    await button.click();\n\n    // Focus should have moved away from the textarea\n    await expect(textarea).not.toBeFocused();\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/live_agent.spec.ts",
    "content": "import { test } from \"@playwright/test\";\nimport { loginAsRandomUser } from \"@tests/e2e/utils/auth\";\nimport {\n  sendMessage,\n  startNewChat,\n  verifyAgentIsChosen,\n  verifyDefaultAgentIsChosen,\n} from \"@tests/e2e/utils/chatActions\";\n\ntest(\"Chat workflow\", async ({ page }) => {\n  // Clear cookies and log in as a random user\n  await page.context().clearCookies();\n  // Use waitForSelector for robustness instead of expect().toBeVisible()\n  // await page.waitForSelector(\n  //   `//div[@aria-label=\"Agents Modal\"]//*[contains(text(), \"${agentName}\") and not(contains(@class, 'invisible'))]`,\n  //   { state: \"visible\", timeout: 10000 }\n  // );\n  await loginAsRandomUser(page);\n\n  // Navigate to the chat page\n  await page.goto(\"/app\");\n  await page.waitForLoadState(\"networkidle\");\n\n  // Test interaction with the Default agent\n  await sendMessage(page, \"Hi\");\n\n  // Start a new chat session\n  await startNewChat(page);\n\n  // Verify the presence of the expected text\n  await verifyDefaultAgentIsChosen(page);\n\n  // Test creation of a new assistant\n  await page.getByTestId(\"AppSidebar/more-agents\").click();\n  await page.getByLabel(\"AgentsPage/new-agent-button\").click();\n  await page.locator('input[name=\"name\"]').click();\n  await page.locator('input[name=\"name\"]').fill(\"Test Assistant\");\n  await page.locator('textarea[name=\"description\"]').click();\n  await page\n    .locator('textarea[name=\"description\"]')\n    .fill(\"Test Assistant Description\");\n  await page.locator('textarea[name=\"instructions\"]').click();\n  await page\n    .locator('textarea[name=\"instructions\"]')\n    .fill(\"Test Assistant Instructions\");\n  await page.getByRole(\"button\", { name: \"Create\" }).click();\n\n  // Verify the successful creation of the new assistant\n  await verifyAgentIsChosen(page, \"Test Assistant\");\n\n  // Start another new chat session\n  await startNewChat(page);\n  await page.waitForLoadState(\"networkidle\");\n\n  // Verify the presence of the default agent text\n  await verifyDefaultAgentIsChosen(page);\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/llm_ordering.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { verifyCurrentModel } from \"@tests/e2e/utils/chatActions\";\nimport { ensureImageGenerationEnabled } from \"@tests/e2e/utils/agentUtils\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\ntest.describe(\"LLM Ordering\", () => {\n  let imageGenConfigId: string | null = null;\n\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n\n    const apiClient = new OnyxApiClient(page.request);\n\n    // Create image generation config so the checkbox appears\n    try {\n      imageGenConfigId = await apiClient.createImageGenerationConfig(\n        `test-image-gen-${Date.now()}`\n      );\n    } catch (error) {\n      console.warn(`Failed to create image generation config: ${error}`);\n    }\n  });\n\n  test.afterEach(async ({ page }) => {\n    const apiClient = new OnyxApiClient(page.request);\n\n    if (imageGenConfigId !== null) {\n      try {\n        await apiClient.deleteImageGenerationConfig(imageGenConfigId);\n        imageGenConfigId = null;\n      } catch (error) {\n        console.warn(`Failed to delete image gen config: ${error}`);\n      }\n    }\n  });\n\n  test(\"Non-image-generation model visibility in chat input bar\", async ({\n    page,\n  }) => {\n    await ensureImageGenerationEnabled(page);\n\n    await page.goto(\"/app\");\n    await page.waitForSelector(\"#onyx-chat-input-textarea\", { timeout: 10000 });\n\n    const trigger = page.getByTestId(\"llm-popover-trigger\");\n    const originalTriggerText = (await trigger.textContent())?.trim() ?? \"\";\n\n    await trigger.click();\n    await page.waitForSelector('[role=\"dialog\"]', { timeout: 5000 });\n\n    const dialog = page.locator('[role=\"dialog\"]');\n    const allModelItems = dialog.locator(\"[data-selected]\");\n    await expect(allModelItems.first()).toBeVisible({ timeout: 5000 });\n\n    const count = await allModelItems.count();\n    expect(count).toBeGreaterThan(0);\n\n    // Pick the first non-selected model so the trigger text changes after click\n    const nonSelectedItem = dialog.locator('[data-selected=\"false\"]').first();\n    const hasNonSelected = (await nonSelectedItem.count()) > 0;\n    const targetItem = hasNonSelected ? nonSelectedItem : allModelItems.first();\n\n    await expect(targetItem).toBeVisible();\n    await targetItem.click();\n\n    // Verify the popover closed and the trigger updated\n    await expect(dialog).toBeHidden();\n\n    if (hasNonSelected) {\n      const updatedTriggerText = (await trigger.textContent())?.trim() ?? \"\";\n      expect(updatedTriggerText).not.toBe(originalTriggerText);\n    }\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/llm_runtime_selection.spec.ts",
    "content": "import { expect, Page, test } from \"@playwright/test\";\nimport { loginAs, loginAsWorkerUser } from \"@tests/e2e/utils/auth\";\nimport {\n  selectModelFromInputPopover,\n  sendMessage,\n  startNewChat,\n  verifyCurrentModel,\n} from \"@tests/e2e/utils/chatActions\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\ntype SendChatMessagePayload = {\n  llm_override?: {\n    model_provider?: string | null;\n    model_version?: string | null;\n    temperature?: number | null;\n  } | null;\n};\n\nfunction uniqueName(prefix: string): string {\n  return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;\n}\n\nasync function openChat(page: Page): Promise<void> {\n  await page.goto(\"/app\");\n  await page.waitForLoadState(\"networkidle\");\n  await page.waitForSelector(\"#onyx-chat-input-textarea\", { timeout: 15000 });\n}\n\nasync function loginWithCleanCookies(\n  page: Page,\n  user: \"admin\" | number\n): Promise<void> {\n  await page.context().clearCookies();\n  if (typeof user === \"number\") {\n    await loginAsWorkerUser(page, user);\n  } else {\n    await loginAs(page, user);\n  }\n}\n\nasync function createLlmProvider(\n  page: Page,\n  params: {\n    name: string;\n    provider: string;\n    defaultModelName: string;\n    isPublic: boolean;\n    groupIds?: number[];\n  }\n): Promise<number> {\n  const response = await page.request.put(\n    \"/api/admin/llm/provider?is_creation=true\",\n    {\n      data: {\n        name: params.name,\n        provider: params.provider,\n        api_key: \"e2e-placeholder-api-key-not-used\",\n        default_model_name: params.defaultModelName,\n        is_public: params.isPublic,\n        groups: params.groupIds ?? [],\n        personas: [],\n        model_configurations: [\n          {\n            name: params.defaultModelName,\n            is_visible: true,\n          },\n        ],\n      },\n    }\n  );\n\n  expect(response.ok()).toBeTruthy();\n  const data = (await response.json()) as { id: number };\n  return data.id;\n}\n\nasync function sendMessageAndCapturePayload(\n  page: Page,\n  message: string\n): Promise<SendChatMessagePayload> {\n  const requestPromise = page.waitForRequest(\n    (request) =>\n      request.url().includes(\"/api/chat/send-chat-message\") &&\n      request.method() === \"POST\"\n  );\n\n  await sendMessage(page, message);\n\n  const request = await requestPromise;\n  return request.postDataJSON() as SendChatMessagePayload;\n}\n\ntype LlmProviderBasics = {\n  name: string;\n  model_configurations: Array<{ name: string }>;\n};\n\nasync function listUserLlmProviders(page: Page): Promise<LlmProviderBasics[]> {\n  const response = await page.request.get(\"/api/llm/provider\");\n  expect(response.ok()).toBeTruthy();\n  const data = (await response.json()) as {\n    providers: LlmProviderBasics[];\n  };\n  return data.providers;\n}\n\nasync function waitForModelOnProvider(\n  page: Page,\n  modelName: string,\n  providerNames: string[]\n): Promise<void> {\n  await expect\n    .poll(\n      async () => {\n        const providers = await listUserLlmProviders(page);\n        return providerNames.every((providerName) =>\n          providers.some(\n            (provider) =>\n              provider.name === providerName &&\n              provider.model_configurations.some(\n                (modelConfig) => modelConfig.name === modelName\n              )\n          )\n        );\n      },\n      { timeout: 30000 }\n    )\n    .toBeTruthy();\n}\n\nfunction buildMockStreamResponse(turn: number): string {\n  const userMessageId = turn * 100 + 1;\n  const agentMessageId = turn * 100 + 2;\n\n  const packets = [\n    {\n      user_message_id: userMessageId,\n      reserved_assistant_message_id: agentMessageId,\n    },\n    {\n      placement: { turn_index: 0, tab_index: 0 },\n      obj: {\n        type: \"message_start\",\n        id: `mock-${agentMessageId}`,\n        content: \"Mock response for provider collision assertion.\",\n        final_documents: null,\n      },\n    },\n    {\n      placement: { turn_index: 0, tab_index: 0 },\n      obj: { type: \"stop\", stop_reason: \"finished\" },\n    },\n    {\n      message_id: agentMessageId,\n      citations: {},\n      files: [],\n    },\n  ];\n\n  return `${packets.map((packet) => JSON.stringify(packet)).join(\"\\n\")}\\n`;\n}\n\ntest.describe(\"LLM Runtime Selection\", () => {\n  let providersToCleanup: number[] = [];\n  let groupsToCleanup: number[] = [];\n\n  test.beforeEach(async ({ page }, testInfo) => {\n    providersToCleanup = [];\n    groupsToCleanup = [];\n    await loginWithCleanCookies(page, testInfo.workerIndex);\n  });\n\n  test.afterEach(async ({ page }) => {\n    await loginWithCleanCookies(page, \"admin\");\n\n    const client = new OnyxApiClient(page.request);\n    const providerIds = Array.from(new Set(providersToCleanup));\n    const groupIds = Array.from(new Set(groupsToCleanup));\n\n    for (const providerId of providerIds) {\n      try {\n        await client.deleteProvider(providerId);\n      } catch (error) {\n        console.warn(\n          `Cleanup failed for provider ${providerId}: ${String(error)}`\n        );\n      }\n    }\n\n    for (const groupId of groupIds) {\n      try {\n        await client.deleteUserGroup(groupId);\n      } catch (error) {\n        console.warn(`Cleanup failed for group ${groupId}: ${String(error)}`);\n      }\n    }\n  });\n\n  test(\"model selection persists across refresh and subsequent messages in the same chat\", async ({\n    page,\n  }, testInfo) => {\n    await loginWithCleanCookies(page, \"admin\");\n\n    const persistenceProviderName = uniqueName(\"PW Runtime Persist Provider\");\n    const persistenceModelName = `persist-runtime-model-${Date.now()}`;\n    const persistenceProviderId = await createLlmProvider(page, {\n      name: persistenceProviderName,\n      provider: \"openai\",\n      defaultModelName: persistenceModelName,\n      isPublic: true,\n    });\n    providersToCleanup.push(persistenceProviderId);\n    await waitForModelOnProvider(page, persistenceModelName, [\n      persistenceProviderName,\n    ]);\n\n    await loginWithCleanCookies(page, testInfo.workerIndex);\n    await openChat(page);\n\n    let turn = 0;\n    await page.route(\"**/api/chat/send-chat-message\", async (route) => {\n      turn += 1;\n      await route.fulfill({\n        status: 200,\n        contentType: \"text/plain\",\n        body: buildMockStreamResponse(turn),\n      });\n    });\n\n    const selectedModelDisplay = await selectModelFromInputPopover(page, [\n      persistenceModelName,\n    ]);\n    await verifyCurrentModel(page, selectedModelDisplay);\n\n    const firstPayload = await sendMessageAndCapturePayload(\n      page,\n      \"First persistence check message.\"\n    );\n    const firstModelVersion = firstPayload.llm_override?.model_version;\n    const firstModelProvider = firstPayload.llm_override?.model_provider;\n\n    expect(firstModelVersion).toBeTruthy();\n    expect(firstModelProvider).toBeTruthy();\n    expect(firstModelProvider).toBe(persistenceProviderName);\n    expect(page.url()).toContain(\"chatId=\");\n\n    await page.reload();\n    await page.waitForLoadState(\"networkidle\");\n    await page.waitForSelector(\"#onyx-chat-input-textarea\", { timeout: 15000 });\n\n    await verifyCurrentModel(page, selectedModelDisplay);\n\n    const secondPayload = await sendMessageAndCapturePayload(\n      page,\n      \"Second persistence check after refresh.\"\n    );\n\n    expect(secondPayload.llm_override?.model_version).toBe(firstModelVersion);\n    expect(secondPayload.llm_override?.model_provider).toBe(firstModelProvider);\n  });\n\n  test(\"regenerate with alternate model preserves version history semantics\", async ({\n    page,\n  }) => {\n    await openChat(page);\n\n    let turn = 0;\n    await page.route(\"**/api/chat/send-chat-message\", async (route) => {\n      turn += 1;\n      await route.fulfill({\n        status: 200,\n        contentType: \"text/plain\",\n        body: buildMockStreamResponse(turn),\n      });\n    });\n\n    // Keep this aligned with the existing stable regenerate flow test.\n    const initialModelDisplay = await selectModelFromInputPopover(page, [\n      \"GPT-4.1\",\n      \"GPT-4o Mini\",\n      \"GPT-4o\",\n    ]);\n    await verifyCurrentModel(page, initialModelDisplay);\n\n    const initialPayload = await sendMessageAndCapturePayload(\n      page,\n      \"Generate a short sentence for regeneration.\"\n    );\n    const initialModelVersion = initialPayload.llm_override?.model_version;\n\n    const aiMessage = page.locator('[data-testid=\"onyx-ai-message\"]').first();\n    await aiMessage.hover();\n\n    const regenerateControl = aiMessage.getByTestId(\"AgentMessage/regenerate\");\n    await regenerateControl.click();\n    await page.waitForSelector('[role=\"dialog\"]', {\n      state: \"visible\",\n      timeout: 10000,\n    });\n\n    const regenerateDialog = page.locator('[role=\"dialog\"]');\n    const alternateModelOption = regenerateDialog\n      .locator('[data-selected=\"false\"]')\n      .first();\n\n    test.skip(\n      (await regenerateDialog.locator('[data-selected=\"false\"]').count()) === 0,\n      \"Regenerate model picker requires at least two runtime model options\"\n    );\n\n    const regenerateRequestPromise = page.waitForRequest(\n      (request) =>\n        request.url().includes(\"/api/chat/send-chat-message\") &&\n        request.method() === \"POST\"\n    );\n\n    await expect(alternateModelOption).toBeVisible({ timeout: 15000 });\n    await alternateModelOption.click();\n\n    const regeneratePayload = (await regenerateRequestPromise.then((request) =>\n      request.postDataJSON()\n    )) as SendChatMessagePayload;\n\n    await page.waitForSelector('[data-testid=\"AgentMessage/regenerate\"]', {\n      state: \"visible\",\n      timeout: 20000,\n    });\n\n    const messageSwitcher = page\n      .getByTestId(\"MessageSwitcher/container\")\n      .first();\n    await expect(messageSwitcher).toBeVisible({ timeout: 10000 });\n    await expect(messageSwitcher).toContainText(\"2/2\");\n\n    await messageSwitcher\n      .locator(\"..\")\n      .locator(\"svg\")\n      .first()\n      .locator(\"..\")\n      .click();\n    await expect(messageSwitcher).toContainText(\"1/2\");\n\n    await messageSwitcher\n      .locator(\"..\")\n      .locator(\"svg\")\n      .last()\n      .locator(\"..\")\n      .click();\n    await expect(messageSwitcher).toContainText(\"2/2\");\n\n    expect(regeneratePayload.llm_override?.model_version).toBeTruthy();\n    expect(regeneratePayload.llm_override?.model_provider).toBeTruthy();\n    expect(regeneratePayload.llm_override?.model_version).not.toBe(\n      initialModelVersion\n    );\n  });\n\n  test(\"same model name across providers resolves to provider-specific runtime payloads\", async ({\n    page,\n  }, testInfo) => {\n    await loginWithCleanCookies(page, \"admin\");\n\n    const sharedModelName = `shared-runtime-model-${Date.now()}`;\n    const openAiProviderName = uniqueName(\"PW Runtime OpenAI\");\n    const anthropicProviderName = uniqueName(\"PW Runtime Anthropic\");\n\n    const openAiProviderId = await createLlmProvider(page, {\n      name: openAiProviderName,\n      provider: \"openai\",\n      defaultModelName: sharedModelName,\n      isPublic: true,\n    });\n    const anthropicProviderId = await createLlmProvider(page, {\n      name: anthropicProviderName,\n      provider: \"anthropic\",\n      defaultModelName: sharedModelName,\n      isPublic: true,\n    });\n\n    providersToCleanup.push(openAiProviderId, anthropicProviderId);\n\n    await waitForModelOnProvider(page, sharedModelName, [\n      openAiProviderName,\n      anthropicProviderName,\n    ]);\n\n    await loginWithCleanCookies(page, testInfo.workerIndex);\n\n    const capturedPayloads: SendChatMessagePayload[] = [];\n    let turn = 0;\n\n    await page.route(\"**/api/chat/send-chat-message\", async (route) => {\n      turn += 1;\n      capturedPayloads.push(\n        route.request().postDataJSON() as SendChatMessagePayload\n      );\n      await route.fulfill({\n        status: 200,\n        contentType: \"text/plain\",\n        body: buildMockStreamResponse(turn),\n      });\n    });\n\n    await openChat(page);\n\n    await page.getByTestId(\"AppInputBar/llm-popover-trigger\").click();\n    await page.waitForSelector('[role=\"dialog\"]', { state: \"visible\" });\n    const dialog = page.locator('[role=\"dialog\"]');\n    await dialog.getByPlaceholder(\"Search models...\").fill(sharedModelName);\n\n    const sharedModelOptions = dialog.locator(\"[data-selected]\");\n    await expect(sharedModelOptions).toHaveCount(2);\n    const openAiModelOption = dialog\n      .getByRole(\"region\", { name: /openai/i })\n      .locator(\"[data-selected]\")\n      .first();\n    await expect(openAiModelOption).toBeVisible();\n    await openAiModelOption.click();\n    await page.waitForSelector('[role=\"dialog\"]', { state: \"hidden\" });\n\n    await sendMessage(page, \"Collision payload check one.\");\n    await expect.poll(() => capturedPayloads.length).toBe(1);\n\n    // Use a new session so runtime selection is not overwritten by the previous\n    // chat session's persisted model override.\n    await startNewChat(page);\n    await page.waitForSelector(\"#onyx-chat-input-textarea\", { timeout: 15000 });\n\n    await page.getByTestId(\"AppInputBar/llm-popover-trigger\").click();\n    await page.waitForSelector('[role=\"dialog\"]', { state: \"visible\" });\n    const secondDialog = page.locator('[role=\"dialog\"]');\n    await secondDialog\n      .getByPlaceholder(\"Search models...\")\n      .fill(sharedModelName);\n\n    const secondSharedModelOptions = secondDialog.locator(\"[data-selected]\");\n    await expect(secondSharedModelOptions).toHaveCount(2);\n    const anthropicModelOption = secondDialog\n      .getByRole(\"region\", { name: /anthropic/i })\n      .locator(\"[data-selected]\")\n      .first();\n    await expect(anthropicModelOption).toBeVisible();\n    await anthropicModelOption.click();\n    await page.waitForSelector('[role=\"dialog\"]', { state: \"hidden\" });\n\n    await page.getByTestId(\"AppInputBar/llm-popover-trigger\").click();\n    await page.waitForSelector('[role=\"dialog\"]', { state: \"visible\" });\n    const verifyDialog = page.locator('[role=\"dialog\"]');\n    const selectedAnthropicOption = verifyDialog\n      .getByRole(\"region\", { name: /anthropic/i })\n      .locator('[data-selected=\"true\"]');\n    await expect(selectedAnthropicOption).toHaveCount(1);\n    await page.keyboard.press(\"Escape\");\n    await page.waitForSelector('[role=\"dialog\"]', { state: \"hidden\" });\n\n    await sendMessage(page, \"Collision payload check two.\");\n    await expect.poll(() => capturedPayloads.length).toBe(2);\n\n    for (const payload of capturedPayloads) {\n      expect(payload.llm_override?.model_version).toBe(sharedModelName);\n      expect(payload.llm_override?.model_provider).toBeTruthy();\n    }\n\n    const providersUsed = capturedPayloads.map(\n      (payload) => payload.llm_override?.model_provider\n    );\n\n    expect(new Set(providersUsed)).toEqual(\n      new Set([openAiProviderName, anthropicProviderName])\n    );\n  });\n\n  test(\"restricted provider model is unavailable to unauthorized runtime user selection\", async ({\n    page,\n  }, testInfo) => {\n    await loginWithCleanCookies(page, \"admin\");\n\n    const client = new OnyxApiClient(page.request);\n    const restrictedGroupName = uniqueName(\"PW Runtime Restricted Group\");\n    const restrictedModelName = `restricted-runtime-model-${Date.now()}`;\n    const restrictedProviderName = uniqueName(\"PW Runtime Restricted Provider\");\n\n    let groupId: number;\n    try {\n      groupId = await client.createUserGroup(restrictedGroupName);\n    } catch (error) {\n      const errorText = String(error);\n      const requiresEnterpriseLicense =\n        errorText.includes(\"enterprise_license_required\") ||\n        errorText.includes(\"This feature requires an Enterprise license\");\n      test.skip(\n        requiresEnterpriseLicense,\n        \"Restricted provider test requires Enterprise license-enabled environment\"\n      );\n      throw error;\n    }\n    groupsToCleanup.push(groupId);\n\n    const restrictedProviderId = await createLlmProvider(page, {\n      name: restrictedProviderName,\n      provider: \"openai\",\n      defaultModelName: restrictedModelName,\n      isPublic: false,\n      groupIds: [groupId],\n    });\n    providersToCleanup.push(restrictedProviderId);\n\n    await loginWithCleanCookies(page, testInfo.workerIndex);\n    await openChat(page);\n\n    await page.getByTestId(\"AppInputBar/llm-popover-trigger\").click();\n    await page.waitForSelector('[role=\"dialog\"]', { state: \"visible\" });\n\n    const dialog = page.locator('[role=\"dialog\"]');\n    await dialog.getByPlaceholder(\"Search models...\").fill(restrictedModelName);\n\n    const restrictedModelOption = dialog\n      .locator(\"[data-selected]\")\n      .filter({ hasText: restrictedModelName });\n\n    await expect(restrictedModelOption).toHaveCount(0);\n    await expect(dialog.getByText(\"No models found\")).toBeVisible();\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/message_edit_regenerate.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { loginAsRandomUser } from \"@tests/e2e/utils/auth\";\nimport { sendMessage, switchModel } from \"@tests/e2e/utils/chatActions\";\n\ntest.describe(\"Message Edit and Regenerate Tests\", () => {\n  test.beforeEach(async ({ page }) => {\n    // Clear cookies and log in as a random user\n    await page.context().clearCookies();\n    await loginAsRandomUser(page);\n\n    // Navigate to the chat page\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n  });\n\n  test(\"Complete message editing functionality\", async ({ page }) => {\n    // Send initial message\n    await sendMessage(page, \"What is 2+2?\");\n\n    // Test cancel editing\n    let userMessage = page.locator(\"#onyx-human-message\").first();\n    await userMessage.hover();\n    let editButton = userMessage\n      .locator('[data-testid=\"HumanMessage/edit-button\"]')\n      .first();\n    await editButton.click();\n\n    let textarea = userMessage.locator(\"textarea\");\n    await textarea.fill(\"This edit will be cancelled\");\n\n    const cancelButton = userMessage.locator('button:has-text(\"Cancel\")');\n    await cancelButton.click();\n\n    // Verify original message is preserved\n    let messageContent = await userMessage.textContent();\n    expect(messageContent).toContain(\"What is 2+2?\");\n    expect(messageContent).not.toContain(\"This edit will be cancelled\");\n\n    // Edit the message for real\n    await userMessage.hover();\n    editButton = userMessage\n      .locator('[data-testid=\"HumanMessage/edit-button\"]')\n      .first();\n    await editButton.click();\n\n    textarea = userMessage.locator(\"textarea\");\n    await textarea.fill(\"What is 3+3?\");\n\n    let submitButton = userMessage.locator('button:has-text(\"Submit\")');\n    await submitButton.click();\n\n    // Wait for the new AI response to complete\n    await page.waitForSelector('[data-testid=\"AgentMessage/copy-button\"]', {\n      state: \"detached\",\n    });\n    await page.waitForSelector('[data-testid=\"AgentMessage/copy-button\"]', {\n      state: \"visible\",\n      timeout: 30000,\n    });\n\n    // Verify edited message is displayed\n    messageContent = await page\n      .locator(\"#onyx-human-message\")\n      .first()\n      .textContent();\n    expect(messageContent).toContain(\"What is 3+3?\");\n\n    // Verify version switcher appears and shows 2/2\n    let messageSwitcher = page.getByTestId(\"MessageSwitcher/container\").first();\n    await expect(messageSwitcher).toBeVisible();\n    await expect(messageSwitcher).toContainText(\"2/2\");\n\n    // Edit again to create a third version\n    userMessage = page.locator(\"#onyx-human-message\").first();\n    await userMessage.hover();\n    editButton = userMessage\n      .locator('[data-testid=\"HumanMessage/edit-button\"]')\n      .first();\n    await editButton.click();\n\n    textarea = userMessage.locator(\"textarea\");\n    await textarea.fill(\"What is 4+4?\");\n\n    submitButton = userMessage.locator('button:has-text(\"Submit\")');\n    await submitButton.click();\n\n    // Wait for the new AI response to complete\n    await page.waitForSelector('[data-testid=\"AgentMessage/copy-button\"]', {\n      state: \"detached\",\n    });\n    await page.waitForSelector('[data-testid=\"AgentMessage/copy-button\"]', {\n      state: \"visible\",\n      timeout: 30000,\n    });\n\n    // Verify navigation between versions\n    // Find the switcher showing \"3 / 3\"\n    let switcherSpan = page.getByTestId(\"MessageSwitcher/container\").first();\n    await expect(switcherSpan).toBeVisible();\n    await expect(switcherSpan).toContainText(\"3/3\");\n\n    // Navigate to previous version - click the first svg icon's parent (left chevron)\n    await switcherSpan\n      .locator(\"..\")\n      .locator(\"svg\")\n      .first()\n      .locator(\"..\")\n      .click();\n\n    // Check we're now at \"2 / 3\"\n    switcherSpan = page.getByTestId(\"MessageSwitcher/container\").first();\n    await expect(switcherSpan).toBeVisible({ timeout: 5000 });\n    await expect(switcherSpan).toContainText(\"2/3\");\n\n    // Navigate to first version - re-find the button each time\n    await switcherSpan\n      .locator(\"..\")\n      .locator(\"svg\")\n      .first()\n      .locator(\"..\")\n      .click();\n\n    // Check we're now at \"1 / 3\"\n    switcherSpan = page.getByTestId(\"MessageSwitcher/container\").first();\n    await expect(switcherSpan).toBeVisible({ timeout: 5000 });\n    await expect(switcherSpan).toContainText(\"1/3\");\n\n    // Navigate forward using next button - click the last svg icon's parent (right chevron)\n    await switcherSpan\n      .locator(\"..\")\n      .locator(\"svg\")\n      .last()\n      .locator(\"..\")\n      .click();\n\n    // Check we're back at \"2 / 3\"\n    switcherSpan = page.getByTestId(\"MessageSwitcher/container\").first();\n    await expect(switcherSpan).toBeVisible({ timeout: 5000 });\n    await expect(switcherSpan).toContainText(\"2/3\");\n  });\n\n  test(\"Message regeneration with model selection\", async ({ page }) => {\n    // make sure we're using something other than GPT-4o Mini, otherwise the below\n    // will fail since we need to switch to a different model for the test\n    await switchModel(page, \"GPT-4.1\");\n\n    // Send initial message\n    await sendMessage(page, \"hi! Respond with no more than a sentence\");\n\n    // Capture the original AI response text (just the message content, not buttons/switcher)\n    const aiMessage = page.locator('[data-testid=\"onyx-ai-message\"]').first();\n    // Target the actual message content div (the one with select-text class)\n    const messageContent = aiMessage.locator(\".select-text\").first();\n    const originalResponseText = await messageContent.textContent();\n\n    // Hover over AI message to show regenerate button\n    await aiMessage.hover();\n\n    // Click regenerate button using its data-testid\n    const regenerateButton = aiMessage.getByTestId(\"AgentMessage/regenerate\");\n    await regenerateButton.click();\n\n    // Wait for dropdown to appear and select GPT-4o Mini\n    await page.waitForSelector('[role=\"dialog\"]', { state: \"visible\" });\n\n    // Look for the GPT-4o Mini option in the dropdown\n    const gpt4oMiniOption = page\n      .locator('[role=\"dialog\"]')\n      .getByText(\"GPT-4o Mini\", { exact: true })\n      .first();\n    await gpt4oMiniOption.click();\n\n    // Wait for regeneration to complete by waiting for feedback buttons to appear\n    // The feedback buttons (copy, like, dislike, regenerate) appear when streaming is complete\n    await page.waitForSelector('[data-testid=\"AgentMessage/regenerate\"]', {\n      state: \"visible\",\n      timeout: 15000,\n    });\n\n    // Verify version switcher appears showing \"2 / 2\"\n    const messageSwitcher = page\n      .getByTestId(\"MessageSwitcher/container\")\n      .first();\n    await expect(messageSwitcher).toBeVisible({ timeout: 5000 });\n    await expect(messageSwitcher).toContainText(\"2/2\");\n\n    // Navigate to previous version\n    await messageSwitcher\n      .locator(\"..\")\n      .locator(\"svg\")\n      .first()\n      .locator(\"..\")\n      .click();\n\n    // Verify we're at \"1 / 2\"\n    let switcherSpan = page.getByTestId(\"MessageSwitcher/container\").first();\n    await expect(switcherSpan).toBeVisible({ timeout: 5000 });\n    await expect(switcherSpan).toContainText(\"1/2\");\n\n    // Verify we're back to the original response\n    const firstVersionText = await messageContent.textContent();\n    expect(firstVersionText).toBe(originalResponseText);\n\n    // Navigate back to regenerated version\n    await switcherSpan\n      .locator(\"..\")\n      .locator(\"svg\")\n      .last()\n      .locator(\"..\")\n      .click();\n\n    // Verify we're back at \"2 / 2\"\n    switcherSpan = page.getByTestId(\"MessageSwitcher/container\").first();\n    await expect(switcherSpan).toBeVisible({ timeout: 5000 });\n    await expect(switcherSpan).toContainText(\"2/2\");\n  });\n\n  test(\"Message editing with files\", async ({ page }) => {\n    const testFileName = `test-edit-${Date.now()}.txt`;\n    const testFileContent = \"This is a test file for editing with attachments.\";\n    const buffer = Buffer.from(testFileContent, \"utf-8\");\n\n    // Trigger the native file dialog by clicking the hidden file input,\n    // then intercept it with the filechooser event (same pattern as\n    // user_file_attachment.spec.ts).\n    const fileInput = page.locator('input[type=\"file\"]').first();\n    const fileChooserPromise = page.waitForEvent(\"filechooser\");\n    await fileInput.dispatchEvent(\"click\");\n    const fileChooser = await fileChooserPromise;\n\n    const uploadResponsePromise = page.waitForResponse(\n      (response) =>\n        response.url().includes(\"/api/user/projects/file/upload\") &&\n        response.request().method() === \"POST\"\n    );\n\n    await fileChooser.setFiles({\n      name: testFileName,\n      mimeType: \"text/plain\",\n      buffer: buffer,\n    });\n\n    const uploadResponse = await uploadResponsePromise;\n    expect(uploadResponse.ok()).toBeTruthy();\n\n    // Wait for upload processing to complete and file card to render\n    await page.waitForLoadState(\"networkidle\", { timeout: 10000 });\n    await expect(page.getByText(testFileName).first()).toBeVisible({\n      timeout: 10000,\n    });\n\n    // Send a message with the file attached using the shared utility\n    await sendMessage(page, \"Summarize this file\");\n\n    // Verify the file is displayed in the sent human message\n    const humanMessage = page.locator(\"#onyx-human-message\").first();\n\n    // Verify message text is displayed\n    const messageContent = await humanMessage.textContent();\n    expect(messageContent).toContain(\"Summarize this file\");\n\n    // Hover and click the edit button\n    await humanMessage.hover();\n    const editButton = humanMessage\n      .locator('[data-testid=\"HumanMessage/edit-button\"]')\n      .first();\n    await expect(editButton).toBeVisible();\n    await editButton.click();\n\n    // Edit the message text\n    const textarea = humanMessage.locator(\"textarea\");\n    await textarea.fill(\"What does this file contain?\");\n\n    // Submit the edit\n    const submitButton = humanMessage.locator('button:has-text(\"Submit\")');\n    await submitButton.click();\n\n    // Wait for the new AI response to complete\n    await page.waitForSelector('[data-testid=\"AgentMessage/copy-button\"]', {\n      state: \"detached\",\n    });\n    await page.waitForSelector('[data-testid=\"AgentMessage/copy-button\"]', {\n      state: \"visible\",\n      timeout: 30000,\n    });\n\n    // Verify the edited message text is displayed\n    const editedHumanMessage = page.locator(\"#onyx-human-message\").first();\n    const editedMessageContent = await editedHumanMessage.textContent();\n    expect(editedMessageContent).toContain(\"What does this file contain?\");\n    expect(editedMessageContent).not.toContain(\"Summarize this file\");\n\n    // Verify the file is still attached after editing\n    const editedFileDisplay = editedHumanMessage.locator(\"#onyx-file\");\n    await expect(editedFileDisplay).toBeVisible();\n    await expect(editedFileDisplay.getByText(testFileName)).toBeVisible();\n\n    // Verify the version switcher shows 2/2 (original + edited)\n    const messageSwitcher = page\n      .getByTestId(\"MessageSwitcher/container\")\n      .first();\n    await expect(messageSwitcher).toBeVisible();\n    await expect(messageSwitcher).toContainText(\"2/2\");\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/message_feedback.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport { loginAsRandomUser } from \"@tests/e2e/utils/auth\";\nimport { sendMessage } from \"@tests/e2e/utils/chatActions\";\n\ntest.describe(\"Message feedback thumbs controls\", () => {\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAsRandomUser(page);\n\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n  });\n\n  test(\"allows submitting and clearing thumbs up/down feedback\", async ({\n    page,\n  }) => {\n    const createFeedbackRequests: {\n      is_positive: boolean;\n      chat_message_id: number;\n      feedback_text?: string;\n      predefined_feedback?: string;\n    }[] = [];\n    const removeFeedbackRequests: {\n      url: string;\n      query: Record<string, string>;\n    }[] = [];\n\n    await page.route(\n      \"**/api/chat/create-chat-message-feedback\",\n      async (route) => {\n        const body = JSON.parse(route.request().postData() ?? \"{}\");\n        createFeedbackRequests.push(body);\n        await route.fulfill({\n          status: 200,\n          contentType: \"application/json\",\n          body: \"{}\",\n        });\n      }\n    );\n\n    await page.route(\n      \"**/api/chat/remove-chat-message-feedback?*\",\n      async (route) => {\n        const url = new URL(route.request().url());\n        removeFeedbackRequests.push({\n          url: route.request().url(),\n          query: Object.fromEntries(url.searchParams.entries()),\n        });\n        await route.fulfill({\n          status: 200,\n          contentType: \"application/json\",\n          body: \"{}\",\n        });\n      }\n    );\n\n    await sendMessage(page, \"Share a short fun fact.\");\n\n    const aiMessage = page.getByTestId(\"onyx-ai-message\").last();\n    const likeButton = aiMessage.getByTestId(\"AgentMessage/like-button\");\n    const dislikeButton = aiMessage.getByTestId(\"AgentMessage/dislike-button\");\n\n    await expect(likeButton).toBeVisible({ timeout: 15000 });\n    await expect(dislikeButton).toBeVisible();\n\n    // Thumbs up opens the feedback modal with optional feedback\n    await likeButton.click();\n    const modalTitle = page.getByText(\"Feedback\").first();\n    await expect(modalTitle).toBeVisible({ timeout: 5000 });\n\n    // Submit without entering feedback (optional for thumbs up)\n    const submitButton = page.getByRole(\"button\", { name: \"Submit\" });\n    await expect(submitButton).toBeEnabled({ timeout: 2000 });\n\n    await Promise.all([\n      page.waitForRequest(\"**/api/chat/create-chat-message-feedback\"),\n      submitButton.click(),\n    ]);\n\n    expect(createFeedbackRequests).toHaveLength(1);\n    const likedRequest = createFeedbackRequests[0];\n    expect(likedRequest?.is_positive).toBe(true);\n    expect(likedRequest?.chat_message_id).toBeTruthy();\n    expect(likedRequest?.feedback_text).toBeFalsy();\n\n    await expect(modalTitle).toBeHidden({ timeout: 5000 });\n\n    // Clicking thumbs up again removes the feedback\n    await Promise.all([\n      page.waitForRequest(\"**/api/chat/remove-chat-message-feedback?*\"),\n      likeButton.click(),\n    ]);\n    expect(removeFeedbackRequests).toHaveLength(1);\n    expect(removeFeedbackRequests[0]?.query.chat_message_id).toBe(\n      String(likedRequest?.chat_message_id)\n    );\n\n    // Thumbs down opens the feedback modal with mandatory feedback\n    await dislikeButton.click();\n    await expect(modalTitle).toBeVisible({ timeout: 5000 });\n\n    // Verify submit button is disabled without feedback\n    const submitButtonDislike = page.getByRole(\"button\", { name: \"Submit\" });\n    await expect(submitButtonDislike).toBeDisabled();\n\n    // Enter feedback (mandatory for thumbs down)\n    const feedbackInput = page.getByPlaceholder(\n      /What did you .* about this response\\?/i\n    );\n    await feedbackInput.fill(\"Response missed some details.\");\n\n    // Submit button should now be enabled\n    await expect(submitButtonDislike).toBeEnabled();\n\n    await Promise.all([\n      page.waitForRequest(\"**/api/chat/create-chat-message-feedback\"),\n      submitButtonDislike.click(),\n    ]);\n\n    expect(createFeedbackRequests).toHaveLength(2);\n    const dislikedRequest = createFeedbackRequests[1];\n    expect(dislikedRequest?.is_positive).toBe(false);\n    expect(dislikedRequest?.feedback_text).toContain(\"missed some details\");\n    expect(dislikedRequest?.chat_message_id).toBe(\n      likedRequest?.chat_message_id\n    );\n\n    await expect(modalTitle).toBeHidden({ timeout: 5000 });\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/project_files_visual_regression.spec.ts",
    "content": "import { expect, test, type Locator, type Page } from \"@playwright/test\";\nimport { loginAsWorkerUser } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\nimport { expectElementScreenshot } from \"@tests/e2e/utils/visualRegression\";\n\nconst PROJECT_NAME = \"E2E-PROJECT-FILES-VISUAL\";\nconst ATTACHMENT_ITEM_TITLE_TEST_ID = \"attachment-item-title\";\nconst ATTACHMENT_ITEM_ICON_WRAPPER_TEST_ID = \"attachment-item-icon-wrapper\";\nconst LONG_FILE_NAME =\n  \"CSE_202_Final_Project_Solution_Regression_Check_Long_Name.txt\";\nconst FILE_CONTENT = \"Visual regression test content for long filename cards.\";\n\nlet projectId: number | null = null;\n\ntype Geometry = {\n  elementLeft: number;\n  elementRight: number;\n  elementTop: number;\n  elementBottom: number;\n  cardLeft: number;\n  cardRight: number;\n  cardTop: number;\n  cardBottom: number;\n};\n\nfunction getFilesSection(page: Page): Locator {\n  return page\n    .locator(\"div\")\n    .filter({ has: page.getByRole(\"button\", { name: \"Add Files\" }) })\n    .filter({ hasText: \"Chats in this project can access these files.\" })\n    .first();\n}\n\nasync function uploadFileToProject(\n  page: Page,\n  targetProjectId: number,\n  fileName: string,\n  content: string\n): Promise<void> {\n  const response = await page.request.post(\"/api/user/projects/file/upload\", {\n    multipart: {\n      project_id: String(targetProjectId),\n      files: {\n        name: fileName,\n        mimeType: \"text/plain\",\n        buffer: Buffer.from(content, \"utf-8\"),\n      },\n    },\n  });\n\n  expect(response.ok()).toBeTruthy();\n}\n\nasync function getElementGeometryInCard(\n  element: Locator\n): Promise<Geometry | null> {\n  return element.evaluate((targetEl) => {\n    let cardEl: HTMLElement | null = targetEl.parentElement;\n\n    while (cardEl) {\n      const style = window.getComputedStyle(cardEl);\n      const hasBorder =\n        parseFloat(style.borderTopWidth) > 0 ||\n        parseFloat(style.borderLeftWidth) > 0;\n      const hasRadius = parseFloat(style.borderTopLeftRadius) > 0;\n\n      if (hasBorder && hasRadius) {\n        break;\n      }\n      cardEl = cardEl.parentElement;\n    }\n\n    if (!cardEl) {\n      return null;\n    }\n\n    const elementRect = targetEl.getBoundingClientRect();\n    const cardRect = cardEl.getBoundingClientRect();\n\n    return {\n      elementLeft: elementRect.left,\n      elementRight: elementRect.right,\n      elementTop: elementRect.top,\n      elementBottom: elementRect.bottom,\n      cardLeft: cardRect.left,\n      cardRight: cardRect.right,\n      cardTop: cardRect.top,\n      cardBottom: cardRect.bottom,\n    };\n  });\n}\n\nfunction expectGeometryWithinCard(geometry: Geometry | null): void {\n  expect(geometry).not.toBeNull();\n  expect(geometry!.elementLeft).toBeGreaterThanOrEqual(geometry!.cardLeft - 1);\n  expect(geometry!.elementRight).toBeLessThanOrEqual(geometry!.cardRight + 1);\n  expect(geometry!.elementTop).toBeGreaterThanOrEqual(geometry!.cardTop - 1);\n  expect(geometry!.elementBottom).toBeLessThanOrEqual(geometry!.cardBottom + 1);\n}\n\ntest.describe(\"Project Files visual regression\", () => {\n  test.beforeAll(async ({ browser }, workerInfo) => {\n    const context = await browser.newContext();\n    const page = await context.newPage();\n\n    await loginAsWorkerUser(page, workerInfo.workerIndex);\n    const client = new OnyxApiClient(page.request);\n\n    projectId = await client.createProject(PROJECT_NAME);\n    await uploadFileToProject(page, projectId, LONG_FILE_NAME, FILE_CONTENT);\n\n    await context.close();\n  });\n\n  test.afterAll(async ({ browser }, workerInfo) => {\n    if (!projectId) {\n      return;\n    }\n\n    const context = await browser.newContext();\n    const page = await context.newPage();\n\n    await loginAsWorkerUser(page, workerInfo.workerIndex);\n    const client = new OnyxApiClient(page.request);\n    await client.deleteProject(projectId);\n\n    await context.close();\n  });\n\n  test.beforeEach(async ({ page }, workerInfo) => {\n    if (projectId === null) {\n      throw new Error(\n        \"Project setup failed in beforeAll; cannot run visual regression test\"\n      );\n    }\n\n    await page.context().clearCookies();\n    await loginAsWorkerUser(page, workerInfo.workerIndex);\n    await page.goto(`/app?projectId=${projectId}`);\n    await page.waitForLoadState(\"networkidle\");\n    await expect(\n      page.getByText(\"Chats in this project can access these files.\")\n    ).toBeVisible();\n  });\n\n  test(\"long underscore filename stays visually contained in file card\", async ({\n    page,\n  }) => {\n    const filesSection = getFilesSection(page);\n    await expect(filesSection).toBeVisible();\n\n    const fileTitle = filesSection\n      .locator(`[data-testid=\"${ATTACHMENT_ITEM_TITLE_TEST_ID}\"]`)\n      .filter({ hasText: LONG_FILE_NAME })\n      .first();\n    await expect(fileTitle).toBeVisible();\n\n    // Wait for deterministic post-processing state before geometry checks/screenshot.\n    await expect(fileTitle).not.toContainText(\"Processing...\", {\n      timeout: 30_000,\n    });\n    await expect(fileTitle).not.toContainText(\"Uploading...\", {\n      timeout: 30_000,\n    });\n    await expect(fileTitle).toContainText(\"TXT\", { timeout: 30_000 });\n\n    const iconWrapper = filesSection\n      .locator(`[data-testid=\"${ATTACHMENT_ITEM_ICON_WRAPPER_TEST_ID}\"]`)\n      .first();\n    await expect(iconWrapper).toBeVisible();\n\n    const container = page.locator(\"[data-main-container]\");\n    await expect(container).toBeVisible();\n    await expectElementScreenshot(container, {\n      name: \"project-files-long-underscore-filename\",\n    });\n\n    const iconGeometry = await getElementGeometryInCard(iconWrapper);\n    const titleGeometry = await getElementGeometryInCard(fileTitle);\n    expectGeometryWithinCard(iconGeometry);\n    expectGeometryWithinCard(titleGeometry);\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/scroll_behavior.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport type { Page } from \"@playwright/test\";\nimport { loginAsRandomUser } from \"@tests/e2e/utils/auth\";\nimport { sendMessage, startNewChat } from \"@tests/e2e/utils/chatActions\";\n\n/**\n * Helper to toggle auto-scroll setting via the settings panel\n */\nasync function setAutoScroll(page: Page, enabled: boolean) {\n  // Open user dropdown menu (same pattern as other tests)\n  await page.locator(\"#onyx-user-dropdown\").click();\n  await page.getByText(\"User Settings\").first().click();\n  // Wait for dialog to appear\n  await page.waitForSelector('[role=\"dialog\"]', { state: \"visible\" });\n\n  // Navigate to Chat Preferences tab\n  await page\n    .locator('a[href=\"/app/settings/chat-preferences\"]')\n    .click({ force: true });\n\n  // Find the auto-scroll switch by locating the label text and then finding\n  // the switch within the same container\n  const autoScrollSwitch = page\n    .locator(\"label\")\n    .filter({ hasText: \"Chat Auto-scroll\" })\n    .locator('button[role=\"switch\"]');\n\n  await autoScrollSwitch.waitFor({ state: \"visible\" });\n\n  const isCurrentlyChecked =\n    (await autoScrollSwitch.getAttribute(\"aria-checked\")) === \"true\";\n\n  if (isCurrentlyChecked !== enabled) {\n    await autoScrollSwitch.click();\n    // Wait for the switch state to update\n    const expectedState = enabled ? \"true\" : \"false\";\n    await expect(autoScrollSwitch).toHaveAttribute(\n      \"aria-checked\",\n      expectedState\n    );\n  }\n\n  await page.locator('a[href=\"/app\"]').click({ force: true });\n}\n\n/**\n * Helper to get the scroll container element\n */\nfunction getScrollContainer(page: Page) {\n  // The scroll container is the div with overflow-y-auto inside ChatUI\n  return page.locator(\".overflow-y-auto\").first();\n}\n\ntest.describe(\"Chat Scroll Behavior\", () => {\n  // Configure this suite to run serially to resepect auto-scroll settings\n  test.describe.configure({ mode: \"serial\" });\n\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAsRandomUser(page);\n    await page.goto(\"/app\");\n    const nameInput = page.getByPlaceholder(\"Your name\");\n    await nameInput.waitFor();\n    await nameInput.fill(\"Playwright Tester\");\n    await page.getByText(\"Save\").click();\n    await Promise.all([\n      // Wait for sidebar navigation to be visible to indicate page is loaded\n      page.getByText(\"Agents\").first().waitFor(),\n      page.getByText(\"Projects\").first().waitFor(),\n    ]);\n  });\n\n  // TODO(Nik): https://linear.app/onyx-app/issue/ENG-3422/playwright-tests-for-scroll-behavior\n  test.skip(\"Opening existing conversation positions correctly\", async ({\n    page,\n  }) => {\n    // Turn off auto-scroll\n    await setAutoScroll(page, false);\n\n    // Create a conversation with multiple messages\n    await sendMessage(\n      page,\n      \"Message 1: Creating some content to enable scrolling\"\n    );\n    await sendMessage(page, \"Message 2: More content for the scroll test\");\n\n    // Reload page to simulate opening an existing conversation\n    await page.reload();\n    await Promise.all([\n      // Wait for sidebar navigation to be visible to indicate page is loaded\n      page.getByText(\"Agents\").first().waitFor(),\n      page.getByText(\"Projects\").first().waitFor(),\n    ]);\n\n    // Wait for scroll positioning to complete (content becomes visible)\n    await page\n      .locator('[data-scroll-ready=\"true\"]')\n      .waitFor({ timeout: 30000 });\n\n    // Wait for the user messages to be visible\n    const lastUserMessage = page.locator(\"#onyx-human-message\").last();\n    await lastUserMessage.waitFor({ state: \"visible\", timeout: 30000 });\n\n    // Verify the last user message is positioned near the top of the viewport\n    const isPositionedCorrectly = await lastUserMessage.evaluate(\n      (el: HTMLElement) => {\n        const scrollContainer = el.closest(\".overflow-y-auto\");\n        if (!scrollContainer) return false;\n\n        const containerRect = scrollContainer.getBoundingClientRect();\n        const elementRect = el.getBoundingClientRect();\n\n        // Check if element is near the top of the container (within 100px)\n        return elementRect.top - containerRect.top < 100;\n      }\n    );\n\n    expect(isPositionedCorrectly).toBe(true);\n  });\n\n  test(\"Auto-scroll ON: scrolls to bottom on new message\", async ({ page }) => {\n    // Ensure auto-scroll is ON (default)\n    await setAutoScroll(page, true);\n\n    // Send a message\n    await sendMessage(page, \"Hello, this is a test message\");\n\n    // Send another message to create some content\n    await sendMessage(page, \"Another message to test scrolling behavior\");\n\n    // The scroll container should be scrolled to bottom\n    const scrollContainer = getScrollContainer(page);\n    const isAtBottom = await scrollContainer.evaluate((el: HTMLElement) => {\n      return Math.abs(el.scrollHeight - el.scrollTop - el.clientHeight) < 10;\n    });\n\n    expect(isAtBottom).toBe(true);\n  });\n});\n\n/**\n * Tests for the Dynamic Bottom Spacer feature.\n *\n * The DynamicBottomSpacer creates a \"fresh chat\" effect where new messages\n * appear at the top of the viewport (below the header), giving each exchange\n * a clean slate appearance while preserving scroll-up access to history.\n */\ntest.describe(\"Dynamic Bottom Spacer - Fresh Chat Effect\", () => {\n  test.describe.configure({ mode: \"serial\" });\n\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAsRandomUser(page);\n    await page.goto(\"/app\");\n    const nameInput = page.getByPlaceholder(\"Your name\");\n    await nameInput.waitFor();\n    await nameInput.fill(\"Playwright Tester\");\n    await page.getByText(\"Save\").click();\n    await Promise.all([\n      page.getByText(\"Agents\").first().waitFor(),\n      page.getByText(\"Projects\").first().waitFor(),\n    ]);\n  });\n\n  /**\n   * Helper to get the position of an element relative to scroll container\n   */\n  async function getElementPositionInContainer(\n    page: Page,\n    elementLocator: ReturnType<Page[\"locator\"]>\n  ) {\n    return elementLocator.evaluate((el: HTMLElement) => {\n      const scrollContainer = el.closest(\".overflow-y-auto\");\n      if (!scrollContainer) return null;\n\n      const containerRect = scrollContainer.getBoundingClientRect();\n      const elementRect = el.getBoundingClientRect();\n\n      return {\n        topOffset: elementRect.top - containerRect.top,\n        containerHeight: containerRect.height,\n        elementTop: elementRect.top,\n        containerTop: containerRect.top,\n      };\n    });\n  }\n\n  test(\"Follow-up message appears near top of viewport (fresh chat effect)\", async ({\n    page,\n  }) => {\n    // First, create some conversation history\n    await sendMessage(\n      page,\n      \"This is the first message to establish conversation history\"\n    );\n\n    // Send a follow-up message - this should trigger the fresh chat effect\n    await sendMessage(\n      page,\n      \"This follow-up message should appear near the top of the viewport\"\n    );\n\n    // Get the last user message (the follow-up)\n    const lastUserMessage = page.locator(\"#onyx-human-message\").last();\n    await lastUserMessage.waitFor({ state: \"visible\" });\n\n    // Check that the follow-up message is positioned near the top of the container\n    // (within ~150px to account for sticky header and some padding)\n    await expect\n      .poll(\n        async () => {\n          const position = await getElementPositionInContainer(\n            page,\n            lastUserMessage\n          );\n          return position?.topOffset ?? Number.POSITIVE_INFINITY;\n        },\n        { timeout: 5000 }\n      )\n      .toBeLessThan(150);\n  });\n\n  test(\"Dynamic spacer element exists and has correct attributes\", async ({\n    page,\n  }) => {\n    // Send a message to start a conversation\n    await sendMessage(page, \"Test message to initialize chat\");\n\n    // Send a follow-up to trigger the spacer\n    await sendMessage(page, \"Follow-up message\");\n\n    // Verify the dynamic spacer element exists with correct attributes\n    const spacer = page.locator('[data-dynamic-spacer=\"true\"]');\n    await expect(spacer).toBeVisible({ timeout: 10000 });\n    await expect(spacer).toHaveAttribute(\"aria-hidden\", \"true\");\n  });\n\n  test(\"User can scroll up to see previous messages after fresh chat effect\", async ({\n    page,\n  }) => {\n    // Create conversation history\n    await sendMessage(page, \"First message in the conversation\");\n    await sendMessage(page, \"Second message in the conversation\");\n\n    // Send a follow-up (triggers fresh chat effect)\n    await sendMessage(page, \"Third message - should be at top\");\n\n    // Now scroll up to verify previous messages are accessible\n    const scrollContainer = getScrollContainer(page);\n    await scrollContainer.evaluate((el: HTMLElement) => {\n      el.scrollTo({ top: 0, behavior: \"instant\" });\n    });\n\n    // Wait for scroll to complete\n    await expect\n      .poll(() => scrollContainer.evaluate((el: HTMLElement) => el.scrollTop), {\n        timeout: 5000,\n      })\n      .toBeLessThanOrEqual(1);\n\n    // Verify the first message is now visible\n    const firstUserMessage = page.locator(\"#onyx-human-message\").first();\n    await expect(firstUserMessage).toBeVisible();\n\n    // Verify the first message content\n    await expect(firstUserMessage).toContainText(\"First message\");\n  });\n\n  test(\"Scroll container remains at bottom after AI response completes\", async ({\n    page,\n  }) => {\n    // Send a message\n    await sendMessage(page, \"Please respond with a short message\");\n\n    // After AI response completes, verify we're still at the bottom\n    const scrollContainer = getScrollContainer(page);\n    const isAtBottom = await scrollContainer.evaluate((el: HTMLElement) => {\n      // Allow a small tolerance (10px) for rounding\n      return Math.abs(el.scrollHeight - el.scrollTop - el.clientHeight) < 10;\n    });\n\n    expect(isAtBottom).toBe(true);\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/share_chat.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport type { Page } from \"@playwright/test\";\nimport { loginAsRandomUser } from \"../utils/auth\";\nimport { expectElementScreenshot } from \"../utils/visualRegression\";\n\nasync function sendMessageAndWaitForChat(page: Page, message: string) {\n  await page.locator(\"#onyx-chat-input-textarea\").click();\n  await page.locator(\"#onyx-chat-input-textarea\").fill(message);\n  await page.locator(\"#onyx-chat-input-send-button\").click();\n\n  await page.waitForFunction(\n    () => window.location.href.includes(\"chatId=\"),\n    null,\n    { timeout: 15000 }\n  );\n\n  await expect(page.locator('[aria-label=\"share-chat-button\"]')).toBeVisible({\n    timeout: 10000,\n  });\n}\n\nasync function openShareModal(page: Page) {\n  await page.locator('[aria-label=\"share-chat-button\"]').click();\n  await expect(page.getByRole(\"dialog\")).toBeVisible({ timeout: 5000 });\n}\n\ntest.describe(\"Share Chat Session Modal\", () => {\n  test.describe.configure({ mode: \"serial\" });\n\n  let page: Page;\n\n  test.beforeAll(async ({ browser }) => {\n    page = await browser.newPage();\n    await loginAsRandomUser(page);\n    await sendMessageAndWaitForChat(page, \"Hello for share test\");\n  });\n\n  test.afterAll(async () => {\n    await page.close();\n  });\n\n  test(\"shows Private selected by default\", async () => {\n    await openShareModal(page);\n\n    const dialog = page.getByRole(\"dialog\");\n    await expect(dialog).toBeVisible();\n\n    const privateOption = dialog.locator(\n      '[aria-label=\"share-modal-option-private\"]'\n    );\n    await expect(privateOption.locator(\"svg\").last()).toBeVisible();\n\n    const submitButton = dialog.locator('[aria-label=\"share-modal-submit\"]');\n    await expect(submitButton).toHaveText(\"Done\");\n\n    const cancelButton = dialog.locator('[aria-label=\"share-modal-cancel\"]');\n    await expect(cancelButton).toBeVisible();\n\n    await expectElementScreenshot(dialog, {\n      name: \"share-modal-default-private\",\n    });\n\n    await page.keyboard.press(\"Escape\");\n    await expect(dialog).toBeHidden({ timeout: 5000 });\n  });\n\n  test(\"selecting Your Organization changes submit text\", async () => {\n    await openShareModal(page);\n\n    const dialog = page.getByRole(\"dialog\");\n\n    await dialog.locator('[aria-label=\"share-modal-option-public\"]').click();\n\n    const submitButton = dialog.locator('[aria-label=\"share-modal-submit\"]');\n    await expect(submitButton).toHaveText(\"Create Share Link\");\n\n    const cancelButton = dialog.locator('[aria-label=\"share-modal-cancel\"]');\n    await expect(cancelButton).toBeVisible();\n\n    await expectElementScreenshot(dialog, {\n      name: \"share-modal-public-selected\",\n    });\n\n    await page.keyboard.press(\"Escape\");\n    await expect(dialog).toBeHidden({ timeout: 5000 });\n  });\n\n  test(\"Cancel closes modal without API calls\", async () => {\n    let patchCallCount = 0;\n    await page.route(\"**/api/chat/chat-session/*\", async (route) => {\n      if (route.request().method() === \"PATCH\") {\n        patchCallCount++;\n      }\n      await route.continue();\n    });\n\n    await openShareModal(page);\n\n    const dialog = page.getByRole(\"dialog\");\n    const cancelButton = dialog.locator('[aria-label=\"share-modal-cancel\"]');\n    await cancelButton.click();\n\n    await expect(dialog).toBeHidden({ timeout: 5000 });\n    expect(patchCallCount).toBe(0);\n\n    await page.unrouteAll({ behavior: \"ignoreErrors\" });\n  });\n\n  test(\"X button closes modal without API calls\", async () => {\n    let patchCallCount = 0;\n    await page.route(\"**/api/chat/chat-session/*\", async (route) => {\n      if (route.request().method() === \"PATCH\") {\n        patchCallCount++;\n      }\n      await route.continue();\n    });\n\n    await openShareModal(page);\n\n    const dialog = page.getByRole(\"dialog\");\n    const closeButton = dialog.locator('div[tabindex=\"-1\"] button');\n    await closeButton.click();\n\n    await expect(dialog).toBeHidden({ timeout: 5000 });\n    expect(patchCallCount).toBe(0);\n\n    await page.unrouteAll({ behavior: \"ignoreErrors\" });\n  });\n\n  test(\"creating a share link calls API and shows link\", async () => {\n    await openShareModal(page);\n\n    const dialog = page.getByRole(\"dialog\");\n\n    let patchBody: Record<string, unknown> | null = null;\n    await page.route(\"**/api/chat/chat-session/*\", async (route) => {\n      if (route.request().method() === \"PATCH\") {\n        patchBody = JSON.parse(route.request().postData() ?? \"{}\");\n        await route.continue();\n      } else {\n        await route.continue();\n      }\n    });\n\n    await dialog.locator('[aria-label=\"share-modal-option-public\"]').click();\n    const submitButton = dialog.locator('[aria-label=\"share-modal-submit\"]');\n    await submitButton.click();\n\n    await page.waitForResponse(\n      (r) =>\n        r.url().includes(\"/api/chat/chat-session/\") &&\n        r.request().method() === \"PATCH\",\n      { timeout: 10000 }\n    );\n\n    expect(patchBody).toEqual({ sharing_status: \"public\" });\n\n    const linkInput = dialog.locator('[aria-label=\"share-modal-link-input\"]');\n    await expect(linkInput).toHaveValue(/\\/app\\/shared\\//, { timeout: 5000 });\n\n    await expect(submitButton).toHaveText(\"Copy Link\");\n    await expect(dialog.getByText(\"Chat shared\")).toBeVisible();\n    await expect(\n      dialog.locator('[aria-label=\"share-modal-cancel\"]')\n    ).toBeHidden();\n\n    await expectElementScreenshot(dialog, {\n      name: \"share-modal-link-created\",\n      mask: ['[aria-label=\"share-modal-link-input\"]'],\n    });\n\n    await page.unrouteAll({ behavior: \"ignoreErrors\" });\n\n    // Wait for the toast to confirm SWR data has been refreshed\n    // before closing, so the next test sees up-to-date shared_status\n    await expect(\n      page.getByText(\"Share link copied to clipboard!\").first()\n    ).toBeVisible({ timeout: 5000 });\n\n    await page.keyboard.press(\"Escape\");\n    await expect(dialog).toBeHidden({ timeout: 5000 });\n  });\n\n  test(\"Copy Link triggers clipboard copy\", async () => {\n    await openShareModal(page);\n\n    const dialog = page.getByRole(\"dialog\");\n\n    await expect(\n      dialog.locator('[aria-label=\"share-modal-link-input\"]')\n    ).toBeVisible({ timeout: 5000 });\n\n    const submitButton = dialog.locator('[aria-label=\"share-modal-submit\"]');\n    await expect(submitButton).toHaveText(\"Copy Link\");\n\n    await submitButton.click();\n\n    await expect(\n      page.getByText(\"Share link copied to clipboard!\").first()\n    ).toBeVisible({ timeout: 5000 });\n\n    await page.keyboard.press(\"Escape\");\n    await expect(dialog).toBeHidden({ timeout: 5000 });\n  });\n\n  test(\"making chat private again calls API and closes modal\", async () => {\n    let patchBody: Record<string, unknown> | null = null;\n    await page.route(\"**/api/chat/chat-session/*\", async (route) => {\n      if (route.request().method() === \"PATCH\") {\n        patchBody = JSON.parse(route.request().postData() ?? \"{}\");\n        await route.continue();\n      } else {\n        await route.continue();\n      }\n    });\n\n    await openShareModal(page);\n\n    const dialog = page.getByRole(\"dialog\");\n    const submitButton = dialog.locator('[aria-label=\"share-modal-submit\"]');\n\n    await dialog.locator('[aria-label=\"share-modal-option-private\"]').click();\n\n    await expect(submitButton).toHaveText(\"Make Private\");\n\n    await submitButton.click();\n\n    await page.waitForResponse(\n      (r) =>\n        r.url().includes(\"/api/chat/chat-session/\") &&\n        r.request().method() === \"PATCH\",\n      { timeout: 10000 }\n    );\n\n    expect(patchBody).toEqual({ sharing_status: \"private\" });\n\n    await expect(dialog).toBeHidden({ timeout: 5000 });\n\n    await expect(page.getByText(\"Chat is now private\")).toBeVisible({\n      timeout: 5000,\n    });\n\n    await page.unrouteAll({ behavior: \"ignoreErrors\" });\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/chat/welcome_page.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport {\n  expectScreenshot,\n  expectElementScreenshot,\n} from \"@tests/e2e/utils/visualRegression\";\nimport { GREETING_MESSAGES } from \"@/lib/chat/greetingMessages\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\n\ntest.describe.configure({ mode: \"parallel\" });\n\nconst THEMES = [\"light\", \"dark\"] as const;\n\nfor (const theme of THEMES) {\n  test.describe(`Welcome page — /app (${theme} mode)`, () => {\n    test.beforeEach(async ({ page }) => {\n      // Always log in before each test to ensure a valid session.\n      await loginAs(page, \"admin\");\n\n      // Inject theme into localStorage so next-themes picks it up immediately.\n      await page.addInitScript((t: string) => {\n        localStorage.setItem(\"theme\", t);\n      }, theme);\n\n      await page.goto(\"/app\");\n      await page.waitForLoadState(\"networkidle\");\n    });\n\n    // ── Full-page screenshot ──────────────────────────────────────────\n\n    test(\"full page visual snapshot\", async ({ page }) => {\n      // Wait for the welcome greeting to ensure the page has fully rendered\n      await page\n        .getByTestId(\"chat-intro\")\n        .waitFor({ state: \"visible\", timeout: 10000 });\n\n      await expectScreenshot(page, {\n        name: `welcome-${theme}-full-page`,\n        hide: ['[data-testid=\"onyx-logo\"]'], // greeting text is random, hide to prevent size variation\n      });\n    });\n\n    // ── Input bar element screenshot ──────────────────────────────────\n\n    test(\"input bar element snapshot\", async ({ page }) => {\n      const inputBar = page.locator(\"#onyx-chat-input\");\n      await inputBar.waitFor({ state: \"visible\", timeout: 10000 });\n\n      await expectElementScreenshot(inputBar, {\n        name: `welcome-${theme}-input-bar`,\n      });\n    });\n\n    // ── Sidebar element screenshot ────────────────────────────────────\n\n    test(\"sidebar element snapshot\", async ({ page }) => {\n      // SidebarWrapper renders a div with `group/SidebarWrapper` Tailwind\n      // group class — this is the most stable identifier for the sidebar\n      // container element.\n      const sidebar = page.locator(\".group\\\\/SidebarWrapper\");\n      await sidebar.waitFor({ state: \"visible\", timeout: 10000 });\n\n      await expectElementScreenshot(sidebar, {\n        name: `welcome-${theme}-sidebar`,\n      });\n    });\n\n    // ── Content assertions ────────────────────────────────────────────\n\n    test(\"displays greeting from default agent\", async ({ page }) => {\n      const greetingContainer = page.getByTestId(\"onyx-logo\");\n      await greetingContainer.waitFor({ state: \"visible\", timeout: 10000 });\n\n      const text = await greetingContainer.textContent();\n      expect(GREETING_MESSAGES).toContain(text?.trim());\n    });\n\n    test(\"chat input is visible and focusable\", async ({ page }) => {\n      const textarea = page.locator(\"#onyx-chat-input-textarea\");\n      await expect(textarea).toBeVisible({ timeout: 10000 });\n\n      await textarea.click();\n      await expect(textarea).toBeFocused();\n    });\n\n    test(\"new session button is visible in the sidebar\", async ({ page }) => {\n      const newSessionBtn = page.getByTestId(\"AppSidebar/new-session\");\n      await expect(newSessionBtn).toBeVisible({ timeout: 10000 });\n    });\n\n    test.skip(\"send button is visible in the input bar\", async ({ page }) => {\n      const sendButton = page.locator(\"#onyx-chat-input-send-button\");\n      await expect(sendButton).toBeVisible({ timeout: 10000 });\n\n      await expectElementScreenshot(sendButton, {\n        name: `welcome-${theme}-send-button`,\n      });\n    });\n  });\n}\n"
  },
  {
    "path": "web/tests/e2e/connectors/federated_slack.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport type { Page } from \"@playwright/test\";\nimport { loginAs, loginAsRandomUser } from \"@tests/e2e/utils/auth\";\n\ntest.use({ storageState: \"admin_auth.json\" });\n\nconst SLACK_CLIENT_ID = process.env.SLACK_CLIENT_ID;\nconst SLACK_CLIENT_SECRET = process.env.SLACK_CLIENT_SECRET;\n\nasync function createFederatedSlackConnector(page: Page) {\n  // Navigate to add connector page\n  await page.goto(\"/admin/add-connector\");\n  await page.waitForLoadState(\"networkidle\");\n\n  // Click on Slack connector tile (specifically the one with \"Logo Slack\" text, not \"Slack Bots\")\n  await page.getByRole(\"link\", { name: \"Logo Slack\" }).first().click();\n  await page.waitForLoadState(\"networkidle\");\n\n  if (!SLACK_CLIENT_ID || !SLACK_CLIENT_SECRET) {\n    throw new Error(\"SLACK_CLIENT_ID and SLACK_CLIENT_SECRET must be set\");\n  }\n\n  // Fill in the client ID and client secret\n  await page.getByLabel(/client id/i).fill(SLACK_CLIENT_ID);\n  await page.getByLabel(/client secret/i).fill(SLACK_CLIENT_SECRET);\n\n  // Submit the form to create or update the federated connector\n  const createOrUpdateButton = await page.getByRole(\"button\", {\n    name: /create|update/i,\n  });\n  await createOrUpdateButton.click();\n\n  // Wait for success message or redirect\n  await page.waitForTimeout(2000);\n}\n\nasync function navigateToUserSettings(page: Page) {\n  // Wait for any existing modals to close\n  await page.waitForTimeout(1000);\n\n  // Wait for potential modal backdrop to disappear\n  await page\n    .waitForSelector(\".fixed.inset-0.bg-neutral-950\\\\/50\", {\n      state: \"detached\",\n      timeout: 5000,\n    })\n    .catch(() => {});\n\n  // Click on user dropdown/settings button\n  await page.locator(\"#onyx-user-dropdown\").click();\n\n  // Click on settings option\n  await page.getByText(\"User Settings\").click();\n\n  // Wait for settings modal to appear\n  await expect(page.locator(\"h2\", { hasText: \"User Settings\" })).toBeVisible();\n}\n\nasync function openConnectorsTab(page: Page) {\n  // Click on the Connectors tab in user settings\n  await page.getByRole(\"button\", { name: \"Connectors\" }).click();\n\n  // Wait for connectors section to be visible\n  // Allow multiple instances of \"Connected Services\" to be visible\n  const connectedServicesLocators = page.getByText(\"Connected Services\");\n  await expect(connectedServicesLocators.first()).toBeVisible();\n}\n\n/**\n * Cleanup function to delete the federated Slack connector from the admin panel\n * This ensures test isolation by removing any test data created during the test\n */\nasync function deleteFederatedSlackConnector(page: Page) {\n  // Navigate to admin indexing status page\n  await page.goto(\"/admin/indexing/status\");\n  await page.waitForLoadState(\"networkidle\");\n\n  // Expand the Slack section first (summary row toggles open on click)\n  const slackSummaryRow = page.locator(\"tr\").filter({\n    has: page.locator(\"text=/^\\\\s*Slack\\\\s*$/i\"),\n  });\n  if ((await slackSummaryRow.count()) > 0) {\n    await slackSummaryRow.first().click();\n    // Wait a moment for rows to render\n    await page.waitForTimeout(500);\n  }\n\n  // Look for the Slack federated connector row inside the expanded section\n  // The federated connectors have a \"Federated Access\" badge\n  const slackRow = page.locator(\"tr\", { hasText: /federated access/i });\n\n  // Check if the connector exists\n  const rowCount = await slackRow.count();\n  if (rowCount === 0) {\n    // No federated Slack connector found, nothing to delete\n    console.log(\"No federated Slack connector found to delete\");\n    return;\n  }\n\n  // Click on the row to navigate to the detail page\n  await slackRow.first().click();\n  await page.waitForLoadState(\"networkidle\");\n\n  // Look for and click the delete button\n  // Open the Manage menu and click Delete\n  const manageButton = page.getByRole(\"button\", { name: /manage/i });\n  await manageButton\n    .waitFor({ state: \"visible\", timeout: 5000 })\n    .catch(() => {});\n  if (!(await manageButton.isVisible().catch(() => false))) {\n    console.log(\"Manage button not visible; skipping delete\");\n    return;\n  }\n  await manageButton.click();\n  // Wait for the dropdown menu to appear and settle (Radix animation)\n  await page\n    .getByRole(\"menu\")\n    .waitFor({ state: \"visible\", timeout: 3000 })\n    .catch(() => {});\n  await page.waitForTimeout(150);\n\n  page.once(\"dialog\", (dialog) => dialog.accept());\n  const deleteMenuItem = page.getByRole(\"menuitem\", { name: /^Delete$/ });\n  await expect(deleteMenuItem).toBeVisible({ timeout: 5000 });\n  await deleteMenuItem.click({ force: true });\n  // Wait for deletion to complete and redirect\n  await page.waitForURL(\"**/admin/indexing/status*\", { timeout: 15000 });\n  await page.waitForLoadState(\"networkidle\");\n}\n\n// Causes other tests to fail for some reason???\n// TODO (chris): fix this test\ntest.skip(\"Federated Slack Connector - Create, OAuth Modal, and User Settings Flow\", async ({\n  page,\n}) => {\n  try {\n    // Setup: Clear cookies and log in as admin\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n\n    // Create a federated Slack connector in admin panel\n    await createFederatedSlackConnector(page);\n\n    // Log in as a random user\n    await page.context().clearCookies();\n    await loginAsRandomUser(page);\n\n    // Navigate back to main page and verify OAuth modal appears\n    await page.goto(\"/app\");\n    await page.waitForLoadState(\"networkidle\");\n\n    // Check if the OAuth modal appears\n    await expect(\n      page.getByText(/improve answer quality by letting/i)\n    ).toBeVisible({ timeout: 10000 });\n    await expect(page.getByText(/slack/i)).toBeVisible();\n\n    // Decline the OAuth connection\n    await page.getByRole(\"button\", { name: \"Skip for now\" }).click();\n\n    // Wait for modal to disappear\n    await expect(\n      page.getByText(/improve answer quality by letting/i)\n    ).not.toBeVisible();\n\n    // Go to user settings and verify the connector appears\n    await navigateToUserSettings(page);\n    await openConnectorsTab(page);\n\n    // Verify Slack connector appears in the federated connectors section\n    await expect(page.getByText(\"Federated Connectors\")).toBeVisible();\n    await expect(page.getByText(\"Slack\")).toBeVisible();\n    await expect(page.getByText(\"Not connected\")).toBeVisible();\n\n    // Verify there's a Connect button available\n    await expect(\n      page.locator(\"button\", { hasText: /^Connect$/ })\n    ).toBeVisible();\n  } finally {\n    // Cleanup: Delete the federated Slack connector\n    // Log back in as admin to delete the connector\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n    await deleteFederatedSlackConnector(page);\n  }\n});\n"
  },
  {
    "path": "web/tests/e2e/connectors/inlineFileManagement.spec.ts",
    "content": "import { test, expect, Page } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\n/** Upload a file through the inline manager, retrying on transient failures. */\nasync function uploadTestFile(\n  page: Page,\n  fileName: string,\n  content: string,\n  maxRetries: number = 3\n): Promise<void> {\n  const buffer = Buffer.from(content, \"utf-8\");\n\n  for (let attempt = 1; attempt <= maxRetries; attempt++) {\n    try {\n      const addFilesButton = page.getByRole(\"button\", { name: /add files/i });\n      await expect(addFilesButton).toBeVisible({ timeout: 5000 });\n      await expect(addFilesButton).toBeEnabled({ timeout: 5000 });\n\n      const fileChooserPromise = page.waitForEvent(\"filechooser\", {\n        timeout: 5000,\n      });\n      await addFilesButton.click();\n      const fileChooser = await fileChooserPromise;\n      await fileChooser.setFiles({\n        name: fileName,\n        mimeType: \"text/plain\",\n        buffer: buffer,\n      });\n      await expect(page.getByText(fileName)).toBeVisible({ timeout: 5000 });\n      return;\n    } catch (error) {\n      if (attempt === maxRetries) {\n        throw error;\n      }\n      await page.waitForTimeout(1000);\n    }\n  }\n}\n\ntest.describe(\"InlineFileManagement\", () => {\n  test.describe.configure({ retries: 2 });\n\n  let testCcPairId: number | null = null;\n\n  test.beforeEach(async ({ page }) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n\n    const apiClient = new OnyxApiClient(page.request);\n    testCcPairId = await apiClient.createFileConnector(\n      `Test File Connector ${Date.now()}`\n    );\n  });\n\n  test.afterEach(async ({ page }) => {\n    const apiClient = new OnyxApiClient(page.request);\n\n    if (testCcPairId !== null) {\n      try {\n        await apiClient.deleteCCPair(testCcPairId);\n        testCcPairId = null;\n      } catch (error) {\n        console.warn(\n          `Failed to delete test connector ${testCcPairId}: ${error}`\n        );\n      }\n    }\n  });\n\n  test(\"should display files section on connector page\", async ({ page }) => {\n    await page.goto(`/admin/connector/${testCcPairId}`);\n    await page.waitForLoadState(\"networkidle\");\n\n    await expect(page.getByText(/Files \\(/)).toBeVisible({ timeout: 10000 });\n    await expect(page.getByRole(\"button\", { name: /edit/i })).toBeVisible();\n  });\n\n  test(\"should enter and exit edit mode\", async ({ page }) => {\n    await page.goto(`/admin/connector/${testCcPairId}`);\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByRole(\"button\", { name: /edit/i }).click();\n    await expect(page.getByRole(\"button\", { name: /cancel/i })).toBeVisible();\n    await expect(\n      page.getByRole(\"button\", { name: /save changes/i })\n    ).toBeVisible();\n    await expect(\n      page.getByRole(\"button\", { name: /add files/i })\n    ).toBeVisible();\n    await page.getByRole(\"button\", { name: /cancel/i }).click();\n    await expect(page.getByRole(\"button\", { name: /edit/i })).toBeVisible();\n  });\n\n  test(\"should add files and show them as pending\", async ({ page }) => {\n    await page.goto(`/admin/connector/${testCcPairId}`);\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByRole(\"button\", { name: /edit/i }).click();\n    await page.waitForTimeout(500);\n    await uploadTestFile(\n      page,\n      \"test-document.txt\",\n      \"This is a test document content\"\n    );\n    await expect(page.getByText(\"New\")).toBeVisible();\n    const saveButton = page.getByRole(\"button\", { name: /save changes/i });\n    await expect(saveButton).toBeEnabled();\n  });\n\n  test(\"should remove pending file before saving\", async ({ page }) => {\n    await page.goto(`/admin/connector/${testCcPairId}`);\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByRole(\"button\", { name: /edit/i }).click();\n    await page.waitForTimeout(500);\n    await uploadTestFile(\n      page,\n      \"file-to-remove.txt\",\n      \"This file will be removed\"\n    );\n    const newFileRow = page.locator(\"tr\", { hasText: \"file-to-remove.txt\" });\n    await newFileRow.locator('button[title=\"Remove file\"]').click();\n    await expect(page.getByText(\"file-to-remove.txt\")).not.toBeVisible();\n  });\n\n  test(\"should show confirmation modal when saving\", async ({ page }) => {\n    await page.goto(`/admin/connector/${testCcPairId}`);\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByRole(\"button\", { name: /edit/i }).click();\n    await page.waitForTimeout(500);\n    await uploadTestFile(\n      page,\n      \"confirm-test.txt\",\n      \"Test content for confirmation modal\"\n    );\n    await page.getByRole(\"button\", { name: /save changes/i }).click();\n    const modalDialog = page.getByRole(\"dialog\", {\n      name: /confirm file changes/i,\n    });\n    await expect(modalDialog).toBeVisible({ timeout: 5000 });\n    await expect(\n      modalDialog.getByText(/1 file\\(s\\) will be added/)\n    ).toBeVisible();\n    await expect(\n      modalDialog.getByRole(\"button\", { name: /confirm & save/i })\n    ).toBeVisible();\n    await page.keyboard.press(\"Escape\");\n    await expect(modalDialog).not.toBeVisible();\n    await expect(\n      page.getByRole(\"button\", { name: /save changes/i })\n    ).toBeVisible();\n  });\n\n  test(\"should cancel edit mode and discard changes\", async ({ page }) => {\n    await page.goto(`/admin/connector/${testCcPairId}`);\n    await page.waitForLoadState(\"networkidle\");\n\n    await page.getByRole(\"button\", { name: /edit/i }).click();\n    await page.waitForTimeout(500);\n    await uploadTestFile(\n      page,\n      \"discard-test.txt\",\n      \"This file should be discarded\"\n    );\n    await page.getByRole(\"button\", { name: /cancel/i }).click();\n    await expect(page.getByRole(\"button\", { name: /edit/i })).toBeVisible();\n    await expect(page.getByText(\"discard-test.txt\")).not.toBeVisible();\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/constants.ts",
    "content": "export const TEST_ADMIN_CREDENTIALS = {\n  email: \"admin_user@example.com\",\n  password: \"TestPassword123!\",\n};\n\nexport const TEST_ADMIN2_CREDENTIALS = {\n  email: \"admin2_user@example.com\",\n  password: \"TestPassword123!\",\n};\n\n/**\n * Number of distinct worker users provisioned during global setup.\n * Must be >= the max concurrent workers in playwright.config.ts.\n * Playwright's workerIndex can exceed this (retries spawn new workers\n * with incrementing indices), so callers should use modulo:\n *   workerIndex % WORKER_USER_POOL_SIZE\n */\nexport const WORKER_USER_POOL_SIZE = 8;\n\nexport function workerUserCredentials(workerIndex: number): {\n  email: string;\n  password: string;\n} {\n  return {\n    email: `worker${workerIndex}@example.com`,\n    password: \"WorkerPassword123!\",\n  };\n}\n"
  },
  {
    "path": "web/tests/e2e/fixtures/eeFeatures.ts",
    "content": "/**\n * Playwright fixture that detects EE (Enterprise Edition) license state.\n *\n * Usage:\n * ```ts\n * import { test, expect } from \"@tests/e2e/fixtures/eeFeatures\";\n *\n * test(\"my EE-gated test\", async ({ page, eeEnabled }) => {\n *   test.skip(!eeEnabled, \"Requires active Enterprise license\");\n *   // ... rest of test\n * });\n * ```\n *\n * The fixture:\n * - Authenticates as admin\n * - Fetches /api/settings to check ee_features_enabled\n * - Provides a boolean to the test BEFORE any navigation happens\n *\n * This lets tests call test.skip() synchronously at the top, which is the\n * correct Playwright pattern — never navigate then decide to skip.\n */\n\nimport { test as base, expect } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\n\nexport const test = base.extend<{\n  /** Whether EE features are enabled (valid enterprise license). */\n  eeEnabled: boolean;\n}>({\n  eeEnabled: async ({ page }, use) => {\n    await loginAs(page, \"admin\");\n    const res = await page.request.get(\"/api/settings\");\n    if (!res.ok()) {\n      // Fail open — if we can't determine, assume EE is not enabled\n      await use(false);\n      return;\n    }\n    const settings = await res.json();\n    await use(settings.ee_features_enabled === true);\n  },\n});\n\nexport { expect };\n"
  },
  {
    "path": "web/tests/e2e/fixtures/llmProvider.ts",
    "content": "/**\n * Playwright fixture that ensures a public LLM provider is available.\n *\n * Usage:\n * ```ts\n * // Import from this file instead of @playwright/test\n * import { test, expect } from \"@tests/e2e/fixtures/llmProvider\";\n *\n * test(\"my test that needs an LLM provider\", async ({ page, llmProviderId }) => {\n *   // llmProviderId is the ID of the provider that was created (or null if\n *   // one already existed). The fixture handles cleanup automatically.\n * });\n * ```\n *\n * The fixture:\n * - Authenticates as admin\n * - Creates a public LLM provider if none exists\n * - Provides the created provider ID to the test\n * - Cleans up the provider after all tests in the file complete\n */\n\nimport { test as base, expect } from \"@playwright/test\";\nimport { loginAs } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\nexport const test = base.extend<{\n  /**\n   * The ID of the public LLM provider created by this fixture, or `null`\n   * if a public provider already existed.\n   */\n  llmProviderId: number | null;\n}>({\n  llmProviderId: async ({ page }, use) => {\n    // Authenticate as admin to be able to create/list providers\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n\n    const client = new OnyxApiClient(page.request);\n    const createdId = await client.ensurePublicProvider();\n    await use(createdId);\n\n    // Cleanup: only delete if we created one\n    if (createdId !== null) {\n      // Re-authenticate in case the test changed the session\n      await page.context().clearCookies();\n      await loginAs(page, \"admin\");\n      await client.deleteProvider(createdId);\n    }\n  },\n});\n\nexport { expect };\n"
  },
  {
    "path": "web/tests/e2e/global-setup.ts",
    "content": "import { FullConfig, request } from \"@playwright/test\";\nimport {\n  TEST_ADMIN_CREDENTIALS,\n  TEST_ADMIN2_CREDENTIALS,\n  WORKER_USER_POOL_SIZE,\n  workerUserCredentials,\n} from \"@tests/e2e/constants\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\n\nconst PREFLIGHT_TIMEOUT_MS = 60_000;\nconst PREFLIGHT_POLL_INTERVAL_MS = 2_000;\nconst PREFLIGHT_WARN_AFTER_MS = 15_000;\n\n/**\n * Poll the health endpoint until the server is ready or we time out.\n * Fails fast with a clear error so developers don't see cryptic browser errors.\n */\nasync function waitForServer(baseURL: string): Promise<void> {\n  const healthURL = baseURL;\n  const deadline = Date.now() + PREFLIGHT_TIMEOUT_MS;\n  const startTime = Date.now();\n  let warned = false;\n\n  console.log(`[global-setup] Waiting for server at ${healthURL} ...`);\n\n  while (Date.now() < deadline) {\n    try {\n      const res = await fetch(healthURL);\n      if (res.ok) {\n        console.log(\"[global-setup] Server is ready.\");\n        return;\n      }\n      console.log(`[global-setup] Server returned ${res.status}, retrying ...`);\n    } catch {\n      // Connection refused / DNS error — server not up yet.\n    }\n\n    if (!warned && Date.now() - startTime >= PREFLIGHT_WARN_AFTER_MS) {\n      warned = true;\n      console.warn(\n        `[global-setup] ⚠ Still waiting for server after ${\n          PREFLIGHT_WARN_AFTER_MS / 1000\n        }s.\\n` +\n          `  Please verify that both the backend and frontend are running.\\n` +\n          `  You can start them with: ods compose dev`\n      );\n    }\n\n    await new Promise((r) => setTimeout(r, PREFLIGHT_POLL_INTERVAL_MS));\n  }\n\n  throw new Error(\n    `Onyx is not running at ${baseURL}. ` +\n      `Timed out after ${\n        PREFLIGHT_TIMEOUT_MS / 1000\n      }s waiting for ${healthURL} to return 200. ` +\n      `Make sure the backend and frontend are running (e.g. \\`ods compose dev\\`).`\n  );\n}\n\n/**\n * Register a user via the backend API. Idempotent — silently succeeds if the\n * user already exists (HTTP 400 with \"REGISTER_USER_ALREADY_EXISTS\").\n */\nasync function ensureUserExists(\n  apiBase: string,\n  email: string,\n  password: string\n): Promise<void> {\n  const ctx = await request.newContext({ baseURL: apiBase });\n  try {\n    const res = await ctx.post(\"/api/auth/register\", {\n      data: { email, username: email, password },\n    });\n\n    if (res.ok()) {\n      console.log(`[global-setup] Registered user ${email}`);\n    } else {\n      const body = await res.text();\n      // \"REGISTER_USER_ALREADY_EXISTS\" is the standard FastAPI-Users error code\n      if (\n        res.status() === 400 &&\n        body.includes(\"REGISTER_USER_ALREADY_EXISTS\")\n      ) {\n        console.log(`[global-setup] User ${email} already exists, skipping.`);\n      } else {\n        console.warn(\n          `[global-setup] Unexpected response registering ${email}: ${res.status()} ${body}`\n        );\n      }\n    }\n  } finally {\n    await ctx.dispose();\n  }\n}\n\n/**\n * Log in via the API and save the resulting cookies as a Playwright storage\n * state file.  No browser is needed — this uses Playwright's lightweight\n * request context, which is much faster and produces no console noise.\n */\nasync function apiLoginAndSaveState(\n  baseURL: string,\n  email: string,\n  password: string,\n  storageStatePath: string\n): Promise<void> {\n  const ctx = await request.newContext({ baseURL });\n  try {\n    const res = await ctx.post(\"/api/auth/login\", {\n      form: { username: email, password },\n    });\n    if (!res.ok()) {\n      const body = await res.text();\n      throw new Error(\n        `[global-setup] Login failed for ${email}: ${res.status()} ${body}`\n      );\n    }\n    await ctx.storageState({ path: storageStatePath });\n  } finally {\n    await ctx.dispose();\n  }\n}\n\n/**\n * Promote a user to admin via the manage API.\n * Requires an authenticated context (admin storage state).\n */\nasync function promoteToAdmin(\n  baseURL: string,\n  adminStorageState: string,\n  email: string\n): Promise<void> {\n  const ctx = await request.newContext({\n    baseURL,\n    storageState: adminStorageState,\n  });\n  try {\n    const res = await ctx.patch(\"/api/manage/set-user-role\", {\n      data: {\n        user_email: email,\n        new_role: \"admin\",\n      },\n    });\n    if (res.ok()) {\n      console.log(`[global-setup] Promoted ${email} to admin`);\n    } else if (res.status() === 403) {\n      throw new Error(\n        `[global-setup] Cannot promote ${email} — the primary admin account ` +\n          `(${TEST_ADMIN_CREDENTIALS.email}) does not have the admin role.\\n\\n` +\n          `This usually happens when running tests against a non-fresh database ` +\n          `where another user was registered first.\\n\\n` +\n          `To fix this, either:\\n` +\n          `  1. Promote the user manually: ${baseURL}/admin/users\\n` +\n          `  2. Reset to a seeded database: ods db restore --fetch-seeded\\n`\n      );\n    } else {\n      const body = await res.text();\n      console.warn(\n        `[global-setup] Failed to promote ${email}: ${res.status()} ${body}`\n      );\n    }\n  } finally {\n    await ctx.dispose();\n  }\n}\n\nasync function globalSetup(config: FullConfig) {\n  // Get baseURL from config, fallback to localhost:3000\n  const baseURL = config.projects[0]?.use?.baseURL || \"http://localhost:3000\";\n\n  // ── Preflight check ──────────────────────────────────────────────────\n  await waitForServer(baseURL);\n\n  // ── Provision test users via API ─────────────────────────────────────\n  // The first user registered becomes the admin automatically.\n  // Order matters: admin first, then admin2, then worker users.\n  await ensureUserExists(\n    baseURL,\n    TEST_ADMIN_CREDENTIALS.email,\n    TEST_ADMIN_CREDENTIALS.password\n  );\n  await ensureUserExists(\n    baseURL,\n    TEST_ADMIN2_CREDENTIALS.email,\n    TEST_ADMIN2_CREDENTIALS.password\n  );\n\n  for (let i = 0; i < WORKER_USER_POOL_SIZE; i++) {\n    const { email, password } = workerUserCredentials(i);\n    await ensureUserExists(baseURL, email, password);\n  }\n\n  // ── Login via API and save storage state ───────────────────────────\n  await apiLoginAndSaveState(\n    baseURL,\n    TEST_ADMIN_CREDENTIALS.email,\n    TEST_ADMIN_CREDENTIALS.password,\n    \"admin_auth.json\"\n  );\n\n  // Promote admin2 now that we have an admin session\n  await promoteToAdmin(\n    baseURL,\n    \"admin_auth.json\",\n    TEST_ADMIN2_CREDENTIALS.email\n  );\n\n  await apiLoginAndSaveState(\n    baseURL,\n    TEST_ADMIN2_CREDENTIALS.email,\n    TEST_ADMIN2_CREDENTIALS.password,\n    \"admin2_auth.json\"\n  );\n\n  for (let i = 0; i < WORKER_USER_POOL_SIZE; i++) {\n    const { email, password } = workerUserCredentials(i);\n    const storageStatePath = `worker${i}_auth.json`;\n    await apiLoginAndSaveState(baseURL, email, password, storageStatePath);\n\n    const workerCtx = await request.newContext({\n      baseURL,\n      storageState: storageStatePath,\n    });\n    try {\n      const res = await workerCtx.patch(\"/api/user/personalization\", {\n        data: { name: \"worker\" },\n      });\n      if (!res.ok()) {\n        console.warn(\n          `[global-setup] Failed to set display name for ${email}: ${res.status()}`\n        );\n      }\n    } finally {\n      await workerCtx.dispose();\n    }\n  }\n\n  // ── Ensure a public LLM provider exists ───────────────────────────\n  // Many tests depend on a default LLM being configured (file uploads,\n  // assistant creation, etc.).  Re-use the admin session we just saved.\n  const adminCtx = await request.newContext({\n    baseURL,\n    storageState: \"admin_auth.json\",\n  });\n  try {\n    const client = new OnyxApiClient(adminCtx, baseURL);\n    await client.ensurePublicProvider();\n  } finally {\n    await adminCtx.dispose();\n  }\n}\n\nexport default globalSetup;\n"
  },
  {
    "path": "web/tests/e2e/mcp/default-agent-mcp.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport type { Page } from \"@playwright/test\";\nimport { loginAs, apiLogin } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\nimport {\n  startMcpApiKeyServer,\n  McpServerProcess,\n} from \"@tests/e2e/utils/mcpServer\";\nimport {\n  getPacketObjectsByType,\n  sendMessageAndCaptureStreamPackets,\n} from \"@tests/e2e/utils/chatStream\";\n\nconst API_KEY = process.env.MCP_API_KEY || \"test-api-key-12345\";\nconst DEFAULT_PORT = Number(process.env.MCP_API_KEY_TEST_PORT || \"8005\");\nconst MCP_API_KEY_TEST_URL = process.env.MCP_API_KEY_TEST_URL;\nconst MCP_ASSERTED_TOOL_NAME = \"tool_0\";\n\nasync function scrollToBottom(page: Page): Promise<void> {\n  try {\n    await page.evaluate(() => {\n      window.scrollTo(0, document.body.scrollHeight);\n    });\n    await page.waitForTimeout(200);\n  } catch {\n    // ignore scrolling failures\n  }\n}\n\nasync function ensureOnboardingComplete(page: Page): Promise<void> {\n  await page.evaluate(async () => {\n    try {\n      await fetch(\"/api/user/personalization\", {\n        method: \"PATCH\",\n        headers: { \"Content-Type\": \"application/json\" },\n        credentials: \"include\",\n        body: JSON.stringify({ name: \"Playwright User\" }),\n      });\n    } catch {\n      // ignore personalization failures\n    }\n  });\n\n  await page.reload();\n  await page.waitForLoadState(\"networkidle\");\n}\n\nconst getToolName = (packetObject: Record<string, unknown>): string | null => {\n  const value = packetObject.tool_name;\n  return typeof value === \"string\" ? value : null;\n};\n\nfunction getToolPacketCounts(\n  packets: Record<string, unknown>[],\n  toolName: string\n): { start: number; delta: number; debug: number } {\n  const start = getPacketObjectsByType(packets, \"custom_tool_start\").filter(\n    (packetObject) => getToolName(packetObject) === toolName\n  ).length;\n  const delta = getPacketObjectsByType(packets, \"custom_tool_delta\").filter(\n    (packetObject) => getToolName(packetObject) === toolName\n  ).length;\n  const debug = getPacketObjectsByType(packets, \"tool_call_debug\").filter(\n    (packetObject) => getToolName(packetObject) === toolName\n  ).length;\n\n  return { start, delta, debug };\n}\n\nasync function fetchMcpToolIdByName(\n  page: Page,\n  serverId: number,\n  toolName: string\n): Promise<number> {\n  const response = await page.request.get(\n    `/api/admin/mcp/server/${serverId}/db-tools`\n  );\n  expect(response.ok()).toBeTruthy();\n  const data = (await response.json()) as {\n    tools?: Array<{ id: number; name: string }>;\n  };\n  const matchedTool = data.tools?.find((tool) => tool.name === toolName);\n  expect(matchedTool?.id).toBeTruthy();\n  return matchedTool!.id;\n}\n\ntest.describe(\"Default Agent MCP Integration\", () => {\n  test.describe.configure({ mode: \"serial\" });\n\n  let serverProcess: McpServerProcess | null = null;\n  let serverId: number | null = null;\n  let serverName: string;\n  let serverUrl: string;\n  let basicUserEmail: string;\n  let basicUserPassword: string;\n  let createdProviderId: number | null = null;\n  let assertedToolId: number | null = null;\n\n  test.beforeAll(async ({ browser }) => {\n    // Use dockerized server if URL is provided, otherwise start local server\n    if (MCP_API_KEY_TEST_URL) {\n      serverUrl = MCP_API_KEY_TEST_URL;\n      console.log(\n        `[test-setup] Using dockerized MCP API key server at ${serverUrl}`\n      );\n    } else {\n      // Start the MCP API key server locally\n      serverProcess = await startMcpApiKeyServer({\n        port: DEFAULT_PORT,\n        apiKey: API_KEY,\n      });\n      serverUrl = `http://${serverProcess.address.host}:${serverProcess.address.port}/mcp`;\n      console.log(\n        `[test-setup] MCP API key server started locally at ${serverUrl}`\n      );\n    }\n\n    serverName = `PW API Key Server ${Date.now()}`;\n\n    // Setup as admin\n    const adminContext = await browser.newContext({\n      storageState: \"admin_auth.json\",\n    });\n    const adminPage = await adminContext.newPage();\n    const adminClient = new OnyxApiClient(adminPage.request);\n\n    // Ensure a public LLM provider exists\n    createdProviderId = await adminClient.ensurePublicProvider();\n\n    // Clean up any existing servers with the same URL\n    try {\n      const existingServers = await adminClient.listMcpServers();\n      for (const server of existingServers) {\n        if (server.server_url === serverUrl) {\n          await adminClient.deleteMcpServer(server.id);\n        }\n      }\n    } catch (error) {\n      console.warn(\"Failed to cleanup existing MCP servers\", error);\n    }\n\n    // Create a basic user for testing\n    basicUserEmail = `pw-basic-user-${Date.now()}@example.com`;\n    basicUserPassword = \"BasicUserPass123!\";\n    await adminClient.registerUser(basicUserEmail, basicUserPassword);\n\n    await adminContext.close();\n  });\n\n  test.afterAll(async ({ browser }) => {\n    const adminContext = await browser.newContext({\n      storageState: \"admin_auth.json\",\n    });\n    const adminPage = await adminContext.newPage();\n    const adminClient = new OnyxApiClient(adminPage.request);\n\n    if (createdProviderId !== null) {\n      await adminClient.deleteProvider(createdProviderId);\n    }\n\n    if (serverId) {\n      await adminClient.deleteMcpServer(serverId);\n    }\n\n    await adminContext.close();\n\n    // Only stop the server if we started it locally\n    if (serverProcess) {\n      await serverProcess.stop();\n    }\n  });\n\n  test(\"Admin configures API key MCP server and adds tools to default agent\", async ({\n    page,\n  }) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n\n    console.log(`[test] Starting with server name: ${serverName}`);\n\n    // Navigate to MCP actions page\n    await page.goto(\"/admin/actions/mcp\");\n    await page.waitForURL(\"**/admin/actions/mcp**\");\n    console.log(`[test] Navigated to MCP actions page`);\n\n    // Click \"Add MCP Server\" button to open modal\n    await page.getByRole(\"button\", { name: /Add MCP Server/i }).click();\n    await page.waitForTimeout(500); // Wait for modal to appear\n    console.log(`[test] Opened Add MCP Server modal`);\n\n    // Fill basic server info in AddMCPServerModal\n    await page.locator(\"input#name\").fill(serverName);\n    await page.locator(\"textarea#description\").fill(\"Test API key MCP server\");\n    await page.locator(\"input#server_url\").fill(serverUrl);\n    console.log(`[test] Filled basic server details`);\n\n    // Submit the modal to create server\n    const createServerResponsePromise = page.waitForResponse((resp) => {\n      try {\n        const url = new URL(resp.url());\n        return (\n          url.pathname === \"/api/admin/mcp/server\" &&\n          resp.request().method() === \"POST\" &&\n          resp.ok()\n        );\n      } catch {\n        return false;\n      }\n    });\n    await page.getByRole(\"button\", { name: \"Add Server\" }).click();\n    const createServerResponse = await createServerResponsePromise;\n    const createdServer = (await createServerResponse.json()) as {\n      id?: number;\n    };\n    expect(createdServer.id).toBeTruthy();\n    serverId = Number(createdServer.id);\n    expect(serverId).toBeGreaterThan(0);\n    console.log(`[test] Created MCP server with id: ${serverId}`);\n    await page.waitForTimeout(1000); // Wait for modal to close and auth modal to open\n    console.log(`[test] Created MCP server, auth modal should open`);\n\n    // MCPAuthenticationModal should now be open - configure API Key authentication\n    await page.waitForTimeout(500); // Ensure modal is fully rendered\n\n    // Select API Key as authentication method\n    const authMethodSelect = page.getByTestId(\"mcp-auth-method-select\");\n    await authMethodSelect.click();\n    await page.getByRole(\"option\", { name: \"API Key\" }).click();\n    console.log(`[test] Selected API Key authentication method`);\n\n    await page.waitForTimeout(500); // Wait for tabs to appear\n\n    // The modal now shows tabs - select \"Shared Key (Admin)\" tab\n    const adminTab = page.getByRole(\"tab\", { name: /Shared Key.*Admin/i });\n    await expect(adminTab).toBeVisible({ timeout: 5000 });\n    await adminTab.click();\n    await page.waitForTimeout(300);\n    console.log(`[test] Selected Shared Key (Admin) tab`);\n\n    // Wait for API token field to appear and fill it\n    const apiTokenInput = page.locator('input[name=\"api_token\"]');\n    await expect(apiTokenInput).toBeVisible({ timeout: 10000 });\n    await apiTokenInput.click(); // Focus the field first\n    await apiTokenInput.fill(API_KEY);\n    console.log(`[test] Filled API key`);\n\n    // Click Connect button to submit authentication\n    const connectButton = page.getByTestId(\"mcp-auth-connect-button\");\n    await expect(connectButton).toBeVisible({ timeout: 5000 });\n    await connectButton.click();\n    console.log(`[test] Clicked Connect button`);\n\n    // Wait for the tools to be fetched\n    await page.waitForTimeout(1000);\n    console.log(`[test] Tools fetched successfully`);\n\n    // Verify server card is visible\n    await expect(\n      page.getByText(serverName, { exact: false }).first()\n    ).toBeVisible({ timeout: 20000 });\n    console.log(`[test] Verified server card is visible`);\n\n    // Click the refresh button to fetch/refresh tools\n    const refreshButton = page.getByRole(\"button\", { name: \"Refresh tools\" });\n    await expect(refreshButton).toBeVisible({ timeout: 5000 });\n    await refreshButton.click();\n    console.log(`[test] Clicked refresh tools button`);\n\n    // Wait for tools to load - \"No tools available\" should disappear\n    await expect(page.getByText(\"No tools available\")).not.toBeVisible({\n      timeout: 15000,\n    });\n    console.log(`[test] Tools loaded successfully`);\n\n    assertedToolId = await fetchMcpToolIdByName(\n      page,\n      serverId,\n      MCP_ASSERTED_TOOL_NAME\n    );\n    console.log(\n      `[test] Resolved ${MCP_ASSERTED_TOOL_NAME} to tool ID ${assertedToolId}`\n    );\n\n    // Disable multiple tools (tool_0, tool_1, tool_2, tool_3)\n    const toolIds = [\"tool_11\", \"tool_12\", \"tool_13\", \"tool_14\"];\n    let disabledToolsCount = 0;\n\n    for (const toolId of toolIds) {\n      const toolToggle = page.getByLabel(`tool-toggle-${toolId}`).first();\n\n      // Check if the tool exists\n      const isVisible = await toolToggle\n        .isVisible({ timeout: 2000 })\n        .catch(() => false);\n\n      if (!isVisible) {\n        console.log(`[test] Tool ${toolId} not found, skipping`);\n        continue;\n      }\n\n      console.log(`[test] Found tool: ${toolId}`);\n\n      // Disable if currently enabled (tools are enabled by default)\n      const state = await toolToggle.getAttribute(\"aria-checked\");\n      if (state === \"true\") {\n        await toolToggle.click();\n        await expect(toolToggle).toHaveAttribute(\"aria-checked\", \"false\", {\n          timeout: 5000,\n        });\n        disabledToolsCount++;\n        console.log(`[test] Disabled tool: ${toolId}`);\n      } else {\n        console.log(`[test] Tool ${toolId} already disabled`);\n      }\n    }\n\n    console.log(\n      `[test] Successfully disabled ${disabledToolsCount} tools via UI`\n    );\n  });\n\n  test(\"Admin adds MCP tools to default agent via chat preferences page\", async ({\n    page,\n  }) => {\n    test.skip(!serverId, \"MCP server must be created first\");\n\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n    console.log(`[test] Logged in as admin for chat preferences config`);\n\n    // Navigate to chat preferences page\n    await page.goto(\"/admin/configuration/chat-preferences\");\n    await page.waitForURL(\"**/admin/configuration/chat-preferences**\");\n    console.log(`[test] Navigated to chat preferences page`);\n\n    // Wait for page to load\n    await expect(page.locator('[aria-label=\"admin-page-title\"]')).toBeVisible({\n      timeout: 10000,\n    });\n    console.log(`[test] Page loaded`);\n\n    // Scroll to the Actions & Tools section (open by default)\n    await scrollToBottom(page);\n\n    // Find the MCP server card by name text\n    // The server name appears inside a label within the ActionsLayouts.Header\n    const serverLabel = page\n      .locator(\"label\")\n      .filter({ has: page.getByText(serverName, { exact: true }) });\n    await expect(serverLabel.first()).toBeVisible({ timeout: 10000 });\n    console.log(`[test] MCP server card found for server: ${serverName}`);\n\n    // Scroll server card into view\n    await serverLabel.first().scrollIntoViewIfNeeded();\n\n    // The server-level Switch in the header toggles ALL tools\n    const serverSwitch = serverLabel\n      .first()\n      .locator('button[role=\"switch\"]')\n      .first();\n    await expect(serverSwitch).toBeVisible({ timeout: 5000 });\n\n    // Enable all tools by toggling the server switch ON\n    const serverState = await serverSwitch.getAttribute(\"aria-checked\");\n    if (serverState !== \"true\") {\n      await serverSwitch.click();\n      // Auto-save triggers immediately\n      await expect(page.getByText(\"Tools updated\").first()).toBeVisible({\n        timeout: 10000,\n      });\n    }\n    console.log(`[test] MCP tools successfully added to default agent`);\n  });\n\n  test(\"Basic user can see and toggle MCP tools in default agent\", async ({\n    page,\n  }) => {\n    test.skip(!serverId, \"MCP server must be configured first\");\n    test.skip(!basicUserEmail, \"Basic user must be created first\");\n\n    await page.context().clearCookies();\n    await apiLogin(page, basicUserEmail, basicUserPassword);\n    console.log(`[test] Logged in as basic user: ${basicUserEmail}`);\n\n    // Navigate to chat (which uses default agent for new users)\n    await page.goto(\"/app\");\n    await page.waitForURL(\"**/app**\");\n    await ensureOnboardingComplete(page);\n    console.log(`[test] Navigated to chat page`);\n\n    // Open actions popover\n    const actionsButton = page.getByTestId(\"action-management-toggle\");\n    await expect(actionsButton).toBeVisible({ timeout: 10000 });\n    await actionsButton.click();\n    console.log(`[test] Opened actions popover`);\n\n    // Wait for popover to open\n    const popover = page.locator('[data-testid=\"tool-options\"]');\n    await expect(popover).toBeVisible({ timeout: 5000 });\n\n    // Find the MCP server in the list\n    const serverLineItem = popover\n      .locator(\".group\\\\/LineItem\")\n      .filter({ hasText: serverName });\n    await expect(serverLineItem).toBeVisible({ timeout: 10000 });\n    console.log(`[test] Found MCP server: ${serverName}`);\n\n    // Click to open the server's tool list\n    await serverLineItem.click();\n    await page.waitForTimeout(500);\n    console.log(`[test] Clicked on MCP server to view tools`);\n\n    // Verify we're in the tool list view (should have Enable/Disable All)\n    await expect(\n      popover.getByText(/(Enable|Disable) All/i).first()\n    ).toBeVisible({ timeout: 5000 });\n    console.log(`[test] Tool list view loaded`);\n\n    // Find a specific tool (tool_0)\n    const toolLineItem = popover\n      .locator(\".group\\\\/LineItem\")\n      .filter({ hasText: /^tool_0/ })\n      .first();\n    await expect(toolLineItem).toBeVisible({ timeout: 5000 });\n    console.log(`[test] Found tool: tool_0`);\n\n    // Find the toggle switch for the tool\n    const toolToggle = toolLineItem.locator('[role=\"switch\"]');\n    await expect(toolToggle).toBeVisible({ timeout: 5000 });\n    console.log(`[test] Tool toggle is visible`);\n\n    // Get initial state and toggle\n    const initialState = await toolToggle.getAttribute(\"aria-checked\");\n    console.log(`[test] Initial toggle state: ${initialState}`);\n    await toolToggle.click();\n    await page.waitForTimeout(300);\n\n    // Wait for state to change\n    const expectedState = initialState === \"true\" ? \"false\" : \"true\";\n    await expect(toolToggle).toHaveAttribute(\"aria-checked\", expectedState, {\n      timeout: 5000,\n    });\n    console.log(`[test] Toggle state changed to: ${expectedState}`);\n\n    // Toggle back\n    await toolToggle.click();\n    await page.waitForTimeout(300);\n    await expect(toolToggle).toHaveAttribute(\"aria-checked\", initialState!, {\n      timeout: 5000,\n    });\n    console.log(`[test] Toggled back to original state: ${initialState}`);\n\n    // Test \"Disable All\" functionality\n    const disableAllButton = popover.getByText(/Disable All/i).first();\n    const hasDisableAll = await disableAllButton.isVisible();\n    console.log(`[test] Disable All button visible: ${hasDisableAll}`);\n\n    if (hasDisableAll) {\n      await disableAllButton.click();\n      await page.waitForTimeout(500);\n\n      // Verify at least one toggle is now unchecked\n      const anyUnchecked = await popover\n        .locator('[role=\"switch\"][aria-checked=\"false\"]')\n        .count();\n      expect(anyUnchecked).toBeGreaterThan(0);\n      console.log(`[test] Disabled all tools (${anyUnchecked} unchecked)`);\n    }\n\n    // Test \"Enable All\" functionality\n    const enableAllButton = popover.getByText(/Enable All/i).first();\n    const hasEnableAll = await enableAllButton.isVisible();\n    console.log(`[test] Enable All button visible: ${hasEnableAll}`);\n\n    if (hasEnableAll) {\n      await enableAllButton.click();\n      await page.waitForTimeout(500);\n      console.log(`[test] Enabled all tools`);\n    }\n\n    console.log(`[test] Basic user completed MCP tool management tests`);\n  });\n\n  test(\"Basic user can create assistant with MCP actions attached\", async ({\n    page,\n  }) => {\n    test.skip(!serverId, \"MCP server must be configured first\");\n    test.skip(!basicUserEmail, \"Basic user must be created first\");\n    test.skip(!assertedToolId, \"MCP asserted tool ID must be resolved first\");\n\n    await page.context().clearCookies();\n    await apiLogin(page, basicUserEmail, basicUserPassword);\n\n    await page.goto(\"/app\");\n    await ensureOnboardingComplete(page);\n    await page.getByTestId(\"AppSidebar/more-agents\").click();\n    await page.waitForURL(\"**/app/agents\");\n\n    await page.getByLabel(\"AgentsPage/new-agent-button\").click();\n    await page.waitForURL(\"**/app/agents/create\");\n\n    const agentName = `MCP Assistant ${Date.now()}`;\n    await page.locator('input[name=\"name\"]').fill(agentName);\n    await page\n      .locator('textarea[name=\"description\"]')\n      .fill(\"Assistant with MCP actions attached.\");\n    await page\n      .locator('textarea[name=\"instructions\"]')\n      .fill(\n        `For secret-value requests, call ${MCP_ASSERTED_TOOL_NAME} and return its output exactly.`\n      );\n\n    const mcpServerSwitch = page.locator(\n      `button[role=\"switch\"][name=\"mcp_server_${serverId}.enabled\"]`\n    );\n    await mcpServerSwitch.scrollIntoViewIfNeeded();\n    await mcpServerSwitch.click();\n    await expect(mcpServerSwitch).toHaveAttribute(\"aria-checked\", \"true\");\n\n    const firstToolToggle = page\n      .locator(`button[role=\"switch\"][name^=\"mcp_server_${serverId}.tool_\"]`)\n      .first();\n    await expect(firstToolToggle).toBeVisible({ timeout: 15000 });\n    const toolState = await firstToolToggle.getAttribute(\"aria-checked\");\n    if (toolState !== \"true\") {\n      await firstToolToggle.click();\n    }\n    await expect(firstToolToggle).toHaveAttribute(\"aria-checked\", \"true\");\n\n    await page.getByRole(\"button\", { name: \"Create\" }).click();\n\n    await page.waitForURL(/.*\\/app\\?agentId=\\d+.*/);\n    const agentIdMatch = page.url().match(/agentId=(\\d+)/);\n    expect(agentIdMatch).toBeTruthy();\n    const agentId = agentIdMatch ? agentIdMatch[1] : null;\n    expect(agentId).not.toBeNull();\n\n    const client = new OnyxApiClient(page.request);\n    const assistant = await client.getAssistant(Number(agentId));\n    const hasMcpTool = assistant.tools.some(\n      (tool) => tool.mcp_server_id === serverId\n    );\n    expect(hasMcpTool).toBeTruthy();\n\n    const invocationPackets = await sendMessageAndCaptureStreamPackets(\n      page,\n      `Call ${MCP_ASSERTED_TOOL_NAME} with {\"name\":\"pw-invoke-${Date.now()}\"} and return only the tool output.`,\n      {\n        mockLlmResponse: JSON.stringify({\n          name: MCP_ASSERTED_TOOL_NAME,\n          arguments: { name: `pw-invoke-${Date.now()}` },\n        }),\n        payloadOverrides: {\n          forced_tool_id: assertedToolId,\n          forced_tool_ids: [assertedToolId],\n        },\n        waitForAiMessage: false,\n      }\n    );\n    const invocationCounts = getToolPacketCounts(\n      invocationPackets,\n      MCP_ASSERTED_TOOL_NAME\n    );\n    expect(invocationCounts.start).toBeGreaterThan(0);\n    expect(invocationCounts.delta).toBeGreaterThan(0);\n    expect(invocationCounts.debug).toBeGreaterThan(0);\n\n    const actionsButton = page.getByTestId(\"action-management-toggle\");\n    await expect(actionsButton).toBeVisible({ timeout: 10000 });\n    await actionsButton.click();\n\n    const popover = page.locator('[data-testid=\"tool-options\"]');\n    await expect(popover).toBeVisible({ timeout: 5000 });\n\n    const serverLineItem = popover\n      .locator(\".group\\\\/LineItem\")\n      .filter({ hasText: serverName })\n      .first();\n    await expect(serverLineItem).toBeVisible({ timeout: 10000 });\n    await serverLineItem.click();\n\n    const toolSearchInput = popover\n      .getByPlaceholder(/Search .* tools/i)\n      .first();\n    await expect(toolSearchInput).toBeVisible({ timeout: 10000 });\n    await toolSearchInput.fill(MCP_ASSERTED_TOOL_NAME);\n\n    const toolToggle = popover.getByLabel(`Toggle ${MCP_ASSERTED_TOOL_NAME}`);\n    await expect(toolToggle).toBeVisible({ timeout: 10000 });\n    const isToolToggleUnchecked = async () => {\n      const dataState = await toolToggle.getAttribute(\"data-state\");\n      if (typeof dataState === \"string\") {\n        return dataState === \"unchecked\";\n      }\n      return (await toolToggle.getAttribute(\"aria-checked\")) === \"false\";\n    };\n    if (!(await isToolToggleUnchecked())) {\n      await toolToggle.click();\n    }\n    await expect\n      .poll(isToolToggleUnchecked, {\n        timeout: 5000,\n      })\n      .toBe(true);\n\n    await page.keyboard.press(\"Escape\").catch(() => {});\n\n    const disabledPackets = await sendMessageAndCaptureStreamPackets(\n      page,\n      `Call ${MCP_ASSERTED_TOOL_NAME} with {\"name\":\"pw-disabled-${Date.now()}\"} and return only the tool output.`,\n      {\n        mockLlmResponse: JSON.stringify({\n          name: MCP_ASSERTED_TOOL_NAME,\n          arguments: { name: `pw-disabled-${Date.now()}` },\n        }),\n        payloadOverrides: {\n          forced_tool_id: assertedToolId,\n          forced_tool_ids: [assertedToolId],\n        },\n        waitForAiMessage: false,\n      }\n    );\n    const disabledCounts = getToolPacketCounts(\n      disabledPackets,\n      MCP_ASSERTED_TOOL_NAME\n    );\n    expect(disabledCounts.start).toBe(0);\n    expect(disabledCounts.delta).toBe(0);\n    expect(disabledCounts.debug).toBe(0);\n  });\n\n  test(\"Admin can modify MCP tools in default agent\", async ({ page }) => {\n    test.skip(!serverId, \"MCP server must be configured first\");\n\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n    console.log(`[test] Testing tool modification`);\n\n    // Navigate to chat preferences page\n    await page.goto(\"/admin/configuration/chat-preferences\");\n    await page.waitForURL(\"**/admin/configuration/chat-preferences**\");\n\n    // Scroll to Actions & Tools section\n    await scrollToBottom(page);\n\n    // Find the MCP server card by name\n    const serverLabel = page\n      .locator(\"label\")\n      .filter({ has: page.getByText(serverName, { exact: true }) });\n    await expect(serverLabel.first()).toBeVisible({ timeout: 10000 });\n    await serverLabel.first().scrollIntoViewIfNeeded();\n\n    // Click \"Expand\" to reveal individual tools\n    const expandButton = page.getByRole(\"button\", { name: \"Expand\" }).first();\n    const isExpandVisible = await expandButton.isVisible().catch(() => false);\n    if (isExpandVisible) {\n      await expandButton.click();\n      await page.waitForTimeout(300);\n      console.log(`[test] Expanded MCP server card`);\n    }\n\n    // Find a specific tool by name inside the expanded card content\n    // Individual tools are rendered as ActionsLayouts.Tool with their own Card > Label\n    const toolLabel = page\n      .locator(\"label\")\n      .filter({ has: page.getByText(\"tool_0\", { exact: true }) });\n    const firstToolSwitch = toolLabel\n      .first()\n      .locator('button[role=\"switch\"]')\n      .first();\n\n    await expect(firstToolSwitch).toBeVisible({ timeout: 5000 });\n    await firstToolSwitch.scrollIntoViewIfNeeded();\n\n    // Get initial state and toggle\n    const initialChecked = await firstToolSwitch.getAttribute(\"aria-checked\");\n    console.log(`[test] Initial tool state: ${initialChecked}`);\n    await firstToolSwitch.click();\n\n    // Wait for auto-save toast\n    await expect(page.getByText(\"Tools updated\").first()).toBeVisible({\n      timeout: 10000,\n    });\n    console.log(`[test] Save successful`);\n\n    // Reload and verify persistence\n    await page.reload();\n    await page.waitForURL(\"**/admin/configuration/chat-preferences**\");\n    await scrollToBottom(page);\n\n    // Re-find the server card\n    const serverLabelAfter = page\n      .locator(\"label\")\n      .filter({ has: page.getByText(serverName, { exact: true }) });\n    await expect(serverLabelAfter.first()).toBeVisible({ timeout: 10000 });\n    await serverLabelAfter.first().scrollIntoViewIfNeeded();\n\n    // Re-expand the card\n    const expandButtonAfter = page\n      .getByRole(\"button\", { name: \"Expand\" })\n      .first();\n    const isExpandVisibleAfter = await expandButtonAfter\n      .isVisible()\n      .catch(() => false);\n    if (isExpandVisibleAfter) {\n      await expandButtonAfter.click();\n      await page.waitForTimeout(300);\n    }\n\n    // Verify the tool state persisted\n    const toolLabelAfter = page\n      .locator(\"label\")\n      .filter({ has: page.getByText(\"tool_0\", { exact: true }) });\n    const firstToolSwitchAfter = toolLabelAfter\n      .first()\n      .locator('button[role=\"switch\"]')\n      .first();\n    await expect(firstToolSwitchAfter).toBeVisible({ timeout: 5000 });\n    const finalChecked =\n      await firstToolSwitchAfter.getAttribute(\"aria-checked\");\n    console.log(`[test] Final tool state: ${finalChecked}`);\n    expect(finalChecked).not.toEqual(initialChecked);\n  });\n\n  test(\"Instructions persist when saving via chat preferences\", async ({\n    page,\n  }) => {\n    await page.context().clearCookies();\n    await loginAs(page, \"admin\");\n\n    await page.goto(\"/admin/configuration/chat-preferences\");\n    await page.waitForURL(\"**/admin/configuration/chat-preferences**\");\n\n    // Click \"Modify Prompt\" to open the system prompt modal\n    const modifyButton = page.getByText(\"Modify Prompt\");\n    await expect(modifyButton).toBeVisible({ timeout: 5000 });\n    await modifyButton.click();\n\n    const modal = page.getByRole(\"dialog\");\n    await expect(modal).toBeVisible({ timeout: 5000 });\n\n    // Fill instructions in the modal textarea\n    const testInstructions = `Test instructions for MCP - ${Date.now()}`;\n    const textarea = modal.getByPlaceholder(\"Enter your system prompt...\");\n    await textarea.fill(testInstructions);\n    console.log(`[test] Filled instructions`);\n\n    // Click Save in the modal footer\n    await modal.getByRole(\"button\", { name: \"Save\" }).click();\n\n    await expect(page.getByText(\"System prompt updated\")).toBeVisible({\n      timeout: 10000,\n    });\n    console.log(`[test] Instructions saved successfully`);\n\n    // Modal should close\n    await expect(modal).not.toBeVisible();\n\n    // Reload and verify — wait for all data to load before opening modal\n    // (the modal reads system_prompt from SWR state at click time, so data must be ready)\n    await page.reload();\n    await page.waitForLoadState(\"networkidle\");\n    await page.waitForURL(\"**/admin/configuration/chat-preferences**\");\n\n    // Reopen modal and check persisted value\n    const modifyButtonAfter = page.getByText(\"Modify Prompt\");\n    await expect(modifyButtonAfter).toBeVisible({ timeout: 5000 });\n    await modifyButtonAfter.click();\n\n    const modalAfter = page.getByRole(\"dialog\");\n    await expect(modalAfter).toBeVisible({ timeout: 5000 });\n    await expect(\n      modalAfter.getByPlaceholder(\"Enter your system prompt...\")\n    ).toHaveValue(testInstructions);\n\n    console.log(`[test] Instructions persisted correctly`);\n\n    // Close modal\n    await modalAfter.getByRole(\"button\", { name: \"Cancel\" }).click();\n  });\n\n  test(\"MCP tools appear in basic user's chat actions after being added to default agent\", async ({\n    page,\n  }) => {\n    test.skip(!serverId, \"MCP server must be configured first\");\n    test.skip(!basicUserEmail, \"Basic user must be created first\");\n\n    await page.context().clearCookies();\n    await apiLogin(page, basicUserEmail, basicUserPassword);\n    console.log(`[test] Logged in as basic user to verify tool visibility`);\n\n    // Navigate to chat\n    await page.goto(\"/app\");\n    await page.waitForURL(\"**/app**\");\n    console.log(`[test] Navigated to chat`);\n\n    // Open actions popover\n    const actionsButton = page.getByTestId(\"action-management-toggle\");\n    await expect(actionsButton).toBeVisible({ timeout: 10000 });\n    await actionsButton.click();\n    console.log(`[test] Opened actions popover`);\n\n    // Wait for popover\n    const popover = page.locator('[data-testid=\"tool-options\"]');\n    await expect(popover).toBeVisible({ timeout: 5000 });\n\n    // Verify MCP server appears in the actions list\n    const serverLineItem = popover\n      .locator(\".group\\\\/LineItem\")\n      .filter({ hasText: serverName });\n    await expect(serverLineItem).toBeVisible({ timeout: 10000 });\n    console.log(`[test] Found MCP server in actions list`);\n\n    // Click to see tools\n    await serverLineItem.click();\n    await page.waitForTimeout(500);\n    console.log(`[test] Clicked server to view tools`);\n\n    // Verify tools are present\n    const toolsList = popover.locator('[role=\"switch\"]');\n    const toolCount = await toolsList.count();\n    expect(toolCount).toBeGreaterThan(0);\n\n    console.log(\n      `[test] Basic user can see ${toolCount} MCP tools from default agent`\n    );\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/mcp/mcp_oauth_flow.spec.ts",
    "content": "import { test, expect } from \"@playwright/test\";\nimport type { Page, Browser, Locator } from \"@playwright/test\";\nimport { loginAs, loginAsWorkerUser, apiLogin } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\nimport {\n  startMcpOauthServer,\n  McpServerProcess,\n} from \"@tests/e2e/utils/mcpServer\";\nimport { TEST_ADMIN_CREDENTIALS } from \"@tests/e2e/constants\";\nimport { logPageState } from \"@tests/e2e/utils/pageStateLogger\";\nimport {\n  getPacketObjectsByType,\n  sendMessageAndCaptureStreamPackets,\n} from \"@tests/e2e/utils/chatStream\";\n\nconst REQUIRED_ENV_VARS = [\n  \"MCP_OAUTH_CLIENT_ID\",\n  \"MCP_OAUTH_CLIENT_SECRET\",\n  \"MCP_OAUTH_ISSUER\",\n  \"MCP_OAUTH_JWKS_URI\",\n  \"MCP_OAUTH_USERNAME\",\n  \"MCP_OAUTH_PASSWORD\",\n];\n\nconst missingEnvVars = REQUIRED_ENV_VARS.filter(\n  (envVar) => !process.env[envVar]\n);\n\nif (missingEnvVars.length > 0) {\n  throw new Error(\n    `Missing required environment variables for MCP OAuth tests: ${missingEnvVars.join(\n      \", \"\n    )}`\n  );\n}\n\nconst DEFAULT_MCP_SERVER_URL =\n  process.env.MCP_TEST_SERVER_URL || \"http://127.0.0.1:8004/mcp\";\nlet runtimeMcpServerUrl = DEFAULT_MCP_SERVER_URL;\nconst CLIENT_ID = process.env.MCP_OAUTH_CLIENT_ID!;\nconst CLIENT_SECRET = process.env.MCP_OAUTH_CLIENT_SECRET!;\nconst IDP_USERNAME = process.env.MCP_OAUTH_USERNAME!;\nconst IDP_PASSWORD = process.env.MCP_OAUTH_PASSWORD!;\nconst APP_BASE_URL = process.env.MCP_TEST_APP_BASE || \"http://localhost:3000\";\nconst APP_HOST = new URL(APP_BASE_URL).host;\nconst IDP_HOST = new URL(process.env.MCP_OAUTH_ISSUER!).host;\nconst QUICK_CONFIRM_CONNECTED_TIMEOUT_MS = Number(\n  process.env.MCP_OAUTH_QUICK_CONFIRM_TIMEOUT_MS || 2000\n);\nconst POST_CLICK_URL_CHANGE_WAIT_MS = Number(\n  process.env.MCP_OAUTH_POST_CLICK_URL_CHANGE_WAIT_MS || 5000\n);\nconst MCP_OAUTH_FLOW_TEST_TIMEOUT_MS = Number(\n  process.env.MCP_OAUTH_TEST_TIMEOUT_MS || 300_000\n);\n\ntype Credentials = {\n  email: string;\n  password: string;\n};\n\ntype FlowArtifacts = {\n  serverId: number;\n  serverName: string;\n  agentId: number;\n  agentName: string;\n  toolName: string;\n  toolId: number | null;\n};\n\ntype StepLogger = (message: string) => void;\n\nconst DEFAULT_USERNAME_SELECTORS = [\n  'input[name=\"identifier\"]',\n  \"#identifier-input\",\n  'input[name=\"username\"]',\n  \"#okta-signin-username\",\n  \"#idp-discovery-username\",\n  'input[id=\"idp-discovery-username\"]',\n  'input[name=\"email\"]',\n  'input[type=\"email\"]',\n  \"#username\",\n  'input[name=\"user\"]',\n];\n\nconst DEFAULT_PASSWORD_SELECTORS = [\n  'input[name=\"credentials.passcode\"]',\n  'input[name=\"password\"]',\n  \"#okta-signin-password\",\n  'input[type=\"password\"]',\n  \"#password\",\n];\n\nconst DEFAULT_SUBMIT_SELECTORS = [\n  'button[type=\"submit\"]',\n  'input[type=\"submit\"]',\n  'button:has-text(\"Sign in\")',\n  'button:has-text(\"Log in\")',\n  'button:has-text(\"Continue\")',\n  'button:has-text(\"Verify\")',\n];\n\nconst DEFAULT_NEXT_SELECTORS = [\n  'button:has-text(\"Next\")',\n  'button:has-text(\"Continue\")',\n  'input[type=\"submit\"][value=\"Next\"]',\n];\n\nconst DEFAULT_CONSENT_SELECTORS = [\n  'button:has-text(\"Allow\")',\n  'button:has-text(\"Authorize\")',\n  'button:has-text(\"Accept\")',\n  'button:has-text(\"Grant\")',\n];\n\nconst TOOL_NAMES = {\n  admin: \"tool_0\",\n  curator: \"tool_1\",\n};\n\nconst SPEC_START_MS = Date.now();\n\nfunction parseSelectorList(\n  value: string | undefined,\n  defaults: string[]\n): string[] {\n  if (!value) return defaults;\n  return value\n    .split(\",\")\n    .map((selector) => selector.trim())\n    .filter(Boolean);\n}\n\nfunction buildMcpServerUrl(baseUrl: string): string {\n  const trimmed = baseUrl.replace(/\\/+$/, \"\");\n  return trimmed.endsWith(\"/mcp\") ? trimmed : `${trimmed}/mcp`;\n}\n\nconst logOauthEvent = (page: Page | null, message: string) => {\n  const location = page ? ` url=${page.url()}` : \"\";\n  console.log(`[mcp-oauth-test] ${message}${location}`);\n};\n\nconst delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));\n\nasync function clickAndWaitForPossibleUrlChange(\n  page: Page,\n  clickAction: () => Promise<void>,\n  context: string\n) {\n  const startingUrl = page.url();\n  const urlChangePromise = page\n    .waitForURL(\n      (url) => {\n        const href = typeof url === \"string\" ? url : url.toString();\n        return href !== startingUrl;\n      },\n      { timeout: POST_CLICK_URL_CHANGE_WAIT_MS }\n    )\n    .then(() => true)\n    .catch(() => false);\n\n  await clickAction();\n  const changed = await urlChangePromise;\n  if (changed) {\n    logOauthEvent(page, `${context}: observed URL change after click`);\n  } else {\n    logOauthEvent(\n      page,\n      `${context}: no immediate URL change; continuing OAuth flow`\n    );\n  }\n}\n\nfunction createStepLogger(testName: string) {\n  const start = Date.now();\n  return (message: string) => {\n    const elapsed = ((Date.now() - start) / 1000).toFixed(1);\n    console.log(`[mcp-oauth-step][${testName}] ${message} (+${elapsed}s)`);\n  };\n}\n\nconst getToolName = (packetObject: Record<string, unknown>): string | null => {\n  const value = packetObject.tool_name;\n  return typeof value === \"string\" ? value : null;\n};\n\nasync function verifyToolInvocationFromChat(\n  page: Page,\n  toolName: string,\n  contextLabel: string,\n  forcedToolId?: number | null\n) {\n  const prompt = [\n    `Call the MCP tool \"${toolName}\" now.`,\n    `Pass {\"name\":\"playwright-${Date.now()}\"} as the arguments.`,\n    \"Return the exact tool output.\",\n  ].join(\" \");\n\n  const packets = await sendMessageAndCaptureStreamPackets(page, prompt, {\n    mockLlmResponse: JSON.stringify({\n      name: toolName,\n      arguments: { name: `playwright-${Date.now()}` },\n    }),\n    payloadOverrides:\n      forcedToolId != null\n        ? {\n            forced_tool_id: forcedToolId,\n            forced_tool_ids: [forcedToolId],\n          }\n        : undefined,\n    waitForAiMessage: false,\n  });\n  const startPackets = getPacketObjectsByType(\n    packets,\n    \"custom_tool_start\"\n  ).filter((packetObject) => getToolName(packetObject) === toolName);\n  const deltaPackets = getPacketObjectsByType(\n    packets,\n    \"custom_tool_delta\"\n  ).filter((packetObject) => getToolName(packetObject) === toolName);\n  const debugPackets = getPacketObjectsByType(\n    packets,\n    \"tool_call_debug\"\n  ).filter((packetObject) => getToolName(packetObject) === toolName);\n\n  expect(startPackets.length).toBeGreaterThan(0);\n  expect(deltaPackets.length).toBeGreaterThan(0);\n  expect(debugPackets.length).toBeGreaterThan(0);\n\n  console.log(\n    `[mcp-oauth-test] ${contextLabel}: tool invocation packets received for ${toolName}`\n  );\n}\n\nasync function fetchMcpToolIdByName(\n  page: Page,\n  serverId: number,\n  toolName: string,\n  timeoutMs: number = 15_000\n): Promise<number | null> {\n  const start = Date.now();\n  let visibleToolNames: string[] = [];\n\n  while (Date.now() - start < timeoutMs) {\n    const response = await page.request.get(\n      `/api/admin/mcp/server/${serverId}/db-tools`\n    );\n    if (!response.ok()) {\n      await page.waitForTimeout(500);\n      continue;\n    }\n\n    const data = (await response.json()) as {\n      tools?: Array<Record<string, unknown>>;\n    };\n    const tools = Array.isArray(data.tools) ? data.tools : [];\n    visibleToolNames = tools\n      .map((tool) => {\n        const value =\n          tool.name ??\n          tool.display_name ??\n          tool.in_code_tool_id ??\n          tool.displayName;\n        return typeof value === \"string\" ? value : \"\";\n      })\n      .filter(Boolean);\n\n    const matchedTool = tools.find((tool) => {\n      const candidates = [\n        tool.name,\n        tool.display_name,\n        tool.in_code_tool_id,\n        tool.displayName,\n      ].filter((value): value is string => typeof value === \"string\");\n      return candidates.includes(toolName);\n    });\n    if (matchedTool) {\n      const id = matchedTool.id;\n      if (typeof id === \"number\") {\n        return id;\n      }\n      if (typeof id === \"string\") {\n        const parsed = Number(id);\n        if (!Number.isNaN(parsed)) {\n          return parsed;\n        }\n      }\n    }\n\n    await page.waitForTimeout(500);\n  }\n\n  console.warn(\n    `[mcp-oauth-test] Could not resolve tool id for ${toolName} on server ${serverId}. Visible tools: ${visibleToolNames.join(\n      \", \"\n    )}`\n  );\n  return null;\n}\n\nasync function logoutSession(page: Page, contextLabel: string) {\n  try {\n    const response = await page.request.post(`${APP_BASE_URL}/api/auth/logout`);\n    const status = response.status();\n    if (!response.ok() && status !== 401) {\n      const body = await response.text();\n      console.warn(\n        `[mcp-oauth-test] ${contextLabel}: Logout returned ${status} - ${body}`\n      );\n    } else {\n      console.log(\n        `[mcp-oauth-test] ${contextLabel}: Logout request completed with status ${status}`\n      );\n    }\n  } catch (error) {\n    console.warn(\n      `[mcp-oauth-test] ${contextLabel}: Logout request failed - ${String(\n        error\n      )}`\n    );\n  }\n}\n\nasync function verifySessionUser(\n  page: Page,\n  expected: { email: string; role: string },\n  contextLabel: string\n) {\n  const response = await page.request.get(`${APP_BASE_URL}/api/me`);\n  const status = response.status();\n  expect(response.ok()).toBeTruthy();\n  const data = await response.json();\n  expect(data.email).toBe(expected.email);\n  expect(data.role).toBe(expected.role);\n  console.log(\n    `[mcp-oauth-test] ${contextLabel}: Verified session user ${data.email} (${data.role}) via /api/me (status ${status})`\n  );\n}\n\nasync function logPageStateWithTag(page: Page, context: string) {\n  const elapsed = ((Date.now() - SPEC_START_MS) / 1000).toFixed(1);\n  await logPageState(page, `${context} (+${elapsed}s)`, \"[mcp-oauth-debug]\");\n}\n\nasync function fillFirstVisible(\n  page: Page,\n  selectors: string[],\n  value: string\n): Promise<boolean> {\n  for (const selector of selectors) {\n    const locator = page.locator(selector).first();\n    const count = await locator.count();\n    if (count === 0) {\n      logOauthEvent(page, `Selector ${selector} not found`);\n      continue;\n    }\n    logOauthEvent(page, `Filling first visible selector: ${selector}`);\n    let isVisible = await locator.isVisible().catch(() => false);\n    logOauthEvent(page, `Selector ${selector} is visible: ${isVisible}`);\n    if (!isVisible) {\n      logOauthEvent(\n        page,\n        `Selector ${selector} is not visible, waiting for it to be visible`\n      );\n      try {\n        await locator.waitFor({ state: \"visible\", timeout: 500 });\n        isVisible = true;\n      } catch {\n        continue;\n      }\n    }\n    if (!isVisible) {\n      continue;\n    }\n    const existing = await locator\n      .inputValue()\n      .catch(() => \"\")\n      .then((val) => val ?? \"\");\n    if (existing !== value) {\n      await locator.fill(value);\n    }\n    return true;\n  }\n  return false;\n}\n\nasync function clickFirstVisible(\n  page: Page,\n  selectors: string[],\n  options: { optional?: boolean } = {}\n): Promise<boolean> {\n  for (const selector of selectors) {\n    const locator = page.locator(selector).first();\n    const count = await locator.count();\n    if (count === 0) continue;\n    let isVisible = await locator.isVisible().catch(() => false);\n    if (!isVisible) {\n      try {\n        await locator.waitFor({ state: \"visible\", timeout: 500 });\n        isVisible = true;\n      } catch {\n        continue;\n      }\n    }\n    try {\n      await locator.click();\n      return true;\n    } catch (err) {\n      if (!options.optional) {\n        throw err;\n      }\n    }\n  }\n  return false;\n}\n\nasync function waitForAnySelector(\n  page: Page,\n  selectors: string[],\n  options: { timeout?: number } = {}\n): Promise<boolean> {\n  const timeout = options.timeout ?? 5000;\n  const deadline = Date.now() + timeout;\n  while (Date.now() < deadline) {\n    for (const selector of selectors) {\n      const locator = page.locator(selector).first();\n      if ((await locator.count()) === 0) {\n        continue;\n      }\n      try {\n        if (await locator.isVisible()) {\n          return true;\n        }\n      } catch {\n        continue;\n      }\n    }\n    await page.waitForTimeout(50);\n  }\n  return false;\n}\n\nasync function scrollToBottom(page: Page): Promise<void> {\n  try {\n    await page.evaluate(() => {\n      const section = document.querySelector(\n        '[data-testid=\"available-tools-section\"]'\n      );\n      if (section && \"scrollIntoView\" in section) {\n        section.scrollIntoView({ behavior: \"instant\", block: \"end\" });\n      } else {\n        window.scrollTo(0, document.body.scrollHeight);\n      }\n    });\n    await page.waitForTimeout(200);\n  } catch {\n    // ignore scrolling failures in test environment\n  }\n}\n\nconst isOnHost = (url: string, host: string): boolean => {\n  try {\n    return new URL(url).host === host;\n  } catch {\n    return false;\n  }\n};\n\nconst isOnAppHost = (url: string): boolean => isOnHost(url, APP_HOST);\nconst isOnIdpHost = (url: string): boolean => isOnHost(url, IDP_HOST);\n\nasync function performIdpLogin(page: Page): Promise<void> {\n  const usernameSelectors = parseSelectorList(\n    process.env.MCP_OAUTH_TEST_USERNAME_SELECTOR,\n    DEFAULT_USERNAME_SELECTORS\n  );\n  const passwordSelectors = parseSelectorList(\n    process.env.MCP_OAUTH_TEST_PASSWORD_SELECTOR,\n    DEFAULT_PASSWORD_SELECTORS\n  );\n  const submitSelectors = parseSelectorList(\n    process.env.MCP_OAUTH_TEST_SUBMIT_SELECTOR,\n    DEFAULT_SUBMIT_SELECTORS\n  );\n  const nextSelectors = parseSelectorList(\n    process.env.MCP_OAUTH_TEST_NEXT_SELECTOR,\n    DEFAULT_NEXT_SELECTORS\n  );\n  const consentSelectors = parseSelectorList(\n    process.env.MCP_OAUTH_TEST_CONSENT_SELECTOR,\n    DEFAULT_CONSENT_SELECTORS\n  );\n  const passwordSelectorString = passwordSelectors.join(\",\");\n\n  await page\n    .waitForLoadState(\"domcontentloaded\", { timeout: 1000 })\n    .catch(() => {});\n\n  logOauthEvent(page, \"Attempting IdP login\");\n  await waitForAnySelector(page, usernameSelectors, { timeout: 1000 });\n  logOauthEvent(page, `Username selectors: ${usernameSelectors.join(\", \")}`);\n  const usernameFilled = await fillFirstVisible(\n    page,\n    usernameSelectors,\n    IDP_USERNAME\n  );\n  if (usernameFilled) {\n    logOauthEvent(page, \"Filled username\");\n    await clickFirstVisible(page, nextSelectors, { optional: true });\n    await waitForAnySelector(page, passwordSelectors, { timeout: 2000 });\n  }\n\n  const submitPasswordAttempt = async (attemptLabel: string) => {\n    const passwordReady = await waitForAnySelector(page, passwordSelectors, {\n      timeout: 8000,\n    });\n    if (!passwordReady) {\n      await logPageStateWithTag(\n        page,\n        `Password input did not appear during ${attemptLabel}`\n      );\n      return false;\n    }\n    const filled = await fillFirstVisible(\n      page,\n      passwordSelectors,\n      IDP_PASSWORD\n    );\n    if (!filled) {\n      await logPageStateWithTag(\n        page,\n        `Unable to find password input during ${attemptLabel}`\n      );\n      return false;\n    }\n    logOauthEvent(page, `Filled password (${attemptLabel})`);\n    const clickedSubmit = await clickFirstVisible(page, submitSelectors, {\n      optional: true,\n    });\n    if (!clickedSubmit) {\n      // As a fallback, press Enter in the password field\n      const passwordLocator = page.locator(passwordSelectorString).first();\n      if ((await passwordLocator.count()) > 0) {\n        await passwordLocator.press(\"Enter\").catch(() => {});\n      } else {\n        await page.keyboard.press(\"Enter\").catch(() => {});\n      }\n    }\n    logOauthEvent(page, `Submitted IdP credentials (${attemptLabel})`);\n    await page\n      .waitForLoadState(\"domcontentloaded\", { timeout: 15000 })\n      .catch(() => {});\n    await page.waitForTimeout(300);\n    return true;\n  };\n\n  const hasVisiblePasswordField = async (): Promise<boolean> => {\n    const locator = page.locator(passwordSelectorString);\n    const count = await locator.count();\n    for (let i = 0; i < count; i++) {\n      try {\n        if (await locator.nth(i).isVisible()) {\n          return true;\n        }\n      } catch {\n        continue;\n      }\n    }\n    return false;\n  };\n\n  await submitPasswordAttempt(\"initial\");\n\n  const MAX_PASSWORD_RETRIES = 3;\n  for (let retry = 1; retry <= MAX_PASSWORD_RETRIES; retry++) {\n    await page.waitForTimeout(250);\n    if (!isOnIdpHost(page.url())) {\n      break;\n    }\n    if (!(await hasVisiblePasswordField())) {\n      break;\n    }\n    logOauthEvent(page, `Password challenge still visible (retry ${retry})`);\n    const success = await submitPasswordAttempt(`retry ${retry}`);\n    if (!success) {\n      break;\n    }\n  }\n\n  await clickFirstVisible(page, consentSelectors, { optional: true });\n  logOauthEvent(page, \"Handled consent prompt if present\");\n  await page\n    .waitForLoadState(\"networkidle\", { timeout: 10000 })\n    .catch(() => {});\n}\n\nasync function completeOauthFlow(\n  page: Page,\n  options: {\n    expectReturnPathContains: string;\n    confirmConnected?: () => Promise<void>;\n    scrollToBottomOnReturn?: boolean;\n  }\n): Promise<void> {\n  logOauthEvent(\n    page,\n    `Completing OAuth flow with options: ${JSON.stringify(options)}`\n  );\n  const returnSubstring = options.expectReturnPathContains;\n  const matchesExpectedReturnPath = (url: string) => {\n    if (!isOnAppHost(url)) {\n      return false;\n    }\n    if (url.includes(returnSubstring)) {\n      return true;\n    }\n    // Re-auth flows can return to a chat session URL instead of agentId URL.\n    if (\n      returnSubstring.includes(\"/app?agentId=\") &&\n      url.includes(\"/app?chatId=\")\n    ) {\n      return true;\n    }\n    return false;\n  };\n\n  logOauthEvent(page, `Current page URL: ${page.url()}`);\n\n  const waitForUrlOrRedirect = async (\n    description: string,\n    timeout: number,\n    predicate: (url: string) => boolean\n  ) => {\n    const waitStart = Date.now();\n    const current = page.url();\n    if (predicate(current)) {\n      logOauthEvent(\n        page,\n        `${description} already satisfied (elapsed ${Date.now() - waitStart}ms)`\n      );\n      return;\n    }\n    logOauthEvent(page, `Waiting for ${description} (timeout ${timeout}ms)`);\n    try {\n      await page.waitForURL(\n        (url) => {\n          const href = typeof url === \"string\" ? url : url.toString();\n          try {\n            return predicate(href);\n          } catch (err) {\n            logOauthEvent(\n              null,\n              `Predicate threw while waiting for ${description}: ${String(err)}`\n            );\n            return false;\n          }\n        },\n        { timeout }\n      );\n      logOauthEvent(\n        page,\n        `${description} satisfied after ${Date.now() - waitStart}ms`\n      );\n    } catch (error) {\n      // If the predicate became true after the timeout (e.g., navigation finished\n      // just before the rejection), treat it as success.\n      if (predicate(page.url())) {\n        logOauthEvent(\n          page,\n          `${description} satisfied (after timeout) in ${\n            Date.now() - waitStart\n          }ms`\n        );\n        return;\n      }\n      await logPageStateWithTag(page, `Timeout waiting for ${description}`);\n      throw error;\n    }\n  };\n\n  const tryConfirmConnected = async (\n    suppressErrors: boolean\n  ): Promise<boolean> => {\n    if (!options.confirmConnected) {\n      return false;\n    }\n    if (page.isClosed()) {\n      const message = \"Page closed before confirmConnected check\";\n      if (suppressErrors) {\n        logOauthEvent(null, message);\n        return false;\n      }\n      throw new Error(message);\n    }\n    if (!isOnAppHost(page.url())) {\n      const message = `confirmConnected requested while not on app host (url=${page.url()})`;\n      if (suppressErrors) {\n        logOauthEvent(page, message);\n        return false;\n      }\n      throw new Error(message);\n    }\n    const confirmPromise = options\n      .confirmConnected()\n      .then(() => ({ status: \"success\" as const }))\n      .catch((error) => ({ status: \"error\" as const, error }));\n    if (suppressErrors) {\n      const result = await Promise.race([\n        confirmPromise,\n        delay(QUICK_CONFIRM_CONNECTED_TIMEOUT_MS).then(() => ({\n          status: \"timeout\" as const,\n        })),\n      ]);\n      if (result.status === \"success\") {\n        return true;\n      }\n      if (result.status === \"error\") {\n        logOauthEvent(page, \"confirmConnected check failed, continuing\");\n        return false;\n      }\n      logOauthEvent(\n        page,\n        `confirmConnected quick check timed out after ${QUICK_CONFIRM_CONNECTED_TIMEOUT_MS}ms`\n      );\n      return false;\n    }\n    const finalResult = await confirmPromise;\n    if (finalResult.status === \"success\") {\n      return true;\n    }\n    throw finalResult.error;\n  };\n\n  if (\n    matchesExpectedReturnPath(page.url()) &&\n    (await tryConfirmConnected(true))\n  ) {\n    return;\n  }\n\n  if (isOnAppHost(page.url()) && !page.url().includes(\"/mcp/oauth/callback\")) {\n    logOauthEvent(page, \"Waiting for redirect away from app host\");\n    await waitForUrlOrRedirect(\"IdP redirect\", 10000, (url) => {\n      const parsed = new URL(url);\n      return (\n        parsed.host !== APP_HOST ||\n        parsed.pathname.includes(\"/mcp/oauth/callback\")\n      );\n    });\n  }\n\n  if (!isOnAppHost(page.url())) {\n    logOauthEvent(page, \"Starting IdP login step\");\n    await performIdpLogin(page);\n  } else if (!page.url().includes(\"/mcp/oauth/callback\")) {\n    logOauthEvent(page, \"Still on app host, waiting for OAuth callback\");\n    await waitForUrlOrRedirect(\n      \"OAuth callback\",\n      60000,\n      (url) =>\n        url.includes(\"/mcp/oauth/callback\") || matchesExpectedReturnPath(url)\n    );\n  }\n\n  if (!page.url().includes(\"/mcp/oauth/callback\")) {\n    logOauthEvent(page, \"Waiting for OAuth callback redirect\");\n    await waitForUrlOrRedirect(\n      \"OAuth callback\",\n      60000,\n      (url) =>\n        url.includes(\"/mcp/oauth/callback\") || matchesExpectedReturnPath(url)\n    );\n  }\n\n  const waitForReturnStart = Date.now();\n  await page\n    .waitForLoadState(\"domcontentloaded\", { timeout: 5000 })\n    .catch(() => {});\n  logOauthEvent(\n    page,\n    `Initial post-return load wait completed in ${\n      Date.now() - waitForReturnStart\n    }ms`\n  );\n\n  await waitForUrlOrRedirect(`return path ${returnSubstring}`, 60000, (url) =>\n    matchesExpectedReturnPath(url)\n  );\n  const returnLoadStart = Date.now();\n  await page\n    .waitForLoadState(\"domcontentloaded\", { timeout: 5000 })\n    .catch(() => {});\n  logOauthEvent(\n    page,\n    `Post-return domcontentloaded wait finished in ${\n      Date.now() - returnLoadStart\n    }ms`\n  );\n  if (!matchesExpectedReturnPath(page.url())) {\n    throw new Error(\n      `Redirected but final URL (${page.url()}) does not contain expected substring ${returnSubstring}`\n    );\n  }\n  logOauthEvent(page, `Returned to ${returnSubstring}`);\n\n  if (options.scrollToBottomOnReturn) {\n    await scrollToBottom(page);\n  }\n\n  await tryConfirmConnected(false);\n}\n\nasync function selectMcpTools(page: Page, serverId: number) {\n  // Find the server toggle switch by its name attribute\n  const toggleButton = page.locator(\n    `button[role=\"switch\"][name=\"mcp_server_${serverId}.enabled\"]`\n  );\n  const toggleExists = await toggleButton.count();\n  if (toggleExists === 0) {\n    throw new Error(\n      `MCP server section ${serverId} not found in assistant form`\n    );\n  }\n\n  // Check if the server is enabled (switch is checked)\n  const isEnabled = await toggleButton.getAttribute(\"aria-checked\");\n  if (isEnabled !== \"true\") {\n    await toggleButton.click();\n  }\n\n  // Individual tools are automatically enabled when the server switch is turned on\n  // The new AgentEditorPage enables all tools when the server is enabled\n}\n\nconst escapeRegex = (value: string): string =>\n  value.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\");\n\nconst ACTION_POPOVER_SELECTOR = '[data-testid=\"tool-options\"]';\nconst LINE_ITEM_SELECTOR = \".group\\\\/LineItem\";\n\nasync function ensureActionPopoverInPrimaryView(page: Page) {\n  const popover = page.locator(ACTION_POPOVER_SELECTOR);\n  const isVisible = await popover.isVisible().catch(() => false);\n  if (!isVisible) {\n    return;\n  }\n\n  const serverRows = page.locator(\"[data-mcp-server-name]\");\n  if ((await serverRows.count()) > 0) {\n    return;\n  }\n\n  const backButton = popover.getByRole(\"button\", { name: /Back/i }).first();\n  if ((await backButton.count()) === 0) {\n    return;\n  }\n  await backButton.click().catch(() => {});\n  await page.waitForTimeout(200);\n}\n\nasync function waitForMcpSecondaryView(page: Page) {\n  const toggleControls = page\n    .locator(ACTION_POPOVER_SELECTOR)\n    .locator(LINE_ITEM_SELECTOR)\n    .filter({ hasText: /(Enable|Disable) All/i })\n    .first();\n  await toggleControls\n    .waitFor({ state: \"visible\", timeout: 5000 })\n    .catch(() => {});\n}\n\nasync function findMcpToolLineItemButton(\n  page: Page,\n  toolName: string,\n  timeoutMs = 5000\n): Promise<Locator | null> {\n  const deadline = Date.now() + timeoutMs;\n  const toolRegex = new RegExp(escapeRegex(toolName), \"i\");\n\n  while (Date.now() < deadline) {\n    const lineItem = page\n      .locator(\n        `${ACTION_POPOVER_SELECTOR} [data-testid^=\"tool-option-\"] ${LINE_ITEM_SELECTOR}, ` +\n          `${ACTION_POPOVER_SELECTOR} ${LINE_ITEM_SELECTOR}`\n      )\n      .filter({ hasText: toolRegex })\n      .first();\n    if ((await lineItem.count()) > 0) {\n      return lineItem;\n    }\n    await page.waitForTimeout(200);\n  }\n\n  return null;\n}\n\nasync function logActionPopoverHtml(page: Page, context: string) {\n  try {\n    const popover = page.locator(ACTION_POPOVER_SELECTOR);\n    if ((await popover.count()) === 0) {\n      console.log(\n        `[mcp-oauth-debug] ${context} action-popover-html=\"<unavailable>\" reason=popover-missing`\n      );\n      return;\n    }\n    const isVisible = await popover.isVisible().catch(() => false);\n    if (!isVisible) {\n      console.log(\n        `[mcp-oauth-debug] ${context} action-popover-html=\"<unavailable>\" reason=popover-hidden`\n      );\n      return;\n    }\n    const html = await popover.evaluate((node) => node.innerHTML || \"\");\n    const snippet = html.replace(/\\s+/g, \" \").slice(0, 2000);\n    console.log(\n      `[mcp-oauth-debug] ${context} action-popover-html=${JSON.stringify(\n        snippet\n      )}`\n    );\n  } catch (error) {\n    console.log(\n      `[mcp-oauth-debug] ${context} action-popover-html=\"<unavailable>\" reason=${String(\n        error\n      )}`\n    );\n  }\n}\n\nasync function closeActionsPopover(page: Page) {\n  if (page.isClosed()) {\n    return;\n  }\n\n  const popover = page.locator(ACTION_POPOVER_SELECTOR);\n  if ((await popover.count()) === 0) {\n    return;\n  }\n  const isVisible = await popover.isVisible().catch(() => false);\n  if (!isVisible) {\n    return;\n  }\n\n  const backButton = popover.getByRole(\"button\", { name: /Back/i }).first();\n  if ((await backButton.count()) > 0) {\n    await backButton.click().catch(() => {});\n    await page.waitForTimeout(200).catch(() => {});\n  }\n\n  if (!page.isClosed()) {\n    await page.keyboard.press(\"Escape\").catch(() => {});\n  }\n}\n\nasync function openActionsPopover(page: Page) {\n  const popover = page.locator(ACTION_POPOVER_SELECTOR);\n  const isVisible = await popover.isVisible().catch(() => false);\n  if (!isVisible) {\n    await page.locator('[data-testid=\"action-management-toggle\"]').click();\n    await popover.waitFor({ state: \"visible\", timeout: 10000 });\n  }\n  await ensureActionPopoverInPrimaryView(page);\n}\n\nasync function restoreAssistantContext(page: Page, agentId: number) {\n  const assistantPath = `/app?agentId=${agentId}`;\n  logOauthEvent(\n    page,\n    `Restoring assistant context for agentId=${agentId} (current url=${page.url()})`\n  );\n\n  // Clear chat-focused URL state first, then explicitly reselect assistant.\n  await page.goto(`${APP_BASE_URL}/app`, { waitUntil: \"domcontentloaded\" });\n  await page\n    .waitForLoadState(\"networkidle\", { timeout: 10000 })\n    .catch(() => {});\n\n  const assistantLink = page.locator(`a[href*=\"agentId=${agentId}\"]`).first();\n  if ((await assistantLink.count()) > 0) {\n    await clickAndWaitForPossibleUrlChange(\n      page,\n      () => assistantLink.click(),\n      `Restore assistant ${agentId} from sidebar`\n    );\n  } else {\n    await page.goto(`${APP_BASE_URL}${assistantPath}`, {\n      waitUntil: \"domcontentloaded\",\n    });\n  }\n\n  await page\n    .waitForLoadState(\"networkidle\", { timeout: 10000 })\n    .catch(() => {});\n  logOauthEvent(page, `Assistant context restore landed on ${page.url()}`);\n}\n\nfunction getServerRowLocator(page: Page, serverName: string) {\n  const labelRegex = new RegExp(escapeRegex(serverName));\n  return page\n    .locator(\n      `${ACTION_POPOVER_SELECTOR} [data-mcp-server-name] ${LINE_ITEM_SELECTOR}, ` +\n        `${ACTION_POPOVER_SELECTOR} ${LINE_ITEM_SELECTOR}`\n    )\n    .filter({ hasText: labelRegex })\n    .first();\n}\n\nasync function collectActionPopoverEntries(page: Page): Promise<string[]> {\n  const locator = page\n    .locator(ACTION_POPOVER_SELECTOR)\n    .locator(\n      `[data-mcp-server-name] ${LINE_ITEM_SELECTOR}, ` +\n        `[data-testid^=\"tool-option-\"] ${LINE_ITEM_SELECTOR}, ` +\n        `${LINE_ITEM_SELECTOR}`\n    );\n  try {\n    return await locator.evaluateAll((nodes) =>\n      nodes\n        .map((node) =>\n          (node.textContent || \"\")\n            .replace(/\\s+/g, \" \")\n            .replace(/\\u00a0/g, \" \")\n            .trim()\n        )\n        .filter(Boolean)\n    );\n  } catch {\n    return [];\n  }\n}\n\nasync function waitForServerRow(\n  page: Page,\n  serverName: string,\n  timeoutMs: number = 10_000\n): Promise<Locator | null> {\n  await page\n    .locator(ACTION_POPOVER_SELECTOR)\n    .waitFor({ state: \"visible\", timeout: 5000 })\n    .catch(() => {});\n\n  const locator = getServerRowLocator(page, serverName);\n  const pollInterval = 100;\n  const deadline = Date.now() + timeoutMs;\n\n  while (Date.now() < deadline) {\n    if ((await locator.count()) > 0) {\n      return locator;\n    }\n    await page.waitForTimeout(pollInterval);\n  }\n\n  return null;\n}\n\nasync function clickServerRowAndWaitForPossibleUrlChangeWithRetry(\n  page: Page,\n  serverName: string,\n  actionName: string,\n  timeoutMs: number = 15_000\n): Promise<boolean> {\n  let serverLocator: Locator | null = await waitForServerRow(\n    page,\n    serverName,\n    timeoutMs\n  );\n  if (!serverLocator) {\n    return false;\n  }\n\n  for (let attempt = 0; attempt < 5; attempt++) {\n    if (!serverLocator) {\n      const refreshedServerLocator = await waitForServerRow(\n        page,\n        serverName,\n        5000\n      );\n      if (!refreshedServerLocator) {\n        continue;\n      }\n      serverLocator = refreshedServerLocator;\n    }\n    const locatorToClick = serverLocator;\n    try {\n      await clickAndWaitForPossibleUrlChange(\n        page,\n        () => locatorToClick.click({ force: true, timeout: 3000 }),\n        actionName\n      );\n      return true;\n    } catch {\n      if (attempt === 4) {\n        break;\n      }\n      await page.waitForTimeout(150);\n      await ensureActionPopoverInPrimaryView(page);\n      const refreshedServerLocator = await waitForServerRow(\n        page,\n        serverName,\n        5000\n      );\n      if (refreshedServerLocator) {\n        serverLocator = refreshedServerLocator;\n      }\n    }\n  }\n\n  return false;\n}\n\nasync function ensureToolOptionVisible(\n  page: Page,\n  toolName: string,\n  serverName: string\n) {\n  await page\n    .waitForSelector(ACTION_POPOVER_SELECTOR, {\n      state: \"visible\",\n      timeout: 5000,\n    })\n    .catch(() => {});\n\n  let toolOption = page\n    .getByTestId(`tool-option-${toolName}`)\n    .locator(LINE_ITEM_SELECTOR)\n    .first();\n  if ((await toolOption.count()) > 0) {\n    return toolOption;\n  }\n\n  await ensureActionPopoverInPrimaryView(page);\n  let serverLocator = await waitForServerRow(page, serverName, 10_000);\n  if (!serverLocator) {\n    const entries = await collectActionPopoverEntries(page);\n    await logPageStateWithTag(\n      page,\n      `MCP server row ${serverName} not found while forcing tool ${toolName}. Visible entries: ${JSON.stringify(\n        entries\n      )}`\n    );\n    throw new Error(`Unable to locate MCP server row for ${serverName}`);\n  }\n\n  let serverClicked = false;\n  for (let attempt = 0; attempt < 3; attempt++) {\n    try {\n      await serverLocator.click({ force: true, timeout: 3000 });\n      serverClicked = true;\n      break;\n    } catch (error) {\n      if (attempt === 2) {\n        throw error;\n      }\n      await page.waitForTimeout(150);\n      await ensureActionPopoverInPrimaryView(page);\n      const refreshedServerLocator = await waitForServerRow(\n        page,\n        serverName,\n        5000\n      );\n      if (refreshedServerLocator) {\n        serverLocator = refreshedServerLocator;\n      }\n    }\n  }\n  if (!serverClicked) {\n    throw new Error(`Unable to click MCP server row for ${serverName}`);\n  }\n\n  await waitForMcpSecondaryView(page);\n\n  for (let attempt = 0; attempt < 3; attempt++) {\n    const mcpToolButton = await findMcpToolLineItemButton(\n      page,\n      toolName,\n      10000\n    );\n    if (mcpToolButton) {\n      const isVisible = await mcpToolButton.isVisible().catch(() => false);\n      if (isVisible) {\n        return mcpToolButton;\n      }\n    }\n    if (attempt < 2) {\n      await closeActionsPopover(page);\n      await openActionsPopover(page);\n      await ensureActionPopoverInPrimaryView(page);\n      const refreshedServerLocator = await waitForServerRow(\n        page,\n        serverName,\n        7000\n      );\n      if (!refreshedServerLocator) {\n        break;\n      }\n      await refreshedServerLocator.click({ force: true, timeout: 3000 });\n      await waitForMcpSecondaryView(page);\n    }\n  }\n\n  await logPageStateWithTag(\n    page,\n    `Tool option ${toolName} still missing after selecting MCP server ${serverName}`\n  );\n  await logActionPopoverHtml(\n    page,\n    `Tool option ${toolName} missing after selecting ${serverName}`\n  );\n  throw new Error(\n    `Tool option ${toolName} not available after selecting server ${serverName}`\n  );\n}\n\nasync function verifyMcpToolRowVisible(\n  page: Page,\n  serverName: string,\n  toolName: string\n) {\n  await openActionsPopover(page);\n  const toolButton = await ensureToolOptionVisible(page, toolName, serverName);\n  await expect(toolButton).toBeVisible({ timeout: 5000 });\n  await closeActionsPopover(page);\n}\n\nasync function ensureMcpToolEnabledInActions(\n  page: Page,\n  serverName: string,\n  toolName: string\n) {\n  await openActionsPopover(page);\n  const toolButton = await ensureToolOptionVisible(page, toolName, serverName);\n  await expect(toolButton).toBeVisible({ timeout: 5000 });\n\n  let toolToggle = toolButton.getByRole(\"switch\").first();\n  if ((await toolToggle.count()) === 0) {\n    toolToggle = page.getByLabel(`Toggle ${toolName}`).first();\n  }\n  await expect(toolToggle).toBeVisible({ timeout: 5000 });\n\n  const isToggleChecked = async () => {\n    const dataState = await toolToggle.getAttribute(\"data-state\");\n    if (typeof dataState === \"string\") {\n      return dataState === \"checked\";\n    }\n    return (await toolToggle.getAttribute(\"aria-checked\")) === \"true\";\n  };\n\n  if (!(await isToggleChecked())) {\n    await toolToggle.click();\n  }\n  await expect.poll(isToggleChecked, { timeout: 5000 }).toBe(true);\n  await closeActionsPopover(page);\n}\n\nasync function reauthenticateFromChat(\n  page: Page,\n  serverName: string,\n  returnSubstring: string\n) {\n  await openActionsPopover(page);\n  const beforeClickUrl = page.url();\n  const clickedServerRow =\n    await clickServerRowAndWaitForPossibleUrlChangeWithRetry(\n      page,\n      serverName,\n      \"Re-authenticate server row click\",\n      15_000\n    );\n  if (!clickedServerRow) {\n    const entries = await collectActionPopoverEntries(page);\n    await logPageStateWithTag(\n      page,\n      `reauthenticateFromChat could not click ${serverName}; visible entries: ${JSON.stringify(\n        entries\n      )}`\n    );\n    throw new Error(\n      `Unable to click MCP server row ${serverName} while reauthenticating`\n    );\n  }\n\n  // Some MCP rows trigger OAuth directly instead of showing a footer action.\n  if (page.url() !== beforeClickUrl || !isOnAppHost(page.url())) {\n    await completeOauthFlow(page, {\n      expectReturnPathContains: returnSubstring,\n    });\n    return;\n  }\n\n  await waitForMcpSecondaryView(page);\n  const reauthItem = page.getByText(\"Re-Authenticate\").first();\n  let reauthVisible = await reauthItem.isVisible().catch(() => false);\n  if (!reauthVisible) {\n    // Popover state can rerender; retry selection once before failing.\n    await closeActionsPopover(page);\n    await openActionsPopover(page);\n    const retryBeforeClickUrl = page.url();\n    const clickedRetry =\n      await clickServerRowAndWaitForPossibleUrlChangeWithRetry(\n        page,\n        serverName,\n        \"Re-authenticate server row click retry\",\n        10_000\n      );\n    if (!clickedRetry) {\n      const entries = await collectActionPopoverEntries(page);\n      await logPageStateWithTag(\n        page,\n        `reauthenticateFromChat retry could not click ${serverName}; visible entries: ${JSON.stringify(\n          entries\n        )}`\n      );\n      throw new Error(\n        `Unable to click MCP server row ${serverName} on reauth retry`\n      );\n    }\n\n    if (page.url() !== retryBeforeClickUrl || !isOnAppHost(page.url())) {\n      await completeOauthFlow(page, {\n        expectReturnPathContains: returnSubstring,\n      });\n      return;\n    }\n\n    await waitForMcpSecondaryView(page);\n    reauthVisible = await reauthItem.isVisible().catch(() => false);\n  }\n\n  await expect(reauthItem).toBeVisible({ timeout: 15000 });\n  await clickAndWaitForPossibleUrlChange(\n    page,\n    () => reauthItem.click(),\n    \"Re-authenticate click\"\n  );\n  await completeOauthFlow(page, {\n    expectReturnPathContains: returnSubstring,\n  });\n}\n\nasync function ensureServerVisibleInActions(\n  page: Page,\n  serverName: string,\n  options?: {\n    agentId?: number;\n  }\n) {\n  for (let attempt = 0; attempt < 2; attempt++) {\n    await page.keyboard.press(\"Escape\").catch(() => {});\n    await openActionsPopover(page);\n    const locatorToUse = await waitForServerRow(page, serverName, 15_000);\n\n    if (locatorToUse) {\n      await expect(locatorToUse).toBeVisible({ timeout: 15000 });\n      await page.keyboard.press(\"Escape\").catch(() => {});\n      return;\n    }\n\n    const entries = await collectActionPopoverEntries(page);\n    await logPageStateWithTag(\n      page,\n      `ensureServerVisibleInActions could not find ${serverName}; visible entries: ${JSON.stringify(\n        entries\n      )}`\n    );\n    await page.keyboard.press(\"Escape\").catch(() => {});\n\n    if (attempt === 0 && options?.agentId) {\n      logOauthEvent(\n        page,\n        `Server ${serverName} missing in actions, retrying after restoring assistant ${options.agentId} context`\n      );\n      await restoreAssistantContext(page, options.agentId);\n      continue;\n    }\n\n    throw new Error(`Server ${serverName} not visible in actions popover`);\n  }\n}\n\nasync function waitForUserRecord(\n  client: OnyxApiClient,\n  email: string,\n  timeoutMs: number = 10_000\n) {\n  const start = Date.now();\n  while (Date.now() - start < timeoutMs) {\n    const record = await client.getUserByEmail(email);\n    if (record) {\n      return record;\n    }\n    await new Promise((resolve) => setTimeout(resolve, 500));\n  }\n  throw new Error(`Timed out waiting for user record ${email}`);\n}\n\nasync function waitForAssistantByName(\n  client: OnyxApiClient,\n  agentName: string,\n  timeoutMs: number = 20_000\n) {\n  const start = Date.now();\n  while (Date.now() - start < timeoutMs) {\n    const assistant = await client.findAgentByName(agentName, {\n      getEditable: true,\n    });\n    if (assistant) {\n      return assistant;\n    }\n    await new Promise((resolve) => setTimeout(resolve, 500));\n  }\n  throw new Error(`Timed out waiting for assistant ${agentName}`);\n}\n\nasync function waitForAssistantTools(\n  client: OnyxApiClient,\n  agentName: string,\n  requiredToolNames: string[],\n  timeoutMs: number = 30_000\n) {\n  const start = Date.now();\n  while (Date.now() - start < timeoutMs) {\n    const assistant = await client.findAgentByName(agentName, {\n      getEditable: true,\n    });\n    if (\n      assistant &&\n      Array.isArray(assistant.tools) &&\n      requiredToolNames.every((name) =>\n        assistant.tools.some(\n          (tool: any) =>\n            tool?.name === name ||\n            tool?.in_code_tool_id === name ||\n            tool?.display_name === name\n        )\n      )\n    ) {\n      return assistant;\n    }\n    await new Promise((resolve) => setTimeout(resolve, 500));\n  }\n  throw new Error(\n    `Timed out waiting for assistant ${agentName} to include tools: ${requiredToolNames.join(\n      \", \"\n    )}`\n  );\n}\n\nasync function mockEmptyOauthStatus(page: Page): Promise<void> {\n  await page.route(\"**/api/mcp/oauth/status*\", (route) =>\n    route.fulfill({\n      status: 200,\n      contentType: \"application/json\",\n      body: JSON.stringify({ statuses: [] }),\n    })\n  );\n}\n\nfunction getNumericQueryParam(\n  urlString: string,\n  paramName: string\n): number | null {\n  try {\n    const value = new URL(urlString).searchParams.get(paramName);\n    if (!value) {\n      return null;\n    }\n    const parsed = Number(value);\n    return Number.isNaN(parsed) ? null : parsed;\n  } catch {\n    return null;\n  }\n}\n\nasync function configureOauthServerAndEnableTool(\n  page: Page,\n  options: {\n    serverName: string;\n    serverDescription: string;\n    serverUrl: string;\n    toolName: string;\n    connectContext: string;\n    logStep: StepLogger;\n  }\n): Promise<number> {\n  const { serverName, serverDescription, serverUrl, toolName, connectContext } =\n    options;\n\n  await page.goto(\"/admin/actions/mcp\");\n  await page.waitForURL(\"**/admin/actions/mcp**\", { timeout: 15000 });\n  options.logStep(\"Opened MCP actions page\");\n\n  await page.getByRole(\"button\", { name: /Add MCP Server/i }).click();\n  await expect(page.locator(\"input#name\")).toBeVisible({ timeout: 10000 });\n  options.logStep(\"Opened Add MCP Server modal\");\n\n  await page.locator(\"input#name\").fill(serverName);\n  await page.locator(\"textarea#description\").fill(serverDescription);\n  await page.locator(\"input#server_url\").fill(serverUrl);\n  options.logStep(`Filled server URL: ${serverUrl}`);\n\n  await page.getByRole(\"button\", { name: \"Add Server\" }).click();\n  await expect(page.getByTestId(\"mcp-auth-method-select\")).toBeVisible({\n    timeout: 10000,\n  });\n  options.logStep(\"Created MCP server, auth modal opened\");\n\n  const authMethodSelect = page.getByTestId(\"mcp-auth-method-select\");\n  await authMethodSelect.click();\n  await page.getByRole(\"option\", { name: \"OAuth\" }).click();\n  options.logStep(\"Selected OAuth authentication method\");\n\n  await page.locator('input[name=\"oauth_client_id\"]').fill(CLIENT_ID);\n  await page.locator('input[name=\"oauth_client_secret\"]').fill(CLIENT_SECRET);\n  options.logStep(\"Filled OAuth credentials\");\n\n  const connectButton = page.getByTestId(\"mcp-auth-connect-button\");\n  await clickAndWaitForPossibleUrlChange(\n    page,\n    () => connectButton.click(),\n    connectContext\n  );\n  options.logStep(\"Triggered OAuth connection\");\n\n  let serverId: number | null = null;\n  await completeOauthFlow(page, {\n    expectReturnPathContains: \"/admin/actions/mcp\",\n    confirmConnected: async () => {\n      serverId = getNumericQueryParam(page.url(), \"server_id\");\n      if (serverId === null) {\n        throw new Error(\"Missing or invalid server_id in OAuth return URL\");\n      }\n      await expect(\n        page.getByText(serverName, { exact: false }).first()\n      ).toBeVisible({ timeout: 15000 });\n    },\n    scrollToBottomOnReturn: false,\n  });\n  options.logStep(\"Completed OAuth flow for MCP server\");\n\n  if (serverId === null) {\n    serverId = getNumericQueryParam(page.url(), \"server_id\");\n  }\n  if (serverId === null) {\n    throw new Error(\"Expected numeric server_id in URL after OAuth flow\");\n  }\n\n  await expect(\n    page.getByText(serverName, { exact: false }).first()\n  ).toBeVisible({\n    timeout: 20000,\n  });\n  const toolToggles = page.getByLabel(`tool-toggle-${toolName}`);\n  await expect(toolToggles.first()).toBeVisible({ timeout: 20000 });\n  options.logStep(\"Verified server card and tool toggles are visible\");\n\n  const toggleCount = await toolToggles.count();\n  options.logStep(`Found ${toggleCount} instance(s) of ${toolName}`);\n  for (let i = 0; i < toggleCount; i++) {\n    const toggle = toolToggles.nth(i);\n    const isEnabled = await toggle.getAttribute(\"aria-checked\");\n    if (isEnabled !== \"true\") {\n      await toggle.click();\n      await expect(toggle).toHaveAttribute(\"aria-checked\", \"true\", {\n        timeout: 5000,\n      });\n      options.logStep(`Enabled tool instance ${i + 1}: ${toolName}`);\n    }\n  }\n  options.logStep(\"Tools auto-fetched and enabled via UI\");\n\n  return serverId;\n}\n\nasync function openAssistantEditor(\n  page: Page,\n  options: {\n    logStep: StepLogger;\n    onLoginRedirect?: () => Promise<void>;\n  }\n): Promise<void> {\n  const assistantEditorUrl = `${APP_BASE_URL}/app/agents/create?admin=true`;\n  let assistantPageLoaded = false;\n\n  for (let attempt = 0; attempt < 2 && !assistantPageLoaded; attempt++) {\n    await page.goto(assistantEditorUrl);\n    try {\n      await page.waitForURL(\"**/app/agents/create**\", {\n        timeout: 15000,\n      });\n      assistantPageLoaded = true;\n    } catch (error) {\n      const currentUrl = page.url();\n      if (currentUrl.includes(\"/app/agents/create\")) {\n        assistantPageLoaded = true;\n        break;\n      }\n      if (currentUrl.includes(\"/app?from=login\") && options.onLoginRedirect) {\n        await options.onLoginRedirect();\n        continue;\n      }\n      await logPageStateWithTag(\n        page,\n        \"Timed out waiting for /app/agents/create\"\n      );\n      throw error;\n    }\n  }\n\n  if (!assistantPageLoaded) {\n    throw new Error(\"Unable to navigate to /app/agents/create\");\n  }\n  options.logStep(\"Assistant editor loaded\");\n}\n\nasync function createAgentAndWaitForTool(\n  page: Page,\n  options: {\n    apiClient: OnyxApiClient;\n    agentName: string;\n    instructions: string;\n    description: string;\n    serverId: number;\n    toolName: string;\n    logStep: StepLogger;\n  }\n): Promise<number> {\n  const {\n    apiClient,\n    agentName,\n    instructions,\n    description,\n    serverId,\n    toolName,\n    logStep,\n  } = options;\n\n  await page.locator('input[name=\"name\"]').fill(agentName);\n  await page.locator('textarea[name=\"instructions\"]').fill(instructions);\n  await page.locator('textarea[name=\"description\"]').fill(description);\n  await selectMcpTools(page, serverId);\n\n  await page.getByRole(\"button\", { name: \"Create\" }).click();\n  await page.waitForURL(\n    (url) => {\n      const href = typeof url === \"string\" ? url : url.toString();\n      return /\\/app\\?agentId=\\d+/.test(href) || href.includes(\"/admin/agents\");\n    },\n    { timeout: 20000 }\n  );\n\n  let agentId = getNumericQueryParam(page.url(), \"agentId\");\n  if (agentId === null) {\n    const assistantRecord = await waitForAssistantByName(apiClient, agentName);\n    agentId = assistantRecord.id;\n    await page.goto(`/app?agentId=${agentId}`);\n    await page.waitForURL(/\\/app\\?agentId=\\d+/, { timeout: 20000 });\n  }\n  if (agentId === null) {\n    throw new Error(\"Assistant ID could not be determined\");\n  }\n  logStep(`Assistant created with id ${agentId}`);\n\n  await waitForAssistantTools(apiClient, agentName, [toolName]);\n  logStep(\"Confirmed assistant tools are available\");\n  return agentId;\n}\n\ntest.describe(\"MCP OAuth flows\", () => {\n  test.describe.configure({ mode: \"serial\" });\n  test.setTimeout(MCP_OAUTH_FLOW_TEST_TIMEOUT_MS);\n\n  let serverProcess: McpServerProcess | null = null;\n  let adminArtifacts: FlowArtifacts | null = null;\n  let curatorArtifacts: FlowArtifacts | null = null;\n  let curatorCredentials: Credentials | null = null;\n  let curatorTwoCredentials: Credentials | null = null;\n  let curatorGroupId: number | null = null;\n  let curatorTwoGroupId: number | null = null;\n\n  test.beforeAll(async ({ browser }, workerInfo) => {\n    if (workerInfo.project.name !== \"admin\") {\n      return;\n    }\n\n    if (!process.env.MCP_TEST_SERVER_URL) {\n      const basePort = Number(process.env.MCP_TEST_SERVER_PORT || \"8004\");\n      const allocatedPort = basePort + workerInfo.workerIndex;\n      serverProcess = await startMcpOauthServer({\n        port: allocatedPort,\n        bindHost: process.env.MCP_TEST_SERVER_BIND_HOST,\n        publicHost: process.env.MCP_TEST_SERVER_PUBLIC_HOST,\n      });\n      const explicitPublicUrl = process.env.MCP_TEST_SERVER_PUBLIC_URL;\n      if (explicitPublicUrl) {\n        runtimeMcpServerUrl = buildMcpServerUrl(explicitPublicUrl);\n      } else {\n        const { host: publicHost, port } = serverProcess.address;\n        runtimeMcpServerUrl = buildMcpServerUrl(`http://${publicHost}:${port}`);\n      }\n    } else {\n      runtimeMcpServerUrl = buildMcpServerUrl(process.env.MCP_TEST_SERVER_URL);\n    }\n\n    const adminContext = await browser.newContext({\n      storageState: \"admin_auth.json\",\n    });\n    const adminPage = await adminContext.newPage();\n    const adminClient = new OnyxApiClient(adminPage.request);\n    try {\n      const existingServers = await adminClient.listMcpServers();\n      for (const server of existingServers) {\n        if (server.server_url === runtimeMcpServerUrl) {\n          await adminClient.deleteMcpServer(server.id);\n        }\n      }\n    } catch (error) {\n      console.warn(\"Failed to cleanup existing MCP servers\", error);\n    }\n\n    const basePassword = \"TestPassword123!\";\n    curatorCredentials = {\n      email: `pw-curator-${Date.now()}@example.com`,\n      password: basePassword,\n    };\n    await adminClient.registerUser(\n      curatorCredentials.email,\n      curatorCredentials.password\n    );\n    const curatorRecord = await waitForUserRecord(\n      adminClient,\n      curatorCredentials.email\n    );\n    curatorGroupId = await adminClient.createUserGroup(\n      `Playwright Curator Group ${Date.now()}`,\n      [curatorRecord.id]\n    );\n    await adminClient.setCuratorStatus(\n      String(curatorGroupId),\n      curatorRecord.id,\n      true\n    );\n    curatorTwoCredentials = {\n      email: `pw-curator-${Date.now()}-b@example.com`,\n      password: basePassword,\n    };\n    await adminClient.registerUser(\n      curatorTwoCredentials.email,\n      curatorTwoCredentials.password\n    );\n    const curatorTwoRecord = await waitForUserRecord(\n      adminClient,\n      curatorTwoCredentials.email\n    );\n    curatorTwoGroupId = await adminClient.createUserGroup(\n      `Playwright Curator Group ${Date.now()}-2`,\n      [curatorTwoRecord.id]\n    );\n    await adminClient.setCuratorStatus(\n      String(curatorTwoGroupId),\n      curatorTwoRecord.id,\n      true\n    );\n\n    await adminContext.close();\n  });\n\n  test.afterAll(async ({ browser }, workerInfo) => {\n    if (workerInfo.project.name !== \"admin\") {\n      return;\n    }\n\n    if (serverProcess) {\n      await serverProcess.stop();\n    }\n\n    const adminContext = await browser.newContext({\n      storageState: \"admin_auth.json\",\n    });\n    const adminPage = await adminContext.newPage();\n    const adminClient = new OnyxApiClient(adminPage.request);\n\n    if (adminArtifacts?.agentId) {\n      await adminClient.deleteAgent(adminArtifacts.agentId);\n    }\n    if (adminArtifacts?.serverId) {\n      await adminClient.deleteMcpServer(adminArtifacts.serverId);\n    }\n\n    if (curatorArtifacts?.agentId) {\n      await adminClient.deleteAgent(curatorArtifacts.agentId);\n    }\n    if (curatorArtifacts?.serverId) {\n      await adminClient.deleteMcpServer(curatorArtifacts.serverId);\n    }\n\n    if (curatorGroupId) {\n      await adminClient.deleteUserGroup(curatorGroupId);\n    }\n    if (curatorTwoGroupId) {\n      await adminClient.deleteUserGroup(curatorTwoGroupId);\n    }\n\n    await adminContext.close();\n  });\n\n  test(\"Admin can configure OAuth MCP server and use tools end-to-end\", async ({\n    page,\n  }, testInfo) => {\n    test.setTimeout(MCP_OAUTH_FLOW_TEST_TIMEOUT_MS);\n    const logStep = createStepLogger(\"AdminFlow\");\n    test.skip(\n      testInfo.project.name !== \"admin\",\n      \"MCP OAuth flows run only in admin project\"\n    );\n    logStep(\"Starting admin MCP OAuth flow\");\n\n    await mockEmptyOauthStatus(page);\n\n    await page.context().clearCookies();\n    logStep(\"Cleared cookies\");\n    await loginAs(page, \"admin\");\n    await verifySessionUser(\n      page,\n      { email: TEST_ADMIN_CREDENTIALS.email, role: \"admin\" },\n      \"AdminFlow primary login\"\n    );\n    const adminApiClient = new OnyxApiClient(page.request);\n    logStep(\"Logged in as admin\");\n\n    const serverName = `PW MCP Admin ${Date.now()}`;\n    const agentName = `PW Admin Assistant ${Date.now()}`;\n\n    const serverId = await configureOauthServerAndEnableTool(page, {\n      serverName,\n      serverDescription: \"Playwright MCP OAuth server (admin)\",\n      serverUrl: runtimeMcpServerUrl,\n      toolName: TOOL_NAMES.admin,\n      connectContext: \"Admin connect click\",\n      logStep,\n    });\n\n    await openAssistantEditor(page, {\n      logStep,\n      onLoginRedirect: async () => {\n        await loginAs(page, \"admin\");\n        await verifySessionUser(\n          page,\n          { email: TEST_ADMIN_CREDENTIALS.email, role: \"admin\" },\n          \"AdminFlow assistant editor relogin\"\n        );\n      },\n    });\n\n    const agentId = await createAgentAndWaitForTool(page, {\n      apiClient: adminApiClient,\n      agentName,\n      instructions: \"Assist with MCP OAuth testing.\",\n      description: \"Playwright admin MCP assistant.\",\n      serverId,\n      toolName: TOOL_NAMES.admin,\n      logStep,\n    });\n    const createdAgent = await adminApiClient.getAssistant(agentId);\n    expect(createdAgent.is_public).toBe(false);\n    logStep(\"Verified newly created agent is private by default\");\n    const adminToolId = await fetchMcpToolIdByName(\n      page,\n      serverId,\n      TOOL_NAMES.admin\n    );\n\n    await ensureServerVisibleInActions(page, serverName, { agentId });\n    await verifyMcpToolRowVisible(page, serverName, TOOL_NAMES.admin);\n    await ensureMcpToolEnabledInActions(page, serverName, TOOL_NAMES.admin);\n    logStep(\"Verified admin MCP tool row visible before reauth\");\n    await verifyToolInvocationFromChat(\n      page,\n      TOOL_NAMES.admin,\n      \"AdminFlow pre-reauth\",\n      adminToolId\n    );\n    logStep(\"Verified admin MCP tool invocation before reauth\");\n\n    await reauthenticateFromChat(page, serverName, `/app?agentId=${agentId}`);\n    await ensureServerVisibleInActions(page, serverName, { agentId });\n    await verifyMcpToolRowVisible(page, serverName, TOOL_NAMES.admin);\n    await ensureMcpToolEnabledInActions(page, serverName, TOOL_NAMES.admin);\n    logStep(\"Verified admin MCP tool row visible after reauth\");\n    await verifyToolInvocationFromChat(\n      page,\n      TOOL_NAMES.admin,\n      \"AdminFlow post-reauth\",\n      adminToolId\n    );\n    logStep(\"Verified admin MCP tool invocation after reauth\");\n\n    // Verify server card still shows the server and tools\n    await page.goto(\"/admin/actions/mcp\");\n    await page.waitForURL(\"**/admin/actions/mcp**\", { timeout: 15000 });\n    await expect(\n      page.getByText(serverName, { exact: false }).first()\n    ).toBeVisible({ timeout: 15000 });\n    logStep(\"Verified MCP server card is still visible on actions page\");\n\n    await adminApiClient.updateAgentSharing(agentId, {\n      isPublic: true,\n      userIds: createdAgent.users.map((user) => user.id),\n      groupIds: createdAgent.groups,\n    });\n    logStep(\"Published agent explicitly for end-user MCP flow\");\n\n    adminArtifacts = {\n      serverId,\n      serverName,\n      agentId,\n      agentName,\n      toolName: TOOL_NAMES.admin,\n      toolId: adminToolId,\n    };\n  });\n\n  test(\"Curator flow with access isolation\", async ({\n    page,\n    browser,\n  }, testInfo) => {\n    test.setTimeout(MCP_OAUTH_FLOW_TEST_TIMEOUT_MS);\n    const logStep = createStepLogger(\"CuratorFlow\");\n    test.skip(\n      testInfo.project.name !== \"admin\",\n      \"MCP OAuth flows run only in admin project\"\n    );\n    logStep(\"Starting curator MCP OAuth flow\");\n    await mockEmptyOauthStatus(page);\n\n    if (!curatorCredentials || !curatorTwoCredentials) {\n      test.skip(true, \"Curator credentials were not initialized\");\n    }\n\n    await page.context().clearCookies();\n    logStep(\"Cleared cookies\");\n    await apiLogin(\n      page,\n      curatorCredentials!.email,\n      curatorCredentials!.password\n    );\n    await verifySessionUser(\n      page,\n      { email: curatorCredentials!.email, role: \"curator\" },\n      \"CuratorFlow primary login\"\n    );\n    logStep(\"Logged in as curator\");\n    const curatorApiClient = new OnyxApiClient(page.request);\n\n    const serverName = `PW MCP Curator ${Date.now()}`;\n    const agentName = `PW Curator Assistant ${Date.now()}`;\n\n    let curatorServerProcess: McpServerProcess | null = null;\n    let curatorRuntimeMcpServerUrl = runtimeMcpServerUrl;\n\n    try {\n      if (!process.env.MCP_TEST_SERVER_URL) {\n        const basePort =\n          (serverProcess?.address.port ??\n            Number(process.env.MCP_TEST_SERVER_PORT || \"8004\")) + 1;\n        curatorServerProcess = await startMcpOauthServer({ port: basePort });\n        const { host, port } = curatorServerProcess.address;\n        curatorRuntimeMcpServerUrl = `http://${host}:${port}/mcp`;\n      }\n\n      const serverId = await configureOauthServerAndEnableTool(page, {\n        serverName,\n        serverDescription: \"Playwright MCP OAuth server (curator)\",\n        serverUrl: curatorRuntimeMcpServerUrl,\n        toolName: TOOL_NAMES.curator,\n        connectContext: \"Curator connect click\",\n        logStep,\n      });\n\n      await openAssistantEditor(page, { logStep });\n\n      const agentId = await createAgentAndWaitForTool(page, {\n        apiClient: curatorApiClient,\n        agentName,\n        instructions: \"Curator MCP OAuth assistant.\",\n        description: \"Playwright curator MCP assistant.\",\n        serverId,\n        toolName: TOOL_NAMES.curator,\n        logStep,\n      });\n\n      await ensureServerVisibleInActions(page, serverName, { agentId });\n      await verifyMcpToolRowVisible(page, serverName, TOOL_NAMES.curator);\n      logStep(\"Verified curator MCP tool row visible before reauth\");\n\n      await reauthenticateFromChat(page, serverName, `/app?agentId=${agentId}`);\n      await ensureServerVisibleInActions(page, serverName, { agentId });\n      await verifyMcpToolRowVisible(page, serverName, TOOL_NAMES.curator);\n      logStep(\"Verified curator MCP tool row visible after reauth\");\n\n      curatorArtifacts = {\n        serverId,\n        serverName,\n        agentId,\n        agentName,\n        toolName: TOOL_NAMES.curator,\n        toolId: null,\n      };\n\n      // Verify isolation: second curator must not be able to edit first curator's server\n      const curatorTwoContext = await browser.newContext();\n      const curatorTwoPage = await curatorTwoContext.newPage();\n      await logoutSession(\n        curatorTwoPage,\n        \"CuratorFlow secondary pre-login logout\"\n      );\n      await apiLogin(\n        curatorTwoPage,\n        curatorTwoCredentials!.email,\n        curatorTwoCredentials!.password\n      );\n      await verifySessionUser(\n        curatorTwoPage,\n        { email: curatorTwoCredentials!.email, role: \"curator\" },\n        \"CuratorFlow secondary login\"\n      );\n      await curatorTwoPage.goto(\"/admin/actions/mcp\");\n      const serverLocator = curatorTwoPage.getByText(serverName, {\n        exact: false,\n      });\n      await expect(serverLocator).not.toHaveCount(0, { timeout: 15000 });\n\n      const editResponse = await curatorTwoPage.request.get(\n        `${APP_BASE_URL}/api/admin/mcp/servers/${serverId}`\n      );\n      expect(editResponse.status()).toBe(403);\n      await curatorTwoContext.close();\n    } finally {\n      await curatorServerProcess?.stop().catch(() => {});\n    }\n  });\n\n  test(\"End user can authenticate and invoke MCP tools via chat\", async ({\n    page,\n  }, testInfo) => {\n    test.setTimeout(MCP_OAUTH_FLOW_TEST_TIMEOUT_MS);\n    const logStep = createStepLogger(\"UserFlow\");\n    test.skip(\n      testInfo.project.name !== \"admin\",\n      \"MCP OAuth flows run only in admin project\"\n    );\n    logStep(\"Starting end-user MCP OAuth flow\");\n    await mockEmptyOauthStatus(page);\n\n    test.skip(!adminArtifacts, \"Admin flow must complete before user test\");\n\n    await page.context().clearCookies();\n    logStep(\"Cleared cookies\");\n    await loginAsWorkerUser(page, testInfo.workerIndex);\n    logStep(\"Logged in as worker user\");\n\n    const agentId = adminArtifacts!.agentId;\n    const serverName = adminArtifacts!.serverName;\n    const toolName = adminArtifacts!.toolName;\n\n    await page.goto(`/app?agentId=${agentId}`, {\n      waitUntil: \"load\",\n    });\n    await ensureServerVisibleInActions(page, serverName, { agentId });\n    logStep(\"Opened chat as user and ensured server visible\");\n\n    await openActionsPopover(page);\n    const serverLineItem = await waitForServerRow(page, serverName, 15_000);\n    if (!serverLineItem) {\n      const entries = await collectActionPopoverEntries(page);\n      await logPageStateWithTag(\n        page,\n        `UserFlow reauth locate failed for ${serverName}; visible entries: ${JSON.stringify(\n          entries\n        )}`\n      );\n      throw new Error(\n        `Unable to locate MCP server row ${serverName} for user reauth`\n      );\n    }\n    await expect(serverLineItem).toBeVisible({ timeout: 15000 });\n\n    const clickedServerRow =\n      await clickServerRowAndWaitForPossibleUrlChangeWithRetry(\n        page,\n        serverName,\n        \"End-user reauth click\",\n        15_000\n      );\n    if (!clickedServerRow) {\n      const entries = await collectActionPopoverEntries(page);\n      await logPageStateWithTag(\n        page,\n        `UserFlow reauth click failed for ${serverName}; visible entries: ${JSON.stringify(\n          entries\n        )}`\n      );\n      throw new Error(\n        `Unable to click MCP server row ${serverName} for user reauth`\n      );\n    }\n\n    await completeOauthFlow(page, {\n      expectReturnPathContains: `/app?agentId=${agentId}`,\n    });\n    logStep(\"Completed user OAuth reauthentication\");\n\n    await ensureServerVisibleInActions(page, serverName, { agentId });\n    await verifyMcpToolRowVisible(page, serverName, toolName);\n    await ensureMcpToolEnabledInActions(page, serverName, toolName);\n    logStep(\"Verified user MCP tool row visible after reauth\");\n    await verifyToolInvocationFromChat(\n      page,\n      toolName,\n      \"UserFlow post-reauth\",\n      adminArtifacts!.toolId\n    );\n    logStep(\"Verified user MCP tool invocation after reauth\");\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/onboarding/onboarding_flow.spec.ts",
    "content": "import { expect, test } from \"@playwright/test\";\nimport type { Page } from \"@playwright/test\";\nimport { loginAs, loginAsRandomUser, apiLogin } from \"@tests/e2e/utils/auth\";\nimport { OnyxApiClient } from \"@tests/e2e/utils/onyxApiClient\";\nimport { expectElementScreenshot } from \"@tests/e2e/utils/visualRegression\";\n\n/**\n * Onboarding Flow E2E Tests\n *\n * Tests the 4 main user scenarios:\n * 1. Admin WITHOUT LLM providers -> Full onboarding, chat disabled\n * 2. Admin WITH LLM providers -> No full onboarding, chat enabled\n * 3. Non-admin WITHOUT LLM providers -> NonAdminStep name prompt, chat disabled\n * 4. Non-admin WITH LLM providers -> NonAdminStep name prompt, chat enabled\n *\n * Marked @exclusive because scenarios 1 & 3 delete all LLM providers.\n */\n\nasync function deleteAllProviders(client: OnyxApiClient): Promise<void> {\n  const providers = await client.listLlmProviders();\n  for (const provider of providers) {\n    try {\n      await client.deleteProvider(provider.id, { force: true });\n    } catch (error) {\n      console.warn(\n        `Failed to delete provider ${provider.id}: ${String(error)}`\n      );\n    }\n  }\n}\n\nasync function createFreshAdmin(\n  page: Page\n): Promise<{ email: string; password: string }> {\n  // First, log in as the existing admin so we can promote the new user\n  await page.context().clearCookies();\n  const { email, password } = await loginAsRandomUser(page);\n\n  // Now promote the new user to admin via the existing admin\n  await page.context().clearCookies();\n  await loginAs(page, \"admin\");\n  const adminClient = new OnyxApiClient(page.request);\n  await adminClient.setUserRole(email, \"admin\");\n\n  // Log back in as the new admin\n  await page.context().clearCookies();\n  await apiLogin(page, email, password);\n\n  return { email, password };\n}\n\nasync function createFreshUser(\n  page: Page\n): Promise<{ email: string; password: string }> {\n  await page.context().clearCookies();\n  return await loginAsRandomUser(page);\n}\n\ntest.describe(\"Onboarding Flow @exclusive\", () => {\n  test.describe(\"Scenario 1: Admin WITHOUT LLM providers\", () => {\n    test.beforeEach(async ({ page }) => {\n      // Delete all providers first (as existing admin)\n      await page.context().clearCookies();\n      await loginAs(page, \"admin\");\n      const adminClient = new OnyxApiClient(page.request);\n      await deleteAllProviders(adminClient);\n\n      // Create a fresh admin user (no chat history)\n      await createFreshAdmin(page);\n    });\n\n    test.afterEach(async ({ page }) => {\n      // Restore providers\n      await page.context().clearCookies();\n      await loginAs(page, \"admin\");\n      const adminClient = new OnyxApiClient(page.request);\n      await adminClient.ensurePublicProvider();\n    });\n\n    test(\"shows full onboarding flow with Welcome step\", async ({ page }) => {\n      await page.goto(\"/app\");\n      await page.waitForLoadState(\"networkidle\");\n\n      const onboardingFlow = page.locator('[aria-label=\"onboarding-flow\"]');\n      await expect(onboardingFlow).toBeVisible({ timeout: 15000 });\n\n      const header = page.locator('[data-label=\"onboarding-header\"]');\n      await expect(header).toBeVisible();\n      await expect(\n        header.getByRole(\"button\", { name: \"Let's Go\" })\n      ).toBeVisible();\n\n      await expectElementScreenshot(header, {\n        name: \"onboarding-welcome-step\",\n      });\n    });\n\n    test(\"chat input bar is disabled during onboarding\", async ({ page }) => {\n      await page.goto(\"/app\");\n      await page.waitForLoadState(\"networkidle\");\n\n      await expect(page.locator('[aria-label=\"onboarding-flow\"]')).toBeVisible({\n        timeout: 15000,\n      });\n\n      const chatInput = page.locator(\"#onyx-chat-input\");\n      await expect(chatInput).toHaveAttribute(\"aria-disabled\", \"true\");\n\n      await expectElementScreenshot(chatInput, {\n        name: \"onboarding-chat-disabled\",\n      });\n    });\n\n    test(\"can progress through onboarding steps\", async ({ page }) => {\n      await page.goto(\"/app\");\n      await page.waitForLoadState(\"networkidle\");\n\n      const header = page.locator('[data-label=\"onboarding-header\"]');\n      await expect(header).toBeVisible({ timeout: 15000 });\n      await header.getByRole(\"button\", { name: \"Let's Go\" }).click();\n\n      const nameStep = page.locator('[aria-label=\"onboarding-name-step\"]');\n      await expect(nameStep).toBeVisible({ timeout: 10000 });\n      await nameStep.getByPlaceholder(\"Your name\").fill(\"Test Admin\");\n\n      await expectElementScreenshot(nameStep, {\n        name: \"onboarding-name-step\",\n      });\n\n      const nextButton = header.getByRole(\"button\", { name: \"Next\" });\n      await expect(nextButton).toBeEnabled({ timeout: 10000 });\n      await nextButton.click();\n\n      const llmStep = page.locator('[aria-label=\"onboarding-llm-step\"]');\n      await expect(llmStep).toBeVisible({ timeout: 10000 });\n\n      await expectElementScreenshot(llmStep, {\n        name: \"onboarding-llm-step\",\n      });\n    });\n  });\n\n  test.describe(\"Scenario 2: Admin WITH LLM providers\", () => {\n    test.beforeEach(async ({ page }) => {\n      // Ensure provider exists\n      await page.context().clearCookies();\n      await loginAs(page, \"admin\");\n      const adminClient = new OnyxApiClient(page.request);\n      await adminClient.ensurePublicProvider();\n\n      // Create a fresh admin user\n      await createFreshAdmin(page);\n    });\n\n    test(\"does not show full onboarding flow\", async ({ page }) => {\n      await page.goto(\"/app\");\n      await page.waitForLoadState(\"networkidle\");\n\n      await expect(\n        page.locator('[aria-label=\"onboarding-flow\"]')\n      ).not.toBeVisible({ timeout: 5000 });\n    });\n\n    test(\"shows name prompt (NonAdminStep) when name not set\", async ({\n      page,\n    }) => {\n      await page.goto(\"/app\");\n      await page.waitForLoadState(\"networkidle\");\n\n      const namePrompt = page.locator('[aria-label=\"non-admin-name-prompt\"]');\n      await expect(namePrompt).toBeVisible({ timeout: 15000 });\n      await expect(\n        namePrompt.getByRole(\"button\", { name: \"Save\" })\n      ).toBeVisible();\n\n      await expectElementScreenshot(namePrompt, {\n        name: \"onboarding-admin-name-prompt\",\n      });\n    });\n\n    test(\"chat input bar is enabled\", async ({ page }) => {\n      await page.goto(\"/app\");\n      await page.waitForLoadState(\"networkidle\");\n\n      await expect(page.locator(\"#onyx-chat-input\")).toBeVisible({\n        timeout: 15000,\n      });\n\n      const chatInput = page.locator(\"#onyx-chat-input\");\n      await expect(chatInput).not.toHaveAttribute(\"aria-disabled\", \"true\");\n    });\n  });\n\n  test.describe(\"Scenario 3: Non-admin WITHOUT LLM providers\", () => {\n    test.beforeEach(async ({ page }) => {\n      // Delete all providers (as existing admin)\n      await page.context().clearCookies();\n      await loginAs(page, \"admin\");\n      const adminClient = new OnyxApiClient(page.request);\n      await deleteAllProviders(adminClient);\n\n      // Create a fresh non-admin user\n      await createFreshUser(page);\n    });\n\n    test.afterEach(async ({ page }) => {\n      // Restore providers\n      await page.context().clearCookies();\n      await loginAs(page, \"admin\");\n      const adminClient = new OnyxApiClient(page.request);\n      await adminClient.ensurePublicProvider();\n    });\n\n    test(\"shows NonAdminStep name prompt\", async ({ page }) => {\n      // loginAsRandomUser already navigates to /app\n      const namePrompt = page.locator('[aria-label=\"non-admin-name-prompt\"]');\n      await expect(namePrompt).toBeVisible({ timeout: 15000 });\n      await expect(\n        namePrompt.getByRole(\"button\", { name: \"Save\" })\n      ).toBeVisible();\n\n      await expectElementScreenshot(namePrompt, {\n        name: \"onboarding-nonadmin-name-prompt\",\n      });\n    });\n\n    test(\"does NOT show full onboarding flow\", async ({ page }) => {\n      await expect(\n        page.locator('[aria-label=\"onboarding-flow\"]')\n      ).not.toBeVisible({ timeout: 5000 });\n      await expect(\n        page.locator('[aria-label=\"onboarding-llm-step\"]')\n      ).not.toBeVisible();\n    });\n\n    test(\"chat input bar is disabled\", async ({ page }) => {\n      await expect(page.locator(\"#onyx-chat-input\")).toBeVisible({\n        timeout: 15000,\n      });\n\n      const chatInput = page.locator(\"#onyx-chat-input\");\n      await expect(chatInput).toHaveAttribute(\"aria-disabled\", \"true\");\n    });\n\n    test(\"can save name and see confirmation\", async ({ page }) => {\n      const namePrompt = page.locator('[aria-label=\"non-admin-name-prompt\"]');\n      await expect(namePrompt).toBeVisible({ timeout: 15000 });\n\n      await namePrompt.getByPlaceholder(\"Your name\").fill(\"Test User\");\n      await namePrompt.getByRole(\"button\", { name: \"Save\" }).click();\n\n      const confirmation = page.locator(\n        '[aria-label=\"non-admin-confirmation\"]'\n      );\n      await expect(confirmation).toBeVisible({ timeout: 10000 });\n\n      await expectElementScreenshot(confirmation, {\n        name: \"onboarding-nonadmin-confirmation\",\n      });\n    });\n  });\n\n  test.describe(\"Scenario 4: Non-admin WITH LLM providers\", () => {\n    test.beforeEach(async ({ page }) => {\n      // Ensure provider exists\n      await page.context().clearCookies();\n      await loginAs(page, \"admin\");\n      const adminClient = new OnyxApiClient(page.request);\n      await adminClient.ensurePublicProvider();\n\n      // Create a fresh non-admin user\n      await createFreshUser(page);\n    });\n\n    test(\"shows name prompt when name not set\", async ({ page }) => {\n      // loginAsRandomUser already navigates to /app\n      const namePrompt = page.locator('[aria-label=\"non-admin-name-prompt\"]');\n      await expect(namePrompt).toBeVisible({ timeout: 15000 });\n    });\n\n    test(\"chat input bar is enabled\", async ({ page }) => {\n      await expect(page.locator(\"#onyx-chat-input\")).toBeVisible({\n        timeout: 15000,\n      });\n\n      const chatInput = page.locator(\"#onyx-chat-input\");\n      await expect(chatInput).not.toHaveAttribute(\"aria-disabled\", \"true\");\n    });\n\n    test(\"after setting name, shows confirmation then no onboarding UI\", async ({\n      page,\n    }) => {\n      const namePrompt = page.locator('[aria-label=\"non-admin-name-prompt\"]');\n      await expect(namePrompt).toBeVisible({ timeout: 15000 });\n\n      await namePrompt.getByPlaceholder(\"Your name\").fill(\"E2E User\");\n      await namePrompt.getByRole(\"button\", { name: \"Save\" }).click();\n\n      const confirmation = page.locator(\n        '[aria-label=\"non-admin-confirmation\"]'\n      );\n      await expect(confirmation).toBeVisible({ timeout: 10000 });\n\n      await expectElementScreenshot(confirmation, {\n        name: \"onboarding-nonadmin-with-llm-confirmation\",\n      });\n\n      await confirmation.getByRole(\"button\").first().click();\n      await expect(namePrompt).not.toBeVisible({ timeout: 5000 });\n      await expect(confirmation).not.toBeVisible();\n    });\n  });\n});\n"
  },
  {
    "path": "web/tests/e2e/settings/settings_pages.spec.ts",
    "content": "import { expect, test } from \"@playwright/test\";\nimport { THEMES, setThemeBeforeNavigation } from \"@tests/e2e/utils/theme\";\nimport { expectScreenshot } from \"@tests/e2e/utils/visualRegression\";\n\ntest.use({ storageState: \"admin_auth.json\" });\n\n/** Maps each settings slug to the header title shown on that page. */\nconst SLUG_TO_HEADER: Record<string, string> = {\n  general: \"Profile\",\n  \"chat-preferences\": \"Chats\",\n  \"accounts-access\": \"Accounts\",\n  connectors: \"Connectors\",\n};\n\nfor (const theme of THEMES) {\n  test.describe(`Settings pages (${theme} mode)`, () => {\n    test.beforeEach(async ({ page }) => {\n      await setThemeBeforeNavigation(page, theme);\n    });\n\n    test(\"should screenshot each settings tab\", async ({ page }) => {\n      await page.goto(\"/app/settings/general\");\n      await page\n        .getByTestId(\"settings-left-tab-navigation\")\n        .waitFor({ state: \"visible\" });\n\n      const nav = page.getByTestId(\"settings-left-tab-navigation\");\n      const tabs = nav.locator(\"a\");\n      await expect(tabs.first()).toBeVisible({ timeout: 10_000 });\n      const count = await tabs.count();\n\n      for (let i = 0; i < count; i++) {\n        const tab = tabs.nth(i);\n        const href = await tab.getAttribute(\"href\");\n        const slug = href ? href.replace(\"/app/settings/\", \"\") : `tab-${i}`;\n\n        await tab.click();\n\n        const expectedHeader = SLUG_TO_HEADER[slug];\n        if (expectedHeader) {\n          await expect(\n            page\n              .locator(\".opal-content-md-header\")\n              .filter({ hasText: expectedHeader })\n          ).toBeVisible({ timeout: 10_000 });\n        } else {\n          await page.waitForLoadState(\"networkidle\");\n        }\n\n        await expectScreenshot(page, {\n          name: `settings-${theme}-${slug}`,\n        });\n      }\n    });\n  });\n}\n"
  },
  {
    "path": "web/tests/e2e/utils/agentUtils.ts",
    "content": "import { Page } from \"@playwright/test\";\nimport { expect } from \"@playwright/test\";\nimport { verifyAgentIsChosen } from \"./chatActions\";\n\nexport type AgentParams = {\n  name: string;\n  description?: string;\n  instructions?: string; // system_prompt\n};\n\n// Create an assistant via the UI from the app page and wait until it is active\nexport async function createAgent(page: Page, params: AgentParams) {\n  const { name, description = \"\", instructions = \"Test Instructions\" } = params;\n\n  // Navigate to creation flow\n  // We assume we're on /app; if not, go there first\n  if (!page.url().includes(\"/app\")) {\n    await page.goto(\"/app\");\n  }\n\n  // Open Assistants modal/list\n  await page.getByTestId(\"AppSidebar/more-agents\").click();\n  await page.getByLabel(\"AgentsPage/new-agent-button\").click();\n\n  // Fill required fields\n  await page.locator('input[name=\"name\"]').fill(name);\n  if (description) {\n    await page.locator('textarea[name=\"description\"]').fill(description);\n  }\n  await page.locator('textarea[name=\"instructions\"]').fill(instructions);\n\n  // Submit create\n  await page.getByRole(\"button\", { name: \"Create\" }).click();\n\n  // Verify it is selected in chat (placeholder contains assistant name)\n  await verifyAgentIsChosen(page, name);\n}\n\n// Pin an assistant by its visible name in the sidebar list.\n// If already pinned, this will leave it pinned (no-op).\nexport async function pinAgentByName(\n  page: Page,\n  agentName: string\n): Promise<void> {\n  const row = page\n    .locator('[data-testid^=\"assistant-[\"]')\n    .filter({ hasText: agentName })\n    .first();\n\n  await row.waitFor({ state: \"visible\", timeout: 10000 });\n  await row.hover();\n\n  const button = row.locator(\"button\").first();\n  await button.hover();\n\n  // Tooltip indicates pin vs unpin; use it if available\n  const pinTooltip = page.getByText(\"Pin this assistant to the sidebar\");\n  const unpinTooltip = page.getByText(\"Unpin this assistant from the sidebar\");\n\n  try {\n    await expect(pinTooltip.or(unpinTooltip)).toBeVisible({ timeout: 2000 });\n  } catch {\n    // Tooltip may fail to appear in CI; continue optimistically\n  }\n\n  if (await pinTooltip.isVisible().catch(() => false)) {\n    await button.click();\n    await page.waitForTimeout(300);\n  }\n}\n\n/**\n * Ensures the Image Generation tool is enabled in the default agent configuration.\n * If it's not enabled, it will toggle it on.\n *\n * Navigates to the Chat Preferences page and toggles the Image Generation switch\n * inside the \"Actions & Tools\" collapsible section (open by default).\n */\nexport async function ensureImageGenerationEnabled(page: Page): Promise<void> {\n  // Navigate to the chat preferences page\n  await page.goto(\"/admin/configuration/chat-preferences\");\n  await page.waitForLoadState(\"networkidle\");\n\n  // The \"Actions & Tools\" collapsible is open by default.\n  // Find the Image Generation tool switch via its label container.\n  const imageGenSwitch = page\n    .locator(\"label\")\n    .filter({ has: page.getByText(\"Image Generation\", { exact: true }) })\n    .locator('button[role=\"switch\"]')\n    .first();\n\n  await expect(imageGenSwitch).toBeVisible({ timeout: 10000 });\n\n  // Check if it's already enabled\n  const currentState = await imageGenSwitch.getAttribute(\"aria-checked\");\n\n  if (currentState !== \"true\") {\n    // Toggle it on — auto-saves immediately via PATCH /api/admin/default-assistant\n    await imageGenSwitch.click();\n\n    // Wait for the auto-save toast to confirm success\n    await expect(page.getByText(\"Tools updated\").first()).toBeVisible({\n      timeout: 5000,\n    });\n\n    // Verify it's now enabled\n    const newState = await imageGenSwitch.getAttribute(\"aria-checked\");\n    if (newState !== \"true\") {\n      throw new Error(\"Failed to enable Image Generation tool\");\n    }\n  }\n}\n"
  },
  {
    "path": "web/tests/e2e/utils/auth.ts",
    "content": "import type { Page } from \"@playwright/test\";\nimport {\n  TEST_ADMIN2_CREDENTIALS,\n  TEST_ADMIN_CREDENTIALS,\n  WORKER_USER_POOL_SIZE,\n  workerUserCredentials,\n} from \"@tests/e2e/constants\";\n\n/**\n * Log in via the API and set cookies on the page's browser context.\n * Much faster than navigating through the login UI.\n */\nexport async function apiLogin(\n  page: Page,\n  email: string,\n  password: string\n): Promise<void> {\n  const res = await page.request.post(\"/api/auth/login\", {\n    form: { username: email, password },\n  });\n  if (!res.ok()) {\n    const body = await res.text();\n    throw new Error(`API login failed for ${email}: ${res.status()} ${body}`);\n  }\n}\n\n// Logs in a known test user (admin or admin2) via the API.\n// Users must already be provisioned (see global-setup.ts).\nexport async function loginAs(\n  page: Page,\n  userType: \"admin\" | \"admin2\"\n): Promise<void> {\n  const { email, password } =\n    userType === \"admin\" ? TEST_ADMIN_CREDENTIALS : TEST_ADMIN2_CREDENTIALS;\n\n  await apiLogin(page, email, password);\n}\n\n/**\n * Log in as a worker-specific user for test isolation.\n * Uses modulo to map any workerIndex (which can exceed the pool size due to\n * retries spawning new workers) back to a provisioned user. This is safe\n * because retries never run in parallel with the original attempt.\n */\nexport async function loginAsWorkerUser(\n  page: Page,\n  workerIndex: number\n): Promise<void> {\n  const { email, password } = workerUserCredentials(\n    workerIndex % WORKER_USER_POOL_SIZE\n  );\n  await apiLogin(page, email, password);\n}\n\n// Generate a random email and password for throwaway test users.\nconst generateRandomCredentials = () => {\n  const randomString = Math.random().toString(36).substring(2, 10);\n  const specialChars = \"!@#$%^&*()_+{}[]|:;<>,.?~\";\n  const randomSpecialChar =\n    specialChars[Math.floor(Math.random() * specialChars.length)];\n  const randomUpperCase = String.fromCharCode(\n    65 + Math.floor(Math.random() * 26)\n  );\n  const randomNumber = Math.floor(Math.random() * 10);\n\n  return {\n    email: `test_${randomString}@example.com`,\n    password: `P@ssw0rd_${randomUpperCase}${randomSpecialChar}${randomNumber}${randomString}`,\n  };\n};\n\n// Register and log in as a new random user via the API.\nexport async function loginAsRandomUser(page: Page): Promise<{\n  email: string;\n  password: string;\n}> {\n  const { email, password } = generateRandomCredentials();\n\n  const registerRes = await page.request.post(\"/api/auth/register\", {\n    data: { email, username: email, password },\n  });\n  if (!registerRes.ok()) {\n    const body = await registerRes.text();\n    throw new Error(\n      `Failed to register random user ${email}: ${registerRes.status()} ${body}`\n    );\n  }\n\n  await apiLogin(page, email, password);\n\n  // Navigate to the app so the page is ready for test interactions\n  await page.goto(\"/app?new_team=true\");\n  await page.waitForLoadState(\"networkidle\");\n\n  return { email, password };\n}\n"
  },
  {
    "path": "web/tests/e2e/utils/chatActions.ts",
    "content": "import { Page } from \"@playwright/test\";\nimport { expect } from \"@playwright/test\";\n\nexport async function verifyDefaultAgentIsChosen(page: Page) {\n  await expect(page.getByTestId(\"onyx-logo\")).toBeVisible({ timeout: 5000 });\n}\n\nexport async function verifyAgentIsChosen(\n  page: Page,\n  agentName: string,\n  timeout: number = 5000\n) {\n  await expect(\n    page.getByTestId(\"agent-name-display\").getByText(agentName)\n  ).toBeVisible({ timeout });\n}\n\nexport async function navigateToAgentInHistorySidebar(\n  page: Page,\n  testId: string,\n  agentName: string\n) {\n  await page.getByTestId(`assistant-${testId}`).click();\n  try {\n    await verifyAgentIsChosen(page, agentName);\n  } catch (error) {\n    console.error(\"Error in navigateToAgentInHistorySidebar:\", error);\n    const pageText = await page.textContent(\"body\");\n    console.log(\"Page text:\", pageText);\n    throw error;\n  }\n}\n\nexport async function sendMessage(page: Page, message: string) {\n  // Count existing AI messages before sending\n  const existingMessageCount = await page\n    .locator('[data-testid=\"onyx-ai-message\"]')\n    .count();\n\n  await page.locator(\"#onyx-chat-input-textarea\").click();\n  await page.locator(\"#onyx-chat-input-textarea\").fill(message);\n  await page.locator(\"#onyx-chat-input-send-button\").click();\n\n  // Wait for a NEW AI message to appear (count should increase)\n  await expect(page.locator('[data-testid=\"onyx-ai-message\"]')).toHaveCount(\n    existingMessageCount + 1,\n    { timeout: 30000 }\n  );\n\n  // Wait for up to 10 seconds for the URL to contain 'chatId='\n  await page.waitForFunction(\n    () => window.location.href.includes(\"chatId=\"),\n    null,\n    { timeout: 10000 }\n  );\n}\n\nexport async function verifyCurrentModel(page: Page, modelName: string) {\n  const text = await page\n    .getByTestId(\"AppInputBar/llm-popover-trigger\")\n    .textContent();\n  expect(text).toContain(modelName);\n}\n\nexport async function selectModelFromInputPopover(\n  page: Page,\n  preferredModels: string[]\n): Promise<string> {\n  const currentModelText =\n    (\n      await page.getByTestId(\"AppInputBar/llm-popover-trigger\").textContent()\n    )?.trim() ?? \"\";\n\n  await page.getByTestId(\"AppInputBar/llm-popover-trigger\").click();\n  await page.waitForSelector('[role=\"dialog\"]', {\n    state: \"visible\",\n    timeout: 10000,\n  });\n\n  const dialog = page.locator('[role=\"dialog\"]');\n  const searchInput = dialog.getByPlaceholder(\"Search models...\");\n\n  for (const modelName of preferredModels) {\n    await searchInput.fill(modelName);\n    const modelOptions = dialog.locator(\"[data-selected]\");\n    const nonSelectedOptions = dialog.locator('[data-selected=\"false\"]');\n\n    if ((await modelOptions.count()) > 0) {\n      const candidate =\n        (await nonSelectedOptions.count()) > 0\n          ? nonSelectedOptions.first()\n          : modelOptions.first();\n\n      await candidate.click();\n      await page.waitForSelector('[role=\"dialog\"]', { state: \"hidden\" });\n      const selectedText =\n        (\n          await page\n            .getByTestId(\"AppInputBar/llm-popover-trigger\")\n            .textContent()\n        )?.trim() ?? \"\";\n      if (!selectedText) {\n        throw new Error(\n          \"Failed to read selected model text from input trigger\"\n        );\n      }\n      return selectedText;\n    }\n  }\n\n  // Reset search so fallback sees all available models.\n  await searchInput.fill(\"\");\n\n  const nonSelectedOptions = dialog.locator('[data-selected=\"false\"]');\n  if ((await nonSelectedOptions.count()) > 0) {\n    const fallback = nonSelectedOptions.first();\n    await expect(fallback).toBeVisible();\n    await fallback.click();\n    await page.waitForSelector('[role=\"dialog\"]', { state: \"hidden\" });\n\n    const selectedText =\n      (\n        await page.getByTestId(\"AppInputBar/llm-popover-trigger\").textContent()\n      )?.trim() ?? \"\";\n    if (!selectedText) {\n      throw new Error(\"Failed to read selected model text from input trigger\");\n    }\n    return selectedText;\n  }\n\n  await page.keyboard.press(\"Escape\").catch(() => {});\n  await page\n    .waitForSelector('[role=\"dialog\"]', { state: \"hidden\", timeout: 5000 })\n    .catch(() => {});\n\n  if (currentModelText) {\n    return currentModelText;\n  }\n\n  throw new Error(\"Unable to select a model from input popover\");\n}\n\nexport async function switchModel(page: Page, modelName: string) {\n  await page.getByTestId(\"AppInputBar/llm-popover-trigger\").click();\n\n  // Wait for the popover to open\n  await page.waitForSelector('[role=\"dialog\"]', { state: \"visible\" });\n\n  const modelButton = page\n    .locator('[role=\"dialog\"]')\n    .locator('[role=\"button\"]')\n    .filter({ hasText: modelName })\n    .first();\n\n  await modelButton.click();\n\n  // Wait for the popover to close\n  await page.waitForSelector('[role=\"dialog\"]', { state: \"hidden\" });\n}\n\nexport async function startNewChat(page: Page) {\n  await page.getByTestId(\"AppSidebar/new-session\").click();\n  await expect(page.getByTestId(\"chat-intro\")).toBeVisible();\n}\n"
  },
  {
    "path": "web/tests/e2e/utils/chatStream.ts",
    "content": "import { expect, Page, Route } from \"@playwright/test\";\nimport { sendMessage } from \"./chatActions\";\n\nexport type ChatStreamObject = Record<string, unknown> & {\n  type?: string;\n};\n\nexport type ChatStreamPacket = Record<string, unknown> & {\n  obj?: ChatStreamObject;\n};\n\nfunction parseStreamLine(rawLine: string): ChatStreamPacket | null {\n  const trimmed = rawLine.trim();\n  if (!trimmed) {\n    return null;\n  }\n\n  const withoutPrefix = trimmed.startsWith(\"data:\")\n    ? trimmed.slice(\"data:\".length).trim()\n    : trimmed;\n  if (!withoutPrefix || withoutPrefix === \"[DONE]\") {\n    return null;\n  }\n\n  try {\n    return JSON.parse(withoutPrefix) as ChatStreamPacket;\n  } catch {\n    return null;\n  }\n}\n\nexport function parseChatStreamBody(body: string): ChatStreamPacket[] {\n  return body\n    .split(\"\\n\")\n    .map(parseStreamLine)\n    .filter((packet): packet is ChatStreamPacket => packet !== null);\n}\n\nexport function getPacketObjectsByType(\n  packets: ChatStreamPacket[],\n  packetType: string\n): ChatStreamObject[] {\n  return packets\n    .map((packet) => packet.obj)\n    .filter(\n      (obj): obj is ChatStreamObject =>\n        !!obj && typeof obj.type === \"string\" && obj.type === packetType\n    );\n}\n\nexport async function sendMessageAndCaptureStreamPackets(\n  page: Page,\n  message: string,\n  options?: {\n    mockLlmResponse?: string;\n    payloadOverrides?: Record<string, unknown>;\n    waitForAiMessage?: boolean;\n  }\n): Promise<ChatStreamPacket[]> {\n  const requestUrlPattern = \"**/api/chat/send-chat-message\";\n  const mockLlmResponse = options?.mockLlmResponse;\n  const payloadOverrides = options?.payloadOverrides;\n  const waitForAiMessage = options?.waitForAiMessage ?? true;\n  const routeHandler = async (route: Route) => {\n    if (!mockLlmResponse && !payloadOverrides) {\n      await route.continue();\n      return;\n    }\n\n    const request = route.request();\n    const payload = request.postDataJSON() as Record<string, unknown>;\n    if (payloadOverrides) {\n      Object.assign(payload, payloadOverrides);\n    }\n    if (mockLlmResponse) {\n      payload.mock_llm_response = mockLlmResponse;\n    }\n\n    await route.continue({\n      postData: JSON.stringify(payload),\n      headers: {\n        ...request.headers(),\n        \"content-type\": \"application/json\",\n      },\n    });\n  };\n\n  await page.route(requestUrlPattern, routeHandler);\n\n  const responsePromise = page.waitForResponse((response) => {\n    if (\n      response.request().method() !== \"POST\" ||\n      !response.url().includes(\"/api/chat/send-chat-message\")\n    ) {\n      return false;\n    }\n\n    const requestBody = response.request().postData();\n    if (!requestBody) {\n      return true;\n    }\n\n    try {\n      const payload = JSON.parse(requestBody) as Record<string, unknown>;\n      return payload.message === message;\n    } catch {\n      return true;\n    }\n  });\n\n  try {\n    if (waitForAiMessage) {\n      await sendMessage(page, message);\n    } else {\n      await page.locator(\"#onyx-chat-input-textarea\").click();\n      await page.locator(\"#onyx-chat-input-textarea\").fill(message);\n      await page.locator(\"#onyx-chat-input-send-button\").click();\n      await page\n        .waitForFunction(() => window.location.href.includes(\"chatId=\"), null, {\n          timeout: 10000,\n        })\n        .catch(() => {});\n    }\n\n    const response = await responsePromise;\n    expect(response.ok()).toBeTruthy();\n    const body = await response.text();\n    return parseChatStreamBody(body);\n  } finally {\n    await page.unroute(requestUrlPattern, routeHandler);\n  }\n}\n"
  },
  {
    "path": "web/tests/e2e/utils/dragUtils.ts",
    "content": "import { Locator, Page } from \"@playwright/test\";\n\n/**\n * Drag \"source\" above (higher Y) \"target\" by using mouse events.\n * Positions the cursor on the lower half of source, then moves to the top half of the target.\n */\nexport async function dragElementAbove(\n  sourceLocator: Locator,\n  targetLocator: Locator,\n  page: Page\n) {\n  // Get bounding boxes\n  const sourceBB = await sourceLocator.boundingBox();\n  const targetBB = await targetLocator.boundingBox();\n  if (!sourceBB || !targetBB) {\n    throw new Error(\"Source/target bounding boxes not found.\");\n  }\n\n  // Move over source, press mouse down\n  await page.mouse.move(\n    sourceBB.x + sourceBB.width / 2,\n    sourceBB.y + sourceBB.height * 0.75 // Move to 3/4 down the source element\n  );\n  await page.mouse.down();\n\n  // Move to a point slightly above the target's center\n  await page.mouse.move(\n    targetBB.x + targetBB.width / 2,\n    targetBB.y + targetBB.height * 0.1, // Move to 1/10 down the target element\n    { steps: 20 } // Increase steps for smoother drag\n  );\n  await page.mouse.up();\n\n  // Increase wait time for DnD transitions\n  await page.waitForTimeout(200);\n}\n\n/**\n * Drag \"source\" below (higher Y → lower Y) \"target\" using mouse events.\n */\nexport async function dragElementBelow(\n  sourceLocator: Locator,\n  targetLocator: Locator,\n  page: Page\n) {\n  // Get bounding boxes\n  const sourceBB = await targetLocator.boundingBox();\n  const targetBB = await sourceLocator.boundingBox();\n  if (!sourceBB || !targetBB) {\n    throw new Error(\"Source/target bounding boxes not found.\");\n  }\n\n  // Move over source, press mouse down\n  await page.mouse.move(\n    sourceBB.x + sourceBB.width / 2,\n    sourceBB.y + sourceBB.height * 0.25 // Move to 1/4 down the source element\n  );\n  await page.mouse.down();\n\n  // Move to a point well below the target's bottom edge\n  await page.mouse.move(\n    targetBB.x + targetBB.width / 2,\n    targetBB.y + targetBB.height + 50, // Move 50 pixels below the target element\n    { steps: 50 } // Keep the same number of steps for smooth drag\n  );\n\n  // Hold for a moment to ensure the drag is registered\n  await page.waitForTimeout(500);\n\n  await page.mouse.up();\n\n  // Wait for DnD transitions and potential animations\n  await page.waitForTimeout(1000);\n}\n"
  },
  {
    "path": "web/tests/e2e/utils/mcpServer.ts",
    "content": "import { spawn, ChildProcessWithoutNullStreams } from \"child_process\";\nimport path from \"path\";\nimport net from \"net\";\nimport fs from \"fs\";\n\ninterface StartServerOptions {\n  bindHost?: string;\n  publicHost?: string;\n  port?: number;\n  pythonBinary?: string;\n  scriptPath?: string;\n  readyTimeoutMs?: number;\n}\n\nconst DEFAULT_BIND_HOST =\n  process.env.MCP_TEST_SERVER_BIND_HOST ||\n  process.env.MCP_TEST_SERVER_HOST ||\n  \"127.0.0.1\";\nconst DEFAULT_PUBLIC_HOST =\n  process.env.MCP_TEST_SERVER_PUBLIC_HOST || DEFAULT_BIND_HOST;\nconst DEFAULT_PORT = Number(process.env.MCP_TEST_SERVER_PORT || \"8004\");\nconst READY_TIMEOUT_MS = 25_000;\n\nexport class McpServerProcess {\n  private process: ChildProcessWithoutNullStreams;\n  private bindHost: string;\n  private publicHost: string;\n  private port: number;\n  private stopped = false;\n\n  constructor(\n    proc: ChildProcessWithoutNullStreams,\n    bindHost: string,\n    publicHost: string,\n    port: number\n  ) {\n    this.process = proc;\n    this.bindHost = bindHost;\n    this.publicHost = publicHost;\n    this.port = port;\n  }\n\n  get address(): { host: string; port: number } {\n    return { host: this.publicHost, port: this.port };\n  }\n\n  get bindAddress(): { host: string; port: number } {\n    return { host: this.bindHost, port: this.port };\n  }\n\n  async stop(signal: NodeJS.Signals = \"SIGTERM\"): Promise<void> {\n    if (this.stopped) return;\n    this.stopped = true;\n    return new Promise((resolve) => {\n      const timeout = setTimeout(() => {\n        if (!this.process.killed) {\n          this.process.kill(\"SIGKILL\");\n        }\n        resolve();\n      }, 5_000);\n\n      this.process.once(\"exit\", () => {\n        clearTimeout(timeout);\n        resolve();\n      });\n\n      this.process.kill(signal);\n    });\n  }\n}\n\nfunction waitForPort(\n  host: string,\n  port: number,\n  proc: ChildProcessWithoutNullStreams,\n  timeoutMs: number\n): Promise<void> {\n  return new Promise((resolve, reject) => {\n    const start = Date.now();\n\n    const connectHost =\n      host === \"0.0.0.0\" || host === \"::\" ? \"127.0.0.1\" : host;\n\n    const check = () => {\n      if (proc.exitCode !== null) {\n        reject(\n          new Error(\n            `MCP server process exited with code ${proc.exitCode ?? \"unknown\"}`\n          )\n        );\n        return;\n      }\n\n      const socket = net.createConnection({ host: connectHost, port });\n\n      socket.once(\"connect\", () => {\n        socket.destroy();\n        resolve();\n      });\n\n      socket.once(\"error\", () => {\n        socket.destroy();\n        if (Date.now() - start >= timeoutMs) {\n          reject(\n            new Error(\n              `Timed out waiting for MCP OAuth test server to listen on ${host}:${port}`\n            )\n          );\n        } else {\n          setTimeout(check, 250);\n        }\n      });\n    };\n\n    check();\n  });\n}\n\nexport async function startMcpOauthServer(\n  options: StartServerOptions = {}\n): Promise<McpServerProcess> {\n  const bindHost = options.bindHost || DEFAULT_BIND_HOST;\n  const publicHost = options.publicHost || DEFAULT_PUBLIC_HOST;\n  const port = options.port ?? DEFAULT_PORT;\n  const pythonBinary = options.pythonBinary || \"python3\";\n  const readyTimeout = options.readyTimeoutMs ?? READY_TIMEOUT_MS;\n\n  const scriptPath =\n    options.scriptPath ||\n    path.resolve(\n      __dirname,\n      \"../../../..\",\n      \"backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_oauth.py\"\n    );\n  const scriptDir = path.dirname(scriptPath);\n\n  const proc = spawn(pythonBinary, [scriptPath, port.toString()], {\n    cwd: scriptDir,\n    stdio: [\"pipe\", \"pipe\", \"pipe\"],\n    env: {\n      ...process.env,\n      MCP_SERVER_PORT: port.toString(),\n      MCP_SERVER_HOST: bindHost,\n      MCP_SERVER_PUBLIC_HOST: publicHost,\n    },\n  });\n\n  proc.stdout.on(\"data\", (chunk) => {\n    const message = chunk.toString();\n    console.log(`[mcp-oauth-server] ${message.trimEnd()}`);\n  });\n  proc.stderr.on(\"data\", (chunk) => {\n    const message = chunk.toString();\n    console.error(`[mcp-oauth-server:stderr] ${message.trimEnd()}`);\n  });\n\n  proc.on(\"error\", (err) => {\n    console.error(\"[mcp-oauth-server] failed to start\", err);\n  });\n\n  await waitForPort(bindHost, port, proc, readyTimeout);\n\n  return new McpServerProcess(proc, bindHost, publicHost, port);\n}\n\nexport async function startMcpApiKeyServer(\n  options: StartServerOptions & { apiKey?: string } = {}\n): Promise<McpServerProcess> {\n  const bindHost = options.bindHost || DEFAULT_BIND_HOST;\n  const publicHost = options.publicHost || DEFAULT_PUBLIC_HOST;\n  const port = options.port ?? DEFAULT_PORT;\n  const pythonBinary = options.pythonBinary || \"python3\";\n  const readyTimeout = options.readyTimeoutMs ?? READY_TIMEOUT_MS;\n  const apiKey = options.apiKey || \"test-api-key-12345\";\n\n  const scriptPath =\n    options.scriptPath ||\n    path.resolve(\n      __dirname,\n      \"../../../..\",\n      \"backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_api_key.py\"\n    );\n  const scriptDir = path.dirname(scriptPath);\n\n  const proc = spawn(pythonBinary, [scriptPath, apiKey, port.toString()], {\n    cwd: scriptDir,\n    stdio: [\"pipe\", \"pipe\", \"pipe\"],\n    env: {\n      ...process.env,\n      MCP_SERVER_PORT: port.toString(),\n      MCP_SERVER_HOST: bindHost,\n      MCP_SERVER_PUBLIC_HOST: publicHost,\n    },\n  });\n\n  proc.stdout.on(\"data\", (chunk) => {\n    const message = chunk.toString();\n    console.log(`[mcp-api-key-server] ${message.trimEnd()}`);\n  });\n  proc.stderr.on(\"data\", (chunk) => {\n    const message = chunk.toString();\n    console.error(`[mcp-api-key-server:stderr] ${message.trimEnd()}`);\n  });\n\n  proc.on(\"error\", (err) => {\n    console.error(\"[mcp-api-key-server] failed to start\", err);\n  });\n\n  await waitForPort(bindHost, port, proc, readyTimeout);\n\n  return new McpServerProcess(proc, bindHost, publicHost, port);\n}\n\n/**\n * Start the MCP Google OAuth Pass-Through test server.\n *\n * This server validates Google OAuth tokens that are passed through from Onyx.\n * It calls Google's tokeninfo endpoint to verify the token is valid.\n *\n * For testing pass-through OAuth scenarios where Onyx forwards the user's\n * Google OAuth access token to an MCP server.\n */\nexport async function startMcpGoogleOAuthServer(\n  options: StartServerOptions & { requiredScopes?: string[] } = {}\n): Promise<McpServerProcess> {\n  const bindHost = options.bindHost || DEFAULT_BIND_HOST;\n  const publicHost = options.publicHost || DEFAULT_PUBLIC_HOST;\n  const port = options.port ?? 8006; // Default to 8006 to not conflict with other MCP servers\n  const pythonBinary = options.pythonBinary || \"python3\";\n  const readyTimeout = options.readyTimeoutMs ?? READY_TIMEOUT_MS;\n  const requiredScopes = options.requiredScopes || [];\n\n  const scriptPath =\n    options.scriptPath ||\n    path.resolve(\n      __dirname,\n      \"../../../..\",\n      \"backend/tests/integration/mock_services/mcp_test_server/run_mcp_server_google_oauth.py\"\n    );\n  const scriptDir = path.dirname(scriptPath);\n\n  const proc = spawn(pythonBinary, [scriptPath, port.toString()], {\n    cwd: scriptDir,\n    stdio: [\"pipe\", \"pipe\", \"pipe\"],\n    env: {\n      ...process.env,\n      MCP_SERVER_PORT: port.toString(),\n      MCP_SERVER_HOST: bindHost,\n      MCP_SERVER_PUBLIC_HOST: publicHost,\n      MCP_GOOGLE_REQUIRED_SCOPES: requiredScopes.join(\",\"),\n    },\n  });\n\n  proc.stdout.on(\"data\", (chunk) => {\n    const message = chunk.toString();\n    console.log(`[mcp-google-oauth-server] ${message.trimEnd()}`);\n  });\n  proc.stderr.on(\"data\", (chunk) => {\n    const message = chunk.toString();\n    console.error(`[mcp-google-oauth-server:stderr] ${message.trimEnd()}`);\n  });\n\n  proc.on(\"error\", (err) => {\n    console.error(\"[mcp-google-oauth-server] failed to start\", err);\n  });\n\n  await waitForPort(bindHost, port, proc, readyTimeout);\n\n  return new McpServerProcess(proc, bindHost, publicHost, port);\n}\n"
  },
  {
    "path": "web/tests/e2e/utils/onyxApiClient.ts",
    "content": "import { APIRequestContext, expect, APIResponse } from \"@playwright/test\";\n\nconst E2E_LLM_PROVIDER_API_KEY =\n  process.env.E2E_LLM_PROVIDER_API_KEY ||\n  process.env.OPENAI_API_KEY ||\n  \"e2e-placeholder-api-key-not-used\";\n\nconst E2E_WEB_SEARCH_API_KEY =\n  process.env.E2E_WEB_SEARCH_API_KEY ||\n  process.env.EXA_API_KEY ||\n  process.env.BRAVE_SEARCH_API_KEY ||\n  process.env.SERPER_API_KEY ||\n  \"e2e-placeholder-web-search-key\";\n\nconst E2E_IMAGE_GEN_API_KEY =\n  process.env.E2E_IMAGE_GEN_API_KEY ||\n  process.env.OPENAI_API_KEY ||\n  E2E_LLM_PROVIDER_API_KEY;\n\n/**\n * API Client for Onyx backend operations in E2E tests.\n *\n * Provides a type-safe, abstracted interface for interacting with the Onyx backend API.\n * All methods handle authentication via the Playwright page context and include automatic\n * error handling, logging, and polling for asynchronous operations.\n *\n * **Available Endpoints:**\n *\n * **Connectors:**\n * - `createFileConnector(name)` - Creates a file connector with mock credentials\n * - `deleteCCPair(ccPairId)` - Deletes a connector-credential pair (with polling until complete)\n *\n * **Document Sets:**\n * - `createDocumentSet(name, ccPairIds)` - Creates a document set from connector pairs\n * - `deleteDocumentSet(id)` - Deletes a document set (with polling until complete)\n *\n * **LLM Providers:**\n * - `listLlmProviders()` - Lists LLM providers (admin endpoint, includes is_public)\n * - `ensurePublicProvider(name?)` - Idempotently creates a public default LLM provider\n * - `createRestrictedProvider(name, groupId)` - Creates a restricted LLM provider assigned to a group\n * - `setProviderAsDefault(id)` - Sets an LLM provider as the default for chat\n * - `deleteProvider(id)` - Deletes an LLM provider\n *\n * **User Groups:**\n * - `getUserGroups()` - Lists all user groups (including default system groups)\n * - `createUserGroup(name)` - Creates a user group\n * - `deleteUserGroup(id)` - Deletes a user group\n *\n * **Tool Providers:**\n * - `createWebSearchProvider(type, name)` - Creates and activates a web search provider\n * - `deleteWebSearchProvider(id)` - Deletes a web search provider\n * - `createImageGenerationConfig(id, model, provider, isDefault)` - Creates an image generation config (enables image gen tool)\n * - `deleteImageGenerationConfig(id)` - Deletes an image generation config\n *\n * **Chat Sessions:**\n * - `createChatSession(description, personaId?)` - Creates a chat session with a description\n * - `deleteChatSession(chatId)` - Deletes a chat session\n *\n * **Projects:**\n * - `createProject(name)` - Creates a project with a name\n * - `deleteProject(projectId)` - Deletes a project\n *\n * **Usage Example:**\n * ```typescript\n * // From a test with a Page:\n * const client = new OnyxApiClient(page.request);\n *\n * // From global-setup with a standalone context (pass baseURL explicitly):\n * const ctx = await request.newContext({ baseURL, storageState: \"admin_auth.json\" });\n * const client = new OnyxApiClient(ctx, baseURL);\n * ```\n *\n * @param request - Playwright APIRequestContext with authenticated session\n *                  (e.g. `page.request`, `context.request`, or `request.newContext()`)\n * @param baseUrl - Optional base URL override (e.g. `http://localhost:3000`).\n *                  Defaults to `process.env.BASE_URL` or `http://localhost:3000`.\n *                  Pass this when the Playwright-configured baseURL differs from\n *                  the env var (e.g. in `global-setup.ts` where the config value\n *                  is authoritative).\n */\nexport class OnyxApiClient {\n  private baseUrl: string;\n\n  constructor(\n    private request: APIRequestContext,\n    baseUrl?: string\n  ) {\n    this.baseUrl = `${\n      baseUrl ?? process.env.BASE_URL ?? \"http://localhost:3000\"\n    }/api`;\n  }\n\n  /**\n   * Generic GET request to the API.\n   *\n   * @param endpoint - API endpoint path (e.g., \"/manage/document-set/123\")\n   * @returns The API response\n   */\n  private async get(endpoint: string): Promise<APIResponse> {\n    return await this.request.get(`${this.baseUrl}${endpoint}`);\n  }\n\n  /**\n   * Generic POST request to the API.\n   *\n   * @param endpoint - API endpoint path (e.g., \"/manage/admin/document-set\")\n   * @param data - Optional request body data\n   * @returns The API response\n   */\n  private async post(endpoint: string, data?: any): Promise<APIResponse> {\n    return await this.request.post(`${this.baseUrl}${endpoint}`, {\n      data,\n    });\n  }\n\n  /**\n   * Generic DELETE request to the API.\n   *\n   * @param endpoint - API endpoint path (e.g., \"/manage/admin/document-set/123\")\n   * @returns The API response\n   */\n  private async delete(endpoint: string): Promise<APIResponse> {\n    return await this.request.delete(`${this.baseUrl}${endpoint}`);\n  }\n\n  /**\n   * Generic PUT request to the API.\n   *\n   * @param endpoint - API endpoint path (e.g., \"/manage/admin/cc-pair/123/status\")\n   * @param data - Optional request body data\n   * @returns The API response\n   */\n  private async put(endpoint: string, data?: any): Promise<APIResponse> {\n    return await this.request.put(`${this.baseUrl}${endpoint}`, {\n      data,\n    });\n  }\n\n  /**\n   * Handle API response - parse JSON and handle errors.\n   *\n   * @param response - The API response to handle\n   * @param errorMessage - Error message prefix to use if request failed\n   * @returns Parsed JSON response data\n   * @throws Error if the response is not ok\n   */\n  private async handleResponse<T>(\n    response: APIResponse,\n    errorMessage: string\n  ): Promise<T> {\n    if (!response.ok()) {\n      const errorText = await response.text();\n      throw new Error(`${errorMessage}: ${response.status()} - ${errorText}`);\n    }\n    return await response.json();\n  }\n\n  /**\n   * Handle API response with logging on error (non-throwing).\n   * Used for cleanup operations where we want to log errors but not fail the test.\n   *\n   * @param response - The API response to handle\n   * @param errorMessage - Error message prefix to use if request failed\n   * @returns true if response was ok, false otherwise\n   */\n  private async handleResponseSoft(\n    response: APIResponse,\n    errorMessage: string\n  ): Promise<boolean> {\n    if (!response.ok()) {\n      const errorText = await response.text();\n      console.error(\n        `[OnyxApiClient] ${errorMessage}: ${response.status()} - ${errorText}`\n      );\n      return false;\n    }\n    return true;\n  }\n\n  /**\n   * Wait for a resource to be deleted by polling until 404.\n   * Uses Playwright's expect.poll() with automatic retry and exponential backoff.\n   * We poll here because the deletion endpoint is asynchronous (kicks off a celery task)\n   * and we want to wait for it to complete.\n   *\n   * @param endpoint - API endpoint to poll (e.g., \"/manage/document-set/123\")\n   * @param resourceType - Human-readable resource type for error messages (e.g., \"Document set\")\n   * @param resourceId - The resource ID for error messages\n   * @param timeout - Maximum time to wait in milliseconds (default: 30000)\n   * @returns Promise that resolves when resource returns 404, or rejects on timeout\n   */\n  private async waitForDeletion(\n    endpoint: string,\n    resourceType: string,\n    resourceId: number | string,\n    timeout: number = 30000\n  ): Promise<void> {\n    await expect\n      .poll(\n        async () => {\n          const checkResponse = await this.get(endpoint);\n          return checkResponse.status();\n        },\n        {\n          message: `${resourceType} ${resourceId} was not deleted`,\n          timeout,\n        }\n      )\n      .toBe(404);\n  }\n\n  /**\n   * Log an action with consistent formatting.\n   *\n   * @param message - The message to log (will be prefixed with \"[OnyxApiClient]\")\n   */\n  private log(message: string): void {\n    console.log(`[OnyxApiClient] ${message}`);\n  }\n\n  /**\n   * Checks whether the vector database is enabled in this deployment.\n   *\n   * @returns true if vector DB is enabled, false if DISABLE_VECTOR_DB is set\n   */\n  async isVectorDbEnabled(): Promise<boolean> {\n    const response = await this.get(\"/settings\");\n    const data = await this.handleResponse<{ vector_db_enabled: boolean }>(\n      response,\n      \"Failed to fetch settings\"\n    );\n    return data.vector_db_enabled;\n  }\n\n  /**\n   * Creates a simple file connector with mock credentials.\n   * This enables the Knowledge toggle in assistant creation.\n   *\n   * @param connectorName - Name for the connector (defaults to \"Test File Connector\")\n   * @param accessType - Access type for the connector (defaults to \"public\")\n   * @returns The connector-credential pair ID (ccPairId)\n   * @throws Error if the connector creation fails\n   */\n  async createFileConnector(\n    connectorName: string = \"Test File Connector\",\n    accessType: \"public\" | \"private\" = \"public\"\n  ): Promise<number> {\n    const response = await this.post(\n      \"/manage/admin/connector-with-mock-credential\",\n      {\n        name: connectorName,\n        source: \"file\",\n        input_type: \"load_state\",\n        connector_specific_config: {\n          file_locations: [],\n        },\n        refresh_freq: null,\n        prune_freq: null,\n        indexing_start: null,\n        access_type: accessType,\n        groups: [],\n      }\n    );\n\n    const responseData = await this.handleResponse<{ data: number }>(\n      response,\n      \"Failed to create connector\"\n    );\n\n    const ccPairId = responseData.data;\n    this.log(\n      `Created file connector: ${connectorName} (CC Pair ID: ${ccPairId})`\n    );\n\n    // Pause the connector immediately to prevent indexing during tests\n    await this.pauseConnector(ccPairId);\n\n    return ccPairId;\n  }\n\n  /**\n   * Pauses a connector-credential pair to prevent indexing.\n   *\n   * @param ccPairId - The connector-credential pair ID to pause\n   * @throws Error if the pause operation fails\n   */\n  async pauseConnector(ccPairId: number): Promise<void> {\n    const response = await this.put(\n      `/manage/admin/cc-pair/${ccPairId}/status`,\n      {\n        status: \"PAUSED\",\n      }\n    );\n\n    await this.handleResponse(response, \"Failed to pause connector\");\n    this.log(`Paused connector CC Pair ID: ${ccPairId}`);\n  }\n\n  /**\n   * Creates a document set from connector-credential pairs.\n   *\n   * @param documentSetName - Name for the document set\n   * @param ccPairIds - Array of connector-credential pair IDs to include in the set\n   * @returns The document set ID\n   * @throws Error if the document set creation fails\n   */\n  async createDocumentSet(\n    documentSetName: string,\n    ccPairIds: number[]\n  ): Promise<number> {\n    const response = await this.post(\"/manage/admin/document-set\", {\n      name: documentSetName,\n      description: `Test document set: ${documentSetName}`,\n      cc_pair_ids: ccPairIds,\n      is_public: true,\n      users: [],\n      groups: [],\n      federated_connectors: [],\n    });\n\n    const documentSetId = await this.handleResponse<number>(\n      response,\n      \"Failed to create document set\"\n    );\n\n    this.log(`Created document set: ${documentSetName} (ID: ${documentSetId})`);\n    return documentSetId;\n  }\n\n  /**\n   * Deletes a document set and waits for deletion to complete.\n   * Uses polling to verify the deletion was successful (waits for 404 response).\n   *\n   * @param documentSetId - The document set ID to delete\n   * @returns Promise that resolves when deletion is confirmed, or rejects on timeout\n   */\n  async deleteDocumentSet(documentSetId: number): Promise<void> {\n    const response = await this.delete(\n      `/manage/admin/document-set/${documentSetId}`\n    );\n\n    if (\n      !(await this.handleResponseSoft(\n        response,\n        `Failed to delete document set ${documentSetId}`\n      ))\n    ) {\n      return;\n    }\n\n    this.log(`Initiated deletion for document set: ${documentSetId}`);\n    await this.waitForDeletion(\n      `/manage/document-set/${documentSetId}`,\n      \"Document set\",\n      documentSetId\n    );\n    this.log(`Document set ${documentSetId} deletion confirmed`);\n  }\n\n  /**\n   * Deletes a connector-credential pair and waits for deletion to complete.\n   * Fetches the CC pair details to get connector/credential IDs, then initiates deletion\n   * and polls until the deletion is confirmed (waits for 404 response).\n   *\n   * @param ccPairId - The connector-credential pair ID to delete\n   * @returns Promise that resolves when deletion is confirmed, or rejects on timeout\n   */\n  async deleteCCPair(ccPairId: number): Promise<void> {\n    // Get CC pair details to extract connector_id and credential_id\n    const getResponse = await this.get(`/manage/admin/cc-pair/${ccPairId}`);\n\n    if (\n      !(await this.handleResponseSoft(\n        getResponse,\n        `Failed to get CC pair ${ccPairId} details`\n      ))\n    ) {\n      return;\n    }\n\n    const ccPairInfo = await getResponse.json();\n    const {\n      connector: { id: connectorId },\n      credential: { id: credentialId },\n    } = ccPairInfo;\n\n    // Delete using the deletion-attempt endpoint\n    const deleteResponse = await this.post(\"/manage/admin/deletion-attempt\", {\n      connector_id: connectorId,\n      credential_id: credentialId,\n    });\n\n    if (\n      !(await this.handleResponseSoft(\n        deleteResponse,\n        `Failed to delete CC pair ${ccPairId}`\n      ))\n    ) {\n      return;\n    }\n\n    this.log(\n      `Initiated deletion for CC pair: ${ccPairId} (connector: ${connectorId}, credential: ${credentialId})`\n    );\n    await this.waitForDeletion(\n      `/manage/admin/cc-pair/${ccPairId}`,\n      \"CC pair\",\n      ccPairId\n    );\n    this.log(`CC pair ${ccPairId} deletion confirmed`);\n  }\n\n  /**\n   * Creates a restricted LLM provider assigned to a specific user group.\n   *\n   * @param providerName - Name for the provider\n   * @param groupId - The user group ID that should have access to this provider\n   * @returns The provider ID\n   * @throws Error if the provider creation fails\n   */\n  async createRestrictedProvider(\n    providerName: string,\n    groupId: number\n  ): Promise<number> {\n    const response = await this.request.put(\n      `${this.baseUrl}/admin/llm/provider?is_creation=true`,\n      {\n        data: {\n          name: providerName,\n          provider: \"openai\",\n          api_key: E2E_LLM_PROVIDER_API_KEY,\n          default_model_name: \"gpt-4o\",\n          is_public: false,\n          groups: [groupId],\n          personas: [],\n        },\n      }\n    );\n\n    const responseData = await this.handleResponse<{ id: number }>(\n      response,\n      \"Failed to create restricted provider\"\n    );\n\n    this.log(\n      `Created restricted LLM provider: ${providerName} (ID: ${responseData.id}, Group: ${groupId})`\n    );\n    return responseData.id;\n  }\n\n  /**\n   * Lists LLM providers visible to the admin (includes `is_public`).\n   *\n   * @returns Array of LLM providers with id and is_public fields\n   */\n  async listLlmProviders(): Promise<\n    Array<{\n      id: number;\n      is_public?: boolean;\n    }>\n  > {\n    const response = await this.get(\"/admin/llm/provider\");\n    const data = await this.handleResponse<{\n      providers: Array<{ id: number; is_public?: boolean }>;\n    }>(response, \"Failed to list LLM providers\");\n    return data.providers;\n  }\n\n  /**\n   * Ensure at least one public LLM provider exists and is set as default.\n   *\n   * Idempotent — returns `null` if a public provider already exists,\n   * or the new provider ID if one was created.\n   *\n   * @param providerName - Name for the provider (default: \"PW Default Provider\")\n   * @returns The provider ID if one was created, or `null` if already present\n   */\n  async ensurePublicProvider(\n    providerName: string = \"PW Default Provider\"\n  ): Promise<number | null> {\n    const providers = await this.listLlmProviders();\n    const hasPublic = providers.some((p) => p.is_public);\n\n    if (hasPublic) {\n      return null;\n    }\n\n    const defaultModelName = \"gpt-4o\";\n    const response = await this.request.put(\n      `${this.baseUrl}/admin/llm/provider?is_creation=true`,\n      {\n        data: {\n          name: providerName,\n          provider: \"openai\",\n          api_key: E2E_LLM_PROVIDER_API_KEY,\n          is_public: true,\n          groups: [],\n          personas: [],\n          model_configurations: [{ name: defaultModelName, is_visible: true }],\n        },\n      }\n    );\n\n    const responseData = await this.handleResponse<{ id: number }>(\n      response,\n      \"Failed to create public provider\"\n    );\n\n    // Set as default so get_default_llm() works (needed for tokenization, etc.)\n    await this.setProviderAsDefault(responseData.id, defaultModelName);\n\n    this.log(\n      `Created public LLM provider: ${providerName} (ID: ${responseData.id})`\n    );\n    return responseData.id;\n  }\n\n  /**\n   * Sets an LLM provider + model as the default for chat.\n   *\n   * @param providerId - The provider ID to set as default\n   * @param modelName - The model name to set as default\n   */\n  async setProviderAsDefault(\n    providerId: number,\n    modelName: string\n  ): Promise<void> {\n    const response = await this.post(\"/admin/llm/default\", {\n      provider_id: providerId,\n      model_name: modelName,\n    });\n\n    await this.handleResponseSoft(\n      response,\n      `Failed to set provider ${providerId} as default`\n    );\n\n    this.log(`Set LLM provider ${providerId} as default`);\n  }\n\n  /**\n   * Deletes an LLM provider.\n   *\n   * @param providerId - The provider ID to delete\n   */\n  async deleteProvider(\n    providerId: number,\n    { force = false }: { force?: boolean } = {}\n  ): Promise<void> {\n    const query = force ? \"?force=true\" : \"\";\n    const response = await this.delete(\n      `/admin/llm/provider/${providerId}${query}`\n    );\n\n    await this.handleResponseSoft(\n      response,\n      `Failed to delete provider ${providerId}`\n    );\n\n    this.log(`Deleted LLM provider: ${providerId}`);\n  }\n\n  /**\n   * Creates a user group.\n   *\n   * @param groupName - Name for the user group\n   * @param userIds - Optional list of user IDs to add to the group\n   * @param ccPairIds - Optional list of connector-credential pair IDs to associate\n   * @returns The user group ID\n   * @throws Error if the user group creation fails\n   */\n  async createUserGroup(\n    groupName: string,\n    userIds: string[] = [],\n    ccPairIds: number[] = []\n  ): Promise<number> {\n    const response = await this.post(\"/manage/admin/user-group\", {\n      name: groupName,\n      user_ids: userIds,\n      cc_pair_ids: ccPairIds,\n    });\n\n    const responseData = await this.handleResponse<{ id: number }>(\n      response,\n      \"Failed to create user group\"\n    );\n\n    this.log(`Created user group: ${groupName} (ID: ${responseData.id})`);\n    return responseData.id;\n  }\n\n  /**\n   * Polls until a user group has finished syncing (is_up_to_date === true).\n   * Newly created groups start syncing immediately; many mutation endpoints\n   * reject requests while the group is still syncing.\n   */\n  async waitForGroupSync(\n    groupId: number,\n    timeout: number = 30000\n  ): Promise<void> {\n    await expect\n      .poll(\n        async () => {\n          const res = await this.get(\"/manage/admin/user-group\");\n          const groups = await res.json();\n          const group = groups.find(\n            (g: { id: number; is_up_to_date: boolean }) => g.id === groupId\n          );\n          return group?.is_up_to_date ?? false;\n        },\n        {\n          message: `User group ${groupId} did not finish syncing`,\n          timeout,\n        }\n      )\n      .toBe(true);\n    this.log(`User group ${groupId} finished syncing`);\n  }\n\n  /**\n   * Deletes a user group.\n   *\n   * @param groupId - The user group ID to delete\n   */\n  async deleteUserGroup(groupId: number): Promise<void> {\n    const response = await this.delete(`/manage/admin/user-group/${groupId}`);\n\n    await this.handleResponseSoft(\n      response,\n      `Failed to delete user group ${groupId}`\n    );\n\n    this.log(`Deleted user group: ${groupId}`);\n  }\n\n  /**\n   * Lists all user groups.\n   */\n  async getUserGroups(): Promise<\n    Array<{ id: number; name: string; is_default: boolean }>\n  > {\n    const response = await this.get(\n      \"/manage/admin/user-group?include_default=true\"\n    );\n    return response.json();\n  }\n\n  async setUserRole(\n    email: string,\n    role: \"admin\" | \"curator\" | \"global_curator\" | \"basic\",\n    explicitOverride = false\n  ): Promise<void> {\n    const response = await this.request.patch(\n      `${this.baseUrl}/manage/set-user-role`,\n      {\n        data: {\n          user_email: email,\n          new_role: role,\n          explicit_override: explicitOverride,\n        },\n      }\n    );\n    await this.handleResponse(response, `Failed to set user role for ${email}`);\n    this.log(`Updated role for ${email} to ${role}`);\n  }\n\n  async deleteMcpServer(serverId: number): Promise<boolean> {\n    const response = await this.request.delete(\n      `${this.baseUrl}/admin/mcp/server/${serverId}`\n    );\n    const success = await this.handleResponseSoft(\n      response,\n      `Failed to delete MCP server ${serverId}`\n    );\n    if (success) {\n      this.log(`Deleted MCP server ${serverId}`);\n    }\n    return success;\n  }\n\n  async deleteCustomTool(toolId: number): Promise<boolean> {\n    const response = await this.request.delete(\n      `${this.baseUrl}/admin/tool/custom/${toolId}`\n    );\n    const success = await this.handleResponseSoft(\n      response,\n      `Failed to delete custom tool ${toolId}`\n    );\n    if (success) {\n      this.log(`Deleted custom tool ${toolId}`);\n    }\n    return success;\n  }\n\n  async listOpenApiTools(): Promise<\n    Array<{ id: number; name: string; description: string }>\n  > {\n    const response = await this.get(\"/tool/openapi\");\n    return await this.handleResponse(response, \"Failed to list OpenAPI tools\");\n  }\n\n  async findToolByName(\n    name: string\n  ): Promise<{ id: number; name: string; description: string } | null> {\n    const tools = await this.listOpenApiTools();\n    return tools.find((tool) => tool.name === name) ?? null;\n  }\n\n  async deleteAgent(agentId: number): Promise<boolean> {\n    const response = await this.request.delete(\n      `${this.baseUrl}/persona/${agentId}`\n    );\n    const success = await this.handleResponseSoft(\n      response,\n      `Failed to delete assistant ${agentId}`\n    );\n    if (success) {\n      this.log(`Deleted assistant ${agentId}`);\n    }\n    return success;\n  }\n\n  async getAssistant(agentId: number): Promise<{\n    id: number;\n    is_public: boolean;\n    users: Array<{ id: string }>;\n    groups: number[];\n    tools: Array<{ id: number; mcp_server_id?: number | null }>;\n  }> {\n    const response = await this.get(`/persona/${agentId}`);\n    return await this.handleResponse(\n      response,\n      `Failed to fetch assistant ${agentId}`\n    );\n  }\n\n  async updateAgentSharing(\n    agentId: number,\n    options: {\n      userIds?: string[];\n      groupIds?: number[];\n      isPublic?: boolean;\n      labelIds?: number[];\n    }\n  ): Promise<void> {\n    const response = await this.request.patch(\n      `${this.baseUrl}/persona/${agentId}/share`,\n      {\n        data: {\n          user_ids: options.userIds,\n          group_ids: options.groupIds,\n          is_public: options.isPublic,\n          label_ids: options.labelIds,\n        },\n      }\n    );\n    await this.handleResponse(\n      response,\n      `Failed to update sharing for assistant ${agentId}`\n    );\n    this.log(\n      `Updated assistant sharing: ${agentId} (is_public=${String(\n        options.isPublic\n      )})`\n    );\n  }\n\n  async listMcpServers(): Promise<any[]> {\n    const response = await this.get(`/admin/mcp/servers`);\n    const data = await this.handleResponse<{ mcp_servers: any[] }>(\n      response,\n      \"Failed to list MCP servers\"\n    );\n    return data.mcp_servers;\n  }\n\n  async listAgents(options?: {\n    includeDeleted?: boolean;\n    getEditable?: boolean;\n  }): Promise<any[]> {\n    const params = new URLSearchParams();\n    if (options?.includeDeleted) {\n      params.set(\"include_deleted\", \"true\");\n    }\n    if (options?.getEditable ?? true) {\n      params.set(\"get_editable\", \"true\");\n    }\n    const query = params.toString();\n    const response = await this.get(\n      `/admin/persona${query ? `?${query}` : \"\"}`\n    );\n    return await this.handleResponse<any[]>(\n      response,\n      \"Failed to list assistants\"\n    );\n  }\n\n  async findAgentByName(\n    name: string,\n    options?: { includeDeleted?: boolean; getEditable?: boolean }\n  ): Promise<any | null> {\n    const assistants = await this.listAgents(options);\n    return assistants.find((assistant) => assistant.name === name) ?? null;\n  }\n\n  async registerUser(email: string, password: string): Promise<{ id: string }> {\n    const response = await this.request.post(`${this.baseUrl}/auth/register`, {\n      data: {\n        email,\n        username: email,\n        password,\n      },\n    });\n    const data = await this.handleResponse<{ id: string }>(\n      response,\n      `Failed to register user ${email}`\n    );\n    return data;\n  }\n\n  async getUserByEmail(email: string): Promise<{\n    id: string;\n    email: string;\n    role: string;\n  } | null> {\n    const response = await this.request.get(\n      `${this.baseUrl}/manage/users/accepted`,\n      {\n        params: {\n          q: email,\n          page_size: 1,\n        },\n      }\n    );\n    const data = await this.handleResponse<{ items: any[] }>(\n      response,\n      `Failed to fetch user ${email}`\n    );\n    const [user] = data.items;\n    return user\n      ? {\n          id: user.id,\n          email: user.email,\n          role: user.role,\n        }\n      : null;\n  }\n\n  async setCuratorStatus(\n    userGroupId: string,\n    userId: string,\n    isCurator: boolean = true\n  ): Promise<void> {\n    const response = await this.request.post(\n      `${this.baseUrl}/manage/admin/user-group/${userGroupId}/set-curator`,\n      {\n        data: {\n          user_id: userId,\n          is_curator: isCurator,\n        },\n      }\n    );\n    await this.handleResponse(\n      response,\n      `Failed to update curator status for ${userId}`\n    );\n  }\n\n  /**\n   * Create and activate a web search provider for testing.\n   * Uses env-backed keys when available and falls back to a placeholder key.\n   *\n   * @param providerType - Type of provider: \"exa\", \"brave\", \"serper\", \"google_pse\", \"searxng\"\n   * @param name - Optional name for the provider (defaults to \"Test Provider\")\n   * @returns The created provider ID\n   */\n  async createWebSearchProvider(\n    providerType: \"exa\" | \"brave\" | \"serper\" | \"google_pse\" | \"searxng\" = \"exa\",\n    name: string = \"Test Provider\"\n  ): Promise<number> {\n    const config: Record<string, string> = {};\n    if (providerType === \"google_pse\") {\n      config.search_engine_id = \"test-engine-id\";\n    }\n    if (providerType === \"searxng\") {\n      config.searxng_base_url = \"https://test-searxng.example.com\";\n    }\n\n    const response = await this.post(\"/admin/web-search/search-providers\", {\n      name,\n      provider_type: providerType,\n      api_key: E2E_WEB_SEARCH_API_KEY,\n      api_key_changed: true,\n      config: Object.keys(config).length > 0 ? config : undefined,\n      activate: true,\n    });\n\n    const data = await this.handleResponse<{ id: number }>(\n      response,\n      `Failed to create web search provider ${providerType}`\n    );\n    return data.id;\n  }\n\n  /**\n   * Delete a web search provider.\n   *\n   * @param providerId - ID of the provider to delete\n   */\n  async deleteWebSearchProvider(providerId: number): Promise<void> {\n    const response = await this.delete(\n      `/admin/web-search/search-providers/${providerId}`\n    );\n    if (!response.ok()) {\n      const errorText = await response.text();\n      console.warn(\n        `Failed to delete web search provider ${providerId}: ${response.status()} - ${errorText}`\n      );\n    }\n  }\n\n  /**\n   * Creates an image generation configuration for testing.\n   * This enables the image generation tool in assistants.\n   *\n   * API: POST /api/admin/image-generation/config\n   * Schema (ImageGenerationConfigCreate):\n   *   - image_provider_id: string (required) - unique key\n   *   - model_name: string (required) - e.g., \"dall-e-3\"\n   *   - provider: string - e.g., \"openai\"\n   *   - api_key: string\n   *   - is_default: boolean\n   *\n   * @param imageProviderId - Unique identifier for the image generation config\n   * @param modelName - Model name (defaults to \"dall-e-3\")\n   * @param provider - Provider name (defaults to \"openai\")\n   * @param isDefault - Whether this should be the default config (defaults to true)\n   * @returns The image_provider_id\n   */\n  async createImageGenerationConfig(\n    imageProviderId: string,\n    modelName: string = \"dall-e-3\",\n    provider: string = \"openai\",\n    isDefault: boolean = true\n  ): Promise<string> {\n    const response = await this.post(\"/admin/image-generation/config\", {\n      image_provider_id: imageProviderId,\n      model_name: modelName,\n      provider: provider,\n      api_key: E2E_IMAGE_GEN_API_KEY,\n      is_default: isDefault,\n    });\n\n    await this.handleResponse(\n      response,\n      \"Failed to create image generation config\"\n    );\n\n    this.log(`Created image generation config: ${imageProviderId}`);\n    return imageProviderId;\n  }\n\n  /**\n   * Deletes an image generation configuration.\n   *\n   * @param imageProviderId - The image_provider_id to delete\n   */\n  async deleteImageGenerationConfig(imageProviderId: string): Promise<void> {\n    const response = await this.delete(\n      `/admin/image-generation/config/${imageProviderId}`\n    );\n\n    await this.handleResponseSoft(\n      response,\n      `Failed to delete image generation config ${imageProviderId}`\n    );\n\n    this.log(`Deleted image generation config: ${imageProviderId}`);\n  }\n\n  // === Discord Bot Methods ===\n\n  /**\n   * Creates a Discord guild configuration.\n   * Returns the guild config with registration key (shown once).\n   *\n   * @returns The created guild config with id and registration_key\n   */\n  async createDiscordGuild(): Promise<{\n    id: number;\n    registration_key: string;\n    guild_name: string | null;\n  }> {\n    const response = await this.post(\"/manage/admin/discord-bot/guilds\");\n\n    const guild = await this.handleResponse<{\n      id: number;\n      registration_key: string;\n      guild_name: string | null;\n    }>(response, \"Failed to create Discord guild config\");\n\n    this.log(\n      `Created Discord guild config: id=${guild.id}, registration_key=${guild.registration_key}`\n    );\n    return guild;\n  }\n\n  /**\n   * Lists all Discord guild configurations.\n   *\n   * @returns Array of guild configs\n   */\n  async listDiscordGuilds(): Promise<\n    Array<{\n      id: number;\n      guild_id: string | null;\n      guild_name: string | null;\n      enabled: boolean;\n    }>\n  > {\n    const response = await this.get(\"/manage/admin/discord-bot/guilds\");\n    return await this.handleResponse(response, \"Failed to list Discord guilds\");\n  }\n\n  /**\n   * Gets a specific Discord guild configuration.\n   *\n   * @param guildId - The internal guild config ID\n   * @returns The guild config or null if not found\n   */\n  async getDiscordGuild(guildId: number): Promise<{\n    id: number;\n    guild_id: string | null;\n    guild_name: string | null;\n    enabled: boolean;\n    default_persona_id: number | null;\n  } | null> {\n    const response = await this.get(\n      `/manage/admin/discord-bot/guilds/${guildId}`\n    );\n    if (response.status() === 404) {\n      return null;\n    }\n    return await this.handleResponse(\n      response,\n      `Failed to get Discord guild ${guildId}`\n    );\n  }\n\n  /**\n   * Updates a Discord guild configuration.\n   *\n   * @param guildId - The internal guild config ID\n   * @param updates - The fields to update\n   * @returns The updated guild config\n   */\n  async updateDiscordGuild(\n    guildId: number,\n    updates: { enabled?: boolean; default_persona_id?: number | null }\n  ): Promise<{\n    id: number;\n    guild_id: string | null;\n    guild_name: string | null;\n    enabled: boolean;\n  }> {\n    const response = await this.request.patch(\n      `${this.baseUrl}/manage/admin/discord-bot/guilds/${guildId}`,\n      { data: updates }\n    );\n    return await this.handleResponse(\n      response,\n      `Failed to update Discord guild ${guildId}`\n    );\n  }\n\n  /**\n   * Deletes a Discord guild configuration.\n   *\n   * @param guildId - The internal guild config ID\n   */\n  async deleteDiscordGuild(guildId: number): Promise<void> {\n    const response = await this.delete(\n      `/manage/admin/discord-bot/guilds/${guildId}`\n    );\n\n    await this.handleResponseSoft(\n      response,\n      `Failed to delete Discord guild ${guildId}`\n    );\n\n    this.log(`Deleted Discord guild config: ${guildId}`);\n  }\n\n  /**\n   * Lists channels for a Discord guild configuration.\n   *\n   * @param guildConfigId - The internal guild config ID\n   * @returns Array of channel configs\n   */\n  async listDiscordChannels(guildConfigId: number): Promise<\n    Array<{\n      id: number;\n      channel_id: string;\n      channel_name: string;\n      channel_type: string;\n      enabled: boolean;\n    }>\n  > {\n    const response = await this.get(\n      `/manage/admin/discord-bot/guilds/${guildConfigId}/channels`\n    );\n    return await this.handleResponse(\n      response,\n      `Failed to list channels for guild ${guildConfigId}`\n    );\n  }\n\n  /**\n   * Updates a Discord channel configuration.\n   *\n   * @param guildConfigId - The internal guild config ID\n   * @param channelConfigId - The internal channel config ID\n   * @param updates - The fields to update\n   * @returns The updated channel config\n   */\n  async updateDiscordChannel(\n    guildConfigId: number,\n    channelConfigId: number,\n    updates: {\n      enabled?: boolean;\n      thread_only_mode?: boolean;\n      require_bot_invocation?: boolean;\n      persona_override_id?: number | null;\n    }\n  ): Promise<{\n    id: number;\n    channel_id: string;\n    channel_name: string;\n    enabled: boolean;\n  }> {\n    const response = await this.request.patch(\n      `${this.baseUrl}/manage/admin/discord-bot/guilds/${guildConfigId}/channels/${channelConfigId}`,\n      { data: updates }\n    );\n    return await this.handleResponse(\n      response,\n      `Failed to update channel ${channelConfigId}`\n    );\n  }\n\n  // === User Management Methods ===\n\n  async deactivateUser(email: string): Promise<void> {\n    const response = await this.request.patch(\n      `${this.baseUrl}/manage/admin/deactivate-user`,\n      { data: { user_email: email } }\n    );\n    await this.handleResponse(response, `Failed to deactivate user ${email}`);\n    this.log(`Deactivated user: ${email}`);\n  }\n\n  async activateUser(email: string): Promise<void> {\n    const response = await this.request.patch(\n      `${this.baseUrl}/manage/admin/activate-user`,\n      { data: { user_email: email } }\n    );\n    await this.handleResponse(response, `Failed to activate user ${email}`);\n    this.log(`Activated user: ${email}`);\n  }\n\n  async deleteUser(email: string): Promise<void> {\n    const response = await this.request.delete(\n      `${this.baseUrl}/manage/admin/delete-user`,\n      { data: { user_email: email } }\n    );\n    await this.handleResponse(response, `Failed to delete user ${email}`);\n    this.log(`Deleted user: ${email}`);\n  }\n\n  async cancelInvite(email: string): Promise<void> {\n    const response = await this.request.patch(\n      `${this.baseUrl}/manage/admin/remove-invited-user`,\n      { data: { user_email: email } }\n    );\n    await this.handleResponse(response, `Failed to cancel invite for ${email}`);\n    this.log(`Cancelled invite for: ${email}`);\n  }\n\n  async inviteUsers(emails: string[]): Promise<void> {\n    const response = await this.put(\"/manage/admin/users\", { emails });\n    await this.handleResponse(response, `Failed to invite users`);\n    this.log(`Invited users: ${emails.join(\", \")}`);\n  }\n\n  async setPersonalName(name: string): Promise<void> {\n    const response = await this.request.patch(\n      `${this.baseUrl}/user/personalization`,\n      { data: { name } }\n    );\n    await this.handleResponse(\n      response,\n      `Failed to set personal name to ${name}`\n    );\n    this.log(`Set personal name: ${name}`);\n  }\n\n  // === Chat Session Methods ===\n\n  /**\n   * Creates a chat session with a specific description.\n   *\n   * @param description - The description/title for the chat session\n   * @param personaId - The persona/assistant ID to use (defaults to 0)\n   * @returns The chat session ID\n   * @throws Error if the chat session creation fails\n   */\n  async createChatSession(\n    description: string,\n    personaId: number = 0\n  ): Promise<string> {\n    const response = await this.post(\"/chat/create-chat-session\", {\n      persona_id: personaId,\n      description,\n    });\n    const data = await this.handleResponse<{ chat_session_id: string }>(\n      response,\n      \"Failed to create chat session\"\n    );\n    this.log(\n      `Created chat session: ${description} (ID: ${data.chat_session_id})`\n    );\n    return data.chat_session_id;\n  }\n\n  /**\n   * Deletes a chat session.\n   *\n   * @param chatId - The chat session ID to delete\n   */\n  async deleteChatSession(chatId: string): Promise<void> {\n    const response = await this.delete(`/chat/delete-chat-session/${chatId}`);\n    await this.handleResponseSoft(\n      response,\n      `Failed to delete chat session ${chatId}`\n    );\n    this.log(`Deleted chat session: ${chatId}`);\n  }\n\n  // === Project Methods ===\n\n  /**\n   * Creates a project with a specific name.\n   *\n   * @param name - The name for the project\n   * @returns The project ID\n   * @throws Error if the project creation fails\n   */\n  async createProject(name: string): Promise<number> {\n    const response = await this.post(\n      `/user/projects/create?name=${encodeURIComponent(name)}`\n    );\n    const data = await this.handleResponse<{ id: number }>(\n      response,\n      \"Failed to create project\"\n    );\n    this.log(`Created project: ${name} (ID: ${data.id})`);\n    return data.id;\n  }\n\n  /**\n   * Deletes a project.\n   *\n   * @param projectId - The project ID to delete\n   */\n  async deleteProject(projectId: number): Promise<void> {\n    const response = await this.delete(`/user/projects/${projectId}`);\n    await this.handleResponseSoft(\n      response,\n      `Failed to delete project ${projectId}`\n    );\n    this.log(`Deleted project: ${projectId}`);\n  }\n\n  /**\n   * Sets the current user's default app mode preference.\n   *\n   * @param mode - The default mode to persist (\"CHAT\" or \"SEARCH\")\n   */\n  async setDefaultAppMode(mode: \"CHAT\" | \"SEARCH\"): Promise<void> {\n    const response = await this.request.patch(\n      `${this.baseUrl}/user/default-app-mode`,\n      {\n        data: { default_app_mode: mode },\n      }\n    );\n    await this.handleResponse(\n      response,\n      `Failed to set default app mode to ${mode}`\n    );\n    this.log(`Set default app mode: ${mode}`);\n  }\n}\n"
  },
  {
    "path": "web/tests/e2e/utils/pageStateLogger.ts",
    "content": "import { Page } from \"@playwright/test\";\n\nconst RUN_START_MS = Date.now();\n\n/**\n * Captures a lightweight snapshot of the current page state to aid debugging of flaky flows.\n * Limits the amount of text collected so logs remain readable in CI.\n */\nexport async function logPageState(\n  page: Page,\n  context: string,\n  logTag = \"[e2e-page-debug]\"\n) {\n  const sinceStartMs = Date.now() - RUN_START_MS;\n  const snapshot: Record<string, unknown> = {\n    context,\n    timestamp: new Date().toISOString(),\n    elapsedMs: sinceStartMs,\n    elapsedSeconds: Number((sinceStartMs / 1000).toFixed(3)),\n  };\n\n  if (page.isClosed()) {\n    snapshot.url = \"<page-closed>\";\n    snapshot.title = \"<unavailable>\";\n    snapshot.readyState = \"<page-closed>\";\n    snapshot.bodySnippet = \"<unavailable>\";\n    snapshot.visibleButtons = \"<unavailable>\";\n    snapshot.visibleInputs = \"<unavailable>\";\n    snapshot.note = \"page was already closed before dump\";\n    console.log(`${logTag} ${JSON.stringify(snapshot)}`);\n    return;\n  }\n\n  snapshot.url = page.url();\n\n  try {\n    snapshot.title = await page.title();\n  } catch {\n    snapshot.title = \"<unavailable>\";\n  }\n\n  try {\n    snapshot.readyState = await page.evaluate(\n      () => document.readyState ?? \"<unknown>\"\n    );\n  } catch {\n    snapshot.readyState = \"<unknown>\";\n  }\n\n  try {\n    const bodyText = await page.evaluate(() => document.body?.innerText ?? \"\");\n    snapshot.bodySnippet = bodyText.trim().replace(/\\s+/g, \" \").slice(0, 500);\n  } catch {\n    snapshot.bodySnippet = \"<unavailable>\";\n  }\n\n  try {\n    snapshot.visibleButtons = await page.evaluate(() =>\n      Array.from(document.querySelectorAll(\"button\"))\n        .slice(0, 5)\n        .map((btn) => ({\n          text: btn.innerText,\n          disabled: (btn as HTMLButtonElement).disabled,\n          dataTestId: btn.getAttribute(\"data-testid\"),\n        }))\n    );\n  } catch {\n    snapshot.visibleButtons = \"<unavailable>\";\n  }\n\n  try {\n    snapshot.visibleInputs = await page.evaluate(() =>\n      Array.from(\n        document.querySelectorAll<HTMLInputElement | HTMLTextAreaElement>(\n          \"input, textarea\"\n        )\n      )\n        .slice(0, 5)\n        .map((input) => ({\n          name: input.name,\n          type: input instanceof HTMLInputElement ? input.type : \"textarea\",\n          value: input.value,\n          dataTestId: input.getAttribute(\"data-testid\"),\n        }))\n    );\n  } catch {\n    snapshot.visibleInputs = \"<unavailable>\";\n  }\n\n  console.log(`${logTag} ${JSON.stringify(snapshot)}`);\n}\n"
  },
  {
    "path": "web/tests/e2e/utils/theme.ts",
    "content": "import type { Page } from \"@playwright/test\";\n\nexport const THEMES = [\"light\", \"dark\"] as const;\nexport type Theme = (typeof THEMES)[number];\n\n/**\n * Injects the given theme into localStorage via `addInitScript` so that\n * `next-themes` applies it on first render. Call this in `beforeEach`\n * **before** any `page.goto()`.\n */\nexport async function setThemeBeforeNavigation(\n  page: Page,\n  theme: Theme\n): Promise<void> {\n  await page.addInitScript((t: string) => {\n    localStorage.setItem(\"theme\", t);\n  }, theme);\n}\n"
  },
  {
    "path": "web/tests/e2e/utils/tools.ts",
    "content": "// Shared test utilities for tool/action management and greetings\n\nimport { Page } from \"@playwright/test\";\n\nexport const TOOL_IDS = {\n  actionToggle: '[data-testid=\"action-management-toggle\"]',\n  options: '[data-testid=\"tool-options\"]',\n  // These IDs are derived from tool.name in the app\n  searchOption: '[data-testid=\"tool-option-internal_search\"]',\n  webSearchOption: '[data-testid=\"tool-option-web_search\"]',\n  imageGenerationOption: '[data-testid=\"tool-option-generate_image\"]',\n  // Generic toggle selector used inside tool options\n  toggleInput: 'input[type=\"checkbox\"], input[type=\"radio\"], [role=\"switch\"]',\n} as const;\n\nexport { GREETING_MESSAGES } from \"../../../src/lib/chat/greetingMessages\";\n\n// Wait for the unified assistant greeting and return its text\nexport async function waitForUnifiedGreeting(page: Page): Promise<string> {\n  const el = await page.waitForSelector('[data-testid=\"onyx-logo\"]', {\n    timeout: 5000,\n  });\n  const text = (await el.textContent())?.trim() || \"\";\n  return text;\n}\n\n// Ensure the Action Management popover is open\nexport async function openActionManagement(page: Page): Promise<void> {\n  const actionToggle = page.locator(TOOL_IDS.actionToggle);\n  await actionToggle.waitFor();\n  await actionToggle.click();\n  await page.locator(TOOL_IDS.options).waitFor();\n}\n\n// Check presence of the Action Management toggle\nexport async function isActionTogglePresent(page: Page): Promise<boolean> {\n  const el = await page.$(TOOL_IDS.actionToggle);\n  return !!el;\n}\n\n/**\n * Click the disable/enable (slash) button on a tool line item.\n * The button is hidden until hover; we hover first, then force-click\n * using aria-label which matches the button's current state.\n */\nexport async function toggleToolDisabled(\n  page: Page,\n  toolSelector: string\n): Promise<void> {\n  const toolOption = page.locator(toolSelector);\n  await toolOption.hover();\n  const slashButton = toolOption.locator(\n    'button[aria-label=\"Disable\"], button[aria-label=\"Enable\"]'\n  );\n  await slashButton.first().click({ force: true });\n}\n\n/**\n * Open the source management secondary view for the internal search tool.\n * Assumes the ActionsPopover is already open.\n */\nexport async function openSourceManagement(page: Page): Promise<void> {\n  const searchOption = page.locator(TOOL_IDS.searchOption);\n  await searchOption\n    .locator('button[aria-label=\"Configure Connectors\"]')\n    .click();\n  // Wait for the source list Back button (indicates secondary view is open)\n  await page.locator('button[aria-label=\"Back\"]').waitFor({ timeout: 5000 });\n}\n\n/**\n * Get a source toggle Switch in the source management view by display name.\n */\nexport function getSourceToggle(page: Page, sourceName: string) {\n  return page.locator(`[aria-label=\"Toggle ${sourceName}\"]`);\n}\n"
  },
  {
    "path": "web/tests/e2e/utils/visualRegression.ts",
    "content": "import type { Locator, Page, PageScreenshotOptions } from \"@playwright/test\";\nimport { expect } from \"@playwright/test\";\n\n/**\n * Whether visual regression assertions are enabled.\n *\n * When `VISUAL_REGRESSION=true` is set, `expectScreenshot()` calls\n * `toHaveScreenshot()` which will fail if the screenshot differs from the\n * stored baseline.\n *\n * When disabled (the default), screenshots are still captured and saved but\n * mismatches do NOT fail the test — this lets CI collect screenshots for later\n * review without gating on them.\n */\nconst VISUAL_REGRESSION_ENABLED =\n  process.env.VISUAL_REGRESSION?.toLowerCase() === \"true\";\n\n/**\n * Default selectors to mask across all screenshots so that dynamic content\n * (timestamps, avatars, etc.) doesn't cause spurious diffs.\n */\nconst DEFAULT_MASK_SELECTORS: string[] = [\n  // Add selectors for dynamic content that should be masked, e.g.:\n  // '[data-testid=\"timestamp\"]',\n  // '[data-testid=\"user-avatar\"]',\n];\n\n/**\n * Default selectors to hide (visibility: hidden) across all screenshots.\n * These elements are overlays or ephemeral UI that would cause spurious diffs.\n */\nconst DEFAULT_HIDE_SELECTORS: string[] = [\n  '[data-testid=\"toast-container\"]',\n  // TODO: Remove once it loads consistently.\n  '[data-testid=\"actions-container\"]',\n];\n\ninterface ScreenshotOptions {\n  /**\n   * Name for the screenshot file. If omitted, Playwright auto-generates one\n   * from the test title.\n   */\n  name?: string;\n\n  /**\n   * Additional CSS selectors to mask (on top of the defaults).\n   * Masked areas are replaced with a pink box so they don't cause diffs.\n   */\n  mask?: string[];\n\n  /**\n   * CSS selectors for elements to hide (visibility: hidden) before taking\n   * the screenshot. This removes elements from the visual output while\n   * preserving their layout space, preventing size-related inconsistencies.\n   */\n  hide?: string[];\n\n  /**\n   * If true, capture the full scrollable page instead of just the viewport.\n   * Defaults to false.\n   */\n  fullPage?: boolean;\n\n  /**\n   * Override the max diff pixel ratio for this specific screenshot.\n   */\n  maxDiffPixelRatio?: number;\n\n  /**\n   * Override the per-channel threshold for this specific screenshot.\n   */\n  threshold?: number;\n\n  /**\n   * Additional Playwright screenshot options.\n   */\n  screenshotOptions?: PageScreenshotOptions;\n}\n\ninterface ElementScreenshotOptions {\n  /**\n   * Name for the screenshot file. If omitted, Playwright auto-generates one\n   * from the test title.\n   */\n  name?: string;\n\n  /**\n   * Additional CSS selectors to mask (on top of the defaults).\n   * The selectors are resolved relative to the page the locator belongs to.\n   */\n  mask?: string[];\n\n  /**\n   * CSS selectors for elements to hide (visibility: hidden) before taking\n   * the screenshot. This removes elements from the visual output while\n   * preserving their layout space, preventing size-related inconsistencies.\n   */\n  hide?: string[];\n\n  /**\n   * Override the max diff pixel ratio for this specific screenshot.\n   */\n  maxDiffPixelRatio?: number;\n\n  /**\n   * Override the per-channel threshold for this specific screenshot.\n   */\n  threshold?: number;\n}\n\n/**\n * Wait for all running CSS animations and transitions on the page to finish\n * before proceeding.  This prevents screenshot tests from being non-deterministic\n * when animated elements (e.g. slide-in cards) are still mid-flight.\n *\n * The implementation:\n *   1. Yields one animation frame so that any pending animations have a chance\n *      to register with the Web Animations API.\n *   2. Calls `Promise.allSettled` on every active animation's `.finished`\n *      promise so we wait for completion (or cancellation) of all of them.\n */\nexport async function waitForAnimations(page: Page): Promise<void> {\n  await page.evaluate(async () => {\n    // Allow any freshly-scheduled animations to start\n    await new Promise<void>((resolve) =>\n      requestAnimationFrame(() => resolve())\n    );\n    // Wait for every currently-registered animation to finish (or be cancelled)\n    const animations = document\n      .getAnimations()\n      .filter(\n        (animation) => animation.effect?.getTiming().iterations !== Infinity\n      );\n    await Promise.allSettled(animations.map((animation) => animation.finished));\n  });\n}\n\n/**\n * Wait for every **visible** `<img>` on the page to finish loading (or error).\n *\n * This prevents screenshot flakiness caused by images that have been added to\n * the DOM but haven't been decoded yet — `networkidle` only guarantees that\n * fewer than 2 connections are in flight, not that every image is painted.\n *\n * Only images that are actually visible and in (or near) the viewport are\n * waited on. Hidden images (e.g. the `dark:hidden` / `hidden dark:block`\n * alternates created by `createLogoIcon`) and offscreen lazy-loaded images\n * are skipped so they don't force a needless timeout.\n *\n * Times out after `timeoutMs` (default 5 000 ms) so a single broken image\n * doesn't block the entire test forever.\n */\nexport async function waitForImages(\n  page: Page,\n  timeoutMs: number = 5_000\n): Promise<void> {\n  await page.evaluate(async (timeout) => {\n    const images = Array.from(document.querySelectorAll(\"img\")).filter(\n      (img) => {\n        // Skip images hidden via CSS (display:none, visibility:hidden, etc.)\n        // This covers createLogoIcon's dark-mode alternates.\n        const style = getComputedStyle(img);\n        if (\n          style.display === \"none\" ||\n          style.visibility === \"hidden\" ||\n          style.opacity === \"0\"\n        ) {\n          return false;\n        }\n\n        // Skip images that have no layout box (zero size or detached).\n        const rect = img.getBoundingClientRect();\n        if (rect.width === 0 && rect.height === 0) return false;\n\n        // Skip images far below the viewport (lazy-loaded, not yet needed).\n        if (rect.top > window.innerHeight * 2) return false;\n\n        return true;\n      }\n    );\n\n    await Promise.race([\n      Promise.allSettled(\n        images.map((img) => {\n          if (img.complete) return Promise.resolve();\n          return new Promise<void>((resolve) => {\n            img.addEventListener(\"load\", () => resolve(), { once: true });\n            img.addEventListener(\"error\", () => resolve(), { once: true });\n          });\n        })\n      ),\n      new Promise<void>((resolve) => setTimeout(resolve, timeout)),\n    ]);\n  }, timeoutMs);\n}\n\n/**\n * Take a screenshot and optionally assert it matches the stored baseline.\n *\n * Behavior depends on the `VISUAL_REGRESSION` environment variable:\n * - `VISUAL_REGRESSION=true`  → assert via `toHaveScreenshot()` (fails on diff)\n * - Otherwise                 → capture and save the screenshot for review only\n *\n * Usage:\n * ```ts\n * import { expectScreenshot } from \"@tests/e2e/utils/visualRegression\";\n *\n * test(\"admin page looks right\", async ({ page }) => {\n *   await page.goto(\"/admin/settings\");\n *   await expectScreenshot(page, { name: \"admin-settings\" });\n * });\n * ```\n */\nexport async function expectScreenshot(\n  page: Page,\n  options: ScreenshotOptions = {}\n): Promise<void> {\n  const {\n    name,\n    mask = [],\n    hide = [],\n    fullPage = false,\n    maxDiffPixelRatio,\n    threshold,\n  } = options;\n\n  // Merge default hide selectors with per-call selectors\n  const allHideSelectors = [...DEFAULT_HIDE_SELECTORS, ...hide];\n\n  // Hide elements by setting visibility: hidden\n  let styleHandle;\n  if (allHideSelectors.length > 0) {\n    styleHandle = await page.addStyleTag({\n      content: allHideSelectors\n        .map(\n          (selector) =>\n            `${selector} { visibility: hidden !important; opacity: 0 !important; pointer-events: none !important; }`\n        )\n        .join(\"\\n\"),\n    });\n  }\n\n  try {\n    // Combine default masks with per-call masks\n    const allMaskSelectors = [...DEFAULT_MASK_SELECTORS, ...mask];\n    const maskLocators = allMaskSelectors.map((selector) =>\n      page.locator(selector)\n    );\n\n    // Wait for images to finish loading / decoding so that logo icons\n    // and other <img> elements are fully painted before the screenshot.\n    await waitForImages(page);\n\n    // Wait for any in-flight CSS animations / transitions to settle so that\n    // screenshots are deterministic (e.g. slide-in card animations on the\n    // onboarding flow).\n    await waitForAnimations(page);\n\n    // Build the screenshot name array (Playwright expects string[])\n    const nameArg = name ? [name + \".png\"] : undefined;\n\n    if (VISUAL_REGRESSION_ENABLED) {\n      // Assert mode — fail the test if the screenshot differs from baseline\n      const screenshotOpts = {\n        fullPage,\n        mask: maskLocators.length > 0 ? maskLocators : undefined,\n        ...(maxDiffPixelRatio !== undefined && { maxDiffPixelRatio }),\n        ...(threshold !== undefined && { threshold }),\n      };\n\n      if (nameArg) {\n        await expect(page).toHaveScreenshot(nameArg, screenshotOpts);\n      } else {\n        await expect(page).toHaveScreenshot(screenshotOpts);\n      }\n    } else {\n      // Capture-only mode — save the screenshot without asserting\n      const screenshotPath = name\n        ? `output/screenshots/${name}.png`\n        : undefined;\n      await page.screenshot({\n        path: screenshotPath,\n        fullPage,\n        mask: maskLocators.length > 0 ? maskLocators : undefined,\n        ...options.screenshotOptions,\n      });\n    }\n  } finally {\n    // Remove the injected style tag to avoid affecting subsequent screenshots/assertions\n    if (styleHandle) {\n      await styleHandle.evaluate((el: HTMLStyleElement) => el.remove());\n    }\n  }\n}\n\n/**\n * Take a screenshot of a specific element and optionally assert it matches\n * the stored baseline.\n *\n * Works like {@link expectScreenshot} but scopes the screenshot to a single\n * `Locator` instead of the full page.\n *\n * Usage:\n * ```ts\n * import { expectElementScreenshot } from \"@tests/e2e/utils/visualRegression\";\n *\n * test(\"sidebar looks right\", async ({ page }) => {\n *   await page.goto(\"/app\");\n *   const sidebar = page.getByTestId(\"AppSidebar/new-session\");\n *   await expectElementScreenshot(sidebar, { name: \"sidebar-new-session\" });\n * });\n * ```\n */\nexport async function expectElementScreenshot(\n  locator: Locator,\n  options: ElementScreenshotOptions = {}\n): Promise<void> {\n  const { name, mask = [], hide = [], maxDiffPixelRatio, threshold } = options;\n\n  const page = locator.page();\n\n  // Merge default hide selectors with per-call selectors\n  const allHideSelectors = [...DEFAULT_HIDE_SELECTORS, ...hide];\n\n  // Hide elements by setting visibility: hidden\n  let styleHandle;\n  if (allHideSelectors.length > 0) {\n    styleHandle = await page.addStyleTag({\n      content: allHideSelectors\n        .map(\n          (selector) =>\n            `${selector} { visibility: hidden !important; opacity: 0 !important; pointer-events: none !important; }`\n        )\n        .join(\"\\n\"),\n    });\n  }\n\n  try {\n    // Combine default masks with per-call masks\n    const allMaskSelectors = [...DEFAULT_MASK_SELECTORS, ...mask];\n    const maskLocators = allMaskSelectors.map((selector) =>\n      page.locator(selector)\n    );\n\n    // Wait for images to finish loading / decoding.\n    await waitForImages(page);\n\n    // Wait for any in-flight CSS animations / transitions to settle so that\n    // element screenshots are deterministic (same reasoning as expectScreenshot).\n    await waitForAnimations(page);\n\n    // Build the screenshot name array (Playwright expects string[])\n    const nameArg = name ? [name + \".png\"] : undefined;\n\n    if (VISUAL_REGRESSION_ENABLED) {\n      const screenshotOpts = {\n        mask: maskLocators.length > 0 ? maskLocators : undefined,\n        ...(maxDiffPixelRatio !== undefined && { maxDiffPixelRatio }),\n        ...(threshold !== undefined && { threshold }),\n      };\n\n      if (nameArg) {\n        await expect(locator).toHaveScreenshot(nameArg, screenshotOpts);\n      } else {\n        await expect(locator).toHaveScreenshot(screenshotOpts);\n      }\n    } else {\n      // Capture-only mode — save the screenshot without asserting\n      const screenshotPath = name\n        ? `output/screenshots/${name}.png`\n        : undefined;\n      await locator.screenshot({\n        path: screenshotPath,\n        mask: maskLocators.length > 0 ? maskLocators : undefined,\n      });\n    }\n  } finally {\n    // Remove the injected style tag to avoid affecting subsequent screenshots/assertions\n    if (styleHandle) {\n      await styleHandle.evaluate((el: HTMLStyleElement) => el.remove());\n    }\n  }\n}\n"
  },
  {
    "path": "web/tests/setup/fileMock.js",
    "content": "// Mock for static image imports used by Next.js Image component\n// Returns an object compatible with Next.js static import structure\nmodule.exports = {\n  src: \"/test-image.png\",\n  height: 100,\n  width: 100,\n  blurDataURL: \"data:image/png;base64,test\",\n};\n"
  },
  {
    "path": "web/tests/setup/jest.setup.ts",
    "content": "import \"@testing-library/jest-dom\";\nimport { TextEncoder, TextDecoder } from \"util\";\n\n// Tell React 18+ this is a test environment where act() is available\n// This suppresses \"not configured to support act(...)\" warnings\n// @ts-ignore\nglobalThis.IS_REACT_ACT_ENVIRONMENT = true;\n\n// Polyfill TextEncoder/TextDecoder (required for some libraries)\nglobal.TextEncoder = TextEncoder as any;\nglobal.TextDecoder = TextDecoder as any;\n\n// Only set up browser-specific mocks if we're in a jsdom environment\nif (typeof window !== \"undefined\") {\n  // Polyfill fetch for jsdom\n  // @ts-ignore\n  import(\"whatwg-fetch\");\n\n  // Mock BroadcastChannel for JSDOM\n  global.BroadcastChannel = class BroadcastChannel {\n    constructor(public name: string) {}\n    postMessage() {}\n    close() {}\n    addEventListener() {}\n    removeEventListener() {}\n    dispatchEvent() {\n      return true;\n    }\n  } as any;\n\n  // Mock window.matchMedia for responsive components\n  Object.defineProperty(window, \"matchMedia\", {\n    writable: true,\n    value: jest.fn().mockImplementation((query) => ({\n      matches: false,\n      media: query,\n      onchange: null,\n      addListener: jest.fn(), // deprecated\n      removeListener: jest.fn(), // deprecated\n      addEventListener: jest.fn(),\n      removeEventListener: jest.fn(),\n      dispatchEvent: jest.fn(),\n    })),\n  });\n\n  // Mock IntersectionObserver\n  global.IntersectionObserver = class IntersectionObserver {\n    constructor() {}\n    disconnect() {}\n    observe() {}\n    takeRecords() {\n      return [];\n    }\n    unobserve() {}\n  } as any;\n\n  // Mock ResizeObserver\n  global.ResizeObserver = class ResizeObserver {\n    constructor() {}\n    disconnect() {}\n    observe() {}\n    unobserve() {}\n  } as any;\n\n  // Mock window.scrollTo\n  global.scrollTo = jest.fn();\n}\n\n// Suppress specific known console errors that are not actionable in tests.\n// This pattern is recommended for handling third-party library warnings:\n// https://github.com/testing-library/user-event/issues/1114#issuecomment-1876164351\n//\n// Radix UI's compose-refs package triggers state updates during component unmount\n// which causes React to emit \"not configured to support act\" warnings. This happens\n// because the updates occur in React's commit phase, outside of any act() boundary.\n// The IS_REACT_ACT_ENVIRONMENT flag doesn't help because jsdom's globalThis is set\n// up before our setup file runs.\nconst SUPPRESSED_ERRORS = [\n  \"The current testing environment is not configured to support act\",\n] as const;\n\nconst originalError = console.error;\nconsole.error = (...args: any[]) => {\n  if (\n    typeof args[0] === \"string\" &&\n    SUPPRESSED_ERRORS.some((error) => args[0].includes(error))\n  ) {\n    return;\n  }\n  originalError.call(console, ...args);\n};\n"
  },
  {
    "path": "web/tests/setup/llmProviderTestUtils.ts",
    "content": "import { LLMProviderDescriptor } from \"@/interfaces/llm\";\n\nexport function makeProvider(\n  overrides: Partial<LLMProviderDescriptor>\n): LLMProviderDescriptor {\n  return {\n    id: overrides.id ?? 1,\n    name: overrides.name ?? \"Provider\",\n    provider: overrides.provider ?? \"openai\",\n    provider_display_name: overrides.provider_display_name ?? \"Provider\",\n    model_configurations: overrides.model_configurations ?? [],\n    ...overrides,\n  };\n}\n"
  },
  {
    "path": "web/tests/setup/mocks/README.md",
    "content": "# Test Mocks Directory\n\nThis directory contains mock implementations used in Jest tests.\n\n## Mocking Strategy\n\n**Use `transformIgnorePatterns` for ES Module packages** instead of mocking them.\n\n### Two Approaches:\n\n| Approach                     | Use When                                                      | Examples                                                             |\n| ---------------------------- | ------------------------------------------------------------- | -------------------------------------------------------------------- |\n| **transformIgnorePatterns**  | All ESM packages                                              | `@radix-ui`, `@headlessui`, `react-markdown`, `remark-*`, `rehype-*` |\n| **moduleNameMapper (mocks)** | Non-executable assets/files, or components with complex setup | CSS files, images, UserProvider                                      |\n\n### Why Use transformIgnorePatterns:\n\nModern npm packages ship as ES Modules (ESM) by default. Jest runs in a Node environment that expects CommonJS. The `transformIgnorePatterns` configuration tells Jest which packages in `node_modules` to transform from ESM to CommonJS.\n\n**Benefits:**\n\n- Tests run against real package code, not mocks\n- No need to maintain mock implementations\n- Catches real bugs in how we use dependencies\n\n**Trade-off:**\n\n- Tests run slower (transformation takes time, especially for markdown packages)\n\n## When to Add to transformIgnorePatterns\n\n**Add packages to the `transformIgnorePatterns` array in `jest.config.js` when:**\n\n### ✅ Add to transformIgnorePatterns:\n\n1. **SyntaxError: Unexpected token 'export'**\n\n   ```\n   Error: SyntaxError: Unexpected token 'export'\n   at node_modules/package-name/index.js:1\n   ```\n\n   → Package uses ES Modules and needs transformation\n\n2. **Package ships as ESM**\n\n   - Check `package.json`: `\"type\": \"module\"` or `\"exports\"` field\n   - Files use `export`/`import` syntax\n   - Common in modern packages (markdown, UI libraries)\n\n3. **Works fine when transformed**\n   - Package has no complex dependencies\n   - No browser-specific APIs or native modules\n   - Just needs ESM → CommonJS conversion\n\n### How to Add:\n\n1. Open `web/jest.config.js`\n2. Find the `transformIgnorePatterns` array\n3. Add package name to the appropriate category:\n\n```javascript\ntransformIgnorePatterns: [\n  \"/node_modules/(?!(\" +\n    [\n      // ... existing packages ...\n\n      // Add your package here (grouped by category)\n      \"your-package-name\",\n      \"another-package\",\n\n      // Use regex patterns for related packages\n      \"package-.*\",  // All packages starting with \"package-\"\n    ].join(\"|\") +\n    \")/)\",\n],\n```\n\n**Example:** Adding `remark-directive`:\n\n```javascript\n// Markdown & Syntax Highlighting\n\"react-markdown\",\n\"remark-gfm\",\n\"remark-math\",\n\"remark-directive\",  // ← Add here\n\"remark-parse\",\n```\n\n## When to Add Mocks to This Directory\n\n**Only mock things that CANNOT be executed in tests.**\n\n### ✅ DO Mock:\n\n1. **CSS/Style Files**\n\n   - Already handled by `cssMock.js`\n   - Cannot be executed in Node environment\n   - Examples: `.css`, `.scss`, `.sass`, `.less`\n\n2. **Static Assets**\n\n   - Already handled by `fileMock.js`\n   - Binary files that can't be imported\n   - Examples: images, fonts, videos\n\n3. **Components with Complex External Dependencies**\n   - Components that require browser APIs not available in jsdom\n   - Components with difficult-to-setup external dependencies\n   - Example: `UserProvider.tsx` (already mocked)\n\n### ❌ DON'T Mock:\n\n1. **ES Module Packages**\n\n   - ALWAYS use `transformIgnorePatterns` instead\n   - Even complex packages like `react-markdown` with deep ESM dependency trees\n   - Add the package (and any dependencies that fail) to `transformIgnorePatterns`\n\n2. **Your Own Code**\n\n   - Test real implementations\n   - Mocking defeats the purpose of testing\n\n3. **Packages That Work in Jest**\n   - Most packages work fine in Jest\n   - No need to add them anywhere\n\n## Current Mocks\n\nThis directory contains **necessary mocks**:\n\n```\nmocks/\n├── components/\n│   └── UserProvider.tsx        # Component with complex dependencies\n├── cssMock.js                  # All CSS/style files\n└── README.md                   # This file\n```\n\n**Note:** `fileMock.js` is in `tests/setup/` (not in `mocks/`) for historical reasons.\n\n## How to Add a New Mock\n\n### Step 1: Determine if You Really Need a Mock\n\n**Try `transformIgnorePatterns` first!** Only create a mock if:\n\n- Asset/file cannot be executed (CSS, images)\n- Component has complex external dependencies\n- Package absolutely cannot work when transformed\n\n### Step 2: Create the Mock File\n\n**For Components:**\n\n```typescript\n// mocks/components/ComponentName.tsx\nimport React from 'react';\n\nexport default function ComponentName({ children }: { children?: React.ReactNode }) {\n  return <div data-testid=\"mock-component-name\">{children}</div>;\n}\n```\n\n**For CSS/Assets:** (Already handled - no need to create)\n\n### Step 3: Register in jest.config.js\n\nAdd to `moduleNameMapper`:\n\n```javascript\nmoduleNameMapper: {\n  // Before path aliases!\n  \"^@/components/ComponentName$\":\n    \"<rootDir>/tests/setup/mocks/components/ComponentName.tsx\",\n\n  // Path aliases come last\n  \"^@/(.*)$\": \"<rootDir>/src/$1\",\n}\n```\n\n### Step 4: Verify Tests Pass\n\n```bash\nnpm test\n```\n\n## Decision Tree\n\n```\nNeed to use a package in tests?\n         ↓\nDoes it cause \"SyntaxError: Unexpected token 'export'\"?\n         ↓\n      YES → Try adding to transformIgnorePatterns first ✅\n         ↓\n      Does it still fail after transformation?\n         ↓\n      YES → Create mock (complex ESM structure) ⚠️\n         |\n      NO → Transformation worked! ✅\n         |\nIs it CSS/static asset?\n         ↓\n      YES → Already mocked (cssMock.js/fileMock.js) ✅\n         |\n      NO → Can the package be executed in Node/jsdom?\n         ↓\n      YES → Use it directly (no mock needed) ✅\n         |\n      NO → Is it a component with complex dependencies?\n         ↓\n      YES → Create mock in mocks/components/ ⚠️\n         |\n      NO → You probably don't need a mock! ✅\n```\n\n## Examples\n\n### ✅ Example 1: ESM Package\n\n**Problem:** `@tiptap/react` causes `SyntaxError: Unexpected token 'export'`\n\n**Solution:** Add to `transformIgnorePatterns` in `jest.config.js`\n\n```javascript\ntransformIgnorePatterns: [\n  \"/node_modules/(?!(\" +\n    [\n      // ...\n      \"@tiptap/react\",  // ← Add here\n      \"@tiptap/core\",\n      // ...\n    ].join(\"|\") +\n    \")/)\",\n],\n```\n\n**If you get more errors:** Keep adding the failing packages until tests pass. The package may have ESM dependencies that also need transformation.\n\n### ✅ Example 2: Complex ESM Package with Dependencies\n\n**Problem:** `react-markdown` causes SyntaxError, then after fixing it, `devlop` fails, then `hast-util-to-jsx-runtime` fails...\n\n**Solution:** Keep adding packages to transformIgnorePatterns:\n\n```javascript\n[\n  \"react-markdown\",\n  \"remark-.*\", // All remark packages\n  \"rehype-.*\", // All rehype packages\n  \"hast-.*\", // All hast packages\n  \"devlop\",\n  \"hastscript\",\n  // ... and so on\n];\n```\n\n**Pro tip:** Use wildcard patterns like `\"remark-.*\"` to match all packages with that prefix.\n\n### ✅ Example 3: Static Asset (Already Handled)\n\n**Problem:** Importing CSS causes error\n\n**Solution:** Already handled! `cssMock.js` catches all CSS imports.\n\n### ✅ Example 4: Component Mock (Rare Case)\n\n**Problem:** `AuthProvider` requires complex auth setup\n\n**Solution:**\n\n```typescript\n// mocks/components/AuthProvider.tsx\nimport React from 'react';\n\nexport default function AuthProvider({ children }: { children?: React.ReactNode }) {\n  return <div data-testid=\"mock-auth-provider\">{children}</div>;\n}\n```\n\n```javascript\n// jest.config.js\n\"^@/components/auth/AuthProvider$\":\n  \"<rootDir>/tests/setup/mocks/components/AuthProvider.tsx\",\n```\n\n## Troubleshooting\n\n### \"SyntaxError: Unexpected token 'export'\"\n\n**Fix:** Add the package to `transformIgnorePatterns` in `jest.config.js`\n\n**If it happens again:** The package likely has ESM dependencies. Keep adding failing packages to the list until tests pass.\n\n### \"Cannot find module 'package-name'\"\n\n**Check:**\n\n1. Is package installed? `npm ls package-name`\n2. Is path in `jest.config.js` correct?\n3. Did you add to `transformIgnorePatterns` if it's ESM?\n\n### Tests slow after adding to transformIgnorePatterns\n\n**This is expected.** Transformation takes time, especially for packages with deep dependency trees like `react-markdown`.\n\n**Example:** The markdown tests take ~23 seconds vs ~1 second without markdown packages.\n\n**Why this is worth it:**\n\n- Tests run against real code, catching real bugs\n- No mock maintenance burden\n- More confidence in test results\n\n**If tests are too slow:**\n\n1. Use `jest --maxWorkers=50%` to parallelize (already configured)\n2. Run specific test files during development: `npm test -- --testPathPattern=MyComponent`\n3. Let CI run the full suite\n\n### Package still fails after adding to transformIgnorePatterns\n\n**Rare, but possible issues:**\n\n1. Package requires browser APIs → Mock it or use jsdom\n2. Package has native dependencies → May need different approach\n3. TypeScript type errors → Check tsconfig `allowJs: true` in jest.config.js transform options\n\n## Testing Philosophy\n\n**The Goal:** Write tests that are reliable and test YOUR code with REAL dependencies.\n\n- ✅ **Transform ESM packages** - Always use `transformIgnorePatterns` for npm packages\n- ✅ **Mock only non-executable things** - CSS, images, videos (things Node.js can't execute)\n- ✅ **Test real code** - More confidence, catches real bugs, no mock maintenance\n- ❌ **Don't mock packages** - Even if they have complex dependency trees\n- ⚠️ **Accept slower tests** - Transformation takes time, but correctness > speed\n\n## Additional Resources\n\n- [Jest transformIgnorePatterns Documentation](https://jestjs.io/docs/configuration#transformignorepatterns-arraystring)\n- [ES Modules in Jest](https://jestjs.io/docs/ecmascript-modules)\n- [Testing Library Best Practices](https://testing-library.com/docs/guiding-principles/)\n"
  },
  {
    "path": "web/tests/setup/mocks/components/UserProvider.tsx",
    "content": "/**\n * Mock for @/components/user/UserProvider\n *\n * Why this mock exists:\n * The real UserProvider requires complex props (authTypeMetadata, settings, user)\n * that are not relevant for most component integration tests. This mock provides\n * a simple useUser() hook with safe default values.\n *\n * Usage:\n * Automatically applied via jest.config.js moduleNameMapper.\n * Any component that imports from \"@/components/user/UserProvider\" will get this mock.\n *\n * To customize user values in a specific test:\n * You would need to either:\n * 1. Pass props to the real UserProvider (requires disabling this mock for that test)\n * 2. Extend this mock to accept custom values via a setup function\n */\nimport React, { createContext, useContext } from \"react\";\n\ninterface UserContextType {\n  user: any;\n  isAdmin: boolean;\n  isCurator: boolean;\n  refreshUser: () => Promise<void>;\n  isCloudSuperuser: boolean;\n  updateUserAutoScroll: (autoScroll: boolean) => Promise<void>;\n  updateUserShortcuts: (enabled: boolean) => Promise<void>;\n  toggleAgentPinnedStatus: (\n    currentPinnedAgentIDs: number[],\n    agentId: number,\n    isPinned: boolean\n  ) => Promise<boolean>;\n  updateUserTemperatureOverrideEnabled: (enabled: boolean) => Promise<void>;\n  updateUserPersonalization: (personalization: any) => Promise<void>;\n}\n\nconst mockUserContext: UserContextType = {\n  user: null,\n  isAdmin: false,\n  isCurator: false,\n  refreshUser: async () => {},\n  isCloudSuperuser: false,\n  updateUserAutoScroll: async () => {},\n  updateUserShortcuts: async () => {},\n  toggleAgentPinnedStatus: async () => true,\n  updateUserTemperatureOverrideEnabled: async () => {},\n  updateUserPersonalization: async () => {},\n};\n\nconst UserContext = createContext<UserContextType | undefined>(mockUserContext);\n\nexport function useUser() {\n  const context = useContext(UserContext);\n  if (context === undefined) {\n    throw new Error(\"useUser must be used within a UserProvider\");\n  }\n  return context;\n}\n\nexport function UserProvider({ children }: { children: React.ReactNode }) {\n  return (\n    <UserContext.Provider value={mockUserContext}>\n      {children}\n    </UserContext.Provider>\n  );\n}\n"
  },
  {
    "path": "web/tests/setup/mocks/cssMock.js",
    "content": "// Mock for CSS imports\nmodule.exports = {};\n"
  },
  {
    "path": "web/tests/setup/test-utils.tsx",
    "content": "import React, { ReactElement } from \"react\";\nimport { render, RenderOptions } from \"@testing-library/react\";\nimport userEvent from \"@testing-library/user-event\";\nimport { SWRConfig } from \"swr\";\nimport * as TooltipPrimitive from \"@radix-ui/react-tooltip\";\nexport { makeProvider } from \"./llmProviderTestUtils\";\n\n/**\n * Custom render function that wraps components with common providers\n * used throughout the Onyx application.\n */\n\ninterface AllProvidersProps {\n  children: React.ReactNode;\n  swrConfig?: Record<string, any>;\n}\n\n/**\n * Wrapper component that provides all necessary context providers for tests.\n * Customize this as needed when you discover more global providers in the app.\n */\nfunction AllTheProviders({ children, swrConfig = {} }: AllProvidersProps) {\n  return (\n    <SWRConfig\n      value={{\n        // Disable deduping in tests to ensure each test gets fresh data\n        dedupingInterval: 0,\n        // Use a Map instead of cache to avoid state leaking between tests\n        provider: () => new Map(),\n        // Disable error retries in tests for faster failures\n        shouldRetryOnError: false,\n        // Merge any custom SWR config passed from tests\n        ...swrConfig,\n      }}\n    >\n      <TooltipPrimitive.Provider>{children}</TooltipPrimitive.Provider>\n    </SWRConfig>\n  );\n}\n\ninterface CustomRenderOptions extends Omit<RenderOptions, \"wrapper\"> {\n  swrConfig?: Record<string, any>;\n}\n\n/**\n * Custom render function that wraps the component with all providers.\n * Use this instead of @testing-library/react's render in your tests.\n *\n * @example\n * import { render, screen } from '@tests/setup/test-utils';\n *\n * test('renders component', () => {\n *   render(<MyComponent />);\n *   expect(screen.getByText('Hello')).toBeInTheDocument();\n * });\n *\n * @example\n * // With custom SWR config to mock API responses\n * render(<MyComponent />, {\n *   swrConfig: {\n *     fallback: {\n *       '/api/credentials': mockCredentials,\n *     },\n *   },\n * });\n */\nconst customRender = (\n  ui: ReactElement,\n  { swrConfig, ...options }: CustomRenderOptions = {}\n) => {\n  const Wrapper = ({ children }: { children: React.ReactNode }) => (\n    <AllTheProviders swrConfig={swrConfig}>{children}</AllTheProviders>\n  );\n\n  return render(ui, { wrapper: Wrapper, ...options });\n};\n\n// Re-export everything from @testing-library/react\nexport * from \"@testing-library/react\";\nexport { userEvent };\n\n// Override render with our custom render\nexport { customRender as render };\n\n/**\n * Setup userEvent with optimized configuration for testing.\n * All user interactions are automatically wrapped in act() to prevent warnings.\n * Use this helper instead of userEvent.setup() directly.\n *\n * @example\n * const user = setupUser();\n * await user.click(button);\n * await user.type(input, \"text\");\n */\nexport function setupUser(options = {}) {\n  const baseUser = userEvent.setup({\n    // Configure for React 18 to reduce act warnings\n    delay: null, // Instant typing - batches state updates better\n    ...options,\n  });\n\n  // Wrap all user-event methods in act() to prevent act warnings. We add this here\n  // to prevent all callsites from needing to import and wrap user events in act()\n  return new Proxy(baseUser, {\n    get(target, prop) {\n      const value = target[prop as keyof typeof target];\n\n      // Only wrap methods (functions), not properties\n      if (typeof value === \"function\") {\n        return async (...args: any[]) => {\n          const { act } = await import(\"@testing-library/react\");\n          return act(async () => {\n            return (value as Function).apply(target, args);\n          });\n        };\n      }\n\n      return value;\n    },\n  });\n}\n"
  },
  {
    "path": "web/tsconfig.json",
    "content": "{\n  \"compilerOptions\": {\n    \"target\": \"es5\",\n    \"lib\": [\"dom\", \"dom.iterable\", \"esnext\"],\n    \"allowJs\": true,\n    \"skipLibCheck\": true,\n    \"strict\": true,\n    \"forceConsistentCasingInFileNames\": true,\n    \"noEmit\": true,\n    \"esModuleInterop\": true,\n    \"module\": \"ESNext\",\n    \"moduleResolution\": \"node\",\n    \"noUncheckedIndexedAccess\": true,\n    \"resolveJsonModule\": true,\n    \"isolatedModules\": true,\n    \"jsx\": \"react-jsx\",\n    \"incremental\": true,\n    \"plugins\": [\n      {\n        \"name\": \"next\"\n      }\n    ],\n    \"paths\": {\n      \"@/*\": [\"./src/*\"],\n      \"@tests/*\": [\"./tests/*\"],\n      \"@public/*\": [\"./public/*\"],\n      \"@opal/*\": [\"./lib/opal/src/*\"],\n      \"@opal/types/*\": [\"./lib/opal/src/types/*\"]\n    }\n  },\n  \"include\": [\n    \"next-env.d.ts\",\n    \"**/*.ts\",\n    \"**/*.tsx\",\n    \".next/types/**/*.ts\",\n    \".next/dev/types/**/*.ts\"\n  ],\n  \"exclude\": [\"node_modules\", \"lib/opal\"]\n}\n"
  },
  {
    "path": "web/tsconfig.types.json",
    "content": "{\n  \"extends\": \"./tsconfig.json\",\n  \"compilerOptions\": {\n    \"paths\": {\n      \"@/*\": [\"./src/*\"],\n      \"@tests/*\": [\"./tests/*\"],\n      \"@public/*\": [\"./public/*\"],\n      \"@opal/*\": [\"./lib/opal/src/*\"]\n    }\n  },\n  \"include\": [\n    \"next-env.d.ts\",\n    \"src/**/*\",\n    \"tests/**/*\",\n    \".next/types/**/*.ts\",\n    \".next/dev/types/**/*.ts\",\n    \"types/**/*.d.ts\"\n  ],\n  \"exclude\": [\"node_modules\", \"lib/opal\"]\n}\n"
  },
  {
    "path": "web/types/assets.d.ts",
    "content": "declare module \"*.svg\" {\n  const src: string;\n  export default src;\n}\n\ndeclare module \"*.png\" {\n  const src: string;\n  export default src;\n}\n\ndeclare module \"*.jpg\" {\n  const src: string;\n  export default src;\n}\n\ndeclare module \"*.jpeg\" {\n  const src: string;\n  export default src;\n}\n\ndeclare module \"*.gif\" {\n  const src: string;\n  export default src;\n}\n\ndeclare module \"*.webp\" {\n  const src: string;\n  export default src;\n}\n"
  },
  {
    "path": "web/types/favicon-fetch.d.ts",
    "content": "declare module \"favicon-fetch\" {\n  type FetchFaviconArg = string | { uri: string };\n  const fetchFavicon: (input: FetchFaviconArg) => Promise<string | undefined>;\n  export default fetchFavicon;\n}\n"
  },
  {
    "path": "widget/.gitignore",
    "content": ".vite/\ndist/"
  },
  {
    "path": "widget/README.md",
    "content": "# Onyx Chat Widget\n\nAn embeddable, lightweight chat widget that brings AI-powered conversations to any website. Built with [Lit](https://lit.dev/) web components for maximum compatibility and minimal bundle size.\n\n## Security Note\n\n⚠️ **Always use a limited-scope API key for the widget.** The API key is visible in client-side code, so it should have restricted permissions and rate limits. Never use admin or full-access keys.\n\n## Features\n\n- 🚀 **Lightweight** - ~100-150kb gzipped bundle\n- 🎨 **Fully Customizable** - Colors, branding, and styling\n- 📱 **Responsive** - Desktop popup, mobile fullscreen\n- 🔒 **Shadow DOM Isolation** - No style conflicts with your site\n- 💬 **Real-time Streaming** - Server-sent events (SSE) for fast responses\n- 🌐 **Two Deployment Modes** - Cloud CDN or self-hosted\n- ♿ **Markdown Support** - Rich text formatting in responses\n- 💾 **Session Persistence** - Conversations survive page reloads\n- 🎯 **Two Display Modes** - Floating launcher or inline embed\n\n## Quick Start\n\n### Cloud Deployment (Recommended)\n\nAdd these two lines to your website:\n\n```html\n<!-- Load the widget -->\n<script type=\"module\" src=\"https://cdn.onyx.app/widget/1.0/dist/onyx-widget.js\"></script>\n\n<!-- Configure and display -->\n<onyx-chat-widget\n  backend-url=\"https://cloud.onyx.app/api\"\n  api-key=\"your_api_key_here\"\n  mode=\"launcher\"\n>\n</onyx-chat-widget>\n```\n\nThat's it! The widget will appear as a floating button in the bottom-right corner.\n\n## How It Works\n\n### Architecture Overview\n\n```\n┌─────────────────────────────────────────┐\n│         Customer Website                │\n│  ┌───────────────────────────────────┐  │\n│  │  <onyx-chat-widget>               │  │\n│  │  (Web Component)                  │  │\n│  │  ┌─────────────────────────────┐  │  │\n│  │  │    Shadow DOM               │  │  │\n│  │  │  • Isolated styles          │  │  │\n│  │  │  • UI components            │  │  │\n│  │  │  • Message history          │  │  │\n│  │  └─────────────────────────────┘  │  │\n│  └───────────────────────────────────┘  │\n└──────────────┬──────────────────────────┘\n               │ API Calls (SSE)\n               ▼\n┌──────────────────────────────────────────┐\n│         Onyx Backend                     │\n│  • POST /api/chat/create-chat-session    │\n│  • POST /api/chat/send-chat-message      │\n│  • Streams responses via SSE             │\n└──────────────────────────────────────────┘\n```\n\n### Technology Stack\n\n- **Frontend Framework**: [Lit](https://lit.dev/) - Lightweight web components\n- **Markdown Rendering**: [marked.js](https://marked.js.org/)\n- **Build Tool**: [Vite](https://vitejs.dev/)\n- **Styling**: CSS-in-JS with Shadow DOM isolation\n- **API Communication**: Fetch API with SSE (Server-Sent Events)\n\n### Component Structure\n\n```\n<onyx-chat-widget>\n  └─ Shadow DOM\n      ├─ Launcher Button (mode=\"launcher\" only)\n      └─ Chat Container\n          ├─ Header\n          │   ├─ Logo/Avatar\n          │   ├─ Agent Name\n          │   └─ Actions (Reset, Close)\n          ├─ Disclaimer\n          ├─ Messages\n          │   ├─ User Messages\n          │   ├─ Assistant Messages (with markdown)\n          │   └─ Typing Indicator\n          └─ Input Area\n              ├─ Text Input\n              ├─ Send Button\n              └─ \"Powered by Onyx\" Footer\n```\n\n## Configuration Options\n\n### Required Attributes\n\n| Attribute     | Type   | Description                                                          |\n| ------------- | ------ | -------------------------------------------------------------------- |\n| `backend-url` | string | Your Onyx backend API URL (or set `VITE_WIDGET_BACKEND_URL` in .env) |\n| `api-key`     | string | API key for authentication (or set `VITE_WIDGET_API_KEY` in .env)    |\n\n**Note**: For cloud deployment, these must be provided as HTML attributes. For self-hosted deployment, they can be set in `.env` file during build and will be baked into the bundle.\n\n### Optional Attributes\n\n| Attribute          | Type   | Default       | Description                              |\n| ------------------ | ------ | ------------- | ---------------------------------------- |\n| `agent-id`         | number | `undefined`   | Specific agent/persona to use            |\n| `agent-name`       | string | `\"Assistant\"` | Display name in header                   |\n| `logo`             | string | Onyx logo     | URL to custom logo image                 |\n| `primary-color`    | string  | `#1c1c1c`     | Primary brand color (buttons, accents)   |\n| `background-color` | string  | `#e9e9e9`     | Widget background color                  |\n| `text-color`       | string  | `#000000bf`   | Text color (75% opacity black)           |\n| `mode`             | string  | `\"launcher\"`  | Display mode: `\"launcher\"` or `\"inline\"` |\n| `include-citations`| boolean | `false`       | Include citation markers in responses    |\n\n**Note**: These attributes must be provided as HTML attributes. Only `backend-url` and `api-key` can optionally be set via environment variables for self-hosted builds.\n\n### Configuration Examples\n\n**Basic Setup:**\n\n```html\n<onyx-chat-widget backend-url=\"https://cloud.onyx.app/api\" api-key=\"on_abc123\">\n</onyx-chat-widget>\n```\n\n**Full Customization:**\n\n```html\n<onyx-chat-widget\n  backend-url=\"https://cloud.onyx.app/api\"\n  api-key=\"on_abc123\"\n  agent-id=\"42\"\n  agent-name=\"Support Bot\"\n  logo=\"https://yoursite.com/logo.png\"\n  primary-color=\"#FF6B35\"\n  background-color=\"#FFFFFF\"\n  text-color=\"#1A1A1A\"\n  mode=\"launcher\"\n>\n</onyx-chat-widget>\n```\n\n**Inline Mode (Embedded):**\n\n```html\n<div style=\"width: 400px; height: 600px;\">\n  <onyx-chat-widget\n    backend-url=\"https://cloud.onyx.app/api\"\n    api-key=\"on_abc123\"\n    mode=\"inline\"\n  >\n  </onyx-chat-widget>\n</div>\n```\n\n## Display Modes\n\n### Launcher Mode (Default)\n\nA floating button appears in the bottom-right corner. Clicking it opens a chat popup.\n\n- **Desktop**: 400x600px popup above the button\n- **Mobile (<768px)**: Full-screen overlay\n\n```html\n<onyx-chat-widget mode=\"launcher\"></onyx-chat-widget>\n```\n\n### Inline Mode\n\nThe widget is embedded directly in your page layout. Perfect for dedicated support pages.\n\n```html\n<div class=\"chat-container\">\n  <onyx-chat-widget mode=\"inline\"></onyx-chat-widget>\n</div>\n```\n\n**CSS Tip**: The widget will fill its container's dimensions in inline mode.\n\n## Development\n\n### Prerequisites\n\n- Node.js 18+ and npm\n- Access to Onyx backend API\n\n### Setup\n\n```bash\n# Navigate to widget directory\ncd widget/\n\n# Install dependencies\nnpm install\n\n# Copy example env file (for self-hosted builds)\ncp .env.example .env\n```\n\n### Development Server\n\n```bash\nnpm run dev\n```\n\nOpens at `http://localhost:5173` with hot module replacement.\n\n### Build Commands\n\n```bash\n# Cloud deployment (no config baked in)\nnpm run build:cloud\n\n# Self-hosted deployment (config from .env)\nnpm run build:self-hosted\n\n# Standard build (same as cloud)\nnpm run build\n```\n\n### Project Structure\n\n```\nwidget/\n├── src/\n│   ├── index.ts                 # Entry point\n│   ├── widget.ts                # Main component\n│   ├── config/\n│   │   ├── config.ts            # Configuration resolver\n│   │   └── build-config.ts      # Build-time config injection\n│   ├── services/\n│   │   ├── api-service.ts       # API client (SSE streaming)\n│   │   └── stream-parser.ts     # SSE packet processor\n│   ├── types/\n│   │   ├── api-types.ts         # Backend packet types\n│   │   └── widget-types.ts      # Widget configuration types\n│   ├── styles/\n│   │   ├── theme.ts             # Design tokens\n│   │   ├── colors.ts            # Color system\n│   │   └── widget-styles.ts     # Component styles\n│   ├── utils/\n│   │   └── storage.ts           # Session persistence\n│   └── assets/\n│       └── logo.ts              # Default Onyx logo (base64)\n├── dist/                        # Build output\n├── index.html\n├── package.json\n├── vite.config.ts\n└── tsconfig.json\n```\n\n### Key Files\n\n- **[src/widget.ts](src/widget.ts)** - Main Lit component with all UI logic\n- **[src/services/api-service.ts](src/services/api-service.ts)** - Handles API calls and SSE streaming\n- **[src/styles/widget-styles.ts](src/styles/widget-styles.ts)** - All CSS styles\n- **[vite.config.ts](vite.config.ts)** - Build configuration (cloud vs self-hosted)\n\n## API Integration\n\n### Backend Endpoints Used\n\nThe widget communicates with these Onyx backend endpoints:\n\n#### 1. Create Chat Session\n\n```\nPOST /chat/create-chat-session\nContent-Type: application/json\nAuthorization: Bearer YOUR_API_KEY\n\n{\n  \"persona_id\": 42  // Optional agent ID\n}\n\nResponse:\n{\n  \"chat_session_id\": \"uuid-here\"\n}\n```\n\n#### 2. Send Message (SSE Streaming)\n\n```\nPOST /chat/send-chat-message\nContent-Type: application/json\nAuthorization: Bearer YOUR_API_KEY\n\n{\n  \"message\": \"User's question\",\n  \"chat_session_id\": \"uuid-here\",\n  \"parent_message_id\": 123,  // null for first message\n  \"origin\": \"widget\",\n  \"include_citations\": false\n}\n\nResponse: Server-Sent Events stream\n{\"type\": \"message_start\"}\n{\"type\": \"message_delta\", \"content\": \"Hello\"}\n{\"type\": \"message_delta\", \"content\": \" world!\"}\n{\"type\": \"stop\"}\n```\n\n## Deployment\n\n### Self-Hosted Deployment\n\n1. **Create `.env` file:**\n\n   ```bash\n   VITE_WIDGET_BACKEND_URL=https://your-backend.com\n   VITE_WIDGET_API_KEY=your_api_key\n   ```\n\n2. **Build with config baked in:**\n\n   ```bash\n   npm run build:self-hosted\n   ```\n\n3. **Deploy `dist/onyx-widget.js` to your server**\n\n4. **Customer embed:**\n   ```html\n   <script type=\"module\" src=\"https://your-cdn.com/onyx-widget.js\"></script>\n   <onyx-chat-widget\n     agent-id=\"1\"\n     agent-name=\"Support\"\n     logo=\"https://path-to-your-logo.com/\"\n   >\n   </onyx-chat-widget>\n   ```\n\n## Customization\n\n### Styling\n\nThe widget uses CSS custom properties (CSS variables) for theming. All styles are scoped within Shadow DOM to prevent conflicts.\n\n**Default Colors (aligned with web/src/app/css/colors.css):**\n\n```css\n--theme-primary-05: #1c1c1c; /* Buttons, accents (onyx-ink-95) */\n--theme-primary-06: #000000; /* Hover state (onyx-ink-100) */\n--background-neutral-00: #ffffff; /* Widget background (grey-00) */\n--background-neutral-03: #e6e6e6; /* Background hover (grey-10) */\n--text-04: #000000bf; /* Text (alpha-grey-100-75) */\n--text-light-05: #ffffff; /* White text on dark (grey-00) */\n--border-01: #00000033; /* Borders (alpha-grey-100-20) */\n```\n\n**Override via attributes:**\n\n```html\n<onyx-chat-widget\n  primary-color=\"#FF6B35\"\n  background-color=\"#FFFFFF\"\n  text-color=\"#1A1A1A\"\n>\n</onyx-chat-widget>\n```\n\n## Browser Support\n\n- ✅ Chrome/Edge 90+ (Chromium)\n- ✅ Firefox 90+\n- ✅ Safari 15+\n- ✅ Mobile Safari (iOS 15+)\n- ✅ Mobile Chrome (Android)\n\n**Requirements:**\n\n- ES Modules support\n- Custom Elements v1\n- Shadow DOM v1\n- Fetch API with SSE\n\n## Performance\n\n- **Bundle Size**: ~100-150kb gzipped\n- **Initial Load**: Shadow DOM renders immediately\n- **Message Latency**: Real-time SSE streaming (<100ms first token)\n- **Session Persistence**: sessionStorage (auto-save on each message)\n"
  },
  {
    "path": "widget/index.html",
    "content": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"UTF-8\" />\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <title>Onyx Chat Widget - Development</title>\n    <style>\n      * {\n        box-sizing: border-box;\n      }\n\n      body {\n        font-family: -apple-system, BlinkMacSystemFont, \"Segoe UI\", \"Roboto\",\n          \"Helvetica\", \"Arial\", sans-serif;\n        margin: 0;\n        padding: 0;\n        background: #f5f5f5;\n        color: #1a1a1a;\n        line-height: 1.6;\n      }\n\n      .header {\n        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);\n        color: white;\n        padding: 60px 20px;\n        text-align: center;\n      }\n\n      .header h1 {\n        margin: 0 0 10px 0;\n        font-size: 2.5em;\n        font-weight: 700;\n      }\n\n      .header p {\n        margin: 0;\n        font-size: 1.2em;\n        opacity: 0.9;\n      }\n\n      .container {\n        max-width: 1200px;\n        margin: 0 auto;\n        padding: 40px 20px;\n      }\n\n      .section {\n        background: white;\n        padding: 40px;\n        border-radius: 12px;\n        box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);\n        margin-bottom: 40px;\n      }\n\n      .section h2 {\n        margin-top: 0;\n        color: #667eea;\n        font-size: 1.8em;\n        margin-bottom: 20px;\n      }\n\n      .section p {\n        color: #666;\n        margin-bottom: 20px;\n      }\n\n      .code-block {\n        background: #f8f9fa;\n        border: 1px solid #e1e4e8;\n        border-radius: 6px;\n        padding: 16px;\n        font-family: \"Monaco\", \"Courier New\", monospace;\n        font-size: 14px;\n        overflow-x: auto;\n        margin: 20px 0;\n      }\n\n      .demo-grid {\n        display: grid;\n        grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));\n        gap: 30px;\n        margin-top: 30px;\n      }\n\n      .demo-card {\n        background: #f8f9fa;\n        border: 2px solid #e1e4e8;\n        border-radius: 8px;\n        padding: 20px;\n      }\n\n      .demo-card h3 {\n        margin-top: 0;\n        color: #1a1a1a;\n        font-size: 1.2em;\n      }\n\n      .demo-card p {\n        color: #666;\n        font-size: 0.95em;\n        margin-bottom: 15px;\n      }\n\n      .compact-demo {\n        width: 100%;\n        max-width: 600px;\n        margin: 20px auto 0;\n      }\n\n      .badge {\n        display: inline-block;\n        padding: 4px 12px;\n        background: #667eea;\n        color: white;\n        border-radius: 12px;\n        font-size: 0.85em;\n        font-weight: 600;\n        margin-bottom: 10px;\n      }\n\n      .info-box {\n        background: #e3f2fd;\n        border-left: 4px solid #2196f3;\n        padding: 16px 20px;\n        margin: 20px 0;\n        border-radius: 4px;\n      }\n\n      .info-box strong {\n        color: #1565c0;\n      }\n\n      .feature-list {\n        list-style: none;\n        padding: 0;\n      }\n\n      .feature-list li {\n        padding: 8px 0;\n        padding-left: 24px;\n        position: relative;\n      }\n\n      .feature-list li:before {\n        content: \"✓\";\n        position: absolute;\n        left: 0;\n        color: #667eea;\n        font-weight: bold;\n      }\n    </style>\n  </head>\n  <body>\n    <div class=\"header\">\n      <h1>🤖 Onyx Chat Widget</h1>\n      <p>Embeddable AI-powered chat for your website</p>\n    </div>\n\n    <div class=\"container\">\n      <!-- Introduction Section -->\n      <div class=\"section\">\n        <h2>Development Environment</h2>\n        <p>\n          This page demonstrates the Onyx Chat Widget in development mode. The\n          widget is loaded from <code>src/index.ts</code> via Vite with hot\n          module replacement.\n        </p>\n\n        <div class=\"info-box\">\n          <strong>Configuration:</strong> Widget settings are loaded from the\n          <code>.env</code> file. Create one from <code>.env.example</code> to\n          configure backend URL, API key, and other options.\n        </div>\n\n        <ul class=\"feature-list\">\n          <li>Real-time streaming responses via SSE</li>\n          <li>Markdown rendering for rich content</li>\n          <li>Session persistence across page reloads</li>\n          <li>Shadow DOM isolation (no style conflicts)</li>\n          <li>Responsive design (desktop & mobile)</li>\n          <li>Two display modes: Launcher & Inline</li>\n        </ul>\n      </div>\n\n      <!-- Launcher Mode Section -->\n      <div class=\"section\">\n        <span class=\"badge\">Launcher Mode</span>\n        <h2>Floating Chat Button</h2>\n        <p>\n          Look at the bottom-right corner of this page! A floating chat button\n          will appear there. Click it to open the chat popup. This is the\n          default mode for most website integrations.\n        </p>\n\n        <div class=\"code-block\">\n          &lt;onyx-chat-widget mode=\"launcher\"&gt;&lt;/onyx-chat-widget&gt;\n        </div>\n\n        <div class=\"demo-grid\">\n          <div class=\"demo-card\">\n            <h3>Desktop View</h3>\n            <p>Opens as a 400×600px popup above the button</p>\n          </div>\n          <div class=\"demo-card\">\n            <h3>Mobile View</h3>\n            <p>Expands to full-screen overlay (&lt;768px)</p>\n          </div>\n        </div>\n      </div>\n\n      <!-- Compact Inline Mode Section -->\n      <div class=\"section\">\n        <span class=\"badge\">Compact Mode</span>\n        <h2>Compact Inline (No Messages)</h2>\n        <p>\n          When using inline mode without any messages, the widget displays in a\n          compact form - just a search-like input bar. Once you send a message,\n          it expands to show the full chat interface.\n        </p>\n\n        <div class=\"code-block\">\n          &lt;onyx-chat-widget mode=\"inline\"&gt;&lt;/onyx-chat-widget&gt;\n          &lt;!-- Automatically compact when no messages --&gt;\n        </div>\n\n        <div class=\"compact-demo\">\n          <onyx-chat-widget mode=\"inline\"></onyx-chat-widget>\n        </div>\n      </div>\n\n      <!-- Configuration Section -->\n      <div class=\"section\">\n        <h2>Configuration Options</h2>\n        <p>\n          The widget can be customized via HTML attributes or environment\n          variables (for self-hosted builds). Below are some common\n          configuration examples:\n        </p>\n\n        <h3>Basic Setup</h3>\n        <div class=\"code-block\">\n          &lt;onyx-chat-widget backend-url=\"https://api.onyx.app\"\n          api-key=\"your_api_key_here\"&gt; &lt;/onyx-chat-widget&gt;\n        </div>\n\n        <h3>Custom Branding</h3>\n        <div class=\"code-block\">\n          &lt;onyx-chat-widget backend-url=\"https://api.onyx.app\"\n          api-key=\"your_api_key_here\" agent-id=\"42\" agent-name=\"Support Bot\"\n          logo=\"https://yoursite.com/logo.png\" primary-color=\"#FF6B35\"\n          background-color=\"#FFFFFF\" text-color=\"#1A1A1A\"&gt;\n          &lt;/onyx-chat-widget&gt;\n        </div>\n\n        <div class=\"info-box\">\n          <strong>Note:</strong> For cloud deployments, configuration must be\n          provided via HTML attributes. For self-hosted builds, you can bake\n          configuration into the bundle using environment variables.\n        </div>\n      </div>\n    </div>\n\n    <!-- Launcher widget instance -->\n    <onyx-chat-widget\n      backend-url=\"http://localhost:8080\"\n      api-key=\"your-api-key-here\"\n      mode=\"launcher\"\n    ></onyx-chat-widget>\n\n    <!-- Load widget source via Vite for local development -->\n    <script type=\"module\" src=\"./dist/onyx-widget.js\"></script>\n\n    <!-- Load widget source via CDN for production -->\n    <!-- <script type=\"module\" src=\"https://cdn.onyx.app/widget/1.0/dist/onyx-widget.js\"></script> -->\n  </body>\n</html>\n"
  },
  {
    "path": "widget/package.json",
    "content": "{\n  \"name\": \"onyx-chat-widget\",\n  \"version\": \"1.0.0\",\n  \"description\": \"Embeddable chat widget for Onyx\",\n  \"type\": \"module\",\n  \"main\": \"dist/onyx-widget.js\",\n  \"types\": \"dist/types/index.d.ts\",\n  \"files\": [\n    \"dist\"\n  ],\n  \"scripts\": {\n    \"dev\": \"vite\",\n    \"build\": \"vite build\",\n    \"build:cloud\": \"vite build --mode production\",\n    \"build:self-hosted\": \"vite build --mode self-hosted\",\n    \"preview\": \"vite preview\",\n    \"type-check\": \"tsc --noEmit\"\n  },\n  \"dependencies\": {\n    \"dompurify\": \"^3.3.2\",\n    \"lit\": \"^3.1.0\",\n    \"marked\": \"^12.0.0\",\n    \"terser\": \"^5.46.1\"\n  },\n  \"devDependencies\": {\n    \"@types/dompurify\": \"^3.0.0\",\n    \"@types/node\": \"^20.0.0\",\n    \"typescript\": \"^5.3.0\",\n    \"vite\": \"^7.3.1\"\n  }\n}\n"
  },
  {
    "path": "widget/src/assets/logo.ts",
    "content": "// Default Onyx logo as base64 data URL\nexport const DEFAULT_LOGO =\n  \"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAZAAAAGQCAYAAACAvzbMAAAACXBIWXMAAG66AABuugHW3rEXAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAABnYSURBVHgB7d3NleTGlYbhV+JmtJM8CHlAehDazW56LGjIAlIWdMmCpixA0YKmdrNDedD0AOmBtJxZcTKIKrKqOqsKmYmfuMD7nPMd8ZAUKWUk7s3ATUSCpFPyMd19GiRJesUfj/numP6Yn5+l/LkGSZIeKY3jwzH/4svGcaqRfDwmIUnarcTQDMY0jlNpsZFI0q5khtnGzxOlvf9nSpI2KjNt43iezzgnkaTNeG0wPlfKv6u5/3dLkoI5ZzA+ZyNpcU4iSSEkrhuMzxUbiSRVKjPvfGOqfMKBuyRVIROjcZy6vdUgSVrUw3yjFOHaG8XYRpKQJM2mhsH4nI2kxUYiSZP6mjoH43OlxUYiSVfJxJxvTJUOB+6SdJZ37LtxPE+PA3dJetGW5xtTN5KEJMnGcUF6nJNI2rHMUARtHNelxUYiaScyzjfmSIcDd0kb1WDjWCI9DtwlbYDzjfUbSUKSArFx1JOyBi02EkmVywzFKkJh3WNabCSSKpNxvhEpHcPDmpK0mgYbR+T0OHCXtCDnG9tsJN/i7S1JM7Fx7CMtNhJJE8k4GN9rIynH6EvS2TLON8zwHmiQpDeU21QNNg7zZXpsJJJOcL5hxqZneK8kJO2ajcNckxYbibQ7+ZhPxCpWpt60eBKwtHkZ5xtmvnQ4J5E2pdym+g4bh1kuPTYSKTTnG2bt9Dhwl0JJ2DhMfWmxkUjVynibytSfFgfuUjUyNg4TLx3OSaRVPAzGPxOraBjzPD02krB+hyIpjaMcu/3d/R9LW3E45sdj/nH/x5Imko75iINxs4+0OHCXrpZxvmH2mxYH7tLZMjYOYx5SZn0Nkl70MBjviXVxG7NUemwkVXGIvj4H49J5DsfcHfN3HLhrpxIOxo25Ni0O3LUjGecbxkyd8vMEGWmjMjYOY+ZOj3OSxTgDmdfDb4yXGUdC0lIODDOS8nDiv9EsbCDzcDAu1eGAA/fZ2ECm9fUx7xl2HTYOqS632EhUoYzzDWOipMOBuyqQsXEYEzU9Dtyv4i2sy5TbU35tUNqGn475Cw7bz/Z7dInyRitvuL8yDOgkxXN3zH8f8w02j4u4A5lGZtgKv0dS7e4Yhul36Co2kGmlY26wkUi1KTuM8kzID9g4JmMDmUfitx1JQtJaSuMov3L4Pd6mmpwNZH7NMR+wkUhLOjDsNmwc2oQGv/JrzNzp8NuR2rDM8ERslAvSmAjpsHEszltY60k4cJeuUW5NldtUtwzPcmhhNpD1JRy4S+dwMC6d0OBvohvzUno84Vp6U4MDd2Me0uF8QzpbOR7+llgXuzFTpcPGIV0tYSMx+8i/GGYbCVXPIXos6Zh3+BO52h4H49KCGhy4m/jpcTAurabsSDpiFQ1jOpxvSNVIOCcx9afDxiFVKzE0kjKMjFJUzLbjYFwKJuGcxKyb0jhucL4hhdZgIzHL5TMOxqXNyThwN/Olw/mGNsRPQKclHLib6dJh43iJNSiwDr0mMTSSnlgFy6yfh/lGQq/5jEJqGN7oGb0l4ZzEjIuD8fEahtfsHQqnZ1i8Dp2jwUZivkzH8N6wcYxXdh8Pr50CyTx982d0rowDd+N841KZp69jRmF84suLQJdJOHDfY8o1lNGlWrAGRZQ4fUF8ja6RcOC+9TjfmEbi9OubUfVaTi/e92gKCeckNg69puX069yiqiVev0i8QKbVYCOJnO5+DTWdhDUorJbXL5gbNIfMl3MnU3fjyGgOH7AGhVQ6e8/ri+cngHklHLjXnBYbx9x6rEEhNYy7iL5Dc0s4cK8lzjeW0zBuTW5QdXrGLV6PllKKVoONxMaxDz3WoJAazru4Mlpaw29P5pr50uFgfA0N1qCwzi1MHVpLxjnJXI0jo7WU1//c9VIFMpddcBmtKWEjmSItvpfXlrEGhVUuoEsWr0M1SAwPefbUVZhrjvONurRYg0JKXHchJlSTBhuJjSOWxHVr6hFLK2q5bvE83qRODZ4E/Dgd/qZErVqsQSElrr8wfainbpl9z0k6vE9es4Q1KKyPTHOR3qDaJfbTSEpBabFxRPABa1BYPdNdsH4CiCExXGxTrX1tjeMG34uR9FiDQmqY9uL1eJN4GrbRSHpsHBE1TPs+uEGLKRfd1BexYmqIOXDv8DZVZD3WoJAa5rmgM4osE2NO0uF7LboGa1BY5QKc68JWfIn6Gkm5x12+rul3/reh1AprUECZeS/0jLYisf7A3cH49mSsQWG1zLt4HdqihmUbSfl3lS9m2Di2p8UaFFJimYs/oa1qmHfgXv7ZGW1VwhoUVssyi+fRAttXZhG32Dh0nhZrUEiJZRauxId69iNxeSN5GIwntAcJa1BYH1hu8Upu0J4khplFz7iL+wYv8L0p7w9rUFBjLmw/AWgKDaffb+XPORjfL2tQUA3LLtxDPN5k38rx6R3ON2QNCq1nncX7jCStV4N6dJWGdRbuIRlJe9ZgDQqrY93F65C0Z9agoDLrLpyfAKR9K88LWYOCaqlj8Tok7ZE1KKhEHQv3kISkPUlYg970e+r0gbo0SNqT2mqQX+kdKVFX5y/xoR5pPxLDNW8NekONO5D31KcsnJ8ApH3I1FesrUEj9dTV+av+BCBpctagkWrbgTTUO7AuC/cOSVvWUHcNatCLau38D+mQtGW11yCPWHpBQ90L95CMpC0qdxisQUF1xFi8DklbZA0KqpYjA8YmI2lLEtagsFpiLd4nJG1JS6wa1KFfJGIt3EP8Sq+0DYlYtechiZXV8DXeD8TkQz3SNkStQQ07l4jV8R/HBwul+BL1f3W32hq09g4k8oN5Hi0gxZeJe9r27mtQT6yOf+oTgKS4emLVnKp2IWvuQBri/86GRwtIcTVYg8LqidXpX0qHpIh6YtWal7K7402iHBkwNhlJkWRi1Rhr0CMdsRbnrXRIiqRcsxFqizXomUSshRmbjKQIErFqS7U1aI0h+ge26T2SIthqDfqWhf2OZSWGwdVW/emYfyOpVglr0GS+YlkfGU7e3ar/O+YOSbWyBgWVGB56qfX+4RTxeBOpXontfHW3ihq05Awks/3i6vEmUr0y8R8cfMuiNWjJGUjP9hevKPcf/4Sk2uypBv2ZBWYhS+1AGvaxcIXHm0j1adhXDYp8UO0XSuev/d7hlOmQVJOeWDXEGnQvE+uFnyoZSTXIxKod1qBHOmK96FOlQ1INyrUYoWZYg55JxHrBp05G0poSsWpGqBo09xD9A/v2HklrsgbNaM6v8Sa2fWTAWB5vIq0jYQ0qZqtBcx5lsvUjA8byeBNpHdagQbgalNjf1+ZeisebSMsr11xPrFoRrgbNNQPJ7Oehnbd4vIm0vPIgXULFbDVorhlIj4v3mMebSMuyBj01Sw2aYwfS4MI9t7mjBaSKNViDngtzxFLp/LXfE1wjHZKW0BOrNliD7mVivaBLJyNpTplYNcEa9EhHrBdz6XRImtMnYtUEa9C9RKwXcq1kJM0hEasWhK9BUw7RP6Ax3iNpDtagcSarQVN9jTfhkQFjLfZrYdKOJKxB55jkeJOpjjLxyIDx/gOPN5GmZg06TzU1KOHX5s6Nx5tI0+qJVQM2UYOmmIFkfGjnXB5vIk2nwRp0rklq0BQzkB4X7xIHhlmIpOtYgy5z9fEm1+5AGly4SyX8Sq90rQZr0KVWP96kdP7a7/XVnA5J1/hMrGveGnQvE/MFqy0ZSZfIxLrWN1eDrrmF9S2awgckXeI9msLiNSgRq8PWHr+/Lp0nEesarz2ZC1y6A/mAptQg6RzWoGm95wKXfI034ZEBU/N4E2m8hDVoahfVoEt2IHb+6flgoTSe89fpXVSDzt2BlH9J+dpcQlNzFyKNU3YfCU3t7Bp07g6k/K53QnMI85vF0ooarEFzOXsXcu4OxM4/rwMebyK9xho0r7OONzlnB9Lgws0t4YOF0ksarEFzK7uQd8zAIwOWSYekU8q1EeEatgY9k4GfzWLJSHosE+sa3kUNGnsL6z1a0gckPWYNWtaoGjRmiJ7woZ01lGH6AUkJa9Aavjnmp9f+hjE7ED8Nr8MHC6WBNWgdzVt/w1s7kISdfy0+WChZg9b0Zg16awfikQHr8XgTydnHmt6sQW/tQErnT2gt7kK0d9agdb1ag17bgTS4cGvzeBPtWYM1aG2v7kJe24HY+etwwONNtE/WoDoceKEGvbQDaXDhapHwwULtT4M1qBaJF2rQSw3EwVVd/Bqj9sYaVJfRNSgT65F7jxaQtiUT69rcbQ06tQN5j2r0AWkfrEF1+qIGPR+iJ3xop2Yeb6KtS1iDavakBn317C9+POZrVKvS8P8HabusQXV7UoMe70ASdv7a+WChtixhDardkxr0eAbifcf6ebyJtswaVL8nNejxDqR0/oRq5y5EW2UNiuHXGvSwA2lw4aLweBNtUYM1KIpfa9DDDsTOH0v5kZdvkLbDGhTL4Zg/lx1IgwsXTfmWSkbahgZrUDTpmFwaiIOrmHywUFthDYrpw5iftJUk6QulgfwdRfQD0jZYg2L6wSF6TAf8jRBty2d8Aj2SA/dD9OIfKBI/sWlr3FHH8ksNetiBlO/19vf/qbodcPeh7bEGxXHgvgY97EDKk4XuQmLwk5q2yBoUx6816PFRJn4CiMEj3bVV1qAYfq1Bj7/GWz4B+Om2brfYPLRd1qD63fKoBvmDUrG4+9DWJaxBNXtSg54/SFj+wh2q0S02D23fAWtQrW55VoNOPYnuV0Tr5NZee2ENqtPoGtQd87OpJh3SvpT3fIRrcy/5zAkvnYXlJ4C6uPvQ3liD6nLyK9a/e+W/0OPxJjU44IOD2idrUB0OvFCDXjuN10+9dfCTmPbKBwvr8GINem0H4kM96zvg7kP7ZQ1a34FXatBrOxCPFlifu0DtmTVofT++9hdf24EUfgJYlw8Oau+sQet6tQa99YuEHi2wnltsHpI1aD23vFGD3tqBFOVHXj6jpbn7kAYJjzdZw5s1aMxvov+ERwss7Rabh/TggDVoaf9kwhqUifXUZPRkJD2WiXUNW4Oe6YCfzezpkHRKuTYiXMPR0zPSmFtYD/w63TIcGEqn+VDtMka/zmOG6I/9C79ON6cDPjgovaZ8Ok5oLgfOqEFfcZ4/4P35Of2N4UsLkk4rH3r/E83lrBp07g7Eh3rmc8Ddh/QWa9B8Dsd8w/DszSjnzEDAowXm9COS3mINms8dZzSP4twdSOEngHn44KA0jjVoHmfXoHN3IEXpUP9EU7rF5iGN5fEm07tlwRqUifW95tqTkHSOcsRSpGvcGvRMR+wXrJa0SLpER6xrvdZ0rCAT88WqLRlJl8jEutatQc90xHzBaomnHEvX6Yh1zdeWnitcMkR/zEHWdfw6onQdjze5zlWv3yVf433O400uc8AHB6UplE/RCZ3rwJU16NodSOGn6Mv4yUmahndCLlNFDSq7j7ILiXC/r6b7ju7apGlYgy6rQYkrTbED8WiB891x5pEBkl5kDTrfHRM8ODjFDKR4+ASgcTy2RJqWx5ucZ5IaNMUOpPBogfFusXlIU/OIpfFuqbAGZWLdA1wrCUlzyMSqBdagZzpivZBLxyPbpXl1xKoJS6ejYplYL+bSyUiaUyZWTbAGPdMR6wVdKj2SltARqzaErUFTDdEfc5h+mg8OSsvwK72nTV6Dpvoa73Meb/LUAY8tkZZkDXrqwAw16Cvm8Qe83//Y3475CUlLsQY9FaoGebTA0/uOCUlLsgYtUIPmmIEUHi3wmzt8cFBamjXoN3fMVIPmmoEUHm8y8NgSaR3WoMFsNWiuHUjh8SYeWyKtyRoUvAZlYt0rnDoJSWvKxKoZ1qBnOmK94FOlQ1INyrUYoWZYg07IxHrRp0pGUg0ysWrHVHnHRnTEeuGvTY+kmnTEqiEhatCcQ/TH9jbI8tgSqS7WoBnM+TXex/b0a2EHPLZEqtFejjc5sFANmusok+f+l/0cLeCxJVKdrEGB7eFogR6/uivVag81qPz/SyxkqRlIsYejBe7wwUGpVnuoQeVXTw8sZKkZyIOtHy3gsSVS3axBE1pyB1KUTwBb/V3wW2weUu22fLzJLTuoQZlY9xTHJiEpgkys2mINeqYj1sK8lQ5JkZRrNkJtsQadkIm1OG8lIymSTKwa81YaduYzsRbopfRIiqgjVq2prgYtPUR/bCuDLI8tkWKyBgW2hYd6eiRFZg26wpo7kC081OPuQ4oteg3a9e++R96FLHpkgKRZRL8TkljRmjuQIvIuZNEjAyTNInINusUa9EsHjdTxq+j8kiZTdiGRao816JmOWAvXImlLPmENCisTa/ESkrYkE6sGfY2e6IixcB2StsgaFFgmxuK9Q9IWZWLUoAad1FP3wvVI2rIOa9Boa3+N97nav07ng4PSttV+vIk16BU1P9RTVeeXNAtr0Blq24HU/FCPnV/avppr0FZ/SXFStX4CSEjaA2vQSLXtQIoaf7P4Fo8MkPaixl3ILdag0RJ2fknrSViDQuuoY+FaJO2RNSiwTB2L55EB0j5l6qhBGV2kY92F65C0Z9agwDLrLl6DpD3LWINC61ln4XokCT5jDTqpxq/xPrfW1+l8cHDfMsPtg5IG7dlajxVYgyawxkM91Xd+zaK8177j9CfO8p5o0B5Zg4K7YdnF+w7tSSkQHxhXJPr7vzehPblh2Rp0gyaz9CeAhPYgMb5xnEqL75W9sAYF9z3LLFyLti4z7dczW/yu/h5YgwJLLLN4CW1VZt7v9Zd/doO2KmENCq1coHMuXIu25mEw3rPMxf8zDty3rMMaFFZm3sXLaCvOGYzP2Ug+4ifKLclYg0LrmGfhOrQFiaFo1/Z7Di02kq0otcIaFFRmnsVrUGSZek5PfauRZBRZxhoUWs+0C9ejqDIxGsfzlIcVGxRVjzUorBumXbwGRbLGYHyu9Azvvz+iSMr7zxoU1JQP9fQoihoG43M2khbnJFFYg4K7YZrFu0G1S9Q5GJ8rLTaSCG6YZr0/osVN9QkgoVplYs43psonHLjXzBoU3LVHC7SoRpl9N47n6fEeea2sQYGV3yu/ZvESqsXDfKMUy5/Nq40koVokrEGhdVy2cC2qwZYH43M2khaLTy06rEFhZS5bvIzWVHaPexqMz5UWG8naMtag0DrOW7gOrSXjfGOOdFiQ1lRe/3PW6zOqRua8xWvQ0jI2jiXS4/t7DRlrUGhjb4X0aCnON9ZvJAktpbzmY9dGlblh3OI1aG42jroaSYuNZAljjzdpUHXGPNTTozllhmJl46gzLTaSOY2tQZ57VqkbXl+8GzSHjPONSOlw4D6XG3iziatSb30CSGhK77BxRE6Pt1OmZg0K7hbs/DNyvrHdRpLQFF463qRF1cucXryErmHj2H7K2rZ4rVzrpSOWEgqh4+nCtehSmeH1q7HgmfnSYsG7RsfT1/NHFEbm6eJldK6M8w0zvAfKrEvnyTx9HTMKpbzxy8J5ZMB5Gmwc5sv0OHA/V8dvr52CKZ+ayuI16C3ON8zY9DhwHyszvGYNCsndx+tsHObSOHAfp0Nh+cTnaRkH42a6tAzfPNKXrEHajIzzDTNfOrxdI21Og43DLJceG4kUmvMNs3b6Y77FOYkUho3D1JgWG4lUrXzMJ2IVFbO/tPhQnVSNjPMNEy8dzkmkVZTbVA02DhM/PTaSsH6HIimNowwlv8Pvl2tbDgw/xfDD/R9LmkjCwbjZT1ocuEtXy3ibyuw3LQ7cpbNlbBzGPKTDOYn0qjLTKLONcgBklAvbmCXTYyOpikP09TkYl85zwIG7di4d8xEH48ZckxYH7tqRjPMNY6ZOiwN3bVjGxmHM3CkzxAZpAx4G4z2xLkJjoqfHRjI7h+jzcDAu1eFwzI/H/AMH7pOzgUwrMTSOBhuHVJvbY/6OjUSVyTjfMCZKys8eZKSVZWwcxkRNj3OSq3gL63IdfoqRtuCnY/5yzL/RWX6PLlXecH9lePNJiueO4Rr+BpvHRdyBTCMzbIXfI6l2dwzD9Dt0FRvItNIxN9hIpBrdMpyfdYcmYQOZR2J4BuS/8JweaU3l1lR5BuR7vE2lgBp8Et2YpVMOKb3B57Fm5Q5kOQ3Dra2MpLncMew4fkTaoMxwLzbKJzljIqTDD2eLcweynoQDd+kaZaZRdhoOxldiA1lf4rfbWwlJb3EwLp3Q4MDdmJfS42BcelODZ2wZ85AO5xvS2TIO3M1+02HjkK6WsJGYfaQ8v1FmG1+j6jlEjyXhwF3b5GBcWlCDA3cTPz3+9LO0mgYH7iZeOpxvSNUo94xviVVEzP7SYeOQqpWwkZi68jAYT0gKITHcW+6JVWzMduKJuNIGNNhIzHLpcTAubc47HLib+dLhfEPavIRzEjNdOmwc0u4khkZS7lVHKVamjjgYl/SLhHMSMy4OxiW9qMFGYr7MZxyMSxop48DdON+QdIWEA3cbhyRdITE0kp5YhdCMz8N8IyFJM0g4J9lq43C+IWkxDTaSyOlwMC5pZRkH7tEaR0aSKpJw4F5zPmHjkFS5hAP3WuJ8Q1JICeckNg5JulKDjWSJdPevtY1D0uZkhnvxUQpypMaRkaQdSDhwnyItNg5JO5Vw4H5unG9I0iOlGDbYSGwcknSFBhvJ43T3r4kkaaTMvuckHc43JOkqiX01khYbhyRNKjH8DndPrIYwJs43JGkhDdtoJDYOSVpJw/B73ZGaRkmHg3FJqkImxpykw/mGJFUpUV8jKbepWmwckhRCYpgt9KzbOMr/BucbkhRUw7KNxMYhSRvTMO9P75Z/9jskSZuVmXZO0uF8Q5J2JXF5Iym3qcpDjV8jSdqtxPiBu/MNSdJJDacbSflzN9g4JElvaBhmG843pBf8P8QugRRrUGSGAAAAAElFTkSuQmCC\";\n"
  },
  {
    "path": "widget/src/config/config.ts",
    "content": "import { WidgetConfig } from \"@/types/widget-types\";\n\n/**\n * Resolve widget configuration from attributes and environment variables\n * Priority: attributes > environment variables > defaults\n */\nexport function resolveConfig(attributes: Partial<WidgetConfig>): WidgetConfig {\n  const config = {\n    backendUrl:\n      attributes.backendUrl || import.meta.env.VITE_WIDGET_BACKEND_URL || \"\",\n    apiKey: attributes.apiKey || import.meta.env.VITE_WIDGET_API_KEY || \"\",\n    agentId: attributes.agentId,\n    primaryColor: attributes.primaryColor,\n    backgroundColor: attributes.backgroundColor,\n    textColor: attributes.textColor,\n    agentName: attributes.agentName || \"Assistant\",\n    logo: attributes.logo,\n    mode: attributes.mode || \"launcher\",\n    includeCitations: attributes.includeCitations ?? false,\n  };\n\n  if (!config.backendUrl || !config.apiKey) {\n    throw new Error(\n      \"backendUrl and apiKey are required for the widget to function\",\n    );\n  }\n\n  return config;\n}\n"
  },
  {
    "path": "widget/src/index.ts",
    "content": "/**\n * Onyx Chat Widget - Entry Point\n * Exports the main web component\n */\n\nimport { OnyxChatWidget } from \"./widget\";\n\n// Define the custom element\nif (\n  typeof customElements !== \"undefined\" &&\n  !customElements.get(\"onyx-chat-widget\")\n) {\n  customElements.define(\"onyx-chat-widget\", OnyxChatWidget);\n}\n\n// Export for use in other modules\nexport { OnyxChatWidget };\nexport * from \"./types/api-types\";\nexport * from \"./types/widget-types\";\n"
  },
  {
    "path": "widget/src/services/api-service.ts",
    "content": "/**\n * API Service - Handles all communication with Onyx backend\n */\n\nimport {\n  Packet,\n  CreateSessionRequest,\n  CreateSessionResponse,\n  SendMessageRequest,\n} from \"@/types/api-types\";\n\nexport class ApiService {\n  private maxRetries = 3;\n  private retryDelay = 1000;\n\n  constructor(\n    private backendUrl: string,\n    private apiKey: string,\n  ) {}\n\n  /**\n   * Create a new chat session\n   */\n  async createChatSession(agentId?: number): Promise<string> {\n    const request: CreateSessionRequest = {};\n    if (agentId !== undefined) {\n      request.persona_id = agentId;\n    }\n\n    const response = await this.fetchWithRetry(\n      `${this.backendUrl}/chat/create-chat-session`,\n      {\n        method: \"POST\",\n        headers: this.getHeaders(),\n        body: JSON.stringify(request),\n      },\n    );\n\n    if (!response.ok) {\n      throw new Error(\n        `Failed to create session: ${response.status} ${response.statusText}`,\n      );\n    }\n\n    const data = (await response.json()) as CreateSessionResponse;\n    return data.chat_session_id;\n  }\n\n  /**\n   * Stream a message to the chat\n   * Returns an async generator of packets\n   */\n  async *streamMessage(params: {\n    message: string;\n    chatSessionId: string;\n    parentMessageId?: number | null;\n    signal?: AbortSignal;\n    includeCitations?: boolean;\n  }): AsyncGenerator<Packet, void, unknown> {\n    const request: SendMessageRequest = {\n      message: params.message,\n      chat_session_id: params.chatSessionId,\n      parent_message_id: params.parentMessageId ?? null,\n      origin: \"widget\",\n      include_citations: params.includeCitations ?? false,\n    };\n\n    const response = await this.fetchWithRetry(\n      `${this.backendUrl}/chat/send-chat-message`,\n      {\n        method: \"POST\",\n        headers: this.getHeaders(),\n        body: JSON.stringify(request),\n        signal: params.signal,\n      },\n    );\n\n    if (!response.ok) {\n      throw new Error(\n        `Failed to send message: ${response.status} ${response.statusText}`,\n      );\n    }\n\n    // Parse SSE stream\n    yield* this.parseSSEStream(response);\n  }\n\n  /**\n   * Parse Server-Sent Events stream\n   * Backend returns newline-delimited JSON packets\n   */\n  private async *parseSSEStream(\n    response: Response,\n  ): AsyncGenerator<Packet, void, unknown> {\n    const reader = response.body?.getReader();\n    if (!reader) {\n      throw new Error(\"Response body is not readable\");\n    }\n\n    const decoder = new TextDecoder();\n    let buffer = \"\";\n\n    try {\n      while (true) {\n        const { done, value } = await reader.read();\n        if (done) break;\n\n        buffer += decoder.decode(value, { stream: true });\n        const lines = buffer.split(\"\\n\");\n        buffer = lines.pop() || \"\"; // Keep incomplete line in buffer\n\n        for (const line of lines) {\n          if (line.trim()) {\n            try {\n              const rawData = JSON.parse(line);\n\n              // Check if this is a MessageResponseIDInfo (not wrapped in Packet)\n              if (\n                \"user_message_id\" in rawData &&\n                \"reserved_assistant_message_id\" in rawData\n              ) {\n                // Wrap it in a Packet structure for consistent handling\n                const packet: Packet = {\n                  obj: rawData as any,\n                };\n                yield packet;\n              } else {\n                // Regular packet with placement and obj\n                yield rawData as Packet;\n              }\n            } catch (e) {\n              // Fail fast on malformed packets - don't hide backend issues\n              throw new Error(\n                `Failed to parse SSE packet: ${line}. Error: ${e}`,\n              );\n            }\n          }\n        }\n      }\n\n      // Process any remaining data in buffer\n      if (buffer.trim()) {\n        try {\n          const rawData = JSON.parse(buffer);\n\n          // Check if this is a MessageResponseIDInfo (not wrapped in Packet)\n          if (\n            \"user_message_id\" in rawData &&\n            \"reserved_assistant_message_id\" in rawData\n          ) {\n            const packet: Packet = {\n              obj: rawData as any,\n            };\n            yield packet;\n          } else {\n            yield rawData as Packet;\n          }\n        } catch (e) {\n          // Fail fast on malformed final buffer packets\n          throw new Error(\n            `Failed to parse final packet: ${buffer}. Error: ${e}`,\n          );\n        }\n      }\n    } finally {\n      reader.releaseLock();\n    }\n  }\n\n  /**\n   * Fetch with retry logic for network failures and 5xx errors\n   */\n  private async fetchWithRetry(\n    url: string,\n    options: RequestInit,\n    retries = 0,\n  ): Promise<Response> {\n    try {\n      const response = await fetch(url, options);\n\n      // Retry on 5xx or 429 errors\n      if (!response.ok && retries < this.maxRetries) {\n        if (response.status >= 500 || response.status === 429) {\n          const delay = this.retryDelay * Math.pow(2, retries);\n          await new Promise((resolve) => setTimeout(resolve, delay));\n          return this.fetchWithRetry(url, options, retries + 1);\n        }\n      }\n\n      return response;\n    } catch (error) {\n      // Don't retry if the request was aborted by the caller\n      if (error instanceof Error && error.name === \"AbortError\") {\n        throw error;\n      }\n\n      // Retry on network errors\n      if (retries < this.maxRetries) {\n        const delay = this.retryDelay * Math.pow(2, retries);\n        await new Promise((resolve) => setTimeout(resolve, delay));\n        return this.fetchWithRetry(url, options, retries + 1);\n      }\n      throw error;\n    }\n  }\n\n  /**\n   * Get common headers for API requests\n   */\n  private getHeaders(): Record<string, string> {\n    return {\n      \"Content-Type\": \"application/json\",\n      Authorization: `Bearer ${this.apiKey}`,\n    };\n  }\n}\n"
  },
  {
    "path": "widget/src/services/stream-parser.ts",
    "content": "/**\n * Stream Parser - Processes SSE packets and updates state\n */\n\nimport { Packet, Message, SearchDocument } from \"@/types/api-types\";\nimport { ChatMessage } from \"@/types/widget-types\";\n\nexport interface ParsedMessage {\n  message: ChatMessage;\n  isComplete: boolean;\n}\n\nexport interface MessageIDs {\n  userMessageId: number | null;\n  assistantMessageId: number;\n}\n\n/**\n * Process a single packet from the SSE stream\n * Returns the current message being built and any state updates\n */\nexport function processPacket(\n  packet: Packet,\n  currentMessage: ChatMessage | null,\n): {\n  message: ChatMessage | null;\n  citation?: { citation_number: number; document_id: string };\n  documents?: SearchDocument[];\n  status?: string;\n  messageIds?: MessageIDs;\n} {\n  // Safety check - throw on malformed packets to fail fast\n  if (!packet || !packet.obj) {\n    throw new Error(\"Received malformed packet: packet.obj is missing\");\n  }\n\n  const obj = packet.obj;\n\n  // Handle MessageResponseIDInfo (doesn't have a type field)\n  if (\"reserved_assistant_message_id\" in obj && \"user_message_id\" in obj) {\n    return {\n      message: currentMessage,\n      messageIds: {\n        userMessageId: obj.user_message_id,\n        assistantMessageId: obj.reserved_assistant_message_id,\n      },\n    };\n  }\n\n  // Type guard - ensure obj has a type field\n  if (!(\"type\" in obj)) {\n    throw new Error(\"Packet missing type field\");\n  }\n\n  switch (obj.type) {\n    case \"message_start\":\n      // Start of a new assistant response\n      return {\n        message: {\n          id: `msg-${Date.now()}`,\n          role: \"assistant\",\n          content: \"\",\n          timestamp: Date.now(),\n          isStreaming: true,\n        },\n        status: \"\", // Clear status when response starts\n      };\n\n    case \"message_delta\":\n      // Append to current message\n      if (currentMessage && currentMessage.role === \"assistant\") {\n        return {\n          message: {\n            ...currentMessage,\n            content: currentMessage.content + (obj.content || \"\"),\n          },\n          // No status update - let the message speak for itself\n        };\n      }\n      return { message: currentMessage };\n\n    case \"citation_info\":\n      // Handle individual citation info packet\n      return {\n        message: currentMessage,\n        citation: {\n          citation_number: obj.citation_number,\n          document_id: obj.document_id,\n        },\n      };\n\n    case \"search_tool_start\":\n      // Tool is starting - check if it's internet search\n      return {\n        message: currentMessage,\n        status: obj.is_internet_search\n          ? \"Searching the web...\"\n          : \"Searching internally...\",\n      };\n\n    case \"search_tool_queries_delta\":\n      // Queries being generated\n      return {\n        message: currentMessage,\n        status: \"Generating search queries...\",\n      };\n\n    case \"search_tool_documents_delta\":\n      // Search results coming in — capture document metadata for citation resolution\n      return {\n        message: currentMessage,\n        documents: obj.documents,\n        status: \"Reading documents...\",\n      };\n\n    case \"open_url_start\":\n      return {\n        message: currentMessage,\n        status: \"Opening URLs...\",\n      };\n\n    case \"open_url_urls\":\n      return {\n        message: currentMessage,\n        status: \"Fetching web pages...\",\n      };\n\n    case \"open_url_documents\":\n      // Capture documents from URL fetching for citation resolution\n      return {\n        message: currentMessage,\n        documents: obj.documents,\n        status: \"Processing web content...\",\n      };\n\n    case \"image_generation_start\":\n      return {\n        message: currentMessage,\n        status: \"Generating image...\",\n      };\n\n    case \"image_generation_heartbeat\":\n      return {\n        message: currentMessage,\n        status: \"Generating image...\",\n      };\n\n    case \"python_tool_start\":\n      return {\n        message: currentMessage,\n        status: \"Running Python code...\",\n      };\n\n    case \"python_tool_delta\":\n      return {\n        message: currentMessage,\n        status: \"Running Python code...\",\n      };\n\n    case \"custom_tool_start\":\n      return {\n        message: currentMessage,\n        status: \"Running custom tool...\",\n      };\n\n    case \"reasoning_start\":\n      return {\n        message: currentMessage,\n        status: \"Thinking...\",\n      };\n\n    case \"reasoning_delta\":\n      return {\n        message: currentMessage,\n        status: \"Thinking...\",\n      };\n\n    case \"deep_research_plan_start\":\n      return {\n        message: currentMessage,\n        status: \"Planning research...\",\n      };\n\n    case \"research_agent_start\":\n      return {\n        message: currentMessage,\n        status: \"Researching...\",\n      };\n\n    case \"intermediate_report_start\":\n      return {\n        message: currentMessage,\n        status: \"Generating report...\",\n      };\n\n    case \"stop\":\n    case \"overall_stop\":\n      // End of stream - mark message as complete\n      if (currentMessage) {\n        return {\n          message: {\n            ...currentMessage,\n            isStreaming: false,\n          },\n        };\n      }\n      return { message: currentMessage };\n\n    case \"error\":\n      // Error occurred during streaming - throw to fail fast\n      throw new Error(`Stream error: ${obj.exception}`);\n\n    default:\n      // Unknown packet type\n      return { message: currentMessage };\n  }\n}\n\n/**\n * Convert API Message type to widget ChatMessage\n */\nexport function convertMessage(msg: Message): ChatMessage {\n  return {\n    id: msg.id,\n    role: msg.role,\n    content: msg.content,\n    timestamp: msg.timestamp,\n    isStreaming: msg.isStreaming,\n  };\n}\n\n/**\n * Check if a packet is the final packet in a stream\n */\nexport function isStreamComplete(packet: Packet): boolean {\n  return \"type\" in packet.obj && packet.obj.type === \"overall_stop\";\n}\n\n/**\n * Check if a packet is an error\n */\nexport function isStreamError(packet: Packet): boolean {\n  return \"type\" in packet.obj && packet.obj.type === \"error\";\n}\n"
  },
  {
    "path": "widget/src/styles/colors.ts",
    "content": "import { css } from \"lit\";\n\nexport const colors = css`\n  :host {\n    /* Base Colors - Aligned with web/src/app/css/colors.css */\n    --grey-100: #000000;\n    --grey-10: #e6e6e6;\n    --grey-00: #ffffff;\n    --alpha-grey-100-75: #000000bf;\n    --alpha-grey-100-20: #00000033;\n\n    /* Onyx Brand Colors */\n    --onyx-ink-100: #000000;\n    --onyx-ink-95: #1c1c1c;\n\n    /* Theme / Primary - Configurable via env vars */\n    --theme-primary-06: var(--onyx-ink-100);\n    --theme-primary-05: var(--onyx-ink-95);\n\n    /* Background / Neutral */\n    --background-neutral-00: var(--grey-00);\n    --background-neutral-03: var(--grey-10);\n\n    /* Text */\n    --text-04: var(--alpha-grey-100-75);\n    --text-light-05: var(--grey-00);\n\n    /* Border */\n    --border-01: var(--alpha-grey-100-20);\n\n    /* Shadow */\n    --shadow-02: 0px 2px 12px rgba(0, 0, 0, 0.1);\n\n    /* Status / Error */\n    --status-error-01: #fee;\n    --status-error-05: #c00;\n  }\n`;\n"
  },
  {
    "path": "widget/src/styles/theme.ts",
    "content": "import { css } from \"lit\";\nimport { colors } from \"./colors\";\n\n/**\n * Onyx Design System - Theme\n * Typography, spacing, and layout tokens from Figma\n */\nexport const theme = css`\n  ${colors}\n\n  :host {\n    /* Typography - Hanken Grotesk */\n    --onyx-font-family: \"Hanken Grotesk\", -apple-system, BlinkMacSystemFont,\n      \"Segoe UI\", sans-serif;\n    --onyx-font-family-mono: \"DM Mono\", \"Monaco\", \"Menlo\", monospace;\n\n    /* Font Sizes */\n    --onyx-font-size-small: 10px;\n    --onyx-font-size-secondary: 12px;\n    --onyx-font-size-sm: 13px;\n    --onyx-font-size-main: 14px;\n    --onyx-font-size-label: 16px;\n\n    /* Line Heights */\n    --onyx-line-height-small: 12px;\n    --onyx-line-height-secondary: 16px;\n    --onyx-line-height-main: 20px;\n    --onyx-line-height-label: 24px;\n    --onyx-line-height-section: 28px;\n    --onyx-line-height-headline: 36px;\n\n    /* Font Weights */\n    --onyx-weight-regular: 400;\n    --onyx-weight-medium: 500;\n    --onyx-weight-semibold: 600;\n\n    /* Content Heights */\n    --onyx-height-content-secondary: 12px;\n    --onyx-height-content-main: 16px;\n    --onyx-height-content-label: 18px;\n    --onyx-height-content-section: 24px;\n\n    /* Border Radius - from Figma */\n    --onyx-radius-04: 4px;\n    --onyx-radius-08: 8px;\n    --onyx-radius-12: 12px;\n    --onyx-radius-16: 16px;\n    --onyx-radius-round: 1000px;\n\n    /* Spacing - Block */\n    --onyx-space-block-1x: 4px;\n    --onyx-space-block-2x: 8px;\n    --onyx-space-block-3x: 12px;\n    --onyx-space-block-4x: 16px;\n    --onyx-space-block-6x: 24px;\n\n    /* Spacing - Inline */\n    --onyx-space-inline-0: 0px;\n    --onyx-space-inline-0_5x: 2px;\n    --onyx-space-inline-1x: 4px;\n\n    /* Legacy spacing aliases (for compatibility) */\n    --onyx-space-2xs: var(--onyx-space-block-1x);\n    --onyx-space-xs: var(--onyx-space-block-2x);\n    --onyx-space-sm: var(--onyx-space-block-3x);\n    --onyx-space-md: var(--onyx-space-block-4x);\n    --onyx-space-lg: var(--onyx-space-block-6x);\n\n    /* Padding */\n    --onyx-padding-icon-0: 0px;\n    --onyx-padding-icon-0_5x: 2px;\n    --onyx-padding-text-0_5x: 2px;\n    --onyx-padding-text-1x: 4px;\n\n    /* Icon Weights (stroke-width) */\n    --onyx-icon-weight-secondary: 1px;\n    --onyx-icon-weight-main: 1.5px;\n    --onyx-icon-weight-section: 2px;\n\n    /* Z-index */\n    --onyx-z-launcher: 9999;\n    --onyx-z-widget: 10000;\n\n    /* Transitions */\n    --onyx-transition-fast: 150ms cubic-bezier(0.4, 0, 0.2, 1);\n    --onyx-transition-base: 200ms cubic-bezier(0.4, 0, 0.2, 1);\n  }\n\n  * {\n    box-sizing: border-box;\n  }\n`;\n"
  },
  {
    "path": "widget/src/styles/widget-styles.ts",
    "content": "import { css } from \"lit\";\n\n/**\n * Onyx Chat Widget - Component Styles\n * All styling for the main widget component\n */\nexport const widgetStyles = css`\n  :host {\n    display: block;\n    font-family: var(--onyx-font-family);\n  }\n\n  .launcher {\n    position: fixed;\n    background: var(--background-neutral-00);\n    bottom: 20px;\n    right: 20px;\n    width: 56px;\n    height: 56px;\n    border-radius: 50%;\n    color: var(--text-light-05);\n    border: none;\n    cursor: pointer;\n    box-shadow: var(--shadow-02);\n    display: flex;\n    align-items: center;\n    justify-content: center;\n    z-index: var(--onyx-z-launcher);\n    transition:\n      transform 200ms cubic-bezier(0.4, 0, 0.2, 1),\n      box-shadow 200ms cubic-bezier(0.4, 0, 0.2, 1),\n      background 200ms cubic-bezier(0.4, 0, 0.2, 1);\n  }\n\n  .launcher img {\n    filter: drop-shadow(0px 1px 2px rgba(255, 255, 255, 0.3));\n  }\n\n  .launcher:hover {\n    transform: translateY(-2px);\n    background: var(--background-neutral-03);\n    box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.2);\n  }\n\n  .launcher:active {\n    transform: translateY(0px);\n    box-shadow: var(--shadow-02);\n  }\n\n  .container {\n    position: fixed;\n    bottom: 20px;\n    right: 20px;\n    width: 400px;\n    height: 600px;\n    background: var(--background-neutral-00);\n    border-radius: var(--onyx-radius-16);\n    box-shadow: var(--shadow-02);\n    display: flex;\n    flex-direction: column;\n    overflow: hidden;\n    z-index: var(--onyx-z-widget);\n    border: 1px solid var(--border-01);\n    animation: fadeInSlideUp 300ms cubic-bezier(0.4, 0, 0.2, 1) forwards;\n    opacity: 0;\n    transform: translateY(20px);\n  }\n\n  @keyframes fadeInSlideUp {\n    to {\n      opacity: 1;\n      transform: translateY(0);\n    }\n  }\n\n  .container.inline {\n    position: static;\n    width: 100%;\n    height: 100%;\n    border-radius: var(--onyx-radius-08);\n    animation: none;\n    opacity: 1;\n    transform: none;\n  }\n\n  .container.inline.compact {\n    background: transparent;\n    border: none;\n    box-shadow: none;\n    border-radius: var(--onyx-radius-16);\n  }\n\n  @media (max-width: 768px) {\n    .container:not(.inline) {\n      position: fixed;\n      inset: 0;\n      width: 100vw;\n      height: 100vh;\n      border-radius: 0;\n      bottom: 0;\n      right: 0;\n    }\n  }\n\n  .header {\n    display: flex;\n    align-items: center;\n    justify-content: space-between;\n    padding: var(--onyx-space-md);\n    background: var(--background-neutral-00);\n    color: var(--text-04);\n    border-bottom: 1px solid var(--border-01);\n  }\n\n  .header-left {\n    display: flex;\n    align-items: center;\n    gap: var(--onyx-space-sm);\n  }\n\n  .header-right {\n    display: flex;\n    align-items: center;\n    gap: var(--onyx-space-xs);\n  }\n\n  .avatar {\n    width: 32px;\n    height: 32px;\n    border-radius: 50%;\n    background: var(--background-neutral-00);\n    display: flex;\n    align-items: center;\n    justify-content: center;\n    font-size: 18px;\n  }\n\n  .header-title {\n    font-weight: 600;\n    font-size: var(--onyx-font-size-label);\n    line-height: var(--onyx-line-height-label);\n    color: var(--text-04);\n  }\n\n  .icon-button {\n    background: none;\n    border: none;\n    color: var(--text-04);\n    cursor: pointer;\n    padding: var(--onyx-space-xs);\n    border-radius: var(--onyx-radius-08);\n    display: flex;\n    align-items: center;\n    justify-content: center;\n    transition:\n      background var(--onyx-transition-fast),\n      color var(--onyx-transition-fast);\n    font-size: 18px;\n    width: 32px;\n    height: 32px;\n  }\n\n  .icon-button:hover {\n    background: var(--background-neutral-00);\n    color: var(--text-04);\n  }\n\n  .messages {\n    flex: 1;\n    overflow-y: auto;\n    padding: var(--onyx-space-md);\n    display: flex;\n    flex-direction: column;\n    gap: var(--onyx-space-md);\n    background: var(--background-neutral-00);\n  }\n\n  .message {\n    display: flex;\n    flex-direction: column;\n    gap: var(--onyx-space-xs);\n  }\n\n  .message.user {\n    align-items: flex-end;\n  }\n\n  .message.assistant {\n    align-items: flex-start;\n  }\n\n  .message-bubble {\n    max-width: 85%;\n    padding: var(--onyx-space-sm) var(--onyx-space-md);\n    border-radius: var(--onyx-radius-12);\n    word-wrap: break-word;\n    font-size: var(--onyx-font-size-main);\n    line-height: var(--onyx-line-height-main);\n  }\n\n  .message.user .message-bubble {\n    background: var(--onyx-user-message-bg);\n    color: var(--text-04);\n    border: 1px solid var(--border-01);\n  }\n\n  .message.assistant .message-bubble {\n    background: var(--onyx-assistant-message-bg);\n    color: var(--text-04);\n    border: 1px solid var(--border-01);\n  }\n\n  /* Markdown styles */\n  .message-bubble :first-child {\n    margin-top: 0;\n  }\n\n  .message-bubble :last-child {\n    margin-bottom: 0;\n  }\n\n  .message-bubble p {\n    margin: 0.5em 0;\n  }\n\n  .message-bubble code {\n    background: rgba(0, 0, 0, 0.08);\n    padding: 2px 4px;\n    border-radius: 3px;\n    font-family: \"Monaco\", \"Courier New\", monospace;\n    font-size: 0.9em;\n  }\n\n  .message-bubble pre {\n    background: rgba(0, 0, 0, 0.08);\n    padding: var(--onyx-space-sm);\n    border-radius: var(--onyx-radius-sm);\n    overflow-x: auto;\n    margin: 0.5em 0;\n  }\n\n  .message-bubble pre code {\n    background: none;\n    padding: 0;\n  }\n\n  .message-bubble ul,\n  .message-bubble ol {\n    margin: 0.5em 0;\n    padding-left: 1.5em;\n  }\n\n  .message-bubble li {\n    margin: 0.25em 0;\n  }\n\n  .message-bubble a {\n    color: var(--theme-primary-05);\n    text-decoration: underline;\n  }\n\n  .message-bubble a:hover {\n    text-decoration: none;\n  }\n\n  .message-bubble h1,\n  .message-bubble h2,\n  .message-bubble h3,\n  .message-bubble h4,\n  .message-bubble h5,\n  .message-bubble h6 {\n    margin: 0.5em 0 0.25em 0;\n    font-weight: 600;\n  }\n\n  .message-bubble h1 {\n    font-size: 1.5em;\n  }\n  .message-bubble h2 {\n    font-size: 1.3em;\n  }\n  .message-bubble h3 {\n    font-size: 1.1em;\n  }\n\n  .message-bubble blockquote {\n    border-left: 3px solid var(--border-01);\n    margin: 0.5em 0;\n    padding-left: var(--onyx-space-md);\n    color: var(--text-04);\n  }\n\n  .message-bubble strong {\n    font-weight: 600;\n  }\n\n  .message-bubble em {\n    font-style: italic;\n  }\n\n  .message-bubble hr {\n    border: none;\n    border-top: 1px solid var(--border-01);\n    margin: 0.5em 0;\n  }\n\n  .status-container {\n    display: flex;\n    align-items: center;\n    gap: var(--onyx-space-sm);\n  }\n\n  .typing-indicator {\n    display: flex;\n    gap: 4px;\n  }\n\n  .typing-dot {\n    width: 8px;\n    height: 8px;\n    border-radius: 50%;\n    background: var(--text-04);\n    animation: typing 1.4s infinite;\n  }\n\n  .typing-dot:nth-child(2) {\n    animation-delay: 0.2s;\n  }\n\n  .typing-dot:nth-child(3) {\n    animation-delay: 0.4s;\n  }\n\n  @keyframes typing {\n    0%,\n    60%,\n    100% {\n      opacity: 0.3;\n      transform: translateY(0);\n    }\n    30% {\n      opacity: 1;\n      transform: translateY(-4px);\n    }\n  }\n\n  .status-text {\n    color: var(--text-04);\n    font-size: var(--onyx-font-size-sm);\n    font-style: italic;\n  }\n\n  .input-wrapper {\n    border-top: 1px solid var(--border-01);\n    background: var(--background-neutral-00);\n  }\n\n  .input-container {\n    padding: var(--onyx-space-md) var(--onyx-space-md) 4px;\n    display: flex;\n    align-items: center;\n    gap: var(--onyx-space-xs);\n  }\n\n  .input {\n    flex: 1;\n    min-width: 0;\n    padding: var(--onyx-space-xs) var(--onyx-space-sm);\n    border: 1px solid var(--theme-primary-05);\n    border-radius: var(--onyx-radius-08);\n    font-size: var(--onyx-font-size-main);\n    line-height: var(--onyx-line-height-main);\n    outline: none;\n    font-family: var(--onyx-font-family);\n    background: var(--background-neutral-00);\n    color: var(--text-04);\n    transition:\n      border-color var(--onyx-transition-fast),\n      box-shadow var(--onyx-transition-fast);\n    height: 36px;\n  }\n\n  .input:focus {\n    border-color: var(--theme-primary-05);\n    outline: 2px solid var(--theme-primary-05);\n    outline-offset: -2px;\n  }\n\n  .powered-by {\n    font-size: 10px;\n    color: var(--text-04);\n    opacity: 0.5;\n    text-align: center;\n    padding: 0 var(--onyx-space-md) var(--onyx-space-xs);\n  }\n\n  .powered-by a {\n    color: var(--text-04);\n    text-decoration: none;\n    transition: opacity var(--onyx-transition-fast);\n  }\n\n  .powered-by a:hover {\n    opacity: 0.8;\n    text-decoration: underline;\n  }\n\n  .send-button {\n    background: var(--theme-primary-05);\n    border: none;\n    color: var(--text-light-05);\n    cursor: pointer;\n    padding: var(--onyx-space-sm);\n    border-radius: 50%;\n    display: flex;\n    align-items: center;\n    justify-content: center;\n    transition:\n      background var(--onyx-transition-fast),\n      transform var(--onyx-transition-fast);\n    flex-shrink: 0;\n    width: 36px;\n    height: 36px;\n  }\n\n  .send-button svg {\n    width: 18px;\n    height: 18px;\n  }\n\n  .send-button:hover:not(:disabled) {\n    background: var(--theme-primary-06);\n    transform: scale(1.05);\n  }\n\n  .send-button:active:not(:disabled) {\n    transform: scale(0.95);\n  }\n\n  .send-button:disabled {\n    opacity: 0.5;\n    cursor: not-allowed;\n  }\n\n  .disclaimer {\n    padding: var(--onyx-space-xs) var(--onyx-space-md);\n    background: var(--background-neutral-00);\n    color: var(--text-04);\n    font-size: 11px;\n    line-height: 1.3;\n    text-align: center;\n    border-bottom: 1px solid var(--border-01);\n  }\n\n  .error {\n    padding: var(--onyx-space-md);\n    background: var(--status-error-01);\n    color: var(--status-error-05);\n    border-radius: var(--onyx-radius-08);\n    margin: var(--onyx-space-md);\n    font-size: var(--onyx-font-size-main);\n  }\n\n  /* Compact inline mode (no messages) */\n  .container.compact {\n    height: auto;\n    min-height: unset;\n    border: none;\n    box-shadow: none;\n    background: transparent;\n  }\n\n  .compact-input-container {\n    display: flex;\n    align-items: center;\n    gap: var(--onyx-space-sm);\n    padding: var(--onyx-space-md);\n    background: var(--background-neutral-00);\n    border-radius: var(--onyx-radius-16);\n    border: 1px solid var(--border-01);\n    box-shadow: var(--shadow-02);\n    transition:\n      border-color var(--onyx-transition-base),\n      box-shadow var(--onyx-transition-base);\n  }\n\n  .compact-input-container:focus-within {\n    border-color: var(--text-04);\n    box-shadow:\n      var(--shadow-02),\n      0 0 0 3px var(--background-neutral-00);\n  }\n\n  .compact-avatar {\n    width: 40px;\n    height: 40px;\n    border-radius: 50%;\n    background: var(--background-neutral-00);\n    display: flex;\n    align-items: center;\n    justify-content: center;\n    flex-shrink: 0;\n    color: var(--text-light-05);\n    box-shadow: 0px 2px 8px rgba(0, 0, 0, 0.1);\n  }\n\n  .compact-input {\n    flex: 1;\n    min-width: 0;\n    padding: var(--onyx-space-sm);\n    border: none;\n    font-size: var(--onyx-font-size-label);\n    line-height: var(--onyx-line-height-label);\n    outline: none;\n    font-family: var(--onyx-font-family);\n    background: transparent;\n    color: var(--text-04);\n    font-weight: 500;\n  }\n\n  .compact-input::placeholder {\n    color: var(--text-04);\n    font-weight: 400;\n  }\n\n  /* Inline citation superscripts */\n  .message-bubble sup {\n    font-size: 0.65em;\n    color: var(--theme-primary-05);\n    font-weight: 700;\n    opacity: 0.5;\n    cursor: default;\n    letter-spacing: -0.02em;\n  }\n\n  /* Citation source row */\n  .citation-list {\n    display: flex;\n    flex-wrap: wrap;\n    align-items: stretch;\n    gap: 6px;\n    margin-top: 10px;\n  }\n\n  .citation-badge {\n    display: inline-flex;\n    align-items: center;\n    gap: 5px;\n    font-size: 12px;\n    font-weight: 500;\n    padding: 4px 10px 4px 8px;\n    border-radius: var(--onyx-radius-08);\n    background: var(--background-neutral-00);\n    color: var(--text-04);\n    text-decoration: none;\n    cursor: pointer;\n    border: 1px solid var(--border-01);\n    transition:\n      border-color 150ms ease,\n      background 150ms ease;\n    line-height: 1.2;\n    font-family: var(--onyx-font-family);\n  }\n\n  .citation-badge .citation-num {\n    font-size: 11px;\n    font-weight: 600;\n    color: var(--text-04);\n    opacity: 0.45;\n    flex-shrink: 0;\n  }\n\n  .citation-badge .citation-title {\n    overflow: hidden;\n    text-overflow: ellipsis;\n    white-space: nowrap;\n    max-width: 180px;\n    font-size: 11px;\n    opacity: 0.8;\n    text-decoration: none;\n  }\n\n  a.citation-badge,\n  a.citation-badge:visited,\n  a.citation-badge:active,\n  a.citation-badge:hover {\n    text-decoration: none !important;\n  }\n\n  a.citation-badge:hover {\n    border-color: var(--theme-primary-05);\n    background: var(--background-neutral-03);\n  }\n\n  span.citation-badge {\n    cursor: default;\n  }\n\n  .citation-more {\n    display: inline-flex;\n    align-items: center;\n    font-size: 11px;\n    font-weight: 500;\n    padding: 4px 10px;\n    border-radius: var(--onyx-radius-08);\n    background: none;\n    color: var(--text-04);\n    opacity: 0.6;\n    border: 1px dashed var(--border-01);\n    cursor: pointer;\n    font-family: var(--onyx-font-family);\n    transition:\n      opacity 150ms ease,\n      border-color 150ms ease;\n  }\n\n  .citation-more:hover {\n    opacity: 1;\n    border-color: var(--theme-primary-05);\n  }\n\n  .citation-list.expanded .citation-more {\n    display: none;\n  }\n\n  .citation-overflow {\n    display: none;\n    flex-wrap: wrap;\n    gap: 6px;\n    width: 100%;\n  }\n\n  .citation-list.expanded .citation-overflow {\n    display: flex;\n  }\n`;\n"
  },
  {
    "path": "widget/src/types/api-types.ts",
    "content": "/**\n * API Types - Mirror backend streaming_models.py packet structure\n */\n\nexport interface Packet {\n  placement?: Record<string, any>;\n  obj: PacketType;\n}\n\nexport type PacketType =\n  | MessageResponseIDInfo\n  | MessageStart\n  | MessageDelta\n  | CitationInfo\n  | SearchToolStart\n  | SearchToolQueriesDelta\n  | SearchToolDocumentsDelta\n  | OpenUrlStart\n  | OpenUrlUrls\n  | OpenUrlDocuments\n  | ImageGenerationStart\n  | ImageGenerationHeartbeat\n  | PythonToolStart\n  | PythonToolDelta\n  | CustomToolStart\n  | ReasoningStart\n  | ReasoningDelta\n  | DeepResearchPlanStart\n  | ResearchAgentStart\n  | IntermediateReportStart\n  | Stop\n  | OverallStop\n  | ErrorPacket;\n\nexport interface MessageResponseIDInfo {\n  type?: \"message_response_id_info\"; // Optional for backend compatibility\n  user_message_id: number | null;\n  reserved_assistant_message_id: number;\n}\n\nexport interface MessageStart {\n  type: \"message_start\";\n}\n\nexport interface MessageDelta {\n  type: \"message_delta\";\n  content: string;\n}\n\nexport interface CitationInfo {\n  type: \"citation_info\";\n  citation_number: number;\n  document_id: string;\n}\n\nexport interface ResolvedCitation {\n  citation_number: number;\n  document_id: string;\n  semantic_identifier?: string;\n  link?: string;\n}\n\nexport interface SearchToolStart {\n  type: \"search_tool_start\";\n  is_internet_search?: boolean;\n}\n\nexport interface SearchToolQueriesDelta {\n  type: \"search_tool_queries_delta\";\n  queries: string[];\n}\n\nexport interface SearchToolDocumentsDelta {\n  type: \"search_tool_documents_delta\";\n  documents: SearchDocument[];\n}\n\nexport interface SearchDocument {\n  document_id: string;\n  semantic_identifier: string;\n  title: string;\n  link?: string;\n}\n\nexport interface OpenUrlStart {\n  type: \"open_url_start\";\n}\n\nexport interface OpenUrlUrls {\n  type: \"open_url_urls\";\n  urls: string[];\n}\n\nexport interface OpenUrlDocuments {\n  type: \"open_url_documents\";\n  documents: SearchDocument[];\n}\n\nexport interface ImageGenerationStart {\n  type: \"image_generation_start\";\n}\n\nexport interface ImageGenerationHeartbeat {\n  type: \"image_generation_heartbeat\";\n}\n\nexport interface PythonToolStart {\n  type: \"python_tool_start\";\n}\n\nexport interface PythonToolDelta {\n  type: \"python_tool_delta\";\n  code?: string;\n}\n\nexport interface CustomToolStart {\n  type: \"custom_tool_start\";\n}\n\nexport interface ReasoningStart {\n  type: \"reasoning_start\";\n}\n\nexport interface ReasoningDelta {\n  type: \"reasoning_delta\";\n  reasoning: string;\n}\n\nexport interface DeepResearchPlanStart {\n  type: \"deep_research_plan_start\";\n}\n\nexport interface ResearchAgentStart {\n  type: \"research_agent_start\";\n}\n\nexport interface IntermediateReportStart {\n  type: \"intermediate_report_start\";\n}\n\nexport interface Stop {\n  type: \"stop\";\n}\n\nexport interface OverallStop {\n  type: \"overall_stop\";\n}\n\nexport interface ErrorPacket {\n  type: \"error\";\n  exception: string;\n}\n\nexport interface Message {\n  id: string;\n  role: \"user\" | \"assistant\";\n  content: string;\n  timestamp: number;\n  isStreaming?: boolean;\n  citations?: ResolvedCitation[];\n}\n\nexport interface ChatSession {\n  id: string;\n  created_at: string;\n  updated_at: string;\n  messages: Message[];\n}\n\nexport interface SendMessageRequest {\n  message: string;\n  chat_session_id?: string;\n  parent_message_id?: number | null;\n  origin?: string;\n  include_citations?: boolean;\n}\n\nexport interface CreateSessionRequest {\n  persona_id?: number;\n}\n\nexport interface CreateSessionResponse {\n  chat_session_id: string;\n}\n"
  },
  {
    "path": "widget/src/types/widget-types.ts",
    "content": "/**\n * Widget-specific types\n */\n\nimport { ResolvedCitation } from \"@/types/api-types\";\n\nexport interface WidgetConfig {\n  // Required\n  backendUrl: string;\n  apiKey: string;\n\n  // Optional - Assistant\n  agentId?: number;\n  agentName?: string;\n  logo?: string;\n\n  // Optional - Customization\n  primaryColor?: string;\n  backgroundColor?: string;\n  textColor?: string;\n\n  // Optional - Display\n  mode?: \"launcher\" | \"inline\";\n\n  // Optional - Citations\n  includeCitations?: boolean;\n}\n\nexport interface ChatState {\n  sessionId?: string;\n  messages: ChatMessage[];\n  isLoading: boolean;\n  error?: string;\n}\n\nexport interface ChatMessage {\n  id: string | number; // string for temporary local IDs, number for backend IDs\n  role: \"user\" | \"assistant\";\n  content: string;\n  timestamp: number;\n  isStreaming?: boolean;\n  citations?: ResolvedCitation[];\n}\n"
  },
  {
    "path": "widget/src/utils/storage.ts",
    "content": "/**\n * Session storage utilities\n */\n\nimport { ChatMessage } from \"@/types/widget-types\";\n\nconst SESSION_KEY = \"onyx-widget-session\";\nconst SESSION_TTL = 24 * 60 * 60 * 1000; // 24 hours\n\nexport interface StoredSession {\n  sessionId: string;\n  messages: ChatMessage[];\n  timestamp: number;\n}\n\n/**\n * Save session to sessionStorage\n */\nexport function saveSession(sessionId: string, messages: ChatMessage[]): void {\n  try {\n    const session: StoredSession = {\n      sessionId,\n      messages,\n      timestamp: Date.now(),\n    };\n    sessionStorage.setItem(SESSION_KEY, JSON.stringify(session));\n  } catch (e) {\n    console.warn(\"Failed to save session:\", e);\n  }\n}\n\n/**\n * Load session from sessionStorage\n * Returns null if session doesn't exist or has expired\n */\nexport function loadSession(): StoredSession | null {\n  try {\n    const data = sessionStorage.getItem(SESSION_KEY);\n    if (!data) return null;\n\n    const session: StoredSession = JSON.parse(data);\n\n    // Check if session has expired\n    if (Date.now() - session.timestamp > SESSION_TTL) {\n      clearSession();\n      return null;\n    }\n\n    return session;\n  } catch (e) {\n    console.warn(\"Failed to load session:\", e);\n    return null;\n  }\n}\n\n/**\n * Clear session from sessionStorage\n */\nexport function clearSession(): void {\n  try {\n    sessionStorage.removeItem(SESSION_KEY);\n  } catch (e) {\n    console.warn(\"Failed to clear session:\", e);\n  }\n}\n\n/**\n * Check if a session exists\n */\nexport function hasSession(): boolean {\n  try {\n    const data = sessionStorage.getItem(SESSION_KEY);\n    if (!data) return false;\n\n    const session: StoredSession = JSON.parse(data);\n\n    // Check if session has expired\n    if (Date.now() - session.timestamp > SESSION_TTL) {\n      clearSession();\n      return false;\n    }\n\n    return true;\n  } catch (e) {\n    return false;\n  }\n}\n"
  },
  {
    "path": "widget/src/widget.ts",
    "content": "/**\n * Onyx Chat Widget - Main Component\n * Orchestrates launcher/inline modes and manages widget lifecycle\n */\n\nimport { LitElement, html, TemplateResult } from \"lit\";\nimport { customElement, property, state } from \"lit/decorators.js\";\nimport { unsafeHTML } from \"lit/directives/unsafe-html.js\";\nimport { marked } from \"marked\";\nimport DOMPurify from \"dompurify\";\nimport { WidgetConfig, ChatMessage } from \"./types/widget-types\";\nimport { SearchDocument, ResolvedCitation } from \"./types/api-types\";\nimport { resolveConfig } from \"./config/config\";\nimport { theme } from \"./styles/theme\";\nimport { widgetStyles } from \"./styles/widget-styles\";\nimport { ApiService } from \"./services/api-service\";\nimport { processPacket } from \"./services/stream-parser\";\nimport { saveSession, loadSession, clearSession } from \"./utils/storage\";\nimport { DEFAULT_LOGO } from \"./assets/logo\";\n\n@customElement(\"onyx-chat-widget\")\nexport class OnyxChatWidget extends LitElement {\n  static styles = [theme, widgetStyles];\n\n  // Configuration attributes\n  @property({ attribute: \"backend-url\" }) backendUrl?: string;\n  @property({ attribute: \"api-key\" }) apiKey?: string;\n  @property({ attribute: \"agent-id\", type: Number }) agentId?: number;\n  @property({ attribute: \"primary-color\" }) primaryColor?: string;\n  @property({ attribute: \"background-color\" }) backgroundColor?: string;\n  @property({ attribute: \"text-color\" }) textColor?: string;\n  @property({ attribute: \"agent-name\" }) agentName?: string;\n  @property({ attribute: \"logo\" }) logo?: string;\n  @property() mode?: \"launcher\" | \"inline\";\n  @property({ attribute: \"include-citations\", type: Boolean })\n  includeCitations?: boolean;\n\n  // Internal state\n  @state() private isOpen = false;\n  @state() private chatSessionId?: string;\n  @state() private messages: ChatMessage[] = [];\n  @state() private isLoading = false;\n  @state() private isStreaming = false;\n  @state() private streamingStatus = \"\"; // e.g., \"Searching the web...\", \"Generating response...\"\n  @state() private error?: string;\n  @state() private inputValue = \"\";\n\n  private config!: WidgetConfig;\n  private apiService!: ApiService;\n  private abortController?: AbortController;\n  // Citation state — plain fields (not @state) since Map mutations don't trigger Lit re-renders\n  private documentMap = new Map<string, SearchDocument>();\n  private citationMap = new Map<number, string>();\n\n  constructor() {\n    super();\n    // Configure marked options\n    marked.setOptions({\n      breaks: true, // Convert \\n to <br>\n      gfm: true, // GitHub Flavored Markdown\n    });\n  }\n\n  updated(changedProperties: Map<string, any>) {\n    super.updated(changedProperties);\n\n    // Auto-scroll when messages change or streaming status changes\n    if (\n      changedProperties.has(\"messages\") ||\n      changedProperties.has(\"isStreaming\")\n    ) {\n      this.scrollToBottom();\n    }\n  }\n\n  private scrollToBottom() {\n    // Use requestAnimationFrame to ensure DOM is updated\n    requestAnimationFrame(() => {\n      const messagesContainer = this.shadowRoot?.querySelector(\".messages\");\n      if (messagesContainer) {\n        messagesContainer.scrollTop = messagesContainer.scrollHeight;\n      }\n    });\n  }\n\n  connectedCallback() {\n    super.connectedCallback();\n\n    // Resolve configuration\n    this.config = resolveConfig({\n      backendUrl: this.backendUrl,\n      apiKey: this.apiKey,\n      agentId: this.agentId,\n      primaryColor: this.primaryColor,\n      backgroundColor: this.backgroundColor,\n      textColor: this.textColor,\n      agentName: this.agentName,\n      logo: this.logo,\n      mode: this.mode,\n      includeCitations: this.includeCitations,\n    });\n\n    // Apply custom colors\n    this.applyCustomColors();\n\n    // Initialize API service\n    this.apiService = new ApiService(\n      this.config.backendUrl,\n      this.config.apiKey,\n    );\n\n    // Load persisted session\n    const stored = loadSession();\n    if (stored) {\n      this.chatSessionId = stored.sessionId;\n      this.messages = stored.messages;\n    }\n\n    // Auto-open if inline mode\n    if (this.config.mode === \"inline\") {\n      this.isOpen = true;\n    }\n  }\n\n  private applyCustomColors() {\n    // Primary color (buttons, accents)\n    if (this.config.primaryColor) {\n      this.style.setProperty(\"--theme-primary-05\", this.config.primaryColor);\n      this.style.setProperty(\n        \"--theme-primary-06\",\n        this.adjustBrightness(this.config.primaryColor, -10),\n      );\n    }\n\n    // Background color\n    if (this.config.backgroundColor) {\n      this.style.setProperty(\n        \"--background-neutral-00\",\n        this.config.backgroundColor,\n      );\n      this.style.setProperty(\n        \"--background-neutral-03\",\n        this.adjustBrightness(this.config.backgroundColor, -10),\n      );\n    }\n\n    // Text color\n    if (this.config.textColor) {\n      this.style.setProperty(\"--text-04\", this.config.textColor);\n    }\n  }\n\n  private adjustBrightness(color: string, percent: number): string {\n    const num = parseInt(color.replace(\"#\", \"\"), 16);\n    const amt = Math.round(2.55 * percent);\n    const R = (num >> 16) + amt;\n    const G = ((num >> 8) & 0x00ff) + amt;\n    const B = (num & 0x0000ff) + amt;\n    return (\n      \"#\" +\n      (\n        0x1000000 +\n        (R < 255 ? (R < 1 ? 0 : R) : 255) * 0x10000 +\n        (G < 255 ? (G < 1 ? 0 : G) : 255) * 0x100 +\n        (B < 255 ? (B < 1 ? 0 : B) : 255)\n      )\n        .toString(16)\n        .slice(1)\n    );\n  }\n\n  /**\n   * Public API: Reset conversation\n   */\n  public resetConversation() {\n    // Abort any active streaming request first\n    if (this.abortController) {\n      this.abortController.abort();\n      this.abortController = undefined;\n    }\n\n    this.messages = [];\n    this.chatSessionId = undefined;\n    this.error = undefined;\n    this.inputValue = \"\";\n    this.isStreaming = false;\n    this.isLoading = false;\n    this.streamingStatus = \"\";\n    this.documentMap.clear();\n    this.citationMap.clear();\n    clearSession();\n  }\n\n  /**\n   * Render markdown content safely.\n   * Strips [[n]](url) citation links before markdown parsing so they render\n   * as plain [n] text references. Citation badges are rendered separately.\n   * Renumbers citations to sequential display numbers (1, 2, 3...).\n   */\n  private renderMarkdown(content: string, citations?: ResolvedCitation[]) {\n    try {\n      let stripped = content;\n      if (this.config.includeCitations) {\n        if (citations?.length) {\n          // Build a map from backend citation number → sequential display number\n          const displayMap = new Map<number, number>();\n          citations.forEach((c, i) => displayMap.set(c.citation_number, i + 1));\n\n          // Replace [[n]](url) with superscript-style display number\n          stripped = stripped.replace(\n            /\\[\\[(\\d+)\\]\\]\\([^)]*\\)/g,\n            (_match, num) => {\n              const displayNum = displayMap.get(Number(num));\n              return displayNum ? `<sup>[${displayNum}]</sup>` : \"\";\n            },\n          );\n        } else {\n          // Still streaming or no citations resolved yet — strip raw links\n          stripped = stripped.replace(/\\[\\[(\\d+)\\]\\]\\([^)]*\\)/g, \"\");\n        }\n      }\n      const htmlContent = marked.parse(stripped, { async: false }) as string;\n      const sanitizedHTML = DOMPurify.sanitize(htmlContent, {\n        ADD_TAGS: [\"sup\"],\n      });\n      return unsafeHTML(sanitizedHTML);\n    } catch (err) {\n      console.error(\"Failed to parse markdown:\", err);\n      return content; // Fallback to plain text\n    }\n  }\n\n  private static readonly CITATIONS_COLLAPSED_COUNT = 1;\n\n  /**\n   * Render a single citation badge.\n   */\n  private renderCitationBadge(\n    c: ResolvedCitation,\n    displayNum: number,\n  ): TemplateResult {\n    const title = c.semantic_identifier || \"Source\";\n    const safeHref =\n      c.link && /^https?:\\/\\//i.test(c.link) ? c.link : undefined;\n    return safeHref\n      ? html`<a\n          class=\"citation-badge\"\n          href=${safeHref}\n          target=\"_blank\"\n          rel=\"noopener noreferrer\"\n          title=${title}\n          ><span class=\"citation-num\">${displayNum}</span\n          ><span class=\"citation-title\">${title}</span></a\n        >`\n      : html`<span class=\"citation-badge\" title=${title}\n          ><span class=\"citation-num\">${displayNum}</span\n          ><span class=\"citation-title\">${title}</span></span\n        >`;\n  }\n\n  /**\n   * Toggle expanded state for a citation list.\n   */\n  private toggleCitationExpand(e: Event): void {\n    const container = (e.target as HTMLElement).closest(\".citation-list\");\n    if (container) {\n      container.classList.toggle(\"expanded\");\n    }\n  }\n\n  /**\n   * Render citation badges for a message.\n   * Shows first 3 inline, collapses the rest behind a \"+N more\" toggle.\n   */\n  private renderCitations(\n    citations?: ResolvedCitation[],\n  ): string | TemplateResult {\n    if (!citations?.length) return \"\";\n    const limit = OnyxChatWidget.CITATIONS_COLLAPSED_COUNT;\n    const visible = citations.slice(0, limit);\n    const overflow = citations.slice(limit);\n\n    return html`\n      <div class=\"citation-list\">\n        ${visible.map((c, i) => this.renderCitationBadge(c, i + 1))}\n        ${overflow.length > 0\n          ? html`\n              <button class=\"citation-more\" @click=${this.toggleCitationExpand}>\n                +${overflow.length} more\n              </button>\n              <div class=\"citation-overflow\">\n                ${overflow.map((c, i) =>\n                  this.renderCitationBadge(c, limit + i + 1),\n                )}\n              </div>\n            `\n          : \"\"}\n      </div>\n    `;\n  }\n\n  private toggleOpen() {\n    this.isOpen = !this.isOpen;\n  }\n\n  private close() {\n    if (this.config.mode === \"launcher\") {\n      this.isOpen = false;\n    }\n  }\n\n  private handleInput(e: InputEvent) {\n    this.inputValue = (e.target as HTMLInputElement).value;\n  }\n\n  private handleKeyDown(e: KeyboardEvent) {\n    if (e.key === \"Enter\" && !e.shiftKey) {\n      e.preventDefault();\n      this.sendMessage();\n    }\n  }\n\n  private async sendMessage() {\n    const message = this.inputValue.trim();\n    if (!message || this.isLoading || this.isStreaming) return;\n\n    // Clear input immediately\n    this.inputValue = \"\";\n\n    // Add user message\n    const userMessage: ChatMessage = {\n      id: `msg-${Date.now()}`,\n      role: \"user\",\n      content: message,\n      timestamp: Date.now(),\n    };\n    this.messages = [...this.messages, userMessage];\n\n    try {\n      this.isStreaming = true;\n      this.error = undefined;\n\n      // Create session if needed\n      if (!this.chatSessionId) {\n        this.isLoading = true;\n        this.chatSessionId = await this.apiService.createChatSession(\n          this.config.agentId,\n        );\n        this.isLoading = false;\n      }\n\n      // Get parent message ID (last assistant message with a numeric ID from backend)\n      const parentMessage = [...this.messages]\n        .reverse()\n        .find((m) => m.role === \"assistant\" && typeof m.id === \"number\");\n      const parentMessageId =\n        parentMessage && typeof parentMessage.id === \"number\"\n          ? parentMessage.id\n          : null;\n\n      // Stream response\n      this.abortController = new AbortController();\n      let currentMessage: ChatMessage | null = null;\n      let assistantMessageId: number | null = null;\n\n      for await (const packet of this.apiService.streamMessage({\n        message,\n        chatSessionId: this.chatSessionId,\n        parentMessageId,\n        signal: this.abortController.signal,\n        includeCitations: this.config.includeCitations,\n      })) {\n        const result = processPacket(packet, currentMessage);\n\n        // Capture message IDs from backend and update local messages\n        if (result.messageIds) {\n          // Update user message ID if we got one\n          if (result.messageIds.userMessageId !== null) {\n            const userMsgIndex = this.messages.findIndex(\n              (m) => m.id === userMessage.id,\n            );\n            if (userMsgIndex >= 0) {\n              // Create new array to trigger reactivity\n              const updatedMessage = {\n                ...this.messages[userMsgIndex],\n                id: result.messageIds.userMessageId,\n              };\n              this.messages = [\n                ...this.messages.slice(0, userMsgIndex),\n                updatedMessage,\n                ...this.messages.slice(userMsgIndex + 1),\n              ];\n            }\n          }\n          // Store assistant message ID to apply when message is created\n          assistantMessageId = result.messageIds.assistantMessageId;\n        }\n\n        // Update status if provided\n        if (result.status !== undefined) {\n          this.streamingStatus = result.status;\n        }\n\n        // Accumulate document metadata for citation resolution\n        if (result.documents) {\n          for (const doc of result.documents) {\n            this.documentMap.set(doc.document_id, doc);\n          }\n        }\n\n        // Accumulate citation mappings for the current message\n        if (result.citation) {\n          this.citationMap.set(\n            result.citation.citation_number,\n            result.citation.document_id,\n          );\n        }\n\n        if (result.message) {\n          // Reset per-message citation state when a new message starts\n          if (\n            result.message.isStreaming &&\n            result.message.content === \"\" &&\n            currentMessage === null\n          ) {\n            this.citationMap.clear();\n          }\n\n          currentMessage = result.message;\n\n          // Apply the backend message ID if we have it and message doesn't have a numeric ID yet\n          if (\n            assistantMessageId !== null &&\n            typeof currentMessage.id !== \"number\"\n          ) {\n            currentMessage.id = assistantMessageId;\n          }\n\n          // When message is complete, resolve citations and attach to message\n          if (!currentMessage.isStreaming && this.citationMap.size > 0) {\n            const resolved: ResolvedCitation[] = [];\n            for (const [citNum, docId] of this.citationMap) {\n              const doc = this.documentMap.get(docId);\n              resolved.push({\n                citation_number: citNum,\n                document_id: docId,\n                semantic_identifier: doc?.semantic_identifier,\n                link: doc?.link ?? undefined,\n              });\n            }\n            resolved.sort((a, b) => a.citation_number - b.citation_number);\n            currentMessage = { ...currentMessage, citations: resolved };\n          }\n\n          // Update or add message\n          const existingIndex = this.messages.findIndex(\n            (m) => m.id === currentMessage?.id,\n          );\n          if (existingIndex >= 0) {\n            this.messages = [\n              ...this.messages.slice(0, existingIndex),\n              currentMessage,\n              ...this.messages.slice(existingIndex + 1),\n            ];\n          } else {\n            this.messages = [...this.messages, currentMessage];\n          }\n\n          // Clear streaming state and persist when message is complete\n          if (!currentMessage.isStreaming) {\n            this.isStreaming = false;\n            this.streamingStatus = \"\";\n            saveSession(this.chatSessionId, this.messages);\n          }\n        }\n      }\n    } catch (err: any) {\n      console.error(\"Failed to send message:\", err);\n      if (err.name !== \"AbortError\") {\n        this.error = err.message || \"Failed to send message\";\n      }\n    } finally {\n      this.isStreaming = false;\n      this.isLoading = false;\n      this.streamingStatus = \"\";\n      this.abortController = undefined;\n    }\n  }\n\n  render() {\n    const showContainer = this.config.mode === \"inline\" || this.isOpen;\n    const hasMessages = this.messages.length > 0 || this.isStreaming;\n    const isCompactInline = this.config.mode === \"inline\" && !hasMessages;\n\n    return html`\n      ${this.config.mode === \"launcher\"\n        ? html`\n            <button\n              class=\"launcher\"\n              @click=${this.toggleOpen}\n              title=\"Open chat\"\n            >\n              <img\n                src=\"${this.config.logo || DEFAULT_LOGO}\"\n                alt=\"Logo\"\n                style=\"width: 32px; height: 32px; object-fit: contain;\"\n              />\n            </button>\n          `\n        : \"\"}\n      ${showContainer\n        ? html`\n            <div\n              class=\"container ${this.config.mode === \"inline\"\n                ? \"inline\"\n                : \"\"} ${isCompactInline ? \"compact\" : \"\"}\"\n            >\n              ${isCompactInline\n                ? this.renderCompactInput()\n                : html`\n                    ${this.renderHeader()} ${this.renderMessages()}\n                    ${this.renderInput()}\n                  `}\n            </div>\n          `\n        : \"\"}\n    `;\n  }\n\n  private renderHeader() {\n    return html`\n      <div class=\"header\">\n        <div class=\"header-left\">\n          <div class=\"avatar\">\n            <img\n              src=\"${this.config.logo || DEFAULT_LOGO}\"\n              alt=\"Logo\"\n              style=\"width: 100%; height: 100%; object-fit: contain;\"\n            />\n          </div>\n          <div class=\"header-title\">\n            ${this.config.agentName || \"Assistant\"}\n          </div>\n        </div>\n        <div class=\"header-right\">\n          <button\n            class=\"icon-button\"\n            @click=${this.resetConversation}\n            title=\"Reset conversation\"\n          >\n            <svg\n              width=\"16\"\n              height=\"16\"\n              viewBox=\"0 0 16 16\"\n              fill=\"none\"\n              stroke=\"currentColor\"\n            >\n              <path\n                d=\"M14.448 3.10983V6.77746M14.448 6.77746H10.7803M14.448 6.77746L11.6117 4.11231C10.9547 3.45502 10.142 2.97486 9.24923 2.71664C8.35651 2.45842 7.41292 2.43055 6.50651 2.63564C5.6001 2.84072 4.76042 3.27208 4.06581 3.88945C3.3712 4.50683 2.84431 5.2901 2.53429 6.16618M1 12.8902V9.22254M1 9.22254H4.66763M1 9.22254L3.8363 11.8877C4.49326 12.545 5.30603 13.0251 6.19875 13.2834C7.09147 13.5416 8.03506 13.5694 8.94147 13.3644C9.84787 13.1593 10.6876 12.7279 11.3822 12.1105C12.0768 11.4932 12.6037 10.7099 12.9137 9.83381\"\n                stroke-width=\"1.5\"\n                stroke-linecap=\"round\"\n                stroke-linejoin=\"round\"\n              />\n            </svg>\n          </button>\n          ${this.config.mode === \"launcher\"\n            ? html`\n                <button class=\"icon-button\" @click=${this.close} title=\"Close\">\n                  <svg\n                    width=\"16\"\n                    height=\"16\"\n                    viewBox=\"0 0 28 28\"\n                    fill=\"none\"\n                    stroke=\"currentColor\"\n                  >\n                    <path\n                      d=\"M21 7L7 21M7 7L21 21\"\n                      stroke-width=\"2\"\n                      stroke-linejoin=\"round\"\n                    />\n                  </svg>\n                </button>\n              `\n            : \"\"}\n        </div>\n      </div>\n    `;\n  }\n\n  private renderMessages() {\n    // Check if there's a streaming message with content\n    const hasStreamingContent = this.messages.some(\n      (m) => m.role === \"assistant\" && m.isStreaming && m.content.length > 0,\n    );\n    // Show ellipsis only when: streaming AND (has status text OR no content yet)\n    const showEllipsis =\n      this.isStreaming && (this.streamingStatus || !hasStreamingContent);\n\n    return html`\n      <div class=\"disclaimer\">\n        Responses are generated by AI and may be inaccurate\n      </div>\n      <div class=\"messages\">\n        ${this.error ? html` <div class=\"error\">${this.error}</div> ` : \"\"}\n        ${this.messages.map(\n          (msg) => html`\n            <div class=\"message ${msg.role}\">\n              <div class=\"message-bubble\">\n                ${msg.role === \"assistant\"\n                  ? html`${this.renderMarkdown(\n                      msg.content,\n                      msg.citations,\n                    )}${this.renderCitations(msg.citations)}`\n                  : msg.content}\n              </div>\n            </div>\n          `,\n        )}\n        ${showEllipsis\n          ? html`\n              <div class=\"message assistant\">\n                <div class=\"message-bubble\">\n                  <div class=\"status-container\">\n                    <div class=\"typing-indicator\">\n                      <div class=\"typing-dot\"></div>\n                      <div class=\"typing-dot\"></div>\n                      <div class=\"typing-dot\"></div>\n                    </div>\n                    ${this.streamingStatus\n                      ? html`\n                          <span class=\"status-text\"\n                            >${this.streamingStatus}</span\n                          >\n                        `\n                      : \"\"}\n                  </div>\n                </div>\n              </div>\n            `\n          : \"\"}\n      </div>\n    `;\n  }\n\n  private renderInput() {\n    return html`\n      <div class=\"input-wrapper\">\n        <div class=\"input-container\">\n          <input\n            class=\"input\"\n            type=\"text\"\n            .value=${this.inputValue}\n            @input=${this.handleInput}\n            @keydown=${this.handleKeyDown}\n            placeholder=\"Type your message...\"\n            ?disabled=${this.isLoading || this.isStreaming}\n          />\n          <button\n            class=\"send-button\"\n            @click=${this.sendMessage}\n            ?disabled=${!this.inputValue.trim() ||\n            this.isLoading ||\n            this.isStreaming}\n            title=\"Send message\"\n          >\n            <svg\n              width=\"20\"\n              height=\"20\"\n              viewBox=\"0 0 18 18\"\n              fill=\"none\"\n              stroke=\"currentColor\"\n            >\n              <path\n                d=\"M8 2.6665V13.3335M8 2.6665L4 6.6665M8 2.6665L12 6.6665\"\n                stroke-width=\"2\"\n                stroke-linecap=\"round\"\n                stroke-linejoin=\"round\"\n              />\n            </svg>\n          </button>\n        </div>\n        <div class=\"powered-by\">\n          Powered by\n          <a\n            href=\"https://onyx.app\"\n            target=\"_blank\"\n            rel=\"noopener noreferrer\"\n            style=\"text-decoration: underline;\"\n            >Onyx</a\n          >\n        </div>\n      </div>\n    `;\n  }\n\n  private renderCompactInput() {\n    return html`\n      <div class=\"compact-input-container\">\n        <div class=\"compact-avatar\">\n          <img\n            src=\"${this.config.logo || DEFAULT_LOGO}\"\n            alt=\"Logo\"\n            style=\"width: 100%; height: 100%; object-fit: contain;\"\n          />\n        </div>\n        <input\n          class=\"compact-input\"\n          type=\"text\"\n          .value=${this.inputValue}\n          @input=${this.handleInput}\n          @keydown=${this.handleKeyDown}\n          placeholder=\"Ask ${this.config.agentName || \"Assistant\"} anything...\"\n          ?disabled=${this.isLoading || this.isStreaming}\n        />\n        <button\n          class=\"send-button\"\n          @click=${this.sendMessage}\n          ?disabled=${!this.inputValue.trim() ||\n          this.isLoading ||\n          this.isStreaming}\n          title=\"Send message\"\n        >\n          <svg\n            width=\"18\"\n            height=\"18\"\n            viewBox=\"0 0 18 18\"\n            fill=\"none\"\n            stroke=\"currentColor\"\n          >\n            <path\n              d=\"M8 2.6665V13.3335M8 2.6665L4 6.6665M8 2.6665L12 6.6665\"\n              stroke-width=\"2\"\n              stroke-linecap=\"round\"\n              stroke-linejoin=\"round\"\n            />\n          </svg>\n        </button>\n      </div>\n    `;\n  }\n}\n\ndeclare global {\n  interface HTMLElementTagNameMap {\n    \"onyx-chat-widget\": OnyxChatWidget;\n  }\n}\n"
  },
  {
    "path": "widget/tsconfig.json",
    "content": "{\n  \"compilerOptions\": {\n    \"target\": \"ES2020\",\n    \"useDefineForClassFields\": false,\n    \"experimentalDecorators\": true,\n    \"lib\": [\n      \"ES2020\",\n      \"DOM\",\n      \"DOM.Iterable\"\n    ],\n    \"types\": [\"vite/client\"],\n    \"module\": \"ESNext\",\n    \"skipLibCheck\": true,\n    \"esModuleInterop\": true,\n    \"allowSyntheticDefaultImports\": true,\n\n    /* Bundler mode */\n    \"moduleResolution\": \"bundler\",\n    \"allowImportingTsExtensions\": true,\n    \"resolveJsonModule\": true,\n    \"isolatedModules\": true,\n    \"noEmit\": true,\n\n    /* Linting */\n    \"strict\": true,\n    \"noUnusedLocals\": true,\n    \"noUnusedParameters\": true,\n    \"noFallthroughCasesInSwitch\": true,\n    \"baseUrl\": \".\",\n    \"paths\": {\n      \"@/*\": [\n        \"src/*\"\n      ]\n    }\n  },\n  \"include\": [\n    \"src\"\n  ]\n}\n"
  },
  {
    "path": "widget/vite.config.ts",
    "content": "import { defineConfig, loadEnv } from \"vite\";\nimport { resolve } from \"path\";\n\nexport default defineConfig(({ mode }) => {\n  const isSelfHosted = mode === \"self-hosted\";\n\n  // Load env file based on mode\n  const env = loadEnv(mode, process.cwd(), \"\");\n\n  return {\n    resolve: {\n      alias: {\n        \"@\": resolve(__dirname, \"./src\"),\n      },\n    },\n    build: {\n      lib: {\n        entry: resolve(__dirname, \"src/index.ts\"),\n        name: \"OnyxWidget\",\n        fileName: \"onyx-widget\",\n        formats: [\"es\"],\n      },\n      rollupOptions: {\n        output: {\n          inlineDynamicImports: true,\n        },\n      },\n      sourcemap: false,\n      minify: \"terser\",\n      terserOptions: {\n        compress: {\n          drop_console: true,\n        },\n      },\n    },\n    define: isSelfHosted\n      ? {\n          \"import.meta.env.VITE_WIDGET_BACKEND_URL\": JSON.stringify(\n            env.VITE_WIDGET_BACKEND_URL,\n          ),\n          \"import.meta.env.VITE_WIDGET_API_KEY\": JSON.stringify(\n            env.VITE_WIDGET_API_KEY,\n          ),\n        }\n      : {},\n  };\n});\n"
  }
]